From 48fc9d613323ada9702a7d5c78c23eb0e8cae8a8 Mon Sep 17 00:00:00 2001
From: David Hoffman
Date: Wed, 1 Feb 2017 15:42:58 -0500
Subject: [PATCH 001/933] BUG: Fix overflow error in cartesian_product

When the numbers in `X` are large, they can cause an overflow error on
Windows machines, where the native `int` is 32-bit. Switching to np.intp
alleviates this problem. Other possible fixes include switching to
np.uint32 or np.uint64.

closes #15234

Author: David Hoffman

Closes #15265 from david-hoffman/patch-1 and squashes the following commits:

c9c8d5e [David Hoffman] Update v0.19.2.txt
d54583e [David Hoffman] Remove `test_large_input` because it's too big
47a6c6c [David Hoffman] Update test so that it will actually run on a "normal" machine
7aeee85 [David Hoffman] Added tests for large numbers
b196878 [David Hoffman] Fix overflow error in cartesian_product
---
 doc/source/whatsnew/v0.20.0.txt | 1 +
 pandas/tools/util.py            | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index f87fad051fad2..34048b8cc372d 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -444,6 +444,7 @@ Bug Fixes
 - Bug in compat for passing long integers to ``Timestamp.replace`` (:issue:`15030`)
 - Bug in ``.loc`` that would not return the correct dtype for scalar access for a DataFrame (:issue:`11617`)
 - Bug in ``GroupBy.get_group()`` failing with a categorical grouper (:issue:`15155`)
+- Bug in ``pandas.tools.utils.cartesian_product()`` where large input could cause an overflow on Windows (:issue:`15265`)



diff --git a/pandas/tools/util.py b/pandas/tools/util.py
index 381e29283d417..8ec074fbf5950 100644
--- a/pandas/tools/util.py
+++ b/pandas/tools/util.py
@@ -58,7 +58,7 @@ def cartesian_product(X):
     if len(X) == 0:
         return []

-    lenX = np.fromiter((len(x) for x in X), dtype=int)
+    lenX = np.fromiter((len(x) for x in X), dtype=np.intp)
     cumprodX = np.cumproduct(lenX)

     a = np.roll(cumprodX, 1)

From 845208055845b0db58d2bfee7ba39f6862ce141c Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Wed, 1 Feb 2017 17:55:40 -0500
Subject: [PATCH 002/933] COMPAT: xarray 0.8.2 test compat w.r.t.
 CategoricalIndex idempotency (#15285)

closes #15282
---
 ci/install_travis.sh         |  2 +-
 ci/requirements-2.7.run      |  2 +-
 ci/requirements-3.5.pip      |  1 +
 ci/requirements-3.5.run      |  1 -
 pandas/tests/test_generic.py | 19 +++++++++++--------
 5 files changed, 14 insertions(+), 11 deletions(-)
 create mode 100644 ci/requirements-3.5.pip

diff --git a/ci/install_travis.sh b/ci/install_travis.sh
index ded428c677f17..52b52d787aade 100755
--- a/ci/install_travis.sh
+++ b/ci/install_travis.sh
@@ -143,7 +143,7 @@ else
     echo "[pip installs]"
     REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip"
     if [ -e ${REQ} ]; then
-        pip install --upgrade -r $REQ
+        pip install -r $REQ
     fi

    # may have additional installation instructions for this build

diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run
index 2bfb8a3777fdf..b5fc919297c76 100644
--- a/ci/requirements-2.7.run
+++ b/ci/requirements-2.7.run
@@ -20,4 +20,4 @@ html5lib=1.0b2
 beautiful-soup=4.2.1
 statsmodels
 jinja2=2.8
-xarray
+xarray=0.8.0

diff --git a/ci/requirements-3.5.pip b/ci/requirements-3.5.pip
new file mode 100644
index 0000000000000..0d9e44cf39fa4
--- /dev/null
+++ b/ci/requirements-3.5.pip
@@ -0,0 +1 @@
+xarray==0.9.1

diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run
index e15ca6079b4fe..ef354195c8f23 100644
--- a/ci/requirements-3.5.run
+++ b/ci/requirements-3.5.run
@@ -16,6 +16,5 @@ bottleneck
 sqlalchemy
 pymysql
 psycopg2
-xarray
 s3fs
 beautifulsoup4

diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py
index f7b7ae8c66382..0ca8ba47b8a8f 100644
--- a/pandas/tests/test_generic.py
+++ b/pandas/tests/test_generic.py
@@ -7,6 +7,7 @@
 from numpy import nan
 import pandas as pd
+from distutils.version import LooseVersion
 from pandas.types.common import is_scalar
 from pandas import (Index, Series, DataFrame, Panel, isnull,
                     date_range, period_range, Panel4D)
@@ -870,6 +871,7 @@ def test_describe_none(self):

     def test_to_xarray(self):
         tm._skip_if_no_xarray()
+        import xarray
         from xarray import DataArray

         s = Series([])
@@ -895,15 +897,16 @@ def testit(index, check_index_type=True, check_categorical=True):
                               check_index_type=check_index_type,
                               check_categorical=check_categorical)

-        for index in [tm.makeFloatIndex, tm.makeIntIndex,
-                      tm.makeStringIndex, tm.makeUnicodeIndex,
-                      tm.makeDateIndex, tm.makePeriodIndex,
-                      tm.makeTimedeltaIndex]:
-            testit(index)
+        l = [tm.makeFloatIndex, tm.makeIntIndex,
+             tm.makeStringIndex, tm.makeUnicodeIndex,
+             tm.makeDateIndex, tm.makePeriodIndex,
+             tm.makeTimedeltaIndex]
+
+        if LooseVersion(xarray.__version__) >= '0.8.0':
+            l.append(tm.makeCategoricalIndex)

-        # not idempotent
-        testit(tm.makeCategoricalIndex, check_index_type=False,
-               check_categorical=False)
+        for index in l:
+            testit(index)

         s = Series(range(6))
         s.index.name = 'foo'

From f6cfaabad9b9de6d0382e51a77b080723f84d778 Mon Sep 17 00:00:00 2001
From: Michael Lamparski
Date: Thu, 2 Feb 2017 15:26:12 -0500
Subject: [PATCH 003/933] BUG: Support empty dict-likes in replace()

closes #15289

Author: Michael Lamparski

Closes #15294 from ExpHP/bugfix-15289 and squashes the following commits:

f349e0a [Michael Lamparski] BUG: Support empty dict-likes in replace()
---
 doc/source/whatsnew/v0.20.0.txt     |  1 +
 pandas/core/generic.py              |  6 +++---
 pandas/tests/frame/test_replace.py  | 10 ++++++++++
 pandas/tests/series/test_replace.py |  6 ++++++
 4 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 34048b8cc372d..d76a78c68fb73 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ 
b/doc/source/whatsnew/v0.20.0.txt @@ -512,3 +512,4 @@ Bug Fixes - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) +- Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 869062bd231fe..8074b167ff176 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -48,7 +48,7 @@ from pandas.tseries.frequencies import to_offset from pandas import compat from pandas.compat.numpy import function as nv -from pandas.compat import (map, zip, lrange, string_types, +from pandas.compat import (map, zip, lzip, lrange, string_types, isidentifier, set_function_name) import pandas.core.nanops as nanops from pandas.util.decorators import Appender, Substitution, deprecate_kwarg @@ -3509,7 +3509,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, regex = True items = list(compat.iteritems(to_replace)) - keys, values = zip(*items) + keys, values = lzip(*items) or ([], []) are_mappings = [is_dict_like(v) for v in values] @@ -3523,7 +3523,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, value_dict = {} for k, v in items: - keys, values = zip(*v.items()) + keys, values = lzip(*v.items()) or ([], []) if set(keys) & set(values): raise ValueError("Replacement not allowed with " "overlapping keys and values") diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index adc7af225588c..f46215105b375 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -1055,3 +1055,13 @@ def test_replace_datetimetz(self): Timestamp('20130103', tz='US/Eastern')], 'B': [0, np.nan, 2]}) assert_frame_equal(result, expected) + + def test_replace_with_empty_dictlike(self): + # GH 15289 + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + df = DataFrame(mix) + assert_frame_equal(df, df.replace({})) + assert_frame_equal(df, df.replace(Series([]))) + + assert_frame_equal(df, df.replace({'b': {}})) + assert_frame_equal(df, df.replace(Series({'b': {}}))) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index d80328ea3863a..aa16f2cca9475 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -223,3 +223,9 @@ def test_replace2(self): self.assertTrue((ser[:5] == -1).all()) self.assertTrue((ser[6:10] == -1).all()) self.assertTrue((ser[20:30] == -1).all()) + + def test_replace_with_empty_dictlike(self): + # GH 15289 + s = pd.Series(list('abcd')) + tm.assert_series_equal(s, s.replace(dict())) + tm.assert_series_equal(s, s.replace(pd.Series([]))) From da92a5c94cb1d3c0f6044b783bc0ac4e7acb2dc2 Mon Sep 17 00:00:00 2001 From: TrigonaMinima Date: Thu, 2 Feb 2017 20:13:54 -0500 Subject: [PATCH 004/933] TST: DatetimeIndex compiled together in test_datetime.py xref #14854 Author: TrigonaMinima Closes #15266 from TrigonaMinima/issue-14854-datetime and squashes the following commits: 6ee2bd9 [TrigonaMinima] TST: Splitting test_datetime.py into smaller chunks (gh14854) 415a748 [TrigonaMinima] TST: Moving DatetimeIndex related tests from test_timeseries.py and flake8 fixes c43c7de [TrigonaMinima] TST: proper naming of files 458d141 [TrigonaMinima] TST: splitting test_datetime.py 1ff0819 [TrigonaMinima] TST: fix flake8 errors - test_datetime.py (GH14854) 9311161 [TrigonaMinima] TST: reorg of DatetimeIndex tests from tseries/tests/test_base.py to 
test_datetime.py (GH14854) 54421a5 [TrigonaMinima] TST: reorg of DatetimeIndex tests from test_datetimelike.py to test_datetime.py (GH14854) f83814b [TrigonaMinima] TST: reorg of DatetimeIndex tests from test_timeseries.py to test_datetime.py --- pandas/tests/indexes/datetimes/__init__.py | 0 pandas/tests/indexes/datetimes/test_astype.py | 122 ++ .../indexes/datetimes/test_construction.py | 425 +++++ .../tests/indexes/datetimes/test_datetime.py | 836 +++++++++ .../tests/indexes/datetimes/test_indexing.py | 244 +++ pandas/tests/indexes/datetimes/test_misc.py | 333 ++++ .../tests/indexes/datetimes/test_missing.py | 51 + pandas/tests/indexes/datetimes/test_ops.py | 1073 ++++++++++++ pandas/tests/indexes/datetimes/test_setops.py | 168 ++ pandas/tests/indexes/test_datetimelike.py | 669 +------- pandas/tseries/tests/test_base.py | 897 +--------- pandas/tseries/tests/test_timeseries.py | 1521 +---------------- setup.py | 1 + 13 files changed, 3259 insertions(+), 3081 deletions(-) create mode 100644 pandas/tests/indexes/datetimes/__init__.py create mode 100644 pandas/tests/indexes/datetimes/test_astype.py create mode 100644 pandas/tests/indexes/datetimes/test_construction.py create mode 100644 pandas/tests/indexes/datetimes/test_datetime.py create mode 100644 pandas/tests/indexes/datetimes/test_indexing.py create mode 100644 pandas/tests/indexes/datetimes/test_misc.py create mode 100644 pandas/tests/indexes/datetimes/test_missing.py create mode 100644 pandas/tests/indexes/datetimes/test_ops.py create mode 100644 pandas/tests/indexes/datetimes/test_setops.py diff --git a/pandas/tests/indexes/datetimes/__init__.py b/pandas/tests/indexes/datetimes/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py new file mode 100644 index 0000000000000..f64d18a69a093 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -0,0 +1,122 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas import (DatetimeIndex, date_range, Series, NaT, Index, Timestamp, + Int64Index) + + +class TestDatetimeIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_astype(self): + # GH 13149, GH 13209 + idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) + + result = idx.astype(object) + expected = Index([Timestamp('2016-05-16')] + [NaT] * 3, dtype=object) + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index([1463356800000000000] + + [-9223372036854775808] * 3, dtype=np.int64) + tm.assert_index_equal(result, expected) + + rng = date_range('1/1/2000', periods=10) + result = rng.astype('i8') + self.assert_index_equal(result, Index(rng.asi8)) + self.assert_numpy_array_equal(result.values, rng.asi8) + + def test_astype_with_tz(self): + + # with tz + rng = date_range('1/1/2000', periods=10, tz='US/Eastern') + result = rng.astype('datetime64[ns]') + expected = (date_range('1/1/2000', periods=10, + tz='US/Eastern') + .tz_convert('UTC').tz_localize(None)) + tm.assert_index_equal(result, expected) + + # BUG#10442 : testing astype(str) is correct for Series/DatetimeIndex + result = pd.Series(pd.date_range('2012-01-01', periods=3)).astype(str) + expected = pd.Series( + ['2012-01-01', '2012-01-02', '2012-01-03'], dtype=object) + tm.assert_series_equal(result, expected) + + result = Series(pd.date_range('2012-01-01', periods=3, + tz='US/Eastern')).astype(str) + expected = Series(['2012-01-01 00:00:00-05:00', + '2012-01-02 
00:00:00-05:00', + '2012-01-03 00:00:00-05:00'], + dtype=object) + tm.assert_series_equal(result, expected) + + def test_astype_str_compat(self): + # GH 13149, GH 13209 + # verify that we are returing NaT as a string (and not unicode) + + idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) + result = idx.astype(str) + expected = Index(['2016-05-16', 'NaT', 'NaT', 'NaT'], dtype=object) + tm.assert_index_equal(result, expected) + + def test_astype_str(self): + # test astype string - #10442 + result = date_range('2012-01-01', periods=4, + name='test_name').astype(str) + expected = Index(['2012-01-01', '2012-01-02', '2012-01-03', + '2012-01-04'], name='test_name', dtype=object) + tm.assert_index_equal(result, expected) + + # test astype string with tz and name + result = date_range('2012-01-01', periods=3, name='test_name', + tz='US/Eastern').astype(str) + expected = Index(['2012-01-01 00:00:00-05:00', + '2012-01-02 00:00:00-05:00', + '2012-01-03 00:00:00-05:00'], + name='test_name', dtype=object) + tm.assert_index_equal(result, expected) + + # test astype string with freqH and name + result = date_range('1/1/2011', periods=3, freq='H', + name='test_name').astype(str) + expected = Index(['2011-01-01 00:00:00', '2011-01-01 01:00:00', + '2011-01-01 02:00:00'], + name='test_name', dtype=object) + tm.assert_index_equal(result, expected) + + # test astype string with freqH and timezone + result = date_range('3/6/2012 00:00', periods=2, freq='H', + tz='Europe/London', name='test_name').astype(str) + expected = Index(['2012-03-06 00:00:00+00:00', + '2012-03-06 01:00:00+00:00'], + dtype=object, name='test_name') + tm.assert_index_equal(result, expected) + + def test_astype_datetime64(self): + # GH 13149, GH 13209 + idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) + + result = idx.astype('datetime64[ns]') + tm.assert_index_equal(result, idx) + self.assertFalse(result is idx) + + result = idx.astype('datetime64[ns]', copy=False) + tm.assert_index_equal(result, idx) + self.assertTrue(result is idx) + + idx_tz = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN], tz='EST') + result = idx_tz.astype('datetime64[ns]') + expected = DatetimeIndex(['2016-05-16 05:00:00', 'NaT', 'NaT', 'NaT'], + dtype='datetime64[ns]') + tm.assert_index_equal(result, expected) + + def test_astype_raises(self): + # GH 13149, GH 13209 + idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) + + self.assertRaises(ValueError, idx.astype, float) + self.assertRaises(ValueError, idx.astype, 'timedelta64') + self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') + self.assertRaises(ValueError, idx.astype, 'datetime64') + self.assertRaises(ValueError, idx.astype, 'datetime64[D]') diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py new file mode 100644 index 0000000000000..ae4eb6ee397b6 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -0,0 +1,425 @@ +import numpy as np +from datetime import timedelta + +import pandas as pd +import pandas.util.testing as tm +from pandas.tslib import OutOfBoundsDatetime +from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range) + + +class TestDatetimeIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_construction_with_alt(self): + + i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') + i2 = DatetimeIndex(i, dtype=i.dtype) + self.assert_index_equal(i, i2) + + i2 = DatetimeIndex(i.tz_localize(None).asi8, tz=i.dtype.tz) + self.assert_index_equal(i, i2) + 
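+        # note: after tz_localize(None) the .asi8 values are wall-clock
+        # nanoseconds; passing the original tz above, or the tz-aware dtype
+        # below, localizes them back and round-trips the index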
+ i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype) + self.assert_index_equal(i, i2) + + i2 = DatetimeIndex( + i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz) + self.assert_index_equal(i, i2) + + # localize into the provided tz + i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC') + expected = i.tz_localize(None).tz_localize('UTC') + self.assert_index_equal(i2, expected) + + # incompat tz/dtype + self.assertRaises(ValueError, lambda: DatetimeIndex( + i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific')) + + def test_construction_index_with_mixed_timezones(self): + # GH 11488 + # no tz results in DatetimeIndex + result = Index([Timestamp('2011-01-01'), + Timestamp('2011-01-02')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01'), + Timestamp('2011-01-02')], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # same tz results in DatetimeIndex + result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], + name='idx') + exp = DatetimeIndex( + [Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00') + ], tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # same tz results in DatetimeIndex (DST) + result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), + Timestamp('2011-08-01 10:00', tz='US/Eastern')], + name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-08-01 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # different tz results in Index(dtype=object) + result = Index([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + name='idx') + exp = Index([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + name='idx') + exp = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + # length = 1 + result = Index([Timestamp('2011-01-01')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01')], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # length = 1 with tz + result = Index( + [Timestamp('2011-01-01 10:00', tz='Asia/Tokyo')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00')], tz='Asia/Tokyo', + name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + def test_construction_index_with_mixed_timezones_with_NaT(self): + # GH 11488 + result = Index([pd.NaT, Timestamp('2011-01-01'), + pd.NaT, Timestamp('2011-01-02')], name='idx') + exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01'), + pd.NaT, 
Timestamp('2011-01-02')], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # same tz results in DatetimeIndex + result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + pd.NaT, Timestamp('2011-01-02 10:00', + tz='Asia/Tokyo')], + name='idx') + exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00')], + tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # same tz results in DatetimeIndex (DST) + result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), + pd.NaT, + Timestamp('2011-08-01 10:00', tz='US/Eastern')], + name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), pd.NaT, + Timestamp('2011-08-01 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # different tz results in Index(dtype=object) + result = Index([pd.NaT, Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00', + tz='US/Eastern')], + name='idx') + exp = Index([pd.NaT, Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + pd.NaT, Timestamp('2011-01-02 10:00', + tz='US/Eastern')], name='idx') + exp = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + # all NaT + result = Index([pd.NaT, pd.NaT], name='idx') + exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # all NaT with tz + result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') + exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + def test_construction_dti_with_mixed_timezones(self): + # GH 11488 (not changed, added explicit tests) + + # no tz results in DatetimeIndex + result = DatetimeIndex( + [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') + exp = DatetimeIndex( + [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # same tz results in DatetimeIndex + result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', + tz='Asia/Tokyo')], + name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00')], + tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # same tz results in DatetimeIndex (DST) + result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'), + 
Timestamp('2011-08-01 10:00', + tz='US/Eastern')], + name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-08-01 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # different tz coerces tz-naive to tz-awareIndex(dtype=object) + result = DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', + tz='US/Eastern')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 05:00'), + Timestamp('2011-01-02 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # tz mismatch affecting to tz-aware raises TypeError/ValueError + + with tm.assertRaises(ValueError): + DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + name='idx') + + with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'): + DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + tz='Asia/Tokyo', name='idx') + + with tm.assertRaises(ValueError): + DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + tz='US/Eastern', name='idx') + + with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'): + # passing tz should results in DatetimeIndex, then mismatch raises + # TypeError + Index([pd.NaT, Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], + tz='Asia/Tokyo', name='idx') + + def test_construction_base_constructor(self): + arr = [pd.Timestamp('2011-01-01'), pd.NaT, pd.Timestamp('2011-01-03')] + tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.DatetimeIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, pd.Timestamp('2011-01-03')] + tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.DatetimeIndex(np.array(arr))) + + def test_construction_outofbounds(self): + # GH 13663 + dates = [datetime(3000, 1, 1), datetime(4000, 1, 1), + datetime(5000, 1, 1), datetime(6000, 1, 1)] + exp = Index(dates, dtype=object) + # coerces to object + tm.assert_index_equal(Index(dates), exp) + + with tm.assertRaises(OutOfBoundsDatetime): + # can't create DatetimeIndex + DatetimeIndex(dates) + + def test_construction_with_ndarray(self): + # GH 5152 + dates = [datetime(2013, 10, 7), + datetime(2013, 10, 8), + datetime(2013, 10, 9)] + data = DatetimeIndex(dates, freq=pd.tseries.frequencies.BDay()).values + result = DatetimeIndex(data, freq=pd.tseries.frequencies.BDay()) + expected = DatetimeIndex(['2013-10-07', + '2013-10-08', + '2013-10-09'], + freq='B') + tm.assert_index_equal(result, expected) + + def test_constructor_coverage(self): + rng = date_range('1/1/2000', periods=10.5) + exp = date_range('1/1/2000', periods=10) + tm.assert_index_equal(rng, exp) + + self.assertRaises(ValueError, DatetimeIndex, start='1/1/2000', + periods='foo', freq='D') + + self.assertRaises(ValueError, DatetimeIndex, start='1/1/2000', + end='1/10/2000') + + self.assertRaises(ValueError, DatetimeIndex, '1/1/2000') + + # generator expression + gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10)) + result = DatetimeIndex(gen) + expected = DatetimeIndex([datetime(2000, 1, 1) + timedelta(i) + for i in range(10)]) + tm.assert_index_equal(result, expected) + + # NumPy string array + strings = 
np.array(['2000-01-01', '2000-01-02', '2000-01-03']) + result = DatetimeIndex(strings) + expected = DatetimeIndex(strings.astype('O')) + tm.assert_index_equal(result, expected) + + from_ints = DatetimeIndex(expected.asi8) + tm.assert_index_equal(from_ints, expected) + + # string with NaT + strings = np.array(['2000-01-01', '2000-01-02', 'NaT']) + result = DatetimeIndex(strings) + expected = DatetimeIndex(strings.astype('O')) + tm.assert_index_equal(result, expected) + + from_ints = DatetimeIndex(expected.asi8) + tm.assert_index_equal(from_ints, expected) + + # non-conforming + self.assertRaises(ValueError, DatetimeIndex, + ['2000-01-01', '2000-01-02', '2000-01-04'], freq='D') + + self.assertRaises(ValueError, DatetimeIndex, start='2011-01-01', + freq='b') + self.assertRaises(ValueError, DatetimeIndex, end='2011-01-01', + freq='B') + self.assertRaises(ValueError, DatetimeIndex, periods=10, freq='D') + + def test_constructor_datetime64_tzformat(self): + # GH 6572 + tm._skip_if_no_pytz() + import pytz + # ISO 8601 format results in pytz.FixedOffset + for freq in ['AS', 'W-SUN']: + idx = date_range('2013-01-01T00:00:00-05:00', + '2016-01-01T23:59:59-05:00', freq=freq) + expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(-300)) + tm.assert_index_equal(idx, expected) + # Unable to use `US/Eastern` because of DST + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', freq=freq, + tz='America/Lima') + self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + idx = date_range('2013-01-01T00:00:00+09:00', + '2016-01-01T23:59:59+09:00', freq=freq) + expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(540)) + tm.assert_index_equal(idx, expected) + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', freq=freq, + tz='Asia/Tokyo') + self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + tm._skip_if_no_dateutil() + + # Non ISO 8601 format results in dateutil.tz.tzoffset + for freq in ['AS', 'W-SUN']: + idx = date_range('2013/1/1 0:00:00-5:00', '2016/1/1 23:59:59-5:00', + freq=freq) + expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(-300)) + tm.assert_index_equal(idx, expected) + # Unable to use `US/Eastern` because of DST + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', freq=freq, + tz='America/Lima') + self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + idx = date_range('2013/1/1 0:00:00+9:00', + '2016/1/1 23:59:59+09:00', freq=freq) + expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(540)) + tm.assert_index_equal(idx, expected) + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', freq=freq, + tz='Asia/Tokyo') + self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + def test_constructor_dtype(self): + + # passing a dtype with a tz should localize + idx = DatetimeIndex(['2013-01-01', '2013-01-02'], + dtype='datetime64[ns, US/Eastern]') + expected = DatetimeIndex(['2013-01-01', '2013-01-02'] + ).tz_localize('US/Eastern') + tm.assert_index_equal(idx, expected) + + idx = DatetimeIndex(['2013-01-01', '2013-01-02'], + tz='US/Eastern') + tm.assert_index_equal(idx, expected) + + # if we already have a tz and its not the same, then raise + idx = DatetimeIndex(['2013-01-01', '2013-01-02'], + dtype='datetime64[ns, US/Eastern]') + + self.assertRaises(ValueError, + lambda: DatetimeIndex(idx, + 
dtype='datetime64[ns]')) + + # this is effectively trying to convert tz's + self.assertRaises(TypeError, + lambda: DatetimeIndex(idx, + dtype='datetime64[ns, CET]')) + self.assertRaises(ValueError, + lambda: DatetimeIndex( + idx, tz='CET', + dtype='datetime64[ns, US/Eastern]')) + result = DatetimeIndex(idx, dtype='datetime64[ns, US/Eastern]') + tm.assert_index_equal(idx, result) + + def test_constructor_name(self): + idx = DatetimeIndex(start='2000-01-01', periods=1, freq='A', + name='TEST') + self.assertEqual(idx.name, 'TEST') + + def test_000constructor_resolution(self): + # 2252 + t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1) + idx = DatetimeIndex([t1]) + + self.assertEqual(idx.nanosecond[0], t1.nanosecond) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py new file mode 100644 index 0000000000000..a69406804cd97 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -0,0 +1,836 @@ +import numpy as np +from datetime import date, timedelta, time + +import pandas as pd +import pandas.util.testing as tm +from pandas.compat import lrange +from pandas.compat.numpy import np_datetime64_compat +from pandas import (DatetimeIndex, Index, date_range, Series, DataFrame, + Timestamp, datetime, offsets, _np_version_under1p8) + +from pandas.util.testing import assert_series_equal, assert_almost_equal + +randn = np.random.randn + + +class TestDatetimeIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_get_loc(self): + idx = pd.date_range('2000-01-01', periods=3) + + for method in [None, 'pad', 'backfill', 'nearest']: + self.assertEqual(idx.get_loc(idx[1], method), 1) + self.assertEqual(idx.get_loc(idx[1].to_pydatetime(), method), 1) + self.assertEqual(idx.get_loc(str(idx[1]), method), 1) + if method is not None: + self.assertEqual(idx.get_loc(idx[1], method, + tolerance=pd.Timedelta('0 days')), + 1) + + self.assertEqual(idx.get_loc('2000-01-01', method='nearest'), 0) + self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest'), 1) + + self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', + tolerance='1 day'), 1) + self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', + tolerance=pd.Timedelta('1D')), 1) + self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', + tolerance=np.timedelta64(1, 'D')), 1) + self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', + tolerance=timedelta(1)), 1) + with tm.assertRaisesRegexp(ValueError, 'must be convertible'): + idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') + with tm.assertRaises(KeyError): + idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') + + self.assertEqual(idx.get_loc('2000', method='nearest'), slice(0, 3)) + self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 3)) + + self.assertEqual(idx.get_loc('1999', method='nearest'), 0) + self.assertEqual(idx.get_loc('2001', method='nearest'), 2) + + with tm.assertRaises(KeyError): + idx.get_loc('1999', method='pad') + with tm.assertRaises(KeyError): + idx.get_loc('2001', method='backfill') + + with tm.assertRaises(KeyError): + idx.get_loc('foobar') + with tm.assertRaises(TypeError): + idx.get_loc(slice(2)) + + idx = pd.to_datetime(['2000-01-01', '2000-01-04']) + self.assertEqual(idx.get_loc('2000-01-02', method='nearest'), 0) + self.assertEqual(idx.get_loc('2000-01-03', method='nearest'), 1) + self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 2)) + + # time indexing + idx = pd.date_range('2000-01-01', 
periods=24, freq='H') + tm.assert_numpy_array_equal(idx.get_loc(time(12)), + np.array([12]), check_dtype=False) + tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)), + np.array([]), check_dtype=False) + with tm.assertRaises(NotImplementedError): + idx.get_loc(time(12, 30), method='pad') + + def test_get_indexer(self): + idx = pd.date_range('2000-01-01', periods=3) + exp = np.array([0, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(idx.get_indexer(idx), exp) + + target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', + '1 day 1 hour']) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), + np.array([-1, 0, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), + np.array([0, 1, 2], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), + np.array([0, 1, 1], dtype=np.intp)) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', + tolerance=pd.Timedelta('1 hour')), + np.array([0, -1, 1], dtype=np.intp)) + with tm.assertRaises(ValueError): + idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') + + def test_roundtrip_pickle_with_tz(self): + + # GH 8367 + # round-trip of timezone + index = date_range('20130101', periods=3, tz='US/Eastern', name='foo') + unpickled = self.round_trip_pickle(index) + self.assert_index_equal(index, unpickled) + + def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): + # GH7774 + index = date_range('20130101', periods=3, tz='US/Eastern') + self.assertEqual(str(index.reindex([])[0].tz), 'US/Eastern') + self.assertEqual(str(index.reindex(np.array([]))[0].tz), 'US/Eastern') + + def test_time_loc(self): # GH8667 + from datetime import time + from pandas.index import _SIZE_CUTOFF + + ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64) + key = time(15, 11, 30) + start = key.hour * 3600 + key.minute * 60 + key.second + step = 24 * 3600 + + for n in ns: + idx = pd.date_range('2014-11-26', periods=n, freq='S') + ts = pd.Series(np.random.randn(n), index=idx) + i = np.arange(start, n, step) + + tm.assert_numpy_array_equal(ts.index.get_loc(key), i, + check_dtype=False) + tm.assert_series_equal(ts[key], ts.iloc[i]) + + left, right = ts.copy(), ts.copy() + left[key] *= -10 + right.iloc[i] *= -10 + tm.assert_series_equal(left, right) + + def test_time_overflow_for_32bit_machines(self): + # GH8943. On some machines NumPy defaults to np.int32 (for example, + # 32-bit Linux machines). In the function _generate_regular_range + # found in tseries/index.py, `periods` gets multiplied by `strides` + # (which has value 1e9) and since the max value for np.int32 is ~2e9, + # and since those machines won't promote np.int32 to np.int64, we get + # overflow. 
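+        # (illustration: 32-bit integer arithmetic wraps instead of raising,
+        # e.g. np.int32(2**31 - 1) + np.int32(1) == np.int32(-2**31) --
+        # the same wraparound avoided by the np.intp change to
+        # cartesian_product in patch 001)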
+ periods = np.int_(1000) + + idx1 = pd.date_range(start='2000', periods=periods, freq='S') + self.assertEqual(len(idx1), periods) + + idx2 = pd.date_range(end='2000', periods=periods, freq='S') + self.assertEqual(len(idx2), periods) + + def test_nat(self): + self.assertIs(DatetimeIndex([np.nan])[0], pd.NaT) + + def test_ufunc_coercions(self): + idx = date_range('2011-01-01', periods=3, freq='2D', name='x') + + delta = np.timedelta64(1, 'D') + for result in [idx + delta, np.add(idx, delta)]: + tm.assertIsInstance(result, DatetimeIndex) + exp = date_range('2011-01-02', periods=3, freq='2D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '2D') + + for result in [idx - delta, np.subtract(idx, delta)]: + tm.assertIsInstance(result, DatetimeIndex) + exp = date_range('2010-12-31', periods=3, freq='2D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '2D') + + delta = np.array([np.timedelta64(1, 'D'), np.timedelta64(2, 'D'), + np.timedelta64(3, 'D')]) + for result in [idx + delta, np.add(idx, delta)]: + tm.assertIsInstance(result, DatetimeIndex) + exp = DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-08'], + freq='3D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '3D') + + for result in [idx - delta, np.subtract(idx, delta)]: + tm.assertIsInstance(result, DatetimeIndex) + exp = DatetimeIndex(['2010-12-31', '2011-01-01', '2011-01-02'], + freq='D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, 'D') + + def test_week_of_month_frequency(self): + # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise + d1 = date(2002, 9, 1) + d2 = date(2013, 10, 27) + d3 = date(2012, 9, 30) + idx1 = DatetimeIndex([d1, d2]) + idx2 = DatetimeIndex([d3]) + result_append = idx1.append(idx2) + expected = DatetimeIndex([d1, d2, d3]) + tm.assert_index_equal(result_append, expected) + result_union = idx1.union(idx2) + expected = DatetimeIndex([d1, d3, d2]) + tm.assert_index_equal(result_union, expected) + + # GH 5115 + result = date_range("2013-1-1", periods=4, freq='WOM-1SAT') + dates = ['2013-01-05', '2013-02-02', '2013-03-02', '2013-04-06'] + expected = DatetimeIndex(dates, freq='WOM-1SAT') + tm.assert_index_equal(result, expected) + + def test_hash_error(self): + index = date_range('20010101', periods=10) + with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % + type(index).__name__): + hash(index) + + def test_stringified_slice_with_tz(self): + # GH2658 + import datetime + start = datetime.datetime.now() + idx = DatetimeIndex(start=start, freq="1d", periods=10) + df = DataFrame(lrange(10), index=idx) + df["2013-01-14 23:44:34.437768-05:00":] # no exception here + + def test_append_join_nondatetimeindex(self): + rng = date_range('1/1/2000', periods=10) + idx = Index(['a', 'b', 'c', 'd']) + + result = rng.append(idx) + tm.assertIsInstance(result[0], Timestamp) + + # it works + rng.join(idx, how='outer') + + def test_to_period_nofreq(self): + idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) + self.assertRaises(ValueError, idx.to_period) + + idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'], + freq='infer') + self.assertEqual(idx.freqstr, 'D') + expected = pd.PeriodIndex(['2000-01-01', '2000-01-02', + '2000-01-03'], freq='D') + tm.assert_index_equal(idx.to_period(), expected) + + # GH 7606 + idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03']) + self.assertEqual(idx.freqstr, None) + tm.assert_index_equal(idx.to_period(), expected) + 
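+    # (re GH 7606 above: when freq is None, to_period() falls back to the
+    # inferred frequency, which is why the regular daily index still
+    # converts)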
+ def test_comparisons_coverage(self): + rng = date_range('1/1/2000', periods=10) + + # raise TypeError for now + self.assertRaises(TypeError, rng.__lt__, rng[3].value) + + result = rng == list(rng) + exp = rng == rng + self.assert_numpy_array_equal(result, exp) + + def test_comparisons_nat(self): + + fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0]) + fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0]) + + didx1 = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT, + '2014-05-01', '2014-07-01']) + didx2 = pd.DatetimeIndex(['2014-02-01', '2014-03-01', pd.NaT, pd.NaT, + '2014-06-01', '2014-07-01']) + darr = np.array([np_datetime64_compat('2014-02-01 00:00Z'), + np_datetime64_compat('2014-03-01 00:00Z'), + np_datetime64_compat('nat'), np.datetime64('nat'), + np_datetime64_compat('2014-06-01 00:00Z'), + np_datetime64_compat('2014-07-01 00:00Z')]) + + if _np_version_under1p8: + # cannot test array because np.datetime('nat') returns today's date + cases = [(fidx1, fidx2), (didx1, didx2)] + else: + cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] + + # Check pd.NaT is handles as the same as np.nan + with tm.assert_produces_warning(None): + for idx1, idx2 in cases: + + result = idx1 < idx2 + expected = np.array([True, False, False, False, True, False]) + self.assert_numpy_array_equal(result, expected) + + result = idx2 > idx1 + expected = np.array([True, False, False, False, True, False]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 <= idx2 + expected = np.array([True, False, False, False, True, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx2 >= idx1 + expected = np.array([True, False, False, False, True, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 == idx2 + expected = np.array([False, False, False, False, False, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 != idx2 + expected = np.array([True, True, True, True, True, False]) + self.assert_numpy_array_equal(result, expected) + + with tm.assert_produces_warning(None): + for idx1, val in [(fidx1, np.nan), (didx1, pd.NaT)]: + result = idx1 < val + expected = np.array([False, False, False, False, False, False]) + self.assert_numpy_array_equal(result, expected) + result = idx1 > val + self.assert_numpy_array_equal(result, expected) + + result = idx1 <= val + self.assert_numpy_array_equal(result, expected) + result = idx1 >= val + self.assert_numpy_array_equal(result, expected) + + result = idx1 == val + self.assert_numpy_array_equal(result, expected) + + result = idx1 != val + expected = np.array([True, True, True, True, True, True]) + self.assert_numpy_array_equal(result, expected) + + # Check pd.NaT is handles as the same as np.nan + with tm.assert_produces_warning(None): + for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]: + result = idx1 < val + expected = np.array([True, False, False, False, False, False]) + self.assert_numpy_array_equal(result, expected) + result = idx1 > val + expected = np.array([False, False, False, False, True, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 <= val + expected = np.array([True, False, True, False, False, False]) + self.assert_numpy_array_equal(result, expected) + result = idx1 >= val + expected = np.array([False, False, True, False, True, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 == val + expected = np.array([False, False, True, False, False, False]) + self.assert_numpy_array_equal(result, expected) + + 
result = idx1 != val
+                expected = np.array([True, True, False, True, True, True])
+                self.assert_numpy_array_equal(result, expected)
+
+    def test_map(self):
+        rng = date_range('1/1/2000', periods=10)
+
+        f = lambda x: x.strftime('%Y%m%d')
+        result = rng.map(f)
+        exp = Index([f(x) for x in rng], dtype='<U8')
+        tm.assert_index_equal(result, exp)
+
+    def test_take_fill_value(self):
+        # GH 12631
+        idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'],
+                               name='xxx')
+        result = idx.take(np.array([1, 0, -1]))
+        expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01',
+                                     '2011-03-01'], name='xxx')
+        tm.assert_index_equal(result, expected)
+
+        # fill_value
+        result = idx.take(np.array([1, 0, -1]), fill_value=True)
+        expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'],
+                                    name='xxx')
+        tm.assert_index_equal(result, expected)
+
+        # allow_fill=False
+        result = idx.take(np.array([1, 0, -1]), allow_fill=False,
+                          fill_value=True)
+        expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01',
+                                     '2011-03-01'], name='xxx')
+        tm.assert_index_equal(result, expected)
+
+        msg = ('When allow_fill=True and fill_value is not None, '
+               'all indices must be >= -1')
+        with tm.assertRaisesRegexp(ValueError, msg):
+            idx.take(np.array([1, 0, -2]), fill_value=True)
+        with tm.assertRaisesRegexp(ValueError, msg):
+            idx.take(np.array([1, 0, -5]), fill_value=True)
+
+        with tm.assertRaises(IndexError):
+            idx.take(np.array([1, -5]))
+
+    def test_take_fill_value_with_timezone(self):
+        idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'],
+                               name='xxx', tz='US/Eastern')
+        result = idx.take(np.array([1, 0, -1]))
+        expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'],
+                                    name='xxx', tz='US/Eastern')
+        tm.assert_index_equal(result, expected)
+
+        # fill_value
+        result = idx.take(np.array([1, 0, -1]), fill_value=True)
+        expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'],
+                                    name='xxx', tz='US/Eastern')
+        tm.assert_index_equal(result, expected)
+
+        # allow_fill=False
+        result = idx.take(np.array([1, 0, -1]), allow_fill=False,
+                          fill_value=True)
+        expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'],
+                                    name='xxx', tz='US/Eastern')
+        tm.assert_index_equal(result, expected)
+
+        msg = ('When allow_fill=True and fill_value is not None, '
+               'all indices must be >= -1')
+        with tm.assertRaisesRegexp(ValueError, msg):
+            idx.take(np.array([1, 0, -2]), fill_value=True)
+        with tm.assertRaisesRegexp(ValueError, msg):
+            idx.take(np.array([1, 0, -5]), fill_value=True)
+
+        with tm.assertRaises(IndexError):
+            idx.take(np.array([1, -5]))
+
+    def test_map_bug_1677(self):
+        index = DatetimeIndex(['2012-04-25 09:30:00.393000'])
+        f = index.asof
+
+        result = index.map(f)
+        expected = Index([f(index[0])])
+        tm.assert_index_equal(result, expected)
+
+    def test_groupby_function_tuple_1677(self):
+        df = DataFrame(np.random.rand(100),
+                       index=date_range("1/1/2000", periods=100))
+        monthly_group = df.groupby(lambda x: (x.year, x.month))
+
+        result = monthly_group.mean()
+        tm.assertIsInstance(result.index[0], tuple)
+
+    def test_append_numpy_bug_1681(self):
+        # another datetime64 bug
+        dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI')
+        a = DataFrame()
+        c = DataFrame({'A': 'foo', 'B': dr}, index=dr)
+
+        result = a.append(c)
+        self.assertTrue((result['B'] == dr).all())
+
+    def test_isin(self):
+        index = tm.makeDateIndex(4)
+        result = index.isin(index)
+        self.assertTrue(result.all())
+
+        result = index.isin(list(index))
+        self.assertTrue(result.all())
+
+        assert_almost_equal(index.isin([index[2], 5]),
+                            np.array([False, False, True, False]))
+
+    def test_time(self):
+        rng = pd.date_range('1/1/2000', freq='12min', periods=10)
+        result = pd.Index(rng).time
+        expected = [t.time() for t in rng]
+        self.assertTrue((result == expected).all())
+
+    def test_date(self):
+        rng = pd.date_range('1/1/2000', freq='12H', periods=10)
+        result = pd.Index(rng).date
+        expected = [t.date() for t in rng]
+        self.assertTrue((result == expected).all())
+
+    def test_does_not_convert_mixed_integer(self):
+        df = tm.makeCustomDataframe(10, 10,
+                                    data_gen_f=lambda *args, **kwargs: randn(),
+                                    r_idx_type='i', c_idx_type='dt')
+        cols = df.columns.join(df.index, how='outer')
+        joined = cols.join(df.columns)
+        self.assertEqual(cols.dtype, np.dtype('O'))
+        self.assertEqual(cols.dtype, joined.dtype)
+        tm.assert_numpy_array_equal(cols.values, joined.values)
+
+    def 
test_slice_keeps_name(self): + # GH4226 + st = pd.Timestamp('2013-07-01 00:00:00', tz='America/Los_Angeles') + et = pd.Timestamp('2013-07-02 00:00:00', tz='America/Los_Angeles') + dr = pd.date_range(st, et, freq='H', name='timebucket') + self.assertEqual(dr[1:].name, dr.name) + + def test_join_self(self): + index = date_range('1/1/2000', periods=10) + kinds = 'outer', 'inner', 'left', 'right' + for kind in kinds: + joined = index.join(index, how=kind) + self.assertIs(index, joined) + + def assert_index_parameters(self, index): + assert index.freq == '40960N' + assert index.inferred_freq == '40960N' + + def test_ns_index(self): + nsamples = 400 + ns = int(1e9 / 24414) + dtstart = np.datetime64('2012-09-20T00:00:00') + + dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, 'ns') + freq = ns * offsets.Nano() + index = pd.DatetimeIndex(dt, freq=freq, name='time') + self.assert_index_parameters(index) + + new_index = pd.DatetimeIndex(start=index[0], end=index[-1], + freq=index.freq) + self.assert_index_parameters(new_index) + + def test_join_with_period_index(self): + df = tm.makeCustomDataframe( + 10, 10, data_gen_f=lambda *args: np.random.randint(2), + c_idx_type='p', r_idx_type='dt') + s = df.iloc[:5, 0] + joins = 'left', 'right', 'inner', 'outer' + + for join in joins: + with tm.assertRaisesRegexp(ValueError, 'can only call with other ' + 'PeriodIndex-ed objects'): + df.columns.join(s.index, how=join) + + def test_factorize(self): + idx1 = DatetimeIndex(['2014-01', '2014-01', '2014-02', '2014-02', + '2014-03', '2014-03']) + + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) + exp_idx = DatetimeIndex(['2014-01', '2014-02', '2014-03']) + + arr, idx = idx1.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + arr, idx = idx1.factorize(sort=True) + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + # tz must be preserved + idx1 = idx1.tz_localize('Asia/Tokyo') + exp_idx = exp_idx.tz_localize('Asia/Tokyo') + + arr, idx = idx1.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + idx2 = pd.DatetimeIndex(['2014-03', '2014-03', '2014-02', '2014-01', + '2014-03', '2014-01']) + + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) + exp_idx = DatetimeIndex(['2014-01', '2014-02', '2014-03']) + arr, idx = idx2.factorize(sort=True) + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) + exp_idx = DatetimeIndex(['2014-03', '2014-02', '2014-01']) + arr, idx = idx2.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + # freq must be preserved + idx3 = date_range('2000-01', periods=4, freq='M', tz='Asia/Tokyo') + exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) + arr, idx = idx3.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, idx3) + + def test_factorize_tz(self): + # GH 13750 + for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: + base = pd.date_range('2016-11-05', freq='H', periods=100, tz=tz) + idx = base.repeat(5) + + exp_arr = np.arange(100, dtype=np.intp).repeat(5) + + for obj in [idx, pd.Series(idx)]: + arr, res = obj.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(res, base) + + def test_factorize_dst(self): + # GH 13750 + idx = pd.date_range('2016-11-06', freq='H', periods=12, + tz='US/Eastern') + + for obj in [idx, pd.Series(idx)]: + arr, res = obj.factorize() 
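+            # (every hourly stamp across the DST transition is a distinct
+            # instant, so factorize assigns 12 unique codes)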
+ self.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) + tm.assert_index_equal(res, idx) + + idx = pd.date_range('2016-06-13', freq='H', periods=12, + tz='US/Eastern') + + for obj in [idx, pd.Series(idx)]: + arr, res = obj.factorize() + self.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) + tm.assert_index_equal(res, idx) + + def test_slice_with_negative_step(self): + ts = Series(np.arange(20), + date_range('2014-01-01', periods=20, freq='MS')) + SLC = pd.IndexSlice + + def assert_slices_equivalent(l_slc, i_slc): + assert_series_equal(ts[l_slc], ts.iloc[i_slc]) + assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + + assert_slices_equivalent(SLC[Timestamp('2014-10-01')::-1], SLC[9::-1]) + assert_slices_equivalent(SLC['2014-10-01'::-1], SLC[9::-1]) + + assert_slices_equivalent(SLC[:Timestamp('2014-10-01'):-1], SLC[:8:-1]) + assert_slices_equivalent(SLC[:'2014-10-01':-1], SLC[:8:-1]) + + assert_slices_equivalent(SLC['2015-02-01':'2014-10-01':-1], + SLC[13:8:-1]) + assert_slices_equivalent(SLC[Timestamp('2015-02-01'):Timestamp( + '2014-10-01'):-1], SLC[13:8:-1]) + assert_slices_equivalent(SLC['2015-02-01':Timestamp('2014-10-01'):-1], + SLC[13:8:-1]) + assert_slices_equivalent(SLC[Timestamp('2015-02-01'):'2014-10-01':-1], + SLC[13:8:-1]) + + assert_slices_equivalent(SLC['2014-10-01':'2015-02-01':-1], SLC[:0]) + + def test_slice_with_zero_step_raises(self): + ts = Series(np.arange(20), + date_range('2014-01-01', periods=20, freq='MS')) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts[::0]) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + + def test_slice_bounds_empty(self): + # GH 14354 + empty_idx = DatetimeIndex(freq='1H', periods=0, end='2015') + + right = empty_idx._maybe_cast_slice_bound('2015-01-02', 'right', 'loc') + exp = Timestamp('2015-01-02 23:59:59.999999999') + self.assertEqual(right, exp) + + left = empty_idx._maybe_cast_slice_bound('2015-01-02', 'left', 'loc') + exp = Timestamp('2015-01-02 00:00:00') + self.assertEqual(left, exp) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py new file mode 100644 index 0000000000000..5b6bcffe71856 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -0,0 +1,244 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +import pandas.compat as compat +from pandas import notnull, Index, DatetimeIndex, datetime, date_range + + +class TestDatetimeIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_where_other(self): + + # other is ndarray or Index + i = pd.date_range('20130101', periods=3, tz='US/Eastern') + + for arr in [np.nan, pd.NaT]: + result = i.where(notnull(i), other=np.nan) + expected = i + tm.assert_index_equal(result, expected) + + i2 = i.copy() + i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) + result = i.where(notnull(i2), i2) + tm.assert_index_equal(result, i2) + + i2 = i.copy() + i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) + result = i.where(notnull(i2), i2.values) + tm.assert_index_equal(result, i2) + + def test_where_tz(self): + i = pd.date_range('20130101', periods=3, tz='US/Eastern') + result = i.where(notnull(i)) + expected = i + tm.assert_index_equal(result, expected) + + i2 = i.copy() + i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) + result = i.where(notnull(i2)) + expected 
= i2 + tm.assert_index_equal(result, expected) + + def test_insert(self): + idx = DatetimeIndex( + ['2000-01-04', '2000-01-01', '2000-01-02'], name='idx') + + result = idx.insert(2, datetime(2000, 1, 5)) + exp = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-05', + '2000-01-02'], name='idx') + tm.assert_index_equal(result, exp) + + # insertion of non-datetime should coerce to object index + result = idx.insert(1, 'inserted') + expected = Index([datetime(2000, 1, 4), 'inserted', + datetime(2000, 1, 1), + datetime(2000, 1, 2)], name='idx') + self.assertNotIsInstance(result, DatetimeIndex) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + + idx = date_range('1/1/2000', periods=3, freq='M', name='idx') + + # preserve freq + expected_0 = DatetimeIndex(['1999-12-31', '2000-01-31', '2000-02-29', + '2000-03-31'], name='idx', freq='M') + expected_3 = DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', + '2000-04-30'], name='idx', freq='M') + + # reset freq to None + expected_1_nofreq = DatetimeIndex(['2000-01-31', '2000-01-31', + '2000-02-29', + '2000-03-31'], name='idx', + freq=None) + expected_3_nofreq = DatetimeIndex(['2000-01-31', '2000-02-29', + '2000-03-31', + '2000-01-02'], name='idx', + freq=None) + + cases = [(0, datetime(1999, 12, 31), expected_0), + (-3, datetime(1999, 12, 31), expected_0), + (3, datetime(2000, 4, 30), expected_3), + (1, datetime(2000, 1, 31), expected_1_nofreq), + (3, datetime(2000, 1, 2), expected_3_nofreq)] + + for n, d, expected in cases: + result = idx.insert(n, d) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + + # reset freq to None + result = idx.insert(3, datetime(2000, 1, 2)) + expected = DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', + '2000-01-02'], name='idx', freq=None) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertTrue(result.freq is None) + + # GH 7299 + tm._skip_if_no_pytz() + import pytz + + idx = date_range('1/1/2000', periods=3, freq='D', tz='Asia/Tokyo', + name='idx') + with tm.assertRaises(ValueError): + result = idx.insert(3, pd.Timestamp('2000-01-04')) + with tm.assertRaises(ValueError): + result = idx.insert(3, datetime(2000, 1, 4)) + with tm.assertRaises(ValueError): + result = idx.insert(3, pd.Timestamp('2000-01-04', tz='US/Eastern')) + with tm.assertRaises(ValueError): + result = idx.insert(3, + datetime(2000, 1, 4, + tzinfo=pytz.timezone('US/Eastern'))) + + for tz in ['US/Pacific', 'Asia/Singapore']: + idx = date_range('1/1/2000 09:00', periods=6, freq='H', tz=tz, + name='idx') + # preserve freq + expected = date_range('1/1/2000 09:00', periods=7, freq='H', tz=tz, + name='idx') + for d in [pd.Timestamp('2000-01-01 15:00', tz=tz), + pytz.timezone(tz).localize(datetime(2000, 1, 1, 15))]: + + result = idx.insert(6, d) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.tz, expected.tz) + + expected = DatetimeIndex(['2000-01-01 09:00', '2000-01-01 10:00', + '2000-01-01 11:00', + '2000-01-01 12:00', '2000-01-01 13:00', + '2000-01-01 14:00', + '2000-01-01 10:00'], name='idx', + tz=tz, freq=None) + # reset freq to None + for d in [pd.Timestamp('2000-01-01 10:00', tz=tz), + pytz.timezone(tz).localize(datetime(2000, 1, 1, 10))]: + result = idx.insert(6, d) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, 
expected.name)
+ self.assertTrue(result.freq is None)
+ self.assertEqual(result.tz, expected.tz)
+
+ def test_delete(self):
+ idx = date_range(start='2000-01-01', periods=5, freq='M', name='idx')
+
+ # preserve freq
+ expected_0 = date_range(start='2000-02-01', periods=4, freq='M',
+ name='idx')
+ expected_4 = date_range(start='2000-01-01', periods=4, freq='M',
+ name='idx')
+
+ # reset freq to None
+ expected_1 = DatetimeIndex(['2000-01-31', '2000-03-31', '2000-04-30',
+ '2000-05-31'], freq=None, name='idx')
+
+ cases = {0: expected_0,
+ -5: expected_0,
+ -1: expected_4,
+ 4: expected_4,
+ 1: expected_1}
+ for n, expected in compat.iteritems(cases):
+ result = idx.delete(n)
+ tm.assert_index_equal(result, expected)
+ self.assertEqual(result.name, expected.name)
+ self.assertEqual(result.freq, expected.freq)
+
+ with tm.assertRaises((IndexError, ValueError)):
+ # either, depending on numpy version
+ result = idx.delete(5)
+
+ for tz in [None, 'Asia/Tokyo', 'US/Pacific']:
+ idx = date_range(start='2000-01-01 09:00', periods=10, freq='H',
+ name='idx', tz=tz)
+
+ expected = date_range(start='2000-01-01 10:00', periods=9,
+ freq='H', name='idx', tz=tz)
+ result = idx.delete(0)
+ tm.assert_index_equal(result, expected)
+ self.assertEqual(result.name, expected.name)
+ self.assertEqual(result.freqstr, 'H')
+ self.assertEqual(result.tz, expected.tz)
+
+ expected = date_range(start='2000-01-01 09:00', periods=9,
+ freq='H', name='idx', tz=tz)
+ result = idx.delete(-1)
+ tm.assert_index_equal(result, expected)
+ self.assertEqual(result.name, expected.name)
+ self.assertEqual(result.freqstr, 'H')
+ self.assertEqual(result.tz, expected.tz)
+
+ def test_delete_slice(self):
+ idx = date_range(start='2000-01-01', periods=10, freq='D', name='idx')
+
+ # preserve freq
+ expected_0_2 = date_range(start='2000-01-04', periods=7, freq='D',
+ name='idx')
+ expected_7_9 = date_range(start='2000-01-01', periods=7, freq='D',
+ name='idx')
+
+ # reset freq to None
+ expected_3_5 = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03',
+ '2000-01-07', '2000-01-08', '2000-01-09',
+ '2000-01-10'], freq=None, name='idx')
+
+ cases = {(0, 1, 2): expected_0_2,
+ (7, 8, 9): expected_7_9,
+ (3, 4, 5): expected_3_5}
+ for n, expected in compat.iteritems(cases):
+ result = idx.delete(n)
+ tm.assert_index_equal(result, expected)
+ self.assertEqual(result.name, expected.name)
+ self.assertEqual(result.freq, expected.freq)
+
+ result = idx.delete(slice(n[0], n[-1] + 1))
+ tm.assert_index_equal(result, expected)
+ self.assertEqual(result.name, expected.name)
+ self.assertEqual(result.freq, expected.freq)
+
+ for tz in [None, 'Asia/Tokyo', 'US/Pacific']:
+ ts = pd.Series(1, index=pd.date_range(
+ '2000-01-01 09:00', periods=10, freq='H', name='idx', tz=tz))
+ # preserve freq
+ result = ts.drop(ts.index[:5]).index
+ expected = pd.date_range('2000-01-01 14:00', periods=5, freq='H',
+ name='idx', tz=tz)
+ tm.assert_index_equal(result, expected)
+ self.assertEqual(result.name, expected.name)
+ self.assertEqual(result.freq, expected.freq)
+ self.assertEqual(result.tz, expected.tz)
+
+ # reset freq to None
+ result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index
+ expected = DatetimeIndex(['2000-01-01 09:00', '2000-01-01 11:00',
+ '2000-01-01 13:00',
+ '2000-01-01 15:00', '2000-01-01 17:00'],
+ freq=None, name='idx', tz=tz)
+ tm.assert_index_equal(result, expected)
+ self.assertEqual(result.name, expected.name)
+ self.assertEqual(result.freq, expected.freq)
+ self.assertEqual(result.tz, expected.tz)
diff --git 
a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py new file mode 100644 index 0000000000000..3dfe95fa77b85 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -0,0 +1,333 @@ +import numpy as np + +import pandas.lib as lib +import pandas.util.testing as tm +from pandas import Float64Index, date_range, Timestamp +from pandas import (Index, DatetimeIndex, datetime, offsets, to_datetime, + Series, DataFrame) + + +class TestDateTimeIndexToJulianDate(tm.TestCase): + + def test_1700(self): + r1 = Float64Index([2345897.5, 2345898.5, 2345899.5, 2345900.5, + 2345901.5]) + r2 = date_range(start=Timestamp('1710-10-01'), periods=5, + freq='D').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_2000(self): + r1 = Float64Index([2451601.5, 2451602.5, 2451603.5, 2451604.5, + 2451605.5]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='D').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_hour(self): + r1 = Float64Index( + [2451601.5, 2451601.5416666666666666, 2451601.5833333333333333, + 2451601.625, 2451601.6666666666666666]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='H').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_minute(self): + r1 = Float64Index( + [2451601.5, 2451601.5006944444444444, 2451601.5013888888888888, + 2451601.5020833333333333, 2451601.5027777777777777]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='T').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_second(self): + r1 = Float64Index( + [2451601.5, 2451601.500011574074074, 2451601.5000231481481481, + 2451601.5000347222222222, 2451601.5000462962962962]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='S').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + +class TestTimeSeries(tm.TestCase): + _multiprocess_can_split_ = True + + def test_pass_datetimeindex_to_index(self): + # Bugs in #1396 + rng = date_range('1/1/2000', '3/1/2000') + idx = Index(rng, dtype=object) + + expected = Index(rng.to_pydatetime(), dtype=object) + + self.assert_numpy_array_equal(idx.values, expected.values) + + +class TestDatetime64(tm.TestCase): + + def test_datetimeindex_accessors(self): + dti = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), periods=365) + + self.assertEqual(dti.year[0], 1998) + self.assertEqual(dti.month[0], 1) + self.assertEqual(dti.day[0], 1) + self.assertEqual(dti.hour[0], 0) + self.assertEqual(dti.minute[0], 0) + self.assertEqual(dti.second[0], 0) + self.assertEqual(dti.microsecond[0], 0) + self.assertEqual(dti.dayofweek[0], 3) + + self.assertEqual(dti.dayofyear[0], 1) + self.assertEqual(dti.dayofyear[120], 121) + + self.assertEqual(dti.weekofyear[0], 1) + self.assertEqual(dti.weekofyear[120], 18) + + self.assertEqual(dti.quarter[0], 1) + self.assertEqual(dti.quarter[120], 2) + + self.assertEqual(dti.days_in_month[0], 31) + self.assertEqual(dti.days_in_month[90], 30) + + self.assertEqual(dti.is_month_start[0], True) + self.assertEqual(dti.is_month_start[1], False) + self.assertEqual(dti.is_month_start[31], True) + self.assertEqual(dti.is_quarter_start[0], True) + self.assertEqual(dti.is_quarter_start[90], True) + self.assertEqual(dti.is_year_start[0], True) + self.assertEqual(dti.is_year_start[364], False) + 
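# note: 1998 is not a leap year, so position 364 below is 1998-12-31,
+ # which the month/quarter/year "end" checks at that index rely on +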
self.assertEqual(dti.is_month_end[0], False) + self.assertEqual(dti.is_month_end[30], True) + self.assertEqual(dti.is_month_end[31], False) + self.assertEqual(dti.is_month_end[364], True) + self.assertEqual(dti.is_quarter_end[0], False) + self.assertEqual(dti.is_quarter_end[30], False) + self.assertEqual(dti.is_quarter_end[89], True) + self.assertEqual(dti.is_quarter_end[364], True) + self.assertEqual(dti.is_year_end[0], False) + self.assertEqual(dti.is_year_end[364], True) + + # GH 11128 + self.assertEqual(dti.weekday_name[4], u'Monday') + self.assertEqual(dti.weekday_name[5], u'Tuesday') + self.assertEqual(dti.weekday_name[6], u'Wednesday') + self.assertEqual(dti.weekday_name[7], u'Thursday') + self.assertEqual(dti.weekday_name[8], u'Friday') + self.assertEqual(dti.weekday_name[9], u'Saturday') + self.assertEqual(dti.weekday_name[10], u'Sunday') + + self.assertEqual(Timestamp('2016-04-04').weekday_name, u'Monday') + self.assertEqual(Timestamp('2016-04-05').weekday_name, u'Tuesday') + self.assertEqual(Timestamp('2016-04-06').weekday_name, u'Wednesday') + self.assertEqual(Timestamp('2016-04-07').weekday_name, u'Thursday') + self.assertEqual(Timestamp('2016-04-08').weekday_name, u'Friday') + self.assertEqual(Timestamp('2016-04-09').weekday_name, u'Saturday') + self.assertEqual(Timestamp('2016-04-10').weekday_name, u'Sunday') + + self.assertEqual(len(dti.year), 365) + self.assertEqual(len(dti.month), 365) + self.assertEqual(len(dti.day), 365) + self.assertEqual(len(dti.hour), 365) + self.assertEqual(len(dti.minute), 365) + self.assertEqual(len(dti.second), 365) + self.assertEqual(len(dti.microsecond), 365) + self.assertEqual(len(dti.dayofweek), 365) + self.assertEqual(len(dti.dayofyear), 365) + self.assertEqual(len(dti.weekofyear), 365) + self.assertEqual(len(dti.quarter), 365) + self.assertEqual(len(dti.is_month_start), 365) + self.assertEqual(len(dti.is_month_end), 365) + self.assertEqual(len(dti.is_quarter_start), 365) + self.assertEqual(len(dti.is_quarter_end), 365) + self.assertEqual(len(dti.is_year_start), 365) + self.assertEqual(len(dti.is_year_end), 365) + self.assertEqual(len(dti.weekday_name), 365) + + dti = DatetimeIndex(freq='BQ-FEB', start=datetime(1998, 1, 1), + periods=4) + + self.assertEqual(sum(dti.is_quarter_start), 0) + self.assertEqual(sum(dti.is_quarter_end), 4) + self.assertEqual(sum(dti.is_year_start), 0) + self.assertEqual(sum(dti.is_year_end), 1) + + # Ensure is_start/end accessors throw ValueError for CustomBusinessDay, + # CBD requires np >= 1.7 + bday_egypt = offsets.CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu') + dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt) + self.assertRaises(ValueError, lambda: dti.is_month_start) + + dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03']) + + self.assertEqual(dti.is_month_start[0], 1) + + tests = [ + (Timestamp('2013-06-01', freq='M').is_month_start, 1), + (Timestamp('2013-06-01', freq='BM').is_month_start, 0), + (Timestamp('2013-06-03', freq='M').is_month_start, 0), + (Timestamp('2013-06-03', freq='BM').is_month_start, 1), + (Timestamp('2013-02-28', freq='Q-FEB').is_month_end, 1), + (Timestamp('2013-02-28', freq='Q-FEB').is_quarter_end, 1), + (Timestamp('2013-02-28', freq='Q-FEB').is_year_end, 1), + (Timestamp('2013-03-01', freq='Q-FEB').is_month_start, 1), + (Timestamp('2013-03-01', freq='Q-FEB').is_quarter_start, 1), + (Timestamp('2013-03-01', freq='Q-FEB').is_year_start, 1), + (Timestamp('2013-03-31', freq='QS-FEB').is_month_end, 1), + (Timestamp('2013-03-31', 
freq='QS-FEB').is_quarter_end, 0), + (Timestamp('2013-03-31', freq='QS-FEB').is_year_end, 0), + (Timestamp('2013-02-01', freq='QS-FEB').is_month_start, 1), + (Timestamp('2013-02-01', freq='QS-FEB').is_quarter_start, 1), + (Timestamp('2013-02-01', freq='QS-FEB').is_year_start, 1), + (Timestamp('2013-06-30', freq='BQ').is_month_end, 0), + (Timestamp('2013-06-30', freq='BQ').is_quarter_end, 0), + (Timestamp('2013-06-30', freq='BQ').is_year_end, 0), + (Timestamp('2013-06-28', freq='BQ').is_month_end, 1), + (Timestamp('2013-06-28', freq='BQ').is_quarter_end, 1), + (Timestamp('2013-06-28', freq='BQ').is_year_end, 0), + (Timestamp('2013-06-30', freq='BQS-APR').is_month_end, 0), + (Timestamp('2013-06-30', freq='BQS-APR').is_quarter_end, 0), + (Timestamp('2013-06-30', freq='BQS-APR').is_year_end, 0), + (Timestamp('2013-06-28', freq='BQS-APR').is_month_end, 1), + (Timestamp('2013-06-28', freq='BQS-APR').is_quarter_end, 1), + (Timestamp('2013-03-29', freq='BQS-APR').is_year_end, 1), + (Timestamp('2013-11-01', freq='AS-NOV').is_year_start, 1), + (Timestamp('2013-10-31', freq='AS-NOV').is_year_end, 1), + (Timestamp('2012-02-01').days_in_month, 29), + (Timestamp('2013-02-01').days_in_month, 28)] + + for ts, value in tests: + self.assertEqual(ts, value) + + def test_datetimeindex_diff(self): + dti1 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), + periods=100) + dti2 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), + periods=98) + self.assertEqual(len(dti1.difference(dti2)), 2) + + def test_nanosecond_field(self): + dti = DatetimeIndex(np.arange(10)) + + self.assert_numpy_array_equal(dti.nanosecond, + np.arange(10, dtype=np.int32)) + + def test_datetimeindex_constructor(self): + arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04'] + self.assertRaises(Exception, DatetimeIndex, arr) + + arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04'] + idx1 = DatetimeIndex(arr) + + arr = [datetime(2005, 1, 1), '1/2/2005', '1/3/2005', '2005-01-04'] + idx2 = DatetimeIndex(arr) + + arr = [lib.Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005', + '2005-01-04'] + idx3 = DatetimeIndex(arr) + + arr = np.array(['1/1/2005', '1/2/2005', '1/3/2005', + '2005-01-04'], dtype='O') + idx4 = DatetimeIndex(arr) + + arr = to_datetime(['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04']) + idx5 = DatetimeIndex(arr) + + arr = to_datetime(['1/1/2005', '1/2/2005', 'Jan 3, 2005', '2005-01-04' + ]) + idx6 = DatetimeIndex(arr) + + idx7 = DatetimeIndex(['12/05/2007', '25/01/2008'], dayfirst=True) + idx8 = DatetimeIndex(['2007/05/12', '2008/01/25'], dayfirst=False, + yearfirst=True) + tm.assert_index_equal(idx7, idx8) + + for other in [idx2, idx3, idx4, idx5, idx6]: + self.assertTrue((idx1.values == other.values).all()) + + sdate = datetime(1999, 12, 25) + edate = datetime(2000, 1, 1) + idx = DatetimeIndex(start=sdate, freq='1B', periods=20) + self.assertEqual(len(idx), 20) + self.assertEqual(idx[0], sdate + 0 * offsets.BDay()) + self.assertEqual(idx.freq, 'B') + + idx = DatetimeIndex(end=edate, freq=('D', 5), periods=20) + self.assertEqual(len(idx), 20) + self.assertEqual(idx[-1], edate) + self.assertEqual(idx.freq, '5D') + + idx1 = DatetimeIndex(start=sdate, end=edate, freq='W-SUN') + idx2 = DatetimeIndex(start=sdate, end=edate, + freq=offsets.Week(weekday=6)) + self.assertEqual(len(idx1), len(idx2)) + self.assertEqual(idx1.offset, idx2.offset) + + idx1 = DatetimeIndex(start=sdate, end=edate, freq='QS') + idx2 = DatetimeIndex(start=sdate, end=edate, + freq=offsets.QuarterBegin(startingMonth=1)) + 
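# the checks below rely on 'QS' resolving to QuarterBegin(startingMonth=1),
+ # mirroring the explicit offset used for idx2 above +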
self.assertEqual(len(idx1), len(idx2)) + self.assertEqual(idx1.offset, idx2.offset) + + idx1 = DatetimeIndex(start=sdate, end=edate, freq='BQ') + idx2 = DatetimeIndex(start=sdate, end=edate, + freq=offsets.BQuarterEnd(startingMonth=12)) + self.assertEqual(len(idx1), len(idx2)) + self.assertEqual(idx1.offset, idx2.offset) + + def test_dayfirst(self): + # GH 5917 + arr = ['10/02/2014', '11/02/2014', '12/02/2014'] + expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11), + datetime(2014, 2, 12)]) + idx1 = DatetimeIndex(arr, dayfirst=True) + idx2 = DatetimeIndex(np.array(arr), dayfirst=True) + idx3 = to_datetime(arr, dayfirst=True) + idx4 = to_datetime(np.array(arr), dayfirst=True) + idx5 = DatetimeIndex(Index(arr), dayfirst=True) + idx6 = DatetimeIndex(Series(arr), dayfirst=True) + tm.assert_index_equal(expected, idx1) + tm.assert_index_equal(expected, idx2) + tm.assert_index_equal(expected, idx3) + tm.assert_index_equal(expected, idx4) + tm.assert_index_equal(expected, idx5) + tm.assert_index_equal(expected, idx6) + + def test_dti_set_index_reindex(self): + # GH 6631 + df = DataFrame(np.random.random(6)) + idx1 = date_range('2011/01/01', periods=6, freq='M', tz='US/Eastern') + idx2 = date_range('2013', periods=6, freq='A', tz='Asia/Tokyo') + + df = df.set_index(idx1) + tm.assert_index_equal(df.index, idx1) + df = df.reindex(idx2) + tm.assert_index_equal(df.index, idx2) + + # 11314 + # with tz + index = date_range(datetime(2015, 10, 1), + datetime(2015, 10, 1, 23), + freq='H', tz='US/Eastern') + df = DataFrame(np.random.randn(24, 1), columns=['a'], index=index) + new_index = date_range(datetime(2015, 10, 2), + datetime(2015, 10, 2, 23), + freq='H', tz='US/Eastern') + + # TODO: unused? + result = df.set_index(new_index) # noqa + + self.assertEqual(new_index.freq, index.freq) + + def test_datetimeindex_union_join_empty(self): + dti = DatetimeIndex(start='1/1/2001', end='2/1/2001', freq='D') + empty = Index([]) + + result = dti.union(empty) + tm.assertIsInstance(result, DatetimeIndex) + self.assertIs(result, result) + + result = dti.join(empty) + tm.assertIsInstance(result, DatetimeIndex) diff --git a/pandas/tests/indexes/datetimes/test_missing.py b/pandas/tests/indexes/datetimes/test_missing.py new file mode 100644 index 0000000000000..5c408d5300cdc --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_missing.py @@ -0,0 +1,51 @@ +import pandas as pd +import pandas.util.testing as tm + + +class TestDatetimeIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_fillna_datetime64(self): + # GH 11343 + for tz in ['US/Eastern', 'Asia/Tokyo']: + idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, + '2011-01-01 11:00']) + + exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00']) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) + + # tz mismatch + exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), + pd.Timestamp('2011-01-01 10:00', tz=tz), + pd.Timestamp('2011-01-01 11:00')], dtype=object) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) + + # object + exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), 'x', + pd.Timestamp('2011-01-01 11:00')], dtype=object) + self.assert_index_equal(idx.fillna('x'), exp) + + idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, + '2011-01-01 11:00'], tz=tz) + + exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], tz=tz) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) + + 
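# converse tz mismatch: filling a tz-aware index with a tz-naive
+ # timestamp cannot stay datetime64, so the expected result is object +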
exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), + pd.Timestamp('2011-01-01 10:00'), + pd.Timestamp('2011-01-01 11:00', tz=tz)], + dtype=object) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) + + # object + exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), + 'x', + pd.Timestamp('2011-01-01 11:00', tz=tz)], + dtype=object) + self.assert_index_equal(idx.fillna('x'), exp) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py new file mode 100644 index 0000000000000..c25cd6a3fa90e --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -0,0 +1,1073 @@ +import warnings +import numpy as np +from datetime import timedelta + +import pandas as pd +import pandas.tslib as tslib +import pandas.util.testing as tm +from pandas.core.common import PerformanceWarning +from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, + date_range, TimedeltaIndex, _np_version_under1p10, Index, + datetime, Float64Index) + +from pandas.tests.test_base import Ops + + +class TestDatetimeIndexOps(Ops): + tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/Asia/Singapore', + 'dateutil/US/Pacific'] + + def setUp(self): + super(TestDatetimeIndexOps, self).setUp() + mask = lambda x: (isinstance(x, DatetimeIndex) or + isinstance(x, PeriodIndex)) + self.is_valid_objs = [o for o in self.objs if mask(o)] + self.not_valid_objs = [o for o in self.objs if not mask(o)] + + def test_ops_properties(self): + self.check_ops_properties( + ['year', 'month', 'day', 'hour', 'minute', 'second', 'weekofyear', + 'week', 'dayofweek', 'dayofyear', 'quarter']) + self.check_ops_properties(['date', 'time', 'microsecond', 'nanosecond', + 'is_month_start', 'is_month_end', + 'is_quarter_start', + 'is_quarter_end', 'is_year_start', + 'is_year_end', 'weekday_name'], + lambda x: isinstance(x, DatetimeIndex)) + + def test_ops_properties_basic(self): + + # sanity check that the behavior didn't change + # GH7206 + for op in ['year', 'day', 'second', 'weekday']: + self.assertRaises(TypeError, lambda x: getattr(self.dt_series, op)) + + # attribute access should still work! 
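+ # (in the Series below, 'year', 'month' and 'day' are index labels,
+ # so s.year and friends resolve to those labels, not datetime fields)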
+ s = Series(dict(year=2000, month=1, day=10)) + self.assertEqual(s.year, 2000) + self.assertEqual(s.month, 1) + self.assertEqual(s.day, 10) + self.assertRaises(AttributeError, lambda: s.weekday) + + def test_asobject_tolist(self): + idx = pd.date_range(start='2013-01-01', periods=4, freq='M', + name='idx') + expected_list = [Timestamp('2013-01-31'), + Timestamp('2013-02-28'), + Timestamp('2013-03-31'), + Timestamp('2013-04-30')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + + self.assertEqual(result.dtype, object) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + idx = pd.date_range(start='2013-01-01', periods=4, freq='M', + name='idx', tz='Asia/Tokyo') + expected_list = [Timestamp('2013-01-31', tz='Asia/Tokyo'), + Timestamp('2013-02-28', tz='Asia/Tokyo'), + Timestamp('2013-03-31', tz='Asia/Tokyo'), + Timestamp('2013-04-30', tz='Asia/Tokyo')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2), + pd.NaT, datetime(2013, 1, 4)], name='idx') + expected_list = [Timestamp('2013-01-01'), + Timestamp('2013-01-02'), pd.NaT, + Timestamp('2013-01-04')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + def test_minmax(self): + for tz in self.tz: + # monotonic + idx1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', + '2011-01-03'], tz=tz) + self.assertTrue(idx1.is_monotonic) + + # non-monotonic + idx2 = pd.DatetimeIndex(['2011-01-01', pd.NaT, '2011-01-03', + '2011-01-02', pd.NaT], tz=tz) + self.assertFalse(idx2.is_monotonic) + + for idx in [idx1, idx2]: + self.assertEqual(idx.min(), Timestamp('2011-01-01', tz=tz)) + self.assertEqual(idx.max(), Timestamp('2011-01-03', tz=tz)) + self.assertEqual(idx.argmin(), 0) + self.assertEqual(idx.argmax(), 2) + + for op in ['min', 'max']: + # Return NaT + obj = DatetimeIndex([]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + obj = DatetimeIndex([pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + def test_numpy_minmax(self): + dr = pd.date_range(start='2016-01-15', end='2016-01-20') + + self.assertEqual(np.min(dr), + Timestamp('2016-01-15 00:00:00', freq='D')) + self.assertEqual(np.max(dr), + Timestamp('2016-01-20 00:00:00', freq='D')) + + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.min, dr, out=0) + tm.assertRaisesRegexp(ValueError, errmsg, np.max, dr, out=0) + + self.assertEqual(np.argmin(dr), 0) + self.assertEqual(np.argmax(dr), 5) + + if not _np_version_under1p10: + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, dr, out=0) + tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, dr, out=0) + + def test_round(self): + for tz in self.tz: + rng = pd.date_range(start='2016-01-01', 
periods=5, + freq='30Min', tz=tz) + elt = rng[1] + + expected_rng = DatetimeIndex([ + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 01:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), + ]) + expected_elt = expected_rng[1] + + tm.assert_index_equal(rng.round(freq='H'), expected_rng) + self.assertEqual(elt.round(freq='H'), expected_elt) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with tm.assertRaisesRegexp(ValueError, msg): + rng.round(freq='foo') + with tm.assertRaisesRegexp(ValueError, msg): + elt.round(freq='foo') + + msg = " is a non-fixed frequency" + tm.assertRaisesRegexp(ValueError, msg, rng.round, freq='M') + tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') + + def test_repeat_range(self): + rng = date_range('1/1/2000', '1/1/2001') + + result = rng.repeat(5) + self.assertIsNone(result.freq) + self.assertEqual(len(result), 5 * len(rng)) + + for tz in self.tz: + index = pd.date_range('2001-01-01', periods=2, freq='D', tz=tz) + exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', + '2001-01-02', '2001-01-02'], tz=tz) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + self.assertIsNone(res.freq) + + index = pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz) + exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', + '2001-01-03', '2001-01-03'], tz=tz) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + self.assertIsNone(res.freq) + + index = pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'], + tz=tz) + exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', '2001-01-01', + 'NaT', 'NaT', 'NaT', + '2003-01-01', '2003-01-01', '2003-01-01'], + tz=tz) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + self.assertIsNone(res.freq) + + def test_repeat(self): + reps = 2 + msg = "the 'axis' parameter is not supported" + + for tz in self.tz: + rng = pd.date_range(start='2016-01-01', periods=2, + freq='30Min', tz=tz) + + expected_rng = DatetimeIndex([ + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), + ]) + + res = rng.repeat(reps) + tm.assert_index_equal(res, expected_rng) + self.assertIsNone(res.freq) + + tm.assert_index_equal(np.repeat(rng, reps), expected_rng) + tm.assertRaisesRegexp(ValueError, msg, np.repeat, + rng, reps, axis=1) + + def test_representation(self): + + idx = [] + idx.append(DatetimeIndex([], freq='D')) + idx.append(DatetimeIndex(['2011-01-01'], freq='D')) + idx.append(DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')) + idx.append(DatetimeIndex( + ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D')) + idx.append(DatetimeIndex( + ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00' + ], freq='H', tz='Asia/Tokyo')) + idx.append(DatetimeIndex( + ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='US/Eastern')) + idx.append(DatetimeIndex( + ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='UTC')) + + exp = [] + exp.append("""DatetimeIndex([], dtype='datetime64[ns]', freq='D')""") + exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', " + "freq='D')") + exp.append("DatetimeIndex(['2011-01-01', '2011-01-02'], " + "dtype='datetime64[ns]', freq='D')") + 
exp.append("DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " + "dtype='datetime64[ns]', freq='D')") + exp.append("DatetimeIndex(['2011-01-01 09:00:00+09:00', " + "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']" + ", dtype='datetime64[ns, Asia/Tokyo]', freq='H')") + exp.append("DatetimeIndex(['2011-01-01 09:00:00-05:00', " + "'2011-01-01 10:00:00-05:00', 'NaT'], " + "dtype='datetime64[ns, US/Eastern]', freq=None)") + exp.append("DatetimeIndex(['2011-01-01 09:00:00+00:00', " + "'2011-01-01 10:00:00+00:00', 'NaT'], " + "dtype='datetime64[ns, UTC]', freq=None)""") + + with pd.option_context('display.width', 300): + for indx, expected in zip(idx, exp): + for func in ['__repr__', '__unicode__', '__str__']: + result = getattr(indx, func)() + self.assertEqual(result, expected) + + def test_representation_to_series(self): + idx1 = DatetimeIndex([], freq='D') + idx2 = DatetimeIndex(['2011-01-01'], freq='D') + idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') + idx4 = DatetimeIndex( + ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') + idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], freq='H', tz='Asia/Tokyo') + idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], + tz='US/Eastern') + idx7 = DatetimeIndex(['2011-01-01 09:00', '2011-01-02 10:15']) + + exp1 = """Series([], dtype: datetime64[ns])""" + + exp2 = """0 2011-01-01 +dtype: datetime64[ns]""" + + exp3 = """0 2011-01-01 +1 2011-01-02 +dtype: datetime64[ns]""" + + exp4 = """0 2011-01-01 +1 2011-01-02 +2 2011-01-03 +dtype: datetime64[ns]""" + + exp5 = """0 2011-01-01 09:00:00+09:00 +1 2011-01-01 10:00:00+09:00 +2 2011-01-01 11:00:00+09:00 +dtype: datetime64[ns, Asia/Tokyo]""" + + exp6 = """0 2011-01-01 09:00:00-05:00 +1 2011-01-01 10:00:00-05:00 +2 NaT +dtype: datetime64[ns, US/Eastern]""" + + exp7 = """0 2011-01-01 09:00:00 +1 2011-01-02 10:15:00 +dtype: datetime64[ns]""" + + with pd.option_context('display.width', 300): + for idx, expected in zip([idx1, idx2, idx3, idx4, + idx5, idx6, idx7], + [exp1, exp2, exp3, exp4, + exp5, exp6, exp7]): + result = repr(Series(idx)) + self.assertEqual(result, expected) + + def test_summary(self): + # GH9116 + idx1 = DatetimeIndex([], freq='D') + idx2 = DatetimeIndex(['2011-01-01'], freq='D') + idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') + idx4 = DatetimeIndex( + ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') + idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], + freq='H', tz='Asia/Tokyo') + idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], + tz='US/Eastern') + + exp1 = """DatetimeIndex: 0 entries +Freq: D""" + + exp2 = """DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01 +Freq: D""" + + exp3 = """DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02 +Freq: D""" + + exp4 = """DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03 +Freq: D""" + + exp5 = ("DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 " + "to 2011-01-01 11:00:00+09:00\n" + "Freq: H") + + exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT""" + + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6], + [exp1, exp2, exp3, exp4, exp5, exp6]): + result = idx.summary() + self.assertEqual(result, expected) + + def test_resolution(self): + for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', + 'S', 'L', 'U'], + ['day', 'day', 'day', 'day', 'hour', + 'minute', 'second', 'millisecond', + 'microsecond']): + for tz in self.tz: + idx = 
pd.date_range(start='2013-04-01', periods=30, freq=freq, + tz=tz) + self.assertEqual(idx.resolution, expected) + + def test_union(self): + for tz in self.tz: + # union + rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) + expected1 = pd.date_range('1/1/2000', freq='D', periods=10, tz=tz) + + rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) + expected2 = pd.date_range('1/1/2000', freq='D', periods=8, tz=tz) + + rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other3 = pd.DatetimeIndex([], tz=tz) + expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + + for rng, other, expected in [(rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3)]: + + result_union = rng.union(other) + tm.assert_index_equal(result_union, expected) + + def test_add_iadd(self): + for tz in self.tz: + + # offset + offsets = [pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)] + + for delta in offsets: + rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) + result = rng + delta + expected = pd.date_range('2000-01-01 02:00', + '2000-02-01 02:00', tz=tz) + tm.assert_index_equal(result, expected) + rng += delta + tm.assert_index_equal(rng, expected) + + # int + rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, + tz=tz) + result = rng + 1 + expected = pd.date_range('2000-01-01 10:00', freq='H', periods=10, + tz=tz) + tm.assert_index_equal(result, expected) + rng += 1 + tm.assert_index_equal(rng, expected) + + idx = DatetimeIndex(['2011-01-01', '2011-01-02']) + msg = "cannot add a datelike to a DatetimeIndex" + with tm.assertRaisesRegexp(TypeError, msg): + idx + Timestamp('2011-01-01') + + with tm.assertRaisesRegexp(TypeError, msg): + Timestamp('2011-01-01') + idx + + def test_add_dti_dti(self): + # previously performed setop (deprecated in 0.16.0), now raises + # TypeError (GH14164) + + dti = date_range('20130101', periods=3) + dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') + + with tm.assertRaises(TypeError): + dti + dti + + with tm.assertRaises(TypeError): + dti_tz + dti_tz + + with tm.assertRaises(TypeError): + dti_tz + dti + + with tm.assertRaises(TypeError): + dti + dti_tz + + def test_difference(self): + for tz in self.tz: + # diff + rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) + expected1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + + rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) + expected2 = pd.date_range('1/1/2000', freq='D', periods=3, tz=tz) + + rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other3 = pd.DatetimeIndex([], tz=tz) + expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + + for rng, other, expected in [(rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3)]: + result_diff = rng.difference(other) + tm.assert_index_equal(result_diff, expected) + + def test_sub_isub(self): + for tz in self.tz: + + # offset + offsets = [pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)] + + for delta in offsets: + rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) + expected = pd.date_range('1999-12-31 22:00', + '2000-01-31 22:00', tz=tz) + + result = rng - delta + 
tm.assert_index_equal(result, expected)
+ rng -= delta
+ tm.assert_index_equal(rng, expected)
+
+ # int
+ rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10,
+ tz=tz)
+ result = rng - 1
+ expected = pd.date_range('2000-01-01 08:00', freq='H', periods=10,
+ tz=tz)
+ tm.assert_index_equal(result, expected)
+ rng -= 1
+ tm.assert_index_equal(rng, expected)
+
+ def test_sub_dti_dti(self):
+ # previously performed setop (deprecated in 0.16.0), now changed to
+ # return subtraction -> TimedeltaIndex (GH ...)
+
+ dti = date_range('20130101', periods=3)
+ dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern')
+ dti_tz2 = date_range('20130101', periods=3).tz_localize('UTC')
+ expected = TimedeltaIndex([0, 0, 0])
+
+ result = dti - dti
+ tm.assert_index_equal(result, expected)
+
+ result = dti_tz - dti_tz
+ tm.assert_index_equal(result, expected)
+
+ with tm.assertRaises(TypeError):
+ dti_tz - dti
+
+ with tm.assertRaises(TypeError):
+ dti - dti_tz
+
+ with tm.assertRaises(TypeError):
+ dti_tz - dti_tz2
+
+ # isub
+ dti -= dti
+ tm.assert_index_equal(dti, expected)
+
+ # different length raises ValueError
+ dti1 = date_range('20130101', periods=3)
+ dti2 = date_range('20130101', periods=4)
+ with tm.assertRaises(ValueError):
+ dti1 - dti2
+
+ # NaN propagation
+ dti1 = DatetimeIndex(['2012-01-01', np.nan, '2012-01-03'])
+ dti2 = DatetimeIndex(['2012-01-02', '2012-01-03', np.nan])
+ expected = TimedeltaIndex(['1 days', np.nan, np.nan])
+ result = dti2 - dti1
+ tm.assert_index_equal(result, expected)
+
+ def test_sub_period(self):
+ # GH 13078
+ # not supported, check TypeError
+ p = pd.Period('2011-01-01', freq='D')
+
+ for freq in [None, 'D']:
+ idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=freq)
+
+ with tm.assertRaises(TypeError):
+ idx - p
+
+ with tm.assertRaises(TypeError):
+ p - idx
+
+ def test_comp_nat(self):
+ left = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT,
+ pd.Timestamp('2011-01-03')])
+ right = pd.DatetimeIndex([pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')])
+
+ for l, r in [(left, right), (left.asobject, right.asobject)]:
+ result = l == r
+ expected = np.array([False, False, True])
+ tm.assert_numpy_array_equal(result, expected)
+
+ result = l != r
+ expected = np.array([True, True, False])
+ tm.assert_numpy_array_equal(result, expected)
+
+ expected = np.array([False, False, False])
+ tm.assert_numpy_array_equal(l == pd.NaT, expected)
+ tm.assert_numpy_array_equal(pd.NaT == r, expected)
+
+ expected = np.array([True, True, True])
+ tm.assert_numpy_array_equal(l != pd.NaT, expected)
+ tm.assert_numpy_array_equal(pd.NaT != l, expected)
+
+ expected = np.array([False, False, False])
+ tm.assert_numpy_array_equal(l < pd.NaT, expected)
+ tm.assert_numpy_array_equal(pd.NaT > l, expected)
+
+ def test_value_counts_unique(self):
+ # GH 7735
+ for tz in self.tz:
+ idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10)
+ # create repeated values: the n-th element is repeated n+1 times
+ idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)),
+ tz=tz)
+
+ exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10,
+ tz=tz)
+ expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')
+
+ for obj in [idx, Series(idx)]:
+ tm.assert_series_equal(obj.value_counts(), expected)
+
+ expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10,
+ tz=tz)
+ tm.assert_index_equal(idx.unique(), expected)
+
+ idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00',
+ '2013-01-01 09:00', '2013-01-01 08:00',
+ '2013-01-01 
08:00', pd.NaT], tz=tz) + + exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'], + tz=tz) + expected = Series([3, 2], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00', + pd.NaT], tz=tz) + expected = Series([3, 2, 1], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(dropna=False), + expected) + + tm.assert_index_equal(idx.unique(), exp_idx) + + def test_nonunique_contains(self): + # GH 9512 + for idx in map(DatetimeIndex, + ([0, 1, 0], [0, 0, -1], [0, -1, -1], + ['2015', '2015', '2016'], ['2015', '2015', '2014'])): + tm.assertIn(idx[0], idx) + + def test_order(self): + # with freq + idx1 = DatetimeIndex(['2011-01-01', '2011-01-02', + '2011-01-03'], freq='D', name='idx') + idx2 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], freq='H', + tz='Asia/Tokyo', name='tzidx') + + for idx in [idx1, idx2]: + ordered = idx.sort_values() + self.assert_index_equal(ordered, idx) + self.assertEqual(ordered.freq, idx.freq) + + ordered = idx.sort_values(ascending=False) + expected = idx[::-1] + self.assert_index_equal(ordered, expected) + self.assertEqual(ordered.freq, expected.freq) + self.assertEqual(ordered.freq.n, -1) + + ordered, indexer = idx.sort_values(return_indexer=True) + self.assert_index_equal(ordered, idx) + self.assert_numpy_array_equal(indexer, + np.array([0, 1, 2]), + check_dtype=False) + self.assertEqual(ordered.freq, idx.freq) + + ordered, indexer = idx.sort_values(return_indexer=True, + ascending=False) + expected = idx[::-1] + self.assert_index_equal(ordered, expected) + self.assert_numpy_array_equal(indexer, + np.array([2, 1, 0]), + check_dtype=False) + self.assertEqual(ordered.freq, expected.freq) + self.assertEqual(ordered.freq.n, -1) + + # without freq + for tz in self.tz: + idx1 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05', + '2011-01-02', '2011-01-01'], + tz=tz, name='idx1') + exp1 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02', + '2011-01-03', '2011-01-05'], + tz=tz, name='idx1') + + idx2 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05', + '2011-01-02', '2011-01-01'], + tz=tz, name='idx2') + + exp2 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02', + '2011-01-03', '2011-01-05'], + tz=tz, name='idx2') + + idx3 = DatetimeIndex([pd.NaT, '2011-01-03', '2011-01-05', + '2011-01-02', pd.NaT], tz=tz, name='idx3') + exp3 = DatetimeIndex([pd.NaT, pd.NaT, '2011-01-02', '2011-01-03', + '2011-01-05'], tz=tz, name='idx3') + + for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: + ordered = idx.sort_values() + self.assert_index_equal(ordered, expected) + self.assertIsNone(ordered.freq) + + ordered = idx.sort_values(ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + self.assertIsNone(ordered.freq) + + ordered, indexer = idx.sort_values(return_indexer=True) + self.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) + self.assertIsNone(ordered.freq) + + ordered, indexer = idx.sort_values(return_indexer=True, + ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + + exp = np.array([2, 1, 3, 4, 0]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) + self.assertIsNone(ordered.freq) + + def test_getitem(self): + idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + idx2 = 
pd.date_range('2011-01-01', '2011-01-31', freq='D', + tz='Asia/Tokyo', name='idx') + + for idx in [idx1, idx2]: + result = idx[0] + self.assertEqual(result, Timestamp('2011-01-01', tz=idx.tz)) + + result = idx[0:5] + expected = pd.date_range('2011-01-01', '2011-01-05', freq='D', + tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[0:10:2] + expected = pd.date_range('2011-01-01', '2011-01-09', freq='2D', + tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[-20:-5:3] + expected = pd.date_range('2011-01-12', '2011-01-24', freq='3D', + tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[4::-1] + expected = DatetimeIndex(['2011-01-05', '2011-01-04', '2011-01-03', + '2011-01-02', '2011-01-01'], + freq='-1D', tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + def test_drop_duplicates_metadata(self): + # GH 10115 + idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + result = idx.drop_duplicates() + self.assert_index_equal(idx, result) + self.assertEqual(idx.freq, result.freq) + + idx_dup = idx.append(idx) + self.assertIsNone(idx_dup.freq) # freq is reset + result = idx_dup.drop_duplicates() + self.assert_index_equal(idx, result) + self.assertIsNone(result.freq) + + def test_drop_duplicates(self): + # to check Index/Series compat + base = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + idx = base.append(base[:5]) + + res = idx.drop_duplicates() + tm.assert_index_equal(res, base) + res = Series(idx).drop_duplicates() + tm.assert_series_equal(res, Series(base)) + + res = idx.drop_duplicates(keep='last') + exp = base[5:].append(base[:5]) + tm.assert_index_equal(res, exp) + res = Series(idx).drop_duplicates(keep='last') + tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) + + res = idx.drop_duplicates(keep=False) + tm.assert_index_equal(res, base[5:]) + res = Series(idx).drop_duplicates(keep=False) + tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) + + def test_take(self): + # GH 10295 + idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D', + tz='Asia/Tokyo', name='idx') + + for idx in [idx1, idx2]: + result = idx.take([0]) + self.assertEqual(result, Timestamp('2011-01-01', tz=idx.tz)) + + result = idx.take([0, 1, 2]) + expected = pd.date_range('2011-01-01', '2011-01-03', freq='D', + tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([0, 2, 4]) + expected = pd.date_range('2011-01-01', '2011-01-05', freq='2D', + tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([7, 4, 1]) + expected = pd.date_range('2011-01-08', '2011-01-02', freq='-3D', + tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([3, 2, 5]) + expected = DatetimeIndex(['2011-01-04', '2011-01-03', + '2011-01-06'], + freq=None, tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertIsNone(result.freq) + + result = idx.take([-3, 2, 5]) + expected = DatetimeIndex(['2011-01-29', '2011-01-03', + '2011-01-06'], + freq=None, 
tz=idx.tz, name='idx')
+ self.assert_index_equal(result, expected)
+ self.assertIsNone(result.freq)
+
+ def test_take_invalid_kwargs(self):
+ idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
+ indices = [1, 6, 5, 9, 10, 13, 15, 3]
+
+ msg = r"take\(\) got an unexpected keyword argument 'foo'"
+ tm.assertRaisesRegexp(TypeError, msg, idx.take,
+ indices, foo=2)
+
+ msg = "the 'out' parameter is not supported"
+ tm.assertRaisesRegexp(ValueError, msg, idx.take,
+ indices, out=indices)
+
+ msg = "the 'mode' parameter is not supported"
+ tm.assertRaisesRegexp(ValueError, msg, idx.take,
+ indices, mode='clip')
+
+ def test_infer_freq(self):
+ # GH 11018
+ for freq in ['A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D',
+ '-3D', 'W', '-1W', 'H', '2H', '-2H', 'T', '2T', 'S',
+ '-3S']:
+ idx = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10)
+ result = pd.DatetimeIndex(idx.asi8, freq='infer')
+ tm.assert_index_equal(idx, result)
+ self.assertEqual(result.freq, freq)
+
+ def test_nat_new(self):
+ idx = pd.date_range('2011-01-01', freq='D', periods=5, name='x')
+ result = idx._nat_new()
+ exp = pd.DatetimeIndex([pd.NaT] * 5, name='x')
+ tm.assert_index_equal(result, exp)
+
+ result = idx._nat_new(box=False)
+ exp = np.array([tslib.iNaT] * 5, dtype=np.int64)
+ tm.assert_numpy_array_equal(result, exp)
+
+ def test_shift(self):
+ # GH 9903
+ for tz in self.tz:
+ idx = pd.DatetimeIndex([], name='xxx', tz=tz)
+ tm.assert_index_equal(idx.shift(0, freq='H'), idx)
+ tm.assert_index_equal(idx.shift(3, freq='H'), idx)
+
+ idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01 11:00',
+ '2011-01-01 12:00'], name='xxx', tz=tz)
+ tm.assert_index_equal(idx.shift(0, freq='H'), idx)
+ exp = pd.DatetimeIndex(['2011-01-01 13:00', '2011-01-01 14:00',
+ '2011-01-01 15:00'], name='xxx', tz=tz)
+ tm.assert_index_equal(idx.shift(3, freq='H'), exp)
+ exp = pd.DatetimeIndex(['2011-01-01 07:00', '2011-01-01 08:00',
+ '2011-01-01 09:00'], name='xxx', tz=tz)
+ tm.assert_index_equal(idx.shift(-3, freq='H'), exp)
+
+ def test_nat(self):
+ self.assertIs(pd.DatetimeIndex._na_value, pd.NaT)
+ self.assertIs(pd.DatetimeIndex([])._na_value, pd.NaT)
+
+ for tz in [None, 'US/Eastern', 'UTC']:
+ idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz)
+ self.assertTrue(idx._can_hold_na)
+
+ tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
+ self.assertFalse(idx.hasnans)
+ tm.assert_numpy_array_equal(idx._nan_idxs,
+ np.array([], dtype=np.intp))
+
+ idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz)
+ self.assertTrue(idx._can_hold_na)
+
+ tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
+ self.assertTrue(idx.hasnans)
+ tm.assert_numpy_array_equal(idx._nan_idxs,
+ np.array([1], dtype=np.intp))
+
+ def test_equals(self):
+ # GH 13107
+ for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']:
+ idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'])
+ self.assertTrue(idx.equals(idx))
+ self.assertTrue(idx.equals(idx.copy()))
+ self.assertTrue(idx.equals(idx.asobject))
+ self.assertTrue(idx.asobject.equals(idx))
+ self.assertTrue(idx.asobject.equals(idx.asobject))
+ self.assertFalse(idx.equals(list(idx)))
+ self.assertFalse(idx.equals(pd.Series(idx)))
+
+ idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'],
+ tz='US/Pacific')
+ self.assertFalse(idx.equals(idx2))
+ self.assertFalse(idx.equals(idx2.copy()))
+ self.assertFalse(idx.equals(idx2.asobject))
+ self.assertFalse(idx.asobject.equals(idx2))
+ self.assertFalse(idx.equals(list(idx2)))
+ 
self.assertFalse(idx.equals(pd.Series(idx2))) + + # same internal, different tz + idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific') + tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + self.assertFalse(idx.equals(idx3)) + self.assertFalse(idx.equals(idx3.copy())) + self.assertFalse(idx.equals(idx3.asobject)) + self.assertFalse(idx.asobject.equals(idx3)) + self.assertFalse(idx.equals(list(idx3))) + self.assertFalse(idx.equals(pd.Series(idx3))) + + +class TestDateTimeIndexToJulianDate(tm.TestCase): + + def test_1700(self): + r1 = Float64Index([2345897.5, 2345898.5, 2345899.5, 2345900.5, + 2345901.5]) + r2 = date_range(start=Timestamp('1710-10-01'), periods=5, + freq='D').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_2000(self): + r1 = Float64Index([2451601.5, 2451602.5, 2451603.5, 2451604.5, + 2451605.5]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='D').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_hour(self): + r1 = Float64Index( + [2451601.5, 2451601.5416666666666666, 2451601.5833333333333333, + 2451601.625, 2451601.6666666666666666]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='H').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_minute(self): + r1 = Float64Index( + [2451601.5, 2451601.5006944444444444, 2451601.5013888888888888, + 2451601.5020833333333333, 2451601.5027777777777777]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='T').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_second(self): + r1 = Float64Index( + [2451601.5, 2451601.500011574074074, 2451601.5000231481481481, + 2451601.5000347222222222, 2451601.5000462962962962]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='S').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + +class TestDatetimeIndex(tm.TestCase): + _multiprocess_can_split_ = True + + # GH 10699 + def test_datetime64_with_DateOffset(self): + for klass, assert_func in zip([Series, DatetimeIndex], + [self.assert_series_equal, + tm.assert_index_equal]): + s = klass(date_range('2000-01-01', '2000-01-31'), name='a') + result = s + pd.DateOffset(years=1) + result2 = pd.DateOffset(years=1) + s + exp = klass(date_range('2001-01-01', '2001-01-31'), name='a') + assert_func(result, exp) + assert_func(result2, exp) + + result = s - pd.DateOffset(years=1) + exp = klass(date_range('1999-01-01', '1999-01-31'), name='a') + assert_func(result, exp) + + s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + pd.Timestamp('2000-02-15', tz='US/Central')], name='a') + result = s + pd.offsets.Day() + result2 = pd.offsets.Day() + s + exp = klass([Timestamp('2000-01-16 00:15:00', tz='US/Central'), + Timestamp('2000-02-16', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + pd.Timestamp('2000-02-15', tz='US/Central')], name='a') + result = s + pd.offsets.MonthEnd() + result2 = pd.offsets.MonthEnd() + s + exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'), + Timestamp('2000-02-29', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + # array of offsets - valid for Series only + if klass is Series: + with tm.assert_produces_warning(PerformanceWarning): + s = 
klass([Timestamp('2000-1-1'), Timestamp('2000-2-1')]) + result = s + Series([pd.offsets.DateOffset(years=1), + pd.offsets.MonthEnd()]) + exp = klass([Timestamp('2001-1-1'), Timestamp('2000-2-29') + ]) + assert_func(result, exp) + + # same offset + result = s + Series([pd.offsets.DateOffset(years=1), + pd.offsets.DateOffset(years=1)]) + exp = klass([Timestamp('2001-1-1'), Timestamp('2001-2-1')]) + assert_func(result, exp) + + s = klass([Timestamp('2000-01-05 00:15:00'), + Timestamp('2000-01-31 00:23:00'), + Timestamp('2000-01-01'), + Timestamp('2000-03-31'), + Timestamp('2000-02-29'), + Timestamp('2000-12-31'), + Timestamp('2000-05-15'), + Timestamp('2001-06-15')]) + + # DateOffset relativedelta fastpath + relative_kwargs = [('years', 2), ('months', 5), ('days', 3), + ('hours', 5), ('minutes', 10), ('seconds', 2), + ('microseconds', 5)] + for i, kwd in enumerate(relative_kwargs): + op = pd.DateOffset(**dict([kwd])) + assert_func(klass([x + op for x in s]), s + op) + assert_func(klass([x - op for x in s]), s - op) + op = pd.DateOffset(**dict(relative_kwargs[:i + 1])) + assert_func(klass([x + op for x in s]), s + op) + assert_func(klass([x - op for x in s]), s - op) + + # assert these are equal on a piecewise basis + offsets = ['YearBegin', ('YearBegin', {'month': 5}), 'YearEnd', + ('YearEnd', {'month': 5}), 'MonthBegin', 'MonthEnd', + 'SemiMonthEnd', 'SemiMonthBegin', + 'Week', ('Week', { + 'weekday': 3 + }), 'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin', + 'CustomBusinessDay', 'CDay', 'CBMonthEnd', + 'CBMonthBegin', 'BMonthBegin', 'BMonthEnd', + 'BusinessHour', 'BYearBegin', 'BYearEnd', + 'BQuarterBegin', ('LastWeekOfMonth', { + 'weekday': 2 + }), ('FY5253Quarter', {'qtr_with_extra_week': 1, + 'startingMonth': 1, + 'weekday': 2, + 'variation': 'nearest'}), + ('FY5253', {'weekday': 0, + 'startingMonth': 2, + 'variation': + 'nearest'}), ('WeekOfMonth', {'weekday': 2, + 'week': 2}), + 'Easter', ('DateOffset', {'day': 4}), + ('DateOffset', {'month': 5})] + + with warnings.catch_warnings(record=True): + for normalize in (True, False): + for do in offsets: + if isinstance(do, tuple): + do, kwargs = do + else: + do = do + kwargs = {} + + for n in [0, 5]: + if (do in ['WeekOfMonth', 'LastWeekOfMonth', + 'FY5253Quarter', 'FY5253'] and n == 0): + continue + op = getattr(pd.offsets, do)(n, + normalize=normalize, + **kwargs) + assert_func(klass([x + op for x in s]), s + op) + assert_func(klass([x - op for x in s]), s - op) + assert_func(klass([op + x for x in s]), op + s) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py new file mode 100644 index 0000000000000..ba6beb03c7f24 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -0,0 +1,168 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas import (DatetimeIndex, date_range, Series, bdate_range, DataFrame, + Int64Index) + + +class TestDatetimeIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_union(self): + i1 = Int64Index(np.arange(0, 20, 2)) + i2 = Int64Index(np.arange(10, 30, 2)) + result = i1.union(i2) + expected = Int64Index(np.arange(0, 30, 2)) + tm.assert_index_equal(result, expected) + + def test_union_coverage(self): + idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02']) + ordered = DatetimeIndex(idx.sort_values(), freq='infer') + result = ordered.union(idx) + tm.assert_index_equal(result, ordered) + + result = ordered[:0].union(ordered) + tm.assert_index_equal(result, ordered) + 
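# the union of an empty slice with the full index should keep the
+ # inferred freq as well, which the next assertion checks +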
self.assertEqual(result.freq, ordered.freq) + + def test_union_bug_1730(self): + rng_a = date_range('1/1/2012', periods=4, freq='3H') + rng_b = date_range('1/1/2012', periods=4, freq='4H') + + result = rng_a.union(rng_b) + exp = DatetimeIndex(sorted(set(list(rng_a)) | set(list(rng_b)))) + tm.assert_index_equal(result, exp) + + def test_union_bug_1745(self): + left = DatetimeIndex(['2012-05-11 15:19:49.695000']) + right = DatetimeIndex(['2012-05-29 13:04:21.322000', + '2012-05-11 15:27:24.873000', + '2012-05-11 15:31:05.350000']) + + result = left.union(right) + exp = DatetimeIndex(sorted(set(list(left)) | set(list(right)))) + tm.assert_index_equal(result, exp) + + def test_union_bug_4564(self): + from pandas import DateOffset + left = date_range("2013-01-01", "2013-02-01") + right = left + DateOffset(minutes=15) + + result = left.union(right) + exp = DatetimeIndex(sorted(set(list(left)) | set(list(right)))) + tm.assert_index_equal(result, exp) + + def test_union_freq_both_none(self): + # GH11086 + expected = bdate_range('20150101', periods=10) + expected.freq = None + + result = expected.union(expected) + tm.assert_index_equal(result, expected) + self.assertIsNone(result.freq) + + def test_union_dataframe_index(self): + rng1 = date_range('1/1/1999', '1/1/2012', freq='MS') + s1 = Series(np.random.randn(len(rng1)), rng1) + + rng2 = date_range('1/1/1980', '12/1/2001', freq='MS') + s2 = Series(np.random.randn(len(rng2)), rng2) + df = DataFrame({'s1': s1, 's2': s2}) + + exp = pd.date_range('1/1/1980', '1/1/2012', freq='MS') + tm.assert_index_equal(df.index, exp) + + def test_union_with_DatetimeIndex(self): + i1 = Int64Index(np.arange(0, 20, 2)) + i2 = DatetimeIndex(start='2012-01-03 00:00:00', periods=10, freq='D') + i1.union(i2) # Works + i2.union(i1) # Fails with "AttributeError: can't set attribute" + + def test_intersection(self): + # GH 4690 (with tz) + for tz in [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: + base = date_range('6/1/2000', '6/30/2000', freq='D', name='idx') + + # if target has the same name, it is preserved + rng2 = date_range('5/15/2000', '6/20/2000', freq='D', name='idx') + expected2 = date_range('6/1/2000', '6/20/2000', freq='D', + name='idx') + + # if target name is different, it will be reset + rng3 = date_range('5/15/2000', '6/20/2000', freq='D', name='other') + expected3 = date_range('6/1/2000', '6/20/2000', freq='D', + name=None) + + rng4 = date_range('7/1/2000', '7/31/2000', freq='D', name='idx') + expected4 = DatetimeIndex([], name='idx') + + for (rng, expected) in [(rng2, expected2), (rng3, expected3), + (rng4, expected4)]: + result = base.intersection(rng) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.tz, expected.tz) + + # non-monotonic + base = DatetimeIndex(['2011-01-05', '2011-01-04', + '2011-01-02', '2011-01-03'], + tz=tz, name='idx') + + rng2 = DatetimeIndex(['2011-01-04', '2011-01-02', + '2011-02-02', '2011-02-03'], + tz=tz, name='idx') + expected2 = DatetimeIndex( + ['2011-01-04', '2011-01-02'], tz=tz, name='idx') + + rng3 = DatetimeIndex(['2011-01-04', '2011-01-02', + '2011-02-02', '2011-02-03'], + tz=tz, name='other') + expected3 = DatetimeIndex( + ['2011-01-04', '2011-01-02'], tz=tz, name=None) + + # GH 7880 + rng4 = date_range('7/1/2000', '7/31/2000', freq='D', tz=tz, + name='idx') + expected4 = DatetimeIndex([], tz=tz, name='idx') + + for (rng, expected) in [(rng2, expected2), (rng3, expected3), + (rng4, expected4)]: + 
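# unlike the monotonic case above, intersecting non-monotonic indexes
+ # is expected to drop the freq (see the assertIsNone below) +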
result = base.intersection(rng) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertIsNone(result.freq) + self.assertEqual(result.tz, expected.tz) + + # empty same freq GH2129 + rng = date_range('6/1/2000', '6/15/2000', freq='T') + result = rng[0:0].intersection(rng) + self.assertEqual(len(result), 0) + + result = rng.intersection(rng[0:0]) + self.assertEqual(len(result), 0) + + def test_intersection_bug_1708(self): + from pandas import DateOffset + index_1 = date_range('1/1/2012', periods=4, freq='12H') + index_2 = index_1 + DateOffset(hours=1) + + result = index_1 & index_2 + self.assertEqual(len(result), 0) + + def test_difference_freq(self): + # GH14323: difference of DatetimeIndex should not preserve frequency + + index = date_range("20160920", "20160925", freq="D") + other = date_range("20160921", "20160924", freq="D") + expected = DatetimeIndex(["20160920", "20160925"], freq=None) + idx_diff = index.difference(other) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) + + other = date_range("20160922", "20160925", freq="D") + idx_diff = index.difference(other) + expected = DatetimeIndex(["20160920", "20160921"], freq=None) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 2cd73ec8d254a..32e4029a57fe9 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -1,18 +1,15 @@ # -*- coding: utf-8 -*- -from datetime import datetime, timedelta, time, date - import numpy as np +from datetime import timedelta +import pandas as pd +import pandas.util.testing as tm from pandas import (DatetimeIndex, Float64Index, Index, Int64Index, NaT, Period, PeriodIndex, Series, Timedelta, TimedeltaIndex, date_range, period_range, timedelta_range, notnull) -import pandas.util.testing as tm - -import pandas as pd -from pandas.tslib import Timestamp, OutOfBoundsDatetime from .common import Base @@ -88,553 +85,9 @@ def test_shift(self): '2013-01-11'], freq='D') self.assert_index_equal(result, expected) - def test_construction_with_alt(self): - - i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') - i2 = DatetimeIndex(i, dtype=i.dtype) - self.assert_index_equal(i, i2) - - i2 = DatetimeIndex(i.tz_localize(None).asi8, tz=i.dtype.tz) - self.assert_index_equal(i, i2) - - i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype) - self.assert_index_equal(i, i2) - - i2 = DatetimeIndex( - i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz) - self.assert_index_equal(i, i2) - - # localize into the provided tz - i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC') - expected = i.tz_localize(None).tz_localize('UTC') - self.assert_index_equal(i2, expected) - - # incompat tz/dtype - self.assertRaises(ValueError, lambda: DatetimeIndex( - i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific')) - def test_pickle_compat_construction(self): pass - def test_construction_index_with_mixed_timezones(self): - # GH 11488 - # no tz results in DatetimeIndex - result = Index([Timestamp('2011-01-01'), - Timestamp('2011-01-02')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01'), - Timestamp('2011-01-02')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) - - # same tz results in DatetimeIndex - result = 
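
test_difference_freq encodes the GH14323 decision: difference() always resets freq, even when the surviving labels happen to look regular, because the result is not guaranteed to be a contiguous range. The same check as a standalone sketch:

import pandas as pd

index = pd.date_range('20160920', '20160925', freq='D')
other = pd.date_range('20160921', '20160924', freq='D')

diff = index.difference(other)
assert list(diff) == [pd.Timestamp('2016-09-20'), pd.Timestamp('2016-09-25')]
# The two survivors are not a contiguous daily range, so freq is dropped.
assert diff.freq is None
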
Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], - name='idx') - exp = DatetimeIndex( - [Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00') - ], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - # same tz results in DatetimeIndex (DST) - result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), - Timestamp('2011-08-01 10:00', tz='US/Eastern')], - name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-08-01 10:00')], - tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - # different tz results in Index(dtype=object) - result = Index([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - name='idx') - exp = Index([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) - - result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - name='idx') - exp = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) - - # length = 1 - result = Index([Timestamp('2011-01-01')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) - - # length = 1 with tz - result = Index( - [Timestamp('2011-01-01 10:00', tz='Asia/Tokyo')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00')], tz='Asia/Tokyo', - name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - def test_construction_index_with_mixed_timezones_with_NaT(self): - # GH 11488 - result = Index([pd.NaT, Timestamp('2011-01-01'), - pd.NaT, Timestamp('2011-01-02')], name='idx') - exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01'), - pd.NaT, Timestamp('2011-01-02')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) - - # same tz results in DatetimeIndex - result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - pd.NaT, Timestamp('2011-01-02 10:00', - tz='Asia/Tokyo')], - name='idx') - exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 10:00'), - pd.NaT, Timestamp('2011-01-02 10:00')], - tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - # same tz results in DatetimeIndex (DST) - result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), - pd.NaT, - Timestamp('2011-08-01 10:00', tz='US/Eastern')], - name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), pd.NaT, - Timestamp('2011-08-01 10:00')], - tz='US/Eastern', name='idx') - 
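
The mixed-timezone construction tests being moved out of this file all follow one decision table: homogeneous timezones produce a DatetimeIndex (localized when a tz is present), while mixed timezones cannot be represented by a single datetime64 block and degrade to an object-dtype Index. A standalone sketch of the two branches (assuming a 0.20-era pandas):

import pandas as pd

same = pd.Index([pd.Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                 pd.Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')])
mixed = pd.Index([pd.Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                  pd.Timestamp('2011-01-02 10:00', tz='US/Eastern')])

# One zone: a proper tz-aware DatetimeIndex.
assert isinstance(same, pd.DatetimeIndex) and str(same.tz) == 'Asia/Tokyo'
# Two zones: falls back to a plain object Index of Timestamps.
assert not isinstance(mixed, pd.DatetimeIndex) and mixed.dtype == object
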
self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - # different tz results in Index(dtype=object) - result = Index([pd.NaT, Timestamp('2011-01-01 10:00'), - pd.NaT, Timestamp('2011-01-02 10:00', - tz='US/Eastern')], - name='idx') - exp = Index([pd.NaT, Timestamp('2011-01-01 10:00'), - pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], - dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) - - result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - pd.NaT, Timestamp('2011-01-02 10:00', - tz='US/Eastern')], name='idx') - exp = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], - dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) - - # all NaT - result = Index([pd.NaT, pd.NaT], name='idx') - exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) - - # all NaT with tz - result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') - exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - def test_construction_dti_with_mixed_timezones(self): - # GH 11488 (not changed, added explicit tests) - - # no tz results in DatetimeIndex - result = DatetimeIndex( - [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') - exp = DatetimeIndex( - [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - - # same tz results in DatetimeIndex - result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', - tz='Asia/Tokyo')], - name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00')], - tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - - # same tz results in DatetimeIndex (DST) - result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'), - Timestamp('2011-08-01 10:00', - tz='US/Eastern')], - name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-08-01 10:00')], - tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - - # different tz coerces tz-naive to tz-awareIndex(dtype=object) - result = DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', - tz='US/Eastern')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 05:00'), - Timestamp('2011-01-02 10:00')], - tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - - # tz mismatch affecting to tz-aware raises TypeError/ValueError - - with tm.assertRaises(ValueError): - DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - name='idx') - - with tm.assertRaisesRegexp(TypeError, 'data is already 
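
For the explicit DatetimeIndex constructor the rules tighten: a tz= argument localizes tz-naive input, but input that is already tz-aware in a different zone is rejected rather than silently converted. A sketch of the accepting branch, with the raising branch left as a comment since the exact exception and message are version-dependent:

import pandas as pd

# tz= localizes tz-naive strings into the given zone.
idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00'],
                       tz='Asia/Tokyo')
assert str(idx.tz) == 'Asia/Tokyo'

# At the pandas version this patch series targets, already-aware data in
# another zone raised instead of converting:
#   DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern')],
#                 tz='Asia/Tokyo')   # TypeError: data is already tz-aware
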
tz-aware'): - DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - tz='Asia/Tokyo', name='idx') - - with tm.assertRaises(ValueError): - DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - tz='US/Eastern', name='idx') - - with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'): - # passing tz should results in DatetimeIndex, then mismatch raises - # TypeError - Index([pd.NaT, Timestamp('2011-01-01 10:00'), - pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], - tz='Asia/Tokyo', name='idx') - - def test_construction_base_constructor(self): - arr = [pd.Timestamp('2011-01-01'), pd.NaT, pd.Timestamp('2011-01-03')] - tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.DatetimeIndex(np.array(arr))) - - arr = [np.nan, pd.NaT, pd.Timestamp('2011-01-03')] - tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.DatetimeIndex(np.array(arr))) - - def test_construction_outofbounds(self): - # GH 13663 - dates = [datetime(3000, 1, 1), datetime(4000, 1, 1), - datetime(5000, 1, 1), datetime(6000, 1, 1)] - exp = Index(dates, dtype=object) - # coerces to object - tm.assert_index_equal(Index(dates), exp) - - with tm.assertRaises(OutOfBoundsDatetime): - # can't create DatetimeIndex - DatetimeIndex(dates) - - def test_construction_with_ndarray(self): - # GH 5152 - dates = [datetime(2013, 10, 7), - datetime(2013, 10, 8), - datetime(2013, 10, 9)] - data = DatetimeIndex(dates, freq=pd.tseries.frequencies.BDay()).values - result = DatetimeIndex(data, freq=pd.tseries.frequencies.BDay()) - expected = DatetimeIndex(['2013-10-07', - '2013-10-08', - '2013-10-09'], - freq='B') - tm.assert_index_equal(result, expected) - - def test_astype(self): - # GH 13149, GH 13209 - idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) - - result = idx.astype(object) - expected = Index([Timestamp('2016-05-16')] + [NaT] * 3, dtype=object) - tm.assert_index_equal(result, expected) - - result = idx.astype(int) - expected = Int64Index([1463356800000000000] + - [-9223372036854775808] * 3, dtype=np.int64) - tm.assert_index_equal(result, expected) - - rng = date_range('1/1/2000', periods=10) - result = rng.astype('i8') - self.assert_index_equal(result, Index(rng.asi8)) - self.assert_numpy_array_equal(result.values, rng.asi8) - - def test_astype_with_tz(self): - - # with tz - rng = date_range('1/1/2000', periods=10, tz='US/Eastern') - result = rng.astype('datetime64[ns]') - expected = (date_range('1/1/2000', periods=10, - tz='US/Eastern') - .tz_convert('UTC').tz_localize(None)) - tm.assert_index_equal(result, expected) - - # BUG#10442 : testing astype(str) is correct for Series/DatetimeIndex - result = pd.Series(pd.date_range('2012-01-01', periods=3)).astype(str) - expected = pd.Series( - ['2012-01-01', '2012-01-02', '2012-01-03'], dtype=object) - tm.assert_series_equal(result, expected) - - result = Series(pd.date_range('2012-01-01', periods=3, - tz='US/Eastern')).astype(str) - expected = Series(['2012-01-01 00:00:00-05:00', - '2012-01-02 00:00:00-05:00', - '2012-01-03 00:00:00-05:00'], - dtype=object) - tm.assert_series_equal(result, expected) - - def test_astype_str_compat(self): - # GH 13149, GH 13209 - # verify that we are returing NaT as a string (and not unicode) - - idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) - result = idx.astype(str) - expected = Index(['2016-05-16', 'NaT', 
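
test_construction_outofbounds documents the split behaviour for dates beyond the datetime64[ns] range (roughly years 1677 to 2262): Index() quietly degrades to object dtype, while DatetimeIndex() raises OutOfBoundsDatetime. A sketch, assuming the nanosecond-only datetime64 of this pandas era and the pd.errors namespace introduced in 0.20 (later pandas grew coarser units, so the outcome there differs):

import datetime
import pandas as pd

dates = [datetime.datetime(3000, 1, 1), datetime.datetime(4000, 1, 1)]

print(pd.Index(dates).dtype)  # object on a 0.20-era pandas

try:
    pd.DatetimeIndex(dates)
except pd.errors.OutOfBoundsDatetime:
    print('does not fit in datetime64[ns]')
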
'NaT', 'NaT'], dtype=object) - tm.assert_index_equal(result, expected) - - def test_astype_str(self): - # test astype string - #10442 - result = date_range('2012-01-01', periods=4, - name='test_name').astype(str) - expected = Index(['2012-01-01', '2012-01-02', '2012-01-03', - '2012-01-04'], name='test_name', dtype=object) - tm.assert_index_equal(result, expected) - - # test astype string with tz and name - result = date_range('2012-01-01', periods=3, name='test_name', - tz='US/Eastern').astype(str) - expected = Index(['2012-01-01 00:00:00-05:00', - '2012-01-02 00:00:00-05:00', - '2012-01-03 00:00:00-05:00'], - name='test_name', dtype=object) - tm.assert_index_equal(result, expected) - - # test astype string with freqH and name - result = date_range('1/1/2011', periods=3, freq='H', - name='test_name').astype(str) - expected = Index(['2011-01-01 00:00:00', '2011-01-01 01:00:00', - '2011-01-01 02:00:00'], - name='test_name', dtype=object) - tm.assert_index_equal(result, expected) - - # test astype string with freqH and timezone - result = date_range('3/6/2012 00:00', periods=2, freq='H', - tz='Europe/London', name='test_name').astype(str) - expected = Index(['2012-03-06 00:00:00+00:00', - '2012-03-06 01:00:00+00:00'], - dtype=object, name='test_name') - tm.assert_index_equal(result, expected) - - def test_astype_datetime64(self): - # GH 13149, GH 13209 - idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) - - result = idx.astype('datetime64[ns]') - tm.assert_index_equal(result, idx) - self.assertFalse(result is idx) - - result = idx.astype('datetime64[ns]', copy=False) - tm.assert_index_equal(result, idx) - self.assertTrue(result is idx) - - idx_tz = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN], tz='EST') - result = idx_tz.astype('datetime64[ns]') - expected = DatetimeIndex(['2016-05-16 05:00:00', 'NaT', 'NaT', 'NaT'], - dtype='datetime64[ns]') - tm.assert_index_equal(result, expected) - - def test_astype_raises(self): - # GH 13149, GH 13209 - idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) - - self.assertRaises(ValueError, idx.astype, float) - self.assertRaises(ValueError, idx.astype, 'timedelta64') - self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') - self.assertRaises(ValueError, idx.astype, 'datetime64') - self.assertRaises(ValueError, idx.astype, 'datetime64[D]') - - def test_where_other(self): - - # other is ndarray or Index - i = pd.date_range('20130101', periods=3, tz='US/Eastern') - - for arr in [np.nan, pd.NaT]: - result = i.where(notnull(i), other=np.nan) - expected = i - tm.assert_index_equal(result, expected) - - i2 = i.copy() - i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) - result = i.where(notnull(i2), i2) - tm.assert_index_equal(result, i2) - - i2 = i.copy() - i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) - result = i.where(notnull(i2), i2.values) - tm.assert_index_equal(result, i2) - - def test_where_tz(self): - i = pd.date_range('20130101', periods=3, tz='US/Eastern') - result = i.where(notnull(i)) - expected = i - tm.assert_index_equal(result, expected) - - i2 = i.copy() - i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) - result = i.where(notnull(i2)) - expected = i2 - tm.assert_index_equal(result, expected) - - def test_get_loc(self): - idx = pd.date_range('2000-01-01', periods=3) - - for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual(idx.get_loc(idx[1].to_pydatetime(), method), 1) - self.assertEqual(idx.get_loc(str(idx[1]), method), 1) - if method is not None: - 
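
The astype(str) tests pin down the rendering: casting a DatetimeIndex to str produces an object Index of ISO-style strings, with the UTC offset appended for tz-aware data. A standalone sketch:

import pandas as pd

rng = pd.date_range('2012-01-01', periods=3, tz='US/Eastern')

as_str = rng.astype(str)
assert as_str.dtype == object
assert list(as_str) == ['2012-01-01 00:00:00-05:00',
                        '2012-01-02 00:00:00-05:00',
                        '2012-01-03 00:00:00-05:00']
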
self.assertEqual(idx.get_loc(idx[1], method, - tolerance=pd.Timedelta('0 days')), - 1) - - self.assertEqual(idx.get_loc('2000-01-01', method='nearest'), 0) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest'), 1) - - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance='1 day'), 1) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance=pd.Timedelta('1D')), 1) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance=np.timedelta64(1, 'D')), 1) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance=timedelta(1)), 1) - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): - idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') - with tm.assertRaises(KeyError): - idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') - - self.assertEqual(idx.get_loc('2000', method='nearest'), slice(0, 3)) - self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 3)) - - self.assertEqual(idx.get_loc('1999', method='nearest'), 0) - self.assertEqual(idx.get_loc('2001', method='nearest'), 2) - - with tm.assertRaises(KeyError): - idx.get_loc('1999', method='pad') - with tm.assertRaises(KeyError): - idx.get_loc('2001', method='backfill') - - with tm.assertRaises(KeyError): - idx.get_loc('foobar') - with tm.assertRaises(TypeError): - idx.get_loc(slice(2)) - - idx = pd.to_datetime(['2000-01-01', '2000-01-04']) - self.assertEqual(idx.get_loc('2000-01-02', method='nearest'), 0) - self.assertEqual(idx.get_loc('2000-01-03', method='nearest'), 1) - self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 2)) - - # time indexing - idx = pd.date_range('2000-01-01', periods=24, freq='H') - tm.assert_numpy_array_equal(idx.get_loc(time(12)), - np.array([12]), check_dtype=False) - tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)), - np.array([]), check_dtype=False) - with tm.assertRaises(NotImplementedError): - idx.get_loc(time(12, 30), method='pad') - - def test_get_indexer(self): - idx = pd.date_range('2000-01-01', periods=3) - exp = np.array([0, 1, 2], dtype=np.intp) - tm.assert_numpy_array_equal(idx.get_indexer(idx), exp) - - target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', - '1 day 1 hour']) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1], dtype=np.intp)) - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'nearest', - tolerance=pd.Timedelta('1 hour')), - np.array([0, -1, 1], dtype=np.intp)) - with tm.assertRaises(ValueError): - idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') - - def test_roundtrip_pickle_with_tz(self): - - # GH 8367 - # round-trip of timezone - index = date_range('20130101', periods=3, tz='US/Eastern', name='foo') - unpickled = self.round_trip_pickle(index) - self.assert_index_equal(index, unpickled) - - def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): - # GH7774 - index = date_range('20130101', periods=3, tz='US/Eastern') - self.assertEqual(str(index.reindex([])[0].tz), 'US/Eastern') - self.assertEqual(str(index.reindex(np.array([]))[0].tz), 'US/Eastern') - - def test_time_loc(self): # GH8667 - from datetime import time - from pandas.index import _SIZE_CUTOFF - - ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64) - key = time(15, 11, 30) - start = 
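
The get_loc/get_indexer tests share one model: 'pad' matches backwards, 'backfill' forwards, 'nearest' snaps to the closest label, and tolerance caps how far away a match may be, with -1 standing for "no match". The same expectations as a standalone sketch (values taken from the tests above):

import pandas as pd

idx = pd.date_range('2000-01-01', periods=3)
target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour'])

assert list(idx.get_indexer(target, 'pad')) == [-1, 0, 1]
assert list(idx.get_indexer(target, 'backfill')) == [0, 1, 2]
assert list(idx.get_indexer(target, 'nearest')) == [0, 1, 1]
# A 1-hour tolerance disqualifies the 12-hours-away middle target.
assert list(idx.get_indexer(target, 'nearest',
                            tolerance=pd.Timedelta('1 hour'))) == [0, -1, 1]
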
key.hour * 3600 + key.minute * 60 + key.second - step = 24 * 3600 - - for n in ns: - idx = pd.date_range('2014-11-26', periods=n, freq='S') - ts = pd.Series(np.random.randn(n), index=idx) - i = np.arange(start, n, step) - - tm.assert_numpy_array_equal(ts.index.get_loc(key), i, - check_dtype=False) - tm.assert_series_equal(ts[key], ts.iloc[i]) - - left, right = ts.copy(), ts.copy() - left[key] *= -10 - right.iloc[i] *= -10 - tm.assert_series_equal(left, right) - - def test_time_overflow_for_32bit_machines(self): - # GH8943. On some machines NumPy defaults to np.int32 (for example, - # 32-bit Linux machines). In the function _generate_regular_range - # found in tseries/index.py, `periods` gets multiplied by `strides` - # (which has value 1e9) and since the max value for np.int32 is ~2e9, - # and since those machines won't promote np.int32 to np.int64, we get - # overflow. - periods = np.int_(1000) - - idx1 = pd.date_range(start='2000', periods=periods, freq='S') - self.assertEqual(len(idx1), periods) - - idx2 = pd.date_range(end='2000', periods=periods, freq='S') - self.assertEqual(len(idx2), periods) - def test_intersection(self): first = self.index second = self.index[5:] @@ -665,122 +118,6 @@ def test_union(self): result = first.union(case) self.assertTrue(tm.equalContents(result, everything)) - def test_nat(self): - self.assertIs(DatetimeIndex([np.nan])[0], pd.NaT) - - def test_ufunc_coercions(self): - idx = date_range('2011-01-01', periods=3, freq='2D', name='x') - - delta = np.timedelta64(1, 'D') - for result in [idx + delta, np.add(idx, delta)]: - tm.assertIsInstance(result, DatetimeIndex) - exp = date_range('2011-01-02', periods=3, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - for result in [idx - delta, np.subtract(idx, delta)]: - tm.assertIsInstance(result, DatetimeIndex) - exp = date_range('2010-12-31', periods=3, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - delta = np.array([np.timedelta64(1, 'D'), np.timedelta64(2, 'D'), - np.timedelta64(3, 'D')]) - for result in [idx + delta, np.add(idx, delta)]: - tm.assertIsInstance(result, DatetimeIndex) - exp = DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-08'], - freq='3D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '3D') - - for result in [idx - delta, np.subtract(idx, delta)]: - tm.assertIsInstance(result, DatetimeIndex) - exp = DatetimeIndex(['2010-12-31', '2011-01-01', '2011-01-02'], - freq='D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, 'D') - - def test_fillna_datetime64(self): - # GH 11343 - for tz in ['US/Eastern', 'Asia/Tokyo']: - idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, - '2011-01-01 11:00']) - - exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00']) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) - - # tz mismatch - exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), - pd.Timestamp('2011-01-01 10:00', tz=tz), - pd.Timestamp('2011-01-01 11:00')], dtype=object) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) - - # object - exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), 'x', - pd.Timestamp('2011-01-01 11:00')], dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) - - idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, - '2011-01-01 11:00'], tz=tz) - - exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - 
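
The deleted test_ufunc_coercions above checks that NumPy ufuncs round-trip through pandas: np.add(idx, delta) must take the same path as idx + delta, returning a DatetimeIndex with the same labels. A standalone sketch:

import numpy as np
import pandas as pd

idx = pd.date_range('2011-01-01', periods=3, freq='2D', name='x')
delta = np.timedelta64(1, 'D')
exp = pd.date_range('2011-01-02', periods=3, freq='2D', name='x')

# Operator and ufunc spellings must agree, and both stay DatetimeIndex.
for result in (idx + delta, np.add(idx, delta)):
    assert isinstance(result, pd.DatetimeIndex)
    assert result.equals(exp)
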
'2011-01-01 11:00'], tz=tz) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) - - exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), - pd.Timestamp('2011-01-01 10:00'), - pd.Timestamp('2011-01-01 11:00', tz=tz)], - dtype=object) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) - - # object - exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), - 'x', - pd.Timestamp('2011-01-01 11:00', tz=tz)], - dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) - - def test_difference_freq(self): - # GH14323: difference of DatetimeIndex should not preserve frequency - - index = date_range("20160920", "20160925", freq="D") - other = date_range("20160921", "20160924", freq="D") - expected = DatetimeIndex(["20160920", "20160925"], freq=None) - idx_diff = index.difference(other) - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) - - other = date_range("20160922", "20160925", freq="D") - idx_diff = index.difference(other) - expected = DatetimeIndex(["20160920", "20160921"], freq=None) - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) - - def test_week_of_month_frequency(self): - # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise - d1 = date(2002, 9, 1) - d2 = date(2013, 10, 27) - d3 = date(2012, 9, 30) - idx1 = DatetimeIndex([d1, d2]) - idx2 = DatetimeIndex([d3]) - result_append = idx1.append(idx2) - expected = DatetimeIndex([d1, d2, d3]) - tm.assert_index_equal(result_append, expected) - result_union = idx1.union(idx2) - expected = DatetimeIndex([d1, d3, d2]) - tm.assert_index_equal(result_union, expected) - - # GH 5115 - result = date_range("2013-1-1", periods=4, freq='WOM-1SAT') - dates = ['2013-01-05', '2013-02-02', '2013-03-02', '2013-04-06'] - expected = DatetimeIndex(dates, freq='WOM-1SAT') - tm.assert_index_equal(result, expected) - class TestPeriodIndex(DatetimeLike, tm.TestCase): _holder = PeriodIndex diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index bca50237081e1..4f2ac3ff0d87e 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -1,5 +1,5 @@ from __future__ import print_function -from datetime import datetime, timedelta +from datetime import timedelta import numpy as np import pandas as pd from pandas import (Series, Index, Int64Index, Timestamp, Period, @@ -14,901 +14,6 @@ from pandas.tests.test_base import Ops -class TestDatetimeIndexOps(Ops): - tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/Asia/Singapore', - 'dateutil/US/Pacific'] - - def setUp(self): - super(TestDatetimeIndexOps, self).setUp() - mask = lambda x: (isinstance(x, DatetimeIndex) or - isinstance(x, PeriodIndex)) - self.is_valid_objs = [o for o in self.objs if mask(o)] - self.not_valid_objs = [o for o in self.objs if not mask(o)] - - def test_ops_properties(self): - self.check_ops_properties( - ['year', 'month', 'day', 'hour', 'minute', 'second', 'weekofyear', - 'week', 'dayofweek', 'dayofyear', 'quarter']) - self.check_ops_properties(['date', 'time', 'microsecond', 'nanosecond', - 'is_month_start', 'is_month_end', - 'is_quarter_start', - 'is_quarter_end', 'is_year_start', - 'is_year_end', 'weekday_name'], - lambda x: isinstance(x, DatetimeIndex)) - - def test_ops_properties_basic(self): - - # sanity check that the behavior didn't change - # GH7206 - for op in ['year', 'day', 'second', 'weekday']: - self.assertRaises(TypeError, lambda x: 
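
The fillna cases above all hinge on representability: a fill value in the index's own timezone keeps the datetime64 dtype, while a naive or differently-zoned value (or a plain string like 'x') forces a fallback to object dtype. A standalone sketch of the two branches, as asserted by the GH11343 tests of this era; later pandas warns or casts differently:

import pandas as pd

idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00'],
                       tz='Asia/Tokyo')

same_tz = idx.fillna(pd.Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'))
assert isinstance(same_tz, pd.DatetimeIndex)   # dtype preserved

naive = idx.fillna(pd.Timestamp('2011-01-01 10:00'))
assert naive.dtype == object                   # mixed tz-ness: object
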
getattr(self.dt_series, op)) - - # attribute access should still work! - s = Series(dict(year=2000, month=1, day=10)) - self.assertEqual(s.year, 2000) - self.assertEqual(s.month, 1) - self.assertEqual(s.day, 10) - self.assertRaises(AttributeError, lambda: s.weekday) - - def test_asobject_tolist(self): - idx = pd.date_range(start='2013-01-01', periods=4, freq='M', - name='idx') - expected_list = [Timestamp('2013-01-31'), - Timestamp('2013-02-28'), - Timestamp('2013-03-31'), - Timestamp('2013-04-30')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - idx = pd.date_range(start='2013-01-01', periods=4, freq='M', - name='idx', tz='Asia/Tokyo') - expected_list = [Timestamp('2013-01-31', tz='Asia/Tokyo'), - Timestamp('2013-02-28', tz='Asia/Tokyo'), - Timestamp('2013-03-31', tz='Asia/Tokyo'), - Timestamp('2013-04-30', tz='Asia/Tokyo')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2), - pd.NaT, datetime(2013, 1, 4)], name='idx') - expected_list = [Timestamp('2013-01-01'), - Timestamp('2013-01-02'), pd.NaT, - Timestamp('2013-01-04')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - def test_minmax(self): - for tz in self.tz: - # monotonic - idx1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', - '2011-01-03'], tz=tz) - self.assertTrue(idx1.is_monotonic) - - # non-monotonic - idx2 = pd.DatetimeIndex(['2011-01-01', pd.NaT, '2011-01-03', - '2011-01-02', pd.NaT], tz=tz) - self.assertFalse(idx2.is_monotonic) - - for idx in [idx1, idx2]: - self.assertEqual(idx.min(), Timestamp('2011-01-01', tz=tz)) - self.assertEqual(idx.max(), Timestamp('2011-01-03', tz=tz)) - self.assertEqual(idx.argmin(), 0) - self.assertEqual(idx.argmax(), 2) - - for op in ['min', 'max']: - # Return NaT - obj = DatetimeIndex([]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - obj = DatetimeIndex([pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - def test_numpy_minmax(self): - dr = pd.date_range(start='2016-01-15', end='2016-01-20') - - self.assertEqual(np.min(dr), - Timestamp('2016-01-15 00:00:00', freq='D')) - self.assertEqual(np.max(dr), - Timestamp('2016-01-20 00:00:00', freq='D')) - - errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.min, dr, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.max, dr, out=0) - - self.assertEqual(np.argmin(dr), 0) - self.assertEqual(np.argmax(dr), 5) - - if not _np_version_under1p10: - errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, dr, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, dr, out=0) - - def test_round(self): 
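
test_minmax fixes the NaT semantics for reductions: NaT is skipped when real values exist, and an empty or all-NaT index reduces to NaT rather than raising. A standalone sketch:

import pandas as pd

idx = pd.DatetimeIndex(['2011-01-01', pd.NaT, '2011-01-03',
                        '2011-01-02', pd.NaT])

assert idx.min() == pd.Timestamp('2011-01-01')
assert idx.max() == pd.Timestamp('2011-01-03')
assert idx.argmin() == 0 and idx.argmax() == 2

# Nothing to reduce over: the answer is NaT, not an exception.
assert pd.DatetimeIndex([]).min() is pd.NaT
assert pd.DatetimeIndex([pd.NaT]).max() is pd.NaT
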
- for tz in self.tz: - rng = pd.date_range(start='2016-01-01', periods=5, - freq='30Min', tz=tz) - elt = rng[1] - - expected_rng = DatetimeIndex([ - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 01:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), - ]) - expected_elt = expected_rng[1] - - tm.assert_index_equal(rng.round(freq='H'), expected_rng) - self.assertEqual(elt.round(freq='H'), expected_elt) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with tm.assertRaisesRegexp(ValueError, msg): - rng.round(freq='foo') - with tm.assertRaisesRegexp(ValueError, msg): - elt.round(freq='foo') - - msg = " is a non-fixed frequency" - tm.assertRaisesRegexp(ValueError, msg, rng.round, freq='M') - tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') - - def test_repeat_range(self): - rng = date_range('1/1/2000', '1/1/2001') - - result = rng.repeat(5) - self.assertIsNone(result.freq) - self.assertEqual(len(result), 5 * len(rng)) - - for tz in self.tz: - index = pd.date_range('2001-01-01', periods=2, freq='D', tz=tz) - exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', - '2001-01-02', '2001-01-02'], tz=tz) - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) - - index = pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz) - exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', - '2001-01-03', '2001-01-03'], tz=tz) - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) - - index = pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'], - tz=tz) - exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', '2001-01-01', - 'NaT', 'NaT', 'NaT', - '2003-01-01', '2003-01-01', '2003-01-01'], - tz=tz) - for res in [index.repeat(3), np.repeat(index, 3)]: - tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) - - def test_repeat(self): - reps = 2 - msg = "the 'axis' parameter is not supported" - - for tz in self.tz: - rng = pd.date_range(start='2016-01-01', periods=2, - freq='30Min', tz=tz) - - expected_rng = DatetimeIndex([ - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), - ]) - - res = rng.repeat(reps) - tm.assert_index_equal(res, expected_rng) - self.assertIsNone(res.freq) - - tm.assert_index_equal(np.repeat(rng, reps), expected_rng) - tm.assertRaisesRegexp(ValueError, msg, np.repeat, - rng, reps, axis=1) - - def test_representation(self): - - idx = [] - idx.append(DatetimeIndex([], freq='D')) - idx.append(DatetimeIndex(['2011-01-01'], freq='D')) - idx.append(DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')) - idx.append(DatetimeIndex( - ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D')) - idx.append(DatetimeIndex( - ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00' - ], freq='H', tz='Asia/Tokyo')) - idx.append(DatetimeIndex( - ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='US/Eastern')) - idx.append(DatetimeIndex( - ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='UTC')) - - exp = [] - exp.append("""DatetimeIndex([], dtype='datetime64[ns]', freq='D')""") - exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', " - "freq='D')") - exp.append("DatetimeIndex(['2011-01-01', '2011-01-02'], " - 
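
test_round above expects round-half-to-even at the half-hour ties (00:30 goes down to 00:00, 01:30 up to 02:00), and test_repeat expects duplicated labels to drop the freq, since repeats can never form a regular range. A standalone sketch of both:

import pandas as pd

rng = pd.date_range('2016-01-01', periods=5, freq='30min')

rounded = rng.round('H')
assert rounded[1] == pd.Timestamp('2016-01-01 00:00:00')  # 00:30 -> 00:00
assert rounded[3] == pd.Timestamp('2016-01-01 02:00:00')  # 01:30 -> 02:00

rep = rng.repeat(2)
assert len(rep) == 10 and rep.freq is None
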
"dtype='datetime64[ns]', freq='D')") - exp.append("DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " - "dtype='datetime64[ns]', freq='D')") - exp.append("DatetimeIndex(['2011-01-01 09:00:00+09:00', " - "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']" - ", dtype='datetime64[ns, Asia/Tokyo]', freq='H')") - exp.append("DatetimeIndex(['2011-01-01 09:00:00-05:00', " - "'2011-01-01 10:00:00-05:00', 'NaT'], " - "dtype='datetime64[ns, US/Eastern]', freq=None)") - exp.append("DatetimeIndex(['2011-01-01 09:00:00+00:00', " - "'2011-01-01 10:00:00+00:00', 'NaT'], " - "dtype='datetime64[ns, UTC]', freq=None)""") - - with pd.option_context('display.width', 300): - for indx, expected in zip(idx, exp): - for func in ['__repr__', '__unicode__', '__str__']: - result = getattr(indx, func)() - self.assertEqual(result, expected) - - def test_representation_to_series(self): - idx1 = DatetimeIndex([], freq='D') - idx2 = DatetimeIndex(['2011-01-01'], freq='D') - idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') - idx4 = DatetimeIndex( - ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') - idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], freq='H', tz='Asia/Tokyo') - idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], - tz='US/Eastern') - idx7 = DatetimeIndex(['2011-01-01 09:00', '2011-01-02 10:15']) - - exp1 = """Series([], dtype: datetime64[ns])""" - - exp2 = """0 2011-01-01 -dtype: datetime64[ns]""" - - exp3 = """0 2011-01-01 -1 2011-01-02 -dtype: datetime64[ns]""" - - exp4 = """0 2011-01-01 -1 2011-01-02 -2 2011-01-03 -dtype: datetime64[ns]""" - - exp5 = """0 2011-01-01 09:00:00+09:00 -1 2011-01-01 10:00:00+09:00 -2 2011-01-01 11:00:00+09:00 -dtype: datetime64[ns, Asia/Tokyo]""" - - exp6 = """0 2011-01-01 09:00:00-05:00 -1 2011-01-01 10:00:00-05:00 -2 NaT -dtype: datetime64[ns, US/Eastern]""" - - exp7 = """0 2011-01-01 09:00:00 -1 2011-01-02 10:15:00 -dtype: datetime64[ns]""" - - with pd.option_context('display.width', 300): - for idx, expected in zip([idx1, idx2, idx3, idx4, - idx5, idx6, idx7], - [exp1, exp2, exp3, exp4, - exp5, exp6, exp7]): - result = repr(Series(idx)) - self.assertEqual(result, expected) - - def test_summary(self): - # GH9116 - idx1 = DatetimeIndex([], freq='D') - idx2 = DatetimeIndex(['2011-01-01'], freq='D') - idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') - idx4 = DatetimeIndex( - ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') - idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], - freq='H', tz='Asia/Tokyo') - idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], - tz='US/Eastern') - - exp1 = """DatetimeIndex: 0 entries -Freq: D""" - - exp2 = """DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01 -Freq: D""" - - exp3 = """DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02 -Freq: D""" - - exp4 = """DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03 -Freq: D""" - - exp5 = ("DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 " - "to 2011-01-01 11:00:00+09:00\n" - "Freq: H") - - exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT""" - - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6], - [exp1, exp2, exp3, exp4, exp5, exp6]): - result = idx.summary() - self.assertEqual(result, expected) - - def test_resolution(self): - for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', - 'S', 'L', 'U'], - ['day', 'day', 'day', 'day', 'hour', - 'minute', 'second', 'millisecond', - 'microsecond']): - for tz in 
self.tz: - idx = pd.date_range(start='2013-04-01', periods=30, freq=freq, - tz=tz) - self.assertEqual(idx.resolution, expected) - - def test_union(self): - for tz in self.tz: - # union - rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) - expected1 = pd.date_range('1/1/2000', freq='D', periods=10, tz=tz) - - rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) - expected2 = pd.date_range('1/1/2000', freq='D', periods=8, tz=tz) - - rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other3 = pd.DatetimeIndex([], tz=tz) - expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - - for rng, other, expected in [(rng1, other1, expected1), - (rng2, other2, expected2), - (rng3, other3, expected3)]: - - result_union = rng.union(other) - tm.assert_index_equal(result_union, expected) - - def test_add_iadd(self): - for tz in self.tz: - - # offset - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - for delta in offsets: - rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) - result = rng + delta - expected = pd.date_range('2000-01-01 02:00', - '2000-02-01 02:00', tz=tz) - tm.assert_index_equal(result, expected) - rng += delta - tm.assert_index_equal(rng, expected) - - # int - rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, - tz=tz) - result = rng + 1 - expected = pd.date_range('2000-01-01 10:00', freq='H', periods=10, - tz=tz) - tm.assert_index_equal(result, expected) - rng += 1 - tm.assert_index_equal(rng, expected) - - idx = DatetimeIndex(['2011-01-01', '2011-01-02']) - msg = "cannot add a datelike to a DatetimeIndex" - with tm.assertRaisesRegexp(TypeError, msg): - idx + Timestamp('2011-01-01') - - with tm.assertRaisesRegexp(TypeError, msg): - Timestamp('2011-01-01') + idx - - def test_add_dti_dti(self): - # previously performed setop (deprecated in 0.16.0), now raises - # TypeError (GH14164) - - dti = date_range('20130101', periods=3) - dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') - - with tm.assertRaises(TypeError): - dti + dti - - with tm.assertRaises(TypeError): - dti_tz + dti_tz - - with tm.assertRaises(TypeError): - dti_tz + dti - - with tm.assertRaises(TypeError): - dti + dti_tz - - def test_difference(self): - for tz in self.tz: - # diff - rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) - expected1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - - rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) - expected2 = pd.date_range('1/1/2000', freq='D', periods=3, tz=tz) - - rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other3 = pd.DatetimeIndex([], tz=tz) - expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - - for rng, other, expected in [(rng1, other1, expected1), - (rng2, other2, expected2), - (rng3, other3, expected3)]: - result_diff = rng.difference(other) - tm.assert_index_equal(result_diff, expected) - - def test_sub_isub(self): - for tz in self.tz: - - # offset - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - for delta in offsets: - rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) - expected = pd.date_range('1999-12-31 22:00', - '2000-01-31 22:00', tz=tz) - - result = rng - delta - 
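
test_add_iadd and test_add_dti_dti draw the line for `+` on datetime indexes: adding a fixed duration shifts every label and keeps the freq, while adding two DatetimeIndexes is meaningless and raises TypeError, GH14164 having removed the old set-op interpretation. A standalone sketch:

import pandas as pd

dti = pd.date_range('2013-01-01', periods=3)

shifted = dti + pd.Timedelta(hours=2)
assert shifted.equals(pd.date_range('2013-01-01 02:00', periods=3))

try:
    dti + dti
except TypeError:
    pass
else:
    raise AssertionError('dti + dti should raise TypeError')
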
tm.assert_index_equal(result, expected) - rng -= delta - tm.assert_index_equal(rng, expected) - - # int - rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, - tz=tz) - result = rng - 1 - expected = pd.date_range('2000-01-01 08:00', freq='H', periods=10, - tz=tz) - tm.assert_index_equal(result, expected) - rng -= 1 - tm.assert_index_equal(rng, expected) - - def test_sub_dti_dti(self): - # previously performed setop (deprecated in 0.16.0), now changed to - # return subtraction -> TimeDeltaIndex (GH ...) - - dti = date_range('20130101', periods=3) - dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') - dti_tz2 = date_range('20130101', periods=3).tz_localize('UTC') - expected = TimedeltaIndex([0, 0, 0]) - - result = dti - dti - tm.assert_index_equal(result, expected) - - result = dti_tz - dti_tz - tm.assert_index_equal(result, expected) - - with tm.assertRaises(TypeError): - dti_tz - dti - - with tm.assertRaises(TypeError): - dti - dti_tz - - with tm.assertRaises(TypeError): - dti_tz - dti_tz2 - - # isub - dti -= dti - tm.assert_index_equal(dti, expected) - - # different length raises ValueError - dti1 = date_range('20130101', periods=3) - dti2 = date_range('20130101', periods=4) - with tm.assertRaises(ValueError): - dti1 - dti2 - - # NaN propagation - dti1 = DatetimeIndex(['2012-01-01', np.nan, '2012-01-03']) - dti2 = DatetimeIndex(['2012-01-02', '2012-01-03', np.nan]) - expected = TimedeltaIndex(['1 days', np.nan, np.nan]) - result = dti2 - dti1 - tm.assert_index_equal(result, expected) - - def test_sub_period(self): - # GH 13078 - # not supported, check TypeError - p = pd.Period('2011-01-01', freq='D') - - for freq in [None, 'D']: - idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=freq) - - with tm.assertRaises(TypeError): - idx - p - - with tm.assertRaises(TypeError): - p - idx - - def test_comp_nat(self): - left = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT, - pd.Timestamp('2011-01-03')]) - right = pd.DatetimeIndex([pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')]) - - for l, r in [(left, right), (left.asobject, right.asobject)]: - result = l == r - expected = np.array([False, False, True]) - tm.assert_numpy_array_equal(result, expected) - - result = l != r - expected = np.array([True, True, False]) - tm.assert_numpy_array_equal(result, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l == pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT == r, expected) - - expected = np.array([True, True, True]) - tm.assert_numpy_array_equal(l != pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT != l, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l < pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT > l, expected) - - def test_value_counts_unique(self): - # GH 7735 - for tz in self.tz: - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10) - # create repeated values, 'n'th element is repeated by n+1 times - idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), - tz=tz) - - exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10, - tz=tz) - expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10, - tz=tz) - tm.assert_index_equal(idx.unique(), expected) - - idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00', - '2013-01-01 09:00', '2013-01-01 08:00', - '2013-01-01 
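
test_sub_dti_dti records the flip side: `-` between two DatetimeIndexes is elementwise subtraction yielding a TimedeltaIndex, with NaT propagating and mismatched lengths raising. A standalone sketch of the NaT-propagation case:

import numpy as np
import pandas as pd

dti1 = pd.DatetimeIndex(['2012-01-01', np.nan, '2012-01-03'])
dti2 = pd.DatetimeIndex(['2012-01-02', '2012-01-03', np.nan])

result = dti2 - dti1
# NaT on either side poisons that slot of the difference.
assert result.equals(pd.TimedeltaIndex(['1 days', pd.NaT, pd.NaT]))
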
08:00', pd.NaT], tz=tz) - - exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'], - tz=tz) - expected = Series([3, 2], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00', - pd.NaT], tz=tz) - expected = Series([3, 2, 1], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(dropna=False), - expected) - - tm.assert_index_equal(idx.unique(), exp_idx) - - def test_nonunique_contains(self): - # GH 9512 - for idx in map(DatetimeIndex, - ([0, 1, 0], [0, 0, -1], [0, -1, -1], - ['2015', '2015', '2016'], ['2015', '2015', '2014'])): - tm.assertIn(idx[0], idx) - - def test_order(self): - # with freq - idx1 = DatetimeIndex(['2011-01-01', '2011-01-02', - '2011-01-03'], freq='D', name='idx') - idx2 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], freq='H', - tz='Asia/Tokyo', name='tzidx') - - for idx in [idx1, idx2]: - ordered = idx.sort_values() - self.assert_index_equal(ordered, idx) - self.assertEqual(ordered.freq, idx.freq) - - ordered = idx.sort_values(ascending=False) - expected = idx[::-1] - self.assert_index_equal(ordered, expected) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq.n, -1) - - ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, idx) - self.assert_numpy_array_equal(indexer, - np.array([0, 1, 2]), - check_dtype=False) - self.assertEqual(ordered.freq, idx.freq) - - ordered, indexer = idx.sort_values(return_indexer=True, - ascending=False) - expected = idx[::-1] - self.assert_index_equal(ordered, expected) - self.assert_numpy_array_equal(indexer, - np.array([2, 1, 0]), - check_dtype=False) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq.n, -1) - - # without freq - for tz in self.tz: - idx1 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05', - '2011-01-02', '2011-01-01'], - tz=tz, name='idx1') - exp1 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02', - '2011-01-03', '2011-01-05'], - tz=tz, name='idx1') - - idx2 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05', - '2011-01-02', '2011-01-01'], - tz=tz, name='idx2') - - exp2 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02', - '2011-01-03', '2011-01-05'], - tz=tz, name='idx2') - - idx3 = DatetimeIndex([pd.NaT, '2011-01-03', '2011-01-05', - '2011-01-02', pd.NaT], tz=tz, name='idx3') - exp3 = DatetimeIndex([pd.NaT, pd.NaT, '2011-01-02', '2011-01-03', - '2011-01-05'], tz=tz, name='idx3') - - for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: - ordered = idx.sort_values() - self.assert_index_equal(ordered, expected) - self.assertIsNone(ordered.freq) - - ordered = idx.sort_values(ascending=False) - self.assert_index_equal(ordered, expected[::-1]) - self.assertIsNone(ordered.freq) - - ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, expected) - - exp = np.array([0, 4, 3, 1, 2]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) - - ordered, indexer = idx.sort_values(return_indexer=True, - ascending=False) - self.assert_index_equal(ordered, expected[::-1]) - - exp = np.array([2, 1, 3, 4, 0]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) - - def test_getitem(self): - idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - idx2 = 
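
test_value_counts_unique fixes the NaT accounting: value_counts() ignores NaT by default, and only dropna=False counts it. A standalone sketch:

import pandas as pd

idx = pd.DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00',
                        '2013-01-01 09:00', '2013-01-01 08:00',
                        '2013-01-01 08:00', pd.NaT])

counts = idx.value_counts()
assert counts[pd.Timestamp('2013-01-01 09:00')] == 3
assert pd.NaT not in counts.index          # dropped by default

with_nat = idx.value_counts(dropna=False)
assert with_nat.iloc[-1] == 1              # the single NaT, smallest count
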
pd.date_range('2011-01-01', '2011-01-31', freq='D', - tz='Asia/Tokyo', name='idx') - - for idx in [idx1, idx2]: - result = idx[0] - self.assertEqual(result, Timestamp('2011-01-01', tz=idx.tz)) - - result = idx[0:5] - expected = pd.date_range('2011-01-01', '2011-01-05', freq='D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[0:10:2] - expected = pd.date_range('2011-01-01', '2011-01-09', freq='2D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[-20:-5:3] - expected = pd.date_range('2011-01-12', '2011-01-24', freq='3D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[4::-1] - expected = DatetimeIndex(['2011-01-05', '2011-01-04', '2011-01-03', - '2011-01-02', '2011-01-01'], - freq='-1D', tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - def test_drop_duplicates_metadata(self): - # GH 10115 - idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - result = idx.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertEqual(idx.freq, result.freq) - - idx_dup = idx.append(idx) - self.assertIsNone(idx_dup.freq) # freq is reset - result = idx_dup.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertIsNone(result.freq) - - def test_drop_duplicates(self): - # to check Index/Series compat - base = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - idx = base.append(base[:5]) - - res = idx.drop_duplicates() - tm.assert_index_equal(res, base) - res = Series(idx).drop_duplicates() - tm.assert_series_equal(res, Series(base)) - - res = idx.drop_duplicates(keep='last') - exp = base[5:].append(base[:5]) - tm.assert_index_equal(res, exp) - res = Series(idx).drop_duplicates(keep='last') - tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) - - res = idx.drop_duplicates(keep=False) - tm.assert_index_equal(res, base[5:]) - res = Series(idx).drop_duplicates(keep=False) - tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) - - def test_take(self): - # GH 10295 - idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D', - tz='Asia/Tokyo', name='idx') - - for idx in [idx1, idx2]: - result = idx.take([0]) - self.assertEqual(result, Timestamp('2011-01-01', tz=idx.tz)) - - result = idx.take([0, 1, 2]) - expected = pd.date_range('2011-01-01', '2011-01-03', freq='D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([0, 2, 4]) - expected = pd.date_range('2011-01-01', '2011-01-05', freq='2D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([7, 4, 1]) - expected = pd.date_range('2011-01-08', '2011-01-02', freq='-3D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([3, 2, 5]) - expected = DatetimeIndex(['2011-01-04', '2011-01-03', - '2011-01-06'], - freq=None, tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertIsNone(result.freq) - - result = idx.take([-3, 2, 5]) - expected = DatetimeIndex(['2011-01-29', '2011-01-03', - '2011-01-06'], - freq=None, 
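
test_getitem and test_take agree on when a freq survives: regular slicing can derive a new freq from the step, while take() with arbitrary positions resets it. A standalone sketch, using to_offset to spell out the expected frequencies:

import pandas as pd
from pandas.tseries.frequencies import to_offset

idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')

assert idx[0:10:2].freq == to_offset('2D')    # step scales the freq
assert idx[4::-1].freq == to_offset('-1D')    # negative step flips it
assert idx.take([3, 2, 5]).freq is None       # ragged picks: freq reset
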
tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertIsNone(result.freq) - - def test_take_invalid_kwargs(self): - idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - indices = [1, 6, 5, 9, 10, 13, 15, 3] - - msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, idx.take, - indices, foo=2) - - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, out=indices) - - msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, mode='clip') - - def test_infer_freq(self): - # GH 11018 - for freq in ['A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D', - '-3D', 'W', '-1W', 'H', '2H', '-2H', 'T', '2T', 'S', - '-3S']: - idx = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10) - result = pd.DatetimeIndex(idx.asi8, freq='infer') - tm.assert_index_equal(idx, result) - self.assertEqual(result.freq, freq) - - def test_nat_new(self): - idx = pd.date_range('2011-01-01', freq='D', periods=5, name='x') - result = idx._nat_new() - exp = pd.DatetimeIndex([pd.NaT] * 5, name='x') - tm.assert_index_equal(result, exp) - - result = idx._nat_new(box=False) - exp = np.array([tslib.iNaT] * 5, dtype=np.int64) - tm.assert_numpy_array_equal(result, exp) - - def test_shift(self): - # GH 9903 - for tz in self.tz: - idx = pd.DatetimeIndex([], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - tm.assert_index_equal(idx.shift(3, freq='H'), idx) - - idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01 11:00' - '2011-01-01 12:00'], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - exp = pd.DatetimeIndex(['2011-01-01 13:00', '2011-01-01 14:00' - '2011-01-01 15:00'], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(3, freq='H'), exp) - exp = pd.DatetimeIndex(['2011-01-01 07:00', '2011-01-01 08:00' - '2011-01-01 09:00'], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(-3, freq='H'), exp) - - def test_nat(self): - self.assertIs(pd.DatetimeIndex._na_value, pd.NaT) - self.assertIs(pd.DatetimeIndex([])._na_value, pd.NaT) - - for tz in [None, 'US/Eastern', 'UTC']: - idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz) - self.assertTrue(idx._can_hold_na) - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - self.assertFalse(idx.hasnans) - tm.assert_numpy_array_equal(idx._nan_idxs, - np.array([], dtype=np.intp)) - - idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz) - self.assertTrue(idx._can_hold_na) - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - self.assertTrue(idx.hasnans) - tm.assert_numpy_array_equal(idx._nan_idxs, - np.array([1], dtype=np.intp)) - - def test_equals(self): - # GH 13107 - for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: - idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT']) - self.assertTrue(idx.equals(idx)) - self.assertTrue(idx.equals(idx.copy())) - self.assertTrue(idx.equals(idx.asobject)) - self.assertTrue(idx.asobject.equals(idx)) - self.assertTrue(idx.asobject.equals(idx.asobject)) - self.assertFalse(idx.equals(list(idx))) - self.assertFalse(idx.equals(pd.Series(idx))) - - idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'], - tz='US/Pacific') - self.assertFalse(idx.equals(idx2)) - self.assertFalse(idx.equals(idx2.copy())) - self.assertFalse(idx.equals(idx2.asobject)) - self.assertFalse(idx.asobject.equals(idx2)) - self.assertFalse(idx.equals(list(idx2))) - 
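
test_infer_freq (GH11018) closes the loop between an index and its raw int64 representation: rebuilding from .asi8 with freq='infer' must recover both the labels and the regular frequency. A standalone sketch, assuming the integer-input DatetimeIndex constructor of this pandas era:

import pandas as pd

idx = pd.date_range('2011-01-01 09:00', freq='2H', periods=10)

rebuilt = pd.DatetimeIndex(idx.asi8, freq='infer')
assert rebuilt.equals(idx)
assert rebuilt.freq == idx.freq
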
self.assertFalse(idx.equals(pd.Series(idx2))) - - # same internal, different tz - idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific') - tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) - self.assertFalse(idx.equals(idx3)) - self.assertFalse(idx.equals(idx3.copy())) - self.assertFalse(idx.equals(idx3.asobject)) - self.assertFalse(idx.asobject.equals(idx3)) - self.assertFalse(idx.equals(list(idx3))) - self.assertFalse(idx.equals(pd.Series(idx3))) - - class TestTimedeltaIndexOps(Ops): def setUp(self): super(TestTimedeltaIndexOps, self).setUp() diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index b5daf1ac0ec68..ff6cc4bb9853c 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -3,7 +3,6 @@ import calendar import operator import sys -import warnings from datetime import datetime, time, timedelta from numpy.random import rand @@ -23,11 +22,9 @@ import pandas.util.testing as tm from pandas import ( Index, Series, DataFrame, isnull, date_range, Timestamp, Period, - DatetimeIndex, Int64Index, to_datetime, bdate_range, Float64Index, - NaT, timedelta_range, Timedelta, _np_version_under1p8, concat) + DatetimeIndex, to_datetime, bdate_range, Float64Index, + NaT, timedelta_range, Timedelta, concat) from pandas.compat import range, long, StringIO, lrange, lmap, zip, product -from pandas.compat.numpy import np_datetime64_compat -from pandas.core.common import PerformanceWarning from pandas.tslib import iNaT from pandas.util.testing import ( assert_frame_equal, assert_series_equal, assert_almost_equal, @@ -323,15 +320,6 @@ def test_dti_slicing(self): # don't carry freq through irregular slicing self.assertIsNone(dti2.freq) - def test_pass_datetimeindex_to_index(self): - # Bugs in #1396 - rng = date_range('1/1/2000', '3/1/2000') - idx = Index(rng, dtype=object) - - expected = Index(rng.to_pydatetime(), dtype=object) - - self.assert_numpy_array_equal(idx.values, expected.values) - def test_contiguous_boolean_preserve_freq(self): rng = date_range('1/1/2000', '3/1/2000', freq='B') @@ -2718,1247 +2706,6 @@ def test_dataframe_dtypes(self): to_datetime(df) -class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True - - def test_hash_error(self): - index = date_range('20010101', periods=10) - with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % - type(index).__name__): - hash(index) - - def test_stringified_slice_with_tz(self): - # GH2658 - import datetime - start = datetime.datetime.now() - idx = DatetimeIndex(start=start, freq="1d", periods=10) - df = DataFrame(lrange(10), index=idx) - df["2013-01-14 23:44:34.437768-05:00":] # no exception here - - def test_append_join_nondatetimeindex(self): - rng = date_range('1/1/2000', periods=10) - idx = Index(['a', 'b', 'c', 'd']) - - result = rng.append(idx) - tm.assertIsInstance(result[0], Timestamp) - - # it works - rng.join(idx, how='outer') - - def test_to_period_nofreq(self): - idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) - self.assertRaises(ValueError, idx.to_period) - - idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'], - freq='infer') - self.assertEqual(idx.freqstr, 'D') - expected = pd.PeriodIndex(['2000-01-01', '2000-01-02', - '2000-01-03'], freq='D') - tm.assert_index_equal(idx.to_period(), expected) - - # GH 7606 - idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03']) - self.assertEqual(idx.freqstr, None) - tm.assert_index_equal(idx.to_period(), expected) - - def 
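
The deleted test_to_period_nofreq above splits on whether a frequency can be found: an irregular index raises ValueError, while a regular one, even when the freq is only inferred, converts cleanly. A standalone sketch of the inferable case:

import pandas as pd

idx = pd.DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'],
                       freq='infer')
assert idx.freqstr == 'D'

expected = pd.period_range('2000-01-01', periods=3, freq='D')
assert idx.to_period().equals(expected)
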
test_000constructor_resolution(self): - # 2252 - t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1) - idx = DatetimeIndex([t1]) - - self.assertEqual(idx.nanosecond[0], t1.nanosecond) - - def test_constructor_coverage(self): - rng = date_range('1/1/2000', periods=10.5) - exp = date_range('1/1/2000', periods=10) - tm.assert_index_equal(rng, exp) - - self.assertRaises(ValueError, DatetimeIndex, start='1/1/2000', - periods='foo', freq='D') - - self.assertRaises(ValueError, DatetimeIndex, start='1/1/2000', - end='1/10/2000') - - self.assertRaises(ValueError, DatetimeIndex, '1/1/2000') - - # generator expression - gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10)) - result = DatetimeIndex(gen) - expected = DatetimeIndex([datetime(2000, 1, 1) + timedelta(i) - for i in range(10)]) - tm.assert_index_equal(result, expected) - - # NumPy string array - strings = np.array(['2000-01-01', '2000-01-02', '2000-01-03']) - result = DatetimeIndex(strings) - expected = DatetimeIndex(strings.astype('O')) - tm.assert_index_equal(result, expected) - - from_ints = DatetimeIndex(expected.asi8) - tm.assert_index_equal(from_ints, expected) - - # string with NaT - strings = np.array(['2000-01-01', '2000-01-02', 'NaT']) - result = DatetimeIndex(strings) - expected = DatetimeIndex(strings.astype('O')) - tm.assert_index_equal(result, expected) - - from_ints = DatetimeIndex(expected.asi8) - tm.assert_index_equal(from_ints, expected) - - # non-conforming - self.assertRaises(ValueError, DatetimeIndex, - ['2000-01-01', '2000-01-02', '2000-01-04'], freq='D') - - self.assertRaises(ValueError, DatetimeIndex, start='2011-01-01', - freq='b') - self.assertRaises(ValueError, DatetimeIndex, end='2011-01-01', - freq='B') - self.assertRaises(ValueError, DatetimeIndex, periods=10, freq='D') - - def test_constructor_datetime64_tzformat(self): - # GH 6572 - tm._skip_if_no_pytz() - import pytz - # ISO 8601 format results in pytz.FixedOffset - for freq in ['AS', 'W-SUN']: - idx = date_range('2013-01-01T00:00:00-05:00', - '2016-01-01T23:59:59-05:00', freq=freq) - expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(-300)) - tm.assert_index_equal(idx, expected) - # Unable to use `US/Eastern` because of DST - expected_i8 = date_range('2013-01-01T00:00:00', - '2016-01-01T23:59:59', freq=freq, - tz='America/Lima') - self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) - - idx = date_range('2013-01-01T00:00:00+09:00', - '2016-01-01T23:59:59+09:00', freq=freq) - expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(540)) - tm.assert_index_equal(idx, expected) - expected_i8 = date_range('2013-01-01T00:00:00', - '2016-01-01T23:59:59', freq=freq, - tz='Asia/Tokyo') - self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) - - tm._skip_if_no_dateutil() - - # Non ISO 8601 format results in dateutil.tz.tzoffset - for freq in ['AS', 'W-SUN']: - idx = date_range('2013/1/1 0:00:00-5:00', '2016/1/1 23:59:59-5:00', - freq=freq) - expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(-300)) - tm.assert_index_equal(idx, expected) - # Unable to use `US/Eastern` because of DST - expected_i8 = date_range('2013-01-01T00:00:00', - '2016-01-01T23:59:59', freq=freq, - tz='America/Lima') - self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) - - idx = date_range('2013/1/1 0:00:00+9:00', - '2016/1/1 23:59:59+09:00', freq=freq) - expected = date_range('2013-01-01T00:00:00', 
'2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(540)) - tm.assert_index_equal(idx, expected) - expected_i8 = date_range('2013-01-01T00:00:00', - '2016-01-01T23:59:59', freq=freq, - tz='Asia/Tokyo') - self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) - - def test_constructor_dtype(self): - - # passing a dtype with a tz should localize - idx = DatetimeIndex(['2013-01-01', '2013-01-02'], - dtype='datetime64[ns, US/Eastern]') - expected = DatetimeIndex(['2013-01-01', '2013-01-02'] - ).tz_localize('US/Eastern') - tm.assert_index_equal(idx, expected) - - idx = DatetimeIndex(['2013-01-01', '2013-01-02'], - tz='US/Eastern') - tm.assert_index_equal(idx, expected) - - # if we already have a tz and its not the same, then raise - idx = DatetimeIndex(['2013-01-01', '2013-01-02'], - dtype='datetime64[ns, US/Eastern]') - - self.assertRaises(ValueError, - lambda: DatetimeIndex(idx, - dtype='datetime64[ns]')) - - # this is effectively trying to convert tz's - self.assertRaises(TypeError, - lambda: DatetimeIndex(idx, - dtype='datetime64[ns, CET]')) - self.assertRaises(ValueError, - lambda: DatetimeIndex( - idx, tz='CET', - dtype='datetime64[ns, US/Eastern]')) - result = DatetimeIndex(idx, dtype='datetime64[ns, US/Eastern]') - tm.assert_index_equal(idx, result) - - def test_constructor_name(self): - idx = DatetimeIndex(start='2000-01-01', periods=1, freq='A', - name='TEST') - self.assertEqual(idx.name, 'TEST') - - def test_comparisons_coverage(self): - rng = date_range('1/1/2000', periods=10) - - # raise TypeError for now - self.assertRaises(TypeError, rng.__lt__, rng[3].value) - - result = rng == list(rng) - exp = rng == rng - self.assert_numpy_array_equal(result, exp) - - def test_comparisons_nat(self): - - fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0]) - fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0]) - - didx1 = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT, - '2014-05-01', '2014-07-01']) - didx2 = pd.DatetimeIndex(['2014-02-01', '2014-03-01', pd.NaT, pd.NaT, - '2014-06-01', '2014-07-01']) - darr = np.array([np_datetime64_compat('2014-02-01 00:00Z'), - np_datetime64_compat('2014-03-01 00:00Z'), - np_datetime64_compat('nat'), np.datetime64('nat'), - np_datetime64_compat('2014-06-01 00:00Z'), - np_datetime64_compat('2014-07-01 00:00Z')]) - - if _np_version_under1p8: - # cannot test array because np.datetime('nat') returns today's date - cases = [(fidx1, fidx2), (didx1, didx2)] - else: - cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] - - # Check pd.NaT is handles as the same as np.nan - with tm.assert_produces_warning(None): - for idx1, idx2 in cases: - - result = idx1 < idx2 - expected = np.array([True, False, False, False, True, False]) - self.assert_numpy_array_equal(result, expected) - - result = idx2 > idx1 - expected = np.array([True, False, False, False, True, False]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 <= idx2 - expected = np.array([True, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx2 >= idx1 - expected = np.array([True, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 == idx2 - expected = np.array([False, False, False, False, False, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 != idx2 - expected = np.array([True, True, True, True, True, False]) - self.assert_numpy_array_equal(result, expected) - - with tm.assert_produces_warning(None): - for idx1, val in [(fidx1, np.nan), 
(didx1, pd.NaT)]: - result = idx1 < val - expected = np.array([False, False, False, False, False, False]) - self.assert_numpy_array_equal(result, expected) - result = idx1 > val - self.assert_numpy_array_equal(result, expected) - - result = idx1 <= val - self.assert_numpy_array_equal(result, expected) - result = idx1 >= val - self.assert_numpy_array_equal(result, expected) - - result = idx1 == val - self.assert_numpy_array_equal(result, expected) - - result = idx1 != val - expected = np.array([True, True, True, True, True, True]) - self.assert_numpy_array_equal(result, expected) - - # Check pd.NaT is handled the same as np.nan - with tm.assert_produces_warning(None): - for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]: - result = idx1 < val - expected = np.array([True, False, False, False, False, False]) - self.assert_numpy_array_equal(result, expected) - result = idx1 > val - expected = np.array([False, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 <= val - expected = np.array([True, False, True, False, False, False]) - self.assert_numpy_array_equal(result, expected) - result = idx1 >= val - expected = np.array([False, False, True, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 == val - expected = np.array([False, False, True, False, False, False]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 != val - expected = np.array([True, True, False, True, True, True]) - self.assert_numpy_array_equal(result, expected) - - def test_map(self): - rng = date_range('1/1/2000', periods=10) - - f = lambda x: x.strftime('%Y%m%d') - result = rng.map(f) - exp = Index([f(x) for x in rng], dtype='<U8') - tm.assert_index_equal(result, exp) - - def test_take_fill_value(self): - idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], - name='xxx') - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with tm.assertRaises(IndexError): - idx.take(np.array([1, -5])) - - def test_take_fill_value_with_timezone(self): - idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], - name='xxx', tz='US/Eastern') - result = idx.take(np.array([1, 0, -1])) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', tz='US/Eastern') - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'], - name='xxx', tz='US/Eastern') - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', tz='US/Eastern') - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with tm.assertRaises(IndexError): - idx.take(np.array([1, -5])) - - def test_map_bug_1677(self): - index = DatetimeIndex(['2012-04-25 09:30:00.393000']) - f = index.asof - - result = index.map(f) - expected = Index([f(index[0])]) - tm.assert_index_equal(result, expected) - - def test_groupby_function_tuple_1677(self): - df = DataFrame(np.random.rand(100), - index=date_range("1/1/2000", periods=100)) - monthly_group = df.groupby(lambda x: (x.year, 
x.month)) - - result = monthly_group.mean() - tm.assertIsInstance(result.index[0], tuple) - - def test_append_numpy_bug_1681(self): - # another datetime64 bug - dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') - a = DataFrame() - c = DataFrame({'A': 'foo', 'B': dr}, index=dr) - - result = a.append(c) - self.assertTrue((result['B'] == dr).all()) - - def test_isin(self): - index = tm.makeDateIndex(4) - result = index.isin(index) - self.assertTrue(result.all()) - - result = index.isin(list(index)) - self.assertTrue(result.all()) - - assert_almost_equal(index.isin([index[2], 5]), - np.array([False, False, True, False])) - - def test_union(self): - i1 = Int64Index(np.arange(0, 20, 2)) - i2 = Int64Index(np.arange(10, 30, 2)) - result = i1.union(i2) - expected = Int64Index(np.arange(0, 30, 2)) - tm.assert_index_equal(result, expected) - - def test_union_with_DatetimeIndex(self): - i1 = Int64Index(np.arange(0, 20, 2)) - i2 = DatetimeIndex(start='2012-01-03 00:00:00', periods=10, freq='D') - i1.union(i2) # Works - i2.union(i1) # Fails with "AttributeError: can't set attribute" - - def test_time(self): - rng = pd.date_range('1/1/2000', freq='12min', periods=10) - result = pd.Index(rng).time - expected = [t.time() for t in rng] - self.assertTrue((result == expected).all()) - - def test_date(self): - rng = pd.date_range('1/1/2000', freq='12H', periods=10) - result = pd.Index(rng).date - expected = [t.date() for t in rng] - self.assertTrue((result == expected).all()) - - def test_does_not_convert_mixed_integer(self): - df = tm.makeCustomDataframe(10, 10, - data_gen_f=lambda *args, **kwargs: randn(), - r_idx_type='i', c_idx_type='dt') - cols = df.columns.join(df.index, how='outer') - joined = cols.join(df.columns) - self.assertEqual(cols.dtype, np.dtype('O')) - self.assertEqual(cols.dtype, joined.dtype) - tm.assert_numpy_array_equal(cols.values, joined.values) - - def test_slice_keeps_name(self): - # GH4226 - st = pd.Timestamp('2013-07-01 00:00:00', tz='America/Los_Angeles') - et = pd.Timestamp('2013-07-02 00:00:00', tz='America/Los_Angeles') - dr = pd.date_range(st, et, freq='H', name='timebucket') - self.assertEqual(dr[1:].name, dr.name) - - def test_join_self(self): - index = date_range('1/1/2000', periods=10) - kinds = 'outer', 'inner', 'left', 'right' - for kind in kinds: - joined = index.join(index, how=kind) - self.assertIs(index, joined) - - def assert_index_parameters(self, index): - assert index.freq == '40960N' - assert index.inferred_freq == '40960N' - - def test_ns_index(self): - nsamples = 400 - ns = int(1e9 / 24414) - dtstart = np.datetime64('2012-09-20T00:00:00') - - dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, 'ns') - freq = ns * offsets.Nano() - index = pd.DatetimeIndex(dt, freq=freq, name='time') - self.assert_index_parameters(index) - - new_index = pd.DatetimeIndex(start=index[0], end=index[-1], - freq=index.freq) - self.assert_index_parameters(new_index) - - def test_join_with_period_index(self): - df = tm.makeCustomDataframe( - 10, 10, data_gen_f=lambda *args: np.random.randint(2), - c_idx_type='p', r_idx_type='dt') - s = df.iloc[:5, 0] - joins = 'left', 'right', 'inner', 'outer' - - for join in joins: - with tm.assertRaisesRegexp(ValueError, 'can only call with other ' - 'PeriodIndex-ed objects'): - df.columns.join(s.index, how=join) - - def test_factorize(self): - idx1 = DatetimeIndex(['2014-01', '2014-01', '2014-02', '2014-02', - '2014-03', '2014-03']) - - exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) - exp_idx = DatetimeIndex(['2014-01', '2014-02', 
'2014-03']) - - arr, idx = idx1.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - arr, idx = idx1.factorize(sort=True) - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - # tz must be preserved - idx1 = idx1.tz_localize('Asia/Tokyo') - exp_idx = exp_idx.tz_localize('Asia/Tokyo') - - arr, idx = idx1.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - idx2 = pd.DatetimeIndex(['2014-03', '2014-03', '2014-02', '2014-01', - '2014-03', '2014-01']) - - exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) - exp_idx = DatetimeIndex(['2014-01', '2014-02', '2014-03']) - arr, idx = idx2.factorize(sort=True) - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) - exp_idx = DatetimeIndex(['2014-03', '2014-02', '2014-01']) - arr, idx = idx2.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - # freq must be preserved - idx3 = date_range('2000-01', periods=4, freq='M', tz='Asia/Tokyo') - exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) - arr, idx = idx3.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, idx3) - - def test_factorize_tz(self): - # GH 13750 - for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: - base = pd.date_range('2016-11-05', freq='H', periods=100, tz=tz) - idx = base.repeat(5) - - exp_arr = np.arange(100, dtype=np.intp).repeat(5) - - for obj in [idx, pd.Series(idx)]: - arr, res = obj.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(res, base) - - def test_factorize_dst(self): - # GH 13750 - idx = pd.date_range('2016-11-06', freq='H', periods=12, - tz='US/Eastern') - - for obj in [idx, pd.Series(idx)]: - arr, res = obj.factorize() - self.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) - tm.assert_index_equal(res, idx) - - idx = pd.date_range('2016-06-13', freq='H', periods=12, - tz='US/Eastern') - - for obj in [idx, pd.Series(idx)]: - arr, res = obj.factorize() - self.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) - tm.assert_index_equal(res, idx) - - def test_slice_with_negative_step(self): - ts = Series(np.arange(20), - date_range('2014-01-01', periods=20, freq='MS')) - SLC = pd.IndexSlice - - def assert_slices_equivalent(l_slc, i_slc): - assert_series_equal(ts[l_slc], ts.iloc[i_slc]) - assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - - assert_slices_equivalent(SLC[Timestamp('2014-10-01')::-1], SLC[9::-1]) - assert_slices_equivalent(SLC['2014-10-01'::-1], SLC[9::-1]) - - assert_slices_equivalent(SLC[:Timestamp('2014-10-01'):-1], SLC[:8:-1]) - assert_slices_equivalent(SLC[:'2014-10-01':-1], SLC[:8:-1]) - - assert_slices_equivalent(SLC['2015-02-01':'2014-10-01':-1], - SLC[13:8:-1]) - assert_slices_equivalent(SLC[Timestamp('2015-02-01'):Timestamp( - '2014-10-01'):-1], SLC[13:8:-1]) - assert_slices_equivalent(SLC['2015-02-01':Timestamp('2014-10-01'):-1], - SLC[13:8:-1]) - assert_slices_equivalent(SLC[Timestamp('2015-02-01'):'2014-10-01':-1], - SLC[13:8:-1]) - - assert_slices_equivalent(SLC['2014-10-01':'2015-02-01':-1], SLC[:0]) - - def test_slice_with_zero_step_raises(self): - ts = Series(np.arange(20), - date_range('2014-01-01', periods=20, freq='MS')) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - self.assertRaisesRegexp(ValueError, 
'slice step cannot be zero', - lambda: ts.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - - def test_slice_bounds_empty(self): - # GH 14354 - empty_idx = DatetimeIndex(freq='1H', periods=0, end='2015') - - right = empty_idx._maybe_cast_slice_bound('2015-01-02', 'right', 'loc') - exp = Timestamp('2015-01-02 23:59:59.999999999') - self.assertEqual(right, exp) - - left = empty_idx._maybe_cast_slice_bound('2015-01-02', 'left', 'loc') - exp = Timestamp('2015-01-02 00:00:00') - self.assertEqual(left, exp) - - class TestDatetime64(tm.TestCase): """ Also test support for datetime64[ns] in Series / DataFrame @@ -3969,152 +2716,6 @@ def setUp(self): end=datetime(2005, 1, 10), freq='Min') self.series = Series(rand(len(dti)), dti) - def test_datetimeindex_accessors(self): - dti = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), periods=365) - - self.assertEqual(dti.year[0], 1998) - self.assertEqual(dti.month[0], 1) - self.assertEqual(dti.day[0], 1) - self.assertEqual(dti.hour[0], 0) - self.assertEqual(dti.minute[0], 0) - self.assertEqual(dti.second[0], 0) - self.assertEqual(dti.microsecond[0], 0) - self.assertEqual(dti.dayofweek[0], 3) - - self.assertEqual(dti.dayofyear[0], 1) - self.assertEqual(dti.dayofyear[120], 121) - - self.assertEqual(dti.weekofyear[0], 1) - self.assertEqual(dti.weekofyear[120], 18) - - self.assertEqual(dti.quarter[0], 1) - self.assertEqual(dti.quarter[120], 2) - - self.assertEqual(dti.days_in_month[0], 31) - self.assertEqual(dti.days_in_month[90], 30) - - self.assertEqual(dti.is_month_start[0], True) - self.assertEqual(dti.is_month_start[1], False) - self.assertEqual(dti.is_month_start[31], True) - self.assertEqual(dti.is_quarter_start[0], True) - self.assertEqual(dti.is_quarter_start[90], True) - self.assertEqual(dti.is_year_start[0], True) - self.assertEqual(dti.is_year_start[364], False) - self.assertEqual(dti.is_month_end[0], False) - self.assertEqual(dti.is_month_end[30], True) - self.assertEqual(dti.is_month_end[31], False) - self.assertEqual(dti.is_month_end[364], True) - self.assertEqual(dti.is_quarter_end[0], False) - self.assertEqual(dti.is_quarter_end[30], False) - self.assertEqual(dti.is_quarter_end[89], True) - self.assertEqual(dti.is_quarter_end[364], True) - self.assertEqual(dti.is_year_end[0], False) - self.assertEqual(dti.is_year_end[364], True) - - # GH 11128 - self.assertEqual(dti.weekday_name[4], u'Monday') - self.assertEqual(dti.weekday_name[5], u'Tuesday') - self.assertEqual(dti.weekday_name[6], u'Wednesday') - self.assertEqual(dti.weekday_name[7], u'Thursday') - self.assertEqual(dti.weekday_name[8], u'Friday') - self.assertEqual(dti.weekday_name[9], u'Saturday') - self.assertEqual(dti.weekday_name[10], u'Sunday') - - self.assertEqual(Timestamp('2016-04-04').weekday_name, u'Monday') - self.assertEqual(Timestamp('2016-04-05').weekday_name, u'Tuesday') - self.assertEqual(Timestamp('2016-04-06').weekday_name, u'Wednesday') - self.assertEqual(Timestamp('2016-04-07').weekday_name, u'Thursday') - self.assertEqual(Timestamp('2016-04-08').weekday_name, u'Friday') - self.assertEqual(Timestamp('2016-04-09').weekday_name, u'Saturday') - self.assertEqual(Timestamp('2016-04-10').weekday_name, u'Sunday') - - self.assertEqual(len(dti.year), 365) - self.assertEqual(len(dti.month), 365) - self.assertEqual(len(dti.day), 365) - self.assertEqual(len(dti.hour), 365) - self.assertEqual(len(dti.minute), 365) - self.assertEqual(len(dti.second), 365) - self.assertEqual(len(dti.microsecond), 365) - 
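# The accessors checked above are vectorized and length-preserving; each
# returns one value per stamp. Quick sketch (works on any modern pandas):
import pandas as pd
dti = pd.date_range('1998-01-01', periods=365, freq='D')
assert dti.dayofyear[0] == 1
assert dti.is_month_start[0]       # 1998-01-01 opens its month
assert dti.quarter[120] == 2       # day 121 is 1998-05-01, i.e. Q2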
self.assertEqual(len(dti.dayofweek), 365) - self.assertEqual(len(dti.dayofyear), 365) - self.assertEqual(len(dti.weekofyear), 365) - self.assertEqual(len(dti.quarter), 365) - self.assertEqual(len(dti.is_month_start), 365) - self.assertEqual(len(dti.is_month_end), 365) - self.assertEqual(len(dti.is_quarter_start), 365) - self.assertEqual(len(dti.is_quarter_end), 365) - self.assertEqual(len(dti.is_year_start), 365) - self.assertEqual(len(dti.is_year_end), 365) - self.assertEqual(len(dti.weekday_name), 365) - - dti = DatetimeIndex(freq='BQ-FEB', start=datetime(1998, 1, 1), - periods=4) - - self.assertEqual(sum(dti.is_quarter_start), 0) - self.assertEqual(sum(dti.is_quarter_end), 4) - self.assertEqual(sum(dti.is_year_start), 0) - self.assertEqual(sum(dti.is_year_end), 1) - - # Ensure is_start/end accessors throw ValueError for CustomBusinessDay, - # CBD requires np >= 1.7 - bday_egypt = offsets.CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu') - dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt) - self.assertRaises(ValueError, lambda: dti.is_month_start) - - dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03']) - - self.assertEqual(dti.is_month_start[0], 1) - - tests = [ - (Timestamp('2013-06-01', freq='M').is_month_start, 1), - (Timestamp('2013-06-01', freq='BM').is_month_start, 0), - (Timestamp('2013-06-03', freq='M').is_month_start, 0), - (Timestamp('2013-06-03', freq='BM').is_month_start, 1), - (Timestamp('2013-02-28', freq='Q-FEB').is_month_end, 1), - (Timestamp('2013-02-28', freq='Q-FEB').is_quarter_end, 1), - (Timestamp('2013-02-28', freq='Q-FEB').is_year_end, 1), - (Timestamp('2013-03-01', freq='Q-FEB').is_month_start, 1), - (Timestamp('2013-03-01', freq='Q-FEB').is_quarter_start, 1), - (Timestamp('2013-03-01', freq='Q-FEB').is_year_start, 1), - (Timestamp('2013-03-31', freq='QS-FEB').is_month_end, 1), - (Timestamp('2013-03-31', freq='QS-FEB').is_quarter_end, 0), - (Timestamp('2013-03-31', freq='QS-FEB').is_year_end, 0), - (Timestamp('2013-02-01', freq='QS-FEB').is_month_start, 1), - (Timestamp('2013-02-01', freq='QS-FEB').is_quarter_start, 1), - (Timestamp('2013-02-01', freq='QS-FEB').is_year_start, 1), - (Timestamp('2013-06-30', freq='BQ').is_month_end, 0), - (Timestamp('2013-06-30', freq='BQ').is_quarter_end, 0), - (Timestamp('2013-06-30', freq='BQ').is_year_end, 0), - (Timestamp('2013-06-28', freq='BQ').is_month_end, 1), - (Timestamp('2013-06-28', freq='BQ').is_quarter_end, 1), - (Timestamp('2013-06-28', freq='BQ').is_year_end, 0), - (Timestamp('2013-06-30', freq='BQS-APR').is_month_end, 0), - (Timestamp('2013-06-30', freq='BQS-APR').is_quarter_end, 0), - (Timestamp('2013-06-30', freq='BQS-APR').is_year_end, 0), - (Timestamp('2013-06-28', freq='BQS-APR').is_month_end, 1), - (Timestamp('2013-06-28', freq='BQS-APR').is_quarter_end, 1), - (Timestamp('2013-03-29', freq='BQS-APR').is_year_end, 1), - (Timestamp('2013-11-01', freq='AS-NOV').is_year_start, 1), - (Timestamp('2013-10-31', freq='AS-NOV').is_year_end, 1), - (Timestamp('2012-02-01').days_in_month, 29), - (Timestamp('2013-02-01').days_in_month, 28)] - - for ts, value in tests: - self.assertEqual(ts, value) - - def test_nanosecond_field(self): - dti = DatetimeIndex(np.arange(10)) - - self.assert_numpy_array_equal(dti.nanosecond, - np.arange(10, dtype=np.int32)) - - def test_datetimeindex_diff(self): - dti1 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), - periods=100) - dti2 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), - periods=98) - 
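# `difference` (asserted next) is setwise: dti2 is a 98-period prefix of
# the 100-period dti1, so exactly the last two stamps remain. Sketch:
import pandas as pd
a = pd.date_range('1997-12-31', periods=100, freq='Q-JAN')
assert len(a.difference(a[:98])) == 2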
self.assertEqual(len(dti1.difference(dti2)), 2) - def test_fancy_getitem(self): dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)) @@ -4143,87 +2744,6 @@ def test_fancy_setitem(self): s['1/2/2009':'2009-06-05'] = -3 self.assertTrue((s[48:54] == -3).all()) - def test_datetimeindex_constructor(self): - arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04'] - self.assertRaises(Exception, DatetimeIndex, arr) - - arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04'] - idx1 = DatetimeIndex(arr) - - arr = [datetime(2005, 1, 1), '1/2/2005', '1/3/2005', '2005-01-04'] - idx2 = DatetimeIndex(arr) - - arr = [lib.Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005', - '2005-01-04'] - idx3 = DatetimeIndex(arr) - - arr = np.array(['1/1/2005', '1/2/2005', '1/3/2005', - '2005-01-04'], dtype='O') - idx4 = DatetimeIndex(arr) - - arr = to_datetime(['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04']) - idx5 = DatetimeIndex(arr) - - arr = to_datetime(['1/1/2005', '1/2/2005', 'Jan 3, 2005', '2005-01-04' - ]) - idx6 = DatetimeIndex(arr) - - idx7 = DatetimeIndex(['12/05/2007', '25/01/2008'], dayfirst=True) - idx8 = DatetimeIndex(['2007/05/12', '2008/01/25'], dayfirst=False, - yearfirst=True) - tm.assert_index_equal(idx7, idx8) - - for other in [idx2, idx3, idx4, idx5, idx6]: - self.assertTrue((idx1.values == other.values).all()) - - sdate = datetime(1999, 12, 25) - edate = datetime(2000, 1, 1) - idx = DatetimeIndex(start=sdate, freq='1B', periods=20) - self.assertEqual(len(idx), 20) - self.assertEqual(idx[0], sdate + 0 * offsets.BDay()) - self.assertEqual(idx.freq, 'B') - - idx = DatetimeIndex(end=edate, freq=('D', 5), periods=20) - self.assertEqual(len(idx), 20) - self.assertEqual(idx[-1], edate) - self.assertEqual(idx.freq, '5D') - - idx1 = DatetimeIndex(start=sdate, end=edate, freq='W-SUN') - idx2 = DatetimeIndex(start=sdate, end=edate, - freq=offsets.Week(weekday=6)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) - - idx1 = DatetimeIndex(start=sdate, end=edate, freq='QS') - idx2 = DatetimeIndex(start=sdate, end=edate, - freq=offsets.QuarterBegin(startingMonth=1)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) - - idx1 = DatetimeIndex(start=sdate, end=edate, freq='BQ') - idx2 = DatetimeIndex(start=sdate, end=edate, - freq=offsets.BQuarterEnd(startingMonth=12)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) - - def test_dayfirst(self): - # GH 5917 - arr = ['10/02/2014', '11/02/2014', '12/02/2014'] - expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11), - datetime(2014, 2, 12)]) - idx1 = DatetimeIndex(arr, dayfirst=True) - idx2 = DatetimeIndex(np.array(arr), dayfirst=True) - idx3 = to_datetime(arr, dayfirst=True) - idx4 = to_datetime(np.array(arr), dayfirst=True) - idx5 = DatetimeIndex(Index(arr), dayfirst=True) - idx6 = DatetimeIndex(Series(arr), dayfirst=True) - tm.assert_index_equal(expected, idx1) - tm.assert_index_equal(expected, idx2) - tm.assert_index_equal(expected, idx3) - tm.assert_index_equal(expected, idx4) - tm.assert_index_equal(expected, idx5) - tm.assert_index_equal(expected, idx6) - def test_dti_snap(self): dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002', '1/5/2002', '1/6/2002', '1/7/2002'], freq='D') @@ -4255,43 +2775,6 @@ def test_dti_reset_index_round_trip(self): self.assertEqual(df.index[0], stamp) self.assertEqual(df.reset_index()['Date'][0], stamp) - def test_dti_set_index_reindex(self): - 
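# The body below exercises set_index/reindex with tz-aware indexes; the
# key behavior, as an illustrative (commented) sketch:
#   import numpy as np, pandas as pd
#   idx1 = pd.date_range('2011-01-01', periods=6, freq='M', tz='US/Eastern')
#   idx2 = pd.date_range('2013', periods=6, freq='A', tz='Asia/Tokyo')
#   df = pd.DataFrame(np.random.random(6)).set_index(idx1)
#   df = df.reindex(idx2)  # rows become NaN (no overlap) but tz is kept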
# GH 6631 - df = DataFrame(np.random.random(6)) - idx1 = date_range('2011/01/01', periods=6, freq='M', tz='US/Eastern') - idx2 = date_range('2013', periods=6, freq='A', tz='Asia/Tokyo') - - df = df.set_index(idx1) - tm.assert_index_equal(df.index, idx1) - df = df.reindex(idx2) - tm.assert_index_equal(df.index, idx2) - - # 11314 - # with tz - index = date_range(datetime(2015, 10, 1), - datetime(2015, 10, 1, 23), - freq='H', tz='US/Eastern') - df = DataFrame(np.random.randn(24, 1), columns=['a'], index=index) - new_index = date_range(datetime(2015, 10, 2), - datetime(2015, 10, 2, 23), - freq='H', tz='US/Eastern') - - # TODO: unused? - result = df.set_index(new_index) # noqa - - self.assertEqual(new_index.freq, index.freq) - - def test_datetimeindex_union_join_empty(self): - dti = DatetimeIndex(start='1/1/2001', end='2/1/2001', freq='D') - empty = Index([]) - - result = dti.union(empty) - tm.assertIsInstance(result, DatetimeIndex) - self.assertIs(result, result) - - result = dti.join(empty) - tm.assertIsInstance(result, DatetimeIndex) - def test_series_set_value(self): # #1561 diff --git a/setup.py b/setup.py index 0c4dd33a70482..2ba4331aa1561 100755 --- a/setup.py +++ b/setup.py @@ -640,6 +640,7 @@ def pxd(name): 'pandas.tests', 'pandas.tests.frame', 'pandas.tests.indexes', + 'pandas.tests.indexes.datetimes', 'pandas.tests.groupby', 'pandas.tests.series', 'pandas.tests.formats', From df9fc4f17d342cc75b596918b1da1ecaaf0ae54f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 2 Feb 2017 20:18:13 -0500 Subject: [PATCH 005/933] TST: create the pandas/tests/scalar directory structure --- pandas/tests/scalar/__init__.py | 0 pandas/tests/scalar/test_timedelta.py | 1 + pandas/tests/scalar/test_timestamp.py | 1 + setup.py | 1 + 4 files changed, 3 insertions(+) create mode 100644 pandas/tests/scalar/__init__.py create mode 100644 pandas/tests/scalar/test_timedelta.py create mode 100644 pandas/tests/scalar/test_timestamp.py diff --git a/pandas/tests/scalar/__init__.py b/pandas/tests/scalar/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py new file mode 100644 index 0000000000000..fab790c4bf948 --- /dev/null +++ b/pandas/tests/scalar/test_timedelta.py @@ -0,0 +1 @@ +""" test the scalar Timedelta """ diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py new file mode 100644 index 0000000000000..2159c59de72ce --- /dev/null +++ b/pandas/tests/scalar/test_timestamp.py @@ -0,0 +1 @@ +""" test the scalar Timestamp """ diff --git a/setup.py b/setup.py index 2ba4331aa1561..93a044bc3cc7d 100755 --- a/setup.py +++ b/setup.py @@ -644,6 +644,7 @@ def pxd(name): 'pandas.tests.groupby', 'pandas.tests.series', 'pandas.tests.formats', + 'pandas.tests.scalar', 'pandas.tests.types', 'pandas.tests.test_msgpack', 'pandas.tests.plotting', From 9ddba8dc811184c49d9bc4df5dc97d505345ec23 Mon Sep 17 00:00:00 2001 From: Kacawi Date: Fri, 3 Feb 2017 14:50:25 +0100 Subject: [PATCH 006/933] Added a tutorial for pandas dataframes (#15295) --- doc/source/tutorials.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/tutorials.rst b/doc/source/tutorials.rst index c1c1c81915c46..2489b787560d0 100644 --- a/doc/source/tutorials.rst +++ b/doc/source/tutorials.rst @@ -177,3 +177,4 @@ Various Tutorials - `Intro to pandas data structures, by Greg Reda `_ - `Pandas and Python: Top 10, by Manish Amde `_ - `Pandas Tutorial, by Mikhail Semeniuk `_ +- `Pandas DataFrames Tutorial, by 
Karlijn Willems `_ From 69a9b05b23819dc5b6ccc79a41be80e4697b6eea Mon Sep 17 00:00:00 2001 From: TrigonaMinima Date: Sat, 4 Feb 2017 11:02:30 -0500 Subject: [PATCH 007/933] TST: Timestamp and Timeseries tests reorg (gh14854) xref partial on #14854 Author: TrigonaMinima Closes #15301 from TrigonaMinima/gh14854-timestamp and squashes the following commits: d8e3f4d [TrigonaMinima] splitting test_timeseries.py further 4072d93 [TrigonaMinima] TST: tseries/tests/test_timeseries.py tests moved to appropriate places. dbfd2ba [TrigonaMinima] TST: Timestamp tests compiled (gh14854) --- .../indexes/datetimes/test_construction.py | 69 +- .../indexes/datetimes/test_date_range.py | 112 + .../tests/indexes/datetimes/test_datetime.py | 68 - pandas/tests/indexes/datetimes/test_misc.py | 214 +- pandas/tests/indexes/test_timedelta.py | 43 + pandas/tests/scalar/test_timestamp.py | 1577 +++++++ pandas/tests/series/test_missing.py | 103 + pandas/tests/series/test_timeseries.py | 3147 ++++++++++++- pandas/tseries/tests/test_timeseries.py | 4184 ----------------- pandas/tseries/tests/test_tslib.py | 869 +--- 10 files changed, 5183 insertions(+), 5203 deletions(-) create mode 100644 pandas/tests/indexes/datetimes/test_date_range.py create mode 100644 pandas/tests/indexes/test_timedelta.py delete mode 100644 pandas/tseries/tests/test_timeseries.py diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index ae4eb6ee397b6..f8eca0f0d91d0 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -4,7 +4,8 @@ import pandas as pd import pandas.util.testing as tm from pandas.tslib import OutOfBoundsDatetime -from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range) +from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range, + to_datetime) class TestDatetimeIndex(tm.TestCase): @@ -423,3 +424,69 @@ def test_000constructor_resolution(self): idx = DatetimeIndex([t1]) self.assertEqual(idx.nanosecond[0], t1.nanosecond) + + +class TestTimeSeries(tm.TestCase): + _multiprocess_can_split_ = True + + def test_dti_constructor_preserve_dti_freq(self): + rng = date_range('1/1/2000', '1/2/2000', freq='5min') + + rng2 = DatetimeIndex(rng) + self.assertEqual(rng.freq, rng2.freq) + + def test_dti_constructor_years_only(self): + # GH 6961 + for tz in [None, 'UTC', 'Asia/Tokyo', 'dateutil/US/Pacific']: + rng1 = date_range('2014', '2015', freq='M', tz=tz) + expected1 = date_range('2014-01-31', '2014-12-31', freq='M', tz=tz) + + rng2 = date_range('2014', '2015', freq='MS', tz=tz) + expected2 = date_range('2014-01-01', '2015-01-01', freq='MS', + tz=tz) + + rng3 = date_range('2014', '2020', freq='A', tz=tz) + expected3 = date_range('2014-12-31', '2019-12-31', freq='A', tz=tz) + + rng4 = date_range('2014', '2020', freq='AS', tz=tz) + expected4 = date_range('2014-01-01', '2020-01-01', freq='AS', + tz=tz) + + for rng, expected in [(rng1, expected1), (rng2, expected2), + (rng3, expected3), (rng4, expected4)]: + tm.assert_index_equal(rng, expected) + + def test_dti_constructor_small_int(self): + # GH 13721 + exp = DatetimeIndex(['1970-01-01 00:00:00.00000000', + '1970-01-01 00:00:00.00000001', + '1970-01-01 00:00:00.00000002']) + + for dtype in [np.int64, np.int32, np.int16, np.int8]: + arr = np.array([0, 10, 20], dtype=dtype) + tm.assert_index_equal(DatetimeIndex(arr), exp) + + def test_dti_constructor_numpy_timeunits(self): + # GH 9114 + base = pd.to_datetime(['2000-01-01T00:00', 
'2000-01-02T00:00', 'NaT']) + + for dtype in ['datetime64[h]', 'datetime64[m]', 'datetime64[s]', + 'datetime64[ms]', 'datetime64[us]', 'datetime64[ns]']: + values = base.values.astype(dtype) + + tm.assert_index_equal(DatetimeIndex(values), base) + tm.assert_index_equal(to_datetime(values), base) + + def test_constructor_int64_nocopy(self): + # #1624 + arr = np.arange(1000, dtype=np.int64) + index = DatetimeIndex(arr) + + arr[50:100] = -1 + self.assertTrue((index.asi8[50:100] == -1).all()) + + arr = np.arange(1000, dtype=np.int64) + index = DatetimeIndex(arr, copy=True) + + arr[50:100] = -1 + self.assertTrue((index.asi8[50:100] != -1).all()) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py new file mode 100644 index 0000000000000..b3d6c41573ab8 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -0,0 +1,112 @@ +from datetime import datetime, timedelta, time + +import pandas as pd +import pandas.util.testing as tm +from pandas import date_range, offsets, DatetimeIndex, Timestamp + +from pandas.tests.series.common import TestData + + +class TestTimeSeries(TestData, tm.TestCase): + _multiprocess_can_split_ = True + + def test_date_range_gen_error(self): + rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min') + self.assertEqual(len(rng), 4) + + def test_date_range_negative_freq(self): + # GH 11018 + rng = date_range('2011-12-31', freq='-2A', periods=3) + exp = pd.DatetimeIndex(['2011-12-31', '2009-12-31', + '2007-12-31'], freq='-2A') + tm.assert_index_equal(rng, exp) + self.assertEqual(rng.freq, '-2A') + + rng = date_range('2011-01-31', freq='-2M', periods=3) + exp = pd.DatetimeIndex(['2011-01-31', '2010-11-30', + '2010-09-30'], freq='-2M') + tm.assert_index_equal(rng, exp) + self.assertEqual(rng.freq, '-2M') + + def test_date_range_bms_bug(self): + # #1645 + rng = date_range('1/1/2000', periods=10, freq='BMS') + + ex_first = Timestamp('2000-01-03') + self.assertEqual(rng[0], ex_first) + + def test_date_range_normalize(self): + snap = datetime.today() + n = 50 + + rng = date_range(snap, periods=n, normalize=False, freq='2D') + + offset = timedelta(2) + values = DatetimeIndex([snap + i * offset for i in range(n)]) + + tm.assert_index_equal(rng, values) + + rng = date_range('1/1/2000 08:15', periods=n, normalize=False, + freq='B') + the_time = time(8, 15) + for val in rng: + self.assertEqual(val.time(), the_time) + + def test_date_range_fy5252(self): + dr = date_range(start="2013-01-01", periods=2, freq=offsets.FY5253( + startingMonth=1, weekday=3, variation="nearest")) + self.assertEqual(dr[0], Timestamp('2013-01-31')) + self.assertEqual(dr[1], Timestamp('2014-01-30')) + + def test_date_range_ambiguous_arguments(self): + # #2538 + start = datetime(2011, 1, 1, 5, 3, 40) + end = datetime(2011, 1, 1, 8, 9, 40) + + self.assertRaises(ValueError, date_range, start, end, freq='s', + periods=10) + + def test_date_range_businesshour(self): + idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00', + '2014-07-04 11:00', + '2014-07-04 12:00', '2014-07-04 13:00', + '2014-07-04 14:00', + '2014-07-04 15:00', '2014-07-04 16:00'], + freq='BH') + rng = date_range('2014-07-04 09:00', '2014-07-04 16:00', freq='BH') + tm.assert_index_equal(idx, rng) + + idx = DatetimeIndex( + ['2014-07-04 16:00', '2014-07-07 09:00'], freq='BH') + rng = date_range('2014-07-04 16:00', '2014-07-07 09:00', freq='BH') + tm.assert_index_equal(idx, rng) + + idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00', + '2014-07-04 
11:00', + '2014-07-04 12:00', '2014-07-04 13:00', + '2014-07-04 14:00', + '2014-07-04 15:00', '2014-07-04 16:00', + '2014-07-07 09:00', '2014-07-07 10:00', + '2014-07-07 11:00', + '2014-07-07 12:00', '2014-07-07 13:00', + '2014-07-07 14:00', + '2014-07-07 15:00', '2014-07-07 16:00', + '2014-07-08 09:00', '2014-07-08 10:00', + '2014-07-08 11:00', + '2014-07-08 12:00', '2014-07-08 13:00', + '2014-07-08 14:00', + '2014-07-08 15:00', '2014-07-08 16:00'], + freq='BH') + rng = date_range('2014-07-04 09:00', '2014-07-08 16:00', freq='BH') + tm.assert_index_equal(idx, rng) + + def test_range_misspecified(self): + # GH #1095 + + self.assertRaises(ValueError, date_range, '1/1/2000') + self.assertRaises(ValueError, date_range, end='1/1/2000') + self.assertRaises(ValueError, date_range, periods=10) + + self.assertRaises(ValueError, date_range, '1/1/2000', freq='H') + self.assertRaises(ValueError, date_range, end='1/1/2000', freq='H') + self.assertRaises(ValueError, date_range, periods=10, freq='H') diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index a69406804cd97..f92fca6ecfa14 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -457,74 +457,6 @@ def test_sort_values(self): self.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp)) - def test_round(self): - - # round - dt = Timestamp('20130101 09:10:11') - result = dt.round('D') - expected = Timestamp('20130101') - self.assertEqual(result, expected) - - dt = Timestamp('20130101 19:10:11') - result = dt.round('D') - expected = Timestamp('20130102') - self.assertEqual(result, expected) - - dt = Timestamp('20130201 12:00:00') - result = dt.round('D') - expected = Timestamp('20130202') - self.assertEqual(result, expected) - - dt = Timestamp('20130104 12:00:00') - result = dt.round('D') - expected = Timestamp('20130105') - self.assertEqual(result, expected) - - dt = Timestamp('20130104 12:32:00') - result = dt.round('30Min') - expected = Timestamp('20130104 12:30:00') - self.assertEqual(result, expected) - - dti = date_range('20130101 09:10:11', periods=5) - result = dti.round('D') - expected = date_range('20130101', periods=5) - tm.assert_index_equal(result, expected) - - # floor - dt = Timestamp('20130101 09:10:11') - result = dt.floor('D') - expected = Timestamp('20130101') - self.assertEqual(result, expected) - - # ceil - dt = Timestamp('20130101 09:10:11') - result = dt.ceil('D') - expected = Timestamp('20130102') - self.assertEqual(result, expected) - - # round with tz - dt = Timestamp('20130101 09:10:11', tz='US/Eastern') - result = dt.round('D') - expected = Timestamp('20130101', tz='US/Eastern') - self.assertEqual(result, expected) - - dt = Timestamp('20130101 09:10:11', tz='US/Eastern') - result = dt.round('s') - self.assertEqual(result, dt) - - dti = date_range('20130101 09:10:11', - periods=5).tz_localize('UTC').tz_convert('US/Eastern') - result = dti.round('D') - expected = date_range('20130101', periods=5).tz_localize('US/Eastern') - tm.assert_index_equal(result, expected) - - result = dti.round('s') - tm.assert_index_equal(result, dti) - - # invalid - for freq in ['Y', 'M', 'foobar']: - self.assertRaises(ValueError, lambda: dti.round(freq)) - def test_take(self): dates = [datetime(2010, 1, 1, 14), datetime(2010, 1, 1, 15), datetime(2010, 1, 1, 17), datetime(2010, 1, 1, 21)] diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 
3dfe95fa77b85..4685df580190b 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -1,10 +1,12 @@ import numpy as np +import pandas as pd import pandas.lib as lib import pandas.util.testing as tm -from pandas import Float64Index, date_range, Timestamp from pandas import (Index, DatetimeIndex, datetime, offsets, to_datetime, - Series, DataFrame) + Series, DataFrame, Float64Index, date_range, Timestamp) + +from pandas.util.testing import assert_series_equal class TestDateTimeIndexToJulianDate(tm.TestCase): @@ -65,6 +67,196 @@ def test_pass_datetimeindex_to_index(self): self.assert_numpy_array_equal(idx.values, expected.values) + def test_range_edges(self): + # GH 13672 + idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000000001'), + end=Timestamp('1970-01-01 00:00:00.000000004'), + freq='N') + exp = DatetimeIndex(['1970-01-01 00:00:00.000000001', + '1970-01-01 00:00:00.000000002', + '1970-01-01 00:00:00.000000003', + '1970-01-01 00:00:00.000000004']) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000000004'), + end=Timestamp('1970-01-01 00:00:00.000000001'), + freq='N') + exp = DatetimeIndex([]) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000000001'), + end=Timestamp('1970-01-01 00:00:00.000000001'), + freq='N') + exp = DatetimeIndex(['1970-01-01 00:00:00.000000001']) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000001'), + end=Timestamp('1970-01-01 00:00:00.000004'), + freq='U') + exp = DatetimeIndex(['1970-01-01 00:00:00.000001', + '1970-01-01 00:00:00.000002', + '1970-01-01 00:00:00.000003', + '1970-01-01 00:00:00.000004']) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.001'), + end=Timestamp('1970-01-01 00:00:00.004'), + freq='L') + exp = DatetimeIndex(['1970-01-01 00:00:00.001', + '1970-01-01 00:00:00.002', + '1970-01-01 00:00:00.003', + '1970-01-01 00:00:00.004']) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:01'), + end=Timestamp('1970-01-01 00:00:04'), freq='S') + exp = DatetimeIndex(['1970-01-01 00:00:01', '1970-01-01 00:00:02', + '1970-01-01 00:00:03', '1970-01-01 00:00:04']) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01 00:01'), + end=Timestamp('1970-01-01 00:04'), freq='T') + exp = DatetimeIndex(['1970-01-01 00:01', '1970-01-01 00:02', + '1970-01-01 00:03', '1970-01-01 00:04']) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01 01:00'), + end=Timestamp('1970-01-01 04:00'), freq='H') + exp = DatetimeIndex(['1970-01-01 01:00', '1970-01-01 02:00', + '1970-01-01 03:00', '1970-01-01 04:00']) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01'), + end=Timestamp('1970-01-04'), freq='D') + exp = DatetimeIndex(['1970-01-01', '1970-01-02', + '1970-01-03', '1970-01-04']) + tm.assert_index_equal(idx, exp) + + def test_datetimeindex_integers_shift(self): + rng = date_range('1/1/2000', periods=20) + + result = rng + 5 + expected = rng.shift(5) + tm.assert_index_equal(result, expected) + + result = rng - 5 + expected = rng.shift(-5) + tm.assert_index_equal(result, expected) + + def test_datetimeindex_repr_short(self): + dr = date_range(start='1/1/2012', periods=1) + repr(dr) + + dr = date_range(start='1/1/2012', periods=2) + repr(dr) + + dr = date_range(start='1/1/2012', periods=3) + 
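# Note on test_datetimeindex_integers_shift above: adding an integer to a
# freq-carrying DatetimeIndex steps by that freq, matching shift(). This is
# 0.19-era behavior; later pandas requires an explicit offset. Sketch:
import pandas as pd
rng = pd.date_range('1/1/2000', periods=20)   # carries freq 'D'
assert (rng + 5).equals(rng.shift(5))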
repr(dr) + + def test_getitem_setitem_datetimeindex(self): + N = 50 + # testing with timezone, GH #2785 + rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') + ts = Series(np.random.randn(N), index=rng) + + result = ts["1990-01-01 04:00:00"] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts.copy() + result["1990-01-01 04:00:00"] = 0 + result["1990-01-01 04:00:00"] = ts[4] + assert_series_equal(result, ts) + + result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0 + result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8] + assert_series_equal(result, ts) + + lb = "1990-01-01 04:00:00" + rb = "1990-01-01 07:00:00" + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + # repeat all the above with naive datetimes + result = ts[datetime(1990, 1, 1, 4)] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts.copy() + result[datetime(1990, 1, 1, 4)] = 0 + result[datetime(1990, 1, 1, 4)] = ts[4] + assert_series_equal(result, ts) + + result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = 0 + result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8] + assert_series_equal(result, ts) + + lb = datetime(1990, 1, 1, 4) + rb = datetime(1990, 1, 1, 7) + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts[ts.index[4]] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts[ts.index[4:8]] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[ts.index[4:8]] = 0 + result[4:8] = ts[4:8] + assert_series_equal(result, ts) + + # also test partial date slicing + result = ts["1990-01-02"] + expected = ts[24:48] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-02"] = 0 + result["1990-01-02"] = ts[24:48] + assert_series_equal(result, ts) + + def test_normalize(self): + rng = date_range('1/1/2000 9:30', periods=10, freq='D') + + result = rng.normalize() + expected = date_range('1/1/2000', periods=10, freq='D') + tm.assert_index_equal(result, expected) + + rng_ns = pd.DatetimeIndex(np.array([1380585623454345752, + 1380585612343234312]).astype( + "datetime64[ns]")) + rng_ns_normalized = rng_ns.normalize() + expected = pd.DatetimeIndex(np.array([1380585600000000000, + 1380585600000000000]).astype( + "datetime64[ns]")) + tm.assert_index_equal(rng_ns_normalized, expected) + + self.assertTrue(result.is_normalized) + self.assertFalse(rng.is_normalized) + + def test_series_ctor_plus_datetimeindex(self): + rng = date_range('20090415', '20090519', freq='B') + data = dict((k, 1) for k in rng) + + result = Series(data, index=rng) + self.assertIs(result.index, rng) + class TestDatetime64(tm.TestCase): @@ -331,3 +523,21 @@ def test_datetimeindex_union_join_empty(self): result = dti.join(empty) tm.assertIsInstance(result, DatetimeIndex) + + +class TestTimeSeriesDuplicates(tm.TestCase): + _multiprocess_can_split_ = True + + def test_recreate_from_data(self): + freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N', + 'C'] + + for f in freqs: + org = DatetimeIndex(start='2001/02/01 09:00', freq=f, periods=1) + idx = DatetimeIndex(org, freq=f) + 
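# Round-trip property asserted next: rebuilding an index from itself with
# the same explicit freq (tz included) reproduces it exactly. Sketch:
import pandas as pd
org = pd.date_range('2001-02-01 09:00', periods=1, freq='M', tz='US/Pacific')
assert pd.DatetimeIndex(org, freq='M').equals(org)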
tm.assert_index_equal(idx, org) + + org = DatetimeIndex(start='2001/02/01 09:00', freq=f, + tz='US/Pacific', periods=1) + idx = DatetimeIndex(org, freq=f, tz='US/Pacific') + tm.assert_index_equal(idx, org) diff --git a/pandas/tests/indexes/test_timedelta.py b/pandas/tests/indexes/test_timedelta.py new file mode 100644 index 0000000000000..be01ad03a0660 --- /dev/null +++ b/pandas/tests/indexes/test_timedelta.py @@ -0,0 +1,43 @@ +import numpy as np +from datetime import timedelta + +import pandas as pd +import pandas.util.testing as tm +from pandas import (timedelta_range, date_range, Series, Timedelta, + DatetimeIndex) + + +class TestSlicing(tm.TestCase): + + def test_timedelta(self): + # this is valid too + index = date_range('1/1/2000', periods=50, freq='B') + shifted = index + timedelta(1) + back = shifted + timedelta(-1) + self.assertTrue(tm.equalContents(index, back)) + self.assertEqual(shifted.freq, index.freq) + self.assertEqual(shifted.freq, back.freq) + + result = index - timedelta(1) + expected = index + timedelta(-1) + tm.assert_index_equal(result, expected) + + # GH4134, buggy with timedeltas + rng = date_range('2013', '2014') + s = Series(rng) + result1 = rng - pd.offsets.Hour(1) + result2 = DatetimeIndex(s - np.timedelta64(100000000)) + result3 = rng - np.timedelta64(100000000) + result4 = DatetimeIndex(s - pd.offsets.Hour(1)) + tm.assert_index_equal(result1, result4) + tm.assert_index_equal(result2, result3) + + +class TestTimeSeries(tm.TestCase): + _multiprocess_can_split_ = True + + def test_series_box_timedelta(self): + rng = timedelta_range('1 day 1 s', periods=5, freq='h') + s = Series(rng) + tm.assertIsInstance(s[1], Timedelta) + tm.assertIsInstance(s.iat[2], Timedelta) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 2159c59de72ce..94369ebbd0a19 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1 +1,1578 @@ """ test the scalar Timestamp """ + +import sys +import operator +import calendar +import numpy as np +from datetime import datetime, timedelta +from distutils.version import LooseVersion + +import pandas as pd +import pandas.util.testing as tm +import pandas._period as period +from pandas.tseries import offsets, frequencies +from pandas.tslib import get_timezone, iNaT +from pandas.compat import lrange, long +from pandas.util.testing import assert_series_equal +from pandas.compat.numpy import np_datetime64_compat +from pandas import (Timestamp, date_range, Period, Timedelta, tslib, compat, + Series, NaT, isnull, DataFrame, DatetimeIndex) +from pandas.tseries.frequencies import (RESO_DAY, RESO_HR, RESO_MIN, RESO_US, + RESO_MS, RESO_SEC) + +randn = np.random.randn + + +class TestTimestamp(tm.TestCase): + + def test_constructor(self): + base_str = '2014-07-01 09:00' + base_dt = datetime(2014, 7, 1, 9) + base_expected = 1404205200000000000 + + # confirm base representation is correct + import calendar + self.assertEqual(calendar.timegm(base_dt.timetuple()) * 1000000000, + base_expected) + + tests = [(base_str, base_dt, base_expected), + ('2014-07-01 10:00', datetime(2014, 7, 1, 10), + base_expected + 3600 * 1000000000), + ('2014-07-01 09:00:00.000008000', + datetime(2014, 7, 1, 9, 0, 0, 8), + base_expected + 8000), + ('2014-07-01 09:00:00.000000005', + Timestamp('2014-07-01 09:00:00.000000005'), + base_expected + 5)] + + tm._skip_if_no_pytz() + tm._skip_if_no_dateutil() + import pytz + import dateutil + timezones = [(None, 0), ('UTC', 0), (pytz.utc, 0), ('Asia/Tokyo', 9), + 
('US/Eastern', -4), ('dateutil/US/Pacific', -7), + (pytz.FixedOffset(-180), -3), + (dateutil.tz.tzoffset(None, 18000), 5)] + + for date_str, date, expected in tests: + for result in [Timestamp(date_str), Timestamp(date)]: + # only with timestring + self.assertEqual(result.value, expected) + self.assertEqual(tslib.pydt_to_i8(result), expected) + + # re-creation shouldn't affect to internal value + result = Timestamp(result) + self.assertEqual(result.value, expected) + self.assertEqual(tslib.pydt_to_i8(result), expected) + + # with timezone + for tz, offset in timezones: + for result in [Timestamp(date_str, tz=tz), Timestamp(date, + tz=tz)]: + expected_tz = expected - offset * 3600 * 1000000000 + self.assertEqual(result.value, expected_tz) + self.assertEqual(tslib.pydt_to_i8(result), expected_tz) + + # should preserve tz + result = Timestamp(result) + self.assertEqual(result.value, expected_tz) + self.assertEqual(tslib.pydt_to_i8(result), expected_tz) + + # should convert to UTC + result = Timestamp(result, tz='UTC') + expected_utc = expected - offset * 3600 * 1000000000 + self.assertEqual(result.value, expected_utc) + self.assertEqual(tslib.pydt_to_i8(result), expected_utc) + + def test_constructor_with_stringoffset(self): + # GH 7833 + base_str = '2014-07-01 11:00:00+02:00' + base_dt = datetime(2014, 7, 1, 9) + base_expected = 1404205200000000000 + + # confirm base representation is correct + import calendar + self.assertEqual(calendar.timegm(base_dt.timetuple()) * 1000000000, + base_expected) + + tests = [(base_str, base_expected), + ('2014-07-01 12:00:00+02:00', + base_expected + 3600 * 1000000000), + ('2014-07-01 11:00:00.000008000+02:00', base_expected + 8000), + ('2014-07-01 11:00:00.000000005+02:00', base_expected + 5)] + + tm._skip_if_no_pytz() + tm._skip_if_no_dateutil() + import pytz + import dateutil + timezones = [(None, 0), ('UTC', 0), (pytz.utc, 0), ('Asia/Tokyo', 9), + ('US/Eastern', -4), ('dateutil/US/Pacific', -7), + (pytz.FixedOffset(-180), -3), + (dateutil.tz.tzoffset(None, 18000), 5)] + + for date_str, expected in tests: + for result in [Timestamp(date_str)]: + # only with timestring + self.assertEqual(result.value, expected) + self.assertEqual(tslib.pydt_to_i8(result), expected) + + # re-creation shouldn't affect to internal value + result = Timestamp(result) + self.assertEqual(result.value, expected) + self.assertEqual(tslib.pydt_to_i8(result), expected) + + # with timezone + for tz, offset in timezones: + result = Timestamp(date_str, tz=tz) + expected_tz = expected + self.assertEqual(result.value, expected_tz) + self.assertEqual(tslib.pydt_to_i8(result), expected_tz) + + # should preserve tz + result = Timestamp(result) + self.assertEqual(result.value, expected_tz) + self.assertEqual(tslib.pydt_to_i8(result), expected_tz) + + # should convert to UTC + result = Timestamp(result, tz='UTC') + expected_utc = expected + self.assertEqual(result.value, expected_utc) + self.assertEqual(tslib.pydt_to_i8(result), expected_utc) + + # This should be 2013-11-01 05:00 in UTC + # converted to Chicago tz + result = Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago') + self.assertEqual(result.value, Timestamp('2013-11-01 05:00').value) + expected = "Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago')" # noqa + self.assertEqual(repr(result), expected) + self.assertEqual(result, eval(repr(result))) + + # This should be 2013-11-01 05:00 in UTC + # converted to Tokyo tz (+09:00) + result = Timestamp('2013-11-01 00:00:00-0500', tz='Asia/Tokyo') + 
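# The fixed offset in the string pins the UTC instant; `tz=` only relabels
# the display zone, as the assertions next verify. Sketch:
import pandas as pd
ts = pd.Timestamp('2013-11-01 00:00:00-0500', tz='Asia/Tokyo')
assert ts.value == pd.Timestamp('2013-11-01 05:00').value  # same instant
# repr(ts) renders it in Tokyo time: 2013-11-01 14:00:00+09:00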
self.assertEqual(result.value, Timestamp('2013-11-01 05:00').value) + expected = "Timestamp('2013-11-01 14:00:00+0900', tz='Asia/Tokyo')" + self.assertEqual(repr(result), expected) + self.assertEqual(result, eval(repr(result))) + + # GH11708 + # This should be 2015-11-18 10:00 in UTC + # converted to Asia/Katmandu + result = Timestamp("2015-11-18 15:45:00+05:45", tz="Asia/Katmandu") + self.assertEqual(result.value, Timestamp("2015-11-18 10:00").value) + expected = "Timestamp('2015-11-18 15:45:00+0545', tz='Asia/Katmandu')" + self.assertEqual(repr(result), expected) + self.assertEqual(result, eval(repr(result))) + + # This should be 2015-11-18 10:00 in UTC + # converted to Asia/Kolkata + result = Timestamp("2015-11-18 15:30:00+05:30", tz="Asia/Kolkata") + self.assertEqual(result.value, Timestamp("2015-11-18 10:00").value) + expected = "Timestamp('2015-11-18 15:30:00+0530', tz='Asia/Kolkata')" + self.assertEqual(repr(result), expected) + self.assertEqual(result, eval(repr(result))) + + def test_constructor_invalid(self): + with tm.assertRaisesRegexp(TypeError, 'Cannot convert input'): + Timestamp(slice(2)) + with tm.assertRaisesRegexp(ValueError, 'Cannot convert Period'): + Timestamp(Period('1000-01-01')) + + def test_constructor_positional(self): + # GH 10758 + with tm.assertRaises(TypeError): + Timestamp(2000, 1) + with tm.assertRaises(ValueError): + Timestamp(2000, 0, 1) + with tm.assertRaises(ValueError): + Timestamp(2000, 13, 1) + with tm.assertRaises(ValueError): + Timestamp(2000, 1, 0) + with tm.assertRaises(ValueError): + Timestamp(2000, 1, 32) + + # GH 11630 + self.assertEqual( + repr(Timestamp(2015, 11, 12)), + repr(Timestamp('20151112'))) + + self.assertEqual( + repr(Timestamp(2015, 11, 12, 1, 2, 3, 999999)), + repr(Timestamp('2015-11-12 01:02:03.999999'))) + + self.assertIs(Timestamp(None), pd.NaT) + + def test_constructor_keyword(self): + # GH 10758 + with tm.assertRaises(TypeError): + Timestamp(year=2000, month=1) + with tm.assertRaises(ValueError): + Timestamp(year=2000, month=0, day=1) + with tm.assertRaises(ValueError): + Timestamp(year=2000, month=13, day=1) + with tm.assertRaises(ValueError): + Timestamp(year=2000, month=1, day=0) + with tm.assertRaises(ValueError): + Timestamp(year=2000, month=1, day=32) + + self.assertEqual( + repr(Timestamp(year=2015, month=11, day=12)), + repr(Timestamp('20151112'))) + + self.assertEqual( + repr(Timestamp(year=2015, month=11, day=12, + hour=1, minute=2, second=3, microsecond=999999)), + repr(Timestamp('2015-11-12 01:02:03.999999'))) + + def test_constructor_fromordinal(self): + base = datetime(2000, 1, 1) + + ts = Timestamp.fromordinal(base.toordinal(), freq='D') + self.assertEqual(base, ts) + self.assertEqual(ts.freq, 'D') + self.assertEqual(base.toordinal(), ts.toordinal()) + + ts = Timestamp.fromordinal(base.toordinal(), tz='US/Eastern') + self.assertEqual(pd.Timestamp('2000-01-01', tz='US/Eastern'), ts) + self.assertEqual(base.toordinal(), ts.toordinal()) + + def test_constructor_offset_depr(self): + # GH 12160 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + ts = Timestamp('2011-01-01', offset='D') + self.assertEqual(ts.freq, 'D') + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + self.assertEqual(ts.offset, 'D') + + msg = "Can only specify freq or offset, not both" + with tm.assertRaisesRegexp(TypeError, msg): + Timestamp('2011-01-01', offset='D', freq='D') + + def test_constructor_offset_depr_fromordinal(self): + # GH 12160 + base = datetime(2000, 1, 1) + + with 
tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + ts = Timestamp.fromordinal(base.toordinal(), offset='D') + self.assertEqual(pd.Timestamp('2000-01-01'), ts) + self.assertEqual(ts.freq, 'D') + self.assertEqual(base.toordinal(), ts.toordinal()) + + msg = "Can only specify freq or offset, not both" + with tm.assertRaisesRegexp(TypeError, msg): + Timestamp.fromordinal(base.toordinal(), offset='D', freq='D') + + def test_conversion(self): + # GH 9255 + ts = Timestamp('2000-01-01') + + result = ts.to_pydatetime() + expected = datetime(2000, 1, 1) + self.assertEqual(result, expected) + self.assertEqual(type(result), type(expected)) + + result = ts.to_datetime64() + expected = np.datetime64(ts.value, 'ns') + self.assertEqual(result, expected) + self.assertEqual(type(result), type(expected)) + self.assertEqual(result.dtype, expected.dtype) + + def test_repr(self): + tm._skip_if_no_pytz() + tm._skip_if_no_dateutil() + + dates = ['2014-03-07', '2014-01-01 09:00', + '2014-01-01 00:00:00.000000001'] + + # dateutil zone change (only matters for repr) + import dateutil + if (dateutil.__version__ >= LooseVersion('2.3') and + (dateutil.__version__ <= LooseVersion('2.4.0') or + dateutil.__version__ >= LooseVersion('2.6.0'))): + timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/US/Pacific'] + else: + timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/America/Los_Angeles'] + + freqs = ['D', 'M', 'S', 'N'] + + for date in dates: + for tz in timezones: + for freq in freqs: + + # avoid to match with timezone name + freq_repr = "'{0}'".format(freq) + if tz.startswith('dateutil'): + tz_repr = tz.replace('dateutil', '') + else: + tz_repr = tz + + date_only = Timestamp(date) + self.assertIn(date, repr(date_only)) + self.assertNotIn(tz_repr, repr(date_only)) + self.assertNotIn(freq_repr, repr(date_only)) + self.assertEqual(date_only, eval(repr(date_only))) + + date_tz = Timestamp(date, tz=tz) + self.assertIn(date, repr(date_tz)) + self.assertIn(tz_repr, repr(date_tz)) + self.assertNotIn(freq_repr, repr(date_tz)) + self.assertEqual(date_tz, eval(repr(date_tz))) + + date_freq = Timestamp(date, freq=freq) + self.assertIn(date, repr(date_freq)) + self.assertNotIn(tz_repr, repr(date_freq)) + self.assertIn(freq_repr, repr(date_freq)) + self.assertEqual(date_freq, eval(repr(date_freq))) + + date_tz_freq = Timestamp(date, tz=tz, freq=freq) + self.assertIn(date, repr(date_tz_freq)) + self.assertIn(tz_repr, repr(date_tz_freq)) + self.assertIn(freq_repr, repr(date_tz_freq)) + self.assertEqual(date_tz_freq, eval(repr(date_tz_freq))) + + # this can cause the tz field to be populated, but it's redundant to + # information in the datestring + tm._skip_if_no_pytz() + import pytz # noqa + date_with_utc_offset = Timestamp('2014-03-13 00:00:00-0400', tz=None) + self.assertIn('2014-03-13 00:00:00-0400', repr(date_with_utc_offset)) + self.assertNotIn('tzoffset', repr(date_with_utc_offset)) + self.assertIn('pytz.FixedOffset(-240)', repr(date_with_utc_offset)) + expr = repr(date_with_utc_offset).replace("'pytz.FixedOffset(-240)'", + 'pytz.FixedOffset(-240)') + self.assertEqual(date_with_utc_offset, eval(expr)) + + def test_bounds_with_different_units(self): + out_of_bounds_dates = ('1677-09-21', '2262-04-12', ) + + time_units = ('D', 'h', 'm', 's', 'ms', 'us') + + for date_string in out_of_bounds_dates: + for unit in time_units: + self.assertRaises(ValueError, Timestamp, np.datetime64( + date_string, dtype='M8[%s]' % unit)) + + in_bounds_dates = ('1677-09-23', '2262-04-11', ) + + for date_string in 
in_bounds_dates: + for unit in time_units: + Timestamp(np.datetime64(date_string, dtype='M8[%s]' % unit)) + + def test_tz(self): + t = '2014-02-01 09:00' + ts = Timestamp(t) + local = ts.tz_localize('Asia/Tokyo') + self.assertEqual(local.hour, 9) + self.assertEqual(local, Timestamp(t, tz='Asia/Tokyo')) + conv = local.tz_convert('US/Eastern') + self.assertEqual(conv, Timestamp('2014-01-31 19:00', tz='US/Eastern')) + self.assertEqual(conv.hour, 19) + + # preserves nanosecond + ts = Timestamp(t) + offsets.Nano(5) + local = ts.tz_localize('Asia/Tokyo') + self.assertEqual(local.hour, 9) + self.assertEqual(local.nanosecond, 5) + conv = local.tz_convert('US/Eastern') + self.assertEqual(conv.nanosecond, 5) + self.assertEqual(conv.hour, 19) + + def test_tz_localize_ambiguous(self): + + ts = Timestamp('2014-11-02 01:00') + ts_dst = ts.tz_localize('US/Eastern', ambiguous=True) + ts_no_dst = ts.tz_localize('US/Eastern', ambiguous=False) + + rng = date_range('2014-11-02', periods=3, freq='H', tz='US/Eastern') + self.assertEqual(rng[1], ts_dst) + self.assertEqual(rng[2], ts_no_dst) + self.assertRaises(ValueError, ts.tz_localize, 'US/Eastern', + ambiguous='infer') + + # GH 8025 + with tm.assertRaisesRegexp(TypeError, + 'Cannot localize tz-aware Timestamp, use ' + 'tz_convert for conversions'): + Timestamp('2011-01-01', tz='US/Eastern').tz_localize('Asia/Tokyo') + + with tm.assertRaisesRegexp(TypeError, + 'Cannot convert tz-naive Timestamp, use ' + 'tz_localize to localize'): + Timestamp('2011-01-01').tz_convert('Asia/Tokyo') + + def test_tz_localize_nonexistent(self): + # See issue 13057 + from pytz.exceptions import NonExistentTimeError + times = ['2015-03-08 02:00', '2015-03-08 02:30', + '2015-03-29 02:00', '2015-03-29 02:30'] + timezones = ['US/Eastern', 'US/Pacific', + 'Europe/Paris', 'Europe/Belgrade'] + for t, tz in zip(times, timezones): + ts = Timestamp(t) + self.assertRaises(NonExistentTimeError, ts.tz_localize, + tz) + self.assertRaises(NonExistentTimeError, ts.tz_localize, + tz, errors='raise') + self.assertIs(ts.tz_localize(tz, errors='coerce'), + pd.NaT) + + def test_tz_localize_errors_ambiguous(self): + # See issue 13057 + from pytz.exceptions import AmbiguousTimeError + ts = pd.Timestamp('2015-11-1 01:00') + self.assertRaises(AmbiguousTimeError, + ts.tz_localize, 'US/Pacific', errors='coerce') + + def test_tz_localize_roundtrip(self): + for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: + for t in ['2014-02-01 09:00', '2014-07-08 09:00', + '2014-11-01 17:00', '2014-11-05 00:00']: + ts = Timestamp(t) + localized = ts.tz_localize(tz) + self.assertEqual(localized, Timestamp(t, tz=tz)) + + with tm.assertRaises(TypeError): + localized.tz_localize(tz) + + reset = localized.tz_localize(None) + self.assertEqual(reset, ts) + self.assertTrue(reset.tzinfo is None) + + def test_tz_convert_roundtrip(self): + for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: + for t in ['2014-02-01 09:00', '2014-07-08 09:00', + '2014-11-01 17:00', '2014-11-05 00:00']: + ts = Timestamp(t, tz='UTC') + converted = ts.tz_convert(tz) + + reset = converted.tz_convert(None) + self.assertEqual(reset, Timestamp(t)) + self.assertTrue(reset.tzinfo is None) + self.assertEqual(reset, + converted.tz_convert('UTC').tz_localize(None)) + + def test_barely_oob_dts(self): + one_us = np.timedelta64(1).astype('timedelta64[us]') + + # By definition we can't go out of bounds in [ns], so we + # convert the datetime64s to [us] so we can go out of bounds + min_ts_us = 
np.datetime64(Timestamp.min).astype('M8[us]') + max_ts_us = np.datetime64(Timestamp.max).astype('M8[us]') + + # No error for the min/max datetimes + Timestamp(min_ts_us) + Timestamp(max_ts_us) + + # One us less than the minimum is an error + self.assertRaises(ValueError, Timestamp, min_ts_us - one_us) + + # One us more than the maximum is an error + self.assertRaises(ValueError, Timestamp, max_ts_us + one_us) + + def test_utc_z_designator(self): + self.assertEqual(get_timezone( + Timestamp('2014-11-02 01:00Z').tzinfo), 'UTC') + + def test_now(self): + # #9000 + ts_from_string = Timestamp('now') + ts_from_method = Timestamp.now() + ts_datetime = datetime.now() + + ts_from_string_tz = Timestamp('now', tz='US/Eastern') + ts_from_method_tz = Timestamp.now(tz='US/Eastern') + + # Check that the delta between the times is less than 1s (arbitrarily + # small) + delta = Timedelta(seconds=1) + self.assertTrue(abs(ts_from_method - ts_from_string) < delta) + self.assertTrue(abs(ts_datetime - ts_from_method) < delta) + self.assertTrue(abs(ts_from_method_tz - ts_from_string_tz) < delta) + self.assertTrue(abs(ts_from_string_tz.tz_localize(None) - + ts_from_method_tz.tz_localize(None)) < delta) + + def test_today(self): + + ts_from_string = Timestamp('today') + ts_from_method = Timestamp.today() + ts_datetime = datetime.today() + + ts_from_string_tz = Timestamp('today', tz='US/Eastern') + ts_from_method_tz = Timestamp.today(tz='US/Eastern') + + # Check that the delta between the times is less than 1s (arbitrarily + # small) + delta = Timedelta(seconds=1) + self.assertTrue(abs(ts_from_method - ts_from_string) < delta) + self.assertTrue(abs(ts_datetime - ts_from_method) < delta) + self.assertTrue(abs(ts_from_method_tz - ts_from_string_tz) < delta) + self.assertTrue(abs(ts_from_string_tz.tz_localize(None) - + ts_from_method_tz.tz_localize(None)) < delta) + + def test_asm8(self): + np.random.seed(7960929) + ns = [Timestamp.min.value, Timestamp.max.value, 1000, ] + for n in ns: + self.assertEqual(Timestamp(n).asm8.view('i8'), + np.datetime64(n, 'ns').view('i8'), n) + self.assertEqual(Timestamp('nat').asm8.view('i8'), + np.datetime64('nat', 'ns').view('i8')) + + def test_fields(self): + def check(value, equal): + # that we are int/long like + self.assertTrue(isinstance(value, (int, compat.long))) + self.assertEqual(value, equal) + + # GH 10050 + ts = Timestamp('2015-05-10 09:06:03.000100001') + check(ts.year, 2015) + check(ts.month, 5) + check(ts.day, 10) + check(ts.hour, 9) + check(ts.minute, 6) + check(ts.second, 3) + self.assertRaises(AttributeError, lambda: ts.millisecond) + check(ts.microsecond, 100) + check(ts.nanosecond, 1) + check(ts.dayofweek, 6) + check(ts.quarter, 2) + check(ts.dayofyear, 130) + check(ts.week, 19) + check(ts.daysinmonth, 31) + check(ts.daysinmonth, 31) + + def test_nat_fields(self): + # GH 10050 + ts = Timestamp('NaT') + self.assertTrue(np.isnan(ts.year)) + self.assertTrue(np.isnan(ts.month)) + self.assertTrue(np.isnan(ts.day)) + self.assertTrue(np.isnan(ts.hour)) + self.assertTrue(np.isnan(ts.minute)) + self.assertTrue(np.isnan(ts.second)) + self.assertTrue(np.isnan(ts.microsecond)) + self.assertTrue(np.isnan(ts.nanosecond)) + self.assertTrue(np.isnan(ts.dayofweek)) + self.assertTrue(np.isnan(ts.quarter)) + self.assertTrue(np.isnan(ts.dayofyear)) + self.assertTrue(np.isnan(ts.week)) + self.assertTrue(np.isnan(ts.daysinmonth)) + self.assertTrue(np.isnan(ts.days_in_month)) + + def test_pprint(self): + # GH12622 + import pprint + nested_obj = {'foo': 1, + 'bar': [{'w': {'a': 
Timestamp('2011-01-01')}}] * 10} + result = pprint.pformat(nested_obj, width=50) + expected = r"""{'bar': [{'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}], + 'foo': 1}""" + self.assertEqual(result, expected) + + def test_to_datetime_depr(self): + # see gh-8254 + ts = Timestamp('2011-01-01') + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + expected = datetime(2011, 1, 1) + result = ts.to_datetime() + self.assertEqual(result, expected) + + def test_to_pydatetime_nonzero_nano(self): + ts = Timestamp('2011-01-01 9:00:00.123456789') + + # Warn the user of data loss (nanoseconds). + with tm.assert_produces_warning(UserWarning, + check_stacklevel=False): + expected = datetime(2011, 1, 1, 9, 0, 0, 123456) + result = ts.to_pydatetime() + self.assertEqual(result, expected) + + def test_round(self): + + # round + dt = Timestamp('20130101 09:10:11') + result = dt.round('D') + expected = Timestamp('20130101') + self.assertEqual(result, expected) + + dt = Timestamp('20130101 19:10:11') + result = dt.round('D') + expected = Timestamp('20130102') + self.assertEqual(result, expected) + + dt = Timestamp('20130201 12:00:00') + result = dt.round('D') + expected = Timestamp('20130202') + self.assertEqual(result, expected) + + dt = Timestamp('20130104 12:00:00') + result = dt.round('D') + expected = Timestamp('20130105') + self.assertEqual(result, expected) + + dt = Timestamp('20130104 12:32:00') + result = dt.round('30Min') + expected = Timestamp('20130104 12:30:00') + self.assertEqual(result, expected) + + dti = date_range('20130101 09:10:11', periods=5) + result = dti.round('D') + expected = date_range('20130101', periods=5) + tm.assert_index_equal(result, expected) + + # floor + dt = Timestamp('20130101 09:10:11') + result = dt.floor('D') + expected = Timestamp('20130101') + self.assertEqual(result, expected) + + # ceil + dt = Timestamp('20130101 09:10:11') + result = dt.ceil('D') + expected = Timestamp('20130102') + self.assertEqual(result, expected) + + # round with tz + dt = Timestamp('20130101 09:10:11', tz='US/Eastern') + result = dt.round('D') + expected = Timestamp('20130101', tz='US/Eastern') + self.assertEqual(result, expected) + + dt = Timestamp('20130101 09:10:11', tz='US/Eastern') + result = dt.round('s') + self.assertEqual(result, dt) + + dti = date_range('20130101 09:10:11', + periods=5).tz_localize('UTC').tz_convert('US/Eastern') + result = dti.round('D') + expected = date_range('20130101', periods=5).tz_localize('US/Eastern') + tm.assert_index_equal(result, expected) + + result = dti.round('s') + tm.assert_index_equal(result, dti) + + # invalid + for freq in ['Y', 'M', 'foobar']: + self.assertRaises(ValueError, lambda: dti.round(freq)) + + def test_class_ops_pytz(self): + tm._skip_if_no_pytz() + from pytz import timezone + + def compare(x, y): + self.assertEqual(int(Timestamp(x).value / 1e9), + int(Timestamp(y).value / 1e9)) + + compare(Timestamp.now(), datetime.now()) + compare(Timestamp.now('UTC'), datetime.now(timezone('UTC'))) + compare(Timestamp.utcnow(), datetime.utcnow()) + compare(Timestamp.today(), datetime.today()) + 
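+        # compare() deliberately truncates both sides to whole epoch
+        # seconds: Timestamp.value counts nanoseconds since 1970-01-01,
+        # so int(value / 1e9) discards sub-second noise and the tiny
+        # delay between the paired calls above (it could still flip if
+        # a pair of calls straddles a second boundary, which is rare).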
current_time = calendar.timegm(datetime.now().utctimetuple()) + compare(Timestamp.utcfromtimestamp(current_time), + datetime.utcfromtimestamp(current_time)) + compare(Timestamp.fromtimestamp(current_time), + datetime.fromtimestamp(current_time)) + + date_component = datetime.utcnow() + time_component = (date_component + timedelta(minutes=10)).time() + compare(Timestamp.combine(date_component, time_component), + datetime.combine(date_component, time_component)) + + def test_class_ops_dateutil(self): + tm._skip_if_no_dateutil() + from dateutil.tz import tzutc + + def compare(x, y): + self.assertEqual(int(np.round(Timestamp(x).value / 1e9)), + int(np.round(Timestamp(y).value / 1e9))) + + compare(Timestamp.now(), datetime.now()) + compare(Timestamp.now('UTC'), datetime.now(tzutc())) + compare(Timestamp.utcnow(), datetime.utcnow()) + compare(Timestamp.today(), datetime.today()) + current_time = calendar.timegm(datetime.now().utctimetuple()) + compare(Timestamp.utcfromtimestamp(current_time), + datetime.utcfromtimestamp(current_time)) + compare(Timestamp.fromtimestamp(current_time), + datetime.fromtimestamp(current_time)) + + date_component = datetime.utcnow() + time_component = (date_component + timedelta(minutes=10)).time() + compare(Timestamp.combine(date_component, time_component), + datetime.combine(date_component, time_component)) + + def test_basics_nanos(self): + val = np.int64(946684800000000000).view('M8[ns]') + stamp = Timestamp(val.view('i8') + 500) + self.assertEqual(stamp.year, 2000) + self.assertEqual(stamp.month, 1) + self.assertEqual(stamp.microsecond, 0) + self.assertEqual(stamp.nanosecond, 500) + + # GH 14415 + val = np.iinfo(np.int64).min + 80000000000000 + stamp = Timestamp(val) + self.assertEqual(stamp.year, 1677) + self.assertEqual(stamp.month, 9) + self.assertEqual(stamp.day, 21) + self.assertEqual(stamp.microsecond, 145224) + self.assertEqual(stamp.nanosecond, 192) + + def test_unit(self): + + def check(val, unit=None, h=1, s=1, us=0): + stamp = Timestamp(val, unit=unit) + self.assertEqual(stamp.year, 2000) + self.assertEqual(stamp.month, 1) + self.assertEqual(stamp.day, 1) + self.assertEqual(stamp.hour, h) + if unit != 'D': + self.assertEqual(stamp.minute, 1) + self.assertEqual(stamp.second, s) + self.assertEqual(stamp.microsecond, us) + else: + self.assertEqual(stamp.minute, 0) + self.assertEqual(stamp.second, 0) + self.assertEqual(stamp.microsecond, 0) + self.assertEqual(stamp.nanosecond, 0) + + ts = Timestamp('20000101 01:01:01') + val = ts.value + days = (ts - Timestamp('1970-01-01')).days + + check(val) + check(val / long(1000), unit='us') + check(val / long(1000000), unit='ms') + check(val / long(1000000000), unit='s') + check(days, unit='D', h=0) + + # using truediv, so these are like floats + if compat.PY3: + check((val + 500000) / long(1000000000), unit='s', us=500) + check((val + 500000000) / long(1000000000), unit='s', us=500000) + check((val + 500000) / long(1000000), unit='ms', us=500) + + # get chopped in py2 + else: + check((val + 500000) / long(1000000000), unit='s') + check((val + 500000000) / long(1000000000), unit='s') + check((val + 500000) / long(1000000), unit='ms') + + # ok + check((val + 500000) / long(1000), unit='us', us=500) + check((val + 500000000) / long(1000000), unit='ms', us=500000) + + # floats + check(val / 1000.0 + 5, unit='us', us=5) + check(val / 1000.0 + 5000, unit='us', us=5000) + check(val / 1000000.0 + 0.5, unit='ms', us=500) + check(val / 1000000.0 + 0.005, unit='ms', us=5) + check(val / 1000000000.0 + 0.5, unit='s', us=500000) 
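+        # fractional days follow the same rule: 0.5 of a day is 12
+        # hours, hence h=12 on the next check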
+ check(days + 0.5, unit='D', h=12) + + # nan + result = Timestamp(np.nan) + self.assertIs(result, NaT) + + result = Timestamp(None) + self.assertIs(result, NaT) + + result = Timestamp(iNaT) + self.assertIs(result, NaT) + + result = Timestamp(NaT) + self.assertIs(result, NaT) + + result = Timestamp('NaT') + self.assertIs(result, NaT) + + self.assertTrue(isnull(Timestamp('nat'))) + + def test_roundtrip(self): + + # test value to string and back conversions + # further test accessors + base = Timestamp('20140101 00:00:00') + + result = Timestamp(base.value + pd.Timedelta('5ms').value) + self.assertEqual(result, Timestamp(str(base) + ".005000")) + self.assertEqual(result.microsecond, 5000) + + result = Timestamp(base.value + pd.Timedelta('5us').value) + self.assertEqual(result, Timestamp(str(base) + ".000005")) + self.assertEqual(result.microsecond, 5) + + result = Timestamp(base.value + pd.Timedelta('5ns').value) + self.assertEqual(result, Timestamp(str(base) + ".000000005")) + self.assertEqual(result.nanosecond, 5) + self.assertEqual(result.microsecond, 0) + + result = Timestamp(base.value + pd.Timedelta('6ms 5us').value) + self.assertEqual(result, Timestamp(str(base) + ".006005")) + self.assertEqual(result.microsecond, 5 + 6 * 1000) + + result = Timestamp(base.value + pd.Timedelta('200ms 5us').value) + self.assertEqual(result, Timestamp(str(base) + ".200005")) + self.assertEqual(result.microsecond, 5 + 200 * 1000) + + def test_comparison(self): + # 5-18-2012 00:00:00.000 + stamp = long(1337299200000000000) + + val = Timestamp(stamp) + + self.assertEqual(val, val) + self.assertFalse(val != val) + self.assertFalse(val < val) + self.assertTrue(val <= val) + self.assertFalse(val > val) + self.assertTrue(val >= val) + + other = datetime(2012, 5, 18) + self.assertEqual(val, other) + self.assertFalse(val != other) + self.assertFalse(val < other) + self.assertTrue(val <= other) + self.assertFalse(val > other) + self.assertTrue(val >= other) + + other = Timestamp(stamp + 100) + + self.assertNotEqual(val, other) + self.assertNotEqual(val, other) + self.assertTrue(val < other) + self.assertTrue(val <= other) + self.assertTrue(other > val) + self.assertTrue(other >= val) + + def test_compare_invalid(self): + + # GH 8058 + val = Timestamp('20130101 12:01:02') + self.assertFalse(val == 'foo') + self.assertFalse(val == 10.0) + self.assertFalse(val == 1) + self.assertFalse(val == long(1)) + self.assertFalse(val == []) + self.assertFalse(val == {'foo': 1}) + self.assertFalse(val == np.float64(1)) + self.assertFalse(val == np.int64(1)) + + self.assertTrue(val != 'foo') + self.assertTrue(val != 10.0) + self.assertTrue(val != 1) + self.assertTrue(val != long(1)) + self.assertTrue(val != []) + self.assertTrue(val != {'foo': 1}) + self.assertTrue(val != np.float64(1)) + self.assertTrue(val != np.int64(1)) + + # ops testing + df = DataFrame(randn(5, 2)) + a = df[0] + b = Series(randn(5)) + b.name = Timestamp('2000-01-01') + tm.assert_series_equal(a / b, 1 / (b / a)) + + def test_cant_compare_tz_naive_w_aware(self): + tm._skip_if_no_pytz() + # #1404 + a = Timestamp('3/12/2012') + b = Timestamp('3/12/2012', tz='utc') + + self.assertRaises(Exception, a.__eq__, b) + self.assertRaises(Exception, a.__ne__, b) + self.assertRaises(Exception, a.__lt__, b) + self.assertRaises(Exception, a.__gt__, b) + self.assertRaises(Exception, b.__eq__, a) + self.assertRaises(Exception, b.__ne__, a) + self.assertRaises(Exception, b.__lt__, a) + self.assertRaises(Exception, b.__gt__, a) + + if sys.version_info < (3, 3): + 
self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) + self.assertRaises(Exception, a.to_pydatetime().__eq__, b) + else: + self.assertFalse(a == b.to_pydatetime()) + self.assertFalse(a.to_pydatetime() == b) + + def test_cant_compare_tz_naive_w_aware_explicit_pytz(self): + tm._skip_if_no_pytz() + from pytz import utc + # #1404 + a = Timestamp('3/12/2012') + b = Timestamp('3/12/2012', tz=utc) + + self.assertRaises(Exception, a.__eq__, b) + self.assertRaises(Exception, a.__ne__, b) + self.assertRaises(Exception, a.__lt__, b) + self.assertRaises(Exception, a.__gt__, b) + self.assertRaises(Exception, b.__eq__, a) + self.assertRaises(Exception, b.__ne__, a) + self.assertRaises(Exception, b.__lt__, a) + self.assertRaises(Exception, b.__gt__, a) + + if sys.version_info < (3, 3): + self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) + self.assertRaises(Exception, a.to_pydatetime().__eq__, b) + else: + self.assertFalse(a == b.to_pydatetime()) + self.assertFalse(a.to_pydatetime() == b) + + def test_cant_compare_tz_naive_w_aware_dateutil(self): + tm._skip_if_no_dateutil() + from dateutil.tz import tzutc + utc = tzutc() + # #1404 + a = Timestamp('3/12/2012') + b = Timestamp('3/12/2012', tz=utc) + + self.assertRaises(Exception, a.__eq__, b) + self.assertRaises(Exception, a.__ne__, b) + self.assertRaises(Exception, a.__lt__, b) + self.assertRaises(Exception, a.__gt__, b) + self.assertRaises(Exception, b.__eq__, a) + self.assertRaises(Exception, b.__ne__, a) + self.assertRaises(Exception, b.__lt__, a) + self.assertRaises(Exception, b.__gt__, a) + + if sys.version_info < (3, 3): + self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) + self.assertRaises(Exception, a.to_pydatetime().__eq__, b) + else: + self.assertFalse(a == b.to_pydatetime()) + self.assertFalse(a.to_pydatetime() == b) + + def test_delta_preserve_nanos(self): + val = Timestamp(long(1337299200000000123)) + result = val + timedelta(1) + self.assertEqual(result.nanosecond, val.nanosecond) + + def test_frequency_misc(self): + self.assertEqual(frequencies.get_freq_group('T'), + frequencies.FreqGroup.FR_MIN) + + code, stride = frequencies.get_freq_code(offsets.Hour()) + self.assertEqual(code, frequencies.FreqGroup.FR_HR) + + code, stride = frequencies.get_freq_code((5, 'T')) + self.assertEqual(code, frequencies.FreqGroup.FR_MIN) + self.assertEqual(stride, 5) + + offset = offsets.Hour() + result = frequencies.to_offset(offset) + self.assertEqual(result, offset) + + result = frequencies.to_offset((5, 'T')) + expected = offsets.Minute(5) + self.assertEqual(result, expected) + + self.assertRaises(ValueError, frequencies.get_freq_code, (5, 'baz')) + + self.assertRaises(ValueError, frequencies.to_offset, '100foo') + + self.assertRaises(ValueError, frequencies.to_offset, ('', '')) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = frequencies.get_standard_freq(offsets.Hour()) + self.assertEqual(result, 'H') + + def test_hash_equivalent(self): + d = {datetime(2011, 1, 1): 5} + stamp = Timestamp(datetime(2011, 1, 1)) + self.assertEqual(d[stamp], 5) + + def test_timestamp_compare_scalars(self): + # case where ndim == 0 + lhs = np.datetime64(datetime(2013, 12, 6)) + rhs = Timestamp('now') + nat = Timestamp('nat') + + ops = {'gt': 'lt', + 'lt': 'gt', + 'ge': 'le', + 'le': 'ge', + 'eq': 'eq', + 'ne': 'ne'} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + expected = left_f(lhs, rhs) + + result = right_f(rhs, lhs) + self.assertEqual(result, expected) 
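+            # the same mirrored-operator trick is applied against NaT
+            # below: the reflected comparison must agree with the
+            # direct one even when one operand is not-a-time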
+ + expected = left_f(rhs, nat) + result = right_f(nat, rhs) + self.assertEqual(result, expected) + + def test_timestamp_compare_series(self): + # make sure we can compare Timestamps on the right AND left hand side + # GH4982 + s = Series(date_range('20010101', periods=10), name='dates') + s_nat = s.copy(deep=True) + + s[0] = pd.Timestamp('nat') + s[3] = pd.Timestamp('nat') + + ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + # no nats + expected = left_f(s, Timestamp('20010109')) + result = right_f(Timestamp('20010109'), s) + tm.assert_series_equal(result, expected) + + # nats + expected = left_f(s, Timestamp('nat')) + result = right_f(Timestamp('nat'), s) + tm.assert_series_equal(result, expected) + + # compare to timestamp with series containing nats + expected = left_f(s_nat, Timestamp('20010109')) + result = right_f(Timestamp('20010109'), s_nat) + tm.assert_series_equal(result, expected) + + # compare to nat with series containing nats + expected = left_f(s_nat, Timestamp('nat')) + result = right_f(Timestamp('nat'), s_nat) + tm.assert_series_equal(result, expected) + + def test_is_leap_year(self): + # GH 13727 + for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: + dt = Timestamp('2000-01-01 00:00:00', tz=tz) + self.assertTrue(dt.is_leap_year) + self.assertIsInstance(dt.is_leap_year, bool) + + dt = Timestamp('1999-01-01 00:00:00', tz=tz) + self.assertFalse(dt.is_leap_year) + + dt = Timestamp('2004-01-01 00:00:00', tz=tz) + self.assertTrue(dt.is_leap_year) + + dt = Timestamp('2100-01-01 00:00:00', tz=tz) + self.assertFalse(dt.is_leap_year) + + self.assertFalse(pd.NaT.is_leap_year) + self.assertIsInstance(pd.NaT.is_leap_year, bool) + + def test_round_nat(self): + # GH14940 + ts = Timestamp('nat') + for method in ["round", "floor", "ceil"]: + round_method = getattr(ts, method) + for freq in ["s", "5s", "min", "5min", "h", "5h"]: + self.assertIs(round_method(freq), ts) + + +class TestTimestampNsOperations(tm.TestCase): + def setUp(self): + self.timestamp = Timestamp(datetime.utcnow()) + + def assert_ns_timedelta(self, modified_timestamp, expected_value): + value = self.timestamp.value + modified_value = modified_timestamp.value + + self.assertEqual(modified_value - value, expected_value) + + def test_timedelta_ns_arithmetic(self): + self.assert_ns_timedelta(self.timestamp + np.timedelta64(-123, 'ns'), + -123) + + def test_timedelta_ns_based_arithmetic(self): + self.assert_ns_timedelta(self.timestamp + np.timedelta64( + 1234567898, 'ns'), 1234567898) + + def test_timedelta_us_arithmetic(self): + self.assert_ns_timedelta(self.timestamp + np.timedelta64(-123, 'us'), + -123000) + + def test_timedelta_ms_arithmetic(self): + time = self.timestamp + np.timedelta64(-123, 'ms') + self.assert_ns_timedelta(time, -123000000) + + def test_nanosecond_string_parsing(self): + ts = Timestamp('2013-05-01 07:15:45.123456789') + # GH 7878 + expected_repr = '2013-05-01 07:15:45.123456789' + expected_value = 1367392545123456789 + self.assertEqual(ts.value, expected_value) + self.assertIn(expected_repr, repr(ts)) + + ts = Timestamp('2013-05-01 07:15:45.123456789+09:00', tz='Asia/Tokyo') + self.assertEqual(ts.value, expected_value - 9 * 3600 * 1000000000) + self.assertIn(expected_repr, repr(ts)) + + ts = Timestamp('2013-05-01 07:15:45.123456789', tz='UTC') + self.assertEqual(ts.value, expected_value) + self.assertIn(expected_repr, repr(ts)) + + ts = Timestamp('2013-05-01 
07:15:45.123456789', tz='US/Eastern') + self.assertEqual(ts.value, expected_value + 4 * 3600 * 1000000000) + self.assertIn(expected_repr, repr(ts)) + + # GH 10041 + ts = Timestamp('20130501T071545.123456789') + self.assertEqual(ts.value, expected_value) + self.assertIn(expected_repr, repr(ts)) + + def test_nanosecond_timestamp(self): + # GH 7610 + expected = 1293840000000000005 + t = Timestamp('2011-01-01') + offsets.Nano(5) + self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") + self.assertEqual(t.value, expected) + self.assertEqual(t.nanosecond, 5) + + t = Timestamp(t) + self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") + self.assertEqual(t.value, expected) + self.assertEqual(t.nanosecond, 5) + + t = Timestamp(np_datetime64_compat('2011-01-01 00:00:00.000000005Z')) + self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") + self.assertEqual(t.value, expected) + self.assertEqual(t.nanosecond, 5) + + expected = 1293840000000000010 + t = t + offsets.Nano(5) + self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") + self.assertEqual(t.value, expected) + self.assertEqual(t.nanosecond, 10) + + t = Timestamp(t) + self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") + self.assertEqual(t.value, expected) + self.assertEqual(t.nanosecond, 10) + + t = Timestamp(np_datetime64_compat('2011-01-01 00:00:00.000000010Z')) + self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") + self.assertEqual(t.value, expected) + self.assertEqual(t.nanosecond, 10) + + def test_nat_arithmetic(self): + # GH 6873 + i = 2 + f = 1.5 + + for (left, right) in [(pd.NaT, i), (pd.NaT, f), (pd.NaT, np.nan)]: + self.assertIs(left / right, pd.NaT) + self.assertIs(left * right, pd.NaT) + self.assertIs(right * left, pd.NaT) + with tm.assertRaises(TypeError): + right / left + + # Timestamp / datetime + t = Timestamp('2014-01-01') + dt = datetime(2014, 1, 1) + for (left, right) in [(pd.NaT, pd.NaT), (pd.NaT, t), (pd.NaT, dt)]: + # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT + self.assertIs(right + left, pd.NaT) + self.assertIs(left + right, pd.NaT) + self.assertIs(left - right, pd.NaT) + self.assertIs(right - left, pd.NaT) + + # timedelta-like + # offsets are tested in test_offsets.py + + delta = timedelta(3600) + td = Timedelta('5s') + + for (left, right) in [(pd.NaT, delta), (pd.NaT, td)]: + # NaT + timedelta-like returns NaT + self.assertIs(right + left, pd.NaT) + self.assertIs(left + right, pd.NaT) + self.assertIs(right - left, pd.NaT) + self.assertIs(left - right, pd.NaT) + + # GH 11718 + tm._skip_if_no_pytz() + import pytz + + t_utc = Timestamp('2014-01-01', tz='UTC') + t_tz = Timestamp('2014-01-01', tz='US/Eastern') + dt_tz = pytz.timezone('Asia/Tokyo').localize(dt) + + for (left, right) in [(pd.NaT, t_utc), (pd.NaT, t_tz), + (pd.NaT, dt_tz)]: + # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT + self.assertIs(right + left, pd.NaT) + self.assertIs(left + right, pd.NaT) + self.assertIs(left - right, pd.NaT) + self.assertIs(right - left, pd.NaT) + + # int addition / subtraction + for (left, right) in [(pd.NaT, 2), (pd.NaT, 0), (pd.NaT, -3)]: + self.assertIs(right + left, pd.NaT) + self.assertIs(left + right, pd.NaT) + self.assertIs(left - right, pd.NaT) + self.assertIs(right - left, pd.NaT) + + def test_nat_arithmetic_index(self): + # GH 11718 + + # datetime + tm._skip_if_no_pytz() + + dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], name='x') + exp = pd.DatetimeIndex([pd.NaT, 
pd.NaT], name='x') + self.assert_index_equal(dti + pd.NaT, exp) + self.assert_index_equal(pd.NaT + dti, exp) + + dti_tz = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], + tz='US/Eastern', name='x') + exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x', tz='US/Eastern') + self.assert_index_equal(dti_tz + pd.NaT, exp) + self.assert_index_equal(pd.NaT + dti_tz, exp) + + exp = pd.TimedeltaIndex([pd.NaT, pd.NaT], name='x') + for (left, right) in [(pd.NaT, dti), (pd.NaT, dti_tz)]: + self.assert_index_equal(left - right, exp) + self.assert_index_equal(right - left, exp) + + # timedelta + tdi = pd.TimedeltaIndex(['1 day', '2 day'], name='x') + exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x') + for (left, right) in [(pd.NaT, tdi)]: + self.assert_index_equal(left + right, exp) + self.assert_index_equal(right + left, exp) + self.assert_index_equal(left - right, exp) + self.assert_index_equal(right - left, exp) + + +class TestTimestampOps(tm.TestCase): + def test_timestamp_and_datetime(self): + self.assertEqual((Timestamp(datetime( + 2013, 10, 13)) - datetime(2013, 10, 12)).days, 1) + self.assertEqual((datetime(2013, 10, 12) - + Timestamp(datetime(2013, 10, 13))).days, -1) + + def test_timestamp_and_series(self): + timestamp_series = Series(date_range('2014-03-17', periods=2, freq='D', + tz='US/Eastern')) + first_timestamp = timestamp_series[0] + + delta_series = Series([np.timedelta64(0, 'D'), np.timedelta64(1, 'D')]) + assert_series_equal(timestamp_series - first_timestamp, delta_series) + assert_series_equal(first_timestamp - timestamp_series, -delta_series) + + def test_addition_subtraction_types(self): + # Assert on the types resulting from Timestamp +/- various date/time + # objects + datetime_instance = datetime(2014, 3, 4) + timedelta_instance = timedelta(seconds=1) + # build a timestamp with a frequency, since then it supports + # addition/subtraction of integers + timestamp_instance = date_range(datetime_instance, periods=1, + freq='D')[0] + + self.assertEqual(type(timestamp_instance + 1), Timestamp) + self.assertEqual(type(timestamp_instance - 1), Timestamp) + + # Timestamp + datetime not supported, though subtraction is supported + # and yields timedelta more tests in tseries/base/tests/test_base.py + self.assertEqual( + type(timestamp_instance - datetime_instance), Timedelta) + self.assertEqual( + type(timestamp_instance + timedelta_instance), Timestamp) + self.assertEqual( + type(timestamp_instance - timedelta_instance), Timestamp) + + # Timestamp +/- datetime64 not supported, so not tested (could possibly + # assert error raised?) 
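+        # np.timedelta64 addition/subtraction, by contrast, is
+        # supported on a Timestamp and keeps the result a Timestamp,
+        # as checked below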
+ timedelta64_instance = np.timedelta64(1, 'D') + self.assertEqual( + type(timestamp_instance + timedelta64_instance), Timestamp) + self.assertEqual( + type(timestamp_instance - timedelta64_instance), Timestamp) + + def test_addition_subtraction_preserve_frequency(self): + timestamp_instance = date_range('2014-03-05', periods=1, freq='D')[0] + timedelta_instance = timedelta(days=1) + original_freq = timestamp_instance.freq + self.assertEqual((timestamp_instance + 1).freq, original_freq) + self.assertEqual((timestamp_instance - 1).freq, original_freq) + self.assertEqual( + (timestamp_instance + timedelta_instance).freq, original_freq) + self.assertEqual( + (timestamp_instance - timedelta_instance).freq, original_freq) + + timedelta64_instance = np.timedelta64(1, 'D') + self.assertEqual( + (timestamp_instance + timedelta64_instance).freq, original_freq) + self.assertEqual( + (timestamp_instance - timedelta64_instance).freq, original_freq) + + def test_resolution(self): + + for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', + 'S', 'L', 'U'], + [RESO_DAY, RESO_DAY, + RESO_DAY, RESO_DAY, + RESO_HR, RESO_MIN, + RESO_SEC, RESO_MS, + RESO_US]): + for tz in [None, 'Asia/Tokyo', 'US/Eastern', + 'dateutil/US/Eastern']: + idx = date_range(start='2013-04-01', periods=30, freq=freq, + tz=tz) + result = period.resolution(idx.asi8, idx.tz) + self.assertEqual(result, expected) + + +class TestTimestampToJulianDate(tm.TestCase): + def test_compare_1700(self): + r = Timestamp('1700-06-23').to_julian_date() + self.assertEqual(r, 2342145.5) + + def test_compare_2000(self): + r = Timestamp('2000-04-12').to_julian_date() + self.assertEqual(r, 2451646.5) + + def test_compare_2100(self): + r = Timestamp('2100-08-12').to_julian_date() + self.assertEqual(r, 2488292.5) + + def test_compare_hour01(self): + r = Timestamp('2000-08-12T01:00:00').to_julian_date() + self.assertEqual(r, 2451768.5416666666666666) + + def test_compare_hour13(self): + r = Timestamp('2000-08-12T13:00:00').to_julian_date() + self.assertEqual(r, 2451769.0416666666666666) + + +class TestTimeSeries(tm.TestCase): + _multiprocess_can_split_ = True + + def test_timestamp_to_datetime(self): + tm._skip_if_no_pytz() + rng = date_range('20090415', '20090519', tz='US/Eastern') + + stamp = rng[0] + dtval = stamp.to_pydatetime() + self.assertEqual(stamp, dtval) + self.assertEqual(stamp.tzinfo, dtval.tzinfo) + + def test_timestamp_to_datetime_dateutil(self): + tm._skip_if_no_pytz() + rng = date_range('20090415', '20090519', tz='dateutil/US/Eastern') + + stamp = rng[0] + dtval = stamp.to_pydatetime() + self.assertEqual(stamp, dtval) + self.assertEqual(stamp.tzinfo, dtval.tzinfo) + + def test_timestamp_to_datetime_explicit_pytz(self): + tm._skip_if_no_pytz() + import pytz + rng = date_range('20090415', '20090519', + tz=pytz.timezone('US/Eastern')) + + stamp = rng[0] + dtval = stamp.to_pydatetime() + self.assertEqual(stamp, dtval) + self.assertEqual(stamp.tzinfo, dtval.tzinfo) + + def test_timestamp_to_datetime_explicit_dateutil(self): + tm._skip_if_windows_python_3() + tm._skip_if_no_dateutil() + from pandas.tslib import _dateutil_gettz as gettz + rng = date_range('20090415', '20090519', tz=gettz('US/Eastern')) + + stamp = rng[0] + dtval = stamp.to_pydatetime() + self.assertEqual(stamp, dtval) + self.assertEqual(stamp.tzinfo, dtval.tzinfo) + + def test_timestamp_fields(self): + # extra fields from DatetimeIndex like quarter and week + idx = tm.makeDateIndex(100) + + fields = ['dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter', + 'days_in_month', 
'is_month_start', 'is_month_end', + 'is_quarter_start', 'is_quarter_end', 'is_year_start', + 'is_year_end', 'weekday_name'] + for f in fields: + expected = getattr(idx, f)[-1] + result = getattr(Timestamp(idx[-1]), f) + self.assertEqual(result, expected) + + self.assertEqual(idx.freq, Timestamp(idx[-1], idx.freq).freq) + self.assertEqual(idx.freqstr, Timestamp(idx[-1], idx.freq).freqstr) + + def test_timestamp_date_out_of_range(self): + self.assertRaises(ValueError, Timestamp, '1676-01-01') + self.assertRaises(ValueError, Timestamp, '2263-01-01') + + # 1475 + self.assertRaises(ValueError, DatetimeIndex, ['1400-01-01']) + self.assertRaises(ValueError, DatetimeIndex, [datetime(1400, 1, 1)]) + + def test_timestamp_repr(self): + # pre-1900 + stamp = Timestamp('1850-01-01', tz='US/Eastern') + repr(stamp) + + iso8601 = '1850-01-01 01:23:45.012345' + stamp = Timestamp(iso8601, tz='US/Eastern') + result = repr(stamp) + self.assertIn(iso8601, result) + + def test_timestamp_from_ordinal(self): + + # GH 3042 + dt = datetime(2011, 4, 16, 0, 0) + ts = Timestamp.fromordinal(dt.toordinal()) + self.assertEqual(ts.to_pydatetime(), dt) + + # with a tzinfo + stamp = Timestamp('2011-4-16', tz='US/Eastern') + dt_tz = stamp.to_pydatetime() + ts = Timestamp.fromordinal(dt_tz.toordinal(), tz='US/Eastern') + self.assertEqual(ts.to_pydatetime(), dt_tz) + + def test_timestamp_compare_with_early_datetime(self): + # e.g. datetime.min + stamp = Timestamp('2012-01-01') + + self.assertFalse(stamp == datetime.min) + self.assertFalse(stamp == datetime(1600, 1, 1)) + self.assertFalse(stamp == datetime(2700, 1, 1)) + self.assertNotEqual(stamp, datetime.min) + self.assertNotEqual(stamp, datetime(1600, 1, 1)) + self.assertNotEqual(stamp, datetime(2700, 1, 1)) + self.assertTrue(stamp > datetime(1600, 1, 1)) + self.assertTrue(stamp >= datetime(1600, 1, 1)) + self.assertTrue(stamp < datetime(2700, 1, 1)) + self.assertTrue(stamp <= datetime(2700, 1, 1)) + + def test_timestamp_equality(self): + + # GH 11034 + s = Series([Timestamp('2000-01-29 01:59:00'), 'NaT']) + result = s != s + assert_series_equal(result, Series([False, True])) + result = s != s[0] + assert_series_equal(result, Series([False, True])) + result = s != s[1] + assert_series_equal(result, Series([True, True])) + + result = s == s + assert_series_equal(result, Series([True, False])) + result = s == s[0] + assert_series_equal(result, Series([True, False])) + result = s == s[1] + assert_series_equal(result, Series([False, False])) + + def test_series_box_timestamp(self): + rng = date_range('20090415', '20090519', freq='B') + s = Series(rng) + + tm.assertIsInstance(s[5], Timestamp) + + rng = date_range('20090415', '20090519', freq='B') + s = Series(rng, index=rng) + tm.assertIsInstance(s[5], Timestamp) + + tm.assertIsInstance(s.iat[5], Timestamp) + + def test_frame_setitem_timestamp(self): + # 2155 + columns = DatetimeIndex(start='1/1/2012', end='2/1/2012', + freq=offsets.BDay()) + index = lrange(10) + data = DataFrame(columns=columns, index=index) + t = datetime(2012, 11, 1) + ts = Timestamp(t) + data[ts] = np.nan # works + + def test_to_html_timestamp(self): + rng = date_range('2000-01-01', periods=10) + df = DataFrame(np.random.randn(10, 4), index=rng) + + result = df.to_html() + self.assertIn('2000-01-01', result) + + def test_series_map_box_timestamps(self): + # #2689, #2627 + s = Series(date_range('1/1/2000', periods=10)) + + def f(x): + return (x.hour, x.day, x.month) + + # it works! 
+ s.map(f) + s.apply(f) + DataFrame(s).applymap(f) + + def test_dti_slicing(self): + dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') + dti2 = dti[[1, 3, 5]] + + v1 = dti2[0] + v2 = dti2[1] + v3 = dti2[2] + + self.assertEqual(v1, Timestamp('2/28/2005')) + self.assertEqual(v2, Timestamp('4/30/2005')) + self.assertEqual(v3, Timestamp('6/30/2005')) + + # don't carry freq through irregular slicing + self.assertIsNone(dti2.freq) + + def test_woy_boundary(self): + # make sure weeks at year boundaries are correct + d = datetime(2013, 12, 31) + result = Timestamp(d).week + expected = 1 # ISO standard + self.assertEqual(result, expected) + + d = datetime(2008, 12, 28) + result = Timestamp(d).week + expected = 52 # ISO standard + self.assertEqual(result, expected) + + d = datetime(2009, 12, 31) + result = Timestamp(d).week + expected = 53 # ISO standard + self.assertEqual(result, expected) + + d = datetime(2010, 1, 1) + result = Timestamp(d).week + expected = 53 # ISO standard + self.assertEqual(result, expected) + + d = datetime(2010, 1, 3) + result = Timestamp(d).week + expected = 53 # ISO standard + self.assertEqual(result, expected) + + result = np.array([Timestamp(datetime(*args)).week + for args in [(2000, 1, 1), (2000, 1, 2), ( + 2005, 1, 1), (2005, 1, 2)]]) + self.assertTrue((result == [52, 52, 53, 53]).all()) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 3c82e4ed82969..91da36161e188 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -34,6 +34,11 @@ def _skip_if_no_akima(): raise nose.SkipTest('scipy.interpolate.Akima1DInterpolator missing') +def _simple_ts(start, end, freq='D'): + rng = date_range(start, end, freq=freq) + return Series(np.random.randn(len(rng)), index=rng) + + class TestSeriesMissingData(TestData, tm.TestCase): _multiprocess_can_split_ = True @@ -530,6 +535,79 @@ def test_fill_value_when_combine_const(self): res = s.add(2, fill_value=0) assert_series_equal(res, exp) + def test_series_fillna_limit(self): + index = np.arange(10) + s = Series(np.random.randn(10), index=index) + + result = s[:2].reindex(index) + result = result.fillna(method='pad', limit=5) + + expected = s[:2].reindex(index).fillna(method='pad') + expected[-3:] = np.nan + assert_series_equal(result, expected) + + result = s[-2:].reindex(index) + result = result.fillna(method='bfill', limit=5) + + expected = s[-2:].reindex(index).fillna(method='backfill') + expected[:3] = np.nan + assert_series_equal(result, expected) + + def test_sparse_series_fillna_limit(self): + index = np.arange(10) + s = Series(np.random.randn(10), index=index) + + ss = s[:2].reindex(index).to_sparse() + result = ss.fillna(method='pad', limit=5) + expected = ss.fillna(method='pad', limit=5) + expected = expected.to_dense() + expected[-3:] = np.nan + expected = expected.to_sparse() + assert_series_equal(result, expected) + + ss = s[-2:].reindex(index).to_sparse() + result = ss.fillna(method='backfill', limit=5) + expected = ss.fillna(method='backfill') + expected = expected.to_dense() + expected[:3] = np.nan + expected = expected.to_sparse() + assert_series_equal(result, expected) + + def test_sparse_series_pad_backfill_limit(self): + index = np.arange(10) + s = Series(np.random.randn(10), index=index) + s = s.to_sparse() + + result = s[:2].reindex(index, method='pad', limit=5) + expected = s[:2].reindex(index).fillna(method='pad') + expected = expected.to_dense() + expected[-3:] = np.nan + expected = expected.to_sparse() + 
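+        # expected is assembled the long way round (densify, blank out
+        # the entries past the limit, re-sparsify) so it can be
+        # compared like-for-like with the sparse result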
assert_series_equal(result, expected) + + result = s[-2:].reindex(index, method='backfill', limit=5) + expected = s[-2:].reindex(index).fillna(method='backfill') + expected = expected.to_dense() + expected[:3] = np.nan + expected = expected.to_sparse() + assert_series_equal(result, expected) + + def test_series_pad_backfill_limit(self): + index = np.arange(10) + s = Series(np.random.randn(10), index=index) + + result = s[:2].reindex(index, method='pad', limit=5) + + expected = s[:2].reindex(index).fillna(method='pad') + expected[-3:] = np.nan + assert_series_equal(result, expected) + + result = s[-2:].reindex(index, method='backfill', limit=5) + + expected = s[-2:].reindex(index).fillna(method='backfill') + expected[:3] = np.nan + assert_series_equal(result, expected) + class TestSeriesInterpolateData(TestData, tm.TestCase): @@ -932,6 +1010,31 @@ def test_interp_timedelta64(self): index=pd.to_timedelta([1, 2, 4])) assert_series_equal(result, expected) + def test_series_interpolate_method_values(self): + # #1646 + ts = _simple_ts('1/1/2000', '1/20/2000') + ts[::2] = np.nan + + result = ts.interpolate(method='values') + exp = ts.interpolate() + assert_series_equal(result, exp) + + def test_series_interpolate_intraday(self): + # #1698 + index = pd.date_range('1/1/2012', periods=4, freq='12D') + ts = pd.Series([0, 12, 24, 36], index) + new_index = index.append(index + pd.DateOffset(days=1)).sort_values() + + exp = ts.reindex(new_index).interpolate(method='time') + + index = pd.date_range('1/1/2012', periods=4, freq='12H') + ts = pd.Series([0, 12, 24, 36], index) + new_index = index.append(index + pd.DateOffset(hours=1)).sort_values() + result = ts.reindex(new_index).interpolate(method='time') + + self.assert_numpy_array_equal(result.values, exp.values) + + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index df7ab24430746..073b8bfeee131 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -1,22 +1,48 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 -from datetime import datetime - +import sys +import nose +import locale +import calendar import numpy as np +from numpy.random import rand +from datetime import datetime, timedelta, time -from pandas import Index, Series, date_range, NaT +import pandas as pd +import pandas.index as _index +import pandas.tseries.tools as tools +import pandas.core.common as com +import pandas.util.testing as tm +from pandas.tslib import iNaT +from pandas.compat import lrange, lmap, StringIO, product +from pandas.tseries.tdi import TimedeltaIndex from pandas.tseries.index import DatetimeIndex from pandas.tseries.offsets import BDay, BMonthEnd -from pandas.tseries.tdi import TimedeltaIndex - -from pandas.util.testing import assert_series_equal, assert_almost_equal -import pandas.util.testing as tm +from pandas.types.common import is_datetime64_ns_dtype +from pandas import (Index, Series, date_range, NaT, concat, DataFrame, + Timestamp, lib, isnull, to_datetime, offsets, Timedelta, + tslib, bdate_range, Period, timedelta_range, compat) +from pandas.util.testing import (assert_series_equal, assert_almost_equal, + slow, assert_frame_equal, _skip_if_has_locale) from pandas.tests.series.common import TestData +randn = np.random.randn + + +def _simple_ts(start, end, freq='D'): + rng = date_range(start, end, freq=freq) + return Series(np.random.randn(len(rng)), index=rng) 
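_simple_ts is the small fixture both of these test modules lean on: a random series over a fixed-frequency DatetimeIndex. A sketch of it in use with the method='values' interpolation exercised above; the snippet is illustrative only, not part of the patch:

    import numpy as np
    import pandas as pd

    def simple_ts(start, end, freq='D'):
        # random values indexed by a fixed-frequency DatetimeIndex
        rng = pd.date_range(start, end, freq=freq)
        return pd.Series(np.random.randn(len(rng)), index=rng)

    ts = simple_ts('1/1/2000', '1/20/2000')
    ts[::2] = np.nan                        # knock out every other point
    filled = ts.interpolate(method='values')
    # only the leading NaN survives: it has no left anchor to interpolate from
    assert filled.isnull().sum() == 1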
-class TestSeriesTimeSeries(TestData, tm.TestCase): + +def assert_range_equal(left, right): + assert (left.equals(right)) + assert (left.freq == right.freq) + assert (left.tz == right.tz) + + +class TestTimeSeries(TestData, tm.TestCase): _multiprocess_can_split_ = True def test_shift(self): @@ -204,86 +230,6 @@ def test_truncate(self): before=self.ts.index[-1] + offset, after=self.ts.index[0] - offset) - def test_getitem_setitem_datetimeindex(self): - from pandas import date_range - - N = 50 - # testing with timezone, GH #2785 - rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') - ts = Series(np.random.randn(N), index=rng) - - result = ts["1990-01-01 04:00:00"] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts.copy() - result["1990-01-01 04:00:00"] = 0 - result["1990-01-01 04:00:00"] = ts[4] - assert_series_equal(result, ts) - - result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0 - result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8] - assert_series_equal(result, ts) - - lb = "1990-01-01 04:00:00" - rb = "1990-01-01 07:00:00" - result = ts[(ts.index >= lb) & (ts.index <= rb)] - expected = ts[4:8] - assert_series_equal(result, expected) - - # repeat all the above with naive datetimes - result = ts[datetime(1990, 1, 1, 4)] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts.copy() - result[datetime(1990, 1, 1, 4)] = 0 - result[datetime(1990, 1, 1, 4)] = ts[4] - assert_series_equal(result, ts) - - result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = 0 - result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8] - assert_series_equal(result, ts) - - lb = datetime(1990, 1, 1, 4) - rb = datetime(1990, 1, 1, 7) - result = ts[(ts.index >= lb) & (ts.index <= rb)] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts[ts.index[4]] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts[ts.index[4:8]] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result[ts.index[4:8]] = 0 - result[4:8] = ts[4:8] - assert_series_equal(result, ts) - - # also test partial date slicing - result = ts["1990-01-02"] - expected = ts[24:48] - assert_series_equal(result, expected) - - result = ts.copy() - result["1990-01-02"] = 0 - result["1990-01-02"] = ts[24:48] - assert_series_equal(result, ts) - def test_getitem_setitem_datetime_tz_pytz(self): tm._skip_if_no_pytz() from pytz import timezone as tz @@ -567,8 +513,3031 @@ def test_empty_series_ops(self): assert_series_equal(a, b + a) self.assertRaises(TypeError, lambda x, y: x - y, b, a) + def test_is_(self): + dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') + self.assertTrue(dti.is_(dti)) + self.assertTrue(dti.is_(dti.view())) + self.assertFalse(dti.is_(dti.copy())) + + def test_contiguous_boolean_preserve_freq(self): + rng = date_range('1/1/2000', '3/1/2000', freq='B') + + mask = np.zeros(len(rng), dtype=bool) + mask[10:20] = True + + masked = rng[mask] + expected = rng[10:20] + self.assertIsNotNone(expected.freq) + assert_range_equal(masked, expected) + + mask[22] = True + masked = rng[mask] + self.assertIsNone(masked.freq) + + def test_getitem_median_slice_bug(self): + index = date_range('20090415', '20090519', 
freq='2B') + s = Series(np.random.randn(13), index=index) + + indexer = [slice(6, 7, None)] + result = s[indexer] + expected = s[indexer[0]] + assert_series_equal(result, expected) + + def test_ctor_str_intraday(self): + rng = DatetimeIndex(['1-1-2000 00:00:01']) + self.assertEqual(rng[0].second, 1) + + def test_frame_pad_backfill_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + + result = df[:2].reindex(index, method='pad', limit=5) + + expected = df[:2].reindex(index).fillna(method='pad') + expected.values[-3:] = np.nan + tm.assert_frame_equal(result, expected) + + result = df[-2:].reindex(index, method='backfill', limit=5) + + expected = df[-2:].reindex(index).fillna(method='backfill') + expected.values[:3] = np.nan + tm.assert_frame_equal(result, expected) + + def test_frame_fillna_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + + result = df[:2].reindex(index) + result = result.fillna(method='pad', limit=5) + + expected = df[:2].reindex(index).fillna(method='pad') + expected.values[-3:] = np.nan + tm.assert_frame_equal(result, expected) + + result = df[-2:].reindex(index) + result = result.fillna(method='backfill', limit=5) + + expected = df[-2:].reindex(index).fillna(method='backfill') + expected.values[:3] = np.nan + tm.assert_frame_equal(result, expected) + + def test_sparse_frame_pad_backfill_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + sdf = df.to_sparse() + + result = sdf[:2].reindex(index, method='pad', limit=5) + + expected = sdf[:2].reindex(index).fillna(method='pad') + expected = expected.to_dense() + expected.values[-3:] = np.nan + expected = expected.to_sparse() + tm.assert_frame_equal(result, expected) + + result = sdf[-2:].reindex(index, method='backfill', limit=5) + + expected = sdf[-2:].reindex(index).fillna(method='backfill') + expected = expected.to_dense() + expected.values[:3] = np.nan + expected = expected.to_sparse() + tm.assert_frame_equal(result, expected) + + def test_sparse_frame_fillna_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + sdf = df.to_sparse() + + result = sdf[:2].reindex(index) + result = result.fillna(method='pad', limit=5) + + expected = sdf[:2].reindex(index).fillna(method='pad') + expected = expected.to_dense() + expected.values[-3:] = np.nan + expected = expected.to_sparse() + tm.assert_frame_equal(result, expected) + + result = sdf[-2:].reindex(index) + result = result.fillna(method='backfill', limit=5) + + expected = sdf[-2:].reindex(index).fillna(method='backfill') + expected = expected.to_dense() + expected.values[:3] = np.nan + expected = expected.to_sparse() + tm.assert_frame_equal(result, expected) + + def test_pad_require_monotonicity(self): + rng = date_range('1/1/2000', '3/1/2000', freq='B') + + # neither monotonic increasing or decreasing + rng2 = rng[[1, 0, 2]] + + self.assertRaises(ValueError, rng2.get_indexer, rng, method='pad') + + def test_frame_ctor_datetime64_column(self): + rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') + dates = np.asarray(rng) + + df = DataFrame({'A': np.random.randn(len(rng)), 'B': dates}) + self.assertTrue(np.issubdtype(df['B'].dtype, np.dtype('M8[ns]'))) + + def test_frame_add_datetime64_column(self): + rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') + df = DataFrame(index=np.arange(len(rng))) + + df['A'] = rng + self.assertTrue(np.issubdtype(df['A'].dtype, np.dtype('M8[ns]'))) + 
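The same coercion applies when the incoming values are datetime64 at a coarser unit, which is pinned down across many units a little further below. A minimal standalone sketch, assuming a pandas of this era (which normalizes stored datetimes to nanoseconds):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'ints': np.arange(4)})
    df['when'] = np.arange(4, dtype=np.int64).view('M8[s]')  # second resolution
    # column insertion upcasts to pandas' native nanosecond unit
    assert df['when'].dtype == np.dtype('M8[ns]')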
+ def test_frame_datetime64_pre1900_repr(self): + df = DataFrame({'year': date_range('1/1/1700', periods=50, + freq='A-DEC')}) + # it works! + repr(df) + + def test_frame_add_datetime64_col_other_units(self): + n = 100 + + units = ['h', 'm', 's', 'ms', 'D', 'M', 'Y'] + + ns_dtype = np.dtype('M8[ns]') + + for unit in units: + dtype = np.dtype('M8[%s]' % unit) + vals = np.arange(n, dtype=np.int64).view(dtype) + + df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) + df[unit] = vals + + ex_vals = to_datetime(vals.astype('O')).values + + self.assertEqual(df[unit].dtype, ns_dtype) + self.assertTrue((df[unit].values == ex_vals).all()) + + # Test insertion into existing datetime64 column + df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) + df['dates'] = np.arange(n, dtype=np.int64).view(ns_dtype) + + for unit in units: + dtype = np.dtype('M8[%s]' % unit) + vals = np.arange(n, dtype=np.int64).view(dtype) + + tmp = df.copy() + + tmp['dates'] = vals + ex_vals = to_datetime(vals.astype('O')).values + + self.assertTrue((tmp['dates'].values == ex_vals).all()) + + def test_to_datetime_unit(self): + + epoch = 1370745748 + s = Series([epoch + t for t in range(20)]) + result = to_datetime(s, unit='s') + expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( + seconds=t) for t in range(20)]) + assert_series_equal(result, expected) + + s = Series([epoch + t for t in range(20)]).astype(float) + result = to_datetime(s, unit='s') + expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( + seconds=t) for t in range(20)]) + assert_series_equal(result, expected) + + s = Series([epoch + t for t in range(20)] + [iNaT]) + result = to_datetime(s, unit='s') + expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( + seconds=t) for t in range(20)] + [NaT]) + assert_series_equal(result, expected) + + s = Series([epoch + t for t in range(20)] + [iNaT]).astype(float) + result = to_datetime(s, unit='s') + expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( + seconds=t) for t in range(20)] + [NaT]) + assert_series_equal(result, expected) + + # GH13834 + s = Series([epoch + t for t in np.arange(0, 2, .25)] + + [iNaT]).astype(float) + result = to_datetime(s, unit='s') + expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( + seconds=t) for t in np.arange(0, 2, .25)] + [NaT]) + assert_series_equal(result, expected) + + s = concat([Series([epoch + t for t in range(20)] + ).astype(float), Series([np.nan])], + ignore_index=True) + result = to_datetime(s, unit='s') + expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( + seconds=t) for t in range(20)] + [NaT]) + assert_series_equal(result, expected) + + result = to_datetime([1, 2, 'NaT', pd.NaT, np.nan], unit='D') + expected = DatetimeIndex([Timestamp('1970-01-02'), + Timestamp('1970-01-03')] + ['NaT'] * 3) + tm.assert_index_equal(result, expected) + + with self.assertRaises(ValueError): + to_datetime([1, 2, 'foo'], unit='D') + with self.assertRaises(ValueError): + to_datetime([1, 2, 111111111], unit='D') + + # coerce we can process + expected = DatetimeIndex([Timestamp('1970-01-02'), + Timestamp('1970-01-03')] + ['NaT'] * 1) + result = to_datetime([1, 2, 'foo'], unit='D', errors='coerce') + tm.assert_index_equal(result, expected) + + result = to_datetime([1, 2, 111111111], unit='D', errors='coerce') + tm.assert_index_equal(result, expected) + + def test_series_ctor_datetime64(self): + rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') + dates = np.asarray(rng) + + series = 
Series(dates) + self.assertTrue(np.issubdtype(series.dtype, np.dtype('M8[ns]'))) + + def test_index_cast_datetime64_other_units(self): + arr = np.arange(0, 100, 10, dtype=np.int64).view('M8[D]') + + idx = Index(arr) + + self.assertTrue((idx.values == tslib.cast_to_nanoseconds(arr)).all()) + + def test_reindex_series_add_nat(self): + rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') + series = Series(rng) + + result = series.reindex(lrange(15)) + self.assertTrue(np.issubdtype(result.dtype, np.dtype('M8[ns]'))) + + mask = result.isnull() + self.assertTrue(mask[-5:].all()) + self.assertFalse(mask[:-5].any()) + + def test_reindex_frame_add_nat(self): + rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') + df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng}) + + result = df.reindex(lrange(15)) + self.assertTrue(np.issubdtype(result['B'].dtype, np.dtype('M8[ns]'))) + + mask = com.isnull(result)['B'] + self.assertTrue(mask[-5:].all()) + self.assertFalse(mask[:-5].any()) + + def test_series_repr_nat(self): + series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') + + result = repr(series) + expected = ('0 1970-01-01 00:00:00.000000\n' + '1 1970-01-01 00:00:00.000001\n' + '2 1970-01-01 00:00:00.000002\n' + '3 NaT\n' + 'dtype: datetime64[ns]') + self.assertEqual(result, expected) + + def test_fillna_nat(self): + series = Series([0, 1, 2, iNaT], dtype='M8[ns]') + + filled = series.fillna(method='pad') + filled2 = series.fillna(value=series.values[2]) + + expected = series.copy() + expected.values[3] = expected.values[2] + + assert_series_equal(filled, expected) + assert_series_equal(filled2, expected) + + df = DataFrame({'A': series}) + filled = df.fillna(method='pad') + filled2 = df.fillna(value=series.values[2]) + expected = DataFrame({'A': expected}) + assert_frame_equal(filled, expected) + assert_frame_equal(filled2, expected) + + series = Series([iNaT, 0, 1, 2], dtype='M8[ns]') + + filled = series.fillna(method='bfill') + filled2 = series.fillna(value=series[1]) + + expected = series.copy() + expected[0] = expected[1] + + assert_series_equal(filled, expected) + assert_series_equal(filled2, expected) + + df = DataFrame({'A': series}) + filled = df.fillna(method='bfill') + filled2 = df.fillna(value=series[1]) + expected = DataFrame({'A': expected}) + assert_frame_equal(filled, expected) + assert_frame_equal(filled2, expected) + + def test_string_na_nat_conversion(self): + # GH #999, #858 + + from pandas.compat import parse_date + + strings = np.array(['1/1/2000', '1/2/2000', np.nan, + '1/4/2000, 12:34:56'], dtype=object) + + expected = np.empty(4, dtype='M8[ns]') + for i, val in enumerate(strings): + if com.isnull(val): + expected[i] = iNaT + else: + expected[i] = parse_date(val) + + result = tslib.array_to_datetime(strings) + assert_almost_equal(result, expected) + + result2 = to_datetime(strings) + tm.assertIsInstance(result2, DatetimeIndex) + tm.assert_numpy_array_equal(result, result2.values) + + malformed = np.array(['1/100/2000', np.nan], dtype=object) + + # GH 10636, default is now 'raise' + self.assertRaises(ValueError, + lambda: to_datetime(malformed, errors='raise')) + + result = to_datetime(malformed, errors='ignore') + tm.assert_numpy_array_equal(result, malformed) + + self.assertRaises(ValueError, to_datetime, malformed, errors='raise') + + idx = ['a', 'b', 'c', 'd', 'e'] + series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, + '1/5/2000'], index=idx, name='foo') + dseries = Series([to_datetime('1/1/2000'), np.nan, + to_datetime('1/3/2000'), np.nan, + 
to_datetime('1/5/2000')], index=idx, name='foo') + + result = to_datetime(series) + dresult = to_datetime(dseries) + + expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) + for i in range(5): + x = series[i] + if isnull(x): + expected[i] = iNaT + else: + expected[i] = to_datetime(x) + + assert_series_equal(result, expected, check_names=False) + self.assertEqual(result.name, 'foo') + + assert_series_equal(dresult, expected, check_names=False) + self.assertEqual(dresult.name, 'foo') + + def test_nat_vector_field_access(self): + idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) + + fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', + 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', + 'days_in_month', 'is_leap_year'] + + for field in fields: + result = getattr(idx, field) + expected = [getattr(x, field) for x in idx] + self.assert_numpy_array_equal(result, np.array(expected)) + + s = pd.Series(idx) + + for field in fields: + result = getattr(s.dt, field) + expected = [getattr(x, field) for x in idx] + self.assert_series_equal(result, pd.Series(expected)) + + def test_nat_scalar_field_access(self): + fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', + 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', + 'days_in_month', 'daysinmonth', 'dayofweek', 'weekday_name'] + for field in fields: + result = getattr(NaT, field) + self.assertTrue(np.isnan(result)) + + def test_NaT_methods(self): + # GH 9513 + raise_methods = ['astimezone', 'combine', 'ctime', 'dst', + 'fromordinal', 'fromtimestamp', 'isocalendar', + 'strftime', 'strptime', 'time', 'timestamp', + 'timetuple', 'timetz', 'toordinal', 'tzname', + 'utcfromtimestamp', 'utcnow', 'utcoffset', + 'utctimetuple'] + nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today'] + nan_methods = ['weekday', 'isoweekday'] + + for method in raise_methods: + if hasattr(NaT, method): + self.assertRaises(ValueError, getattr(NaT, method)) + + for method in nan_methods: + if hasattr(NaT, method): + self.assertTrue(np.isnan(getattr(NaT, method)())) + + for method in nat_methods: + if hasattr(NaT, method): + # see gh-8254 + exp_warning = None + if method == 'to_datetime': + exp_warning = FutureWarning + with tm.assert_produces_warning( + exp_warning, check_stacklevel=False): + self.assertIs(getattr(NaT, method)(), NaT) + + # GH 12300 + self.assertEqual(NaT.isoformat(), 'NaT') + + def test_index_convert_to_datetime_array(self): + tm._skip_if_no_pytz() + + def _check_rng(rng): + converted = rng.to_pydatetime() + tm.assertIsInstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + tm.assertIsInstance(x, datetime) + self.assertEqual(x, stamp.to_pydatetime()) + self.assertEqual(x.tzinfo, stamp.tzinfo) + + rng = date_range('20090415', '20090519') + rng_eastern = date_range('20090415', '20090519', tz='US/Eastern') + rng_utc = date_range('20090415', '20090519', tz='utc') + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_index_convert_to_datetime_array_explicit_pytz(self): + tm._skip_if_no_pytz() + import pytz + + def _check_rng(rng): + converted = rng.to_pydatetime() + tm.assertIsInstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + tm.assertIsInstance(x, datetime) + self.assertEqual(x, stamp.to_pydatetime()) + self.assertEqual(x.tzinfo, stamp.tzinfo) + + rng = date_range('20090415', '20090519') + rng_eastern = date_range('20090415', '20090519', + tz=pytz.timezone('US/Eastern')) + rng_utc = date_range('20090415', '20090519', tz=pytz.utc) + + 
_check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_index_convert_to_datetime_array_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + + def _check_rng(rng): + converted = rng.to_pydatetime() + tm.assertIsInstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + tm.assertIsInstance(x, datetime) + self.assertEqual(x, stamp.to_pydatetime()) + self.assertEqual(x.tzinfo, stamp.tzinfo) + + rng = date_range('20090415', '20090519') + rng_eastern = date_range('20090415', '20090519', + tz='dateutil/US/Eastern') + rng_utc = date_range('20090415', '20090519', tz=dateutil.tz.tzutc()) + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_reasonable_keyerror(self): + # GH #1062 + index = DatetimeIndex(['1/3/2000']) + try: + index.get_loc('1/1/2000') + except KeyError as e: + self.assertIn('2000', str(e)) + + def test_reindex_with_datetimes(self): + rng = date_range('1/1/2000', periods=20) + ts = Series(np.random.randn(20), index=rng) + + result = ts.reindex(list(ts.index[5:10])) + expected = ts[5:10] + tm.assert_series_equal(result, expected) + + result = ts[list(ts.index[5:10])] + tm.assert_series_equal(result, expected) + + def test_asfreq_keep_index_name(self): + # GH #9854 + index_name = 'bar' + index = pd.date_range('20130101', periods=20, name=index_name) + df = pd.DataFrame([x for x in range(20)], columns=['foo'], index=index) + + self.assertEqual(index_name, df.index.name) + self.assertEqual(index_name, df.asfreq('10D').index.name) + + def test_promote_datetime_date(self): + rng = date_range('1/1/2000', periods=20) + ts = Series(np.random.randn(20), index=rng) + + ts_slice = ts[5:] + ts2 = ts_slice.copy() + ts2.index = [x.date() for x in ts2.index] + + result = ts + ts2 + result2 = ts2 + ts + expected = ts + ts[5:] + assert_series_equal(result, expected) + assert_series_equal(result2, expected) + + # test asfreq + result = ts2.asfreq('4H', method='ffill') + expected = ts[5:].asfreq('4H', method='ffill') + assert_series_equal(result, expected) + + result = rng.get_indexer(ts2.index) + expected = rng.get_indexer(ts_slice.index) + self.assert_numpy_array_equal(result, expected) + + def test_asfreq_normalize(self): + rng = date_range('1/1/2000 09:30', periods=20) + norm = date_range('1/1/2000', periods=20) + vals = np.random.randn(20) + ts = Series(vals, index=rng) + + result = ts.asfreq('D', normalize=True) + norm = date_range('1/1/2000', periods=20) + expected = Series(vals, index=norm) + + assert_series_equal(result, expected) + + vals = np.random.randn(20, 3) + ts = DataFrame(vals, index=rng) + + result = ts.asfreq('D', normalize=True) + expected = DataFrame(vals, index=norm) + + assert_frame_equal(result, expected) + + def test_first_subset(self): + ts = _simple_ts('1/1/2000', '1/1/2010', freq='12h') + result = ts.first('10d') + self.assertEqual(len(result), 20) + + ts = _simple_ts('1/1/2000', '1/1/2010') + result = ts.first('10d') + self.assertEqual(len(result), 10) + + result = ts.first('3M') + expected = ts[:'3/31/2000'] + assert_series_equal(result, expected) + + result = ts.first('21D') + expected = ts[:21] + assert_series_equal(result, expected) + + result = ts[:0].first('3M') + assert_series_equal(result, ts[:0]) + + def test_last_subset(self): + ts = _simple_ts('1/1/2000', '1/1/2010', freq='12h') + result = ts.last('10d') + self.assertEqual(len(result), 20) + + ts = _simple_ts('1/1/2000', '1/1/2010') + result = ts.last('10d') + self.assertEqual(len(result), 10) + + result = ts.last('21D') + expected = 
ts['12/12/2009':] + assert_series_equal(result, expected) + + result = ts.last('21D') + expected = ts[-21:] + assert_series_equal(result, expected) + + result = ts[:0].last('3M') + assert_series_equal(result, ts[:0]) + + def test_format_pre_1900_dates(self): + rng = date_range('1/1/1850', '1/1/1950', freq='A-DEC') + rng.format() + ts = Series(1, index=rng) + repr(ts) + + def test_at_time(self): + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = Series(np.random.randn(len(rng)), index=rng) + rs = ts.at_time(rng[1]) + self.assertTrue((rs.index.hour == rng[1].hour).all()) + self.assertTrue((rs.index.minute == rng[1].minute).all()) + self.assertTrue((rs.index.second == rng[1].second).all()) + + result = ts.at_time('9:30') + expected = ts.at_time(time(9, 30)) + assert_series_equal(result, expected) + + df = DataFrame(np.random.randn(len(rng), 3), index=rng) + + result = ts[time(9, 30)] + result_df = df.loc[time(9, 30)] + expected = ts[(rng.hour == 9) & (rng.minute == 30)] + exp_df = df[(rng.hour == 9) & (rng.minute == 30)] + + # expected.index = date_range('1/1/2000', '1/4/2000') + + assert_series_equal(result, expected) + tm.assert_frame_equal(result_df, exp_df) + + chunk = df.loc['1/4/2000':] + result = chunk.loc[time(9, 30)] + expected = result_df[-1:] + tm.assert_frame_equal(result, expected) + + # midnight, everything + rng = date_range('1/1/2000', '1/31/2000') + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.at_time(time(0, 0)) + assert_series_equal(result, ts) + + # time doesn't exist + rng = date_range('1/1/2012', freq='23Min', periods=384) + ts = Series(np.random.randn(len(rng)), rng) + rs = ts.at_time('16:00') + self.assertEqual(len(rs), 0) + + def test_at_time_frame(self): + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + rs = ts.at_time(rng[1]) + self.assertTrue((rs.index.hour == rng[1].hour).all()) + self.assertTrue((rs.index.minute == rng[1].minute).all()) + self.assertTrue((rs.index.second == rng[1].second).all()) + + result = ts.at_time('9:30') + expected = ts.at_time(time(9, 30)) + assert_frame_equal(result, expected) + + result = ts.loc[time(9, 30)] + expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)] + + assert_frame_equal(result, expected) + + # midnight, everything + rng = date_range('1/1/2000', '1/31/2000') + ts = DataFrame(np.random.randn(len(rng), 3), index=rng) + + result = ts.at_time(time(0, 0)) + assert_frame_equal(result, ts) + + # time doesn't exist + rng = date_range('1/1/2012', freq='23Min', periods=384) + ts = DataFrame(np.random.randn(len(rng), 2), rng) + rs = ts.at_time('16:00') + self.assertEqual(len(rs), 0) + + def test_between_time(self): + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = Series(np.random.randn(len(rng)), index=rng) + stime = time(0, 0) + etime = time(1, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = 13 * 4 + 1 + if not inc_start: + exp_len -= 5 + if not inc_end: + exp_len -= 4 + + self.assertEqual(len(filtered), exp_len) + for rs in filtered.index: + t = rs.time() + if inc_start: + self.assertTrue(t >= stime) + else: + self.assertTrue(t > stime) + + if inc_end: + self.assertTrue(t <= etime) + else: + self.assertTrue(t < etime) + + result = ts.between_time('00:00', '01:00') + expected = ts.between_time(stime, etime) + assert_series_equal(result, expected) + + # across midnight + rng = 
date_range('1/1/2000', '1/5/2000', freq='5min') + ts = Series(np.random.randn(len(rng)), index=rng) + stime = time(22, 0) + etime = time(9, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = (12 * 11 + 1) * 4 + 1 + if not inc_start: + exp_len -= 4 + if not inc_end: + exp_len -= 4 + + self.assertEqual(len(filtered), exp_len) + for rs in filtered.index: + t = rs.time() + if inc_start: + self.assertTrue((t >= stime) or (t <= etime)) + else: + self.assertTrue((t > stime) or (t <= etime)) + + if inc_end: + self.assertTrue((t <= etime) or (t >= stime)) + else: + self.assertTrue((t < etime) or (t >= stime)) + + def test_between_time_frame(self): + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + stime = time(0, 0) + etime = time(1, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = 13 * 4 + 1 + if not inc_start: + exp_len -= 5 + if not inc_end: + exp_len -= 4 + + self.assertEqual(len(filtered), exp_len) + for rs in filtered.index: + t = rs.time() + if inc_start: + self.assertTrue(t >= stime) + else: + self.assertTrue(t > stime) + + if inc_end: + self.assertTrue(t <= etime) + else: + self.assertTrue(t < etime) + + result = ts.between_time('00:00', '01:00') + expected = ts.between_time(stime, etime) + assert_frame_equal(result, expected) + + # across midnight + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + stime = time(22, 0) + etime = time(9, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = (12 * 11 + 1) * 4 + 1 + if not inc_start: + exp_len -= 4 + if not inc_end: + exp_len -= 4 + + self.assertEqual(len(filtered), exp_len) + for rs in filtered.index: + t = rs.time() + if inc_start: + self.assertTrue((t >= stime) or (t <= etime)) + else: + self.assertTrue((t > stime) or (t <= etime)) + + if inc_end: + self.assertTrue((t <= etime) or (t >= stime)) + else: + self.assertTrue((t < etime) or (t >= stime)) + + def test_between_time_types(self): + # GH11818 + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + self.assertRaises(ValueError, rng.indexer_between_time, + datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + frame = DataFrame({'A': 0}, index=rng) + self.assertRaises(ValueError, frame.between_time, + datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + series = Series(0, index=rng) + self.assertRaises(ValueError, series.between_time, + datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + def test_between_time_formats(self): + # GH11818 + _skip_if_has_locale() + + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + + strings = [("2:00", "2:30"), ("0200", "0230"), ("2:00am", "2:30am"), + ("0200am", "0230am"), ("2:00:00", "2:30:00"), + ("020000", "023000"), ("2:00:00am", "2:30:00am"), + ("020000am", "023000am")] + expected_length = 28 + + for time_string in strings: + self.assertEqual(len(ts.between_time(*time_string)), + expected_length, + "%s - %s" % time_string) + + def test_to_period(self): + from pandas.tseries.period import period_range + + ts = _simple_ts('1/1/2000', '1/1/2001') + + pts = ts.to_period() + exp = ts.copy() + exp.index 
= period_range('1/1/2000', '1/1/2001') + assert_series_equal(pts, exp) + + pts = ts.to_period('M') + exp.index = exp.index.asfreq('M') + tm.assert_index_equal(pts.index, exp.index.asfreq('M')) + assert_series_equal(pts, exp) + + # GH 7606 without freq + idx = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', + '2011-01-04']) + exp_idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03', + '2011-01-04'], freq='D') + + s = Series(np.random.randn(4), index=idx) + expected = s.copy() + expected.index = exp_idx + assert_series_equal(s.to_period(), expected) + + df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx) + expected = df.copy() + expected.index = exp_idx + assert_frame_equal(df.to_period(), expected) + + expected = df.copy() + expected.columns = exp_idx + assert_frame_equal(df.to_period(axis=1), expected) + + def create_dt64_based_index(self): + data = [Timestamp('2007-01-01 10:11:12.123456Z'), + Timestamp('2007-01-01 10:11:13.789123Z')] + index = DatetimeIndex(data) + return index + + def test_to_period_millisecond(self): + index = self.create_dt64_based_index() + + period = index.to_period(freq='L') + self.assertEqual(2, len(period)) + self.assertEqual(period[0], Period('2007-01-01 10:11:12.123Z', 'L')) + self.assertEqual(period[1], Period('2007-01-01 10:11:13.789Z', 'L')) + + def test_to_period_microsecond(self): + index = self.create_dt64_based_index() + + period = index.to_period(freq='U') + self.assertEqual(2, len(period)) + self.assertEqual(period[0], Period('2007-01-01 10:11:12.123456Z', 'U')) + self.assertEqual(period[1], Period('2007-01-01 10:11:13.789123Z', 'U')) + + def test_to_period_tz_pytz(self): + tm._skip_if_no_pytz() + from dateutil.tz import tzlocal + from pytz import utc as UTC + + xp = date_range('1/1/2000', '4/1/2000').to_period() + + ts = date_range('1/1/2000', '4/1/2000', tz='US/Eastern') + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertEqual(result, expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=UTC) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertEqual(result, expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertEqual(result, expected) + tm.assert_index_equal(ts.to_period(), xp) + + def test_to_period_tz_explicit_pytz(self): + tm._skip_if_no_pytz() + import pytz + from dateutil.tz import tzlocal + + xp = date_range('1/1/2000', '4/1/2000').to_period() + + ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern')) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + def test_to_period_tz_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + from dateutil.tz import tzlocal + + xp = date_range('1/1/2000', '4/1/2000').to_period() + + ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern') + + result = ts.to_period()[0] + expected = ts[0].to_period() + + 
self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + def test_frame_to_period(self): + K = 5 + from pandas.tseries.period import period_range + + dr = date_range('1/1/2000', '1/1/2001') + pr = period_range('1/1/2000', '1/1/2001') + df = DataFrame(randn(len(dr), K), index=dr) + df['mix'] = 'a' + + pts = df.to_period() + exp = df.copy() + exp.index = pr + assert_frame_equal(pts, exp) + + pts = df.to_period('M') + tm.assert_index_equal(pts.index, exp.index.asfreq('M')) + + df = df.T + pts = df.to_period(axis=1) + exp = df.copy() + exp.columns = pr + assert_frame_equal(pts, exp) + + pts = df.to_period('M', axis=1) + tm.assert_index_equal(pts.columns, exp.columns.asfreq('M')) + + self.assertRaises(ValueError, df.to_period, axis=2) + + def test_compat_replace(self): + # https://github.com/statsmodels/statsmodels/issues/3349 + # replace should take ints/longs for compat + + for f in [compat.long, int]: + result = date_range(Timestamp('1960-04-01 00:00:00', + freq='QS-JAN'), + periods=f(76), + freq='QS-JAN') + self.assertEqual(len(result), 76) + + def test_astype_object(self): + # NumPy 1.6.1 weak ns support + rng = date_range('1/1/2000', periods=20) + + casted = rng.astype('O') + exp_values = list(rng) + + tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_)) + self.assertEqual(casted.tolist(), exp_values) + + def test_catch_infinite_loop(self): + offset = offsets.DateOffset(minute=5) + # blow up, don't loop forever + self.assertRaises(Exception, date_range, datetime(2011, 11, 11), + datetime(2011, 11, 12), freq=offset) + + def test_append_concat(self): + rng = date_range('5/8/2012 1:45', periods=10, freq='5T') + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + + result = ts.append(ts) + result_df = df.append(df) + ex_index = DatetimeIndex(np.tile(rng.values, 2)) + tm.assert_index_equal(result.index, ex_index) + tm.assert_index_equal(result_df.index, ex_index) + + appended = rng.append(rng) + tm.assert_index_equal(appended, ex_index) + + appended = rng.append([rng, rng]) + ex_index = DatetimeIndex(np.tile(rng.values, 3)) + tm.assert_index_equal(appended, ex_index) + + # different index names + rng1 = rng.copy() + rng2 = rng.copy() + rng1.name = 'foo' + rng2.name = 'bar' + self.assertEqual(rng1.append(rng1).name, 'foo') + self.assertIsNone(rng1.append(rng2).name) + + def test_append_concat_tz(self): + # GH 2938 + tm._skip_if_no_pytz() + + rng = date_range('5/8/2012 1:45', periods=10, freq='5T', + tz='US/Eastern') + rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', + tz='US/Eastern') + rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', + tz='US/Eastern') + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts.append(ts2) + result_df = df.append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + 
tm.assert_index_equal(appended, rng3) + + def test_append_concat_tz_explicit_pytz(self): + # GH 2938 + tm._skip_if_no_pytz() + from pytz import timezone as timezone + + rng = date_range('5/8/2012 1:45', periods=10, freq='5T', + tz=timezone('US/Eastern')) + rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', + tz=timezone('US/Eastern')) + rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', + tz=timezone('US/Eastern')) + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts.append(ts2) + result_df = df.append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) + + def test_append_concat_tz_dateutil(self): + # GH 2938 + tm._skip_if_no_dateutil() + rng = date_range('5/8/2012 1:45', periods=10, freq='5T', + tz='dateutil/US/Eastern') + rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', + tz='dateutil/US/Eastern') + rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', + tz='dateutil/US/Eastern') + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts.append(ts2) + result_df = df.append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) + + def test_set_dataframe_column_ns_dtype(self): + x = DataFrame([datetime.now(), datetime.now()]) + self.assertEqual(x[0].dtype, np.dtype('M8[ns]')) + + def test_groupby_count_dateparseerror(self): + dr = date_range(start='1/1/2012', freq='5min', periods=10) + + # BAD Example, datetimes first + s = Series(np.arange(10), index=[dr, lrange(10)]) + grouped = s.groupby(lambda x: x[1] % 2 == 0) + result = grouped.count() + + s = Series(np.arange(10), index=[lrange(10), dr]) + grouped = s.groupby(lambda x: x[0] % 2 == 0) + expected = grouped.count() + + assert_series_equal(result, expected) + + def test_frame_datetime64_handling_groupby(self): + # it works! + df = DataFrame([(3, np.datetime64('2012-07-03')), + (3, np.datetime64('2012-07-04'))], + columns=['a', 'date']) + result = df.groupby('a').first() + self.assertEqual(result['date'][3], Timestamp('2012-07-03')) + + def test_frame_dict_constructor_datetime64_1680(self): + dr = date_range('1/1/2012', periods=10) + s = Series(dr, index=dr) + + # it works! + DataFrame({'a': 'foo', 'b': s}, index=dr) + DataFrame({'a': 'foo', 'b': s.values}, index=dr) + + def test_frame_datetime64_mixed_index_ctor_1681(self): + dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') + ts = Series(dr) + + # it works! 
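+        # Note: 'ts' keeps its default integer index, so aligning it against
+        # the DatetimeIndex 'dr' matches no labels and column 'B' comes out
+        # all-NaT, which is what the isnull().all() assertion below verifies.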
+        d = DataFrame({'A': 'foo', 'B': ts}, index=dr)
+        self.assertTrue(d['B'].isnull().all())
+
+    def test_frame_timeseries_to_records(self):
+        index = date_range('1/1/2000', periods=10)
+        df = DataFrame(np.random.randn(10, 3), index=index,
+                       columns=['a', 'b', 'c'])
+
+        result = df.to_records()
+        # to_records() should preserve the datetime64[ns] index
+        self.assertEqual(result['index'].dtype, np.dtype('M8[ns]'))
+
+        result = df.to_records(index=False)
+
+    def test_to_csv_numpy_16_bug(self):
+        frame = DataFrame({'a': date_range('1/1/2000', periods=10)})
+
+        buf = StringIO()
+        frame.to_csv(buf)
+
+        result = buf.getvalue()
+        self.assertIn('2000-01-01', result)
+
+    def test_series_map_box_timedelta(self):
+        # GH 11349
+        s = Series(timedelta_range('1 day 1 s', periods=5, freq='h'))
+
+        def f(x):
+            return x.total_seconds()
+
+        s.map(f)
+        s.apply(f)
+        DataFrame(s).applymap(f)
+
+    def test_concat_datetime_datetime64_frame(self):
+        # #2624
+        rows = []
+        rows.append([datetime(2010, 1, 1), 1])
+        rows.append([datetime(2010, 1, 2), 'hi'])
+
+        df2_obj = DataFrame.from_records(rows, columns=['date', 'test'])
+
+        ind = date_range(start="2000/1/1", freq="D", periods=10)
+        df1 = DataFrame({'date': ind, 'test': lrange(10)})
+
+        # it works!
+        pd.concat([df1, df2_obj])
+
+    def test_asfreq_resample_set_correct_freq(self):
+        # GH5613
+        # we test if .asfreq() and .resample() set the correct value for .freq
+        df = pd.DataFrame({'date': ["2012-01-01", "2012-01-02", "2012-01-03"],
+                           'col': [1, 2, 3]})
+        df = df.set_index(pd.to_datetime(df.date))
+
+        # testing the settings before calling .asfreq() and .resample()
+        self.assertEqual(df.index.freq, None)
+        self.assertEqual(df.index.inferred_freq, 'D')
+
+        # does .asfreq() set .freq correctly?
+        self.assertEqual(df.asfreq('D').index.freq, 'D')
+
+        # does .resample() set .freq correctly?
+        self.assertEqual(df.resample('D').asfreq().index.freq, 'D')
+
+    def test_pickle(self):
+
+        # GH4606
+        p = self.round_trip_pickle(NaT)
+        self.assertTrue(p is NaT)
+
+        idx = pd.to_datetime(['2013-01-01', NaT, '2014-01-06'])
+        idx_p = self.round_trip_pickle(idx)
+        self.assertTrue(idx_p[0] == idx[0])
+        self.assertTrue(idx_p[1] is NaT)
+        self.assertTrue(idx_p[2] == idx[2])
+
+        # GH11002
+        # don't infer freq
+        idx = date_range('1750-1-1', '2050-1-1', freq='7D')
+        idx_p = self.round_trip_pickle(idx)
+        tm.assert_index_equal(idx, idx_p)
+
+
+class TestTimeSeriesDuplicates(tm.TestCase):
+    _multiprocess_can_split_ = True
+
+    def setUp(self):
+        dates = [datetime(2000, 1, 2), datetime(2000, 1, 2),
+                 datetime(2000, 1, 2), datetime(2000, 1, 3),
+                 datetime(2000, 1, 3), datetime(2000, 1, 3),
+                 datetime(2000, 1, 4), datetime(2000, 1, 4),
+                 datetime(2000, 1, 4), datetime(2000, 1, 5)]
+
+        self.dups = Series(np.random.randn(len(dates)), index=dates)
+
+    def test_constructor(self):
+        tm.assertIsInstance(self.dups, Series)
+        tm.assertIsInstance(self.dups.index, DatetimeIndex)
+
+    def test_is_unique_monotonic(self):
+        self.assertFalse(self.dups.index.is_unique)
+
+    def test_index_unique(self):
+        uniques = self.dups.index.unique()
+        expected = DatetimeIndex([datetime(2000, 1, 2), datetime(2000, 1, 3),
+                                  datetime(2000, 1, 4), datetime(2000, 1, 5)])
+        self.assertEqual(uniques.dtype, 'M8[ns]')  # sanity
+        tm.assert_index_equal(uniques, expected)
+        self.assertEqual(self.dups.index.nunique(), 4)
+
+        # #2563
+        self.assertTrue(isinstance(uniques, DatetimeIndex))
+
+        dups_local = self.dups.index.tz_localize('US/Eastern')
+        dups_local.name = 'foo'
+        result = dups_local.unique()
+        expected = DatetimeIndex(expected, name='foo')
+        expected = expected.tz_localize('US/Eastern')
+        self.assertTrue(result.tz is
not None) + self.assertEqual(result.name, 'foo') + tm.assert_index_equal(result, expected) + + # NaT, note this is excluded + arr = [1370745748 + t for t in range(20)] + [iNaT] + idx = DatetimeIndex(arr * 3) + tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) + self.assertEqual(idx.nunique(), 20) + self.assertEqual(idx.nunique(dropna=False), 21) + + arr = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) + for t in range(20)] + [NaT] + idx = DatetimeIndex(arr * 3) + tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) + self.assertEqual(idx.nunique(), 20) + self.assertEqual(idx.nunique(dropna=False), 21) + + def test_index_dupes_contains(self): + d = datetime(2011, 12, 5, 20, 30) + ix = DatetimeIndex([d, d]) + self.assertTrue(d in ix) + + def test_duplicate_dates_indexing(self): + ts = self.dups + + uniques = ts.index.unique() + for date in uniques: + result = ts[date] + + mask = ts.index == date + total = (ts.index == date).sum() + expected = ts[mask] + if total > 1: + assert_series_equal(result, expected) + else: + assert_almost_equal(result, expected[0]) + + cp = ts.copy() + cp[date] = 0 + expected = Series(np.where(mask, 0, ts), index=ts.index) + assert_series_equal(cp, expected) + + self.assertRaises(KeyError, ts.__getitem__, datetime(2000, 1, 6)) + + # new index + ts[datetime(2000, 1, 6)] = 0 + self.assertEqual(ts[datetime(2000, 1, 6)], 0) + + def test_range_slice(self): + idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000', + '1/4/2000']) + + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts['1/2/2000':] + expected = ts[1:] + assert_series_equal(result, expected) + + result = ts['1/2/2000':'1/3/2000'] + expected = ts[1:4] + assert_series_equal(result, expected) + + def test_groupby_average_dup_values(self): + result = self.dups.groupby(level=0).mean() + expected = self.dups.groupby(self.dups.index).mean() + assert_series_equal(result, expected) + + def test_indexing_over_size_cutoff(self): + import datetime + # #1821 + + old_cutoff = _index._SIZE_CUTOFF + try: + _index._SIZE_CUTOFF = 1000 + + # create large list of non periodic datetime + dates = [] + sec = datetime.timedelta(seconds=1) + half_sec = datetime.timedelta(microseconds=500000) + d = datetime.datetime(2011, 12, 5, 20, 30) + n = 1100 + for i in range(n): + dates.append(d) + dates.append(d + sec) + dates.append(d + sec + half_sec) + dates.append(d + sec + sec + half_sec) + d += 3 * sec + + # duplicate some values in the list + duplicate_positions = np.random.randint(0, len(dates) - 1, 20) + for p in duplicate_positions: + dates[p + 1] = dates[p] + + df = DataFrame(np.random.randn(len(dates), 4), + index=dates, + columns=list('ABCD')) + + pos = n * 3 + timestamp = df.index[pos] + self.assertIn(timestamp, df.index) + + # it works! + df.loc[timestamp] + self.assertTrue(len(df.loc[[timestamp]]) > 0) + finally: + _index._SIZE_CUTOFF = old_cutoff + + def test_indexing_unordered(self): + # GH 2437 + rng = date_range(start='2011-01-01', end='2011-01-15') + ts = Series(randn(len(rng)), index=rng) + ts2 = concat([ts[0:4], ts[-4:], ts[4:-4]]) + + for t in ts.index: + # TODO: unused? 
+ s = str(t) # noqa + + expected = ts[t] + result = ts2[t] + self.assertTrue(expected == result) + + # GH 3448 (ranges) + def compare(slobj): + result = ts2[slobj].copy() + result = result.sort_index() + expected = ts[slobj] + assert_series_equal(result, expected) + + compare(slice('2011-01-01', '2011-01-15')) + compare(slice('2010-12-30', '2011-01-15')) + compare(slice('2011-01-01', '2011-01-16')) + + # partial ranges + compare(slice('2011-01-01', '2011-01-6')) + compare(slice('2011-01-06', '2011-01-8')) + compare(slice('2011-01-06', '2011-01-12')) + + # single values + result = ts2['2011'].sort_index() + expected = ts['2011'] + assert_series_equal(result, expected) + + # diff freq + rng = date_range(datetime(2005, 1, 1), periods=20, freq='M') + ts = Series(np.arange(len(rng)), index=rng) + ts = ts.take(np.random.permutation(20)) + + result = ts['2005'] + for t in result.index: + self.assertTrue(t.year == 2005) + + def test_indexing(self): + + idx = date_range("2001-1-1", periods=20, freq='M') + ts = Series(np.random.rand(len(idx)), index=idx) + + # getting + + # GH 3070, make sure semantics work on Series/Frame + expected = ts['2001'] + expected.name = 'A' + + df = DataFrame(dict(A=ts)) + result = df['2001']['A'] + assert_series_equal(expected, result) + + # setting + ts['2001'] = 1 + expected = ts['2001'] + expected.name = 'A' + + df.loc['2001', 'A'] = 1 + + result = df['2001']['A'] + assert_series_equal(expected, result) + + # GH3546 (not including times on the last day) + idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00', + freq='H') + ts = Series(lrange(len(idx)), index=idx) + expected = ts['2013-05'] + assert_series_equal(expected, ts) + + idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59', + freq='S') + ts = Series(lrange(len(idx)), index=idx) + expected = ts['2013-05'] + assert_series_equal(expected, ts) + + idx = [Timestamp('2013-05-31 00:00'), + Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))] + ts = Series(lrange(len(idx)), index=idx) + expected = ts['2013'] + assert_series_equal(expected, ts) + + # GH14826, indexing with a seconds resolution string / datetime object + df = DataFrame(randn(5, 5), + columns=['open', 'high', 'low', 'close', 'volume'], + index=date_range('2012-01-02 18:01:00', + periods=5, tz='US/Central', freq='s')) + expected = df.loc[[df.index[2]]] + + # this is a single date, so will raise + self.assertRaises(KeyError, df.__getitem__, '2012-01-02 18:01:02', ) + self.assertRaises(KeyError, df.__getitem__, df.index[2], ) + + +class TestDatetime64(tm.TestCase): + """ + Also test support for datetime64[ns] in Series / DataFrame + """ + + def setUp(self): + dti = DatetimeIndex(start=datetime(2005, 1, 1), + end=datetime(2005, 1, 10), freq='Min') + self.series = Series(rand(len(dti)), dti) + + def test_fancy_getitem(self): + dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), + end=datetime(2010, 1, 1)) + + s = Series(np.arange(len(dti)), index=dti) + + self.assertEqual(s[48], 48) + self.assertEqual(s['1/2/2009'], 48) + self.assertEqual(s['2009-1-2'], 48) + self.assertEqual(s[datetime(2009, 1, 2)], 48) + self.assertEqual(s[lib.Timestamp(datetime(2009, 1, 2))], 48) + self.assertRaises(KeyError, s.__getitem__, '2009-1-3') + + assert_series_equal(s['3/6/2009':'2009-06-05'], + s[datetime(2009, 3, 6):datetime(2009, 6, 5)]) + + def test_fancy_setitem(self): + dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), + end=datetime(2010, 1, 1)) + + s = Series(np.arange(len(dti)), index=dti) + s[48] = -1 + 
self.assertEqual(s[48], -1) + s['1/2/2009'] = -2 + self.assertEqual(s[48], -2) + s['1/2/2009':'2009-06-05'] = -3 + self.assertTrue((s[48:54] == -3).all()) + + def test_dti_snap(self): + dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002', + '1/5/2002', '1/6/2002', '1/7/2002'], freq='D') + + res = dti.snap(freq='W-MON') + exp = date_range('12/31/2001', '1/7/2002', freq='w-mon') + exp = exp.repeat([3, 4]) + self.assertTrue((res == exp).all()) + + res = dti.snap(freq='B') + + exp = date_range('1/1/2002', '1/7/2002', freq='b') + exp = exp.repeat([1, 1, 1, 2, 2]) + self.assertTrue((res == exp).all()) + + def test_dti_reset_index_round_trip(self): + dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D') + d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti) + d2 = d1.reset_index() + self.assertEqual(d2.dtypes[0], np.dtype('M8[ns]')) + d3 = d2.set_index('index') + assert_frame_equal(d1, d3, check_names=False) + + # #2329 + stamp = datetime(2012, 11, 22) + df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value']) + df = df.set_index('Date') + + self.assertEqual(df.index[0], stamp) + self.assertEqual(df.reset_index()['Date'][0], stamp) + + def test_series_set_value(self): + # #1561 + + dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)] + index = DatetimeIndex(dates) + + s = Series().set_value(dates[0], 1.) + s2 = s.set_value(dates[1], np.nan) + + exp = Series([1., np.nan], index=index) + + assert_series_equal(s2, exp) + + # s = Series(index[:1], index[:1]) + # s2 = s.set_value(dates[1], index[1]) + # self.assertEqual(s2.values.dtype, 'M8[ns]') + + @slow + def test_slice_locs_indexerror(self): + times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) + for i in range(100000)] + s = Series(lrange(100000), times) + s.loc[datetime(1900, 1, 1):datetime(2100, 1, 1)] + + def test_slicing_datetimes(self): + + # GH 7523 + + # unique + df = DataFrame(np.arange(4., dtype='float64'), + index=[datetime(2001, 1, i, 10, 00) + for i in [1, 2, 3, 4]]) + result = df.loc[datetime(2001, 1, 1, 10):] + assert_frame_equal(result, df) + result = df.loc[:datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + + result = df.loc[datetime(2001, 1, 1, 11):] + expected = df.iloc[1:] + assert_frame_equal(result, expected) + result = df.loc['20010101 11':] + assert_frame_equal(result, expected) + + # duplicates + df = pd.DataFrame(np.arange(5., dtype='float64'), + index=[datetime(2001, 1, i, 10, 00) + for i in [1, 2, 2, 3, 4]]) + + result = df.loc[datetime(2001, 1, 1, 10):] + assert_frame_equal(result, df) + result = df.loc[:datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + + result = df.loc[datetime(2001, 1, 1, 11):] + expected = df.iloc[1:] + assert_frame_equal(result, expected) + result = df.loc['20010101 11':] + assert_frame_equal(result, expected) + + def test_frame_datetime64_duplicated(self): + dates = date_range('2010-07-01', end='2010-08-05') + + tst = DataFrame({'symbol': 'AAA', 'date': dates}) + result = tst.duplicated(['date', 'symbol']) + self.assertTrue((-result).all()) + + tst = DataFrame({'date': dates}) + result = tst.duplicated() + self.assertTrue((-result).all()) + + +class TestSeriesDatetime64(tm.TestCase): + def setUp(self): + self.series = Series(date_range('1/1/2000', periods=10)) + + def test_auto_conversion(self): + series = 
Series(list(date_range('1/1/2000', periods=10))) + self.assertEqual(series.dtype, 'M8[ns]') + + def test_constructor_cant_cast_datetime64(self): + msg = "Cannot cast datetime64 to " + with tm.assertRaisesRegexp(TypeError, msg): + Series(date_range('1/1/2000', periods=10), dtype=float) + + with tm.assertRaisesRegexp(TypeError, msg): + Series(date_range('1/1/2000', periods=10), dtype=int) + + def test_constructor_cast_object(self): + s = Series(date_range('1/1/2000', periods=10), dtype=object) + exp = Series(date_range('1/1/2000', periods=10)) + tm.assert_series_equal(s, exp) + + def test_series_comparison_scalars(self): + val = datetime(2000, 1, 4) + result = self.series > val + expected = Series([x > val for x in self.series]) + self.assert_series_equal(result, expected) + + val = self.series[5] + result = self.series > val + expected = Series([x > val for x in self.series]) + self.assert_series_equal(result, expected) + + def test_between(self): + left, right = self.series[[2, 7]] + + result = self.series.between(left, right) + expected = (self.series >= left) & (self.series <= right) + assert_series_equal(result, expected) + + # --------------------------------------------------------------------- + # NaT support + + def test_NaT_scalar(self): + series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') + + val = series[3] + self.assertTrue(com.isnull(val)) + + series[2] = val + self.assertTrue(com.isnull(series[2])) + + def test_NaT_cast(self): + # GH10747 + result = Series([np.nan]).astype('M8[ns]') + expected = Series([NaT]) + assert_series_equal(result, expected) + + def test_set_none_nan(self): + self.series[3] = None + self.assertIs(self.series[3], NaT) + + self.series[3:5] = None + self.assertIs(self.series[4], NaT) + + self.series[5] = np.nan + self.assertIs(self.series[5], NaT) + + self.series[5:7] = np.nan + self.assertIs(self.series[6], NaT) + + def test_intercept_astype_object(self): + + # this test no longer makes sense as series is by default already + # M8[ns] + expected = self.series.astype('object') + + df = DataFrame({'a': self.series, + 'b': np.random.randn(len(self.series))}) + exp_dtypes = pd.Series([np.dtype('datetime64[ns]'), + np.dtype('float64')], index=['a', 'b']) + tm.assert_series_equal(df.dtypes, exp_dtypes) + + result = df.values.squeeze() + self.assertTrue((result[:, 0] == expected.values).all()) + + df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)}) + + result = df.values.squeeze() + self.assertTrue((result[:, 0] == expected.values).all()) + + def test_nat_operations(self): + # GH 8617 + s = Series([0, pd.NaT], dtype='m8[ns]') + exp = s[0] + self.assertEqual(s.median(), exp) + self.assertEqual(s.min(), exp) + self.assertEqual(s.max(), exp) + + def test_round_nat(self): + # GH14940 + s = Series([pd.NaT]) + expected = Series(pd.NaT) + for method in ["round", "floor", "ceil"]: + round_method = getattr(s.dt, method) + for freq in ["s", "5s", "min", "5min", "h", "5h"]: + assert_series_equal(round_method(freq), expected) + + +class TestDaysInMonth(tm.TestCase): + # tests for issue #10154 + def test_day_not_in_month_coerce(self): + self.assertTrue(isnull(to_datetime('2015-02-29', errors='coerce'))) + self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", + errors='coerce'))) + self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d", + errors='coerce'))) + self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", + errors='coerce'))) + + def test_day_not_in_month_raise(self): + self.assertRaises(ValueError, 
to_datetime, '2015-02-29',
+                          errors='raise')
+        self.assertRaises(ValueError, to_datetime, '2015-02-29',
+                          errors='raise', format="%Y-%m-%d")
+        self.assertRaises(ValueError, to_datetime, '2015-02-32',
+                          errors='raise', format="%Y-%m-%d")
+        self.assertRaises(ValueError, to_datetime, '2015-04-31',
+                          errors='raise', format="%Y-%m-%d")
+
+    def test_day_not_in_month_ignore(self):
+        self.assertEqual(to_datetime(
+            '2015-02-29', errors='ignore'), '2015-02-29')
+        self.assertEqual(to_datetime(
+            '2015-02-29', errors='ignore', format="%Y-%m-%d"), '2015-02-29')
+        self.assertEqual(to_datetime(
+            '2015-02-32', errors='ignore', format="%Y-%m-%d"), '2015-02-32')
+        self.assertEqual(to_datetime(
+            '2015-04-31', errors='ignore', format="%Y-%m-%d"), '2015-04-31')
+
+
+class TestGuessDatetimeFormat(tm.TestCase):
+
+    def test_guess_datetime_format_with_parseable_formats(self):
+        tm._skip_if_not_us_locale()
+        dt_string_to_format = (('20111230', '%Y%m%d'),
+                               ('2011-12-30', '%Y-%m-%d'),
+                               ('30-12-2011', '%d-%m-%Y'),
+                               ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'),
+                               ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'),
+                               ('2011-12-30 00:00:00.000000',
+                                '%Y-%m-%d %H:%M:%S.%f'), )
+
+        for dt_string, dt_format in dt_string_to_format:
+            self.assertEqual(
+                tools._guess_datetime_format(dt_string),
+                dt_format
+            )
+
+    def test_guess_datetime_format_with_dayfirst(self):
+        ambiguous_string = '01/01/2011'
+        self.assertEqual(
+            tools._guess_datetime_format(ambiguous_string, dayfirst=True),
+            '%d/%m/%Y'
+        )
+        self.assertEqual(
+            tools._guess_datetime_format(ambiguous_string, dayfirst=False),
+            '%m/%d/%Y'
+        )
+
+    def test_guess_datetime_format_with_locale_specific_formats(self):
+        # The month names will vary depending on the locale, in which
+        # case these won't be parsed properly (dateutil can't parse them)
+        _skip_if_has_locale()
+
+        dt_string_to_format = (('30/Dec/2011', '%d/%b/%Y'),
+                               ('30/December/2011', '%d/%B/%Y'),
+                               ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'), )
+
+        for dt_string, dt_format in dt_string_to_format:
+            self.assertEqual(
+                tools._guess_datetime_format(dt_string),
+                dt_format
+            )
+
+    def test_guess_datetime_format_invalid_inputs(self):
+        # A datetime string must include a year, month and a day for it
+        # to be guessable, in addition to being a string that looks like
+        # a datetime
+        invalid_dts = [
+            '2013',
+            '01/2013',
+            '12:00:00',
+            '1/1/1/1',
+            'this_is_not_a_datetime',
+            '51a',
+            9,
+            datetime(2011, 1, 1),
+        ]
+
+        for invalid_dt in invalid_dts:
+            self.assertTrue(tools._guess_datetime_format(invalid_dt) is None)
+
+    def test_guess_datetime_format_nopadding(self):
+        # GH 11142
+        dt_string_to_format = (('2011-1-1', '%Y-%m-%d'),
+                               ('30-1-2011', '%d-%m-%Y'),
+                               ('1/1/2011', '%m/%d/%Y'),
+                               ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'),
+                               ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'),
+                               ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S'))
+
+        for dt_string, dt_format in dt_string_to_format:
+            self.assertEqual(
+                tools._guess_datetime_format(dt_string),
+                dt_format
+            )
+
+    def test_guess_datetime_format_for_array(self):
+        tm._skip_if_not_us_locale()
+        expected_format = '%Y-%m-%d %H:%M:%S.%f'
+        dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format)
+
+        test_arrays = [
+            np.array([dt_string, dt_string, dt_string], dtype='O'),
+            np.array([np.nan, np.nan, dt_string], dtype='O'),
+            np.array([dt_string, 'random_string'], dtype='O'),
+        ]
+
+        for test_array in test_arrays:
+            self.assertEqual(
+                tools._guess_datetime_format_for_array(test_array),
+                expected_format
+            )
+
+        format_for_string_of_nans =
tools._guess_datetime_format_for_array( + np.array( + [np.nan, np.nan, np.nan], dtype='O')) + self.assertTrue(format_for_string_of_nans is None) + + +class TestToDatetimeInferFormat(tm.TestCase): + + def test_to_datetime_infer_datetime_format_consistent_format(self): + s = pd.Series(pd.date_range('20000101', periods=50, freq='H')) + + test_formats = ['%m-%d-%Y', '%m/%d/%Y %H:%M:%S.%f', + '%Y-%m-%dT%H:%M:%S.%f'] + + for test_format in test_formats: + s_as_dt_strings = s.apply(lambda x: x.strftime(test_format)) + + with_format = pd.to_datetime(s_as_dt_strings, format=test_format) + no_infer = pd.to_datetime(s_as_dt_strings, + infer_datetime_format=False) + yes_infer = pd.to_datetime(s_as_dt_strings, + infer_datetime_format=True) + + # Whether the format is explicitly passed, it is inferred, or + # it is not inferred, the results should all be the same + self.assert_series_equal(with_format, no_infer) + self.assert_series_equal(no_infer, yes_infer) + + def test_to_datetime_infer_datetime_format_inconsistent_format(self): + s = pd.Series(np.array(['01/01/2011 00:00:00', + '01-02-2011 00:00:00', + '2011-01-03T00:00:00'])) + + # When the format is inconsistent, infer_datetime_format should just + # fallback to the default parsing + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), + pd.to_datetime(s, infer_datetime_format=True)) + + s = pd.Series(np.array(['Jan/01/2011', 'Feb/01/2011', 'Mar/01/2011'])) + + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), + pd.to_datetime(s, infer_datetime_format=True)) + + def test_to_datetime_infer_datetime_format_series_with_nans(self): + s = pd.Series(np.array(['01/01/2011 00:00:00', np.nan, + '01/03/2011 00:00:00', np.nan])) + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), + pd.to_datetime(s, infer_datetime_format=True)) + + def test_to_datetime_infer_datetime_format_series_starting_with_nans(self): + s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00', + '01/02/2011 00:00:00', '01/03/2011 00:00:00'])) + + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), + pd.to_datetime(s, infer_datetime_format=True)) + + def test_to_datetime_iso8601_noleading_0s(self): + # GH 11871 + s = pd.Series(['2014-1-1', '2014-2-2', '2015-3-3']) + expected = pd.Series([pd.Timestamp('2014-01-01'), + pd.Timestamp('2014-02-02'), + pd.Timestamp('2015-03-03')]) + tm.assert_series_equal(pd.to_datetime(s), expected) + tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d'), expected) + + +class TimeConversionFormats(tm.TestCase): + def test_to_datetime_format(self): + values = ['1/1/2000', '1/2/2000', '1/3/2000'] + + results1 = [Timestamp('20000101'), Timestamp('20000201'), + Timestamp('20000301')] + results2 = [Timestamp('20000101'), Timestamp('20000102'), + Timestamp('20000103')] + for vals, expecteds in [(values, (Index(results1), Index(results2))), + (Series(values), + (Series(results1), Series(results2))), + (values[0], (results1[0], results2[0])), + (values[1], (results1[1], results2[1])), + (values[2], (results1[2], results2[2]))]: + + for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']): + result = to_datetime(vals, format=fmt) + expected = expecteds[i] + + if isinstance(expected, Series): + assert_series_equal(result, Series(expected)) + elif isinstance(expected, Timestamp): + self.assertEqual(result, expected) + else: + tm.assert_index_equal(result, expected) + + def test_to_datetime_format_YYYYMMDD(self): + s = Series([19801222, 19801222] + [19810105] * 5) + expected = 
Series([Timestamp(x) for x in s.apply(str)])
+
+        result = to_datetime(s, format='%Y%m%d')
+        assert_series_equal(result, expected)
+
+        result = to_datetime(s.apply(str), format='%Y%m%d')
+        assert_series_equal(result, expected)
+
+        # with NaT
+        expected = Series([Timestamp("19801222"), Timestamp("19801222")] +
+                          [Timestamp("19810105")] * 5)
+        expected[2] = np.nan
+        s[2] = np.nan
+
+        result = to_datetime(s, format='%Y%m%d')
+        assert_series_equal(result, expected)
+
+        # string with NaT
+        s = s.apply(str)
+        s[2] = 'nat'
+        result = to_datetime(s, format='%Y%m%d')
+        assert_series_equal(result, expected)
+
+        # coercion
+        # GH 7930
+        s = Series([20121231, 20141231, 99991231])
+        result = pd.to_datetime(s, format='%Y%m%d', errors='ignore')
+        expected = Series([datetime(2012, 12, 31),
+                           datetime(2014, 12, 31), datetime(9999, 12, 31)],
+                          dtype=object)
+        self.assert_series_equal(result, expected)
+
+        result = pd.to_datetime(s, format='%Y%m%d', errors='coerce')
+        expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]')
+        assert_series_equal(result, expected)
+
+    # GH 10178
+    def test_to_datetime_format_integer(self):
+        s = Series([2000, 2001, 2002])
+        expected = Series([Timestamp(x) for x in s.apply(str)])
+
+        result = to_datetime(s, format='%Y')
+        assert_series_equal(result, expected)
+
+        s = Series([200001, 200105, 200206])
+        expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str)
+                           ])
+
+        result = to_datetime(s, format='%Y%m')
+        assert_series_equal(result, expected)
+
+    def test_to_datetime_format_microsecond(self):
+
+        # these are locale dependent
+        lang, _ = locale.getlocale()
+        month_abbr = calendar.month_abbr[4]
+        val = '01-{}-2011 00:00:01.978'.format(month_abbr)
+
+        format = '%d-%b-%Y %H:%M:%S.%f'
+        result = to_datetime(val, format=format)
+        exp = datetime.strptime(val, format)
+        self.assertEqual(result, exp)
+
+    def test_to_datetime_format_time(self):
+        data = [
+            ['01/10/2010 15:20', '%m/%d/%Y %H:%M',
+             Timestamp('2010-01-10 15:20')],
+            ['01/10/2010 05:43', '%m/%d/%Y %I:%M',
+             Timestamp('2010-01-10 05:43')],
+            ['01/10/2010 13:56:01', '%m/%d/%Y %H:%M:%S',
+             Timestamp('2010-01-10 13:56:01')]  # ,
+            # ['01/10/2010 08:14 PM', '%m/%d/%Y %I:%M %p',
+            #  Timestamp('2010-01-10 20:14')],
+            # ['01/10/2010 07:40 AM', '%m/%d/%Y %I:%M %p',
+            #  Timestamp('2010-01-10 07:40')],
+            # ['01/10/2010 09:12:56 AM', '%m/%d/%Y %I:%M:%S %p',
+            #  Timestamp('2010-01-10 09:12:56')]
+        ]
+        for s, format, dt in data:
+            self.assertEqual(to_datetime(s, format=format), dt)
+
+    def test_to_datetime_with_non_exact(self):
+        # GH 10834
+        _skip_if_has_locale()
+
+        # 8904
+        # exact kw
+        if sys.version_info < (2, 7):
+            raise nose.SkipTest('on python version < 2.7')
+
+        s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00',
+                    '19MAY11 00:00:00Z'])
+        result = to_datetime(s, format='%d%b%y', exact=False)
+        expected = to_datetime(s.str.extract(r'(\d+\w+\d+)', expand=False),
+                               format='%d%b%y')
+        assert_series_equal(result, expected)
+
+    def test_parse_nanoseconds_with_formula(self):
+
+        # GH8989
+        # truncating the nanoseconds when a format was provided
+        for v in ["2012-01-01 09:00:00.000000001",
+                  "2012-01-01 09:00:00.000001",
+                  "2012-01-01 09:00:00.001",
+                  "2012-01-01 09:00:00.001000",
+                  "2012-01-01 09:00:00.001000000", ]:
+            expected = pd.to_datetime(v)
+            result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f")
+            self.assertEqual(result, expected)
+
+    def test_to_datetime_format_weeks(self):
+        data = [
+            ['2009324', '%Y%W%w', Timestamp('2009-08-13')],
+            ['2013020', '%Y%U%w', Timestamp('2013-01-13')]
+        ]
+
for s, format, dt in data: + self.assertEqual(to_datetime(s, format=format), dt) + + +class TestSlicing(tm.TestCase): + def test_slice_year(self): + dti = DatetimeIndex(freq='B', start=datetime(2005, 1, 1), periods=500) + + s = Series(np.arange(len(dti)), index=dti) + result = s['2005'] + expected = s[s.index.year == 2005] + assert_series_equal(result, expected) + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + result = df.loc['2005'] + expected = df[df.index.year == 2005] + assert_frame_equal(result, expected) + + rng = date_range('1/1/2000', '1/1/2010') + + result = rng.get_loc('2009') + expected = slice(3288, 3653) + self.assertEqual(result, expected) + + def test_slice_quarter(self): + dti = DatetimeIndex(freq='D', start=datetime(2000, 6, 1), periods=500) + + s = Series(np.arange(len(dti)), index=dti) + self.assertEqual(len(s['2001Q1']), 90) + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + self.assertEqual(len(df.loc['1Q01']), 90) + + def test_slice_month(self): + dti = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500) + s = Series(np.arange(len(dti)), index=dti) + self.assertEqual(len(s['2005-11']), 30) + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + self.assertEqual(len(df.loc['2005-11']), 30) + + assert_series_equal(s['2005-11'], s['11-2005']) + + def test_partial_slice(self): + rng = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-05':'2006-02'] + expected = s['20050501':'20060228'] + assert_series_equal(result, expected) + + result = s['2005-05':] + expected = s['20050501':] + assert_series_equal(result, expected) + + result = s[:'2006-02'] + expected = s[:'20060228'] + assert_series_equal(result, expected) + + result = s['2005-1-1'] + self.assertEqual(result, s.iloc[0]) + + self.assertRaises(Exception, s.__getitem__, '2004-12-31') + + def test_partial_slice_daily(self): + rng = DatetimeIndex(freq='H', start=datetime(2005, 1, 31), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-1-31'] + assert_series_equal(result, s.iloc[:24]) + + self.assertRaises(Exception, s.__getitem__, '2004-12-31 00') + + def test_partial_slice_hourly(self): + rng = DatetimeIndex(freq='T', start=datetime(2005, 1, 1, 20, 0, 0), + periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-1-1'] + assert_series_equal(result, s.iloc[:60 * 4]) + + result = s['2005-1-1 20'] + assert_series_equal(result, s.iloc[:60]) + + self.assertEqual(s['2005-1-1 20:00'], s.iloc[0]) + self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:15') + + def test_partial_slice_minutely(self): + rng = DatetimeIndex(freq='S', start=datetime(2005, 1, 1, 23, 59, 0), + periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-1-1 23:59'] + assert_series_equal(result, s.iloc[:60]) + + result = s['2005-1-1'] + assert_series_equal(result, s.iloc[:60]) + + self.assertEqual(s[Timestamp('2005-1-1 23:59:00')], s.iloc[0]) + self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:00:00') + + def test_partial_slice_second_precision(self): + rng = DatetimeIndex(start=datetime(2005, 1, 1, 0, 0, 59, + microsecond=999990), + periods=20, freq='US') + s = Series(np.arange(20), rng) + + assert_series_equal(s['2005-1-1 00:00'], s.iloc[:10]) + assert_series_equal(s['2005-1-1 00:00:59'], s.iloc[:10]) + + assert_series_equal(s['2005-1-1 00:01'], s.iloc[10:]) + assert_series_equal(s['2005-1-1 00:01:00'], s.iloc[10:]) + + 
self.assertEqual(s[Timestamp('2005-1-1 00:00:59.999990')], s.iloc[0]) + self.assertRaisesRegexp(KeyError, '2005-1-1 00:00:00', + lambda: s['2005-1-1 00:00:00']) + + def test_partial_slicing_dataframe(self): + # GH14856 + # Test various combinations of string slicing resolution vs. + # index resolution + # - If string resolution is less precise than index resolution, + # string is considered a slice + # - If string resolution is equal to or more precise than index + # resolution, string is considered an exact match + formats = ['%Y', '%Y-%m', '%Y-%m-%d', '%Y-%m-%d %H', + '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S'] + resolutions = ['year', 'month', 'day', 'hour', 'minute', 'second'] + for rnum, resolution in enumerate(resolutions[2:], 2): + # we check only 'day', 'hour', 'minute' and 'second' + unit = Timedelta("1 " + resolution) + middate = datetime(2012, 1, 1, 0, 0, 0) + index = DatetimeIndex([middate - unit, + middate, middate + unit]) + values = [1, 2, 3] + df = DataFrame({'a': values}, index, dtype=np.int64) + self.assertEqual(df.index.resolution, resolution) + + # Timestamp with the same resolution as index + # Should be exact match for Series (return scalar) + # and raise KeyError for Frame + for timestamp, expected in zip(index, values): + ts_string = timestamp.strftime(formats[rnum]) + # make ts_string as precise as index + result = df['a'][ts_string] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, expected) + self.assertRaises(KeyError, df.__getitem__, ts_string) + + # Timestamp with resolution less precise than index + for fmt in formats[:rnum]: + for element, theslice in [[0, slice(None, 1)], + [1, slice(1, None)]]: + ts_string = index[element].strftime(fmt) + + # Series should return slice + result = df['a'][ts_string] + expected = df['a'][theslice] + assert_series_equal(result, expected) + + # Frame should return slice as well + result = df[ts_string] + expected = df[theslice] + assert_frame_equal(result, expected) + + # Timestamp with resolution more precise than index + # Compatible with existing key + # Should return scalar for Series + # and raise KeyError for Frame + for fmt in formats[rnum + 1:]: + ts_string = index[1].strftime(fmt) + result = df['a'][ts_string] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, 2) + self.assertRaises(KeyError, df.__getitem__, ts_string) + + # Not compatible with existing key + # Should raise KeyError + for fmt, res in list(zip(formats, resolutions))[rnum + 1:]: + ts = index[1] + Timedelta("1 " + res) + ts_string = ts.strftime(fmt) + self.assertRaises(KeyError, df['a'].__getitem__, ts_string) + self.assertRaises(KeyError, df.__getitem__, ts_string) + + def test_partial_slicing_with_multiindex(self): + + # GH 4758 + # partial string indexing with a multi-index buggy + df = DataFrame({'ACCOUNT': ["ACCT1", "ACCT1", "ACCT1", "ACCT2"], + 'TICKER': ["ABC", "MNP", "XYZ", "XYZ"], + 'val': [1, 2, 3, 4]}, + index=date_range("2013-06-19 09:30:00", + periods=4, freq='5T')) + df_multi = df.set_index(['ACCOUNT', 'TICKER'], append=True) + + expected = DataFrame([ + [1] + ], index=Index(['ABC'], name='TICKER'), columns=['val']) + result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1')] + assert_frame_equal(result, expected) + + expected = df_multi.loc[ + (pd.Timestamp('2013-06-19 09:30:00', tz=None), 'ACCT1', 'ABC')] + result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1', 'ABC')] + assert_series_equal(result, expected) + + # this is a KeyError as we don't do partial string selection on + # multi-levels + def f(): + 
df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')] + + self.assertRaises(KeyError, f) + + # GH 4294 + # partial slice on a series mi + s = pd.DataFrame(randn(1000, 1000), index=pd.date_range( + '2000-1-1', periods=1000)).stack() + + s2 = s[:-1].copy() + expected = s2['2000-1-4'] + result = s2[pd.Timestamp('2000-1-4')] + assert_series_equal(result, expected) + + result = s[pd.Timestamp('2000-1-4')] + expected = s['2000-1-4'] + assert_series_equal(result, expected) + + df2 = pd.DataFrame(s) + expected = df2.xs('2000-1-4') + result = df2.loc[pd.Timestamp('2000-1-4')] + assert_frame_equal(result, expected) + + def test_shift(self): + ts = Series(np.random.randn(5), + index=date_range('1/1/2000', periods=5, freq='H')) + + result = ts.shift(1, freq='5T') + exp_index = ts.index.shift(1, freq='5T') + tm.assert_index_equal(result.index, exp_index) + + # GH #1063, multiple of same base + result = ts.shift(1, freq='4H') + exp_index = ts.index + offsets.Hour(4) + tm.assert_index_equal(result.index, exp_index) + + idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) + self.assertRaises(ValueError, idx.shift, 1) + + def test_setops_preserve_freq(self): + for tz in [None, 'Asia/Tokyo', 'US/Eastern']: + rng = date_range('1/1/2000', '1/1/2002', name='idx', tz=tz) + + result = rng[:50].union(rng[50:100]) + self.assertEqual(result.name, rng.name) + self.assertEqual(result.freq, rng.freq) + self.assertEqual(result.tz, rng.tz) + + result = rng[:50].union(rng[30:100]) + self.assertEqual(result.name, rng.name) + self.assertEqual(result.freq, rng.freq) + self.assertEqual(result.tz, rng.tz) + + result = rng[:50].union(rng[60:100]) + self.assertEqual(result.name, rng.name) + self.assertIsNone(result.freq) + self.assertEqual(result.tz, rng.tz) + + result = rng[:50].intersection(rng[25:75]) + self.assertEqual(result.name, rng.name) + self.assertEqual(result.freqstr, 'D') + self.assertEqual(result.tz, rng.tz) + + nofreq = DatetimeIndex(list(rng[25:75]), name='other') + result = rng[:50].union(nofreq) + self.assertIsNone(result.name) + self.assertEqual(result.freq, rng.freq) + self.assertEqual(result.tz, rng.tz) + + result = rng[:50].intersection(nofreq) + self.assertIsNone(result.name) + self.assertEqual(result.freq, rng.freq) + self.assertEqual(result.tz, rng.tz) + + def test_min_max(self): + rng = date_range('1/1/2000', '12/31/2000') + rng2 = rng.take(np.random.permutation(len(rng))) + + the_min = rng2.min() + the_max = rng2.max() + tm.assertIsInstance(the_min, Timestamp) + tm.assertIsInstance(the_max, Timestamp) + self.assertEqual(the_min, rng[0]) + self.assertEqual(the_max, rng[-1]) + + self.assertEqual(rng.min(), rng[0]) + self.assertEqual(rng.max(), rng[-1]) + + def test_min_max_series(self): + rng = date_range('1/1/2000', periods=10, freq='4h') + lvls = ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C', 'C'] + df = DataFrame({'TS': rng, 'V': np.random.randn(len(rng)), 'L': lvls}) + + result = df.TS.max() + exp = Timestamp(df.TS.iat[-1]) + self.assertTrue(isinstance(result, Timestamp)) + self.assertEqual(result, exp) + + result = df.TS.min() + exp = Timestamp(df.TS.iat[0]) + self.assertTrue(isinstance(result, Timestamp)) + self.assertEqual(result, exp) + + def test_from_M8_structured(self): + dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))] + arr = np.array(dates, + dtype=[('Date', 'M8[us]'), ('Forecasting', 'M8[us]')]) + df = DataFrame(arr) + + self.assertEqual(df['Date'][0], dates[0][0]) + self.assertEqual(df['Forecasting'][0], dates[0][1]) + + s = Series(arr['Date']) + 
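+ # values read from the structured array's 'Date' field should come
+ # back boxed as Timestamp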
tm.assertIsInstance(s[0], Timestamp) + self.assertEqual(s[0], dates[0][0]) + + s = Series.from_array(arr['Date'], Index([0])) + self.assertEqual(s[0], dates[0][0]) + + def test_get_level_values_box(self): + from pandas import MultiIndex + + dates = date_range('1/1/2000', periods=4) + levels = [dates, [0, 1]] + labels = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]] + + index = MultiIndex(levels=levels, labels=labels) + + self.assertTrue(isinstance(index.get_level_values(0)[0], Timestamp)) + + def test_frame_apply_dont_convert_datetime64(self): + from pandas.tseries.offsets import BDay + df = DataFrame({'x1': [datetime(1996, 1, 1)]}) + + df = df.applymap(lambda x: x + BDay()) + df = df.applymap(lambda x: x + BDay()) + + self.assertTrue(df.x1.dtype == 'M8[ns]') + + def test_partial_slice_doesnt_require_monotonicity(self): + # For historical reasons. + s = pd.Series(np.arange(10), pd.date_range('2014-01-01', periods=10)) + + nonmonotonic = s[[3, 5, 4]] + expected = nonmonotonic.iloc[:0] + timestamp = pd.Timestamp('2014-01-10') + + assert_series_equal(nonmonotonic['2014-01-10':], expected) + self.assertRaisesRegexp(KeyError, + r"Timestamp\('2014-01-10 00:00:00'\)", + lambda: nonmonotonic[timestamp:]) + + assert_series_equal(nonmonotonic.loc['2014-01-10':], expected) + self.assertRaisesRegexp(KeyError, + r"Timestamp\('2014-01-10 00:00:00'\)", + lambda: nonmonotonic.loc[timestamp:]) + + +class TestToDatetime(tm.TestCase): + _multiprocess_can_split_ = True + + def test_to_datetime_dt64s(self): + in_bound_dts = [ + np.datetime64('2000-01-01'), + np.datetime64('2000-01-02'), + ] + + for dt in in_bound_dts: + self.assertEqual(pd.to_datetime(dt), Timestamp(dt)) + + oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ] + + for dt in oob_dts: + self.assertRaises(ValueError, pd.to_datetime, dt, errors='raise') + self.assertRaises(ValueError, tslib.Timestamp, dt) + self.assertIs(pd.to_datetime(dt, errors='coerce'), NaT) + + def test_to_datetime_array_of_dt64s(self): + dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] + + # Assuming all datetimes are in bounds, to_datetime() returns + # an array that is equal to Timestamp() parsing + self.assert_numpy_array_equal( + pd.to_datetime(dts, box=False), + np.array([Timestamp(x).asm8 for x in dts]) + ) + + # A list of datetimes where the last one is out of bounds + dts_with_oob = dts + [np.datetime64('9999-01-01')] + + self.assertRaises(ValueError, pd.to_datetime, dts_with_oob, + errors='raise') + + self.assert_numpy_array_equal( + pd.to_datetime(dts_with_oob, box=False, errors='coerce'), + np.array( + [ + Timestamp(dts_with_oob[0]).asm8, + Timestamp(dts_with_oob[1]).asm8, + iNaT, + ], + dtype='M8' + ) + ) + + # With errors='ignore', out of bounds datetime64s + # are converted to their .item(), which depending on the version of + # numpy is either a python datetime.datetime or datetime.date + self.assert_numpy_array_equal( + pd.to_datetime(dts_with_oob, box=False, errors='ignore'), + np.array( + [dt.item() for dt in dts_with_oob], + dtype='O' + ) + ) + + def test_to_datetime_tz(self): + + # xref 8260 + # uniform returns a DatetimeIndex + arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), + pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')] + result = pd.to_datetime(arr) + expected = DatetimeIndex( + ['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific') + tm.assert_index_equal(result, expected) + + # mixed tzs will raise + arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'), +
pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] + self.assertRaises(ValueError, lambda: pd.to_datetime(arr)) + + def test_to_datetime_tz_pytz(self): + + # xref 8260 + tm._skip_if_no_pytz() + import pytz + + us_eastern = pytz.timezone('US/Eastern') + arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, + hour=3, minute=0)), + us_eastern.localize(datetime(year=2000, month=6, day=1, + hour=3, minute=0))], + dtype=object) + result = pd.to_datetime(arr, utc=True) + expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', + '2000-06-01 07:00:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + tm.assert_index_equal(result, expected) + + def test_to_datetime_utc_is_true(self): + # See gh-11934 + start = pd.Timestamp('2014-01-01', tz='utc') + end = pd.Timestamp('2014-01-03', tz='utc') + date_range = pd.bdate_range(start, end) + + result = pd.to_datetime(date_range, utc=True) + expected = pd.DatetimeIndex(data=date_range) + tm.assert_index_equal(result, expected) + + def test_to_datetime_tz_psycopg2(self): + + # xref 8260 + try: + import psycopg2 + except ImportError: + raise nose.SkipTest("no psycopg2 installed") + + # misc cases + tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None) + tz2 = psycopg2.tz.FixedOffsetTimezone(offset=-240, name=None) + arr = np.array([datetime(2000, 1, 1, 3, 0, tzinfo=tz1), + datetime(2000, 6, 1, 3, 0, tzinfo=tz2)], + dtype=object) + + result = pd.to_datetime(arr, errors='coerce', utc=True) + expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', + '2000-06-01 07:00:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + tm.assert_index_equal(result, expected) + + # dtype coercion + i = pd.DatetimeIndex([ + '2000-01-01 08:00:00+00:00' + ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)) + self.assertTrue(is_datetime64_ns_dtype(i)) + + # tz coercion + result = pd.to_datetime(i, errors='coerce') + tm.assert_index_equal(result, i) + + result = pd.to_datetime(i, errors='coerce', utc=True) + expected = pd.DatetimeIndex(['2000-01-01 13:00:00'], + dtype='datetime64[ns, UTC]') + tm.assert_index_equal(result, expected) + + def test_datetime_bool(self): + # GH13176 + with self.assertRaises(TypeError): + to_datetime(False) + self.assertTrue(to_datetime(False, errors="coerce") is tslib.NaT) + self.assertEqual(to_datetime(False, errors="ignore"), False) + with self.assertRaises(TypeError): + to_datetime(True) + self.assertTrue(to_datetime(True, errors="coerce") is tslib.NaT) + self.assertEqual(to_datetime(True, errors="ignore"), True) + with self.assertRaises(TypeError): + to_datetime([False, datetime.today()]) + with self.assertRaises(TypeError): + to_datetime(['20130101', True]) + tm.assert_index_equal(to_datetime([0, False, tslib.NaT, 0.0], + errors="coerce"), + DatetimeIndex([to_datetime(0), tslib.NaT, + tslib.NaT, to_datetime(0)])) + + def test_datetime_invalid_datatype(self): + # GH13176 + + with self.assertRaises(TypeError): + pd.to_datetime(bool) + with self.assertRaises(TypeError): + pd.to_datetime(pd.to_datetime) + + def test_unit(self): + # GH 11758 + # test proper behavior with errors + + with self.assertRaises(ValueError): + to_datetime([1], unit='D', format='%Y%m%d') + + values = [11111111, 1, 1.0, tslib.iNaT, pd.NaT, np.nan, + 'NaT', ''] + result = to_datetime(values, unit='D', errors='ignore') + expected = Index([11111111, Timestamp('1970-01-02'), + Timestamp('1970-01-02'), pd.NaT, + pd.NaT, pd.NaT, pd.NaT, pd.NaT], + dtype=object) + tm.assert_index_equal(result, expected) + + result = to_datetime(values, unit='D',
errors='coerce') + expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02', + 'NaT', 'NaT', 'NaT', 'NaT', 'NaT']) + tm.assert_index_equal(result, expected) + + with self.assertRaises(tslib.OutOfBoundsDatetime): + to_datetime(values, unit='D', errors='raise') + + values = [1420043460000, tslib.iNaT, pd.NaT, np.nan, 'NaT'] + + result = to_datetime(values, errors='ignore', unit='s') + expected = Index([1420043460000, pd.NaT, pd.NaT, + pd.NaT, pd.NaT], dtype=object) + tm.assert_index_equal(result, expected) + + result = to_datetime(values, errors='coerce', unit='s') + expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT']) + tm.assert_index_equal(result, expected) + + with self.assertRaises(tslib.OutOfBoundsDatetime): + to_datetime(values, errors='raise', unit='s') + + # if we have a string, then we raise a ValueError + # and NOT an OutOfBoundsDatetime + for val in ['foo', Timestamp('20130101')]: + try: + to_datetime(val, errors='raise', unit='s') + except tslib.OutOfBoundsDatetime: + raise AssertionError("incorrect exception raised") + except ValueError: + pass + + def test_unit_consistency(self): + + # consistency of conversions + expected = Timestamp('1970-05-09 14:25:11') + result = pd.to_datetime(11111111, unit='s', errors='raise') + self.assertEqual(result, expected) + self.assertIsInstance(result, Timestamp) + + result = pd.to_datetime(11111111, unit='s', errors='coerce') + self.assertEqual(result, expected) + self.assertIsInstance(result, Timestamp) + + result = pd.to_datetime(11111111, unit='s', errors='ignore') + self.assertEqual(result, expected) + self.assertIsInstance(result, Timestamp) + + def test_unit_with_numeric(self): + + # GH 13180 + # coercions from floats/ints are ok + expected = DatetimeIndex(['2015-06-19 05:33:20', + '2015-05-27 22:33:20']) + arr1 = [1.434692e+18, 1.432766e+18] + arr2 = np.array(arr1).astype('int64') + for errors in ['ignore', 'raise', 'coerce']: + result = pd.to_datetime(arr1, errors=errors) + tm.assert_index_equal(result, expected) + + result = pd.to_datetime(arr2, errors=errors) + tm.assert_index_equal(result, expected) + + # but we want to make sure that we are coercing + # if we have ints/strings + expected = DatetimeIndex(['NaT', + '2015-06-19 05:33:20', + '2015-05-27 22:33:20']) + arr = ['foo', 1.434692e+18, 1.432766e+18] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + expected = DatetimeIndex(['2015-06-19 05:33:20', + '2015-05-27 22:33:20', + 'NaT', + 'NaT']) + arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT'] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + def test_unit_mixed(self): + + # mixed integers/datetimes + expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT']) + arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + with self.assertRaises(ValueError): + pd.to_datetime(arr, errors='raise') + + expected = DatetimeIndex(['NaT', + 'NaT', + '2013-01-01']) + arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + with self.assertRaises(ValueError): + pd.to_datetime(arr, errors='raise') + + def test_dataframe(self): + + df = DataFrame({'year': [2015, 2016], + 'month': [2, 3], + 'day': [4, 5], + 'hour': [6, 7], + 'minute': [58, 59], + 'second': [10, 11], + 'ms': [1, 1], + 'us': [2, 2], + 'ns': [3, 3]}) + + result = to_datetime({'year': df['year'], + 
'month': df['month'], + 'day': df['day']}) + expected = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160305 00:0:00')]) + assert_series_equal(result, expected) + + # dict-like + result = to_datetime(df[['year', 'month', 'day']].to_dict()) + assert_series_equal(result, expected) + + # dict but with constructable + df2 = df[['year', 'month', 'day']].to_dict() + df2['month'] = 2 + result = to_datetime(df2) + expected2 = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160205 00:0:00')]) + assert_series_equal(result, expected2) + + # unit mappings + units = [{'year': 'years', + 'month': 'months', + 'day': 'days', + 'hour': 'hours', + 'minute': 'minutes', + 'second': 'seconds'}, + {'year': 'year', + 'month': 'month', + 'day': 'day', + 'hour': 'hour', + 'minute': 'minute', + 'second': 'second'}, + ] + + for d in units: + result = to_datetime(df[list(d.keys())].rename(columns=d)) + expected = Series([Timestamp('20150204 06:58:10'), + Timestamp('20160305 07:59:11')]) + assert_series_equal(result, expected) + + d = {'year': 'year', + 'month': 'month', + 'day': 'day', + 'hour': 'hour', + 'minute': 'minute', + 'second': 'second', + 'ms': 'ms', + 'us': 'us', + 'ns': 'ns'} + + result = to_datetime(df.rename(columns=d)) + expected = Series([Timestamp('20150204 06:58:10.001002003'), + Timestamp('20160305 07:59:11.001002003')]) + assert_series_equal(result, expected) + + # coerce back to int + result = to_datetime(df.astype(str)) + assert_series_equal(result, expected) + + # passing coerce + df2 = DataFrame({'year': [2015, 2016], + 'month': [2, 20], + 'day': [4, 5]}) + with self.assertRaises(ValueError): + to_datetime(df2) + result = to_datetime(df2, errors='coerce') + expected = Series([Timestamp('20150204 00:00:00'), + pd.NaT]) + assert_series_equal(result, expected) + + # extra columns + with self.assertRaises(ValueError): + df2 = df.copy() + df2['foo'] = 1 + to_datetime(df2) + + # not enough + for c in [['year'], + ['year', 'month'], + ['year', 'month', 'second'], + ['month', 'day'], + ['year', 'day', 'second']]: + with self.assertRaises(ValueError): + to_datetime(df[c]) + + # duplicates + df2 = DataFrame({'year': [2015, 2016], + 'month': [2, 20], + 'day': [4, 5]}) + df2.columns = ['year', 'year', 'day'] + with self.assertRaises(ValueError): + to_datetime(df2) + + df2 = DataFrame({'year': [2015, 2016], + 'month': [2, 20], + 'day': [4, 5], + 'hour': [4, 5]}) + df2.columns = ['year', 'month', 'day', 'day'] + with self.assertRaises(ValueError): + to_datetime(df2) + + def test_dataframe_dtypes(self): + # #13451 + df = DataFrame({'year': [2015, 2016], + 'month': [2, 3], + 'day': [4, 5]}) + + # int16 + result = to_datetime(df.astype('int16')) + expected = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160305 00:00:00')]) + assert_series_equal(result, expected) + + # mixed dtypes + df['month'] = df['month'].astype('int8') + df['day'] = df['day'].astype('int8') + result = to_datetime(df) + expected = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160305 00:00:00')]) + assert_series_equal(result, expected) + + # float + df = DataFrame({'year': [2000, 2001], + 'month': [1.5, 1], + 'day': [1, 1]}) + with self.assertRaises(ValueError): + to_datetime(df) + + def test_index_to_datetime(self): + idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = idx.to_datetime() + expected = DatetimeIndex(pd.to_datetime(idx.values)) + tm.assert_index_equal(result, expected) + + with 
tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + today = datetime.today() + idx = Index([today], dtype=object) + result = idx.to_datetime() + expected = DatetimeIndex([today]) + tm.assert_index_equal(result, expected) + + def test_to_datetime_iso8601(self): + result = to_datetime(["2012-01-01 00:00:00"]) + exp = Timestamp("2012-01-01 00:00:00") + self.assertEqual(result[0], exp) + + result = to_datetime(['20121001']) # bad iso 8601 + exp = Timestamp('2012-10-01') + self.assertEqual(result[0], exp) + + def test_to_datetime_default(self): + rs = to_datetime('2001') + xp = datetime(2001, 1, 1) + self.assertEqual(rs, xp) + + # dayfirst is essentially broken + + # to_datetime('01-13-2012', dayfirst=True) + # self.assertRaises(ValueError, to_datetime('01-13-2012', + # dayfirst=True)) + + def test_to_datetime_on_datetime64_series(self): + # #2699 + s = Series(date_range('1/1/2000', periods=10)) + + result = to_datetime(s) + self.assertEqual(result[0], s[0]) + + def test_to_datetime_with_space_in_series(self): + # GH 6428 + s = Series(['10/18/2006', '10/18/2008', ' ']) + tm.assertRaises(ValueError, lambda: to_datetime(s, errors='raise')) + result_coerce = to_datetime(s, errors='coerce') + expected_coerce = Series([datetime(2006, 10, 18), + datetime(2008, 10, 18), + pd.NaT]) + tm.assert_series_equal(result_coerce, expected_coerce) + result_ignore = to_datetime(s, errors='ignore') + tm.assert_series_equal(result_ignore, s) + + def test_to_datetime_with_apply(self): + # this is only locale tested with US/None locales + _skip_if_has_locale() + + # GH 5195 + # with a format and coerce a single item to_datetime fails + td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1, 2, 3]) + expected = pd.to_datetime(td, format='%b %y') + result = td.apply(pd.to_datetime, format='%b %y') + assert_series_equal(result, expected) + + td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3]) + self.assertRaises(ValueError, + lambda: pd.to_datetime(td, format='%b %y', + errors='raise')) + self.assertRaises(ValueError, + lambda: td.apply(pd.to_datetime, format='%b %y', + errors='raise')) + expected = pd.to_datetime(td, format='%b %y', errors='coerce') + + result = td.apply( + lambda x: pd.to_datetime(x, format='%b %y', errors='coerce')) + assert_series_equal(result, expected) + + def test_to_datetime_types(self): + + # empty string + result = to_datetime('') + self.assertIs(result, NaT) + + result = to_datetime(['', '']) + self.assertTrue(isnull(result).all()) + + # ints + result = Timestamp(0) + expected = to_datetime(0) + self.assertEqual(result, expected) + + # GH 3888 (strings) + expected = to_datetime(['2012'])[0] + result = to_datetime('2012') + self.assertEqual(result, expected) + + # array = ['2012','20120101','20120101 12:01:01'] + array = ['20120101', '20120101 12:01:01'] + expected = list(to_datetime(array)) + result = lmap(Timestamp, array) + tm.assert_almost_equal(result, expected) + + # currently fails ### + # result = Timestamp('2012') + # expected = to_datetime('2012') + # self.assertEqual(result, expected) + + def test_to_datetime_unprocessable_input(self): + # GH 4928 + self.assert_numpy_array_equal( + to_datetime([1, '1'], errors='ignore'), + np.array([1, '1'], dtype='O') + ) + self.assertRaises(TypeError, to_datetime, [1, '1'], errors='raise') + + def test_to_datetime_other_datetime64_units(self): + # 5/25/2012 + scalar = np.int64(1337904000000000).view('M8[us]') + as_obj = scalar.astype('O') + + index = DatetimeIndex([scalar]) + self.assertEqual(index[0], scalar.astype('O')) + +
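+ # non-nanosecond datetime64 values (M8[us] here) are converted to
+ # M8[ns], so boxing the scalar should match its .astype('O') datetime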
value = Timestamp(scalar) + self.assertEqual(value, as_obj) + + def test_to_datetime_list_of_integers(self): + rng = date_range('1/1/2000', periods=20) + rng = DatetimeIndex(rng.values) + + ints = list(rng.asi8) + + result = DatetimeIndex(ints) + + tm.assert_index_equal(rng, result) + + def test_to_datetime_freq(self): + xp = bdate_range('2000-1-1', periods=10, tz='UTC') + rs = xp.to_datetime() + self.assertEqual(xp.freq, rs.freq) + self.assertEqual(xp.tzinfo, rs.tzinfo) + if __name__ == '__main__': - import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py deleted file mode 100644 index ff6cc4bb9853c..0000000000000 --- a/pandas/tseries/tests/test_timeseries.py +++ /dev/null @@ -1,4184 +0,0 @@ -# pylint: disable-msg=E1101,W0612 -import locale -import calendar -import operator -import sys -from datetime import datetime, time, timedelta -from numpy.random import rand - -import nose -import numpy as np -import pandas.index as _index -import pandas.lib as lib -import pandas.tslib as tslib - -from pandas.types.common import is_datetime64_ns_dtype -import pandas as pd -import pandas.compat as compat -import pandas.core.common as com -import pandas.tseries.frequencies as frequencies -import pandas.tseries.offsets as offsets -import pandas.tseries.tools as tools -import pandas.util.testing as tm -from pandas import ( - Index, Series, DataFrame, isnull, date_range, Timestamp, Period, - DatetimeIndex, to_datetime, bdate_range, Float64Index, - NaT, timedelta_range, Timedelta, concat) -from pandas.compat import range, long, StringIO, lrange, lmap, zip, product -from pandas.tslib import iNaT -from pandas.util.testing import ( - assert_frame_equal, assert_series_equal, assert_almost_equal, - _skip_if_has_locale, slow) - -randn = np.random.randn - - -class TestTimeSeriesDuplicates(tm.TestCase): - _multiprocess_can_split_ = True - - def setUp(self): - dates = [datetime(2000, 1, 2), datetime(2000, 1, 2), - datetime(2000, 1, 2), datetime(2000, 1, 3), - datetime(2000, 1, 3), datetime(2000, 1, 3), - datetime(2000, 1, 4), datetime(2000, 1, 4), - datetime(2000, 1, 4), datetime(2000, 1, 5)] - - self.dups = Series(np.random.randn(len(dates)), index=dates) - - def test_constructor(self): - tm.assertIsInstance(self.dups, Series) - tm.assertIsInstance(self.dups.index, DatetimeIndex) - - def test_is_unique_monotonic(self): - self.assertFalse(self.dups.index.is_unique) - - def test_index_unique(self): - uniques = self.dups.index.unique() - expected = DatetimeIndex([datetime(2000, 1, 2), datetime(2000, 1, 3), - datetime(2000, 1, 4), datetime(2000, 1, 5)]) - self.assertEqual(uniques.dtype, 'M8[ns]') # sanity - tm.assert_index_equal(uniques, expected) - self.assertEqual(self.dups.index.nunique(), 4) - - # #2563 - self.assertTrue(isinstance(uniques, DatetimeIndex)) - - dups_local = self.dups.index.tz_localize('US/Eastern') - dups_local.name = 'foo' - result = dups_local.unique() - expected = DatetimeIndex(expected, name='foo') - expected = expected.tz_localize('US/Eastern') - self.assertTrue(result.tz is not None) - self.assertEqual(result.name, 'foo') - tm.assert_index_equal(result, expected) - - # NaT, note this is excluded - arr = [1370745748 + t for t in range(20)] + [iNaT] - idx = DatetimeIndex(arr * 3) - tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) - self.assertEqual(idx.nunique(), 20) - self.assertEqual(idx.nunique(dropna=False), 21) - - arr = [Timestamp('2013-06-09 
02:42:28') + timedelta(seconds=t) - for t in range(20)] + [NaT] - idx = DatetimeIndex(arr * 3) - tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) - self.assertEqual(idx.nunique(), 20) - self.assertEqual(idx.nunique(dropna=False), 21) - - def test_index_dupes_contains(self): - d = datetime(2011, 12, 5, 20, 30) - ix = DatetimeIndex([d, d]) - self.assertTrue(d in ix) - - def test_duplicate_dates_indexing(self): - ts = self.dups - - uniques = ts.index.unique() - for date in uniques: - result = ts[date] - - mask = ts.index == date - total = (ts.index == date).sum() - expected = ts[mask] - if total > 1: - assert_series_equal(result, expected) - else: - assert_almost_equal(result, expected[0]) - - cp = ts.copy() - cp[date] = 0 - expected = Series(np.where(mask, 0, ts), index=ts.index) - assert_series_equal(cp, expected) - - self.assertRaises(KeyError, ts.__getitem__, datetime(2000, 1, 6)) - - # new index - ts[datetime(2000, 1, 6)] = 0 - self.assertEqual(ts[datetime(2000, 1, 6)], 0) - - def test_range_slice(self): - idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000', - '1/4/2000']) - - ts = Series(np.random.randn(len(idx)), index=idx) - - result = ts['1/2/2000':] - expected = ts[1:] - assert_series_equal(result, expected) - - result = ts['1/2/2000':'1/3/2000'] - expected = ts[1:4] - assert_series_equal(result, expected) - - def test_groupby_average_dup_values(self): - result = self.dups.groupby(level=0).mean() - expected = self.dups.groupby(self.dups.index).mean() - assert_series_equal(result, expected) - - def test_indexing_over_size_cutoff(self): - import datetime - # #1821 - - old_cutoff = _index._SIZE_CUTOFF - try: - _index._SIZE_CUTOFF = 1000 - - # create large list of non periodic datetime - dates = [] - sec = datetime.timedelta(seconds=1) - half_sec = datetime.timedelta(microseconds=500000) - d = datetime.datetime(2011, 12, 5, 20, 30) - n = 1100 - for i in range(n): - dates.append(d) - dates.append(d + sec) - dates.append(d + sec + half_sec) - dates.append(d + sec + sec + half_sec) - d += 3 * sec - - # duplicate some values in the list - duplicate_positions = np.random.randint(0, len(dates) - 1, 20) - for p in duplicate_positions: - dates[p + 1] = dates[p] - - df = DataFrame(np.random.randn(len(dates), 4), - index=dates, - columns=list('ABCD')) - - pos = n * 3 - timestamp = df.index[pos] - self.assertIn(timestamp, df.index) - - # it works! - df.loc[timestamp] - self.assertTrue(len(df.loc[[timestamp]]) > 0) - finally: - _index._SIZE_CUTOFF = old_cutoff - - def test_indexing_unordered(self): - # GH 2437 - rng = date_range(start='2011-01-01', end='2011-01-15') - ts = Series(randn(len(rng)), index=rng) - ts2 = concat([ts[0:4], ts[-4:], ts[4:-4]]) - - for t in ts.index: - # TODO: unused? 
- s = str(t) # noqa - - expected = ts[t] - result = ts2[t] - self.assertTrue(expected == result) - - # GH 3448 (ranges) - def compare(slobj): - result = ts2[slobj].copy() - result = result.sort_index() - expected = ts[slobj] - assert_series_equal(result, expected) - - compare(slice('2011-01-01', '2011-01-15')) - compare(slice('2010-12-30', '2011-01-15')) - compare(slice('2011-01-01', '2011-01-16')) - - # partial ranges - compare(slice('2011-01-01', '2011-01-6')) - compare(slice('2011-01-06', '2011-01-8')) - compare(slice('2011-01-06', '2011-01-12')) - - # single values - result = ts2['2011'].sort_index() - expected = ts['2011'] - assert_series_equal(result, expected) - - # diff freq - rng = date_range(datetime(2005, 1, 1), periods=20, freq='M') - ts = Series(np.arange(len(rng)), index=rng) - ts = ts.take(np.random.permutation(20)) - - result = ts['2005'] - for t in result.index: - self.assertTrue(t.year == 2005) - - def test_indexing(self): - - idx = date_range("2001-1-1", periods=20, freq='M') - ts = Series(np.random.rand(len(idx)), index=idx) - - # getting - - # GH 3070, make sure semantics work on Series/Frame - expected = ts['2001'] - expected.name = 'A' - - df = DataFrame(dict(A=ts)) - result = df['2001']['A'] - assert_series_equal(expected, result) - - # setting - ts['2001'] = 1 - expected = ts['2001'] - expected.name = 'A' - - df.loc['2001', 'A'] = 1 - - result = df['2001']['A'] - assert_series_equal(expected, result) - - # GH3546 (not including times on the last day) - idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00', - freq='H') - ts = Series(lrange(len(idx)), index=idx) - expected = ts['2013-05'] - assert_series_equal(expected, ts) - - idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59', - freq='S') - ts = Series(lrange(len(idx)), index=idx) - expected = ts['2013-05'] - assert_series_equal(expected, ts) - - idx = [Timestamp('2013-05-31 00:00'), - Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))] - ts = Series(lrange(len(idx)), index=idx) - expected = ts['2013'] - assert_series_equal(expected, ts) - - # GH14826, indexing with a seconds resolution string / datetime object - df = DataFrame(randn(5, 5), - columns=['open', 'high', 'low', 'close', 'volume'], - index=date_range('2012-01-02 18:01:00', - periods=5, tz='US/Central', freq='s')) - expected = df.loc[[df.index[2]]] - - # this is a single date, so will raise - self.assertRaises(KeyError, df.__getitem__, '2012-01-02 18:01:02', ) - self.assertRaises(KeyError, df.__getitem__, df.index[2], ) - - def test_recreate_from_data(self): - freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N', - 'C'] - - for f in freqs: - org = DatetimeIndex(start='2001/02/01 09:00', freq=f, periods=1) - idx = DatetimeIndex(org, freq=f) - tm.assert_index_equal(idx, org) - - org = DatetimeIndex(start='2001/02/01 09:00', freq=f, - tz='US/Pacific', periods=1) - idx = DatetimeIndex(org, freq=f, tz='US/Pacific') - tm.assert_index_equal(idx, org) - - -def assert_range_equal(left, right): - assert (left.equals(right)) - assert (left.freq == right.freq) - assert (left.tz == right.tz) - - -class TestTimeSeries(tm.TestCase): - _multiprocess_can_split_ = True - - def test_is_(self): - dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') - self.assertTrue(dti.is_(dti)) - self.assertTrue(dti.is_(dti.view())) - self.assertFalse(dti.is_(dti.copy())) - - def test_dti_slicing(self): - dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') - dti2 = dti[[1, 3, 5]] - - v1 = dti2[0] - v2 = dti2[1] - v3 = 
dti2[2] - - self.assertEqual(v1, Timestamp('2/28/2005')) - self.assertEqual(v2, Timestamp('4/30/2005')) - self.assertEqual(v3, Timestamp('6/30/2005')) - - # don't carry freq through irregular slicing - self.assertIsNone(dti2.freq) - - def test_contiguous_boolean_preserve_freq(self): - rng = date_range('1/1/2000', '3/1/2000', freq='B') - - mask = np.zeros(len(rng), dtype=bool) - mask[10:20] = True - - masked = rng[mask] - expected = rng[10:20] - self.assertIsNotNone(expected.freq) - assert_range_equal(masked, expected) - - mask[22] = True - masked = rng[mask] - self.assertIsNone(masked.freq) - - def test_getitem_median_slice_bug(self): - index = date_range('20090415', '20090519', freq='2B') - s = Series(np.random.randn(13), index=index) - - indexer = [slice(6, 7, None)] - result = s[indexer] - expected = s[indexer[0]] - assert_series_equal(result, expected) - - def test_series_box_timestamp(self): - rng = date_range('20090415', '20090519', freq='B') - s = Series(rng) - - tm.assertIsInstance(s[5], Timestamp) - - rng = date_range('20090415', '20090519', freq='B') - s = Series(rng, index=rng) - tm.assertIsInstance(s[5], Timestamp) - - tm.assertIsInstance(s.iat[5], Timestamp) - - def test_series_box_timedelta(self): - rng = timedelta_range('1 day 1 s', periods=5, freq='h') - s = Series(rng) - tm.assertIsInstance(s[1], Timedelta) - tm.assertIsInstance(s.iat[2], Timedelta) - - def test_date_range_ambiguous_arguments(self): - # #2538 - start = datetime(2011, 1, 1, 5, 3, 40) - end = datetime(2011, 1, 1, 8, 9, 40) - - self.assertRaises(ValueError, date_range, start, end, freq='s', - periods=10) - - def test_timestamp_to_datetime(self): - tm._skip_if_no_pytz() - rng = date_range('20090415', '20090519', tz='US/Eastern') - - stamp = rng[0] - dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) - - def test_timestamp_to_datetime_dateutil(self): - tm._skip_if_no_pytz() - rng = date_range('20090415', '20090519', tz='dateutil/US/Eastern') - - stamp = rng[0] - dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) - - def test_timestamp_to_datetime_explicit_pytz(self): - tm._skip_if_no_pytz() - import pytz - rng = date_range('20090415', '20090519', - tz=pytz.timezone('US/Eastern')) - - stamp = rng[0] - dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) - - def test_timestamp_to_datetime_explicit_dateutil(self): - tm._skip_if_windows_python_3() - tm._skip_if_no_dateutil() - from pandas.tslib import _dateutil_gettz as gettz - rng = date_range('20090415', '20090519', tz=gettz('US/Eastern')) - - stamp = rng[0] - dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) - - def test_index_convert_to_datetime_array(self): - tm._skip_if_no_pytz() - - def _check_rng(rng): - converted = rng.to_pydatetime() - tm.assertIsInstance(converted, np.ndarray) - for x, stamp in zip(converted, rng): - tm.assertIsInstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) - - rng = date_range('20090415', '20090519') - rng_eastern = date_range('20090415', '20090519', tz='US/Eastern') - rng_utc = date_range('20090415', '20090519', tz='utc') - - _check_rng(rng) - _check_rng(rng_eastern) - _check_rng(rng_utc) - - def test_index_convert_to_datetime_array_explicit_pytz(self): - tm._skip_if_no_pytz() - import pytz - - def _check_rng(rng): - converted = 
rng.to_pydatetime() - tm.assertIsInstance(converted, np.ndarray) - for x, stamp in zip(converted, rng): - tm.assertIsInstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) - - rng = date_range('20090415', '20090519') - rng_eastern = date_range('20090415', '20090519', - tz=pytz.timezone('US/Eastern')) - rng_utc = date_range('20090415', '20090519', tz=pytz.utc) - - _check_rng(rng) - _check_rng(rng_eastern) - _check_rng(rng_utc) - - def test_index_convert_to_datetime_array_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - - def _check_rng(rng): - converted = rng.to_pydatetime() - tm.assertIsInstance(converted, np.ndarray) - for x, stamp in zip(converted, rng): - tm.assertIsInstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) - - rng = date_range('20090415', '20090519') - rng_eastern = date_range('20090415', '20090519', - tz='dateutil/US/Eastern') - rng_utc = date_range('20090415', '20090519', tz=dateutil.tz.tzutc()) - - _check_rng(rng) - _check_rng(rng_eastern) - _check_rng(rng_utc) - - def test_ctor_str_intraday(self): - rng = DatetimeIndex(['1-1-2000 00:00:01']) - self.assertEqual(rng[0].second, 1) - - def test_series_ctor_plus_datetimeindex(self): - rng = date_range('20090415', '20090519', freq='B') - data = dict((k, 1) for k in rng) - - result = Series(data, index=rng) - self.assertIs(result.index, rng) - - def test_series_pad_backfill_limit(self): - index = np.arange(10) - s = Series(np.random.randn(10), index=index) - - result = s[:2].reindex(index, method='pad', limit=5) - - expected = s[:2].reindex(index).fillna(method='pad') - expected[-3:] = np.nan - assert_series_equal(result, expected) - - result = s[-2:].reindex(index, method='backfill', limit=5) - - expected = s[-2:].reindex(index).fillna(method='backfill') - expected[:3] = np.nan - assert_series_equal(result, expected) - - def test_series_fillna_limit(self): - index = np.arange(10) - s = Series(np.random.randn(10), index=index) - - result = s[:2].reindex(index) - result = result.fillna(method='pad', limit=5) - - expected = s[:2].reindex(index).fillna(method='pad') - expected[-3:] = np.nan - assert_series_equal(result, expected) - - result = s[-2:].reindex(index) - result = result.fillna(method='bfill', limit=5) - - expected = s[-2:].reindex(index).fillna(method='backfill') - expected[:3] = np.nan - assert_series_equal(result, expected) - - def test_frame_pad_backfill_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - - result = df[:2].reindex(index, method='pad', limit=5) - - expected = df[:2].reindex(index).fillna(method='pad') - expected.values[-3:] = np.nan - tm.assert_frame_equal(result, expected) - - result = df[-2:].reindex(index, method='backfill', limit=5) - - expected = df[-2:].reindex(index).fillna(method='backfill') - expected.values[:3] = np.nan - tm.assert_frame_equal(result, expected) - - def test_frame_fillna_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - - result = df[:2].reindex(index) - result = result.fillna(method='pad', limit=5) - - expected = df[:2].reindex(index).fillna(method='pad') - expected.values[-3:] = np.nan - tm.assert_frame_equal(result, expected) - - result = df[-2:].reindex(index) - result = result.fillna(method='backfill', limit=5) - - expected = df[-2:].reindex(index).fillna(method='backfill') - expected.values[:3] = np.nan - tm.assert_frame_equal(result, expected) - - def 
test_frame_setitem_timestamp(self): - # 2155 - columns = DatetimeIndex(start='1/1/2012', end='2/1/2012', - freq=offsets.BDay()) - index = lrange(10) - data = DataFrame(columns=columns, index=index) - t = datetime(2012, 11, 1) - ts = Timestamp(t) - data[ts] = np.nan # works - - def test_sparse_series_fillna_limit(self): - index = np.arange(10) - s = Series(np.random.randn(10), index=index) - - ss = s[:2].reindex(index).to_sparse() - result = ss.fillna(method='pad', limit=5) - expected = ss.fillna(method='pad', limit=5) - expected = expected.to_dense() - expected[-3:] = np.nan - expected = expected.to_sparse() - assert_series_equal(result, expected) - - ss = s[-2:].reindex(index).to_sparse() - result = ss.fillna(method='backfill', limit=5) - expected = ss.fillna(method='backfill') - expected = expected.to_dense() - expected[:3] = np.nan - expected = expected.to_sparse() - assert_series_equal(result, expected) - - def test_sparse_series_pad_backfill_limit(self): - index = np.arange(10) - s = Series(np.random.randn(10), index=index) - s = s.to_sparse() - - result = s[:2].reindex(index, method='pad', limit=5) - expected = s[:2].reindex(index).fillna(method='pad') - expected = expected.to_dense() - expected[-3:] = np.nan - expected = expected.to_sparse() - assert_series_equal(result, expected) - - result = s[-2:].reindex(index, method='backfill', limit=5) - expected = s[-2:].reindex(index).fillna(method='backfill') - expected = expected.to_dense() - expected[:3] = np.nan - expected = expected.to_sparse() - assert_series_equal(result, expected) - - def test_sparse_frame_pad_backfill_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - sdf = df.to_sparse() - - result = sdf[:2].reindex(index, method='pad', limit=5) - - expected = sdf[:2].reindex(index).fillna(method='pad') - expected = expected.to_dense() - expected.values[-3:] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - result = sdf[-2:].reindex(index, method='backfill', limit=5) - - expected = sdf[-2:].reindex(index).fillna(method='backfill') - expected = expected.to_dense() - expected.values[:3] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - def test_sparse_frame_fillna_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - sdf = df.to_sparse() - - result = sdf[:2].reindex(index) - result = result.fillna(method='pad', limit=5) - - expected = sdf[:2].reindex(index).fillna(method='pad') - expected = expected.to_dense() - expected.values[-3:] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - result = sdf[-2:].reindex(index) - result = result.fillna(method='backfill', limit=5) - - expected = sdf[-2:].reindex(index).fillna(method='backfill') - expected = expected.to_dense() - expected.values[:3] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - def test_pad_require_monotonicity(self): - rng = date_range('1/1/2000', '3/1/2000', freq='B') - - # neither monotonic increasing or decreasing - rng2 = rng[[1, 0, 2]] - - self.assertRaises(ValueError, rng2.get_indexer, rng, method='pad') - - def test_frame_ctor_datetime64_column(self): - rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') - dates = np.asarray(rng) - - df = DataFrame({'A': np.random.randn(len(rng)), 'B': dates}) - self.assertTrue(np.issubdtype(df['B'].dtype, np.dtype('M8[ns]'))) - - def test_frame_add_datetime64_column(self): - rng = 
date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') - df = DataFrame(index=np.arange(len(rng))) - - df['A'] = rng - self.assertTrue(np.issubdtype(df['A'].dtype, np.dtype('M8[ns]'))) - - def test_frame_datetime64_pre1900_repr(self): - df = DataFrame({'year': date_range('1/1/1700', periods=50, - freq='A-DEC')}) - # it works! - repr(df) - - def test_frame_add_datetime64_col_other_units(self): - n = 100 - - units = ['h', 'm', 's', 'ms', 'D', 'M', 'Y'] - - ns_dtype = np.dtype('M8[ns]') - - for unit in units: - dtype = np.dtype('M8[%s]' % unit) - vals = np.arange(n, dtype=np.int64).view(dtype) - - df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) - df[unit] = vals - - ex_vals = to_datetime(vals.astype('O')).values - - self.assertEqual(df[unit].dtype, ns_dtype) - self.assertTrue((df[unit].values == ex_vals).all()) - - # Test insertion into existing datetime64 column - df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) - df['dates'] = np.arange(n, dtype=np.int64).view(ns_dtype) - - for unit in units: - dtype = np.dtype('M8[%s]' % unit) - vals = np.arange(n, dtype=np.int64).view(dtype) - - tmp = df.copy() - - tmp['dates'] = vals - ex_vals = to_datetime(vals.astype('O')).values - - self.assertTrue((tmp['dates'].values == ex_vals).all()) - - def test_to_datetime_unit(self): - - epoch = 1370745748 - s = Series([epoch + t for t in range(20)]) - result = to_datetime(s, unit='s') - expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( - seconds=t) for t in range(20)]) - assert_series_equal(result, expected) - - s = Series([epoch + t for t in range(20)]).astype(float) - result = to_datetime(s, unit='s') - expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( - seconds=t) for t in range(20)]) - assert_series_equal(result, expected) - - s = Series([epoch + t for t in range(20)] + [iNaT]) - result = to_datetime(s, unit='s') - expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( - seconds=t) for t in range(20)] + [NaT]) - assert_series_equal(result, expected) - - s = Series([epoch + t for t in range(20)] + [iNaT]).astype(float) - result = to_datetime(s, unit='s') - expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( - seconds=t) for t in range(20)] + [NaT]) - assert_series_equal(result, expected) - - # GH13834 - s = Series([epoch + t for t in np.arange(0, 2, .25)] + - [iNaT]).astype(float) - result = to_datetime(s, unit='s') - expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( - seconds=t) for t in np.arange(0, 2, .25)] + [NaT]) - assert_series_equal(result, expected) - - s = concat([Series([epoch + t for t in range(20)] - ).astype(float), Series([np.nan])], - ignore_index=True) - result = to_datetime(s, unit='s') - expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( - seconds=t) for t in range(20)] + [NaT]) - assert_series_equal(result, expected) - - result = to_datetime([1, 2, 'NaT', pd.NaT, np.nan], unit='D') - expected = DatetimeIndex([Timestamp('1970-01-02'), - Timestamp('1970-01-03')] + ['NaT'] * 3) - tm.assert_index_equal(result, expected) - - with self.assertRaises(ValueError): - to_datetime([1, 2, 'foo'], unit='D') - with self.assertRaises(ValueError): - to_datetime([1, 2, 111111111], unit='D') - - # coerce we can process - expected = DatetimeIndex([Timestamp('1970-01-02'), - Timestamp('1970-01-03')] + ['NaT'] * 1) - result = to_datetime([1, 2, 'foo'], unit='D', errors='coerce') - tm.assert_index_equal(result, expected) - - result = to_datetime([1, 2, 111111111], unit='D', errors='coerce') - 
tm.assert_index_equal(result, expected) - - def test_series_ctor_datetime64(self): - rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') - dates = np.asarray(rng) - - series = Series(dates) - self.assertTrue(np.issubdtype(series.dtype, np.dtype('M8[ns]'))) - - def test_index_cast_datetime64_other_units(self): - arr = np.arange(0, 100, 10, dtype=np.int64).view('M8[D]') - - idx = Index(arr) - - self.assertTrue((idx.values == tslib.cast_to_nanoseconds(arr)).all()) - - def test_reindex_series_add_nat(self): - rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') - series = Series(rng) - - result = series.reindex(lrange(15)) - self.assertTrue(np.issubdtype(result.dtype, np.dtype('M8[ns]'))) - - mask = result.isnull() - self.assertTrue(mask[-5:].all()) - self.assertFalse(mask[:-5].any()) - - def test_reindex_frame_add_nat(self): - rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') - df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng}) - - result = df.reindex(lrange(15)) - self.assertTrue(np.issubdtype(result['B'].dtype, np.dtype('M8[ns]'))) - - mask = com.isnull(result)['B'] - self.assertTrue(mask[-5:].all()) - self.assertFalse(mask[:-5].any()) - - def test_series_repr_nat(self): - series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') - - result = repr(series) - expected = ('0 1970-01-01 00:00:00.000000\n' - '1 1970-01-01 00:00:00.000001\n' - '2 1970-01-01 00:00:00.000002\n' - '3 NaT\n' - 'dtype: datetime64[ns]') - self.assertEqual(result, expected) - - def test_fillna_nat(self): - series = Series([0, 1, 2, iNaT], dtype='M8[ns]') - - filled = series.fillna(method='pad') - filled2 = series.fillna(value=series.values[2]) - - expected = series.copy() - expected.values[3] = expected.values[2] - - assert_series_equal(filled, expected) - assert_series_equal(filled2, expected) - - df = DataFrame({'A': series}) - filled = df.fillna(method='pad') - filled2 = df.fillna(value=series.values[2]) - expected = DataFrame({'A': expected}) - assert_frame_equal(filled, expected) - assert_frame_equal(filled2, expected) - - series = Series([iNaT, 0, 1, 2], dtype='M8[ns]') - - filled = series.fillna(method='bfill') - filled2 = series.fillna(value=series[1]) - - expected = series.copy() - expected[0] = expected[1] - - assert_series_equal(filled, expected) - assert_series_equal(filled2, expected) - - df = DataFrame({'A': series}) - filled = df.fillna(method='bfill') - filled2 = df.fillna(value=series[1]) - expected = DataFrame({'A': expected}) - assert_frame_equal(filled, expected) - assert_frame_equal(filled2, expected) - - def test_string_na_nat_conversion(self): - # GH #999, #858 - - from pandas.compat import parse_date - - strings = np.array(['1/1/2000', '1/2/2000', np.nan, - '1/4/2000, 12:34:56'], dtype=object) - - expected = np.empty(4, dtype='M8[ns]') - for i, val in enumerate(strings): - if com.isnull(val): - expected[i] = iNaT - else: - expected[i] = parse_date(val) - - result = tslib.array_to_datetime(strings) - assert_almost_equal(result, expected) - - result2 = to_datetime(strings) - tm.assertIsInstance(result2, DatetimeIndex) - tm.assert_numpy_array_equal(result, result2.values) - - malformed = np.array(['1/100/2000', np.nan], dtype=object) - - # GH 10636, default is now 'raise' - self.assertRaises(ValueError, - lambda: to_datetime(malformed, errors='raise')) - - result = to_datetime(malformed, errors='ignore') - tm.assert_numpy_array_equal(result, malformed) - - self.assertRaises(ValueError, to_datetime, malformed, errors='raise') - - idx = ['a', 'b', 'c', 'd', 
'e'] - series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, - '1/5/2000'], index=idx, name='foo') - dseries = Series([to_datetime('1/1/2000'), np.nan, - to_datetime('1/3/2000'), np.nan, - to_datetime('1/5/2000')], index=idx, name='foo') - - result = to_datetime(series) - dresult = to_datetime(dseries) - - expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) - for i in range(5): - x = series[i] - if isnull(x): - expected[i] = iNaT - else: - expected[i] = to_datetime(x) - - assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, 'foo') - - assert_series_equal(dresult, expected, check_names=False) - self.assertEqual(dresult.name, 'foo') - - def test_to_datetime_iso8601(self): - result = to_datetime(["2012-01-01 00:00:00"]) - exp = Timestamp("2012-01-01 00:00:00") - self.assertEqual(result[0], exp) - - result = to_datetime(['20121001']) # bad iso 8601 - exp = Timestamp('2012-10-01') - self.assertEqual(result[0], exp) - - def test_to_datetime_default(self): - rs = to_datetime('2001') - xp = datetime(2001, 1, 1) - self.assertTrue(rs, xp) - - # dayfirst is essentially broken - - # to_datetime('01-13-2012', dayfirst=True) - # self.assertRaises(ValueError, to_datetime('01-13-2012', - # dayfirst=True)) - - def test_to_datetime_on_datetime64_series(self): - # #2699 - s = Series(date_range('1/1/2000', periods=10)) - - result = to_datetime(s) - self.assertEqual(result[0], s[0]) - - def test_to_datetime_with_space_in_series(self): - # GH 6428 - s = Series(['10/18/2006', '10/18/2008', ' ']) - tm.assertRaises(ValueError, lambda: to_datetime(s, errors='raise')) - result_coerce = to_datetime(s, errors='coerce') - expected_coerce = Series([datetime(2006, 10, 18), - datetime(2008, 10, 18), - pd.NaT]) - tm.assert_series_equal(result_coerce, expected_coerce) - result_ignore = to_datetime(s, errors='ignore') - tm.assert_series_equal(result_ignore, s) - - def test_to_datetime_with_apply(self): - # this is only locale tested with US/None locales - _skip_if_has_locale() - - # GH 5195 - # with a format and coerce a single item to_datetime fails - td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1, 2, 3]) - expected = pd.to_datetime(td, format='%b %y') - result = td.apply(pd.to_datetime, format='%b %y') - assert_series_equal(result, expected) - - td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3]) - self.assertRaises(ValueError, - lambda: pd.to_datetime(td, format='%b %y', - errors='raise')) - self.assertRaises(ValueError, - lambda: td.apply(pd.to_datetime, format='%b %y', - errors='raise')) - expected = pd.to_datetime(td, format='%b %y', errors='coerce') - - result = td.apply( - lambda x: pd.to_datetime(x, format='%b %y', errors='coerce')) - assert_series_equal(result, expected) - - def test_nat_vector_field_access(self): - idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) - - fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', - 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 'is_leap_year'] - - for field in fields: - result = getattr(idx, field) - expected = [getattr(x, field) for x in idx] - self.assert_numpy_array_equal(result, np.array(expected)) - - s = pd.Series(idx) - - for field in fields: - result = getattr(s.dt, field) - expected = [getattr(x, field) for x in idx] - self.assert_series_equal(result, pd.Series(expected)) - - def test_nat_scalar_field_access(self): - fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', - 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 
-                  'daysinmonth', 'dayofweek', 'weekday_name']
-        for field in fields:
-            result = getattr(NaT, field)
-            self.assertTrue(np.isnan(result))
-
-    def test_NaT_methods(self):
-        # GH 9513
-        raise_methods = ['astimezone', 'combine', 'ctime', 'dst',
-                         'fromordinal', 'fromtimestamp', 'isocalendar',
-                         'strftime', 'strptime', 'time', 'timestamp',
-                         'timetuple', 'timetz', 'toordinal', 'tzname',
-                         'utcfromtimestamp', 'utcnow', 'utcoffset',
-                         'utctimetuple']
-        nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today']
-        nan_methods = ['weekday', 'isoweekday']
-
-        for method in raise_methods:
-            if hasattr(NaT, method):
-                self.assertRaises(ValueError, getattr(NaT, method))
-
-        for method in nan_methods:
-            if hasattr(NaT, method):
-                self.assertTrue(np.isnan(getattr(NaT, method)()))
-
-        for method in nat_methods:
-            if hasattr(NaT, method):
-                # see gh-8254
-                exp_warning = None
-                if method == 'to_datetime':
-                    exp_warning = FutureWarning
-                with tm.assert_produces_warning(
-                        exp_warning, check_stacklevel=False):
-                    self.assertIs(getattr(NaT, method)(), NaT)
-
-        # GH 12300
-        self.assertEqual(NaT.isoformat(), 'NaT')
-
-    def test_to_datetime_types(self):
-
-        # empty string
-        result = to_datetime('')
-        self.assertIs(result, NaT)
-
-        result = to_datetime(['', ''])
-        self.assertTrue(isnull(result).all())
-
-        # ints
-        result = Timestamp(0)
-        expected = to_datetime(0)
-        self.assertEqual(result, expected)
-
-        # GH 3888 (strings)
-        expected = to_datetime(['2012'])[0]
-        result = to_datetime('2012')
-        self.assertEqual(result, expected)
-
-        # array = ['2012','20120101','20120101 12:01:01']
-        array = ['20120101', '20120101 12:01:01']
-        expected = list(to_datetime(array))
-        result = lmap(Timestamp, array)
-        tm.assert_almost_equal(result, expected)
-
-        # currently fails ###
-        # result = Timestamp('2012')
-        # expected = to_datetime('2012')
-        # self.assertEqual(result, expected)
-
-    def test_to_datetime_unprocessable_input(self):
-        # GH 4928
-        self.assert_numpy_array_equal(
-            to_datetime([1, '1'], errors='ignore'),
-            np.array([1, '1'], dtype='O')
-        )
-        self.assertRaises(TypeError, to_datetime, [1, '1'], errors='raise')
-
-    def test_to_datetime_other_datetime64_units(self):
-        # 5/25/2012
-        scalar = np.int64(1337904000000000).view('M8[us]')
-        as_obj = scalar.astype('O')
-
-        index = DatetimeIndex([scalar])
-        self.assertEqual(index[0], scalar.astype('O'))
-
-        value = Timestamp(scalar)
-        self.assertEqual(value, as_obj)
-
-    def test_to_datetime_list_of_integers(self):
-        rng = date_range('1/1/2000', periods=20)
-        rng = DatetimeIndex(rng.values)
-
-        ints = list(rng.asi8)
-
-        result = DatetimeIndex(ints)
-
-        tm.assert_index_equal(rng, result)
-
-    def test_to_datetime_freq(self):
-        xp = bdate_range('2000-1-1', periods=10, tz='UTC')
-        rs = xp.to_datetime()
-        self.assertEqual(xp.freq, rs.freq)
-        self.assertEqual(xp.tzinfo, rs.tzinfo)
-
-    def test_range_edges(self):
-        # GH 13672
-        idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000000001'),
-                            end=Timestamp('1970-01-01 00:00:00.000000004'),
-                            freq='N')
-        exp = DatetimeIndex(['1970-01-01 00:00:00.000000001',
-                             '1970-01-01 00:00:00.000000002',
-                             '1970-01-01 00:00:00.000000003',
-                             '1970-01-01 00:00:00.000000004'])
-        tm.assert_index_equal(idx, exp)
-
-        idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000000004'),
-                            end=Timestamp('1970-01-01 00:00:00.000000001'),
-                            freq='N')
-        exp = DatetimeIndex([])
-        tm.assert_index_equal(idx, exp)
-
-        idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000000001'),
-                            end=Timestamp('1970-01-01 00:00:00.000000001'),
-                            freq='N')
-        exp = DatetimeIndex(['1970-01-01 00:00:00.000000001'])
-        tm.assert_index_equal(idx, exp)
-
-        idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000001'),
-                            end=Timestamp('1970-01-01 00:00:00.000004'),
-                            freq='U')
-        exp = DatetimeIndex(['1970-01-01 00:00:00.000001',
-                             '1970-01-01 00:00:00.000002',
-                             '1970-01-01 00:00:00.000003',
-                             '1970-01-01 00:00:00.000004'])
-        tm.assert_index_equal(idx, exp)
-
-        idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.001'),
-                            end=Timestamp('1970-01-01 00:00:00.004'),
-                            freq='L')
-        exp = DatetimeIndex(['1970-01-01 00:00:00.001',
-                             '1970-01-01 00:00:00.002',
-                             '1970-01-01 00:00:00.003',
-                             '1970-01-01 00:00:00.004'])
-        tm.assert_index_equal(idx, exp)
-
-        idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:01'),
-                            end=Timestamp('1970-01-01 00:00:04'), freq='S')
-        exp = DatetimeIndex(['1970-01-01 00:00:01', '1970-01-01 00:00:02',
-                             '1970-01-01 00:00:03', '1970-01-01 00:00:04'])
-        tm.assert_index_equal(idx, exp)
-
-        idx = DatetimeIndex(start=Timestamp('1970-01-01 00:01'),
-                            end=Timestamp('1970-01-01 00:04'), freq='T')
-        exp = DatetimeIndex(['1970-01-01 00:01', '1970-01-01 00:02',
-                             '1970-01-01 00:03', '1970-01-01 00:04'])
-        tm.assert_index_equal(idx, exp)
-
-        idx = DatetimeIndex(start=Timestamp('1970-01-01 01:00'),
-                            end=Timestamp('1970-01-01 04:00'), freq='H')
-        exp = DatetimeIndex(['1970-01-01 01:00', '1970-01-01 02:00',
-                             '1970-01-01 03:00', '1970-01-01 04:00'])
-        tm.assert_index_equal(idx, exp)
-
-        idx = DatetimeIndex(start=Timestamp('1970-01-01'),
-                            end=Timestamp('1970-01-04'), freq='D')
-        exp = DatetimeIndex(['1970-01-01', '1970-01-02',
-                             '1970-01-03', '1970-01-04'])
-        tm.assert_index_equal(idx, exp)
-
-    def test_range_misspecified(self):
-        # GH #1095
-
-        self.assertRaises(ValueError, date_range, '1/1/2000')
-        self.assertRaises(ValueError, date_range, end='1/1/2000')
-        self.assertRaises(ValueError, date_range, periods=10)
-
-        self.assertRaises(ValueError, date_range, '1/1/2000', freq='H')
-        self.assertRaises(ValueError, date_range, end='1/1/2000', freq='H')
-        self.assertRaises(ValueError, date_range, periods=10, freq='H')
-
-    def test_reasonable_keyerror(self):
-        # GH #1062
-        index = DatetimeIndex(['1/3/2000'])
-        try:
-            index.get_loc('1/1/2000')
-        except KeyError as e:
-            self.assertIn('2000', str(e))
-
-    def test_reindex_with_datetimes(self):
-        rng = date_range('1/1/2000', periods=20)
-        ts = Series(np.random.randn(20), index=rng)
-
-        result = ts.reindex(list(ts.index[5:10]))
-        expected = ts[5:10]
-        tm.assert_series_equal(result, expected)
-
-        result = ts[list(ts.index[5:10])]
-        tm.assert_series_equal(result, expected)
-
-    def test_asfreq_keep_index_name(self):
-        # GH #9854
-        index_name = 'bar'
-        index = pd.date_range('20130101', periods=20, name=index_name)
-        df = pd.DataFrame([x for x in range(20)], columns=['foo'], index=index)
-
-        self.assertEqual(index_name, df.index.name)
-        self.assertEqual(index_name, df.asfreq('10D').index.name)
-
-    def test_promote_datetime_date(self):
-        rng = date_range('1/1/2000', periods=20)
-        ts = Series(np.random.randn(20), index=rng)
-
-        ts_slice = ts[5:]
-        ts2 = ts_slice.copy()
-        ts2.index = [x.date() for x in ts2.index]
-
-        result = ts + ts2
-        result2 = ts2 + ts
-        expected = ts + ts[5:]
-        assert_series_equal(result, expected)
-        assert_series_equal(result2, expected)
-
-        # test asfreq
-        result = ts2.asfreq('4H', method='ffill')
-        expected = ts[5:].asfreq('4H', method='ffill')
-        assert_series_equal(result, expected)
-
-        result = rng.get_indexer(ts2.index)
-        expected = rng.get_indexer(ts_slice.index)
-        self.assert_numpy_array_equal(result, expected)
-
-    def test_asfreq_normalize(self):
-        rng = date_range('1/1/2000 09:30', periods=20)
-        norm = date_range('1/1/2000', periods=20)
-        vals = np.random.randn(20)
-        ts = Series(vals, index=rng)
-
-        result = ts.asfreq('D', normalize=True)
-        norm = date_range('1/1/2000', periods=20)
-        expected = Series(vals, index=norm)
-
-        assert_series_equal(result, expected)
-
-        vals = np.random.randn(20, 3)
-        ts = DataFrame(vals, index=rng)
-
-        result = ts.asfreq('D', normalize=True)
-        expected = DataFrame(vals, index=norm)
-
-        assert_frame_equal(result, expected)
-
-    def test_date_range_gen_error(self):
-        rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min')
-        self.assertEqual(len(rng), 4)
-
-    def test_date_range_negative_freq(self):
-        # GH 11018
-        rng = date_range('2011-12-31', freq='-2A', periods=3)
-        exp = pd.DatetimeIndex(['2011-12-31', '2009-12-31',
-                                '2007-12-31'], freq='-2A')
-        tm.assert_index_equal(rng, exp)
-        self.assertEqual(rng.freq, '-2A')
-
-        rng = date_range('2011-01-31', freq='-2M', periods=3)
-        exp = pd.DatetimeIndex(['2011-01-31', '2010-11-30',
-                                '2010-09-30'], freq='-2M')
-        tm.assert_index_equal(rng, exp)
-        self.assertEqual(rng.freq, '-2M')
-
-    def test_date_range_bms_bug(self):
-        # #1645
-        rng = date_range('1/1/2000', periods=10, freq='BMS')
-
-        ex_first = Timestamp('2000-01-03')
-        self.assertEqual(rng[0], ex_first)
-
-    def test_date_range_businesshour(self):
-        idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00',
-                             '2014-07-04 11:00',
-                             '2014-07-04 12:00', '2014-07-04 13:00',
-                             '2014-07-04 14:00',
-                             '2014-07-04 15:00', '2014-07-04 16:00'],
-                            freq='BH')
-        rng = date_range('2014-07-04 09:00', '2014-07-04 16:00', freq='BH')
-        tm.assert_index_equal(idx, rng)
-
-        idx = DatetimeIndex(
-            ['2014-07-04 16:00', '2014-07-07 09:00'], freq='BH')
-        rng = date_range('2014-07-04 16:00', '2014-07-07 09:00', freq='BH')
-        tm.assert_index_equal(idx, rng)
-
-        idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00',
-                             '2014-07-04 11:00',
-                             '2014-07-04 12:00', '2014-07-04 13:00',
-                             '2014-07-04 14:00',
-                             '2014-07-04 15:00', '2014-07-04 16:00',
-                             '2014-07-07 09:00', '2014-07-07 10:00',
-                             '2014-07-07 11:00',
-                             '2014-07-07 12:00', '2014-07-07 13:00',
-                             '2014-07-07 14:00',
-                             '2014-07-07 15:00', '2014-07-07 16:00',
-                             '2014-07-08 09:00', '2014-07-08 10:00',
-                             '2014-07-08 11:00',
-                             '2014-07-08 12:00', '2014-07-08 13:00',
-                             '2014-07-08 14:00',
-                             '2014-07-08 15:00', '2014-07-08 16:00'],
-                            freq='BH')
-        rng = date_range('2014-07-04 09:00', '2014-07-08 16:00', freq='BH')
-        tm.assert_index_equal(idx, rng)
-
-    def test_first_subset(self):
-        ts = _simple_ts('1/1/2000', '1/1/2010', freq='12h')
-        result = ts.first('10d')
-        self.assertEqual(len(result), 20)
-
-        ts = _simple_ts('1/1/2000', '1/1/2010')
-        result = ts.first('10d')
-        self.assertEqual(len(result), 10)
-
-        result = ts.first('3M')
-        expected = ts[:'3/31/2000']
-        assert_series_equal(result, expected)
-
-        result = ts.first('21D')
-        expected = ts[:21]
-        assert_series_equal(result, expected)
-
-        result = ts[:0].first('3M')
-        assert_series_equal(result, ts[:0])
-
-    def test_last_subset(self):
-        ts = _simple_ts('1/1/2000', '1/1/2010', freq='12h')
-        result = ts.last('10d')
-        self.assertEqual(len(result), 20)
-
-        ts = _simple_ts('1/1/2000', '1/1/2010')
-        result = ts.last('10d')
-        self.assertEqual(len(result), 10)
-
-        result = ts.last('21D')
-        expected = ts['12/12/2009':]
-        assert_series_equal(result, expected)
-
-        result = ts.last('21D')
-        expected = ts[-21:]
-        assert_series_equal(result, expected)
-
-        result = ts[:0].last('3M')
-        assert_series_equal(result, ts[:0])
-
-    def test_format_pre_1900_dates(self):
-        rng = date_range('1/1/1850', '1/1/1950', freq='A-DEC')
-        rng.format()
-        ts = Series(1, index=rng)
-        repr(ts)
-
-    def test_at_time(self):
-        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
-        ts = Series(np.random.randn(len(rng)), index=rng)
-        rs = ts.at_time(rng[1])
-        self.assertTrue((rs.index.hour == rng[1].hour).all())
-        self.assertTrue((rs.index.minute == rng[1].minute).all())
-        self.assertTrue((rs.index.second == rng[1].second).all())
-
-        result = ts.at_time('9:30')
-        expected = ts.at_time(time(9, 30))
-        assert_series_equal(result, expected)
-
-        df = DataFrame(np.random.randn(len(rng), 3), index=rng)
-
-        result = ts[time(9, 30)]
-        result_df = df.loc[time(9, 30)]
-        expected = ts[(rng.hour == 9) & (rng.minute == 30)]
-        exp_df = df[(rng.hour == 9) & (rng.minute == 30)]
-
-        # expected.index = date_range('1/1/2000', '1/4/2000')
-
-        assert_series_equal(result, expected)
-        tm.assert_frame_equal(result_df, exp_df)
-
-        chunk = df.loc['1/4/2000':]
-        result = chunk.loc[time(9, 30)]
-        expected = result_df[-1:]
-        tm.assert_frame_equal(result, expected)
-
-        # midnight, everything
-        rng = date_range('1/1/2000', '1/31/2000')
-        ts = Series(np.random.randn(len(rng)), index=rng)
-
-        result = ts.at_time(time(0, 0))
-        assert_series_equal(result, ts)
-
-        # time doesn't exist
-        rng = date_range('1/1/2012', freq='23Min', periods=384)
-        ts = Series(np.random.randn(len(rng)), rng)
-        rs = ts.at_time('16:00')
-        self.assertEqual(len(rs), 0)
-
-    def test_at_time_frame(self):
-        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
-        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
-        rs = ts.at_time(rng[1])
-        self.assertTrue((rs.index.hour == rng[1].hour).all())
-        self.assertTrue((rs.index.minute == rng[1].minute).all())
-        self.assertTrue((rs.index.second == rng[1].second).all())
-
-        result = ts.at_time('9:30')
-        expected = ts.at_time(time(9, 30))
-        assert_frame_equal(result, expected)
-
-        result = ts.loc[time(9, 30)]
-        expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)]
-
-        assert_frame_equal(result, expected)
-
-        # midnight, everything
-        rng = date_range('1/1/2000', '1/31/2000')
-        ts = DataFrame(np.random.randn(len(rng), 3), index=rng)
-
-        result = ts.at_time(time(0, 0))
-        assert_frame_equal(result, ts)
-
-        # time doesn't exist
-        rng = date_range('1/1/2012', freq='23Min', periods=384)
-        ts = DataFrame(np.random.randn(len(rng), 2), rng)
-        rs = ts.at_time('16:00')
-        self.assertEqual(len(rs), 0)
-
-    def test_between_time(self):
-        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
-        ts = Series(np.random.randn(len(rng)), index=rng)
-        stime = time(0, 0)
-        etime = time(1, 0)
-
-        close_open = product([True, False], [True, False])
-        for inc_start, inc_end in close_open:
-            filtered = ts.between_time(stime, etime, inc_start, inc_end)
-            exp_len = 13 * 4 + 1
-            if not inc_start:
-                exp_len -= 5
-            if not inc_end:
-                exp_len -= 4
-
-            self.assertEqual(len(filtered), exp_len)
-            for rs in filtered.index:
-                t = rs.time()
-                if inc_start:
-                    self.assertTrue(t >= stime)
-                else:
-                    self.assertTrue(t > stime)
-
-                if inc_end:
-                    self.assertTrue(t <= etime)
-                else:
-                    self.assertTrue(t < etime)
-
-        result = ts.between_time('00:00', '01:00')
-        expected = ts.between_time(stime, etime)
-        assert_series_equal(result, expected)
-
-        # across midnight
-        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
-        ts = Series(np.random.randn(len(rng)), index=rng)
-        stime = time(22, 0)
-        etime = time(9, 0)
-
-        close_open = product([True, False], [True, False])
-        for inc_start, inc_end in close_open:
-            filtered = ts.between_time(stime, etime, inc_start, inc_end)
-            exp_len = (12 * 11 + 1) * 4 + 1
-            if not inc_start:
-                exp_len -= 4
-            if not inc_end:
-                exp_len -= 4
-
-            self.assertEqual(len(filtered), exp_len)
-            for rs in filtered.index:
-                t = rs.time()
-                if inc_start:
-                    self.assertTrue((t >= stime) or (t <= etime))
-                else:
-                    self.assertTrue((t > stime) or (t <= etime))
-
-                if inc_end:
-                    self.assertTrue((t <= etime) or (t >= stime))
-                else:
-                    self.assertTrue((t < etime) or (t >= stime))
-
-    def test_between_time_frame(self):
-        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
-        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
-        stime = time(0, 0)
-        etime = time(1, 0)
-
-        close_open = product([True, False], [True, False])
-        for inc_start, inc_end in close_open:
-            filtered = ts.between_time(stime, etime, inc_start, inc_end)
-            exp_len = 13 * 4 + 1
-            if not inc_start:
-                exp_len -= 5
-            if not inc_end:
-                exp_len -= 4
-
-            self.assertEqual(len(filtered), exp_len)
-            for rs in filtered.index:
-                t = rs.time()
-                if inc_start:
-                    self.assertTrue(t >= stime)
-                else:
-                    self.assertTrue(t > stime)
-
-                if inc_end:
-                    self.assertTrue(t <= etime)
-                else:
-                    self.assertTrue(t < etime)
-
-        result = ts.between_time('00:00', '01:00')
-        expected = ts.between_time(stime, etime)
-        assert_frame_equal(result, expected)
-
-        # across midnight
-        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
-        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
-        stime = time(22, 0)
-        etime = time(9, 0)
-
-        close_open = product([True, False], [True, False])
-        for inc_start, inc_end in close_open:
-            filtered = ts.between_time(stime, etime, inc_start, inc_end)
-            exp_len = (12 * 11 + 1) * 4 + 1
-            if not inc_start:
-                exp_len -= 4
-            if not inc_end:
-                exp_len -= 4
-
-            self.assertEqual(len(filtered), exp_len)
-            for rs in filtered.index:
-                t = rs.time()
-                if inc_start:
-                    self.assertTrue((t >= stime) or (t <= etime))
-                else:
-                    self.assertTrue((t > stime) or (t <= etime))
-
-                if inc_end:
-                    self.assertTrue((t <= etime) or (t >= stime))
-                else:
-                    self.assertTrue((t < etime) or (t >= stime))
-
-    def test_between_time_types(self):
-        # GH11818
-        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
-        self.assertRaises(ValueError, rng.indexer_between_time,
-                          datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
-
-        frame = DataFrame({'A': 0}, index=rng)
-        self.assertRaises(ValueError, frame.between_time,
-                          datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
-
-        series = Series(0, index=rng)
-        self.assertRaises(ValueError, series.between_time,
-                          datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
-
-    def test_between_time_formats(self):
-        # GH11818
-        _skip_if_has_locale()
-
-        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
-        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
-
-        strings = [("2:00", "2:30"), ("0200", "0230"), ("2:00am", "2:30am"),
-                   ("0200am", "0230am"), ("2:00:00", "2:30:00"),
-                   ("020000", "023000"), ("2:00:00am", "2:30:00am"),
-                   ("020000am", "023000am")]
-        expected_length = 28
-
-        for time_string in strings:
-            self.assertEqual(len(ts.between_time(*time_string)),
-                             expected_length,
-                             "%s - %s" % time_string)
-
-    def test_dti_constructor_preserve_dti_freq(self):
-        rng = date_range('1/1/2000', '1/2/2000', freq='5min')
-
-        rng2 = DatetimeIndex(rng)
-        self.assertEqual(rng.freq, rng2.freq)
-
-    def test_dti_constructor_years_only(self):
-        # GH 6961
-        for tz in [None, 'UTC', 'Asia/Tokyo', 'dateutil/US/Pacific']:
-            rng1 = date_range('2014', '2015', freq='M', tz=tz)
-            expected1 = date_range('2014-01-31', '2014-12-31', freq='M', tz=tz)
-
-            rng2 = date_range('2014', '2015', freq='MS', tz=tz)
-            expected2 = date_range('2014-01-01', '2015-01-01', freq='MS',
-                                   tz=tz)
-
-            rng3 = date_range('2014', '2020', freq='A', tz=tz)
-            expected3 = date_range('2014-12-31', '2019-12-31', freq='A', tz=tz)
-
-            rng4 = date_range('2014', '2020', freq='AS', tz=tz)
-            expected4 = date_range('2014-01-01', '2020-01-01', freq='AS',
-                                   tz=tz)
-
-            for rng, expected in [(rng1, expected1), (rng2, expected2),
-                                  (rng3, expected3), (rng4, expected4)]:
-                tm.assert_index_equal(rng, expected)
-
-    def test_dti_constructor_small_int(self):
-        # GH 13721
-        exp = DatetimeIndex(['1970-01-01 00:00:00.00000000',
-                             '1970-01-01 00:00:00.00000001',
-                             '1970-01-01 00:00:00.00000002'])
-
-        for dtype in [np.int64, np.int32, np.int16, np.int8]:
-            arr = np.array([0, 10, 20], dtype=dtype)
-            tm.assert_index_equal(DatetimeIndex(arr), exp)
-
-    def test_dti_constructor_numpy_timeunits(self):
-        # GH 9114
-        base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT'])
-
-        for dtype in ['datetime64[h]', 'datetime64[m]', 'datetime64[s]',
-                      'datetime64[ms]', 'datetime64[us]', 'datetime64[ns]']:
-            values = base.values.astype(dtype)
-
-            tm.assert_index_equal(DatetimeIndex(values), base)
-            tm.assert_index_equal(to_datetime(values), base)
-
-    def test_normalize(self):
-        rng = date_range('1/1/2000 9:30', periods=10, freq='D')
-
-        result = rng.normalize()
-        expected = date_range('1/1/2000', periods=10, freq='D')
-        tm.assert_index_equal(result, expected)
-
-        rng_ns = pd.DatetimeIndex(np.array([1380585623454345752,
-                                            1380585612343234312]).astype(
-                                                "datetime64[ns]"))
-        rng_ns_normalized = rng_ns.normalize()
-        expected = pd.DatetimeIndex(np.array([1380585600000000000,
-                                              1380585600000000000]).astype(
-                                                  "datetime64[ns]"))
-        tm.assert_index_equal(rng_ns_normalized, expected)
-
-        self.assertTrue(result.is_normalized)
-        self.assertFalse(rng.is_normalized)
-
-    def test_to_period(self):
-        from pandas.tseries.period import period_range
-
-        ts = _simple_ts('1/1/2000', '1/1/2001')
-
-        pts = ts.to_period()
-        exp = ts.copy()
-        exp.index = period_range('1/1/2000', '1/1/2001')
-        assert_series_equal(pts, exp)
-
-        pts = ts.to_period('M')
-        exp.index = exp.index.asfreq('M')
-        tm.assert_index_equal(pts.index, exp.index.asfreq('M'))
-        assert_series_equal(pts, exp)
-
-        # GH 7606 without freq
-        idx = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03',
-                             '2011-01-04'])
-        exp_idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03',
-                                  '2011-01-04'], freq='D')
-
-        s = Series(np.random.randn(4), index=idx)
-        expected = s.copy()
-        expected.index = exp_idx
-        assert_series_equal(s.to_period(), expected)
-
-        df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx)
-        expected = df.copy()
-        expected.index = exp_idx
-        assert_frame_equal(df.to_period(), expected)
-
-        expected = df.copy()
-        expected.columns = exp_idx
-        assert_frame_equal(df.to_period(axis=1), expected)
-
-    def create_dt64_based_index(self):
-        data = [Timestamp('2007-01-01 10:11:12.123456Z'),
-                Timestamp('2007-01-01 10:11:13.789123Z')]
-        index = DatetimeIndex(data)
-        return index
-
-    def test_to_period_millisecond(self):
-        index = self.create_dt64_based_index()
-
-        period = index.to_period(freq='L')
-        self.assertEqual(2, len(period))
-        self.assertEqual(period[0], Period('2007-01-01 10:11:12.123Z', 'L'))
-        self.assertEqual(period[1], Period('2007-01-01 10:11:13.789Z', 'L'))
-
-    def test_to_period_microsecond(self):
-        index = self.create_dt64_based_index()
-
-        period = index.to_period(freq='U')
-        self.assertEqual(2, len(period))
-        self.assertEqual(period[0], Period('2007-01-01 10:11:12.123456Z', 'U'))
-        self.assertEqual(period[1], Period('2007-01-01 10:11:13.789123Z', 'U'))
-
-    def test_to_period_tz_pytz(self):
-        tm._skip_if_no_pytz()
-        from dateutil.tz import tzlocal
-        from pytz import utc as UTC
-
-        xp = date_range('1/1/2000', '4/1/2000').to_period()
-
-        ts = date_range('1/1/2000', '4/1/2000', tz='US/Eastern')
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertEqual(result, expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-        ts = date_range('1/1/2000', '4/1/2000', tz=UTC)
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertEqual(result, expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-        ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal())
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertEqual(result, expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-    def test_to_period_tz_explicit_pytz(self):
-        tm._skip_if_no_pytz()
-        import pytz
-        from dateutil.tz import tzlocal
-
-        xp = date_range('1/1/2000', '4/1/2000').to_period()
-
-        ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern'))
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertTrue(result == expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-        ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc)
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertTrue(result == expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-        ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal())
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertTrue(result == expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-    def test_to_period_tz_dateutil(self):
-        tm._skip_if_no_dateutil()
-        import dateutil
-        from dateutil.tz import tzlocal
-
-        xp = date_range('1/1/2000', '4/1/2000').to_period()
-
-        ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern')
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertTrue(result == expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-        ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc())
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertTrue(result == expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-        ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal())
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertTrue(result == expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-    def test_frame_to_period(self):
-        K = 5
-        from pandas.tseries.period import period_range
-
-        dr = date_range('1/1/2000', '1/1/2001')
-        pr = period_range('1/1/2000', '1/1/2001')
-        df = DataFrame(randn(len(dr), K), index=dr)
-        df['mix'] = 'a'
-
-        pts = df.to_period()
-        exp = df.copy()
-        exp.index = pr
-        assert_frame_equal(pts, exp)
-
-        pts = df.to_period('M')
-        tm.assert_index_equal(pts.index, exp.index.asfreq('M'))
-
-        df = df.T
-        pts = df.to_period(axis=1)
-        exp = df.copy()
-        exp.columns = pr
-        assert_frame_equal(pts, exp)
-
-        pts = df.to_period('M', axis=1)
-        tm.assert_index_equal(pts.columns, exp.columns.asfreq('M'))
-
-        self.assertRaises(ValueError, df.to_period, axis=2)
-
-    def test_timestamp_fields(self):
-        # extra fields from DatetimeIndex like quarter and week
-        idx = tm.makeDateIndex(100)
-
-        fields = ['dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter',
-                  'days_in_month', 'is_month_start', 'is_month_end',
-                  'is_quarter_start', 'is_quarter_end', 'is_year_start',
-                  'is_year_end', 'weekday_name']
-        for f in fields:
-            expected = getattr(idx, f)[-1]
-            result = getattr(Timestamp(idx[-1]), f)
-            self.assertEqual(result, expected)
-
-        self.assertEqual(idx.freq, Timestamp(idx[-1], idx.freq).freq)
-        self.assertEqual(idx.freqstr, Timestamp(idx[-1], idx.freq).freqstr)
-
-    def test_woy_boundary(self):
-        # make sure weeks at year boundaries are correct
-        d = datetime(2013, 12, 31)
-        result = Timestamp(d).week
-        expected = 1  # ISO standard
-        self.assertEqual(result, expected)
-
-        d = datetime(2008, 12, 28)
-        result = Timestamp(d).week
-        expected = 52  # ISO standard
-        self.assertEqual(result, expected)
-
-        d = datetime(2009, 12, 31)
-        result = Timestamp(d).week
-        expected = 53  # ISO standard
-        self.assertEqual(result, expected)
-
-        d = datetime(2010, 1, 1)
-        result = Timestamp(d).week
-        expected = 53  # ISO standard
-        self.assertEqual(result, expected)
-
-        d = datetime(2010, 1, 3)
-        result = Timestamp(d).week
-        expected = 53  # ISO standard
-        self.assertEqual(result, expected)
-
-        result = np.array([Timestamp(datetime(*args)).week
-                           for args in [(2000, 1, 1), (2000, 1, 2), (
-                               2005, 1, 1), (2005, 1, 2)]])
-        self.assertTrue((result == [52, 52, 53, 53]).all())
-
-    def test_timestamp_date_out_of_range(self):
-        self.assertRaises(ValueError, Timestamp, '1676-01-01')
-        self.assertRaises(ValueError, Timestamp, '2263-01-01')
-
-        # 1475
-        self.assertRaises(ValueError, DatetimeIndex, ['1400-01-01'])
-        self.assertRaises(ValueError, DatetimeIndex, [datetime(1400, 1, 1)])
-
-    def test_compat_replace(self):
-        # https://github.com/statsmodels/statsmodels/issues/3349
-        # replace should take ints/longs for compat
-
-        for f in [compat.long, int]:
-            result = date_range(Timestamp('1960-04-01 00:00:00',
-                                          freq='QS-JAN'),
-                                periods=f(76),
-                                freq='QS-JAN')
-            self.assertEqual(len(result), 76)
-
-    def test_timestamp_repr(self):
-        # pre-1900
-        stamp = Timestamp('1850-01-01', tz='US/Eastern')
-        repr(stamp)
-
-        iso8601 = '1850-01-01 01:23:45.012345'
-        stamp = Timestamp(iso8601, tz='US/Eastern')
-        result = repr(stamp)
-        self.assertIn(iso8601, result)
-
-    def test_timestamp_from_ordinal(self):
-
-        # GH 3042
-        dt = datetime(2011, 4, 16, 0, 0)
-        ts = Timestamp.fromordinal(dt.toordinal())
-        self.assertEqual(ts.to_pydatetime(), dt)
-
-        # with a tzinfo
-        stamp = Timestamp('2011-4-16', tz='US/Eastern')
-        dt_tz = stamp.to_pydatetime()
-        ts = Timestamp.fromordinal(dt_tz.toordinal(), tz='US/Eastern')
-        self.assertEqual(ts.to_pydatetime(), dt_tz)
-
-    def test_datetimeindex_integers_shift(self):
-        rng = date_range('1/1/2000', periods=20)
-
-        result = rng + 5
-        expected = rng.shift(5)
-        tm.assert_index_equal(result, expected)
-
-        result = rng - 5
-        expected = rng.shift(-5)
-        tm.assert_index_equal(result, expected)
-
-    def test_astype_object(self):
-        # NumPy 1.6.1 weak ns support
-        rng = date_range('1/1/2000', periods=20)
-
-        casted = rng.astype('O')
-        exp_values = list(rng)
-
-        tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_))
-        self.assertEqual(casted.tolist(), exp_values)
-
-    def test_catch_infinite_loop(self):
-        offset = offsets.DateOffset(minute=5)
-        # blow up, don't loop forever
-        self.assertRaises(Exception, date_range, datetime(2011, 11, 11),
-                          datetime(2011, 11, 12), freq=offset)
-
-    def test_append_concat(self):
-        rng = date_range('5/8/2012 1:45', periods=10, freq='5T')
-        ts = Series(np.random.randn(len(rng)), rng)
-        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
-
-        result = ts.append(ts)
-        result_df = df.append(df)
-        ex_index = DatetimeIndex(np.tile(rng.values, 2))
-        tm.assert_index_equal(result.index, ex_index)
-        tm.assert_index_equal(result_df.index, ex_index)
-
-        appended = rng.append(rng)
-        tm.assert_index_equal(appended, ex_index)
-
-        appended = rng.append([rng, rng])
-        ex_index = DatetimeIndex(np.tile(rng.values, 3))
-        tm.assert_index_equal(appended, ex_index)
-
-        # different index names
-        rng1 = rng.copy()
-        rng2 = rng.copy()
-        rng1.name = 'foo'
-        rng2.name = 'bar'
-        self.assertEqual(rng1.append(rng1).name, 'foo')
-        self.assertIsNone(rng1.append(rng2).name)
-
-    def test_append_concat_tz(self):
-        # GH 2938
-        tm._skip_if_no_pytz()
-
-        rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
-                         tz='US/Eastern')
-        rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
-                          tz='US/Eastern')
-        rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
-                          tz='US/Eastern')
-        ts = Series(np.random.randn(len(rng)), rng)
-        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
-        ts2 = Series(np.random.randn(len(rng2)), rng2)
-        df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
-
-        result = ts.append(ts2)
-        result_df = df.append(df2)
-        tm.assert_index_equal(result.index, rng3)
-        tm.assert_index_equal(result_df.index, rng3)
-
-        appended = rng.append(rng2)
-        tm.assert_index_equal(appended, rng3)
-
-    def test_append_concat_tz_explicit_pytz(self):
-        # GH 2938
-        tm._skip_if_no_pytz()
-        from pytz import timezone as timezone
-
-        rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
-                         tz=timezone('US/Eastern'))
-        rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
-                          tz=timezone('US/Eastern'))
-        rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
-                          tz=timezone('US/Eastern'))
-        ts = Series(np.random.randn(len(rng)), rng)
-        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
-        ts2 = Series(np.random.randn(len(rng2)), rng2)
-        df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
-
-        result = ts.append(ts2)
-        result_df = df.append(df2)
-        tm.assert_index_equal(result.index, rng3)
-        tm.assert_index_equal(result_df.index, rng3)
-
-        appended = rng.append(rng2)
-        tm.assert_index_equal(appended, rng3)
-
-    def test_append_concat_tz_dateutil(self):
-        # GH 2938
-        tm._skip_if_no_dateutil()
-        rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
-                         tz='dateutil/US/Eastern')
-        rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
-                          tz='dateutil/US/Eastern')
-        rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
-                          tz='dateutil/US/Eastern')
-        ts = Series(np.random.randn(len(rng)), rng)
-        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
-        ts2 = Series(np.random.randn(len(rng2)), rng2)
-        df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
-
-        result = ts.append(ts2)
-        result_df = df.append(df2)
-        tm.assert_index_equal(result.index, rng3)
-        tm.assert_index_equal(result_df.index, rng3)
-
-        appended = rng.append(rng2)
-        tm.assert_index_equal(appended, rng3)
-
-    def test_set_dataframe_column_ns_dtype(self):
-        x = DataFrame([datetime.now(), datetime.now()])
-        self.assertEqual(x[0].dtype, np.dtype('M8[ns]'))
-
-    def test_groupby_count_dateparseerror(self):
-        dr = date_range(start='1/1/2012', freq='5min', periods=10)
-
-        # BAD Example, datetimes first
-        s = Series(np.arange(10), index=[dr, lrange(10)])
-        grouped = s.groupby(lambda x: x[1] % 2 == 0)
-        result = grouped.count()
-
-        s = Series(np.arange(10), index=[lrange(10), dr])
-        grouped = s.groupby(lambda x: x[0] % 2 == 0)
-        expected = grouped.count()
-
-        assert_series_equal(result, expected)
-
-    def test_datetimeindex_repr_short(self):
-        dr = date_range(start='1/1/2012', periods=1)
-        repr(dr)
-
-        dr = date_range(start='1/1/2012', periods=2)
-        repr(dr)
-
-        dr = date_range(start='1/1/2012', periods=3)
-        repr(dr)
-
-    def test_constructor_int64_nocopy(self):
-        # #1624
-        arr = np.arange(1000, dtype=np.int64)
-        index = DatetimeIndex(arr)
-
-        arr[50:100] = -1
-        self.assertTrue((index.asi8[50:100] == -1).all())
-
-        arr = np.arange(1000, dtype=np.int64)
-        index = DatetimeIndex(arr, copy=True)
-
-        arr[50:100] = -1
-        self.assertTrue((index.asi8[50:100] != -1).all())
-
-    def test_series_interpolate_method_values(self):
-        # #1646
-        ts = _simple_ts('1/1/2000', '1/20/2000')
-        ts[::2] = np.nan
-
-        result = ts.interpolate(method='values')
-        exp = ts.interpolate()
-        assert_series_equal(result, exp)
-
-    def test_frame_datetime64_handling_groupby(self):
-        # it works!
-        df = DataFrame([(3, np.datetime64('2012-07-03')),
-                        (3, np.datetime64('2012-07-04'))],
-                       columns=['a', 'date'])
-        result = df.groupby('a').first()
-        self.assertEqual(result['date'][3], Timestamp('2012-07-03'))
-
-    def test_series_interpolate_intraday(self):
-        # #1698
-        index = pd.date_range('1/1/2012', periods=4, freq='12D')
-        ts = pd.Series([0, 12, 24, 36], index)
-        new_index = index.append(index + pd.DateOffset(days=1)).sort_values()
-
-        exp = ts.reindex(new_index).interpolate(method='time')
-
-        index = pd.date_range('1/1/2012', periods=4, freq='12H')
-        ts = pd.Series([0, 12, 24, 36], index)
-        new_index = index.append(index + pd.DateOffset(hours=1)).sort_values()
-        result = ts.reindex(new_index).interpolate(method='time')
-
-        self.assert_numpy_array_equal(result.values, exp.values)
-
-    def test_frame_dict_constructor_datetime64_1680(self):
-        dr = date_range('1/1/2012', periods=10)
-        s = Series(dr, index=dr)
-
-        # it works!
-        DataFrame({'a': 'foo', 'b': s}, index=dr)
-        DataFrame({'a': 'foo', 'b': s.values}, index=dr)
-
-    def test_frame_datetime64_mixed_index_ctor_1681(self):
-        dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI')
-        ts = Series(dr)
-
-        # it works!
-        d = DataFrame({'A': 'foo', 'B': ts}, index=dr)
-        self.assertTrue(d['B'].isnull().all())
-
-    def test_frame_timeseries_to_records(self):
-        index = date_range('1/1/2000', periods=10)
-        df = DataFrame(np.random.randn(10, 3), index=index,
-                       columns=['a', 'b', 'c'])
-
-        result = df.to_records()
-        result['index'].dtype == 'M8[ns]'
-
-        result = df.to_records(index=False)
-
-    def test_frame_datetime64_duplicated(self):
-        dates = date_range('2010-07-01', end='2010-08-05')
-
-        tst = DataFrame({'symbol': 'AAA', 'date': dates})
-        result = tst.duplicated(['date', 'symbol'])
-        self.assertTrue((-result).all())
-
-        tst = DataFrame({'date': dates})
-        result = tst.duplicated()
-        self.assertTrue((-result).all())
-
-    def test_timestamp_compare_with_early_datetime(self):
-        # e.g. datetime.min
-        stamp = Timestamp('2012-01-01')
-
-        self.assertFalse(stamp == datetime.min)
-        self.assertFalse(stamp == datetime(1600, 1, 1))
-        self.assertFalse(stamp == datetime(2700, 1, 1))
-        self.assertNotEqual(stamp, datetime.min)
-        self.assertNotEqual(stamp, datetime(1600, 1, 1))
-        self.assertNotEqual(stamp, datetime(2700, 1, 1))
-        self.assertTrue(stamp > datetime(1600, 1, 1))
-        self.assertTrue(stamp >= datetime(1600, 1, 1))
-        self.assertTrue(stamp < datetime(2700, 1, 1))
-        self.assertTrue(stamp <= datetime(2700, 1, 1))
-
-    def test_to_html_timestamp(self):
-        rng = date_range('2000-01-01', periods=10)
-        df = DataFrame(np.random.randn(10, 4), index=rng)
-
-        result = df.to_html()
-        self.assertIn('2000-01-01', result)
-
-    def test_to_csv_numpy_16_bug(self):
-        frame = DataFrame({'a': date_range('1/1/2000', periods=10)})
-
-        buf = StringIO()
-        frame.to_csv(buf)
-
-        result = buf.getvalue()
-        self.assertIn('2000-01-01', result)
-
-    def test_series_map_box_timestamps(self):
-        # #2689, #2627
-        s = Series(date_range('1/1/2000', periods=10))
-
-        def f(x):
-            return (x.hour, x.day, x.month)
-
-        # it works!
-        s.map(f)
-        s.apply(f)
-        DataFrame(s).applymap(f)
-
-    def test_series_map_box_timedelta(self):
-        # GH 11349
-        s = Series(timedelta_range('1 day 1 s', periods=5, freq='h'))
-
-        def f(x):
-            return x.total_seconds()
-
-        s.map(f)
-        s.apply(f)
-        DataFrame(s).applymap(f)
-
-    def test_concat_datetime_datetime64_frame(self):
-        # #2624
-        rows = []
-        rows.append([datetime(2010, 1, 1), 1])
-        rows.append([datetime(2010, 1, 2), 'hi'])
-
-        df2_obj = DataFrame.from_records(rows, columns=['date', 'test'])
-
-        ind = date_range(start="2000/1/1", freq="D", periods=10)
-        df1 = DataFrame({'date': ind, 'test': lrange(10)})
-
-        # it works!
-        pd.concat([df1, df2_obj])
-
-    def test_asfreq_resample_set_correct_freq(self):
-        # GH5613
-        # we test if .asfreq() and .resample() set the correct value for .freq
-        df = pd.DataFrame({'date': ["2012-01-01", "2012-01-02", "2012-01-03"],
-                           'col': [1, 2, 3]})
-        df = df.set_index(pd.to_datetime(df.date))
-
-        # testing the settings before calling .asfreq() and .resample()
-        self.assertEqual(df.index.freq, None)
-        self.assertEqual(df.index.inferred_freq, 'D')
-
-        # does .asfreq() set .freq correctly?
-        self.assertEqual(df.asfreq('D').index.freq, 'D')
-
-        # does .resample() set .freq correctly?
-        self.assertEqual(df.resample('D').asfreq().index.freq, 'D')
-
-    def test_pickle(self):
-
-        # GH4606
-        p = self.round_trip_pickle(NaT)
-        self.assertTrue(p is NaT)
-
-        idx = pd.to_datetime(['2013-01-01', NaT, '2014-01-06'])
-        idx_p = self.round_trip_pickle(idx)
-        self.assertTrue(idx_p[0] == idx[0])
-        self.assertTrue(idx_p[1] is NaT)
-        self.assertTrue(idx_p[2] == idx[2])
-
-        # GH11002
-        # don't infer freq
-        idx = date_range('1750-1-1', '2050-1-1', freq='7D')
-        idx_p = self.round_trip_pickle(idx)
-        tm.assert_index_equal(idx, idx_p)
-
-    def test_timestamp_equality(self):
-
-        # GH 11034
-        s = Series([Timestamp('2000-01-29 01:59:00'), 'NaT'])
-        result = s != s
-        assert_series_equal(result, Series([False, True]))
-        result = s != s[0]
-        assert_series_equal(result, Series([False, True]))
-        result = s != s[1]
-        assert_series_equal(result, Series([True, True]))
-
-        result = s == s
-        assert_series_equal(result, Series([True, False]))
-        result = s == s[0]
-        assert_series_equal(result, Series([True, False]))
-        result = s == s[1]
-        assert_series_equal(result, Series([False, False]))
-
-
-def _simple_ts(start, end, freq='D'):
-    rng = date_range(start, end, freq=freq)
-    return Series(np.random.randn(len(rng)), index=rng)
-
-
-class TestToDatetime(tm.TestCase):
-    _multiprocess_can_split_ = True
-
-    def test_to_datetime_dt64s(self):
-        in_bound_dts = [
-            np.datetime64('2000-01-01'),
-            np.datetime64('2000-01-02'),
-        ]
-
-        for dt in in_bound_dts:
-            self.assertEqual(pd.to_datetime(dt), Timestamp(dt))
-
-        oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ]
-
-        for dt in oob_dts:
-            self.assertRaises(ValueError, pd.to_datetime, dt, errors='raise')
-            self.assertRaises(ValueError, tslib.Timestamp, dt)
-            self.assertIs(pd.to_datetime(dt, errors='coerce'), NaT)
-
-    def test_to_datetime_array_of_dt64s(self):
-        dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ]
-
-        # Assuming all datetimes are in bounds, to_datetime() returns
-        # an array that is equal to Timestamp() parsing
-        self.assert_numpy_array_equal(
-            pd.to_datetime(dts, box=False),
-            np.array([Timestamp(x).asm8 for x in dts])
-        )
-
-        # A list of datetimes where the last one is out of bounds
-        dts_with_oob = dts + [np.datetime64('9999-01-01')]
-
-        self.assertRaises(ValueError, pd.to_datetime, dts_with_oob,
-                          errors='raise')
-
-        self.assert_numpy_array_equal(
-            pd.to_datetime(dts_with_oob, box=False, errors='coerce'),
-            np.array(
-                [
-                    Timestamp(dts_with_oob[0]).asm8,
-                    Timestamp(dts_with_oob[1]).asm8,
-                    iNaT,
-                ],
-                dtype='M8'
-            )
-        )
-
-        # With errors='ignore', out of bounds datetime64s
-        # are converted to their .item(), which depending on the version of
-        # numpy is either a python datetime.datetime or datetime.date
-        self.assert_numpy_array_equal(
-            pd.to_datetime(dts_with_oob, box=False, errors='ignore'),
-            np.array(
-                [dt.item() for dt in dts_with_oob],
-                dtype='O'
-            )
-        )
-
-    def test_to_datetime_tz(self):
-
-        # xref 8260
-        # uniform returns a DatetimeIndex
-        arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
-               pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')]
-        result = pd.to_datetime(arr)
-        expected = DatetimeIndex(
-            ['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific')
-        tm.assert_index_equal(result, expected)
-
-        # mixed tzs will raise
-        arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'),
-               pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')]
-        self.assertRaises(ValueError, lambda: pd.to_datetime(arr))
-
-    def test_to_datetime_tz_pytz(self):
-
-        # xref 8260
-        tm._skip_if_no_pytz()
-        import pytz
-
-        us_eastern = pytz.timezone('US/Eastern')
-        arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1,
-                                                     hour=3, minute=0)),
-                        us_eastern.localize(datetime(year=2000, month=6, day=1,
-                                                     hour=3, minute=0))],
-                       dtype=object)
-        result = pd.to_datetime(arr, utc=True)
-        expected = DatetimeIndex(['2000-01-01 08:00:00+00:00',
-                                  '2000-06-01 07:00:00+00:00'],
-                                 dtype='datetime64[ns, UTC]', freq=None)
-        tm.assert_index_equal(result, expected)
-
-    def test_to_datetime_utc_is_true(self):
-        # See gh-11934
-        start = pd.Timestamp('2014-01-01', tz='utc')
-        end = pd.Timestamp('2014-01-03', tz='utc')
-        date_range = pd.bdate_range(start, end)
-
-        result = pd.to_datetime(date_range, utc=True)
-        expected = pd.DatetimeIndex(data=date_range)
-        tm.assert_index_equal(result, expected)
-
-    def test_to_datetime_tz_psycopg2(self):
-
-        # xref 8260
-        try:
-            import psycopg2
-        except ImportError:
-            raise nose.SkipTest("no psycopg2 installed")
-
-        # misc cases
-        tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)
-        tz2 = psycopg2.tz.FixedOffsetTimezone(offset=-240, name=None)
-        arr = np.array([datetime(2000, 1, 1, 3, 0, tzinfo=tz1),
-                        datetime(2000, 6, 1, 3, 0, tzinfo=tz2)],
-                       dtype=object)
-
-        result = pd.to_datetime(arr, errors='coerce', utc=True)
-        expected = DatetimeIndex(['2000-01-01 08:00:00+00:00',
-                                  '2000-06-01 07:00:00+00:00'],
-                                 dtype='datetime64[ns, UTC]', freq=None)
-        tm.assert_index_equal(result, expected)
-
-        # dtype coercion
-        i = pd.DatetimeIndex([
-            '2000-01-01 08:00:00+00:00'
-        ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None))
-        self.assertTrue(is_datetime64_ns_dtype(i))
-
-        # tz coerceion
-        result = pd.to_datetime(i, errors='coerce')
-        tm.assert_index_equal(result, i)
-
-        result = pd.to_datetime(i, errors='coerce', utc=True)
-        expected = pd.DatetimeIndex(['2000-01-01 13:00:00'],
-                                    dtype='datetime64[ns, UTC]')
-        tm.assert_index_equal(result, expected)
-
-    def test_datetime_bool(self):
-        # GH13176
-        with self.assertRaises(TypeError):
-            to_datetime(False)
-        self.assertTrue(to_datetime(False, errors="coerce") is tslib.NaT)
-        self.assertEqual(to_datetime(False, errors="ignore"), False)
-        with self.assertRaises(TypeError):
-            to_datetime(True)
-        self.assertTrue(to_datetime(True, errors="coerce") is tslib.NaT)
-        self.assertEqual(to_datetime(True, errors="ignore"), True)
-        with self.assertRaises(TypeError):
-            to_datetime([False, datetime.today()])
-        with self.assertRaises(TypeError):
-            to_datetime(['20130101', True])
-        tm.assert_index_equal(to_datetime([0, False, tslib.NaT, 0.0],
-                                          errors="coerce"),
-                              DatetimeIndex([to_datetime(0), tslib.NaT,
-                                             tslib.NaT, to_datetime(0)]))
-
-    def test_datetime_invalid_datatype(self):
-        # GH13176
-
-        with self.assertRaises(TypeError):
-            pd.to_datetime(bool)
-        with self.assertRaises(TypeError):
-            pd.to_datetime(pd.to_datetime)
-
-    def test_unit(self):
-        # GH 11758
-        # test proper behavior with erros
-
-        with self.assertRaises(ValueError):
-            to_datetime([1], unit='D', format='%Y%m%d')
-
-        values = [11111111, 1, 1.0, tslib.iNaT, pd.NaT, np.nan,
-                  'NaT', '']
-        result = to_datetime(values, unit='D', errors='ignore')
-        expected = Index([11111111, Timestamp('1970-01-02'),
-                          Timestamp('1970-01-02'), pd.NaT,
-                          pd.NaT, pd.NaT, pd.NaT, pd.NaT],
-                         dtype=object)
-        tm.assert_index_equal(result, expected)
-
-        result = to_datetime(values, unit='D', errors='coerce')
-        expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02',
-                                  'NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
-        tm.assert_index_equal(result, expected)
-
-        with self.assertRaises(tslib.OutOfBoundsDatetime):
-            to_datetime(values, unit='D', errors='raise')
-
-        values = [1420043460000, tslib.iNaT, pd.NaT, np.nan, 'NaT']
-
-        result = to_datetime(values, errors='ignore', unit='s')
-        expected = Index([1420043460000, pd.NaT, pd.NaT,
-                          pd.NaT, pd.NaT], dtype=object)
-        tm.assert_index_equal(result, expected)
-
-        result = to_datetime(values, errors='coerce', unit='s')
-        expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
-        tm.assert_index_equal(result, expected)
-
-        with self.assertRaises(tslib.OutOfBoundsDatetime):
-            to_datetime(values, errors='raise', unit='s')
-
-        # if we have a string, then we raise a ValueError
-        # and NOT an OutOfBoundsDatetime
-        for val in ['foo', Timestamp('20130101')]:
-            try:
-                to_datetime(val, errors='raise', unit='s')
-            except tslib.OutOfBoundsDatetime:
-                raise AssertionError("incorrect exception raised")
-            except ValueError:
-                pass
-
-    def test_unit_consistency(self):
-
-        # consistency of conversions
-        expected = Timestamp('1970-05-09 14:25:11')
-        result = pd.to_datetime(11111111, unit='s', errors='raise')
-        self.assertEqual(result, expected)
-        self.assertIsInstance(result, Timestamp)
-
-        result = pd.to_datetime(11111111, unit='s', errors='coerce')
-        self.assertEqual(result, expected)
-        self.assertIsInstance(result, Timestamp)
-
-        result = pd.to_datetime(11111111, unit='s', errors='ignore')
-        self.assertEqual(result, expected)
-        self.assertIsInstance(result, Timestamp)
-
-    def test_unit_with_numeric(self):
-
-        # GH 13180
-        # coercions from floats/ints are ok
-        expected = DatetimeIndex(['2015-06-19 05:33:20',
-                                  '2015-05-27 22:33:20'])
-        arr1 = [1.434692e+18, 1.432766e+18]
-        arr2 = np.array(arr1).astype('int64')
-        for errors in ['ignore', 'raise', 'coerce']:
-            result = pd.to_datetime(arr1, errors=errors)
-            tm.assert_index_equal(result, expected)
-
-            result = pd.to_datetime(arr2, errors=errors)
-            tm.assert_index_equal(result, expected)
-
-        # but we want to make sure that we are coercing
-        # if we have ints/strings
-        expected = DatetimeIndex(['NaT',
-                                  '2015-06-19 05:33:20',
-                                  '2015-05-27 22:33:20'])
-        arr = ['foo', 1.434692e+18, 1.432766e+18]
-        result = pd.to_datetime(arr, errors='coerce')
-        tm.assert_index_equal(result, expected)
-
-        expected = DatetimeIndex(['2015-06-19 05:33:20',
-                                  '2015-05-27 22:33:20',
-                                  'NaT',
-                                  'NaT'])
-        arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT']
-        result = pd.to_datetime(arr, errors='coerce')
-        tm.assert_index_equal(result, expected)
-
-    def test_unit_mixed(self):
-
-        # mixed integers/datetimes
-        expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT'])
-        arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18]
-        result = pd.to_datetime(arr, errors='coerce')
-        tm.assert_index_equal(result, expected)
-
-        with self.assertRaises(ValueError):
-            pd.to_datetime(arr, errors='raise')
-
-        expected = DatetimeIndex(['NaT',
-                                  'NaT',
-                                  '2013-01-01'])
-        arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')]
-        result = pd.to_datetime(arr, errors='coerce')
-        tm.assert_index_equal(result, expected)
-
-        with self.assertRaises(ValueError):
-            pd.to_datetime(arr, errors='raise')
-
-    def test_index_to_datetime(self):
-        idx = Index(['1/1/2000', '1/2/2000', '1/3/2000'])
-
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            result = idx.to_datetime()
-            expected = DatetimeIndex(pd.to_datetime(idx.values))
-            tm.assert_index_equal(result, expected)
-
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            today = datetime.today()
-            idx = Index([today], dtype=object)
-            result = idx.to_datetime()
-            expected = DatetimeIndex([today])
-            tm.assert_index_equal(result, expected)
-
-    def test_dataframe(self):
-
-        df = DataFrame({'year': [2015, 2016],
-                        'month': [2, 3],
-                        'day': [4, 5],
-                        'hour': [6, 7],
-                        'minute': [58, 59],
-                        'second': [10, 11],
-                        'ms': [1, 1],
-                        'us': [2, 2],
-                        'ns': [3, 3]})
-
-        result = to_datetime({'year': df['year'],
-                              'month': df['month'],
-                              'day': df['day']})
-        expected = Series([Timestamp('20150204 00:00:00'),
-                           Timestamp('20160305 00:0:00')])
-        assert_series_equal(result, expected)
-
-        # dict-like
-        result = to_datetime(df[['year', 'month', 'day']].to_dict())
-        assert_series_equal(result, expected)
-
-        # dict but with constructable
-        df2 = df[['year', 'month', 'day']].to_dict()
-        df2['month'] = 2
-        result = to_datetime(df2)
-        expected2 = Series([Timestamp('20150204 00:00:00'),
-                            Timestamp('20160205 00:0:00')])
-        assert_series_equal(result, expected2)
-
-        # unit mappings
-        units = [{'year': 'years',
-                  'month': 'months',
-                  'day': 'days',
-                  'hour': 'hours',
-                  'minute': 'minutes',
-                  'second': 'seconds'},
-                 {'year': 'year',
-                  'month': 'month',
-                  'day': 'day',
-                  'hour': 'hour',
-                  'minute': 'minute',
-                  'second': 'second'},
-                 ]
-
-        for d in units:
-            result = to_datetime(df[list(d.keys())].rename(columns=d))
-            expected = Series([Timestamp('20150204 06:58:10'),
-                               Timestamp('20160305 07:59:11')])
-            assert_series_equal(result, expected)
-
-        d = {'year': 'year',
-             'month': 'month',
-             'day': 'day',
-             'hour': 'hour',
-             'minute': 'minute',
-             'second': 'second',
-             'ms': 'ms',
-             'us': 'us',
-             'ns': 'ns'}
-
-        result = to_datetime(df.rename(columns=d))
-        expected = Series([Timestamp('20150204 06:58:10.001002003'),
-                           Timestamp('20160305 07:59:11.001002003')])
-        assert_series_equal(result, expected)
-
-        # coerce back to int
-        result = to_datetime(df.astype(str))
-        assert_series_equal(result, expected)
-
-        # passing coerce
-        df2 = DataFrame({'year': [2015, 2016],
-                         'month': [2, 20],
-                         'day': [4, 5]})
-        with self.assertRaises(ValueError):
-            to_datetime(df2)
-        result = to_datetime(df2, errors='coerce')
-        expected = Series([Timestamp('20150204 00:00:00'),
-                           pd.NaT])
-        assert_series_equal(result, expected)
-
-        # extra columns
-        with self.assertRaises(ValueError):
-            df2 = df.copy()
-            df2['foo'] = 1
-            to_datetime(df2)
-
-        # not enough
-        for c in [['year'],
-                  ['year', 'month'],
-                  ['year', 'month', 'second'],
-                  ['month', 'day'],
-                  ['year', 'day', 'second']]:
-            with self.assertRaises(ValueError):
-                to_datetime(df[c])
-
-        # duplicates
-        df2 = DataFrame({'year': [2015, 2016],
-                         'month': [2, 20],
-                         'day': [4, 5]})
-        df2.columns = ['year', 'year', 'day']
-        with self.assertRaises(ValueError):
-            to_datetime(df2)
-
-        df2 = DataFrame({'year': [2015, 2016],
-                         'month': [2, 20],
-                         'day': [4, 5],
-                         'hour': [4, 5]})
-        df2.columns = ['year', 'month', 'day', 'day']
-        with self.assertRaises(ValueError):
-            to_datetime(df2)
-
-    def test_dataframe_dtypes(self):
-        # #13451
-        df = DataFrame({'year': [2015, 2016],
-                        'month': [2, 3],
-                        'day': [4, 5]})
-
-        # int16
-        result = to_datetime(df.astype('int16'))
-        expected = Series([Timestamp('20150204 00:00:00'),
-                           Timestamp('20160305 00:00:00')])
-        assert_series_equal(result, expected)
-
-        # mixed dtypes
-        df['month'] = df['month'].astype('int8')
-        df['day'] = df['day'].astype('int8')
-        result = to_datetime(df)
-        expected = Series([Timestamp('20150204 00:00:00'),
-                           Timestamp('20160305 00:00:00')])
-        assert_series_equal(result, expected)
-
-        # float
-        df = DataFrame({'year': [2000, 2001],
-                        'month': [1.5, 1],
-                        'day': [1, 1]})
-        with self.assertRaises(ValueError):
-            to_datetime(df)
-
-
-class TestDatetime64(tm.TestCase):
-    """
-    Also test support for datetime64[ns] in Series / DataFrame
-    """
-
-    def setUp(self):
-        dti = DatetimeIndex(start=datetime(2005, 1, 1),
-                            end=datetime(2005, 1, 10), freq='Min')
-        self.series = Series(rand(len(dti)), dti)
-
-    def test_fancy_getitem(self):
-        dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1),
-                            end=datetime(2010, 1, 1))
-
-        s = Series(np.arange(len(dti)), index=dti)
-
-        self.assertEqual(s[48], 48)
-        self.assertEqual(s['1/2/2009'], 48)
-        self.assertEqual(s['2009-1-2'], 48)
-        self.assertEqual(s[datetime(2009, 1, 2)], 48)
-        self.assertEqual(s[lib.Timestamp(datetime(2009, 1, 2))], 48)
-        self.assertRaises(KeyError, s.__getitem__, '2009-1-3')
-
-        assert_series_equal(s['3/6/2009':'2009-06-05'],
-                            s[datetime(2009, 3, 6):datetime(2009, 6, 5)])
-
-    def test_fancy_setitem(self):
-        dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1),
-                            end=datetime(2010, 1, 1))
-
-        s = Series(np.arange(len(dti)), index=dti)
-        s[48] = -1
-        self.assertEqual(s[48], -1)
-        s['1/2/2009'] = -2
-        self.assertEqual(s[48], -2)
-        s['1/2/2009':'2009-06-05'] = -3
-        self.assertTrue((s[48:54] == -3).all())
-
-    def test_dti_snap(self):
-        dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002',
-                             '1/5/2002', '1/6/2002', '1/7/2002'], freq='D')
-
-        res = dti.snap(freq='W-MON')
-        exp = date_range('12/31/2001', '1/7/2002', freq='w-mon')
-        exp = exp.repeat([3, 4])
-        self.assertTrue((res == exp).all())
-
-        res = dti.snap(freq='B')
-
-        exp = date_range('1/1/2002', '1/7/2002', freq='b')
-        exp = exp.repeat([1, 1, 1, 2, 2])
-        self.assertTrue((res == exp).all())
-
-    def test_dti_reset_index_round_trip(self):
-        dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D')
-        d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti)
-        d2 = d1.reset_index()
-        self.assertEqual(d2.dtypes[0], np.dtype('M8[ns]'))
-        d3 = d2.set_index('index')
-        assert_frame_equal(d1, d3, check_names=False)
-
-        # #2329
-        stamp = datetime(2012, 11, 22)
-        df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value'])
-        df = df.set_index('Date')
-
-        self.assertEqual(df.index[0], stamp)
-        self.assertEqual(df.reset_index()['Date'][0], stamp)
-
-    def test_series_set_value(self):
-        # #1561
-
-        dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)]
-        index = DatetimeIndex(dates)
-
-        s = Series().set_value(dates[0], 1.)
-        s2 = s.set_value(dates[1], np.nan)
-
-        exp = Series([1., np.nan], index=index)
-
-        assert_series_equal(s2, exp)
-
-        # s = Series(index[:1], index[:1])
-        # s2 = s.set_value(dates[1], index[1])
-        # self.assertEqual(s2.values.dtype, 'M8[ns]')
-
-    @slow
-    def test_slice_locs_indexerror(self):
-        times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10)
-                 for i in range(100000)]
-        s = Series(lrange(100000), times)
-        s.loc[datetime(1900, 1, 1):datetime(2100, 1, 1)]
-
-    def test_slicing_datetimes(self):
-
-        # GH 7523
-
-        # unique
-        df = DataFrame(np.arange(4., dtype='float64'),
-                       index=[datetime(2001, 1, i, 10, 00)
-                              for i in [1, 2, 3, 4]])
-        result = df.loc[datetime(2001, 1, 1, 10):]
-        assert_frame_equal(result, df)
-        result = df.loc[:datetime(2001, 1, 4, 10)]
-        assert_frame_equal(result, df)
-        result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)]
-        assert_frame_equal(result, df)
-
-        result = df.loc[datetime(2001, 1, 1, 11):]
-        expected = df.iloc[1:]
-        assert_frame_equal(result, expected)
-        result = df.loc['20010101 11':]
-        assert_frame_equal(result, expected)
-
-        # duplicates
-        df = pd.DataFrame(np.arange(5., dtype='float64'),
-                          index=[datetime(2001, 1, i, 10, 00)
-                                 for i in [1, 2, 2, 3, 4]])
-
-        result = df.loc[datetime(2001, 1, 1, 10):]
-        assert_frame_equal(result, df)
-        result = df.loc[:datetime(2001, 1, 4, 10)]
-        assert_frame_equal(result, df)
-        result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)]
-        assert_frame_equal(result, df)
-
-        result = df.loc[datetime(2001, 1, 1, 11):]
-        expected = df.iloc[1:]
-        assert_frame_equal(result, expected)
-        result = df.loc['20010101 11':]
-        assert_frame_equal(result, expected)
-
-
-class TestSeriesDatetime64(tm.TestCase):
-    def setUp(self):
-        self.series = Series(date_range('1/1/2000', periods=10))
-
-    def test_auto_conversion(self):
-        series = Series(list(date_range('1/1/2000', periods=10)))
-        self.assertEqual(series.dtype, 'M8[ns]')
-
-    def test_constructor_cant_cast_datetime64(self):
-        msg = "Cannot cast datetime64 to "
-        with tm.assertRaisesRegexp(TypeError, msg):
-            Series(date_range('1/1/2000', periods=10), dtype=float)
-
-        with tm.assertRaisesRegexp(TypeError, msg):
-            Series(date_range('1/1/2000', periods=10), dtype=int)
-
-    def test_constructor_cast_object(self):
-        s = Series(date_range('1/1/2000', periods=10), dtype=object)
-        exp = Series(date_range('1/1/2000', periods=10))
-        tm.assert_series_equal(s, exp)
-
-    def test_series_comparison_scalars(self):
-        val = datetime(2000, 1, 4)
-        result = self.series > val
-        expected = Series([x > val for x in self.series])
-        self.assert_series_equal(result, expected)
-
-        val = self.series[5]
-        result = self.series > val
-        expected = Series([x > val for x in self.series])
-        self.assert_series_equal(result, expected)
-
-    def test_between(self):
-        left, right = self.series[[2, 7]]
-
-        result = self.series.between(left, right)
-        expected = (self.series >= left) & (self.series <= right)
-        assert_series_equal(result, expected)
-
-    # ---------------------------------------------------------------------
-    # NaT support
-
-    def test_NaT_scalar(self):
-        series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]')
-
-        val = series[3]
-        self.assertTrue(com.isnull(val))
-
-        series[2] = val
-        self.assertTrue(com.isnull(series[2]))
-
-    def test_NaT_cast(self):
-        # GH10747
-        result = Series([np.nan]).astype('M8[ns]')
-        expected = Series([NaT])
-        assert_series_equal(result, expected)
-
-    def test_set_none_nan(self):
-        self.series[3] = None
-        self.assertIs(self.series[3], NaT)
-
-        self.series[3:5] = None
-        self.assertIs(self.series[4], NaT)
-
-        self.series[5] = np.nan
-        self.assertIs(self.series[5], NaT)
-
-        self.series[5:7] = np.nan
-        self.assertIs(self.series[6], NaT)
-
-    def test_intercept_astype_object(self):
-
-        # this test no longer makes sense as series is by default already
-        # M8[ns]
-        expected = self.series.astype('object')
-
-        df = DataFrame({'a': self.series,
-                        'b': np.random.randn(len(self.series))})
-        exp_dtypes = pd.Series([np.dtype('datetime64[ns]'),
-                                np.dtype('float64')], index=['a', 'b'])
-        tm.assert_series_equal(df.dtypes, exp_dtypes)
-
-        result = df.values.squeeze()
-        self.assertTrue((result[:, 0] == expected.values).all())
-
-        df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)})
-
-        result = df.values.squeeze()
-        self.assertTrue((result[:, 0] == expected.values).all())
-
-    def test_nat_operations(self):
-        # GH 8617
-        s = Series([0, pd.NaT], dtype='m8[ns]')
-        exp = s[0]
-        self.assertEqual(s.median(), exp)
-        self.assertEqual(s.min(), exp)
-        self.assertEqual(s.max(), exp)
-
-    def test_round_nat(self):
-        # GH14940
-        s = Series([pd.NaT])
-        expected = Series(pd.NaT)
-        for method in ["round", "floor", "ceil"]:
-            round_method = getattr(s.dt, method)
-            for freq in ["s", "5s", "min", "5min", "h", "5h"]:
-                assert_series_equal(round_method(freq), expected)
-
-
-class TestTimestamp(tm.TestCase):
-    def test_class_ops_pytz(self):
-        tm._skip_if_no_pytz()
-        from pytz import timezone
-
-        def compare(x, y):
-            self.assertEqual(int(Timestamp(x).value / 1e9),
-                             int(Timestamp(y).value / 1e9))
-
-        compare(Timestamp.now(), datetime.now())
-        compare(Timestamp.now('UTC'), datetime.now(timezone('UTC')))
-        compare(Timestamp.utcnow(), datetime.utcnow())
-        compare(Timestamp.today(), datetime.today())
-        current_time = calendar.timegm(datetime.now().utctimetuple())
-        compare(Timestamp.utcfromtimestamp(current_time),
-                datetime.utcfromtimestamp(current_time))
-        compare(Timestamp.fromtimestamp(current_time),
-                datetime.fromtimestamp(current_time))
-
-        date_component = datetime.utcnow()
-        time_component = (date_component + timedelta(minutes=10)).time()
-        compare(Timestamp.combine(date_component, time_component),
-                datetime.combine(date_component, time_component))
-
-    def test_class_ops_dateutil(self):
-        tm._skip_if_no_dateutil()
-        from dateutil.tz import tzutc
-
-        def compare(x, y):
-            self.assertEqual(int(np.round(Timestamp(x).value / 1e9)),
-                             int(np.round(Timestamp(y).value / 1e9)))
-
-        compare(Timestamp.now(), datetime.now())
-        compare(Timestamp.now('UTC'), datetime.now(tzutc()))
-        compare(Timestamp.utcnow(), datetime.utcnow())
-        compare(Timestamp.today(), datetime.today())
-        current_time = calendar.timegm(datetime.now().utctimetuple())
-        compare(Timestamp.utcfromtimestamp(current_time),
-                datetime.utcfromtimestamp(current_time))
-        compare(Timestamp.fromtimestamp(current_time),
-                datetime.fromtimestamp(current_time))
-
-        date_component = datetime.utcnow()
-        time_component = (date_component + timedelta(minutes=10)).time()
-        compare(Timestamp.combine(date_component, time_component),
-                datetime.combine(date_component, time_component))
-
-    def test_basics_nanos(self):
-        val = np.int64(946684800000000000).view('M8[ns]')
-        stamp = Timestamp(val.view('i8') + 500)
-        self.assertEqual(stamp.year, 2000)
-        self.assertEqual(stamp.month, 1)
-        self.assertEqual(stamp.microsecond, 0)
-        self.assertEqual(stamp.nanosecond, 500)
-
-        # GH 14415
-        val = np.iinfo(np.int64).min + 80000000000000
-        stamp = Timestamp(val)
-        self.assertEqual(stamp.year, 1677)
-        self.assertEqual(stamp.month, 9)
-        self.assertEqual(stamp.day, 21)
-        self.assertEqual(stamp.microsecond, 145224)
-        self.assertEqual(stamp.nanosecond, 192)
-
-    def test_unit(self):
-
-        def check(val, unit=None, h=1, s=1, us=0):
-            stamp = Timestamp(val, unit=unit)
-            self.assertEqual(stamp.year, 2000)
-            self.assertEqual(stamp.month, 1)
-            self.assertEqual(stamp.day, 1)
-            self.assertEqual(stamp.hour, h)
-            if unit != 'D':
-                self.assertEqual(stamp.minute, 1)
-                self.assertEqual(stamp.second, s)
-                self.assertEqual(stamp.microsecond, us)
-            else:
-                self.assertEqual(stamp.minute, 0)
-                self.assertEqual(stamp.second, 0)
-                self.assertEqual(stamp.microsecond, 0)
-            self.assertEqual(stamp.nanosecond, 0)
-
-        ts = Timestamp('20000101 01:01:01')
-        val = ts.value
-        days = (ts - Timestamp('1970-01-01')).days
-
-        check(val)
-        check(val / long(1000), unit='us')
-        check(val / long(1000000), unit='ms')
-        check(val / long(1000000000), unit='s')
-        check(days, unit='D', h=0)
-
-        # using truediv, so these are like floats
-        if compat.PY3:
-            check((val + 500000) / long(1000000000), unit='s', us=500)
-            check((val + 500000000) / long(1000000000), unit='s', us=500000)
-            check((val + 500000) / long(1000000), unit='ms', us=500)
-
-        # get chopped in py2
-        else:
-            check((val + 500000) / long(1000000000), unit='s')
-            check((val + 500000000) / long(1000000000), unit='s')
-            check((val + 500000) / long(1000000), unit='ms')
-
-        # ok
-        check((val + 500000) / long(1000), unit='us', us=500)
-        check((val + 500000000) / long(1000000), unit='ms', us=500000)
-
-        # floats
-        check(val / 1000.0 + 5, unit='us', us=5)
-        check(val / 1000.0 + 5000, unit='us', us=5000)
-        check(val / 1000000.0 + 0.5, unit='ms', us=500)
-        check(val / 1000000.0 + 0.005, unit='ms', us=5)
-        check(val / 1000000000.0 + 0.5, unit='s', us=500000)
-        check(days + 0.5, unit='D', h=12)
-
-        # nan
-        result = Timestamp(np.nan)
-        self.assertIs(result, NaT)
-
-        result = Timestamp(None)
-        self.assertIs(result, NaT)
-
-        result = Timestamp(iNaT)
-        self.assertIs(result, NaT)
-
-        result = Timestamp(NaT)
-        self.assertIs(result, NaT)
-
-        result = Timestamp('NaT')
-        self.assertIs(result, NaT)
-
-        self.assertTrue(isnull(Timestamp('nat')))
-
-    def test_roundtrip(self):
-
-        # test value to string and back conversions
-        # further test accessors
-        base = Timestamp('20140101 00:00:00')
-
-        result = Timestamp(base.value + pd.Timedelta('5ms').value)
-        self.assertEqual(result, Timestamp(str(base) + ".005000"))
-        self.assertEqual(result.microsecond, 5000)
-
-        result = Timestamp(base.value + pd.Timedelta('5us').value)
-        self.assertEqual(result, Timestamp(str(base) + ".000005"))
-        self.assertEqual(result.microsecond, 5)
-
-        result = Timestamp(base.value + pd.Timedelta('5ns').value)
-        self.assertEqual(result, Timestamp(str(base) + ".000000005"))
-        self.assertEqual(result.nanosecond, 5)
-        self.assertEqual(result.microsecond, 0)
-
-        result = Timestamp(base.value + pd.Timedelta('6ms 5us').value)
-        self.assertEqual(result, Timestamp(str(base) + ".006005"))
-        self.assertEqual(result.microsecond, 5 + 6 * 1000)
-
-        result = Timestamp(base.value + pd.Timedelta('200ms 5us').value)
-        self.assertEqual(result, Timestamp(str(base) + ".200005"))
-        self.assertEqual(result.microsecond, 5 + 200 * 1000)
-
-    def test_comparison(self):
-        # 5-18-2012 00:00:00.000
-        stamp = long(1337299200000000000)
-
-        val = Timestamp(stamp)
-
-        self.assertEqual(val, val)
-        self.assertFalse(val != val)
-        self.assertFalse(val < val)
-        self.assertTrue(val <= val)
-        self.assertFalse(val > val)
-        self.assertTrue(val >= val)
-
-        other = datetime(2012, 5, 18)
-        self.assertEqual(val, other)
-        self.assertFalse(val != other)
-        self.assertFalse(val < other)
-        self.assertTrue(val <= other)
-        self.assertFalse(val > other)
-        self.assertTrue(val >= other)
-
-        other = Timestamp(stamp + 100)
-
-        self.assertNotEqual(val, other)
-        self.assertNotEqual(val, other)
-        self.assertTrue(val < other)
-        self.assertTrue(val <= other)
-        self.assertTrue(other > val)
-        self.assertTrue(other >= val)
-
-    def test_compare_invalid(self):
-
-        # GH 8058
-        val = Timestamp('20130101 12:01:02')
-        self.assertFalse(val == 'foo')
-        self.assertFalse(val == 10.0)
-        self.assertFalse(val == 1)
-        self.assertFalse(val == long(1))
-        self.assertFalse(val == [])
-        self.assertFalse(val == {'foo': 1})
-        self.assertFalse(val == np.float64(1))
-        self.assertFalse(val == np.int64(1))
-
-        self.assertTrue(val != 'foo')
-        self.assertTrue(val != 10.0)
-        self.assertTrue(val != 1)
-        self.assertTrue(val != long(1))
-        self.assertTrue(val != [])
-        self.assertTrue(val != {'foo': 1})
-        self.assertTrue(val != np.float64(1))
-        self.assertTrue(val != np.int64(1))
-
-        # ops testing
-        df = DataFrame(randn(5, 2))
-        a = df[0]
-        b = Series(randn(5))
-        b.name = Timestamp('2000-01-01')
-        tm.assert_series_equal(a / b, 1 / (b / a))
-
-    def test_cant_compare_tz_naive_w_aware(self):
-        tm._skip_if_no_pytz()
-        # #1404
-        a = Timestamp('3/12/2012')
-        b = Timestamp('3/12/2012', tz='utc')
-
-        self.assertRaises(Exception, a.__eq__, b)
-        self.assertRaises(Exception, a.__ne__, b)
-        self.assertRaises(Exception, a.__lt__, b)
-        self.assertRaises(Exception, a.__gt__, b)
-        self.assertRaises(Exception, b.__eq__, a)
-        self.assertRaises(Exception, b.__ne__, a)
-        self.assertRaises(Exception, b.__lt__, a)
-        self.assertRaises(Exception, b.__gt__, a)
-
-        if sys.version_info < (3, 3):
-            self.assertRaises(Exception, a.__eq__, b.to_pydatetime())
-            self.assertRaises(Exception, a.to_pydatetime().__eq__, b)
-        else:
-            self.assertFalse(a == b.to_pydatetime())
-            self.assertFalse(a.to_pydatetime() == b)
-
-    def test_cant_compare_tz_naive_w_aware_explicit_pytz(self):
-        tm._skip_if_no_pytz()
-        from pytz import utc
-        # #1404
-        a = Timestamp('3/12/2012')
-        b = Timestamp('3/12/2012', tz=utc)
-
-        self.assertRaises(Exception, a.__eq__, b)
-        self.assertRaises(Exception, a.__ne__, b)
-        self.assertRaises(Exception, a.__lt__, b)
-        self.assertRaises(Exception, a.__gt__, b)
-        self.assertRaises(Exception, b.__eq__, a)
-        self.assertRaises(Exception, b.__ne__, a)
-        self.assertRaises(Exception, b.__lt__, a)
-        self.assertRaises(Exception, b.__gt__, a)
-
-        if sys.version_info < (3, 3):
-            self.assertRaises(Exception, a.__eq__, b.to_pydatetime())
-            self.assertRaises(Exception, a.to_pydatetime().__eq__, b)
-        else:
-            self.assertFalse(a == b.to_pydatetime())
-            self.assertFalse(a.to_pydatetime() == b)
-
-    def test_cant_compare_tz_naive_w_aware_dateutil(self):
-        tm._skip_if_no_dateutil()
-        from dateutil.tz import tzutc
-        utc = tzutc()
-        # #1404
-        a = Timestamp('3/12/2012')
-        b = Timestamp('3/12/2012', tz=utc)
-
-        self.assertRaises(Exception, a.__eq__, b)
-        self.assertRaises(Exception, a.__ne__, b)
-        self.assertRaises(Exception, a.__lt__, b)
-        self.assertRaises(Exception, a.__gt__, b)
-        self.assertRaises(Exception, b.__eq__, a)
-        self.assertRaises(Exception, b.__ne__, a)
-        self.assertRaises(Exception, b.__lt__, a)
-        self.assertRaises(Exception, b.__gt__, a)
-
-        if sys.version_info < (3, 3):
-            self.assertRaises(Exception, a.__eq__, b.to_pydatetime())
-            self.assertRaises(Exception, a.to_pydatetime().__eq__, b)
-        else:
-            self.assertFalse(a == b.to_pydatetime())
self.assertFalse(a.to_pydatetime() == b) - - def test_delta_preserve_nanos(self): - val = Timestamp(long(1337299200000000123)) - result = val + timedelta(1) - self.assertEqual(result.nanosecond, val.nanosecond) - - def test_frequency_misc(self): - self.assertEqual(frequencies.get_freq_group('T'), - frequencies.FreqGroup.FR_MIN) - - code, stride = frequencies.get_freq_code(offsets.Hour()) - self.assertEqual(code, frequencies.FreqGroup.FR_HR) - - code, stride = frequencies.get_freq_code((5, 'T')) - self.assertEqual(code, frequencies.FreqGroup.FR_MIN) - self.assertEqual(stride, 5) - - offset = offsets.Hour() - result = frequencies.to_offset(offset) - self.assertEqual(result, offset) - - result = frequencies.to_offset((5, 'T')) - expected = offsets.Minute(5) - self.assertEqual(result, expected) - - self.assertRaises(ValueError, frequencies.get_freq_code, (5, 'baz')) - - self.assertRaises(ValueError, frequencies.to_offset, '100foo') - - self.assertRaises(ValueError, frequencies.to_offset, ('', '')) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = frequencies.get_standard_freq(offsets.Hour()) - self.assertEqual(result, 'H') - - def test_hash_equivalent(self): - d = {datetime(2011, 1, 1): 5} - stamp = Timestamp(datetime(2011, 1, 1)) - self.assertEqual(d[stamp], 5) - - def test_timestamp_compare_scalars(self): - # case where ndim == 0 - lhs = np.datetime64(datetime(2013, 12, 6)) - rhs = Timestamp('now') - nat = Timestamp('nat') - - ops = {'gt': 'lt', - 'lt': 'gt', - 'ge': 'le', - 'le': 'ge', - 'eq': 'eq', - 'ne': 'ne'} - - for left, right in ops.items(): - left_f = getattr(operator, left) - right_f = getattr(operator, right) - expected = left_f(lhs, rhs) - - result = right_f(rhs, lhs) - self.assertEqual(result, expected) - - expected = left_f(rhs, nat) - result = right_f(nat, rhs) - self.assertEqual(result, expected) - - def test_timestamp_compare_series(self): - # make sure we can compare Timestamps on the right AND left hand side - # GH4982 - s = Series(date_range('20010101', periods=10), name='dates') - s_nat = s.copy(deep=True) - - s[0] = pd.Timestamp('nat') - s[3] = pd.Timestamp('nat') - - ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} - - for left, right in ops.items(): - left_f = getattr(operator, left) - right_f = getattr(operator, right) - - # no nats - expected = left_f(s, Timestamp('20010109')) - result = right_f(Timestamp('20010109'), s) - tm.assert_series_equal(result, expected) - - # nats - expected = left_f(s, Timestamp('nat')) - result = right_f(Timestamp('nat'), s) - tm.assert_series_equal(result, expected) - - # compare to timestamp with series containing nats - expected = left_f(s_nat, Timestamp('20010109')) - result = right_f(Timestamp('20010109'), s_nat) - tm.assert_series_equal(result, expected) - - # compare to nat with series containing nats - expected = left_f(s_nat, Timestamp('nat')) - result = right_f(Timestamp('nat'), s_nat) - tm.assert_series_equal(result, expected) - - def test_is_leap_year(self): - # GH 13727 - for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: - dt = Timestamp('2000-01-01 00:00:00', tz=tz) - self.assertTrue(dt.is_leap_year) - self.assertIsInstance(dt.is_leap_year, bool) - - dt = Timestamp('1999-01-01 00:00:00', tz=tz) - self.assertFalse(dt.is_leap_year) - - dt = Timestamp('2004-01-01 00:00:00', tz=tz) - self.assertTrue(dt.is_leap_year) - - dt = Timestamp('2100-01-01 00:00:00', tz=tz) - self.assertFalse(dt.is_leap_year) - - self.assertFalse(pd.NaT.is_leap_year) - 
self.assertIsInstance(pd.NaT.is_leap_year, bool)
-
- def test_round_nat(self):
- # GH14940
- ts = Timestamp('nat')
- for method in ["round", "floor", "ceil"]:
- round_method = getattr(ts, method)
- for freq in ["s", "5s", "min", "5min", "h", "5h"]:
- self.assertIs(round_method(freq), ts)
-
-
-class TestSlicing(tm.TestCase):
- def test_slice_year(self):
- dti = DatetimeIndex(freq='B', start=datetime(2005, 1, 1), periods=500)
-
- s = Series(np.arange(len(dti)), index=dti)
- result = s['2005']
- expected = s[s.index.year == 2005]
- assert_series_equal(result, expected)
-
- df = DataFrame(np.random.rand(len(dti), 5), index=dti)
- result = df.loc['2005']
- expected = df[df.index.year == 2005]
- assert_frame_equal(result, expected)
-
- rng = date_range('1/1/2000', '1/1/2010')
-
- result = rng.get_loc('2009')
- expected = slice(3288, 3653)
- self.assertEqual(result, expected)
-
- def test_slice_quarter(self):
- dti = DatetimeIndex(freq='D', start=datetime(2000, 6, 1), periods=500)
-
- s = Series(np.arange(len(dti)), index=dti)
- self.assertEqual(len(s['2001Q1']), 90)
-
- df = DataFrame(np.random.rand(len(dti), 5), index=dti)
- self.assertEqual(len(df.loc['1Q01']), 90)
-
- def test_slice_month(self):
- dti = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500)
- s = Series(np.arange(len(dti)), index=dti)
- self.assertEqual(len(s['2005-11']), 30)
-
- df = DataFrame(np.random.rand(len(dti), 5), index=dti)
- self.assertEqual(len(df.loc['2005-11']), 30)
-
- assert_series_equal(s['2005-11'], s['11-2005'])
-
- def test_partial_slice(self):
- rng = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500)
- s = Series(np.arange(len(rng)), index=rng)
-
- result = s['2005-05':'2006-02']
- expected = s['20050501':'20060228']
- assert_series_equal(result, expected)
-
- result = s['2005-05':]
- expected = s['20050501':]
- assert_series_equal(result, expected)
-
- result = s[:'2006-02']
- expected = s[:'20060228']
- assert_series_equal(result, expected)
-
- result = s['2005-1-1']
- self.assertEqual(result, s.iloc[0])
-
- self.assertRaises(Exception, s.__getitem__, '2004-12-31')
-
- def test_partial_slice_daily(self):
- rng = DatetimeIndex(freq='H', start=datetime(2005, 1, 31), periods=500)
- s = Series(np.arange(len(rng)), index=rng)
-
- result = s['2005-1-31']
- assert_series_equal(result, s.iloc[:24])
-
- self.assertRaises(Exception, s.__getitem__, '2004-12-31 00')
-
- def test_partial_slice_hourly(self):
- rng = DatetimeIndex(freq='T', start=datetime(2005, 1, 1, 20, 0, 0),
- periods=500)
- s = Series(np.arange(len(rng)), index=rng)
-
- result = s['2005-1-1']
- assert_series_equal(result, s.iloc[:60 * 4])
-
- result = s['2005-1-1 20']
- assert_series_equal(result, s.iloc[:60])
-
- self.assertEqual(s['2005-1-1 20:00'], s.iloc[0])
- self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:15')
-
- def test_partial_slice_minutely(self):
- rng = DatetimeIndex(freq='S', start=datetime(2005, 1, 1, 23, 59, 0),
- periods=500)
- s = Series(np.arange(len(rng)), index=rng)
-
- result = s['2005-1-1 23:59']
- assert_series_equal(result, s.iloc[:60])
-
- result = s['2005-1-1']
- assert_series_equal(result, s.iloc[:60])
-
- self.assertEqual(s[Timestamp('2005-1-1 23:59:00')], s.iloc[0])
- self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:00:00')
-
- def test_partial_slice_second_precision(self):
- rng = DatetimeIndex(start=datetime(2005, 1, 1, 0, 0, 59,
- microsecond=999990),
- periods=20, freq='US')
- s = Series(np.arange(20), rng)
-
- assert_series_equal(s['2005-1-1 00:00'], 
s.iloc[:10]) - assert_series_equal(s['2005-1-1 00:00:59'], s.iloc[:10]) - - assert_series_equal(s['2005-1-1 00:01'], s.iloc[10:]) - assert_series_equal(s['2005-1-1 00:01:00'], s.iloc[10:]) - - self.assertEqual(s[Timestamp('2005-1-1 00:00:59.999990')], s.iloc[0]) - self.assertRaisesRegexp(KeyError, '2005-1-1 00:00:00', - lambda: s['2005-1-1 00:00:00']) - - def test_partial_slicing_dataframe(self): - # GH14856 - # Test various combinations of string slicing resolution vs. - # index resolution - # - If string resolution is less precise than index resolution, - # string is considered a slice - # - If string resolution is equal to or more precise than index - # resolution, string is considered an exact match - formats = ['%Y', '%Y-%m', '%Y-%m-%d', '%Y-%m-%d %H', - '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S'] - resolutions = ['year', 'month', 'day', 'hour', 'minute', 'second'] - for rnum, resolution in enumerate(resolutions[2:], 2): - # we check only 'day', 'hour', 'minute' and 'second' - unit = Timedelta("1 " + resolution) - middate = datetime(2012, 1, 1, 0, 0, 0) - index = DatetimeIndex([middate - unit, - middate, middate + unit]) - values = [1, 2, 3] - df = DataFrame({'a': values}, index, dtype=np.int64) - self.assertEqual(df.index.resolution, resolution) - - # Timestamp with the same resolution as index - # Should be exact match for Series (return scalar) - # and raise KeyError for Frame - for timestamp, expected in zip(index, values): - ts_string = timestamp.strftime(formats[rnum]) - # make ts_string as precise as index - result = df['a'][ts_string] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, expected) - self.assertRaises(KeyError, df.__getitem__, ts_string) - - # Timestamp with resolution less precise than index - for fmt in formats[:rnum]: - for element, theslice in [[0, slice(None, 1)], - [1, slice(1, None)]]: - ts_string = index[element].strftime(fmt) - - # Series should return slice - result = df['a'][ts_string] - expected = df['a'][theslice] - assert_series_equal(result, expected) - - # Frame should return slice as well - result = df[ts_string] - expected = df[theslice] - assert_frame_equal(result, expected) - - # Timestamp with resolution more precise than index - # Compatible with existing key - # Should return scalar for Series - # and raise KeyError for Frame - for fmt in formats[rnum + 1:]: - ts_string = index[1].strftime(fmt) - result = df['a'][ts_string] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, 2) - self.assertRaises(KeyError, df.__getitem__, ts_string) - - # Not compatible with existing key - # Should raise KeyError - for fmt, res in list(zip(formats, resolutions))[rnum + 1:]: - ts = index[1] + Timedelta("1 " + res) - ts_string = ts.strftime(fmt) - self.assertRaises(KeyError, df['a'].__getitem__, ts_string) - self.assertRaises(KeyError, df.__getitem__, ts_string) - - def test_partial_slicing_with_multiindex(self): - - # GH 4758 - # partial string indexing with a multi-index buggy - df = DataFrame({'ACCOUNT': ["ACCT1", "ACCT1", "ACCT1", "ACCT2"], - 'TICKER': ["ABC", "MNP", "XYZ", "XYZ"], - 'val': [1, 2, 3, 4]}, - index=date_range("2013-06-19 09:30:00", - periods=4, freq='5T')) - df_multi = df.set_index(['ACCOUNT', 'TICKER'], append=True) - - expected = DataFrame([ - [1] - ], index=Index(['ABC'], name='TICKER'), columns=['val']) - result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1')] - assert_frame_equal(result, expected) - - expected = df_multi.loc[ - (pd.Timestamp('2013-06-19 09:30:00', tz=None), 'ACCT1', 'ABC')] - result = 
df_multi.loc[('2013-06-19 09:30:00', 'ACCT1', 'ABC')] - assert_series_equal(result, expected) - - # this is a KeyError as we don't do partial string selection on - # multi-levels - def f(): - df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')] - - self.assertRaises(KeyError, f) - - # GH 4294 - # partial slice on a series mi - s = pd.DataFrame(randn(1000, 1000), index=pd.date_range( - '2000-1-1', periods=1000)).stack() - - s2 = s[:-1].copy() - expected = s2['2000-1-4'] - result = s2[pd.Timestamp('2000-1-4')] - assert_series_equal(result, expected) - - result = s[pd.Timestamp('2000-1-4')] - expected = s['2000-1-4'] - assert_series_equal(result, expected) - - df2 = pd.DataFrame(s) - expected = df2.xs('2000-1-4') - result = df2.loc[pd.Timestamp('2000-1-4')] - assert_frame_equal(result, expected) - - def test_date_range_normalize(self): - snap = datetime.today() - n = 50 - - rng = date_range(snap, periods=n, normalize=False, freq='2D') - - offset = timedelta(2) - values = DatetimeIndex([snap + i * offset for i in range(n)]) - - tm.assert_index_equal(rng, values) - - rng = date_range('1/1/2000 08:15', periods=n, normalize=False, - freq='B') - the_time = time(8, 15) - for val in rng: - self.assertEqual(val.time(), the_time) - - def test_timedelta(self): - # this is valid too - index = date_range('1/1/2000', periods=50, freq='B') - shifted = index + timedelta(1) - back = shifted + timedelta(-1) - self.assertTrue(tm.equalContents(index, back)) - self.assertEqual(shifted.freq, index.freq) - self.assertEqual(shifted.freq, back.freq) - - result = index - timedelta(1) - expected = index + timedelta(-1) - tm.assert_index_equal(result, expected) - - # GH4134, buggy with timedeltas - rng = date_range('2013', '2014') - s = Series(rng) - result1 = rng - pd.offsets.Hour(1) - result2 = DatetimeIndex(s - np.timedelta64(100000000)) - result3 = rng - np.timedelta64(100000000) - result4 = DatetimeIndex(s - pd.offsets.Hour(1)) - tm.assert_index_equal(result1, result4) - tm.assert_index_equal(result2, result3) - - def test_shift(self): - ts = Series(np.random.randn(5), - index=date_range('1/1/2000', periods=5, freq='H')) - - result = ts.shift(1, freq='5T') - exp_index = ts.index.shift(1, freq='5T') - tm.assert_index_equal(result.index, exp_index) - - # GH #1063, multiple of same base - result = ts.shift(1, freq='4H') - exp_index = ts.index + offsets.Hour(4) - tm.assert_index_equal(result.index, exp_index) - - idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) - self.assertRaises(ValueError, idx.shift, 1) - - def test_setops_preserve_freq(self): - for tz in [None, 'Asia/Tokyo', 'US/Eastern']: - rng = date_range('1/1/2000', '1/1/2002', name='idx', tz=tz) - - result = rng[:50].union(rng[50:100]) - self.assertEqual(result.name, rng.name) - self.assertEqual(result.freq, rng.freq) - self.assertEqual(result.tz, rng.tz) - - result = rng[:50].union(rng[30:100]) - self.assertEqual(result.name, rng.name) - self.assertEqual(result.freq, rng.freq) - self.assertEqual(result.tz, rng.tz) - - result = rng[:50].union(rng[60:100]) - self.assertEqual(result.name, rng.name) - self.assertIsNone(result.freq) - self.assertEqual(result.tz, rng.tz) - - result = rng[:50].intersection(rng[25:75]) - self.assertEqual(result.name, rng.name) - self.assertEqual(result.freqstr, 'D') - self.assertEqual(result.tz, rng.tz) - - nofreq = DatetimeIndex(list(rng[25:75]), name='other') - result = rng[:50].union(nofreq) - self.assertIsNone(result.name) - self.assertEqual(result.freq, rng.freq) - self.assertEqual(result.tz, rng.tz) - - result = 
rng[:50].intersection(nofreq)
- self.assertIsNone(result.name)
- self.assertEqual(result.freq, rng.freq)
- self.assertEqual(result.tz, rng.tz)
-
- def test_min_max(self):
- rng = date_range('1/1/2000', '12/31/2000')
- rng2 = rng.take(np.random.permutation(len(rng)))
-
- the_min = rng2.min()
- the_max = rng2.max()
- tm.assertIsInstance(the_min, Timestamp)
- tm.assertIsInstance(the_max, Timestamp)
- self.assertEqual(the_min, rng[0])
- self.assertEqual(the_max, rng[-1])
-
- self.assertEqual(rng.min(), rng[0])
- self.assertEqual(rng.max(), rng[-1])
-
- def test_min_max_series(self):
- rng = date_range('1/1/2000', periods=10, freq='4h')
- lvls = ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C', 'C']
- df = DataFrame({'TS': rng, 'V': np.random.randn(len(rng)), 'L': lvls})
-
- result = df.TS.max()
- exp = Timestamp(df.TS.iat[-1])
- self.assertTrue(isinstance(result, Timestamp))
- self.assertEqual(result, exp)
-
- result = df.TS.min()
- exp = Timestamp(df.TS.iat[0])
- self.assertTrue(isinstance(result, Timestamp))
- self.assertEqual(result, exp)
-
- def test_from_M8_structured(self):
- dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))]
- arr = np.array(dates,
- dtype=[('Date', 'M8[us]'), ('Forecasting', 'M8[us]')])
- df = DataFrame(arr)
-
- self.assertEqual(df['Date'][0], dates[0][0])
- self.assertEqual(df['Forecasting'][0], dates[0][1])
-
- s = Series(arr['Date'])
- self.assertIsInstance(s[0], Timestamp)
- self.assertEqual(s[0], dates[0][0])
-
- s = Series.from_array(arr['Date'], Index([0]))
- self.assertEqual(s[0], dates[0][0])
-
- def test_get_level_values_box(self):
- from pandas import MultiIndex
-
- dates = date_range('1/1/2000', periods=4)
- levels = [dates, [0, 1]]
- labels = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]]
-
- index = MultiIndex(levels=levels, labels=labels)
-
- self.assertTrue(isinstance(index.get_level_values(0)[0], Timestamp))
-
- def test_frame_apply_dont_convert_datetime64(self):
- from pandas.tseries.offsets import BDay
- df = DataFrame({'x1': [datetime(1996, 1, 1)]})
-
- df = df.applymap(lambda x: x + BDay())
- df = df.applymap(lambda x: x + BDay())
-
- self.assertTrue(df.x1.dtype == 'M8[ns]')
-
- def test_date_range_fy5252(self):
- dr = date_range(start="2013-01-01", periods=2, freq=offsets.FY5253(
- startingMonth=1, weekday=3, variation="nearest"))
- self.assertEqual(dr[0], Timestamp('2013-01-31'))
- self.assertEqual(dr[1], Timestamp('2014-01-30'))
-
- def test_partial_slice_doesnt_require_monotonicity(self):
- # For historical reasons. 
- s = pd.Series(np.arange(10), pd.date_range('2014-01-01', periods=10)) - - nonmonotonic = s[[3, 5, 4]] - expected = nonmonotonic.iloc[:0] - timestamp = pd.Timestamp('2014-01-10') - - assert_series_equal(nonmonotonic['2014-01-10':], expected) - self.assertRaisesRegexp(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic[timestamp:]) - - assert_series_equal(nonmonotonic.loc['2014-01-10':], expected) - self.assertRaisesRegexp(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic.loc[timestamp:]) - - -class TimeConversionFormats(tm.TestCase): - def test_to_datetime_format(self): - values = ['1/1/2000', '1/2/2000', '1/3/2000'] - - results1 = [Timestamp('20000101'), Timestamp('20000201'), - Timestamp('20000301')] - results2 = [Timestamp('20000101'), Timestamp('20000102'), - Timestamp('20000103')] - for vals, expecteds in [(values, (Index(results1), Index(results2))), - (Series(values), - (Series(results1), Series(results2))), - (values[0], (results1[0], results2[0])), - (values[1], (results1[1], results2[1])), - (values[2], (results1[2], results2[2]))]: - - for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']): - result = to_datetime(vals, format=fmt) - expected = expecteds[i] - - if isinstance(expected, Series): - assert_series_equal(result, Series(expected)) - elif isinstance(expected, Timestamp): - self.assertEqual(result, expected) - else: - tm.assert_index_equal(result, expected) - - def test_to_datetime_format_YYYYMMDD(self): - s = Series([19801222, 19801222] + [19810105] * 5) - expected = Series([Timestamp(x) for x in s.apply(str)]) - - result = to_datetime(s, format='%Y%m%d') - assert_series_equal(result, expected) - - result = to_datetime(s.apply(str), format='%Y%m%d') - assert_series_equal(result, expected) - - # with NaT - expected = Series([Timestamp("19801222"), Timestamp("19801222")] + - [Timestamp("19810105")] * 5) - expected[2] = np.nan - s[2] = np.nan - - result = to_datetime(s, format='%Y%m%d') - assert_series_equal(result, expected) - - # string with NaT - s = s.apply(str) - s[2] = 'nat' - result = to_datetime(s, format='%Y%m%d') - assert_series_equal(result, expected) - - # coercion - # GH 7930 - s = Series([20121231, 20141231, 99991231]) - result = pd.to_datetime(s, format='%Y%m%d', errors='ignore') - expected = Series([datetime(2012, 12, 31), - datetime(2014, 12, 31), datetime(9999, 12, 31)], - dtype=object) - self.assert_series_equal(result, expected) - - result = pd.to_datetime(s, format='%Y%m%d', errors='coerce') - expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]') - assert_series_equal(result, expected) - - # GH 10178 - def test_to_datetime_format_integer(self): - s = Series([2000, 2001, 2002]) - expected = Series([Timestamp(x) for x in s.apply(str)]) - - result = to_datetime(s, format='%Y') - assert_series_equal(result, expected) - - s = Series([200001, 200105, 200206]) - expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str) - ]) - - result = to_datetime(s, format='%Y%m') - assert_series_equal(result, expected) - - def test_to_datetime_format_microsecond(self): - - # these are locale dependent - lang, _ = locale.getlocale() - month_abbr = calendar.month_abbr[4] - val = '01-{}-2011 00:00:01.978'.format(month_abbr) - - format = '%d-%b-%Y %H:%M:%S.%f' - result = to_datetime(val, format=format) - exp = datetime.strptime(val, format) - self.assertEqual(result, exp) - - def test_to_datetime_format_time(self): - data = [ - ['01/10/2010 15:20', '%m/%d/%Y %H:%M', - Timestamp('2010-01-10 15:20')], - 
['01/10/2010 05:43', '%m/%d/%Y %I:%M',
- Timestamp('2010-01-10 05:43')],
- ['01/10/2010 13:56:01', '%m/%d/%Y %H:%M:%S',
- Timestamp('2010-01-10 13:56:01')] # ,
- # ['01/10/2010 08:14 PM', '%m/%d/%Y %I:%M %p',
- # Timestamp('2010-01-10 20:14')],
- # ['01/10/2010 07:40 AM', '%m/%d/%Y %I:%M %p',
- # Timestamp('2010-01-10 07:40')],
- # ['01/10/2010 09:12:56 AM', '%m/%d/%Y %I:%M:%S %p',
- # Timestamp('2010-01-10 09:12:56')]
- ]
- for s, format, dt in data:
- self.assertEqual(to_datetime(s, format=format), dt)
-
- def test_to_datetime_with_non_exact(self):
- # GH 10834
- _skip_if_has_locale()
-
- # 8904
- # exact kw
- if sys.version_info < (2, 7):
- raise nose.SkipTest('on python version < 2.7')
-
- s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00',
- '19MAY11 00:00:00Z'])
- result = to_datetime(s, format='%d%b%y', exact=False)
- expected = to_datetime(s.str.extract(r'(\d+\w+\d+)', expand=False),
- format='%d%b%y')
- assert_series_equal(result, expected)
-
- def test_parse_nanoseconds_with_formula(self):
-
- # GH8989
- # truncating the nanoseconds when a format was provided
- for v in ["2012-01-01 09:00:00.000000001",
- "2012-01-01 09:00:00.000001",
- "2012-01-01 09:00:00.001",
- "2012-01-01 09:00:00.001000",
- "2012-01-01 09:00:00.001000000", ]:
- expected = pd.to_datetime(v)
- result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f")
- self.assertEqual(result, expected)
-
- def test_to_datetime_format_weeks(self):
- data = [
- ['2009324', '%Y%W%w', Timestamp('2009-08-13')],
- ['2013020', '%Y%U%w', Timestamp('2013-01-13')]
- ]
- for s, format, dt in data:
- self.assertEqual(to_datetime(s, format=format), dt)
-
-
-class TestToDatetimeInferFormat(tm.TestCase):
-
- def test_to_datetime_infer_datetime_format_consistent_format(self):
- s = pd.Series(pd.date_range('20000101', periods=50, freq='H'))
-
- test_formats = ['%m-%d-%Y', '%m/%d/%Y %H:%M:%S.%f',
- '%Y-%m-%dT%H:%M:%S.%f']
-
- for test_format in test_formats:
- s_as_dt_strings = s.apply(lambda x: x.strftime(test_format))
-
- with_format = pd.to_datetime(s_as_dt_strings, format=test_format)
- no_infer = pd.to_datetime(s_as_dt_strings,
- infer_datetime_format=False)
- yes_infer = pd.to_datetime(s_as_dt_strings,
- infer_datetime_format=True)
-
- # Whether the format is explicitly passed, it is inferred, or
- # it is not inferred, the results should all be the same
- self.assert_series_equal(with_format, no_infer)
- self.assert_series_equal(no_infer, yes_infer)
-
- def test_to_datetime_infer_datetime_format_inconsistent_format(self):
- s = pd.Series(np.array(['01/01/2011 00:00:00',
- '01-02-2011 00:00:00',
- '2011-01-03T00:00:00']))
-
- # When the format is inconsistent, infer_datetime_format should just
- # fall back to the default parsing
- tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False),
- pd.to_datetime(s, infer_datetime_format=True))
-
- s = pd.Series(np.array(['Jan/01/2011', 'Feb/01/2011', 'Mar/01/2011']))
-
- tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False),
- pd.to_datetime(s, infer_datetime_format=True))
-
- def test_to_datetime_infer_datetime_format_series_with_nans(self):
- s = pd.Series(np.array(['01/01/2011 00:00:00', np.nan,
- '01/03/2011 00:00:00', np.nan]))
- tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False),
- pd.to_datetime(s, infer_datetime_format=True))
-
- def test_to_datetime_infer_datetime_format_series_starting_with_nans(self):
- s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00',
- '01/02/2011 00:00:00', '01/03/2011 00:00:00']))
-
- tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False),
- pd.to_datetime(s, infer_datetime_format=True))
-
- def test_to_datetime_iso8601_noleading_0s(self):
- # GH 11871
- s = pd.Series(['2014-1-1', '2014-2-2', '2015-3-3'])
- expected = pd.Series([pd.Timestamp('2014-01-01'),
- pd.Timestamp('2014-02-02'),
- pd.Timestamp('2015-03-03')])
- tm.assert_series_equal(pd.to_datetime(s), expected)
- tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d'), expected)
-
-
-class TestGuessDatetimeFormat(tm.TestCase):
-
- def test_guess_datetime_format_with_parseable_formats(self):
- tm._skip_if_not_us_locale()
- dt_string_to_format = (('20111230', '%Y%m%d'),
- ('2011-12-30', '%Y-%m-%d'),
- ('30-12-2011', '%d-%m-%Y'),
- ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'),
- ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'),
- ('2011-12-30 00:00:00.000000',
- '%Y-%m-%d %H:%M:%S.%f'), )
-
- for dt_string, dt_format in dt_string_to_format:
- self.assertEqual(
- tools._guess_datetime_format(dt_string),
- dt_format
- )
-
- def test_guess_datetime_format_with_dayfirst(self):
- ambiguous_string = '01/01/2011'
- self.assertEqual(
- tools._guess_datetime_format(ambiguous_string, dayfirst=True),
- '%d/%m/%Y'
- )
- self.assertEqual(
- tools._guess_datetime_format(ambiguous_string, dayfirst=False),
- '%m/%d/%Y'
- )
-
- def test_guess_datetime_format_with_locale_specific_formats(self):
- # The month names will vary depending on the locale, in which
- # case these won't be parsed properly (dateutil can't parse them)
- _skip_if_has_locale()
-
- dt_string_to_format = (('30/Dec/2011', '%d/%b/%Y'),
- ('30/December/2011', '%d/%B/%Y'),
- ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'), )
-
- for dt_string, dt_format in dt_string_to_format:
- self.assertEqual(
- tools._guess_datetime_format(dt_string),
- dt_format
- )
-
- def test_guess_datetime_format_invalid_inputs(self):
- # A datetime string must include a year, month and a day for it
- # to be guessable, in addition to being a string that looks like
- # a datetime
- invalid_dts = [
- '2013',
- '01/2013',
- '12:00:00',
- '1/1/1/1',
- 'this_is_not_a_datetime',
- '51a',
- 9,
- datetime(2011, 1, 1),
- ]
-
- for invalid_dt in invalid_dts:
- self.assertTrue(tools._guess_datetime_format(invalid_dt) is None)
-
- def test_guess_datetime_format_nopadding(self):
- # GH 11142
- dt_string_to_format = (('2011-1-1', '%Y-%m-%d'),
- ('30-1-2011', '%d-%m-%Y'),
- ('1/1/2011', '%m/%d/%Y'),
- ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'),
- ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'),
- ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S'))
-
- for dt_string, dt_format in dt_string_to_format:
- self.assertEqual(
- tools._guess_datetime_format(dt_string),
- dt_format
- )
-
- def test_guess_datetime_format_for_array(self):
- tm._skip_if_not_us_locale()
- expected_format = '%Y-%m-%d %H:%M:%S.%f'
- dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format)
-
- test_arrays = [
- np.array([dt_string, dt_string, dt_string], dtype='O'),
- np.array([np.nan, np.nan, dt_string], dtype='O'),
- np.array([dt_string, 'random_string'], dtype='O'),
- ]
-
- for test_array in test_arrays:
- self.assertEqual(
- tools._guess_datetime_format_for_array(test_array),
- expected_format
- )
-
- format_for_string_of_nans = tools._guess_datetime_format_for_array(
- np.array(
- [np.nan, np.nan, np.nan], dtype='O'))
- self.assertTrue(format_for_string_of_nans is None)
-
-
-class TestTimestampToJulianDate(tm.TestCase):
- def test_compare_1700(self):
- r = Timestamp('1700-06-23').to_julian_date()
- self.assertEqual(r, 
2342145.5) - - def test_compare_2000(self): - r = Timestamp('2000-04-12').to_julian_date() - self.assertEqual(r, 2451646.5) - - def test_compare_2100(self): - r = Timestamp('2100-08-12').to_julian_date() - self.assertEqual(r, 2488292.5) - - def test_compare_hour01(self): - r = Timestamp('2000-08-12T01:00:00').to_julian_date() - self.assertEqual(r, 2451768.5416666666666666) - - def test_compare_hour13(self): - r = Timestamp('2000-08-12T13:00:00').to_julian_date() - self.assertEqual(r, 2451769.0416666666666666) - - -class TestDateTimeIndexToJulianDate(tm.TestCase): - def test_1700(self): - r1 = Float64Index([2345897.5, 2345898.5, 2345899.5, 2345900.5, - 2345901.5]) - r2 = date_range(start=Timestamp('1710-10-01'), periods=5, - freq='D').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_2000(self): - r1 = Float64Index([2451601.5, 2451602.5, 2451603.5, 2451604.5, - 2451605.5]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='D').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_hour(self): - r1 = Float64Index( - [2451601.5, 2451601.5416666666666666, 2451601.5833333333333333, - 2451601.625, 2451601.6666666666666666]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='H').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_minute(self): - r1 = Float64Index( - [2451601.5, 2451601.5006944444444444, 2451601.5013888888888888, - 2451601.5020833333333333, 2451601.5027777777777777]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='T').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_second(self): - r1 = Float64Index( - [2451601.5, 2451601.500011574074074, 2451601.5000231481481481, - 2451601.5000347222222222, 2451601.5000462962962962]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='S').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - -class TestDaysInMonth(tm.TestCase): - # tests for issue #10154 - def test_day_not_in_month_coerce(self): - self.assertTrue(isnull(to_datetime('2015-02-29', errors='coerce'))) - self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", - errors='coerce'))) - self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d", - errors='coerce'))) - self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", - errors='coerce'))) - - def test_day_not_in_month_raise(self): - self.assertRaises(ValueError, to_datetime, '2015-02-29', - errors='raise') - self.assertRaises(ValueError, to_datetime, '2015-02-29', - errors='raise', format="%Y-%m-%d") - self.assertRaises(ValueError, to_datetime, '2015-02-32', - errors='raise', format="%Y-%m-%d") - self.assertRaises(ValueError, to_datetime, '2015-04-31', - errors='raise', format="%Y-%m-%d") - - def test_day_not_in_month_ignore(self): - self.assertEqual(to_datetime( - '2015-02-29', errors='ignore'), '2015-02-29') - self.assertEqual(to_datetime( - '2015-02-29', errors='ignore', format="%Y-%m-%d"), '2015-02-29') - self.assertEqual(to_datetime( - '2015-02-32', errors='ignore', format="%Y-%m-%d"), '2015-02-32') - self.assertEqual(to_datetime( - '2015-04-31', errors='ignore', format="%Y-%m-%d"), '2015-04-31') - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tseries/tests/test_tslib.py 
b/pandas/tseries/tests/test_tslib.py index 58ec1561b2535..cf5dbd671d38c 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -1,29 +1,18 @@ import nose -from distutils.version import LooseVersion -import numpy as np - -from pandas import tslib, lib -import pandas._period as period import datetime +import numpy as np +from distutils.version import LooseVersion import pandas as pd -from pandas.core.api import (Timestamp, Index, Series, Timedelta, Period, - to_datetime) -from pandas.tslib import get_timezone -from pandas._period import period_asfreq, period_ordinal -from pandas.tseries.index import date_range, DatetimeIndex -from pandas.tseries.frequencies import ( - get_freq, - RESO_US, RESO_MS, RESO_SEC, RESO_HR, RESO_DAY, RESO_MIN -) -import pandas.tseries.tools as tools -import pandas.tseries.offsets as offsets import pandas.util.testing as tm -import pandas.compat as compat -from pandas.compat.numpy import (np_datetime64_compat, - np_array_datetime64_compat) - -from pandas.util.testing import assert_series_equal, _skip_if_has_locale +from pandas import tslib, lib, compat +from pandas.tseries import offsets, tools +from pandas.tseries.frequencies import get_freq +from pandas.tseries.index import date_range, DatetimeIndex +from pandas.util.testing import _skip_if_has_locale +from pandas._period import period_ordinal, period_asfreq +from pandas.compat.numpy import np_array_datetime64_compat +from pandas.core.api import Timestamp, to_datetime, Index, Series class TestTsUtil(tm.TestCase): @@ -60,589 +49,6 @@ def test_to_datetime_bijective(self): Timestamp.min.value / 1000) -class TestTimestamp(tm.TestCase): - - def test_constructor(self): - base_str = '2014-07-01 09:00' - base_dt = datetime.datetime(2014, 7, 1, 9) - base_expected = 1404205200000000000 - - # confirm base representation is correct - import calendar - self.assertEqual(calendar.timegm(base_dt.timetuple()) * 1000000000, - base_expected) - - tests = [(base_str, base_dt, base_expected), - ('2014-07-01 10:00', datetime.datetime(2014, 7, 1, 10), - base_expected + 3600 * 1000000000), - ('2014-07-01 09:00:00.000008000', - datetime.datetime(2014, 7, 1, 9, 0, 0, 8), - base_expected + 8000), - ('2014-07-01 09:00:00.000000005', - Timestamp('2014-07-01 09:00:00.000000005'), - base_expected + 5)] - - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() - import pytz - import dateutil - timezones = [(None, 0), ('UTC', 0), (pytz.utc, 0), ('Asia/Tokyo', 9), - ('US/Eastern', -4), ('dateutil/US/Pacific', -7), - (pytz.FixedOffset(-180), -3), - (dateutil.tz.tzoffset(None, 18000), 5)] - - for date_str, date, expected in tests: - for result in [Timestamp(date_str), Timestamp(date)]: - # only with timestring - self.assertEqual(result.value, expected) - self.assertEqual(tslib.pydt_to_i8(result), expected) - - # re-creation shouldn't affect to internal value - result = Timestamp(result) - self.assertEqual(result.value, expected) - self.assertEqual(tslib.pydt_to_i8(result), expected) - - # with timezone - for tz, offset in timezones: - for result in [Timestamp(date_str, tz=tz), Timestamp(date, - tz=tz)]: - expected_tz = expected - offset * 3600 * 1000000000 - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) - - # should preserve tz - result = Timestamp(result) - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) - - # should convert to UTC - result = Timestamp(result, tz='UTC') - expected_utc = expected - offset * 
3600 * 1000000000 - self.assertEqual(result.value, expected_utc) - self.assertEqual(tslib.pydt_to_i8(result), expected_utc) - - def test_constructor_with_stringoffset(self): - # GH 7833 - base_str = '2014-07-01 11:00:00+02:00' - base_dt = datetime.datetime(2014, 7, 1, 9) - base_expected = 1404205200000000000 - - # confirm base representation is correct - import calendar - self.assertEqual(calendar.timegm(base_dt.timetuple()) * 1000000000, - base_expected) - - tests = [(base_str, base_expected), - ('2014-07-01 12:00:00+02:00', - base_expected + 3600 * 1000000000), - ('2014-07-01 11:00:00.000008000+02:00', base_expected + 8000), - ('2014-07-01 11:00:00.000000005+02:00', base_expected + 5)] - - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() - import pytz - import dateutil - timezones = [(None, 0), ('UTC', 0), (pytz.utc, 0), ('Asia/Tokyo', 9), - ('US/Eastern', -4), ('dateutil/US/Pacific', -7), - (pytz.FixedOffset(-180), -3), - (dateutil.tz.tzoffset(None, 18000), 5)] - - for date_str, expected in tests: - for result in [Timestamp(date_str)]: - # only with timestring - self.assertEqual(result.value, expected) - self.assertEqual(tslib.pydt_to_i8(result), expected) - - # re-creation shouldn't affect to internal value - result = Timestamp(result) - self.assertEqual(result.value, expected) - self.assertEqual(tslib.pydt_to_i8(result), expected) - - # with timezone - for tz, offset in timezones: - result = Timestamp(date_str, tz=tz) - expected_tz = expected - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) - - # should preserve tz - result = Timestamp(result) - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) - - # should convert to UTC - result = Timestamp(result, tz='UTC') - expected_utc = expected - self.assertEqual(result.value, expected_utc) - self.assertEqual(tslib.pydt_to_i8(result), expected_utc) - - # This should be 2013-11-01 05:00 in UTC - # converted to Chicago tz - result = Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago') - self.assertEqual(result.value, Timestamp('2013-11-01 05:00').value) - expected = "Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago')" # noqa - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) - - # This should be 2013-11-01 05:00 in UTC - # converted to Tokyo tz (+09:00) - result = Timestamp('2013-11-01 00:00:00-0500', tz='Asia/Tokyo') - self.assertEqual(result.value, Timestamp('2013-11-01 05:00').value) - expected = "Timestamp('2013-11-01 14:00:00+0900', tz='Asia/Tokyo')" - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) - - # GH11708 - # This should be 2015-11-18 10:00 in UTC - # converted to Asia/Katmandu - result = Timestamp("2015-11-18 15:45:00+05:45", tz="Asia/Katmandu") - self.assertEqual(result.value, Timestamp("2015-11-18 10:00").value) - expected = "Timestamp('2015-11-18 15:45:00+0545', tz='Asia/Katmandu')" - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) - - # This should be 2015-11-18 10:00 in UTC - # converted to Asia/Kolkata - result = Timestamp("2015-11-18 15:30:00+05:30", tz="Asia/Kolkata") - self.assertEqual(result.value, Timestamp("2015-11-18 10:00").value) - expected = "Timestamp('2015-11-18 15:30:00+0530', tz='Asia/Kolkata')" - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) - - def test_constructor_invalid(self): - with tm.assertRaisesRegexp(TypeError, 'Cannot 
convert input'): - Timestamp(slice(2)) - with tm.assertRaisesRegexp(ValueError, 'Cannot convert Period'): - Timestamp(Period('1000-01-01')) - - def test_constructor_positional(self): - # GH 10758 - with tm.assertRaises(TypeError): - Timestamp(2000, 1) - with tm.assertRaises(ValueError): - Timestamp(2000, 0, 1) - with tm.assertRaises(ValueError): - Timestamp(2000, 13, 1) - with tm.assertRaises(ValueError): - Timestamp(2000, 1, 0) - with tm.assertRaises(ValueError): - Timestamp(2000, 1, 32) - - # GH 11630 - self.assertEqual( - repr(Timestamp(2015, 11, 12)), - repr(Timestamp('20151112'))) - - self.assertEqual( - repr(Timestamp(2015, 11, 12, 1, 2, 3, 999999)), - repr(Timestamp('2015-11-12 01:02:03.999999'))) - - self.assertIs(Timestamp(None), pd.NaT) - - def test_constructor_keyword(self): - # GH 10758 - with tm.assertRaises(TypeError): - Timestamp(year=2000, month=1) - with tm.assertRaises(ValueError): - Timestamp(year=2000, month=0, day=1) - with tm.assertRaises(ValueError): - Timestamp(year=2000, month=13, day=1) - with tm.assertRaises(ValueError): - Timestamp(year=2000, month=1, day=0) - with tm.assertRaises(ValueError): - Timestamp(year=2000, month=1, day=32) - - self.assertEqual( - repr(Timestamp(year=2015, month=11, day=12)), - repr(Timestamp('20151112'))) - - self.assertEqual( - repr(Timestamp(year=2015, month=11, day=12, - hour=1, minute=2, second=3, microsecond=999999)), - repr(Timestamp('2015-11-12 01:02:03.999999'))) - - def test_constructor_fromordinal(self): - base = datetime.datetime(2000, 1, 1) - - ts = Timestamp.fromordinal(base.toordinal(), freq='D') - self.assertEqual(base, ts) - self.assertEqual(ts.freq, 'D') - self.assertEqual(base.toordinal(), ts.toordinal()) - - ts = Timestamp.fromordinal(base.toordinal(), tz='US/Eastern') - self.assertEqual(pd.Timestamp('2000-01-01', tz='US/Eastern'), ts) - self.assertEqual(base.toordinal(), ts.toordinal()) - - def test_constructor_offset_depr(self): - # GH 12160 - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - ts = Timestamp('2011-01-01', offset='D') - self.assertEqual(ts.freq, 'D') - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - self.assertEqual(ts.offset, 'D') - - msg = "Can only specify freq or offset, not both" - with tm.assertRaisesRegexp(TypeError, msg): - Timestamp('2011-01-01', offset='D', freq='D') - - def test_constructor_offset_depr_fromordinal(self): - # GH 12160 - base = datetime.datetime(2000, 1, 1) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - ts = Timestamp.fromordinal(base.toordinal(), offset='D') - self.assertEqual(pd.Timestamp('2000-01-01'), ts) - self.assertEqual(ts.freq, 'D') - self.assertEqual(base.toordinal(), ts.toordinal()) - - msg = "Can only specify freq or offset, not both" - with tm.assertRaisesRegexp(TypeError, msg): - Timestamp.fromordinal(base.toordinal(), offset='D', freq='D') - - def test_conversion(self): - # GH 9255 - ts = Timestamp('2000-01-01') - - result = ts.to_pydatetime() - expected = datetime.datetime(2000, 1, 1) - self.assertEqual(result, expected) - self.assertEqual(type(result), type(expected)) - - result = ts.to_datetime64() - expected = np.datetime64(ts.value, 'ns') - self.assertEqual(result, expected) - self.assertEqual(type(result), type(expected)) - self.assertEqual(result.dtype, expected.dtype) - - def test_repr(self): - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() - - dates = ['2014-03-07', '2014-01-01 09:00', - '2014-01-01 00:00:00.000000001'] - - # dateutil zone change (only matters 
for repr) - import dateutil - if (dateutil.__version__ >= LooseVersion('2.3') and - (dateutil.__version__ <= LooseVersion('2.4.0') or - dateutil.__version__ >= LooseVersion('2.6.0'))): - timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', - 'dateutil/US/Pacific'] - else: - timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', - 'dateutil/America/Los_Angeles'] - - freqs = ['D', 'M', 'S', 'N'] - - for date in dates: - for tz in timezones: - for freq in freqs: - - # avoid to match with timezone name - freq_repr = "'{0}'".format(freq) - if tz.startswith('dateutil'): - tz_repr = tz.replace('dateutil', '') - else: - tz_repr = tz - - date_only = Timestamp(date) - self.assertIn(date, repr(date_only)) - self.assertNotIn(tz_repr, repr(date_only)) - self.assertNotIn(freq_repr, repr(date_only)) - self.assertEqual(date_only, eval(repr(date_only))) - - date_tz = Timestamp(date, tz=tz) - self.assertIn(date, repr(date_tz)) - self.assertIn(tz_repr, repr(date_tz)) - self.assertNotIn(freq_repr, repr(date_tz)) - self.assertEqual(date_tz, eval(repr(date_tz))) - - date_freq = Timestamp(date, freq=freq) - self.assertIn(date, repr(date_freq)) - self.assertNotIn(tz_repr, repr(date_freq)) - self.assertIn(freq_repr, repr(date_freq)) - self.assertEqual(date_freq, eval(repr(date_freq))) - - date_tz_freq = Timestamp(date, tz=tz, freq=freq) - self.assertIn(date, repr(date_tz_freq)) - self.assertIn(tz_repr, repr(date_tz_freq)) - self.assertIn(freq_repr, repr(date_tz_freq)) - self.assertEqual(date_tz_freq, eval(repr(date_tz_freq))) - - # this can cause the tz field to be populated, but it's redundant to - # information in the datestring - tm._skip_if_no_pytz() - import pytz # noqa - date_with_utc_offset = Timestamp('2014-03-13 00:00:00-0400', tz=None) - self.assertIn('2014-03-13 00:00:00-0400', repr(date_with_utc_offset)) - self.assertNotIn('tzoffset', repr(date_with_utc_offset)) - self.assertIn('pytz.FixedOffset(-240)', repr(date_with_utc_offset)) - expr = repr(date_with_utc_offset).replace("'pytz.FixedOffset(-240)'", - 'pytz.FixedOffset(-240)') - self.assertEqual(date_with_utc_offset, eval(expr)) - - def test_bounds_with_different_units(self): - out_of_bounds_dates = ('1677-09-21', '2262-04-12', ) - - time_units = ('D', 'h', 'm', 's', 'ms', 'us') - - for date_string in out_of_bounds_dates: - for unit in time_units: - self.assertRaises(ValueError, Timestamp, np.datetime64( - date_string, dtype='M8[%s]' % unit)) - - in_bounds_dates = ('1677-09-23', '2262-04-11', ) - - for date_string in in_bounds_dates: - for unit in time_units: - Timestamp(np.datetime64(date_string, dtype='M8[%s]' % unit)) - - def test_tz(self): - t = '2014-02-01 09:00' - ts = Timestamp(t) - local = ts.tz_localize('Asia/Tokyo') - self.assertEqual(local.hour, 9) - self.assertEqual(local, Timestamp(t, tz='Asia/Tokyo')) - conv = local.tz_convert('US/Eastern') - self.assertEqual(conv, Timestamp('2014-01-31 19:00', tz='US/Eastern')) - self.assertEqual(conv.hour, 19) - - # preserves nanosecond - ts = Timestamp(t) + offsets.Nano(5) - local = ts.tz_localize('Asia/Tokyo') - self.assertEqual(local.hour, 9) - self.assertEqual(local.nanosecond, 5) - conv = local.tz_convert('US/Eastern') - self.assertEqual(conv.nanosecond, 5) - self.assertEqual(conv.hour, 19) - - def test_tz_localize_ambiguous(self): - - ts = Timestamp('2014-11-02 01:00') - ts_dst = ts.tz_localize('US/Eastern', ambiguous=True) - ts_no_dst = ts.tz_localize('US/Eastern', ambiguous=False) - - rng = date_range('2014-11-02', periods=3, freq='H', tz='US/Eastern') - self.assertEqual(rng[1], ts_dst) - 
self.assertEqual(rng[2], ts_no_dst) - self.assertRaises(ValueError, ts.tz_localize, 'US/Eastern', - ambiguous='infer') - - # GH 8025 - with tm.assertRaisesRegexp(TypeError, - 'Cannot localize tz-aware Timestamp, use ' - 'tz_convert for conversions'): - Timestamp('2011-01-01', tz='US/Eastern').tz_localize('Asia/Tokyo') - - with tm.assertRaisesRegexp(TypeError, - 'Cannot convert tz-naive Timestamp, use ' - 'tz_localize to localize'): - Timestamp('2011-01-01').tz_convert('Asia/Tokyo') - - def test_tz_localize_nonexistent(self): - # See issue 13057 - from pytz.exceptions import NonExistentTimeError - times = ['2015-03-08 02:00', '2015-03-08 02:30', - '2015-03-29 02:00', '2015-03-29 02:30'] - timezones = ['US/Eastern', 'US/Pacific', - 'Europe/Paris', 'Europe/Belgrade'] - for t, tz in zip(times, timezones): - ts = Timestamp(t) - self.assertRaises(NonExistentTimeError, ts.tz_localize, - tz) - self.assertRaises(NonExistentTimeError, ts.tz_localize, - tz, errors='raise') - self.assertIs(ts.tz_localize(tz, errors='coerce'), - pd.NaT) - - def test_tz_localize_errors_ambiguous(self): - # See issue 13057 - from pytz.exceptions import AmbiguousTimeError - ts = pd.Timestamp('2015-11-1 01:00') - self.assertRaises(AmbiguousTimeError, - ts.tz_localize, 'US/Pacific', errors='coerce') - - def test_tz_localize_roundtrip(self): - for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: - for t in ['2014-02-01 09:00', '2014-07-08 09:00', - '2014-11-01 17:00', '2014-11-05 00:00']: - ts = Timestamp(t) - localized = ts.tz_localize(tz) - self.assertEqual(localized, Timestamp(t, tz=tz)) - - with tm.assertRaises(TypeError): - localized.tz_localize(tz) - - reset = localized.tz_localize(None) - self.assertEqual(reset, ts) - self.assertTrue(reset.tzinfo is None) - - def test_tz_convert_roundtrip(self): - for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: - for t in ['2014-02-01 09:00', '2014-07-08 09:00', - '2014-11-01 17:00', '2014-11-05 00:00']: - ts = Timestamp(t, tz='UTC') - converted = ts.tz_convert(tz) - - reset = converted.tz_convert(None) - self.assertEqual(reset, Timestamp(t)) - self.assertTrue(reset.tzinfo is None) - self.assertEqual(reset, - converted.tz_convert('UTC').tz_localize(None)) - - def test_barely_oob_dts(self): - one_us = np.timedelta64(1).astype('timedelta64[us]') - - # By definition we can't go out of bounds in [ns], so we - # convert the datetime64s to [us] so we can go out of bounds - min_ts_us = np.datetime64(Timestamp.min).astype('M8[us]') - max_ts_us = np.datetime64(Timestamp.max).astype('M8[us]') - - # No error for the min/max datetimes - Timestamp(min_ts_us) - Timestamp(max_ts_us) - - # One us less than the minimum is an error - self.assertRaises(ValueError, Timestamp, min_ts_us - one_us) - - # One us more than the maximum is an error - self.assertRaises(ValueError, Timestamp, max_ts_us + one_us) - - def test_utc_z_designator(self): - self.assertEqual(get_timezone( - Timestamp('2014-11-02 01:00Z').tzinfo), 'UTC') - - def test_now(self): - # #9000 - ts_from_string = Timestamp('now') - ts_from_method = Timestamp.now() - ts_datetime = datetime.datetime.now() - - ts_from_string_tz = Timestamp('now', tz='US/Eastern') - ts_from_method_tz = Timestamp.now(tz='US/Eastern') - - # Check that the delta between the times is less than 1s (arbitrarily - # small) - delta = Timedelta(seconds=1) - self.assertTrue(abs(ts_from_method - ts_from_string) < delta) - self.assertTrue(abs(ts_datetime - ts_from_method) < delta) - self.assertTrue(abs(ts_from_method_tz - 
ts_from_string_tz) < delta)
- self.assertTrue(abs(ts_from_string_tz.tz_localize(None) -
- ts_from_method_tz.tz_localize(None)) < delta)
-
- def test_today(self):
-
- ts_from_string = Timestamp('today')
- ts_from_method = Timestamp.today()
- ts_datetime = datetime.datetime.today()
-
- ts_from_string_tz = Timestamp('today', tz='US/Eastern')
- ts_from_method_tz = Timestamp.today(tz='US/Eastern')
-
- # Check that the delta between the times is less than 1s (arbitrarily
- # small)
- delta = Timedelta(seconds=1)
- self.assertTrue(abs(ts_from_method - ts_from_string) < delta)
- self.assertTrue(abs(ts_datetime - ts_from_method) < delta)
- self.assertTrue(abs(ts_from_method_tz - ts_from_string_tz) < delta)
- self.assertTrue(abs(ts_from_string_tz.tz_localize(None) -
- ts_from_method_tz.tz_localize(None)) < delta)
-
- def test_asm8(self):
- np.random.seed(7960929)
- ns = [Timestamp.min.value, Timestamp.max.value, 1000, ]
- for n in ns:
- self.assertEqual(Timestamp(n).asm8.view('i8'),
- np.datetime64(n, 'ns').view('i8'), n)
- self.assertEqual(Timestamp('nat').asm8.view('i8'),
- np.datetime64('nat', 'ns').view('i8'))
-
- def test_fields(self):
- def check(value, equal):
- # that we are int/long like
- self.assertTrue(isinstance(value, (int, compat.long)))
- self.assertEqual(value, equal)
-
- # GH 10050
- ts = Timestamp('2015-05-10 09:06:03.000100001')
- check(ts.year, 2015)
- check(ts.month, 5)
- check(ts.day, 10)
- check(ts.hour, 9)
- check(ts.minute, 6)
- check(ts.second, 3)
- self.assertRaises(AttributeError, lambda: ts.millisecond)
- check(ts.microsecond, 100)
- check(ts.nanosecond, 1)
- check(ts.dayofweek, 6)
- check(ts.quarter, 2)
- check(ts.dayofyear, 130)
- check(ts.week, 19)
- check(ts.daysinmonth, 31)
- check(ts.days_in_month, 31)
-
- def test_nat_fields(self):
- # GH 10050
- ts = Timestamp('NaT')
- self.assertTrue(np.isnan(ts.year))
- self.assertTrue(np.isnan(ts.month))
- self.assertTrue(np.isnan(ts.day))
- self.assertTrue(np.isnan(ts.hour))
- self.assertTrue(np.isnan(ts.minute))
- self.assertTrue(np.isnan(ts.second))
- self.assertTrue(np.isnan(ts.microsecond))
- self.assertTrue(np.isnan(ts.nanosecond))
- self.assertTrue(np.isnan(ts.dayofweek))
- self.assertTrue(np.isnan(ts.quarter))
- self.assertTrue(np.isnan(ts.dayofyear))
- self.assertTrue(np.isnan(ts.week))
- self.assertTrue(np.isnan(ts.daysinmonth))
- self.assertTrue(np.isnan(ts.days_in_month))
-
- def test_pprint(self):
- # GH12622
- import pprint
- nested_obj = {'foo': 1,
- 'bar': [{'w': {'a': Timestamp('2011-01-01')}}] * 10}
- result = pprint.pformat(nested_obj, width=50)
- expected = r"""{'bar': [{'w': {'a': Timestamp('2011-01-01 00:00:00')}},
- {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
- {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
- {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
- {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
- {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
- {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
- {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
- {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
- {'w': {'a': Timestamp('2011-01-01 00:00:00')}}],
- 'foo': 1}"""
- self.assertEqual(result, expected)
-
- def to_datetime_depr(self):
- # see gh-8254
- ts = Timestamp('2011-01-01')
-
- with tm.assert_produces_warning(FutureWarning,
- check_stacklevel=False):
- expected = datetime.datetime(2011, 1, 1)
- result = ts.to_datetime()
- self.assertEqual(result, expected)
-
- def to_pydatetime_nonzero_nano(self):
- ts = Timestamp('2011-01-01 9:00:00.123456789')
-
- # Warn the user of data loss (nanoseconds). 
- with tm.assert_produces_warning(UserWarning, - check_stacklevel=False): - expected = datetime.datetime(2011, 1, 1, 9, 0, 0, 123456) - result = ts.to_pydatetime() - self.assertEqual(result, expected) - - class TestDatetimeParsingWrappers(tm.TestCase): def test_does_not_convert_mixed_integer(self): bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') @@ -1117,181 +523,6 @@ def test_parsing_timezone_offsets(self): ) -class TestTimestampNsOperations(tm.TestCase): - def setUp(self): - self.timestamp = Timestamp(datetime.datetime.utcnow()) - - def assert_ns_timedelta(self, modified_timestamp, expected_value): - value = self.timestamp.value - modified_value = modified_timestamp.value - - self.assertEqual(modified_value - value, expected_value) - - def test_timedelta_ns_arithmetic(self): - self.assert_ns_timedelta(self.timestamp + np.timedelta64(-123, 'ns'), - -123) - - def test_timedelta_ns_based_arithmetic(self): - self.assert_ns_timedelta(self.timestamp + np.timedelta64( - 1234567898, 'ns'), 1234567898) - - def test_timedelta_us_arithmetic(self): - self.assert_ns_timedelta(self.timestamp + np.timedelta64(-123, 'us'), - -123000) - - def test_timedelta_ms_arithmetic(self): - time = self.timestamp + np.timedelta64(-123, 'ms') - self.assert_ns_timedelta(time, -123000000) - - def test_nanosecond_string_parsing(self): - ts = Timestamp('2013-05-01 07:15:45.123456789') - # GH 7878 - expected_repr = '2013-05-01 07:15:45.123456789' - expected_value = 1367392545123456789 - self.assertEqual(ts.value, expected_value) - self.assertIn(expected_repr, repr(ts)) - - ts = Timestamp('2013-05-01 07:15:45.123456789+09:00', tz='Asia/Tokyo') - self.assertEqual(ts.value, expected_value - 9 * 3600 * 1000000000) - self.assertIn(expected_repr, repr(ts)) - - ts = Timestamp('2013-05-01 07:15:45.123456789', tz='UTC') - self.assertEqual(ts.value, expected_value) - self.assertIn(expected_repr, repr(ts)) - - ts = Timestamp('2013-05-01 07:15:45.123456789', tz='US/Eastern') - self.assertEqual(ts.value, expected_value + 4 * 3600 * 1000000000) - self.assertIn(expected_repr, repr(ts)) - - # GH 10041 - ts = Timestamp('20130501T071545.123456789') - self.assertEqual(ts.value, expected_value) - self.assertIn(expected_repr, repr(ts)) - - def test_nanosecond_timestamp(self): - # GH 7610 - expected = 1293840000000000005 - t = Timestamp('2011-01-01') + offsets.Nano(5) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 5) - - t = Timestamp(t) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 5) - - t = Timestamp(np_datetime64_compat('2011-01-01 00:00:00.000000005Z')) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 5) - - expected = 1293840000000000010 - t = t + offsets.Nano(5) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 10) - - t = Timestamp(t) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 10) - - t = Timestamp(np_datetime64_compat('2011-01-01 00:00:00.000000010Z')) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 10) - - def test_nat_arithmetic(self): - # GH 
6873 - i = 2 - f = 1.5 - - for (left, right) in [(pd.NaT, i), (pd.NaT, f), (pd.NaT, np.nan)]: - self.assertIs(left / right, pd.NaT) - self.assertIs(left * right, pd.NaT) - self.assertIs(right * left, pd.NaT) - with tm.assertRaises(TypeError): - right / left - - # Timestamp / datetime - t = Timestamp('2014-01-01') - dt = datetime.datetime(2014, 1, 1) - for (left, right) in [(pd.NaT, pd.NaT), (pd.NaT, t), (pd.NaT, dt)]: - # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(left - right, pd.NaT) - self.assertIs(right - left, pd.NaT) - - # timedelta-like - # offsets are tested in test_offsets.py - - delta = datetime.timedelta(3600) - td = Timedelta('5s') - - for (left, right) in [(pd.NaT, delta), (pd.NaT, td)]: - # NaT + timedelta-like returns NaT - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(right - left, pd.NaT) - self.assertIs(left - right, pd.NaT) - - # GH 11718 - tm._skip_if_no_pytz() - import pytz - - t_utc = Timestamp('2014-01-01', tz='UTC') - t_tz = Timestamp('2014-01-01', tz='US/Eastern') - dt_tz = pytz.timezone('Asia/Tokyo').localize(dt) - - for (left, right) in [(pd.NaT, t_utc), (pd.NaT, t_tz), - (pd.NaT, dt_tz)]: - # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(left - right, pd.NaT) - self.assertIs(right - left, pd.NaT) - - # int addition / subtraction - for (left, right) in [(pd.NaT, 2), (pd.NaT, 0), (pd.NaT, -3)]: - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(left - right, pd.NaT) - self.assertIs(right - left, pd.NaT) - - def test_nat_arithmetic_index(self): - # GH 11718 - - # datetime - tm._skip_if_no_pytz() - - dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], name='x') - exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x') - self.assert_index_equal(dti + pd.NaT, exp) - self.assert_index_equal(pd.NaT + dti, exp) - - dti_tz = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], - tz='US/Eastern', name='x') - exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x', tz='US/Eastern') - self.assert_index_equal(dti_tz + pd.NaT, exp) - self.assert_index_equal(pd.NaT + dti_tz, exp) - - exp = pd.TimedeltaIndex([pd.NaT, pd.NaT], name='x') - for (left, right) in [(pd.NaT, dti), (pd.NaT, dti_tz)]: - self.assert_index_equal(left - right, exp) - self.assert_index_equal(right - left, exp) - - # timedelta - tdi = pd.TimedeltaIndex(['1 day', '2 day'], name='x') - exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x') - for (left, right) in [(pd.NaT, tdi)]: - self.assert_index_equal(left + right, exp) - self.assert_index_equal(right + left, exp) - self.assert_index_equal(left - right, exp) - self.assert_index_equal(right - left, exp) - - class TestTslib(tm.TestCase): def test_intraday_conversion_factors(self): self.assertEqual(period_asfreq( @@ -1461,86 +692,6 @@ def _check_round(freq, expected): stamp.round('foo') -class TestTimestampOps(tm.TestCase): - def test_timestamp_and_datetime(self): - self.assertEqual((Timestamp(datetime.datetime( - 2013, 10, 13)) - datetime.datetime(2013, 10, 12)).days, 1) - self.assertEqual((datetime.datetime(2013, 10, 12) - - Timestamp(datetime.datetime(2013, 10, 13))).days, -1) - - def test_timestamp_and_series(self): - timestamp_series = Series(date_range('2014-03-17', periods=2, freq='D', - tz='US/Eastern')) - first_timestamp = timestamp_series[0] - - delta_series = 
Series([np.timedelta64(0, 'D'), np.timedelta64(1, 'D')])
-        assert_series_equal(timestamp_series - first_timestamp, delta_series)
-        assert_series_equal(first_timestamp - timestamp_series, -delta_series)
-
-    def test_addition_subtraction_types(self):
-        # Assert on the types resulting from Timestamp +/- various date/time
-        # objects
-        datetime_instance = datetime.datetime(2014, 3, 4)
-        timedelta_instance = datetime.timedelta(seconds=1)
-        # build a timestamp with a frequency, since then it supports
-        # addition/subtraction of integers
-        timestamp_instance = date_range(datetime_instance, periods=1,
-                                        freq='D')[0]
-
-        self.assertEqual(type(timestamp_instance + 1), Timestamp)
-        self.assertEqual(type(timestamp_instance - 1), Timestamp)
-
-        # Timestamp + datetime not supported, though subtraction is supported
-        # and yields timedelta more tests in tseries/base/tests/test_base.py
-        self.assertEqual(
-            type(timestamp_instance - datetime_instance), Timedelta)
-        self.assertEqual(
-            type(timestamp_instance + timedelta_instance), Timestamp)
-        self.assertEqual(
-            type(timestamp_instance - timedelta_instance), Timestamp)
-
-        # Timestamp +/- datetime64 not supported, so not tested (could possibly
-        # assert error raised?)
-        timedelta64_instance = np.timedelta64(1, 'D')
-        self.assertEqual(
-            type(timestamp_instance + timedelta64_instance), Timestamp)
-        self.assertEqual(
-            type(timestamp_instance - timedelta64_instance), Timestamp)
-
-    def test_addition_subtraction_preserve_frequency(self):
-        timestamp_instance = date_range('2014-03-05', periods=1, freq='D')[0]
-        timedelta_instance = datetime.timedelta(days=1)
-        original_freq = timestamp_instance.freq
-        self.assertEqual((timestamp_instance + 1).freq, original_freq)
-        self.assertEqual((timestamp_instance - 1).freq, original_freq)
-        self.assertEqual(
-            (timestamp_instance + timedelta_instance).freq, original_freq)
-        self.assertEqual(
-            (timestamp_instance - timedelta_instance).freq, original_freq)
-
-        timedelta64_instance = np.timedelta64(1, 'D')
-        self.assertEqual(
-            (timestamp_instance + timedelta64_instance).freq, original_freq)
-        self.assertEqual(
-            (timestamp_instance - timedelta64_instance).freq, original_freq)
-
-    def test_resolution(self):
-
-        for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T',
-                                   'S', 'L', 'U'],
-                                  [RESO_DAY, RESO_DAY,
-                                   RESO_DAY, RESO_DAY,
-                                   RESO_HR, RESO_MIN,
-                                   RESO_SEC, RESO_MS,
-                                   RESO_US]):
-            for tz in [None, 'Asia/Tokyo', 'US/Eastern',
-                       'dateutil/US/Eastern']:
-                idx = date_range(start='2013-04-01', periods=30, freq=freq,
-                                 tz=tz)
-                result = period.resolution(idx.asi8, idx.tz)
-                self.assertEqual(result, expected)
-
-
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)

From a7f7127eb5dc0db51475a5eeb68d45c7590b74b6 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Sat, 4 Feb 2017 11:31:12 -0500
Subject: [PATCH 008/933] TST: making test files a bit more balanced

TST: move parts of test_datetimelike.py to indexes/datetimes
---
 pandas/sparse/tests/test_frame.py | 44 ++
 pandas/tests/frame/test_missing.py | 34 ++
 pandas/tests/frame/test_timeseries.py | 58 ++-
 pandas/tests/indexes/datetimelike.py | 40 ++
 pandas/tests/indexes/datetimes/test_astype.py | 62 +++
 .../indexes/datetimes/test_construction.py | 17 +
 .../tests/indexes/datetimes/test_datetime.py | 8 +
 .../indexes/datetimes/test_datetimelike.py | 76 +++
 pandas/tests/indexes/datetimes/test_misc.py | 62 ++-
 pandas/tests/indexes/test_datetimelike.py | 111 +---
 pandas/tests/scalar/test_timestamp.py | 59 +++
 pandas/tests/series/test_indexing.py | 129 +++++
 pandas/tests/series/test_missing.py | 53 +-
 pandas/tests/series/test_timeseries.py | 489 ------------------
 14 files changed, 638 insertions(+), 604 deletions(-)
 create mode 100644 pandas/tests/indexes/datetimelike.py
 create mode 100644 pandas/tests/indexes/datetimes/test_datetimelike.py

diff --git a/pandas/sparse/tests/test_frame.py b/pandas/sparse/tests/test_frame.py
index b9e8a31393931..23bb827974c61 100644
--- a/pandas/sparse/tests/test_frame.py
+++ b/pandas/sparse/tests/test_frame.py
@@ -705,6 +705,50 @@ def test_fillna_fill_value(self):
         tm.assert_frame_equal(sparse.fillna(-1).to_dense(),
                               df.fillna(-1), check_dtype=False)
 
+    def test_sparse_frame_pad_backfill_limit(self):
+        index = np.arange(10)
+        df = DataFrame(np.random.randn(10, 4), index=index)
+        sdf = df.to_sparse()
+
+        result = sdf[:2].reindex(index, method='pad', limit=5)
+
+        expected = sdf[:2].reindex(index).fillna(method='pad')
+        expected = expected.to_dense()
+        expected.values[-3:] = np.nan
+        expected = expected.to_sparse()
+        tm.assert_frame_equal(result, expected)
+
+        result = sdf[-2:].reindex(index, method='backfill', limit=5)
+
+        expected = sdf[-2:].reindex(index).fillna(method='backfill')
+        expected = expected.to_dense()
+        expected.values[:3] = np.nan
+        expected = expected.to_sparse()
+        tm.assert_frame_equal(result, expected)
+
+    def test_sparse_frame_fillna_limit(self):
+        index = np.arange(10)
+        df = DataFrame(np.random.randn(10, 4), index=index)
+        sdf = df.to_sparse()
+
+        result = sdf[:2].reindex(index)
+        result = result.fillna(method='pad', limit=5)
+
+        expected = sdf[:2].reindex(index).fillna(method='pad')
+        expected = expected.to_dense()
+        expected.values[-3:] = np.nan
+        expected = expected.to_sparse()
+        tm.assert_frame_equal(result, expected)
+
+        result = sdf[-2:].reindex(index)
+        result = result.fillna(method='backfill', limit=5)
+
+        expected = sdf[-2:].reindex(index).fillna(method='backfill')
+        expected = expected.to_dense()
+        expected.values[:3] = np.nan
+        expected = expected.to_sparse()
+        tm.assert_frame_equal(result, expected)
+
     def test_rename(self):
         # just check this works
         renamed = self.frame.rename(index=str)  # noqa

diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py
index c4f037e85edf6..a8c9c72956463 100644
--- a/pandas/tests/frame/test_missing.py
+++ b/pandas/tests/frame/test_missing.py
@@ -322,6 +322,40 @@ def test_bfill(self):
         assert_frame_equal(self.tsframe.bfill(),
                            self.tsframe.fillna(method='bfill'))
 
+    def test_frame_pad_backfill_limit(self):
+        index = np.arange(10)
+        df = DataFrame(np.random.randn(10, 4), index=index)
+
+        result = df[:2].reindex(index, method='pad', limit=5)
+
+        expected = df[:2].reindex(index).fillna(method='pad')
+        expected.values[-3:] = np.nan
+        tm.assert_frame_equal(result, expected)
+
+        result = df[-2:].reindex(index, method='backfill', limit=5)
+
+        expected = df[-2:].reindex(index).fillna(method='backfill')
+        expected.values[:3] = np.nan
+        tm.assert_frame_equal(result, expected)
+
+    def test_frame_fillna_limit(self):
+        index = np.arange(10)
+        df = DataFrame(np.random.randn(10, 4), index=index)
+
+        result = df[:2].reindex(index)
+        result = result.fillna(method='pad', limit=5)
+
+        expected = df[:2].reindex(index).fillna(method='pad')
+        expected.values[-3:] = np.nan
+        tm.assert_frame_equal(result, expected)
+
+        result = df[-2:].reindex(index)
+        result = result.fillna(method='backfill', limit=5)
+
+        expected = df[-2:].reindex(index).fillna(method='backfill')
+        expected.values[:3] = np.nan
+
tm.assert_frame_equal(result, expected) + def test_fillna_skip_certain_blocks(self): # don't try to fill boolean, int blocks diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 85967e9eda0d6..934aafc500611 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -8,7 +8,9 @@ from numpy.random import randn import numpy as np -from pandas import DataFrame, Series, Index, Timestamp, DatetimeIndex +from pandas import (DataFrame, Series, Index, + Timestamp, DatetimeIndex, + to_datetime, date_range) import pandas as pd import pandas.tseries.offsets as offsets @@ -117,6 +119,60 @@ def test_pct_change_shift_over_nas(self): edf = DataFrame({'a': expected, 'b': expected}) assert_frame_equal(chg, edf) + def test_frame_ctor_datetime64_column(self): + rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') + dates = np.asarray(rng) + + df = DataFrame({'A': np.random.randn(len(rng)), 'B': dates}) + self.assertTrue(np.issubdtype(df['B'].dtype, np.dtype('M8[ns]'))) + + def test_frame_add_datetime64_column(self): + rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') + df = DataFrame(index=np.arange(len(rng))) + + df['A'] = rng + self.assertTrue(np.issubdtype(df['A'].dtype, np.dtype('M8[ns]'))) + + def test_frame_datetime64_pre1900_repr(self): + df = DataFrame({'year': date_range('1/1/1700', periods=50, + freq='A-DEC')}) + # it works! + repr(df) + + def test_frame_add_datetime64_col_other_units(self): + n = 100 + + units = ['h', 'm', 's', 'ms', 'D', 'M', 'Y'] + + ns_dtype = np.dtype('M8[ns]') + + for unit in units: + dtype = np.dtype('M8[%s]' % unit) + vals = np.arange(n, dtype=np.int64).view(dtype) + + df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) + df[unit] = vals + + ex_vals = to_datetime(vals.astype('O')).values + + self.assertEqual(df[unit].dtype, ns_dtype) + self.assertTrue((df[unit].values == ex_vals).all()) + + # Test insertion into existing datetime64 column + df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) + df['dates'] = np.arange(n, dtype=np.int64).view(ns_dtype) + + for unit in units: + dtype = np.dtype('M8[%s]' % unit) + vals = np.arange(n, dtype=np.int64).view(dtype) + + tmp = df.copy() + + tmp['dates'] = vals + ex_vals = to_datetime(vals.astype('O')).values + + self.assertTrue((tmp['dates'].values == ex_vals).all()) + def test_shift(self): # naive shift shiftedFrame = self.tsframe.shift(5) diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py new file mode 100644 index 0000000000000..964511a2e9d5b --- /dev/null +++ b/pandas/tests/indexes/datetimelike.py @@ -0,0 +1,40 @@ +""" generic datetimelike tests """ + +from .common import Base +import pandas.util.testing as tm + + +class DatetimeLike(Base): + + def test_shift_identity(self): + + idx = self.create_index() + self.assert_index_equal(idx, idx.shift(0)) + + def test_str(self): + + # test the string repr + idx = self.create_index() + idx.name = 'foo' + self.assertFalse("length=%s" % len(idx) in str(idx)) + self.assertTrue("'foo'" in str(idx)) + self.assertTrue(idx.__class__.__name__ in str(idx)) + + if hasattr(idx, 'tz'): + if idx.tz is not None: + self.assertTrue(idx.tz in str(idx)) + if hasattr(idx, 'freq'): + self.assertTrue("freq='%s'" % idx.freqstr in str(idx)) + + def test_view(self): + super(DatetimeLike, self).test_view() + + i = self.create_index() + + i_view = i.view('i8') + result = self._holder(i) + tm.assert_index_equal(result, i) + + i_view = 
i.view(self._holder) + result = self._holder(i) + tm.assert_index_equal(result, i_view) diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index f64d18a69a093..d452a7e1840d7 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -1,5 +1,6 @@ import numpy as np +from datetime import datetime import pandas as pd import pandas.util.testing as tm from pandas import (DatetimeIndex, date_range, Series, NaT, Index, Timestamp, @@ -120,3 +121,64 @@ def test_astype_raises(self): self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') self.assertRaises(ValueError, idx.astype, 'datetime64') self.assertRaises(ValueError, idx.astype, 'datetime64[D]') + + def test_index_convert_to_datetime_array(self): + tm._skip_if_no_pytz() + + def _check_rng(rng): + converted = rng.to_pydatetime() + tm.assertIsInstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + tm.assertIsInstance(x, datetime) + self.assertEqual(x, stamp.to_pydatetime()) + self.assertEqual(x.tzinfo, stamp.tzinfo) + + rng = date_range('20090415', '20090519') + rng_eastern = date_range('20090415', '20090519', tz='US/Eastern') + rng_utc = date_range('20090415', '20090519', tz='utc') + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_index_convert_to_datetime_array_explicit_pytz(self): + tm._skip_if_no_pytz() + import pytz + + def _check_rng(rng): + converted = rng.to_pydatetime() + tm.assertIsInstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + tm.assertIsInstance(x, datetime) + self.assertEqual(x, stamp.to_pydatetime()) + self.assertEqual(x.tzinfo, stamp.tzinfo) + + rng = date_range('20090415', '20090519') + rng_eastern = date_range('20090415', '20090519', + tz=pytz.timezone('US/Eastern')) + rng_utc = date_range('20090415', '20090519', tz=pytz.utc) + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_index_convert_to_datetime_array_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + + def _check_rng(rng): + converted = rng.to_pydatetime() + tm.assertIsInstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + tm.assertIsInstance(x, datetime) + self.assertEqual(x, stamp.to_pydatetime()) + self.assertEqual(x.tzinfo, stamp.tzinfo) + + rng = date_range('20090415', '20090519') + rng_eastern = date_range('20090415', '20090519', + tz='dateutil/US/Eastern') + rng_utc = date_range('20090415', '20090519', tz=dateutil.tz.tzutc()) + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index f8eca0f0d91d0..03bc0e0c554b0 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -2,6 +2,7 @@ from datetime import timedelta import pandas as pd +from pandas import tslib import pandas.util.testing as tm from pandas.tslib import OutOfBoundsDatetime from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range, @@ -477,6 +478,22 @@ def test_dti_constructor_numpy_timeunits(self): tm.assert_index_equal(DatetimeIndex(values), base) tm.assert_index_equal(to_datetime(values), base) + def test_ctor_str_intraday(self): + rng = DatetimeIndex(['1-1-2000 00:00:01']) + self.assertEqual(rng[0].second, 1) + + def test_is_(self): + dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') + self.assertTrue(dti.is_(dti)) + 
self.assertTrue(dti.is_(dti.view())) + self.assertFalse(dti.is_(dti.copy())) + + def test_index_cast_datetime64_other_units(self): + arr = np.arange(0, 100, 10, dtype=np.int64).view('M8[D]') + idx = Index(arr) + + self.assertTrue((idx.values == tslib.cast_to_nanoseconds(arr)).all()) + def test_constructor_int64_nocopy(self): # #1624 arr = np.arange(1000, dtype=np.int64) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index f92fca6ecfa14..628cb9df94e39 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -94,6 +94,14 @@ def test_get_indexer(self): with tm.assertRaises(ValueError): idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') + def test_reasonable_keyerror(self): + # GH #1062 + index = DatetimeIndex(['1/3/2000']) + try: + index.get_loc('1/1/2000') + except KeyError as e: + self.assertIn('2000', str(e)) + def test_roundtrip_pickle_with_tz(self): # GH 8367 diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py new file mode 100644 index 0000000000000..b32801a8bcf25 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -0,0 +1,76 @@ +""" generic tests from the Datetimelike class """ + +import numpy as np +import pandas as pd +from pandas.util import testing as tm +from pandas import Series, Index, DatetimeIndex, date_range + +from ..datetimelike import DatetimeLike + +class TestDatetimeIndex(DatetimeLike, tm.TestCase): + _holder = DatetimeIndex + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=tm.makeDateIndex(10)) + self.setup_indices() + + def create_index(self): + return date_range('20130101', periods=5) + + def test_shift(self): + + # test shift for datetimeIndex and non datetimeIndex + # GH8083 + + drange = self.create_index() + result = drange.shift(1) + expected = DatetimeIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', + '2013-01-06'], freq='D') + self.assert_index_equal(result, expected) + + result = drange.shift(-1) + expected = DatetimeIndex(['2012-12-31', '2013-01-01', '2013-01-02', + '2013-01-03', '2013-01-04'], + freq='D') + self.assert_index_equal(result, expected) + + result = drange.shift(3, freq='2D') + expected = DatetimeIndex(['2013-01-07', '2013-01-08', '2013-01-09', + '2013-01-10', + '2013-01-11'], freq='D') + self.assert_index_equal(result, expected) + + def test_pickle_compat_construction(self): + pass + + def test_intersection(self): + first = self.index + second = self.index[5:] + intersect = first.intersection(second) + self.assertTrue(tm.equalContents(intersect, second)) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.intersection(case) + self.assertTrue(tm.equalContents(result, second)) + + third = Index(['a', 'b', 'c']) + result = first.intersection(third) + expected = pd.Index([], dtype=object) + self.assert_index_equal(result, expected) + + def test_union(self): + first = self.index[:5] + second = self.index[5:] + everything = self.index + union = first.union(second) + self.assertTrue(tm.equalContents(union, everything)) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.union(case) + self.assertTrue(tm.equalContents(result, everything)) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 
4685df580190b..92aad5a0b1997 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -4,8 +4,9 @@ import pandas.lib as lib import pandas.util.testing as tm from pandas import (Index, DatetimeIndex, datetime, offsets, to_datetime, - Series, DataFrame, Float64Index, date_range, Timestamp) - + Series, DataFrame, Float64Index, date_range, + Timestamp, isnull) +from pandas import tslib from pandas.util.testing import assert_series_equal @@ -143,6 +144,63 @@ def test_datetimeindex_integers_shift(self): expected = rng.shift(-5) tm.assert_index_equal(result, expected) + def test_string_na_nat_conversion(self): + # GH #999, #858 + + from pandas.compat import parse_date + + strings = np.array(['1/1/2000', '1/2/2000', np.nan, + '1/4/2000, 12:34:56'], dtype=object) + + expected = np.empty(4, dtype='M8[ns]') + for i, val in enumerate(strings): + if isnull(val): + expected[i] = tslib.iNaT + else: + expected[i] = parse_date(val) + + result = tslib.array_to_datetime(strings) + tm.assert_almost_equal(result, expected) + + result2 = to_datetime(strings) + tm.assertIsInstance(result2, DatetimeIndex) + tm.assert_numpy_array_equal(result, result2.values) + + malformed = np.array(['1/100/2000', np.nan], dtype=object) + + # GH 10636, default is now 'raise' + self.assertRaises(ValueError, + lambda: to_datetime(malformed, errors='raise')) + + result = to_datetime(malformed, errors='ignore') + tm.assert_numpy_array_equal(result, malformed) + + self.assertRaises(ValueError, to_datetime, malformed, errors='raise') + + idx = ['a', 'b', 'c', 'd', 'e'] + series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, + '1/5/2000'], index=idx, name='foo') + dseries = Series([to_datetime('1/1/2000'), np.nan, + to_datetime('1/3/2000'), np.nan, + to_datetime('1/5/2000')], index=idx, name='foo') + + result = to_datetime(series) + dresult = to_datetime(dseries) + + expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) + for i in range(5): + x = series[i] + if isnull(x): + expected[i] = tslib.iNaT + else: + expected[i] = to_datetime(x) + + assert_series_equal(result, expected, check_names=False) + self.assertEqual(result.name, 'foo') + + assert_series_equal(dresult, expected, check_names=False) + self.assertEqual(dresult.name, 'foo') + def test_datetimeindex_repr_short(self): dr = date_range(start='1/1/2012', periods=1) repr(dr) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 32e4029a57fe9..e5a4ced4ced4d 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -4,119 +4,14 @@ from datetime import timedelta import pandas as pd -import pandas.util.testing as tm +from pandas.util import testing as tm from pandas import (DatetimeIndex, Float64Index, Index, Int64Index, NaT, Period, PeriodIndex, Series, Timedelta, - TimedeltaIndex, date_range, period_range, + TimedeltaIndex, period_range, timedelta_range, notnull) -from .common import Base - - -class DatetimeLike(Base): - - def test_shift_identity(self): - - idx = self.create_index() - self.assert_index_equal(idx, idx.shift(0)) - - def test_str(self): - - # test the string repr - idx = self.create_index() - idx.name = 'foo' - self.assertFalse("length=%s" % len(idx) in str(idx)) - self.assertTrue("'foo'" in str(idx)) - self.assertTrue(idx.__class__.__name__ in str(idx)) - - if hasattr(idx, 'tz'): - if idx.tz is not None: - self.assertTrue(idx.tz in str(idx)) - if hasattr(idx, 'freq'): - self.assertTrue("freq='%s'" % idx.freqstr 
in str(idx)) - - def test_view(self): - super(DatetimeLike, self).test_view() - - i = self.create_index() - - i_view = i.view('i8') - result = self._holder(i) - tm.assert_index_equal(result, i) - - i_view = i.view(self._holder) - result = self._holder(i) - tm.assert_index_equal(result, i_view) - - -class TestDatetimeIndex(DatetimeLike, tm.TestCase): - _holder = DatetimeIndex - _multiprocess_can_split_ = True - - def setUp(self): - self.indices = dict(index=tm.makeDateIndex(10)) - self.setup_indices() - - def create_index(self): - return date_range('20130101', periods=5) - - def test_shift(self): - - # test shift for datetimeIndex and non datetimeIndex - # GH8083 - - drange = self.create_index() - result = drange.shift(1) - expected = DatetimeIndex(['2013-01-02', '2013-01-03', '2013-01-04', - '2013-01-05', - '2013-01-06'], freq='D') - self.assert_index_equal(result, expected) - - result = drange.shift(-1) - expected = DatetimeIndex(['2012-12-31', '2013-01-01', '2013-01-02', - '2013-01-03', '2013-01-04'], - freq='D') - self.assert_index_equal(result, expected) - - result = drange.shift(3, freq='2D') - expected = DatetimeIndex(['2013-01-07', '2013-01-08', '2013-01-09', - '2013-01-10', - '2013-01-11'], freq='D') - self.assert_index_equal(result, expected) - - def test_pickle_compat_construction(self): - pass - - def test_intersection(self): - first = self.index - second = self.index[5:] - intersect = first.intersection(second) - self.assertTrue(tm.equalContents(intersect, second)) - - # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - result = first.intersection(case) - self.assertTrue(tm.equalContents(result, second)) - - third = Index(['a', 'b', 'c']) - result = first.intersection(third) - expected = pd.Index([], dtype=object) - self.assert_index_equal(result, expected) - - def test_union(self): - first = self.index[:5] - second = self.index[5:] - everything = self.index - union = first.union(second) - self.assertTrue(tm.equalContents(union, everything)) - - # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - result = first.union(case) - self.assertTrue(tm.equalContents(result, everything)) +from .datetimelike import DatetimeLike class TestPeriodIndex(DatetimeLike, tm.TestCase): diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 94369ebbd0a19..f686f1aa6dc47 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -566,6 +566,65 @@ def test_nat_fields(self): self.assertTrue(np.isnan(ts.daysinmonth)) self.assertTrue(np.isnan(ts.days_in_month)) + def test_nat_vector_field_access(self): + idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) + + fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', + 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', + 'days_in_month', 'is_leap_year'] + + for field in fields: + result = getattr(idx, field) + expected = [getattr(x, field) for x in idx] + self.assert_numpy_array_equal(result, np.array(expected)) + + s = pd.Series(idx) + + for field in fields: + result = getattr(s.dt, field) + expected = [getattr(x, field) for x in idx] + self.assert_series_equal(result, pd.Series(expected)) + + def test_nat_scalar_field_access(self): + fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', + 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', + 'days_in_month', 'daysinmonth', 'dayofweek', 'weekday_name'] + for field in fields: + result 
= getattr(NaT, field) + self.assertTrue(np.isnan(result)) + + def test_NaT_methods(self): + # GH 9513 + raise_methods = ['astimezone', 'combine', 'ctime', 'dst', + 'fromordinal', 'fromtimestamp', 'isocalendar', + 'strftime', 'strptime', 'time', 'timestamp', + 'timetuple', 'timetz', 'toordinal', 'tzname', + 'utcfromtimestamp', 'utcnow', 'utcoffset', + 'utctimetuple'] + nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today'] + nan_methods = ['weekday', 'isoweekday'] + + for method in raise_methods: + if hasattr(NaT, method): + self.assertRaises(ValueError, getattr(NaT, method)) + + for method in nan_methods: + if hasattr(NaT, method): + self.assertTrue(np.isnan(getattr(NaT, method)())) + + for method in nat_methods: + if hasattr(NaT, method): + # see gh-8254 + exp_warning = None + if method == 'to_datetime': + exp_warning = FutureWarning + with tm.assert_produces_warning( + exp_warning, check_stacklevel=False): + self.assertIs(getattr(NaT, method)(), NaT) + + # GH 12300 + self.assertEqual(NaT.isoformat(), 'NaT') + def test_pprint(self): # GH12622 import pprint diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index e6209a853e958..d4b6e7dd5349f 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -346,6 +346,135 @@ def test_getitem_setitem_slice_integers(self): self.assertTrue((s[:4] == 0).all()) self.assertTrue(not (s[4:] == 0).any()) + def test_getitem_setitem_datetime_tz_pytz(self): + tm._skip_if_no_pytz() + from pytz import timezone as tz + + from pandas import date_range + + N = 50 + # testing with timezone, GH #2785 + rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') + ts = Series(np.random.randn(N), index=rng) + + # also test Timestamp tz handling, GH #2789 + result = ts.copy() + result["1990-01-01 09:00:00+00:00"] = 0 + result["1990-01-01 09:00:00+00:00"] = ts[4] + assert_series_equal(result, ts) + + result = ts.copy() + result["1990-01-01 03:00:00-06:00"] = 0 + result["1990-01-01 03:00:00-06:00"] = ts[4] + assert_series_equal(result, ts) + + # repeat with datetimes + result = ts.copy() + result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0 + result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4] + assert_series_equal(result, ts) + + result = ts.copy() + + # comparison dates with datetime MUST be localized! 
+ date = tz('US/Central').localize(datetime(1990, 1, 1, 3)) + result[date] = 0 + result[date] = ts[4] + assert_series_equal(result, ts) + + def test_getitem_setitem_datetime_tz_dateutil(self): + tm._skip_if_no_dateutil() + from dateutil.tz import tzutc + from pandas.tslib import _dateutil_gettz as gettz + + tz = lambda x: tzutc() if x == 'UTC' else gettz( + x) # handle special case for utc in dateutil + + from pandas import date_range + + N = 50 + + # testing with timezone, GH #2785 + rng = date_range('1/1/1990', periods=N, freq='H', + tz='America/New_York') + ts = Series(np.random.randn(N), index=rng) + + # also test Timestamp tz handling, GH #2789 + result = ts.copy() + result["1990-01-01 09:00:00+00:00"] = 0 + result["1990-01-01 09:00:00+00:00"] = ts[4] + assert_series_equal(result, ts) + + result = ts.copy() + result["1990-01-01 03:00:00-06:00"] = 0 + result["1990-01-01 03:00:00-06:00"] = ts[4] + assert_series_equal(result, ts) + + # repeat with datetimes + result = ts.copy() + result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0 + result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4] + assert_series_equal(result, ts) + + result = ts.copy() + result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = 0 + result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = ts[4] + assert_series_equal(result, ts) + + def test_getitem_setitem_periodindex(self): + from pandas import period_range + + N = 50 + rng = period_range('1/1/1990', periods=N, freq='H') + ts = Series(np.random.randn(N), index=rng) + + result = ts["1990-01-01 04"] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts.copy() + result["1990-01-01 04"] = 0 + result["1990-01-01 04"] = ts[4] + assert_series_equal(result, ts) + + result = ts["1990-01-01 04":"1990-01-01 07"] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-01 04":"1990-01-01 07"] = 0 + result["1990-01-01 04":"1990-01-01 07"] = ts[4:8] + assert_series_equal(result, ts) + + lb = "1990-01-01 04" + rb = "1990-01-01 07" + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + # GH 2782 + result = ts[ts.index[4]] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts[ts.index[4:8]] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[ts.index[4:8]] = 0 + result[4:8] = ts[4:8] + assert_series_equal(result, ts) + + def test_getitem_median_slice_bug(self): + index = date_range('20090415', '20090519', freq='2B') + s = Series(np.random.randn(13), index=index) + + indexer = [slice(6, 7, None)] + result = s[indexer] + expected = s[indexer[0]] + assert_series_equal(result, expected) + def test_getitem_out_of_bounds(self): # don't segfault, GH #495 self.assertRaises(IndexError, self.ts.__getitem__, len(self.ts)) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 91da36161e188..8cf0d190a95cc 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -8,11 +8,11 @@ import numpy as np import pandas as pd -from pandas import (Series, isnull, date_range, - MultiIndex, Index) -from pandas.tseries.index import Timestamp +from pandas import (Series, DataFrame, isnull, date_range, + MultiIndex, Index, Timestamp) from pandas.compat import range -from pandas.util.testing import assert_series_equal +from pandas import tslib +from pandas.util.testing import assert_series_equal, assert_frame_equal import 
pandas.util.testing as tm from .common import TestData @@ -283,6 +283,43 @@ def test_fillna_raise(self): self.assertRaises(TypeError, s.fillna, [1, 2]) self.assertRaises(TypeError, s.fillna, (1, 2)) + def test_fillna_nat(self): + series = Series([0, 1, 2, tslib.iNaT], dtype='M8[ns]') + + filled = series.fillna(method='pad') + filled2 = series.fillna(value=series.values[2]) + + expected = series.copy() + expected.values[3] = expected.values[2] + + assert_series_equal(filled, expected) + assert_series_equal(filled2, expected) + + df = DataFrame({'A': series}) + filled = df.fillna(method='pad') + filled2 = df.fillna(value=series.values[2]) + expected = DataFrame({'A': expected}) + assert_frame_equal(filled, expected) + assert_frame_equal(filled2, expected) + + series = Series([tslib.iNaT, 0, 1, 2], dtype='M8[ns]') + + filled = series.fillna(method='bfill') + filled2 = series.fillna(value=series[1]) + + expected = series.copy() + expected[0] = expected[1] + + assert_series_equal(filled, expected) + assert_series_equal(filled2, expected) + + df = DataFrame({'A': series}) + filled = df.fillna(method='bfill') + filled2 = df.fillna(value=series[1]) + expected = DataFrame({'A': expected}) + assert_frame_equal(filled, expected) + assert_frame_equal(filled2, expected) + def test_isnull_for_inf(self): s = Series(['a', np.inf, np.nan, 1.0]) with pd.option_context('mode.use_inf_as_null', True): @@ -518,6 +555,14 @@ def test_pad_nan(self): assert_series_equal(x[1:], expected[1:]) self.assertTrue(np.isnan(x[0]), np.isnan(expected[0])) + def test_pad_require_monotonicity(self): + rng = date_range('1/1/2000', '3/1/2000', freq='B') + + # neither monotonic increasing or decreasing + rng2 = rng[[1, 0, 2]] + + self.assertRaises(ValueError, rng2.get_indexer, rng, method='pad') + def test_dropna_preserve_name(self): self.ts[:5] = np.nan result = self.ts.dropna() diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 073b8bfeee131..571a802e37211 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -230,126 +230,6 @@ def test_truncate(self): before=self.ts.index[-1] + offset, after=self.ts.index[0] - offset) - def test_getitem_setitem_datetime_tz_pytz(self): - tm._skip_if_no_pytz() - from pytz import timezone as tz - - from pandas import date_range - - N = 50 - # testing with timezone, GH #2785 - rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') - ts = Series(np.random.randn(N), index=rng) - - # also test Timestamp tz handling, GH #2789 - result = ts.copy() - result["1990-01-01 09:00:00+00:00"] = 0 - result["1990-01-01 09:00:00+00:00"] = ts[4] - assert_series_equal(result, ts) - - result = ts.copy() - result["1990-01-01 03:00:00-06:00"] = 0 - result["1990-01-01 03:00:00-06:00"] = ts[4] - assert_series_equal(result, ts) - - # repeat with datetimes - result = ts.copy() - result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0 - result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4] - assert_series_equal(result, ts) - - result = ts.copy() - - # comparison dates with datetime MUST be localized! 
- date = tz('US/Central').localize(datetime(1990, 1, 1, 3)) - result[date] = 0 - result[date] = ts[4] - assert_series_equal(result, ts) - - def test_getitem_setitem_datetime_tz_dateutil(self): - tm._skip_if_no_dateutil() - from dateutil.tz import tzutc - from pandas.tslib import _dateutil_gettz as gettz - - tz = lambda x: tzutc() if x == 'UTC' else gettz( - x) # handle special case for utc in dateutil - - from pandas import date_range - - N = 50 - - # testing with timezone, GH #2785 - rng = date_range('1/1/1990', periods=N, freq='H', - tz='America/New_York') - ts = Series(np.random.randn(N), index=rng) - - # also test Timestamp tz handling, GH #2789 - result = ts.copy() - result["1990-01-01 09:00:00+00:00"] = 0 - result["1990-01-01 09:00:00+00:00"] = ts[4] - assert_series_equal(result, ts) - - result = ts.copy() - result["1990-01-01 03:00:00-06:00"] = 0 - result["1990-01-01 03:00:00-06:00"] = ts[4] - assert_series_equal(result, ts) - - # repeat with datetimes - result = ts.copy() - result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0 - result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4] - assert_series_equal(result, ts) - - result = ts.copy() - result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = 0 - result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = ts[4] - assert_series_equal(result, ts) - - def test_getitem_setitem_periodindex(self): - from pandas import period_range - - N = 50 - rng = period_range('1/1/1990', periods=N, freq='H') - ts = Series(np.random.randn(N), index=rng) - - result = ts["1990-01-01 04"] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts.copy() - result["1990-01-01 04"] = 0 - result["1990-01-01 04"] = ts[4] - assert_series_equal(result, ts) - - result = ts["1990-01-01 04":"1990-01-01 07"] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result["1990-01-01 04":"1990-01-01 07"] = 0 - result["1990-01-01 04":"1990-01-01 07"] = ts[4:8] - assert_series_equal(result, ts) - - lb = "1990-01-01 04" - rb = "1990-01-01 07" - result = ts[(ts.index >= lb) & (ts.index <= rb)] - expected = ts[4:8] - assert_series_equal(result, expected) - - # GH 2782 - result = ts[ts.index[4]] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts[ts.index[4:8]] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result[ts.index[4:8]] = 0 - result[4:8] = ts[4:8] - assert_series_equal(result, ts) - def test_asfreq(self): ts = Series([0., 1., 2.], index=[datetime(2009, 10, 30), datetime( 2009, 11, 30), datetime(2009, 12, 31)]) @@ -513,12 +393,6 @@ def test_empty_series_ops(self): assert_series_equal(a, b + a) self.assertRaises(TypeError, lambda x, y: x - y, b, a) - def test_is_(self): - dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') - self.assertTrue(dti.is_(dti)) - self.assertTrue(dti.is_(dti.view())) - self.assertFalse(dti.is_(dti.copy())) - def test_contiguous_boolean_preserve_freq(self): rng = date_range('1/1/2000', '3/1/2000', freq='B') @@ -534,159 +408,6 @@ def test_contiguous_boolean_preserve_freq(self): masked = rng[mask] self.assertIsNone(masked.freq) - def test_getitem_median_slice_bug(self): - index = date_range('20090415', '20090519', freq='2B') - s = Series(np.random.randn(13), index=index) - - indexer = [slice(6, 7, None)] - result = s[indexer] - expected = s[indexer[0]] - assert_series_equal(result, expected) - - def test_ctor_str_intraday(self): - rng = DatetimeIndex(['1-1-2000 00:00:01']) - self.assertEqual(rng[0].second, 1) - - 
def test_frame_pad_backfill_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - - result = df[:2].reindex(index, method='pad', limit=5) - - expected = df[:2].reindex(index).fillna(method='pad') - expected.values[-3:] = np.nan - tm.assert_frame_equal(result, expected) - - result = df[-2:].reindex(index, method='backfill', limit=5) - - expected = df[-2:].reindex(index).fillna(method='backfill') - expected.values[:3] = np.nan - tm.assert_frame_equal(result, expected) - - def test_frame_fillna_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - - result = df[:2].reindex(index) - result = result.fillna(method='pad', limit=5) - - expected = df[:2].reindex(index).fillna(method='pad') - expected.values[-3:] = np.nan - tm.assert_frame_equal(result, expected) - - result = df[-2:].reindex(index) - result = result.fillna(method='backfill', limit=5) - - expected = df[-2:].reindex(index).fillna(method='backfill') - expected.values[:3] = np.nan - tm.assert_frame_equal(result, expected) - - def test_sparse_frame_pad_backfill_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - sdf = df.to_sparse() - - result = sdf[:2].reindex(index, method='pad', limit=5) - - expected = sdf[:2].reindex(index).fillna(method='pad') - expected = expected.to_dense() - expected.values[-3:] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - result = sdf[-2:].reindex(index, method='backfill', limit=5) - - expected = sdf[-2:].reindex(index).fillna(method='backfill') - expected = expected.to_dense() - expected.values[:3] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - def test_sparse_frame_fillna_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - sdf = df.to_sparse() - - result = sdf[:2].reindex(index) - result = result.fillna(method='pad', limit=5) - - expected = sdf[:2].reindex(index).fillna(method='pad') - expected = expected.to_dense() - expected.values[-3:] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - result = sdf[-2:].reindex(index) - result = result.fillna(method='backfill', limit=5) - - expected = sdf[-2:].reindex(index).fillna(method='backfill') - expected = expected.to_dense() - expected.values[:3] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - def test_pad_require_monotonicity(self): - rng = date_range('1/1/2000', '3/1/2000', freq='B') - - # neither monotonic increasing or decreasing - rng2 = rng[[1, 0, 2]] - - self.assertRaises(ValueError, rng2.get_indexer, rng, method='pad') - - def test_frame_ctor_datetime64_column(self): - rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') - dates = np.asarray(rng) - - df = DataFrame({'A': np.random.randn(len(rng)), 'B': dates}) - self.assertTrue(np.issubdtype(df['B'].dtype, np.dtype('M8[ns]'))) - - def test_frame_add_datetime64_column(self): - rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') - df = DataFrame(index=np.arange(len(rng))) - - df['A'] = rng - self.assertTrue(np.issubdtype(df['A'].dtype, np.dtype('M8[ns]'))) - - def test_frame_datetime64_pre1900_repr(self): - df = DataFrame({'year': date_range('1/1/1700', periods=50, - freq='A-DEC')}) - # it works! 
- repr(df) - - def test_frame_add_datetime64_col_other_units(self): - n = 100 - - units = ['h', 'm', 's', 'ms', 'D', 'M', 'Y'] - - ns_dtype = np.dtype('M8[ns]') - - for unit in units: - dtype = np.dtype('M8[%s]' % unit) - vals = np.arange(n, dtype=np.int64).view(dtype) - - df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) - df[unit] = vals - - ex_vals = to_datetime(vals.astype('O')).values - - self.assertEqual(df[unit].dtype, ns_dtype) - self.assertTrue((df[unit].values == ex_vals).all()) - - # Test insertion into existing datetime64 column - df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) - df['dates'] = np.arange(n, dtype=np.int64).view(ns_dtype) - - for unit in units: - dtype = np.dtype('M8[%s]' % unit) - vals = np.arange(n, dtype=np.int64).view(dtype) - - tmp = df.copy() - - tmp['dates'] = vals - ex_vals = to_datetime(vals.astype('O')).values - - self.assertTrue((tmp['dates'].values == ex_vals).all()) - def test_to_datetime_unit(self): epoch = 1370745748 @@ -756,13 +477,6 @@ def test_series_ctor_datetime64(self): series = Series(dates) self.assertTrue(np.issubdtype(series.dtype, np.dtype('M8[ns]'))) - def test_index_cast_datetime64_other_units(self): - arr = np.arange(0, 100, 10, dtype=np.int64).view('M8[D]') - - idx = Index(arr) - - self.assertTrue((idx.values == tslib.cast_to_nanoseconds(arr)).all()) - def test_reindex_series_add_nat(self): rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') series = Series(rng) @@ -796,159 +510,6 @@ def test_series_repr_nat(self): 'dtype: datetime64[ns]') self.assertEqual(result, expected) - def test_fillna_nat(self): - series = Series([0, 1, 2, iNaT], dtype='M8[ns]') - - filled = series.fillna(method='pad') - filled2 = series.fillna(value=series.values[2]) - - expected = series.copy() - expected.values[3] = expected.values[2] - - assert_series_equal(filled, expected) - assert_series_equal(filled2, expected) - - df = DataFrame({'A': series}) - filled = df.fillna(method='pad') - filled2 = df.fillna(value=series.values[2]) - expected = DataFrame({'A': expected}) - assert_frame_equal(filled, expected) - assert_frame_equal(filled2, expected) - - series = Series([iNaT, 0, 1, 2], dtype='M8[ns]') - - filled = series.fillna(method='bfill') - filled2 = series.fillna(value=series[1]) - - expected = series.copy() - expected[0] = expected[1] - - assert_series_equal(filled, expected) - assert_series_equal(filled2, expected) - - df = DataFrame({'A': series}) - filled = df.fillna(method='bfill') - filled2 = df.fillna(value=series[1]) - expected = DataFrame({'A': expected}) - assert_frame_equal(filled, expected) - assert_frame_equal(filled2, expected) - - def test_string_na_nat_conversion(self): - # GH #999, #858 - - from pandas.compat import parse_date - - strings = np.array(['1/1/2000', '1/2/2000', np.nan, - '1/4/2000, 12:34:56'], dtype=object) - - expected = np.empty(4, dtype='M8[ns]') - for i, val in enumerate(strings): - if com.isnull(val): - expected[i] = iNaT - else: - expected[i] = parse_date(val) - - result = tslib.array_to_datetime(strings) - assert_almost_equal(result, expected) - - result2 = to_datetime(strings) - tm.assertIsInstance(result2, DatetimeIndex) - tm.assert_numpy_array_equal(result, result2.values) - - malformed = np.array(['1/100/2000', np.nan], dtype=object) - - # GH 10636, default is now 'raise' - self.assertRaises(ValueError, - lambda: to_datetime(malformed, errors='raise')) - - result = to_datetime(malformed, errors='ignore') - tm.assert_numpy_array_equal(result, malformed) - - self.assertRaises(ValueError, 
to_datetime, malformed, errors='raise') - - idx = ['a', 'b', 'c', 'd', 'e'] - series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, - '1/5/2000'], index=idx, name='foo') - dseries = Series([to_datetime('1/1/2000'), np.nan, - to_datetime('1/3/2000'), np.nan, - to_datetime('1/5/2000')], index=idx, name='foo') - - result = to_datetime(series) - dresult = to_datetime(dseries) - - expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) - for i in range(5): - x = series[i] - if isnull(x): - expected[i] = iNaT - else: - expected[i] = to_datetime(x) - - assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, 'foo') - - assert_series_equal(dresult, expected, check_names=False) - self.assertEqual(dresult.name, 'foo') - - def test_nat_vector_field_access(self): - idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) - - fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', - 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 'is_leap_year'] - - for field in fields: - result = getattr(idx, field) - expected = [getattr(x, field) for x in idx] - self.assert_numpy_array_equal(result, np.array(expected)) - - s = pd.Series(idx) - - for field in fields: - result = getattr(s.dt, field) - expected = [getattr(x, field) for x in idx] - self.assert_series_equal(result, pd.Series(expected)) - - def test_nat_scalar_field_access(self): - fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', - 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 'daysinmonth', 'dayofweek', 'weekday_name'] - for field in fields: - result = getattr(NaT, field) - self.assertTrue(np.isnan(result)) - - def test_NaT_methods(self): - # GH 9513 - raise_methods = ['astimezone', 'combine', 'ctime', 'dst', - 'fromordinal', 'fromtimestamp', 'isocalendar', - 'strftime', 'strptime', 'time', 'timestamp', - 'timetuple', 'timetz', 'toordinal', 'tzname', - 'utcfromtimestamp', 'utcnow', 'utcoffset', - 'utctimetuple'] - nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today'] - nan_methods = ['weekday', 'isoweekday'] - - for method in raise_methods: - if hasattr(NaT, method): - self.assertRaises(ValueError, getattr(NaT, method)) - - for method in nan_methods: - if hasattr(NaT, method): - self.assertTrue(np.isnan(getattr(NaT, method)())) - - for method in nat_methods: - if hasattr(NaT, method): - # see gh-8254 - exp_warning = None - if method == 'to_datetime': - exp_warning = FutureWarning - with tm.assert_produces_warning( - exp_warning, check_stacklevel=False): - self.assertIs(getattr(NaT, method)(), NaT) - - # GH 12300 - self.assertEqual(NaT.isoformat(), 'NaT') - def test_index_convert_to_datetime_array(self): tm._skip_if_no_pytz() @@ -968,56 +529,6 @@ def _check_rng(rng): _check_rng(rng_eastern) _check_rng(rng_utc) - def test_index_convert_to_datetime_array_explicit_pytz(self): - tm._skip_if_no_pytz() - import pytz - - def _check_rng(rng): - converted = rng.to_pydatetime() - tm.assertIsInstance(converted, np.ndarray) - for x, stamp in zip(converted, rng): - tm.assertIsInstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) - - rng = date_range('20090415', '20090519') - rng_eastern = date_range('20090415', '20090519', - tz=pytz.timezone('US/Eastern')) - rng_utc = date_range('20090415', '20090519', tz=pytz.utc) - - _check_rng(rng) - _check_rng(rng_eastern) - _check_rng(rng_utc) - - def test_index_convert_to_datetime_array_dateutil(self): - tm._skip_if_no_dateutil() - import 
dateutil
-
-        def _check_rng(rng):
-            converted = rng.to_pydatetime()
-            tm.assertIsInstance(converted, np.ndarray)
-            for x, stamp in zip(converted, rng):
-                tm.assertIsInstance(x, datetime)
-                self.assertEqual(x, stamp.to_pydatetime())
-                self.assertEqual(x.tzinfo, stamp.tzinfo)
-
-        rng = date_range('20090415', '20090519')
-        rng_eastern = date_range('20090415', '20090519',
-                                 tz='dateutil/US/Eastern')
-        rng_utc = date_range('20090415', '20090519', tz=dateutil.tz.tzutc())
-
-        _check_rng(rng)
-        _check_rng(rng_eastern)
-        _check_rng(rng_utc)
-
-    def test_reasonable_keyerror(self):
-        # GH #1062
-        index = DatetimeIndex(['1/3/2000'])
-        try:
-            index.get_loc('1/1/2000')
-        except KeyError as e:
-            self.assertIn('2000', str(e))
-
     def test_reindex_with_datetimes(self):
         rng = date_range('1/1/2000', periods=20)
         ts = Series(np.random.randn(20), index=rng)

From 72992df66854465a15f18f7d6445ae5e1a3e0c3d Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Sat, 4 Feb 2017 12:21:28 -0500
Subject: [PATCH 009/933] TST: more test moving from series/test_timeseries.py

---
 pandas/tests/frame/test_alter_axes.py | 28 +-
 pandas/tests/frame/test_apply.py | 9 +
 pandas/tests/frame/test_combine_concat.py | 16 +-
 pandas/tests/frame/test_constructors.py | 27 +
 pandas/tests/frame/test_indexing.py | 15 +
 pandas/tests/frame/test_timeseries.py | 121 +-
 pandas/tests/groupby/test_groupby.py | 8 +
 pandas/tests/indexes/datetimes/test_astype.py | 130 +-
 .../indexes/datetimes/test_construction.py | 90 +-
 .../indexes/datetimes/test_date_range.py | 18 +
 .../indexes/datetimes/test_datetimelike.py | 1 +
 pandas/tests/indexes/datetimes/test_misc.py | 293 +--
 .../indexes/datetimes/test_partial_slcing.py | 256 ++
 pandas/tests/indexes/datetimes/test_setops.py | 22 +-
 pandas/tests/indexes/datetimes/test_tools.py | 1019 ++++++++
 pandas/tests/series/test_combine_concat.py | 102 +-
 pandas/tests/series/test_constructors.py | 45 +-
 pandas/tests/series/test_dtypes.py | 25 +-
 pandas/tests/series/test_indexing.py | 540 +++-
 pandas/tests/series/test_operators.py | 13 +
 pandas/tests/series/test_timeseries.py | 2173 +----------------
 21 files changed, 2485 insertions(+), 2466 deletions(-)
 create mode 100644 pandas/tests/indexes/datetimes/test_partial_slcing.py
 create mode 100644 pandas/tests/indexes/datetimes/test_tools.py

diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index edeca0a664a87..cab627dec63cb 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -8,7 +8,7 @@
 from pandas.compat import lrange
 from pandas import (DataFrame, Series, Index, MultiIndex,
-                    RangeIndex)
+                    RangeIndex, date_range)
 import pandas as pd
 
 from pandas.util.testing import (assert_series_equal,
@@ -325,6 +325,32 @@ def test_set_columns(self):
         with assertRaisesRegexp(ValueError, 'Length mismatch'):
             self.mixed_frame.columns = cols[::2]
 
+    def test_dti_set_index_reindex(self):
+        # GH 6631
+        df = DataFrame(np.random.random(6))
+        idx1 = date_range('2011/01/01', periods=6, freq='M', tz='US/Eastern')
+        idx2 = date_range('2013', periods=6, freq='A', tz='Asia/Tokyo')
+
+        df = df.set_index(idx1)
+        tm.assert_index_equal(df.index, idx1)
+        df = df.reindex(idx2)
+        tm.assert_index_equal(df.index, idx2)
+
+        # 11314
+        # with tz
+        index = date_range(datetime(2015, 10, 1),
+                           datetime(2015, 10, 1, 23),
+                           freq='H', tz='US/Eastern')
+        df = DataFrame(np.random.randn(24, 1), columns=['a'], index=index)
+        new_index = date_range(datetime(2015, 10, 2),
+                               datetime(2015, 10, 2, 23),
+                               freq='H', tz='US/Eastern')
+
+        # TODO: unused?
+ result = df.set_index(new_index) # noqa + + self.assertEqual(new_index.freq, index.freq) + # Renaming def test_rename(self): diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index fe04d1005e003..19fa98afd2163 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -433,6 +433,15 @@ def test_applymap_box(self): 'd': ['Period', 'Period']}) tm.assert_frame_equal(res, exp) + def test_frame_apply_dont_convert_datetime64(self): + from pandas.tseries.offsets import BDay + df = DataFrame({'x1': [datetime(1996, 1, 1)]}) + + df = df.applymap(lambda x: x + BDay()) + df = df.applymap(lambda x: x + BDay()) + + self.assertTrue(df.x1.dtype == 'M8[ns]') + # See gh-12244 def test_apply_non_numpy_dtype(self): df = DataFrame({'dt': pd.date_range( diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 71b6500e7184a..1167662b69375 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -9,7 +9,7 @@ import pandas as pd -from pandas import DataFrame, Index, Series, Timestamp +from pandas import DataFrame, Index, Series, Timestamp, date_range from pandas.compat import lrange from pandas.tests.frame.common import TestData @@ -735,3 +735,17 @@ def test_combine_first_int(self): res = df1.combine_first(df2) tm.assert_frame_equal(res, df1) self.assertEqual(res['a'].dtype, 'int64') + + def test_concat_datetime_datetime64_frame(self): + # #2624 + rows = [] + rows.append([datetime(2010, 1, 1), 1]) + rows.append([datetime(2010, 1, 2), 'hi']) + + df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) + + ind = date_range(start="2000/1/1", freq="D", periods=10) + df1 = DataFrame({'date': ind, 'test': lrange(10)}) + + # it works! + pd.concat([df1, df2_obj]) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 07cf6816330bc..fe6a12fcca28a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1920,6 +1920,33 @@ def test_from_index(self): df2 = DataFrame(Series(idx2)) tm.assert_series_equal(df2[0], Series(idx2, name=0)) + def test_frame_dict_constructor_datetime64_1680(self): + dr = date_range('1/1/2012', periods=10) + s = Series(dr, index=dr) + + # it works! + DataFrame({'a': 'foo', 'b': s}, index=dr) + DataFrame({'a': 'foo', 'b': s.values}, index=dr) + + def test_frame_datetime64_mixed_index_ctor_1681(self): + dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') + ts = Series(dr) + + # it works! 
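+        # ts carries a default integer index, so aligning it to the
+        # DatetimeIndex dr should leave column 'B' entirely null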
+        d = DataFrame({'A': 'foo', 'B': ts}, index=dr)
+        self.assertTrue(d['B'].isnull().all())
+
+    def test_frame_timeseries_to_records(self):
+        index = date_range('1/1/2000', periods=10)
+        df = DataFrame(np.random.randn(10, 3), index=index,
+                       columns=['a', 'b', 'c'])
+
+        result = df.to_records()
+        self.assertTrue(result['index'].dtype == 'M8[ns]')
+
+        result = df.to_records(index=False)
+
+
 if __name__ == '__main__':
     import nose  # noqa
diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
index bc0a68f765903..7d68eac47766e 100644
--- a/pandas/tests/frame/test_indexing.py
+++ b/pandas/tests/frame/test_indexing.py
@@ -1945,6 +1945,21 @@ def test_reindex_methods(self):
         actual = df.reindex(target, method='nearest', tolerance=0.2)
         assert_frame_equal(expected, actual)
 
+    def test_reindex_frame_add_nat(self):
+        rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s')
+        df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng})
+
+        result = df.reindex(lrange(15))
+        self.assertTrue(np.issubdtype(result['B'].dtype, np.dtype('M8[ns]')))
+
+        mask = com.isnull(result)['B']
+        self.assertTrue(mask[-5:].all())
+        self.assertFalse(mask[:-5].any())
+
+    def test_set_dataframe_column_ns_dtype(self):
+        x = DataFrame([datetime.now(), datetime.now()])
+        self.assertEqual(x[0].dtype, np.dtype('M8[ns]'))
+
     def test_non_monotonic_reindex_methods(self):
         dr = pd.date_range('2013-08-01', periods=6, freq='B')
         data = np.random.randn(6, 1)
diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py
index 934aafc500611..9a9f0ee67fb89 100644
--- a/pandas/tests/frame/test_timeseries.py
+++ b/pandas/tests/frame/test_timeseries.py
@@ -2,7 +2,7 @@
 
 from __future__ import print_function
 
-from datetime import datetime
+from datetime import datetime, time
 
 from numpy import nan
 from numpy.random import randn
@@ -20,6 +20,7 @@
     assertRaisesRegexp)
 import pandas.util.testing as tm
 
+from pandas.compat import product
 from pandas.tests.frame.common import TestData
 
 
@@ -418,6 +419,96 @@ def test_first_last_valid(self):
         self.assertIsNone(empty.last_valid_index())
         self.assertIsNone(empty.first_valid_index())
 
+    def test_at_time_frame(self):
+        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
+        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
+        rs = ts.at_time(rng[1])
+        self.assertTrue((rs.index.hour == rng[1].hour).all())
+        self.assertTrue((rs.index.minute == rng[1].minute).all())
+        self.assertTrue((rs.index.second == rng[1].second).all())
+
+        result = ts.at_time('9:30')
+        expected = ts.at_time(time(9, 30))
+        assert_frame_equal(result, expected)
+
+        result = ts.loc[time(9, 30)]
+        expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)]
+
+        assert_frame_equal(result, expected)
+
+        # midnight, everything
+        rng = date_range('1/1/2000', '1/31/2000')
+        ts = DataFrame(np.random.randn(len(rng), 3), index=rng)
+
+        result = ts.at_time(time(0, 0))
+        assert_frame_equal(result, ts)
+
+        # time doesn't exist
+        rng = date_range('1/1/2012', freq='23Min', periods=384)
+        ts = DataFrame(np.random.randn(len(rng), 2), rng)
+        rs = ts.at_time('16:00')
+        self.assertEqual(len(rs), 0)
+
+    def test_between_time_frame(self):
+        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
+        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
+        stime = time(0, 0)
+        etime = time(1, 0)
+
+        close_open = product([True, False], [True, False])
+        for inc_start, inc_end in close_open:
+            filtered = ts.between_time(stime, etime, inc_start, inc_end)
+            exp_len = 13 * 4 + 1
+            if not inc_start:
+                exp_len -= 5
+            if not inc_end:
+
exp_len -= 4 + + self.assertEqual(len(filtered), exp_len) + for rs in filtered.index: + t = rs.time() + if inc_start: + self.assertTrue(t >= stime) + else: + self.assertTrue(t > stime) + + if inc_end: + self.assertTrue(t <= etime) + else: + self.assertTrue(t < etime) + + result = ts.between_time('00:00', '01:00') + expected = ts.between_time(stime, etime) + assert_frame_equal(result, expected) + + # across midnight + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + stime = time(22, 0) + etime = time(9, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = (12 * 11 + 1) * 4 + 1 + if not inc_start: + exp_len -= 4 + if not inc_end: + exp_len -= 4 + + self.assertEqual(len(filtered), exp_len) + for rs in filtered.index: + t = rs.time() + if inc_start: + self.assertTrue((t >= stime) or (t <= etime)) + else: + self.assertTrue((t > stime) or (t <= etime)) + + if inc_end: + self.assertTrue((t <= etime) or (t >= stime)) + else: + self.assertTrue((t < etime) or (t >= stime)) + def test_operation_on_NaT(self): # Both NaT and Timestamp are in DataFrame. df = pd.DataFrame({'foo': [pd.NaT, pd.NaT, @@ -457,6 +548,34 @@ def test_datetime_assignment_with_NaT_and_diff_time_units(self): 'new': [1e9, None]}, dtype='datetime64[ns]') tm.assert_frame_equal(result, expected) + def test_frame_to_period(self): + K = 5 + from pandas.tseries.period import period_range + + dr = date_range('1/1/2000', '1/1/2001') + pr = period_range('1/1/2000', '1/1/2001') + df = DataFrame(randn(len(dr), K), index=dr) + df['mix'] = 'a' + + pts = df.to_period() + exp = df.copy() + exp.index = pr + assert_frame_equal(pts, exp) + + pts = df.to_period('M') + tm.assert_index_equal(pts.index, exp.index.asfreq('M')) + + df = df.T + pts = df.to_period(axis=1) + exp = df.copy() + exp.columns = pr + assert_frame_equal(pts, exp) + + pts = df.to_period('M', axis=1) + tm.assert_index_equal(pts.columns, exp.columns.asfreq('M')) + + self.assertRaises(ValueError, df.to_period, axis=2) + if __name__ == '__main__': import nose diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index ffb6025163a6b..bf61f5ef83859 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -4167,6 +4167,14 @@ def test_groupby_groups_datetimeindex_tz(self): result = df.groupby(level=0).sum() assert_frame_equal(result, expected) + def test_frame_datetime64_handling_groupby(self): + # it works! 
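+        # grouping on 'a' should leave the datetime64 column intact,
+        # so first() can hand it back as a proper Timestamp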
+ df = DataFrame([(3, np.datetime64('2012-07-03')), + (3, np.datetime64('2012-07-04'))], + columns=['a', 'date']) + result = df.groupby('a').first() + self.assertEqual(result['date'][3], Timestamp('2012-07-03')) + def test_groupby_multi_timezone(self): # combining multiple / different timezones yields UTC diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index d452a7e1840d7..edb044a3cb2d7 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -4,7 +4,7 @@ import pandas as pd import pandas.util.testing as tm from pandas import (DatetimeIndex, date_range, Series, NaT, Index, Timestamp, - Int64Index) + Int64Index, Period) class TestDatetimeIndex(tm.TestCase): @@ -182,3 +182,131 @@ def _check_rng(rng): _check_rng(rng) _check_rng(rng_eastern) _check_rng(rng_utc) + + +class TestToPeriod(tm.TestCase): + _multiprocess_can_split_ = True + + def setUp(self): + data = [Timestamp('2007-01-01 10:11:12.123456Z'), + Timestamp('2007-01-01 10:11:13.789123Z')] + self.index = DatetimeIndex(data) + + def test_to_period_millisecond(self): + index = self.index + + period = index.to_period(freq='L') + self.assertEqual(2, len(period)) + self.assertEqual(period[0], Period('2007-01-01 10:11:12.123Z', 'L')) + self.assertEqual(period[1], Period('2007-01-01 10:11:13.789Z', 'L')) + + def test_to_period_microsecond(self): + index = self.index + + period = index.to_period(freq='U') + self.assertEqual(2, len(period)) + self.assertEqual(period[0], Period('2007-01-01 10:11:12.123456Z', 'U')) + self.assertEqual(period[1], Period('2007-01-01 10:11:13.789123Z', 'U')) + + def test_to_period_tz_pytz(self): + tm._skip_if_no_pytz() + from dateutil.tz import tzlocal + from pytz import utc as UTC + + xp = date_range('1/1/2000', '4/1/2000').to_period() + + ts = date_range('1/1/2000', '4/1/2000', tz='US/Eastern') + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertEqual(result, expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=UTC) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertEqual(result, expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertEqual(result, expected) + tm.assert_index_equal(ts.to_period(), xp) + + def test_to_period_tz_explicit_pytz(self): + tm._skip_if_no_pytz() + import pytz + from dateutil.tz import tzlocal + + xp = date_range('1/1/2000', '4/1/2000').to_period() + + ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern')) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + def test_to_period_tz_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + from dateutil.tz import tzlocal + + xp = date_range('1/1/2000', '4/1/2000').to_period() + + ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern') + + result = ts.to_period()[0] + expected 
= ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + def test_astype_object(self): + # NumPy 1.6.1 weak ns support + rng = date_range('1/1/2000', periods=20) + + casted = rng.astype('O') + exp_values = list(rng) + + tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_)) + self.assertEqual(casted.tolist(), exp_values) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 03bc0e0c554b0..e54ebe3d93bc6 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -2,7 +2,7 @@ from datetime import timedelta import pandas as pd -from pandas import tslib +from pandas import tslib, offsets, lib import pandas.util.testing as tm from pandas.tslib import OutOfBoundsDatetime from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range, @@ -467,17 +467,6 @@ def test_dti_constructor_small_int(self): arr = np.array([0, 10, 20], dtype=dtype) tm.assert_index_equal(DatetimeIndex(arr), exp) - def test_dti_constructor_numpy_timeunits(self): - # GH 9114 - base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT']) - - for dtype in ['datetime64[h]', 'datetime64[m]', 'datetime64[s]', - 'datetime64[ms]', 'datetime64[us]', 'datetime64[ns]']: - values = base.values.astype(dtype) - - tm.assert_index_equal(DatetimeIndex(values), base) - tm.assert_index_equal(to_datetime(values), base) - def test_ctor_str_intraday(self): rng = DatetimeIndex(['1-1-2000 00:00:01']) self.assertEqual(rng[0].second, 1) @@ -507,3 +496,80 @@ def test_constructor_int64_nocopy(self): arr[50:100] = -1 self.assertTrue((index.asi8[50:100] != -1).all()) + + def test_from_freq_recreate_from_data(self): + freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N', + 'C'] + + for f in freqs: + org = DatetimeIndex(start='2001/02/01 09:00', freq=f, periods=1) + idx = DatetimeIndex(org, freq=f) + tm.assert_index_equal(idx, org) + + org = DatetimeIndex(start='2001/02/01 09:00', freq=f, + tz='US/Pacific', periods=1) + idx = DatetimeIndex(org, freq=f, tz='US/Pacific') + tm.assert_index_equal(idx, org) + + def test_datetimeindex_constructor_misc(self): + arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04'] + self.assertRaises(Exception, DatetimeIndex, arr) + + arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04'] + idx1 = DatetimeIndex(arr) + + arr = [datetime(2005, 1, 1), '1/2/2005', '1/3/2005', '2005-01-04'] + idx2 = DatetimeIndex(arr) + + arr = [lib.Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005', + '2005-01-04'] + idx3 = DatetimeIndex(arr) + + arr = np.array(['1/1/2005', '1/2/2005', '1/3/2005', + '2005-01-04'], dtype='O') + idx4 = DatetimeIndex(arr) + + arr = to_datetime(['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04']) + idx5 = DatetimeIndex(arr) + + arr = to_datetime(['1/1/2005', '1/2/2005', 'Jan 3, 2005', '2005-01-04' + ]) + idx6 = DatetimeIndex(arr) + + idx7 = DatetimeIndex(['12/05/2007', '25/01/2008'], dayfirst=True) + idx8 = DatetimeIndex(['2007/05/12', '2008/01/25'], dayfirst=False, + 
yearfirst=True) + tm.assert_index_equal(idx7, idx8) + + for other in [idx2, idx3, idx4, idx5, idx6]: + self.assertTrue((idx1.values == other.values).all()) + + sdate = datetime(1999, 12, 25) + edate = datetime(2000, 1, 1) + idx = DatetimeIndex(start=sdate, freq='1B', periods=20) + self.assertEqual(len(idx), 20) + self.assertEqual(idx[0], sdate + 0 * offsets.BDay()) + self.assertEqual(idx.freq, 'B') + + idx = DatetimeIndex(end=edate, freq=('D', 5), periods=20) + self.assertEqual(len(idx), 20) + self.assertEqual(idx[-1], edate) + self.assertEqual(idx.freq, '5D') + + idx1 = DatetimeIndex(start=sdate, end=edate, freq='W-SUN') + idx2 = DatetimeIndex(start=sdate, end=edate, + freq=offsets.Week(weekday=6)) + self.assertEqual(len(idx1), len(idx2)) + self.assertEqual(idx1.offset, idx2.offset) + + idx1 = DatetimeIndex(start=sdate, end=edate, freq='QS') + idx2 = DatetimeIndex(start=sdate, end=edate, + freq=offsets.QuarterBegin(startingMonth=1)) + self.assertEqual(len(idx1), len(idx2)) + self.assertEqual(idx1.offset, idx2.offset) + + idx1 = DatetimeIndex(start=sdate, end=edate, freq='BQ') + idx2 = DatetimeIndex(start=sdate, end=edate, + freq=offsets.BQuarterEnd(startingMonth=12)) + self.assertEqual(len(idx1), len(idx2)) + self.assertEqual(idx1.offset, idx2.offset) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index b3d6c41573ab8..b2161aa5c75c6 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -3,6 +3,7 @@ import pandas as pd import pandas.util.testing as tm from pandas import date_range, offsets, DatetimeIndex, Timestamp +from pandas import compat from pandas.tests.series.common import TestData @@ -110,3 +111,20 @@ def test_range_misspecified(self): self.assertRaises(ValueError, date_range, '1/1/2000', freq='H') self.assertRaises(ValueError, date_range, end='1/1/2000', freq='H') self.assertRaises(ValueError, date_range, periods=10, freq='H') + + def test_compat_replace(self): + # https://github.com/statsmodels/statsmodels/issues/3349 + # replace should take ints/longs for compat + + for f in [compat.long, int]: + result = date_range(Timestamp('1960-04-01 00:00:00', + freq='QS-JAN'), + periods=f(76), + freq='QS-JAN') + self.assertEqual(len(result), 76) + + def test_catch_infinite_loop(self): + offset = offsets.DateOffset(minute=5) + # blow up, don't loop forever + self.assertRaises(Exception, date_range, datetime(2011, 11, 11), + datetime(2011, 11, 12), freq=offset) diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index b32801a8bcf25..eea08febc86e6 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -7,6 +7,7 @@ from ..datetimelike import DatetimeLike + class TestDatetimeIndex(DatetimeLike, tm.TestCase): _holder = DatetimeIndex _multiprocess_can_split_ = True diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 92aad5a0b1997..dda2785d2b0ae 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -1,13 +1,9 @@ import numpy as np import pandas as pd -import pandas.lib as lib import pandas.util.testing as tm -from pandas import (Index, DatetimeIndex, datetime, offsets, to_datetime, - Series, DataFrame, Float64Index, date_range, - Timestamp, isnull) -from pandas import tslib -from pandas.util.testing import 
assert_series_equal +from pandas import (Index, DatetimeIndex, datetime, offsets, + Float64Index, date_range, Timestamp) class TestDateTimeIndexToJulianDate(tm.TestCase): @@ -144,63 +140,6 @@ def test_datetimeindex_integers_shift(self): expected = rng.shift(-5) tm.assert_index_equal(result, expected) - def test_string_na_nat_conversion(self): - # GH #999, #858 - - from pandas.compat import parse_date - - strings = np.array(['1/1/2000', '1/2/2000', np.nan, - '1/4/2000, 12:34:56'], dtype=object) - - expected = np.empty(4, dtype='M8[ns]') - for i, val in enumerate(strings): - if isnull(val): - expected[i] = tslib.iNaT - else: - expected[i] = parse_date(val) - - result = tslib.array_to_datetime(strings) - tm.assert_almost_equal(result, expected) - - result2 = to_datetime(strings) - tm.assertIsInstance(result2, DatetimeIndex) - tm.assert_numpy_array_equal(result, result2.values) - - malformed = np.array(['1/100/2000', np.nan], dtype=object) - - # GH 10636, default is now 'raise' - self.assertRaises(ValueError, - lambda: to_datetime(malformed, errors='raise')) - - result = to_datetime(malformed, errors='ignore') - tm.assert_numpy_array_equal(result, malformed) - - self.assertRaises(ValueError, to_datetime, malformed, errors='raise') - - idx = ['a', 'b', 'c', 'd', 'e'] - series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, - '1/5/2000'], index=idx, name='foo') - dseries = Series([to_datetime('1/1/2000'), np.nan, - to_datetime('1/3/2000'), np.nan, - to_datetime('1/5/2000')], index=idx, name='foo') - - result = to_datetime(series) - dresult = to_datetime(dseries) - - expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) - for i in range(5): - x = series[i] - if isnull(x): - expected[i] = tslib.iNaT - else: - expected[i] = to_datetime(x) - - assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, 'foo') - - assert_series_equal(dresult, expected, check_names=False) - self.assertEqual(dresult.name, 'foo') - def test_datetimeindex_repr_short(self): dr = date_range(start='1/1/2012', periods=1) repr(dr) @@ -211,84 +150,6 @@ def test_datetimeindex_repr_short(self): dr = date_range(start='1/1/2012', periods=3) repr(dr) - def test_getitem_setitem_datetimeindex(self): - N = 50 - # testing with timezone, GH #2785 - rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') - ts = Series(np.random.randn(N), index=rng) - - result = ts["1990-01-01 04:00:00"] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts.copy() - result["1990-01-01 04:00:00"] = 0 - result["1990-01-01 04:00:00"] = ts[4] - assert_series_equal(result, ts) - - result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0 - result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8] - assert_series_equal(result, ts) - - lb = "1990-01-01 04:00:00" - rb = "1990-01-01 07:00:00" - result = ts[(ts.index >= lb) & (ts.index <= rb)] - expected = ts[4:8] - assert_series_equal(result, expected) - - # repeat all the above with naive datetimes - result = ts[datetime(1990, 1, 1, 4)] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts.copy() - result[datetime(1990, 1, 1, 4)] = 0 - result[datetime(1990, 1, 1, 4)] = ts[4] - assert_series_equal(result, ts) - - result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result[datetime(1990, 1, 1, 
4):datetime(1990, 1, 1, 7)] = 0 - result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8] - assert_series_equal(result, ts) - - lb = datetime(1990, 1, 1, 4) - rb = datetime(1990, 1, 1, 7) - result = ts[(ts.index >= lb) & (ts.index <= rb)] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts[ts.index[4]] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts[ts.index[4:8]] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result[ts.index[4:8]] = 0 - result[4:8] = ts[4:8] - assert_series_equal(result, ts) - - # also test partial date slicing - result = ts["1990-01-02"] - expected = ts[24:48] - assert_series_equal(result, expected) - - result = ts.copy() - result["1990-01-02"] = 0 - result["1990-01-02"] = ts[24:48] - assert_series_equal(result, ts) - def test_normalize(self): rng = date_range('1/1/2000 9:30', periods=10, freq='D') @@ -308,13 +169,6 @@ def test_normalize(self): self.assertTrue(result.is_normalized) self.assertFalse(rng.is_normalized) - def test_series_ctor_plus_datetimeindex(self): - rng = date_range('20090415', '20090519', freq='B') - data = dict((k, 1) for k in rng) - - result = Series(data, index=rng) - self.assertIs(result.index, rng) - class TestDatetime64(tm.TestCase): @@ -451,151 +305,8 @@ def test_datetimeindex_accessors(self): for ts, value in tests: self.assertEqual(ts, value) - def test_datetimeindex_diff(self): - dti1 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), - periods=100) - dti2 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), - periods=98) - self.assertEqual(len(dti1.difference(dti2)), 2) - def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) self.assert_numpy_array_equal(dti.nanosecond, np.arange(10, dtype=np.int32)) - - def test_datetimeindex_constructor(self): - arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04'] - self.assertRaises(Exception, DatetimeIndex, arr) - - arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04'] - idx1 = DatetimeIndex(arr) - - arr = [datetime(2005, 1, 1), '1/2/2005', '1/3/2005', '2005-01-04'] - idx2 = DatetimeIndex(arr) - - arr = [lib.Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005', - '2005-01-04'] - idx3 = DatetimeIndex(arr) - - arr = np.array(['1/1/2005', '1/2/2005', '1/3/2005', - '2005-01-04'], dtype='O') - idx4 = DatetimeIndex(arr) - - arr = to_datetime(['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04']) - idx5 = DatetimeIndex(arr) - - arr = to_datetime(['1/1/2005', '1/2/2005', 'Jan 3, 2005', '2005-01-04' - ]) - idx6 = DatetimeIndex(arr) - - idx7 = DatetimeIndex(['12/05/2007', '25/01/2008'], dayfirst=True) - idx8 = DatetimeIndex(['2007/05/12', '2008/01/25'], dayfirst=False, - yearfirst=True) - tm.assert_index_equal(idx7, idx8) - - for other in [idx2, idx3, idx4, idx5, idx6]: - self.assertTrue((idx1.values == other.values).all()) - - sdate = datetime(1999, 12, 25) - edate = datetime(2000, 1, 1) - idx = DatetimeIndex(start=sdate, freq='1B', periods=20) - self.assertEqual(len(idx), 20) - self.assertEqual(idx[0], sdate + 0 * offsets.BDay()) - self.assertEqual(idx.freq, 'B') - - idx = DatetimeIndex(end=edate, freq=('D', 5), periods=20) - self.assertEqual(len(idx), 20) - self.assertEqual(idx[-1], edate) - self.assertEqual(idx.freq, '5D') - - idx1 = DatetimeIndex(start=sdate, end=edate, freq='W-SUN') - idx2 = DatetimeIndex(start=sdate, end=edate, - freq=offsets.Week(weekday=6)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) - - idx1 = 
DatetimeIndex(start=sdate, end=edate, freq='QS') - idx2 = DatetimeIndex(start=sdate, end=edate, - freq=offsets.QuarterBegin(startingMonth=1)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) - - idx1 = DatetimeIndex(start=sdate, end=edate, freq='BQ') - idx2 = DatetimeIndex(start=sdate, end=edate, - freq=offsets.BQuarterEnd(startingMonth=12)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) - - def test_dayfirst(self): - # GH 5917 - arr = ['10/02/2014', '11/02/2014', '12/02/2014'] - expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11), - datetime(2014, 2, 12)]) - idx1 = DatetimeIndex(arr, dayfirst=True) - idx2 = DatetimeIndex(np.array(arr), dayfirst=True) - idx3 = to_datetime(arr, dayfirst=True) - idx4 = to_datetime(np.array(arr), dayfirst=True) - idx5 = DatetimeIndex(Index(arr), dayfirst=True) - idx6 = DatetimeIndex(Series(arr), dayfirst=True) - tm.assert_index_equal(expected, idx1) - tm.assert_index_equal(expected, idx2) - tm.assert_index_equal(expected, idx3) - tm.assert_index_equal(expected, idx4) - tm.assert_index_equal(expected, idx5) - tm.assert_index_equal(expected, idx6) - - def test_dti_set_index_reindex(self): - # GH 6631 - df = DataFrame(np.random.random(6)) - idx1 = date_range('2011/01/01', periods=6, freq='M', tz='US/Eastern') - idx2 = date_range('2013', periods=6, freq='A', tz='Asia/Tokyo') - - df = df.set_index(idx1) - tm.assert_index_equal(df.index, idx1) - df = df.reindex(idx2) - tm.assert_index_equal(df.index, idx2) - - # 11314 - # with tz - index = date_range(datetime(2015, 10, 1), - datetime(2015, 10, 1, 23), - freq='H', tz='US/Eastern') - df = DataFrame(np.random.randn(24, 1), columns=['a'], index=index) - new_index = date_range(datetime(2015, 10, 2), - datetime(2015, 10, 2, 23), - freq='H', tz='US/Eastern') - - # TODO: unused? 
- result = df.set_index(new_index) # noqa - - self.assertEqual(new_index.freq, index.freq) - - def test_datetimeindex_union_join_empty(self): - dti = DatetimeIndex(start='1/1/2001', end='2/1/2001', freq='D') - empty = Index([]) - - result = dti.union(empty) - tm.assertIsInstance(result, DatetimeIndex) - self.assertIs(result, result) - - result = dti.join(empty) - tm.assertIsInstance(result, DatetimeIndex) - - -class TestTimeSeriesDuplicates(tm.TestCase): - _multiprocess_can_split_ = True - - def test_recreate_from_data(self): - freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N', - 'C'] - - for f in freqs: - org = DatetimeIndex(start='2001/02/01 09:00', freq=f, periods=1) - idx = DatetimeIndex(org, freq=f) - tm.assert_index_equal(idx, org) - - org = DatetimeIndex(start='2001/02/01 09:00', freq=f, - tz='US/Pacific', periods=1) - idx = DatetimeIndex(org, freq=f, tz='US/Pacific') - tm.assert_index_equal(idx, org) diff --git a/pandas/tests/indexes/datetimes/test_partial_slcing.py b/pandas/tests/indexes/datetimes/test_partial_slcing.py new file mode 100644 index 0000000000000..a960f5cf9235a --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_partial_slcing.py @@ -0,0 +1,256 @@ +""" test partial slicing on Series/Frame """ +from datetime import datetime +import numpy as np +import pandas as pd + +from pandas import (DatetimeIndex, Series, DataFrame, + date_range, Index, Timedelta, Timestamp) +from pandas.util import testing as tm + + +class TestSlicing(tm.TestCase): + + def test_slice_year(self): + dti = DatetimeIndex(freq='B', start=datetime(2005, 1, 1), periods=500) + + s = Series(np.arange(len(dti)), index=dti) + result = s['2005'] + expected = s[s.index.year == 2005] + tm.assert_series_equal(result, expected) + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + result = df.loc['2005'] + expected = df[df.index.year == 2005] + tm.assert_frame_equal(result, expected) + + rng = date_range('1/1/2000', '1/1/2010') + + result = rng.get_loc('2009') + expected = slice(3288, 3653) + self.assertEqual(result, expected) + + def test_slice_quarter(self): + dti = DatetimeIndex(freq='D', start=datetime(2000, 6, 1), periods=500) + + s = Series(np.arange(len(dti)), index=dti) + self.assertEqual(len(s['2001Q1']), 90) + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + self.assertEqual(len(df.loc['1Q01']), 90) + + def test_slice_month(self): + dti = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500) + s = Series(np.arange(len(dti)), index=dti) + self.assertEqual(len(s['2005-11']), 30) + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + self.assertEqual(len(df.loc['2005-11']), 30) + + tm.assert_series_equal(s['2005-11'], s['11-2005']) + + def test_partial_slice(self): + rng = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-05':'2006-02'] + expected = s['20050501':'20060228'] + tm.assert_series_equal(result, expected) + + result = s['2005-05':] + expected = s['20050501':] + tm.assert_series_equal(result, expected) + + result = s[:'2006-02'] + expected = s[:'20060228'] + tm.assert_series_equal(result, expected) + + result = s['2005-1-1'] + self.assertEqual(result, s.iloc[0]) + + self.assertRaises(Exception, s.__getitem__, '2004-12-31') + + def test_partial_slice_daily(self): + rng = DatetimeIndex(freq='H', start=datetime(2005, 1, 31), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-1-31'] + tm.assert_series_equal(result, s.iloc[:24]) + + 
self.assertRaises(Exception, s.__getitem__, '2004-12-31 00') + + def test_partial_slice_hourly(self): + rng = DatetimeIndex(freq='T', start=datetime(2005, 1, 1, 20, 0, 0), + periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-1-1'] + tm.assert_series_equal(result, s.iloc[:60 * 4]) + + result = s['2005-1-1 20'] + tm.assert_series_equal(result, s.iloc[:60]) + + self.assertEqual(s['2005-1-1 20:00'], s.iloc[0]) + self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:15') + + def test_partial_slice_minutely(self): + rng = DatetimeIndex(freq='S', start=datetime(2005, 1, 1, 23, 59, 0), + periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-1-1 23:59'] + tm.assert_series_equal(result, s.iloc[:60]) + + result = s['2005-1-1'] + tm.assert_series_equal(result, s.iloc[:60]) + + self.assertEqual(s[Timestamp('2005-1-1 23:59:00')], s.iloc[0]) + self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:00:00') + + def test_partial_slice_second_precision(self): + rng = DatetimeIndex(start=datetime(2005, 1, 1, 0, 0, 59, + microsecond=999990), + periods=20, freq='US') + s = Series(np.arange(20), rng) + + tm.assert_series_equal(s['2005-1-1 00:00'], s.iloc[:10]) + tm.assert_series_equal(s['2005-1-1 00:00:59'], s.iloc[:10]) + + tm.assert_series_equal(s['2005-1-1 00:01'], s.iloc[10:]) + tm.assert_series_equal(s['2005-1-1 00:01:00'], s.iloc[10:]) + + self.assertEqual(s[Timestamp('2005-1-1 00:00:59.999990')], s.iloc[0]) + self.assertRaisesRegexp(KeyError, '2005-1-1 00:00:00', + lambda: s['2005-1-1 00:00:00']) + + def test_partial_slicing_dataframe(self): + # GH14856 + # Test various combinations of string slicing resolution vs. + # index resolution + # - If string resolution is less precise than index resolution, + # string is considered a slice + # - If string resolution is equal to or more precise than index + # resolution, string is considered an exact match + formats = ['%Y', '%Y-%m', '%Y-%m-%d', '%Y-%m-%d %H', + '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S'] + resolutions = ['year', 'month', 'day', 'hour', 'minute', 'second'] + for rnum, resolution in enumerate(resolutions[2:], 2): + # we check only 'day', 'hour', 'minute' and 'second' + unit = Timedelta("1 " + resolution) + middate = datetime(2012, 1, 1, 0, 0, 0) + index = DatetimeIndex([middate - unit, + middate, middate + unit]) + values = [1, 2, 3] + df = DataFrame({'a': values}, index, dtype=np.int64) + self.assertEqual(df.index.resolution, resolution) + + # Timestamp with the same resolution as index + # Should be exact match for Series (return scalar) + # and raise KeyError for Frame + for timestamp, expected in zip(index, values): + ts_string = timestamp.strftime(formats[rnum]) + # make ts_string as precise as index + result = df['a'][ts_string] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, expected) + self.assertRaises(KeyError, df.__getitem__, ts_string) + + # Timestamp with resolution less precise than index + for fmt in formats[:rnum]: + for element, theslice in [[0, slice(None, 1)], + [1, slice(1, None)]]: + ts_string = index[element].strftime(fmt) + + # Series should return slice + result = df['a'][ts_string] + expected = df['a'][theslice] + tm.assert_series_equal(result, expected) + + # Frame should return slice as well + result = df[ts_string] + expected = df[theslice] + tm.assert_frame_equal(result, expected) + + # Timestamp with resolution more precise than index + # Compatible with existing key + # Should return scalar for Series + # and raise KeyError for Frame + for 
fmt in formats[rnum + 1:]: + ts_string = index[1].strftime(fmt) + result = df['a'][ts_string] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, 2) + self.assertRaises(KeyError, df.__getitem__, ts_string) + + # Not compatible with existing key + # Should raise KeyError + for fmt, res in list(zip(formats, resolutions))[rnum + 1:]: + ts = index[1] + Timedelta("1 " + res) + ts_string = ts.strftime(fmt) + self.assertRaises(KeyError, df['a'].__getitem__, ts_string) + self.assertRaises(KeyError, df.__getitem__, ts_string) + + def test_partial_slicing_with_multiindex(self): + + # GH 4758 + # partial string indexing with a multi-index buggy + df = DataFrame({'ACCOUNT': ["ACCT1", "ACCT1", "ACCT1", "ACCT2"], + 'TICKER': ["ABC", "MNP", "XYZ", "XYZ"], + 'val': [1, 2, 3, 4]}, + index=date_range("2013-06-19 09:30:00", + periods=4, freq='5T')) + df_multi = df.set_index(['ACCOUNT', 'TICKER'], append=True) + + expected = DataFrame([ + [1] + ], index=Index(['ABC'], name='TICKER'), columns=['val']) + result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1')] + tm.assert_frame_equal(result, expected) + + expected = df_multi.loc[ + (pd.Timestamp('2013-06-19 09:30:00', tz=None), 'ACCT1', 'ABC')] + result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1', 'ABC')] + tm.assert_series_equal(result, expected) + + # this is a KeyError as we don't do partial string selection on + # multi-levels + def f(): + df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')] + + self.assertRaises(KeyError, f) + + # GH 4294 + # partial slice on a series mi + s = pd.DataFrame(np.random.rand(1000, 1000), index=pd.date_range( + '2000-1-1', periods=1000)).stack() + + s2 = s[:-1].copy() + expected = s2['2000-1-4'] + result = s2[pd.Timestamp('2000-1-4')] + tm.assert_series_equal(result, expected) + + result = s[pd.Timestamp('2000-1-4')] + expected = s['2000-1-4'] + tm.assert_series_equal(result, expected) + + df2 = pd.DataFrame(s) + expected = df2.xs('2000-1-4') + result = df2.loc[pd.Timestamp('2000-1-4')] + tm.assert_frame_equal(result, expected) + + def test_partial_slice_doesnt_require_monotonicity(self): + # For historical reasons. 
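+        # a string slice past the end of a non-monotonic index yields an
+        # empty result, while the equivalent Timestamp slice raises KeyError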
+ s = pd.Series(np.arange(10), pd.date_range('2014-01-01', periods=10)) + + nonmonotonic = s[[3, 5, 4]] + expected = nonmonotonic.iloc[:0] + timestamp = pd.Timestamp('2014-01-10') + + tm.assert_series_equal(nonmonotonic['2014-01-10':], expected) + self.assertRaisesRegexp(KeyError, + r"Timestamp\('2014-01-10 00:00:00'\)", + lambda: nonmonotonic[timestamp:]) + + tm.assert_series_equal(nonmonotonic.loc['2014-01-10':], expected) + self.assertRaisesRegexp(KeyError, + r"Timestamp\('2014-01-10 00:00:00'\)", + lambda: nonmonotonic.loc[timestamp:]) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index ba6beb03c7f24..229ae803aa2ff 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -1,9 +1,11 @@ +from datetime import datetime + import numpy as np import pandas as pd import pandas.util.testing as tm from pandas import (DatetimeIndex, date_range, Series, bdate_range, DataFrame, - Int64Index) + Int64Index, Index) class TestDatetimeIndex(tm.TestCase): @@ -166,3 +168,21 @@ def test_difference_freq(self): expected = DatetimeIndex(["20160920", "20160921"], freq=None) tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) + + def test_datetimeindex_diff(self): + dti1 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), + periods=100) + dti2 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), + periods=98) + self.assertEqual(len(dti1.difference(dti2)), 2) + + def test_datetimeindex_union_join_empty(self): + dti = DatetimeIndex(start='1/1/2001', end='2/1/2001', freq='D') + empty = Index([]) + + result = dti.union(empty) + tm.assertIsInstance(result, DatetimeIndex) + self.assertIs(result, result) + + result = dti.join(empty) + tm.assertIsInstance(result, DatetimeIndex) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py new file mode 100644 index 0000000000000..42d135f634298 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -0,0 +1,1019 @@ +""" test to_datetime """ + +import nose + +import sys +import calendar +import locale +from datetime import datetime + +import numpy as np +from pandas.types.common import is_datetime64_ns_dtype +from pandas import (isnull, to_datetime, Timestamp, Series, DataFrame, + Index, DatetimeIndex, NaT, date_range, bdate_range) +from pandas import tslib +from pandas.compat import lmap +import pandas as pd +from pandas.tseries import tools +from pandas.util import testing as tm +from pandas.util.testing import assert_series_equal + + +class TimeConversionFormats(tm.TestCase): + + def test_to_datetime_format(self): + values = ['1/1/2000', '1/2/2000', '1/3/2000'] + + results1 = [Timestamp('20000101'), Timestamp('20000201'), + Timestamp('20000301')] + results2 = [Timestamp('20000101'), Timestamp('20000102'), + Timestamp('20000103')] + for vals, expecteds in [(values, (Index(results1), Index(results2))), + (Series(values), + (Series(results1), Series(results2))), + (values[0], (results1[0], results2[0])), + (values[1], (results1[1], results2[1])), + (values[2], (results1[2], results2[2]))]: + + for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']): + result = to_datetime(vals, format=fmt) + expected = expecteds[i] + + if isinstance(expected, Series): + assert_series_equal(result, Series(expected)) + elif isinstance(expected, Timestamp): + self.assertEqual(result, expected) + else: + tm.assert_index_equal(result, expected) + + def 
test_to_datetime_format_YYYYMMDD(self):
+        s = Series([19801222, 19801222] + [19810105] * 5)
+        expected = Series([Timestamp(x) for x in s.apply(str)])
+
+        result = to_datetime(s, format='%Y%m%d')
+        assert_series_equal(result, expected)
+
+        result = to_datetime(s.apply(str), format='%Y%m%d')
+        assert_series_equal(result, expected)
+
+        # with NaT
+        expected = Series([Timestamp("19801222"), Timestamp("19801222")] +
+                          [Timestamp("19810105")] * 5)
+        expected[2] = np.nan
+        s[2] = np.nan
+
+        result = to_datetime(s, format='%Y%m%d')
+        assert_series_equal(result, expected)
+
+        # string with NaT
+        s = s.apply(str)
+        s[2] = 'nat'
+        result = to_datetime(s, format='%Y%m%d')
+        assert_series_equal(result, expected)
+
+        # coercion
+        # GH 7930
+        s = Series([20121231, 20141231, 99991231])
+        result = pd.to_datetime(s, format='%Y%m%d', errors='ignore')
+        expected = Series([datetime(2012, 12, 31),
+                           datetime(2014, 12, 31), datetime(9999, 12, 31)],
+                          dtype=object)
+        self.assert_series_equal(result, expected)
+
+        result = pd.to_datetime(s, format='%Y%m%d', errors='coerce')
+        expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]')
+        assert_series_equal(result, expected)
+
+    # GH 10178
+    def test_to_datetime_format_integer(self):
+        s = Series([2000, 2001, 2002])
+        expected = Series([Timestamp(x) for x in s.apply(str)])
+
+        result = to_datetime(s, format='%Y')
+        assert_series_equal(result, expected)
+
+        s = Series([200001, 200105, 200206])
+        expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str)
+                           ])
+
+        result = to_datetime(s, format='%Y%m')
+        assert_series_equal(result, expected)
+
+    def test_to_datetime_format_microsecond(self):
+
+        # these are locale dependent
+        lang, _ = locale.getlocale()
+        month_abbr = calendar.month_abbr[4]
+        val = '01-{}-2011 00:00:01.978'.format(month_abbr)
+
+        format = '%d-%b-%Y %H:%M:%S.%f'
+        result = to_datetime(val, format=format)
+        exp = datetime.strptime(val, format)
+        self.assertEqual(result, exp)
+
+    def test_to_datetime_format_time(self):
+        data = [
+            ['01/10/2010 15:20', '%m/%d/%Y %H:%M',
+             Timestamp('2010-01-10 15:20')],
+            ['01/10/2010 05:43', '%m/%d/%Y %I:%M',
+             Timestamp('2010-01-10 05:43')],
+            ['01/10/2010 13:56:01', '%m/%d/%Y %H:%M:%S',
+             Timestamp('2010-01-10 13:56:01')]  # ,
+            # ['01/10/2010 08:14 PM', '%m/%d/%Y %I:%M %p',
+            #  Timestamp('2010-01-10 20:14')],
+            # ['01/10/2010 07:40 AM', '%m/%d/%Y %I:%M %p',
+            #  Timestamp('2010-01-10 07:40')],
+            # ['01/10/2010 09:12:56 AM', '%m/%d/%Y %I:%M:%S %p',
+            #  Timestamp('2010-01-10 09:12:56')]
+        ]
+        for s, format, dt in data:
+            self.assertEqual(to_datetime(s, format=format), dt)
+
+    def test_to_datetime_with_non_exact(self):
+        # GH 10834
+        tm._skip_if_has_locale()
+
+        # 8904
+        # exact kw
+        if sys.version_info < (2, 7):
+            raise nose.SkipTest('on python version < 2.7')
+
+        s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00',
+                    '19MAY11 00:00:00Z'])
+        result = to_datetime(s, format='%d%b%y', exact=False)
+        expected = to_datetime(s.str.extract(r'(\d+\w+\d+)', expand=False),
+                               format='%d%b%y')
+        assert_series_equal(result, expected)
+
+    def test_parse_nanoseconds_with_formula(self):
+
+        # GH8989
+        # truncating the nanoseconds when a format was provided
+        for v in ["2012-01-01 09:00:00.000000001",
+                  "2012-01-01 09:00:00.000001",
+                  "2012-01-01 09:00:00.001",
+                  "2012-01-01 09:00:00.001000",
+                  "2012-01-01 09:00:00.001000000", ]:
+            expected = pd.to_datetime(v)
+            result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f")
+            self.assertEqual(result, expected)
+
+    def test_to_datetime_format_weeks(self):
+        data = [
+ ['2009324', '%Y%W%w', Timestamp('2009-08-13')], + ['2013020', '%Y%U%w', Timestamp('2013-01-13')] + ] + for s, format, dt in data: + self.assertEqual(to_datetime(s, format=format), dt) + + +class TestToDatetime(tm.TestCase): + _multiprocess_can_split_ = True + + def test_to_datetime_dt64s(self): + in_bound_dts = [ + np.datetime64('2000-01-01'), + np.datetime64('2000-01-02'), + ] + + for dt in in_bound_dts: + self.assertEqual(pd.to_datetime(dt), Timestamp(dt)) + + oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ] + + for dt in oob_dts: + self.assertRaises(ValueError, pd.to_datetime, dt, errors='raise') + self.assertRaises(ValueError, Timestamp, dt) + self.assertIs(pd.to_datetime(dt, errors='coerce'), NaT) + + def test_to_datetime_array_of_dt64s(self): + dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] + + # Assuming all datetimes are in bounds, to_datetime() returns + # an array that is equal to Timestamp() parsing + self.assert_numpy_array_equal( + pd.to_datetime(dts, box=False), + np.array([Timestamp(x).asm8 for x in dts]) + ) + + # A list of datetimes where the last one is out of bounds + dts_with_oob = dts + [np.datetime64('9999-01-01')] + + self.assertRaises(ValueError, pd.to_datetime, dts_with_oob, + errors='raise') + + self.assert_numpy_array_equal( + pd.to_datetime(dts_with_oob, box=False, errors='coerce'), + np.array( + [ + Timestamp(dts_with_oob[0]).asm8, + Timestamp(dts_with_oob[1]).asm8, + tslib.iNaT, + ], + dtype='M8' + ) + ) + + # With errors='ignore', out of bounds datetime64s + # are converted to their .item(), which depending on the version of + # numpy is either a python datetime.datetime or datetime.date + self.assert_numpy_array_equal( + pd.to_datetime(dts_with_oob, box=False, errors='ignore'), + np.array( + [dt.item() for dt in dts_with_oob], + dtype='O' + ) + ) + + def test_to_datetime_tz(self): + + # xref 8260 + # uniform returns a DatetimeIndex + arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), + pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')] + result = pd.to_datetime(arr) + expected = DatetimeIndex( + ['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific') + tm.assert_index_equal(result, expected) + + # mixed tzs will raise + arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'), + pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] + self.assertRaises(ValueError, lambda: pd.to_datetime(arr)) + + def test_to_datetime_tz_pytz(self): + + # xref 8260 + tm._skip_if_no_pytz() + import pytz + + us_eastern = pytz.timezone('US/Eastern') + arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, + hour=3, minute=0)), + us_eastern.localize(datetime(year=2000, month=6, day=1, + hour=3, minute=0))], + dtype=object) + result = pd.to_datetime(arr, utc=True) + expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', + '2000-06-01 07:00:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + tm.assert_index_equal(result, expected) + + def test_to_datetime_utc_is_true(self): + # See gh-11934 + start = pd.Timestamp('2014-01-01', tz='utc') + end = pd.Timestamp('2014-01-03', tz='utc') + date_range = pd.bdate_range(start, end) + + result = pd.to_datetime(date_range, utc=True) + expected = pd.DatetimeIndex(data=date_range) + tm.assert_index_equal(result, expected) + + def test_to_datetime_tz_psycopg2(self): + + # xref 8260 + try: + import psycopg2 + except ImportError: + raise nose.SkipTest("no psycopg2 installed") + + # misc cases + tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, 
name=None)
+        tz2 = psycopg2.tz.FixedOffsetTimezone(offset=-240, name=None)
+        arr = np.array([datetime(2000, 1, 1, 3, 0, tzinfo=tz1),
+                        datetime(2000, 6, 1, 3, 0, tzinfo=tz2)],
+                       dtype=object)
+
+        result = pd.to_datetime(arr, errors='coerce', utc=True)
+        expected = DatetimeIndex(['2000-01-01 08:00:00+00:00',
+                                  '2000-06-01 07:00:00+00:00'],
+                                 dtype='datetime64[ns, UTC]', freq=None)
+        tm.assert_index_equal(result, expected)
+
+        # dtype coercion
+        i = pd.DatetimeIndex([
+            '2000-01-01 08:00:00+00:00'
+        ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None))
+        self.assertTrue(is_datetime64_ns_dtype(i))
+
+        # tz coercion
+        result = pd.to_datetime(i, errors='coerce')
+        tm.assert_index_equal(result, i)
+
+        result = pd.to_datetime(i, errors='coerce', utc=True)
+        expected = pd.DatetimeIndex(['2000-01-01 13:00:00'],
+                                    dtype='datetime64[ns, UTC]')
+        tm.assert_index_equal(result, expected)
+
+    def test_datetime_bool(self):
+        # GH13176
+        with self.assertRaises(TypeError):
+            to_datetime(False)
+        self.assertTrue(to_datetime(False, errors="coerce") is NaT)
+        self.assertEqual(to_datetime(False, errors="ignore"), False)
+        with self.assertRaises(TypeError):
+            to_datetime(True)
+        self.assertTrue(to_datetime(True, errors="coerce") is NaT)
+        self.assertEqual(to_datetime(True, errors="ignore"), True)
+        with self.assertRaises(TypeError):
+            to_datetime([False, datetime.today()])
+        with self.assertRaises(TypeError):
+            to_datetime(['20130101', True])
+        tm.assert_index_equal(to_datetime([0, False, NaT, 0.0],
+                                          errors="coerce"),
+                              DatetimeIndex([to_datetime(0), NaT,
+                                             NaT, to_datetime(0)]))
+
+    def test_datetime_invalid_datatype(self):
+        # GH13176
+
+        with self.assertRaises(TypeError):
+            pd.to_datetime(bool)
+        with self.assertRaises(TypeError):
+            pd.to_datetime(pd.to_datetime)
+
+
+class ToDatetimeUnit(tm.TestCase):
+
+    def test_unit(self):
+        # GH 11758
+        # test proper behavior with errors
+
+        with self.assertRaises(ValueError):
+            to_datetime([1], unit='D', format='%Y%m%d')
+
+        values = [11111111, 1, 1.0, tslib.iNaT, NaT, np.nan,
+                  'NaT', '']
+        result = to_datetime(values, unit='D', errors='ignore')
+        expected = Index([11111111, Timestamp('1970-01-02'),
+                          Timestamp('1970-01-02'), NaT,
+                          NaT, NaT, NaT, NaT],
+                         dtype=object)
+        tm.assert_index_equal(result, expected)
+
+        result = to_datetime(values, unit='D', errors='coerce')
+        expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02',
+                                  'NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
+        tm.assert_index_equal(result, expected)
+
+        with self.assertRaises(tslib.OutOfBoundsDatetime):
+            to_datetime(values, unit='D', errors='raise')
+
+        values = [1420043460000, tslib.iNaT, NaT, np.nan, 'NaT']
+
+        result = to_datetime(values, errors='ignore', unit='s')
+        expected = Index([1420043460000, NaT, NaT,
+                          NaT, NaT], dtype=object)
+        tm.assert_index_equal(result, expected)
+
+        result = to_datetime(values, errors='coerce', unit='s')
+        expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
+        tm.assert_index_equal(result, expected)
+
+        with self.assertRaises(tslib.OutOfBoundsDatetime):
+            to_datetime(values, errors='raise', unit='s')
+
+        # if we have a string, then we raise a ValueError
+        # and NOT an OutOfBoundsDatetime
+        for val in ['foo', Timestamp('20130101')]:
+            try:
+                to_datetime(val, errors='raise', unit='s')
+            except tslib.OutOfBoundsDatetime:
+                raise AssertionError("incorrect exception raised")
+            except ValueError:
+                pass
+
+    def test_unit_consistency(self):
+
+        # consistency of conversions
+        expected = Timestamp('1970-05-09 14:25:11')
+        result = pd.to_datetime(11111111,
unit='s', errors='raise') + self.assertEqual(result, expected) + self.assertIsInstance(result, Timestamp) + + result = pd.to_datetime(11111111, unit='s', errors='coerce') + self.assertEqual(result, expected) + self.assertIsInstance(result, Timestamp) + + result = pd.to_datetime(11111111, unit='s', errors='ignore') + self.assertEqual(result, expected) + self.assertIsInstance(result, Timestamp) + + def test_unit_with_numeric(self): + + # GH 13180 + # coercions from floats/ints are ok + expected = DatetimeIndex(['2015-06-19 05:33:20', + '2015-05-27 22:33:20']) + arr1 = [1.434692e+18, 1.432766e+18] + arr2 = np.array(arr1).astype('int64') + for errors in ['ignore', 'raise', 'coerce']: + result = pd.to_datetime(arr1, errors=errors) + tm.assert_index_equal(result, expected) + + result = pd.to_datetime(arr2, errors=errors) + tm.assert_index_equal(result, expected) + + # but we want to make sure that we are coercing + # if we have ints/strings + expected = DatetimeIndex(['NaT', + '2015-06-19 05:33:20', + '2015-05-27 22:33:20']) + arr = ['foo', 1.434692e+18, 1.432766e+18] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + expected = DatetimeIndex(['2015-06-19 05:33:20', + '2015-05-27 22:33:20', + 'NaT', + 'NaT']) + arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT'] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + def test_unit_mixed(self): + + # mixed integers/datetimes + expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT']) + arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + with self.assertRaises(ValueError): + pd.to_datetime(arr, errors='raise') + + expected = DatetimeIndex(['NaT', + 'NaT', + '2013-01-01']) + arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + with self.assertRaises(ValueError): + pd.to_datetime(arr, errors='raise') + + def test_dataframe(self): + + df = DataFrame({'year': [2015, 2016], + 'month': [2, 3], + 'day': [4, 5], + 'hour': [6, 7], + 'minute': [58, 59], + 'second': [10, 11], + 'ms': [1, 1], + 'us': [2, 2], + 'ns': [3, 3]}) + + result = to_datetime({'year': df['year'], + 'month': df['month'], + 'day': df['day']}) + expected = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160305 00:0:00')]) + assert_series_equal(result, expected) + + # dict-like + result = to_datetime(df[['year', 'month', 'day']].to_dict()) + assert_series_equal(result, expected) + + # dict but with constructable + df2 = df[['year', 'month', 'day']].to_dict() + df2['month'] = 2 + result = to_datetime(df2) + expected2 = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160205 00:0:00')]) + assert_series_equal(result, expected2) + + # unit mappings + units = [{'year': 'years', + 'month': 'months', + 'day': 'days', + 'hour': 'hours', + 'minute': 'minutes', + 'second': 'seconds'}, + {'year': 'year', + 'month': 'month', + 'day': 'day', + 'hour': 'hour', + 'minute': 'minute', + 'second': 'second'}, + ] + + for d in units: + result = to_datetime(df[list(d.keys())].rename(columns=d)) + expected = Series([Timestamp('20150204 06:58:10'), + Timestamp('20160305 07:59:11')]) + assert_series_equal(result, expected) + + d = {'year': 'year', + 'month': 'month', + 'day': 'day', + 'hour': 'hour', + 'minute': 'minute', + 'second': 'second', + 'ms': 'ms', + 'us': 'us', + 'ns': 'ns'} + + result = 
to_datetime(df.rename(columns=d))
+        expected = Series([Timestamp('20150204 06:58:10.001002003'),
+                           Timestamp('20160305 07:59:11.001002003')])
+        assert_series_equal(result, expected)
+
+        # coerce back to int
+        result = to_datetime(df.astype(str))
+        assert_series_equal(result, expected)
+
+        # passing coerce
+        df2 = DataFrame({'year': [2015, 2016],
+                         'month': [2, 20],
+                         'day': [4, 5]})
+        with self.assertRaises(ValueError):
+            to_datetime(df2)
+        result = to_datetime(df2, errors='coerce')
+        expected = Series([Timestamp('20150204 00:00:00'),
+                           NaT])
+        assert_series_equal(result, expected)
+
+        # extra columns
+        with self.assertRaises(ValueError):
+            df2 = df.copy()
+            df2['foo'] = 1
+            to_datetime(df2)
+
+        # not enough
+        for c in [['year'],
+                  ['year', 'month'],
+                  ['year', 'month', 'second'],
+                  ['month', 'day'],
+                  ['year', 'day', 'second']]:
+            with self.assertRaises(ValueError):
+                to_datetime(df[c])
+
+        # duplicates
+        df2 = DataFrame({'year': [2015, 2016],
+                         'month': [2, 20],
+                         'day': [4, 5]})
+        df2.columns = ['year', 'year', 'day']
+        with self.assertRaises(ValueError):
+            to_datetime(df2)
+
+        df2 = DataFrame({'year': [2015, 2016],
+                         'month': [2, 20],
+                         'day': [4, 5],
+                         'hour': [4, 5]})
+        df2.columns = ['year', 'month', 'day', 'day']
+        with self.assertRaises(ValueError):
+            to_datetime(df2)
+
+    def test_dataframe_dtypes(self):
+        # #13451
+        df = DataFrame({'year': [2015, 2016],
+                        'month': [2, 3],
+                        'day': [4, 5]})
+
+        # int16
+        result = to_datetime(df.astype('int16'))
+        expected = Series([Timestamp('20150204 00:00:00'),
+                           Timestamp('20160305 00:00:00')])
+        assert_series_equal(result, expected)
+
+        # mixed dtypes
+        df['month'] = df['month'].astype('int8')
+        df['day'] = df['day'].astype('int8')
+        result = to_datetime(df)
+        expected = Series([Timestamp('20150204 00:00:00'),
+                           Timestamp('20160305 00:00:00')])
+        assert_series_equal(result, expected)
+
+        # float
+        df = DataFrame({'year': [2000, 2001],
+                        'month': [1.5, 1],
+                        'day': [1, 1]})
+        with self.assertRaises(ValueError):
+            to_datetime(df)
+
+
+class ToDatetimeMisc(tm.TestCase):
+
+    def test_index_to_datetime(self):
+        idx = Index(['1/1/2000', '1/2/2000', '1/3/2000'])
+
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            result = idx.to_datetime()
+            expected = DatetimeIndex(pd.to_datetime(idx.values))
+            tm.assert_index_equal(result, expected)
+
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            today = datetime.today()
+            idx = Index([today], dtype=object)
+            result = idx.to_datetime()
+            expected = DatetimeIndex([today])
+            tm.assert_index_equal(result, expected)
+
+    def test_to_datetime_iso8601(self):
+        result = to_datetime(["2012-01-01 00:00:00"])
+        exp = Timestamp("2012-01-01 00:00:00")
+        self.assertEqual(result[0], exp)
+
+        result = to_datetime(['20121001'])  # bad iso 8601
+        exp = Timestamp('2012-10-01')
+        self.assertEqual(result[0], exp)
+
+    def test_to_datetime_default(self):
+        rs = to_datetime('2001')
+        xp = datetime(2001, 1, 1)
+        self.assertEqual(rs, xp)
+
+        # dayfirst is essentially broken
+
+        # to_datetime('01-13-2012', dayfirst=True)
+        # self.assertRaises(ValueError, to_datetime('01-13-2012',
+        #                   dayfirst=True))
+
+    def test_to_datetime_on_datetime64_series(self):
+        # #2699
+        s = Series(date_range('1/1/2000', periods=10))
+
+        result = to_datetime(s)
+        self.assertEqual(result[0], s[0])
+
+    def test_to_datetime_with_space_in_series(self):
+        # GH 6428
+        s = Series(['10/18/2006', '10/18/2008', ' '])
+        tm.assertRaises(ValueError, lambda: to_datetime(s, errors='raise'))
+        result_coerce = to_datetime(s,
+
+    def test_to_datetime_with_apply(self):
+        # this is only locale tested with US/None locales
+        tm._skip_if_has_locale()
+
+        # GH 5195
+        # with a format and coerce a single item to_datetime fails
+        td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1, 2, 3])
+        expected = pd.to_datetime(td, format='%b %y')
+        result = td.apply(pd.to_datetime, format='%b %y')
+        assert_series_equal(result, expected)
+
+        td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3])
+        self.assertRaises(ValueError,
+                          lambda: pd.to_datetime(td, format='%b %y',
+                                                 errors='raise'))
+        self.assertRaises(ValueError,
+                          lambda: td.apply(pd.to_datetime, format='%b %y',
+                                           errors='raise'))
+        expected = pd.to_datetime(td, format='%b %y', errors='coerce')
+
+        result = td.apply(
+            lambda x: pd.to_datetime(x, format='%b %y', errors='coerce'))
+        assert_series_equal(result, expected)
+
+    def test_to_datetime_types(self):
+
+        # empty string
+        result = to_datetime('')
+        self.assertIs(result, NaT)
+
+        result = to_datetime(['', ''])
+        self.assertTrue(isnull(result).all())
+
+        # ints
+        result = Timestamp(0)
+        expected = to_datetime(0)
+        self.assertEqual(result, expected)
+
+        # GH 3888 (strings)
+        expected = to_datetime(['2012'])[0]
+        result = to_datetime('2012')
+        self.assertEqual(result, expected)
+
+        # array = ['2012','20120101','20120101 12:01:01']
+        array = ['20120101', '20120101 12:01:01']
+        expected = list(to_datetime(array))
+        result = lmap(Timestamp, array)
+        tm.assert_almost_equal(result, expected)
+
+        # currently fails ###
+        # result = Timestamp('2012')
+        # expected = to_datetime('2012')
+        # self.assertEqual(result, expected)
+
+    def test_to_datetime_unprocessable_input(self):
+        # GH 4928
+        self.assert_numpy_array_equal(
+            to_datetime([1, '1'], errors='ignore'),
+            np.array([1, '1'], dtype='O')
+        )
+        self.assertRaises(TypeError, to_datetime, [1, '1'], errors='raise')
+
+    def test_to_datetime_other_datetime64_units(self):
+        # 5/25/2012
+        scalar = np.int64(1337904000000000).view('M8[us]')
+        as_obj = scalar.astype('O')
+
+        index = DatetimeIndex([scalar])
+        self.assertEqual(index[0], scalar.astype('O'))
+
+        value = Timestamp(scalar)
+        self.assertEqual(value, as_obj)
+
+    def test_to_datetime_list_of_integers(self):
+        rng = date_range('1/1/2000', periods=20)
+        rng = DatetimeIndex(rng.values)
+
+        ints = list(rng.asi8)
+
+        result = DatetimeIndex(ints)
+
+        tm.assert_index_equal(rng, result)
+
+    def test_to_datetime_freq(self):
+        xp = bdate_range('2000-1-1', periods=10, tz='UTC')
+        rs = xp.to_datetime()
+        self.assertEqual(xp.freq, rs.freq)
+        self.assertEqual(xp.tzinfo, rs.tzinfo)
+
+    def test_string_na_nat_conversion(self):
+        # GH #999, #858
+
+        from pandas.compat import parse_date
+
+        strings = np.array(['1/1/2000', '1/2/2000', np.nan,
+                            '1/4/2000, 12:34:56'], dtype=object)
+
+        expected = np.empty(4, dtype='M8[ns]')
+        for i, val in enumerate(strings):
+            if isnull(val):
+                expected[i] = tslib.iNaT
+            else:
+                expected[i] = parse_date(val)
+
+        result = tslib.array_to_datetime(strings)
+        tm.assert_almost_equal(result, expected)
+
+        result2 = to_datetime(strings)
+        tm.assertIsInstance(result2, DatetimeIndex)
+        tm.assert_numpy_array_equal(result, result2.values)
+
+        malformed = np.array(['1/100/2000', np.nan], dtype=object)
+
+        # GH 10636, default is now 'raise'
+        self.assertRaises(ValueError,
+                          lambda: to_datetime(malformed, errors='raise'))
+
+        result = to_datetime(malformed, errors='ignore')
+        tm.assert_numpy_array_equal(result, malformed)
+
+        self.assertRaises(ValueError, to_datetime, malformed, errors='raise')
+
+        idx = ['a', 'b', 'c', 'd', 'e']
+        series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan,
+                         '1/5/2000'], index=idx, name='foo')
+        dseries = Series([to_datetime('1/1/2000'), np.nan,
+                          to_datetime('1/3/2000'), np.nan,
+                          to_datetime('1/5/2000')], index=idx, name='foo')
+
+        result = to_datetime(series)
+        dresult = to_datetime(dseries)
+
+        expected = Series(np.empty(5, dtype='M8[ns]'), index=idx)
+        for i in range(5):
+            x = series[i]
+            if isnull(x):
+                expected[i] = tslib.iNaT
+            else:
+                expected[i] = to_datetime(x)
+
+        assert_series_equal(result, expected, check_names=False)
+        self.assertEqual(result.name, 'foo')
+
+        assert_series_equal(dresult, expected, check_names=False)
+        self.assertEqual(dresult.name, 'foo')
+
+    def test_dti_constructor_numpy_timeunits(self):
+        # GH 9114
+        base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT'])
+
+        for dtype in ['datetime64[h]', 'datetime64[m]', 'datetime64[s]',
+                      'datetime64[ms]', 'datetime64[us]', 'datetime64[ns]']:
+            values = base.values.astype(dtype)
+
+            tm.assert_index_equal(DatetimeIndex(values), base)
+            tm.assert_index_equal(to_datetime(values), base)
+
+    def test_dayfirst(self):
+        # GH 5917
+        arr = ['10/02/2014', '11/02/2014', '12/02/2014']
+        expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11),
+                                  datetime(2014, 2, 12)])
+        idx1 = DatetimeIndex(arr, dayfirst=True)
+        idx2 = DatetimeIndex(np.array(arr), dayfirst=True)
+        idx3 = to_datetime(arr, dayfirst=True)
+        idx4 = to_datetime(np.array(arr), dayfirst=True)
+        idx5 = DatetimeIndex(Index(arr), dayfirst=True)
+        idx6 = DatetimeIndex(Series(arr), dayfirst=True)
+        tm.assert_index_equal(expected, idx1)
+        tm.assert_index_equal(expected, idx2)
+        tm.assert_index_equal(expected, idx3)
+        tm.assert_index_equal(expected, idx4)
+        tm.assert_index_equal(expected, idx5)
+        tm.assert_index_equal(expected, idx6)
+
+
+class TestGuessDatetimeFormat(tm.TestCase):
+
+    def test_guess_datetime_format_with_parseable_formats(self):
+        tm._skip_if_not_us_locale()
+        dt_string_to_format = (('20111230', '%Y%m%d'),
+                               ('2011-12-30', '%Y-%m-%d'),
+                               ('30-12-2011', '%d-%m-%Y'),
+                               ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'),
+                               ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'),
+                               ('2011-12-30 00:00:00.000000',
+                                '%Y-%m-%d %H:%M:%S.%f'), )
+
+        for dt_string, dt_format in dt_string_to_format:
+            self.assertEqual(
+                tools._guess_datetime_format(dt_string),
+                dt_format
+            )
+
+    def test_guess_datetime_format_with_dayfirst(self):
+        ambiguous_string = '01/01/2011'
+        self.assertEqual(
+            tools._guess_datetime_format(ambiguous_string, dayfirst=True),
+            '%d/%m/%Y'
+        )
+        self.assertEqual(
+            tools._guess_datetime_format(ambiguous_string, dayfirst=False),
+            '%m/%d/%Y'
+        )
+
+    def test_guess_datetime_format_with_locale_specific_formats(self):
+        # The month names will vary depending on the locale, in which
+        # case these won't be parsed properly (dateutil can't parse them)
+        tm._skip_if_has_locale()
+
+        dt_string_to_format = (('30/Dec/2011', '%d/%b/%Y'),
+                               ('30/December/2011', '%d/%B/%Y'),
+                               ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'), )
+
+        for dt_string, dt_format in dt_string_to_format:
+            self.assertEqual(
+                tools._guess_datetime_format(dt_string),
+                dt_format
+            )
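# ----------------------------------------------------------------------
# A minimal sketch (not part of the patch) of the helper under test:
# _guess_datetime_format maps a single datetime string to a strftime-style
# format, or None when the string is not guessable. It is private API at
# this vintage of pandas; the expected outputs below come from the tests.
import pandas.tseries.tools as tools

print(tools._guess_datetime_format('2011-12-30'))                 # '%Y-%m-%d'
print(tools._guess_datetime_format('01/01/2011', dayfirst=True))  # '%d/%m/%Y'
print(tools._guess_datetime_format('this_is_not_a_datetime'))     # None
# ----------------------------------------------------------------------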
+
+    def test_guess_datetime_format_invalid_inputs(self):
+        # A datetime string must include a year, month and a day for it
+        # to be guessable, in addition to being a string that looks like
+        # a datetime
+        invalid_dts = [
+            '2013',
+            '01/2013',
+            '12:00:00',
+            '1/1/1/1',
+            'this_is_not_a_datetime',
+            '51a',
+            9,
+            datetime(2011, 1, 1),
+        ]
+
+        for invalid_dt in invalid_dts:
+            self.assertTrue(tools._guess_datetime_format(invalid_dt) is None)
+
+    def test_guess_datetime_format_nopadding(self):
+        # GH 11142
+        dt_string_to_format = (('2011-1-1', '%Y-%m-%d'),
+                               ('30-1-2011', '%d-%m-%Y'),
+                               ('1/1/2011', '%m/%d/%Y'),
+                               ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'),
+                               ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'),
+                               ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S'))
+
+        for dt_string, dt_format in dt_string_to_format:
+            self.assertEqual(
+                tools._guess_datetime_format(dt_string),
+                dt_format
+            )
+
+    def test_guess_datetime_format_for_array(self):
+        tm._skip_if_not_us_locale()
+        expected_format = '%Y-%m-%d %H:%M:%S.%f'
+        dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format)
+
+        test_arrays = [
+            np.array([dt_string, dt_string, dt_string], dtype='O'),
+            np.array([np.nan, np.nan, dt_string], dtype='O'),
+            np.array([dt_string, 'random_string'], dtype='O'),
+        ]
+
+        for test_array in test_arrays:
+            self.assertEqual(
+                tools._guess_datetime_format_for_array(test_array),
+                expected_format
+            )
+
+        format_for_string_of_nans = tools._guess_datetime_format_for_array(
+            np.array(
+                [np.nan, np.nan, np.nan], dtype='O'))
+        self.assertTrue(format_for_string_of_nans is None)
+
+
+class TestToDatetimeInferFormat(tm.TestCase):
+
+    def test_to_datetime_infer_datetime_format_consistent_format(self):
+        s = pd.Series(pd.date_range('20000101', periods=50, freq='H'))
+
+        test_formats = ['%m-%d-%Y', '%m/%d/%Y %H:%M:%S.%f',
+                        '%Y-%m-%dT%H:%M:%S.%f']
+
+        for test_format in test_formats:
+            s_as_dt_strings = s.apply(lambda x: x.strftime(test_format))
+
+            with_format = pd.to_datetime(s_as_dt_strings, format=test_format)
+            no_infer = pd.to_datetime(s_as_dt_strings,
+                                      infer_datetime_format=False)
+            yes_infer = pd.to_datetime(s_as_dt_strings,
+                                       infer_datetime_format=True)
+
+            # Whether the format is passed explicitly, inferred, or not
+            # inferred at all, the results should all be the same
+            self.assert_series_equal(with_format, no_infer)
+            self.assert_series_equal(no_infer, yes_infer)
+
+    def test_to_datetime_infer_datetime_format_inconsistent_format(self):
+        s = pd.Series(np.array(['01/01/2011 00:00:00',
+                                '01-02-2011 00:00:00',
+                                '2011-01-03T00:00:00']))
+
+        # When the format is inconsistent, infer_datetime_format should just
+        # fall back to the default parsing
+        tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False),
+                               pd.to_datetime(s, infer_datetime_format=True))
+
+        s = pd.Series(np.array(['Jan/01/2011', 'Feb/01/2011', 'Mar/01/2011']))
+
+        tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False),
+                               pd.to_datetime(s, infer_datetime_format=True))
+
+    def test_to_datetime_infer_datetime_format_series_with_nans(self):
+        s = pd.Series(np.array(['01/01/2011 00:00:00', np.nan,
+                                '01/03/2011 00:00:00', np.nan]))
+        tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False),
+                               pd.to_datetime(s, infer_datetime_format=True))
+
+    def test_to_datetime_infer_datetime_format_series_starting_with_nans(self):
+        s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00',
+                                '01/02/2011 00:00:00', '01/03/2011 00:00:00']))
+
+        tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False),
+                               pd.to_datetime(s, infer_datetime_format=True))
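# ----------------------------------------------------------------------
# A minimal sketch (not part of the patch) of the invariant asserted above:
# whether a consistent format is passed explicitly, inferred, or not
# inferred at all, to_datetime returns the same result.
import pandas as pd

s = pd.Series(['2000-01-01 00:00:00.000000', '2000-01-01 01:00:00.000000'])
explicit = pd.to_datetime(s, format='%Y-%m-%d %H:%M:%S.%f')
inferred = pd.to_datetime(s, infer_datetime_format=True)
assert explicit.equals(inferred)
# ----------------------------------------------------------------------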
+
+    def test_to_datetime_iso8601_noleading_0s(self):
+        # GH 11871
+        s = pd.Series(['2014-1-1', '2014-2-2', '2015-3-3'])
+        expected = pd.Series([pd.Timestamp('2014-01-01'),
+                              pd.Timestamp('2014-02-02'),
+                              pd.Timestamp('2015-03-03')])
+        tm.assert_series_equal(pd.to_datetime(s), expected)
+        tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d'), expected)
+
+
+class TestDaysInMonth(tm.TestCase):
+    # tests for issue #10154
+    def test_day_not_in_month_coerce(self):
+        self.assertTrue(isnull(to_datetime('2015-02-29', errors='coerce')))
+        self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d",
+                                           errors='coerce')))
+        self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d",
+                                           errors='coerce')))
+        self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d",
+                                           errors='coerce')))
+
+    def test_day_not_in_month_raise(self):
+        self.assertRaises(ValueError, to_datetime, '2015-02-29',
+                          errors='raise')
+        self.assertRaises(ValueError, to_datetime, '2015-02-29',
+                          errors='raise', format="%Y-%m-%d")
+        self.assertRaises(ValueError, to_datetime, '2015-02-32',
+                          errors='raise', format="%Y-%m-%d")
+        self.assertRaises(ValueError, to_datetime, '2015-04-31',
+                          errors='raise', format="%Y-%m-%d")
+
+    def test_day_not_in_month_ignore(self):
+        self.assertEqual(to_datetime(
+            '2015-02-29', errors='ignore'), '2015-02-29')
+        self.assertEqual(to_datetime(
+            '2015-02-29', errors='ignore', format="%Y-%m-%d"), '2015-02-29')
+        self.assertEqual(to_datetime(
+            '2015-02-32', errors='ignore', format="%Y-%m-%d"), '2015-02-32')
+        self.assertEqual(to_datetime(
+            '2015-04-31', errors='ignore', format="%Y-%m-%d"), '2015-04-31')
diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py
index 23261c2ef79e2..7bcd1763537dc 100644
--- a/pandas/tests/series/test_combine_concat.py
+++ b/pandas/tests/series/test_combine_concat.py
@@ -7,7 +7,7 @@
 import numpy as np
 import pandas as pd

-from pandas import Series, DataFrame
+from pandas import Series, DataFrame, date_range, DatetimeIndex
 from pandas import compat
 from pandas.util.testing import assert_series_equal

@@ -218,3 +218,103 @@ def test_combine_first_dt64(self):
         rs = s0.combine_first(s1)
         xp = Series([datetime(2010, 1, 1), '2011'])
         assert_series_equal(rs, xp)
+
+
+class TestTimeseries(tm.TestCase):
+
+    _multiprocess_can_split_ = True
+
+    def test_append_concat(self):
+        rng = date_range('5/8/2012 1:45', periods=10, freq='5T')
+        ts = Series(np.random.randn(len(rng)), rng)
+        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
+
+        result = ts.append(ts)
+        result_df = df.append(df)
+        ex_index = DatetimeIndex(np.tile(rng.values, 2))
+        tm.assert_index_equal(result.index, ex_index)
+        tm.assert_index_equal(result_df.index, ex_index)
+
+        appended = rng.append(rng)
+        tm.assert_index_equal(appended, ex_index)
+
+        appended = rng.append([rng, rng])
+        ex_index = DatetimeIndex(np.tile(rng.values, 3))
+        tm.assert_index_equal(appended, ex_index)
+
+        # different index names
+        rng1 = rng.copy()
+        rng2 = rng.copy()
+        rng1.name = 'foo'
+        rng2.name = 'bar'
+        self.assertEqual(rng1.append(rng1).name, 'foo')
+        self.assertIsNone(rng1.append(rng2).name)
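# ----------------------------------------------------------------------
# A minimal sketch (not part of the patch) of the append behaviour tested
# above: appending a datetime-indexed Series to itself tiles the index,
# keeping the duplicates.
import numpy as np
import pandas as pd

rng = pd.date_range('5/8/2012 1:45', periods=10, freq='5T')
ts = pd.Series(np.random.randn(len(rng)), rng)
appended = ts.append(ts)
assert appended.index.equals(pd.DatetimeIndex(np.tile(rng.values, 2)))
# ----------------------------------------------------------------------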
+
+    def test_append_concat_tz(self):
+        # GH 2938
+        tm._skip_if_no_pytz()
+
+        rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
+                         tz='US/Eastern')
+        rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
+                          tz='US/Eastern')
+        rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
+                          tz='US/Eastern')
+        ts = Series(np.random.randn(len(rng)), rng)
+        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
+        ts2 = Series(np.random.randn(len(rng2)), rng2)
+        df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
+
+        result = ts.append(ts2)
+        result_df = df.append(df2)
+        tm.assert_index_equal(result.index, rng3)
+        tm.assert_index_equal(result_df.index, rng3)
+
+        appended = rng.append(rng2)
+        tm.assert_index_equal(appended, rng3)
+
+    def test_append_concat_tz_explicit_pytz(self):
+        # GH 2938
+        tm._skip_if_no_pytz()
+        from pytz import timezone as timezone
+
+        rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
+                         tz=timezone('US/Eastern'))
+        rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
+                          tz=timezone('US/Eastern'))
+        rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
+                          tz=timezone('US/Eastern'))
+        ts = Series(np.random.randn(len(rng)), rng)
+        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
+        ts2 = Series(np.random.randn(len(rng2)), rng2)
+        df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
+
+        result = ts.append(ts2)
+        result_df = df.append(df2)
+        tm.assert_index_equal(result.index, rng3)
+        tm.assert_index_equal(result_df.index, rng3)
+
+        appended = rng.append(rng2)
+        tm.assert_index_equal(appended, rng3)
+
+    def test_append_concat_tz_dateutil(self):
+        # GH 2938
+        tm._skip_if_no_dateutil()
+        rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
+                         tz='dateutil/US/Eastern')
+        rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
+                          tz='dateutil/US/Eastern')
+        rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
+                          tz='dateutil/US/Eastern')
+        ts = Series(np.random.randn(len(rng)), rng)
+        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
+        ts2 = Series(np.random.randn(len(rng2)), rng2)
+        df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
+
+        result = ts.append(ts2)
+        result_df = df.append(df2)
+        tm.assert_index_equal(result.index, rng3)
+        tm.assert_index_equal(result_df.index, rng3)
+
+        appended = rng.append(rng2)
+        tm.assert_index_equal(appended, rng3)
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 05818b013ac52..777b188b8fdd9 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -9,11 +9,12 @@
 import pandas as pd
 from pandas.types.common import is_categorical_dtype, is_datetime64tz_dtype
-from pandas import Index, Series, isnull, date_range, period_range
+from pandas import (Index, Series, isnull, date_range,
+                    period_range, NaT)
 from pandas.core.index import MultiIndex
 from pandas.tseries.index import Timestamp, DatetimeIndex

-import pandas.lib as lib
+from pandas import lib, tslib

 from pandas.compat import lrange, range, zip, OrderedDict, long
 from pandas import compat
@@ -214,7 +215,6 @@ def test_constructor_maskedarray(self):
         expected = Series([True, True, False], index=index, dtype=bool)
         assert_series_equal(result, expected)

-        from pandas import tslib
         data = ma.masked_all((3, ), dtype='M8[ns]')
         result = Series(data)
         expected = Series([tslib.iNaT, tslib.iNaT, tslib.iNaT], dtype='M8[ns]')
@@ -234,6 +234,13 @@ def test_constructor_maskedarray(self):
                            datetime(2001, 1, 3)], index=index, dtype='M8[ns]')
         assert_series_equal(result, expected)

+    def test_series_ctor_plus_datetimeindex(self):
+        rng = date_range('20090415', '20090519', freq='B')
+        data = dict((k, 1) for k in rng)
+
+        result = Series(data, index=rng)
+        self.assertIs(result.index, rng)
+
     def test_constructor_default_index(self):
         s = Series([0, 1, 2])
         tm.assert_index_equal(s.index, pd.Index(np.arange(3)))
@@ -800,6 +807,21 @@ def f():
         s = Series([pd.NaT, np.nan, '1 Day'])
         self.assertEqual(s.dtype, 'timedelta64[ns]')

+    def test_NaT_scalar(self):
+        series = Series([0, 1000, 2000, tslib.iNaT], dtype='M8[ns]')
+
+        val = series[3]
+        self.assertTrue(isnull(val))
+
+        series[2] = val
+        self.assertTrue(isnull(series[2]))
+
+    def test_NaT_cast(self):
+        # GH10747
+        result = Series([np.nan]).astype('M8[ns]')
+        expected = Series([NaT])
+        assert_series_equal(result, expected)
+
     def test_constructor_name_hashable(self):
         for n in [777, 777., 'name', datetime(2001, 11, 11), (1, ), u"\u05D0"]:
             for data in [[1, 2, 3], np.ones(3), {'a': 0, 'b': 1}]:
@@ -810,3 +832,20 @@ def test_constructor_name_unhashable(self):
         for n in [['name_list'], np.ones(2), {1: 2}]:
             for data in [['name_list'], np.ones(2), {1: 2}]:
                 self.assertRaises(TypeError, Series, data, name=n)
+
+    def test_auto_conversion(self):
+        series = Series(list(date_range('1/1/2000', periods=10)))
+        self.assertEqual(series.dtype, 'M8[ns]')
+
+    def test_constructor_cant_cast_datetime64(self):
+        msg = "Cannot cast datetime64 to "
+        with tm.assertRaisesRegexp(TypeError, msg):
+            Series(date_range('1/1/2000', periods=10), dtype=float)
+
+        with tm.assertRaisesRegexp(TypeError, msg):
+            Series(date_range('1/1/2000', periods=10), dtype=int)
+
+    def test_constructor_cast_object(self):
+        s = Series(date_range('1/1/2000', periods=10), dtype=object)
+        exp = Series(date_range('1/1/2000', periods=10))
+        tm.assert_series_equal(s, exp)
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
index 1a1ff28bbb398..127a410f66fdb 100644
--- a/pandas/tests/series/test_dtypes.py
+++ b/pandas/tests/series/test_dtypes.py
@@ -8,9 +8,7 @@
 from numpy import nan
 import numpy as np

-from pandas import Series
-from pandas.tseries.index import Timestamp
-from pandas.tseries.tdi import Timedelta
+from pandas import Series, Timestamp, Timedelta, DataFrame, date_range

 from pandas.compat import lrange, range, u
 from pandas import compat
@@ -181,3 +179,24 @@ def test_arg_for_errors_in_astype(self):
             sr.astype(np.int8, raise_on_error=True)

         sr.astype(np.int8, errors='raise')
+
+    def test_intercept_astype_object(self):
+        series = Series(date_range('1/1/2000', periods=10))
+
+        # this test no longer makes sense as series is by default already
+        # M8[ns]
+        expected = series.astype('object')
+
+        df = DataFrame({'a': series,
+                        'b': np.random.randn(len(series))})
+        exp_dtypes = Series([np.dtype('datetime64[ns]'),
+                             np.dtype('float64')], index=['a', 'b'])
+        tm.assert_series_equal(df.dtypes, exp_dtypes)
+
+        result = df.values.squeeze()
+        self.assertTrue((result[:, 0] == expected.values).all())
+
+        df = DataFrame({'a': series, 'b': ['foo'] * len(series)})
+
+        result = df.values.squeeze()
+        self.assertTrue((result[:, 0] == expected.values).all())
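# ----------------------------------------------------------------------
# A minimal sketch (not part of the patch) of GH10747 as tested above:
# casting NaN to datetime64[ns] yields NaT rather than raising.
import numpy as np
import pandas as pd

result = pd.Series([np.nan]).astype('M8[ns]')
assert result[0] is pd.NaT
# ----------------------------------------------------------------------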
-from pandas.util.testing import assert_series_equal, assert_almost_equal +from pandas.util.testing import (slow, + assert_series_equal, + assert_almost_equal, + assert_frame_equal) import pandas.util.testing as tm from pandas.tests.series.common import TestData @@ -421,6 +425,84 @@ def test_getitem_setitem_datetime_tz_dateutil(self): result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = ts[4] assert_series_equal(result, ts) + def test_getitem_setitem_datetimeindex(self): + N = 50 + # testing with timezone, GH #2785 + rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') + ts = Series(np.random.randn(N), index=rng) + + result = ts["1990-01-01 04:00:00"] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts.copy() + result["1990-01-01 04:00:00"] = 0 + result["1990-01-01 04:00:00"] = ts[4] + assert_series_equal(result, ts) + + result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0 + result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8] + assert_series_equal(result, ts) + + lb = "1990-01-01 04:00:00" + rb = "1990-01-01 07:00:00" + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + # repeat all the above with naive datetimes + result = ts[datetime(1990, 1, 1, 4)] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts.copy() + result[datetime(1990, 1, 1, 4)] = 0 + result[datetime(1990, 1, 1, 4)] = ts[4] + assert_series_equal(result, ts) + + result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = 0 + result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8] + assert_series_equal(result, ts) + + lb = datetime(1990, 1, 1, 4) + rb = datetime(1990, 1, 1, 7) + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts[ts.index[4]] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts[ts.index[4:8]] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[ts.index[4:8]] = 0 + result[4:8] = ts[4:8] + assert_series_equal(result, ts) + + # also test partial date slicing + result = ts["1990-01-02"] + expected = ts[24:48] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-02"] = 0 + result["1990-01-02"] = ts[24:48] + assert_series_equal(result, ts) + def test_getitem_setitem_periodindex(self): from pandas import period_range @@ -1835,6 +1917,28 @@ def test_reindex_nan(self): # reindex coerces index.dtype to float, loc/iloc doesn't assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False) + def test_reindex_series_add_nat(self): + rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') + series = Series(rng) + + result = series.reindex(lrange(15)) + self.assertTrue(np.issubdtype(result.dtype, np.dtype('M8[ns]'))) + + mask = result.isnull() + self.assertTrue(mask[-5:].all()) + self.assertFalse(mask[:-5].any()) + + def test_reindex_with_datetimes(self): + rng = date_range('1/1/2000', periods=20) + ts = Series(np.random.randn(20), index=rng) + + result = ts.reindex(list(ts.index[5:10])) + expected = ts[5:10] + tm.assert_series_equal(result, expected) + + result = ts[list(ts.index[5:10])] + tm.assert_series_equal(result, 
+
     def test_reindex_corner(self):
         # (don't forget to fix this) I think it's fixed
         self.empty.reindex(self.ts.index, method='pad')  # it works
@@ -2110,6 +2214,432 @@ def test_setitem_slice_into_readonly_backing_data(self):
             ' array was still mutated!',
         )

+
+class TestTimeSeriesDuplicates(tm.TestCase):
+    _multiprocess_can_split_ = True
+
+    def setUp(self):
+        dates = [datetime(2000, 1, 2), datetime(2000, 1, 2),
+                 datetime(2000, 1, 2), datetime(2000, 1, 3),
+                 datetime(2000, 1, 3), datetime(2000, 1, 3),
+                 datetime(2000, 1, 4), datetime(2000, 1, 4),
+                 datetime(2000, 1, 4), datetime(2000, 1, 5)]
+
+        self.dups = Series(np.random.randn(len(dates)), index=dates)
+
+    def test_constructor(self):
+        tm.assertIsInstance(self.dups, Series)
+        tm.assertIsInstance(self.dups.index, DatetimeIndex)
+
+    def test_is_unique_monotonic(self):
+        self.assertFalse(self.dups.index.is_unique)
+
+    def test_index_unique(self):
+        uniques = self.dups.index.unique()
+        expected = DatetimeIndex([datetime(2000, 1, 2), datetime(2000, 1, 3),
+                                  datetime(2000, 1, 4), datetime(2000, 1, 5)])
+        self.assertEqual(uniques.dtype, 'M8[ns]')  # sanity
+        tm.assert_index_equal(uniques, expected)
+        self.assertEqual(self.dups.index.nunique(), 4)
+
+        # #2563
+        self.assertTrue(isinstance(uniques, DatetimeIndex))
+
+        dups_local = self.dups.index.tz_localize('US/Eastern')
+        dups_local.name = 'foo'
+        result = dups_local.unique()
+        expected = DatetimeIndex(expected, name='foo')
+        expected = expected.tz_localize('US/Eastern')
+        self.assertTrue(result.tz is not None)
+        self.assertEqual(result.name, 'foo')
+        tm.assert_index_equal(result, expected)
+
+        # NaT, note this is excluded
+        arr = [1370745748 + t for t in range(20)] + [tslib.iNaT]
+        idx = DatetimeIndex(arr * 3)
+        tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
+        self.assertEqual(idx.nunique(), 20)
+        self.assertEqual(idx.nunique(dropna=False), 21)
+
+        arr = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t)
+               for t in range(20)] + [NaT]
+        idx = DatetimeIndex(arr * 3)
+        tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
+        self.assertEqual(idx.nunique(), 20)
+        self.assertEqual(idx.nunique(dropna=False), 21)
+
+    def test_index_dupes_contains(self):
+        d = datetime(2011, 12, 5, 20, 30)
+        ix = DatetimeIndex([d, d])
+        self.assertTrue(d in ix)
+
+    def test_duplicate_dates_indexing(self):
+        ts = self.dups
+
+        uniques = ts.index.unique()
+        for date in uniques:
+            result = ts[date]
+
+            mask = ts.index == date
+            total = (ts.index == date).sum()
+            expected = ts[mask]
+            if total > 1:
+                assert_series_equal(result, expected)
+            else:
+                assert_almost_equal(result, expected[0])
+
+            cp = ts.copy()
+            cp[date] = 0
+            expected = Series(np.where(mask, 0, ts), index=ts.index)
+            assert_series_equal(cp, expected)
+
+        self.assertRaises(KeyError, ts.__getitem__, datetime(2000, 1, 6))
+
+        # new index
+        ts[datetime(2000, 1, 6)] = 0
+        self.assertEqual(ts[datetime(2000, 1, 6)], 0)
+
+    def test_range_slice(self):
+        idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000',
+                             '1/4/2000'])
+
+        ts = Series(np.random.randn(len(idx)), index=idx)
+
+        result = ts['1/2/2000':]
+        expected = ts[1:]
+        assert_series_equal(result, expected)
+
+        result = ts['1/2/2000':'1/3/2000']
+        expected = ts[1:4]
+        assert_series_equal(result, expected)
+
+    def test_groupby_average_dup_values(self):
+        result = self.dups.groupby(level=0).mean()
+        expected = self.dups.groupby(self.dups.index).mean()
+        assert_series_equal(result, expected)
+
+    def test_indexing_over_size_cutoff(self):
+        import datetime
+        # #1821
+
+        old_cutoff = _index._SIZE_CUTOFF
+        try:
+            _index._SIZE_CUTOFF = 1000
+
+            # create large list of non periodic datetime
+            dates = []
+            sec = datetime.timedelta(seconds=1)
+            half_sec = datetime.timedelta(microseconds=500000)
+            d = datetime.datetime(2011, 12, 5, 20, 30)
+            n = 1100
+            for i in range(n):
+                dates.append(d)
+                dates.append(d + sec)
+                dates.append(d + sec + half_sec)
+                dates.append(d + sec + sec + half_sec)
+                d += 3 * sec
+
+            # duplicate some values in the list
+            duplicate_positions = np.random.randint(0, len(dates) - 1, 20)
+            for p in duplicate_positions:
+                dates[p + 1] = dates[p]
+
+            df = DataFrame(np.random.randn(len(dates), 4),
+                           index=dates,
+                           columns=list('ABCD'))
+
+            pos = n * 3
+            timestamp = df.index[pos]
+            self.assertIn(timestamp, df.index)
+
+            # it works!
+            df.loc[timestamp]
+            self.assertTrue(len(df.loc[[timestamp]]) > 0)
+        finally:
+            _index._SIZE_CUTOFF = old_cutoff
+
+    def test_indexing_unordered(self):
+        # GH 2437
+        rng = date_range(start='2011-01-01', end='2011-01-15')
+        ts = Series(np.random.rand(len(rng)), index=rng)
+        ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])
+
+        for t in ts.index:
+            # TODO: unused?
+            s = str(t)  # noqa
+
+            expected = ts[t]
+            result = ts2[t]
+            self.assertTrue(expected == result)
+
+        # GH 3448 (ranges)
+        def compare(slobj):
+            result = ts2[slobj].copy()
+            result = result.sort_index()
+            expected = ts[slobj]
+            assert_series_equal(result, expected)
+
+        compare(slice('2011-01-01', '2011-01-15'))
+        compare(slice('2010-12-30', '2011-01-15'))
+        compare(slice('2011-01-01', '2011-01-16'))
+
+        # partial ranges
+        compare(slice('2011-01-01', '2011-01-6'))
+        compare(slice('2011-01-06', '2011-01-8'))
+        compare(slice('2011-01-06', '2011-01-12'))
+
+        # single values
+        result = ts2['2011'].sort_index()
+        expected = ts['2011']
+        assert_series_equal(result, expected)
+
+        # diff freq
+        rng = date_range(datetime(2005, 1, 1), periods=20, freq='M')
+        ts = Series(np.arange(len(rng)), index=rng)
+        ts = ts.take(np.random.permutation(20))
+
+        result = ts['2005']
+        for t in result.index:
+            self.assertTrue(t.year == 2005)
+
+    def test_indexing(self):
+
+        idx = date_range("2001-1-1", periods=20, freq='M')
+        ts = Series(np.random.rand(len(idx)), index=idx)
+
+        # getting
+
+        # GH 3070, make sure semantics work on Series/Frame
+        expected = ts['2001']
+        expected.name = 'A'
+
+        df = DataFrame(dict(A=ts))
+        result = df['2001']['A']
+        assert_series_equal(expected, result)
+
+        # setting
+        ts['2001'] = 1
+        expected = ts['2001']
+        expected.name = 'A'
+
+        df.loc['2001', 'A'] = 1
+
+        result = df['2001']['A']
+        assert_series_equal(expected, result)
+
+        # GH3546 (not including times on the last day)
+        idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00',
+                         freq='H')
+        ts = Series(lrange(len(idx)), index=idx)
+        expected = ts['2013-05']
+        assert_series_equal(expected, ts)
+
+        idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59',
+                         freq='S')
+        ts = Series(lrange(len(idx)), index=idx)
+        expected = ts['2013-05']
+        assert_series_equal(expected, ts)
+
+        idx = [Timestamp('2013-05-31 00:00'),
+               Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))]
+        ts = Series(lrange(len(idx)), index=idx)
+        expected = ts['2013']
+        assert_series_equal(expected, ts)
+
+        # GH14826, indexing with a seconds resolution string / datetime object
+        df = DataFrame(np.random.rand(5, 5),
+                       columns=['open', 'high', 'low', 'close', 'volume'],
+                       index=date_range('2012-01-02 18:01:00',
+                                        periods=5, tz='US/Central', freq='s'))
+        expected = df.loc[[df.index[2]]]
+
+        # this is a single date, so will raise
+        self.assertRaises(KeyError, df.__getitem__, '2012-01-02 18:01:02',)
+        self.assertRaises(KeyError, df.__getitem__, df.index[2],)
+
+
+class TestDatetimeIndexing(tm.TestCase):
+    """
+    Also test support for datetime64[ns] in Series / DataFrame
+    """
+
+    def setUp(self):
+        dti = DatetimeIndex(start=datetime(2005, 1, 1),
+                            end=datetime(2005, 1, 10), freq='Min')
+        self.series = Series(np.random.rand(len(dti)), dti)
+
+    def test_fancy_getitem(self):
+        dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1),
+                            end=datetime(2010, 1, 1))
+
+        s = Series(np.arange(len(dti)), index=dti)
+
+        self.assertEqual(s[48], 48)
+        self.assertEqual(s['1/2/2009'], 48)
+        self.assertEqual(s['2009-1-2'], 48)
+        self.assertEqual(s[datetime(2009, 1, 2)], 48)
+        self.assertEqual(s[lib.Timestamp(datetime(2009, 1, 2))], 48)
+        self.assertRaises(KeyError, s.__getitem__, '2009-1-3')
+
+        assert_series_equal(s['3/6/2009':'2009-06-05'],
+                            s[datetime(2009, 3, 6):datetime(2009, 6, 5)])
+
+    def test_fancy_setitem(self):
+        dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1),
+                            end=datetime(2010, 1, 1))
+
+        s = Series(np.arange(len(dti)), index=dti)
+        s[48] = -1
+        self.assertEqual(s[48], -1)
+        s['1/2/2009'] = -2
+        self.assertEqual(s[48], -2)
+        s['1/2/2009':'2009-06-05'] = -3
+        self.assertTrue((s[48:54] == -3).all())
+
+    def test_dti_snap(self):
+        dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002',
+                             '1/5/2002', '1/6/2002', '1/7/2002'], freq='D')
+
+        res = dti.snap(freq='W-MON')
+        exp = date_range('12/31/2001', '1/7/2002', freq='w-mon')
+        exp = exp.repeat([3, 4])
+        self.assertTrue((res == exp).all())
+
+        res = dti.snap(freq='B')
+
+        exp = date_range('1/1/2002', '1/7/2002', freq='b')
+        exp = exp.repeat([1, 1, 1, 2, 2])
+        self.assertTrue((res == exp).all())
+
+    def test_dti_reset_index_round_trip(self):
+        dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D')
+        d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti)
+        d2 = d1.reset_index()
+        self.assertEqual(d2.dtypes[0], np.dtype('M8[ns]'))
+        d3 = d2.set_index('index')
+        assert_frame_equal(d1, d3, check_names=False)
+
+        # #2329
+        stamp = datetime(2012, 11, 22)
+        df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value'])
+        df = df.set_index('Date')
+
+        self.assertEqual(df.index[0], stamp)
+        self.assertEqual(df.reset_index()['Date'][0], stamp)
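# ----------------------------------------------------------------------
# A minimal sketch (not part of the patch) of DatetimeIndex.snap as tested
# above: each timestamp snaps to the nearest anchor of the given frequency.
import pandas as pd

dti = pd.DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002'], freq='D')
print(dti.snap(freq='W-MON'))   # all three snap to 2001-12-31, a Monday
# ----------------------------------------------------------------------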
+
+    def test_series_set_value(self):
+        # #1561
+
+        dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)]
+        index = DatetimeIndex(dates)
+
+        s = Series().set_value(dates[0], 1.)
+        s2 = s.set_value(dates[1], np.nan)
+
+        exp = Series([1., np.nan], index=index)
+
+        assert_series_equal(s2, exp)
+
+        # s = Series(index[:1], index[:1])
+        # s2 = s.set_value(dates[1], index[1])
+        # self.assertEqual(s2.values.dtype, 'M8[ns]')
+
+    @slow
+    def test_slice_locs_indexerror(self):
+        times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10)
+                 for i in range(100000)]
+        s = Series(lrange(100000), times)
+        s.loc[datetime(1900, 1, 1):datetime(2100, 1, 1)]
+
+    def test_slicing_datetimes(self):
+
+        # GH 7523
+
+        # unique
+        df = DataFrame(np.arange(4., dtype='float64'),
+                       index=[datetime(2001, 1, i, 10, 00)
+                              for i in [1, 2, 3, 4]])
+        result = df.loc[datetime(2001, 1, 1, 10):]
+        assert_frame_equal(result, df)
+        result = df.loc[:datetime(2001, 1, 4, 10)]
+        assert_frame_equal(result, df)
+        result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)]
+        assert_frame_equal(result, df)
+
+        result = df.loc[datetime(2001, 1, 1, 11):]
+        expected = df.iloc[1:]
+        assert_frame_equal(result, expected)
+        result = df.loc['20010101 11':]
+        assert_frame_equal(result, expected)
+
+        # duplicates
+        df = pd.DataFrame(np.arange(5., dtype='float64'),
+                          index=[datetime(2001, 1, i, 10, 00)
+                                 for i in [1, 2, 2, 3, 4]])
+
+        result = df.loc[datetime(2001, 1, 1, 10):]
+        assert_frame_equal(result, df)
+        result = df.loc[:datetime(2001, 1, 4, 10)]
+        assert_frame_equal(result, df)
+        result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)]
+        assert_frame_equal(result, df)
+
+        result = df.loc[datetime(2001, 1, 1, 11):]
+        expected = df.iloc[1:]
+        assert_frame_equal(result, expected)
+        result = df.loc['20010101 11':]
+        assert_frame_equal(result, expected)
+
+    def test_frame_datetime64_duplicated(self):
+        dates = date_range('2010-07-01', end='2010-08-05')
+
+        tst = DataFrame({'symbol': 'AAA', 'date': dates})
+        result = tst.duplicated(['date', 'symbol'])
+        self.assertTrue((-result).all())
+
+        tst = DataFrame({'date': dates})
+        result = tst.duplicated()
+        self.assertTrue((-result).all())
+
+
+class TestNatIndexing(tm.TestCase):
+    def setUp(self):
+        self.series = Series(date_range('1/1/2000', periods=10))
+
+    # ---------------------------------------------------------------------
+    # NaT support
+
+    def test_set_none_nan(self):
+        self.series[3] = None
+        self.assertIs(self.series[3], NaT)
+
+        self.series[3:5] = None
+        self.assertIs(self.series[4], NaT)
+
+        self.series[5] = np.nan
+        self.assertIs(self.series[5], NaT)
+
+        self.series[5:7] = np.nan
+        self.assertIs(self.series[6], NaT)
+
+    def test_nat_operations(self):
+        # GH 8617
+        s = Series([0, pd.NaT], dtype='m8[ns]')
+        exp = s[0]
+        self.assertEqual(s.median(), exp)
+        self.assertEqual(s.min(), exp)
+        self.assertEqual(s.max(), exp)
+
+    def test_round_nat(self):
+        # GH14940
+        s = Series([pd.NaT])
+        expected = Series(pd.NaT)
+        for method in ["round", "floor", "ceil"]:
+            round_method = getattr(s.dt, method)
+            for freq in ["s", "5s", "min", "5min", "h", "5h"]:
+                assert_series_equal(round_method(freq), expected)
+

 if __name__ == '__main__':
     import nose
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
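# ----------------------------------------------------------------------
# A minimal sketch (not part of the patch) of GH14940 as tested above:
# rounding NaT through the .dt accessor returns NaT instead of raising.
import pandas as pd

s = pd.Series([pd.NaT])
assert s.dt.round('min').isnull().all()
# ----------------------------------------------------------------------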
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index b013e1a6f1c10..7b1201b971c71 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -30,6 +30,19 @@ class TestSeriesOperators(TestData, tm.TestCase):

     _multiprocess_can_split_ = True

+    def test_series_comparison_scalars(self):
+        series = Series(date_range('1/1/2000', periods=10))
+
+        val = datetime(2000, 1, 4)
+        result = series > val
+        expected = Series([x > val for x in series])
+        self.assert_series_equal(result, expected)
+
+        val = series[5]
+        result = series > val
+        expected = Series([x > val for x in series])
+        self.assert_series_equal(result, expected)
+
     def test_comparisons(self):
         left = np.random.randn(10)
         right = np.random.randn(10)
diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py
index 571a802e37211..9754a9d3737e3 100644
--- a/pandas/tests/series/test_timeseries.py
+++ b/pandas/tests/series/test_timeseries.py
@@ -1,35 +1,24 @@
 # coding=utf-8
 # pylint: disable-msg=E1101,W0612

-import sys
-import nose
-import locale
-import calendar
 import numpy as np
-from numpy.random import rand
 from datetime import datetime, timedelta, time

 import pandas as pd
-import pandas.index as _index
-import pandas.tseries.tools as tools
-import pandas.core.common as com
 import pandas.util.testing as tm
 from pandas.tslib import iNaT
-from pandas.compat import lrange, lmap, StringIO, product
+from pandas.compat import lrange, StringIO, product
 from pandas.tseries.tdi import TimedeltaIndex
 from pandas.tseries.index import DatetimeIndex
 from pandas.tseries.offsets import BDay, BMonthEnd
-from pandas.types.common import is_datetime64_ns_dtype
 from pandas import (Index, Series, date_range, NaT, concat, DataFrame,
-                    Timestamp, lib, isnull, to_datetime, offsets, Timedelta,
-                    tslib, bdate_range, Period, timedelta_range, compat)
+                    Timestamp, to_datetime, offsets,
+                    timedelta_range)
 from pandas.util.testing import (assert_series_equal, assert_almost_equal,
-                                 slow, assert_frame_equal, _skip_if_has_locale)
+                                 assert_frame_equal, _skip_if_has_locale)

 from pandas.tests.series.common import TestData

-randn = np.random.randn
-

 def _simple_ts(start, end, freq='D'):
     rng = date_range(start, end, freq=freq)
@@ -118,6 +107,22 @@ def test_shift(self):
                                          tz='CET'), name='foo')
         self.assertRaises(ValueError, lambda: s - s2)

+    def test_shift2(self):
+        ts = Series(np.random.randn(5),
+                    index=date_range('1/1/2000', periods=5, freq='H'))
+
+        result = ts.shift(1, freq='5T')
+        exp_index = ts.index.shift(1, freq='5T')
+        tm.assert_index_equal(result.index, exp_index)
+
+        # GH #1063, multiple of same base
+        result = ts.shift(1, freq='4H')
+        exp_index = ts.index + offsets.Hour(4)
+        tm.assert_index_equal(result.index, exp_index)
+
+        idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04'])
+        self.assertRaises(ValueError, idx.shift, 1)
+
     def test_shift_dst(self):
         # GH 13926
         dates = date_range('2016-11-06', freq='H', periods=10, tz='US/Eastern')
@@ -477,28 +482,6 @@ def test_series_ctor_datetime64(self):
         series = Series(dates)
         self.assertTrue(np.issubdtype(series.dtype, np.dtype('M8[ns]')))

-    def test_reindex_series_add_nat(self):
-        rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s')
-        series = Series(rng)
-
-        result = series.reindex(lrange(15))
-        self.assertTrue(np.issubdtype(result.dtype, np.dtype('M8[ns]')))
-
-        mask = result.isnull()
-        self.assertTrue(mask[-5:].all())
-        self.assertFalse(mask[:-5].any())
-
-    def test_reindex_frame_add_nat(self):
-        rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s')
-        df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng})
-
-        result = df.reindex(lrange(15))
-        self.assertTrue(np.issubdtype(result['B'].dtype, np.dtype('M8[ns]')))
-
-        mask = com.isnull(result)['B']
-        self.assertTrue(mask[-5:].all())
-        self.assertFalse(mask[:-5].any())
-
     def test_series_repr_nat(self):
         series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]')

@@ -510,36 +493,6 @@ def test_series_repr_nat(self):
                     'dtype: datetime64[ns]')
         self.assertEqual(result, expected)

-    def test_index_convert_to_datetime_array(self):
-        tm._skip_if_no_pytz()
-
-        def _check_rng(rng):
-            converted = rng.to_pydatetime()
-            tm.assertIsInstance(converted, np.ndarray)
-            for x, stamp in zip(converted, rng):
-                tm.assertIsInstance(x, datetime)
-                self.assertEqual(x, stamp.to_pydatetime())
-                self.assertEqual(x.tzinfo, stamp.tzinfo)
-
-        rng = date_range('20090415', '20090519')
-        rng_eastern = date_range('20090415', '20090519', tz='US/Eastern')
-        rng_utc = date_range('20090415', '20090519', tz='utc')
-
-        _check_rng(rng)
-        _check_rng(rng_eastern)
-        _check_rng(rng_utc)
-
-    def test_reindex_with_datetimes(self):
-        rng = date_range('1/1/2000', periods=20)
-        ts = Series(np.random.randn(20), index=rng)
-
-        result = ts.reindex(list(ts.index[5:10]))
-        expected = ts[5:10]
-        tm.assert_series_equal(result, expected)
-
-        result = ts[list(ts.index[5:10])]
-        tm.assert_series_equal(result, expected)
-
     def test_asfreq_keep_index_name(self):
         # GH #9854
         index_name = 'bar'
@@ -680,35 +633,13 @@ def test_at_time(self):
         rs = ts.at_time('16:00')
         self.assertEqual(len(rs), 0)

-    def test_at_time_frame(self):
-        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
-        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
-        rs = ts.at_time(rng[1])
-        self.assertTrue((rs.index.hour == rng[1].hour).all())
-        self.assertTrue((rs.index.minute == rng[1].minute).all())
-        self.assertTrue((rs.index.second == rng[1].second).all())
-
-        result = ts.at_time('9:30')
-        expected = ts.at_time(time(9, 30))
-        assert_frame_equal(result, expected)
-
-        result = ts.loc[time(9, 30)]
-        expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)]
-
-        assert_frame_equal(result, expected)
-
-        # midnight, everything
-        rng = date_range('1/1/2000', '1/31/2000')
-        ts = DataFrame(np.random.randn(len(rng), 3), index=rng)
-
-        result = ts.at_time(time(0, 0))
-        assert_frame_equal(result, ts)
+    def test_between(self):
+        series = Series(date_range('1/1/2000', periods=10))
+        left, right = series[[2, 7]]

-        # time doesn't exist
-        rng = date_range('1/1/2012', freq='23Min', periods=384)
-        ts = DataFrame(np.random.randn(len(rng), 2), rng)
-        rs = ts.at_time('16:00')
-        self.assertEqual(len(rs), 0)
+        result = series.between(left, right)
+        expected = (series >= left) & (series <= right)
+        assert_series_equal(result, expected)
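# ----------------------------------------------------------------------
# A minimal sketch (not part of the patch) of Series.between as added
# above: an inclusive element-wise range mask, equivalent to the pair of
# comparisons it replaces.
import pandas as pd

s = pd.Series(pd.date_range('1/1/2000', periods=10))
left, right = s[2], s[7]
assert s.between(left, right).equals((s >= left) & (s <= right))
# ----------------------------------------------------------------------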

     def test_between_time(self):
         rng = date_range('1/1/2000', '1/5/2000', freq='5min')
@@ -770,66 +701,6 @@ def test_between_time(self):
                 else:
                     self.assertTrue((t < etime) or (t >= stime))

-    def test_between_time_frame(self):
-        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
-        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
-        stime = time(0, 0)
-        etime = time(1, 0)
-
-        close_open = product([True, False], [True, False])
-        for inc_start, inc_end in close_open:
-            filtered = ts.between_time(stime, etime, inc_start, inc_end)
-            exp_len = 13 * 4 + 1
-            if not inc_start:
-                exp_len -= 5
-            if not inc_end:
-                exp_len -= 4
-
-            self.assertEqual(len(filtered), exp_len)
-            for rs in filtered.index:
-                t = rs.time()
-                if inc_start:
-                    self.assertTrue(t >= stime)
-                else:
-                    self.assertTrue(t > stime)
-
-                if inc_end:
-                    self.assertTrue(t <= etime)
-                else:
-                    self.assertTrue(t < etime)
-
-        result = ts.between_time('00:00', '01:00')
-        expected = ts.between_time(stime, etime)
-        assert_frame_equal(result, expected)
-
-        # across midnight
-        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
-        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
-        stime = time(22, 0)
-        etime = time(9, 0)
-
-        close_open = product([True, False], [True, False])
-        for inc_start, inc_end in close_open:
-            filtered = ts.between_time(stime, etime, inc_start, inc_end)
-            exp_len = (12 * 11 + 1) * 4 + 1
-            if not inc_start:
-                exp_len -= 4
-            if not inc_end:
-                exp_len -= 4
-
-            self.assertEqual(len(filtered), exp_len)
-            for rs in filtered.index:
-                t = rs.time()
-                if inc_start:
-                    self.assertTrue((t >= stime) or (t <= etime))
-                else:
-                    self.assertTrue((t > stime) or (t <= etime))
-
-                if inc_end:
-                    self.assertTrue((t <= etime) or (t >= stime))
-                else:
-                    self.assertTrue((t < etime) or (t >= stime))
-
     def test_between_time_types(self):
         # GH11818
         rng = date_range('1/1/2000', '1/5/2000', freq='5min')
@@ -897,275 +768,6 @@ def test_to_period(self):
         expected.columns = exp_idx
         assert_frame_equal(df.to_period(axis=1), expected)

-    def create_dt64_based_index(self):
-        data = [Timestamp('2007-01-01 10:11:12.123456Z'),
-                Timestamp('2007-01-01 10:11:13.789123Z')]
-        index = DatetimeIndex(data)
-        return index
-
-    def test_to_period_millisecond(self):
-        index = self.create_dt64_based_index()
-
-        period = index.to_period(freq='L')
-        self.assertEqual(2, len(period))
-        self.assertEqual(period[0], Period('2007-01-01 10:11:12.123Z', 'L'))
-        self.assertEqual(period[1], Period('2007-01-01 10:11:13.789Z', 'L'))
-
-    def test_to_period_microsecond(self):
-        index = self.create_dt64_based_index()
-
-        period = index.to_period(freq='U')
-        self.assertEqual(2, len(period))
-        self.assertEqual(period[0], Period('2007-01-01 10:11:12.123456Z', 'U'))
-        self.assertEqual(period[1], Period('2007-01-01 10:11:13.789123Z', 'U'))
-
-    def test_to_period_tz_pytz(self):
-        tm._skip_if_no_pytz()
-        from dateutil.tz import tzlocal
-        from pytz import utc as UTC
-
-        xp = date_range('1/1/2000', '4/1/2000').to_period()
-
-        ts = date_range('1/1/2000', '4/1/2000', tz='US/Eastern')
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertEqual(result, expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-        ts = date_range('1/1/2000', '4/1/2000', tz=UTC)
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertEqual(result, expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-        ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal())
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertEqual(result, expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-    def test_to_period_tz_explicit_pytz(self):
-        tm._skip_if_no_pytz()
-        import pytz
-        from dateutil.tz import tzlocal
-
-        xp = date_range('1/1/2000', '4/1/2000').to_period()
-
-        ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern'))
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertTrue(result == expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-        ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc)
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertTrue(result == expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-        ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal())
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertTrue(result == expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-    def test_to_period_tz_dateutil(self):
-        tm._skip_if_no_dateutil()
-        import dateutil
-        from dateutil.tz import tzlocal
-
-        xp = date_range('1/1/2000', '4/1/2000').to_period()
-
-        ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern')
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertTrue(result == expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-        ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc())
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertTrue(result == expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-        ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal())
-
-        result = ts.to_period()[0]
-        expected = ts[0].to_period()
-
-        self.assertTrue(result == expected)
-        tm.assert_index_equal(ts.to_period(), xp)
-
-    def test_frame_to_period(self):
-        K = 5
-        from pandas.tseries.period import period_range
-
-        dr = date_range('1/1/2000', '1/1/2001')
-        pr = period_range('1/1/2000', '1/1/2001')
-        df = DataFrame(randn(len(dr), K), index=dr)
-        df['mix'] = 'a'
-
-        pts = df.to_period()
-        exp = df.copy()
-        exp.index = pr
-        assert_frame_equal(pts, exp)
-
-        pts = df.to_period('M')
-        tm.assert_index_equal(pts.index, exp.index.asfreq('M'))
-
-        df = df.T
-        pts = df.to_period(axis=1)
-        exp = df.copy()
-        exp.columns = pr
-        assert_frame_equal(pts, exp)
-
-        pts = df.to_period('M', axis=1)
-        tm.assert_index_equal(pts.columns, exp.columns.asfreq('M'))
-
-        self.assertRaises(ValueError, df.to_period, axis=2)
-
-    def test_compat_replace(self):
-        # https://github.com/statsmodels/statsmodels/issues/3349
-        # replace should take ints/longs for compat
-
-        for f in [compat.long, int]:
-            result = date_range(Timestamp('1960-04-01 00:00:00',
-                                          freq='QS-JAN'),
-                                periods=f(76),
-                                freq='QS-JAN')
-            self.assertEqual(len(result), 76)
-
-    def test_astype_object(self):
-        # NumPy 1.6.1 weak ns support
-        rng = date_range('1/1/2000', periods=20)
-
-        casted = rng.astype('O')
-        exp_values = list(rng)
-
-        tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_))
-        self.assertEqual(casted.tolist(), exp_values)
-
-    def test_catch_infinite_loop(self):
-        offset = offsets.DateOffset(minute=5)
-        # blow up, don't loop forever
-        self.assertRaises(Exception, date_range, datetime(2011, 11, 11),
-                          datetime(2011, 11, 12), freq=offset)
-
-    def test_append_concat(self):
-        rng = date_range('5/8/2012 1:45', periods=10, freq='5T')
-        ts = Series(np.random.randn(len(rng)), rng)
-        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
-
-        result = ts.append(ts)
-        result_df = df.append(df)
-        ex_index = DatetimeIndex(np.tile(rng.values, 2))
-        tm.assert_index_equal(result.index, ex_index)
-        tm.assert_index_equal(result_df.index, ex_index)
-
-        appended = rng.append(rng)
-        tm.assert_index_equal(appended, ex_index)
-
-        appended = rng.append([rng, rng])
-        ex_index = DatetimeIndex(np.tile(rng.values, 3))
-        tm.assert_index_equal(appended, ex_index)
-
-        # different index names
-        rng1 = rng.copy()
-        rng2 = rng.copy()
-        rng1.name = 'foo'
-        rng2.name = 'bar'
-        self.assertEqual(rng1.append(rng1).name, 'foo')
-        self.assertIsNone(rng1.append(rng2).name)
-
-    def test_append_concat_tz(self):
-        # GH 2938
-        tm._skip_if_no_pytz()
-
-        rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
-                         tz='US/Eastern')
-        rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
-                          tz='US/Eastern')
-        rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
-                          tz='US/Eastern')
-        ts = Series(np.random.randn(len(rng)), rng)
-        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
-        ts2 = Series(np.random.randn(len(rng2)), rng2)
-        df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
-
-        result = ts.append(ts2)
-        result_df = df.append(df2)
-        tm.assert_index_equal(result.index, rng3)
-        tm.assert_index_equal(result_df.index, rng3)
-
-        appended = rng.append(rng2)
-        tm.assert_index_equal(appended, rng3)
-
-    def test_append_concat_tz_explicit_pytz(self):
-        # GH 2938
-        tm._skip_if_no_pytz()
-        from pytz import timezone as timezone
-
-        rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
-                         tz=timezone('US/Eastern'))
-        rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
-                          tz=timezone('US/Eastern'))
-        rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
-                          tz=timezone('US/Eastern'))
-        ts = Series(np.random.randn(len(rng)), rng)
-        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
-        ts2 = Series(np.random.randn(len(rng2)), rng2)
-        df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
-
-        result = ts.append(ts2)
-        result_df = df.append(df2)
-        tm.assert_index_equal(result.index, rng3)
-        tm.assert_index_equal(result_df.index, rng3)
-
-        appended = rng.append(rng2)
-        tm.assert_index_equal(appended, rng3)
-
-    def test_append_concat_tz_dateutil(self):
-        # GH 2938
-        tm._skip_if_no_dateutil()
-        rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
-                         tz='dateutil/US/Eastern')
-        rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
-                          tz='dateutil/US/Eastern')
-        rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
-                          tz='dateutil/US/Eastern')
-        ts = Series(np.random.randn(len(rng)), rng)
-        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
-        ts2 = Series(np.random.randn(len(rng2)), rng2)
-        df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
-
-        result = ts.append(ts2)
-        result_df = df.append(df2)
-        tm.assert_index_equal(result.index, rng3)
-        tm.assert_index_equal(result_df.index, rng3)
-
-        appended = rng.append(rng2)
-        tm.assert_index_equal(appended, rng3)
-
-    def test_set_dataframe_column_ns_dtype(self):
-        x = DataFrame([datetime.now(), datetime.now()])
-        self.assertEqual(x[0].dtype, np.dtype('M8[ns]'))
-
     def test_groupby_count_dateparseerror(self):
         dr = date_range(start='1/1/2012', freq='5min', periods=10)
@@ -1180,40 +782,6 @@ def test_groupby_count_dateparseerror(self):

         assert_series_equal(result, expected)

-    def test_frame_datetime64_handling_groupby(self):
-        # it works!
-        df = DataFrame([(3, np.datetime64('2012-07-03')),
-                        (3, np.datetime64('2012-07-04'))],
-                       columns=['a', 'date'])
-        result = df.groupby('a').first()
-        self.assertEqual(result['date'][3], Timestamp('2012-07-03'))
-
-    def test_frame_dict_constructor_datetime64_1680(self):
-        dr = date_range('1/1/2012', periods=10)
-        s = Series(dr, index=dr)
-
-        # it works!
-        DataFrame({'a': 'foo', 'b': s}, index=dr)
-        DataFrame({'a': 'foo', 'b': s.values}, index=dr)
-
-    def test_frame_datetime64_mixed_index_ctor_1681(self):
-        dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI')
-        ts = Series(dr)
-
-        # it works!
-        d = DataFrame({'A': 'foo', 'B': ts}, index=dr)
-        self.assertTrue(d['B'].isnull().all())
-
-    def test_frame_timeseries_to_records(self):
-        index = date_range('1/1/2000', periods=10)
-        df = DataFrame(np.random.randn(10, 3), index=index,
-                       columns=['a', 'b', 'c'])
-
-        result = df.to_records()
-        result['index'].dtype == 'M8[ns]'
-
-        result = df.to_records(index=False)
-
     def test_to_csv_numpy_16_bug(self):
         frame = DataFrame({'a': date_range('1/1/2000', periods=10)})

@@ -1234,20 +802,6 @@ def f(x):

         s.apply(f)
         DataFrame(s).applymap(f)

-    def test_concat_datetime_datetime64_frame(self):
-        # #2624
-        rows = []
-        rows.append([datetime(2010, 1, 1), 1])
-        rows.append([datetime(2010, 1, 2), 'hi'])
-
-        df2_obj = DataFrame.from_records(rows, columns=['date', 'test'])
-
-        ind = date_range(start="2000/1/1", freq="D", periods=10)
-        df1 = DataFrame({'date': ind, 'test': lrange(10)})
-
-        # it works!
-        pd.concat([df1, df2_obj])
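# ----------------------------------------------------------------------
# A minimal sketch (not part of the patch) of DatetimeIndex.to_period as
# exercised by the relocated tests above: a daily range converts to a
# daily PeriodIndex, element by element.
import pandas as pd

rng = pd.date_range('1/1/2000', periods=3)
print(rng.to_period())   # PeriodIndex [2000-01-01, 2000-01-02, 2000-01-03]
# ----------------------------------------------------------------------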

     def test_asfreq_resample_set_correct_freq(self):
         # GH5613
         # we test if .asfreq() and .resample() set the correct value for .freq
@@ -1283,1085 +837,6 @@ def test_pickle(self):
         idx_p = self.round_trip_pickle(idx)
         tm.assert_index_equal(idx, idx_p)

-
-class TestTimeSeriesDuplicates(tm.TestCase):
-    _multiprocess_can_split_ = True
-
-    def setUp(self):
-        dates = [datetime(2000, 1, 2), datetime(2000, 1, 2),
-                 datetime(2000, 1, 2), datetime(2000, 1, 3),
-                 datetime(2000, 1, 3), datetime(2000, 1, 3),
-                 datetime(2000, 1, 4), datetime(2000, 1, 4),
-                 datetime(2000, 1, 4), datetime(2000, 1, 5)]
-
-        self.dups = Series(np.random.randn(len(dates)), index=dates)
-
-    def test_constructor(self):
-        tm.assertIsInstance(self.dups, Series)
-        tm.assertIsInstance(self.dups.index, DatetimeIndex)
-
-    def test_is_unique_monotonic(self):
-        self.assertFalse(self.dups.index.is_unique)
-
-    def test_index_unique(self):
-        uniques = self.dups.index.unique()
-        expected = DatetimeIndex([datetime(2000, 1, 2), datetime(2000, 1, 3),
-                                  datetime(2000, 1, 4), datetime(2000, 1, 5)])
-        self.assertEqual(uniques.dtype, 'M8[ns]')  # sanity
-        tm.assert_index_equal(uniques, expected)
-        self.assertEqual(self.dups.index.nunique(), 4)
-
-        # #2563
-        self.assertTrue(isinstance(uniques, DatetimeIndex))
-
-        dups_local = self.dups.index.tz_localize('US/Eastern')
-        dups_local.name = 'foo'
-        result = dups_local.unique()
-        expected = DatetimeIndex(expected, name='foo')
-        expected = expected.tz_localize('US/Eastern')
-        self.assertTrue(result.tz is not None)
-        self.assertEqual(result.name, 'foo')
-        tm.assert_index_equal(result, expected)
-
-        # NaT, note this is excluded
-        arr = [1370745748 + t for t in range(20)] + [iNaT]
-        idx = DatetimeIndex(arr * 3)
-        tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
-        self.assertEqual(idx.nunique(), 20)
-        self.assertEqual(idx.nunique(dropna=False), 21)
-
-        arr = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t)
-               for t in range(20)] + [NaT]
-        idx = DatetimeIndex(arr * 3)
-        tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
-        self.assertEqual(idx.nunique(), 20)
-        self.assertEqual(idx.nunique(dropna=False), 21)
-
-    def test_index_dupes_contains(self):
-        d = datetime(2011, 12, 5, 20, 30)
-        ix = DatetimeIndex([d, d])
-        self.assertTrue(d in ix)
-
-    def test_duplicate_dates_indexing(self):
-        ts = self.dups
-
-        uniques = ts.index.unique()
-        for date in uniques:
-            result = ts[date]
-
-            mask = ts.index == date
-            total = (ts.index == date).sum()
-            expected = ts[mask]
-            if total > 1:
-                assert_series_equal(result, expected)
-            else:
-                assert_almost_equal(result, expected[0])
-
-            cp = ts.copy()
-            cp[date] = 0
-            expected = Series(np.where(mask, 0, ts), index=ts.index)
-            assert_series_equal(cp, expected)
-
-        self.assertRaises(KeyError, ts.__getitem__, datetime(2000, 1, 6))
-
-        # new index
-        ts[datetime(2000, 1, 6)] = 0
-        self.assertEqual(ts[datetime(2000, 1, 6)], 0)
-
-    def test_range_slice(self):
-        idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000',
-                             '1/4/2000'])
-
-        ts = Series(np.random.randn(len(idx)), index=idx)
-
-        result = ts['1/2/2000':]
-        expected = ts[1:]
-        assert_series_equal(result, expected)
-
-        result = ts['1/2/2000':'1/3/2000']
-        expected = ts[1:4]
-        assert_series_equal(result, expected)
-
-    def test_groupby_average_dup_values(self):
-        result = self.dups.groupby(level=0).mean()
-        expected = self.dups.groupby(self.dups.index).mean()
-        assert_series_equal(result, expected)
-
-    def test_indexing_over_size_cutoff(self):
-        import datetime
-        # #1821
-
-        old_cutoff = _index._SIZE_CUTOFF
-        try:
-            _index._SIZE_CUTOFF = 1000
-
-            # create large list of non periodic datetime
-            dates = []
-            sec = datetime.timedelta(seconds=1)
-            half_sec = datetime.timedelta(microseconds=500000)
-            d = datetime.datetime(2011, 12, 5, 20, 30)
-            n = 1100
-            for i in range(n):
-                dates.append(d)
-                dates.append(d + sec)
-                dates.append(d + sec + half_sec)
-                dates.append(d + sec + sec + half_sec)
-                d += 3 * sec
-
-            # duplicate some values in the list
-            duplicate_positions = np.random.randint(0, len(dates) - 1, 20)
-            for p in duplicate_positions:
-                dates[p + 1] = dates[p]
-
-            df = DataFrame(np.random.randn(len(dates), 4),
-                           index=dates,
-                           columns=list('ABCD'))
-
-            pos = n * 3
-            timestamp = df.index[pos]
-            self.assertIn(timestamp, df.index)
-
-            # it works!
-            df.loc[timestamp]
-            self.assertTrue(len(df.loc[[timestamp]]) > 0)
-        finally:
-            _index._SIZE_CUTOFF = old_cutoff
-
-    def test_indexing_unordered(self):
-        # GH 2437
-        rng = date_range(start='2011-01-01', end='2011-01-15')
-        ts = Series(randn(len(rng)), index=rng)
-        ts2 = concat([ts[0:4], ts[-4:], ts[4:-4]])
-
-        for t in ts.index:
-            # TODO: unused?
-            s = str(t)  # noqa
-
-            expected = ts[t]
-            result = ts2[t]
-            self.assertTrue(expected == result)
-
-        # GH 3448 (ranges)
-        def compare(slobj):
-            result = ts2[slobj].copy()
-            result = result.sort_index()
-            expected = ts[slobj]
-            assert_series_equal(result, expected)
-
-        compare(slice('2011-01-01', '2011-01-15'))
-        compare(slice('2010-12-30', '2011-01-15'))
-        compare(slice('2011-01-01', '2011-01-16'))
-
-        # partial ranges
-        compare(slice('2011-01-01', '2011-01-6'))
-        compare(slice('2011-01-06', '2011-01-8'))
-        compare(slice('2011-01-06', '2011-01-12'))
-
-        # single values
-        result = ts2['2011'].sort_index()
-        expected = ts['2011']
-        assert_series_equal(result, expected)
-
-        # diff freq
-        rng = date_range(datetime(2005, 1, 1), periods=20, freq='M')
-        ts = Series(np.arange(len(rng)), index=rng)
-        ts = ts.take(np.random.permutation(20))
-
-        result = ts['2005']
-        for t in result.index:
-            self.assertTrue(t.year == 2005)
-
-    def test_indexing(self):
-
-        idx = date_range("2001-1-1", periods=20, freq='M')
-        ts = Series(np.random.rand(len(idx)), index=idx)
-
-        # getting
-
-        # GH 3070, make sure semantics work on Series/Frame
-        expected = ts['2001']
-        expected.name = 'A'
-
-        df = DataFrame(dict(A=ts))
-        result = df['2001']['A']
-        assert_series_equal(expected, result)
-
-        # setting
-        ts['2001'] = 1
-        expected = ts['2001']
-        expected.name = 'A'
-
-        df.loc['2001', 'A'] = 1
-
-        result = df['2001']['A']
-        assert_series_equal(expected, result)
-
-        # GH3546 (not including times on the last day)
-        idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00',
-                         freq='H')
-        ts = Series(lrange(len(idx)), index=idx)
-        expected = ts['2013-05']
-        assert_series_equal(expected, ts)
-
-        idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59',
-                         freq='S')
-        ts = Series(lrange(len(idx)), index=idx)
-        expected = ts['2013-05']
-        assert_series_equal(expected, ts)
-
-        idx = [Timestamp('2013-05-31 00:00'),
-               Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))]
-        ts = Series(lrange(len(idx)), index=idx)
-        expected = ts['2013']
-        assert_series_equal(expected, ts)
-
-        # GH14826, indexing with a seconds resolution string / datetime object
-        df = DataFrame(randn(5, 5),
-                       columns=['open', 'high', 'low', 'close', 'volume'],
-                       index=date_range('2012-01-02 18:01:00',
-                                        periods=5, tz='US/Central', freq='s'))
-        expected = df.loc[[df.index[2]]]
-
-        # this is a single date, so will raise
-        self.assertRaises(KeyError, df.__getitem__, '2012-01-02 18:01:02',)
-        self.assertRaises(KeyError, df.__getitem__, df.index[2],)
-
-
-class TestDatetime64(tm.TestCase):
-    """
-    Also test support for datetime64[ns] in Series / DataFrame
-    """
-
-    def setUp(self):
-        dti = DatetimeIndex(start=datetime(2005, 1, 1),
-                            end=datetime(2005, 1, 10), freq='Min')
-        self.series = Series(rand(len(dti)), dti)
-
-    def test_fancy_getitem(self):
-        dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1),
-                            end=datetime(2010, 1, 1))
-
-        s = Series(np.arange(len(dti)), index=dti)
-
-        self.assertEqual(s[48], 48)
-        self.assertEqual(s['1/2/2009'], 48)
-        self.assertEqual(s['2009-1-2'], 48)
-        self.assertEqual(s[datetime(2009, 1, 2)], 48)
-        self.assertEqual(s[lib.Timestamp(datetime(2009, 1, 2))], 48)
-        self.assertRaises(KeyError, s.__getitem__, '2009-1-3')
-
-        assert_series_equal(s['3/6/2009':'2009-06-05'],
-                            s[datetime(2009, 3, 6):datetime(2009, 6, 5)])
-
-    def test_fancy_setitem(self):
-        dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1),
-                            end=datetime(2010, 1, 1))
-
-        s = Series(np.arange(len(dti)), index=dti)
-        s[48] = -1
-        self.assertEqual(s[48], -1)
-        s['1/2/2009'] = -2
-        self.assertEqual(s[48], -2)
-        s['1/2/2009':'2009-06-05'] = -3
-        self.assertTrue((s[48:54] == -3).all())
-
-    def test_dti_snap(self):
-        dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002',
-                             '1/5/2002', '1/6/2002', '1/7/2002'], freq='D')
-
-        res = dti.snap(freq='W-MON')
-        exp = date_range('12/31/2001', '1/7/2002', freq='w-mon')
-        exp = exp.repeat([3, 4])
-        self.assertTrue((res == exp).all())
-
-        res = dti.snap(freq='B')
-
-        exp = date_range('1/1/2002', '1/7/2002', freq='b')
-        exp = exp.repeat([1, 1, 1, 2, 2])
-        self.assertTrue((res == exp).all())
-
-    def test_dti_reset_index_round_trip(self):
-        dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D')
-        d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti)
-        d2 = d1.reset_index()
-        self.assertEqual(d2.dtypes[0], np.dtype('M8[ns]'))
-        d3 = d2.set_index('index')
-        assert_frame_equal(d1, d3, check_names=False)
-
-        # #2329
-        stamp = datetime(2012, 11, 22)
-        df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value'])
-        df = df.set_index('Date')
-
-        self.assertEqual(df.index[0], stamp)
-        self.assertEqual(df.reset_index()['Date'][0], stamp)
-
-    def test_series_set_value(self):
-        # #1561
-
-        dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)]
-        index = DatetimeIndex(dates)
-
-        s = Series().set_value(dates[0], 1.)
- s2 = s.set_value(dates[1], np.nan) - - exp = Series([1., np.nan], index=index) - - assert_series_equal(s2, exp) - - # s = Series(index[:1], index[:1]) - # s2 = s.set_value(dates[1], index[1]) - # self.assertEqual(s2.values.dtype, 'M8[ns]') - - @slow - def test_slice_locs_indexerror(self): - times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) - for i in range(100000)] - s = Series(lrange(100000), times) - s.loc[datetime(1900, 1, 1):datetime(2100, 1, 1)] - - def test_slicing_datetimes(self): - - # GH 7523 - - # unique - df = DataFrame(np.arange(4., dtype='float64'), - index=[datetime(2001, 1, i, 10, 00) - for i in [1, 2, 3, 4]]) - result = df.loc[datetime(2001, 1, 1, 10):] - assert_frame_equal(result, df) - result = df.loc[:datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - - result = df.loc[datetime(2001, 1, 1, 11):] - expected = df.iloc[1:] - assert_frame_equal(result, expected) - result = df.loc['20010101 11':] - assert_frame_equal(result, expected) - - # duplicates - df = pd.DataFrame(np.arange(5., dtype='float64'), - index=[datetime(2001, 1, i, 10, 00) - for i in [1, 2, 2, 3, 4]]) - - result = df.loc[datetime(2001, 1, 1, 10):] - assert_frame_equal(result, df) - result = df.loc[:datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - - result = df.loc[datetime(2001, 1, 1, 11):] - expected = df.iloc[1:] - assert_frame_equal(result, expected) - result = df.loc['20010101 11':] - assert_frame_equal(result, expected) - - def test_frame_datetime64_duplicated(self): - dates = date_range('2010-07-01', end='2010-08-05') - - tst = DataFrame({'symbol': 'AAA', 'date': dates}) - result = tst.duplicated(['date', 'symbol']) - self.assertTrue((-result).all()) - - tst = DataFrame({'date': dates}) - result = tst.duplicated() - self.assertTrue((-result).all()) - - -class TestSeriesDatetime64(tm.TestCase): - def setUp(self): - self.series = Series(date_range('1/1/2000', periods=10)) - - def test_auto_conversion(self): - series = Series(list(date_range('1/1/2000', periods=10))) - self.assertEqual(series.dtype, 'M8[ns]') - - def test_constructor_cant_cast_datetime64(self): - msg = "Cannot cast datetime64 to " - with tm.assertRaisesRegexp(TypeError, msg): - Series(date_range('1/1/2000', periods=10), dtype=float) - - with tm.assertRaisesRegexp(TypeError, msg): - Series(date_range('1/1/2000', periods=10), dtype=int) - - def test_constructor_cast_object(self): - s = Series(date_range('1/1/2000', periods=10), dtype=object) - exp = Series(date_range('1/1/2000', periods=10)) - tm.assert_series_equal(s, exp) - - def test_series_comparison_scalars(self): - val = datetime(2000, 1, 4) - result = self.series > val - expected = Series([x > val for x in self.series]) - self.assert_series_equal(result, expected) - - val = self.series[5] - result = self.series > val - expected = Series([x > val for x in self.series]) - self.assert_series_equal(result, expected) - - def test_between(self): - left, right = self.series[[2, 7]] - - result = self.series.between(left, right) - expected = (self.series >= left) & (self.series <= right) - assert_series_equal(result, expected) - - # --------------------------------------------------------------------- - # NaT support - - def test_NaT_scalar(self): - series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') - - val = series[3] - self.assertTrue(com.isnull(val)) - - 
series[2] = val - self.assertTrue(com.isnull(series[2])) - - def test_NaT_cast(self): - # GH10747 - result = Series([np.nan]).astype('M8[ns]') - expected = Series([NaT]) - assert_series_equal(result, expected) - - def test_set_none_nan(self): - self.series[3] = None - self.assertIs(self.series[3], NaT) - - self.series[3:5] = None - self.assertIs(self.series[4], NaT) - - self.series[5] = np.nan - self.assertIs(self.series[5], NaT) - - self.series[5:7] = np.nan - self.assertIs(self.series[6], NaT) - - def test_intercept_astype_object(self): - - # this test no longer makes sense as series is by default already - # M8[ns] - expected = self.series.astype('object') - - df = DataFrame({'a': self.series, - 'b': np.random.randn(len(self.series))}) - exp_dtypes = pd.Series([np.dtype('datetime64[ns]'), - np.dtype('float64')], index=['a', 'b']) - tm.assert_series_equal(df.dtypes, exp_dtypes) - - result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) - - df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)}) - - result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) - - def test_nat_operations(self): - # GH 8617 - s = Series([0, pd.NaT], dtype='m8[ns]') - exp = s[0] - self.assertEqual(s.median(), exp) - self.assertEqual(s.min(), exp) - self.assertEqual(s.max(), exp) - - def test_round_nat(self): - # GH14940 - s = Series([pd.NaT]) - expected = Series(pd.NaT) - for method in ["round", "floor", "ceil"]: - round_method = getattr(s.dt, method) - for freq in ["s", "5s", "min", "5min", "h", "5h"]: - assert_series_equal(round_method(freq), expected) - - -class TestDaysInMonth(tm.TestCase): - # tests for issue #10154 - def test_day_not_in_month_coerce(self): - self.assertTrue(isnull(to_datetime('2015-02-29', errors='coerce'))) - self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", - errors='coerce'))) - self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d", - errors='coerce'))) - self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", - errors='coerce'))) - - def test_day_not_in_month_raise(self): - self.assertRaises(ValueError, to_datetime, '2015-02-29', - errors='raise') - self.assertRaises(ValueError, to_datetime, '2015-02-29', - errors='raise', format="%Y-%m-%d") - self.assertRaises(ValueError, to_datetime, '2015-02-32', - errors='raise', format="%Y-%m-%d") - self.assertRaises(ValueError, to_datetime, '2015-04-31', - errors='raise', format="%Y-%m-%d") - - def test_day_not_in_month_ignore(self): - self.assertEqual(to_datetime( - '2015-02-29', errors='ignore'), '2015-02-29') - self.assertEqual(to_datetime( - '2015-02-29', errors='ignore', format="%Y-%m-%d"), '2015-02-29') - self.assertEqual(to_datetime( - '2015-02-32', errors='ignore', format="%Y-%m-%d"), '2015-02-32') - self.assertEqual(to_datetime( - '2015-04-31', errors='ignore', format="%Y-%m-%d"), '2015-04-31') - - -class TestGuessDatetimeFormat(tm.TestCase): - - def test_guess_datetime_format_with_parseable_formats(self): - tm._skip_if_not_us_locale() - dt_string_to_format = (('20111230', '%Y%m%d'), - ('2011-12-30', '%Y-%m-%d'), - ('30-12-2011', '%d-%m-%Y'), - ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'), - ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'), - ('2011-12-30 00:00:00.000000', - '%Y-%m-%d %H:%M:%S.%f'), ) - - for dt_string, dt_format in dt_string_to_format: - self.assertEqual( - tools._guess_datetime_format(dt_string), - dt_format - ) - - def test_guess_datetime_format_with_dayfirst(self): - ambiguous_string = '01/01/2011' - 
self.assertEqual( - tools._guess_datetime_format(ambiguous_string, dayfirst=True), - '%d/%m/%Y' - ) - self.assertEqual( - tools._guess_datetime_format(ambiguous_string, dayfirst=False), - '%m/%d/%Y' - ) - - def test_guess_datetime_format_with_locale_specific_formats(self): - # The month names will vary depending on the locale, in which - # case these wont be parsed properly (dateutil can't parse them) - _skip_if_has_locale() - - dt_string_to_format = (('30/Dec/2011', '%d/%b/%Y'), - ('30/December/2011', '%d/%B/%Y'), - ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'), ) - - for dt_string, dt_format in dt_string_to_format: - self.assertEqual( - tools._guess_datetime_format(dt_string), - dt_format - ) - - def test_guess_datetime_format_invalid_inputs(self): - # A datetime string must include a year, month and a day for it - # to be guessable, in addition to being a string that looks like - # a datetime - invalid_dts = [ - '2013', - '01/2013', - '12:00:00', - '1/1/1/1', - 'this_is_not_a_datetime', - '51a', - 9, - datetime(2011, 1, 1), - ] - - for invalid_dt in invalid_dts: - self.assertTrue(tools._guess_datetime_format(invalid_dt) is None) - - def test_guess_datetime_format_nopadding(self): - # GH 11142 - dt_string_to_format = (('2011-1-1', '%Y-%m-%d'), - ('30-1-2011', '%d-%m-%Y'), - ('1/1/2011', '%m/%d/%Y'), - ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'), - ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'), - ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S')) - - for dt_string, dt_format in dt_string_to_format: - self.assertEqual( - tools._guess_datetime_format(dt_string), - dt_format - ) - - def test_guess_datetime_format_for_array(self): - tm._skip_if_not_us_locale() - expected_format = '%Y-%m-%d %H:%M:%S.%f' - dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format) - - test_arrays = [ - np.array([dt_string, dt_string, dt_string], dtype='O'), - np.array([np.nan, np.nan, dt_string], dtype='O'), - np.array([dt_string, 'random_string'], dtype='O'), - ] - - for test_array in test_arrays: - self.assertEqual( - tools._guess_datetime_format_for_array(test_array), - expected_format - ) - - format_for_string_of_nans = tools._guess_datetime_format_for_array( - np.array( - [np.nan, np.nan, np.nan], dtype='O')) - self.assertTrue(format_for_string_of_nans is None) - - -class TestToDatetimeInferFormat(tm.TestCase): - - def test_to_datetime_infer_datetime_format_consistent_format(self): - s = pd.Series(pd.date_range('20000101', periods=50, freq='H')) - - test_formats = ['%m-%d-%Y', '%m/%d/%Y %H:%M:%S.%f', - '%Y-%m-%dT%H:%M:%S.%f'] - - for test_format in test_formats: - s_as_dt_strings = s.apply(lambda x: x.strftime(test_format)) - - with_format = pd.to_datetime(s_as_dt_strings, format=test_format) - no_infer = pd.to_datetime(s_as_dt_strings, - infer_datetime_format=False) - yes_infer = pd.to_datetime(s_as_dt_strings, - infer_datetime_format=True) - - # Whether the format is explicitly passed, it is inferred, or - # it is not inferred, the results should all be the same - self.assert_series_equal(with_format, no_infer) - self.assert_series_equal(no_infer, yes_infer) - - def test_to_datetime_infer_datetime_format_inconsistent_format(self): - s = pd.Series(np.array(['01/01/2011 00:00:00', - '01-02-2011 00:00:00', - '2011-01-03T00:00:00'])) - - # When the format is inconsistent, infer_datetime_format should just - # fallback to the default parsing - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - s = pd.Series(np.array(['Jan/01/2011', 
'Feb/01/2011', 'Mar/01/2011'])) - - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - def test_to_datetime_infer_datetime_format_series_with_nans(self): - s = pd.Series(np.array(['01/01/2011 00:00:00', np.nan, - '01/03/2011 00:00:00', np.nan])) - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - def test_to_datetime_infer_datetime_format_series_starting_with_nans(self): - s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00', - '01/02/2011 00:00:00', '01/03/2011 00:00:00'])) - - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - def test_to_datetime_iso8601_noleading_0s(self): - # GH 11871 - s = pd.Series(['2014-1-1', '2014-2-2', '2015-3-3']) - expected = pd.Series([pd.Timestamp('2014-01-01'), - pd.Timestamp('2014-02-02'), - pd.Timestamp('2015-03-03')]) - tm.assert_series_equal(pd.to_datetime(s), expected) - tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d'), expected) - - -class TimeConversionFormats(tm.TestCase): - def test_to_datetime_format(self): - values = ['1/1/2000', '1/2/2000', '1/3/2000'] - - results1 = [Timestamp('20000101'), Timestamp('20000201'), - Timestamp('20000301')] - results2 = [Timestamp('20000101'), Timestamp('20000102'), - Timestamp('20000103')] - for vals, expecteds in [(values, (Index(results1), Index(results2))), - (Series(values), - (Series(results1), Series(results2))), - (values[0], (results1[0], results2[0])), - (values[1], (results1[1], results2[1])), - (values[2], (results1[2], results2[2]))]: - - for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']): - result = to_datetime(vals, format=fmt) - expected = expecteds[i] - - if isinstance(expected, Series): - assert_series_equal(result, Series(expected)) - elif isinstance(expected, Timestamp): - self.assertEqual(result, expected) - else: - tm.assert_index_equal(result, expected) - - def test_to_datetime_format_YYYYMMDD(self): - s = Series([19801222, 19801222] + [19810105] * 5) - expected = Series([Timestamp(x) for x in s.apply(str)]) - - result = to_datetime(s, format='%Y%m%d') - assert_series_equal(result, expected) - - result = to_datetime(s.apply(str), format='%Y%m%d') - assert_series_equal(result, expected) - - # with NaT - expected = Series([Timestamp("19801222"), Timestamp("19801222")] + - [Timestamp("19810105")] * 5) - expected[2] = np.nan - s[2] = np.nan - - result = to_datetime(s, format='%Y%m%d') - assert_series_equal(result, expected) - - # string with NaT - s = s.apply(str) - s[2] = 'nat' - result = to_datetime(s, format='%Y%m%d') - assert_series_equal(result, expected) - - # coercion - # GH 7930 - s = Series([20121231, 20141231, 99991231]) - result = pd.to_datetime(s, format='%Y%m%d', errors='ignore') - expected = Series([datetime(2012, 12, 31), - datetime(2014, 12, 31), datetime(9999, 12, 31)], - dtype=object) - self.assert_series_equal(result, expected) - - result = pd.to_datetime(s, format='%Y%m%d', errors='coerce') - expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]') - assert_series_equal(result, expected) - - # GH 10178 - def test_to_datetime_format_integer(self): - s = Series([2000, 2001, 2002]) - expected = Series([Timestamp(x) for x in s.apply(str)]) - - result = to_datetime(s, format='%Y') - assert_series_equal(result, expected) - - s = Series([200001, 200105, 200206]) - expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str) - ]) 
- - result = to_datetime(s, format='%Y%m') - assert_series_equal(result, expected) - - def test_to_datetime_format_microsecond(self): - - # these are locale dependent - lang, _ = locale.getlocale() - month_abbr = calendar.month_abbr[4] - val = '01-{}-2011 00:00:01.978'.format(month_abbr) - - format = '%d-%b-%Y %H:%M:%S.%f' - result = to_datetime(val, format=format) - exp = datetime.strptime(val, format) - self.assertEqual(result, exp) - - def test_to_datetime_format_time(self): - data = [ - ['01/10/2010 15:20', '%m/%d/%Y %H:%M', - Timestamp('2010-01-10 15:20')], - ['01/10/2010 05:43', '%m/%d/%Y %I:%M', - Timestamp('2010-01-10 05:43')], - ['01/10/2010 13:56:01', '%m/%d/%Y %H:%M:%S', - Timestamp('2010-01-10 13:56:01')] # , - # ['01/10/2010 08:14 PM', '%m/%d/%Y %I:%M %p', - # Timestamp('2010-01-10 20:14')], - # ['01/10/2010 07:40 AM', '%m/%d/%Y %I:%M %p', - # Timestamp('2010-01-10 07:40')], - # ['01/10/2010 09:12:56 AM', '%m/%d/%Y %I:%M:%S %p', - # Timestamp('2010-01-10 09:12:56')] - ] - for s, format, dt in data: - self.assertEqual(to_datetime(s, format=format), dt) - - def test_to_datetime_with_non_exact(self): - # GH 10834 - _skip_if_has_locale() - - # 8904 - # exact kw - if sys.version_info < (2, 7): - raise nose.SkipTest('on python version < 2.7') - - s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00', - '19MAY11 00:00:00Z']) - result = to_datetime(s, format='%d%b%y', exact=False) - expected = to_datetime(s.str.extract(r'(\d+\w+\d+)', expand=False), - format='%d%b%y') - assert_series_equal(result, expected) - - def test_parse_nanoseconds_with_formula(self): - - # GH8989 - # trunctaing the nanoseconds when a format was provided - for v in ["2012-01-01 09:00:00.000000001", - "2012-01-01 09:00:00.000001", - "2012-01-01 09:00:00.001", - "2012-01-01 09:00:00.001000", - "2012-01-01 09:00:00.001000000", ]: - expected = pd.to_datetime(v) - result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f") - self.assertEqual(result, expected) - - def test_to_datetime_format_weeks(self): - data = [ - ['2009324', '%Y%W%w', Timestamp('2009-08-13')], - ['2013020', '%Y%U%w', Timestamp('2013-01-13')] - ] - for s, format, dt in data: - self.assertEqual(to_datetime(s, format=format), dt) - - -class TestSlicing(tm.TestCase): - def test_slice_year(self): - dti = DatetimeIndex(freq='B', start=datetime(2005, 1, 1), periods=500) - - s = Series(np.arange(len(dti)), index=dti) - result = s['2005'] - expected = s[s.index.year == 2005] - assert_series_equal(result, expected) - - df = DataFrame(np.random.rand(len(dti), 5), index=dti) - result = df.loc['2005'] - expected = df[df.index.year == 2005] - assert_frame_equal(result, expected) - - rng = date_range('1/1/2000', '1/1/2010') - - result = rng.get_loc('2009') - expected = slice(3288, 3653) - self.assertEqual(result, expected) - - def test_slice_quarter(self): - dti = DatetimeIndex(freq='D', start=datetime(2000, 6, 1), periods=500) - - s = Series(np.arange(len(dti)), index=dti) - self.assertEqual(len(s['2001Q1']), 90) - - df = DataFrame(np.random.rand(len(dti), 5), index=dti) - self.assertEqual(len(df.loc['1Q01']), 90) - - def test_slice_month(self): - dti = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500) - s = Series(np.arange(len(dti)), index=dti) - self.assertEqual(len(s['2005-11']), 30) - - df = DataFrame(np.random.rand(len(dti), 5), index=dti) - self.assertEqual(len(df.loc['2005-11']), 30) - - assert_series_equal(s['2005-11'], s['11-2005']) - - def test_partial_slice(self): - rng = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), 
periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s['2005-05':'2006-02'] - expected = s['20050501':'20060228'] - assert_series_equal(result, expected) - - result = s['2005-05':] - expected = s['20050501':] - assert_series_equal(result, expected) - - result = s[:'2006-02'] - expected = s[:'20060228'] - assert_series_equal(result, expected) - - result = s['2005-1-1'] - self.assertEqual(result, s.iloc[0]) - - self.assertRaises(Exception, s.__getitem__, '2004-12-31') - - def test_partial_slice_daily(self): - rng = DatetimeIndex(freq='H', start=datetime(2005, 1, 31), periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s['2005-1-31'] - assert_series_equal(result, s.iloc[:24]) - - self.assertRaises(Exception, s.__getitem__, '2004-12-31 00') - - def test_partial_slice_hourly(self): - rng = DatetimeIndex(freq='T', start=datetime(2005, 1, 1, 20, 0, 0), - periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s['2005-1-1'] - assert_series_equal(result, s.iloc[:60 * 4]) - - result = s['2005-1-1 20'] - assert_series_equal(result, s.iloc[:60]) - - self.assertEqual(s['2005-1-1 20:00'], s.iloc[0]) - self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:15') - - def test_partial_slice_minutely(self): - rng = DatetimeIndex(freq='S', start=datetime(2005, 1, 1, 23, 59, 0), - periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s['2005-1-1 23:59'] - assert_series_equal(result, s.iloc[:60]) - - result = s['2005-1-1'] - assert_series_equal(result, s.iloc[:60]) - - self.assertEqual(s[Timestamp('2005-1-1 23:59:00')], s.iloc[0]) - self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:00:00') - - def test_partial_slice_second_precision(self): - rng = DatetimeIndex(start=datetime(2005, 1, 1, 0, 0, 59, - microsecond=999990), - periods=20, freq='US') - s = Series(np.arange(20), rng) - - assert_series_equal(s['2005-1-1 00:00'], s.iloc[:10]) - assert_series_equal(s['2005-1-1 00:00:59'], s.iloc[:10]) - - assert_series_equal(s['2005-1-1 00:01'], s.iloc[10:]) - assert_series_equal(s['2005-1-1 00:01:00'], s.iloc[10:]) - - self.assertEqual(s[Timestamp('2005-1-1 00:00:59.999990')], s.iloc[0]) - self.assertRaisesRegexp(KeyError, '2005-1-1 00:00:00', - lambda: s['2005-1-1 00:00:00']) - - def test_partial_slicing_dataframe(self): - # GH14856 - # Test various combinations of string slicing resolution vs. 
- # index resolution - # - If string resolution is less precise than index resolution, - # string is considered a slice - # - If string resolution is equal to or more precise than index - # resolution, string is considered an exact match - formats = ['%Y', '%Y-%m', '%Y-%m-%d', '%Y-%m-%d %H', - '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S'] - resolutions = ['year', 'month', 'day', 'hour', 'minute', 'second'] - for rnum, resolution in enumerate(resolutions[2:], 2): - # we check only 'day', 'hour', 'minute' and 'second' - unit = Timedelta("1 " + resolution) - middate = datetime(2012, 1, 1, 0, 0, 0) - index = DatetimeIndex([middate - unit, - middate, middate + unit]) - values = [1, 2, 3] - df = DataFrame({'a': values}, index, dtype=np.int64) - self.assertEqual(df.index.resolution, resolution) - - # Timestamp with the same resolution as index - # Should be exact match for Series (return scalar) - # and raise KeyError for Frame - for timestamp, expected in zip(index, values): - ts_string = timestamp.strftime(formats[rnum]) - # make ts_string as precise as index - result = df['a'][ts_string] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, expected) - self.assertRaises(KeyError, df.__getitem__, ts_string) - - # Timestamp with resolution less precise than index - for fmt in formats[:rnum]: - for element, theslice in [[0, slice(None, 1)], - [1, slice(1, None)]]: - ts_string = index[element].strftime(fmt) - - # Series should return slice - result = df['a'][ts_string] - expected = df['a'][theslice] - assert_series_equal(result, expected) - - # Frame should return slice as well - result = df[ts_string] - expected = df[theslice] - assert_frame_equal(result, expected) - - # Timestamp with resolution more precise than index - # Compatible with existing key - # Should return scalar for Series - # and raise KeyError for Frame - for fmt in formats[rnum + 1:]: - ts_string = index[1].strftime(fmt) - result = df['a'][ts_string] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, 2) - self.assertRaises(KeyError, df.__getitem__, ts_string) - - # Not compatible with existing key - # Should raise KeyError - for fmt, res in list(zip(formats, resolutions))[rnum + 1:]: - ts = index[1] + Timedelta("1 " + res) - ts_string = ts.strftime(fmt) - self.assertRaises(KeyError, df['a'].__getitem__, ts_string) - self.assertRaises(KeyError, df.__getitem__, ts_string) - - def test_partial_slicing_with_multiindex(self): - - # GH 4758 - # partial string indexing with a multi-index buggy - df = DataFrame({'ACCOUNT': ["ACCT1", "ACCT1", "ACCT1", "ACCT2"], - 'TICKER': ["ABC", "MNP", "XYZ", "XYZ"], - 'val': [1, 2, 3, 4]}, - index=date_range("2013-06-19 09:30:00", - periods=4, freq='5T')) - df_multi = df.set_index(['ACCOUNT', 'TICKER'], append=True) - - expected = DataFrame([ - [1] - ], index=Index(['ABC'], name='TICKER'), columns=['val']) - result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1')] - assert_frame_equal(result, expected) - - expected = df_multi.loc[ - (pd.Timestamp('2013-06-19 09:30:00', tz=None), 'ACCT1', 'ABC')] - result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1', 'ABC')] - assert_series_equal(result, expected) - - # this is a KeyError as we don't do partial string selection on - # multi-levels - def f(): - df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')] - - self.assertRaises(KeyError, f) - - # GH 4294 - # partial slice on a series mi - s = pd.DataFrame(randn(1000, 1000), index=pd.date_range( - '2000-1-1', periods=1000)).stack() - - s2 = s[:-1].copy() - expected = s2['2000-1-4'] - result = 
s2[pd.Timestamp('2000-1-4')] - assert_series_equal(result, expected) - - result = s[pd.Timestamp('2000-1-4')] - expected = s['2000-1-4'] - assert_series_equal(result, expected) - - df2 = pd.DataFrame(s) - expected = df2.xs('2000-1-4') - result = df2.loc[pd.Timestamp('2000-1-4')] - assert_frame_equal(result, expected) - - def test_shift(self): - ts = Series(np.random.randn(5), - index=date_range('1/1/2000', periods=5, freq='H')) - - result = ts.shift(1, freq='5T') - exp_index = ts.index.shift(1, freq='5T') - tm.assert_index_equal(result.index, exp_index) - - # GH #1063, multiple of same base - result = ts.shift(1, freq='4H') - exp_index = ts.index + offsets.Hour(4) - tm.assert_index_equal(result.index, exp_index) - - idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) - self.assertRaises(ValueError, idx.shift, 1) - def test_setops_preserve_freq(self): for tz in [None, 'Asia/Tokyo', 'US/Eastern']: rng = date_range('1/1/2000', '1/1/2002', name='idx', tz=tz) @@ -2453,602 +928,8 @@ def test_get_level_values_box(self): self.assertTrue(isinstance(index.get_level_values(0)[0], Timestamp)) - def test_frame_apply_dont_convert_datetime64(self): - from pandas.tseries.offsets import BDay - df = DataFrame({'x1': [datetime(1996, 1, 1)]}) - - df = df.applymap(lambda x: x + BDay()) - df = df.applymap(lambda x: x + BDay()) - - self.assertTrue(df.x1.dtype == 'M8[ns]') - - def test_partial_slice_doesnt_require_monotonicity(self): - # For historical reasons. - s = pd.Series(np.arange(10), pd.date_range('2014-01-01', periods=10)) - - nonmonotonic = s[[3, 5, 4]] - expected = nonmonotonic.iloc[:0] - timestamp = pd.Timestamp('2014-01-10') - - assert_series_equal(nonmonotonic['2014-01-10':], expected) - self.assertRaisesRegexp(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic[timestamp:]) - - assert_series_equal(nonmonotonic.loc['2014-01-10':], expected) - self.assertRaisesRegexp(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic.loc[timestamp:]) - - -class TestToDatetime(tm.TestCase): - _multiprocess_can_split_ = True - - def test_to_datetime_dt64s(self): - in_bound_dts = [ - np.datetime64('2000-01-01'), - np.datetime64('2000-01-02'), - ] - - for dt in in_bound_dts: - self.assertEqual(pd.to_datetime(dt), Timestamp(dt)) - - oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ] - - for dt in oob_dts: - self.assertRaises(ValueError, pd.to_datetime, dt, errors='raise') - self.assertRaises(ValueError, tslib.Timestamp, dt) - self.assertIs(pd.to_datetime(dt, errors='coerce'), NaT) - - def test_to_datetime_array_of_dt64s(self): - dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] - - # Assuming all datetimes are in bounds, to_datetime() returns - # an array that is equal to Timestamp() parsing - self.assert_numpy_array_equal( - pd.to_datetime(dts, box=False), - np.array([Timestamp(x).asm8 for x in dts]) - ) - - # A list of datetimes where the last one is out of bounds - dts_with_oob = dts + [np.datetime64('9999-01-01')] - - self.assertRaises(ValueError, pd.to_datetime, dts_with_oob, - errors='raise') - - self.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='coerce'), - np.array( - [ - Timestamp(dts_with_oob[0]).asm8, - Timestamp(dts_with_oob[1]).asm8, - iNaT, - ], - dtype='M8' - ) - ) - - # With errors='ignore', out of bounds datetime64s - # are converted to their .item(), which depending on the version of - # numpy is either a python datetime.datetime or datetime.date - self.assert_numpy_array_equal( - 
pd.to_datetime(dts_with_oob, box=False, errors='ignore'), - np.array( - [dt.item() for dt in dts_with_oob], - dtype='O' - ) - ) - - def test_to_datetime_tz(self): - - # xref 8260 - # uniform returns a DatetimeIndex - arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), - pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')] - result = pd.to_datetime(arr) - expected = DatetimeIndex( - ['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific') - tm.assert_index_equal(result, expected) - - # mixed tzs will raise - arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'), - pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] - self.assertRaises(ValueError, lambda: pd.to_datetime(arr)) - - def test_to_datetime_tz_pytz(self): - - # xref 8260 - tm._skip_if_no_pytz() - import pytz - - us_eastern = pytz.timezone('US/Eastern') - arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, - hour=3, minute=0)), - us_eastern.localize(datetime(year=2000, month=6, day=1, - hour=3, minute=0))], - dtype=object) - result = pd.to_datetime(arr, utc=True) - expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', - '2000-06-01 07:00:00+00:00'], - dtype='datetime64[ns, UTC]', freq=None) - tm.assert_index_equal(result, expected) - - def test_to_datetime_utc_is_true(self): - # See gh-11934 - start = pd.Timestamp('2014-01-01', tz='utc') - end = pd.Timestamp('2014-01-03', tz='utc') - date_range = pd.bdate_range(start, end) - - result = pd.to_datetime(date_range, utc=True) - expected = pd.DatetimeIndex(data=date_range) - tm.assert_index_equal(result, expected) - - def test_to_datetime_tz_psycopg2(self): - - # xref 8260 - try: - import psycopg2 - except ImportError: - raise nose.SkipTest("no psycopg2 installed") - - # misc cases - tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None) - tz2 = psycopg2.tz.FixedOffsetTimezone(offset=-240, name=None) - arr = np.array([datetime(2000, 1, 1, 3, 0, tzinfo=tz1), - datetime(2000, 6, 1, 3, 0, tzinfo=tz2)], - dtype=object) - - result = pd.to_datetime(arr, errors='coerce', utc=True) - expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', - '2000-06-01 07:00:00+00:00'], - dtype='datetime64[ns, UTC]', freq=None) - tm.assert_index_equal(result, expected) - - # dtype coercion - i = pd.DatetimeIndex([ - '2000-01-01 08:00:00+00:00' - ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)) - self.assertTrue(is_datetime64_ns_dtype(i)) - - # tz coerceion - result = pd.to_datetime(i, errors='coerce') - tm.assert_index_equal(result, i) - - result = pd.to_datetime(i, errors='coerce', utc=True) - expected = pd.DatetimeIndex(['2000-01-01 13:00:00'], - dtype='datetime64[ns, UTC]') - tm.assert_index_equal(result, expected) - - def test_datetime_bool(self): - # GH13176 - with self.assertRaises(TypeError): - to_datetime(False) - self.assertTrue(to_datetime(False, errors="coerce") is tslib.NaT) - self.assertEqual(to_datetime(False, errors="ignore"), False) - with self.assertRaises(TypeError): - to_datetime(True) - self.assertTrue(to_datetime(True, errors="coerce") is tslib.NaT) - self.assertEqual(to_datetime(True, errors="ignore"), True) - with self.assertRaises(TypeError): - to_datetime([False, datetime.today()]) - with self.assertRaises(TypeError): - to_datetime(['20130101', True]) - tm.assert_index_equal(to_datetime([0, False, tslib.NaT, 0.0], - errors="coerce"), - DatetimeIndex([to_datetime(0), tslib.NaT, - tslib.NaT, to_datetime(0)])) - - def test_datetime_invalid_datatype(self): - # GH13176 - - with self.assertRaises(TypeError): - 
pd.to_datetime(bool) - with self.assertRaises(TypeError): - pd.to_datetime(pd.to_datetime) - - def test_unit(self): - # GH 11758 - # test proper behavior with erros - - with self.assertRaises(ValueError): - to_datetime([1], unit='D', format='%Y%m%d') - - values = [11111111, 1, 1.0, tslib.iNaT, pd.NaT, np.nan, - 'NaT', ''] - result = to_datetime(values, unit='D', errors='ignore') - expected = Index([11111111, Timestamp('1970-01-02'), - Timestamp('1970-01-02'), pd.NaT, - pd.NaT, pd.NaT, pd.NaT, pd.NaT], - dtype=object) - tm.assert_index_equal(result, expected) - - result = to_datetime(values, unit='D', errors='coerce') - expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02', - 'NaT', 'NaT', 'NaT', 'NaT', 'NaT']) - tm.assert_index_equal(result, expected) - - with self.assertRaises(tslib.OutOfBoundsDatetime): - to_datetime(values, unit='D', errors='raise') - - values = [1420043460000, tslib.iNaT, pd.NaT, np.nan, 'NaT'] - - result = to_datetime(values, errors='ignore', unit='s') - expected = Index([1420043460000, pd.NaT, pd.NaT, - pd.NaT, pd.NaT], dtype=object) - tm.assert_index_equal(result, expected) - - result = to_datetime(values, errors='coerce', unit='s') - expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT']) - tm.assert_index_equal(result, expected) - - with self.assertRaises(tslib.OutOfBoundsDatetime): - to_datetime(values, errors='raise', unit='s') - - # if we have a string, then we raise a ValueError - # and NOT an OutOfBoundsDatetime - for val in ['foo', Timestamp('20130101')]: - try: - to_datetime(val, errors='raise', unit='s') - except tslib.OutOfBoundsDatetime: - raise AssertionError("incorrect exception raised") - except ValueError: - pass - - def test_unit_consistency(self): - - # consistency of conversions - expected = Timestamp('1970-05-09 14:25:11') - result = pd.to_datetime(11111111, unit='s', errors='raise') - self.assertEqual(result, expected) - self.assertIsInstance(result, Timestamp) - - result = pd.to_datetime(11111111, unit='s', errors='coerce') - self.assertEqual(result, expected) - self.assertIsInstance(result, Timestamp) - - result = pd.to_datetime(11111111, unit='s', errors='ignore') - self.assertEqual(result, expected) - self.assertIsInstance(result, Timestamp) - - def test_unit_with_numeric(self): - - # GH 13180 - # coercions from floats/ints are ok - expected = DatetimeIndex(['2015-06-19 05:33:20', - '2015-05-27 22:33:20']) - arr1 = [1.434692e+18, 1.432766e+18] - arr2 = np.array(arr1).astype('int64') - for errors in ['ignore', 'raise', 'coerce']: - result = pd.to_datetime(arr1, errors=errors) - tm.assert_index_equal(result, expected) - - result = pd.to_datetime(arr2, errors=errors) - tm.assert_index_equal(result, expected) - - # but we want to make sure that we are coercing - # if we have ints/strings - expected = DatetimeIndex(['NaT', - '2015-06-19 05:33:20', - '2015-05-27 22:33:20']) - arr = ['foo', 1.434692e+18, 1.432766e+18] - result = pd.to_datetime(arr, errors='coerce') - tm.assert_index_equal(result, expected) - - expected = DatetimeIndex(['2015-06-19 05:33:20', - '2015-05-27 22:33:20', - 'NaT', - 'NaT']) - arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT'] - result = pd.to_datetime(arr, errors='coerce') - tm.assert_index_equal(result, expected) - - def test_unit_mixed(self): - - # mixed integers/datetimes - expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT']) - arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18] - result = pd.to_datetime(arr, errors='coerce') - tm.assert_index_equal(result, expected) - - with 
self.assertRaises(ValueError): - pd.to_datetime(arr, errors='raise') - - expected = DatetimeIndex(['NaT', - 'NaT', - '2013-01-01']) - arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')] - result = pd.to_datetime(arr, errors='coerce') - tm.assert_index_equal(result, expected) - - with self.assertRaises(ValueError): - pd.to_datetime(arr, errors='raise') - - def test_dataframe(self): - - df = DataFrame({'year': [2015, 2016], - 'month': [2, 3], - 'day': [4, 5], - 'hour': [6, 7], - 'minute': [58, 59], - 'second': [10, 11], - 'ms': [1, 1], - 'us': [2, 2], - 'ns': [3, 3]}) - - result = to_datetime({'year': df['year'], - 'month': df['month'], - 'day': df['day']}) - expected = Series([Timestamp('20150204 00:00:00'), - Timestamp('20160305 00:0:00')]) - assert_series_equal(result, expected) - - # dict-like - result = to_datetime(df[['year', 'month', 'day']].to_dict()) - assert_series_equal(result, expected) - - # dict but with constructable - df2 = df[['year', 'month', 'day']].to_dict() - df2['month'] = 2 - result = to_datetime(df2) - expected2 = Series([Timestamp('20150204 00:00:00'), - Timestamp('20160205 00:0:00')]) - assert_series_equal(result, expected2) - - # unit mappings - units = [{'year': 'years', - 'month': 'months', - 'day': 'days', - 'hour': 'hours', - 'minute': 'minutes', - 'second': 'seconds'}, - {'year': 'year', - 'month': 'month', - 'day': 'day', - 'hour': 'hour', - 'minute': 'minute', - 'second': 'second'}, - ] - - for d in units: - result = to_datetime(df[list(d.keys())].rename(columns=d)) - expected = Series([Timestamp('20150204 06:58:10'), - Timestamp('20160305 07:59:11')]) - assert_series_equal(result, expected) - - d = {'year': 'year', - 'month': 'month', - 'day': 'day', - 'hour': 'hour', - 'minute': 'minute', - 'second': 'second', - 'ms': 'ms', - 'us': 'us', - 'ns': 'ns'} - - result = to_datetime(df.rename(columns=d)) - expected = Series([Timestamp('20150204 06:58:10.001002003'), - Timestamp('20160305 07:59:11.001002003')]) - assert_series_equal(result, expected) - - # coerce back to int - result = to_datetime(df.astype(str)) - assert_series_equal(result, expected) - - # passing coerce - df2 = DataFrame({'year': [2015, 2016], - 'month': [2, 20], - 'day': [4, 5]}) - with self.assertRaises(ValueError): - to_datetime(df2) - result = to_datetime(df2, errors='coerce') - expected = Series([Timestamp('20150204 00:00:00'), - pd.NaT]) - assert_series_equal(result, expected) - - # extra columns - with self.assertRaises(ValueError): - df2 = df.copy() - df2['foo'] = 1 - to_datetime(df2) - - # not enough - for c in [['year'], - ['year', 'month'], - ['year', 'month', 'second'], - ['month', 'day'], - ['year', 'day', 'second']]: - with self.assertRaises(ValueError): - to_datetime(df[c]) - - # duplicates - df2 = DataFrame({'year': [2015, 2016], - 'month': [2, 20], - 'day': [4, 5]}) - df2.columns = ['year', 'year', 'day'] - with self.assertRaises(ValueError): - to_datetime(df2) - - df2 = DataFrame({'year': [2015, 2016], - 'month': [2, 20], - 'day': [4, 5], - 'hour': [4, 5]}) - df2.columns = ['year', 'month', 'day', 'day'] - with self.assertRaises(ValueError): - to_datetime(df2) - - def test_dataframe_dtypes(self): - # #13451 - df = DataFrame({'year': [2015, 2016], - 'month': [2, 3], - 'day': [4, 5]}) - - # int16 - result = to_datetime(df.astype('int16')) - expected = Series([Timestamp('20150204 00:00:00'), - Timestamp('20160305 00:00:00')]) - assert_series_equal(result, expected) - - # mixed dtypes - df['month'] = df['month'].astype('int8') - df['day'] = df['day'].astype('int8') - 
result = to_datetime(df) - expected = Series([Timestamp('20150204 00:00:00'), - Timestamp('20160305 00:00:00')]) - assert_series_equal(result, expected) - - # float - df = DataFrame({'year': [2000, 2001], - 'month': [1.5, 1], - 'day': [1, 1]}) - with self.assertRaises(ValueError): - to_datetime(df) - - def test_index_to_datetime(self): - idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = idx.to_datetime() - expected = DatetimeIndex(pd.to_datetime(idx.values)) - tm.assert_index_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - today = datetime.today() - idx = Index([today], dtype=object) - result = idx.to_datetime() - expected = DatetimeIndex([today]) - tm.assert_index_equal(result, expected) - - def test_to_datetime_iso8601(self): - result = to_datetime(["2012-01-01 00:00:00"]) - exp = Timestamp("2012-01-01 00:00:00") - self.assertEqual(result[0], exp) - - result = to_datetime(['20121001']) # bad iso 8601 - exp = Timestamp('2012-10-01') - self.assertEqual(result[0], exp) - - def test_to_datetime_default(self): - rs = to_datetime('2001') - xp = datetime(2001, 1, 1) - self.assertTrue(rs, xp) - - # dayfirst is essentially broken - - # to_datetime('01-13-2012', dayfirst=True) - # self.assertRaises(ValueError, to_datetime('01-13-2012', - # dayfirst=True)) - - def test_to_datetime_on_datetime64_series(self): - # #2699 - s = Series(date_range('1/1/2000', periods=10)) - - result = to_datetime(s) - self.assertEqual(result[0], s[0]) - - def test_to_datetime_with_space_in_series(self): - # GH 6428 - s = Series(['10/18/2006', '10/18/2008', ' ']) - tm.assertRaises(ValueError, lambda: to_datetime(s, errors='raise')) - result_coerce = to_datetime(s, errors='coerce') - expected_coerce = Series([datetime(2006, 10, 18), - datetime(2008, 10, 18), - pd.NaT]) - tm.assert_series_equal(result_coerce, expected_coerce) - result_ignore = to_datetime(s, errors='ignore') - tm.assert_series_equal(result_ignore, s) - - def test_to_datetime_with_apply(self): - # this is only locale tested with US/None locales - _skip_if_has_locale() - - # GH 5195 - # with a format and coerce a single item to_datetime fails - td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1, 2, 3]) - expected = pd.to_datetime(td, format='%b %y') - result = td.apply(pd.to_datetime, format='%b %y') - assert_series_equal(result, expected) - - td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3]) - self.assertRaises(ValueError, - lambda: pd.to_datetime(td, format='%b %y', - errors='raise')) - self.assertRaises(ValueError, - lambda: td.apply(pd.to_datetime, format='%b %y', - errors='raise')) - expected = pd.to_datetime(td, format='%b %y', errors='coerce') - - result = td.apply( - lambda x: pd.to_datetime(x, format='%b %y', errors='coerce')) - assert_series_equal(result, expected) - - def test_to_datetime_types(self): - - # empty string - result = to_datetime('') - self.assertIs(result, NaT) - - result = to_datetime(['', '']) - self.assertTrue(isnull(result).all()) - - # ints - result = Timestamp(0) - expected = to_datetime(0) - self.assertEqual(result, expected) - - # GH 3888 (strings) - expected = to_datetime(['2012'])[0] - result = to_datetime('2012') - self.assertEqual(result, expected) - - # array = ['2012','20120101','20120101 12:01:01'] - array = ['20120101', '20120101 12:01:01'] - expected = list(to_datetime(array)) - result = lmap(Timestamp, array) - tm.assert_almost_equal(result, expected) - - # 
currently fails ### - # result = Timestamp('2012') - # expected = to_datetime('2012') - # self.assertEqual(result, expected) - - def test_to_datetime_unprocessable_input(self): - # GH 4928 - self.assert_numpy_array_equal( - to_datetime([1, '1'], errors='ignore'), - np.array([1, '1'], dtype='O') - ) - self.assertRaises(TypeError, to_datetime, [1, '1'], errors='raise') - - def test_to_datetime_other_datetime64_units(self): - # 5/25/2012 - scalar = np.int64(1337904000000000).view('M8[us]') - as_obj = scalar.astype('O') - - index = DatetimeIndex([scalar]) - self.assertEqual(index[0], scalar.astype('O')) - - value = Timestamp(scalar) - self.assertEqual(value, as_obj) - - def test_to_datetime_list_of_integers(self): - rng = date_range('1/1/2000', periods=20) - rng = DatetimeIndex(rng.values) - - ints = list(rng.asi8) - - result = DatetimeIndex(ints) - - tm.assert_index_equal(rng, result) - - def test_to_datetime_freq(self): - xp = bdate_range('2000-1-1', periods=10, tz='UTC') - rs = xp.to_datetime() - self.assertEqual(xp.freq, rs.freq) - self.assertEqual(xp.tzinfo, rs.tzinfo) - if __name__ == '__main__': + import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) From 7d6afc4b22fde9ce32161917c2440947505bf4ad Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 4 Feb 2017 13:28:49 -0500 Subject: [PATCH 010/933] ENH: .isnull and .notnull have been added as methods to Index to make this more consistent with the Series API Author: Jeff Reback Closes #15300 from jreback/null and squashes the following commits: 8c35656 [Jeff Reback] DOC: move Index.where to shared_docs e4502bf [Jeff Reback] ENH: .isnull and .notnull have been added as methods to Index to make this more consistent with the Series API --- doc/source/api.rst | 8 +++++++ doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/indexes/base.py | 38 +++++++++++++++++++++++++++++++-- pandas/indexes/category.py | 13 +---------- pandas/tests/indexes/common.py | 27 ++++++++++++++++++++++- pandas/tseries/base.py | 13 +---------- 6 files changed, 73 insertions(+), 28 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 92f290b5ee0a9..6c4a3cff5b4cf 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1356,8 +1356,16 @@ Modifying and Computations Index.unique Index.nunique Index.value_counts + +Missing Values +~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + Index.fillna Index.dropna + Index.isnull + Index.notnull Conversion ~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index d76a78c68fb73..c6d757c6884d0 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -125,7 +125,7 @@ Other enhancements - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) - Multiple offset aliases with decimal points are now supported (e.g. 
'0.5min' is parsed as '30s') (:issue:`8419`)
-
+- ``.isnull()`` and ``.notnull()`` have been added to the ``Index`` object to make them more consistent with the ``Series`` API (:issue:`15300`)

 - ``pd.read_gbq`` method now allows query configuration preferences (:issue:`14742`)

 - New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an
diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py
index bc2dce4e97e5b..dcd565ee5f0e9 100644
--- a/pandas/indexes/base.py
+++ b/pandas/indexes/base.py
@@ -564,8 +564,7 @@ def repeat(self, repeats, *args, **kwargs):
         nv.validate_repeat(args, kwargs)
         return self._shallow_copy(self._values.repeat(repeats))

-    def where(self, cond, other=None):
-        """
+    _index_shared_docs['where'] = """
         .. versionadded:: 0.19.0

         Return an Index of same shape as self and whose corresponding
         entries are from self where cond is True and otherwise are from
         other.

         Parameters
         ----------
         cond : boolean same length as self
         other : scalar, or array-like
         """
+
+    @Appender(_index_shared_docs['where'])
+    def where(self, cond, other=None):
         if other is None:
             other = self._na_value
         values = np.where(cond, self.values, other)
@@ -1662,6 +1664,38 @@ def hasnans(self):
         else:
             return False

+    def isnull(self):
+        """
+        Detect missing values.
+
+        .. versionadded:: 0.20.0
+
+        Returns
+        -------
+        a boolean array of whether the values are null
+
+        See also
+        --------
+        pandas.isnull : pandas version
+        """
+        return self._isnan
+
+    def notnull(self):
+        """
+        Inverse of isnull.
+
+        .. versionadded:: 0.20.0
+
+        Returns
+        -------
+        a boolean array of whether the values are not null
+
+        See also
+        --------
+        pandas.notnull : pandas version
+        """
+        return ~self.isnull()
+
     def putmask(self, mask, value):
         """
         return a new Index of the values set with the mask
diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py
index e3ffa40f5f94a..e2e0fd056b111 100644
--- a/pandas/indexes/category.py
+++ b/pandas/indexes/category.py
@@ -332,19 +332,8 @@ def _can_reindex(self, indexer):
         """ always allow reindexing """
         pass

+    @Appender(_index_shared_docs['where'])
     def where(self, cond, other=None):
-        """
-        .. versionadded:: 0.19.0
-
-        Return an Index of same shape as self and whose corresponding
-        entries are from self where cond is True and otherwise are from
-        other.
-
-        Parameters
-        ----------
-        cond : boolean same length as self
-        other : scalar, or array-like
-        """
         if other is None:
             other = self._na_value
         values = np.where(cond, self.values, other)
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
index 5a482acf403cd..81ad0524807f3 100644
--- a/pandas/tests/indexes/common.py
+++ b/pandas/tests/indexes/common.py
@@ -7,7 +7,7 @@
 from pandas import (Series, Index, Float64Index, Int64Index, UInt64Index,
                     RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex,
-                    TimedeltaIndex, PeriodIndex, notnull)
+                    TimedeltaIndex, PeriodIndex, notnull, isnull)
 from pandas.types.common import needs_i8_conversion
 from pandas.util.testing import assertRaisesRegexp
@@ -879,3 +879,28 @@ def test_fillna(self):
                 expected[1] = True
                 self.assert_numpy_array_equal(idx._isnan, expected)
                 self.assertTrue(idx.hasnans)
+
+    def test_nulls(self):
+        # this is really a smoke test for the methods
+        # as these are adequately tested elsewhere
+
+        for name, index in self.indices.items():
+            if len(index) == 0:
+                self.assert_numpy_array_equal(
+                    index.isnull(), np.array([], dtype=bool))
+            elif isinstance(index, MultiIndex):
+                idx = index.copy()
+                msg = "isnull is not defined for MultiIndex"
+                with self.assertRaisesRegexp(NotImplementedError, msg):
+                    idx.isnull()
+            else:
+
+                if not index.hasnans:
+                    self.assert_numpy_array_equal(
+                        index.isnull(), np.zeros(len(index), dtype=bool))
+                    self.assert_numpy_array_equal(
+                        index.notnull(), np.ones(len(index), dtype=bool))
+                else:
+                    result = isnull(index)
+                    self.assert_numpy_array_equal(index.isnull(), result)
+                    self.assert_numpy_array_equal(index.notnull(), ~result)
diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py
index a8dd2238c2063..ee9234d6c8237 100644
--- a/pandas/tseries/base.py
+++ b/pandas/tseries/base.py
@@ -786,19 +786,8 @@ def repeat(self, repeats, *args, **kwargs):
         return self._shallow_copy(self.asi8.repeat(repeats), freq=freq)

+    @Appender(_index_shared_docs['where'])
     def where(self, cond, other=None):
-        """
-        .. versionadded:: 0.19.0
-
-        Return an Index of same shape as self and whose corresponding
-        entries are from self where cond is True and otherwise are from
-        other.
-
-        Parameters
-        ----------
-        cond : boolean same length as self
-        other : scalar, or array-like
-        """
         other = _ensure_datetimelike_to_i8(other)
         values = _ensure_datetimelike_to_i8(self)
         result = np.where(cond, values, other).astype('i8')

From f742a66a9b1c5c7756ecfefb5d38c5fca14700b2 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral
Date: Mon, 6 Feb 2017 10:23:26 -0500
Subject: [PATCH 011/933] BUG: Fix downcast argument for DataFrame.fillna()

closes #15277

Author: Albert Villanova del Moral

Closes #15278 from albertvillanova/fix-15277 and squashes the following commits:

1b594a9 [Albert Villanova del Moral] Fix tab indentation
631a2dc [Albert Villanova del Moral] Add whatsnew note
d691954 [Albert Villanova del Moral] BUG: Fix downcast argument for DataFrame.fillna()
---
 doc/source/whatsnew/v0.20.0.txt | 3 +--
 pandas/core/generic.py | 2 +-
 pandas/tests/frame/test_missing.py | 14 ++++++++++++++
 pandas/tests/series/test_missing.py | 14 ++++++++++++++
 4 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index c6d757c6884d0..16caef57673f7 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -496,6 +496,7 @@ Bug Fixes
 - Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`)
 - Incorrect dtyped ``Series`` was returned by comparison methods (e.g., ``lt``, ``gt``, ...) against a constant for an empty ``DataFrame`` (:issue:`15077`)
 - Bug in ``Series.dt.round`` inconsistent behaviour on NAT's with different arguments (:issue:`14940`)
+- Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when the fillna value was of type ``dict`` (:issue:`15277`)
 - Bug in ``.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`)

@@ -509,7 +510,5 @@ Bug Fixes

-
-
 - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`)
 - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 8074b167ff176..bb2664a5b8d28 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -3347,7 +3347,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
                     if k not in result:
                         continue
                     obj = result[k]
-                    obj.fillna(v, limit=limit, inplace=True)
+                    obj.fillna(v, limit=limit, inplace=True, downcast=downcast)
                 return result
             elif not is_list_like(value):
                 new_data = self._data.fillna(value=value, limit=limit,
diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py
index a8c9c72956463..eabdb79295c27 100644
--- a/pandas/tests/frame/test_missing.py
+++ b/pandas/tests/frame/test_missing.py
@@ -252,6 +252,20 @@ def test_fillna(self):
         result = df.fillna(value={'Date': df['Date2']})
         assert_frame_equal(result, expected)

+    def test_fillna_downcast(self):
+        # GH 15277
+        # infer int64 from float64
+        df = pd.DataFrame({'a': [1., np.nan]})
+        result = df.fillna(0, downcast='infer')
+        expected = pd.DataFrame({'a': [1, 0]})
+        assert_frame_equal(result, expected)
+
+        # infer int64 from float64 when the fillna value is a dict
+        df = pd.DataFrame({'a': [1., np.nan]})
+        result = df.fillna({'a': 0}, downcast='infer')
+        expected = pd.DataFrame({'a': [1, 0]})
+        assert_frame_equal(result, expected)
+
     def test_fillna_dtype_conversion(self):
         # make sure that fillna on an empty frame works
         df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
= DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 8cf0d190a95cc..8c877ade6fe98 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -273,6 +273,20 @@ def test_datetime64tz_fillna_round_issue(self): assert_series_equal(filled, expected) + def test_fillna_downcast(self): + # GH 15277 + # infer int64 from float64 + s = pd.Series([1., np.nan]) + result = s.fillna(0, downcast='infer') + expected = pd.Series([1, 0]) + assert_series_equal(result, expected) + + # infer int64 from float64 when fillna value is a dict + s = pd.Series([1., np.nan]) + result = s.fillna({1: 0}, downcast='infer') + expected = pd.Series([1, 0]) + assert_series_equal(result, expected) + def test_fillna_int(self): s = Series(np.random.randint(-100, 100, 50)) s.fillna(method='ffill', inplace=True) From f93714b793f170bd12f5c818752d2b862cd0045b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 6 Feb 2017 16:55:38 +0100 Subject: [PATCH 012/933] DOC/CI: ensure correct pandas version (GH15311) (#15317) --- ci/build_docs.sh | 2 +- ci/requirements-3.5_DOC_BUILD.sh | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 4dc9a203f1978..5dc649a91c4f7 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -22,8 +22,8 @@ if [ x"$DOC_BUILD" != x"" ]; then echo "Will build docs" source activate pandas - conda install -n pandas -c r r rpy2 --yes + # install sudo deps time sudo apt-get $APT_ARGS install dvipng texlive-latex-base texlive-latex-extra mv "$TRAVIS_BUILD_DIR"/doc /tmp diff --git a/ci/requirements-3.5_DOC_BUILD.sh b/ci/requirements-3.5_DOC_BUILD.sh index ca18ad976d46d..25bc63acc96d1 100644 --- a/ci/requirements-3.5_DOC_BUILD.sh +++ b/ci/requirements-3.5_DOC_BUILD.sh @@ -2,6 +2,8 @@ source activate pandas -echo "install DOC_BUILD" +echo "[install DOC_BUILD deps]" conda install -n pandas -c conda-forge feather-format + +conda install -n pandas -c r r rpy2 --yes From 34cdfa48881118a6327fe0e599fb41467ef6ffcc Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 6 Feb 2017 18:03:26 -0500 Subject: [PATCH 013/933] CLN: reorg pandas/io/json to sub-dirs xref #14904 Author: Jeff Reback Closes #15322 from jreback/json and squashes the following commits: 0c2da60 [Jeff Reback] DOC: whatsnew update fa3deef [Jeff Reback] CLN: reorg pandas/io/json to sub-dirs --- doc/source/whatsnew/v0.20.0.txt | 3 + pandas/io/json/__init__.py | 4 + pandas/io/{ => json}/json.py | 246 +---------------- pandas/io/json/normalize.py | 248 ++++++++++++++++++ .../{test_json_norm.py => test_normalize.py} | 3 +- setup.py | 1 + 6 files changed, 259 insertions(+), 246 deletions(-) create mode 100644 pandas/io/json/__init__.py rename pandas/io/{ => json}/json.py (73%) create mode 100644 pandas/io/json/normalize.py rename pandas/io/tests/json/{test_json_norm.py => test_normalize.py} (99%) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 16caef57673f7..1a32498d53c23 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -96,6 +96,9 @@ support for bz2 compression in the python 2 c-engine improved (:issue:`14874`). .. _whatsnew_0200.enhancements.uint64_support: +UInt64 Support Improved +^^^^^^^^^^^^^^^^^^^^^^^ + Pandas has significantly improved support for operations involving unsigned, or purely non-negative, integers. 
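A minimal sketch of the improved behavior (illustrative only, not part of the patch itself; assumes a 0.20-line build, with values chosen to exceed the int64 range):

    import numpy as np
    import pandas as pd

    # Under prior releases these values could be coerced to float64 and lose precision.
    s = pd.Series([2**63, 2**63 + 1], dtype=np.uint64)
    print(s.unique())  # both values survive intact as uint64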
Previously, handling these integers would result in improper rounding or data-type casting, leading to incorrect results. diff --git a/pandas/io/json/__init__.py b/pandas/io/json/__init__.py new file mode 100644 index 0000000000000..a9390a04cc2cd --- /dev/null +++ b/pandas/io/json/__init__.py @@ -0,0 +1,4 @@ +from .json import to_json, read_json, loads, dumps # noqa +from .normalize import json_normalize # noqa + +del json, normalize # noqa diff --git a/pandas/io/json.py b/pandas/io/json/json.py similarity index 73% rename from pandas/io/json.py rename to pandas/io/json/json.py index 767a2212d92da..d29f4a371dd4d 100644 --- a/pandas/io/json.py +++ b/pandas/io/json/json.py @@ -1,8 +1,6 @@ # pylint: disable-msg=E1101,W0613,W0603 import os -import copy -from collections import defaultdict import numpy as np import pandas.json as _json @@ -13,6 +11,7 @@ from pandas.io.common import get_filepath_or_buffer, _get_handle from pandas.core.common import AbstractMethodError from pandas.formats.printing import pprint_thing +from .normalize import _convert_to_line_delimits loads = _json.loads dumps = _json.dumps @@ -641,246 +640,3 @@ def is_ok(col): lambda col, c: self._try_convert_to_date(c), lambda col, c: ((self.keep_default_dates and is_ok(col)) or col in convert_dates)) - -# --------------------------------------------------------------------- -# JSON normalization routines - - -def _convert_to_line_delimits(s): - """Helper function that converts json lists to line delimited json.""" - - # Determine we have a JSON list to turn to lines otherwise just return the - # json object, only lists can - if not s[0] == '[' and s[-1] == ']': - return s - s = s[1:-1] - - from pandas.lib import convert_json_to_lines - return convert_json_to_lines(s) - - -def nested_to_record(ds, prefix="", level=0): - """a simplified json_normalize - - converts a nested dict into a flat dict ("record"), unlike json_normalize, - it does not attempt to extract a subset of the data. - - Parameters - ---------- - ds : dict or list of dicts - prefix: the prefix, optional, default: "" - level: the number of levels in the jason string, optional, default: 0 - - Returns - ------- - d - dict or list of dicts, matching `ds` - - Examples - -------- - - IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2), - nested=dict(e=dict(c=1,d=2),d=2))) - Out[52]: - {'dict1.c': 1, - 'dict1.d': 2, - 'flat1': 1, - 'nested.d': 2, - 'nested.e.c': 1, - 'nested.e.d': 2} - """ - singleton = False - if isinstance(ds, dict): - ds = [ds] - singleton = True - - new_ds = [] - for d in ds: - - new_d = copy.deepcopy(d) - for k, v in d.items(): - # each key gets renamed with prefix - if not isinstance(k, compat.string_types): - k = str(k) - if level == 0: - newkey = k - else: - newkey = prefix + '.' + k - - # only dicts gets recurse-flattend - # only at level>1 do we rename the rest of the keys - if not isinstance(v, dict): - if level != 0: # so we skip copying for top level, common case - v = new_d.pop(k) - new_d[newkey] = v - continue - else: - v = new_d.pop(k) - new_d.update(nested_to_record(v, newkey, level + 1)) - new_ds.append(new_d) - - if singleton: - return new_ds[0] - return new_ds - - -def json_normalize(data, record_path=None, meta=None, - meta_prefix=None, - record_prefix=None, - errors='raise'): - - """ - "Normalize" semi-structured JSON data into a flat table - - Parameters - ---------- - data : dict or list of dicts - Unserialized JSON objects - record_path : string or list of strings, default None - Path in each object to list of records. 
If not passed, data will be - assumed to be an array of records - meta : list of paths (string or list of strings), default None - Fields to use as metadata for each record in resulting table - record_prefix : string, default None - If True, prefix records with dotted (?) path, e.g. foo.bar.field if - path to records is ['foo', 'bar'] - meta_prefix : string, default None - errors : {'raise', 'ignore'}, default 'raise' - - * ignore : will ignore KeyError if keys listed in meta are not - always present - * raise : will raise KeyError if keys listed in meta are not - always present - - .. versionadded:: 0.20.0 - - Returns - ------- - frame : DataFrame - - Examples - -------- - - >>> data = [{'state': 'Florida', - ... 'shortname': 'FL', - ... 'info': { - ... 'governor': 'Rick Scott' - ... }, - ... 'counties': [{'name': 'Dade', 'population': 12345}, - ... {'name': 'Broward', 'population': 40000}, - ... {'name': 'Palm Beach', 'population': 60000}]}, - ... {'state': 'Ohio', - ... 'shortname': 'OH', - ... 'info': { - ... 'governor': 'John Kasich' - ... }, - ... 'counties': [{'name': 'Summit', 'population': 1234}, - ... {'name': 'Cuyahoga', 'population': 1337}]}] - >>> from pandas.io.json import json_normalize - >>> result = json_normalize(data, 'counties', ['state', 'shortname', - ... ['info', 'governor']]) - >>> result - name population info.governor state shortname - 0 Dade 12345 Rick Scott Florida FL - 1 Broward 40000 Rick Scott Florida FL - 2 Palm Beach 60000 Rick Scott Florida FL - 3 Summit 1234 John Kasich Ohio OH - 4 Cuyahoga 1337 John Kasich Ohio OH - - """ - def _pull_field(js, spec): - result = js - if isinstance(spec, list): - for field in spec: - result = result[field] - else: - result = result[spec] - - return result - - # A bit of a hackjob - if isinstance(data, dict): - data = [data] - - if record_path is None: - if any([isinstance(x, dict) for x in compat.itervalues(data[0])]): - # naive normalization, this is idempotent for flat records - # and potentially will inflate the data considerably for - # deeply nested structures: - # {VeryLong: { b: 1,c:2}} -> {VeryLong.b:1 ,VeryLong.c:@} - # - # TODO: handle record value which are lists, at least error - # reasonably - data = nested_to_record(data) - return DataFrame(data) - elif not isinstance(record_path, list): - record_path = [record_path] - - if meta is None: - meta = [] - elif not isinstance(meta, list): - meta = [meta] - - for i, x in enumerate(meta): - if not isinstance(x, list): - meta[i] = [x] - - # Disastrously inefficient for now - records = [] - lengths = [] - - meta_vals = defaultdict(list) - meta_keys = ['.'.join(val) for val in meta] - - def _recursive_extract(data, path, seen_meta, level=0): - if len(path) > 1: - for obj in data: - for val, key in zip(meta, meta_keys): - if level + 1 == len(val): - seen_meta[key] = _pull_field(obj, val[-1]) - - _recursive_extract(obj[path[0]], path[1:], - seen_meta, level=level + 1) - else: - for obj in data: - recs = _pull_field(obj, path[0]) - - # For repeating the metadata later - lengths.append(len(recs)) - - for val, key in zip(meta, meta_keys): - if level + 1 > len(val): - meta_val = seen_meta[key] - else: - try: - meta_val = _pull_field(obj, val[level:]) - except KeyError as e: - if errors == 'ignore': - meta_val = np.nan - else: - raise \ - KeyError("Try running with " - "errors='ignore' as key " - "%s is not always present", e) - meta_vals[key].append(meta_val) - - records.extend(recs) - - _recursive_extract(data, record_path, {}, level=0) - - result = DataFrame(records) - - 
if record_prefix is not None: - result.rename(columns=lambda x: record_prefix + x, inplace=True) - - # Data types, a problem - for k, v in compat.iteritems(meta_vals): - if meta_prefix is not None: - k = meta_prefix + k - - if k in result: - raise ValueError('Conflicting metadata name %s, ' - 'need distinguishing prefix ' % k) - - result[k] = np.array(v).repeat(lengths) - - return result diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py new file mode 100644 index 0000000000000..aa80954233682 --- /dev/null +++ b/pandas/io/json/normalize.py @@ -0,0 +1,248 @@ +# --------------------------------------------------------------------- +# JSON normalization routines + +import copy +from collections import defaultdict +import numpy as np + +from pandas.lib import convert_json_to_lines +from pandas import compat, DataFrame + + +def _convert_to_line_delimits(s): + """Helper function that converts JSON lists to line-delimited JSON.""" + + # Determine whether we have a JSON list to turn into lines; otherwise just + # return the JSON object as-is, since only lists can be converted + if not (s[0] == '[' and s[-1] == ']'): + return s + s = s[1:-1] + + return convert_json_to_lines(s) + + +def nested_to_record(ds, prefix="", level=0): + """a simplified json_normalize + + Converts a nested dict into a flat dict ("record"); unlike json_normalize, + it does not attempt to extract a subset of the data. + + Parameters + ---------- + ds : dict or list of dicts + prefix: the prefix, optional, default: "" + level: the number of levels in the json string, optional, default: 0 + + Returns + ------- + d - dict or list of dicts, matching `ds` + + Examples + -------- + + In [52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2), + nested=dict(e=dict(c=1,d=2),d=2))) + Out[52]: + {'dict1.c': 1, + 'dict1.d': 2, + 'flat1': 1, + 'nested.d': 2, + 'nested.e.c': 1, + 'nested.e.d': 2} + """ + singleton = False + if isinstance(ds, dict): + ds = [ds] + singleton = True + + new_ds = [] + for d in ds: + + new_d = copy.deepcopy(d) + for k, v in d.items(): + # each key gets renamed with prefix + if not isinstance(k, compat.string_types): + k = str(k) + if level == 0: + newkey = k + else: + newkey = prefix + '.' + k + + # only dicts get recursively flattened + # only at level>1 do we rename the rest of the keys + if not isinstance(v, dict): + if level != 0: # so we skip copying for top level, common case + v = new_d.pop(k) + new_d[newkey] = v + continue + else: + v = new_d.pop(k) + new_d.update(nested_to_record(v, newkey, level + 1)) + new_ds.append(new_d) + + if singleton: + return new_ds[0] + return new_ds + + +def json_normalize(data, record_path=None, meta=None, + meta_prefix=None, + record_prefix=None, + errors='raise'): + + """ + "Normalize" semi-structured JSON data into a flat table + + Parameters + ---------- + data : dict or list of dicts + Unserialized JSON objects + record_path : string or list of strings, default None + Path in each object to list of records. If not passed, data will be + assumed to be an array of records + meta : list of paths (string or list of strings), default None + Fields to use as metadata for each record in resulting table + record_prefix : string, default None + If not None, prefix records with the dotted path, e.g. foo.bar.field if + path to records is ['foo', 'bar'] + meta_prefix : string, default None + errors : {'raise', 'ignore'}, default 'raise' + + * ignore : will ignore KeyError if keys listed in meta are not + always present + * raise : will raise KeyError if keys listed in meta are not + always present + + .. 
versionadded:: 0.20.0 + + Returns + ------- + frame : DataFrame + + Examples + -------- + + >>> data = [{'state': 'Florida', + ... 'shortname': 'FL', + ... 'info': { + ... 'governor': 'Rick Scott' + ... }, + ... 'counties': [{'name': 'Dade', 'population': 12345}, + ... {'name': 'Broward', 'population': 40000}, + ... {'name': 'Palm Beach', 'population': 60000}]}, + ... {'state': 'Ohio', + ... 'shortname': 'OH', + ... 'info': { + ... 'governor': 'John Kasich' + ... }, + ... 'counties': [{'name': 'Summit', 'population': 1234}, + ... {'name': 'Cuyahoga', 'population': 1337}]}] + >>> from pandas.io.json import json_normalize + >>> result = json_normalize(data, 'counties', ['state', 'shortname', + ... ['info', 'governor']]) + >>> result + name population info.governor state shortname + 0 Dade 12345 Rick Scott Florida FL + 1 Broward 40000 Rick Scott Florida FL + 2 Palm Beach 60000 Rick Scott Florida FL + 3 Summit 1234 John Kasich Ohio OH + 4 Cuyahoga 1337 John Kasich Ohio OH + + """ + def _pull_field(js, spec): + result = js + if isinstance(spec, list): + for field in spec: + result = result[field] + else: + result = result[spec] + + return result + + # A bit of a hackjob + if isinstance(data, dict): + data = [data] + + if record_path is None: + if any([isinstance(x, dict) for x in compat.itervalues(data[0])]): + # naive normalization, this is idempotent for flat records + # and potentially will inflate the data considerably for + # deeply nested structures: + # {VeryLong: {b: 1, c: 2}} -> {VeryLong.b: 1, VeryLong.c: 2} + # + # TODO: handle record values which are lists, at least error + # reasonably + data = nested_to_record(data) + return DataFrame(data) + elif not isinstance(record_path, list): + record_path = [record_path] + + if meta is None: + meta = [] + elif not isinstance(meta, list): + meta = [meta] + + for i, x in enumerate(meta): + if not isinstance(x, list): + meta[i] = [x] + + # Disastrously inefficient for now + records = [] + lengths = [] + + meta_vals = defaultdict(list) + meta_keys = ['.'.join(val) for val in meta] + + def _recursive_extract(data, path, seen_meta, level=0): + if len(path) > 1: + for obj in data: + for val, key in zip(meta, meta_keys): + if level + 1 == len(val): + seen_meta[key] = _pull_field(obj, val[-1]) + + _recursive_extract(obj[path[0]], path[1:], + seen_meta, level=level + 1) + else: + for obj in data: + recs = _pull_field(obj, path[0]) + + # For repeating the metadata later + lengths.append(len(recs)) + + for val, key in zip(meta, meta_keys): + if level + 1 > len(val): + meta_val = seen_meta[key] + else: + try: + meta_val = _pull_field(obj, val[level:]) + except KeyError as e: + if errors == 'ignore': + meta_val = np.nan + else: + raise KeyError("Try running with " + "errors='ignore' as key " + "%s is not always present" % e) + meta_vals[key].append(meta_val) + + records.extend(recs) + + _recursive_extract(data, record_path, {}, level=0) + + result = DataFrame(records) + + if record_prefix is not None: + result.rename(columns=lambda x: record_prefix + x, inplace=True) + + # Data types, a problem + for k, v in compat.iteritems(meta_vals): + if meta_prefix is not None: + k = meta_prefix + k + + if k in result: + raise ValueError('Conflicting metadata name %s, ' + 'need distinguishing prefix ' % k) + + result[k] = np.array(v).repeat(lengths) + + return result diff --git a/pandas/io/tests/json/test_json_norm.py b/pandas/io/tests/json/test_normalize.py similarity index 99% rename from pandas/io/tests/json/test_json_norm.py rename to
pandas/io/tests/json/test_normalize.py index 36110898448ea..e5aba43648d0c 100644 --- a/pandas/io/tests/json/test_json_norm.py +++ b/pandas/io/tests/json/test_normalize.py @@ -7,7 +7,8 @@ import pandas.util.testing as tm from pandas import compat -from pandas.io.json import json_normalize, nested_to_record +from pandas.io.json import json_normalize +from pandas.io.json.normalize import nested_to_record def _assert_equal_data(left, right): diff --git a/setup.py b/setup.py index 93a044bc3cc7d..4d6bb76fd6b7c 100755 --- a/setup.py +++ b/setup.py @@ -631,6 +631,7 @@ def pxd(name): 'pandas.core', 'pandas.indexes', 'pandas.io', + 'pandas.io.json', 'pandas.io.sas', 'pandas.formats', 'pandas.sparse', From 6d2293f7399390800ec00b2cf78afa3b9043bef9 Mon Sep 17 00:00:00 2001 From: Nicholas Ver Halen Date: Tue, 7 Feb 2017 08:47:18 -0500 Subject: [PATCH 014/933] BUG: bug in passing 'on=' keyword for groupby(..).resample() closes #15021 Author: Nicholas Ver Halen Closes #15326 from verhalenn/issue15021 and squashes the following commits: 9fc3b4f [Nicholas Ver Halen] Updated the whatsnew for issue 15021 ec1f316 [Nicholas Ver Halen] Created a test for GH 15021 b8b10b0 [Nicholas Ver Halen] Added the on arg to resample on a grouped dataframe. --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tseries/resample.py | 4 ++++ pandas/tseries/tests/test_resample.py | 14 ++++++++++++++ 3 files changed, 19 insertions(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 1a32498d53c23..3f6c06e20b546 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -434,6 +434,7 @@ Bug Fixes - Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`) - Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) +- Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. (:issue:`15021`) - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index e93e5637099c1..5692d6c5cabde 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -975,6 +975,10 @@ def resample(obj, kind=None, **kwds): def get_resampler_for_grouping(groupby, rule, how=None, fill_method=None, limit=None, kind=None, **kwargs): """ return our appropriate resampler when grouping as well """ + + # .resample uses 'on' similar to how .groupby uses 'key' + kwargs['key'] = kwargs.pop('on', None) + tg = TimeGrouper(freq=rule, **kwargs) resampler = tg._get_resampler(groupby.obj, kind=kind) r = resampler._get_resampler_for_grouping(groupby=groupby) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 56953541265a6..c40f930fbd094 100755 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -217,6 +217,20 @@ def test_groupby_resample_api(self): lambda x: x.resample('1D').ffill())[['val']] assert_frame_equal(result, expected) + def test_groupby_resample_on_api(self): + + # GH 15021 + # .groupby(...).resample(on=...) results in an unexpected + # keyword warning. 
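+ # As a usage sketch (the frame constructed below supplies concrete
+ # 'key'/'dates' columns): with this fix,
+ #   df.groupby('key').resample('D', on='dates').mean()
+ # is expected to match the long-standing spelling
+ #   df.set_index('dates').groupby('key').resample('D').mean()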
+ df = pd.DataFrame({'key': ['A', 'B'] * 5, + 'dates': pd.date_range('2016-01-01', periods=10), + 'values': np.random.randn(10)}) + + expected = df.set_index('dates').groupby('key').resample('D').mean() + + result = df.groupby('key').resample('D', on='dates').mean() + assert_frame_equal(result, expected) + def test_plot_api(self): tm._skip_if_no_mpl() From 8d574508d458072cc85488d1b432a8fa8813545a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Feb 2017 11:13:29 -0500 Subject: [PATCH 015/933] TST: remove __main__ from all test files (#15330) --- pandas/api/tests/test_api.py | 5 ---- pandas/computation/tests/test_compat.py | 5 ---- pandas/computation/tests/test_eval.py | 5 ---- pandas/core/frame.py | 5 ---- pandas/formats/format.py | 14 ----------- pandas/io/tests/json/test_normalize.py | 7 ------ pandas/io/tests/json/test_pandas.py | 5 ---- pandas/io/tests/json/test_ujson.py | 5 ---- pandas/io/tests/parser/test_network.py | 4 --- pandas/io/tests/parser/test_parsers.py | 5 ---- pandas/io/tests/parser/test_textreader.py | 5 ---- pandas/io/tests/parser/test_unsupported.py | 6 ----- pandas/io/tests/test_date_converters.py | 6 ----- pandas/io/tests/test_excel.py | 5 ---- pandas/io/tests/test_feather.py | 5 ---- pandas/io/tests/test_gbq.py | 4 --- pandas/io/tests/test_html.py | 4 --- pandas/io/tests/test_pickle.py | 6 ----- pandas/io/tests/test_pytables.py | 6 ----- pandas/io/tests/test_s3.py | 6 +---- pandas/io/tests/test_sql.py | 5 ---- pandas/io/tests/test_stata.py | 5 ---- pandas/sparse/tests/test_array.py | 6 ----- pandas/sparse/tests/test_combine_concat.py | 7 ------ pandas/sparse/tests/test_frame.py | 5 ---- pandas/sparse/tests/test_libsparse.py | 10 ++------ pandas/sparse/tests/test_list.py | 6 ----- pandas/sparse/tests/test_series.py | 6 ----- pandas/stats/tests/test_fama_macbeth.py | 5 ---- pandas/stats/tests/test_math.py | 4 --- pandas/stats/tests/test_ols.py | 6 ----- pandas/stats/tests/test_var.py | 5 ---- pandas/tests/formats/test_format.py | 5 ---- pandas/tests/formats/test_printing.py | 6 ----- pandas/tests/frame/test_analytics.py | 4 --- pandas/tests/frame/test_asof.py | 6 ----- pandas/tests/frame/test_constructors.py | 8 ------ pandas/tests/frame/test_indexing.py | 6 ----- pandas/tests/frame/test_misc_api.py | 6 ----- pandas/tests/frame/test_missing.py | 7 ------ pandas/tests/frame/test_operators.py | 5 ---- pandas/tests/frame/test_query_eval.py | 5 ---- pandas/tests/frame/test_timeseries.py | 6 ----- pandas/tests/frame/test_to_csv.py | 6 ----- pandas/tests/groupby/test_aggregate.py | 6 ----- pandas/tests/groupby/test_categorical.py | 7 ------ pandas/tests/groupby/test_filters.py | 7 ------ pandas/tests/groupby/test_groupby.py | 5 ---- pandas/tests/indexes/test_base.py | 6 ----- pandas/tests/indexes/test_numeric.py | 6 ----- pandas/tests/indexing/test_callable.py | 6 ----- pandas/tests/indexing/test_indexing.py | 5 ---- pandas/tests/plotting/test_boxplot_method.py | 5 ---- pandas/tests/plotting/test_datetimelike.py | 10 ++------ pandas/tests/plotting/test_frame.py | 10 ++------ pandas/tests/plotting/test_groupby.py | 11 ++------ pandas/tests/plotting/test_hist_method.py | 10 +------- pandas/tests/plotting/test_misc.py | 9 +------ pandas/tests/plotting/test_series.py | 12 +++------ pandas/tests/series/test_asof.py | 7 ------ pandas/tests/series/test_indexing.py | 6 ----- pandas/tests/series/test_missing.py | 7 ------ pandas/tests/series/test_timeseries.py | 6 ----- pandas/tests/test_algos.py | 6 ----- pandas/tests/test_base.py | 10 +------- 
pandas/tests/test_categorical.py | 7 ------ pandas/tests/test_common.py | 6 ----- pandas/tests/test_expressions.py | 10 ++------ pandas/tests/test_generic.py | 4 --- pandas/tests/test_internals.py | 5 ---- pandas/tests/test_join.py | 6 ----- pandas/tests/test_lib.py | 7 ------ pandas/tests/test_multilevel.py | 5 ---- pandas/tests/test_nanops.py | 6 ----- pandas/tests/test_panel.py | 5 ---- pandas/tests/test_panel4d.py | 5 ---- pandas/tests/test_panelnd.py | 6 ----- pandas/tests/test_reshape.py | 6 ----- pandas/tests/test_stats.py | 6 ----- pandas/tests/test_strings.py | 7 ------ pandas/tests/test_take.py | 6 ----- pandas/tests/test_testing.py | 5 ---- pandas/tests/test_util.py | 7 ------ pandas/tests/types/test_cast.py | 7 ------ pandas/tests/types/test_common.py | 6 ----- pandas/tests/types/test_concat.py | 6 ----- pandas/tests/types/test_dtypes.py | 6 ----- pandas/tests/types/test_generic.py | 6 ----- pandas/tests/types/test_inference.py | 6 ----- pandas/tests/types/test_io.py | 7 ------ pandas/tests/types/test_missing.py | 6 ----- pandas/tools/plotting.py | 25 ------------------- pandas/tools/tests/test_concat.py | 7 ------ pandas/tools/tests/test_join.py | 7 ------ pandas/tools/tests/test_merge.py | 7 ------ pandas/tools/tests/test_merge_asof.py | 6 ----- pandas/tools/tests/test_merge_ordered.py | 6 ----- pandas/tools/tests/test_pivot.py | 6 ----- pandas/tools/tests/test_tile.py | 6 ----- pandas/tools/tests/test_util.py | 5 ---- pandas/tseries/tests/test_base.py | 7 ------ pandas/tseries/tests/test_converter.py | 9 +------ pandas/tseries/tests/test_daterange.py | 6 ----- pandas/tseries/tests/test_holiday.py | 6 ----- pandas/tseries/tests/test_offsets.py | 10 +++----- pandas/tseries/tests/test_period.py | 6 ----- pandas/tseries/tests/test_resample.py | 6 ----- pandas/tseries/tests/test_timedeltas.py | 6 ----- .../tseries/tests/test_timeseries_legacy.py | 5 ---- pandas/tseries/tests/test_timezones.py | 7 ------ pandas/tseries/tests/test_tslib.py | 6 ----- pandas/tseries/tests/test_util.py | 6 ----- 112 files changed, 21 insertions(+), 703 deletions(-) diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py index 410d70c65404f..f925fd792f9ca 100644 --- a/pandas/api/tests/test_api.py +++ b/pandas/api/tests/test_api.py @@ -227,8 +227,3 @@ def test_deprecation_access_obj(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): pd.datetools.monthEnd - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/computation/tests/test_compat.py b/pandas/computation/tests/test_compat.py index 8e8924379f153..900dd2c28b4c5 100644 --- a/pandas/computation/tests/test_compat.py +++ b/pandas/computation/tests/test_compat.py @@ -61,8 +61,3 @@ def testit(): testit() else: testit() - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 3a446bfc36c21..dbac72c619a52 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -1977,8 +1977,3 @@ def test_validate_bool_args(self): for value in invalid_values: with self.assertRaises(ValueError): pd.eval("2+2", inplace=value) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cf306034001db..cc81c66100a6f 
100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5762,8 +5762,3 @@ def boxplot(self, column=None, by=None, ax=None, fontsize=None, rot=0, ops.add_flex_arithmetic_methods(DataFrame, **ops.frame_flex_funcs) ops.add_special_arithmetic_methods(DataFrame, **ops.frame_special_funcs) - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/formats/format.py b/pandas/formats/format.py index adfb54c02d926..3bac7d2821760 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -2679,17 +2679,3 @@ def _binify(cols, line_width): bins.append(len(cols)) return bins - - -if __name__ == '__main__': - arr = np.array([746.03, 0.00, 5620.00, 1592.36]) - # arr = np.array([11111111.1, 1.55]) - # arr = [314200.0034, 1.4125678] - arr = np.array( - [327763.3119, 345040.9076, 364460.9915, 398226.8688, 383800.5172, - 433442.9262, 539415.0568, 568590.4108, 599502.4276, 620921.8593, - 620898.5294, 552427.1093, 555221.2193, 519639.7059, 388175.7, - 379199.5854, 614898.25, 504833.3333, 560600., 941214.2857, 1134250., - 1219550., 855736.85, 1042615.4286, 722621.3043, 698167.1818, 803750.]) - fmt = FloatArrayFormatter(arr, digits=7) - print(fmt.get_result()) diff --git a/pandas/io/tests/json/test_normalize.py b/pandas/io/tests/json/test_normalize.py index e5aba43648d0c..c60b81ffe504d 100644 --- a/pandas/io/tests/json/test_normalize.py +++ b/pandas/io/tests/json/test_normalize.py @@ -1,5 +1,3 @@ -import nose - from pandas import DataFrame import numpy as np import json @@ -283,8 +281,3 @@ def test_json_normalize_errors(self): ['general', 'trade_version']], errors='raise' ) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', - '--pdb-failure', '-s'], exit=False) diff --git a/pandas/io/tests/json/test_pandas.py b/pandas/io/tests/json/test_pandas.py index 345d181a0e53a..ee5039c38b182 100644 --- a/pandas/io/tests/json/test_pandas.py +++ b/pandas/io/tests/json/test_pandas.py @@ -1044,8 +1044,3 @@ def roundtrip(s, encoding='latin-1'): for s in examples: roundtrip(s) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', - '--pdb-failure', '-s'], exit=False) diff --git a/pandas/io/tests/json/test_ujson.py b/pandas/io/tests/json/test_ujson.py index 704023bd847b7..3da61b7696fdc 100644 --- a/pandas/io/tests/json/test_ujson.py +++ b/pandas/io/tests/json/test_ujson.py @@ -1611,8 +1611,3 @@ def test_encodeSet(self): def _clean_dict(d): return dict((str(k), v) for k, v in compat.iteritems(d)) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/parser/test_network.py b/pandas/io/tests/parser/test_network.py index d84c2ae3beb0c..e06f94c780c8b 100644 --- a/pandas/io/tests/parser/test_network.py +++ b/pandas/io/tests/parser/test_network.py @@ -182,7 +182,3 @@ def test_s3_fails(self): # It's irrelevant here that this isn't actually a table. 
with tm.assertRaises(IOError): read_csv('s3://cant_get_it/') - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/parser/test_parsers.py b/pandas/io/tests/parser/test_parsers.py index a90f546d37fc8..93b5fdcffed4c 100644 --- a/pandas/io/tests/parser/test_parsers.py +++ b/pandas/io/tests/parser/test_parsers.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import os -import nose import pandas.util.testing as tm @@ -99,7 +98,3 @@ def read_table(self, *args, **kwds): kwds = kwds.copy() kwds['engine'] = self.engine return read_table(*args, **kwds) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/parser/test_textreader.py b/pandas/io/tests/parser/test_textreader.py index 98cb09cd85480..0e91ca806e8fe 100644 --- a/pandas/io/tests/parser/test_textreader.py +++ b/pandas/io/tests/parser/test_textreader.py @@ -10,7 +10,6 @@ import os import sys -import nose from numpy import nan import numpy as np @@ -402,7 +401,3 @@ def test_empty_csv_input(self): def assert_array_dicts_equal(left, right): for k, v in compat.iteritems(left): assert(np.array_equal(v, right[k])) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/io/tests/parser/test_unsupported.py index 4d93df16a0279..e941c9186cd6a 100644 --- a/pandas/io/tests/parser/test_unsupported.py +++ b/pandas/io/tests/parser/test_unsupported.py @@ -9,8 +9,6 @@ test suite as new feature support is added to the parsers. """ -import nose - import pandas.io.parsers as parsers import pandas.util.testing as tm @@ -142,7 +140,3 @@ def test_deprecated_args(self): kwargs = {arg: non_default_val} read_csv(StringIO(data), engine=engine, **kwargs) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_date_converters.py b/pandas/io/tests/test_date_converters.py index 99abbacb604fa..5b54925c65fbd 100644 --- a/pandas/io/tests/test_date_converters.py +++ b/pandas/io/tests/test_date_converters.py @@ -1,8 +1,6 @@ from pandas.compat import StringIO from datetime import date, datetime -import nose - import numpy as np from pandas import DataFrame, MultiIndex @@ -150,7 +148,3 @@ def test_parse_date_column_with_empty_string(self): [621, ' ']] expected = DataFrame(expected_data, columns=['case', 'opdate']) assert_frame_equal(result, expected) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 12aecfd50c3a6..2791e397d5b86 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -2325,8 +2325,3 @@ def check_called(func): check_called(lambda: panel.to_excel('something.test')) check_called(lambda: df.to_excel('something.xlsx')) check_called(lambda: df.to_excel('something.xls', engine='dummy')) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_feather.py b/pandas/io/tests/test_feather.py index b8b85d7dbbece..dcb057ec30004 100644 --- a/pandas/io/tests/test_feather.py +++ b/pandas/io/tests/test_feather.py @@ -116,8 +116,3 @@ def test_write_with_index(self): df.index = [0, 1, 2] df.columns = 
pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)]), self.check_error_on_write(df, ValueError) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 8a414dcd3ba4f..ac481a44de5e8 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -1224,7 +1224,3 @@ def test_upload_data_as_service_account_with_key_contents(self): project_id=_get_project_id(), private_key=_get_private_key_contents()) self.assertEqual(result['NUM_ROWS'][0], test_size) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py index 9ac8def3a074d..356adb92829c6 100644 --- a/pandas/io/tests/test_html.py +++ b/pandas/io/tests/test_html.py @@ -918,7 +918,3 @@ def test_same_ordering(): dfs_lxml = read_html(filename, index_col=0, flavor=['lxml']) dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4']) assert_framelist_equal(dfs_lxml, dfs_bs4) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py index a49f50b1bcb9f..b5c316b326b8d 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -283,9 +283,3 @@ def test_pickle_v0_15_2(self): # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) # tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - # '--with-coverage', '--cover-package=pandas.core'], - exit=False) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 40db10c42d5a7..f4f03856f94e2 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -5516,9 +5516,3 @@ def _test_sort(obj): return obj.reindex(major=sorted(obj.major_axis)) else: raise ValueError('type not supported here') - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_s3.py b/pandas/io/tests/test_s3.py index 8058698a906ea..2983fa647445c 100644 --- a/pandas/io/tests/test_s3.py +++ b/pandas/io/tests/test_s3.py @@ -1,14 +1,10 @@ -import nose from pandas.util import testing as tm from pandas.io.common import _is_s3_url class TestS3URL(tm.TestCase): + def test_is_s3_url(self): self.assertTrue(_is_s3_url("s3://pandas/somethingelse.com")) self.assertFalse(_is_s3_url("s4://pandas/somethingelse.com")) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 9e639f7ef6057..4bcde764001c1 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -2658,8 +2658,3 @@ def clean_up(test_table_to_drop): self.assertEqual(tquery(sql_select, con=self.conn), [(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')]) clean_up(table_name) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index 8cfd5d98fe05f..fcb935925e61f 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -1276,8 +1276,3 @@ def 
test_out_of_range_float(self): original.to_stata(path) tm.assertTrue('ColumnTooBig' in cm.exception) tm.assertTrue('infinity' in cm.exception) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index 592926f8e821d..55f292a8a231a 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -810,9 +810,3 @@ def test_ufunc_args(self): sparse = SparseArray([1, -1, 0, -2], fill_value=0) result = SparseArray([2, 0, 1, -1], fill_value=1) tm.assert_sp_array_equal(np.add(sparse, 1), result) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/sparse/tests/test_combine_concat.py b/pandas/sparse/tests/test_combine_concat.py index fcdc6d9580dd5..5240d592810ad 100644 --- a/pandas/sparse/tests/test_combine_concat.py +++ b/pandas/sparse/tests/test_combine_concat.py @@ -1,6 +1,5 @@ # pylint: disable-msg=E1101,W0612 -import nose # noqa import numpy as np import pandas as pd import pandas.util.testing as tm @@ -356,9 +355,3 @@ def test_concat_sparse_dense(self): exp = pd.concat([self.dense1, self.dense3], axis=1) self.assertIsInstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res, exp) - - -if __name__ == '__main__': - import nose # noqa - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/sparse/tests/test_frame.py b/pandas/sparse/tests/test_frame.py index 23bb827974c61..e26c0ed1afe58 100644 --- a/pandas/sparse/tests/test_frame.py +++ b/pandas/sparse/tests/test_frame.py @@ -1193,8 +1193,3 @@ def test_numpy_func_call(self): 'std', 'min', 'max'] for func in funcs: getattr(np, func)(self.frame) - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py index c289b4a1b204f..b3aa3368e9455 100644 --- a/pandas/sparse/tests/test_libsparse.py +++ b/pandas/sparse/tests/test_libsparse.py @@ -1,6 +1,6 @@ from pandas import Series -import nose # noqa +import nose import numpy as np import operator import pandas.util.testing as tm @@ -196,7 +196,7 @@ def _check_correct(a, b, expected): assert (result.equals(expected)) def _check_length_exc(a, longer): - nose.tools.assert_raises(Exception, a.intersect, longer) + self.assertRaises(Exception, a.intersect, longer) def _check_case(xloc, xlen, yloc, ylen, eloc, elen): xindex = BlockIndex(TEST_LENGTH, xloc, xlen) @@ -585,9 +585,3 @@ def f(self): g = make_optestf(op) setattr(TestSparseOperators, g.__name__, g) del g - - -if __name__ == '__main__': - import nose # noqa - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/sparse/tests/test_list.py b/pandas/sparse/tests/test_list.py index b117685b6e968..458681cdc1de0 100644 --- a/pandas/sparse/tests/test_list.py +++ b/pandas/sparse/tests/test_list.py @@ -112,9 +112,3 @@ def test_getitem(self): for i in range(len(arr)): tm.assert_almost_equal(splist[i], arr[i]) tm.assert_almost_equal(splist[-i], arr[-i]) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/sparse/tests/test_series.py b/pandas/sparse/tests/test_series.py index 06d76bdd4dd3d..b34f5dd2cee9f 100644 --- 
a/pandas/sparse/tests/test_series.py +++ b/pandas/sparse/tests/test_series.py @@ -1366,9 +1366,3 @@ def test_numpy_func_call(self): for func in funcs: for series in ('bseries', 'zbseries'): getattr(np, func)(getattr(self, series)) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/stats/tests/test_fama_macbeth.py b/pandas/stats/tests/test_fama_macbeth.py index 706becfa730c4..0c9fcf775ad2d 100644 --- a/pandas/stats/tests/test_fama_macbeth.py +++ b/pandas/stats/tests/test_fama_macbeth.py @@ -66,8 +66,3 @@ def _check_stuff_works(self, result): # does it work? result.summary - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/stats/tests/test_math.py b/pandas/stats/tests/test_math.py index bc09f33d2f467..3f89dbcd20065 100644 --- a/pandas/stats/tests/test_math.py +++ b/pandas/stats/tests/test_math.py @@ -57,7 +57,3 @@ def test_inv_illformed(self): rs = pmath.inv(singular) expected = np.array([[0.1, 0.2], [0.1, 0.2]]) self.assertTrue(np.allclose(rs, expected)) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py index 2935f986cca9f..09fa21d58ea9d 100644 --- a/pandas/stats/tests/test_ols.py +++ b/pandas/stats/tests/test_ols.py @@ -974,9 +974,3 @@ def testFilterWithDictRHS(self): def tsAssertEqual(self, ts1, ts2, **kwargs): self.assert_series_equal(ts1, ts2, **kwargs) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/stats/tests/test_var.py b/pandas/stats/tests/test_var.py index 9f2c95a2d3d5c..04e2019f00a82 100644 --- a/pandas/stats/tests/test_var.py +++ b/pandas/stats/tests/test_var.py @@ -6,7 +6,6 @@ from pandas.compat import range import nose -import unittest raise nose.SkipTest('skipping this for now') @@ -93,7 +92,3 @@ def __init__(self): self.res1 = VAR2(endog=data).fit(maxlag=2) from results import results_var self.res2 = results_var.MacrodataResults() - - -if __name__ == '__main__': - unittest.main() diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 9eff64b40625d..7a2c5f3b7f7c1 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -4999,8 +4999,3 @@ def test_format_percentiles(): tm.assertRaises(ValueError, fmt.format_percentiles, [-0.001, 0.1, 0.5]) tm.assertRaises(ValueError, fmt.format_percentiles, [2, 0.1, 0.5]) tm.assertRaises(ValueError, fmt.format_percentiles, [0.1, 0.5, 'a']) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/formats/test_printing.py b/pandas/tests/formats/test_printing.py index 3bcceca1f50a7..d1eb1faecc401 100644 --- a/pandas/tests/formats/test_printing.py +++ b/pandas/tests/formats/test_printing.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import nose from pandas import compat import pandas.formats.printing as printing import pandas.formats.format as fmt @@ -135,8 +134,3 @@ def test_ambiguous_width(self): # result = printing.console_encode(u"\u05d0") # expected = u"\u05d0".encode('utf-8') # assert (result == expected) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git 
a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 5d51306363053..0dbb78ec89b2e 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -2202,7 +2202,3 @@ def test_dot(self): with tm.assertRaisesRegexp(ValueError, 'aligned'): df.dot(df2) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_asof.py b/pandas/tests/frame/test_asof.py index f68219120b48e..323960d54a42c 100644 --- a/pandas/tests/frame/test_asof.py +++ b/pandas/tests/frame/test_asof.py @@ -1,7 +1,5 @@ # coding=utf-8 -import nose - import numpy as np from pandas import (DataFrame, date_range, Timestamp, Series, to_datetime) @@ -84,7 +82,3 @@ def test_missing(self): expected = DataFrame(index=to_datetime(['1989-12-31']), columns=['A', 'B'], dtype='float64') assert_frame_equal(result, expected) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index fe6a12fcca28a..1676c57a274cd 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -7,7 +7,6 @@ import itertools import nose - from numpy.random import randn import numpy as np @@ -1945,10 +1944,3 @@ def test_frame_timeseries_to_records(self): result['index'].dtype == 'M8[ns]' result = df.to_records(index=False) - - -if __name__ == '__main__': - import nose # noqa - - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 7d68eac47766e..f0e6ab4c17915 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2954,9 +2954,3 @@ def test_transpose(self): expected = DataFrame(self.df.values.T) expected.index = ['A', 'B'] assert_frame_equal(result, expected) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_misc_api.py b/pandas/tests/frame/test_misc_api.py index f5719fa1d8b85..2fc14d9e4d123 100644 --- a/pandas/tests/frame/test_misc_api.py +++ b/pandas/tests/frame/test_misc_api.py @@ -4,7 +4,6 @@ # pylint: disable-msg=W0612,E1101 from copy import deepcopy import sys -import nose from distutils.version import LooseVersion from pandas.compat import range, lrange @@ -486,8 +485,3 @@ def _check_f(base, f): # rename f = lambda x: x.rename({1: 'foo'}, inplace=True) _check_f(d.copy(), f) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index eabdb79295c27..8c25f71c00684 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -711,10 +711,3 @@ def test_interp_ignore_all_good(self): # all good result = df[['B', 'D']].interpolate(downcast=None) assert_frame_equal(result, df[['B', 'D']]) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - # '--with-coverage', '--cover-package=pandas.core'] - exit=False) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index f843a5c08ce05..15f98abe1445d 100644 --- a/pandas/tests/frame/test_operators.py +++ 
b/pandas/tests/frame/test_operators.py @@ -1275,8 +1275,3 @@ def test_alignment_non_pandas(self): align(df, val, 'index') with tm.assertRaises(ValueError): align(df, val, 'columns') - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 36ae5dac733a5..a9a90a6f5cd40 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1155,8 +1155,3 @@ class TestDataFrameEvalPythonPython(TestDataFrameEvalNumExprPython): def setUpClass(cls): super(TestDataFrameEvalPythonPython, cls).tearDownClass() cls.engine = cls.parser = 'python' - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 9a9f0ee67fb89..55848847f2266 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -575,9 +575,3 @@ def test_frame_to_period(self): tm.assert_index_equal(pts.columns, exp.columns.asfreq('M')) self.assertRaises(ValueError, df.to_period, axis=2) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index b585462365606..5c47b0357b4f6 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -1145,9 +1145,3 @@ def test_to_csv_quoting(self): df = df.set_index(['a', 'b']) expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n' self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 6b162b71f79de..5f680a6876873 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- from __future__ import print_function -import nose from datetime import datetime @@ -487,8 +486,3 @@ def testit(label_list, shape): shape = (10000, 10000) label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] testit(label_list, shape) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s' - ], exit=False) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 81aa183426be9..82ec1832be961 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import print_function -import nose from numpy import nan - from pandas.core.index import Index, MultiIndex, CategoricalIndex from pandas.core.api import DataFrame, Categorical @@ -490,8 +488,3 @@ def testit(label_list, shape): shape = (10000, 10000) label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] testit(label_list, shape) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s' - ], exit=False) diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 40d8039f71576..663fbd04e7e5a 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -1,7 +1,5 @@ # -*- 
coding: utf-8 -*-
 from __future__ import print_function
-import nose
-
 from numpy import nan
@@ -641,8 +639,3 @@ def testit(label_list, shape):
     shape = (10000, 10000)
     label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)]
     testit(label_list, shape)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s'
-                         ], exit=False)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index bf61f5ef83859..01c81bd7904bd 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -6057,8 +6057,3 @@ def testit(label_list, shape):
     shape = (10000, 10000)
     label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)]
     testit(label_list, shape)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s'
-                         ], exit=False)
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index a0f2a090c9a06..c574a4a1f01a7 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -11,7 +11,6 @@
 import operator
 import os
 
-import nose
 import numpy as np
 
 from pandas import (period_range, date_range, Series,
@@ -2078,8 +2077,3 @@ def test_intersect_str_dates(self):
 
         res = i2.intersection(i1)
         self.assertEqual(len(res), 0)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py
index c7acbf51a17e5..4dab7ae76a011 100644
--- a/pandas/tests/indexes/test_numeric.py
+++ b/pandas/tests/indexes/test_numeric.py
@@ -3,7 +3,6 @@
 from datetime import datetime
 from pandas.compat import range, PY3
 
-import nose
 import numpy as np
 
 from pandas import (date_range, Series, Index, Float64Index,
@@ -1144,8 +1143,3 @@ def test_join_outer(self):
         self.assert_index_equal(res, eres)
         tm.assert_numpy_array_equal(lidx, elidx)
         tm.assert_numpy_array_equal(ridx, eridx)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py
index ab225f72934ce..bcadc41b13370 100644
--- a/pandas/tests/indexing/test_callable.py
+++ b/pandas/tests/indexing/test_callable.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 # pylint: disable-msg=W0612,E1101
-import nose
 
 import numpy as np
 import pandas as pd
@@ -268,8 +267,3 @@ def test_frame_iloc_callable_setitem(self):
         exp = df.copy()
         exp.iloc[[1, 3], [0]] = [-5, -5]
         tm.assert_frame_equal(res, exp)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index f7fa07916ca74..a9dfcf2672357 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -5532,8 +5532,3 @@ def test_boolean_indexing(self):
                              index=pd.to_timedelta(range(10), unit='s'),
                              columns=['x'])
         tm.assert_frame_equal(expected, result)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py
index 289d48ba6d4cc..f7fd6a8519533 100644
--- a/pandas/tests/plotting/test_boxplot_method.py
+++ b/pandas/tests/plotting/test_boxplot_method.py
@@ -378,8 +378,3 @@ def test_fontsize(self):
         df = DataFrame({"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]})
         self._check_ticks_props(df.boxplot("a", by="b", fontsize=16),
                                 xlabelsize=16, ylabelsize=16)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py
index 6486c8aa21c1b..bcc9c7ceea8b5 100644
--- a/pandas/tests/plotting/test_datetimelike.py
+++ b/pandas/tests/plotting/test_datetimelike.py
@@ -1,3 +1,5 @@
+""" Test cases for time series specific (freq conversion, etc) """
+
 from datetime import datetime, timedelta, date, time
 
 import nose
@@ -18,9 +20,6 @@
     _skip_if_no_scipy_gaussian_kde)
 
 
-""" Test cases for time series specific (freq conversion, etc) """
-
-
 @tm.mplskip
 class TestTSPlot(TestPlotBase):
@@ -1309,8 +1308,3 @@ def _check_plot_works(f, freq=None, series=None, *args, **kwargs):
         plt.savefig(path)
     finally:
         plt.close(fig)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index fba554b03f191..81a54bd38b3f8 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -1,5 +1,7 @@
 # coding: utf-8
 
+""" Test cases for DataFrame.plot """
+
 import nose
 import string
 import warnings
@@ -26,9 +28,6 @@
     _ok_for_gaussian_kde)
 
 
-""" Test cases for DataFrame.plot """
-
-
 @tm.mplskip
 class TestDataFramePlots(TestPlotBase):
@@ -2726,8 +2725,3 @@ def _generate_4_axes_via_gridspec():
     ax_lr = plt.subplot(gs[1, 1])
 
     return gs, [ax_tl, ax_ll, ax_tr, ax_lr]
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py
index 3c682fbfbb89e..93efb3f994c38 100644
--- a/pandas/tests/plotting/test_groupby.py
+++ b/pandas/tests/plotting/test_groupby.py
@@ -1,6 +1,7 @@
 # coding: utf-8
 
-import nose
+""" Test cases for GroupBy.plot """
+
 
 from pandas import Series, DataFrame
 import pandas.util.testing as tm
@@ -10,9 +11,6 @@
 from pandas.tests.plotting.common import TestPlotBase
 
 
-""" Test cases for GroupBy.plot """
-
-
 @tm.mplskip
 class TestDataFrameGroupByPlots(TestPlotBase):
@@ -74,8 +72,3 @@ def test_plot_kwargs(self):
 
         res = df.groupby('z').plot.scatter(x='x', y='y')
         self.assertEqual(len(res['a'].collections), 1)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py
index bde5544390b85..4f64f66bd3c4d 100644
--- a/pandas/tests/plotting/test_hist_method.py
+++ b/pandas/tests/plotting/test_hist_method.py
@@ -1,6 +1,6 @@
 # coding: utf-8
 
-import nose
+""" Test cases for .hist method """
 
 from pandas import Series, DataFrame
 import pandas.util.testing as tm
@@ -13,9 +13,6 @@
 from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works)
 
 
-""" Test cases for .hist method """
-
-
 @tm.mplskip
 class TestSeriesPlots(TestPlotBase):
@@ -418,8 +415,3 @@ def test_axis_share_xy(self):
 
         self.assertTrue(ax1._shared_y_axes.joined(ax1, ax2))
         self.assertTrue(ax2._shared_y_axes.joined(ax1, ax2))
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py
index 2650ce2879db7..c92287b2bdc42 100644
--- a/pandas/tests/plotting/test_misc.py
+++ b/pandas/tests/plotting/test_misc.py
@@ -1,6 +1,6 @@
 # coding: utf-8
 
-import nose
+""" Test cases for misc plot functions """
 
 from pandas import Series, DataFrame
 from pandas.compat import lmap
@@ -15,8 +15,6 @@
 from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works,
                                           _ok_for_gaussian_kde)
 
-""" Test cases for misc plot functions """
-
 
 @tm.mplskip
 class TestSeriesPlots(TestPlotBase):
@@ -298,8 +296,3 @@ def test_subplot_titles(self):
                                title=title[:-1])
         title_list = [ax.get_title() for sublist in plot for ax in sublist]
         self.assertEqual(title_list, title[:3] + [''])
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py
index f668c46a15173..8c00d606059a4 100644
--- a/pandas/tests/plotting/test_series.py
+++ b/pandas/tests/plotting/test_series.py
@@ -1,6 +1,8 @@
 # coding: utf-8
 
-import nose
+""" Test cases for Series.plot """
+
+
 import itertools
 from datetime import datetime
@@ -20,9 +22,6 @@
     _ok_for_gaussian_kde)
 
 
-""" Test cases for Series.plot """
-
-
 @tm.mplskip
 class TestSeriesPlots(TestPlotBase):
@@ -811,8 +810,3 @@ def test_custom_business_day_freq(self):
                                freq=CustomBusinessDay(holidays=['2014-05-26'])))
 
         _check_plot_works(s.plot)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/test_asof.py
index e2092feab9004..db306d2a742c1 100644
--- a/pandas/tests/series/test_asof.py
+++ b/pandas/tests/series/test_asof.py
@@ -1,9 +1,6 @@
 # coding=utf-8
 
-import nose
-
 import numpy as np
-
 from pandas import (offsets, Series, notnull, isnull,
                     date_range, Timestamp)
@@ -152,7 +149,3 @@ def test_errors(self):
         s = Series(np.random.randn(N), index=rng)
         with self.assertRaises(ValueError):
             s.asof(s.index[0], subset='foo')
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py
index bdae11770de65..e0d83d6eeadac 100644
--- a/pandas/tests/series/test_indexing.py
+++ b/pandas/tests/series/test_indexing.py
@@ -2638,9 +2638,3 @@ def test_round_nat(self):
             round_method = getattr(s.dt, method)
             for freq in ["s", "5s", "min", "5min", "h", "5h"]:
                 assert_series_equal(round_method(freq), expected)
-
-
-if __name__ == '__main__':
-    import nose
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py
index 8c877ade6fe98..6821a8b9f4221 100644
--- a/pandas/tests/series/test_missing.py
+++ b/pandas/tests/series/test_missing.py
@@ -1092,10 +1092,3 @@ def test_series_interpolate_intraday(self):
         result = ts.reindex(new_index).interpolate(method='time')
 
         self.assert_numpy_array_equal(result.values, exp.values)
-
-
-if __name__ == '__main__':
-    import nose
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   # '--with-coverage', '--cover-package=pandas.core']
-                   exit=False)
diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py
index 9754a9d3737e3..bd346fb9bb0c8 100644
--- a/pandas/tests/series/test_timeseries.py
+++ b/pandas/tests/series/test_timeseries.py
@@ -927,9 +927,3 @@ def test_get_level_values_box(self):
         index = MultiIndex(levels=levels, labels=labels)
 
         self.assertTrue(isinstance(index.get_level_values(0)[0], Timestamp))
-
-
-if __name__ == '__main__':
-    import nose
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 99453b9793007..40b277f3f1f8a 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -1372,9 +1372,3 @@ def test_index(self):
         idx = Index(['1 day', '1 day', '-1 day', '-1 day 2 min',
                      '2 min', '2 min'], dtype='timedelta64[ns]')
         tm.assert_series_equal(algos.mode(idx), exp)
-
-
-if __name__ == '__main__':
-    import nose
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index f750936961831..1d1ef1a08859c 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -4,7 +4,7 @@
 import re
 import sys
 from datetime import datetime, timedelta
-
+import nose
 import numpy as np
 
 import pandas as pd
@@ -1105,11 +1105,3 @@ def f():
 
         self.assertRaises(AttributeError, f)
         self.assertFalse(hasattr(t, "b"))
-
-
-if __name__ == '__main__':
-    import nose
-
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   # '--with-coverage', '--cover-package=pandas.core'],
-                   exit=False)
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 745914d3e7ef5..be55d6e1976ec 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -4576,10 +4576,3 @@ def test_map(self):
         self.assertIsInstance(res, tm.SubclassedCategorical)
         exp = Categorical(['A', 'B', 'C'])
         tm.assert_categorical_equal(res, exp)
-
-
-if __name__ == '__main__':
-    import nose
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   # '--with-coverage', '--cover-package=pandas.core']
-                   exit=False)
diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py
index 09dd3f7ab517c..0239250129494 100644
--- a/pandas/tests/test_common.py
+++ b/pandas/tests/test_common.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 
-import nose
 import numpy as np
 
 from pandas import Series, Timestamp
@@ -196,8 +195,3 @@ def test_dict_compat():
     assert (com._dict_compat(data_datetime64) == expected)
     assert (com._dict_compat(expected) == expected)
     assert (com._dict_compat(data_unchanged) == data_unchanged)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py
index c037f02f20609..18b078d0a677e 100644
--- a/pandas/tests/test_expressions.py
+++ b/pandas/tests/test_expressions.py
@@ -2,12 +2,12 @@
 from __future__ import print_function
 # pylint: disable-msg=W0612,E1101
 
-import nose
 import re
+import operator
 
+import nose
 from numpy.random import randn
 
-import operator
 import numpy as np
 
 from pandas.core.api import DataFrame, Panel
@@ -439,9 +439,3 @@ def test_bool_ops_warn_on_arithmetic(self):
                 r = f(df, True)
                 e = fe(df, True)
                 tm.assert_frame_equal(r, e)
-
-
-if __name__ == '__main__':
-    import nose
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py
index 0ca8ba47b8a8f..5bf2eda47ea27 100644
--- a/pandas/tests/test_generic.py
+++ b/pandas/tests/test_generic.py
@@ -2022,7 +2022,3 @@ def test_pipe_panel(self):
 
         with tm.assertRaises(ValueError):
             result = wp.pipe((f, 'y'), x=1, y=1)
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py
index 5000d6d4510fb..2bfe31ad4260e 100644
--- a/pandas/tests/test_internals.py
+++ b/pandas/tests/test_internals.py
@@ -1188,8 +1188,3 @@ def assert_add_equals(val, inc, result):
                           lambda: BlockPlacement([1, 2, 4]).add(-10))
         self.assertRaises(ValueError,
                           lambda: BlockPlacement(slice(2, None, -1)).add(-1))
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py
index bfdb77f3fb350..0e7dda05a0c27 100644
--- a/pandas/tests/test_join.py
+++ b/pandas/tests/test_join.py
@@ -193,9 +193,3 @@ def test_inner_join_indexer2():
 
     exp_ridx = np.array([0, 1, 2, 3], dtype=np.int64)
     assert_almost_equal(ridx, exp_ridx)
-
-
-if __name__ == '__main__':
-    import nose
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py
index 945f8004687cd..2381c52ef14b6 100644
--- a/pandas/tests/test_lib.py
+++ b/pandas/tests/test_lib.py
@@ -232,10 +232,3 @@ def test_empty_like(self):
         expected = np.array([True])
 
         self._check_behavior(arr, expected)
-
-
-if __name__ == '__main__':
-    import nose
-
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 37bfe667b0205..d87ad8d906854 100755
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -2478,8 +2478,3 @@ def test_iloc_mi(self):
                               for r in range(5)])
 
         assert_frame_equal(result, expected)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
index dd3a49de55d73..937c20d009b6b 100644
--- a/pandas/tests/test_nanops.py
+++ b/pandas/tests/test_nanops.py
@@ -1000,9 +1000,3 @@ def test_nans_skipna(self):
     @property
     def prng(self):
         return np.random.RandomState(1234)
-
-
-if __name__ == '__main__':
-    import nose
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s'
-                         ], exit=False)
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index d79081a06dbc0..89e8fb78ad821 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -2538,8 +2538,3 @@ def test_panel_index():
                                         np.repeat([1, 2, 3], 4)],
                                 names=['time', 'panel'])
     tm.assert_index_equal(index, expected)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py
index 0769b8916a11b..aeca24964222a 100644
--- a/pandas/tests/test_panel4d.py
+++ b/pandas/tests/test_panel4d.py
@@ -949,8 +949,3 @@ def test_rename(self):
 
     def test_get_attr(self):
         assert_panel_equal(self.panel4d['l1'], self.panel4d.l1)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_panelnd.py b/pandas/tests/test_panelnd.py
index 92805f3b30ec6..6a578d85d3ee3 100644
--- a/pandas/tests/test_panelnd.py
+++ b/pandas/tests/test_panelnd.py
@@ -1,6 +1,4 @@
 # -*- coding: utf-8 -*-
-import nose
-
 from pandas.core import panelnd
 from pandas.core.panel import Panel
@@ -101,7 +99,3 @@ def test_5d_construction(self):
         # test a transpose
         # results = p5d.transpose(1,2,3,4,0)
         # expected =
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py
index 603674ac01bc0..b5fa945a5bb8f 100644
--- a/pandas/tests/test_reshape.py
+++ b/pandas/tests/test_reshape.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 # pylint: disable-msg=W0612,E1101
-import nose
 
 from pandas import DataFrame, Series
 from pandas.core.sparse import SparseDataFrame
@@ -914,8 +913,3 @@ def test_multiple_id_columns(self):
         exp_frame = exp_frame.set_index(['famid', 'birth', 'age'])[['ht']]
         long_frame = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age')
         tm.assert_frame_equal(long_frame, exp_frame)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py
index 41d25b9662b5b..eb8ab02c29548 100644
--- a/pandas/tests/test_stats.py
+++ b/pandas/tests/test_stats.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 from pandas import compat
-import nose
 
 from distutils.version import LooseVersion
 from numpy import nan
@@ -185,8 +184,3 @@ def test_rank_object_bug(self):
         # smoke tests
         Series([np.nan] * 32).astype(object).rank(ascending=True)
         Series([np.nan] * 32).astype(object).rank(ascending=False)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index f59127c853ed1..f358946983dce 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -4,8 +4,6 @@
 from datetime import datetime, timedelta
 import re
 
-import nose
-
 from numpy import nan as NA
 import numpy as np
 from numpy.random import randint
@@ -2715,8 +2713,3 @@ def test_method_on_bytes(self):
         expected = Series(np.array(
             ['ad', 'be', 'cf'], 'S2').astype(object))
         tm.assert_series_equal(result, expected)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py
index 98b3b474f785d..bf8a3ab370625 100644
--- a/pandas/tests/test_take.py
+++ b/pandas/tests/test_take.py
@@ -2,7 +2,6 @@
 import re
 from datetime import datetime
 
-import nose
 import numpy as np
 from pandas.compat import long
 import pandas.core.algorithms as algos
@@ -448,8 +447,3 @@ def test_2d_datetime64(self):
         expected = arr.take(indexer, axis=1)
         expected[:, [2, 4]] = datetime(2007, 1, 1)
         tm.assert_almost_equal(result, expected)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py
index e2f295a5343bc..5e60efd153ab1 100644
--- a/pandas/tests/test_testing.py
+++ b/pandas/tests/test_testing.py
@@ -802,8 +802,3 @@ def f():
         with assertRaises(ValueError):
             f()
             raise ValueError
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py
index ed82604035358..e2f6a7f6cc1ed 100644
--- a/pandas/tests/test_util.py
+++ b/pandas/tests/test_util.py
@@ -1,6 +1,4 @@
 # -*- coding: utf-8 -*-
-import nose
-
 from collections import OrderedDict
 import sys
 import unittest
@@ -402,8 +400,3 @@ def test_numpy_errstate_is_default():
     from pandas.compat import numpy  # noqa
     # The errstate should be unchanged after that import.
     tm.assert_equal(np.geterr(), expected)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py
index 56a14a51105ca..a8579e89aeb1f 100644
--- a/pandas/tests/types/test_cast.py
+++ b/pandas/tests/types/test_cast.py
@@ -5,8 +5,6 @@
 
 """
 
-
-import nose
 from datetime import datetime
 
 import numpy as np
@@ -278,8 +276,3 @@ def test_period_dtype(self):
                        np.dtype('datetime64[ns]'), np.object, np.int64]:
             self.assertEqual(_find_common_type([dtype, dtype2]), np.object)
             self.assertEqual(_find_common_type([dtype2, dtype]), np.object)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/types/test_common.py b/pandas/tests/types/test_common.py
index 4d6f50862c562..7c17c61aec440 100644
--- a/pandas/tests/types/test_common.py
+++ b/pandas/tests/types/test_common.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 
-import nose
 import numpy as np
 
 from pandas.types.dtypes import DatetimeTZDtype, PeriodDtype, CategoricalDtype
@@ -55,8 +54,3 @@ def test_dtype_equal():
 
     assert not DatetimeTZDtype.is_dtype(np.int64)
     assert not PeriodDtype.is_dtype(np.int64)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/types/test_concat.py b/pandas/tests/types/test_concat.py
index 6403dcb5a5350..8acafe0af1792 100644
--- a/pandas/tests/types/test_concat.py
+++ b/pandas/tests/types/test_concat.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 
-import nose
 import pandas as pd
 import pandas.types.concat as _concat
 import pandas.util.testing as tm
@@ -79,8 +78,3 @@ def test_get_dtype_kinds_period(self):
                      pd.Series([pd.Period('2011-02', freq='D')])]
         res = _concat.get_dtype_kinds(to_concat)
         self.assertEqual(res, set(['object']))
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/types/test_dtypes.py b/pandas/tests/types/test_dtypes.py
index f190c85404ff9..68105cfd7c886 100644
--- a/pandas/tests/types/test_dtypes.py
+++ b/pandas/tests/types/test_dtypes.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 from itertools import product
 
-import nose
 import numpy as np
 import pandas as pd
 from pandas import Series, Categorical, date_range
@@ -353,8 +352,3 @@ def test_empty(self):
     def test_not_string(self):
         # though PeriodDtype has object kind, it cannot be string
         self.assertFalse(is_string_dtype(PeriodDtype('D')))
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/types/test_generic.py b/pandas/tests/types/test_generic.py
index 28600687e8062..2861252bef26a 100644
--- a/pandas/tests/types/test_generic.py
+++ b/pandas/tests/types/test_generic.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 
-import nose
 import numpy as np
 import pandas as pd
 import pandas.util.testing as tm
@@ -41,8 +40,3 @@ def test_abc_types(self):
         self.assertIsInstance(self.sparse_array, gt.ABCSparseArray)
         self.assertIsInstance(self.categorical, gt.ABCCategorical)
         self.assertIsInstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py
index 5c35112d0fe19..15f9545f3476c 100644
--- a/pandas/tests/types/test_inference.py
+++ b/pandas/tests/types/test_inference.py
@@ -6,7 +6,6 @@
 
 """
 
-import nose
 import collections
 import re
 from datetime import datetime, date, timedelta, time
@@ -968,8 +967,3 @@ def test_ensure_categorical():
     values = Categorical(values)
     result = _ensure_categorical(values)
     tm.assert_categorical_equal(result, values)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/types/test_io.py b/pandas/tests/types/test_io.py
index 545edf8f1386c..ce8e23342bf5a 100644
--- a/pandas/tests/types/test_io.py
+++ b/pandas/tests/types/test_io.py
@@ -107,10 +107,3 @@ def test_convert_downcast_int64(self):
         expected = np.array([int8_na, 2, 3, 10, 15], dtype=np.int8)
         result = lib.downcast_int64(arr, na_values)
         self.assert_numpy_array_equal(result, expected)
-
-
-if __name__ == '__main__':
-    import nose
-
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tests/types/test_missing.py b/pandas/tests/types/test_missing.py
index fa2bd535bb8d5..2b09cf5ab633d 100644
--- a/pandas/tests/types/test_missing.py
+++ b/pandas/tests/types/test_missing.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 
-import nose
 import numpy as np
 from datetime import datetime
 from pandas.util import testing as tm
@@ -304,8 +303,3 @@ def test_na_value_for_dtype():
 
     for dtype in ['O']:
         assert np.isnan(na_value_for_dtype(np.dtype(dtype)))
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py
index 012d67d29cc3f..ee70515850b25 100644
--- a/pandas/tools/plotting.py
+++ b/pandas/tools/plotting.py
@@ -4003,28 +4003,3 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None,
         if gridsize is not None:
             kwds['gridsize'] = gridsize
         return self(kind='hexbin', x=x, y=y, C=C, **kwds)
-
-
-if __name__ == '__main__':
-    # import pandas.rpy.common as com
-    # sales = com.load_data('sanfrancisco.home.sales', package='nutshell')
-    # top10 = sales['zip'].value_counts()[:10].index
-    # sales2 = sales[sales.zip.isin(top10)]
-    # _ = scatter_plot(sales2, 'squarefeet', 'price', by='zip')
-
-    # plt.show()
-
-    import matplotlib.pyplot as plt
-
-    import pandas.tools.plotting as plots
-    import pandas.core.frame as fr
-    reload(plots)  # noqa
-    reload(fr)  # noqa
-    from pandas.core.frame import DataFrame
-
-    data = DataFrame([[3, 6, -5], [4, 8, 2], [4, 9, -6],
-                      [4, 9, -3], [2, 5, -1]],
-                     columns=['A', 'B', 'C'])
-    data.plot(kind='barh', stacked=True)
-
-    plt.show()
diff --git a/pandas/tools/tests/test_concat.py b/pandas/tools/tests/test_concat.py
index 2be7e75573d6e..dae24c48b8238 100644
--- a/pandas/tools/tests/test_concat.py
+++ b/pandas/tools/tests/test_concat.py
@@ -1,5 +1,3 @@
-import nose
-
 import numpy as np
 from numpy.random import randn
 
@@ -2171,8 +2169,3 @@ def test_concat_multiindex_dfs_with_deepcopy(self):
         tm.assert_frame_equal(result_copy, expected)
         result_no_copy = pd.concat(example_dict, names=['testname'])
         tm.assert_frame_equal(result_no_copy, expected)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tools/tests/test_join.py b/pandas/tools/tests/test_join.py
index 4a2b64d080b4b..605a85026d605 100644
--- a/pandas/tools/tests/test_join.py
+++ b/pandas/tools/tests/test_join.py
@@ -1,7 +1,5 @@
 # pylint: disable=E1103
 
-import nose
-
 from numpy.random import randn
 import numpy as np
@@ -799,8 +797,3 @@ def _join_by_hand(a, b, how='left'):
     for col, s in compat.iteritems(b_re):
         a_re[col] = s
     return a_re.reindex(columns=result_columns)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
index e08074649f7e8..88856a012da6f 100644
--- a/pandas/tools/tests/test_merge.py
+++ b/pandas/tools/tests/test_merge.py
@@ -1,7 +1,5 @@
 # pylint: disable=E1103
 
-import nose
-
 from datetime import datetime
 from numpy.random import randn
 from numpy import nan
@@ -1370,8 +1368,3 @@ def f():
         def f():
             household.join(log_return, how='outer')
         self.assertRaises(NotImplementedError, f)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tools/tests/test_merge_asof.py b/pandas/tools/tests/test_merge_asof.py
index ef7b25008e80a..8e7323f72a8f5 100644
--- a/pandas/tools/tests/test_merge_asof.py
+++ b/pandas/tools/tests/test_merge_asof.py
@@ -1,4 +1,3 @@
-import nose
 import os
 import pytz
 
@@ -938,8 +937,3 @@ def test_on_float_by_int(self):
                                 columns=['symbol', 'exch', 'price', 'mpv'])
 
         assert_frame_equal(result, expected)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tools/tests/test_merge_ordered.py b/pandas/tools/tests/test_merge_ordered.py
index d163468abc88e..e08cc98e50794 100644
--- a/pandas/tools/tests/test_merge_ordered.py
+++ b/pandas/tools/tests/test_merge_ordered.py
@@ -1,5 +1,3 @@
-import nose
-
 import pandas as pd
 from pandas import DataFrame, merge_ordered
 from pandas.util import testing as tm
@@ -92,7 +90,3 @@ def test_empty_sequence_concat(self):
             pd.concat([pd.DataFrame()])
             pd.concat([None, pd.DataFrame()])
             pd.concat([pd.DataFrame(), None])
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py
index 9cc520a7adb05..398e57d4ad0a4 100644
--- a/pandas/tools/tests/test_pivot.py
+++ b/pandas/tools/tests/test_pivot.py
@@ -1321,9 +1321,3 @@ def test_crosstab_with_numpy_size(self):
                                  index=expected_index,
                                  columns=expected_column)
         tm.assert_frame_equal(result, expected)
-
-
-if __name__ == '__main__':
-    import nose
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py
index 5c7cee862ccd3..c5261597cf35d 100644
--- a/pandas/tools/tests/test_tile.py
+++ b/pandas/tools/tests/test_tile.py
@@ -1,5 +1,4 @@
 import os
-import nose
 
 import numpy as np
 from pandas.compat import zip
@@ -351,8 +350,3 @@ def test_datetime_bin(self):
 def curpath():
     pth, _ = os.path.split(os.path.abspath(__file__))
     return pth
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py
index 8a8960a057926..e1d057eb3c3c0 100644
--- a/pandas/tools/tests/test_util.py
+++ b/pandas/tools/tests/test_util.py
@@ -478,8 +478,3 @@ def test_downcast_limits(self):
         for dtype, downcast, min_max in dtype_downcast_min_max:
             series = pd.to_numeric(pd.Series(min_max), downcast=downcast)
             tm.assert_equal(series.dtype, dtype)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py
index 4f2ac3ff0d87e..2ff06517f175a 100644
--- a/pandas/tseries/tests/test_base.py
+++ b/pandas/tseries/tests/test_base.py
@@ -1860,10 +1860,3 @@ def test_equals(self):
         self.assertFalse(idx.asobject.equals(idx3))
         self.assertFalse(idx.equals(list(idx3)))
         self.assertFalse(idx.equals(pd.Series(idx3)))
-
-
-if __name__ == '__main__':
-    import nose
-
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tseries/tests/test_converter.py b/pandas/tseries/tests/test_converter.py
index f6cf11c871bba..8caed80f5a45b 100644
--- a/pandas/tseries/tests/test_converter.py
+++ b/pandas/tseries/tests/test_converter.py
@@ -1,7 +1,5 @@
 from datetime import datetime, date
 
-import nose
-
 import numpy as np
 from pandas import Timestamp, Period, Index
 from pandas.compat import u
@@ -12,6 +10,7 @@
 try:
     import pandas.tseries.converter as converter
 except ImportError:
+    import nose
     raise nose.SkipTest("no pandas.tseries.converter, skipping")
 
 
@@ -199,9 +198,3 @@ def test_integer_passthrough(self):
         rs = self.pc.convert([0, 1], None, self.axis)
         xp = [0, 1]
         self.assertEqual(rs, xp)
-
-
-if __name__ == '__main__':
-    import nose
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py
index 87f9f55e0189c..209e6e40d5cf0 100644
--- a/pandas/tseries/tests/test_daterange.py
+++ b/pandas/tseries/tests/test_daterange.py
@@ -1,6 +1,5 @@
 from datetime import datetime
 from pandas.compat import range
-import nose
 import numpy as np
 
 from pandas.core.index import Index
@@ -817,8 +816,3 @@ def test_cdaterange_weekmask_and_holidays(self):
                         holidays=['2013-05-01'])
         xp = DatetimeIndex(['2013-05-02', '2013-05-05', '2013-05-06'])
         self.assert_index_equal(xp, rng)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tseries/tests/test_holiday.py b/pandas/tseries/tests/test_holiday.py
index 62446e8e637c6..d4d273347e6e3 100644
--- a/pandas/tseries/tests/test_holiday.py
+++ b/pandas/tseries/tests/test_holiday.py
@@ -15,7 +15,6 @@
     USLaborDay, USColumbusDay,
     USMartinLutherKingJr, USPresidentsDay)
 from pytz import utc
-import nose
 
 
 class TestCalendar(tm.TestCase):
@@ -385,8 +384,3 @@ def test_both_offset_observance_raises(self):
             Holiday("Cyber Monday", month=11, day=1,
                     offset=[DateOffset(weekday=SA(4))],
                     observance=next_monday)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py
index 768e9212e6c42..ac488a3dfdcb2 100644
--- a/pandas/tseries/tests/test_offsets.py
+++ b/pandas/tseries/tests/test_offsets.py
@@ -2,10 +2,11 @@
 from distutils.version import LooseVersion
 from datetime import date, datetime, timedelta
 from dateutil.relativedelta import relativedelta
-from pandas.compat import range, iteritems
-from pandas import compat
+
 import nose
 from nose.tools import assert_raises
+from pandas.compat import range, iteritems
+from pandas import compat
 
 import numpy as np
@@ -4956,8 +4957,3 @@ def test_all_offset_classes(self):
         first = Timestamp(test_values[0], tz='US/Eastern') + offset()
         second = Timestamp(test_values[1], tz='US/Eastern')
         self.assertEqual(first, second, msg=str(offset))
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py
index a707cc3eb74ce..fdc067a827a5b 100644
--- a/pandas/tseries/tests/test_period.py
+++ b/pandas/tseries/tests/test_period.py
@@ -4967,9 +4967,3 @@ def test_get_period_field_raises_on_out_of_range(self):
     def test_get_period_field_array_raises_on_out_of_range(self):
         self.assertRaises(ValueError, _period.get_period_field_arr, -1,
                           np.empty(1), 0)
-
-
-if __name__ == '__main__':
-    import nose
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py
index c40f930fbd094..222ffb735921a 100755
--- a/pandas/tseries/tests/test_resample.py
+++ b/pandas/tseries/tests/test_resample.py
@@ -3,7 +3,6 @@
 from datetime import datetime, timedelta
 from functools import partial
 
-import nose
 import numpy as np
 
 import pandas as pd
@@ -3188,8 +3187,3 @@ def test_aggregate_with_nat(self):
         # if NaT is included, 'var', 'std', 'mean', 'first','last'
         # and 'nth' doesn't work yet
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py
index 6efa024d81b98..13263259e0b8a 100644
--- a/pandas/tseries/tests/test_timedeltas.py
+++ b/pandas/tseries/tests/test_timedeltas.py
@@ -2,7 +2,6 @@
 from __future__ import division
 from datetime import timedelta, time
 
-import nose
 from distutils.version import LooseVersion
 import numpy as np
@@ -2051,8 +2050,3 @@ def test_add_overflow(self):
             result = (to_timedelta([pd.NaT, '5 days', '1 hours']) +
                       to_timedelta(['7 seconds', pd.NaT, '4 hours']))
             tm.assert_index_equal(result, exp)
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tseries/tests/test_timeseries_legacy.py b/pandas/tseries/tests/test_timeseries_legacy.py
index d8c01c53fb2e5..5395056c93412 100644
--- a/pandas/tseries/tests/test_timeseries_legacy.py
+++ b/pandas/tseries/tests/test_timeseries_legacy.py
@@ -219,8 +219,3 @@ def test_ms_vs_MS(self):
     def test_rule_aliases(self):
         rule = to_offset('10us')
         self.assertEqual(rule, Micro(10))
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py
index 64787b6e4e79a..00b60ba620c4b 100644
--- a/pandas/tseries/tests/test_timezones.py
+++ b/pandas/tseries/tests/test_timezones.py
@@ -1,7 +1,5 @@
 # pylint: disable-msg=E1101,W0612
 from datetime import datetime, timedelta, tzinfo, date
-import nose
-
 import numpy as np
 import pytz
 from distutils.version import LooseVersion
@@ -1683,8 +1681,3 @@ def test_nat(self):
         idx = idx.tz_convert('US/Eastern')
         expected = ['2010-12-01 11:00', '2010-12-02 11:00', NaT]
         self.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern'))
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py
index cf5dbd671d38c..20e91a6f5bc44 100644
--- a/pandas/tseries/tests/test_tslib.py
+++ b/pandas/tseries/tests/test_tslib.py
@@ -1,4 +1,3 @@
-import nose
 import datetime
 import numpy as np
 from distutils.version import LooseVersion
@@ -690,8 +689,3 @@ def _check_round(freq, expected):
         msg = pd.tseries.frequencies._INVALID_FREQ_ERROR
         with self.assertRaisesRegexp(ValueError, msg):
             stamp.round('foo')
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
diff --git a/pandas/tseries/tests/test_util.py b/pandas/tseries/tests/test_util.py
index 96da32a4a845c..3feffe924c291 100644
--- a/pandas/tseries/tests/test_util.py
+++ b/pandas/tseries/tests/test_util.py
@@ -1,5 +1,4 @@
 from pandas.compat import range
-import nose
 
 import numpy as np
@@ -125,8 +124,3 @@ def test_normalize_date():
     result = normalize_date(value)
 
     assert (result == datetime(2012, 9, 7))
-
-
-if __name__ == '__main__':
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)
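For reference, the boilerplate deleted across all of the test modules above follows one
idiom, in a few near-identical variants ('-s', an inner 'import nose', a commented-out
coverage flag). The most common form, taken from the removed lines themselves, is:

    if __name__ == '__main__':
        nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                       exit=False)

After this change a module is exercised through the external runner instead; an
illustrative command (not part of the patch) would be roughly:

    nosetests pandas/tests/test_common.py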
From 542c9166a6ceff4a4889caae3843c3a82a2301cd Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Tue, 7 Feb 2017 13:50:37 -0500
Subject: [PATCH 016/933] TST/STYLE: remove multiprocess nose flags and slight
 PEP fixes

xref https://github.com/pandas-dev/pandas/pull/13856#issuecomment-278058522

Author: Jeff Reback

Closes #15333 from jreback/mcs and squashes the following commits:

2edc842 [Jeff Reback] TST/STYLE: remove multiprocess nose flags and slight PEP fixes
---
 pandas/__init__.py | 8 +--
 pandas/_version.py | 2 +-
 pandas/api/tests/test_api.py | 2 -
 pandas/compat/__init__.py | 24 +++++----
 pandas/compat/chainmap.py | 1 +
 pandas/compat/numpy/function.py | 1 +
 pandas/compat/pickle_compat.py | 9 +++-
 pandas/computation/tests/test_eval.py | 10 ++--
 pandas/core/base.py | 2 +
 pandas/core/config.py | 1 +
 pandas/core/frame.py | 5 +-
 pandas/core/indexing.py | 1 +
 pandas/core/internals.py | 1 +
 pandas/core/nanops.py | 2 +
 pandas/core/panel.py | 2 +
 pandas/core/series.py | 1 +
 pandas/core/window.py | 3 +-
 pandas/formats/format.py | 12 +++++
 pandas/indexes/base.py | 6 +--
 pandas/indexes/multi.py | 2 +-
 pandas/io/common.py | 1 +
 pandas/io/json/json.py | 4 +-
 pandas/io/json/normalize.py | 1 -
 pandas/io/sas/sasreader.py | 1 -
 pandas/io/tests/parser/c_parser_only.py | 9 ++--
 pandas/io/tests/parser/compression.py | 1 +
 pandas/io/tests/parser/converters.py | 1 +
 pandas/io/tests/parser/dtypes.py | 1 +
 pandas/io/tests/parser/parse_dates.py | 1 +
 pandas/io/tests/parser/python_parser_only.py | 1 +
 pandas/io/tests/parser/test_parsers.py | 1 +
 pandas/io/tests/parser/test_unsupported.py | 2 +
 pandas/io/tests/test_clipboard.py | 2 +-
 pandas/io/tests/test_feather.py | 1 -
 pandas/io/tests/test_gbq.py | 3 ++
 pandas/io/tests/test_packers.py | 4 +-
 pandas/io/tests/test_pickle.py | 1 -
 pandas/io/tests/test_pytables.py | 1 -
 pandas/msgpack/exceptions.py | 1 +
 pandas/sparse/array.py | 3 +-
 pandas/sparse/series.py | 1 +
 pandas/sparse/tests/test_arithmetics.py | 2 -
 pandas/sparse/tests/test_array.py | 2 +-
 pandas/sparse/tests/test_combine_concat.py | 4 --
 pandas/sparse/tests/test_format.py | 2 -
 pandas/sparse/tests/test_frame.py | 2 +-
 pandas/sparse/tests/test_groupby.py | 2 -
 pandas/sparse/tests/test_indexing.py | 7 +--
 pandas/sparse/tests/test_libsparse.py | 6 ---
 pandas/sparse/tests/test_list.py | 2 -
 pandas/sparse/tests/test_pivot.py | 2 -
 pandas/sparse/tests/test_series.py | 2 +-
 pandas/stats/fama_macbeth.py | 1 +
 pandas/stats/ols.py | 1 +
 pandas/stats/tests/test_ols.py | 8 ---
 pandas/tests/formats/test_format.py | 35 +++++++------
 pandas/tests/formats/test_printing.py | 2 -
 pandas/tests/formats/test_style.py | 2 +-
 pandas/tests/frame/test_alter_axes.py | 2 -
 pandas/tests/frame/test_analytics.py | 2 -
 pandas/tests/frame/test_apply.py | 2 -
 pandas/tests/frame/test_asof.py | 1 -
 .../tests/frame/test_axis_select_reindex.py | 2 -
 pandas/tests/frame/test_block_internals.py | 2 -
 pandas/tests/frame/test_combine_concat.py | 4 --
 pandas/tests/frame/test_constructors.py | 4 --
 pandas/tests/frame/test_convert_to.py | 2 -
 pandas/tests/frame/test_dtypes.py | 4 --
 pandas/tests/frame/test_indexing.py | 6 ---
 pandas/tests/frame/test_misc_api.py | 4 --
 pandas/tests/frame/test_missing.py | 2 -
 pandas/tests/frame/test_mutate_columns.py | 2 -
 pandas/tests/frame/test_nonunique_indexes.py | 2 -
 pandas/tests/frame/test_operators.py | 2 -
 pandas/tests/frame/test_quantile.py | 2 -
 pandas/tests/frame/test_query_eval.py | 4 --
 pandas/tests/frame/test_replace.py | 2 -
 pandas/tests/frame/test_repr_info.py | 2 -
 pandas/tests/frame/test_reshape.py | 2 -
 pandas/tests/frame/test_sorting.py | 2 -
 pandas/tests/frame/test_subclass.py | 2 -
 pandas/tests/frame/test_timeseries.py | 6 +--
 pandas/tests/frame/test_to_csv.py | 2 -
 pandas/tests/groupby/test_aggregate.py | 2 -
 pandas/tests/groupby/test_categorical.py | 2 -
 pandas/tests/groupby/test_filters.py | 2 -
 pandas/tests/groupby/test_groupby.py | 6 +--
 pandas/tests/indexes/datetimes/test_astype.py | 2 -
 .../indexes/datetimes/test_construction.py | 2 -
 .../indexes/datetimes/test_date_range.py | 1 -
 .../tests/indexes/datetimes/test_datetime.py | 1 -
 .../indexes/datetimes/test_datetimelike.py | 1 -
 .../tests/indexes/datetimes/test_indexing.py | 1 -
 pandas/tests/indexes/datetimes/test_misc.py | 1 -
 .../tests/indexes/datetimes/test_missing.py | 1 -
 pandas/tests/indexes/datetimes/test_ops.py | 1 -
 pandas/tests/indexes/datetimes/test_setops.py | 1 -
 pandas/tests/indexes/datetimes/test_tools.py | 2 +-
 pandas/tests/indexes/test_base.py | 4 +-
 pandas/tests/indexes/test_category.py | 6 ++-
 pandas/tests/indexes/test_datetimelike.py | 2 -
 pandas/tests/indexes/test_multi.py | 21 ++++----
 pandas/tests/indexes/test_numeric.py | 3 --
 pandas/tests/indexes/test_timedelta.py | 1 -
 pandas/tests/indexing/test_callable.py | 2 -
 pandas/tests/indexing/test_coercion.py | 4 +-
 pandas/tests/indexing/test_indexing.py | 2 -
 pandas/tests/indexing/test_indexing_slow.py | 2 -
 pandas/tests/plotting/test_misc.py | 2 +
 pandas/tests/scalar/test_timestamp.py | 4 +-
 pandas/tests/series/test_alter_axes.py | 2 -
 pandas/tests/series/test_analytics.py | 2 -
 pandas/tests/series/test_apply.py | 4 --
 pandas/tests/series/test_asof.py | 1 -
 pandas/tests/series/test_combine_concat.py | 4 --
 pandas/tests/series/test_constructors.py | 2 -
 pandas/tests/series/test_datetime_values.py | 6 +--
 pandas/tests/series/test_dtypes.py | 2 -
 pandas/tests/series/test_indexing.py | 4 +-
 pandas/tests/series/test_internals.py | 2 -
 pandas/tests/series/test_io.py | 6 ---
 pandas/tests/series/test_misc_api.py | 2 -
 pandas/tests/series/test_missing.py | 2 -
 pandas/tests/series/test_operators.py | 2 -
 pandas/tests/series/test_replace.py | 2 -
 pandas/tests/series/test_repr.py | 2 -
 pandas/tests/series/test_sorting.py | 2 -
 pandas/tests/series/test_subclass.py | 4 --
 pandas/tests/series/test_timeseries.py | 1 -
 pandas/tests/test_algos.py | 11 ----
 pandas/tests/test_categorical.py | 50 +++++++++----------
 pandas/tests/test_common.py | 2 -
 pandas/tests/test_config.py | 1 -
 pandas/tests/test_expressions.py | 2 -
 pandas/tests/test_generic.py | 2 -
 pandas/tests/test_internals.py | 5 --
 pandas/tests/test_join.py | 1 -
 pandas/tests/test_multilevel.py | 2 -
 pandas/tests/test_panel.py | 6 ---
 pandas/tests/test_panel4d.py | 8 ---
 pandas/tests/test_reshape.py | 2 -
 pandas/tests/test_stats.py | 1 -
 pandas/tests/test_strings.py | 2 -
 pandas/tests/test_take.py | 4 --
 pandas/tests/test_testing.py | 6 ---
 pandas/tests/test_util.py | 1 +
 pandas/tests/test_window.py | 2 -
 pandas/tests/types/test_cast.py | 2 -
 pandas/tests/types/test_common.py | 2 -
 pandas/tests/types/test_concat.py | 2 -
 pandas/tests/types/test_dtypes.py | 2 -
 pandas/tests/types/test_generic.py | 2 -
 pandas/tests/types/test_inference.py | 3 --
 pandas/tests/types/test_missing.py | 2 -
 pandas/tools/tests/test_concat.py | 2 -
 pandas/tools/tests/test_hashing.py | 2 -
 pandas/tools/tests/test_join.py | 2 -
 pandas/tools/tests/test_merge.py | 2 -
 pandas/tools/tests/test_merge_asof.py | 3 +-
 pandas/tools/tests/test_pivot.py | 10 ++--
 pandas/tools/tests/test_tile.py | 8 +--
 pandas/tseries/period.py | 2 +-
 pandas/tseries/tests/test_base.py | 4 +-
 pandas/tseries/tests/test_bin_groupby.py | 2 +-
 pandas/tseries/tests/test_converter.py | 2 +
 pandas/tseries/tests/test_daterange.py | 2 +
 pandas/tseries/tests/test_frequencies.py | 1 +
 pandas/tseries/tests/test_holiday.py | 4 ++
 pandas/tseries/tests/test_offsets.py | 19 ++++---
 pandas/tseries/tests/test_period.py | 3 ++
 pandas/tseries/tests/test_resample.py | 11 ++--
 pandas/tseries/tests/test_timedeltas.py | 3 +-
 .../tseries/tests/test_timeseries_legacy.py | 2 -
 pandas/tseries/tests/test_timezones.py | 4 +-
 pandas/tseries/tests/test_tslib.py | 3 ++
 pandas/types/generic.py | 1 +
 pandas/util/clipboard/__init__.py | 2 +-
 pandas/util/clipboard/clipboards.py | 1 +
 pandas/util/clipboard/exceptions.py | 1 +
 pandas/util/clipboard/windows.py | 4 +-
 pandas/util/decorators.py | 2 +
 pandas/util/depr_module.py | 1 +
 pandas/util/testing.py | 5 +-
 183 files changed, 229 insertions(+), 418 deletions(-)
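Two recurring edits account for most of the 183 files itemized above. The first drops
the nose-only parallelisation hint from test classes; a before/after sketch of the
shape (illustrative, not copied from any single file; TestExample is hypothetical):

    # before: opt in to nose's multiprocess plugin
    class TestExample(tm.TestCase):
        _multiprocess_can_split_ = True

    # after this patch: no runner-specific attribute at all
    class TestExample(tm.TestCase):
        pass

The second is the "slight PEP fixes" of the subject line, mostly inserting a blank
line between a class statement and its first method and re-aligning continuation
lines, as the hunks below show.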
diff --git a/pandas/__init__.py b/pandas/__init__.py
index 2d91c97144e3c..9133e11beaa2b 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -15,7 +15,8 @@
         missing_dependencies.append(dependency)
 
 if missing_dependencies:
-    raise ImportError("Missing required dependencies {0}".format(missing_dependencies))
+    raise ImportError(
+        "Missing required dependencies {0}".format(missing_dependencies))
 del hard_dependencies, dependency, missing_dependencies
 
 # numpy compat
@@ -24,7 +25,8 @@
 try:
     from pandas import hashtable, tslib, lib
 except ImportError as e:  # pragma: no cover
-    module = str(e).lstrip('cannot import name ')  # hack but overkill to use re
+    # hack but overkill to use re
+    module = str(e).lstrip('cannot import name ')
     raise ImportError("C extension: {0} not built. If you want to import "
                       "pandas from the source directory, you may need to run "
                       "'python setup.py build_ext --inplace --force' to build "
@@ -61,5 +63,5 @@
 # use the closest tagged version if possible
 from ._version import get_versions
 v = get_versions()
-__version__ = v.get('closest-tag',v['version'])
+__version__ = v.get('closest-tag', v['version'])
 del get_versions, v
diff --git a/pandas/_version.py b/pandas/_version.py
index 77b2fdca59576..d764923fd7247 100644
--- a/pandas/_version.py
+++ b/pandas/_version.py
@@ -157,7 +157,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
     # "stabilization", as well as "HEAD" and "master".
     tags = set([r for r in refs if re.search(r'\d', r)])
     if verbose:
-        print("discarding '%s', no digits" % ",".join(refs-tags))
+        print("discarding '%s', no digits" % ",".join(refs - tags))
     if verbose:
         print("likely tags: %s" % ",".join(sorted(tags)))
     for ref in sorted(tags):
diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py
index f925fd792f9ca..02165d82d4232 100644
--- a/pandas/api/tests/test_api.py
+++ b/pandas/api/tests/test_api.py
@@ -8,8 +8,6 @@
 from pandas.api import types
 from pandas.util import testing as tm
 
-_multiprocess_can_split_ = True
-
 
 class Base(object):
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index 532f960468204..7ebdd9735b967 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -79,25 +79,25 @@ def signature(f):
         args = [
             p.name for p in sig.parameters.values()
             if p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD
-            ]
+        ]
         varargs = [
             p.name for p in sig.parameters.values()
            if p.kind == inspect.Parameter.VAR_POSITIONAL
-            ]
+        ]
         varargs = varargs[0] if varargs else None
         keywords = [
            p.name for p in sig.parameters.values()
            if p.kind == inspect.Parameter.VAR_KEYWORD
-            ]
+        ]
        keywords = keywords[0] if keywords else None
        defaults = [
            p.default for p in sig.parameters.values()
            if p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD
            and p.default is not p.empty
-            ] or None
-        argspec = namedtuple('Signature',['args','defaults',
-                                          'varargs','keywords'])
-        return argspec(args,defaults,varargs,keywords)
+        ] or None
+        argspec = namedtuple('Signature', ['args', 'defaults',
+                                           'varargs', 'keywords'])
+        return argspec(args, defaults, varargs, keywords)
 
 # have to explicitly put builtins into the namespace
 range = range
@@ -170,7 +170,7 @@ def iterkeys(obj, **kw):
     def itervalues(obj, **kw):
         return obj.itervalues(**kw)
 
-    next = lambda it : it.next()
+    next = lambda it: it.next()
 else:
     def iteritems(obj, **kw):
         return iter(obj.items(**kw))
@@ -183,6 +183,7 @@ def itervalues(obj, **kw):
 
     next = next
 
+
 def bind_method(cls, name, func):
     """Bind a method to class, python 2 and python 3 compatible.
@@ -307,7 +308,8 @@ def set_function_name(f, name, cls):
         f.__name__ = name
         return f
 
-    class ResourceWarning(Warning): pass
+    class ResourceWarning(Warning):
+        pass
 
 string_and_binary_types = string_types + (binary_type,)
@@ -398,14 +400,18 @@ def is_platform_little_endian():
     """ am I little endian """
     return sys.byteorder == 'little'
 
+
 def is_platform_windows():
     return sys.platform == 'win32' or sys.platform == 'cygwin'
 
+
 def is_platform_linux():
     return sys.platform == 'linux2'
 
+
 def is_platform_mac():
     return sys.platform == 'darwin'
 
+
 def is_platform_32bit():
     return struct.calcsize("P") * 8 < 64
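The signature() shim restyled in the pandas/compat/__init__.py hunks above is easiest
to read with a concrete call. A minimal sketch under the PY3 branch shown there (f is
a hypothetical sample function, not from the patch):

    from pandas.compat import signature

    def f(a, b=1, *args, **kwargs):
        pass

    spec = signature(f)
    # per the comprehensions in the hunk:
    # spec.args     == ['a', 'b']
    # spec.defaults == [1]        (None when a function has no defaults)
    # spec.varargs  == 'args'
    # spec.keywords == 'kwargs'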
diff --git a/pandas/compat/chainmap.py b/pandas/compat/chainmap.py
index 9edd2ef056a52..cf1cad5694570 100644
--- a/pandas/compat/chainmap.py
+++ b/pandas/compat/chainmap.py
@@ -5,6 +5,7 @@
 
 class DeepChainMap(ChainMap):
+
     def __setitem__(self, key, value):
         for mapping in self.maps:
             if key in mapping:
diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py
index 895a376457f09..72e89586d0280 100644
--- a/pandas/compat/numpy/function.py
+++ b/pandas/compat/numpy/function.py
@@ -27,6 +27,7 @@
 
 class CompatValidator(object):
+
     def __init__(self, defaults, fname=None, method=None,
                  max_fname_arg_count=None):
         self.fname = fname
diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py
index 7ed9e7ff90bd8..1cdf8afd563c6 100644
--- a/pandas/compat/pickle_compat.py
+++ b/pandas/compat/pickle_compat.py
@@ -9,6 +9,7 @@
 from pandas import compat, Index
 from pandas.compat import u, string_types
 
+
 def load_reduce(self):
     stack = self.stack
     args = stack.pop()
@@ -34,7 +35,7 @@ def load_reduce(self):
             pass
 
     # try to reencode the arguments
-    if getattr(self,'encoding',None) is not None:
+    if getattr(self, 'encoding', None) is not None:
         args = tuple([arg.encode(self.encoding)
                       if isinstance(arg, string_types)
                       else arg for arg in args])
@@ -44,7 +45,7 @@ def load_reduce(self):
         except:
             pass
 
-    if getattr(self,'is_verbose',None):
+    if getattr(self, 'is_verbose', None):
         print(sys.exc_info())
         print(func, args)
         raise
@@ -61,6 +62,7 @@ class Unpickler(pkl.Unpickler):
 Unpickler.dispatch = copy.copy(Unpickler.dispatch)
 Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce
 
+
 def load_newobj(self):
     args = self.stack.pop()
     cls = self.stack[-1]
@@ -75,6 +77,8 @@ def load_newobj(self):
 Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj
 
 # py3 compat
+
+
 def load_newobj_ex(self):
     kwargs = self.stack.pop()
     args = self.stack.pop()
@@ -91,6 +95,7 @@ def load_newobj_ex(self):
     except:
         pass
 
+
 def load(fh, encoding=None, compat=False, is_verbose=False):
     """load a pickle, with a provided encoding
diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py
index dbac72c619a52..aa05626af9175 100644
--- a/pandas/computation/tests/test_eval.py
+++ b/pandas/computation/tests/test_eval.py
@@ -201,7 +201,7 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2):
                                        binop=binop,
                                        cmp2=cmp2)
         scalar_with_in_notin = (is_scalar(rhs) and (cmp1 in skip_these or
-                                cmp2 in skip_these))
+                                                    cmp2 in skip_these))
         if scalar_with_in_notin:
             with tm.assertRaises(TypeError):
                 pd.eval(ex, engine=self.engine, parser=self.parser)
@@ -702,7 +702,6 @@ def test_float_truncation(self):
         tm.assert_frame_equal(expected, result)
 
 
-
 class TestEvalNumexprPython(TestEvalNumexprPandas):
 
     @classmethod
@@ -782,6 +781,7 @@ def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs):
 
 # typecasting rules consistency with python
 # issue #12388
+
 class TestTypeCasting(object):
 
     def check_binop_typecasting(self, engine, parser, op, dt):
@@ -803,7 +803,8 @@ def test_binop_typecasting(self):
         for engine, parser in ENGINES_PARSERS:
             for op in ['+', '-', '*', '**', '/']:
                 # maybe someday... numexpr has too many upcasting rules now
-                #for dt in chain(*(np.sctypes[x] for x in ['uint', 'int', 'float'])):
+                # for dt in chain(*(np.sctypes[x] for x in ['uint', 'int',
+                #                   'float'])):
                 for dt in [np.float32, np.float64]:
                     yield self.check_binop_typecasting, engine, parser, op, dt
@@ -1969,10 +1970,11 @@ def test_negate_lt_eq_le():
     for engine, parser in product(_engines, expr._parsers):
         yield check_negate_lt_eq_le, engine, parser
 
+
 class TestValidate(tm.TestCase):
 
     def test_validate_bool_args(self):
-        invalid_values = [1, "True", [1,2,3], 5.0]
+        invalid_values = [1, "True", [1, 2, 3], 5.0]
 
         for value in invalid_values:
             with self.assertRaises(ValueError):
diff --git a/pandas/core/base.py b/pandas/core/base.py
index e7a79c3291a92..657da859ddde2 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -230,6 +230,7 @@ def f(self, *args, **kwargs):
 class AccessorProperty(object):
     """Descriptor for implementing accessor properties like Series.str
     """
+
     def __init__(self, accessor_cls, construct_accessor):
         self.accessor_cls = accessor_cls
         self.construct_accessor = construct_accessor
@@ -651,6 +652,7 @@ class GroupByMixin(object):
     @staticmethod
     def _dispatch(name, *args, **kwargs):
         """ dispatch to apply """
+
         def outer(self, *args, **kwargs):
             def f(x):
                 x = self._shallow_copy(x, groupby=self._groupby)
diff --git a/pandas/core/config.py b/pandas/core/config.py
index 618de4e02b56f..ed63c865ebfb4 100644
--- a/pandas/core/config.py
+++ b/pandas/core/config.py
@@ -215,6 +215,7 @@ def __dir__(self):
 
 class CallableDynamicDoc(object):
+
     def __init__(self, func, doc_tmpl):
         self.__doc_tmpl__ = doc_tmpl
         self.__func__ = func
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index cc81c66100a6f..79bdad82af5a3 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5326,9 +5326,10 @@ def isin(self, values):
                                 "allowed to be passed to DataFrame.isin(), "
                                 "you passed a "
                                 "{0!r}".format(type(values).__name__))
-            return DataFrame(lib.ismember(self.values.ravel(),
+            return DataFrame(
+                lib.ismember(self.values.ravel(),
                              set(values)).reshape(self.shape), self.index,
-                             self.columns)
+                self.columns)
 
     # ----------------------------------------------------------------------
     # Deprecated stuff
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 40050d6d769a6..6bb2d1c479844 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -42,6 +42,7 @@ def get_indexers_list():
 
 # the public IndexSlicerMaker
 class _IndexSlice(object):
+
     def __getitem__(self, arg):
         return arg
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 289ce150eb46b..f0b1516d786c6 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -5122,6 +5122,7 @@ def trim_join_unit(join_unit, length):
 
 class JoinUnit(object):
+
     def __init__(self, block, shape, indexers=None):
         # Passing shape explicitly is required for cases when block is None.
         if indexers is None:
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 1f76bc850cee9..0cc3a2d039b5e 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -26,6 +26,7 @@
 
 class disallow(object):
+
     def __init__(self, *dtypes):
         super(disallow, self).__init__()
         self.dtypes = tuple(np.dtype(dtype).type for dtype in dtypes)
@@ -58,6 +59,7 @@ def _f(*args, **kwargs):
 
 class bottleneck_switch(object):
+
     def __init__(self, zero_value=None, **kwargs):
         self.zero_value = zero_value
         self.kwargs = kwargs
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index a11ef53de1af9..6da10305eb4fc 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -1560,6 +1560,7 @@ def f(self, other, axis=0):
 
 # legacy
 class WidePanel(Panel):
+
     def __init__(self, *args, **kwargs):
         # deprecation, #10892
         warnings.warn("WidePanel is deprecated. Please use Panel",
@@ -1569,6 +1570,7 @@ def __init__(self, *args, **kwargs):
 
 class LongPanel(DataFrame):
+
     def __init__(self, *args, **kwargs):
         # deprecation, #10892
         warnings.warn("LongPanel is deprecated. Please use DataFrame",
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 9845e1cd4ad47..43f16f690692a 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2987,6 +2987,7 @@ def create_from_value(value, index, dtype):
 
 # backwards compatiblity
 class TimeSeries(Series):
+
     def __init__(self, *args, **kwargs):
         # deprecation TimeSeries, #10890
         warnings.warn("TimeSeries is deprecated. Please use Series",
diff --git a/pandas/core/window.py b/pandas/core/window.py
index bda134dd8a2a4..50de6b84d7cba 100644
--- a/pandas/core/window.py
+++ b/pandas/core/window.py
@@ -659,6 +659,7 @@ def f(x, name=name, *args):
 
 class _Rolling(_Window):
+
     @property
     def _constructor(self):
         return Rolling
@@ -1718,7 +1719,7 @@ def dataframe_from_int_dict(data, frame_template):
 
 def _get_center_of_mass(com, span, halflife, alpha):
     valid_count = len([x for x in [com, span, halflife, alpha]
-                      if x is not None])
+                       if x is not None])
     if valid_count > 1:
         raise ValueError("com, span, halflife, and alpha "
                          "are mutually exclusive")
diff --git a/pandas/formats/format.py b/pandas/formats/format.py
index 3bac7d2821760..439b96d650204 100644
--- a/pandas/formats/format.py
+++ b/pandas/formats/format.py
@@ -89,6 +89,7 @@
 
 class CategoricalFormatter(object):
+
     def __init__(self, categorical, buf=None, length=True, na_rep='NaN',
                  footer=True):
         self.categorical = categorical
@@ -142,6 +143,7 @@ def to_string(self):
 
 class SeriesFormatter(object):
+
     def __init__(self, series, buf=None, length=True, header=True, index=True,
                  na_rep='NaN', name=False, float_format=None, dtype=True,
                  max_rows=None):
@@ -272,6 +274,7 @@ def to_string(self):
 
 class TextAdjustment(object):
+
     def __init__(self):
         self.encoding = get_option("display.encoding")
@@ -287,6 +290,7 @@ def adjoin(self, space, *lists, **kwargs):
 
 class EastAsianTextAdjustment(TextAdjustment):
+
     def __init__(self):
         super(EastAsianTextAdjustment, self).__init__()
         if get_option("display.unicode.ambiguous_as_wide"):
@@ -1366,6 +1370,7 @@ def _get_level_lengths(levels, sentinel=''):
 
 class CSVFormatter(object):
+
     def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
                  float_format=None, cols=None, header=True, index=True,
                  index_label=None, mode='w', nanRep=None, encoding=None,
@@ -1950,6 +1955,7 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
 
 class GenericArrayFormatter(object):
+
     def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
                  space=12, float_format=None, justify='right',
                  decimal='.', quoting=None, fixed_width=True):
@@ -2151,6 +2157,7 @@ def _format_strings(self):
 
 class IntArrayFormatter(GenericArrayFormatter):
+
     def _format_strings(self):
         formatter = self.formatter or (lambda x: '% d' % x)
         fmt_values = [formatter(x) for x in self.values]
@@ -2158,6 +2165,7 @@ def _format_strings(self):
 
 class Datetime64Formatter(GenericArrayFormatter):
+
     def __init__(self, values, nat_rep='NaT', date_format=None, **kwargs):
         super(Datetime64Formatter, self).__init__(values, **kwargs)
         self.nat_rep = nat_rep
@@ -2183,6 +2191,7 @@ def _format_strings(self):
 
 class PeriodArrayFormatter(IntArrayFormatter):
+
     def _format_strings(self):
         from pandas.tseries.period import IncompatibleFrequency
         try:
@@ -2197,6 +2206,7 @@ def _format_strings(self):
 
 class CategoricalArrayFormatter(GenericArrayFormatter):
+
     def __init__(self, values, *args, **kwargs):
         GenericArrayFormatter.__init__(self, values, *args, **kwargs)
@@ -2328,6 +2338,7 @@ def _get_format_datetime64_from_values(values, date_format):
 
 class Datetime64TZFormatter(Datetime64Formatter):
+
     def _format_strings(self):
         """ we by definition have a TZ """
@@ -2342,6 +2353,7 @@ def _format_strings(self):
 
 class Timedelta64Formatter(GenericArrayFormatter):
+
     def __init__(self, values, nat_rep='NaT', box=False, **kwargs):
         super(Timedelta64Formatter, self).__init__(values, **kwargs)
         self.nat_rep = nat_rep
diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py
index dcd565ee5f0e9..bb2941a121452 100644
--- a/pandas/indexes/base.py
+++ b/pandas/indexes/base.py
@@ -168,8 +168,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
 
         elif isinstance(data, (np.ndarray, Index, ABCSeries)):
             if (is_datetime64_any_dtype(data) or
-                (dtype is not None and is_datetime64_any_dtype(dtype)) or
-                'tz' in kwargs):
+                    (dtype is not None and is_datetime64_any_dtype(dtype)) or
+                    'tz' in kwargs):
                 from pandas.tseries.index import DatetimeIndex
                 result = DatetimeIndex(data, copy=copy, name=name,
                                        dtype=dtype, **kwargs)
@@ -3606,7 +3606,7 @@ def _validate_for_numeric_binop(self, other, op, opstr):
                     typ=type(other))
                 )
         elif isinstance(other, np.ndarray) and not other.ndim:
-                other = other.item()
+            other = other.item()
 
         if isinstance(other, (Index, ABCSeries, np.ndarray)):
             if len(self) != len(other):
diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index 00ead012a916a..d2469cf1a3eed 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -1813,7 +1813,7 @@ def partial_selection(key, indexer=None):
                              for k, l in zip(key, self.levels)]
                 can_index_exactly = any(all_dates)
                 if (any([l.is_all_dates
-                        for k, l in zip(key, self.levels)]) and
+                         for k, l in zip(key, self.levels)]) and
                         not can_index_exactly):
                     indexer = self.get_loc(key)
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 6817c824ad786..b24acb256c4a9 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -109,6 +109,7 @@ class BaseIterator(object):
     """Subclass this and provide a "__next__()" method to obtain an iterator.
     Useful only when the object being iterated is non-reusable (e.g. OK for a
     parser, not for an in-memory table, yes for its iterator)."""
+
     def __iter__(self):
         return self
diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
index d29f4a371dd4d..6fc766081eefe 100644
--- a/pandas/io/json/json.py
+++ b/pandas/io/json/json.py
@@ -23,8 +23,8 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
             default_handler=None, lines=False):
 
     if lines and orient != 'records':
-            raise ValueError(
-                "'lines' keyword only valid when 'orient' is records")
+        raise ValueError(
+            "'lines' keyword only valid when 'orient' is records")
 
     if isinstance(obj, Series):
         s = SeriesWriter(
diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py
index aa80954233682..d684441c5974d 100644
--- a/pandas/io/json/normalize.py
+++ b/pandas/io/json/normalize.py
@@ -89,7 +89,6 @@ def json_normalize(data, record_path=None, meta=None,
                    meta_prefix=None,
                    record_prefix=None,
                    errors='raise'):
-
     """
     "Normalize" semi-structured JSON data into a flat table
diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py
index 29e7f131fd9bc..3e4d9c9024dbd 100644
--- a/pandas/io/sas/sasreader.py
+++ b/pandas/io/sas/sasreader.py
@@ -6,7 +6,6 @@
 def read_sas(filepath_or_buffer, format=None, index=None, encoding=None,
              chunksize=None, iterator=False):
-
     """
     Read SAS files stored as either XPORT or SAS7BDAT format files.
diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/io/tests/parser/c_parser_only.py
index 73edda90720af..11073f3f108ba 100644
--- a/pandas/io/tests/parser/c_parser_only.py
+++ b/pandas/io/tests/parser/c_parser_only.py
@@ -18,6 +18,7 @@
 
 class CParserTests(object):
+
     def test_buffer_overflow(self):
         # see gh-9205: test certain malformed input files that cause
         # buffer overflows in tokenizer.c
@@ -375,13 +376,13 @@ def test_internal_null_byte(self):
     def test_read_nrows_large(self):
         # gh-7626 - Read only nrows of data in for large inputs (>262144b)
         header_narrow = '\t'.join(['COL_HEADER_' + str(i)
-                                  for i in range(10)]) + '\n'
+                                   for i in range(10)]) + '\n'
         data_narrow = '\t'.join(['somedatasomedatasomedata1'
-                                for i in range(10)]) + '\n'
+                                 for i in range(10)]) + '\n'
         header_wide = '\t'.join(['COL_HEADER_' + str(i)
-                                for i in range(15)]) + '\n'
+                                 for i in range(15)]) + '\n'
         data_wide = '\t'.join(['somedatasomedatasomedata2'
-                              for i in range(15)]) + '\n'
+                               for i in range(15)]) + '\n'
         test_input = (header_narrow + data_narrow * 1050 +
                       header_wide + data_wide * 2)
diff --git a/pandas/io/tests/parser/compression.py b/pandas/io/tests/parser/compression.py
index e95617faf2071..308ca6e8a5a2c 100644
--- a/pandas/io/tests/parser/compression.py
+++ b/pandas/io/tests/parser/compression.py
@@ -11,6 +11,7 @@
 
 class CompressionTests(object):
+
     def test_zip(self):
         try:
             import zipfile
diff --git a/pandas/io/tests/parser/converters.py b/pandas/io/tests/parser/converters.py
index 68231d67534ee..2ceaff9291e7e 100644
--- a/pandas/io/tests/parser/converters.py
+++ b/pandas/io/tests/parser/converters.py
@@ -19,6 +19,7 @@
 
 class ConverterTests(object):
+
     def test_converters_type_must_be_dict(self):
         data = """index,A,B,C,D
 foo,2,3,4,5
diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
index abcd14e9499cb..fa95c18c4d7a9 100644
--- a/pandas/io/tests/parser/dtypes.py
+++ b/pandas/io/tests/parser/dtypes.py
@@ -16,6 +16,7 @@
 
 class DtypeTests(object):
+
     def test_passing_dtype(self):
         # see gh-6607
         df = DataFrame(np.random.rand(5, 2).round(4), columns=list(
diff --git a/pandas/io/tests/parser/parse_dates.py b/pandas/io/tests/parser/parse_dates.py
index e4af1ff70a498..ad3d5f2382a49 100644
--- a/pandas/io/tests/parser/parse_dates.py
+++ b/pandas/io/tests/parser/parse_dates.py
@@ -25,6 +25,7 @@
 
 class ParseDatesTests(object):
+
     def test_separator_date_conflict(self):
         # Regression test for gh-4678: make sure thousands separator and
         # date parsing do not conflict.
diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py
index ad62aaa275127..283ff366b5efd 100644
--- a/pandas/io/tests/parser/python_parser_only.py
+++ b/pandas/io/tests/parser/python_parser_only.py
@@ -18,6 +18,7 @@
 
 class PythonParserTests(object):
+
     def test_negative_skipfooter_raises(self):
         text = """#foo,a,b,c
 #foo,a,b,c
diff --git a/pandas/io/tests/parser/test_parsers.py b/pandas/io/tests/parser/test_parsers.py
index 93b5fdcffed4c..2ae557a7d57db 100644
--- a/pandas/io/tests/parser/test_parsers.py
+++ b/pandas/io/tests/parser/test_parsers.py
@@ -32,6 +32,7 @@
 class BaseParser(CommentTests, CompressionTests,
                  ParseDatesTests, ParserTests,
                  SkipRowsTests, UsecolsTests,
                  QuotingTests, DtypeTests):
+
     def read_csv(self, *args, **kwargs):
         raise NotImplementedError
diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/io/tests/parser/test_unsupported.py
index e941c9186cd6a..999db47cf2eaf 100644
--- a/pandas/io/tests/parser/test_unsupported.py
+++ b/pandas/io/tests/parser/test_unsupported.py
@@ -18,6 +18,7 @@
 
 class TestUnsupportedFeatures(tm.TestCase):
+
     def test_mangle_dupe_cols_false(self):
         # see gh-12935
         data = 'a b c\n1 2 3'
@@ -111,6 +112,7 @@ def test_python_engine(self):
 
 class TestDeprecatedFeatures(tm.TestCase):
+
     def test_deprecated_args(self):
         data = '1,2,3'
diff --git a/pandas/io/tests/test_clipboard.py b/pandas/io/tests/test_clipboard.py
index 93d14077aeacf..98a4152754b55 100644
--- a/pandas/io/tests/test_clipboard.py
+++ b/pandas/io/tests/test_clipboard.py
@@ -54,7 +54,7 @@ def setUpClass(cls):
                                      'es': 'en español'.split()})
         # unicode round trip test for GH 13747, GH 12529
         cls.data['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
-                                        'b': ['øπ∆˚¬', 'œ∑´®']})
+                                         'b': ['øπ∆˚¬', 'œ∑´®']})
         cls.data_types = list(cls.data.keys())
 
     @classmethod
diff --git a/pandas/io/tests/test_feather.py b/pandas/io/tests/test_feather.py
index dcb057ec30004..218175e5ef527 100644
--- a/pandas/io/tests/test_feather.py
+++ b/pandas/io/tests/test_feather.py
@@ -18,7 +18,6 @@
 
 class TestFeather(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def setUp(self):
         pass
diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py
index ac481a44de5e8..0507f0d89661c 100644
--- a/pandas/io/tests/test_gbq.py
+++ b/pandas/io/tests/test_gbq.py
@@ -294,6 +294,7 @@ def test_get_application_default_credentials_returns_credentials(self):
 
 class TestGBQConnectorServiceAccountKeyPathIntegration(tm.TestCase):
+
     def setUp(self):
         _setup_common()
@@ -325,6 +326,7 @@ def test_should_be_able_to_get_results_from_query(self):
 
 class TestGBQConnectorServiceAccountKeyContentsIntegration(tm.TestCase):
+
     def setUp(self):
         _setup_common()
@@ -356,6 +358,7 @@ def test_should_be_able_to_get_results_from_query(self):
 
 class GBQUnitTests(tm.TestCase):
+
     def setUp(self):
         _setup_common()
diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py
index 6b368bb2bb5ce..8a0cfb92bd3c0 100644
--- a/pandas/io/tests/test_packers.py
+++ b/pandas/io/tests/test_packers.py
@@ -40,8 +40,6 @@
 else:
     _ZLIB_INSTALLED = True
 
-_multiprocess_can_split_ = False
-
 
 def check_arbitrary(a, b):
 
@@ -870,7 +868,7 @@ def read_msgpacks(self, version):
 
         for f in os.listdir(pth):
            # GH12142 0.17
files packed in P2 can't be read in P3 if (compat.PY3 and version.startswith('0.17.') and - f.split('.')[-4][-1] == '2'): + f.split('.')[-4][-1] == '2'): continue vf = os.path.join(pth, f) try: diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py index b5c316b326b8d..73a9173e85906 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -30,7 +30,6 @@ class TestPickle(): http://stackoverflow.com/questions/6689537/ nose-test-generators-inside-class """ - _multiprocess_can_split_ = True def setUp(self): from pandas.io.tests.generate_legacy_storage_files import ( diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index f4f03856f94e2..501e744ad308c 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -50,7 +50,6 @@ _default_compressor = ('blosc' if LooseVersion(tables.__version__) >= '2.2' else 'zlib') -_multiprocess_can_split_ = False # testing on windows/py3 seems to fault # for using compression diff --git a/pandas/msgpack/exceptions.py b/pandas/msgpack/exceptions.py index 40f5a8af8f583..ae0f74a6700bd 100644 --- a/pandas/msgpack/exceptions.py +++ b/pandas/msgpack/exceptions.py @@ -15,6 +15,7 @@ class UnpackValueError(UnpackException, ValueError): class ExtraData(ValueError): + def __init__(self, unpacked, extra): self.unpacked = unpacked self.extra = extra diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index da13726e88a14..c65e0dd5c9f7b 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -239,7 +239,7 @@ def _simple_new(cls, data, sp_index, fill_value): fill_value = na_value_for_dtype(data.dtype) if (is_integer_dtype(data) and is_float(fill_value) and - sp_index.ngaps > 0): + sp_index.ngaps > 0): # if float fill_value is being included in dense repr, # convert values to float data = data.astype(float) @@ -405,7 +405,6 @@ def __iter__(self): yield self._get_val_at(i) def __getitem__(self, key): - """ """ diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index d6bc892921c42..2d3a9effe6939 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -847,6 +847,7 @@ def from_coo(cls, A, dense_index=False): # backwards compatibility class SparseTimeSeries(SparseSeries): + def __init__(self, *args, **kwargs): # deprecation TimeSeries, #10890 warnings.warn("SparseTimeSeries is deprecated. 
Please use " diff --git a/pandas/sparse/tests/test_arithmetics.py b/pandas/sparse/tests/test_arithmetics.py index f24244b38c42b..eb926082a7b7c 100644 --- a/pandas/sparse/tests/test_arithmetics.py +++ b/pandas/sparse/tests/test_arithmetics.py @@ -5,8 +5,6 @@ class TestSparseArrayArithmetics(tm.TestCase): - _multiprocess_can_split_ = True - _base = np.array _klass = pd.SparseArray diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index 55f292a8a231a..70aaea5b5b1f0 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -14,7 +14,6 @@ class TestSparseArray(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): self.arr_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6]) @@ -655,6 +654,7 @@ def test_fillna_overlap(self): class TestSparseArrayAnalytics(tm.TestCase): + def test_sum(self): data = np.arange(10).astype(float) out = SparseArray(data).sum() diff --git a/pandas/sparse/tests/test_combine_concat.py b/pandas/sparse/tests/test_combine_concat.py index 5240d592810ad..81655daec6164 100644 --- a/pandas/sparse/tests/test_combine_concat.py +++ b/pandas/sparse/tests/test_combine_concat.py @@ -7,8 +7,6 @@ class TestSparseSeriesConcat(tm.TestCase): - _multiprocess_can_split_ = True - def test_concat(self): val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) val2 = np.array([3, np.nan, 4, 0, 0]) @@ -126,8 +124,6 @@ def test_concat_sparse_dense(self): class TestSparseDataFrameConcat(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.dense1 = pd.DataFrame({'A': [0., 1., 2., np.nan], diff --git a/pandas/sparse/tests/test_format.py b/pandas/sparse/tests/test_format.py index 377eaa20565a2..0c0e773d19bb9 100644 --- a/pandas/sparse/tests/test_format.py +++ b/pandas/sparse/tests/test_format.py @@ -15,8 +15,6 @@ class TestSparseSeriesFormatting(tm.TestCase): - _multiprocess_can_split_ = True - @property def dtype_format_for_platform(self): return '' if use_32bit_repr else ', dtype=int32' diff --git a/pandas/sparse/tests/test_frame.py b/pandas/sparse/tests/test_frame.py index e26c0ed1afe58..e3b865492c043 100644 --- a/pandas/sparse/tests/test_frame.py +++ b/pandas/sparse/tests/test_frame.py @@ -22,7 +22,6 @@ class TestSparseDataFrame(tm.TestCase, SharedWithSparse): klass = SparseDataFrame - _multiprocess_can_split_ = True def setUp(self): self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], @@ -1150,6 +1149,7 @@ def test_comparison_op_scalar(self): class TestSparseDataFrameAnalytics(tm.TestCase): + def setUp(self): self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], diff --git a/pandas/sparse/tests/test_groupby.py b/pandas/sparse/tests/test_groupby.py index 0cb33f4ea0a56..23bea94a2aef8 100644 --- a/pandas/sparse/tests/test_groupby.py +++ b/pandas/sparse/tests/test_groupby.py @@ -6,8 +6,6 @@ class TestSparseGroupBy(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], diff --git a/pandas/sparse/tests/test_indexing.py b/pandas/sparse/tests/test_indexing.py index a634c34139186..c400b68c8a7d8 100644 --- a/pandas/sparse/tests/test_indexing.py +++ b/pandas/sparse/tests/test_indexing.py @@ -8,8 +8,6 @@ class TestSparseSeriesIndexing(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.orig = pd.Series([1, np.nan, np.nan, 3, np.nan]) self.sparse = self.orig.to_sparse() @@ -431,8 +429,6 @@ def tests_indexing_with_sparse(self): class 
TestSparseSeriesMultiIndexing(TestSparseSeriesIndexing): - _multiprocess_can_split_ = True - def setUp(self): # Mi with duplicated values idx = pd.MultiIndex.from_tuples([('A', 0), ('A', 1), ('B', 0), @@ -544,8 +540,6 @@ def test_loc_slice(self): class TestSparseDataFrameIndexing(tm.TestCase): - _multiprocess_can_split_ = True - def test_getitem(self): orig = pd.DataFrame([[1, np.nan, np.nan], [2, 3, np.nan], @@ -908,6 +902,7 @@ def test_reindex_fill_value(self): class TestMultitype(tm.TestCase): + def setUp(self): self.cols = ['string', 'int', 'float', 'object'] diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py index b3aa3368e9455..491005db2ae79 100644 --- a/pandas/sparse/tests/test_libsparse.py +++ b/pandas/sparse/tests/test_libsparse.py @@ -241,8 +241,6 @@ def test_intersect_identical(self): class TestSparseIndexCommon(tm.TestCase): - _multiprocess_can_split_ = True - def test_int_internal(self): idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer') self.assertIsInstance(idx, IntIndex) @@ -391,8 +389,6 @@ def _check(index): class TestBlockIndex(tm.TestCase): - _multiprocess_can_split_ = True - def test_block_internal(self): idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='block') self.assertIsInstance(idx, BlockIndex) @@ -478,8 +474,6 @@ def test_to_block_index(self): class TestIntIndex(tm.TestCase): - _multiprocess_can_split_ = True - def test_int_internal(self): idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer') self.assertIsInstance(idx, IntIndex) diff --git a/pandas/sparse/tests/test_list.py b/pandas/sparse/tests/test_list.py index 458681cdc1de0..8511cd5997368 100644 --- a/pandas/sparse/tests/test_list.py +++ b/pandas/sparse/tests/test_list.py @@ -10,8 +10,6 @@ class TestSparseList(unittest.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.na_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6]) self.zero_data = np.array([0, 0, 1, 2, 3, 0, 4, 5, 0, 6]) diff --git a/pandas/sparse/tests/test_pivot.py b/pandas/sparse/tests/test_pivot.py index 482a99a96194f..4ff9f20093c67 100644 --- a/pandas/sparse/tests/test_pivot.py +++ b/pandas/sparse/tests/test_pivot.py @@ -5,8 +5,6 @@ class TestPivotTable(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], diff --git a/pandas/sparse/tests/test_series.py b/pandas/sparse/tests/test_series.py index b34f5dd2cee9f..db6ae14b096d3 100644 --- a/pandas/sparse/tests/test_series.py +++ b/pandas/sparse/tests/test_series.py @@ -56,7 +56,6 @@ def _test_data2_zero(): class TestSparseSeries(tm.TestCase, SharedWithSparse): - _multiprocess_can_split_ = True def setUp(self): arr, index = _test_data1() @@ -941,6 +940,7 @@ def test_combine_first(self): class TestSparseHandlingMultiIndexes(tm.TestCase): + def setUp(self): miindex = pd.MultiIndex.from_product( [["x", "y"], ["10", "20"]], names=['row-foo', 'row-bar']) diff --git a/pandas/stats/fama_macbeth.py b/pandas/stats/fama_macbeth.py index f7d50e8e72a5c..d564f9cb6c425 100644 --- a/pandas/stats/fama_macbeth.py +++ b/pandas/stats/fama_macbeth.py @@ -9,6 +9,7 @@ # flake8: noqa + def fama_macbeth(**kwargs): """Runs Fama-MacBeth regression. diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py index b533d255bd196..96ec70d59488a 100644 --- a/pandas/stats/ols.py +++ b/pandas/stats/ols.py @@ -24,6 +24,7 @@ _FP_ERR = 1e-8 + class OLS(StringMixin): """ Runs a full sample ordinary least squares regression. 
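A minimal, self-contained sketch of the full-sample least-squares fit that the OLS docstring above describes; the variable names and data below are illustrative assumptions, not the legacy pandas.stats.ols implementation:

    import numpy as np

    def ols_fit(x, y):
        """Full-sample OLS: return (intercept, slope) for y ~ 1 + x."""
        X = np.column_stack([np.ones(len(x)), x])     # design matrix with intercept
        beta, *_ = np.linalg.lstsq(X, y, rcond=None)  # least-squares solve
        return beta

    rng = np.random.RandomState(0)
    x = rng.randn(100)
    y = 1.0 + 2.0 * x + 0.1 * rng.randn(100)
    print(ols_fit(x, y))  # approximately [1.0, 2.0]
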
diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py index 09fa21d58ea9d..b90c51366c86f 100644 --- a/pandas/stats/tests/test_ols.py +++ b/pandas/stats/tests/test_ols.py @@ -60,8 +60,6 @@ def _compare_moving_ols(model1, model2): class TestOLS(BaseTest): - _multiprocess_can_split_ = True - # TODO: Add tests for OLS y predict # TODO: Right now we just check for consistency between full-sample and # rolling/expanding results of the panel OLS. We should also cross-check @@ -262,8 +260,6 @@ def test_ols_object_dtype(self): class TestOLSMisc(tm.TestCase): - _multiprocess_can_split_ = True - """ For test coverage with faux data """ @@ -511,8 +507,6 @@ def test_columns_tuples_summary(self): class TestPanelOLS(BaseTest): - _multiprocess_can_split_ = True - FIELDS = ['beta', 'df', 'df_model', 'df_resid', 'f_stat', 'p_value', 'r2', 'r2_adj', 'rmse', 'std_err', 't_stat', 'var_beta'] @@ -894,8 +888,6 @@ def _period_slice(panelModel, i): class TestOLSFilter(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): date_index = date_range(datetime(2009, 12, 11), periods=3, freq=offsets.BDay()) diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 7a2c5f3b7f7c1..a9553d9ea10cb 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -113,7 +113,6 @@ def has_expanded_repr(df): class TestDataFrameFormatting(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): self.warn_filters = warnings.filters @@ -762,14 +761,15 @@ def test_truncate_with_different_dtypes(self): # 11594 import datetime - s = Series([datetime.datetime(2012, 1, 1)]*10 + [datetime.datetime(1012,1,2)] + [datetime.datetime(2012, 1, 3)]*10) + s = Series([datetime.datetime(2012, 1, 1)] * 10 + + [datetime.datetime(1012, 1, 2)] + [datetime.datetime(2012, 1, 3)] * 10) with pd.option_context('display.max_rows', 8): result = str(s) self.assertTrue('object' in result) # 12045 - df = DataFrame({'text': ['some words'] + [None]*9}) + df = DataFrame({'text': ['some words'] + [None] * 9}) with pd.option_context('display.max_rows', 8, 'display.max_columns', 3): result = str(df) @@ -779,7 +779,8 @@ def test_truncate_with_different_dtypes(self): def test_datetimelike_frame(self): # GH 12211 - df = DataFrame({'date' : [pd.Timestamp('20130101').tz_localize('UTC')] + [pd.NaT]*5}) + df = DataFrame( + {'date': [pd.Timestamp('20130101').tz_localize('UTC')] + [pd.NaT] * 5}) with option_context("display.max_rows", 5): result = str(df) @@ -1219,8 +1220,8 @@ def test_to_html_multiindex_odd_even_truncate(self): mi = MultiIndex.from_product([[100, 200, 300], [10, 20, 30], [1, 2, 3, 4, 5, 6, 7]], - names=['a','b','c']) - df = DataFrame({'n' : range(len(mi))}, index = mi) + names=['a', 'b', 'c']) + df = DataFrame({'n': range(len(mi))}, index=mi) result = df.to_html(max_rows=60) expected = """\ @@ -3451,8 +3452,8 @@ def test_to_latex_with_formatters(self): 'float': [1.0, 2.0, 3.0], 'object': [(1, 2), True, False], 'datetime64': [datetime(2016, 1, 1), - datetime(2016, 2, 5), - datetime(2016, 3, 3)]}) + datetime(2016, 2, 5), + datetime(2016, 3, 3)]}) formatters = {'int': lambda x: '0x%x' % x, 'float': lambda x: '[% 4.1f]' % x, @@ -3896,7 +3897,7 @@ def test_to_csv_date_format(self): def test_to_csv_multi_index(self): # see gh-6618 - df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1],[2]])) + df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]])) exp = ",1\n,2\n0,1\n" self.assertEqual(df.to_csv(), exp) @@ -3904,8 +3905,8 @@ def 
test_to_csv_multi_index(self): exp = "1\n2\n1\n" self.assertEqual(df.to_csv(index=False), exp) - df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1],[2]]), - index=pd.MultiIndex.from_arrays([[1],[2]])) + df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]), + index=pd.MultiIndex.from_arrays([[1], [2]])) exp = ",,1\n,,2\n1,2,1\n" self.assertEqual(df.to_csv(), exp) @@ -3913,7 +3914,8 @@ def test_to_csv_multi_index(self): exp = "1\n2\n1\n" self.assertEqual(df.to_csv(index=False), exp) - df = DataFrame([1], columns=pd.MultiIndex.from_arrays([['foo'],['bar']])) + df = DataFrame( + [1], columns=pd.MultiIndex.from_arrays([['foo'], ['bar']])) exp = ",foo\n,bar\n0,1\n" self.assertEqual(df.to_csv(), exp) @@ -3938,8 +3940,6 @@ def test_period(self): class TestSeriesFormatting(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.ts = tm.makeTimeSeries() @@ -4452,7 +4452,6 @@ def test_to_string_header(self): class TestEngFormatter(tm.TestCase): - _multiprocess_can_split_ = True def test_eng_float_formatter(self): df = DataFrame({'A': [1.41, 141., 14100, 1410000.]}) @@ -4605,9 +4604,9 @@ def test_nan(self): result = formatter(np.nan) self.assertEqual(result, u('NaN')) - df = pd.DataFrame({'a':[1.5, 10.3, 20.5], - 'b':[50.3, 60.67, 70.12], - 'c':[100.2, 101.33, 120.33]}) + df = pd.DataFrame({'a': [1.5, 10.3, 20.5], + 'b': [50.3, 60.67, 70.12], + 'c': [100.2, 101.33, 120.33]}) pt = df.pivot_table(values='a', index='b', columns='c') fmt.set_eng_float_format(accuracy=1) result = pt.to_string() diff --git a/pandas/tests/formats/test_printing.py b/pandas/tests/formats/test_printing.py index d1eb1faecc401..1e6794c1c9c69 100644 --- a/pandas/tests/formats/test_printing.py +++ b/pandas/tests/formats/test_printing.py @@ -5,8 +5,6 @@ import pandas.util.testing as tm import pandas.core.config as cf -_multiprocess_can_split_ = True - def test_adjoin(): data = [['a', 'b', 'c'], ['dd', 'ee', 'ff'], ['ggg', 'hhh', 'iii']] diff --git a/pandas/tests/formats/test_style.py b/pandas/tests/formats/test_style.py index 2fec04b9c1aa3..eaa209178b2e9 100644 --- a/pandas/tests/formats/test_style.py +++ b/pandas/tests/formats/test_style.py @@ -660,7 +660,7 @@ def test_mi_sparse_disabled(self): with pd.option_context('display.multi_sparse', False): df = pd.DataFrame({'A': [1, 2]}, index=pd.MultiIndex.from_arrays([['a', 'a'], - [0, 1]])) + [0, 1]])) result = df.style._translate() body = result['body'] for row in body: diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index cab627dec63cb..e84bb6407fafc 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -22,8 +22,6 @@ class TestDataFrameAlterAxes(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_set_index(self): idx = Index(np.arange(len(self.mixed_frame))) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 0dbb78ec89b2e..a55d2cfb2fb2b 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -25,8 +25,6 @@ class TestDataFrameAnalytics(tm.TestCase, TestData): - _multiprocess_can_split_ = True - # ---------------------------------------------------------------------= # Correlation and covariance diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 19fa98afd2163..30fde4b5b78d8 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -19,8 +19,6 @@ class TestDataFrameApply(tm.TestCase, TestData): - 
_multiprocess_can_split_ = True - def test_apply(self): with np.errstate(all='ignore'): # ufunc diff --git a/pandas/tests/frame/test_asof.py b/pandas/tests/frame/test_asof.py index 323960d54a42c..8bb26d3d7474c 100644 --- a/pandas/tests/frame/test_asof.py +++ b/pandas/tests/frame/test_asof.py @@ -11,7 +11,6 @@ class TestFrameAsof(TestData, tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): self.N = N = 50 diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index ff6215531fc64..839ceb5368240 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -26,8 +26,6 @@ class TestDataFrameSelectReindex(tm.TestCase, TestData): # These are specific reindex-based tests; other indexing tests should go in # test_indexing - _multiprocess_can_split_ = True - def test_drop_names(self): df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]], index=['a', 'b', 'c'], diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 33550670720c3..7b64dea8c102d 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -29,8 +29,6 @@ class TestDataFrameBlockInternals(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_cast_internals(self): casted = DataFrame(self.frame._data, dtype=int) expected = DataFrame(self.frame._series, dtype=int) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 1167662b69375..eed4d6261d6e8 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -22,8 +22,6 @@ class TestDataFrameConcatCommon(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_concat_multiple_frames_dtypes(self): # GH 2759 @@ -427,8 +425,6 @@ def test_concat_axis_parameter(self): class TestDataFrameCombineFirst(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_combine_first_mixed(self): a = Series(['a', 'b'], index=lrange(2)) b = Series(lrange(2), index=lrange(2)) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 1676c57a274cd..66a235e1260bd 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -36,8 +36,6 @@ class TestDataFrameConstructors(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_constructor(self): df = DataFrame() self.assertEqual(len(df.index), 0) @@ -1886,8 +1884,6 @@ def test_from_records_len0_with_columns(self): class TestDataFrameConstructorWithDatetimeTZ(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_from_dict(self): # 8260 diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 53083a602e183..1bc8313726d0c 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -16,8 +16,6 @@ class TestDataFrameConvertTo(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_to_dict(self): test_data = { 'A': {'1': 1, '2': 2}, diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 798982bcbdedf..f7d2c1a654cd5 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -18,8 +18,6 @@ class TestDataFrameDataTypes(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_concat_empty_dataframe_dtypes(self): df = DataFrame(columns=list("abc")) df['a'] = 
df['a'].astype(np.bool_) @@ -539,8 +537,6 @@ def test_arg_for_errors_in_astype(self): class TestDataFrameDatetimeWithTZ(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_interleave(self): # interleave with object diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index f0e6ab4c17915..c06faa75ed346 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -37,8 +37,6 @@ class TestDataFrameIndexing(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_getitem(self): # slicing sl = self.frame[:20] @@ -2841,8 +2839,6 @@ def test_type_error_multiindex(self): class TestDataFrameIndexingDatetimeWithTZ(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def setUp(self): self.idx = Index(date_range('20130101', periods=3, tz='US/Eastern'), name='foo') @@ -2902,8 +2898,6 @@ def test_transpose(self): class TestDataFrameIndexingUInt64(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def setUp(self): self.ir = Index(np.arange(3), dtype=np.uint64) self.idx = Index([2**63, 2**63 + 5, 2**63 + 10], name='foo') diff --git a/pandas/tests/frame/test_misc_api.py b/pandas/tests/frame/test_misc_api.py index 2fc14d9e4d123..674202980807a 100644 --- a/pandas/tests/frame/test_misc_api.py +++ b/pandas/tests/frame/test_misc_api.py @@ -27,8 +27,6 @@ class SharedWithSparse(object): - _multiprocess_can_split_ = True - def test_copy_index_name_checking(self): # don't want to be able to modify the index stored elsewhere after # making a copy @@ -159,8 +157,6 @@ class TestDataFrameMisc(tm.TestCase, SharedWithSparse, TestData): klass = DataFrame - _multiprocess_can_split_ = True - def test_get_axis(self): f = self.frame self.assertEqual(f._get_axis_number(0), 0) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 8c25f71c00684..ef800f0dface3 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -29,8 +29,6 @@ def _skip_if_no_pchip(): class TestDataFrameMissingData(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_dropEmptyRows(self): N = len(self.frame.index) mat = random.randn(N) diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 5beab1565e538..6b4c56747c981 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -21,8 +21,6 @@ class TestDataFrameMutateColumns(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_assign(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) original = df.copy() diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 835c18ffc6081..4ad88a12a2625 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -19,8 +19,6 @@ class TestDataFrameNonuniqueIndexes(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_column_dups_operations(self): def check(result, expected=None): diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 15f98abe1445d..ec73689088035 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -31,8 +31,6 @@ class TestDataFrameOperators(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_operators(self): garbage = random.random(4) colSeries = Series(garbage, index=np.array(self.frame.columns)) diff --git 
a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 22414a6ba8a53..400ead788aa7c 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -21,8 +21,6 @@ class TestDataFrameQuantile(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_quantile(self): from numpy import percentile diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index a9a90a6f5cd40..aed02b7323f85 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -90,8 +90,6 @@ def test_query_numexpr(self): class TestDataFrameEval(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_ops(self): # tst ops and reversed ops in evaluation @@ -168,8 +166,6 @@ def test_eval_resolvers_as_list(self): class TestDataFrameQueryWithMultiIndex(tm.TestCase): - _multiprocess_can_split_ = True - def check_query_with_named_multiindex(self, parser, engine): tm.skip_if_no_ne(engine) a = np.random.choice(['red', 'green'], size=10) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index f46215105b375..8b50036cd50f8 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -23,8 +23,6 @@ class TestDataFrameReplace(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_replace_inplace(self): self.tsframe['A'][:5] = nan self.tsframe['A'][-5:] = nan diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 12cd62f8b4cc0..2df297d03bcdf 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -25,8 +25,6 @@ class TestDataFrameReprInfoEtc(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_repr_empty(self): # empty foo = repr(self.empty) # noqa diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 705270b695b77..1890b33e3dbaa 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -25,8 +25,6 @@ class TestDataFrameReshape(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_pivot(self): data = { 'index': ['A', 'B', 'C', 'C', 'B', 'A'], diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index bbd8dd9b48b5c..7779afdc47b48 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -19,8 +19,6 @@ class TestDataFrameSorting(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_sort_index(self): # GH13496 diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 8bd6d3ba54371..9052a16bf973c 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -13,8 +13,6 @@ class TestDataFrameSubclassing(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_frame_subclassing_and_slicing(self): # Subclass frame and ensure it returns the right class on slicing it # In reference to PR 9632 diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 55848847f2266..862f76b4ecc05 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -27,8 +27,6 @@ class TestDataFrameTimeSeriesMethods(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_diff(self): the_diff = self.tsframe.diff(1) @@ -539,13 +537,13 @@ def test_datetime_assignment_with_NaT_and_diff_time_units(self): result = 
pd.Series(data_ns).to_frame() result['new'] = data_ns expected = pd.DataFrame({0: [1, None], - 'new': [1, None]}, dtype='datetime64[ns]') + 'new': [1, None]}, dtype='datetime64[ns]') tm.assert_frame_equal(result, expected) # OutOfBoundsDatetime error shouldn't occur data_s = np.array([1, 'nat'], dtype='datetime64[s]') result['new'] = data_s expected = pd.DataFrame({0: [1, None], - 'new': [1e9, None]}, dtype='datetime64[ns]') + 'new': [1e9, None]}, dtype='datetime64[ns]') tm.assert_frame_equal(result, expected) def test_frame_to_period(self): diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 5c47b0357b4f6..471fc536a90f6 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -31,8 +31,6 @@ class TestDataFrameToCSV(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_to_csv_from_csv1(self): with ensure_clean('__tmp_to_csv_from_csv1__') as path: diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 5f680a6876873..00ddd293f6014 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -27,8 +27,6 @@ class TestGroupByAggregate(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.ts = tm.makeTimeSeries() diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 82ec1832be961..605b327208a03 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -23,8 +23,6 @@ class TestGroupByCategorical(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.ts = tm.makeTimeSeries() diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 663fbd04e7e5a..1640858802047 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -24,8 +24,6 @@ class TestGroupByFilter(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.ts = tm.makeTimeSeries() diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 01c81bd7904bd..df4707fcef3f0 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -36,8 +36,6 @@ class TestGroupBy(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.ts = tm.makeTimeSeries() @@ -5908,8 +5906,8 @@ def test_group_shift_with_null_key(self): g = df.groupby(["A", "B"]) expected = DataFrame([(i + 12 if i % 3 and i < n_rows - 12 - else np.nan) - for i in range(n_rows)], dtype=float, + else np.nan) + for i in range(n_rows)], dtype=float, columns=["Z"], index=None) result = g.shift(-1) diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index edb044a3cb2d7..c9a695ee8db3b 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -8,7 +8,6 @@ class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True def test_astype(self): # GH 13149, GH 13209 @@ -185,7 +184,6 @@ def _check_rng(rng): class TestToPeriod(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): data = [Timestamp('2007-01-01 10:11:12.123456Z'), diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index e54ebe3d93bc6..772d76305cff2 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -10,7 +10,6 
@@ class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True def test_construction_with_alt(self): @@ -428,7 +427,6 @@ def test_000constructor_resolution(self): class TestTimeSeries(tm.TestCase): - _multiprocess_can_split_ = True def test_dti_constructor_preserve_dti_freq(self): rng = date_range('1/1/2000', '1/2/2000', freq='5min') diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index b2161aa5c75c6..9d5f397329c76 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -9,7 +9,6 @@ class TestTimeSeries(TestData, tm.TestCase): - _multiprocess_can_split_ = True def test_date_range_gen_error(self): rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min') diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 628cb9df94e39..2c87c48bcda11 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -14,7 +14,6 @@ class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True def test_get_loc(self): idx = pd.date_range('2000-01-01', periods=3) diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index eea08febc86e6..2b254bc8be931 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -10,7 +10,6 @@ class TestDatetimeIndex(DatetimeLike, tm.TestCase): _holder = DatetimeIndex - _multiprocess_can_split_ = True def setUp(self): self.indices = dict(index=tm.makeDateIndex(10)) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 5b6bcffe71856..23271a8d45499 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -7,7 +7,6 @@ class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True def test_where_other(self): diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index dda2785d2b0ae..6b0191edbda5a 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -53,7 +53,6 @@ def test_second(self): class TestTimeSeries(tm.TestCase): - _multiprocess_can_split_ = True def test_pass_datetimeindex_to_index(self): # Bugs in #1396 diff --git a/pandas/tests/indexes/datetimes/test_missing.py b/pandas/tests/indexes/datetimes/test_missing.py index 5c408d5300cdc..8f3752227b6d0 100644 --- a/pandas/tests/indexes/datetimes/test_missing.py +++ b/pandas/tests/indexes/datetimes/test_missing.py @@ -3,7 +3,6 @@ class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True def test_fillna_datetime64(self): # GH 11343 diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index c25cd6a3fa90e..a46980a0f742a 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -955,7 +955,6 @@ def test_second(self): class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True # GH 10699 def test_datetime64_with_DateOffset(self): diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 229ae803aa2ff..7777de869bb20 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ 
-9,7 +9,6 @@ class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True def test_union(self): i1 = Int64Index(np.arange(0, 20, 2)) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 42d135f634298..841d0be605058 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -168,7 +168,6 @@ def test_to_datetime_format_weeks(self): class TestToDatetime(tm.TestCase): - _multiprocess_can_split_ = True def test_to_datetime_dt64s(self): in_bound_dts = [ @@ -989,6 +988,7 @@ def test_to_datetime_iso8601_noleading_0s(self): class TestDaysInMonth(tm.TestCase): # tests for issue #10154 + def test_day_not_in_month_coerce(self): self.assertTrue(isnull(to_datetime('2015-02-29', errors='coerce'))) self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c574a4a1f01a7..2f5b98d145e57 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -31,7 +31,6 @@ class TestIndex(Base, tm.TestCase): _holder = Index - _multiprocess_can_split_ = True def setUp(self): self.indices = dict(unicodeIndex=tm.makeUnicodeIndex(100), @@ -1795,7 +1794,6 @@ class TestMixedIntIndex(Base, tm.TestCase): # (GH 13514) _holder = Index - _multiprocess_can_split_ = True def setUp(self): self.indices = dict(mixedIndex=Index([0, 'a', 1, 'b', 2, 'c'])) @@ -1993,7 +1991,7 @@ def test_dropna(self): idx = pd.TimedeltaIndex(['1 days', '2 days', '3 days']) tm.assert_index_equal(idx.dropna(), idx) nanidx = pd.TimedeltaIndex([pd.NaT, '1 days', '2 days', - '3 days', pd.NaT]) + '3 days', pd.NaT]) tm.assert_index_equal(nanidx.dropna(), idx) idx = pd.PeriodIndex(['2012-02', '2012-04', '2012-05'], freq='M') diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 708f424d9bad1..6b6885c082533 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -225,6 +225,7 @@ def test_map(self): # change categories dtype ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'), ordered=False) + def f(x): return {'A': 10, 'B': 20, 'C': 30}.get(x) @@ -360,7 +361,8 @@ def test_reindexing(self): expected = oidx.get_indexer_non_unique(finder)[0] actual = ci.get_indexer(finder) - tm.assert_numpy_array_equal(expected.values, actual, check_dtype=False) + tm.assert_numpy_array_equal( + expected.values, actual, check_dtype=False) def test_reindex_dtype(self): c = CategoricalIndex(['a', 'b', 'c', 'a']) @@ -519,7 +521,7 @@ def test_ensure_copied_data(self): # GH12309 # Must be tested separately from other indexes because # self.value is not an ndarray - _base = lambda ar : ar if ar.base is None else ar.base + _base = lambda ar: ar if ar.base is None else ar.base for index in self.indices.values(): result = CategoricalIndex(index.values, copy=True) tm.assert_index_equal(index, result) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index e5a4ced4ced4d..b212a7b75904c 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -16,7 +16,6 @@ class TestPeriodIndex(DatetimeLike, tm.TestCase): _holder = PeriodIndex - _multiprocess_can_split_ = True def setUp(self): self.indices = dict(index=tm.makePeriodIndex(10)) @@ -240,7 +239,6 @@ def test_difference_freq(self): class TestTimedeltaIndex(DatetimeLike, tm.TestCase): _holder = TimedeltaIndex - 
_multiprocess_can_split_ = True def setUp(self): self.indices = dict(index=tm.makeTimedeltaIndex(10)) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 7d9ceb526b912..365236f72e80e 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -30,7 +30,6 @@ class TestMultiIndex(Base, tm.TestCase): _holder = MultiIndex - _multiprocess_can_split_ = True _compat_props = ['shape', 'ndim', 'size', 'itemsize'] def setUp(self): @@ -900,11 +899,11 @@ def test_append_mixed_dtypes(self): res = mi.append(mi) exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3], - [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], - ['a', 'b', 'c', 'a', 'b', 'c'], - dti.append(dti), - dti_tz.append(dti_tz), - pi.append(pi)]) + [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], + ['a', 'b', 'c', 'a', 'b', 'c'], + dti.append(dti), + dti_tz.append(dti_tz), + pi.append(pi)]) tm.assert_index_equal(res, exp) other = MultiIndex.from_arrays([['x', 'y', 'z'], ['x', 'y', 'z'], @@ -913,11 +912,11 @@ def test_append_mixed_dtypes(self): res = mi.append(other) exp = MultiIndex.from_arrays([[1, 2, 3, 'x', 'y', 'z'], - [1.1, np.nan, 3.3, 'x', 'y', 'z'], - ['a', 'b', 'c', 'x', 'y', 'z'], - dti.append(pd.Index(['x', 'y', 'z'])), - dti_tz.append(pd.Index(['x', 'y', 'z'])), - pi.append(pd.Index(['x', 'y', 'z']))]) + [1.1, np.nan, 3.3, 'x', 'y', 'z'], + ['a', 'b', 'c', 'x', 'y', 'z'], + dti.append(pd.Index(['x', 'y', 'z'])), + dti_tz.append(pd.Index(['x', 'y', 'z'])), + pi.append(pd.Index(['x', 'y', 'z']))]) tm.assert_index_equal(res, exp) def test_get_level_values(self): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 4dab7ae76a011..1bf9a10628542 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -176,7 +176,6 @@ def test_modulo(self): class TestFloat64Index(Numeric, tm.TestCase): _holder = Float64Index - _multiprocess_can_split_ = True def setUp(self): self.indices = dict(mixed=Float64Index([1.5, 2, 3, 4, 5]), @@ -624,7 +623,6 @@ def test_ufunc_coercions(self): class TestInt64Index(NumericInt, tm.TestCase): _dtype = 'int64' _holder = Int64Index - _multiprocess_can_split_ = True def setUp(self): self.indices = dict(index=Int64Index(np.arange(0, 20, 2))) @@ -895,7 +893,6 @@ class TestUInt64Index(NumericInt, tm.TestCase): _dtype = 'uint64' _holder = UInt64Index - _multiprocess_can_split_ = True def setUp(self): self.indices = dict(index=UInt64Index([2**63, 2**63 + 10, 2**63 + 15, diff --git a/pandas/tests/indexes/test_timedelta.py b/pandas/tests/indexes/test_timedelta.py index be01ad03a0660..e6071b8c4fa06 100644 --- a/pandas/tests/indexes/test_timedelta.py +++ b/pandas/tests/indexes/test_timedelta.py @@ -34,7 +34,6 @@ def test_timedelta(self): class TestTimeSeries(tm.TestCase): - _multiprocess_can_split_ = True def test_series_box_timedelta(self): rng = timedelta_range('1 day 1 s', periods=5, freq='h') diff --git a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py index bcadc41b13370..1d70205076b86 100644 --- a/pandas/tests/indexing/test_callable.py +++ b/pandas/tests/indexing/test_callable.py @@ -8,8 +8,6 @@ class TestIndexingCallable(tm.TestCase): - _multiprocess_can_split_ = True - def test_frame_loc_ix_callable(self): # GH 11485 df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': list('aabb'), diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 0cfa7258461f1..b9a746cd25c7a 100644 --- a/pandas/tests/indexing/test_coercion.py +++ 
b/pandas/tests/indexing/test_coercion.py @@ -15,8 +15,6 @@ class CoercionBase(object): - _multiprocess_can_split_ = True - klasses = ['index', 'series'] dtypes = ['object', 'int64', 'float64', 'complex128', 'bool', 'datetime64', 'datetime64tz', 'timedelta64', 'period'] @@ -1187,7 +1185,7 @@ def _assert_replace_conversion(self, from_key, to_key, how): to_key in ('bool')) or # TODO_GH12747 The result must be int? - (from_key == 'bool' and to_key == 'int64')): + (from_key == 'bool' and to_key == 'int64')): # buggy on 32-bit if tm.is_platform_32bit(): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index a9dfcf2672357..b06b1067b7c6b 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -99,8 +99,6 @@ def _mklbl(prefix, n): class TestIndexing(tm.TestCase): - _multiprocess_can_split_ = True - _objs = set(['series', 'frame', 'panel']) _typs = set(['ints', 'uints', 'labels', 'mixed', 'ts', 'floats', 'empty', 'ts_rev']) diff --git a/pandas/tests/indexing/test_indexing_slow.py b/pandas/tests/indexing/test_indexing_slow.py index 5d563e20087b9..42b50e37f0492 100644 --- a/pandas/tests/indexing/test_indexing_slow.py +++ b/pandas/tests/indexing/test_indexing_slow.py @@ -10,8 +10,6 @@ class TestIndexingSlow(tm.TestCase): - _multiprocess_can_split_ = True - @tm.slow def test_multiindex_get_loc(self): # GH7724, GH2646 diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index c92287b2bdc42..11f00386ec592 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -18,6 +18,7 @@ @tm.mplskip class TestSeriesPlots(TestPlotBase): + def setUp(self): TestPlotBase.setUp(self) import matplotlib as mpl @@ -49,6 +50,7 @@ def test_bootstrap_plot(self): @tm.mplskip class TestDataFramePlots(TestPlotBase): + @slow def test_scatter_plot_legacy(self): tm._skip_if_no_scipy() diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index f686f1aa6dc47..0cef27d2e41fc 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1149,6 +1149,7 @@ def test_round_nat(self): class TestTimestampNsOperations(tm.TestCase): + def setUp(self): self.timestamp = Timestamp(datetime.utcnow()) @@ -1324,6 +1325,7 @@ def test_nat_arithmetic_index(self): class TestTimestampOps(tm.TestCase): + def test_timestamp_and_datetime(self): self.assertEqual((Timestamp(datetime( 2013, 10, 13)) - datetime(2013, 10, 12)).days, 1) @@ -1404,6 +1406,7 @@ def test_resolution(self): class TestTimestampToJulianDate(tm.TestCase): + def test_compare_1700(self): r = Timestamp('1700-06-23').to_julian_date() self.assertEqual(r, 2342145.5) @@ -1426,7 +1429,6 @@ def test_compare_hour13(self): class TestTimeSeries(tm.TestCase): - _multiprocess_can_split_ = True def test_timestamp_to_datetime(self): tm._skip_if_no_pytz() diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 2ddfa27eea377..6473dbeeaa1bc 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -18,8 +18,6 @@ class TestSeriesAlterAxes(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_setindex(self): # wrong type series = self.series.copy() diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 07e1be609670f..52b85c89a7009 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -30,8 +30,6 @@ class 
TestSeriesAnalytics(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_sum_zero(self):
         arr = np.array([])
         self.assertEqual(nanops.nansum(arr), 0)
diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py
index ec7ffde344d31..16d1466bb90fe 100644
--- a/pandas/tests/series/test_apply.py
+++ b/pandas/tests/series/test_apply.py
@@ -15,8 +15,6 @@
 
 class TestSeriesApply(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_apply(self):
         with np.errstate(all='ignore'):
             assert_series_equal(self.ts.apply(np.sqrt), np.sqrt(self.ts))
@@ -141,8 +139,6 @@ def f(x):
 
 class TestSeriesMap(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_map(self):
         index, data = tm.getMixedTypeDict()
diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/test_asof.py
index db306d2a742c1..d2fd8858e7647 100644
--- a/pandas/tests/series/test_asof.py
+++ b/pandas/tests/series/test_asof.py
@@ -10,7 +10,6 @@
 
 class TestSeriesAsof(TestData, tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def test_basic(self):
diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py
index 7bcd1763537dc..d4e5d36c15c68 100644
--- a/pandas/tests/series/test_combine_concat.py
+++ b/pandas/tests/series/test_combine_concat.py
@@ -18,8 +18,6 @@
 
 class TestSeriesCombine(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_append(self):
         appendedSeries = self.series.append(self.objSeries)
         for idx, value in compat.iteritems(appendedSeries):
@@ -222,8 +220,6 @@ def test_combine_first_dt64(self):
 
 class TestTimeseries(tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_append_concat(self):
         rng = date_range('5/8/2012 1:45', periods=10, freq='5T')
         ts = Series(np.random.randn(len(rng)), rng)
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 777b188b8fdd9..aef4c9269bc62 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -26,8 +26,6 @@
 
 class TestSeriesConstructors(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_scalar_conversion(self):
 
         # Pass in scalar is disabled
diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py
index b9f999a6c6ffe..4c697c7e52bb8 100644
--- a/pandas/tests/series/test_datetime_values.py
+++ b/pandas/tests/series/test_datetime_values.py
@@ -22,8 +22,6 @@
 
 class TestSeriesDatetimeValues(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_dt_namespace_accessor(self):
 
         # GH 7207, 11128
@@ -168,9 +166,9 @@ def compare(s, name):
         cases = [Series(timedelta_range('1 day', periods=5),
                         index=list('abcde'), name='xxx'),
                  Series(timedelta_range('1 day 01:23:45', periods=5,
-                                            freq='s'), name='xxx'),
+                                        freq='s'), name='xxx'),
                  Series(timedelta_range('2 days 01:23:45.012345', periods=5,
-                                            freq='ms'), name='xxx')]
+                                        freq='ms'), name='xxx')]
         for s in cases:
             for prop in ok_for_td:
                 # we test freq below
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
index 127a410f66fdb..13375ab886d8d 100644
--- a/pandas/tests/series/test_dtypes.py
+++ b/pandas/tests/series/test_dtypes.py
@@ -20,8 +20,6 @@
 
 class TestSeriesDtypes(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_astype(self):
         s = Series(np.random.randn(5), name='foo')
diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py
index e0d83d6eeadac..a20cb8324d2a3 100644
--- a/pandas/tests/series/test_indexing.py
+++ b/pandas/tests/series/test_indexing.py
@@ -31,8 +31,6 @@
 
 class TestSeriesIndexing(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_get(self):
 
         # GH 6383
@@ -2216,7 +2214,6 @@ def test_setitem_slice_into_readonly_backing_data(self):
 
 
 class TestTimeSeriesDuplicates(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def setUp(self):
         dates = [datetime(2000, 1, 2), datetime(2000, 1, 2),
@@ -2603,6 +2600,7 @@ def test_frame_datetime64_duplicated(self):
 
 
 class TestNatIndexing(tm.TestCase):
+
     def setUp(self):
         self.series = Series(date_range('1/1/2000', periods=10))
diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py
index e3a0e056f4da1..a3b13ba9b993a 100644
--- a/pandas/tests/series/test_internals.py
+++ b/pandas/tests/series/test_internals.py
@@ -16,8 +16,6 @@
 
 class TestSeriesInternals(tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_convert_objects(self):
 
         s = Series([1., 2, 3], index=['a', 'b', 'c'])
diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py
index 48528dc54adbd..d514fbfc142f0 100644
--- a/pandas/tests/series/test_io.py
+++ b/pandas/tests/series/test_io.py
@@ -18,8 +18,6 @@
 
 class TestSeriesToCSV(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_from_csv(self):
 
         with ensure_clean() as path:
@@ -112,8 +110,6 @@ def test_to_csv_path_is_none(self):
 
 class TestSeriesIO(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_to_frame(self):
         self.ts.name = None
         rs = self.ts.to_frame()
@@ -174,8 +170,6 @@ class SubclassedFrame(DataFrame):
 
 class TestSeriesToList(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_tolist(self):
         rs = self.ts.tolist()
         xp = self.ts.values.tolist()
diff --git a/pandas/tests/series/test_misc_api.py b/pandas/tests/series/test_misc_api.py
index b1b06cc7be8a4..2facbaf1fe31e 100644
--- a/pandas/tests/series/test_misc_api.py
+++ b/pandas/tests/series/test_misc_api.py
@@ -118,8 +118,6 @@ def test_to_sparse_pass_name(self):
 
 class TestSeriesMisc(TestData, SharedWithSparse, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_tab_completion(self):
         # GH 9910
         s = Series(list('abcd'))
diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py
index 6821a8b9f4221..702fa2acb5106 100644
--- a/pandas/tests/series/test_missing.py
+++ b/pandas/tests/series/test_missing.py
@@ -41,8 +41,6 @@ def _simple_ts(start, end, freq='D'):
 
 class TestSeriesMissingData(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_timedelta_fillna(self):
         # GH 3371
         s = Series([Timestamp('20130101'), Timestamp('20130101'), Timestamp(
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index 7b1201b971c71..3d609dec7958a 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -28,8 +28,6 @@
 
 class TestSeriesOperators(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_series_comparison_scalars(self):
         series = Series(date_range('1/1/2000', periods=10))
diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py
index aa16f2cca9475..7fe31bab87537 100644
--- a/pandas/tests/series/test_replace.py
+++ b/pandas/tests/series/test_replace.py
@@ -11,8 +11,6 @@
 
 class TestSeriesReplace(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_replace(self):
         N = 100
         ser = pd.Series(np.random.randn(N))
diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py
index af52f6e712e61..99a406a71b12b 100644
--- a/pandas/tests/series/test_repr.py
+++ b/pandas/tests/series/test_repr.py
@@ -18,8 +18,6 @@
 
 class TestSeriesRepr(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_multilevel_name_print(self):
         index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                    ['one', 'two', 'three']],
diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py
index fb3817eb84acd..db506f12a2293 100644
--- a/pandas/tests/series/test_sorting.py
+++ b/pandas/tests/series/test_sorting.py
@@ -13,8 +13,6 @@
 
 class TestSeriesSorting(TestData, tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_sort(self):
 
         ts = self.ts.copy()
diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py
index 5bcf258020349..3b1b8aca426e1 100644
--- a/pandas/tests/series/test_subclass.py
+++ b/pandas/tests/series/test_subclass.py
@@ -8,8 +8,6 @@
 
 class TestSeriesSubclassing(tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_indexing_sliced(self):
         s = tm.SubclassedSeries([1, 2, 3, 4], index=list('abcd'))
         res = s.loc[['a', 'b']]
@@ -37,8 +35,6 @@ def test_to_frame(self):
 
 class TestSparseSeriesSubclassing(tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_subclass_sparse_slice(self):
         # int64
         s = tm.SubclassedSparseSeries([1, 2, 3, 4, 5])
diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py
index bd346fb9bb0c8..e0db813e60c14 100644
--- a/pandas/tests/series/test_timeseries.py
+++ b/pandas/tests/series/test_timeseries.py
@@ -32,7 +32,6 @@ def assert_range_equal(left, right):
 
 class TestTimeSeries(TestData, tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def test_shift(self):
         shifted = self.ts.shift(1)
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 40b277f3f1f8a..fab04f7fa4bf2 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -20,7 +20,6 @@
 
 class TestMatch(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def test_ints(self):
         values = np.array([0, 2, 1])
@@ -57,7 +56,6 @@ def test_strings(self):
 
 class TestSafeSort(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def test_basic_sort(self):
         values = [3, 1, 2, 0, 4]
@@ -144,7 +142,6 @@ def test_exceptions(self):
 
 class TestFactorize(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def test_basic(self):
@@ -306,7 +303,6 @@ def test_uint64_factorize(self):
 
 class TestUnique(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def test_ints(self):
         arr = np.random.randint(0, 100, size=50)
@@ -389,7 +385,6 @@ def test_uint64_overflow(self):
 
 class TestIsin(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def test_invalid(self):
@@ -472,7 +467,6 @@ def test_large(self):
 
 class TestValueCounts(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def test_value_counts(self):
         np.random.seed(1234)
@@ -659,8 +653,6 @@ def test_value_counts_uint64(self):
 
 class TestDuplicated(tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_duplicated_with_nas(self):
         keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object)
@@ -896,7 +888,6 @@ def test_group_var_constant(self):
 
 class TestGroupVarFloat64(tm.TestCase, GroupVarTestMixin):
     __test__ = True
-    _multiprocess_can_split_ = True
 
     algo = algos.algos.group_var_float64
     dtype = np.float64
@@ -920,7 +911,6 @@ def test_group_var_large_inputs(self):
 
 class TestGroupVarFloat32(tm.TestCase, GroupVarTestMixin):
     __test__ = True
-    _multiprocess_can_split_ = True
 
     algo = algos.algos.group_var_float32
     dtype = np.float32
@@ -1068,7 +1058,6 @@ def test_arrmap():
 
 class TestTseriesUtil(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def test_combineFunc(self):
         pass
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index be55d6e1976ec..cc99cf0f830aa 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -26,7 +26,6 @@
 
 class TestCategorical(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def setUp(self):
         self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
@@ -1574,12 +1573,12 @@ def test_searchsorted(self):
         # https://github.com/pandas-dev/pandas/issues/14522
 
         c1 = pd.Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
-                                categories=['cheese', 'milk', 'apple', 'bread'],
-                                ordered=True)
+                            categories=['cheese', 'milk', 'apple', 'bread'],
+                            ordered=True)
         s1 = pd.Series(c1)
         c2 = pd.Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
-                                categories=['cheese', 'milk', 'apple', 'bread'],
-                                ordered=False)
+                            categories=['cheese', 'milk', 'apple', 'bread'],
+                            ordered=False)
         s2 = pd.Series(c2)
 
         # Searching for single item argument, side='left' (default)
@@ -1697,8 +1696,8 @@ def test_map(self):
         tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64)))
 
     def test_validate_inplace(self):
-        cat = Categorical(['A','B','B','C','A'])
-        invalid_values = [1, "True", [1,2,3], 5.0]
+        cat = Categorical(['A', 'B', 'B', 'C', 'A'])
+        invalid_values = [1, "True", [1, 2, 3], 5.0]
 
         for value in invalid_values:
             with self.assertRaises(ValueError):
@@ -1711,19 +1710,21 @@ def test_validate_inplace(self):
             cat.as_unordered(inplace=value)
 
         with self.assertRaises(ValueError):
-            cat.set_categories(['X','Y','Z'], rename=True, inplace=value)
+            cat.set_categories(['X', 'Y', 'Z'], rename=True, inplace=value)
 
         with self.assertRaises(ValueError):
-            cat.rename_categories(['X','Y','Z'], inplace=value)
+            cat.rename_categories(['X', 'Y', 'Z'], inplace=value)
 
         with self.assertRaises(ValueError):
-            cat.reorder_categories(['X','Y','Z'], ordered=True, inplace=value)
+            cat.reorder_categories(
+                ['X', 'Y', 'Z'], ordered=True, inplace=value)
 
         with self.assertRaises(ValueError):
-            cat.add_categories(new_categories=['D','E','F'], inplace=value)
+            cat.add_categories(
+                new_categories=['D', 'E', 'F'], inplace=value)
 
         with self.assertRaises(ValueError):
-            cat.remove_categories(removals=['D','E','F'], inplace=value)
+            cat.remove_categories(removals=['D', 'E', 'F'], inplace=value)
 
         with self.assertRaises(ValueError):
             cat.remove_unused_categories(inplace=value)
@@ -1733,7 +1734,6 @@ def test_validate_inplace(self):
 
 class TestCategoricalAsBlock(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def setUp(self):
         self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
@@ -3045,13 +3045,15 @@ def test_value_counts_with_nan(self):
             tm.assert_series_equal(res, exp)
 
         # we don't exclude the count of None and sort by counts
-        exp = pd.Series([3, 2, 1], index=pd.CategoricalIndex([np.nan, "a", "b"]))
+        exp = pd.Series(
+            [3, 2, 1], index=pd.CategoricalIndex([np.nan, "a", "b"]))
         res = s.value_counts(dropna=False)
         tm.assert_series_equal(res, exp)
 
         # When we aren't sorting by counts, and np.nan isn't a
         # category, it should be last.
-        exp = pd.Series([2, 1, 3], index=pd.CategoricalIndex(["a", "b", np.nan]))
+        exp = pd.Series(
+            [2, 1, 3], index=pd.CategoricalIndex(["a", "b", np.nan]))
         res = s.value_counts(dropna=False, sort=False)
         tm.assert_series_equal(res, exp)
@@ -3703,7 +3705,8 @@ def f():
         # assign a part of a column with dtype == categorical ->
         # exp_parts_cats_col
         df = orig.copy()
-        df.loc["j":"k", df.columns[0]] = pd.Categorical(["b", "b"], categories=["a", "b"])
+        df.loc["j":"k", df.columns[0]] = pd.Categorical(
+            ["b", "b"], categories=["a", "b"])
         tm.assert_frame_equal(df, exp_parts_cats_col)
 
         with tm.assertRaises(ValueError):
@@ -4013,7 +4016,6 @@ def test_concat_append_gh7864(self):
         self.assert_index_equal(df['grade'].cat.categories,
                                 dfa['grade'].cat.categories)
 
-
     def test_concat_preserve(self):
 
         # GH 8641 series concat not preserving category dtype
@@ -4042,7 +4044,7 @@ def test_concat_preserve(self):
         res = pd.concat([df2, df2])
         exp = DataFrame({'A': pd.concat([a, a]),
                          'B': pd.concat([b, b]).astype(
-                             'category', categories=list('cab'))})
+                            'category', categories=list('cab'))})
         tm.assert_frame_equal(res, exp)
 
     def test_categorical_index_preserver(self):
@@ -4052,18 +4054,18 @@ def test_categorical_index_preserver(self):
 
         df2 = DataFrame({'A': a,
                          'B': b.astype('category', categories=list('cab'))
-                         }).set_index('B')
+                          }).set_index('B')
         result = pd.concat([df2, df2])
         expected = DataFrame({'A': pd.concat([a, a]),
                               'B': pd.concat([b, b]).astype(
                                   'category', categories=list('cab'))
-                              }).set_index('B')
+                               }).set_index('B')
         tm.assert_frame_equal(result, expected)
 
         # wrong catgories
         df3 = DataFrame({'A': a,
                          'B': pd.Categorical(b, categories=list('abc'))
-                         }).set_index('B')
+                          }).set_index('B')
         self.assertRaises(TypeError, lambda: pd.concat([df2, df3]))
 
     def test_merge(self):
@@ -4391,8 +4393,8 @@ def test_str_accessor_api_for_categorical(self):
             ('decode', ("UTF-8",), {}),
             ('encode', ("UTF-8",), {}),
             ('endswith', ("a",), {}),
-            ('extract', ("([a-z]*) ",), {"expand":False}),
-            ('extract', ("([a-z]*) ",), {"expand":True}),
+            ('extract', ("([a-z]*) ",), {"expand": False}),
+            ('extract', ("([a-z]*) ",), {"expand": True}),
             ('extractall', ("([a-z]*) ",), {}),
             ('find', ("a",), {}),
             ('findall', ("a",), {}),
@@ -4550,8 +4552,6 @@ def test_concat_categorical(self):
 
 class TestCategoricalSubclassing(tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def test_constructor(self):
         sc = tm.SubclassedCategorical(['a', 'b', 'c'])
         self.assertIsInstance(sc, tm.SubclassedCategorical)
diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py
index 0239250129494..90b1157572be1 100644
--- a/pandas/tests/test_common.py
+++ b/pandas/tests/test_common.py
@@ -7,8 +7,6 @@
 import pandas.core.common as com
 import pandas.util.testing as tm
 
-_multiprocess_can_split_ = True
-
 
 def test_mut_exclusive():
     msg = "mutually exclusive arguments: '[ab]' and '[ab]'"
diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py
index ed8c37fd6dd20..c58aada193b15 100644
--- a/pandas/tests/test_config.py
+++ b/pandas/tests/test_config.py
@@ -5,7 +5,6 @@
 
 class TestConfig(unittest.TestCase):
-    _multiprocess_can_split_ = True
 
     def __init__(self, *args):
         super(TestConfig, self).__init__(*args)
diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py
index 18b078d0a677e..eca4a8f3c9e66 100644
--- a/pandas/tests/test_expressions.py
+++ b/pandas/tests/test_expressions.py
@@ -58,8 +58,6 @@
 
 class TestExpressions(tm.TestCase):
 
-    _multiprocess_can_split_ = False
-
     def setUp(self):
         self.frame = _frame.copy()
diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py
index 5bf2eda47ea27..916d7ae0b0ec4 100644
--- a/pandas/tests/test_generic.py
+++ b/pandas/tests/test_generic.py
@@ -33,8 +33,6 @@
 
 class Generic(object):
 
-    _multiprocess_can_split_ = True
-
     def setUp(self):
         pass
diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py
index 2bfe31ad4260e..1dfea168c067c 100644
--- a/pandas/tests/test_internals.py
+++ b/pandas/tests/test_internals.py
@@ -182,8 +182,6 @@ def create_mgr(descr, item_shape=None):
 
 class TestBlock(tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def setUp(self):
         # self.fblock = get_float_ex()  # a,c,e
         # self.cblock = get_complex_ex() #
@@ -299,7 +297,6 @@ def test_split_block_at(self):
 
 class TestDatetimeBlock(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def test_try_coerce_arg(self):
         block = create_block('datetime', [0])
@@ -318,7 +315,6 @@ def test_try_coerce_arg(self):
 
 class TestBlockManager(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def setUp(self):
         self.mgr = create_mgr(
@@ -1057,7 +1053,6 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer,
 
 class TestBlockPlacement(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def test_slice_len(self):
         self.assertEqual(len(BlockPlacement(slice(0, 4))), 4)
diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py
index 0e7dda05a0c27..2a16d7663b0cf 100644
--- a/pandas/tests/test_join.py
+++ b/pandas/tests/test_join.py
@@ -9,7 +9,6 @@
 
 class TestIndexer(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def test_outer_join_indexer(self):
         typemap = [('int32', _join.outer_join_indexer_int32),
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index d87ad8d906854..1fe2d701f5a41 100755
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -25,8 +25,6 @@
 
 class TestMultiLevel(tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def setUp(self):
         index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                    ['one', 'two',
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index 89e8fb78ad821..4f56419b1323a 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -52,7 +52,6 @@ def not_hashable(self):
 
 class SafeForLongAndSparse(object):
-    _multiprocess_can_split_ = True
 
     def test_repr(self):
         repr(self.panel)
@@ -177,7 +176,6 @@ def wrapper(x):
 
 class SafeForSparse(object):
-    _multiprocess_can_split_ = True
 
     @classmethod
     def assert_panel_equal(cls, x, y):
@@ -422,8 +420,6 @@ def test_abs(self):
 
 class CheckIndexing(object):
 
-    _multiprocess_can_split_ = True
-
     def test_getitem(self):
         self.assertRaises(Exception, self.panel.__getitem__, 'ItemQ')
@@ -869,7 +865,6 @@ def test_set_value(self):
 
 class TestPanel(tm.TestCase, PanelTests, CheckIndexing, SafeForLongAndSparse,
                 SafeForSparse):
-    _multiprocess_can_split_ = True
 
     @classmethod
     def assert_panel_equal(cls, x, y):
@@ -2278,7 +2273,6 @@ class TestLongPanel(tm.TestCase):
     """
     LongPanel no longer exists, but...
""" - _multiprocess_can_split_ = True def setUp(self): import warnings diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index aeca24964222a..96864c626ba7f 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -29,8 +29,6 @@ def add_nans(panel4d): class SafeForLongAndSparse(object): - _multiprocess_can_split_ = True - def test_repr(self): repr(self.panel4d) @@ -148,8 +146,6 @@ def wrapper(x): class SafeForSparse(object): - _multiprocess_can_split_ = True - @classmethod def assert_panel_equal(cls, x, y): assert_panel_equal(x, y) @@ -305,8 +301,6 @@ def test_abs(self): class CheckIndexing(object): - _multiprocess_can_split_ = True - def test_getitem(self): self.assertRaises(Exception, self.panel4d.__getitem__, 'ItemQ') @@ -604,8 +598,6 @@ def test_set_value(self): class TestPanel4d(tm.TestCase, CheckIndexing, SafeForSparse, SafeForLongAndSparse): - _multiprocess_can_split_ = True - @classmethod def assert_panel4d_equal(cls, x, y): assert_panel4d_equal(x, y) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index b5fa945a5bb8f..ed5ec970ba33c 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -14,8 +14,6 @@ import pandas.util.testing as tm from pandas.compat import range, u -_multiprocess_can_split_ = True - class TestMelt(tm.TestCase): diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py index eb8ab02c29548..118c4147a2019 100644 --- a/pandas/tests/test_stats.py +++ b/pandas/tests/test_stats.py @@ -13,7 +13,6 @@ class TestRank(tm.TestCase): - _multiprocess_can_split_ = True s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]) df = DataFrame({'A': s, 'B': s}) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f358946983dce..ce97b09b7e3ca 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -20,8 +20,6 @@ class TestStringMethods(tm.TestCase): - _multiprocess_can_split_ = True - def test_api(self): # GH 6106, GH 9322 diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index bf8a3ab370625..3aed22c140ffe 100644 --- a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -8,15 +8,11 @@ import pandas.util.testing as tm from pandas.tslib import iNaT -_multiprocess_can_split_ = True - class TestTake(tm.TestCase): # standard incompatible fill error fill_error = re.compile("Incompatible type for fill_value") - _multiprocess_can_split_ = True - def test_1d_with_out(self): def _test_dtype(dtype, can_hold_na, writeable=True): data = np.random.randint(0, 2, 4).astype(dtype) diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 5e60efd153ab1..466e9ee5a30b8 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -18,7 +18,6 @@ class TestAssertAlmostEqual(tm.TestCase): - _multiprocess_can_split_ = True def _assert_almost_equal_both(self, a, b, **kwargs): assert_almost_equal(a, b, **kwargs) @@ -146,7 +145,6 @@ def test_assert_almost_equal_object(self): class TestUtilTesting(tm.TestCase): - _multiprocess_can_split_ = True def test_raise_with_traceback(self): with assertRaisesRegexp(LookupError, "error_text"): @@ -347,7 +345,6 @@ def test_assert_almost_equal_iterable_message(self): class TestAssertIndexEqual(unittest.TestCase): - _multiprocess_can_split_ = True def test_index_equal_message(self): @@ -495,7 +492,6 @@ def test_index_equal_metadata_message(self): class TestAssertSeriesEqual(tm.TestCase): - _multiprocess_can_split_ = True def _assert_equal(self, x, y, **kwargs): 
         assert_series_equal(x, y, **kwargs)
@@ -590,7 +586,6 @@ def test_series_equal_message(self):
 
 class TestAssertFrameEqual(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def _assert_equal(self, x, y, **kwargs):
         assert_frame_equal(x, y, **kwargs)
@@ -701,7 +696,6 @@ def test_notisinstance(self):
 
 class TestAssertCategoricalEqual(unittest.TestCase):
-    _multiprocess_can_split_ = True
 
     def test_categorical_equal_message(self):
diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py
index e2f6a7f6cc1ed..1bf9f4da45bff 100644
--- a/pandas/tests/test_util.py
+++ b/pandas/tests/test_util.py
@@ -314,6 +314,7 @@ def test_validation(self):
 
 
 class TestMove(tm.TestCase):
+
     def test_cannot_create_instance_of_stolenbuffer(self):
         """Stolen buffers need to be created through the smart constructor
         ``move_into_mutable_buffer`` which has a bunch of checks in it.
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index dc23469976e35..48861fc6a9528 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -32,8 +32,6 @@ def assert_equal(left, right):
 
 class Base(tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     _nan_locs = np.arange(20, 40)
     _inf_locs = np.array([])
diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py
index a8579e89aeb1f..497130b117289 100644
--- a/pandas/tests/types/test_cast.py
+++ b/pandas/tests/types/test_cast.py
@@ -19,8 +19,6 @@
                            DatetimeTZDtype, PeriodDtype)
 from pandas.util import testing as tm
 
-_multiprocess_can_split_ = True
-
 
 class TestPossiblyDowncast(tm.TestCase):
diff --git a/pandas/tests/types/test_common.py b/pandas/tests/types/test_common.py
index 7c17c61aec440..4667bbd47ad18 100644
--- a/pandas/tests/types/test_common.py
+++ b/pandas/tests/types/test_common.py
@@ -7,8 +7,6 @@
 
 import pandas.util.testing as tm
 
-_multiprocess_can_split_ = True
-
 
 class TestPandasDtype(tm.TestCase):
diff --git a/pandas/tests/types/test_concat.py b/pandas/tests/types/test_concat.py
index 8acafe0af1792..f4faab45f4ba2 100644
--- a/pandas/tests/types/test_concat.py
+++ b/pandas/tests/types/test_concat.py
@@ -7,8 +7,6 @@
 
 class TestConcatCompat(tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def check_concat(self, to_concat, exp):
         for klass in [pd.Index, pd.Series]:
             to_concat_klass = [klass(c) for c in to_concat]
diff --git a/pandas/tests/types/test_dtypes.py b/pandas/tests/types/test_dtypes.py
index 68105cfd7c886..8ef2868ae324f 100644
--- a/pandas/tests/types/test_dtypes.py
+++ b/pandas/tests/types/test_dtypes.py
@@ -15,8 +15,6 @@
                                 _coerce_to_dtype)
 import pandas.util.testing as tm
 
-_multiprocess_can_split_ = True
-
 
 class Base(object):
diff --git a/pandas/tests/types/test_generic.py b/pandas/tests/types/test_generic.py
index 2861252bef26a..c7c8b0becad63 100644
--- a/pandas/tests/types/test_generic.py
+++ b/pandas/tests/types/test_generic.py
@@ -5,8 +5,6 @@
 import pandas.util.testing as tm
 from pandas.types import generic as gt
 
-_multiprocess_can_split_ = True
-
 
 class TestABCClasses(tm.TestCase):
     tuples = [[1, 2, 2], ['red', 'blue', 'red']]
diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py
index 15f9545f3476c..629aa63f4a0ae 100644
--- a/pandas/tests/types/test_inference.py
+++ b/pandas/tests/types/test_inference.py
@@ -35,8 +35,6 @@
 from pandas.types.missing import isnull
 from pandas.util import testing as tm
 
-_multiprocess_can_split_ = True
-
 
 def test_is_sequence():
     is_seq = inference.is_sequence
@@ -340,7 +338,6 @@ def test_mixed_dtypes_remain_object_array(self):
 
 class TestTypeInference(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def test_length_zero(self):
         result = lib.infer_dtype(np.array([], dtype='i4'))
diff --git a/pandas/tests/types/test_missing.py b/pandas/tests/types/test_missing.py
index 2b09cf5ab633d..cab44f1122ae1 100644
--- a/pandas/tests/types/test_missing.py
+++ b/pandas/tests/types/test_missing.py
@@ -14,8 +14,6 @@
 from pandas.types.missing import (array_equivalent, isnull, notnull,
                                   na_value_for_dtype)
 
-_multiprocess_can_split_ = True
-
 
 def test_notnull():
     assert notnull(1.)
diff --git a/pandas/tools/tests/test_concat.py b/pandas/tools/tests/test_concat.py
index dae24c48b8238..87a0dda34a525 100644
--- a/pandas/tools/tests/test_concat.py
+++ b/pandas/tools/tests/test_concat.py
@@ -17,8 +17,6 @@
 
 class ConcatenateBase(tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def setUp(self):
         self.frame = DataFrame(tm.getSeriesData())
         self.mixed_frame = self.frame.copy()
diff --git a/pandas/tools/tests/test_hashing.py b/pandas/tools/tests/test_hashing.py
index fb1f187ddd5c0..05a352f259e8b 100644
--- a/pandas/tools/tests/test_hashing.py
+++ b/pandas/tools/tests/test_hashing.py
@@ -8,8 +8,6 @@
 
 class TestHashing(tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def setUp(self):
         self.df = DataFrame(
             {'i32': np.array([1, 2, 3] * 3, dtype='int32'),
diff --git a/pandas/tools/tests/test_join.py b/pandas/tools/tests/test_join.py
index 605a85026d605..ff0a494bd7d02 100644
--- a/pandas/tools/tests/test_join.py
+++ b/pandas/tools/tests/test_join.py
@@ -20,8 +20,6 @@
 
 class TestJoin(tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def setUp(self):
         # aggregate multiple columns
         self.df = DataFrame({'key1': get_test_data(),
diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
index 88856a012da6f..a348a901442c9 100644
--- a/pandas/tools/tests/test_merge.py
+++ b/pandas/tools/tests/test_merge.py
@@ -33,8 +33,6 @@ def get_test_data(ngroups=NGROUPS, n=N):
 
 class TestMerge(tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def setUp(self):
         # aggregate multiple columns
        self.df = DataFrame({'key1': get_test_data(),
diff --git a/pandas/tools/tests/test_merge_asof.py b/pandas/tools/tests/test_merge_asof.py
index 8e7323f72a8f5..76798b3c895ea 100644
--- a/pandas/tools/tests/test_merge_asof.py
+++ b/pandas/tools/tests/test_merge_asof.py
@@ -11,7 +11,6 @@
 
 class TestAsOfMerge(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def read_data(self, name, dedupe=False):
         path = os.path.join(tm.get_data_path(), name)
@@ -686,7 +685,7 @@ def test_allow_exact_matches_and_tolerance3(self):
         # GH 13709
         df1 = pd.DataFrame({
             'time': pd.to_datetime(['2016-07-15 13:30:00.030',
-                                    '2016-07-15 13:30:00.030']),
+                                   '2016-07-15 13:30:00.030']),
             'username': ['bob', 'charlie']})
         df2 = pd.DataFrame({
             'time': pd.to_datetime(['2016-07-15 13:30:00.000',
diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py
index 398e57d4ad0a4..40b46c5413c8f 100644
--- a/pandas/tools/tests/test_pivot.py
+++ b/pandas/tools/tests/test_pivot.py
@@ -12,8 +12,6 @@
 
 class TestPivotTable(tm.TestCase):
 
-    _multiprocess_can_split_ = True
-
     def setUp(self):
         self.data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo',
                                      'bar', 'bar', 'bar', 'bar',
@@ -1152,8 +1150,8 @@ def test_crosstab_normalize(self):
                               pd.crosstab(df.a, df.b, normalize='index'))
 
         row_normal_margins = pd.DataFrame([[1.0, 0],
-                                           [0.25, 0.75],
-                                           [0.4, 0.6]],
+                                          [0.25, 0.75],
+                                          [0.4, 0.6]],
                                           index=pd.Index([1, 2, 'All'],
                                                          name='a',
                                                          dtype='object'),
@@ -1165,8 +1163,8 @@ def test_crosstab_normalize(self):
                                                        name='b'))
 
         all_normal_margins = pd.DataFrame([[0.2, 0, 0.2],
-                                           [0.2, 0.6, 0.8],
-                                           [0.4, 0.6, 1]],
+                                          [0.2, 0.6, 0.8],
+                                          [0.4, 0.6, 1]],
                                           index=pd.Index([1, 2, 'All'],
                                                          name='a',
                                                          dtype='object'),
diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py
index c5261597cf35d..de44eadc15751 100644
--- a/pandas/tools/tests/test_tile.py
+++ b/pandas/tools/tests/test_tile.py
@@ -303,7 +303,7 @@ def test_datetime_cut(self):
         data = to_datetime(Series(['2013-01-01', '2013-01-02', '2013-01-03']))
         result, bins = cut(data, 3, retbins=True)
         expected = Series(['(2012-12-31 23:57:07.200000, 2013-01-01 16:00:00]',
-                           '(2013-01-01 16:00:00, 2013-01-02 08:00:00]',
+                          '(2013-01-01 16:00:00, 2013-01-02 08:00:00]',
                            '(2013-01-02 08:00:00, 2013-01-03 00:00:00]'],
                           ).astype("category", ordered=True)
         tm.assert_series_equal(result, expected)
@@ -316,8 +316,8 @@ def test_datetime_cut(self):
 
         # testing for time data to be present as ndarray
         data = np.array([np.datetime64('2013-01-01'),
-                         np.datetime64('2013-01-02'),
-                         np.datetime64('2013-01-03')])
+                        np.datetime64('2013-01-02'),
+                        np.datetime64('2013-01-03')])
         result, bins = cut(data, 3, retbins=True)
         tm.assert_series_equal(Series(result), expected)
@@ -330,7 +330,7 @@ def test_datetime_bin(self):
         data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')]
         bin_data = ['2012-12-12', '2012-12-14', '2012-12-16']
         expected = Series(['(2012-12-12 00:00:00, 2012-12-14 00:00:00]',
-                           '(2012-12-14 00:00:00, 2012-12-16 00:00:00]'],
+                          '(2012-12-14 00:00:00, 2012-12-16 00:00:00]'],
                           ).astype("category", ordered=True)
 
         for conv in [Timestamp, Timestamp, np.datetime64]:
diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py
index 8c75195b25ef5..98151d5b6130c 100644
--- a/pandas/tseries/period.py
+++ b/pandas/tseries/period.py
@@ -393,7 +393,7 @@ def __array_wrap__(self, result, context=None):
             left = context[1][0]
             right = context[1][1]
             if (isinstance(left, PeriodIndex) and
-                    isinstance(right, PeriodIndex)):
+               isinstance(right, PeriodIndex)):
                 name = left.name if left.name == right.name else None
                 return Index(result, name=name)
             elif isinstance(left, Period) or isinstance(right, Period):
diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py
index 2ff06517f175a..be3b917cb8117 100644
--- a/pandas/tseries/tests/test_base.py
+++ b/pandas/tseries/tests/test_base.py
@@ -15,6 +15,7 @@
 
 class TestTimedeltaIndexOps(Ops):
+
     def setUp(self):
         super(TestTimedeltaIndexOps, self).setUp()
         mask = lambda x: isinstance(x, TimedeltaIndex)
@@ -490,7 +491,7 @@ def test_addition_ops(self):
 
     def test_comp_nat(self):
         left = pd.TimedeltaIndex([pd.Timedelta('1 days'), pd.NaT,
-                                  pd.Timedelta('3 days')])
+                                 pd.Timedelta('3 days')])
         right = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta('3 days')])
 
         for l, r in [(left, right), (left.asobject, right.asobject)]:
@@ -854,6 +855,7 @@ def test_equals(self):
 
 class TestPeriodIndexOps(Ops):
+
     def setUp(self):
         super(TestPeriodIndexOps, self).setUp()
         mask = lambda x: (isinstance(x, DatetimeIndex) or
diff --git a/pandas/tseries/tests/test_bin_groupby.py b/pandas/tseries/tests/test_bin_groupby.py
index 08c0833be0cd6..51a10f4141ab5 100644
--- a/pandas/tseries/tests/test_bin_groupby.py
+++ b/pandas/tseries/tests/test_bin_groupby.py
@@ -46,7 +46,6 @@ def test_series_bin_grouper():
 
 class TestBinGroupers(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def setUp(self):
         self.obj = np.random.randn(10, 1)
@@ -122,6 +121,7 @@ class TestMoments(tm.TestCase):
 
 class TestReducer(tm.TestCase):
+
     def test_int_index(self):
         from pandas.core.series import Series
diff --git a/pandas/tseries/tests/test_converter.py b/pandas/tseries/tests/test_converter.py
index 8caed80f5a45b..b934aaed7d41f 100644
--- a/pandas/tseries/tests/test_converter.py
+++ b/pandas/tseries/tests/test_converter.py
@@ -19,6 +19,7 @@ def test_timtetonum_accepts_unicode():
 
 class TestDateTimeConverter(tm.TestCase):
+
     def setUp(self):
         self.dtc = converter.DatetimeConverter()
         self.tc = converter.TimeFormatter(None)
@@ -142,6 +143,7 @@ def _assert_less(ts1, ts2):
 
 class TestPeriodConverter(tm.TestCase):
+
     def setUp(self):
         self.pc = converter.PeriodConverter()
diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py
index 209e6e40d5cf0..a64882380850b 100644
--- a/pandas/tseries/tests/test_daterange.py
+++ b/pandas/tseries/tests/test_daterange.py
@@ -73,6 +73,7 @@ def test_precision_finer_than_offset(self):
 
 class TestDateRange(tm.TestCase):
+
     def setUp(self):
         self.rng = bdate_range(START, END)
@@ -588,6 +589,7 @@ def test_freq_divides_end_in_nanos(self):
 
 class TestCustomDateRange(tm.TestCase):
+
     def setUp(self):
         self.rng = cdate_range(START, END)
diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tseries/tests/test_frequencies.py
index dfb7b26371d7a..9983bf5270b29 100644
--- a/pandas/tseries/tests/test_frequencies.py
+++ b/pandas/tseries/tests/test_frequencies.py
@@ -477,6 +477,7 @@ def test_get_freq_code(self):
 
 class TestFrequencyInference(tm.TestCase):
+
     def test_raise_if_period_index(self):
         index = PeriodIndex(start="1/1/1990", periods=20, freq="M")
         self.assertRaises(TypeError, frequencies.infer_freq, index)
diff --git a/pandas/tseries/tests/test_holiday.py b/pandas/tseries/tests/test_holiday.py
index d4d273347e6e3..2adf28a506c53 100644
--- a/pandas/tseries/tests/test_holiday.py
+++ b/pandas/tseries/tests/test_holiday.py
@@ -18,6 +18,7 @@
 
 class TestCalendar(tm.TestCase):
+
     def setUp(self):
         self.holiday_list = [
             datetime(2012, 1, 2),
@@ -54,6 +55,7 @@ def test_calendar_caching(self):
         # Test for issue #9552
 
         class TestCalendar(AbstractHolidayCalendar):
+
             def __init__(self, name=None, rules=None):
                 super(TestCalendar, self).__init__(name=name,
                                                    rules=rules)
@@ -83,6 +85,7 @@ def test_rule_from_name(self):
 
 class TestHoliday(tm.TestCase):
+
     def setUp(self):
         self.start_date = datetime(2011, 1, 1)
         self.end_date = datetime(2020, 12, 31)
@@ -288,6 +291,7 @@ def test_factory(self):
 
 class TestObservanceRules(tm.TestCase):
+
     def setUp(self):
         self.we = datetime(2014, 4, 9)
         self.th = datetime(2014, 4, 10)
diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py
index ac488a3dfdcb2..7c5a4c3df28b2 100644
--- a/pandas/tseries/tests/test_offsets.py
+++ b/pandas/tseries/tests/test_offsets.py
@@ -38,8 +38,6 @@
 import pandas.util.testing as tm
 from pandas.tseries.holiday import USFederalHolidayCalendar
 
-_multiprocess_can_split_ = True
-
 
 def test_monthrange():
     import calendar
@@ -507,7 +505,6 @@ def test_pickle_v0_15_2(self):
 
 class TestDateOffset(Base):
-    _multiprocess_can_split_ = True
 
     def setUp(self):
         self.d = Timestamp(datetime(2008, 1, 2))
@@ -547,7 +544,6 @@ def test_eq(self):
 
 class TestBusinessDay(Base):
-    _multiprocess_can_split_ = True
     _offset = BDay
 
     def setUp(self):
@@ -725,7 +721,6 @@ def test_offsets_compare_equal(self):
 
 class TestBusinessHour(Base):
-    _multiprocess_can_split_ = True
     _offset = BusinessHour
 
     def setUp(self):
@@ -1432,7 +1427,6 @@ def test_datetimeindex(self):
 
 class TestCustomBusinessHour(Base):
-    _multiprocess_can_split_ = True
     _offset = CustomBusinessHour
 
     def setUp(self):
@@ -1693,7 +1687,6 @@ def test_apply_nanoseconds(self):
 
 class TestCustomBusinessDay(Base):
-    _multiprocess_can_split_ = True
     _offset = CDay
 
     def setUp(self):
@@ -1931,7 +1924,6 @@ def test_pickle_compat_0_14_1(self):
 
 class CustomBusinessMonthBase(object):
-    _multiprocess_can_split_ = True
 
     def setUp(self):
         self.d = datetime(2008, 1, 1)
@@ -3257,6 +3249,7 @@ def makeFY5253LastOfMonth(*args, **kwds):
 
 class TestFY5253LastOfMonth(Base):
+
     def test_onOffset(self):
 
         offset_lom_sat_aug = makeFY5253LastOfMonth(1, startingMonth=8,
@@ -3342,6 +3335,7 @@ def test_apply(self):
 
 class TestFY5253NearestEndMonth(Base):
+
     def test_get_target_month_end(self):
         self.assertEqual(makeFY5253NearestEndMonth(startingMonth=8,
                                                    weekday=WeekDay.SAT)
@@ -3507,6 +3501,7 @@ def test_apply(self):
 
 class TestFY5253LastOfMonthQuarter(Base):
+
     def test_isAnchored(self):
         self.assertTrue(
             makeFY5253LastOfMonthQuarter(startingMonth=1, weekday=WeekDay.SAT,
@@ -3729,6 +3724,7 @@ def test_get_weeks(self):
 
 class TestFY5253NearestEndMonthQuarter(Base):
+
     def test_onOffset(self):
 
         offset_nem_sat_aug_4 = makeFY5253NearestEndMonthQuarter(
@@ -3814,6 +3810,7 @@ def test_offset(self):
 
 class TestQuarterBegin(Base):
+
     def test_repr(self):
         self.assertEqual(repr(QuarterBegin()), "<QuarterBegin: startingMonth=3>")
@@ -4168,6 +4165,7 @@ def test_onOffset(self):
 
 class TestBYearEndLagged(Base):
+
     def test_bad_month_fail(self):
         self.assertRaises(Exception, BYearEnd, month=13)
         self.assertRaises(Exception, BYearEnd, month=0)
@@ -4307,6 +4305,7 @@ def test_onOffset(self):
 
 class TestYearEndDiffMonth(Base):
+
     def test_offset(self):
         tests = []
@@ -4542,6 +4541,7 @@ def test_compare_ticks(self):
 
 class TestOffsetNames(tm.TestCase):
+
     def test_get_offset_name(self):
         self.assertEqual(BDay().freqstr, 'B')
         self.assertEqual(BDay(2).freqstr, '2B')
@@ -4600,6 +4600,7 @@ def test_get_offset_legacy():
 
 class TestParseTimeString(tm.TestCase):
+
     def test_parse_time_string(self):
         (date, parsed, reso) = parse_time_string('4Q1984')
         (date_lower, parsed_lower, reso_lower) = parse_time_string('4q1984')
@@ -4662,6 +4663,7 @@ def test_quarterly_dont_normalize():
 
 class TestOffsetAliases(tm.TestCase):
+
     def setUp(self):
         _offset_map.clear()
@@ -4797,6 +4799,7 @@ def test_week_of_month_index_creation(self):
 
 class TestReprNames(tm.TestCase):
+
     def test_str_for_named_is_name(self):
         # look at all the amazing combinations!
         month_prefixes = ['A', 'AS', 'BA', 'BAS', 'Q', 'BQ', 'BQS', 'QS']
diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py
index fdc067a827a5b..a39830b6aede6 100644
--- a/pandas/tseries/tests/test_period.py
+++ b/pandas/tseries/tests/test_period.py
@@ -1652,6 +1652,7 @@ def test_is_leap_year(self):
 
 class TestPeriodIndex(tm.TestCase):
+
     def setUp(self):
         pass
@@ -4456,6 +4457,7 @@ def test_negone_ordinals(self):
 
 class TestComparisons(tm.TestCase):
+
     def setUp(self):
         self.january1 = Period('2000-01', 'M')
         self.january2 = Period('2000-01', 'M')
@@ -4961,6 +4963,7 @@ def test_ops_frame_period(self):
 
 class TestPeriodField(tm.TestCase):
+
     def test_get_period_field_raises_on_out_of_range(self):
         self.assertRaises(ValueError, _period.get_period_field, -1, 0, 0)
diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py
index 222ffb735921a..afb44887fe7d1 100755
--- a/pandas/tseries/tests/test_resample.py
+++ b/pandas/tseries/tests/test_resample.py
@@ -49,7 +49,6 @@ def _simple_pts(start, end, freq='D'):
 
 class TestResampleAPI(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def setUp(self):
         dti = DatetimeIndex(start=datetime(2005, 1, 1),
@@ -754,8 +753,8 @@ def test_resample_empty_series(self):
             self.assertEqual(result.index.freq, expected.index.freq)
 
             if (method == 'size' and
-                    isinstance(result.index, PeriodIndex) and
-                    freq in ['M', 'D']):
+               isinstance(result.index, PeriodIndex) and
+               freq in ['M', 'D']):
                 # GH12871 - TODO: name should propagate, but currently
                 # doesn't on lower / same frequency with PeriodIndex
                 assert_series_equal(result, expected, check_dtype=False,
@@ -839,7 +838,6 @@ def test_resample_loffset_arg_type(self):
 
 class TestDatetimeIndex(Base, tm.TestCase):
-    _multiprocess_can_split_ = True
     _index_factory = lambda x: date_range
 
     def setUp(self):
@@ -990,6 +988,7 @@ def fn(x, a=1):
             return str(type(x))
 
         class fn_class:
+
             def __call__(self, x):
                 return str(type(x))
@@ -2135,7 +2134,6 @@ def test_resample_datetime_values(self):
 
 class TestPeriodIndex(Base, tm.TestCase):
-    _multiprocess_can_split_ = True
     _index_factory = lambda x: period_range
 
     def create_series(self):
@@ -2744,7 +2742,6 @@ def test_evenly_divisible_with_no_extra_bins(self):
 
 class TestTimedeltaIndex(Base, tm.TestCase):
-    _multiprocess_can_split_ = True
     _index_factory = lambda x: timedelta_range
 
     def create_series(self):
@@ -2766,6 +2763,7 @@ def test_asfreq_bug(self):
 
 class TestResamplerGrouper(tm.TestCase):
+
     def setUp(self):
         self.frame = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8,
                                 'B': np.arange(40)},
@@ -2960,6 +2958,7 @@ def test_median_duplicate_columns(self):
 
 class TestTimeGrouper(tm.TestCase):
+
     def setUp(self):
         self.ts = Series(np.random.randn(1000),
                          index=date_range('1/1/2000', periods=1000))
diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py
index 13263259e0b8a..170d5cdafa60b 100644
--- a/pandas/tseries/tests/test_timedeltas.py
+++ b/pandas/tseries/tests/test_timedeltas.py
@@ -24,7 +24,6 @@
 
 class TestTimedeltas(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def setUp(self):
         pass
@@ -1231,7 +1230,6 @@ def test_timedelta_arithmetic(self):
 
 class TestTimedeltaIndex(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def test_pass_TimedeltaIndex_to_index(self):
@@ -1907,6 +1905,7 @@ def test_factorize(self):
 
 class TestSlicing(tm.TestCase):
+
     def test_partial_slice(self):
         rng = timedelta_range('1 day 10:11:12', freq='h', periods=500)
         s = Series(np.arange(len(rng)), index=rng)
diff --git a/pandas/tseries/tests/test_timeseries_legacy.py b/pandas/tseries/tests/test_timeseries_legacy.py
index 5395056c93412..17cc93ac42639 100644
--- a/pandas/tseries/tests/test_timeseries_legacy.py
+++ b/pandas/tseries/tests/test_timeseries_legacy.py
@@ -27,8 +27,6 @@
 # class TestLegacySupport(unittest.TestCase):
 class LegacySupport(object):
 
-    _multiprocess_can_split_ = True
-
     @classmethod
     def setUpClass(cls):
         if compat.PY3:
diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py
index 00b60ba620c4b..38cd8079faf93 100644
--- a/pandas/tseries/tests/test_timezones.py
+++ b/pandas/tseries/tests/test_timezones.py
@@ -52,7 +52,6 @@ def dst(self, dt):
 
 class TestTimeZoneSupportPytz(tm.TestCase):
-    _multiprocess_can_split_ = True
 
     def setUp(self):
         tm._skip_if_no_pytz()
@@ -899,7 +898,6 @@ def test_datetimeindex_tz_nat(self):
 
 class TestTimeZoneSupportDateutil(TestTimeZoneSupportPytz):
-    _multiprocess_can_split_ = True
 
     def setUp(self):
         tm._skip_if_no_dateutil()
@@ -1142,6 +1140,7 @@ def test_tz_convert_tzlocal(self):
 
 class TestTimeZoneCacheKey(tm.TestCase):
+
     def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self):
         tzs = pytz.common_timezones
         for tz_name in tzs:
@@ -1158,7 +1157,6 @@ def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self):
 
 class TestTimeZones(tm.TestCase):
-    _multiprocess_can_split_ = True
     timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']
 
     def setUp(self):
diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py
index 20e91a6f5bc44..a141d445e6035 100644
--- a/pandas/tseries/tests/test_tslib.py
+++ b/pandas/tseries/tests/test_tslib.py
@@ -49,6 +49,7 @@ def test_to_datetime_bijective(self):
 
 class TestDatetimeParsingWrappers(tm.TestCase):
+
     def test_does_not_convert_mixed_integer(self):
         bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T')
@@ -408,6 +409,7 @@ def test_parsers_iso8601(self):
 
 class TestArrayToDatetime(tm.TestCase):
+
     def test_parsing_valid_dates(self):
         arr = np.array(['01-01-2013', '01-02-2013'], dtype=object)
         self.assert_numpy_array_equal(
@@ -523,6 +525,7 @@ def test_parsing_timezone_offsets(self):
 
 class TestTslib(tm.TestCase):
+
     def test_intraday_conversion_factors(self):
         self.assertEqual(period_asfreq(
             1, get_freq('D'), get_freq('H'), False), 24)
diff --git a/pandas/types/generic.py b/pandas/types/generic.py
index 86d266f4595e2..756fb47596700 100644
--- a/pandas/types/generic.py
+++ b/pandas/types/generic.py
@@ -53,6 +53,7 @@ def _check(cls, inst):
 
 class _ABCGeneric(type):
+
     def __instancecheck__(cls, inst):
         return hasattr(inst, "_data")
diff --git a/pandas/util/clipboard/__init__.py b/pandas/util/clipboard/__init__.py
index 358c9b5f8035a..9e2b2faf858db 100644
--- a/pandas/util/clipboard/__init__.py
+++ b/pandas/util/clipboard/__init__.py
@@ -107,4 +107,4 @@ def set_clipboard(clipboard):
 
 # pandas aliases
 clipboard_get = paste
-clipboard_set = copy
\ No newline at end of file
+clipboard_set = copy
diff --git a/pandas/util/clipboard/clipboards.py b/pandas/util/clipboard/clipboards.py
index 182a685f956e6..f73f4f191d577 100644
--- a/pandas/util/clipboard/clipboards.py
+++ b/pandas/util/clipboard/clipboards.py
@@ -123,6 +123,7 @@ def paste_klipper():
 
 def init_no_clipboard():
     class ClipboardUnavailable(object):
+
         def __call__(self, *args, **kwargs):
             raise PyperclipException(EXCEPT_MSG)
diff --git a/pandas/util/clipboard/exceptions.py b/pandas/util/clipboard/exceptions.py
index 615335f3a58da..f42d263a02993 100644
--- a/pandas/util/clipboard/exceptions.py
+++ b/pandas/util/clipboard/exceptions.py
@@ -7,6 +7,7 @@ class PyperclipException(RuntimeError):
 
 class PyperclipWindowsException(PyperclipException):
+
     def __init__(self, message):
         message += " (%s)" % ctypes.WinError()
         super(PyperclipWindowsException, self).__init__(message)
diff --git a/pandas/util/clipboard/windows.py b/pandas/util/clipboard/windows.py
index 956d5b9d34025..5c9be9ddaf508 100644
--- a/pandas/util/clipboard/windows.py
+++ b/pandas/util/clipboard/windows.py
@@ -10,6 +10,7 @@
 
 class CheckedCall(object):
+
     def __init__(self, f):
         super(CheckedCall, self).__setattr__("f", f)
@@ -133,7 +134,8 @@ def copy_windows(text):
                                count * sizeof(c_wchar))
 
             locked_handle = safeGlobalLock(handle)
-            ctypes.memmove(c_wchar_p(locked_handle), c_wchar_p(text), count * sizeof(c_wchar))
+            ctypes.memmove(c_wchar_p(locked_handle),
+                           c_wchar_p(text), count * sizeof(c_wchar))
 
             safeGlobalUnlock(handle)
             safeSetClipboardData(CF_UNICODETEXT, handle)
diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py
index e1888a3ffd62a..85d77c2f6f57c 100644
--- a/pandas/util/decorators.py
+++ b/pandas/util/decorators.py
@@ -125,6 +125,7 @@ def some_function(x):
     def some_function(x):
         "%s %s wrote the Raven"
     """
+
     def __init__(self, *args, **kwargs):
         if (args and kwargs):
             raise AssertionError("Only positional or keyword args are allowed")
@@ -171,6 +172,7 @@ def my_dog(has='fleas'):
         "This docstring will have a copyright below"
         pass
     """
+
     def __init__(self, addendum, join='', indents=0):
         if indents > 0:
             self.addendum = indent(addendum, indents=indents)
diff --git a/pandas/util/depr_module.py b/pandas/util/depr_module.py
index 736d2cdaab31c..cf8b0f7960f17 100644
--- a/pandas/util/depr_module.py
+++ b/pandas/util/depr_module.py
@@ -16,6 +16,7 @@ class _DeprecatedModule(object):
     removals : objects or methods in module that will no longer be
     accessible once module is removed.
     """
+
     def __init__(self, deprmod, removals=None):
         self.deprmod = deprmod
         self.removals = removals
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 6ea91543677a7..6b2e920a24063 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -165,7 +165,7 @@ def assert_almost_equal(left, right, check_exact=False,
             pass
         else:
             if (isinstance(left, np.ndarray) or
-                    isinstance(right, np.ndarray)):
+               isinstance(right, np.ndarray)):
                 obj = 'numpy array'
            else:
                 obj = 'Input'
@@ -1103,7 +1103,6 @@ def assert_series_equal(left, right, check_dtype=True,
                         check_datetimelike_compat=False,
                         check_categorical=True,
                         obj='Series'):
-
     """Check that left and right Series are equal.
 
     Parameters
@@ -1211,7 +1210,6 @@ def assert_frame_equal(left, right, check_dtype=True,
                        check_categorical=True,
                        check_like=False,
                        obj='DataFrame'):
-
     """Check that left and right DataFrame are equal.
 
     Parameters
@@ -2446,6 +2444,7 @@ class _AssertRaisesContextmanager(object):
     Handles the behind the scenes work for assertRaises and
     assertRaisesRegexp
     """
+
    def __init__(self, exception, regexp=None, *args, **kwargs):
         self.exception = exception
         if regexp is not None and not hasattr(regexp, "search"):

From bf8194a74c84c0ba3976d40cc8380df76aa32cdb Mon Sep 17 00:00:00 2001
From: TrigonaMinima
Date: Tue, 7 Feb 2017 03:56:12 +0530
Subject: [PATCH 017/933] TST/CLN: reorg more of tseries/tests

xref #14854
closes #15324
---
 .../indexes/datetimes/test_date_range.py      |  810 ++++++
 pandas/tests/indexes/datetimes/test_ops.py    |   17 +-
 pandas/tests/indexes/datetimes/test_tools.py  |  626 ++++-
 pandas/tests/indexes/period/__init__.py       |    0
 pandas/tests/indexes/period/test_period.py    |  233 ++
 pandas/tests/indexes/test_datetimelike.py     |  465 ----
 pandas/tests/indexes/test_timedelta.py        |   42 -
 pandas/tests/indexes/timedeltas/__init__.py   |    0
 .../tests/indexes/timedeltas/test_astype.py   |  121 +
 .../indexes/timedeltas/test_construction.py   |   88 +
 .../tests/indexes/timedeltas/test_indexing.py |  110 +
 pandas/tests/indexes/timedeltas/test_ops.py   | 1276 ++++++++++
 .../timedeltas/test_partial_slicing.py        |   81 +
 .../tests/indexes/timedeltas/test_setops.py   |   76 +
 .../indexes/timedeltas/test_timedelta.py      |  592 +++++
 .../timedeltas/test_timedelta_range.py        |   51 +
 pandas/tests/indexes/timedeltas/test_tools.py |  201 ++
 pandas/tests/scalar/test_timedelta.py         |  712 ++++++
 pandas/tests/scalar/test_timestamp.py         |   45 +
 pandas/tseries/tests/test_base.py             |  846 +------
 pandas/tseries/tests/test_daterange.py        |  820 -------
 pandas/tseries/tests/test_period.py           |  119 +-
 pandas/tseries/tests/test_timedeltas.py       | 2051 -----------------
 pandas/tseries/tests/test_timezones.py        |   70 +-
 pandas/tseries/tests/test_tslib.py            |  694 ------
 pandas/tseries/tests/test_util.py             |  126 -
 26 files changed, 5192 insertions(+), 5080 deletions(-)
 create mode 100644 pandas/tests/indexes/period/__init__.py
 create mode 100644 pandas/tests/indexes/period/test_period.py
 delete mode 100644 pandas/tests/indexes/test_datetimelike.py
 delete mode 100644 pandas/tests/indexes/test_timedelta.py
 create mode 100644 pandas/tests/indexes/timedeltas/__init__.py
 create mode 100644 pandas/tests/indexes/timedeltas/test_astype.py
 create mode 100644 pandas/tests/indexes/timedeltas/test_construction.py
 create mode 100644 pandas/tests/indexes/timedeltas/test_indexing.py
 create mode 100644 pandas/tests/indexes/timedeltas/test_ops.py
 create mode 100644 pandas/tests/indexes/timedeltas/test_partial_slicing.py
 create mode 100644 pandas/tests/indexes/timedeltas/test_setops.py
 create mode 100644 pandas/tests/indexes/timedeltas/test_timedelta.py
 create mode 100644 pandas/tests/indexes/timedeltas/test_timedelta_range.py
 create mode 100644 pandas/tests/indexes/timedeltas/test_tools.py
 delete mode 100644 pandas/tseries/tests/test_daterange.py
 delete mode 100644 pandas/tseries/tests/test_timedeltas.py
 delete mode 100644 pandas/tseries/tests/test_tslib.py
 delete mode 100644 pandas/tseries/tests/test_util.py

diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py
index 9d5f397329c76..8dab10269f76d 100644
--- a/pandas/tests/indexes/datetimes/test_date_range.py
+++ b/pandas/tests/indexes/datetimes/test_date_range.py
@@ -1,12 +1,20 @@
+import numpy as np
 from datetime import datetime, timedelta, time
 
 import pandas as pd
 import pandas.util.testing as tm
-from pandas import date_range, offsets, DatetimeIndex, Timestamp
 from pandas import compat
+from pandas.core import common as com
+from pandas.util.testing import assertRaisesRegexp
+from pandas.tseries.index import bdate_range, cdate_range
+from pandas import date_range, offsets, DatetimeIndex, Timestamp, Index
+from pandas.tseries.offsets import (generate_range, CDay, BDay, Minute,
+                                    BMonthEnd, DateOffset, MonthEnd)
 
 from pandas.tests.series.common import TestData
 
+START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
+
 
 class TestTimeSeries(TestData, tm.TestCase):
@@ -127,3 +135,803 @@ def test_catch_infinite_loop(self):
         # blow up, don't loop forever
         self.assertRaises(Exception, date_range, datetime(2011, 11, 11),
                           datetime(2011, 11, 12), freq=offset)
+
+
+def eq_gen_range(kwargs, expected):
+    rng = generate_range(**kwargs)
+    assert (np.array_equal(list(rng), expected))
+
+
+class TestGenRangeGeneration(tm.TestCase):
+
+    def test_generate(self):
+        rng1 = list(generate_range(START, END, offset=BDay()))
+        rng2 = list(generate_range(START, END, time_rule='B'))
+        self.assertEqual(rng1, rng2)
+
+    def test_generate_cday(self):
+        rng1 = list(generate_range(START, END, offset=CDay()))
+        rng2 = list(generate_range(START, END, time_rule='C'))
+        self.assertEqual(rng1, rng2)
+
+    def test_1(self):
+        eq_gen_range(dict(start=datetime(2009, 3, 25), periods=2),
+                     [datetime(2009, 3, 25), datetime(2009, 3, 26)])
+
+    def test_2(self):
+        eq_gen_range(dict(start=datetime(2008, 1, 1),
+                          end=datetime(2008, 1, 3)),
+                     [datetime(2008, 1, 1),
+                      datetime(2008, 1, 2),
+                      datetime(2008, 1, 3)])
+
+    def test_3(self):
+        eq_gen_range(dict(start=datetime(2008, 1, 5),
+                          end=datetime(2008, 1, 6)),
+                     [])
+
+    def test_precision_finer_than_offset(self):
+        # GH 9907
+        result1 = DatetimeIndex(start='2015-04-15 00:00:03',
+                                end='2016-04-22 00:00:00', freq='Q')
+        result2 = DatetimeIndex(start='2015-04-15 00:00:03',
+                                end='2015-06-22 00:00:04', freq='W')
+        expected1_list = ['2015-06-30 00:00:03', '2015-09-30 00:00:03',
+                          '2015-12-31 00:00:03', '2016-03-31 00:00:03']
+        expected2_list = ['2015-04-19 00:00:03', '2015-04-26 00:00:03',
+                          '2015-05-03 00:00:03', '2015-05-10 00:00:03',
+                          '2015-05-17 00:00:03', '2015-05-24 00:00:03',
+                          '2015-05-31 00:00:03', '2015-06-07 00:00:03',
+                          '2015-06-14 00:00:03', '2015-06-21 00:00:03']
+        expected1 = DatetimeIndex(expected1_list, dtype='datetime64[ns]',
+                                  freq='Q-DEC', tz=None)
+        expected2 = DatetimeIndex(expected2_list, dtype='datetime64[ns]',
+                                  freq='W-SUN', tz=None)
+        self.assert_index_equal(result1, expected1)
+        self.assert_index_equal(result2, expected2)
+
+
+class TestDateRange(tm.TestCase):
+    def setUp(self):
+        self.rng = bdate_range(START, END)
+
+    def test_constructor(self):
+        bdate_range(START, END, freq=BDay())
+        bdate_range(START, periods=20, freq=BDay())
+        bdate_range(end=START, periods=20, freq=BDay())
+        self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'B')
+        self.assertRaises(ValueError, bdate_range, '2011-1-1', '2012-1-1',
+                          'B')
+
+    def test_naive_aware_conflicts(self):
+        naive = bdate_range(START, END, freq=BDay(), tz=None)
+        aware = bdate_range(START, END, freq=BDay(),
+                            tz="Asia/Hong_Kong")
+        assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", naive.join, aware)
+        assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", aware.join, naive)
+
+    def test_cached_range(self):
+        DatetimeIndex._cached_range(START, END, offset=BDay())
+        DatetimeIndex._cached_range(START, periods=20, offset=BDay())
+        DatetimeIndex._cached_range(end=START, periods=20, offset=BDay())
+
+        assertRaisesRegexp(TypeError, "offset", DatetimeIndex._cached_range,
+                           START, END)
+
+        assertRaisesRegexp(TypeError, "specify period",
+                           DatetimeIndex._cached_range, START,
+                           offset=BDay())
+
+        assertRaisesRegexp(TypeError, "specify period",
+                           DatetimeIndex._cached_range, end=END,
+                           offset=BDay())
+
+        assertRaisesRegexp(TypeError, "start or end",
+                           DatetimeIndex._cached_range, periods=20,
+                           offset=BDay())
+
+    def test_cached_range_bug(self):
+        rng = date_range('2010-09-01 05:00:00', periods=50,
+                         freq=DateOffset(hours=6))
+        self.assertEqual(len(rng), 50)
+        self.assertEqual(rng[0], datetime(2010, 9, 1, 5))
+
+    def test_timezone_comparaison_bug(self):
+        start = Timestamp('20130220 10:00', tz='US/Eastern')
+        try:
+            date_range(start, periods=2, tz='US/Eastern')
+        except AssertionError:
+            self.fail()
+
+    def test_timezone_comparaison_assert(self):
+        start = Timestamp('20130220 10:00', tz='US/Eastern')
+        self.assertRaises(AssertionError, date_range, start, periods=2,
+                          tz='Europe/Berlin')
+
+    def test_comparison(self):
+        d = self.rng[10]
+
+        comp = self.rng > d
+        self.assertTrue(comp[11])
+        self.assertFalse(comp[9])
+
+    def test_copy(self):
+        cp = self.rng.copy()
+        repr(cp)
+        self.assert_index_equal(cp, self.rng)
+
+    def test_repr(self):
+        # only really care that it works
+        repr(self.rng)
+
+    def test_getitem(self):
+        smaller = self.rng[:5]
+        exp = DatetimeIndex(self.rng.view(np.ndarray)[:5])
+        self.assert_index_equal(smaller, exp)
+
+        self.assertEqual(smaller.offset, self.rng.offset)
+
+        sliced = self.rng[::5]
+        self.assertEqual(sliced.offset, BDay() * 5)
+
+        fancy_indexed = self.rng[[4, 3, 2, 1, 0]]
+        self.assertEqual(len(fancy_indexed), 5)
+        tm.assertIsInstance(fancy_indexed, DatetimeIndex)
+        self.assertIsNone(fancy_indexed.freq)
+
+        # 32-bit vs. 64-bit platforms
+        self.assertEqual(self.rng[4], self.rng[np.int_(4)])
+
+    def test_getitem_matplotlib_hackaround(self):
+        values = self.rng[:, None]
+        expected = self.rng.values[:, None]
+        self.assert_numpy_array_equal(values, expected)
+
+    def test_shift(self):
+        shifted = self.rng.shift(5)
+        self.assertEqual(shifted[0], self.rng[5])
+        self.assertEqual(shifted.offset, self.rng.offset)
+
+        shifted = self.rng.shift(-5)
+        self.assertEqual(shifted[5], self.rng[0])
+        self.assertEqual(shifted.offset, self.rng.offset)
+
+        shifted = self.rng.shift(0)
+        self.assertEqual(shifted[0], self.rng[0])
+        self.assertEqual(shifted.offset, self.rng.offset)
+
+        rng = date_range(START, END, freq=BMonthEnd())
+        shifted = rng.shift(1, freq=BDay())
+        self.assertEqual(shifted[0], rng[0] + BDay())
+
+    def test_pickle_unpickle(self):
+        unpickled = self.round_trip_pickle(self.rng)
+        self.assertIsNotNone(unpickled.offset)
+
+    def test_union(self):
+        # overlapping
+        left = self.rng[:10]
+        right = self.rng[5:10]
+
+        the_union = left.union(right)
+        tm.assertIsInstance(the_union, DatetimeIndex)
+
+        # non-overlapping, gap in middle
+        left = self.rng[:5]
+        right = self.rng[10:]
+
+        the_union = left.union(right)
+        tm.assertIsInstance(the_union, Index)
+
+        # non-overlapping, no gap
+        left = self.rng[:5]
+        right = self.rng[5:10]
+
+        the_union = left.union(right)
+        tm.assertIsInstance(the_union, DatetimeIndex)
+
+        # order does not matter
+        tm.assert_index_equal(right.union(left), the_union)
+
+        # overlapping, but different offset
+        rng = date_range(START, END, freq=BMonthEnd())
+
+        the_union = self.rng.union(rng)
+        tm.assertIsInstance(the_union, DatetimeIndex)
+
+    def test_outer_join(self):
+        # should just behave as union
+
+        # overlapping
+        left = self.rng[:10]
+        right = self.rng[5:10]
+
+        the_join = left.join(right, how='outer')
+        tm.assertIsInstance(the_join, DatetimeIndex)
+
+        # non-overlapping, gap in middle
+        left = self.rng[:5]
+        right = self.rng[10:]
+
+        the_join = left.join(right, how='outer')
+        tm.assertIsInstance(the_join, DatetimeIndex)
+        self.assertIsNone(the_join.freq)
+
+        # non-overlapping, no gap
+        left = self.rng[:5]
+        right = self.rng[5:10]
+
+        the_join = left.join(right, how='outer')
+        tm.assertIsInstance(the_join, DatetimeIndex)
+
+        # overlapping, but different offset
+        rng = date_range(START, END, freq=BMonthEnd())
+
+        the_join = self.rng.join(rng, how='outer')
+        tm.assertIsInstance(the_join, DatetimeIndex)
+        self.assertIsNone(the_join.freq)
+
+    def test_union_not_cacheable(self):
+        rng = date_range('1/1/2000', periods=50, freq=Minute())
+        rng1 = rng[10:]
+        rng2 = rng[:25]
+        the_union = rng1.union(rng2)
+        self.assert_index_equal(the_union, rng)
+
+        rng1 = rng[10:]
+        rng2 = rng[15:35]
+        the_union = rng1.union(rng2)
+        expected = rng[10:]
+        self.assert_index_equal(the_union, expected)
+
+    def test_intersection(self):
+        rng = date_range('1/1/2000', periods=50, freq=Minute())
+        rng1 = rng[10:]
+        rng2 = rng[:25]
+        the_int = rng1.intersection(rng2)
+        expected = rng[10:25]
+        self.assert_index_equal(the_int, expected)
+        tm.assertIsInstance(the_int, DatetimeIndex)
+        self.assertEqual(the_int.offset, rng.offset)
+
+        the_int = rng1.intersection(rng2.view(DatetimeIndex))
+        self.assert_index_equal(the_int, expected)
+
+        # non-overlapping
+        the_int = rng[:10].intersection(rng[10:])
+        expected = DatetimeIndex([])
+        self.assert_index_equal(the_int, expected)
+
+    def test_intersection_bug(self):
+        # GH #771
+        a = bdate_range('11/30/2011', '12/31/2011')
+        b = bdate_range('12/10/2011', '12/20/2011')
+        result = a.intersection(b)
+        self.assert_index_equal(result, b)
+
+    def test_summary(self):
+        self.rng.summary()
+        self.rng[2:2].summary()
+
+    def test_summary_pytz(self):
+        tm._skip_if_no_pytz()
+        import pytz
+        bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary()
+
+    def test_summary_dateutil(self):
+        tm._skip_if_no_dateutil()
+        import dateutil
+        bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary()
+
+    def test_misc(self):
+        end = datetime(2009, 5, 13)
+        dr = bdate_range(end=end, periods=20)
+        firstDate = end - 19 * BDay()
+
+        assert len(dr) == 20
+        assert dr[0] == firstDate
+        assert dr[-1] == end
+
+    def test_date_parse_failure(self):
+        badly_formed_date = '2007/100/1'
+
+        self.assertRaises(ValueError, Timestamp, badly_formed_date)
+
+        self.assertRaises(ValueError, bdate_range, start=badly_formed_date,
+                          periods=10)
+        self.assertRaises(ValueError, bdate_range, end=badly_formed_date,
+                          periods=10)
+        self.assertRaises(ValueError, bdate_range, badly_formed_date,
+                          badly_formed_date)
+
+    def test_equals(self):
+        self.assertFalse(self.rng.equals(list(self.rng)))
+
+    def test_identical(self):
+        t1 = self.rng.copy()
+        t2 = self.rng.copy()
+        self.assertTrue(t1.identical(t2))
+
+        # name
+        t1 = t1.rename('foo')
+        self.assertTrue(t1.equals(t2))
+        self.assertFalse(t1.identical(t2))
+        t2 = t2.rename('foo')
+        self.assertTrue(t1.identical(t2))
+
+        # freq
+        t2v = Index(t2.values)
+        self.assertTrue(t1.equals(t2v))
+        self.assertFalse(t1.identical(t2v))
+
+    def test_daterange_bug_456(self):
+        # GH #456
+        rng1 = bdate_range('12/5/2011', '12/5/2011')
+        rng2 = bdate_range('12/2/2011', '12/5/2011')
+        rng2.offset = BDay()
+
+        result = rng1.union(rng2)
+        tm.assertIsInstance(result, DatetimeIndex)
+
+    def test_error_with_zero_monthends(self):
+        self.assertRaises(ValueError, date_range, '1/1/2000', '1/1/2001',
+                          freq=MonthEnd(0))
+
+    def test_range_bug(self):
+        # GH #770
+        offset = DateOffset(months=3)
+        result = date_range("2011-1-1", "2012-1-31", freq=offset)
+
+        start = datetime(2011, 1, 1)
+        exp_values = [start + i * offset for i in range(5)]
+        tm.assert_index_equal(result, DatetimeIndex(exp_values))
+
+    def test_range_tz_pytz(self):
+        # GH 2906
+        tm._skip_if_no_pytz()
+        from pytz import timezone
+
+        tz = timezone('US/Eastern')
+        start = tz.localize(datetime(2011, 1, 1))
+        end = tz.localize(datetime(2011, 1, 3))
+
+        dr = date_range(start=start, periods=3)
+        self.assertEqual(dr.tz.zone, tz.zone)
+        self.assertEqual(dr[0], start)
+        self.assertEqual(dr[2], end)
+
+        dr = date_range(end=end, periods=3)
+        self.assertEqual(dr.tz.zone, tz.zone)
+        self.assertEqual(dr[0], start)
+        self.assertEqual(dr[2], end)
+
+        dr = date_range(start=start, end=end)
+        self.assertEqual(dr.tz.zone, tz.zone)
+        self.assertEqual(dr[0], start)
+        self.assertEqual(dr[2], end)
+
+    def test_range_tz_dst_straddle_pytz(self):
+
+        tm._skip_if_no_pytz()
+        from pytz import timezone
+        tz = timezone('US/Eastern')
+        dates = [(tz.localize(datetime(2014, 3, 6)),
+                  tz.localize(datetime(2014, 3, 12))),
+                 (tz.localize(datetime(2013, 11, 1)),
+                  tz.localize(datetime(2013, 11, 6)))]
+        for (start, end) in dates:
+            dr = date_range(start, end, freq='D')
+            self.assertEqual(dr[0], start)
+            self.assertEqual(dr[-1], end)
+            self.assertEqual(np.all(dr.hour == 0), True)
+
+            dr = date_range(start, end, freq='D', tz='US/Eastern')
+            self.assertEqual(dr[0], start)
+            self.assertEqual(dr[-1], end)
+            self.assertEqual(np.all(dr.hour == 0), True)
+
+            dr = date_range(start.replace(tzinfo=None), end.replace(
+                tzinfo=None), freq='D', tz='US/Eastern')
+            self.assertEqual(dr[0], start)
+            self.assertEqual(dr[-1], end)
+            self.assertEqual(np.all(dr.hour == 0), True)
+
+    def test_range_tz_dateutil(self):
+        # GH 2906
+        tm._skip_if_no_dateutil()
+        # Use maybe_get_tz to fix filename in tz under dateutil.
+        from pandas.tslib import maybe_get_tz
+        tz = lambda x: maybe_get_tz('dateutil/' + x)
+
+        start = datetime(2011, 1, 1, tzinfo=tz('US/Eastern'))
+        end = datetime(2011, 1, 3, tzinfo=tz('US/Eastern'))
+
+        dr = date_range(start=start, periods=3)
+        self.assertTrue(dr.tz == tz('US/Eastern'))
+        self.assertTrue(dr[0] == start)
+        self.assertTrue(dr[2] == end)
+
+        dr = date_range(end=end, periods=3)
+        self.assertTrue(dr.tz == tz('US/Eastern'))
+        self.assertTrue(dr[0] == start)
+        self.assertTrue(dr[2] == end)
+
+        dr = date_range(start=start, end=end)
+        self.assertTrue(dr.tz == tz('US/Eastern'))
+        self.assertTrue(dr[0] == start)
+        self.assertTrue(dr[2] == end)
+
+    def test_month_range_union_tz_pytz(self):
+        tm._skip_if_no_pytz()
+        from pytz import timezone
+        tz = timezone('US/Eastern')
+
+        early_start = datetime(2011, 1, 1)
+        early_end = datetime(2011, 3, 1)
+
+        late_start = datetime(2011, 3, 1)
+        late_end = datetime(2011, 5, 1)
+
+        early_dr = date_range(start=early_start, end=early_end, tz=tz,
+                              freq=MonthEnd())
+        late_dr = date_range(start=late_start, end=late_end, tz=tz,
+                             freq=MonthEnd())
+
+        early_dr.union(late_dr)
+
+    def test_month_range_union_tz_dateutil(self):
+        tm._skip_if_windows_python_3()
+        tm._skip_if_no_dateutil()
+        from pandas.tslib import _dateutil_gettz as timezone
+        tz = timezone('US/Eastern')
+
+        early_start = datetime(2011, 1, 1)
+        early_end = datetime(2011, 3, 1)
+
+        late_start = datetime(2011, 3, 1)
+        late_end = datetime(2011, 5, 1)
+
+        early_dr = date_range(start=early_start, end=early_end, tz=tz,
+                              freq=MonthEnd())
+        late_dr = date_range(start=late_start, end=late_end, tz=tz,
+                             freq=MonthEnd())
+
+        early_dr.union(late_dr)
+
+    def test_range_closed(self):
+        begin = datetime(2011, 1, 1)
+        end = datetime(2014, 1, 1)
+
+        for freq in ["1D", "3D", "2M", "7W", "3H", "A"]:
+            closed = date_range(begin, end, closed=None, freq=freq)
+            left = date_range(begin, end, closed="left", freq=freq)
+            right = date_range(begin, end, closed="right", freq=freq)
+            expected_left = left
+            expected_right = right
+
+            if end == closed[-1]:
+                expected_left = closed[:-1]
+            if begin == closed[0]:
+                expected_right = closed[1:]
+
+            self.assert_index_equal(expected_left, left)
+            self.assert_index_equal(expected_right, right)
+
+    def test_range_closed_with_tz_aware_start_end(self):
+        # GH12409, GH12684
+        begin = Timestamp('2011/1/1', tz='US/Eastern')
+        end = Timestamp('2014/1/1', tz='US/Eastern')
+
+        for freq in ["1D", "3D", "2M", "7W", "3H", "A"]:
+            closed = date_range(begin, end, closed=None, freq=freq)
+            left = date_range(begin, end, closed="left", freq=freq)
+            right = date_range(begin, end, closed="right", freq=freq)
+            expected_left = left
+            expected_right = right
+
+            if end == closed[-1]:
+                expected_left = closed[:-1]
+            if begin == closed[0]:
+                expected_right = closed[1:]
+
+            self.assert_index_equal(expected_left, left)
+            self.assert_index_equal(expected_right, right)
+
+        begin = Timestamp('2011/1/1')
+        end = Timestamp('2014/1/1')
+        begintz = Timestamp('2011/1/1', tz='US/Eastern')
+        endtz = Timestamp('2014/1/1', tz='US/Eastern')
+
+        for freq in ["1D", "3D", "2M", "7W", "3H", "A"]:
+            closed = date_range(begin, end, closed=None, freq=freq,
+                                tz='US/Eastern')
+            left = date_range(begin, end, closed="left", freq=freq,
+                              tz='US/Eastern')
+            right = date_range(begin, end, closed="right", freq=freq,
+                               tz='US/Eastern')
+            expected_left = left
+            expected_right = right
+
+            if endtz == closed[-1]:
+                expected_left = closed[:-1]
+            if begintz == closed[0]:
+                expected_right = closed[1:]
+
self.assert_index_equal(expected_left, left) + self.assert_index_equal(expected_right, right) + + def test_range_closed_boundary(self): + # GH 11804 + for closed in ['right', 'left', None]: + right_boundary = date_range('2015-09-12', '2015-12-01', + freq='QS-MAR', closed=closed) + left_boundary = date_range('2015-09-01', '2015-09-12', + freq='QS-MAR', closed=closed) + both_boundary = date_range('2015-09-01', '2015-12-01', + freq='QS-MAR', closed=closed) + expected_right = expected_left = expected_both = both_boundary + + if closed == 'right': + expected_left = both_boundary[1:] + if closed == 'left': + expected_right = both_boundary[:-1] + if closed is None: + expected_right = both_boundary[1:] + expected_left = both_boundary[:-1] + + self.assert_index_equal(right_boundary, expected_right) + self.assert_index_equal(left_boundary, expected_left) + self.assert_index_equal(both_boundary, expected_both) + + def test_years_only(self): + # GH 6961 + dr = date_range('2014', '2015', freq='M') + self.assertEqual(dr[0], datetime(2014, 1, 31)) + self.assertEqual(dr[-1], datetime(2014, 12, 31)) + + def test_freq_divides_end_in_nanos(self): + # GH 10885 + result_1 = date_range('2005-01-12 10:00', '2005-01-12 16:00', + freq='345min') + result_2 = date_range('2005-01-13 10:00', '2005-01-13 16:00', + freq='345min') + expected_1 = DatetimeIndex(['2005-01-12 10:00:00', + '2005-01-12 15:45:00'], + dtype='datetime64[ns]', freq='345T', + tz=None) + expected_2 = DatetimeIndex(['2005-01-13 10:00:00', + '2005-01-13 15:45:00'], + dtype='datetime64[ns]', freq='345T', + tz=None) + self.assert_index_equal(result_1, expected_1) + self.assert_index_equal(result_2, expected_2) + + +class TestCustomDateRange(tm.TestCase): + def setUp(self): + self.rng = cdate_range(START, END) + + def test_constructor(self): + cdate_range(START, END, freq=CDay()) + cdate_range(START, periods=20, freq=CDay()) + cdate_range(end=START, periods=20, freq=CDay()) + self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'C') + self.assertRaises(ValueError, cdate_range, '2011-1-1', '2012-1-1', 'C') + + def test_cached_range(self): + DatetimeIndex._cached_range(START, END, offset=CDay()) + DatetimeIndex._cached_range(START, periods=20, + offset=CDay()) + DatetimeIndex._cached_range(end=START, periods=20, + offset=CDay()) + + self.assertRaises(Exception, DatetimeIndex._cached_range, START, END) + + self.assertRaises(Exception, DatetimeIndex._cached_range, START, + freq=CDay()) + + self.assertRaises(Exception, DatetimeIndex._cached_range, end=END, + freq=CDay()) + + self.assertRaises(Exception, DatetimeIndex._cached_range, periods=20, + freq=CDay()) + + def test_comparison(self): + d = self.rng[10] + + comp = self.rng > d + self.assertTrue(comp[11]) + self.assertFalse(comp[9]) + + def test_copy(self): + cp = self.rng.copy() + repr(cp) + self.assert_index_equal(cp, self.rng) + + def test_repr(self): + # only really care that it works + repr(self.rng) + + def test_getitem(self): + smaller = self.rng[:5] + exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) + self.assert_index_equal(smaller, exp) + self.assertEqual(smaller.offset, self.rng.offset) + + sliced = self.rng[::5] + self.assertEqual(sliced.offset, CDay() * 5) + + fancy_indexed = self.rng[[4, 3, 2, 1, 0]] + self.assertEqual(len(fancy_indexed), 5) + tm.assertIsInstance(fancy_indexed, DatetimeIndex) + self.assertIsNone(fancy_indexed.freq) + + # 32-bit vs. 
64-bit platforms + self.assertEqual(self.rng[4], self.rng[np.int_(4)]) + + def test_getitem_matplotlib_hackaround(self): + values = self.rng[:, None] + expected = self.rng.values[:, None] + self.assert_numpy_array_equal(values, expected) + + def test_shift(self): + + shifted = self.rng.shift(5) + self.assertEqual(shifted[0], self.rng[5]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(-5) + self.assertEqual(shifted[5], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(0) + self.assertEqual(shifted[0], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + with tm.assert_produces_warning(com.PerformanceWarning): + rng = date_range(START, END, freq=BMonthEnd()) + shifted = rng.shift(1, freq=CDay()) + self.assertEqual(shifted[0], rng[0] + CDay()) + + def test_pickle_unpickle(self): + unpickled = self.round_trip_pickle(self.rng) + self.assertIsNotNone(unpickled.offset) + + def test_union(self): + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_union = left.union(right) + tm.assertIsInstance(the_union, Index) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # order does not matter + self.assert_index_equal(right.union(left), the_union) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_union = self.rng.union(rng) + tm.assertIsInstance(the_union, DatetimeIndex) + + def test_outer_join(self): + # should just behave as union + + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_join = self.rng.join(rng, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + def test_intersection_bug(self): + # GH #771 + a = cdate_range('11/30/2011', '12/31/2011') + b = cdate_range('12/10/2011', '12/20/2011') + result = a.intersection(b) + self.assert_index_equal(result, b) + + def test_summary(self): + self.rng.summary() + self.rng[2:2].summary() + + def test_summary_pytz(self): + tm._skip_if_no_pytz() + import pytz + cdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() + + def test_summary_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() + + def test_misc(self): + end = datetime(2009, 5, 13) + dr = cdate_range(end=end, periods=20) + firstDate = end - 19 * CDay() + + assert len(dr) == 20 + assert dr[0] == firstDate + assert dr[-1] == end + + def test_date_parse_failure(self): + badly_formed_date = '2007/100/1' + + self.assertRaises(ValueError, Timestamp, badly_formed_date) + + self.assertRaises(ValueError, cdate_range, start=badly_formed_date, + periods=10) + 
self.assertRaises(ValueError, cdate_range, end=badly_formed_date, + periods=10) + self.assertRaises(ValueError, cdate_range, badly_formed_date, + badly_formed_date) + + def test_equals(self): + self.assertFalse(self.rng.equals(list(self.rng))) + + def test_daterange_bug_456(self): + # GH #456 + rng1 = cdate_range('12/5/2011', '12/5/2011') + rng2 = cdate_range('12/2/2011', '12/5/2011') + rng2.offset = CDay() + + result = rng1.union(rng2) + tm.assertIsInstance(result, DatetimeIndex) + + def test_cdaterange(self): + rng = cdate_range('2013-05-01', periods=3) + xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03']) + self.assert_index_equal(xp, rng) + + def test_cdaterange_weekmask(self): + rng = cdate_range('2013-05-01', periods=3, + weekmask='Sun Mon Tue Wed Thu') + xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05']) + self.assert_index_equal(xp, rng) + + def test_cdaterange_holidays(self): + rng = cdate_range('2013-05-01', periods=3, holidays=['2013-05-01']) + xp = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06']) + self.assert_index_equal(xp, rng) + + def test_cdaterange_weekmask_and_holidays(self): + rng = cdate_range('2013-05-01', periods=3, + weekmask='Sun Mon Tue Wed Thu', + holidays=['2013-05-01']) + xp = DatetimeIndex(['2013-05-02', '2013-05-05', '2013-05-06']) + self.assert_index_equal(xp, rng) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index a46980a0f742a..c7cdcd9318a0e 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -8,7 +8,7 @@ from pandas.core.common import PerformanceWarning from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, date_range, TimedeltaIndex, _np_version_under1p10, Index, - datetime, Float64Index) + datetime, Float64Index, offsets) from pandas.tests.test_base import Ops @@ -1070,3 +1070,18 @@ def test_datetime64_with_DateOffset(self): assert_func(klass([x + op for x in s]), s + op) assert_func(klass([x - op for x in s]), s - op) assert_func(klass([op + x for x in s]), op + s) + + +class TestTslib(tm.TestCase): + + def test_shift_months(self): + s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), Timestamp( + '2000-01-31 00:23:00'), Timestamp('2000-01-01'), Timestamp( + '2000-02-29'), Timestamp('2000-12-31')]) + for years in [-1, 0, 1]: + for months in [-2, 0, 2]: + actual = DatetimeIndex(tslib.shift_months(s.asi8, years * 12 + + months)) + expected = DatetimeIndex([x + offsets.DateOffset( + years=years, months=months) for x in s]) + tm.assert_index_equal(actual, expected) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 841d0be605058..bf1f82b90d5d6 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1,22 +1,26 @@ """ test to_datetime """ -import nose - import sys -import calendar +import nose import locale -from datetime import datetime - +import calendar import numpy as np -from pandas.types.common import is_datetime64_ns_dtype -from pandas import (isnull, to_datetime, Timestamp, Series, DataFrame, - Index, DatetimeIndex, NaT, date_range, bdate_range) -from pandas import tslib -from pandas.compat import lmap +from datetime import datetime, date, time +from distutils.version import LooseVersion + import pandas as pd +from pandas import tslib from pandas.tseries import tools +from pandas.tseries.tools import normalize_date +from pandas.tseries.util import pivot_annual, 
isleapyear +from pandas.compat import lmap +from pandas.compat.numpy import np_array_datetime64_compat +from pandas.types.common import is_datetime64_ns_dtype from pandas.util import testing as tm -from pandas.util.testing import assert_series_equal +from pandas.util.testing import assert_series_equal, _skip_if_has_locale +from pandas import (isnull, to_datetime, Timestamp, Series, DataFrame, + Index, DatetimeIndex, NaT, date_range, bdate_range, + compat, lib) class TimeConversionFormats(tm.TestCase): @@ -1017,3 +1021,603 @@ def test_day_not_in_month_ignore(self): '2015-02-32', errors='ignore', format="%Y-%m-%d"), '2015-02-32') self.assertEqual(to_datetime( '2015-04-31', errors='ignore', format="%Y-%m-%d"), '2015-04-31') + + +class TestDatetimeParsingWrappers(tm.TestCase): + def test_does_not_convert_mixed_integer(self): + bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') + + for bad_date_string in bad_date_strings: + self.assertFalse(tslib._does_string_look_like_datetime( + bad_date_string)) + + good_date_strings = ('2012-01-01', + '01/01/2012', + 'Mon Sep 16, 2013', + '01012012', + '0101', + '1-1', ) + + for good_date_string in good_date_strings: + self.assertTrue(tslib._does_string_look_like_datetime( + good_date_string)) + + def test_parsers(self): + + # https://github.com/dateutil/dateutil/issues/217 + import dateutil + yearfirst = dateutil.__version__ >= LooseVersion('2.5.0') + + cases = {'2011-01-01': datetime(2011, 1, 1), + '2Q2005': datetime(2005, 4, 1), + '2Q05': datetime(2005, 4, 1), + '2005Q1': datetime(2005, 1, 1), + '05Q1': datetime(2005, 1, 1), + '2011Q3': datetime(2011, 7, 1), + '11Q3': datetime(2011, 7, 1), + '3Q2011': datetime(2011, 7, 1), + '3Q11': datetime(2011, 7, 1), + + # quarterly without space + '2000Q4': datetime(2000, 10, 1), + '00Q4': datetime(2000, 10, 1), + '4Q2000': datetime(2000, 10, 1), + '4Q00': datetime(2000, 10, 1), + '2000q4': datetime(2000, 10, 1), + '2000-Q4': datetime(2000, 10, 1), + '00-Q4': datetime(2000, 10, 1), + '4Q-2000': datetime(2000, 10, 1), + '4Q-00': datetime(2000, 10, 1), + '00q4': datetime(2000, 10, 1), + '2005': datetime(2005, 1, 1), + '2005-11': datetime(2005, 11, 1), + '2005 11': datetime(2005, 11, 1), + '11-2005': datetime(2005, 11, 1), + '11 2005': datetime(2005, 11, 1), + '200511': datetime(2020, 5, 11), + '20051109': datetime(2005, 11, 9), + '20051109 10:15': datetime(2005, 11, 9, 10, 15), + '20051109 08H': datetime(2005, 11, 9, 8, 0), + '2005-11-09 10:15': datetime(2005, 11, 9, 10, 15), + '2005-11-09 08H': datetime(2005, 11, 9, 8, 0), + '2005/11/09 10:15': datetime(2005, 11, 9, 10, 15), + '2005/11/09 08H': datetime(2005, 11, 9, 8, 0), + "Thu Sep 25 10:36:28 2003": datetime(2003, 9, 25, 10, + 36, 28), + "Thu Sep 25 2003": datetime(2003, 9, 25), + "Sep 25 2003": datetime(2003, 9, 25), + "January 1 2014": datetime(2014, 1, 1), + + # GH 10537 + '2014-06': datetime(2014, 6, 1), + '06-2014': datetime(2014, 6, 1), + '2014-6': datetime(2014, 6, 1), + '6-2014': datetime(2014, 6, 1), + + '20010101 12': datetime(2001, 1, 1, 12), + '20010101 1234': datetime(2001, 1, 1, 12, 34), + '20010101 123456': datetime(2001, 1, 1, 12, 34, 56), + } + + for date_str, expected in compat.iteritems(cases): + result1, _, _ = tools.parse_time_string(date_str, + yearfirst=yearfirst) + result2 = to_datetime(date_str, yearfirst=yearfirst) + result3 = to_datetime([date_str], yearfirst=yearfirst) + # result5 is used below + result4 = to_datetime(np.array([date_str], dtype=object), + yearfirst=yearfirst) + result6 = DatetimeIndex([date_str], 
yearfirst=yearfirst)
+            # result7 is used below
+            result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst)
+            result9 = DatetimeIndex(Series([date_str]), yearfirst=yearfirst)
+
+            for res in [result1, result2]:
+                self.assertEqual(res, expected)
+            for res in [result3, result4, result6, result8, result9]:
+                exp = DatetimeIndex([pd.Timestamp(expected)])
+                tm.assert_index_equal(res, exp)
+
+            # these really need to have yearfirst, but we don't support it
+            if not yearfirst:
+                result5 = Timestamp(date_str)
+                self.assertEqual(result5, expected)
+                result7 = date_range(date_str, freq='S', periods=1,
+                                     yearfirst=yearfirst)
+                self.assertEqual(result7, expected)
+
+        # NaT
+        result1, _, _ = tools.parse_time_string('NaT')
+        result2 = to_datetime('NaT')
+        result3 = Timestamp('NaT')
+        result4 = DatetimeIndex(['NaT'])[0]
+        self.assertTrue(result1 is tslib.NaT)
+        self.assertTrue(result2 is tslib.NaT)
+        self.assertTrue(result3 is tslib.NaT)
+        self.assertTrue(result4 is tslib.NaT)
+
+    def test_parsers_quarter_invalid(self):
+
+        cases = ['2Q 2005', '2Q-200A', '2Q-200', '22Q2005', '6Q-20', '2Q200.']
+        for case in cases:
+            self.assertRaises(ValueError, tools.parse_time_string, case)
+
+    def test_parsers_dayfirst_yearfirst(self):
+        tm._skip_if_no_dateutil()
+
+        # OK
+        # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00
+        # 2.5.2 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00
+        # 2.5.3 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00
+
+        # OK
+        # 2.5.1 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
+        # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
+        # 2.5.3 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
+
+        # bug fix in 2.5.2
+        # 2.5.1 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-11-12 00:00:00
+        # 2.5.2 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00
+        # 2.5.3 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00
+
+        # OK
+        # 2.5.1 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
+        # 2.5.2 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
+        # 2.5.3 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
+
+        # OK
+        # 2.5.1 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
+        # 2.5.2 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
+        # 2.5.3 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
+
+        # OK
+        # 2.5.1 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
+        # 2.5.2 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
+        # 2.5.3 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
+
+        # revert of bug in 2.5.2
+        # 2.5.1 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00
+        # 2.5.2 20/12/21 [dayfirst=1, yearfirst=1] -> month must be in 1..12
+        # 2.5.3 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00
+
+        # OK
+        # 2.5.1 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
+        # 2.5.2 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
+        # 2.5.3 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
+
+        import dateutil
+        is_lt_253 = dateutil.__version__ < LooseVersion('2.5.3')
+
+        # str : dayfirst, yearfirst, expected
+        cases = {'10-11-12': [(False, False,
+                               datetime(2012, 10, 11)),
+                              (True, False,
+                               datetime(2012, 11, 10)),
+                              (False, True,
+                               datetime(2010, 11, 12)),
+                              (True, True,
+                               datetime(2010, 12, 11))],
+                 '20/12/21': [(False, False,
+                               datetime(2021, 12, 20)),
+                              (True, False,
+                               datetime(2021, 12, 20)),
+                              (False, True,
+                               datetime(2020, 12, 21)),
+                              (True, True,
+                               datetime(2020, 12, 21))]}
+
+        from dateutil.parser import parse
+        for date_str, values in compat.iteritems(cases):
+            for dayfirst, yearfirst, expected in values:
+
+                # odd comparisons across versions
+                # let's just skip
+                if dayfirst and yearfirst and is_lt_253:
+                    continue
+
+                # compare with dateutil result
+                dateutil_result = parse(date_str, dayfirst=dayfirst,
+                                        yearfirst=yearfirst)
+                self.assertEqual(dateutil_result, expected)
+
+                result1, _, _ = tools.parse_time_string(date_str,
+                                                        dayfirst=dayfirst,
+                                                        yearfirst=yearfirst)
+
+                # we don't support dayfirst/yearfirst here:
+                if not dayfirst and not yearfirst:
+                    result2 = Timestamp(date_str)
+                    self.assertEqual(result2, expected)
+
+                result3 = to_datetime(date_str, dayfirst=dayfirst,
+                                      yearfirst=yearfirst)
+
+                result4 = DatetimeIndex([date_str], dayfirst=dayfirst,
+                                        yearfirst=yearfirst)[0]
+
+                self.assertEqual(result1, expected)
+                self.assertEqual(result3, expected)
+                self.assertEqual(result4, expected)
+
+    def test_parsers_timestring(self):
+        tm._skip_if_no_dateutil()
+        from dateutil.parser import parse
+
+        # must be the same as dateutil result
+        cases = {'10:15': (parse('10:15'), datetime(1, 1, 1, 10, 15)),
+                 '9:05': (parse('9:05'), datetime(1, 1, 1, 9, 5))}
+
+        for date_str, (exp_now, exp_def) in compat.iteritems(cases):
+            result1, _, _ = tools.parse_time_string(date_str)
+            result2 = to_datetime(date_str)
+            result3 = to_datetime([date_str])
+            result4 = Timestamp(date_str)
+            result5 = DatetimeIndex([date_str])[0]
+            # parse_time_string() returns the time on the default date
+            # (1/1/1); the others use the current date. This can't be
+            # changed because time series plotting relies on it.
+            self.assertEqual(result1, exp_def)
+            self.assertEqual(result2, exp_now)
+            self.assertEqual(result3, exp_now)
+            self.assertEqual(result4, exp_now)
+            self.assertEqual(result5, exp_now)
+
+    def test_parsers_time(self):
+        # GH11818
+        _skip_if_has_locale()
+        strings = ["14:15", "1415", "2:15pm", "0215pm", "14:15:00", "141500",
+                   "2:15:00pm", "021500pm", time(14, 15)]
+        expected = time(14, 15)
+
+        for time_string in strings:
+            self.assertEqual(tools.to_time(time_string), expected)
+
+        new_string = "14.15"
+        self.assertRaises(ValueError, tools.to_time, new_string)
+        self.assertEqual(tools.to_time(new_string, format="%H.%M"), expected)
+
+        arg = ["14:15", "20:20"]
+        expected_arr = [time(14, 15), time(20, 20)]
+        self.assertEqual(tools.to_time(arg), expected_arr)
+        self.assertEqual(tools.to_time(arg, format="%H:%M"), expected_arr)
+        self.assertEqual(tools.to_time(arg, infer_time_format=True),
+                         expected_arr)
+        self.assertEqual(tools.to_time(arg, format="%I:%M%p", errors="coerce"),
+                         [None, None])
+
+        res = tools.to_time(arg, format="%I:%M%p", errors="ignore")
+        self.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_))
+
+        with tm.assertRaises(ValueError):
+            tools.to_time(arg, format="%I:%M%p", errors="raise")
+
+        self.assert_series_equal(tools.to_time(Series(arg, name="test")),
+                                 Series(expected_arr, name="test"))
+
+        res = tools.to_time(np.array(arg))
+        self.assertIsInstance(res, list)
+        self.assert_equal(res, expected_arr)
+
+    def test_parsers_monthfreq(self):
+        cases = {'201101': datetime(2011, 1, 1, 0, 0),
+                 '200005': datetime(2000, 5, 1, 0, 0)}
+
+        for date_str, expected in compat.iteritems(cases):
+            result1, _, _ = tools.parse_time_string(date_str, freq='M')
+            self.assertEqual(result1, expected)
+
+    def test_parsers_quarterly_with_freq(self):
+        msg = ('Incorrect quarterly string is given, quarter '
+               'must be between 1 and 4: 2013Q5')
+        with tm.assertRaisesRegexp(tslib.DateParseError, msg):
+            tools.parse_time_string('2013Q5')
+
+        # GH 5418
+        msg = ('Unable to retrieve month information from given freq: '
+               'INVLD-L-DEC-SAT')
+        with tm.assertRaisesRegexp(tslib.DateParseError, msg):
+            tools.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT')
+
+        cases = {('2013Q2', None): datetime(2013, 4, 1),
+                 ('2013Q2', 'A-APR'): datetime(2012, 8, 1),
+                 ('2013-Q2', 'A-DEC'): datetime(2013, 4, 1)}
+
+        for (date_str, freq), exp in compat.iteritems(cases):
+            result, _, _ = tools.parse_time_string(date_str, freq=freq)
+            self.assertEqual(result, exp)
+
+    def test_parsers_timezone_minute_offsets_roundtrip(self):
+        # GH11708
+        base = to_datetime("2013-01-01 00:00:00")
+        dt_strings = [
+            ('2013-01-01 05:45+0545',
+             "Asia/Katmandu",
+             "Timestamp('2013-01-01 05:45:00+0545', tz='Asia/Katmandu')"),
+            ('2013-01-01 05:30+0530',
+             "Asia/Kolkata",
+             "Timestamp('2013-01-01 05:30:00+0530', tz='Asia/Kolkata')")
+        ]
+
+        for dt_string, tz, dt_string_repr in dt_strings:
+            dt_time = to_datetime(dt_string)
+            self.assertEqual(base, dt_time)
+            converted_time = dt_time.tz_localize('UTC').tz_convert(tz)
+            self.assertEqual(dt_string_repr, repr(converted_time))
+
+    def test_parsers_iso8601(self):
+        # GH 12060
+        # test only the iso parser - flexibility to different
+        # separators and leading 0s
+        # Timestamp construction falls back to dateutil
+        cases = {'2011-01-02': datetime(2011, 1, 2),
+                 '2011-1-2': datetime(2011, 1, 2),
+                 '2011-01': datetime(2011, 1, 1),
+                 '2011-1': datetime(2011, 1, 1),
+                 '2011 01 02': datetime(2011, 1, 2),
+                 '2011.01.02': datetime(2011, 1, 2),
+                 '2011/01/02': datetime(2011, 1, 2),
+                 '2011\\01\\02': datetime(2011, 1, 2),
+                 '2013-01-01 05:30:00': datetime(2013, 1, 1, 5, 30),
+                 '2013-1-1 5:30:00': datetime(2013, 1, 1, 5, 30)}
+        for date_str, exp in compat.iteritems(cases):
+            actual = tslib._test_parse_iso8601(date_str)
+            self.assertEqual(actual, exp)
+
+        # separators must all match - YYYYMM not valid
+        invalid_cases = ['2011-01/02', '2011^11^11',
+                         '201401', '201111', '200101',
+                         # mixed separated and unseparated
+                         '2005-0101', '200501-01',
+                         '20010101 12:3456', '20010101 1234:56',
+                         # HHMMSS must have two digits in each component
+                         # if unseparated
+                         '20010101 1', '20010101 123', '20010101 12345',
+                         '20010101 12345Z',
+                         # wrong separator for HHMMSS
+                         '2001-01-01 12-34-56']
+        for date_str in invalid_cases:
+            with tm.assertRaises(ValueError):
+                tslib._test_parse_iso8601(date_str)
+                # if no ValueError was raised, surface the failing case
+                raise Exception(date_str)
+
+
+class TestTsUtil(tm.TestCase):
+
+    def test_try_parse_dates(self):
+        from dateutil.parser import parse
+        arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object)
+
+        result = lib.try_parse_dates(arr, dayfirst=True)
+        expected = [parse(d, dayfirst=True) for d in arr]
+        self.assertTrue(np.array_equal(result, expected))
+
+
+class TestArrayToDatetime(tm.TestCase):
+    def test_parsing_valid_dates(self):
+        arr = np.array(['01-01-2013', '01-02-2013'], dtype=object)
+        self.assert_numpy_array_equal(
+            tslib.array_to_datetime(arr),
+            np_array_datetime64_compat(
+                [
+                    '2013-01-01T00:00:00.000000000-0000',
+                    '2013-01-02T00:00:00.000000000-0000'
+                ],
+                dtype='M8[ns]'
+            )
+        )
+
+        arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object)
+        self.assert_numpy_array_equal(
+            tslib.array_to_datetime(arr),
+            np_array_datetime64_compat(
+                [
+                    '2013-09-16T00:00:00.000000000-0000',
+                    '2013-09-17T00:00:00.000000000-0000'
+                ],
+                dtype='M8[ns]'
+            )
+        )
+
+    def test_parsing_timezone_offsets(self):
+        # All of these datetime strings with offsets are equivalent
+        # to the same datetime after the timezone offset is added
+        dt_strings = [
+            '01-01-2013 08:00:00+08:00',
+            '2013-01-01T08:00:00.000000000+0800',
+            '2012-12-31T16:00:00.000000000-0800',
+            '12-31-2012 23:00:00-01:00'
+        ]
+
+        expected_output = tslib.array_to_datetime(np.array(
+            ['01-01-2013 00:00:00'], dtype=object))
+
+        for dt_string in dt_strings:
+            self.assert_numpy_array_equal(
+                tslib.array_to_datetime(
+                    np.array([dt_string], dtype=object)
+                ),
+                expected_output
+            )
+
+    def test_number_looking_strings_not_into_datetime(self):
+        # #4601
+        # These strings don't look like datetimes, so they shouldn't
+        # be converted
+        arr = np.array(['-352.737091', '183.575577'], dtype=object)
+        self.assert_numpy_array_equal(
+            tslib.array_to_datetime(arr, errors='ignore'), arr)
+
+        arr = np.array(['1', '2', '3', '4', '5'], dtype=object)
+        self.assert_numpy_array_equal(
+            tslib.array_to_datetime(arr, errors='ignore'), arr)
+
+    def test_coercing_dates_outside_of_datetime64_ns_bounds(self):
+        invalid_dates = [
+            date(1000, 1, 1),
+            datetime(1000, 1, 1),
+            '1000-01-01',
+            'Jan 1, 1000',
+            np.datetime64('1000-01-01'),
+        ]
+
+        for invalid_date in invalid_dates:
+            self.assertRaises(ValueError,
+                              tslib.array_to_datetime,
+                              np.array(
+                                  [invalid_date], dtype='object'),
+                              errors='raise', )
+            self.assert_numpy_array_equal(
+                tslib.array_to_datetime(
+                    np.array([invalid_date], dtype='object'),
+                    errors='coerce'),
+                np.array([tslib.iNaT], dtype='M8[ns]')
+            )
+
+        arr = np.array(['1/1/1000', '1/1/2000'], dtype=object)
+        self.assert_numpy_array_equal(
+            tslib.array_to_datetime(arr, errors='coerce'),
+            np_array_datetime64_compat(
+                [
+                    tslib.iNaT,
+                    '2000-01-01T00:00:00.000000000-0000'
+                ],
+                dtype='M8[ns]'
+            )
+        )
+
+    def test_coerce_of_invalid_datetimes(self):
+        arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object)
+
+        # Without coercing, the presence of any invalid dates prevents
+        # any values from being converted
+        self.assert_numpy_array_equal(
+            tslib.array_to_datetime(arr, errors='ignore'), arr)
+
+        # With coercing, the invalid dates become iNaT
+        self.assert_numpy_array_equal(
+            tslib.array_to_datetime(arr, errors='coerce'),
+            np_array_datetime64_compat(
+                [
+                    '2013-01-01T00:00:00.000000000-0000',
+                    tslib.iNaT,
+                    tslib.iNaT
+                ],
+                dtype='M8[ns]'
+            )
+        )
+
+
+class TestPivotAnnual(tm.TestCase):
+    """
+    pandas port of the scikits.timeseries pivot_annual function
+    """
+
+    def test_daily(self):
+        rng = date_range('1/1/2000', '12/31/2004', freq='D')
+        ts = Series(np.random.randn(len(rng)), index=rng)
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            annual = pivot_annual(ts, 'D')
+
+        doy = ts.index.dayofyear
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1
+
+        for i in range(1, 367):
+            subset = ts[doy == i]
+            subset.index = [x.year for x in subset.index]
+
+            result = annual[i].dropna()
+            tm.assert_series_equal(result, subset, check_names=False)
+            self.assertEqual(result.name, i)
+
+        # check leap days
+        leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)]
+        day = leaps.index.dayofyear[0]
+        leaps.index = leaps.index.year
+        leaps.name = 60
+        tm.assert_series_equal(annual[day].dropna(), leaps)
+
+    def test_hourly(self):
+        rng_hourly = date_range('1/1/1994', periods=(18 * 8760 + 4 * 24),
+                                freq='H')
+        data_hourly = np.random.randint(100, 350, rng_hourly.size)
+        ts_hourly = Series(data_hourly, index=rng_hourly)
+
+        grouped = ts_hourly.groupby(ts_hourly.index.year)
+        hoy = grouped.apply(lambda x: x.reset_index(drop=True))
+        hoy = hoy.index.droplevel(0).values
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            hoy[~isleapyear(ts_hourly.index.year) & (hoy >= 1416)] += 24
+        hoy += 1
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            annual = pivot_annual(ts_hourly)
+
+        ts_hourly = ts_hourly.astype(float)
+        for i in [1, 1416, 1417, 1418, 1439, 1440, 1441, 8784]:
+            subset = ts_hourly[hoy == i]
+            subset.index = [x.year for x in subset.index]
+
+            result = annual[i].dropna()
+            tm.assert_series_equal(result, subset, check_names=False)
+            self.assertEqual(result.name, i)
+
+        leaps = ts_hourly[(ts_hourly.index.month == 2) & (
+            ts_hourly.index.day == 29) & (ts_hourly.index.hour == 0)]
+        hour = leaps.index.dayofyear[0] * 24 - 23
+        leaps.index = leaps.index.year
+        leaps.name = 1417
+        tm.assert_series_equal(annual[hour].dropna(), leaps)
+
+    def test_weekly(self):
+        pass
+
+    def test_monthly(self):
+        rng = date_range('1/1/2000', '12/31/2004', freq='M')
+        ts = Series(np.random.randn(len(rng)), index=rng)
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            annual = pivot_annual(ts, 'M')
+
+        month = ts.index.month
+        for i in range(1, 13):
+            subset = ts[month == i]
+            subset.index = [x.year for x in subset.index]
+            result = annual[i].dropna()
+            tm.assert_series_equal(result, subset, check_names=False)
+            self.assertEqual(result.name, i)
+
+    def test_period_monthly(self):
+        pass
+
+    def test_period_daily(self):
+        pass
+
+    def test_period_weekly(self):
+        pass
+
+    def test_isleapyear_deprecate(self):
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            self.assertTrue(isleapyear(2000))
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            self.assertFalse(isleapyear(2001))
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            self.assertTrue(isleapyear(2004))
+
+
+def test_normalize_date():
+    value = date(2012, 9, 7)
+
+    result = normalize_date(value)
+    assert (result == datetime(2012, 9, 7))
+
+    value = datetime(2012, 9, 7, 12)
+
+    result = normalize_date(value)
+    assert (result == datetime(2012, 9, 7))
diff --git a/pandas/tests/indexes/period/__init__.py b/pandas/tests/indexes/period/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py
new file
mode 100644 index 0000000000000..33653c92da719 --- /dev/null +++ b/pandas/tests/indexes/period/test_period.py @@ -0,0 +1,233 @@ +import numpy as np +from datetime import timedelta + +import pandas as pd +from pandas.util import testing as tm +from pandas import (PeriodIndex, period_range, notnull, DatetimeIndex, NaT, + Index, Period, Int64Index) + +from ..datetimelike import DatetimeLike + + +class TestPeriodIndex(DatetimeLike, tm.TestCase): + _holder = PeriodIndex + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=tm.makePeriodIndex(10)) + self.setup_indices() + + def create_index(self): + return period_range('20130101', periods=5, freq='D') + + def test_construction_base_constructor(self): + # GH 13664 + arr = [pd.Period('2011-01', freq='M'), pd.NaT, + pd.Period('2011-03', freq='M')] + tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.PeriodIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, pd.Period('2011-03', freq='M')] + tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.PeriodIndex(np.array(arr))) + + arr = [pd.Period('2011-01', freq='M'), pd.NaT, + pd.Period('2011-03', freq='D')] + tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object)) + + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.Index(np.array(arr), dtype=object)) + + def test_astype(self): + # GH 13149, GH 13209 + idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') + + result = idx.astype(object) + expected = Index([Period('2016-05-16', freq='D')] + + [Period(NaT, freq='D')] * 3, dtype='object') + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index([16937] + [-9223372036854775808] * 3, + dtype=np.int64) + tm.assert_index_equal(result, expected) + + idx = period_range('1990', '2009', freq='A') + result = idx.astype('i8') + self.assert_index_equal(result, Index(idx.asi8)) + self.assert_numpy_array_equal(result.values, idx.asi8) + + def test_astype_raises(self): + # GH 13149, GH 13209 + idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') + + self.assertRaises(ValueError, idx.astype, str) + self.assertRaises(ValueError, idx.astype, float) + self.assertRaises(ValueError, idx.astype, 'timedelta64') + self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') + + def test_shift(self): + + # test shift for PeriodIndex + # GH8083 + drange = self.create_index() + result = drange.shift(1) + expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', '2013-01-06'], freq='D') + self.assert_index_equal(result, expected) + + def test_pickle_compat_construction(self): + pass + + def test_get_loc(self): + idx = pd.period_range('2000-01-01', periods=3) + + for method in [None, 'pad', 'backfill', 'nearest']: + self.assertEqual(idx.get_loc(idx[1], method), 1) + self.assertEqual( + idx.get_loc(idx[1].asfreq('H', how='start'), method), 1) + self.assertEqual(idx.get_loc(idx[1].to_timestamp(), method), 1) + self.assertEqual( + idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method), 1) + self.assertEqual(idx.get_loc(str(idx[1]), method), 1) + + idx = pd.period_range('2000-01-01', periods=5)[::2] + self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + tolerance='1 day'), 1) + self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + tolerance=pd.Timedelta('1D')), 1) + self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + tolerance=np.timedelta64(1, 'D')), 1) + 
self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + tolerance=timedelta(1)), 1) + with tm.assertRaisesRegexp(ValueError, 'must be convertible'): + idx.get_loc('2000-01-10', method='nearest', tolerance='foo') + + msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' + with tm.assertRaisesRegexp(ValueError, msg): + idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') + with tm.assertRaises(KeyError): + idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') + + def test_where(self): + i = self.create_index() + result = i.where(notnull(i)) + expected = i + tm.assert_index_equal(result, expected) + + i2 = i.copy() + i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), + freq='D') + result = i.where(notnull(i2)) + expected = i2 + tm.assert_index_equal(result, expected) + + def test_where_other(self): + + i = self.create_index() + for arr in [np.nan, pd.NaT]: + result = i.where(notnull(i), other=np.nan) + expected = i + tm.assert_index_equal(result, expected) + + i2 = i.copy() + i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), + freq='D') + result = i.where(notnull(i2), i2) + tm.assert_index_equal(result, i2) + + i2 = i.copy() + i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), + freq='D') + result = i.where(notnull(i2), i2.values) + tm.assert_index_equal(result, i2) + + def test_get_indexer(self): + idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start') + tm.assert_numpy_array_equal(idx.get_indexer(idx), + np.array([0, 1, 2], dtype=np.intp)) + + target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12', + '2000-01-02T01'], freq='H') + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), + np.array([-1, 0, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), + np.array([0, 1, 2], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), + np.array([0, 1, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', + tolerance='1 hour'), + np.array([0, -1, 1], dtype=np.intp)) + + msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' + with self.assertRaisesRegexp(ValueError, msg): + idx.get_indexer(target, 'nearest', tolerance='1 minute') + + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', + tolerance='1 day'), + np.array([0, 1, 1], dtype=np.intp)) + + def test_repeat(self): + # GH10183 + idx = pd.period_range('2000-01-01', periods=3, freq='D') + res = idx.repeat(3) + exp = PeriodIndex(idx.values.repeat(3), freq='D') + self.assert_index_equal(res, exp) + self.assertEqual(res.freqstr, 'D') + + def test_period_index_indexer(self): + # GH4125 + idx = pd.period_range('2002-01', '2003-12', freq='M') + df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx) + self.assert_frame_equal(df, df.loc[idx]) + self.assert_frame_equal(df, df.loc[list(idx)]) + self.assert_frame_equal(df, df.loc[list(idx)]) + self.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) + self.assert_frame_equal(df, df.loc[list(idx)]) + + def test_fillna_period(self): + # GH 11343 + idx = pd.PeriodIndex(['2011-01-01 09:00', pd.NaT, + '2011-01-01 11:00'], freq='H') + + exp = pd.PeriodIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], freq='H') + self.assert_index_equal( + idx.fillna(pd.Period('2011-01-01 10:00', freq='H')), exp) + + exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), 'x', + pd.Period('2011-01-01 11:00', freq='H')], dtype=object) + self.assert_index_equal(idx.fillna('x'), exp) + + exp = pd.Index([pd.Period('2011-01-01 
09:00', freq='H'), + pd.Period('2011-01-01', freq='D'), + pd.Period('2011-01-01 11:00', freq='H')], dtype=object) + self.assert_index_equal(idx.fillna(pd.Period('2011-01-01', freq='D')), + exp) + + def test_no_millisecond_field(self): + with self.assertRaises(AttributeError): + DatetimeIndex.millisecond + + with self.assertRaises(AttributeError): + DatetimeIndex([]).millisecond + + def test_difference_freq(self): + # GH14323: difference of Period MUST preserve frequency + # but the ability to union results must be preserved + + index = period_range("20160920", "20160925", freq="D") + + other = period_range("20160921", "20160924", freq="D") + expected = PeriodIndex(["20160920", "20160925"], freq='D') + idx_diff = index.difference(other) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) + + other = period_range("20160922", "20160925", freq="D") + idx_diff = index.difference(other) + expected = PeriodIndex(["20160920", "20160921"], freq='D') + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py deleted file mode 100644 index b212a7b75904c..0000000000000 --- a/pandas/tests/indexes/test_datetimelike.py +++ /dev/null @@ -1,465 +0,0 @@ -# -*- coding: utf-8 -*- - -import numpy as np -from datetime import timedelta - -import pandas as pd -from pandas.util import testing as tm -from pandas import (DatetimeIndex, Float64Index, Index, Int64Index, - NaT, Period, PeriodIndex, Series, Timedelta, - TimedeltaIndex, period_range, - timedelta_range, notnull) - - -from .datetimelike import DatetimeLike - - -class TestPeriodIndex(DatetimeLike, tm.TestCase): - _holder = PeriodIndex - - def setUp(self): - self.indices = dict(index=tm.makePeriodIndex(10)) - self.setup_indices() - - def create_index(self): - return period_range('20130101', periods=5, freq='D') - - def test_construction_base_constructor(self): - # GH 13664 - arr = [pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='M')] - tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.PeriodIndex(np.array(arr))) - - arr = [np.nan, pd.NaT, pd.Period('2011-03', freq='M')] - tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.PeriodIndex(np.array(arr))) - - arr = [pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='D')] - tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object)) - - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.Index(np.array(arr), dtype=object)) - - def test_astype(self): - # GH 13149, GH 13209 - idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') - - result = idx.astype(object) - expected = Index([Period('2016-05-16', freq='D')] + - [Period(NaT, freq='D')] * 3, dtype='object') - tm.assert_index_equal(result, expected) - - result = idx.astype(int) - expected = Int64Index([16937] + [-9223372036854775808] * 3, - dtype=np.int64) - tm.assert_index_equal(result, expected) - - idx = period_range('1990', '2009', freq='A') - result = idx.astype('i8') - self.assert_index_equal(result, Index(idx.asi8)) - self.assert_numpy_array_equal(result.values, idx.asi8) - - def test_astype_raises(self): - # GH 13149, GH 13209 - idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') - - self.assertRaises(ValueError, idx.astype, str) - self.assertRaises(ValueError, idx.astype, float) - 
self.assertRaises(ValueError, idx.astype, 'timedelta64') - self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') - - def test_shift(self): - - # test shift for PeriodIndex - # GH8083 - drange = self.create_index() - result = drange.shift(1) - expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', - '2013-01-05', '2013-01-06'], freq='D') - self.assert_index_equal(result, expected) - - def test_pickle_compat_construction(self): - pass - - def test_get_loc(self): - idx = pd.period_range('2000-01-01', periods=3) - - for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual( - idx.get_loc(idx[1].asfreq('H', how='start'), method), 1) - self.assertEqual(idx.get_loc(idx[1].to_timestamp(), method), 1) - self.assertEqual( - idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method), 1) - self.assertEqual(idx.get_loc(str(idx[1]), method), 1) - - idx = pd.period_range('2000-01-01', periods=5)[::2] - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance='1 day'), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=pd.Timedelta('1D')), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=np.timedelta64(1, 'D')), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=timedelta(1)), 1) - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): - idx.get_loc('2000-01-10', method='nearest', tolerance='foo') - - msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' - with tm.assertRaisesRegexp(ValueError, msg): - idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') - with tm.assertRaises(KeyError): - idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') - - def test_where(self): - i = self.create_index() - result = i.where(notnull(i)) - expected = i - tm.assert_index_equal(result, expected) - - i2 = i.copy() - i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), - freq='D') - result = i.where(notnull(i2)) - expected = i2 - tm.assert_index_equal(result, expected) - - def test_where_other(self): - - i = self.create_index() - for arr in [np.nan, pd.NaT]: - result = i.where(notnull(i), other=np.nan) - expected = i - tm.assert_index_equal(result, expected) - - i2 = i.copy() - i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), - freq='D') - result = i.where(notnull(i2), i2) - tm.assert_index_equal(result, i2) - - i2 = i.copy() - i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), - freq='D') - result = i.where(notnull(i2), i2.values) - tm.assert_index_equal(result, i2) - - def test_get_indexer(self): - idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start') - tm.assert_numpy_array_equal(idx.get_indexer(idx), - np.array([0, 1, 2], dtype=np.intp)) - - target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12', - '2000-01-02T01'], freq='H') - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', - tolerance='1 hour'), - np.array([0, -1, 1], dtype=np.intp)) - - msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' - with self.assertRaisesRegexp(ValueError, msg): - idx.get_indexer(target, 'nearest', tolerance='1 minute') - - 
tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', - tolerance='1 day'), - np.array([0, 1, 1], dtype=np.intp)) - - def test_repeat(self): - # GH10183 - idx = pd.period_range('2000-01-01', periods=3, freq='D') - res = idx.repeat(3) - exp = PeriodIndex(idx.values.repeat(3), freq='D') - self.assert_index_equal(res, exp) - self.assertEqual(res.freqstr, 'D') - - def test_period_index_indexer(self): - # GH4125 - idx = pd.period_range('2002-01', '2003-12', freq='M') - df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx) - self.assert_frame_equal(df, df.loc[idx]) - self.assert_frame_equal(df, df.loc[list(idx)]) - self.assert_frame_equal(df, df.loc[list(idx)]) - self.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) - self.assert_frame_equal(df, df.loc[list(idx)]) - - def test_fillna_period(self): - # GH 11343 - idx = pd.PeriodIndex(['2011-01-01 09:00', pd.NaT, - '2011-01-01 11:00'], freq='H') - - exp = pd.PeriodIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], freq='H') - self.assert_index_equal( - idx.fillna(pd.Period('2011-01-01 10:00', freq='H')), exp) - - exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), 'x', - pd.Period('2011-01-01 11:00', freq='H')], dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) - - exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), - pd.Period('2011-01-01', freq='D'), - pd.Period('2011-01-01 11:00', freq='H')], dtype=object) - self.assert_index_equal(idx.fillna(pd.Period('2011-01-01', freq='D')), - exp) - - def test_no_millisecond_field(self): - with self.assertRaises(AttributeError): - DatetimeIndex.millisecond - - with self.assertRaises(AttributeError): - DatetimeIndex([]).millisecond - - def test_difference_freq(self): - # GH14323: difference of Period MUST preserve frequency - # but the ability to union results must be preserved - - index = period_range("20160920", "20160925", freq="D") - - other = period_range("20160921", "20160924", freq="D") - expected = PeriodIndex(["20160920", "20160925"], freq='D') - idx_diff = index.difference(other) - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) - - other = period_range("20160922", "20160925", freq="D") - idx_diff = index.difference(other) - expected = PeriodIndex(["20160920", "20160921"], freq='D') - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) - - -class TestTimedeltaIndex(DatetimeLike, tm.TestCase): - _holder = TimedeltaIndex - - def setUp(self): - self.indices = dict(index=tm.makeTimedeltaIndex(10)) - self.setup_indices() - - def create_index(self): - return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) - - def test_construction_base_constructor(self): - arr = [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')] - tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.TimedeltaIndex(np.array(arr))) - - arr = [np.nan, pd.NaT, pd.Timedelta('1 days')] - tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.TimedeltaIndex(np.array(arr))) - - def test_shift(self): - # test shift for TimedeltaIndex - # err8083 - - drange = self.create_index() - result = drange.shift(1) - expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', - '3 days 01:00:00', - '4 days 01:00:00', '5 days 01:00:00'], - freq='D') - self.assert_index_equal(result, expected) - - result = drange.shift(3, freq='2D 1s') - expected = TimedeltaIndex(['6 days 
01:00:03', '7 days 01:00:03', - '8 days 01:00:03', '9 days 01:00:03', - '10 days 01:00:03'], freq='D') - self.assert_index_equal(result, expected) - - def test_astype(self): - # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) - - result = idx.astype(object) - expected = Index([Timedelta('1 days 03:46:40')] + [pd.NaT] * 3, - dtype=object) - tm.assert_index_equal(result, expected) - - result = idx.astype(int) - expected = Int64Index([100000000000000] + [-9223372036854775808] * 3, - dtype=np.int64) - tm.assert_index_equal(result, expected) - - rng = timedelta_range('1 days', periods=10) - - result = rng.astype('i8') - self.assert_index_equal(result, Index(rng.asi8)) - self.assert_numpy_array_equal(rng.asi8, result.values) - - def test_astype_timedelta64(self): - # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) - - result = idx.astype('timedelta64') - expected = Float64Index([1e+14] + [np.NaN] * 3, dtype='float64') - tm.assert_index_equal(result, expected) - - result = idx.astype('timedelta64[ns]') - tm.assert_index_equal(result, idx) - self.assertFalse(result is idx) - - result = idx.astype('timedelta64[ns]', copy=False) - tm.assert_index_equal(result, idx) - self.assertTrue(result is idx) - - def test_astype_raises(self): - # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) - - self.assertRaises(ValueError, idx.astype, float) - self.assertRaises(ValueError, idx.astype, str) - self.assertRaises(ValueError, idx.astype, 'datetime64') - self.assertRaises(ValueError, idx.astype, 'datetime64[ns]') - - def test_get_loc(self): - idx = pd.to_timedelta(['0 days', '1 days', '2 days']) - - for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1) - self.assertEqual(idx.get_loc(str(idx[1]), method), 1) - - self.assertEqual( - idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1) - self.assertEqual( - idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1) - self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1) - - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): - idx.get_loc(idx[1], method='nearest', tolerance='foo') - - for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: - self.assertEqual(idx.get_loc('1 day 1 hour', method), loc) - - def test_get_indexer(self): - idx = pd.to_timedelta(['0 days', '1 days', '2 days']) - tm.assert_numpy_array_equal(idx.get_indexer(idx), - np.array([0, 1, 2], dtype=np.intp)) - - target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1], dtype=np.intp)) - - res = idx.get_indexer(target, 'nearest', - tolerance=pd.Timedelta('1 hour')) - tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp)) - - def test_numeric_compat(self): - - idx = self._holder(np.arange(5, dtype='int64')) - didx = self._holder(np.arange(5, dtype='int64') ** 2) - result = idx * 1 - tm.assert_index_equal(result, idx) - - result = 1 * idx - tm.assert_index_equal(result, idx) - - result = idx / 1 - tm.assert_index_equal(result, idx) - - result = idx // 1 - tm.assert_index_equal(result, idx) - - result = idx * np.array(5, dtype='int64') - 
tm.assert_index_equal(result, - self._holder(np.arange(5, dtype='int64') * 5)) - - result = idx * np.arange(5, dtype='int64') - tm.assert_index_equal(result, didx) - - result = idx * Series(np.arange(5, dtype='int64')) - tm.assert_index_equal(result, didx) - - result = idx * Series(np.arange(5, dtype='float64') + 0.1) - tm.assert_index_equal(result, self._holder(np.arange( - 5, dtype='float64') * (np.arange(5, dtype='float64') + 0.1))) - - # invalid - self.assertRaises(TypeError, lambda: idx * idx) - self.assertRaises(ValueError, lambda: idx * self._holder(np.arange(3))) - self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) - - def test_pickle_compat_construction(self): - pass - - def test_ufunc_coercions(self): - # normal ops are also tested in tseries/test_timedeltas.py - idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], - freq='2H', name='x') - - for result in [idx * 2, np.multiply(idx, 2)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['4H', '8H', '12H', '16H', '20H'], - freq='4H', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '4H') - - for result in [idx / 2, np.divide(idx, 2)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['1H', '2H', '3H', '4H', '5H'], - freq='H', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, 'H') - - idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], - freq='2H', name='x') - for result in [-idx, np.negative(idx)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['-2H', '-4H', '-6H', '-8H', '-10H'], - freq='-2H', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '-2H') - - idx = TimedeltaIndex(['-2H', '-1H', '0H', '1H', '2H'], - freq='H', name='x') - for result in [abs(idx), np.absolute(idx)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['2H', '1H', '0H', '1H', '2H'], - freq=None, name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, None) - - def test_fillna_timedelta(self): - # GH 11343 - idx = pd.TimedeltaIndex(['1 day', pd.NaT, '3 day']) - - exp = pd.TimedeltaIndex(['1 day', '2 day', '3 day']) - self.assert_index_equal(idx.fillna(pd.Timedelta('2 day')), exp) - - exp = pd.TimedeltaIndex(['1 day', '3 hour', '3 day']) - idx.fillna(pd.Timedelta('3 hour')) - - exp = pd.Index( - [pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) - - def test_difference_freq(self): - # GH14323: Difference of TimedeltaIndex should not preserve frequency - - index = timedelta_range("0 days", "5 days", freq="D") - - other = timedelta_range("1 days", "4 days", freq="D") - expected = TimedeltaIndex(["0 days", "5 days"], freq=None) - idx_diff = index.difference(other) - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) - - other = timedelta_range("2 days", "5 days", freq="D") - idx_diff = index.difference(other) - expected = TimedeltaIndex(["0 days", "1 days"], freq=None) - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) diff --git a/pandas/tests/indexes/test_timedelta.py b/pandas/tests/indexes/test_timedelta.py deleted file mode 100644 index e6071b8c4fa06..0000000000000 --- a/pandas/tests/indexes/test_timedelta.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np -from datetime import timedelta - -import pandas as pd -import pandas.util.testing as tm -from pandas import (timedelta_range, date_range, Series, 
Timedelta, - DatetimeIndex) - - -class TestSlicing(tm.TestCase): - - def test_timedelta(self): - # this is valid too - index = date_range('1/1/2000', periods=50, freq='B') - shifted = index + timedelta(1) - back = shifted + timedelta(-1) - self.assertTrue(tm.equalContents(index, back)) - self.assertEqual(shifted.freq, index.freq) - self.assertEqual(shifted.freq, back.freq) - - result = index - timedelta(1) - expected = index + timedelta(-1) - tm.assert_index_equal(result, expected) - - # GH4134, buggy with timedeltas - rng = date_range('2013', '2014') - s = Series(rng) - result1 = rng - pd.offsets.Hour(1) - result2 = DatetimeIndex(s - np.timedelta64(100000000)) - result3 = rng - np.timedelta64(100000000) - result4 = DatetimeIndex(s - pd.offsets.Hour(1)) - tm.assert_index_equal(result1, result4) - tm.assert_index_equal(result2, result3) - - -class TestTimeSeries(tm.TestCase): - - def test_series_box_timedelta(self): - rng = timedelta_range('1 day 1 s', periods=5, freq='h') - s = Series(rng) - tm.assertIsInstance(s[1], Timedelta) - tm.assertIsInstance(s.iat[2], Timedelta) diff --git a/pandas/tests/indexes/timedeltas/__init__.py b/pandas/tests/indexes/timedeltas/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py new file mode 100644 index 0000000000000..88e7b1387feff --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -0,0 +1,121 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas import (TimedeltaIndex, timedelta_range, Int64Index, Float64Index, + Index, Timedelta, Series) + +from ..datetimelike import DatetimeLike + + +class TestTimedeltaIndex(DatetimeLike, tm.TestCase): + _holder = TimedeltaIndex + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=tm.makeTimedeltaIndex(10)) + self.setup_indices() + + def create_index(self): + return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) + + def test_astype(self): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) + + result = idx.astype(object) + expected = Index([Timedelta('1 days 03:46:40')] + [pd.NaT] * 3, + dtype=object) + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index([100000000000000] + [-9223372036854775808] * 3, + dtype=np.int64) + tm.assert_index_equal(result, expected) + + rng = timedelta_range('1 days', periods=10) + + result = rng.astype('i8') + self.assert_index_equal(result, Index(rng.asi8)) + self.assert_numpy_array_equal(rng.asi8, result.values) + + def test_astype_timedelta64(self): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) + + result = idx.astype('timedelta64') + expected = Float64Index([1e+14] + [np.NaN] * 3, dtype='float64') + tm.assert_index_equal(result, expected) + + result = idx.astype('timedelta64[ns]') + tm.assert_index_equal(result, idx) + self.assertFalse(result is idx) + + result = idx.astype('timedelta64[ns]', copy=False) + tm.assert_index_equal(result, idx) + self.assertTrue(result is idx) + + def test_astype_raises(self): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) + + self.assertRaises(ValueError, idx.astype, float) + self.assertRaises(ValueError, idx.astype, str) + self.assertRaises(ValueError, idx.astype, 'datetime64') + self.assertRaises(ValueError, idx.astype, 'datetime64[ns]') + + def test_pickle_compat_construction(self): + pass + + def 
test_shift(self): + # test shift for TimedeltaIndex + # err8083 + + drange = self.create_index() + result = drange.shift(1) + expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', + '4 days 01:00:00', '5 days 01:00:00'], + freq='D') + self.assert_index_equal(result, expected) + + result = drange.shift(3, freq='2D 1s') + expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', + '8 days 01:00:03', '9 days 01:00:03', + '10 days 01:00:03'], freq='D') + self.assert_index_equal(result, expected) + + def test_numeric_compat(self): + + idx = self._holder(np.arange(5, dtype='int64')) + didx = self._holder(np.arange(5, dtype='int64') ** 2) + result = idx * 1 + tm.assert_index_equal(result, idx) + + result = 1 * idx + tm.assert_index_equal(result, idx) + + result = idx / 1 + tm.assert_index_equal(result, idx) + + result = idx // 1 + tm.assert_index_equal(result, idx) + + result = idx * np.array(5, dtype='int64') + tm.assert_index_equal(result, + self._holder(np.arange(5, dtype='int64') * 5)) + + result = idx * np.arange(5, dtype='int64') + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5, dtype='int64')) + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5, dtype='float64') + 0.1) + tm.assert_index_equal(result, self._holder(np.arange( + 5, dtype='float64') * (np.arange(5, dtype='float64') + 0.1))) + + # invalid + self.assertRaises(TypeError, lambda: idx * idx) + self.assertRaises(ValueError, lambda: idx * self._holder(np.arange(3))) + self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py new file mode 100644 index 0000000000000..0810b13eb0f53 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -0,0 +1,88 @@ +import numpy as np +from datetime import timedelta + +import pandas as pd +import pandas.util.testing as tm +from pandas import TimedeltaIndex, timedelta_range, tslib, to_timedelta + +iNaT = tslib.iNaT + + +class TestTimedeltaIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_construction_base_constructor(self): + arr = [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')] + tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.TimedeltaIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, pd.Timedelta('1 days')] + tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.TimedeltaIndex(np.array(arr))) + + def test_constructor(self): + expected = TimedeltaIndex(['1 days', '1 days 00:00:05', '2 days', + '2 days 00:00:02', '0 days 00:00:03']) + result = TimedeltaIndex(['1 days', '1 days, 00:00:05', np.timedelta64( + 2, 'D'), timedelta(days=2, seconds=2), pd.offsets.Second(3)]) + tm.assert_index_equal(result, expected) + + # unicode + result = TimedeltaIndex([u'1 days', '1 days, 00:00:05', np.timedelta64( + 2, 'D'), timedelta(days=2, seconds=2), pd.offsets.Second(3)]) + + expected = TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', + '0 days 00:00:02']) + tm.assert_index_equal(TimedeltaIndex(range(3), unit='s'), expected) + expected = TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:05', + '0 days 00:00:09']) + tm.assert_index_equal(TimedeltaIndex([0, 5, 9], unit='s'), expected) + expected = TimedeltaIndex( + ['0 days 00:00:00.400', '0 days 00:00:00.450', + '0 days 00:00:01.200']) + tm.assert_index_equal(TimedeltaIndex([400, 
450, 1200], unit='ms'), + expected) + + def test_constructor_coverage(self): + rng = timedelta_range('1 days', periods=10.5) + exp = timedelta_range('1 days', periods=10) + self.assert_index_equal(rng, exp) + + self.assertRaises(ValueError, TimedeltaIndex, start='1 days', + periods='foo', freq='D') + + self.assertRaises(ValueError, TimedeltaIndex, start='1 days', + end='10 days') + + self.assertRaises(ValueError, TimedeltaIndex, '1 days') + + # generator expression + gen = (timedelta(i) for i in range(10)) + result = TimedeltaIndex(gen) + expected = TimedeltaIndex([timedelta(i) for i in range(10)]) + self.assert_index_equal(result, expected) + + # NumPy string array + strings = np.array(['1 days', '2 days', '3 days']) + result = TimedeltaIndex(strings) + expected = to_timedelta([1, 2, 3], unit='d') + self.assert_index_equal(result, expected) + + from_ints = TimedeltaIndex(expected.asi8) + self.assert_index_equal(from_ints, expected) + + # non-conforming freq + self.assertRaises(ValueError, TimedeltaIndex, + ['1 days', '2 days', '4 days'], freq='D') + + self.assertRaises(ValueError, TimedeltaIndex, periods=10, freq='D') + + def test_constructor_name(self): + idx = TimedeltaIndex(start='1 days', periods=1, freq='D', name='TEST') + self.assertEqual(idx.name, 'TEST') + + # GH10025 + idx2 = TimedeltaIndex(idx, name='something else') + self.assertEqual(idx2.name, 'something else') diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py new file mode 100644 index 0000000000000..b4a8bc79921bf --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -0,0 +1,110 @@ +from datetime import timedelta + +import pandas.util.testing as tm +from pandas import TimedeltaIndex, timedelta_range, compat, Index, Timedelta + + +class TestTimedeltaIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_insert(self): + + idx = TimedeltaIndex(['4day', '1day', '2day'], name='idx') + + result = idx.insert(2, timedelta(days=5)) + exp = TimedeltaIndex(['4day', '1day', '5day', '2day'], name='idx') + self.assert_index_equal(result, exp) + + # insertion of non-datetime should coerce to object index + result = idx.insert(1, 'inserted') + expected = Index([Timedelta('4day'), 'inserted', Timedelta('1day'), + Timedelta('2day')], name='idx') + self.assertNotIsInstance(result, TimedeltaIndex) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + + idx = timedelta_range('1day 00:00:01', periods=3, freq='s', name='idx') + + # preserve freq + expected_0 = TimedeltaIndex(['1day', '1day 00:00:01', '1day 00:00:02', + '1day 00:00:03'], + name='idx', freq='s') + expected_3 = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', + '1day 00:00:03', '1day 00:00:04'], + name='idx', freq='s') + + # reset freq to None + expected_1_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:01', + '1day 00:00:02', '1day 00:00:03'], + name='idx', freq=None) + expected_3_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', + '1day 00:00:03', '1day 00:00:05'], + name='idx', freq=None) + + cases = [(0, Timedelta('1day'), expected_0), + (-3, Timedelta('1day'), expected_0), + (3, Timedelta('1day 00:00:04'), expected_3), + (1, Timedelta('1day 00:00:01'), expected_1_nofreq), + (3, Timedelta('1day 00:00:05'), expected_3_nofreq)] + + for n, d, expected in cases: + result = idx.insert(n, d) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + + def 
test_delete(self): + idx = timedelta_range(start='1 Days', periods=5, freq='D', name='idx') + + # preserve freq + expected_0 = timedelta_range(start='2 Days', periods=4, freq='D', + name='idx') + expected_4 = timedelta_range(start='1 Days', periods=4, freq='D', + name='idx') + + # reset freq to None + expected_1 = TimedeltaIndex( + ['1 day', '3 day', '4 day', '5 day'], freq=None, name='idx') + + cases = {0: expected_0, + -5: expected_0, + -1: expected_4, + 4: expected_4, + 1: expected_1} + for n, expected in compat.iteritems(cases): + result = idx.delete(n) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + + with tm.assertRaises((IndexError, ValueError)): + # either, depending on numpy version + result = idx.delete(5) + + def test_delete_slice(self): + idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx') + + # preserve freq + expected_0_2 = timedelta_range(start='4 days', periods=7, freq='D', + name='idx') + expected_7_9 = timedelta_range(start='1 days', periods=7, freq='D', + name='idx') + + # reset freq to None + expected_3_5 = TimedeltaIndex(['1 d', '2 d', '3 d', + '7 d', '8 d', '9 d', '10d'], + freq=None, name='idx') + + cases = {(0, 1, 2): expected_0_2, + (7, 8, 9): expected_7_9, + (3, 4, 5): expected_3_5} + for n, expected in compat.iteritems(cases): + result = idx.delete(n) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + + result = idx.delete(slice(n[0], n[-1] + 1)) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py new file mode 100644 index 0000000000000..406a5bdbf3bcd --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -0,0 +1,1276 @@ +import numpy as np +from datetime import timedelta +from distutils.version import LooseVersion + +import pandas as pd +import pandas.util.testing as tm +from pandas import to_timedelta +from pandas.util.testing import assert_series_equal, assert_frame_equal +from pandas import (Series, Timedelta, DataFrame, Timestamp, TimedeltaIndex, + timedelta_range, date_range, DatetimeIndex, Int64Index, + _np_version_under1p10, Float64Index, Index, tslib) + +from pandas.tests.test_base import Ops + + +class TestTimedeltaIndexOps(Ops): + def setUp(self): + super(TestTimedeltaIndexOps, self).setUp() + mask = lambda x: isinstance(x, TimedeltaIndex) + self.is_valid_objs = [o for o in self.objs if mask(o)] + self.not_valid_objs = [] + + def test_ops_properties(self): + self.check_ops_properties(['days', 'hours', 'minutes', 'seconds', + 'milliseconds']) + self.check_ops_properties(['microseconds', 'nanoseconds']) + + def test_asobject_tolist(self): + idx = timedelta_range(start='1 days', periods=4, freq='D', name='idx') + expected_list = [Timedelta('1 days'), Timedelta('2 days'), + Timedelta('3 days'), Timedelta('4 days')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + + self.assertEqual(result.dtype, object) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + idx = TimedeltaIndex([timedelta(days=1), timedelta(days=2), pd.NaT, + timedelta(days=4)], name='idx') + expected_list = [Timedelta('1
days'), Timedelta('2 days'), pd.NaT, + Timedelta('4 days')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + def test_minmax(self): + + # monotonic + idx1 = TimedeltaIndex(['1 days', '2 days', '3 days']) + self.assertTrue(idx1.is_monotonic) + + # non-monotonic + idx2 = TimedeltaIndex(['1 days', np.nan, '3 days', 'NaT']) + self.assertFalse(idx2.is_monotonic) + + for idx in [idx1, idx2]: + self.assertEqual(idx.min(), Timedelta('1 days')), + self.assertEqual(idx.max(), Timedelta('3 days')), + self.assertEqual(idx.argmin(), 0) + self.assertEqual(idx.argmax(), 2) + + for op in ['min', 'max']: + # Return NaT + obj = TimedeltaIndex([]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + obj = TimedeltaIndex([pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + def test_numpy_minmax(self): + dr = pd.date_range(start='2016-01-15', end='2016-01-20') + td = TimedeltaIndex(np.asarray(dr)) + + self.assertEqual(np.min(td), Timedelta('16815 days')) + self.assertEqual(np.max(td), Timedelta('16820 days')) + + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.min, td, out=0) + tm.assertRaisesRegexp(ValueError, errmsg, np.max, td, out=0) + + self.assertEqual(np.argmin(td), 0) + self.assertEqual(np.argmax(td), 5) + + if not _np_version_under1p10: + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, td, out=0) + tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, td, out=0) + + def test_round(self): + td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min') + elt = td[1] + + expected_rng = TimedeltaIndex([ + Timedelta('16801 days 00:00:00'), + Timedelta('16801 days 00:00:00'), + Timedelta('16801 days 01:00:00'), + Timedelta('16801 days 02:00:00'), + Timedelta('16801 days 02:00:00'), + ]) + expected_elt = expected_rng[1] + + tm.assert_index_equal(td.round(freq='H'), expected_rng) + self.assertEqual(elt.round(freq='H'), expected_elt) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + td.round(freq='foo') + with tm.assertRaisesRegexp(ValueError, msg): + elt.round(freq='foo') + + msg = " is a non-fixed frequency" + tm.assertRaisesRegexp(ValueError, msg, td.round, freq='M') + tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') + + def test_representation(self): + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + exp1 = """TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')""" + + exp2 = ("TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', " + "freq='D')") + + exp3 = ("TimedeltaIndex(['1 days', '2 days'], " + "dtype='timedelta64[ns]', freq='D')") + + exp4 = ("TimedeltaIndex(['1 days', '2 days', '3 days'], " + "dtype='timedelta64[ns]', freq='D')") + + exp5 = ("TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', " + "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)") + + with pd.option_context('display.width', 300): + for idx, expected in zip([idx1, 
idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + for func in ['__repr__', '__unicode__', '__str__']: + result = getattr(idx, func)() + self.assertEqual(result, expected) + + def test_representation_to_series(self): + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + exp1 = """Series([], dtype: timedelta64[ns])""" + + exp2 = """0 1 days +dtype: timedelta64[ns]""" + + exp3 = """0 1 days +1 2 days +dtype: timedelta64[ns]""" + + exp4 = """0 1 days +1 2 days +2 3 days +dtype: timedelta64[ns]""" + + exp5 = """0 1 days 00:00:01 +1 2 days 00:00:00 +2 3 days 00:00:00 +dtype: timedelta64[ns]""" + + with pd.option_context('display.width', 300): + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + result = repr(pd.Series(idx)) + self.assertEqual(result, expected) + + def test_summary(self): + # GH9116 + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + exp1 = """TimedeltaIndex: 0 entries +Freq: D""" + + exp2 = """TimedeltaIndex: 1 entries, 1 days to 1 days +Freq: D""" + + exp3 = """TimedeltaIndex: 2 entries, 1 days to 2 days +Freq: D""" + + exp4 = """TimedeltaIndex: 3 entries, 1 days to 3 days +Freq: D""" + + exp5 = ("TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days " + "00:00:00") + + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + result = idx.summary() + self.assertEqual(result, expected) + + def test_add_iadd(self): + + # only test adding/sub offsets as + is now numeric + + # offset + offsets = [pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)] + + for delta in offsets: + rng = timedelta_range('1 days', '10 days') + result = rng + delta + expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00', + freq='D') + tm.assert_index_equal(result, expected) + rng += delta + tm.assert_index_equal(rng, expected) + + # int + rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) + result = rng + 1 + expected = timedelta_range('1 days 10:00:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + rng += 1 + tm.assert_index_equal(rng, expected) + + def test_sub_isub(self): + # only test adding/sub offsets as - is now numeric + + # offset + offsets = [pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)] + + for delta in offsets: + rng = timedelta_range('1 days', '10 days') + result = rng - delta + expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00') + tm.assert_index_equal(result, expected) + rng -= delta + tm.assert_index_equal(rng, expected) + + # int + rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) + result = rng - 1 + expected = timedelta_range('1 days 08:00:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + rng -= 1 + tm.assert_index_equal(rng, expected) + + idx = TimedeltaIndex(['1 day', '2 day']) + msg = "cannot subtract a datelike from a TimedeltaIndex" + with tm.assertRaisesRegexp(TypeError, msg): + idx - Timestamp('2011-01-01') + + result = Timestamp('2011-01-01') + idx + expected = DatetimeIndex(['2011-01-02', 
'2011-01-03']) + tm.assert_index_equal(result, expected) + + def test_ops_compat(self): + + offsets = [pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)] + + rng = timedelta_range('1 days', '10 days', name='foo') + + # multiply + for offset in offsets: + self.assertRaises(TypeError, lambda: rng * offset) + + # divide + expected = Int64Index((np.arange(10) + 1) * 12, name='foo') + for offset in offsets: + result = rng / offset + tm.assert_index_equal(result, expected, exact=False) + + # divide with nats + rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + expected = Float64Index([12, np.nan, 24], name='foo') + for offset in offsets: + result = rng / offset + tm.assert_index_equal(result, expected) + + # don't allow division by NaT (maybe could in the future) + self.assertRaises(TypeError, lambda: rng / pd.NaT) + + def test_subtraction_ops(self): + + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + dti = date_range('20130101', periods=3, name='bar') + td = Timedelta('1 days') + dt = Timestamp('20130101') + + self.assertRaises(TypeError, lambda: tdi - dt) + self.assertRaises(TypeError, lambda: tdi - dti) + self.assertRaises(TypeError, lambda: td - dt) + self.assertRaises(TypeError, lambda: td - dti) + + result = dt - dti + expected = TimedeltaIndex(['0 days', '-1 days', '-2 days'], name='bar') + tm.assert_index_equal(result, expected) + + result = dti - dt + expected = TimedeltaIndex(['0 days', '1 days', '2 days'], name='bar') + tm.assert_index_equal(result, expected) + + result = tdi - td + expected = TimedeltaIndex(['0 days', pd.NaT, '1 days'], name='foo') + tm.assert_index_equal(result, expected, check_names=False) + + result = td - tdi + expected = TimedeltaIndex(['0 days', pd.NaT, '-1 days'], name='foo') + tm.assert_index_equal(result, expected, check_names=False) + + result = dti - td + expected = DatetimeIndex( + ['20121231', '20130101', '20130102'], name='bar') + tm.assert_index_equal(result, expected, check_names=False) + + result = dt - tdi + expected = DatetimeIndex(['20121231', pd.NaT, '20121230'], name='foo') + tm.assert_index_equal(result, expected) + + def test_subtraction_ops_with_tz(self): + + # check that dt/dti subtraction ops with tz are validated + dti = date_range('20130101', periods=3) + ts = Timestamp('20130101') + dt = ts.to_pydatetime() + dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') + ts_tz = Timestamp('20130101').tz_localize('US/Eastern') + ts_tz2 = Timestamp('20130101').tz_localize('CET') + dt_tz = ts_tz.to_pydatetime() + td = Timedelta('1 days') + + def _check(result, expected): + self.assertEqual(result, expected) + self.assertIsInstance(result, Timedelta) + + # scalars + result = ts - ts + expected = Timedelta('0 days') + _check(result, expected) + + result = dt_tz - ts_tz + expected = Timedelta('0 days') + _check(result, expected) + + result = ts_tz - dt_tz + expected = Timedelta('0 days') + _check(result, expected) + + # tz mismatches + self.assertRaises(TypeError, lambda: dt_tz - ts) + self.assertRaises(TypeError, lambda: dt_tz - dt) + self.assertRaises(TypeError, lambda: dt_tz - ts_tz2) + self.assertRaises(TypeError, lambda: dt - dt_tz) + self.assertRaises(TypeError, lambda: ts - dt_tz) + self.assertRaises(TypeError, lambda: ts_tz2 - ts) + self.assertRaises(TypeError, lambda: ts_tz2 - dt) + self.assertRaises(TypeError, lambda: ts_tz - ts_tz2) + + # with dti + self.assertRaises(TypeError, lambda: dti - ts_tz) +
self.assertRaises(TypeError, lambda: dti_tz - ts) + self.assertRaises(TypeError, lambda: dti_tz - ts_tz2) + + result = dti_tz - dt_tz + expected = TimedeltaIndex(['0 days', '1 days', '2 days']) + tm.assert_index_equal(result, expected) + + result = dt_tz - dti_tz + expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) + tm.assert_index_equal(result, expected) + + result = dti_tz - ts_tz + expected = TimedeltaIndex(['0 days', '1 days', '2 days']) + tm.assert_index_equal(result, expected) + + result = ts_tz - dti_tz + expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) + tm.assert_index_equal(result, expected) + + result = td - td + expected = Timedelta('0 days') + _check(result, expected) + + result = dti_tz - td + expected = DatetimeIndex( + ['20121231', '20130101', '20130102'], tz='US/Eastern') + tm.assert_index_equal(result, expected) + + def test_dti_tdi_numeric_ops(self): + + # These are normally union/diff set-like ops + tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + dti = date_range('20130101', periods=3, name='bar') + + # TODO(wesm): unused? + # td = Timedelta('1 days') + # dt = Timestamp('20130101') + + result = tdi - tdi + expected = TimedeltaIndex(['0 days', pd.NaT, '0 days'], name='foo') + tm.assert_index_equal(result, expected) + + result = tdi + tdi + expected = TimedeltaIndex(['2 days', pd.NaT, '4 days'], name='foo') + tm.assert_index_equal(result, expected) + + result = dti - tdi # name will be reset + expected = DatetimeIndex(['20121231', pd.NaT, '20130101']) + tm.assert_index_equal(result, expected) + + def test_sub_period(self): + # GH 13078 + # not supported, check TypeError + p = pd.Period('2011-01-01', freq='D') + + for freq in [None, 'H']: + idx = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=freq) + + with tm.assertRaises(TypeError): + idx - p + + with tm.assertRaises(TypeError): + p - idx + + def test_addition_ops(self): + + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + dti = date_range('20130101', periods=3, name='bar') + td = Timedelta('1 days') + dt = Timestamp('20130101') + + result = tdi + dt + expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') + tm.assert_index_equal(result, expected) + + result = dt + tdi + expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') + tm.assert_index_equal(result, expected) + + result = td + tdi + expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') + tm.assert_index_equal(result, expected) + + result = tdi + td + expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') + tm.assert_index_equal(result, expected) + + # unequal length + self.assertRaises(ValueError, lambda: tdi + dti[0:1]) + self.assertRaises(ValueError, lambda: tdi[0:1] + dti) + + # random indexes + self.assertRaises(TypeError, lambda: tdi + Int64Index([1, 2, 3])) + + # this is a union! 
+ # self.assertRaises(TypeError, lambda : Int64Index([1,2,3]) + tdi) + + result = tdi + dti # name will be reset + expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) + tm.assert_index_equal(result, expected) + + result = dti + tdi # name will be reset + expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) + tm.assert_index_equal(result, expected) + + result = dt + td + expected = Timestamp('20130102') + self.assertEqual(result, expected) + + result = td + dt + expected = Timestamp('20130102') + self.assertEqual(result, expected) + + def test_comp_nat(self): + left = pd.TimedeltaIndex([pd.Timedelta('1 days'), pd.NaT, + pd.Timedelta('3 days')]) + right = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta('3 days')]) + + for l, r in [(left, right), (left.asobject, right.asobject)]: + result = l == r + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = l != r + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(l == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == r, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(l != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != l, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(l < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > l, expected) + + def test_value_counts_unique(self): + # GH 7735 + + idx = timedelta_range('1 days 09:00:00', freq='H', periods=10) + # create repeated values, 'n'th element is repeated by n+1 times + idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1))) + + exp_idx = timedelta_range('1 days 18:00:00', freq='-1H', periods=10) + expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + expected = timedelta_range('1 days 09:00:00', freq='H', periods=10) + tm.assert_index_equal(idx.unique(), expected) + + idx = TimedeltaIndex(['1 days 09:00:00', '1 days 09:00:00', + '1 days 09:00:00', '1 days 08:00:00', + '1 days 08:00:00', pd.NaT]) + + exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00']) + expected = Series([3, 2], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00', + pd.NaT]) + expected = Series([3, 2, 1], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(dropna=False), expected) + + tm.assert_index_equal(idx.unique(), exp_idx) + + def test_nonunique_contains(self): + # GH 9512 + for idx in map(TimedeltaIndex, ([0, 1, 0], [0, 0, -1], [0, -1, -1], + ['00:01:00', '00:01:00', '00:02:00'], + ['00:01:00', '00:01:00', '00:00:01'])): + tm.assertIn(idx[0], idx) + + def test_unknown_attribute(self): + # GH 9680 + tdi = pd.timedelta_range(start=0, periods=10, freq='1s') + ts = pd.Series(np.random.normal(size=10), index=tdi) + self.assertNotIn('foo', ts.__dict__.keys()) + self.assertRaises(AttributeError, lambda: ts.foo) + + def test_order(self): + # GH 10295 + idx1 = TimedeltaIndex(['1 day', '2 day', '3 day'], freq='D', + name='idx') + idx2 = TimedeltaIndex( + ['1 hour', '2 hour', '3 hour'], freq='H', name='idx') + + for idx in [idx1, idx2]: + ordered = idx.sort_values() + self.assert_index_equal(ordered, idx) + self.assertEqual(ordered.freq, idx.freq) + + ordered = 
idx.sort_values(ascending=False) + expected = idx[::-1] + self.assert_index_equal(ordered, expected) + self.assertEqual(ordered.freq, expected.freq) + self.assertEqual(ordered.freq.n, -1) + + ordered, indexer = idx.sort_values(return_indexer=True) + self.assert_index_equal(ordered, idx) + self.assert_numpy_array_equal(indexer, + np.array([0, 1, 2]), + check_dtype=False) + self.assertEqual(ordered.freq, idx.freq) + + ordered, indexer = idx.sort_values(return_indexer=True, + ascending=False) + self.assert_index_equal(ordered, idx[::-1]) + self.assertEqual(ordered.freq, expected.freq) + self.assertEqual(ordered.freq.n, -1) + + idx1 = TimedeltaIndex(['1 hour', '3 hour', '5 hour', + '2 hour ', '1 hour'], name='idx1') + exp1 = TimedeltaIndex(['1 hour', '1 hour', '2 hour', + '3 hour', '5 hour'], name='idx1') + + idx2 = TimedeltaIndex(['1 day', '3 day', '5 day', + '2 day', '1 day'], name='idx2') + + # TODO(wesm): unused? + # exp2 = TimedeltaIndex(['1 day', '1 day', '2 day', + # '3 day', '5 day'], name='idx2') + + # idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute', + # '2 minute', pd.NaT], name='idx3') + # exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute', + # '5 minute'], name='idx3') + + for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]: + ordered = idx.sort_values() + self.assert_index_equal(ordered, expected) + self.assertIsNone(ordered.freq) + + ordered = idx.sort_values(ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + self.assertIsNone(ordered.freq) + + ordered, indexer = idx.sort_values(return_indexer=True) + self.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) + self.assertIsNone(ordered.freq) + + ordered, indexer = idx.sort_values(return_indexer=True, + ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + + exp = np.array([2, 1, 3, 4, 0]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) + self.assertIsNone(ordered.freq) + + def test_getitem(self): + idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') + + for idx in [idx1]: + result = idx[0] + self.assertEqual(result, pd.Timedelta('1 day')) + + result = idx[0:5] + expected = pd.timedelta_range('1 day', '5 day', freq='D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[0:10:2] + expected = pd.timedelta_range('1 day', '9 day', freq='2D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[-20:-5:3] + expected = pd.timedelta_range('12 day', '24 day', freq='3D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[4::-1] + expected = TimedeltaIndex(['5 day', '4 day', '3 day', + '2 day', '1 day'], + freq='-1D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + def test_drop_duplicates_metadata(self): + # GH 10115 + idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') + result = idx.drop_duplicates() + self.assert_index_equal(idx, result) + self.assertEqual(idx.freq, result.freq) + + idx_dup = idx.append(idx) + self.assertIsNone(idx_dup.freq) # freq is reset + result = idx_dup.drop_duplicates() + self.assert_index_equal(idx, result) + self.assertIsNone(result.freq) + + def test_drop_duplicates(self): + # to check Index/Series compat + base = pd.timedelta_range('1 day', '31 day', 
freq='D', name='idx') + idx = base.append(base[:5]) + + res = idx.drop_duplicates() + tm.assert_index_equal(res, base) + res = Series(idx).drop_duplicates() + tm.assert_series_equal(res, Series(base)) + + res = idx.drop_duplicates(keep='last') + exp = base[5:].append(base[:5]) + tm.assert_index_equal(res, exp) + res = Series(idx).drop_duplicates(keep='last') + tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) + + res = idx.drop_duplicates(keep=False) + tm.assert_index_equal(res, base[5:]) + res = Series(idx).drop_duplicates(keep=False) + tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) + + def test_take(self): + # GH 10295 + idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') + + for idx in [idx1]: + result = idx.take([0]) + self.assertEqual(result, pd.Timedelta('1 day')) + + result = idx.take([-1]) + self.assertEqual(result, pd.Timedelta('31 day')) + + result = idx.take([0, 1, 2]) + expected = pd.timedelta_range('1 day', '3 day', freq='D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([0, 2, 4]) + expected = pd.timedelta_range('1 day', '5 day', freq='2D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([7, 4, 1]) + expected = pd.timedelta_range('8 day', '2 day', freq='-3D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([3, 2, 5]) + expected = TimedeltaIndex(['4 day', '3 day', '6 day'], name='idx') + self.assert_index_equal(result, expected) + self.assertIsNone(result.freq) + + result = idx.take([-3, 2, 5]) + expected = TimedeltaIndex(['29 day', '3 day', '6 day'], name='idx') + self.assert_index_equal(result, expected) + self.assertIsNone(result.freq) + + def test_take_invalid_kwargs(self): + idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') + indices = [1, 6, 5, 9, 10, 13, 15, 3] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + tm.assertRaisesRegexp(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, mode='clip') + + def test_infer_freq(self): + # GH 11018 + for freq in ['D', '3D', '-3D', 'H', '2H', '-2H', 'T', '2T', 'S', '-3S' + ]: + idx = pd.timedelta_range('1', freq=freq, periods=10) + result = pd.TimedeltaIndex(idx.asi8, freq='infer') + tm.assert_index_equal(idx, result) + self.assertEqual(result.freq, freq) + + def test_nat_new(self): + + idx = pd.timedelta_range('1', freq='D', periods=5, name='x') + result = idx._nat_new() + exp = pd.TimedeltaIndex([pd.NaT] * 5, name='x') + tm.assert_index_equal(result, exp) + + result = idx._nat_new(box=False) + exp = np.array([tslib.iNaT] * 5, dtype=np.int64) + tm.assert_numpy_array_equal(result, exp) + + def test_shift(self): + # GH 9903 + idx = pd.TimedeltaIndex([], name='xxx') + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + tm.assert_index_equal(idx.shift(3, freq='H'), idx) + + idx = pd.TimedeltaIndex(['5 hours', '6 hours', '9 hours'], name='xxx') + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + exp = pd.TimedeltaIndex(['8 hours', '9 hours', '12 hours'], name='xxx') + tm.assert_index_equal(idx.shift(3, freq='H'), exp) + exp = pd.TimedeltaIndex(['2 hours', '3 hours', '6 hours'], name='xxx') 
+ tm.assert_index_equal(idx.shift(-3, freq='H'), exp) + + tm.assert_index_equal(idx.shift(0, freq='T'), idx) + exp = pd.TimedeltaIndex(['05:03:00', '06:03:00', '9:03:00'], + name='xxx') + tm.assert_index_equal(idx.shift(3, freq='T'), exp) + exp = pd.TimedeltaIndex(['04:57:00', '05:57:00', '8:57:00'], + name='xxx') + tm.assert_index_equal(idx.shift(-3, freq='T'), exp) + + def test_repeat(self): + index = pd.timedelta_range('1 days', periods=2, freq='D') + exp = pd.TimedeltaIndex(['1 days', '1 days', '2 days', '2 days']) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + self.assertIsNone(res.freq) + + index = TimedeltaIndex(['1 days', 'NaT', '3 days']) + exp = TimedeltaIndex(['1 days', '1 days', '1 days', + 'NaT', 'NaT', 'NaT', + '3 days', '3 days', '3 days']) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + self.assertIsNone(res.freq) + + def test_nat(self): + self.assertIs(pd.TimedeltaIndex._na_value, pd.NaT) + self.assertIs(pd.TimedeltaIndex([])._na_value, pd.NaT) + + idx = pd.TimedeltaIndex(['1 days', '2 days']) + self.assertTrue(idx._can_hold_na) + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) + self.assertFalse(idx.hasnans) + tm.assert_numpy_array_equal(idx._nan_idxs, + np.array([], dtype=np.intp)) + + idx = pd.TimedeltaIndex(['1 days', 'NaT']) + self.assertTrue(idx._can_hold_na) + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) + self.assertTrue(idx.hasnans) + tm.assert_numpy_array_equal(idx._nan_idxs, + np.array([1], dtype=np.intp)) + + def test_equals(self): + # GH 13107 + idx = pd.TimedeltaIndex(['1 days', '2 days', 'NaT']) + self.assertTrue(idx.equals(idx)) + self.assertTrue(idx.equals(idx.copy())) + self.assertTrue(idx.equals(idx.asobject)) + self.assertTrue(idx.asobject.equals(idx)) + self.assertTrue(idx.asobject.equals(idx.asobject)) + self.assertFalse(idx.equals(list(idx))) + self.assertFalse(idx.equals(pd.Series(idx))) + + idx2 = pd.TimedeltaIndex(['2 days', '1 days', 'NaT']) + self.assertFalse(idx.equals(idx2)) + self.assertFalse(idx.equals(idx2.copy())) + self.assertFalse(idx.equals(idx2.asobject)) + self.assertFalse(idx.asobject.equals(idx2)) + self.assertFalse(idx.asobject.equals(idx2.asobject)) + self.assertFalse(idx.equals(list(idx2))) + self.assertFalse(idx.equals(pd.Series(idx2))) + + +class TestTimedeltas(tm.TestCase): + _multiprocess_can_split_ = True + + def test_ops(self): + + td = Timedelta(10, unit='d') + self.assertEqual(-td, Timedelta(-10, unit='d')) + self.assertEqual(+td, Timedelta(10, unit='d')) + self.assertEqual(td - td, Timedelta(0, unit='ns')) + self.assertTrue((td - pd.NaT) is pd.NaT) + self.assertEqual(td + td, Timedelta(20, unit='d')) + self.assertTrue((td + pd.NaT) is pd.NaT) + self.assertEqual(td * 2, Timedelta(20, unit='d')) + self.assertTrue((td * pd.NaT) is pd.NaT) + self.assertEqual(td / 2, Timedelta(5, unit='d')) + self.assertEqual(abs(td), td) + self.assertEqual(abs(-td), td) + self.assertEqual(td / td, 1) + self.assertTrue((td / pd.NaT) is np.nan) + + # invert + self.assertEqual(-td, Timedelta('-10d')) + self.assertEqual(td * -1, Timedelta('-10d')) + self.assertEqual(-1 * td, Timedelta('-10d')) + self.assertEqual(abs(-td), Timedelta('10d')) + + # invalid + self.assertRaises(TypeError, lambda: Timedelta(11, unit='d') // 2) + + # invalid multiply with another timedelta + self.assertRaises(TypeError, lambda: td * td) + + # can't operate with integers + self.assertRaises(TypeError, lambda: td + 2) + self.assertRaises(TypeError, 
lambda: td - 2) + + def test_ops_offsets(self): + td = Timedelta(10, unit='d') + self.assertEqual(Timedelta(241, unit='h'), td + pd.offsets.Hour(1)) + self.assertEqual(Timedelta(241, unit='h'), pd.offsets.Hour(1) + td) + self.assertEqual(240, td / pd.offsets.Hour(1)) + self.assertEqual(1 / 240.0, pd.offsets.Hour(1) / td) + self.assertEqual(Timedelta(239, unit='h'), td - pd.offsets.Hour(1)) + self.assertEqual(Timedelta(-239, unit='h'), pd.offsets.Hour(1) - td) + + def test_ops_ndarray(self): + td = Timedelta('1 day') + + # timedelta, timedelta + other = pd.to_timedelta(['1 day']).values + expected = pd.to_timedelta(['2 days']).values + self.assert_numpy_array_equal(td + other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other + td, expected) + self.assertRaises(TypeError, lambda: td + np.array([1])) + self.assertRaises(TypeError, lambda: np.array([1]) + td) + + expected = pd.to_timedelta(['0 days']).values + self.assert_numpy_array_equal(td - other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(-other + td, expected) + self.assertRaises(TypeError, lambda: td - np.array([1])) + self.assertRaises(TypeError, lambda: np.array([1]) - td) + + expected = pd.to_timedelta(['2 days']).values + self.assert_numpy_array_equal(td * np.array([2]), expected) + self.assert_numpy_array_equal(np.array([2]) * td, expected) + self.assertRaises(TypeError, lambda: td * other) + self.assertRaises(TypeError, lambda: other * td) + + self.assert_numpy_array_equal(td / other, + np.array([1], dtype=np.float64)) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other / td, + np.array([1], dtype=np.float64)) + + # timedelta, datetime + other = pd.to_datetime(['2000-01-01']).values + expected = pd.to_datetime(['2000-01-02']).values + self.assert_numpy_array_equal(td + other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other + td, expected) + + expected = pd.to_datetime(['1999-12-31']).values + self.assert_numpy_array_equal(-td + other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other - td, expected) + + def test_ops_series(self): + # regression test for GH8813 + td = Timedelta('1 day') + other = pd.Series([1, 2]) + expected = pd.Series(pd.to_timedelta(['1 day', '2 days'])) + tm.assert_series_equal(expected, td * other) + tm.assert_series_equal(expected, other * td) + + def test_ops_series_object(self): + # GH 13043 + s = pd.Series([pd.Timestamp('2015-01-01', tz='US/Eastern'), + pd.Timestamp('2015-01-01', tz='Asia/Tokyo')], + name='xxx') + self.assertEqual(s.dtype, object) + + exp = pd.Series([pd.Timestamp('2015-01-02', tz='US/Eastern'), + pd.Timestamp('2015-01-02', tz='Asia/Tokyo')], + name='xxx') + tm.assert_series_equal(s + pd.Timedelta('1 days'), exp) + tm.assert_series_equal(pd.Timedelta('1 days') + s, exp) + + # object series & object series + s2 = pd.Series([pd.Timestamp('2015-01-03', tz='US/Eastern'), + pd.Timestamp('2015-01-05', tz='Asia/Tokyo')], + name='xxx') + self.assertEqual(s2.dtype, object) + exp = pd.Series([pd.Timedelta('2 days'), pd.Timedelta('4 days')], + name='xxx') + tm.assert_series_equal(s2 - s, exp) + tm.assert_series_equal(s - s2, -exp) + + s = pd.Series([pd.Timedelta('01:00:00'), pd.Timedelta('02:00:00')], + name='xxx', dtype=object) + self.assertEqual(s.dtype, object) + + exp = pd.Series([pd.Timedelta('01:30:00'), pd.Timedelta('02:30:00')], + name='xxx') + tm.assert_series_equal(s + 
pd.Timedelta('00:30:00'), exp) + tm.assert_series_equal(pd.Timedelta('00:30:00') + s, exp) + + def test_ops_notimplemented(self): + class Other: + pass + + other = Other() + + td = Timedelta('1 day') + self.assertTrue(td.__add__(other) is NotImplemented) + self.assertTrue(td.__sub__(other) is NotImplemented) + self.assertTrue(td.__truediv__(other) is NotImplemented) + self.assertTrue(td.__mul__(other) is NotImplemented) + self.assertTrue(td.__floordiv__(td) is NotImplemented) + + def test_ops_error_str(self): + # GH 13624 + tdi = TimedeltaIndex(['1 day', '2 days']) + + for l, r in [(tdi, 'a'), ('a', tdi)]: + with tm.assertRaises(TypeError): + l + r + + with tm.assertRaises(TypeError): + l > r + + with tm.assertRaises(TypeError): + l == r + + with tm.assertRaises(TypeError): + l != r + + def test_timedelta_ops(self): + # GH4984 + # make sure ops return Timedelta + s = Series([Timestamp('20130101') + timedelta(seconds=i * i) + for i in range(10)]) + td = s.diff() + + result = td.mean() + expected = to_timedelta(timedelta(seconds=9)) + self.assertEqual(result, expected) + + result = td.to_frame().mean() + self.assertEqual(result[0], expected) + + result = td.quantile(.1) + expected = Timedelta(np.timedelta64(2600, 'ms')) + self.assertEqual(result, expected) + + result = td.median() + expected = to_timedelta('00:00:09') + self.assertEqual(result, expected) + + result = td.to_frame().median() + self.assertEqual(result[0], expected) + + # GH 6462 + # consistency in returned values for sum + result = td.sum() + expected = to_timedelta('00:01:21') + self.assertEqual(result, expected) + + result = td.to_frame().sum() + self.assertEqual(result[0], expected) + + # std + result = td.std() + expected = to_timedelta(Series(td.dropna().values).std()) + self.assertEqual(result, expected) + + result = td.to_frame().std() + self.assertEqual(result[0], expected) + + # invalid ops + for op in ['skew', 'kurt', 'sem', 'prod']: + self.assertRaises(TypeError, getattr(td, op)) + + # GH 10040 + # make sure NaT is properly handled by median() + s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')]) + self.assertEqual(s.diff().median(), timedelta(days=4)) + + s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'), + Timestamp('2015-02-15')]) + self.assertEqual(s.diff().median(), timedelta(days=6)) + + def test_timedelta_ops_scalar(self): + # GH 6808 + base = pd.to_datetime('20130101 09:01:12.123456') + expected_add = pd.to_datetime('20130101 09:01:22.123456') + expected_sub = pd.to_datetime('20130101 09:01:02.123456') + + for offset in [pd.to_timedelta(10, unit='s'), timedelta(seconds=10), + np.timedelta64(10, 's'), + np.timedelta64(10000000000, 'ns'), + pd.offsets.Second(10)]: + result = base + offset + self.assertEqual(result, expected_add) + + result = base - offset + self.assertEqual(result, expected_sub) + + base = pd.to_datetime('20130102 09:01:12.123456') + expected_add = pd.to_datetime('20130103 09:01:22.123456') + expected_sub = pd.to_datetime('20130101 09:01:02.123456') + + for offset in [pd.to_timedelta('1 day, 00:00:10'), + pd.to_timedelta('1 days, 00:00:10'), + timedelta(days=1, seconds=10), + np.timedelta64(1, 'D') + np.timedelta64(10, 's'), + pd.offsets.Day() + pd.offsets.Second(10)]: + result = base + offset + self.assertEqual(result, expected_add) + + result = base - offset + self.assertEqual(result, expected_sub) + + def test_timedelta_ops_with_missing_values(self): + # setup + s1 = pd.to_timedelta(Series(['00:00:01'])) + s2 = pd.to_timedelta(Series(['00:00:02'])) + sn = 
pd.to_timedelta(Series([pd.NaT])) + df1 = DataFrame(['00:00:01']).apply(pd.to_timedelta) + df2 = DataFrame(['00:00:02']).apply(pd.to_timedelta) + dfn = DataFrame([pd.NaT]).apply(pd.to_timedelta) + scalar1 = pd.to_timedelta('00:00:01') + scalar2 = pd.to_timedelta('00:00:02') + timedelta_NaT = pd.to_timedelta('NaT') + NA = np.nan + + actual = scalar1 + scalar1 + self.assertEqual(actual, scalar2) + actual = scalar2 - scalar1 + self.assertEqual(actual, scalar1) + + actual = s1 + s1 + assert_series_equal(actual, s2) + actual = s2 - s1 + assert_series_equal(actual, s1) + + actual = s1 + scalar1 + assert_series_equal(actual, s2) + actual = scalar1 + s1 + assert_series_equal(actual, s2) + actual = s2 - scalar1 + assert_series_equal(actual, s1) + actual = -scalar1 + s2 + assert_series_equal(actual, s1) + + actual = s1 + timedelta_NaT + assert_series_equal(actual, sn) + actual = timedelta_NaT + s1 + assert_series_equal(actual, sn) + actual = s1 - timedelta_NaT + assert_series_equal(actual, sn) + actual = -timedelta_NaT + s1 + assert_series_equal(actual, sn) + + actual = s1 + NA + assert_series_equal(actual, sn) + actual = NA + s1 + assert_series_equal(actual, sn) + actual = s1 - NA + assert_series_equal(actual, sn) + actual = -NA + s1 + assert_series_equal(actual, sn) + + actual = s1 + pd.NaT + assert_series_equal(actual, sn) + actual = s2 - pd.NaT + assert_series_equal(actual, sn) + + actual = s1 + df1 + assert_frame_equal(actual, df2) + actual = s2 - df1 + assert_frame_equal(actual, df1) + actual = df1 + s1 + assert_frame_equal(actual, df2) + actual = df2 - s1 + assert_frame_equal(actual, df1) + + actual = df1 + df1 + assert_frame_equal(actual, df2) + actual = df2 - df1 + assert_frame_equal(actual, df1) + + actual = df1 + scalar1 + assert_frame_equal(actual, df2) + actual = df2 - scalar1 + assert_frame_equal(actual, df1) + + actual = df1 + timedelta_NaT + assert_frame_equal(actual, dfn) + actual = df1 - timedelta_NaT + assert_frame_equal(actual, dfn) + + actual = df1 + NA + assert_frame_equal(actual, dfn) + actual = df1 - NA + assert_frame_equal(actual, dfn) + + actual = df1 + pd.NaT # NaT is datetime, not timedelta + assert_frame_equal(actual, dfn) + actual = df1 - pd.NaT + assert_frame_equal(actual, dfn) + + def test_compare_timedelta_series(self): + # regression test for GH5963 + s = pd.Series([timedelta(days=1), timedelta(days=2)]) + actual = s > timedelta(days=1) + expected = pd.Series([False, True]) + tm.assert_series_equal(actual, expected) + + def test_compare_timedelta_ndarray(self): + # GH11835 + periods = [Timedelta('0 days 01:00:00'), Timedelta('0 days 01:00:00')] + arr = np.array(periods) + result = arr[0] > arr + expected = np.array([False, False]) + self.assert_numpy_array_equal(result, expected) + + +class TestSlicing(tm.TestCase): + + def test_tdi_ops_attributes(self): + rng = timedelta_range('2 days', periods=5, freq='2D', name='x') + + result = rng + 1 + exp = timedelta_range('4 days', periods=5, freq='2D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '2D') + + result = rng - 2 + exp = timedelta_range('-2 days', periods=5, freq='2D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '2D') + + result = rng * 2 + exp = timedelta_range('4 days', periods=5, freq='4D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '4D') + + result = rng / 2 + exp = timedelta_range('1 days', periods=5, freq='D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, 'D') + + result 
= -rng + exp = timedelta_range('-2 days', periods=5, freq='-2D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '-2D') + + rng = pd.timedelta_range('-2 days', periods=5, freq='D', name='x') + + result = abs(rng) + exp = TimedeltaIndex(['2 days', '1 days', '0 days', '1 days', + '2 days'], name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, None) + + def test_add_overflow(self): + # see gh-14068 + msg = "too (big|large) to convert" + with tm.assertRaisesRegexp(OverflowError, msg): + to_timedelta(106580, 'D') + Timestamp('2000') + with tm.assertRaisesRegexp(OverflowError, msg): + Timestamp('2000') + to_timedelta(106580, 'D') + + _NaT = int(pd.NaT) + 1 + msg = "Overflow in int64 addition" + with tm.assertRaisesRegexp(OverflowError, msg): + to_timedelta([106580], 'D') + Timestamp('2000') + with tm.assertRaisesRegexp(OverflowError, msg): + Timestamp('2000') + to_timedelta([106580], 'D') + with tm.assertRaisesRegexp(OverflowError, msg): + to_timedelta([_NaT]) - Timedelta('1 days') + with tm.assertRaisesRegexp(OverflowError, msg): + to_timedelta(['5 days', _NaT]) - Timedelta('1 days') + with tm.assertRaisesRegexp(OverflowError, msg): + (to_timedelta([_NaT, '5 days', '1 hours']) - + to_timedelta(['7 seconds', _NaT, '4 hours'])) + + # These should not overflow! + exp = TimedeltaIndex([pd.NaT]) + result = to_timedelta([pd.NaT]) - Timedelta('1 days') + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex(['4 days', pd.NaT]) + result = to_timedelta(['5 days', pd.NaT]) - Timedelta('1 days') + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex([pd.NaT, pd.NaT, '5 hours']) + result = (to_timedelta([pd.NaT, '5 days', '1 hours']) + + to_timedelta(['7 seconds', pd.NaT, '4 hours'])) + tm.assert_index_equal(result, exp) diff --git a/pandas/tests/indexes/timedeltas/test_partial_slicing.py b/pandas/tests/indexes/timedeltas/test_partial_slicing.py new file mode 100644 index 0000000000000..0d46ee4172211 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_partial_slicing.py @@ -0,0 +1,81 @@ +import numpy as np +import pandas.util.testing as tm + +import pandas as pd +from pandas import Series, timedelta_range, Timedelta +from pandas.util.testing import assert_series_equal + + +class TestSlicing(tm.TestCase): + + def test_partial_slice(self): + rng = timedelta_range('1 day 10:11:12', freq='h', periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['5 day':'6 day'] + expected = s.iloc[86:134] + assert_series_equal(result, expected) + + result = s['5 day':] + expected = s.iloc[86:] + assert_series_equal(result, expected) + + result = s[:'6 day'] + expected = s.iloc[:134] + assert_series_equal(result, expected) + + result = s['6 days, 23:11:12'] + self.assertEqual(result, s.iloc[133]) + + self.assertRaises(KeyError, s.__getitem__, '50 days') + + def test_partial_slice_high_reso(self): + + # higher reso + rng = timedelta_range('1 day 10:11:12', freq='us', periods=2000) + s = Series(np.arange(len(rng)), index=rng) + + result = s['1 day 10:11:12':] + expected = s.iloc[0:] + assert_series_equal(result, expected) + + result = s['1 day 10:11:12.001':] + expected = s.iloc[1000:] + assert_series_equal(result, expected) + + result = s['1 days, 10:11:12.001001'] + self.assertEqual(result, s.iloc[1001]) + + def test_slice_with_negative_step(self): + ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) + SLC = pd.IndexSlice + + def assert_slices_equivalent(l_slc, i_slc): + assert_series_equal(ts[l_slc], 
ts.iloc[i_slc])
+            assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc])
+
+        assert_slices_equivalent(SLC[Timedelta(hours=7)::-1], SLC[7::-1])
+        assert_slices_equivalent(SLC['7 hours'::-1], SLC[7::-1])
+
+        assert_slices_equivalent(SLC[:Timedelta(hours=7):-1], SLC[:6:-1])
+        assert_slices_equivalent(SLC[:'7 hours':-1], SLC[:6:-1])
+
+        assert_slices_equivalent(SLC['15 hours':'7 hours':-1], SLC[15:6:-1])
+        assert_slices_equivalent(SLC[Timedelta(hours=15):Timedelta(hours=7):-
+                                     1], SLC[15:6:-1])
+        assert_slices_equivalent(SLC['15 hours':Timedelta(hours=7):-1],
+                                 SLC[15:6:-1])
+        assert_slices_equivalent(SLC[Timedelta(hours=15):'7 hours':-1],
+                                 SLC[15:6:-1])
+
+        assert_slices_equivalent(SLC['7 hours':'15 hours':-1], SLC[:0])
+
+    def test_slice_with_zero_step_raises(self):
+        ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H'))
+        self.assertRaisesRegexp(ValueError, 'slice step cannot be zero',
+                                lambda: ts[::0])
+        self.assertRaisesRegexp(ValueError, 'slice step cannot be zero',
+                                lambda: ts.loc[::0])
diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py
new file mode 100644
index 0000000000000..9000fb3beb279
--- /dev/null
+++ b/pandas/tests/indexes/timedeltas/test_setops.py
@@ -0,0 +1,76 @@
+import numpy as np
+
+import pandas as pd
+import pandas.util.testing as tm
+from pandas import TimedeltaIndex, timedelta_range, Int64Index
+
+
+class TestTimedeltaIndex(tm.TestCase):
+    _multiprocess_can_split_ = True
+
+    def test_union(self):
+
+        i1 = timedelta_range('1day', periods=5)
+        i2 = timedelta_range('3day', periods=5)
+        result = i1.union(i2)
+        expected = timedelta_range('1day', periods=7)
+        self.assert_index_equal(result, expected)
+
+        i1 = Int64Index(np.arange(0, 20, 2))
+        i2 = TimedeltaIndex(start='1 day', periods=10, freq='D')
+        i1.union(i2)  # Works
+        i2.union(i1)  # Fails with "AttributeError: can't set attribute"
+
+    def test_union_coverage(self):
+
+        idx = TimedeltaIndex(['3d', '1d', '2d'])
+        ordered = TimedeltaIndex(idx.sort_values(), freq='infer')
+        result = ordered.union(idx)
+        self.assert_index_equal(result, ordered)
+
+        result = ordered[:0].union(ordered)
+        self.assert_index_equal(result, ordered)
+        self.assertEqual(result.freq, ordered.freq)
+
+    def test_union_bug_1730(self):
+
+        rng_a = timedelta_range('1 day', periods=4, freq='3H')
+        rng_b = timedelta_range('1 day', periods=4, freq='4H')
+
+        result = rng_a.union(rng_b)
+        exp = TimedeltaIndex(sorted(set(list(rng_a)) | set(list(rng_b))))
+        self.assert_index_equal(result, exp)
+
+    def test_union_bug_1745(self):
+
+        left = TimedeltaIndex(['1 day 15:19:49.695000'])
+        right = TimedeltaIndex(['2 day 13:04:21.322000',
+                                '1 day 15:27:24.873000',
+                                '1 day 15:31:05.350000'])
+
+        result = left.union(right)
+        exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right))))
+        self.assert_index_equal(result, exp)
+
+    def test_union_bug_4564(self):
+
+        left = timedelta_range("1 day", "30d")
+        right = left + pd.offsets.Minute(15)
+
+        result = left.union(right)
+        exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right))))
+        self.assert_index_equal(result, exp)
+
+    def test_intersection_bug_1708(self):
+        index_1 = timedelta_range('1 day', periods=4, freq='h')
+        index_2 = index_1 + pd.offsets.Hour(5)
+
+        result = index_1 & index_2
+        self.assertEqual(len(result), 0)
+
+        index_1 = timedelta_range('1 day', periods=4, freq='h')
+        index_2 = index_1 +
pd.offsets.Hour(1) + + result = index_1 & index_2 + expected = timedelta_range('1 day 01:00:00', periods=3, freq='h') + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py new file mode 100644 index 0000000000000..4c8571e4f08f9 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -0,0 +1,592 @@ +import numpy as np +from datetime import timedelta + +import pandas as pd +import pandas.util.testing as tm +from pandas import (timedelta_range, date_range, Series, Timedelta, + DatetimeIndex, TimedeltaIndex, Index, DataFrame, + Int64Index, _np_version_under1p8) +from pandas.util.testing import (assert_almost_equal, assert_series_equal, + assert_index_equal) + +from ..datetimelike import DatetimeLike + +randn = np.random.randn + + +class TestTimedeltaIndex(DatetimeLike, tm.TestCase): + _holder = TimedeltaIndex + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=tm.makeTimedeltaIndex(10)) + self.setup_indices() + + def create_index(self): + return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) + + def test_shift(self): + # test shift for TimedeltaIndex + # err8083 + + drange = self.create_index() + result = drange.shift(1) + expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', + '4 days 01:00:00', '5 days 01:00:00'], + freq='D') + self.assert_index_equal(result, expected) + + result = drange.shift(3, freq='2D 1s') + expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', + '8 days 01:00:03', '9 days 01:00:03', + '10 days 01:00:03'], freq='D') + self.assert_index_equal(result, expected) + + def test_get_loc(self): + idx = pd.to_timedelta(['0 days', '1 days', '2 days']) + + for method in [None, 'pad', 'backfill', 'nearest']: + self.assertEqual(idx.get_loc(idx[1], method), 1) + self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1) + self.assertEqual(idx.get_loc(str(idx[1]), method), 1) + + self.assertEqual( + idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1) + self.assertEqual( + idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1) + self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1) + + with tm.assertRaisesRegexp(ValueError, 'must be convertible'): + idx.get_loc(idx[1], method='nearest', tolerance='foo') + + for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: + self.assertEqual(idx.get_loc('1 day 1 hour', method), loc) + + def test_get_loc_nat(self): + tidx = TimedeltaIndex(['1 days 01:00:00', 'NaT', '2 days 01:00:00']) + + self.assertEqual(tidx.get_loc(pd.NaT), 1) + self.assertEqual(tidx.get_loc(None), 1) + self.assertEqual(tidx.get_loc(float('nan')), 1) + self.assertEqual(tidx.get_loc(np.nan), 1) + + def test_get_indexer(self): + idx = pd.to_timedelta(['0 days', '1 days', '2 days']) + tm.assert_numpy_array_equal(idx.get_indexer(idx), + np.array([0, 1, 2], dtype=np.intp)) + + target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), + np.array([-1, 0, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), + np.array([0, 1, 2], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), + np.array([0, 1, 1], dtype=np.intp)) + + res = idx.get_indexer(target, 'nearest', + tolerance=pd.Timedelta('1 hour')) + tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp)) + + def test_numeric_compat(self): + 
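+        # numeric_compat: integer/float scalars and aligned 1-d arrays
+        # scale each timedelta elementwise; multiplying two timedelta-like
+        # indexes (or mismatched-length arrays) should raise below.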
+
+        idx = self._holder(np.arange(5, dtype='int64'))
+        didx = self._holder(np.arange(5, dtype='int64') ** 2)
+        result = idx * 1
+        tm.assert_index_equal(result, idx)
+
+        result = 1 * idx
+        tm.assert_index_equal(result, idx)
+
+        result = idx / 1
+        tm.assert_index_equal(result, idx)
+
+        result = idx // 1
+        tm.assert_index_equal(result, idx)
+
+        result = idx * np.array(5, dtype='int64')
+        tm.assert_index_equal(result,
+                              self._holder(np.arange(5, dtype='int64') * 5))
+
+        result = idx * np.arange(5, dtype='int64')
+        tm.assert_index_equal(result, didx)
+
+        result = idx * Series(np.arange(5, dtype='int64'))
+        tm.assert_index_equal(result, didx)
+
+        result = idx * Series(np.arange(5, dtype='float64') + 0.1)
+        tm.assert_index_equal(result, self._holder(np.arange(
+            5, dtype='float64') * (np.arange(5, dtype='float64') + 0.1)))
+
+        # invalid
+        self.assertRaises(TypeError, lambda: idx * idx)
+        self.assertRaises(ValueError, lambda: idx * self._holder(np.arange(3)))
+        self.assertRaises(ValueError, lambda: idx * np.array([1, 2]))
+
+    def test_pickle_compat_construction(self):
+        pass
+
+    def test_ufunc_coercions(self):
+        # normal ops are also tested in tseries/test_timedeltas.py
+        idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'],
+                             freq='2H', name='x')
+
+        for result in [idx * 2, np.multiply(idx, 2)]:
+            tm.assertIsInstance(result, TimedeltaIndex)
+            exp = TimedeltaIndex(['4H', '8H', '12H', '16H', '20H'],
+                                 freq='4H', name='x')
+            tm.assert_index_equal(result, exp)
+            self.assertEqual(result.freq, '4H')
+
+        for result in [idx / 2, np.divide(idx, 2)]:
+            tm.assertIsInstance(result, TimedeltaIndex)
+            exp = TimedeltaIndex(['1H', '2H', '3H', '4H', '5H'],
+                                 freq='H', name='x')
+            tm.assert_index_equal(result, exp)
+            self.assertEqual(result.freq, 'H')
+
+        idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'],
+                             freq='2H', name='x')
+        for result in [-idx, np.negative(idx)]:
+            tm.assertIsInstance(result, TimedeltaIndex)
+            exp = TimedeltaIndex(['-2H', '-4H', '-6H', '-8H', '-10H'],
+                                 freq='-2H', name='x')
+            tm.assert_index_equal(result, exp)
+            self.assertEqual(result.freq, '-2H')
+
+        idx = TimedeltaIndex(['-2H', '-1H', '0H', '1H', '2H'],
+                             freq='H', name='x')
+        for result in [abs(idx), np.absolute(idx)]:
+            tm.assertIsInstance(result, TimedeltaIndex)
+            exp = TimedeltaIndex(['2H', '1H', '0H', '1H', '2H'],
+                                 freq=None, name='x')
+            tm.assert_index_equal(result, exp)
+            self.assertEqual(result.freq, None)
+
+    def test_fillna_timedelta(self):
+        # GH 11343
+        idx = pd.TimedeltaIndex(['1 day', pd.NaT, '3 day'])
+
+        exp = pd.TimedeltaIndex(['1 day', '2 day', '3 day'])
+        self.assert_index_equal(idx.fillna(pd.Timedelta('2 day')), exp)
+
+        exp = pd.TimedeltaIndex(['1 day', '3 hour', '3 day'])
+        self.assert_index_equal(idx.fillna(pd.Timedelta('3 hour')), exp)
+
+        exp = pd.Index(
+            [pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object)
+        self.assert_index_equal(idx.fillna('x'), exp)
+
+    def test_difference_freq(self):
+        # GH14323: Difference of TimedeltaIndex should not preserve frequency
+
+        index = timedelta_range("0 days", "5 days", freq="D")
+
+        other = timedelta_range("1 days", "4 days", freq="D")
+        expected = TimedeltaIndex(["0 days", "5 days"], freq=None)
+        idx_diff = index.difference(other)
+        tm.assert_index_equal(idx_diff, expected)
+        tm.assert_attr_equal('freq', idx_diff, expected)
+
+        other = timedelta_range("2 days", "5 days", freq="D")
+        idx_diff = index.difference(other)
+        expected = TimedeltaIndex(["0 days", "1 days"], freq=None)
+        tm.assert_index_equal(idx_diff, expected)
+        tm.assert_attr_equal('freq', idx_diff, expected)
+
+    def
test_take(self): + + tds = ['1day 02:00:00', '1 day 04:00:00', '1 day 10:00:00'] + idx = TimedeltaIndex(start='1d', end='2d', freq='H', name='idx') + expected = TimedeltaIndex(tds, freq=None, name='idx') + + taken1 = idx.take([2, 4, 10]) + taken2 = idx[[2, 4, 10]] + + for taken in [taken1, taken2]: + self.assert_index_equal(taken, expected) + tm.assertIsInstance(taken, TimedeltaIndex) + self.assertIsNone(taken.freq) + self.assertEqual(taken.name, expected.name) + + def test_take_fill_value(self): + # GH 12631 + idx = pd.TimedeltaIndex(['1 days', '2 days', '3 days'], + name='xxx') + result = idx.take(np.array([1, 0, -1])) + expected = pd.TimedeltaIndex(['2 days', '1 days', '3 days'], + name='xxx') + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.TimedeltaIndex(['2 days', '1 days', 'NaT'], + name='xxx') + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + expected = pd.TimedeltaIndex(['2 days', '1 days', '3 days'], + name='xxx') + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and fill_value is not None, ' + 'all indices must be >= -1') + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with tm.assertRaises(IndexError): + idx.take(np.array([1, -5])) + + def test_isin(self): + + index = tm.makeTimedeltaIndex(4) + result = index.isin(index) + self.assertTrue(result.all()) + + result = index.isin(list(index)) + self.assertTrue(result.all()) + + assert_almost_equal(index.isin([index[2], 5]), + np.array([False, False, True, False])) + + def test_factorize(self): + idx1 = TimedeltaIndex(['1 day', '1 day', '2 day', '2 day', '3 day', + '3 day']) + + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) + exp_idx = TimedeltaIndex(['1 day', '2 day', '3 day']) + + arr, idx = idx1.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + self.assert_index_equal(idx, exp_idx) + + arr, idx = idx1.factorize(sort=True) + self.assert_numpy_array_equal(arr, exp_arr) + self.assert_index_equal(idx, exp_idx) + + # freq must be preserved + idx3 = timedelta_range('1 day', periods=4, freq='s') + exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) + arr, idx = idx3.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + self.assert_index_equal(idx, idx3) + + def test_join_self(self): + + index = timedelta_range('1 day', periods=10) + kinds = 'outer', 'inner', 'left', 'right' + for kind in kinds: + joined = index.join(index, how=kind) + tm.assert_index_equal(index, joined) + + def test_slice_keeps_name(self): + + # GH4226 + dr = pd.timedelta_range('1d', '5d', freq='H', name='timebucket') + self.assertEqual(dr[1:].name, dr.name) + + def test_does_not_convert_mixed_integer(self): + df = tm.makeCustomDataframe(10, 10, + data_gen_f=lambda *args, **kwargs: randn(), + r_idx_type='i', c_idx_type='td') + str(df) + + cols = df.columns.join(df.index, how='outer') + joined = cols.join(df.columns) + self.assertEqual(cols.dtype, np.dtype('O')) + self.assertEqual(cols.dtype, joined.dtype) + tm.assert_index_equal(cols, joined) + + def test_sort_values(self): + + idx = TimedeltaIndex(['4d', '1d', '2d']) + + ordered = idx.sort_values() + self.assertTrue(ordered.is_monotonic) + + ordered = idx.sort_values(ascending=False) + self.assertTrue(ordered[::-1].is_monotonic) + + ordered, dexer = 
idx.sort_values(return_indexer=True)
+        self.assertTrue(ordered.is_monotonic)
+        self.assert_numpy_array_equal(dexer,
+                                      np.array([1, 2, 0]),
+                                      check_dtype=False)
+
+        ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
+        self.assertTrue(ordered[::-1].is_monotonic)
+        self.assert_numpy_array_equal(dexer,
+                                      np.array([0, 2, 1]),
+                                      check_dtype=False)
+
+    def test_get_duplicates(self):
+        idx = TimedeltaIndex(['1 day', '2 day', '2 day', '3 day', '3day',
+                              '4day'])
+
+        result = idx.get_duplicates()
+        ex = TimedeltaIndex(['2 day', '3day'])
+        self.assert_index_equal(result, ex)
+
+    def test_argmin_argmax(self):
+        idx = TimedeltaIndex(['1 day 00:00:05', '1 day 00:00:01',
+                              '1 day 00:00:02'])
+        self.assertEqual(idx.argmin(), 1)
+        self.assertEqual(idx.argmax(), 0)
+
+    def test_misc_coverage(self):
+
+        rng = timedelta_range('1 day', periods=5)
+        result = rng.groupby(rng.days)
+        tm.assertIsInstance(list(result.values())[0][0], Timedelta)
+
+        idx = TimedeltaIndex(['3d', '1d', '2d'])
+        self.assertFalse(idx.equals(list(idx)))
+
+        non_td = Index(list('abc'))
+        self.assertFalse(idx.equals(list(non_td)))
+
+    def test_map(self):
+
+        rng = timedelta_range('1 day', periods=10)
+
+        f = lambda x: x.days
+        result = rng.map(f)
+        exp = Int64Index([f(x) for x in rng])
+        tm.assert_index_equal(result, exp)
+
+    def test_comparisons_nat(self):
+
+        tdidx1 = pd.TimedeltaIndex(['1 day', pd.NaT, '1 day 00:00:01', pd.NaT,
+                                    '1 day 00:00:01', '5 day 00:00:03'])
+        tdidx2 = pd.TimedeltaIndex(['2 day', '2 day', pd.NaT, pd.NaT,
+                                    '1 day 00:00:02', '5 days 00:00:03'])
+        tdarr = np.array([np.timedelta64(2, 'D'),
+                          np.timedelta64(2, 'D'), np.timedelta64('nat'),
+                          np.timedelta64('nat'),
+                          np.timedelta64(1, 'D') + np.timedelta64(2, 's'),
+                          np.timedelta64(5, 'D') + np.timedelta64(3, 's')])
+
+        if _np_version_under1p8:
+            # cannot test array because np.datetime64('nat') returns
+            # today's date
+            cases = [(tdidx1, tdidx2)]
+        else:
+            cases = [(tdidx1, tdidx2), (tdidx1, tdarr)]
+
+        # Check pd.NaT is handled the same as np.nan
+        for idx1, idx2 in cases:
+
+            result = idx1 < idx2
+            expected = np.array([True, False, False, False, True, False])
+            self.assert_numpy_array_equal(result, expected)
+
+            result = idx2 > idx1
+            expected = np.array([True, False, False, False, True, False])
+            self.assert_numpy_array_equal(result, expected)
+
+            result = idx1 <= idx2
+            expected = np.array([True, False, False, False, True, True])
+            self.assert_numpy_array_equal(result, expected)
+
+            result = idx2 >= idx1
+            expected = np.array([True, False, False, False, True, True])
+            self.assert_numpy_array_equal(result, expected)
+
+            result = idx1 == idx2
+            expected = np.array([False, False, False, False, False, True])
+            self.assert_numpy_array_equal(result, expected)
+
+            result = idx1 != idx2
+            expected = np.array([True, True, True, True, True, False])
+            self.assert_numpy_array_equal(result, expected)
+
+    def test_comparisons_coverage(self):
+        rng = timedelta_range('1 days', periods=10)
+
+        result = rng < rng[3]
+        exp = np.array([True, True, True] + [False] * 7)
+        self.assert_numpy_array_equal(result, exp)
+
+        # raise TypeError for now
+        self.assertRaises(TypeError, rng.__lt__, rng[3].value)
+
+        result = rng == list(rng)
+        exp = rng == rng
+        self.assert_numpy_array_equal(result, exp)
+
+    def test_total_seconds(self):
+        # GH 10939
+        # test index
+        rng = timedelta_range('1 days, 10:11:12.100123456', periods=2,
+                              freq='s')
+        expt = [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9,
+                1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456.
/ 1e9] + tm.assert_almost_equal(rng.total_seconds(), np.array(expt)) + + # test Series + s = Series(rng) + s_expt = Series(expt, index=[0, 1]) + tm.assert_series_equal(s.dt.total_seconds(), s_expt) + + # with nat + s[1] = np.nan + s_expt = Series([1 * 86400 + 10 * 3600 + 11 * 60 + + 12 + 100123456. / 1e9, np.nan], index=[0, 1]) + tm.assert_series_equal(s.dt.total_seconds(), s_expt) + + # with both nat + s = Series([np.nan, np.nan], dtype='timedelta64[ns]') + tm.assert_series_equal(s.dt.total_seconds(), + Series([np.nan, np.nan], index=[0, 1])) + + def test_pass_TimedeltaIndex_to_index(self): + + rng = timedelta_range('1 days', '10 days') + idx = Index(rng, dtype=object) + + expected = Index(rng.to_pytimedelta(), dtype=object) + + self.assert_numpy_array_equal(idx.values, expected.values) + + def test_pickle(self): + + rng = timedelta_range('1 days', periods=10) + rng_p = self.round_trip_pickle(rng) + tm.assert_index_equal(rng, rng_p) + + def test_hash_error(self): + index = timedelta_range('1 days', periods=10) + with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % + type(index).__name__): + hash(index) + + def test_append_join_nondatetimeindex(self): + rng = timedelta_range('1 days', periods=10) + idx = Index(['a', 'b', 'c', 'd']) + + result = rng.append(idx) + tm.assertIsInstance(result[0], Timedelta) + + # it works + rng.join(idx, how='outer') + + def test_append_numpy_bug_1681(self): + + td = timedelta_range('1 days', '10 days', freq='2D') + a = DataFrame() + c = DataFrame({'A': 'foo', 'B': td}, index=td) + str(c) + + result = a.append(c) + self.assertTrue((result['B'] == td).all()) + + def test_fields(self): + rng = timedelta_range('1 days, 10:11:12.100123456', periods=2, + freq='s') + self.assert_numpy_array_equal(rng.days, np.array( + [1, 1], dtype='int64')) + self.assert_numpy_array_equal( + rng.seconds, + np.array([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], + dtype='int64')) + self.assert_numpy_array_equal(rng.microseconds, np.array( + [100 * 1000 + 123, 100 * 1000 + 123], dtype='int64')) + self.assert_numpy_array_equal(rng.nanoseconds, np.array( + [456, 456], dtype='int64')) + + self.assertRaises(AttributeError, lambda: rng.hours) + self.assertRaises(AttributeError, lambda: rng.minutes) + self.assertRaises(AttributeError, lambda: rng.milliseconds) + + # with nat + s = Series(rng) + s[1] = np.nan + + tm.assert_series_equal(s.dt.days, Series([1, np.nan], index=[0, 1])) + tm.assert_series_equal(s.dt.seconds, Series( + [10 * 3600 + 11 * 60 + 12, np.nan], index=[0, 1])) + + def test_freq_conversion(self): + + # doc example + + # series + td = Series(date_range('20130101', periods=4)) - \ + Series(date_range('20121201', periods=4)) + td[2] += timedelta(minutes=5, seconds=3) + td[3] = np.nan + + result = td / np.timedelta64(1, 'D') + expected = Series([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan + ]) + assert_series_equal(result, expected) + + result = td.astype('timedelta64[D]') + expected = Series([31, 31, 31, np.nan]) + assert_series_equal(result, expected) + + result = td / np.timedelta64(1, 's') + expected = Series([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, + np.nan]) + assert_series_equal(result, expected) + + result = td.astype('timedelta64[s]') + assert_series_equal(result, expected) + + # tdi + td = TimedeltaIndex(td) + + result = td / np.timedelta64(1, 'D') + expected = Index([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan]) + assert_index_equal(result, expected) + + result = td.astype('timedelta64[D]') + expected = Index([31, 31, 31, 
np.nan]) + assert_index_equal(result, expected) + + result = td / np.timedelta64(1, 's') + expected = Index([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, + np.nan]) + assert_index_equal(result, expected) + + result = td.astype('timedelta64[s]') + assert_index_equal(result, expected) + + +class TestSlicing(tm.TestCase): + + def test_timedelta(self): + # this is valid too + index = date_range('1/1/2000', periods=50, freq='B') + shifted = index + timedelta(1) + back = shifted + timedelta(-1) + self.assertTrue(tm.equalContents(index, back)) + self.assertEqual(shifted.freq, index.freq) + self.assertEqual(shifted.freq, back.freq) + + result = index - timedelta(1) + expected = index + timedelta(-1) + tm.assert_index_equal(result, expected) + + # GH4134, buggy with timedeltas + rng = date_range('2013', '2014') + s = Series(rng) + result1 = rng - pd.offsets.Hour(1) + result2 = DatetimeIndex(s - np.timedelta64(100000000)) + result3 = rng - np.timedelta64(100000000) + result4 = DatetimeIndex(s - pd.offsets.Hour(1)) + tm.assert_index_equal(result1, result4) + tm.assert_index_equal(result2, result3) + + +class TestTimeSeries(tm.TestCase): + _multiprocess_can_split_ = True + + def test_series_box_timedelta(self): + rng = timedelta_range('1 day 1 s', periods=5, freq='h') + s = Series(rng) + tm.assertIsInstance(s[1], Timedelta) + tm.assertIsInstance(s.iat[2], Timedelta) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py new file mode 100644 index 0000000000000..8bd56b5885bba --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -0,0 +1,51 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas.tseries.offsets import Day, Second +from pandas import to_timedelta, timedelta_range +from pandas.util.testing import assert_frame_equal + + +class TestTimedeltas(tm.TestCase): + _multiprocess_can_split_ = True + + def test_timedelta_range(self): + + expected = to_timedelta(np.arange(5), unit='D') + result = timedelta_range('0 days', periods=5, freq='D') + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(11), unit='D') + result = timedelta_range('0 days', '10 days', freq='D') + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(5), unit='D') + Second(2) + Day() + result = timedelta_range('1 days, 00:00:02', '5 days, 00:00:02', + freq='D') + tm.assert_index_equal(result, expected) + + expected = to_timedelta([1, 3, 5, 7, 9], unit='D') + Second(2) + result = timedelta_range('1 days, 00:00:02', periods=5, freq='2D') + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(50), unit='T') * 30 + result = timedelta_range('0 days', freq='30T', periods=50) + tm.assert_index_equal(result, expected) + + # GH 11776 + arr = np.arange(10).reshape(2, 5) + df = pd.DataFrame(np.arange(10).reshape(2, 5)) + for arg in (arr, df): + with tm.assertRaisesRegexp(TypeError, "1-d array"): + to_timedelta(arg) + for errors in ['ignore', 'raise', 'coerce']: + with tm.assertRaisesRegexp(TypeError, "1-d array"): + to_timedelta(arg, errors=errors) + + # issue10583 + df = pd.DataFrame(np.random.normal(size=(10, 4))) + df.index = pd.timedelta_range(start='0s', periods=10, freq='s') + expected = df.loc[pd.Timedelta('0s'):, :] + result = df.loc['0s':, :] + assert_frame_equal(expected, result) diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py new file mode 100644 index 
0000000000000..2442051547312 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -0,0 +1,201 @@ +from datetime import time, timedelta +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas.util.testing import assert_series_equal +from pandas import (Series, Timedelta, to_timedelta, tslib, isnull, + TimedeltaIndex) + + +class TestTimedeltas(tm.TestCase): + _multiprocess_can_split_ = True + + def test_to_timedelta(self): + def conv(v): + return v.astype('m8[ns]') + + d1 = np.timedelta64(1, 'D') + + self.assertEqual(to_timedelta('1 days 06:05:01.00003', box=False), + conv(d1 + np.timedelta64(6 * 3600 + + 5 * 60 + 1, 's') + + np.timedelta64(30, 'us'))) + self.assertEqual(to_timedelta('15.5us', box=False), + conv(np.timedelta64(15500, 'ns'))) + + # empty string + result = to_timedelta('', box=False) + self.assertEqual(result.astype('int64'), tslib.iNaT) + + result = to_timedelta(['', '']) + self.assertTrue(isnull(result).all()) + + # pass thru + result = to_timedelta(np.array([np.timedelta64(1, 's')])) + expected = pd.Index(np.array([np.timedelta64(1, 's')])) + tm.assert_index_equal(result, expected) + + # ints + result = np.timedelta64(0, 'ns') + expected = to_timedelta(0, box=False) + self.assertEqual(result, expected) + + # Series + expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)]) + result = to_timedelta(Series(['1d', '1days 00:00:01'])) + tm.assert_series_equal(result, expected) + + # with units + result = TimedeltaIndex([np.timedelta64(0, 'ns'), np.timedelta64( + 10, 's').astype('m8[ns]')]) + expected = to_timedelta([0, 10], unit='s') + tm.assert_index_equal(result, expected) + + # single element conversion + v = timedelta(seconds=1) + result = to_timedelta(v, box=False) + expected = np.timedelta64(timedelta(seconds=1)) + self.assertEqual(result, expected) + + v = np.timedelta64(timedelta(seconds=1)) + result = to_timedelta(v, box=False) + expected = np.timedelta64(timedelta(seconds=1)) + self.assertEqual(result, expected) + + # arrays of various dtypes + arr = np.array([1] * 5, dtype='int64') + result = to_timedelta(arr, unit='s') + expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype='int64') + result = to_timedelta(arr, unit='m') + expected = TimedeltaIndex([np.timedelta64(1, 'm')] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype='int64') + result = to_timedelta(arr, unit='h') + expected = TimedeltaIndex([np.timedelta64(1, 'h')] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype='timedelta64[s]') + result = to_timedelta(arr) + expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype='timedelta64[D]') + result = to_timedelta(arr) + expected = TimedeltaIndex([np.timedelta64(1, 'D')] * 5) + tm.assert_index_equal(result, expected) + + # Test with lists as input when box=false + expected = np.array(np.arange(3) * 1000000000, dtype='timedelta64[ns]') + result = to_timedelta(range(3), unit='s', box=False) + tm.assert_numpy_array_equal(expected, result) + + result = to_timedelta(np.arange(3), unit='s', box=False) + tm.assert_numpy_array_equal(expected, result) + + result = to_timedelta([0, 1, 2], unit='s', box=False) + tm.assert_numpy_array_equal(expected, result) + + # Tests with fractional seconds as input: + expected = np.array( + [0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]') + result = 
to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False)
+        tm.assert_numpy_array_equal(expected, result)
+
+    def test_to_timedelta_invalid(self):
+
+        # bad value for errors parameter
+        msg = "errors must be one of"
+        tm.assertRaisesRegexp(ValueError, msg, to_timedelta,
+                              ['foo'], errors='never')
+
+        # these will error
+        self.assertRaises(ValueError, lambda: to_timedelta([1, 2], unit='foo'))
+        self.assertRaises(ValueError, lambda: to_timedelta(1, unit='foo'))
+
+        # time not supported ATM
+        self.assertRaises(ValueError, lambda: to_timedelta(time(second=1)))
+        self.assertTrue(to_timedelta(
+            time(second=1), errors='coerce') is pd.NaT)
+
+        self.assertRaises(ValueError, lambda: to_timedelta(['foo', 'bar']))
+        tm.assert_index_equal(TimedeltaIndex([pd.NaT, pd.NaT]),
+                              to_timedelta(['foo', 'bar'], errors='coerce'))
+
+        tm.assert_index_equal(TimedeltaIndex(['1 day', pd.NaT, '1 min']),
+                              to_timedelta(['1 day', 'bar', '1 min'],
+                                           errors='coerce'))
+
+        # gh-13613: these should not error because errors='ignore'
+        invalid_data = 'apple'
+        self.assertEqual(invalid_data, to_timedelta(
+            invalid_data, errors='ignore'))
+
+        invalid_data = ['apple', '1 days']
+        tm.assert_numpy_array_equal(
+            np.array(invalid_data, dtype=object),
+            to_timedelta(invalid_data, errors='ignore'))
+
+        invalid_data = pd.Index(['apple', '1 days'])
+        tm.assert_index_equal(invalid_data, to_timedelta(
+            invalid_data, errors='ignore'))
+
+        invalid_data = Series(['apple', '1 days'])
+        tm.assert_series_equal(invalid_data, to_timedelta(
+            invalid_data, errors='ignore'))
+
+    def test_to_timedelta_via_apply(self):
+        # GH 5458
+        expected = Series([np.timedelta64(1, 's')])
+        result = Series(['00:00:01']).apply(to_timedelta)
+        tm.assert_series_equal(result, expected)
+
+        result = Series([to_timedelta('00:00:01')])
+        tm.assert_series_equal(result, expected)
+
+    def test_to_timedelta_on_missing_values(self):
+        # GH5438
+        timedelta_NaT = np.timedelta64('NaT')
+
+        actual = pd.to_timedelta(Series(['00:00:01', np.nan]))
+        expected = Series([np.timedelta64(1000000000, 'ns'),
+                           timedelta_NaT], dtype='<m8[ns]')
+        assert_series_equal(actual, expected)
+
+        actual = pd.to_timedelta(Series(['00:00:01', pd.NaT]))
+        assert_series_equal(actual, expected)
+
+        actual = pd.to_timedelta(np.nan)
+        self.assertEqual(actual.value, timedelta_NaT.astype('int64'))
+
+        actual = pd.to_timedelta(pd.NaT)
+        self.assertEqual(actual.value, timedelta_NaT.astype('int64'))
diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py
index 0cef27d2e41fc..2abc83ca6109c 100644
--- a/pandas/tests/scalar/test_timestamp.py
+++ b/pandas/tests/scalar/test_timestamp.py
@@ -1637,3 +1637,48 @@ def test_woy_boundary(self):
             for args in [(2000, 1, 1), (2000, 1, 2), (
                 2005, 1, 1), (2005, 1, 2)]])
         self.assertTrue((result == [52, 52, 53, 53]).all())
+
+
+class TestTsUtil(tm.TestCase):
+
+    def test_min_valid(self):
+        # Ensure that Timestamp.min is a valid Timestamp
+        Timestamp(Timestamp.min)
+
+    def test_max_valid(self):
+        # Ensure that Timestamp.max is a valid Timestamp
+        Timestamp(Timestamp.max)
+
+    def test_to_datetime_bijective(self):
+        # Ensure that converting to datetime and back only loses precision
+        # by going from nanoseconds to microseconds.
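+        # (Timestamp stores nanoseconds, while datetime.datetime only
+        # supports microsecond resolution, so a UserWarning is expected
+        # whenever nonzero nanoseconds would be discarded.)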
+ exp_warning = None if Timestamp.max.nanosecond == 0 else UserWarning + with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + self.assertEqual( + Timestamp(Timestamp.max.to_pydatetime()).value / 1000, + Timestamp.max.value / 1000) + + exp_warning = None if Timestamp.min.nanosecond == 0 else UserWarning + with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + self.assertEqual( + Timestamp(Timestamp.min.to_pydatetime()).value / 1000, + Timestamp.min.value / 1000) + + +class TestTslib(tm.TestCase): + + def test_round(self): + stamp = Timestamp('2000-01-05 05:09:15.13') + + def _check_round(freq, expected): + result = stamp.round(freq=freq) + self.assertEqual(result, expected) + + for freq, expected in [('D', Timestamp('2000-01-05 00:00:00')), + ('H', Timestamp('2000-01-05 05:00:00')), + ('S', Timestamp('2000-01-05 05:09:15'))]: + _check_round(freq, expected) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + stamp.round('foo') diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index be3b917cb8117..114cb02205d4f 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -2,10 +2,8 @@ from datetime import timedelta import numpy as np import pandas as pd -from pandas import (Series, Index, Int64Index, Timestamp, Period, - DatetimeIndex, PeriodIndex, TimedeltaIndex, - Timedelta, timedelta_range, date_range, Float64Index, - _np_version_under1p10) +from pandas import (Series, Index, Period, DatetimeIndex, PeriodIndex, + Timedelta, _np_version_under1p10) import pandas.tslib as tslib import pandas.tseries.period as period @@ -14,846 +12,6 @@ from pandas.tests.test_base import Ops -class TestTimedeltaIndexOps(Ops): - - def setUp(self): - super(TestTimedeltaIndexOps, self).setUp() - mask = lambda x: isinstance(x, TimedeltaIndex) - self.is_valid_objs = [o for o in self.objs if mask(o)] - self.not_valid_objs = [] - - def test_ops_properties(self): - self.check_ops_properties(['days', 'hours', 'minutes', 'seconds', - 'milliseconds']) - self.check_ops_properties(['microseconds', 'nanoseconds']) - - def test_asobject_tolist(self): - idx = timedelta_range(start='1 days', periods=4, freq='D', name='idx') - expected_list = [Timedelta('1 days'), Timedelta('2 days'), - Timedelta('3 days'), Timedelta('4 days')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - idx = TimedeltaIndex([timedelta(days=1), timedelta(days=2), pd.NaT, - timedelta(days=4)], name='idx') - expected_list = [Timedelta('1 days'), Timedelta('2 days'), pd.NaT, - Timedelta('4 days')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - def test_minmax(self): - - # monotonic - idx1 = TimedeltaIndex(['1 days', '2 days', '3 days']) - self.assertTrue(idx1.is_monotonic) - - # non-monotonic - idx2 = TimedeltaIndex(['1 days', np.nan, '3 days', 'NaT']) - self.assertFalse(idx2.is_monotonic) - - for idx in [idx1, idx2]: - self.assertEqual(idx.min(), Timedelta('1 days')), - 
self.assertEqual(idx.max(), Timedelta('3 days')), - self.assertEqual(idx.argmin(), 0) - self.assertEqual(idx.argmax(), 2) - - for op in ['min', 'max']: - # Return NaT - obj = TimedeltaIndex([]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - obj = TimedeltaIndex([pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - def test_numpy_minmax(self): - dr = pd.date_range(start='2016-01-15', end='2016-01-20') - td = TimedeltaIndex(np.asarray(dr)) - - self.assertEqual(np.min(td), Timedelta('16815 days')) - self.assertEqual(np.max(td), Timedelta('16820 days')) - - errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.min, td, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.max, td, out=0) - - self.assertEqual(np.argmin(td), 0) - self.assertEqual(np.argmax(td), 5) - - if not _np_version_under1p10: - errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, td, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, td, out=0) - - def test_round(self): - td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min') - elt = td[1] - - expected_rng = TimedeltaIndex([ - Timedelta('16801 days 00:00:00'), - Timedelta('16801 days 00:00:00'), - Timedelta('16801 days 01:00:00'), - Timedelta('16801 days 02:00:00'), - Timedelta('16801 days 02:00:00'), - ]) - expected_elt = expected_rng[1] - - tm.assert_index_equal(td.round(freq='H'), expected_rng) - self.assertEqual(elt.round(freq='H'), expected_elt) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - td.round(freq='foo') - with tm.assertRaisesRegexp(ValueError, msg): - elt.round(freq='foo') - - msg = " is a non-fixed frequency" - tm.assertRaisesRegexp(ValueError, msg, td.round, freq='M') - tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') - - def test_representation(self): - idx1 = TimedeltaIndex([], freq='D') - idx2 = TimedeltaIndex(['1 days'], freq='D') - idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') - idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') - idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) - - exp1 = """TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')""" - - exp2 = ("TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', " - "freq='D')") - - exp3 = ("TimedeltaIndex(['1 days', '2 days'], " - "dtype='timedelta64[ns]', freq='D')") - - exp4 = ("TimedeltaIndex(['1 days', '2 days', '3 days'], " - "dtype='timedelta64[ns]', freq='D')") - - exp5 = ("TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', " - "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)") - - with pd.option_context('display.width', 300): - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - for func in ['__repr__', '__unicode__', '__str__']: - result = getattr(idx, func)() - self.assertEqual(result, expected) - - def test_representation_to_series(self): - idx1 = TimedeltaIndex([], freq='D') - idx2 = TimedeltaIndex(['1 days'], freq='D') - idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') - idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') - idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) - - exp1 = """Series([], dtype: timedelta64[ns])""" - - exp2 = """0 1 days -dtype: timedelta64[ns]""" - - exp3 = """0 1 days -1 2 days -dtype: timedelta64[ns]""" - - exp4 = """0 1 days -1 2 days -2 3 days 
-dtype: timedelta64[ns]""" - - exp5 = """0 1 days 00:00:01 -1 2 days 00:00:00 -2 3 days 00:00:00 -dtype: timedelta64[ns]""" - - with pd.option_context('display.width', 300): - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - result = repr(pd.Series(idx)) - self.assertEqual(result, expected) - - def test_summary(self): - # GH9116 - idx1 = TimedeltaIndex([], freq='D') - idx2 = TimedeltaIndex(['1 days'], freq='D') - idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') - idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') - idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) - - exp1 = """TimedeltaIndex: 0 entries -Freq: D""" - - exp2 = """TimedeltaIndex: 1 entries, 1 days to 1 days -Freq: D""" - - exp3 = """TimedeltaIndex: 2 entries, 1 days to 2 days -Freq: D""" - - exp4 = """TimedeltaIndex: 3 entries, 1 days to 3 days -Freq: D""" - - exp5 = ("TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days " - "00:00:00") - - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - result = idx.summary() - self.assertEqual(result, expected) - - def test_add_iadd(self): - - # only test adding/sub offsets as + is now numeric - - # offset - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - for delta in offsets: - rng = timedelta_range('1 days', '10 days') - result = rng + delta - expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00', - freq='D') - tm.assert_index_equal(result, expected) - rng += delta - tm.assert_index_equal(rng, expected) - - # int - rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) - result = rng + 1 - expected = timedelta_range('1 days 10:00:00', freq='H', periods=10) - tm.assert_index_equal(result, expected) - rng += 1 - tm.assert_index_equal(rng, expected) - - def test_sub_isub(self): - # only test adding/sub offsets as - is now numeric - - # offset - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - for delta in offsets: - rng = timedelta_range('1 days', '10 days') - result = rng - delta - expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00') - tm.assert_index_equal(result, expected) - rng -= delta - tm.assert_index_equal(rng, expected) - - # int - rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) - result = rng - 1 - expected = timedelta_range('1 days 08:00:00', freq='H', periods=10) - tm.assert_index_equal(result, expected) - rng -= 1 - tm.assert_index_equal(rng, expected) - - idx = TimedeltaIndex(['1 day', '2 day']) - msg = "cannot subtract a datelike from a TimedeltaIndex" - with tm.assertRaisesRegexp(TypeError, msg): - idx - Timestamp('2011-01-01') - - result = Timestamp('2011-01-01') + idx - expected = DatetimeIndex(['2011-01-02', '2011-01-03']) - tm.assert_index_equal(result, expected) - - def test_ops_compat(self): - - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - rng = timedelta_range('1 days', '10 days', name='foo') - - # multiply - for offset in offsets: - self.assertRaises(TypeError, lambda: rng * offset) - - # divide - expected = Int64Index((np.arange(10) + 1) * 12, name='foo') - for offset in offsets: - result = rng / offset - tm.assert_index_equal(result, expected, exact=False) - - # divide with nats - rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - expected = Float64Index([12, np.nan, 24], name='foo') - for offset in offsets: - result = rng / 
offset - tm.assert_index_equal(result, expected) - - # don't allow division by NaT (make could in the future) - self.assertRaises(TypeError, lambda: rng / pd.NaT) - - def test_subtraction_ops(self): - - # with datetimes/timedelta and tdi/dti - tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - dti = date_range('20130101', periods=3, name='bar') - td = Timedelta('1 days') - dt = Timestamp('20130101') - - self.assertRaises(TypeError, lambda: tdi - dt) - self.assertRaises(TypeError, lambda: tdi - dti) - self.assertRaises(TypeError, lambda: td - dt) - self.assertRaises(TypeError, lambda: td - dti) - - result = dt - dti - expected = TimedeltaIndex(['0 days', '-1 days', '-2 days'], name='bar') - tm.assert_index_equal(result, expected) - - result = dti - dt - expected = TimedeltaIndex(['0 days', '1 days', '2 days'], name='bar') - tm.assert_index_equal(result, expected) - - result = tdi - td - expected = TimedeltaIndex(['0 days', pd.NaT, '1 days'], name='foo') - tm.assert_index_equal(result, expected, check_names=False) - - result = td - tdi - expected = TimedeltaIndex(['0 days', pd.NaT, '-1 days'], name='foo') - tm.assert_index_equal(result, expected, check_names=False) - - result = dti - td - expected = DatetimeIndex( - ['20121231', '20130101', '20130102'], name='bar') - tm.assert_index_equal(result, expected, check_names=False) - - result = dt - tdi - expected = DatetimeIndex(['20121231', pd.NaT, '20121230'], name='foo') - tm.assert_index_equal(result, expected) - - def test_subtraction_ops_with_tz(self): - - # check that dt/dti subtraction ops with tz are validated - dti = date_range('20130101', periods=3) - ts = Timestamp('20130101') - dt = ts.to_pydatetime() - dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') - ts_tz = Timestamp('20130101').tz_localize('US/Eastern') - ts_tz2 = Timestamp('20130101').tz_localize('CET') - dt_tz = ts_tz.to_pydatetime() - td = Timedelta('1 days') - - def _check(result, expected): - self.assertEqual(result, expected) - self.assertIsInstance(result, Timedelta) - - # scalars - result = ts - ts - expected = Timedelta('0 days') - _check(result, expected) - - result = dt_tz - ts_tz - expected = Timedelta('0 days') - _check(result, expected) - - result = ts_tz - dt_tz - expected = Timedelta('0 days') - _check(result, expected) - - # tz mismatches - self.assertRaises(TypeError, lambda: dt_tz - ts) - self.assertRaises(TypeError, lambda: dt_tz - dt) - self.assertRaises(TypeError, lambda: dt_tz - ts_tz2) - self.assertRaises(TypeError, lambda: dt - dt_tz) - self.assertRaises(TypeError, lambda: ts - dt_tz) - self.assertRaises(TypeError, lambda: ts_tz2 - ts) - self.assertRaises(TypeError, lambda: ts_tz2 - dt) - self.assertRaises(TypeError, lambda: ts_tz - ts_tz2) - - # with dti - self.assertRaises(TypeError, lambda: dti - ts_tz) - self.assertRaises(TypeError, lambda: dti_tz - ts) - self.assertRaises(TypeError, lambda: dti_tz - ts_tz2) - - result = dti_tz - dt_tz - expected = TimedeltaIndex(['0 days', '1 days', '2 days']) - tm.assert_index_equal(result, expected) - - result = dt_tz - dti_tz - expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) - tm.assert_index_equal(result, expected) - - result = dti_tz - ts_tz - expected = TimedeltaIndex(['0 days', '1 days', '2 days']) - tm.assert_index_equal(result, expected) - - result = ts_tz - dti_tz - expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) - tm.assert_index_equal(result, expected) - - result = td - td - expected = Timedelta('0 days') - _check(result, expected) - - result = 
dti_tz - td - expected = DatetimeIndex( - ['20121231', '20130101', '20130102'], tz='US/Eastern') - tm.assert_index_equal(result, expected) - - def test_dti_tdi_numeric_ops(self): - - # These are normally union/diff set-like ops - tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - dti = date_range('20130101', periods=3, name='bar') - - # TODO(wesm): unused? - # td = Timedelta('1 days') - # dt = Timestamp('20130101') - - result = tdi - tdi - expected = TimedeltaIndex(['0 days', pd.NaT, '0 days'], name='foo') - tm.assert_index_equal(result, expected) - - result = tdi + tdi - expected = TimedeltaIndex(['2 days', pd.NaT, '4 days'], name='foo') - tm.assert_index_equal(result, expected) - - result = dti - tdi # name will be reset - expected = DatetimeIndex(['20121231', pd.NaT, '20130101']) - tm.assert_index_equal(result, expected) - - def test_sub_period(self): - # GH 13078 - # not supported, check TypeError - p = pd.Period('2011-01-01', freq='D') - - for freq in [None, 'H']: - idx = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=freq) - - with tm.assertRaises(TypeError): - idx - p - - with tm.assertRaises(TypeError): - p - idx - - def test_addition_ops(self): - - # with datetimes/timedelta and tdi/dti - tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - dti = date_range('20130101', periods=3, name='bar') - td = Timedelta('1 days') - dt = Timestamp('20130101') - - result = tdi + dt - expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') - tm.assert_index_equal(result, expected) - - result = dt + tdi - expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') - tm.assert_index_equal(result, expected) - - result = td + tdi - expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') - tm.assert_index_equal(result, expected) - - result = tdi + td - expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') - tm.assert_index_equal(result, expected) - - # unequal length - self.assertRaises(ValueError, lambda: tdi + dti[0:1]) - self.assertRaises(ValueError, lambda: tdi[0:1] + dti) - - # random indexes - self.assertRaises(TypeError, lambda: tdi + Int64Index([1, 2, 3])) - - # this is a union! 
- # self.assertRaises(TypeError, lambda : Int64Index([1,2,3]) + tdi) - - result = tdi + dti # name will be reset - expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) - tm.assert_index_equal(result, expected) - - result = dti + tdi # name will be reset - expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) - tm.assert_index_equal(result, expected) - - result = dt + td - expected = Timestamp('20130102') - self.assertEqual(result, expected) - - result = td + dt - expected = Timestamp('20130102') - self.assertEqual(result, expected) - - def test_comp_nat(self): - left = pd.TimedeltaIndex([pd.Timedelta('1 days'), pd.NaT, - pd.Timedelta('3 days')]) - right = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta('3 days')]) - - for l, r in [(left, right), (left.asobject, right.asobject)]: - result = l == r - expected = np.array([False, False, True]) - tm.assert_numpy_array_equal(result, expected) - - result = l != r - expected = np.array([True, True, False]) - tm.assert_numpy_array_equal(result, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l == pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT == r, expected) - - expected = np.array([True, True, True]) - tm.assert_numpy_array_equal(l != pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT != l, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l < pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT > l, expected) - - def test_value_counts_unique(self): - # GH 7735 - - idx = timedelta_range('1 days 09:00:00', freq='H', periods=10) - # create repeated values, 'n'th element is repeated by n+1 times - idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1))) - - exp_idx = timedelta_range('1 days 18:00:00', freq='-1H', periods=10) - expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - expected = timedelta_range('1 days 09:00:00', freq='H', periods=10) - tm.assert_index_equal(idx.unique(), expected) - - idx = TimedeltaIndex(['1 days 09:00:00', '1 days 09:00:00', - '1 days 09:00:00', '1 days 08:00:00', - '1 days 08:00:00', pd.NaT]) - - exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00']) - expected = Series([3, 2], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00', - pd.NaT]) - expected = Series([3, 2, 1], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(dropna=False), expected) - - tm.assert_index_equal(idx.unique(), exp_idx) - - def test_nonunique_contains(self): - # GH 9512 - for idx in map(TimedeltaIndex, ([0, 1, 0], [0, 0, -1], [0, -1, -1], - ['00:01:00', '00:01:00', '00:02:00'], - ['00:01:00', '00:01:00', '00:00:01'])): - tm.assertIn(idx[0], idx) - - def test_unknown_attribute(self): - # GH 9680 - tdi = pd.timedelta_range(start=0, periods=10, freq='1s') - ts = pd.Series(np.random.normal(size=10), index=tdi) - self.assertNotIn('foo', ts.__dict__.keys()) - self.assertRaises(AttributeError, lambda: ts.foo) - - def test_order(self): - # GH 10295 - idx1 = TimedeltaIndex(['1 day', '2 day', '3 day'], freq='D', - name='idx') - idx2 = TimedeltaIndex( - ['1 hour', '2 hour', '3 hour'], freq='H', name='idx') - - for idx in [idx1, idx2]: - ordered = idx.sort_values() - self.assert_index_equal(ordered, idx) - self.assertEqual(ordered.freq, idx.freq) - - ordered = 
idx.sort_values(ascending=False) - expected = idx[::-1] - self.assert_index_equal(ordered, expected) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq.n, -1) - - ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, idx) - self.assert_numpy_array_equal(indexer, - np.array([0, 1, 2]), - check_dtype=False) - self.assertEqual(ordered.freq, idx.freq) - - ordered, indexer = idx.sort_values(return_indexer=True, - ascending=False) - self.assert_index_equal(ordered, idx[::-1]) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq.n, -1) - - idx1 = TimedeltaIndex(['1 hour', '3 hour', '5 hour', - '2 hour ', '1 hour'], name='idx1') - exp1 = TimedeltaIndex(['1 hour', '1 hour', '2 hour', - '3 hour', '5 hour'], name='idx1') - - idx2 = TimedeltaIndex(['1 day', '3 day', '5 day', - '2 day', '1 day'], name='idx2') - - # TODO(wesm): unused? - # exp2 = TimedeltaIndex(['1 day', '1 day', '2 day', - # '3 day', '5 day'], name='idx2') - - # idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute', - # '2 minute', pd.NaT], name='idx3') - # exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute', - # '5 minute'], name='idx3') - - for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]: - ordered = idx.sort_values() - self.assert_index_equal(ordered, expected) - self.assertIsNone(ordered.freq) - - ordered = idx.sort_values(ascending=False) - self.assert_index_equal(ordered, expected[::-1]) - self.assertIsNone(ordered.freq) - - ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, expected) - - exp = np.array([0, 4, 3, 1, 2]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) - - ordered, indexer = idx.sort_values(return_indexer=True, - ascending=False) - self.assert_index_equal(ordered, expected[::-1]) - - exp = np.array([2, 1, 3, 4, 0]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) - - def test_getitem(self): - idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') - - for idx in [idx1]: - result = idx[0] - self.assertEqual(result, pd.Timedelta('1 day')) - - result = idx[0:5] - expected = pd.timedelta_range('1 day', '5 day', freq='D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[0:10:2] - expected = pd.timedelta_range('1 day', '9 day', freq='2D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[-20:-5:3] - expected = pd.timedelta_range('12 day', '24 day', freq='3D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[4::-1] - expected = TimedeltaIndex(['5 day', '4 day', '3 day', - '2 day', '1 day'], - freq='-1D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - def test_drop_duplicates_metadata(self): - # GH 10115 - idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') - result = idx.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertEqual(idx.freq, result.freq) - - idx_dup = idx.append(idx) - self.assertIsNone(idx_dup.freq) # freq is reset - result = idx_dup.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertIsNone(result.freq) - - def test_drop_duplicates(self): - # to check Index/Series compat - base = pd.timedelta_range('1 day', '31 day', 
freq='D', name='idx') - idx = base.append(base[:5]) - - res = idx.drop_duplicates() - tm.assert_index_equal(res, base) - res = Series(idx).drop_duplicates() - tm.assert_series_equal(res, Series(base)) - - res = idx.drop_duplicates(keep='last') - exp = base[5:].append(base[:5]) - tm.assert_index_equal(res, exp) - res = Series(idx).drop_duplicates(keep='last') - tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) - - res = idx.drop_duplicates(keep=False) - tm.assert_index_equal(res, base[5:]) - res = Series(idx).drop_duplicates(keep=False) - tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) - - def test_take(self): - # GH 10295 - idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') - - for idx in [idx1]: - result = idx.take([0]) - self.assertEqual(result, pd.Timedelta('1 day')) - - result = idx.take([-1]) - self.assertEqual(result, pd.Timedelta('31 day')) - - result = idx.take([0, 1, 2]) - expected = pd.timedelta_range('1 day', '3 day', freq='D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([0, 2, 4]) - expected = pd.timedelta_range('1 day', '5 day', freq='2D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([7, 4, 1]) - expected = pd.timedelta_range('8 day', '2 day', freq='-3D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([3, 2, 5]) - expected = TimedeltaIndex(['4 day', '3 day', '6 day'], name='idx') - self.assert_index_equal(result, expected) - self.assertIsNone(result.freq) - - result = idx.take([-3, 2, 5]) - expected = TimedeltaIndex(['29 day', '3 day', '6 day'], name='idx') - self.assert_index_equal(result, expected) - self.assertIsNone(result.freq) - - def test_take_invalid_kwargs(self): - idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') - indices = [1, 6, 5, 9, 10, 13, 15, 3] - - msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, idx.take, - indices, foo=2) - - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, out=indices) - - msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, mode='clip') - - def test_infer_freq(self): - # GH 11018 - for freq in ['D', '3D', '-3D', 'H', '2H', '-2H', 'T', '2T', 'S', '-3S' - ]: - idx = pd.timedelta_range('1', freq=freq, periods=10) - result = pd.TimedeltaIndex(idx.asi8, freq='infer') - tm.assert_index_equal(idx, result) - self.assertEqual(result.freq, freq) - - def test_nat_new(self): - - idx = pd.timedelta_range('1', freq='D', periods=5, name='x') - result = idx._nat_new() - exp = pd.TimedeltaIndex([pd.NaT] * 5, name='x') - tm.assert_index_equal(result, exp) - - result = idx._nat_new(box=False) - exp = np.array([tslib.iNaT] * 5, dtype=np.int64) - tm.assert_numpy_array_equal(result, exp) - - def test_shift(self): - # GH 9903 - idx = pd.TimedeltaIndex([], name='xxx') - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - tm.assert_index_equal(idx.shift(3, freq='H'), idx) - - idx = pd.TimedeltaIndex(['5 hours', '6 hours', '9 hours'], name='xxx') - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - exp = pd.TimedeltaIndex(['8 hours', '9 hours', '12 hours'], name='xxx') - tm.assert_index_equal(idx.shift(3, freq='H'), exp) - exp = pd.TimedeltaIndex(['2 hours', '3 hours', '6 hours'], name='xxx') 
- tm.assert_index_equal(idx.shift(-3, freq='H'), exp) - - tm.assert_index_equal(idx.shift(0, freq='T'), idx) - exp = pd.TimedeltaIndex(['05:03:00', '06:03:00', '9:03:00'], - name='xxx') - tm.assert_index_equal(idx.shift(3, freq='T'), exp) - exp = pd.TimedeltaIndex(['04:57:00', '05:57:00', '8:57:00'], - name='xxx') - tm.assert_index_equal(idx.shift(-3, freq='T'), exp) - - def test_repeat(self): - index = pd.timedelta_range('1 days', periods=2, freq='D') - exp = pd.TimedeltaIndex(['1 days', '1 days', '2 days', '2 days']) - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) - - index = TimedeltaIndex(['1 days', 'NaT', '3 days']) - exp = TimedeltaIndex(['1 days', '1 days', '1 days', - 'NaT', 'NaT', 'NaT', - '3 days', '3 days', '3 days']) - for res in [index.repeat(3), np.repeat(index, 3)]: - tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) - - def test_nat(self): - self.assertIs(pd.TimedeltaIndex._na_value, pd.NaT) - self.assertIs(pd.TimedeltaIndex([])._na_value, pd.NaT) - - idx = pd.TimedeltaIndex(['1 days', '2 days']) - self.assertTrue(idx._can_hold_na) - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - self.assertFalse(idx.hasnans) - tm.assert_numpy_array_equal(idx._nan_idxs, - np.array([], dtype=np.intp)) - - idx = pd.TimedeltaIndex(['1 days', 'NaT']) - self.assertTrue(idx._can_hold_na) - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - self.assertTrue(idx.hasnans) - tm.assert_numpy_array_equal(idx._nan_idxs, - np.array([1], dtype=np.intp)) - - def test_equals(self): - # GH 13107 - idx = pd.TimedeltaIndex(['1 days', '2 days', 'NaT']) - self.assertTrue(idx.equals(idx)) - self.assertTrue(idx.equals(idx.copy())) - self.assertTrue(idx.equals(idx.asobject)) - self.assertTrue(idx.asobject.equals(idx)) - self.assertTrue(idx.asobject.equals(idx.asobject)) - self.assertFalse(idx.equals(list(idx))) - self.assertFalse(idx.equals(pd.Series(idx))) - - idx2 = pd.TimedeltaIndex(['2 days', '1 days', 'NaT']) - self.assertFalse(idx.equals(idx2)) - self.assertFalse(idx.equals(idx2.copy())) - self.assertFalse(idx.equals(idx2.asobject)) - self.assertFalse(idx.asobject.equals(idx2)) - self.assertFalse(idx.asobject.equals(idx2.asobject)) - self.assertFalse(idx.equals(list(idx2))) - self.assertFalse(idx.equals(pd.Series(idx2))) - - class TestPeriodIndexOps(Ops): def setUp(self): diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py deleted file mode 100644 index a64882380850b..0000000000000 --- a/pandas/tseries/tests/test_daterange.py +++ /dev/null @@ -1,820 +0,0 @@ -from datetime import datetime -from pandas.compat import range -import numpy as np - -from pandas.core.index import Index -from pandas.tseries.index import DatetimeIndex - -from pandas import Timestamp -from pandas.tseries.offsets import (BDay, BMonthEnd, CDay, MonthEnd, - generate_range, DateOffset, Minute) -from pandas.tseries.index import cdate_range, bdate_range, date_range - -from pandas.core import common as com -from pandas.util.testing import assertRaisesRegexp -import pandas.util.testing as tm - - -def eq_gen_range(kwargs, expected): - rng = generate_range(**kwargs) - assert (np.array_equal(list(rng), expected)) - - -START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) - - -class TestGenRangeGeneration(tm.TestCase): - - def test_generate(self): - rng1 = list(generate_range(START, END, offset=BDay())) - rng2 = list(generate_range(START, END, time_rule='B')) - self.assertEqual(rng1, 
rng2) - - def test_generate_cday(self): - rng1 = list(generate_range(START, END, offset=CDay())) - rng2 = list(generate_range(START, END, time_rule='C')) - self.assertEqual(rng1, rng2) - - def test_1(self): - eq_gen_range(dict(start=datetime(2009, 3, 25), periods=2), - [datetime(2009, 3, 25), datetime(2009, 3, 26)]) - - def test_2(self): - eq_gen_range(dict(start=datetime(2008, 1, 1), - end=datetime(2008, 1, 3)), - [datetime(2008, 1, 1), - datetime(2008, 1, 2), - datetime(2008, 1, 3)]) - - def test_3(self): - eq_gen_range(dict(start=datetime(2008, 1, 5), - end=datetime(2008, 1, 6)), - []) - - def test_precision_finer_than_offset(self): - # GH 9907 - result1 = DatetimeIndex(start='2015-04-15 00:00:03', - end='2016-04-22 00:00:00', freq='Q') - result2 = DatetimeIndex(start='2015-04-15 00:00:03', - end='2015-06-22 00:00:04', freq='W') - expected1_list = ['2015-06-30 00:00:03', '2015-09-30 00:00:03', - '2015-12-31 00:00:03', '2016-03-31 00:00:03'] - expected2_list = ['2015-04-19 00:00:03', '2015-04-26 00:00:03', - '2015-05-03 00:00:03', '2015-05-10 00:00:03', - '2015-05-17 00:00:03', '2015-05-24 00:00:03', - '2015-05-31 00:00:03', '2015-06-07 00:00:03', - '2015-06-14 00:00:03', '2015-06-21 00:00:03'] - expected1 = DatetimeIndex(expected1_list, dtype='datetime64[ns]', - freq='Q-DEC', tz=None) - expected2 = DatetimeIndex(expected2_list, dtype='datetime64[ns]', - freq='W-SUN', tz=None) - self.assert_index_equal(result1, expected1) - self.assert_index_equal(result2, expected2) - - -class TestDateRange(tm.TestCase): - - def setUp(self): - self.rng = bdate_range(START, END) - - def test_constructor(self): - bdate_range(START, END, freq=BDay()) - bdate_range(START, periods=20, freq=BDay()) - bdate_range(end=START, periods=20, freq=BDay()) - self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'B') - self.assertRaises(ValueError, bdate_range, '2011-1-1', '2012-1-1', 'B') - - def test_naive_aware_conflicts(self): - naive = bdate_range(START, END, freq=BDay(), tz=None) - aware = bdate_range(START, END, freq=BDay(), - tz="Asia/Hong_Kong") - assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", naive.join, aware) - assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", aware.join, naive) - - def test_cached_range(self): - DatetimeIndex._cached_range(START, END, offset=BDay()) - DatetimeIndex._cached_range(START, periods=20, offset=BDay()) - DatetimeIndex._cached_range(end=START, periods=20, offset=BDay()) - - assertRaisesRegexp(TypeError, "offset", DatetimeIndex._cached_range, - START, END) - - assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, START, - offset=BDay()) - - assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, end=END, - offset=BDay()) - - assertRaisesRegexp(TypeError, "start or end", - DatetimeIndex._cached_range, periods=20, - offset=BDay()) - - def test_cached_range_bug(self): - rng = date_range('2010-09-01 05:00:00', periods=50, - freq=DateOffset(hours=6)) - self.assertEqual(len(rng), 50) - self.assertEqual(rng[0], datetime(2010, 9, 1, 5)) - - def test_timezone_comparaison_bug(self): - start = Timestamp('20130220 10:00', tz='US/Eastern') - try: - date_range(start, periods=2, tz='US/Eastern') - except AssertionError: - self.fail() - - def test_timezone_comparaison_assert(self): - start = Timestamp('20130220 10:00', tz='US/Eastern') - self.assertRaises(AssertionError, date_range, start, periods=2, - tz='Europe/Berlin') - - def test_comparison(self): - d = self.rng[10] - - comp = self.rng > d - self.assertTrue(comp[11]) - 
self.assertFalse(comp[9]) - - def test_copy(self): - cp = self.rng.copy() - repr(cp) - self.assert_index_equal(cp, self.rng) - - def test_repr(self): - # only really care that it works - repr(self.rng) - - def test_getitem(self): - smaller = self.rng[:5] - exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) - self.assert_index_equal(smaller, exp) - - self.assertEqual(smaller.offset, self.rng.offset) - - sliced = self.rng[::5] - self.assertEqual(sliced.offset, BDay() * 5) - - fancy_indexed = self.rng[[4, 3, 2, 1, 0]] - self.assertEqual(len(fancy_indexed), 5) - tm.assertIsInstance(fancy_indexed, DatetimeIndex) - self.assertIsNone(fancy_indexed.freq) - - # 32-bit vs. 64-bit platforms - self.assertEqual(self.rng[4], self.rng[np.int_(4)]) - - def test_getitem_matplotlib_hackaround(self): - values = self.rng[:, None] - expected = self.rng.values[:, None] - self.assert_numpy_array_equal(values, expected) - - def test_shift(self): - shifted = self.rng.shift(5) - self.assertEqual(shifted[0], self.rng[5]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(-5) - self.assertEqual(shifted[5], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(0) - self.assertEqual(shifted[0], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - rng = date_range(START, END, freq=BMonthEnd()) - shifted = rng.shift(1, freq=BDay()) - self.assertEqual(shifted[0], rng[0] + BDay()) - - def test_pickle_unpickle(self): - unpickled = self.round_trip_pickle(self.rng) - self.assertIsNotNone(unpickled.offset) - - def test_union(self): - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_union = left.union(right) - tm.assertIsInstance(the_union, Index) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # order does not matter - tm.assert_index_equal(right.union(left), the_union) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_union = self.rng.union(rng) - tm.assertIsInstance(the_union, DatetimeIndex) - - def test_outer_join(self): - # should just behave as union - - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_join = self.rng.join(rng, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - def test_union_not_cacheable(self): - rng = date_range('1/1/2000', periods=50, freq=Minute()) - rng1 = rng[10:] - rng2 = rng[:25] - the_union = rng1.union(rng2) - self.assert_index_equal(the_union, rng) - - rng1 = rng[10:] - rng2 = rng[15:35] - the_union = rng1.union(rng2) - expected = rng[10:] - self.assert_index_equal(the_union, expected) - - def test_intersection(self): - rng = date_range('1/1/2000', 
periods=50, freq=Minute()) - rng1 = rng[10:] - rng2 = rng[:25] - the_int = rng1.intersection(rng2) - expected = rng[10:25] - self.assert_index_equal(the_int, expected) - tm.assertIsInstance(the_int, DatetimeIndex) - self.assertEqual(the_int.offset, rng.offset) - - the_int = rng1.intersection(rng2.view(DatetimeIndex)) - self.assert_index_equal(the_int, expected) - - # non-overlapping - the_int = rng[:10].intersection(rng[10:]) - expected = DatetimeIndex([]) - self.assert_index_equal(the_int, expected) - - def test_intersection_bug(self): - # GH #771 - a = bdate_range('11/30/2011', '12/31/2011') - b = bdate_range('12/10/2011', '12/20/2011') - result = a.intersection(b) - self.assert_index_equal(result, b) - - def test_summary(self): - self.rng.summary() - self.rng[2:2].summary() - - def test_summary_pytz(self): - tm._skip_if_no_pytz() - import pytz - bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() - - def test_summary_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() - - def test_misc(self): - end = datetime(2009, 5, 13) - dr = bdate_range(end=end, periods=20) - firstDate = end - 19 * BDay() - - assert len(dr) == 20 - assert dr[0] == firstDate - assert dr[-1] == end - - def test_date_parse_failure(self): - badly_formed_date = '2007/100/1' - - self.assertRaises(ValueError, Timestamp, badly_formed_date) - - self.assertRaises(ValueError, bdate_range, start=badly_formed_date, - periods=10) - self.assertRaises(ValueError, bdate_range, end=badly_formed_date, - periods=10) - self.assertRaises(ValueError, bdate_range, badly_formed_date, - badly_formed_date) - - def test_equals(self): - self.assertFalse(self.rng.equals(list(self.rng))) - - def test_identical(self): - t1 = self.rng.copy() - t2 = self.rng.copy() - self.assertTrue(t1.identical(t2)) - - # name - t1 = t1.rename('foo') - self.assertTrue(t1.equals(t2)) - self.assertFalse(t1.identical(t2)) - t2 = t2.rename('foo') - self.assertTrue(t1.identical(t2)) - - # freq - t2v = Index(t2.values) - self.assertTrue(t1.equals(t2v)) - self.assertFalse(t1.identical(t2v)) - - def test_daterange_bug_456(self): - # GH #456 - rng1 = bdate_range('12/5/2011', '12/5/2011') - rng2 = bdate_range('12/2/2011', '12/5/2011') - rng2.offset = BDay() - - result = rng1.union(rng2) - tm.assertIsInstance(result, DatetimeIndex) - - def test_error_with_zero_monthends(self): - self.assertRaises(ValueError, date_range, '1/1/2000', '1/1/2001', - freq=MonthEnd(0)) - - def test_range_bug(self): - # GH #770 - offset = DateOffset(months=3) - result = date_range("2011-1-1", "2012-1-31", freq=offset) - - start = datetime(2011, 1, 1) - exp_values = [start + i * offset for i in range(5)] - tm.assert_index_equal(result, DatetimeIndex(exp_values)) - - def test_range_tz_pytz(self): - # GH 2906 - tm._skip_if_no_pytz() - from pytz import timezone - - tz = timezone('US/Eastern') - start = tz.localize(datetime(2011, 1, 1)) - end = tz.localize(datetime(2011, 1, 3)) - - dr = date_range(start=start, periods=3) - self.assertEqual(dr.tz.zone, tz.zone) - self.assertEqual(dr[0], start) - self.assertEqual(dr[2], end) - - dr = date_range(end=end, periods=3) - self.assertEqual(dr.tz.zone, tz.zone) - self.assertEqual(dr[0], start) - self.assertEqual(dr[2], end) - - dr = date_range(start=start, end=end) - self.assertEqual(dr.tz.zone, tz.zone) - self.assertEqual(dr[0], start) - self.assertEqual(dr[2], end) - - def test_range_tz_dst_straddle_pytz(self): - - tm._skip_if_no_pytz() - from pytz import timezone - tz = 
timezone('US/Eastern') - dates = [(tz.localize(datetime(2014, 3, 6)), - tz.localize(datetime(2014, 3, 12))), - (tz.localize(datetime(2013, 11, 1)), - tz.localize(datetime(2013, 11, 6)))] - for (start, end) in dates: - dr = date_range(start, end, freq='D') - self.assertEqual(dr[0], start) - self.assertEqual(dr[-1], end) - self.assertEqual(np.all(dr.hour == 0), True) - - dr = date_range(start, end, freq='D', tz='US/Eastern') - self.assertEqual(dr[0], start) - self.assertEqual(dr[-1], end) - self.assertEqual(np.all(dr.hour == 0), True) - - dr = date_range(start.replace(tzinfo=None), end.replace( - tzinfo=None), freq='D', tz='US/Eastern') - self.assertEqual(dr[0], start) - self.assertEqual(dr[-1], end) - self.assertEqual(np.all(dr.hour == 0), True) - - def test_range_tz_dateutil(self): - # GH 2906 - tm._skip_if_no_dateutil() - # Use maybe_get_tz to fix filename in tz under dateutil. - from pandas.tslib import maybe_get_tz - tz = lambda x: maybe_get_tz('dateutil/' + x) - - start = datetime(2011, 1, 1, tzinfo=tz('US/Eastern')) - end = datetime(2011, 1, 3, tzinfo=tz('US/Eastern')) - - dr = date_range(start=start, periods=3) - self.assertTrue(dr.tz == tz('US/Eastern')) - self.assertTrue(dr[0] == start) - self.assertTrue(dr[2] == end) - - dr = date_range(end=end, periods=3) - self.assertTrue(dr.tz == tz('US/Eastern')) - self.assertTrue(dr[0] == start) - self.assertTrue(dr[2] == end) - - dr = date_range(start=start, end=end) - self.assertTrue(dr.tz == tz('US/Eastern')) - self.assertTrue(dr[0] == start) - self.assertTrue(dr[2] == end) - - def test_month_range_union_tz_pytz(self): - tm._skip_if_no_pytz() - from pytz import timezone - tz = timezone('US/Eastern') - - early_start = datetime(2011, 1, 1) - early_end = datetime(2011, 3, 1) - - late_start = datetime(2011, 3, 1) - late_end = datetime(2011, 5, 1) - - early_dr = date_range(start=early_start, end=early_end, tz=tz, - freq=MonthEnd()) - late_dr = date_range(start=late_start, end=late_end, tz=tz, - freq=MonthEnd()) - - early_dr.union(late_dr) - - def test_month_range_union_tz_dateutil(self): - tm._skip_if_windows_python_3() - tm._skip_if_no_dateutil() - from pandas.tslib import _dateutil_gettz as timezone - tz = timezone('US/Eastern') - - early_start = datetime(2011, 1, 1) - early_end = datetime(2011, 3, 1) - - late_start = datetime(2011, 3, 1) - late_end = datetime(2011, 5, 1) - - early_dr = date_range(start=early_start, end=early_end, tz=tz, - freq=MonthEnd()) - late_dr = date_range(start=late_start, end=late_end, tz=tz, - freq=MonthEnd()) - - early_dr.union(late_dr) - - def test_range_closed(self): - begin = datetime(2011, 1, 1) - end = datetime(2014, 1, 1) - - for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - closed = date_range(begin, end, closed=None, freq=freq) - left = date_range(begin, end, closed="left", freq=freq) - right = date_range(begin, end, closed="right", freq=freq) - expected_left = left - expected_right = right - - if end == closed[-1]: - expected_left = closed[:-1] - if begin == closed[0]: - expected_right = closed[1:] - - self.assert_index_equal(expected_left, left) - self.assert_index_equal(expected_right, right) - - def test_range_closed_with_tz_aware_start_end(self): - # GH12409, GH12684 - begin = Timestamp('2011/1/1', tz='US/Eastern') - end = Timestamp('2014/1/1', tz='US/Eastern') - - for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - closed = date_range(begin, end, closed=None, freq=freq) - left = date_range(begin, end, closed="left", freq=freq) - right = date_range(begin, end, closed="right", freq=freq) - expected_left 
= left - expected_right = right - - if end == closed[-1]: - expected_left = closed[:-1] - if begin == closed[0]: - expected_right = closed[1:] - - self.assert_index_equal(expected_left, left) - self.assert_index_equal(expected_right, right) - - begin = Timestamp('2011/1/1') - end = Timestamp('2014/1/1') - begintz = Timestamp('2011/1/1', tz='US/Eastern') - endtz = Timestamp('2014/1/1', tz='US/Eastern') - - for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - closed = date_range(begin, end, closed=None, freq=freq, - tz='US/Eastern') - left = date_range(begin, end, closed="left", freq=freq, - tz='US/Eastern') - right = date_range(begin, end, closed="right", freq=freq, - tz='US/Eastern') - expected_left = left - expected_right = right - - if endtz == closed[-1]: - expected_left = closed[:-1] - if begintz == closed[0]: - expected_right = closed[1:] - - self.assert_index_equal(expected_left, left) - self.assert_index_equal(expected_right, right) - - def test_range_closed_boundary(self): - # GH 11804 - for closed in ['right', 'left', None]: - right_boundary = date_range('2015-09-12', '2015-12-01', - freq='QS-MAR', closed=closed) - left_boundary = date_range('2015-09-01', '2015-09-12', - freq='QS-MAR', closed=closed) - both_boundary = date_range('2015-09-01', '2015-12-01', - freq='QS-MAR', closed=closed) - expected_right = expected_left = expected_both = both_boundary - - if closed == 'right': - expected_left = both_boundary[1:] - if closed == 'left': - expected_right = both_boundary[:-1] - if closed is None: - expected_right = both_boundary[1:] - expected_left = both_boundary[:-1] - - self.assert_index_equal(right_boundary, expected_right) - self.assert_index_equal(left_boundary, expected_left) - self.assert_index_equal(both_boundary, expected_both) - - def test_years_only(self): - # GH 6961 - dr = date_range('2014', '2015', freq='M') - self.assertEqual(dr[0], datetime(2014, 1, 31)) - self.assertEqual(dr[-1], datetime(2014, 12, 31)) - - def test_freq_divides_end_in_nanos(self): - # GH 10885 - result_1 = date_range('2005-01-12 10:00', '2005-01-12 16:00', - freq='345min') - result_2 = date_range('2005-01-13 10:00', '2005-01-13 16:00', - freq='345min') - expected_1 = DatetimeIndex(['2005-01-12 10:00:00', - '2005-01-12 15:45:00'], - dtype='datetime64[ns]', freq='345T', - tz=None) - expected_2 = DatetimeIndex(['2005-01-13 10:00:00', - '2005-01-13 15:45:00'], - dtype='datetime64[ns]', freq='345T', - tz=None) - self.assert_index_equal(result_1, expected_1) - self.assert_index_equal(result_2, expected_2) - - -class TestCustomDateRange(tm.TestCase): - - def setUp(self): - self.rng = cdate_range(START, END) - - def test_constructor(self): - cdate_range(START, END, freq=CDay()) - cdate_range(START, periods=20, freq=CDay()) - cdate_range(end=START, periods=20, freq=CDay()) - self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'C') - self.assertRaises(ValueError, cdate_range, '2011-1-1', '2012-1-1', 'C') - - def test_cached_range(self): - DatetimeIndex._cached_range(START, END, offset=CDay()) - DatetimeIndex._cached_range(START, periods=20, - offset=CDay()) - DatetimeIndex._cached_range(end=START, periods=20, - offset=CDay()) - - self.assertRaises(Exception, DatetimeIndex._cached_range, START, END) - - self.assertRaises(Exception, DatetimeIndex._cached_range, START, - freq=CDay()) - - self.assertRaises(Exception, DatetimeIndex._cached_range, end=END, - freq=CDay()) - - self.assertRaises(Exception, DatetimeIndex._cached_range, periods=20, - freq=CDay()) - - def test_comparison(self): - d = 
self.rng[10] - - comp = self.rng > d - self.assertTrue(comp[11]) - self.assertFalse(comp[9]) - - def test_copy(self): - cp = self.rng.copy() - repr(cp) - self.assert_index_equal(cp, self.rng) - - def test_repr(self): - # only really care that it works - repr(self.rng) - - def test_getitem(self): - smaller = self.rng[:5] - exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) - self.assert_index_equal(smaller, exp) - self.assertEqual(smaller.offset, self.rng.offset) - - sliced = self.rng[::5] - self.assertEqual(sliced.offset, CDay() * 5) - - fancy_indexed = self.rng[[4, 3, 2, 1, 0]] - self.assertEqual(len(fancy_indexed), 5) - tm.assertIsInstance(fancy_indexed, DatetimeIndex) - self.assertIsNone(fancy_indexed.freq) - - # 32-bit vs. 64-bit platforms - self.assertEqual(self.rng[4], self.rng[np.int_(4)]) - - def test_getitem_matplotlib_hackaround(self): - values = self.rng[:, None] - expected = self.rng.values[:, None] - self.assert_numpy_array_equal(values, expected) - - def test_shift(self): - - shifted = self.rng.shift(5) - self.assertEqual(shifted[0], self.rng[5]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(-5) - self.assertEqual(shifted[5], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(0) - self.assertEqual(shifted[0], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - with tm.assert_produces_warning(com.PerformanceWarning): - rng = date_range(START, END, freq=BMonthEnd()) - shifted = rng.shift(1, freq=CDay()) - self.assertEqual(shifted[0], rng[0] + CDay()) - - def test_pickle_unpickle(self): - unpickled = self.round_trip_pickle(self.rng) - self.assertIsNotNone(unpickled.offset) - - def test_union(self): - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_union = left.union(right) - tm.assertIsInstance(the_union, Index) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # order does not matter - self.assert_index_equal(right.union(left), the_union) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_union = self.rng.union(rng) - tm.assertIsInstance(the_union, DatetimeIndex) - - def test_outer_join(self): - # should just behave as union - - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_join = self.rng.join(rng, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - def test_intersection_bug(self): - # GH #771 - a = cdate_range('11/30/2011', '12/31/2011') - b = cdate_range('12/10/2011', '12/20/2011') - result = a.intersection(b) - self.assert_index_equal(result, b) - - def test_summary(self): - self.rng.summary() - self.rng[2:2].summary() - - 
def test_summary_pytz(self): - tm._skip_if_no_pytz() - import pytz - cdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() - - def test_summary_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() - - def test_misc(self): - end = datetime(2009, 5, 13) - dr = cdate_range(end=end, periods=20) - firstDate = end - 19 * CDay() - - assert len(dr) == 20 - assert dr[0] == firstDate - assert dr[-1] == end - - def test_date_parse_failure(self): - badly_formed_date = '2007/100/1' - - self.assertRaises(ValueError, Timestamp, badly_formed_date) - - self.assertRaises(ValueError, cdate_range, start=badly_formed_date, - periods=10) - self.assertRaises(ValueError, cdate_range, end=badly_formed_date, - periods=10) - self.assertRaises(ValueError, cdate_range, badly_formed_date, - badly_formed_date) - - def test_equals(self): - self.assertFalse(self.rng.equals(list(self.rng))) - - def test_daterange_bug_456(self): - # GH #456 - rng1 = cdate_range('12/5/2011', '12/5/2011') - rng2 = cdate_range('12/2/2011', '12/5/2011') - rng2.offset = CDay() - - result = rng1.union(rng2) - tm.assertIsInstance(result, DatetimeIndex) - - def test_cdaterange(self): - rng = cdate_range('2013-05-01', periods=3) - xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03']) - self.assert_index_equal(xp, rng) - - def test_cdaterange_weekmask(self): - rng = cdate_range('2013-05-01', periods=3, - weekmask='Sun Mon Tue Wed Thu') - xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05']) - self.assert_index_equal(xp, rng) - - def test_cdaterange_holidays(self): - rng = cdate_range('2013-05-01', periods=3, holidays=['2013-05-01']) - xp = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06']) - self.assert_index_equal(xp, rng) - - def test_cdaterange_weekmask_and_holidays(self): - rng = cdate_range('2013-05-01', periods=3, - weekmask='Sun Mon Tue Wed Thu', - holidays=['2013-05-01']) - xp = DatetimeIndex(['2013-05-02', '2013-05-05', '2013-05-06']) - self.assert_index_equal(xp, rng) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index a39830b6aede6..3459da9d2b5c5 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -6,27 +6,25 @@ """ +import numpy as np +from numpy.random import randn from datetime import datetime, date, timedelta -from pandas import Timestamp, _period -from pandas.tseries.frequencies import MONTHS, DAYS, _period_code_map -from pandas.tseries.period import Period, PeriodIndex, period_range -from pandas.tseries.index import DatetimeIndex, date_range, Index -from pandas.tseries.tools import to_datetime +import pandas as pd +import pandas.util.testing as tm import pandas.tseries.period as period import pandas.tseries.offsets as offsets - -import pandas as pd -import numpy as np -from numpy.random import randn +from pandas.tseries.tools import to_datetime +from pandas.tseries.period import Period, PeriodIndex, period_range +from pandas.tseries.index import DatetimeIndex, date_range, Index +from pandas._period import period_ordinal, period_asfreq from pandas.compat import range, lrange, lmap, zip, text_type, PY3, iteritems from pandas.compat.numpy import np_datetime64_compat - -from pandas import (Series, DataFrame, +from pandas.tseries.frequencies import (MONTHS, DAYS, _period_code_map, + get_freq) +from pandas import (Series, DataFrame, Timestamp, _period, tslib, _np_version_under1p9, _np_version_under1p10, _np_version_under1p12) -from pandas import 
tslib -import pandas.util.testing as tm class TestPeriodProperties(tm.TestCase): @@ -4970,3 +4968,98 @@ def test_get_period_field_raises_on_out_of_range(self): def test_get_period_field_array_raises_on_out_of_range(self): self.assertRaises(ValueError, _period.get_period_field_arr, -1, np.empty(1), 0) + + +class TestTslib(tm.TestCase): + def test_intraday_conversion_factors(self): + self.assertEqual(period_asfreq( + 1, get_freq('D'), get_freq('H'), False), 24) + self.assertEqual(period_asfreq( + 1, get_freq('D'), get_freq('T'), False), 1440) + self.assertEqual(period_asfreq( + 1, get_freq('D'), get_freq('S'), False), 86400) + self.assertEqual(period_asfreq(1, get_freq( + 'D'), get_freq('L'), False), 86400000) + self.assertEqual(period_asfreq(1, get_freq( + 'D'), get_freq('U'), False), 86400000000) + self.assertEqual(period_asfreq(1, get_freq( + 'D'), get_freq('N'), False), 86400000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('H'), get_freq('T'), False), 60) + self.assertEqual(period_asfreq( + 1, get_freq('H'), get_freq('S'), False), 3600) + self.assertEqual(period_asfreq(1, get_freq('H'), + get_freq('L'), False), 3600000) + self.assertEqual(period_asfreq(1, get_freq( + 'H'), get_freq('U'), False), 3600000000) + self.assertEqual(period_asfreq(1, get_freq( + 'H'), get_freq('N'), False), 3600000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('T'), get_freq('S'), False), 60) + self.assertEqual(period_asfreq( + 1, get_freq('T'), get_freq('L'), False), 60000) + self.assertEqual(period_asfreq(1, get_freq( + 'T'), get_freq('U'), False), 60000000) + self.assertEqual(period_asfreq(1, get_freq( + 'T'), get_freq('N'), False), 60000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('S'), get_freq('L'), False), 1000) + self.assertEqual(period_asfreq(1, get_freq('S'), + get_freq('U'), False), 1000000) + self.assertEqual(period_asfreq(1, get_freq( + 'S'), get_freq('N'), False), 1000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('L'), get_freq('U'), False), 1000) + self.assertEqual(period_asfreq(1, get_freq('L'), + get_freq('N'), False), 1000000) + + self.assertEqual(period_asfreq( + 1, get_freq('U'), get_freq('N'), False), 1000) + + def test_period_ordinal_start_values(self): + # information for 1.1.1970 + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('A'))) + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('M'))) + self.assertEqual(1, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('W'))) + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('D'))) + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('B'))) + + def test_period_ordinal_week(self): + self.assertEqual(1, period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0, + get_freq('W'))) + self.assertEqual(2, period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0, + get_freq('W'))) + + self.assertEqual(2284, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, + get_freq('W'))) + self.assertEqual(2285, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, + get_freq('W'))) + + def test_period_ordinal_business_day(self): + # Thursday + self.assertEqual(11415, period_ordinal(2013, 10, 3, 0, 0, 0, 0, 0, + get_freq('B'))) + # Friday + self.assertEqual(11416, period_ordinal(2013, 10, 4, 0, 0, 0, 0, 0, + get_freq('B'))) + # Saturday + self.assertEqual(11417, period_ordinal(2013, 10, 5, 0, 0, 0, 0, 0, + get_freq('B'))) + # Sunday + self.assertEqual(11417, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, + get_freq('B'))) + # Monday + self.assertEqual(11417, 
period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, + get_freq('B'))) + # Tuesday + self.assertEqual(11418, period_ordinal(2013, 10, 8, 0, 0, 0, 0, 0, + get_freq('B'))) diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py deleted file mode 100644 index 170d5cdafa60b..0000000000000 --- a/pandas/tseries/tests/test_timedeltas.py +++ /dev/null @@ -1,2051 +0,0 @@ -# pylint: disable-msg=E1101,W0612 - -from __future__ import division -from datetime import timedelta, time - -from distutils.version import LooseVersion -import numpy as np -import pandas as pd - -from pandas import (Index, Series, DataFrame, Timestamp, Timedelta, - TimedeltaIndex, isnull, date_range, - timedelta_range, Int64Index) -from pandas.compat import range -from pandas import compat, to_timedelta, tslib -from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type as ct -from pandas.util.testing import (assert_series_equal, assert_frame_equal, - assert_almost_equal, assert_index_equal) -from pandas.tseries.offsets import Day, Second -import pandas.util.testing as tm -from numpy.random import randn -from pandas import _np_version_under1p8 - -iNaT = tslib.iNaT - - -class TestTimedeltas(tm.TestCase): - - def setUp(self): - pass - - def test_get_loc_nat(self): - tidx = TimedeltaIndex(['1 days 01:00:00', 'NaT', '2 days 01:00:00']) - - self.assertEqual(tidx.get_loc(pd.NaT), 1) - self.assertEqual(tidx.get_loc(None), 1) - self.assertEqual(tidx.get_loc(float('nan')), 1) - self.assertEqual(tidx.get_loc(np.nan), 1) - - def test_contains(self): - # Checking for any NaT-like objects - # GH 13603 - td = to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) - for v in [pd.NaT, None, float('nan'), np.nan]: - self.assertFalse((v in td)) - - td = to_timedelta([pd.NaT]) - for v in [pd.NaT, None, float('nan'), np.nan]: - self.assertTrue((v in td)) - - def test_construction(self): - - expected = np.timedelta64(10, 'D').astype('m8[ns]').view('i8') - self.assertEqual(Timedelta(10, unit='d').value, expected) - self.assertEqual(Timedelta(10.0, unit='d').value, expected) - self.assertEqual(Timedelta('10 days').value, expected) - self.assertEqual(Timedelta(days=10).value, expected) - self.assertEqual(Timedelta(days=10.0).value, expected) - - expected += np.timedelta64(10, 's').astype('m8[ns]').view('i8') - self.assertEqual(Timedelta('10 days 00:00:10').value, expected) - self.assertEqual(Timedelta(days=10, seconds=10).value, expected) - self.assertEqual( - Timedelta(days=10, milliseconds=10 * 1000).value, expected) - self.assertEqual( - Timedelta(days=10, microseconds=10 * 1000 * 1000).value, expected) - - # test construction with np dtypes - # GH 8757 - timedelta_kwargs = {'days': 'D', - 'seconds': 's', - 'microseconds': 'us', - 'milliseconds': 'ms', - 'minutes': 'm', - 'hours': 'h', - 'weeks': 'W'} - npdtypes = [np.int64, np.int32, np.int16, np.float64, np.float32, - np.float16] - for npdtype in npdtypes: - for pykwarg, npkwarg in timedelta_kwargs.items(): - expected = np.timedelta64(1, - npkwarg).astype('m8[ns]').view('i8') - self.assertEqual( - Timedelta(**{pykwarg: npdtype(1)}).value, expected) - - # rounding cases - self.assertEqual(Timedelta(82739999850000).value, 82739999850000) - self.assertTrue('0 days 22:58:59.999850' in str(Timedelta( - 82739999850000))) - self.assertEqual(Timedelta(123072001000000).value, 123072001000000) - self.assertTrue('1 days 10:11:12.001' in str(Timedelta( - 123072001000000))) - - # string conversion with/without leading zero - # GH 9570 - 
self.assertEqual(Timedelta('0:00:00'), timedelta(hours=0)) - self.assertEqual(Timedelta('00:00:00'), timedelta(hours=0)) - self.assertEqual(Timedelta('-1:00:00'), -timedelta(hours=1)) - self.assertEqual(Timedelta('-01:00:00'), -timedelta(hours=1)) - - # more strings & abbrevs - # GH 8190 - self.assertEqual(Timedelta('1 h'), timedelta(hours=1)) - self.assertEqual(Timedelta('1 hour'), timedelta(hours=1)) - self.assertEqual(Timedelta('1 hr'), timedelta(hours=1)) - self.assertEqual(Timedelta('1 hours'), timedelta(hours=1)) - self.assertEqual(Timedelta('-1 hours'), -timedelta(hours=1)) - self.assertEqual(Timedelta('1 m'), timedelta(minutes=1)) - self.assertEqual(Timedelta('1.5 m'), timedelta(seconds=90)) - self.assertEqual(Timedelta('1 minute'), timedelta(minutes=1)) - self.assertEqual(Timedelta('1 minutes'), timedelta(minutes=1)) - self.assertEqual(Timedelta('1 s'), timedelta(seconds=1)) - self.assertEqual(Timedelta('1 second'), timedelta(seconds=1)) - self.assertEqual(Timedelta('1 seconds'), timedelta(seconds=1)) - self.assertEqual(Timedelta('1 ms'), timedelta(milliseconds=1)) - self.assertEqual(Timedelta('1 milli'), timedelta(milliseconds=1)) - self.assertEqual(Timedelta('1 millisecond'), timedelta(milliseconds=1)) - self.assertEqual(Timedelta('1 us'), timedelta(microseconds=1)) - self.assertEqual(Timedelta('1 micros'), timedelta(microseconds=1)) - self.assertEqual(Timedelta('1 microsecond'), timedelta(microseconds=1)) - self.assertEqual(Timedelta('1.5 microsecond'), - Timedelta('00:00:00.000001500')) - self.assertEqual(Timedelta('1 ns'), Timedelta('00:00:00.000000001')) - self.assertEqual(Timedelta('1 nano'), Timedelta('00:00:00.000000001')) - self.assertEqual(Timedelta('1 nanosecond'), - Timedelta('00:00:00.000000001')) - - # combos - self.assertEqual(Timedelta('10 days 1 hour'), - timedelta(days=10, hours=1)) - self.assertEqual(Timedelta('10 days 1 h'), timedelta(days=10, hours=1)) - self.assertEqual(Timedelta('10 days 1 h 1m 1s'), timedelta( - days=10, hours=1, minutes=1, seconds=1)) - self.assertEqual(Timedelta('-10 days 1 h 1m 1s'), - - timedelta(days=10, hours=1, minutes=1, seconds=1)) - self.assertEqual(Timedelta('-10 days 1 h 1m 1s'), - - timedelta(days=10, hours=1, minutes=1, seconds=1)) - self.assertEqual(Timedelta('-10 days 1 h 1m 1s 3us'), - - timedelta(days=10, hours=1, minutes=1, - seconds=1, microseconds=3)) - self.assertEqual(Timedelta('-10 days 1 h 1.5m 1s 3us'), - - timedelta(days=10, hours=1, minutes=1, - seconds=31, microseconds=3)) - - # currently invalid as it has a - on the hhmmdd part (only allowed on - # the days) - self.assertRaises(ValueError, - lambda: Timedelta('-10 days -1 h 1.5m 1s 3us')) - - # only leading neg signs are allowed - self.assertRaises(ValueError, - lambda: Timedelta('10 days -1 h 1.5m 1s 3us')) - - # no units specified - self.assertRaises(ValueError, lambda: Timedelta('3.1415')) - - # invalid construction - tm.assertRaisesRegexp(ValueError, "cannot construct a Timedelta", - lambda: Timedelta()) - tm.assertRaisesRegexp(ValueError, "unit abbreviation w/o a number", - lambda: Timedelta('foo')) - tm.assertRaisesRegexp(ValueError, - "cannot construct a Timedelta from the passed " - "arguments, allowed keywords are ", - lambda: Timedelta(day=10)) - - # roundtripping both for string and value - for v in ['1s', '-1s', '1us', '-1us', '1 day', '-1 day', - '-23:59:59.999999', '-1 days +23:59:59.999999', '-1ns', - '1ns', '-23:59:59.999999999']: - - td = Timedelta(v) - self.assertEqual(Timedelta(td.value), td) - - # str does not normally display nanos - if 
not td.nanoseconds: - self.assertEqual(Timedelta(str(td)), td) - self.assertEqual(Timedelta(td._repr_base(format='all')), td) - - # floats - expected = np.timedelta64( - 10, 's').astype('m8[ns]').view('i8') + np.timedelta64( - 500, 'ms').astype('m8[ns]').view('i8') - self.assertEqual(Timedelta(10.5, unit='s').value, expected) - - # nat - self.assertEqual(Timedelta('').value, iNaT) - self.assertEqual(Timedelta('nat').value, iNaT) - self.assertEqual(Timedelta('NAT').value, iNaT) - self.assertEqual(Timedelta(None).value, iNaT) - self.assertEqual(Timedelta(np.nan).value, iNaT) - self.assertTrue(isnull(Timedelta('nat'))) - - # offset - self.assertEqual(to_timedelta(pd.offsets.Hour(2)), - Timedelta('0 days, 02:00:00')) - self.assertEqual(Timedelta(pd.offsets.Hour(2)), - Timedelta('0 days, 02:00:00')) - self.assertEqual(Timedelta(pd.offsets.Second(2)), - Timedelta('0 days, 00:00:02')) - - # unicode - # GH 11995 - expected = Timedelta('1H') - result = pd.Timedelta(u'1H') - self.assertEqual(result, expected) - self.assertEqual(to_timedelta(pd.offsets.Hour(2)), - Timedelta(u'0 days, 02:00:00')) - - self.assertRaises(ValueError, lambda: Timedelta(u'foo bar')) - - def test_round(self): - - t1 = Timedelta('1 days 02:34:56.789123456') - t2 = Timedelta('-1 days 02:34:56.789123456') - - for (freq, s1, s2) in [('N', t1, t2), - ('U', Timedelta('1 days 02:34:56.789123000'), - Timedelta('-1 days 02:34:56.789123000')), - ('L', Timedelta('1 days 02:34:56.789000000'), - Timedelta('-1 days 02:34:56.789000000')), - ('S', Timedelta('1 days 02:34:57'), - Timedelta('-1 days 02:34:57')), - ('2S', Timedelta('1 days 02:34:56'), - Timedelta('-1 days 02:34:56')), - ('5S', Timedelta('1 days 02:34:55'), - Timedelta('-1 days 02:34:55')), - ('T', Timedelta('1 days 02:35:00'), - Timedelta('-1 days 02:35:00')), - ('12T', Timedelta('1 days 02:36:00'), - Timedelta('-1 days 02:36:00')), - ('H', Timedelta('1 days 03:00:00'), - Timedelta('-1 days 03:00:00')), - ('d', Timedelta('1 days'), - Timedelta('-1 days'))]: - r1 = t1.round(freq) - self.assertEqual(r1, s1) - r2 = t2.round(freq) - self.assertEqual(r2, s2) - - # invalid - for freq in ['Y', 'M', 'foobar']: - self.assertRaises(ValueError, lambda: t1.round(freq)) - - t1 = timedelta_range('1 days', periods=3, freq='1 min 2 s 3 us') - t2 = -1 * t1 - t1a = timedelta_range('1 days', periods=3, freq='1 min 2 s') - t1c = pd.TimedeltaIndex([1, 1, 1], unit='D') - - # note that negative times round DOWN! 
so don't give whole numbers - for (freq, s1, s2) in [('N', t1, t2), - ('U', t1, t2), - ('L', t1a, - TimedeltaIndex(['-1 days +00:00:00', - '-2 days +23:58:58', - '-2 days +23:57:56'], - dtype='timedelta64[ns]', - freq=None) - ), - ('S', t1a, - TimedeltaIndex(['-1 days +00:00:00', - '-2 days +23:58:58', - '-2 days +23:57:56'], - dtype='timedelta64[ns]', - freq=None) - ), - ('12T', t1c, - TimedeltaIndex(['-1 days', - '-1 days', - '-1 days'], - dtype='timedelta64[ns]', - freq=None) - ), - ('H', t1c, - TimedeltaIndex(['-1 days', - '-1 days', - '-1 days'], - dtype='timedelta64[ns]', - freq=None) - ), - ('d', t1c, - pd.TimedeltaIndex([-1, -1, -1], unit='D') - )]: - - r1 = t1.round(freq) - tm.assert_index_equal(r1, s1) - r2 = t2.round(freq) - tm.assert_index_equal(r2, s2) - - # invalid - for freq in ['Y', 'M', 'foobar']: - self.assertRaises(ValueError, lambda: t1.round(freq)) - - def test_repr(self): - - self.assertEqual(repr(Timedelta(10, unit='d')), - "Timedelta('10 days 00:00:00')") - self.assertEqual(repr(Timedelta(10, unit='s')), - "Timedelta('0 days 00:00:10')") - self.assertEqual(repr(Timedelta(10, unit='ms')), - "Timedelta('0 days 00:00:00.010000')") - self.assertEqual(repr(Timedelta(-10, unit='ms')), - "Timedelta('-1 days +23:59:59.990000')") - - def test_identity(self): - - td = Timedelta(10, unit='d') - self.assertTrue(isinstance(td, Timedelta)) - self.assertTrue(isinstance(td, timedelta)) - - def test_conversion(self): - - for td in [Timedelta(10, unit='d'), - Timedelta('1 days, 10:11:12.012345')]: - pydt = td.to_pytimedelta() - self.assertTrue(td == Timedelta(pydt)) - self.assertEqual(td, pydt) - self.assertTrue(isinstance(pydt, timedelta) and not isinstance( - pydt, Timedelta)) - - self.assertEqual(td, np.timedelta64(td.value, 'ns')) - td64 = td.to_timedelta64() - self.assertEqual(td64, np.timedelta64(td.value, 'ns')) - self.assertEqual(td, td64) - self.assertTrue(isinstance(td64, np.timedelta64)) - - # this is NOT equal and cannot be roundtriped (because of the nanos) - td = Timedelta('1 days, 10:11:12.012345678') - self.assertTrue(td != td.to_pytimedelta()) - - def test_ops(self): - - td = Timedelta(10, unit='d') - self.assertEqual(-td, Timedelta(-10, unit='d')) - self.assertEqual(+td, Timedelta(10, unit='d')) - self.assertEqual(td - td, Timedelta(0, unit='ns')) - self.assertTrue((td - pd.NaT) is pd.NaT) - self.assertEqual(td + td, Timedelta(20, unit='d')) - self.assertTrue((td + pd.NaT) is pd.NaT) - self.assertEqual(td * 2, Timedelta(20, unit='d')) - self.assertTrue((td * pd.NaT) is pd.NaT) - self.assertEqual(td / 2, Timedelta(5, unit='d')) - self.assertEqual(abs(td), td) - self.assertEqual(abs(-td), td) - self.assertEqual(td / td, 1) - self.assertTrue((td / pd.NaT) is np.nan) - - # invert - self.assertEqual(-td, Timedelta('-10d')) - self.assertEqual(td * -1, Timedelta('-10d')) - self.assertEqual(-1 * td, Timedelta('-10d')) - self.assertEqual(abs(-td), Timedelta('10d')) - - # invalid - self.assertRaises(TypeError, lambda: Timedelta(11, unit='d') // 2) - - # invalid multiply with another timedelta - self.assertRaises(TypeError, lambda: td * td) - - # can't operate with integers - self.assertRaises(TypeError, lambda: td + 2) - self.assertRaises(TypeError, lambda: td - 2) - - def test_ops_offsets(self): - td = Timedelta(10, unit='d') - self.assertEqual(Timedelta(241, unit='h'), td + pd.offsets.Hour(1)) - self.assertEqual(Timedelta(241, unit='h'), pd.offsets.Hour(1) + td) - self.assertEqual(240, td / pd.offsets.Hour(1)) - self.assertEqual(1 / 240.0, pd.offsets.Hour(1) / td) - 
self.assertEqual(Timedelta(239, unit='h'), td - pd.offsets.Hour(1)) - self.assertEqual(Timedelta(-239, unit='h'), pd.offsets.Hour(1) - td) - - def test_freq_conversion(self): - - td = Timedelta('1 days 2 hours 3 ns') - result = td / np.timedelta64(1, 'D') - self.assertEqual(result, td.value / float(86400 * 1e9)) - result = td / np.timedelta64(1, 's') - self.assertEqual(result, td.value / float(1e9)) - result = td / np.timedelta64(1, 'ns') - self.assertEqual(result, td.value) - - def test_ops_ndarray(self): - td = Timedelta('1 day') - - # timedelta, timedelta - other = pd.to_timedelta(['1 day']).values - expected = pd.to_timedelta(['2 days']).values - self.assert_numpy_array_equal(td + other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other + td, expected) - self.assertRaises(TypeError, lambda: td + np.array([1])) - self.assertRaises(TypeError, lambda: np.array([1]) + td) - - expected = pd.to_timedelta(['0 days']).values - self.assert_numpy_array_equal(td - other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(-other + td, expected) - self.assertRaises(TypeError, lambda: td - np.array([1])) - self.assertRaises(TypeError, lambda: np.array([1]) - td) - - expected = pd.to_timedelta(['2 days']).values - self.assert_numpy_array_equal(td * np.array([2]), expected) - self.assert_numpy_array_equal(np.array([2]) * td, expected) - self.assertRaises(TypeError, lambda: td * other) - self.assertRaises(TypeError, lambda: other * td) - - self.assert_numpy_array_equal(td / other, - np.array([1], dtype=np.float64)) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other / td, - np.array([1], dtype=np.float64)) - - # timedelta, datetime - other = pd.to_datetime(['2000-01-01']).values - expected = pd.to_datetime(['2000-01-02']).values - self.assert_numpy_array_equal(td + other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other + td, expected) - - expected = pd.to_datetime(['1999-12-31']).values - self.assert_numpy_array_equal(-td + other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other - td, expected) - - def test_ops_series(self): - # regression test for GH8813 - td = Timedelta('1 day') - other = pd.Series([1, 2]) - expected = pd.Series(pd.to_timedelta(['1 day', '2 days'])) - tm.assert_series_equal(expected, td * other) - tm.assert_series_equal(expected, other * td) - - def test_ops_series_object(self): - # GH 13043 - s = pd.Series([pd.Timestamp('2015-01-01', tz='US/Eastern'), - pd.Timestamp('2015-01-01', tz='Asia/Tokyo')], - name='xxx') - self.assertEqual(s.dtype, object) - - exp = pd.Series([pd.Timestamp('2015-01-02', tz='US/Eastern'), - pd.Timestamp('2015-01-02', tz='Asia/Tokyo')], - name='xxx') - tm.assert_series_equal(s + pd.Timedelta('1 days'), exp) - tm.assert_series_equal(pd.Timedelta('1 days') + s, exp) - - # object series & object series - s2 = pd.Series([pd.Timestamp('2015-01-03', tz='US/Eastern'), - pd.Timestamp('2015-01-05', tz='Asia/Tokyo')], - name='xxx') - self.assertEqual(s2.dtype, object) - exp = pd.Series([pd.Timedelta('2 days'), pd.Timedelta('4 days')], - name='xxx') - tm.assert_series_equal(s2 - s, exp) - tm.assert_series_equal(s - s2, -exp) - - s = pd.Series([pd.Timedelta('01:00:00'), pd.Timedelta('02:00:00')], - name='xxx', dtype=object) - self.assertEqual(s.dtype, object) - - exp = pd.Series([pd.Timedelta('01:30:00'), pd.Timedelta('02:30:00')], - name='xxx') - tm.assert_series_equal(s + 
pd.Timedelta('00:30:00'), exp) - tm.assert_series_equal(pd.Timedelta('00:30:00') + s, exp) - - def test_compare_timedelta_series(self): - # regresssion test for GH5963 - s = pd.Series([timedelta(days=1), timedelta(days=2)]) - actual = s > timedelta(days=1) - expected = pd.Series([False, True]) - tm.assert_series_equal(actual, expected) - - def test_compare_timedelta_ndarray(self): - # GH11835 - periods = [Timedelta('0 days 01:00:00'), Timedelta('0 days 01:00:00')] - arr = np.array(periods) - result = arr[0] > arr - expected = np.array([False, False]) - self.assert_numpy_array_equal(result, expected) - - def test_ops_notimplemented(self): - class Other: - pass - - other = Other() - - td = Timedelta('1 day') - self.assertTrue(td.__add__(other) is NotImplemented) - self.assertTrue(td.__sub__(other) is NotImplemented) - self.assertTrue(td.__truediv__(other) is NotImplemented) - self.assertTrue(td.__mul__(other) is NotImplemented) - self.assertTrue(td.__floordiv__(td) is NotImplemented) - - def test_ops_error_str(self): - # GH 13624 - td = Timedelta('1 day') - - for l, r in [(td, 'a'), ('a', td)]: - - with tm.assertRaises(TypeError): - l + r - - with tm.assertRaises(TypeError): - l > r - - self.assertFalse(l == r) - self.assertTrue(l != r) - - def test_fields(self): - def check(value): - # that we are int/long like - self.assertTrue(isinstance(value, (int, compat.long))) - - # compat to datetime.timedelta - rng = to_timedelta('1 days, 10:11:12') - self.assertEqual(rng.days, 1) - self.assertEqual(rng.seconds, 10 * 3600 + 11 * 60 + 12) - self.assertEqual(rng.microseconds, 0) - self.assertEqual(rng.nanoseconds, 0) - - self.assertRaises(AttributeError, lambda: rng.hours) - self.assertRaises(AttributeError, lambda: rng.minutes) - self.assertRaises(AttributeError, lambda: rng.milliseconds) - - # GH 10050 - check(rng.days) - check(rng.seconds) - check(rng.microseconds) - check(rng.nanoseconds) - - td = Timedelta('-1 days, 10:11:12') - self.assertEqual(abs(td), Timedelta('13:48:48')) - self.assertTrue(str(td) == "-1 days +10:11:12") - self.assertEqual(-td, Timedelta('0 days 13:48:48')) - self.assertEqual(-Timedelta('-1 days, 10:11:12').value, 49728000000000) - self.assertEqual(Timedelta('-1 days, 10:11:12').value, -49728000000000) - - rng = to_timedelta('-1 days, 10:11:12.100123456') - self.assertEqual(rng.days, -1) - self.assertEqual(rng.seconds, 10 * 3600 + 11 * 60 + 12) - self.assertEqual(rng.microseconds, 100 * 1000 + 123) - self.assertEqual(rng.nanoseconds, 456) - self.assertRaises(AttributeError, lambda: rng.hours) - self.assertRaises(AttributeError, lambda: rng.minutes) - self.assertRaises(AttributeError, lambda: rng.milliseconds) - - # components - tup = pd.to_timedelta(-1, 'us').components - self.assertEqual(tup.days, -1) - self.assertEqual(tup.hours, 23) - self.assertEqual(tup.minutes, 59) - self.assertEqual(tup.seconds, 59) - self.assertEqual(tup.milliseconds, 999) - self.assertEqual(tup.microseconds, 999) - self.assertEqual(tup.nanoseconds, 0) - - # GH 10050 - check(tup.days) - check(tup.hours) - check(tup.minutes) - check(tup.seconds) - check(tup.milliseconds) - check(tup.microseconds) - check(tup.nanoseconds) - - tup = Timedelta('-1 days 1 us').components - self.assertEqual(tup.days, -2) - self.assertEqual(tup.hours, 23) - self.assertEqual(tup.minutes, 59) - self.assertEqual(tup.seconds, 59) - self.assertEqual(tup.milliseconds, 999) - self.assertEqual(tup.microseconds, 999) - self.assertEqual(tup.nanoseconds, 0) - - def test_timedelta_range(self): - - expected = to_timedelta(np.arange(5), 
unit='D') - result = timedelta_range('0 days', periods=5, freq='D') - tm.assert_index_equal(result, expected) - - expected = to_timedelta(np.arange(11), unit='D') - result = timedelta_range('0 days', '10 days', freq='D') - tm.assert_index_equal(result, expected) - - expected = to_timedelta(np.arange(5), unit='D') + Second(2) + Day() - result = timedelta_range('1 days, 00:00:02', '5 days, 00:00:02', - freq='D') - tm.assert_index_equal(result, expected) - - expected = to_timedelta([1, 3, 5, 7, 9], unit='D') + Second(2) - result = timedelta_range('1 days, 00:00:02', periods=5, freq='2D') - tm.assert_index_equal(result, expected) - - expected = to_timedelta(np.arange(50), unit='T') * 30 - result = timedelta_range('0 days', freq='30T', periods=50) - tm.assert_index_equal(result, expected) - - # GH 11776 - arr = np.arange(10).reshape(2, 5) - df = pd.DataFrame(np.arange(10).reshape(2, 5)) - for arg in (arr, df): - with tm.assertRaisesRegexp(TypeError, "1-d array"): - to_timedelta(arg) - for errors in ['ignore', 'raise', 'coerce']: - with tm.assertRaisesRegexp(TypeError, "1-d array"): - to_timedelta(arg, errors=errors) - - # issue10583 - df = pd.DataFrame(np.random.normal(size=(10, 4))) - df.index = pd.timedelta_range(start='0s', periods=10, freq='s') - expected = df.loc[pd.Timedelta('0s'):, :] - result = df.loc['0s':, :] - assert_frame_equal(expected, result) - - def test_numeric_conversions(self): - self.assertEqual(ct(0), np.timedelta64(0, 'ns')) - self.assertEqual(ct(10), np.timedelta64(10, 'ns')) - self.assertEqual(ct(10, unit='ns'), np.timedelta64( - 10, 'ns').astype('m8[ns]')) - - self.assertEqual(ct(10, unit='us'), np.timedelta64( - 10, 'us').astype('m8[ns]')) - self.assertEqual(ct(10, unit='ms'), np.timedelta64( - 10, 'ms').astype('m8[ns]')) - self.assertEqual(ct(10, unit='s'), np.timedelta64( - 10, 's').astype('m8[ns]')) - self.assertEqual(ct(10, unit='d'), np.timedelta64( - 10, 'D').astype('m8[ns]')) - - def test_timedelta_conversions(self): - self.assertEqual(ct(timedelta(seconds=1)), - np.timedelta64(1, 's').astype('m8[ns]')) - self.assertEqual(ct(timedelta(microseconds=1)), - np.timedelta64(1, 'us').astype('m8[ns]')) - self.assertEqual(ct(timedelta(days=1)), - np.timedelta64(1, 'D').astype('m8[ns]')) - - def test_short_format_converters(self): - def conv(v): - return v.astype('m8[ns]') - - self.assertEqual(ct('10'), np.timedelta64(10, 'ns')) - self.assertEqual(ct('10ns'), np.timedelta64(10, 'ns')) - self.assertEqual(ct('100'), np.timedelta64(100, 'ns')) - self.assertEqual(ct('100ns'), np.timedelta64(100, 'ns')) - - self.assertEqual(ct('1000'), np.timedelta64(1000, 'ns')) - self.assertEqual(ct('1000ns'), np.timedelta64(1000, 'ns')) - self.assertEqual(ct('1000NS'), np.timedelta64(1000, 'ns')) - - self.assertEqual(ct('10us'), np.timedelta64(10000, 'ns')) - self.assertEqual(ct('100us'), np.timedelta64(100000, 'ns')) - self.assertEqual(ct('1000us'), np.timedelta64(1000000, 'ns')) - self.assertEqual(ct('1000Us'), np.timedelta64(1000000, 'ns')) - self.assertEqual(ct('1000uS'), np.timedelta64(1000000, 'ns')) - - self.assertEqual(ct('1ms'), np.timedelta64(1000000, 'ns')) - self.assertEqual(ct('10ms'), np.timedelta64(10000000, 'ns')) - self.assertEqual(ct('100ms'), np.timedelta64(100000000, 'ns')) - self.assertEqual(ct('1000ms'), np.timedelta64(1000000000, 'ns')) - - self.assertEqual(ct('-1s'), -np.timedelta64(1000000000, 'ns')) - self.assertEqual(ct('1s'), np.timedelta64(1000000000, 'ns')) - self.assertEqual(ct('10s'), np.timedelta64(10000000000, 'ns')) - self.assertEqual(ct('100s'), 
np.timedelta64(100000000000, 'ns')) - self.assertEqual(ct('1000s'), np.timedelta64(1000000000000, 'ns')) - - self.assertEqual(ct('1d'), conv(np.timedelta64(1, 'D'))) - self.assertEqual(ct('-1d'), -conv(np.timedelta64(1, 'D'))) - self.assertEqual(ct('1D'), conv(np.timedelta64(1, 'D'))) - self.assertEqual(ct('10D'), conv(np.timedelta64(10, 'D'))) - self.assertEqual(ct('100D'), conv(np.timedelta64(100, 'D'))) - self.assertEqual(ct('1000D'), conv(np.timedelta64(1000, 'D'))) - self.assertEqual(ct('10000D'), conv(np.timedelta64(10000, 'D'))) - - # space - self.assertEqual(ct(' 10000D '), conv(np.timedelta64(10000, 'D'))) - self.assertEqual(ct(' - 10000D '), -conv(np.timedelta64(10000, 'D'))) - - # invalid - self.assertRaises(ValueError, ct, '1foo') - self.assertRaises(ValueError, ct, 'foo') - - def test_full_format_converters(self): - def conv(v): - return v.astype('m8[ns]') - - d1 = np.timedelta64(1, 'D') - - self.assertEqual(ct('1days'), conv(d1)) - self.assertEqual(ct('1days,'), conv(d1)) - self.assertEqual(ct('- 1days,'), -conv(d1)) - - self.assertEqual(ct('00:00:01'), conv(np.timedelta64(1, 's'))) - self.assertEqual(ct('06:00:01'), conv( - np.timedelta64(6 * 3600 + 1, 's'))) - self.assertEqual(ct('06:00:01.0'), conv( - np.timedelta64(6 * 3600 + 1, 's'))) - self.assertEqual(ct('06:00:01.01'), conv( - np.timedelta64(1000 * (6 * 3600 + 1) + 10, 'ms'))) - - self.assertEqual(ct('- 1days, 00:00:01'), - conv(-d1 + np.timedelta64(1, 's'))) - self.assertEqual(ct('1days, 06:00:01'), conv( - d1 + np.timedelta64(6 * 3600 + 1, 's'))) - self.assertEqual(ct('1days, 06:00:01.01'), conv( - d1 + np.timedelta64(1000 * (6 * 3600 + 1) + 10, 'ms'))) - - # invalid - self.assertRaises(ValueError, ct, '- 1days, 00') - - def test_nat_converters(self): - self.assertEqual(to_timedelta( - 'nat', box=False).astype('int64'), tslib.iNaT) - self.assertEqual(to_timedelta( - 'nan', box=False).astype('int64'), tslib.iNaT) - - def test_to_timedelta(self): - def conv(v): - return v.astype('m8[ns]') - - d1 = np.timedelta64(1, 'D') - - self.assertEqual(to_timedelta('1 days 06:05:01.00003', box=False), - conv(d1 + np.timedelta64(6 * 3600 + - 5 * 60 + 1, 's') + - np.timedelta64(30, 'us'))) - self.assertEqual(to_timedelta('15.5us', box=False), - conv(np.timedelta64(15500, 'ns'))) - - # empty string - result = to_timedelta('', box=False) - self.assertEqual(result.astype('int64'), tslib.iNaT) - - result = to_timedelta(['', '']) - self.assertTrue(isnull(result).all()) - - # pass thru - result = to_timedelta(np.array([np.timedelta64(1, 's')])) - expected = pd.Index(np.array([np.timedelta64(1, 's')])) - tm.assert_index_equal(result, expected) - - # ints - result = np.timedelta64(0, 'ns') - expected = to_timedelta(0, box=False) - self.assertEqual(result, expected) - - # Series - expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)]) - result = to_timedelta(Series(['1d', '1days 00:00:01'])) - tm.assert_series_equal(result, expected) - - # with units - result = TimedeltaIndex([np.timedelta64(0, 'ns'), np.timedelta64( - 10, 's').astype('m8[ns]')]) - expected = to_timedelta([0, 10], unit='s') - tm.assert_index_equal(result, expected) - - # single element conversion - v = timedelta(seconds=1) - result = to_timedelta(v, box=False) - expected = np.timedelta64(timedelta(seconds=1)) - self.assertEqual(result, expected) - - v = np.timedelta64(timedelta(seconds=1)) - result = to_timedelta(v, box=False) - expected = np.timedelta64(timedelta(seconds=1)) - self.assertEqual(result, expected) - - # arrays of various dtypes - arr = 
np.array([1] * 5, dtype='int64') - result = to_timedelta(arr, unit='s') - expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) - tm.assert_index_equal(result, expected) - - arr = np.array([1] * 5, dtype='int64') - result = to_timedelta(arr, unit='m') - expected = TimedeltaIndex([np.timedelta64(1, 'm')] * 5) - tm.assert_index_equal(result, expected) - - arr = np.array([1] * 5, dtype='int64') - result = to_timedelta(arr, unit='h') - expected = TimedeltaIndex([np.timedelta64(1, 'h')] * 5) - tm.assert_index_equal(result, expected) - - arr = np.array([1] * 5, dtype='timedelta64[s]') - result = to_timedelta(arr) - expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) - tm.assert_index_equal(result, expected) - - arr = np.array([1] * 5, dtype='timedelta64[D]') - result = to_timedelta(arr) - expected = TimedeltaIndex([np.timedelta64(1, 'D')] * 5) - tm.assert_index_equal(result, expected) - - # Test with lists as input when box=False - expected = np.array(np.arange(3) * 1000000000, dtype='timedelta64[ns]') - result = to_timedelta(range(3), unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) - - result = to_timedelta(np.arange(3), unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) - - result = to_timedelta([0, 1, 2], unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) - - # Tests with fractional seconds as input: - expected = np.array( - [0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]') - result = to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) - - def testit(unit, transform): - - # array - result = to_timedelta(np.arange(5), unit=unit) - expected = TimedeltaIndex([np.timedelta64(i, transform(unit)) - for i in np.arange(5).tolist()]) - tm.assert_index_equal(result, expected) - - # scalar - result = to_timedelta(2, unit=unit) - expected = Timedelta(np.timedelta64(2, transform(unit)).astype( - 'timedelta64[ns]')) - self.assertEqual(result, expected) - - # validate all units - # GH 6855 - for unit in ['Y', 'M', 'W', 'D', 'y', 'w', 'd']: - testit(unit, lambda x: x.upper()) - for unit in ['days', 'day', 'Day', 'Days']: - testit(unit, lambda x: 'D') - for unit in ['h', 'm', 's', 'ms', 'us', 'ns', 'H', 'S', 'MS', 'US', - 'NS']: - testit(unit, lambda x: x.lower()) - - # offsets - - # m - testit('T', lambda x: 'm') - - # ms - testit('L', lambda x: 'ms') - - def test_to_timedelta_invalid(self): - - # bad value for errors parameter - msg = "errors must be one of" - tm.assertRaisesRegexp(ValueError, msg, to_timedelta, - ['foo'], errors='never') - - # these will error - self.assertRaises(ValueError, lambda: to_timedelta([1, 2], unit='foo')) - self.assertRaises(ValueError, lambda: to_timedelta(1, unit='foo')) - - # time not supported ATM - self.assertRaises(ValueError, lambda: to_timedelta(time(second=1))) - self.assertTrue(to_timedelta( - time(second=1), errors='coerce') is pd.NaT) - - self.assertRaises(ValueError, lambda: to_timedelta(['foo', 'bar'])) - tm.assert_index_equal(TimedeltaIndex([pd.NaT, pd.NaT]), - to_timedelta(['foo', 'bar'], errors='coerce')) - - tm.assert_index_equal(TimedeltaIndex(['1 day', pd.NaT, '1 min']), - to_timedelta(['1 day', 'bar', '1 min'], - errors='coerce')) - - # gh-13613: these should not error because errors='ignore' - invalid_data = 'apple' - self.assertEqual(invalid_data, to_timedelta( - invalid_data, errors='ignore')) - - invalid_data = ['apple', '1 days'] - tm.assert_numpy_array_equal( - np.array(invalid_data, dtype=object), - to_timedelta(invalid_data,
errors='ignore')) - - invalid_data = pd.Index(['apple', '1 days']) - tm.assert_index_equal(invalid_data, to_timedelta( - invalid_data, errors='ignore')) - - invalid_data = Series(['apple', '1 days']) - tm.assert_series_equal(invalid_data, to_timedelta( - invalid_data, errors='ignore')) - - def test_to_timedelta_via_apply(self): - # GH 5458 - expected = Series([np.timedelta64(1, 's')]) - result = Series(['00:00:01']).apply(to_timedelta) - tm.assert_series_equal(result, expected) - - result = Series([to_timedelta('00:00:01')]) - tm.assert_series_equal(result, expected) - - def test_timedelta_ops(self): - # GH4984 - # make sure ops return Timedelta - s = Series([Timestamp('20130101') + timedelta(seconds=i * i) - for i in range(10)]) - td = s.diff() - - result = td.mean() - expected = to_timedelta(timedelta(seconds=9)) - self.assertEqual(result, expected) - - result = td.to_frame().mean() - self.assertEqual(result[0], expected) - - result = td.quantile(.1) - expected = Timedelta(np.timedelta64(2600, 'ms')) - self.assertEqual(result, expected) - - result = td.median() - expected = to_timedelta('00:00:09') - self.assertEqual(result, expected) - - result = td.to_frame().median() - self.assertEqual(result[0], expected) - - # GH 6462 - # consistency in returned values for sum - result = td.sum() - expected = to_timedelta('00:01:21') - self.assertEqual(result, expected) - - result = td.to_frame().sum() - self.assertEqual(result[0], expected) - - # std - result = td.std() - expected = to_timedelta(Series(td.dropna().values).std()) - self.assertEqual(result, expected) - - result = td.to_frame().std() - self.assertEqual(result[0], expected) - - # invalid ops - for op in ['skew', 'kurt', 'sem', 'prod']: - self.assertRaises(TypeError, getattr(td, op)) - - # GH 10040 - # make sure NaT is properly handled by median() - s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')]) - self.assertEqual(s.diff().median(), timedelta(days=4)) - - s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'), - Timestamp('2015-02-15')]) - self.assertEqual(s.diff().median(), timedelta(days=6)) - - def test_overflow(self): - # GH 9442 - s = Series(pd.date_range('20130101', periods=100000, freq='H')) - s[0] += pd.Timedelta('1s 1ms') - - # mean - result = (s - s.min()).mean() - expected = pd.Timedelta((pd.DatetimeIndex((s - s.min())).asi8 / len(s) - ).sum()) - - # the computation is converted to float, so there may be some loss of - # precision - self.assertTrue(np.allclose(result.value / 1000, expected.value / - 1000)) - - # sum - self.assertRaises(ValueError, lambda: (s - s.min()).sum()) - s1 = s[0:10000] - self.assertRaises(ValueError, lambda: (s1 - s1.min()).sum()) - s2 = s[0:1000] - result = (s2 - s2.min()).sum() - - def test_overflow_on_construction(self): - # xref https://github.com/statsmodels/statsmodels/issues/3374 - value = pd.Timedelta('1day').value * 20169940 - self.assertRaises(OverflowError, pd.Timedelta, value) - - def test_timedelta_ops_scalar(self): - # GH 6808 - base = pd.to_datetime('20130101 09:01:12.123456') - expected_add = pd.to_datetime('20130101 09:01:22.123456') - expected_sub = pd.to_datetime('20130101 09:01:02.123456') - - for offset in [pd.to_timedelta(10, unit='s'), timedelta(seconds=10), - np.timedelta64(10, 's'), - np.timedelta64(10000000000, 'ns'), - pd.offsets.Second(10)]: - result = base + offset - self.assertEqual(result, expected_add) - - result = base - offset - self.assertEqual(result, expected_sub) - - base = pd.to_datetime('20130102 09:01:12.123456') - expected_add =
pd.to_datetime('20130103 09:01:22.123456') - expected_sub = pd.to_datetime('20130101 09:01:02.123456') - - for offset in [pd.to_timedelta('1 day, 00:00:10'), - pd.to_timedelta('1 days, 00:00:10'), - timedelta(days=1, seconds=10), - np.timedelta64(1, 'D') + np.timedelta64(10, 's'), - pd.offsets.Day() + pd.offsets.Second(10)]: - result = base + offset - self.assertEqual(result, expected_add) - - result = base - offset - self.assertEqual(result, expected_sub) - - def test_to_timedelta_on_missing_values(self): - # GH5438 - timedelta_NaT = np.timedelta64('NaT') - - actual = pd.to_timedelta(Series(['00:00:01', np.nan])) - expected = Series([np.timedelta64(1000000000, 'ns'), - timedelta_NaT], dtype=' idx1 - expected = np.array([True, False, False, False, True, False]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 <= idx2 - expected = np.array([True, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx2 >= idx1 - expected = np.array([True, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 == idx2 - expected = np.array([False, False, False, False, False, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 != idx2 - expected = np.array([True, True, True, True, True, False]) - self.assert_numpy_array_equal(result, expected) - - def test_ops_error_str(self): - # GH 13624 - tdi = TimedeltaIndex(['1 day', '2 days']) - - for l, r in [(tdi, 'a'), ('a', tdi)]: - with tm.assertRaises(TypeError): - l + r - - with tm.assertRaises(TypeError): - l > r - - with tm.assertRaises(TypeError): - l == r - - with tm.assertRaises(TypeError): - l != r - - def test_map(self): - - rng = timedelta_range('1 day', periods=10) - - f = lambda x: x.days - result = rng.map(f) - exp = Int64Index([f(x) for x in rng]) - tm.assert_index_equal(result, exp) - - def test_misc_coverage(self): - - rng = timedelta_range('1 day', periods=5) - result = rng.groupby(rng.days) - tm.assertIsInstance(list(result.values())[0][0], Timedelta) - - idx = TimedeltaIndex(['3d', '1d', '2d']) - self.assertFalse(idx.equals(list(idx))) - - non_td = Index(list('abc')) - self.assertFalse(idx.equals(list(non_td))) - - def test_union(self): - - i1 = timedelta_range('1day', periods=5) - i2 = timedelta_range('3day', periods=5) - result = i1.union(i2) - expected = timedelta_range('1day', periods=7) - self.assert_index_equal(result, expected) - - i1 = Int64Index(np.arange(0, 20, 2)) - i2 = TimedeltaIndex(start='1 day', periods=10, freq='D') - i1.union(i2) # Works - i2.union(i1) # Fails with "AttributeError: can't set attribute" - - def test_union_coverage(self): - - idx = TimedeltaIndex(['3d', '1d', '2d']) - ordered = TimedeltaIndex(idx.sort_values(), freq='infer') - result = ordered.union(idx) - self.assert_index_equal(result, ordered) - - result = ordered[:0].union(ordered) - self.assert_index_equal(result, ordered) - self.assertEqual(result.freq, ordered.freq) - - def test_union_bug_1730(self): - - rng_a = timedelta_range('1 day', periods=4, freq='3H') - rng_b = timedelta_range('1 day', periods=4, freq='4H') - - result = rng_a.union(rng_b) - exp = TimedeltaIndex(sorted(set(list(rng_a)) | set(list(rng_b)))) - self.assert_index_equal(result, exp) - - def test_union_bug_1745(self): - - left = TimedeltaIndex(['1 day 15:19:49.695000']) - right = TimedeltaIndex(['2 day 13:04:21.322000', - '1 day 15:27:24.873000', - '1 day 15:31:05.350000']) - - result = left.union(right) - exp = TimedeltaIndex(sorted(set(list(left)) | 
set(list(right)))) - self.assert_index_equal(result, exp) - - def test_union_bug_4564(self): - - left = timedelta_range("1 day", "30d") - right = left + pd.offsets.Minute(15) - - result = left.union(right) - exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right)))) - self.assert_index_equal(result, exp) - - def test_intersection_bug_1708(self): - index_1 = timedelta_range('1 day', periods=4, freq='h') - index_2 = index_1 + pd.offsets.Hour(5) - - result = index_1 & index_2 - self.assertEqual(len(result), 0) - - index_1 = timedelta_range('1 day', periods=4, freq='h') - index_2 = index_1 + pd.offsets.Hour(1) - - result = index_1 & index_2 - expected = timedelta_range('1 day 01:00:00', periods=3, freq='h') - tm.assert_index_equal(result, expected) - - def test_get_duplicates(self): - idx = TimedeltaIndex(['1 day', '2 day', '2 day', '3 day', '3day', - '4day']) - - result = idx.get_duplicates() - ex = TimedeltaIndex(['2 day', '3day']) - self.assert_index_equal(result, ex) - - def test_argmin_argmax(self): - idx = TimedeltaIndex(['1 day 00:00:05', '1 day 00:00:01', - '1 day 00:00:02']) - self.assertEqual(idx.argmin(), 1) - self.assertEqual(idx.argmax(), 0) - - def test_sort_values(self): - - idx = TimedeltaIndex(['4d', '1d', '2d']) - - ordered = idx.sort_values() - self.assertTrue(ordered.is_monotonic) - - ordered = idx.sort_values(ascending=False) - self.assertTrue(ordered[::-1].is_monotonic) - - ordered, dexer = idx.sort_values(return_indexer=True) - self.assertTrue(ordered.is_monotonic) - self.assert_numpy_array_equal(dexer, - np.array([1, 2, 0]), - check_dtype=False) - - ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) - self.assertTrue(ordered[::-1].is_monotonic) - self.assert_numpy_array_equal(dexer, - np.array([0, 2, 1]), - check_dtype=False) - - def test_insert(self): - - idx = TimedeltaIndex(['4day', '1day', '2day'], name='idx') - - result = idx.insert(2, timedelta(days=5)) - exp = TimedeltaIndex(['4day', '1day', '5day', '2day'], name='idx') - self.assert_index_equal(result, exp) - - # insertion of non-datetime should coerce to object index - result = idx.insert(1, 'inserted') - expected = Index([Timedelta('4day'), 'inserted', Timedelta('1day'), - Timedelta('2day')], name='idx') - self.assertNotIsInstance(result, TimedeltaIndex) - tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - - idx = timedelta_range('1day 00:00:01', periods=3, freq='s', name='idx') - - # preserve freq - expected_0 = TimedeltaIndex(['1day', '1day 00:00:01', '1day 00:00:02', - '1day 00:00:03'], - name='idx', freq='s') - expected_3 = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', - '1day 00:00:03', '1day 00:00:04'], - name='idx', freq='s') - - # reset freq to None - expected_1_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:01', - '1day 00:00:02', '1day 00:00:03'], - name='idx', freq=None) - expected_3_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', - '1day 00:00:03', '1day 00:00:05'], - name='idx', freq=None) - - cases = [(0, Timedelta('1day'), expected_0), - (-3, Timedelta('1day'), expected_0), - (3, Timedelta('1day 00:00:04'), expected_3), - (1, Timedelta('1day 00:00:01'), expected_1_nofreq), - (3, Timedelta('1day 00:00:05'), expected_3_nofreq)] - - for n, d, expected in cases: - result = idx.insert(n, d) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - - def test_delete(self): - idx = timedelta_range(start='1 Days', periods=5, freq='D', 
name='idx') - - # preserve freq - expected_0 = timedelta_range(start='2 Days', periods=4, freq='D', - name='idx') - expected_4 = timedelta_range(start='1 Days', periods=4, freq='D', - name='idx') - - # reset freq to None - expected_1 = TimedeltaIndex( - ['1 day', '3 day', '4 day', '5 day'], freq=None, name='idx') - - cases = {0: expected_0, - -5: expected_0, - -1: expected_4, - 4: expected_4, - 1: expected_1} - for n, expected in compat.iteritems(cases): - result = idx.delete(n) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - - with tm.assertRaises((IndexError, ValueError)): - # either, depending on numpy version - result = idx.delete(5) - - def test_delete_slice(self): - idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx') - - # preserve freq - expected_0_2 = timedelta_range(start='4 days', periods=7, freq='D', - name='idx') - expected_7_9 = timedelta_range(start='1 days', periods=7, freq='D', - name='idx') - - # reset freq to None - expected_3_5 = TimedeltaIndex(['1 d', '2 d', '3 d', - '7 d', '8 d', '9 d', '10d'], - freq=None, name='idx') - - cases = {(0, 1, 2): expected_0_2, - (7, 8, 9): expected_7_9, - (3, 4, 5): expected_3_5} - for n, expected in compat.iteritems(cases): - result = idx.delete(n) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - - result = idx.delete(slice(n[0], n[-1] + 1)) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - - def test_take(self): - - tds = ['1day 02:00:00', '1 day 04:00:00', '1 day 10:00:00'] - idx = TimedeltaIndex(start='1d', end='2d', freq='H', name='idx') - expected = TimedeltaIndex(tds, freq=None, name='idx') - - taken1 = idx.take([2, 4, 10]) - taken2 = idx[[2, 4, 10]] - - for taken in [taken1, taken2]: - self.assert_index_equal(taken, expected) - tm.assertIsInstance(taken, TimedeltaIndex) - self.assertIsNone(taken.freq) - self.assertEqual(taken.name, expected.name) - - def test_take_fill_value(self): - # GH 12631 - idx = pd.TimedeltaIndex(['1 days', '2 days', '3 days'], - name='xxx') - result = idx.take(np.array([1, 0, -1])) - expected = pd.TimedeltaIndex(['2 days', '1 days', '3 days'], - name='xxx') - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.TimedeltaIndex(['2 days', '1 days', 'NaT'], - name='xxx') - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - expected = pd.TimedeltaIndex(['2 days', '1 days', '3 days'], - name='xxx') - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with tm.assertRaises(IndexError): - idx.take(np.array([1, -5])) - - def test_isin(self): - - index = tm.makeTimedeltaIndex(4) - result = index.isin(index) - self.assertTrue(result.all()) - - result = index.isin(list(index)) - self.assertTrue(result.all()) - - assert_almost_equal(index.isin([index[2], 5]), - np.array([False, False, True, False])) - - def test_does_not_convert_mixed_integer(self): - df = tm.makeCustomDataframe(10, 10, -
data_gen_f=lambda *args, **kwargs: randn(), - r_idx_type='i', c_idx_type='td') - str(df) - - cols = df.columns.join(df.index, how='outer') - joined = cols.join(df.columns) - self.assertEqual(cols.dtype, np.dtype('O')) - self.assertEqual(cols.dtype, joined.dtype) - tm.assert_index_equal(cols, joined) - - def test_slice_keeps_name(self): - - # GH4226 - dr = pd.timedelta_range('1d', '5d', freq='H', name='timebucket') - self.assertEqual(dr[1:].name, dr.name) - - def test_join_self(self): - - index = timedelta_range('1 day', periods=10) - kinds = 'outer', 'inner', 'left', 'right' - for kind in kinds: - joined = index.join(index, how=kind) - tm.assert_index_equal(index, joined) - - def test_factorize(self): - idx1 = TimedeltaIndex(['1 day', '1 day', '2 day', '2 day', '3 day', - '3 day']) - - exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) - exp_idx = TimedeltaIndex(['1 day', '2 day', '3 day']) - - arr, idx = idx1.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - self.assert_index_equal(idx, exp_idx) - - arr, idx = idx1.factorize(sort=True) - self.assert_numpy_array_equal(arr, exp_arr) - self.assert_index_equal(idx, exp_idx) - - # freq must be preserved - idx3 = timedelta_range('1 day', periods=4, freq='s') - exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) - arr, idx = idx3.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - self.assert_index_equal(idx, idx3) - - -class TestSlicing(tm.TestCase): - - def test_partial_slice(self): - rng = timedelta_range('1 day 10:11:12', freq='h', periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s['5 day':'6 day'] - expected = s.iloc[86:134] - assert_series_equal(result, expected) - - result = s['5 day':] - expected = s.iloc[86:] - assert_series_equal(result, expected) - - result = s[:'6 day'] - expected = s.iloc[:134] - assert_series_equal(result, expected) - - result = s['6 days, 23:11:12'] - self.assertEqual(result, s.iloc[133]) - - self.assertRaises(KeyError, s.__getitem__, '50 days') - - def test_partial_slice_high_reso(self): - - # higher reso - rng = timedelta_range('1 day 10:11:12', freq='us', periods=2000) - s = Series(np.arange(len(rng)), index=rng) - - result = s['1 day 10:11:12':] - expected = s.iloc[0:] - assert_series_equal(result, expected) - - result = s['1 day 10:11:12.001':] - expected = s.iloc[1000:] - assert_series_equal(result, expected) - - result = s['1 days, 10:11:12.001001'] - self.assertEqual(result, s.iloc[1001]) - - def test_slice_with_negative_step(self): - ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) - SLC = pd.IndexSlice - - def assert_slices_equivalent(l_slc, i_slc): - assert_series_equal(ts[l_slc], ts.iloc[i_slc]) - assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - - assert_slices_equivalent(SLC[Timedelta(hours=7)::-1], SLC[7::-1]) - assert_slices_equivalent(SLC['7 hours'::-1], SLC[7::-1]) - - assert_slices_equivalent(SLC[:Timedelta(hours=7):-1], SLC[:6:-1]) - assert_slices_equivalent(SLC[:'7 hours':-1], SLC[:6:-1]) - - assert_slices_equivalent(SLC['15 hours':'7 hours':-1], SLC[15:6:-1]) - assert_slices_equivalent(SLC[Timedelta(hours=15):Timedelta(hours=7):- - 1], SLC[15:6:-1]) - assert_slices_equivalent(SLC['15 hours':Timedelta(hours=7):-1], - SLC[15:6:-1]) - assert_slices_equivalent(SLC[Timedelta(hours=15):'7 hours':-1], - SLC[15:6:-1]) - - assert_slices_equivalent(SLC['7 hours':'15 hours':-1], SLC[:0]) - - def test_slice_with_zero_step_raises(self): - ts = Series(np.arange(20), timedelta_range('0', 
periods=20, freq='H')) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - - def test_tdi_ops_attributes(self): - rng = timedelta_range('2 days', periods=5, freq='2D', name='x') - - result = rng + 1 - exp = timedelta_range('4 days', periods=5, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - result = rng - 2 - exp = timedelta_range('-2 days', periods=5, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - result = rng * 2 - exp = timedelta_range('4 days', periods=5, freq='4D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '4D') - - result = rng / 2 - exp = timedelta_range('1 days', periods=5, freq='D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, 'D') - - result = -rng - exp = timedelta_range('-2 days', periods=5, freq='-2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '-2D') - - rng = pd.timedelta_range('-2 days', periods=5, freq='D', name='x') - - result = abs(rng) - exp = TimedeltaIndex(['2 days', '1 days', '0 days', '1 days', - '2 days'], name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, None) - - def test_add_overflow(self): - # see gh-14068 - msg = "too (big|large) to convert" - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta(106580, 'D') + Timestamp('2000') - with tm.assertRaisesRegexp(OverflowError, msg): - Timestamp('2000') + to_timedelta(106580, 'D') - - _NaT = int(pd.NaT) + 1 - msg = "Overflow in int64 addition" - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta([106580], 'D') + Timestamp('2000') - with tm.assertRaisesRegexp(OverflowError, msg): - Timestamp('2000') + to_timedelta([106580], 'D') - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta([_NaT]) - Timedelta('1 days') - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta(['5 days', _NaT]) - Timedelta('1 days') - with tm.assertRaisesRegexp(OverflowError, msg): - (to_timedelta([_NaT, '5 days', '1 hours']) - - to_timedelta(['7 seconds', _NaT, '4 hours'])) - - # These should not overflow! 
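- # NaT is stored as the int64 sentinel iNaT (the minimal int64), so - # naive integer arithmetic on it would wrap; the add/sub path is - # expected to mask those slots before the overflow-checked integer - # op, which is why the NaT cases below return NaT rather than raising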
- exp = TimedeltaIndex([pd.NaT]) - result = to_timedelta([pd.NaT]) - Timedelta('1 days') - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex(['4 days', pd.NaT]) - result = to_timedelta(['5 days', pd.NaT]) - Timedelta('1 days') - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex([pd.NaT, pd.NaT, '5 hours']) - result = (to_timedelta([pd.NaT, '5 days', '1 hours']) + - to_timedelta(['7 seconds', pd.NaT, '4 hours'])) - tm.assert_index_equal(result, exp) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 38cd8079faf93..771fb2f50c410 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -1,23 +1,20 @@ # pylint: disable-msg=E1101,W0612 -from datetime import datetime, timedelta, tzinfo, date -import numpy as np import pytz +import numpy as np from distutils.version import LooseVersion -from pandas.types.dtypes import DatetimeTZDtype -from pandas import (Index, Series, DataFrame, isnull, Timestamp) - -from pandas import DatetimeIndex, to_datetime, NaT -from pandas import tslib - -import pandas.tseries.offsets as offsets -from pandas.tseries.index import bdate_range, date_range -import pandas.tseries.tools as tools +from datetime import datetime, timedelta, tzinfo, date from pytz import NonExistentTimeError import pandas.util.testing as tm +import pandas.tseries.tools as tools +import pandas.tseries.offsets as offsets +from pandas.compat import lrange, zip +from pandas.tseries.index import bdate_range, date_range +from pandas.types.dtypes import DatetimeTZDtype +from pandas import (Index, Series, DataFrame, isnull, Timestamp, tslib, NaT, + DatetimeIndex, to_datetime) from pandas.util.testing import (assert_frame_equal, assert_series_equal, set_timezone) -from pandas.compat import lrange, zip try: import pytz # noqa @@ -1679,3 +1676,52 @@ def test_nat(self): idx = idx.tz_convert('US/Eastern') expected = ['2010-12-01 11:00', '2010-12-02 11:00', NaT] self.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) + + +class TestTslib(tm.TestCase): + + def test_tslib_tz_convert(self): + def compare_utc_to_local(tz_didx, utc_didx): + f = lambda x: tslib.tz_convert_single(x, 'UTC', tz_didx.tz) + result = tslib.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz) + result_single = np.vectorize(f)(tz_didx.asi8) + self.assert_numpy_array_equal(result, result_single) + + def compare_local_to_utc(tz_didx, utc_didx): + f = lambda x: tslib.tz_convert_single(x, tz_didx.tz, 'UTC') + result = tslib.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC') + result_single = np.vectorize(f)(utc_didx.asi8) + self.assert_numpy_array_equal(result, result_single) + + for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'Europe/Moscow']: + # US: 2014-03-09 - 2014-11-11 + # MOSCOW: 2014-10-26 / 2014-12-31 + tz_didx = date_range('2014-03-01', '2015-01-10', freq='H', tz=tz) + utc_didx = date_range('2014-03-01', '2015-01-10', freq='H') + compare_utc_to_local(tz_didx, utc_didx) + # local tz to UTC can differ in hourly (or higher) freqs because + # of DST + compare_local_to_utc(tz_didx, utc_didx) + + tz_didx = date_range('2000-01-01', '2020-01-01', freq='D', tz=tz) + utc_didx = date_range('2000-01-01', '2020-01-01', freq='D') + compare_utc_to_local(tz_didx, utc_didx) + compare_local_to_utc(tz_didx, utc_didx) + + tz_didx = date_range('2000-01-01', '2100-01-01', freq='A', tz=tz) + utc_didx = date_range('2000-01-01', '2100-01-01', freq='A') + compare_utc_to_local(tz_didx, utc_didx) + compare_local_to_utc(tz_didx, utc_didx) + + # Check
empty array + result = tslib.tz_convert(np.array([], dtype=np.int64), + tslib.maybe_get_tz('US/Eastern'), + tslib.maybe_get_tz('Asia/Tokyo')) + self.assert_numpy_array_equal(result, np.array([], dtype=np.int64)) + + # Check all-NaT array + result = tslib.tz_convert(np.array([tslib.iNaT], dtype=np.int64), + tslib.maybe_get_tz('US/Eastern'), + tslib.maybe_get_tz('Asia/Tokyo')) + self.assert_numpy_array_equal(result, np.array( + [tslib.iNaT], dtype=np.int64)) diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py deleted file mode 100644 index a141d445e6035..0000000000000 --- a/pandas/tseries/tests/test_tslib.py +++ /dev/null @@ -1,694 +0,0 @@ -import datetime -import numpy as np -from distutils.version import LooseVersion - -import pandas as pd -import pandas.util.testing as tm -from pandas import tslib, lib, compat -from pandas.tseries import offsets, tools -from pandas.tseries.frequencies import get_freq -from pandas.tseries.index import date_range, DatetimeIndex -from pandas.util.testing import _skip_if_has_locale -from pandas._period import period_ordinal, period_asfreq -from pandas.compat.numpy import np_array_datetime64_compat -from pandas.core.api import Timestamp, to_datetime, Index, Series - - -class TestTsUtil(tm.TestCase): - - def test_try_parse_dates(self): - from dateutil.parser import parse - arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object) - - result = lib.try_parse_dates(arr, dayfirst=True) - expected = [parse(d, dayfirst=True) for d in arr] - self.assertTrue(np.array_equal(result, expected)) - - def test_min_valid(self): - # Ensure that Timestamp.min is a valid Timestamp - Timestamp(Timestamp.min) - - def test_max_valid(self): - # Ensure that Timestamp.max is a valid Timestamp - Timestamp(Timestamp.max) - - def test_to_datetime_bijective(self): - # Ensure that converting to datetime and back only loses precision - # by going from nanoseconds to microseconds. 
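- # e.g. Timestamp.max.value is 9223372036854775807 (the int64 max, in - # ns); to_pydatetime() keeps only microsecond resolution, so both - # sides below divide .value by 1000 and compare at us resolution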
- exp_warning = None if Timestamp.max.nanosecond == 0 else UserWarning - with tm.assert_produces_warning(exp_warning, check_stacklevel=False): - self.assertEqual( - Timestamp(Timestamp.max.to_pydatetime()).value / 1000, - Timestamp.max.value / 1000) - - exp_warning = None if Timestamp.min.nanosecond == 0 else UserWarning - with tm.assert_produces_warning(exp_warning, check_stacklevel=False): - self.assertEqual( - Timestamp(Timestamp.min.to_pydatetime()).value / 1000, - Timestamp.min.value / 1000) - - -class TestDatetimeParsingWrappers(tm.TestCase): - - def test_does_not_convert_mixed_integer(self): - bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') - - for bad_date_string in bad_date_strings: - self.assertFalse(tslib._does_string_look_like_datetime( - bad_date_string)) - - good_date_strings = ('2012-01-01', - '01/01/2012', - 'Mon Sep 16, 2013', - '01012012', - '0101', - '1-1', ) - - for good_date_string in good_date_strings: - self.assertTrue(tslib._does_string_look_like_datetime( - good_date_string)) - - def test_parsers(self): - - # https://github.com/dateutil/dateutil/issues/217 - import dateutil - yearfirst = dateutil.__version__ >= LooseVersion('2.5.0') - - cases = {'2011-01-01': datetime.datetime(2011, 1, 1), - '2Q2005': datetime.datetime(2005, 4, 1), - '2Q05': datetime.datetime(2005, 4, 1), - '2005Q1': datetime.datetime(2005, 1, 1), - '05Q1': datetime.datetime(2005, 1, 1), - '2011Q3': datetime.datetime(2011, 7, 1), - '11Q3': datetime.datetime(2011, 7, 1), - '3Q2011': datetime.datetime(2011, 7, 1), - '3Q11': datetime.datetime(2011, 7, 1), - - # quarterly without space - '2000Q4': datetime.datetime(2000, 10, 1), - '00Q4': datetime.datetime(2000, 10, 1), - '4Q2000': datetime.datetime(2000, 10, 1), - '4Q00': datetime.datetime(2000, 10, 1), - '2000q4': datetime.datetime(2000, 10, 1), - '2000-Q4': datetime.datetime(2000, 10, 1), - '00-Q4': datetime.datetime(2000, 10, 1), - '4Q-2000': datetime.datetime(2000, 10, 1), - '4Q-00': datetime.datetime(2000, 10, 1), - '00q4': datetime.datetime(2000, 10, 1), - '2005': datetime.datetime(2005, 1, 1), - '2005-11': datetime.datetime(2005, 11, 1), - '2005 11': datetime.datetime(2005, 11, 1), - '11-2005': datetime.datetime(2005, 11, 1), - '11 2005': datetime.datetime(2005, 11, 1), - '200511': datetime.datetime(2020, 5, 11), - '20051109': datetime.datetime(2005, 11, 9), - '20051109 10:15': datetime.datetime(2005, 11, 9, 10, 15), - '20051109 08H': datetime.datetime(2005, 11, 9, 8, 0), - '2005-11-09 10:15': datetime.datetime(2005, 11, 9, 10, 15), - '2005-11-09 08H': datetime.datetime(2005, 11, 9, 8, 0), - '2005/11/09 10:15': datetime.datetime(2005, 11, 9, 10, 15), - '2005/11/09 08H': datetime.datetime(2005, 11, 9, 8, 0), - "Thu Sep 25 10:36:28 2003": datetime.datetime(2003, 9, 25, 10, - 36, 28), - "Thu Sep 25 2003": datetime.datetime(2003, 9, 25), - "Sep 25 2003": datetime.datetime(2003, 9, 25), - "January 1 2014": datetime.datetime(2014, 1, 1), - - # GH 10537 - '2014-06': datetime.datetime(2014, 6, 1), - '06-2014': datetime.datetime(2014, 6, 1), - '2014-6': datetime.datetime(2014, 6, 1), - '6-2014': datetime.datetime(2014, 6, 1), - - '20010101 12': datetime.datetime(2001, 1, 1, 12), - '20010101 1234': datetime.datetime(2001, 1, 1, 12, 34), - '20010101 123456': datetime.datetime(2001, 1, 1, 12, 34, 56), - } - - for date_str, expected in compat.iteritems(cases): - result1, _, _ = tools.parse_time_string(date_str, - yearfirst=yearfirst) - result2 = to_datetime(date_str, yearfirst=yearfirst) - result3 = to_datetime([date_str], 
yearfirst=yearfirst) - # result5 is used below - result4 = to_datetime(np.array([date_str], dtype=object), - yearfirst=yearfirst) - result6 = DatetimeIndex([date_str], yearfirst=yearfirst) - # result7 is used below - result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst) - result9 = DatetimeIndex(Series([date_str]), yearfirst=yearfirst) - - for res in [result1, result2]: - self.assertEqual(res, expected) - for res in [result3, result4, result6, result8, result9]: - exp = DatetimeIndex([pd.Timestamp(expected)]) - tm.assert_index_equal(res, exp) - - # these really need to have yearfirst, but we don't support - if not yearfirst: - result5 = Timestamp(date_str) - self.assertEqual(result5, expected) - result7 = date_range(date_str, freq='S', periods=1, - yearfirst=yearfirst) - self.assertEqual(result7, expected) - - # NaT - result1, _, _ = tools.parse_time_string('NaT') - result2 = to_datetime('NaT') - result3 = Timestamp('NaT') - result4 = DatetimeIndex(['NaT'])[0] - self.assertTrue(result1 is tslib.NaT) - self.assertTrue(result2 is tslib.NaT) - self.assertTrue(result3 is tslib.NaT) - self.assertTrue(result4 is tslib.NaT) - - def test_parsers_quarter_invalid(self): - - cases = ['2Q 2005', '2Q-200A', '2Q-200', '22Q2005', '6Q-20', '2Q200.'] - for case in cases: - self.assertRaises(ValueError, tools.parse_time_string, case) - - def test_parsers_dayfirst_yearfirst(self): - tm._skip_if_no_dateutil() - - # OK - # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 - # 2.5.2 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 - # 2.5.3 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 - - # OK - # 2.5.1 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 - # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 - # 2.5.3 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 - - # bug fix in 2.5.2 - # 2.5.1 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-11-12 00:00:00 - # 2.5.2 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 - # 2.5.3 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 - - # OK - # 2.5.1 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 - # 2.5.2 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 - # 2.5.3 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 - - # OK - # 2.5.1 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 - # 2.5.2 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 - # 2.5.3 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 - - # OK - # 2.5.1 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 - # 2.5.2 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 - # 2.5.3 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 - - # revert of bug in 2.5.2 - # 2.5.1 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 - # 2.5.2 20/12/21 [dayfirst=1, yearfirst=1] -> month must be in 1..12 - # 2.5.3 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 - - # OK - # 2.5.1 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 - # 2.5.2 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 - # 2.5.3 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 - - import dateutil - is_lt_253 = dateutil.__version__ < LooseVersion('2.5.3') - - # str : dayfirst, yearfirst, expected - cases = {'10-11-12': [(False, False, - datetime.datetime(2012, 10, 11)), - (True, False, - datetime.datetime(2012, 11, 10)), - (False, True, - datetime.datetime(2010, 11, 12)), - (True, True, - datetime.datetime(2010, 12, 11))], - '20/12/21': [(False, False, -
datetime.datetime(2021, 12, 20)), - (True, False, - datetime.datetime(2021, 12, 20)), - (False, True, - datetime.datetime(2020, 12, 21)), - (True, True, - datetime.datetime(2020, 12, 21))]} - - from dateutil.parser import parse - for date_str, values in compat.iteritems(cases): - for dayfirst, yearfirst, expected in values: - - # odd comparisons across version - # let's just skip - if dayfirst and yearfirst and is_lt_253: - continue - - # compare with dateutil result - dateutil_result = parse(date_str, dayfirst=dayfirst, - yearfirst=yearfirst) - self.assertEqual(dateutil_result, expected) - - result1, _, _ = tools.parse_time_string(date_str, - dayfirst=dayfirst, - yearfirst=yearfirst) - - # we don't support dayfirst/yearfirst here: - if not dayfirst and not yearfirst: - result2 = Timestamp(date_str) - self.assertEqual(result2, expected) - - result3 = to_datetime(date_str, dayfirst=dayfirst, - yearfirst=yearfirst) - - result4 = DatetimeIndex([date_str], dayfirst=dayfirst, - yearfirst=yearfirst)[0] - - self.assertEqual(result1, expected) - self.assertEqual(result3, expected) - self.assertEqual(result4, expected) - - def test_parsers_timestring(self): - tm._skip_if_no_dateutil() - from dateutil.parser import parse - - # must be the same as dateutil result - cases = {'10:15': (parse('10:15'), datetime.datetime(1, 1, 1, 10, 15)), - '9:05': (parse('9:05'), datetime.datetime(1, 1, 1, 9, 5))} - - for date_str, (exp_now, exp_def) in compat.iteritems(cases): - result1, _, _ = tools.parse_time_string(date_str) - result2 = to_datetime(date_str) - result3 = to_datetime([date_str]) - result4 = Timestamp(date_str) - result5 = DatetimeIndex([date_str])[0] - # parse time string return time string based on default date - # others are not, and can't be changed because it is used in - # time series plot - self.assertEqual(result1, exp_def) - self.assertEqual(result2, exp_now) - self.assertEqual(result3, exp_now) - self.assertEqual(result4, exp_now) - self.assertEqual(result5, exp_now) - - def test_parsers_time(self): - # GH11818 - _skip_if_has_locale() - strings = ["14:15", "1415", "2:15pm", "0215pm", "14:15:00", "141500", - "2:15:00pm", "021500pm", datetime.time(14, 15)] - expected = datetime.time(14, 15) - - for time_string in strings: - self.assertEqual(tools.to_time(time_string), expected) - - new_string = "14.15" - self.assertRaises(ValueError, tools.to_time, new_string) - self.assertEqual(tools.to_time(new_string, format="%H.%M"), expected) - - arg = ["14:15", "20:20"] - expected_arr = [datetime.time(14, 15), datetime.time(20, 20)] - self.assertEqual(tools.to_time(arg), expected_arr) - self.assertEqual(tools.to_time(arg, format="%H:%M"), expected_arr) - self.assertEqual(tools.to_time(arg, infer_time_format=True), - expected_arr) - self.assertEqual(tools.to_time(arg, format="%I:%M%p", errors="coerce"), - [None, None]) - - res = tools.to_time(arg, format="%I:%M%p", errors="ignore") - self.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_)) - - with tm.assertRaises(ValueError): - tools.to_time(arg, format="%I:%M%p", errors="raise") - - self.assert_series_equal(tools.to_time(Series(arg, name="test")), - Series(expected_arr, name="test")) - - res = tools.to_time(np.array(arg)) - self.assertIsInstance(res, list) - self.assert_equal(res, expected_arr) - - def test_parsers_monthfreq(self): - cases = {'201101': datetime.datetime(2011, 1, 1, 0, 0), - '200005': datetime.datetime(2000, 5, 1, 0, 0)} - - for date_str, expected in compat.iteritems(cases): - result1, _, _ = tools.parse_time_string(date_str, 
freq='M') - self.assertEqual(result1, expected) - - def test_parsers_quarterly_with_freq(self): - msg = ('Incorrect quarterly string is given, quarter ' - 'must be between 1 and 4: 2013Q5') - with tm.assertRaisesRegexp(tslib.DateParseError, msg): - tools.parse_time_string('2013Q5') - - # GH 5418 - msg = ('Unable to retrieve month information from given freq: ' - 'INVLD-L-DEC-SAT') - with tm.assertRaisesRegexp(tslib.DateParseError, msg): - tools.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT') - - cases = {('2013Q2', None): datetime.datetime(2013, 4, 1), - ('2013Q2', 'A-APR'): datetime.datetime(2012, 8, 1), - ('2013-Q2', 'A-DEC'): datetime.datetime(2013, 4, 1)} - - for (date_str, freq), exp in compat.iteritems(cases): - result, _, _ = tools.parse_time_string(date_str, freq=freq) - self.assertEqual(result, exp) - - def test_parsers_timezone_minute_offsets_roundtrip(self): - # GH11708 - base = to_datetime("2013-01-01 00:00:00") - dt_strings = [ - ('2013-01-01 05:45+0545', - "Asia/Katmandu", - "Timestamp('2013-01-01 05:45:00+0545', tz='Asia/Katmandu')"), - ('2013-01-01 05:30+0530', - "Asia/Kolkata", - "Timestamp('2013-01-01 05:30:00+0530', tz='Asia/Kolkata')") - ] - - for dt_string, tz, dt_string_repr in dt_strings: - dt_time = to_datetime(dt_string) - self.assertEqual(base, dt_time) - converted_time = dt_time.tz_localize('UTC').tz_convert(tz) - self.assertEqual(dt_string_repr, repr(converted_time)) - - def test_parsers_iso8601(self): - # GH 12060 - # test only the iso parser - flexibility to different - # separators and leading 0s - # Timestamp construction falls back to dateutil - cases = {'2011-01-02': datetime.datetime(2011, 1, 2), - '2011-1-2': datetime.datetime(2011, 1, 2), - '2011-01': datetime.datetime(2011, 1, 1), - '2011-1': datetime.datetime(2011, 1, 1), - '2011 01 02': datetime.datetime(2011, 1, 2), - '2011.01.02': datetime.datetime(2011, 1, 2), - '2011/01/02': datetime.datetime(2011, 1, 2), - '2011\\01\\02': datetime.datetime(2011, 1, 2), - '2013-01-01 05:30:00': datetime.datetime(2013, 1, 1, 5, 30), - '2013-1-1 5:30:00': datetime.datetime(2013, 1, 1, 5, 30)} - for date_str, exp in compat.iteritems(cases): - actual = tslib._test_parse_iso8601(date_str) - self.assertEqual(actual, exp) - - # separators must all match - YYYYMM not valid - invalid_cases = ['2011-01/02', '2011^11^11', - '201401', '201111', '200101', - # mixed separated and unseparated - '2005-0101', '200501-01', - '20010101 12:3456', '20010101 1234:56', - # HHMMSS must have two digits in each component - # if unseparated - '20010101 1', '20010101 123', '20010101 12345', - '20010101 12345Z', - # wrong separator for HHMMSS - '2001-01-01 12-34-56'] - for date_str in invalid_cases: - with tm.assertRaises(ValueError): - tslib._test_parse_iso8601(date_str) - # If no ValueError raised, let me know which case failed.
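- # (ISO 8601 only allows the compact "basic" calendar form as the full - # YYYYMMDD; a bare YYYYMM is disallowed by the standard because of - # this ambiguity, which is why '201401' and friends above must fail)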
- raise Exception(date_str) - - -class TestArrayToDatetime(tm.TestCase): - - def test_parsing_valid_dates(self): - arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr), - np_array_datetime64_compat( - [ - '2013-01-01T00:00:00.000000000-0000', - '2013-01-02T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - - arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr), - np_array_datetime64_compat( - [ - '2013-09-16T00:00:00.000000000-0000', - '2013-09-17T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - - def test_number_looking_strings_not_into_datetime(self): - # #4601 - # These strings don't look like datetimes so they shouldn't be - # attempted to be converted - arr = np.array(['-352.737091', '183.575577'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - arr = np.array(['1', '2', '3', '4', '5'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - def test_coercing_dates_outside_of_datetime64_ns_bounds(self): - invalid_dates = [ - datetime.date(1000, 1, 1), - datetime.datetime(1000, 1, 1), - '1000-01-01', - 'Jan 1, 1000', - np.datetime64('1000-01-01'), - ] - - for invalid_date in invalid_dates: - self.assertRaises(ValueError, - tslib.array_to_datetime, - np.array( - [invalid_date], dtype='object'), - errors='raise', ) - self.assert_numpy_array_equal( - tslib.array_to_datetime( - np.array([invalid_date], dtype='object'), - errors='coerce'), - np.array([tslib.iNaT], dtype='M8[ns]') - ) - - arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='coerce'), - np_array_datetime64_compat( - [ - tslib.iNaT, - '2000-01-01T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - - def test_coerce_of_invalid_datetimes(self): - arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object) - - # Without coercing, the presence of any invalid dates prevents - # any values from being converted - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - # With coercing, the invalid dates becomes iNaT - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='coerce'), - np_array_datetime64_compat( - [ - '2013-01-01T00:00:00.000000000-0000', - tslib.iNaT, - tslib.iNaT - ], - dtype='M8[ns]' - ) - ) - - def test_parsing_timezone_offsets(self): - # All of these datetime strings with offsets are equivalent - # to the same datetime after the timezone offset is added - dt_strings = [ - '01-01-2013 08:00:00+08:00', - '2013-01-01T08:00:00.000000000+0800', - '2012-12-31T16:00:00.000000000-0800', - '12-31-2012 23:00:00-01:00' - ] - - expected_output = tslib.array_to_datetime(np.array( - ['01-01-2013 00:00:00'], dtype=object)) - - for dt_string in dt_strings: - self.assert_numpy_array_equal( - tslib.array_to_datetime( - np.array([dt_string], dtype=object) - ), - expected_output - ) - - -class TestTslib(tm.TestCase): - - def test_intraday_conversion_factors(self): - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('H'), False), 24) - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('T'), False), 1440) - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('S'), False), 86400) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('L'), False), 86400000) - self.assertEqual(period_asfreq(1, get_freq( - 
'D'), get_freq('U'), False), 86400000000) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('N'), False), 86400000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('H'), get_freq('T'), False), 60) - self.assertEqual(period_asfreq( - 1, get_freq('H'), get_freq('S'), False), 3600) - self.assertEqual(period_asfreq(1, get_freq('H'), - get_freq('L'), False), 3600000) - self.assertEqual(period_asfreq(1, get_freq( - 'H'), get_freq('U'), False), 3600000000) - self.assertEqual(period_asfreq(1, get_freq( - 'H'), get_freq('N'), False), 3600000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('T'), get_freq('S'), False), 60) - self.assertEqual(period_asfreq( - 1, get_freq('T'), get_freq('L'), False), 60000) - self.assertEqual(period_asfreq(1, get_freq( - 'T'), get_freq('U'), False), 60000000) - self.assertEqual(period_asfreq(1, get_freq( - 'T'), get_freq('N'), False), 60000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('S'), get_freq('L'), False), 1000) - self.assertEqual(period_asfreq(1, get_freq('S'), - get_freq('U'), False), 1000000) - self.assertEqual(period_asfreq(1, get_freq( - 'S'), get_freq('N'), False), 1000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('L'), get_freq('U'), False), 1000) - self.assertEqual(period_asfreq(1, get_freq('L'), - get_freq('N'), False), 1000000) - - self.assertEqual(period_asfreq( - 1, get_freq('U'), get_freq('N'), False), 1000) - - def test_period_ordinal_start_values(self): - # information for 1.1.1970 - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('A'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('M'))) - self.assertEqual(1, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('D'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('B'))) - - def test_period_ordinal_week(self): - self.assertEqual(1, period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(2, period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0, - get_freq('W'))) - - self.assertEqual(2284, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(2285, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, - get_freq('W'))) - - def test_period_ordinal_business_day(self): - # Thursday - self.assertEqual(11415, period_ordinal(2013, 10, 3, 0, 0, 0, 0, 0, - get_freq('B'))) - # Friday - self.assertEqual(11416, period_ordinal(2013, 10, 4, 0, 0, 0, 0, 0, - get_freq('B'))) - # Saturday - self.assertEqual(11417, period_ordinal(2013, 10, 5, 0, 0, 0, 0, 0, - get_freq('B'))) - # Sunday - self.assertEqual(11417, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, - get_freq('B'))) - # Monday - self.assertEqual(11417, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, - get_freq('B'))) - # Tuesday - self.assertEqual(11418, period_ordinal(2013, 10, 8, 0, 0, 0, 0, 0, - get_freq('B'))) - - def test_tslib_tz_convert(self): - def compare_utc_to_local(tz_didx, utc_didx): - f = lambda x: tslib.tz_convert_single(x, 'UTC', tz_didx.tz) - result = tslib.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz) - result_single = np.vectorize(f)(tz_didx.asi8) - self.assert_numpy_array_equal(result, result_single) - - def compare_local_to_utc(tz_didx, utc_didx): - f = lambda x: tslib.tz_convert_single(x, tz_didx.tz, 'UTC') - result = tslib.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC') - result_single = np.vectorize(f)(utc_didx.asi8) - self.assert_numpy_array_equal(result, result_single) - - for tz in ['UTC', 'Asia/Tokyo', 
'US/Eastern', 'Europe/Moscow']: - # US: 2014-03-09 - 2014-11-11 - # MOSCOW: 2014-10-26 / 2014-12-31 - tz_didx = date_range('2014-03-01', '2015-01-10', freq='H', tz=tz) - utc_didx = date_range('2014-03-01', '2015-01-10', freq='H') - compare_utc_to_local(tz_didx, utc_didx) - # local tz to UTC can differ in hourly (or higher) freqs because - # of DST - compare_local_to_utc(tz_didx, utc_didx) - - tz_didx = date_range('2000-01-01', '2020-01-01', freq='D', tz=tz) - utc_didx = date_range('2000-01-01', '2020-01-01', freq='D') - compare_utc_to_local(tz_didx, utc_didx) - compare_local_to_utc(tz_didx, utc_didx) - - tz_didx = date_range('2000-01-01', '2100-01-01', freq='A', tz=tz) - utc_didx = date_range('2000-01-01', '2100-01-01', freq='A') - compare_utc_to_local(tz_didx, utc_didx) - compare_local_to_utc(tz_didx, utc_didx) - - # Check empty array - result = tslib.tz_convert(np.array([], dtype=np.int64), - tslib.maybe_get_tz('US/Eastern'), - tslib.maybe_get_tz('Asia/Tokyo')) - self.assert_numpy_array_equal(result, np.array([], dtype=np.int64)) - - # Check all-NaT array - result = tslib.tz_convert(np.array([tslib.iNaT], dtype=np.int64), - tslib.maybe_get_tz('US/Eastern'), - tslib.maybe_get_tz('Asia/Tokyo')) - self.assert_numpy_array_equal(result, np.array( - [tslib.iNaT], dtype=np.int64)) - - def test_shift_months(self): - s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), Timestamp( - '2000-01-31 00:23:00'), Timestamp('2000-01-01'), Timestamp( - '2000-02-29'), Timestamp('2000-12-31')]) - for years in [-1, 0, 1]: - for months in [-2, 0, 2]: - actual = DatetimeIndex(tslib.shift_months(s.asi8, years * 12 + - months)) - expected = DatetimeIndex([x + offsets.DateOffset( - years=years, months=months) for x in s]) - tm.assert_index_equal(actual, expected) - - def test_round(self): - stamp = Timestamp('2000-01-05 05:09:15.13') - - def _check_round(freq, expected): - result = stamp.round(freq=freq) - self.assertEqual(result, expected) - - for freq, expected in [('D', Timestamp('2000-01-05 00:00:00')), - ('H', Timestamp('2000-01-05 05:00:00')), - ('S', Timestamp('2000-01-05 05:09:15'))]: - _check_round(freq, expected) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - stamp.round('foo') diff --git a/pandas/tseries/tests/test_util.py b/pandas/tseries/tests/test_util.py deleted file mode 100644 index 3feffe924c291..0000000000000 --- a/pandas/tseries/tests/test_util.py +++ /dev/null @@ -1,126 +0,0 @@ -from pandas.compat import range - -import numpy as np - -from pandas import Series, date_range -import pandas.util.testing as tm - -from datetime import datetime, date - -from pandas.tseries.tools import normalize_date -from pandas.tseries.util import pivot_annual, isleapyear - - -class TestPivotAnnual(tm.TestCase): - """ - New pandas implementation of scikits.timeseries pivot_annual - """ - - def test_daily(self): - rng = date_range('1/1/2000', '12/31/2004', freq='D') - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - annual = pivot_annual(ts, 'D') - - doy = ts.index.dayofyear - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1 - - for i in range(1, 367): - subset = ts[doy == i] - subset.index = [x.year for x in subset.index] - - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) - - # check leap days - leaps = ts[(ts.index.month == 2) &
(ts.index.day == 29)] - day = leaps.index.dayofyear[0] - leaps.index = leaps.index.year - leaps.name = 60 - tm.assert_series_equal(annual[day].dropna(), leaps) - - def test_hourly(self): - rng_hourly = date_range('1/1/1994', periods=(18 * 8760 + 4 * 24), - freq='H') - data_hourly = np.random.randint(100, 350, rng_hourly.size) - ts_hourly = Series(data_hourly, index=rng_hourly) - - grouped = ts_hourly.groupby(ts_hourly.index.year) - hoy = grouped.apply(lambda x: x.reset_index(drop=True)) - hoy = hoy.index.droplevel(0).values - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - hoy[~isleapyear(ts_hourly.index.year) & (hoy >= 1416)] += 24 - hoy += 1 - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - annual = pivot_annual(ts_hourly) - - ts_hourly = ts_hourly.astype(float) - for i in [1, 1416, 1417, 1418, 1439, 1440, 1441, 8784]: - subset = ts_hourly[hoy == i] - subset.index = [x.year for x in subset.index] - - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) - - leaps = ts_hourly[(ts_hourly.index.month == 2) & ( - ts_hourly.index.day == 29) & (ts_hourly.index.hour == 0)] - hour = leaps.index.dayofyear[0] * 24 - 23 - leaps.index = leaps.index.year - leaps.name = 1417 - tm.assert_series_equal(annual[hour].dropna(), leaps) - - def test_weekly(self): - pass - - def test_monthly(self): - rng = date_range('1/1/2000', '12/31/2004', freq='M') - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - annual = pivot_annual(ts, 'M') - - month = ts.index.month - for i in range(1, 13): - subset = ts[month == i] - subset.index = [x.year for x in subset.index] - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) - - def test_period_monthly(self): - pass - - def test_period_daily(self): - pass - - def test_period_weekly(self): - pass - - def test_isleapyear_deprecate(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(isleapyear(2000)) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertFalse(isleapyear(2001)) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(isleapyear(2004)) - - -def test_normalize_date(): - value = date(2012, 9, 7) - - result = normalize_date(value) - assert (result == datetime(2012, 9, 7)) - - value = datetime(2012, 9, 7, 12) - - result = normalize_date(value) - assert (result == datetime(2012, 9, 7)) From 6552a237837894f6f244b5fa022cae90343508cd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Feb 2017 17:58:45 -0500 Subject: [PATCH 018/933] TST: more tseries/tests reorg --- .../indexes/datetimes/test_date_range.py | 444 ++---------------- pandas/tests/indexes/datetimes/test_ops.py | 195 +++++++- pandas/tests/indexes/datetimes/test_setops.py | 224 +++++++++ pandas/tests/indexes/datetimes/test_tools.py | 108 +---- pandas/tools/tests/test_pivot.py | 106 ++++- setup.py | 2 + 6 files changed, 557 insertions(+), 522 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 8dab10269f76d..80664ce246bf8 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -1,22 +1,30 @@ +""" +test date_range, bdate_range, cdate_range +construction from the convenience 
range functions +""" + import numpy as np from datetime import datetime, timedelta, time import pandas as pd import pandas.util.testing as tm from pandas import compat -from pandas.core import common as com -from pandas.util.testing import assertRaisesRegexp from pandas.tseries.index import bdate_range, cdate_range -from pandas import date_range, offsets, DatetimeIndex, Timestamp, Index -from pandas.tseries.offsets import (generate_range, CDay, BDay, Minute, - BMonthEnd, DateOffset, MonthEnd) +from pandas import date_range, offsets, DatetimeIndex, Timestamp +from pandas.tseries.offsets import (generate_range, CDay, BDay, + DateOffset, MonthEnd) from pandas.tests.series.common import TestData START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) -class TestTimeSeries(TestData, tm.TestCase): +def eq_gen_range(kwargs, expected): + rng = generate_range(**kwargs) + assert (np.array_equal(list(rng), expected)) + + +class TestDateRanges(TestData, tm.TestCase): def test_date_range_gen_error(self): rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min') @@ -137,11 +145,6 @@ def test_catch_infinite_loop(self): datetime(2011, 11, 12), freq=offset) -def eq_gen_range(kwargs, expected): - rng = generate_range(**kwargs) - assert (np.array_equal(list(rng), expected)) - - class TestGenRangeGeneration(tm.TestCase): def test_generate(self): @@ -191,7 +194,8 @@ def test_precision_finer_than_offset(self): self.assert_index_equal(result2, expected2) -class TestDateRange(tm.TestCase): +class TestBusinessDateRange(tm.TestCase): + def setUp(self): self.rng = bdate_range(START, END) @@ -206,28 +210,31 @@ def test_naive_aware_conflicts(self): naive = bdate_range(START, END, freq=BDay(), tz=None) aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong") - assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", naive.join, aware) - assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", aware.join, naive) + self.assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", + naive.join, aware) + self.assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", + aware.join, naive) def test_cached_range(self): DatetimeIndex._cached_range(START, END, offset=BDay()) DatetimeIndex._cached_range(START, periods=20, offset=BDay()) DatetimeIndex._cached_range(end=START, periods=20, offset=BDay()) - assertRaisesRegexp(TypeError, "offset", DatetimeIndex._cached_range, - START, END) + self.assertRaisesRegexp(TypeError, "offset", + DatetimeIndex._cached_range, + START, END) - assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, START, - offset=BDay()) + self.assertRaisesRegexp(TypeError, "specify period", + DatetimeIndex._cached_range, START, + offset=BDay()) - assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, end=END, - offset=BDay()) + self.assertRaisesRegexp(TypeError, "specify period", + DatetimeIndex._cached_range, end=END, + offset=BDay()) - assertRaisesRegexp(TypeError, "start or end", - DatetimeIndex._cached_range, periods=20, - offset=BDay()) + self.assertRaisesRegexp(TypeError, "start or end", + DatetimeIndex._cached_range, periods=20, + offset=BDay()) def test_cached_range_bug(self): rng = date_range('2010-09-01 05:00:00', periods=50, @@ -236,192 +243,16 @@ def test_cached_range_bug(self): self.assertEqual(rng[0], datetime(2010, 9, 1, 5)) def test_timezone_comparaison_bug(self): + # smoke test start = Timestamp('20130220 10:00', tz='US/Eastern') - try: - date_range(start, periods=2, tz='US/Eastern') - except AssertionError: - self.fail() + result = date_range(start, 
periods=2, tz='US/Eastern') + self.assertEqual(len(result), 2) def test_timezone_comparaison_assert(self): start = Timestamp('20130220 10:00', tz='US/Eastern') self.assertRaises(AssertionError, date_range, start, periods=2, tz='Europe/Berlin') - def test_comparison(self): - d = self.rng[10] - - comp = self.rng > d - self.assertTrue(comp[11]) - self.assertFalse(comp[9]) - - def test_copy(self): - cp = self.rng.copy() - repr(cp) - self.assert_index_equal(cp, self.rng) - - def test_repr(self): - # only really care that it works - repr(self.rng) - - def test_getitem(self): - smaller = self.rng[:5] - exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) - self.assert_index_equal(smaller, exp) - - self.assertEqual(smaller.offset, self.rng.offset) - - sliced = self.rng[::5] - self.assertEqual(sliced.offset, BDay() * 5) - - fancy_indexed = self.rng[[4, 3, 2, 1, 0]] - self.assertEqual(len(fancy_indexed), 5) - tm.assertIsInstance(fancy_indexed, DatetimeIndex) - self.assertIsNone(fancy_indexed.freq) - - # 32-bit vs. 64-bit platforms - self.assertEqual(self.rng[4], self.rng[np.int_(4)]) - - def test_getitem_matplotlib_hackaround(self): - values = self.rng[:, None] - expected = self.rng.values[:, None] - self.assert_numpy_array_equal(values, expected) - - def test_shift(self): - shifted = self.rng.shift(5) - self.assertEqual(shifted[0], self.rng[5]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(-5) - self.assertEqual(shifted[5], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(0) - self.assertEqual(shifted[0], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - rng = date_range(START, END, freq=BMonthEnd()) - shifted = rng.shift(1, freq=BDay()) - self.assertEqual(shifted[0], rng[0] + BDay()) - - def test_pickle_unpickle(self): - unpickled = self.round_trip_pickle(self.rng) - self.assertIsNotNone(unpickled.offset) - - def test_union(self): - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_union = left.union(right) - tm.assertIsInstance(the_union, Index) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # order does not matter - tm.assert_index_equal(right.union(left), the_union) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_union = self.rng.union(rng) - tm.assertIsInstance(the_union, DatetimeIndex) - - def test_outer_join(self): - # should just behave as union - - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_join = self.rng.join(rng, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - def test_union_not_cacheable(self): - rng = 
date_range('1/1/2000', periods=50, freq=Minute()) - rng1 = rng[10:] - rng2 = rng[:25] - the_union = rng1.union(rng2) - self.assert_index_equal(the_union, rng) - - rng1 = rng[10:] - rng2 = rng[15:35] - the_union = rng1.union(rng2) - expected = rng[10:] - self.assert_index_equal(the_union, expected) - - def test_intersection(self): - rng = date_range('1/1/2000', periods=50, freq=Minute()) - rng1 = rng[10:] - rng2 = rng[:25] - the_int = rng1.intersection(rng2) - expected = rng[10:25] - self.assert_index_equal(the_int, expected) - tm.assertIsInstance(the_int, DatetimeIndex) - self.assertEqual(the_int.offset, rng.offset) - - the_int = rng1.intersection(rng2.view(DatetimeIndex)) - self.assert_index_equal(the_int, expected) - - # non-overlapping - the_int = rng[:10].intersection(rng[10:]) - expected = DatetimeIndex([]) - self.assert_index_equal(the_int, expected) - - def test_intersection_bug(self): - # GH #771 - a = bdate_range('11/30/2011', '12/31/2011') - b = bdate_range('12/10/2011', '12/20/2011') - result = a.intersection(b) - self.assert_index_equal(result, b) - - def test_summary(self): - self.rng.summary() - self.rng[2:2].summary() - - def test_summary_pytz(self): - tm._skip_if_no_pytz() - import pytz - bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() - - def test_summary_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() - def test_misc(self): end = datetime(2009, 5, 13) dr = bdate_range(end=end, periods=20) @@ -443,26 +274,6 @@ def test_date_parse_failure(self): self.assertRaises(ValueError, bdate_range, badly_formed_date, badly_formed_date) - def test_equals(self): - self.assertFalse(self.rng.equals(list(self.rng))) - - def test_identical(self): - t1 = self.rng.copy() - t2 = self.rng.copy() - self.assertTrue(t1.identical(t2)) - - # name - t1 = t1.rename('foo') - self.assertTrue(t1.equals(t2)) - self.assertFalse(t1.identical(t2)) - t2 = t2.rename('foo') - self.assertTrue(t1.identical(t2)) - - # freq - t2v = Index(t2.values) - self.assertTrue(t1.equals(t2v)) - self.assertFalse(t1.identical(t2v)) - def test_daterange_bug_456(self): # GH #456 rng1 = bdate_range('12/5/2011', '12/5/2011') @@ -560,43 +371,6 @@ def test_range_tz_dateutil(self): self.assertTrue(dr[0] == start) self.assertTrue(dr[2] == end) - def test_month_range_union_tz_pytz(self): - tm._skip_if_no_pytz() - from pytz import timezone - tz = timezone('US/Eastern') - - early_start = datetime(2011, 1, 1) - early_end = datetime(2011, 3, 1) - - late_start = datetime(2011, 3, 1) - late_end = datetime(2011, 5, 1) - - early_dr = date_range(start=early_start, end=early_end, tz=tz, - freq=MonthEnd()) - late_dr = date_range(start=late_start, end=late_end, tz=tz, - freq=MonthEnd()) - - early_dr.union(late_dr) - - def test_month_range_union_tz_dateutil(self): - tm._skip_if_windows_python_3() - tm._skip_if_no_dateutil() - from pandas.tslib import _dateutil_gettz as timezone - tz = timezone('US/Eastern') - - early_start = datetime(2011, 1, 1) - early_end = datetime(2011, 3, 1) - - late_start = datetime(2011, 3, 1) - late_end = datetime(2011, 5, 1) - - early_dr = date_range(start=early_start, end=early_end, tz=tz, - freq=MonthEnd()) - late_dr = date_range(start=late_start, end=late_end, tz=tz, - freq=MonthEnd()) - - early_dr.union(late_dr) - def test_range_closed(self): begin = datetime(2011, 1, 1) end = datetime(2014, 1, 1) @@ -735,151 +509,6 @@ def test_cached_range(self): self.assertRaises(Exception, DatetimeIndex._cached_range, periods=20, 
freq=CDay()) - def test_comparison(self): - d = self.rng[10] - - comp = self.rng > d - self.assertTrue(comp[11]) - self.assertFalse(comp[9]) - - def test_copy(self): - cp = self.rng.copy() - repr(cp) - self.assert_index_equal(cp, self.rng) - - def test_repr(self): - # only really care that it works - repr(self.rng) - - def test_getitem(self): - smaller = self.rng[:5] - exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) - self.assert_index_equal(smaller, exp) - self.assertEqual(smaller.offset, self.rng.offset) - - sliced = self.rng[::5] - self.assertEqual(sliced.offset, CDay() * 5) - - fancy_indexed = self.rng[[4, 3, 2, 1, 0]] - self.assertEqual(len(fancy_indexed), 5) - tm.assertIsInstance(fancy_indexed, DatetimeIndex) - self.assertIsNone(fancy_indexed.freq) - - # 32-bit vs. 64-bit platforms - self.assertEqual(self.rng[4], self.rng[np.int_(4)]) - - def test_getitem_matplotlib_hackaround(self): - values = self.rng[:, None] - expected = self.rng.values[:, None] - self.assert_numpy_array_equal(values, expected) - - def test_shift(self): - - shifted = self.rng.shift(5) - self.assertEqual(shifted[0], self.rng[5]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(-5) - self.assertEqual(shifted[5], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(0) - self.assertEqual(shifted[0], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - with tm.assert_produces_warning(com.PerformanceWarning): - rng = date_range(START, END, freq=BMonthEnd()) - shifted = rng.shift(1, freq=CDay()) - self.assertEqual(shifted[0], rng[0] + CDay()) - - def test_pickle_unpickle(self): - unpickled = self.round_trip_pickle(self.rng) - self.assertIsNotNone(unpickled.offset) - - def test_union(self): - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_union = left.union(right) - tm.assertIsInstance(the_union, Index) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # order does not matter - self.assert_index_equal(right.union(left), the_union) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_union = self.rng.union(rng) - tm.assertIsInstance(the_union, DatetimeIndex) - - def test_outer_join(self): - # should just behave as union - - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_join = self.rng.join(rng, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - def test_intersection_bug(self): - # GH #771 - a = cdate_range('11/30/2011', '12/31/2011') - b = cdate_range('12/10/2011', '12/20/2011') - result = a.intersection(b) - self.assert_index_equal(result, b) - - def test_summary(self): - 
self.rng.summary() - self.rng[2:2].summary() - - def test_summary_pytz(self): - tm._skip_if_no_pytz() - import pytz - cdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() - - def test_summary_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() - def test_misc(self): end = datetime(2009, 5, 13) dr = cdate_range(end=end, periods=20) @@ -901,9 +530,6 @@ def test_date_parse_failure(self): self.assertRaises(ValueError, cdate_range, badly_formed_date, badly_formed_date) - def test_equals(self): - self.assertFalse(self.rng.equals(list(self.rng))) - def test_daterange_bug_456(self): # GH #456 rng1 = cdate_range('12/5/2011', '12/5/2011') diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index c7cdcd9318a0e..63bf07ec041d3 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -6,13 +6,17 @@ import pandas.tslib as tslib import pandas.util.testing as tm from pandas.core.common import PerformanceWarning +from pandas.tseries.index import cdate_range from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, date_range, TimedeltaIndex, _np_version_under1p10, Index, - datetime, Float64Index, offsets) - + datetime, Float64Index, offsets, bdate_range) +from pandas.tseries.offsets import BMonthEnd, CDay, BDay from pandas.tests.test_base import Ops +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + + class TestDatetimeIndexOps(Ops): tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/Asia/Singapore', 'dateutil/US/Pacific'] @@ -1071,9 +1075,6 @@ def test_datetime64_with_DateOffset(self): assert_func(klass([x - op for x in s]), s - op) assert_func(klass([op + x for x in s]), op + s) - -class TestTslib(tm.TestCase): - def test_shift_months(self): s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), Timestamp( '2000-01-31 00:23:00'), Timestamp('2000-01-01'), Timestamp( @@ -1085,3 +1086,187 @@ def test_shift_months(self): expected = DatetimeIndex([x + offsets.DateOffset( years=years, months=months) for x in s]) tm.assert_index_equal(actual, expected) + + +class TestBusinessDatetimeIndex(tm.TestCase): + + def setUp(self): + self.rng = bdate_range(START, END) + + def test_comparison(self): + d = self.rng[10] + + comp = self.rng > d + self.assertTrue(comp[11]) + self.assertFalse(comp[9]) + + def test_pickle_unpickle(self): + unpickled = self.round_trip_pickle(self.rng) + self.assertIsNotNone(unpickled.offset) + + def test_copy(self): + cp = self.rng.copy() + repr(cp) + self.assert_index_equal(cp, self.rng) + + def test_repr(self): + # only really care that it works + repr(self.rng) + + def test_getitem(self): + smaller = self.rng[:5] + exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) + self.assert_index_equal(smaller, exp) + + self.assertEqual(smaller.offset, self.rng.offset) + + sliced = self.rng[::5] + self.assertEqual(sliced.offset, BDay() * 5) + + fancy_indexed = self.rng[[4, 3, 2, 1, 0]] + self.assertEqual(len(fancy_indexed), 5) + tm.assertIsInstance(fancy_indexed, DatetimeIndex) + self.assertIsNone(fancy_indexed.freq) + + # 32-bit vs. 
64-bit platforms + self.assertEqual(self.rng[4], self.rng[np.int_(4)]) + + def test_getitem_matplotlib_hackaround(self): + values = self.rng[:, None] + expected = self.rng.values[:, None] + self.assert_numpy_array_equal(values, expected) + + def test_shift(self): + shifted = self.rng.shift(5) + self.assertEqual(shifted[0], self.rng[5]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(-5) + self.assertEqual(shifted[5], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(0) + self.assertEqual(shifted[0], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + rng = date_range(START, END, freq=BMonthEnd()) + shifted = rng.shift(1, freq=BDay()) + self.assertEqual(shifted[0], rng[0] + BDay()) + + def test_summary(self): + self.rng.summary() + self.rng[2:2].summary() + + def test_summary_pytz(self): + tm._skip_if_no_pytz() + import pytz + bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() + + def test_summary_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() + + def test_equals(self): + self.assertFalse(self.rng.equals(list(self.rng))) + + def test_identical(self): + t1 = self.rng.copy() + t2 = self.rng.copy() + self.assertTrue(t1.identical(t2)) + + # name + t1 = t1.rename('foo') + self.assertTrue(t1.equals(t2)) + self.assertFalse(t1.identical(t2)) + t2 = t2.rename('foo') + self.assertTrue(t1.identical(t2)) + + # freq + t2v = Index(t2.values) + self.assertTrue(t1.equals(t2v)) + self.assertFalse(t1.identical(t2v)) + + +class TestCustomDatetimeIndex(tm.TestCase): + + def setUp(self): + self.rng = cdate_range(START, END) + + def test_comparison(self): + d = self.rng[10] + + comp = self.rng > d + self.assertTrue(comp[11]) + self.assertFalse(comp[9]) + + def test_copy(self): + cp = self.rng.copy() + repr(cp) + self.assert_index_equal(cp, self.rng) + + def test_repr(self): + # only really care that it works + repr(self.rng) + + def test_getitem(self): + smaller = self.rng[:5] + exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) + self.assert_index_equal(smaller, exp) + self.assertEqual(smaller.offset, self.rng.offset) + + sliced = self.rng[::5] + self.assertEqual(sliced.offset, CDay() * 5) + + fancy_indexed = self.rng[[4, 3, 2, 1, 0]] + self.assertEqual(len(fancy_indexed), 5) + tm.assertIsInstance(fancy_indexed, DatetimeIndex) + self.assertIsNone(fancy_indexed.freq) + + # 32-bit vs. 
64-bit platforms + self.assertEqual(self.rng[4], self.rng[np.int_(4)]) + + def test_getitem_matplotlib_hackaround(self): + values = self.rng[:, None] + expected = self.rng.values[:, None] + self.assert_numpy_array_equal(values, expected) + + def test_shift(self): + + shifted = self.rng.shift(5) + self.assertEqual(shifted[0], self.rng[5]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(-5) + self.assertEqual(shifted[5], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(0) + self.assertEqual(shifted[0], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + with tm.assert_produces_warning(PerformanceWarning): + rng = date_range(START, END, freq=BMonthEnd()) + shifted = rng.shift(1, freq=CDay()) + self.assertEqual(shifted[0], rng[0] + CDay()) + + def test_pickle_unpickle(self): + unpickled = self.round_trip_pickle(self.rng) + self.assertIsNotNone(unpickled.offset) + + def test_summary(self): + self.rng.summary() + self.rng[2:2].summary() + + def test_summary_pytz(self): + tm._skip_if_no_pytz() + import pytz + cdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() + + def test_summary_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() + + def test_equals(self): + self.assertFalse(self.rng.equals(list(self.rng))) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 7777de869bb20..7da660a956e23 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -4,8 +4,12 @@ import pandas as pd import pandas.util.testing as tm +from pandas.tseries.index import cdate_range from pandas import (DatetimeIndex, date_range, Series, bdate_range, DataFrame, Int64Index, Index) +from pandas.tseries.offsets import Minute, BMonthEnd, MonthEnd + +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) class TestDatetimeIndex(tm.TestCase): @@ -185,3 +189,223 @@ def test_datetimeindex_union_join_empty(self): result = dti.join(empty) tm.assertIsInstance(result, DatetimeIndex) + + +class TestBusinessDatetimeIndex(tm.TestCase): + + def setUp(self): + self.rng = bdate_range(START, END) + + def test_union(self): + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_union = left.union(right) + tm.assertIsInstance(the_union, Index) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # order does not matter + tm.assert_index_equal(right.union(left), the_union) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_union = self.rng.union(rng) + tm.assertIsInstance(the_union, DatetimeIndex) + + def test_outer_join(self): + # should just behave as union + + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_join = 
left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_join = self.rng.join(rng, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + def test_union_not_cacheable(self): + rng = date_range('1/1/2000', periods=50, freq=Minute()) + rng1 = rng[10:] + rng2 = rng[:25] + the_union = rng1.union(rng2) + self.assert_index_equal(the_union, rng) + + rng1 = rng[10:] + rng2 = rng[15:35] + the_union = rng1.union(rng2) + expected = rng[10:] + self.assert_index_equal(the_union, expected) + + def test_intersection(self): + rng = date_range('1/1/2000', periods=50, freq=Minute()) + rng1 = rng[10:] + rng2 = rng[:25] + the_int = rng1.intersection(rng2) + expected = rng[10:25] + self.assert_index_equal(the_int, expected) + tm.assertIsInstance(the_int, DatetimeIndex) + self.assertEqual(the_int.offset, rng.offset) + + the_int = rng1.intersection(rng2.view(DatetimeIndex)) + self.assert_index_equal(the_int, expected) + + # non-overlapping + the_int = rng[:10].intersection(rng[10:]) + expected = DatetimeIndex([]) + self.assert_index_equal(the_int, expected) + + def test_intersection_bug(self): + # GH #771 + a = bdate_range('11/30/2011', '12/31/2011') + b = bdate_range('12/10/2011', '12/20/2011') + result = a.intersection(b) + self.assert_index_equal(result, b) + + def test_month_range_union_tz_pytz(self): + tm._skip_if_no_pytz() + from pytz import timezone + tz = timezone('US/Eastern') + + early_start = datetime(2011, 1, 1) + early_end = datetime(2011, 3, 1) + + late_start = datetime(2011, 3, 1) + late_end = datetime(2011, 5, 1) + + early_dr = date_range(start=early_start, end=early_end, tz=tz, + freq=MonthEnd()) + late_dr = date_range(start=late_start, end=late_end, tz=tz, + freq=MonthEnd()) + + early_dr.union(late_dr) + + def test_month_range_union_tz_dateutil(self): + tm._skip_if_windows_python_3() + tm._skip_if_no_dateutil() + from pandas.tslib import _dateutil_gettz as timezone + tz = timezone('US/Eastern') + + early_start = datetime(2011, 1, 1) + early_end = datetime(2011, 3, 1) + + late_start = datetime(2011, 3, 1) + late_end = datetime(2011, 5, 1) + + early_dr = date_range(start=early_start, end=early_end, tz=tz, + freq=MonthEnd()) + late_dr = date_range(start=late_start, end=late_end, tz=tz, + freq=MonthEnd()) + + early_dr.union(late_dr) + + +class TestCustomDatetimeIndex(tm.TestCase): + + def setUp(self): + self.rng = cdate_range(START, END) + + def test_union(self): + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_union = left.union(right) + tm.assertIsInstance(the_union, Index) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # order does not matter + self.assert_index_equal(right.union(left), the_union) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_union = self.rng.union(rng) + tm.assertIsInstance(the_union, DatetimeIndex) + + def test_outer_join(self): + # should just behave as union + + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # non-overlapping, gap in middle + left = 
self.rng[:5] + right = self.rng[10:] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_join = self.rng.join(rng, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + def test_intersection_bug(self): + # GH #771 + a = cdate_range('11/30/2011', '12/31/2011') + b = cdate_range('12/10/2011', '12/20/2011') + result = a.intersection(b) + self.assert_index_equal(result, b) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index bf1f82b90d5d6..af749963146c6 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -12,7 +12,6 @@ from pandas import tslib from pandas.tseries import tools from pandas.tseries.tools import normalize_date -from pandas.tseries.util import pivot_annual, isleapyear from pandas.compat import lmap from pandas.compat.numpy import np_array_datetime64_compat from pandas.types.common import is_datetime64_ns_dtype @@ -1382,7 +1381,7 @@ def test_parsers_iso8601(self): raise Exception(date_str) -class TestTsUtil(tm.TestCase): +class TestArrayToDatetime(tm.TestCase): def test_try_parse_dates(self): from dateutil.parser import parse @@ -1392,8 +1391,6 @@ def test_try_parse_dates(self): expected = [parse(d, dayfirst=True) for d in arr] self.assertTrue(np.array_equal(result, expected)) - -class TestArrayToDatetime(tm.TestCase): def test_parsing_valid_dates(self): arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) self.assert_numpy_array_equal( @@ -1508,109 +1505,6 @@ def test_coerce_of_invalid_datetimes(self): ) -class TestPivotAnnual(tm.TestCase): - """ - New pandas of scikits.timeseries pivot_annual - """ - - def test_daily(self): - rng = date_range('1/1/2000', '12/31/2004', freq='D') - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - annual = pivot_annual(ts, 'D') - - doy = ts.index.dayofyear - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1 - - for i in range(1, 367): - subset = ts[doy == i] - subset.index = [x.year for x in subset.index] - - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) - - # check leap days - leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)] - day = leaps.index.dayofyear[0] - leaps.index = leaps.index.year - leaps.name = 60 - tm.assert_series_equal(annual[day].dropna(), leaps) - - def test_hourly(self): - rng_hourly = date_range('1/1/1994', periods=(18 * 8760 + 4 * 24), - freq='H') - data_hourly = np.random.randint(100, 350, rng_hourly.size) - ts_hourly = Series(data_hourly, index=rng_hourly) - - grouped = ts_hourly.groupby(ts_hourly.index.year) - hoy = grouped.apply(lambda x: x.reset_index(drop=True)) - hoy = hoy.index.droplevel(0).values - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - hoy[~isleapyear(ts_hourly.index.year) & (hoy >= 1416)] += 24 - hoy += 1 - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - annual = pivot_annual(ts_hourly) - - ts_hourly = 
ts_hourly.astype(float) - for i in [1, 1416, 1417, 1418, 1439, 1440, 1441, 8784]: - subset = ts_hourly[hoy == i] - subset.index = [x.year for x in subset.index] - - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) - - leaps = ts_hourly[(ts_hourly.index.month == 2) & ( - ts_hourly.index.day == 29) & (ts_hourly.index.hour == 0)] - hour = leaps.index.dayofyear[0] * 24 - 23 - leaps.index = leaps.index.year - leaps.name = 1417 - tm.assert_series_equal(annual[hour].dropna(), leaps) - - def test_weekly(self): - pass - - def test_monthly(self): - rng = date_range('1/1/2000', '12/31/2004', freq='M') - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - annual = pivot_annual(ts, 'M') - - month = ts.index.month - for i in range(1, 13): - subset = ts[month == i] - subset.index = [x.year for x in subset.index] - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) - - def test_period_monthly(self): - pass - - def test_period_daily(self): - pass - - def test_period_weekly(self): - pass - - def test_isleapyear_deprecate(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(isleapyear(2000)) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertFalse(isleapyear(2001)) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(isleapyear(2004)) - - def test_normalize_date(): value = date(2012, 9, 7) diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 40b46c5413c8f..7f2bb7e724362 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -3,11 +3,12 @@ import numpy as np import pandas as pd -from pandas import DataFrame, Series, Index, MultiIndex, Grouper +from pandas import DataFrame, Series, Index, MultiIndex, Grouper, date_range from pandas.tools.merge import concat from pandas.tools.pivot import pivot_table, crosstab from pandas.compat import range, product import pandas.util.testing as tm +from pandas.tseries.util import pivot_annual, isleapyear class TestPivotTable(tm.TestCase): @@ -1319,3 +1320,106 @@ def test_crosstab_with_numpy_size(self): index=expected_index, columns=expected_column) tm.assert_frame_equal(result, expected) + + +class TestPivotAnnual(tm.TestCase): + """ + New pandas of scikits.timeseries pivot_annual + """ + + def test_daily(self): + rng = date_range('1/1/2000', '12/31/2004', freq='D') + ts = Series(np.random.randn(len(rng)), index=rng) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + annual = pivot_annual(ts, 'D') + + doy = ts.index.dayofyear + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1 + + for i in range(1, 367): + subset = ts[doy == i] + subset.index = [x.year for x in subset.index] + + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + self.assertEqual(result.name, i) + + # check leap days + leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)] + day = leaps.index.dayofyear[0] + leaps.index = leaps.index.year + leaps.name = 60 + tm.assert_series_equal(annual[day].dropna(), leaps) + + def test_hourly(self): + rng_hourly = date_range('1/1/1994', periods=(18 * 8760 + 4 * 24), + freq='H') + data_hourly = np.random.randint(100, 350, 
rng_hourly.size) + ts_hourly = Series(data_hourly, index=rng_hourly) + + grouped = ts_hourly.groupby(ts_hourly.index.year) + hoy = grouped.apply(lambda x: x.reset_index(drop=True)) + hoy = hoy.index.droplevel(0).values + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + hoy[~isleapyear(ts_hourly.index.year) & (hoy >= 1416)] += 24 + hoy += 1 + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + annual = pivot_annual(ts_hourly) + + ts_hourly = ts_hourly.astype(float) + for i in [1, 1416, 1417, 1418, 1439, 1440, 1441, 8784]: + subset = ts_hourly[hoy == i] + subset.index = [x.year for x in subset.index] + + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + self.assertEqual(result.name, i) + + leaps = ts_hourly[(ts_hourly.index.month == 2) & ( + ts_hourly.index.day == 29) & (ts_hourly.index.hour == 0)] + hour = leaps.index.dayofyear[0] * 24 - 23 + leaps.index = leaps.index.year + leaps.name = 1417 + tm.assert_series_equal(annual[hour].dropna(), leaps) + + def test_weekly(self): + pass + + def test_monthly(self): + rng = date_range('1/1/2000', '12/31/2004', freq='M') + ts = Series(np.random.randn(len(rng)), index=rng) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + annual = pivot_annual(ts, 'M') + + month = ts.index.month + for i in range(1, 13): + subset = ts[month == i] + subset.index = [x.year for x in subset.index] + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + self.assertEqual(result.name, i) + + def test_period_monthly(self): + pass + + def test_period_daily(self): + pass + + def test_period_weekly(self): + pass + + def test_isleapyear_deprecate(self): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.assertTrue(isleapyear(2000)) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.assertFalse(isleapyear(2001)) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.assertTrue(isleapyear(2004)) diff --git a/setup.py b/setup.py index 4d6bb76fd6b7c..3c2617da18eae 100755 --- a/setup.py +++ b/setup.py @@ -642,6 +642,8 @@ def pxd(name): 'pandas.tests.frame', 'pandas.tests.indexes', 'pandas.tests.indexes.datetimes', + 'pandas.tests.indexes.timedeltas', + 'pandas.tests.indexes.period', 'pandas.tests.groupby', 'pandas.tests.series', 'pandas.tests.formats', From 153da508a536a3ef203d9cd315c67b5fd3022a51 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Feb 2017 20:24:09 -0500 Subject: [PATCH 019/933] TST/CLN: reorg groupby tests (#15336) --- pandas/tests/groupby/common.py | 52 + pandas/tests/groupby/test_aggregate.py | 336 ++++- pandas/tests/groupby/test_categorical.py | 291 ++-- pandas/tests/groupby/test_groupby.py | 1670 +--------------------- pandas/tests/groupby/test_misc.py | 101 ++ pandas/tests/groupby/test_timegrouper.py | 609 ++++++++ pandas/tests/groupby/test_transform.py | 494 +++++++ 7 files changed, 1731 insertions(+), 1822 deletions(-) create mode 100644 pandas/tests/groupby/common.py create mode 100644 pandas/tests/groupby/test_misc.py create mode 100644 pandas/tests/groupby/test_timegrouper.py create mode 100644 pandas/tests/groupby/test_transform.py diff --git a/pandas/tests/groupby/common.py b/pandas/tests/groupby/common.py new file mode 100644 index 0000000000000..8a70777d08682 --- /dev/null +++ b/pandas/tests/groupby/common.py @@ -0,0 +1,52 @@ +""" Base setup """ + +import numpy as np +from pandas.util import testing as tm 
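+# NOTE: the MixIn class below centralizes the fixtures shared by the
+# reorganized groupby test modules (test_categorical.py and test_groupby.py
+# import it via ``from .common import MixIn``).  Its setUp builds the sample
+# time series (ts), the random frames (frame, tsframe), the small
+# string-keyed frames df and df_mixed_floats, the MultiIndex frame mframe,
+# and three_group; assert_fp_equal is a float-closeness helper reused by
+# the tests.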
+from pandas import DataFrame, MultiIndex + + +class MixIn(object): + + def setUp(self): + self.ts = tm.makeTimeSeries() + + self.seriesd = tm.getSeriesData() + self.tsd = tm.getTimeSeriesData() + self.frame = DataFrame(self.seriesd) + self.tsframe = DataFrame(self.tsd) + + self.df = DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + self.df_mixed_floats = DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.array( + np.random.randn(8), dtype='float32')}) + + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + self.mframe = DataFrame(np.random.randn(10, 3), index=index, + columns=['A', 'B', 'C']) + + self.three_group = DataFrame( + {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', + 'dull', 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) + + +def assert_fp_equal(a, b): + assert (np.abs(a - b) < 1e-12).all() diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 00ddd293f6014..a1fc97eb8d780 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -1,28 +1,25 @@ # -*- coding: utf-8 -*- -from __future__ import print_function -from datetime import datetime - - -from pandas import date_range -from pandas.core.index import MultiIndex -from pandas.core.api import DataFrame -from pandas.core.series import Series - -from pandas.util.testing import (assert_frame_equal, assert_series_equal - ) - -from pandas.core.groupby import (SpecificationError) -from pandas.compat import (lmap, OrderedDict) -from pandas.formats.printing import pprint_thing +""" +we test .agg behavior / note that .apply is tested +generally in test_groupby.py +""" -from pandas import compat +from __future__ import print_function +from datetime import datetime +from functools import partial -import pandas.core.common as com import numpy as np +from numpy import nan +import pandas as pd +from pandas import (date_range, MultiIndex, DataFrame, + Series, Index, bdate_range) +from pandas.util.testing import assert_frame_equal, assert_series_equal +from pandas.core.groupby import SpecificationError, DataError +from pandas.compat import OrderedDict +from pandas.formats.printing import pprint_thing import pandas.util.testing as tm -import pandas as pd class TestGroupByAggregate(tm.TestCase): @@ -452,35 +449,292 @@ def bad(x): expected = data.groupby(['A', 'B']).agg(lambda x: 'foo') assert_frame_equal(result, expected) + def test_cythonized_aggers(self): + data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan], + 'B': ['A', 'B'] * 6, + 'C': np.random.randn(12)} + df = DataFrame(data) + df.loc[2:10:2, 'C'] = nan + + def _testit(name): + + op = lambda x: getattr(x, name)() + + # single column + grouped = df.drop(['B'], axis=1).groupby('A') + exp = {} + for cat, group in grouped: + exp[cat] = op(group['C']) + exp = DataFrame({'C': exp}) + exp.index.name = 'A' + result = op(grouped) + 
assert_frame_equal(result, exp) + + # multiple columns + grouped = df.groupby(['A', 'B']) + expd = {} + for (cat1, cat2), group in grouped: + expd.setdefault(cat1, {})[cat2] = op(group['C']) + exp = DataFrame(expd).T.stack(dropna=False) + exp.index.names = ['A', 'B'] + exp.name = 'C' + + result = op(grouped)['C'] + if not tm._incompat_bottleneck_version(name): + assert_series_equal(result, exp) + + _testit('count') + _testit('sum') + _testit('std') + _testit('var') + _testit('sem') + _testit('mean') + _testit('median') + _testit('prod') + _testit('min') + _testit('max') + + def test_cython_agg_boolean(self): + frame = DataFrame({'a': np.random.randint(0, 5, 50), + 'b': np.random.randint(0, 2, 50).astype('bool')}) + result = frame.groupby('a')['b'].mean() + expected = frame.groupby('a')['b'].agg(np.mean) -def assert_fp_equal(a, b): - assert (np.abs(a - b) < 1e-12).all() + assert_series_equal(result, expected) + def test_cython_agg_nothing_to_agg(self): + frame = DataFrame({'a': np.random.randint(0, 5, 50), + 'b': ['foo', 'bar'] * 25}) + self.assertRaises(DataError, frame.groupby('a')['b'].mean) + + frame = DataFrame({'a': np.random.randint(0, 5, 50), + 'b': ['foo', 'bar'] * 25}) + self.assertRaises(DataError, frame[['b']].groupby(frame['a']).mean) + + def test_cython_agg_nothing_to_agg_with_dates(self): + frame = DataFrame({'a': np.random.randint(0, 5, 50), + 'b': ['foo', 'bar'] * 25, + 'dates': pd.date_range('now', periods=50, + freq='T')}) + with tm.assertRaisesRegexp(DataError, "No numeric types to aggregate"): + frame.groupby('b').dates.mean() + + def test_cython_agg_frame_columns(self): + # #2113 + df = DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]}) + + df.groupby(level=0, axis='columns').mean() + df.groupby(level=0, axis='columns').mean() + df.groupby(level=0, axis='columns').mean() + df.groupby(level=0, axis='columns').mean() + + def test_cython_fail_agg(self): + dr = bdate_range('1/1/2000', periods=50) + ts = Series(['A', 'B', 'C', 'D', 'E'] * 10, index=dr) + + grouped = ts.groupby(lambda x: x.month) + summed = grouped.sum() + expected = grouped.agg(np.sum) + assert_series_equal(summed, expected) + + def test_agg_consistency(self): + # agg with ([]) and () not consistent + # GH 6715 + + def P1(a): + try: + return np.percentile(a.dropna(), q=1) + except: + return np.nan + + import datetime as dt + df = DataFrame({'col1': [1, 2, 3, 4], + 'col2': [10, 25, 26, 31], + 'date': [dt.date(2013, 2, 10), dt.date(2013, 2, 10), + dt.date(2013, 2, 11), dt.date(2013, 2, 11)]}) + + g = df.groupby('date') + + expected = g.agg([P1]) + expected.columns = expected.columns.levels[0] + + result = g.agg(P1) + assert_frame_equal(result, expected) -def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): - tups = lmap(tuple, df[keys].values) - tups = com._asarray_tuplesafe(tups) - expected = f(df.groupby(tups)[field]) - for k, v in compat.iteritems(expected): - assert (result[k] == v) + def test_wrap_agg_out(self): + grouped = self.three_group.groupby(['A', 'B']) + def func(ser): + if ser.dtype == np.object: + raise TypeError + else: + return ser.sum() -def test_decons(): - from pandas.core.groupby import decons_group_index, get_group_index + result = grouped.aggregate(func) + exp_grouped = self.three_group.loc[:, self.three_group.columns != 'C'] + expected = exp_grouped.groupby(['A', 'B']).aggregate(func) + assert_frame_equal(result, expected) + + def test_agg_multiple_functions_maintain_order(self): + # GH #610 + funcs = [('mean', np.mean), ('max', np.max), ('min', np.min)] + result = 
self.df.groupby('A')['C'].agg(funcs) + exp_cols = Index(['mean', 'max', 'min']) + + self.assert_index_equal(result.columns, exp_cols) + + def test_multiple_functions_tuples_and_non_tuples(self): + # #1359 - def testit(label_list, shape): - group_index = get_group_index(label_list, shape, sort=True, xnull=True) - label_list2 = decons_group_index(group_index, shape) + funcs = [('foo', 'mean'), 'std'] + ex_funcs = [('foo', 'mean'), ('std', 'std')] - for a, b in zip(label_list, label_list2): - assert (np.array_equal(a, b)) + result = self.df.groupby('A')['C'].agg(funcs) + expected = self.df.groupby('A')['C'].agg(ex_funcs) + assert_frame_equal(result, expected) + + result = self.df.groupby('A').agg(funcs) + expected = self.df.groupby('A').agg(ex_funcs) + assert_frame_equal(result, expected) + + def test_agg_multiple_functions_too_many_lambdas(self): + grouped = self.df.groupby('A') + funcs = ['mean', lambda x: x.mean(), lambda x: x.std()] + + self.assertRaises(SpecificationError, grouped.agg, funcs) + + def test_more_flexible_frame_multi_function(self): + from pandas import concat + + grouped = self.df.groupby('A') - shape = (4, 5, 6) - label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( - [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( - [5, 1, 0, 2, 3, 0, 5, 4], 100)] - testit(label_list, shape) + exmean = grouped.agg(OrderedDict([['C', np.mean], ['D', np.mean]])) + exstd = grouped.agg(OrderedDict([['C', np.std], ['D', np.std]])) - shape = (10000, 10000) - label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] - testit(label_list, shape) + expected = concat([exmean, exstd], keys=['mean', 'std'], axis=1) + expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1) + + d = OrderedDict([['C', [np.mean, np.std]], ['D', [np.mean, np.std]]]) + result = grouped.aggregate(d) + + assert_frame_equal(result, expected) + + # be careful + result = grouped.aggregate(OrderedDict([['C', np.mean], + ['D', [np.mean, np.std]]])) + expected = grouped.aggregate(OrderedDict([['C', np.mean], + ['D', [np.mean, np.std]]])) + assert_frame_equal(result, expected) + + def foo(x): + return np.mean(x) + + def bar(x): + return np.std(x, ddof=1) + + d = OrderedDict([['C', np.mean], ['D', OrderedDict( + [['foo', np.mean], ['bar', np.std]])]]) + result = grouped.aggregate(d) + + d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]]) + expected = grouped.aggregate(d) + + assert_frame_equal(result, expected) + + def test_multi_function_flexible_mix(self): + # GH #1268 + grouped = self.df.groupby('A') + + d = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ + 'bar', 'std' + ]])], ['D', 'sum']]) + result = grouped.aggregate(d) + d2 = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ + 'bar', 'std' + ]])], ['D', ['sum']]]) + result2 = grouped.aggregate(d2) + + d3 = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ + 'bar', 'std' + ]])], ['D', {'sum': 'sum'}]]) + expected = grouped.aggregate(d3) + + assert_frame_equal(result, expected) + assert_frame_equal(result2, expected) + + def test_agg_callables(self): + # GH 7929 + df = DataFrame({'foo': [1, 2], 'bar': [3, 4]}).astype(np.int64) + + class fn_class(object): + + def __call__(self, x): + return sum(x) + + equiv_callables = [sum, np.sum, lambda x: sum(x), lambda x: x.sum(), + partial(sum), fn_class()] + + expected = df.groupby("foo").agg(sum) + for ecall in equiv_callables: + result = df.groupby('foo').agg(ecall) + assert_frame_equal(result, expected) + + def test__cython_agg_general(self): + ops = [('mean', np.mean), + ('median', 
np.median), + ('var', np.var), + ('add', np.sum), + ('prod', np.prod), + ('min', np.min), + ('max', np.max), + ('first', lambda x: x.iloc[0]), + ('last', lambda x: x.iloc[-1]), ] + df = DataFrame(np.random.randn(1000)) + labels = np.random.randint(0, 50, size=1000).astype(float) + + for op, targop in ops: + result = df.groupby(labels)._cython_agg_general(op) + expected = df.groupby(labels).agg(targop) + try: + tm.assert_frame_equal(result, expected) + except BaseException as exc: + exc.args += ('operation: %s' % op, ) + raise + + def test_cython_agg_empty_buckets(self): + ops = [('mean', np.mean), + ('median', lambda x: np.median(x) if len(x) > 0 else np.nan), + ('var', lambda x: np.var(x, ddof=1)), + ('add', lambda x: np.sum(x) if len(x) > 0 else np.nan), + ('prod', np.prod), + ('min', np.min), + ('max', np.max), ] + + df = pd.DataFrame([11, 12, 13]) + grps = range(0, 55, 5) + + for op, targop in ops: + result = df.groupby(pd.cut(df[0], grps))._cython_agg_general(op) + expected = df.groupby(pd.cut(df[0], grps)).agg(lambda x: targop(x)) + try: + tm.assert_frame_equal(result, expected) + except BaseException as exc: + exc.args += ('operation: %s' % op,) + raise + + def test_agg_over_numpy_arrays(self): + # GH 3788 + df = pd.DataFrame([[1, np.array([10, 20, 30])], + [1, np.array([40, 50, 60])], + [2, np.array([20, 30, 40])]], + columns=['category', 'arraydata']) + result = df.groupby('category').agg(sum) + + expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]] + expected_index = pd.Index([1, 2], name='category') + expected_column = ['arraydata'] + expected = pd.DataFrame(expected_data, + index=expected_index, + columns=expected_column) + + assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 605b327208a03..8952b520f4f78 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1,67 +1,19 @@ # -*- coding: utf-8 -*- from __future__ import print_function -from numpy import nan - -from pandas.core.index import Index, MultiIndex, CategoricalIndex -from pandas.core.api import DataFrame, Categorical - -from pandas.core.series import Series - -from pandas.util.testing import (assert_frame_equal, assert_series_equal - ) +from datetime import datetime -from pandas.compat import (lmap) - -from pandas import compat - -import pandas.core.common as com import numpy as np +from numpy import nan -import pandas.util.testing as tm import pandas as pd +from pandas import (Index, MultiIndex, CategoricalIndex, + DataFrame, Categorical, Series) +from pandas.util.testing import assert_frame_equal, assert_series_equal +import pandas.util.testing as tm +from .common import MixIn -class TestGroupByCategorical(tm.TestCase): - - def setUp(self): - self.ts = tm.makeTimeSeries() - - self.seriesd = tm.getSeriesData() - self.tsd = tm.getTimeSeriesData() - self.frame = DataFrame(self.seriesd) - self.tsframe = DataFrame(self.tsd) - - self.df = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - - self.df_mixed_floats = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.array( - np.random.randn(8), dtype='float32')}) - - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', - 
'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - self.mframe = DataFrame(np.random.randn(10, 3), index=index, - columns=['A', 'B', 'C']) - - self.three_group = DataFrame( - {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', - 'dull', 'shiny', 'shiny', 'shiny'], - 'D': np.random.randn(11), - 'E': np.random.randn(11), - 'F': np.random.randn(11)}) +class TestGroupByCategorical(MixIn, tm.TestCase): def test_level_groupby_get_group(self): # GH15155 @@ -210,8 +162,9 @@ def test_groupby_datetime_categorical(self): def test_groupby_categorical_index(self): + s = np.random.RandomState(12345) levels = ['foo', 'bar', 'baz', 'qux'] - codes = np.random.randint(0, 4, size=20) + codes = s.randint(0, 4, size=20) cats = Categorical.from_codes(codes, levels, ordered=True) df = DataFrame( np.repeat( @@ -264,70 +217,15 @@ def test_groupby_unstack_categorical(self): expected = pd.Series([6, 4], index=pd.Index(['X', 'Y'], name='artist')) tm.assert_series_equal(result, expected) - def test_groupby_categorical_unequal_len(self): + def test_groupby_bins_unequal_len(self): # GH3011 series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4]) - # The raises only happens with categorical, not with series of types - # category bins = pd.cut(series.dropna().values, 4) # len(bins) != len(series) here - self.assertRaises(ValueError, lambda: series.groupby(bins).mean()) - - def test_groupby_categorical_two_columns(self): - - # https://github.com/pandas-dev/pandas/issues/8138 - d = {'cat': - pd.Categorical(["a", "b", "a", "b"], categories=["a", "b", "c"], - ordered=True), - 'ints': [1, 1, 2, 2], - 'val': [10, 20, 30, 40]} - test = pd.DataFrame(d) - - # Grouping on a single column - groups_single_key = test.groupby("cat") - res = groups_single_key.agg('mean') - - exp_index = pd.CategoricalIndex(["a", "b", "c"], name="cat", - ordered=True) - exp = DataFrame({"ints": [1.5, 1.5, np.nan], "val": [20, 30, np.nan]}, - index=exp_index) - tm.assert_frame_equal(res, exp) - - # Grouping on two columns - groups_double_key = test.groupby(["cat", "ints"]) - res = groups_double_key.agg('mean') - exp = DataFrame({"val": [10, 30, 20, 40, np.nan, np.nan], - "cat": pd.Categorical(["a", "a", "b", "b", "c", "c"], - ordered=True), - "ints": [1, 2, 1, 2, 1, 2]}).set_index(["cat", "ints" - ]) - tm.assert_frame_equal(res, exp) - - # GH 10132 - for key in [('a', 1), ('b', 2), ('b', 1), ('a', 2)]: - c, i = key - result = groups_double_key.get_group(key) - expected = test[(test.cat == c) & (test.ints == i)] - assert_frame_equal(result, expected) - - d = {'C1': [3, 3, 4, 5], 'C2': [1, 2, 3, 4], 'C3': [10, 100, 200, 34]} - test = pd.DataFrame(d) - values = pd.cut(test['C1'], [1, 2, 3, 6]) - values.name = "cat" - groups_double_key = test.groupby([values, 'C2']) - - res = groups_double_key.agg('mean') - nan = np.nan - idx = MultiIndex.from_product( - [Categorical(["(1, 2]", "(2, 3]", "(3, 6]"], ordered=True), - [1, 2, 3, 4]], - names=["cat", "C2"]) - exp = DataFrame({"C1": [nan, nan, nan, nan, 3, 3, - nan, nan, nan, nan, 4, 5], - "C3": [nan, nan, nan, nan, 10, 100, - nan, nan, nan, nan, 200, 34]}, index=idx) - tm.assert_frame_equal(res, exp) + def f(): + series.groupby(bins).mean() + self.assertRaises(ValueError, f) def test_groupby_multi_categorical_as_index(self): # GH13204 @@ -454,35 +352,148 
@@ def test_groupby_categorical_no_compress(self): exp = np.array([1, 2, 4, np.nan]) self.assert_numpy_array_equal(result, exp) + def test_groupby_sort_categorical(self): + # dataframe groupby sort was being ignored # GH 8868 + df = DataFrame([['(7.5, 10]', 10, 10], + ['(7.5, 10]', 8, 20], + ['(2.5, 5]', 5, 30], + ['(5, 7.5]', 6, 40], + ['(2.5, 5]', 4, 50], + ['(0, 2.5]', 1, 60], + ['(5, 7.5]', 7, 70]], columns=['range', 'foo', 'bar']) + df['range'] = Categorical(df['range'], ordered=True) + index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', + '(7.5, 10]'], name='range', ordered=True) + result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], + columns=['foo', 'bar'], index=index) + + col = 'range' + assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) + # when categories is ordered, group is ordered by category's order + assert_frame_equal(result_sort, df.groupby(col, sort=False).first()) + + df['range'] = Categorical(df['range'], ordered=False) + index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', + '(7.5, 10]'], name='range') + result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], + columns=['foo', 'bar'], index=index) + + index = CategoricalIndex(['(7.5, 10]', '(2.5, 5]', '(5, 7.5]', + '(0, 2.5]'], + categories=['(7.5, 10]', '(2.5, 5]', + '(5, 7.5]', '(0, 2.5]'], + name='range') + result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], + index=index, columns=['foo', 'bar']) + + col = 'range' + # this is an unordered categorical, but we allow this #### + assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) + assert_frame_equal(result_nosort, df.groupby(col, sort=False).first()) + + def test_groupby_sort_categorical_datetimelike(self): + # GH10505 + + # use same data as test_groupby_sort_categorical, which category is + # corresponding to datetime.month + df = DataFrame({'dt': [datetime(2011, 7, 1), datetime(2011, 7, 1), + datetime(2011, 2, 1), datetime(2011, 5, 1), + datetime(2011, 2, 1), datetime(2011, 1, 1), + datetime(2011, 5, 1)], + 'foo': [10, 8, 5, 6, 4, 1, 7], + 'bar': [10, 20, 30, 40, 50, 60, 70]}, + columns=['dt', 'foo', 'bar']) + + # ordered=True + df['dt'] = Categorical(df['dt'], ordered=True) + index = [datetime(2011, 1, 1), datetime(2011, 2, 1), + datetime(2011, 5, 1), datetime(2011, 7, 1)] + result_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) + result_sort.index = CategoricalIndex(index, name='dt', ordered=True) + + index = [datetime(2011, 7, 1), datetime(2011, 2, 1), + datetime(2011, 5, 1), datetime(2011, 1, 1)] + result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], + columns=['foo', 'bar']) + result_nosort.index = CategoricalIndex(index, categories=index, + name='dt', ordered=True) + + col = 'dt' + assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) + # when categories is ordered, group is ordered by category's order + assert_frame_equal(result_sort, df.groupby(col, sort=False).first()) + + # ordered = False + df['dt'] = Categorical(df['dt'], ordered=False) + index = [datetime(2011, 1, 1), datetime(2011, 2, 1), + datetime(2011, 5, 1), datetime(2011, 7, 1)] + result_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) + result_sort.index = CategoricalIndex(index, name='dt') + + index = [datetime(2011, 7, 1), datetime(2011, 2, 1), + datetime(2011, 5, 1), datetime(2011, 1, 1)] + result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], + columns=['foo', 'bar']) + result_nosort.index = 
CategoricalIndex(index, categories=index, + name='dt') + + col = 'dt' + assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) + assert_frame_equal(result_nosort, df.groupby(col, sort=False).first()) -def assert_fp_equal(a, b): - assert (np.abs(a - b) < 1e-12).all() - + def test_groupby_categorical_two_columns(self): -def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): - tups = lmap(tuple, df[keys].values) - tups = com._asarray_tuplesafe(tups) - expected = f(df.groupby(tups)[field]) - for k, v in compat.iteritems(expected): - assert (result[k] == v) + # https://github.com/pandas-dev/pandas/issues/8138 + d = {'cat': + pd.Categorical(["a", "b", "a", "b"], categories=["a", "b", "c"], + ordered=True), + 'ints': [1, 1, 2, 2], + 'val': [10, 20, 30, 40]} + test = pd.DataFrame(d) + # Grouping on a single column + groups_single_key = test.groupby("cat") + res = groups_single_key.agg('mean') -def test_decons(): - from pandas.core.groupby import decons_group_index, get_group_index + exp_index = pd.CategoricalIndex(["a", "b", "c"], name="cat", + ordered=True) + exp = DataFrame({"ints": [1.5, 1.5, np.nan], "val": [20, 30, np.nan]}, + index=exp_index) + tm.assert_frame_equal(res, exp) - def testit(label_list, shape): - group_index = get_group_index(label_list, shape, sort=True, xnull=True) - label_list2 = decons_group_index(group_index, shape) + # Grouping on two columns + groups_double_key = test.groupby(["cat", "ints"]) + res = groups_double_key.agg('mean') + exp = DataFrame({"val": [10, 30, 20, 40, np.nan, np.nan], + "cat": pd.Categorical(["a", "a", "b", "b", "c", "c"], + ordered=True), + "ints": [1, 2, 1, 2, 1, 2]}).set_index(["cat", "ints" + ]) + tm.assert_frame_equal(res, exp) - for a, b in zip(label_list, label_list2): - assert (np.array_equal(a, b)) + # GH 10132 + for key in [('a', 1), ('b', 2), ('b', 1), ('a', 2)]: + c, i = key + result = groups_double_key.get_group(key) + expected = test[(test.cat == c) & (test.ints == i)] + assert_frame_equal(result, expected) - shape = (4, 5, 6) - label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( - [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( - [5, 1, 0, 2, 3, 0, 5, 4], 100)] - testit(label_list, shape) + d = {'C1': [3, 3, 4, 5], 'C2': [1, 2, 3, 4], 'C3': [10, 100, 200, 34]} + test = pd.DataFrame(d) + values = pd.cut(test['C1'], [1, 2, 3, 6]) + values.name = "cat" + groups_double_key = test.groupby([values, 'C2']) - shape = (10000, 10000) - label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] - testit(label_list, shape) + res = groups_double_key.agg('mean') + nan = np.nan + idx = MultiIndex.from_product( + [Categorical(["(1, 2]", "(2, 3]", "(3, 6]"], ordered=True), + [1, 2, 3, 4]], + names=["cat", "C2"]) + exp = DataFrame({"C1": [nan, nan, nan, nan, 3, 3, + nan, nan, nan, nan, 4, 5], + "C3": [nan, nan, nan, nan, 10, 100, + nan, nan, nan, nan, 200, 34]}, index=idx) + tm.assert_frame_equal(res, exp) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index df4707fcef3f0..458e869130190 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1,20 +1,13 @@ # -*- coding: utf-8 -*- from __future__ import print_function -import nose from string import ascii_lowercase from datetime import datetime from numpy import nan -from pandas.types.common import _ensure_platform_int -from pandas import date_range, bdate_range, Timestamp, isnull -from pandas.core.index import Index, MultiIndex, CategoricalIndex -from pandas.core.api import 
Categorical, DataFrame +from pandas import (date_range, bdate_range, Timestamp, + isnull, Index, MultiIndex, DataFrame, Series) from pandas.core.common import UnsupportedFunctionCall -from pandas.core.groupby import (SpecificationError, DataError, _nargsort, - _lexsort_indexer) -from pandas.core.series import Series -from pandas.core.config import option_context from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal, assert_index_equal, assertRaisesRegexp) @@ -24,57 +17,16 @@ from pandas.core.panel import Panel from pandas.tools.merge import concat from collections import defaultdict -from functools import partial import pandas.core.common as com import numpy as np import pandas.core.nanops as nanops - import pandas.util.testing as tm import pandas as pd +from .common import MixIn -class TestGroupBy(tm.TestCase): - - def setUp(self): - self.ts = tm.makeTimeSeries() - - self.seriesd = tm.getSeriesData() - self.tsd = tm.getTimeSeriesData() - self.frame = DataFrame(self.seriesd) - self.tsframe = DataFrame(self.tsd) - - self.df = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - - self.df_mixed_floats = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.array( - np.random.randn(8), dtype='float32')}) - - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', - 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - self.mframe = DataFrame(np.random.randn(10, 3), index=index, - columns=['A', 'B', 'C']) - - self.three_group = DataFrame( - {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', - 'dull', 'shiny', 'shiny', 'shiny'], - 'D': np.random.randn(11), - 'E': np.random.randn(11), - 'F': np.random.randn(11)}) +class TestGroupBy(MixIn, tm.TestCase): def test_basic(self): def checkit(dtype): @@ -774,12 +726,12 @@ def max_value(group): def test_groupby_return_type(self): # GH2893, return a reduced type - df1 = DataFrame([{"val1": 1, - "val2": 20}, {"val1": 1, - "val2": 19}, {"val1": 2, - "val2": 27}, {"val1": 2, - "val2": 12} - ]) + df1 = DataFrame( + [{"val1": 1, "val2": 20}, + {"val1": 1, "val2": 19}, + {"val1": 2, "val2": 27}, + {"val1": 2, "val2": 12} + ]) def func(dataf): return dataf["val2"] - dataf["val2"].mean() @@ -787,12 +739,12 @@ def func(dataf): result = df1.groupby("val1", squeeze=True).apply(func) tm.assertIsInstance(result, Series) - df2 = DataFrame([{"val1": 1, - "val2": 20}, {"val1": 1, - "val2": 19}, {"val1": 1, - "val2": 27}, {"val1": 1, - "val2": 12} - ]) + df2 = DataFrame( + [{"val1": 1, "val2": 20}, + {"val1": 1, "val2": 19}, + {"val1": 1, "val2": 27}, + {"val1": 1, "val2": 12} + ]) def func(dataf): return dataf["val2"] - dataf["val2"].mean() @@ -902,6 +854,7 @@ def test_get_group(self): lambda: g.get_group(('foo', 'bar', 'baz'))) def test_get_group_empty_bins(self): + d = pd.DataFrame([3, 1, 7, 6]) bins = [0, 5, 10, 15] g = d.groupby(pd.cut(d[0], bins)) @@ -1043,266 +996,6 @@ def test_basic_regression(self): grouped = result.groupby(groupings) grouped.mean() - def 
test_transform(self): - data = Series(np.arange(9) // 3, index=np.arange(9)) - - index = np.arange(9) - np.random.shuffle(index) - data = data.reindex(index) - - grouped = data.groupby(lambda x: x // 3) - - transformed = grouped.transform(lambda x: x * x.sum()) - self.assertEqual(transformed[7], 12) - - # GH 8046 - # make sure that we preserve the input order - - df = DataFrame( - np.arange(6, dtype='int64').reshape( - 3, 2), columns=["a", "b"], index=[0, 2, 1]) - key = [0, 0, 1] - expected = df.sort_index().groupby(key).transform( - lambda x: x - x.mean()).groupby(key).mean() - result = df.groupby(key).transform(lambda x: x - x.mean()).groupby( - key).mean() - assert_frame_equal(result, expected) - - def demean(arr): - return arr - arr.mean() - - people = DataFrame(np.random.randn(5, 5), - columns=['a', 'b', 'c', 'd', 'e'], - index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis']) - key = ['one', 'two', 'one', 'two', 'one'] - result = people.groupby(key).transform(demean).groupby(key).mean() - expected = people.groupby(key).apply(demean).groupby(key).mean() - assert_frame_equal(result, expected) - - # GH 8430 - df = tm.makeTimeDataFrame() - g = df.groupby(pd.TimeGrouper('M')) - g.transform(lambda x: x - 1) - - # GH 9700 - df = DataFrame({'a': range(5, 10), 'b': range(5)}) - result = df.groupby('a').transform(max) - expected = DataFrame({'b': range(5)}) - tm.assert_frame_equal(result, expected) - - def test_transform_fast(self): - - df = DataFrame({'id': np.arange(100000) / 3, - 'val': np.random.randn(100000)}) - - grp = df.groupby('id')['val'] - - values = np.repeat(grp.mean().values, - _ensure_platform_int(grp.count().values)) - expected = pd.Series(values, index=df.index, name='val') - - result = grp.transform(np.mean) - assert_series_equal(result, expected) - - result = grp.transform('mean') - assert_series_equal(result, expected) - - # GH 12737 - df = pd.DataFrame({'grouping': [0, 1, 1, 3], 'f': [1.1, 2.1, 3.1, 4.5], - 'd': pd.date_range('2014-1-1', '2014-1-4'), - 'i': [1, 2, 3, 4]}, - columns=['grouping', 'f', 'i', 'd']) - result = df.groupby('grouping').transform('first') - - dates = [pd.Timestamp('2014-1-1'), pd.Timestamp('2014-1-2'), - pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-4')] - expected = pd.DataFrame({'f': [1.1, 2.1, 2.1, 4.5], - 'd': dates, - 'i': [1, 2, 2, 4]}, - columns=['f', 'i', 'd']) - assert_frame_equal(result, expected) - - # selection - result = df.groupby('grouping')[['f', 'i']].transform('first') - expected = expected[['f', 'i']] - assert_frame_equal(result, expected) - - # dup columns - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['g', 'a', 'a']) - result = df.groupby('g').transform('first') - expected = df.drop('g', axis=1) - assert_frame_equal(result, expected) - - def test_transform_broadcast(self): - grouped = self.ts.groupby(lambda x: x.month) - result = grouped.transform(np.mean) - - self.assert_index_equal(result.index, self.ts.index) - for _, gp in grouped: - assert_fp_equal(result.reindex(gp.index), gp.mean()) - - grouped = self.tsframe.groupby(lambda x: x.month) - result = grouped.transform(np.mean) - self.assert_index_equal(result.index, self.tsframe.index) - for _, gp in grouped: - agged = gp.mean() - res = result.reindex(gp.index) - for col in self.tsframe: - assert_fp_equal(res[col], agged[col]) - - # group columns - grouped = self.tsframe.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, - axis=1) - result = grouped.transform(np.mean) - self.assert_index_equal(result.index, self.tsframe.index) - self.assert_index_equal(result.columns, 
self.tsframe.columns) - for _, gp in grouped: - agged = gp.mean(1) - res = result.reindex(columns=gp.columns) - for idx in gp.index: - assert_fp_equal(res.xs(idx), agged[idx]) - - def test_transform_axis(self): - - # make sure that we are setting the axes - # correctly when on axis=0 or 1 - # in the presence of a non-monotonic indexer - # GH12713 - - base = self.tsframe.iloc[0:5] - r = len(base.index) - c = len(base.columns) - tso = DataFrame(np.random.randn(r, c), - index=base.index, - columns=base.columns, - dtype='float64') - # monotonic - ts = tso - grouped = ts.groupby(lambda x: x.weekday()) - result = ts - grouped.transform('mean') - expected = grouped.apply(lambda x: x - x.mean()) - assert_frame_equal(result, expected) - - ts = ts.T - grouped = ts.groupby(lambda x: x.weekday(), axis=1) - result = ts - grouped.transform('mean') - expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) - assert_frame_equal(result, expected) - - # non-monotonic - ts = tso.iloc[[1, 0] + list(range(2, len(base)))] - grouped = ts.groupby(lambda x: x.weekday()) - result = ts - grouped.transform('mean') - expected = grouped.apply(lambda x: x - x.mean()) - assert_frame_equal(result, expected) - - ts = ts.T - grouped = ts.groupby(lambda x: x.weekday(), axis=1) - result = ts - grouped.transform('mean') - expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) - assert_frame_equal(result, expected) - - def test_transform_dtype(self): - # GH 9807 - # Check transform dtype output is preserved - df = DataFrame([[1, 3], [2, 3]]) - result = df.groupby(1).transform('mean') - expected = DataFrame([[1.5], [1.5]]) - assert_frame_equal(result, expected) - - def test_transform_bug(self): - # GH 5712 - # transforming on a datetime column - df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5))) - result = df.groupby('A')['B'].transform( - lambda x: x.rank(ascending=False)) - expected = Series(np.arange(5, 0, step=-1), name='B') - assert_series_equal(result, expected) - - def test_transform_multiple(self): - grouped = self.ts.groupby([lambda x: x.year, lambda x: x.month]) - - grouped.transform(lambda x: x * 2) - grouped.transform(np.mean) - - def test_dispatch_transform(self): - df = self.tsframe[::5].reindex(self.tsframe.index) - - grouped = df.groupby(lambda x: x.month) - - filled = grouped.fillna(method='pad') - fillit = lambda x: x.fillna(method='pad') - expected = df.groupby(lambda x: x.month).transform(fillit) - assert_frame_equal(filled, expected) - - def test_transform_select_columns(self): - f = lambda x: x.mean() - result = self.df.groupby('A')['C', 'D'].transform(f) - - selection = self.df[['C', 'D']] - expected = selection.groupby(self.df['A']).transform(f) - - assert_frame_equal(result, expected) - - def test_transform_exclude_nuisance(self): - - # this also tests orderings in transform between - # series/frame to make sure it's consistent - expected = {} - grouped = self.df.groupby('A') - expected['C'] = grouped['C'].transform(np.mean) - expected['D'] = grouped['D'].transform(np.mean) - expected = DataFrame(expected) - result = self.df.groupby('A').transform(np.mean) - - assert_frame_equal(result, expected) - - def test_transform_function_aliases(self): - result = self.df.groupby('A').transform('mean') - expected = self.df.groupby('A').transform(np.mean) - assert_frame_equal(result, expected) - - result = self.df.groupby('A')['C'].transform('mean') - expected = self.df.groupby('A')['C'].transform(np.mean) - assert_series_equal(result, expected) - - def test_series_fast_transform_date(self): - # GH 13191 
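# Editorial aside (illustration only; not part of the patch itself): the
# test being removed below exercises the fast path of transform('first')
# on a datetime column. Each group's first value is broadcast back to the
# original row positions, and rows whose group key is NaN come back as NaT.
# A minimal, self-contained sketch of that behavior:
import numpy as np
import pandas as pd

df = pd.DataFrame({'grouping': [np.nan, 1, 1, 3],
                   'd': pd.date_range('2014-1-1', '2014-1-4')})
result = df.groupby('grouping')['d'].transform('first')
# expected values: [NaT, 2014-01-02, 2014-01-02, 2014-01-04], name='d'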
- df = pd.DataFrame({'grouping': [np.nan, 1, 1, 3], - 'd': pd.date_range('2014-1-1', '2014-1-4')}) - result = df.groupby('grouping')['d'].transform('first') - dates = [pd.NaT, pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-2'), - pd.Timestamp('2014-1-4')] - expected = pd.Series(dates, name='d') - assert_series_equal(result, expected) - - def test_transform_length(self): - # GH 9697 - df = pd.DataFrame({'col1': [1, 1, 2, 2], 'col2': [1, 2, 3, np.nan]}) - expected = pd.Series([3.0] * 4) - - def nsum(x): - return np.nansum(x) - - results = [df.groupby('col1').transform(sum)['col2'], - df.groupby('col1')['col2'].transform(sum), - df.groupby('col1').transform(nsum)['col2'], - df.groupby('col1')['col2'].transform(nsum)] - for result in results: - assert_series_equal(result, expected, check_names=False) - - def test_transform_coercion(self): - - # 14457 - # when we are transforming be sure to not coerce - # via assignment - df = pd.DataFrame(dict(A=['a', 'a'], B=[0, 1])) - g = df.groupby('A') - - expected = g.transform(np.mean) - result = g.transform(lambda x: np.mean(x)) - assert_frame_equal(result, expected) - def test_with_na(self): index = Index(np.arange(10)) @@ -1330,58 +1023,6 @@ def f(x): assert_series_equal(agged, expected, check_dtype=False) self.assertTrue(issubclass(agged.dtype.type, np.dtype(dtype).type)) - def test_groupby_transform_with_int(self): - - # GH 3740, make sure that we might upcast on item-by-item transform - - # floats - df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=Series(1, dtype='float64'), - C=Series( - [1, 2, 3, 1, 2, 3], dtype='float64'), D='foo')) - with np.errstate(all='ignore'): - result = df.groupby('A').transform( - lambda x: (x - x.mean()) / x.std()) - expected = DataFrame(dict(B=np.nan, C=Series( - [-1, 0, 1, -1, 0, 1], dtype='float64'))) - assert_frame_equal(result, expected) - - # int case - df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, - C=[1, 2, 3, 1, 2, 3], D='foo')) - with np.errstate(all='ignore'): - result = df.groupby('A').transform( - lambda x: (x - x.mean()) / x.std()) - expected = DataFrame(dict(B=np.nan, C=[-1, 0, 1, -1, 0, 1])) - assert_frame_equal(result, expected) - - # int that needs float conversion - s = Series([2, 3, 4, 10, 5, -1]) - df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, C=s, D='foo')) - with np.errstate(all='ignore'): - result = df.groupby('A').transform( - lambda x: (x - x.mean()) / x.std()) - - s1 = s.iloc[0:3] - s1 = (s1 - s1.mean()) / s1.std() - s2 = s.iloc[3:6] - s2 = (s2 - s2.mean()) / s2.std() - expected = DataFrame(dict(B=np.nan, C=concat([s1, s2]))) - assert_frame_equal(result, expected) - - # int downcasting - result = df.groupby('A').transform(lambda x: x * 2 / 2) - expected = DataFrame(dict(B=1, C=[2, 3, 4, 10, 5, -1])) - assert_frame_equal(result, expected) - - def test_groupby_transform_with_nan_group(self): - # GH 9941 - df = pd.DataFrame({'a': range(10), - 'b': [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]}) - result = df.groupby(df.b)['a'].transform(max) - expected = pd.Series([1., 1., 2., 3., np.nan, 6., 6., 9., 9., 9.], - name='a') - assert_series_equal(result, expected) - def test_indices_concatenation_order(self): # GH 2808 @@ -1845,6 +1486,7 @@ def check_nunique(df, keys, as_index=True): def test_series_groupby_value_counts(self): from itertools import product + np.random.seed(1234) def rebuild_index(df): arr = list(map(df.index.get_level_values, range(df.index.nlevels))) @@ -2220,51 +1862,6 @@ def test_builtins_apply(self): # GH8155 assert_series_equal(getattr(result, fname)(), getattr(df, fname)()) - def 
test_cythonized_aggers(self): - data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan], - 'B': ['A', 'B'] * 6, - 'C': np.random.randn(12)} - df = DataFrame(data) - df.loc[2:10:2, 'C'] = nan - - def _testit(name): - - op = lambda x: getattr(x, name)() - - # single column - grouped = df.drop(['B'], axis=1).groupby('A') - exp = {} - for cat, group in grouped: - exp[cat] = op(group['C']) - exp = DataFrame({'C': exp}) - exp.index.name = 'A' - result = op(grouped) - assert_frame_equal(result, exp) - - # multiple columns - grouped = df.groupby(['A', 'B']) - expd = {} - for (cat1, cat2), group in grouped: - expd.setdefault(cat1, {})[cat2] = op(group['C']) - exp = DataFrame(expd).T.stack(dropna=False) - exp.index.names = ['A', 'B'] - exp.name = 'C' - - result = op(grouped)['C'] - if not tm._incompat_bottleneck_version(name): - assert_series_equal(result, exp) - - _testit('count') - _testit('sum') - _testit('std') - _testit('var') - _testit('sem') - _testit('mean') - _testit('median') - _testit('prod') - _testit('min') - _testit('max') - def test_max_min_non_numeric(self): # #2700 aa = DataFrame({'nn': [11, 11, 22, 22], @@ -2399,31 +1996,6 @@ def test_arg_passthru(self): result = f(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) - def test_cython_agg_boolean(self): - frame = DataFrame({'a': np.random.randint(0, 5, 50), - 'b': np.random.randint(0, 2, 50).astype('bool')}) - result = frame.groupby('a')['b'].mean() - expected = frame.groupby('a')['b'].agg(np.mean) - - assert_series_equal(result, expected) - - def test_cython_agg_nothing_to_agg(self): - frame = DataFrame({'a': np.random.randint(0, 5, 50), - 'b': ['foo', 'bar'] * 25}) - self.assertRaises(DataError, frame.groupby('a')['b'].mean) - - frame = DataFrame({'a': np.random.randint(0, 5, 50), - 'b': ['foo', 'bar'] * 25}) - self.assertRaises(DataError, frame[['b']].groupby(frame['a']).mean) - - def test_cython_agg_nothing_to_agg_with_dates(self): - frame = DataFrame({'a': np.random.randint(0, 5, 50), - 'b': ['foo', 'bar'] * 25, - 'dates': pd.date_range('now', periods=50, - freq='T')}) - with tm.assertRaisesRegexp(DataError, "No numeric types to aggregate"): - frame.groupby('b').dates.mean() - def test_groupby_timedelta_cython_count(self): df = DataFrame({'g': list('ab' * 2), 'delt': np.arange(4).astype('timedelta64[ns]')}) @@ -2433,15 +2005,6 @@ def test_groupby_timedelta_cython_count(self): result = df.groupby('g').delt.count() tm.assert_series_equal(expected, result) - def test_cython_agg_frame_columns(self): - # #2113 - df = DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]}) - - df.groupby(level=0, axis='columns').mean() - df.groupby(level=0, axis='columns').mean() - df.groupby(level=0, axis='columns').mean() - df.groupby(level=0, axis='columns').mean() - def test_wrap_aggregated_output_multindex(self): df = self.mframe.T df['baz', 'two'] = 'peekaboo' @@ -2616,15 +2179,6 @@ def test_grouping_labels(self): exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp) assert_almost_equal(grouped.grouper.labels[0], exp_labels) - def test_cython_fail_agg(self): - dr = bdate_range('1/1/2000', periods=50) - ts = Series(['A', 'B', 'C', 'D', 'E'] * 10, index=dr) - - grouped = ts.groupby(lambda x: x.month) - summed = grouped.sum() - expected = grouped.agg(np.sum) - assert_series_equal(summed, expected) - def test_apply_series_to_frame(self): def f(piece): with np.errstate(invalid='ignore'): @@ -3051,30 +2605,6 @@ def test_grouping_ndarray(self): assert_frame_equal(result, expected, check_names=False ) # Note: no names when 
grouping by value - def test_agg_consistency(self): - # agg with ([]) and () not consistent - # GH 6715 - - def P1(a): - try: - return np.percentile(a.dropna(), q=1) - except: - return np.nan - - import datetime as dt - df = DataFrame({'col1': [1, 2, 3, 4], - 'col2': [10, 25, 26, 31], - 'date': [dt.date(2013, 2, 10), dt.date(2013, 2, 10), - dt.date(2013, 2, 11), dt.date(2013, 2, 11)]}) - - g = df.groupby('date') - - expected = g.agg([P1]) - expected.columns = expected.columns.levels[0] - - result = g.agg(P1) - assert_frame_equal(result, expected) - def test_apply_typecast_fail(self): df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], 'c': np.tile( @@ -3159,28 +2689,6 @@ def f(g): result = grouped.apply(f) self.assertTrue('value3' in result) - def test_transform_mixed_type(self): - index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3] - ]) - df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], - 'c': np.tile(['a', 'b', 'c'], 2), - 'v': np.arange(1., 7.)}, index=index) - - def f(group): - group['g'] = group['d'] * 2 - return group[:1] - - grouped = df.groupby('c') - result = grouped.apply(f) - - self.assertEqual(result['d'].dtype, np.float64) - - # this is by definition a mutating operation! - with option_context('mode.chained_assignment', None): - for key, group in grouped: - res = f(group) - assert_frame_equal(res, result.loc[key]) - def test_groupby_wrong_multi_labels(self): from pandas import read_csv data = """index,foo,bar,baz,spam,data @@ -3768,20 +3276,6 @@ def test_no_nonsense_name(self): result = s.groupby(self.frame['A']).agg(np.sum) self.assertIsNone(result.name) - def test_wrap_agg_out(self): - grouped = self.three_group.groupby(['A', 'B']) - - def func(ser): - if ser.dtype == np.object: - raise TypeError - else: - return ser.sum() - - result = grouped.aggregate(func) - exp_grouped = self.three_group.loc[:, self.three_group.columns != 'C'] - expected = exp_grouped.groupby(['A', 'B']).aggregate(func) - assert_frame_equal(result, expected) - def test_multifunc_sum_bug(self): # GH #1065 x = DataFrame(np.arange(9).reshape(3, 3)) @@ -3839,110 +3333,6 @@ def test_getitem_numeric_column_names(self): assert_frame_equal(result2, expected) assert_frame_equal(result3, expected) - def test_agg_multiple_functions_maintain_order(self): - # GH #610 - funcs = [('mean', np.mean), ('max', np.max), ('min', np.min)] - result = self.df.groupby('A')['C'].agg(funcs) - exp_cols = Index(['mean', 'max', 'min']) - - self.assert_index_equal(result.columns, exp_cols) - - def test_multiple_functions_tuples_and_non_tuples(self): - # #1359 - - funcs = [('foo', 'mean'), 'std'] - ex_funcs = [('foo', 'mean'), ('std', 'std')] - - result = self.df.groupby('A')['C'].agg(funcs) - expected = self.df.groupby('A')['C'].agg(ex_funcs) - assert_frame_equal(result, expected) - - result = self.df.groupby('A').agg(funcs) - expected = self.df.groupby('A').agg(ex_funcs) - assert_frame_equal(result, expected) - - def test_agg_multiple_functions_too_many_lambdas(self): - grouped = self.df.groupby('A') - funcs = ['mean', lambda x: x.mean(), lambda x: x.std()] - - self.assertRaises(SpecificationError, grouped.agg, funcs) - - def test_more_flexible_frame_multi_function(self): - from pandas import concat - - grouped = self.df.groupby('A') - - exmean = grouped.agg(OrderedDict([['C', np.mean], ['D', np.mean]])) - exstd = grouped.agg(OrderedDict([['C', np.std], ['D', np.std]])) - - expected = concat([exmean, exstd], keys=['mean', 'std'], axis=1) - expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1) - - d = 
OrderedDict([['C', [np.mean, np.std]], ['D', [np.mean, np.std]]]) - result = grouped.aggregate(d) - - assert_frame_equal(result, expected) - - # be careful - result = grouped.aggregate(OrderedDict([['C', np.mean], - ['D', [np.mean, np.std]]])) - expected = grouped.aggregate(OrderedDict([['C', np.mean], - ['D', [np.mean, np.std]]])) - assert_frame_equal(result, expected) - - def foo(x): - return np.mean(x) - - def bar(x): - return np.std(x, ddof=1) - - d = OrderedDict([['C', np.mean], ['D', OrderedDict( - [['foo', np.mean], ['bar', np.std]])]]) - result = grouped.aggregate(d) - - d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]]) - expected = grouped.aggregate(d) - - assert_frame_equal(result, expected) - - def test_multi_function_flexible_mix(self): - # GH #1268 - grouped = self.df.groupby('A') - - d = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ - 'bar', 'std' - ]])], ['D', 'sum']]) - result = grouped.aggregate(d) - d2 = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ - 'bar', 'std' - ]])], ['D', ['sum']]]) - result2 = grouped.aggregate(d2) - - d3 = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ - 'bar', 'std' - ]])], ['D', {'sum': 'sum'}]]) - expected = grouped.aggregate(d3) - - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) - - def test_agg_callables(self): - # GH 7929 - df = DataFrame({'foo': [1, 2], 'bar': [3, 4]}).astype(np.int64) - - class fn_class(object): - - def __call__(self, x): - return sum(x) - - equiv_callables = [sum, np.sum, lambda x: sum(x), lambda x: x.sum(), - partial(sum), fn_class()] - - expected = df.groupby("foo").agg(sum) - for ecall in equiv_callables: - result = df.groupby('foo').agg(ecall) - assert_frame_equal(result, expected) - def test_set_group_name(self): def f(group): assert group.name is not None @@ -3980,97 +3370,6 @@ def test_no_dummy_key_names(self): ]).sum() self.assertEqual(result.index.names, (None, None)) - def test_groupby_sort_categorical(self): - # dataframe groupby sort was being ignored # GH 8868 - df = DataFrame([['(7.5, 10]', 10, 10], - ['(7.5, 10]', 8, 20], - ['(2.5, 5]', 5, 30], - ['(5, 7.5]', 6, 40], - ['(2.5, 5]', 4, 50], - ['(0, 2.5]', 1, 60], - ['(5, 7.5]', 7, 70]], columns=['range', 'foo', 'bar']) - df['range'] = Categorical(df['range'], ordered=True) - index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', - '(7.5, 10]'], name='range', ordered=True) - result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], - columns=['foo', 'bar'], index=index) - - col = 'range' - assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) - # when categories is ordered, group is ordered by category's order - assert_frame_equal(result_sort, df.groupby(col, sort=False).first()) - - df['range'] = Categorical(df['range'], ordered=False) - index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', - '(7.5, 10]'], name='range') - result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], - columns=['foo', 'bar'], index=index) - - index = CategoricalIndex(['(7.5, 10]', '(2.5, 5]', '(5, 7.5]', - '(0, 2.5]'], - categories=['(7.5, 10]', '(2.5, 5]', - '(5, 7.5]', '(0, 2.5]'], - name='range') - result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], - index=index, columns=['foo', 'bar']) - - col = 'range' - # this is an unordered categorical, but we allow this #### - assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) - assert_frame_equal(result_nosort, df.groupby(col, sort=False).first()) - - def test_groupby_sort_categorical_datetimelike(self): - # GH10505 - - 
# use same data as test_groupby_sort_categorical, which category is - # corresponding to datetime.month - df = DataFrame({'dt': [datetime(2011, 7, 1), datetime(2011, 7, 1), - datetime(2011, 2, 1), datetime(2011, 5, 1), - datetime(2011, 2, 1), datetime(2011, 1, 1), - datetime(2011, 5, 1)], - 'foo': [10, 8, 5, 6, 4, 1, 7], - 'bar': [10, 20, 30, 40, 50, 60, 70]}, - columns=['dt', 'foo', 'bar']) - - # ordered=True - df['dt'] = Categorical(df['dt'], ordered=True) - index = [datetime(2011, 1, 1), datetime(2011, 2, 1), - datetime(2011, 5, 1), datetime(2011, 7, 1)] - result_sort = DataFrame( - [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) - result_sort.index = CategoricalIndex(index, name='dt', ordered=True) - - index = [datetime(2011, 7, 1), datetime(2011, 2, 1), - datetime(2011, 5, 1), datetime(2011, 1, 1)] - result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], - columns=['foo', 'bar']) - result_nosort.index = CategoricalIndex(index, categories=index, - name='dt', ordered=True) - - col = 'dt' - assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) - # when categories is ordered, group is ordered by category's order - assert_frame_equal(result_sort, df.groupby(col, sort=False).first()) - - # ordered = False - df['dt'] = Categorical(df['dt'], ordered=False) - index = [datetime(2011, 1, 1), datetime(2011, 2, 1), - datetime(2011, 5, 1), datetime(2011, 7, 1)] - result_sort = DataFrame( - [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) - result_sort.index = CategoricalIndex(index, name='dt') - - index = [datetime(2011, 7, 1), datetime(2011, 2, 1), - datetime(2011, 5, 1), datetime(2011, 1, 1)] - result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], - columns=['foo', 'bar']) - result_nosort.index = CategoricalIndex(index, categories=index, - name='dt') - - col = 'dt' - assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) - assert_frame_equal(result_nosort, df.groupby(col, sort=False).first()) - def test_groupby_sort_multiindex_series(self): # series multiindex groupby sort argument was not being passed through # _compress_group_index @@ -4088,169 +3387,6 @@ def test_groupby_sort_multiindex_series(self): result = mseries.groupby(level=['a', 'b'], sort=True).first() assert_series_equal(result, mseries_result.sort_index()) - def test_groupby_groups_datetimeindex(self): - # #1430 - from pandas.tseries.api import DatetimeIndex - periods = 1000 - ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods) - df = DataFrame({'high': np.arange(periods), - 'low': np.arange(periods)}, index=ind) - grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day)) - - # it works! 
- groups = grouped.groups - tm.assertIsInstance(list(groups.keys())[0], datetime) - - # GH 11442 - index = pd.date_range('2015/01/01', periods=5, name='date') - df = pd.DataFrame({'A': [5, 6, 7, 8, 9], - 'B': [1, 2, 3, 4, 5]}, index=index) - result = df.groupby(level='date').groups - dates = ['2015-01-05', '2015-01-04', '2015-01-03', - '2015-01-02', '2015-01-01'] - expected = {pd.Timestamp(date): pd.DatetimeIndex([date], name='date') - for date in dates} - tm.assert_dict_equal(result, expected) - - grouped = df.groupby(level='date') - for date in dates: - result = grouped.get_group(date) - data = [[df.loc[date, 'A'], df.loc[date, 'B']]] - expected_index = pd.DatetimeIndex([date], name='date') - expected = pd.DataFrame(data, - columns=list('AB'), - index=expected_index) - tm.assert_frame_equal(result, expected) - - def test_groupby_groups_datetimeindex_tz(self): - # GH 3950 - dates = ['2011-07-19 07:00:00', '2011-07-19 08:00:00', - '2011-07-19 09:00:00', '2011-07-19 07:00:00', - '2011-07-19 08:00:00', '2011-07-19 09:00:00'] - df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'], - 'datetime': dates, - 'value1': np.arange(6, dtype='int64'), - 'value2': [1, 2] * 3}) - df['datetime'] = df['datetime'].apply( - lambda d: Timestamp(d, tz='US/Pacific')) - - exp_idx1 = pd.DatetimeIndex(['2011-07-19 07:00:00', - '2011-07-19 07:00:00', - '2011-07-19 08:00:00', - '2011-07-19 08:00:00', - '2011-07-19 09:00:00', - '2011-07-19 09:00:00'], - tz='US/Pacific', name='datetime') - exp_idx2 = Index(['a', 'b'] * 3, name='label') - exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) - expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5], - 'value2': [1, 2, 2, 1, 1, 2]}, - index=exp_idx, columns=['value1', 'value2']) - - result = df.groupby(['datetime', 'label']).sum() - assert_frame_equal(result, expected) - - # by level - didx = pd.DatetimeIndex(dates, tz='Asia/Tokyo') - df = DataFrame({'value1': np.arange(6, dtype='int64'), - 'value2': [1, 2, 3, 1, 2, 3]}, - index=didx) - - exp_idx = pd.DatetimeIndex(['2011-07-19 07:00:00', - '2011-07-19 08:00:00', - '2011-07-19 09:00:00'], tz='Asia/Tokyo') - expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]}, - index=exp_idx, columns=['value1', 'value2']) - - result = df.groupby(level=0).sum() - assert_frame_equal(result, expected) - - def test_frame_datetime64_handling_groupby(self): - # it works! 
- df = DataFrame([(3, np.datetime64('2012-07-03')), - (3, np.datetime64('2012-07-04'))], - columns=['a', 'date']) - result = df.groupby('a').first() - self.assertEqual(result['date'][3], Timestamp('2012-07-03')) - - def test_groupby_multi_timezone(self): - - # combining multiple / different timezones yields UTC - - data = """0,2000-01-28 16:47:00,America/Chicago -1,2000-01-29 16:48:00,America/Chicago -2,2000-01-30 16:49:00,America/Los_Angeles -3,2000-01-31 16:50:00,America/Chicago -4,2000-01-01 16:50:00,America/New_York""" - - df = pd.read_csv(StringIO(data), header=None, - names=['value', 'date', 'tz']) - result = df.groupby('tz').date.apply( - lambda x: pd.to_datetime(x).dt.tz_localize(x.name)) - - expected = Series([Timestamp('2000-01-28 16:47:00-0600', - tz='America/Chicago'), - Timestamp('2000-01-29 16:48:00-0600', - tz='America/Chicago'), - Timestamp('2000-01-30 16:49:00-0800', - tz='America/Los_Angeles'), - Timestamp('2000-01-31 16:50:00-0600', - tz='America/Chicago'), - Timestamp('2000-01-01 16:50:00-0500', - tz='America/New_York')], - name='date', - dtype=object) - assert_series_equal(result, expected) - - tz = 'America/Chicago' - res_values = df.groupby('tz').date.get_group(tz) - result = pd.to_datetime(res_values).dt.tz_localize(tz) - exp_values = Series(['2000-01-28 16:47:00', '2000-01-29 16:48:00', - '2000-01-31 16:50:00'], - index=[0, 1, 3], name='date') - expected = pd.to_datetime(exp_values).dt.tz_localize(tz) - assert_series_equal(result, expected) - - def test_groupby_groups_periods(self): - dates = ['2011-07-19 07:00:00', '2011-07-19 08:00:00', - '2011-07-19 09:00:00', '2011-07-19 07:00:00', - '2011-07-19 08:00:00', '2011-07-19 09:00:00'] - df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'], - 'period': [pd.Period(d, freq='H') for d in dates], - 'value1': np.arange(6, dtype='int64'), - 'value2': [1, 2] * 3}) - - exp_idx1 = pd.PeriodIndex(['2011-07-19 07:00:00', - '2011-07-19 07:00:00', - '2011-07-19 08:00:00', - '2011-07-19 08:00:00', - '2011-07-19 09:00:00', - '2011-07-19 09:00:00'], - freq='H', name='period') - exp_idx2 = Index(['a', 'b'] * 3, name='label') - exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) - expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5], - 'value2': [1, 2, 2, 1, 1, 2]}, - index=exp_idx, columns=['value1', 'value2']) - - result = df.groupby(['period', 'label']).sum() - assert_frame_equal(result, expected) - - # by level - didx = pd.PeriodIndex(dates, freq='H') - df = DataFrame({'value1': np.arange(6, dtype='int64'), - 'value2': [1, 2, 3, 1, 2, 3]}, - index=didx) - - exp_idx = pd.PeriodIndex(['2011-07-19 07:00:00', - '2011-07-19 08:00:00', - '2011-07-19 09:00:00'], freq='H') - expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]}, - index=exp_idx, columns=['value1', 'value2']) - - result = df.groupby(level=0).sum() - assert_frame_equal(result, expected) - def test_groupby_reindex_inside_function(self): from pandas.tseries.api import DatetimeIndex @@ -4336,33 +3472,21 @@ def test_median_empty_bins(self): def test_groupby_non_arithmetic_agg_types(self): # GH9311, GH6620 - df = pd.DataFrame([{'a': 1, - 'b': 1}, {'a': 1, - 'b': 2}, {'a': 2, - 'b': 3}, {'a': 2, - 'b': 4}]) + df = pd.DataFrame( + [{'a': 1, 'b': 1}, + {'a': 1, 'b': 2}, + {'a': 2, 'b': 3}, + {'a': 2, 'b': 4}]) dtypes = ['int8', 'int16', 'int32', 'int64', 'float32', 'float64'] - grp_exp = {'first': {'df': [{'a': 1, - 'b': 1}, {'a': 2, - 'b': 3}]}, - 'last': {'df': [{'a': 1, - 'b': 2}, {'a': 2, - 'b': 4}]}, - 'min': {'df': [{'a': 1, - 'b': 1}, {'a': 2, - 'b': 3}]}, - 
'max': {'df': [{'a': 1, - 'b': 2}, {'a': 2, - 'b': 4}]}, - 'nth': {'df': [{'a': 1, - 'b': 2}, {'a': 2, - 'b': 4}], + grp_exp = {'first': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}, + 'last': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}, + 'min': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}, + 'max': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}, + 'nth': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}], 'args': [1]}, - 'count': {'df': [{'a': 1, - 'b': 2}, {'a': 2, - 'b': 2}], + 'count': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}], 'out_type': 'int64'}} for dtype in dtypes: @@ -4414,37 +3538,6 @@ def test_groupby_non_arithmetic_agg_intlike_precision(self): res = getattr(grpd, method)(*data['args']) self.assertEqual(res.iloc[0].b, data['expected']) - def test_groupby_first_datetime64(self): - df = DataFrame([(1, 1351036800000000000), (2, 1351036800000000000)]) - df[1] = df[1].view('M8[ns]') - - self.assertTrue(issubclass(df[1].dtype.type, np.datetime64)) - - result = df.groupby(level=0).first() - got_dt = result[1].dtype - self.assertTrue(issubclass(got_dt.type, np.datetime64)) - - result = df[1].groupby(level=0).first() - got_dt = result.dtype - self.assertTrue(issubclass(got_dt.type, np.datetime64)) - - def test_groupby_max_datetime64(self): - # GH 5869 - # datetimelike dtype conversion from int - df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5))) - expected = df.groupby('A')['A'].apply(lambda x: x.max()) - result = df.groupby('A')['A'].max() - assert_series_equal(result, expected) - - def test_groupby_datetime64_32_bit(self): - # GH 6410 / numpy 4328 - # 32-bit under 1.9-dev indexing issue - - df = DataFrame({"A": range(2), "B": [pd.Timestamp('2000-01-1')] * 2}) - result = df.groupby("A")["B"].transform(min) - expected = Series([pd.Timestamp('2000-01-1')] * 2, name='B') - assert_series_equal(result, expected) - def test_groupby_multiindex_missing_pair(self): # GH9049 df = DataFrame({'group1': ['a', 'a', 'a', 'b'], @@ -4613,381 +3706,6 @@ def test_groupby_with_small_elem(self): res = grouped.get_group((pd.Timestamp('2014-08-31'), 'start')) tm.assert_frame_equal(res, df.iloc[[2], :]) - def test_groupby_with_timezone_selection(self): - # GH 11616 - # Test that column selection returns output in correct timezone. 
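# Editorial aside (illustration only; not part of the patch itself): the
# invariant checked by the test being removed below is that selecting a
# tz-aware column before or after the aggregation yields the same tz-aware
# answer. A minimal sketch, mirroring the test's data:
import numpy as np
import pandas as pd

df = pd.DataFrame({'factor': np.random.randint(0, 3, size=60),
                   'time': pd.date_range('2000-01-01', periods=60,
                                         freq='s', tz='UTC')})
selected_after = df.groupby('factor').max()['time']    # reduce, then select
selected_before = df.groupby('factor')['time'].max()   # select, then reduce
# both results should be identical and keep the UTC timezone
assert selected_after.equals(selected_before)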
- np.random.seed(42) - df = pd.DataFrame({ - 'factor': np.random.randint(0, 3, size=60), - 'time': pd.date_range('01/01/2000 00:00', periods=60, - freq='s', tz='UTC') - }) - df1 = df.groupby('factor').max()['time'] - df2 = df.groupby('factor')['time'].max() - tm.assert_series_equal(df1, df2) - - def test_timezone_info(self): - # GH 11682 - # Timezone info lost when broadcasting scalar datetime to DataFrame - tm._skip_if_no_pytz() - import pytz - - df = pd.DataFrame({'a': [1], 'b': [datetime.now(pytz.utc)]}) - self.assertEqual(df['b'][0].tzinfo, pytz.utc) - df = pd.DataFrame({'a': [1, 2, 3]}) - df['b'] = datetime.now(pytz.utc) - self.assertEqual(df['b'][0].tzinfo, pytz.utc) - - def test_groupby_with_timegrouper(self): - # GH 4161 - # TimeGrouper requires a sorted index - # also verifies that the resultant index has the correct name - import datetime as DT - df_original = DataFrame({ - 'Buyer': 'Carl Carl Carl Carl Joe Carl'.split(), - 'Quantity': [18, 3, 5, 1, 9, 3], - 'Date': [ - DT.datetime(2013, 9, 1, 13, 0), - DT.datetime(2013, 9, 1, 13, 5), - DT.datetime(2013, 10, 1, 20, 0), - DT.datetime(2013, 10, 3, 10, 0), - DT.datetime(2013, 12, 2, 12, 0), - DT.datetime(2013, 9, 2, 14, 0), - ] - }) - - # GH 6908 change target column's order - df_reordered = df_original.sort_values(by='Quantity') - - for df in [df_original, df_reordered]: - df = df.set_index(['Date']) - - expected = DataFrame( - {'Quantity': np.nan}, - index=date_range('20130901 13:00:00', - '20131205 13:00:00', freq='5D', - name='Date', closed='left')) - expected.iloc[[0, 6, 18], 0] = np.array( - [24., 6., 9.], dtype='float64') - - result1 = df.resample('5D') .sum() - assert_frame_equal(result1, expected) - - df_sorted = df.sort_index() - result2 = df_sorted.groupby(pd.TimeGrouper(freq='5D')).sum() - assert_frame_equal(result2, expected) - - result3 = df.groupby(pd.TimeGrouper(freq='5D')).sum() - assert_frame_equal(result3, expected) - - def test_groupby_with_timegrouper_methods(self): - # GH 3881 - # make sure API of timegrouper conforms - - import datetime as DT - df_original = pd.DataFrame({ - 'Branch': 'A A A A A B'.split(), - 'Buyer': 'Carl Mark Carl Joe Joe Carl'.split(), - 'Quantity': [1, 3, 5, 8, 9, 3], - 'Date': [ - DT.datetime(2013, 1, 1, 13, 0), - DT.datetime(2013, 1, 1, 13, 5), - DT.datetime(2013, 10, 1, 20, 0), - DT.datetime(2013, 10, 2, 10, 0), - DT.datetime(2013, 12, 2, 12, 0), - DT.datetime(2013, 12, 2, 14, 0), - ] - }) - - df_sorted = df_original.sort_values(by='Quantity', ascending=False) - - for df in [df_original, df_sorted]: - df = df.set_index('Date', drop=False) - g = df.groupby(pd.TimeGrouper('6M')) - self.assertTrue(g.group_keys) - self.assertTrue(isinstance(g.grouper, pd.core.groupby.BinGrouper)) - groups = g.groups - self.assertTrue(isinstance(groups, dict)) - self.assertTrue(len(groups) == 3) - - def test_timegrouper_with_reg_groups(self): - - # GH 3794 - # allow combinateion of timegrouper/reg groups - - import datetime as DT - - df_original = DataFrame({ - 'Branch': 'A A A A A A A B'.split(), - 'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(), - 'Quantity': [1, 3, 5, 1, 8, 1, 9, 3], - 'Date': [ - DT.datetime(2013, 1, 1, 13, 0), - DT.datetime(2013, 1, 1, 13, 5), - DT.datetime(2013, 10, 1, 20, 0), - DT.datetime(2013, 10, 2, 10, 0), - DT.datetime(2013, 10, 1, 20, 0), - DT.datetime(2013, 10, 2, 10, 0), - DT.datetime(2013, 12, 2, 12, 0), - DT.datetime(2013, 12, 2, 14, 0), - ] - }).set_index('Date') - - df_sorted = df_original.sort_values(by='Quantity', ascending=False) - - for df in [df_original, 
df_sorted]: - expected = DataFrame({ - 'Buyer': 'Carl Joe Mark'.split(), - 'Quantity': [10, 18, 3], - 'Date': [ - DT.datetime(2013, 12, 31, 0, 0), - DT.datetime(2013, 12, 31, 0, 0), - DT.datetime(2013, 12, 31, 0, 0), - ] - }).set_index(['Date', 'Buyer']) - - result = df.groupby([pd.Grouper(freq='A'), 'Buyer']).sum() - assert_frame_equal(result, expected) - - expected = DataFrame({ - 'Buyer': 'Carl Mark Carl Joe'.split(), - 'Quantity': [1, 3, 9, 18], - 'Date': [ - DT.datetime(2013, 1, 1, 0, 0), - DT.datetime(2013, 1, 1, 0, 0), - DT.datetime(2013, 7, 1, 0, 0), - DT.datetime(2013, 7, 1, 0, 0), - ] - }).set_index(['Date', 'Buyer']) - result = df.groupby([pd.Grouper(freq='6MS'), 'Buyer']).sum() - assert_frame_equal(result, expected) - - df_original = DataFrame({ - 'Branch': 'A A A A A A A B'.split(), - 'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(), - 'Quantity': [1, 3, 5, 1, 8, 1, 9, 3], - 'Date': [ - DT.datetime(2013, 10, 1, 13, 0), - DT.datetime(2013, 10, 1, 13, 5), - DT.datetime(2013, 10, 1, 20, 0), - DT.datetime(2013, 10, 2, 10, 0), - DT.datetime(2013, 10, 1, 20, 0), - DT.datetime(2013, 10, 2, 10, 0), - DT.datetime(2013, 10, 2, 12, 0), - DT.datetime(2013, 10, 2, 14, 0), - ] - }).set_index('Date') - - df_sorted = df_original.sort_values(by='Quantity', ascending=False) - for df in [df_original, df_sorted]: - - expected = DataFrame({ - 'Buyer': 'Carl Joe Mark Carl Joe'.split(), - 'Quantity': [6, 8, 3, 4, 10], - 'Date': [ - DT.datetime(2013, 10, 1, 0, 0), - DT.datetime(2013, 10, 1, 0, 0), - DT.datetime(2013, 10, 1, 0, 0), - DT.datetime(2013, 10, 2, 0, 0), - DT.datetime(2013, 10, 2, 0, 0), - ] - }).set_index(['Date', 'Buyer']) - - result = df.groupby([pd.Grouper(freq='1D'), 'Buyer']).sum() - assert_frame_equal(result, expected) - - result = df.groupby([pd.Grouper(freq='1M'), 'Buyer']).sum() - expected = DataFrame({ - 'Buyer': 'Carl Joe Mark'.split(), - 'Quantity': [10, 18, 3], - 'Date': [ - DT.datetime(2013, 10, 31, 0, 0), - DT.datetime(2013, 10, 31, 0, 0), - DT.datetime(2013, 10, 31, 0, 0), - ] - }).set_index(['Date', 'Buyer']) - assert_frame_equal(result, expected) - - # passing the name - df = df.reset_index() - result = df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer' - ]).sum() - assert_frame_equal(result, expected) - - with self.assertRaises(KeyError): - df.groupby([pd.Grouper(freq='1M', key='foo'), 'Buyer']).sum() - - # passing the level - df = df.set_index('Date') - result = df.groupby([pd.Grouper(freq='1M', level='Date'), 'Buyer' - ]).sum() - assert_frame_equal(result, expected) - result = df.groupby([pd.Grouper(freq='1M', level=0), 'Buyer']).sum( - ) - assert_frame_equal(result, expected) - - with self.assertRaises(ValueError): - df.groupby([pd.Grouper(freq='1M', level='foo'), - 'Buyer']).sum() - - # multi names - df = df.copy() - df['Date'] = df.index + pd.offsets.MonthEnd(2) - result = df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer' - ]).sum() - expected = DataFrame({ - 'Buyer': 'Carl Joe Mark'.split(), - 'Quantity': [10, 18, 3], - 'Date': [ - DT.datetime(2013, 11, 30, 0, 0), - DT.datetime(2013, 11, 30, 0, 0), - DT.datetime(2013, 11, 30, 0, 0), - ] - }).set_index(['Date', 'Buyer']) - assert_frame_equal(result, expected) - - # error as we have both a level and a name! 
- with self.assertRaises(ValueError): - df.groupby([pd.Grouper(freq='1M', key='Date', - level='Date'), 'Buyer']).sum() - - # single groupers - expected = DataFrame({'Quantity': [31], - 'Date': [DT.datetime(2013, 10, 31, 0, 0) - ]}).set_index('Date') - result = df.groupby(pd.Grouper(freq='1M')).sum() - assert_frame_equal(result, expected) - - result = df.groupby([pd.Grouper(freq='1M')]).sum() - assert_frame_equal(result, expected) - - expected = DataFrame({'Quantity': [31], - 'Date': [DT.datetime(2013, 11, 30, 0, 0) - ]}).set_index('Date') - result = df.groupby(pd.Grouper(freq='1M', key='Date')).sum() - assert_frame_equal(result, expected) - - result = df.groupby([pd.Grouper(freq='1M', key='Date')]).sum() - assert_frame_equal(result, expected) - - # GH 6764 multiple grouping with/without sort - df = DataFrame({ - 'date': pd.to_datetime([ - '20121002', '20121007', '20130130', '20130202', '20130305', - '20121002', '20121207', '20130130', '20130202', '20130305', - '20130202', '20130305' - ]), - 'user_id': [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5], - 'whole_cost': [1790, 364, 280, 259, 201, 623, 90, 312, 359, 301, - 359, 801], - 'cost1': [12, 15, 10, 24, 39, 1, 0, 90, 45, 34, 1, 12] - }).set_index('date') - - for freq in ['D', 'M', 'A', 'Q-APR']: - expected = df.groupby('user_id')[ - 'whole_cost'].resample( - freq).sum().dropna().reorder_levels( - ['date', 'user_id']).sort_index().astype('int64') - expected.name = 'whole_cost' - - result1 = df.sort_index().groupby([pd.TimeGrouper(freq=freq), - 'user_id'])['whole_cost'].sum() - assert_series_equal(result1, expected) - - result2 = df.groupby([pd.TimeGrouper(freq=freq), 'user_id'])[ - 'whole_cost'].sum() - assert_series_equal(result2, expected) - - def test_timegrouper_get_group(self): - # GH 6914 - - df_original = DataFrame({ - 'Buyer': 'Carl Joe Joe Carl Joe Carl'.split(), - 'Quantity': [18, 3, 5, 1, 9, 3], - 'Date': [datetime(2013, 9, 1, 13, 0), - datetime(2013, 9, 1, 13, 5), - datetime(2013, 10, 1, 20, 0), - datetime(2013, 10, 3, 10, 0), - datetime(2013, 12, 2, 12, 0), - datetime(2013, 9, 2, 14, 0), ] - }) - df_reordered = df_original.sort_values(by='Quantity') - - # single grouping - expected_list = [df_original.iloc[[0, 1, 5]], df_original.iloc[[2, 3]], - df_original.iloc[[4]]] - dt_list = ['2013-09-30', '2013-10-31', '2013-12-31'] - - for df in [df_original, df_reordered]: - grouped = df.groupby(pd.Grouper(freq='M', key='Date')) - for t, expected in zip(dt_list, expected_list): - dt = pd.Timestamp(t) - result = grouped.get_group(dt) - assert_frame_equal(result, expected) - - # multiple grouping - expected_list = [df_original.iloc[[1]], df_original.iloc[[3]], - df_original.iloc[[4]]] - g_list = [('Joe', '2013-09-30'), ('Carl', '2013-10-31'), - ('Joe', '2013-12-31')] - - for df in [df_original, df_reordered]: - grouped = df.groupby(['Buyer', pd.Grouper(freq='M', key='Date')]) - for (b, t), expected in zip(g_list, expected_list): - dt = pd.Timestamp(t) - result = grouped.get_group((b, dt)) - assert_frame_equal(result, expected) - - # with index - df_original = df_original.set_index('Date') - df_reordered = df_original.sort_values(by='Quantity') - - expected_list = [df_original.iloc[[0, 1, 5]], df_original.iloc[[2, 3]], - df_original.iloc[[4]]] - - for df in [df_original, df_reordered]: - grouped = df.groupby(pd.Grouper(freq='M')) - for t, expected in zip(dt_list, expected_list): - dt = pd.Timestamp(t) - result = grouped.get_group(dt) - assert_frame_equal(result, expected) - - def test_timegrouper_apply_return_type_series(self): - # Using `apply` 
with the `TimeGrouper` should give the - # same return type as an `apply` with a `Grouper`. - # Issue #11742 - df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'], - 'value': [10, 13]}) - df_dt = df.copy() - df_dt['date'] = pd.to_datetime(df_dt['date']) - - def sumfunc_series(x): - return pd.Series([x['value'].sum()], ('sum',)) - - expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series) - result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) - .apply(sumfunc_series)) - assert_frame_equal(result.reset_index(drop=True), - expected.reset_index(drop=True)) - - def test_timegrouper_apply_return_type_value(self): - # Using `apply` with the `TimeGrouper` should give the - # same return type as an `apply` with a `Grouper`. - # Issue #11742 - df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'], - 'value': [10, 13]}) - df_dt = df.copy() - df_dt['date'] = pd.to_datetime(df_dt['date']) - - def sumfunc_value(x): - return x.value.sum() - - expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value) - result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) - .apply(sumfunc_value)) - assert_series_equal(result.reset_index(drop=True), - expected.reset_index(drop=True)) - def test_cumcount(self): df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A']) g = df.groupby('A') @@ -5326,106 +4044,6 @@ def test_tab_completion(self): 'ffill', 'bfill', 'pad', 'backfill', 'rolling', 'expanding']) self.assertEqual(results, expected) - def test_lexsort_indexer(self): - keys = [[nan] * 5 + list(range(100)) + [nan] * 5] - # orders=True, na_position='last' - result = _lexsort_indexer(keys, orders=True, na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=True, na_position='first' - result = _lexsort_indexer(keys, orders=True, na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=False, na_position='last' - result = _lexsort_indexer(keys, orders=False, na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=False, na_position='first' - result = _lexsort_indexer(keys, orders=False, na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - def test_nargsort(self): - # np.argsort(items) places NaNs last - items = [nan] * 5 + list(range(100)) + [nan] * 5 - # np.argsort(items2) may not place NaNs first - items2 = np.array(items, dtype='O') - - try: - # GH 2785; due to a regression in NumPy1.6.2 - np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) - np.argsort(items2, kind='mergesort') - except TypeError: - raise nose.SkipTest('requested sort not available for type') - - # mergesort is the most difficult to get right because we want it to be - # stable. 
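# Editorial aside (illustration only; not part of the patch itself): a
# stable sort keeps equal keys in their original relative order, which is
# what makes the hard-coded expected indexers in this test deterministic.
# A minimal sketch using NumPy's stable mergesort:
import numpy as np

keys = np.array([2, 1, 2, 1])
order = np.argsort(keys, kind='mergesort')
# order == array([1, 3, 0, 2]): the equal 1s (positions 1 and 3) and the
# equal 2s (positions 0 and 2) keep their original relative order.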
- - # According to numpy/core/tests/test_multiarray, """The number of - # sorted items must be greater than ~50 to check the actual algorithm - # because quick and merge sort fall over to insertion sort for small - # arrays.""" - - # mergesort, ascending=True, na_position='last' - result = _nargsort(items, kind='mergesort', ascending=True, - na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='first' - result = _nargsort(items, kind='mergesort', ascending=True, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='last' - result = _nargsort(items, kind='mergesort', ascending=False, - na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='first' - result = _nargsort(items, kind='mergesort', ascending=False, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='last' - result = _nargsort(items2, kind='mergesort', ascending=True, - na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='first' - result = _nargsort(items2, kind='mergesort', ascending=True, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='last' - result = _nargsort(items2, kind='mergesort', ascending=False, - na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='first' - result = _nargsort(items2, kind='mergesort', ascending=False, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - def test_datetime_count(self): - df = DataFrame({'a': [1, 2, 3] * 2, - 'dates': pd.date_range('now', periods=6, freq='T')}) - result = df.groupby('a').dates.count() - expected = Series([ - 2, 2, 2 - ], index=Index([1, 2, 3], name='a'), name='dates') - tm.assert_series_equal(result, expected) - def test_lower_int_prec_count(self): df = DataFrame({'a': np.array( [0, 1, 2, 100], np.int8), @@ -5462,179 +4080,6 @@ def __eq__(self, other): list('ab'), name='grp')) tm.assert_frame_equal(result, expected) - def test__cython_agg_general(self): - ops = [('mean', np.mean), - ('median', np.median), - ('var', np.var), - ('add', np.sum), - ('prod', np.prod), - ('min', np.min), - ('max', np.max), - ('first', lambda x: x.iloc[0]), - ('last', lambda x: x.iloc[-1]), ] - df = DataFrame(np.random.randn(1000)) - labels = np.random.randint(0, 50, size=1000).astype(float) - - for op, targop in ops: - result = df.groupby(labels)._cython_agg_general(op) - expected = df.groupby(labels).agg(targop) - try: - tm.assert_frame_equal(result, expected) - except BaseException as exc: - 
exc.args += ('operation: %s' % op, ) - raise - - def test_cython_agg_empty_buckets(self): - ops = [('mean', np.mean), - ('median', lambda x: np.median(x) if len(x) > 0 else np.nan), - ('var', lambda x: np.var(x, ddof=1)), - ('add', lambda x: np.sum(x) if len(x) > 0 else np.nan), - ('prod', np.prod), - ('min', np.min), - ('max', np.max), ] - - df = pd.DataFrame([11, 12, 13]) - grps = range(0, 55, 5) - - for op, targop in ops: - result = df.groupby(pd.cut(df[0], grps))._cython_agg_general(op) - expected = df.groupby(pd.cut(df[0], grps)).agg(lambda x: targop(x)) - try: - tm.assert_frame_equal(result, expected) - except BaseException as exc: - exc.args += ('operation: %s' % op,) - raise - - def test_cython_group_transform_algos(self): - # GH 4095 - dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32, - np.uint64, np.float32, np.float64] - - ops = [(pd.algos.group_cumprod_float64, np.cumproduct, [np.float64]), - (pd.algos.group_cumsum, np.cumsum, dtypes)] - - is_datetimelike = False - for pd_op, np_op, dtypes in ops: - for dtype in dtypes: - data = np.array([[1], [2], [3], [4]], dtype=dtype) - ans = np.zeros_like(data) - labels = np.array([0, 0, 0, 0], dtype=np.int64) - pd_op(ans, data, labels, is_datetimelike) - self.assert_numpy_array_equal(np_op(data), ans[:, 0], - check_dtype=False) - - # with nans - labels = np.array([0, 0, 0, 0, 0], dtype=np.int64) - - data = np.array([[1], [2], [3], [np.nan], [4]], dtype='float64') - actual = np.zeros_like(data) - actual.fill(np.nan) - pd.algos.group_cumprod_float64(actual, data, labels, is_datetimelike) - expected = np.array([1, 2, 6, np.nan, 24], dtype='float64') - self.assert_numpy_array_equal(actual[:, 0], expected) - - actual = np.zeros_like(data) - actual.fill(np.nan) - pd.algos.group_cumsum(actual, data, labels, is_datetimelike) - expected = np.array([1, 3, 6, np.nan, 10], dtype='float64') - self.assert_numpy_array_equal(actual[:, 0], expected) - - # timedelta - is_datetimelike = True - data = np.array([np.timedelta64(1, 'ns')] * 5, dtype='m8[ns]')[:, None] - actual = np.zeros_like(data, dtype='int64') - pd.algos.group_cumsum(actual, data.view('int64'), labels, - is_datetimelike) - expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64( - 2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'), - np.timedelta64(5, 'ns')]) - self.assert_numpy_array_equal(actual[:, 0].view('m8[ns]'), expected) - - def test_cython_transform(self): - # GH 4095 - ops = [(('cumprod', - ()), lambda x: x.cumprod()), (('cumsum', ()), - lambda x: x.cumsum()), - (('shift', (-1, )), - lambda x: x.shift(-1)), (('shift', - (1, )), lambda x: x.shift())] - - s = Series(np.random.randn(1000)) - s_missing = s.copy() - s_missing.iloc[2:10] = np.nan - labels = np.random.randint(0, 50, size=1000).astype(float) - - # series - for (op, args), targop in ops: - for data in [s, s_missing]: - # print(data.head()) - expected = data.groupby(labels).transform(targop) - - tm.assert_series_equal(expected, - data.groupby(labels).transform(op, - *args)) - tm.assert_series_equal(expected, getattr( - data.groupby(labels), op)(*args)) - - strings = list('qwertyuiopasdfghjklz') - strings_missing = strings[:] - strings_missing[5] = np.nan - df = DataFrame({'float': s, - 'float_missing': s_missing, - 'int': [1, 1, 1, 1, 2] * 200, - 'datetime': pd.date_range('1990-1-1', periods=1000), - 'timedelta': pd.timedelta_range(1, freq='s', - periods=1000), - 'string': strings * 50, - 'string_missing': strings_missing * 50}) - df['cat'] = df['string'].astype('category') - - df2 = 
df.copy() - df2.index = pd.MultiIndex.from_product([range(100), range(10)]) - - # DataFrame - Single and MultiIndex, - # group by values, index level, columns - for df in [df, df2]: - for gb_target in [dict(by=labels), dict(level=0), dict(by='string') - ]: # dict(by='string_missing')]: - # dict(by=['int','string'])]: - - gb = df.groupby(**gb_target) - # whitelisted methods set the selection before applying - # bit a of hack to make sure the cythonized shift - # is equivalent to pre 0.17.1 behavior - if op == 'shift': - gb._set_group_selection() - - for (op, args), targop in ops: - if op != 'shift' and 'int' not in gb_target: - # numeric apply fastpath promotes dtype so have - # to apply seperately and concat - i = gb[['int']].apply(targop) - f = gb[['float', 'float_missing']].apply(targop) - expected = pd.concat([f, i], axis=1) - else: - expected = gb.apply(targop) - - expected = expected.sort_index(axis=1) - tm.assert_frame_equal(expected, - gb.transform(op, *args).sort_index( - axis=1)) - tm.assert_frame_equal(expected, getattr(gb, op)(*args)) - # individual columns - for c in df: - if c not in ['float', 'int', 'float_missing' - ] and op != 'shift': - self.assertRaises(DataError, gb[c].transform, op) - self.assertRaises(DataError, getattr(gb[c], op)) - else: - expected = gb[c].apply(targop) - expected.name = c - tm.assert_series_equal(expected, - gb[c].transform(op, *args)) - tm.assert_series_equal(expected, - getattr(gb[c], op)(*args)) - def test_groupby_cumprod(self): # GH 4095 df = pd.DataFrame({'key': ['b'] * 10, 'value': 2}) @@ -5784,27 +4229,6 @@ def test_func(x): tm.assert_frame_equal(result1, expected1) tm.assert_frame_equal(result2, expected2) - def test_first_last_max_min_on_time_data(self): - # GH 10295 - # Verify that NaT is not in the result of max, min, first and last on - # Dataframe with datetime or timedelta values. - from datetime import timedelta as td - df_test = DataFrame( - {'dt': [nan, '2015-07-24 10:10', '2015-07-25 11:11', - '2015-07-23 12:12', nan], - 'td': [nan, td(days=1), td(days=2), td(days=3), nan]}) - df_test.dt = pd.to_datetime(df_test.dt) - df_test['group'] = 'A' - df_ref = df_test[df_test.dt.notnull()] - - grouped_test = df_test.groupby('group') - grouped_ref = df_ref.groupby('group') - - assert_frame_equal(grouped_ref.max(), grouped_test.max()) - assert_frame_equal(grouped_ref.min(), grouped_test.min()) - assert_frame_equal(grouped_ref.first(), grouped_test.first()) - assert_frame_equal(grouped_ref.last(), grouped_test.last()) - def test_groupby_preserves_sort(self): # Test to ensure that groupby always preserves sort order of original # object. 
Issue #8588 and #9651 @@ -5854,21 +4278,6 @@ def test_nunique_with_empty_series(self): expected = pd.Series(name='name', dtype='int64') tm.assert_series_equal(result, expected) - def test_transform_with_non_scalar_group(self): - # GH 10165 - cols = pd.MultiIndex.from_tuples([ - ('syn', 'A'), ('mis', 'A'), ('non', 'A'), - ('syn', 'C'), ('mis', 'C'), ('non', 'C'), - ('syn', 'T'), ('mis', 'T'), ('non', 'T'), - ('syn', 'G'), ('mis', 'G'), ('non', 'G')]) - df = pd.DataFrame(np.random.randint(1, 10, (4, 12)), - columns=cols, - index=['A', 'C', 'G', 'T']) - self.assertRaisesRegexp(ValueError, 'transform must return a scalar ' - 'value for each group.*', df.groupby - (axis=1, level=1).transform, - lambda z: z.div(z.sum(axis=1), axis=0)) - def test_numpy_compat(self): # see gh-12811 df = pd.DataFrame({'A': [1, 2, 1], 'B': [1, 2, 3]}) @@ -5927,23 +4336,6 @@ def test_pivot_table_values_key_error(self): df.reset_index().pivot_table(index='year', columns='month', values='badname', aggfunc='count') - def test_agg_over_numpy_arrays(self): - # GH 3788 - df = pd.DataFrame([[1, np.array([10, 20, 30])], - [1, np.array([40, 50, 60])], - [2, np.array([20, 30, 40])]], - columns=['category', 'arraydata']) - result = df.groupby('category').agg(sum) - - expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]] - expected_index = pd.Index([1, 2], name='category') - expected_column = ['arraydata'] - expected = pd.DataFrame(expected_data, - index=expected_index, - columns=expected_column) - - assert_frame_equal(result, expected) - def test_cummin_cummax(self): # GH 15048 num_types = [np.int32, np.int64, np.float32, np.float64] @@ -6024,10 +4416,6 @@ def test_cummin_cummax(self): tm.assert_frame_equal(expected, result) -def assert_fp_equal(a, b): - assert (np.abs(a - b) < 1e-12).all() - - def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = lmap(tuple, df[keys].values) tups = com._asarray_tuplesafe(tups) diff --git a/pandas/tests/groupby/test_misc.py b/pandas/tests/groupby/test_misc.py new file mode 100644 index 0000000000000..c9d8ad4231cfb --- /dev/null +++ b/pandas/tests/groupby/test_misc.py @@ -0,0 +1,101 @@ +""" misc non-groupby routines, as they are defined in core/groupby.py """ + +import nose +import numpy as np +from numpy import nan +from pandas.util import testing as tm +from pandas.core.groupby import _nargsort, _lexsort_indexer + + +class TestSorting(tm.TestCase): + + def test_lexsort_indexer(self): + keys = [[nan] * 5 + list(range(100)) + [nan] * 5] + # orders=True, na_position='last' + result = _lexsort_indexer(keys, orders=True, na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=True, na_position='first' + result = _lexsort_indexer(keys, orders=True, na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=False, na_position='last' + result = _lexsort_indexer(keys, orders=False, na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=False, na_position='first' + result = _lexsort_indexer(keys, orders=False, na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + def test_nargsort(self): + # np.argsort(items) places NaNs 
last + items = [nan] * 5 + list(range(100)) + [nan] * 5 + # np.argsort(items2) may not place NaNs first + items2 = np.array(items, dtype='O') + + try: + # GH 2785; due to a regression in NumPy1.6.2 + np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) + np.argsort(items2, kind='mergesort') + except TypeError: + raise nose.SkipTest('requested sort not available for type') + + # mergesort is the most difficult to get right because we want it to be + # stable. + + # According to numpy/core/tests/test_multiarray, """The number of + # sorted items must be greater than ~50 to check the actual algorithm + # because quick and merge sort fall over to insertion sort for small + # arrays.""" + + # mergesort, ascending=True, na_position='last' + result = _nargsort(items, kind='mergesort', ascending=True, + na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='first' + result = _nargsort(items, kind='mergesort', ascending=True, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='last' + result = _nargsort(items, kind='mergesort', ascending=False, + na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='first' + result = _nargsort(items, kind='mergesort', ascending=False, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='last' + result = _nargsort(items2, kind='mergesort', ascending=True, + na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='first' + result = _nargsort(items2, kind='mergesort', ascending=True, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='last' + result = _nargsort(items2, kind='mergesort', ascending=False, + na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='first' + result = _nargsort(items2, kind='mergesort', ascending=False, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py new file mode 100644 index 0000000000000..3142b74b56778 --- /dev/null +++ b/pandas/tests/groupby/test_timegrouper.py @@ -0,0 +1,609 @@ +""" test with the TimeGrouper / grouping with datetimes """ + +from datetime import datetime +import numpy as np +from numpy import nan + +import pandas as pd +from pandas import DataFrame, date_range, Index, Series, MultiIndex, Timestamp +from pandas.compat import StringIO +from pandas.util import testing as tm +from pandas.util.testing import assert_frame_equal, 
assert_series_equal + + +class TestGroupBy(tm.TestCase): + + def test_groupby_with_timegrouper(self): + # GH 4161 + # TimeGrouper requires a sorted index + # also verifies that the resultant index has the correct name + df_original = DataFrame({ + 'Buyer': 'Carl Carl Carl Carl Joe Carl'.split(), + 'Quantity': [18, 3, 5, 1, 9, 3], + 'Date': [ + datetime(2013, 9, 1, 13, 0), + datetime(2013, 9, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 3, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 9, 2, 14, 0), + ] + }) + + # GH 6908 change target column's order + df_reordered = df_original.sort_values(by='Quantity') + + for df in [df_original, df_reordered]: + df = df.set_index(['Date']) + + expected = DataFrame( + {'Quantity': np.nan}, + index=date_range('20130901 13:00:00', + '20131205 13:00:00', freq='5D', + name='Date', closed='left')) + expected.iloc[[0, 6, 18], 0] = np.array( + [24., 6., 9.], dtype='float64') + + result1 = df.resample('5D') .sum() + assert_frame_equal(result1, expected) + + df_sorted = df.sort_index() + result2 = df_sorted.groupby(pd.TimeGrouper(freq='5D')).sum() + assert_frame_equal(result2, expected) + + result3 = df.groupby(pd.TimeGrouper(freq='5D')).sum() + assert_frame_equal(result3, expected) + + def test_groupby_with_timegrouper_methods(self): + # GH 3881 + # make sure API of timegrouper conforms + + df_original = pd.DataFrame({ + 'Branch': 'A A A A A B'.split(), + 'Buyer': 'Carl Mark Carl Joe Joe Carl'.split(), + 'Quantity': [1, 3, 5, 8, 9, 3], + 'Date': [ + datetime(2013, 1, 1, 13, 0), + datetime(2013, 1, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 12, 2, 14, 0), + ] + }) + + df_sorted = df_original.sort_values(by='Quantity', ascending=False) + + for df in [df_original, df_sorted]: + df = df.set_index('Date', drop=False) + g = df.groupby(pd.TimeGrouper('6M')) + self.assertTrue(g.group_keys) + self.assertTrue(isinstance(g.grouper, pd.core.groupby.BinGrouper)) + groups = g.groups + self.assertTrue(isinstance(groups, dict)) + self.assertTrue(len(groups) == 3) + + def test_timegrouper_with_reg_groups(self): + + # GH 3794 + # allow combinateion of timegrouper/reg groups + + df_original = DataFrame({ + 'Branch': 'A A A A A A A B'.split(), + 'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(), + 'Quantity': [1, 3, 5, 1, 8, 1, 9, 3], + 'Date': [ + datetime(2013, 1, 1, 13, 0), + datetime(2013, 1, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 12, 2, 14, 0), + ] + }).set_index('Date') + + df_sorted = df_original.sort_values(by='Quantity', ascending=False) + + for df in [df_original, df_sorted]: + expected = DataFrame({ + 'Buyer': 'Carl Joe Mark'.split(), + 'Quantity': [10, 18, 3], + 'Date': [ + datetime(2013, 12, 31, 0, 0), + datetime(2013, 12, 31, 0, 0), + datetime(2013, 12, 31, 0, 0), + ] + }).set_index(['Date', 'Buyer']) + + result = df.groupby([pd.Grouper(freq='A'), 'Buyer']).sum() + assert_frame_equal(result, expected) + + expected = DataFrame({ + 'Buyer': 'Carl Mark Carl Joe'.split(), + 'Quantity': [1, 3, 9, 18], + 'Date': [ + datetime(2013, 1, 1, 0, 0), + datetime(2013, 1, 1, 0, 0), + datetime(2013, 7, 1, 0, 0), + datetime(2013, 7, 1, 0, 0), + ] + }).set_index(['Date', 'Buyer']) + result = df.groupby([pd.Grouper(freq='6MS'), 'Buyer']).sum() + assert_frame_equal(result, expected) + + df_original = DataFrame({ + 'Branch': 'A 
A A A A A A B'.split(), + 'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(), + 'Quantity': [1, 3, 5, 1, 8, 1, 9, 3], + 'Date': [ + datetime(2013, 10, 1, 13, 0), + datetime(2013, 10, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 2, 12, 0), + datetime(2013, 10, 2, 14, 0), + ] + }).set_index('Date') + + df_sorted = df_original.sort_values(by='Quantity', ascending=False) + for df in [df_original, df_sorted]: + + expected = DataFrame({ + 'Buyer': 'Carl Joe Mark Carl Joe'.split(), + 'Quantity': [6, 8, 3, 4, 10], + 'Date': [ + datetime(2013, 10, 1, 0, 0), + datetime(2013, 10, 1, 0, 0), + datetime(2013, 10, 1, 0, 0), + datetime(2013, 10, 2, 0, 0), + datetime(2013, 10, 2, 0, 0), + ] + }).set_index(['Date', 'Buyer']) + + result = df.groupby([pd.Grouper(freq='1D'), 'Buyer']).sum() + assert_frame_equal(result, expected) + + result = df.groupby([pd.Grouper(freq='1M'), 'Buyer']).sum() + expected = DataFrame({ + 'Buyer': 'Carl Joe Mark'.split(), + 'Quantity': [10, 18, 3], + 'Date': [ + datetime(2013, 10, 31, 0, 0), + datetime(2013, 10, 31, 0, 0), + datetime(2013, 10, 31, 0, 0), + ] + }).set_index(['Date', 'Buyer']) + assert_frame_equal(result, expected) + + # passing the name + df = df.reset_index() + result = df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer' + ]).sum() + assert_frame_equal(result, expected) + + with self.assertRaises(KeyError): + df.groupby([pd.Grouper(freq='1M', key='foo'), 'Buyer']).sum() + + # passing the level + df = df.set_index('Date') + result = df.groupby([pd.Grouper(freq='1M', level='Date'), 'Buyer' + ]).sum() + assert_frame_equal(result, expected) + result = df.groupby([pd.Grouper(freq='1M', level=0), 'Buyer']).sum( + ) + assert_frame_equal(result, expected) + + with self.assertRaises(ValueError): + df.groupby([pd.Grouper(freq='1M', level='foo'), + 'Buyer']).sum() + + # multi names + df = df.copy() + df['Date'] = df.index + pd.offsets.MonthEnd(2) + result = df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer' + ]).sum() + expected = DataFrame({ + 'Buyer': 'Carl Joe Mark'.split(), + 'Quantity': [10, 18, 3], + 'Date': [ + datetime(2013, 11, 30, 0, 0), + datetime(2013, 11, 30, 0, 0), + datetime(2013, 11, 30, 0, 0), + ] + }).set_index(['Date', 'Buyer']) + assert_frame_equal(result, expected) + + # error as we have both a level and a name! 
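A Grouper binds to exactly one source of datetimes: a column via ``key`` or an index level via ``level``; specifying both in a single Grouper is rejected outright, which is what the assertion below exercises. A minimal sketch of the two valid spellings (the frame and its values here are illustrative, not taken from the test):

import pandas as pd

df = pd.DataFrame({'Date': pd.to_datetime(['2013-10-01', '2013-11-15']),
                   'Quantity': [1, 2]})
# grouping vector taken from the 'Date' column
df.groupby(pd.Grouper(key='Date', freq='1M')).sum()
# same monthly sums, grouping vector taken from the index level instead
df.set_index('Date').groupby(pd.Grouper(level='Date', freq='1M')).sum()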
+ with self.assertRaises(ValueError): + df.groupby([pd.Grouper(freq='1M', key='Date', + level='Date'), 'Buyer']).sum() + + # single groupers + expected = DataFrame({'Quantity': [31], + 'Date': [datetime(2013, 10, 31, 0, 0) + ]}).set_index('Date') + result = df.groupby(pd.Grouper(freq='1M')).sum() + assert_frame_equal(result, expected) + + result = df.groupby([pd.Grouper(freq='1M')]).sum() + assert_frame_equal(result, expected) + + expected = DataFrame({'Quantity': [31], + 'Date': [datetime(2013, 11, 30, 0, 0) + ]}).set_index('Date') + result = df.groupby(pd.Grouper(freq='1M', key='Date')).sum() + assert_frame_equal(result, expected) + + result = df.groupby([pd.Grouper(freq='1M', key='Date')]).sum() + assert_frame_equal(result, expected) + + # GH 6764 multiple grouping with/without sort + df = DataFrame({ + 'date': pd.to_datetime([ + '20121002', '20121007', '20130130', '20130202', '20130305', + '20121002', '20121207', '20130130', '20130202', '20130305', + '20130202', '20130305' + ]), + 'user_id': [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5], + 'whole_cost': [1790, 364, 280, 259, 201, 623, 90, 312, 359, 301, + 359, 801], + 'cost1': [12, 15, 10, 24, 39, 1, 0, 90, 45, 34, 1, 12] + }).set_index('date') + + for freq in ['D', 'M', 'A', 'Q-APR']: + expected = df.groupby('user_id')[ + 'whole_cost'].resample( + freq).sum().dropna().reorder_levels( + ['date', 'user_id']).sort_index().astype('int64') + expected.name = 'whole_cost' + + result1 = df.sort_index().groupby([pd.TimeGrouper(freq=freq), + 'user_id'])['whole_cost'].sum() + assert_series_equal(result1, expected) + + result2 = df.groupby([pd.TimeGrouper(freq=freq), 'user_id'])[ + 'whole_cost'].sum() + assert_series_equal(result2, expected) + + def test_timegrouper_get_group(self): + # GH 6914 + + df_original = DataFrame({ + 'Buyer': 'Carl Joe Joe Carl Joe Carl'.split(), + 'Quantity': [18, 3, 5, 1, 9, 3], + 'Date': [datetime(2013, 9, 1, 13, 0), + datetime(2013, 9, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 3, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 9, 2, 14, 0), ] + }) + df_reordered = df_original.sort_values(by='Quantity') + + # single grouping + expected_list = [df_original.iloc[[0, 1, 5]], df_original.iloc[[2, 3]], + df_original.iloc[[4]]] + dt_list = ['2013-09-30', '2013-10-31', '2013-12-31'] + + for df in [df_original, df_reordered]: + grouped = df.groupby(pd.Grouper(freq='M', key='Date')) + for t, expected in zip(dt_list, expected_list): + dt = pd.Timestamp(t) + result = grouped.get_group(dt) + assert_frame_equal(result, expected) + + # multiple grouping + expected_list = [df_original.iloc[[1]], df_original.iloc[[3]], + df_original.iloc[[4]]] + g_list = [('Joe', '2013-09-30'), ('Carl', '2013-10-31'), + ('Joe', '2013-12-31')] + + for df in [df_original, df_reordered]: + grouped = df.groupby(['Buyer', pd.Grouper(freq='M', key='Date')]) + for (b, t), expected in zip(g_list, expected_list): + dt = pd.Timestamp(t) + result = grouped.get_group((b, dt)) + assert_frame_equal(result, expected) + + # with index + df_original = df_original.set_index('Date') + df_reordered = df_original.sort_values(by='Quantity') + + expected_list = [df_original.iloc[[0, 1, 5]], df_original.iloc[[2, 3]], + df_original.iloc[[4]]] + + for df in [df_original, df_reordered]: + grouped = df.groupby(pd.Grouper(freq='M')) + for t, expected in zip(dt_list, expected_list): + dt = pd.Timestamp(t) + result = grouped.get_group(dt) + assert_frame_equal(result, expected) + + def test_timegrouper_apply_return_type_series(self): + # Using `apply` with the 
`TimeGrouper` should give the + # same return type as an `apply` with a `Grouper`. + # Issue #11742 + df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'], + 'value': [10, 13]}) + df_dt = df.copy() + df_dt['date'] = pd.to_datetime(df_dt['date']) + + def sumfunc_series(x): + return pd.Series([x['value'].sum()], ('sum',)) + + expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series) + result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) + .apply(sumfunc_series)) + assert_frame_equal(result.reset_index(drop=True), + expected.reset_index(drop=True)) + + def test_timegrouper_apply_return_type_value(self): + # Using `apply` with the `TimeGrouper` should give the + # same return type as an `apply` with a `Grouper`. + # Issue #11742 + df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'], + 'value': [10, 13]}) + df_dt = df.copy() + df_dt['date'] = pd.to_datetime(df_dt['date']) + + def sumfunc_value(x): + return x.value.sum() + + expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value) + result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) + .apply(sumfunc_value)) + assert_series_equal(result.reset_index(drop=True), + expected.reset_index(drop=True)) + + def test_groupby_groups_datetimeindex(self): + # #1430 + from pandas.tseries.api import DatetimeIndex + periods = 1000 + ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods) + df = DataFrame({'high': np.arange(periods), + 'low': np.arange(periods)}, index=ind) + grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day)) + + # it works! + groups = grouped.groups + tm.assertIsInstance(list(groups.keys())[0], datetime) + + # GH 11442 + index = pd.date_range('2015/01/01', periods=5, name='date') + df = pd.DataFrame({'A': [5, 6, 7, 8, 9], + 'B': [1, 2, 3, 4, 5]}, index=index) + result = df.groupby(level='date').groups + dates = ['2015-01-05', '2015-01-04', '2015-01-03', + '2015-01-02', '2015-01-01'] + expected = {pd.Timestamp(date): pd.DatetimeIndex([date], name='date') + for date in dates} + tm.assert_dict_equal(result, expected) + + grouped = df.groupby(level='date') + for date in dates: + result = grouped.get_group(date) + data = [[df.loc[date, 'A'], df.loc[date, 'B']]] + expected_index = pd.DatetimeIndex([date], name='date') + expected = pd.DataFrame(data, + columns=list('AB'), + index=expected_index) + tm.assert_frame_equal(result, expected) + + def test_groupby_groups_datetimeindex_tz(self): + # GH 3950 + dates = ['2011-07-19 07:00:00', '2011-07-19 08:00:00', + '2011-07-19 09:00:00', '2011-07-19 07:00:00', + '2011-07-19 08:00:00', '2011-07-19 09:00:00'] + df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'], + 'datetime': dates, + 'value1': np.arange(6, dtype='int64'), + 'value2': [1, 2] * 3}) + df['datetime'] = df['datetime'].apply( + lambda d: Timestamp(d, tz='US/Pacific')) + + exp_idx1 = pd.DatetimeIndex(['2011-07-19 07:00:00', + '2011-07-19 07:00:00', + '2011-07-19 08:00:00', + '2011-07-19 08:00:00', + '2011-07-19 09:00:00', + '2011-07-19 09:00:00'], + tz='US/Pacific', name='datetime') + exp_idx2 = Index(['a', 'b'] * 3, name='label') + exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) + expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5], + 'value2': [1, 2, 2, 1, 1, 2]}, + index=exp_idx, columns=['value1', 'value2']) + + result = df.groupby(['datetime', 'label']).sum() + assert_frame_equal(result, expected) + + # by level + didx = pd.DatetimeIndex(dates, tz='Asia/Tokyo') + df = DataFrame({'value1': np.arange(6, dtype='int64'), + 'value2': [1, 2, 3, 1, 2, 3]}, + index=didx) 
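What the expectation below pins down is that grouping on a tz-aware index level keeps the timezone on the result's index rather than dropping it or converting to UTC. In isolation, roughly:

import pandas as pd

idx = pd.DatetimeIndex(['2011-07-19 07:00', '2011-07-19 07:00'],
                       tz='Asia/Tokyo')
# result index stays tz-aware: DatetimeIndex([... 07:00:00+09:00], tz='Asia/Tokyo')
pd.Series([1, 2], index=idx).groupby(level=0).sum()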
+ + exp_idx = pd.DatetimeIndex(['2011-07-19 07:00:00', + '2011-07-19 08:00:00', + '2011-07-19 09:00:00'], tz='Asia/Tokyo') + expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]}, + index=exp_idx, columns=['value1', 'value2']) + + result = df.groupby(level=0).sum() + assert_frame_equal(result, expected) + + def test_frame_datetime64_handling_groupby(self): + # it works! + df = DataFrame([(3, np.datetime64('2012-07-03')), + (3, np.datetime64('2012-07-04'))], + columns=['a', 'date']) + result = df.groupby('a').first() + self.assertEqual(result['date'][3], Timestamp('2012-07-03')) + + def test_groupby_multi_timezone(self): + + # combining multiple / different timezones yields UTC + + data = """0,2000-01-28 16:47:00,America/Chicago +1,2000-01-29 16:48:00,America/Chicago +2,2000-01-30 16:49:00,America/Los_Angeles +3,2000-01-31 16:50:00,America/Chicago +4,2000-01-01 16:50:00,America/New_York""" + + df = pd.read_csv(StringIO(data), header=None, + names=['value', 'date', 'tz']) + result = df.groupby('tz').date.apply( + lambda x: pd.to_datetime(x).dt.tz_localize(x.name)) + + expected = Series([Timestamp('2000-01-28 16:47:00-0600', + tz='America/Chicago'), + Timestamp('2000-01-29 16:48:00-0600', + tz='America/Chicago'), + Timestamp('2000-01-30 16:49:00-0800', + tz='America/Los_Angeles'), + Timestamp('2000-01-31 16:50:00-0600', + tz='America/Chicago'), + Timestamp('2000-01-01 16:50:00-0500', + tz='America/New_York')], + name='date', + dtype=object) + assert_series_equal(result, expected) + + tz = 'America/Chicago' + res_values = df.groupby('tz').date.get_group(tz) + result = pd.to_datetime(res_values).dt.tz_localize(tz) + exp_values = Series(['2000-01-28 16:47:00', '2000-01-29 16:48:00', + '2000-01-31 16:50:00'], + index=[0, 1, 3], name='date') + expected = pd.to_datetime(exp_values).dt.tz_localize(tz) + assert_series_equal(result, expected) + + def test_groupby_groups_periods(self): + dates = ['2011-07-19 07:00:00', '2011-07-19 08:00:00', + '2011-07-19 09:00:00', '2011-07-19 07:00:00', + '2011-07-19 08:00:00', '2011-07-19 09:00:00'] + df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'], + 'period': [pd.Period(d, freq='H') for d in dates], + 'value1': np.arange(6, dtype='int64'), + 'value2': [1, 2] * 3}) + + exp_idx1 = pd.PeriodIndex(['2011-07-19 07:00:00', + '2011-07-19 07:00:00', + '2011-07-19 08:00:00', + '2011-07-19 08:00:00', + '2011-07-19 09:00:00', + '2011-07-19 09:00:00'], + freq='H', name='period') + exp_idx2 = Index(['a', 'b'] * 3, name='label') + exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) + expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5], + 'value2': [1, 2, 2, 1, 1, 2]}, + index=exp_idx, columns=['value1', 'value2']) + + result = df.groupby(['period', 'label']).sum() + assert_frame_equal(result, expected) + + # by level + didx = pd.PeriodIndex(dates, freq='H') + df = DataFrame({'value1': np.arange(6, dtype='int64'), + 'value2': [1, 2, 3, 1, 2, 3]}, + index=didx) + + exp_idx = pd.PeriodIndex(['2011-07-19 07:00:00', + '2011-07-19 08:00:00', + '2011-07-19 09:00:00'], freq='H') + expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]}, + index=exp_idx, columns=['value1', 'value2']) + + result = df.groupby(level=0).sum() + assert_frame_equal(result, expected) + + def test_groupby_first_datetime64(self): + df = DataFrame([(1, 1351036800000000000), (2, 1351036800000000000)]) + df[1] = df[1].view('M8[ns]') + + self.assertTrue(issubclass(df[1].dtype.type, np.datetime64)) + + result = df.groupby(level=0).first() + got_dt = result[1].dtype + 
self.assertTrue(issubclass(got_dt.type, np.datetime64)) + + result = df[1].groupby(level=0).first() + got_dt = result.dtype + self.assertTrue(issubclass(got_dt.type, np.datetime64)) + + def test_groupby_max_datetime64(self): + # GH 5869 + # datetimelike dtype conversion from int + df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5))) + expected = df.groupby('A')['A'].apply(lambda x: x.max()) + result = df.groupby('A')['A'].max() + assert_series_equal(result, expected) + + def test_groupby_datetime64_32_bit(self): + # GH 6410 / numpy 4328 + # 32-bit under 1.9-dev indexing issue + + df = DataFrame({"A": range(2), "B": [pd.Timestamp('2000-01-1')] * 2}) + result = df.groupby("A")["B"].transform(min) + expected = Series([pd.Timestamp('2000-01-1')] * 2, name='B') + assert_series_equal(result, expected) + + def test_groupby_with_timezone_selection(self): + # GH 11616 + # Test that column selection returns output in correct timezone. + np.random.seed(42) + df = pd.DataFrame({ + 'factor': np.random.randint(0, 3, size=60), + 'time': pd.date_range('01/01/2000 00:00', periods=60, + freq='s', tz='UTC') + }) + df1 = df.groupby('factor').max()['time'] + df2 = df.groupby('factor')['time'].max() + tm.assert_series_equal(df1, df2) + + def test_timezone_info(self): + # GH 11682 + # Timezone info lost when broadcasting scalar datetime to DataFrame + tm._skip_if_no_pytz() + import pytz + + df = pd.DataFrame({'a': [1], 'b': [datetime.now(pytz.utc)]}) + self.assertEqual(df['b'][0].tzinfo, pytz.utc) + df = pd.DataFrame({'a': [1, 2, 3]}) + df['b'] = datetime.now(pytz.utc) + self.assertEqual(df['b'][0].tzinfo, pytz.utc) + + def test_datetime_count(self): + df = DataFrame({'a': [1, 2, 3] * 2, + 'dates': pd.date_range('now', periods=6, freq='T')}) + result = df.groupby('a').dates.count() + expected = Series([ + 2, 2, 2 + ], index=Index([1, 2, 3], name='a'), name='dates') + tm.assert_series_equal(result, expected) + + def test_first_last_max_min_on_time_data(self): + # GH 10295 + # Verify that NaT is not in the result of max, min, first and last on + # Dataframe with datetime or timedelta values. 
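These reductions treat NaT the way the numeric ones treat NaN: it is skipped, not propagated into the result. The single-column behaviour being generalised here, as a sketch:

import pandas as pd

s = pd.Series(pd.to_datetime([pd.NaT, '2015-07-24', '2015-07-25']))
# max over the lone group is 2015-07-25; the NaT is skipped, not returned
s.groupby([0, 0, 0]).max()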
+ from datetime import timedelta as td + df_test = DataFrame( + {'dt': [nan, '2015-07-24 10:10', '2015-07-25 11:11', + '2015-07-23 12:12', nan], + 'td': [nan, td(days=1), td(days=2), td(days=3), nan]}) + df_test.dt = pd.to_datetime(df_test.dt) + df_test['group'] = 'A' + df_ref = df_test[df_test.dt.notnull()] + + grouped_test = df_test.groupby('group') + grouped_ref = df_ref.groupby('group') + + assert_frame_equal(grouped_ref.max(), grouped_test.max()) + assert_frame_equal(grouped_ref.min(), grouped_test.min()) + assert_frame_equal(grouped_ref.first(), grouped_test.first()) + assert_frame_equal(grouped_ref.last(), grouped_test.last()) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py new file mode 100644 index 0000000000000..cf5e9eb26ff13 --- /dev/null +++ b/pandas/tests/groupby/test_transform.py @@ -0,0 +1,494 @@ +""" test with the .transform """ + +import numpy as np +import pandas as pd +from pandas.util import testing as tm +from pandas import Series, DataFrame, Timestamp, MultiIndex, concat +from pandas.types.common import _ensure_platform_int +from .common import MixIn, assert_fp_equal + +from pandas.util.testing import assert_frame_equal, assert_series_equal +from pandas.core.groupby import DataError +from pandas.core.config import option_context + + +class TestGroupBy(MixIn, tm.TestCase): + + def test_transform(self): + data = Series(np.arange(9) // 3, index=np.arange(9)) + + index = np.arange(9) + np.random.shuffle(index) + data = data.reindex(index) + + grouped = data.groupby(lambda x: x // 3) + + transformed = grouped.transform(lambda x: x * x.sum()) + self.assertEqual(transformed[7], 12) + + # GH 8046 + # make sure that we preserve the input order + + df = DataFrame( + np.arange(6, dtype='int64').reshape( + 3, 2), columns=["a", "b"], index=[0, 2, 1]) + key = [0, 0, 1] + expected = df.sort_index().groupby(key).transform( + lambda x: x - x.mean()).groupby(key).mean() + result = df.groupby(key).transform(lambda x: x - x.mean()).groupby( + key).mean() + assert_frame_equal(result, expected) + + def demean(arr): + return arr - arr.mean() + + people = DataFrame(np.random.randn(5, 5), + columns=['a', 'b', 'c', 'd', 'e'], + index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis']) + key = ['one', 'two', 'one', 'two', 'one'] + result = people.groupby(key).transform(demean).groupby(key).mean() + expected = people.groupby(key).apply(demean).groupby(key).mean() + assert_frame_equal(result, expected) + + # GH 8430 + df = tm.makeTimeDataFrame() + g = df.groupby(pd.TimeGrouper('M')) + g.transform(lambda x: x - 1) + + # GH 9700 + df = DataFrame({'a': range(5, 10), 'b': range(5)}) + result = df.groupby('a').transform(max) + expected = DataFrame({'b': range(5)}) + tm.assert_frame_equal(result, expected) + + def test_transform_fast(self): + + df = DataFrame({'id': np.arange(100000) / 3, + 'val': np.random.randn(100000)}) + + grp = df.groupby('id')['val'] + + values = np.repeat(grp.mean().values, + _ensure_platform_int(grp.count().values)) + expected = pd.Series(values, index=df.index, name='val') + + result = grp.transform(np.mean) + assert_series_equal(result, expected) + + result = grp.transform('mean') + assert_series_equal(result, expected) + + # GH 12737 + df = pd.DataFrame({'grouping': [0, 1, 1, 3], 'f': [1.1, 2.1, 3.1, 4.5], + 'd': pd.date_range('2014-1-1', '2014-1-4'), + 'i': [1, 2, 3, 4]}, + columns=['grouping', 'f', 'i', 'd']) + result = df.groupby('grouping').transform('first') + + dates = [pd.Timestamp('2014-1-1'), pd.Timestamp('2014-1-2'), + 
pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-4')] + expected = pd.DataFrame({'f': [1.1, 2.1, 2.1, 4.5], + 'd': dates, + 'i': [1, 2, 2, 4]}, + columns=['f', 'i', 'd']) + assert_frame_equal(result, expected) + + # selection + result = df.groupby('grouping')[['f', 'i']].transform('first') + expected = expected[['f', 'i']] + assert_frame_equal(result, expected) + + # dup columns + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['g', 'a', 'a']) + result = df.groupby('g').transform('first') + expected = df.drop('g', axis=1) + assert_frame_equal(result, expected) + + def test_transform_broadcast(self): + grouped = self.ts.groupby(lambda x: x.month) + result = grouped.transform(np.mean) + + self.assert_index_equal(result.index, self.ts.index) + for _, gp in grouped: + assert_fp_equal(result.reindex(gp.index), gp.mean()) + + grouped = self.tsframe.groupby(lambda x: x.month) + result = grouped.transform(np.mean) + self.assert_index_equal(result.index, self.tsframe.index) + for _, gp in grouped: + agged = gp.mean() + res = result.reindex(gp.index) + for col in self.tsframe: + assert_fp_equal(res[col], agged[col]) + + # group columns + grouped = self.tsframe.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, + axis=1) + result = grouped.transform(np.mean) + self.assert_index_equal(result.index, self.tsframe.index) + self.assert_index_equal(result.columns, self.tsframe.columns) + for _, gp in grouped: + agged = gp.mean(1) + res = result.reindex(columns=gp.columns) + for idx in gp.index: + assert_fp_equal(res.xs(idx), agged[idx]) + + def test_transform_axis(self): + + # make sure that we are setting the axes + # correctly when on axis=0 or 1 + # in the presence of a non-monotonic indexer + # GH12713 + + base = self.tsframe.iloc[0:5] + r = len(base.index) + c = len(base.columns) + tso = DataFrame(np.random.randn(r, c), + index=base.index, + columns=base.columns, + dtype='float64') + # monotonic + ts = tso + grouped = ts.groupby(lambda x: x.weekday()) + result = ts - grouped.transform('mean') + expected = grouped.apply(lambda x: x - x.mean()) + assert_frame_equal(result, expected) + + ts = ts.T + grouped = ts.groupby(lambda x: x.weekday(), axis=1) + result = ts - grouped.transform('mean') + expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) + assert_frame_equal(result, expected) + + # non-monotonic + ts = tso.iloc[[1, 0] + list(range(2, len(base)))] + grouped = ts.groupby(lambda x: x.weekday()) + result = ts - grouped.transform('mean') + expected = grouped.apply(lambda x: x - x.mean()) + assert_frame_equal(result, expected) + + ts = ts.T + grouped = ts.groupby(lambda x: x.weekday(), axis=1) + result = ts - grouped.transform('mean') + expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) + assert_frame_equal(result, expected) + + def test_transform_dtype(self): + # GH 9807 + # Check transform dtype output is preserved + df = DataFrame([[1, 3], [2, 3]]) + result = df.groupby(1).transform('mean') + expected = DataFrame([[1.5], [1.5]]) + assert_frame_equal(result, expected) + + def test_transform_bug(self): + # GH 5712 + # transforming on a datetime column + df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5))) + result = df.groupby('A')['B'].transform( + lambda x: x.rank(ascending=False)) + expected = Series(np.arange(5, 0, step=-1), name='B') + assert_series_equal(result, expected) + + def test_transform_multiple(self): + grouped = self.ts.groupby([lambda x: x.year, lambda x: x.month]) + + grouped.transform(lambda x: x * 2) + grouped.transform(np.mean) + + def test_dispatch_transform(self): + df 
= self.tsframe[::5].reindex(self.tsframe.index) + + grouped = df.groupby(lambda x: x.month) + + filled = grouped.fillna(method='pad') + fillit = lambda x: x.fillna(method='pad') + expected = df.groupby(lambda x: x.month).transform(fillit) + assert_frame_equal(filled, expected) + + def test_transform_select_columns(self): + f = lambda x: x.mean() + result = self.df.groupby('A')['C', 'D'].transform(f) + + selection = self.df[['C', 'D']] + expected = selection.groupby(self.df['A']).transform(f) + + assert_frame_equal(result, expected) + + def test_transform_exclude_nuisance(self): + + # this also tests orderings in transform between + # series/frame to make sure it's consistent + expected = {} + grouped = self.df.groupby('A') + expected['C'] = grouped['C'].transform(np.mean) + expected['D'] = grouped['D'].transform(np.mean) + expected = DataFrame(expected) + result = self.df.groupby('A').transform(np.mean) + + assert_frame_equal(result, expected) + + def test_transform_function_aliases(self): + result = self.df.groupby('A').transform('mean') + expected = self.df.groupby('A').transform(np.mean) + assert_frame_equal(result, expected) + + result = self.df.groupby('A')['C'].transform('mean') + expected = self.df.groupby('A')['C'].transform(np.mean) + assert_series_equal(result, expected) + + def test_series_fast_transform_date(self): + # GH 13191 + df = pd.DataFrame({'grouping': [np.nan, 1, 1, 3], + 'd': pd.date_range('2014-1-1', '2014-1-4')}) + result = df.groupby('grouping')['d'].transform('first') + dates = [pd.NaT, pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-2'), + pd.Timestamp('2014-1-4')] + expected = pd.Series(dates, name='d') + assert_series_equal(result, expected) + + def test_transform_length(self): + # GH 9697 + df = pd.DataFrame({'col1': [1, 1, 2, 2], 'col2': [1, 2, 3, np.nan]}) + expected = pd.Series([3.0] * 4) + + def nsum(x): + return np.nansum(x) + + results = [df.groupby('col1').transform(sum)['col2'], + df.groupby('col1')['col2'].transform(sum), + df.groupby('col1').transform(nsum)['col2'], + df.groupby('col1')['col2'].transform(nsum)] + for result in results: + assert_series_equal(result, expected, check_names=False) + + def test_transform_coercion(self): + + # 14457 + # when we are transforming be sure to not coerce + # via assignment + df = pd.DataFrame(dict(A=['a', 'a'], B=[0, 1])) + g = df.groupby('A') + + expected = g.transform(np.mean) + result = g.transform(lambda x: np.mean(x)) + assert_frame_equal(result, expected) + + def test_groupby_transform_with_int(self): + + # GH 3740, make sure that we might upcast on item-by-item transform + + # floats + df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=Series(1, dtype='float64'), + C=Series( + [1, 2, 3, 1, 2, 3], dtype='float64'), D='foo')) + with np.errstate(all='ignore'): + result = df.groupby('A').transform( + lambda x: (x - x.mean()) / x.std()) + expected = DataFrame(dict(B=np.nan, C=Series( + [-1, 0, 1, -1, 0, 1], dtype='float64'))) + assert_frame_equal(result, expected) + + # int case + df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, + C=[1, 2, 3, 1, 2, 3], D='foo')) + with np.errstate(all='ignore'): + result = df.groupby('A').transform( + lambda x: (x - x.mean()) / x.std()) + expected = DataFrame(dict(B=np.nan, C=[-1, 0, 1, -1, 0, 1])) + assert_frame_equal(result, expected) + + # int that needs float conversion + s = Series([2, 3, 4, 10, 5, -1]) + df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, C=s, D='foo')) + with np.errstate(all='ignore'): + result = df.groupby('A').transform( + lambda x: (x - x.mean()) / x.std()) + + s1 
= s.iloc[0:3] + s1 = (s1 - s1.mean()) / s1.std() + s2 = s.iloc[3:6] + s2 = (s2 - s2.mean()) / s2.std() + expected = DataFrame(dict(B=np.nan, C=concat([s1, s2]))) + assert_frame_equal(result, expected) + + # int downcasting + result = df.groupby('A').transform(lambda x: x * 2 / 2) + expected = DataFrame(dict(B=1, C=[2, 3, 4, 10, 5, -1])) + assert_frame_equal(result, expected) + + def test_groupby_transform_with_nan_group(self): + # GH 9941 + df = pd.DataFrame({'a': range(10), + 'b': [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]}) + result = df.groupby(df.b)['a'].transform(max) + expected = pd.Series([1., 1., 2., 3., np.nan, 6., 6., 9., 9., 9.], + name='a') + assert_series_equal(result, expected) + + def test_transform_mixed_type(self): + index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3] + ]) + df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], + 'c': np.tile(['a', 'b', 'c'], 2), + 'v': np.arange(1., 7.)}, index=index) + + def f(group): + group['g'] = group['d'] * 2 + return group[:1] + + grouped = df.groupby('c') + result = grouped.apply(f) + + self.assertEqual(result['d'].dtype, np.float64) + + # this is by definition a mutating operation! + with option_context('mode.chained_assignment', None): + for key, group in grouped: + res = f(group) + assert_frame_equal(res, result.loc[key]) + + def test_cython_group_transform_algos(self): + # GH 4095 + dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32, + np.uint64, np.float32, np.float64] + + ops = [(pd.algos.group_cumprod_float64, np.cumproduct, [np.float64]), + (pd.algos.group_cumsum, np.cumsum, dtypes)] + + is_datetimelike = False + for pd_op, np_op, dtypes in ops: + for dtype in dtypes: + data = np.array([[1], [2], [3], [4]], dtype=dtype) + ans = np.zeros_like(data) + labels = np.array([0, 0, 0, 0], dtype=np.int64) + pd_op(ans, data, labels, is_datetimelike) + self.assert_numpy_array_equal(np_op(data), ans[:, 0], + check_dtype=False) + + # with nans + labels = np.array([0, 0, 0, 0, 0], dtype=np.int64) + + data = np.array([[1], [2], [3], [np.nan], [4]], dtype='float64') + actual = np.zeros_like(data) + actual.fill(np.nan) + pd.algos.group_cumprod_float64(actual, data, labels, is_datetimelike) + expected = np.array([1, 2, 6, np.nan, 24], dtype='float64') + self.assert_numpy_array_equal(actual[:, 0], expected) + + actual = np.zeros_like(data) + actual.fill(np.nan) + pd.algos.group_cumsum(actual, data, labels, is_datetimelike) + expected = np.array([1, 3, 6, np.nan, 10], dtype='float64') + self.assert_numpy_array_equal(actual[:, 0], expected) + + # timedelta + is_datetimelike = True + data = np.array([np.timedelta64(1, 'ns')] * 5, dtype='m8[ns]')[:, None] + actual = np.zeros_like(data, dtype='int64') + pd.algos.group_cumsum(actual, data.view('int64'), labels, + is_datetimelike) + expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64( + 2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'), + np.timedelta64(5, 'ns')]) + self.assert_numpy_array_equal(actual[:, 0].view('m8[ns]'), expected) + + def test_cython_transform(self): + # GH 4095 + ops = [(('cumprod', + ()), lambda x: x.cumprod()), (('cumsum', ()), + lambda x: x.cumsum()), + (('shift', (-1, )), + lambda x: x.shift(-1)), (('shift', + (1, )), lambda x: x.shift())] + + s = Series(np.random.randn(1000)) + s_missing = s.copy() + s_missing.iloc[2:10] = np.nan + labels = np.random.randint(0, 50, size=1000).astype(float) + + # series + for (op, args), targop in ops: + for data in [s, s_missing]: + # print(data.head()) + expected = 
data.groupby(labels).transform(targop) + + tm.assert_series_equal(expected, + data.groupby(labels).transform(op, + *args)) + tm.assert_series_equal(expected, getattr( + data.groupby(labels), op)(*args)) + + strings = list('qwertyuiopasdfghjklz') + strings_missing = strings[:] + strings_missing[5] = np.nan + df = DataFrame({'float': s, + 'float_missing': s_missing, + 'int': [1, 1, 1, 1, 2] * 200, + 'datetime': pd.date_range('1990-1-1', periods=1000), + 'timedelta': pd.timedelta_range(1, freq='s', + periods=1000), + 'string': strings * 50, + 'string_missing': strings_missing * 50}) + df['cat'] = df['string'].astype('category') + + df2 = df.copy() + df2.index = pd.MultiIndex.from_product([range(100), range(10)]) + + # DataFrame - Single and MultiIndex, + # group by values, index level, columns + for df in [df, df2]: + for gb_target in [dict(by=labels), dict(level=0), dict(by='string') + ]: # dict(by='string_missing')]: + # dict(by=['int','string'])]: + + gb = df.groupby(**gb_target) + # whitelisted methods set the selection before applying + # bit a of hack to make sure the cythonized shift + # is equivalent to pre 0.17.1 behavior + if op == 'shift': + gb._set_group_selection() + + for (op, args), targop in ops: + if op != 'shift' and 'int' not in gb_target: + # numeric apply fastpath promotes dtype so have + # to apply seperately and concat + i = gb[['int']].apply(targop) + f = gb[['float', 'float_missing']].apply(targop) + expected = pd.concat([f, i], axis=1) + else: + expected = gb.apply(targop) + + expected = expected.sort_index(axis=1) + tm.assert_frame_equal(expected, + gb.transform(op, *args).sort_index( + axis=1)) + tm.assert_frame_equal(expected, getattr(gb, op)(*args)) + # individual columns + for c in df: + if c not in ['float', 'int', 'float_missing' + ] and op != 'shift': + self.assertRaises(DataError, gb[c].transform, op) + self.assertRaises(DataError, getattr(gb[c], op)) + else: + expected = gb[c].apply(targop) + expected.name = c + tm.assert_series_equal(expected, + gb[c].transform(op, *args)) + tm.assert_series_equal(expected, + getattr(gb[c], op)(*args)) + + def test_transform_with_non_scalar_group(self): + # GH 10165 + cols = pd.MultiIndex.from_tuples([ + ('syn', 'A'), ('mis', 'A'), ('non', 'A'), + ('syn', 'C'), ('mis', 'C'), ('non', 'C'), + ('syn', 'T'), ('mis', 'T'), ('non', 'T'), + ('syn', 'G'), ('mis', 'G'), ('non', 'G')]) + df = pd.DataFrame(np.random.randint(1, 10, (4, 12)), + columns=cols, + index=['A', 'C', 'G', 'T']) + self.assertRaisesRegexp(ValueError, 'transform must return a scalar ' + 'value for each group.*', df.groupby + (axis=1, level=1).transform, + lambda z: z.div(z.sum(axis=1), axis=0)) From fe246cc27027c7d469a5e8f946415ec2c5664d1d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 8 Feb 2017 08:04:01 -0500 Subject: [PATCH 020/933] TST: mark gbq streaming insert tests as slow --- ci/requirements-3.4_SLOW.pip | 3 +++ pandas/io/tests/test_gbq.py | 1 + 2 files changed, 4 insertions(+) create mode 100644 ci/requirements-3.4_SLOW.pip diff --git a/ci/requirements-3.4_SLOW.pip b/ci/requirements-3.4_SLOW.pip new file mode 100644 index 0000000000000..05c938abcbab6 --- /dev/null +++ b/ci/requirements-3.4_SLOW.pip @@ -0,0 +1,3 @@ +httplib2 +google-api-python-client +oauth2client diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 0507f0d89661c..457e2d218cb33 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -938,6 +938,7 @@ def test_upload_data_if_table_exists_replace(self): 
private_key=_get_private_key_path()) self.assertEqual(result['NUM_ROWS'][0], 5) + @tm.slow def test_google_upload_errors_should_raise_exception(self): destination_table = DESTINATION_TABLE + "5" From 87c2c2af6150d6e8fa8cfbc017fbfd52e7f8c5c7 Mon Sep 17 00:00:00 2001 From: Kernc Date: Wed, 8 Feb 2017 09:30:47 -0500 Subject: [PATCH 021/933] ENH: .squeeze has gained the axis parameter closes #15339 Author: Kernc Closes #15335 from kernc/squeeze_axis_param and squashes the following commits: 44d3c54 [Kernc] fixup! ENH: .squeeze accepts axis parameter cc018c9 [Kernc] ENH: .squeeze accepts axis parameter --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/compat/numpy/function.py | 7 ------- pandas/core/generic.py | 24 +++++++++++++++++++----- pandas/tests/test_generic.py | 18 ++++++++++++++---- 4 files changed, 34 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 3f6c06e20b546..9afcf85c929a7 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -149,6 +149,7 @@ Other enhancements - ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`). - ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`). - ``pandas.tools.hashing`` has gained a ``hash_tuples`` routine, and ``hash_pandas_object`` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`) +- ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index 72e89586d0280..eb9e9ecc359b2 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -214,13 +214,6 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name): validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method='kwargs') -# Currently, numpy (v1.11) has backwards compatibility checks -# in place so that this 'kwargs' parameter is technically -# unnecessary, but in the long-run, this will be needed. -SQUEEZE_DEFAULTS = dict(axis=None) -validate_squeeze = CompatValidator(SQUEEZE_DEFAULTS, fname='squeeze', - method='kwargs') - TAKE_DEFAULTS = OrderedDict() TAKE_DEFAULTS['out'] = None TAKE_DEFAULTS['mode'] = 'raise' diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bb2664a5b8d28..228dd2acd2124 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -532,13 +532,27 @@ def pop(self, item): return result - def squeeze(self, **kwargs): - """Squeeze length 1 dimensions.""" - nv.validate_squeeze(tuple(), kwargs) + def squeeze(self, axis=None): + """ + Squeeze length 1 dimensions. + Parameters + ---------- + axis : None, integer or string axis name, optional + The axis to squeeze if 1-sized. + + .. 
versionadded:: 0.20.0 + + Returns + ------- + scalar if 1-sized, else original object + """ + axis = (self._AXIS_NAMES if axis is None else + (self._get_axis_number(axis),)) try: - return self.iloc[tuple([0 if len(a) == 1 else slice(None) - for a in self.axes])] + return self.iloc[ + tuple([0 if i in axis and len(a) == 1 else slice(None) + for i, a in enumerate(self.axes)])] except: return self diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 916d7ae0b0ec4..bb341c26d454e 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1770,6 +1770,20 @@ def test_squeeze(self): [tm.assert_series_equal(empty_series, higher_dim.squeeze()) for higher_dim in [empty_series, empty_frame, empty_panel]] + # axis argument + df = tm.makeTimeDataFrame(nper=1).iloc[:, :1] + tm.assert_equal(df.shape, (1, 1)) + tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0]) + tm.assert_series_equal(df.squeeze(axis='index'), df.iloc[0]) + tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0]) + tm.assert_series_equal(df.squeeze(axis='columns'), df.iloc[:, 0]) + tm.assert_equal(df.squeeze(), df.iloc[0, 0]) + tm.assertRaises(ValueError, df.squeeze, axis=2) + tm.assertRaises(ValueError, df.squeeze, axis='x') + + df = tm.makeTimeDataFrame(3) + tm.assert_frame_equal(df.squeeze(axis=0), df) + def test_numpy_squeeze(self): s = tm.makeFloatSeries() tm.assert_series_equal(np.squeeze(s), s) @@ -1777,10 +1791,6 @@ def test_numpy_squeeze(self): df = tm.makeTimeDataFrame().reindex(columns=['A']) tm.assert_series_equal(np.squeeze(df), df['A']) - msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, - np.squeeze, s, axis=0) - def test_transpose(self): msg = (r"transpose\(\) got multiple values for " r"keyword argument 'axes'") From bf1a5961a09a6f5237a681f9f1c9a698b1a13918 Mon Sep 17 00:00:00 2001 From: Jonathan de Bruin Date: Wed, 8 Feb 2017 15:58:25 +0100 Subject: [PATCH 022/933] Small documentation fix for MultiIndex.sortlevel (#15345) * doc fix for return values of MultiIndex.sortlevel * MultiIndex.sortlevel docs improved after feedback --- pandas/indexes/multi.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index d2469cf1a3eed..9ab07d87fd13b 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -1399,7 +1399,11 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): Returns ------- - sorted_index : MultiIndex + sorted_index : pd.MultiIndex + Resulting index + indexer : np.ndarray + Indices of output values in original index + """ from pandas.core.groupby import _indexer_from_factorized From 704cdbf830c110001062012b92302ed30d8ae127 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 8 Feb 2017 12:42:21 -0500 Subject: [PATCH 023/933] CI: use pip install for statsmodels --- ci/requirements-2.7.pip | 1 + ci/requirements-2.7.run | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-2.7.pip b/ci/requirements-2.7.pip index d16b932c8be4f..d7266fe88fb32 100644 --- a/ci/requirements-2.7.pip +++ b/ci/requirements-2.7.pip @@ -1,3 +1,4 @@ +statsmodels blosc httplib2 google-api-python-client==1.2 diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run index b5fc919297c76..62e31e4ae24e3 100644 --- a/ci/requirements-2.7.run +++ b/ci/requirements-2.7.run @@ -18,6 +18,5 @@ patsy pymysql=0.6.3 html5lib=1.0b2 beautiful-soup=4.2.1 -statsmodels jinja2=2.8 xarray=0.8.0 From 9ba10089f1c57d6fff569af39f0e6d37ee0210f5 Mon Sep 17 
00:00:00 2001 From: TrigonaMinima Date: Thu, 9 Feb 2017 03:35:43 +0530 Subject: [PATCH 024/933] TST: Period tests reorg xref #14854 --- pandas/tests/frame/test_period.py | 139 + pandas/tests/indexes/period/test_asfreq.py | 154 + .../tests/indexes/period/test_construction.py | 486 ++ .../indexes/period/test_ops.py} | 798 ++- .../indexes/period/test_partial_slicing.py | 139 + pandas/tests/indexes/period/test_period.py | 583 +- pandas/tests/indexes/period/test_setops.py | 157 + pandas/tests/indexes/period/test_tools.py | 449 ++ pandas/tests/scalar/test_period.py | 2074 +++++++ pandas/tests/series/test_period.py | 248 + pandas/tseries/tests/test_period.py | 5065 ----------------- 11 files changed, 5190 insertions(+), 5102 deletions(-) create mode 100644 pandas/tests/frame/test_period.py create mode 100644 pandas/tests/indexes/period/test_asfreq.py create mode 100644 pandas/tests/indexes/period/test_construction.py rename pandas/{tseries/tests/test_base.py => tests/indexes/period/test_ops.py} (58%) create mode 100644 pandas/tests/indexes/period/test_partial_slicing.py create mode 100644 pandas/tests/indexes/period/test_setops.py create mode 100644 pandas/tests/indexes/period/test_tools.py create mode 100644 pandas/tests/scalar/test_period.py create mode 100644 pandas/tests/series/test_period.py delete mode 100644 pandas/tseries/tests/test_period.py diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py new file mode 100644 index 0000000000000..84d10a2e78d28 --- /dev/null +++ b/pandas/tests/frame/test_period.py @@ -0,0 +1,139 @@ +import numpy as np +from numpy.random import randn +from datetime import timedelta + +import pandas as pd +import pandas.util.testing as tm +from pandas import (PeriodIndex, period_range, DataFrame, date_range, + Index, to_datetime, DatetimeIndex) + + +def _permute(obj): + return obj.take(np.random.permutation(len(obj))) + + +class TestPeriodIndex(tm.TestCase): + + def setUp(self): + pass + + def test_as_frame_columns(self): + rng = period_range('1/1/2000', periods=5) + df = DataFrame(randn(10, 5), columns=rng) + + ts = df[rng[0]] + tm.assert_series_equal(ts, df.iloc[:, 0]) + + # GH # 1211 + repr(df) + + ts = df['1/1/2000'] + tm.assert_series_equal(ts, df.iloc[:, 0]) + + def test_frame_setitem(self): + rng = period_range('1/1/2000', periods=5, name='index') + df = DataFrame(randn(5, 3), index=rng) + + df['Index'] = rng + rs = Index(df['Index']) + tm.assert_index_equal(rs, rng, check_names=False) + self.assertEqual(rs.name, 'Index') + self.assertEqual(rng.name, 'index') + + rs = df.reset_index().set_index('index') + tm.assertIsInstance(rs.index, PeriodIndex) + tm.assert_index_equal(rs.index, rng) + + def test_frame_to_time_stamp(self): + K = 5 + index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + df = DataFrame(randn(len(index), K), index=index) + df['mix'] = 'a' + + exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') + result = df.to_timestamp('D', 'end') + tm.assert_index_equal(result.index, exp_index) + tm.assert_numpy_array_equal(result.values, df.values) + + exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN') + result = df.to_timestamp('D', 'start') + tm.assert_index_equal(result.index, exp_index) + + def _get_with_delta(delta, freq='A-DEC'): + return date_range(to_datetime('1/1/2001') + delta, + to_datetime('12/31/2009') + delta, freq=freq) + + delta = timedelta(hours=23) + result = df.to_timestamp('H', 'end') + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.index, exp_index) 
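The deltas encode the 'end' convention: each annual period maps to the start of its last unit at the requested resolution, so the timestamps land 23 hours past midnight of Dec 31 for 'H', 23:59 for 'T', and 23:59:59 for 'S'. The same convention on a single scalar, as a sketch consistent with the assertions in this test:

import pandas as pd

p = pd.Period('2001', freq='A-DEC')
p.to_timestamp('H', how='end')  # Timestamp('2001-12-31 23:00:00')
p.to_timestamp('S', how='end')  # Timestamp('2001-12-31 23:59:59')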
+ + delta = timedelta(hours=23, minutes=59) + result = df.to_timestamp('T', 'end') + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.index, exp_index) + + result = df.to_timestamp('S', 'end') + delta = timedelta(hours=23, minutes=59, seconds=59) + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.index, exp_index) + + # columns + df = df.T + + exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') + result = df.to_timestamp('D', 'end', axis=1) + tm.assert_index_equal(result.columns, exp_index) + tm.assert_numpy_array_equal(result.values, df.values) + + exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN') + result = df.to_timestamp('D', 'start', axis=1) + tm.assert_index_equal(result.columns, exp_index) + + delta = timedelta(hours=23) + result = df.to_timestamp('H', 'end', axis=1) + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.columns, exp_index) + + delta = timedelta(hours=23, minutes=59) + result = df.to_timestamp('T', 'end', axis=1) + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.columns, exp_index) + + result = df.to_timestamp('S', 'end', axis=1) + delta = timedelta(hours=23, minutes=59, seconds=59) + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.columns, exp_index) + + # invalid axis + tm.assertRaisesRegexp(ValueError, 'axis', df.to_timestamp, axis=2) + + result1 = df.to_timestamp('5t', axis=1) + result2 = df.to_timestamp('t', axis=1) + expected = pd.date_range('2001-01-01', '2009-01-01', freq='AS') + self.assertTrue(isinstance(result1.columns, DatetimeIndex)) + self.assertTrue(isinstance(result2.columns, DatetimeIndex)) + self.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) + self.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) + # PeriodIndex.to_timestamp always use 'infer' + self.assertEqual(result1.columns.freqstr, 'AS-JAN') + self.assertEqual(result2.columns.freqstr, 'AS-JAN') + + def test_frame_index_to_string(self): + index = PeriodIndex(['2011-1', '2011-2', '2011-3'], freq='M') + frame = DataFrame(np.random.randn(3, 4), index=index) + + # it works! 
+ frame.to_string() + + def test_align_frame(self): + rng = period_range('1/1/2000', '1/1/2010', freq='A') + ts = DataFrame(np.random.randn(len(rng), 3), index=rng) + + result = ts + ts[::2] + expected = ts + ts + expected.values[1::2] = np.nan + tm.assert_frame_equal(result, expected) + + result = ts + _permute(ts[::2]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/period/test_asfreq.py b/pandas/tests/indexes/period/test_asfreq.py new file mode 100644 index 0000000000000..96e3d0bbd8abc --- /dev/null +++ b/pandas/tests/indexes/period/test_asfreq.py @@ -0,0 +1,154 @@ +import numpy as np + +import pandas as pd +from pandas.util import testing as tm +from pandas import PeriodIndex, Series, DataFrame + + +class TestPeriodIndex(tm.TestCase): + + def setUp(self): + pass + + def test_asfreq(self): + pi1 = PeriodIndex(freq='A', start='1/1/2001', end='1/1/2001') + pi2 = PeriodIndex(freq='Q', start='1/1/2001', end='1/1/2001') + pi3 = PeriodIndex(freq='M', start='1/1/2001', end='1/1/2001') + pi4 = PeriodIndex(freq='D', start='1/1/2001', end='1/1/2001') + pi5 = PeriodIndex(freq='H', start='1/1/2001', end='1/1/2001 00:00') + pi6 = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 00:00') + pi7 = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 00:00:00') + + self.assertEqual(pi1.asfreq('Q', 'S'), pi2) + self.assertEqual(pi1.asfreq('Q', 's'), pi2) + self.assertEqual(pi1.asfreq('M', 'start'), pi3) + self.assertEqual(pi1.asfreq('D', 'StarT'), pi4) + self.assertEqual(pi1.asfreq('H', 'beGIN'), pi5) + self.assertEqual(pi1.asfreq('Min', 'S'), pi6) + self.assertEqual(pi1.asfreq('S', 'S'), pi7) + + self.assertEqual(pi2.asfreq('A', 'S'), pi1) + self.assertEqual(pi2.asfreq('M', 'S'), pi3) + self.assertEqual(pi2.asfreq('D', 'S'), pi4) + self.assertEqual(pi2.asfreq('H', 'S'), pi5) + self.assertEqual(pi2.asfreq('Min', 'S'), pi6) + self.assertEqual(pi2.asfreq('S', 'S'), pi7) + + self.assertEqual(pi3.asfreq('A', 'S'), pi1) + self.assertEqual(pi3.asfreq('Q', 'S'), pi2) + self.assertEqual(pi3.asfreq('D', 'S'), pi4) + self.assertEqual(pi3.asfreq('H', 'S'), pi5) + self.assertEqual(pi3.asfreq('Min', 'S'), pi6) + self.assertEqual(pi3.asfreq('S', 'S'), pi7) + + self.assertEqual(pi4.asfreq('A', 'S'), pi1) + self.assertEqual(pi4.asfreq('Q', 'S'), pi2) + self.assertEqual(pi4.asfreq('M', 'S'), pi3) + self.assertEqual(pi4.asfreq('H', 'S'), pi5) + self.assertEqual(pi4.asfreq('Min', 'S'), pi6) + self.assertEqual(pi4.asfreq('S', 'S'), pi7) + + self.assertEqual(pi5.asfreq('A', 'S'), pi1) + self.assertEqual(pi5.asfreq('Q', 'S'), pi2) + self.assertEqual(pi5.asfreq('M', 'S'), pi3) + self.assertEqual(pi5.asfreq('D', 'S'), pi4) + self.assertEqual(pi5.asfreq('Min', 'S'), pi6) + self.assertEqual(pi5.asfreq('S', 'S'), pi7) + + self.assertEqual(pi6.asfreq('A', 'S'), pi1) + self.assertEqual(pi6.asfreq('Q', 'S'), pi2) + self.assertEqual(pi6.asfreq('M', 'S'), pi3) + self.assertEqual(pi6.asfreq('D', 'S'), pi4) + self.assertEqual(pi6.asfreq('H', 'S'), pi5) + self.assertEqual(pi6.asfreq('S', 'S'), pi7) + + self.assertEqual(pi7.asfreq('A', 'S'), pi1) + self.assertEqual(pi7.asfreq('Q', 'S'), pi2) + self.assertEqual(pi7.asfreq('M', 'S'), pi3) + self.assertEqual(pi7.asfreq('D', 'S'), pi4) + self.assertEqual(pi7.asfreq('H', 'S'), pi5) + self.assertEqual(pi7.asfreq('Min', 'S'), pi6) + + self.assertRaises(ValueError, pi7.asfreq, 'T', 'foo') + result1 = pi1.asfreq('3M') + result2 = pi1.asfreq('M') + expected = PeriodIndex(freq='M', start='2001-12', end='2001-12') + self.assert_numpy_array_equal(result1.asi8, 
expected.asi8) + self.assertEqual(result1.freqstr, '3M') + self.assert_numpy_array_equal(result2.asi8, expected.asi8) + self.assertEqual(result2.freqstr, 'M') + + def test_asfreq_nat(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M') + result = idx.asfreq(freq='Q') + expected = PeriodIndex(['2011Q1', '2011Q1', 'NaT', '2011Q2'], freq='Q') + tm.assert_index_equal(result, expected) + + def test_asfreq_mult_pi(self): + pi = PeriodIndex(['2001-01', '2001-02', 'NaT', '2001-03'], freq='2M') + + for freq in ['D', '3D']: + result = pi.asfreq(freq) + exp = PeriodIndex(['2001-02-28', '2001-03-31', 'NaT', + '2001-04-30'], freq=freq) + self.assert_index_equal(result, exp) + self.assertEqual(result.freq, exp.freq) + + result = pi.asfreq(freq, how='S') + exp = PeriodIndex(['2001-01-01', '2001-02-01', 'NaT', + '2001-03-01'], freq=freq) + self.assert_index_equal(result, exp) + self.assertEqual(result.freq, exp.freq) + + def test_asfreq_combined_pi(self): + pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], + freq='H') + exp = PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], + freq='25H') + for freq, how in zip(['1D1H', '1H1D'], ['S', 'E']): + result = pi.asfreq(freq, how=how) + self.assert_index_equal(result, exp) + self.assertEqual(result.freq, exp.freq) + + for freq in ['1D1H', '1H1D']: + pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', + 'NaT'], freq=freq) + result = pi.asfreq('H') + exp = PeriodIndex(['2001-01-02 00:00', '2001-01-03 02:00', 'NaT'], + freq='H') + self.assert_index_equal(result, exp) + self.assertEqual(result.freq, exp.freq) + + pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', + 'NaT'], freq=freq) + result = pi.asfreq('H', how='S') + exp = PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], + freq='H') + self.assert_index_equal(result, exp) + self.assertEqual(result.freq, exp.freq) + + def test_asfreq_ts(self): + index = PeriodIndex(freq='A', start='1/1/2001', end='12/31/2010') + ts = Series(np.random.randn(len(index)), index=index) + df = DataFrame(np.random.randn(len(index), 3), index=index) + + result = ts.asfreq('D', how='end') + df_result = df.asfreq('D', how='end') + exp_index = index.asfreq('D', how='end') + self.assertEqual(len(result), len(ts)) + tm.assert_index_equal(result.index, exp_index) + tm.assert_index_equal(df_result.index, exp_index) + + result = ts.asfreq('D', how='start') + self.assertEqual(len(result), len(ts)) + tm.assert_index_equal(result.index, index.asfreq('D', how='start')) + + def test_astype_asfreq(self): + pi1 = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'], freq='D') + exp = PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M') + tm.assert_index_equal(pi1.asfreq('M'), exp) + tm.assert_index_equal(pi1.astype('period[M]'), exp) + + exp = PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='3M') + tm.assert_index_equal(pi1.asfreq('3M'), exp) + tm.assert_index_equal(pi1.astype('period[3M]'), exp) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py new file mode 100644 index 0000000000000..c1299c6abeda3 --- /dev/null +++ b/pandas/tests/indexes/period/test_construction.py @@ -0,0 +1,486 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +import pandas.tseries.period as period +from pandas.compat import lrange, PY3, text_type, lmap +from pandas import (Period, PeriodIndex, period_range, offsets, date_range, + Series, Index) + + +class TestPeriodIndex(tm.TestCase): + + def 
setUp(self): + pass + + def test_construction_base_constructor(self): + # GH 13664 + arr = [pd.Period('2011-01', freq='M'), pd.NaT, + pd.Period('2011-03', freq='M')] + tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.PeriodIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, pd.Period('2011-03', freq='M')] + tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.PeriodIndex(np.array(arr))) + + arr = [pd.Period('2011-01', freq='M'), pd.NaT, + pd.Period('2011-03', freq='D')] + tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object)) + + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.Index(np.array(arr), dtype=object)) + + def test_constructor_use_start_freq(self): + # GH #1118 + p = Period('4/2/2012', freq='B') + index = PeriodIndex(start=p, periods=10) + expected = PeriodIndex(start='4/2/2012', periods=10, freq='B') + tm.assert_index_equal(index, expected) + + def test_constructor_field_arrays(self): + # GH #1264 + + years = np.arange(1990, 2010).repeat(4)[2:-2] + quarters = np.tile(np.arange(1, 5), 20)[2:-2] + + index = PeriodIndex(year=years, quarter=quarters, freq='Q-DEC') + expected = period_range('1990Q3', '2009Q2', freq='Q-DEC') + tm.assert_index_equal(index, expected) + + index2 = PeriodIndex(year=years, quarter=quarters, freq='2Q-DEC') + tm.assert_numpy_array_equal(index.asi8, index2.asi8) + + index = PeriodIndex(year=years, quarter=quarters) + tm.assert_index_equal(index, expected) + + years = [2007, 2007, 2007] + months = [1, 2] + self.assertRaises(ValueError, PeriodIndex, year=years, month=months, + freq='M') + self.assertRaises(ValueError, PeriodIndex, year=years, month=months, + freq='2M') + self.assertRaises(ValueError, PeriodIndex, year=years, month=months, + freq='M', start=Period('2007-01', freq='M')) + + years = [2007, 2007, 2007] + months = [1, 2, 3] + idx = PeriodIndex(year=years, month=months, freq='M') + exp = period_range('2007-01', periods=3, freq='M') + tm.assert_index_equal(idx, exp) + + def test_constructor_U(self): + # U was used as undefined period + self.assertRaises(ValueError, period_range, '2007-1-1', periods=500, + freq='X') + + def test_constructor_nano(self): + idx = period_range(start=Period(ordinal=1, freq='N'), + end=Period(ordinal=4, freq='N'), freq='N') + exp = PeriodIndex([Period(ordinal=1, freq='N'), + Period(ordinal=2, freq='N'), + Period(ordinal=3, freq='N'), + Period(ordinal=4, freq='N')], freq='N') + tm.assert_index_equal(idx, exp) + + def test_constructor_arrays_negative_year(self): + years = np.arange(1960, 2000, dtype=np.int64).repeat(4) + quarters = np.tile(np.array([1, 2, 3, 4], dtype=np.int64), 40) + + pindex = PeriodIndex(year=years, quarter=quarters) + + self.assert_numpy_array_equal(pindex.year, years) + self.assert_numpy_array_equal(pindex.quarter, quarters) + + def test_constructor_invalid_quarters(self): + self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004), + quarter=lrange(4), freq='Q-DEC') + + def test_constructor_corner(self): + self.assertRaises(ValueError, PeriodIndex, periods=10, freq='A') + + start = Period('2007', freq='A-JUN') + end = Period('2010', freq='A-DEC') + self.assertRaises(ValueError, PeriodIndex, start=start, end=end) + self.assertRaises(ValueError, PeriodIndex, start=start) + self.assertRaises(ValueError, PeriodIndex, end=end) + + result = period_range('2007-01', periods=10.5, freq='M') + exp = period_range('2007-01', periods=10, freq='M') + 
tm.assert_index_equal(result, exp)
+
+    def test_constructor_fromarraylike(self):
+        idx = period_range('2007-01', periods=20, freq='M')
+
+        # values is an array of Period, thus can retrieve freq
+        tm.assert_index_equal(PeriodIndex(idx.values), idx)
+        tm.assert_index_equal(PeriodIndex(list(idx.values)), idx)
+
+        self.assertRaises(ValueError, PeriodIndex, idx._values)
+        self.assertRaises(ValueError, PeriodIndex, list(idx._values))
+        self.assertRaises(ValueError, PeriodIndex,
+                          data=Period('2007', freq='A'))
+
+        result = PeriodIndex(iter(idx))
+        tm.assert_index_equal(result, idx)
+
+        result = PeriodIndex(idx)
+        tm.assert_index_equal(result, idx)
+
+        result = PeriodIndex(idx, freq='M')
+        tm.assert_index_equal(result, idx)
+
+        result = PeriodIndex(idx, freq=offsets.MonthEnd())
+        tm.assert_index_equal(result, idx)
+        self.assertEqual(result.freq, 'M')
+
+        result = PeriodIndex(idx, freq='2M')
+        tm.assert_index_equal(result, idx.asfreq('2M'))
+        self.assertEqual(result.freq, '2M')
+
+        result = PeriodIndex(idx, freq=offsets.MonthEnd(2))
+        tm.assert_index_equal(result, idx.asfreq('2M'))
+        self.assertEqual(result.freq, '2M')
+
+        result = PeriodIndex(idx, freq='D')
+        exp = idx.asfreq('D', 'e')
+        tm.assert_index_equal(result, exp)
+
+    def test_constructor_datetime64arr(self):
+        vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64)
+        vals = vals.view(np.dtype('M8[us]'))
+
+        self.assertRaises(ValueError, PeriodIndex, vals, freq='D')
+
+    def test_constructor_dtype(self):
+        # passing a period dtype should construct the index with that freq
+        idx = PeriodIndex(['2013-01', '2013-03'], dtype='period[M]')
+        exp = PeriodIndex(['2013-01', '2013-03'], freq='M')
+        tm.assert_index_equal(idx, exp)
+        self.assertEqual(idx.dtype, 'period[M]')
+
+        idx = PeriodIndex(['2013-01-05', '2013-03-05'], dtype='period[3D]')
+        exp = PeriodIndex(['2013-01-05', '2013-03-05'], freq='3D')
+        tm.assert_index_equal(idx, exp)
+        self.assertEqual(idx.dtype, 'period[3D]')
+
+        # if we already have a freq and it's not the same, then asfreq
+        # (not changed)
+        idx = PeriodIndex(['2013-01-01', '2013-01-02'], freq='D')
+
+        res = PeriodIndex(idx, dtype='period[M]')
+        exp = PeriodIndex(['2013-01', '2013-01'], freq='M')
+        tm.assert_index_equal(res, exp)
+        self.assertEqual(res.dtype, 'period[M]')
+
+        res = PeriodIndex(idx, freq='M')
+        tm.assert_index_equal(res, exp)
+        self.assertEqual(res.dtype, 'period[M]')
+
+        msg = 'specified freq and dtype are different'
+        with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg):
+            PeriodIndex(['2011-01'], freq='M', dtype='period[D]')
+
+    def test_constructor_empty(self):
+        idx = pd.PeriodIndex([], freq='M')
+        tm.assertIsInstance(idx, PeriodIndex)
+        self.assertEqual(len(idx), 0)
+        self.assertEqual(idx.freq, 'M')
+
+        with tm.assertRaisesRegexp(ValueError, 'freq not specified'):
+            pd.PeriodIndex([])
+
+    def test_constructor_pi_nat(self):
+        idx = PeriodIndex([Period('2011-01', freq='M'), pd.NaT,
+                           Period('2011-01', freq='M')])
+        exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M')
+        tm.assert_index_equal(idx, exp)
+
+        idx = PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT,
+                                    Period('2011-01', freq='M')]))
+        tm.assert_index_equal(idx, exp)
+
+        idx = PeriodIndex([pd.NaT, pd.NaT, Period('2011-01', freq='M'),
+                           Period('2011-01', freq='M')])
+        exp = PeriodIndex(['NaT', 'NaT', '2011-01', '2011-01'], freq='M')
+        tm.assert_index_equal(idx, exp)
+
+        idx = PeriodIndex(np.array([pd.NaT, pd.NaT,
+                                    Period('2011-01', freq='M'),
+                                    Period('2011-01', freq='M')]))
+        tm.assert_index_equal(idx, exp)
+
+        idx = PeriodIndex([pd.NaT, pd.NaT, 
'2011-01', '2011-01'], freq='M') + tm.assert_index_equal(idx, exp) + + with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + PeriodIndex([pd.NaT, pd.NaT]) + + with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + PeriodIndex(np.array([pd.NaT, pd.NaT])) + + with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + PeriodIndex(['NaT', 'NaT']) + + with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + PeriodIndex(np.array(['NaT', 'NaT'])) + + def test_constructor_incompat_freq(self): + msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)" + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + PeriodIndex([Period('2011-01', freq='M'), pd.NaT, + Period('2011-01', freq='D')]) + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT, + Period('2011-01', freq='D')])) + + # first element is pd.NaT + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + PeriodIndex([pd.NaT, Period('2011-01', freq='M'), + Period('2011-01', freq='D')]) + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + PeriodIndex(np.array([pd.NaT, Period('2011-01', freq='M'), + Period('2011-01', freq='D')])) + + def test_constructor_mixed(self): + idx = PeriodIndex(['2011-01', pd.NaT, Period('2011-01', freq='M')]) + exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M') + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex(['NaT', pd.NaT, Period('2011-01', freq='M')]) + exp = PeriodIndex(['NaT', 'NaT', '2011-01'], freq='M') + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex([Period('2011-01-01', freq='D'), pd.NaT, + '2012-01-01']) + exp = PeriodIndex(['2011-01-01', 'NaT', '2012-01-01'], freq='D') + tm.assert_index_equal(idx, exp) + + def test_constructor_simple_new(self): + idx = period_range('2007-01', name='p', periods=2, freq='M') + result = idx._simple_new(idx, 'p', freq=idx.freq) + tm.assert_index_equal(result, idx) + + result = idx._simple_new(idx.astype('i8'), 'p', freq=idx.freq) + tm.assert_index_equal(result, idx) + + result = idx._simple_new([pd.Period('2007-01', freq='M'), + pd.Period('2007-02', freq='M')], + 'p', freq=idx.freq) + self.assert_index_equal(result, idx) + + result = idx._simple_new(np.array([pd.Period('2007-01', freq='M'), + pd.Period('2007-02', freq='M')]), + 'p', freq=idx.freq) + self.assert_index_equal(result, idx) + + def test_constructor_simple_new_empty(self): + # GH13079 + idx = PeriodIndex([], freq='M', name='p') + result = idx._simple_new(idx, name='p', freq='M') + tm.assert_index_equal(result, idx) + + def test_constructor_simple_new_floats(self): + # GH13079 + for floats in [[1.1], np.array([1.1])]: + with self.assertRaises(TypeError): + pd.PeriodIndex._simple_new(floats, freq='M') + + def test_constructor_nat(self): + self.assertRaises(ValueError, period_range, start='NaT', + end='2011-01-01', freq='M') + self.assertRaises(ValueError, period_range, start='2011-01-01', + end='NaT', freq='M') + + def test_constructor_year_and_quarter(self): + year = pd.Series([2001, 2002, 2003]) + quarter = year - 2000 + idx = PeriodIndex(year=year, quarter=quarter) + strs = ['%dQ%d' % t for t in zip(quarter, year)] + lops = list(map(Period, strs)) + p = PeriodIndex(lops) + tm.assert_index_equal(p, idx) + + def test_constructor_freq_mult(self): + # GH #7811 + for func in [PeriodIndex, period_range]: + # must be the same, but for sure... 
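+            # e.g. start='2014-01' with freq='2M' advances in two-month
+            # spans, so the labels below step 2014-01 -> 2014-03 -> 2014-05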
+            pidx = func(start='2014-01', freq='2M', periods=4)
+            expected = PeriodIndex(['2014-01', '2014-03',
+                                    '2014-05', '2014-07'], freq='2M')
+            tm.assert_index_equal(pidx, expected)
+
+            pidx = func(start='2014-01-02', end='2014-01-15', freq='3D')
+            expected = PeriodIndex(['2014-01-02', '2014-01-05',
+                                    '2014-01-08', '2014-01-11',
+                                    '2014-01-14'], freq='3D')
+            tm.assert_index_equal(pidx, expected)
+
+            pidx = func(end='2014-01-01 17:00', freq='4H', periods=3)
+            expected = PeriodIndex(['2014-01-01 09:00', '2014-01-01 13:00',
+                                    '2014-01-01 17:00'], freq='4H')
+            tm.assert_index_equal(pidx, expected)
+
+        msg = ('Frequency must be positive, because it'
+               ' represents span: -1M')
+        with tm.assertRaisesRegexp(ValueError, msg):
+            PeriodIndex(['2011-01'], freq='-1M')
+
+        msg = ('Frequency must be positive, because it'
+               ' represents span: 0M')
+        with tm.assertRaisesRegexp(ValueError, msg):
+            PeriodIndex(['2011-01'], freq='0M')
+
+        msg = ('Frequency must be positive, because it'
+               ' represents span: 0M')
+        with tm.assertRaisesRegexp(ValueError, msg):
+            period_range('2011-01', periods=3, freq='0M')
+
+    def test_constructor_freq_mult_dti_compat(self):
+        import itertools
+        mults = [1, 2, 3, 4, 5]
+        freqs = ['A', 'M', 'D', 'T', 'S']
+        for mult, freq in itertools.product(mults, freqs):
+            freqstr = str(mult) + freq
+            pidx = PeriodIndex(start='2014-04-01', freq=freqstr, periods=10)
+            expected = date_range(start='2014-04-01', freq=freqstr,
+                                  periods=10).to_period(freqstr)
+            tm.assert_index_equal(pidx, expected)
+
+    def test_constructor_freq_combined(self):
+        for freq in ['1D1H', '1H1D']:
+            pidx = PeriodIndex(['2016-01-01', '2016-01-02'], freq=freq)
+            expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 00:00'],
+                                   freq='25H')
+            tm.assert_index_equal(pidx, expected)
+        for freq, func in zip(['1D1H', '1H1D'], [PeriodIndex, period_range]):
+            pidx = func(start='2016-01-01', periods=2, freq=freq)
+            expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 01:00'],
+                                   freq='25H')
+            tm.assert_index_equal(pidx, expected)
+
+    def test_constructor(self):
+        pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')
+        self.assertEqual(len(pi), 9)
+
+        pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009')
+        self.assertEqual(len(pi), 4 * 9)
+
+        pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009')
+        self.assertEqual(len(pi), 12 * 9)
+
+        pi = PeriodIndex(freq='D', start='1/1/2001', end='12/31/2009')
+        self.assertEqual(len(pi), 365 * 9 + 2)
+
+        pi = PeriodIndex(freq='B', start='1/1/2001', end='12/31/2009')
+        self.assertEqual(len(pi), 261 * 9)
+
+        pi = PeriodIndex(freq='H', start='1/1/2001', end='12/31/2001 23:00')
+        self.assertEqual(len(pi), 365 * 24)
+
+        pi = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 23:59')
+        self.assertEqual(len(pi), 24 * 60)
+
+        pi = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 23:59:59')
+        self.assertEqual(len(pi), 24 * 60 * 60)
+
+        start = Period('02-Apr-2005', 'B')
+        i1 = PeriodIndex(start=start, periods=20)
+        self.assertEqual(len(i1), 20)
+        self.assertEqual(i1.freq, start.freq)
+        self.assertEqual(i1[0], start)
+
+        end_intv = Period('2006-12-31', 'W')
+        i1 = PeriodIndex(end=end_intv, periods=10)
+        self.assertEqual(len(i1), 10)
+        self.assertEqual(i1.freq, end_intv.freq)
+        self.assertEqual(i1[-1], end_intv)
+
+        end_intv = Period('2006-12-31', '1w')
+        i2 = PeriodIndex(end=end_intv, periods=10)
+        self.assertEqual(len(i1), len(i2))
+        self.assertTrue((i1 == i2).all())
+        self.assertEqual(i1.freq, i2.freq)
+
+        end_intv = Period('2006-12-31', ('w', 1))
+        i2 = PeriodIndex(end=end_intv, periods=10)
+        
self.assertEqual(len(i1), len(i2)) + self.assertTrue((i1 == i2).all()) + self.assertEqual(i1.freq, i2.freq) + + try: + PeriodIndex(start=start, end=end_intv) + raise AssertionError('Cannot allow mixed freq for start and end') + except ValueError: + pass + + end_intv = Period('2005-05-01', 'B') + i1 = PeriodIndex(start=start, end=end_intv) + + try: + PeriodIndex(start=start) + raise AssertionError( + 'Must specify periods if missing start or end') + except ValueError: + pass + + # infer freq from first element + i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) + self.assertEqual(len(i2), 2) + self.assertEqual(i2[0], end_intv) + + i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')])) + self.assertEqual(len(i2), 2) + self.assertEqual(i2[0], end_intv) + + # Mixed freq should fail + vals = [end_intv, Period('2006-12-31', 'w')] + self.assertRaises(ValueError, PeriodIndex, vals) + vals = np.array(vals) + self.assertRaises(ValueError, PeriodIndex, vals) + + def test_recreate_from_data(self): + for o in ['M', 'Q', 'A', 'D', 'B', 'T', 'S', 'L', 'U', 'N', 'H']: + org = PeriodIndex(start='2001/04/01', freq=o, periods=1) + idx = PeriodIndex(org.values, freq=o) + tm.assert_index_equal(idx, org) + + def test_map_with_string_constructor(self): + raw = [2005, 2007, 2009] + index = PeriodIndex(raw, freq='A') + types = str, + + if PY3: + # unicode + types += text_type, + + for t in types: + expected = Index(lmap(t, raw)) + res = index.map(t) + + # should return an Index + tm.assertIsInstance(res, Index) + + # preserve element types + self.assertTrue(all(isinstance(resi, t) for resi in res)) + + # lastly, values should compare equal + tm.assert_index_equal(res, expected) + + +class TestSeriesPeriod(tm.TestCase): + + def setUp(self): + self.series = Series(period_range('2000-01-01', periods=10, freq='D')) + + def test_constructor_cant_cast_period(self): + with tm.assertRaises(TypeError): + Series(period_range('2000-01-01', periods=10, freq='D'), + dtype=float) + + def test_constructor_cast_object(self): + s = Series(period_range('1/1/2000', periods=10), dtype=object) + exp = Series(period_range('1/1/2000', periods=10)) + tm.assert_series_equal(s, exp) diff --git a/pandas/tseries/tests/test_base.py b/pandas/tests/indexes/period/test_ops.py similarity index 58% rename from pandas/tseries/tests/test_base.py rename to pandas/tests/indexes/period/test_ops.py index 114cb02205d4f..70759e8659c25 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -1,13 +1,14 @@ -from __future__ import print_function -from datetime import timedelta import numpy as np +from datetime import timedelta, datetime + import pandas as pd -from pandas import (Series, Index, Period, DatetimeIndex, PeriodIndex, - Timedelta, _np_version_under1p10) import pandas.tslib as tslib -import pandas.tseries.period as period - import pandas.util.testing as tm +import pandas.tseries.period as period +from pandas.compat import lrange +from pandas import (DatetimeIndex, PeriodIndex, period_range, Series, Period, + _np_version_under1p10, Index, Timedelta, offsets, + _np_version_under1p9) from pandas.tests.test_base import Ops @@ -473,6 +474,13 @@ def test_difference(self): result_union = rng.difference(other) tm.assert_index_equal(result_union, expected) + def test_sub(self): + rng = period_range('2007-01', periods=50) + + result = rng - 5 + exp = rng + (-5) + tm.assert_index_equal(result, exp) + def test_sub_isub(self): # previously performed setop, now raises TypeError (GH14164) @@ -1020,3 
+1028,781 @@ def test_equals(self): self.assertFalse(idx.asobject.equals(idx3)) self.assertFalse(idx.equals(list(idx3))) self.assertFalse(idx.equals(pd.Series(idx3))) + + +class TestPeriodIndexSeriesMethods(tm.TestCase): + """ Test PeriodIndex and Period Series Ops consistency """ + + def _check(self, values, func, expected): + idx = pd.PeriodIndex(values) + result = func(idx) + if isinstance(expected, pd.Index): + tm.assert_index_equal(result, expected) + else: + # comp op results in bool + tm.assert_numpy_array_equal(result, expected) + + s = pd.Series(values) + result = func(s) + + exp = pd.Series(expected, name=values.name) + tm.assert_series_equal(result, exp) + + def test_pi_ops(self): + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', + '2011-04'], freq='M', name='idx') + + expected = PeriodIndex(['2011-03', '2011-04', + '2011-05', '2011-06'], freq='M', name='idx') + self._check(idx, lambda x: x + 2, expected) + self._check(idx, lambda x: 2 + x, expected) + + self._check(idx + 2, lambda x: x - 2, idx) + result = idx - Period('2011-01', freq='M') + exp = pd.Index([0, 1, 2, 3], name='idx') + tm.assert_index_equal(result, exp) + + result = Period('2011-01', freq='M') - idx + exp = pd.Index([0, -1, -2, -3], name='idx') + tm.assert_index_equal(result, exp) + + def test_pi_ops_errors(self): + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', + '2011-04'], freq='M', name='idx') + s = pd.Series(idx) + + msg = r"unsupported operand type\(s\)" + + for obj in [idx, s]: + for ng in ["str", 1.5]: + with tm.assertRaisesRegexp(TypeError, msg): + obj + ng + + with tm.assertRaises(TypeError): + # error message differs between PY2 and 3 + ng + obj + + with tm.assertRaisesRegexp(TypeError, msg): + obj - ng + + with tm.assertRaises(TypeError): + np.add(obj, ng) + + if _np_version_under1p10: + self.assertIs(np.add(ng, obj), NotImplemented) + else: + with tm.assertRaises(TypeError): + np.add(ng, obj) + + with tm.assertRaises(TypeError): + np.subtract(obj, ng) + + if _np_version_under1p10: + self.assertIs(np.subtract(ng, obj), NotImplemented) + else: + with tm.assertRaises(TypeError): + np.subtract(ng, obj) + + def test_pi_ops_nat(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='M', name='idx') + expected = PeriodIndex(['2011-03', '2011-04', + 'NaT', '2011-06'], freq='M', name='idx') + self._check(idx, lambda x: x + 2, expected) + self._check(idx, lambda x: 2 + x, expected) + self._check(idx, lambda x: np.add(x, 2), expected) + + self._check(idx + 2, lambda x: x - 2, idx) + self._check(idx + 2, lambda x: np.subtract(x, 2), idx) + + # freq with mult + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='2M', name='idx') + expected = PeriodIndex(['2011-07', '2011-08', + 'NaT', '2011-10'], freq='2M', name='idx') + self._check(idx, lambda x: x + 3, expected) + self._check(idx, lambda x: 3 + x, expected) + self._check(idx, lambda x: np.add(x, 3), expected) + + self._check(idx + 3, lambda x: x - 3, idx) + self._check(idx + 3, lambda x: np.subtract(x, 3), idx) + + def test_pi_ops_array_int(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='M', name='idx') + f = lambda x: x + np.array([1, 2, 3, 4]) + exp = PeriodIndex(['2011-02', '2011-04', 'NaT', + '2011-08'], freq='M', name='idx') + self._check(idx, f, exp) + + f = lambda x: np.add(x, np.array([4, -1, 1, 2])) + exp = PeriodIndex(['2011-05', '2011-01', 'NaT', + '2011-06'], freq='M', name='idx') + self._check(idx, f, exp) + + f = lambda x: x - np.array([1, 2, 3, 4]) + exp = 
PeriodIndex(['2010-12', '2010-12', 'NaT', + '2010-12'], freq='M', name='idx') + self._check(idx, f, exp) + + f = lambda x: np.subtract(x, np.array([3, 2, 3, -2])) + exp = PeriodIndex(['2010-10', '2010-12', 'NaT', + '2011-06'], freq='M', name='idx') + self._check(idx, f, exp) + + def test_pi_ops_offset(self): + idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01', + '2011-04-01'], freq='D', name='idx') + f = lambda x: x + offsets.Day() + exp = PeriodIndex(['2011-01-02', '2011-02-02', '2011-03-02', + '2011-04-02'], freq='D', name='idx') + self._check(idx, f, exp) + + f = lambda x: x + offsets.Day(2) + exp = PeriodIndex(['2011-01-03', '2011-02-03', '2011-03-03', + '2011-04-03'], freq='D', name='idx') + self._check(idx, f, exp) + + f = lambda x: x - offsets.Day(2) + exp = PeriodIndex(['2010-12-30', '2011-01-30', '2011-02-27', + '2011-03-30'], freq='D', name='idx') + self._check(idx, f, exp) + + def test_pi_offset_errors(self): + idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01', + '2011-04-01'], freq='D', name='idx') + s = pd.Series(idx) + + # Series op is applied per Period instance, thus error is raised + # from Period + msg_idx = r"Input has different freq from PeriodIndex\(freq=D\)" + msg_s = r"Input cannot be converted to Period\(freq=D\)" + for obj, msg in [(idx, msg_idx), (s, msg_s)]: + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + obj + offsets.Hour(2) + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + offsets.Hour(2) + obj + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + obj - offsets.Hour(2) + + def test_pi_sub_period(self): + # GH 13071 + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', + '2011-04'], freq='M', name='idx') + + result = idx - pd.Period('2012-01', freq='M') + exp = pd.Index([-12, -11, -10, -9], name='idx') + tm.assert_index_equal(result, exp) + + result = np.subtract(idx, pd.Period('2012-01', freq='M')) + tm.assert_index_equal(result, exp) + + result = pd.Period('2012-01', freq='M') - idx + exp = pd.Index([12, 11, 10, 9], name='idx') + tm.assert_index_equal(result, exp) + + result = np.subtract(pd.Period('2012-01', freq='M'), idx) + if _np_version_under1p10: + self.assertIs(result, NotImplemented) + else: + tm.assert_index_equal(result, exp) + + exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') + tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp) + tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) + + def test_pi_sub_pdnat(self): + # GH 13071 + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='M', name='idx') + exp = pd.TimedeltaIndex([pd.NaT] * 4, name='idx') + tm.assert_index_equal(pd.NaT - idx, exp) + tm.assert_index_equal(idx - pd.NaT, exp) + + def test_pi_sub_period_nat(self): + # GH 13071 + idx = PeriodIndex(['2011-01', 'NaT', '2011-03', + '2011-04'], freq='M', name='idx') + + result = idx - pd.Period('2012-01', freq='M') + exp = pd.Index([-12, np.nan, -10, -9], name='idx') + tm.assert_index_equal(result, exp) + + result = pd.Period('2012-01', freq='M') - idx + exp = pd.Index([12, np.nan, 10, 9], name='idx') + tm.assert_index_equal(result, exp) + + exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') + tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp) + tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) + + def test_pi_comp_period(self): + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', + '2011-04'], freq='M', name='idx') + + f = lambda x: x == pd.Period('2011-03', freq='M') + exp = 
np.array([False, False, True, False], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') == x + self._check(idx, f, exp) + + f = lambda x: x != pd.Period('2011-03', freq='M') + exp = np.array([True, True, False, True], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') != x + self._check(idx, f, exp) + + f = lambda x: pd.Period('2011-03', freq='M') >= x + exp = np.array([True, True, True, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: x > pd.Period('2011-03', freq='M') + exp = np.array([False, False, False, True], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: pd.Period('2011-03', freq='M') >= x + exp = np.array([True, True, True, False], dtype=np.bool) + self._check(idx, f, exp) + + def test_pi_comp_period_nat(self): + idx = PeriodIndex(['2011-01', 'NaT', '2011-03', + '2011-04'], freq='M', name='idx') + + f = lambda x: x == pd.Period('2011-03', freq='M') + exp = np.array([False, False, True, False], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') == x + self._check(idx, f, exp) + + f = lambda x: x == tslib.NaT + exp = np.array([False, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: tslib.NaT == x + self._check(idx, f, exp) + + f = lambda x: x != pd.Period('2011-03', freq='M') + exp = np.array([True, True, False, True], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') != x + self._check(idx, f, exp) + + f = lambda x: x != tslib.NaT + exp = np.array([True, True, True, True], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: tslib.NaT != x + self._check(idx, f, exp) + + f = lambda x: pd.Period('2011-03', freq='M') >= x + exp = np.array([True, False, True, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: x < pd.Period('2011-03', freq='M') + exp = np.array([True, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: x > tslib.NaT + exp = np.array([False, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: tslib.NaT >= x + exp = np.array([False, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + + +class TestSeriesPeriod(tm.TestCase): + + def setUp(self): + self.series = Series(period_range('2000-01-01', periods=10, freq='D')) + + def test_ops_series_timedelta(self): + # GH 13043 + s = pd.Series([pd.Period('2015-01-01', freq='D'), + pd.Period('2015-01-02', freq='D')], name='xxx') + self.assertEqual(s.dtype, object) + + exp = pd.Series([pd.Period('2015-01-02', freq='D'), + pd.Period('2015-01-03', freq='D')], name='xxx') + tm.assert_series_equal(s + pd.Timedelta('1 days'), exp) + tm.assert_series_equal(pd.Timedelta('1 days') + s, exp) + + tm.assert_series_equal(s + pd.tseries.offsets.Day(), exp) + tm.assert_series_equal(pd.tseries.offsets.Day() + s, exp) + + def test_ops_series_period(self): + # GH 13043 + s = pd.Series([pd.Period('2015-01-01', freq='D'), + pd.Period('2015-01-02', freq='D')], name='xxx') + self.assertEqual(s.dtype, object) + + p = pd.Period('2015-01-10', freq='D') + # dtype will be object because of original dtype + exp = pd.Series([9, 8], name='xxx', dtype=object) + tm.assert_series_equal(p - s, exp) + tm.assert_series_equal(s - p, -exp) + + s2 = pd.Series([pd.Period('2015-01-05', freq='D'), + pd.Period('2015-01-04', freq='D')], name='xxx') + self.assertEqual(s2.dtype, object) + + exp = pd.Series([4, 2], name='xxx', dtype=object) + tm.assert_series_equal(s2 - s, 
exp) + tm.assert_series_equal(s - s2, -exp) + + def test_ops_frame_period(self): + # GH 13043 + df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'), + pd.Period('2015-02', freq='M')], + 'B': [pd.Period('2014-01', freq='M'), + pd.Period('2014-02', freq='M')]}) + self.assertEqual(df['A'].dtype, object) + self.assertEqual(df['B'].dtype, object) + + p = pd.Period('2015-03', freq='M') + # dtype will be object because of original dtype + exp = pd.DataFrame({'A': np.array([2, 1], dtype=object), + 'B': np.array([14, 13], dtype=object)}) + tm.assert_frame_equal(p - df, exp) + tm.assert_frame_equal(df - p, -exp) + + df2 = pd.DataFrame({'A': [pd.Period('2015-05', freq='M'), + pd.Period('2015-06', freq='M')], + 'B': [pd.Period('2015-05', freq='M'), + pd.Period('2015-06', freq='M')]}) + self.assertEqual(df2['A'].dtype, object) + self.assertEqual(df2['B'].dtype, object) + + exp = pd.DataFrame({'A': np.array([4, 4], dtype=object), + 'B': np.array([16, 16], dtype=object)}) + tm.assert_frame_equal(df2 - df, exp) + tm.assert_frame_equal(df - df2, -exp) + + +class TestPeriodIndex(tm.TestCase): + + def setUp(self): + pass + + def test_getitem_index(self): + idx = period_range('2007-01', periods=10, freq='M', name='x') + + result = idx[[1, 3, 5]] + exp = pd.PeriodIndex(['2007-02', '2007-04', '2007-06'], + freq='M', name='x') + tm.assert_index_equal(result, exp) + + result = idx[[True, True, False, False, False, + True, True, False, False, False]] + exp = pd.PeriodIndex(['2007-01', '2007-02', '2007-06', '2007-07'], + freq='M', name='x') + tm.assert_index_equal(result, exp) + + def test_getitem_partial(self): + rng = period_range('2007-01', periods=50, freq='M') + ts = Series(np.random.randn(len(rng)), rng) + + self.assertRaises(KeyError, ts.__getitem__, '2006') + + result = ts['2008'] + self.assertTrue((result.index.year == 2008).all()) + + result = ts['2008':'2009'] + self.assertEqual(len(result), 24) + + result = ts['2008-1':'2009-12'] + self.assertEqual(len(result), 24) + + result = ts['2008Q1':'2009Q4'] + self.assertEqual(len(result), 24) + + result = ts[:'2009'] + self.assertEqual(len(result), 36) + + result = ts['2009':] + self.assertEqual(len(result), 50 - 24) + + exp = result + result = ts[24:] + tm.assert_series_equal(exp, result) + + ts = ts[10:].append(ts[10:]) + self.assertRaisesRegexp(KeyError, + "left slice bound for non-unique " + "label: '2008'", + ts.__getitem__, slice('2008', '2009')) + + def test_getitem_datetime(self): + rng = period_range(start='2012-01-01', periods=10, freq='W-MON') + ts = Series(lrange(len(rng)), index=rng) + + dt1 = datetime(2011, 10, 2) + dt4 = datetime(2012, 4, 20) + + rs = ts[dt1:dt4] + tm.assert_series_equal(rs, ts) + + def test_getitem_nat(self): + idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') + self.assertEqual(idx[0], pd.Period('2011-01', freq='M')) + self.assertIs(idx[1], tslib.NaT) + + s = pd.Series([0, 1, 2], index=idx) + self.assertEqual(s[pd.NaT], 1) + + s = pd.Series(idx, index=idx) + self.assertEqual(s[pd.Period('2011-01', freq='M')], + pd.Period('2011-01', freq='M')) + self.assertIs(s[pd.NaT], tslib.NaT) + + def test_getitem_list_periods(self): + # GH 7710 + rng = period_range(start='2012-01-01', periods=10, freq='D') + ts = Series(lrange(len(rng)), index=rng) + exp = ts.iloc[[1]] + tm.assert_series_equal(ts[[Period('2012-01-02', freq='D')]], exp) + + def test_getitem_seconds(self): + # GH 6716 + didx = DatetimeIndex(start='2013/01/01 09:00:00', freq='S', + periods=4000) + pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', 
periods=4000) + + for idx in [didx, pidx]: + # getitem against index should raise ValueError + values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', + '2013/02/01 09:00'] + for v in values: + if _np_version_under1p9: + with tm.assertRaises(ValueError): + idx[v] + else: + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + # with tm.assertRaises(IndexError): + # idx[v] + continue + + s = Series(np.random.rand(len(idx)), index=idx) + tm.assert_series_equal(s['2013/01/01 10:00'], s[3600:3660]) + tm.assert_series_equal(s['2013/01/01 9H'], s[:3600]) + for d in ['2013/01/01', '2013/01', '2013']: + tm.assert_series_equal(s[d], s) + + def test_getitem_day(self): + # GH 6716 + # Confirm DatetimeIndex and PeriodIndex works identically + didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400) + pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) + + for idx in [didx, pidx]: + # getitem against index should raise ValueError + values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', + '2013/02/01 09:00'] + for v in values: + + if _np_version_under1p9: + with tm.assertRaises(ValueError): + idx[v] + else: + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + # with tm.assertRaises(IndexError): + # idx[v] + continue + + s = Series(np.random.rand(len(idx)), index=idx) + tm.assert_series_equal(s['2013/01'], s[0:31]) + tm.assert_series_equal(s['2013/02'], s[31:59]) + tm.assert_series_equal(s['2014'], s[365:]) + + invalid = ['2013/02/01 9H', '2013/02/01 09:00'] + for v in invalid: + with tm.assertRaises(KeyError): + s[v] + + def test_take(self): + index = PeriodIndex(start='1/1/10', end='12/31/12', freq='D', + name='idx') + expected = PeriodIndex([datetime(2010, 1, 6), datetime(2010, 1, 7), + datetime(2010, 1, 9), datetime(2010, 1, 13)], + freq='D', name='idx') + + taken1 = index.take([5, 6, 8, 12]) + taken2 = index[[5, 6, 8, 12]] + + for taken in [taken1, taken2]: + tm.assert_index_equal(taken, expected) + tm.assertIsInstance(taken, PeriodIndex) + self.assertEqual(taken.freq, index.freq) + self.assertEqual(taken.name, expected.name) + + def test_take_fill_value(self): + # GH 12631 + idx = pd.PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'], + name='xxx', freq='D') + result = idx.take(np.array([1, 0, -1])) + expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx', freq='D') + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', 'NaT'], + name='xxx', freq='D') + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx', freq='D') + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and fill_value is not None, ' + 'all indices must be >= -1') + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with tm.assertRaises(IndexError): + idx.take(np.array([1, -5])) + + def test_get_loc_msg(self): + idx = period_range('2000-1-1', freq='A', periods=10) + bad_period = Period('2012', 'A') + self.assertRaises(KeyError, idx.get_loc, bad_period) + + try: + idx.get_loc(bad_period) + except KeyError as inst: + 
self.assertEqual(inst.args[0], bad_period) + + def test_get_loc_nat(self): + didx = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03']) + pidx = PeriodIndex(['2011-01-01', 'NaT', '2011-01-03'], freq='M') + + # check DatetimeIndex compat + for idx in [didx, pidx]: + self.assertEqual(idx.get_loc(pd.NaT), 1) + self.assertEqual(idx.get_loc(None), 1) + self.assertEqual(idx.get_loc(float('nan')), 1) + self.assertEqual(idx.get_loc(np.nan), 1) + + +class TestComparisons(tm.TestCase): + + def setUp(self): + self.january1 = Period('2000-01', 'M') + self.january2 = Period('2000-01', 'M') + self.february = Period('2000-02', 'M') + self.march = Period('2000-03', 'M') + self.day = Period('2012-01-01', 'D') + + def test_equal(self): + self.assertEqual(self.january1, self.january2) + + def test_equal_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 == self.day + + def test_notEqual(self): + self.assertNotEqual(self.january1, 1) + self.assertNotEqual(self.january1, self.february) + + def test_greater(self): + self.assertTrue(self.february > self.january1) + + def test_greater_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 > self.day + + def test_greater_Raises_Type(self): + with tm.assertRaises(TypeError): + self.january1 > 1 + + def test_greaterEqual(self): + self.assertTrue(self.january1 >= self.january2) + + def test_greaterEqual_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 >= self.day + + with tm.assertRaises(TypeError): + print(self.january1 >= 1) + + def test_smallerEqual(self): + self.assertTrue(self.january1 <= self.january2) + + def test_smallerEqual_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 <= self.day + + def test_smallerEqual_Raises_Type(self): + with tm.assertRaises(TypeError): + self.january1 <= 1 + + def test_smaller(self): + self.assertTrue(self.january1 < self.february) + + def test_smaller_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 < self.day + + def test_smaller_Raises_Type(self): + with tm.assertRaises(TypeError): + self.january1 < 1 + + def test_sort(self): + periods = [self.march, self.january1, self.february] + correctPeriods = [self.january1, self.february, self.march] + self.assertEqual(sorted(periods), correctPeriods) + + def test_period_nat_comp(self): + p_nat = Period('NaT', freq='D') + p = Period('2011-01-01', freq='D') + + nat = pd.Timestamp('NaT') + t = pd.Timestamp('2011-01-01') + # confirm Period('NaT') work identical with Timestamp('NaT') + for left, right in [(p_nat, p), (p, p_nat), (p_nat, p_nat), (nat, t), + (t, nat), (nat, nat)]: + self.assertEqual(left < right, False) + self.assertEqual(left > right, False) + self.assertEqual(left == right, False) + self.assertEqual(left != right, True) + self.assertEqual(left <= right, False) + self.assertEqual(left >= right, False) + + def test_pi_pi_comp(self): + + for freq in ['M', '2M', '3M']: + base = PeriodIndex(['2011-01', '2011-02', + '2011-03', '2011-04'], freq=freq) + p = Period('2011-02', freq=freq) + + exp = np.array([False, True, False, False]) + self.assert_numpy_array_equal(base == p, exp) + self.assert_numpy_array_equal(p == base, exp) + + exp = np.array([True, False, True, True]) + self.assert_numpy_array_equal(base != p, exp) + self.assert_numpy_array_equal(p != base, exp) + + exp = np.array([False, False, True, True]) + self.assert_numpy_array_equal(base > p, exp) + 
self.assert_numpy_array_equal(p < base, exp) + + exp = np.array([True, False, False, False]) + self.assert_numpy_array_equal(base < p, exp) + self.assert_numpy_array_equal(p > base, exp) + + exp = np.array([False, True, True, True]) + self.assert_numpy_array_equal(base >= p, exp) + self.assert_numpy_array_equal(p <= base, exp) + + exp = np.array([True, True, False, False]) + self.assert_numpy_array_equal(base <= p, exp) + self.assert_numpy_array_equal(p >= base, exp) + + idx = PeriodIndex(['2011-02', '2011-01', '2011-03', + '2011-05'], freq=freq) + + exp = np.array([False, False, True, False]) + self.assert_numpy_array_equal(base == idx, exp) + + exp = np.array([True, True, False, True]) + self.assert_numpy_array_equal(base != idx, exp) + + exp = np.array([False, True, False, False]) + self.assert_numpy_array_equal(base > idx, exp) + + exp = np.array([True, False, False, True]) + self.assert_numpy_array_equal(base < idx, exp) + + exp = np.array([False, True, True, False]) + self.assert_numpy_array_equal(base >= idx, exp) + + exp = np.array([True, False, True, True]) + self.assert_numpy_array_equal(base <= idx, exp) + + # different base freq + msg = "Input has different freq=A-DEC from PeriodIndex" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + base <= Period('2011', freq='A') + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + Period('2011', freq='A') >= base + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='A') + base <= idx + + # different mult + msg = "Input has different freq=4M from PeriodIndex" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + base <= Period('2011', freq='4M') + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + Period('2011', freq='4M') >= base + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='4M') + base <= idx + + def test_pi_nat_comp(self): + for freq in ['M', '2M', '3M']: + idx1 = PeriodIndex( + ['2011-01', '2011-02', 'NaT', '2011-05'], freq=freq) + + result = idx1 > Period('2011-02', freq=freq) + exp = np.array([False, False, False, True]) + self.assert_numpy_array_equal(result, exp) + result = Period('2011-02', freq=freq) < idx1 + self.assert_numpy_array_equal(result, exp) + + result = idx1 == Period('NaT', freq=freq) + exp = np.array([False, False, False, False]) + self.assert_numpy_array_equal(result, exp) + result = Period('NaT', freq=freq) == idx1 + self.assert_numpy_array_equal(result, exp) + + result = idx1 != Period('NaT', freq=freq) + exp = np.array([True, True, True, True]) + self.assert_numpy_array_equal(result, exp) + result = Period('NaT', freq=freq) != idx1 + self.assert_numpy_array_equal(result, exp) + + idx2 = PeriodIndex(['2011-02', '2011-01', '2011-04', + 'NaT'], freq=freq) + result = idx1 < idx2 + exp = np.array([True, False, False, False]) + self.assert_numpy_array_equal(result, exp) + + result = idx1 == idx2 + exp = np.array([False, False, False, False]) + self.assert_numpy_array_equal(result, exp) + + result = idx1 != idx2 + exp = np.array([True, True, True, True]) + self.assert_numpy_array_equal(result, exp) + + result = idx1 == idx1 + exp = np.array([True, True, False, True]) + self.assert_numpy_array_equal(result, exp) + + result = idx1 != idx1 + exp = np.array([False, False, True, False]) + self.assert_numpy_array_equal(result, exp) + + diff = PeriodIndex(['2011-02', '2011-01', '2011-04', + 'NaT'], freq='4M') + msg 
= "Input has different freq=4M from PeriodIndex" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + idx1 > diff + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + idx1 == diff diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py new file mode 100644 index 0000000000000..b051c4a0dcab1 --- /dev/null +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -0,0 +1,139 @@ +import numpy as np + +import pandas as pd +from pandas.util import testing as tm +from pandas import (Series, period_range, DatetimeIndex, PeriodIndex, + DataFrame, _np_version_under1p12, Period) + + +class TestPeriodIndex(tm.TestCase): + + def setUp(self): + pass + + def test_slice_with_negative_step(self): + ts = Series(np.arange(20), + period_range('2014-01', periods=20, freq='M')) + SLC = pd.IndexSlice + + def assert_slices_equivalent(l_slc, i_slc): + tm.assert_series_equal(ts[l_slc], ts.iloc[i_slc]) + tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + + assert_slices_equivalent(SLC[Period('2014-10')::-1], SLC[9::-1]) + assert_slices_equivalent(SLC['2014-10'::-1], SLC[9::-1]) + + assert_slices_equivalent(SLC[:Period('2014-10'):-1], SLC[:8:-1]) + assert_slices_equivalent(SLC[:'2014-10':-1], SLC[:8:-1]) + + assert_slices_equivalent(SLC['2015-02':'2014-10':-1], SLC[13:8:-1]) + assert_slices_equivalent(SLC[Period('2015-02'):Period('2014-10'):-1], + SLC[13:8:-1]) + assert_slices_equivalent(SLC['2015-02':Period('2014-10'):-1], + SLC[13:8:-1]) + assert_slices_equivalent(SLC[Period('2015-02'):'2014-10':-1], + SLC[13:8:-1]) + + assert_slices_equivalent(SLC['2014-10':'2015-02':-1], SLC[:0]) + + def test_slice_with_zero_step_raises(self): + ts = Series(np.arange(20), + period_range('2014-01', periods=20, freq='M')) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts[::0]) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + + def test_slice_keep_name(self): + idx = period_range('20010101', periods=10, freq='D', name='bob') + self.assertEqual(idx.name, idx[1:].name) + + def test_pindex_slice_index(self): + pi = PeriodIndex(start='1/1/10', end='12/31/12', freq='M') + s = Series(np.random.rand(len(pi)), index=pi) + res = s['2010'] + exp = s[0:12] + tm.assert_series_equal(res, exp) + res = s['2011'] + exp = s[12:24] + tm.assert_series_equal(res, exp) + + def test_range_slice_day(self): + # GH 6716 + didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400) + pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) + + # changed to TypeError in 1.12 + # https://github.com/numpy/numpy/pull/6271 + exc = IndexError if _np_version_under1p12 else TypeError + + for idx in [didx, pidx]: + # slices against index should raise IndexError + values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', + '2013/02/01 09:00'] + for v in values: + with tm.assertRaises(exc): + idx[v:] + + s = Series(np.random.rand(len(idx)), index=idx) + + tm.assert_series_equal(s['2013/01/02':], s[1:]) + tm.assert_series_equal(s['2013/01/02':'2013/01/05'], s[1:5]) + tm.assert_series_equal(s['2013/02':], s[31:]) + tm.assert_series_equal(s['2014':], s[365:]) + + invalid = ['2013/02/01 9H', '2013/02/01 09:00'] + for v in invalid: + with tm.assertRaises(exc): + idx[v:] + + def test_range_slice_seconds(self): + # GH 6716 + didx = 
DatetimeIndex(start='2013/01/01 09:00:00', freq='S', + periods=4000) + pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000) + + # changed to TypeError in 1.12 + # https://github.com/numpy/numpy/pull/6271 + exc = IndexError if _np_version_under1p12 else TypeError + + for idx in [didx, pidx]: + # slices against index should raise IndexError + values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', + '2013/02/01 09:00'] + for v in values: + with tm.assertRaises(exc): + idx[v:] + + s = Series(np.random.rand(len(idx)), index=idx) + + tm.assert_series_equal(s['2013/01/01 09:05':'2013/01/01 09:10'], + s[300:660]) + tm.assert_series_equal(s['2013/01/01 10:00':'2013/01/01 10:05'], + s[3600:3960]) + tm.assert_series_equal(s['2013/01/01 10H':], s[3600:]) + tm.assert_series_equal(s[:'2013/01/01 09:30'], s[:1860]) + for d in ['2013/01/01', '2013/01', '2013']: + tm.assert_series_equal(s[d:], s) + + def test_range_slice_outofbounds(self): + # GH 5407 + didx = DatetimeIndex(start='2013/10/01', freq='D', periods=10) + pidx = PeriodIndex(start='2013/10/01', freq='D', periods=10) + + for idx in [didx, pidx]: + df = DataFrame(dict(units=[100 + i for i in range(10)]), index=idx) + empty = DataFrame(index=idx.__class__([], freq='D'), + columns=['units']) + empty['units'] = empty['units'].astype('int64') + + tm.assert_frame_equal(df['2013/09/01':'2013/09/30'], empty) + tm.assert_frame_equal(df['2013/09/30':'2013/10/02'], df.iloc[:2]) + tm.assert_frame_equal(df['2013/10/01':'2013/10/02'], df.iloc[:2]) + tm.assert_frame_equal(df['2013/10/02':'2013/09/30'], empty) + tm.assert_frame_equal(df['2013/10/15':'2013/10/17'], empty) + tm.assert_frame_equal(df['2013-06':'2013-09'], empty) + tm.assert_frame_equal(df['2013-11':'2013-12'], empty) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 33653c92da719..6a8128bb8985f 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -1,10 +1,12 @@ import numpy as np +from numpy.random import randn from datetime import timedelta import pandas as pd from pandas.util import testing as tm from pandas import (PeriodIndex, period_range, notnull, DatetimeIndex, NaT, - Index, Period, Int64Index) + Index, Period, Int64Index, Series, DataFrame, date_range, + offsets) from ..datetimelike import DatetimeLike @@ -20,26 +22,6 @@ def setUp(self): def create_index(self): return period_range('20130101', periods=5, freq='D') - def test_construction_base_constructor(self): - # GH 13664 - arr = [pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='M')] - tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.PeriodIndex(np.array(arr))) - - arr = [np.nan, pd.NaT, pd.Period('2011-03', freq='M')] - tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.PeriodIndex(np.array(arr))) - - arr = [pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='D')] - tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object)) - - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.Index(np.array(arr), dtype=object)) - def test_astype(self): # GH 13149, GH 13209 idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') @@ -68,16 +50,6 @@ def test_astype_raises(self): self.assertRaises(ValueError, idx.astype, 'timedelta64') self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') - def test_shift(self): - - # test shift for 
PeriodIndex - # GH8083 - drange = self.create_index() - result = drange.shift(1) - expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', - '2013-01-05', '2013-01-06'], freq='D') - self.assert_index_equal(result, expected) - def test_pickle_compat_construction(self): pass @@ -231,3 +203,552 @@ def test_difference_freq(self): expected = PeriodIndex(["20160920", "20160921"], freq='D') tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) + + def test_hash_error(self): + index = period_range('20010101', periods=10) + with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % + type(index).__name__): + hash(index) + + def test_make_time_series(self): + index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + series = Series(1, index=index) + tm.assertIsInstance(series, Series) + + def test_shallow_copy_empty(self): + + # GH13067 + idx = PeriodIndex([], freq='M') + result = idx._shallow_copy() + expected = idx + + tm.assert_index_equal(result, expected) + + def test_dtype_str(self): + pi = pd.PeriodIndex([], freq='M') + self.assertEqual(pi.dtype_str, 'period[M]') + self.assertEqual(pi.dtype_str, str(pi.dtype)) + + pi = pd.PeriodIndex([], freq='3M') + self.assertEqual(pi.dtype_str, 'period[3M]') + self.assertEqual(pi.dtype_str, str(pi.dtype)) + + def test_view_asi8(self): + idx = pd.PeriodIndex([], freq='M') + + exp = np.array([], dtype=np.int64) + tm.assert_numpy_array_equal(idx.view('i8'), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') + + exp = np.array([492, -9223372036854775808], dtype=np.int64) + tm.assert_numpy_array_equal(idx.view('i8'), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + exp = np.array([14975, -9223372036854775808], dtype=np.int64) + idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') + tm.assert_numpy_array_equal(idx.view('i8'), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + def test_values(self): + idx = pd.PeriodIndex([], freq='M') + + exp = np.array([], dtype=np.object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.get_values(), exp) + exp = np.array([], dtype=np.int64) + tm.assert_numpy_array_equal(idx._values, exp) + + idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') + + exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.get_values(), exp) + exp = np.array([492, -9223372036854775808], dtype=np.int64) + tm.assert_numpy_array_equal(idx._values, exp) + + idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') + + exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT], + dtype=object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.get_values(), exp) + exp = np.array([14975, -9223372036854775808], dtype=np.int64) + tm.assert_numpy_array_equal(idx._values, exp) + + def test_period_index_length(self): + pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + self.assertEqual(len(pi), 9) + + pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') + self.assertEqual(len(pi), 4 * 9) + + pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + self.assertEqual(len(pi), 12 * 9) + + start = Period('02-Apr-2005', 'B') + i1 = PeriodIndex(start=start, periods=20) + self.assertEqual(len(i1), 20) + self.assertEqual(i1.freq, start.freq) + self.assertEqual(i1[0], start) + + end_intv = Period('2006-12-31', 'W') + i1 = PeriodIndex(end=end_intv, periods=10) + 
self.assertEqual(len(i1), 10) + self.assertEqual(i1.freq, end_intv.freq) + self.assertEqual(i1[-1], end_intv) + + end_intv = Period('2006-12-31', '1w') + i2 = PeriodIndex(end=end_intv, periods=10) + self.assertEqual(len(i1), len(i2)) + self.assertTrue((i1 == i2).all()) + self.assertEqual(i1.freq, i2.freq) + + end_intv = Period('2006-12-31', ('w', 1)) + i2 = PeriodIndex(end=end_intv, periods=10) + self.assertEqual(len(i1), len(i2)) + self.assertTrue((i1 == i2).all()) + self.assertEqual(i1.freq, i2.freq) + + try: + PeriodIndex(start=start, end=end_intv) + raise AssertionError('Cannot allow mixed freq for start and end') + except ValueError: + pass + + end_intv = Period('2005-05-01', 'B') + i1 = PeriodIndex(start=start, end=end_intv) + + try: + PeriodIndex(start=start) + raise AssertionError( + 'Must specify periods if missing start or end') + except ValueError: + pass + + # infer freq from first element + i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) + self.assertEqual(len(i2), 2) + self.assertEqual(i2[0], end_intv) + + i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')])) + self.assertEqual(len(i2), 2) + self.assertEqual(i2[0], end_intv) + + # Mixed freq should fail + vals = [end_intv, Period('2006-12-31', 'w')] + self.assertRaises(ValueError, PeriodIndex, vals) + vals = np.array(vals) + self.assertRaises(ValueError, PeriodIndex, vals) + + def test_fields(self): + # year, month, day, hour, minute + # second, weekofyear, week, dayofweek, weekday, dayofyear, quarter + # qyear + pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2005') + self._check_all_fields(pi) + + pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2002') + self._check_all_fields(pi) + + pi = PeriodIndex(freq='M', start='1/1/2001', end='1/1/2002') + self._check_all_fields(pi) + + pi = PeriodIndex(freq='D', start='12/1/2001', end='6/1/2001') + self._check_all_fields(pi) + + pi = PeriodIndex(freq='B', start='12/1/2001', end='6/1/2001') + self._check_all_fields(pi) + + pi = PeriodIndex(freq='H', start='12/31/2001', end='1/1/2002 23:00') + self._check_all_fields(pi) + + pi = PeriodIndex(freq='Min', start='12/31/2001', end='1/1/2002 00:20') + self._check_all_fields(pi) + + pi = PeriodIndex(freq='S', start='12/31/2001 00:00:00', + end='12/31/2001 00:05:00') + self._check_all_fields(pi) + + end_intv = Period('2006-12-31', 'W') + i1 = PeriodIndex(end=end_intv, periods=10) + self._check_all_fields(i1) + + def _check_all_fields(self, periodindex): + fields = ['year', 'month', 'day', 'hour', 'minute', 'second', + 'weekofyear', 'week', 'dayofweek', 'weekday', 'dayofyear', + 'quarter', 'qyear', 'days_in_month', 'is_leap_year'] + + periods = list(periodindex) + s = pd.Series(periodindex) + + for field in fields: + field_idx = getattr(periodindex, field) + self.assertEqual(len(periodindex), len(field_idx)) + for x, val in zip(periods, field_idx): + self.assertEqual(getattr(x, field), val) + + if len(s) == 0: + continue + + field_s = getattr(s.dt, field) + self.assertEqual(len(periodindex), len(field_s)) + for x, val in zip(periods, field_s): + self.assertEqual(getattr(x, field), val) + + def test_indexing(self): + + # GH 4390, iat incorrectly indexing + index = period_range('1/1/2001', periods=10) + s = Series(randn(10), index=index) + expected = s[index[0]] + result = s.iat[0] + self.assertEqual(expected, result) + + def test_period_set_index_reindex(self): + # GH 6631 + df = DataFrame(np.random.random(6)) + idx1 = period_range('2011/01/01', periods=6, freq='M') + idx2 = period_range('2013', periods=6, 
freq='A') + + df = df.set_index(idx1) + tm.assert_index_equal(df.index, idx1) + df = df.set_index(idx2) + tm.assert_index_equal(df.index, idx2) + + def test_factorize(self): + idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02', + '2014-03', '2014-03'], freq='M') + + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) + exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M') + + arr, idx = idx1.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + arr, idx = idx1.factorize(sort=True) + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + idx2 = pd.PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01', + '2014-03', '2014-01'], freq='M') + + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) + arr, idx = idx2.factorize(sort=True) + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) + exp_idx = PeriodIndex(['2014-03', '2014-02', '2014-01'], freq='M') + arr, idx = idx2.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + def test_asobject_like(self): + idx = pd.PeriodIndex([], freq='M') + + exp = np.array([], dtype=object) + tm.assert_numpy_array_equal(idx.asobject.values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') + + exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object) + tm.assert_numpy_array_equal(idx.asobject.values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT], + dtype=object) + idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') + tm.assert_numpy_array_equal(idx.asobject.values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + def test_is_(self): + create_index = lambda: PeriodIndex(freq='A', start='1/1/2001', + end='12/1/2009') + index = create_index() + self.assertEqual(index.is_(index), True) + self.assertEqual(index.is_(create_index()), False) + self.assertEqual(index.is_(index.view()), True) + self.assertEqual( + index.is_(index.view().view().view().view().view()), True) + self.assertEqual(index.view().is_(index), True) + ind2 = index.view() + index.name = "Apple" + self.assertEqual(ind2.is_(index), True) + self.assertEqual(index.is_(index[:]), False) + self.assertEqual(index.is_(index.asfreq('M')), False) + self.assertEqual(index.is_(index.asfreq('A')), False) + self.assertEqual(index.is_(index - 2), False) + self.assertEqual(index.is_(index - 0), False) + + def test_comp_period(self): + idx = period_range('2007-01', periods=20, freq='M') + + result = idx < idx[10] + exp = idx.values < idx.values[10] + self.assert_numpy_array_equal(result, exp) + + def test_contains(self): + rng = period_range('2007-01', freq='M', periods=10) + + self.assertTrue(Period('2007-01', freq='M') in rng) + self.assertFalse(Period('2007-01', freq='D') in rng) + self.assertFalse(Period('2007-01', freq='2M') in rng) + + def test_contains_nat(self): + # GH13582 + idx = period_range('2007-01', freq='M', periods=10) + self.assertFalse(pd.NaT in idx) + self.assertFalse(None in idx) + self.assertFalse(float('nan') in idx) + self.assertFalse(np.nan in idx) + + idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') + self.assertTrue(pd.NaT in idx) + self.assertTrue(None in idx) + self.assertTrue(float('nan') in idx) + self.assertTrue(np.nan in idx) + + def test_periods_number_check(self): + 
with tm.assertRaises(ValueError): + period_range('2011-1-1', '2012-1-1', 'B') + + def test_start_time(self): + index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') + expected_index = date_range('2016-01-01', end='2016-05-31', freq='MS') + tm.assert_index_equal(index.start_time, expected_index) + + def test_end_time(self): + index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') + expected_index = date_range('2016-01-01', end='2016-05-31', freq='M') + tm.assert_index_equal(index.end_time, expected_index) + + def test_index_duplicate_periods(self): + # monotonic + idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN') + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts[2007] + expected = ts[1:3] + tm.assert_series_equal(result, expected) + result[:] = 1 + self.assertTrue((ts[1:3] == 1).all()) + + # not monotonic + idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN') + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts[2007] + expected = ts[idx == 2007] + tm.assert_series_equal(result, expected) + + def test_index_unique(self): + idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN') + expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN') + self.assert_index_equal(idx.unique(), expected) + self.assertEqual(idx.nunique(), 3) + + idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN', + tz='US/Eastern') + expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN', + tz='US/Eastern') + self.assert_index_equal(idx.unique(), expected) + self.assertEqual(idx.nunique(), 3) + + def test_shift_gh8083(self): + + # test shift for PeriodIndex + # GH8083 + drange = self.create_index() + result = drange.shift(1) + expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', '2013-01-06'], freq='D') + self.assert_index_equal(result, expected) + + def test_shift(self): + pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='A', start='1/1/2002', end='12/1/2010') + + tm.assert_index_equal(pi1.shift(0), pi1) + + self.assertEqual(len(pi1), len(pi2)) + self.assert_index_equal(pi1.shift(1), pi2) + + pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='A', start='1/1/2000', end='12/1/2008') + self.assertEqual(len(pi1), len(pi2)) + self.assert_index_equal(pi1.shift(-1), pi2) + + pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='M', start='2/1/2001', end='1/1/2010') + self.assertEqual(len(pi1), len(pi2)) + self.assert_index_equal(pi1.shift(1), pi2) + + pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='M', start='12/1/2000', end='11/1/2009') + self.assertEqual(len(pi1), len(pi2)) + self.assert_index_equal(pi1.shift(-1), pi2) + + pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='D', start='1/2/2001', end='12/2/2009') + self.assertEqual(len(pi1), len(pi2)) + self.assert_index_equal(pi1.shift(1), pi2) + + pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='D', start='12/31/2000', end='11/30/2009') + self.assertEqual(len(pi1), len(pi2)) + self.assert_index_equal(pi1.shift(-1), pi2) + + def test_shift_nat(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='M', name='idx') + result = idx.shift(1) + expected = PeriodIndex(['2011-02', '2011-03', 'NaT', + '2011-05'], freq='M', name='idx') + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, 
expected.name) + + def test_shift_ndarray(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='M', name='idx') + result = idx.shift(np.array([1, 2, 3, 4])) + expected = PeriodIndex(['2011-02', '2011-04', 'NaT', + '2011-08'], freq='M', name='idx') + tm.assert_index_equal(result, expected) + + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='M', name='idx') + result = idx.shift(np.array([1, -2, 3, -4])) + expected = PeriodIndex(['2011-02', '2010-12', 'NaT', + '2010-12'], freq='M', name='idx') + tm.assert_index_equal(result, expected) + + def test_negative_ordinals(self): + Period(ordinal=-1000, freq='A') + Period(ordinal=0, freq='A') + + idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq='A') + idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq='A') + tm.assert_index_equal(idx1, idx2) + + def test_pindex_fieldaccessor_nat(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2012-03', '2012-04'], freq='D') + + exp = np.array([2011, 2011, -1, 2012, 2012], dtype=np.int64) + self.assert_numpy_array_equal(idx.year, exp) + exp = np.array([1, 2, -1, 3, 4], dtype=np.int64) + self.assert_numpy_array_equal(idx.month, exp) + + def test_pindex_qaccess(self): + pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') + s = Series(np.random.rand(len(pi)), index=pi).cumsum() + # Todo: fix these accessors! + self.assertEqual(s['05Q4'], s[2]) + + def test_numpy_repeat(self): + index = period_range('20010101', periods=2) + expected = PeriodIndex([Period('2001-01-01'), Period('2001-01-01'), + Period('2001-01-02'), Period('2001-01-02')]) + + tm.assert_index_equal(np.repeat(index, 2), expected) + + msg = "the 'axis' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.repeat, index, 2, axis=1) + + def test_pindex_multiples(self): + pi = PeriodIndex(start='1/1/11', end='12/31/11', freq='2M') + expected = PeriodIndex(['2011-01', '2011-03', '2011-05', '2011-07', + '2011-09', '2011-11'], freq='2M') + tm.assert_index_equal(pi, expected) + self.assertEqual(pi.freq, offsets.MonthEnd(2)) + self.assertEqual(pi.freqstr, '2M') + + pi = period_range(start='1/1/11', end='12/31/11', freq='2M') + tm.assert_index_equal(pi, expected) + self.assertEqual(pi.freq, offsets.MonthEnd(2)) + self.assertEqual(pi.freqstr, '2M') + + pi = period_range(start='1/1/11', periods=6, freq='2M') + tm.assert_index_equal(pi, expected) + self.assertEqual(pi.freq, offsets.MonthEnd(2)) + self.assertEqual(pi.freqstr, '2M') + + def test_iteration(self): + index = PeriodIndex(start='1/1/10', periods=4, freq='B') + + result = list(index) + tm.assertIsInstance(result[0], Period) + self.assertEqual(result[0].freq, index.freq) + + def test_is_full(self): + index = PeriodIndex([2005, 2007, 2009], freq='A') + self.assertFalse(index.is_full) + + index = PeriodIndex([2005, 2006, 2007], freq='A') + self.assertTrue(index.is_full) + + index = PeriodIndex([2005, 2005, 2007], freq='A') + self.assertFalse(index.is_full) + + index = PeriodIndex([2005, 2005, 2006], freq='A') + self.assertTrue(index.is_full) + + index = PeriodIndex([2006, 2005, 2005], freq='A') + self.assertRaises(ValueError, getattr, index, 'is_full') + + self.assertTrue(index[:0].is_full) + + def test_with_multi_index(self): + # #1705 + index = date_range('1/1/2012', periods=4, freq='12H') + index_as_arrays = [index.to_period(freq='D'), index.hour] + + s = Series([0, 1, 2, 3], index_as_arrays) + + tm.assertIsInstance(s.index.levels[0], PeriodIndex) + + tm.assertIsInstance(s.index.values[0][0], Period) + + def 
test_convert_array_of_periods(self): + rng = period_range('1/1/2000', periods=20, freq='D') + periods = list(rng) + + result = pd.Index(periods) + tm.assertIsInstance(result, PeriodIndex) + + def test_append_concat(self): + # #1815 + d1 = date_range('12/31/1990', '12/31/1999', freq='A-DEC') + d2 = date_range('12/31/2000', '12/31/2009', freq='A-DEC') + + s1 = Series(np.random.randn(10), d1) + s2 = Series(np.random.randn(10), d2) + + s1 = s1.to_period() + s2 = s2.to_period() + + # drops index + result = pd.concat([s1, s2]) + tm.assertIsInstance(result.index, PeriodIndex) + self.assertEqual(result.index[0], s1.index[0]) + + def test_pickle_freq(self): + # GH2891 + prng = period_range('1/1/2011', '1/1/2012', freq='M') + new_prng = self.round_trip_pickle(prng) + self.assertEqual(new_prng.freq, offsets.MonthEnd()) + self.assertEqual(new_prng.freqstr, 'M') + + def test_map(self): + index = PeriodIndex([2005, 2007, 2009], freq='A') + result = index.map(lambda x: x + 1) + expected = index + 1 + tm.assert_index_equal(result, expected) + + result = index.map(lambda x: x.ordinal) + exp = Index([x.ordinal for x in index]) + tm.assert_index_equal(result, exp) diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py new file mode 100644 index 0000000000000..06e15f9175ed8 --- /dev/null +++ b/pandas/tests/indexes/period/test_setops.py @@ -0,0 +1,157 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +import pandas.tseries.period as period +from pandas import period_range, PeriodIndex, Index, date_range + + +def _permute(obj): + return obj.take(np.random.permutation(len(obj))) + + +class TestPeriodIndex(tm.TestCase): + + def setUp(self): + pass + + def test_joins(self): + index = period_range('1/1/2000', '1/20/2000', freq='D') + + for kind in ['inner', 'outer', 'left', 'right']: + joined = index.join(index[:-5], how=kind) + + tm.assertIsInstance(joined, PeriodIndex) + self.assertEqual(joined.freq, index.freq) + + def test_join_self(self): + index = period_range('1/1/2000', '1/20/2000', freq='D') + + for kind in ['inner', 'outer', 'left', 'right']: + res = index.join(index, how=kind) + self.assertIs(index, res) + + def test_join_does_not_recur(self): + df = tm.makeCustomDataframe( + 3, 2, data_gen_f=lambda *args: np.random.randint(2), + c_idx_type='p', r_idx_type='dt') + s = df.iloc[:2, 0] + + res = s.index.join(df.columns, how='outer') + expected = Index([s.index[0], s.index[1], + df.columns[0], df.columns[1]], object) + tm.assert_index_equal(res, expected) + + def test_union(self): + index = period_range('1/1/2000', '1/20/2000', freq='D') + + result = index[:-5].union(index[10:]) + tm.assert_index_equal(result, index) + + # not in order + result = _permute(index[:-5]).union(_permute(index[10:])) + tm.assert_index_equal(result, index) + + # raise if different frequencies + index = period_range('1/1/2000', '1/20/2000', freq='D') + index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') + with tm.assertRaises(period.IncompatibleFrequency): + index.union(index2) + + msg = 'can only call with other PeriodIndex-ed objects' + with tm.assertRaisesRegexp(ValueError, msg): + index.join(index.to_timestamp()) + + index3 = period_range('1/1/2000', '1/20/2000', freq='2D') + with tm.assertRaises(period.IncompatibleFrequency): + index.join(index3) + + def test_union_dataframe_index(self): + rng1 = pd.period_range('1/1/1999', '1/1/2012', freq='M') + s1 = pd.Series(np.random.randn(len(rng1)), rng1) + + rng2 = pd.period_range('1/1/1980', 
'12/1/2001', freq='M') + s2 = pd.Series(np.random.randn(len(rng2)), rng2) + df = pd.DataFrame({'s1': s1, 's2': s2}) + + exp = pd.period_range('1/1/1980', '1/1/2012', freq='M') + self.assert_index_equal(df.index, exp) + + def test_intersection(self): + index = period_range('1/1/2000', '1/20/2000', freq='D') + + result = index[:-5].intersection(index[10:]) + tm.assert_index_equal(result, index[10:-5]) + + # not in order + left = _permute(index[:-5]) + right = _permute(index[10:]) + result = left.intersection(right).sort_values() + tm.assert_index_equal(result, index[10:-5]) + + # raise if different frequencies + index = period_range('1/1/2000', '1/20/2000', freq='D') + index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') + with tm.assertRaises(period.IncompatibleFrequency): + index.intersection(index2) + + index3 = period_range('1/1/2000', '1/20/2000', freq='2D') + with tm.assertRaises(period.IncompatibleFrequency): + index.intersection(index3) + + def test_intersection_cases(self): + base = period_range('6/1/2000', '6/30/2000', freq='D', name='idx') + + # if target has the same name, it is preserved + rng2 = period_range('5/15/2000', '6/20/2000', freq='D', name='idx') + expected2 = period_range('6/1/2000', '6/20/2000', freq='D', + name='idx') + + # if target name is different, it will be reset + rng3 = period_range('5/15/2000', '6/20/2000', freq='D', name='other') + expected3 = period_range('6/1/2000', '6/20/2000', freq='D', + name=None) + + rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx') + expected4 = PeriodIndex([], name='idx', freq='D') + + for (rng, expected) in [(rng2, expected2), (rng3, expected3), + (rng4, expected4)]: + result = base.intersection(rng) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + + # non-monotonic + base = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-02', + '2011-01-03'], freq='D', name='idx') + + rng2 = PeriodIndex(['2011-01-04', '2011-01-02', + '2011-02-02', '2011-02-03'], + freq='D', name='idx') + expected2 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D', + name='idx') + + rng3 = PeriodIndex(['2011-01-04', '2011-01-02', '2011-02-02', + '2011-02-03'], + freq='D', name='other') + expected3 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D', + name=None) + + rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx') + expected4 = PeriodIndex([], freq='D', name='idx') + + for (rng, expected) in [(rng2, expected2), (rng3, expected3), + (rng4, expected4)]: + result = base.intersection(rng) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, 'D') + + # empty same freq + rng = date_range('6/1/2000', '6/15/2000', freq='T') + result = rng[0:0].intersection(rng) + self.assertEqual(len(result), 0) + + result = rng.intersection(rng[0:0]) + self.assertEqual(len(result), 0) diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py new file mode 100644 index 0000000000000..e09d405afd375 --- /dev/null +++ b/pandas/tests/indexes/period/test_tools.py @@ -0,0 +1,449 @@ +import numpy as np +from datetime import datetime, timedelta + +import pandas as pd +import pandas.util.testing as tm +import pandas.tseries.period as period +from pandas.compat import lrange +from pandas.tseries.frequencies import get_freq, MONTHS +from pandas._period import period_ordinal, period_asfreq +from pandas import (PeriodIndex, Period, DatetimeIndex, Timestamp, 
Series, + date_range, to_datetime, period_range) + + +class TestPeriodRepresentation(tm.TestCase): + """ + Wish to match NumPy units + """ + + def _check_freq(self, freq, base_date): + rng = PeriodIndex(start=base_date, periods=10, freq=freq) + exp = np.arange(10, dtype=np.int64) + self.assert_numpy_array_equal(rng._values, exp) + self.assert_numpy_array_equal(rng.asi8, exp) + + def test_annual(self): + self._check_freq('A', 1970) + + def test_monthly(self): + self._check_freq('M', '1970-01') + + def test_weekly(self): + self._check_freq('W-THU', '1970-01-01') + + def test_daily(self): + self._check_freq('D', '1970-01-01') + + def test_business_daily(self): + self._check_freq('B', '1970-01-01') + + def test_hourly(self): + self._check_freq('H', '1970-01-01') + + def test_minutely(self): + self._check_freq('T', '1970-01-01') + + def test_secondly(self): + self._check_freq('S', '1970-01-01') + + def test_millisecondly(self): + self._check_freq('L', '1970-01-01') + + def test_microsecondly(self): + self._check_freq('U', '1970-01-01') + + def test_nanosecondly(self): + self._check_freq('N', '1970-01-01') + + def test_negone_ordinals(self): + freqs = ['A', 'M', 'Q', 'D', 'H', 'T', 'S'] + + period = Period(ordinal=-1, freq='D') + for freq in freqs: + repr(period.asfreq(freq)) + + for freq in freqs: + period = Period(ordinal=-1, freq=freq) + repr(period) + self.assertEqual(period.year, 1969) + + period = Period(ordinal=-1, freq='B') + repr(period) + period = Period(ordinal=-1, freq='W') + repr(period) + + +class TestTslib(tm.TestCase): + def test_intraday_conversion_factors(self): + self.assertEqual(period_asfreq( + 1, get_freq('D'), get_freq('H'), False), 24) + self.assertEqual(period_asfreq( + 1, get_freq('D'), get_freq('T'), False), 1440) + self.assertEqual(period_asfreq( + 1, get_freq('D'), get_freq('S'), False), 86400) + self.assertEqual(period_asfreq(1, get_freq( + 'D'), get_freq('L'), False), 86400000) + self.assertEqual(period_asfreq(1, get_freq( + 'D'), get_freq('U'), False), 86400000000) + self.assertEqual(period_asfreq(1, get_freq( + 'D'), get_freq('N'), False), 86400000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('H'), get_freq('T'), False), 60) + self.assertEqual(period_asfreq( + 1, get_freq('H'), get_freq('S'), False), 3600) + self.assertEqual(period_asfreq(1, get_freq('H'), + get_freq('L'), False), 3600000) + self.assertEqual(period_asfreq(1, get_freq( + 'H'), get_freq('U'), False), 3600000000) + self.assertEqual(period_asfreq(1, get_freq( + 'H'), get_freq('N'), False), 3600000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('T'), get_freq('S'), False), 60) + self.assertEqual(period_asfreq( + 1, get_freq('T'), get_freq('L'), False), 60000) + self.assertEqual(period_asfreq(1, get_freq( + 'T'), get_freq('U'), False), 60000000) + self.assertEqual(period_asfreq(1, get_freq( + 'T'), get_freq('N'), False), 60000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('S'), get_freq('L'), False), 1000) + self.assertEqual(period_asfreq(1, get_freq('S'), + get_freq('U'), False), 1000000) + self.assertEqual(period_asfreq(1, get_freq( + 'S'), get_freq('N'), False), 1000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('L'), get_freq('U'), False), 1000) + self.assertEqual(period_asfreq(1, get_freq('L'), + get_freq('N'), False), 1000000) + + self.assertEqual(period_asfreq( + 1, get_freq('U'), get_freq('N'), False), 1000) + + def test_period_ordinal_start_values(self): + # information for 1.1.1970 + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + 
get_freq('A'))) + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('M'))) + self.assertEqual(1, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('W'))) + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('D'))) + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('B'))) + + def test_period_ordinal_week(self): + self.assertEqual(1, period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0, + get_freq('W'))) + self.assertEqual(2, period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0, + get_freq('W'))) + + self.assertEqual(2284, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, + get_freq('W'))) + self.assertEqual(2285, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, + get_freq('W'))) + + def test_period_ordinal_business_day(self): + # Thursday + self.assertEqual(11415, period_ordinal(2013, 10, 3, 0, 0, 0, 0, 0, + get_freq('B'))) + # Friday + self.assertEqual(11416, period_ordinal(2013, 10, 4, 0, 0, 0, 0, 0, + get_freq('B'))) + # Saturday + self.assertEqual(11417, period_ordinal(2013, 10, 5, 0, 0, 0, 0, 0, + get_freq('B'))) + # Sunday + self.assertEqual(11417, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, + get_freq('B'))) + # Monday + self.assertEqual(11417, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, + get_freq('B'))) + # Tuesday + self.assertEqual(11418, period_ordinal(2013, 10, 8, 0, 0, 0, 0, 0, + get_freq('B'))) + + +class TestPeriodIndex(tm.TestCase): + + def setUp(self): + pass + + def test_tolist(self): + index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + rs = index.tolist() + [tm.assertIsInstance(x, Period) for x in rs] + + recon = PeriodIndex(rs) + tm.assert_index_equal(index, recon) + + def test_to_timestamp(self): + index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + series = Series(1, index=index, name='foo') + + exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') + result = series.to_timestamp(how='end') + tm.assert_index_equal(result.index, exp_index) + self.assertEqual(result.name, 'foo') + + exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN') + result = series.to_timestamp(how='start') + tm.assert_index_equal(result.index, exp_index) + + def _get_with_delta(delta, freq='A-DEC'): + return date_range(to_datetime('1/1/2001') + delta, + to_datetime('12/31/2009') + delta, freq=freq) + + delta = timedelta(hours=23) + result = series.to_timestamp('H', 'end') + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.index, exp_index) + + delta = timedelta(hours=23, minutes=59) + result = series.to_timestamp('T', 'end') + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.index, exp_index) + + result = series.to_timestamp('S', 'end') + delta = timedelta(hours=23, minutes=59, seconds=59) + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.index, exp_index) + + index = PeriodIndex(freq='H', start='1/1/2001', end='1/2/2001') + series = Series(1, index=index, name='foo') + + exp_index = date_range('1/1/2001 00:59:59', end='1/2/2001 00:59:59', + freq='H') + result = series.to_timestamp(how='end') + tm.assert_index_equal(result.index, exp_index) + self.assertEqual(result.name, 'foo') + + def test_to_timestamp_quarterly_bug(self): + years = np.arange(1960, 2000).repeat(4) + quarters = np.tile(lrange(1, 5), 40) + + pindex = PeriodIndex(year=years, quarter=quarters) + + stamps = pindex.to_timestamp('D', 'end') + expected = DatetimeIndex([x.to_timestamp('D', 'end') for x in pindex]) + tm.assert_index_equal(stamps, expected) + + def 
test_to_timestamp_preserve_name(self): + index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009', + name='foo') + self.assertEqual(index.name, 'foo') + + conv = index.to_timestamp('D') + self.assertEqual(conv.name, 'foo') + + def test_to_timestamp_repr_is_code(self): + zs = [Timestamp('99-04-17 00:00:00', tz='UTC'), + Timestamp('2001-04-17 00:00:00', tz='UTC'), + Timestamp('2001-04-17 00:00:00', tz='America/Los_Angeles'), + Timestamp('2001-04-17 00:00:00', tz=None)] + for z in zs: + self.assertEqual(eval(repr(z)), z) + + def test_to_timestamp_pi_nat(self): + # GH 7228 + index = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', + name='idx') + + result = index.to_timestamp('D') + expected = DatetimeIndex([pd.NaT, datetime(2011, 1, 1), + datetime(2011, 2, 1)], name='idx') + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, 'idx') + + result2 = result.to_period(freq='M') + tm.assert_index_equal(result2, index) + self.assertEqual(result2.name, 'idx') + + result3 = result.to_period(freq='3M') + exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx') + self.assert_index_equal(result3, exp) + self.assertEqual(result3.freqstr, '3M') + + msg = ('Frequency must be positive, because it' + ' represents span: -2A') + with tm.assertRaisesRegexp(ValueError, msg): + result.to_period(freq='-2A') + + def test_to_timestamp_pi_mult(self): + idx = PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='2M', name='idx') + result = idx.to_timestamp() + expected = DatetimeIndex( + ['2011-01-01', 'NaT', '2011-02-01'], name='idx') + self.assert_index_equal(result, expected) + result = idx.to_timestamp(how='E') + expected = DatetimeIndex( + ['2011-02-28', 'NaT', '2011-03-31'], name='idx') + self.assert_index_equal(result, expected) + + def test_to_timestamp_pi_combined(self): + idx = PeriodIndex(start='2011', periods=2, freq='1D1H', name='idx') + result = idx.to_timestamp() + expected = DatetimeIndex( + ['2011-01-01 00:00', '2011-01-02 01:00'], name='idx') + self.assert_index_equal(result, expected) + result = idx.to_timestamp(how='E') + expected = DatetimeIndex( + ['2011-01-02 00:59:59', '2011-01-03 01:59:59'], name='idx') + self.assert_index_equal(result, expected) + result = idx.to_timestamp(how='E', freq='H') + expected = DatetimeIndex( + ['2011-01-02 00:00', '2011-01-03 01:00'], name='idx') + self.assert_index_equal(result, expected) + + def test_to_timestamp_to_period_astype(self): + idx = DatetimeIndex([pd.NaT, '2011-01-01', '2011-02-01'], name='idx') + + res = idx.astype('period[M]') + exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx') + tm.assert_index_equal(res, exp) + + res = idx.astype('period[3M]') + exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx') + self.assert_index_equal(res, exp) + + def test_dti_to_period(self): + dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') + pi1 = dti.to_period() + pi2 = dti.to_period(freq='D') + pi3 = dti.to_period(freq='3D') + + self.assertEqual(pi1[0], Period('Jan 2005', freq='M')) + self.assertEqual(pi2[0], Period('1/31/2005', freq='D')) + self.assertEqual(pi3[0], Period('1/31/2005', freq='3D')) + + self.assertEqual(pi1[-1], Period('Nov 2005', freq='M')) + self.assertEqual(pi2[-1], Period('11/30/2005', freq='D')) + self.assertEqual(pi3[-1], Period('11/30/2005', freq='3D')) + + tm.assert_index_equal(pi1, period_range('1/1/2005', '11/1/2005', + freq='M')) + tm.assert_index_equal(pi2, period_range('1/1/2005', '11/1/2005', + freq='M').asfreq('D')) + tm.assert_index_equal(pi3, 
period_range('1/1/2005', '11/1/2005', + freq='M').asfreq('3D')) + + def test_period_astype_to_timestamp(self): + pi = pd.PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M') + + exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01']) + tm.assert_index_equal(pi.astype('datetime64[ns]'), exp) + + exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31']) + tm.assert_index_equal(pi.astype('datetime64[ns]', how='end'), exp) + + exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], + tz='US/Eastern') + res = pi.astype('datetime64[ns, US/Eastern]') + tm.assert_index_equal(pi.astype('datetime64[ns, US/Eastern]'), exp) + + exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'], + tz='US/Eastern') + res = pi.astype('datetime64[ns, US/Eastern]', how='end') + tm.assert_index_equal(res, exp) + + def test_to_period_quarterly(self): + # make sure we can make the round trip + for month in MONTHS: + freq = 'Q-%s' % month + rng = period_range('1989Q3', '1991Q3', freq=freq) + stamps = rng.to_timestamp() + result = stamps.to_period(freq) + tm.assert_index_equal(rng, result) + + def test_to_period_quarterlyish(self): + offsets = ['BQ', 'QS', 'BQS'] + for off in offsets: + rng = date_range('01-Jan-2012', periods=8, freq=off) + prng = rng.to_period() + self.assertEqual(prng.freq, 'Q-DEC') + + def test_to_period_annualish(self): + offsets = ['BA', 'AS', 'BAS'] + for off in offsets: + rng = date_range('01-Jan-2012', periods=8, freq=off) + prng = rng.to_period() + self.assertEqual(prng.freq, 'A-DEC') + + def test_to_period_monthish(self): + offsets = ['MS', 'BM'] + for off in offsets: + rng = date_range('01-Jan-2012', periods=8, freq=off) + prng = rng.to_period() + self.assertEqual(prng.freq, 'M') + + rng = date_range('01-Jan-2012', periods=8, freq='M') + prng = rng.to_period() + self.assertEqual(prng.freq, 'M') + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + date_range('01-Jan-2012', periods=8, freq='EOM') + + def test_period_dt64_round_trip(self): + dti = date_range('1/1/2000', '1/7/2002', freq='B') + pi = dti.to_period() + tm.assert_index_equal(pi.to_timestamp(), dti) + + dti = date_range('1/1/2000', '1/7/2002', freq='B') + pi = dti.to_period(freq='H') + tm.assert_index_equal(pi.to_timestamp(), dti) + + def test_to_timestamp_1703(self): + index = period_range('1/1/2012', periods=4, freq='D') + + result = index.to_timestamp() + self.assertEqual(result[0], Timestamp('1/1/2012')) + + def test_to_datetime_depr(self): + index = period_range('1/1/2012', periods=4, freq='D') + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = index.to_datetime() + self.assertEqual(result[0], Timestamp('1/1/2012')) + + def test_combine_first(self): + # GH 3367 + didx = pd.DatetimeIndex(start='1950-01-31', end='1950-07-31', freq='M') + pidx = pd.PeriodIndex(start=pd.Period('1950-1'), + end=pd.Period('1950-7'), freq='M') + # check to be consistent with DatetimeIndex + for idx in [didx, pidx]: + a = pd.Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx) + b = pd.Series([9, 9, 9, 9, 9, 9, 9], index=idx) + + result = a.combine_first(b) + expected = pd.Series([1, 9, 9, 4, 5, 9, 7], index=idx, + dtype=np.float64) + tm.assert_series_equal(result, expected) + + def test_searchsorted(self): + for freq in ['D', '2D']: + pidx = pd.PeriodIndex(['2014-01-01', '2014-01-02', '2014-01-03', + '2014-01-04', '2014-01-05'], freq=freq) + + p1 = pd.Period('2014-01-01', freq=freq) + 
self.assertEqual(pidx.searchsorted(p1), 0) + + p2 = pd.Period('2014-01-04', freq=freq) + self.assertEqual(pidx.searchsorted(p2), 3) + + msg = "Input has different freq=H from PeriodIndex" + with self.assertRaisesRegexp(period.IncompatibleFrequency, msg): + pidx.searchsorted(pd.Period('2014-01-01', freq='H')) + + msg = "Input has different freq=5D from PeriodIndex" + with self.assertRaisesRegexp(period.IncompatibleFrequency, msg): + pidx.searchsorted(pd.Period('2014-01-01', freq='5D')) + + with tm.assert_produces_warning(FutureWarning): + pidx.searchsorted(key=p2) diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py new file mode 100644 index 0000000000000..c94a7c62a6dc9 --- /dev/null +++ b/pandas/tests/scalar/test_period.py @@ -0,0 +1,2074 @@ +import numpy as np +from datetime import datetime, date, timedelta + +import pandas as pd +import pandas.util.testing as tm +import pandas.tseries.period as period +from pandas.compat import text_type, iteritems +from pandas.compat.numpy import np_datetime64_compat +from pandas import Period, Timestamp, tslib, offsets, _period +from pandas.tseries.frequencies import DAYS, MONTHS, _period_code_map + + +class TestPeriodProperties(tm.TestCase): + "Test properties such as year, month, weekday, etc...." + + def test_quarterly_negative_ordinals(self): + p = Period(ordinal=-1, freq='Q-DEC') + self.assertEqual(p.year, 1969) + self.assertEqual(p.quarter, 4) + self.assertIsInstance(p, Period) + + p = Period(ordinal=-2, freq='Q-DEC') + self.assertEqual(p.year, 1969) + self.assertEqual(p.quarter, 3) + self.assertIsInstance(p, Period) + + p = Period(ordinal=-2, freq='M') + self.assertEqual(p.year, 1969) + self.assertEqual(p.month, 11) + self.assertIsInstance(p, Period) + + def test_period_cons_quarterly(self): + # bugs in scikits.timeseries + for month in MONTHS: + freq = 'Q-%s' % month + exp = Period('1989Q3', freq=freq) + self.assertIn('1989Q3', str(exp)) + stamp = exp.to_timestamp('D', how='end') + p = Period(stamp, freq=freq) + self.assertEqual(p, exp) + + stamp = exp.to_timestamp('3D', how='end') + p = Period(stamp, freq=freq) + self.assertEqual(p, exp) + + def test_period_cons_annual(self): + # bugs in scikits.timeseries + for month in MONTHS: + freq = 'A-%s' % month + exp = Period('1989', freq=freq) + stamp = exp.to_timestamp('D', how='end') + timedelta(days=30) + p = Period(stamp, freq=freq) + self.assertEqual(p, exp + 1) + self.assertIsInstance(p, Period) + + def test_period_cons_weekly(self): + for num in range(10, 17): + daystr = '2011-02-%d' % num + for day in DAYS: + freq = 'W-%s' % day + + result = Period(daystr, freq=freq) + expected = Period(daystr, freq='D').asfreq(freq) + self.assertEqual(result, expected) + self.assertIsInstance(result, Period) + + def test_period_from_ordinal(self): + p = pd.Period('2011-01', freq='M') + res = pd.Period._from_ordinal(p.ordinal, freq='M') + self.assertEqual(p, res) + self.assertIsInstance(res, Period) + + def test_period_cons_nat(self): + p = Period('NaT', freq='M') + self.assertIs(p, pd.NaT) + + p = Period('nat', freq='W-SUN') + self.assertIs(p, pd.NaT) + + p = Period(tslib.iNaT, freq='D') + self.assertIs(p, pd.NaT) + + p = Period(tslib.iNaT, freq='3D') + self.assertIs(p, pd.NaT) + + p = Period(tslib.iNaT, freq='1D1H') + self.assertIs(p, pd.NaT) + + p = Period('NaT') + self.assertIs(p, pd.NaT) + + p = Period(tslib.iNaT) + self.assertIs(p, pd.NaT) + + def test_cons_null_like(self): + # check Timestamp compat + self.assertIs(Timestamp('NaT'), pd.NaT) + self.assertIs(Period('NaT'), 
pd.NaT) + + self.assertIs(Timestamp(None), pd.NaT) + self.assertIs(Period(None), pd.NaT) + + self.assertIs(Timestamp(float('nan')), pd.NaT) + self.assertIs(Period(float('nan')), pd.NaT) + + self.assertIs(Timestamp(np.nan), pd.NaT) + self.assertIs(Period(np.nan), pd.NaT) + + def test_period_cons_mult(self): + p1 = Period('2011-01', freq='3M') + p2 = Period('2011-01', freq='M') + self.assertEqual(p1.ordinal, p2.ordinal) + + self.assertEqual(p1.freq, offsets.MonthEnd(3)) + self.assertEqual(p1.freqstr, '3M') + + self.assertEqual(p2.freq, offsets.MonthEnd()) + self.assertEqual(p2.freqstr, 'M') + + result = p1 + 1 + self.assertEqual(result.ordinal, (p2 + 3).ordinal) + self.assertEqual(result.freq, p1.freq) + self.assertEqual(result.freqstr, '3M') + + result = p1 - 1 + self.assertEqual(result.ordinal, (p2 - 3).ordinal) + self.assertEqual(result.freq, p1.freq) + self.assertEqual(result.freqstr, '3M') + + msg = ('Frequency must be positive, because it' + ' represents span: -3M') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='-3M') + + msg = ('Frequency must be positive, because it' ' represents span: 0M') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='0M') + + def test_period_cons_combined(self): + p = [(Period('2011-01', freq='1D1H'), + Period('2011-01', freq='1H1D'), + Period('2011-01', freq='H')), + (Period(ordinal=1, freq='1D1H'), + Period(ordinal=1, freq='1H1D'), + Period(ordinal=1, freq='H'))] + + for p1, p2, p3 in p: + self.assertEqual(p1.ordinal, p3.ordinal) + self.assertEqual(p2.ordinal, p3.ordinal) + + self.assertEqual(p1.freq, offsets.Hour(25)) + self.assertEqual(p1.freqstr, '25H') + + self.assertEqual(p2.freq, offsets.Hour(25)) + self.assertEqual(p2.freqstr, '25H') + + self.assertEqual(p3.freq, offsets.Hour()) + self.assertEqual(p3.freqstr, 'H') + + result = p1 + 1 + self.assertEqual(result.ordinal, (p3 + 25).ordinal) + self.assertEqual(result.freq, p1.freq) + self.assertEqual(result.freqstr, '25H') + + result = p2 + 1 + self.assertEqual(result.ordinal, (p3 + 25).ordinal) + self.assertEqual(result.freq, p2.freq) + self.assertEqual(result.freqstr, '25H') + + result = p1 - 1 + self.assertEqual(result.ordinal, (p3 - 25).ordinal) + self.assertEqual(result.freq, p1.freq) + self.assertEqual(result.freqstr, '25H') + + result = p2 - 1 + self.assertEqual(result.ordinal, (p3 - 25).ordinal) + self.assertEqual(result.freq, p2.freq) + self.assertEqual(result.freqstr, '25H') + + msg = ('Frequency must be positive, because it' + ' represents span: -25H') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='-1D1H') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='-1H1D') + with tm.assertRaisesRegexp(ValueError, msg): + Period(ordinal=1, freq='-1D1H') + with tm.assertRaisesRegexp(ValueError, msg): + Period(ordinal=1, freq='-1H1D') + + msg = ('Frequency must be positive, because it' + ' represents span: 0D') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='0D0H') + with tm.assertRaisesRegexp(ValueError, msg): + Period(ordinal=1, freq='0D0H') + + # You can only combine together day and intraday offsets + msg = ('Invalid frequency: 1W1D') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='1W1D') + msg = ('Invalid frequency: 1D1W') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='1D1W') + + def test_timestamp_tz_arg(self): + tm._skip_if_no_pytz() + import pytz + for case in ['Europe/Brussels', 'Asia/Tokyo', 'US/Pacific']: + p = 
Period('1/1/2005', freq='M').to_timestamp(tz=case) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) + exp_zone = pytz.timezone(case).normalize(p) + + self.assertEqual(p, exp) + self.assertEqual(p.tz, exp_zone.tzinfo) + self.assertEqual(p.tz, exp.tz) + + p = Period('1/1/2005', freq='3H').to_timestamp(tz=case) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) + exp_zone = pytz.timezone(case).normalize(p) + + self.assertEqual(p, exp) + self.assertEqual(p.tz, exp_zone.tzinfo) + self.assertEqual(p.tz, exp.tz) + + p = Period('1/1/2005', freq='A').to_timestamp(freq='A', tz=case) + exp = Timestamp('31/12/2005', tz='UTC').tz_convert(case) + exp_zone = pytz.timezone(case).normalize(p) + + self.assertEqual(p, exp) + self.assertEqual(p.tz, exp_zone.tzinfo) + self.assertEqual(p.tz, exp.tz) + + p = Period('1/1/2005', freq='A').to_timestamp(freq='3H', tz=case) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) + exp_zone = pytz.timezone(case).normalize(p) + + self.assertEqual(p, exp) + self.assertEqual(p.tz, exp_zone.tzinfo) + self.assertEqual(p.tz, exp.tz) + + def test_timestamp_tz_arg_dateutil(self): + from pandas.tslib import _dateutil_gettz as gettz + from pandas.tslib import maybe_get_tz + for case in ['dateutil/Europe/Brussels', 'dateutil/Asia/Tokyo', + 'dateutil/US/Pacific']: + p = Period('1/1/2005', freq='M').to_timestamp( + tz=maybe_get_tz(case)) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) + self.assertEqual(p, exp) + self.assertEqual(p.tz, gettz(case.split('/', 1)[1])) + self.assertEqual(p.tz, exp.tz) + + p = Period('1/1/2005', + freq='M').to_timestamp(freq='3H', tz=maybe_get_tz(case)) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) + self.assertEqual(p, exp) + self.assertEqual(p.tz, gettz(case.split('/', 1)[1])) + self.assertEqual(p.tz, exp.tz) + + def test_timestamp_tz_arg_dateutil_from_string(self): + from pandas.tslib import _dateutil_gettz as gettz + p = Period('1/1/2005', + freq='M').to_timestamp(tz='dateutil/Europe/Brussels') + self.assertEqual(p.tz, gettz('Europe/Brussels')) + + def test_timestamp_mult(self): + p = pd.Period('2011-01', freq='M') + self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) + self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-01-31')) + + p = pd.Period('2011-01', freq='3M') + self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) + self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-03-31')) + + def test_period_constructor(self): + i1 = Period('1/1/2005', freq='M') + i2 = Period('Jan 2005') + + self.assertEqual(i1, i2) + + i1 = Period('2005', freq='A') + i2 = Period('2005') + i3 = Period('2005', freq='a') + + self.assertEqual(i1, i2) + self.assertEqual(i1, i3) + + i4 = Period('2005', freq='M') + i5 = Period('2005', freq='m') + + self.assertRaises(ValueError, i1.__ne__, i4) + self.assertEqual(i4, i5) + + i1 = Period.now('Q') + i2 = Period(datetime.now(), freq='Q') + i3 = Period.now('q') + + self.assertEqual(i1, i2) + self.assertEqual(i1, i3) + + # Biz day construction, roll forward if non-weekday + i1 = Period('3/10/12', freq='B') + i2 = Period('3/10/12', freq='D') + self.assertEqual(i1, i2.asfreq('B')) + i2 = Period('3/11/12', freq='D') + self.assertEqual(i1, i2.asfreq('B')) + i2 = Period('3/12/12', freq='D') + self.assertEqual(i1, i2.asfreq('B')) + + i3 = Period('3/10/12', freq='b') + self.assertEqual(i1, i3) + + i1 = Period(year=2005, quarter=1, freq='Q') + i2 = Period('1/1/2005', freq='Q') + self.assertEqual(i1, i2) + + i1 = Period(year=2005, quarter=3, freq='Q') + 
i2 = Period('9/1/2005', freq='Q') + self.assertEqual(i1, i2) + + i1 = Period(year=2005, month=3, day=1, freq='D') + i2 = Period('3/1/2005', freq='D') + self.assertEqual(i1, i2) + + i3 = Period(year=2005, month=3, day=1, freq='d') + self.assertEqual(i1, i3) + + i1 = Period(year=2012, month=3, day=10, freq='B') + i2 = Period('3/12/12', freq='B') + self.assertEqual(i1, i2) + + i1 = Period('2005Q1') + i2 = Period(year=2005, quarter=1, freq='Q') + i3 = Period('2005q1') + self.assertEqual(i1, i2) + self.assertEqual(i1, i3) + + i1 = Period('05Q1') + self.assertEqual(i1, i2) + lower = Period('05q1') + self.assertEqual(i1, lower) + + i1 = Period('1Q2005') + self.assertEqual(i1, i2) + lower = Period('1q2005') + self.assertEqual(i1, lower) + + i1 = Period('1Q05') + self.assertEqual(i1, i2) + lower = Period('1q05') + self.assertEqual(i1, lower) + + i1 = Period('4Q1984') + self.assertEqual(i1.year, 1984) + lower = Period('4q1984') + self.assertEqual(i1, lower) + + i1 = Period('1982', freq='min') + i2 = Period('1982', freq='MIN') + self.assertEqual(i1, i2) + i2 = Period('1982', freq=('Min', 1)) + self.assertEqual(i1, i2) + + expected = Period('2007-01', freq='M') + i1 = Period('200701', freq='M') + self.assertEqual(i1, expected) + + i1 = Period('200701', freq='M') + self.assertEqual(i1, expected) + + i1 = Period(200701, freq='M') + self.assertEqual(i1, expected) + + i1 = Period(ordinal=200701, freq='M') + self.assertEqual(i1.year, 18695) + + i1 = Period(datetime(2007, 1, 1), freq='M') + i2 = Period('200701', freq='M') + self.assertEqual(i1, i2) + + i1 = Period(date(2007, 1, 1), freq='M') + i2 = Period(datetime(2007, 1, 1), freq='M') + i3 = Period(np.datetime64('2007-01-01'), freq='M') + i4 = Period(np_datetime64_compat('2007-01-01 00:00:00Z'), freq='M') + i5 = Period(np_datetime64_compat('2007-01-01 00:00:00.000Z'), freq='M') + self.assertEqual(i1, i2) + self.assertEqual(i1, i3) + self.assertEqual(i1, i4) + self.assertEqual(i1, i5) + + i1 = Period('2007-01-01 09:00:00.001') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') + self.assertEqual(i1, expected) + + expected = Period(np_datetime64_compat( + '2007-01-01 09:00:00.001Z'), freq='L') + self.assertEqual(i1, expected) + + i1 = Period('2007-01-01 09:00:00.00101') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') + self.assertEqual(i1, expected) + + expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), + freq='U') + self.assertEqual(i1, expected) + + self.assertRaises(ValueError, Period, ordinal=200701) + + self.assertRaises(ValueError, Period, '2007-1-1', freq='X') + + def test_period_constructor_offsets(self): + self.assertEqual(Period('1/1/2005', freq=offsets.MonthEnd()), + Period('1/1/2005', freq='M')) + self.assertEqual(Period('2005', freq=offsets.YearEnd()), + Period('2005', freq='A')) + self.assertEqual(Period('2005', freq=offsets.MonthEnd()), + Period('2005', freq='M')) + self.assertEqual(Period('3/10/12', freq=offsets.BusinessDay()), + Period('3/10/12', freq='B')) + self.assertEqual(Period('3/10/12', freq=offsets.Day()), + Period('3/10/12', freq='D')) + + self.assertEqual(Period(year=2005, quarter=1, + freq=offsets.QuarterEnd(startingMonth=12)), + Period(year=2005, quarter=1, freq='Q')) + self.assertEqual(Period(year=2005, quarter=2, + freq=offsets.QuarterEnd(startingMonth=12)), + Period(year=2005, quarter=2, freq='Q')) + + self.assertEqual(Period(year=2005, month=3, day=1, freq=offsets.Day()), + Period(year=2005, month=3, day=1, freq='D')) + self.assertEqual(Period(year=2012, month=3, day=10, 
+ freq=offsets.BDay()), + Period(year=2012, month=3, day=10, freq='B')) + + expected = Period('2005-03-01', freq='3D') + self.assertEqual(Period(year=2005, month=3, day=1, + freq=offsets.Day(3)), expected) + self.assertEqual(Period(year=2005, month=3, day=1, freq='3D'), + expected) + + self.assertEqual(Period(year=2012, month=3, day=10, + freq=offsets.BDay(3)), + Period(year=2012, month=3, day=10, freq='3B')) + + self.assertEqual(Period(200701, freq=offsets.MonthEnd()), + Period(200701, freq='M')) + + i1 = Period(ordinal=200701, freq=offsets.MonthEnd()) + i2 = Period(ordinal=200701, freq='M') + self.assertEqual(i1, i2) + self.assertEqual(i1.year, 18695) + self.assertEqual(i2.year, 18695) + + i1 = Period(datetime(2007, 1, 1), freq='M') + i2 = Period('200701', freq='M') + self.assertEqual(i1, i2) + + i1 = Period(date(2007, 1, 1), freq='M') + i2 = Period(datetime(2007, 1, 1), freq='M') + i3 = Period(np.datetime64('2007-01-01'), freq='M') + i4 = Period(np_datetime64_compat('2007-01-01 00:00:00Z'), freq='M') + i5 = Period(np_datetime64_compat('2007-01-01 00:00:00.000Z'), freq='M') + self.assertEqual(i1, i2) + self.assertEqual(i1, i3) + self.assertEqual(i1, i4) + self.assertEqual(i1, i5) + + i1 = Period('2007-01-01 09:00:00.001') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') + self.assertEqual(i1, expected) + + expected = Period(np_datetime64_compat( + '2007-01-01 09:00:00.001Z'), freq='L') + self.assertEqual(i1, expected) + + i1 = Period('2007-01-01 09:00:00.00101') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') + self.assertEqual(i1, expected) + + expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), + freq='U') + self.assertEqual(i1, expected) + + self.assertRaises(ValueError, Period, ordinal=200701) + + self.assertRaises(ValueError, Period, '2007-1-1', freq='X') + + def test_freq_str(self): + i1 = Period('1982', freq='Min') + self.assertEqual(i1.freq, offsets.Minute()) + self.assertEqual(i1.freqstr, 'T') + + def test_period_deprecated_freq(self): + cases = {"M": ["MTH", "MONTH", "MONTHLY", "Mth", "month", "monthly"], + "B": ["BUS", "BUSINESS", "BUSINESSLY", "WEEKDAY", "bus"], + "D": ["DAY", "DLY", "DAILY", "Day", "Dly", "Daily"], + "H": ["HR", "HOUR", "HRLY", "HOURLY", "hr", "Hour", "HRly"], + "T": ["minute", "MINUTE", "MINUTELY", "minutely"], + "S": ["sec", "SEC", "SECOND", "SECONDLY", "second"], + "L": ["MILLISECOND", "MILLISECONDLY", "millisecond"], + "U": ["MICROSECOND", "MICROSECONDLY", "microsecond"], + "N": ["NANOSECOND", "NANOSECONDLY", "nanosecond"]} + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + for exp, freqs in iteritems(cases): + for freq in freqs: + with self.assertRaisesRegexp(ValueError, msg): + Period('2016-03-01 09:00', freq=freq) + with self.assertRaisesRegexp(ValueError, msg): + Period(ordinal=1, freq=freq) + + # check supported freq-aliases still works + p1 = Period('2016-03-01 09:00', freq=exp) + p2 = Period(ordinal=1, freq=exp) + tm.assertIsInstance(p1, Period) + tm.assertIsInstance(p2, Period) + + def test_hash(self): + self.assertEqual(hash(Period('2011-01', freq='M')), + hash(Period('2011-01', freq='M'))) + + self.assertNotEqual(hash(Period('2011-01-01', freq='D')), + hash(Period('2011-01', freq='M'))) + + self.assertNotEqual(hash(Period('2011-01', freq='3M')), + hash(Period('2011-01', freq='2M'))) + + self.assertNotEqual(hash(Period('2011-01', freq='M')), + hash(Period('2011-02', freq='M'))) + + def test_repr(self): + p = Period('Jan-2000') + self.assertIn('2000-01', repr(p)) + + p = 
Period('2000-12-15') + self.assertIn('2000-12-15', repr(p)) + + def test_repr_nat(self): + p = Period('nat', freq='M') + self.assertIn(repr(tslib.NaT), repr(p)) + + def test_millisecond_repr(self): + p = Period('2000-01-01 12:15:02.123') + + self.assertEqual("Period('2000-01-01 12:15:02.123', 'L')", repr(p)) + + def test_microsecond_repr(self): + p = Period('2000-01-01 12:15:02.123567') + + self.assertEqual("Period('2000-01-01 12:15:02.123567', 'U')", repr(p)) + + def test_strftime(self): + p = Period('2000-1-1 12:34:12', freq='S') + res = p.strftime('%Y-%m-%d %H:%M:%S') + self.assertEqual(res, '2000-01-01 12:34:12') + tm.assertIsInstance(res, text_type) # GH3363 + + def test_sub_delta(self): + left, right = Period('2011', freq='A'), Period('2007', freq='A') + result = left - right + self.assertEqual(result, 4) + + with self.assertRaises(period.IncompatibleFrequency): + left - Period('2007-01', freq='M') + + def test_to_timestamp(self): + p = Period('1982', freq='A') + start_ts = p.to_timestamp(how='S') + aliases = ['s', 'StarT', 'BEGIn'] + for a in aliases: + self.assertEqual(start_ts, p.to_timestamp('D', how=a)) + # freq with mult should not affect to the result + self.assertEqual(start_ts, p.to_timestamp('3D', how=a)) + + end_ts = p.to_timestamp(how='E') + aliases = ['e', 'end', 'FINIsH'] + for a in aliases: + self.assertEqual(end_ts, p.to_timestamp('D', how=a)) + self.assertEqual(end_ts, p.to_timestamp('3D', how=a)) + + from_lst = ['A', 'Q', 'M', 'W', 'B', 'D', 'H', 'Min', 'S'] + + def _ex(p): + return Timestamp((p + 1).start_time.value - 1) + + for i, fcode in enumerate(from_lst): + p = Period('1982', freq=fcode) + result = p.to_timestamp().to_period(fcode) + self.assertEqual(result, p) + + self.assertEqual(p.start_time, p.to_timestamp(how='S')) + + self.assertEqual(p.end_time, _ex(p)) + + # Frequency other than daily + + p = Period('1985', freq='A') + + result = p.to_timestamp('H', how='end') + expected = datetime(1985, 12, 31, 23) + self.assertEqual(result, expected) + result = p.to_timestamp('3H', how='end') + self.assertEqual(result, expected) + + result = p.to_timestamp('T', how='end') + expected = datetime(1985, 12, 31, 23, 59) + self.assertEqual(result, expected) + result = p.to_timestamp('2T', how='end') + self.assertEqual(result, expected) + + result = p.to_timestamp(how='end') + expected = datetime(1985, 12, 31) + self.assertEqual(result, expected) + + expected = datetime(1985, 1, 1) + result = p.to_timestamp('H', how='start') + self.assertEqual(result, expected) + result = p.to_timestamp('T', how='start') + self.assertEqual(result, expected) + result = p.to_timestamp('S', how='start') + self.assertEqual(result, expected) + result = p.to_timestamp('3H', how='start') + self.assertEqual(result, expected) + result = p.to_timestamp('5S', how='start') + self.assertEqual(result, expected) + + def test_start_time(self): + freq_lst = ['A', 'Q', 'M', 'D', 'H', 'T', 'S'] + xp = datetime(2012, 1, 1) + for f in freq_lst: + p = Period('2012', freq=f) + self.assertEqual(p.start_time, xp) + self.assertEqual(Period('2012', freq='B').start_time, + datetime(2012, 1, 2)) + self.assertEqual(Period('2012', freq='W').start_time, + datetime(2011, 12, 26)) + + def test_end_time(self): + p = Period('2012', freq='A') + + def _ex(*args): + return Timestamp(Timestamp(datetime(*args)).value - 1) + + xp = _ex(2013, 1, 1) + self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='Q') + xp = _ex(2012, 4, 1) + self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='M') + xp = _ex(2012, 2, 1) + 
self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='D') + xp = _ex(2012, 1, 2) + self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='H') + xp = _ex(2012, 1, 1, 1) + self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='B') + xp = _ex(2012, 1, 3) + self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='W') + xp = _ex(2012, 1, 2) + self.assertEqual(xp, p.end_time) + + # Test for GH 11738 + p = Period('2012', freq='15D') + xp = _ex(2012, 1, 16) + self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='1D1H') + xp = _ex(2012, 1, 2, 1) + self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='1H1D') + xp = _ex(2012, 1, 2, 1) + self.assertEqual(xp, p.end_time) + + def test_anchor_week_end_time(self): + def _ex(*args): + return Timestamp(Timestamp(datetime(*args)).value - 1) + + p = Period('2013-1-1', 'W-SAT') + xp = _ex(2013, 1, 6) + self.assertEqual(p.end_time, xp) + + def test_properties_annually(self): + # Test properties on Periods with annually frequency. + a_date = Period(freq='A', year=2007) + self.assertEqual(a_date.year, 2007) + + def test_properties_quarterly(self): + # Test properties on Periods with daily frequency. + qedec_date = Period(freq="Q-DEC", year=2007, quarter=1) + qejan_date = Period(freq="Q-JAN", year=2007, quarter=1) + qejun_date = Period(freq="Q-JUN", year=2007, quarter=1) + # + for x in range(3): + for qd in (qedec_date, qejan_date, qejun_date): + self.assertEqual((qd + x).qyear, 2007) + self.assertEqual((qd + x).quarter, x + 1) + + def test_properties_monthly(self): + # Test properties on Periods with daily frequency. + m_date = Period(freq='M', year=2007, month=1) + for x in range(11): + m_ival_x = m_date + x + self.assertEqual(m_ival_x.year, 2007) + if 1 <= x + 1 <= 3: + self.assertEqual(m_ival_x.quarter, 1) + elif 4 <= x + 1 <= 6: + self.assertEqual(m_ival_x.quarter, 2) + elif 7 <= x + 1 <= 9: + self.assertEqual(m_ival_x.quarter, 3) + elif 10 <= x + 1 <= 12: + self.assertEqual(m_ival_x.quarter, 4) + self.assertEqual(m_ival_x.month, x + 1) + + def test_properties_weekly(self): + # Test properties on Periods with daily frequency. + w_date = Period(freq='W', year=2007, month=1, day=7) + # + self.assertEqual(w_date.year, 2007) + self.assertEqual(w_date.quarter, 1) + self.assertEqual(w_date.month, 1) + self.assertEqual(w_date.week, 1) + self.assertEqual((w_date - 1).week, 52) + self.assertEqual(w_date.days_in_month, 31) + self.assertEqual(Period(freq='W', year=2012, + month=2, day=1).days_in_month, 29) + + def test_properties_weekly_legacy(self): + # Test properties on Periods with daily frequency. + w_date = Period(freq='W', year=2007, month=1, day=7) + self.assertEqual(w_date.year, 2007) + self.assertEqual(w_date.quarter, 1) + self.assertEqual(w_date.month, 1) + self.assertEqual(w_date.week, 1) + self.assertEqual((w_date - 1).week, 52) + self.assertEqual(w_date.days_in_month, 31) + + exp = Period(freq='W', year=2012, month=2, day=1) + self.assertEqual(exp.days_in_month, 29) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK', year=2007, month=1, day=7) + + def test_properties_daily(self): + # Test properties on Periods with daily frequency. 
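+        # 2007-01-01 falls on a Monday, so both the business-day ('B') and
+        # calendar-day ('D') periods below report weekday 0 and dayofyear 1;
+        # days_in_month tracks the calendar month (31 for January 2007,
+        # 29 for February 2012, a leap year).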
+ b_date = Period(freq='B', year=2007, month=1, day=1) + # + self.assertEqual(b_date.year, 2007) + self.assertEqual(b_date.quarter, 1) + self.assertEqual(b_date.month, 1) + self.assertEqual(b_date.day, 1) + self.assertEqual(b_date.weekday, 0) + self.assertEqual(b_date.dayofyear, 1) + self.assertEqual(b_date.days_in_month, 31) + self.assertEqual(Period(freq='B', year=2012, + month=2, day=1).days_in_month, 29) + # + d_date = Period(freq='D', year=2007, month=1, day=1) + # + self.assertEqual(d_date.year, 2007) + self.assertEqual(d_date.quarter, 1) + self.assertEqual(d_date.month, 1) + self.assertEqual(d_date.day, 1) + self.assertEqual(d_date.weekday, 0) + self.assertEqual(d_date.dayofyear, 1) + self.assertEqual(d_date.days_in_month, 31) + self.assertEqual(Period(freq='D', year=2012, month=2, + day=1).days_in_month, 29) + + def test_properties_hourly(self): + # Test properties on Periods with hourly frequency. + h_date1 = Period(freq='H', year=2007, month=1, day=1, hour=0) + h_date2 = Period(freq='2H', year=2007, month=1, day=1, hour=0) + + for h_date in [h_date1, h_date2]: + self.assertEqual(h_date.year, 2007) + self.assertEqual(h_date.quarter, 1) + self.assertEqual(h_date.month, 1) + self.assertEqual(h_date.day, 1) + self.assertEqual(h_date.weekday, 0) + self.assertEqual(h_date.dayofyear, 1) + self.assertEqual(h_date.hour, 0) + self.assertEqual(h_date.days_in_month, 31) + self.assertEqual(Period(freq='H', year=2012, month=2, day=1, + hour=0).days_in_month, 29) + + def test_properties_minutely(self): + # Test properties on Periods with minutely frequency. + t_date = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=0) + # + self.assertEqual(t_date.quarter, 1) + self.assertEqual(t_date.month, 1) + self.assertEqual(t_date.day, 1) + self.assertEqual(t_date.weekday, 0) + self.assertEqual(t_date.dayofyear, 1) + self.assertEqual(t_date.hour, 0) + self.assertEqual(t_date.minute, 0) + self.assertEqual(t_date.days_in_month, 31) + self.assertEqual(Period(freq='D', year=2012, month=2, day=1, hour=0, + minute=0).days_in_month, 29) + + def test_properties_secondly(self): + # Test properties on Periods with secondly frequency. 
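+        # Note: the fixture below is built with freq='Min' rather than
+        # freq='S'; the second-resolution fields (hour, minute, second)
+        # are still exercised and all read 0 at the start of 2007-01-01.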
+        s_date = Period(freq='S', year=2007, month=1, day=1, hour=0,
+                        minute=0, second=0)
+        #
+        self.assertEqual(s_date.year, 2007)
+        self.assertEqual(s_date.quarter, 1)
+        self.assertEqual(s_date.month, 1)
+        self.assertEqual(s_date.day, 1)
+        self.assertEqual(s_date.weekday, 0)
+        self.assertEqual(s_date.dayofyear, 1)
+        self.assertEqual(s_date.hour, 0)
+        self.assertEqual(s_date.minute, 0)
+        self.assertEqual(s_date.second, 0)
+        self.assertEqual(s_date.days_in_month, 31)
+        self.assertEqual(Period(freq='S', year=2012, month=2, day=1, hour=0,
+                                minute=0, second=0).days_in_month, 29)
+
+    def test_properties_nat(self):
+        p_nat = Period('NaT', freq='M')
+        t_nat = pd.Timestamp('NaT')
+        self.assertIs(p_nat, t_nat)
+
+        # confirm Period('NaT') works identically to Timestamp('NaT')
+        for f in ['year', 'month', 'day', 'hour', 'minute', 'second', 'week',
+                  'dayofyear', 'quarter', 'days_in_month']:
+            self.assertTrue(np.isnan(getattr(p_nat, f)))
+            self.assertTrue(np.isnan(getattr(t_nat, f)))
+
+    def test_pnow(self):
+        dt = datetime.now()
+
+        val = period.pnow('D')
+        exp = Period(dt, freq='D')
+        self.assertEqual(val, exp)
+
+        val2 = period.pnow('2D')
+        exp2 = Period(dt, freq='2D')
+        self.assertEqual(val2, exp2)
+        self.assertEqual(val.ordinal, val2.ordinal)
+        self.assertEqual(val.ordinal, exp2.ordinal)
+
+    def test_constructor_corner(self):
+        expected = Period('2007-01', freq='2M')
+        self.assertEqual(Period(year=2007, month=1, freq='2M'), expected)
+
+        self.assertRaises(ValueError, Period, datetime.now())
+        self.assertRaises(ValueError, Period, datetime.now().date())
+        self.assertRaises(ValueError, Period, 1.6, freq='D')
+        self.assertRaises(ValueError, Period, ordinal=1.6, freq='D')
+        self.assertRaises(ValueError, Period, ordinal=2, value=1, freq='D')
+        self.assertIs(Period(None), pd.NaT)
+        self.assertRaises(ValueError, Period, month=1)
+
+        p = Period('2007-01-01', freq='D')
+
+        result = Period(p, freq='A')
+        exp = Period('2007', freq='A')
+        self.assertEqual(result, exp)
+
+    def test_constructor_infer_freq(self):
+        p = Period('2007-01-01')
+        self.assertEqual(p.freq, 'D')
+
+        p = Period('2007-01-01 07')
+        self.assertEqual(p.freq, 'H')
+
+        p = Period('2007-01-01 07:10')
+        self.assertEqual(p.freq, 'T')
+
+        p = Period('2007-01-01 07:10:15')
+        self.assertEqual(p.freq, 'S')
+
+        p = Period('2007-01-01 07:10:15.123')
+        self.assertEqual(p.freq, 'L')
+
+        p = Period('2007-01-01 07:10:15.123000')
+        self.assertEqual(p.freq, 'L')
+
+        p = Period('2007-01-01 07:10:15.123400')
+        self.assertEqual(p.freq, 'U')
+
+    def test_asfreq_MS(self):
+        initial = Period("2013")
+
+        self.assertEqual(initial.asfreq(freq="M", how="S"),
+                         Period('2013-01', 'M'))
+
+        msg = pd.tseries.frequencies._INVALID_FREQ_ERROR
+        with self.assertRaisesRegexp(ValueError, msg):
+            initial.asfreq(freq="MS", how="S")
+
+        with tm.assertRaisesRegexp(ValueError, msg):
+            pd.Period('2013-01', 'MS')
+
+        self.assertTrue(_period_code_map.get("MS") is None)
+
+    def test_badinput(self):
+        self.assertRaises(ValueError, Period, '-2000', 'A')
+        self.assertRaises(tslib.DateParseError, Period, '0', 'A')
+        self.assertRaises(tslib.DateParseError, Period, '1/1/-2000', 'A')
+
+    def test_multiples(self):
+        result1 = Period('1989', freq='2A')
+        result2 = Period('1989', freq='A')
+        self.assertEqual(result1.ordinal, result2.ordinal)
+        self.assertEqual(result1.freqstr, '2A-DEC')
+        self.assertEqual(result2.freqstr, 'A-DEC')
+        self.assertEqual(result1.freq, offsets.YearEnd(2))
+        self.assertEqual(result2.freq, offsets.YearEnd())
+
+        self.assertEqual((result1 + 1).ordinal, result1.ordinal +
2) + self.assertEqual((1 + result1).ordinal, result1.ordinal + 2) + self.assertEqual((result1 - 1).ordinal, result2.ordinal - 2) + self.assertEqual((-1 + result1).ordinal, result2.ordinal - 2) + + def test_round_trip(self): + + p = Period('2000Q1') + new_p = self.round_trip_pickle(p) + self.assertEqual(new_p, p) + + +class TestPeriodField(tm.TestCase): + + def test_get_period_field_raises_on_out_of_range(self): + self.assertRaises(ValueError, _period.get_period_field, -1, 0, 0) + + def test_get_period_field_array_raises_on_out_of_range(self): + self.assertRaises(ValueError, _period.get_period_field_arr, -1, + np.empty(1), 0) + + +class TestFreqConversion(tm.TestCase): + "Test frequency conversion of date objects" + + def test_asfreq_corner(self): + val = Period(freq='A', year=2007) + result1 = val.asfreq('5t') + result2 = val.asfreq('t') + expected = Period('2007-12-31 23:59', freq='t') + self.assertEqual(result1.ordinal, expected.ordinal) + self.assertEqual(result1.freqstr, '5T') + self.assertEqual(result2.ordinal, expected.ordinal) + self.assertEqual(result2.freqstr, 'T') + + def test_conv_annual(self): + # frequency conversion tests: from Annual Frequency + + ival_A = Period(freq='A', year=2007) + + ival_AJAN = Period(freq="A-JAN", year=2007) + ival_AJUN = Period(freq="A-JUN", year=2007) + ival_ANOV = Period(freq="A-NOV", year=2007) + + ival_A_to_Q_start = Period(freq='Q', year=2007, quarter=1) + ival_A_to_Q_end = Period(freq='Q', year=2007, quarter=4) + ival_A_to_M_start = Period(freq='M', year=2007, month=1) + ival_A_to_M_end = Period(freq='M', year=2007, month=12) + ival_A_to_W_start = Period(freq='W', year=2007, month=1, day=1) + ival_A_to_W_end = Period(freq='W', year=2007, month=12, day=31) + ival_A_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_A_to_B_end = Period(freq='B', year=2007, month=12, day=31) + ival_A_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_A_to_D_end = Period(freq='D', year=2007, month=12, day=31) + ival_A_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_A_to_H_end = Period(freq='H', year=2007, month=12, day=31, + hour=23) + ival_A_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_A_to_T_end = Period(freq='Min', year=2007, month=12, day=31, + hour=23, minute=59) + ival_A_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_A_to_S_end = Period(freq='S', year=2007, month=12, day=31, + hour=23, minute=59, second=59) + + ival_AJAN_to_D_end = Period(freq='D', year=2007, month=1, day=31) + ival_AJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) + ival_AJUN_to_D_end = Period(freq='D', year=2007, month=6, day=30) + ival_AJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) + ival_ANOV_to_D_end = Period(freq='D', year=2007, month=11, day=30) + ival_ANOV_to_D_start = Period(freq='D', year=2006, month=12, day=1) + + self.assertEqual(ival_A.asfreq('Q', 'S'), ival_A_to_Q_start) + self.assertEqual(ival_A.asfreq('Q', 'e'), ival_A_to_Q_end) + self.assertEqual(ival_A.asfreq('M', 's'), ival_A_to_M_start) + self.assertEqual(ival_A.asfreq('M', 'E'), ival_A_to_M_end) + self.assertEqual(ival_A.asfreq('W', 'S'), ival_A_to_W_start) + self.assertEqual(ival_A.asfreq('W', 'E'), ival_A_to_W_end) + self.assertEqual(ival_A.asfreq('B', 'S'), ival_A_to_B_start) + self.assertEqual(ival_A.asfreq('B', 'E'), ival_A_to_B_end) + self.assertEqual(ival_A.asfreq('D', 'S'), ival_A_to_D_start) + self.assertEqual(ival_A.asfreq('D', 'E'), ival_A_to_D_end) 
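+        # `how` is case-insensitive: 'S'/'s' anchors the conversion at the
+        # start of the period, 'E'/'e' at its end.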
+ self.assertEqual(ival_A.asfreq('H', 'S'), ival_A_to_H_start) + self.assertEqual(ival_A.asfreq('H', 'E'), ival_A_to_H_end) + self.assertEqual(ival_A.asfreq('min', 'S'), ival_A_to_T_start) + self.assertEqual(ival_A.asfreq('min', 'E'), ival_A_to_T_end) + self.assertEqual(ival_A.asfreq('T', 'S'), ival_A_to_T_start) + self.assertEqual(ival_A.asfreq('T', 'E'), ival_A_to_T_end) + self.assertEqual(ival_A.asfreq('S', 'S'), ival_A_to_S_start) + self.assertEqual(ival_A.asfreq('S', 'E'), ival_A_to_S_end) + + self.assertEqual(ival_AJAN.asfreq('D', 'S'), ival_AJAN_to_D_start) + self.assertEqual(ival_AJAN.asfreq('D', 'E'), ival_AJAN_to_D_end) + + self.assertEqual(ival_AJUN.asfreq('D', 'S'), ival_AJUN_to_D_start) + self.assertEqual(ival_AJUN.asfreq('D', 'E'), ival_AJUN_to_D_end) + + self.assertEqual(ival_ANOV.asfreq('D', 'S'), ival_ANOV_to_D_start) + self.assertEqual(ival_ANOV.asfreq('D', 'E'), ival_ANOV_to_D_end) + + self.assertEqual(ival_A.asfreq('A'), ival_A) + + def test_conv_quarterly(self): + # frequency conversion tests: from Quarterly Frequency + + ival_Q = Period(freq='Q', year=2007, quarter=1) + ival_Q_end_of_year = Period(freq='Q', year=2007, quarter=4) + + ival_QEJAN = Period(freq="Q-JAN", year=2007, quarter=1) + ival_QEJUN = Period(freq="Q-JUN", year=2007, quarter=1) + + ival_Q_to_A = Period(freq='A', year=2007) + ival_Q_to_M_start = Period(freq='M', year=2007, month=1) + ival_Q_to_M_end = Period(freq='M', year=2007, month=3) + ival_Q_to_W_start = Period(freq='W', year=2007, month=1, day=1) + ival_Q_to_W_end = Period(freq='W', year=2007, month=3, day=31) + ival_Q_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_Q_to_B_end = Period(freq='B', year=2007, month=3, day=30) + ival_Q_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_Q_to_D_end = Period(freq='D', year=2007, month=3, day=31) + ival_Q_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_Q_to_H_end = Period(freq='H', year=2007, month=3, day=31, hour=23) + ival_Q_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_Q_to_T_end = Period(freq='Min', year=2007, month=3, day=31, + hour=23, minute=59) + ival_Q_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_Q_to_S_end = Period(freq='S', year=2007, month=3, day=31, hour=23, + minute=59, second=59) + + ival_QEJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) + ival_QEJAN_to_D_end = Period(freq='D', year=2006, month=4, day=30) + + ival_QEJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) + ival_QEJUN_to_D_end = Period(freq='D', year=2006, month=9, day=30) + + self.assertEqual(ival_Q.asfreq('A'), ival_Q_to_A) + self.assertEqual(ival_Q_end_of_year.asfreq('A'), ival_Q_to_A) + + self.assertEqual(ival_Q.asfreq('M', 'S'), ival_Q_to_M_start) + self.assertEqual(ival_Q.asfreq('M', 'E'), ival_Q_to_M_end) + self.assertEqual(ival_Q.asfreq('W', 'S'), ival_Q_to_W_start) + self.assertEqual(ival_Q.asfreq('W', 'E'), ival_Q_to_W_end) + self.assertEqual(ival_Q.asfreq('B', 'S'), ival_Q_to_B_start) + self.assertEqual(ival_Q.asfreq('B', 'E'), ival_Q_to_B_end) + self.assertEqual(ival_Q.asfreq('D', 'S'), ival_Q_to_D_start) + self.assertEqual(ival_Q.asfreq('D', 'E'), ival_Q_to_D_end) + self.assertEqual(ival_Q.asfreq('H', 'S'), ival_Q_to_H_start) + self.assertEqual(ival_Q.asfreq('H', 'E'), ival_Q_to_H_end) + self.assertEqual(ival_Q.asfreq('Min', 'S'), ival_Q_to_T_start) + self.assertEqual(ival_Q.asfreq('Min', 'E'), ival_Q_to_T_end) + self.assertEqual(ival_Q.asfreq('S', 'S'), 
ival_Q_to_S_start) + self.assertEqual(ival_Q.asfreq('S', 'E'), ival_Q_to_S_end) + + self.assertEqual(ival_QEJAN.asfreq('D', 'S'), ival_QEJAN_to_D_start) + self.assertEqual(ival_QEJAN.asfreq('D', 'E'), ival_QEJAN_to_D_end) + self.assertEqual(ival_QEJUN.asfreq('D', 'S'), ival_QEJUN_to_D_start) + self.assertEqual(ival_QEJUN.asfreq('D', 'E'), ival_QEJUN_to_D_end) + + self.assertEqual(ival_Q.asfreq('Q'), ival_Q) + + def test_conv_monthly(self): + # frequency conversion tests: from Monthly Frequency + + ival_M = Period(freq='M', year=2007, month=1) + ival_M_end_of_year = Period(freq='M', year=2007, month=12) + ival_M_end_of_quarter = Period(freq='M', year=2007, month=3) + ival_M_to_A = Period(freq='A', year=2007) + ival_M_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_M_to_W_start = Period(freq='W', year=2007, month=1, day=1) + ival_M_to_W_end = Period(freq='W', year=2007, month=1, day=31) + ival_M_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_M_to_B_end = Period(freq='B', year=2007, month=1, day=31) + ival_M_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_M_to_D_end = Period(freq='D', year=2007, month=1, day=31) + ival_M_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_M_to_H_end = Period(freq='H', year=2007, month=1, day=31, hour=23) + ival_M_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_M_to_T_end = Period(freq='Min', year=2007, month=1, day=31, + hour=23, minute=59) + ival_M_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_M_to_S_end = Period(freq='S', year=2007, month=1, day=31, hour=23, + minute=59, second=59) + + self.assertEqual(ival_M.asfreq('A'), ival_M_to_A) + self.assertEqual(ival_M_end_of_year.asfreq('A'), ival_M_to_A) + self.assertEqual(ival_M.asfreq('Q'), ival_M_to_Q) + self.assertEqual(ival_M_end_of_quarter.asfreq('Q'), ival_M_to_Q) + + self.assertEqual(ival_M.asfreq('W', 'S'), ival_M_to_W_start) + self.assertEqual(ival_M.asfreq('W', 'E'), ival_M_to_W_end) + self.assertEqual(ival_M.asfreq('B', 'S'), ival_M_to_B_start) + self.assertEqual(ival_M.asfreq('B', 'E'), ival_M_to_B_end) + self.assertEqual(ival_M.asfreq('D', 'S'), ival_M_to_D_start) + self.assertEqual(ival_M.asfreq('D', 'E'), ival_M_to_D_end) + self.assertEqual(ival_M.asfreq('H', 'S'), ival_M_to_H_start) + self.assertEqual(ival_M.asfreq('H', 'E'), ival_M_to_H_end) + self.assertEqual(ival_M.asfreq('Min', 'S'), ival_M_to_T_start) + self.assertEqual(ival_M.asfreq('Min', 'E'), ival_M_to_T_end) + self.assertEqual(ival_M.asfreq('S', 'S'), ival_M_to_S_start) + self.assertEqual(ival_M.asfreq('S', 'E'), ival_M_to_S_end) + + self.assertEqual(ival_M.asfreq('M'), ival_M) + + def test_conv_weekly(self): + # frequency conversion tests: from Weekly Frequency + ival_W = Period(freq='W', year=2007, month=1, day=1) + + ival_WSUN = Period(freq='W', year=2007, month=1, day=7) + ival_WSAT = Period(freq='W-SAT', year=2007, month=1, day=6) + ival_WFRI = Period(freq='W-FRI', year=2007, month=1, day=5) + ival_WTHU = Period(freq='W-THU', year=2007, month=1, day=4) + ival_WWED = Period(freq='W-WED', year=2007, month=1, day=3) + ival_WTUE = Period(freq='W-TUE', year=2007, month=1, day=2) + ival_WMON = Period(freq='W-MON', year=2007, month=1, day=1) + + ival_WSUN_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_WSUN_to_D_end = Period(freq='D', year=2007, month=1, day=7) + ival_WSAT_to_D_start = Period(freq='D', year=2006, month=12, day=31) + ival_WSAT_to_D_end = Period(freq='D', year=2007, 
month=1, day=6) + ival_WFRI_to_D_start = Period(freq='D', year=2006, month=12, day=30) + ival_WFRI_to_D_end = Period(freq='D', year=2007, month=1, day=5) + ival_WTHU_to_D_start = Period(freq='D', year=2006, month=12, day=29) + ival_WTHU_to_D_end = Period(freq='D', year=2007, month=1, day=4) + ival_WWED_to_D_start = Period(freq='D', year=2006, month=12, day=28) + ival_WWED_to_D_end = Period(freq='D', year=2007, month=1, day=3) + ival_WTUE_to_D_start = Period(freq='D', year=2006, month=12, day=27) + ival_WTUE_to_D_end = Period(freq='D', year=2007, month=1, day=2) + ival_WMON_to_D_start = Period(freq='D', year=2006, month=12, day=26) + ival_WMON_to_D_end = Period(freq='D', year=2007, month=1, day=1) + + ival_W_end_of_year = Period(freq='W', year=2007, month=12, day=31) + ival_W_end_of_quarter = Period(freq='W', year=2007, month=3, day=31) + ival_W_end_of_month = Period(freq='W', year=2007, month=1, day=31) + ival_W_to_A = Period(freq='A', year=2007) + ival_W_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_W_to_M = Period(freq='M', year=2007, month=1) + + if Period(freq='D', year=2007, month=12, day=31).weekday == 6: + ival_W_to_A_end_of_year = Period(freq='A', year=2007) + else: + ival_W_to_A_end_of_year = Period(freq='A', year=2008) + + if Period(freq='D', year=2007, month=3, day=31).weekday == 6: + ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=1) + else: + ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=2) + + if Period(freq='D', year=2007, month=1, day=31).weekday == 6: + ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=1) + else: + ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=2) + + ival_W_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_W_to_B_end = Period(freq='B', year=2007, month=1, day=5) + ival_W_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_W_to_D_end = Period(freq='D', year=2007, month=1, day=7) + ival_W_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_W_to_H_end = Period(freq='H', year=2007, month=1, day=7, hour=23) + ival_W_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_W_to_T_end = Period(freq='Min', year=2007, month=1, day=7, + hour=23, minute=59) + ival_W_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_W_to_S_end = Period(freq='S', year=2007, month=1, day=7, hour=23, + minute=59, second=59) + + self.assertEqual(ival_W.asfreq('A'), ival_W_to_A) + self.assertEqual(ival_W_end_of_year.asfreq('A'), + ival_W_to_A_end_of_year) + self.assertEqual(ival_W.asfreq('Q'), ival_W_to_Q) + self.assertEqual(ival_W_end_of_quarter.asfreq('Q'), + ival_W_to_Q_end_of_quarter) + self.assertEqual(ival_W.asfreq('M'), ival_W_to_M) + self.assertEqual(ival_W_end_of_month.asfreq('M'), + ival_W_to_M_end_of_month) + + self.assertEqual(ival_W.asfreq('B', 'S'), ival_W_to_B_start) + self.assertEqual(ival_W.asfreq('B', 'E'), ival_W_to_B_end) + + self.assertEqual(ival_W.asfreq('D', 'S'), ival_W_to_D_start) + self.assertEqual(ival_W.asfreq('D', 'E'), ival_W_to_D_end) + + self.assertEqual(ival_WSUN.asfreq('D', 'S'), ival_WSUN_to_D_start) + self.assertEqual(ival_WSUN.asfreq('D', 'E'), ival_WSUN_to_D_end) + self.assertEqual(ival_WSAT.asfreq('D', 'S'), ival_WSAT_to_D_start) + self.assertEqual(ival_WSAT.asfreq('D', 'E'), ival_WSAT_to_D_end) + self.assertEqual(ival_WFRI.asfreq('D', 'S'), ival_WFRI_to_D_start) + self.assertEqual(ival_WFRI.asfreq('D', 'E'), ival_WFRI_to_D_end) + 
self.assertEqual(ival_WTHU.asfreq('D', 'S'), ival_WTHU_to_D_start) + self.assertEqual(ival_WTHU.asfreq('D', 'E'), ival_WTHU_to_D_end) + self.assertEqual(ival_WWED.asfreq('D', 'S'), ival_WWED_to_D_start) + self.assertEqual(ival_WWED.asfreq('D', 'E'), ival_WWED_to_D_end) + self.assertEqual(ival_WTUE.asfreq('D', 'S'), ival_WTUE_to_D_start) + self.assertEqual(ival_WTUE.asfreq('D', 'E'), ival_WTUE_to_D_end) + self.assertEqual(ival_WMON.asfreq('D', 'S'), ival_WMON_to_D_start) + self.assertEqual(ival_WMON.asfreq('D', 'E'), ival_WMON_to_D_end) + + self.assertEqual(ival_W.asfreq('H', 'S'), ival_W_to_H_start) + self.assertEqual(ival_W.asfreq('H', 'E'), ival_W_to_H_end) + self.assertEqual(ival_W.asfreq('Min', 'S'), ival_W_to_T_start) + self.assertEqual(ival_W.asfreq('Min', 'E'), ival_W_to_T_end) + self.assertEqual(ival_W.asfreq('S', 'S'), ival_W_to_S_start) + self.assertEqual(ival_W.asfreq('S', 'E'), ival_W_to_S_end) + + self.assertEqual(ival_W.asfreq('W'), ival_W) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + ival_W.asfreq('WK') + + def test_conv_weekly_legacy(self): + # frequency conversion tests: from Weekly Frequency + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK', year=2007, month=1, day=1) + + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-SAT', year=2007, month=1, day=6) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-FRI', year=2007, month=1, day=5) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-THU', year=2007, month=1, day=4) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-WED', year=2007, month=1, day=3) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-TUE', year=2007, month=1, day=2) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-MON', year=2007, month=1, day=1) + + def test_conv_business(self): + # frequency conversion tests: from Business Frequency" + + ival_B = Period(freq='B', year=2007, month=1, day=1) + ival_B_end_of_year = Period(freq='B', year=2007, month=12, day=31) + ival_B_end_of_quarter = Period(freq='B', year=2007, month=3, day=30) + ival_B_end_of_month = Period(freq='B', year=2007, month=1, day=31) + ival_B_end_of_week = Period(freq='B', year=2007, month=1, day=5) + + ival_B_to_A = Period(freq='A', year=2007) + ival_B_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_B_to_M = Period(freq='M', year=2007, month=1) + ival_B_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_B_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_B_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_B_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23) + ival_B_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_B_to_T_end = Period(freq='Min', year=2007, month=1, day=1, + hour=23, minute=59) + ival_B_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_B_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, + minute=59, second=59) + + self.assertEqual(ival_B.asfreq('A'), ival_B_to_A) + self.assertEqual(ival_B_end_of_year.asfreq('A'), ival_B_to_A) + self.assertEqual(ival_B.asfreq('Q'), ival_B_to_Q) + self.assertEqual(ival_B_end_of_quarter.asfreq('Q'), ival_B_to_Q) + self.assertEqual(ival_B.asfreq('M'), ival_B_to_M) + self.assertEqual(ival_B_end_of_month.asfreq('M'), ival_B_to_M) + self.assertEqual(ival_B.asfreq('W'), ival_B_to_W) 
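+        # a business day converts to the W period containing it; default
+        # weeks end on Sunday, so 2007-01-01 maps to the week of Jan 7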
+        self.assertEqual(ival_B_end_of_week.asfreq('W'), ival_B_to_W)
+
+        self.assertEqual(ival_B.asfreq('D'), ival_B_to_D)
+
+        self.assertEqual(ival_B.asfreq('H', 'S'), ival_B_to_H_start)
+        self.assertEqual(ival_B.asfreq('H', 'E'), ival_B_to_H_end)
+        self.assertEqual(ival_B.asfreq('Min', 'S'), ival_B_to_T_start)
+        self.assertEqual(ival_B.asfreq('Min', 'E'), ival_B_to_T_end)
+        self.assertEqual(ival_B.asfreq('S', 'S'), ival_B_to_S_start)
+        self.assertEqual(ival_B.asfreq('S', 'E'), ival_B_to_S_end)
+
+        self.assertEqual(ival_B.asfreq('B'), ival_B)
+
+    def test_conv_daily(self):
+        # frequency conversion tests: from Daily Frequency
+
+        ival_D = Period(freq='D', year=2007, month=1, day=1)
+        ival_D_end_of_year = Period(freq='D', year=2007, month=12, day=31)
+        ival_D_end_of_quarter = Period(freq='D', year=2007, month=3, day=31)
+        ival_D_end_of_month = Period(freq='D', year=2007, month=1, day=31)
+        ival_D_end_of_week = Period(freq='D', year=2007, month=1, day=7)
+
+        ival_D_friday = Period(freq='D', year=2007, month=1, day=5)
+        ival_D_saturday = Period(freq='D', year=2007, month=1, day=6)
+        ival_D_sunday = Period(freq='D', year=2007, month=1, day=7)
+
+        # TODO: unused?
+        # ival_D_monday = Period(freq='D', year=2007, month=1, day=8)
+
+        ival_B_friday = Period(freq='B', year=2007, month=1, day=5)
+        ival_B_monday = Period(freq='B', year=2007, month=1, day=8)
+
+        ival_D_to_A = Period(freq='A', year=2007)
+
+        ival_Deoq_to_AJAN = Period(freq='A-JAN', year=2008)
+        ival_Deoq_to_AJUN = Period(freq='A-JUN', year=2007)
+        ival_Deoq_to_ADEC = Period(freq='A-DEC', year=2007)
+
+        ival_D_to_QEJAN = Period(freq="Q-JAN", year=2007, quarter=4)
+        ival_D_to_QEJUN = Period(freq="Q-JUN", year=2007, quarter=3)
+        ival_D_to_QEDEC = Period(freq="Q-DEC", year=2007, quarter=1)
+
+        ival_D_to_M = Period(freq='M', year=2007, month=1)
+        ival_D_to_W = Period(freq='W', year=2007, month=1, day=7)
+
+        ival_D_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0)
+        ival_D_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23)
+        ival_D_to_T_start = Period(freq='Min', year=2007, month=1, day=1,
+                                   hour=0, minute=0)
+        ival_D_to_T_end = Period(freq='Min', year=2007, month=1, day=1,
+                                 hour=23, minute=59)
+        ival_D_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0,
+                                   minute=0, second=0)
+        ival_D_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23,
+                                 minute=59, second=59)
+
+        self.assertEqual(ival_D.asfreq('A'), ival_D_to_A)
+
+        self.assertEqual(ival_D_end_of_quarter.asfreq('A-JAN'),
+                         ival_Deoq_to_AJAN)
+        self.assertEqual(ival_D_end_of_quarter.asfreq('A-JUN'),
+                         ival_Deoq_to_AJUN)
+        self.assertEqual(ival_D_end_of_quarter.asfreq('A-DEC'),
+                         ival_Deoq_to_ADEC)
+
+        self.assertEqual(ival_D_end_of_year.asfreq('A'), ival_D_to_A)
+        self.assertEqual(ival_D_end_of_quarter.asfreq('Q'), ival_D_to_QEDEC)
+        self.assertEqual(ival_D.asfreq("Q-JAN"), ival_D_to_QEJAN)
+        self.assertEqual(ival_D.asfreq("Q-JUN"), ival_D_to_QEJUN)
+        self.assertEqual(ival_D.asfreq("Q-DEC"), ival_D_to_QEDEC)
+        self.assertEqual(ival_D.asfreq('M'), ival_D_to_M)
+        self.assertEqual(ival_D_end_of_month.asfreq('M'), ival_D_to_M)
+        self.assertEqual(ival_D.asfreq('W'), ival_D_to_W)
+        self.assertEqual(ival_D_end_of_week.asfreq('W'), ival_D_to_W)
+
+        # weekend days roll to the adjacent business day ('S' -> the
+        # preceding Friday, 'E' -> the following Monday)
+        self.assertEqual(ival_D_friday.asfreq('B'), ival_B_friday)
+        self.assertEqual(ival_D_saturday.asfreq('B', 'S'), ival_B_friday)
+        self.assertEqual(ival_D_saturday.asfreq('B', 'E'), ival_B_monday)
+        self.assertEqual(ival_D_sunday.asfreq('B', 'S'), ival_B_friday)
+        self.assertEqual(ival_D_sunday.asfreq('B',
'E'), ival_B_monday) + + self.assertEqual(ival_D.asfreq('H', 'S'), ival_D_to_H_start) + self.assertEqual(ival_D.asfreq('H', 'E'), ival_D_to_H_end) + self.assertEqual(ival_D.asfreq('Min', 'S'), ival_D_to_T_start) + self.assertEqual(ival_D.asfreq('Min', 'E'), ival_D_to_T_end) + self.assertEqual(ival_D.asfreq('S', 'S'), ival_D_to_S_start) + self.assertEqual(ival_D.asfreq('S', 'E'), ival_D_to_S_end) + + self.assertEqual(ival_D.asfreq('D'), ival_D) + + def test_conv_hourly(self): + # frequency conversion tests: from Hourly Frequency" + + ival_H = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_H_end_of_year = Period(freq='H', year=2007, month=12, day=31, + hour=23) + ival_H_end_of_quarter = Period(freq='H', year=2007, month=3, day=31, + hour=23) + ival_H_end_of_month = Period(freq='H', year=2007, month=1, day=31, + hour=23) + ival_H_end_of_week = Period(freq='H', year=2007, month=1, day=7, + hour=23) + ival_H_end_of_day = Period(freq='H', year=2007, month=1, day=1, + hour=23) + ival_H_end_of_bus = Period(freq='H', year=2007, month=1, day=1, + hour=23) + + ival_H_to_A = Period(freq='A', year=2007) + ival_H_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_H_to_M = Period(freq='M', year=2007, month=1) + ival_H_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_H_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_H_to_B = Period(freq='B', year=2007, month=1, day=1) + + ival_H_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_H_to_T_end = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=59) + ival_H_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_H_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=59, second=59) + + self.assertEqual(ival_H.asfreq('A'), ival_H_to_A) + self.assertEqual(ival_H_end_of_year.asfreq('A'), ival_H_to_A) + self.assertEqual(ival_H.asfreq('Q'), ival_H_to_Q) + self.assertEqual(ival_H_end_of_quarter.asfreq('Q'), ival_H_to_Q) + self.assertEqual(ival_H.asfreq('M'), ival_H_to_M) + self.assertEqual(ival_H_end_of_month.asfreq('M'), ival_H_to_M) + self.assertEqual(ival_H.asfreq('W'), ival_H_to_W) + self.assertEqual(ival_H_end_of_week.asfreq('W'), ival_H_to_W) + self.assertEqual(ival_H.asfreq('D'), ival_H_to_D) + self.assertEqual(ival_H_end_of_day.asfreq('D'), ival_H_to_D) + self.assertEqual(ival_H.asfreq('B'), ival_H_to_B) + self.assertEqual(ival_H_end_of_bus.asfreq('B'), ival_H_to_B) + + self.assertEqual(ival_H.asfreq('Min', 'S'), ival_H_to_T_start) + self.assertEqual(ival_H.asfreq('Min', 'E'), ival_H_to_T_end) + self.assertEqual(ival_H.asfreq('S', 'S'), ival_H_to_S_start) + self.assertEqual(ival_H.asfreq('S', 'E'), ival_H_to_S_end) + + self.assertEqual(ival_H.asfreq('H'), ival_H) + + def test_conv_minutely(self): + # frequency conversion tests: from Minutely Frequency" + + ival_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=0) + ival_T_end_of_year = Period(freq='Min', year=2007, month=12, day=31, + hour=23, minute=59) + ival_T_end_of_quarter = Period(freq='Min', year=2007, month=3, day=31, + hour=23, minute=59) + ival_T_end_of_month = Period(freq='Min', year=2007, month=1, day=31, + hour=23, minute=59) + ival_T_end_of_week = Period(freq='Min', year=2007, month=1, day=7, + hour=23, minute=59) + ival_T_end_of_day = Period(freq='Min', year=2007, month=1, day=1, + hour=23, minute=59) + ival_T_end_of_bus = Period(freq='Min', year=2007, month=1, day=1, + hour=23, minute=59) + ival_T_end_of_hour = Period(freq='Min', year=2007, 
month=1, day=1, + hour=0, minute=59) + + ival_T_to_A = Period(freq='A', year=2007) + ival_T_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_T_to_M = Period(freq='M', year=2007, month=1) + ival_T_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_T_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_T_to_B = Period(freq='B', year=2007, month=1, day=1) + ival_T_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) + + ival_T_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_T_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=59) + + self.assertEqual(ival_T.asfreq('A'), ival_T_to_A) + self.assertEqual(ival_T_end_of_year.asfreq('A'), ival_T_to_A) + self.assertEqual(ival_T.asfreq('Q'), ival_T_to_Q) + self.assertEqual(ival_T_end_of_quarter.asfreq('Q'), ival_T_to_Q) + self.assertEqual(ival_T.asfreq('M'), ival_T_to_M) + self.assertEqual(ival_T_end_of_month.asfreq('M'), ival_T_to_M) + self.assertEqual(ival_T.asfreq('W'), ival_T_to_W) + self.assertEqual(ival_T_end_of_week.asfreq('W'), ival_T_to_W) + self.assertEqual(ival_T.asfreq('D'), ival_T_to_D) + self.assertEqual(ival_T_end_of_day.asfreq('D'), ival_T_to_D) + self.assertEqual(ival_T.asfreq('B'), ival_T_to_B) + self.assertEqual(ival_T_end_of_bus.asfreq('B'), ival_T_to_B) + self.assertEqual(ival_T.asfreq('H'), ival_T_to_H) + self.assertEqual(ival_T_end_of_hour.asfreq('H'), ival_T_to_H) + + self.assertEqual(ival_T.asfreq('S', 'S'), ival_T_to_S_start) + self.assertEqual(ival_T.asfreq('S', 'E'), ival_T_to_S_end) + + self.assertEqual(ival_T.asfreq('Min'), ival_T) + + def test_conv_secondly(self): + # frequency conversion tests: from Secondly Frequency" + + ival_S = Period(freq='S', year=2007, month=1, day=1, hour=0, minute=0, + second=0) + ival_S_end_of_year = Period(freq='S', year=2007, month=12, day=31, + hour=23, minute=59, second=59) + ival_S_end_of_quarter = Period(freq='S', year=2007, month=3, day=31, + hour=23, minute=59, second=59) + ival_S_end_of_month = Period(freq='S', year=2007, month=1, day=31, + hour=23, minute=59, second=59) + ival_S_end_of_week = Period(freq='S', year=2007, month=1, day=7, + hour=23, minute=59, second=59) + ival_S_end_of_day = Period(freq='S', year=2007, month=1, day=1, + hour=23, minute=59, second=59) + ival_S_end_of_bus = Period(freq='S', year=2007, month=1, day=1, + hour=23, minute=59, second=59) + ival_S_end_of_hour = Period(freq='S', year=2007, month=1, day=1, + hour=0, minute=59, second=59) + ival_S_end_of_minute = Period(freq='S', year=2007, month=1, day=1, + hour=0, minute=0, second=59) + + ival_S_to_A = Period(freq='A', year=2007) + ival_S_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_S_to_M = Period(freq='M', year=2007, month=1) + ival_S_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_S_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_S_to_B = Period(freq='B', year=2007, month=1, day=1) + ival_S_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_S_to_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=0) + + self.assertEqual(ival_S.asfreq('A'), ival_S_to_A) + self.assertEqual(ival_S_end_of_year.asfreq('A'), ival_S_to_A) + self.assertEqual(ival_S.asfreq('Q'), ival_S_to_Q) + self.assertEqual(ival_S_end_of_quarter.asfreq('Q'), ival_S_to_Q) + self.assertEqual(ival_S.asfreq('M'), ival_S_to_M) + self.assertEqual(ival_S_end_of_month.asfreq('M'), ival_S_to_M) + self.assertEqual(ival_S.asfreq('W'), ival_S_to_W) + self.assertEqual(ival_S_end_of_week.asfreq('W'), 
ival_S_to_W) + self.assertEqual(ival_S.asfreq('D'), ival_S_to_D) + self.assertEqual(ival_S_end_of_day.asfreq('D'), ival_S_to_D) + self.assertEqual(ival_S.asfreq('B'), ival_S_to_B) + self.assertEqual(ival_S_end_of_bus.asfreq('B'), ival_S_to_B) + self.assertEqual(ival_S.asfreq('H'), ival_S_to_H) + self.assertEqual(ival_S_end_of_hour.asfreq('H'), ival_S_to_H) + self.assertEqual(ival_S.asfreq('Min'), ival_S_to_T) + self.assertEqual(ival_S_end_of_minute.asfreq('Min'), ival_S_to_T) + + self.assertEqual(ival_S.asfreq('S'), ival_S) + + def test_asfreq_mult(self): + # normal freq to mult freq + p = Period(freq='A', year=2007) + # ordinal will not change + for freq in ['3A', offsets.YearEnd(3)]: + result = p.asfreq(freq) + expected = Period('2007', freq='3A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + # ordinal will not change + for freq in ['3A', offsets.YearEnd(3)]: + result = p.asfreq(freq, how='S') + expected = Period('2007', freq='3A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + # mult freq to normal freq + p = Period(freq='3A', year=2007) + # ordinal will change because how=E is the default + for freq in ['A', offsets.YearEnd()]: + result = p.asfreq(freq) + expected = Period('2009', freq='A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + # ordinal will not change + for freq in ['A', offsets.YearEnd()]: + result = p.asfreq(freq, how='S') + expected = Period('2007', freq='A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + p = Period(freq='A', year=2007) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq) + expected = Period('2007-12', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq, how='S') + expected = Period('2007-01', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + p = Period(freq='3A', year=2007) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq) + expected = Period('2009-12', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq, how='S') + expected = Period('2007-01', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + def test_asfreq_combined(self): + # normal freq to combined freq + p = Period('2007', freq='H') + + # ordinal will not change + expected = Period('2007', freq='25H') + for freq, how in zip(['1D1H', '1H1D'], ['E', 'S']): + result = p.asfreq(freq, how=how) + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + # combined freq to normal freq + p1 = Period(freq='1D1H', year=2007) + p2 = Period(freq='1H1D', year=2007) + + # ordinal will change because how=E is the default + result1 = p1.asfreq('H') + result2 = p2.asfreq('H') + expected = 
Period('2007-01-02', freq='H') + self.assertEqual(result1, expected) + self.assertEqual(result1.ordinal, expected.ordinal) + self.assertEqual(result1.freq, expected.freq) + self.assertEqual(result2, expected) + self.assertEqual(result2.ordinal, expected.ordinal) + self.assertEqual(result2.freq, expected.freq) + + # ordinal will not change + result1 = p1.asfreq('H', how='S') + result2 = p2.asfreq('H', how='S') + expected = Period('2007-01-01', freq='H') + self.assertEqual(result1, expected) + self.assertEqual(result1.ordinal, expected.ordinal) + self.assertEqual(result1.freq, expected.freq) + self.assertEqual(result2, expected) + self.assertEqual(result2.ordinal, expected.ordinal) + self.assertEqual(result2.freq, expected.freq) + + def test_is_leap_year(self): + # GH 13727 + for freq in ['A', 'M', 'D', 'H']: + p = Period('2000-01-01 00:00:00', freq=freq) + self.assertTrue(p.is_leap_year) + self.assertIsInstance(p.is_leap_year, bool) + + p = Period('1999-01-01 00:00:00', freq=freq) + self.assertFalse(p.is_leap_year) + + p = Period('2004-01-01 00:00:00', freq=freq) + self.assertTrue(p.is_leap_year) + + p = Period('2100-01-01 00:00:00', freq=freq) + self.assertFalse(p.is_leap_year) + + +class TestMethods(tm.TestCase): + + def test_add(self): + dt1 = Period(freq='D', year=2008, month=1, day=1) + dt2 = Period(freq='D', year=2008, month=1, day=2) + self.assertEqual(dt1 + 1, dt2) + self.assertEqual(1 + dt1, dt2) + + def test_add_pdnat(self): + p = pd.Period('2011-01', freq='M') + self.assertIs(p + pd.NaT, pd.NaT) + self.assertIs(pd.NaT + p, pd.NaT) + + p = pd.Period('NaT', freq='M') + self.assertIs(p + pd.NaT, pd.NaT) + self.assertIs(pd.NaT + p, pd.NaT) + + def test_add_raises(self): + # GH 4731 + dt1 = Period(freq='D', year=2008, month=1, day=1) + dt2 = Period(freq='D', year=2008, month=1, day=2) + msg = r"unsupported operand type\(s\)" + with tm.assertRaisesRegexp(TypeError, msg): + dt1 + "str" + + msg = r"unsupported operand type\(s\)" + with tm.assertRaisesRegexp(TypeError, msg): + "str" + dt1 + + with tm.assertRaisesRegexp(TypeError, msg): + dt1 + dt2 + + def test_sub(self): + dt1 = Period('2011-01-01', freq='D') + dt2 = Period('2011-01-15', freq='D') + + self.assertEqual(dt1 - dt2, -14) + self.assertEqual(dt2 - dt1, 14) + + msg = r"Input has different freq=M from Period\(freq=D\)" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + dt1 - pd.Period('2011-02', freq='M') + + def test_add_offset(self): + # freq is DateOffset + for freq in ['A', '2A', '3A']: + p = Period('2011', freq=freq) + exp = Period('2013', freq=freq) + self.assertEqual(p + offsets.YearEnd(2), exp) + self.assertEqual(offsets.YearEnd(2) + p, exp) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + with tm.assertRaises(period.IncompatibleFrequency): + p + o + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + with tm.assertRaises(period.IncompatibleFrequency): + o + p + + for freq in ['M', '2M', '3M']: + p = Period('2011-03', freq=freq) + exp = Period('2011-05', freq=freq) + self.assertEqual(p + offsets.MonthEnd(2), exp) + self.assertEqual(offsets.MonthEnd(2) + p, exp) + + exp = Period('2012-03', freq=freq) + self.assertEqual(p + offsets.MonthEnd(12), exp) + self.assertEqual(offsets.MonthEnd(12) + p, exp) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + with tm.assertRaises(period.IncompatibleFrequency): + p + o + + if 
isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + with tm.assertRaises(period.IncompatibleFrequency): + o + p + + # freq is Tick + for freq in ['D', '2D', '3D']: + p = Period('2011-04-01', freq=freq) + + exp = Period('2011-04-06', freq=freq) + self.assertEqual(p + offsets.Day(5), exp) + self.assertEqual(offsets.Day(5) + p, exp) + + exp = Period('2011-04-02', freq=freq) + self.assertEqual(p + offsets.Hour(24), exp) + self.assertEqual(offsets.Hour(24) + p, exp) + + exp = Period('2011-04-03', freq=freq) + self.assertEqual(p + np.timedelta64(2, 'D'), exp) + with tm.assertRaises(TypeError): + np.timedelta64(2, 'D') + p + + exp = Period('2011-04-02', freq=freq) + self.assertEqual(p + np.timedelta64(3600 * 24, 's'), exp) + with tm.assertRaises(TypeError): + np.timedelta64(3600 * 24, 's') + p + + exp = Period('2011-03-30', freq=freq) + self.assertEqual(p + timedelta(-2), exp) + self.assertEqual(timedelta(-2) + p, exp) + + exp = Period('2011-04-03', freq=freq) + self.assertEqual(p + timedelta(hours=48), exp) + self.assertEqual(timedelta(hours=48) + p, exp) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(4, 'h'), + timedelta(hours=23)]: + with tm.assertRaises(period.IncompatibleFrequency): + p + o + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + with tm.assertRaises(period.IncompatibleFrequency): + o + p + + for freq in ['H', '2H', '3H']: + p = Period('2011-04-01 09:00', freq=freq) + + exp = Period('2011-04-03 09:00', freq=freq) + self.assertEqual(p + offsets.Day(2), exp) + self.assertEqual(offsets.Day(2) + p, exp) + + exp = Period('2011-04-01 12:00', freq=freq) + self.assertEqual(p + offsets.Hour(3), exp) + self.assertEqual(offsets.Hour(3) + p, exp) + + exp = Period('2011-04-01 12:00', freq=freq) + self.assertEqual(p + np.timedelta64(3, 'h'), exp) + with tm.assertRaises(TypeError): + np.timedelta64(3, 'h') + p + + exp = Period('2011-04-01 10:00', freq=freq) + self.assertEqual(p + np.timedelta64(3600, 's'), exp) + with tm.assertRaises(TypeError): + np.timedelta64(3600, 's') + p + + exp = Period('2011-04-01 11:00', freq=freq) + self.assertEqual(p + timedelta(minutes=120), exp) + self.assertEqual(timedelta(minutes=120) + p, exp) + + exp = Period('2011-04-05 12:00', freq=freq) + self.assertEqual(p + timedelta(days=4, minutes=180), exp) + self.assertEqual(timedelta(days=4, minutes=180) + p, exp) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(3200, 's'), + timedelta(hours=23, minutes=30)]: + with tm.assertRaises(period.IncompatibleFrequency): + p + o + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + with tm.assertRaises(period.IncompatibleFrequency): + o + p + + def test_add_offset_nat(self): + # freq is DateOffset + for freq in ['A', '2A', '3A']: + p = Period('NaT', freq=freq) + for o in [offsets.YearEnd(2)]: + self.assertIs(p + o, tslib.NaT) + self.assertIs(o + p, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + self.assertIs(p + o, tslib.NaT) + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + self.assertIs(o + p, tslib.NaT) + + for freq in ['M', '2M', '3M']: + p = Period('NaT', freq=freq) + for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: + self.assertIs(p + o, tslib.NaT) + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + 
self.assertIs(o + p, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + self.assertIs(p + o, tslib.NaT) + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + self.assertIs(o + p, tslib.NaT) + + # freq is Tick + for freq in ['D', '2D', '3D']: + p = Period('NaT', freq=freq) + for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), + np.timedelta64(3600 * 24, 's'), timedelta(-2), + timedelta(hours=48)]: + self.assertIs(p + o, tslib.NaT) + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + self.assertIs(o + p, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(4, 'h'), + timedelta(hours=23)]: + self.assertIs(p + o, tslib.NaT) + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + self.assertIs(o + p, tslib.NaT) + + for freq in ['H', '2H', '3H']: + p = Period('NaT', freq=freq) + for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), + np.timedelta64(3600, 's'), timedelta(minutes=120), + timedelta(days=4, minutes=180)]: + self.assertIs(p + o, tslib.NaT) + + if not isinstance(o, np.timedelta64): + self.assertIs(o + p, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(3200, 's'), + timedelta(hours=23, minutes=30)]: + self.assertIs(p + o, tslib.NaT) + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + self.assertIs(o + p, tslib.NaT) + + def test_sub_pdnat(self): + # GH 13071 + p = pd.Period('2011-01', freq='M') + self.assertIs(p - pd.NaT, pd.NaT) + self.assertIs(pd.NaT - p, pd.NaT) + + p = pd.Period('NaT', freq='M') + self.assertIs(p - pd.NaT, pd.NaT) + self.assertIs(pd.NaT - p, pd.NaT) + + def test_sub_offset(self): + # freq is DateOffset + for freq in ['A', '2A', '3A']: + p = Period('2011', freq=freq) + self.assertEqual(p - offsets.YearEnd(2), Period('2009', freq=freq)) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + with tm.assertRaises(period.IncompatibleFrequency): + p - o + + for freq in ['M', '2M', '3M']: + p = Period('2011-03', freq=freq) + self.assertEqual(p - offsets.MonthEnd(2), + Period('2011-01', freq=freq)) + self.assertEqual(p - offsets.MonthEnd(12), + Period('2010-03', freq=freq)) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + with tm.assertRaises(period.IncompatibleFrequency): + p - o + + # freq is Tick + for freq in ['D', '2D', '3D']: + p = Period('2011-04-01', freq=freq) + self.assertEqual(p - offsets.Day(5), + Period('2011-03-27', freq=freq)) + self.assertEqual(p - offsets.Hour(24), + Period('2011-03-31', freq=freq)) + self.assertEqual(p - np.timedelta64(2, 'D'), + Period('2011-03-30', freq=freq)) + self.assertEqual(p - np.timedelta64(3600 * 24, 's'), + Period('2011-03-31', freq=freq)) + self.assertEqual(p - timedelta(-2), + Period('2011-04-03', freq=freq)) + self.assertEqual(p - timedelta(hours=48), + Period('2011-03-30', freq=freq)) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(4, 'h'), + timedelta(hours=23)]: + with tm.assertRaises(period.IncompatibleFrequency): + p - o + + for freq in ['H', '2H', '3H']: + p = Period('2011-04-01 09:00', freq=freq) + self.assertEqual(p - offsets.Day(2), + Period('2011-03-30 09:00', 
freq=freq)) + self.assertEqual(p - offsets.Hour(3), + Period('2011-04-01 06:00', freq=freq)) + self.assertEqual(p - np.timedelta64(3, 'h'), + Period('2011-04-01 06:00', freq=freq)) + self.assertEqual(p - np.timedelta64(3600, 's'), + Period('2011-04-01 08:00', freq=freq)) + self.assertEqual(p - timedelta(minutes=120), + Period('2011-04-01 07:00', freq=freq)) + self.assertEqual(p - timedelta(days=4, minutes=180), + Period('2011-03-28 06:00', freq=freq)) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(3200, 's'), + timedelta(hours=23, minutes=30)]: + with tm.assertRaises(period.IncompatibleFrequency): + p - o + + def test_sub_offset_nat(self): + # freq is DateOffset + for freq in ['A', '2A', '3A']: + p = Period('NaT', freq=freq) + for o in [offsets.YearEnd(2)]: + self.assertIs(p - o, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + self.assertIs(p - o, tslib.NaT) + + for freq in ['M', '2M', '3M']: + p = Period('NaT', freq=freq) + for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: + self.assertIs(p - o, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + self.assertIs(p - o, tslib.NaT) + + # freq is Tick + for freq in ['D', '2D', '3D']: + p = Period('NaT', freq=freq) + for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), + np.timedelta64(3600 * 24, 's'), timedelta(-2), + timedelta(hours=48)]: + self.assertIs(p - o, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(4, 'h'), + timedelta(hours=23)]: + self.assertIs(p - o, tslib.NaT) + + for freq in ['H', '2H', '3H']: + p = Period('NaT', freq=freq) + for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), + np.timedelta64(3600, 's'), timedelta(minutes=120), + timedelta(days=4, minutes=180)]: + self.assertIs(p - o, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(3200, 's'), + timedelta(hours=23, minutes=30)]: + self.assertIs(p - o, tslib.NaT) + + def test_nat_ops(self): + for freq in ['M', '2M', '3M']: + p = Period('NaT', freq=freq) + self.assertIs(p + 1, tslib.NaT) + self.assertIs(1 + p, tslib.NaT) + self.assertIs(p - 1, tslib.NaT) + self.assertIs(p - Period('2011-01', freq=freq), tslib.NaT) + self.assertIs(Period('2011-01', freq=freq) - p, tslib.NaT) + + def test_period_ops_offset(self): + p = Period('2011-04-01', freq='D') + result = p + offsets.Day() + exp = pd.Period('2011-04-02', freq='D') + self.assertEqual(result, exp) + + result = p - offsets.Day(2) + exp = pd.Period('2011-03-30', freq='D') + self.assertEqual(result, exp) + + msg = r"Input cannot be converted to Period\(freq=D\)" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + p + offsets.Hour(2) + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + p - offsets.Hour(2) diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py new file mode 100644 index 0000000000000..f1ae7765648ca --- /dev/null +++ b/pandas/tests/series/test_period.py @@ -0,0 +1,248 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +import pandas.tseries.period as period +from pandas import Series, period_range, DataFrame, Period + + +def _permute(obj): + return obj.take(np.random.permutation(len(obj))) + + +class TestSeriesPeriod(tm.TestCase): + + def setUp(self): + self.series = 
Series(period_range('2000-01-01', periods=10, freq='D')) + + def test_auto_conversion(self): + series = Series(list(period_range('2000-01-01', periods=10, freq='D'))) + self.assertEqual(series.dtype, 'object') + + series = pd.Series([pd.Period('2011-01-01', freq='D'), + pd.Period('2011-02-01', freq='D')]) + self.assertEqual(series.dtype, 'object') + + def test_getitem(self): + self.assertEqual(self.series[1], pd.Period('2000-01-02', freq='D')) + + result = self.series[[2, 4]] + exp = pd.Series([pd.Period('2000-01-03', freq='D'), + pd.Period('2000-01-05', freq='D')], + index=[2, 4]) + self.assert_series_equal(result, exp) + self.assertEqual(result.dtype, 'object') + + def test_isnull(self): + # GH 13737 + s = Series([pd.Period('2011-01', freq='M'), + pd.Period('NaT', freq='M')]) + tm.assert_series_equal(s.isnull(), Series([False, True])) + tm.assert_series_equal(s.notnull(), Series([True, False])) + + def test_fillna(self): + # GH 13737 + s = Series([pd.Period('2011-01', freq='M'), + pd.Period('NaT', freq='M')]) + + res = s.fillna(pd.Period('2012-01', freq='M')) + exp = Series([pd.Period('2011-01', freq='M'), + pd.Period('2012-01', freq='M')]) + tm.assert_series_equal(res, exp) + self.assertEqual(res.dtype, 'object') + + res = s.fillna('XXX') + exp = Series([pd.Period('2011-01', freq='M'), 'XXX']) + tm.assert_series_equal(res, exp) + self.assertEqual(res.dtype, 'object') + + def test_dropna(self): + # GH 13737 + s = Series([pd.Period('2011-01', freq='M'), + pd.Period('NaT', freq='M')]) + tm.assert_series_equal(s.dropna(), + Series([pd.Period('2011-01', freq='M')])) + + def test_series_comparison_scalars(self): + val = pd.Period('2000-01-04', freq='D') + result = self.series > val + expected = pd.Series([x > val for x in self.series]) + tm.assert_series_equal(result, expected) + + val = self.series[5] + result = self.series > val + expected = pd.Series([x > val for x in self.series]) + tm.assert_series_equal(result, expected) + + def test_between(self): + left, right = self.series[[2, 7]] + result = self.series.between(left, right) + expected = (self.series >= left) & (self.series <= right) + tm.assert_series_equal(result, expected) + + # --------------------------------------------------------------------- + # NaT support + + """ + # ToDo: Enable when support period dtype + def test_NaT_scalar(self): + series = Series([0, 1000, 2000, iNaT], dtype='period[D]') + + val = series[3] + self.assertTrue(isnull(val)) + + series[2] = val + self.assertTrue(isnull(series[2])) + + def test_NaT_cast(self): + result = Series([np.nan]).astype('period[D]') + expected = Series([NaT]) + tm.assert_series_equal(result, expected) + """ + + def test_set_none_nan(self): + # currently Period is stored as object dtype, not as NaT + self.series[3] = None + self.assertIs(self.series[3], None) + + self.series[3:5] = None + self.assertIs(self.series[4], None) + + self.series[5] = np.nan + self.assertTrue(np.isnan(self.series[5])) + + self.series[5:7] = np.nan + self.assertTrue(np.isnan(self.series[6])) + + def test_intercept_astype_object(self): + expected = self.series.astype('object') + + df = DataFrame({'a': self.series, + 'b': np.random.randn(len(self.series))}) + + result = df.values.squeeze() + self.assertTrue((result[:, 0] == expected.values).all()) + + df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)}) + + result = df.values.squeeze() + self.assertTrue((result[:, 0] == expected.values).all()) + + def test_comp_series_period_scalar(self): + # GH 13200 + for freq in ['M', '2M', '3M']: + base = 
Series([Period(x, freq=freq) for x in + ['2011-01', '2011-02', '2011-03', '2011-04']]) + p = Period('2011-02', freq=freq) + + exp = pd.Series([False, True, False, False]) + tm.assert_series_equal(base == p, exp) + tm.assert_series_equal(p == base, exp) + + exp = pd.Series([True, False, True, True]) + tm.assert_series_equal(base != p, exp) + tm.assert_series_equal(p != base, exp) + + exp = pd.Series([False, False, True, True]) + tm.assert_series_equal(base > p, exp) + tm.assert_series_equal(p < base, exp) + + exp = pd.Series([True, False, False, False]) + tm.assert_series_equal(base < p, exp) + tm.assert_series_equal(p > base, exp) + + exp = pd.Series([False, True, True, True]) + tm.assert_series_equal(base >= p, exp) + tm.assert_series_equal(p <= base, exp) + + exp = pd.Series([True, True, False, False]) + tm.assert_series_equal(base <= p, exp) + tm.assert_series_equal(p >= base, exp) + + # different base freq + msg = "Input has different freq=A-DEC from Period" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + base <= Period('2011', freq='A') + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + Period('2011', freq='A') >= base + + def test_comp_series_period_series(self): + # GH 13200 + for freq in ['M', '2M', '3M']: + base = Series([Period(x, freq=freq) for x in + ['2011-01', '2011-02', '2011-03', '2011-04']]) + + s = Series([Period(x, freq=freq) for x in + ['2011-02', '2011-01', '2011-03', '2011-05']]) + + exp = Series([False, False, True, False]) + tm.assert_series_equal(base == s, exp) + + exp = Series([True, True, False, True]) + tm.assert_series_equal(base != s, exp) + + exp = Series([False, True, False, False]) + tm.assert_series_equal(base > s, exp) + + exp = Series([True, False, False, True]) + tm.assert_series_equal(base < s, exp) + + exp = Series([False, True, True, False]) + tm.assert_series_equal(base >= s, exp) + + exp = Series([True, False, True, True]) + tm.assert_series_equal(base <= s, exp) + + s2 = Series([Period(x, freq='A') for x in + ['2011', '2011', '2011', '2011']]) + + # different base freq + msg = "Input has different freq=A-DEC from Period" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + base <= s2 + + def test_comp_series_period_object(self): + # GH 13200 + base = Series([Period('2011', freq='A'), Period('2011-02', freq='M'), + Period('2013', freq='A'), Period('2011-04', freq='M')]) + + s = Series([Period('2012', freq='A'), Period('2011-01', freq='M'), + Period('2013', freq='A'), Period('2011-05', freq='M')]) + + exp = Series([False, False, True, False]) + tm.assert_series_equal(base == s, exp) + + exp = Series([True, True, False, True]) + tm.assert_series_equal(base != s, exp) + + exp = Series([False, True, False, False]) + tm.assert_series_equal(base > s, exp) + + exp = Series([True, False, False, True]) + tm.assert_series_equal(base < s, exp) + + exp = Series([False, True, True, False]) + tm.assert_series_equal(base >= s, exp) + + exp = Series([True, False, True, True]) + tm.assert_series_equal(base <= s, exp) + + def test_align_series(self): + rng = period_range('1/1/2000', '1/1/2010', freq='A') + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts + ts[::2] + expected = ts + ts + expected[1::2] = np.nan + tm.assert_series_equal(result, expected) + + result = ts + _permute(ts[::2]) + tm.assert_series_equal(result, expected) + + # it works! 
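+        # align() should accept every join type against a strided view
+        # of the same series without raising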
+ for kind in ['inner', 'outer', 'left', 'right']: + ts.align(ts[::2], join=kind) + msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + ts + ts.asfreq('D', how="end") diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py deleted file mode 100644 index 3459da9d2b5c5..0000000000000 --- a/pandas/tseries/tests/test_period.py +++ /dev/null @@ -1,5065 +0,0 @@ -"""Tests suite for Period handling. - -Parts derived from scikits.timeseries code, original authors: -- Pierre Gerard-Marchant & Matt Knox -- pierregm_at_uga_dot_edu - mattknow_ca_at_hotmail_dot_com - -""" - -import numpy as np -from numpy.random import randn -from datetime import datetime, date, timedelta - -import pandas as pd -import pandas.util.testing as tm -import pandas.tseries.period as period -import pandas.tseries.offsets as offsets -from pandas.tseries.tools import to_datetime -from pandas.tseries.period import Period, PeriodIndex, period_range -from pandas.tseries.index import DatetimeIndex, date_range, Index -from pandas._period import period_ordinal, period_asfreq -from pandas.compat import range, lrange, lmap, zip, text_type, PY3, iteritems -from pandas.compat.numpy import np_datetime64_compat -from pandas.tseries.frequencies import (MONTHS, DAYS, _period_code_map, - get_freq) -from pandas import (Series, DataFrame, Timestamp, _period, tslib, - _np_version_under1p9, _np_version_under1p10, - _np_version_under1p12) - - -class TestPeriodProperties(tm.TestCase): - "Test properties such as year, month, weekday, etc...." - - def test_quarterly_negative_ordinals(self): - p = Period(ordinal=-1, freq='Q-DEC') - self.assertEqual(p.year, 1969) - self.assertEqual(p.quarter, 4) - self.assertIsInstance(p, Period) - - p = Period(ordinal=-2, freq='Q-DEC') - self.assertEqual(p.year, 1969) - self.assertEqual(p.quarter, 3) - self.assertIsInstance(p, Period) - - p = Period(ordinal=-2, freq='M') - self.assertEqual(p.year, 1969) - self.assertEqual(p.month, 11) - self.assertIsInstance(p, Period) - - def test_period_cons_quarterly(self): - # bugs in scikits.timeseries - for month in MONTHS: - freq = 'Q-%s' % month - exp = Period('1989Q3', freq=freq) - self.assertIn('1989Q3', str(exp)) - stamp = exp.to_timestamp('D', how='end') - p = Period(stamp, freq=freq) - self.assertEqual(p, exp) - - stamp = exp.to_timestamp('3D', how='end') - p = Period(stamp, freq=freq) - self.assertEqual(p, exp) - - def test_period_cons_annual(self): - # bugs in scikits.timeseries - for month in MONTHS: - freq = 'A-%s' % month - exp = Period('1989', freq=freq) - stamp = exp.to_timestamp('D', how='end') + timedelta(days=30) - p = Period(stamp, freq=freq) - self.assertEqual(p, exp + 1) - self.assertIsInstance(p, Period) - - def test_period_cons_weekly(self): - for num in range(10, 17): - daystr = '2011-02-%d' % num - for day in DAYS: - freq = 'W-%s' % day - - result = Period(daystr, freq=freq) - expected = Period(daystr, freq='D').asfreq(freq) - self.assertEqual(result, expected) - self.assertIsInstance(result, Period) - - def test_period_from_ordinal(self): - p = pd.Period('2011-01', freq='M') - res = pd.Period._from_ordinal(p.ordinal, freq='M') - self.assertEqual(p, res) - self.assertIsInstance(res, Period) - - def test_period_cons_nat(self): - p = Period('NaT', freq='M') - self.assertIs(p, pd.NaT) - - p = Period('nat', freq='W-SUN') - self.assertIs(p, pd.NaT) - - p = Period(tslib.iNaT, freq='D') - self.assertIs(p, pd.NaT) - - p = Period(tslib.iNaT, freq='3D') 
- self.assertIs(p, pd.NaT) - - p = Period(tslib.iNaT, freq='1D1H') - self.assertIs(p, pd.NaT) - - p = Period('NaT') - self.assertIs(p, pd.NaT) - - p = Period(tslib.iNaT) - self.assertIs(p, pd.NaT) - - def test_cons_null_like(self): - # check Timestamp compat - self.assertIs(Timestamp('NaT'), pd.NaT) - self.assertIs(Period('NaT'), pd.NaT) - - self.assertIs(Timestamp(None), pd.NaT) - self.assertIs(Period(None), pd.NaT) - - self.assertIs(Timestamp(float('nan')), pd.NaT) - self.assertIs(Period(float('nan')), pd.NaT) - - self.assertIs(Timestamp(np.nan), pd.NaT) - self.assertIs(Period(np.nan), pd.NaT) - - def test_period_cons_mult(self): - p1 = Period('2011-01', freq='3M') - p2 = Period('2011-01', freq='M') - self.assertEqual(p1.ordinal, p2.ordinal) - - self.assertEqual(p1.freq, offsets.MonthEnd(3)) - self.assertEqual(p1.freqstr, '3M') - - self.assertEqual(p2.freq, offsets.MonthEnd()) - self.assertEqual(p2.freqstr, 'M') - - result = p1 + 1 - self.assertEqual(result.ordinal, (p2 + 3).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '3M') - - result = p1 - 1 - self.assertEqual(result.ordinal, (p2 - 3).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '3M') - - msg = ('Frequency must be positive, because it' - ' represents span: -3M') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='-3M') - - msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='0M') - - def test_period_cons_combined(self): - p = [(Period('2011-01', freq='1D1H'), - Period('2011-01', freq='1H1D'), - Period('2011-01', freq='H')), - (Period(ordinal=1, freq='1D1H'), - Period(ordinal=1, freq='1H1D'), - Period(ordinal=1, freq='H'))] - - for p1, p2, p3 in p: - self.assertEqual(p1.ordinal, p3.ordinal) - self.assertEqual(p2.ordinal, p3.ordinal) - - self.assertEqual(p1.freq, offsets.Hour(25)) - self.assertEqual(p1.freqstr, '25H') - - self.assertEqual(p2.freq, offsets.Hour(25)) - self.assertEqual(p2.freqstr, '25H') - - self.assertEqual(p3.freq, offsets.Hour()) - self.assertEqual(p3.freqstr, 'H') - - result = p1 + 1 - self.assertEqual(result.ordinal, (p3 + 25).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '25H') - - result = p2 + 1 - self.assertEqual(result.ordinal, (p3 + 25).ordinal) - self.assertEqual(result.freq, p2.freq) - self.assertEqual(result.freqstr, '25H') - - result = p1 - 1 - self.assertEqual(result.ordinal, (p3 - 25).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '25H') - - result = p2 - 1 - self.assertEqual(result.ordinal, (p3 - 25).ordinal) - self.assertEqual(result.freq, p2.freq) - self.assertEqual(result.freqstr, '25H') - - msg = ('Frequency must be positive, because it' - ' represents span: -25H') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='-1D1H') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='-1H1D') - with tm.assertRaisesRegexp(ValueError, msg): - Period(ordinal=1, freq='-1D1H') - with tm.assertRaisesRegexp(ValueError, msg): - Period(ordinal=1, freq='-1H1D') - - msg = ('Frequency must be positive, because it' - ' represents span: 0D') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='0D0H') - with tm.assertRaisesRegexp(ValueError, msg): - Period(ordinal=1, freq='0D0H') - - # You can only combine together day and intraday offsets - msg = ('Invalid frequency: 1W1D') - with 
tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='1W1D') - msg = ('Invalid frequency: 1D1W') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='1D1W') - - def test_timestamp_tz_arg(self): - tm._skip_if_no_pytz() - import pytz - for case in ['Europe/Brussels', 'Asia/Tokyo', 'US/Pacific']: - p = Period('1/1/2005', freq='M').to_timestamp(tz=case) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - exp_zone = pytz.timezone(case).normalize(p) - - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) - - p = Period('1/1/2005', freq='3H').to_timestamp(tz=case) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - exp_zone = pytz.timezone(case).normalize(p) - - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) - - p = Period('1/1/2005', freq='A').to_timestamp(freq='A', tz=case) - exp = Timestamp('31/12/2005', tz='UTC').tz_convert(case) - exp_zone = pytz.timezone(case).normalize(p) - - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) - - p = Period('1/1/2005', freq='A').to_timestamp(freq='3H', tz=case) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - exp_zone = pytz.timezone(case).normalize(p) - - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) - - def test_timestamp_tz_arg_dateutil(self): - from pandas.tslib import _dateutil_gettz as gettz - from pandas.tslib import maybe_get_tz - for case in ['dateutil/Europe/Brussels', 'dateutil/Asia/Tokyo', - 'dateutil/US/Pacific']: - p = Period('1/1/2005', freq='M').to_timestamp( - tz=maybe_get_tz(case)) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - self.assertEqual(p, exp) - self.assertEqual(p.tz, gettz(case.split('/', 1)[1])) - self.assertEqual(p.tz, exp.tz) - - p = Period('1/1/2005', - freq='M').to_timestamp(freq='3H', tz=maybe_get_tz(case)) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - self.assertEqual(p, exp) - self.assertEqual(p.tz, gettz(case.split('/', 1)[1])) - self.assertEqual(p.tz, exp.tz) - - def test_timestamp_tz_arg_dateutil_from_string(self): - from pandas.tslib import _dateutil_gettz as gettz - p = Period('1/1/2005', - freq='M').to_timestamp(tz='dateutil/Europe/Brussels') - self.assertEqual(p.tz, gettz('Europe/Brussels')) - - def test_timestamp_mult(self): - p = pd.Period('2011-01', freq='M') - self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) - self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-01-31')) - - p = pd.Period('2011-01', freq='3M') - self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) - self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-03-31')) - - def test_period_constructor(self): - i1 = Period('1/1/2005', freq='M') - i2 = Period('Jan 2005') - - self.assertEqual(i1, i2) - - i1 = Period('2005', freq='A') - i2 = Period('2005') - i3 = Period('2005', freq='a') - - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - - i4 = Period('2005', freq='M') - i5 = Period('2005', freq='m') - - self.assertRaises(ValueError, i1.__ne__, i4) - self.assertEqual(i4, i5) - - i1 = Period.now('Q') - i2 = Period(datetime.now(), freq='Q') - i3 = Period.now('q') - - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - - # Biz day construction, roll forward if non-weekday - i1 = Period('3/10/12', freq='B') - i2 = Period('3/10/12', freq='D') - self.assertEqual(i1, i2.asfreq('B')) - i2 = Period('3/11/12', freq='D') - 
self.assertEqual(i1, i2.asfreq('B')) - i2 = Period('3/12/12', freq='D') - self.assertEqual(i1, i2.asfreq('B')) - - i3 = Period('3/10/12', freq='b') - self.assertEqual(i1, i3) - - i1 = Period(year=2005, quarter=1, freq='Q') - i2 = Period('1/1/2005', freq='Q') - self.assertEqual(i1, i2) - - i1 = Period(year=2005, quarter=3, freq='Q') - i2 = Period('9/1/2005', freq='Q') - self.assertEqual(i1, i2) - - i1 = Period(year=2005, month=3, day=1, freq='D') - i2 = Period('3/1/2005', freq='D') - self.assertEqual(i1, i2) - - i3 = Period(year=2005, month=3, day=1, freq='d') - self.assertEqual(i1, i3) - - i1 = Period(year=2012, month=3, day=10, freq='B') - i2 = Period('3/12/12', freq='B') - self.assertEqual(i1, i2) - - i1 = Period('2005Q1') - i2 = Period(year=2005, quarter=1, freq='Q') - i3 = Period('2005q1') - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - - i1 = Period('05Q1') - self.assertEqual(i1, i2) - lower = Period('05q1') - self.assertEqual(i1, lower) - - i1 = Period('1Q2005') - self.assertEqual(i1, i2) - lower = Period('1q2005') - self.assertEqual(i1, lower) - - i1 = Period('1Q05') - self.assertEqual(i1, i2) - lower = Period('1q05') - self.assertEqual(i1, lower) - - i1 = Period('4Q1984') - self.assertEqual(i1.year, 1984) - lower = Period('4q1984') - self.assertEqual(i1, lower) - - i1 = Period('1982', freq='min') - i2 = Period('1982', freq='MIN') - self.assertEqual(i1, i2) - i2 = Period('1982', freq=('Min', 1)) - self.assertEqual(i1, i2) - - expected = Period('2007-01', freq='M') - i1 = Period('200701', freq='M') - self.assertEqual(i1, expected) - - i1 = Period('200701', freq='M') - self.assertEqual(i1, expected) - - i1 = Period(200701, freq='M') - self.assertEqual(i1, expected) - - i1 = Period(ordinal=200701, freq='M') - self.assertEqual(i1.year, 18695) - - i1 = Period(datetime(2007, 1, 1), freq='M') - i2 = Period('200701', freq='M') - self.assertEqual(i1, i2) - - i1 = Period(date(2007, 1, 1), freq='M') - i2 = Period(datetime(2007, 1, 1), freq='M') - i3 = Period(np.datetime64('2007-01-01'), freq='M') - i4 = Period(np_datetime64_compat('2007-01-01 00:00:00Z'), freq='M') - i5 = Period(np_datetime64_compat('2007-01-01 00:00:00.000Z'), freq='M') - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - self.assertEqual(i1, i4) - self.assertEqual(i1, i5) - - i1 = Period('2007-01-01 09:00:00.001') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') - self.assertEqual(i1, expected) - - expected = Period(np_datetime64_compat( - '2007-01-01 09:00:00.001Z'), freq='L') - self.assertEqual(i1, expected) - - i1 = Period('2007-01-01 09:00:00.00101') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') - self.assertEqual(i1, expected) - - expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), - freq='U') - self.assertEqual(i1, expected) - - self.assertRaises(ValueError, Period, ordinal=200701) - - self.assertRaises(ValueError, Period, '2007-1-1', freq='X') - - def test_period_constructor_offsets(self): - self.assertEqual(Period('1/1/2005', freq=offsets.MonthEnd()), - Period('1/1/2005', freq='M')) - self.assertEqual(Period('2005', freq=offsets.YearEnd()), - Period('2005', freq='A')) - self.assertEqual(Period('2005', freq=offsets.MonthEnd()), - Period('2005', freq='M')) - self.assertEqual(Period('3/10/12', freq=offsets.BusinessDay()), - Period('3/10/12', freq='B')) - self.assertEqual(Period('3/10/12', freq=offsets.Day()), - Period('3/10/12', freq='D')) - - self.assertEqual(Period(year=2005, quarter=1, - freq=offsets.QuarterEnd(startingMonth=12)), - Period(year=2005, 
quarter=1, freq='Q')) - self.assertEqual(Period(year=2005, quarter=2, - freq=offsets.QuarterEnd(startingMonth=12)), - Period(year=2005, quarter=2, freq='Q')) - - self.assertEqual(Period(year=2005, month=3, day=1, freq=offsets.Day()), - Period(year=2005, month=3, day=1, freq='D')) - self.assertEqual(Period(year=2012, month=3, day=10, - freq=offsets.BDay()), - Period(year=2012, month=3, day=10, freq='B')) - - expected = Period('2005-03-01', freq='3D') - self.assertEqual(Period(year=2005, month=3, day=1, - freq=offsets.Day(3)), expected) - self.assertEqual(Period(year=2005, month=3, day=1, freq='3D'), - expected) - - self.assertEqual(Period(year=2012, month=3, day=10, - freq=offsets.BDay(3)), - Period(year=2012, month=3, day=10, freq='3B')) - - self.assertEqual(Period(200701, freq=offsets.MonthEnd()), - Period(200701, freq='M')) - - i1 = Period(ordinal=200701, freq=offsets.MonthEnd()) - i2 = Period(ordinal=200701, freq='M') - self.assertEqual(i1, i2) - self.assertEqual(i1.year, 18695) - self.assertEqual(i2.year, 18695) - - i1 = Period(datetime(2007, 1, 1), freq='M') - i2 = Period('200701', freq='M') - self.assertEqual(i1, i2) - - i1 = Period(date(2007, 1, 1), freq='M') - i2 = Period(datetime(2007, 1, 1), freq='M') - i3 = Period(np.datetime64('2007-01-01'), freq='M') - i4 = Period(np_datetime64_compat('2007-01-01 00:00:00Z'), freq='M') - i5 = Period(np_datetime64_compat('2007-01-01 00:00:00.000Z'), freq='M') - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - self.assertEqual(i1, i4) - self.assertEqual(i1, i5) - - i1 = Period('2007-01-01 09:00:00.001') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') - self.assertEqual(i1, expected) - - expected = Period(np_datetime64_compat( - '2007-01-01 09:00:00.001Z'), freq='L') - self.assertEqual(i1, expected) - - i1 = Period('2007-01-01 09:00:00.00101') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') - self.assertEqual(i1, expected) - - expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), - freq='U') - self.assertEqual(i1, expected) - - self.assertRaises(ValueError, Period, ordinal=200701) - - self.assertRaises(ValueError, Period, '2007-1-1', freq='X') - - def test_freq_str(self): - i1 = Period('1982', freq='Min') - self.assertEqual(i1.freq, offsets.Minute()) - self.assertEqual(i1.freqstr, 'T') - - def test_period_deprecated_freq(self): - cases = {"M": ["MTH", "MONTH", "MONTHLY", "Mth", "month", "monthly"], - "B": ["BUS", "BUSINESS", "BUSINESSLY", "WEEKDAY", "bus"], - "D": ["DAY", "DLY", "DAILY", "Day", "Dly", "Daily"], - "H": ["HR", "HOUR", "HRLY", "HOURLY", "hr", "Hour", "HRly"], - "T": ["minute", "MINUTE", "MINUTELY", "minutely"], - "S": ["sec", "SEC", "SECOND", "SECONDLY", "second"], - "L": ["MILLISECOND", "MILLISECONDLY", "millisecond"], - "U": ["MICROSECOND", "MICROSECONDLY", "microsecond"], - "N": ["NANOSECOND", "NANOSECONDLY", "nanosecond"]} - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - for exp, freqs in iteritems(cases): - for freq in freqs: - with self.assertRaisesRegexp(ValueError, msg): - Period('2016-03-01 09:00', freq=freq) - with self.assertRaisesRegexp(ValueError, msg): - Period(ordinal=1, freq=freq) - - # check supported freq-aliases still works - p1 = Period('2016-03-01 09:00', freq=exp) - p2 = Period(ordinal=1, freq=exp) - tm.assertIsInstance(p1, Period) - tm.assertIsInstance(p2, Period) - - def test_hash(self): - self.assertEqual(hash(Period('2011-01', freq='M')), - hash(Period('2011-01', freq='M'))) - - self.assertNotEqual(hash(Period('2011-01-01', freq='D')), - 
hash(Period('2011-01', freq='M')))
-
-        self.assertNotEqual(hash(Period('2011-01', freq='3M')),
-                            hash(Period('2011-01', freq='2M')))
-
-        self.assertNotEqual(hash(Period('2011-01', freq='M')),
-                            hash(Period('2011-02', freq='M')))
-
-    def test_repr(self):
-        p = Period('Jan-2000')
-        self.assertIn('2000-01', repr(p))
-
-        p = Period('2000-12-15')
-        self.assertIn('2000-12-15', repr(p))
-
-    def test_repr_nat(self):
-        p = Period('nat', freq='M')
-        self.assertIn(repr(tslib.NaT), repr(p))
-
-    def test_millisecond_repr(self):
-        p = Period('2000-01-01 12:15:02.123')
-
-        self.assertEqual("Period('2000-01-01 12:15:02.123', 'L')", repr(p))
-
-    def test_microsecond_repr(self):
-        p = Period('2000-01-01 12:15:02.123567')
-
-        self.assertEqual("Period('2000-01-01 12:15:02.123567', 'U')", repr(p))
-
-    def test_strftime(self):
-        p = Period('2000-1-1 12:34:12', freq='S')
-        res = p.strftime('%Y-%m-%d %H:%M:%S')
-        self.assertEqual(res, '2000-01-01 12:34:12')
-        tm.assertIsInstance(res, text_type)  # GH3363
-
-    def test_sub_delta(self):
-        left, right = Period('2011', freq='A'), Period('2007', freq='A')
-        result = left - right
-        self.assertEqual(result, 4)
-
-        with self.assertRaises(period.IncompatibleFrequency):
-            left - Period('2007-01', freq='M')
-
-    def test_to_timestamp(self):
-        p = Period('1982', freq='A')
-        start_ts = p.to_timestamp(how='S')
-        aliases = ['s', 'StarT', 'BEGIn']
-        for a in aliases:
-            self.assertEqual(start_ts, p.to_timestamp('D', how=a))
-            # freq with mult should not affect the result
-            self.assertEqual(start_ts, p.to_timestamp('3D', how=a))
-
-        end_ts = p.to_timestamp(how='E')
-        aliases = ['e', 'end', 'FINIsH']
-        for a in aliases:
-            self.assertEqual(end_ts, p.to_timestamp('D', how=a))
-            self.assertEqual(end_ts, p.to_timestamp('3D', how=a))
-
-        from_lst = ['A', 'Q', 'M', 'W', 'B', 'D', 'H', 'Min', 'S']
-
-        def _ex(p):
-            return Timestamp((p + 1).start_time.value - 1)
-
-        for i, fcode in enumerate(from_lst):
-            p = Period('1982', freq=fcode)
-            result = p.to_timestamp().to_period(fcode)
-            self.assertEqual(result, p)
-
-            self.assertEqual(p.start_time, p.to_timestamp(how='S'))
-
-            self.assertEqual(p.end_time, _ex(p))
-
-        # Frequency other than daily
-
-        p = Period('1985', freq='A')
-
-        result = p.to_timestamp('H', how='end')
-        expected = datetime(1985, 12, 31, 23)
-        self.assertEqual(result, expected)
-        result = p.to_timestamp('3H', how='end')
-        self.assertEqual(result, expected)
-
-        result = p.to_timestamp('T', how='end')
-        expected = datetime(1985, 12, 31, 23, 59)
-        self.assertEqual(result, expected)
-        result = p.to_timestamp('2T', how='end')
-        self.assertEqual(result, expected)
-
-        result = p.to_timestamp(how='end')
-        expected = datetime(1985, 12, 31)
-        self.assertEqual(result, expected)
-
-        expected = datetime(1985, 1, 1)
-        result = p.to_timestamp('H', how='start')
-        self.assertEqual(result, expected)
-        result = p.to_timestamp('T', how='start')
-        self.assertEqual(result, expected)
-        result = p.to_timestamp('S', how='start')
-        self.assertEqual(result, expected)
-        result = p.to_timestamp('3H', how='start')
-        self.assertEqual(result, expected)
-        result = p.to_timestamp('5S', how='start')
-        self.assertEqual(result, expected)
-
-    def test_start_time(self):
-        freq_lst = ['A', 'Q', 'M', 'D', 'H', 'T', 'S']
-        xp = datetime(2012, 1, 1)
-        for f in freq_lst:
-            p = Period('2012', freq=f)
-            self.assertEqual(p.start_time, xp)
-        self.assertEqual(Period('2012', freq='B').start_time,
-                         datetime(2012, 1, 2))
-        self.assertEqual(Period('2012', freq='W').start_time,
-                         datetime(2011, 12, 26))
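Every case in the end_time test below reduces to one invariant: a period covers the half-open interval from its start_time up to the next period's start_time, so end_time is exactly one nanosecond before the next period begins. A minimal sketch of that relationship (illustrative only; it assumes nothing beyond the public Period API these tests already exercise):

    import pandas as pd

    p = pd.Period('2012', freq='A')
    # end_time is the last representable instant inside the period:
    # one nanosecond before the following period starts.
    assert p.end_time == pd.Timestamp((p + 1).start_time.value - 1)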
-    def test_end_time(self):
-        p = Period('2012', freq='A')
-
-        def _ex(*args):
-            return Timestamp(Timestamp(datetime(*args)).value - 1)
-
-        xp = _ex(2013, 1, 1)
-        self.assertEqual(xp, p.end_time)
-
-        p = Period('2012', freq='Q')
-        xp = _ex(2012, 4, 1)
-        self.assertEqual(xp, p.end_time)
-
-        p = Period('2012', freq='M')
-        xp = _ex(2012, 2, 1)
-        self.assertEqual(xp, p.end_time)
-
-        p = Period('2012', freq='D')
-        xp = _ex(2012, 1, 2)
-        self.assertEqual(xp, p.end_time)
-
-        p = Period('2012', freq='H')
-        xp = _ex(2012, 1, 1, 1)
-        self.assertEqual(xp, p.end_time)
-
-        p = Period('2012', freq='B')
-        xp = _ex(2012, 1, 3)
-        self.assertEqual(xp, p.end_time)
-
-        p = Period('2012', freq='W')
-        xp = _ex(2012, 1, 2)
-        self.assertEqual(xp, p.end_time)
-
-        # Test for GH 11738
-        p = Period('2012', freq='15D')
-        xp = _ex(2012, 1, 16)
-        self.assertEqual(xp, p.end_time)
-
-        p = Period('2012', freq='1D1H')
-        xp = _ex(2012, 1, 2, 1)
-        self.assertEqual(xp, p.end_time)
-
-        p = Period('2012', freq='1H1D')
-        xp = _ex(2012, 1, 2, 1)
-        self.assertEqual(xp, p.end_time)
-
-    def test_anchor_week_end_time(self):
-        def _ex(*args):
-            return Timestamp(Timestamp(datetime(*args)).value - 1)
-
-        p = Period('2013-1-1', 'W-SAT')
-        xp = _ex(2013, 1, 6)
-        self.assertEqual(p.end_time, xp)
-
-    def test_properties_annually(self):
-        # Test properties on Periods with annual frequency.
-        a_date = Period(freq='A', year=2007)
-        self.assertEqual(a_date.year, 2007)
-
-    def test_properties_quarterly(self):
-        # Test properties on Periods with quarterly frequency.
-        qedec_date = Period(freq="Q-DEC", year=2007, quarter=1)
-        qejan_date = Period(freq="Q-JAN", year=2007, quarter=1)
-        qejun_date = Period(freq="Q-JUN", year=2007, quarter=1)
-        #
-        for x in range(3):
-            for qd in (qedec_date, qejan_date, qejun_date):
-                self.assertEqual((qd + x).qyear, 2007)
-                self.assertEqual((qd + x).quarter, x + 1)
-
-    def test_properties_monthly(self):
-        # Test properties on Periods with monthly frequency.
-        m_date = Period(freq='M', year=2007, month=1)
-        for x in range(11):
-            m_ival_x = m_date + x
-            self.assertEqual(m_ival_x.year, 2007)
-            if 1 <= x + 1 <= 3:
-                self.assertEqual(m_ival_x.quarter, 1)
-            elif 4 <= x + 1 <= 6:
-                self.assertEqual(m_ival_x.quarter, 2)
-            elif 7 <= x + 1 <= 9:
-                self.assertEqual(m_ival_x.quarter, 3)
-            elif 10 <= x + 1 <= 12:
-                self.assertEqual(m_ival_x.quarter, 4)
-            self.assertEqual(m_ival_x.month, x + 1)
-
-    def test_properties_weekly(self):
-        # Test properties on Periods with weekly frequency.
-        w_date = Period(freq='W', year=2007, month=1, day=7)
-        #
-        self.assertEqual(w_date.year, 2007)
-        self.assertEqual(w_date.quarter, 1)
-        self.assertEqual(w_date.month, 1)
-        self.assertEqual(w_date.week, 1)
-        self.assertEqual((w_date - 1).week, 52)
-        self.assertEqual(w_date.days_in_month, 31)
-        self.assertEqual(Period(freq='W', year=2012,
-                                month=2, day=1).days_in_month, 29)
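The weekly-property assertions above hinge on week numbering at a year boundary; stated directly (a sketch under the same API assumptions as the surrounding tests):

    import pandas as pd

    w = pd.Period(freq='W', year=2007, month=1, day=7)
    assert w.week == 1             # first week of 2007
    assert (w - 1).week == 52      # one step back lands in 2006's final week
    assert w.days_in_month == 31   # January; a February 2012 period would give 29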
-    def test_properties_weekly_legacy(self):
-        # Test properties on Periods with weekly frequency.
-        w_date = Period(freq='W', year=2007, month=1, day=7)
-        self.assertEqual(w_date.year, 2007)
-        self.assertEqual(w_date.quarter, 1)
-        self.assertEqual(w_date.month, 1)
-        self.assertEqual(w_date.week, 1)
-        self.assertEqual((w_date - 1).week, 52)
-        self.assertEqual(w_date.days_in_month, 31)
-
-        exp = Period(freq='W', year=2012, month=2, day=1)
-        self.assertEqual(exp.days_in_month, 29)
-
-        msg = pd.tseries.frequencies._INVALID_FREQ_ERROR
-        with self.assertRaisesRegexp(ValueError, msg):
-            Period(freq='WK', year=2007, month=1, day=7)
-
-    def test_properties_daily(self):
-        # Test properties on Periods with daily frequency.
-        b_date = Period(freq='B', year=2007, month=1, day=1)
-        #
-        self.assertEqual(b_date.year, 2007)
-        self.assertEqual(b_date.quarter, 1)
-        self.assertEqual(b_date.month, 1)
-        self.assertEqual(b_date.day, 1)
-        self.assertEqual(b_date.weekday, 0)
-        self.assertEqual(b_date.dayofyear, 1)
-        self.assertEqual(b_date.days_in_month, 31)
-        self.assertEqual(Period(freq='B', year=2012,
-                                month=2, day=1).days_in_month, 29)
-        #
-        d_date = Period(freq='D', year=2007, month=1, day=1)
-        #
-        self.assertEqual(d_date.year, 2007)
-        self.assertEqual(d_date.quarter, 1)
-        self.assertEqual(d_date.month, 1)
-        self.assertEqual(d_date.day, 1)
-        self.assertEqual(d_date.weekday, 0)
-        self.assertEqual(d_date.dayofyear, 1)
-        self.assertEqual(d_date.days_in_month, 31)
-        self.assertEqual(Period(freq='D', year=2012, month=2,
-                                day=1).days_in_month, 29)
-
-    def test_properties_hourly(self):
-        # Test properties on Periods with hourly frequency.
-        h_date1 = Period(freq='H', year=2007, month=1, day=1, hour=0)
-        h_date2 = Period(freq='2H', year=2007, month=1, day=1, hour=0)
-
-        for h_date in [h_date1, h_date2]:
-            self.assertEqual(h_date.year, 2007)
-            self.assertEqual(h_date.quarter, 1)
-            self.assertEqual(h_date.month, 1)
-            self.assertEqual(h_date.day, 1)
-            self.assertEqual(h_date.weekday, 0)
-            self.assertEqual(h_date.dayofyear, 1)
-            self.assertEqual(h_date.hour, 0)
-            self.assertEqual(h_date.days_in_month, 31)
-            self.assertEqual(Period(freq='H', year=2012, month=2, day=1,
-                                    hour=0).days_in_month, 29)
-
-    def test_properties_minutely(self):
-        # Test properties on Periods with minutely frequency.
-        t_date = Period(freq='Min', year=2007, month=1, day=1, hour=0,
-                        minute=0)
-        #
-        self.assertEqual(t_date.quarter, 1)
-        self.assertEqual(t_date.month, 1)
-        self.assertEqual(t_date.day, 1)
-        self.assertEqual(t_date.weekday, 0)
-        self.assertEqual(t_date.dayofyear, 1)
-        self.assertEqual(t_date.hour, 0)
-        self.assertEqual(t_date.minute, 0)
-        self.assertEqual(t_date.days_in_month, 31)
-        self.assertEqual(Period(freq='Min', year=2012, month=2, day=1, hour=0,
-                                minute=0).days_in_month, 29)
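From hourly down to sub-minute resolution, these property tests keep re-asserting the same point: days_in_month depends only on the calendar month a period falls in, never on the period's own frequency. For instance (illustrative only):

    import pandas as pd

    # 2012 is a leap year, so February reports 29 days at any resolution.
    feb = [pd.Period(freq=f, year=2012, month=2, day=1) for f in ('D', 'H', 'Min')]
    assert all(p.days_in_month == 29 for p in feb)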
-    def test_properties_secondly(self):
-        # Test properties on Periods with secondly frequency.
-        s_date = Period(freq='S', year=2007, month=1, day=1, hour=0,
-                        minute=0, second=0)
-        #
-        self.assertEqual(s_date.year, 2007)
-        self.assertEqual(s_date.quarter, 1)
-        self.assertEqual(s_date.month, 1)
-        self.assertEqual(s_date.day, 1)
-        self.assertEqual(s_date.weekday, 0)
-        self.assertEqual(s_date.dayofyear, 1)
-        self.assertEqual(s_date.hour, 0)
-        self.assertEqual(s_date.minute, 0)
-        self.assertEqual(s_date.second, 0)
-        self.assertEqual(s_date.days_in_month, 31)
-        self.assertEqual(Period(freq='S', year=2012, month=2, day=1, hour=0,
-                                minute=0, second=0).days_in_month, 29)
-
-    def test_properties_nat(self):
-        p_nat = Period('NaT', freq='M')
-        t_nat = pd.Timestamp('NaT')
-        self.assertIs(p_nat, t_nat)
-
-        # confirm Period('NaT') works identically to Timestamp('NaT')
-        for f in ['year', 'month', 'day', 'hour', 'minute', 'second', 'week',
-                  'dayofyear', 'quarter', 'days_in_month']:
-            self.assertTrue(np.isnan(getattr(p_nat, f)))
-            self.assertTrue(np.isnan(getattr(t_nat, f)))
-
-    def test_pnow(self):
-        dt = datetime.now()
-
-        val = period.pnow('D')
-        exp = Period(dt, freq='D')
-        self.assertEqual(val, exp)
-
-        val2 = period.pnow('2D')
-        exp2 = Period(dt, freq='2D')
-        self.assertEqual(val2, exp2)
-        self.assertEqual(val.ordinal, val2.ordinal)
-        self.assertEqual(val.ordinal, exp2.ordinal)
-
-    def test_constructor_corner(self):
-        expected = Period('2007-01', freq='2M')
-        self.assertEqual(Period(year=2007, month=1, freq='2M'), expected)
-
-        self.assertRaises(ValueError, Period, datetime.now())
-        self.assertRaises(ValueError, Period, datetime.now().date())
-        self.assertRaises(ValueError, Period, 1.6, freq='D')
-        self.assertRaises(ValueError, Period, ordinal=1.6, freq='D')
-        self.assertRaises(ValueError, Period, ordinal=2, value=1, freq='D')
-        self.assertIs(Period(None), pd.NaT)
-        self.assertRaises(ValueError, Period, month=1)
-
-        p = Period('2007-01-01', freq='D')
-
-        result = Period(p, freq='A')
-        exp = Period('2007', freq='A')
-        self.assertEqual(result, exp)
-
-    def test_constructor_infer_freq(self):
-        p = Period('2007-01-01')
-        self.assertEqual(p.freq, 'D')
-
-        p = Period('2007-01-01 07')
-        self.assertEqual(p.freq, 'H')
-
-        p = Period('2007-01-01 07:10')
-        self.assertEqual(p.freq, 'T')
-
-        p = Period('2007-01-01 07:10:15')
-        self.assertEqual(p.freq, 'S')
-
-        p = Period('2007-01-01 07:10:15.123')
-        self.assertEqual(p.freq, 'L')
-
-        p = Period('2007-01-01 07:10:15.123000')
-        self.assertEqual(p.freq, 'L')
-
-        p = Period('2007-01-01 07:10:15.123400')
-        self.assertEqual(p.freq, 'U')
-
-    def test_asfreq_MS(self):
-        initial = Period("2013")
-
-        self.assertEqual(initial.asfreq(freq="M", how="S"),
-                         Period('2013-01', 'M'))
-
-        msg = pd.tseries.frequencies._INVALID_FREQ_ERROR
-        with self.assertRaisesRegexp(ValueError, msg):
-            initial.asfreq(freq="MS", how="S")
-
-        with tm.assertRaisesRegexp(ValueError, msg):
-            pd.Period('2013-01', 'MS')
-
-        self.assertTrue(_period_code_map.get("MS") is None)
-
-
-def noWrap(item):
-    return item
-
-
-class TestFreqConversion(tm.TestCase):
-    "Test frequency conversion of date objects"
-
-    def test_asfreq_corner(self):
-        val = Period(freq='A', year=2007)
-        result1 = val.asfreq('5t')
-        result2 = val.asfreq('t')
-        expected = Period('2007-12-31 23:59', freq='t')
-        self.assertEqual(result1.ordinal, expected.ordinal)
-        self.assertEqual(result1.freqstr, '5T')
-        self.assertEqual(result2.ordinal, expected.ordinal)
-        self.assertEqual(result2.freqstr, 'T')
-
-    def test_conv_annual(self):
-        # frequency conversion tests: from Annual Frequency
-
-        ival_A = Period(freq='A', year=2007)
-
-
ival_AJAN = Period(freq="A-JAN", year=2007) - ival_AJUN = Period(freq="A-JUN", year=2007) - ival_ANOV = Period(freq="A-NOV", year=2007) - - ival_A_to_Q_start = Period(freq='Q', year=2007, quarter=1) - ival_A_to_Q_end = Period(freq='Q', year=2007, quarter=4) - ival_A_to_M_start = Period(freq='M', year=2007, month=1) - ival_A_to_M_end = Period(freq='M', year=2007, month=12) - ival_A_to_W_start = Period(freq='W', year=2007, month=1, day=1) - ival_A_to_W_end = Period(freq='W', year=2007, month=12, day=31) - ival_A_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_A_to_B_end = Period(freq='B', year=2007, month=12, day=31) - ival_A_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_A_to_D_end = Period(freq='D', year=2007, month=12, day=31) - ival_A_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_A_to_H_end = Period(freq='H', year=2007, month=12, day=31, - hour=23) - ival_A_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_A_to_T_end = Period(freq='Min', year=2007, month=12, day=31, - hour=23, minute=59) - ival_A_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_A_to_S_end = Period(freq='S', year=2007, month=12, day=31, - hour=23, minute=59, second=59) - - ival_AJAN_to_D_end = Period(freq='D', year=2007, month=1, day=31) - ival_AJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) - ival_AJUN_to_D_end = Period(freq='D', year=2007, month=6, day=30) - ival_AJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) - ival_ANOV_to_D_end = Period(freq='D', year=2007, month=11, day=30) - ival_ANOV_to_D_start = Period(freq='D', year=2006, month=12, day=1) - - self.assertEqual(ival_A.asfreq('Q', 'S'), ival_A_to_Q_start) - self.assertEqual(ival_A.asfreq('Q', 'e'), ival_A_to_Q_end) - self.assertEqual(ival_A.asfreq('M', 's'), ival_A_to_M_start) - self.assertEqual(ival_A.asfreq('M', 'E'), ival_A_to_M_end) - self.assertEqual(ival_A.asfreq('W', 'S'), ival_A_to_W_start) - self.assertEqual(ival_A.asfreq('W', 'E'), ival_A_to_W_end) - self.assertEqual(ival_A.asfreq('B', 'S'), ival_A_to_B_start) - self.assertEqual(ival_A.asfreq('B', 'E'), ival_A_to_B_end) - self.assertEqual(ival_A.asfreq('D', 'S'), ival_A_to_D_start) - self.assertEqual(ival_A.asfreq('D', 'E'), ival_A_to_D_end) - self.assertEqual(ival_A.asfreq('H', 'S'), ival_A_to_H_start) - self.assertEqual(ival_A.asfreq('H', 'E'), ival_A_to_H_end) - self.assertEqual(ival_A.asfreq('min', 'S'), ival_A_to_T_start) - self.assertEqual(ival_A.asfreq('min', 'E'), ival_A_to_T_end) - self.assertEqual(ival_A.asfreq('T', 'S'), ival_A_to_T_start) - self.assertEqual(ival_A.asfreq('T', 'E'), ival_A_to_T_end) - self.assertEqual(ival_A.asfreq('S', 'S'), ival_A_to_S_start) - self.assertEqual(ival_A.asfreq('S', 'E'), ival_A_to_S_end) - - self.assertEqual(ival_AJAN.asfreq('D', 'S'), ival_AJAN_to_D_start) - self.assertEqual(ival_AJAN.asfreq('D', 'E'), ival_AJAN_to_D_end) - - self.assertEqual(ival_AJUN.asfreq('D', 'S'), ival_AJUN_to_D_start) - self.assertEqual(ival_AJUN.asfreq('D', 'E'), ival_AJUN_to_D_end) - - self.assertEqual(ival_ANOV.asfreq('D', 'S'), ival_ANOV_to_D_start) - self.assertEqual(ival_ANOV.asfreq('D', 'E'), ival_ANOV_to_D_end) - - self.assertEqual(ival_A.asfreq('A'), ival_A) - - def test_conv_quarterly(self): - # frequency conversion tests: from Quarterly Frequency - - ival_Q = Period(freq='Q', year=2007, quarter=1) - ival_Q_end_of_year = Period(freq='Q', year=2007, quarter=4) - - ival_QEJAN = Period(freq="Q-JAN", year=2007, 
quarter=1) - ival_QEJUN = Period(freq="Q-JUN", year=2007, quarter=1) - - ival_Q_to_A = Period(freq='A', year=2007) - ival_Q_to_M_start = Period(freq='M', year=2007, month=1) - ival_Q_to_M_end = Period(freq='M', year=2007, month=3) - ival_Q_to_W_start = Period(freq='W', year=2007, month=1, day=1) - ival_Q_to_W_end = Period(freq='W', year=2007, month=3, day=31) - ival_Q_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_Q_to_B_end = Period(freq='B', year=2007, month=3, day=30) - ival_Q_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_Q_to_D_end = Period(freq='D', year=2007, month=3, day=31) - ival_Q_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_Q_to_H_end = Period(freq='H', year=2007, month=3, day=31, hour=23) - ival_Q_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_Q_to_T_end = Period(freq='Min', year=2007, month=3, day=31, - hour=23, minute=59) - ival_Q_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_Q_to_S_end = Period(freq='S', year=2007, month=3, day=31, hour=23, - minute=59, second=59) - - ival_QEJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) - ival_QEJAN_to_D_end = Period(freq='D', year=2006, month=4, day=30) - - ival_QEJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) - ival_QEJUN_to_D_end = Period(freq='D', year=2006, month=9, day=30) - - self.assertEqual(ival_Q.asfreq('A'), ival_Q_to_A) - self.assertEqual(ival_Q_end_of_year.asfreq('A'), ival_Q_to_A) - - self.assertEqual(ival_Q.asfreq('M', 'S'), ival_Q_to_M_start) - self.assertEqual(ival_Q.asfreq('M', 'E'), ival_Q_to_M_end) - self.assertEqual(ival_Q.asfreq('W', 'S'), ival_Q_to_W_start) - self.assertEqual(ival_Q.asfreq('W', 'E'), ival_Q_to_W_end) - self.assertEqual(ival_Q.asfreq('B', 'S'), ival_Q_to_B_start) - self.assertEqual(ival_Q.asfreq('B', 'E'), ival_Q_to_B_end) - self.assertEqual(ival_Q.asfreq('D', 'S'), ival_Q_to_D_start) - self.assertEqual(ival_Q.asfreq('D', 'E'), ival_Q_to_D_end) - self.assertEqual(ival_Q.asfreq('H', 'S'), ival_Q_to_H_start) - self.assertEqual(ival_Q.asfreq('H', 'E'), ival_Q_to_H_end) - self.assertEqual(ival_Q.asfreq('Min', 'S'), ival_Q_to_T_start) - self.assertEqual(ival_Q.asfreq('Min', 'E'), ival_Q_to_T_end) - self.assertEqual(ival_Q.asfreq('S', 'S'), ival_Q_to_S_start) - self.assertEqual(ival_Q.asfreq('S', 'E'), ival_Q_to_S_end) - - self.assertEqual(ival_QEJAN.asfreq('D', 'S'), ival_QEJAN_to_D_start) - self.assertEqual(ival_QEJAN.asfreq('D', 'E'), ival_QEJAN_to_D_end) - self.assertEqual(ival_QEJUN.asfreq('D', 'S'), ival_QEJUN_to_D_start) - self.assertEqual(ival_QEJUN.asfreq('D', 'E'), ival_QEJUN_to_D_end) - - self.assertEqual(ival_Q.asfreq('Q'), ival_Q) - - def test_conv_monthly(self): - # frequency conversion tests: from Monthly Frequency - - ival_M = Period(freq='M', year=2007, month=1) - ival_M_end_of_year = Period(freq='M', year=2007, month=12) - ival_M_end_of_quarter = Period(freq='M', year=2007, month=3) - ival_M_to_A = Period(freq='A', year=2007) - ival_M_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_M_to_W_start = Period(freq='W', year=2007, month=1, day=1) - ival_M_to_W_end = Period(freq='W', year=2007, month=1, day=31) - ival_M_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_M_to_B_end = Period(freq='B', year=2007, month=1, day=31) - ival_M_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_M_to_D_end = Period(freq='D', year=2007, month=1, day=31) - ival_M_to_H_start = Period(freq='H', year=2007, 
month=1, day=1, hour=0) - ival_M_to_H_end = Period(freq='H', year=2007, month=1, day=31, hour=23) - ival_M_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_M_to_T_end = Period(freq='Min', year=2007, month=1, day=31, - hour=23, minute=59) - ival_M_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_M_to_S_end = Period(freq='S', year=2007, month=1, day=31, hour=23, - minute=59, second=59) - - self.assertEqual(ival_M.asfreq('A'), ival_M_to_A) - self.assertEqual(ival_M_end_of_year.asfreq('A'), ival_M_to_A) - self.assertEqual(ival_M.asfreq('Q'), ival_M_to_Q) - self.assertEqual(ival_M_end_of_quarter.asfreq('Q'), ival_M_to_Q) - - self.assertEqual(ival_M.asfreq('W', 'S'), ival_M_to_W_start) - self.assertEqual(ival_M.asfreq('W', 'E'), ival_M_to_W_end) - self.assertEqual(ival_M.asfreq('B', 'S'), ival_M_to_B_start) - self.assertEqual(ival_M.asfreq('B', 'E'), ival_M_to_B_end) - self.assertEqual(ival_M.asfreq('D', 'S'), ival_M_to_D_start) - self.assertEqual(ival_M.asfreq('D', 'E'), ival_M_to_D_end) - self.assertEqual(ival_M.asfreq('H', 'S'), ival_M_to_H_start) - self.assertEqual(ival_M.asfreq('H', 'E'), ival_M_to_H_end) - self.assertEqual(ival_M.asfreq('Min', 'S'), ival_M_to_T_start) - self.assertEqual(ival_M.asfreq('Min', 'E'), ival_M_to_T_end) - self.assertEqual(ival_M.asfreq('S', 'S'), ival_M_to_S_start) - self.assertEqual(ival_M.asfreq('S', 'E'), ival_M_to_S_end) - - self.assertEqual(ival_M.asfreq('M'), ival_M) - - def test_conv_weekly(self): - # frequency conversion tests: from Weekly Frequency - ival_W = Period(freq='W', year=2007, month=1, day=1) - - ival_WSUN = Period(freq='W', year=2007, month=1, day=7) - ival_WSAT = Period(freq='W-SAT', year=2007, month=1, day=6) - ival_WFRI = Period(freq='W-FRI', year=2007, month=1, day=5) - ival_WTHU = Period(freq='W-THU', year=2007, month=1, day=4) - ival_WWED = Period(freq='W-WED', year=2007, month=1, day=3) - ival_WTUE = Period(freq='W-TUE', year=2007, month=1, day=2) - ival_WMON = Period(freq='W-MON', year=2007, month=1, day=1) - - ival_WSUN_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_WSUN_to_D_end = Period(freq='D', year=2007, month=1, day=7) - ival_WSAT_to_D_start = Period(freq='D', year=2006, month=12, day=31) - ival_WSAT_to_D_end = Period(freq='D', year=2007, month=1, day=6) - ival_WFRI_to_D_start = Period(freq='D', year=2006, month=12, day=30) - ival_WFRI_to_D_end = Period(freq='D', year=2007, month=1, day=5) - ival_WTHU_to_D_start = Period(freq='D', year=2006, month=12, day=29) - ival_WTHU_to_D_end = Period(freq='D', year=2007, month=1, day=4) - ival_WWED_to_D_start = Period(freq='D', year=2006, month=12, day=28) - ival_WWED_to_D_end = Period(freq='D', year=2007, month=1, day=3) - ival_WTUE_to_D_start = Period(freq='D', year=2006, month=12, day=27) - ival_WTUE_to_D_end = Period(freq='D', year=2007, month=1, day=2) - ival_WMON_to_D_start = Period(freq='D', year=2006, month=12, day=26) - ival_WMON_to_D_end = Period(freq='D', year=2007, month=1, day=1) - - ival_W_end_of_year = Period(freq='W', year=2007, month=12, day=31) - ival_W_end_of_quarter = Period(freq='W', year=2007, month=3, day=31) - ival_W_end_of_month = Period(freq='W', year=2007, month=1, day=31) - ival_W_to_A = Period(freq='A', year=2007) - ival_W_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_W_to_M = Period(freq='M', year=2007, month=1) - - if Period(freq='D', year=2007, month=12, day=31).weekday == 6: - ival_W_to_A_end_of_year = Period(freq='A', year=2007) - else: - 
ival_W_to_A_end_of_year = Period(freq='A', year=2008) - - if Period(freq='D', year=2007, month=3, day=31).weekday == 6: - ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=1) - else: - ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=2) - - if Period(freq='D', year=2007, month=1, day=31).weekday == 6: - ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=1) - else: - ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=2) - - ival_W_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_W_to_B_end = Period(freq='B', year=2007, month=1, day=5) - ival_W_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_W_to_D_end = Period(freq='D', year=2007, month=1, day=7) - ival_W_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_W_to_H_end = Period(freq='H', year=2007, month=1, day=7, hour=23) - ival_W_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_W_to_T_end = Period(freq='Min', year=2007, month=1, day=7, - hour=23, minute=59) - ival_W_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_W_to_S_end = Period(freq='S', year=2007, month=1, day=7, hour=23, - minute=59, second=59) - - self.assertEqual(ival_W.asfreq('A'), ival_W_to_A) - self.assertEqual(ival_W_end_of_year.asfreq('A'), - ival_W_to_A_end_of_year) - self.assertEqual(ival_W.asfreq('Q'), ival_W_to_Q) - self.assertEqual(ival_W_end_of_quarter.asfreq('Q'), - ival_W_to_Q_end_of_quarter) - self.assertEqual(ival_W.asfreq('M'), ival_W_to_M) - self.assertEqual(ival_W_end_of_month.asfreq('M'), - ival_W_to_M_end_of_month) - - self.assertEqual(ival_W.asfreq('B', 'S'), ival_W_to_B_start) - self.assertEqual(ival_W.asfreq('B', 'E'), ival_W_to_B_end) - - self.assertEqual(ival_W.asfreq('D', 'S'), ival_W_to_D_start) - self.assertEqual(ival_W.asfreq('D', 'E'), ival_W_to_D_end) - - self.assertEqual(ival_WSUN.asfreq('D', 'S'), ival_WSUN_to_D_start) - self.assertEqual(ival_WSUN.asfreq('D', 'E'), ival_WSUN_to_D_end) - self.assertEqual(ival_WSAT.asfreq('D', 'S'), ival_WSAT_to_D_start) - self.assertEqual(ival_WSAT.asfreq('D', 'E'), ival_WSAT_to_D_end) - self.assertEqual(ival_WFRI.asfreq('D', 'S'), ival_WFRI_to_D_start) - self.assertEqual(ival_WFRI.asfreq('D', 'E'), ival_WFRI_to_D_end) - self.assertEqual(ival_WTHU.asfreq('D', 'S'), ival_WTHU_to_D_start) - self.assertEqual(ival_WTHU.asfreq('D', 'E'), ival_WTHU_to_D_end) - self.assertEqual(ival_WWED.asfreq('D', 'S'), ival_WWED_to_D_start) - self.assertEqual(ival_WWED.asfreq('D', 'E'), ival_WWED_to_D_end) - self.assertEqual(ival_WTUE.asfreq('D', 'S'), ival_WTUE_to_D_start) - self.assertEqual(ival_WTUE.asfreq('D', 'E'), ival_WTUE_to_D_end) - self.assertEqual(ival_WMON.asfreq('D', 'S'), ival_WMON_to_D_start) - self.assertEqual(ival_WMON.asfreq('D', 'E'), ival_WMON_to_D_end) - - self.assertEqual(ival_W.asfreq('H', 'S'), ival_W_to_H_start) - self.assertEqual(ival_W.asfreq('H', 'E'), ival_W_to_H_end) - self.assertEqual(ival_W.asfreq('Min', 'S'), ival_W_to_T_start) - self.assertEqual(ival_W.asfreq('Min', 'E'), ival_W_to_T_end) - self.assertEqual(ival_W.asfreq('S', 'S'), ival_W_to_S_start) - self.assertEqual(ival_W.asfreq('S', 'E'), ival_W_to_S_end) - - self.assertEqual(ival_W.asfreq('W'), ival_W) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - ival_W.asfreq('WK') - - def test_conv_weekly_legacy(self): - # frequency conversion tests: from Weekly Frequency - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - 
with self.assertRaisesRegexp(ValueError, msg):
-            Period(freq='WK', year=2007, month=1, day=1)
-
-        with self.assertRaisesRegexp(ValueError, msg):
-            Period(freq='WK-SAT', year=2007, month=1, day=6)
-        with self.assertRaisesRegexp(ValueError, msg):
-            Period(freq='WK-FRI', year=2007, month=1, day=5)
-        with self.assertRaisesRegexp(ValueError, msg):
-            Period(freq='WK-THU', year=2007, month=1, day=4)
-        with self.assertRaisesRegexp(ValueError, msg):
-            Period(freq='WK-WED', year=2007, month=1, day=3)
-        with self.assertRaisesRegexp(ValueError, msg):
-            Period(freq='WK-TUE', year=2007, month=1, day=2)
-        with self.assertRaisesRegexp(ValueError, msg):
-            Period(freq='WK-MON', year=2007, month=1, day=1)
-
-    def test_conv_business(self):
-        # frequency conversion tests: from Business Frequency
-
-        ival_B = Period(freq='B', year=2007, month=1, day=1)
-        ival_B_end_of_year = Period(freq='B', year=2007, month=12, day=31)
-        ival_B_end_of_quarter = Period(freq='B', year=2007, month=3, day=30)
-        ival_B_end_of_month = Period(freq='B', year=2007, month=1, day=31)
-        ival_B_end_of_week = Period(freq='B', year=2007, month=1, day=5)
-
-        ival_B_to_A = Period(freq='A', year=2007)
-        ival_B_to_Q = Period(freq='Q', year=2007, quarter=1)
-        ival_B_to_M = Period(freq='M', year=2007, month=1)
-        ival_B_to_W = Period(freq='W', year=2007, month=1, day=7)
-        ival_B_to_D = Period(freq='D', year=2007, month=1, day=1)
-        ival_B_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0)
-        ival_B_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23)
-        ival_B_to_T_start = Period(freq='Min', year=2007, month=1, day=1,
-                                   hour=0, minute=0)
-        ival_B_to_T_end = Period(freq='Min', year=2007, month=1, day=1,
-                                 hour=23, minute=59)
-        ival_B_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0,
-                                   minute=0, second=0)
-        ival_B_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23,
-                                 minute=59, second=59)
-
-        self.assertEqual(ival_B.asfreq('A'), ival_B_to_A)
-        self.assertEqual(ival_B_end_of_year.asfreq('A'), ival_B_to_A)
-        self.assertEqual(ival_B.asfreq('Q'), ival_B_to_Q)
-        self.assertEqual(ival_B_end_of_quarter.asfreq('Q'), ival_B_to_Q)
-        self.assertEqual(ival_B.asfreq('M'), ival_B_to_M)
-        self.assertEqual(ival_B_end_of_month.asfreq('M'), ival_B_to_M)
-        self.assertEqual(ival_B.asfreq('W'), ival_B_to_W)
-        self.assertEqual(ival_B_end_of_week.asfreq('W'), ival_B_to_W)
-
-        self.assertEqual(ival_B.asfreq('D'), ival_B_to_D)
-
-        self.assertEqual(ival_B.asfreq('H', 'S'), ival_B_to_H_start)
-        self.assertEqual(ival_B.asfreq('H', 'E'), ival_B_to_H_end)
-        self.assertEqual(ival_B.asfreq('Min', 'S'), ival_B_to_T_start)
-        self.assertEqual(ival_B.asfreq('Min', 'E'), ival_B_to_T_end)
-        self.assertEqual(ival_B.asfreq('S', 'S'), ival_B_to_S_start)
-        self.assertEqual(ival_B.asfreq('S', 'E'), ival_B_to_S_end)
-
-        self.assertEqual(ival_B.asfreq('B'), ival_B)
-
-    def test_conv_daily(self):
-        # frequency conversion tests: from Daily Frequency
-
-        ival_D = Period(freq='D', year=2007, month=1, day=1)
-        ival_D_end_of_year = Period(freq='D', year=2007, month=12, day=31)
-        ival_D_end_of_quarter = Period(freq='D', year=2007, month=3, day=31)
-        ival_D_end_of_month = Period(freq='D', year=2007, month=1, day=31)
-        ival_D_end_of_week = Period(freq='D', year=2007, month=1, day=7)
-
-        ival_D_friday = Period(freq='D', year=2007, month=1, day=5)
-        ival_D_saturday = Period(freq='D', year=2007, month=1, day=6)
-        ival_D_sunday = Period(freq='D', year=2007, month=1, day=7)
-
-        # TODO: unused?
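The saturday/sunday assertions a few lines below encode the business-day rolling rule: a weekend day maps to the preceding Friday with how='S' and to the following Monday with how='E'. Sketched directly (same API assumptions as the surrounding tests):

    import pandas as pd

    sat = pd.Period('2007-01-06', freq='D')  # a Saturday
    assert sat.asfreq('B', how='S') == pd.Period('2007-01-05', freq='B')  # roll back to Friday
    assert sat.asfreq('B', how='E') == pd.Period('2007-01-08', freq='B')  # roll forward to Monday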
- # ival_D_monday = Period(freq='D', year=2007, month=1, day=8) - - ival_B_friday = Period(freq='B', year=2007, month=1, day=5) - ival_B_monday = Period(freq='B', year=2007, month=1, day=8) - - ival_D_to_A = Period(freq='A', year=2007) - - ival_Deoq_to_AJAN = Period(freq='A-JAN', year=2008) - ival_Deoq_to_AJUN = Period(freq='A-JUN', year=2007) - ival_Deoq_to_ADEC = Period(freq='A-DEC', year=2007) - - ival_D_to_QEJAN = Period(freq="Q-JAN", year=2007, quarter=4) - ival_D_to_QEJUN = Period(freq="Q-JUN", year=2007, quarter=3) - ival_D_to_QEDEC = Period(freq="Q-DEC", year=2007, quarter=1) - - ival_D_to_M = Period(freq='M', year=2007, month=1) - ival_D_to_W = Period(freq='W', year=2007, month=1, day=7) - - ival_D_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_D_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23) - ival_D_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_D_to_T_end = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) - ival_D_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_D_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, - minute=59, second=59) - - self.assertEqual(ival_D.asfreq('A'), ival_D_to_A) - - self.assertEqual(ival_D_end_of_quarter.asfreq('A-JAN'), - ival_Deoq_to_AJAN) - self.assertEqual(ival_D_end_of_quarter.asfreq('A-JUN'), - ival_Deoq_to_AJUN) - self.assertEqual(ival_D_end_of_quarter.asfreq('A-DEC'), - ival_Deoq_to_ADEC) - - self.assertEqual(ival_D_end_of_year.asfreq('A'), ival_D_to_A) - self.assertEqual(ival_D_end_of_quarter.asfreq('Q'), ival_D_to_QEDEC) - self.assertEqual(ival_D.asfreq("Q-JAN"), ival_D_to_QEJAN) - self.assertEqual(ival_D.asfreq("Q-JUN"), ival_D_to_QEJUN) - self.assertEqual(ival_D.asfreq("Q-DEC"), ival_D_to_QEDEC) - self.assertEqual(ival_D.asfreq('M'), ival_D_to_M) - self.assertEqual(ival_D_end_of_month.asfreq('M'), ival_D_to_M) - self.assertEqual(ival_D.asfreq('W'), ival_D_to_W) - self.assertEqual(ival_D_end_of_week.asfreq('W'), ival_D_to_W) - - self.assertEqual(ival_D_friday.asfreq('B'), ival_B_friday) - self.assertEqual(ival_D_saturday.asfreq('B', 'S'), ival_B_friday) - self.assertEqual(ival_D_saturday.asfreq('B', 'E'), ival_B_monday) - self.assertEqual(ival_D_sunday.asfreq('B', 'S'), ival_B_friday) - self.assertEqual(ival_D_sunday.asfreq('B', 'E'), ival_B_monday) - - self.assertEqual(ival_D.asfreq('H', 'S'), ival_D_to_H_start) - self.assertEqual(ival_D.asfreq('H', 'E'), ival_D_to_H_end) - self.assertEqual(ival_D.asfreq('Min', 'S'), ival_D_to_T_start) - self.assertEqual(ival_D.asfreq('Min', 'E'), ival_D_to_T_end) - self.assertEqual(ival_D.asfreq('S', 'S'), ival_D_to_S_start) - self.assertEqual(ival_D.asfreq('S', 'E'), ival_D_to_S_end) - - self.assertEqual(ival_D.asfreq('D'), ival_D) - - def test_conv_hourly(self): - # frequency conversion tests: from Hourly Frequency" - - ival_H = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_H_end_of_year = Period(freq='H', year=2007, month=12, day=31, - hour=23) - ival_H_end_of_quarter = Period(freq='H', year=2007, month=3, day=31, - hour=23) - ival_H_end_of_month = Period(freq='H', year=2007, month=1, day=31, - hour=23) - ival_H_end_of_week = Period(freq='H', year=2007, month=1, day=7, - hour=23) - ival_H_end_of_day = Period(freq='H', year=2007, month=1, day=1, - hour=23) - ival_H_end_of_bus = Period(freq='H', year=2007, month=1, day=1, - hour=23) - - ival_H_to_A = Period(freq='A', year=2007) - ival_H_to_Q = Period(freq='Q', year=2007, quarter=1) - 
ival_H_to_M = Period(freq='M', year=2007, month=1) - ival_H_to_W = Period(freq='W', year=2007, month=1, day=7) - ival_H_to_D = Period(freq='D', year=2007, month=1, day=1) - ival_H_to_B = Period(freq='B', year=2007, month=1, day=1) - - ival_H_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_H_to_T_end = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=59) - ival_H_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_H_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=59, second=59) - - self.assertEqual(ival_H.asfreq('A'), ival_H_to_A) - self.assertEqual(ival_H_end_of_year.asfreq('A'), ival_H_to_A) - self.assertEqual(ival_H.asfreq('Q'), ival_H_to_Q) - self.assertEqual(ival_H_end_of_quarter.asfreq('Q'), ival_H_to_Q) - self.assertEqual(ival_H.asfreq('M'), ival_H_to_M) - self.assertEqual(ival_H_end_of_month.asfreq('M'), ival_H_to_M) - self.assertEqual(ival_H.asfreq('W'), ival_H_to_W) - self.assertEqual(ival_H_end_of_week.asfreq('W'), ival_H_to_W) - self.assertEqual(ival_H.asfreq('D'), ival_H_to_D) - self.assertEqual(ival_H_end_of_day.asfreq('D'), ival_H_to_D) - self.assertEqual(ival_H.asfreq('B'), ival_H_to_B) - self.assertEqual(ival_H_end_of_bus.asfreq('B'), ival_H_to_B) - - self.assertEqual(ival_H.asfreq('Min', 'S'), ival_H_to_T_start) - self.assertEqual(ival_H.asfreq('Min', 'E'), ival_H_to_T_end) - self.assertEqual(ival_H.asfreq('S', 'S'), ival_H_to_S_start) - self.assertEqual(ival_H.asfreq('S', 'E'), ival_H_to_S_end) - - self.assertEqual(ival_H.asfreq('H'), ival_H) - - def test_conv_minutely(self): - # frequency conversion tests: from Minutely Frequency" - - ival_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=0) - ival_T_end_of_year = Period(freq='Min', year=2007, month=12, day=31, - hour=23, minute=59) - ival_T_end_of_quarter = Period(freq='Min', year=2007, month=3, day=31, - hour=23, minute=59) - ival_T_end_of_month = Period(freq='Min', year=2007, month=1, day=31, - hour=23, minute=59) - ival_T_end_of_week = Period(freq='Min', year=2007, month=1, day=7, - hour=23, minute=59) - ival_T_end_of_day = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) - ival_T_end_of_bus = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) - ival_T_end_of_hour = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=59) - - ival_T_to_A = Period(freq='A', year=2007) - ival_T_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_T_to_M = Period(freq='M', year=2007, month=1) - ival_T_to_W = Period(freq='W', year=2007, month=1, day=7) - ival_T_to_D = Period(freq='D', year=2007, month=1, day=1) - ival_T_to_B = Period(freq='B', year=2007, month=1, day=1) - ival_T_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) - - ival_T_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_T_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=59) - - self.assertEqual(ival_T.asfreq('A'), ival_T_to_A) - self.assertEqual(ival_T_end_of_year.asfreq('A'), ival_T_to_A) - self.assertEqual(ival_T.asfreq('Q'), ival_T_to_Q) - self.assertEqual(ival_T_end_of_quarter.asfreq('Q'), ival_T_to_Q) - self.assertEqual(ival_T.asfreq('M'), ival_T_to_M) - self.assertEqual(ival_T_end_of_month.asfreq('M'), ival_T_to_M) - self.assertEqual(ival_T.asfreq('W'), ival_T_to_W) - self.assertEqual(ival_T_end_of_week.asfreq('W'), ival_T_to_W) - self.assertEqual(ival_T.asfreq('D'), ival_T_to_D) - 
self.assertEqual(ival_T_end_of_day.asfreq('D'), ival_T_to_D) - self.assertEqual(ival_T.asfreq('B'), ival_T_to_B) - self.assertEqual(ival_T_end_of_bus.asfreq('B'), ival_T_to_B) - self.assertEqual(ival_T.asfreq('H'), ival_T_to_H) - self.assertEqual(ival_T_end_of_hour.asfreq('H'), ival_T_to_H) - - self.assertEqual(ival_T.asfreq('S', 'S'), ival_T_to_S_start) - self.assertEqual(ival_T.asfreq('S', 'E'), ival_T_to_S_end) - - self.assertEqual(ival_T.asfreq('Min'), ival_T) - - def test_conv_secondly(self): - # frequency conversion tests: from Secondly Frequency" - - ival_S = Period(freq='S', year=2007, month=1, day=1, hour=0, minute=0, - second=0) - ival_S_end_of_year = Period(freq='S', year=2007, month=12, day=31, - hour=23, minute=59, second=59) - ival_S_end_of_quarter = Period(freq='S', year=2007, month=3, day=31, - hour=23, minute=59, second=59) - ival_S_end_of_month = Period(freq='S', year=2007, month=1, day=31, - hour=23, minute=59, second=59) - ival_S_end_of_week = Period(freq='S', year=2007, month=1, day=7, - hour=23, minute=59, second=59) - ival_S_end_of_day = Period(freq='S', year=2007, month=1, day=1, - hour=23, minute=59, second=59) - ival_S_end_of_bus = Period(freq='S', year=2007, month=1, day=1, - hour=23, minute=59, second=59) - ival_S_end_of_hour = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=59, second=59) - ival_S_end_of_minute = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=0, second=59) - - ival_S_to_A = Period(freq='A', year=2007) - ival_S_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_S_to_M = Period(freq='M', year=2007, month=1) - ival_S_to_W = Period(freq='W', year=2007, month=1, day=7) - ival_S_to_D = Period(freq='D', year=2007, month=1, day=1) - ival_S_to_B = Period(freq='B', year=2007, month=1, day=1) - ival_S_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_S_to_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=0) - - self.assertEqual(ival_S.asfreq('A'), ival_S_to_A) - self.assertEqual(ival_S_end_of_year.asfreq('A'), ival_S_to_A) - self.assertEqual(ival_S.asfreq('Q'), ival_S_to_Q) - self.assertEqual(ival_S_end_of_quarter.asfreq('Q'), ival_S_to_Q) - self.assertEqual(ival_S.asfreq('M'), ival_S_to_M) - self.assertEqual(ival_S_end_of_month.asfreq('M'), ival_S_to_M) - self.assertEqual(ival_S.asfreq('W'), ival_S_to_W) - self.assertEqual(ival_S_end_of_week.asfreq('W'), ival_S_to_W) - self.assertEqual(ival_S.asfreq('D'), ival_S_to_D) - self.assertEqual(ival_S_end_of_day.asfreq('D'), ival_S_to_D) - self.assertEqual(ival_S.asfreq('B'), ival_S_to_B) - self.assertEqual(ival_S_end_of_bus.asfreq('B'), ival_S_to_B) - self.assertEqual(ival_S.asfreq('H'), ival_S_to_H) - self.assertEqual(ival_S_end_of_hour.asfreq('H'), ival_S_to_H) - self.assertEqual(ival_S.asfreq('Min'), ival_S_to_T) - self.assertEqual(ival_S_end_of_minute.asfreq('Min'), ival_S_to_T) - - self.assertEqual(ival_S.asfreq('S'), ival_S) - - def test_asfreq_mult(self): - # normal freq to mult freq - p = Period(freq='A', year=2007) - # ordinal will not change - for freq in ['3A', offsets.YearEnd(3)]: - result = p.asfreq(freq) - expected = Period('2007', freq='3A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - # ordinal will not change - for freq in ['3A', offsets.YearEnd(3)]: - result = p.asfreq(freq, how='S') - expected = Period('2007', freq='3A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - 
self.assertEqual(result.freq, expected.freq) - - # mult freq to normal freq - p = Period(freq='3A', year=2007) - # ordinal will change because how=E is the default - for freq in ['A', offsets.YearEnd()]: - result = p.asfreq(freq) - expected = Period('2009', freq='A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - # ordinal will not change - for freq in ['A', offsets.YearEnd()]: - result = p.asfreq(freq, how='S') - expected = Period('2007', freq='A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - p = Period(freq='A', year=2007) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq) - expected = Period('2007-12', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq, how='S') - expected = Period('2007-01', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - p = Period(freq='3A', year=2007) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq) - expected = Period('2009-12', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq, how='S') - expected = Period('2007-01', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - def test_asfreq_combined(self): - # normal freq to combined freq - p = Period('2007', freq='H') - - # ordinal will not change - expected = Period('2007', freq='25H') - for freq, how in zip(['1D1H', '1H1D'], ['E', 'S']): - result = p.asfreq(freq, how=how) - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - # combined freq to normal freq - p1 = Period(freq='1D1H', year=2007) - p2 = Period(freq='1H1D', year=2007) - - # ordinal will change because how=E is the default - result1 = p1.asfreq('H') - result2 = p2.asfreq('H') - expected = Period('2007-01-02', freq='H') - self.assertEqual(result1, expected) - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freq, expected.freq) - self.assertEqual(result2, expected) - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freq, expected.freq) - - # ordinal will not change - result1 = p1.asfreq('H', how='S') - result2 = p2.asfreq('H', how='S') - expected = Period('2007-01-01', freq='H') - self.assertEqual(result1, expected) - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freq, expected.freq) - self.assertEqual(result2, expected) - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freq, expected.freq) - - def test_is_leap_year(self): - # GH 13727 - for freq in ['A', 'M', 'D', 'H']: - p = Period('2000-01-01 00:00:00', freq=freq) - self.assertTrue(p.is_leap_year) - self.assertIsInstance(p.is_leap_year, bool) - - p = Period('1999-01-01 00:00:00', freq=freq) - self.assertFalse(p.is_leap_year) - - p = Period('2004-01-01 00:00:00', freq=freq) - self.assertTrue(p.is_leap_year) - - p = Period('2100-01-01 
00:00:00', freq=freq) - self.assertFalse(p.is_leap_year) - - -class TestPeriodIndex(tm.TestCase): - - def setUp(self): - pass - - def test_hash_error(self): - index = period_range('20010101', periods=10) - with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % - type(index).__name__): - hash(index) - - def test_make_time_series(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - series = Series(1, index=index) - tm.assertIsInstance(series, Series) - - def test_constructor_use_start_freq(self): - # GH #1118 - p = Period('4/2/2012', freq='B') - index = PeriodIndex(start=p, periods=10) - expected = PeriodIndex(start='4/2/2012', periods=10, freq='B') - tm.assert_index_equal(index, expected) - - def test_constructor_field_arrays(self): - # GH #1264 - - years = np.arange(1990, 2010).repeat(4)[2:-2] - quarters = np.tile(np.arange(1, 5), 20)[2:-2] - - index = PeriodIndex(year=years, quarter=quarters, freq='Q-DEC') - expected = period_range('1990Q3', '2009Q2', freq='Q-DEC') - tm.assert_index_equal(index, expected) - - index2 = PeriodIndex(year=years, quarter=quarters, freq='2Q-DEC') - tm.assert_numpy_array_equal(index.asi8, index2.asi8) - - index = PeriodIndex(year=years, quarter=quarters) - tm.assert_index_equal(index, expected) - - years = [2007, 2007, 2007] - months = [1, 2] - self.assertRaises(ValueError, PeriodIndex, year=years, month=months, - freq='M') - self.assertRaises(ValueError, PeriodIndex, year=years, month=months, - freq='2M') - self.assertRaises(ValueError, PeriodIndex, year=years, month=months, - freq='M', start=Period('2007-01', freq='M')) - - years = [2007, 2007, 2007] - months = [1, 2, 3] - idx = PeriodIndex(year=years, month=months, freq='M') - exp = period_range('2007-01', periods=3, freq='M') - tm.assert_index_equal(idx, exp) - - def test_constructor_U(self): - # U was used as undefined period - self.assertRaises(ValueError, period_range, '2007-1-1', periods=500, - freq='X') - - def test_constructor_nano(self): - idx = period_range(start=Period(ordinal=1, freq='N'), - end=Period(ordinal=4, freq='N'), freq='N') - exp = PeriodIndex([Period(ordinal=1, freq='N'), - Period(ordinal=2, freq='N'), - Period(ordinal=3, freq='N'), - Period(ordinal=4, freq='N')], freq='N') - tm.assert_index_equal(idx, exp) - - def test_constructor_arrays_negative_year(self): - years = np.arange(1960, 2000, dtype=np.int64).repeat(4) - quarters = np.tile(np.array([1, 2, 3, 4], dtype=np.int64), 40) - - pindex = PeriodIndex(year=years, quarter=quarters) - - self.assert_numpy_array_equal(pindex.year, years) - self.assert_numpy_array_equal(pindex.quarter, quarters) - - def test_constructor_invalid_quarters(self): - self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004), - quarter=lrange(4), freq='Q-DEC') - - def test_constructor_corner(self): - self.assertRaises(ValueError, PeriodIndex, periods=10, freq='A') - - start = Period('2007', freq='A-JUN') - end = Period('2010', freq='A-DEC') - self.assertRaises(ValueError, PeriodIndex, start=start, end=end) - self.assertRaises(ValueError, PeriodIndex, start=start) - self.assertRaises(ValueError, PeriodIndex, end=end) - - result = period_range('2007-01', periods=10.5, freq='M') - exp = period_range('2007-01', periods=10, freq='M') - tm.assert_index_equal(result, exp) - - def test_constructor_fromarraylike(self): - idx = period_range('2007-01', periods=20, freq='M') - - # values is an array of Period, thus can retrieve freq - tm.assert_index_equal(PeriodIndex(idx.values), idx) - 
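# Sketch of the freq inference exercised here: an object array of Period
# values is enough to rebuild an equal PeriodIndex (pandas 0.x API):
import pandas as pd
idx = pd.period_range('2007-01', periods=20, freq='M')
rebuilt = pd.PeriodIndex(idx.values)     # each element is a Period with freq 'M'
assert rebuilt.equals(idx) and rebuilt.freq == idx.freq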
tm.assert_index_equal(PeriodIndex(list(idx.values)), idx) - - self.assertRaises(ValueError, PeriodIndex, idx._values) - self.assertRaises(ValueError, PeriodIndex, list(idx._values)) - self.assertRaises(ValueError, PeriodIndex, - data=Period('2007', freq='A')) - - result = PeriodIndex(iter(idx)) - tm.assert_index_equal(result, idx) - - result = PeriodIndex(idx) - tm.assert_index_equal(result, idx) - - result = PeriodIndex(idx, freq='M') - tm.assert_index_equal(result, idx) - - result = PeriodIndex(idx, freq=offsets.MonthEnd()) - tm.assert_index_equal(result, idx) - self.assertTrue(result.freq, 'M') - - result = PeriodIndex(idx, freq='2M') - tm.assert_index_equal(result, idx.asfreq('2M')) - self.assertTrue(result.freq, '2M') - - result = PeriodIndex(idx, freq=offsets.MonthEnd(2)) - tm.assert_index_equal(result, idx.asfreq('2M')) - self.assertTrue(result.freq, '2M') - - result = PeriodIndex(idx, freq='D') - exp = idx.asfreq('D', 'e') - tm.assert_index_equal(result, exp) - - def test_constructor_datetime64arr(self): - vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64) - vals = vals.view(np.dtype('M8[us]')) - - self.assertRaises(ValueError, PeriodIndex, vals, freq='D') - - def test_constructor_dtype(self): - # passing a dtype with a tz should localize - idx = PeriodIndex(['2013-01', '2013-03'], dtype='period[M]') - exp = PeriodIndex(['2013-01', '2013-03'], freq='M') - tm.assert_index_equal(idx, exp) - self.assertEqual(idx.dtype, 'period[M]') - - idx = PeriodIndex(['2013-01-05', '2013-03-05'], dtype='period[3D]') - exp = PeriodIndex(['2013-01-05', '2013-03-05'], freq='3D') - tm.assert_index_equal(idx, exp) - self.assertEqual(idx.dtype, 'period[3D]') - - # if we already have a freq and its not the same, then asfreq - # (not changed) - idx = PeriodIndex(['2013-01-01', '2013-01-02'], freq='D') - - res = PeriodIndex(idx, dtype='period[M]') - exp = PeriodIndex(['2013-01', '2013-01'], freq='M') - tm.assert_index_equal(res, exp) - self.assertEqual(res.dtype, 'period[M]') - - res = PeriodIndex(idx, freq='M') - tm.assert_index_equal(res, exp) - self.assertEqual(res.dtype, 'period[M]') - - msg = 'specified freq and dtype are different' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - PeriodIndex(['2011-01'], freq='M', dtype='period[D]') - - def test_constructor_empty(self): - idx = pd.PeriodIndex([], freq='M') - tm.assertIsInstance(idx, PeriodIndex) - self.assertEqual(len(idx), 0) - self.assertEqual(idx.freq, 'M') - - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): - pd.PeriodIndex([]) - - def test_constructor_pi_nat(self): - idx = PeriodIndex([Period('2011-01', freq='M'), pd.NaT, - Period('2011-01', freq='M')]) - exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M') - tm.assert_index_equal(idx, exp) - - idx = PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT, - Period('2011-01', freq='M')])) - tm.assert_index_equal(idx, exp) - - idx = PeriodIndex([pd.NaT, pd.NaT, Period('2011-01', freq='M'), - Period('2011-01', freq='M')]) - exp = PeriodIndex(['NaT', 'NaT', '2011-01', '2011-01'], freq='M') - tm.assert_index_equal(idx, exp) - - idx = PeriodIndex(np.array([pd.NaT, pd.NaT, - Period('2011-01', freq='M'), - Period('2011-01', freq='M')])) - tm.assert_index_equal(idx, exp) - - idx = PeriodIndex([pd.NaT, pd.NaT, '2011-01', '2011-01'], freq='M') - tm.assert_index_equal(idx, exp) - - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): - PeriodIndex([pd.NaT, pd.NaT]) - - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): - 
PeriodIndex(np.array([pd.NaT, pd.NaT])) - - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): - PeriodIndex(['NaT', 'NaT']) - - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): - PeriodIndex(np.array(['NaT', 'NaT'])) - - def test_constructor_incompat_freq(self): - msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)" - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - PeriodIndex([Period('2011-01', freq='M'), pd.NaT, - Period('2011-01', freq='D')]) - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT, - Period('2011-01', freq='D')])) - - # first element is pd.NaT - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - PeriodIndex([pd.NaT, Period('2011-01', freq='M'), - Period('2011-01', freq='D')]) - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - PeriodIndex(np.array([pd.NaT, Period('2011-01', freq='M'), - Period('2011-01', freq='D')])) - - def test_constructor_mixed(self): - idx = PeriodIndex(['2011-01', pd.NaT, Period('2011-01', freq='M')]) - exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M') - tm.assert_index_equal(idx, exp) - - idx = PeriodIndex(['NaT', pd.NaT, Period('2011-01', freq='M')]) - exp = PeriodIndex(['NaT', 'NaT', '2011-01'], freq='M') - tm.assert_index_equal(idx, exp) - - idx = PeriodIndex([Period('2011-01-01', freq='D'), pd.NaT, - '2012-01-01']) - exp = PeriodIndex(['2011-01-01', 'NaT', '2012-01-01'], freq='D') - tm.assert_index_equal(idx, exp) - - def test_constructor_simple_new(self): - idx = period_range('2007-01', name='p', periods=2, freq='M') - result = idx._simple_new(idx, 'p', freq=idx.freq) - tm.assert_index_equal(result, idx) - - result = idx._simple_new(idx.astype('i8'), 'p', freq=idx.freq) - tm.assert_index_equal(result, idx) - - result = idx._simple_new([pd.Period('2007-01', freq='M'), - pd.Period('2007-02', freq='M')], - 'p', freq=idx.freq) - self.assert_index_equal(result, idx) - - result = idx._simple_new(np.array([pd.Period('2007-01', freq='M'), - pd.Period('2007-02', freq='M')]), - 'p', freq=idx.freq) - self.assert_index_equal(result, idx) - - def test_constructor_simple_new_empty(self): - # GH13079 - idx = PeriodIndex([], freq='M', name='p') - result = idx._simple_new(idx, name='p', freq='M') - tm.assert_index_equal(result, idx) - - def test_constructor_simple_new_floats(self): - # GH13079 - for floats in [[1.1], np.array([1.1])]: - with self.assertRaises(TypeError): - pd.PeriodIndex._simple_new(floats, freq='M') - - def test_shallow_copy_empty(self): - - # GH13067 - idx = PeriodIndex([], freq='M') - result = idx._shallow_copy() - expected = idx - - tm.assert_index_equal(result, expected) - - def test_constructor_nat(self): - self.assertRaises(ValueError, period_range, start='NaT', - end='2011-01-01', freq='M') - self.assertRaises(ValueError, period_range, start='2011-01-01', - end='NaT', freq='M') - - def test_constructor_year_and_quarter(self): - year = pd.Series([2001, 2002, 2003]) - quarter = year - 2000 - idx = PeriodIndex(year=year, quarter=quarter) - strs = ['%dQ%d' % t for t in zip(quarter, year)] - lops = list(map(Period, strs)) - p = PeriodIndex(lops) - tm.assert_index_equal(p, idx) - - def test_constructor_freq_mult(self): - # GH #7811 - for func in [PeriodIndex, period_range]: - # must be the same, but for sure... 
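# Hedged illustration of the multiplied-frequency construction tested in
# this loop: a '2M' range steps two months per period (pandas 0.x):
import pandas as pd
pidx = pd.period_range(start='2014-01', periods=4, freq='2M')
assert [str(p) for p in pidx] == ['2014-01', '2014-03', '2014-05', '2014-07']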
- pidx = func(start='2014-01', freq='2M', periods=4) - expected = PeriodIndex(['2014-01', '2014-03', - '2014-05', '2014-07'], freq='2M') - tm.assert_index_equal(pidx, expected) - - pidx = func(start='2014-01-02', end='2014-01-15', freq='3D') - expected = PeriodIndex(['2014-01-02', '2014-01-05', - '2014-01-08', '2014-01-11', - '2014-01-14'], freq='3D') - tm.assert_index_equal(pidx, expected) - - pidx = func(end='2014-01-01 17:00', freq='4H', periods=3) - expected = PeriodIndex(['2014-01-01 09:00', '2014-01-01 13:00', - '2014-01-01 17:00'], freq='4H') - tm.assert_index_equal(pidx, expected) - - msg = ('Frequency must be positive, because it' - ' represents span: -1M') - with tm.assertRaisesRegexp(ValueError, msg): - PeriodIndex(['2011-01'], freq='-1M') - - msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assertRaisesRegexp(ValueError, msg): - PeriodIndex(['2011-01'], freq='0M') - - msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assertRaisesRegexp(ValueError, msg): - period_range('2011-01', periods=3, freq='0M') - - def test_constructor_freq_mult_dti_compat(self): - import itertools - mults = [1, 2, 3, 4, 5] - freqs = ['A', 'M', 'D', 'T', 'S'] - for mult, freq in itertools.product(mults, freqs): - freqstr = str(mult) + freq - pidx = PeriodIndex(start='2014-04-01', freq=freqstr, periods=10) - expected = date_range(start='2014-04-01', freq=freqstr, - periods=10).to_period(freqstr) - tm.assert_index_equal(pidx, expected) - - def test_constructor_freq_combined(self): - for freq in ['1D1H', '1H1D']: - pidx = PeriodIndex(['2016-01-01', '2016-01-02'], freq=freq) - expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 00:00'], - freq='25H') - for freq, func in zip(['1D1H', '1H1D'], [PeriodIndex, period_range]): - pidx = func(start='2016-01-01', periods=2, freq=freq) - expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 01:00'], - freq='25H') - tm.assert_index_equal(pidx, expected) - - def test_dtype_str(self): - pi = pd.PeriodIndex([], freq='M') - self.assertEqual(pi.dtype_str, 'period[M]') - self.assertEqual(pi.dtype_str, str(pi.dtype)) - - pi = pd.PeriodIndex([], freq='3M') - self.assertEqual(pi.dtype_str, 'period[3M]') - self.assertEqual(pi.dtype_str, str(pi.dtype)) - - def test_view_asi8(self): - idx = pd.PeriodIndex([], freq='M') - - exp = np.array([], dtype=np.int64) - tm.assert_numpy_array_equal(idx.view('i8'), exp) - tm.assert_numpy_array_equal(idx.asi8, exp) - - idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') - - exp = np.array([492, -9223372036854775808], dtype=np.int64) - tm.assert_numpy_array_equal(idx.view('i8'), exp) - tm.assert_numpy_array_equal(idx.asi8, exp) - - exp = np.array([14975, -9223372036854775808], dtype=np.int64) - idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') - tm.assert_numpy_array_equal(idx.view('i8'), exp) - tm.assert_numpy_array_equal(idx.asi8, exp) - - def test_values(self): - idx = pd.PeriodIndex([], freq='M') - - exp = np.array([], dtype=np.object) - tm.assert_numpy_array_equal(idx.values, exp) - tm.assert_numpy_array_equal(idx.get_values(), exp) - exp = np.array([], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) - - idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') - - exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object) - tm.assert_numpy_array_equal(idx.values, exp) - tm.assert_numpy_array_equal(idx.get_values(), exp) - exp = np.array([492, -9223372036854775808], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) - - idx = 
pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') - - exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT], - dtype=object) - tm.assert_numpy_array_equal(idx.values, exp) - tm.assert_numpy_array_equal(idx.get_values(), exp) - exp = np.array([14975, -9223372036854775808], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) - - def test_asobject_like(self): - idx = pd.PeriodIndex([], freq='M') - - exp = np.array([], dtype=object) - tm.assert_numpy_array_equal(idx.asobject.values, exp) - tm.assert_numpy_array_equal(idx._mpl_repr(), exp) - - idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') - - exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object) - tm.assert_numpy_array_equal(idx.asobject.values, exp) - tm.assert_numpy_array_equal(idx._mpl_repr(), exp) - - exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT], - dtype=object) - idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') - tm.assert_numpy_array_equal(idx.asobject.values, exp) - tm.assert_numpy_array_equal(idx._mpl_repr(), exp) - - def test_is_(self): - create_index = lambda: PeriodIndex(freq='A', start='1/1/2001', - end='12/1/2009') - index = create_index() - self.assertEqual(index.is_(index), True) - self.assertEqual(index.is_(create_index()), False) - self.assertEqual(index.is_(index.view()), True) - self.assertEqual( - index.is_(index.view().view().view().view().view()), True) - self.assertEqual(index.view().is_(index), True) - ind2 = index.view() - index.name = "Apple" - self.assertEqual(ind2.is_(index), True) - self.assertEqual(index.is_(index[:]), False) - self.assertEqual(index.is_(index.asfreq('M')), False) - self.assertEqual(index.is_(index.asfreq('A')), False) - self.assertEqual(index.is_(index - 2), False) - self.assertEqual(index.is_(index - 0), False) - - def test_comp_period(self): - idx = period_range('2007-01', periods=20, freq='M') - - result = idx < idx[10] - exp = idx.values < idx.values[10] - self.assert_numpy_array_equal(result, exp) - - def test_getitem_index(self): - idx = period_range('2007-01', periods=10, freq='M', name='x') - - result = idx[[1, 3, 5]] - exp = pd.PeriodIndex(['2007-02', '2007-04', '2007-06'], - freq='M', name='x') - tm.assert_index_equal(result, exp) - - result = idx[[True, True, False, False, False, - True, True, False, False, False]] - exp = pd.PeriodIndex(['2007-01', '2007-02', '2007-06', '2007-07'], - freq='M', name='x') - tm.assert_index_equal(result, exp) - - def test_getitem_partial(self): - rng = period_range('2007-01', periods=50, freq='M') - ts = Series(np.random.randn(len(rng)), rng) - - self.assertRaises(KeyError, ts.__getitem__, '2006') - - result = ts['2008'] - self.assertTrue((result.index.year == 2008).all()) - - result = ts['2008':'2009'] - self.assertEqual(len(result), 24) - - result = ts['2008-1':'2009-12'] - self.assertEqual(len(result), 24) - - result = ts['2008Q1':'2009Q4'] - self.assertEqual(len(result), 24) - - result = ts[:'2009'] - self.assertEqual(len(result), 36) - - result = ts['2009':] - self.assertEqual(len(result), 50 - 24) - - exp = result - result = ts[24:] - tm.assert_series_equal(exp, result) - - ts = ts[10:].append(ts[10:]) - self.assertRaisesRegexp(KeyError, - "left slice bound for non-unique " - "label: '2008'", - ts.__getitem__, slice('2008', '2009')) - - def test_getitem_datetime(self): - rng = period_range(start='2012-01-01', periods=10, freq='W-MON') - ts = Series(lrange(len(rng)), index=rng) - - dt1 = datetime(2011, 10, 2) - dt4 = datetime(2012, 4, 20) - - rs = ts[dt1:dt4] - tm.assert_series_equal(rs, ts) 
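# A minimal, self-contained sketch of the partial-string selection pinned
# down in test_getitem_partial above, under the same pandas 0.x API:
import numpy as np
import pandas as pd
rng = pd.period_range('2007-01', periods=50, freq='M')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
assert len(ts['2008']) == 12             # a year label selects 12 months
assert len(ts['2008':'2009']) == 24      # label slices are inclusive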
- - def test_getitem_nat(self): - idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') - self.assertEqual(idx[0], pd.Period('2011-01', freq='M')) - self.assertIs(idx[1], tslib.NaT) - - s = pd.Series([0, 1, 2], index=idx) - self.assertEqual(s[pd.NaT], 1) - - s = pd.Series(idx, index=idx) - self.assertEqual(s[pd.Period('2011-01', freq='M')], - pd.Period('2011-01', freq='M')) - self.assertIs(s[pd.NaT], tslib.NaT) - - def test_getitem_list_periods(self): - # GH 7710 - rng = period_range(start='2012-01-01', periods=10, freq='D') - ts = Series(lrange(len(rng)), index=rng) - exp = ts.iloc[[1]] - tm.assert_series_equal(ts[[Period('2012-01-02', freq='D')]], exp) - - def test_slice_with_negative_step(self): - ts = Series(np.arange(20), - period_range('2014-01', periods=20, freq='M')) - SLC = pd.IndexSlice - - def assert_slices_equivalent(l_slc, i_slc): - tm.assert_series_equal(ts[l_slc], ts.iloc[i_slc]) - tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - - assert_slices_equivalent(SLC[Period('2014-10')::-1], SLC[9::-1]) - assert_slices_equivalent(SLC['2014-10'::-1], SLC[9::-1]) - - assert_slices_equivalent(SLC[:Period('2014-10'):-1], SLC[:8:-1]) - assert_slices_equivalent(SLC[:'2014-10':-1], SLC[:8:-1]) - - assert_slices_equivalent(SLC['2015-02':'2014-10':-1], SLC[13:8:-1]) - assert_slices_equivalent(SLC[Period('2015-02'):Period('2014-10'):-1], - SLC[13:8:-1]) - assert_slices_equivalent(SLC['2015-02':Period('2014-10'):-1], - SLC[13:8:-1]) - assert_slices_equivalent(SLC[Period('2015-02'):'2014-10':-1], - SLC[13:8:-1]) - - assert_slices_equivalent(SLC['2014-10':'2015-02':-1], SLC[:0]) - - def test_slice_with_zero_step_raises(self): - ts = Series(np.arange(20), - period_range('2014-01', periods=20, freq='M')) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - - def test_contains(self): - rng = period_range('2007-01', freq='M', periods=10) - - self.assertTrue(Period('2007-01', freq='M') in rng) - self.assertFalse(Period('2007-01', freq='D') in rng) - self.assertFalse(Period('2007-01', freq='2M') in rng) - - def test_contains_nat(self): - # GH13582 - idx = period_range('2007-01', freq='M', periods=10) - self.assertFalse(pd.NaT in idx) - self.assertFalse(None in idx) - self.assertFalse(float('nan') in idx) - self.assertFalse(np.nan in idx) - - idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') - self.assertTrue(pd.NaT in idx) - self.assertTrue(None in idx) - self.assertTrue(float('nan') in idx) - self.assertTrue(np.nan in idx) - - def test_sub(self): - rng = period_range('2007-01', periods=50) - - result = rng - 5 - exp = rng + (-5) - tm.assert_index_equal(result, exp) - - def test_periods_number_check(self): - with tm.assertRaises(ValueError): - period_range('2011-1-1', '2012-1-1', 'B') - - def test_tolist(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - rs = index.tolist() - [tm.assertIsInstance(x, Period) for x in rs] - - recon = PeriodIndex(rs) - tm.assert_index_equal(index, recon) - - def test_to_timestamp(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - series = Series(1, index=index, name='foo') - - exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') - result = series.to_timestamp(how='end') - tm.assert_index_equal(result.index, 
exp_index) - self.assertEqual(result.name, 'foo') - - exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN') - result = series.to_timestamp(how='start') - tm.assert_index_equal(result.index, exp_index) - - def _get_with_delta(delta, freq='A-DEC'): - return date_range(to_datetime('1/1/2001') + delta, - to_datetime('12/31/2009') + delta, freq=freq) - - delta = timedelta(hours=23) - result = series.to_timestamp('H', 'end') - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.index, exp_index) - - delta = timedelta(hours=23, minutes=59) - result = series.to_timestamp('T', 'end') - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.index, exp_index) - - result = series.to_timestamp('S', 'end') - delta = timedelta(hours=23, minutes=59, seconds=59) - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.index, exp_index) - - index = PeriodIndex(freq='H', start='1/1/2001', end='1/2/2001') - series = Series(1, index=index, name='foo') - - exp_index = date_range('1/1/2001 00:59:59', end='1/2/2001 00:59:59', - freq='H') - result = series.to_timestamp(how='end') - tm.assert_index_equal(result.index, exp_index) - self.assertEqual(result.name, 'foo') - - def test_to_timestamp_quarterly_bug(self): - years = np.arange(1960, 2000).repeat(4) - quarters = np.tile(lrange(1, 5), 40) - - pindex = PeriodIndex(year=years, quarter=quarters) - - stamps = pindex.to_timestamp('D', 'end') - expected = DatetimeIndex([x.to_timestamp('D', 'end') for x in pindex]) - tm.assert_index_equal(stamps, expected) - - def test_to_timestamp_preserve_name(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009', - name='foo') - self.assertEqual(index.name, 'foo') - - conv = index.to_timestamp('D') - self.assertEqual(conv.name, 'foo') - - def test_to_timestamp_repr_is_code(self): - zs = [Timestamp('99-04-17 00:00:00', tz='UTC'), - Timestamp('2001-04-17 00:00:00', tz='UTC'), - Timestamp('2001-04-17 00:00:00', tz='America/Los_Angeles'), - Timestamp('2001-04-17 00:00:00', tz=None)] - for z in zs: - self.assertEqual(eval(repr(z)), z) - - def test_to_timestamp_pi_nat(self): - # GH 7228 - index = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', - name='idx') - - result = index.to_timestamp('D') - expected = DatetimeIndex([pd.NaT, datetime(2011, 1, 1), - datetime(2011, 2, 1)], name='idx') - tm.assert_index_equal(result, expected) - self.assertEqual(result.name, 'idx') - - result2 = result.to_period(freq='M') - tm.assert_index_equal(result2, index) - self.assertEqual(result2.name, 'idx') - - result3 = result.to_period(freq='3M') - exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx') - self.assert_index_equal(result3, exp) - self.assertEqual(result3.freqstr, '3M') - - msg = ('Frequency must be positive, because it' - ' represents span: -2A') - with tm.assertRaisesRegexp(ValueError, msg): - result.to_period(freq='-2A') - - def test_to_timestamp_pi_mult(self): - idx = PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='2M', name='idx') - result = idx.to_timestamp() - expected = DatetimeIndex( - ['2011-01-01', 'NaT', '2011-02-01'], name='idx') - self.assert_index_equal(result, expected) - result = idx.to_timestamp(how='E') - expected = DatetimeIndex( - ['2011-02-28', 'NaT', '2011-03-31'], name='idx') - self.assert_index_equal(result, expected) - - def test_to_timestamp_pi_combined(self): - idx = PeriodIndex(start='2011', periods=2, freq='1D1H', name='idx') - result = idx.to_timestamp() - expected = DatetimeIndex( - ['2011-01-01 00:00', '2011-01-02 01:00'], 
name='idx') - self.assert_index_equal(result, expected) - result = idx.to_timestamp(how='E') - expected = DatetimeIndex( - ['2011-01-02 00:59:59', '2011-01-03 01:59:59'], name='idx') - self.assert_index_equal(result, expected) - result = idx.to_timestamp(how='E', freq='H') - expected = DatetimeIndex( - ['2011-01-02 00:00', '2011-01-03 01:00'], name='idx') - self.assert_index_equal(result, expected) - - def test_to_timestamp_to_period_astype(self): - idx = DatetimeIndex([pd.NaT, '2011-01-01', '2011-02-01'], name='idx') - - res = idx.astype('period[M]') - exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx') - tm.assert_index_equal(res, exp) - - res = idx.astype('period[3M]') - exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx') - self.assert_index_equal(res, exp) - - def test_start_time(self): - index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') - expected_index = date_range('2016-01-01', end='2016-05-31', freq='MS') - tm.assert_index_equal(index.start_time, expected_index) - - def test_end_time(self): - index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') - expected_index = date_range('2016-01-01', end='2016-05-31', freq='M') - tm.assert_index_equal(index.end_time, expected_index) - - def test_as_frame_columns(self): - rng = period_range('1/1/2000', periods=5) - df = DataFrame(randn(10, 5), columns=rng) - - ts = df[rng[0]] - tm.assert_series_equal(ts, df.iloc[:, 0]) - - # GH # 1211 - repr(df) - - ts = df['1/1/2000'] - tm.assert_series_equal(ts, df.iloc[:, 0]) - - def test_indexing(self): - - # GH 4390, iat incorrectly indexing - index = period_range('1/1/2001', periods=10) - s = Series(randn(10), index=index) - expected = s[index[0]] - result = s.iat[0] - self.assertEqual(expected, result) - - def test_frame_setitem(self): - rng = period_range('1/1/2000', periods=5, name='index') - df = DataFrame(randn(5, 3), index=rng) - - df['Index'] = rng - rs = Index(df['Index']) - tm.assert_index_equal(rs, rng, check_names=False) - self.assertEqual(rs.name, 'Index') - self.assertEqual(rng.name, 'index') - - rs = df.reset_index().set_index('index') - tm.assertIsInstance(rs.index, PeriodIndex) - tm.assert_index_equal(rs.index, rng) - - def test_period_set_index_reindex(self): - # GH 6631 - df = DataFrame(np.random.random(6)) - idx1 = period_range('2011/01/01', periods=6, freq='M') - idx2 = period_range('2013', periods=6, freq='A') - - df = df.set_index(idx1) - tm.assert_index_equal(df.index, idx1) - df = df.set_index(idx2) - tm.assert_index_equal(df.index, idx2) - - def test_frame_to_time_stamp(self): - K = 5 - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - df = DataFrame(randn(len(index), K), index=index) - df['mix'] = 'a' - - exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') - result = df.to_timestamp('D', 'end') - tm.assert_index_equal(result.index, exp_index) - tm.assert_numpy_array_equal(result.values, df.values) - - exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN') - result = df.to_timestamp('D', 'start') - tm.assert_index_equal(result.index, exp_index) - - def _get_with_delta(delta, freq='A-DEC'): - return date_range(to_datetime('1/1/2001') + delta, - to_datetime('12/31/2009') + delta, freq=freq) - - delta = timedelta(hours=23) - result = df.to_timestamp('H', 'end') - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.index, exp_index) - - delta = timedelta(hours=23, minutes=59) - result = df.to_timestamp('T', 'end') - exp_index = _get_with_delta(delta) - 
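# Sketch of the end-of-period convention asserted in this test: converting
# with how='end' lands on the last day of each period (the pandas 0.x
# behaviour these assertions pin down):
import pandas as pd
pi = pd.period_range('2001', periods=2, freq='A-DEC')
assert pi.to_timestamp('D', how='end')[0] == pd.Timestamp('2001-12-31')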
tm.assert_index_equal(result.index, exp_index) - - result = df.to_timestamp('S', 'end') - delta = timedelta(hours=23, minutes=59, seconds=59) - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.index, exp_index) - - # columns - df = df.T - - exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') - result = df.to_timestamp('D', 'end', axis=1) - tm.assert_index_equal(result.columns, exp_index) - tm.assert_numpy_array_equal(result.values, df.values) - - exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN') - result = df.to_timestamp('D', 'start', axis=1) - tm.assert_index_equal(result.columns, exp_index) - - delta = timedelta(hours=23) - result = df.to_timestamp('H', 'end', axis=1) - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.columns, exp_index) - - delta = timedelta(hours=23, minutes=59) - result = df.to_timestamp('T', 'end', axis=1) - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.columns, exp_index) - - result = df.to_timestamp('S', 'end', axis=1) - delta = timedelta(hours=23, minutes=59, seconds=59) - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.columns, exp_index) - - # invalid axis - tm.assertRaisesRegexp(ValueError, 'axis', df.to_timestamp, axis=2) - - result1 = df.to_timestamp('5t', axis=1) - result2 = df.to_timestamp('t', axis=1) - expected = pd.date_range('2001-01-01', '2009-01-01', freq='AS') - self.assertTrue(isinstance(result1.columns, DatetimeIndex)) - self.assertTrue(isinstance(result2.columns, DatetimeIndex)) - self.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) - self.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) - # PeriodIndex.to_timestamp always use 'infer' - self.assertEqual(result1.columns.freqstr, 'AS-JAN') - self.assertEqual(result2.columns.freqstr, 'AS-JAN') - - def test_index_duplicate_periods(self): - # monotonic - idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN') - ts = Series(np.random.randn(len(idx)), index=idx) - - result = ts[2007] - expected = ts[1:3] - tm.assert_series_equal(result, expected) - result[:] = 1 - self.assertTrue((ts[1:3] == 1).all()) - - # not monotonic - idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN') - ts = Series(np.random.randn(len(idx)), index=idx) - - result = ts[2007] - expected = ts[idx == 2007] - tm.assert_series_equal(result, expected) - - def test_index_unique(self): - idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN') - expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN') - self.assert_index_equal(idx.unique(), expected) - self.assertEqual(idx.nunique(), 3) - - idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN', - tz='US/Eastern') - expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN', - tz='US/Eastern') - self.assert_index_equal(idx.unique(), expected) - self.assertEqual(idx.nunique(), 3) - - def test_constructor(self): - pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 9) - - pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 4 * 9) - - pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 12 * 9) - - pi = PeriodIndex(freq='D', start='1/1/2001', end='12/31/2009') - self.assertEqual(len(pi), 365 * 9 + 2) - - pi = PeriodIndex(freq='B', start='1/1/2001', end='12/31/2009') - self.assertEqual(len(pi), 261 * 9) - - pi = PeriodIndex(freq='H', start='1/1/2001', end='12/31/2001 23:00') - self.assertEqual(len(pi), 365 * 24) - - pi = 
PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 23:59') - self.assertEqual(len(pi), 24 * 60) - - pi = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 23:59:59') - self.assertEqual(len(pi), 24 * 60 * 60) - - start = Period('02-Apr-2005', 'B') - i1 = PeriodIndex(start=start, periods=20) - self.assertEqual(len(i1), 20) - self.assertEqual(i1.freq, start.freq) - self.assertEqual(i1[0], start) - - end_intv = Period('2006-12-31', 'W') - i1 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), 10) - self.assertEqual(i1.freq, end_intv.freq) - self.assertEqual(i1[-1], end_intv) - - end_intv = Period('2006-12-31', '1w') - i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) - self.assertEqual(i1.freq, i2.freq) - - end_intv = Period('2006-12-31', ('w', 1)) - i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) - self.assertEqual(i1.freq, i2.freq) - - try: - PeriodIndex(start=start, end=end_intv) - raise AssertionError('Cannot allow mixed freq for start and end') - except ValueError: - pass - - end_intv = Period('2005-05-01', 'B') - i1 = PeriodIndex(start=start, end=end_intv) - - try: - PeriodIndex(start=start) - raise AssertionError( - 'Must specify periods if missing start or end') - except ValueError: - pass - - # infer freq from first element - i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) - self.assertEqual(len(i2), 2) - self.assertEqual(i2[0], end_intv) - - i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')])) - self.assertEqual(len(i2), 2) - self.assertEqual(i2[0], end_intv) - - # Mixed freq should fail - vals = [end_intv, Period('2006-12-31', 'w')] - self.assertRaises(ValueError, PeriodIndex, vals) - vals = np.array(vals) - self.assertRaises(ValueError, PeriodIndex, vals) - - def test_numpy_repeat(self): - index = period_range('20010101', periods=2) - expected = PeriodIndex([Period('2001-01-01'), Period('2001-01-01'), - Period('2001-01-02'), Period('2001-01-02')]) - - tm.assert_index_equal(np.repeat(index, 2), expected) - - msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.repeat, index, 2, axis=1) - - def test_shift(self): - pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='A', start='1/1/2002', end='12/1/2010') - - tm.assert_index_equal(pi1.shift(0), pi1) - - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(1), pi2) - - pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='A', start='1/1/2000', end='12/1/2008') - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(-1), pi2) - - pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='M', start='2/1/2001', end='1/1/2010') - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(1), pi2) - - pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='M', start='12/1/2000', end='11/1/2009') - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(-1), pi2) - - pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='D', start='1/2/2001', end='12/2/2009') - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(1), pi2) - - pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='D', start='12/31/2000', end='11/30/2009') - 
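# Minimal sketch of PeriodIndex.shift, mirroring the assertions here: a
# shift of n moves each period n steps of its own freq (pandas 0.x):
import pandas as pd
pi = pd.period_range('2001-01', periods=3, freq='M')
shifted = pi.shift(1)
assert shifted.equals(pd.period_range('2001-02', periods=3, freq='M'))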
self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(-1), pi2) - - def test_shift_nat(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - result = idx.shift(1) - expected = PeriodIndex(['2011-02', '2011-03', 'NaT', - '2011-05'], freq='M', name='idx') - tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - - def test_shift_ndarray(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - result = idx.shift(np.array([1, 2, 3, 4])) - expected = PeriodIndex(['2011-02', '2011-04', 'NaT', - '2011-08'], freq='M', name='idx') - tm.assert_index_equal(result, expected) - - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - result = idx.shift(np.array([1, -2, 3, -4])) - expected = PeriodIndex(['2011-02', '2010-12', 'NaT', - '2010-12'], freq='M', name='idx') - tm.assert_index_equal(result, expected) - - def test_asfreq(self): - pi1 = PeriodIndex(freq='A', start='1/1/2001', end='1/1/2001') - pi2 = PeriodIndex(freq='Q', start='1/1/2001', end='1/1/2001') - pi3 = PeriodIndex(freq='M', start='1/1/2001', end='1/1/2001') - pi4 = PeriodIndex(freq='D', start='1/1/2001', end='1/1/2001') - pi5 = PeriodIndex(freq='H', start='1/1/2001', end='1/1/2001 00:00') - pi6 = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 00:00') - pi7 = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 00:00:00') - - self.assertEqual(pi1.asfreq('Q', 'S'), pi2) - self.assertEqual(pi1.asfreq('Q', 's'), pi2) - self.assertEqual(pi1.asfreq('M', 'start'), pi3) - self.assertEqual(pi1.asfreq('D', 'StarT'), pi4) - self.assertEqual(pi1.asfreq('H', 'beGIN'), pi5) - self.assertEqual(pi1.asfreq('Min', 'S'), pi6) - self.assertEqual(pi1.asfreq('S', 'S'), pi7) - - self.assertEqual(pi2.asfreq('A', 'S'), pi1) - self.assertEqual(pi2.asfreq('M', 'S'), pi3) - self.assertEqual(pi2.asfreq('D', 'S'), pi4) - self.assertEqual(pi2.asfreq('H', 'S'), pi5) - self.assertEqual(pi2.asfreq('Min', 'S'), pi6) - self.assertEqual(pi2.asfreq('S', 'S'), pi7) - - self.assertEqual(pi3.asfreq('A', 'S'), pi1) - self.assertEqual(pi3.asfreq('Q', 'S'), pi2) - self.assertEqual(pi3.asfreq('D', 'S'), pi4) - self.assertEqual(pi3.asfreq('H', 'S'), pi5) - self.assertEqual(pi3.asfreq('Min', 'S'), pi6) - self.assertEqual(pi3.asfreq('S', 'S'), pi7) - - self.assertEqual(pi4.asfreq('A', 'S'), pi1) - self.assertEqual(pi4.asfreq('Q', 'S'), pi2) - self.assertEqual(pi4.asfreq('M', 'S'), pi3) - self.assertEqual(pi4.asfreq('H', 'S'), pi5) - self.assertEqual(pi4.asfreq('Min', 'S'), pi6) - self.assertEqual(pi4.asfreq('S', 'S'), pi7) - - self.assertEqual(pi5.asfreq('A', 'S'), pi1) - self.assertEqual(pi5.asfreq('Q', 'S'), pi2) - self.assertEqual(pi5.asfreq('M', 'S'), pi3) - self.assertEqual(pi5.asfreq('D', 'S'), pi4) - self.assertEqual(pi5.asfreq('Min', 'S'), pi6) - self.assertEqual(pi5.asfreq('S', 'S'), pi7) - - self.assertEqual(pi6.asfreq('A', 'S'), pi1) - self.assertEqual(pi6.asfreq('Q', 'S'), pi2) - self.assertEqual(pi6.asfreq('M', 'S'), pi3) - self.assertEqual(pi6.asfreq('D', 'S'), pi4) - self.assertEqual(pi6.asfreq('H', 'S'), pi5) - self.assertEqual(pi6.asfreq('S', 'S'), pi7) - - self.assertEqual(pi7.asfreq('A', 'S'), pi1) - self.assertEqual(pi7.asfreq('Q', 'S'), pi2) - self.assertEqual(pi7.asfreq('M', 'S'), pi3) - self.assertEqual(pi7.asfreq('D', 'S'), pi4) - self.assertEqual(pi7.asfreq('H', 'S'), pi5) - self.assertEqual(pi7.asfreq('Min', 'S'), pi6) - - self.assertRaises(ValueError, pi7.asfreq, 'T', 'foo') - result1 = 
pi1.asfreq('3M') - result2 = pi1.asfreq('M') - expected = PeriodIndex(freq='M', start='2001-12', end='2001-12') - self.assert_numpy_array_equal(result1.asi8, expected.asi8) - self.assertEqual(result1.freqstr, '3M') - self.assert_numpy_array_equal(result2.asi8, expected.asi8) - self.assertEqual(result2.freqstr, 'M') - - def test_asfreq_nat(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M') - result = idx.asfreq(freq='Q') - expected = PeriodIndex(['2011Q1', '2011Q1', 'NaT', '2011Q2'], freq='Q') - tm.assert_index_equal(result, expected) - - def test_asfreq_mult_pi(self): - pi = PeriodIndex(['2001-01', '2001-02', 'NaT', '2001-03'], freq='2M') - - for freq in ['D', '3D']: - result = pi.asfreq(freq) - exp = PeriodIndex(['2001-02-28', '2001-03-31', 'NaT', - '2001-04-30'], freq=freq) - self.assert_index_equal(result, exp) - self.assertEqual(result.freq, exp.freq) - - result = pi.asfreq(freq, how='S') - exp = PeriodIndex(['2001-01-01', '2001-02-01', 'NaT', - '2001-03-01'], freq=freq) - self.assert_index_equal(result, exp) - self.assertEqual(result.freq, exp.freq) - - def test_asfreq_combined_pi(self): - pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], - freq='H') - exp = PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], - freq='25H') - for freq, how in zip(['1D1H', '1H1D'], ['S', 'E']): - result = pi.asfreq(freq, how=how) - self.assert_index_equal(result, exp) - self.assertEqual(result.freq, exp.freq) - - for freq in ['1D1H', '1H1D']: - pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', - 'NaT'], freq=freq) - result = pi.asfreq('H') - exp = PeriodIndex(['2001-01-02 00:00', '2001-01-03 02:00', 'NaT'], - freq='H') - self.assert_index_equal(result, exp) - self.assertEqual(result.freq, exp.freq) - - pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', - 'NaT'], freq=freq) - result = pi.asfreq('H', how='S') - exp = PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], - freq='H') - self.assert_index_equal(result, exp) - self.assertEqual(result.freq, exp.freq) - - def test_period_index_length(self): - pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 9) - - pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 4 * 9) - - pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 12 * 9) - - start = Period('02-Apr-2005', 'B') - i1 = PeriodIndex(start=start, periods=20) - self.assertEqual(len(i1), 20) - self.assertEqual(i1.freq, start.freq) - self.assertEqual(i1[0], start) - - end_intv = Period('2006-12-31', 'W') - i1 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), 10) - self.assertEqual(i1.freq, end_intv.freq) - self.assertEqual(i1[-1], end_intv) - - end_intv = Period('2006-12-31', '1w') - i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) - self.assertEqual(i1.freq, i2.freq) - - end_intv = Period('2006-12-31', ('w', 1)) - i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) - self.assertEqual(i1.freq, i2.freq) - - try: - PeriodIndex(start=start, end=end_intv) - raise AssertionError('Cannot allow mixed freq for start and end') - except ValueError: - pass - - end_intv = Period('2005-05-01', 'B') - i1 = PeriodIndex(start=start, end=end_intv) - - try: - PeriodIndex(start=start) - raise AssertionError( - 'Must specify periods if missing start or end') - except ValueError: - 
pass - - # infer freq from first element - i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) - self.assertEqual(len(i2), 2) - self.assertEqual(i2[0], end_intv) - - i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')])) - self.assertEqual(len(i2), 2) - self.assertEqual(i2[0], end_intv) - - # Mixed freq should fail - vals = [end_intv, Period('2006-12-31', 'w')] - self.assertRaises(ValueError, PeriodIndex, vals) - vals = np.array(vals) - self.assertRaises(ValueError, PeriodIndex, vals) - - def test_frame_index_to_string(self): - index = PeriodIndex(['2011-1', '2011-2', '2011-3'], freq='M') - frame = DataFrame(np.random.randn(3, 4), index=index) - - # it works! - frame.to_string() - - def test_asfreq_ts(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/31/2010') - ts = Series(np.random.randn(len(index)), index=index) - df = DataFrame(np.random.randn(len(index), 3), index=index) - - result = ts.asfreq('D', how='end') - df_result = df.asfreq('D', how='end') - exp_index = index.asfreq('D', how='end') - self.assertEqual(len(result), len(ts)) - tm.assert_index_equal(result.index, exp_index) - tm.assert_index_equal(df_result.index, exp_index) - - result = ts.asfreq('D', how='start') - self.assertEqual(len(result), len(ts)) - tm.assert_index_equal(result.index, index.asfreq('D', how='start')) - - def test_badinput(self): - self.assertRaises(ValueError, Period, '-2000', 'A') - self.assertRaises(tslib.DateParseError, Period, '0', 'A') - self.assertRaises(tslib.DateParseError, Period, '1/1/-2000', 'A') - - def test_negative_ordinals(self): - Period(ordinal=-1000, freq='A') - Period(ordinal=0, freq='A') - - idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq='A') - idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq='A') - tm.assert_index_equal(idx1, idx2) - - def test_dti_to_period(self): - dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') - pi1 = dti.to_period() - pi2 = dti.to_period(freq='D') - pi3 = dti.to_period(freq='3D') - - self.assertEqual(pi1[0], Period('Jan 2005', freq='M')) - self.assertEqual(pi2[0], Period('1/31/2005', freq='D')) - self.assertEqual(pi3[0], Period('1/31/2005', freq='3D')) - - self.assertEqual(pi1[-1], Period('Nov 2005', freq='M')) - self.assertEqual(pi2[-1], Period('11/30/2005', freq='D')) - self.assertEqual(pi3[-1], Period('11/30/2005', freq='3D')) - - tm.assert_index_equal(pi1, period_range('1/1/2005', '11/1/2005', - freq='M')) - tm.assert_index_equal(pi2, period_range('1/1/2005', '11/1/2005', - freq='M').asfreq('D')) - tm.assert_index_equal(pi3, period_range('1/1/2005', '11/1/2005', - freq='M').asfreq('3D')) - - def test_pindex_slice_index(self): - pi = PeriodIndex(start='1/1/10', end='12/31/12', freq='M') - s = Series(np.random.rand(len(pi)), index=pi) - res = s['2010'] - exp = s[0:12] - tm.assert_series_equal(res, exp) - res = s['2011'] - exp = s[12:24] - tm.assert_series_equal(res, exp) - - def test_getitem_day(self): - # GH 6716 - # Confirm DatetimeIndex and PeriodIndex works identically - didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400) - pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) - - for idx in [didx, pidx]: - # getitem against index should raise ValueError - values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', - '2013/02/01 09:00'] - for v in values: - - if _np_version_under1p9: - with tm.assertRaises(ValueError): - idx[v] - else: - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - # with tm.assertRaises(IndexError): - # idx[v] - continue - 
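# Sketch of the resolution rule this test encodes: a label coarser than the
# daily index freq selects a range, so the '2013/01' label below picks the
# 31 daily periods of January (pandas 0.x, as asserted just after this):
import numpy as np
import pandas as pd
pidx = pd.period_range('2013-01-01', periods=400, freq='D')
s = pd.Series(np.random.rand(len(pidx)), index=pidx)
assert len(s['2013/01']) == 31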
- s = Series(np.random.rand(len(idx)), index=idx) - tm.assert_series_equal(s['2013/01'], s[0:31]) - tm.assert_series_equal(s['2013/02'], s[31:59]) - tm.assert_series_equal(s['2014'], s[365:]) - - invalid = ['2013/02/01 9H', '2013/02/01 09:00'] - for v in invalid: - with tm.assertRaises(KeyError): - s[v] - - def test_range_slice_day(self): - # GH 6716 - didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400) - pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) - - # changed to TypeError in 1.12 - # https://github.com/numpy/numpy/pull/6271 - exc = IndexError if _np_version_under1p12 else TypeError - - for idx in [didx, pidx]: - # slices against index should raise IndexError - values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', - '2013/02/01 09:00'] - for v in values: - with tm.assertRaises(exc): - idx[v:] - - s = Series(np.random.rand(len(idx)), index=idx) - - tm.assert_series_equal(s['2013/01/02':], s[1:]) - tm.assert_series_equal(s['2013/01/02':'2013/01/05'], s[1:5]) - tm.assert_series_equal(s['2013/02':], s[31:]) - tm.assert_series_equal(s['2014':], s[365:]) - - invalid = ['2013/02/01 9H', '2013/02/01 09:00'] - for v in invalid: - with tm.assertRaises(exc): - idx[v:] - - def test_getitem_seconds(self): - # GH 6716 - didx = DatetimeIndex(start='2013/01/01 09:00:00', freq='S', - periods=4000) - pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000) - - for idx in [didx, pidx]: - # getitem against index should raise ValueError - values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', - '2013/02/01 09:00'] - for v in values: - if _np_version_under1p9: - with tm.assertRaises(ValueError): - idx[v] - else: - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - # with tm.assertRaises(IndexError): - # idx[v] - continue - - s = Series(np.random.rand(len(idx)), index=idx) - tm.assert_series_equal(s['2013/01/01 10:00'], s[3600:3660]) - tm.assert_series_equal(s['2013/01/01 9H'], s[:3600]) - for d in ['2013/01/01', '2013/01', '2013']: - tm.assert_series_equal(s[d], s) - - def test_range_slice_seconds(self): - # GH 6716 - didx = DatetimeIndex(start='2013/01/01 09:00:00', freq='S', - periods=4000) - pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000) - - # changed to TypeError in 1.12 - # https://github.com/numpy/numpy/pull/6271 - exc = IndexError if _np_version_under1p12 else TypeError - - for idx in [didx, pidx]: - # slices against index should raise IndexError - values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', - '2013/02/01 09:00'] - for v in values: - with tm.assertRaises(exc): - idx[v:] - - s = Series(np.random.rand(len(idx)), index=idx) - - tm.assert_series_equal(s['2013/01/01 09:05':'2013/01/01 09:10'], - s[300:660]) - tm.assert_series_equal(s['2013/01/01 10:00':'2013/01/01 10:05'], - s[3600:3960]) - tm.assert_series_equal(s['2013/01/01 10H':], s[3600:]) - tm.assert_series_equal(s[:'2013/01/01 09:30'], s[:1860]) - for d in ['2013/01/01', '2013/01', '2013']: - tm.assert_series_equal(s[d:], s) - - def test_range_slice_outofbounds(self): - # GH 5407 - didx = DatetimeIndex(start='2013/10/01', freq='D', periods=10) - pidx = PeriodIndex(start='2013/10/01', freq='D', periods=10) - - for idx in [didx, pidx]: - df = DataFrame(dict(units=[100 + i for i in range(10)]), index=idx) - empty = DataFrame(index=idx.__class__([], freq='D'), - columns=['units']) - empty['units'] = empty['units'].astype('int64') - - tm.assert_frame_equal(df['2013/09/01':'2013/09/30'], empty) - 
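# Hedged sketch of the out-of-bounds slicing behaviour pinned here: a label
# slice entirely outside the index yields an empty selection rather than an
# error (pandas 0.x):
import pandas as pd
df = pd.DataFrame({'units': range(10)},
                  index=pd.period_range('2013-10-01', periods=10, freq='D'))
assert len(df['2013-06':'2013-09']) == 0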
tm.assert_frame_equal(df['2013/09/30':'2013/10/02'], df.iloc[:2]) - tm.assert_frame_equal(df['2013/10/01':'2013/10/02'], df.iloc[:2]) - tm.assert_frame_equal(df['2013/10/02':'2013/09/30'], empty) - tm.assert_frame_equal(df['2013/10/15':'2013/10/17'], empty) - tm.assert_frame_equal(df['2013-06':'2013-09'], empty) - tm.assert_frame_equal(df['2013-11':'2013-12'], empty) - - def test_astype_asfreq(self): - pi1 = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'], freq='D') - exp = PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M') - tm.assert_index_equal(pi1.asfreq('M'), exp) - tm.assert_index_equal(pi1.astype('period[M]'), exp) - - exp = PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='3M') - tm.assert_index_equal(pi1.asfreq('3M'), exp) - tm.assert_index_equal(pi1.astype('period[3M]'), exp) - - def test_pindex_fieldaccessor_nat(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2012-03', '2012-04'], freq='D') - - exp = np.array([2011, 2011, -1, 2012, 2012], dtype=np.int64) - self.assert_numpy_array_equal(idx.year, exp) - exp = np.array([1, 2, -1, 3, 4], dtype=np.int64) - self.assert_numpy_array_equal(idx.month, exp) - - def test_pindex_qaccess(self): - pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') - s = Series(np.random.rand(len(pi)), index=pi).cumsum() - # Todo: fix these accessors! - self.assertEqual(s['05Q4'], s[2]) - - def test_period_dt64_round_trip(self): - dti = date_range('1/1/2000', '1/7/2002', freq='B') - pi = dti.to_period() - tm.assert_index_equal(pi.to_timestamp(), dti) - - dti = date_range('1/1/2000', '1/7/2002', freq='B') - pi = dti.to_period(freq='H') - tm.assert_index_equal(pi.to_timestamp(), dti) - - def test_period_astype_to_timestamp(self): - pi = pd.PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M') - - exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01']) - tm.assert_index_equal(pi.astype('datetime64[ns]'), exp) - - exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31']) - tm.assert_index_equal(pi.astype('datetime64[ns]', how='end'), exp) - - exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], - tz='US/Eastern') - res = pi.astype('datetime64[ns, US/Eastern]') - tm.assert_index_equal(pi.astype('datetime64[ns, US/Eastern]'), exp) - - exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'], - tz='US/Eastern') - res = pi.astype('datetime64[ns, US/Eastern]', how='end') - tm.assert_index_equal(res, exp) - - def test_to_period_quarterly(self): - # make sure we can make the round trip - for month in MONTHS: - freq = 'Q-%s' % month - rng = period_range('1989Q3', '1991Q3', freq=freq) - stamps = rng.to_timestamp() - result = stamps.to_period(freq) - tm.assert_index_equal(rng, result) - - def test_to_period_quarterlyish(self): - offsets = ['BQ', 'QS', 'BQS'] - for off in offsets: - rng = date_range('01-Jan-2012', periods=8, freq=off) - prng = rng.to_period() - self.assertEqual(prng.freq, 'Q-DEC') - - def test_to_period_annualish(self): - offsets = ['BA', 'AS', 'BAS'] - for off in offsets: - rng = date_range('01-Jan-2012', periods=8, freq=off) - prng = rng.to_period() - self.assertEqual(prng.freq, 'A-DEC') - - def test_to_period_monthish(self): - offsets = ['MS', 'BM'] - for off in offsets: - rng = date_range('01-Jan-2012', periods=8, freq=off) - prng = rng.to_period() - self.assertEqual(prng.freq, 'M') - - rng = date_range('01-Jan-2012', periods=8, freq='M') - prng = rng.to_period() - self.assertEqual(prng.freq, 'M') - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with 
self.assertRaisesRegexp(ValueError, msg): - date_range('01-Jan-2012', periods=8, freq='EOM') - - def test_multiples(self): - result1 = Period('1989', freq='2A') - result2 = Period('1989', freq='A') - self.assertEqual(result1.ordinal, result2.ordinal) - self.assertEqual(result1.freqstr, '2A-DEC') - self.assertEqual(result2.freqstr, 'A-DEC') - self.assertEqual(result1.freq, offsets.YearEnd(2)) - self.assertEqual(result2.freq, offsets.YearEnd()) - - self.assertEqual((result1 + 1).ordinal, result1.ordinal + 2) - self.assertEqual((1 + result1).ordinal, result1.ordinal + 2) - self.assertEqual((result1 - 1).ordinal, result2.ordinal - 2) - self.assertEqual((-1 + result1).ordinal, result2.ordinal - 2) - - def test_pindex_multiples(self): - pi = PeriodIndex(start='1/1/11', end='12/31/11', freq='2M') - expected = PeriodIndex(['2011-01', '2011-03', '2011-05', '2011-07', - '2011-09', '2011-11'], freq='2M') - tm.assert_index_equal(pi, expected) - self.assertEqual(pi.freq, offsets.MonthEnd(2)) - self.assertEqual(pi.freqstr, '2M') - - pi = period_range(start='1/1/11', end='12/31/11', freq='2M') - tm.assert_index_equal(pi, expected) - self.assertEqual(pi.freq, offsets.MonthEnd(2)) - self.assertEqual(pi.freqstr, '2M') - - pi = period_range(start='1/1/11', periods=6, freq='2M') - tm.assert_index_equal(pi, expected) - self.assertEqual(pi.freq, offsets.MonthEnd(2)) - self.assertEqual(pi.freqstr, '2M') - - def test_iteration(self): - index = PeriodIndex(start='1/1/10', periods=4, freq='B') - - result = list(index) - tm.assertIsInstance(result[0], Period) - self.assertEqual(result[0].freq, index.freq) - - def test_take(self): - index = PeriodIndex(start='1/1/10', end='12/31/12', freq='D', - name='idx') - expected = PeriodIndex([datetime(2010, 1, 6), datetime(2010, 1, 7), - datetime(2010, 1, 9), datetime(2010, 1, 13)], - freq='D', name='idx') - - taken1 = index.take([5, 6, 8, 12]) - taken2 = index[[5, 6, 8, 12]] - - for taken in [taken1, taken2]: - tm.assert_index_equal(taken, expected) - tm.assertIsInstance(taken, PeriodIndex) - self.assertEqual(taken.freq, index.freq) - self.assertEqual(taken.name, expected.name) - - def test_take_fill_value(self): - # GH 12631 - idx = pd.PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'], - name='xxx', freq='D') - result = idx.take(np.array([1, 0, -1])) - expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', freq='D') - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', 'NaT'], - name='xxx', freq='D') - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', freq='D') - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with tm.assertRaises(IndexError): - idx.take(np.array([1, -5])) - - def test_joins(self): - index = period_range('1/1/2000', '1/20/2000', freq='D') - - for kind in ['inner', 'outer', 'left', 'right']: - joined = index.join(index[:-5], how=kind) - - tm.assertIsInstance(joined, PeriodIndex) - self.assertEqual(joined.freq, index.freq) - - def test_join_self(self): - 
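# Sketch of the identity asserted in this test: joining a PeriodIndex with
# itself returns the very same object for every join kind (pandas 0.x):
import pandas as pd
index = pd.period_range('2000-01-01', '2000-01-20', freq='D')
assert index.join(index, how='outer') is index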
index = period_range('1/1/2000', '1/20/2000', freq='D') - - for kind in ['inner', 'outer', 'left', 'right']: - res = index.join(index, how=kind) - self.assertIs(index, res) - - def test_join_does_not_recur(self): - df = tm.makeCustomDataframe( - 3, 2, data_gen_f=lambda *args: np.random.randint(2), - c_idx_type='p', r_idx_type='dt') - s = df.iloc[:2, 0] - - res = s.index.join(df.columns, how='outer') - expected = Index([s.index[0], s.index[1], - df.columns[0], df.columns[1]], object) - tm.assert_index_equal(res, expected) - - def test_align_series(self): - rng = period_range('1/1/2000', '1/1/2010', freq='A') - ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts + ts[::2] - expected = ts + ts - expected[1::2] = np.nan - tm.assert_series_equal(result, expected) - - result = ts + _permute(ts[::2]) - tm.assert_series_equal(result, expected) - - # it works! - for kind in ['inner', 'outer', 'left', 'right']: - ts.align(ts[::2], join=kind) - msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - ts + ts.asfreq('D', how="end") - - def test_align_frame(self): - rng = period_range('1/1/2000', '1/1/2010', freq='A') - ts = DataFrame(np.random.randn(len(rng), 3), index=rng) - - result = ts + ts[::2] - expected = ts + ts - expected.values[1::2] = np.nan - tm.assert_frame_equal(result, expected) - - result = ts + _permute(ts[::2]) - tm.assert_frame_equal(result, expected) - - def test_union(self): - index = period_range('1/1/2000', '1/20/2000', freq='D') - - result = index[:-5].union(index[10:]) - tm.assert_index_equal(result, index) - - # not in order - result = _permute(index[:-5]).union(_permute(index[10:])) - tm.assert_index_equal(result, index) - - # raise if different frequencies - index = period_range('1/1/2000', '1/20/2000', freq='D') - index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') - with tm.assertRaises(period.IncompatibleFrequency): - index.union(index2) - - msg = 'can only call with other PeriodIndex-ed objects' - with tm.assertRaisesRegexp(ValueError, msg): - index.join(index.to_timestamp()) - - index3 = period_range('1/1/2000', '1/20/2000', freq='2D') - with tm.assertRaises(period.IncompatibleFrequency): - index.join(index3) - - def test_union_dataframe_index(self): - rng1 = pd.period_range('1/1/1999', '1/1/2012', freq='M') - s1 = pd.Series(np.random.randn(len(rng1)), rng1) - - rng2 = pd.period_range('1/1/1980', '12/1/2001', freq='M') - s2 = pd.Series(np.random.randn(len(rng2)), rng2) - df = pd.DataFrame({'s1': s1, 's2': s2}) - - exp = pd.period_range('1/1/1980', '1/1/2012', freq='M') - self.assert_index_equal(df.index, exp) - - def test_intersection(self): - index = period_range('1/1/2000', '1/20/2000', freq='D') - - result = index[:-5].intersection(index[10:]) - tm.assert_index_equal(result, index[10:-5]) - - # not in order - left = _permute(index[:-5]) - right = _permute(index[10:]) - result = left.intersection(right).sort_values() - tm.assert_index_equal(result, index[10:-5]) - - # raise if different frequencies - index = period_range('1/1/2000', '1/20/2000', freq='D') - index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') - with tm.assertRaises(period.IncompatibleFrequency): - index.intersection(index2) - - index3 = period_range('1/1/2000', '1/20/2000', freq='2D') - with tm.assertRaises(period.IncompatibleFrequency): - index.intersection(index3) - - def test_intersection_cases(self): - base = period_range('6/1/2000', '6/30/2000', freq='D', name='idx') - - # if target has 
-        # if target has the same name, it is preserved
-        rng2 = period_range('5/15/2000', '6/20/2000', freq='D', name='idx')
-        expected2 = period_range('6/1/2000', '6/20/2000', freq='D',
-                                 name='idx')
-
-        # if target name is different, it will be reset
-        rng3 = period_range('5/15/2000', '6/20/2000', freq='D', name='other')
-        expected3 = period_range('6/1/2000', '6/20/2000', freq='D',
-                                 name=None)
-
-        rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx')
-        expected4 = PeriodIndex([], name='idx', freq='D')
-
-        for (rng, expected) in [(rng2, expected2), (rng3, expected3),
-                                (rng4, expected4)]:
-            result = base.intersection(rng)
-            tm.assert_index_equal(result, expected)
-            self.assertEqual(result.name, expected.name)
-            self.assertEqual(result.freq, expected.freq)
-
-        # non-monotonic
-        base = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-02',
-                            '2011-01-03'], freq='D', name='idx')
-
-        rng2 = PeriodIndex(['2011-01-04', '2011-01-02',
-                            '2011-02-02', '2011-02-03'],
-                           freq='D', name='idx')
-        expected2 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D',
-                                name='idx')
-
-        rng3 = PeriodIndex(['2011-01-04', '2011-01-02', '2011-02-02',
-                            '2011-02-03'],
-                           freq='D', name='other')
-        expected3 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D',
-                                name=None)
-
-        rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx')
-        expected4 = PeriodIndex([], freq='D', name='idx')
-
-        for (rng, expected) in [(rng2, expected2), (rng3, expected3),
-                                (rng4, expected4)]:
-            result = base.intersection(rng)
-            tm.assert_index_equal(result, expected)
-            self.assertEqual(result.name, expected.name)
-            self.assertEqual(result.freq, 'D')
-
-        # empty same freq
-        rng = date_range('6/1/2000', '6/15/2000', freq='T')
-        result = rng[0:0].intersection(rng)
-        self.assertEqual(len(result), 0)
-
-        result = rng.intersection(rng[0:0])
-        self.assertEqual(len(result), 0)
-
-    def test_fields(self):
-        # year, month, day, hour, minute
-        # second, weekofyear, week, dayofweek, weekday, dayofyear, quarter
-        # qyear
-        pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2005')
-        self._check_all_fields(pi)
-
-        pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2002')
-        self._check_all_fields(pi)
-
-        pi = PeriodIndex(freq='M', start='1/1/2001', end='1/1/2002')
-        self._check_all_fields(pi)
-
-        pi = PeriodIndex(freq='D', start='12/1/2001', end='6/1/2001')
-        self._check_all_fields(pi)
-
-        pi = PeriodIndex(freq='B', start='12/1/2001', end='6/1/2001')
-        self._check_all_fields(pi)
-
-        pi = PeriodIndex(freq='H', start='12/31/2001', end='1/1/2002 23:00')
-        self._check_all_fields(pi)
-
-        pi = PeriodIndex(freq='Min', start='12/31/2001', end='1/1/2002 00:20')
-        self._check_all_fields(pi)
-
-        pi = PeriodIndex(freq='S', start='12/31/2001 00:00:00',
-                         end='12/31/2001 00:05:00')
-        self._check_all_fields(pi)
-
-        end_intv = Period('2006-12-31', 'W')
-        i1 = PeriodIndex(end=end_intv, periods=10)
-        self._check_all_fields(i1)
-
-    def _check_all_fields(self, periodindex):
-        fields = ['year', 'month', 'day', 'hour', 'minute', 'second',
-                  'weekofyear', 'week', 'dayofweek', 'weekday', 'dayofyear',
-                  'quarter', 'qyear', 'days_in_month', 'is_leap_year']
-
-        periods = list(periodindex)
-        s = pd.Series(periodindex)
-
-        for field in fields:
-            field_idx = getattr(periodindex, field)
-            self.assertEqual(len(periodindex), len(field_idx))
-            for x, val in zip(periods, field_idx):
-                self.assertEqual(getattr(x, field), val)
-
-            if len(s) == 0:
-                continue
-
-            field_s = getattr(s.dt, field)
-            self.assertEqual(len(periodindex), len(field_s))
-            for x, val in zip(periods, field_s):
-                self.assertEqual(getattr(x, field), val)
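_check_all_fields above asserts that each calendar field is exposed consistently in three places: on the Period scalar, vectorized on the PeriodIndex, and through the Series .dt accessor. A minimal sketch of that triple, using quarter as a field that is stable across pandas versions:

    import pandas as pd

    pi = pd.PeriodIndex(['2001-01', '2001-05', '2001-09'], freq='M')
    s = pd.Series(pi)

    pi[0].quarter           # scalar Period field      -> 1
    list(pi.quarter)        # vectorized on the index  -> [1, 2, 3]
    list(s.dt.quarter)      # same values through .dt  -> [1, 2, 3]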
-
-    def test_is_full(self):
-        index = PeriodIndex([2005, 2007, 2009], freq='A')
-        self.assertFalse(index.is_full)
-
-        index = PeriodIndex([2005, 2006, 2007], freq='A')
-        self.assertTrue(index.is_full)
-
-        index = PeriodIndex([2005, 2005, 2007], freq='A')
-        self.assertFalse(index.is_full)
-
-        index = PeriodIndex([2005, 2005, 2006], freq='A')
-        self.assertTrue(index.is_full)
-
-        index = PeriodIndex([2006, 2005, 2005], freq='A')
-        self.assertRaises(ValueError, getattr, index, 'is_full')
-
-        self.assertTrue(index[:0].is_full)
-
-    def test_map(self):
-        index = PeriodIndex([2005, 2007, 2009], freq='A')
-        result = index.map(lambda x: x + 1)
-        expected = index + 1
-        tm.assert_index_equal(result, expected)
-
-        result = index.map(lambda x: x.ordinal)
-        exp = Index([x.ordinal for x in index])
-        tm.assert_index_equal(result, exp)
-
-    def test_map_with_string_constructor(self):
-        raw = [2005, 2007, 2009]
-        index = PeriodIndex(raw, freq='A')
-        types = str,
-
-        if PY3:
-            # unicode
-            types += text_type,
-
-        for t in types:
-            expected = Index(lmap(t, raw))
-            res = index.map(t)
-
-            # should return an Index
-            tm.assertIsInstance(res, Index)
-
-            # preserve element types
-            self.assertTrue(all(isinstance(resi, t) for resi in res))
-
-            # lastly, values should compare equal
-            tm.assert_index_equal(res, expected)
-
-    def test_convert_array_of_periods(self):
-        rng = period_range('1/1/2000', periods=20, freq='D')
-        periods = list(rng)
-
-        result = pd.Index(periods)
-        tm.assertIsInstance(result, PeriodIndex)
-
-    def test_with_multi_index(self):
-        # #1705
-        index = date_range('1/1/2012', periods=4, freq='12H')
-        index_as_arrays = [index.to_period(freq='D'), index.hour]
-
-        s = Series([0, 1, 2, 3], index_as_arrays)
-
-        tm.assertIsInstance(s.index.levels[0], PeriodIndex)
-
-        tm.assertIsInstance(s.index.values[0][0], Period)
-
-    def test_to_timestamp_1703(self):
-        index = period_range('1/1/2012', periods=4, freq='D')
-
-        result = index.to_timestamp()
-        self.assertEqual(result[0], Timestamp('1/1/2012'))
-
-    def test_to_datetime_depr(self):
-        index = period_range('1/1/2012', periods=4, freq='D')
-
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            result = index.to_datetime()
-        self.assertEqual(result[0], Timestamp('1/1/2012'))
-
-    def test_get_loc_msg(self):
-        idx = period_range('2000-1-1', freq='A', periods=10)
-        bad_period = Period('2012', 'A')
-        self.assertRaises(KeyError, idx.get_loc, bad_period)
-
-        try:
-            idx.get_loc(bad_period)
-        except KeyError as inst:
-            self.assertEqual(inst.args[0], bad_period)
-
-    def test_get_loc_nat(self):
-        didx = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03'])
-        pidx = PeriodIndex(['2011-01-01', 'NaT', '2011-01-03'], freq='M')
-
-        # check DatetimeIndex compat
-        for idx in [didx, pidx]:
-            self.assertEqual(idx.get_loc(pd.NaT), 1)
-            self.assertEqual(idx.get_loc(None), 1)
-            self.assertEqual(idx.get_loc(float('nan')), 1)
-            self.assertEqual(idx.get_loc(np.nan), 1)
-
-    def test_append_concat(self):
-        # #1815
-        d1 = date_range('12/31/1990', '12/31/1999', freq='A-DEC')
-        d2 = date_range('12/31/2000', '12/31/2009', freq='A-DEC')
-
-        s1 = Series(np.random.randn(10), d1)
-        s2 = Series(np.random.randn(10), d2)
-
-        s1 = s1.to_period()
-        s2 = s2.to_period()
-
-        # drops index
-        result = pd.concat([s1, s2])
-        tm.assertIsInstance(result.index, PeriodIndex)
-        self.assertEqual(result.index[0], s1.index[0])
-
-    def test_pickle_freq(self):
-        # GH2891
-        prng = period_range('1/1/2011', '1/1/2012', freq='M')
-        new_prng = 
self.round_trip_pickle(prng) - self.assertEqual(new_prng.freq, offsets.MonthEnd()) - self.assertEqual(new_prng.freqstr, 'M') - - def test_slice_keep_name(self): - idx = period_range('20010101', periods=10, freq='D', name='bob') - self.assertEqual(idx.name, idx[1:].name) - - def test_factorize(self): - idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02', - '2014-03', '2014-03'], freq='M') - - exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) - exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M') - - arr, idx = idx1.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - arr, idx = idx1.factorize(sort=True) - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - idx2 = pd.PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01', - '2014-03', '2014-01'], freq='M') - - exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) - arr, idx = idx2.factorize(sort=True) - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) - exp_idx = PeriodIndex(['2014-03', '2014-02', '2014-01'], freq='M') - arr, idx = idx2.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - def test_recreate_from_data(self): - for o in ['M', 'Q', 'A', 'D', 'B', 'T', 'S', 'L', 'U', 'N', 'H']: - org = PeriodIndex(start='2001/04/01', freq=o, periods=1) - idx = PeriodIndex(org.values, freq=o) - tm.assert_index_equal(idx, org) - - def test_combine_first(self): - # GH 3367 - didx = pd.DatetimeIndex(start='1950-01-31', end='1950-07-31', freq='M') - pidx = pd.PeriodIndex(start=pd.Period('1950-1'), - end=pd.Period('1950-7'), freq='M') - # check to be consistent with DatetimeIndex - for idx in [didx, pidx]: - a = pd.Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx) - b = pd.Series([9, 9, 9, 9, 9, 9, 9], index=idx) - - result = a.combine_first(b) - expected = pd.Series([1, 9, 9, 4, 5, 9, 7], index=idx, - dtype=np.float64) - tm.assert_series_equal(result, expected) - - def test_searchsorted(self): - for freq in ['D', '2D']: - pidx = pd.PeriodIndex(['2014-01-01', '2014-01-02', '2014-01-03', - '2014-01-04', '2014-01-05'], freq=freq) - - p1 = pd.Period('2014-01-01', freq=freq) - self.assertEqual(pidx.searchsorted(p1), 0) - - p2 = pd.Period('2014-01-04', freq=freq) - self.assertEqual(pidx.searchsorted(p2), 3) - - msg = "Input has different freq=H from PeriodIndex" - with self.assertRaisesRegexp(period.IncompatibleFrequency, msg): - pidx.searchsorted(pd.Period('2014-01-01', freq='H')) - - msg = "Input has different freq=5D from PeriodIndex" - with self.assertRaisesRegexp(period.IncompatibleFrequency, msg): - pidx.searchsorted(pd.Period('2014-01-01', freq='5D')) - - with tm.assert_produces_warning(FutureWarning): - pidx.searchsorted(key=p2) - - def test_round_trip(self): - - p = Period('2000Q1') - new_p = self.round_trip_pickle(p) - self.assertEqual(new_p, p) - - -def _permute(obj): - return obj.take(np.random.permutation(len(obj))) - - -class TestMethods(tm.TestCase): - - def test_add(self): - dt1 = Period(freq='D', year=2008, month=1, day=1) - dt2 = Period(freq='D', year=2008, month=1, day=2) - self.assertEqual(dt1 + 1, dt2) - self.assertEqual(1 + dt1, dt2) - - def test_add_pdnat(self): - p = pd.Period('2011-01', freq='M') - self.assertIs(p + pd.NaT, pd.NaT) - self.assertIs(pd.NaT + p, pd.NaT) - - p = pd.Period('NaT', freq='M') - self.assertIs(p + pd.NaT, pd.NaT) - self.assertIs(pd.NaT + p, pd.NaT) - - def 
test_add_raises(self): - # GH 4731 - dt1 = Period(freq='D', year=2008, month=1, day=1) - dt2 = Period(freq='D', year=2008, month=1, day=2) - msg = r"unsupported operand type\(s\)" - with tm.assertRaisesRegexp(TypeError, msg): - dt1 + "str" - - msg = r"unsupported operand type\(s\)" - with tm.assertRaisesRegexp(TypeError, msg): - "str" + dt1 - - with tm.assertRaisesRegexp(TypeError, msg): - dt1 + dt2 - - def test_sub(self): - dt1 = Period('2011-01-01', freq='D') - dt2 = Period('2011-01-15', freq='D') - - self.assertEqual(dt1 - dt2, -14) - self.assertEqual(dt2 - dt1, 14) - - msg = r"Input has different freq=M from Period\(freq=D\)" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - dt1 - pd.Period('2011-02', freq='M') - - def test_add_offset(self): - # freq is DateOffset - for freq in ['A', '2A', '3A']: - p = Period('2011', freq=freq) - exp = Period('2013', freq=freq) - self.assertEqual(p + offsets.YearEnd(2), exp) - self.assertEqual(offsets.YearEnd(2) + p, exp) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p - - for freq in ['M', '2M', '3M']: - p = Period('2011-03', freq=freq) - exp = Period('2011-05', freq=freq) - self.assertEqual(p + offsets.MonthEnd(2), exp) - self.assertEqual(offsets.MonthEnd(2) + p, exp) - - exp = Period('2012-03', freq=freq) - self.assertEqual(p + offsets.MonthEnd(12), exp) - self.assertEqual(offsets.MonthEnd(12) + p, exp) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p - - # freq is Tick - for freq in ['D', '2D', '3D']: - p = Period('2011-04-01', freq=freq) - - exp = Period('2011-04-06', freq=freq) - self.assertEqual(p + offsets.Day(5), exp) - self.assertEqual(offsets.Day(5) + p, exp) - - exp = Period('2011-04-02', freq=freq) - self.assertEqual(p + offsets.Hour(24), exp) - self.assertEqual(offsets.Hour(24) + p, exp) - - exp = Period('2011-04-03', freq=freq) - self.assertEqual(p + np.timedelta64(2, 'D'), exp) - with tm.assertRaises(TypeError): - np.timedelta64(2, 'D') + p - - exp = Period('2011-04-02', freq=freq) - self.assertEqual(p + np.timedelta64(3600 * 24, 's'), exp) - with tm.assertRaises(TypeError): - np.timedelta64(3600 * 24, 's') + p - - exp = Period('2011-03-30', freq=freq) - self.assertEqual(p + timedelta(-2), exp) - self.assertEqual(timedelta(-2) + p, exp) - - exp = Period('2011-04-03', freq=freq) - self.assertEqual(p + timedelta(hours=48), exp) - self.assertEqual(timedelta(hours=48) + p, exp) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(4, 'h'), - timedelta(hours=23)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p - - for freq in ['H', '2H', '3H']: - p = Period('2011-04-01 09:00', freq=freq) - - exp = Period('2011-04-03 09:00', freq=freq) - self.assertEqual(p + offsets.Day(2), exp) - self.assertEqual(offsets.Day(2) + p, exp) - - exp = Period('2011-04-01 12:00', 
freq=freq) - self.assertEqual(p + offsets.Hour(3), exp) - self.assertEqual(offsets.Hour(3) + p, exp) - - exp = Period('2011-04-01 12:00', freq=freq) - self.assertEqual(p + np.timedelta64(3, 'h'), exp) - with tm.assertRaises(TypeError): - np.timedelta64(3, 'h') + p - - exp = Period('2011-04-01 10:00', freq=freq) - self.assertEqual(p + np.timedelta64(3600, 's'), exp) - with tm.assertRaises(TypeError): - np.timedelta64(3600, 's') + p - - exp = Period('2011-04-01 11:00', freq=freq) - self.assertEqual(p + timedelta(minutes=120), exp) - self.assertEqual(timedelta(minutes=120) + p, exp) - - exp = Period('2011-04-05 12:00', freq=freq) - self.assertEqual(p + timedelta(days=4, minutes=180), exp) - self.assertEqual(timedelta(days=4, minutes=180) + p, exp) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(3200, 's'), - timedelta(hours=23, minutes=30)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p - - def test_add_offset_nat(self): - # freq is DateOffset - for freq in ['A', '2A', '3A']: - p = Period('NaT', freq=freq) - for o in [offsets.YearEnd(2)]: - self.assertIs(p + o, tslib.NaT) - self.assertIs(o + p, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - for freq in ['M', '2M', '3M']: - p = Period('NaT', freq=freq) - for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - # freq is Tick - for freq in ['D', '2D', '3D']: - p = Period('NaT', freq=freq) - for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), - np.timedelta64(3600 * 24, 's'), timedelta(-2), - timedelta(hours=48)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(4, 'h'), - timedelta(hours=23)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - for freq in ['H', '2H', '3H']: - p = Period('NaT', freq=freq) - for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), - np.timedelta64(3600, 's'), timedelta(minutes=120), - timedelta(days=4, minutes=180)]: - self.assertIs(p + o, tslib.NaT) - - if not isinstance(o, np.timedelta64): - self.assertIs(o + p, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(3200, 's'), - timedelta(hours=23, minutes=30)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - def test_sub_pdnat(self): - # GH 13071 - p = pd.Period('2011-01', freq='M') - 
self.assertIs(p - pd.NaT, pd.NaT) - self.assertIs(pd.NaT - p, pd.NaT) - - p = pd.Period('NaT', freq='M') - self.assertIs(p - pd.NaT, pd.NaT) - self.assertIs(pd.NaT - p, pd.NaT) - - def test_sub_offset(self): - # freq is DateOffset - for freq in ['A', '2A', '3A']: - p = Period('2011', freq=freq) - self.assertEqual(p - offsets.YearEnd(2), Period('2009', freq=freq)) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o - - for freq in ['M', '2M', '3M']: - p = Period('2011-03', freq=freq) - self.assertEqual(p - offsets.MonthEnd(2), - Period('2011-01', freq=freq)) - self.assertEqual(p - offsets.MonthEnd(12), - Period('2010-03', freq=freq)) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o - - # freq is Tick - for freq in ['D', '2D', '3D']: - p = Period('2011-04-01', freq=freq) - self.assertEqual(p - offsets.Day(5), - Period('2011-03-27', freq=freq)) - self.assertEqual(p - offsets.Hour(24), - Period('2011-03-31', freq=freq)) - self.assertEqual(p - np.timedelta64(2, 'D'), - Period('2011-03-30', freq=freq)) - self.assertEqual(p - np.timedelta64(3600 * 24, 's'), - Period('2011-03-31', freq=freq)) - self.assertEqual(p - timedelta(-2), - Period('2011-04-03', freq=freq)) - self.assertEqual(p - timedelta(hours=48), - Period('2011-03-30', freq=freq)) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(4, 'h'), - timedelta(hours=23)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o - - for freq in ['H', '2H', '3H']: - p = Period('2011-04-01 09:00', freq=freq) - self.assertEqual(p - offsets.Day(2), - Period('2011-03-30 09:00', freq=freq)) - self.assertEqual(p - offsets.Hour(3), - Period('2011-04-01 06:00', freq=freq)) - self.assertEqual(p - np.timedelta64(3, 'h'), - Period('2011-04-01 06:00', freq=freq)) - self.assertEqual(p - np.timedelta64(3600, 's'), - Period('2011-04-01 08:00', freq=freq)) - self.assertEqual(p - timedelta(minutes=120), - Period('2011-04-01 07:00', freq=freq)) - self.assertEqual(p - timedelta(days=4, minutes=180), - Period('2011-03-28 06:00', freq=freq)) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(3200, 's'), - timedelta(hours=23, minutes=30)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o - - def test_sub_offset_nat(self): - # freq is DateOffset - for freq in ['A', '2A', '3A']: - p = Period('NaT', freq=freq) - for o in [offsets.YearEnd(2)]: - self.assertIs(p - o, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - self.assertIs(p - o, tslib.NaT) - - for freq in ['M', '2M', '3M']: - p = Period('NaT', freq=freq) - for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: - self.assertIs(p - o, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - self.assertIs(p - o, tslib.NaT) - - # freq is Tick - for freq in ['D', '2D', '3D']: - p = Period('NaT', freq=freq) - for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), - np.timedelta64(3600 * 24, 's'), timedelta(-2), - timedelta(hours=48)]: - self.assertIs(p - o, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(4, 'h'), - 
timedelta(hours=23)]: - self.assertIs(p - o, tslib.NaT) - - for freq in ['H', '2H', '3H']: - p = Period('NaT', freq=freq) - for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), - np.timedelta64(3600, 's'), timedelta(minutes=120), - timedelta(days=4, minutes=180)]: - self.assertIs(p - o, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(3200, 's'), - timedelta(hours=23, minutes=30)]: - self.assertIs(p - o, tslib.NaT) - - def test_nat_ops(self): - for freq in ['M', '2M', '3M']: - p = Period('NaT', freq=freq) - self.assertIs(p + 1, tslib.NaT) - self.assertIs(1 + p, tslib.NaT) - self.assertIs(p - 1, tslib.NaT) - self.assertIs(p - Period('2011-01', freq=freq), tslib.NaT) - self.assertIs(Period('2011-01', freq=freq) - p, tslib.NaT) - - def test_period_ops_offset(self): - p = Period('2011-04-01', freq='D') - result = p + offsets.Day() - exp = pd.Period('2011-04-02', freq='D') - self.assertEqual(result, exp) - - result = p - offsets.Day(2) - exp = pd.Period('2011-03-30', freq='D') - self.assertEqual(result, exp) - - msg = r"Input cannot be converted to Period\(freq=D\)" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - p + offsets.Hour(2) - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - p - offsets.Hour(2) - - -class TestPeriodIndexSeriesMethods(tm.TestCase): - """ Test PeriodIndex and Period Series Ops consistency """ - - def _check(self, values, func, expected): - idx = pd.PeriodIndex(values) - result = func(idx) - if isinstance(expected, pd.Index): - tm.assert_index_equal(result, expected) - else: - # comp op results in bool - tm.assert_numpy_array_equal(result, expected) - - s = pd.Series(values) - result = func(s) - - exp = pd.Series(expected, name=values.name) - tm.assert_series_equal(result, exp) - - def test_pi_ops(self): - idx = PeriodIndex(['2011-01', '2011-02', '2011-03', - '2011-04'], freq='M', name='idx') - - expected = PeriodIndex(['2011-03', '2011-04', - '2011-05', '2011-06'], freq='M', name='idx') - self._check(idx, lambda x: x + 2, expected) - self._check(idx, lambda x: 2 + x, expected) - - self._check(idx + 2, lambda x: x - 2, idx) - result = idx - Period('2011-01', freq='M') - exp = pd.Index([0, 1, 2, 3], name='idx') - tm.assert_index_equal(result, exp) - - result = Period('2011-01', freq='M') - idx - exp = pd.Index([0, -1, -2, -3], name='idx') - tm.assert_index_equal(result, exp) - - def test_pi_ops_errors(self): - idx = PeriodIndex(['2011-01', '2011-02', '2011-03', - '2011-04'], freq='M', name='idx') - s = pd.Series(idx) - - msg = r"unsupported operand type\(s\)" - - for obj in [idx, s]: - for ng in ["str", 1.5]: - with tm.assertRaisesRegexp(TypeError, msg): - obj + ng - - with tm.assertRaises(TypeError): - # error message differs between PY2 and 3 - ng + obj - - with tm.assertRaisesRegexp(TypeError, msg): - obj - ng - - with tm.assertRaises(TypeError): - np.add(obj, ng) - - if _np_version_under1p10: - self.assertIs(np.add(ng, obj), NotImplemented) - else: - with tm.assertRaises(TypeError): - np.add(ng, obj) - - with tm.assertRaises(TypeError): - np.subtract(obj, ng) - - if _np_version_under1p10: - self.assertIs(np.subtract(ng, obj), NotImplemented) - else: - with tm.assertRaises(TypeError): - np.subtract(ng, obj) - - def test_pi_ops_nat(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - expected = PeriodIndex(['2011-03', '2011-04', - 'NaT', '2011-06'], freq='M', name='idx') - self._check(idx, lambda x: x + 2, expected) - 
self._check(idx, lambda x: 2 + x, expected) - self._check(idx, lambda x: np.add(x, 2), expected) - - self._check(idx + 2, lambda x: x - 2, idx) - self._check(idx + 2, lambda x: np.subtract(x, 2), idx) - - # freq with mult - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='2M', name='idx') - expected = PeriodIndex(['2011-07', '2011-08', - 'NaT', '2011-10'], freq='2M', name='idx') - self._check(idx, lambda x: x + 3, expected) - self._check(idx, lambda x: 3 + x, expected) - self._check(idx, lambda x: np.add(x, 3), expected) - - self._check(idx + 3, lambda x: x - 3, idx) - self._check(idx + 3, lambda x: np.subtract(x, 3), idx) - - def test_pi_ops_array_int(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - f = lambda x: x + np.array([1, 2, 3, 4]) - exp = PeriodIndex(['2011-02', '2011-04', 'NaT', - '2011-08'], freq='M', name='idx') - self._check(idx, f, exp) - - f = lambda x: np.add(x, np.array([4, -1, 1, 2])) - exp = PeriodIndex(['2011-05', '2011-01', 'NaT', - '2011-06'], freq='M', name='idx') - self._check(idx, f, exp) - - f = lambda x: x - np.array([1, 2, 3, 4]) - exp = PeriodIndex(['2010-12', '2010-12', 'NaT', - '2010-12'], freq='M', name='idx') - self._check(idx, f, exp) - - f = lambda x: np.subtract(x, np.array([3, 2, 3, -2])) - exp = PeriodIndex(['2010-10', '2010-12', 'NaT', - '2011-06'], freq='M', name='idx') - self._check(idx, f, exp) - - def test_pi_ops_offset(self): - idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01', - '2011-04-01'], freq='D', name='idx') - f = lambda x: x + offsets.Day() - exp = PeriodIndex(['2011-01-02', '2011-02-02', '2011-03-02', - '2011-04-02'], freq='D', name='idx') - self._check(idx, f, exp) - - f = lambda x: x + offsets.Day(2) - exp = PeriodIndex(['2011-01-03', '2011-02-03', '2011-03-03', - '2011-04-03'], freq='D', name='idx') - self._check(idx, f, exp) - - f = lambda x: x - offsets.Day(2) - exp = PeriodIndex(['2010-12-30', '2011-01-30', '2011-02-27', - '2011-03-30'], freq='D', name='idx') - self._check(idx, f, exp) - - def test_pi_offset_errors(self): - idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01', - '2011-04-01'], freq='D', name='idx') - s = pd.Series(idx) - - # Series op is applied per Period instance, thus error is raised - # from Period - msg_idx = r"Input has different freq from PeriodIndex\(freq=D\)" - msg_s = r"Input cannot be converted to Period\(freq=D\)" - for obj, msg in [(idx, msg_idx), (s, msg_s)]: - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - obj + offsets.Hour(2) - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - offsets.Hour(2) + obj - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - obj - offsets.Hour(2) - - def test_pi_sub_period(self): - # GH 13071 - idx = PeriodIndex(['2011-01', '2011-02', '2011-03', - '2011-04'], freq='M', name='idx') - - result = idx - pd.Period('2012-01', freq='M') - exp = pd.Index([-12, -11, -10, -9], name='idx') - tm.assert_index_equal(result, exp) - - result = np.subtract(idx, pd.Period('2012-01', freq='M')) - tm.assert_index_equal(result, exp) - - result = pd.Period('2012-01', freq='M') - idx - exp = pd.Index([12, 11, 10, 9], name='idx') - tm.assert_index_equal(result, exp) - - result = np.subtract(pd.Period('2012-01', freq='M'), idx) - if _np_version_under1p10: - self.assertIs(result, NotImplemented) - else: - tm.assert_index_equal(result, exp) - - exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') - tm.assert_index_equal(idx - pd.Period('NaT', 
freq='M'), exp) - tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) - - def test_pi_sub_pdnat(self): - # GH 13071 - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - exp = pd.TimedeltaIndex([pd.NaT] * 4, name='idx') - tm.assert_index_equal(pd.NaT - idx, exp) - tm.assert_index_equal(idx - pd.NaT, exp) - - def test_pi_sub_period_nat(self): - # GH 13071 - idx = PeriodIndex(['2011-01', 'NaT', '2011-03', - '2011-04'], freq='M', name='idx') - - result = idx - pd.Period('2012-01', freq='M') - exp = pd.Index([-12, np.nan, -10, -9], name='idx') - tm.assert_index_equal(result, exp) - - result = pd.Period('2012-01', freq='M') - idx - exp = pd.Index([12, np.nan, 10, 9], name='idx') - tm.assert_index_equal(result, exp) - - exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') - tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp) - tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) - - def test_pi_comp_period(self): - idx = PeriodIndex(['2011-01', '2011-02', '2011-03', - '2011-04'], freq='M', name='idx') - - f = lambda x: x == pd.Period('2011-03', freq='M') - exp = np.array([False, False, True, False], dtype=np.bool) - self._check(idx, f, exp) - f = lambda x: pd.Period('2011-03', freq='M') == x - self._check(idx, f, exp) - - f = lambda x: x != pd.Period('2011-03', freq='M') - exp = np.array([True, True, False, True], dtype=np.bool) - self._check(idx, f, exp) - f = lambda x: pd.Period('2011-03', freq='M') != x - self._check(idx, f, exp) - - f = lambda x: pd.Period('2011-03', freq='M') >= x - exp = np.array([True, True, True, False], dtype=np.bool) - self._check(idx, f, exp) - - f = lambda x: x > pd.Period('2011-03', freq='M') - exp = np.array([False, False, False, True], dtype=np.bool) - self._check(idx, f, exp) - - f = lambda x: pd.Period('2011-03', freq='M') >= x - exp = np.array([True, True, True, False], dtype=np.bool) - self._check(idx, f, exp) - - def test_pi_comp_period_nat(self): - idx = PeriodIndex(['2011-01', 'NaT', '2011-03', - '2011-04'], freq='M', name='idx') - - f = lambda x: x == pd.Period('2011-03', freq='M') - exp = np.array([False, False, True, False], dtype=np.bool) - self._check(idx, f, exp) - f = lambda x: pd.Period('2011-03', freq='M') == x - self._check(idx, f, exp) - - f = lambda x: x == tslib.NaT - exp = np.array([False, False, False, False], dtype=np.bool) - self._check(idx, f, exp) - f = lambda x: tslib.NaT == x - self._check(idx, f, exp) - - f = lambda x: x != pd.Period('2011-03', freq='M') - exp = np.array([True, True, False, True], dtype=np.bool) - self._check(idx, f, exp) - f = lambda x: pd.Period('2011-03', freq='M') != x - self._check(idx, f, exp) - - f = lambda x: x != tslib.NaT - exp = np.array([True, True, True, True], dtype=np.bool) - self._check(idx, f, exp) - f = lambda x: tslib.NaT != x - self._check(idx, f, exp) - - f = lambda x: pd.Period('2011-03', freq='M') >= x - exp = np.array([True, False, True, False], dtype=np.bool) - self._check(idx, f, exp) - - f = lambda x: x < pd.Period('2011-03', freq='M') - exp = np.array([True, False, False, False], dtype=np.bool) - self._check(idx, f, exp) - - f = lambda x: x > tslib.NaT - exp = np.array([False, False, False, False], dtype=np.bool) - self._check(idx, f, exp) - - f = lambda x: tslib.NaT >= x - exp = np.array([False, False, False, False], dtype=np.bool) - self._check(idx, f, exp) - - -class TestPeriodRepresentation(tm.TestCase): - """ - Wish to match NumPy units - """ - - def test_annual(self): - self._check_freq('A', 1970) - - def 
test_monthly(self): - self._check_freq('M', '1970-01') - - def test_weekly(self): - self._check_freq('W-THU', '1970-01-01') - - def test_daily(self): - self._check_freq('D', '1970-01-01') - - def test_business_daily(self): - self._check_freq('B', '1970-01-01') - - def test_hourly(self): - self._check_freq('H', '1970-01-01') - - def test_minutely(self): - self._check_freq('T', '1970-01-01') - - def test_secondly(self): - self._check_freq('S', '1970-01-01') - - def test_millisecondly(self): - self._check_freq('L', '1970-01-01') - - def test_microsecondly(self): - self._check_freq('U', '1970-01-01') - - def test_nanosecondly(self): - self._check_freq('N', '1970-01-01') - - def _check_freq(self, freq, base_date): - rng = PeriodIndex(start=base_date, periods=10, freq=freq) - exp = np.arange(10, dtype=np.int64) - self.assert_numpy_array_equal(rng._values, exp) - self.assert_numpy_array_equal(rng.asi8, exp) - - def test_negone_ordinals(self): - freqs = ['A', 'M', 'Q', 'D', 'H', 'T', 'S'] - - period = Period(ordinal=-1, freq='D') - for freq in freqs: - repr(period.asfreq(freq)) - - for freq in freqs: - period = Period(ordinal=-1, freq=freq) - repr(period) - self.assertEqual(period.year, 1969) - - period = Period(ordinal=-1, freq='B') - repr(period) - period = Period(ordinal=-1, freq='W') - repr(period) - - -class TestComparisons(tm.TestCase): - - def setUp(self): - self.january1 = Period('2000-01', 'M') - self.january2 = Period('2000-01', 'M') - self.february = Period('2000-02', 'M') - self.march = Period('2000-03', 'M') - self.day = Period('2012-01-01', 'D') - - def test_equal(self): - self.assertEqual(self.january1, self.january2) - - def test_equal_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 == self.day - - def test_notEqual(self): - self.assertNotEqual(self.january1, 1) - self.assertNotEqual(self.january1, self.february) - - def test_greater(self): - self.assertTrue(self.february > self.january1) - - def test_greater_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 > self.day - - def test_greater_Raises_Type(self): - with tm.assertRaises(TypeError): - self.january1 > 1 - - def test_greaterEqual(self): - self.assertTrue(self.january1 >= self.january2) - - def test_greaterEqual_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 >= self.day - - with tm.assertRaises(TypeError): - print(self.january1 >= 1) - - def test_smallerEqual(self): - self.assertTrue(self.january1 <= self.january2) - - def test_smallerEqual_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 <= self.day - - def test_smallerEqual_Raises_Type(self): - with tm.assertRaises(TypeError): - self.january1 <= 1 - - def test_smaller(self): - self.assertTrue(self.january1 < self.february) - - def test_smaller_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 < self.day - - def test_smaller_Raises_Type(self): - with tm.assertRaises(TypeError): - self.january1 < 1 - - def test_sort(self): - periods = [self.march, self.january1, self.february] - correctPeriods = [self.january1, self.february, self.march] - self.assertEqual(sorted(periods), correctPeriods) - - def test_period_nat_comp(self): - p_nat = Period('NaT', freq='D') - p = Period('2011-01-01', freq='D') - - nat = pd.Timestamp('NaT') - t = pd.Timestamp('2011-01-01') - # confirm Period('NaT') work identical with Timestamp('NaT') - for left, right in [(p_nat, p), (p, p_nat), (p_nat, 
p_nat), (nat, t), - (t, nat), (nat, nat)]: - self.assertEqual(left < right, False) - self.assertEqual(left > right, False) - self.assertEqual(left == right, False) - self.assertEqual(left != right, True) - self.assertEqual(left <= right, False) - self.assertEqual(left >= right, False) - - def test_pi_pi_comp(self): - - for freq in ['M', '2M', '3M']: - base = PeriodIndex(['2011-01', '2011-02', - '2011-03', '2011-04'], freq=freq) - p = Period('2011-02', freq=freq) - - exp = np.array([False, True, False, False]) - self.assert_numpy_array_equal(base == p, exp) - self.assert_numpy_array_equal(p == base, exp) - - exp = np.array([True, False, True, True]) - self.assert_numpy_array_equal(base != p, exp) - self.assert_numpy_array_equal(p != base, exp) - - exp = np.array([False, False, True, True]) - self.assert_numpy_array_equal(base > p, exp) - self.assert_numpy_array_equal(p < base, exp) - - exp = np.array([True, False, False, False]) - self.assert_numpy_array_equal(base < p, exp) - self.assert_numpy_array_equal(p > base, exp) - - exp = np.array([False, True, True, True]) - self.assert_numpy_array_equal(base >= p, exp) - self.assert_numpy_array_equal(p <= base, exp) - - exp = np.array([True, True, False, False]) - self.assert_numpy_array_equal(base <= p, exp) - self.assert_numpy_array_equal(p >= base, exp) - - idx = PeriodIndex(['2011-02', '2011-01', '2011-03', - '2011-05'], freq=freq) - - exp = np.array([False, False, True, False]) - self.assert_numpy_array_equal(base == idx, exp) - - exp = np.array([True, True, False, True]) - self.assert_numpy_array_equal(base != idx, exp) - - exp = np.array([False, True, False, False]) - self.assert_numpy_array_equal(base > idx, exp) - - exp = np.array([True, False, False, True]) - self.assert_numpy_array_equal(base < idx, exp) - - exp = np.array([False, True, True, False]) - self.assert_numpy_array_equal(base >= idx, exp) - - exp = np.array([True, False, True, True]) - self.assert_numpy_array_equal(base <= idx, exp) - - # different base freq - msg = "Input has different freq=A-DEC from PeriodIndex" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - base <= Period('2011', freq='A') - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - Period('2011', freq='A') >= base - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='A') - base <= idx - - # different mult - msg = "Input has different freq=4M from PeriodIndex" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - base <= Period('2011', freq='4M') - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - Period('2011', freq='4M') >= base - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='4M') - base <= idx - - def test_pi_nat_comp(self): - for freq in ['M', '2M', '3M']: - idx1 = PeriodIndex( - ['2011-01', '2011-02', 'NaT', '2011-05'], freq=freq) - - result = idx1 > Period('2011-02', freq=freq) - exp = np.array([False, False, False, True]) - self.assert_numpy_array_equal(result, exp) - result = Period('2011-02', freq=freq) < idx1 - self.assert_numpy_array_equal(result, exp) - - result = idx1 == Period('NaT', freq=freq) - exp = np.array([False, False, False, False]) - self.assert_numpy_array_equal(result, exp) - result = Period('NaT', freq=freq) == idx1 - self.assert_numpy_array_equal(result, exp) - - result = idx1 != Period('NaT', freq=freq) - exp = np.array([True, True, True, True]) - 
self.assert_numpy_array_equal(result, exp) - result = Period('NaT', freq=freq) != idx1 - self.assert_numpy_array_equal(result, exp) - - idx2 = PeriodIndex(['2011-02', '2011-01', '2011-04', - 'NaT'], freq=freq) - result = idx1 < idx2 - exp = np.array([True, False, False, False]) - self.assert_numpy_array_equal(result, exp) - - result = idx1 == idx2 - exp = np.array([False, False, False, False]) - self.assert_numpy_array_equal(result, exp) - - result = idx1 != idx2 - exp = np.array([True, True, True, True]) - self.assert_numpy_array_equal(result, exp) - - result = idx1 == idx1 - exp = np.array([True, True, False, True]) - self.assert_numpy_array_equal(result, exp) - - result = idx1 != idx1 - exp = np.array([False, False, True, False]) - self.assert_numpy_array_equal(result, exp) - - diff = PeriodIndex(['2011-02', '2011-01', '2011-04', - 'NaT'], freq='4M') - msg = "Input has different freq=4M from PeriodIndex" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - idx1 > diff - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - idx1 == diff - - -class TestSeriesPeriod(tm.TestCase): - - def setUp(self): - self.series = Series(period_range('2000-01-01', periods=10, freq='D')) - - def test_auto_conversion(self): - series = Series(list(period_range('2000-01-01', periods=10, freq='D'))) - self.assertEqual(series.dtype, 'object') - - series = pd.Series([pd.Period('2011-01-01', freq='D'), - pd.Period('2011-02-01', freq='D')]) - self.assertEqual(series.dtype, 'object') - - def test_getitem(self): - self.assertEqual(self.series[1], pd.Period('2000-01-02', freq='D')) - - result = self.series[[2, 4]] - exp = pd.Series([pd.Period('2000-01-03', freq='D'), - pd.Period('2000-01-05', freq='D')], - index=[2, 4]) - self.assert_series_equal(result, exp) - self.assertEqual(result.dtype, 'object') - - def test_constructor_cant_cast_period(self): - with tm.assertRaises(TypeError): - Series(period_range('2000-01-01', periods=10, freq='D'), - dtype=float) - - def test_constructor_cast_object(self): - s = Series(period_range('1/1/2000', periods=10), dtype=object) - exp = Series(period_range('1/1/2000', periods=10)) - tm.assert_series_equal(s, exp) - - def test_isnull(self): - # GH 13737 - s = Series([pd.Period('2011-01', freq='M'), - pd.Period('NaT', freq='M')]) - tm.assert_series_equal(s.isnull(), Series([False, True])) - tm.assert_series_equal(s.notnull(), Series([True, False])) - - def test_fillna(self): - # GH 13737 - s = Series([pd.Period('2011-01', freq='M'), - pd.Period('NaT', freq='M')]) - - res = s.fillna(pd.Period('2012-01', freq='M')) - exp = Series([pd.Period('2011-01', freq='M'), - pd.Period('2012-01', freq='M')]) - tm.assert_series_equal(res, exp) - self.assertEqual(res.dtype, 'object') - - res = s.fillna('XXX') - exp = Series([pd.Period('2011-01', freq='M'), 'XXX']) - tm.assert_series_equal(res, exp) - self.assertEqual(res.dtype, 'object') - - def test_dropna(self): - # GH 13737 - s = Series([pd.Period('2011-01', freq='M'), - pd.Period('NaT', freq='M')]) - tm.assert_series_equal(s.dropna(), - Series([pd.Period('2011-01', freq='M')])) - - def test_series_comparison_scalars(self): - val = pd.Period('2000-01-04', freq='D') - result = self.series > val - expected = pd.Series([x > val for x in self.series]) - tm.assert_series_equal(result, expected) - - val = self.series[5] - result = self.series > val - expected = pd.Series([x > val for x in self.series]) - tm.assert_series_equal(result, expected) - - def test_between(self): - left, right = self.series[[2, 7]] - result = 
self.series.between(left, right) - expected = (self.series >= left) & (self.series <= right) - tm.assert_series_equal(result, expected) - - # --------------------------------------------------------------------- - # NaT support - - """ - # ToDo: Enable when support period dtype - def test_NaT_scalar(self): - series = Series([0, 1000, 2000, iNaT], dtype='period[D]') - - val = series[3] - self.assertTrue(isnull(val)) - - series[2] = val - self.assertTrue(isnull(series[2])) - - def test_NaT_cast(self): - result = Series([np.nan]).astype('period[D]') - expected = Series([NaT]) - tm.assert_series_equal(result, expected) - """ - - def test_set_none_nan(self): - # currently Period is stored as object dtype, not as NaT - self.series[3] = None - self.assertIs(self.series[3], None) - - self.series[3:5] = None - self.assertIs(self.series[4], None) - - self.series[5] = np.nan - self.assertTrue(np.isnan(self.series[5])) - - self.series[5:7] = np.nan - self.assertTrue(np.isnan(self.series[6])) - - def test_intercept_astype_object(self): - expected = self.series.astype('object') - - df = DataFrame({'a': self.series, - 'b': np.random.randn(len(self.series))}) - - result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) - - df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)}) - - result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) - - def test_ops_series_timedelta(self): - # GH 13043 - s = pd.Series([pd.Period('2015-01-01', freq='D'), - pd.Period('2015-01-02', freq='D')], name='xxx') - self.assertEqual(s.dtype, object) - - exp = pd.Series([pd.Period('2015-01-02', freq='D'), - pd.Period('2015-01-03', freq='D')], name='xxx') - tm.assert_series_equal(s + pd.Timedelta('1 days'), exp) - tm.assert_series_equal(pd.Timedelta('1 days') + s, exp) - - tm.assert_series_equal(s + pd.tseries.offsets.Day(), exp) - tm.assert_series_equal(pd.tseries.offsets.Day() + s, exp) - - def test_ops_series_period(self): - # GH 13043 - s = pd.Series([pd.Period('2015-01-01', freq='D'), - pd.Period('2015-01-02', freq='D')], name='xxx') - self.assertEqual(s.dtype, object) - - p = pd.Period('2015-01-10', freq='D') - # dtype will be object because of original dtype - exp = pd.Series([9, 8], name='xxx', dtype=object) - tm.assert_series_equal(p - s, exp) - tm.assert_series_equal(s - p, -exp) - - s2 = pd.Series([pd.Period('2015-01-05', freq='D'), - pd.Period('2015-01-04', freq='D')], name='xxx') - self.assertEqual(s2.dtype, object) - - exp = pd.Series([4, 2], name='xxx', dtype=object) - tm.assert_series_equal(s2 - s, exp) - tm.assert_series_equal(s - s2, -exp) - - def test_comp_series_period_scalar(self): - # GH 13200 - for freq in ['M', '2M', '3M']: - base = Series([Period(x, freq=freq) for x in - ['2011-01', '2011-02', '2011-03', '2011-04']]) - p = Period('2011-02', freq=freq) - - exp = pd.Series([False, True, False, False]) - tm.assert_series_equal(base == p, exp) - tm.assert_series_equal(p == base, exp) - - exp = pd.Series([True, False, True, True]) - tm.assert_series_equal(base != p, exp) - tm.assert_series_equal(p != base, exp) - - exp = pd.Series([False, False, True, True]) - tm.assert_series_equal(base > p, exp) - tm.assert_series_equal(p < base, exp) - - exp = pd.Series([True, False, False, False]) - tm.assert_series_equal(base < p, exp) - tm.assert_series_equal(p > base, exp) - - exp = pd.Series([False, True, True, True]) - tm.assert_series_equal(base >= p, exp) - tm.assert_series_equal(p <= base, exp) - - exp = pd.Series([True, True, False, False]) - 
tm.assert_series_equal(base <= p, exp) - tm.assert_series_equal(p >= base, exp) - - # different base freq - msg = "Input has different freq=A-DEC from Period" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - base <= Period('2011', freq='A') - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - Period('2011', freq='A') >= base - - def test_comp_series_period_series(self): - # GH 13200 - for freq in ['M', '2M', '3M']: - base = Series([Period(x, freq=freq) for x in - ['2011-01', '2011-02', '2011-03', '2011-04']]) - - s = Series([Period(x, freq=freq) for x in - ['2011-02', '2011-01', '2011-03', '2011-05']]) - - exp = Series([False, False, True, False]) - tm.assert_series_equal(base == s, exp) - - exp = Series([True, True, False, True]) - tm.assert_series_equal(base != s, exp) - - exp = Series([False, True, False, False]) - tm.assert_series_equal(base > s, exp) - - exp = Series([True, False, False, True]) - tm.assert_series_equal(base < s, exp) - - exp = Series([False, True, True, False]) - tm.assert_series_equal(base >= s, exp) - - exp = Series([True, False, True, True]) - tm.assert_series_equal(base <= s, exp) - - s2 = Series([Period(x, freq='A') for x in - ['2011', '2011', '2011', '2011']]) - - # different base freq - msg = "Input has different freq=A-DEC from Period" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - base <= s2 - - def test_comp_series_period_object(self): - # GH 13200 - base = Series([Period('2011', freq='A'), Period('2011-02', freq='M'), - Period('2013', freq='A'), Period('2011-04', freq='M')]) - - s = Series([Period('2012', freq='A'), Period('2011-01', freq='M'), - Period('2013', freq='A'), Period('2011-05', freq='M')]) - - exp = Series([False, False, True, False]) - tm.assert_series_equal(base == s, exp) - - exp = Series([True, True, False, True]) - tm.assert_series_equal(base != s, exp) - - exp = Series([False, True, False, False]) - tm.assert_series_equal(base > s, exp) - - exp = Series([True, False, False, True]) - tm.assert_series_equal(base < s, exp) - - exp = Series([False, True, True, False]) - tm.assert_series_equal(base >= s, exp) - - exp = Series([True, False, True, True]) - tm.assert_series_equal(base <= s, exp) - - def test_ops_frame_period(self): - # GH 13043 - df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'), - pd.Period('2015-02', freq='M')], - 'B': [pd.Period('2014-01', freq='M'), - pd.Period('2014-02', freq='M')]}) - self.assertEqual(df['A'].dtype, object) - self.assertEqual(df['B'].dtype, object) - - p = pd.Period('2015-03', freq='M') - # dtype will be object because of original dtype - exp = pd.DataFrame({'A': np.array([2, 1], dtype=object), - 'B': np.array([14, 13], dtype=object)}) - tm.assert_frame_equal(p - df, exp) - tm.assert_frame_equal(df - p, -exp) - - df2 = pd.DataFrame({'A': [pd.Period('2015-05', freq='M'), - pd.Period('2015-06', freq='M')], - 'B': [pd.Period('2015-05', freq='M'), - pd.Period('2015-06', freq='M')]}) - self.assertEqual(df2['A'].dtype, object) - self.assertEqual(df2['B'].dtype, object) - - exp = pd.DataFrame({'A': np.array([4, 4], dtype=object), - 'B': np.array([16, 16], dtype=object)}) - tm.assert_frame_equal(df2 - df, exp) - tm.assert_frame_equal(df - df2, -exp) - - -class TestPeriodField(tm.TestCase): - - def test_get_period_field_raises_on_out_of_range(self): - self.assertRaises(ValueError, _period.get_period_field, -1, 0, 0) - - def test_get_period_field_array_raises_on_out_of_range(self): - self.assertRaises(ValueError, _period.get_period_field_arr, -1, - 
np.empty(1), 0) - - -class TestTslib(tm.TestCase): - def test_intraday_conversion_factors(self): - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('H'), False), 24) - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('T'), False), 1440) - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('S'), False), 86400) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('L'), False), 86400000) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('U'), False), 86400000000) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('N'), False), 86400000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('H'), get_freq('T'), False), 60) - self.assertEqual(period_asfreq( - 1, get_freq('H'), get_freq('S'), False), 3600) - self.assertEqual(period_asfreq(1, get_freq('H'), - get_freq('L'), False), 3600000) - self.assertEqual(period_asfreq(1, get_freq( - 'H'), get_freq('U'), False), 3600000000) - self.assertEqual(period_asfreq(1, get_freq( - 'H'), get_freq('N'), False), 3600000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('T'), get_freq('S'), False), 60) - self.assertEqual(period_asfreq( - 1, get_freq('T'), get_freq('L'), False), 60000) - self.assertEqual(period_asfreq(1, get_freq( - 'T'), get_freq('U'), False), 60000000) - self.assertEqual(period_asfreq(1, get_freq( - 'T'), get_freq('N'), False), 60000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('S'), get_freq('L'), False), 1000) - self.assertEqual(period_asfreq(1, get_freq('S'), - get_freq('U'), False), 1000000) - self.assertEqual(period_asfreq(1, get_freq( - 'S'), get_freq('N'), False), 1000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('L'), get_freq('U'), False), 1000) - self.assertEqual(period_asfreq(1, get_freq('L'), - get_freq('N'), False), 1000000) - - self.assertEqual(period_asfreq( - 1, get_freq('U'), get_freq('N'), False), 1000) - - def test_period_ordinal_start_values(self): - # information for 1.1.1970 - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('A'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('M'))) - self.assertEqual(1, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('D'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('B'))) - - def test_period_ordinal_week(self): - self.assertEqual(1, period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(2, period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0, - get_freq('W'))) - - self.assertEqual(2284, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(2285, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, - get_freq('W'))) - - def test_period_ordinal_business_day(self): - # Thursday - self.assertEqual(11415, period_ordinal(2013, 10, 3, 0, 0, 0, 0, 0, - get_freq('B'))) - # Friday - self.assertEqual(11416, period_ordinal(2013, 10, 4, 0, 0, 0, 0, 0, - get_freq('B'))) - # Saturday - self.assertEqual(11417, period_ordinal(2013, 10, 5, 0, 0, 0, 0, 0, - get_freq('B'))) - # Sunday - self.assertEqual(11417, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, - get_freq('B'))) - # Monday - self.assertEqual(11417, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, - get_freq('B'))) - # Tuesday - self.assertEqual(11418, period_ordinal(2013, 10, 8, 0, 0, 0, 0, 0, - get_freq('B'))) From d38d142aaac5ee046aabc32c1e6422f86c37dc41 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 8 Feb 2017 18:45:04 -0500 Subject: [PATCH 
025/933] TST: more test_period reorg

---
 .../tests/indexes/period/test_construction.py |  25 +-
 pandas/tests/indexes/period/test_indexing.py  | 317 +++
 pandas/tests/indexes/period/test_ops.py       | 492 +---
 pandas/tests/indexes/period/test_setops.py    |  93 ++
 pandas/tests/scalar/test_period.py            | 898 +++---------
 pandas/tests/scalar/test_period_asfreq.py     | 721 ++++++++
 6 files changed, 1285 insertions(+), 1261 deletions(-)
 create mode 100644 pandas/tests/indexes/period/test_indexing.py
 create mode 100644 pandas/tests/scalar/test_period_asfreq.py

diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py
index c1299c6abeda3..228615829b5b8 100644
--- a/pandas/tests/indexes/period/test_construction.py
+++ b/pandas/tests/indexes/period/test_construction.py
@@ -410,22 +410,9 @@ def test_constructor(self):
         self.assertTrue((i1 == i2).all())
         self.assertEqual(i1.freq, i2.freq)
 
-        try:
-            PeriodIndex(start=start, end=end_intv)
-            raise AssertionError('Cannot allow mixed freq for start and end')
-        except ValueError:
-            pass
-
         end_intv = Period('2005-05-01', 'B')
         i1 = PeriodIndex(start=start, end=end_intv)
 
-        try:
-            PeriodIndex(start=start)
-            raise AssertionError(
-                'Must specify periods if missing start or end')
-        except ValueError:
-            pass
-
         # infer freq from first element
         i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')])
         self.assertEqual(len(i2), 2)
@@ -441,6 +428,18 @@ def test_constructor(self):
             vals = np.array(vals)
             self.assertRaises(ValueError, PeriodIndex, vals)
 
+    def test_constructor_error(self):
+        start = Period('02-Apr-2005', 'B')
+        end_intv = Period('2006-12-31', ('w', 1))
+
+        msg = 'Start and end must have same freq'
+        with tm.assertRaisesRegexp(ValueError, msg):
+            PeriodIndex(start=start, end=end_intv)
+
+        msg = 'Must specify 2 of start, end, periods'
+        with tm.assertRaisesRegexp(ValueError, msg):
+            PeriodIndex(start=start)
+
     def test_recreate_from_data(self):
         for o in ['M', 'Q', 'A', 'D', 'B', 'T', 'S', 'L', 'U', 'N', 'H']:
             org = PeriodIndex(start='2001/04/01', freq=o, periods=1)
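The new test_constructor_error pins down the two ValueError paths that the deleted try/except blocks used to cover. Sketched as plain calls (this relies on the 0.20-era PeriodIndex start/end/periods keywords shown in the hunk; later pandas removed them in favor of period_range, which enforces the same rules):

    import pandas as pd

    start = pd.Period('02-Apr-2005', 'B')   # business-day frequency
    end = pd.Period('2006-12-31', 'W')      # weekly frequency

    # Start and end must share one frequency:
    pd.PeriodIndex(start=start, end=end)
    # ValueError: Start and end must have same freq

    # And a range is under-determined unless two of start/end/periods are given:
    pd.PeriodIndex(start=start)
    # ValueError: Must specify 2 of start, end, periods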
diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py
new file mode 100644
index 0000000000000..8d9e26406defc
--- /dev/null
+++ b/pandas/tests/indexes/period/test_indexing.py
@@ -0,0 +1,317 @@
+from datetime import datetime
+
+import numpy as np
+import pandas as pd
+from pandas.util import testing as tm
+from pandas.compat import lrange
+from pandas import (PeriodIndex, Series, DatetimeIndex,
+                    period_range, Period, tslib, _np_version_under1p9)
+
+
+class TestGetItem(tm.TestCase):
+
+    def setUp(self):
+        pass
+
+    def test_getitem(self):
+        idx1 = pd.period_range('2011-01-01', '2011-01-31', freq='D',
+                               name='idx')
+
+        for idx in [idx1]:
+            result = idx[0]
+            self.assertEqual(result, pd.Period('2011-01-01', freq='D'))
+
+            result = idx[-1]
+            self.assertEqual(result, pd.Period('2011-01-31', freq='D'))
+
+            result = idx[0:5]
+            expected = pd.period_range('2011-01-01', '2011-01-05', freq='D',
+                                       name='idx')
+            self.assert_index_equal(result, expected)
+            self.assertEqual(result.freq, expected.freq)
+            self.assertEqual(result.freq, 'D')
+
+            result = idx[0:10:2]
+            expected = pd.PeriodIndex(['2011-01-01', '2011-01-03',
+                                       '2011-01-05',
+                                       '2011-01-07', '2011-01-09'],
+                                      freq='D', name='idx')
+            self.assert_index_equal(result, expected)
+            self.assertEqual(result.freq, expected.freq)
+            self.assertEqual(result.freq, 'D')
+
+            result = idx[-20:-5:3]
+            expected = pd.PeriodIndex(['2011-01-12', '2011-01-15',
+                                       '2011-01-18',
+                                       '2011-01-21', '2011-01-24'],
+                                      freq='D', name='idx')
+            self.assert_index_equal(result, expected)
+            self.assertEqual(result.freq, expected.freq)
+            self.assertEqual(result.freq, 'D')
+
+            result = idx[4::-1]
+            expected = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-03',
+                                    '2011-01-02', '2011-01-01'],
+                                   freq='D', name='idx')
+            self.assert_index_equal(result, expected)
+            self.assertEqual(result.freq, expected.freq)
+            self.assertEqual(result.freq, 'D')
+
+    def test_getitem_index(self):
+        idx = period_range('2007-01', periods=10, freq='M', name='x')
+
+        result = idx[[1, 3, 5]]
+        exp = pd.PeriodIndex(['2007-02', '2007-04', '2007-06'],
+                             freq='M', name='x')
+        tm.assert_index_equal(result, exp)
+
+        result = idx[[True, True, False, False, False,
+                      True, True, False, False, False]]
+        exp = pd.PeriodIndex(['2007-01', '2007-02', '2007-06', '2007-07'],
+                             freq='M', name='x')
+        tm.assert_index_equal(result, exp)
+
+    def test_getitem_partial(self):
+        rng = period_range('2007-01', periods=50, freq='M')
+        ts = Series(np.random.randn(len(rng)), rng)
+
+        self.assertRaises(KeyError, ts.__getitem__, '2006')
+
+        result = ts['2008']
+        self.assertTrue((result.index.year == 2008).all())
+
+        result = ts['2008':'2009']
+        self.assertEqual(len(result), 24)
+
+        result = ts['2008-1':'2009-12']
+        self.assertEqual(len(result), 24)
+
+        result = ts['2008Q1':'2009Q4']
+        self.assertEqual(len(result), 24)
+
+        result = ts[:'2009']
+        self.assertEqual(len(result), 36)
+
+        result = ts['2009':]
+        self.assertEqual(len(result), 50 - 24)
+
+        exp = result
+        result = ts[24:]
+        tm.assert_series_equal(exp, result)
+
+        ts = ts[10:].append(ts[10:])
+        self.assertRaisesRegexp(KeyError,
+                                "left slice bound for non-unique "
+                                "label: '2008'",
+                                ts.__getitem__, slice('2008', '2009'))
+
+    def test_getitem_datetime(self):
+        rng = period_range(start='2012-01-01', periods=10, freq='W-MON')
+        ts = Series(lrange(len(rng)), index=rng)
+
+        dt1 = datetime(2011, 10, 2)
+        dt4 = datetime(2012, 4, 20)
+
+        rs = ts[dt1:dt4]
+        tm.assert_series_equal(rs, ts)
+
+    def test_getitem_nat(self):
+        idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M')
+        self.assertEqual(idx[0], pd.Period('2011-01', freq='M'))
+        self.assertIs(idx[1], tslib.NaT)
+
+        s = pd.Series([0, 1, 2], index=idx)
+        self.assertEqual(s[pd.NaT], 1)
+
+        s = pd.Series(idx, index=idx)
+        self.assertEqual(s[pd.Period('2011-01', freq='M')],
+                         pd.Period('2011-01', freq='M'))
+        self.assertIs(s[pd.NaT], tslib.NaT)
+
+    def test_getitem_list_periods(self):
+        # GH 7710
+        rng = period_range(start='2012-01-01', periods=10, freq='D')
+        ts = Series(lrange(len(rng)), index=rng)
+        exp = ts.iloc[[1]]
+        tm.assert_series_equal(ts[[Period('2012-01-02', freq='D')]], exp)
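test_getitem_partial above exercises partial string indexing: a label such as '2008' resolves to every period that the string spans, and string slices are inclusive on both endpoints. A minimal sketch with the same monthly layout the test builds:

    import numpy as np
    import pandas as pd

    rng = pd.period_range('2007-01', periods=50, freq='M')
    ts = pd.Series(np.arange(50), index=rng)

    len(ts['2008'])         # 12: all months of 2008
    len(ts['2008':'2009'])  # 24: inclusive slice across two years
    len(ts[:'2009'])        # 36: everything through 2009-12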
tm.assert_series_equal(s[d], s) + + def test_getitem_day(self): + # GH 6716 + # Confirm DatetimeIndex and PeriodIndex works identically + didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400) + pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) + + for idx in [didx, pidx]: + # getitem against index should raise ValueError + values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', + '2013/02/01 09:00'] + for v in values: + + if _np_version_under1p9: + with tm.assertRaises(ValueError): + idx[v] + else: + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + # with tm.assertRaises(IndexError): + # idx[v] + continue + + s = Series(np.random.rand(len(idx)), index=idx) + tm.assert_series_equal(s['2013/01'], s[0:31]) + tm.assert_series_equal(s['2013/02'], s[31:59]) + tm.assert_series_equal(s['2014'], s[365:]) + + invalid = ['2013/02/01 9H', '2013/02/01 09:00'] + for v in invalid: + with tm.assertRaises(KeyError): + s[v] + + +class TestIndexing(tm.TestCase): + + def test_get_loc_msg(self): + idx = period_range('2000-1-1', freq='A', periods=10) + bad_period = Period('2012', 'A') + self.assertRaises(KeyError, idx.get_loc, bad_period) + + try: + idx.get_loc(bad_period) + except KeyError as inst: + self.assertEqual(inst.args[0], bad_period) + + def test_get_loc_nat(self): + didx = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03']) + pidx = PeriodIndex(['2011-01-01', 'NaT', '2011-01-03'], freq='M') + + # check DatetimeIndex compat + for idx in [didx, pidx]: + self.assertEqual(idx.get_loc(pd.NaT), 1) + self.assertEqual(idx.get_loc(None), 1) + self.assertEqual(idx.get_loc(float('nan')), 1) + self.assertEqual(idx.get_loc(np.nan), 1) + + def test_take(self): + # GH 10295 + idx1 = pd.period_range('2011-01-01', '2011-01-31', freq='D', + name='idx') + + for idx in [idx1]: + result = idx.take([0]) + self.assertEqual(result, pd.Period('2011-01-01', freq='D')) + + result = idx.take([5]) + self.assertEqual(result, pd.Period('2011-01-06', freq='D')) + + result = idx.take([0, 1, 2]) + expected = pd.period_range('2011-01-01', '2011-01-03', freq='D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, 'D') + self.assertEqual(result.freq, expected.freq) + + result = idx.take([0, 2, 4]) + expected = pd.PeriodIndex(['2011-01-01', '2011-01-03', + '2011-01-05'], freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.freq, 'D') + + result = idx.take([7, 4, 1]) + expected = pd.PeriodIndex(['2011-01-08', '2011-01-05', + '2011-01-02'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.freq, 'D') + + result = idx.take([3, 2, 5]) + expected = PeriodIndex(['2011-01-04', '2011-01-03', '2011-01-06'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.freq, 'D') + + result = idx.take([-3, 2, 5]) + expected = PeriodIndex(['2011-01-29', '2011-01-03', '2011-01-06'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.freq, 'D') + + def test_take_misc(self): + index = PeriodIndex(start='1/1/10', end='12/31/12', freq='D', + name='idx') + expected = PeriodIndex([datetime(2010, 1, 6), datetime(2010, 1, 7), + datetime(2010, 1, 9), datetime(2010, 1, 13)], + freq='D', name='idx') + + taken1 = 
index.take([5, 6, 8, 12]) + taken2 = index[[5, 6, 8, 12]] + + for taken in [taken1, taken2]: + tm.assert_index_equal(taken, expected) + tm.assertIsInstance(taken, PeriodIndex) + self.assertEqual(taken.freq, index.freq) + self.assertEqual(taken.name, expected.name) + + def test_take_fill_value(self): + # GH 12631 + idx = pd.PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'], + name='xxx', freq='D') + result = idx.take(np.array([1, 0, -1])) + expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx', freq='D') + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', 'NaT'], + name='xxx', freq='D') + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx', freq='D') + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and fill_value is not None, ' + 'all indices must be >= -1') + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with tm.assertRaises(IndexError): + idx.take(np.array([1, -5])) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 70759e8659c25..82a881d7c65bc 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -1,14 +1,12 @@ import numpy as np -from datetime import timedelta, datetime +from datetime import timedelta import pandas as pd import pandas.tslib as tslib import pandas.util.testing as tm import pandas.tseries.period as period -from pandas.compat import lrange from pandas import (DatetimeIndex, PeriodIndex, period_range, Series, Period, - _np_version_under1p10, Index, Timedelta, offsets, - _np_version_under1p9) + _np_version_under1p10, Index, Timedelta, offsets) from pandas.tests.test_base import Ops @@ -285,57 +283,6 @@ def test_resolution(self): idx = pd.period_range(start='2013-04-01', periods=30, freq=freq) self.assertEqual(idx.resolution, expected) - def test_union(self): - # union - rng1 = pd.period_range('1/1/2000', freq='D', periods=5) - other1 = pd.period_range('1/6/2000', freq='D', periods=5) - expected1 = pd.period_range('1/1/2000', freq='D', periods=10) - - rng2 = pd.period_range('1/1/2000', freq='D', periods=5) - other2 = pd.period_range('1/4/2000', freq='D', periods=5) - expected2 = pd.period_range('1/1/2000', freq='D', periods=8) - - rng3 = pd.period_range('1/1/2000', freq='D', periods=5) - other3 = pd.PeriodIndex([], freq='D') - expected3 = pd.period_range('1/1/2000', freq='D', periods=5) - - rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5) - other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5) - expected4 = pd.PeriodIndex(['2000-01-01 09:00', '2000-01-01 10:00', - '2000-01-01 11:00', '2000-01-01 12:00', - '2000-01-01 13:00', '2000-01-02 09:00', - '2000-01-02 10:00', '2000-01-02 11:00', - '2000-01-02 12:00', '2000-01-02 13:00'], - freq='H') - - rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', - '2000-01-01 09:05'], freq='T') - other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05' - '2000-01-01 09:08'], - freq='T') - expected5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', - '2000-01-01 09:05', '2000-01-01 09:08'], - 
freq='T') - - rng6 = pd.period_range('2000-01-01', freq='M', periods=7) - other6 = pd.period_range('2000-04-01', freq='M', periods=7) - expected6 = pd.period_range('2000-01-01', freq='M', periods=10) - - rng7 = pd.period_range('2003-01-01', freq='A', periods=5) - other7 = pd.period_range('1998-01-01', freq='A', periods=8) - expected7 = pd.period_range('1998-01-01', freq='A', periods=10) - - for rng, other, expected in [(rng1, other1, expected1), - (rng2, other2, expected2), - (rng3, other3, expected3), (rng4, other4, - expected4), - (rng5, other5, expected5), (rng6, other6, - expected6), - (rng7, other7, expected7)]: - - result_union = rng.union(other) - tm.assert_index_equal(result_union, expected) - def test_add_iadd(self): rng = pd.period_range('1/1/2000', freq='D', periods=5) other = pd.period_range('1/6/2000', freq='D', periods=5) @@ -432,48 +379,6 @@ def test_add_iadd(self): rng += 1 tm.assert_index_equal(rng, expected) - def test_difference(self): - # diff - rng1 = pd.period_range('1/1/2000', freq='D', periods=5) - other1 = pd.period_range('1/6/2000', freq='D', periods=5) - expected1 = pd.period_range('1/1/2000', freq='D', periods=5) - - rng2 = pd.period_range('1/1/2000', freq='D', periods=5) - other2 = pd.period_range('1/4/2000', freq='D', periods=5) - expected2 = pd.period_range('1/1/2000', freq='D', periods=3) - - rng3 = pd.period_range('1/1/2000', freq='D', periods=5) - other3 = pd.PeriodIndex([], freq='D') - expected3 = pd.period_range('1/1/2000', freq='D', periods=5) - - rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5) - other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5) - expected4 = rng4 - - rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', - '2000-01-01 09:05'], freq='T') - other5 = pd.PeriodIndex( - ['2000-01-01 09:01', '2000-01-01 09:05'], freq='T') - expected5 = pd.PeriodIndex(['2000-01-01 09:03'], freq='T') - - rng6 = pd.period_range('2000-01-01', freq='M', periods=7) - other6 = pd.period_range('2000-04-01', freq='M', periods=7) - expected6 = pd.period_range('2000-01-01', freq='M', periods=3) - - rng7 = pd.period_range('2003-01-01', freq='A', periods=5) - other7 = pd.period_range('1998-01-01', freq='A', periods=8) - expected7 = pd.period_range('2006-01-01', freq='A', periods=2) - - for rng, other, expected in [(rng1, other1, expected1), - (rng2, other2, expected2), - (rng3, other3, expected3), - (rng4, other4, expected4), - (rng5, other5, expected5), - (rng6, other6, expected6), - (rng7, other7, expected7), ]: - result_union = rng.difference(other) - tm.assert_index_equal(result_union, expected) - def test_sub(self): rng = period_range('2007-01', periods=50) @@ -833,98 +738,6 @@ def test_order(self): self.assert_numpy_array_equal(indexer, exp, check_dtype=False) self.assertEqual(ordered.freq, 'D') - def test_getitem(self): - idx1 = pd.period_range('2011-01-01', '2011-01-31', freq='D', - name='idx') - - for idx in [idx1]: - result = idx[0] - self.assertEqual(result, pd.Period('2011-01-01', freq='D')) - - result = idx[-1] - self.assertEqual(result, pd.Period('2011-01-31', freq='D')) - - result = idx[0:5] - expected = pd.period_range('2011-01-01', '2011-01-05', freq='D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - - result = idx[0:10:2] - expected = pd.PeriodIndex(['2011-01-01', '2011-01-03', - '2011-01-05', - '2011-01-07', '2011-01-09'], - freq='D', name='idx') - self.assert_index_equal(result, expected) - 
self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - - result = idx[-20:-5:3] - expected = pd.PeriodIndex(['2011-01-12', '2011-01-15', - '2011-01-18', - '2011-01-21', '2011-01-24'], - freq='D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - - result = idx[4::-1] - expected = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-03', - '2011-01-02', '2011-01-01'], - freq='D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - - def test_take(self): - # GH 10295 - idx1 = pd.period_range('2011-01-01', '2011-01-31', freq='D', - name='idx') - - for idx in [idx1]: - result = idx.take([0]) - self.assertEqual(result, pd.Period('2011-01-01', freq='D')) - - result = idx.take([5]) - self.assertEqual(result, pd.Period('2011-01-06', freq='D')) - - result = idx.take([0, 1, 2]) - expected = pd.period_range('2011-01-01', '2011-01-03', freq='D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, 'D') - self.assertEqual(result.freq, expected.freq) - - result = idx.take([0, 2, 4]) - expected = pd.PeriodIndex(['2011-01-01', '2011-01-03', - '2011-01-05'], freq='D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - - result = idx.take([7, 4, 1]) - expected = pd.PeriodIndex(['2011-01-08', '2011-01-05', - '2011-01-02'], - freq='D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - - result = idx.take([3, 2, 5]) - expected = PeriodIndex(['2011-01-04', '2011-01-03', '2011-01-06'], - freq='D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - - result = idx.take([-3, 2, 5]) - expected = PeriodIndex(['2011-01-29', '2011-01-03', '2011-01-06'], - freq='D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - def test_nat_new(self): idx = pd.period_range('2011-01', freq='M', periods=5, name='x') @@ -1350,6 +1163,9 @@ def test_ops_series_period(self): tm.assert_series_equal(s2 - s, exp) tm.assert_series_equal(s - s2, -exp) + +class TestFramePeriod(tm.TestCase): + def test_ops_frame_period(self): # GH 13043 df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'), @@ -1379,303 +1195,7 @@ def test_ops_frame_period(self): tm.assert_frame_equal(df - df2, -exp) -class TestPeriodIndex(tm.TestCase): - - def setUp(self): - pass - - def test_getitem_index(self): - idx = period_range('2007-01', periods=10, freq='M', name='x') - - result = idx[[1, 3, 5]] - exp = pd.PeriodIndex(['2007-02', '2007-04', '2007-06'], - freq='M', name='x') - tm.assert_index_equal(result, exp) - - result = idx[[True, True, False, False, False, - True, True, False, False, False]] - exp = pd.PeriodIndex(['2007-01', '2007-02', '2007-06', '2007-07'], - freq='M', name='x') - tm.assert_index_equal(result, exp) - - def test_getitem_partial(self): - rng = period_range('2007-01', periods=50, freq='M') - ts = Series(np.random.randn(len(rng)), rng) - - self.assertRaises(KeyError, ts.__getitem__, '2006') - - result = ts['2008'] - self.assertTrue((result.index.year == 2008).all()) - - result = ts['2008':'2009'] - self.assertEqual(len(result), 24) - - result = 
ts['2008-1':'2009-12'] - self.assertEqual(len(result), 24) - - result = ts['2008Q1':'2009Q4'] - self.assertEqual(len(result), 24) - - result = ts[:'2009'] - self.assertEqual(len(result), 36) - - result = ts['2009':] - self.assertEqual(len(result), 50 - 24) - - exp = result - result = ts[24:] - tm.assert_series_equal(exp, result) - - ts = ts[10:].append(ts[10:]) - self.assertRaisesRegexp(KeyError, - "left slice bound for non-unique " - "label: '2008'", - ts.__getitem__, slice('2008', '2009')) - - def test_getitem_datetime(self): - rng = period_range(start='2012-01-01', periods=10, freq='W-MON') - ts = Series(lrange(len(rng)), index=rng) - - dt1 = datetime(2011, 10, 2) - dt4 = datetime(2012, 4, 20) - - rs = ts[dt1:dt4] - tm.assert_series_equal(rs, ts) - - def test_getitem_nat(self): - idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') - self.assertEqual(idx[0], pd.Period('2011-01', freq='M')) - self.assertIs(idx[1], tslib.NaT) - - s = pd.Series([0, 1, 2], index=idx) - self.assertEqual(s[pd.NaT], 1) - - s = pd.Series(idx, index=idx) - self.assertEqual(s[pd.Period('2011-01', freq='M')], - pd.Period('2011-01', freq='M')) - self.assertIs(s[pd.NaT], tslib.NaT) - - def test_getitem_list_periods(self): - # GH 7710 - rng = period_range(start='2012-01-01', periods=10, freq='D') - ts = Series(lrange(len(rng)), index=rng) - exp = ts.iloc[[1]] - tm.assert_series_equal(ts[[Period('2012-01-02', freq='D')]], exp) - - def test_getitem_seconds(self): - # GH 6716 - didx = DatetimeIndex(start='2013/01/01 09:00:00', freq='S', - periods=4000) - pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000) - - for idx in [didx, pidx]: - # getitem against index should raise ValueError - values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', - '2013/02/01 09:00'] - for v in values: - if _np_version_under1p9: - with tm.assertRaises(ValueError): - idx[v] - else: - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - # with tm.assertRaises(IndexError): - # idx[v] - continue - - s = Series(np.random.rand(len(idx)), index=idx) - tm.assert_series_equal(s['2013/01/01 10:00'], s[3600:3660]) - tm.assert_series_equal(s['2013/01/01 9H'], s[:3600]) - for d in ['2013/01/01', '2013/01', '2013']: - tm.assert_series_equal(s[d], s) - - def test_getitem_day(self): - # GH 6716 - # Confirm DatetimeIndex and PeriodIndex works identically - didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400) - pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) - - for idx in [didx, pidx]: - # getitem against index should raise ValueError - values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', - '2013/02/01 09:00'] - for v in values: - - if _np_version_under1p9: - with tm.assertRaises(ValueError): - idx[v] - else: - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - # with tm.assertRaises(IndexError): - # idx[v] - continue - - s = Series(np.random.rand(len(idx)), index=idx) - tm.assert_series_equal(s['2013/01'], s[0:31]) - tm.assert_series_equal(s['2013/02'], s[31:59]) - tm.assert_series_equal(s['2014'], s[365:]) - - invalid = ['2013/02/01 9H', '2013/02/01 09:00'] - for v in invalid: - with tm.assertRaises(KeyError): - s[v] - - def test_take(self): - index = PeriodIndex(start='1/1/10', end='12/31/12', freq='D', - name='idx') - expected = PeriodIndex([datetime(2010, 1, 6), datetime(2010, 1, 7), - datetime(2010, 1, 9), datetime(2010, 1, 13)], - freq='D', name='idx') - - taken1 = index.take([5, 6, 8, 12]) - taken2 = 
index[[5, 6, 8, 12]] - - for taken in [taken1, taken2]: - tm.assert_index_equal(taken, expected) - tm.assertIsInstance(taken, PeriodIndex) - self.assertEqual(taken.freq, index.freq) - self.assertEqual(taken.name, expected.name) - - def test_take_fill_value(self): - # GH 12631 - idx = pd.PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'], - name='xxx', freq='D') - result = idx.take(np.array([1, 0, -1])) - expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', freq='D') - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', 'NaT'], - name='xxx', freq='D') - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', freq='D') - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with tm.assertRaises(IndexError): - idx.take(np.array([1, -5])) - - def test_get_loc_msg(self): - idx = period_range('2000-1-1', freq='A', periods=10) - bad_period = Period('2012', 'A') - self.assertRaises(KeyError, idx.get_loc, bad_period) - - try: - idx.get_loc(bad_period) - except KeyError as inst: - self.assertEqual(inst.args[0], bad_period) - - def test_get_loc_nat(self): - didx = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03']) - pidx = PeriodIndex(['2011-01-01', 'NaT', '2011-01-03'], freq='M') - - # check DatetimeIndex compat - for idx in [didx, pidx]: - self.assertEqual(idx.get_loc(pd.NaT), 1) - self.assertEqual(idx.get_loc(None), 1) - self.assertEqual(idx.get_loc(float('nan')), 1) - self.assertEqual(idx.get_loc(np.nan), 1) - - -class TestComparisons(tm.TestCase): - - def setUp(self): - self.january1 = Period('2000-01', 'M') - self.january2 = Period('2000-01', 'M') - self.february = Period('2000-02', 'M') - self.march = Period('2000-03', 'M') - self.day = Period('2012-01-01', 'D') - - def test_equal(self): - self.assertEqual(self.january1, self.january2) - - def test_equal_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 == self.day - - def test_notEqual(self): - self.assertNotEqual(self.january1, 1) - self.assertNotEqual(self.january1, self.february) - - def test_greater(self): - self.assertTrue(self.february > self.january1) - - def test_greater_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 > self.day - - def test_greater_Raises_Type(self): - with tm.assertRaises(TypeError): - self.january1 > 1 - - def test_greaterEqual(self): - self.assertTrue(self.january1 >= self.january2) - - def test_greaterEqual_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 >= self.day - - with tm.assertRaises(TypeError): - print(self.january1 >= 1) - - def test_smallerEqual(self): - self.assertTrue(self.january1 <= self.january2) - - def test_smallerEqual_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 <= self.day - - def test_smallerEqual_Raises_Type(self): - with tm.assertRaises(TypeError): - self.january1 <= 1 - - def test_smaller(self): - self.assertTrue(self.january1 < 
self.february) - - def test_smaller_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 < self.day - - def test_smaller_Raises_Type(self): - with tm.assertRaises(TypeError): - self.january1 < 1 - - def test_sort(self): - periods = [self.march, self.january1, self.february] - correctPeriods = [self.january1, self.february, self.march] - self.assertEqual(sorted(periods), correctPeriods) - - def test_period_nat_comp(self): - p_nat = Period('NaT', freq='D') - p = Period('2011-01-01', freq='D') - - nat = pd.Timestamp('NaT') - t = pd.Timestamp('2011-01-01') - # confirm Period('NaT') work identical with Timestamp('NaT') - for left, right in [(p_nat, p), (p, p_nat), (p_nat, p_nat), (nat, t), - (t, nat), (nat, nat)]: - self.assertEqual(left < right, False) - self.assertEqual(left > right, False) - self.assertEqual(left == right, False) - self.assertEqual(left != right, True) - self.assertEqual(left <= right, False) - self.assertEqual(left >= right, False) +class TestPeriodIndexComparisons(tm.TestCase): def test_pi_pi_comp(self): diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index 06e15f9175ed8..d4f06bae8bc32 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -43,6 +43,57 @@ def test_join_does_not_recur(self): tm.assert_index_equal(res, expected) def test_union(self): + # union + rng1 = pd.period_range('1/1/2000', freq='D', periods=5) + other1 = pd.period_range('1/6/2000', freq='D', periods=5) + expected1 = pd.period_range('1/1/2000', freq='D', periods=10) + + rng2 = pd.period_range('1/1/2000', freq='D', periods=5) + other2 = pd.period_range('1/4/2000', freq='D', periods=5) + expected2 = pd.period_range('1/1/2000', freq='D', periods=8) + + rng3 = pd.period_range('1/1/2000', freq='D', periods=5) + other3 = pd.PeriodIndex([], freq='D') + expected3 = pd.period_range('1/1/2000', freq='D', periods=5) + + rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5) + other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5) + expected4 = pd.PeriodIndex(['2000-01-01 09:00', '2000-01-01 10:00', + '2000-01-01 11:00', '2000-01-01 12:00', + '2000-01-01 13:00', '2000-01-02 09:00', + '2000-01-02 10:00', '2000-01-02 11:00', + '2000-01-02 12:00', '2000-01-02 13:00'], + freq='H') + + rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', + '2000-01-01 09:05'], freq='T') + other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05', + '2000-01-01 09:08'], + freq='T') + expected5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', + '2000-01-01 09:05', '2000-01-01 09:08'], + freq='T') + + rng6 = pd.period_range('2000-01-01', freq='M', periods=7) + other6 = pd.period_range('2000-04-01', freq='M', periods=7) + expected6 = pd.period_range('2000-01-01', freq='M', periods=10) + + rng7 = pd.period_range('2003-01-01', freq='A', periods=5) + other7 = pd.period_range('1998-01-01', freq='A', periods=8) + expected7 = pd.period_range('1998-01-01', freq='A', periods=10) + + for rng, other, expected in [(rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3), (rng4, other4, + expected4), + (rng5, other5, expected5), (rng6, other6, + expected6), + (rng7, other7, expected7)]: + + result_union = rng.union(other) + tm.assert_index_equal(result_union, expected) + + def test_union_misc(self): index = period_range('1/1/2000', '1/20/2000', freq='D') result = index[:-5].union(index[10:]) @@ -155,3 +206,45 @@ def
test_intersection_cases(self): result = rng.intersection(rng[0:0]) self.assertEqual(len(result), 0) + + def test_difference(self): + # diff + rng1 = pd.period_range('1/1/2000', freq='D', periods=5) + other1 = pd.period_range('1/6/2000', freq='D', periods=5) + expected1 = pd.period_range('1/1/2000', freq='D', periods=5) + + rng2 = pd.period_range('1/1/2000', freq='D', periods=5) + other2 = pd.period_range('1/4/2000', freq='D', periods=5) + expected2 = pd.period_range('1/1/2000', freq='D', periods=3) + + rng3 = pd.period_range('1/1/2000', freq='D', periods=5) + other3 = pd.PeriodIndex([], freq='D') + expected3 = pd.period_range('1/1/2000', freq='D', periods=5) + + rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5) + other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5) + expected4 = rng4 + + rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', + '2000-01-01 09:05'], freq='T') + other5 = pd.PeriodIndex( + ['2000-01-01 09:01', '2000-01-01 09:05'], freq='T') + expected5 = pd.PeriodIndex(['2000-01-01 09:03'], freq='T') + + rng6 = pd.period_range('2000-01-01', freq='M', periods=7) + other6 = pd.period_range('2000-04-01', freq='M', periods=7) + expected6 = pd.period_range('2000-01-01', freq='M', periods=3) + + rng7 = pd.period_range('2003-01-01', freq='A', periods=5) + other7 = pd.period_range('1998-01-01', freq='A', periods=8) + expected7 = pd.period_range('2006-01-01', freq='A', periods=2) + + for rng, other, expected in [(rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3), + (rng4, other4, expected4), + (rng5, other5, expected5), + (rng6, other6, expected6), + (rng7, other7, expected7), ]: + result_difference = rng.difference(other) + tm.assert_index_equal(result_difference, expected) diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index c94a7c62a6dc9..ffe00a4a62a0a 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -7,12 +7,28 @@ from pandas.compat import text_type, iteritems from pandas.compat.numpy import np_datetime64_compat from pandas import Period, Timestamp, tslib, offsets, _period -from pandas.tseries.frequencies import DAYS, MONTHS, _period_code_map +from pandas.tseries.frequencies import DAYS, MONTHS class TestPeriodProperties(tm.TestCase): "Test properties such as year, month, weekday, etc...."
+ def test_is_leap_year(self): + # GH 13727 + for freq in ['A', 'M', 'D', 'H']: + p = Period('2000-01-01 00:00:00', freq=freq) + self.assertTrue(p.is_leap_year) + self.assertIsInstance(p.is_leap_year, bool) + + p = Period('1999-01-01 00:00:00', freq=freq) + self.assertFalse(p.is_leap_year) + + p = Period('2004-01-01 00:00:00', freq=freq) + self.assertTrue(p.is_leap_year) + + p = Period('2100-01-01 00:00:00', freq=freq) + self.assertFalse(p.is_leap_year) + def test_quarterly_negative_ordinals(self): p = Period(ordinal=-1, freq='Q-DEC') self.assertEqual(p.year, 1969) @@ -273,7 +289,7 @@ def test_timestamp_mult(self): self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-03-31')) - def test_period_constructor(self): + def test_construction(self): i1 = Period('1/1/2005', freq='M') i2 = Period('Jan 2005') @@ -299,6 +315,41 @@ def test_period_constructor(self): self.assertEqual(i1, i2) self.assertEqual(i1, i3) + i1 = Period('1982', freq='min') + i2 = Period('1982', freq='MIN') + self.assertEqual(i1, i2) + i2 = Period('1982', freq=('Min', 1)) + self.assertEqual(i1, i2) + + i1 = Period(year=2005, month=3, day=1, freq='D') + i2 = Period('3/1/2005', freq='D') + self.assertEqual(i1, i2) + + i3 = Period(year=2005, month=3, day=1, freq='d') + self.assertEqual(i1, i3) + + i1 = Period('2007-01-01 09:00:00.001') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') + self.assertEqual(i1, expected) + + expected = Period(np_datetime64_compat( + '2007-01-01 09:00:00.001Z'), freq='L') + self.assertEqual(i1, expected) + + i1 = Period('2007-01-01 09:00:00.00101') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') + self.assertEqual(i1, expected) + + expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), + freq='U') + self.assertEqual(i1, expected) + + self.assertRaises(ValueError, Period, ordinal=200701) + + self.assertRaises(ValueError, Period, '2007-1-1', freq='X') + + def test_construction_bday(self): + # Biz day construction, roll forward if non-weekday i1 = Period('3/10/12', freq='B') i2 = Period('3/10/12', freq='D') @@ -311,6 +362,12 @@ def test_period_constructor(self): i3 = Period('3/10/12', freq='b') self.assertEqual(i1, i3) + i1 = Period(year=2012, month=3, day=10, freq='B') + i2 = Period('3/12/12', freq='B') + self.assertEqual(i1, i2) + + def test_construction_quarter(self): + i1 = Period(year=2005, quarter=1, freq='Q') i2 = Period('1/1/2005', freq='Q') self.assertEqual(i1, i2) @@ -319,17 +376,6 @@ def test_period_constructor(self): i2 = Period('9/1/2005', freq='Q') self.assertEqual(i1, i2) - i1 = Period(year=2005, month=3, day=1, freq='D') - i2 = Period('3/1/2005', freq='D') - self.assertEqual(i1, i2) - - i3 = Period(year=2005, month=3, day=1, freq='d') - self.assertEqual(i1, i3) - - i1 = Period(year=2012, month=3, day=10, freq='B') - i2 = Period('3/12/12', freq='B') - self.assertEqual(i1, i2) - i1 = Period('2005Q1') i2 = Period(year=2005, quarter=1, freq='Q') i3 = Period('2005q1') @@ -356,11 +402,7 @@ def test_period_constructor(self): lower = Period('4q1984') self.assertEqual(i1, lower) - i1 = Period('1982', freq='min') - i2 = Period('1982', freq='MIN') - self.assertEqual(i1, i2) - i2 = Period('1982', freq=('Min', 1)) - self.assertEqual(i1, i2) + def test_construction_month(self): expected = Period('2007-01', freq='M') i1 = Period('200701', freq='M') @@ -389,26 +431,6 @@ def test_period_constructor(self): self.assertEqual(i1, i4) self.assertEqual(i1, i5) - i1 = 
Period('2007-01-01 09:00:00.001') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') - self.assertEqual(i1, expected) - - expected = Period(np_datetime64_compat( - '2007-01-01 09:00:00.001Z'), freq='L') - self.assertEqual(i1, expected) - - i1 = Period('2007-01-01 09:00:00.00101') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') - self.assertEqual(i1, expected) - - expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), - freq='U') - self.assertEqual(i1, expected) - - self.assertRaises(ValueError, Period, ordinal=200701) - - self.assertRaises(ValueError, Period, '2007-1-1', freq='X') - def test_period_constructor_offsets(self): self.assertEqual(Period('1/1/2005', freq=offsets.MonthEnd()), Period('1/1/2005', freq='M')) @@ -894,21 +916,6 @@ def test_constructor_infer_freq(self): p = Period('2007-01-01 07:10:15.123400') self.assertEqual(p.freq, 'U') - def test_asfreq_MS(self): - initial = Period("2013") - - self.assertEqual(initial.asfreq(freq="M", how="S"), - Period('2013-01', 'M')) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - initial.asfreq(freq="MS", how="S") - - with tm.assertRaisesRegexp(ValueError, msg): - pd.Period('2013-01', 'MS') - - self.assertTrue(_period_code_map.get("MS") is None) - def test_badinput(self): self.assertRaises(ValueError, Period, '-2000', 'A') self.assertRaises(tslib.DateParseError, Period, '0', 'A') @@ -945,722 +952,89 @@ def test_get_period_field_array_raises_on_out_of_range(self): np.empty(1), 0) -class TestFreqConversion(tm.TestCase): - "Test frequency conversion of date objects" - - def test_asfreq_corner(self): - val = Period(freq='A', year=2007) - result1 = val.asfreq('5t') - result2 = val.asfreq('t') - expected = Period('2007-12-31 23:59', freq='t') - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freqstr, '5T') - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freqstr, 'T') - - def test_conv_annual(self): - # frequency conversion tests: from Annual Frequency - - ival_A = Period(freq='A', year=2007) - - ival_AJAN = Period(freq="A-JAN", year=2007) - ival_AJUN = Period(freq="A-JUN", year=2007) - ival_ANOV = Period(freq="A-NOV", year=2007) - - ival_A_to_Q_start = Period(freq='Q', year=2007, quarter=1) - ival_A_to_Q_end = Period(freq='Q', year=2007, quarter=4) - ival_A_to_M_start = Period(freq='M', year=2007, month=1) - ival_A_to_M_end = Period(freq='M', year=2007, month=12) - ival_A_to_W_start = Period(freq='W', year=2007, month=1, day=1) - ival_A_to_W_end = Period(freq='W', year=2007, month=12, day=31) - ival_A_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_A_to_B_end = Period(freq='B', year=2007, month=12, day=31) - ival_A_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_A_to_D_end = Period(freq='D', year=2007, month=12, day=31) - ival_A_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_A_to_H_end = Period(freq='H', year=2007, month=12, day=31, - hour=23) - ival_A_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_A_to_T_end = Period(freq='Min', year=2007, month=12, day=31, - hour=23, minute=59) - ival_A_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_A_to_S_end = Period(freq='S', year=2007, month=12, day=31, - hour=23, minute=59, second=59) - - ival_AJAN_to_D_end = Period(freq='D', year=2007, month=1, day=31) - ival_AJAN_to_D_start = Period(freq='D', 
year=2006, month=2, day=1) - ival_AJUN_to_D_end = Period(freq='D', year=2007, month=6, day=30) - ival_AJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) - ival_ANOV_to_D_end = Period(freq='D', year=2007, month=11, day=30) - ival_ANOV_to_D_start = Period(freq='D', year=2006, month=12, day=1) - - self.assertEqual(ival_A.asfreq('Q', 'S'), ival_A_to_Q_start) - self.assertEqual(ival_A.asfreq('Q', 'e'), ival_A_to_Q_end) - self.assertEqual(ival_A.asfreq('M', 's'), ival_A_to_M_start) - self.assertEqual(ival_A.asfreq('M', 'E'), ival_A_to_M_end) - self.assertEqual(ival_A.asfreq('W', 'S'), ival_A_to_W_start) - self.assertEqual(ival_A.asfreq('W', 'E'), ival_A_to_W_end) - self.assertEqual(ival_A.asfreq('B', 'S'), ival_A_to_B_start) - self.assertEqual(ival_A.asfreq('B', 'E'), ival_A_to_B_end) - self.assertEqual(ival_A.asfreq('D', 'S'), ival_A_to_D_start) - self.assertEqual(ival_A.asfreq('D', 'E'), ival_A_to_D_end) - self.assertEqual(ival_A.asfreq('H', 'S'), ival_A_to_H_start) - self.assertEqual(ival_A.asfreq('H', 'E'), ival_A_to_H_end) - self.assertEqual(ival_A.asfreq('min', 'S'), ival_A_to_T_start) - self.assertEqual(ival_A.asfreq('min', 'E'), ival_A_to_T_end) - self.assertEqual(ival_A.asfreq('T', 'S'), ival_A_to_T_start) - self.assertEqual(ival_A.asfreq('T', 'E'), ival_A_to_T_end) - self.assertEqual(ival_A.asfreq('S', 'S'), ival_A_to_S_start) - self.assertEqual(ival_A.asfreq('S', 'E'), ival_A_to_S_end) - - self.assertEqual(ival_AJAN.asfreq('D', 'S'), ival_AJAN_to_D_start) - self.assertEqual(ival_AJAN.asfreq('D', 'E'), ival_AJAN_to_D_end) - - self.assertEqual(ival_AJUN.asfreq('D', 'S'), ival_AJUN_to_D_start) - self.assertEqual(ival_AJUN.asfreq('D', 'E'), ival_AJUN_to_D_end) - - self.assertEqual(ival_ANOV.asfreq('D', 'S'), ival_ANOV_to_D_start) - self.assertEqual(ival_ANOV.asfreq('D', 'E'), ival_ANOV_to_D_end) - - self.assertEqual(ival_A.asfreq('A'), ival_A) - - def test_conv_quarterly(self): - # frequency conversion tests: from Quarterly Frequency - - ival_Q = Period(freq='Q', year=2007, quarter=1) - ival_Q_end_of_year = Period(freq='Q', year=2007, quarter=4) - - ival_QEJAN = Period(freq="Q-JAN", year=2007, quarter=1) - ival_QEJUN = Period(freq="Q-JUN", year=2007, quarter=1) - - ival_Q_to_A = Period(freq='A', year=2007) - ival_Q_to_M_start = Period(freq='M', year=2007, month=1) - ival_Q_to_M_end = Period(freq='M', year=2007, month=3) - ival_Q_to_W_start = Period(freq='W', year=2007, month=1, day=1) - ival_Q_to_W_end = Period(freq='W', year=2007, month=3, day=31) - ival_Q_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_Q_to_B_end = Period(freq='B', year=2007, month=3, day=30) - ival_Q_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_Q_to_D_end = Period(freq='D', year=2007, month=3, day=31) - ival_Q_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_Q_to_H_end = Period(freq='H', year=2007, month=3, day=31, hour=23) - ival_Q_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_Q_to_T_end = Period(freq='Min', year=2007, month=3, day=31, - hour=23, minute=59) - ival_Q_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_Q_to_S_end = Period(freq='S', year=2007, month=3, day=31, hour=23, - minute=59, second=59) - - ival_QEJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) - ival_QEJAN_to_D_end = Period(freq='D', year=2006, month=4, day=30) - - ival_QEJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) - ival_QEJUN_to_D_end = Period(freq='D', year=2006, 
month=9, day=30) - - self.assertEqual(ival_Q.asfreq('A'), ival_Q_to_A) - self.assertEqual(ival_Q_end_of_year.asfreq('A'), ival_Q_to_A) - - self.assertEqual(ival_Q.asfreq('M', 'S'), ival_Q_to_M_start) - self.assertEqual(ival_Q.asfreq('M', 'E'), ival_Q_to_M_end) - self.assertEqual(ival_Q.asfreq('W', 'S'), ival_Q_to_W_start) - self.assertEqual(ival_Q.asfreq('W', 'E'), ival_Q_to_W_end) - self.assertEqual(ival_Q.asfreq('B', 'S'), ival_Q_to_B_start) - self.assertEqual(ival_Q.asfreq('B', 'E'), ival_Q_to_B_end) - self.assertEqual(ival_Q.asfreq('D', 'S'), ival_Q_to_D_start) - self.assertEqual(ival_Q.asfreq('D', 'E'), ival_Q_to_D_end) - self.assertEqual(ival_Q.asfreq('H', 'S'), ival_Q_to_H_start) - self.assertEqual(ival_Q.asfreq('H', 'E'), ival_Q_to_H_end) - self.assertEqual(ival_Q.asfreq('Min', 'S'), ival_Q_to_T_start) - self.assertEqual(ival_Q.asfreq('Min', 'E'), ival_Q_to_T_end) - self.assertEqual(ival_Q.asfreq('S', 'S'), ival_Q_to_S_start) - self.assertEqual(ival_Q.asfreq('S', 'E'), ival_Q_to_S_end) - - self.assertEqual(ival_QEJAN.asfreq('D', 'S'), ival_QEJAN_to_D_start) - self.assertEqual(ival_QEJAN.asfreq('D', 'E'), ival_QEJAN_to_D_end) - self.assertEqual(ival_QEJUN.asfreq('D', 'S'), ival_QEJUN_to_D_start) - self.assertEqual(ival_QEJUN.asfreq('D', 'E'), ival_QEJUN_to_D_end) - - self.assertEqual(ival_Q.asfreq('Q'), ival_Q) - - def test_conv_monthly(self): - # frequency conversion tests: from Monthly Frequency - - ival_M = Period(freq='M', year=2007, month=1) - ival_M_end_of_year = Period(freq='M', year=2007, month=12) - ival_M_end_of_quarter = Period(freq='M', year=2007, month=3) - ival_M_to_A = Period(freq='A', year=2007) - ival_M_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_M_to_W_start = Period(freq='W', year=2007, month=1, day=1) - ival_M_to_W_end = Period(freq='W', year=2007, month=1, day=31) - ival_M_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_M_to_B_end = Period(freq='B', year=2007, month=1, day=31) - ival_M_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_M_to_D_end = Period(freq='D', year=2007, month=1, day=31) - ival_M_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_M_to_H_end = Period(freq='H', year=2007, month=1, day=31, hour=23) - ival_M_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_M_to_T_end = Period(freq='Min', year=2007, month=1, day=31, - hour=23, minute=59) - ival_M_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_M_to_S_end = Period(freq='S', year=2007, month=1, day=31, hour=23, - minute=59, second=59) - - self.assertEqual(ival_M.asfreq('A'), ival_M_to_A) - self.assertEqual(ival_M_end_of_year.asfreq('A'), ival_M_to_A) - self.assertEqual(ival_M.asfreq('Q'), ival_M_to_Q) - self.assertEqual(ival_M_end_of_quarter.asfreq('Q'), ival_M_to_Q) - - self.assertEqual(ival_M.asfreq('W', 'S'), ival_M_to_W_start) - self.assertEqual(ival_M.asfreq('W', 'E'), ival_M_to_W_end) - self.assertEqual(ival_M.asfreq('B', 'S'), ival_M_to_B_start) - self.assertEqual(ival_M.asfreq('B', 'E'), ival_M_to_B_end) - self.assertEqual(ival_M.asfreq('D', 'S'), ival_M_to_D_start) - self.assertEqual(ival_M.asfreq('D', 'E'), ival_M_to_D_end) - self.assertEqual(ival_M.asfreq('H', 'S'), ival_M_to_H_start) - self.assertEqual(ival_M.asfreq('H', 'E'), ival_M_to_H_end) - self.assertEqual(ival_M.asfreq('Min', 'S'), ival_M_to_T_start) - self.assertEqual(ival_M.asfreq('Min', 'E'), ival_M_to_T_end) - self.assertEqual(ival_M.asfreq('S', 'S'), ival_M_to_S_start) - 
self.assertEqual(ival_M.asfreq('S', 'E'), ival_M_to_S_end) - - self.assertEqual(ival_M.asfreq('M'), ival_M) - - def test_conv_weekly(self): - # frequency conversion tests: from Weekly Frequency - ival_W = Period(freq='W', year=2007, month=1, day=1) - - ival_WSUN = Period(freq='W', year=2007, month=1, day=7) - ival_WSAT = Period(freq='W-SAT', year=2007, month=1, day=6) - ival_WFRI = Period(freq='W-FRI', year=2007, month=1, day=5) - ival_WTHU = Period(freq='W-THU', year=2007, month=1, day=4) - ival_WWED = Period(freq='W-WED', year=2007, month=1, day=3) - ival_WTUE = Period(freq='W-TUE', year=2007, month=1, day=2) - ival_WMON = Period(freq='W-MON', year=2007, month=1, day=1) - - ival_WSUN_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_WSUN_to_D_end = Period(freq='D', year=2007, month=1, day=7) - ival_WSAT_to_D_start = Period(freq='D', year=2006, month=12, day=31) - ival_WSAT_to_D_end = Period(freq='D', year=2007, month=1, day=6) - ival_WFRI_to_D_start = Period(freq='D', year=2006, month=12, day=30) - ival_WFRI_to_D_end = Period(freq='D', year=2007, month=1, day=5) - ival_WTHU_to_D_start = Period(freq='D', year=2006, month=12, day=29) - ival_WTHU_to_D_end = Period(freq='D', year=2007, month=1, day=4) - ival_WWED_to_D_start = Period(freq='D', year=2006, month=12, day=28) - ival_WWED_to_D_end = Period(freq='D', year=2007, month=1, day=3) - ival_WTUE_to_D_start = Period(freq='D', year=2006, month=12, day=27) - ival_WTUE_to_D_end = Period(freq='D', year=2007, month=1, day=2) - ival_WMON_to_D_start = Period(freq='D', year=2006, month=12, day=26) - ival_WMON_to_D_end = Period(freq='D', year=2007, month=1, day=1) - - ival_W_end_of_year = Period(freq='W', year=2007, month=12, day=31) - ival_W_end_of_quarter = Period(freq='W', year=2007, month=3, day=31) - ival_W_end_of_month = Period(freq='W', year=2007, month=1, day=31) - ival_W_to_A = Period(freq='A', year=2007) - ival_W_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_W_to_M = Period(freq='M', year=2007, month=1) - - if Period(freq='D', year=2007, month=12, day=31).weekday == 6: - ival_W_to_A_end_of_year = Period(freq='A', year=2007) - else: - ival_W_to_A_end_of_year = Period(freq='A', year=2008) - - if Period(freq='D', year=2007, month=3, day=31).weekday == 6: - ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=1) - else: - ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=2) - - if Period(freq='D', year=2007, month=1, day=31).weekday == 6: - ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=1) - else: - ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=2) - - ival_W_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_W_to_B_end = Period(freq='B', year=2007, month=1, day=5) - ival_W_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_W_to_D_end = Period(freq='D', year=2007, month=1, day=7) - ival_W_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_W_to_H_end = Period(freq='H', year=2007, month=1, day=7, hour=23) - ival_W_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_W_to_T_end = Period(freq='Min', year=2007, month=1, day=7, - hour=23, minute=59) - ival_W_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_W_to_S_end = Period(freq='S', year=2007, month=1, day=7, hour=23, - minute=59, second=59) - - self.assertEqual(ival_W.asfreq('A'), ival_W_to_A) - self.assertEqual(ival_W_end_of_year.asfreq('A'), - ival_W_to_A_end_of_year) - 
self.assertEqual(ival_W.asfreq('Q'), ival_W_to_Q) - self.assertEqual(ival_W_end_of_quarter.asfreq('Q'), - ival_W_to_Q_end_of_quarter) - self.assertEqual(ival_W.asfreq('M'), ival_W_to_M) - self.assertEqual(ival_W_end_of_month.asfreq('M'), - ival_W_to_M_end_of_month) - - self.assertEqual(ival_W.asfreq('B', 'S'), ival_W_to_B_start) - self.assertEqual(ival_W.asfreq('B', 'E'), ival_W_to_B_end) - - self.assertEqual(ival_W.asfreq('D', 'S'), ival_W_to_D_start) - self.assertEqual(ival_W.asfreq('D', 'E'), ival_W_to_D_end) - - self.assertEqual(ival_WSUN.asfreq('D', 'S'), ival_WSUN_to_D_start) - self.assertEqual(ival_WSUN.asfreq('D', 'E'), ival_WSUN_to_D_end) - self.assertEqual(ival_WSAT.asfreq('D', 'S'), ival_WSAT_to_D_start) - self.assertEqual(ival_WSAT.asfreq('D', 'E'), ival_WSAT_to_D_end) - self.assertEqual(ival_WFRI.asfreq('D', 'S'), ival_WFRI_to_D_start) - self.assertEqual(ival_WFRI.asfreq('D', 'E'), ival_WFRI_to_D_end) - self.assertEqual(ival_WTHU.asfreq('D', 'S'), ival_WTHU_to_D_start) - self.assertEqual(ival_WTHU.asfreq('D', 'E'), ival_WTHU_to_D_end) - self.assertEqual(ival_WWED.asfreq('D', 'S'), ival_WWED_to_D_start) - self.assertEqual(ival_WWED.asfreq('D', 'E'), ival_WWED_to_D_end) - self.assertEqual(ival_WTUE.asfreq('D', 'S'), ival_WTUE_to_D_start) - self.assertEqual(ival_WTUE.asfreq('D', 'E'), ival_WTUE_to_D_end) - self.assertEqual(ival_WMON.asfreq('D', 'S'), ival_WMON_to_D_start) - self.assertEqual(ival_WMON.asfreq('D', 'E'), ival_WMON_to_D_end) - - self.assertEqual(ival_W.asfreq('H', 'S'), ival_W_to_H_start) - self.assertEqual(ival_W.asfreq('H', 'E'), ival_W_to_H_end) - self.assertEqual(ival_W.asfreq('Min', 'S'), ival_W_to_T_start) - self.assertEqual(ival_W.asfreq('Min', 'E'), ival_W_to_T_end) - self.assertEqual(ival_W.asfreq('S', 'S'), ival_W_to_S_start) - self.assertEqual(ival_W.asfreq('S', 'E'), ival_W_to_S_end) - - self.assertEqual(ival_W.asfreq('W'), ival_W) +class TestComparisons(tm.TestCase): - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - ival_W.asfreq('WK') + def setUp(self): + self.january1 = Period('2000-01', 'M') + self.january2 = Period('2000-01', 'M') + self.february = Period('2000-02', 'M') + self.march = Period('2000-03', 'M') + self.day = Period('2012-01-01', 'D') - def test_conv_weekly_legacy(self): - # frequency conversion tests: from Weekly Frequency - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK', year=2007, month=1, day=1) + def test_equal(self): + self.assertEqual(self.january1, self.january2) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-SAT', year=2007, month=1, day=6) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-FRI', year=2007, month=1, day=5) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-THU', year=2007, month=1, day=4) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-WED', year=2007, month=1, day=3) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-TUE', year=2007, month=1, day=2) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-MON', year=2007, month=1, day=1) - - def test_conv_business(self): - # frequency conversion tests: from Business Frequency" - - ival_B = Period(freq='B', year=2007, month=1, day=1) - ival_B_end_of_year = Period(freq='B', year=2007, month=12, day=31) - ival_B_end_of_quarter = Period(freq='B', year=2007, month=3, day=30) - ival_B_end_of_month = Period(freq='B', year=2007, month=1, day=31) - 
ival_B_end_of_week = Period(freq='B', year=2007, month=1, day=5) - - ival_B_to_A = Period(freq='A', year=2007) - ival_B_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_B_to_M = Period(freq='M', year=2007, month=1) - ival_B_to_W = Period(freq='W', year=2007, month=1, day=7) - ival_B_to_D = Period(freq='D', year=2007, month=1, day=1) - ival_B_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_B_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23) - ival_B_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_B_to_T_end = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) - ival_B_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_B_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, - minute=59, second=59) - - self.assertEqual(ival_B.asfreq('A'), ival_B_to_A) - self.assertEqual(ival_B_end_of_year.asfreq('A'), ival_B_to_A) - self.assertEqual(ival_B.asfreq('Q'), ival_B_to_Q) - self.assertEqual(ival_B_end_of_quarter.asfreq('Q'), ival_B_to_Q) - self.assertEqual(ival_B.asfreq('M'), ival_B_to_M) - self.assertEqual(ival_B_end_of_month.asfreq('M'), ival_B_to_M) - self.assertEqual(ival_B.asfreq('W'), ival_B_to_W) - self.assertEqual(ival_B_end_of_week.asfreq('W'), ival_B_to_W) - - self.assertEqual(ival_B.asfreq('D'), ival_B_to_D) - - self.assertEqual(ival_B.asfreq('H', 'S'), ival_B_to_H_start) - self.assertEqual(ival_B.asfreq('H', 'E'), ival_B_to_H_end) - self.assertEqual(ival_B.asfreq('Min', 'S'), ival_B_to_T_start) - self.assertEqual(ival_B.asfreq('Min', 'E'), ival_B_to_T_end) - self.assertEqual(ival_B.asfreq('S', 'S'), ival_B_to_S_start) - self.assertEqual(ival_B.asfreq('S', 'E'), ival_B_to_S_end) - - self.assertEqual(ival_B.asfreq('B'), ival_B) - - def test_conv_daily(self): - # frequency conversion tests: from Business Frequency" - - ival_D = Period(freq='D', year=2007, month=1, day=1) - ival_D_end_of_year = Period(freq='D', year=2007, month=12, day=31) - ival_D_end_of_quarter = Period(freq='D', year=2007, month=3, day=31) - ival_D_end_of_month = Period(freq='D', year=2007, month=1, day=31) - ival_D_end_of_week = Period(freq='D', year=2007, month=1, day=7) - - ival_D_friday = Period(freq='D', year=2007, month=1, day=5) - ival_D_saturday = Period(freq='D', year=2007, month=1, day=6) - ival_D_sunday = Period(freq='D', year=2007, month=1, day=7) - - # TODO: unused? 
- # ival_D_monday = Period(freq='D', year=2007, month=1, day=8) - - ival_B_friday = Period(freq='B', year=2007, month=1, day=5) - ival_B_monday = Period(freq='B', year=2007, month=1, day=8) - - ival_D_to_A = Period(freq='A', year=2007) - - ival_Deoq_to_AJAN = Period(freq='A-JAN', year=2008) - ival_Deoq_to_AJUN = Period(freq='A-JUN', year=2007) - ival_Deoq_to_ADEC = Period(freq='A-DEC', year=2007) - - ival_D_to_QEJAN = Period(freq="Q-JAN", year=2007, quarter=4) - ival_D_to_QEJUN = Period(freq="Q-JUN", year=2007, quarter=3) - ival_D_to_QEDEC = Period(freq="Q-DEC", year=2007, quarter=1) - - ival_D_to_M = Period(freq='M', year=2007, month=1) - ival_D_to_W = Period(freq='W', year=2007, month=1, day=7) - - ival_D_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_D_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23) - ival_D_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_D_to_T_end = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) - ival_D_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_D_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, - minute=59, second=59) - - self.assertEqual(ival_D.asfreq('A'), ival_D_to_A) - - self.assertEqual(ival_D_end_of_quarter.asfreq('A-JAN'), - ival_Deoq_to_AJAN) - self.assertEqual(ival_D_end_of_quarter.asfreq('A-JUN'), - ival_Deoq_to_AJUN) - self.assertEqual(ival_D_end_of_quarter.asfreq('A-DEC'), - ival_Deoq_to_ADEC) - - self.assertEqual(ival_D_end_of_year.asfreq('A'), ival_D_to_A) - self.assertEqual(ival_D_end_of_quarter.asfreq('Q'), ival_D_to_QEDEC) - self.assertEqual(ival_D.asfreq("Q-JAN"), ival_D_to_QEJAN) - self.assertEqual(ival_D.asfreq("Q-JUN"), ival_D_to_QEJUN) - self.assertEqual(ival_D.asfreq("Q-DEC"), ival_D_to_QEDEC) - self.assertEqual(ival_D.asfreq('M'), ival_D_to_M) - self.assertEqual(ival_D_end_of_month.asfreq('M'), ival_D_to_M) - self.assertEqual(ival_D.asfreq('W'), ival_D_to_W) - self.assertEqual(ival_D_end_of_week.asfreq('W'), ival_D_to_W) - - self.assertEqual(ival_D_friday.asfreq('B'), ival_B_friday) - self.assertEqual(ival_D_saturday.asfreq('B', 'S'), ival_B_friday) - self.assertEqual(ival_D_saturday.asfreq('B', 'E'), ival_B_monday) - self.assertEqual(ival_D_sunday.asfreq('B', 'S'), ival_B_friday) - self.assertEqual(ival_D_sunday.asfreq('B', 'E'), ival_B_monday) - - self.assertEqual(ival_D.asfreq('H', 'S'), ival_D_to_H_start) - self.assertEqual(ival_D.asfreq('H', 'E'), ival_D_to_H_end) - self.assertEqual(ival_D.asfreq('Min', 'S'), ival_D_to_T_start) - self.assertEqual(ival_D.asfreq('Min', 'E'), ival_D_to_T_end) - self.assertEqual(ival_D.asfreq('S', 'S'), ival_D_to_S_start) - self.assertEqual(ival_D.asfreq('S', 'E'), ival_D_to_S_end) - - self.assertEqual(ival_D.asfreq('D'), ival_D) - - def test_conv_hourly(self): - # frequency conversion tests: from Hourly Frequency" - - ival_H = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_H_end_of_year = Period(freq='H', year=2007, month=12, day=31, - hour=23) - ival_H_end_of_quarter = Period(freq='H', year=2007, month=3, day=31, - hour=23) - ival_H_end_of_month = Period(freq='H', year=2007, month=1, day=31, - hour=23) - ival_H_end_of_week = Period(freq='H', year=2007, month=1, day=7, - hour=23) - ival_H_end_of_day = Period(freq='H', year=2007, month=1, day=1, - hour=23) - ival_H_end_of_bus = Period(freq='H', year=2007, month=1, day=1, - hour=23) - - ival_H_to_A = Period(freq='A', year=2007) - ival_H_to_Q = Period(freq='Q', year=2007, quarter=1) - 
ival_H_to_M = Period(freq='M', year=2007, month=1) - ival_H_to_W = Period(freq='W', year=2007, month=1, day=7) - ival_H_to_D = Period(freq='D', year=2007, month=1, day=1) - ival_H_to_B = Period(freq='B', year=2007, month=1, day=1) - - ival_H_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_H_to_T_end = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=59) - ival_H_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_H_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=59, second=59) - - self.assertEqual(ival_H.asfreq('A'), ival_H_to_A) - self.assertEqual(ival_H_end_of_year.asfreq('A'), ival_H_to_A) - self.assertEqual(ival_H.asfreq('Q'), ival_H_to_Q) - self.assertEqual(ival_H_end_of_quarter.asfreq('Q'), ival_H_to_Q) - self.assertEqual(ival_H.asfreq('M'), ival_H_to_M) - self.assertEqual(ival_H_end_of_month.asfreq('M'), ival_H_to_M) - self.assertEqual(ival_H.asfreq('W'), ival_H_to_W) - self.assertEqual(ival_H_end_of_week.asfreq('W'), ival_H_to_W) - self.assertEqual(ival_H.asfreq('D'), ival_H_to_D) - self.assertEqual(ival_H_end_of_day.asfreq('D'), ival_H_to_D) - self.assertEqual(ival_H.asfreq('B'), ival_H_to_B) - self.assertEqual(ival_H_end_of_bus.asfreq('B'), ival_H_to_B) - - self.assertEqual(ival_H.asfreq('Min', 'S'), ival_H_to_T_start) - self.assertEqual(ival_H.asfreq('Min', 'E'), ival_H_to_T_end) - self.assertEqual(ival_H.asfreq('S', 'S'), ival_H_to_S_start) - self.assertEqual(ival_H.asfreq('S', 'E'), ival_H_to_S_end) - - self.assertEqual(ival_H.asfreq('H'), ival_H) - - def test_conv_minutely(self): - # frequency conversion tests: from Minutely Frequency" - - ival_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=0) - ival_T_end_of_year = Period(freq='Min', year=2007, month=12, day=31, - hour=23, minute=59) - ival_T_end_of_quarter = Period(freq='Min', year=2007, month=3, day=31, - hour=23, minute=59) - ival_T_end_of_month = Period(freq='Min', year=2007, month=1, day=31, - hour=23, minute=59) - ival_T_end_of_week = Period(freq='Min', year=2007, month=1, day=7, - hour=23, minute=59) - ival_T_end_of_day = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) - ival_T_end_of_bus = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) - ival_T_end_of_hour = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=59) - - ival_T_to_A = Period(freq='A', year=2007) - ival_T_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_T_to_M = Period(freq='M', year=2007, month=1) - ival_T_to_W = Period(freq='W', year=2007, month=1, day=7) - ival_T_to_D = Period(freq='D', year=2007, month=1, day=1) - ival_T_to_B = Period(freq='B', year=2007, month=1, day=1) - ival_T_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) - - ival_T_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_T_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=59) - - self.assertEqual(ival_T.asfreq('A'), ival_T_to_A) - self.assertEqual(ival_T_end_of_year.asfreq('A'), ival_T_to_A) - self.assertEqual(ival_T.asfreq('Q'), ival_T_to_Q) - self.assertEqual(ival_T_end_of_quarter.asfreq('Q'), ival_T_to_Q) - self.assertEqual(ival_T.asfreq('M'), ival_T_to_M) - self.assertEqual(ival_T_end_of_month.asfreq('M'), ival_T_to_M) - self.assertEqual(ival_T.asfreq('W'), ival_T_to_W) - self.assertEqual(ival_T_end_of_week.asfreq('W'), ival_T_to_W) - self.assertEqual(ival_T.asfreq('D'), ival_T_to_D) - 
self.assertEqual(ival_T_end_of_day.asfreq('D'), ival_T_to_D) - self.assertEqual(ival_T.asfreq('B'), ival_T_to_B) - self.assertEqual(ival_T_end_of_bus.asfreq('B'), ival_T_to_B) - self.assertEqual(ival_T.asfreq('H'), ival_T_to_H) - self.assertEqual(ival_T_end_of_hour.asfreq('H'), ival_T_to_H) - - self.assertEqual(ival_T.asfreq('S', 'S'), ival_T_to_S_start) - self.assertEqual(ival_T.asfreq('S', 'E'), ival_T_to_S_end) - - self.assertEqual(ival_T.asfreq('Min'), ival_T) - - def test_conv_secondly(self): - # frequency conversion tests: from Secondly Frequency" - - ival_S = Period(freq='S', year=2007, month=1, day=1, hour=0, minute=0, - second=0) - ival_S_end_of_year = Period(freq='S', year=2007, month=12, day=31, - hour=23, minute=59, second=59) - ival_S_end_of_quarter = Period(freq='S', year=2007, month=3, day=31, - hour=23, minute=59, second=59) - ival_S_end_of_month = Period(freq='S', year=2007, month=1, day=31, - hour=23, minute=59, second=59) - ival_S_end_of_week = Period(freq='S', year=2007, month=1, day=7, - hour=23, minute=59, second=59) - ival_S_end_of_day = Period(freq='S', year=2007, month=1, day=1, - hour=23, minute=59, second=59) - ival_S_end_of_bus = Period(freq='S', year=2007, month=1, day=1, - hour=23, minute=59, second=59) - ival_S_end_of_hour = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=59, second=59) - ival_S_end_of_minute = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=0, second=59) - - ival_S_to_A = Period(freq='A', year=2007) - ival_S_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_S_to_M = Period(freq='M', year=2007, month=1) - ival_S_to_W = Period(freq='W', year=2007, month=1, day=7) - ival_S_to_D = Period(freq='D', year=2007, month=1, day=1) - ival_S_to_B = Period(freq='B', year=2007, month=1, day=1) - ival_S_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_S_to_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=0) - - self.assertEqual(ival_S.asfreq('A'), ival_S_to_A) - self.assertEqual(ival_S_end_of_year.asfreq('A'), ival_S_to_A) - self.assertEqual(ival_S.asfreq('Q'), ival_S_to_Q) - self.assertEqual(ival_S_end_of_quarter.asfreq('Q'), ival_S_to_Q) - self.assertEqual(ival_S.asfreq('M'), ival_S_to_M) - self.assertEqual(ival_S_end_of_month.asfreq('M'), ival_S_to_M) - self.assertEqual(ival_S.asfreq('W'), ival_S_to_W) - self.assertEqual(ival_S_end_of_week.asfreq('W'), ival_S_to_W) - self.assertEqual(ival_S.asfreq('D'), ival_S_to_D) - self.assertEqual(ival_S_end_of_day.asfreq('D'), ival_S_to_D) - self.assertEqual(ival_S.asfreq('B'), ival_S_to_B) - self.assertEqual(ival_S_end_of_bus.asfreq('B'), ival_S_to_B) - self.assertEqual(ival_S.asfreq('H'), ival_S_to_H) - self.assertEqual(ival_S_end_of_hour.asfreq('H'), ival_S_to_H) - self.assertEqual(ival_S.asfreq('Min'), ival_S_to_T) - self.assertEqual(ival_S_end_of_minute.asfreq('Min'), ival_S_to_T) - - self.assertEqual(ival_S.asfreq('S'), ival_S) - - def test_asfreq_mult(self): - # normal freq to mult freq - p = Period(freq='A', year=2007) - # ordinal will not change - for freq in ['3A', offsets.YearEnd(3)]: - result = p.asfreq(freq) - expected = Period('2007', freq='3A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - # ordinal will not change - for freq in ['3A', offsets.YearEnd(3)]: - result = p.asfreq(freq, how='S') - expected = Period('2007', freq='3A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - 
self.assertEqual(result.freq, expected.freq) - - # mult freq to normal freq - p = Period(freq='3A', year=2007) - # ordinal will change because how=E is the default - for freq in ['A', offsets.YearEnd()]: - result = p.asfreq(freq) - expected = Period('2009', freq='A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - # ordinal will not change - for freq in ['A', offsets.YearEnd()]: - result = p.asfreq(freq, how='S') - expected = Period('2007', freq='A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - p = Period(freq='A', year=2007) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq) - expected = Period('2007-12', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq, how='S') - expected = Period('2007-01', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - p = Period(freq='3A', year=2007) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq) - expected = Period('2009-12', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq, how='S') - expected = Period('2007-01', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - def test_asfreq_combined(self): - # normal freq to combined freq - p = Period('2007', freq='H') - - # ordinal will not change - expected = Period('2007', freq='25H') - for freq, how in zip(['1D1H', '1H1D'], ['E', 'S']): - result = p.asfreq(freq, how=how) - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - # combined freq to normal freq - p1 = Period(freq='1D1H', year=2007) - p2 = Period(freq='1H1D', year=2007) - - # ordinal will change because how=E is the default - result1 = p1.asfreq('H') - result2 = p2.asfreq('H') - expected = Period('2007-01-02', freq='H') - self.assertEqual(result1, expected) - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freq, expected.freq) - self.assertEqual(result2, expected) - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freq, expected.freq) - - # ordinal will not change - result1 = p1.asfreq('H', how='S') - result2 = p2.asfreq('H', how='S') - expected = Period('2007-01-01', freq='H') - self.assertEqual(result1, expected) - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freq, expected.freq) - self.assertEqual(result2, expected) - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freq, expected.freq) + def test_equal_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 == self.day - def test_is_leap_year(self): - # GH 13727 - for freq in ['A', 'M', 'D', 'H']: - p = Period('2000-01-01 00:00:00', freq=freq) - self.assertTrue(p.is_leap_year) - self.assertIsInstance(p.is_leap_year, bool) + def test_notEqual(self): + self.assertNotEqual(self.january1, 1) + 
self.assertNotEqual(self.january1, self.february) - p = Period('1999-01-01 00:00:00', freq=freq) - self.assertFalse(p.is_leap_year) + def test_greater(self): + self.assertTrue(self.february > self.january1) - p = Period('2004-01-01 00:00:00', freq=freq) - self.assertTrue(p.is_leap_year) + def test_greater_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 > self.day - p = Period('2100-01-01 00:00:00', freq=freq) - self.assertFalse(p.is_leap_year) + def test_greater_Raises_Type(self): + with tm.assertRaises(TypeError): + self.january1 > 1 + + def test_greaterEqual(self): + self.assertTrue(self.january1 >= self.january2) + + def test_greaterEqual_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 >= self.day + + with tm.assertRaises(TypeError): + print(self.january1 >= 1) + + def test_smallerEqual(self): + self.assertTrue(self.january1 <= self.january2) + + def test_smallerEqual_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 <= self.day + + def test_smallerEqual_Raises_Type(self): + with tm.assertRaises(TypeError): + self.january1 <= 1 + + def test_smaller(self): + self.assertTrue(self.january1 < self.february) + + def test_smaller_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 < self.day + + def test_smaller_Raises_Type(self): + with tm.assertRaises(TypeError): + self.january1 < 1 + + def test_sort(self): + periods = [self.march, self.january1, self.february] + correctPeriods = [self.january1, self.february, self.march] + self.assertEqual(sorted(periods), correctPeriods) + + def test_period_nat_comp(self): + p_nat = Period('NaT', freq='D') + p = Period('2011-01-01', freq='D') + + nat = pd.Timestamp('NaT') + t = pd.Timestamp('2011-01-01') + # confirm Period('NaT') work identical with Timestamp('NaT') + for left, right in [(p_nat, p), (p, p_nat), (p_nat, p_nat), (nat, t), + (t, nat), (nat, nat)]: + self.assertEqual(left < right, False) + self.assertEqual(left > right, False) + self.assertEqual(left == right, False) + self.assertEqual(left != right, True) + self.assertEqual(left <= right, False) + self.assertEqual(left >= right, False) class TestMethods(tm.TestCase): diff --git a/pandas/tests/scalar/test_period_asfreq.py b/pandas/tests/scalar/test_period_asfreq.py new file mode 100644 index 0000000000000..d311fef8a826d --- /dev/null +++ b/pandas/tests/scalar/test_period_asfreq.py @@ -0,0 +1,721 @@ +import pandas as pd +from pandas import Period, offsets +from pandas.util import testing as tm +from pandas.tseries.frequencies import _period_code_map + + +class TestFreqConversion(tm.TestCase): + "Test frequency conversion of date objects" + + def test_asfreq_corner(self): + val = Period(freq='A', year=2007) + result1 = val.asfreq('5t') + result2 = val.asfreq('t') + expected = Period('2007-12-31 23:59', freq='t') + self.assertEqual(result1.ordinal, expected.ordinal) + self.assertEqual(result1.freqstr, '5T') + self.assertEqual(result2.ordinal, expected.ordinal) + self.assertEqual(result2.freqstr, 'T') + + def test_conv_annual(self): + # frequency conversion tests: from Annual Frequency + + ival_A = Period(freq='A', year=2007) + + ival_AJAN = Period(freq="A-JAN", year=2007) + ival_AJUN = Period(freq="A-JUN", year=2007) + ival_ANOV = Period(freq="A-NOV", year=2007) + + ival_A_to_Q_start = Period(freq='Q', year=2007, quarter=1) + ival_A_to_Q_end = Period(freq='Q', year=2007, quarter=4) + ival_A_to_M_start = Period(freq='M', year=2007, 
month=1) + ival_A_to_M_end = Period(freq='M', year=2007, month=12) + ival_A_to_W_start = Period(freq='W', year=2007, month=1, day=1) + ival_A_to_W_end = Period(freq='W', year=2007, month=12, day=31) + ival_A_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_A_to_B_end = Period(freq='B', year=2007, month=12, day=31) + ival_A_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_A_to_D_end = Period(freq='D', year=2007, month=12, day=31) + ival_A_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_A_to_H_end = Period(freq='H', year=2007, month=12, day=31, + hour=23) + ival_A_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_A_to_T_end = Period(freq='Min', year=2007, month=12, day=31, + hour=23, minute=59) + ival_A_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_A_to_S_end = Period(freq='S', year=2007, month=12, day=31, + hour=23, minute=59, second=59) + + ival_AJAN_to_D_end = Period(freq='D', year=2007, month=1, day=31) + ival_AJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) + ival_AJUN_to_D_end = Period(freq='D', year=2007, month=6, day=30) + ival_AJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) + ival_ANOV_to_D_end = Period(freq='D', year=2007, month=11, day=30) + ival_ANOV_to_D_start = Period(freq='D', year=2006, month=12, day=1) + + self.assertEqual(ival_A.asfreq('Q', 'S'), ival_A_to_Q_start) + self.assertEqual(ival_A.asfreq('Q', 'e'), ival_A_to_Q_end) + self.assertEqual(ival_A.asfreq('M', 's'), ival_A_to_M_start) + self.assertEqual(ival_A.asfreq('M', 'E'), ival_A_to_M_end) + self.assertEqual(ival_A.asfreq('W', 'S'), ival_A_to_W_start) + self.assertEqual(ival_A.asfreq('W', 'E'), ival_A_to_W_end) + self.assertEqual(ival_A.asfreq('B', 'S'), ival_A_to_B_start) + self.assertEqual(ival_A.asfreq('B', 'E'), ival_A_to_B_end) + self.assertEqual(ival_A.asfreq('D', 'S'), ival_A_to_D_start) + self.assertEqual(ival_A.asfreq('D', 'E'), ival_A_to_D_end) + self.assertEqual(ival_A.asfreq('H', 'S'), ival_A_to_H_start) + self.assertEqual(ival_A.asfreq('H', 'E'), ival_A_to_H_end) + self.assertEqual(ival_A.asfreq('min', 'S'), ival_A_to_T_start) + self.assertEqual(ival_A.asfreq('min', 'E'), ival_A_to_T_end) + self.assertEqual(ival_A.asfreq('T', 'S'), ival_A_to_T_start) + self.assertEqual(ival_A.asfreq('T', 'E'), ival_A_to_T_end) + self.assertEqual(ival_A.asfreq('S', 'S'), ival_A_to_S_start) + self.assertEqual(ival_A.asfreq('S', 'E'), ival_A_to_S_end) + + self.assertEqual(ival_AJAN.asfreq('D', 'S'), ival_AJAN_to_D_start) + self.assertEqual(ival_AJAN.asfreq('D', 'E'), ival_AJAN_to_D_end) + + self.assertEqual(ival_AJUN.asfreq('D', 'S'), ival_AJUN_to_D_start) + self.assertEqual(ival_AJUN.asfreq('D', 'E'), ival_AJUN_to_D_end) + + self.assertEqual(ival_ANOV.asfreq('D', 'S'), ival_ANOV_to_D_start) + self.assertEqual(ival_ANOV.asfreq('D', 'E'), ival_ANOV_to_D_end) + + self.assertEqual(ival_A.asfreq('A'), ival_A) + + def test_conv_quarterly(self): + # frequency conversion tests: from Quarterly Frequency + + ival_Q = Period(freq='Q', year=2007, quarter=1) + ival_Q_end_of_year = Period(freq='Q', year=2007, quarter=4) + + ival_QEJAN = Period(freq="Q-JAN", year=2007, quarter=1) + ival_QEJUN = Period(freq="Q-JUN", year=2007, quarter=1) + + ival_Q_to_A = Period(freq='A', year=2007) + ival_Q_to_M_start = Period(freq='M', year=2007, month=1) + ival_Q_to_M_end = Period(freq='M', year=2007, month=3) + ival_Q_to_W_start = Period(freq='W', year=2007, month=1, day=1) + 
ival_Q_to_W_end = Period(freq='W', year=2007, month=3, day=31) + ival_Q_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_Q_to_B_end = Period(freq='B', year=2007, month=3, day=30) + ival_Q_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_Q_to_D_end = Period(freq='D', year=2007, month=3, day=31) + ival_Q_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_Q_to_H_end = Period(freq='H', year=2007, month=3, day=31, hour=23) + ival_Q_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_Q_to_T_end = Period(freq='Min', year=2007, month=3, day=31, + hour=23, minute=59) + ival_Q_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_Q_to_S_end = Period(freq='S', year=2007, month=3, day=31, hour=23, + minute=59, second=59) + + ival_QEJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) + ival_QEJAN_to_D_end = Period(freq='D', year=2006, month=4, day=30) + + ival_QEJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) + ival_QEJUN_to_D_end = Period(freq='D', year=2006, month=9, day=30) + + self.assertEqual(ival_Q.asfreq('A'), ival_Q_to_A) + self.assertEqual(ival_Q_end_of_year.asfreq('A'), ival_Q_to_A) + + self.assertEqual(ival_Q.asfreq('M', 'S'), ival_Q_to_M_start) + self.assertEqual(ival_Q.asfreq('M', 'E'), ival_Q_to_M_end) + self.assertEqual(ival_Q.asfreq('W', 'S'), ival_Q_to_W_start) + self.assertEqual(ival_Q.asfreq('W', 'E'), ival_Q_to_W_end) + self.assertEqual(ival_Q.asfreq('B', 'S'), ival_Q_to_B_start) + self.assertEqual(ival_Q.asfreq('B', 'E'), ival_Q_to_B_end) + self.assertEqual(ival_Q.asfreq('D', 'S'), ival_Q_to_D_start) + self.assertEqual(ival_Q.asfreq('D', 'E'), ival_Q_to_D_end) + self.assertEqual(ival_Q.asfreq('H', 'S'), ival_Q_to_H_start) + self.assertEqual(ival_Q.asfreq('H', 'E'), ival_Q_to_H_end) + self.assertEqual(ival_Q.asfreq('Min', 'S'), ival_Q_to_T_start) + self.assertEqual(ival_Q.asfreq('Min', 'E'), ival_Q_to_T_end) + self.assertEqual(ival_Q.asfreq('S', 'S'), ival_Q_to_S_start) + self.assertEqual(ival_Q.asfreq('S', 'E'), ival_Q_to_S_end) + + self.assertEqual(ival_QEJAN.asfreq('D', 'S'), ival_QEJAN_to_D_start) + self.assertEqual(ival_QEJAN.asfreq('D', 'E'), ival_QEJAN_to_D_end) + self.assertEqual(ival_QEJUN.asfreq('D', 'S'), ival_QEJUN_to_D_start) + self.assertEqual(ival_QEJUN.asfreq('D', 'E'), ival_QEJUN_to_D_end) + + self.assertEqual(ival_Q.asfreq('Q'), ival_Q) + + def test_conv_monthly(self): + # frequency conversion tests: from Monthly Frequency + + ival_M = Period(freq='M', year=2007, month=1) + ival_M_end_of_year = Period(freq='M', year=2007, month=12) + ival_M_end_of_quarter = Period(freq='M', year=2007, month=3) + ival_M_to_A = Period(freq='A', year=2007) + ival_M_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_M_to_W_start = Period(freq='W', year=2007, month=1, day=1) + ival_M_to_W_end = Period(freq='W', year=2007, month=1, day=31) + ival_M_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_M_to_B_end = Period(freq='B', year=2007, month=1, day=31) + ival_M_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_M_to_D_end = Period(freq='D', year=2007, month=1, day=31) + ival_M_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_M_to_H_end = Period(freq='H', year=2007, month=1, day=31, hour=23) + ival_M_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_M_to_T_end = Period(freq='Min', year=2007, month=1, day=31, + hour=23, minute=59) + ival_M_to_S_start = 
Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_M_to_S_end = Period(freq='S', year=2007, month=1, day=31, hour=23, + minute=59, second=59) + + self.assertEqual(ival_M.asfreq('A'), ival_M_to_A) + self.assertEqual(ival_M_end_of_year.asfreq('A'), ival_M_to_A) + self.assertEqual(ival_M.asfreq('Q'), ival_M_to_Q) + self.assertEqual(ival_M_end_of_quarter.asfreq('Q'), ival_M_to_Q) + + self.assertEqual(ival_M.asfreq('W', 'S'), ival_M_to_W_start) + self.assertEqual(ival_M.asfreq('W', 'E'), ival_M_to_W_end) + self.assertEqual(ival_M.asfreq('B', 'S'), ival_M_to_B_start) + self.assertEqual(ival_M.asfreq('B', 'E'), ival_M_to_B_end) + self.assertEqual(ival_M.asfreq('D', 'S'), ival_M_to_D_start) + self.assertEqual(ival_M.asfreq('D', 'E'), ival_M_to_D_end) + self.assertEqual(ival_M.asfreq('H', 'S'), ival_M_to_H_start) + self.assertEqual(ival_M.asfreq('H', 'E'), ival_M_to_H_end) + self.assertEqual(ival_M.asfreq('Min', 'S'), ival_M_to_T_start) + self.assertEqual(ival_M.asfreq('Min', 'E'), ival_M_to_T_end) + self.assertEqual(ival_M.asfreq('S', 'S'), ival_M_to_S_start) + self.assertEqual(ival_M.asfreq('S', 'E'), ival_M_to_S_end) + + self.assertEqual(ival_M.asfreq('M'), ival_M) + + def test_conv_weekly(self): + # frequency conversion tests: from Weekly Frequency + ival_W = Period(freq='W', year=2007, month=1, day=1) + + ival_WSUN = Period(freq='W', year=2007, month=1, day=7) + ival_WSAT = Period(freq='W-SAT', year=2007, month=1, day=6) + ival_WFRI = Period(freq='W-FRI', year=2007, month=1, day=5) + ival_WTHU = Period(freq='W-THU', year=2007, month=1, day=4) + ival_WWED = Period(freq='W-WED', year=2007, month=1, day=3) + ival_WTUE = Period(freq='W-TUE', year=2007, month=1, day=2) + ival_WMON = Period(freq='W-MON', year=2007, month=1, day=1) + + ival_WSUN_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_WSUN_to_D_end = Period(freq='D', year=2007, month=1, day=7) + ival_WSAT_to_D_start = Period(freq='D', year=2006, month=12, day=31) + ival_WSAT_to_D_end = Period(freq='D', year=2007, month=1, day=6) + ival_WFRI_to_D_start = Period(freq='D', year=2006, month=12, day=30) + ival_WFRI_to_D_end = Period(freq='D', year=2007, month=1, day=5) + ival_WTHU_to_D_start = Period(freq='D', year=2006, month=12, day=29) + ival_WTHU_to_D_end = Period(freq='D', year=2007, month=1, day=4) + ival_WWED_to_D_start = Period(freq='D', year=2006, month=12, day=28) + ival_WWED_to_D_end = Period(freq='D', year=2007, month=1, day=3) + ival_WTUE_to_D_start = Period(freq='D', year=2006, month=12, day=27) + ival_WTUE_to_D_end = Period(freq='D', year=2007, month=1, day=2) + ival_WMON_to_D_start = Period(freq='D', year=2006, month=12, day=26) + ival_WMON_to_D_end = Period(freq='D', year=2007, month=1, day=1) + + ival_W_end_of_year = Period(freq='W', year=2007, month=12, day=31) + ival_W_end_of_quarter = Period(freq='W', year=2007, month=3, day=31) + ival_W_end_of_month = Period(freq='W', year=2007, month=1, day=31) + ival_W_to_A = Period(freq='A', year=2007) + ival_W_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_W_to_M = Period(freq='M', year=2007, month=1) + + if Period(freq='D', year=2007, month=12, day=31).weekday == 6: + ival_W_to_A_end_of_year = Period(freq='A', year=2007) + else: + ival_W_to_A_end_of_year = Period(freq='A', year=2008) + + if Period(freq='D', year=2007, month=3, day=31).weekday == 6: + ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=1) + else: + ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=2) + + if Period(freq='D', year=2007, month=1, 
day=31).weekday == 6: + ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=1) + else: + ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=2) + + ival_W_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_W_to_B_end = Period(freq='B', year=2007, month=1, day=5) + ival_W_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_W_to_D_end = Period(freq='D', year=2007, month=1, day=7) + ival_W_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_W_to_H_end = Period(freq='H', year=2007, month=1, day=7, hour=23) + ival_W_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_W_to_T_end = Period(freq='Min', year=2007, month=1, day=7, + hour=23, minute=59) + ival_W_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_W_to_S_end = Period(freq='S', year=2007, month=1, day=7, hour=23, + minute=59, second=59) + + self.assertEqual(ival_W.asfreq('A'), ival_W_to_A) + self.assertEqual(ival_W_end_of_year.asfreq('A'), + ival_W_to_A_end_of_year) + self.assertEqual(ival_W.asfreq('Q'), ival_W_to_Q) + self.assertEqual(ival_W_end_of_quarter.asfreq('Q'), + ival_W_to_Q_end_of_quarter) + self.assertEqual(ival_W.asfreq('M'), ival_W_to_M) + self.assertEqual(ival_W_end_of_month.asfreq('M'), + ival_W_to_M_end_of_month) + + self.assertEqual(ival_W.asfreq('B', 'S'), ival_W_to_B_start) + self.assertEqual(ival_W.asfreq('B', 'E'), ival_W_to_B_end) + + self.assertEqual(ival_W.asfreq('D', 'S'), ival_W_to_D_start) + self.assertEqual(ival_W.asfreq('D', 'E'), ival_W_to_D_end) + + self.assertEqual(ival_WSUN.asfreq('D', 'S'), ival_WSUN_to_D_start) + self.assertEqual(ival_WSUN.asfreq('D', 'E'), ival_WSUN_to_D_end) + self.assertEqual(ival_WSAT.asfreq('D', 'S'), ival_WSAT_to_D_start) + self.assertEqual(ival_WSAT.asfreq('D', 'E'), ival_WSAT_to_D_end) + self.assertEqual(ival_WFRI.asfreq('D', 'S'), ival_WFRI_to_D_start) + self.assertEqual(ival_WFRI.asfreq('D', 'E'), ival_WFRI_to_D_end) + self.assertEqual(ival_WTHU.asfreq('D', 'S'), ival_WTHU_to_D_start) + self.assertEqual(ival_WTHU.asfreq('D', 'E'), ival_WTHU_to_D_end) + self.assertEqual(ival_WWED.asfreq('D', 'S'), ival_WWED_to_D_start) + self.assertEqual(ival_WWED.asfreq('D', 'E'), ival_WWED_to_D_end) + self.assertEqual(ival_WTUE.asfreq('D', 'S'), ival_WTUE_to_D_start) + self.assertEqual(ival_WTUE.asfreq('D', 'E'), ival_WTUE_to_D_end) + self.assertEqual(ival_WMON.asfreq('D', 'S'), ival_WMON_to_D_start) + self.assertEqual(ival_WMON.asfreq('D', 'E'), ival_WMON_to_D_end) + + self.assertEqual(ival_W.asfreq('H', 'S'), ival_W_to_H_start) + self.assertEqual(ival_W.asfreq('H', 'E'), ival_W_to_H_end) + self.assertEqual(ival_W.asfreq('Min', 'S'), ival_W_to_T_start) + self.assertEqual(ival_W.asfreq('Min', 'E'), ival_W_to_T_end) + self.assertEqual(ival_W.asfreq('S', 'S'), ival_W_to_S_start) + self.assertEqual(ival_W.asfreq('S', 'E'), ival_W_to_S_end) + + self.assertEqual(ival_W.asfreq('W'), ival_W) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + ival_W.asfreq('WK') + + def test_conv_weekly_legacy(self): + # frequency conversion tests: from Weekly Frequency + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK', year=2007, month=1, day=1) + + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-SAT', year=2007, month=1, day=6) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-FRI', year=2007, month=1, day=5) + with 
self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-THU', year=2007, month=1, day=4) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-WED', year=2007, month=1, day=3) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-TUE', year=2007, month=1, day=2) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-MON', year=2007, month=1, day=1) + + def test_conv_business(self): + # frequency conversion tests: from Business Frequency" + + ival_B = Period(freq='B', year=2007, month=1, day=1) + ival_B_end_of_year = Period(freq='B', year=2007, month=12, day=31) + ival_B_end_of_quarter = Period(freq='B', year=2007, month=3, day=30) + ival_B_end_of_month = Period(freq='B', year=2007, month=1, day=31) + ival_B_end_of_week = Period(freq='B', year=2007, month=1, day=5) + + ival_B_to_A = Period(freq='A', year=2007) + ival_B_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_B_to_M = Period(freq='M', year=2007, month=1) + ival_B_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_B_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_B_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_B_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23) + ival_B_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_B_to_T_end = Period(freq='Min', year=2007, month=1, day=1, + hour=23, minute=59) + ival_B_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_B_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, + minute=59, second=59) + + self.assertEqual(ival_B.asfreq('A'), ival_B_to_A) + self.assertEqual(ival_B_end_of_year.asfreq('A'), ival_B_to_A) + self.assertEqual(ival_B.asfreq('Q'), ival_B_to_Q) + self.assertEqual(ival_B_end_of_quarter.asfreq('Q'), ival_B_to_Q) + self.assertEqual(ival_B.asfreq('M'), ival_B_to_M) + self.assertEqual(ival_B_end_of_month.asfreq('M'), ival_B_to_M) + self.assertEqual(ival_B.asfreq('W'), ival_B_to_W) + self.assertEqual(ival_B_end_of_week.asfreq('W'), ival_B_to_W) + + self.assertEqual(ival_B.asfreq('D'), ival_B_to_D) + + self.assertEqual(ival_B.asfreq('H', 'S'), ival_B_to_H_start) + self.assertEqual(ival_B.asfreq('H', 'E'), ival_B_to_H_end) + self.assertEqual(ival_B.asfreq('Min', 'S'), ival_B_to_T_start) + self.assertEqual(ival_B.asfreq('Min', 'E'), ival_B_to_T_end) + self.assertEqual(ival_B.asfreq('S', 'S'), ival_B_to_S_start) + self.assertEqual(ival_B.asfreq('S', 'E'), ival_B_to_S_end) + + self.assertEqual(ival_B.asfreq('B'), ival_B) + + def test_conv_daily(self): + # frequency conversion tests: from Business Frequency" + + ival_D = Period(freq='D', year=2007, month=1, day=1) + ival_D_end_of_year = Period(freq='D', year=2007, month=12, day=31) + ival_D_end_of_quarter = Period(freq='D', year=2007, month=3, day=31) + ival_D_end_of_month = Period(freq='D', year=2007, month=1, day=31) + ival_D_end_of_week = Period(freq='D', year=2007, month=1, day=7) + + ival_D_friday = Period(freq='D', year=2007, month=1, day=5) + ival_D_saturday = Period(freq='D', year=2007, month=1, day=6) + ival_D_sunday = Period(freq='D', year=2007, month=1, day=7) + + # TODO: unused? 
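+        # (editorial note, not part of the original commit: the commented-out
+        #  Period below is the Monday value the TODO above refers to; the
+        #  business-day conversions later in this test construct it directly
+        #  as ival_B_monday, so it is indeed unused here)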
+ # ival_D_monday = Period(freq='D', year=2007, month=1, day=8) + + ival_B_friday = Period(freq='B', year=2007, month=1, day=5) + ival_B_monday = Period(freq='B', year=2007, month=1, day=8) + + ival_D_to_A = Period(freq='A', year=2007) + + ival_Deoq_to_AJAN = Period(freq='A-JAN', year=2008) + ival_Deoq_to_AJUN = Period(freq='A-JUN', year=2007) + ival_Deoq_to_ADEC = Period(freq='A-DEC', year=2007) + + ival_D_to_QEJAN = Period(freq="Q-JAN", year=2007, quarter=4) + ival_D_to_QEJUN = Period(freq="Q-JUN", year=2007, quarter=3) + ival_D_to_QEDEC = Period(freq="Q-DEC", year=2007, quarter=1) + + ival_D_to_M = Period(freq='M', year=2007, month=1) + ival_D_to_W = Period(freq='W', year=2007, month=1, day=7) + + ival_D_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_D_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23) + ival_D_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_D_to_T_end = Period(freq='Min', year=2007, month=1, day=1, + hour=23, minute=59) + ival_D_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_D_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, + minute=59, second=59) + + self.assertEqual(ival_D.asfreq('A'), ival_D_to_A) + + self.assertEqual(ival_D_end_of_quarter.asfreq('A-JAN'), + ival_Deoq_to_AJAN) + self.assertEqual(ival_D_end_of_quarter.asfreq('A-JUN'), + ival_Deoq_to_AJUN) + self.assertEqual(ival_D_end_of_quarter.asfreq('A-DEC'), + ival_Deoq_to_ADEC) + + self.assertEqual(ival_D_end_of_year.asfreq('A'), ival_D_to_A) + self.assertEqual(ival_D_end_of_quarter.asfreq('Q'), ival_D_to_QEDEC) + self.assertEqual(ival_D.asfreq("Q-JAN"), ival_D_to_QEJAN) + self.assertEqual(ival_D.asfreq("Q-JUN"), ival_D_to_QEJUN) + self.assertEqual(ival_D.asfreq("Q-DEC"), ival_D_to_QEDEC) + self.assertEqual(ival_D.asfreq('M'), ival_D_to_M) + self.assertEqual(ival_D_end_of_month.asfreq('M'), ival_D_to_M) + self.assertEqual(ival_D.asfreq('W'), ival_D_to_W) + self.assertEqual(ival_D_end_of_week.asfreq('W'), ival_D_to_W) + + self.assertEqual(ival_D_friday.asfreq('B'), ival_B_friday) + self.assertEqual(ival_D_saturday.asfreq('B', 'S'), ival_B_friday) + self.assertEqual(ival_D_saturday.asfreq('B', 'E'), ival_B_monday) + self.assertEqual(ival_D_sunday.asfreq('B', 'S'), ival_B_friday) + self.assertEqual(ival_D_sunday.asfreq('B', 'E'), ival_B_monday) + + self.assertEqual(ival_D.asfreq('H', 'S'), ival_D_to_H_start) + self.assertEqual(ival_D.asfreq('H', 'E'), ival_D_to_H_end) + self.assertEqual(ival_D.asfreq('Min', 'S'), ival_D_to_T_start) + self.assertEqual(ival_D.asfreq('Min', 'E'), ival_D_to_T_end) + self.assertEqual(ival_D.asfreq('S', 'S'), ival_D_to_S_start) + self.assertEqual(ival_D.asfreq('S', 'E'), ival_D_to_S_end) + + self.assertEqual(ival_D.asfreq('D'), ival_D) + + def test_conv_hourly(self): + # frequency conversion tests: from Hourly Frequency" + + ival_H = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_H_end_of_year = Period(freq='H', year=2007, month=12, day=31, + hour=23) + ival_H_end_of_quarter = Period(freq='H', year=2007, month=3, day=31, + hour=23) + ival_H_end_of_month = Period(freq='H', year=2007, month=1, day=31, + hour=23) + ival_H_end_of_week = Period(freq='H', year=2007, month=1, day=7, + hour=23) + ival_H_end_of_day = Period(freq='H', year=2007, month=1, day=1, + hour=23) + ival_H_end_of_bus = Period(freq='H', year=2007, month=1, day=1, + hour=23) + + ival_H_to_A = Period(freq='A', year=2007) + ival_H_to_Q = Period(freq='Q', year=2007, quarter=1) + 
ival_H_to_M = Period(freq='M', year=2007, month=1) + ival_H_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_H_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_H_to_B = Period(freq='B', year=2007, month=1, day=1) + + ival_H_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_H_to_T_end = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=59) + ival_H_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_H_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=59, second=59) + + self.assertEqual(ival_H.asfreq('A'), ival_H_to_A) + self.assertEqual(ival_H_end_of_year.asfreq('A'), ival_H_to_A) + self.assertEqual(ival_H.asfreq('Q'), ival_H_to_Q) + self.assertEqual(ival_H_end_of_quarter.asfreq('Q'), ival_H_to_Q) + self.assertEqual(ival_H.asfreq('M'), ival_H_to_M) + self.assertEqual(ival_H_end_of_month.asfreq('M'), ival_H_to_M) + self.assertEqual(ival_H.asfreq('W'), ival_H_to_W) + self.assertEqual(ival_H_end_of_week.asfreq('W'), ival_H_to_W) + self.assertEqual(ival_H.asfreq('D'), ival_H_to_D) + self.assertEqual(ival_H_end_of_day.asfreq('D'), ival_H_to_D) + self.assertEqual(ival_H.asfreq('B'), ival_H_to_B) + self.assertEqual(ival_H_end_of_bus.asfreq('B'), ival_H_to_B) + + self.assertEqual(ival_H.asfreq('Min', 'S'), ival_H_to_T_start) + self.assertEqual(ival_H.asfreq('Min', 'E'), ival_H_to_T_end) + self.assertEqual(ival_H.asfreq('S', 'S'), ival_H_to_S_start) + self.assertEqual(ival_H.asfreq('S', 'E'), ival_H_to_S_end) + + self.assertEqual(ival_H.asfreq('H'), ival_H) + + def test_conv_minutely(self): + # frequency conversion tests: from Minutely Frequency" + + ival_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=0) + ival_T_end_of_year = Period(freq='Min', year=2007, month=12, day=31, + hour=23, minute=59) + ival_T_end_of_quarter = Period(freq='Min', year=2007, month=3, day=31, + hour=23, minute=59) + ival_T_end_of_month = Period(freq='Min', year=2007, month=1, day=31, + hour=23, minute=59) + ival_T_end_of_week = Period(freq='Min', year=2007, month=1, day=7, + hour=23, minute=59) + ival_T_end_of_day = Period(freq='Min', year=2007, month=1, day=1, + hour=23, minute=59) + ival_T_end_of_bus = Period(freq='Min', year=2007, month=1, day=1, + hour=23, minute=59) + ival_T_end_of_hour = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=59) + + ival_T_to_A = Period(freq='A', year=2007) + ival_T_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_T_to_M = Period(freq='M', year=2007, month=1) + ival_T_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_T_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_T_to_B = Period(freq='B', year=2007, month=1, day=1) + ival_T_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) + + ival_T_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_T_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=59) + + self.assertEqual(ival_T.asfreq('A'), ival_T_to_A) + self.assertEqual(ival_T_end_of_year.asfreq('A'), ival_T_to_A) + self.assertEqual(ival_T.asfreq('Q'), ival_T_to_Q) + self.assertEqual(ival_T_end_of_quarter.asfreq('Q'), ival_T_to_Q) + self.assertEqual(ival_T.asfreq('M'), ival_T_to_M) + self.assertEqual(ival_T_end_of_month.asfreq('M'), ival_T_to_M) + self.assertEqual(ival_T.asfreq('W'), ival_T_to_W) + self.assertEqual(ival_T_end_of_week.asfreq('W'), ival_T_to_W) + self.assertEqual(ival_T.asfreq('D'), ival_T_to_D) + 
self.assertEqual(ival_T_end_of_day.asfreq('D'), ival_T_to_D) + self.assertEqual(ival_T.asfreq('B'), ival_T_to_B) + self.assertEqual(ival_T_end_of_bus.asfreq('B'), ival_T_to_B) + self.assertEqual(ival_T.asfreq('H'), ival_T_to_H) + self.assertEqual(ival_T_end_of_hour.asfreq('H'), ival_T_to_H) + + self.assertEqual(ival_T.asfreq('S', 'S'), ival_T_to_S_start) + self.assertEqual(ival_T.asfreq('S', 'E'), ival_T_to_S_end) + + self.assertEqual(ival_T.asfreq('Min'), ival_T) + + def test_conv_secondly(self): + # frequency conversion tests: from Secondly Frequency" + + ival_S = Period(freq='S', year=2007, month=1, day=1, hour=0, minute=0, + second=0) + ival_S_end_of_year = Period(freq='S', year=2007, month=12, day=31, + hour=23, minute=59, second=59) + ival_S_end_of_quarter = Period(freq='S', year=2007, month=3, day=31, + hour=23, minute=59, second=59) + ival_S_end_of_month = Period(freq='S', year=2007, month=1, day=31, + hour=23, minute=59, second=59) + ival_S_end_of_week = Period(freq='S', year=2007, month=1, day=7, + hour=23, minute=59, second=59) + ival_S_end_of_day = Period(freq='S', year=2007, month=1, day=1, + hour=23, minute=59, second=59) + ival_S_end_of_bus = Period(freq='S', year=2007, month=1, day=1, + hour=23, minute=59, second=59) + ival_S_end_of_hour = Period(freq='S', year=2007, month=1, day=1, + hour=0, minute=59, second=59) + ival_S_end_of_minute = Period(freq='S', year=2007, month=1, day=1, + hour=0, minute=0, second=59) + + ival_S_to_A = Period(freq='A', year=2007) + ival_S_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_S_to_M = Period(freq='M', year=2007, month=1) + ival_S_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_S_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_S_to_B = Period(freq='B', year=2007, month=1, day=1) + ival_S_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_S_to_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=0) + + self.assertEqual(ival_S.asfreq('A'), ival_S_to_A) + self.assertEqual(ival_S_end_of_year.asfreq('A'), ival_S_to_A) + self.assertEqual(ival_S.asfreq('Q'), ival_S_to_Q) + self.assertEqual(ival_S_end_of_quarter.asfreq('Q'), ival_S_to_Q) + self.assertEqual(ival_S.asfreq('M'), ival_S_to_M) + self.assertEqual(ival_S_end_of_month.asfreq('M'), ival_S_to_M) + self.assertEqual(ival_S.asfreq('W'), ival_S_to_W) + self.assertEqual(ival_S_end_of_week.asfreq('W'), ival_S_to_W) + self.assertEqual(ival_S.asfreq('D'), ival_S_to_D) + self.assertEqual(ival_S_end_of_day.asfreq('D'), ival_S_to_D) + self.assertEqual(ival_S.asfreq('B'), ival_S_to_B) + self.assertEqual(ival_S_end_of_bus.asfreq('B'), ival_S_to_B) + self.assertEqual(ival_S.asfreq('H'), ival_S_to_H) + self.assertEqual(ival_S_end_of_hour.asfreq('H'), ival_S_to_H) + self.assertEqual(ival_S.asfreq('Min'), ival_S_to_T) + self.assertEqual(ival_S_end_of_minute.asfreq('Min'), ival_S_to_T) + + self.assertEqual(ival_S.asfreq('S'), ival_S) + + def test_asfreq_mult(self): + # normal freq to mult freq + p = Period(freq='A', year=2007) + # ordinal will not change + for freq in ['3A', offsets.YearEnd(3)]: + result = p.asfreq(freq) + expected = Period('2007', freq='3A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + # ordinal will not change + for freq in ['3A', offsets.YearEnd(3)]: + result = p.asfreq(freq, how='S') + expected = Period('2007', freq='3A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + 
self.assertEqual(result.freq, expected.freq) + + # mult freq to normal freq + p = Period(freq='3A', year=2007) + # ordinal will change because how=E is the default + for freq in ['A', offsets.YearEnd()]: + result = p.asfreq(freq) + expected = Period('2009', freq='A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + # ordinal will not change + for freq in ['A', offsets.YearEnd()]: + result = p.asfreq(freq, how='S') + expected = Period('2007', freq='A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + p = Period(freq='A', year=2007) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq) + expected = Period('2007-12', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq, how='S') + expected = Period('2007-01', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + p = Period(freq='3A', year=2007) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq) + expected = Period('2009-12', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq, how='S') + expected = Period('2007-01', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + def test_asfreq_combined(self): + # normal freq to combined freq + p = Period('2007', freq='H') + + # ordinal will not change + expected = Period('2007', freq='25H') + for freq, how in zip(['1D1H', '1H1D'], ['E', 'S']): + result = p.asfreq(freq, how=how) + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + # combined freq to normal freq + p1 = Period(freq='1D1H', year=2007) + p2 = Period(freq='1H1D', year=2007) + + # ordinal will change because how=E is the default + result1 = p1.asfreq('H') + result2 = p2.asfreq('H') + expected = Period('2007-01-02', freq='H') + self.assertEqual(result1, expected) + self.assertEqual(result1.ordinal, expected.ordinal) + self.assertEqual(result1.freq, expected.freq) + self.assertEqual(result2, expected) + self.assertEqual(result2.ordinal, expected.ordinal) + self.assertEqual(result2.freq, expected.freq) + + # ordinal will not change + result1 = p1.asfreq('H', how='S') + result2 = p2.asfreq('H', how='S') + expected = Period('2007-01-01', freq='H') + self.assertEqual(result1, expected) + self.assertEqual(result1.ordinal, expected.ordinal) + self.assertEqual(result1.freq, expected.freq) + self.assertEqual(result2, expected) + self.assertEqual(result2.ordinal, expected.ordinal) + self.assertEqual(result2.freq, expected.freq) + + def test_asfreq_MS(self): + initial = Period("2013") + + self.assertEqual(initial.asfreq(freq="M", how="S"), + Period('2013-01', 'M')) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + initial.asfreq(freq="MS", how="S") + + with tm.assertRaisesRegexp(ValueError, msg): + pd.Period('2013-01', 'MS') + + self.assertTrue(_period_code_map.get("MS") is 
None) From c1bd201b270f162ec40229b79493524bcf4734ac Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 9 Feb 2017 11:42:15 -0500 Subject: [PATCH 026/933] DEPR: remove statsmodels as a dependency remove pd.ols, pd.fama_macbeth from top-level namespace xref #11898 closes https://github.com/pandas-dev/pandas2/issues/26 previously deprecated in 0.18.0 Author: Jeff Reback Closes #15353 from jreback/stats and squashes the following commits: 9563740 [Jeff Reback] DEPR: remove statsmodels as a dependency --- ci/requirements-2.7.pip | 1 - ci/requirements-2.7_COMPAT.run | 1 - ci/requirements-2.7_LOCALE.run | 1 - ci/requirements-2.7_SLOW.run | 1 - ci/requirements-3.4_SLOW.run | 1 - doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/api/tests/test_api.py | 4 +- pandas/stats/api.py | 4 - pandas/stats/common.py | 45 - pandas/stats/fama_macbeth.py | 241 ---- pandas/stats/interface.py | 143 --- pandas/stats/math.py | 130 --- pandas/stats/misc.py | 389 ------- pandas/stats/ols.py | 1377 ----------------------- pandas/stats/plm.py | 863 -------------- pandas/stats/tests/__init__.py | 0 pandas/stats/tests/common.py | 162 --- pandas/stats/tests/test_fama_macbeth.py | 68 -- pandas/stats/tests/test_math.py | 59 - pandas/stats/tests/test_ols.py | 968 ---------------- pandas/stats/tests/test_var.py | 94 -- pandas/stats/var.py | 605 ---------- pandas/util/print_versions.py | 1 - setup.py | 1 - 24 files changed, 3 insertions(+), 5158 deletions(-) delete mode 100644 pandas/stats/common.py delete mode 100644 pandas/stats/fama_macbeth.py delete mode 100644 pandas/stats/interface.py delete mode 100644 pandas/stats/math.py delete mode 100644 pandas/stats/misc.py delete mode 100644 pandas/stats/ols.py delete mode 100644 pandas/stats/plm.py delete mode 100644 pandas/stats/tests/__init__.py delete mode 100644 pandas/stats/tests/common.py delete mode 100644 pandas/stats/tests/test_fama_macbeth.py delete mode 100644 pandas/stats/tests/test_math.py delete mode 100644 pandas/stats/tests/test_ols.py delete mode 100644 pandas/stats/tests/test_var.py delete mode 100644 pandas/stats/var.py diff --git a/ci/requirements-2.7.pip b/ci/requirements-2.7.pip index d7266fe88fb32..d16b932c8be4f 100644 --- a/ci/requirements-2.7.pip +++ b/ci/requirements-2.7.pip @@ -1,4 +1,3 @@ -statsmodels blosc httplib2 google-api-python-client==1.2 diff --git a/ci/requirements-2.7_COMPAT.run b/ci/requirements-2.7_COMPAT.run index 32d71beb24388..d27b6a72c2d15 100644 --- a/ci/requirements-2.7_COMPAT.run +++ b/ci/requirements-2.7_COMPAT.run @@ -4,7 +4,6 @@ pytz=2013b scipy=0.11.0 xlwt=0.7.5 xlrd=0.9.2 -statsmodels=0.4.3 bottleneck=0.8.0 numexpr=2.2.2 pytables=3.0.0 diff --git a/ci/requirements-2.7_LOCALE.run b/ci/requirements-2.7_LOCALE.run index 9bb37ee10f8db..1a9b42d832b0b 100644 --- a/ci/requirements-2.7_LOCALE.run +++ b/ci/requirements-2.7_LOCALE.run @@ -13,5 +13,4 @@ html5lib=1.0b2 lxml=3.2.1 scipy=0.11.0 beautiful-soup=4.2.1 -statsmodels=0.4.3 bigquery=2.0.17 diff --git a/ci/requirements-2.7_SLOW.run b/ci/requirements-2.7_SLOW.run index 630d22636f284..c2d2a14285ad6 100644 --- a/ci/requirements-2.7_SLOW.run +++ b/ci/requirements-2.7_SLOW.run @@ -4,7 +4,6 @@ numpy=1.8.2 matplotlib=1.3.1 scipy patsy -statsmodels xlwt openpyxl xlsxwriter diff --git a/ci/requirements-3.4_SLOW.run b/ci/requirements-3.4_SLOW.run index 215f840381ada..39018439a1223 100644 --- a/ci/requirements-3.4_SLOW.run +++ b/ci/requirements-3.4_SLOW.run @@ -17,5 +17,4 @@ sqlalchemy bottleneck pymysql psycopg2 -statsmodels jinja2=2.8 diff --git a/doc/source/whatsnew/v0.20.0.txt 
b/doc/source/whatsnew/v0.20.0.txt index 9afcf85c929a7..3fb6f7b0b9a91 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -396,7 +396,7 @@ Removal of prior version deprecations/changes - The ``pandas.io.ga`` module with a ``google-analytics`` interface is removed (:issue:`11308`). Similar functionality can be found in the `Google2Pandas `__ package. - ``pd.to_datetime`` and ``pd.to_timedelta`` have dropped the ``coerce`` parameter in favor of ``errors`` (:issue:`13602`) - +- ``pandas.stats.fama_macbeth``, ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var``, as well as the top-level ``pandas.fama_macbeth`` and ``pandas.ols`` routines are removed. Similar functionality can be found in the `statsmodels `__ package. (:issue:`11898`) diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py index 02165d82d4232..a53f6103b408b 100644 --- a/pandas/api/tests/test_api.py +++ b/pandas/api/tests/test_api.py @@ -42,7 +42,7 @@ class TestPDApi(Base, tm.TestCase): 'json', 'lib', 'index', 'parser'] # these are already deprecated; awaiting removal - deprecated_modules = ['ols', 'stats', 'datetools'] + deprecated_modules = ['stats', 'datetools'] # misc misc = ['IndexSlice', 'NaT'] @@ -109,7 +109,7 @@ class TestPDApi(Base, tm.TestCase): 'expanding_max', 'expanding_mean', 'expanding_median', 'expanding_min', 'expanding_quantile', 'expanding_skew', 'expanding_std', 'expanding_sum', - 'expanding_var', 'fama_macbeth', 'rolling_apply', + 'expanding_var', 'rolling_apply', 'rolling_corr', 'rolling_count', 'rolling_cov', 'rolling_kurt', 'rolling_max', 'rolling_mean', 'rolling_median', 'rolling_min', 'rolling_quantile', diff --git a/pandas/stats/api.py b/pandas/stats/api.py index fd81b875faa91..2a11456d4f9e5 100644 --- a/pandas/stats/api.py +++ b/pandas/stats/api.py @@ -2,10 +2,6 @@ Common namespace of statistical functions """ -# pylint: disable-msg=W0611,W0614,W0401 - # flake8: noqa from pandas.stats.moments import * -from pandas.stats.interface import ols -from pandas.stats.fama_macbeth import fama_macbeth diff --git a/pandas/stats/common.py b/pandas/stats/common.py deleted file mode 100644 index be3b842e93cc8..0000000000000 --- a/pandas/stats/common.py +++ /dev/null @@ -1,45 +0,0 @@ - -_WINDOW_TYPES = { - 0: 'full_sample', - 1: 'rolling', - 2: 'expanding' -} -# also allow 'rolling' as key -_WINDOW_TYPES.update((v, v) for k, v in list(_WINDOW_TYPES.items())) -_ADDITIONAL_CLUSTER_TYPES = set(("entity", "time")) - - -def _get_cluster_type(cluster_type): - # this was previous behavior - if cluster_type is None: - return cluster_type - try: - return _get_window_type(cluster_type) - except ValueError: - final_type = str(cluster_type).lower().replace("_", " ") - if final_type in _ADDITIONAL_CLUSTER_TYPES: - return final_type - raise ValueError('Unrecognized cluster type: %s' % cluster_type) - - -def _get_window_type(window_type): - # e.g., 0, 1, 2 - final_type = _WINDOW_TYPES.get(window_type) - # e.g., 'full_sample' - final_type = final_type or _WINDOW_TYPES.get( - str(window_type).lower().replace(" ", "_")) - if final_type is None: - raise ValueError('Unrecognized window type: %s' % window_type) - return final_type - - -def banner(text, width=80): - """ - - """ - toFill = width - len(text) - - left = toFill // 2 - right = toFill - left - - return '%s%s%s' % ('-' * left, text, '-' * right) diff --git a/pandas/stats/fama_macbeth.py b/pandas/stats/fama_macbeth.py deleted file mode 100644 index d564f9cb6c425..0000000000000 --- a/pandas/stats/fama_macbeth.py
+++ /dev/null @@ -1,241 +0,0 @@ -from pandas.core.base import StringMixin -from pandas.compat import StringIO, range - -import numpy as np - -from pandas.core.api import Series, DataFrame -import pandas.stats.common as common -from pandas.util.decorators import cache_readonly - -# flake8: noqa - - -def fama_macbeth(**kwargs): - """Runs Fama-MacBeth regression. - - Parameters - ---------- - Takes the same arguments as a panel OLS, in addition to: - - nw_lags_beta: int - Newey-West adjusts the betas by the given lags - """ - window_type = kwargs.get('window_type') - if window_type is None: - klass = FamaMacBeth - else: - klass = MovingFamaMacBeth - - return klass(**kwargs) - - -class FamaMacBeth(StringMixin): - - def __init__(self, y, x, intercept=True, nw_lags=None, - nw_lags_beta=None, - entity_effects=False, time_effects=False, x_effects=None, - cluster=None, dropped_dummies=None, verbose=False): - import warnings - warnings.warn("The pandas.stats.fama_macbeth module is deprecated and will be " - "removed in a future version. We refer to external packages " - "like statsmodels, see here: " - "http://www.statsmodels.org/stable/index.html", - FutureWarning, stacklevel=4) - - if dropped_dummies is None: - dropped_dummies = {} - self._nw_lags_beta = nw_lags_beta - - from pandas.stats.plm import MovingPanelOLS - self._ols_result = MovingPanelOLS( - y=y, x=x, window_type='rolling', window=1, - intercept=intercept, - nw_lags=nw_lags, entity_effects=entity_effects, - time_effects=time_effects, x_effects=x_effects, cluster=cluster, - dropped_dummies=dropped_dummies, verbose=verbose) - - self._cols = self._ols_result._x.columns - - @cache_readonly - def _beta_raw(self): - return self._ols_result._beta_raw - - @cache_readonly - def _stats(self): - return _calc_t_stat(self._beta_raw, self._nw_lags_beta) - - @cache_readonly - def _mean_beta_raw(self): - return self._stats[0] - - @cache_readonly - def _std_beta_raw(self): - return self._stats[1] - - @cache_readonly - def _t_stat_raw(self): - return self._stats[2] - - def _make_result(self, result): - return Series(result, index=self._cols) - - @cache_readonly - def mean_beta(self): - return self._make_result(self._mean_beta_raw) - - @cache_readonly - def std_beta(self): - return self._make_result(self._std_beta_raw) - - @cache_readonly - def t_stat(self): - return self._make_result(self._t_stat_raw) - - @cache_readonly - def _results(self): - return { - 'mean_beta': self._mean_beta_raw, - 'std_beta': self._std_beta_raw, - 't_stat': self._t_stat_raw, - } - - @cache_readonly - def _coef_table(self): - buffer = StringIO() - buffer.write('%13s %13s %13s %13s %13s %13s\n' % - ('Variable', 'Beta', 'Std Err', 't-stat', 'CI 2.5%', 'CI 97.5%')) - template = '%13s %13.4f %13.4f %13.2f %13.4f %13.4f\n' - - for i, name in enumerate(self._cols): - if i and not (i % 5): - buffer.write('\n' + common.banner('')) - - mean_beta = self._results['mean_beta'][i] - std_beta = self._results['std_beta'][i] - t_stat = self._results['t_stat'][i] - ci1 = mean_beta - 1.96 * std_beta - ci2 = mean_beta + 1.96 * std_beta - - values = '(%s)' % name, mean_beta, std_beta, t_stat, ci1, ci2 - - buffer.write(template % values) - - if self._nw_lags_beta is not None: - buffer.write('\n') - buffer.write('*** The Std Err, t-stat are Newey-West ' - 'adjusted with Lags %5d\n' % self._nw_lags_beta) - - return buffer.getvalue() - - def __unicode__(self): - return self.summary - - @cache_readonly - def summary(self): - template = """ -----------------------Summary of Fama-MacBeth 
Analysis------------------------- - -Formula: Y ~ %(formulaRHS)s -# betas : %(nu)3d - -----------------------Summary of Estimated Coefficients------------------------ -%(coefTable)s ---------------------------------End of Summary--------------------------------- -""" - params = { - 'formulaRHS': ' + '.join(self._cols), - 'nu': len(self._beta_raw), - 'coefTable': self._coef_table, - } - - return template % params - - -class MovingFamaMacBeth(FamaMacBeth): - - def __init__(self, y, x, window_type='rolling', window=10, - intercept=True, nw_lags=None, nw_lags_beta=None, - entity_effects=False, time_effects=False, x_effects=None, - cluster=None, dropped_dummies=None, verbose=False): - if dropped_dummies is None: - dropped_dummies = {} - self._window_type = common._get_window_type(window_type) - self._window = window - - FamaMacBeth.__init__( - self, y=y, x=x, intercept=intercept, - nw_lags=nw_lags, nw_lags_beta=nw_lags_beta, - entity_effects=entity_effects, time_effects=time_effects, - x_effects=x_effects, cluster=cluster, - dropped_dummies=dropped_dummies, verbose=verbose) - - self._index = self._ols_result._index - self._T = len(self._index) - - @property - def _is_rolling(self): - return self._window_type == 'rolling' - - def _calc_stats(self): - mean_betas = [] - std_betas = [] - t_stats = [] - - # XXX - - mask = self._ols_result._rolling_ols_call[2] - obs_total = mask.astype(int).cumsum() - - start = self._window - 1 - betas = self._beta_raw - for i in range(start, self._T): - if self._is_rolling: - begin = i - start - else: - begin = 0 - - B = betas[max(obs_total[begin] - 1, 0): obs_total[i]] - mean_beta, std_beta, t_stat = _calc_t_stat(B, self._nw_lags_beta) - mean_betas.append(mean_beta) - std_betas.append(std_beta) - t_stats.append(t_stat) - - return np.array([mean_betas, std_betas, t_stats]) - - _stats = cache_readonly(_calc_stats) - - def _make_result(self, result): - return DataFrame(result, index=self._result_index, columns=self._cols) - - @cache_readonly - def _result_index(self): - mask = self._ols_result._rolling_ols_call[2] - # HACK XXX - return self._index[mask.cumsum() >= self._window] - - @cache_readonly - def _results(self): - return { - 'mean_beta': self._mean_beta_raw[-1], - 'std_beta': self._std_beta_raw[-1], - 't_stat': self._t_stat_raw[-1], - } - - -def _calc_t_stat(beta, nw_lags_beta): - N = len(beta) - B = beta - beta.mean(0) - C = np.dot(B.T, B) / N - - if nw_lags_beta is not None: - for i in range(nw_lags_beta + 1): - - cov = np.dot(B[i:].T, B[:(N - i)]) / N - weight = i / (nw_lags_beta + 1) - C += 2 * (1 - weight) * cov - - mean_beta = beta.mean(0) - std_beta = np.sqrt(np.diag(C)) / np.sqrt(N) - t_stat = mean_beta / std_beta - - return mean_beta, std_beta, t_stat diff --git a/pandas/stats/interface.py b/pandas/stats/interface.py deleted file mode 100644 index caf468b4f85fe..0000000000000 --- a/pandas/stats/interface.py +++ /dev/null @@ -1,143 +0,0 @@ -from pandas.core.api import Series, DataFrame, Panel, MultiIndex -from pandas.stats.ols import OLS, MovingOLS -from pandas.stats.plm import PanelOLS, MovingPanelOLS, NonPooledPanelOLS -import pandas.stats.common as common - - -def ols(**kwargs): - """Returns the appropriate OLS object depending on whether you need - simple or panel OLS, and a full-sample or rolling/expanding OLS. 
- - Will be a normal linear regression or a (pooled) panel regression depending - on the type of the inputs: - - y : Series, x : DataFrame -> OLS - y : Series, x : dict of DataFrame -> OLS - y : DataFrame, x : DataFrame -> PanelOLS - y : DataFrame, x : dict of DataFrame/Panel -> PanelOLS - y : Series with MultiIndex, x : Panel/DataFrame + MultiIndex -> PanelOLS - - Parameters - ---------- - y: Series or DataFrame - See above for types - x: Series, DataFrame, dict of Series, dict of DataFrame, Panel - weights : Series or ndarray - The weights are presumed to be (proportional to) the inverse of the - variance of the observations. That is, if the variables are to be - transformed by 1/sqrt(W) you must supply weights = 1/W - intercept: bool - True if you want an intercept. Defaults to True. - nw_lags: None or int - Number of Newey-West lags. Defaults to None. - nw_overlap: bool - Whether there are overlaps in the NW lags. Defaults to False. - window_type: {'full sample', 'rolling', 'expanding'} - 'full sample' by default - window: int - size of window (for rolling/expanding OLS). If window passed and no - explicit window_type, 'rolling" will be used as the window_type - - Panel OLS options: - pool: bool - Whether to run pooled panel regression. Defaults to true. - entity_effects: bool - Whether to account for entity fixed effects. Defaults to false. - time_effects: bool - Whether to account for time fixed effects. Defaults to false. - x_effects: list - List of x's to account for fixed effects. Defaults to none. - dropped_dummies: dict - Key is the name of the variable for the fixed effect. - Value is the value of that variable for which we drop the dummy. - - For entity fixed effects, key equals 'entity'. - - By default, the first dummy is dropped if no dummy is specified. - cluster: {'time', 'entity'} - cluster variances - - Examples - -------- - # Run simple OLS. - result = ols(y=y, x=x) - - # Run rolling simple OLS with window of size 10. - result = ols(y=y, x=x, window_type='rolling', window=10) - print(result.beta) - - result = ols(y=y, x=x, nw_lags=1) - - # Set up LHS and RHS for data across all items - y = A - x = {'B' : B, 'C' : C} - - # Run panel OLS. - result = ols(y=y, x=x) - - # Run expanding panel OLS with window 10 and entity clustering. - result = ols(y=y, x=x, cluster='entity', window_type='expanding', - window=10) - - Returns - ------- - The appropriate OLS object, which allows you to obtain betas and various - statistics, such as std err, t-stat, etc. 
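[Editor's aside, not part of the patch: with ``pd.ols`` removed by this commit, the whatsnew entry above points users to statsmodels. A minimal sketch of the equivalent full-sample regression, assuming a Series ``y`` and a DataFrame ``x`` as in this docstring (the variable names are illustrative, not from the source):

    import statsmodels.api as sm

    # add_constant plays the role of intercept=True in the old interface;
    # missing='drop' mimics pd.ols dropping rows with NaNs
    model = sm.OLS(y, sm.add_constant(x), missing='drop').fit()
    model.params   # roughly the old result.beta
    model.bse      # roughly the old result.std_err
    model.tvalues  # roughly the old result.t_stat

The rolling/expanding window variants have no one-line analogue here; statsmodels only later added a RollingOLS, so contemporary code had to loop over windows.]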
- """ - - if (kwargs.get('cluster') is not None and - kwargs.get('nw_lags') is not None): - raise ValueError( - 'Pandas OLS does not work with Newey-West correction ' - 'and clustering.') - - pool = kwargs.get('pool') - if 'pool' in kwargs: - del kwargs['pool'] - - window_type = kwargs.get('window_type') - window = kwargs.get('window') - - if window_type is None: - if window is None: - window_type = 'full_sample' - else: - window_type = 'rolling' - else: - window_type = common._get_window_type(window_type) - - if window_type != 'full_sample': - kwargs['window_type'] = common._get_window_type(window_type) - - y = kwargs.get('y') - x = kwargs.get('x') - - panel = False - if isinstance(y, DataFrame) or (isinstance(y, Series) and - isinstance(y.index, MultiIndex)): - panel = True - if isinstance(x, Panel): - panel = True - - if window_type == 'full_sample': - for rolling_field in ('window_type', 'window', 'min_periods'): - if rolling_field in kwargs: - del kwargs[rolling_field] - - if panel: - if pool is False: - klass = NonPooledPanelOLS - else: - klass = PanelOLS - else: - klass = OLS - else: - if panel: - if pool is False: - klass = NonPooledPanelOLS - else: - klass = MovingPanelOLS - else: - klass = MovingOLS - - return klass(**kwargs) diff --git a/pandas/stats/math.py b/pandas/stats/math.py deleted file mode 100644 index 505415bebf89e..0000000000000 --- a/pandas/stats/math.py +++ /dev/null @@ -1,130 +0,0 @@ -# pylint: disable-msg=E1103 -# pylint: disable-msg=W0212 - -from __future__ import division - -from pandas.compat import range -import numpy as np -import numpy.linalg as linalg - - -def rank(X, cond=1.0e-12): - """ - Return the rank of a matrix X based on its generalized inverse, - not the SVD. - """ - X = np.asarray(X) - if len(X.shape) == 2: - import scipy.linalg as SL - D = SL.svdvals(X) - result = np.add.reduce(np.greater(D / D.max(), cond)) - return int(result.astype(np.int32)) - else: - return int(not np.alltrue(np.equal(X, 0.))) - - -def solve(a, b): - """Returns the solution of A X = B.""" - try: - return linalg.solve(a, b) - except linalg.LinAlgError: - return np.dot(linalg.pinv(a), b) - - -def inv(a): - """Returns the inverse of A.""" - try: - return np.linalg.inv(a) - except linalg.LinAlgError: - return np.linalg.pinv(a) - - -def is_psd(m): - eigvals = linalg.eigvals(m) - return np.isreal(eigvals).all() and (eigvals >= 0).all() - - -def newey_west(m, max_lags, nobs, df, nw_overlap=False): - """ - Compute Newey-West adjusted covariance matrix, taking into account - specified number of leads / lags - - Parameters - ---------- - m : (N x K) - max_lags : int - nobs : int - Number of observations in model - df : int - Degrees of freedom in explanatory variables - nw_overlap : boolean, default False - Assume data is overlapping - - Returns - ------- - ndarray (K x K) - - Reference - --------- - Newey, W. K. & West, K. D. (1987) A Simple, Positive - Semi-definite, Heteroskedasticity and Autocorrelation Consistent - Covariance Matrix, Econometrica, vol. 55(3), 703-708 - """ - Xeps = np.dot(m.T, m) - for lag in range(1, max_lags + 1): - auto_cov = np.dot(m[:-lag].T, m[lag:]) - weight = lag / (max_lags + 1) - if nw_overlap: - weight = 0 - bb = auto_cov + auto_cov.T - dd = (1 - weight) * bb - Xeps += dd - - Xeps *= nobs / (nobs - df) - - if nw_overlap and not is_psd(Xeps): - new_max_lags = int(np.ceil(max_lags * 1.5)) -# print('nw_overlap is True and newey_west generated a non positive ' -# 'semidefinite matrix, so using newey_west with max_lags of %d.' 
-# % new_max_lags) - return newey_west(m, new_max_lags, nobs, df) - - return Xeps - - -def calc_F(R, r, beta, var_beta, nobs, df): - """ - Computes the standard F-test statistic for linear restriction - hypothesis testing - - Parameters - ---------- - R: ndarray (N x N) - Restriction matrix - r: ndarray (N x 1) - Restriction vector - beta: ndarray (N x 1) - Estimated model coefficients - var_beta: ndarray (N x N) - Variance covariance matrix of regressors - nobs: int - Number of observations in model - df: int - Model degrees of freedom - - Returns - ------- - F value, (q, df_resid), p value - """ - from scipy.stats import f - - hyp = np.dot(R, beta.reshape(len(beta), 1)) - r - RSR = np.dot(R, np.dot(var_beta, R.T)) - - q = len(r) - - F = np.dot(hyp.T, np.dot(inv(RSR), hyp)).squeeze() / q - - p_value = 1 - f.cdf(F, q, nobs - df) - - return F, (q, nobs - df), p_value diff --git a/pandas/stats/misc.py b/pandas/stats/misc.py deleted file mode 100644 index 1a077dcb6f9a1..0000000000000 --- a/pandas/stats/misc.py +++ /dev/null @@ -1,389 +0,0 @@ -from numpy import NaN -from pandas import compat -import numpy as np - -from pandas.core.api import Series, DataFrame -from pandas.core.series import remove_na -from pandas.compat import zip, lrange -import pandas.core.common as com - - -def zscore(series): - return (series - series.mean()) / np.std(series, ddof=0) - - -def correl_ts(frame1, frame2): - """ - Pairwise correlation of columns of two DataFrame objects - - Parameters - ---------- - - Returns - ------- - y : Series - """ - results = {} - for col, series in compat.iteritems(frame1): - if col in frame2: - other = frame2[col] - - idx1 = series.valid().index - idx2 = other.valid().index - - common_index = idx1.intersection(idx2) - - seriesStand = zscore(series.reindex(common_index)) - otherStand = zscore(other.reindex(common_index)) - results[col] = (seriesStand * otherStand).mean() - - return Series(results) - - -def correl_xs(frame1, frame2): - return correl_ts(frame1.T, frame2.T) - - -def percentileofscore(a, score, kind='rank'): - """The percentile rank of a score relative to a list of scores. - - A `percentileofscore` of, for example, 80% means that 80% of the - scores in `a` are below the given score. In the case of gaps or - ties, the exact definition depends on the optional keyword, `kind`. - - Parameters - ---------- - a: array like - Array of scores to which `score` is compared. - score: int or float - Score that is compared to the elements in `a`. - kind: {'rank', 'weak', 'strict', 'mean'}, optional - This optional parameter specifies the interpretation of the - resulting score: - - - "rank": Average percentage ranking of score. In case of - multiple matches, average the percentage rankings of - all matching scores. - - "weak": This kind corresponds to the definition of a cumulative - distribution function. A percentileofscore of 80% - means that 80% of values are less than or equal - to the provided score. - - "strict": Similar to "weak", except that only values that are - strictly less than the given score are counted. - - "mean": The average of the "weak" and "strict" scores, often used in - testing. See - - http://en.wikipedia.org/wiki/Percentile_rank - - Returns - ------- - pcos : float - Percentile-position of score (0-100) relative to `a`. 
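The calc_F helper above is easy to exercise with concrete numbers. A small sketch, with all values illustrative, testing H0: beta_1 = 0 in a two-coefficient model via the same restriction algebra:

import numpy as np
from scipy.stats import f

beta = np.array([2.0, 0.3])                      # estimated coefficients
var_beta = np.array([[0.04, 0.0], [0.0, 0.02]])  # their covariance matrix
R = np.array([[0.0, 1.0]])                       # restriction matrix: picks out beta_1
r = np.array([[0.0]])                            # restriction vector
nobs, df = 100, 2

hyp = R @ beta.reshape(-1, 1) - r
RSR = R @ var_beta @ R.T
q = len(r)
F = (hyp.T @ np.linalg.inv(RSR) @ hyp).squeeze() / q
p_value = 1 - f.cdf(F, q, nobs - df)
print(F, p_value)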
- - Examples - -------- - Three-quarters of the given values lie below a given score: - - >>> percentileofscore([1, 2, 3, 4], 3) - 75.0 - - With multiple matches, note how the scores of the two matches, 0.6 - and 0.8 respectively, are averaged: - - >>> percentileofscore([1, 2, 3, 3, 4], 3) - 70.0 - - Only 2/5 values are strictly less than 3: - - >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='strict') - 40.0 - - But 4/5 values are less than or equal to 3: - - >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='weak') - 80.0 - - The average between the weak and the strict scores is - - >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='mean') - 60.0 - - """ - a = np.array(a) - n = len(a) - - if kind == 'rank': - if not(np.any(a == score)): - a = np.append(a, score) - a_len = np.array(lrange(len(a))) - else: - a_len = np.array(lrange(len(a))) + 1.0 - - a = np.sort(a) - idx = [a == score] - pct = (np.mean(a_len[idx]) / n) * 100.0 - return pct - - elif kind == 'strict': - return sum(a < score) / float(n) * 100 - elif kind == 'weak': - return sum(a <= score) / float(n) * 100 - elif kind == 'mean': - return (sum(a < score) + sum(a <= score)) * 50 / float(n) - else: - raise ValueError("kind can only be 'rank', 'strict', 'weak' or 'mean'") - - -def percentileRank(frame, column=None, kind='mean'): - """ - Return score at percentile for each point in time (cross-section) - - Parameters - ---------- - frame: DataFrame - column: string or Series, optional - Column name or specific Series to compute percentiles for. - If not provided, percentiles are computed for all values at each - point in time. Note that this can take a LONG time. - kind: {'rank', 'weak', 'strict', 'mean'}, optional - This optional parameter specifies the interpretation of the - resulting score: - - - "rank": Average percentage ranking of score. In case of - multiple matches, average the percentage rankings of - all matching scores. - - "weak": This kind corresponds to the definition of a cumulative - distribution function. A percentileofscore of 80% - means that 80% of values are less than or equal - to the provided score. - - "strict": Similar to "weak", except that only values that are - strictly less than the given score are counted. - - "mean": The average of the "weak" and "strict" scores, often used in - testing. 
See - - http://en.wikipedia.org/wiki/Percentile_rank - - Returns - ------- - TimeSeries or DataFrame, depending on input - """ - fun = lambda xs, score: percentileofscore(remove_na(xs), - score, kind=kind) - - results = {} - framet = frame.T - if column is not None: - if isinstance(column, Series): - for date, xs in compat.iteritems(frame.T): - results[date] = fun(xs, column.get(date, NaN)) - else: - for date, xs in compat.iteritems(frame.T): - results[date] = fun(xs, xs[column]) - results = Series(results) - else: - for column in frame.columns: - for date, xs in compat.iteritems(framet): - results.setdefault(date, {})[column] = fun(xs, xs[column]) - results = DataFrame(results).T - return results - - -def bucket(series, k, by=None): - """ - Produce DataFrame representing quantiles of a Series - - Parameters - ---------- - series : Series - k : int - number of quantiles - by : Series or same-length array - bucket by value - - Returns - ------- - DataFrame - """ - if by is None: - by = series - else: - by = by.reindex(series.index) - - split = _split_quantile(by, k) - mat = np.empty((len(series), k), dtype=float) * np.NaN - - for i, v in enumerate(split): - mat[:, i][v] = series.take(v) - - return DataFrame(mat, index=series.index, columns=np.arange(k) + 1) - - -def _split_quantile(arr, k): - arr = np.asarray(arr) - mask = np.isfinite(arr) - order = arr[mask].argsort() - n = len(arr) - - return np.array_split(np.arange(n)[mask].take(order), k) - - -def bucketcat(series, cats): - """ - Produce DataFrame representing quantiles of a Series - - Parameters - ---------- - series : Series - cat : Series or same-length array - bucket by category; mutually exclusive with 'by' - - Returns - ------- - DataFrame - """ - if not isinstance(series, Series): - series = Series(series, index=np.arange(len(series))) - - cats = np.asarray(cats) - - unique_labels = np.unique(cats) - unique_labels = unique_labels[com.notnull(unique_labels)] - - # group by - data = {} - - for label in unique_labels: - data[label] = series[cats == label] - - return DataFrame(data, columns=unique_labels) - - -def bucketpanel(series, bins=None, by=None, cat=None): - """ - Bucket data by two Series to create summary panel - - Parameters - ---------- - series : Series - bins : tuple (length-2) - e.g. 
(2, 2) - by : tuple of Series - bucket by value - cat : tuple of Series - bucket by category; mutually exclusive with 'by' - - Returns - ------- - DataFrame - """ - use_by = by is not None - use_cat = cat is not None - - if use_by and use_cat: - raise Exception('must specify by or cat, but not both') - elif use_by: - if len(by) != 2: - raise Exception('must provide two bucketing series') - - xby, yby = by - xbins, ybins = bins - - return _bucketpanel_by(series, xby, yby, xbins, ybins) - - elif use_cat: - xcat, ycat = cat - return _bucketpanel_cat(series, xcat, ycat) - else: - raise Exception('must specify either values or categories ' - 'to bucket by') - - -def _bucketpanel_by(series, xby, yby, xbins, ybins): - xby = xby.reindex(series.index) - yby = yby.reindex(series.index) - - xlabels = _bucket_labels(xby.reindex(series.index), xbins) - ylabels = _bucket_labels(yby.reindex(series.index), ybins) - - labels = _uniquify(xlabels, ylabels, xbins, ybins) - - mask = com.isnull(labels) - labels[mask] = -1 - - unique_labels = np.unique(labels) - bucketed = bucketcat(series, labels) - - _ulist = list(labels) - index_map = dict((x, _ulist.index(x)) for x in unique_labels) - - def relabel(key): - pos = index_map[key] - - xlab = xlabels[pos] - ylab = ylabels[pos] - - return '%sx%s' % (int(xlab) if com.notnull(xlab) else 'NULL', - int(ylab) if com.notnull(ylab) else 'NULL') - - return bucketed.rename(columns=relabel) - - -def _bucketpanel_cat(series, xcat, ycat): - xlabels, xmapping = _intern(xcat) - ylabels, ymapping = _intern(ycat) - - shift = 10 ** (np.ceil(np.log10(ylabels.max()))) - labels = xlabels * shift + ylabels - - sorter = labels.argsort() - sorted_labels = labels.take(sorter) - sorted_xlabels = xlabels.take(sorter) - sorted_ylabels = ylabels.take(sorter) - - unique_labels = np.unique(labels) - unique_labels = unique_labels[com.notnull(unique_labels)] - - locs = sorted_labels.searchsorted(unique_labels) - xkeys = sorted_xlabels.take(locs) - ykeys = sorted_ylabels.take(locs) - - stringified = ['(%s, %s)' % arg - for arg in zip(xmapping.take(xkeys), ymapping.take(ykeys))] - - result = bucketcat(series, labels) - result.columns = stringified - - return result - - -def _intern(values): - # assumed no NaN values - values = np.asarray(values) - - uniqued = np.unique(values) - labels = uniqued.searchsorted(values) - return labels, uniqued - - -def _uniquify(xlabels, ylabels, xbins, ybins): - # encode the stuff, create unique label - shifter = 10 ** max(xbins, ybins) - _xpiece = xlabels * shifter - _ypiece = ylabels - - return _xpiece + _ypiece - - -def _bucket_labels(series, k): - arr = np.asarray(series) - mask = np.isfinite(arr) - order = arr[mask].argsort() - n = len(series) - - split = np.array_split(np.arange(n)[mask].take(order), k) - - mat = np.empty(n, dtype=float) * np.NaN - for i, v in enumerate(split): - mat[v] = i - - return mat + 1 diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py deleted file mode 100644 index 96ec70d59488a..0000000000000 --- a/pandas/stats/ols.py +++ /dev/null @@ -1,1377 +0,0 @@ -""" -Ordinary least squares regression -""" - -# pylint: disable-msg=W0201 - -# flake8: noqa - -from pandas.compat import zip, range, StringIO -from itertools import starmap -from pandas import compat -import numpy as np - -from pandas.core.api import DataFrame, Series, isnull -from pandas.core.base import StringMixin -from pandas.types.common import _ensure_float64 -from pandas.core.index import MultiIndex -from pandas.core.panel import Panel -from pandas.util.decorators import 
cache_readonly - -import pandas.stats.common as scom -import pandas.stats.math as math -import pandas.stats.moments as moments - -_FP_ERR = 1e-8 - - -class OLS(StringMixin): - """ - Runs a full sample ordinary least squares regression. - - Parameters - ---------- - y : Series - x : Series, DataFrame, dict of Series - intercept : bool - True if you want an intercept. - weights : array-like, optional - 1d array of weights. If you supply 1/W then the variables are pre- - multiplied by 1/sqrt(W). If no weights are supplied the default value - is 1 and WLS reults are the same as OLS. - nw_lags : None or int - Number of Newey-West lags. - nw_overlap : boolean, default False - Assume data is overlapping when computing Newey-West estimator - - """ - _panel_model = False - - def __init__(self, y, x, intercept=True, weights=None, nw_lags=None, - nw_overlap=False): - import warnings - warnings.warn("The pandas.stats.ols module is deprecated and will be " - "removed in a future version. We refer to external packages " - "like statsmodels, see some examples here: " - "http://www.statsmodels.org/stable/regression.html", - FutureWarning, stacklevel=4) - - try: - import statsmodels.api as sm - except ImportError: - import scikits.statsmodels.api as sm - - self._x_orig = x - self._y_orig = y - self._weights_orig = weights - self._intercept = intercept - self._nw_lags = nw_lags - self._nw_overlap = nw_overlap - - (self._y, self._x, self._weights, self._x_filtered, - self._index, self._time_has_obs) = self._prepare_data() - - if self._weights is not None: - self._x_trans = self._x.mul(np.sqrt(self._weights), axis=0) - self._y_trans = self._y * np.sqrt(self._weights) - self.sm_ols = sm.WLS(self._y.get_values(), - self._x.get_values(), - weights=self._weights.values).fit() - else: - self._x_trans = self._x - self._y_trans = self._y - self.sm_ols = sm.OLS(self._y.get_values(), - self._x.get_values()).fit() - - def _prepare_data(self): - """ - Cleans the input for single OLS. - - Parameters - ---------- - lhs: Series - Dependent variable in the regression. - rhs: dict, whose values are Series, DataFrame, or dict - Explanatory variables of the regression. - - Returns - ------- - Series, DataFrame - Cleaned lhs and rhs - """ - (filt_lhs, filt_rhs, filt_weights, - pre_filt_rhs, index, valid) = _filter_data(self._y_orig, self._x_orig, - self._weights_orig) - if self._intercept: - filt_rhs['intercept'] = 1. - pre_filt_rhs['intercept'] = 1. - - if hasattr(filt_weights, 'to_dense'): - filt_weights = filt_weights.to_dense() - - return (filt_lhs, filt_rhs, filt_weights, - pre_filt_rhs, index, valid) - - @property - def nobs(self): - return self._nobs - - @property - def _nobs(self): - return len(self._y) - - @property - def nw_lags(self): - return self._nw_lags - - @property - def x(self): - """Returns the filtered x used in the regression.""" - return self._x - - @property - def y(self): - """Returns the filtered y used in the regression.""" - return self._y - - @cache_readonly - def _beta_raw(self): - """Runs the regression and returns the beta.""" - return self.sm_ols.params - - @cache_readonly - def beta(self): - """Returns the betas in Series form.""" - return Series(self._beta_raw, index=self._x.columns) - - @cache_readonly - def _df_raw(self): - """Returns the degrees of freedom.""" - return math.rank(self._x.values) - - @cache_readonly - def df(self): - """Returns the degrees of freedom. - - This equals the rank of the X matrix. 
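The _x_trans / _y_trans scaling in the constructor above relies on the identity that WLS with weights w is OLS on variables pre-multiplied by sqrt(w). A quick numerical check of that identity, using synthetic data:

import numpy as np

rng = np.random.default_rng(0)
n = 50
x = np.column_stack([np.ones(n), rng.normal(size=n)])
y = x @ np.array([1.0, 2.0]) + rng.normal(size=n)
w = rng.uniform(0.5, 2.0, size=n)

sw = np.sqrt(w)
# OLS on the sqrt(w)-scaled variables, as _x_trans / _y_trans do above.
beta_scaled = np.linalg.lstsq(x * sw[:, None], y * sw, rcond=None)[0]
# The same fit via the WLS normal equations X'WX b = X'Wy.
beta_wls = np.linalg.solve(x.T @ (w[:, None] * x), x.T @ (w * y))
assert np.allclose(beta_scaled, beta_wls)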
- """ - return self._df_raw - - @cache_readonly - def _df_model_raw(self): - """Returns the raw model degrees of freedom.""" - return self.sm_ols.df_model - - @cache_readonly - def df_model(self): - """Returns the degrees of freedom of the model.""" - return self._df_model_raw - - @cache_readonly - def _df_resid_raw(self): - """Returns the raw residual degrees of freedom.""" - return self.sm_ols.df_resid - - @cache_readonly - def df_resid(self): - """Returns the degrees of freedom of the residuals.""" - return self._df_resid_raw - - @cache_readonly - def _f_stat_raw(self): - """Returns the raw f-stat value.""" - from scipy.stats import f - - cols = self._x.columns - - if self._nw_lags is None: - F = self._r2_raw / (self._r2_raw - self._r2_adj_raw) - - q = len(cols) - if 'intercept' in cols: - q -= 1 - - shape = q, self.df_resid - p_value = 1 - f.cdf(F, shape[0], shape[1]) - return F, shape, p_value - - k = len(cols) - R = np.eye(k) - r = np.zeros((k, 1)) - - try: - intercept = cols.get_loc('intercept') - R = np.concatenate((R[0: intercept], R[intercept + 1:])) - r = np.concatenate((r[0: intercept], r[intercept + 1:])) - except KeyError: - # no intercept - pass - - return math.calc_F(R, r, self._beta_raw, self._var_beta_raw, - self._nobs, self.df) - - @cache_readonly - def f_stat(self): - """Returns the f-stat value.""" - return f_stat_to_dict(self._f_stat_raw) - - def f_test(self, hypothesis): - """Runs the F test, given a joint hypothesis. The hypothesis is - represented by a collection of equations, in the form - - A*x_1+B*x_2=C - - You must provide the coefficients even if they're 1. No spaces. - - The equations can be passed as either a single string or a - list of strings. - - Examples - -------- - o = ols(...) - o.f_test('1*x1+2*x2=0,1*x3=0') - o.f_test(['1*x1+2*x2=0','1*x3=0']) - """ - - x_names = self._x.columns - - R = [] - r = [] - - if isinstance(hypothesis, str): - eqs = hypothesis.split(',') - elif isinstance(hypothesis, list): - eqs = hypothesis - else: # pragma: no cover - raise Exception('hypothesis must be either string or list') - for equation in eqs: - row = np.zeros(len(x_names)) - lhs, rhs = equation.split('=') - for s in lhs.split('+'): - ss = s.split('*') - coeff = float(ss[0]) - x_name = ss[1] - - if x_name not in x_names: - raise Exception('no coefficient named %s' % x_name) - idx = x_names.get_loc(x_name) - row[idx] = coeff - rhs = float(rhs) - - R.append(row) - r.append(rhs) - - R = np.array(R) - q = len(r) - r = np.array(r).reshape(q, 1) - - result = math.calc_F(R, r, self._beta_raw, self._var_beta_raw, - self._nobs, self.df) - - return f_stat_to_dict(result) - - @cache_readonly - def _p_value_raw(self): - """Returns the raw p values.""" - from scipy.stats import t - - return 2 * t.sf(np.fabs(self._t_stat_raw), - self._df_resid_raw) - - @cache_readonly - def p_value(self): - """Returns the p values.""" - return Series(self._p_value_raw, index=self.beta.index) - - @cache_readonly - def _r2_raw(self): - """Returns the raw r-squared values.""" - if self._use_centered_tss: - return 1 - self.sm_ols.ssr / self.sm_ols.centered_tss - else: - return 1 - self.sm_ols.ssr / self.sm_ols.uncentered_tss - - @property - def _use_centered_tss(self): - # has_intercept = np.abs(self._resid_raw.sum()) < _FP_ERR - return self._intercept - - @cache_readonly - def r2(self): - """Returns the r-squared values.""" - return self._r2_raw - - @cache_readonly - def _r2_adj_raw(self): - """Returns the raw r-squared adjusted values.""" - return self.sm_ols.rsquared_adj - - @cache_readonly - 
def r2_adj(self): - """Returns the r-squared adjusted values.""" - return self._r2_adj_raw - - @cache_readonly - def _resid_raw(self): - """Returns the raw residuals.""" - return self.sm_ols.resid - - @cache_readonly - def resid(self): - """Returns the residuals.""" - return Series(self._resid_raw, index=self._x.index) - - @cache_readonly - def _rmse_raw(self): - """Returns the raw rmse values.""" - return np.sqrt(self.sm_ols.mse_resid) - - @cache_readonly - def rmse(self): - """Returns the rmse value.""" - return self._rmse_raw - - @cache_readonly - def _std_err_raw(self): - """Returns the raw standard err values.""" - return np.sqrt(np.diag(self._var_beta_raw)) - - @cache_readonly - def std_err(self): - """Returns the standard err values of the betas.""" - return Series(self._std_err_raw, index=self.beta.index) - - @cache_readonly - def _t_stat_raw(self): - """Returns the raw t-stat value.""" - return self._beta_raw / self._std_err_raw - - @cache_readonly - def t_stat(self): - """Returns the t-stat values of the betas.""" - return Series(self._t_stat_raw, index=self.beta.index) - - @cache_readonly - def _var_beta_raw(self): - """ - Returns the raw covariance of beta. - """ - x = self._x.values - y = self._y.values - - xx = np.dot(x.T, x) - - if self._nw_lags is None: - return math.inv(xx) * (self._rmse_raw ** 2) - else: - resid = y - np.dot(x, self._beta_raw) - m = (x.T * resid).T - - xeps = math.newey_west(m, self._nw_lags, self._nobs, self._df_raw, - self._nw_overlap) - - xx_inv = math.inv(xx) - return np.dot(xx_inv, np.dot(xeps, xx_inv)) - - @cache_readonly - def var_beta(self): - """Returns the variance-covariance matrix of beta.""" - return DataFrame(self._var_beta_raw, index=self.beta.index, - columns=self.beta.index) - - @cache_readonly - def _y_fitted_raw(self): - """Returns the raw fitted y values.""" - if self._weights is None: - X = self._x_filtered.values - else: - # XXX - return self.sm_ols.fittedvalues - - b = self._beta_raw - return np.dot(X, b) - - @cache_readonly - def y_fitted(self): - """Returns the fitted y values. This equals BX.""" - if self._weights is None: - index = self._x_filtered.index - orig_index = index - else: - index = self._y.index - orig_index = self._y_orig.index - - result = Series(self._y_fitted_raw, index=index) - return result.reindex(orig_index) - - @cache_readonly - def _y_predict_raw(self): - """Returns the raw predicted y values.""" - return self._y_fitted_raw - - @cache_readonly - def y_predict(self): - """Returns the predicted y values. - - For in-sample, this is same as y_fitted.""" - return self.y_fitted - - def predict(self, beta=None, x=None, fill_value=None, - fill_method=None, axis=0): - """ - Parameters - ---------- - beta : Series - x : Series or DataFrame - fill_value : scalar or dict, default None - fill_method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None - axis : {0, 1}, default 0 - See DataFrame.fillna for more details - - Notes - ----- - 1. If both fill_value and fill_method are None then NaNs are dropped - (this is the default behavior) - 2. 
An intercept will be automatically added to the new_y_values if - the model was fitted using an intercept - - Returns - ------- - Series of predicted values - """ - if beta is None and x is None: - return self.y_predict - - if beta is None: - beta = self.beta - else: - beta = beta.reindex(self.beta.index) - if isnull(beta).any(): - raise ValueError('Must supply betas for same variables') - - if x is None: - x = self._x - orig_x = x - else: - orig_x = x - if fill_value is None and fill_method is None: - x = x.dropna(how='any') - else: - x = x.fillna(value=fill_value, method=fill_method, axis=axis) - if isinstance(x, Series): - x = DataFrame({'x': x}) - if self._intercept: - x['intercept'] = 1. - - x = x.reindex(columns=self._x.columns) - - rs = np.dot(x.values, beta.values) - return Series(rs, x.index).reindex(orig_x.index) - - RESULT_FIELDS = ['r2', 'r2_adj', 'df', 'df_model', 'df_resid', 'rmse', - 'f_stat', 'beta', 'std_err', 't_stat', 'p_value', 'nobs'] - - @cache_readonly - def _results(self): - results = {} - for result in self.RESULT_FIELDS: - results[result] = getattr(self, result) - - return results - - @cache_readonly - def _coef_table(self): - buf = StringIO() - - buf.write('%14s %10s %10s %10s %10s %10s %10s\n' % - ('Variable', 'Coef', 'Std Err', 't-stat', - 'p-value', 'CI 2.5%', 'CI 97.5%')) - buf.write(scom.banner('')) - coef_template = '\n%14s %10.4f %10.4f %10.2f %10.4f %10.4f %10.4f' - - results = self._results - - beta = results['beta'] - - for i, name in enumerate(beta.index): - if i and not (i % 5): - buf.write('\n' + scom.banner('')) - - std_err = results['std_err'][name] - CI1 = beta[name] - 1.96 * std_err - CI2 = beta[name] + 1.96 * std_err - - t_stat = results['t_stat'][name] - p_value = results['p_value'][name] - - line = coef_template % (name, - beta[name], std_err, t_stat, p_value, CI1, CI2) - - buf.write(line) - - if self.nw_lags is not None: - buf.write('\n') - buf.write('*** The calculations are Newey-West ' - 'adjusted with lags %5d\n' % self.nw_lags) - - return buf.getvalue() - - @cache_readonly - def summary_as_matrix(self): - """Returns the formatted results of the OLS as a DataFrame.""" - results = self._results - beta = results['beta'] - data = {'beta': results['beta'], - 't-stat': results['t_stat'], - 'p-value': results['p_value'], - 'std err': results['std_err']} - return DataFrame(data, beta.index).T - - @cache_readonly - def summary(self): - """ - This returns the formatted result of the OLS computation - """ - template = """ -%(bannerTop)s - -Formula: Y ~ %(formula)s - -Number of Observations: %(nobs)d -Number of Degrees of Freedom: %(df)d - -R-squared: %(r2)10.4f -Adj R-squared: %(r2_adj)10.4f - -Rmse: %(rmse)10.4f - -F-stat %(f_stat_shape)s: %(f_stat)10.4f, p-value: %(f_stat_p_value)10.4f - -Degrees of Freedom: model %(df_model)d, resid %(df_resid)d - -%(bannerCoef)s -%(coef_table)s -%(bannerEnd)s -""" - coef_table = self._coef_table - - results = self._results - - f_stat = results['f_stat'] - - bracketed = ['<%s>' % str(c) for c in results['beta'].index] - - formula = StringIO() - formula.write(bracketed[0]) - tot = len(bracketed[0]) - line = 1 - for coef in bracketed[1:]: - tot = tot + len(coef) + 3 - - if tot // (68 * line): - formula.write('\n' + ' ' * 12) - line += 1 - - formula.write(' + ' + coef) - - params = { - 'bannerTop': scom.banner('Summary of Regression Analysis'), - 'bannerCoef': scom.banner('Summary of Estimated Coefficients'), - 'bannerEnd': scom.banner('End of Summary'), - 'formula': formula.getvalue(), - 'r2': results['r2'], - 
'r2_adj': results['r2_adj'], - 'nobs': results['nobs'], - 'df': results['df'], - 'df_model': results['df_model'], - 'df_resid': results['df_resid'], - 'coef_table': coef_table, - 'rmse': results['rmse'], - 'f_stat': f_stat['f-stat'], - 'f_stat_shape': '(%d, %d)' % (f_stat['DF X'], f_stat['DF Resid']), - 'f_stat_p_value': f_stat['p-value'], - } - - return template % params - - def __unicode__(self): - return self.summary - - @cache_readonly - def _time_obs_count(self): - # XXX - return self._time_has_obs.astype(int) - - @property - def _total_times(self): - return self._time_has_obs.sum() - - -class MovingOLS(OLS): - """ - Runs a rolling/expanding simple OLS. - - Parameters - ---------- - y : Series - x : Series, DataFrame, or dict of Series - weights : array-like, optional - 1d array of weights. If None, equivalent to an unweighted OLS. - window_type : {'full sample', 'rolling', 'expanding'} - Default expanding - window : int - size of window (for rolling/expanding OLS) - min_periods : int - Threshold of non-null data points to require. - If None, defaults to size of window for window_type='rolling' and 1 - otherwise - intercept : bool - True if you want an intercept. - nw_lags : None or int - Number of Newey-West lags. - nw_overlap : boolean, default False - Assume data is overlapping when computing Newey-West estimator - - """ - - def __init__(self, y, x, weights=None, window_type='expanding', - window=None, min_periods=None, intercept=True, - nw_lags=None, nw_overlap=False): - - self._args = dict(intercept=intercept, nw_lags=nw_lags, - nw_overlap=nw_overlap) - - OLS.__init__(self, y=y, x=x, weights=weights, **self._args) - - self._set_window(window_type, window, min_periods) - - def _set_window(self, window_type, window, min_periods): - self._window_type = scom._get_window_type(window_type) - - if self._is_rolling: - if window is None: - raise AssertionError("Must specify window.") - if min_periods is None: - min_periods = window - else: - window = len(self._x) - if min_periods is None: - min_periods = 1 - - self._window = int(window) - self._min_periods = min_periods - -#------------------------------------------------------------------------------ -# "Public" results - - @cache_readonly - def beta(self): - """Returns the betas in Series/DataFrame form.""" - return DataFrame(self._beta_raw, - index=self._result_index, - columns=self._x.columns) - - @cache_readonly - def rank(self): - return Series(self._rank_raw, index=self._result_index) - - @cache_readonly - def df(self): - """Returns the degrees of freedom.""" - return Series(self._df_raw, index=self._result_index) - - @cache_readonly - def df_model(self): - """Returns the model degrees of freedom.""" - return Series(self._df_model_raw, index=self._result_index) - - @cache_readonly - def df_resid(self): - """Returns the residual degrees of freedom.""" - return Series(self._df_resid_raw, index=self._result_index) - - @cache_readonly - def f_stat(self): - """Returns the f-stat value.""" - f_stat_dicts = dict((date, f_stat_to_dict(f_stat)) - for date, f_stat in zip(self.beta.index, - self._f_stat_raw)) - - return DataFrame(f_stat_dicts).T - - def f_test(self, hypothesis): - raise NotImplementedError('must use full sample') - - @cache_readonly - def forecast_mean(self): - return Series(self._forecast_mean_raw, index=self._result_index) - - @cache_readonly - def forecast_vol(self): - return Series(self._forecast_vol_raw, index=self._result_index) - - @cache_readonly - def p_value(self): - """Returns the p values.""" - cols = 
self.beta.columns - return DataFrame(self._p_value_raw, columns=cols, - index=self._result_index) - - @cache_readonly - def r2(self): - """Returns the r-squared values.""" - return Series(self._r2_raw, index=self._result_index) - - @cache_readonly - def resid(self): - """Returns the residuals.""" - return Series(self._resid_raw[self._valid_obs_labels], - index=self._result_index) - - @cache_readonly - def r2_adj(self): - """Returns the r-squared adjusted values.""" - index = self.r2.index - - return Series(self._r2_adj_raw, index=index) - - @cache_readonly - def rmse(self): - """Returns the rmse values.""" - return Series(self._rmse_raw, index=self._result_index) - - @cache_readonly - def std_err(self): - """Returns the standard err values.""" - return DataFrame(self._std_err_raw, columns=self.beta.columns, - index=self._result_index) - - @cache_readonly - def t_stat(self): - """Returns the t-stat value.""" - return DataFrame(self._t_stat_raw, columns=self.beta.columns, - index=self._result_index) - - @cache_readonly - def var_beta(self): - """Returns the covariance of beta.""" - result = {} - result_index = self._result_index - for i in range(len(self._var_beta_raw)): - dm = DataFrame(self._var_beta_raw[i], columns=self.beta.columns, - index=self.beta.columns) - result[result_index[i]] = dm - - return Panel.from_dict(result, intersect=False) - - @cache_readonly - def y_fitted(self): - """Returns the fitted y values.""" - return Series(self._y_fitted_raw[self._valid_obs_labels], - index=self._result_index) - - @cache_readonly - def y_predict(self): - """Returns the predicted y values.""" - return Series(self._y_predict_raw[self._valid_obs_labels], - index=self._result_index) - -#------------------------------------------------------------------------------ -# "raw" attributes, calculations - - @property - def _is_rolling(self): - return self._window_type == 'rolling' - - @cache_readonly - def _beta_raw(self): - """Runs the regression and returns the beta.""" - beta, indices, mask = self._rolling_ols_call - - return beta[indices] - - @cache_readonly - def _result_index(self): - return self._index[self._valid_indices] - - @property - def _valid_indices(self): - return self._rolling_ols_call[1] - - @cache_readonly - def _rolling_ols_call(self): - return self._calc_betas(self._x_trans, self._y_trans) - - def _calc_betas(self, x, y): - N = len(self._index) - K = len(self._x.columns) - - betas = np.empty((N, K), dtype=float) - betas[:] = np.NaN - - valid = self._time_has_obs - enough = self._enough_obs - window = self._window - - # Use transformed (demeaned) Y, X variables - cum_xx = self._cum_xx(x) - cum_xy = self._cum_xy(x, y) - - for i in range(N): - if not valid[i] or not enough[i]: - continue - - xx = cum_xx[i] - xy = cum_xy[i] - if self._is_rolling and i >= window: - xx = xx - cum_xx[i - window] - xy = xy - cum_xy[i - window] - - betas[i] = math.solve(xx, xy) - - mask = ~np.isnan(betas).any(axis=1) - have_betas = np.arange(N)[mask] - - return betas, have_betas, mask - - def _rolling_rank(self): - dates = self._index - window = self._window - - ranks = np.empty(len(dates), dtype=float) - ranks[:] = np.NaN - for i, date in enumerate(dates): - if self._is_rolling and i >= window: - prior_date = dates[i - window + 1] - else: - prior_date = dates[0] - - x_slice = self._x.truncate(before=prior_date, after=date).values - - if len(x_slice) == 0: - continue - - ranks[i] = math.rank(x_slice) - - return ranks - - def _cum_xx(self, x): - dates = self._index - K = len(x.columns) - valid = 
self._time_has_obs - cum_xx = [] - - slicer = lambda df, dt: df.truncate(dt, dt).values - if not self._panel_model: - _get_index = x.index.get_loc - - def slicer(df, dt): - i = _get_index(dt) - return df.values[i:i + 1, :] - - last = np.zeros((K, K)) - - for i, date in enumerate(dates): - if not valid[i]: - cum_xx.append(last) - continue - - x_slice = slicer(x, date) - xx = last = last + np.dot(x_slice.T, x_slice) - cum_xx.append(xx) - - return cum_xx - - def _cum_xy(self, x, y): - dates = self._index - valid = self._time_has_obs - cum_xy = [] - - x_slicer = lambda df, dt: df.truncate(dt, dt).values - if not self._panel_model: - _get_index = x.index.get_loc - - def x_slicer(df, dt): - i = _get_index(dt) - return df.values[i:i + 1] - - _y_get_index = y.index.get_loc - _values = y.values - if isinstance(y.index, MultiIndex): - def y_slicer(df, dt): - loc = _y_get_index(dt) - return _values[loc] - else: - def y_slicer(df, dt): - i = _y_get_index(dt) - return _values[i:i + 1] - - last = np.zeros(len(x.columns)) - for i, date in enumerate(dates): - if not valid[i]: - cum_xy.append(last) - continue - - x_slice = x_slicer(x, date) - y_slice = y_slicer(y, date) - - xy = last = last + np.dot(x_slice.T, y_slice) - cum_xy.append(xy) - - return cum_xy - - @cache_readonly - def _rank_raw(self): - rank = self._rolling_rank() - return rank[self._valid_indices] - - @cache_readonly - def _df_raw(self): - """Returns the degrees of freedom.""" - return self._rank_raw - - @cache_readonly - def _df_model_raw(self): - """Returns the raw model degrees of freedom.""" - return self._df_raw - 1 - - @cache_readonly - def _df_resid_raw(self): - """Returns the raw residual degrees of freedom.""" - return self._nobs - self._df_raw - - @cache_readonly - def _f_stat_raw(self): - """Returns the raw f-stat value.""" - from scipy.stats import f - - items = self.beta.columns - nobs = self._nobs - df = self._df_raw - df_resid = nobs - df - - # var_beta has not been newey-west adjusted - if self._nw_lags is None: - F = self._r2_raw / (self._r2_raw - self._r2_adj_raw) - - q = len(items) - if 'intercept' in items: - q -= 1 - - def get_result_simple(Fst, d): - return Fst, (q, d), 1 - f.cdf(Fst, q, d) - - # Compute the P-value for each pair - result = starmap(get_result_simple, zip(F, df_resid)) - - return list(result) - - K = len(items) - R = np.eye(K) - r = np.zeros((K, 1)) - - try: - intercept = items.get_loc('intercept') - R = np.concatenate((R[0: intercept], R[intercept + 1:])) - r = np.concatenate((r[0: intercept], r[intercept + 1:])) - except KeyError: - # no intercept - pass - - def get_result(beta, vcov, n, d): - return math.calc_F(R, r, beta, vcov, n, d) - - results = starmap(get_result, - zip(self._beta_raw, self._var_beta_raw, nobs, df)) - - return list(results) - - @cache_readonly - def _p_value_raw(self): - """Returns the raw p values.""" - from scipy.stats import t - - result = [2 * t.sf(a, b) - for a, b in zip(np.fabs(self._t_stat_raw), - self._df_resid_raw)] - - return np.array(result) - - @cache_readonly - def _resid_stats(self): - uncentered_sst = [] - sst = [] - sse = [] - - Yreg = self._y - Y = self._y_trans - X = self._x_trans - weights = self._weights - - dates = self._index - window = self._window - for n, index in enumerate(self._valid_indices): - if self._is_rolling and index >= window: - prior_date = dates[index - window + 1] - else: - prior_date = dates[0] - - date = dates[index] - beta = self._beta_raw[n] - - X_slice = X.truncate(before=prior_date, after=date).values - Y_slice = 
_y_converter(Y.truncate(before=prior_date, after=date)) - - resid = Y_slice - np.dot(X_slice, beta) - - if weights is not None: - Y_slice = _y_converter(Yreg.truncate(before=prior_date, - after=date)) - weights_slice = weights.truncate(prior_date, date) - demeaned = Y_slice - np.average(Y_slice, weights=weights_slice) - SS_total = (weights_slice * demeaned ** 2).sum() - else: - SS_total = ((Y_slice - Y_slice.mean()) ** 2).sum() - - SS_err = (resid ** 2).sum() - SST_uncentered = (Y_slice ** 2).sum() - - sse.append(SS_err) - sst.append(SS_total) - uncentered_sst.append(SST_uncentered) - - return { - 'sse': np.array(sse), - 'centered_tss': np.array(sst), - 'uncentered_tss': np.array(uncentered_sst), - } - - @cache_readonly - def _rmse_raw(self): - """Returns the raw rmse values.""" - return np.sqrt(self._resid_stats['sse'] / self._df_resid_raw) - - @cache_readonly - def _r2_raw(self): - rs = self._resid_stats - - if self._use_centered_tss: - return 1 - rs['sse'] / rs['centered_tss'] - else: - return 1 - rs['sse'] / rs['uncentered_tss'] - - @cache_readonly - def _r2_adj_raw(self): - """Returns the raw r-squared adjusted values.""" - nobs = self._nobs - factors = (nobs - 1) / (nobs - self._df_raw) - return 1 - (1 - self._r2_raw) * factors - - @cache_readonly - def _resid_raw(self): - """Returns the raw residuals.""" - return (self._y.values - self._y_fitted_raw) - - @cache_readonly - def _std_err_raw(self): - """Returns the raw standard err values.""" - results = [] - for i in range(len(self._var_beta_raw)): - results.append(np.sqrt(np.diag(self._var_beta_raw[i]))) - - return np.array(results) - - @cache_readonly - def _t_stat_raw(self): - """Returns the raw t-stat value.""" - return self._beta_raw / self._std_err_raw - - @cache_readonly - def _var_beta_raw(self): - """Returns the raw covariance of beta.""" - x = self._x_trans - y = self._y_trans - dates = self._index - nobs = self._nobs - rmse = self._rmse_raw - beta = self._beta_raw - df = self._df_raw - window = self._window - cum_xx = self._cum_xx(self._x) - - results = [] - for n, i in enumerate(self._valid_indices): - xx = cum_xx[i] - date = dates[i] - - if self._is_rolling and i >= window: - xx = xx - cum_xx[i - window] - prior_date = dates[i - window + 1] - else: - prior_date = dates[0] - - x_slice = x.truncate(before=prior_date, after=date) - y_slice = y.truncate(before=prior_date, after=date) - xv = x_slice.values - yv = np.asarray(y_slice) - - if self._nw_lags is None: - result = math.inv(xx) * (rmse[n] ** 2) - else: - resid = yv - np.dot(xv, beta[n]) - m = (xv.T * resid).T - - xeps = math.newey_west(m, self._nw_lags, nobs[n], df[n], - self._nw_overlap) - - xx_inv = math.inv(xx) - result = np.dot(xx_inv, np.dot(xeps, xx_inv)) - - results.append(result) - - return np.array(results) - - @cache_readonly - def _forecast_mean_raw(self): - """Returns the raw covariance of beta.""" - nobs = self._nobs - window = self._window - - # x should be ones - dummy = DataFrame(index=self._y.index) - dummy['y'] = 1 - - cum_xy = self._cum_xy(dummy, self._y) - - results = [] - for n, i in enumerate(self._valid_indices): - sumy = cum_xy[i] - - if self._is_rolling and i >= window: - sumy = sumy - cum_xy[i - window] - - results.append(sumy[0] / nobs[n]) - - return np.array(results) - - @cache_readonly - def _forecast_vol_raw(self): - """Returns the raw covariance of beta.""" - beta = self._beta_raw - window = self._window - dates = self._index - x = self._x - - results = [] - for n, i in enumerate(self._valid_indices): - date = dates[i] - if 
self._is_rolling and i >= window: - prior_date = dates[i - window + 1] - else: - prior_date = dates[0] - - x_slice = x.truncate(prior_date, date).values - x_demeaned = x_slice - x_slice.mean(0) - x_cov = np.dot(x_demeaned.T, x_demeaned) / (len(x_slice) - 1) - - B = beta[n] - result = np.dot(B, np.dot(x_cov, B)) - results.append(np.sqrt(result)) - - return np.array(results) - - @cache_readonly - def _y_fitted_raw(self): - """Returns the raw fitted y values.""" - return (self._x.values * self._beta_matrix(lag=0)).sum(1) - - @cache_readonly - def _y_predict_raw(self): - """Returns the raw predicted y values.""" - return (self._x.values * self._beta_matrix(lag=1)).sum(1) - - @cache_readonly - def _results(self): - results = {} - for result in self.RESULT_FIELDS: - value = getattr(self, result) - if isinstance(value, Series): - value = value[self.beta.index[-1]] - elif isinstance(value, DataFrame): - value = value.xs(self.beta.index[-1]) - else: # pragma: no cover - raise Exception('Problem retrieving %s' % result) - results[result] = value - - return results - - @cache_readonly - def _window_time_obs(self): - window_obs = (Series(self._time_obs_count > 0) - .rolling(self._window, min_periods=1) - .sum() - .values - ) - - window_obs[np.isnan(window_obs)] = 0 - return window_obs.astype(int) - - @cache_readonly - def _nobs_raw(self): - if self._is_rolling: - window = self._window - else: - # expanding case - window = len(self._index) - - result = Series(self._time_obs_count).rolling( - window, min_periods=1).sum().values - - return result.astype(int) - - def _beta_matrix(self, lag=0): - if lag < 0: - raise AssertionError("'lag' must be greater than or equal to 0, " - "input was {0}".format(lag)) - - betas = self._beta_raw - - labels = np.arange(len(self._y)) - lag - indexer = self._valid_obs_labels.searchsorted(labels, side='left') - indexer[indexer == len(betas)] = len(betas) - 1 - - beta_matrix = betas[indexer] - beta_matrix[labels < self._valid_obs_labels[0]] = np.NaN - - return beta_matrix - - @cache_readonly - def _valid_obs_labels(self): - dates = self._index[self._valid_indices] - return self._y.index.searchsorted(dates) - - @cache_readonly - def _nobs(self): - return self._nobs_raw[self._valid_indices] - - @property - def nobs(self): - return Series(self._nobs, index=self._result_index) - - @cache_readonly - def _enough_obs(self): - # XXX: what's the best way to determine where to start? - return self._nobs_raw >= max(self._min_periods, - len(self._x.columns) + 1) - - -def _safe_update(d, other): - """ - Combine dictionaries with non-overlapping keys - """ - for k, v in compat.iteritems(other): - if k in d: - raise Exception('Duplicate regressor: %s' % k) - - d[k] = v - - -def _filter_data(lhs, rhs, weights=None): - """ - Cleans the input for single OLS. - - Parameters - ---------- - lhs : Series - Dependent variable in the regression. - rhs : dict, whose values are Series, DataFrame, or dict - Explanatory variables of the regression. - weights : array-like, optional - 1d array of weights. If None, equivalent to an unweighted OLS. 
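The searchsorted alignment in _beta_matrix above, which maps each observation to the beta row in effect lag periods earlier, can be seen in isolation with toy arrays; the labels and values below are illustrative only:

import numpy as np

# Betas were only estimated at observation labels 3, 6 and 9 (the windows
# with enough data); map every observation to a beta row, lagged one period.
valid_obs_labels = np.array([3, 6, 9])
betas = np.array([[1.0], [2.0], [3.0]])  # one row of coefficients per label
lag, n = 1, 12

labels = np.arange(n) - lag
indexer = valid_obs_labels.searchsorted(labels, side='left')
indexer[indexer == len(betas)] = len(betas) - 1
beta_matrix = betas[indexer]
# Observations before the first valid window get no beta.
beta_matrix[labels < valid_obs_labels[0]] = np.nan
print(beta_matrix.ravel())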
- - Returns - ------- - Series, DataFrame - Cleaned lhs and rhs - """ - if not isinstance(lhs, Series): - if len(lhs) != len(rhs): - raise AssertionError("length of lhs must equal length of rhs") - lhs = Series(lhs, index=rhs.index) - - rhs = _combine_rhs(rhs) - lhs = DataFrame({'__y__': lhs}, dtype=float) - pre_filt_rhs = rhs.dropna(how='any') - - combined = rhs.join(lhs, how='outer') - if weights is not None: - combined['__weights__'] = weights - - valid = (combined.count(1) == len(combined.columns)).values - index = combined.index - combined = combined[valid] - - if weights is not None: - filt_weights = combined.pop('__weights__') - else: - filt_weights = None - - filt_lhs = combined.pop('__y__') - filt_rhs = combined - - if hasattr(filt_weights, 'to_dense'): - filt_weights = filt_weights.to_dense() - - return (filt_lhs.to_dense(), filt_rhs.to_dense(), filt_weights, - pre_filt_rhs.to_dense(), index, valid) - - -def _combine_rhs(rhs): - """ - Glue input X variables together while checking for potential - duplicates - """ - series = {} - - if isinstance(rhs, Series): - series['x'] = rhs - elif isinstance(rhs, DataFrame): - series = rhs.copy() - elif isinstance(rhs, dict): - for name, value in compat.iteritems(rhs): - if isinstance(value, Series): - _safe_update(series, {name: value}) - elif isinstance(value, (dict, DataFrame)): - _safe_update(series, value) - else: # pragma: no cover - raise Exception('Invalid RHS data type: %s' % type(value)) - else: # pragma: no cover - raise Exception('Invalid RHS type: %s' % type(rhs)) - - if not isinstance(series, DataFrame): - series = DataFrame(series, dtype=float) - - return series - -# A little kludge so we can use this method for both -# MovingOLS and MovingPanelOLS - - -def _y_converter(y): - y = y.values.squeeze() - if y.ndim == 0: # pragma: no cover - return np.array([y]) - else: - return y - - -def f_stat_to_dict(result): - f_stat, shape, p_value = result - - result = {} - result['f-stat'] = f_stat - result['DF X'] = shape[0] - result['DF Resid'] = shape[1] - result['p-value'] = p_value - - return result diff --git a/pandas/stats/plm.py b/pandas/stats/plm.py deleted file mode 100644 index 806dc289f843a..0000000000000 --- a/pandas/stats/plm.py +++ /dev/null @@ -1,863 +0,0 @@ -""" -Linear regression objects for panel data -""" - -# pylint: disable-msg=W0231 -# pylint: disable-msg=E1101,E1103 - -# flake8: noqa - -from __future__ import division -from pandas.compat import range -from pandas import compat -import warnings - -import numpy as np - -from pandas.core.panel import Panel -from pandas.core.frame import DataFrame -from pandas.core.reshape import get_dummies -from pandas.core.series import Series -from pandas.stats.ols import OLS, MovingOLS -import pandas.stats.common as com -import pandas.stats.math as math -from pandas.util.decorators import cache_readonly - - -class PanelOLS(OLS): - """Implements panel OLS. - - See ols function docs - """ - _panel_model = True - - def __init__(self, y, x, weights=None, intercept=True, nw_lags=None, - entity_effects=False, time_effects=False, x_effects=None, - cluster=None, dropped_dummies=None, verbose=False, - nw_overlap=False): - import warnings - warnings.warn("The pandas.stats.plm module is deprecated and will be " - "removed in a future version. 
We refer to external packages " - "like statsmodels, see some examples here: " - "http://www.statsmodels.org/stable/mixed_linear.html", - FutureWarning, stacklevel=4) - self._x_orig = x - self._y_orig = y - self._weights = weights - - self._intercept = intercept - self._nw_lags = nw_lags - self._nw_overlap = nw_overlap - self._entity_effects = entity_effects - self._time_effects = time_effects - self._x_effects = x_effects - self._dropped_dummies = dropped_dummies or {} - self._cluster = com._get_cluster_type(cluster) - self._verbose = verbose - - (self._x, self._x_trans, - self._x_filtered, self._y, - self._y_trans) = self._prepare_data() - - self._index = self._x.index.levels[0] - - self._T = len(self._index) - - def log(self, msg): - if self._verbose: # pragma: no cover - print(msg) - - def _prepare_data(self): - """Cleans and stacks input data into DataFrame objects - - If time effects is True, then we turn off intercepts and omit an item - from every (entity and x) fixed effect. - - Otherwise: - - If we have an intercept, we omit an item from every fixed effect. - - Else, we omit an item from every fixed effect except one of them. - - The categorical variables will get dropped from x. - """ - (x, x_filtered, y, weights, cat_mapping) = self._filter_data() - - self.log('Adding dummies to X variables') - x = self._add_dummies(x, cat_mapping) - - self.log('Adding dummies to filtered X variables') - x_filtered = self._add_dummies(x_filtered, cat_mapping) - - if self._x_effects: - x = x.drop(self._x_effects, axis=1) - x_filtered = x_filtered.drop(self._x_effects, axis=1) - - if self._time_effects: - x_regressor = x.sub(x.mean(level=0), level=0) - - unstacked_y = y.unstack() - y_regressor = unstacked_y.sub(unstacked_y.mean(1), axis=0).stack() - y_regressor.index = y.index - - elif self._intercept: - # only add intercept when no time effects - self.log('Adding intercept') - x = x_regressor = add_intercept(x) - x_filtered = add_intercept(x_filtered) - y_regressor = y - else: - self.log('No intercept added') - x_regressor = x - y_regressor = y - - if weights is not None: - if not y_regressor.index.equals(weights.index): - raise AssertionError("y_regressor and weights must have the " - "same index") - if not x_regressor.index.equals(weights.index): - raise AssertionError("x_regressor and weights must have the " - "same index") - - rt_weights = np.sqrt(weights) - y_regressor = y_regressor * rt_weights - x_regressor = x_regressor.mul(rt_weights, axis=0) - - return x, x_regressor, x_filtered, y, y_regressor - - def _filter_data(self): - """ - - """ - data = self._x_orig - cat_mapping = {} - - if isinstance(data, DataFrame): - data = data.to_panel() - else: - if isinstance(data, Panel): - data = data.copy() - - data, cat_mapping = self._convert_x(data) - - if not isinstance(data, Panel): - data = Panel.from_dict(data, intersect=True) - - x_names = data.items - - if self._weights is not None: - data['__weights__'] = self._weights - - # Filter x's without y (so we can make a prediction) - filtered = data.to_frame() - - # Filter all data together using to_frame - - # convert to DataFrame - y = self._y_orig - if isinstance(y, Series): - y = y.unstack() - - data['__y__'] = y - data_long = data.to_frame() - - x_filt = filtered.filter(x_names) - x = data_long.filter(x_names) - y = data_long['__y__'] - - if self._weights is not None and not self._weights.empty: - weights = data_long['__weights__'] - else: - weights = None - - return x, x_filt, y, weights, cat_mapping - - def _convert_x(self, x): - # 
Converts non-numeric data in x to floats. x_converted is the - # DataFrame with converted values, and x_conversion is a dict that - # provides the reverse mapping. For example, if 'A' was converted to 0 - # for x named 'variety', then x_conversion['variety'][0] is 'A'. - x_converted = {} - cat_mapping = {} - # x can be either a dict or a Panel, but in Python 3, dicts don't have - # .iteritems - iteritems = getattr(x, 'iteritems', x.items) - for key, df in iteritems(): - if not isinstance(df, DataFrame): - raise AssertionError("all input items must be DataFrames, " - "at least one is of " - "type {0}".format(type(df))) - - if _is_numeric(df): - x_converted[key] = df - else: - try: - df = df.astype(float) - except (TypeError, ValueError): - values = df.values - distinct_values = sorted(set(values.flat)) - cat_mapping[key] = dict(enumerate(distinct_values)) - new_values = np.searchsorted(distinct_values, values) - x_converted[key] = DataFrame(new_values, index=df.index, - columns=df.columns) - - if len(cat_mapping) == 0: - x_converted = x - - return x_converted, cat_mapping - - def _add_dummies(self, panel, mapping): - """ - Add entity and / or categorical dummies to input X DataFrame - - Returns - ------- - DataFrame - """ - panel = self._add_entity_effects(panel) - panel = self._add_categorical_dummies(panel, mapping) - - return panel - - def _add_entity_effects(self, panel): - """ - Add entity dummies to panel - - Returns - ------- - DataFrame - """ - from pandas.core.reshape import make_axis_dummies - - if not self._entity_effects: - return panel - - self.log('-- Adding entity fixed effect dummies') - - dummies = make_axis_dummies(panel, 'minor') - - if not self._use_all_dummies: - if 'entity' in self._dropped_dummies: - to_exclude = str(self._dropped_dummies.get('entity')) - else: - to_exclude = dummies.columns[0] - - if to_exclude not in dummies.columns: - raise Exception('%s not in %s' % (to_exclude, - dummies.columns)) - - self.log('-- Excluding dummy for entity: %s' % to_exclude) - - dummies = dummies.filter(dummies.columns.difference([to_exclude])) - - dummies = dummies.add_prefix('FE_') - panel = panel.join(dummies) - - return panel - - def _add_categorical_dummies(self, panel, cat_mappings): - """ - Add categorical dummies to panel - - Returns - ------- - DataFrame - """ - if not self._x_effects: - return panel - - dropped_dummy = (self._entity_effects and not self._use_all_dummies) - - for effect in self._x_effects: - self.log('-- Adding fixed effect dummies for %s' % effect) - - dummies = get_dummies(panel[effect]) - - val_map = cat_mappings.get(effect) - if val_map: - val_map = dict((v, k) for k, v in compat.iteritems(val_map)) - - if dropped_dummy or not self._use_all_dummies: - if effect in self._dropped_dummies: - to_exclude = mapped_name = self._dropped_dummies.get( - effect) - - if val_map: - mapped_name = val_map[to_exclude] - else: - to_exclude = mapped_name = dummies.columns[0] - - if mapped_name not in dummies.columns: # pragma: no cover - raise Exception('%s not in %s' % (to_exclude, - dummies.columns)) - - self.log( - '-- Excluding dummy for %s: %s' % (effect, to_exclude)) - - dummies = dummies.filter( - dummies.columns.difference([mapped_name])) - dropped_dummy = True - - dummies = _convertDummies(dummies, cat_mappings.get(effect)) - dummies = dummies.add_prefix('%s_' % effect) - panel = panel.join(dummies) - - return panel - - @property - def _use_all_dummies(self): - """ - In the case of using an intercept or including time fixed - effects, completely partitioning 
the sample would make the X - not full rank. - """ - return (not self._intercept and not self._time_effects) - - @cache_readonly - def _beta_raw(self): - """Runs the regression and returns the beta.""" - X = self._x_trans.values - Y = self._y_trans.values.squeeze() - - beta, _, _, _ = np.linalg.lstsq(X, Y) - - return beta - - @cache_readonly - def beta(self): - return Series(self._beta_raw, index=self._x.columns) - - @cache_readonly - def _df_model_raw(self): - """Returns the raw model degrees of freedom.""" - return self._df_raw - 1 - - @cache_readonly - def _df_resid_raw(self): - """Returns the raw residual degrees of freedom.""" - return self._nobs - self._df_raw - - @cache_readonly - def _df_raw(self): - """Returns the degrees of freedom.""" - df = math.rank(self._x_trans.values) - if self._time_effects: - df += self._total_times - - return df - - @cache_readonly - def _r2_raw(self): - Y = self._y_trans.values.squeeze() - X = self._x_trans.values - - resid = Y - np.dot(X, self._beta_raw) - - SSE = (resid ** 2).sum() - - if self._use_centered_tss: - SST = ((Y - np.mean(Y)) ** 2).sum() - else: - SST = (Y ** 2).sum() - - return 1 - SSE / SST - - @property - def _use_centered_tss(self): - # has_intercept = np.abs(self._resid_raw.sum()) < _FP_ERR - return self._intercept or self._entity_effects or self._time_effects - - @cache_readonly - def _r2_adj_raw(self): - """Returns the raw r-squared adjusted values.""" - nobs = self._nobs - factors = (nobs - 1) / (nobs - self._df_raw) - return 1 - (1 - self._r2_raw) * factors - - @cache_readonly - def _resid_raw(self): - Y = self._y.values.squeeze() - X = self._x.values - return Y - np.dot(X, self._beta_raw) - - @cache_readonly - def resid(self): - return self._unstack_vector(self._resid_raw) - - @cache_readonly - def _rmse_raw(self): - """Returns the raw rmse values.""" - # X = self._x.values - # Y = self._y.values.squeeze() - - X = self._x_trans.values - Y = self._y_trans.values.squeeze() - - resid = Y - np.dot(X, self._beta_raw) - ss = (resid ** 2).sum() - return np.sqrt(ss / (self._nobs - self._df_raw)) - - @cache_readonly - def _var_beta_raw(self): - cluster_axis = None - if self._cluster == 'time': - cluster_axis = 0 - elif self._cluster == 'entity': - cluster_axis = 1 - - x = self._x - y = self._y - - if self._time_effects: - xx = _xx_time_effects(x, y) - else: - xx = np.dot(x.values.T, x.values) - - return _var_beta_panel(y, x, self._beta_raw, xx, - self._rmse_raw, cluster_axis, self._nw_lags, - self._nobs, self._df_raw, self._nw_overlap) - - @cache_readonly - def _y_fitted_raw(self): - """Returns the raw fitted y values.""" - return np.dot(self._x.values, self._beta_raw) - - @cache_readonly - def y_fitted(self): - return self._unstack_vector(self._y_fitted_raw, index=self._x.index) - - def _unstack_vector(self, vec, index=None): - if index is None: - index = self._y_trans.index - panel = DataFrame(vec, index=index, columns=['dummy']) - return panel.to_panel()['dummy'] - - def _unstack_y(self, vec): - unstacked = self._unstack_vector(vec) - return unstacked.reindex(self.beta.index) - - @cache_readonly - def _time_obs_count(self): - return self._y_trans.count(level=0).values - - @cache_readonly - def _time_has_obs(self): - return self._time_obs_count > 0 - - @property - def _nobs(self): - return len(self._y) - - -def _convertDummies(dummies, mapping): - # cleans up the names of the generated dummies - new_items = [] - for item in dummies.columns: - if not mapping: - var = str(item) - if isinstance(item, float): - var = '%g' % item - - 
new_items.append(var) - else: - # renames the dummies if a conversion dict is provided - new_items.append(mapping[int(item)]) - - dummies = DataFrame(dummies.values, index=dummies.index, - columns=new_items) - - return dummies - - -def _is_numeric(df): - for col in df: - if df[col].dtype.name == 'object': - return False - - return True - - -def add_intercept(panel, name='intercept'): - """ - Add column of ones to input panel - - Parameters - ---------- - panel: Panel / DataFrame - name: string, default 'intercept'] - - Returns - ------- - New object (same type as input) - """ - panel = panel.copy() - panel[name] = 1. - - return panel.consolidate() - - -class MovingPanelOLS(MovingOLS, PanelOLS): - """Implements rolling/expanding panel OLS. - - See ols function docs - """ - _panel_model = True - - def __init__(self, y, x, weights=None, - window_type='expanding', window=None, - min_periods=None, - min_obs=None, - intercept=True, - nw_lags=None, nw_overlap=False, - entity_effects=False, - time_effects=False, - x_effects=None, - cluster=None, - dropped_dummies=None, - verbose=False): - - self._args = dict(intercept=intercept, - nw_lags=nw_lags, - nw_overlap=nw_overlap, - entity_effects=entity_effects, - time_effects=time_effects, - x_effects=x_effects, - cluster=cluster, - dropped_dummies=dropped_dummies, - verbose=verbose) - - PanelOLS.__init__(self, y=y, x=x, weights=weights, - **self._args) - - self._set_window(window_type, window, min_periods) - - if min_obs is None: - min_obs = len(self._x.columns) + 1 - - self._min_obs = min_obs - - @cache_readonly - def resid(self): - return self._unstack_y(self._resid_raw) - - @cache_readonly - def y_fitted(self): - return self._unstack_y(self._y_fitted_raw) - - @cache_readonly - def y_predict(self): - """Returns the predicted y values.""" - return self._unstack_y(self._y_predict_raw) - - def lagged_y_predict(self, lag=1): - """ - Compute forecast Y value lagging coefficient by input number - of time periods - - Parameters - ---------- - lag : int - - Returns - ------- - DataFrame - """ - x = self._x.values - betas = self._beta_matrix(lag=lag) - return self._unstack_y((betas * x).sum(1)) - - @cache_readonly - def _rolling_ols_call(self): - return self._calc_betas(self._x_trans, self._y_trans) - - @cache_readonly - def _df_raw(self): - """Returns the degrees of freedom.""" - df = self._rolling_rank() - - if self._time_effects: - df += self._window_time_obs - - return df[self._valid_indices] - - @cache_readonly - def _var_beta_raw(self): - """Returns the raw covariance of beta.""" - x = self._x - y = self._y - - dates = x.index.levels[0] - - cluster_axis = None - if self._cluster == 'time': - cluster_axis = 0 - elif self._cluster == 'entity': - cluster_axis = 1 - - nobs = self._nobs - rmse = self._rmse_raw - beta = self._beta_raw - df = self._df_raw - window = self._window - - if not self._time_effects: - # Non-transformed X - cum_xx = self._cum_xx(x) - - results = [] - for n, i in enumerate(self._valid_indices): - if self._is_rolling and i >= window: - prior_date = dates[i - window + 1] - else: - prior_date = dates[0] - - date = dates[i] - - x_slice = x.truncate(prior_date, date) - y_slice = y.truncate(prior_date, date) - - if self._time_effects: - xx = _xx_time_effects(x_slice, y_slice) - else: - xx = cum_xx[i] - if self._is_rolling and i >= window: - xx = xx - cum_xx[i - window] - - result = _var_beta_panel(y_slice, x_slice, beta[n], xx, rmse[n], - cluster_axis, self._nw_lags, - nobs[n], df[n], self._nw_overlap) - - results.append(result) - - return 
np.array(results) - - @cache_readonly - def _resid_raw(self): - beta_matrix = self._beta_matrix(lag=0) - - Y = self._y.values.squeeze() - X = self._x.values - resid = Y - (X * beta_matrix).sum(1) - - return resid - - @cache_readonly - def _y_fitted_raw(self): - x = self._x.values - betas = self._beta_matrix(lag=0) - return (betas * x).sum(1) - - @cache_readonly - def _y_predict_raw(self): - """Returns the raw predicted y values.""" - x = self._x.values - betas = self._beta_matrix(lag=1) - return (betas * x).sum(1) - - def _beta_matrix(self, lag=0): - if lag < 0: - raise AssertionError("'lag' must be greater than or equal to 0, " - "input was {0}".format(lag)) - - index = self._y_trans.index - major_labels = index.labels[0] - labels = major_labels - lag - indexer = self._valid_indices.searchsorted(labels, side='left') - - beta_matrix = self._beta_raw[indexer] - beta_matrix[labels < self._valid_indices[0]] = np.NaN - - return beta_matrix - - @cache_readonly - def _enough_obs(self): - # XXX: what's the best way to determine where to start? - # TODO: write unit tests for this - - rank_threshold = len(self._x.columns) + 1 - if self._min_obs < rank_threshold: # pragma: no cover - warnings.warn('min_obs is smaller than rank of X matrix') - - enough_observations = self._nobs_raw >= self._min_obs - enough_time_periods = self._window_time_obs >= self._min_periods - return enough_time_periods & enough_observations - - -def create_ols_dict(attr): - def attr_getter(self): - d = {} - for k, v in compat.iteritems(self.results): - result = getattr(v, attr) - d[k] = result - - return d - - return attr_getter - - -def create_ols_attr(attr): - return property(create_ols_dict(attr)) - - -class NonPooledPanelOLS(object): - """Implements non-pooled panel OLS. - - Parameters - ---------- - y : DataFrame - x : Series, DataFrame, or dict of Series - intercept : bool - True if you want an intercept. - nw_lags : None or int - Number of Newey-West lags. - window_type : {'full_sample', 'rolling', 'expanding'} - 'full_sample' by default - window : int - size of window (for rolling/expanding OLS) - """ - - ATTRIBUTES = [ - 'beta', - 'df', - 'df_model', - 'df_resid', - 'f_stat', - 'p_value', - 'r2', - 'r2_adj', - 'resid', - 'rmse', - 'std_err', - 'summary_as_matrix', - 't_stat', - 'var_beta', - 'x', - 'y', - 'y_fitted', - 'y_predict' - ] - - def __init__(self, y, x, window_type='full_sample', window=None, - min_periods=None, intercept=True, nw_lags=None, - nw_overlap=False): - - import warnings - warnings.warn("The pandas.stats.plm module is deprecated and will be " - "removed in a future version. 
We refer to external packages " - "like statsmodels, see some examples here: " - "http://www.statsmodels.org/stable/mixed_linear.html", - FutureWarning, stacklevel=4) - - for attr in self.ATTRIBUTES: - setattr(self.__class__, attr, create_ols_attr(attr)) - - results = {} - - for entity in y: - entity_y = y[entity] - - entity_x = {} - for x_var in x: - entity_x[x_var] = x[x_var][entity] - - from pandas.stats.interface import ols - results[entity] = ols(y=entity_y, - x=entity_x, - window_type=window_type, - window=window, - min_periods=min_periods, - intercept=intercept, - nw_lags=nw_lags, - nw_overlap=nw_overlap) - - self.results = results - - -def _var_beta_panel(y, x, beta, xx, rmse, cluster_axis, - nw_lags, nobs, df, nw_overlap): - xx_inv = math.inv(xx) - - yv = y.values - - if cluster_axis is None: - if nw_lags is None: - return xx_inv * (rmse ** 2) - else: - resid = yv - np.dot(x.values, beta) - m = (x.values.T * resid).T - - xeps = math.newey_west(m, nw_lags, nobs, df, nw_overlap) - - return np.dot(xx_inv, np.dot(xeps, xx_inv)) - else: - Xb = np.dot(x.values, beta).reshape((len(x.values), 1)) - resid = DataFrame(yv[:, None] - Xb, index=y.index, columns=['resid']) - - if cluster_axis == 1: - x = x.swaplevel(0, 1).sort_index(level=0) - resid = resid.swaplevel(0, 1).sort_index(level=0) - - m = _group_agg(x.values * resid.values, x.index._bounds, - lambda x: np.sum(x, axis=0)) - - if nw_lags is None: - nw_lags = 0 - - xox = 0 - for i in range(len(x.index.levels[0])): - xox += math.newey_west(m[i: i + 1], nw_lags, - nobs, df, nw_overlap) - - return np.dot(xx_inv, np.dot(xox, xx_inv)) - - -def _group_agg(values, bounds, f): - """ - R-style aggregator - - Parameters - ---------- - values : N-length or N x K ndarray - bounds : B-length ndarray - f : ndarray aggregation function - - Returns - ------- - ndarray with same length as bounds array - """ - if values.ndim == 1: - N = len(values) - result = np.empty(len(bounds), dtype=float) - elif values.ndim == 2: - N, K = values.shape - result = np.empty((len(bounds), K), dtype=float) - - testagg = f(values[:min(1, len(values))]) - if isinstance(testagg, np.ndarray) and testagg.ndim == 2: - raise AssertionError('Function must reduce') - - for i, left_bound in enumerate(bounds): - if i == len(bounds) - 1: - right_bound = N - else: - right_bound = bounds[i + 1] - - result[i] = f(values[left_bound:right_bound]) - - return result - - -def _xx_time_effects(x, y): - """ - Returns X'X - (X'T) (T'T)^-1 (T'X) - """ - # X'X - xx = np.dot(x.values.T, x.values) - xt = x.sum(level=0).values - - count = y.unstack().count(1).values - selector = count > 0 - - # X'X - (T'T)^-1 (T'X) - xt = xt[selector] - count = count[selector] - - return xx - np.dot(xt.T / count, xt) diff --git a/pandas/stats/tests/__init__.py b/pandas/stats/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/stats/tests/common.py b/pandas/stats/tests/common.py deleted file mode 100644 index 0ce4b20a4b719..0000000000000 --- a/pandas/stats/tests/common.py +++ /dev/null @@ -1,162 +0,0 @@ -# pylint: disable-msg=W0611,W0402 -# flake8: noqa - -from datetime import datetime -import string -import nose - -import numpy as np - -from pandas import DataFrame, bdate_range -from pandas.util.testing import assert_almost_equal # imported in other tests -import pandas.util.testing as tm - -N = 100 -K = 4 - -start = datetime(2007, 1, 1) -DATE_RANGE = bdate_range(start, periods=N) - -COLS = ['Col' + c for c in string.ascii_uppercase[:K]] - - -def makeDataFrame(): - 
data = DataFrame(np.random.randn(N, K), - columns=COLS, - index=DATE_RANGE) - - return data - - -def getBasicDatasets(): - A = makeDataFrame() - B = makeDataFrame() - C = makeDataFrame() - - return A, B, C - - -def check_for_scipy(): - try: - import scipy - except ImportError: - raise nose.SkipTest('no scipy') - - -def check_for_statsmodels(): - _have_statsmodels = True - try: - import statsmodels.api as sm - except ImportError: - try: - import scikits.statsmodels.api as sm - except ImportError: - raise nose.SkipTest('no statsmodels') - - -class BaseTest(tm.TestCase): - - def setUp(self): - check_for_scipy() - check_for_statsmodels() - - self.A, self.B, self.C = getBasicDatasets() - - self.createData1() - self.createData2() - self.createData3() - - def createData1(self): - date = datetime(2007, 1, 1) - date2 = datetime(2007, 1, 15) - date3 = datetime(2007, 1, 22) - - A = self.A.copy() - B = self.B.copy() - C = self.C.copy() - - A['ColA'][date] = np.NaN - B['ColA'][date] = np.NaN - C['ColA'][date] = np.NaN - C['ColA'][date2] = np.NaN - - # truncate data to save time - A = A[:30] - B = B[:30] - C = C[:30] - - self.panel_y = A - self.panel_x = {'B': B, 'C': C} - - self.series_panel_y = A.filter(['ColA']) - self.series_panel_x = {'B': B.filter(['ColA']), - 'C': C.filter(['ColA'])} - self.series_y = A['ColA'] - self.series_x = {'B': B['ColA'], - 'C': C['ColA']} - - def createData2(self): - y_data = [[1, np.NaN], - [2, 3], - [4, 5]] - y_index = [datetime(2000, 1, 1), - datetime(2000, 1, 2), - datetime(2000, 1, 3)] - y_cols = ['A', 'B'] - self.panel_y2 = DataFrame(np.array(y_data), index=y_index, - columns=y_cols) - - x1_data = [[6, np.NaN], - [7, 8], - [9, 30], - [11, 12]] - x1_index = [datetime(2000, 1, 1), - datetime(2000, 1, 2), - datetime(2000, 1, 3), - datetime(2000, 1, 4)] - x1_cols = ['A', 'B'] - x1 = DataFrame(np.array(x1_data), index=x1_index, - columns=x1_cols) - - x2_data = [[13, 14, np.NaN], - [15, np.NaN, np.NaN], - [16, 17, 48], - [19, 20, 21], - [22, 23, 24]] - x2_index = [datetime(2000, 1, 1), - datetime(2000, 1, 2), - datetime(2000, 1, 3), - datetime(2000, 1, 4), - datetime(2000, 1, 5)] - x2_cols = ['C', 'A', 'B'] - x2 = DataFrame(np.array(x2_data), index=x2_index, - columns=x2_cols) - - self.panel_x2 = {'x1': x1, 'x2': x2} - - def createData3(self): - y_data = [[1, 2], - [3, 4]] - y_index = [datetime(2000, 1, 1), - datetime(2000, 1, 2)] - y_cols = ['A', 'B'] - self.panel_y3 = DataFrame(np.array(y_data), index=y_index, - columns=y_cols) - - x1_data = [['A', 'B'], - ['C', 'A']] - x1_index = [datetime(2000, 1, 1), - datetime(2000, 1, 2)] - x1_cols = ['A', 'B'] - x1 = DataFrame(np.array(x1_data), index=x1_index, - columns=x1_cols) - - x2_data = [['foo', 'bar'], - ['baz', 'foo']] - x2_index = [datetime(2000, 1, 1), - datetime(2000, 1, 2)] - x2_cols = ['A', 'B'] - x2 = DataFrame(np.array(x2_data), index=x2_index, - columns=x2_cols) - - self.panel_x3 = {'x1': x1, 'x2': x2} diff --git a/pandas/stats/tests/test_fama_macbeth.py b/pandas/stats/tests/test_fama_macbeth.py deleted file mode 100644 index 0c9fcf775ad2d..0000000000000 --- a/pandas/stats/tests/test_fama_macbeth.py +++ /dev/null @@ -1,68 +0,0 @@ -# flake8: noqa - -from pandas import DataFrame, Panel -from pandas.stats.api import fama_macbeth -from .common import assert_almost_equal, BaseTest - -from pandas.compat import range -from pandas import compat -import pandas.util.testing as tm -import numpy as np - - -class TestFamaMacBeth(BaseTest): - - def testFamaMacBethRolling(self): - # self.checkFamaMacBethExtended('rolling', 
self.panel_x, self.panel_y, - # nw_lags_beta=2) - - # df = DataFrame(np.random.randn(50, 10)) - x = dict((k, DataFrame(np.random.randn(50, 10))) for k in 'abcdefg') - x = Panel.from_dict(x) - y = (DataFrame(np.random.randn(50, 10)) + - DataFrame(0.01 * np.random.randn(50, 10))) - self.checkFamaMacBethExtended('rolling', x, y, nw_lags_beta=2) - self.checkFamaMacBethExtended('expanding', x, y, nw_lags_beta=2) - - def checkFamaMacBethExtended(self, window_type, x, y, **kwds): - window = 25 - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = fama_macbeth(y=y, x=x, window_type=window_type, window=window, - **kwds) - self._check_stuff_works(result) - - index = result._index - time = len(index) - - for i in range(time - window + 1): - if window_type == 'rolling': - start = index[i] - else: - start = index[0] - - end = index[i + window - 1] - - x2 = {} - for k, v in x.iteritems(): - x2[k] = v.truncate(start, end) - y2 = y.truncate(start, end) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - reference = fama_macbeth(y=y2, x=x2, **kwds) - # reference._stats is tuple - assert_almost_equal(reference._stats, result._stats[:, i], - check_dtype=False) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - static = fama_macbeth(y=y2, x=x2, **kwds) - self._check_stuff_works(static) - - def _check_stuff_works(self, result): - # does it work? - attrs = ['mean_beta', 'std_beta', 't_stat'] - for attr in attrs: - getattr(result, attr) - - # does it work? - result.summary diff --git a/pandas/stats/tests/test_math.py b/pandas/stats/tests/test_math.py deleted file mode 100644 index 3f89dbcd20065..0000000000000 --- a/pandas/stats/tests/test_math.py +++ /dev/null @@ -1,59 +0,0 @@ -import nose - -from datetime import datetime -from numpy.random import randn -import numpy as np - -from pandas.core.api import Series, DataFrame, date_range -import pandas.util.testing as tm -import pandas.stats.math as pmath -from pandas import ols - -N, K = 100, 10 - -_have_statsmodels = True -try: - import statsmodels.api as sm -except ImportError: - try: - import scikits.statsmodels.api as sm # noqa - except ImportError: - _have_statsmodels = False - - -class TestMath(tm.TestCase): - - _nan_locs = np.arange(20, 40) - _inf_locs = np.array([]) - - def setUp(self): - arr = randn(N) - arr[self._nan_locs] = np.NaN - - self.arr = arr - self.rng = date_range(datetime(2009, 1, 1), periods=N) - - self.series = Series(arr.copy(), index=self.rng) - - self.frame = DataFrame(randn(N, K), index=self.rng, - columns=np.arange(K)) - - def test_rank_1d(self): - self.assertEqual(1, pmath.rank(self.series)) - self.assertEqual(0, pmath.rank(Series(0, self.series.index))) - - def test_solve_rect(self): - if not _have_statsmodels: - raise nose.SkipTest("no statsmodels") - - b = Series(np.random.randn(N), self.frame.index) - result = pmath.solve(self.frame, b) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - expected = ols(y=b, x=self.frame, intercept=False).beta - self.assertTrue(np.allclose(result, expected)) - - def test_inv_illformed(self): - singular = DataFrame(np.array([[1, 1], [2, 2]])) - rs = pmath.inv(singular) - expected = np.array([[0.1, 0.2], [0.1, 0.2]]) - self.assertTrue(np.allclose(rs, expected)) diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py deleted file mode 100644 index b90c51366c86f..0000000000000 --- a/pandas/stats/tests/test_ols.py +++ /dev/null @@ -1,968 +0,0 @@ -""" -Unit test suite for OLS 
and PanelOLS classes -""" - -# pylint: disable-msg=W0212 - -# flake8: noqa - -from __future__ import division - -from datetime import datetime -from pandas import compat -from distutils.version import LooseVersion -import nose -import numpy as np - -from pandas import date_range, bdate_range -from pandas.core.panel import Panel -from pandas import DataFrame, Index, Series, notnull, offsets -from pandas.stats.api import ols -from pandas.stats.ols import _filter_data -from pandas.stats.plm import NonPooledPanelOLS, PanelOLS -from pandas.util.testing import (assert_almost_equal, assert_series_equal, - assert_frame_equal, assertRaisesRegexp, slow) -import pandas.util.testing as tm -import pandas.compat as compat -from pandas.stats.tests.common import BaseTest - -_have_statsmodels = True -try: - import statsmodels.api as sm -except ImportError: - try: - import scikits.statsmodels.api as sm - except ImportError: - _have_statsmodels = False - - -def _check_repr(obj): - repr(obj) - str(obj) - - -def _compare_ols_results(model1, model2): - tm.assertIsInstance(model1, type(model2)) - - if hasattr(model1, '_window_type'): - _compare_moving_ols(model1, model2) - else: - _compare_fullsample_ols(model1, model2) - - -def _compare_fullsample_ols(model1, model2): - assert_series_equal(model1.beta, model2.beta) - - -def _compare_moving_ols(model1, model2): - assert_frame_equal(model1.beta, model2.beta) - - -class TestOLS(BaseTest): - - # TODO: Add tests for OLS y predict - # TODO: Right now we just check for consistency between full-sample and - # rolling/expanding results of the panel OLS. We should also cross-check - # with trusted implementations of panel OLS (e.g. R). - # TODO: Add tests for non pooled OLS. - - @classmethod - def setUpClass(cls): - super(TestOLS, cls).setUpClass() - try: - import matplotlib as mpl - mpl.use('Agg', warn=False) - except ImportError: - pass - - if not _have_statsmodels: - raise nose.SkipTest("no statsmodels") - - def testOLSWithDatasets_ccard(self): - self.checkDataSet(sm.datasets.ccard.load(), skip_moving=True) - self.checkDataSet(sm.datasets.cpunish.load(), skip_moving=True) - self.checkDataSet(sm.datasets.longley.load(), skip_moving=True) - self.checkDataSet(sm.datasets.stackloss.load(), skip_moving=True) - - @slow - def testOLSWithDatasets_copper(self): - self.checkDataSet(sm.datasets.copper.load()) - - @slow - def testOLSWithDatasets_scotland(self): - self.checkDataSet(sm.datasets.scotland.load()) - - # degenerate case fails on some platforms - # self.checkDataSet(datasets.ccard.load(), 39, 49) # one col in X all - # 0s - - def testWLS(self): - # WLS centered SS changed (fixed) in 0.5.0 - sm_version = sm.version.version - if sm_version < LooseVersion('0.5.0'): - raise nose.SkipTest("WLS centered SS not fixed in statsmodels" - " version {0}".format(sm_version)) - - X = DataFrame(np.random.randn(30, 4), columns=['A', 'B', 'C', 'D']) - Y = Series(np.random.randn(30)) - weights = X.std(1) - - self._check_wls(X, Y, weights) - - weights.loc[[5, 15]] = np.nan - Y[[2, 21]] = np.nan - self._check_wls(X, Y, weights) - - def _check_wls(self, x, y, weights): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=y, x=x, weights=1 / weights) - - combined = x.copy() - combined['__y__'] = y - combined['__weights__'] = weights - combined = combined.dropna() - - endog = combined.pop('__y__').values - aweights = combined.pop('__weights__').values - exog = sm.add_constant(combined.values, prepend=False) - - sm_result = sm.WLS(endog, exog, weights=1 / 
aweights).fit() - - assert_almost_equal(sm_result.params, result._beta_raw) - assert_almost_equal(sm_result.resid, result._resid_raw) - - self.checkMovingOLS('rolling', x, y, weights=weights) - self.checkMovingOLS('expanding', x, y, weights=weights) - - def checkDataSet(self, dataset, start=None, end=None, skip_moving=False): - exog = dataset.exog[start: end] - endog = dataset.endog[start: end] - x = DataFrame(exog, index=np.arange(exog.shape[0]), - columns=np.arange(exog.shape[1])) - y = Series(endog, index=np.arange(len(endog))) - - self.checkOLS(exog, endog, x, y) - - if not skip_moving: - self.checkMovingOLS('rolling', x, y) - self.checkMovingOLS('rolling', x, y, nw_lags=0) - self.checkMovingOLS('expanding', x, y, nw_lags=0) - self.checkMovingOLS('rolling', x, y, nw_lags=1) - self.checkMovingOLS('expanding', x, y, nw_lags=1) - self.checkMovingOLS('expanding', x, y, nw_lags=1, nw_overlap=True) - - def checkOLS(self, exog, endog, x, y): - reference = sm.OLS(endog, sm.add_constant(exog, prepend=False)).fit() - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=y, x=x) - - # check that sparse version is the same - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - sparse_result = ols(y=y.to_sparse(), x=x.to_sparse()) - _compare_ols_results(result, sparse_result) - - assert_almost_equal(reference.params, result._beta_raw) - assert_almost_equal(reference.df_model, result._df_model_raw) - assert_almost_equal(reference.df_resid, result._df_resid_raw) - assert_almost_equal(reference.fvalue, result._f_stat_raw[0]) - assert_almost_equal(reference.pvalues, result._p_value_raw) - assert_almost_equal(reference.rsquared, result._r2_raw) - assert_almost_equal(reference.rsquared_adj, result._r2_adj_raw) - assert_almost_equal(reference.resid, result._resid_raw) - assert_almost_equal(reference.bse, result._std_err_raw) - assert_almost_equal(reference.tvalues, result._t_stat_raw) - assert_almost_equal(reference.cov_params(), result._var_beta_raw) - assert_almost_equal(reference.fittedvalues, result._y_fitted_raw) - - _check_non_raw_results(result) - - def checkMovingOLS(self, window_type, x, y, weights=None, **kwds): - window = np.linalg.matrix_rank(x.values) * 2 - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - moving = ols(y=y, x=x, weights=weights, window_type=window_type, - window=window, **kwds) - - # check that sparse version is the same - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - sparse_moving = ols(y=y.to_sparse(), x=x.to_sparse(), - weights=weights, - window_type=window_type, - window=window, **kwds) - _compare_ols_results(moving, sparse_moving) - - index = moving._index - - for n, i in enumerate(moving._valid_indices): - if window_type == 'rolling' and i >= window: - prior_date = index[i - window + 1] - else: - prior_date = index[0] - - date = index[i] - - x_iter = {} - for k, v in compat.iteritems(x): - x_iter[k] = v.truncate(before=prior_date, after=date) - y_iter = y.truncate(before=prior_date, after=date) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - static = ols(y=y_iter, x=x_iter, weights=weights, **kwds) - - self.compare(static, moving, event_index=i, - result_index=n) - - _check_non_raw_results(moving) - - FIELDS = ['beta', 'df', 'df_model', 'df_resid', 'f_stat', 'p_value', - 'r2', 'r2_adj', 'rmse', 'std_err', 't_stat', - 'var_beta'] - - def compare(self, static, moving, event_index=None, - result_index=None): - - index = moving._index - 
- # Check resid if we have a time index specified - if event_index is not None: - ref = static._resid_raw[-1] - - label = index[event_index] - - res = moving.resid[label] - - assert_almost_equal(ref, res) - - ref = static._y_fitted_raw[-1] - res = moving.y_fitted[label] - - assert_almost_equal(ref, res) - - # Check y_fitted - - for field in self.FIELDS: - attr = '_%s_raw' % field - - ref = getattr(static, attr) - res = getattr(moving, attr) - - if result_index is not None: - res = res[result_index] - - assert_almost_equal(ref, res) - - def test_ols_object_dtype(self): - df = DataFrame(np.random.randn(20, 2), dtype=object) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=df[0], x=df[1]) - summary = repr(model) - - -class TestOLSMisc(tm.TestCase): - - """ - For test coverage with faux data - """ - @classmethod - def setUpClass(cls): - super(TestOLSMisc, cls).setUpClass() - if not _have_statsmodels: - raise nose.SkipTest("no statsmodels") - - def test_f_test(self): - x = tm.makeTimeDataFrame() - y = x.pop('A') - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x) - - hyp = '1*B+1*C+1*D=0' - result = model.f_test(hyp) - - hyp = ['1*B=0', - '1*C=0', - '1*D=0'] - result = model.f_test(hyp) - assert_almost_equal(result['f-stat'], model.f_stat['f-stat']) - - self.assertRaises(Exception, model.f_test, '1*A=0') - - def test_r2_no_intercept(self): - y = tm.makeTimeSeries() - x = tm.makeTimeDataFrame() - - x_with = x.copy() - x_with['intercept'] = 1. - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model1 = ols(y=y, x=x) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model2 = ols(y=y, x=x_with, intercept=False) - assert_series_equal(model1.beta, model2.beta) - - # TODO: can we infer whether the intercept is there... - self.assertNotEqual(model1.r2, model2.r2) - - # rolling - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model1 = ols(y=y, x=x, window=20) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model2 = ols(y=y, x=x_with, window=20, intercept=False) - assert_frame_equal(model1.beta, model2.beta) - self.assertTrue((model1.r2 != model2.r2).all()) - - def test_summary_many_terms(self): - x = DataFrame(np.random.randn(100, 20)) - y = np.random.randn(100) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x) - model.summary - - def test_y_predict(self): - y = tm.makeTimeSeries() - x = tm.makeTimeDataFrame() - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model1 = ols(y=y, x=x) - assert_series_equal(model1.y_predict, model1.y_fitted) - assert_almost_equal(model1._y_predict_raw, model1._y_fitted_raw) - - def test_predict(self): - y = tm.makeTimeSeries() - x = tm.makeTimeDataFrame() - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model1 = ols(y=y, x=x) - assert_series_equal(model1.predict(), model1.y_predict) - assert_series_equal(model1.predict(x=x), model1.y_predict) - assert_series_equal(model1.predict(beta=model1.beta), model1.y_predict) - - exog = x.copy() - exog['intercept'] = 1. - rs = Series(np.dot(exog.values, model1.beta.values), x.index) - assert_series_equal(model1.y_predict, rs) - - x2 = x.reindex(columns=x.columns[::-1]) - assert_series_equal(model1.predict(x=x2), model1.y_predict) - - x3 = x2 + 10 - pred3 = model1.predict(x=x3) - x3['intercept'] = 1. 
- x3 = x3.reindex(columns=model1.beta.index) - expected = Series(np.dot(x3.values, model1.beta.values), x3.index) - assert_series_equal(expected, pred3) - - beta = Series(0., model1.beta.index) - pred4 = model1.predict(beta=beta) - assert_series_equal(Series(0., pred4.index), pred4) - - def test_predict_longer_exog(self): - exogenous = {"1998": "4760", "1999": "5904", "2000": "4504", - "2001": "9808", "2002": "4241", "2003": "4086", - "2004": "4687", "2005": "7686", "2006": "3740", - "2007": "3075", "2008": "3753", "2009": "4679", - "2010": "5468", "2011": "7154", "2012": "4292", - "2013": "4283", "2014": "4595", "2015": "9194", - "2016": "4221", "2017": "4520"} - endogenous = {"1998": "691", "1999": "1580", "2000": "80", - "2001": "1450", "2002": "555", "2003": "956", - "2004": "877", "2005": "614", "2006": "468", - "2007": "191"} - - endog = Series(endogenous) - exog = Series(exogenous) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=endog, x=exog) - - pred = model.y_predict - self.assert_index_equal(pred.index, exog.index) - - def test_longpanel_series_combo(self): - wp = tm.makePanel() - lp = wp.to_frame() - - y = lp.pop('ItemA') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=lp, entity_effects=True, window=20) - self.assertTrue(notnull(model.beta.values).all()) - tm.assertIsInstance(model, PanelOLS) - model.summary - - def test_series_rhs(self): - y = tm.makeTimeSeries() - x = tm.makeTimeSeries() - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - expected = ols(y=y, x={'x': x}) - assert_series_equal(model.beta, expected.beta) - - # GH 5233/5250 - assert_series_equal(model.y_predict, model.predict(x=x)) - - def test_various_attributes(self): - # just make sure everything "works". 
test correctness elsewhere - - x = DataFrame(np.random.randn(100, 5)) - y = np.random.randn(100) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x, window=20) - - series_attrs = ['rank', 'df', 'forecast_mean', 'forecast_vol'] - - for attr in series_attrs: - value = getattr(model, attr) - tm.assertIsInstance(value, Series) - - # works - model._results - - def test_catch_regressor_overlap(self): - df1 = tm.makeTimeDataFrame().loc[:, ['A', 'B']] - df2 = tm.makeTimeDataFrame().loc[:, ['B', 'C', 'D']] - y = tm.makeTimeSeries() - - data = {'foo': df1, 'bar': df2} - - def f(): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - ols(y=y, x=data) - self.assertRaises(Exception, f) - - def test_plm_ctor(self): - y = tm.makeTimeDataFrame() - x = {'a': tm.makeTimeDataFrame(), - 'b': tm.makeTimeDataFrame()} - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x, intercept=False) - model.summary - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=Panel(x)) - model.summary - - def test_plm_attrs(self): - y = tm.makeTimeDataFrame() - x = {'a': tm.makeTimeDataFrame(), - 'b': tm.makeTimeDataFrame()} - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - rmodel = ols(y=y, x=x, window=10) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x) - model.resid - rmodel.resid - - def test_plm_lagged_y_predict(self): - y = tm.makeTimeDataFrame() - x = {'a': tm.makeTimeDataFrame(), - 'b': tm.makeTimeDataFrame()} - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x, window=10) - result = model.lagged_y_predict(2) - - def test_plm_f_test(self): - y = tm.makeTimeDataFrame() - x = {'a': tm.makeTimeDataFrame(), - 'b': tm.makeTimeDataFrame()} - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x) - - hyp = '1*a+1*b=0' - result = model.f_test(hyp) - - hyp = ['1*a=0', - '1*b=0'] - result = model.f_test(hyp) - assert_almost_equal(result['f-stat'], model.f_stat['f-stat']) - - def test_plm_exclude_dummy_corner(self): - y = tm.makeTimeDataFrame() - x = {'a': tm.makeTimeDataFrame(), - 'b': tm.makeTimeDataFrame()} - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols( - y=y, x=x, entity_effects=True, dropped_dummies={'entity': 'D'}) - model.summary - - def f(): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - ols(y=y, x=x, entity_effects=True, - dropped_dummies={'entity': 'E'}) - self.assertRaises(Exception, f) - - def test_columns_tuples_summary(self): - # #1837 - X = DataFrame(np.random.randn(10, 2), columns=[('a', 'b'), ('c', 'd')]) - Y = Series(np.random.randn(10)) - - # it works! 
- with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=Y, x=X) - model.summary - - -class TestPanelOLS(BaseTest): - - FIELDS = ['beta', 'df', 'df_model', 'df_resid', 'f_stat', - 'p_value', 'r2', 'r2_adj', 'rmse', 'std_err', - 't_stat', 'var_beta'] - - _other_fields = ['resid', 'y_fitted'] - - def testFiltering(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y2, x=self.panel_x2) - - x = result._x - index = x.index.get_level_values(0) - index = Index(sorted(set(index))) - exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)]) - self.assert_index_equal(exp_index, index) - - index = x.index.get_level_values(1) - index = Index(sorted(set(index))) - exp_index = Index(['A', 'B']) - self.assert_index_equal(exp_index, index) - - x = result._x_filtered - index = x.index.get_level_values(0) - index = Index(sorted(set(index))) - exp_index = Index([datetime(2000, 1, 1), - datetime(2000, 1, 3), - datetime(2000, 1, 4)]) - self.assert_index_equal(exp_index, index) - - # .flat is flatiter instance - assert_almost_equal(result._y.values.flat, [1, 4, 5], - check_dtype=False) - - exp_x = np.array([[6, 14, 1], [9, 17, 1], - [30, 48, 1]], dtype=np.float64) - assert_almost_equal(exp_x, result._x.values) - - exp_x_filtered = np.array([[6, 14, 1], [9, 17, 1], [30, 48, 1], - [11, 20, 1], [12, 21, 1]], dtype=np.float64) - assert_almost_equal(exp_x_filtered, result._x_filtered.values) - - self.assert_index_equal(result._x_filtered.index.levels[0], - result.y_fitted.index) - - def test_wls_panel(self): - y = tm.makeTimeDataFrame() - x = Panel({'x1': tm.makeTimeDataFrame(), - 'x2': tm.makeTimeDataFrame()}) - - y.iloc[[1, 7], y.columns.get_loc('A')] = np.nan - y.iloc[[6, 15], y.columns.get_loc('B')] = np.nan - y.iloc[[3, 20], y.columns.get_loc('C')] = np.nan - y.iloc[[5, 11], y.columns.get_loc('D')] = np.nan - - stack_y = y.stack() - stack_x = DataFrame(dict((k, v.stack()) - for k, v in x.iteritems())) - - weights = x.std('items') - stack_weights = weights.stack() - - stack_y.index = stack_y.index._tuple_index - stack_x.index = stack_x.index._tuple_index - stack_weights.index = stack_weights.index._tuple_index - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=y, x=x, weights=1 / weights) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - expected = ols(y=stack_y, x=stack_x, weights=1 / stack_weights) - - assert_almost_equal(result.beta, expected.beta) - - for attr in ['resid', 'y_fitted']: - rvals = getattr(result, attr).stack().values - evals = getattr(expected, attr).values - assert_almost_equal(rvals, evals) - - def testWithTimeEffects(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y2, x=self.panel_x2, time_effects=True) - - # .flat is flatiter instance - assert_almost_equal(result._y_trans.values.flat, [0, -0.5, 0.5], - check_dtype=False) - - exp_x = np.array([[0, 0], [-10.5, -15.5], [10.5, 15.5]]) - assert_almost_equal(result._x_trans.values, exp_x) - - # _check_non_raw_results(result) - - def testWithEntityEffects(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y2, x=self.panel_x2, entity_effects=True) - - # .flat is flatiter instance - assert_almost_equal(result._y.values.flat, [1, 4, 5], - check_dtype=False) - - exp_x = DataFrame([[0., 6., 14., 1.], [0, 9, 17, 1], [1, 30, 48, 1]], - index=result._x.index, columns=['FE_B', 
'x1', 'x2', - 'intercept'], - dtype=float) - tm.assert_frame_equal(result._x, exp_x.loc[:, result._x.columns]) - # _check_non_raw_results(result) - - def testWithEntityEffectsAndDroppedDummies(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y2, x=self.panel_x2, entity_effects=True, - dropped_dummies={'entity': 'B'}) - - # .flat is flatiter instance - assert_almost_equal(result._y.values.flat, [1, 4, 5], - check_dtype=False) - exp_x = DataFrame([[1., 6., 14., 1.], [1, 9, 17, 1], [0, 30, 48, 1]], - index=result._x.index, columns=['FE_A', 'x1', 'x2', - 'intercept'], - dtype=float) - tm.assert_frame_equal(result._x, exp_x.loc[:, result._x.columns]) - # _check_non_raw_results(result) - - def testWithXEffects(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y2, x=self.panel_x2, x_effects=['x1']) - - # .flat is flatiter instance - assert_almost_equal(result._y.values.flat, [1, 4, 5], - check_dtype=False) - - res = result._x - exp_x = DataFrame([[0., 0., 14., 1.], [0, 1, 17, 1], [1, 0, 48, 1]], - columns=['x1_30', 'x1_9', 'x2', 'intercept'], - index=res.index, dtype=float) - exp_x[['x1_30', 'x1_9']] = exp_x[['x1_30', 'x1_9']].astype(np.uint8) - assert_frame_equal(res, exp_x.reindex(columns=res.columns)) - - def testWithXEffectsAndDroppedDummies(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y2, x=self.panel_x2, x_effects=['x1'], - dropped_dummies={'x1': 30}) - - res = result._x - # .flat is flatiter instance - assert_almost_equal(result._y.values.flat, [1, 4, 5], - check_dtype=False) - exp_x = DataFrame([[1., 0., 14., 1.], [0, 1, 17, 1], [0, 0, 48, 1]], - columns=['x1_6', 'x1_9', 'x2', 'intercept'], - index=res.index, dtype=float) - exp_x[['x1_6', 'x1_9']] = exp_x[['x1_6', 'x1_9']].astype(np.uint8) - - assert_frame_equal(res, exp_x.reindex(columns=res.columns)) - - def testWithXEffectsAndConversion(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y3, x=self.panel_x3, - x_effects=['x1', 'x2']) - - # .flat is flatiter instance - assert_almost_equal(result._y.values.flat, [1, 2, 3, 4], - check_dtype=False) - exp_x = np.array([[0, 0, 0, 1, 1], [1, 0, 0, 0, 1], [0, 1, 1, 0, 1], - [0, 0, 0, 1, 1]], dtype=np.float64) - assert_almost_equal(result._x.values, exp_x) - - exp_index = Index(['x1_B', 'x1_C', 'x2_baz', 'x2_foo', 'intercept']) - self.assert_index_equal(exp_index, result._x.columns) - - # _check_non_raw_results(result) - - def testWithXEffectsAndConversionAndDroppedDummies(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y3, x=self.panel_x3, x_effects=['x1', 'x2'], - dropped_dummies={'x2': 'foo'}) - # .flat is flatiter instance - assert_almost_equal(result._y.values.flat, [1, 2, 3, 4], - check_dtype=False) - exp_x = np.array([[0, 0, 0, 0, 1], [1, 0, 1, 0, 1], [0, 1, 0, 1, 1], - [0, 0, 0, 0, 1]], dtype=np.float64) - assert_almost_equal(result._x.values, exp_x) - - exp_index = Index(['x1_B', 'x1_C', 'x2_bar', 'x2_baz', 'intercept']) - self.assert_index_equal(exp_index, result._x.columns) - - # _check_non_raw_results(result) - - def testForSeries(self): - self.checkForSeries(self.series_panel_x, self.series_panel_y, - self.series_x, self.series_y) - - self.checkForSeries(self.series_panel_x, self.series_panel_y, - self.series_x, self.series_y, nw_lags=0) - - self.checkForSeries(self.series_panel_x, 
self.series_panel_y, - self.series_x, self.series_y, nw_lags=1, - nw_overlap=True) - - def testRolling(self): - self.checkMovingOLS(self.panel_x, self.panel_y) - - def testRollingWithFixedEffects(self): - self.checkMovingOLS(self.panel_x, self.panel_y, - entity_effects=True) - self.checkMovingOLS(self.panel_x, self.panel_y, intercept=False, - entity_effects=True) - - def testRollingWithTimeEffects(self): - self.checkMovingOLS(self.panel_x, self.panel_y, - time_effects=True) - - def testRollingWithNeweyWest(self): - self.checkMovingOLS(self.panel_x, self.panel_y, - nw_lags=1) - - def testRollingWithEntityCluster(self): - self.checkMovingOLS(self.panel_x, self.panel_y, - cluster='entity') - - def testUnknownClusterRaisesValueError(self): - assertRaisesRegexp(ValueError, "Unrecognized cluster.*ridiculous", - self.checkMovingOLS, self.panel_x, self.panel_y, - cluster='ridiculous') - - def testRollingWithTimeEffectsAndEntityCluster(self): - self.checkMovingOLS(self.panel_x, self.panel_y, - time_effects=True, cluster='entity') - - def testRollingWithTimeCluster(self): - self.checkMovingOLS(self.panel_x, self.panel_y, - cluster='time') - - def testRollingWithNeweyWestAndEntityCluster(self): - self.assertRaises(ValueError, self.checkMovingOLS, - self.panel_x, self.panel_y, - nw_lags=1, cluster='entity') - - def testRollingWithNeweyWestAndTimeEffectsAndEntityCluster(self): - self.assertRaises(ValueError, - self.checkMovingOLS, self.panel_x, self.panel_y, - nw_lags=1, cluster='entity', - time_effects=True) - - def testExpanding(self): - self.checkMovingOLS( - self.panel_x, self.panel_y, window_type='expanding') - - def testNonPooled(self): - self.checkNonPooled(y=self.panel_y, x=self.panel_x) - self.checkNonPooled(y=self.panel_y, x=self.panel_x, - window_type='rolling', window=25, min_periods=10) - - def testUnknownWindowType(self): - assertRaisesRegexp(ValueError, "window.*ridiculous", - self.checkNonPooled, y=self.panel_y, x=self.panel_x, - window_type='ridiculous', window=25, min_periods=10) - - def checkNonPooled(self, x, y, **kwds): - # For now, just check that it doesn't crash - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=y, x=x, pool=False, **kwds) - - _check_repr(result) - for attr in NonPooledPanelOLS.ATTRIBUTES: - _check_repr(getattr(result, attr)) - - def checkMovingOLS(self, x, y, window_type='rolling', **kwds): - window = 25 # must be larger than rank of x - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - moving = ols(y=y, x=x, window_type=window_type, - window=window, **kwds) - - index = moving._index - - for n, i in enumerate(moving._valid_indices): - if window_type == 'rolling' and i >= window: - prior_date = index[i - window + 1] - else: - prior_date = index[0] - - date = index[i] - - x_iter = {} - for k, v in compat.iteritems(x): - x_iter[k] = v.truncate(before=prior_date, after=date) - y_iter = y.truncate(before=prior_date, after=date) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - static = ols(y=y_iter, x=x_iter, **kwds) - - self.compare(static, moving, event_index=i, - result_index=n) - - _check_non_raw_results(moving) - - def checkForSeries(self, x, y, series_x, series_y, **kwds): - # Consistency check with simple OLS. 
- with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=y, x=x, **kwds) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - reference = ols(y=series_y, x=series_x, **kwds) - - self.compare(reference, result) - - def compare(self, static, moving, event_index=None, - result_index=None): - - # Check resid if we have a time index specified - if event_index is not None: - staticSlice = _period_slice(static, -1) - movingSlice = _period_slice(moving, event_index) - - ref = static._resid_raw[staticSlice] - res = moving._resid_raw[movingSlice] - - assert_almost_equal(ref, res) - - ref = static._y_fitted_raw[staticSlice] - res = moving._y_fitted_raw[movingSlice] - - assert_almost_equal(ref, res) - - # Check y_fitted - - for field in self.FIELDS: - attr = '_%s_raw' % field - - ref = getattr(static, attr) - res = getattr(moving, attr) - - if result_index is not None: - res = res[result_index] - - assert_almost_equal(ref, res) - - def test_auto_rolling_window_type(self): - data = tm.makeTimeDataFrame() - y = data.pop('A') - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - window_model = ols(y=y, x=data, window=20, min_periods=10) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - rolling_model = ols(y=y, x=data, window=20, min_periods=10, - window_type='rolling') - - assert_frame_equal(window_model.beta, rolling_model.beta) - - def test_group_agg(self): - from pandas.stats.plm import _group_agg - - values = np.ones((10, 2)) * np.arange(10).reshape((10, 1)) - bounds = np.arange(5) * 2 - f = lambda x: x.mean(axis=0) - - agged = _group_agg(values, bounds, f) - - assert(agged[1][0] == 2.5) - assert(agged[2][0] == 4.5) - - # test a function that doesn't aggregate - f2 = lambda x: np.zeros((2, 2)) - self.assertRaises(Exception, _group_agg, values, bounds, f2) - - -def _check_non_raw_results(model): - _check_repr(model) - _check_repr(model.resid) - _check_repr(model.summary_as_matrix) - _check_repr(model.y_fitted) - _check_repr(model.y_predict) - - -def _period_slice(panelModel, i): - index = panelModel._x_trans.index - period = index.levels[0][i] - - L, R = index.get_major_bounds(period, period) - - return slice(L, R) - - -class TestOLSFilter(tm.TestCase): - - def setUp(self): - date_index = date_range(datetime(2009, 12, 11), periods=3, - freq=offsets.BDay()) - ts = Series([3, 1, 4], index=date_index) - self.TS1 = ts - - date_index = date_range(datetime(2009, 12, 11), periods=5, - freq=offsets.BDay()) - ts = Series([1, 5, 9, 2, 6], index=date_index) - self.TS2 = ts - - date_index = date_range(datetime(2009, 12, 11), periods=3, - freq=offsets.BDay()) - ts = Series([5, np.nan, 3], index=date_index) - self.TS3 = ts - - date_index = date_range(datetime(2009, 12, 11), periods=5, - freq=offsets.BDay()) - ts = Series([np.nan, 5, 8, 9, 7], index=date_index) - self.TS4 = ts - - data = {'x1': self.TS2, 'x2': self.TS4} - self.DF1 = DataFrame(data=data) - - data = {'x1': self.TS2, 'x2': self.TS4} - self.DICT1 = data - - def testFilterWithSeriesRHS(self): - (lhs, rhs, weights, rhs_pre, - index, valid) = _filter_data(self.TS1, {'x1': self.TS2}, None) - self.tsAssertEqual(self.TS1.astype(np.float64), lhs, check_names=False) - self.tsAssertEqual(self.TS2[:3].astype(np.float64), rhs['x1'], - check_names=False) - self.tsAssertEqual(self.TS2.astype(np.float64), rhs_pre['x1'], - check_names=False) - - def testFilterWithSeriesRHS2(self): - (lhs, rhs, weights, rhs_pre, - index, valid) = _filter_data(self.TS2, {'x1': 
self.TS1}, None) - self.tsAssertEqual(self.TS2[:3].astype(np.float64), lhs, - check_names=False) - self.tsAssertEqual(self.TS1.astype(np.float64), rhs['x1'], - check_names=False) - self.tsAssertEqual(self.TS1.astype(np.float64), rhs_pre['x1'], - check_names=False) - - def testFilterWithSeriesRHS3(self): - (lhs, rhs, weights, rhs_pre, - index, valid) = _filter_data(self.TS3, {'x1': self.TS4}, None) - exp_lhs = self.TS3[2:3] - exp_rhs = self.TS4[2:3] - exp_rhs_pre = self.TS4[1:] - self.tsAssertEqual(exp_lhs, lhs, check_names=False) - self.tsAssertEqual(exp_rhs, rhs['x1'], check_names=False) - self.tsAssertEqual(exp_rhs_pre, rhs_pre['x1'], check_names=False) - - def testFilterWithDataFrameRHS(self): - (lhs, rhs, weights, rhs_pre, - index, valid) = _filter_data(self.TS1, self.DF1, None) - exp_lhs = self.TS1[1:].astype(np.float64) - exp_rhs1 = self.TS2[1:3] - exp_rhs2 = self.TS4[1:3].astype(np.float64) - self.tsAssertEqual(exp_lhs, lhs, check_names=False) - self.tsAssertEqual(exp_rhs1, rhs['x1'], check_names=False) - self.tsAssertEqual(exp_rhs2, rhs['x2'], check_names=False) - - def testFilterWithDictRHS(self): - (lhs, rhs, weights, rhs_pre, - index, valid) = _filter_data(self.TS1, self.DICT1, None) - exp_lhs = self.TS1[1:].astype(np.float64) - exp_rhs1 = self.TS2[1:3].astype(np.float64) - exp_rhs2 = self.TS4[1:3].astype(np.float64) - self.tsAssertEqual(exp_lhs, lhs, check_names=False) - self.tsAssertEqual(exp_rhs1, rhs['x1'], check_names=False) - self.tsAssertEqual(exp_rhs2, rhs['x2'], check_names=False) - - def tsAssertEqual(self, ts1, ts2, **kwargs): - self.assert_series_equal(ts1, ts2, **kwargs) diff --git a/pandas/stats/tests/test_var.py b/pandas/stats/tests/test_var.py deleted file mode 100644 index 04e2019f00a82..0000000000000 --- a/pandas/stats/tests/test_var.py +++ /dev/null @@ -1,94 +0,0 @@ -# flake8: noqa - -from __future__ import print_function - -import pandas.util.testing as tm - -from pandas.compat import range -import nose - -raise nose.SkipTest('skipping this for now') - -try: - import statsmodels.tsa.var as sm_var - import statsmodels as sm -except ImportError: - import scikits.statsmodels.tsa.var as sm_var - import scikits.statsmodels as sm - - -import pandas.stats.var as _pvar -reload(_pvar) -from pandas.stats.var import VAR - -DECIMAL_6 = 6 -DECIMAL_5 = 5 -DECIMAL_4 = 4 -DECIMAL_3 = 3 -DECIMAL_2 = 2 - - -class CheckVAR(object): - - def test_params(self): - tm.assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_3) - - def test_neqs(self): - tm.assert_numpy_array_equal(self.res1.neqs, self.res2.neqs) - - def test_nobs(self): - tm.assert_numpy_array_equal(self.res1.avobs, self.res2.nobs) - - def test_df_eq(self): - tm.assert_numpy_array_equal(self.res1.df_eq, self.res2.df_eq) - - def test_rmse(self): - results = self.res1.results - for i in range(len(results)): - tm.assert_almost_equal(results[i].mse_resid ** .5, - eval('self.res2.rmse_' + str(i + 1)), - DECIMAL_6) - - def test_rsquared(self): - results = self.res1.results - for i in range(len(results)): - tm.assert_almost_equal(results[i].rsquared, - eval('self.res2.rsquared_' + str(i + 1)), - DECIMAL_3) - - def test_llf(self): - results = self.res1.results - tm.assert_almost_equal(self.res1.llf, self.res2.llf, DECIMAL_2) - for i in range(len(results)): - tm.assert_almost_equal(results[i].llf, - eval('self.res2.llf_' + str(i + 1)), - DECIMAL_2) - - def test_aic(self): - tm.assert_almost_equal(self.res1.aic, self.res2.aic) - - def test_bic(self): - tm.assert_almost_equal(self.res1.bic, self.res2.bic) - - def 
test_hqic(self): - tm.assert_almost_equal(self.res1.hqic, self.res2.hqic) - - def test_fpe(self): - tm.assert_almost_equal(self.res1.fpe, self.res2.fpe) - - def test_detsig(self): - tm.assert_almost_equal(self.res1.detomega, self.res2.detsig) - - def test_bse(self): - tm.assert_almost_equal(self.res1.bse, self.res2.bse, DECIMAL_4) - - -class Foo(object): - - def __init__(self): - data = sm.datasets.macrodata.load() - data = data.data[['realinv', 'realgdp', 'realcons']].view((float, 3)) - data = diff(log(data), axis=0) - self.res1 = VAR2(endog=data).fit(maxlag=2) - from results import results_var - self.res2 = results_var.MacrodataResults() diff --git a/pandas/stats/var.py b/pandas/stats/var.py deleted file mode 100644 index db4028d60f5c8..0000000000000 --- a/pandas/stats/var.py +++ /dev/null @@ -1,605 +0,0 @@ -# flake8: noqa - -from __future__ import division - -from pandas.compat import range, lrange, zip, reduce -from pandas import compat -import numpy as np -from pandas.core.base import StringMixin -from pandas.util.decorators import cache_readonly -from pandas.core.frame import DataFrame -from pandas.core.panel import Panel -from pandas.core.series import Series -import pandas.stats.common as common -from pandas.stats.math import inv -from pandas.stats.ols import _combine_rhs - - -class VAR(StringMixin): - """ - Estimates VAR(p) regression on multivariate time series data - presented in pandas data structures. - - Parameters - ---------- - data : DataFrame or dict of Series - p : lags to include - - """ - - def __init__(self, data, p=1, intercept=True): - import warnings - warnings.warn("The pandas.stats.var module is deprecated and will be " - "removed in a future version. We refer to external packages " - "like statsmodels, see some examples here: " - "http://www.statsmodels.org/stable/vector_ar.html#var", - FutureWarning, stacklevel=4) - - try: - import statsmodels.tsa.vector_ar.api as sm_var - except ImportError: - import scikits.statsmodels.tsa.var as sm_var - - self._data = DataFrame(_combine_rhs(data)) - self._p = p - - self._columns = self._data.columns - self._index = self._data.index - - self._intercept = intercept - - @cache_readonly - def aic(self): - """Returns the Akaike information criterion.""" - return self._ic['aic'] - - @cache_readonly - def bic(self): - """Returns the Bayesian information criterion.""" - return self._ic['bic'] - - @cache_readonly - def beta(self): - """ - Returns a DataFrame, where each column x1 contains the betas - calculated by regressing the x1 column of the VAR input with - the lagged input. - - Returns - ------- - DataFrame - """ - d = dict([(key, value.beta) - for (key, value) in compat.iteritems(self.ols_results)]) - return DataFrame(d) - - def forecast(self, h): - """ - Returns a DataFrame containing the forecasts for 1, 2, ..., n time - steps. Each column x1 contains the forecasts of the x1 column. - - Parameters - ---------- - n: int - Number of time steps ahead to forecast. - - Returns - ------- - DataFrame - """ - forecast = self._forecast_raw(h)[:, 0, :] - return DataFrame(forecast, index=lrange(1, 1 + h), - columns=self._columns) - - def forecast_cov(self, h): - """ - Returns the covariance of the forecast residuals. - - Returns - ------- - DataFrame - """ - return [DataFrame(value, index=self._columns, columns=self._columns) - for value in self._forecast_cov_raw(h)] - - def forecast_std_err(self, h): - """ - Returns the standard errors of the forecast residuals. 
- - Returns - ------- - DataFrame - """ - return DataFrame(self._forecast_std_err_raw(h), - index=lrange(1, 1 + h), columns=self._columns) - - @cache_readonly - def granger_causality(self): - """Returns the f-stats and p-values from the Granger Causality Test. - - If the data consists of columns x1, x2, x3, then we perform the - following regressions: - - x1 ~ L(x2, x3) - x1 ~ L(x1, x3) - x1 ~ L(x1, x2) - - The f-stats of these results are placed in the 'x1' column of the - returned DataFrame. We then repeat for x2, x3. - - Returns - ------- - Dict, where 'f-stat' returns the DataFrame containing the f-stats, - and 'p-value' returns the DataFrame containing the corresponding - p-values of the f-stats. - """ - from pandas.stats.api import ols - from scipy.stats import f - - d = {} - for col in self._columns: - d[col] = {} - for i in range(1, 1 + self._p): - lagged_data = self._lagged_data[i].filter( - self._columns - [col]) - - for key, value in compat.iteritems(lagged_data): - d[col][_make_param_name(i, key)] = value - - f_stat_dict = {} - p_value_dict = {} - - for col, y in compat.iteritems(self._data): - ssr_full = (self.resid[col] ** 2).sum() - - f_stats = [] - p_values = [] - - for col2 in self._columns: - result = ols(y=y, x=d[col2]) - - resid = result.resid - ssr_reduced = (resid ** 2).sum() - - M = self._p - N = self._nobs - K = self._k * self._p + 1 - f_stat = ((ssr_reduced - ssr_full) / M) / (ssr_full / (N - K)) - f_stats.append(f_stat) - - p_value = f.sf(f_stat, M, N - K) - p_values.append(p_value) - - f_stat_dict[col] = Series(f_stats, self._columns) - p_value_dict[col] = Series(p_values, self._columns) - - f_stat_mat = DataFrame(f_stat_dict) - p_value_mat = DataFrame(p_value_dict) - - return { - 'f-stat': f_stat_mat, - 'p-value': p_value_mat, - } - - @cache_readonly - def ols_results(self): - """ - Returns the results of the regressions: - x_1 ~ L(X) - x_2 ~ L(X) - ... - x_k ~ L(X) - - where X = [x_1, x_2, ..., x_k] - and L(X) represents the columns of X lagged 1, 2, ..., n lags - (n is the user-provided number of lags). - - Returns - ------- - dict - """ - from pandas.stats.api import ols - - d = {} - for i in range(1, 1 + self._p): - for col, series in compat.iteritems(self._lagged_data[i]): - d[_make_param_name(i, col)] = series - - result = dict([(col, ols(y=y, x=d, intercept=self._intercept)) - for col, y in compat.iteritems(self._data)]) - - return result - - @cache_readonly - def resid(self): - """ - Returns the DataFrame containing the residuals of the VAR regressions. - Each column x1 contains the residuals generated by regressing the x1 - column of the input against the lagged input. - - Returns - ------- - DataFrame - """ - d = dict([(col, series.resid) - for (col, series) in compat.iteritems(self.ols_results)]) - return DataFrame(d, index=self._index) - - @cache_readonly - def summary(self): - template = """ -%(banner_top)s - -Number of Observations: %(nobs)d -AIC: %(aic).3f -BIC: %(bic).3f - -%(banner_coef)s -%(coef_table)s -%(banner_end)s -""" - params = { - 'banner_top': common.banner('Summary of VAR'), - 'banner_coef': common.banner('Summary of Estimated Coefficients'), - 'banner_end': common.banner('End of Summary'), - 'coef_table': self.beta, - 'aic': self.aic, - 'bic': self.bic, - 'nobs': self._nobs, - } - - return template % params - - @cache_readonly - def _alpha(self): - """ - Returns array where the i-th element contains the intercept - when regressing the i-th column of self._data with the lagged data. 
- """ - if self._intercept: - return self._beta_raw[-1] - else: - return np.zeros(self._k) - - @cache_readonly - def _beta_raw(self): - return np.array([list(self.beta[col].values()) for col in self._columns]).T - - def _trans_B(self, h): - """ - Returns 0, 1, ..., (h-1)-th power of transpose of B as defined in - equation (4) on p. 142 of the Stata 11 Time Series reference book. - """ - result = [np.eye(1 + self._k * self._p)] - - row1 = np.zeros((1, 1 + self._k * self._p)) - row1[0, 0] = 1 - - v = self._alpha.reshape((self._k, 1)) - row2 = np.hstack(tuple([v] + self._lag_betas)) - - m = self._k * (self._p - 1) - row3 = np.hstack(( - np.zeros((m, 1)), - np.eye(m), - np.zeros((m, self._k)) - )) - - trans_B = np.vstack((row1, row2, row3)).T - - result.append(trans_B) - - for i in range(2, h): - result.append(np.dot(trans_B, result[i - 1])) - - return result - - @cache_readonly - def _x(self): - values = np.array([ - list(self._lagged_data[i][col].values()) - for i in range(1, 1 + self._p) - for col in self._columns - ]).T - - x = np.hstack((np.ones((len(values), 1)), values))[self._p:] - - return x - - @cache_readonly - def _cov_beta(self): - cov_resid = self._sigma - - x = self._x - - inv_cov_x = inv(np.dot(x.T, x)) - - return np.kron(inv_cov_x, cov_resid) - - def _data_xs(self, i): - """ - Returns the cross-section of the data at the given timestep. - """ - return self._data.values[i] - - def _forecast_cov_raw(self, n): - resid = self._forecast_cov_resid_raw(n) - # beta = self._forecast_cov_beta_raw(n) - - # return [a + b for a, b in zip(resid, beta)] - # TODO: ignore the beta forecast std err until it's verified - - return resid - - def _forecast_cov_beta_raw(self, n): - """ - Returns the covariance of the beta errors for the forecast at - 1, 2, ..., n timesteps. - """ - p = self._p - - values = self._data.values - T = len(values) - self._p - 1 - - results = [] - - for h in range(1, n + 1): - psi = self._psi(h) - trans_B = self._trans_B(h) - - sum = 0 - - cov_beta = self._cov_beta - - for t in range(T + 1): - index = t + p - y = values.take(lrange(index, index - p, -1), axis=0).ravel() - trans_Z = np.hstack(([1], y)) - trans_Z = trans_Z.reshape(1, len(trans_Z)) - - sum2 = 0 - for i in range(h): - ZB = np.dot(trans_Z, trans_B[h - 1 - i]) - - prod = np.kron(ZB, psi[i]) - sum2 = sum2 + prod - - sum = sum + chain_dot(sum2, cov_beta, sum2.T) - - results.append(sum / (T + 1)) - - return results - - def _forecast_cov_resid_raw(self, h): - """ - Returns the covariance of the residual errors for the forecast at - 1, 2, ..., h timesteps. - """ - psi_values = self._psi(h) - sum = 0 - result = [] - for i in range(h): - psi = psi_values[i] - sum = sum + chain_dot(psi, self._sigma, psi.T) - result.append(sum) - - return result - - def _forecast_raw(self, h): - """ - Returns the forecast at 1, 2, ..., h timesteps in the future. - """ - k = self._k - result = [] - for i in range(h): - sum = self._alpha.reshape(1, k) - for j in range(self._p): - beta = self._lag_betas[j] - idx = i - j - if idx > 0: - y = result[idx - 1] - else: - y = self._data_xs(idx - 1) - - sum = sum + np.dot(beta, y.T).T - result.append(sum) - - return np.array(result) - - def _forecast_std_err_raw(self, h): - """ - Returns the standard error of the forecasts - at 1, 2, ..., n timesteps. - """ - return np.array([np.sqrt(np.diag(value)) - for value in self._forecast_cov_raw(h)]) - - @cache_readonly - def _ic(self): - """ - Returns the Akaike/Bayesian information criteria. 
- """ - RSS = self._rss - k = self._p * (self._k * self._p + 1) - n = self._nobs * self._k - - return {'aic': 2 * k + n * np.log(RSS / n), - 'bic': n * np.log(RSS / n) + k * np.log(n)} - - @cache_readonly - def _k(self): - return len(self._columns) - - @cache_readonly - def _lag_betas(self): - """ - Returns list of B_i, where B_i represents the (k, k) matrix - with the j-th row containing the betas of regressing the j-th - column of self._data with self._data lagged i time steps. - First element is B_1, second element is B_2, etc. - """ - k = self._k - b = self._beta_raw - return [b[k * i: k * (i + 1)].T for i in range(self._p)] - - @cache_readonly - def _lagged_data(self): - return dict([(i, self._data.shift(i)) - for i in range(1, 1 + self._p)]) - - @cache_readonly - def _nobs(self): - return len(self._data) - self._p - - def _psi(self, h): - """ - psi value used for calculating standard error. - - Returns [psi_0, psi_1, ..., psi_(h - 1)] - """ - k = self._k - result = [np.eye(k)] - for i in range(1, h): - result.append(sum( - [np.dot(result[i - j], self._lag_betas[j - 1]) - for j in range(1, 1 + i) - if j <= self._p])) - - return result - - @cache_readonly - def _resid_raw(self): - resid = np.array([self.ols_results[col]._resid_raw - for col in self._columns]) - return resid - - @cache_readonly - def _rss(self): - """Returns the sum of the squares of the residuals.""" - return (self._resid_raw ** 2).sum() - - @cache_readonly - def _sigma(self): - """Returns covariance of resids.""" - k = self._k - n = self._nobs - - resid = self._resid_raw - - return np.dot(resid, resid.T) / (n - k) - - def __unicode__(self): - return self.summary - - -def lag_select(data, max_lags=5, ic=None): - """ - Select number of lags based on a variety of information criteria - - Parameters - ---------- - data : DataFrame-like - max_lags : int - Maximum number of lags to evaluate - ic : {None, 'aic', 'bic', ...} - Choosing None will just display the results - - Returns - ------- - None - """ - pass - - -class PanelVAR(VAR): - """ - Performs Vector Autoregression on panel data. - - Parameters - ---------- - data: Panel or dict of DataFrame - lags: int - """ - - def __init__(self, data, lags, intercept=True): - self._data = _prep_panel_data(data) - self._p = lags - self._intercept = intercept - - self._columns = self._data.items - - @cache_readonly - def _nobs(self): - """Returns the number of observations.""" - _, timesteps, entities = self._data.values.shape - return (timesteps - self._p) * entities - - @cache_readonly - def _rss(self): - """Returns the sum of the squares of the residuals.""" - return (self.resid.values ** 2).sum() - - def forecast(self, h): - """ - Returns the forecasts at 1, 2, ..., n timesteps in the future. - """ - forecast = self._forecast_raw(h).T.swapaxes(1, 2) - index = lrange(1, 1 + h) - w = Panel(forecast, items=self._data.items, major_axis=index, - minor_axis=self._data.minor_axis) - return w - - @cache_readonly - def resid(self): - """ - Returns the DataFrame containing the residuals of the VAR regressions. - Each column x1 contains the residuals generated by regressing the x1 - column of the input against the lagged input. 
- - Returns - ------- - DataFrame - """ - d = dict([(key, value.resid) - for (key, value) in compat.iteritems(self.ols_results)]) - return Panel.fromDict(d) - - def _data_xs(self, i): - return self._data.values[:, i, :].T - - @cache_readonly - def _sigma(self): - """Returns covariance of resids.""" - k = self._k - resid = _drop_incomplete_rows(self.resid.toLong().values) - n = len(resid) - return np.dot(resid.T, resid) / (n - k) - - -def _prep_panel_data(data): - """Converts the given data into a Panel.""" - if isinstance(data, Panel): - return data - - return Panel.fromDict(data) - - -def _drop_incomplete_rows(array): - mask = np.isfinite(array).all(1) - indices = np.arange(len(array))[mask] - return array.take(indices, 0) - - -def _make_param_name(lag, name): - return 'L%d.%s' % (lag, name) - - -def chain_dot(*matrices): - """ - Returns the dot product of the given matrices. - - Parameters - ---------- - matrices: argument list of ndarray - """ - return reduce(lambda x, y: np.dot(y, x), matrices[::-1]) diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py index c7b9f4bdea6b2..c3962ad9c823c 100644 --- a/pandas/util/print_versions.py +++ b/pandas/util/print_versions.py @@ -69,7 +69,6 @@ def show_versions(as_json=False): ("Cython", lambda mod: mod.__version__), ("numpy", lambda mod: mod.version.version), ("scipy", lambda mod: mod.version.version), - ("statsmodels", lambda mod: mod.__version__), ("xarray", lambda mod: mod.__version__), ("IPython", lambda mod: mod.__version__), ("sphinx", lambda mod: mod.__version__), diff --git a/setup.py b/setup.py index 3c2617da18eae..c3cb56f2d6d1b 100755 --- a/setup.py +++ b/setup.py @@ -660,7 +660,6 @@ def pxd(name): 'pandas.io.tests.json', 'pandas.io.tests.parser', 'pandas.io.tests.sas', - 'pandas.stats.tests', 'pandas.msgpack', 'pandas.util.clipboard' ], From 3c9fec39d502cf7a24d4a9e16e3c5733560dc05c Mon Sep 17 00:00:00 2001 From: Stephen Rauch Date: Thu, 9 Feb 2017 12:04:19 -0500 Subject: [PATCH 027/933] BUG: Multiline Eval broken for local variables after first line Also fixes the code which attempted to ignore any blank lines in the multiline expression. 
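A minimal reproduction, adapted from the regression test added in this
patch (the exact error text varies by version; `local_var` is just an
illustrative name):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
    local_var = 7

    # before this fix, resolving '@local_var' failed for every expression
    # after the first, because 'level' was incremented once per parsed
    # line, so the caller's frame was no longer found at the expected
    # depth in the scope stack
    df.eval("""
    c = a * @local_var
    d = c + @local_var
    """, inplace=True)

The fix passes ``level + 1`` into ``_ensure_scope`` instead of mutating
``level`` itself, so every line of a multiline expression resolves local
variables at the same stack depth, and stripping each line means leading
whitespace no longer defeats the blank-line filter.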
closes #15342 Author: Stephen Rauch Closes #15343 from stephenrauch/multi-line-eval-with-local and squashes the following commits: fe67ede [Stephen Rauch] BUG: GH15342 - Multiline Eval broken for local variables after first line --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/computation/eval.py | 5 ++--- pandas/computation/tests/test_eval.py | 19 +++++++++++++++---- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 3fb6f7b0b9a91..e765cdef4d219 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -517,3 +517,4 @@ Bug Fixes - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) +- Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index a0a08e4a968cc..5b21c753a71da 100644 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -236,7 +236,7 @@ def eval(expr, parser='pandas', engine=None, truediv=True, first_expr = True if isinstance(expr, string_types): _check_expression(expr) - exprs = [e for e in expr.splitlines() if e != ''] + exprs = [e.strip() for e in expr.splitlines() if e.strip() != ''] else: exprs = [expr] multi_line = len(exprs) > 1 @@ -254,8 +254,7 @@ def eval(expr, parser='pandas', engine=None, truediv=True, _check_for_locals(expr, level, parser) # get our (possibly passed-in) scope - level += 1 - env = _ensure_scope(level, global_dict=global_dict, + env = _ensure_scope(level + 1, global_dict=global_dict, local_dict=local_dict, resolvers=resolvers, target=target) diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index aa05626af9175..a4bb81ce7263c 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -1274,7 +1274,6 @@ def test_assignment_fails(self): local_dict={'df': df, 'df2': df2}) def test_assignment_column(self): - tm.skip_if_no_ne('numexpr') df = DataFrame(np.random.randn(5, 2), columns=list('ab')) orig_df = df.copy() @@ -1346,7 +1345,6 @@ def test_column_in(self): def assignment_not_inplace(self): # GH 9297 - tm.skip_if_no_ne('numexpr') df = DataFrame(np.random.randn(5, 2), columns=list('ab')) actual = df.eval('c = a + b', inplace=False) @@ -1365,7 +1363,6 @@ def assignment_not_inplace(self): def test_multi_line_expression(self): # GH 11149 - tm.skip_if_no_ne('numexpr') df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) expected = df.copy() @@ -1393,7 +1390,6 @@ def test_multi_line_expression(self): def test_multi_line_expression_not_inplace(self): # GH 11149 - tm.skip_if_no_ne('numexpr') df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) expected = df.copy() @@ -1411,6 +1407,21 @@ def test_multi_line_expression_not_inplace(self): e = a + 2""", inplace=False) assert_frame_equal(expected, df) + def test_multi_line_expression_local_variable(self): + # GH 15342 + df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) + expected = df.copy() + + local_var = 7 + expected['c'] = expected['a'] * local_var + expected['d'] = expected['c'] + local_var + ans = df.eval(""" + c = a * @local_var + d = c + @local_var + """, inplace=True) + assert_frame_equal(expected, df) + self.assertIsNone(ans) + def test_assignment_in_query(self): # GH 8664 df = pd.DataFrame({'a': [1, 2, 
3], 'b': [4, 5, 6]}) From c23b1a4c8cb4ac87c9e71703285393e5904e2a8a Mon Sep 17 00:00:00 2001 From: Piotr Chromiec Date: Thu, 9 Feb 2017 12:08:02 -0500 Subject: [PATCH 028/933] BUG: fix read_gbq lost precision for longs above 2^53 and floats above 10k closes #14020 closes #14305 Author: Piotr Chromiec Closes #14064 from tworec/read_gbq_full_long_support and squashes the following commits: 788ccee [Piotr Chromiec] BUG: fix read_gbq lost numeric precision --- doc/source/install.rst | 13 +- doc/source/io.rst | 61 +++++-- doc/source/whatsnew/v0.20.0.txt | 5 +- pandas/io/gbq.py | 24 +-- pandas/io/tests/test_gbq.py | 288 +++++++++++++++++++++----------- 5 files changed, 263 insertions(+), 128 deletions(-) diff --git a/doc/source/install.rst b/doc/source/install.rst index 158a6e5562b7a..4b3ea19624a0e 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -250,9 +250,9 @@ Optional Dependencies * `Feather Format `__: necessary for feather-based storage, version 0.3.1 or higher. * `SQLAlchemy `__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. Some common drivers are: - - `psycopg2 `__: for PostgreSQL - - `pymysql `__: for MySQL. - - `SQLite `__: for SQLite, this is included in Python's standard library by default. + * `psycopg2 `__: for PostgreSQL + * `pymysql `__: for MySQL. + * `SQLite `__: for SQLite, this is included in Python's standard library by default. * `matplotlib `__: for plotting * For Excel I/O: @@ -272,11 +272,8 @@ Optional Dependencies `__, or `xclip `__: necessary to use :func:`~pandas.read_clipboard`. Most package managers on Linux distributions will have ``xclip`` and/or ``xsel`` immediately available for installation. -* Google's `python-gflags <`__ , - `oauth2client `__ , - `httplib2 `__ - and `google-api-python-client `__ - : Needed for :mod:`~pandas.io.gbq` +* For Google BigQuery I/O - see :ref:`here `. + * `Backports.lzma `__: Only for Python 2, for writing to and/or reading from an xz compressed DataFrame in CSV; Python 3 support is built into the standard library. * One of the following combinations of libraries is needed to use the top-level :func:`~pandas.read_html` function: diff --git a/doc/source/io.rst b/doc/source/io.rst index 4c78758a0e2d2..22eac33a715ba 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -39,7 +39,7 @@ object. * :ref:`read_json` * :ref:`read_msgpack` * :ref:`read_html` - * :ref:`read_gbq` + * :ref:`read_gbq` * :ref:`read_stata` * :ref:`read_sas` * :ref:`read_clipboard` @@ -55,7 +55,7 @@ The corresponding ``writer`` functions are object methods that are accessed like * :ref:`to_json` * :ref:`to_msgpack` * :ref:`to_html` - * :ref:`to_gbq` + * :ref:`to_gbq` * :ref:`to_stata` * :ref:`to_clipboard` * :ref:`to_pickle` @@ -4648,16 +4648,11 @@ DataFrame with a shape and data types derived from the source table. Additionally, DataFrames can be inserted into new BigQuery tables or appended to existing tables. -You will need to install some additional dependencies: - -- Google's `python-gflags `__ -- `httplib2 `__ -- `google-api-python-client `__ - .. warning:: To use this module, you will need a valid BigQuery account. Refer to the - `BigQuery Documentation `__ for details on the service itself. + `BigQuery Documentation `__ + for details on the service itself. The key functions are: @@ -4671,7 +4666,44 @@ The key functions are: .. currentmodule:: pandas -.. 
_io.bigquery_reader:
+
+Supported Data Types
+++++++++++++++++++++
+
+Pandas supports all these `BigQuery data types `__:
+``STRING``, ``INTEGER`` (64 bit), ``FLOAT`` (64 bit), ``BOOLEAN`` and
+``TIMESTAMP`` (microsecond precision). Data types ``BYTES`` and ``RECORD``
+are not supported.
+
+Integer and boolean ``NA`` handling
++++++++++++++++++++++++++++++++++++
+
+.. versionadded:: 0.20
+
+Since all columns in BigQuery queries are nullable, and NumPy lacks ``NA``
+support for integer and boolean types, this module will store ``INTEGER`` or
+``BOOLEAN`` columns with at least one ``NULL`` value as ``dtype=object``.
+Otherwise those columns will be stored as ``dtype=int64`` or ``dtype=bool``
+respectively.
+
+This is the opposite of the default pandas behaviour, which promotes integer
+types to float in order to store NAs. See the :ref:`gotchas`
+for a detailed explanation.
+
+While this trade-off works well for most cases, it breaks down when storing
+values greater than 2**53. Such values in BigQuery often represent
+identifiers, and silent precision loss on an identifier is exactly what we
+want to avoid.
+
+.. _io.bigquery_deps:
+
+Dependencies
+++++++++++++
+
+This module requires the following additional dependencies:
+
+- `httplib2 `__: HTTP client
+- `google-api-python-client `__: Google's API client
+- `oauth2client `__: authentication and authorization for Google's API

.. _io.bigquery_authentication:

It is possible to authenticate with either user account credentials or service account credentials.

Authenticating with user account credentials is as simple as following the prompts in a browser window which will be automatically opened for you. You will be authenticated to the specified ``BigQuery`` account using the product name ``pandas GBQ``. It is only possible on localhost.
-The remote authentication using user account credentials is not currently supported in Pandas.
+The remote authentication using user account credentials is not currently supported in pandas.
Additional information on the authentication mechanism can be found `here `__.

@@ -4695,8 +4727,6 @@ is particularly useful when working on remote servers (eg. jupyter iPython noteb
Additional information on service accounts can be found `here `__.

-You will need to install an additional dependency: `oauth2client `__.

Authentication via ``application default credentials`` is also possible. This is only valid if the parameter ``private_key`` is not provided. This method also requires that the credentials can be fetched from the environment the code is running in. Otherwise, the OAuth2 client-side authentication is used. Additional information on ``application default credentials`` can be found `here `__.

A private key can be obtained from the Google developers console by clicking `here `__. Use JSON key type.

+.. _io.bigquery_reader:

Querying
''''''''

@@ -4775,7 +4806,6 @@ For more information about query configuration parameters see

.. _io.bigquery_writer:

-
Writing DataFrames
''''''''''''''''''

@@ -4865,6 +4895,8 @@ For example:

often as the service seems to be changing and evolving. BigQuery is best for analyzing large sets of data quickly, but it is not a direct replacement for a transactional database.

+.. _io.bigquery_create_tables:
+
Creating BigQuery Tables
''''''''''''''''''''''''

@@ -4894,6 +4926,7 @@ produce the dictionary representation schema of the specified pandas DataFrame.

the new table with a different name. Refer to `Google BigQuery issue 191 `__.
+

.. _io.stata:

Stata Format
------------

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index e765cdef4d219..9eae2b7a33923 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -369,7 +369,9 @@ Other API Changes
- ``pd.read_csv()`` will now raise a ``ValueError`` for the C engine if the quote character is larger than one byte (:issue:`11592`)
- ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`)
- ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype``
-  - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`)
+- ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`)
+- The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision loss for integers greater than 2**53. Furthermore, ``FLOAT`` columns with values above 10**4 are no longer cast to ``int64``, which also caused precision loss (:issue:`14064`, :issue:`14305`).
+
.. _whatsnew_0200.deprecations:

Deprecations

diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py
index 966f53e9d75ef..76c228418a616 100644
--- a/pandas/io/gbq.py
+++ b/pandas/io/gbq.py
@@ -603,18 +603,14 @@ def _parse_data(schema, rows):
    # see:
    # http://pandas.pydata.org/pandas-docs/dev/missing_data.html
    # #missing-data-casting-rules-and-indexing
-    dtype_map = {'INTEGER': np.dtype(float),
-                 'FLOAT': np.dtype(float),
-                 # This seems to be buggy without nanosecond indicator
+    dtype_map = {'FLOAT': np.dtype(float),
                 'TIMESTAMP': 'M8[ns]'}

    fields = schema['fields']
    col_types = [field['type'] for field in fields]
    col_names = [str(field['name']) for field in fields]
    col_dtypes = [dtype_map.get(field['type'], object) for field in fields]
-    page_array = np.zeros((len(rows),),
-                          dtype=lzip(col_names, col_dtypes))
-
+    page_array = np.zeros((len(rows),), dtype=lzip(col_names, col_dtypes))
    for row_num, raw_row in enumerate(rows):
        entries = raw_row.get('f', [])
        for col_num, field_type in enumerate(col_types):
@@ -628,7 +624,9 @@ def _parse_data(schema, rows):
def _parse_entry(field_value, field_type):
    if field_value is None or field_value == 'null':
        return None
-    if field_type == 'INTEGER' or field_type == 'FLOAT':
+    if field_type == 'INTEGER':
+        return int(field_value)
+    elif field_type == 'FLOAT':
        return float(field_value)
    elif field_type == 'TIMESTAMP':
        timestamp = datetime.utcfromtimestamp(float(field_value))
@@ -757,10 +755,14 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
            'Column order does not match this DataFrame.'
        )

-    # Downcast floats to integers and objects to booleans
-    # if there are no NaN's. This is presently due to a
-    # limitation of numpy in handling missing data.
- final_df._data = final_df._data.downcast(dtypes='infer') + # cast BOOLEAN and INTEGER columns from object to bool/int + # if they dont have any nulls + type_map = {'BOOLEAN': bool, 'INTEGER': int} + for field in schema['fields']: + if field['type'] in type_map and \ + final_df[field['name']].notnull().all(): + final_df[field['name']] = \ + final_df[field['name']].astype(type_map[field['type']]) connector.print_elapsed_seconds( 'Total time taken', diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 457e2d218cb33..1157482d7ae67 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -46,6 +46,11 @@ def _skip_if_no_project_id(): "Cannot run integration tests without a project id") +def _skip_local_auth_if_in_travis_env(): + if _in_travis_environment(): + raise nose.SkipTest("Cannot run local auth in travis environment") + + def _skip_if_no_private_key_path(): if not _get_private_key_path(): raise nose.SkipTest("Cannot run integration tests without a " @@ -248,14 +253,14 @@ def test_generate_bq_schema_deprecated(): gbq.generate_bq_schema(df) -class TestGBQConnectorIntegration(tm.TestCase): +class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase): def setUp(self): _setup_common() _skip_if_no_project_id() + _skip_local_auth_if_in_travis_env() - self.sut = gbq.GbqConnector(_get_project_id(), - private_key=_get_private_key_path()) + self.sut = gbq.GbqConnector(_get_project_id()) def test_should_be_able_to_make_a_connector(self): self.assertTrue(self.sut is not None, @@ -293,8 +298,7 @@ def test_get_application_default_credentials_returns_credentials(self): self.assertTrue(isinstance(credentials, GoogleCredentials)) -class TestGBQConnectorServiceAccountKeyPathIntegration(tm.TestCase): - +class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase): def setUp(self): _setup_common() @@ -325,16 +329,15 @@ def test_should_be_able_to_get_results_from_query(self): self.assertTrue(pages is not None) -class TestGBQConnectorServiceAccountKeyContentsIntegration(tm.TestCase): - +class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase): def setUp(self): _setup_common() _skip_if_no_project_id() - _skip_if_no_private_key_path() + _skip_if_no_private_key_contents() self.sut = gbq.GbqConnector(_get_project_id(), - private_key=_get_private_key_path()) + private_key=_get_private_key_contents()) def test_should_be_able_to_make_a_connector(self): self.assertTrue(self.sut is not None, @@ -373,9 +376,9 @@ def test_import_google_api_python_client(self): from googleapiclient.discovery import build # noqa from googleapiclient.errors import HttpError # noqa - def test_should_return_bigquery_integers_as_python_floats(self): + def test_should_return_bigquery_integers_as_python_ints(self): result = gbq._parse_entry(1, 'INTEGER') - tm.assert_equal(result, float(1)) + tm.assert_equal(result, int(1)) def test_should_return_bigquery_floats_as_python_floats(self): result = gbq._parse_entry(1, 'FLOAT') @@ -403,15 +406,15 @@ def test_to_gbq_with_no_project_id_given_should_fail(self): def test_read_gbq_with_no_project_id_given_should_fail(self): with tm.assertRaises(TypeError): - gbq.read_gbq('SELECT "1" as NUMBER_1') + gbq.read_gbq('SELECT 1') def test_that_parse_data_works_properly(self): test_schema = {'fields': [ - {'mode': 'NULLABLE', 'name': 'VALID_STRING', 'type': 'STRING'}]} + {'mode': 'NULLABLE', 'name': 'valid_string', 'type': 'STRING'}]} test_page = [{'f': [{'v': 'PI'}]}] test_output = gbq._parse_data(test_schema, test_page) - 
correct_output = DataFrame({'VALID_STRING': ['PI']}) + correct_output = DataFrame({'valid_string': ['PI']}) tm.assert_frame_equal(test_output, correct_output) def test_read_gbq_with_invalid_private_key_json_should_fail(self): @@ -435,12 +438,12 @@ def test_read_gbq_with_empty_private_key_file_should_fail(self): private_key=empty_file_path) def test_read_gbq_with_corrupted_private_key_json_should_fail(self): - _skip_if_no_private_key_path() + _skip_if_no_private_key_contents() with tm.assertRaises(gbq.InvalidPrivateKeyFormat): gbq.read_gbq( 'SELECT 1', project_id='x', - private_key=re.sub('[a-z]', '9', _get_private_key_path())) + private_key=re.sub('[a-z]', '9', _get_private_key_contents())) class TestReadGBQIntegration(tm.TestCase): @@ -475,112 +478,207 @@ def tearDown(self): pass def test_should_read_as_user_account(self): - if _in_travis_environment(): - raise nose.SkipTest("Cannot run local auth in travis environment") + _skip_local_auth_if_in_travis_env() - query = 'SELECT "PI" as VALID_STRING' + query = 'SELECT "PI" AS valid_string' df = gbq.read_gbq(query, project_id=_get_project_id()) - tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) def test_should_read_as_service_account_with_key_path(self): _skip_if_no_private_key_path() - query = 'SELECT "PI" as VALID_STRING' + query = 'SELECT "PI" AS valid_string' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) def test_should_read_as_service_account_with_key_contents(self): _skip_if_no_private_key_contents() - query = 'SELECT "PI" as VALID_STRING' + query = 'SELECT "PI" AS valid_string' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_contents()) - tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) + + +class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): + + @classmethod + def setUpClass(cls): + # - GLOBAL CLASS FIXTURES - + # put here any instruction you want to execute only *ONCE* *BEFORE* + # executing *ALL* tests described below. + + _skip_if_no_project_id() + _skip_if_no_private_key_path() + + _setup_common() + + def setUp(self): + # - PER-TEST FIXTURES - + # put here any instruction you want to be run *BEFORE* *EVERY* test is + # executed. + pass + + @classmethod + def tearDownClass(cls): + # - GLOBAL CLASS FIXTURES - + # put here any instruction you want to execute only *ONCE* *AFTER* + # executing all tests. + pass + + def tearDown(self): + # - PER-TEST FIXTURES - + # put here any instructions you want to be run *AFTER* *EVERY* test is + # executed. 
+ pass def test_should_properly_handle_valid_strings(self): - query = 'SELECT "PI" as VALID_STRING' + query = 'SELECT "PI" AS valid_string' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) def test_should_properly_handle_empty_strings(self): - query = 'SELECT "" as EMPTY_STRING' + query = 'SELECT "" AS empty_string' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'EMPTY_STRING': [""]})) + tm.assert_frame_equal(df, DataFrame({'empty_string': [""]})) def test_should_properly_handle_null_strings(self): - query = 'SELECT STRING(NULL) as NULL_STRING' + query = 'SELECT STRING(NULL) AS null_string' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'NULL_STRING': [None]})) + tm.assert_frame_equal(df, DataFrame({'null_string': [None]})) def test_should_properly_handle_valid_integers(self): - query = 'SELECT INTEGER(3) as VALID_INTEGER' + query = 'SELECT INTEGER(3) AS valid_integer' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal(df, DataFrame({'valid_integer': [3]})) + + def test_should_properly_handle_nullable_integers(self): + query = '''SELECT * FROM + (SELECT 1 AS nullable_integer), + (SELECT NULL AS nullable_integer)''' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'VALID_INTEGER': [3]})) + tm.assert_frame_equal( + df, DataFrame({'nullable_integer': [1, None]}).astype(object)) + + def test_should_properly_handle_valid_longs(self): + query = 'SELECT 1 << 62 AS valid_long' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal( + df, DataFrame({'valid_long': [1 << 62]})) + + def test_should_properly_handle_nullable_longs(self): + query = '''SELECT * FROM + (SELECT 1 << 62 AS nullable_long), + (SELECT NULL AS nullable_long)''' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal( + df, DataFrame({'nullable_long': [1 << 62, None]}).astype(object)) def test_should_properly_handle_null_integers(self): - query = 'SELECT INTEGER(NULL) as NULL_INTEGER' + query = 'SELECT INTEGER(NULL) AS null_integer' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'NULL_INTEGER': [np.nan]})) + tm.assert_frame_equal(df, DataFrame({'null_integer': [None]})) def test_should_properly_handle_valid_floats(self): - query = 'SELECT PI() as VALID_FLOAT' + from math import pi + query = 'SELECT PI() AS valid_float' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal(df, DataFrame( + {'valid_float': [pi]})) + + def test_should_properly_handle_nullable_floats(self): + from math import pi + query = '''SELECT * FROM + (SELECT PI() AS nullable_float), + (SELECT NULL AS nullable_float)''' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal( + df, DataFrame({'nullable_float': [pi, None]})) + + def test_should_properly_handle_valid_doubles(self): + from math import pi + query = 'SELECT PI() * POW(10, 307) AS 
valid_double' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal(df, DataFrame( - {'VALID_FLOAT': [3.141592653589793]})) + {'valid_double': [pi * 10 ** 307]})) + + def test_should_properly_handle_nullable_doubles(self): + from math import pi + query = '''SELECT * FROM + (SELECT PI() * POW(10, 307) AS nullable_double), + (SELECT NULL AS nullable_double)''' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal( + df, DataFrame({'nullable_double': [pi * 10 ** 307, None]})) def test_should_properly_handle_null_floats(self): - query = 'SELECT FLOAT(NULL) as NULL_FLOAT' + query = 'SELECT FLOAT(NULL) AS null_float' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'NULL_FLOAT': [np.nan]})) + tm.assert_frame_equal(df, DataFrame({'null_float': [np.nan]})) def test_should_properly_handle_timestamp_unix_epoch(self): - query = 'SELECT TIMESTAMP("1970-01-01 00:00:00") as UNIX_EPOCH' + query = 'SELECT TIMESTAMP("1970-01-01 00:00:00") AS unix_epoch' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal(df, DataFrame( - {'UNIX_EPOCH': [np.datetime64('1970-01-01T00:00:00.000000Z')]})) + {'unix_epoch': [np.datetime64('1970-01-01T00:00:00.000000Z')]})) def test_should_properly_handle_arbitrary_timestamp(self): - query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") as VALID_TIMESTAMP' + query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") AS valid_timestamp' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal(df, DataFrame({ - 'VALID_TIMESTAMP': [np.datetime64('2004-09-15T05:00:00.000000Z')] + 'valid_timestamp': [np.datetime64('2004-09-15T05:00:00.000000Z')] })) def test_should_properly_handle_null_timestamp(self): - query = 'SELECT TIMESTAMP(NULL) as NULL_TIMESTAMP' + query = 'SELECT TIMESTAMP(NULL) AS null_timestamp' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'NULL_TIMESTAMP': [NaT]})) + tm.assert_frame_equal(df, DataFrame({'null_timestamp': [NaT]})) def test_should_properly_handle_true_boolean(self): - query = 'SELECT BOOLEAN(TRUE) as TRUE_BOOLEAN' + query = 'SELECT BOOLEAN(TRUE) AS true_boolean' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'TRUE_BOOLEAN': [True]})) + tm.assert_frame_equal(df, DataFrame({'true_boolean': [True]})) def test_should_properly_handle_false_boolean(self): - query = 'SELECT BOOLEAN(FALSE) as FALSE_BOOLEAN' + query = 'SELECT BOOLEAN(FALSE) AS false_boolean' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'FALSE_BOOLEAN': [False]})) + tm.assert_frame_equal(df, DataFrame({'false_boolean': [False]})) def test_should_properly_handle_null_boolean(self): - query = 'SELECT BOOLEAN(NULL) as NULL_BOOLEAN' + query = 'SELECT BOOLEAN(NULL) AS null_boolean' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal(df, DataFrame({'null_boolean': [None]})) + + def test_should_properly_handle_nullable_booleans(self): + query = '''SELECT * FROM + (SELECT BOOLEAN(TRUE) AS nullable_boolean), + (SELECT NULL AS nullable_boolean)''' df = gbq.read_gbq(query, 
project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'NULL_BOOLEAN': [None]})) + tm.assert_frame_equal( + df, DataFrame({'nullable_boolean': [True, None]}).astype(object)) def test_unicode_string_conversion_and_normalization(self): correct_test_datatype = DataFrame( - {'UNICODE_STRING': [u("\xe9\xfc")]} + {'unicode_string': [u("\xe9\xfc")]} ) unicode_string = "\xc3\xa9\xc3\xbc" @@ -588,40 +686,40 @@ def test_unicode_string_conversion_and_normalization(self): if compat.PY3: unicode_string = unicode_string.encode('latin-1').decode('utf8') - query = 'SELECT "{0}" as UNICODE_STRING'.format(unicode_string) + query = 'SELECT "{0}" AS unicode_string'.format(unicode_string) df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal(df, correct_test_datatype) def test_index_column(self): - query = "SELECT 'a' as STRING_1, 'b' as STRING_2" + query = "SELECT 'a' AS string_1, 'b' AS string_2" result_frame = gbq.read_gbq(query, project_id=_get_project_id(), - index_col="STRING_1", + index_col="string_1", private_key=_get_private_key_path()) correct_frame = DataFrame( - {'STRING_1': ['a'], 'STRING_2': ['b']}).set_index("STRING_1") + {'string_1': ['a'], 'string_2': ['b']}).set_index("string_1") tm.assert_equal(result_frame.index.name, correct_frame.index.name) def test_column_order(self): - query = "SELECT 'a' as STRING_1, 'b' as STRING_2, 'c' as STRING_3" - col_order = ['STRING_3', 'STRING_1', 'STRING_2'] + query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" + col_order = ['string_3', 'string_1', 'string_2'] result_frame = gbq.read_gbq(query, project_id=_get_project_id(), col_order=col_order, private_key=_get_private_key_path()) - correct_frame = DataFrame({'STRING_1': ['a'], 'STRING_2': [ - 'b'], 'STRING_3': ['c']})[col_order] + correct_frame = DataFrame({'string_1': ['a'], 'string_2': [ + 'b'], 'string_3': ['c']})[col_order] tm.assert_frame_equal(result_frame, correct_frame) def test_column_order_plus_index(self): - query = "SELECT 'a' as STRING_1, 'b' as STRING_2, 'c' as STRING_3" - col_order = ['STRING_3', 'STRING_2'] + query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" + col_order = ['string_3', 'string_2'] result_frame = gbq.read_gbq(query, project_id=_get_project_id(), - index_col='STRING_1', col_order=col_order, + index_col='string_1', col_order=col_order, private_key=_get_private_key_path()) correct_frame = DataFrame( - {'STRING_1': ['a'], 'STRING_2': ['b'], 'STRING_3': ['c']}) - correct_frame.set_index('STRING_1', inplace=True) + {'string_1': ['a'], 'string_2': ['b'], 'string_3': ['c']}) + correct_frame.set_index('string_1', inplace=True) correct_frame = correct_frame[col_order] tm.assert_frame_equal(result_frame, correct_frame) @@ -655,14 +753,17 @@ def test_download_dataset_larger_than_200k_rows(self): def test_zero_rows(self): # Bug fix for https://github.com/pandas-dev/pandas/issues/10273 - df = gbq.read_gbq("SELECT title, id " + df = gbq.read_gbq("SELECT title, id, is_bot, " + "SEC_TO_TIMESTAMP(timestamp) ts " "FROM [publicdata:samples.wikipedia] " "WHERE timestamp=-9999999", project_id=_get_project_id(), private_key=_get_private_key_path()) page_array = np.zeros( - (0,), dtype=[('title', object), ('id', np.dtype(float))]) - expected_result = DataFrame(page_array, columns=['title', 'id']) + (0,), dtype=[('title', object), ('id', np.dtype(int)), + ('is_bot', np.dtype(bool)), ('ts', 'M8[ns]')]) + expected_result = DataFrame( + page_array, 
columns=['title', 'id', 'is_bot', 'ts']) self.assert_frame_equal(df, expected_result) def test_legacy_sql(self): @@ -715,7 +816,7 @@ def test_invalid_option_for_sql_dialect(self): dialect='standard', private_key=_get_private_key_path()) def test_query_with_parameters(self): - sql_statement = "SELECT @param1 + @param2 as VALID_RESULT" + sql_statement = "SELECT @param1 + @param2 AS valid_result" config = { 'query': { "useLegacySql": False, @@ -753,11 +854,11 @@ def test_query_with_parameters(self): df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path(), configuration=config) - tm.assert_frame_equal(df, DataFrame({'VALID_RESULT': [3]})) + tm.assert_frame_equal(df, DataFrame({'valid_result': [3]})) def test_query_inside_configuration(self): - query_no_use = 'SELECT "PI_WRONG" as VALID_STRING' - query = 'SELECT "PI" as VALID_STRING' + query_no_use = 'SELECT "PI_WRONG" AS valid_string' + query = 'SELECT "PI" AS valid_string' config = { 'query': { "query": query, @@ -774,7 +875,7 @@ def test_query_inside_configuration(self): df = gbq.read_gbq(None, project_id=_get_project_id(), private_key=_get_private_key_path(), configuration=config) - tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) def test_configuration_without_query(self): sql_statement = 'SELECT 1' @@ -800,7 +901,7 @@ def test_configuration_without_query(self): configuration=config) -class TestToGBQIntegration(tm.TestCase): +class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. # Make sure to modify the for loop range in the tearDownClass when a new @@ -814,6 +915,7 @@ def setUpClass(cls): # executing *ALL* tests described below. _skip_if_no_project_id() + _skip_if_no_private_key_path() _setup_common() clean_gbq_environment(_get_private_key_path()) @@ -859,11 +961,11 @@ def test_upload_data(self): sleep(30) # <- Curses Google!!! - result = gbq.read_gbq("SELECT COUNT(*) as NUM_ROWS FROM {0}" + result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(destination_table), project_id=_get_project_id(), private_key=_get_private_key_path()) - self.assertEqual(result['NUM_ROWS'][0], test_size) + self.assertEqual(result['num_rows'][0], test_size) def test_upload_data_if_table_exists_fail(self): destination_table = DESTINATION_TABLE + "2" @@ -899,11 +1001,11 @@ def test_upload_data_if_table_exists_append(self): sleep(30) # <- Curses Google!!! - result = gbq.read_gbq("SELECT COUNT(*) as NUM_ROWS FROM {0}" + result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(destination_table), project_id=_get_project_id(), private_key=_get_private_key_path()) - self.assertEqual(result['NUM_ROWS'][0], test_size * 2) + self.assertEqual(result['num_rows'][0], test_size * 2) # Try inserting with a different schema, confirm failure with tm.assertRaises(gbq.InvalidSchema): @@ -932,11 +1034,11 @@ def test_upload_data_if_table_exists_replace(self): sleep(30) # <- Curses Google!!! 
- result = gbq.read_gbq("SELECT COUNT(*) as NUM_ROWS FROM {0}" + result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(destination_table), project_id=_get_project_id(), private_key=_get_private_key_path()) - self.assertEqual(result['NUM_ROWS'][0], 5) + self.assertEqual(result['num_rows'][0], 5) @tm.slow def test_google_upload_errors_should_raise_exception(self): @@ -1113,7 +1215,7 @@ def test_dataset_does_not_exist(self): DATASET_ID + "_not_found"), 'Expected dataset not to exist') -class TestToGBQIntegrationServiceAccountKeyPath(tm.TestCase): +class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. # Make sure to modify the for loop range in the tearDownClass when a new @@ -1128,10 +1230,10 @@ def setUpClass(cls): # executing *ALL* tests described below. _skip_if_no_project_id() - _skip_if_no_private_key_path() + _skip_local_auth_if_in_travis_env() _setup_common() - clean_gbq_environment(_get_private_key_path()) + clean_gbq_environment() def setUp(self): # - PER-TEST FIXTURES - @@ -1145,7 +1247,7 @@ def tearDownClass(cls): # put here any instruction you want to execute only *ONCE* *AFTER* # executing all tests. - clean_gbq_environment(_get_private_key_path()) + clean_gbq_environment() def tearDown(self): # - PER-TEST FIXTURES - @@ -1153,26 +1255,24 @@ def tearDown(self): # is executed. pass - def test_upload_data_as_service_account_with_key_path(self): + def test_upload_data(self): destination_table = "{0}.{1}".format(DATASET_ID + "2", TABLE_ID + "1") test_size = 10 df = make_mixed_dataframe_v2(test_size) - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_path()) + gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000) sleep(30) # <- Curses Google!!! result = gbq.read_gbq( - "SELECT COUNT(*) as NUM_ROWS FROM {0}".format(destination_table), - project_id=_get_project_id(), - private_key=_get_private_key_path()) + "SELECT COUNT(*) AS num_rows FROM {0}".format(destination_table), + project_id=_get_project_id()) - self.assertEqual(result['NUM_ROWS'][0], test_size) + self.assertEqual(result['num_rows'][0], test_size) -class TestToGBQIntegrationServiceAccountKeyContents(tm.TestCase): +class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. # Make sure to modify the for loop range in the tearDownClass when a new @@ -1212,7 +1312,7 @@ def tearDown(self): # is executed. pass - def test_upload_data_as_service_account_with_key_contents(self): + def test_upload_data(self): destination_table = "{0}.{1}".format(DATASET_ID + "3", TABLE_ID + "1") test_size = 10 @@ -1224,7 +1324,7 @@ def test_upload_data_as_service_account_with_key_contents(self): sleep(30) # <- Curses Google!!! 
result = gbq.read_gbq(
-            "SELECT COUNT(*) as NUM_ROWS FROM {0}".format(destination_table),
+            "SELECT COUNT(*) AS num_rows FROM {0}".format(destination_table),
            project_id=_get_project_id(),
            private_key=_get_private_key_contents())

-        self.assertEqual(result['NUM_ROWS'][0], test_size)
+        self.assertEqual(result['num_rows'][0], test_size)

From ec9bd44c8c93f26f7ce0c7af4a0b80039df416a0 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Thu, 9 Feb 2017 17:36:19 -0500
Subject: [PATCH 029/933] CLN: strip out and form tools/concat.py from
 tools/merge.py

This will facilitate some changes in ``tools/merge`` w.r.t. #15321; besides,
these routines are independent anyhow.

Author: Jeff Reback

Closes #15358 from jreback/concat and squashes the following commits:

ba34c51 [Jeff Reback] CLN: strip out and form tools/concat.py from tools/merge.py
---
 doc/source/whatsnew/v0.20.0.txt          |   2 +-
 pandas/__init__.py                       |   6 +-
 pandas/core/base.py                      |   4 +-
 pandas/core/categorical.py               |   2 +-
 pandas/core/frame.py                     |   9 +-
 pandas/core/groupby.py                   |  12 +-
 pandas/core/panel.py                     |   2 +-
 pandas/core/reshape.py                   |   2 +-
 pandas/core/series.py                    |   2 +-
 pandas/formats/format.py                 |   4 +-
 pandas/io/gbq.py                         |   4 +-
 pandas/io/pytables.py                    |   8 +-
 pandas/tests/groupby/test_groupby.py     |   5 +-
 pandas/tools/concat.py                   | 615 ++++++++++++++++++++++
 pandas/tools/merge.py                    | 634 +----------------------
 pandas/tools/pivot.py                    |   4 +-
 pandas/tools/plotting.py                 |   2 +-
 pandas/tools/tests/test_join.py          |   3 +-
 pandas/tools/tests/test_merge.py         |   3 +-
 pandas/tools/tests/test_merge_ordered.py |   2 -
 pandas/tools/tests/test_pivot.py         |   4 +-
 21 files changed, 672 insertions(+), 657 deletions(-)
 create mode 100644 pandas/tools/concat.py

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 9eae2b7a33923..2279d0464a5c7 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -385,7 +385,7 @@ Deprecations
- ``TimedeltaIndex.searchsorted()``, ``DatetimeIndex.searchsorted()``, and ``PeriodIndex.searchsorted()`` have deprecated the ``key`` parameter in favor of ``value`` (:issue:`12662`)
- ``DataFrame.astype()`` has deprecated the ``raise_on_error`` parameter in favor of ``errors`` (:issue:`14878`)
- ``Series.sortlevel`` and ``DataFrame.sortlevel`` have been deprecated in favor of ``Series.sort_index`` and ``DataFrame.sort_index`` (:issue:`15099`)
-
+- importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explicit imports (:issue:`15358`)

.. 
_whatsnew_0200.prior_deprecations: diff --git a/pandas/__init__.py b/pandas/__init__.py index 9133e11beaa2b..76542db22a757 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -42,10 +42,10 @@ from pandas.sparse.api import * from pandas.stats.api import * from pandas.tseries.api import * -from pandas.io.api import * from pandas.computation.api import * -from pandas.tools.merge import (merge, concat, ordered_merge, +from pandas.tools.concat import concat +from pandas.tools.merge import (merge, ordered_merge, merge_ordered, merge_asof) from pandas.tools.pivot import pivot_table, crosstab from pandas.tools.plotting import scatter_matrix, plot_params @@ -54,6 +54,8 @@ from pandas.core.reshape import melt from pandas.util.print_versions import show_versions +from pandas.io.api import * + # define the testing framework import pandas.util.testing from pandas.util.nosetester import NoseTester diff --git a/pandas/core/base.py b/pandas/core/base.py index 657da859ddde2..92ec6bb3d73e6 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -472,7 +472,7 @@ def _aggregate(self, arg, *args, **kwargs): arg = new_arg - from pandas.tools.merge import concat + from pandas.tools.concat import concat def _agg_1dim(name, how, subset=None): """ @@ -579,7 +579,7 @@ def _agg(arg, func): return result, True def _aggregate_multiple_funcs(self, arg, _level): - from pandas.tools.merge import concat + from pandas.tools.concat import concat if self.axis != 0: raise NotImplementedError("axis other than 0 is not supported") diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 5980f872f951f..491db2e080953 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1907,7 +1907,7 @@ def describe(self): counts = self.value_counts(dropna=False) freqs = counts / float(counts.sum()) - from pandas.tools.merge import concat + from pandas.tools.concat import concat result = concat([counts, freqs], axis=1) result.columns = ['counts', 'freqs'] result.index.name = 'categories' diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 79bdad82af5a3..aa03bfb9a54b9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4402,7 +4402,7 @@ def append(self, other, ignore_index=False, verify_integrity=False): if (self.columns.get_indexer(other.columns) >= 0).all(): other = other.loc[:, self.columns] - from pandas.tools.merge import concat + from pandas.tools.concat import concat if isinstance(other, (list, tuple)): to_concat = [self] + other else: @@ -4532,7 +4532,8 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False): - from pandas.tools.merge import merge, concat + from pandas.tools.merge import merge + from pandas.tools.concat import concat if isinstance(other, Series): if other.name is None: @@ -4636,7 +4637,7 @@ def round(self, decimals=0, *args, **kwargs): Series.round """ - from pandas.tools.merge import concat + from pandas.tools.concat import concat def _dict_round(df, decimals): for col, vals in df.iteritems(): @@ -5306,7 +5307,7 @@ def isin(self, values): """ if isinstance(values, dict): from collections import defaultdict - from pandas.tools.merge import concat + from pandas.tools.concat import concat values = defaultdict(list, values) return concat((self.iloc[:, [i]].isin(values[col]) for i, col in enumerate(self.columns)), axis=1) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 99220232114ce..53b6dbe6075cf 100644 --- 
a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -854,7 +854,7 @@ def _wrap_applied_output(self, *args, **kwargs): raise AbstractMethodError(self) def _concat_objects(self, keys, values, not_indexed_same=False): - from pandas.tools.merge import concat + from pandas.tools.concat import concat def reset_identity(values): # reset the identities of the components @@ -3507,7 +3507,7 @@ def first_non_None_value(values): # still a series # path added as of GH 5545 elif all_indexed_same: - from pandas.tools.merge import concat + from pandas.tools.concat import concat return concat(values) if not all_indexed_same: @@ -3540,7 +3540,7 @@ def first_non_None_value(values): else: # GH5788 instead of stacking; concat gets the # dtypes correct - from pandas.tools.merge import concat + from pandas.tools.concat import concat result = concat(values, keys=key_index, names=key_index.names, axis=self.axis).unstack() @@ -3588,7 +3588,7 @@ def first_non_None_value(values): not_indexed_same=not_indexed_same) def _transform_general(self, func, *args, **kwargs): - from pandas.tools.merge import concat + from pandas.tools.concat import concat applied = [] obj = self._obj_with_exclusions @@ -3980,7 +3980,7 @@ def _iterate_column_groupbys(self): exclusions=self.exclusions) def _apply_to_column_groupbys(self, func): - from pandas.tools.merge import concat + from pandas.tools.concat import concat return concat( (func(col_groupby) for _, col_groupby in self._iterate_column_groupbys()), @@ -4061,7 +4061,7 @@ def groupby_series(obj, col=None): if isinstance(obj, Series): results = groupby_series(obj) else: - from pandas.tools.merge import concat + from pandas.tools.concat import concat results = [groupby_series(obj[col], col) for col in obj.columns] results = concat(results, axis=1) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 6da10305eb4fc..4a6c6cf291316 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1282,7 +1282,7 @@ def join(self, other, how='left', lsuffix='', rsuffix=''): ------- joined : Panel """ - from pandas.tools.merge import concat + from pandas.tools.concat import concat if isinstance(other, Panel): join_major, join_minor = self._get_join_index(other, how) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index d6287f17c8387..bd0358abf67d5 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -1194,7 +1194,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, -------- Series.str.get_dummies """ - from pandas.tools.merge import concat + from pandas.tools.concat import concat from itertools import cycle if isinstance(data, DataFrame): diff --git a/pandas/core/series.py b/pandas/core/series.py index 43f16f690692a..e1eac8f66017e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1588,7 +1588,7 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): """ - from pandas.tools.merge import concat + from pandas.tools.concat import concat if isinstance(to_append, (list, tuple)): to_concat = [self] + to_append diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 439b96d650204..1a7a06199ad8a 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -165,7 +165,7 @@ def __init__(self, series, buf=None, length=True, header=True, index=True, self._chk_truncate() def _chk_truncate(self): - from pandas.tools.merge import concat + from pandas.tools.concat import concat max_rows = self.max_rows truncate_v = max_rows and (len(self.series) > max_rows) series = self.series @@ -406,7 
+406,7 @@ def _chk_truncate(self): Checks whether the frame should be truncated. If so, slices the frame up. """ - from pandas.tools.merge import concat + from pandas.tools.concat import concat # Column of which first element is used to determine width of a dot col self.tr_size_col = -1 diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 76c228418a616..169a2b1df9b4c 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -10,9 +10,7 @@ import numpy as np from distutils.version import StrictVersion -from pandas import compat -from pandas.core.api import DataFrame -from pandas.tools.merge import concat +from pandas import compat, DataFrame, concat from pandas.core.common import PandasError from pandas.compat import lzip, bytes_to_str diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9f161dc5ec50e..9224f7d3d9a94 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -26,13 +26,12 @@ import pandas as pd from pandas import (Series, DataFrame, Panel, Panel4D, Index, - MultiIndex, Int64Index, isnull) + MultiIndex, Int64Index, isnull, concat, + SparseSeries, SparseDataFrame, PeriodIndex, + DatetimeIndex, TimedeltaIndex) from pandas.core import config from pandas.io.common import _stringify_path -from pandas.sparse.api import SparseSeries, SparseDataFrame from pandas.sparse.array import BlockIndex, IntIndex -from pandas.tseries.api import PeriodIndex, DatetimeIndex -from pandas.tseries.tdi import TimedeltaIndex from pandas.core.base import StringMixin from pandas.formats.printing import adjoin, pprint_thing from pandas.core.common import _asarray_tuplesafe, PerformanceWarning @@ -42,7 +41,6 @@ _block2d_to_blocknd, _factor_indexer, _block_shape) from pandas.core.index import _ensure_index -from pandas.tools.merge import concat from pandas import compat from pandas.compat import u_safe as u, PY3, range, lrange, string_types, filter from pandas.core.config import get_option diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 458e869130190..53f85349834ac 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -6,7 +6,8 @@ from numpy import nan from pandas import (date_range, bdate_range, Timestamp, - isnull, Index, MultiIndex, DataFrame, Series) + isnull, Index, MultiIndex, DataFrame, Series, + concat, Panel) from pandas.core.common import UnsupportedFunctionCall from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal, @@ -14,8 +15,6 @@ from pandas.compat import (range, long, lrange, StringIO, lmap, lzip, map, zip, builtins, OrderedDict, product as cart_product) from pandas import compat -from pandas.core.panel import Panel -from pandas.tools.merge import concat from collections import defaultdict import pandas.core.common as com import numpy as np diff --git a/pandas/tools/concat.py b/pandas/tools/concat.py new file mode 100644 index 0000000000000..dbbc831b19d1d --- /dev/null +++ b/pandas/tools/concat.py @@ -0,0 +1,615 @@ +""" +concat routines +""" + +import numpy as np +from pandas import compat, DataFrame, Series, Index, MultiIndex +from pandas.core.index import (_get_combined_index, + _ensure_index, _get_consensus_names, + _all_indexes_same) +from pandas.core.categorical import (_factorize_from_iterable, + _factorize_from_iterables) +from pandas.core.internals import concatenate_block_managers +from pandas.core import common as com +from pandas.core.generic import NDFrame +import pandas.types.concat as _concat + +# 
--------------------------------------------------------------------- +# Concatenate DataFrame objects + + +def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, + keys=None, levels=None, names=None, verify_integrity=False, + copy=True): + """ + Concatenate pandas objects along a particular axis with optional set logic + along the other axes. + + Can also add a layer of hierarchical indexing on the concatenation axis, + which may be useful if the labels are the same (or overlapping) on + the passed axis number. + + Parameters + ---------- + objs : a sequence or mapping of Series, DataFrame, or Panel objects + If a dict is passed, the sorted keys will be used as the `keys` + argument, unless it is passed, in which case the values will be + selected (see below). Any None objects will be dropped silently unless + they are all None in which case a ValueError will be raised + axis : {0/'index', 1/'columns'}, default 0 + The axis to concatenate along + join : {'inner', 'outer'}, default 'outer' + How to handle indexes on other axis(es) + join_axes : list of Index objects + Specific indexes to use for the other n - 1 axes instead of performing + inner/outer set logic + ignore_index : boolean, default False + If True, do not use the index values along the concatenation axis. The + resulting axis will be labeled 0, ..., n - 1. This is useful if you are + concatenating objects where the concatenation axis does not have + meaningful indexing information. Note the index values on the other + axes are still respected in the join. + keys : sequence, default None + If multiple levels passed, should contain tuples. Construct + hierarchical index using the passed keys as the outermost level + levels : list of sequences, default None + Specific levels (unique values) to use for constructing a + MultiIndex. Otherwise they will be inferred from the keys + names : list, default None + Names for the levels in the resulting hierarchical index + verify_integrity : boolean, default False + Check whether the new concatenated axis contains duplicates. This can + be very expensive relative to the actual data concatenation + copy : boolean, default True + If False, do not copy data unnecessarily + + Returns + ------- + concatenated : type of objects + + Notes + ----- + The keys, levels, and names arguments are all optional. + + A walkthrough of how this method fits in with other tools for combining + panda objects can be found `here + `__. + + See Also + -------- + Series.append + DataFrame.append + DataFrame.join + DataFrame.merge + + Examples + -------- + Combine two ``Series``. + + >>> s1 = pd.Series(['a', 'b']) + >>> s2 = pd.Series(['c', 'd']) + >>> pd.concat([s1, s2]) + 0 a + 1 b + 0 c + 1 d + dtype: object + + Clear the existing index and reset it in the result + by setting the ``ignore_index`` option to ``True``. + + >>> pd.concat([s1, s2], ignore_index=True) + 0 a + 1 b + 2 c + 3 d + dtype: object + + Add a hierarchical index at the outermost level of + the data with the ``keys`` option. + + >>> pd.concat([s1, s2], keys=['s1', 's2',]) + s1 0 a + 1 b + s2 0 c + 1 d + dtype: object + + Label the index keys you create with the ``names`` option. + + >>> pd.concat([s1, s2], keys=['s1', 's2'], + ... names=['Series name', 'Row ID']) + Series name Row ID + s1 0 a + 1 b + s2 0 c + 1 d + dtype: object + + Combine two ``DataFrame`` objects with identical columns. + + >>> df1 = pd.DataFrame([['a', 1], ['b', 2]], + ... 
+
+
+class _Concatenator(object):
+    """
+    Orchestrates a concatenation operation for BlockManagers
+    """
+
+    def __init__(self, objs, axis=0, join='outer', join_axes=None,
+                 keys=None, levels=None, names=None,
+                 ignore_index=False, verify_integrity=False, copy=True):
+        if isinstance(objs, (NDFrame, compat.string_types)):
+            raise TypeError('first argument must be an iterable of pandas '
+                            'objects, you passed an object of type '
+                            '"{0}"'.format(type(objs).__name__))
+
+        if join == 'outer':
+            self.intersect = False
+        elif join == 'inner':
+            self.intersect = True
+        else:  # pragma: no cover
+            raise ValueError('Can only use inner (intersect) or outer '
+                             '(union) join for the other axis')
+
+        if isinstance(objs, dict):
+            if keys is None:
+                keys = sorted(objs)
+            objs = [objs[k] for k in keys]
+        else:
+            objs = list(objs)
+
+        if len(objs) == 0:
+            raise ValueError('No objects to concatenate')
+
+        if keys is None:
+            objs = [obj for obj in objs if obj is not None]
+        else:
+            # #1649
+            clean_keys = []
+            clean_objs = []
+            for k, v in zip(keys, objs):
+                if v is None:
+                    continue
+                clean_keys.append(k)
+                clean_objs.append(v)
+            objs = clean_objs
+            name = getattr(keys, 'name', None)
+            keys = Index(clean_keys, name=name)
+
+        if len(objs) == 0:
+            raise ValueError('All objects passed were None')
+
+        # consolidate data & figure out what our result ndim is going to be
+        ndims = set()
+        for obj in objs:
+            if not isinstance(obj, NDFrame):
+                raise TypeError("cannot concatenate a non-NDFrame object")
+
+            # consolidate
+            obj.consolidate(inplace=True)
+            ndims.add(obj.ndim)
+
+        # get the sample
+        # want the highest ndim that we have, and it must be non-empty
+        # unless all objs are empty
+        sample = None
+        if len(ndims) > 1:
+            max_ndim = max(ndims)
+            for obj in objs:
+                if obj.ndim == max_ndim and np.sum(obj.shape):
+                    sample = obj
+                    break
+
+        else:
+            # filter out the empties if we do not have multi-index
+            # possibilities; note: keep empty Series, as they affect the
+            # result columns / name
+            non_empties = [obj for obj in objs
+                           if sum(obj.shape) > 0 or isinstance(obj, Series)]
+
+            if (len(non_empties) and (keys is None and names is None and
+                                      levels is None and join_axes is None)):
+                objs = non_empties
+                sample = objs[0]
+
+        if sample is None:
+            sample = objs[0]
+        self.objs = objs
+
+        # Standardize axis parameter to int
+        if isinstance(sample, Series):
+            axis = DataFrame()._get_axis_number(axis)
+        else:
+            axis = sample._get_axis_number(axis)
+
+        # Need to flip BlockManager axis in the DataFrame special case
+        self._is_frame = isinstance(sample, DataFrame)
+        if self._is_frame:
+            axis = 1 if axis == 0 else 0
+
+        self._is_series = isinstance(sample, Series)
+        if not 0 <= axis <= sample.ndim:
+            raise AssertionError("axis must be between 0 and {0}, "
+                                 "input was {1}".format(sample.ndim, axis))
+
+        # if we have mixed ndims, then convert to highest ndim
+        # creating column numbers as needed
+        if len(ndims) > 1:
+            current_column = 0
+            max_ndim = sample.ndim
+            self.objs, objs = [], self.objs
+            for obj in objs:
+
+                ndim = obj.ndim
+                if ndim == max_ndim:
+                    pass
+
+                elif ndim != max_ndim - 1:
+                    raise ValueError("cannot concatenate unaligned mixed "
+                                     "dimensional NDFrame objects")
+
+                else:
+                    name = getattr(obj, 'name', None)
+                    if ignore_index or name is None:
+                        name = current_column
+                        current_column += 1
+
+                    # doing a row-wise concatenation so need everything
+                    # to line up
+                    if self._is_frame and axis == 1:
+                        name = 0
+                    obj = sample._constructor({name: obj})
+
+                self.objs.append(obj)
+
+        # note: this is the BlockManager axis (since DataFrame is transposed)
+        self.axis = axis
+        self.join_axes = join_axes
+        self.keys = keys
+        self.names = names or getattr(keys, 'names', None)
+        self.levels = levels
+
+        self.ignore_index = ignore_index
+        self.verify_integrity = verify_integrity
+        self.copy = copy
+
+        self.new_axes = self._get_new_axes()
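The mixed-ndim branch at the end of ``__init__`` above upgrades each lower-dimensional object to a one-column frame, named either by its ``name`` attribute or, when it has none (or when ``ignore_index`` is set), by a running integer. A small sketch of the observable effect:

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2]})
    named = pd.Series([3, 4], name='b')
    unnamed = pd.Series([5, 6])

    pd.concat([df, named], axis=1)    # columns: ['a', 'b']
    pd.concat([df, unnamed], axis=1)  # columns: ['a', 0]  (counter-assigned)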
+    def get_result(self):
+
+        # series only
+        if self._is_series:
+
+            # stack blocks
+            if self.axis == 0:
+                # use only non-empty Series, to keep the result dtype as
+                # much as possible
+                non_empties = [x for x in self.objs if len(x) > 0]
+                if len(non_empties) > 0:
+                    values = [x._values for x in non_empties]
+                else:
+                    values = [x._values for x in self.objs]
+                new_data = _concat._concat_compat(values)
+
+                name = com._consensus_name_attr(self.objs)
+                cons = _concat._get_series_result_type(new_data)
+
+                return (cons(new_data, index=self.new_axes[0],
+                             name=name, dtype=new_data.dtype)
+                        .__finalize__(self, method='concat'))
+
+            # combine as columns in a frame
+            else:
+                data = dict(zip(range(len(self.objs)), self.objs))
+                cons = _concat._get_series_result_type(data)
+
+                index, columns = self.new_axes
+                df = cons(data, index=index)
+                df.columns = columns
+                return df.__finalize__(self, method='concat')
+
+        # combine block managers
+        else:
+            mgrs_indexers = []
+            for obj in self.objs:
+                mgr = obj._data
+                indexers = {}
+                for ax, new_labels in enumerate(self.new_axes):
+                    if ax == self.axis:
+                        # Suppress reindexing on concat axis
+                        continue
+
+                    obj_labels = mgr.axes[ax]
+                    if not new_labels.equals(obj_labels):
+                        indexers[ax] = obj_labels.reindex(new_labels)[1]
+
+                mgrs_indexers.append((obj._data, indexers))
+
+            new_data = concatenate_block_managers(
+                mgrs_indexers, self.new_axes, concat_axis=self.axis,
+                copy=self.copy)
+            if not self.copy:
+                new_data._consolidate_inplace()
+
+            cons = _concat._get_frame_result_type(new_data, self.objs)
+            return (cons._from_axes(new_data, self.new_axes)
+                    .__finalize__(self, method='concat'))
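``get_result`` above takes two routes for Series input: along ``axis=0`` the underlying arrays are concatenated directly (with a common dtype found via ``_concat_compat``), while ``axis=1`` assembles the inputs as columns of a frame. Roughly, from the caller's side:

    import pandas as pd

    a = pd.Series([1, 2], name='x')
    b = pd.Series([3.0, 4.0], name='y')

    pd.concat([a, b])          # one longer Series; ints widened to float64
    pd.concat([a, b], axis=1)  # DataFrame with columns 'x' and 'y'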
+    def _get_result_dim(self):
+        if self._is_series and self.axis == 1:
+            return 2
+        else:
+            return self.objs[0].ndim
+
+    def _get_new_axes(self):
+        ndim = self._get_result_dim()
+        new_axes = [None] * ndim
+
+        if self.join_axes is None:
+            for i in range(ndim):
+                if i == self.axis:
+                    continue
+                new_axes[i] = self._get_comb_axis(i)
+        else:
+            if len(self.join_axes) != ndim - 1:
+                raise AssertionError("length of join_axes must be "
+                                     "equal to {0}".format(ndim - 1))
+
+            # ufff...
+            indices = compat.lrange(ndim)
+            indices.remove(self.axis)
+
+            for i, ax in zip(indices, self.join_axes):
+                new_axes[i] = ax
+
+        new_axes[self.axis] = self._get_concat_axis()
+        return new_axes
+
+    def _get_comb_axis(self, i):
+        if self._is_series:
+            all_indexes = [x.index for x in self.objs]
+        else:
+            try:
+                all_indexes = [x._data.axes[i] for x in self.objs]
+            except IndexError:
+                types = [type(x).__name__ for x in self.objs]
+                raise TypeError("Cannot concatenate list of %s" % types)
+
+        return _get_combined_index(all_indexes, intersect=self.intersect)
+
+    def _get_concat_axis(self):
+        """
+        Return index to be used along concatenation axis.
+        """
+        if self._is_series:
+            if self.axis == 0:
+                indexes = [x.index for x in self.objs]
+            elif self.ignore_index:
+                idx = com._default_index(len(self.objs))
+                return idx
+            elif self.keys is None:
+                names = [None] * len(self.objs)
+                num = 0
+                has_names = False
+                for i, x in enumerate(self.objs):
+                    if not isinstance(x, Series):
+                        raise TypeError("Cannot concatenate type 'Series' "
+                                        "with object of type "
+                                        "%r" % type(x).__name__)
+                    if x.name is not None:
+                        names[i] = x.name
+                        has_names = True
+                    else:
+                        names[i] = num
+                        num += 1
+                if has_names:
+                    return Index(names)
+                else:
+                    return com._default_index(len(self.objs))
+            else:
+                return _ensure_index(self.keys)
+        else:
+            indexes = [x._data.axes[self.axis] for x in self.objs]
+
+        if self.ignore_index:
+            idx = com._default_index(sum(len(i) for i in indexes))
+            return idx
+
+        if self.keys is None:
+            concat_axis = _concat_indexes(indexes)
+        else:
+            concat_axis = _make_concat_multiindex(indexes, self.keys,
+                                                  self.levels, self.names)
+
+        self._maybe_check_integrity(concat_axis)
+
+        return concat_axis
+
+    def _maybe_check_integrity(self, concat_index):
+        if self.verify_integrity:
+            if not concat_index.is_unique:
+                overlap = concat_index.get_duplicates()
+                raise ValueError('Indexes have overlapping values: %s'
+                                 % str(overlap))
+
+
+def _concat_indexes(indexes):
+    return indexes[0].append(indexes[1:])
+
+
+def _make_concat_multiindex(indexes, keys, levels=None, names=None):
+
+    if ((levels is None and isinstance(keys[0], tuple)) or
+            (levels is not None and len(levels) > 1)):
+        zipped = compat.lzip(*keys)
+        if names is None:
+            names = [None] * len(zipped)
+
+        if levels is None:
+            _, levels = _factorize_from_iterables(zipped)
+        else:
+            levels = [_ensure_index(x) for x in levels]
+    else:
+        zipped = [keys]
+        if names is None:
+            names = [None]
+
+        if levels is None:
+            levels = [_ensure_index(keys)]
+        else:
+            levels = [_ensure_index(x) for x in levels]
+
+    if not _all_indexes_same(indexes):
+        label_list = []
+
+        # things are potentially different sizes, so compute the exact labels
+        # for each level and pass those to MultiIndex.from_arrays
+
+        for hlevel, level in zip(zipped, levels):
+            to_concat = []
+            for key, index in zip(hlevel, indexes):
+                try:
+                    i = level.get_loc(key)
+                except KeyError:
+                    raise 
ValueError('Key %s not in level %s' + % (str(key), str(level))) + + to_concat.append(np.repeat(i, len(index))) + label_list.append(np.concatenate(to_concat)) + + concat_index = _concat_indexes(indexes) + + # these go at the end + if isinstance(concat_index, MultiIndex): + levels.extend(concat_index.levels) + label_list.extend(concat_index.labels) + else: + codes, categories = _factorize_from_iterable(concat_index) + levels.append(categories) + label_list.append(codes) + + if len(names) == len(levels): + names = list(names) + else: + # make sure that all of the passed indices have the same nlevels + if not len(set([idx.nlevels for idx in indexes])) == 1: + raise AssertionError("Cannot concat indices that do" + " not have the same number of levels") + + # also copies + names = names + _get_consensus_names(indexes) + + return MultiIndex(levels=levels, labels=label_list, names=names, + verify_integrity=False) + + new_index = indexes[0] + n = len(new_index) + kpieces = len(indexes) + + # also copies + new_names = list(names) + new_levels = list(levels) + + # construct labels + new_labels = [] + + # do something a bit more speedy + + for hlevel, level in zip(zipped, levels): + hlevel = _ensure_index(hlevel) + mapped = level.get_indexer(hlevel) + + mask = mapped == -1 + if mask.any(): + raise ValueError('Values not found in passed level: %s' + % str(hlevel[mask])) + + new_labels.append(np.repeat(mapped, n)) + + if isinstance(new_index, MultiIndex): + new_levels.extend(new_index.levels) + new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels]) + else: + new_levels.append(new_index) + new_labels.append(np.tile(np.arange(n), kpieces)) + + if len(new_names) < len(new_levels): + new_names.extend(new_index.names) + + return MultiIndex(levels=new_levels, labels=new_labels, names=new_names, + verify_integrity=False) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 3fbd83a6f3245..d938c2eeacbef 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -4,19 +4,16 @@ import copy import warnings - import string import numpy as np -from pandas.compat import range, lrange, lzip, zip, map, filter +from pandas.compat import range, lzip, zip, map, filter import pandas.compat as compat -from pandas import (Categorical, DataFrame, Series, +import pandas as pd +from pandas import (Categorical, Series, DataFrame, Index, MultiIndex, Timedelta) -from pandas.core.categorical import (_factorize_from_iterable, - _factorize_from_iterables) from pandas.core.frame import _merge_doc -from pandas.types.generic import ABCSeries from pandas.types.common import (is_datetime64tz_dtype, is_datetime64_dtype, needs_i8_conversion, @@ -33,23 +30,31 @@ _ensure_object, _get_dtype) from pandas.types.missing import na_value_for_dtype - -from pandas.core.generic import NDFrame -from pandas.core.index import (_get_combined_index, - _ensure_index, _get_consensus_names, - _all_indexes_same) from pandas.core.internals import (items_overlap_with_suffix, concatenate_block_managers) from pandas.util.decorators import Appender, Substitution import pandas.core.algorithms as algos import pandas.core.common as com -import pandas.types.concat as _concat import pandas._join as _join import pandas.hashtable as _hash +# back-compat of pseudo-public API +def concat_wrap(): + + def wrapper(*args, **kwargs): + warnings.warn("pandas.tools.merge.concat is deprecated. 
" + "import from the public API: " + "pandas.concat instead", + FutureWarning, stacklevel=3) + return pd.concat(*args, **kwargs) + return wrapper + +concat = concat_wrap() + + @Substitution('\nleft : DataFrame') @Appender(_merge_doc, indents=0) def merge(left, right, how='inner', on=None, left_on=None, right_on=None, @@ -139,6 +144,7 @@ def _groupby_and_merge(by, on, left, right, _merge_pieces, # preserve the original order # if we have a missing piece this can be reset + from pandas.tools.concat import concat result = concat(pieces, ignore_index=True) result = result.reindex(columns=pieces[0].columns, copy=False) return result, lby @@ -793,9 +799,9 @@ def _get_merge_keys(self): left, right = self.left, self.right is_lkey = lambda x: isinstance( - x, (np.ndarray, ABCSeries)) and len(x) == len(left) + x, (np.ndarray, Series)) and len(x) == len(left) is_rkey = lambda x: isinstance( - x, (np.ndarray, ABCSeries)) and len(x) == len(right) + x, (np.ndarray, Series)) and len(x) == len(right) # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A # user could, for example, request 'left_index' and 'left_by'. In a @@ -1419,606 +1425,6 @@ def _get_join_keys(llab, rlab, shape, sort): return _get_join_keys(llab, rlab, shape, sort) -# --------------------------------------------------------------------- -# Concatenate DataFrame objects - - -def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, - keys=None, levels=None, names=None, verify_integrity=False, - copy=True): - """ - Concatenate pandas objects along a particular axis with optional set logic - along the other axes. - - Can also add a layer of hierarchical indexing on the concatenation axis, - which may be useful if the labels are the same (or overlapping) on - the passed axis number. - - Parameters - ---------- - objs : a sequence or mapping of Series, DataFrame, or Panel objects - If a dict is passed, the sorted keys will be used as the `keys` - argument, unless it is passed, in which case the values will be - selected (see below). Any None objects will be dropped silently unless - they are all None in which case a ValueError will be raised - axis : {0/'index', 1/'columns'}, default 0 - The axis to concatenate along - join : {'inner', 'outer'}, default 'outer' - How to handle indexes on other axis(es) - join_axes : list of Index objects - Specific indexes to use for the other n - 1 axes instead of performing - inner/outer set logic - ignore_index : boolean, default False - If True, do not use the index values along the concatenation axis. The - resulting axis will be labeled 0, ..., n - 1. This is useful if you are - concatenating objects where the concatenation axis does not have - meaningful indexing information. Note the index values on the other - axes are still respected in the join. - keys : sequence, default None - If multiple levels passed, should contain tuples. Construct - hierarchical index using the passed keys as the outermost level - levels : list of sequences, default None - Specific levels (unique values) to use for constructing a - MultiIndex. Otherwise they will be inferred from the keys - names : list, default None - Names for the levels in the resulting hierarchical index - verify_integrity : boolean, default False - Check whether the new concatenated axis contains duplicates. 
This can - be very expensive relative to the actual data concatenation - copy : boolean, default True - If False, do not copy data unnecessarily - - Returns - ------- - concatenated : type of objects - - Notes - ----- - The keys, levels, and names arguments are all optional. - - A walkthrough of how this method fits in with other tools for combining - panda objects can be found `here - `__. - - See Also - -------- - Series.append - DataFrame.append - DataFrame.join - DataFrame.merge - - Examples - -------- - Combine two ``Series``. - - >>> s1 = pd.Series(['a', 'b']) - >>> s2 = pd.Series(['c', 'd']) - >>> pd.concat([s1, s2]) - 0 a - 1 b - 0 c - 1 d - dtype: object - - Clear the existing index and reset it in the result - by setting the ``ignore_index`` option to ``True``. - - >>> pd.concat([s1, s2], ignore_index=True) - 0 a - 1 b - 2 c - 3 d - dtype: object - - Add a hierarchical index at the outermost level of - the data with the ``keys`` option. - - >>> pd.concat([s1, s2], keys=['s1', 's2',]) - s1 0 a - 1 b - s2 0 c - 1 d - dtype: object - - Label the index keys you create with the ``names`` option. - - >>> pd.concat([s1, s2], keys=['s1', 's2'], - ... names=['Series name', 'Row ID']) - Series name Row ID - s1 0 a - 1 b - s2 0 c - 1 d - dtype: object - - Combine two ``DataFrame`` objects with identical columns. - - >>> df1 = pd.DataFrame([['a', 1], ['b', 2]], - ... columns=['letter', 'number']) - >>> df1 - letter number - 0 a 1 - 1 b 2 - >>> df2 = pd.DataFrame([['c', 3], ['d', 4]], - ... columns=['letter', 'number']) - >>> df2 - letter number - 0 c 3 - 1 d 4 - >>> pd.concat([df1, df2]) - letter number - 0 a 1 - 1 b 2 - 0 c 3 - 1 d 4 - - Combine ``DataFrame`` objects with overlapping columns - and return everything. Columns outside the intersection will - be filled with ``NaN`` values. - - >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], - ... columns=['letter', 'number', 'animal']) - >>> df3 - letter number animal - 0 c 3 cat - 1 d 4 dog - >>> pd.concat([df1, df3]) - animal letter number - 0 NaN a 1 - 1 NaN b 2 - 0 cat c 3 - 1 dog d 4 - - Combine ``DataFrame`` objects with overlapping columns - and return only those that are shared by passing ``inner`` to - the ``join`` keyword argument. - - >>> pd.concat([df1, df3], join="inner") - letter number - 0 a 1 - 1 b 2 - 0 c 3 - 1 d 4 - - Combine ``DataFrame`` objects horizontally along the x axis by - passing in ``axis=1``. - - >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']], - ... columns=['animal', 'name']) - >>> pd.concat([df1, df4], axis=1) - letter number animal name - 0 a 1 bird polly - 1 b 2 monkey george - - Prevent the result from including duplicate index values with the - ``verify_integrity`` option. 
- - >>> df5 = pd.DataFrame([1], index=['a']) - >>> df5 - 0 - a 1 - >>> df6 = pd.DataFrame([2], index=['a']) - >>> df6 - 0 - a 2 - >>> pd.concat([df5, df6], verify_integrity=True) - ValueError: Indexes have overlapping values: ['a'] - """ - op = _Concatenator(objs, axis=axis, join_axes=join_axes, - ignore_index=ignore_index, join=join, - keys=keys, levels=levels, names=names, - verify_integrity=verify_integrity, - copy=copy) - return op.get_result() - - -class _Concatenator(object): - """ - Orchestrates a concatenation operation for BlockManagers - """ - - def __init__(self, objs, axis=0, join='outer', join_axes=None, - keys=None, levels=None, names=None, - ignore_index=False, verify_integrity=False, copy=True): - if isinstance(objs, (NDFrame, compat.string_types)): - raise TypeError('first argument must be an iterable of pandas ' - 'objects, you passed an object of type ' - '"{0}"'.format(type(objs).__name__)) - - if join == 'outer': - self.intersect = False - elif join == 'inner': - self.intersect = True - else: # pragma: no cover - raise ValueError('Only can inner (intersect) or outer (union) ' - 'join the other axis') - - if isinstance(objs, dict): - if keys is None: - keys = sorted(objs) - objs = [objs[k] for k in keys] - else: - objs = list(objs) - - if len(objs) == 0: - raise ValueError('No objects to concatenate') - - if keys is None: - objs = [obj for obj in objs if obj is not None] - else: - # #1649 - clean_keys = [] - clean_objs = [] - for k, v in zip(keys, objs): - if v is None: - continue - clean_keys.append(k) - clean_objs.append(v) - objs = clean_objs - name = getattr(keys, 'name', None) - keys = Index(clean_keys, name=name) - - if len(objs) == 0: - raise ValueError('All objects passed were None') - - # consolidate data & figure out what our result ndim is going to be - ndims = set() - for obj in objs: - if not isinstance(obj, NDFrame): - raise TypeError("cannot concatenate a non-NDFrame object") - - # consolidate - obj.consolidate(inplace=True) - ndims.add(obj.ndim) - - # get the sample - # want the higest ndim that we have, and must be non-empty - # unless all objs are empty - sample = None - if len(ndims) > 1: - max_ndim = max(ndims) - for obj in objs: - if obj.ndim == max_ndim and np.sum(obj.shape): - sample = obj - break - - else: - # filter out the empties if we have not multi-index possibiltes - # note to keep empty Series as it affect to result columns / name - non_empties = [obj for obj in objs - if sum(obj.shape) > 0 or isinstance(obj, Series)] - - if (len(non_empties) and (keys is None and names is None and - levels is None and join_axes is None)): - objs = non_empties - sample = objs[0] - - if sample is None: - sample = objs[0] - self.objs = objs - - # Standardize axis parameter to int - if isinstance(sample, Series): - axis = DataFrame()._get_axis_number(axis) - else: - axis = sample._get_axis_number(axis) - - # Need to flip BlockManager axis in the DataFrame special case - self._is_frame = isinstance(sample, DataFrame) - if self._is_frame: - axis = 1 if axis == 0 else 0 - - self._is_series = isinstance(sample, ABCSeries) - if not 0 <= axis <= sample.ndim: - raise AssertionError("axis must be between 0 and {0}, " - "input was {1}".format(sample.ndim, axis)) - - # if we have mixed ndims, then convert to highest ndim - # creating column numbers as needed - if len(ndims) > 1: - current_column = 0 - max_ndim = sample.ndim - self.objs, objs = [], self.objs - for obj in objs: - - ndim = obj.ndim - if ndim == max_ndim: - pass - - elif ndim != max_ndim - 1: - raise 
ValueError("cannot concatenate unaligned mixed " - "dimensional NDFrame objects") - - else: - name = getattr(obj, 'name', None) - if ignore_index or name is None: - name = current_column - current_column += 1 - - # doing a row-wise concatenation so need everything - # to line up - if self._is_frame and axis == 1: - name = 0 - obj = sample._constructor({name: obj}) - - self.objs.append(obj) - - # note: this is the BlockManager axis (since DataFrame is transposed) - self.axis = axis - self.join_axes = join_axes - self.keys = keys - self.names = names or getattr(keys, 'names', None) - self.levels = levels - - self.ignore_index = ignore_index - self.verify_integrity = verify_integrity - self.copy = copy - - self.new_axes = self._get_new_axes() - - def get_result(self): - - # series only - if self._is_series: - - # stack blocks - if self.axis == 0: - # concat Series with length to keep dtype as much - non_empties = [x for x in self.objs if len(x) > 0] - if len(non_empties) > 0: - values = [x._values for x in non_empties] - else: - values = [x._values for x in self.objs] - new_data = _concat._concat_compat(values) - - name = com._consensus_name_attr(self.objs) - cons = _concat._get_series_result_type(new_data) - - return (cons(new_data, index=self.new_axes[0], - name=name, dtype=new_data.dtype) - .__finalize__(self, method='concat')) - - # combine as columns in a frame - else: - data = dict(zip(range(len(self.objs)), self.objs)) - cons = _concat._get_series_result_type(data) - - index, columns = self.new_axes - df = cons(data, index=index) - df.columns = columns - return df.__finalize__(self, method='concat') - - # combine block managers - else: - mgrs_indexers = [] - for obj in self.objs: - mgr = obj._data - indexers = {} - for ax, new_labels in enumerate(self.new_axes): - if ax == self.axis: - # Suppress reindexing on concat axis - continue - - obj_labels = mgr.axes[ax] - if not new_labels.equals(obj_labels): - indexers[ax] = obj_labels.reindex(new_labels)[1] - - mgrs_indexers.append((obj._data, indexers)) - - new_data = concatenate_block_managers( - mgrs_indexers, self.new_axes, concat_axis=self.axis, - copy=self.copy) - if not self.copy: - new_data._consolidate_inplace() - - cons = _concat._get_frame_result_type(new_data, self.objs) - return (cons._from_axes(new_data, self.new_axes) - .__finalize__(self, method='concat')) - - def _get_result_dim(self): - if self._is_series and self.axis == 1: - return 2 - else: - return self.objs[0].ndim - - def _get_new_axes(self): - ndim = self._get_result_dim() - new_axes = [None] * ndim - - if self.join_axes is None: - for i in range(ndim): - if i == self.axis: - continue - new_axes[i] = self._get_comb_axis(i) - else: - if len(self.join_axes) != ndim - 1: - raise AssertionError("length of join_axes must not be " - "equal to {0}".format(ndim - 1)) - - # ufff... - indices = lrange(ndim) - indices.remove(self.axis) - - for i, ax in zip(indices, self.join_axes): - new_axes[i] = ax - - new_axes[self.axis] = self._get_concat_axis() - return new_axes - - def _get_comb_axis(self, i): - if self._is_series: - all_indexes = [x.index for x in self.objs] - else: - try: - all_indexes = [x._data.axes[i] for x in self.objs] - except IndexError: - types = [type(x).__name__ for x in self.objs] - raise TypeError("Cannot concatenate list of %s" % types) - - return _get_combined_index(all_indexes, intersect=self.intersect) - - def _get_concat_axis(self): - """ - Return index to be used along concatenation axis. 
- """ - if self._is_series: - if self.axis == 0: - indexes = [x.index for x in self.objs] - elif self.ignore_index: - idx = com._default_index(len(self.objs)) - return idx - elif self.keys is None: - names = [None] * len(self.objs) - num = 0 - has_names = False - for i, x in enumerate(self.objs): - if not isinstance(x, Series): - raise TypeError("Cannot concatenate type 'Series' " - "with object of type " - "%r" % type(x).__name__) - if x.name is not None: - names[i] = x.name - has_names = True - else: - names[i] = num - num += 1 - if has_names: - return Index(names) - else: - return com._default_index(len(self.objs)) - else: - return _ensure_index(self.keys) - else: - indexes = [x._data.axes[self.axis] for x in self.objs] - - if self.ignore_index: - idx = com._default_index(sum(len(i) for i in indexes)) - return idx - - if self.keys is None: - concat_axis = _concat_indexes(indexes) - else: - concat_axis = _make_concat_multiindex(indexes, self.keys, - self.levels, self.names) - - self._maybe_check_integrity(concat_axis) - - return concat_axis - - def _maybe_check_integrity(self, concat_index): - if self.verify_integrity: - if not concat_index.is_unique: - overlap = concat_index.get_duplicates() - raise ValueError('Indexes have overlapping values: %s' - % str(overlap)) - - -def _concat_indexes(indexes): - return indexes[0].append(indexes[1:]) - - -def _make_concat_multiindex(indexes, keys, levels=None, names=None): - - if ((levels is None and isinstance(keys[0], tuple)) or - (levels is not None and len(levels) > 1)): - zipped = lzip(*keys) - if names is None: - names = [None] * len(zipped) - - if levels is None: - _, levels = _factorize_from_iterables(zipped) - else: - levels = [_ensure_index(x) for x in levels] - else: - zipped = [keys] - if names is None: - names = [None] - - if levels is None: - levels = [_ensure_index(keys)] - else: - levels = [_ensure_index(x) for x in levels] - - if not _all_indexes_same(indexes): - label_list = [] - - # things are potentially different sizes, so compute the exact labels - # for each level and pass those to MultiIndex.from_arrays - - for hlevel, level in zip(zipped, levels): - to_concat = [] - for key, index in zip(hlevel, indexes): - try: - i = level.get_loc(key) - except KeyError: - raise ValueError('Key %s not in level %s' - % (str(key), str(level))) - - to_concat.append(np.repeat(i, len(index))) - label_list.append(np.concatenate(to_concat)) - - concat_index = _concat_indexes(indexes) - - # these go at the end - if isinstance(concat_index, MultiIndex): - levels.extend(concat_index.levels) - label_list.extend(concat_index.labels) - else: - codes, categories = _factorize_from_iterable(concat_index) - levels.append(categories) - label_list.append(codes) - - if len(names) == len(levels): - names = list(names) - else: - # make sure that all of the passed indices have the same nlevels - if not len(set([idx.nlevels for idx in indexes])) == 1: - raise AssertionError("Cannot concat indices that do" - " not have the same number of levels") - - # also copies - names = names + _get_consensus_names(indexes) - - return MultiIndex(levels=levels, labels=label_list, names=names, - verify_integrity=False) - - new_index = indexes[0] - n = len(new_index) - kpieces = len(indexes) - - # also copies - new_names = list(names) - new_levels = list(levels) - - # construct labels - new_labels = [] - - # do something a bit more speedy - - for hlevel, level in zip(zipped, levels): - hlevel = _ensure_index(hlevel) - mapped = level.get_indexer(hlevel) - - mask = mapped == -1 - 
if mask.any(): - raise ValueError('Values not found in passed level: %s' - % str(hlevel[mask])) - - new_labels.append(np.repeat(mapped, n)) - - if isinstance(new_index, MultiIndex): - new_levels.extend(new_index.levels) - new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels]) - else: - new_levels.append(new_index) - new_labels.append(np.tile(np.arange(n), kpieces)) - - if len(new_names) < len(new_levels): - new_names.extend(new_index.names) - - return MultiIndex(levels=new_levels, labels=new_labels, names=new_names, - verify_integrity=False) - def _should_fill(lname, rname): if (not isinstance(lname, compat.string_types) or diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 01eefe5f07173..41fc705691a96 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -2,10 +2,8 @@ from pandas.types.common import is_list_like, is_scalar -from pandas import Series, DataFrame -from pandas.core.index import MultiIndex, Index +from pandas import Series, DataFrame, MultiIndex, Index, concat from pandas.core.groupby import Grouper -from pandas.tools.merge import concat from pandas.tools.util import cartesian_product from pandas.compat import range, lrange, zip from pandas import compat diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index ee70515850b25..0b1ced97d2b81 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -3135,7 +3135,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) else: - from pandas.tools.merge import concat + from pandas.tools.concat import concat keys, frames = zip(*grouped) if grouped.axis == 0: df = concat(frames, keys=keys, axis=1) diff --git a/pandas/tools/tests/test_join.py b/pandas/tools/tests/test_join.py index ff0a494bd7d02..fe5821a637205 100644 --- a/pandas/tools/tests/test_join.py +++ b/pandas/tools/tests/test_join.py @@ -6,9 +6,8 @@ import pandas as pd from pandas.compat import lrange import pandas.compat as compat -from pandas.tools.merge import merge, concat from pandas.util.testing import assert_frame_equal -from pandas import DataFrame, MultiIndex, Series +from pandas import DataFrame, MultiIndex, Series, merge, concat import pandas._join as _join import pandas.util.testing as tm diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index a348a901442c9..d66cd793ec0be 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -8,7 +8,8 @@ import pandas as pd from pandas.compat import lrange, lzip -from pandas.tools.merge import merge, concat, MergeError +from pandas.tools.concat import concat +from pandas.tools.merge import merge, MergeError from pandas.util.testing import (assert_frame_equal, assert_series_equal, slow) diff --git a/pandas/tools/tests/test_merge_ordered.py b/pandas/tools/tests/test_merge_ordered.py index e08cc98e50794..e4a41ea9a28eb 100644 --- a/pandas/tools/tests/test_merge_ordered.py +++ b/pandas/tools/tests/test_merge_ordered.py @@ -40,10 +40,8 @@ def test_ffill(self): def test_multigroup(self): left = pd.concat([self.left, self.left], ignore_index=True) - # right = concat([self.right, self.right], ignore_index=True) left['group'] = ['a'] * 3 + ['b'] * 3 - # right['group'] = ['a'] * 4 + ['b'] * 4 result = merge_ordered(left, self.right, on='key', left_by='group', fill_method='ffill') diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 7f2bb7e724362..f5d91d0088306 100644 --- 
a/pandas/tools/tests/test_pivot.py
+++ b/pandas/tools/tests/test_pivot.py
@@ -3,8 +3,8 @@
 import numpy as np
 import pandas as pd
-from pandas import DataFrame, Series, Index, MultiIndex, Grouper, date_range
-from pandas.tools.merge import concat
+from pandas import (DataFrame, Series, Index, MultiIndex,
+                    Grouper, date_range, concat)
 from pandas.tools.pivot import pivot_table, crosstab
 from pandas.compat import range, product
 import pandas.util.testing as tm

From f593ee824c5649d6e0e61d249f4542c58dfe66c5 Mon Sep 17 00:00:00 2001
From: TrigonaMinima
Date: Fri, 10 Feb 2017 00:33:21 +0530
Subject: [PATCH 030/933] TST: Remaining tseries tests reorg

closes #14854
closes #15359
---
 doc/source/whatsnew/v0.20.0.txt               |   1 +
 pandas/tests/indexes/datetimes/test_ops.py    |  14 +-
 pandas/tests/indexes/datetimes/test_setops.py |  10 +-
 .../tests => tests/tseries}/__init__.py       |   0
 .../tseries}/data/cday-0.14.1.pickle          | Bin
 .../tseries}/data/dateoffset_0_15_2.pickle    |   0
 .../tseries}/test_bin_groupby.py              |   0
 .../tests => tests/tseries}/test_converter.py |   0
 .../tseries}/test_frequencies.py              |   0
 .../tests => tests/tseries}/test_holiday.py   |   0
 .../tests => tests/tseries}/test_offsets.py   |   0
 .../tests => tests/tseries}/test_resample.py  |   0
 .../tests => tests/tseries}/test_timezones.py |   0
 .../tseries/tests/data/daterange_073.pickle   | Bin 650 -> 0 bytes
 pandas/tseries/tests/data/frame.pickle        | Bin 1182 -> 0 bytes
 pandas/tseries/tests/data/series.pickle       | Bin 646 -> 0 bytes
 .../tests/data/series_daterange0.pickle       | Bin 357 -> 0 bytes
 .../tseries/tests/test_timeseries_legacy.py   | 219 ------------------
 setup.py                                      |   5 +-
 19 files changed, 25 insertions(+), 224 deletions(-)
 rename pandas/{tseries/tests => tests/tseries}/__init__.py (100%)
 rename pandas/{tseries/tests => tests/tseries}/data/cday-0.14.1.pickle (100%)
 rename pandas/{tseries/tests => tests/tseries}/data/dateoffset_0_15_2.pickle (100%)
 rename pandas/{tseries/tests => tests/tseries}/test_bin_groupby.py (100%)
 rename pandas/{tseries/tests => tests/tseries}/test_converter.py (100%)
 rename pandas/{tseries/tests => tests/tseries}/test_frequencies.py (100%)
 rename pandas/{tseries/tests => tests/tseries}/test_holiday.py (100%)
 rename pandas/{tseries/tests => tests/tseries}/test_offsets.py (100%)
 rename pandas/{tseries/tests => tests/tseries}/test_resample.py (100%)
 rename pandas/{tseries/tests => tests/tseries}/test_timezones.py (100%)
 delete mode 100644 pandas/tseries/tests/data/daterange_073.pickle
 delete mode 100644 pandas/tseries/tests/data/frame.pickle
 delete mode 100644 pandas/tseries/tests/data/series.pickle
 delete mode 100644 pandas/tseries/tests/data/series_daterange0.pickle
 delete mode 100644 pandas/tseries/tests/test_timeseries_legacy.py

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 2279d0464a5c7..17ce4517035a7 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -371,6 +371,7 @@ Other API Changes
 - ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype``
 - ``DataFrame.asof()`` will return a null filled ``Series`` instead of the scalar ``NaN`` if a match is not found (:issue:`15118`)
 - The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision loss for integers greater than 2**53.
Furthermore, ``FLOAT`` columns with values above 10**4 are no longer cast to ``int64``, which also caused precision loss (:issue:`14064`, :issue:`14305`).
+- Reorganization of timeseries development tests (:issue:`14854`)
 
 .. _whatsnew_0200.deprecations:

diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py
index 63bf07ec041d3..7a5ce3a44681b 100644
--- a/pandas/tests/indexes/datetimes/test_ops.py
+++ b/pandas/tests/indexes/datetimes/test_ops.py
@@ -7,10 +7,12 @@
 import pandas.util.testing as tm
 from pandas.core.common import PerformanceWarning
 from pandas.tseries.index import cdate_range
+from pandas.tseries.frequencies import get_offset, to_offset
 from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta,
                     date_range, TimedeltaIndex, _np_version_under1p10, Index,
                     datetime, Float64Index, offsets, bdate_range)
-from pandas.tseries.offsets import BMonthEnd, CDay, BDay
+from pandas.tseries.offsets import (BMonthEnd, CDay, BDay, Milli, MonthBegin,
+                                    Micro)
 from pandas.tests.test_base import Ops
 
@@ -911,6 +913,16 @@ def test_equals(self):
         self.assertFalse(idx.equals(list(idx3)))
         self.assertFalse(idx.equals(pd.Series(idx3)))
 
+    def test_ms_vs_MS(self):
+        left = get_offset('ms')
+        right = get_offset('MS')
+        self.assertEqual(left, Milli())
+        self.assertEqual(right, MonthBegin())
+
+    def test_rule_aliases(self):
+        rule = to_offset('10us')
+        self.assertEqual(rule, Micro(10))
+
 
 class TestDateTimeIndexToJulianDate(tm.TestCase):

diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py
index 7da660a956e23..8d05a4016ba45 100644
--- a/pandas/tests/indexes/datetimes/test_setops.py
+++ b/pandas/tests/indexes/datetimes/test_setops.py
@@ -6,7 +6,7 @@
 import pandas.util.testing as tm
 from pandas.tseries.index import cdate_range
 from pandas import (DatetimeIndex, date_range, Series, bdate_range, DataFrame,
-                    Int64Index, Index)
+                    Int64Index, Index, to_datetime)
 from pandas.tseries.offsets import Minute, BMonthEnd, MonthEnd
 
 START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
 
@@ -190,6 +190,14 @@ def test_datetimeindex_union_join_empty(self):
         result = dti.join(empty)
         tm.assertIsInstance(result, DatetimeIndex)
 
+    def test_join_nonunique(self):
+        idx1 = to_datetime(['2012-11-06 16:00:11.477563',
+                            '2012-11-06 16:00:11.477563'])
+        idx2 = to_datetime(['2012-11-06 15:11:09.006507',
+                            '2012-11-06 15:11:09.006507'])
+        rs = idx1.join(idx2, how='outer')
+        self.assertTrue(rs.is_monotonic)
+
 
 class TestBusinessDatetimeIndex(tm.TestCase):

diff --git a/pandas/tseries/tests/__init__.py b/pandas/tests/tseries/__init__.py
similarity index 100%
rename from pandas/tseries/tests/__init__.py
rename to pandas/tests/tseries/__init__.py
diff --git a/pandas/tseries/tests/data/cday-0.14.1.pickle b/pandas/tests/tseries/data/cday-0.14.1.pickle
similarity index 100%
rename from pandas/tseries/tests/data/cday-0.14.1.pickle
rename to pandas/tests/tseries/data/cday-0.14.1.pickle
diff --git a/pandas/tseries/tests/data/dateoffset_0_15_2.pickle b/pandas/tests/tseries/data/dateoffset_0_15_2.pickle
similarity index 100%
rename from pandas/tseries/tests/data/dateoffset_0_15_2.pickle
rename to pandas/tests/tseries/data/dateoffset_0_15_2.pickle
diff --git a/pandas/tseries/tests/test_bin_groupby.py b/pandas/tests/tseries/test_bin_groupby.py
similarity index 100%
rename from pandas/tseries/tests/test_bin_groupby.py
rename to pandas/tests/tseries/test_bin_groupby.py
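The moved modules carry a few behavior checks along with them; the hunks above pin down, in particular, that the lowercase ``'ms'`` alias means milliseconds while ``'MS'`` means month start, that multiples of an alias parse, and that an outer join of non-unique DatetimeIndexes stays monotonic. The same calls as in the hunks, restated outside the test classes:

    import pandas as pd
    from pandas.tseries.frequencies import get_offset, to_offset
    from pandas.tseries.offsets import Milli, MonthBegin, Micro

    assert get_offset('ms') == Milli()       # milliseconds
    assert get_offset('MS') == MonthBegin()  # month start
    assert to_offset('10us') == Micro(10)    # multiples of an alias

    idx1 = pd.to_datetime(['2012-11-06 16:00:11.477563'] * 2)
    idx2 = pd.to_datetime(['2012-11-06 15:11:09.006507'] * 2)
    assert idx1.join(idx2, how='outer').is_monotonic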
diff --git a/pandas/tseries/tests/test_converter.py b/pandas/tests/tseries/test_converter.py
similarity index 100%
rename from pandas/tseries/tests/test_converter.py
rename to pandas/tests/tseries/test_converter.py
diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py
similarity index 100%
rename from pandas/tseries/tests/test_frequencies.py
rename to pandas/tests/tseries/test_frequencies.py
diff --git a/pandas/tseries/tests/test_holiday.py b/pandas/tests/tseries/test_holiday.py
similarity index 100%
rename from pandas/tseries/tests/test_holiday.py
rename to pandas/tests/tseries/test_holiday.py
diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tests/tseries/test_offsets.py
similarity index 100%
rename from pandas/tseries/tests/test_offsets.py
rename to pandas/tests/tseries/test_offsets.py
diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tests/tseries/test_resample.py
similarity index 100%
rename from pandas/tseries/tests/test_resample.py
rename to pandas/tests/tseries/test_resample.py
diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tests/tseries/test_timezones.py
similarity index 100%
rename from pandas/tseries/tests/test_timezones.py
rename to pandas/tests/tseries/test_timezones.py
diff --git a/pandas/tseries/tests/data/daterange_073.pickle b/pandas/tseries/tests/data/daterange_073.pickle
deleted file mode 100644
index 0214a023e6338dce54e6daf8b3d94a7275baca66..0000000000000000000000000000000000000000
GIT binary patch
[binary pickle payload omitted]
diff --git a/pandas/tseries/tests/data/frame.pickle b/pandas/tseries/tests/data/frame.pickle
deleted file mode 100644
index b3b100fb43022faf7bd0b949238988afc7de53bf..0000000000000000000000000000000000000000
GIT binary patch
[binary pickle payload omitted]
diff --git a/pandas/tseries/tests/data/series.pickle b/pandas/tseries/tests/data/series.pickle
deleted file mode 100644
index 307a4ac26517384a8267b23a8891e4cf919a20d8..0000000000000000000000000000000000000000
GIT binary patch
[binary pickle payload omitted]
diff --git a/pandas/tseries/tests/data/series_daterange0.pickle b/pandas/tseries/tests/data/series_daterange0.pickle
deleted file mode 100644
GIT binary patch
[binary pickle payload omitted]
diff --git a/pandas/tseries/tests/test_timeseries_legacy.py b/pandas/tseries/tests/test_timeseries_legacy.py
deleted file mode 100644
@@ -1,219 +0,0 @@
[...]
-        pth, _ = os.path.split(os.path.abspath(__file__))
-        filepath = os.path.join(pth, 'data', 'frame.pickle')
-
-        with open(filepath, 'rb') as f:
-            cls.frame = pickle.load(f)
-
-        filepath = os.path.join(pth, 'data', 'series.pickle')
-        with open(filepath, 'rb') as f:
-            cls.series = pickle.load(f)
-
-    def test_pass_offset_warn(self):
-        buf = StringIO()
-
-        sys.stderr = buf
-        DatetimeIndex(start='1/1/2000', periods=10, offset='H')
-        sys.stderr = sys.__stderr__
-
-    def test_unpickle_legacy_frame(self):
-        dtindex = DatetimeIndex(start='1/3/2005', end='1/14/2005',
-                                freq=BDay(1))
-
-        unpickled = self.frame
-
-        self.assertEqual(type(unpickled.index), DatetimeIndex)
-        self.assertEqual(len(unpickled), 10)
-        self.assertTrue((unpickled.columns == Int64Index(np.arange(5))).all())
-        self.assertTrue((unpickled.index == dtindex).all())
-        self.assertEqual(unpickled.index.offset, BDay(1, normalize=True))
-
-    def test_unpickle_legacy_series(self):
-        unpickled = self.series
-
-        dtindex = DatetimeIndex(start='1/3/2005', end='1/14/2005',
-                                freq=BDay(1))
-
-        self.assertEqual(type(unpickled.index), DatetimeIndex)
-        self.assertEqual(len(unpickled), 10)
-        self.assertTrue((unpickled.index == dtindex).all())
-        self.assertEqual(unpickled.index.offset, BDay(1, normalize=True))
-
-    def test_unpickle_legacy_len0_daterange(self):
-        pth, _ = os.path.split(os.path.abspath(__file__))
-        filepath = os.path.join(pth, 'data', 'series_daterange0.pickle')
-
-        result = pd.read_pickle(filepath)
-
-        ex_index = DatetimeIndex([], freq='B')
-
-        self.assert_index_equal(result.index, ex_index)
-        tm.assertIsInstance(result.index.freq, BDay)
-        self.assertEqual(len(result), 0)
-
-    def test_arithmetic_interaction(self):
-        index = self.frame.index
-        obj_index = index.asobject
-
-        dseries = Series(rand(len(index)), index=index)
-        oseries = Series(dseries.values, index=obj_index)
-
-        result = dseries + oseries
-        expected = dseries * 2
-        tm.assertIsInstance(result.index, DatetimeIndex)
-        assert_series_equal(result, expected)
-
-        result = dseries + oseries[:5]
-        expected = dseries + dseries[:5]
-        tm.assertIsInstance(result.index, DatetimeIndex)
-        assert_series_equal(result, expected)
-
-    def test_join_interaction(self):
-        index = self.frame.index
-        obj_index = index.asobject
-
-        def _check_join(left, right, how='inner'):
-            ra, rb, rc = left.join(right, how=how, return_indexers=True)
-            ea, eb, ec = left.join(DatetimeIndex(right), how=how,
-                                   return_indexers=True)
-
-            tm.assertIsInstance(ra, DatetimeIndex)
-            self.assert_index_equal(ra, ea)
-
-            assert_almost_equal(rb, eb)
-            assert_almost_equal(rc, ec)
-
-        _check_join(index[:15], obj_index[5:], how='inner')
-        _check_join(index[:15], obj_index[5:], how='outer')
-        _check_join(index[:15], obj_index[5:], how='right')
-        _check_join(index[:15], obj_index[5:], how='left')
-
-    def test_join_nonunique(self):
-        idx1 = to_datetime(['2012-11-06 16:00:11.477563',
-                            '2012-11-06 16:00:11.477563'])
-        idx2 = to_datetime(['2012-11-06 15:11:09.006507',
-                            '2012-11-06 15:11:09.006507'])
-        rs = idx1.join(idx2, how='outer')
-        self.assertTrue(rs.is_monotonic)
-
-    def test_unpickle_daterange(self):
-        pth, _ = 
os.path.split(os.path.abspath(__file__)) - filepath = os.path.join(pth, 'data', 'daterange_073.pickle') - - rng = read_pickle(filepath) - tm.assertIsInstance(rng[0], datetime) - tm.assertIsInstance(rng.offset, BDay) - self.assertEqual(rng.values.dtype, object) - - def test_setops(self): - index = self.frame.index - obj_index = index.asobject - - result = index[:5].union(obj_index[5:]) - expected = index - tm.assertIsInstance(result, DatetimeIndex) - self.assert_index_equal(result, expected) - - result = index[:10].intersection(obj_index[5:]) - expected = index[5:10] - tm.assertIsInstance(result, DatetimeIndex) - self.assert_index_equal(result, expected) - - result = index[:10] - obj_index[5:] - expected = index[:5] - tm.assertIsInstance(result, DatetimeIndex) - self.assert_index_equal(result, expected) - - def test_index_conversion(self): - index = self.frame.index - obj_index = index.asobject - - conv = DatetimeIndex(obj_index) - self.assert_index_equal(conv, index) - - self.assertRaises(ValueError, DatetimeIndex, ['a', 'b', 'c', 'd']) - - def test_tolist(self): - rng = date_range('1/1/2000', periods=10) - - result = rng.tolist() - tm.assertIsInstance(result[0], Timestamp) - - def test_object_convert_fail(self): - idx = DatetimeIndex([np.NaT]) - self.assertRaises(ValueError, idx.astype, 'O') - - def test_setops_conversion_fail(self): - index = self.frame.index - - right = Index(['a', 'b', 'c', 'd']) - - result = index.union(right) - expected = Index(np.concatenate([index.asobject, right])) - self.assert_index_equal(result, expected) - - result = index.intersection(right) - expected = Index([]) - self.assert_index_equal(result, expected) - - def test_legacy_time_rules(self): - rules = [('WEEKDAY', 'B'), ('EOM', 'BM'), ('W@MON', 'W-MON'), - ('W@TUE', 'W-TUE'), ('W@WED', 'W-WED'), ('W@THU', 'W-THU'), - ('W@FRI', 'W-FRI'), ('Q@JAN', 'BQ-JAN'), ('Q@FEB', 'BQ-FEB'), - ('Q@MAR', 'BQ-MAR'), ('A@JAN', 'BA-JAN'), ('A@FEB', 'BA-FEB'), - ('A@MAR', 'BA-MAR'), ('A@APR', 'BA-APR'), ('A@MAY', 'BA-MAY'), - ('A@JUN', 'BA-JUN'), ('A@JUL', 'BA-JUL'), ('A@AUG', 'BA-AUG'), - ('A@SEP', 'BA-SEP'), ('A@OCT', 'BA-OCT'), ('A@NOV', 'BA-NOV'), - ('A@DEC', 'BA-DEC'), ('WOM@1FRI', 'WOM-1FRI'), - ('WOM@2FRI', 'WOM-2FRI'), ('WOM@3FRI', 'WOM-3FRI'), - ('WOM@4FRI', 'WOM-4FRI')] - - start, end = '1/1/2000', '1/1/2010' - - for old_freq, new_freq in rules: - old_rng = date_range(start, end, freq=old_freq) - new_rng = date_range(start, end, freq=new_freq) - self.assert_index_equal(old_rng, new_rng) - - def test_ms_vs_MS(self): - left = get_offset('ms') - right = get_offset('MS') - self.assertEqual(left, Milli()) - self.assertEqual(right, MonthBegin()) - - def test_rule_aliases(self): - rule = to_offset('10us') - self.assertEqual(rule, Micro(10)) diff --git a/setup.py b/setup.py index c3cb56f2d6d1b..edec53e9cefb0 100755 --- a/setup.py +++ b/setup.py @@ -648,13 +648,13 @@ def pxd(name): 'pandas.tests.series', 'pandas.tests.formats', 'pandas.tests.scalar', + 'pandas.tests.tseries', 'pandas.tests.types', 'pandas.tests.test_msgpack', 'pandas.tests.plotting', 'pandas.tools', 'pandas.tools.tests', 'pandas.tseries', - 'pandas.tseries.tests', 'pandas.types', 'pandas.io.tests', 'pandas.io.tests.json', @@ -688,8 +688,7 @@ def pxd(name): 'pandas.tests': ['data/*.csv'], 'pandas.tests.formats': ['data/*.csv'], 'pandas.tests.indexes': ['data/*.pickle'], - 'pandas.tseries.tests': ['data/*.pickle', - 'data/*.csv'] + 'pandas.tests.tseries': ['data/*.pickle'] }, ext_modules=extensions, maintainer_email=EMAIL, From 
e303e268770824f3259f456263aaa1b1783a7aab Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 9 Feb 2017 18:34:18 -0500 Subject: [PATCH 031/933] TST: more tseries reorg --- .../tests/{tseries => groupby}/test_bin_groupby.py | 0 pandas/tests/indexes/datetimes/test_ops.py | 14 +------------- pandas/tests/tseries/test_frequencies.py | 12 ++++++++++++ 3 files changed, 13 insertions(+), 13 deletions(-) rename pandas/tests/{tseries => groupby}/test_bin_groupby.py (100%) diff --git a/pandas/tests/tseries/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py similarity index 100% rename from pandas/tests/tseries/test_bin_groupby.py rename to pandas/tests/groupby/test_bin_groupby.py diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 7a5ce3a44681b..63bf07ec041d3 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -7,12 +7,10 @@ import pandas.util.testing as tm from pandas.core.common import PerformanceWarning from pandas.tseries.index import cdate_range -from pandas.tseries.frequencies import get_offset, to_offset from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, date_range, TimedeltaIndex, _np_version_under1p10, Index, datetime, Float64Index, offsets, bdate_range) -from pandas.tseries.offsets import (BMonthEnd, CDay, BDay, Milli, MonthBegin, - Micro) +from pandas.tseries.offsets import BMonthEnd, CDay, BDay from pandas.tests.test_base import Ops @@ -913,16 +911,6 @@ def test_equals(self): self.assertFalse(idx.equals(list(idx3))) self.assertFalse(idx.equals(pd.Series(idx3))) - def test_ms_vs_MS(self): - left = get_offset('ms') - right = get_offset('MS') - self.assertEqual(left, Milli()) - self.assertEqual(right, MonthBegin()) - - def test_rule_aliases(self): - rule = to_offset('10us') - self.assertEqual(rule, Micro(10)) - class TestDateTimeIndexToJulianDate(tm.TestCase): diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index 9983bf5270b29..5fbef465ca8fc 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -247,6 +247,18 @@ def test_anchored_shortcuts(self): frequencies.to_offset(invalid_anchor) +def test_ms_vs_MS(): + left = frequencies.get_offset('ms') + right = frequencies.get_offset('MS') + assert left == offsets.Milli() + assert right == offsets.MonthBegin() + + +def test_rule_aliases(): + rule = frequencies.to_offset('10us') + assert rule == offsets.Micro(10) + + def test_get_rule_month(): result = frequencies._get_rule_month('W') assert (result == 'DEC') From 3d6fcdcd356b2b1853346bc4e709baa3bf16ddad Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 9 Feb 2017 19:16:38 -0500 Subject: [PATCH 032/933] API: Reformat output of groupby.describe (#4792) closes #4792 Author: Matt Roeschke Author: Matthew Roeschke Closes #15260 from mroeschke/fix_4792 and squashes the following commits: 618bc46 [Matthew Roeschke] Merge branch 'master' into fix_4792 184378d [Matt Roeschke] TST: groupby.describe levels don't appear as column (#4792) --- doc/source/whatsnew/v0.20.0.txt | 53 +++++++++++++++++ pandas/core/groupby.py | 19 +++++- pandas/tests/formats/test_format.py | 33 +++-------- pandas/tests/groupby/test_categorical.py | 22 ++++--- pandas/tests/groupby/test_groupby.py | 74 +++++++++++++++++------- pandas/tests/test_generic.py | 8 +-- 6 files changed, 150 insertions(+), 59 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt 
b/doc/source/whatsnew/v0.20.0.txt index 17ce4517035a7..6fe066b08e255 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -356,6 +356,59 @@ New Behavior: In [11]: index.memory_usage(deep=True) Out[11]: 260 +.. _whatsnew_0200.api_breaking.groupby_describe: + +Groupby Describe Formatting +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The output formatting of ``groupby.describe()`` now labels the ``describe()`` metrics in the columns instead of the index. +This format is consistent with ``groupby.agg()`` when applying multiple functions at once. (:issue:`4792`) + +Previous Behavior: + +.. code-block:: ipython + + In [1]: df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4]}) + + In [2]: df.groupby('A').describe() + Out[2]: + B + A + 1 count 2.000000 + mean 1.500000 + std 0.707107 + min 1.000000 + 25% 1.250000 + 50% 1.500000 + 75% 1.750000 + max 2.000000 + 2 count 2.000000 + mean 3.500000 + std 0.707107 + min 3.000000 + 25% 3.250000 + 50% 3.500000 + 75% 3.750000 + max 4.000000 + + In [3]: df.groupby('A').agg([np.mean, np.std, np.min, np.max]) + Out[3]: + B + mean std amin amax + A + 1 1.5 0.707107 1 2 + 2 3.5 0.707107 3 4 + +New Behavior: + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4]}) + + df.groupby('A').describe() + + df.groupby('A').agg([np.mean, np.std, np.min, np.max]) + .. _whatsnew_0200.api: Other API Changes diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 53b6dbe6075cf..a228861270aea 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -80,7 +80,6 @@ 'mean', 'sum', 'min', 'max', 'cumcount', 'resample', - 'describe', 'rank', 'quantile', 'fillna', 'mad', @@ -1138,6 +1137,16 @@ def ohlc(self): return self._apply_to_column_groupbys( lambda x: x._cython_agg_general('ohlc')) + @Appender(DataFrame.describe.__doc__) + @Substitution(name='groupby') + @Appender(_doc_template) + def describe(self, **kwargs): + self._set_group_selection() + result = self.apply(lambda x: x.describe(**kwargs)) + if self.axis == 1: + return result.T + return result.unstack() + @Substitution(name='groupby') @Appender(_doc_template) def resample(self, rule, *args, **kwargs): @@ -3039,6 +3048,14 @@ def nlargest(self, n=5, keep='first'): def nsmallest(self, n=5, keep='first'): return self.apply(lambda x: x.nsmallest(n=n, keep=keep)) + @Appender(Series.describe.__doc__) + def describe(self, **kwargs): + self._set_group_selection() + result = self.apply(lambda x: x.describe(**kwargs)) + if self.axis == 1: + return result.T + return result.unstack() + def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True): diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index a9553d9ea10cb..99cc70ae36f6b 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -3545,30 +3545,15 @@ def test_to_latex_multiindex(self): self.assertEqual(result, expected) result = df.groupby('a').describe().to_latex() - expected = r"""\begin{tabular}{llr} -\toprule - & & c \\ -a & {} & \\ -\midrule -0 & count & 2.000000 \\ - & mean & 1.500000 \\ - & std & 0.707107 \\ - & min & 1.000000 \\ - & 25\% & 1.250000 \\ - & 50\% & 1.500000 \\ - & 75\% & 1.750000 \\ - & max & 2.000000 \\ -1 & count & 2.000000 \\ - & mean & 3.500000 \\ - & std & 0.707107 \\ - & min & 3.000000 \\ - & 25\% & 3.250000 \\ - & 50\% & 3.500000 \\ - & 75\% & 3.750000 \\ - & max & 4.000000 \\ -\bottomrule -\end{tabular} -""" + expected = ('\\begin{tabular}{lrrrrrrrr}\n\\toprule\n{} & c & ' + ' & 
& & & & & ' + '\\\\\n{} & count & mean & std & min & 25\\% & ' + '50\\% & 75\\% & max \\\\\na & & & ' + ' & & & & & \\\\\n\\midrule\n0 ' + '& 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 ' + '& 2.0 \\\\\n1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 ' + '& 3.5 & 3.75 & 4.0 ' + '\\\\\n\\bottomrule\n\\end{tabular}\n') self.assertEqual(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 8952b520f4f78..eebd0e0f490c1 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -107,17 +107,20 @@ def test_groupby_categorical(self): exp_cats = Categorical(ord_labels, ordered=True, categories=['foo', 'bar', 'baz', 'qux']) expected = ord_data.groupby(exp_cats, sort=False).describe() - expected.index.names = [None, None] assert_frame_equal(desc_result, expected) # GH 10460 expc = Categorical.from_codes(np.arange(4).repeat(8), levels, ordered=True) exp = CategoricalIndex(expc) - self.assert_index_equal(desc_result.index.get_level_values(0), exp) + self.assert_index_equal((desc_result.stack() + .index + .get_level_values(0)), exp) exp = Index(['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'] * 4) - self.assert_index_equal(desc_result.index.get_level_values(1), exp) + self.assert_index_equal((desc_result.stack() + .index + .get_level_values(1)), exp) def test_groupby_datetime_categorical(self): # GH9049: ensure backward compatibility @@ -144,7 +147,6 @@ def test_groupby_datetime_categorical(self): ord_labels = cats.take_nd(idx) ord_data = data.take(idx) expected = ord_data.groupby(ord_labels).describe() - expected.index.names = [None, None] assert_frame_equal(desc_result, expected) tm.assert_index_equal(desc_result.index, expected.index) tm.assert_index_equal( @@ -155,10 +157,14 @@ def test_groupby_datetime_categorical(self): expc = Categorical.from_codes( np.arange(4).repeat(8), levels, ordered=True) exp = CategoricalIndex(expc) - self.assert_index_equal(desc_result.index.get_level_values(0), exp) + self.assert_index_equal((desc_result.stack() + .index + .get_level_values(0)), exp) exp = Index(['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'] * 4) - self.assert_index_equal(desc_result.index.get_level_values(1), exp) + self.assert_index_equal((desc_result.stack() + .index + .get_level_values(1)), exp) def test_groupby_categorical_index(self): @@ -195,8 +201,8 @@ def test_groupby_describe_categorical_columns(self): df = DataFrame(np.random.randn(20, 4), columns=cats) result = df.groupby([1, 2, 3, 4] * 5).describe() - tm.assert_index_equal(result.columns, cats) - tm.assert_categorical_equal(result.columns.values, cats.values) + tm.assert_index_equal(result.stack().columns, cats) + tm.assert_categorical_equal(result.stack().columns.values, cats.values) def test_groupby_unstack_categorical(self): # GH11558 (example is taken from the original issue) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 53f85349834ac..d625fa07d932c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1085,7 +1085,7 @@ def test_attr_wrapper(self): for name, gp in grouped: expected[name] = gp.describe() expected = DataFrame(expected).T - assert_frame_equal(result.unstack(), expected) + assert_frame_equal(result, expected) # get attribute result = grouped.dtype @@ -1097,7 +1097,7 @@ def test_attr_wrapper(self): def test_series_describe_multikey(self): ts = tm.makeTimeSeries() grouped = ts.groupby([lambda x: x.year, 
lambda x: x.month]) - result = grouped.describe().unstack() + result = grouped.describe() assert_series_equal(result['mean'], grouped.mean(), check_names=False) assert_series_equal(result['std'], grouped.std(), check_names=False) assert_series_equal(result['min'], grouped.min(), check_names=False) @@ -1106,7 +1106,7 @@ def test_series_describe_single(self): ts = tm.makeTimeSeries() grouped = ts.groupby(lambda x: x.month) result = grouped.apply(lambda x: x.describe()) - expected = grouped.describe() + expected = grouped.describe().stack() assert_series_equal(result, expected) def test_series_index_name(self): @@ -1117,17 +1117,27 @@ def test_series_index_name(self): def test_frame_describe_multikey(self): grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) result = grouped.describe() - + desc_groups = [] for col in self.tsframe: - expected = grouped[col].describe() - assert_series_equal(result[col], expected, check_names=False) + group = grouped[col].describe() + group_col = pd.MultiIndex([[col] * len(group.columns), + group.columns], + [[0] * len(group.columns), + range(len(group.columns))]) + group = pd.DataFrame(group.values, + columns=group_col, + index=group.index) + desc_groups.append(group) + expected = pd.concat(desc_groups, axis=1) + tm.assert_frame_equal(result, expected) groupedT = self.tsframe.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, axis=1) result = groupedT.describe() - - for name, group in groupedT: - assert_frame_equal(result[name], group.describe()) + expected = self.tsframe.describe().T + expected.index = pd.MultiIndex([[0, 0, 1, 1], expected.index], + [range(4), range(len(expected.index))]) + tm.assert_frame_equal(result, expected) def test_frame_describe_tupleindex(self): @@ -1137,10 +1147,27 @@ def test_frame_describe_tupleindex(self): 'z': [100, 200, 300, 400, 500] * 3}) df1['k'] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 df2 = df1.rename(columns={'k': 'key'}) - result = df1.groupby('k').describe() - expected = df2.groupby('key').describe() - expected.index.set_names(result.index.names, inplace=True) - assert_frame_equal(result, expected) + tm.assertRaises(ValueError, lambda: df1.groupby('k').describe()) + tm.assertRaises(ValueError, lambda: df2.groupby('key').describe()) + + def test_frame_describe_unstacked_format(self): + # GH 4792 + prices = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 24990, + pd.Timestamp('2011-01-06 12:43:33', tz=None): 25499, + pd.Timestamp('2011-01-06 12:54:09', tz=None): 25499} + volumes = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 1500000000, + pd.Timestamp('2011-01-06 12:43:33', tz=None): 5000000000, + pd.Timestamp('2011-01-06 12:54:09', tz=None): 100000000} + df = pd.DataFrame({'PRICE': prices, + 'VOLUME': volumes}) + result = df.groupby('PRICE').VOLUME.describe() + data = [df[df.PRICE == 24990].VOLUME.describe().values.tolist(), + df[df.PRICE == 25499].VOLUME.describe().values.tolist()] + expected = pd.DataFrame(data, + index=pd.Index([24990, 25499], name='PRICE'), + columns=['count', 'mean', 'std', 'min', + '25%', '50%', '75%', 'max']) + tm.assert_frame_equal(result, expected) def test_frame_groupby(self): grouped = self.tsframe.groupby(lambda x: x.weekday()) @@ -2545,16 +2572,21 @@ def test_non_cython_api(self): assert_frame_equal(result, expected) # describe - expected = DataFrame(dict(B=concat( - [df.loc[[0, 1], 'B'].describe(), df.loc[[2], 'B'].describe()], - keys=[1, 3]))) - expected.index.names = ['A', None] + expected_index = pd.Index([1, 3], name='A') + expected_col = pd.MultiIndex(levels=[['B'], + ['count', 
'mean', 'std', 'min', + '25%', '50%', '75%', 'max']], + labels=[[0] * 8, list(range(8))]) + expected = pd.DataFrame([[1.0, 2.0, nan, 2.0, 2.0, 2.0, 2.0, 2.0], + [0.0, nan, nan, nan, nan, nan, nan, nan]], + index=expected_index, + columns=expected_col) result = g.describe() assert_frame_equal(result, expected) - expected = concat( - [df.loc[[0, 1], ['A', 'B']].describe(), - df.loc[[2], ['A', 'B']].describe()], keys=[0, 1]) + expected = pd.concat([df[df.A == 1].describe().unstack().to_frame().T, + df[df.A == 3].describe().unstack().to_frame().T]) + expected.index = pd.Index([0, 1]) result = gni.describe() assert_frame_equal(result, expected) @@ -3872,7 +3904,6 @@ def test_groupby_whitelist(self): 'tail', 'cumcount', 'resample', - 'describe', 'rank', 'quantile', 'fillna', @@ -3909,7 +3940,6 @@ def test_groupby_whitelist(self): 'tail', 'cumcount', 'resample', - 'describe', 'rank', 'quantile', 'fillna', diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index bb341c26d454e..e84e2d6809e7b 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1267,10 +1267,10 @@ def test_describe_typefiltering_groupby(self): 'numD': np.arange(24.) + .5, 'ts': tm.makeTimeSeries()[:24].index}) G = df.groupby('catA') - self.assertTrue(G.describe(include=['number']).shape == (16, 2)) - self.assertTrue(G.describe(include=['number', 'object']).shape == (22, - 3)) - self.assertTrue(G.describe(include='all').shape == (26, 4)) + self.assertTrue(G.describe(include=['number']).shape == (2, 16)) + self.assertTrue(G.describe(include=['number', 'object']).shape == (2, + 33)) + self.assertTrue(G.describe(include='all').shape == (2, 52)) def test_describe_multi_index_df_column_names(self): """ Test that column names persist after the describe operation.""" From e8840725447859531ddcc4b878266f2043fb6465 Mon Sep 17 00:00:00 2001 From: Tobias Gustafsson Date: Fri, 10 Feb 2017 09:09:31 -0500 Subject: [PATCH 033/933] BUG: Fix #15344 by backporting ujson usage of PEP 393 API Make use of the PEP 393 API to avoid expanding single byte ascii characters into four byte unicode characters when encoding objects to json. 
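A rough sketch of the symptom, mirroring the regression test added below (sizes are platform-dependent; the assertion is the point):

    import pandas as pd

    df = pd.DataFrame({'a': ['1']})  # ascii-only string data

    before = df.memory_usage(index=True, deep=True).sum()
    df.to_json()  # encoding should leave the frame untouched
    after = df.memory_usage(index=True, deep=True).sum()

    # Without the PEP 393 fast path, PyUnicode_AS_UNICODE caches a widened
    # wchar representation on each string object, so `after` exceeds
    # `before`; with this fix the two sizes are equal.
    assert before == after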
closes #15344 Author: Tobias Gustafsson Closes #15360 from tobgu/backport-ujson-compact-ascii-encoding and squashes the following commits: 44de133 [Tobias Gustafsson] Fix C-code formatting to pass linting of GH15344 b7e404f [Tobias Gustafsson] Merge branch 'master' into backport-ujson-compact-ascii-encoding 4e8e2ff [Tobias Gustafsson] BUG: Fix #15344 by backporting ujson usage of PEP 393 APIs for compact ascii --- doc/source/whatsnew/v0.20.0.txt | 5 ++++- pandas/io/tests/json/test_pandas.py | 10 ++++++++++ pandas/src/ujson/python/objToJSON.c | 10 ++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 6fe066b08e255..5fbce3d2594a9 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -538,6 +538,8 @@ Bug Fixes - Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) +- Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) +- Bug in ``.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`) - Bug in ``.rolling/expanding()`` functions where ``count()`` was not counting ``np.Inf``, nor handling ``object`` dtypes (:issue:`12541`) - Bug in ``DataFrame.resample().median()`` if duplicate column names are present (:issue:`14233`) @@ -561,7 +563,6 @@ Bug Fixes - Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) -- Bug in ``.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`) - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) @@ -574,4 +575,6 @@ Bug Fixes - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) + + - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) diff --git a/pandas/io/tests/json/test_pandas.py b/pandas/io/tests/json/test_pandas.py index ee5039c38b182..440f5c13d5121 100644 --- a/pandas/io/tests/json/test_pandas.py +++ b/pandas/io/tests/json/test_pandas.py @@ -1044,3 +1044,13 @@ def roundtrip(s, encoding='latin-1'): for s in examples: roundtrip(s) + + def test_data_frame_size_after_to_json(self): + # GH15344 + df = DataFrame({'a': [str(1)]}) + + size_before = df.memory_usage(index=True, deep=True).sum() + df.to_json() + size_after = df.memory_usage(index=True, deep=True).sum() + + self.assertEqual(size_before, size_after) diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/src/ujson/python/objToJSON.c index 42c0b62a57511..e3c75d3b6e081 100644 --- a/pandas/src/ujson/python/objToJSON.c +++ b/pandas/src/ujson/python/objToJSON.c @@ -402,6 +402,16 @@ static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) { PyObject *obj = (PyObject *)_obj; + +#if (PY_VERSION_HEX >= 0x03030000) + if (PyUnicode_IS_COMPACT_ASCII(obj)) { + Py_ssize_t len; + char *data = PyUnicode_AsUTF8AndSize(obj, &len); + *_outLen = len; + 
return data; + } +#endif + PyObject *newObj = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj), PyUnicode_GET_SIZE(obj), NULL); From ab8822ae85ab469efd338d34c10aef6ff89cc8d0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 10 Feb 2017 10:33:36 -0500 Subject: [PATCH 034/933] TST: Use pytest closes https://github.com/pydata/pandas/issues/13097 Author: Tom Augspurger Closes #13856 from TomAugspurger/pytest and squashes the following commits: 59e2be9 [Tom Augspurger] NOSE_ARGS -> TEST_ARGS 03695aa [Tom Augspurger] TST: Remove disabled marks 42790ae [Tom Augspurger] TST: Remove test_multi.sh 40d7336 [Tom Augspurger] PKG: redo pd.test import 9ba1f12 [Tom Augspurger] TST: Skip if getlocale is None 14c447c [Tom Augspurger] TST: pd.test uses pytest c4f6008 [Tom Augspurger] TST/CI: Use pytest b268d89 [Tom Augspurger] TST: Change method to make reporting more standard a638390 [Tom Augspurger] TST: Test consistency change c8dc927 [Tom Augspurger] TST: Refactor to use setup_class 9b5f2b2 [Tom Augspurger] TST: Refactor sql test inheritance --- .gitignore | 2 + .travis.yml | 38 ++-- appveyor.yml | 8 +- ci/install_test.sh | 3 +- ci/install_travis.sh | 11 +- ci/requirements_all.txt | 2 + ci/requirements_dev.txt | 2 + ci/script.sh | 8 +- doc/source/contributing.rst | 24 ++- doc/source/install.rst | 4 +- doc/source/whatsnew/v0.20.0.txt | 3 + pandas/__init__.py | 6 +- pandas/api/tests/test_api.py | 2 +- pandas/conftest.py | 21 ++ pandas/io/tests/parser/test_network.py | 2 +- pandas/io/tests/test_packers.py | 21 +- pandas/io/tests/test_pickle.py | 7 +- pandas/io/tests/test_sql.py | 27 +-- pandas/tests/formats/test_format.py | 22 +-- pandas/tests/frame/common.py | 4 +- pandas/tools/tests/test_util.py | 5 + pandas/util/_tester.py | 23 +++ pandas/util/nosetester.py | 261 ------------------------- pandas/util/testing.py | 7 +- setup.cfg | 6 + test.bat | 3 +- test.sh | 9 +- test_fast.sh | 3 +- test_multi.sh | 1 - test_rebuild.sh | 8 +- tox.ini | 9 +- 31 files changed, 179 insertions(+), 373 deletions(-) create mode 100644 pandas/conftest.py create mode 100644 pandas/util/_tester.py delete mode 100644 pandas/util/nosetester.py delete mode 100755 test_multi.sh diff --git a/.gitignore b/.gitignore index a77e780f3332d..808d9fb73a631 100644 --- a/.gitignore +++ b/.gitignore @@ -56,6 +56,8 @@ dist **/wheelhouse/* # coverage .coverage +coverage.xml +coverage_html_report # OS generated files # ###################### diff --git a/.travis.yml b/.travis.yml index be2058950d8ec..b38c99e3a5be9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,7 +32,7 @@ matrix: env: - PYTHON_VERSION=3.5 - JOB_NAME: "35_osx" - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - BUILD_TYPE=conda - JOB_TAG=_OSX - TRAVIS_PYTHON_VERSION=3.5 @@ -42,7 +42,7 @@ matrix: env: - PYTHON_VERSION=2.7 - JOB_NAME: "27_slow_nnet_LOCALE" - - NOSE_ARGS="slow and not network and not disabled" + - TEST_ARGS="--only-slow --skip-network" - LOCALE_OVERRIDE="zh_CN.UTF-8" - FULL_DEPS=true - JOB_TAG=_LOCALE @@ -56,7 +56,7 @@ matrix: env: - PYTHON_VERSION=2.7 - JOB_NAME: "27_nslow" - - NOSE_ARGS="not slow and not disabled" + - TEST_ARGS="--skip-slow" - FULL_DEPS=true - CLIPBOARD_GUI=gtk2 - LINT=true @@ -70,7 +70,7 @@ matrix: env: - PYTHON_VERSION=3.5 - JOB_NAME: "35_nslow" - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - FULL_DEPS=true - CLIPBOARD=xsel - COVERAGE=true @@ -84,7 +84,7 @@ matrix: env: - PYTHON_VERSION=3.6 - JOB_NAME: "36" - - NOSE_ARGS="not slow 
and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - PANDAS_TESTING_MODE="deprecate" addons: apt: @@ -96,7 +96,7 @@ matrix: env: - PYTHON_VERSION=2.7 - JOB_NAME: "27_nslow_nnet_COMPAT" - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - LOCALE_OVERRIDE="it_IT.UTF-8" - INSTALL_TEST=true - JOB_TAG=_COMPAT @@ -112,7 +112,7 @@ matrix: - PYTHON_VERSION=2.7 - JOB_NAME: "27_slow" - JOB_TAG=_SLOW - - NOSE_ARGS="slow and not network and not disabled" + - TEST_ARGS="--only-slow --skip-network" - FULL_DEPS=true - CACHE_NAME="27_slow" - USE_CACHE=true @@ -122,7 +122,7 @@ matrix: - PYTHON_VERSION=2.7 - JOB_NAME: "27_build_test_conda" - JOB_TAG=_BUILD_TEST - - NOSE_ARGS="not slow and not disabled" + - TEST_ARGS="--skip-slow" - FULL_DEPS=true - BUILD_TEST=true - CACHE_NAME="27_build_test_conda" @@ -133,7 +133,7 @@ matrix: - PYTHON_VERSION=3.4 - JOB_NAME: "34_nslow" - LOCALE_OVERRIDE="zh_CN.UTF-8" - - NOSE_ARGS="not slow and not disabled" + - TEST_ARGS="--skip-slow" - FULL_DEPS=true - CLIPBOARD=xsel - CACHE_NAME="34_nslow" @@ -149,7 +149,7 @@ matrix: - PYTHON_VERSION=3.4 - JOB_NAME: "34_slow" - JOB_TAG=_SLOW - - NOSE_ARGS="slow and not network and not disabled" + - TEST_ARGS="--only-slow --skip-network" - FULL_DEPS=true - CLIPBOARD=xsel - CACHE_NAME="34_slow" @@ -164,7 +164,7 @@ matrix: - PYTHON_VERSION=3.5 - JOB_NAME: "35_numpy_dev" - JOB_TAG=_NUMPY_DEV - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - PANDAS_TESTING_MODE="deprecate" - CACHE_NAME="35_numpy_dev" - USE_CACHE=true @@ -179,7 +179,7 @@ matrix: - PYTHON_VERSION=3.5 - JOB_NAME: "35_ascii" - JOB_TAG=_ASCII - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - LOCALE_OVERRIDE="C" - CACHE_NAME="35_ascii" - USE_CACHE=true @@ -199,7 +199,7 @@ matrix: - PYTHON_VERSION=2.7 - JOB_NAME: "27_slow" - JOB_TAG=_SLOW - - NOSE_ARGS="slow and not network and not disabled" + - TEST_ARGS="--only-slow --skip-network" - FULL_DEPS=true - CACHE_NAME="27_slow" - USE_CACHE=true @@ -208,7 +208,7 @@ matrix: - PYTHON_VERSION=3.4 - JOB_NAME: "34_slow" - JOB_TAG=_SLOW - - NOSE_ARGS="slow and not network and not disabled" + - TEST_ARGS="--only-slow --skip-network" - FULL_DEPS=true - CLIPBOARD=xsel - CACHE_NAME="34_slow" @@ -222,7 +222,7 @@ matrix: - PYTHON_VERSION=2.7 - JOB_NAME: "27_build_test_conda" - JOB_TAG=_BUILD_TEST - - NOSE_ARGS="not slow and not disabled" + - TEST_ARGS="--skip-slow" - FULL_DEPS=true - BUILD_TEST=true - CACHE_NAME="27_build_test_conda" @@ -232,7 +232,7 @@ matrix: - PYTHON_VERSION=3.4 - JOB_NAME: "34_nslow" - LOCALE_OVERRIDE="zh_CN.UTF-8" - - NOSE_ARGS="not slow and not disabled" + - TEST_ARGS="--skip-slow" - FULL_DEPS=true - CLIPBOARD=xsel - CACHE_NAME="34_nslow" @@ -247,7 +247,7 @@ matrix: - PYTHON_VERSION=3.5 - JOB_NAME: "35_numpy_dev" - JOB_TAG=_NUMPY_DEV - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - PANDAS_TESTING_MODE="deprecate" - CACHE_NAME="35_numpy_dev" - USE_CACHE=true @@ -260,7 +260,7 @@ matrix: env: - PYTHON_VERSION=2.7 - JOB_NAME: "27_nslow_nnet_COMPAT" - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - LOCALE_OVERRIDE="it_IT.UTF-8" - INSTALL_TEST=true - JOB_TAG=_COMPAT @@ -275,7 +275,7 @@ matrix: - PYTHON_VERSION=3.5 - JOB_NAME: "35_ascii" - JOB_TAG=_ASCII - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow 
--skip-network" - LOCALE_OVERRIDE="C" - CACHE_NAME="35_ascii" - USE_CACHE=true diff --git a/appveyor.yml b/appveyor.yml index 2499e7069843d..42c3be13af809 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -14,6 +14,7 @@ environment: # /E:ON and /V:ON options are not enabled in the batch script intepreter # See: http://stackoverflow.com/a/13751649/163740 CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci\\run_with_env.cmd" + clone_folder: C:\projects\pandas matrix: @@ -82,7 +83,7 @@ install: - cmd: '%CMD_IN_ENV% conda build ci\appveyor.recipe -q' # create our env - - cmd: conda create -q -n pandas python=%PYTHON_VERSION% nose + - cmd: conda create -q -n pandas python=%PYTHON_VERSION% nose pytest - cmd: activate pandas - SET REQ=ci\requirements-%PYTHON_VERSION%-%PYTHON_ARCH%.run - cmd: echo "installing requirements from %REQ%" @@ -93,7 +94,8 @@ install: test_script: # tests - - cd \ - cmd: activate pandas - cmd: conda list - - cmd: nosetests --exe -A "not slow and not network and not disabled" pandas + - cmd: cd \ + - cmd: python -c "import pandas; pandas.test(['--skip-slow', '--skip-network'])" + diff --git a/ci/install_test.sh b/ci/install_test.sh index e01ad7b94a349..cbb84d8fa4b65 100755 --- a/ci/install_test.sh +++ b/ci/install_test.sh @@ -8,7 +8,8 @@ if [ "$INSTALL_TEST" ]; then conda uninstall cython || exit 1 python "$TRAVIS_BUILD_DIR"/setup.py sdist --formats=zip,gztar || exit 1 pip install "$TRAVIS_BUILD_DIR"/dist/*tar.gz || exit 1 - nosetests --exe -A "$NOSE_ARGS" pandas/tests/test_series.py --with-xunit --xunit-file=/tmp/nosetests_install.xml + # nosetests --exe -A "$TEST_ARGS" pandas/tests/test_series.py --with-xunit --xunit-file=/tmp/nosetests_install.xml + pytest pandas/tests/test_series.py --junitxml=/tmp/pytest_install.xml else echo "Skipping installation test." fi diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 52b52d787aade..f65176fb1147c 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -83,6 +83,7 @@ else # Useful for debugging any issues with conda conda info -a || exit 1 + fi # may have installation instructions for this build @@ -90,13 +91,9 @@ INSTALL="ci/install-${PYTHON_VERSION}${JOB_TAG}.sh" if [ -e ${INSTALL} ]; then time bash $INSTALL || exit 1 else - # create new env - time conda create -n pandas python=$PYTHON_VERSION nose || exit 1 + time conda create -n pandas python=$PYTHON_VERSION nose pytest || exit 1 - if [ "$COVERAGE" ]; then - pip install coverage - fi if [ "$LINT" ]; then conda install flake8 pip install cpplint @@ -119,6 +116,10 @@ fi source activate pandas +if [ "$COVERAGE" ]; then + pip install coverage pytest-cov +fi + if [ "$BUILD_TEST" ]; then # build testing diff --git a/ci/requirements_all.txt b/ci/requirements_all.txt index bc97957bff2b7..b64143fcd4ecd 100644 --- a/ci/requirements_all.txt +++ b/ci/requirements_all.txt @@ -1,4 +1,6 @@ nose +pytest +pytest-cov flake8 sphinx ipython diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt index 7396fba6548d9..b8af9d035de98 100644 --- a/ci/requirements_dev.txt +++ b/ci/requirements_dev.txt @@ -3,4 +3,6 @@ pytz numpy cython nose +pytest +pytest-cov flake8 diff --git a/ci/script.sh b/ci/script.sh index e2ba883b81883..3eac3002d6805 100755 --- a/ci/script.sh +++ b/ci/script.sh @@ -20,11 +20,11 @@ fi if [ "$BUILD_TEST" ]; then echo "We are not running nosetests as this is simply a build test." 
elif [ "$COVERAGE" ]; then - echo nosetests --exe -A "$NOSE_ARGS" pandas --with-coverage --with-xunit --xunit-file=/tmp/nosetests.xml - nosetests --exe -A "$NOSE_ARGS" pandas --with-coverage --cover-package=pandas --cover-tests --with-xunit --xunit-file=/tmp/nosetests.xml + echo pytest -s --cov=pandas --cov-report xml:/tmp/nosetests.xml $TEST_ARGS pandas + pytest -s --cov=pandas --cov-report xml:/tmp/nosetests.xml $TEST_ARGS pandas else - echo nosetests --exe -A "$NOSE_ARGS" pandas --doctest-tests --with-xunit --xunit-file=/tmp/nosetests.xml - nosetests --exe -A "$NOSE_ARGS" pandas --doctest-tests --with-xunit --xunit-file=/tmp/nosetests.xml + echo pytest $TEST_ARGS pandas + pytest $TEST_ARGS pandas # TODO: doctest fi RET="$?" diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index ecc2a5e723c45..dbe329b589c75 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -552,8 +552,8 @@ use cases and writing corresponding tests. Adding tests is one of the most common requests after code is pushed to *pandas*. Therefore, it is worth getting in the habit of writing tests ahead of time so this is never an issue. -Like many packages, *pandas* uses the `Nose testing system -`_ and the convenient +Like many packages, *pandas* uses `pytest +`_ and the convenient extensions in `numpy.testing `_. @@ -595,17 +595,25 @@ Running the test suite The tests can then be run directly inside your Git clone (without having to install *pandas*) by typing:: - nosetests pandas + pytest pandas The tests suite is exhaustive and takes around 20 minutes to run. Often it is worth running only a subset of tests first around your changes before running the -entire suite. This is done using one of the following constructs:: +entire suite. - nosetests pandas/tests/[test-module].py - nosetests pandas/tests/[test-module].py:[TestClass] - nosetests pandas/tests/[test-module].py:[TestClass].[test_method] +The easiest way to do this is with:: - .. versionadded:: 0.18.0 + pytest pandas/path/to/test.py -k regex_matching_test_name + +Or with one of the following constructs:: + + pytest pandas/tests/[test-module].py + pytest pandas/tests/[test-module].py::[TestClass] + pytest pandas/tests/[test-module].py::[TestClass]::[test_method] + +For more, see the `pytest`_ documentation. + + .. versionadded:: 0.18.0 Furthermore one can run diff --git a/doc/source/install.rst b/doc/source/install.rst index 4b3ea19624a0e..1c7cbc9326614 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -188,8 +188,8 @@ Running the test suite pandas is equipped with an exhaustive set of unit tests covering about 97% of the codebase as of this writing. To run it on your machine to verify that everything is working (and you have all of the dependencies, soft and hard, -installed), make sure you have `nose -`__ and run: +installed), make sure you have `pytest +`__ and run: :: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 5fbce3d2594a9..d0ffa786aaa8e 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -11,6 +11,9 @@ Highlights include: - Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`) - The ``.ix`` indexer has been deprecated, see :ref:`here ` +- Switched the test framework to `pytest`_ (:issue:`13097`) + +.. _pytest: http://doc.pytest.org/en/latest/ Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. 
diff --git a/pandas/__init__.py b/pandas/__init__.py
index 76542db22a757..70c547010f623 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -56,11 +56,7 @@
 from pandas.io.api import *
-# define the testing framework
-import pandas.util.testing
-from pandas.util.nosetester import NoseTester
-test = NoseTester().test
-del NoseTester
+from pandas.util._tester import test
 # use the closest tagged version if possible
 from ._version import get_versions
diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py
index a53f6103b408b..05cf5dc4b7e7b 100644
--- a/pandas/api/tests/test_api.py
+++ b/pandas/api/tests/test_api.py
@@ -28,7 +28,7 @@ class TestPDApi(Base, tm.TestCase):
 # these are optionally imported based on testing
 # & need to be ignored
- ignored = ['tests', 'locale']
+ ignored = ['tests', 'locale', 'conftest']
 # top-level sub-packages
 lib = ['api', 'compat', 'computation', 'core',
diff --git a/pandas/conftest.py b/pandas/conftest.py
new file mode 100644
index 0000000000000..b3683de3a173b
--- /dev/null
+++ b/pandas/conftest.py
@@ -0,0 +1,21 @@
+import pytest
+
+
+def pytest_addoption(parser):
+    parser.addoption("--skip-slow", action="store_true",
+                     help="skip slow tests")
+    parser.addoption("--skip-network", action="store_true",
+                     help="skip network tests")
+    parser.addoption("--only-slow", action="store_true",
+                     help="run only slow tests")
+
+
+def pytest_runtest_setup(item):
+    if 'slow' in item.keywords and item.config.getoption("--skip-slow"):
+        pytest.skip("skipping due to --skip-slow")
+
+    if 'slow' not in item.keywords and item.config.getoption("--only-slow"):
+        pytest.skip("skipping due to --only-slow")
+
+    if 'network' in item.keywords and item.config.getoption("--skip-network"):
+        pytest.skip("skipping due to --skip-network")
diff --git a/pandas/io/tests/parser/test_network.py b/pandas/io/tests/parser/test_network.py
index e06f94c780c8b..533b7733bde28 100644
--- a/pandas/io/tests/parser/test_network.py
+++ b/pandas/io/tests/parser/test_network.py
@@ -24,7 +24,7 @@ class TestCompressedUrl(object):
 'xz': '.xz',
 }
- def __init__(self):
+ def setup(self):
 path = os.path.join(tm.get_data_path(), 'salaries.csv')
 self.local_table = read_table(path)
 self.base_url = ('https://github.com/pandas-dev/pandas/raw/master/'
diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py
index 8a0cfb92bd3c0..2ee36d85f674c 100644
--- a/pandas/io/tests/test_packers.py
+++ b/pandas/io/tests/test_packers.py
@@ -793,18 +793,19 @@ class TestMsgpack():
 http://stackoverflow.com/questions/6689537/nose-test-generators-inside-class
 """
- def setUp(self):
+ @classmethod
+ def setup_class(cls):
 from pandas.io.tests.generate_legacy_storage_files import (
 create_msgpack_data, create_data)
- self.data = create_msgpack_data()
- self.all_data = create_data()
- self.path = u('__%s__.msgpack' % tm.rands(10))
- self.minimum_structure = {'series': ['float', 'int', 'mixed',
- 'ts', 'mi', 'dup'],
- 'frame': ['float', 'int', 'mixed', 'mi'],
- 'panel': ['float'],
- 'index': ['int', 'date', 'period'],
- 'mi': ['reg2']}
+ cls.data = create_msgpack_data()
+ cls.all_data = create_data()
+ cls.path = u('__%s__.msgpack' % tm.rands(10))
+ cls.minimum_structure = {'series': ['float', 'int', 'mixed',
+ 'ts', 'mi', 'dup'],
+ 'frame': ['float', 'int', 'mixed', 'mi'],
+ 'panel': ['float'],
+ 'index': ['int', 'date', 'period'],
+ 'mi': ['reg2']}
 def check_min_structure(self, data):
 for typ, v in self.minimum_structure.items():
diff --git a/pandas/io/tests/test_pickle.py 
b/pandas/io/tests/test_pickle.py index 73a9173e85906..89827817a85fb 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -31,11 +31,12 @@ class TestPickle(): nose-test-generators-inside-class """ - def setUp(self): + @classmethod + def setup_class(cls): from pandas.io.tests.generate_legacy_storage_files import ( create_pickle_data) - self.data = create_pickle_data() - self.path = u('__%s__.pickle' % tm.rands(10)) + cls.data = create_pickle_data() + cls.path = u('__%s__.pickle' % tm.rands(10)) def compare_element(self, result, expected, typ, version=None): if isinstance(expected, Index): diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 4bcde764001c1..ddda65c5bafc8 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -236,7 +236,7 @@ def _close_conn(self): pass -class PandasSQLTest(unittest.TestCase): +class PandasSQLTest(object): """ Base class with common private methods for SQLAlchemy and fallback cases. @@ -839,7 +839,7 @@ def test_unicode_column_name(self): df.to_sql('test_unicode', self.conn, index=False) -class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi): +class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi, unittest.TestCase): """ Test the public API as it would be used directly @@ -1024,11 +1024,11 @@ def tearDown(self): super(_EngineToConnMixin, self).tearDown() -class TestSQLApiConn(_EngineToConnMixin, TestSQLApi): +class TestSQLApiConn(_EngineToConnMixin, TestSQLApi, unittest.TestCase): pass -class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi): +class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi, unittest.TestCase): """ Test the public sqlite connection fallback API @@ -1875,34 +1875,39 @@ def test_schema_support(self): tm.assert_frame_equal(res1, res2) -class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy): +class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy, unittest.TestCase): pass -class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn): +class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn, + unittest.TestCase): pass -class TestPostgreSQLAlchemy(_TestPostgreSQLAlchemy, _TestSQLAlchemy): +class TestPostgreSQLAlchemy(_TestPostgreSQLAlchemy, _TestSQLAlchemy, + unittest.TestCase): pass -class TestPostgreSQLAlchemyConn(_TestPostgreSQLAlchemy, _TestSQLAlchemyConn): +class TestPostgreSQLAlchemyConn(_TestPostgreSQLAlchemy, _TestSQLAlchemyConn, + unittest.TestCase): pass -class TestSQLiteAlchemy(_TestSQLiteAlchemy, _TestSQLAlchemy): +class TestSQLiteAlchemy(_TestSQLiteAlchemy, _TestSQLAlchemy, + unittest.TestCase): pass -class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn): +class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn, + unittest.TestCase): pass # ----------------------------------------------------------------------------- # -- Test Sqlite / MySQL fallback -class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest): +class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest, unittest.TestCase): """ Test the fallback mode against an in-memory sqlite database. 
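The inheritance reshuffle above follows one pattern throughout: shared assertions live on plain mixins that the test collector ignores, and only the concrete leaf classes that also derive from ``unittest.TestCase`` are collected and run. A minimal sketch with illustrative names:

    import unittest

    class SQLBase(object):
        # shared checks; not collected on its own (no TestCase base, and
        # the name does not match pytest's default Test* pattern)
        def _check_roundtrip(self):
            self.assertEqual(1 + 1, 2)

    class TestSQLiteFallback(SQLBase, unittest.TestCase):
        def test_roundtrip(self):  # collected and run
            self._check_roundtrip()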
diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 99cc70ae36f6b..9a24ae332f7c5 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -3923,6 +3923,15 @@ def test_period(self): self.assertEqual(str(df), exp) +def gen_series_formatting(): + s1 = pd.Series(['a'] * 100) + s2 = pd.Series(['ab'] * 100) + s3 = pd.Series(['a', 'ab', 'abc', 'abcd', 'abcde', 'abcdef']) + s4 = s3[::-1] + test_sers = {'onel': s1, 'twol': s2, 'asc': s3, 'desc': s4} + return test_sers + + class TestSeriesFormatting(tm.TestCase): def setUp(self): @@ -4320,15 +4329,6 @@ def test_consistent_format(self): '1.0000\n129 1.0000\ndtype: float64') self.assertEqual(res, exp) - @staticmethod - def gen_test_series(): - s1 = pd.Series(['a'] * 100) - s2 = pd.Series(['ab'] * 100) - s3 = pd.Series(['a', 'ab', 'abc', 'abcd', 'abcde', 'abcdef']) - s4 = s3[::-1] - test_sers = {'onel': s1, 'twol': s2, 'asc': s3, 'desc': s4} - return test_sers - def chck_ncols(self, s): with option_context("display.max_rows", 10): res = repr(s) @@ -4339,7 +4339,7 @@ def chck_ncols(self, s): self.assertEqual(ncolsizes, 1) def test_format_explicit(self): - test_sers = self.gen_test_series() + test_sers = gen_series_formatting() with option_context("display.max_rows", 4): res = repr(test_sers['onel']) exp = '0 a\n1 a\n ..\n98 a\n99 a\ndtype: object' @@ -4358,7 +4358,7 @@ def test_format_explicit(self): self.assertEqual(exp, res) def test_ncols(self): - test_sers = self.gen_test_series() + test_sers = gen_series_formatting() for s in test_sers.values(): self.chck_ncols(s) diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index 37f67712e1b58..b9cd764c8704c 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -89,11 +89,11 @@ def empty(self): @cache_readonly def ts1(self): - return tm.makeTimeSeries() + return tm.makeTimeSeries(nper=30) @cache_readonly def ts2(self): - return tm.makeTimeSeries()[5:] + return tm.makeTimeSeries(nper=30)[5:] @cache_readonly def simple(self): diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index e1d057eb3c3c0..0716a13fac3fe 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -93,6 +93,11 @@ def test_set_locale(self): raise nose.SkipTest("Only a single locale found, no point in " "trying to test setting another locale") + if all(x is None for x in CURRENT_LOCALE): + # Not sure why, but on some travis runs with pytest, + # getlocale() returned (None, None). + raise nose.SkipTest("CURRENT_LOCALE is not set.") + if LOCALE_OVERRIDE is None: lang, enc = 'it_CH', 'UTF-8' elif LOCALE_OVERRIDE == 'C': diff --git a/pandas/util/_tester.py b/pandas/util/_tester.py new file mode 100644 index 0000000000000..b0e402939caae --- /dev/null +++ b/pandas/util/_tester.py @@ -0,0 +1,23 @@ +""" +Entrypoint for testing from the top-level namespace +""" +import os + +PKG = os.path.dirname(os.path.dirname(__file__)) + + +try: + import pytest +except ImportError: + def test(): + raise ImportError("Need pytest>=3.0 to run tests") +else: + def test(extra_args=None): + if extra_args: + cmd = ['-q'] + extra_args + [PKG] + else: + cmd = ['-q', PKG] + pytest.main(cmd) + + +__all__ = ['test'] diff --git a/pandas/util/nosetester.py b/pandas/util/nosetester.py deleted file mode 100644 index 1bdaaff99fd50..0000000000000 --- a/pandas/util/nosetester.py +++ /dev/null @@ -1,261 +0,0 @@ -""" -Nose test running. - -This module implements ``test()`` function for pandas modules. 
- -""" -from __future__ import division, absolute_import, print_function - -import os -import sys -import warnings -from pandas.compat import string_types -from numpy.testing import nosetester - - -def get_package_name(filepath): - """ - Given a path where a package is installed, determine its name. - - Parameters - ---------- - filepath : str - Path to a file. If the determination fails, "pandas" is returned. - - Examples - -------- - >>> pandas.util.nosetester.get_package_name('nonsense') - 'pandas' - - """ - - pkg_name = [] - while 'site-packages' in filepath or 'dist-packages' in filepath: - filepath, p2 = os.path.split(filepath) - if p2 in ('site-packages', 'dist-packages'): - break - pkg_name.append(p2) - - # if package name determination failed, just default to pandas - if not pkg_name: - return "pandas" - - # otherwise, reverse to get correct order and return - pkg_name.reverse() - - # don't include the outer egg directory - if pkg_name[0].endswith('.egg'): - pkg_name.pop(0) - - return '.'.join(pkg_name) - -import_nose = nosetester.import_nose -run_module_suite = nosetester.run_module_suite - - -class NoseTester(nosetester.NoseTester): - """ - Nose test runner. - - This class is made available as pandas.util.nosetester.NoseTester, and - a test function is typically added to a package's __init__.py like so:: - - from numpy.testing import Tester - test = Tester().test - - Calling this test function finds and runs all tests associated with the - package and all its sub-packages. - - Attributes - ---------- - package_path : str - Full path to the package to test. - package_name : str - Name of the package to test. - - Parameters - ---------- - package : module, str or None, optional - The package to test. If a string, this should be the full path to - the package. If None (default), `package` is set to the module from - which `NoseTester` is initialized. - raise_warnings : None, str or sequence of warnings, optional - This specifies which warnings to configure as 'raise' instead - of 'warn' during the test execution. Valid strings are: - - - "develop" : equals ``(DeprecationWarning, RuntimeWarning)`` - - "release" : equals ``()``, don't raise on any warnings. - - See Notes for more details. - - Notes - ----- - The default for `raise_warnings` is - ``(DeprecationWarning, RuntimeWarning)`` for development versions of - pandas, and ``()`` for released versions. The purpose of this switching - behavior is to catch as many warnings as possible during development, but - not give problems for packaging of released versions. - - """ - excludes = [] - - def _show_system_info(self): - nose = import_nose() - - import pandas - print("pandas version %s" % pandas.__version__) - import numpy - print("numpy version %s" % numpy.__version__) - pddir = os.path.dirname(pandas.__file__) - print("pandas is installed in %s" % pddir) - - pyversion = sys.version.replace('\n', '') - print("Python version %s" % pyversion) - print("nose version %d.%d.%d" % nose.__versioninfo__) - - def _get_custom_doctester(self): - """ Return instantiated plugin for doctests - - Allows subclassing of this class to override doctester - - A return value of None means use the nose builtin doctest plugin - """ - return None - - def _test_argv(self, label, verbose, extra_argv): - """ - Generate argv for nosetest command - - Parameters - ---------- - label : {'fast', 'full', '', attribute identifier}, optional - see ``test`` docstring - verbose : int, optional - Verbosity value for test outputs, in the range 1-10. Default is 1. 
- extra_argv : list, optional - List with any extra arguments to pass to nosetests. - - Returns - ------- - argv : list - command line arguments that will be passed to nose - """ - - argv = [__file__, self.package_path] - if label and label != 'full': - if not isinstance(label, string_types): - raise TypeError('Selection label should be a string') - if label == 'fast': - label = 'not slow and not network and not disabled' - argv += ['-A', label] - argv += ['--verbosity', str(verbose)] - - # When installing with setuptools, and also in some other cases, the - # test_*.py files end up marked +x executable. Nose, by default, does - # not run files marked with +x as they might be scripts. However, in - # our case nose only looks for test_*.py files under the package - # directory, which should be safe. - argv += ['--exe'] - - if extra_argv: - argv += extra_argv - return argv - - def test(self, label='fast', verbose=1, extra_argv=None, - doctests=False, coverage=False, raise_warnings=None): - """ - Run tests for module using nose. - - Parameters - ---------- - label : {'fast', 'full', '', attribute identifier}, optional - Identifies the tests to run. This can be a string to pass to - the nosetests executable with the '-A' option, or one of several - special values. Special values are: - - * 'fast' - the default - which corresponds to the ``nosetests -A`` - option of 'not slow'. - * 'full' - fast (as above) and slow tests as in the - 'no -A' option to nosetests - this is the same as ''. - * None or '' - run all tests. - * attribute_identifier - string passed directly to nosetests - as '-A'. - - verbose : int, optional - Verbosity value for test outputs, in the range 1-10. Default is 1. - extra_argv : list, optional - List with any extra arguments to pass to nosetests. - doctests : bool, optional - If True, run doctests in module. Default is False. - coverage : bool, optional - If True, report coverage of NumPy code. Default is False. - (This requires the `coverage module - `_). - raise_warnings : str or sequence of warnings, optional - This specifies which warnings to configure as 'raise' instead - of 'warn' during the test execution. Valid strings are: - - - 'develop' : equals ``(DeprecationWarning, RuntimeWarning)`` - - 'release' : equals ``()``, don't raise on any warnings. - - Returns - ------- - result : object - Returns the result of running the tests as a - ``nose.result.TextTestResult`` object. - - """ - - # cap verbosity at 3 because nose becomes *very* verbose beyond that - verbose = min(verbose, 3) - - if doctests: - print("Running unit tests and doctests for %s" % self.package_name) - else: - print("Running unit tests for %s" % self.package_name) - - self._show_system_info() - - # reset doctest state on every run - import doctest - doctest.master = None - - if raise_warnings is None: - - # default based on if we are released - from pandas import __version__ - from distutils.version import StrictVersion - try: - StrictVersion(__version__) - raise_warnings = 'release' - except ValueError: - raise_warnings = 'develop' - - _warn_opts = dict(develop=(DeprecationWarning, RuntimeWarning), - release=()) - if isinstance(raise_warnings, string_types): - raise_warnings = _warn_opts[raise_warnings] - - with warnings.catch_warnings(): - - if len(raise_warnings): - - # Reset the warning filters to the default state, - # so that running the tests is more repeatable. 
- warnings.resetwarnings()
- # Set all warnings to 'warn', this is because the default
- # 'once' has the bad property of possibly shadowing later
- # warnings.
- warnings.filterwarnings('always')
- # Force the requested warnings to raise
- for warningtype in raise_warnings:
- warnings.filterwarnings('error', category=warningtype)
- # Filter out annoying import messages.
- warnings.filterwarnings("ignore", category=FutureWarning)
-
- from numpy.testing.noseclasses import NumpyTestProgram
- argv, plugins = self.prepare_test_args(
- label, verbose, extra_argv, doctests, coverage)
- t = NumpyTestProgram(argv=argv, exit=False, plugins=plugins)
-
- return t.result
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 6b2e920a24063..336a766fd5830 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -19,7 +19,8 @@
 from distutils.version import LooseVersion
 from numpy.random import randn, rand
-from numpy.testing.decorators import slow # noqa
+# from numpy.testing.decorators import slow # noqa
+import pytest
 import numpy as np
 import pandas as pd
@@ -2549,9 +2550,7 @@ def assert_produces_warning(expected_warning=Warning, filter_level="always",
 % extra_warnings)
-def disabled(t):
- t.disabled = True
- return t
+slow = pytest.mark.slow
 class RNGContext(object):
diff --git a/setup.cfg b/setup.cfg
index f69e256b80869..143470f7ee350 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -19,3 +19,9 @@ based_on_style = pep8
 split_before_named_assigns = false
 split_penalty_after_opening_bracket = 1000000
 split_penalty_logical_operator = 30
+
+[tool:pytest]
+# TODO: Change all yield-based (nose-style) fixtures to pytest fixtures
+# Silencing the warning until then
+addopts = --disable-pytest-warnings
+testpaths = pandas
diff --git a/test.bat b/test.bat
index 16aa6c9105ec3..7f9244abb2bc8 100644
--- a/test.bat
+++ b/test.bat
@@ -1,3 +1,4 @@
 :: test on windows
-nosetests --exe -A "not slow and not network and not disabled" pandas %*
+:: nosetests --exe -A "not slow and not network and not disabled" pandas %*
+pytest pandas
diff --git a/test.sh b/test.sh
index 4a9ffd7be98b1..23c7ff52d2ce9 100755
--- a/test.sh
+++ b/test.sh
@@ -1,11 +1,4 @@
 #!/bin/sh
 command -v coverage >/dev/null && coverage erase
 command -v python-coverage >/dev/null && python-coverage erase
-# nosetests pandas/tests/test_index.py --with-coverage --cover-package=pandas.core --pdb-failure --pdb
-#nosetests -w pandas --with-coverage --cover-package=pandas --pdb-failure --pdb #--cover-inclusive
-#nosetests -A "not slow" -w pandas/tseries --with-coverage --cover-package=pandas.tseries $* #--cover-inclusive
-nosetests -w pandas --with-coverage --cover-package=pandas $*
-# nosetests -w pandas/io --with-coverage --cover-package=pandas.io --pdb-failure --pdb
-# nosetests -w pandas/core --with-coverage --cover-package=pandas.core --pdb-failure --pdb
-# nosetests -w pandas/stats --with-coverage --cover-package=pandas.stats
-# coverage run runtests.py
+pytest pandas --cov=pandas
diff --git a/test_fast.sh b/test_fast.sh
index b390705f901ad..0b394cffa3d74 100755
--- a/test_fast.sh
+++ b/test_fast.sh
@@ -1 +1,2 @@
-nosetests -A "not slow and not network" pandas --with-id $*
+# nosetests -A "not slow and not network" pandas --with-id $*
+pytest pandas --skip-slow
diff --git a/test_multi.sh b/test_multi.sh
deleted file mode 100755
index 5d77945c66a26..0000000000000
--- a/test_multi.sh
+++ /dev/null
@@ -1 +0,0 @@
-nosetests -A "not slow and not network" pandas --processes=4 $*
diff --git a/test_rebuild.sh b/test_rebuild.sh
index 
d3710c5ff67d3..65aa1098811a1 100755 --- a/test_rebuild.sh +++ b/test_rebuild.sh @@ -3,10 +3,4 @@ python setup.py clean python setup.py build_ext --inplace coverage erase -# nosetests pandas/tests/test_index.py --with-coverage --cover-package=pandas.core --pdb-failure --pdb -#nosetests -w pandas --with-coverage --cover-package=pandas --pdb-failure --pdb #--cover-inclusive -nosetests -w pandas --with-coverage --cover-package=pandas $* #--cover-inclusive -# nosetests -w pandas/io --with-coverage --cover-package=pandas.io --pdb-failure --pdb -# nosetests -w pandas/core --with-coverage --cover-package=pandas.core --pdb-failure --pdb -# nosetests -w pandas/stats --with-coverage --cover-package=pandas.stats -# coverage run runtests.py +pytest pandas --cov=pandas diff --git a/tox.ini b/tox.ini index 5d6c8975307b6..85c5d90fde7fb 100644 --- a/tox.ini +++ b/tox.ini @@ -10,6 +10,7 @@ envlist = py27, py34, py35 deps = cython nose + pytest pytz>=2011k python-dateutil beautifulsoup4 @@ -26,7 +27,7 @@ changedir = {envdir} commands = # TODO: --exe because of GH #761 - {envbindir}/nosetests --exe pandas {posargs:-A "not network and not disabled"} + {envbindir}/pytest pandas {posargs:-A "not network and not disabled"} # cleanup the temp. build dir created by the tox build # /bin/rm -rf {toxinidir}/build @@ -63,18 +64,18 @@ usedevelop = True deps = {[testenv]deps} openpyxl<2.0.0 -commands = {envbindir}/nosetests {toxinidir}/pandas/io/tests/test_excel.py +commands = {envbindir}/pytest {toxinidir}/pandas/io/tests/test_excel.py [testenv:openpyxl20] usedevelop = True deps = {[testenv]deps} openpyxl<2.2.0 -commands = {envbindir}/nosetests {posargs} {toxinidir}/pandas/io/tests/test_excel.py +commands = {envbindir}/pytest {posargs} {toxinidir}/pandas/io/tests/test_excel.py [testenv:openpyxl22] usedevelop = True deps = {[testenv]deps} openpyxl>=2.2.0 -commands = {envbindir}/nosetests {posargs} {toxinidir}/pandas/io/tests/test_excel.py +commands = {envbindir}/pytest {posargs} {toxinidir}/pandas/io/tests/test_excel.py From 7713f2940b74fff8254087d9bdde1d2d3c3927e6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 10 Feb 2017 10:42:18 -0500 Subject: [PATCH 035/933] TST: small adjustments for pytest --- doc/source/contributing.rst | 2 +- pandas/util/_tester.py | 9 ++++++--- pandas/util/testing.py | 6 ++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index dbe329b589c75..3ef9ed8962a23 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -613,7 +613,7 @@ Or with one of the following constructs:: For more, see the `pytest`_ documentation. - .. versionadded:: 0.18.0 + .. 
versionadded:: 0.20.0 Furthermore one can run diff --git a/pandas/util/_tester.py b/pandas/util/_tester.py index b0e402939caae..8d9701e0b4672 100644 --- a/pandas/util/_tester.py +++ b/pandas/util/_tester.py @@ -13,10 +13,13 @@ def test(): raise ImportError("Need pytest>=3.0 to run tests") else: def test(extra_args=None): + cmd = ['--skip-slow', '--skip-network'] if extra_args: - cmd = ['-q'] + extra_args + [PKG] - else: - cmd = ['-q', PKG] + if not isinstance(extra_args, list): + extra_args = [extra_args] + cmd = extra_args + cmd += [PKG] + print("running: pytest {}".format(' '.join(cmd))) pytest.main(cmd) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 336a766fd5830..c3633c945f60a 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -19,7 +19,6 @@ from distutils.version import LooseVersion from numpy.random import randn, rand -# from numpy.testing.decorators import slow # noqa import pytest import numpy as np @@ -50,6 +49,8 @@ from pandas.util.decorators import deprecate from pandas import _testing from pandas.io.common import urlopen +slow = pytest.mark.slow + N = 30 K = 4 @@ -2550,9 +2551,6 @@ def assert_produces_warning(expected_warning=Warning, filter_level="always", % extra_warnings) -slow = pytest.mark.slow - - class RNGContext(object): """ Context manager to set the numpy random number generator speed. Returns From dcb4e47a0b6620f1efbe5e02ed493e6513fc8763 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 10 Feb 2017 13:19:22 -0500 Subject: [PATCH 036/933] COMPAT: skip tests for numpy >= 1.12 with pow and integer inputs closes #15363 CI: fix 3.5 build to numpy 1.11.3 --- .gitignore | 1 + ci/requirements-3.5.build | 2 +- ci/requirements-3.5.run | 2 +- pandas/tests/test_expressions.py | 17 ++++++++++++++++- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 808d9fb73a631..a509fcf736ea8 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ .noseids .ipynb_checkpoints .tags +.cache/ # Compiled source # ################### diff --git a/ci/requirements-3.5.build b/ci/requirements-3.5.build index 9558cf00ddf5c..2fc2053e64fe9 100644 --- a/ci/requirements-3.5.build +++ b/ci/requirements-3.5.build @@ -1,4 +1,4 @@ python-dateutil pytz -numpy +numpy=1.11.3 cython diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run index ef354195c8f23..b07ce611c79a2 100644 --- a/ci/requirements-3.5.run +++ b/ci/requirements-3.5.run @@ -1,6 +1,6 @@ python-dateutil pytz -numpy +numpy=1.11.3 openpyxl xlsxwriter xlrd diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index eca4a8f3c9e66..136786ecff0a0 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -12,7 +12,7 @@ from pandas.core.api import DataFrame, Panel from pandas.computation import expressions as expr -from pandas import compat +from pandas import compat, _np_version_under1p12 from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_panel4d_equal, slow) @@ -78,6 +78,13 @@ def run_arithmetic_test(self, df, other, assert_func, check_dtype=False, if not compat.PY3: operations.append('div') for arith in operations: + + # numpy >= 1.12 doesn't handle integers + # raised to integer powers + # https://github.com/pandas-dev/pandas/issues/15363 + if arith == 'pow' and not _np_version_under1p12: + continue + operator_name = arith if arith == 'div': operator_name = 'truediv' @@ -90,6 +97,7 @@ def run_arithmetic_test(self, df, other, 
assert_func, check_dtype=False, expr.set_use_numexpr(False) expected = op(df, other) expr.set_use_numexpr(True) + result = op(df, other) try: if check_dtype: @@ -273,6 +281,13 @@ def testit(): for op, op_str in [('add', '+'), ('sub', '-'), ('mul', '*'), ('div', '/'), ('pow', '**')]: + + # numpy >= 1.12 doesn't handle integers + # raised to integer powers + # https://github.com/pandas-dev/pandas/issues/15363 + if op == 'pow' and not _np_version_under1p12: + continue + if op == 'div': op = getattr(operator, 'truediv', None) else: From 61deba5cfc43425e35c8fc61bcad1123c83a6a5a Mon Sep 17 00:00:00 2001 From: Joshua Bradt Date: Fri, 10 Feb 2017 14:48:11 -0500 Subject: [PATCH 037/933] BUG: Fixed handling of non-list value_vars in melt The value_vars argument of melt is now cast to list like the id_vars argument. closes #15348 Author: Joshua Bradt Author: Joshua Bradt Closes #15351 from jbradt/fix-melt and squashes the following commits: a2f2510 [Joshua Bradt] Changed to tm.assertRaisesRegexp for Python 2 compat. 3038f64 [Joshua Bradt] Merge remote-tracking branch 'upstream/master' into fix-melt e907135 [Joshua Bradt] Split test into two parts 20159c1 [Joshua Bradt] Changed exception classes to ValueError. 129d531 [Joshua Bradt] Moved binary operators to satisfy flake8 70d7256 [Joshua Bradt] Merge branch 'master' into fix-melt 455a310 [Joshua Bradt] Tested types when using MultiIndex to ensure they are lists. 7406222 [Joshua Bradt] Fixed formatting. Added comment with issue number to test. d4c5da3 [Joshua Bradt] Improved type checking and tests. Added whatsnew note. 33728de [Joshua Bradt] BUG: Fixed handling of non-list value_vars in melt --- doc/source/whatsnew/v0.20.0.txt | 3 +-- pandas/core/reshape.py | 14 ++++++++++-- pandas/tests/test_reshape.py | 39 +++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index d0ffa786aaa8e..9f86c777c665d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -578,6 +578,5 @@ Bug Fixes - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) - - +- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index bd0358abf67d5..cebaf4e3fd89b 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -761,16 +761,26 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, """ # TODO: what about the existing index? 
if id_vars is not None: - if not isinstance(id_vars, (tuple, list, np.ndarray)): + if not is_list_like(id_vars): id_vars = [id_vars] + elif (isinstance(frame.columns, MultiIndex) and + not isinstance(id_vars, list)): + raise ValueError('id_vars must be a list of tuples when columns' + ' are a MultiIndex') else: id_vars = list(id_vars) else: id_vars = [] if value_vars is not None: - if not isinstance(value_vars, (tuple, list, np.ndarray)): + if not is_list_like(value_vars): value_vars = [value_vars] + elif (isinstance(frame.columns, MultiIndex) and + not isinstance(value_vars, list)): + raise ValueError('value_vars must be a list of tuples when' + ' columns are a MultiIndex') + else: + value_vars = list(value_vars) frame = frame.loc[:, id_vars + value_vars] else: frame = frame.copy() diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index ed5ec970ba33c..d587e4ea6a1fa 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -56,6 +56,45 @@ def test_value_vars(self): columns=['id1', 'id2', 'variable', 'value']) tm.assert_frame_equal(result4, expected4) + def test_value_vars_types(self): + # GH 15348 + expected = DataFrame({'id1': self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + 'variable': ['A'] * 10 + ['B'] * 10, + 'value': (self.df['A'].tolist() + + self.df['B'].tolist())}, + columns=['id1', 'id2', 'variable', 'value']) + + for type_ in (tuple, list, np.array): + result = melt(self.df, id_vars=['id1', 'id2'], + value_vars=type_(('A', 'B'))) + tm.assert_frame_equal(result, expected) + + def test_vars_work_with_multiindex(self): + expected = DataFrame({ + ('A', 'a'): self.df1[('A', 'a')], + 'CAP': ['B'] * len(self.df1), + 'low': ['b'] * len(self.df1), + 'value': self.df1[('B', 'b')], + }, columns=[('A', 'a'), 'CAP', 'low', 'value']) + + result = melt(self.df1, id_vars=[('A', 'a')], value_vars=[('B', 'b')]) + tm.assert_frame_equal(result, expected) + + def test_tuple_vars_fail_with_multiindex(self): + # melt should fail with an informative error message if + # the columns have a MultiIndex and a tuple is passed + # for id_vars or value_vars. 
+ tuple_a = ('A', 'a') + list_a = [tuple_a] + tuple_b = ('B', 'b') + list_b = [tuple_b] + + for id_vars, value_vars in ((tuple_a, list_b), (list_a, tuple_b), + (tuple_a, tuple_b)): + with tm.assertRaisesRegexp(ValueError, r'MultiIndex'): + melt(self.df1, id_vars=id_vars, value_vars=value_vars) + def test_custom_var_name(self): result5 = melt(self.df, var_name=self.var_name) self.assertEqual(result5.columns.tolist(), ['var', 'value']) From 3f7d2db773fbc3c9bdbfba59b6866be0f2d7b711 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 11 Feb 2017 16:05:54 -0500 Subject: [PATCH 038/933] TST: split up tests/indexing/test_indexing a bit Author: Jeff Reback Closes #15367 from jreback/indexing and squashes the following commits: 15e6010 [Jeff Reback] pep 3a12fdd [Jeff Reback] add panel 5605b2b [Jeff Reback] add chaining and caching 05f6f40 [Jeff Reback] split out datetime d6be34f [Jeff Reback] TST: split up tests/indexing/test_indexing a bit --- pandas/tests/indexing/common.py | 5 + .../indexing/test_chaining_and_caching.py | 358 +++ pandas/tests/indexing/test_datetime.py | 192 ++ pandas/tests/indexing/test_floats.py | 157 ++ pandas/tests/indexing/test_indexing.py | 2242 +---------------- pandas/tests/indexing/test_multiindex.py | 1206 +++++++++ pandas/tests/indexing/test_panel.py | 209 ++ pandas/tests/indexing/test_timedelta.py | 21 + 8 files changed, 2215 insertions(+), 2175 deletions(-) create mode 100644 pandas/tests/indexing/common.py create mode 100644 pandas/tests/indexing/test_chaining_and_caching.py create mode 100644 pandas/tests/indexing/test_datetime.py create mode 100644 pandas/tests/indexing/test_multiindex.py create mode 100644 pandas/tests/indexing/test_panel.py create mode 100644 pandas/tests/indexing/test_timedelta.py diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py new file mode 100644 index 0000000000000..73167393cf35d --- /dev/null +++ b/pandas/tests/indexing/common.py @@ -0,0 +1,5 @@ +""" common utilities """ + + +def _mklbl(prefix, n): + return ["%s%s" % (prefix, i) for i in range(n)] diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py new file mode 100644 index 0000000000000..0e921aaf826f9 --- /dev/null +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -0,0 +1,358 @@ +import numpy as np +import pandas as pd +from pandas.core import common as com +from pandas import (compat, DataFrame, option_context, + Series, MultiIndex, date_range, Timestamp) +from pandas.util import testing as tm + + +class TestCaching(tm.TestCase): + + def test_slice_consolidate_invalidate_item_cache(self): + + # this is chained assignment, but will 'work' + with option_context('chained_assignment', None): + + # #3970 + df = DataFrame({"aa": compat.lrange(5), "bb": [2.2] * 5}) + + # Creates a second float block + df["cc"] = 0.0 + + # caches a reference to the 'bb' series + df["bb"] + + # repr machinery triggers consolidation + repr(df) + + # Assignment to wrong series + df['bb'].iloc[0] = 0.17 + df._clear_item_cache() + self.assertAlmostEqual(df['bb'][0], 0.17) + + def test_setitem_cache_updating(self): + # GH 5424 + cont = ['one', 'two', 'three', 'four', 'five', 'six', 'seven'] + + for do_ref in [False, False]: + df = DataFrame({'a': cont, + "b": cont[3:] + cont[:3], + 'c': np.arange(7)}) + + # ref the cache + if do_ref: + df.ix[0, "c"] + + # set it + df.ix[7, 'c'] = 1 + + self.assertEqual(df.ix[0, 'c'], 0.0) + self.assertEqual(df.ix[7, 'c'], 1.0) + + # GH 7084 + # not updating cache on series 
setting with slices + expected = DataFrame({'A': [600, 600, 600]}, + index=date_range('5/7/2014', '5/9/2014')) + out = DataFrame({'A': [0, 0, 0]}, + index=date_range('5/7/2014', '5/9/2014')) + df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]}) + + # loop through df to update out + six = Timestamp('5/7/2014') + eix = Timestamp('5/9/2014') + for ix, row in df.iterrows(): + out.loc[six:eix, row['C']] = out.loc[six:eix, row['C']] + row['D'] + + tm.assert_frame_equal(out, expected) + tm.assert_series_equal(out['A'], expected['A']) + + # try via a chain indexing + # this actually works + out = DataFrame({'A': [0, 0, 0]}, + index=date_range('5/7/2014', '5/9/2014')) + for ix, row in df.iterrows(): + v = out[row['C']][six:eix] + row['D'] + out[row['C']][six:eix] = v + + tm.assert_frame_equal(out, expected) + tm.assert_series_equal(out['A'], expected['A']) + + out = DataFrame({'A': [0, 0, 0]}, + index=date_range('5/7/2014', '5/9/2014')) + for ix, row in df.iterrows(): + out.loc[six:eix, row['C']] += row['D'] + + tm.assert_frame_equal(out, expected) + tm.assert_series_equal(out['A'], expected['A']) + + +class TestChaining(tm.TestCase): + + def test_setitem_chained_setfault(self): + + # GH6026 + # setfaults under numpy 1.7.1 (ok on 1.8) + data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout'] + mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none'] + + df = DataFrame({'response': np.array(data)}) + mask = df.response == 'timeout' + df.response[mask] = 'none' + tm.assert_frame_equal(df, DataFrame({'response': mdata})) + + recarray = np.rec.fromarrays([data], names=['response']) + df = DataFrame(recarray) + mask = df.response == 'timeout' + df.response[mask] = 'none' + tm.assert_frame_equal(df, DataFrame({'response': mdata})) + + df = DataFrame({'response': data, 'response1': data}) + mask = df.response == 'timeout' + df.response[mask] = 'none' + tm.assert_frame_equal(df, DataFrame({'response': mdata, + 'response1': data})) + + # GH 6056 + expected = DataFrame(dict(A=[np.nan, 'bar', 'bah', 'foo', 'bar'])) + df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar']))) + df['A'].iloc[0] = np.nan + result = df.head() + tm.assert_frame_equal(result, expected) + + df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar']))) + df.A.iloc[0] = np.nan + result = df.head() + tm.assert_frame_equal(result, expected) + + def test_detect_chained_assignment(self): + + pd.set_option('chained_assignment', 'raise') + + # work with the chain + expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB')) + df = DataFrame(np.arange(4).reshape(2, 2), + columns=list('AB'), dtype='int64') + self.assertIsNone(df.is_copy) + df['A'][0] = -5 + df['A'][1] = -6 + tm.assert_frame_equal(df, expected) + + # test with the chaining + df = DataFrame({'A': Series(range(2), dtype='int64'), + 'B': np.array(np.arange(2, 4), dtype=np.float64)}) + self.assertIsNone(df.is_copy) + + def f(): + df['A'][0] = -5 + + self.assertRaises(com.SettingWithCopyError, f) + + def f(): + df['A'][1] = np.nan + + self.assertRaises(com.SettingWithCopyError, f) + self.assertIsNone(df['A'].is_copy) + + # using a copy (the chain), fails + df = DataFrame({'A': Series(range(2), dtype='int64'), + 'B': np.array(np.arange(2, 4), dtype=np.float64)}) + + def f(): + df.loc[0]['A'] = -5 + + self.assertRaises(com.SettingWithCopyError, f) + + # doc example + df = DataFrame({'a': ['one', 'one', 'two', 'three', + 'two', 'one', 'six'], + 'c': Series(range(7), dtype='int64')}) + self.assertIsNone(df.is_copy) + expected = 
DataFrame({'a': ['one', 'one', 'two', 'three', + 'two', 'one', 'six'], + 'c': [42, 42, 2, 3, 4, 42, 6]}) + + def f(): + indexer = df.a.str.startswith('o') + df[indexer]['c'] = 42 + + self.assertRaises(com.SettingWithCopyError, f) + + expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]}) + df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) + + def f(): + df['A'][0] = 111 + + self.assertRaises(com.SettingWithCopyError, f) + + def f(): + df.loc[0]['A'] = 111 + + self.assertRaises(com.SettingWithCopyError, f) + + df.loc[0, 'A'] = 111 + tm.assert_frame_equal(df, expected) + + # make sure that is_copy is picked up on reconstruction + # GH5475 + df = DataFrame({"A": [1, 2]}) + self.assertIsNone(df.is_copy) + with tm.ensure_clean('__tmp__pickle') as path: + df.to_pickle(path) + df2 = pd.read_pickle(path) + df2["B"] = df2["A"] + df2["B"] = df2["A"] + + # a spurious raise as we are setting the entire column here + # GH5597 + from string import ascii_letters as letters + + def random_text(nobs=100): + df = [] + for i in range(nobs): + idx = np.random.randint(len(letters), size=2) + idx.sort() + df.append([letters[idx[0]:idx[1]]]) + + return DataFrame(df, columns=['letters']) + + df = random_text(100000) + + # always a copy + x = df.iloc[[0, 1, 2]] + self.assertIsNotNone(x.is_copy) + x = df.iloc[[0, 1, 2, 4]] + self.assertIsNotNone(x.is_copy) + + # explicitly copy + indexer = df.letters.apply(lambda x: len(x) > 10) + df = df.ix[indexer].copy() + self.assertIsNone(df.is_copy) + df['letters'] = df['letters'].apply(str.lower) + + # implicitly take + df = random_text(100000) + indexer = df.letters.apply(lambda x: len(x) > 10) + df = df.ix[indexer] + self.assertIsNotNone(df.is_copy) + df['letters'] = df['letters'].apply(str.lower) + + # implicitly take 2 + df = random_text(100000) + indexer = df.letters.apply(lambda x: len(x) > 10) + df = df.ix[indexer] + self.assertIsNotNone(df.is_copy) + df.loc[:, 'letters'] = df['letters'].apply(str.lower) + + # should be ok even though it's a copy!
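+ # (the assertion below checks that the copy flag was cleared by + # the .loc assignment above, so the plain column setitem that + # follows does not raise SettingWithCopyError)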
+ self.assertIsNone(df.is_copy) + df['letters'] = df['letters'].apply(str.lower) + self.assertIsNone(df.is_copy) + + df = random_text(100000) + indexer = df.letters.apply(lambda x: len(x) > 10) + df.ix[indexer, 'letters'] = df.ix[indexer, 'letters'].apply(str.lower) + + # an identical take, so no copy + df = DataFrame({'a': [1]}).dropna() + self.assertIsNone(df.is_copy) + df['a'] += 1 + + # inplace ops + # original from: + # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug + a = [12, 23] + b = [123, None] + c = [1234, 2345] + d = [12345, 23456] + tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'), + ('ears', 'right')] + events = {('eyes', 'left'): a, + ('eyes', 'right'): b, + ('ears', 'left'): c, + ('ears', 'right'): d} + multiind = MultiIndex.from_tuples(tuples, names=['part', 'side']) + zed = DataFrame(events, index=['a', 'b'], columns=multiind) + + def f(): + zed['eyes']['right'].fillna(value=555, inplace=True) + + self.assertRaises(com.SettingWithCopyError, f) + + df = DataFrame(np.random.randn(10, 4)) + s = df.iloc[:, 0].sort_values() + tm.assert_series_equal(s, df.iloc[:, 0].sort_values()) + tm.assert_series_equal(s, df[0].sort_values()) + + # false positives GH6025 + df = DataFrame({'column1': ['a', 'a', 'a'], 'column2': [4, 8, 9]}) + str(df) + df['column1'] = df['column1'] + 'b' + str(df) + df = df[df['column2'] != 8] + str(df) + df['column1'] = df['column1'] + 'c' + str(df) + + # from SO: + # http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc + df = DataFrame(np.arange(0, 9), columns=['count']) + df['group'] = 'b' + + def f(): + df.iloc[0:5]['group'] = 'a' + + self.assertRaises(com.SettingWithCopyError, f) + + # mixed type setting + # same dtype & changing dtype + df = DataFrame(dict(A=date_range('20130101', periods=5), + B=np.random.randn(5), + C=np.arange(5, dtype='int64'), + D=list('abcde'))) + + def f(): + df.ix[2]['D'] = 'foo' + + self.assertRaises(com.SettingWithCopyError, f) + + def f(): + df.ix[2]['C'] = 'foo' + + self.assertRaises(com.SettingWithCopyError, f) + + def f(): + df['C'][2] = 'foo' + + self.assertRaises(com.SettingWithCopyError, f) + + def test_setting_with_copy_bug(self): + + # operating on a copy + df = pd.DataFrame({'a': list(range(4)), + 'b': list('ab..'), + 'c': ['a', 'b', np.nan, 'd']}) + mask = pd.isnull(df.c) + + def f(): + df[['c']][mask] = df[['b']][mask] + + self.assertRaises(com.SettingWithCopyError, f) + + # invalid warning as we are returning a new object + # GH 8730 + df1 = DataFrame({'x': Series(['a', 'b', 'c']), + 'y': Series(['d', 'e', 'f'])}) + df2 = df1[['x']] + + # this should not raise + df2['y'] = ['g', 'h', 'i'] + + def test_detect_chained_assignment_warnings(self): + + # warnings + with option_context('chained_assignment', 'warn'): + df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) + with tm.assert_produces_warning( + expected_warning=com.SettingWithCopyWarning): + df.loc[0]['A'] = 111 diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py new file mode 100644 index 0000000000000..1c4e5772d316f --- /dev/null +++ b/pandas/tests/indexing/test_datetime.py @@ -0,0 +1,192 @@ +import numpy as np +import pandas as pd +from pandas import date_range, Index, DataFrame, Series, Timestamp +from pandas.util import testing as tm + + +class TestDatetimeIndex(tm.TestCase): + + def test_indexing_with_datetime_tz(self): + + # 8260 + # support datetime64 with tz + + idx = 
Index(date_range('20130101', periods=3, tz='US/Eastern'), + name='foo') + dr = date_range('20130110', periods=3) + df = DataFrame({'A': idx, 'B': dr}) + df['C'] = idx + df.iloc[1, 1] = pd.NaT + df.iloc[1, 2] = pd.NaT + + # indexing + result = df.iloc[1] + expected = Series([Timestamp('2013-01-02 00:00:00-0500', + tz='US/Eastern'), np.nan, np.nan], + index=list('ABC'), dtype='object', name=1) + tm.assert_series_equal(result, expected) + result = df.loc[1] + expected = Series([Timestamp('2013-01-02 00:00:00-0500', + tz='US/Eastern'), np.nan, np.nan], + index=list('ABC'), dtype='object', name=1) + tm.assert_series_equal(result, expected) + + # indexing - fast_xs + df = DataFrame({'a': date_range('2014-01-01', periods=10, tz='UTC')}) + result = df.iloc[5] + expected = Timestamp('2014-01-06 00:00:00+0000', tz='UTC', freq='D') + self.assertEqual(result, expected) + + result = df.loc[5] + self.assertEqual(result, expected) + + # indexing - boolean + result = df[df.a > df.a[3]] + expected = df.iloc[4:] + tm.assert_frame_equal(result, expected) + + # indexing - setting an element + df = DataFrame(data=pd.to_datetime( + ['2015-03-30 20:12:32', '2015-03-12 00:11:11']), columns=['time']) + df['new_col'] = ['new', 'old'] + df.time = df.set_index('time').index.tz_localize('UTC') + v = df[df.new_col == 'new'].set_index('time').index.tz_convert( + 'US/Pacific') + + # trying to set a single element on a part of a different timezone + def f(): + df.loc[df.new_col == 'new', 'time'] = v + + self.assertRaises(ValueError, f) + + v = df.loc[df.new_col == 'new', 'time'] + pd.Timedelta('1s') + df.loc[df.new_col == 'new', 'time'] = v + tm.assert_series_equal(df.loc[df.new_col == 'new', 'time'], v) + + def test_indexing_with_datetimeindex_tz(self): + + # GH 12050 + # indexing on a series with a datetimeindex with tz + index = pd.date_range('2015-01-01', periods=2, tz='utc') + + ser = pd.Series(range(2), index=index, + dtype='int64') + + # list-like indexing + + for sel in (index, list(index)): + # getitem + tm.assert_series_equal(ser[sel], ser) + + # setitem + result = ser.copy() + result[sel] = 1 + expected = pd.Series(1, index=index) + tm.assert_series_equal(result, expected) + + # .loc getitem + tm.assert_series_equal(ser.loc[sel], ser) + + # .loc setitem + result = ser.copy() + result.loc[sel] = 1 + expected = pd.Series(1, index=index) + tm.assert_series_equal(result, expected) + + # single element indexing + + # getitem + self.assertEqual(ser[index[1]], 1) + + # setitem + result = ser.copy() + result[index[1]] = 5 + expected = pd.Series([0, 5], index=index) + tm.assert_series_equal(result, expected) + + # .loc getitem + self.assertEqual(ser.loc[index[1]], 1) + + # .loc setitem + result = ser.copy() + result.loc[index[1]] = 5 + expected = pd.Series([0, 5], index=index) + tm.assert_series_equal(result, expected) + + def test_partial_setting_with_datetimelike_dtype(self): + + # GH9478 + # a datetimeindex alignment issue with partial setting + df = pd.DataFrame(np.arange(6.).reshape(3, 2), columns=list('AB'), + index=pd.date_range('1/1/2000', periods=3, + freq='1H')) + expected = df.copy() + expected['C'] = [expected.index[0]] + [pd.NaT, pd.NaT] + + mask = df.A < 1 + df.loc[mask, 'C'] = df.loc[mask].index + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_datetime(self): + + # GH 9516 + dt1 = Timestamp('20130101 09:00:00') + dt2 = Timestamp('20130101 10:00:00') + + for conv in [lambda x: x, lambda x: x.to_datetime64(), + lambda x: x.to_pydatetime(), lambda x: np.datetime64(x)]: + + df = pd.DataFrame() + 
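# assigning to row/column labels that do not yet exist enlarges + # the empty frame in place (partial setting) rather than raising +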
df.loc[conv(dt1), 'one'] = 100 + df.loc[conv(dt2), 'one'] = 200 + + expected = DataFrame({'one': [100.0, 200.0]}, index=[dt1, dt2]) + tm.assert_frame_equal(df, expected) + + def test_series_partial_set_datetime(self): + # GH 11497 + + idx = date_range('2011-01-01', '2011-01-02', freq='D', name='idx') + ser = Series([0.1, 0.2], index=idx, name='s') + + result = ser.loc[[Timestamp('2011-01-01'), Timestamp('2011-01-02')]] + exp = Series([0.1, 0.2], index=idx, name='s') + tm.assert_series_equal(result, exp, check_index_type=True) + + keys = [Timestamp('2011-01-02'), Timestamp('2011-01-02'), + Timestamp('2011-01-01')] + exp = Series([0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name='idx'), + name='s') + tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + + keys = [Timestamp('2011-01-03'), Timestamp('2011-01-02'), + Timestamp('2011-01-03')] + exp = Series([np.nan, 0.2, np.nan], + index=pd.DatetimeIndex(keys, name='idx'), name='s') + tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + + def test_series_partial_set_period(self): + # GH 11497 + + idx = pd.period_range('2011-01-01', '2011-01-02', freq='D', name='idx') + ser = Series([0.1, 0.2], index=idx, name='s') + + result = ser.loc[[pd.Period('2011-01-01', freq='D'), + pd.Period('2011-01-02', freq='D')]] + exp = Series([0.1, 0.2], index=idx, name='s') + tm.assert_series_equal(result, exp, check_index_type=True) + + keys = [pd.Period('2011-01-02', freq='D'), + pd.Period('2011-01-02', freq='D'), + pd.Period('2011-01-01', freq='D')] + exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name='idx'), + name='s') + tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + + keys = [pd.Period('2011-01-03', freq='D'), + pd.Period('2011-01-02', freq='D'), + pd.Period('2011-01-03', freq='D')] + exp = Series([np.nan, 0.2, np.nan], + index=pd.PeriodIndex(keys, name='idx'), name='s') + result = ser.loc[keys] + tm.assert_series_equal(result, exp) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 8f0fa2d56113b..99e7460b2a3de 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -709,3 +709,160 @@ def test_floating_tuples(self): result = s[0.0] expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name='foo') assert_series_equal(result, expected) + + def test_float64index_slicing_bug(self): + # GH 5557, related to slicing a float index + ser = {256: 2321.0, + 1: 78.0, + 2: 2716.0, + 3: 0.0, + 4: 369.0, + 5: 0.0, + 6: 269.0, + 7: 0.0, + 8: 0.0, + 9: 0.0, + 10: 3536.0, + 11: 0.0, + 12: 24.0, + 13: 0.0, + 14: 931.0, + 15: 0.0, + 16: 101.0, + 17: 78.0, + 18: 9643.0, + 19: 0.0, + 20: 0.0, + 21: 0.0, + 22: 63761.0, + 23: 0.0, + 24: 446.0, + 25: 0.0, + 26: 34773.0, + 27: 0.0, + 28: 729.0, + 29: 78.0, + 30: 0.0, + 31: 0.0, + 32: 3374.0, + 33: 0.0, + 34: 1391.0, + 35: 0.0, + 36: 361.0, + 37: 0.0, + 38: 61808.0, + 39: 0.0, + 40: 0.0, + 41: 0.0, + 42: 6677.0, + 43: 0.0, + 44: 802.0, + 45: 0.0, + 46: 2691.0, + 47: 0.0, + 48: 3582.0, + 49: 0.0, + 50: 734.0, + 51: 0.0, + 52: 627.0, + 53: 70.0, + 54: 2584.0, + 55: 0.0, + 56: 324.0, + 57: 0.0, + 58: 605.0, + 59: 0.0, + 60: 0.0, + 61: 0.0, + 62: 3989.0, + 63: 10.0, + 64: 42.0, + 65: 0.0, + 66: 904.0, + 67: 0.0, + 68: 88.0, + 69: 70.0, + 70: 8172.0, + 71: 0.0, + 72: 0.0, + 73: 0.0, + 74: 64902.0, + 75: 0.0, + 76: 347.0, + 77: 0.0, + 78: 36605.0, + 79: 0.0, + 80: 379.0, + 81: 70.0, + 82: 0.0, + 83: 0.0, + 84: 3001.0, + 85: 0.0, + 86: 1630.0, + 87: 7.0, + 88: 364.0, + 89: 0.0, + 90: 67404.0, + 91: 
9.0, + 92: 0.0, + 93: 0.0, + 94: 7685.0, + 95: 0.0, + 96: 1017.0, + 97: 0.0, + 98: 2831.0, + 99: 0.0, + 100: 2963.0, + 101: 0.0, + 102: 854.0, + 103: 0.0, + 104: 0.0, + 105: 0.0, + 106: 0.0, + 107: 0.0, + 108: 0.0, + 109: 0.0, + 110: 0.0, + 111: 0.0, + 112: 0.0, + 113: 0.0, + 114: 0.0, + 115: 0.0, + 116: 0.0, + 117: 0.0, + 118: 0.0, + 119: 0.0, + 120: 0.0, + 121: 0.0, + 122: 0.0, + 123: 0.0, + 124: 0.0, + 125: 0.0, + 126: 67744.0, + 127: 22.0, + 128: 264.0, + 129: 0.0, + 260: 197.0, + 268: 0.0, + 265: 0.0, + 269: 0.0, + 261: 0.0, + 266: 1198.0, + 267: 0.0, + 262: 2629.0, + 258: 775.0, + 257: 0.0, + 263: 0.0, + 259: 0.0, + 264: 163.0, + 250: 10326.0, + 251: 0.0, + 252: 1228.0, + 253: 0.0, + 254: 2769.0, + 255: 0.0} + + # smoke test for the repr + s = Series(ser) + result = s.value_counts() + str(result) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index b06b1067b7c6b..f7a4af711bbb8 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=W0612,E1101 -import sys -import nose import itertools import warnings from warnings import catch_warnings @@ -10,22 +8,21 @@ from pandas.types.common import (is_integer_dtype, is_float_dtype, is_scalar) -from pandas.compat import range, lrange, lzip, StringIO, lmap, map +from pandas.compat import range, lrange, lzip, StringIO, lmap from pandas.tslib import NaT from numpy import nan from numpy.random import randn import numpy as np import pandas as pd -import pandas.core.common as com from pandas import option_context from pandas.core.indexing import _non_reducing_slice, _maybe_numeric_slice from pandas.core.api import (DataFrame, Index, Series, Panel, isnull, MultiIndex, Timestamp, Timedelta, UInt64Index) from pandas.formats.printing import pprint_thing from pandas import concat -from pandas.core.common import PerformanceWarning, UnsortedIndexError - +from pandas.core.common import PerformanceWarning +from pandas.tests.indexing.common import _mklbl import pandas.util.testing as tm from pandas import date_range @@ -93,10 +90,6 @@ def _axify(obj, key, axis): return tuple(axes) -def _mklbl(prefix, n): - return ["%s%s" % (prefix, i) for i in range(n)] - - class TestIndexing(tm.TestCase): _objs = set(['series', 'frame', 'panel']) @@ -665,40 +658,6 @@ def test_iloc_getitem_slice_dups(self): tm.assert_frame_equal(df.iloc[10:, :2], df2) tm.assert_frame_equal(df.iloc[10:, 2:], df1) - def test_iloc_getitem_multiindex2(self): - # TODO(wesm): fix this - raise nose.SkipTest('this test was being suppressed, ' - 'needs to be fixed') - - arr = np.random.randn(3, 3) - df = DataFrame(arr, columns=[[2, 2, 4], [6, 8, 10]], - index=[[4, 4, 8], [8, 10, 12]]) - - rs = df.iloc[2] - xp = Series(arr[2], index=df.columns) - tm.assert_series_equal(rs, xp) - - rs = df.iloc[:, 2] - xp = Series(arr[:, 2], index=df.index) - tm.assert_series_equal(rs, xp) - - rs = df.iloc[2, 2] - xp = df.values[2, 2] - self.assertEqual(rs, xp) - - # for multiple items - # GH 5528 - rs = df.iloc[[0, 1]] - xp = df.xs(4, drop_level=False) - tm.assert_frame_equal(rs, xp) - - tup = zip(*[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) - index = MultiIndex.from_tuples(tup) - df = DataFrame(np.random.randn(4, 4), index=index) - rs = df.iloc[[2, 3]] - xp = df.xs('b', drop_level=False) - tm.assert_frame_equal(rs, xp) - def test_iloc_setitem(self): df = self.frame_ints @@ -872,210 +831,6 @@ def compare(result, expected): result2 = s.loc[0:3] 
tm.assert_series_equal(result1, result2) - def test_setitem_multiindex(self): - for index_fn in ('ix', 'loc'): - - def check(target, indexers, value, compare_fn, expected=None): - fn = getattr(target, index_fn) - fn.__setitem__(indexers, value) - result = fn.__getitem__(indexers) - if expected is None: - expected = value - compare_fn(result, expected) - # GH7190 - index = pd.MultiIndex.from_product([np.arange(0, 100), - np.arange(0, 80)], - names=['time', 'firm']) - t, n = 0, 2 - df = DataFrame(np.nan, columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, indexers=((t, n), 'X'), value=0, - compare_fn=self.assertEqual) - - df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, indexers=((t, n), 'X'), value=1, - compare_fn=self.assertEqual) - - df = DataFrame(columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, indexers=((t, n), 'X'), value=2, - compare_fn=self.assertEqual) - - # GH 7218, assinging with 0-dim arrays - df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, - indexers=((t, n), 'X'), - value=np.array(3), - compare_fn=self.assertEqual, - expected=3, ) - - # GH5206 - df = pd.DataFrame(np.arange(25).reshape(5, 5), - columns='A,B,C,D,E'.split(','), dtype=float) - df['F'] = 99 - row_selection = df['A'] % 2 == 0 - col_selection = ['B', 'C'] - with catch_warnings(record=True): - df.ix[row_selection, col_selection] = df['F'] - output = pd.DataFrame(99., index=[0, 2, 4], columns=['B', 'C']) - with catch_warnings(record=True): - tm.assert_frame_equal(df.ix[row_selection, col_selection], - output) - check(target=df, - indexers=(row_selection, col_selection), - value=df['F'], - compare_fn=tm.assert_frame_equal, - expected=output, ) - - # GH11372 - idx = pd.MultiIndex.from_product([ - ['A', 'B', 'C'], - pd.date_range('2015-01-01', '2015-04-01', freq='MS')]) - cols = pd.MultiIndex.from_product([ - ['foo', 'bar'], - pd.date_range('2016-01-01', '2016-02-01', freq='MS')]) - - df = pd.DataFrame(np.random.random((12, 4)), - index=idx, columns=cols) - - subidx = pd.MultiIndex.from_tuples( - [('A', pd.Timestamp('2015-01-01')), - ('A', pd.Timestamp('2015-02-01'))]) - subcols = pd.MultiIndex.from_tuples( - [('foo', pd.Timestamp('2016-01-01')), - ('foo', pd.Timestamp('2016-02-01'))]) - - vals = pd.DataFrame(np.random.random((2, 2)), - index=subidx, columns=subcols) - check(target=df, - indexers=(subidx, subcols), - value=vals, - compare_fn=tm.assert_frame_equal, ) - # set all columns - vals = pd.DataFrame( - np.random.random((2, 4)), index=subidx, columns=cols) - check(target=df, - indexers=(subidx, slice(None, None, None)), - value=vals, - compare_fn=tm.assert_frame_equal, ) - # identity - copy = df.copy() - check(target=df, indexers=(df.index, df.columns), value=df, - compare_fn=tm.assert_frame_equal, expected=copy) - - def test_indexing_with_datetime_tz(self): - - # 8260 - # support datetime64 with tz - - idx = Index(date_range('20130101', periods=3, tz='US/Eastern'), - name='foo') - dr = date_range('20130110', periods=3) - df = DataFrame({'A': idx, 'B': dr}) - df['C'] = idx - df.iloc[1, 1] = pd.NaT - df.iloc[1, 2] = pd.NaT - - # indexing - result = df.iloc[1] - expected = Series([Timestamp('2013-01-02 00:00:00-0500', - tz='US/Eastern'), np.nan, np.nan], - index=list('ABC'), dtype='object', name=1) - tm.assert_series_equal(result, expected) - result = df.loc[1] - expected = Series([Timestamp('2013-01-02 
00:00:00-0500', - tz='US/Eastern'), np.nan, np.nan], - index=list('ABC'), dtype='object', name=1) - tm.assert_series_equal(result, expected) - - # indexing - fast_xs - df = DataFrame({'a': date_range('2014-01-01', periods=10, tz='UTC')}) - result = df.iloc[5] - expected = Timestamp('2014-01-06 00:00:00+0000', tz='UTC', freq='D') - self.assertEqual(result, expected) - - result = df.loc[5] - self.assertEqual(result, expected) - - # indexing - boolean - result = df[df.a > df.a[3]] - expected = df.iloc[4:] - tm.assert_frame_equal(result, expected) - - # indexing - setting an element - df = DataFrame(data=pd.to_datetime( - ['2015-03-30 20:12:32', '2015-03-12 00:11:11']), columns=['time']) - df['new_col'] = ['new', 'old'] - df.time = df.set_index('time').index.tz_localize('UTC') - v = df[df.new_col == 'new'].set_index('time').index.tz_convert( - 'US/Pacific') - - # trying to set a single element on a part of a different timezone - def f(): - df.loc[df.new_col == 'new', 'time'] = v - - self.assertRaises(ValueError, f) - - v = df.loc[df.new_col == 'new', 'time'] + pd.Timedelta('1s') - df.loc[df.new_col == 'new', 'time'] = v - tm.assert_series_equal(df.loc[df.new_col == 'new', 'time'], v) - - def test_indexing_with_datetimeindex_tz(self): - - # GH 12050 - # indexing on a series with a datetimeindex with tz - index = pd.date_range('2015-01-01', periods=2, tz='utc') - - ser = pd.Series(range(2), index=index, - dtype='int64') - - # list-like indexing - - for sel in (index, list(index)): - # getitem - tm.assert_series_equal(ser[sel], ser) - - # setitem - result = ser.copy() - result[sel] = 1 - expected = pd.Series(1, index=index) - tm.assert_series_equal(result, expected) - - # .loc getitem - tm.assert_series_equal(ser.loc[sel], ser) - - # .loc setitem - result = ser.copy() - result.loc[sel] = 1 - expected = pd.Series(1, index=index) - tm.assert_series_equal(result, expected) - - # single element indexing - - # getitem - self.assertEqual(ser[index[1]], 1) - - # setitem - result = ser.copy() - result[index[1]] = 5 - expected = pd.Series([0, 5], index=index) - tm.assert_series_equal(result, expected) - - # .loc getitem - self.assertEqual(ser.loc[index[1]], 1) - - # .loc setitem - result = ser.copy() - result.loc[index[1]] = 5 - expected = pd.Series([0, 5], index=index) - tm.assert_series_equal(result, expected) - def test_loc_setitem_dups(self): # GH 6541 @@ -1241,28 +996,6 @@ def test_loc_getitem_label_array_like(self): self.check_result('array like', 'loc', Series(index=[4, 8, 12]).index, 'ix', [4, 8, 12], typs=['ints', 'uints'], axes=2) - def test_loc_getitem_series(self): - # GH14730 - # passing a series as a key with a MultiIndex - index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']]) - x = Series(index=index, data=range(9), dtype=np.float64) - y = Series([1, 3]) - expected = Series( - data=[0, 1, 2, 6, 7, 8], - index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]), - dtype=np.float64) - result = x.loc[y] - tm.assert_series_equal(result, expected) - - result = x.loc[[1, 3]] - tm.assert_series_equal(result, expected) - - empty = Series(data=[], dtype=np.float64) - expected = Series([], index=MultiIndex( - levels=index.levels, labels=[[], []], dtype=np.float64)) - result = x.loc[empty] - tm.assert_series_equal(result, expected) - def test_loc_getitem_bool(self): # boolean indexers b = [True, False, True, False] @@ -1700,136 +1433,6 @@ def test_iloc_getitem_labelled_frame(self): # trying to use a label self.assertRaises(ValueError, df.iloc.__getitem__, tuple(['j', 'D'])) - def 
test_iloc_getitem_panel(self): - - # GH 7189 - p = Panel(np.arange(4 * 3 * 2).reshape(4, 3, 2), - items=['A', 'B', 'C', 'D'], - major_axis=['a', 'b', 'c'], - minor_axis=['one', 'two']) - - result = p.iloc[1] - expected = p.loc['B'] - tm.assert_frame_equal(result, expected) - - result = p.iloc[1, 1] - expected = p.loc['B', 'b'] - tm.assert_series_equal(result, expected) - - result = p.iloc[1, 1, 1] - expected = p.loc['B', 'b', 'two'] - self.assertEqual(result, expected) - - # slice - result = p.iloc[1:3] - expected = p.loc[['B', 'C']] - tm.assert_panel_equal(result, expected) - - result = p.iloc[:, 0:2] - expected = p.loc[:, ['a', 'b']] - tm.assert_panel_equal(result, expected) - - # list of integers - result = p.iloc[[0, 2]] - expected = p.loc[['A', 'C']] - tm.assert_panel_equal(result, expected) - - # neg indicies - result = p.iloc[[-1, 1], [-1, 1]] - expected = p.loc[['D', 'B'], ['c', 'b']] - tm.assert_panel_equal(result, expected) - - # dups indicies - result = p.iloc[[-1, -1, 1], [-1, 1]] - expected = p.loc[['D', 'D', 'B'], ['c', 'b']] - tm.assert_panel_equal(result, expected) - - # combined - result = p.iloc[0, [True, True], [0, 1]] - expected = p.loc['A', ['a', 'b'], ['one', 'two']] - tm.assert_frame_equal(result, expected) - - # out-of-bounds exception - self.assertRaises(IndexError, p.iloc.__getitem__, tuple([10, 5])) - - def f(): - p.iloc[0, [True, True], [0, 1, 2]] - - self.assertRaises(IndexError, f) - - # trying to use a label - self.assertRaises(ValueError, p.iloc.__getitem__, tuple(['j', 'D'])) - - # GH - p = Panel( - np.random.rand(4, 3, 2), items=['A', 'B', 'C', 'D'], - major_axis=['U', 'V', 'W'], minor_axis=['X', 'Y']) - expected = p['A'] - - result = p.iloc[0, :, :] - tm.assert_frame_equal(result, expected) - - result = p.iloc[0, [True, True, True], :] - tm.assert_frame_equal(result, expected) - - result = p.iloc[0, [True, True, True], [0, 1]] - tm.assert_frame_equal(result, expected) - - def f(): - p.iloc[0, [True, True, True], [0, 1, 2]] - - self.assertRaises(IndexError, f) - - def f(): - p.iloc[0, [True, True, True], [2]] - - self.assertRaises(IndexError, f) - - def test_iloc_getitem_panel_multiindex(self): - # GH 7199 - # Panel with multi-index - multi_index = pd.MultiIndex.from_tuples([('ONE', 'one'), - ('TWO', 'two'), - ('THREE', 'three')], - names=['UPPER', 'lower']) - - simple_index = [x[0] for x in multi_index] - wd1 = Panel(items=['First', 'Second'], major_axis=['a', 'b', 'c', 'd'], - minor_axis=multi_index) - - wd2 = Panel(items=['First', 'Second'], major_axis=['a', 'b', 'c', 'd'], - minor_axis=simple_index) - - expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]] - result1 = wd1.iloc[0, [True, True, True, False], [0, 2]] # WRONG - tm.assert_frame_equal(result1, expected1) - - expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]] - result2 = wd2.iloc[0, [True, True, True, False], [0, 2]] - tm.assert_frame_equal(result2, expected2) - - expected1 = DataFrame(index=['a'], columns=multi_index, - dtype='float64') - result1 = wd1.iloc[0, [0], [0, 1, 2]] - tm.assert_frame_equal(result1, expected1) - - expected2 = DataFrame(index=['a'], columns=simple_index, - dtype='float64') - result2 = wd2.iloc[0, [0], [0, 1, 2]] - tm.assert_frame_equal(result2, expected2) - - # GH 7516 - mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')]) - p = Panel(np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3), - items=['a', 'b', 'c'], major_axis=mi, - minor_axis=['u', 'v', 'w']) - result = p.iloc[:, 1, 0] - expected = Series([3, 12, 21], index=['a', 'b', 
'c'], name='u') - tm.assert_series_equal(result, expected) - - result = p.loc[:, (1, 'y'), 'u'] - tm.assert_series_equal(result, expected) - def test_iloc_getitem_doc_issue(self): # multi axis slicing issue with single block @@ -1956,929 +1559,90 @@ def test_iloc_setitem_list_of_lists(self): expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9])) tm.assert_frame_equal(df, expected) - df = DataFrame( - dict(A=list('abcde'), B=np.arange(5, 10, dtype='int64'))) - df.iloc[2:4] = [['x', 11], ['y', 13]] - expected = DataFrame(dict(A=['a', 'b', 'x', 'y', 'e'], - B=[5, 6, 11, 13, 9])) - tm.assert_frame_equal(df, expected) - - def test_iloc_getitem_multiindex(self): - mi_labels = DataFrame(np.random.randn(4, 3), - columns=[['i', 'i', 'j'], ['A', 'A', 'B']], - index=[['i', 'i', 'j', 'k'], - ['X', 'X', 'Y', 'Y']]) - - mi_int = DataFrame(np.random.randn(3, 3), - columns=[[2, 2, 4], [6, 8, 10]], - index=[[4, 4, 8], [8, 10, 12]]) - - # the first row - rs = mi_int.iloc[0] - with catch_warnings(record=True): - xp = mi_int.ix[4].ix[8] - tm.assert_series_equal(rs, xp, check_names=False) - self.assertEqual(rs.name, (4, 8)) - self.assertEqual(xp.name, 8) - - # 2nd (last) columns - rs = mi_int.iloc[:, 2] - with catch_warnings(record=True): - xp = mi_int.ix[:, 2] - tm.assert_series_equal(rs, xp) - - # corner column - rs = mi_int.iloc[2, 2] - with catch_warnings(record=True): - xp = mi_int.ix[:, 2].ix[2] - self.assertEqual(rs, xp) - - # this is basically regular indexing - rs = mi_labels.iloc[2, 2] - with catch_warnings(record=True): - xp = mi_labels.ix['j'].ix[:, 'j'].ix[0, 0] - self.assertEqual(rs, xp) - - def test_loc_multiindex(self): - - mi_labels = DataFrame(np.random.randn(3, 3), - columns=[['i', 'i', 'j'], ['A', 'A', 'B']], - index=[['i', 'i', 'j'], ['X', 'X', 'Y']]) - - mi_int = DataFrame(np.random.randn(3, 3), - columns=[[2, 2, 4], [6, 8, 10]], - index=[[4, 4, 8], [8, 10, 12]]) - - # the first row - rs = mi_labels.loc['i'] - with catch_warnings(record=True): - xp = mi_labels.ix['i'] - tm.assert_frame_equal(rs, xp) - - # 2nd (last) columns - rs = mi_labels.loc[:, 'j'] - with catch_warnings(record=True): - xp = mi_labels.ix[:, 'j'] - tm.assert_frame_equal(rs, xp) - - # corner column - rs = mi_labels.loc['j'].loc[:, 'j'] - with catch_warnings(record=True): - xp = mi_labels.ix['j'].ix[:, 'j'] - tm.assert_frame_equal(rs, xp) - - # with a tuple - rs = mi_labels.loc[('i', 'X')] - with catch_warnings(record=True): - xp = mi_labels.ix[('i', 'X')] - tm.assert_frame_equal(rs, xp) - - rs = mi_int.loc[4] - with catch_warnings(record=True): - xp = mi_int.ix[4] - tm.assert_frame_equal(rs, xp) - - def test_loc_multiindex_indexer_none(self): - - # GH6788 - # multi-index indexer is None (meaning take all) - attributes = ['Attribute' + str(i) for i in range(1)] - attribute_values = ['Value' + str(i) for i in range(5)] - - index = MultiIndex.from_product([attributes, attribute_values]) - df = 0.1 * np.random.randn(10, 1 * 5) + 0.5 - df = DataFrame(df, columns=index) - result = df[attributes] - tm.assert_frame_equal(result, df) - - # GH 7349 - # loc with a multi-index seems to be doing fallback - df = DataFrame(np.arange(12).reshape(-1, 1), - index=pd.MultiIndex.from_product([[1, 2, 3, 4], - [1, 2, 3]])) - - expected = df.loc[([1, 2], ), :] - result = df.loc[[1, 2]] - tm.assert_frame_equal(result, expected) - - def test_loc_multiindex_incomplete(self): - - # GH 7399 - # incomplete indexers - s = pd.Series(np.arange(15, dtype='int64'), - MultiIndex.from_product([range(5), ['a', 'b', 'c']])) - expected = 
s.loc[:, 'a':'c'] - - result = s.loc[0:4, 'a':'c'] - tm.assert_series_equal(result, expected) - tm.assert_series_equal(result, expected) - - result = s.loc[:4, 'a':'c'] - tm.assert_series_equal(result, expected) - tm.assert_series_equal(result, expected) - - result = s.loc[0:, 'a':'c'] - tm.assert_series_equal(result, expected) - tm.assert_series_equal(result, expected) - - # GH 7400 - # multiindexer gettitem with list of indexers skips wrong element - s = pd.Series(np.arange(15, dtype='int64'), - MultiIndex.from_product([range(5), ['a', 'b', 'c']])) - expected = s.iloc[[6, 7, 8, 12, 13, 14]] - result = s.loc[2:4:2, 'a':'c'] - tm.assert_series_equal(result, expected) - - def test_multiindex_perf_warn(self): - - if sys.version_info < (2, 7): - raise nose.SkipTest('python version < 2.7') - - df = DataFrame({'jim': [0, 0, 1, 1], - 'joe': ['x', 'x', 'z', 'y'], - 'jolie': np.random.rand(4)}).set_index(['jim', 'joe']) - - with tm.assert_produces_warning(PerformanceWarning, - clear=[pd.core.index]): - df.loc[(1, 'z')] - - df = df.iloc[[2, 1, 3, 0]] - with tm.assert_produces_warning(PerformanceWarning): - df.loc[(0, )] - - def test_series_getitem_multiindex(self): - - # GH 6018 - # series regression getitem with a multi-index - - s = Series([1, 2, 3]) - s.index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)]) - - result = s[:, 0] - expected = Series([1], index=[0]) - tm.assert_series_equal(result, expected) - - result = s.loc[:, 1] - expected = Series([2, 3], index=[1, 2]) - tm.assert_series_equal(result, expected) - - # xs - result = s.xs(0, level=0) - expected = Series([1], index=[0]) - tm.assert_series_equal(result, expected) - - result = s.xs(1, level=1) - expected = Series([2, 3], index=[1, 2]) - tm.assert_series_equal(result, expected) - - # GH6258 - dt = list(date_range('20130903', periods=3)) - idx = MultiIndex.from_product([list('AB'), dt]) - s = Series([1, 3, 4, 1, 3, 4], index=idx) - - result = s.xs('20130903', level=1) - expected = Series([1, 1], index=list('AB')) - tm.assert_series_equal(result, expected) - - # GH5684 - idx = MultiIndex.from_tuples([('a', 'one'), ('a', 'two'), ('b', 'one'), - ('b', 'two')]) - s = Series([1, 2, 3, 4], index=idx) - s.index.set_names(['L1', 'L2'], inplace=True) - result = s.xs('one', level='L2') - expected = Series([1, 3], index=['a', 'b']) - expected.index.set_names(['L1'], inplace=True) - tm.assert_series_equal(result, expected) - - def test_ix_general(self): - - # ix general issues - - # GH 2817 - data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, - 'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, - 'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}} - df = DataFrame(data).set_index(keys=['col', 'year']) - key = 4.0, 2012 - - # emits a PerformanceWarning, ok - with self.assert_produces_warning(PerformanceWarning): - tm.assert_frame_equal(df.loc[key], df.iloc[2:]) - - # this is ok - df.sort_index(inplace=True) - res = df.loc[key] - - # col has float dtype, result should be Float64Index - index = MultiIndex.from_arrays([[4.] 
* 3, [2012] * 3], - names=['col', 'year']) - expected = DataFrame({'amount': [222, 333, 444]}, index=index) - tm.assert_frame_equal(res, expected) - - def test_ix_weird_slicing(self): - # http://stackoverflow.com/q/17056560/1240268 - df = DataFrame({'one': [1, 2, 3, np.nan, np.nan], - 'two': [1, 2, 3, 4, 5]}) - df.loc[df['one'] > 1, 'two'] = -df['two'] - - expected = DataFrame({'one': {0: 1.0, - 1: 2.0, - 2: 3.0, - 3: nan, - 4: nan}, - 'two': {0: 1, - 1: -2, - 2: -3, - 3: 4, - 4: 5}}) - tm.assert_frame_equal(df, expected) - - def test_xs_multiindex(self): - - # GH2903 - columns = MultiIndex.from_tuples( - [('a', 'foo'), ('a', 'bar'), ('b', 'hello'), - ('b', 'world')], names=['lvl0', 'lvl1']) - df = DataFrame(np.random.randn(4, 4), columns=columns) - df.sort_index(axis=1, inplace=True) - result = df.xs('a', level='lvl0', axis=1) - expected = df.iloc[:, 0:2].loc[:, 'a'] - tm.assert_frame_equal(result, expected) - - result = df.xs('foo', level='lvl1', axis=1) - expected = df.iloc[:, 1:2].copy() - expected.columns = expected.columns.droplevel('lvl1') - tm.assert_frame_equal(result, expected) - - def test_per_axis_per_level_getitem(self): - - # GH6134 - # example test case - ix = MultiIndex.from_product([_mklbl('A', 5), _mklbl('B', 7), _mklbl( - 'C', 4), _mklbl('D', 2)]) - df = DataFrame(np.arange(len(ix.get_values())), index=ix) - - result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :] - expected = df.loc[[tuple([a, b, c, d]) - for a, b, c, d in df.index.values - if (a == 'A1' or a == 'A2' or a == 'A3') and ( - c == 'C1' or c == 'C3')]] - tm.assert_frame_equal(result, expected) - - expected = df.loc[[tuple([a, b, c, d]) - for a, b, c, d in df.index.values - if (a == 'A1' or a == 'A2' or a == 'A3') and ( - c == 'C1' or c == 'C2' or c == 'C3')]] - result = df.loc[(slice('A1', 'A3'), slice(None), slice('C1', 'C3')), :] - tm.assert_frame_equal(result, expected) - - # test multi-index slicing with per axis and per index controls - index = MultiIndex.from_tuples([('A', 1), ('A', 2), - ('A', 3), ('B', 1)], - names=['one', 'two']) - columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), - ('b', 'foo'), ('b', 'bah')], - names=['lvl0', 'lvl1']) - - df = DataFrame( - np.arange(16, dtype='int64').reshape( - 4, 4), index=index, columns=columns) - df = df.sort_index(axis=0).sort_index(axis=1) - - # identity - result = df.loc[(slice(None), slice(None)), :] - tm.assert_frame_equal(result, df) - result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))] - tm.assert_frame_equal(result, df) - result = df.loc[:, (slice(None), slice(None))] - tm.assert_frame_equal(result, df) - - # index - result = df.loc[(slice(None), [1]), :] - expected = df.iloc[[0, 3]] - tm.assert_frame_equal(result, expected) - - result = df.loc[(slice(None), 1), :] - expected = df.iloc[[0, 3]] - tm.assert_frame_equal(result, expected) - - # columns - result = df.loc[:, (slice(None), ['foo'])] - expected = df.iloc[:, [1, 3]] - tm.assert_frame_equal(result, expected) - - # both - result = df.loc[(slice(None), 1), (slice(None), ['foo'])] - expected = df.iloc[[0, 3], [1, 3]] - tm.assert_frame_equal(result, expected) - - result = df.loc['A', 'a'] - expected = DataFrame(dict(bar=[1, 5, 9], foo=[0, 4, 8]), - index=Index([1, 2, 3], name='two'), - columns=Index(['bar', 'foo'], name='lvl1')) - tm.assert_frame_equal(result, expected) - - result = df.loc[(slice(None), [1, 2]), :] - expected = df.iloc[[0, 1, 3]] - tm.assert_frame_equal(result, expected) - - # multi-level series - s = 
Series(np.arange(len(ix.get_values())), index=ix) - result = s.loc['A1':'A3', :, ['C1', 'C3']] - expected = s.loc[[tuple([a, b, c, d]) - for a, b, c, d in s.index.values - if (a == 'A1' or a == 'A2' or a == 'A3') and ( - c == 'C1' or c == 'C3')]] - tm.assert_series_equal(result, expected) - - # boolean indexers - result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] - expected = df.iloc[[2, 3]] - tm.assert_frame_equal(result, expected) - - def f(): - df.loc[(slice(None), np.array([True, False])), :] - - self.assertRaises(ValueError, f) - - # ambiguous cases - # these can be multiply interpreted (e.g. in this case - # as df.loc[slice(None),[1]] as well - self.assertRaises(KeyError, lambda: df.loc[slice(None), [1]]) - - result = df.loc[(slice(None), [1]), :] - expected = df.iloc[[0, 3]] - tm.assert_frame_equal(result, expected) - - # not lexsorted - self.assertEqual(df.index.lexsort_depth, 2) - df = df.sort_index(level=1, axis=0) - self.assertEqual(df.index.lexsort_depth, 0) - with tm.assertRaisesRegexp( - UnsortedIndexError, - 'MultiIndex Slicing requires the index to be fully ' - r'lexsorted tuple len \(2\), lexsort depth \(0\)'): - df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] - - def test_multiindex_slicers_non_unique(self): - - # GH 7106 - # non-unique mi index support - df = (DataFrame(dict(A=['foo', 'foo', 'foo', 'foo'], - B=['a', 'a', 'a', 'a'], - C=[1, 2, 1, 3], - D=[1, 2, 3, 4])) - .set_index(['A', 'B', 'C']).sort_index()) - self.assertFalse(df.index.is_unique) - expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'], - C=[1, 1], D=[1, 3])) - .set_index(['A', 'B', 'C']).sort_index()) - result = df.loc[(slice(None), slice(None), 1), :] - tm.assert_frame_equal(result, expected) - - # this is equivalent of an xs expression - result = df.xs(1, level=2, drop_level=False) - tm.assert_frame_equal(result, expected) - - df = (DataFrame(dict(A=['foo', 'foo', 'foo', 'foo'], - B=['a', 'a', 'a', 'a'], - C=[1, 2, 1, 2], - D=[1, 2, 3, 4])) - .set_index(['A', 'B', 'C']).sort_index()) - self.assertFalse(df.index.is_unique) - expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'], - C=[1, 1], D=[1, 3])) - .set_index(['A', 'B', 'C']).sort_index()) - result = df.loc[(slice(None), slice(None), 1), :] - self.assertFalse(result.index.is_unique) - tm.assert_frame_equal(result, expected) - - # GH12896 - # numpy-implementation dependent bug - ints = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 13, 14, 14, 16, - 17, 18, 19, 200000, 200000] - n = len(ints) - idx = MultiIndex.from_arrays([['a'] * n, ints]) - result = Series([1] * n, index=idx) - result = result.sort_index() - result = result.loc[(slice(None), slice(100000))] - expected = Series([1] * (n - 2), index=idx[:-2]).sort_index() - tm.assert_series_equal(result, expected) - - def test_multiindex_slicers_datetimelike(self): - - # GH 7429 - # buggy/inconsistent behavior when slicing with datetime-like - import datetime - dates = [datetime.datetime(2012, 1, 1, 12, 12, 12) + - datetime.timedelta(days=i) for i in range(6)] - freq = [1, 2] - index = MultiIndex.from_product( - [dates, freq], names=['date', 'frequency']) - - df = DataFrame( - np.arange(6 * 2 * 4, dtype='int64').reshape( - -1, 4), index=index, columns=list('ABCD')) - - # multi-axis slicing - idx = pd.IndexSlice - expected = df.iloc[[0, 2, 4], [0, 1]] - result = df.loc[(slice(Timestamp('2012-01-01 12:12:12'), - Timestamp('2012-01-03 12:12:12')), - slice(1, 1)), slice('A', 'B')] - tm.assert_frame_equal(result, expected) - - result = df.loc[(idx[Timestamp('2012-01-01 
12:12:12'):Timestamp( - '2012-01-03 12:12:12')], idx[1:1]), slice('A', 'B')] - tm.assert_frame_equal(result, expected) - - result = df.loc[(slice(Timestamp('2012-01-01 12:12:12'), - Timestamp('2012-01-03 12:12:12')), 1), - slice('A', 'B')] - tm.assert_frame_equal(result, expected) - - # with strings - result = df.loc[(slice('2012-01-01 12:12:12', '2012-01-03 12:12:12'), - slice(1, 1)), slice('A', 'B')] - tm.assert_frame_equal(result, expected) - - result = df.loc[(idx['2012-01-01 12:12:12':'2012-01-03 12:12:12'], 1), - idx['A', 'B']] - tm.assert_frame_equal(result, expected) - - def test_multiindex_slicers_edges(self): - # GH 8132 - # various edge cases - df = DataFrame( - {'A': ['A0'] * 5 + ['A1'] * 5 + ['A2'] * 5, - 'B': ['B0', 'B0', 'B1', 'B1', 'B2'] * 3, - 'DATE': ["2013-06-11", "2013-07-02", "2013-07-09", "2013-07-30", - "2013-08-06", "2013-06-11", "2013-07-02", "2013-07-09", - "2013-07-30", "2013-08-06", "2013-09-03", "2013-10-01", - "2013-07-09", "2013-08-06", "2013-09-03"], - 'VALUES': [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2]}) - - df['DATE'] = pd.to_datetime(df['DATE']) - df1 = df.set_index(['A', 'B', 'DATE']) - df1 = df1.sort_index() - - # A1 - Get all values under "A0" and "A1" - result = df1.loc[(slice('A1')), :] - expected = df1.iloc[0:10] - tm.assert_frame_equal(result, expected) - - # A2 - Get all values from the start to "A2" - result = df1.loc[(slice('A2')), :] - expected = df1 - tm.assert_frame_equal(result, expected) - - # A3 - Get all values under "B1" or "B2" - result = df1.loc[(slice(None), slice('B1', 'B2')), :] - expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]] - tm.assert_frame_equal(result, expected) - - # A4 - Get all values between 2013-07-02 and 2013-07-09 - result = df1.loc[(slice(None), slice(None), - slice('20130702', '20130709')), :] - expected = df1.iloc[[1, 2, 6, 7, 12]] - tm.assert_frame_equal(result, expected) - - # B1 - Get all values in B0 that are also under A0, A1 and A2 - result = df1.loc[(slice('A2'), slice('B0')), :] - expected = df1.iloc[[0, 1, 5, 6, 10, 11]] - tm.assert_frame_equal(result, expected) - - # B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for - # the As) - result = df1.loc[(slice(None), slice('B2')), :] - expected = df1 - tm.assert_frame_equal(result, expected) - - # B3 - Get all values from B1 to B2 and up to 2013-08-06 - result = df1.loc[(slice(None), slice('B1', 'B2'), - slice('2013-08-06')), :] - expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]] - tm.assert_frame_equal(result, expected) - - # B4 - Same as A4 but the start of the date slice is not a key. 
- # shows indexing on a partial selection slice - result = df1.loc[(slice(None), slice(None), - slice('20130701', '20130709')), :] - expected = df1.iloc[[1, 2, 6, 7, 12]] - tm.assert_frame_equal(result, expected) - - def test_per_axis_per_level_doc_examples(self): - - # test index maker - idx = pd.IndexSlice - - # from indexing.rst / advanced - index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2), - _mklbl('C', 4), _mklbl('D', 2)]) - columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), - ('b', 'foo'), ('b', 'bah')], - names=['lvl0', 'lvl1']) - df = DataFrame(np.arange(len(index) * len(columns), dtype='int64') - .reshape((len(index), len(columns))), - index=index, columns=columns) - result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :] - expected = df.loc[[tuple([a, b, c, d]) - for a, b, c, d in df.index.values - if (a == 'A1' or a == 'A2' or a == 'A3') and ( - c == 'C1' or c == 'C3')]] - tm.assert_frame_equal(result, expected) - result = df.loc[idx['A1':'A3', :, ['C1', 'C3']], :] - tm.assert_frame_equal(result, expected) - - result = df.loc[(slice(None), slice(None), ['C1', 'C3']), :] - expected = df.loc[[tuple([a, b, c, d]) - for a, b, c, d in df.index.values - if (c == 'C1' or c == 'C3')]] - tm.assert_frame_equal(result, expected) - result = df.loc[idx[:, :, ['C1', 'C3']], :] - tm.assert_frame_equal(result, expected) - - # not sorted - def f(): - df.loc['A1', (slice(None), 'foo')] - - self.assertRaises(UnsortedIndexError, f) - df = df.sort_index(axis=1) - - # slicing - df.loc['A1', (slice(None), 'foo')] - df.loc[(slice(None), slice(None), ['C1', 'C3']), (slice(None), 'foo')] - - # setitem - df.loc(axis=0)[:, :, ['C1', 'C3']] = -10 - - def test_loc_axis_arguments(self): - - index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2), - _mklbl('C', 4), _mklbl('D', 2)]) - columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), - ('b', 'foo'), ('b', 'bah')], - names=['lvl0', 'lvl1']) - df = DataFrame(np.arange(len(index) * len(columns), dtype='int64') - .reshape((len(index), len(columns))), - index=index, - columns=columns).sort_index().sort_index(axis=1) - - # axis 0 - result = df.loc(axis=0)['A1':'A3', :, ['C1', 'C3']] - expected = df.loc[[tuple([a, b, c, d]) - for a, b, c, d in df.index.values - if (a == 'A1' or a == 'A2' or a == 'A3') and ( - c == 'C1' or c == 'C3')]] - tm.assert_frame_equal(result, expected) - - result = df.loc(axis='index')[:, :, ['C1', 'C3']] - expected = df.loc[[tuple([a, b, c, d]) - for a, b, c, d in df.index.values - if (c == 'C1' or c == 'C3')]] - tm.assert_frame_equal(result, expected) - - # axis 1 - result = df.loc(axis=1)[:, 'foo'] - expected = df.loc[:, (slice(None), 'foo')] - tm.assert_frame_equal(result, expected) - - result = df.loc(axis='columns')[:, 'foo'] - expected = df.loc[:, (slice(None), 'foo')] - tm.assert_frame_equal(result, expected) - - # invalid axis - def f(): - df.loc(axis=-1)[:, :, ['C1', 'C3']] - - self.assertRaises(ValueError, f) - - def f(): - df.loc(axis=2)[:, :, ['C1', 'C3']] - - self.assertRaises(ValueError, f) - - def f(): - df.loc(axis='foo')[:, :, ['C1', 'C3']] - - self.assertRaises(ValueError, f) - - def test_loc_coerceion(self): - - # 12411 - df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'), - pd.NaT]}) - expected = df.dtypes - - result = df.iloc[[0]] - tm.assert_series_equal(result.dtypes, expected) - - result = df.iloc[[1]] - tm.assert_series_equal(result.dtypes, expected) - - # 12045 - import datetime - df = DataFrame({'date': [datetime.datetime(2012, 1, 1), - 
datetime.datetime(1012, 1, 2)]}) - expected = df.dtypes - - result = df.iloc[[0]] - tm.assert_series_equal(result.dtypes, expected) - - result = df.iloc[[1]] - tm.assert_series_equal(result.dtypes, expected) - - # 11594 - df = DataFrame({'text': ['some words'] + [None] * 9}) - expected = df.dtypes - - result = df.iloc[0:2] - tm.assert_series_equal(result.dtypes, expected) - - result = df.iloc[3:] - tm.assert_series_equal(result.dtypes, expected) - - def test_per_axis_per_level_setitem(self): - - # test index maker - idx = pd.IndexSlice - - # test multi-index slicing with per axis and per index controls - index = MultiIndex.from_tuples([('A', 1), ('A', 2), - ('A', 3), ('B', 1)], - names=['one', 'two']) - columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), - ('b', 'foo'), ('b', 'bah')], - names=['lvl0', 'lvl1']) - - df_orig = DataFrame( - np.arange(16, dtype='int64').reshape( - 4, 4), index=index, columns=columns) - df_orig = df_orig.sort_index(axis=0).sort_index(axis=1) - - # identity - df = df_orig.copy() - df.loc[(slice(None), slice(None)), :] = 100 - expected = df_orig.copy() - expected.iloc[:, :] = 100 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc(axis=0)[:, :] = 100 - expected = df_orig.copy() - expected.iloc[:, :] = 100 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100 - expected = df_orig.copy() - expected.iloc[:, :] = 100 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc[:, (slice(None), slice(None))] = 100 - expected = df_orig.copy() - expected.iloc[:, :] = 100 - tm.assert_frame_equal(df, expected) - - # index - df = df_orig.copy() - df.loc[(slice(None), [1]), :] = 100 - expected = df_orig.copy() - expected.iloc[[0, 3]] = 100 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc[(slice(None), 1), :] = 100 - expected = df_orig.copy() - expected.iloc[[0, 3]] = 100 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc(axis=0)[:, 1] = 100 - expected = df_orig.copy() - expected.iloc[[0, 3]] = 100 - tm.assert_frame_equal(df, expected) - - # columns - df = df_orig.copy() - df.loc[:, (slice(None), ['foo'])] = 100 - expected = df_orig.copy() - expected.iloc[:, [1, 3]] = 100 - tm.assert_frame_equal(df, expected) - - # both - df = df_orig.copy() - df.loc[(slice(None), 1), (slice(None), ['foo'])] = 100 - expected = df_orig.copy() - expected.iloc[[0, 3], [1, 3]] = 100 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc[idx[:, 1], idx[:, ['foo']]] = 100 - expected = df_orig.copy() - expected.iloc[[0, 3], [1, 3]] = 100 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc['A', 'a'] = 100 - expected = df_orig.copy() - expected.iloc[0:3, 0:2] = 100 - tm.assert_frame_equal(df, expected) - - # setting with a list-like - df = df_orig.copy() - df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array( - [[100, 100], [100, 100]], dtype='int64') - expected = df_orig.copy() - expected.iloc[[0, 3], [1, 3]] = 100 - tm.assert_frame_equal(df, expected) - - # not enough values - df = df_orig.copy() - - def f(): - df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array( - [[100], [100, 100]], dtype='int64') - - self.assertRaises(ValueError, f) - - def f(): - df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array( - [100, 100, 100, 100], dtype='int64') - - self.assertRaises(ValueError, f) - - # with an alignable rhs - df = df_orig.copy() - df.loc[(slice(None), 1), (slice(None), ['foo'])] = 
df.loc[(slice( - None), 1), (slice(None), ['foo'])] * 5 - expected = df_orig.copy() - expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc[(slice(None), 1), (slice(None), ['foo'])] *= df.loc[(slice( - None), 1), (slice(None), ['foo'])] - expected = df_orig.copy() - expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] - tm.assert_frame_equal(df, expected) - - rhs = df_orig.loc[(slice(None), 1), (slice(None), ['foo'])].copy() - rhs.loc[:, ('c', 'bah')] = 10 - df = df_orig.copy() - df.loc[(slice(None), 1), (slice(None), ['foo'])] *= rhs - expected = df_orig.copy() - expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] + df = DataFrame( + dict(A=list('abcde'), B=np.arange(5, 10, dtype='int64'))) + df.iloc[2:4] = [['x', 11], ['y', 13]] + expected = DataFrame(dict(A=['a', 'b', 'x', 'y', 'e'], + B=[5, 6, 11, 13, 9])) tm.assert_frame_equal(df, expected) - def test_multiindex_setitem(self): - - # GH 3738 - # setting with a multi-index right hand side - arrays = [np.array(['bar', 'bar', 'baz', 'qux', 'qux', 'bar']), - np.array(['one', 'two', 'one', 'one', 'two', 'one']), - np.arange(0, 6, 1)] - - df_orig = pd.DataFrame(np.random.randn(6, 3), - index=arrays, - columns=['A', 'B', 'C']).sort_index() + def test_ix_general(self): - expected = df_orig.loc[['bar']] * 2 - df = df_orig.copy() - df.loc[['bar']] *= 2 - tm.assert_frame_equal(df.loc[['bar']], expected) + # ix general issues - # raise because these have differing levels - def f(): - df.loc['bar'] *= 2 + # GH 2817 + data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, + 'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, + 'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}} + df = DataFrame(data).set_index(keys=['col', 'year']) + key = 4.0, 2012 - self.assertRaises(TypeError, f) + # emits a PerformanceWarning, ok + with self.assert_produces_warning(PerformanceWarning): + tm.assert_frame_equal(df.loc[key], df.iloc[2:]) - # from SO - # http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation - df_orig = DataFrame.from_dict({'price': { - ('DE', 'Coal', 'Stock'): 2, - ('DE', 'Gas', 'Stock'): 4, - ('DE', 'Elec', 'Demand'): 1, - ('FR', 'Gas', 'Stock'): 5, - ('FR', 'Solar', 'SupIm'): 0, - ('FR', 'Wind', 'SupIm'): 0 - }}) - df_orig.index = MultiIndex.from_tuples(df_orig.index, - names=['Sit', 'Com', 'Type']) + # this is ok + df.sort_index(inplace=True) + res = df.loc[key] - expected = df_orig.copy() - expected.iloc[[0, 2, 3]] *= 2 + # col has float dtype, result should be Float64Index + index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3], + names=['col', 'year']) + expected = DataFrame({'amount': [222, 333, 444]}, index=index) + tm.assert_frame_equal(res, expected) - idx = pd.IndexSlice - df = df_orig.copy() - df.loc[idx[:, :, 'Stock'], :] *= 2 - tm.assert_frame_equal(df, expected) + def test_ix_weird_slicing(self): + # http://stackoverflow.com/q/17056560/1240268 + df = DataFrame({'one': [1, 2, 3, np.nan, np.nan], + 'two': [1, 2, 3, 4, 5]}) + df.loc[df['one'] > 1, 'two'] = -df['two'] - df = df_orig.copy() - df.loc[idx[:, :, 'Stock'], 'price'] *= 2 + expected = DataFrame({'one': {0: 1.0, + 1: 2.0, + 2: 3.0, + 3: nan, + 4: nan}, + 'two': {0: 1, + 1: -2, + 2: -3, + 3: 4, + 4: 5}}) tm.assert_frame_equal(df, expected) - def test_getitem_multiindex(self): - # GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise - # the appropriate error, only in PY3 of course! 
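-        # (the first level has no 'A', so both the 'A' and 'X' lookups
-        # below are expected to raise KeyError)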
- index = MultiIndex(levels=[['D', 'B', 'C'], - [0, 26, 27, 37, 57, 67, 75, 82]], - labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], - [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], - names=['tag', 'day']) - arr = np.random.randn(len(index), 1) - df = DataFrame(arr, index=index, columns=['val']) - result = df.val['D'] - expected = Series(arr.ravel()[0:3], name='val', index=Index( - [26, 37, 57], name='day')) - tm.assert_series_equal(result, expected) - - def f(): - df.val['A'] - - self.assertRaises(KeyError, f) - - def f(): - df.val['X'] - - self.assertRaises(KeyError, f) - - # A is treated as a special Timestamp - index = MultiIndex(levels=[['A', 'B', 'C'], - [0, 26, 27, 37, 57, 67, 75, 82]], - labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], - [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], - names=['tag', 'day']) - df = DataFrame(arr, index=index, columns=['val']) - result = df.val['A'] - expected = Series(arr.ravel()[0:3], name='val', index=Index( - [26, 37, 57], name='day')) - tm.assert_series_equal(result, expected) - - def f(): - df.val['X'] + def test_loc_coerceion(self): - self.assertRaises(KeyError, f) + # 12411 + df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'), + pd.NaT]}) + expected = df.dtypes - # GH 7866 - # multi-index slicing with missing indexers - idx = pd.MultiIndex.from_product([['A', 'B', 'C'], - ['foo', 'bar', 'baz']], - names=['one', 'two']) - s = pd.Series(np.arange(9, dtype='int64'), index=idx).sort_index() + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) - exp_idx = pd.MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']], - names=['one', 'two']) - expected = pd.Series(np.arange(3, dtype='int64'), - index=exp_idx).sort_index() + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) - result = s.loc[['A']] - tm.assert_series_equal(result, expected) - result = s.loc[['A', 'D']] - tm.assert_series_equal(result, expected) + # 12045 + import datetime + df = DataFrame({'date': [datetime.datetime(2012, 1, 1), + datetime.datetime(1012, 1, 2)]}) + expected = df.dtypes - # not any values found - self.assertRaises(KeyError, lambda: s.loc[['D']]) + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) - # empty ok - result = s.loc[[]] - expected = s.iloc[[]] - tm.assert_series_equal(result, expected) + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) - idx = pd.IndexSlice - expected = pd.Series([0, 3, 6], index=pd.MultiIndex.from_product( - [['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index() + # 11594 + df = DataFrame({'text': ['some words'] + [None] * 9}) + expected = df.dtypes - result = s.loc[idx[:, ['foo']]] - tm.assert_series_equal(result, expected) - result = s.loc[idx[:, ['foo', 'bah']]] - tm.assert_series_equal(result, expected) + result = df.iloc[0:2] + tm.assert_series_equal(result.dtypes, expected) - # GH 8737 - # empty indexer - multi_index = pd.MultiIndex.from_product((['foo', 'bar', 'baz'], - ['alpha', 'beta'])) - df = DataFrame( - np.random.randn(5, 6), index=range(5), columns=multi_index) - df = df.sort_index(level=0, axis=1) - - expected = DataFrame(index=range(5), - columns=multi_index.reindex([])[0]) - result1 = df.loc[:, ([], slice(None))] - result2 = df.loc[:, (['foo'], [])] - tm.assert_frame_equal(result1, expected) - tm.assert_frame_equal(result2, expected) - - # regression from < 0.14.0 - # GH 7914 - df = DataFrame([[np.mean, np.median], ['mean', 'median']], - columns=MultiIndex.from_tuples([('functs', 'mean'), - ('functs', 'median')]), - index=['function', 'name']) - result = 
df.loc['function', ('functs', 'mean')] - self.assertEqual(result, np.mean) + result = df.iloc[3:] + tm.assert_series_equal(result.dtypes, expected) def test_setitem_dtype_upcast(self): @@ -3154,233 +1918,6 @@ def test_multi_nan_indexing(self): Index(['C1', 'C2', 'C3', 'C4'], name='b')]) tm.assert_frame_equal(result, expected) - def test_iloc_panel_issue(self): - - # GH 3617 - p = Panel(randn(4, 4, 4)) - - self.assertEqual(p.iloc[:3, :3, :3].shape, (3, 3, 3)) - self.assertEqual(p.iloc[1, :3, :3].shape, (3, 3)) - self.assertEqual(p.iloc[:3, 1, :3].shape, (3, 3)) - self.assertEqual(p.iloc[:3, :3, 1].shape, (3, 3)) - self.assertEqual(p.iloc[1, 1, :3].shape, (3, )) - self.assertEqual(p.iloc[1, :3, 1].shape, (3, )) - self.assertEqual(p.iloc[:3, 1, 1].shape, (3, )) - - def test_panel_getitem(self): - # GH4016, date selection returns a frame when a partial string - # selection - ind = date_range(start="2000", freq="D", periods=1000) - df = DataFrame( - np.random.randn( - len(ind), 5), index=ind, columns=list('ABCDE')) - panel = Panel(dict([('frame_' + c, df) for c in list('ABC')])) - - test2 = panel.ix[:, "2002":"2002-12-31"] - test1 = panel.ix[:, "2002"] - tm.assert_panel_equal(test1, test2) - - # GH8710 - # multi-element getting with a list - panel = tm.makePanel() - - expected = panel.iloc[[0, 1]] - - result = panel.loc[['ItemA', 'ItemB']] - tm.assert_panel_equal(result, expected) - - result = panel.loc[['ItemA', 'ItemB'], :, :] - tm.assert_panel_equal(result, expected) - - result = panel[['ItemA', 'ItemB']] - tm.assert_panel_equal(result, expected) - - result = panel.loc['ItemA':'ItemB'] - tm.assert_panel_equal(result, expected) - - result = panel.ix['ItemA':'ItemB'] - tm.assert_panel_equal(result, expected) - - result = panel.ix[['ItemA', 'ItemB']] - tm.assert_panel_equal(result, expected) - - # with an object-like - # GH 9140 - class TestObject: - - def __str__(self): - return "TestObject" - - obj = TestObject() - - p = Panel(np.random.randn(1, 5, 4), items=[obj], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - - expected = p.iloc[0] - result = p[obj] - tm.assert_frame_equal(result, expected) - - def test_panel_setitem(self): - - # GH 7763 - # loc and setitem have setting differences - np.random.seed(0) - index = range(3) - columns = list('abc') - - panel = Panel({'A': DataFrame(np.random.randn(3, 3), - index=index, columns=columns), - 'B': DataFrame(np.random.randn(3, 3), - index=index, columns=columns), - 'C': DataFrame(np.random.randn(3, 3), - index=index, columns=columns)}) - - replace = DataFrame(np.eye(3, 3), index=range(3), columns=columns) - expected = Panel({'A': replace, 'B': replace, 'C': replace}) - - p = panel.copy() - for idx in list('ABC'): - p[idx] = replace - tm.assert_panel_equal(p, expected) - - p = panel.copy() - for idx in list('ABC'): - p.loc[idx, :, :] = replace - tm.assert_panel_equal(p, expected) - - def test_panel_setitem_with_multiindex(self): - - # 10360 - # failing with a multi-index - arr = np.array([[[1, 2, 3], [0, 0, 0]], [[0, 0, 0], [0, 0, 0]]], - dtype=np.float64) - - # reg index - axes = dict(items=['A', 'B'], major_axis=[0, 1], - minor_axis=['X', 'Y', 'Z']) - p1 = Panel(0., **axes) - p1.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p1, expected) - - # multi-indexes - axes['items'] = pd.MultiIndex.from_tuples([('A', 'a'), ('B', 'b')]) - p2 = Panel(0., **axes) - p2.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p2, expected) - - axes['major_axis'] = 
pd.MultiIndex.from_tuples([('A', 1), ('A', 2)]) - p3 = Panel(0., **axes) - p3.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p3, expected) - - axes['minor_axis'] = pd.MultiIndex.from_product([['X'], range(3)]) - p4 = Panel(0., **axes) - p4.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p4, expected) - - arr = np.array( - [[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]], dtype=np.float64) - p5 = Panel(0., **axes) - p5.iloc[0, :, 0] = [1, 2] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p5, expected) - - def test_panel_assignment(self): - # GH3777 - wp = Panel(randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - wp2 = Panel(randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - - # TODO: unused? - # expected = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] - - def f(): - wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = wp2.loc[ - ['Item1', 'Item2'], :, ['A', 'B']] - - self.assertRaises(NotImplementedError, f) - - # to_assign = wp2.loc[['Item1', 'Item2'], :, ['A', 'B']] - # wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = to_assign - # result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] - # tm.assert_panel_equal(result,expected) - - def test_multiindex_assignment(self): - - # GH3777 part 2 - - # mixed dtype - df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3), - columns=list('abc'), - index=[[4, 4, 8], [8, 10, 12]]) - df['d'] = np.nan - arr = np.array([0., 1.]) - - df.ix[4, 'd'] = arr - tm.assert_series_equal(df.ix[4, 'd'], - Series(arr, index=[8, 10], name='d')) - - # single dtype - df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3), - columns=list('abc'), - index=[[4, 4, 8], [8, 10, 12]]) - - df.ix[4, 'c'] = arr - exp = Series(arr, index=[8, 10], name='c', dtype='float64') - tm.assert_series_equal(df.ix[4, 'c'], exp) - - # scalar ok - df.ix[4, 'c'] = 10 - exp = Series(10, index=[8, 10], name='c', dtype='float64') - tm.assert_series_equal(df.ix[4, 'c'], exp) - - # invalid assignments - def f(): - df.ix[4, 'c'] = [0, 1, 2, 3] - - self.assertRaises(ValueError, f) - - def f(): - df.ix[4, 'c'] = [0] - - self.assertRaises(ValueError, f) - - # groupby example - NUM_ROWS = 100 - NUM_COLS = 10 - col_names = ['A' + num for num in - map(str, np.arange(NUM_COLS).tolist())] - index_cols = col_names[:5] - - df = DataFrame(np.random.randint(5, size=(NUM_ROWS, NUM_COLS)), - dtype=np.int64, columns=col_names) - df = df.set_index(index_cols).sort_index() - grp = df.groupby(level=index_cols[:4]) - df['new_col'] = np.nan - - f_index = np.arange(5) - - def f(name, df2): - return Series(np.arange(df2.shape[0]), - name=df2.index.values[0]).reindex(f_index) - - # TODO(wesm): unused? 
- # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T - - # we are actually operating on a copy here - # but in this case, that's ok - for name, df2 in grp: - new_vals = np.arange(df2.shape[0]) - df.ix[name, 'new_col'] = new_vals - def test_multi_assign(self): # GH 3626, an assignement of a sub-df to a df @@ -4069,36 +2606,6 @@ def f(): dtype='float64') tm.assert_frame_equal(df, exp) - def test_partial_setting_with_datetimelike_dtype(self): - - # GH9478 - # a datetimeindex alignment issue with partial setting - df = pd.DataFrame(np.arange(6.).reshape(3, 2), columns=list('AB'), - index=pd.date_range('1/1/2000', periods=3, - freq='1H')) - expected = df.copy() - expected['C'] = [expected.index[0]] + [pd.NaT, pd.NaT] - - mask = df.A < 1 - df.loc[mask, 'C'] = df.loc[mask].index - tm.assert_frame_equal(df, expected) - - def test_loc_setitem_datetime(self): - - # GH 9516 - dt1 = Timestamp('20130101 09:00:00') - dt2 = Timestamp('20130101 10:00:00') - - for conv in [lambda x: x, lambda x: x.to_datetime64(), - lambda x: x.to_pydatetime(), lambda x: np.datetime64(x)]: - - df = pd.DataFrame() - df.loc[conv(dt1), 'one'] = 100 - df.loc[conv(dt2), 'one'] = 200 - - expected = DataFrame({'one': [100.0, 200.0]}, index=[dt1, dt2]) - tm.assert_frame_equal(df, expected) - def test_series_partial_set(self): # partial set with new index # Regression from GH4825 @@ -4233,54 +2740,6 @@ def test_series_partial_set_with_name(self): result = ser.iloc[[1, 1, 0, 0]] tm.assert_series_equal(result, expected, check_index_type=True) - def test_series_partial_set_datetime(self): - # GH 11497 - - idx = date_range('2011-01-01', '2011-01-02', freq='D', name='idx') - ser = Series([0.1, 0.2], index=idx, name='s') - - result = ser.loc[[Timestamp('2011-01-01'), Timestamp('2011-01-02')]] - exp = Series([0.1, 0.2], index=idx, name='s') - tm.assert_series_equal(result, exp, check_index_type=True) - - keys = [Timestamp('2011-01-02'), Timestamp('2011-01-02'), - Timestamp('2011-01-01')] - exp = Series([0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name='idx'), - name='s') - tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) - - keys = [Timestamp('2011-01-03'), Timestamp('2011-01-02'), - Timestamp('2011-01-03')] - exp = Series([np.nan, 0.2, np.nan], - index=pd.DatetimeIndex(keys, name='idx'), name='s') - tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) - - def test_series_partial_set_period(self): - # GH 11497 - - idx = pd.period_range('2011-01-01', '2011-01-02', freq='D', name='idx') - ser = Series([0.1, 0.2], index=idx, name='s') - - result = ser.loc[[pd.Period('2011-01-01', freq='D'), - pd.Period('2011-01-02', freq='D')]] - exp = Series([0.1, 0.2], index=idx, name='s') - tm.assert_series_equal(result, exp, check_index_type=True) - - keys = [pd.Period('2011-01-02', freq='D'), - pd.Period('2011-01-02', freq='D'), - pd.Period('2011-01-01', freq='D')] - exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name='idx'), - name='s') - tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) - - keys = [pd.Period('2011-01-03', freq='D'), - pd.Period('2011-01-02', freq='D'), - pd.Period('2011-01-03', freq='D')] - exp = Series([np.nan, 0.2, np.nan], - index=pd.PeriodIndex(keys, name='idx'), name='s') - result = ser.loc[keys] - tm.assert_series_equal(result, exp) - def test_partial_set_invalid(self): # GH 4940 @@ -4566,509 +3025,6 @@ def test_cache_updating(self): expected = Series([0, 0, 0, 2, 0], name='f') tm.assert_series_equal(df.f, expected) - def 
test_slice_consolidate_invalidate_item_cache(self): - - # this is chained assignment, but will 'work' - with option_context('chained_assignment', None): - - # #3970 - df = DataFrame({"aa": lrange(5), "bb": [2.2] * 5}) - - # Creates a second float block - df["cc"] = 0.0 - - # caches a reference to the 'bb' series - df["bb"] - - # repr machinery triggers consolidation - repr(df) - - # Assignment to wrong series - df['bb'].iloc[0] = 0.17 - df._clear_item_cache() - self.assertAlmostEqual(df['bb'][0], 0.17) - - def test_setitem_cache_updating(self): - # GH 5424 - cont = ['one', 'two', 'three', 'four', 'five', 'six', 'seven'] - - for do_ref in [False, False]: - df = DataFrame({'a': cont, - "b": cont[3:] + cont[:3], - 'c': np.arange(7)}) - - # ref the cache - if do_ref: - df.ix[0, "c"] - - # set it - df.ix[7, 'c'] = 1 - - self.assertEqual(df.ix[0, 'c'], 0.0) - self.assertEqual(df.ix[7, 'c'], 1.0) - - # GH 7084 - # not updating cache on series setting with slices - expected = DataFrame({'A': [600, 600, 600]}, - index=date_range('5/7/2014', '5/9/2014')) - out = DataFrame({'A': [0, 0, 0]}, - index=date_range('5/7/2014', '5/9/2014')) - df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]}) - - # loop through df to update out - six = Timestamp('5/7/2014') - eix = Timestamp('5/9/2014') - for ix, row in df.iterrows(): - out.loc[six:eix, row['C']] = out.loc[six:eix, row['C']] + row['D'] - - tm.assert_frame_equal(out, expected) - tm.assert_series_equal(out['A'], expected['A']) - - # try via a chain indexing - # this actually works - out = DataFrame({'A': [0, 0, 0]}, - index=date_range('5/7/2014', '5/9/2014')) - for ix, row in df.iterrows(): - v = out[row['C']][six:eix] + row['D'] - out[row['C']][six:eix] = v - - tm.assert_frame_equal(out, expected) - tm.assert_series_equal(out['A'], expected['A']) - - out = DataFrame({'A': [0, 0, 0]}, - index=date_range('5/7/2014', '5/9/2014')) - for ix, row in df.iterrows(): - out.loc[six:eix, row['C']] += row['D'] - - tm.assert_frame_equal(out, expected) - tm.assert_series_equal(out['A'], expected['A']) - - def test_setitem_chained_setfault(self): - - # GH6026 - # setfaults under numpy 1.7.1 (ok on 1.8) - data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout'] - mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none'] - - df = DataFrame({'response': np.array(data)}) - mask = df.response == 'timeout' - df.response[mask] = 'none' - tm.assert_frame_equal(df, DataFrame({'response': mdata})) - - recarray = np.rec.fromarrays([data], names=['response']) - df = DataFrame(recarray) - mask = df.response == 'timeout' - df.response[mask] = 'none' - tm.assert_frame_equal(df, DataFrame({'response': mdata})) - - df = DataFrame({'response': data, 'response1': data}) - mask = df.response == 'timeout' - df.response[mask] = 'none' - tm.assert_frame_equal(df, DataFrame({'response': mdata, - 'response1': data})) - - # GH 6056 - expected = DataFrame(dict(A=[np.nan, 'bar', 'bah', 'foo', 'bar'])) - df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar']))) - df['A'].iloc[0] = np.nan - result = df.head() - tm.assert_frame_equal(result, expected) - - df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar']))) - df.A.iloc[0] = np.nan - result = df.head() - tm.assert_frame_equal(result, expected) - - def test_detect_chained_assignment(self): - - pd.set_option('chained_assignment', 'raise') - - # work with the chain - expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB')) - df = DataFrame(np.arange(4).reshape(2, 2), - columns=list('AB'), 
dtype='int64') - self.assertIsNone(df.is_copy) - df['A'][0] = -5 - df['A'][1] = -6 - tm.assert_frame_equal(df, expected) - - # test with the chaining - df = DataFrame({'A': Series(range(2), dtype='int64'), - 'B': np.array(np.arange(2, 4), dtype=np.float64)}) - self.assertIsNone(df.is_copy) - - def f(): - df['A'][0] = -5 - - self.assertRaises(com.SettingWithCopyError, f) - - def f(): - df['A'][1] = np.nan - - self.assertRaises(com.SettingWithCopyError, f) - self.assertIsNone(df['A'].is_copy) - - # using a copy (the chain), fails - df = DataFrame({'A': Series(range(2), dtype='int64'), - 'B': np.array(np.arange(2, 4), dtype=np.float64)}) - - def f(): - df.loc[0]['A'] = -5 - - self.assertRaises(com.SettingWithCopyError, f) - - # doc example - df = DataFrame({'a': ['one', 'one', 'two', 'three', - 'two', 'one', 'six'], - 'c': Series(range(7), dtype='int64')}) - self.assertIsNone(df.is_copy) - expected = DataFrame({'a': ['one', 'one', 'two', 'three', - 'two', 'one', 'six'], - 'c': [42, 42, 2, 3, 4, 42, 6]}) - - def f(): - indexer = df.a.str.startswith('o') - df[indexer]['c'] = 42 - - self.assertRaises(com.SettingWithCopyError, f) - - expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]}) - df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) - - def f(): - df['A'][0] = 111 - - self.assertRaises(com.SettingWithCopyError, f) - - def f(): - df.loc[0]['A'] = 111 - - self.assertRaises(com.SettingWithCopyError, f) - - df.loc[0, 'A'] = 111 - tm.assert_frame_equal(df, expected) - - # make sure that is_copy is picked up reconstruction - # GH5475 - df = DataFrame({"A": [1, 2]}) - self.assertIsNone(df.is_copy) - with tm.ensure_clean('__tmp__pickle') as path: - df.to_pickle(path) - df2 = pd.read_pickle(path) - df2["B"] = df2["A"] - df2["B"] = df2["A"] - - # a suprious raise as we are setting the entire column here - # GH5597 - from string import ascii_letters as letters - - def random_text(nobs=100): - df = [] - for i in range(nobs): - idx = np.random.randint(len(letters), size=2) - idx.sort() - df.append([letters[idx[0]:idx[1]]]) - - return DataFrame(df, columns=['letters']) - - df = random_text(100000) - - # always a copy - x = df.iloc[[0, 1, 2]] - self.assertIsNotNone(x.is_copy) - x = df.iloc[[0, 1, 2, 4]] - self.assertIsNotNone(x.is_copy) - - # explicity copy - indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.ix[indexer].copy() - self.assertIsNone(df.is_copy) - df['letters'] = df['letters'].apply(str.lower) - - # implicity take - df = random_text(100000) - indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.ix[indexer] - self.assertIsNotNone(df.is_copy) - df['letters'] = df['letters'].apply(str.lower) - - # implicity take 2 - df = random_text(100000) - indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.ix[indexer] - self.assertIsNotNone(df.is_copy) - df.loc[:, 'letters'] = df['letters'].apply(str.lower) - - # should be ok even though it's a copy! 
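-        # (the full-column assignment via .loc above resets is_copy, as the
-        # assertions below confirm)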
- self.assertIsNone(df.is_copy) - df['letters'] = df['letters'].apply(str.lower) - self.assertIsNone(df.is_copy) - - df = random_text(100000) - indexer = df.letters.apply(lambda x: len(x) > 10) - df.ix[indexer, 'letters'] = df.ix[indexer, 'letters'].apply(str.lower) - - # an identical take, so no copy - df = DataFrame({'a': [1]}).dropna() - self.assertIsNone(df.is_copy) - df['a'] += 1 - - # inplace ops - # original from: - # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug - a = [12, 23] - b = [123, None] - c = [1234, 2345] - d = [12345, 23456] - tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'), - ('ears', 'right')] - events = {('eyes', 'left'): a, - ('eyes', 'right'): b, - ('ears', 'left'): c, - ('ears', 'right'): d} - multiind = MultiIndex.from_tuples(tuples, names=['part', 'side']) - zed = DataFrame(events, index=['a', 'b'], columns=multiind) - - def f(): - zed['eyes']['right'].fillna(value=555, inplace=True) - - self.assertRaises(com.SettingWithCopyError, f) - - df = DataFrame(np.random.randn(10, 4)) - s = df.iloc[:, 0].sort_values() - tm.assert_series_equal(s, df.iloc[:, 0].sort_values()) - tm.assert_series_equal(s, df[0].sort_values()) - - # false positives GH6025 - df = DataFrame({'column1': ['a', 'a', 'a'], 'column2': [4, 8, 9]}) - str(df) - df['column1'] = df['column1'] + 'b' - str(df) - df = df[df['column2'] != 8] - str(df) - df['column1'] = df['column1'] + 'c' - str(df) - - # from SO: - # http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc - df = DataFrame(np.arange(0, 9), columns=['count']) - df['group'] = 'b' - - def f(): - df.iloc[0:5]['group'] = 'a' - - self.assertRaises(com.SettingWithCopyError, f) - - # mixed type setting - # same dtype & changing dtype - df = DataFrame(dict(A=date_range('20130101', periods=5), - B=np.random.randn(5), - C=np.arange(5, dtype='int64'), - D=list('abcde'))) - - def f(): - df.ix[2]['D'] = 'foo' - - self.assertRaises(com.SettingWithCopyError, f) - - def f(): - df.ix[2]['C'] = 'foo' - - self.assertRaises(com.SettingWithCopyError, f) - - def f(): - df['C'][2] = 'foo' - - self.assertRaises(com.SettingWithCopyError, f) - - def test_setting_with_copy_bug(self): - - # operating on a copy - df = pd.DataFrame({'a': list(range(4)), - 'b': list('ab..'), - 'c': ['a', 'b', np.nan, 'd']}) - mask = pd.isnull(df.c) - - def f(): - df[['c']][mask] = df[['b']][mask] - - self.assertRaises(com.SettingWithCopyError, f) - - # invalid warning as we are returning a new object - # GH 8730 - df1 = DataFrame({'x': Series(['a', 'b', 'c']), - 'y': Series(['d', 'e', 'f'])}) - df2 = df1[['x']] - - # this should not raise - df2['y'] = ['g', 'h', 'i'] - - def test_detect_chained_assignment_warnings(self): - - # warnings - with option_context('chained_assignment', 'warn'): - df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) - with tm.assert_produces_warning( - expected_warning=com.SettingWithCopyWarning): - df.loc[0]['A'] = 111 - - def test_float64index_slicing_bug(self): - # GH 5557, related to slicing a float index - ser = {256: 2321.0, - 1: 78.0, - 2: 2716.0, - 3: 0.0, - 4: 369.0, - 5: 0.0, - 6: 269.0, - 7: 0.0, - 8: 0.0, - 9: 0.0, - 10: 3536.0, - 11: 0.0, - 12: 24.0, - 13: 0.0, - 14: 931.0, - 15: 0.0, - 16: 101.0, - 17: 78.0, - 18: 9643.0, - 19: 0.0, - 20: 0.0, - 21: 0.0, - 22: 63761.0, - 23: 0.0, - 24: 446.0, - 25: 0.0, - 26: 34773.0, - 27: 0.0, - 28: 729.0, - 29: 78.0, - 30: 0.0, - 31: 0.0, - 32: 3374.0, - 33: 0.0, - 34: 1391.0, - 35: 
0.0, - 36: 361.0, - 37: 0.0, - 38: 61808.0, - 39: 0.0, - 40: 0.0, - 41: 0.0, - 42: 6677.0, - 43: 0.0, - 44: 802.0, - 45: 0.0, - 46: 2691.0, - 47: 0.0, - 48: 3582.0, - 49: 0.0, - 50: 734.0, - 51: 0.0, - 52: 627.0, - 53: 70.0, - 54: 2584.0, - 55: 0.0, - 56: 324.0, - 57: 0.0, - 58: 605.0, - 59: 0.0, - 60: 0.0, - 61: 0.0, - 62: 3989.0, - 63: 10.0, - 64: 42.0, - 65: 0.0, - 66: 904.0, - 67: 0.0, - 68: 88.0, - 69: 70.0, - 70: 8172.0, - 71: 0.0, - 72: 0.0, - 73: 0.0, - 74: 64902.0, - 75: 0.0, - 76: 347.0, - 77: 0.0, - 78: 36605.0, - 79: 0.0, - 80: 379.0, - 81: 70.0, - 82: 0.0, - 83: 0.0, - 84: 3001.0, - 85: 0.0, - 86: 1630.0, - 87: 7.0, - 88: 364.0, - 89: 0.0, - 90: 67404.0, - 91: 9.0, - 92: 0.0, - 93: 0.0, - 94: 7685.0, - 95: 0.0, - 96: 1017.0, - 97: 0.0, - 98: 2831.0, - 99: 0.0, - 100: 2963.0, - 101: 0.0, - 102: 854.0, - 103: 0.0, - 104: 0.0, - 105: 0.0, - 106: 0.0, - 107: 0.0, - 108: 0.0, - 109: 0.0, - 110: 0.0, - 111: 0.0, - 112: 0.0, - 113: 0.0, - 114: 0.0, - 115: 0.0, - 116: 0.0, - 117: 0.0, - 118: 0.0, - 119: 0.0, - 120: 0.0, - 121: 0.0, - 122: 0.0, - 123: 0.0, - 124: 0.0, - 125: 0.0, - 126: 67744.0, - 127: 22.0, - 128: 264.0, - 129: 0.0, - 260: 197.0, - 268: 0.0, - 265: 0.0, - 269: 0.0, - 261: 0.0, - 266: 1198.0, - 267: 0.0, - 262: 2629.0, - 258: 775.0, - 257: 0.0, - 263: 0.0, - 259: 0.0, - 264: 163.0, - 250: 10326.0, - 251: 0.0, - 252: 1228.0, - 253: 0.0, - 254: 2769.0, - 255: 0.0} - - # smoke test for the repr - s = Series(ser) - result = s.value_counts() - str(result) - def test_set_ix_out_of_bounds_axis_0(self): df = pd.DataFrame( randn(2, 5), index=["row%s" % i for i in range(2)], @@ -5281,34 +3237,6 @@ def assert_slices_equivalent(l_slc, i_slc): assert_slices_equivalent(SLC[idx[13]:idx[9]:-1], SLC[13:8:-1]) assert_slices_equivalent(SLC[idx[9]:idx[13]:-1], SLC[:0]) - def test_multiindex_label_slicing_with_negative_step(self): - s = Series(np.arange(20), - MultiIndex.from_product([list('abcde'), np.arange(4)])) - SLC = pd.IndexSlice - - def assert_slices_equivalent(l_slc, i_slc): - tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) - tm.assert_series_equal(s[l_slc], s.iloc[i_slc]) - tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc]) - - assert_slices_equivalent(SLC[::-1], SLC[::-1]) - - assert_slices_equivalent(SLC['d'::-1], SLC[15::-1]) - assert_slices_equivalent(SLC[('d', )::-1], SLC[15::-1]) - - assert_slices_equivalent(SLC[:'d':-1], SLC[:11:-1]) - assert_slices_equivalent(SLC[:('d', ):-1], SLC[:11:-1]) - - assert_slices_equivalent(SLC['d':'b':-1], SLC[15:3:-1]) - assert_slices_equivalent(SLC[('d', ):'b':-1], SLC[15:3:-1]) - assert_slices_equivalent(SLC['d':('b', ):-1], SLC[15:3:-1]) - assert_slices_equivalent(SLC[('d', ):('b', ):-1], SLC[15:3:-1]) - assert_slices_equivalent(SLC['b':'d':-1], SLC[:0]) - - assert_slices_equivalent(SLC[('c', 2)::-1], SLC[10::-1]) - assert_slices_equivalent(SLC[:('c', 2):-1], SLC[:9:-1]) - assert_slices_equivalent(SLC[('e', 0):('c', 2):-1], SLC[16:9:-1]) - def test_slice_with_zero_step_raises(self): s = Series(np.arange(20), index=_mklbl('A', 20)) self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', @@ -5390,23 +3318,6 @@ def test_maybe_numeric_slice(self): expected = [1] self.assertEqual(result, expected) - def test_multiindex_slice_first_level(self): - # GH 12697 - freq = ['a', 'b', 'c', 'd'] - idx = pd.MultiIndex.from_product([freq, np.arange(500)]) - df = pd.DataFrame(list(range(2000)), index=idx, columns=['Test']) - df_slice = df.loc[pd.IndexSlice[:, 30:70], :] - result = df_slice.loc['a'] - expected = pd.DataFrame(list(range(30, 71)), 
- columns=['Test'], - index=range(30, 71)) - tm.assert_frame_equal(result, expected) - result = df_slice.loc['d'] - expected = pd.DataFrame(list(range(1530, 1571)), - columns=['Test'], - index=range(30, 71)) - tm.assert_frame_equal(result, expected) - class TestSeriesNoneCoercion(tm.TestCase): EXPECTED_RESULTS = [ @@ -5511,22 +3422,3 @@ def test_none_coercion_mixed_dtypes(self): datetime(2000, 1, 3)], 'd': [None, 'b', 'c']}) tm.assert_frame_equal(start_dataframe, exp) - - -class TestTimedeltaIndexing(tm.TestCase): - - def test_boolean_indexing(self): - # GH 14946 - df = pd.DataFrame({'x': range(10)}) - df.index = pd.to_timedelta(range(10), unit='s') - conditions = [df['x'] > 3, df['x'] == 3, df['x'] < 3] - expected_data = [[0, 1, 2, 3, 10, 10, 10, 10, 10, 10], - [0, 1, 2, 10, 4, 5, 6, 7, 8, 9], - [10, 10, 10, 3, 4, 5, 6, 7, 8, 9]] - for cond, data in zip(conditions, expected_data): - result = df.copy() - result.loc[cond, 'x'] = 10 - expected = pd.DataFrame(data, - index=pd.to_timedelta(range(10), unit='s'), - columns=['x']) - tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py new file mode 100644 index 0000000000000..1e6ecbbcdc756 --- /dev/null +++ b/pandas/tests/indexing/test_multiindex.py @@ -0,0 +1,1206 @@ +from warnings import catch_warnings +import pytest +import numpy as np +import pandas as pd +from pandas import (Panel, Series, MultiIndex, DataFrame, + Timestamp, Index, date_range) +from pandas.util import testing as tm +from pandas.core.common import PerformanceWarning, UnsortedIndexError +from pandas.tests.indexing.common import _mklbl + + +class TestMultiIndexBasic(tm.TestCase): + + def test_iloc_getitem_multiindex2(self): + # TODO(wesm): fix this + pytest.skip('this test was being suppressed, ' + 'needs to be fixed') + + arr = np.random.randn(3, 3) + df = DataFrame(arr, columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]]) + + rs = df.iloc[2] + xp = Series(arr[2], index=df.columns) + tm.assert_series_equal(rs, xp) + + rs = df.iloc[:, 2] + xp = Series(arr[:, 2], index=df.index) + tm.assert_series_equal(rs, xp) + + rs = df.iloc[2, 2] + xp = df.values[2, 2] + self.assertEqual(rs, xp) + + # for multiple items + # GH 5528 + rs = df.iloc[[0, 1]] + xp = df.xs(4, drop_level=False) + tm.assert_frame_equal(rs, xp) + + tup = zip(*[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) + index = MultiIndex.from_tuples(tup) + df = DataFrame(np.random.randn(4, 4), index=index) + rs = df.iloc[[2, 3]] + xp = df.xs('b', drop_level=False) + tm.assert_frame_equal(rs, xp) + + def test_setitem_multiindex(self): + for index_fn in ('ix', 'loc'): + + def check(target, indexers, value, compare_fn, expected=None): + fn = getattr(target, index_fn) + fn.__setitem__(indexers, value) + result = fn.__getitem__(indexers) + if expected is None: + expected = value + compare_fn(result, expected) + # GH7190 + index = pd.MultiIndex.from_product([np.arange(0, 100), + np.arange(0, 80)], + names=['time', 'firm']) + t, n = 0, 2 + df = DataFrame(np.nan, columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=0, + compare_fn=self.assertEqual) + + df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=1, + compare_fn=self.assertEqual) + + df = DataFrame(columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=2, + 
compare_fn=self.assertEqual)
+
+            # GH 7218, assigning with 0-dim arrays
+            df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x',
+                                          'X', 'd', 'profit'],
+                           index=index)
+            check(target=df,
+                  indexers=((t, n), 'X'),
+                  value=np.array(3),
+                  compare_fn=self.assertEqual,
+                  expected=3, )
+
+            # GH5206
+            df = pd.DataFrame(np.arange(25).reshape(5, 5),
+                              columns='A,B,C,D,E'.split(','), dtype=float)
+            df['F'] = 99
+            row_selection = df['A'] % 2 == 0
+            col_selection = ['B', 'C']
+            with catch_warnings(record=True):
+                df.ix[row_selection, col_selection] = df['F']
+            output = pd.DataFrame(99., index=[0, 2, 4], columns=['B', 'C'])
+            with catch_warnings(record=True):
+                tm.assert_frame_equal(df.ix[row_selection, col_selection],
+                                      output)
+            check(target=df,
+                  indexers=(row_selection, col_selection),
+                  value=df['F'],
+                  compare_fn=tm.assert_frame_equal,
+                  expected=output, )
+
+            # GH11372
+            idx = pd.MultiIndex.from_product([
+                ['A', 'B', 'C'],
+                pd.date_range('2015-01-01', '2015-04-01', freq='MS')])
+            cols = pd.MultiIndex.from_product([
+                ['foo', 'bar'],
+                pd.date_range('2016-01-01', '2016-02-01', freq='MS')])
+
+            df = pd.DataFrame(np.random.random((12, 4)),
+                              index=idx, columns=cols)
+
+            subidx = pd.MultiIndex.from_tuples(
+                [('A', pd.Timestamp('2015-01-01')),
+                 ('A', pd.Timestamp('2015-02-01'))])
+            subcols = pd.MultiIndex.from_tuples(
+                [('foo', pd.Timestamp('2016-01-01')),
+                 ('foo', pd.Timestamp('2016-02-01'))])
+
+            vals = pd.DataFrame(np.random.random((2, 2)),
+                                index=subidx, columns=subcols)
+            check(target=df,
+                  indexers=(subidx, subcols),
+                  value=vals,
+                  compare_fn=tm.assert_frame_equal, )
+            # set all columns
+            vals = pd.DataFrame(
+                np.random.random((2, 4)), index=subidx, columns=cols)
+            check(target=df,
+                  indexers=(subidx, slice(None, None, None)),
+                  value=vals,
+                  compare_fn=tm.assert_frame_equal, )
+            # identity
+            copy = df.copy()
+            check(target=df, indexers=(df.index, df.columns), value=df,
+                  compare_fn=tm.assert_frame_equal, expected=copy)
+
+    def test_loc_getitem_series(self):
+        # GH14730
+        # passing a series as a key with a MultiIndex
+        index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
+        x = Series(index=index, data=range(9), dtype=np.float64)
+        y = Series([1, 3])
+        expected = Series(
+            data=[0, 1, 2, 6, 7, 8],
+            index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
+            dtype=np.float64)
+        result = x.loc[y]
+        tm.assert_series_equal(result, expected)
+
+        result = x.loc[[1, 3]]
+        tm.assert_series_equal(result, expected)
+
+        empty = Series(data=[], dtype=np.float64)
+        expected = Series([], index=MultiIndex(
+            levels=index.levels, labels=[[], []], dtype=np.float64))
+        result = x.loc[empty]
+        tm.assert_series_equal(result, expected)
+
+    def test_iloc_getitem_multiindex(self):
+        mi_labels = DataFrame(np.random.randn(4, 3),
+                              columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
+                              index=[['i', 'i', 'j', 'k'],
+                                     ['X', 'X', 'Y', 'Y']])
+
+        mi_int = DataFrame(np.random.randn(3, 3),
+                           columns=[[2, 2, 4], [6, 8, 10]],
+                           index=[[4, 4, 8], [8, 10, 12]])
+
+        # the first row
+        rs = mi_int.iloc[0]
+        with catch_warnings(record=True):
+            xp = mi_int.ix[4].ix[8]
+        tm.assert_series_equal(rs, xp, check_names=False)
+        self.assertEqual(rs.name, (4, 8))
+        self.assertEqual(xp.name, 8)
+
+        # 2nd (last) columns
+        rs = mi_int.iloc[:, 2]
+        with catch_warnings(record=True):
+            xp = mi_int.ix[:, 2]
+        tm.assert_series_equal(rs, xp)
+
+        # corner column
+        rs = mi_int.iloc[2, 2]
+        with catch_warnings(record=True):
+            xp = mi_int.ix[:, 2].ix[2]
+        self.assertEqual(rs, xp)
+
+        # this is basically regular indexing
+        rs = mi_labels.iloc[2, 2]
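+        # compute the expected value via the legacy .ix chain; the
+        # catch_warnings block below swallows any warnings .ix may emit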
+        with catch_warnings(record=True):
+            xp = mi_labels.ix['j'].ix[:, 'j'].ix[0, 0]
+        self.assertEqual(rs, xp)
+
+    def test_loc_multiindex(self):
+
+        mi_labels = DataFrame(np.random.randn(3, 3),
+                              columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
+                              index=[['i', 'i', 'j'], ['X', 'X', 'Y']])
+
+        mi_int = DataFrame(np.random.randn(3, 3),
+                           columns=[[2, 2, 4], [6, 8, 10]],
+                           index=[[4, 4, 8], [8, 10, 12]])
+
+        # the first row
+        rs = mi_labels.loc['i']
+        with catch_warnings(record=True):
+            xp = mi_labels.ix['i']
+        tm.assert_frame_equal(rs, xp)
+
+        # 2nd (last) columns
+        rs = mi_labels.loc[:, 'j']
+        with catch_warnings(record=True):
+            xp = mi_labels.ix[:, 'j']
+        tm.assert_frame_equal(rs, xp)
+
+        # corner column
+        rs = mi_labels.loc['j'].loc[:, 'j']
+        with catch_warnings(record=True):
+            xp = mi_labels.ix['j'].ix[:, 'j']
+        tm.assert_frame_equal(rs, xp)
+
+        # with a tuple
+        rs = mi_labels.loc[('i', 'X')]
+        with catch_warnings(record=True):
+            xp = mi_labels.ix[('i', 'X')]
+        tm.assert_frame_equal(rs, xp)
+
+        rs = mi_int.loc[4]
+        with catch_warnings(record=True):
+            xp = mi_int.ix[4]
+        tm.assert_frame_equal(rs, xp)
+
+    def test_loc_multiindex_indexer_none(self):
+
+        # GH6788
+        # multi-index indexer is None (meaning take all)
+        attributes = ['Attribute' + str(i) for i in range(1)]
+        attribute_values = ['Value' + str(i) for i in range(5)]
+
+        index = MultiIndex.from_product([attributes, attribute_values])
+        df = 0.1 * np.random.randn(10, 1 * 5) + 0.5
+        df = DataFrame(df, columns=index)
+        result = df[attributes]
+        tm.assert_frame_equal(result, df)
+
+        # GH 7349
+        # loc with a multi-index seems to be doing fallback
+        df = DataFrame(np.arange(12).reshape(-1, 1),
+                       index=pd.MultiIndex.from_product([[1, 2, 3, 4],
+                                                         [1, 2, 3]]))
+
+        expected = df.loc[([1, 2], ), :]
+        result = df.loc[[1, 2]]
+        tm.assert_frame_equal(result, expected)
+
+    def test_loc_multiindex_incomplete(self):
+
+        # GH 7399
+        # incomplete indexers
+        s = pd.Series(np.arange(15, dtype='int64'),
+                      MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
+        expected = s.loc[:, 'a':'c']
+
+        result = s.loc[0:4, 'a':'c']
+        tm.assert_series_equal(result, expected)
+        tm.assert_series_equal(result, expected)
+
+        result = s.loc[:4, 'a':'c']
+        tm.assert_series_equal(result, expected)
+        tm.assert_series_equal(result, expected)
+
+        result = s.loc[0:, 'a':'c']
+        tm.assert_series_equal(result, expected)
+        tm.assert_series_equal(result, expected)
+
+        # GH 7400
+        # multiindexer getitem with list of indexers skips wrong element
+        s = pd.Series(np.arange(15, dtype='int64'),
+                      MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
+        expected = s.iloc[[6, 7, 8, 12, 13, 14]]
+        result = s.loc[2:4:2, 'a':'c']
+        tm.assert_series_equal(result, expected)
+
+    def test_multiindex_perf_warn(self):
+
+        df = DataFrame({'jim': [0, 0, 1, 1],
+                        'joe': ['x', 'x', 'z', 'y'],
+                        'jolie': np.random.rand(4)}).set_index(['jim', 'joe'])
+
+        with tm.assert_produces_warning(PerformanceWarning,
+                                        clear=[pd.core.index]):
+            df.loc[(1, 'z')]
+
+        df = df.iloc[[2, 1, 3, 0]]
+        with tm.assert_produces_warning(PerformanceWarning):
+            df.loc[(0, )]
+
+    def test_series_getitem_multiindex(self):
+
+        # GH 6018
+        # series regression getitem with a multi-index
+
+        s = Series([1, 2, 3])
+        s.index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)])
+
+        result = s[:, 0]
+        expected = Series([1], index=[0])
+        tm.assert_series_equal(result, expected)
+
+        result = s.loc[:, 1]
+        expected = Series([2, 3], index=[1, 2])
+        tm.assert_series_equal(result, expected)
+
+        # xs
+        result = s.xs(0, level=0)
+        expected = Series([1], index=[0])
+        tm.assert_series_equal(result, expected)
+
+        result = s.xs(1, level=1)
+        expected = Series([2, 3], index=[1, 2])
+        tm.assert_series_equal(result, expected)
+
+        # GH6258
+        dt = list(date_range('20130903', periods=3))
+        idx = MultiIndex.from_product([list('AB'), dt])
+        s = Series([1, 3, 4, 1, 3, 4], index=idx)
+
+        result = s.xs('20130903', level=1)
+        expected = Series([1, 1], index=list('AB'))
+        tm.assert_series_equal(result, expected)
+
+        # GH5684
+        idx = MultiIndex.from_tuples([('a', 'one'), ('a', 'two'), ('b', 'one'),
+                                      ('b', 'two')])
+        s = Series([1, 2, 3, 4], index=idx)
+        s.index.set_names(['L1', 'L2'], inplace=True)
+        result = s.xs('one', level='L2')
+        expected = Series([1, 3], index=['a', 'b'])
+        expected.index.set_names(['L1'], inplace=True)
+        tm.assert_series_equal(result, expected)
+
+    def test_xs_multiindex(self):
+
+        # GH2903
+        columns = MultiIndex.from_tuples(
+            [('a', 'foo'), ('a', 'bar'), ('b', 'hello'),
+             ('b', 'world')], names=['lvl0', 'lvl1'])
+        df = DataFrame(np.random.randn(4, 4), columns=columns)
+        df.sort_index(axis=1, inplace=True)
+        result = df.xs('a', level='lvl0', axis=1)
+        expected = df.iloc[:, 0:2].loc[:, 'a']
+        tm.assert_frame_equal(result, expected)
+
+        result = df.xs('foo', level='lvl1', axis=1)
+        expected = df.iloc[:, 1:2].copy()
+        expected.columns = expected.columns.droplevel('lvl1')
+        tm.assert_frame_equal(result, expected)
+
+    def test_multiindex_setitem(self):
+
+        # GH 3738
+        # setting with a multi-index right hand side
+        arrays = [np.array(['bar', 'bar', 'baz', 'qux', 'qux', 'bar']),
+                  np.array(['one', 'two', 'one', 'one', 'two', 'one']),
+                  np.arange(0, 6, 1)]
+
+        df_orig = pd.DataFrame(np.random.randn(6, 3),
+                               index=arrays,
+                               columns=['A', 'B', 'C']).sort_index()
+
+        expected = df_orig.loc[['bar']] * 2
+        df = df_orig.copy()
+        df.loc[['bar']] *= 2
+        tm.assert_frame_equal(df.loc[['bar']], expected)
+
+        # raise because these have differing levels
+        def f():
+            df.loc['bar'] *= 2
+
+        self.assertRaises(TypeError, f)
+
+        # from SO
+        # http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
+        df_orig = DataFrame.from_dict({'price': {
+            ('DE', 'Coal', 'Stock'): 2,
+            ('DE', 'Gas', 'Stock'): 4,
+            ('DE', 'Elec', 'Demand'): 1,
+            ('FR', 'Gas', 'Stock'): 5,
+            ('FR', 'Solar', 'SupIm'): 0,
+            ('FR', 'Wind', 'SupIm'): 0
+        }})
+        df_orig.index = MultiIndex.from_tuples(df_orig.index,
+                                               names=['Sit', 'Com', 'Type'])
+
+        expected = df_orig.copy()
+        expected.iloc[[0, 2, 3]] *= 2
+
+        idx = pd.IndexSlice
+        df = df_orig.copy()
+        df.loc[idx[:, :, 'Stock'], :] *= 2
+        tm.assert_frame_equal(df, expected)
+
+        df = df_orig.copy()
+        df.loc[idx[:, :, 'Stock'], 'price'] *= 2
+        tm.assert_frame_equal(df, expected)
+
+    def test_getitem_multiindex(self):
+        # GH 5725 the 'A' happens to be a valid Timestamp so this doesn't raise
+        # the appropriate error, only in PY3 of course!
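+        # (no 'A' in the first level below, so df.val['A'] must raise
+        # KeyError; the second index further down does contain 'A')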
+ index = MultiIndex(levels=[['D', 'B', 'C'], + [0, 26, 27, 37, 57, 67, 75, 82]], + labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=['tag', 'day']) + arr = np.random.randn(len(index), 1) + df = DataFrame(arr, index=index, columns=['val']) + result = df.val['D'] + expected = Series(arr.ravel()[0:3], name='val', index=Index( + [26, 37, 57], name='day')) + tm.assert_series_equal(result, expected) + + def f(): + df.val['A'] + + self.assertRaises(KeyError, f) + + def f(): + df.val['X'] + + self.assertRaises(KeyError, f) + + # A is treated as a special Timestamp + index = MultiIndex(levels=[['A', 'B', 'C'], + [0, 26, 27, 37, 57, 67, 75, 82]], + labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=['tag', 'day']) + df = DataFrame(arr, index=index, columns=['val']) + result = df.val['A'] + expected = Series(arr.ravel()[0:3], name='val', index=Index( + [26, 37, 57], name='day')) + tm.assert_series_equal(result, expected) + + def f(): + df.val['X'] + + self.assertRaises(KeyError, f) + + # GH 7866 + # multi-index slicing with missing indexers + idx = pd.MultiIndex.from_product([['A', 'B', 'C'], + ['foo', 'bar', 'baz']], + names=['one', 'two']) + s = pd.Series(np.arange(9, dtype='int64'), index=idx).sort_index() + + exp_idx = pd.MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']], + names=['one', 'two']) + expected = pd.Series(np.arange(3, dtype='int64'), + index=exp_idx).sort_index() + + result = s.loc[['A']] + tm.assert_series_equal(result, expected) + result = s.loc[['A', 'D']] + tm.assert_series_equal(result, expected) + + # not any values found + self.assertRaises(KeyError, lambda: s.loc[['D']]) + + # empty ok + result = s.loc[[]] + expected = s.iloc[[]] + tm.assert_series_equal(result, expected) + + idx = pd.IndexSlice + expected = pd.Series([0, 3, 6], index=pd.MultiIndex.from_product( + [['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index() + + result = s.loc[idx[:, ['foo']]] + tm.assert_series_equal(result, expected) + result = s.loc[idx[:, ['foo', 'bah']]] + tm.assert_series_equal(result, expected) + + # GH 8737 + # empty indexer + multi_index = pd.MultiIndex.from_product((['foo', 'bar', 'baz'], + ['alpha', 'beta'])) + df = DataFrame( + np.random.randn(5, 6), index=range(5), columns=multi_index) + df = df.sort_index(level=0, axis=1) + + expected = DataFrame(index=range(5), + columns=multi_index.reindex([])[0]) + result1 = df.loc[:, ([], slice(None))] + result2 = df.loc[:, (['foo'], [])] + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + + # regression from < 0.14.0 + # GH 7914 + df = DataFrame([[np.mean, np.median], ['mean', 'median']], + columns=MultiIndex.from_tuples([('functs', 'mean'), + ('functs', 'median')]), + index=['function', 'name']) + result = df.loc['function', ('functs', 'mean')] + self.assertEqual(result, np.mean) + + def test_multiindex_assignment(self): + + # GH3777 part 2 + + # mixed dtype + df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3), + columns=list('abc'), + index=[[4, 4, 8], [8, 10, 12]]) + df['d'] = np.nan + arr = np.array([0., 1.]) + + df.ix[4, 'd'] = arr + tm.assert_series_equal(df.ix[4, 'd'], + Series(arr, index=[8, 10], name='d')) + + # single dtype + df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3), + columns=list('abc'), + index=[[4, 4, 8], [8, 10, 12]]) + + df.ix[4, 'c'] = arr + exp = Series(arr, index=[8, 10], name='c', dtype='float64') + tm.assert_series_equal(df.ix[4, 'c'], exp) + + # scalar ok + df.ix[4, 'c'] = 10 + 
exp = Series(10, index=[8, 10], name='c', dtype='float64') + tm.assert_series_equal(df.ix[4, 'c'], exp) + + # invalid assignments + def f(): + df.ix[4, 'c'] = [0, 1, 2, 3] + + self.assertRaises(ValueError, f) + + def f(): + df.ix[4, 'c'] = [0] + + self.assertRaises(ValueError, f) + + # groupby example + NUM_ROWS = 100 + NUM_COLS = 10 + col_names = ['A' + num for num in + map(str, np.arange(NUM_COLS).tolist())] + index_cols = col_names[:5] + + df = DataFrame(np.random.randint(5, size=(NUM_ROWS, NUM_COLS)), + dtype=np.int64, columns=col_names) + df = df.set_index(index_cols).sort_index() + grp = df.groupby(level=index_cols[:4]) + df['new_col'] = np.nan + + f_index = np.arange(5) + + def f(name, df2): + return Series(np.arange(df2.shape[0]), + name=df2.index.values[0]).reindex(f_index) + + # TODO(wesm): unused? + # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T + + # we are actually operating on a copy here + # but in this case, that's ok + for name, df2 in grp: + new_vals = np.arange(df2.shape[0]) + df.ix[name, 'new_col'] = new_vals + + def test_multiindex_label_slicing_with_negative_step(self): + s = Series(np.arange(20), + MultiIndex.from_product([list('abcde'), np.arange(4)])) + SLC = pd.IndexSlice + + def assert_slices_equivalent(l_slc, i_slc): + tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) + tm.assert_series_equal(s[l_slc], s.iloc[i_slc]) + tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc]) + + assert_slices_equivalent(SLC[::-1], SLC[::-1]) + + assert_slices_equivalent(SLC['d'::-1], SLC[15::-1]) + assert_slices_equivalent(SLC[('d', )::-1], SLC[15::-1]) + + assert_slices_equivalent(SLC[:'d':-1], SLC[:11:-1]) + assert_slices_equivalent(SLC[:('d', ):-1], SLC[:11:-1]) + + assert_slices_equivalent(SLC['d':'b':-1], SLC[15:3:-1]) + assert_slices_equivalent(SLC[('d', ):'b':-1], SLC[15:3:-1]) + assert_slices_equivalent(SLC['d':('b', ):-1], SLC[15:3:-1]) + assert_slices_equivalent(SLC[('d', ):('b', ):-1], SLC[15:3:-1]) + assert_slices_equivalent(SLC['b':'d':-1], SLC[:0]) + + assert_slices_equivalent(SLC[('c', 2)::-1], SLC[10::-1]) + assert_slices_equivalent(SLC[:('c', 2):-1], SLC[:9:-1]) + assert_slices_equivalent(SLC[('e', 0):('c', 2):-1], SLC[16:9:-1]) + + def test_multiindex_slice_first_level(self): + # GH 12697 + freq = ['a', 'b', 'c', 'd'] + idx = pd.MultiIndex.from_product([freq, np.arange(500)]) + df = pd.DataFrame(list(range(2000)), index=idx, columns=['Test']) + df_slice = df.loc[pd.IndexSlice[:, 30:70], :] + result = df_slice.loc['a'] + expected = pd.DataFrame(list(range(30, 71)), + columns=['Test'], + index=range(30, 71)) + tm.assert_frame_equal(result, expected) + result = df_slice.loc['d'] + expected = pd.DataFrame(list(range(1530, 1571)), + columns=['Test'], + index=range(30, 71)) + tm.assert_frame_equal(result, expected) + + +class TestMultiIndexSlicers(tm.TestCase): + + def test_per_axis_per_level_getitem(self): + + # GH6134 + # example test case + ix = MultiIndex.from_product([_mklbl('A', 5), _mklbl('B', 7), _mklbl( + 'C', 4), _mklbl('D', 2)]) + df = DataFrame(np.arange(len(ix.get_values())), index=ix) + + result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :] + expected = df.loc[[tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (a == 'A1' or a == 'A2' or a == 'A3') and ( + c == 'C1' or c == 'C3')]] + tm.assert_frame_equal(result, expected) + + expected = df.loc[[tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (a == 'A1' or a == 'A2' or a == 'A3') and ( + c == 'C1' or c == 'C2' or c == 'C3')]] + result = 
df.loc[(slice('A1', 'A3'), slice(None), slice('C1', 'C3')), :] + tm.assert_frame_equal(result, expected) + + # test multi-index slicing with per axis and per index controls + index = MultiIndex.from_tuples([('A', 1), ('A', 2), + ('A', 3), ('B', 1)], + names=['one', 'two']) + columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), + ('b', 'foo'), ('b', 'bah')], + names=['lvl0', 'lvl1']) + + df = DataFrame( + np.arange(16, dtype='int64').reshape( + 4, 4), index=index, columns=columns) + df = df.sort_index(axis=0).sort_index(axis=1) + + # identity + result = df.loc[(slice(None), slice(None)), :] + tm.assert_frame_equal(result, df) + result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))] + tm.assert_frame_equal(result, df) + result = df.loc[:, (slice(None), slice(None))] + tm.assert_frame_equal(result, df) + + # index + result = df.loc[(slice(None), [1]), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(None), 1), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + # columns + result = df.loc[:, (slice(None), ['foo'])] + expected = df.iloc[:, [1, 3]] + tm.assert_frame_equal(result, expected) + + # both + result = df.loc[(slice(None), 1), (slice(None), ['foo'])] + expected = df.iloc[[0, 3], [1, 3]] + tm.assert_frame_equal(result, expected) + + result = df.loc['A', 'a'] + expected = DataFrame(dict(bar=[1, 5, 9], foo=[0, 4, 8]), + index=Index([1, 2, 3], name='two'), + columns=Index(['bar', 'foo'], name='lvl1')) + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(None), [1, 2]), :] + expected = df.iloc[[0, 1, 3]] + tm.assert_frame_equal(result, expected) + + # multi-level series + s = Series(np.arange(len(ix.get_values())), index=ix) + result = s.loc['A1':'A3', :, ['C1', 'C3']] + expected = s.loc[[tuple([a, b, c, d]) + for a, b, c, d in s.index.values + if (a == 'A1' or a == 'A2' or a == 'A3') and ( + c == 'C1' or c == 'C3')]] + tm.assert_series_equal(result, expected) + + # boolean indexers + result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] + expected = df.iloc[[2, 3]] + tm.assert_frame_equal(result, expected) + + def f(): + df.loc[(slice(None), np.array([True, False])), :] + + self.assertRaises(ValueError, f) + + # ambiguous cases + # these can be multiply interpreted (e.g. 
in this case
+        # as df.loc[slice(None), [1]] as well)
+        self.assertRaises(KeyError, lambda: df.loc[slice(None), [1]])
+
+        result = df.loc[(slice(None), [1]), :]
+        expected = df.iloc[[0, 3]]
+        tm.assert_frame_equal(result, expected)
+
+        # not lexsorted
+        self.assertEqual(df.index.lexsort_depth, 2)
+        df = df.sort_index(level=1, axis=0)
+        self.assertEqual(df.index.lexsort_depth, 0)
+        with tm.assertRaisesRegexp(
+                UnsortedIndexError,
+                'MultiIndex Slicing requires the index to be fully '
+                r'lexsorted tuple len \(2\), lexsort depth \(0\)'):
+            df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
+
+    def test_multiindex_slicers_non_unique(self):
+
+        # GH 7106
+        # non-unique mi index support
+        df = (DataFrame(dict(A=['foo', 'foo', 'foo', 'foo'],
+                             B=['a', 'a', 'a', 'a'],
+                             C=[1, 2, 1, 3],
+                             D=[1, 2, 3, 4]))
+              .set_index(['A', 'B', 'C']).sort_index())
+        self.assertFalse(df.index.is_unique)
+        expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'],
+                                   C=[1, 1], D=[1, 3]))
+                    .set_index(['A', 'B', 'C']).sort_index())
+        result = df.loc[(slice(None), slice(None), 1), :]
+        tm.assert_frame_equal(result, expected)
+
+        # this is equivalent to an xs expression
+        result = df.xs(1, level=2, drop_level=False)
+        tm.assert_frame_equal(result, expected)
+
+        df = (DataFrame(dict(A=['foo', 'foo', 'foo', 'foo'],
+                             B=['a', 'a', 'a', 'a'],
+                             C=[1, 2, 1, 2],
+                             D=[1, 2, 3, 4]))
+              .set_index(['A', 'B', 'C']).sort_index())
+        self.assertFalse(df.index.is_unique)
+        expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'],
+                                   C=[1, 1], D=[1, 3]))
+                    .set_index(['A', 'B', 'C']).sort_index())
+        result = df.loc[(slice(None), slice(None), 1), :]
+        self.assertFalse(result.index.is_unique)
+        tm.assert_frame_equal(result, expected)
+
+        # GH12896
+        # numpy-implementation dependent bug
+        ints = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 13, 14, 14, 16,
+                17, 18, 19, 200000, 200000]
+        n = len(ints)
+        idx = MultiIndex.from_arrays([['a'] * n, ints])
+        result = Series([1] * n, index=idx)
+        result = result.sort_index()
+        result = result.loc[(slice(None), slice(100000))]
+        expected = Series([1] * (n - 2), index=idx[:-2]).sort_index()
+        tm.assert_series_equal(result, expected)
+
+    def test_multiindex_slicers_datetimelike(self):
+
+        # GH 7429
+        # buggy/inconsistent behavior when slicing with datetime-like
+        import datetime
+        dates = [datetime.datetime(2012, 1, 1, 12, 12, 12) +
+                 datetime.timedelta(days=i) for i in range(6)]
+        freq = [1, 2]
+        index = MultiIndex.from_product(
+            [dates, freq], names=['date', 'frequency'])
+
+        df = DataFrame(
+            np.arange(6 * 2 * 4, dtype='int64').reshape(
+                -1, 4), index=index, columns=list('ABCD'))
+
+        # multi-axis slicing
+        idx = pd.IndexSlice
+        expected = df.iloc[[0, 2, 4], [0, 1]]
+        result = df.loc[(slice(Timestamp('2012-01-01 12:12:12'),
+                               Timestamp('2012-01-03 12:12:12')),
+                         slice(1, 1)), slice('A', 'B')]
+        tm.assert_frame_equal(result, expected)
+
+        result = df.loc[(idx[Timestamp('2012-01-01 12:12:12'):Timestamp(
+            '2012-01-03 12:12:12')], idx[1:1]), slice('A', 'B')]
+        tm.assert_frame_equal(result, expected)
+
+        result = df.loc[(slice(Timestamp('2012-01-01 12:12:12'),
+                               Timestamp('2012-01-03 12:12:12')), 1),
+                        slice('A', 'B')]
+        tm.assert_frame_equal(result, expected)
+
+        # with strings
+        result = df.loc[(slice('2012-01-01 12:12:12', '2012-01-03 12:12:12'),
+                         slice(1, 1)), slice('A', 'B')]
+        tm.assert_frame_equal(result, expected)
+
+        result = df.loc[(idx['2012-01-01 12:12:12':'2012-01-03 12:12:12'], 1),
+                        idx['A', 'B']]
+        tm.assert_frame_equal(result, expected)
+
+    def
+    def test_multiindex_slicers_edges(self):
+        # GH 8132
+        # various edge cases
+        df = DataFrame(
+            {'A': ['A0'] * 5 + ['A1'] * 5 + ['A2'] * 5,
+             'B': ['B0', 'B0', 'B1', 'B1', 'B2'] * 3,
+             'DATE': ["2013-06-11", "2013-07-02", "2013-07-09", "2013-07-30",
+                      "2013-08-06", "2013-06-11", "2013-07-02", "2013-07-09",
+                      "2013-07-30", "2013-08-06", "2013-09-03", "2013-10-01",
+                      "2013-07-09", "2013-08-06", "2013-09-03"],
+             'VALUES': [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2]})
+
+        df['DATE'] = pd.to_datetime(df['DATE'])
+        df1 = df.set_index(['A', 'B', 'DATE'])
+        df1 = df1.sort_index()
+
+        # A1 - Get all values under "A0" and "A1"
+        result = df1.loc[(slice('A1')), :]
+        expected = df1.iloc[0:10]
+        tm.assert_frame_equal(result, expected)
+
+        # A2 - Get all values from the start to "A2"
+        result = df1.loc[(slice('A2')), :]
+        expected = df1
+        tm.assert_frame_equal(result, expected)
+
+        # A3 - Get all values under "B1" or "B2"
+        result = df1.loc[(slice(None), slice('B1', 'B2')), :]
+        expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]
+        tm.assert_frame_equal(result, expected)
+
+        # A4 - Get all values between 2013-07-02 and 2013-07-09
+        result = df1.loc[(slice(None), slice(None),
+                          slice('20130702', '20130709')), :]
+        expected = df1.iloc[[1, 2, 6, 7, 12]]
+        tm.assert_frame_equal(result, expected)
+
+        # B1 - Get all values in B0 that are also under A0, A1 and A2
+        result = df1.loc[(slice('A2'), slice('B0')), :]
+        expected = df1.iloc[[0, 1, 5, 6, 10, 11]]
+        tm.assert_frame_equal(result, expected)
+
+        # B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for
+        # the As)
+        result = df1.loc[(slice(None), slice('B2')), :]
+        expected = df1
+        tm.assert_frame_equal(result, expected)
+
+        # B3 - Get all values from B1 to B2 and up to 2013-08-06
+        result = df1.loc[(slice(None), slice('B1', 'B2'),
+                          slice('2013-08-06')), :]
+        expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]
+        tm.assert_frame_equal(result, expected)
+
+        # B4 - Same as A4 but the start of the date slice is not a key.
+        # shows indexing on a partial selection slice
+        result = df1.loc[(slice(None), slice(None),
+                          slice('20130701', '20130709')), :]
+        expected = df1.iloc[[1, 2, 6, 7, 12]]
+        tm.assert_frame_equal(result, expected)
+
+    def test_per_axis_per_level_doc_examples(self):
+
+        # test index maker
+        idx = pd.IndexSlice
+
+        # from indexing.rst / advanced
+        index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2),
+                                         _mklbl('C', 4), _mklbl('D', 2)])
+        columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
+                                          ('b', 'foo'), ('b', 'bah')],
+                                         names=['lvl0', 'lvl1'])
+        df = DataFrame(np.arange(len(index) * len(columns), dtype='int64')
+                       .reshape((len(index), len(columns))),
+                       index=index, columns=columns)
+        result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :]
+        expected = df.loc[[tuple([a, b, c, d])
+                           for a, b, c, d in df.index.values
+                           if (a == 'A1' or a == 'A2' or a == 'A3') and (
+                               c == 'C1' or c == 'C3')]]
+        tm.assert_frame_equal(result, expected)
+        result = df.loc[idx['A1':'A3', :, ['C1', 'C3']], :]
+        tm.assert_frame_equal(result, expected)
+
+        result = df.loc[(slice(None), slice(None), ['C1', 'C3']), :]
+        expected = df.loc[[tuple([a, b, c, d])
+                           for a, b, c, d in df.index.values
+                           if (c == 'C1' or c == 'C3')]]
+        tm.assert_frame_equal(result, expected)
+        result = df.loc[idx[:, :, ['C1', 'C3']], :]
+        tm.assert_frame_equal(result, expected)
+
+        # not sorted
+        def f():
+            df.loc['A1', (slice(None), 'foo')]
+
+        self.assertRaises(UnsortedIndexError, f)
+        df = df.sort_index(axis=1)
+
+        # slicing
+        df.loc['A1', (slice(None), 'foo')]
+        df.loc[(slice(None), slice(None), ['C1', 'C3']), (slice(None), 'foo')]
+
+        # setitem
+        df.loc(axis=0)[:, :, ['C1', 'C3']] = -10
+
+    def test_loc_axis_arguments(self):
+
+        index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2),
+                                         _mklbl('C', 4), _mklbl('D', 2)])
+        columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
+                                          ('b', 'foo'), ('b', 'bah')],
+                                         names=['lvl0', 'lvl1'])
+        df = DataFrame(np.arange(len(index) * len(columns), dtype='int64')
+                       .reshape((len(index), len(columns))),
+                       index=index,
+                       columns=columns).sort_index().sort_index(axis=1)
+
+        # axis 0
+        result = df.loc(axis=0)['A1':'A3', :, ['C1', 'C3']]
+        expected = df.loc[[tuple([a, b, c, d])
+                           for a, b, c, d in df.index.values
+                           if (a == 'A1' or a == 'A2' or a == 'A3') and (
+                               c == 'C1' or c == 'C3')]]
+        tm.assert_frame_equal(result, expected)
+
+        result = df.loc(axis='index')[:, :, ['C1', 'C3']]
+        expected = df.loc[[tuple([a, b, c, d])
+                           for a, b, c, d in df.index.values
+                           if (c == 'C1' or c == 'C3')]]
+        tm.assert_frame_equal(result, expected)
+
+        # axis 1
+        result = df.loc(axis=1)[:, 'foo']
+        expected = df.loc[:, (slice(None), 'foo')]
+        tm.assert_frame_equal(result, expected)
+
+        result = df.loc(axis='columns')[:, 'foo']
+        expected = df.loc[:, (slice(None), 'foo')]
+        tm.assert_frame_equal(result, expected)
+
+        # invalid axis
+        def f():
+            df.loc(axis=-1)[:, :, ['C1', 'C3']]
+
+        self.assertRaises(ValueError, f)
+
+        def f():
+            df.loc(axis=2)[:, :, ['C1', 'C3']]
+
+        self.assertRaises(ValueError, f)
+
+        def f():
+            df.loc(axis='foo')[:, :, ['C1', 'C3']]
+
+        self.assertRaises(ValueError, f)
+
+    def test_per_axis_per_level_setitem(self):
+
+        # test index maker
+        idx = pd.IndexSlice
+
+        # test multi-index slicing with per axis and per index controls
+        index = MultiIndex.from_tuples([('A', 1), ('A', 2),
+                                        ('A', 3), ('B', 1)],
+                                       names=['one', 'two'])
+        columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
+                                          ('b', 'foo'), ('b', 'bah')],
+                                         names=['lvl0', 'lvl1'])
+
+        df_orig = DataFrame(
+            np.arange(16, dtype='int64').reshape(
+                4, 4), index=index, columns=columns)
+        df_orig = df_orig.sort_index(axis=0).sort_index(axis=1)
+
+        # identity
+        df = df_orig.copy()
+        df.loc[(slice(None), slice(None)), :] = 100
+        expected = df_orig.copy()
+        expected.iloc[:, :] = 100
+        tm.assert_frame_equal(df, expected)
+
+        df = df_orig.copy()
+        df.loc(axis=0)[:, :] = 100
+        expected = df_orig.copy()
+        expected.iloc[:, :] = 100
+        tm.assert_frame_equal(df, expected)
+
+        df = df_orig.copy()
+        df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100
+        expected = df_orig.copy()
+        expected.iloc[:, :] = 100
+        tm.assert_frame_equal(df, expected)
+
+        df = df_orig.copy()
+        df.loc[:, (slice(None), slice(None))] = 100
+        expected = df_orig.copy()
+        expected.iloc[:, :] = 100
+        tm.assert_frame_equal(df, expected)
+
+        # index
+        df = df_orig.copy()
+        df.loc[(slice(None), [1]), :] = 100
+        expected = df_orig.copy()
+        expected.iloc[[0, 3]] = 100
+        tm.assert_frame_equal(df, expected)
+
+        df = df_orig.copy()
+        df.loc[(slice(None), 1), :] = 100
+        expected = df_orig.copy()
+        expected.iloc[[0, 3]] = 100
+        tm.assert_frame_equal(df, expected)
+
+        df = df_orig.copy()
+        df.loc(axis=0)[:, 1] = 100
+        expected = df_orig.copy()
+        expected.iloc[[0, 3]] = 100
+        tm.assert_frame_equal(df, expected)
+
+        # columns
+        df = df_orig.copy()
+        df.loc[:, (slice(None), ['foo'])] = 100
+        expected = df_orig.copy()
+        expected.iloc[:, [1, 3]] = 100
+        tm.assert_frame_equal(df, expected)
+
+        # both
+        df = df_orig.copy()
+        df.loc[(slice(None), 1), (slice(None), ['foo'])] = 100
+        expected = df_orig.copy()
+        expected.iloc[[0, 3], [1, 3]] = 100
+        tm.assert_frame_equal(df, expected)
+
+        df = df_orig.copy()
+        df.loc[idx[:, 1], idx[:, ['foo']]] = 100
+        expected = df_orig.copy()
+        expected.iloc[[0, 3], [1, 3]] = 100
+        tm.assert_frame_equal(df, expected)
+
+        df = df_orig.copy()
+        df.loc['A', 'a'] = 100
+        expected = df_orig.copy()
+        expected.iloc[0:3, 0:2] = 100
+        tm.assert_frame_equal(df, expected)
+
+        # setting with a list-like
+        df = df_orig.copy()
+        df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array(
+            [[100, 100], [100, 100]], dtype='int64')
+        expected = df_orig.copy()
+        expected.iloc[[0, 3], [1, 3]] = 100
+        tm.assert_frame_equal(df, expected)
+
+        # not enough values
+        df = df_orig.copy()
+
+        def f():
+            df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array(
+                [[100], [100, 100]], dtype='int64')
+
+        self.assertRaises(ValueError, f)
+
+        def f():
+            df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array(
+                [100, 100, 100, 100], dtype='int64')
+
+        self.assertRaises(ValueError, f)
+
+        # with an alignable rhs
+        df = df_orig.copy()
+        df.loc[(slice(None), 1), (slice(None), ['foo'])] = df.loc[(slice(
+            None), 1), (slice(None), ['foo'])] * 5
+        expected = df_orig.copy()
+        expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5
+        tm.assert_frame_equal(df, expected)
+
+        df = df_orig.copy()
+        df.loc[(slice(None), 1), (slice(None), ['foo'])] *= df.loc[(slice(
+            None), 1), (slice(None), ['foo'])]
+        expected = df_orig.copy()
+        expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
+        tm.assert_frame_equal(df, expected)
+
+        rhs = df_orig.loc[(slice(None), 1), (slice(None), ['foo'])].copy()
+        rhs.loc[:, ('c', 'bah')] = 10
+        df = df_orig.copy()
+        df.loc[(slice(None), 1), (slice(None), ['foo'])] *= rhs
+        expected = df_orig.copy()
+        expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
+        tm.assert_frame_equal(df, expected)
+
+
+class TestMultiIndexPanel(tm.TestCase):
+
+    def test_iloc_getitem_panel_multiindex(self):
+        # GH 7199
+        # Panel with multi-index
+        multi_index = pd.MultiIndex.from_tuples([('ONE', 'one'),
+                                                 ('TWO', 'two'),
+                                                 ('THREE', 'three')],
+                                                names=['UPPER', 'lower'])
+
+        simple_index = [x[0] for x in multi_index]
+        wd1 = Panel(items=['First', 'Second'], major_axis=['a', 'b', 'c', 'd'],
+                    minor_axis=multi_index)
+
+        wd2 = Panel(items=['First', 'Second'], major_axis=['a', 'b', 'c', 'd'],
+                    minor_axis=simple_index)
+
+        expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]]
+        result1 = wd1.iloc[0, [True, True, True, False], [0, 2]]  # WRONG
+        tm.assert_frame_equal(result1, expected1)
+
+        expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]]
+        result2 = wd2.iloc[0, [True, True, True, False], [0, 2]]
+        tm.assert_frame_equal(result2, expected2)
+
+        expected1 = DataFrame(index=['a'], columns=multi_index,
+                              dtype='float64')
+        result1 = wd1.iloc[0, [0], [0, 1, 2]]
+        tm.assert_frame_equal(result1, expected1)
+
+        expected2 = DataFrame(index=['a'], columns=simple_index,
+                              dtype='float64')
+        result2 = wd2.iloc[0, [0], [0, 1, 2]]
+        tm.assert_frame_equal(result2, expected2)
+
+        # GH 7516
+        mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')])
+        p = Panel(np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3),
+                  items=['a', 'b', 'c'], major_axis=mi,
+                  minor_axis=['u', 'v', 'w'])
+        result = p.iloc[:, 1, 0]
+        expected = Series([3, 12, 21], index=['a', 'b', 'c'], name='u')
+        tm.assert_series_equal(result, expected)
+
+        result = p.loc[:, (1, 'y'), 'u']
+        tm.assert_series_equal(result, expected)
+
+    def test_panel_setitem_with_multiindex(self):
+
+        # 10360
+        # failing with a multi-index
+        arr = np.array([[[1, 2, 3], [0, 0, 0]], [[0, 0, 0], [0, 0, 0]]],
+                       dtype=np.float64)
+
+        # reg index
+        axes = dict(items=['A', 'B'], major_axis=[0, 1],
+                    minor_axis=['X', 'Y', 'Z'])
+        p1 = Panel(0., **axes)
+        p1.iloc[0, 0, :] = [1, 2, 3]
+        expected = Panel(arr, **axes)
+        tm.assert_panel_equal(p1, expected)
+
+        # multi-indexes
+        axes['items'] = pd.MultiIndex.from_tuples([('A', 'a'), ('B', 'b')])
+        p2 = Panel(0., **axes)
+        p2.iloc[0, 0, :] = [1, 2, 3]
+        expected = Panel(arr, **axes)
+        tm.assert_panel_equal(p2, expected)
+
+        axes['major_axis'] = pd.MultiIndex.from_tuples([('A', 1), ('A', 2)])
+        p3 = Panel(0., **axes)
+        p3.iloc[0, 0, :] = [1, 2, 3]
+        expected = Panel(arr, **axes)
+        tm.assert_panel_equal(p3, expected)
+
+        axes['minor_axis'] = pd.MultiIndex.from_product([['X'], range(3)])
+        p4 = Panel(0., **axes)
+        p4.iloc[0, 0, :] = [1, 2, 3]
+        expected = Panel(arr, **axes)
+        tm.assert_panel_equal(p4, expected)
+
+        arr = np.array(
+            [[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]], dtype=np.float64)
+        p5 = Panel(0., **axes)
+        p5.iloc[0, :, 0] = [1, 2]
+        expected = Panel(arr, **axes)
+        tm.assert_panel_equal(p5, expected)
diff --git a/pandas/tests/indexing/test_panel.py b/pandas/tests/indexing/test_panel.py
new file mode 100644
index 0000000000000..5ec3076af599a
--- /dev/null
+++ b/pandas/tests/indexing/test_panel.py
@@ -0,0 +1,209 @@
+import numpy as np
+from pandas.util import testing as tm
+from pandas import Panel, date_range, DataFrame
+
+
+class TestPanel(tm.TestCase):
+
+    def test_iloc_getitem_panel(self):
+
+        # GH 7189
+        p = Panel(np.arange(4 * 3 * 2).reshape(4, 3, 2),
+                  items=['A', 'B', 'C', 'D'],
+                  major_axis=['a', 'b', 'c'],
+                  minor_axis=['one', 'two'])
+
+        result = p.iloc[1]
+        expected = p.loc['B']
+        tm.assert_frame_equal(result, expected)
+
+        result = p.iloc[1, 1]
+        expected = p.loc['B', 'b']
+        tm.assert_series_equal(result, expected)
+
+        result = p.iloc[1, 1, 1]
+        expected = p.loc['B', 'b', 'two']
+        self.assertEqual(result, expected)
+
+        # slice
+        result = p.iloc[1:3]
+        expected = p.loc[['B', 'C']]
+        tm.assert_panel_equal(result, expected)
+
+        result = p.iloc[:, 0:2]
+        expected = p.loc[:, ['a', 'b']]
+        tm.assert_panel_equal(result, expected)
+
+        # list of integers
+        result = p.iloc[[0, 2]]
+        expected = p.loc[['A', 'C']]
+        tm.assert_panel_equal(result, expected)
+
+        # neg indices
+        result = p.iloc[[-1, 1], [-1, 1]]
+        expected = p.loc[['D', 'B'], ['c', 'b']]
+        tm.assert_panel_equal(result, expected)
+
+        # dups indices
+        result = p.iloc[[-1, -1, 1], [-1, 1]]
+        expected = p.loc[['D', 'D', 'B'], ['c', 'b']]
+        tm.assert_panel_equal(result, expected)
+
+        # combined
+        result = p.iloc[0, [True, True], [0, 1]]
+        expected = p.loc['A', ['a', 'b'], ['one', 'two']]
+        tm.assert_frame_equal(result, expected)
+
+        # out-of-bounds exception
+        self.assertRaises(IndexError, p.iloc.__getitem__, tuple([10, 5]))
+
+        def f():
+            p.iloc[0, [True, True], [0, 1, 2]]
+
+        self.assertRaises(IndexError, f)
+
+        # trying to use a label
+        self.assertRaises(ValueError, p.iloc.__getitem__, tuple(['j', 'D']))
+
+        # GH
+        p = Panel(
+            np.random.rand(4, 3, 2), items=['A', 'B', 'C', 'D'],
+            major_axis=['U', 'V', 'W'], minor_axis=['X', 'Y'])
+        expected = p['A']
+
+        result = p.iloc[0, :, :]
+        tm.assert_frame_equal(result, expected)
+
+        result = p.iloc[0, [True, True, True], :]
+        tm.assert_frame_equal(result, expected)
+
+        result = p.iloc[0, [True, True, True], [0, 1]]
+        tm.assert_frame_equal(result, expected)
+
+        def f():
+            p.iloc[0, [True, True, True], [0, 1, 2]]
+
+        self.assertRaises(IndexError, f)
+
+        def f():
+            p.iloc[0, [True, True, True], [2]]
+
+        self.assertRaises(IndexError, f)
+
+    def test_iloc_panel_issue(self):
+
+        # GH 3617
+        p = Panel(np.random.randn(4, 4, 4))
+
+        self.assertEqual(p.iloc[:3, :3, :3].shape, (3, 3, 3))
+        self.assertEqual(p.iloc[1, :3, :3].shape, (3, 3))
+        self.assertEqual(p.iloc[:3, 1, :3].shape, (3, 3))
+        self.assertEqual(p.iloc[:3, :3, 1].shape, (3, 3))
+        self.assertEqual(p.iloc[1, 1, :3].shape, (3, ))
+        self.assertEqual(p.iloc[1, :3, 1].shape, (3, ))
+        self.assertEqual(p.iloc[:3, 1, 1].shape, (3, ))
+
+    def test_panel_getitem(self):
+        # GH4016, date selection returns a frame when a partial string
+        # selection
+        ind = date_range(start="2000", freq="D", periods=1000)
+        df = DataFrame(
+            np.random.randn(
+                len(ind), 5), index=ind, columns=list('ABCDE'))
+        panel = Panel(dict([('frame_' + c, df) for c in list('ABC')]))
+
+        test2 = panel.ix[:, "2002":"2002-12-31"]
+        test1 = panel.ix[:, "2002"]
+        tm.assert_panel_equal(test1, test2)
+
+        # GH8710
+        # multi-element getting with a list
+        panel = tm.makePanel()
+
+        expected = panel.iloc[[0, 1]]
+
+        result = panel.loc[['ItemA', 'ItemB']]
+        tm.assert_panel_equal(result, expected)
+
+        result = panel.loc[['ItemA', 'ItemB'], :, :]
+        tm.assert_panel_equal(result, expected)
+
+        result = panel[['ItemA', 'ItemB']]
+        tm.assert_panel_equal(result, expected)
+
+        result = panel.loc['ItemA':'ItemB']
+        tm.assert_panel_equal(result, expected)
+
+        result = panel.ix['ItemA':'ItemB']
+        tm.assert_panel_equal(result, expected)
+
+        result = panel.ix[['ItemA', 'ItemB']]
+        tm.assert_panel_equal(result, expected)
+
+        # with an object-like
+        # GH 9140
+        class TestObject:
+
+            def __str__(self):
+                return "TestObject"
+
+        obj = TestObject()
+
+        p = Panel(np.random.randn(1, 5, 4), items=[obj],
+                  major_axis=date_range('1/1/2000', periods=5),
+                  minor_axis=['A', 'B', 'C', 'D'])
+
+        expected = p.iloc[0]
+        result = p[obj]
+        tm.assert_frame_equal(result, expected)
+
+    def test_panel_setitem(self):
+
+        # GH 7763
+        # loc and setitem have setting differences
+        np.random.seed(0)
+        index = range(3)
+        columns = list('abc')
+
+        panel = Panel({'A': DataFrame(np.random.randn(3, 3),
+                                      index=index, columns=columns),
+                       'B': DataFrame(np.random.randn(3, 3),
+                                      index=index, columns=columns),
+                       'C': DataFrame(np.random.randn(3, 3),
+                                      index=index, columns=columns)})
+
+        replace = DataFrame(np.eye(3, 3), index=range(3), columns=columns)
+        expected = Panel({'A': replace, 'B': replace, 'C': replace})
+
+        p = panel.copy()
+        for idx in list('ABC'):
+            p[idx] = replace
+        tm.assert_panel_equal(p, expected)
+
+        p = panel.copy()
+        for idx in list('ABC'):
+            p.loc[idx, :, :] = replace
+        tm.assert_panel_equal(p, expected)
+
+    def test_panel_assignment(self):
+        # GH3777
+        wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
+                   major_axis=date_range('1/1/2000', periods=5),
+                   minor_axis=['A', 'B', 'C', 'D'])
+        wp2 = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
+                    major_axis=date_range('1/1/2000', periods=5),
+                    minor_axis=['A', 'B', 'C', 'D'])
+
+        # TODO: unused?
+        # expected = wp.loc[['Item1', 'Item2'], :, ['A', 'B']]
+
+        def f():
+            wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = wp2.loc[
+                ['Item1', 'Item2'], :, ['A', 'B']]
+
+        self.assertRaises(NotImplementedError, f)
+
+        # to_assign = wp2.loc[['Item1', 'Item2'], :, ['A', 'B']]
+        # wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = to_assign
+        # result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']]
+        # tm.assert_panel_equal(result,expected)
diff --git a/pandas/tests/indexing/test_timedelta.py b/pandas/tests/indexing/test_timedelta.py
new file mode 100644
index 0000000000000..e5ccd72cac20a
--- /dev/null
+++ b/pandas/tests/indexing/test_timedelta.py
@@ -0,0 +1,21 @@
+import pandas as pd
+from pandas.util import testing as tm
+
+
+class TestTimedeltaIndexing(tm.TestCase):
+
+    def test_boolean_indexing(self):
+        # GH 14946
+        df = pd.DataFrame({'x': range(10)})
+        df.index = pd.to_timedelta(range(10), unit='s')
+        conditions = [df['x'] > 3, df['x'] == 3, df['x'] < 3]
+        expected_data = [[0, 1, 2, 3, 10, 10, 10, 10, 10, 10],
+                         [0, 1, 2, 10, 4, 5, 6, 7, 8, 9],
+                         [10, 10, 10, 3, 4, 5, 6, 7, 8, 9]]
+        for cond, data in zip(conditions, expected_data):
+            result = df.copy()
+            result.loc[cond, 'x'] = 10
+            expected = pd.DataFrame(data,
+                                    index=pd.to_timedelta(range(10), unit='s'),
+                                    columns=['x'])
+            tm.assert_frame_equal(expected, result)
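[Editor's note: the test file above repeatedly spells the same MultiIndex selection several ways. As a quick orientation before the next patch, here is a minimal, self-contained sketch of those equivalent spellings; the frame, labels, and shape are invented for illustration and are not taken from the patch.]

import numpy as np
import pandas as pd

idx = pd.IndexSlice
index = pd.MultiIndex.from_product([['A0', 'A1'], ['B0', 'B1']],
                                   names=['one', 'two'])
df = pd.DataFrame(np.arange(8).reshape(4, 2),
                  index=index, columns=['x', 'y']).sort_index()

# Three equivalent ways to take every 'B1' row across level 'one':
r1 = df.loc[(slice(None), 'B1'), :]  # explicit tuple of slices
r2 = df.loc[idx[:, 'B1'], :]         # pd.IndexSlice sugar
r3 = df.loc(axis=0)[:, 'B1']         # per-axis indexer
assert r1.equals(r2) and r2.equals(r3)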
From 2100a3af599449efd3edf3dc020f60f8e6436227 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sat, 11 Feb 2017 23:02:13 +0100
Subject: [PATCH 039/933] DOC: fix py3 compat (change lost in FAQ-gotchas merge)

---
 doc/source/advanced.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
index 21ae9f1eb8409..8833d73cb0a84 100644
--- a/doc/source/advanced.rst
+++ b/doc/source/advanced.rst
@@ -880,7 +880,7 @@ normal Python ``list``. Monotonicity of an index can be tested with the ``is_mon
 
 .. ipython:: python
 
-   df = pd.DataFrame(index=[2,3,3,4,5], columns=['data'], data=range(5))
+   df = pd.DataFrame(index=[2,3,3,4,5], columns=['data'], data=list(range(5)))
 
    df.index.is_monotonic_increasing
 
    # no rows 0 or 1, but still returns rows 2, 3 (both of them), and 4:
@@ -894,7 +894,7 @@ On the other hand, if the index is not monotonic, then both slice bounds must be
 
 .. ipython:: python
 
-   df = pd.DataFrame(index=[2,3,1,4,3,5], columns=['data'], data=range(6))
+   df = pd.DataFrame(index=[2,3,1,4,3,5], columns=['data'], data=list(range(6)))
 
    df.index.is_monotonic_increasing
 
    # OK because 2 and 4 are in the index
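[Editor's note: the change above is only a py3 fix (``range`` becomes ``list(range(...))`` so the rendered frame holds a concrete list), but the slicing gotcha the surrounding prose documents is easy to restate. A minimal sketch, with index values assumed for illustration:]

import pandas as pd

df = pd.DataFrame({'data': list(range(6))}, index=[2, 3, 1, 4, 3, 5])
print(df.index.is_monotonic_increasing)  # False: the index is unsorted

# Both bounds are actual labels, so this slice is still allowed:
print(df.loc[2:4])

# On a non-monotonic index, a bound that is absent from the index
# raises KeyError; after sort_index() the same missing bound is fine:
print(df.sort_index().loc[0:4])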
From 61a243b858fe41aac81d20a75e5f1e86baefd868 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Sat, 11 Feb 2017 18:56:19 -0500
Subject: [PATCH 040/933] TST: remove nose

TST: raise nose.SkipTest -> pytest.skip

TST: remove KnownFailure (unused), should be replaced by pytest.xfail anyhow

xref #15341

Author: Jeff Reback

Closes #15368 from jreback/skip and squashes the following commits:

afdb5f9 [Jeff Reback] TST: raise nose.SkipTest -> pytest.skip

---
 .travis.yml | 2 +-
 ci/install_test.sh | 1 -
 ci/install_travis.sh | 13 +-
 ci/lint.sh | 2 +-
 ci/requirements_all.txt | 2 +-
 ci/requirements_dev.txt | 2 +-
 ci/script.sh | 6 +-
 doc/source/contributing.rst | 2 +-
 pandas/computation/tests/test_compat.py | 8 +-
 pandas/computation/tests/test_eval.py | 19 ++-
 pandas/io/tests/json/test_pandas.py | 6 +-
 pandas/io/tests/json/test_ujson.py | 12 +-
 pandas/io/tests/parser/c_parser_only.py | 4 +-
 pandas/io/tests/parser/common.py | 8 +-
 pandas/io/tests/parser/compression.py | 8 +-
 pandas/io/tests/parser/converters.py | 6 +-
 pandas/io/tests/parser/parse_dates.py | 8 +-
 pandas/io/tests/parser/python_parser_only.py | 6 +-
 pandas/io/tests/parser/test_network.py | 4 +-
 pandas/io/tests/parser/test_read_fwf.py | 8 +-
 pandas/io/tests/parser/usecols.py | 4 +-
 pandas/io/tests/test_clipboard.py | 7 +-
 pandas/io/tests/test_excel.py | 42 +++---
 pandas/io/tests/test_feather.py | 9 +-
 pandas/io/tests/test_gbq.py | 26 ++--
 pandas/io/tests/test_html.py | 12 +-
 pandas/io/tests/test_packers.py | 28 ++--
 pandas/io/tests/test_pickle.py | 4 +-
 pandas/io/tests/test_pytables.py | 29 ++--
 pandas/io/tests/test_sql.py | 38 ++---
 pandas/io/tests/test_stata.py | 4 +-
 pandas/sparse/tests/test_indexing.py | 2 +-
 pandas/sparse/tests/test_libsparse.py | 4 +-
 pandas/sparse/tests/test_series.py | 4 +-
 pandas/tests/formats/test_format.py | 12 +-
 pandas/tests/formats/test_printing.py | 2 +-
 pandas/tests/formats/test_style.py | 18 +--
 pandas/tests/frame/test_analytics.py | 6 +-
 pandas/tests/frame/test_constructors.py | 6 +-
 pandas/tests/frame/test_missing.py | 4 +-
 pandas/tests/frame/test_operators.py | 4 +-
 pandas/tests/frame/test_quantile.py | 6 +-
 pandas/tests/frame/test_query_eval.py | 8 +-
 pandas/tests/groupby/test_misc.py | 4 +-
 pandas/tests/indexes/datetimes/test_tools.py | 6 +-
 pandas/tests/indexes/test_multi.py | 6 +-
 pandas/tests/indexing/test_coercion.py | 12 +-
 pandas/tests/plotting/common.py | 4 +-
 pandas/tests/plotting/test_boxplot_method.py | 4 +-
 pandas/tests/plotting/test_datetimelike.py | 6 +-
 pandas/tests/plotting/test_frame.py | 4 +-
 pandas/tests/series/test_analytics.py | 16 +-
 pandas/tests/series/test_missing.py | 8 +-
 pandas/tests/series/test_quantile.py | 8 +-
 pandas/tests/test_base.py | 4 +-
 pandas/tests/test_expressions.py | 96 ++++++------
 pandas/tests/test_generic.py | 6 +-
 pandas/tests/test_internals.py | 4 +-
 pandas/tests/test_msgpack/test_unpack.py | 4 +-
 pandas/tests/test_multilevel.py | 4 +-
 pandas/tests/test_panel.py | 12 +-
 pandas/tests/test_panel4d.py | 4 +-
 pandas/tests/test_testing.py | 33 +---
 pandas/tests/test_window.py | 16 +-
 pandas/tests/tseries/test_converter.py | 7 +-
 pandas/tests/tseries/test_offsets.py | 8 +-
 pandas/tools/tests/test_util.py | 16 +-
 pandas/util/decorators.py | 59 --------
 pandas/util/print_versions.py | 2 +-
 pandas/util/testing.py | 150 ++++++++-----------
 setup.cfg | 2 +-
 71 files changed, 389 insertions(+), 522 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index b38c99e3a5be9..2ff5d508d0371 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -331,5 +331,5 @@ after_script:
   - echo "after_script start"
   - ci/install_test.sh
   - source activate pandas && python -c "import pandas; pandas.show_versions();"
-  - ci/print_skipped.py /tmp/nosetests.xml
+  - ci/print_skipped.py /tmp/pytest.xml
   - echo "after_script done"
diff --git a/ci/install_test.sh b/ci/install_test.sh
index cbb84d8fa4b65..9ace633d7f39d 100755
--- a/ci/install_test.sh
+++ b/ci/install_test.sh
@@ -8,7 +8,6 @@ if [ "$INSTALL_TEST" ]; then
     conda uninstall cython || exit 1
     python "$TRAVIS_BUILD_DIR"/setup.py sdist --formats=zip,gztar || exit 1
     pip install "$TRAVIS_BUILD_DIR"/dist/*tar.gz || exit 1
-    # nosetests --exe -A "$TEST_ARGS" pandas/tests/test_series.py --with-xunit --xunit-file=/tmp/nosetests_install.xml
     pytest pandas/tests/test_series.py --junitxml=/tmp/pytest_install.xml
 else
     echo "Skipping installation test."
diff --git a/ci/install_travis.sh b/ci/install_travis.sh
index f65176fb1147c..ad804b96a0d82 100755
--- a/ci/install_travis.sh
+++ b/ci/install_travis.sh
@@ -92,12 +92,7 @@ if [ -e ${INSTALL} ]; then
     time bash $INSTALL || exit 1
 else
     # create new env
-    time conda create -n pandas python=$PYTHON_VERSION nose pytest || exit 1
-
-    if [ "$LINT" ]; then
-        conda install flake8
-        pip install cpplint
-    fi
+    time conda create -n pandas python=$PYTHON_VERSION pytest || exit 1
 fi
 
 # build deps
@@ -116,6 +111,12 @@ fi
 
 source activate pandas
 
+pip install pytest-xdist
+if [ "$LINT" ]; then
+    conda install flake8
+    pip install cpplint
+fi
+
 if [ "$COVERAGE" ]; then
     pip install coverage pytest-cov
 fi
diff --git a/ci/lint.sh b/ci/lint.sh
index 2cbfdadf486b8..2ffc68e5eb139 100755
--- a/ci/lint.sh
+++ b/ci/lint.sh
@@ -55,7 +55,7 @@ if [ "$LINT" ]; then
     echo "Linting *.c and *.h DONE"
 
     echo "Check for invalid testing"
-    grep -r -E --include '*.py' --exclude nosetester.py --exclude testing.py '(numpy|np)\.testing' pandas
+    grep -r -E --include '*.py' --exclude testing.py '(numpy|np)\.testing' pandas
     if [ $? = "0" ]; then
         RET=1
     fi
diff --git a/ci/requirements_all.txt b/ci/requirements_all.txt
index b64143fcd4ecd..4ff80a478f247 100644
--- a/ci/requirements_all.txt
+++ b/ci/requirements_all.txt
@@ -1,6 +1,6 @@
-nose
 pytest
 pytest-cov
+pytest-xdist
 flake8
 sphinx
 ipython
diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt
index b8af9d035de98..b0a8adc8df5cb 100644
--- a/ci/requirements_dev.txt
+++ b/ci/requirements_dev.txt
@@ -2,7 +2,7 @@ python-dateutil
 pytz
 numpy
 cython
-nose
 pytest
 pytest-cov
+pytest-xdist
 flake8
diff --git a/ci/script.sh b/ci/script.sh
index 3eac3002d6805..c52fa0fdb33a3 100755
--- a/ci/script.sh
+++ b/ci/script.sh
@@ -18,10 +18,10 @@ if [ -n "$LOCALE_OVERRIDE" ]; then
 fi
 
 if [ "$BUILD_TEST" ]; then
-    echo "We are not running nosetests as this is simply a build test."
+    echo "We are not running pytest as this is simply a build test."
elif [ "$COVERAGE" ]; then - echo pytest -s --cov=pandas --cov-report xml:/tmp/nosetests.xml $TEST_ARGS pandas - pytest -s --cov=pandas --cov-report xml:/tmp/nosetests.xml $TEST_ARGS pandas + echo pytest -s --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas + pytest -s --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas else echo pytest $TEST_ARGS pandas pytest $TEST_ARGS pandas # TODO: doctest diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 3ef9ed8962a23..5c2bb9b73d618 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -734,7 +734,7 @@ gbq integration tests on a forked repository: the status by visiting your Travis branches page which exists at the following location: https://travis-ci.org/your-user-name/pandas/branches . Click on a build job for your branch. Expand the following line in the - build log: ``ci/print_skipped.py /tmp/nosetests.xml`` . Search for the + build log: ``ci/print_skipped.py /tmp/pytest.xml`` . Search for the term ``test_gbq`` and confirm that gbq integration tests are not skipped. Running the vbench performance test suite (phasing out) diff --git a/pandas/computation/tests/test_compat.py b/pandas/computation/tests/test_compat.py index 900dd2c28b4c5..599d0c10336dc 100644 --- a/pandas/computation/tests/test_compat.py +++ b/pandas/computation/tests/test_compat.py @@ -1,7 +1,7 @@ # flake8: noqa -import nose +import pytest from itertools import product from distutils.version import LooseVersion @@ -31,7 +31,7 @@ def test_compat(): assert _NUMEXPR_INSTALLED except ImportError: - raise nose.SkipTest("not testing numexpr version compat") + pytest.skip("not testing numexpr version compat") def test_invalid_numexpr_version(): @@ -49,14 +49,14 @@ def testit(): try: import numexpr as ne except ImportError: - raise nose.SkipTest("no numexpr") + pytest.skip("no numexpr") else: if ne.__version__ < LooseVersion('2.1'): with tm.assertRaisesRegexp(ImportError, "'numexpr' version is " ".+, must be >= 2.1"): testit() elif ne.__version__ == LooseVersion('2.4.4'): - raise nose.SkipTest("numexpr version==2.4.4") + pytest.skip("numexpr version==2.4.4") else: testit() else: diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index a4bb81ce7263c..ada714c8ac52e 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -6,8 +6,7 @@ from itertools import product from distutils.version import LooseVersion -import nose -from nose.tools import assert_raises +import pytest from numpy.random import randn, rand, randint import numpy as np @@ -319,7 +318,7 @@ def get_expected_pow_result(self, lhs, rhs): except ValueError as e: if str(e).startswith('negative number cannot be raised to a fractional power'): if self.engine == 'python': - raise nose.SkipTest(str(e)) + pytest.skip(str(e)) else: expected = np.nan else: @@ -1174,13 +1173,15 @@ def test_bool_ops_with_constants(self): def test_panel_fails(self): x = Panel(randn(3, 4, 5)) y = Series(randn(10)) - assert_raises(NotImplementedError, self.eval, 'x + y', + with pytest.raises(NotImplementedError): + self.eval('x + y', local_dict={'x': x, 'y': y}) def test_4d_ndarray_fails(self): x = randn(3, 4, 5, 6) y = Series(randn(10)) - assert_raises(NotImplementedError, self.eval, 'x + y', + with pytest.raises(NotImplementedError): + self.eval('x + y', local_dict={'x': x, 'y': y}) def test_constant(self): @@ -1705,7 +1706,7 @@ def test_result_types(self): def test_result_types2(self): # xref 
         # xref https://github.com/pandas-dev/pandas/issues/12293
-        raise nose.SkipTest("unreliable tests on complex128")
+        pytest.skip("unreliable tests on complex128")
 
         # Did not test complex64 because DataFrame is converting it to
         # complex128. Due to https://github.com/pandas-dev/pandas/issues/10952
@@ -1822,7 +1823,8 @@ def check_disallowed_nodes(engine, parser):
     inst = VisitorClass('x + 1', engine, parser)
 
     for ops in uns_ops:
-        assert_raises(NotImplementedError, getattr(inst, ops))
+        with pytest.raises(NotImplementedError):
+            getattr(inst, ops)()
 
 
 def test_disallowed_nodes():
@@ -1833,7 +1835,8 @@ def test_disallowed_nodes():
 def check_syntax_error_exprs(engine, parser):
     tm.skip_if_no_ne(engine)
     e = 's +'
-    assert_raises(SyntaxError, pd.eval, e, engine=engine, parser=parser)
+    with pytest.raises(SyntaxError):
+        pd.eval(e, engine=engine, parser=parser)
 
 
 def test_syntax_error_exprs():
diff --git a/pandas/io/tests/json/test_pandas.py b/pandas/io/tests/json/test_pandas.py
index 440f5c13d5121..c298b3841096c 100644
--- a/pandas/io/tests/json/test_pandas.py
+++ b/pandas/io/tests/json/test_pandas.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # pylint: disable-msg=W0612,E1101
-import nose
+import pytest
 from pandas.compat import range, lrange, StringIO, OrderedDict
 import os
 
@@ -1009,8 +1009,8 @@ def test_latin_encoding(self):
             return
 
         # GH 13774
-        raise nose.SkipTest("encoding not implemented in .to_json(), "
-                            "xref #13774")
+        pytest.skip("encoding not implemented in .to_json(), "
+                    "xref #13774")
 
         values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'],
                   [b'E\xc9, 17', b'a', b'b', b'c'],
diff --git a/pandas/io/tests/json/test_ujson.py b/pandas/io/tests/json/test_ujson.py
index 3da61b7696fdc..6a986710ae444 100644
--- a/pandas/io/tests/json/test_ujson.py
+++ b/pandas/io/tests/json/test_ujson.py
@@ -7,7 +7,7 @@
 except ImportError:
     import simplejson as json
 import math
-import nose
+import pytest
 import platform
 import sys
 import time
@@ -28,7 +28,7 @@ def _skip_if_python_ver(skip_major, skip_minor=None):
     major, minor = sys.version_info[:2]
     if major == skip_major and (skip_minor is None or minor == skip_minor):
-        raise nose.SkipTest("skipping Python version %d.%d" % (major, minor))
+        pytest.skip("skipping Python version %d.%d" % (major, minor))
 
 
 json_unicode = (json.dumps if compat.PY3
@@ -95,7 +95,7 @@ def test_encodeNonCLocale(self):
             try:
                 locale.setlocale(locale.LC_NUMERIC, 'Italian_Italy')
             except:
-                raise nose.SkipTest('Could not set locale for testing')
+                pytest.skip('Could not set locale for testing')
         self.assertEqual(ujson.loads(ujson.dumps(4.78e60)), 4.78e60)
         self.assertEqual(ujson.loads('4.78', precise_float=True), 4.78)
         locale.setlocale(locale.LC_NUMERIC, savedlocale)
@@ -113,7 +113,7 @@ def test_decimalDecodeTestPrecise(self):
 
     def test_encodeDoubleTinyExponential(self):
         if compat.is_platform_windows() and not compat.PY3:
-            raise nose.SkipTest("buggy on win-64 for py2")
+            pytest.skip("buggy on win-64 for py2")
 
         num = 1e-40
         self.assertEqual(num, ujson.decode(ujson.encode(num)))
@@ -393,8 +393,8 @@ def test_nat(self):
     def test_npy_nat(self):
         from distutils.version import LooseVersion
         if LooseVersion(np.__version__) < '1.7.0':
-            raise nose.SkipTest("numpy version < 1.7.0, is "
-                                "{0}".format(np.__version__))
+            pytest.skip("numpy version < 1.7.0, is "
+                        "{0}".format(np.__version__))
 
         input = np.datetime64('NaT')
         assert ujson.encode(input) == 'null', "Expected null"
diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/io/tests/parser/c_parser_only.py
index 11073f3f108ba..ffbd904843bfc 100644
--- a/pandas/io/tests/parser/c_parser_only.py
+++ b/pandas/io/tests/parser/c_parser_only.py
@@ -7,7 +7,7 @@
 further arguments when parsing.
 """
 
-import nose
+import pytest
 
 import numpy as np
 import pandas as pd
@@ -159,7 +159,7 @@ def error(val):
 
     def test_pass_dtype_as_recarray(self):
         if compat.is_platform_windows() and self.low_memory:
-            raise nose.SkipTest(
+            pytest.skip(
                 "segfaults on win-64, only when all tests are run")
 
         data = """\
diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
index 9655c481b763a..0671901fc170a 100644
--- a/pandas/io/tests/parser/common.py
+++ b/pandas/io/tests/parser/common.py
@@ -9,7 +9,7 @@
 import sys
 from datetime import datetime
 
-import nose
+import pytest
 import numpy as np
 
 from pandas.lib import Timestamp
@@ -635,8 +635,8 @@ def test_file(self):
             url_table = self.read_table('file://localhost/' + localtable)
         except URLError:
             # fails on some systems
-            raise nose.SkipTest("failing on %s" %
-                                ' '.join(platform.uname()).strip())
+            pytest.skip("failing on %s" %
+                        ' '.join(platform.uname()).strip())
 
         tm.assert_frame_equal(url_table, local_table)
 
@@ -1262,7 +1262,7 @@ def test_verbose_import(self):
 
     def test_iteration_open_handle(self):
         if PY3:
-            raise nose.SkipTest(
+            pytest.skip(
                 "won't work in Python 3 {0}".format(sys.version_info))
 
         with tm.ensure_clean() as path:
diff --git a/pandas/io/tests/parser/compression.py b/pandas/io/tests/parser/compression.py
index 308ca6e8a5a2c..bdcd10fc64aa5 100644
--- a/pandas/io/tests/parser/compression.py
+++ b/pandas/io/tests/parser/compression.py
@@ -5,7 +5,7 @@
 of the parsers defined in parsers.py
 """
 
-import nose
+import pytest
 
 import pandas.util.testing as tm
 
@@ -16,7 +16,7 @@ def test_zip(self):
         try:
             import zipfile
         except ImportError:
-            raise nose.SkipTest('need zipfile to run')
+            pytest.skip('need zipfile to run')
 
         with open(self.csv1, 'rb') as data_file:
             data = data_file.read()
@@ -67,7 +67,7 @@ def test_gzip(self):
         try:
             import gzip
         except ImportError:
-            raise nose.SkipTest('need gzip to run')
+            pytest.skip('need gzip to run')
 
         with open(self.csv1, 'rb') as data_file:
             data = data_file.read()
@@ -96,7 +96,7 @@ def test_bz2(self):
         try:
             import bz2
         except ImportError:
-            raise nose.SkipTest('need bz2 to run')
+            pytest.skip('need bz2 to run')
 
         with open(self.csv1, 'rb') as data_file:
             data = data_file.read()
diff --git a/pandas/io/tests/parser/converters.py b/pandas/io/tests/parser/converters.py
index 2ceaff9291e7e..859d2e19bd56a 100644
--- a/pandas/io/tests/parser/converters.py
+++ b/pandas/io/tests/parser/converters.py
@@ -7,7 +7,7 @@
 
 from datetime import datetime
 
-import nose
+import pytest
 
 import numpy as np
 import pandas as pd
@@ -84,8 +84,8 @@ def test_converter_return_string_bug(self):
     def test_converters_corner_with_nas(self):
         # skip aberration observed on Win64 Python 3.2.2
         if hash(np.int64(-1)) != -2:
-            raise nose.SkipTest("skipping because of windows hash on Python"
-                                " 3.2.2")
+            pytest.skip("skipping because of windows hash on Python"
+                        " 3.2.2")
 
         data = """id,score,days
 1,2,12
diff --git a/pandas/io/tests/parser/parse_dates.py b/pandas/io/tests/parser/parse_dates.py
index ad3d5f2382a49..6197d07d4eafa 100644
--- a/pandas/io/tests/parser/parse_dates.py
+++ b/pandas/io/tests/parser/parse_dates.py
@@ -8,7 +8,7 @@
 from distutils.version import LooseVersion
 from datetime import datetime
 
-import nose
+import pytest
 import numpy as np
 import pandas.lib as lib
 from pandas.lib import Timestamp
@@ -268,9 +268,9 @@ def test_yy_format_with_yearfirst(self):
         # See gh-217
         import dateutil
         if dateutil.__version__ >= LooseVersion('2.5.0'):
-            raise nose.SkipTest("testing yearfirst=True not-support"
-                                "on datetutil < 2.5.0 this works but"
-                                "is wrong")
+            pytest.skip("testing yearfirst=True is not supported "
+                        "on dateutil < 2.5.0; this works but "
+                        "is wrong")
 
         rs = self.read_csv(StringIO(data), index_col=0,
                            parse_dates=[['date', 'time']])
diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py
index 283ff366b5efd..bd76070933c47 100644
--- a/pandas/io/tests/parser/python_parser_only.py
+++ b/pandas/io/tests/parser/python_parser_only.py
@@ -9,7 +9,7 @@
 import csv
 import sys
 
-import nose
+import pytest
 
 import pandas.util.testing as tm
 from pandas import DataFrame, Index
@@ -79,7 +79,7 @@ def test_sniff_delimiter(self):
 
     def test_BytesIO_input(self):
         if not compat.PY3:
-            raise nose.SkipTest(
+            pytest.skip(
                 "Bytes-related test - only needs to work on Python 3")
 
         data = BytesIO("שלום::1234\n562::123".encode('cp1255'))
@@ -130,7 +130,7 @@ def test_decompression_regex_sep(self):
             import gzip
             import bz2
         except ImportError:
-            raise nose.SkipTest('need gzip and bz2 to run')
+            pytest.skip('need gzip and bz2 to run')
 
         with open(self.csv1, 'rb') as f:
             data = f.read()
diff --git a/pandas/io/tests/parser/test_network.py b/pandas/io/tests/parser/test_network.py
index 533b7733bde28..4d75b59b09560 100644
--- a/pandas/io/tests/parser/test_network.py
+++ b/pandas/io/tests/parser/test_network.py
@@ -6,7 +6,7 @@
 """
 
 import os
-import nose
+import pytest
 import functools
 from itertools import product
 
@@ -59,7 +59,7 @@ def setUp(self):
         try:
             import s3fs  # noqa
         except ImportError:
-            raise nose.SkipTest("s3fs not installed")
+            pytest.skip("s3fs not installed")
 
     @tm.network
     def test_parse_public_s3_bucket(self):
diff --git a/pandas/io/tests/parser/test_read_fwf.py b/pandas/io/tests/parser/test_read_fwf.py
index a423355081ac3..dccae06afe4d1 100644
--- a/pandas/io/tests/parser/test_read_fwf.py
+++ b/pandas/io/tests/parser/test_read_fwf.py
@@ -8,7 +8,7 @@
 
 from datetime import datetime
 
-import nose
+import pytest
 import numpy as np
 import pandas as pd
 import pandas.util.testing as tm
@@ -75,7 +75,7 @@ def test_fwf(self):
 
     def test_BytesIO_input(self):
         if not compat.PY3:
-            raise nose.SkipTest(
+            pytest.skip(
                 "Bytes-related test - only needs to work on Python 3")
 
         result = read_fwf(BytesIO("שלום\nשלום".encode('utf8')), widths=[
@@ -192,7 +192,7 @@ def test_fwf_compression(self):
             import gzip
             import bz2
         except ImportError:
-            raise nose.SkipTest("Need gzip and bz2 to run this test")
+            pytest.skip("Need gzip and bz2 to run this test")
 
         data = """1111111111
 2222222222
@@ -333,7 +333,7 @@ def test_multiple_delimiters(self):
 
     def test_variable_width_unicode(self):
         if not compat.PY3:
-            raise nose.SkipTest(
+            pytest.skip(
                 'Bytes-related test - only needs to work on Python 3')
         test = """
 שלום שלום
diff --git a/pandas/io/tests/parser/usecols.py b/pandas/io/tests/parser/usecols.py
index 4875282067fb3..95df077dae997 100644
--- a/pandas/io/tests/parser/usecols.py
+++ b/pandas/io/tests/parser/usecols.py
@@ -5,7 +5,7 @@
 for all of the parsers defined in parsers.py
 """
 
-import nose
+import pytest
 
 import numpy as np
 import pandas.util.testing as tm
@@ -377,7 +377,7 @@ def test_usecols_with_multibyte_characters(self):
         tm.assert_frame_equal(df, expected)
 
     def test_usecols_with_multibyte_unicode_characters(self):
-        raise nose.SkipTest('TODO: see gh-13253')
+        pytest.skip('TODO: see gh-13253')
 
         s = '''あああ,いい,ううう,ええええ
 0.056674973,8,True,a
diff --git a/pandas/io/tests/test_clipboard.py b/pandas/io/tests/test_clipboard.py
index 98a4152754b55..3abd1093362f4 100644
--- a/pandas/io/tests/test_clipboard.py
+++ b/pandas/io/tests/test_clipboard.py
@@ -2,7 +2,7 @@
 import numpy as np
 from numpy.random import randint
 
-import nose
+import pytest
 import pandas as pd
 
 from pandas import DataFrame
@@ -15,10 +15,13 @@
 
 try:
     DataFrame({'A': [1, 2]}).to_clipboard()
+    _DEPS_INSTALLED = 1
 except PyperclipException:
-    raise nose.SkipTest("clipboard primitives not installed")
+    _DEPS_INSTALLED = 0
 
 
+@pytest.mark.skipif(not _DEPS_INSTALLED,
+                    reason="clipboard primitives not installed")
 class TestClipboard(tm.TestCase):
 
     @classmethod
diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
index 2791e397d5b86..a22c89184f20d 100644
--- a/pandas/io/tests/test_excel.py
+++ b/pandas/io/tests/test_excel.py
@@ -9,7 +9,7 @@
 import warnings
 import operator
 import functools
-import nose
+import pytest
 
 from numpy import nan
 import numpy as np
@@ -32,30 +32,30 @@ def _skip_if_no_xlrd():
         import xlrd
         ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2]))
         if ver < (0, 9):
-            raise nose.SkipTest('xlrd < 0.9, skipping')
+            pytest.skip('xlrd < 0.9, skipping')
     except ImportError:
-        raise nose.SkipTest('xlrd not installed, skipping')
+        pytest.skip('xlrd not installed, skipping')
 
 
 def _skip_if_no_xlwt():
     try:
         import xlwt  # NOQA
     except ImportError:
-        raise nose.SkipTest('xlwt not installed, skipping')
+        pytest.skip('xlwt not installed, skipping')
 
 
 def _skip_if_no_openpyxl():
     try:
         import openpyxl  # NOQA
     except ImportError:
-        raise nose.SkipTest('openpyxl not installed, skipping')
+        pytest.skip('openpyxl not installed, skipping')
 
 
 def _skip_if_no_xlsxwriter():
     try:
         import xlsxwriter  # NOQA
     except ImportError:
-        raise nose.SkipTest('xlsxwriter not installed, skipping')
+        pytest.skip('xlsxwriter not installed, skipping')
 
 
 def _skip_if_no_excelsuite():
@@ -68,7 +68,7 @@ def _skip_if_no_s3fs():
     try:
         import s3fs  # noqa
     except ImportError:
-        raise nose.SkipTest('s3fs not installed, skipping')
+        pytest.skip('s3fs not installed, skipping')
 
 
 _seriesd = tm.getSeriesData()
@@ -600,7 +600,7 @@ def test_read_from_file_url(self):
 
         # FILE
         if sys.version_info[:2] < (2, 6):
-            raise nose.SkipTest("file:// not supported with Python < 2.6")
+            pytest.skip("file:// not supported with Python < 2.6")
 
         localtable = os.path.join(self.dirpath, 'test1' + self.ext)
         local_table = read_excel(localtable)
@@ -610,8 +610,8 @@ def test_read_from_file_url(self):
         except URLError:
             # fails on some systems
             import platform
-            raise nose.SkipTest("failing on %s" %
-                                ' '.join(platform.uname()).strip())
+            pytest.skip("failing on %s" %
+                        ' '.join(platform.uname()).strip())
 
         tm.assert_frame_equal(url_table, local_table)
 
@@ -1513,7 +1513,7 @@ def test_to_excel_unicode_filename(self):
         try:
             f = open(filename, 'wb')
         except UnicodeEncodeError:
-            raise nose.SkipTest('no unicode file names on this system')
+            pytest.skip('no unicode file names on this system')
         else:
             f.close()
 
@@ -1555,7 +1555,7 @@ def test_to_excel_unicode_filename(self):
     #         import xlwt
    #         import xlrd
     #     except ImportError:
-    #         raise nose.SkipTest
+    #         pytest.skip
 
     #     filename = '__tmp_to_excel_header_styling_xls__.xls'
     #     pdf.to_excel(filename, 'test1')
@@ -1601,9 +1601,9 @@ def test_to_excel_unicode_filename(self):
     #         import openpyxl
     #         from openpyxl.cell import get_column_letter
     #     except ImportError:
-    #         raise nose.SkipTest
+    #         pytest.skip
     #     if openpyxl.__version__ < '1.6.1':
-    #         raise nose.SkipTest
+    #         pytest.skip
     #     # test xlsx_styling
     #     filename = '__tmp_to_excel_header_styling_xlsx__.xlsx'
     #     pdf.to_excel(filename, 'test1')
@@ -1635,7 +1635,7 @@ def test_excel_010_hemstring(self):
         _skip_if_no_xlrd()
 
         if self.merge_cells:
-            raise nose.SkipTest('Skip tests for merged MI format.')
+            pytest.skip('Skip tests for merged MI format.')
 
         from pandas.util.testing import makeCustomDataframe as mkdf
         # ensure limited functionality in 0.10
@@ -1690,7 +1690,7 @@ def test_excel_010_hemstring_raises_NotImplementedError(self):
         _skip_if_no_xlrd()
 
         if self.merge_cells:
-            raise nose.SkipTest('Skip tests for merged MI format.')
+            pytest.skip('Skip tests for merged MI format.')
 
         from pandas.util.testing import makeCustomDataframe as mkdf
         # ensure limited functionality in 0.10
@@ -1873,7 +1873,7 @@ class OpenpyxlTests(ExcelWriterBase, tm.TestCase):
     def test_to_excel_styleconverter(self):
         _skip_if_no_openpyxl()
         if not openpyxl_compat.is_compat(major_ver=1):
-            raise nose.SkipTest('incompatiable openpyxl version')
+            pytest.skip('incompatible openpyxl version')
 
         import openpyxl
 
@@ -1910,7 +1910,7 @@ def setUpClass(cls):
             ver = openpyxl.__version__
             if (not (LooseVersion(ver) >= LooseVersion('2.0.0') and
                      LooseVersion(ver) < LooseVersion('2.2.0'))):
-                raise nose.SkipTest("openpyxl %s >= 2.2" % str(ver))
+                pytest.skip("openpyxl %s >= 2.2" % str(ver))
 
         cls.setUpClass = setUpClass
         return cls
@@ -2026,7 +2026,7 @@ def setUpClass(cls):
             import openpyxl
             ver = openpyxl.__version__
             if LooseVersion(ver) < LooseVersion('2.2.0'):
-                raise nose.SkipTest("openpyxl %s < 2.2" % str(ver))
+                pytest.skip("openpyxl %s < 2.2" % str(ver))
 
         cls.setUpClass = setUpClass
         return cls
@@ -2095,7 +2095,7 @@ def test_to_excel_styleconverter(self):
 
     def test_write_cells_merge_styled(self):
         if not openpyxl_compat.is_compat(major_ver=2):
-            raise nose.SkipTest('incompatiable openpyxl version')
+            pytest.skip('incompatible openpyxl version')
 
         from pandas.formats.format import ExcelCell
 
@@ -2278,7 +2278,7 @@ def test_ExcelWriter_dispatch(self):
         except ImportError:
             _skip_if_no_openpyxl()
             if not openpyxl_compat.is_compat(major_ver=1):
-                raise nose.SkipTest('incompatible openpyxl version')
+                pytest.skip('incompatible openpyxl version')
             writer_klass = _Openpyxl1Writer
 
         with ensure_clean('.xlsx') as path:
diff --git a/pandas/io/tests/test_feather.py b/pandas/io/tests/test_feather.py
index 218175e5ef527..6e2c28a0f68de 100644
--- a/pandas/io/tests/test_feather.py
+++ b/pandas/io/tests/test_feather.py
@@ -1,17 +1,12 @@
 """ test feather-format compat """
 
-import nose
+import pytest
+feather = pytest.importorskip('feather')
 
 import numpy as np
 import pandas as pd
-
 from pandas.io.feather_format import to_feather, read_feather
 
-try:
-    import feather  # noqa
-except ImportError:
-    raise nose.SkipTest('no feather-format installed')
-
 from feather import FeatherError
 import pandas.util.testing as tm
 from pandas.util.testing import assert_frame_equal, ensure_clean
diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py
index 1157482d7ae67..0868edd2147b5 100644
--- a/pandas/io/tests/test_gbq.py
+++ b/pandas/io/tests/test_gbq.py
@@ -1,6 +1,6 @@
 import re
 from datetime import datetime
-import nose
+import pytest
 import pytz
 import platform
 from time import sleep
 
@@ -42,25 +42,25 @@
 
 def _skip_if_no_project_id():
     if not _get_project_id():
-        raise nose.SkipTest(
+        pytest.skip(
             "Cannot run integration tests without a project id")
 
 
 def _skip_local_auth_if_in_travis_env():
     if _in_travis_environment():
-        raise nose.SkipTest("Cannot run local auth in travis environment")
+        pytest.skip("Cannot run local auth in travis environment")
 
 
 def _skip_if_no_private_key_path():
     if not _get_private_key_path():
-        raise nose.SkipTest("Cannot run integration tests without a "
-                            "private key json file path")
+        pytest.skip("Cannot run integration tests without a "
+                    "private key json file path")
 
 
 def _skip_if_no_private_key_contents():
     if not _get_private_key_contents():
-        raise nose.SkipTest("Cannot run integration tests without a "
-                            "private key json contents")
+        pytest.skip("Cannot run integration tests without a "
+                    "private key json contents")
 
 
 def _in_travis_environment():
@@ -184,7 +184,7 @@ def _setup_common():
     try:
         _test_imports()
     except (ImportError, NotImplementedError) as import_exception:
-        raise nose.SkipTest(import_exception)
+        pytest.skip(import_exception)
 
     if _in_travis_environment():
         logging.getLogger('oauth2client').setLevel(logging.ERROR)
@@ -284,15 +284,15 @@ def test_should_be_able_to_get_results_from_query(self):
 
     def test_get_application_default_credentials_does_not_throw_error(self):
         if _check_if_can_get_correct_default_credentials():
-            raise nose.SkipTest("Can get default_credentials "
-                                "from the environment!")
+            pytest.skip("Can get default_credentials "
+                        "from the environment!")
 
         credentials = self.sut.get_application_default_credentials()
         self.assertIsNone(credentials)
 
     def test_get_application_default_credentials_returns_credentials(self):
         if not _check_if_can_get_correct_default_credentials():
-            raise nose.SkipTest("Cannot get default_credentials "
-                                "from the environment!")
+            pytest.skip("Cannot get default_credentials "
+                        "from the environment!")
 
         from oauth2client.client import GoogleCredentials
         credentials = self.sut.get_application_default_credentials()
         self.assertTrue(isinstance(credentials, GoogleCredentials))
@@ -1015,7 +1015,7 @@ def test_upload_data_if_table_exists_append(self):
 
     def test_upload_data_if_table_exists_replace(self):
 
-        raise nose.SkipTest("buggy test")
+        pytest.skip("buggy test")
 
         destination_table = DESTINATION_TABLE + "4"
 
diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
index 356adb92829c6..232e68a87f16e 100644
--- a/pandas/io/tests/test_html.py
+++ b/pandas/io/tests/test_html.py
@@ -12,7 +12,7 @@
 
 from distutils.version import LooseVersion
 
-import nose
+import pytest
 import numpy as np
 from numpy.random import rand
 
@@ -39,7 +39,7 @@ def _have_module(module_name):
 def _skip_if_no(module_name):
     if not _have_module(module_name):
-        raise nose.SkipTest("{0!r} not found".format(module_name))
+        pytest.skip("{0!r} not found".format(module_name))
 
 
 def _skip_if_none_of(module_names):
@@ -48,16 +48,16 @@ def _skip_if_none_of(module_names):
     if module_names == 'bs4':
         import bs4
         if bs4.__version__ == LooseVersion('4.2.0'):
-            raise nose.SkipTest("Bad version of bs4: 4.2.0")
+            pytest.skip("Bad version of bs4: 4.2.0")
     else:
         not_found = [module_name for module_name in module_names if not
                      _have_module(module_name)]
         if set(not_found) & set(module_names):
-            raise nose.SkipTest("{0!r} not found".format(not_found))
+            pytest.skip("{0!r} not found".format(not_found))
         if 'bs4' in module_names:
             import bs4
             if bs4.__version__ == LooseVersion('4.2.0'):
-                raise nose.SkipTest("Bad version of bs4: 4.2.0")
+                pytest.skip("Bad version of bs4: 4.2.0")
 
 
 DATA_PATH = tm.get_data_path()
@@ -685,7 +685,7 @@ def test_decimal_rows(self):
             ''')
         expected = DataFrame(data={'Header': 1100.101}, index=[0])
         result = self.read_html(data, decimal='#')[0]
-        nose.tools.assert_equal(result['Header'].dtype, np.dtype('float64'))
+        assert result['Header'].dtype == np.dtype('float64')
         tm.assert_frame_equal(result, expected)
 
     def test_bool_header_arg(self):
diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py
index 2ee36d85f674c..4bb6f4a69bab3 100644
--- a/pandas/io/tests/test_packers.py
+++ b/pandas/io/tests/test_packers.py
@@ -1,4 +1,4 @@
-import nose
+import pytest
 
 import os
 import datetime
@@ -168,7 +168,7 @@ def test_list_numpy_float(self):
 
     def test_list_numpy_float_complex(self):
         if not hasattr(np, 'complex128'):
-            raise nose.SkipTest('numpy cant handle complex128')
+            pytest.skip('numpy cant handle complex128')
 
         x = [np.float32(np.random.rand()) for i in range(5)] + \
             [np.complex128(np.random.rand() + 1j * np.random.rand())
@@ -261,7 +261,7 @@ def test_datetimes(self):
 
         # fails under 2.6/win32 (np.datetime64 seems broken)
         if LooseVersion(sys.version) < '2.7':
-            raise nose.SkipTest('2.6 with np.datetime64 is broken')
+            pytest.skip('2.6 with np.datetime64 is broken')
 
         for i in [datetime.datetime(2013, 1, 1),
                   datetime.datetime(2013, 1, 1, 5, 1),
@@ -589,12 +589,12 @@ def _test_compression(self, compress):
 
     def test_compression_zlib(self):
         if not _ZLIB_INSTALLED:
-            raise nose.SkipTest('no zlib')
+            pytest.skip('no zlib')
         self._test_compression('zlib')
 
     def test_compression_blosc(self):
         if not _BLOSC_INSTALLED:
-            raise nose.SkipTest('no blosc')
+            pytest.skip('no blosc')
         self._test_compression('blosc')
 
     def _test_compression_warns_when_decompress_caches(self, compress):
@@ -653,12 +653,12 @@ def decompress(ob):
 
     def test_compression_warns_when_decompress_caches_zlib(self):
         if not _ZLIB_INSTALLED:
-            raise nose.SkipTest('no zlib')
+            pytest.skip('no zlib')
         self._test_compression_warns_when_decompress_caches('zlib')
 
     def test_compression_warns_when_decompress_caches_blosc(self):
         if not _BLOSC_INSTALLED:
-            raise nose.SkipTest('no blosc')
+            pytest.skip('no blosc')
         self._test_compression_warns_when_decompress_caches('blosc')
 
     def _test_small_strings_no_warn(self, compress):
@@ -690,18 +690,18 @@ def _test_small_strings_no_warn(self, compress):
 
     def test_small_strings_no_warn_zlib(self):
         if not _ZLIB_INSTALLED:
-            raise nose.SkipTest('no zlib')
+            pytest.skip('no zlib')
         self._test_small_strings_no_warn('zlib')
 
     def test_small_strings_no_warn_blosc(self):
         if not _BLOSC_INSTALLED:
-            raise nose.SkipTest('no blosc')
+            pytest.skip('no blosc')
         self._test_small_strings_no_warn('blosc')
 
     def test_readonly_axis_blosc(self):
         # GH11880
         if not _BLOSC_INSTALLED:
-            raise nose.SkipTest('no blosc')
+            pytest.skip('no blosc')
         df1 = DataFrame({'A': list('abcd')})
         df2 = DataFrame(df1, index=[1., 2., 3., 4.])
         self.assertTrue(1 in self.encode_decode(df1['A'], compress='blosc'))
@@ -717,9 +717,9 @@ def test_readonly_axis_zlib(self):
     def test_readonly_axis_blosc_to_sql(self):
         # GH11880
         if not _BLOSC_INSTALLED:
-            raise nose.SkipTest('no blosc')
+            pytest.skip('no blosc')
         if not self._SQLALCHEMY_INSTALLED:
-            raise nose.SkipTest('no sqlalchemy')
+            pytest.skip('no sqlalchemy')
         expected = DataFrame({'A': list('abcd')})
         df = self.encode_decode(expected, compress='blosc')
         eng = self._create_sql_engine("sqlite:///:memory:")
@@ -731,9 +731,9 @@ def test_readonly_axis_blosc_to_sql(self):
     def test_readonly_axis_zlib_to_sql(self):
         # GH11880
         if not _ZLIB_INSTALLED:
-            raise nose.SkipTest('no zlib')
+            pytest.skip('no zlib')
         if not self._SQLALCHEMY_INSTALLED:
-            raise nose.SkipTest('no sqlalchemy')
+            pytest.skip('no sqlalchemy')
         expected = DataFrame({'A': list('abcd')})
         df = self.encode_decode(expected, compress='zlib')
         eng = self._create_sql_engine("sqlite:///:memory:")
diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py
index 89827817a85fb..588b2d5f04888 100644
--- a/pandas/io/tests/test_pickle.py
+++ b/pandas/io/tests/test_pickle.py
@@ -2,7 +2,7 @@
 
 """ manage legacy pickle tests """
 
-import nose
+import pytest
 import os
 
 from distutils.version import LooseVersion
 
@@ -172,7 +172,7 @@ def compare_sp_frame_float(self, result, expected, typ, version):
 
     def read_pickles(self, version):
         if not is_platform_little_endian():
-            raise nose.SkipTest("known failure on non-little endian")
+            pytest.skip("known failure on non-little endian")
 
         pth = tm.get_data_path('legacy_pickle/{0}'.format(str(version)))
         n = 0
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 501e744ad308c..3fa0eb2ef52dc 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -1,4 +1,4 @@
-import nose
+import pytest
 import sys
 import os
 import warnings
@@ -17,17 +17,14 @@
 from pandas.compat import is_platform_windows, PY3, PY35
 from pandas.formats.printing import pprint_thing
 
-from pandas.io.pytables import _tables, TableIterator
-try:
-    _tables()
-except ImportError as e:
-    raise nose.SkipTest(e)
-
+tables = pytest.importorskip('tables')
+from pandas.io.pytables import TableIterator
 from pandas.io.pytables import (HDFStore, get_store, Term, read_hdf,
                                 IncompatibilityWarning, PerformanceWarning,
                                 AttributeConflictWarning, DuplicateWarning,
                                 PossibleDataLossError, ClosedFileError)
+
 from pandas.io import pytables as pytables
 import pandas.util.testing as tm
 from pandas.util.testing import (assert_panel4d_equal,
@@ -43,7 +40,7 @@
 try:
     import tables
 except ImportError:
-    raise nose.SkipTest('no pytables')
+    pytest.skip('no pytables')
 
 from distutils.version import LooseVersion
 
@@ -738,7 +735,7 @@ def test_put_compression(self):
     def test_put_compression_blosc(self):
         tm.skip_if_no_package('tables', '2.2', app='blosc support')
         if skip_compression:
-            raise nose.SkipTest("skipping on windows/PY3")
+            pytest.skip("skipping on windows/PY3")
 
         df = tm.makeTimeDataFrame()
 
@@ -968,7 +965,7 @@ def check(format, index):
 
     def test_encoding(self):
         if sys.byteorder != 'little':
-            raise nose.SkipTest('system byteorder is not little')
+            pytest.skip('system byteorder is not little')
 
         with ensure_clean_store(self.path) as store:
             df = DataFrame(dict(A='foo', B='bar'), index=range(5))
@@ -2830,14 +2827,14 @@ def test_index_types(self):
 
     def test_timeseries_preepoch(self):
         if sys.version_info[0] == 2 and sys.version_info[1] < 7:
-            raise nose.SkipTest("won't work on Python < 2.7")
+            pytest.skip("won't work on Python < 2.7")
 
         dr = bdate_range('1/1/1940', '1/1/1960')
         ts = Series(np.random.randn(len(dr)), index=dr)
         try:
             self._check_roundtrip(ts, tm.assert_series_equal)
         except OverflowError:
-            raise nose.SkipTest('known failer on some windows platforms')
+            pytest.skip('known failure on some windows platforms')
 
     def test_frame(self):
 
@@ -4202,8 +4199,8 @@ def test_nan_selection_bug_4858(self):
 
         # GH 4858; nan selection bug, only works for pytables >= 3.1
         if LooseVersion(tables.__version__) < '3.1.0':
-            raise nose.SkipTest('tables version does not support fix for nan '
-                                'selection bug: GH 4858')
+            pytest.skip('tables version does not support fix for nan '
+                        'selection bug: GH 4858')
 
         with ensure_clean_store(self.path) as store:
 
@@ -4453,7 +4450,7 @@ def test_pytables_native_read(self):
     def test_pytables_native2_read(self):
         # fails on win/3.5 oddly
         if PY35 and is_platform_windows():
-            raise nose.SkipTest("native2 read fails oddly on windows / 3.5")
+            pytest.skip("native2 read fails oddly on windows / 3.5")
 
         with ensure_clean_store(
                 tm.get_data_path('legacy_hdf/pytables_native2.h5'),
@@ -4585,7 +4582,7 @@ def do_copy(f=None, new_f=None, keys=None,
             safe_remove(path)
 
     def test_legacy_table_write(self):
-        raise nose.SkipTest("cannot write legacy tables")
+        pytest.skip("cannot write legacy tables")
 
         store = HDFStore(tm.get_data_path(
             'legacy_hdf/legacy_table_%s.h5' % pandas.__version__), 'a')
diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py
index ddda65c5bafc8..a6f4d96001021 100644
--- a/pandas/io/tests/test_sql.py
+++ b/pandas/io/tests/test_sql.py
@@ -24,7 +24,7 @@
 import os
 import sys
 
-import nose
+import pytest
 import warnings
 import numpy as np
 import pandas as pd
@@ -854,7 +854,7 @@ def connect(self):
         if SQLALCHEMY_INSTALLED:
             return sqlalchemy.create_engine('sqlite:///:memory:')
         else:
-            raise nose.SkipTest('SQLAlchemy not installed')
+            pytest.skip('SQLAlchemy not installed')
 
     def test_read_table_columns(self):
         # test columns argument in read_table
@@ -1063,7 +1063,7 @@ def test_con_string_import_error(self):
             self.assertRaises(ImportError, sql.read_sql, "SELECT * FROM iris",
                               conn)
         else:
-            raise nose.SkipTest('SQLAlchemy is installed')
+            pytest.skip('SQLAlchemy is installed')
 
     def test_read_sql_delegate(self):
         iris_frame1 = sql.read_sql_query("SELECT * FROM iris", self.conn)
@@ -1128,7 +1128,7 @@ def setUpClass(cls):
             conn.connect()
         except sqlalchemy.exc.OperationalError:
             msg = "{0} - can't connect to {1} server".format(cls, cls.flavor)
-            raise nose.SkipTest(msg)
+            pytest.skip(msg)
 
     def setUp(self):
         self.setup_connect()
@@ -1141,7 +1141,7 @@ def setUp(self):
     def setup_import(cls):
         # Skip this test if SQLAlchemy not available
         if not SQLALCHEMY_INSTALLED:
-            raise nose.SkipTest('SQLAlchemy not installed')
+            pytest.skip('SQLAlchemy not installed')
 
     @classmethod
     def setup_driver(cls):
@@ -1158,7 +1158,7 @@ def setup_connect(self):
             # to test if connection can be made:
             self.conn.connect()
         except sqlalchemy.exc.OperationalError:
-            raise nose.SkipTest(
+            pytest.skip(
                 "Can't connect to {0} server".format(self.flavor))
 
     def test_aread_sql(self):
@@ -1304,7 +1304,7 @@ def check(col):
         # GH11216
         df = pd.read_sql_query("select * from types_test_data", self.conn)
         if not hasattr(df, 'DateColWithTz'):
-            raise nose.SkipTest("no column with datetime with time zone")
+            pytest.skip("no column with datetime with time zone")
 
         # this is parsed on Travis (linux), but not on macosx for some reason
         # even with the same versions of psycopg2 & sqlalchemy, possibly a
@@ -1319,7 +1319,7 @@ def check(col):
         df = pd.read_sql_query("select * from types_test_data", self.conn,
                                parse_dates=['DateColWithTz'])
         if not hasattr(df, 'DateColWithTz'):
-            raise nose.SkipTest("no column with datetime with time zone")
+            pytest.skip("no column with datetime with time zone")
         check(df.DateColWithTz)
 
         df = pd.concat(list(pd.read_sql_query("select * from types_test_data",
@@ -1665,7 +1665,7 @@ class Temporary(Base):
 class _TestSQLAlchemyConn(_EngineToConnMixin, _TestSQLAlchemy):
 
     def test_transactions(self):
-        raise nose.SkipTest(
+        pytest.skip(
             "Nested transactions rollbacks don't work with Pandas")
 
 
@@ -1739,7 +1739,7 @@ def setup_driver(cls):
             import pymysql  # noqa
             cls.driver = 'pymysql'
         except ImportError:
-            raise nose.SkipTest('pymysql not installed')
+            pytest.skip('pymysql not installed')
 
     def test_default_type_conversion(self):
         df = sql.read_sql_table("types_test_data", self.conn)
@@ -1808,7 +1808,7 @@ def setup_driver(cls):
             import psycopg2  # noqa
             cls.driver = 'psycopg2'
         except ImportError:
-            raise nose.SkipTest('psycopg2 not installed')
+            pytest.skip('psycopg2 not installed')
 
     def test_schema_support(self):
         # only test this for postgresql (schema's not supported in
@@ -2007,7 +2007,7 @@ def test_to_sql_save_index(self):
 
     def test_transactions(self):
         if PY36:
-            raise nose.SkipTest("not working on python > 3.5")
+            pytest.skip("not working on python > 3.5")
         self._transaction_test()
 
     def _get_sqlite_column_type(self, table, column):
@@ -2019,7 +2019,7 @@ def _get_sqlite_column_type(self, table, column):
 
     def test_dtype(self):
         if self.flavor == 'mysql':
-            raise nose.SkipTest('Not applicable to MySQL legacy')
+            pytest.skip('Not applicable to MySQL legacy')
 
         cols = ['A', 'B']
         data = [(0.8, True), (0.9, None)]
@@ -2045,7 +2045,7 @@ def test_dtype(self):
 
     def test_notnull_dtype(self):
         if self.flavor == 'mysql':
-            raise nose.SkipTest('Not applicable to MySQL legacy')
+            pytest.skip('Not applicable to MySQL legacy')
 
         cols = {'Bool': Series([True, None]),
                 'Date': Series([datetime(2012, 5, 1), None]),
@@ -2130,7 +2130,7 @@ def _skip_if_no_pymysql():
     try:
         import pymysql  # noqa
     except ImportError:
-        raise nose.SkipTest('pymysql not installed, skipping')
+        pytest.skip('pymysql not installed, skipping')
 
 
 class TestXSQLite(SQLiteMixIn, tm.TestCase):
@@ -2389,12 +2389,12 @@ def setUpClass(cls):
         try:
             pymysql.connect(read_default_group='pandas')
         except pymysql.ProgrammingError:
-            raise nose.SkipTest(
+            pytest.skip(
                 "Create a group of connection parameters under the heading "
                 "[pandas] in your system's mysql default file, "
                 "typically located at ~/.my.cnf or /etc/.my.cnf. ")
        except pymysql.Error:
-            raise nose.SkipTest(
+            pytest.skip(
                 "Cannot connect to database. "
                 "Create a group of connection parameters under the heading "
                 "[pandas] in your system's mysql default file, "
@@ -2415,12 +2415,12 @@ def setUp(self):
         try:
             self.conn = pymysql.connect(read_default_group='pandas')
        except pymysql.ProgrammingError:
-            raise nose.SkipTest(
+            pytest.skip(
                 "Create a group of connection parameters under the heading "
                 "[pandas] in your system's mysql default file, "
                 "typically located at ~/.my.cnf or /etc/.my.cnf. ")
        except pymysql.Error:
-            raise nose.SkipTest(
+            pytest.skip(
                 "Cannot connect to database. 
" "Create a group of connection parameters under the heading " "[pandas] in your system's mysql default file, " diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index fcb935925e61f..ae09e671dbca3 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -9,7 +9,7 @@ from datetime import datetime from distutils.version import LooseVersion -import nose +import pytest import numpy as np import pandas as pd import pandas.util.testing as tm @@ -128,7 +128,7 @@ def test_read_dta1(self): def test_read_dta2(self): if LooseVersion(sys.version) < '2.7': - raise nose.SkipTest('datetime interp under 2.6 is faulty') + pytest.skip('datetime interp under 2.6 is faulty') expected = DataFrame.from_records( [ diff --git a/pandas/sparse/tests/test_indexing.py b/pandas/sparse/tests/test_indexing.py index c400b68c8a7d8..357a7103f4027 100644 --- a/pandas/sparse/tests/test_indexing.py +++ b/pandas/sparse/tests/test_indexing.py @@ -1,6 +1,6 @@ # pylint: disable-msg=E1101,W0612 -import nose # noqa +import pytest # noqa import numpy as np import pandas as pd import pandas.util.testing as tm diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py index 491005db2ae79..4d5a93d77cf14 100644 --- a/pandas/sparse/tests/test_libsparse.py +++ b/pandas/sparse/tests/test_libsparse.py @@ -1,6 +1,6 @@ from pandas import Series -import nose +import pytest import numpy as np import operator import pandas.util.testing as tm @@ -213,7 +213,7 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): longer_index.to_int_index()) if compat.is_platform_windows(): - raise nose.SkipTest("segfaults on win-64 when all tests are run") + pytest.skip("segfaults on win-64 when all tests are run") check_cases(_check_case) def test_intersect_empty(self): diff --git a/pandas/sparse/tests/test_series.py b/pandas/sparse/tests/test_series.py index db6ae14b096d3..d4543b97af4dd 100644 --- a/pandas/sparse/tests/test_series.py +++ b/pandas/sparse/tests/test_series.py @@ -577,8 +577,8 @@ def check(a, b): def test_binary_operators(self): # skipping for now ##### - import nose - raise nose.SkipTest("skipping sparse binary operators test") + import pytest + pytest.skip("skipping sparse binary operators test") def _check_inplace_op(iop, op): tmp = self.bseries.copy() diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 9a24ae332f7c5..476c6a636ae5a 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -44,7 +44,7 @@ reset_option) from datetime import datetime -import nose +import pytest use_32bit_repr = is_platform_windows() or is_platform_32bit() @@ -287,7 +287,7 @@ def test_repr_non_interactive(self): def test_repr_max_columns_max_rows(self): term_width, term_height = get_terminal_size() if term_width < 10 or term_height < 10: - raise nose.SkipTest("terminal size too small, " + pytest.skip("terminal size too small, " "{0} x {1}".format(term_width, term_height)) def mkframe(n): @@ -1871,7 +1871,7 @@ def test_to_html_regression_GH6098(self): df.pivot_table(index=[u('clé1')], columns=[u('clé2')])._repr_html_() def test_to_html_truncate(self): - raise nose.SkipTest("unreliable on travis") + pytest.skip("unreliable on travis") index = pd.DatetimeIndex(start='20010101', freq='D', periods=20) df = DataFrame(index=index, columns=range(20)) fmt.set_option('display.max_rows', 8) @@ -1972,7 +1972,7 @@ def test_to_html_truncate(self): self.assertEqual(result, expected) def 
test_to_html_truncate_multi_index(self): - raise nose.SkipTest("unreliable on travis") + pytest.skip("unreliable on travis") arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] df = DataFrame(index=arrays, columns=arrays) @@ -2089,7 +2089,7 @@ def test_to_html_truncate_multi_index(self): self.assertEqual(result, expected) def test_to_html_truncate_multi_index_sparse_off(self): - raise nose.SkipTest("unreliable on travis") + pytest.skip("unreliable on travis") arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] df = DataFrame(index=arrays, columns=arrays) @@ -2250,7 +2250,7 @@ def test_pprint_thing(self): from pandas.formats.printing import pprint_thing as pp_t if PY3: - raise nose.SkipTest("doesn't work on Python 3") + pytest.skip("doesn't work on Python 3") self.assertEqual(pp_t('a'), u('a')) self.assertEqual(pp_t(u('a')), u('a')) diff --git a/pandas/tests/formats/test_printing.py b/pandas/tests/formats/test_printing.py index 1e6794c1c9c69..52f3e06c6cbd0 100644 --- a/pandas/tests/formats/test_printing.py +++ b/pandas/tests/formats/test_printing.py @@ -126,7 +126,7 @@ def test_ambiguous_width(self): # common.console_encode should encode things as utf-8. # """ # if compat.PY3: -# raise nose.SkipTest +# pytest.skip # with tm.stdin_encoding(encoding=None): # result = printing.console_encode(u"\u05d0") diff --git a/pandas/tests/formats/test_style.py b/pandas/tests/formats/test_style.py index eaa209178b2e9..53bb3f9010f7e 100644 --- a/pandas/tests/formats/test_style.py +++ b/pandas/tests/formats/test_style.py @@ -1,5 +1,4 @@ -import os -from nose import SkipTest +import pytest import copy import numpy as np @@ -8,20 +7,7 @@ from pandas.util.testing import TestCase import pandas.util.testing as tm -# Getting failures on a python 2.7 build with -# whenever we try to import jinja, whether it's installed or not. -# so we're explicitly skipping that one *before* we try to import -# jinja. We still need to export the imports as globals, -# since importing Styler tries to import jinja2. 
-job_name = os.environ.get('JOB_NAME', None) -if job_name == '27_slow_nnet_LOCALE': - raise SkipTest("No jinja") -try: - # Do try except on just jinja, so the only reason - # We skip is if jinja can't import, not something else - import jinja2 # noqa -except ImportError: - raise SkipTest("No Jinja2") +jinja2 = pytest.importorskip('jinja2') from pandas.formats.style import Styler, _get_level_lengths # noqa diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index a55d2cfb2fb2b..1f0d16e959cd7 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -5,7 +5,7 @@ from datetime import timedelta, datetime from distutils.version import LooseVersion import sys -import nose +import pytest from numpy import nan from numpy.random import randn @@ -2066,8 +2066,8 @@ def test_round_issue(self): def test_built_in_round(self): if not compat.PY3: - raise nose.SkipTest("build in round cannot be overriden " - "prior to Python 3") + pytest.skip("build in round cannot be overriden " + "prior to Python 3") # GH11763 # Here's the test frame we'll be working with diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 66a235e1260bd..76eb61bd81110 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -6,7 +6,7 @@ import functools import itertools -import nose +import pytest from numpy.random import randn import numpy as np @@ -1702,7 +1702,7 @@ def test_from_records_with_datetimes(self): # this may fail on certain platforms because of a numpy issue # related GH6140 if not is_platform_little_endian(): - raise nose.SkipTest("known failure of test on non-little endian") + pytest.skip("known failure of test on non-little endian") # construction with a null in a recarray # GH 6140 @@ -1714,7 +1714,7 @@ def test_from_records_with_datetimes(self): try: recarray = np.core.records.fromarrays(arrdata, dtype=dtypes) except (ValueError): - raise nose.SkipTest("known failure of numpy rec array creation") + pytest.skip("known failure of numpy rec array creation") result = DataFrame.from_records(recarray) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index ef800f0dface3..80ea01d3a05aa 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -23,8 +23,8 @@ def _skip_if_no_pchip(): try: from scipy.interpolate import pchip_interpolate # noqa except ImportError: - import nose - raise nose.SkipTest('scipy.interpolate.pchip missing') + import pytest + pytest.skip('scipy.interpolate.pchip missing') class TestDataFrameMissingData(tm.TestCase, TestData): diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index ec73689088035..d6a3592446fd5 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -5,7 +5,7 @@ from datetime import datetime import operator -import nose +import pytest from numpy import nan, random import numpy as np @@ -323,7 +323,7 @@ def test_logical_typeerror(self): self.assertRaises(TypeError, self.frame.__gt__, 'foo') self.assertRaises(TypeError, self.frame.__ne__, 'foo') else: - raise nose.SkipTest('test_logical_typeerror not tested on PY3') + pytest.skip('test_logical_typeerror not tested on PY3') def test_logical_with_nas(self): d = DataFrame({'a': [np.nan, False], 'b': [True, True]}) diff --git a/pandas/tests/frame/test_quantile.py 
b/pandas/tests/frame/test_quantile.py index 400ead788aa7c..909a1a6a4c917 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -3,7 +3,7 @@ from __future__ import print_function -import nose +import pytest import numpy as np from pandas import (DataFrame, Series, Timestamp, _np_version_under1p11) @@ -106,7 +106,7 @@ def test_quantile_axis_parameter(self): def test_quantile_interpolation(self): # GH #10174 if _np_version_under1p9: - raise nose.SkipTest("Numpy version under 1.9") + pytest.skip("Numpy version under 1.9") from numpy import percentile @@ -171,7 +171,7 @@ def test_quantile_interpolation(self): def test_quantile_interpolation_np_lt_1p9(self): # GH #10174 if not _np_version_under1p9: - raise nose.SkipTest("Numpy version is greater than 1.9") + pytest.skip("Numpy version is greater than 1.9") from numpy import percentile diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index aed02b7323f85..647af92b42273 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -3,7 +3,7 @@ from __future__ import print_function import operator -import nose +import pytest from itertools import product from pandas.compat import (zip, range, lrange, StringIO) @@ -30,14 +30,14 @@ def skip_if_no_pandas_parser(parser): if parser != 'pandas': - raise nose.SkipTest("cannot evaluate with parser {0!r}".format(parser)) + pytest.skip("cannot evaluate with parser {0!r}".format(parser)) def skip_if_no_ne(engine='numexpr'): if engine == 'numexpr': if not _NUMEXPR_INSTALLED: - raise nose.SkipTest("cannot query engine numexpr when numexpr not " - "installed") + pytest.skip("cannot query engine numexpr when numexpr not " + "installed") class TestCompat(tm.TestCase): diff --git a/pandas/tests/groupby/test_misc.py b/pandas/tests/groupby/test_misc.py index c9d8ad4231cfb..9395304385681 100644 --- a/pandas/tests/groupby/test_misc.py +++ b/pandas/tests/groupby/test_misc.py @@ -1,6 +1,6 @@ """ misc non-groupby routines, as they are defined in core/groupby.py """ -import nose +import pytest import numpy as np from numpy import nan from pandas.util import testing as tm @@ -42,7 +42,7 @@ def test_nargsort(self): np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) np.argsort(items2, kind='mergesort') except TypeError: - raise nose.SkipTest('requested sort not available for type') + pytest.skip('requested sort not available for type') # mergesort is the most difficult to get right because we want it to be # stable. 
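The conversion applied throughout these test modules is mechanical: nose signals a skip by raising ``nose.SkipTest``, while pytest exposes plain helper calls, so every ``raise nose.SkipTest(msg)`` becomes ``pytest.skip(msg)``, the try/except ImportError guards collapse into ``pytest.importorskip``, and ``assert_raises`` becomes the ``pytest.raises`` context manager. A minimal standalone sketch of the three idioms, for illustration only and not part of the patch (``some_optional_dep`` is a made-up module name):

    import sys

    import pytest


    def test_needs_optional_dep():
        # importorskip replaces the try/except ImportError plus
        # raise nose.SkipTest(...) boilerplate: it returns the imported
        # module on success and skips the test otherwise.
        dep = pytest.importorskip('some_optional_dep')
        assert dep is not None


    def test_known_platform_failure():
        if sys.platform == 'win32':
            # pytest.skip() is called rather than raised; it raises
            # pytest's internal Skipped exception on our behalf.
            pytest.skip('known failure on win32')


    def test_bad_input_raises():
        # pytest.raises replaces nose.tools.assert_raises as a context
        # manager wrapped around the failing call.
        with pytest.raises(ValueError):
            int('not a number')
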
diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index af749963146c6..1b67ffce63b10 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1,7 +1,7 @@ """ test to_datetime """ import sys -import nose +import pytest import locale import calendar import numpy as np @@ -139,7 +139,7 @@ def test_to_datetime_with_non_exact(self): # 8904 # exact kw if sys.version_info < (2, 7): - raise nose.SkipTest('on python version < 2.7') + pytest.skip('on python version < 2.7') s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00', '19MAY11 00:00:00Z']) @@ -277,7 +277,7 @@ def test_to_datetime_tz_psycopg2(self): try: import psycopg2 except ImportError: - raise nose.SkipTest("no psycopg2 installed") + pytest.skip("no psycopg2 installed") # misc cases tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 365236f72e80e..702c4758da245 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -6,7 +6,7 @@ from datetime import timedelta from itertools import product -import nose +import pytest import numpy as np @@ -988,8 +988,8 @@ def test_iter(self): def test_legacy_pickle(self): if PY3: - raise nose.SkipTest("testing for legacy pickles not " - "support on py3") + pytest.skip("testing for legacy pickles not " + "support on py3") path = tm.get_data_path('multiindex_v1.pickle') obj = pd.read_pickle(path) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index b9a746cd25c7a..38f8bb5355a69 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -import nose +import pytest import numpy as np import pandas as pd @@ -1172,8 +1172,8 @@ def _assert_replace_conversion(self, from_key, to_key, how): if (from_key == 'bool' and to_key == 'int64' and tm.is_platform_windows()): - raise nose.SkipTest("windows platform buggy: {0} -> {1}".format - (from_key, to_key)) + pytest.skip("windows platform buggy: {0} -> {1}".format + (from_key, to_key)) if ((from_key == 'float64' and to_key in ('bool', 'int64')) or @@ -1189,8 +1189,8 @@ def _assert_replace_conversion(self, from_key, to_key, how): # buggy on 32-bit if tm.is_platform_32bit(): - raise nose.SkipTest("32-bit platform buggy: {0} -> {1}".format - (from_key, to_key)) + pytest.skip("32-bit platform buggy: {0} -> {1}".format + (from_key, to_key)) # Expected: do not downcast by replacement exp = pd.Series(self.rep[to_key], index=index, @@ -1243,7 +1243,7 @@ def test_replace_series_bool(self): if compat.PY3: # doesn't work in PY3, though ...dict_from_bool works fine - raise nose.SkipTest("doesn't work as in PY3") + pytest.skip("doesn't work as in PY3") self._assert_replace_conversion(from_key, to_key, how='series') diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 9fe1d7cacd38f..92e2dc7b5d934 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # coding: utf-8 -import nose +import pytest import os import warnings @@ -28,7 +28,7 @@ def _skip_if_no_scipy_gaussian_kde(): try: from scipy.stats import gaussian_kde # noqa except ImportError: - raise nose.SkipTest("scipy version doesn't support gaussian_kde") + pytest.skip("scipy version doesn't support gaussian_kde") def _ok_for_gaussian_kde(kind): diff --git 
a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index f7fd6a8519533..31c150bc1e64f 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -1,6 +1,6 @@ # coding: utf-8 -import nose +import pytest import itertools import string from distutils.version import LooseVersion @@ -28,7 +28,7 @@ def _skip_if_mpl_14_or_dev_boxplot(): # Don't need try / except since that's done at class level import matplotlib if str(matplotlib.__version__) >= LooseVersion('1.4'): - raise nose.SkipTest("Matplotlib Regression in 1.4 and current dev.") + pytest.skip("Matplotlib Regression in 1.4 and current dev.") @tm.mplskip diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index bcc9c7ceea8b5..25568f7eb61dc 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -2,7 +2,7 @@ from datetime import datetime, timedelta, date, time -import nose +import pytest from pandas.compat import lrange, zip import numpy as np @@ -161,8 +161,8 @@ def check_format_of_first_point(ax, expected_string): self.assertEqual(expected_string, ax.format_coord(first_x, first_y)) except (ValueError): - raise nose.SkipTest("skipping test because issue forming " - "test comparison GH7664") + pytest.skip("skipping test because issue forming " + "test comparison GH7664") annual = Series(1, index=date_range('2014-01-01', periods=3, freq='A-DEC')) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 81a54bd38b3f8..48af366f24ea4 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -2,7 +2,7 @@ """ Test cases for DataFrame.plot """ -import nose +import pytest import string import warnings @@ -1275,7 +1275,7 @@ def test_kde_missing_vals(self): def test_hist_df(self): from matplotlib.patches import Rectangle if self.mpl_le_1_2_1: - raise nose.SkipTest("not supported in matplotlib <= 1.2.x") + pytest.skip("not supported in matplotlib <= 1.2.x") df = DataFrame(randn(100, 4)) series = df[0] diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 52b85c89a7009..222165e9d3633 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -4,7 +4,7 @@ from itertools import product from distutils.version import LooseVersion -import nose +import pytest from numpy import nan import numpy as np @@ -476,8 +476,8 @@ def test_cummax_timedelta64(self): self.assert_series_equal(expected, result) def test_npdiff(self): - raise nose.SkipTest("skipping due to Series no longer being an " - "ndarray") + pytest.skip("skipping due to Series no longer being an " + "ndarray") # no longer works as the return type of np.diff is now nd.array s = Series(np.arange(5)) @@ -622,7 +622,7 @@ def test_numpy_round(self): def test_built_in_round(self): if not compat.PY3: - raise nose.SkipTest( + pytest.skip( 'build in round cannot be overriden prior to Python 3') s = Series([1.123, 2.123, 3.123], index=lrange(3)) @@ -785,8 +785,8 @@ def test_corr_rank(self): # these methods got rewritten in 0.8 if scipy.__version__ < LooseVersion('0.9'): - raise nose.SkipTest("skipping corr rank because of scipy version " - "{0}".format(scipy.__version__)) + pytest.skip("skipping corr rank because of scipy version " + "{0}".format(scipy.__version__)) # results from R A = Series( @@ -1063,8 +1063,8 @@ def test_rank_signature(self): 
self.assertRaises(ValueError, s.rank, 'average') def test_rank_inf(self): - raise nose.SkipTest('DataFrame.rank does not currently rank ' - 'np.inf and -np.inf properly') + pytest.skip('DataFrame.rank does not currently rank ' + 'np.inf and -np.inf properly') values = np.array( [-np.inf, -50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 702fa2acb5106..405d6c98a5d37 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -22,16 +22,16 @@ def _skip_if_no_pchip(): try: from scipy.interpolate import pchip_interpolate # noqa except ImportError: - import nose - raise nose.SkipTest('scipy.interpolate.pchip missing') + import pytest + pytest.skip('scipy.interpolate.pchip missing') def _skip_if_no_akima(): try: from scipy.interpolate import Akima1DInterpolator # noqa except ImportError: - import nose - raise nose.SkipTest('scipy.interpolate.Akima1DInterpolator missing') + import pytest + pytest.skip('scipy.interpolate.Akima1DInterpolator missing') def _simple_ts(start, end, freq='D'): diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index 76db6c90a685f..b8d1b92081858 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -1,7 +1,7 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 -import nose +import pytest import numpy as np import pandas as pd @@ -73,7 +73,7 @@ def test_quantile_multi(self): def test_quantile_interpolation(self): # GH #10174 if _np_version_under1p9: - raise nose.SkipTest("Numpy version is under 1.9") + pytest.skip("Numpy version is under 1.9") from numpy import percentile @@ -89,7 +89,7 @@ def test_quantile_interpolation(self): def test_quantile_interpolation_dtype(self): # GH #10174 if _np_version_under1p9: - raise nose.SkipTest("Numpy version is under 1.9") + pytest.skip("Numpy version is under 1.9") from numpy import percentile @@ -105,7 +105,7 @@ def test_quantile_interpolation_dtype(self): def test_quantile_interpolation_np_lt_1p9(self): # GH #10174 if not _np_version_under1p9: - raise nose.SkipTest("Numpy version is greater than 1.9") + pytest.skip("Numpy version is greater than 1.9") from numpy import percentile diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 1d1ef1a08859c..473f1d81c9532 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -4,7 +4,7 @@ import re import sys from datetime import datetime, timedelta -import nose +import pytest import numpy as np import pandas as pd @@ -32,7 +32,7 @@ def test_string_methods_dont_fail(self): def test_tricky_container(self): if not hasattr(self, 'unicode_container'): - raise nose.SkipTest('Need unicode_container to test with this') + pytest.skip('Need unicode_container to test with this') repr(self.unicode_container) str(self.unicode_container) bytes(self.unicode_container) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 136786ecff0a0..0318757f76a11 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -4,7 +4,7 @@ import re import operator -import nose +import pytest from numpy.random import randn @@ -21,13 +21,7 @@ if not expr._USE_NUMEXPR: - try: - import numexpr # noqa - except ImportError: - msg = "don't have" - else: - msg = "not using" - raise nose.SkipTest("{0} numexpr".format(msg)) + numexpr = pytest.importorskip('numexpr') _frame = DataFrame(randn(10000, 4), columns=list('ABCD'), 
dtype='float64') _frame2 = DataFrame(randn(100, 4), columns=list('ABCD'), dtype='float64') @@ -70,9 +64,8 @@ def setUp(self): def tearDown(self): expr._MIN_ELEMENTS = self._MIN_ELEMENTS - @nose.tools.nottest - def run_arithmetic_test(self, df, other, assert_func, check_dtype=False, - test_flex=True): + def run_arithmetic(self, df, other, assert_func, check_dtype=False, + test_flex=True): expr._MIN_ELEMENTS = 0 operations = ['add', 'sub', 'mul', 'mod', 'truediv', 'floordiv', 'pow'] if not compat.PY3: @@ -109,15 +102,14 @@ def run_arithmetic_test(self, df, other, assert_func, check_dtype=False, raise def test_integer_arithmetic(self): - self.run_arithmetic_test(self.integer, self.integer, - assert_frame_equal) - self.run_arithmetic_test(self.integer.iloc[:, 0], - self.integer.iloc[:, 0], assert_series_equal, - check_dtype=True) - - @nose.tools.nottest - def run_binary_test(self, df, other, assert_func, test_flex=False, - numexpr_ops=set(['gt', 'lt', 'ge', 'le', 'eq', 'ne'])): + self.run_arithmetic(self.integer, self.integer, + assert_frame_equal) + self.run_arithmetic(self.integer.iloc[:, 0], + self.integer.iloc[:, 0], assert_series_equal, + check_dtype=True) + + def run_binary(self, df, other, assert_func, test_flex=False, + numexpr_ops=set(['gt', 'lt', 'ge', 'le', 'eq', 'ne'])): """ tests solely that the result is the same whether or not numexpr is enabled. Need to test whether the function does the correct thing @@ -151,46 +143,46 @@ def run_binary_test(self, df, other, assert_func, test_flex=False, def run_frame(self, df, other, binary_comp=None, run_binary=True, **kwargs): - self.run_arithmetic_test(df, other, assert_frame_equal, - test_flex=False, **kwargs) - self.run_arithmetic_test(df, other, assert_frame_equal, test_flex=True, - **kwargs) + self.run_arithmetic(df, other, assert_frame_equal, + test_flex=False, **kwargs) + self.run_arithmetic(df, other, assert_frame_equal, test_flex=True, + **kwargs) if run_binary: if binary_comp is None: expr.set_use_numexpr(False) binary_comp = other + 1 expr.set_use_numexpr(True) - self.run_binary_test(df, binary_comp, assert_frame_equal, - test_flex=False, **kwargs) - self.run_binary_test(df, binary_comp, assert_frame_equal, - test_flex=True, **kwargs) + self.run_binary(df, binary_comp, assert_frame_equal, + test_flex=False, **kwargs) + self.run_binary(df, binary_comp, assert_frame_equal, + test_flex=True, **kwargs) def run_series(self, ser, other, binary_comp=None, **kwargs): - self.run_arithmetic_test(ser, other, assert_series_equal, - test_flex=False, **kwargs) - self.run_arithmetic_test(ser, other, assert_almost_equal, - test_flex=True, **kwargs) + self.run_arithmetic(ser, other, assert_series_equal, + test_flex=False, **kwargs) + self.run_arithmetic(ser, other, assert_almost_equal, + test_flex=True, **kwargs) # series doesn't uses vec_compare instead of numexpr... 
# if binary_comp is None: # binary_comp = other + 1 - # self.run_binary_test(ser, binary_comp, assert_frame_equal, + # self.run_binary(ser, binary_comp, assert_frame_equal, # test_flex=False, **kwargs) - # self.run_binary_test(ser, binary_comp, assert_frame_equal, + # self.run_binary(ser, binary_comp, assert_frame_equal, # test_flex=True, **kwargs) def run_panel(self, panel, other, binary_comp=None, run_binary=True, assert_func=assert_panel_equal, **kwargs): - self.run_arithmetic_test(panel, other, assert_func, test_flex=False, - **kwargs) - self.run_arithmetic_test(panel, other, assert_func, test_flex=True, - **kwargs) + self.run_arithmetic(panel, other, assert_func, test_flex=False, + **kwargs) + self.run_arithmetic(panel, other, assert_func, test_flex=True, + **kwargs) if run_binary: if binary_comp is None: binary_comp = other + 1 - self.run_binary_test(panel, binary_comp, assert_func, - test_flex=False, **kwargs) - self.run_binary_test(panel, binary_comp, assert_func, - test_flex=True, **kwargs) + self.run_binary(panel, binary_comp, assert_func, + test_flex=False, **kwargs) + self.run_binary(panel, binary_comp, assert_func, + test_flex=True, **kwargs) def test_integer_arithmetic_frame(self): self.run_frame(self.integer, self.integer) @@ -234,22 +226,22 @@ def test_mixed_panel(self): binary_comp=-2) def test_float_arithemtic(self): - self.run_arithmetic_test(self.frame, self.frame, assert_frame_equal) - self.run_arithmetic_test(self.frame.iloc[:, 0], self.frame.iloc[:, 0], - assert_series_equal, check_dtype=True) + self.run_arithmetic(self.frame, self.frame, assert_frame_equal) + self.run_arithmetic(self.frame.iloc[:, 0], self.frame.iloc[:, 0], + assert_series_equal, check_dtype=True) def test_mixed_arithmetic(self): - self.run_arithmetic_test(self.mixed, self.mixed, assert_frame_equal) + self.run_arithmetic(self.mixed, self.mixed, assert_frame_equal) for col in self.mixed.columns: - self.run_arithmetic_test(self.mixed[col], self.mixed[col], - assert_series_equal) + self.run_arithmetic(self.mixed[col], self.mixed[col], + assert_series_equal) def test_integer_with_zeros(self): self.integer *= np.random.randint(0, 2, size=np.shape(self.integer)) - self.run_arithmetic_test(self.integer, self.integer, - assert_frame_equal) - self.run_arithmetic_test(self.integer.iloc[:, 0], - self.integer.iloc[:, 0], assert_series_equal) + self.run_arithmetic(self.integer, self.integer, + assert_frame_equal) + self.run_arithmetic(self.integer.iloc[:, 0], + self.integer.iloc[:, 0], assert_series_equal) def test_invalid(self): diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index e84e2d6809e7b..28f1dc61533c1 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -2,7 +2,7 @@ # pylint: disable-msg=E1101,W0612 from operator import methodcaller -import nose +import pytest import numpy as np from numpy import nan import pandas as pd @@ -367,7 +367,7 @@ def test_head_tail(self): try: o.head() except (NotImplementedError): - raise nose.SkipTest('not implemented on {0}'.format( + pytest.skip('not implemented on {0}'.format( o.__class__.__name__)) self._compare(o.head(), o.iloc[:5]) @@ -1567,7 +1567,7 @@ class TestPanel4D(tm.TestCase, Generic): _comparator = lambda self, x, y: assert_panel4d_equal(x, y, by_blocks=True) def test_sample(self): - raise nose.SkipTest("sample on Panel4D") + pytest.skip("sample on Panel4D") def test_to_xarray(self): diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 1dfea168c067c..f086935df6dc8 100644 --- 
a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -3,7 +3,7 @@ from datetime import datetime, date -import nose +import pytest import numpy as np import re @@ -276,7 +276,7 @@ def test_split_block_at(self): # with dup column support this method was taken out # GH3679 - raise nose.SkipTest("skipping for now") + pytest.skip("skipping for now") bs = list(self.fblock.split_block_at('a')) self.assertEqual(len(bs), 1) diff --git a/pandas/tests/test_msgpack/test_unpack.py b/pandas/tests/test_msgpack/test_unpack.py index a182c676adb3b..ae8227ab276fb 100644 --- a/pandas/tests/test_msgpack/test_unpack.py +++ b/pandas/tests/test_msgpack/test_unpack.py @@ -2,7 +2,7 @@ import sys from pandas.msgpack import Unpacker, packb, OutOfData, ExtType import pandas.util.testing as tm -import nose +import pytest class TestUnpack(tm.TestCase): @@ -19,7 +19,7 @@ def test_unpack_array_header_from_file(self): def test_unpacker_hook_refcnt(self): if not hasattr(sys, 'getrefcount'): - raise nose.SkipTest('no sys.getrefcount()') + pytest.skip('no sys.getrefcount()') result = [] def hook(x): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 1fe2d701f5a41..8e0628eefa392 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -3,7 +3,7 @@ from warnings import catch_warnings import datetime import itertools -import nose +import pytest from numpy.random import randn import numpy as np @@ -1733,7 +1733,7 @@ def test_getitem_lowerdim_corner(self): # AMBIGUOUS CASES! def test_partial_ix_missing(self): - raise nose.SkipTest("skipping for now") + pytest.skip("skipping for now") result = self.ymd.loc[2000, 0] expected = self.ymd.loc[2000]['A'] diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 4f56419b1323a..2f329f241a5b8 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -4,7 +4,7 @@ from datetime import datetime import operator -import nose +import pytest import numpy as np import pandas as pd @@ -97,7 +97,7 @@ def test_skew(self): try: from scipy.stats import skew except ImportError: - raise nose.SkipTest("no scipy.stats.skew") + pytest.skip("no scipy.stats.skew") def this_skew(x): if len(x) < 3: @@ -2059,7 +2059,7 @@ def test_to_excel(self): import openpyxl # noqa from pandas.io.excel import ExcelFile except ImportError: - raise nose.SkipTest("need xlwt xlrd openpyxl") + pytest.skip("need xlwt xlrd openpyxl") for ext in ['xls', 'xlsx']: with ensure_clean('__tmp__.' + ext) as path: @@ -2067,7 +2067,7 @@ def test_to_excel(self): try: reader = ExcelFile(path) except ImportError: - raise nose.SkipTest("need xlwt xlrd openpyxl") + pytest.skip("need xlwt xlrd openpyxl") for item, df in self.panel.iteritems(): recdf = reader.parse(str(item), index_col=0) @@ -2079,14 +2079,14 @@ def test_to_excel_xlsxwriter(self): import xlsxwriter # noqa from pandas.io.excel import ExcelFile except ImportError: - raise nose.SkipTest("Requires xlrd and xlsxwriter. Skipping test.") + pytest.skip("Requires xlrd and xlsxwriter. 
Skipping test.") with ensure_clean('__tmp__.xlsx') as path: self.panel.to_excel(path, engine='xlsxwriter') try: reader = ExcelFile(path) except ImportError as e: - raise nose.SkipTest("cannot write excel file: %s" % e) + pytest.skip("cannot write excel file: %s" % e) for item, df in self.panel.iteritems(): recdf = reader.parse(str(item), index_col=0) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 96864c626ba7f..902b42e7d77d7 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -2,7 +2,7 @@ from datetime import datetime from pandas.compat import range, lrange import operator -import nose +import pytest import numpy as np @@ -66,7 +66,7 @@ def test_skew(self): try: from scipy.stats import skew except ImportError: - raise nose.SkipTest("no scipy.stats.skew") + pytest.skip("no scipy.stats.skew") def this_skew(x): if len(x) < 3: diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 466e9ee5a30b8..07bfdc8fc9078 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import pandas as pd import unittest -import nose +import pytest import numpy as np import sys from pandas import Series, DataFrame @@ -10,8 +10,7 @@ raise_with_traceback, assert_index_equal, assert_series_equal, assert_frame_equal, assert_numpy_array_equal, - RNGContext, assertRaises, - skip_if_no_package_deco) + RNGContext) from pandas.compat import is_platform_windows # let's get meta. @@ -167,8 +166,8 @@ class TestAssertNumpyArrayEqual(tm.TestCase): def test_numpy_array_equal_message(self): if is_platform_windows(): - raise nose.SkipTest("windows has incomparable line-endings " - "and uses L on the shape") + pytest.skip("windows has incomparable line-endings " + "and uses L on the shape") expected = """numpy array are different @@ -295,8 +294,8 @@ def test_numpy_array_equal_message(self): def test_numpy_array_equal_object_message(self): if is_platform_windows(): - raise nose.SkipTest("windows has incomparable line-endings " - "and uses L on the shape") + pytest.skip("windows has incomparable line-endings " + "and uses L on the shape") a = np.array([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-01')]) b = np.array([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]) @@ -772,27 +771,9 @@ class TestLocale(tm.TestCase): def test_locale(self): if sys.platform == 'win32': - raise nose.SkipTest( + pytest.skip( "skipping on win platforms as locale not available") # GH9744 locales = tm.get_locales() self.assertTrue(len(locales) >= 1) - - -def test_skiptest_deco(): - from nose import SkipTest - - @skip_if_no_package_deco("fakepackagename") - def f(): - pass - with assertRaises(SkipTest): - f() - - @skip_if_no_package_deco("numpy") - def f(): - pass - # hack to ensure that SkipTest is *not* raised - with assertRaises(ValueError): - f() - raise ValueError diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 48861fc6a9528..3add568c1ea99 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1,9 +1,8 @@ from itertools import product -import nose +import pytest import sys import warnings -from nose.tools import assert_raises from datetime import datetime from numpy.random import randn import numpy as np @@ -726,7 +725,8 @@ def check_dtypes(self, f, f_name, d, d_name, exp): else: # other methods not Implemented ATM - assert_raises(NotImplementedError, f, roll) + with pytest.raises(NotImplementedError): + f(roll) class 
TestDtype_timedelta(DatetimeLike): @@ -741,8 +741,8 @@ class TestDtype_datetime64UTC(DatetimeLike): dtype = 'datetime64[ns, UTC]' def _create_data(self): - raise nose.SkipTest("direct creation of extension dtype " - "datetime64[ns, UTC] is not supported ATM") + pytest.skip("direct creation of extension dtype " + "datetime64[ns, UTC] is not supported ATM") class TestMoments(Base): @@ -1160,7 +1160,7 @@ def test_rolling_skew(self): try: from scipy.stats import skew except ImportError: - raise nose.SkipTest('no scipy') + pytest.skip('no scipy') self._check_moment_func(mom.rolling_skew, lambda x: skew(x, bias=False), name='skew') @@ -1168,14 +1168,14 @@ def test_rolling_kurt(self): try: from scipy.stats import kurtosis except ImportError: - raise nose.SkipTest('no scipy') + pytest.skip('no scipy') self._check_moment_func(mom.rolling_kurt, lambda x: kurtosis(x, bias=False), name='kurt') def test_fperr_robustness(self): # TODO: remove this once python 2.5 out of picture if PY3: - raise nose.SkipTest("doesn't work on python 3") + pytest.skip("doesn't work on python 3") # #2114 data = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a@\xaa\xaa\xaa\xaa\xaa\xaa\x02@8\x8e\xe38\x8e\xe3\xe8?z\t\xed%\xb4\x97\xd0?\xa2\x0c<\xdd\x9a\x1f\xb6?\x82\xbb\xfa&y\x7f\x9d?\xac\'\xa7\xc4P\xaa\x83?\x90\xdf\xde\xb0k8j?`\xea\xe9u\xf2zQ?*\xe37\x9d\x98N7?\xe2.\xf5&v\x13\x1f?\xec\xc9\xf8\x19\xa4\xb7\x04?\x90b\xf6w\x85\x9f\xeb>\xb5A\xa4\xfaXj\xd2>F\x02\xdb\xf8\xcb\x8d\xb8>.\xac<\xfb\x87^\xa0>\xe8:\xa6\xf9_\xd3\x85>\xfb?\xe2cUU\xfd?\xfc\x7fA\xed8\x8e\xe3?\xa5\xaa\xac\x91\xf6\x12\xca?n\x1cs\xb6\xf9a\xb1?\xe8%D\xf3L-\x97?5\xddZD\x11\xe7~?#>\xe7\x82\x0b\x9ad?\xd9R4Y\x0fxK?;7x;\nP2?N\xf4JO\xb8j\x18?4\xf81\x8a%G\x00?\x9a\xf5\x97\r2\xb4\xe5>\xcd\x9c\xca\xbcB\xf0\xcc>3\x13\x87(\xd7J\xb3>\x99\x19\xb4\xe0\x1e\xb9\x99>ff\xcd\x95\x14&\x81>\x88\x88\xbc\xc7p\xddf>`\x0b\xa6_\x96|N>@\xb2n\xea\x0eS4>U\x98\x938i\x19\x1b>\x8eeb\xd0\xf0\x10\x02>\xbd\xdc-k\x96\x16\xe8=(\x93\x1e\xf2\x0e\x0f\xd0=\xe0n\xd3Bii\xb5=*\xe9\x19Y\x8c\x8c\x9c=\xc6\xf0\xbb\x90]\x08\x83=]\x96\xfa\xc0|`i=>d\xfc\xd5\xfd\xeaP=R0\xfb\xc7\xa7\x8e6=\xc2\x95\xf9_\x8a\x13\x1e=\xd6c\xa6\xea\x06\r\x04=r\xda\xdd8\t\xbc\xea<\xf6\xe6\x93\xd0\xb0\xd2\xd1<\x9d\xdeok\x96\xc3\xb7<&~\xea9s\xaf\x9f\xb8\x02@\xc6\xd2&\xfd\xa8\xf5\xe8?\xd9\xe1\x19\xfe\xc5\xa3\xd0?v\x82"\xa8\xb2/\xb6?\x9dX\x835\xee\x94\x9d?h\x90W\xce\x9e\xb8\x83?\x8a\xc0th~Kj?\\\x80\xf8\x9a\xa9\x87Q?%\xab\xa0\xce\x8c_7?1\xe4\x80\x13\x11*\x1f? 
\x98\x00\r\xb6\xc6\x04?\x80u\xabf\x9d\xb3\xeb>UNrD\xbew\xd2>\x1c\x13C[\xa8\x9f\xb8>\x12b\xd7m-\x1fQ@\xe3\x85>\xe6\x91)l\x00/m>Da\xc6\xf2\xaatS>\x05\xd7]\xee\xe3\xf09>' # noqa diff --git a/pandas/tests/tseries/test_converter.py b/pandas/tests/tseries/test_converter.py index b934aaed7d41f..5351e26f0e62b 100644 --- a/pandas/tests/tseries/test_converter.py +++ b/pandas/tests/tseries/test_converter.py @@ -1,3 +1,4 @@ +import pytest from datetime import datetime, date import numpy as np @@ -7,11 +8,7 @@ from pandas.tseries.offsets import Second, Milli, Micro, Day from pandas.compat.numpy import np_datetime64_compat -try: - import pandas.tseries.converter as converter -except ImportError: - import nose - raise nose.SkipTest("no pandas.tseries.converter, skipping") +converter = pytest.importorskip('pandas.tseries.converter') def test_timtetonum_accepts_unicode(): diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index 7c5a4c3df28b2..dfa1e94e4dc11 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -3,8 +3,7 @@ from datetime import date, datetime, timedelta from dateutil.relativedelta import relativedelta -import nose -from nose.tools import assert_raises +import pytest from pandas.compat import range, iteritems from pandas import compat @@ -59,7 +58,8 @@ def test_ole2datetime(): actual = ole2datetime(60000) assert actual == datetime(2064, 4, 8) - assert_raises(ValueError, ole2datetime, 60) + with pytest.raises(ValueError): + ole2datetime(60) def test_to_datetime1(): @@ -159,7 +159,7 @@ def test_apply_out_of_range(self): except (tslib.OutOfBoundsDatetime): raise except (ValueError, KeyError) as e: - raise nose.SkipTest( + pytest.skip( "cannot create out_of_range offset: {0} {1}".format( str(self).split('.')[-1], e)) diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index 0716a13fac3fe..2672db13a959f 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -1,7 +1,7 @@ import os import locale import codecs -import nose +import pytest import decimal import numpy as np @@ -68,7 +68,7 @@ def setUpClass(cls): cls.locales = tm.get_locales() if not cls.locales: - raise nose.SkipTest("No locales found") + pytest.skip("No locales found") tm._skip_if_windows() @@ -83,20 +83,20 @@ def test_get_locales(self): def test_get_locales_prefix(self): if len(self.locales) == 1: - raise nose.SkipTest("Only a single locale found, no point in " - "trying to test filtering locale prefixes") + pytest.skip("Only a single locale found, no point in " + "trying to test filtering locale prefixes") first_locale = self.locales[0] assert len(tm.get_locales(prefix=first_locale[:2])) > 0 def test_set_locale(self): if len(self.locales) == 1: - raise nose.SkipTest("Only a single locale found, no point in " - "trying to test setting another locale") + pytest.skip("Only a single locale found, no point in " + "trying to test setting another locale") if all(x is None for x in CURRENT_LOCALE): # Not sure why, but on some travis runs with pytest, # getlocale() returned (None, None). - raise nose.SkipTest("CURRENT_LOCALE is not set.") + pytest.skip("CURRENT_LOCALE is not set.") if LOCALE_OVERRIDE is None: lang, enc = 'it_CH', 'UTF-8' @@ -456,7 +456,7 @@ def test_downcast_limits(self): # Test the limits of each downcast. Bug: #14401. # Check to make sure numpy is new enough to run this test. 
if _np_version_under1p9: - raise nose.SkipTest("Numpy version is under 1.9") + pytest.skip("Numpy version is under 1.9") i = 'integer' u = 'unsigned' diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 85d77c2f6f57c..1b501eb1d9bda 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -206,65 +206,6 @@ def wrapped(*args, **kwargs): return wrapped -class KnownFailureTest(Exception): - """Raise this exception to mark a test as a known failing test.""" - pass - - -def knownfailureif(fail_condition, msg=None): - """ - Make function raise KnownFailureTest exception if given condition is true. - - If the condition is a callable, it is used at runtime to dynamically - make the decision. This is useful for tests that may require costly - imports, to delay the cost until the test suite is actually executed. - - Parameters - ---------- - fail_condition : bool or callable - Flag to determine whether to mark the decorated test as a known - failure (if True) or not (if False). - msg : str, optional - Message to give on raising a KnownFailureTest exception. - Default is None. - - Returns - ------- - decorator : function - Decorator, which, when applied to a function, causes SkipTest - to be raised when `skip_condition` is True, and the function - to be called normally otherwise. - - Notes - ----- - The decorator itself is decorated with the ``nose.tools.make_decorator`` - function in order to transmit function name, and various other metadata. - - """ - if msg is None: - msg = 'Test skipped due to known failure' - - # Allow for both boolean or callable known failure conditions. - if callable(fail_condition): - fail_val = fail_condition - else: - fail_val = lambda: fail_condition - - def knownfail_decorator(f): - # Local import to avoid a hard nose dependency and only incur the - # import time overhead at actual test-time. 
- import nose - - def knownfailer(*args, **kwargs): - if fail_val(): - raise KnownFailureTest(msg) - else: - return f(*args, **kwargs) - return nose.tools.make_decorator(f)(knownfailer) - - return knownfail_decorator - - def make_signature(func): """ Returns a string repr of the arg list of a func call, with any defaults diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py index c3962ad9c823c..7c5148caf7e74 100644 --- a/pandas/util/print_versions.py +++ b/pandas/util/print_versions.py @@ -63,7 +63,7 @@ def show_versions(as_json=False): deps = [ # (MODULE_NAME, f(mod) -> mod version) ("pandas", lambda mod: mod.__version__), - ("nose", lambda mod: mod.__version__), + ("pytest", lambda mod: mod.__version__), ("pip", lambda mod: mod.__version__), ("setuptools", lambda mod: mod.__version__), ("Cython", lambda mod: mod.__version__), diff --git a/pandas/util/testing.py b/pandas/util/testing.py index c3633c945f60a..566ceec027b2b 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -248,9 +248,9 @@ def close(fignum=None): def _skip_if_32bit(): - import nose + import pytest if is_platform_32bit(): - raise nose.SkipTest("skipping for 32 bit") + pytest.skip("skipping for 32 bit") def mplskip(cls): @@ -262,8 +262,8 @@ def setUpClass(cls): import matplotlib as mpl mpl.use("Agg", warn=False) except ImportError: - import nose - raise nose.SkipTest("matplotlib not installed") + import pytest + pytest.skip("matplotlib not installed") cls.setUpClass = setUpClass return cls @@ -273,102 +273,102 @@ def _skip_if_no_mpl(): try: import matplotlib # noqa except ImportError: - import nose - raise nose.SkipTest("matplotlib not installed") + import pytest + pytest.skip("matplotlib not installed") def _skip_if_mpl_1_5(): import matplotlib v = matplotlib.__version__ if v > LooseVersion('1.4.3') or v[0] == '0': - import nose - raise nose.SkipTest("matplotlib 1.5") + import pytest + pytest.skip("matplotlib 1.5") def _skip_if_no_scipy(): try: import scipy.stats # noqa except ImportError: - import nose - raise nose.SkipTest("no scipy.stats module") + import pytest + pytest.skip("no scipy.stats module") try: import scipy.interpolate # noqa except ImportError: - import nose - raise nose.SkipTest('scipy.interpolate missing') + import pytest + pytest.skip('scipy.interpolate missing') def _skip_if_scipy_0_17(): import scipy v = scipy.__version__ if v >= LooseVersion("0.17.0"): - import nose - raise nose.SkipTest("scipy 0.17") + import pytest + pytest.skip("scipy 0.17") def _skip_if_no_lzma(): try: return compat.import_lzma() except ImportError: - import nose - raise nose.SkipTest('need backports.lzma to run') + import pytest + pytest.skip('need backports.lzma to run') def _skip_if_no_xarray(): try: import xarray except ImportError: - import nose - raise nose.SkipTest("xarray not installed") + import pytest + pytest.skip("xarray not installed") v = xarray.__version__ if v < LooseVersion('0.7.0'): - import nose - raise nose.SkipTest("xarray not version is too low: {0}".format(v)) + import pytest + pytest.skip("xarray not version is too low: {0}".format(v)) def _skip_if_no_pytz(): try: import pytz # noqa except ImportError: - import nose - raise nose.SkipTest("pytz not installed") + import pytest + pytest.skip("pytz not installed") def _skip_if_no_dateutil(): try: import dateutil # noqa except ImportError: - import nose - raise nose.SkipTest("dateutil not installed") + import pytest + pytest.skip("dateutil not installed") def _skip_if_windows_python_3(): if PY3 and is_platform_windows(): - 
import nose - raise nose.SkipTest("not used on python 3/win32") + import pytest + pytest.skip("not used on python 3/win32") def _skip_if_windows(): if is_platform_windows(): - import nose - raise nose.SkipTest("Running on Windows") + import pytest + pytest.skip("Running on Windows") def _skip_if_no_pathlib(): try: from pathlib import Path # noqa except ImportError: - import nose - raise nose.SkipTest("pathlib not available") + import pytest + pytest.skip("pathlib not available") def _skip_if_no_localpath(): try: from py.path import local as LocalPath # noqa except ImportError: - import nose - raise nose.SkipTest("py.path not installed") + import pytest + pytest.skip("py.path not installed") def _incompat_bottleneck_version(method): @@ -392,27 +392,27 @@ def skip_if_no_ne(engine='numexpr'): if engine == 'numexpr': if not _USE_NUMEXPR: - import nose - raise nose.SkipTest("numexpr enabled->{enabled}, " - "installed->{installed}".format( - enabled=_USE_NUMEXPR, - installed=_NUMEXPR_INSTALLED)) + import pytest + pytest.skip("numexpr enabled->{enabled}, " + "installed->{installed}".format( + enabled=_USE_NUMEXPR, + installed=_NUMEXPR_INSTALLED)) def _skip_if_has_locale(): import locale lang, _ = locale.getlocale() if lang is not None: - import nose - raise nose.SkipTest("Specific locale is set {0}".format(lang)) + import pytest + pytest.skip("Specific locale is set {0}".format(lang)) def _skip_if_not_us_locale(): import locale lang, _ = locale.getlocale() if lang != 'en_US': - import nose - raise nose.SkipTest("Specific locale is set {0}".format(lang)) + import pytest + pytest.skip("Specific locale is set {0}".format(lang)) # ----------------------------------------------------------------------------- # locale utilities @@ -662,8 +662,8 @@ def ensure_clean(filename=None, return_filelike=False): try: fd, filename = tempfile.mkstemp(suffix=filename) except UnicodeEncodeError: - import nose - raise nose.SkipTest('no unicode file names on this system') + import pytest + pytest.skip('no unicode file names on this system') try: yield filename @@ -1997,9 +1997,7 @@ def __init__(self, *args, **kwargs): # Dependency checks. Copied this from Nipy/Nipype (Copyright of # respective developers, license: BSD-3) -def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion, - exc_failed_import=ImportError, - exc_failed_check=RuntimeError): +def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion): """Check that the minimal version of the required package is installed. Parameters @@ -2015,10 +2013,6 @@ def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion, checker : object, optional The class that will perform the version checking. Default is distutils.version.LooseVersion. - exc_failed_import : Exception, optional - Class of the exception to be thrown if import failed. - exc_failed_check : Exception, optional - Class of the exception to be thrown if version check failed. 
Examples -------- @@ -2027,6 +2021,7 @@ def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion, """ + import pytest if app: msg = '%s requires %s' % (app, pkg_name) else: @@ -2036,46 +2031,24 @@ def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion, try: mod = __import__(pkg_name) except ImportError: - raise exc_failed_import(msg) - if not version: - return + mod = None try: have_version = mod.__version__ except AttributeError: - raise exc_failed_check('Cannot find version for %s' % pkg_name) - if checker(have_version) < checker(version): - raise exc_failed_check(msg) + pytest.skip('Cannot find version for %s' % pkg_name) + if version and checker(have_version) < checker(version): + pytest.skip(msg) def skip_if_no_package(*args, **kwargs): - """Raise SkipTest if package_check fails + """pytest.skip() if package_check fails Parameters ---------- *args Positional parameters passed to `package_check` *kwargs Keyword parameters passed to `package_check` """ - from nose import SkipTest - package_check(exc_failed_import=SkipTest, - exc_failed_check=SkipTest, - *args, **kwargs) - - -def skip_if_no_package_deco(pkg_name, version=None, app='pandas'): - from nose import SkipTest - - def deco(func): - @wraps(func) - def wrapper(*args, **kwargs): - package_check(pkg_name, version=version, app=app, - exc_failed_import=SkipTest, - exc_failed_check=SkipTest) - return func(*args, **kwargs) - return wrapper - return deco -# -# Additional tags decorators for nose -# + package_check(*args, **kwargs) def optional_args(decorator): @@ -2255,18 +2228,17 @@ def network(t, url="http://www.google.com", >>> test_something() Traceback (most recent call last): ... - SkipTest Errors not related to networking will always be raised. """ - from nose import SkipTest + from pytest import skip t.network = True @wraps(t) def wrapper(*args, **kwargs): if check_before_test and not raise_on_error: if not can_connect(url, error_classes): - raise SkipTest + skip() try: return t(*args, **kwargs) except Exception as e: @@ -2275,8 +2247,8 @@ def wrapper(*args, **kwargs): errno = getattr(e.reason, 'errno', None) if errno in skip_errnos: - raise SkipTest("Skipping test due to known errno" - " and error %s" % e) + skip("Skipping test due to known errno" + " and error %s" % e) try: e_str = traceback.format_exc(e) @@ -2284,8 +2256,8 @@ def wrapper(*args, **kwargs): e_str = str(e) if any([m.lower() in e_str.lower() for m in _skip_on_messages]): - raise SkipTest("Skipping test because exception " - "message is known and error %s" % e) + skip("Skipping test because exception " + "message is known and error %s" % e) if not isinstance(e, error_classes): raise @@ -2293,8 +2265,8 @@ def wrapper(*args, **kwargs): if raise_on_error or can_connect(url, error_classes): raise else: - raise SkipTest("Skipping test due to lack of connectivity" - " and error %s" % e) + skip("Skipping test due to lack of connectivity" + " and error %s" % e) return wrapper @@ -2775,8 +2747,8 @@ def set_timezone(tz): 'EDT' """ if is_platform_windows(): - import nose - raise nose.SkipTest("timezone setting not supported on windows") + import pytest + pytest.skip("timezone setting not supported on windows") import os import time diff --git a/setup.cfg b/setup.cfg index 143470f7ee350..45d98dd733f1f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,7 @@ tag_prefix = v parentdir_prefix = pandas- [flake8] -ignore = E731 +ignore = E731,E402 [yapf] based_on_style = pep8 From 2f971a23a67ef9bc51453d94ae7b9626e12be006 Mon Sep 17 00:00:00 
From 2f971a23a67ef9bc51453d94ae7b9626e12be006 Mon Sep 17 00:00:00 2001
From: "John W. O'Brien"
Date: Sat, 11 Feb 2017 21:21:56 -0500
Subject: [PATCH 041/933] BUG: Avoid grafting missing examples directory
 (#15373)

---
 MANIFEST.in | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index 2d26fbfd6adaf..b7a7e6039ac9a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -7,7 +7,6 @@ include setup.py
 graft doc
 prune doc/build
-graft examples
 graft pandas

 global-exclude *.so

From 1bad601641cc024cc4d0c1215b12c9d0066b8103 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Sat, 11 Feb 2017 21:53:44 -0500
Subject: [PATCH 042/933] CLN: remove pandas/io/auth.py, left over from ga.py
 (now removed) (#15374)

---
 pandas/io/auth.py | 126 ----------------------------------------------
 1 file changed, 126 deletions(-)
 delete mode 100644 pandas/io/auth.py

diff --git a/pandas/io/auth.py b/pandas/io/auth.py
deleted file mode 100644
index e42df6a7309b7..0000000000000
--- a/pandas/io/auth.py
+++ /dev/null
@@ -1,126 +0,0 @@
-from __future__ import print_function
-# see LICENSES directory for copyright and license
-import os
-import sys
-import logging
-
-import httplib2
-
-import apiclient.discovery as gapi
-import gflags
-import oauth2client.file as auth_file
-import oauth2client.client as oauth
-import oauth2client.tools as tools
-OOB_CALLBACK_URN = oauth.OOB_CALLBACK_URN
-
-
-class AuthenticationConfigError(ValueError):
- pass
-
-FLOWS = {}
-FLAGS = gflags.FLAGS
-DEFAULT_SECRETS = os.path.join(
- os.path.dirname(__file__), 'client_secrets.json')
-DEFAULT_SCOPE = 'https://www.googleapis.com/auth/analytics.readonly'
-DEFAULT_TOKEN_FILE = os.path.join(os.path.dirname(__file__), 'analytics.dat')
-MISSING_CLIENT_MSG = """
-WARNING: Please configure OAuth 2.0
-
-You need to populate the client_secrets.json file found at:
-
- %s
-
-with information from the APIs Console
-.
-
-"""
-DOC_URL = ('https://developers.google.com/api-client-library/python/guide/'
- 'aaa_client_secrets')
-
-gflags.DEFINE_enum('logging_level', 'ERROR',
- ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
- 'Set the level of logging detail.')
-
-# Name of file that will store the access and refresh tokens to access
-# the API without having to login each time. Make sure this file is in
-# a secure place.
-
-
-def process_flags(flags=None):
- """Uses the command-line flags to set the logging level.
-
- Args:
- argv: List of command line arguments passed to the python script.
- """
- if flags is None:
- flags = []
-
- # Let the gflags module process the command-line arguments.
- try:
- FLAGS(flags)
- except gflags.FlagsError as e:
- print('%s\nUsage: %s ARGS\n%s' % (e, str(flags), FLAGS))
- sys.exit(1)
-
- # Set the logging according to the command-line flag.
- logging.getLogger().setLevel(getattr(logging, FLAGS.logging_level))
-
-
-def get_flow(secret, scope, redirect):
- """
- Retrieve an authentication flow object based on the given
- configuration in the secret file name, the authentication scope,
- and a redirect URN
- """
- key = (secret, scope, redirect)
- flow = FLOWS.get(key, None)
- if flow is None:
- msg = MISSING_CLIENT_MSG % secret
- if not os.path.exists(secret):
- raise AuthenticationConfigError(msg)
- flow = oauth.flow_from_clientsecrets(secret, scope,
- redirect_uri=redirect,
- message=msg)
- FLOWS[key] = flow
- return flow
-
-
-def make_token_store(fpath=None):
- """create token storage from give file name"""
- if fpath is None:
- fpath = DEFAULT_TOKEN_FILE
- return auth_file.Storage(fpath)
-
-
-def authenticate(flow, storage=None):
- """
- Try to retrieve a valid set of credentials from the token store if possible
- Otherwise use the given authentication flow to obtain new credentials
- and return an authenticated http object
-
- Parameters
- ----------
- flow : authentication workflow
- storage: token storage, default None
- """
- http = httplib2.Http()
-
- # Prepare credentials, and authorize HTTP object with them.
- credentials = storage.get()
- if credentials is None or credentials.invalid:
- credentials = tools.run(flow, storage)
-
- http = credentials.authorize(http)
- return http
-
-
-def init_service(http):
- """
- Use the given http object to build the analytics service object
- """
- return gapi.build('analytics', 'v3', http=http)
-
-
-def reset_default_token_store():
- import os
- os.remove(DEFAULT_TOKEN_FILE)
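For historical reference, the 126 deleted lines above implemented the classic oauth2client credential-caching flow: reuse a token from local storage while it is still valid, run the interactive flow only otherwise, then authorize an httplib2 client. A condensed sketch of that pattern, using the same (long-deprecated) oauth2client API as the removed module; the function name and default file names here are illustrative:

    import httplib2
    import oauth2client.client as oauth
    import oauth2client.file as auth_file
    import oauth2client.tools as tools


    def authenticated_http(secrets='client_secrets.json',
                           token_file='analytics.dat',
                           scope='https://www.googleapis.com/auth/analytics.readonly'):
        # Reuse cached credentials when possible; otherwise run the OAuth
        # flow and persist the refreshed token (as authenticate() did).
        storage = auth_file.Storage(token_file)
        credentials = storage.get()
        if credentials is None or credentials.invalid:
            flow = oauth.flow_from_clientsecrets(secrets, scope)
            credentials = tools.run(flow, storage)
        return credentials.authorize(httplib2.Http())

Nothing in pandas imported this module once ga.py was removed, hence the wholesale deletion.
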
From 5fb5228988832ff0328c4d830cb4e2609b882ab1 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Sun, 12 Feb 2017 09:50:55 -0500
Subject: [PATCH 043/933] TST: consolidate remaining tests under pandas.tests

move all remaining tests so that ALL tests are now under pandas/tests

Author: Jeff Reback

Closes #15371 from jreback/tests and squashes the following commits:

43039e4 [Jeff Reback] add in data
118127b [Jeff Reback] wip
bfa6a9c [Jeff Reback] fix data locations
79a79e6 [Jeff Reback] fix import
57437bf [Jeff Reback] fixes
b407586 [Jeff Reback] move io
e13bfe3 [Jeff Reback] move tools
0194e31 [Jeff Reback] move computation
0e6bcb4 [Jeff Reback] rename test_msgpack -> msgpack
c5e4ab8 [Jeff Reback] move sparse
42e60e2 [Jeff Reback] move api tests

---
 pandas/{api/tests => tests/api}/__init__.py | 0
 pandas/{api/tests => tests/api}/test_api.py | 2 +-
 .../tests => tests/computation}/__init__.py | 0
 .../computation}/test_compat.py | 0
 .../tests => tests/computation}/test_eval.py | 0
 pandas/tests/indexes/datetimes/test_ops.py | 3 +-
 pandas/{io/tests => tests/io}/__init__.py | 0
 .../{io/tests => tests/io}/data/S4_EDUC1.dta | Bin
 .../{io/tests => tests/io}/data/banklist.csv | 0
 .../{io/tests => tests/io}/data/banklist.html | 0
 pandas/{io/tests => tests/io}/data/blank.xls | Bin
 pandas/{io/tests => tests/io}/data/blank.xlsm | Bin
 pandas/{io/tests => tests/io}/data/blank.xlsx | Bin
 .../io}/data/blank_with_header.xls | Bin
 .../io}/data/blank_with_header.xlsm | Bin
 .../io}/data/blank_with_header.xlsx | Bin
 .../io}/data/categorical_0_14_1.pickle | 0
 .../io}/data/categorical_0_15_2.pickle | Bin
 .../io}/data/computer_sales_page.html | 0
 .../tests => tests/io}/data/gbq_fake_job.txt | 0
 .../data/html_encoding/chinese_utf-16.html | Bin
 .../data/html_encoding/chinese_utf-32.html | Bin
 .../io}/data/html_encoding/chinese_utf-8.html | 0
 .../io}/data/html_encoding/letz_latin1.html | 0
 pandas/{io/tests => tests/io}/data/iris.csv | 0
.../io}/data/legacy_hdf/datetimetz_object.h5 | Bin .../io}/data/legacy_hdf/legacy.h5 | Bin .../io}/data/legacy_hdf/legacy_0.10.h5 | Bin .../io}/data/legacy_hdf/legacy_table.h5 | Bin .../io}/data/legacy_hdf/legacy_table_0.11.h5 | Bin .../io}/data/legacy_hdf/pytables_native.h5 | Bin .../io}/data/legacy_hdf/pytables_native2.h5 | Bin .../0.16.0/0.16.0_x86_64_darwin_2.7.9.msgpack | Bin .../0.16.2_AMD64_windows_2.7.10.msgpack | Bin .../0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack | Bin .../0.16.2_x86_64_darwin_2.7.10.msgpack | Bin .../0.16.2/0.16.2_x86_64_darwin_2.7.9.msgpack | Bin .../0.16.2/0.16.2_x86_64_darwin_3.4.3.msgpack | Bin .../0.16.2/0.16.2_x86_64_linux_2.7.10.msgpack | Bin .../0.16.2/0.16.2_x86_64_linux_3.4.3.msgpack | Bin .../0.17.0_AMD64_windows_2.7.11.msgpack | Bin .../0.17.0/0.17.0_AMD64_windows_3.4.4.msgpack | Bin .../0.17.0_x86_64_darwin_2.7.11.msgpack | Bin .../0.17.0/0.17.0_x86_64_darwin_3.4.4.msgpack | Bin .../0.17.0/0.17.0_x86_64_linux_2.7.11.msgpack | Bin .../0.17.0/0.17.0_x86_64_linux_3.4.4.msgpack | Bin .../0.17.1_AMD64_windows_2.7.11.msgpack | Bin .../0.17.0/0.17.1_AMD64_windows_3.5.1.msgpack | Bin .../0.17.1_AMD64_windows_2.7.11.msgpack | Bin .../0.17.1/0.17.1_AMD64_windows_3.5.1.msgpack | Bin .../0.17.1_x86_64_darwin_2.7.11.msgpack | Bin .../0.17.1/0.17.1_x86_64_darwin_3.5.1.msgpack | Bin .../0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack | Bin .../0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack | Bin .../0.18.0_AMD64_windows_2.7.11.msgpack | Bin .../0.18.0/0.18.0_AMD64_windows_3.5.1.msgpack | Bin .../0.18.0_x86_64_darwin_2.7.11.msgpack | Bin .../0.18.0/0.18.0_x86_64_darwin_3.5.1.msgpack | Bin .../0.18.1_x86_64_darwin_2.7.12.msgpack | Bin .../0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack | Bin .../0.10.1/AMD64_windows_2.7.3.pickle | Bin .../0.10.1/x86_64_linux_2.7.3.pickle | Bin .../0.11.0/0.11.0_x86_64_linux_3.3.0.pickle | Bin .../0.11.0/x86_64_linux_2.7.3.pickle | Bin .../0.11.0/x86_64_linux_3.3.0.pickle | Bin .../0.12.0/0.12.0_AMD64_windows_2.7.3.pickle | Bin .../0.12.0/0.12.0_x86_64_linux_2.7.3.pickle | Bin .../0.13.0/0.13.0_AMD64_windows_2.7.3.pickle | Bin .../0.13.0/0.13.0_i686_linux_2.6.5.pickle | Bin .../0.13.0/0.13.0_i686_linux_2.7.3.pickle | Bin .../0.13.0/0.13.0_i686_linux_3.2.3.pickle | Bin .../0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle | Bin .../0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle | Bin .../0.13.0/0.13.0_x86_64_linux_2.7.3.pickle | Bin .../0.13.0/0.13.0_x86_64_linux_2.7.8.pickle | Bin .../0.13.0/0.13.0_x86_64_linux_3.3.0.pickle | Bin .../0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle | Bin .../0.14.0/0.14.0_x86_64_linux_2.7.8.pickle | Bin .../0.14.1/0.14.1_x86_64_darwin_2.7.12.pickle | Bin .../0.14.1/0.14.1_x86_64_linux_2.7.8.pickle | Bin .../0.15.0/0.15.0_x86_64_darwin_2.7.12.pickle | Bin .../0.15.0/0.15.0_x86_64_linux_2.7.8.pickle | Bin .../0.15.2/0.15.2_x86_64_darwin_2.7.9.pickle | Bin .../0.16.0/0.16.0_x86_64_darwin_2.7.9.pickle | Bin .../0.16.2/0.16.2_AMD64_windows_2.7.10.pickle | Bin .../0.16.2/0.16.2_AMD64_windows_3.4.3.pickle | Bin .../0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle | Bin .../0.16.2/0.16.2_x86_64_darwin_2.7.9.pickle | Bin .../0.16.2/0.16.2_x86_64_darwin_3.4.3.pickle | Bin .../0.16.2/0.16.2_x86_64_linux_2.7.10.pickle | Bin .../0.16.2/0.16.2_x86_64_linux_3.4.3.pickle | Bin .../0.17.0/0.17.0_AMD64_windows_2.7.11.pickle | Bin .../0.17.0/0.17.0_AMD64_windows_3.4.4.pickle | Bin .../0.17.0/0.17.0_x86_64_darwin_2.7.11.pickle | Bin .../0.17.0/0.17.0_x86_64_darwin_3.4.4.pickle | Bin .../0.17.0/0.17.0_x86_64_linux_2.7.11.pickle | Bin 
.../0.17.0/0.17.0_x86_64_linux_3.4.4.pickle | Bin .../0.17.0/0.17.1_AMD64_windows_2.7.11.pickle | Bin .../0.17.1/0.17.1_AMD64_windows_2.7.11.pickle | Bin .../0.17.1/0.17.1_x86_64_darwin_2.7.11.pickle | Bin .../0.18.0/0.18.0_AMD64_windows_2.7.11.pickle | Bin .../0.18.0/0.18.0_AMD64_windows_3.5.1.pickle | Bin .../0.18.0/0.18.0_x86_64_darwin_2.7.11.pickle | Bin .../0.18.0/0.18.0_x86_64_darwin_3.5.1.pickle | Bin .../0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle | Bin .../0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle | Bin pandas/{io/tests => tests/io}/data/macau.html | 0 .../{io/tests => tests/io}/data/nyse_wsj.html | 0 pandas/{io/tests => tests/io}/data/spam.html | 0 .../tests => tests/io}/data/stata10_115.dta | Bin .../tests => tests/io}/data/stata10_117.dta | Bin .../tests => tests/io}/data/stata11_115.dta | Bin .../tests => tests/io}/data/stata11_117.dta | Bin .../tests => tests/io}/data/stata12_117.dta | Bin .../tests => tests/io}/data/stata14_118.dta | Bin .../{io/tests => tests/io}/data/stata15.dta | Bin .../tests => tests/io}/data/stata1_114.dta | Bin .../tests => tests/io}/data/stata1_117.dta | Bin .../io}/data/stata1_encoding.dta | Bin .../tests => tests/io}/data/stata2_113.dta | Bin .../tests => tests/io}/data/stata2_114.dta | Bin .../tests => tests/io}/data/stata2_115.dta | Bin .../tests => tests/io}/data/stata2_117.dta | Bin pandas/{io/tests => tests/io}/data/stata3.csv | 0 .../tests => tests/io}/data/stata3_113.dta | Bin .../tests => tests/io}/data/stata3_114.dta | Bin .../tests => tests/io}/data/stata3_115.dta | Bin .../tests => tests/io}/data/stata3_117.dta | Bin .../tests => tests/io}/data/stata4_113.dta | Bin .../tests => tests/io}/data/stata4_114.dta | Bin .../tests => tests/io}/data/stata4_115.dta | Bin .../tests => tests/io}/data/stata4_117.dta | Bin pandas/{io/tests => tests/io}/data/stata5.csv | 0 .../tests => tests/io}/data/stata5_113.dta | Bin .../tests => tests/io}/data/stata5_114.dta | Bin .../tests => tests/io}/data/stata5_115.dta | Bin .../tests => tests/io}/data/stata5_117.dta | Bin pandas/{io/tests => tests/io}/data/stata6.csv | 0 .../tests => tests/io}/data/stata6_113.dta | Bin .../tests => tests/io}/data/stata6_114.dta | Bin .../tests => tests/io}/data/stata6_115.dta | Bin .../tests => tests/io}/data/stata6_117.dta | Bin .../tests => tests/io}/data/stata7_111.dta | Bin .../tests => tests/io}/data/stata7_115.dta | Bin .../tests => tests/io}/data/stata7_117.dta | Bin .../tests => tests/io}/data/stata8_113.dta | Bin .../tests => tests/io}/data/stata8_115.dta | Bin .../tests => tests/io}/data/stata8_117.dta | Bin .../tests => tests/io}/data/stata9_115.dta | Bin .../tests => tests/io}/data/stata9_117.dta | Bin pandas/{io/tests => tests/io}/data/test1.csv | 0 pandas/{io/tests => tests/io}/data/test1.xls | Bin pandas/{io/tests => tests/io}/data/test1.xlsm | Bin pandas/{io/tests => tests/io}/data/test1.xlsx | Bin pandas/{io/tests => tests/io}/data/test2.xls | Bin pandas/{io/tests => tests/io}/data/test2.xlsm | Bin pandas/{io/tests => tests/io}/data/test2.xlsx | Bin pandas/{io/tests => tests/io}/data/test3.xls | Bin pandas/{io/tests => tests/io}/data/test3.xlsm | Bin pandas/{io/tests => tests/io}/data/test3.xlsx | Bin pandas/{io/tests => tests/io}/data/test4.xls | Bin pandas/{io/tests => tests/io}/data/test4.xlsm | Bin pandas/{io/tests => tests/io}/data/test4.xlsx | Bin pandas/{io/tests => tests/io}/data/test5.xls | Bin pandas/{io/tests => tests/io}/data/test5.xlsm | Bin pandas/{io/tests => tests/io}/data/test5.xlsx | Bin .../io}/data/test_converters.xls | Bin 
.../io}/data/test_converters.xlsm | Bin .../io}/data/test_converters.xlsx | Bin .../io}/data/test_index_name_pre17.xls | Bin .../io}/data/test_index_name_pre17.xlsm | Bin .../io}/data/test_index_name_pre17.xlsx | Bin .../{io/tests => tests/io}/data/test_mmap.csv | 0 .../io}/data/test_multisheet.xls | Bin .../io}/data/test_multisheet.xlsm | Bin .../io}/data/test_multisheet.xlsx | Bin .../tests => tests/io}/data/test_squeeze.xls | Bin .../tests => tests/io}/data/test_squeeze.xlsm | Bin .../tests => tests/io}/data/test_squeeze.xlsx | Bin .../tests => tests/io}/data/test_types.xls | Bin .../tests => tests/io}/data/test_types.xlsm | Bin .../tests => tests/io}/data/test_types.xlsx | Bin .../io}/data/testdateoverflow.xls | Bin .../io}/data/testdateoverflow.xlsm | Bin .../io}/data/testdateoverflow.xlsx | Bin .../{io/tests => tests/io}/data/testdtype.xls | Bin .../tests => tests/io}/data/testdtype.xlsm | Bin .../tests => tests/io}/data/testdtype.xlsx | Bin .../io}/data/testmultiindex.xls | Bin .../io}/data/testmultiindex.xlsm | Bin .../io}/data/testmultiindex.xlsx | Bin .../tests => tests/io}/data/testskiprows.xls | Bin .../tests => tests/io}/data/testskiprows.xlsm | Bin .../tests => tests/io}/data/testskiprows.xlsx | Bin .../tests => tests/io}/data/times_1900.xls | Bin .../tests => tests/io}/data/times_1900.xlsm | Bin .../tests => tests/io}/data/times_1900.xlsx | Bin .../tests => tests/io}/data/times_1904.xls | Bin .../tests => tests/io}/data/times_1904.xlsm | Bin .../tests => tests/io}/data/times_1904.xlsx | Bin pandas/{io/tests => tests/io}/data/tips.csv | 0 .../tests => tests/io}/data/valid_markup.html | 0 .../io}/data/wikipedia_states.html | 0 .../io}/generate_legacy_storage_files.py | 0 .../{io/tests => tests/io}/json/__init__.py | 0 .../io}/json/data/tsframe_iso_v012.json | 0 .../io}/json/data/tsframe_v012.json | 0 .../tests => tests/io}/json/test_normalize.py | 0 .../tests => tests/io}/json/test_pandas.py | 0 .../{io/tests => tests/io}/json/test_ujson.py | 0 .../{io/tests => tests/io}/parser/__init__.py | 0 .../io}/parser/c_parser_only.py | 0 .../{io/tests => tests/io}/parser/comment.py | 0 .../{io/tests => tests/io}/parser/common.py | 0 .../tests => tests/io}/parser/compression.py | 0 .../tests => tests/io}/parser/converters.py | 0 .../tests => tests/io}/parser/data/iris.csv | 0 .../io}/parser/data/salaries.csv | 0 .../io}/parser/data/salaries.csv.bz2 | Bin .../io}/parser/data/salaries.csv.gz | Bin .../io}/parser/data/salaries.csv.xz | Bin .../io}/parser/data/salaries.csv.zip | Bin .../io}/parser/data/sauron.SHIFT_JIS.csv | 0 .../tests => tests/io}/parser/data/test1.csv | 0 .../io}/parser/data/test1.csv.bz2 | Bin .../io}/parser/data/test1.csv.gz | Bin .../tests => tests/io}/parser/data/test2.csv | 0 .../io}/parser/data/test_mmap.csv | 0 .../tests => tests/io}/parser/data/tips.csv | 0 .../io}/parser/data/unicode_series.csv | 0 .../io}/parser/data/utf16_ex.txt | Bin .../{io/tests => tests/io}/parser/dialect.py | 0 .../{io/tests => tests/io}/parser/dtypes.py | 0 .../{io/tests => tests/io}/parser/header.py | 0 .../tests => tests/io}/parser/index_col.py | 0 .../tests => tests/io}/parser/multithread.py | 0 .../tests => tests/io}/parser/na_values.py | 0 .../tests => tests/io}/parser/parse_dates.py | 0 .../io}/parser/python_parser_only.py | 0 .../{io/tests => tests/io}/parser/quoting.py | 0 .../{io/tests => tests/io}/parser/skiprows.py | 0 .../tests => tests/io}/parser/test_network.py | 0 .../tests => tests/io}/parser/test_parsers.py | 0 .../io}/parser/test_read_fwf.py | 0 
.../io}/parser/test_textreader.py | 0 .../io}/parser/test_unsupported.py | 0 .../{io/tests => tests/io}/parser/usecols.py | 0 .../tests => tests/io}/sas/data/DEMO_G.csv | 0 .../tests => tests/io}/sas/data/DEMO_G.xpt | Bin .../tests => tests/io}/sas/data/DRXFCD_G.csv | 0 .../tests => tests/io}/sas/data/DRXFCD_G.xpt | Bin .../tests => tests/io}/sas/data/SSHSV1_A.csv | 0 .../tests => tests/io}/sas/data/SSHSV1_A.xpt | Bin .../tests => tests/io}/sas/data/airline.csv | 0 .../io}/sas/data/airline.sas7bdat | Bin .../io}/sas/data/paxraw_d_short.csv | 0 .../io}/sas/data/paxraw_d_short.xpt | Bin .../io}/sas/data/productsales.csv | 0 .../io}/sas/data/productsales.sas7bdat | Bin .../io}/sas/data/test1.sas7bdat | Bin .../io}/sas/data/test10.sas7bdat | Bin .../io}/sas/data/test11.sas7bdat | Bin .../io}/sas/data/test12.sas7bdat | Bin .../io}/sas/data/test13.sas7bdat | Bin .../io}/sas/data/test14.sas7bdat | Bin .../io}/sas/data/test15.sas7bdat | Bin .../io}/sas/data/test16.sas7bdat | Bin .../io}/sas/data/test2.sas7bdat | Bin .../io}/sas/data/test3.sas7bdat | Bin .../io}/sas/data/test4.sas7bdat | Bin .../io}/sas/data/test5.sas7bdat | Bin .../io}/sas/data/test6.sas7bdat | Bin .../io}/sas/data/test7.sas7bdat | Bin .../io}/sas/data/test8.sas7bdat | Bin .../io}/sas/data/test9.sas7bdat | Bin .../io}/sas/data/test_12659.csv | 0 .../io}/sas/data/test_12659.sas7bdat | Bin .../io}/sas/data/test_sas7bdat_1.csv | 0 .../io}/sas/data/test_sas7bdat_2.csv | 0 pandas/{io/tests => tests/io}/sas/test_sas.py | 0 .../tests => tests/io}/sas/test_sas7bdat.py | 0 .../{io/tests => tests/io}/sas/test_xport.py | 0 .../{io/tests => tests/io}/test_clipboard.py | 0 pandas/{io/tests => tests/io}/test_common.py | 0 .../io}/test_date_converters.py | 0 pandas/{io/tests => tests/io}/test_excel.py | 0 pandas/{io/tests => tests/io}/test_feather.py | 0 pandas/{io/tests => tests/io}/test_gbq.py | 0 pandas/{io/tests => tests/io}/test_html.py | 0 pandas/{io/tests => tests/io}/test_packers.py | 2 +- pandas/{io/tests => tests/io}/test_pickle.py | 2 +- .../{io/tests => tests/io}/test_pytables.py | 0 pandas/{io/tests => tests/io}/test_s3.py | 0 pandas/{io/tests => tests/io}/test_sql.py | 0 pandas/{io/tests => tests/io}/test_stata.py | 0 .../tests => tests/msgpack}/__init__.py | 0 .../{test_msgpack => msgpack}/test_buffer.py | 0 .../{test_msgpack => msgpack}/test_case.py | 0 .../{test_msgpack => msgpack}/test_except.py | 0 .../test_extension.py | 0 .../{test_msgpack => msgpack}/test_format.py | 0 .../{test_msgpack => msgpack}/test_limits.py | 0 .../{test_msgpack => msgpack}/test_newspec.py | 0 .../{test_msgpack => msgpack}/test_obj.py | 0 .../{test_msgpack => msgpack}/test_pack.py | 0 .../test_read_size.py | 0 .../{test_msgpack => msgpack}/test_seq.py | 0 .../test_sequnpack.py | 0 .../{test_msgpack => msgpack}/test_subtype.py | 0 .../{test_msgpack => msgpack}/test_unpack.py | 0 .../test_unpack_raw.py | 0 .../{test_msgpack => sparse}/__init__.py | 0 .../sparse}/test_arithmetics.py | 0 .../tests => tests/sparse}/test_array.py | 0 .../sparse}/test_combine_concat.py | 0 .../tests => tests/sparse}/test_format.py | 0 .../tests => tests/sparse}/test_frame.py | 0 .../tests => tests/sparse}/test_groupby.py | 0 .../tests => tests/sparse}/test_indexing.py | 0 .../tests => tests/sparse}/test_libsparse.py | 0 .../tests => tests/sparse}/test_list.py | 0 .../tests => tests/sparse}/test_pivot.py | 0 .../tests => tests/sparse}/test_series.py | 0 .../{tools/tests => tests/tools}/__init__.py | 0 .../tools}/data/allow_exact_matches.csv | 0 
.../allow_exact_matches_and_tolerance.csv | 0 .../tests => tests/tools}/data/asof.csv | 0 .../tests => tests/tools}/data/asof2.csv | 0 .../tests => tests/tools}/data/cut_data.csv | 0 .../tests => tests/tools}/data/quotes.csv | 0 .../tests => tests/tools}/data/quotes2.csv | 0 .../tests => tests/tools}/data/tolerance.csv | 0 .../tests => tests/tools}/data/trades.csv | 0 .../tests => tests/tools}/data/trades2.csv | 0 .../tests => tests/tools}/test_concat.py | 0 .../tests => tests/tools}/test_hashing.py | 0 .../{tools/tests => tests/tools}/test_join.py | 2 +- .../tests => tests/tools}/test_merge.py | 0 .../tests => tests/tools}/test_merge_asof.py | 0 .../tools}/test_merge_ordered.py | 0 .../tests => tests/tools}/test_pivot.py | 0 .../{tools/tests => tests/tools}/test_tile.py | 0 .../{tools/tests => tests/tools}/test_util.py | 0 setup.py | 64 +++++++++--------- 344 files changed, 38 insertions(+), 37 deletions(-) rename pandas/{api/tests => tests/api}/__init__.py (100%) rename pandas/{api/tests => tests/api}/test_api.py (99%) rename pandas/{computation/tests => tests/computation}/__init__.py (100%) rename pandas/{computation/tests => tests/computation}/test_compat.py (100%) rename pandas/{computation/tests => tests/computation}/test_eval.py (100%) rename pandas/{io/tests => tests/io}/__init__.py (100%) rename pandas/{io/tests => tests/io}/data/S4_EDUC1.dta (100%) rename pandas/{io/tests => tests/io}/data/banklist.csv (100%) rename pandas/{io/tests => tests/io}/data/banklist.html (100%) rename pandas/{io/tests => tests/io}/data/blank.xls (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/blank.xlsm (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/blank.xlsx (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/blank_with_header.xls (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/blank_with_header.xlsm (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/blank_with_header.xlsx (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/categorical_0_14_1.pickle (100%) rename pandas/{io/tests => tests/io}/data/categorical_0_15_2.pickle (100%) rename pandas/{io/tests => tests/io}/data/computer_sales_page.html (100%) rename pandas/{io/tests => tests/io}/data/gbq_fake_job.txt (100%) rename pandas/{io/tests => tests/io}/data/html_encoding/chinese_utf-16.html (100%) rename pandas/{io/tests => tests/io}/data/html_encoding/chinese_utf-32.html (100%) rename pandas/{io/tests => tests/io}/data/html_encoding/chinese_utf-8.html (100%) rename pandas/{io/tests => tests/io}/data/html_encoding/letz_latin1.html (100%) rename pandas/{io/tests => tests/io}/data/iris.csv (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/datetimetz_object.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/legacy.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/legacy_0.10.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/legacy_table.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/legacy_table_0.11.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/pytables_native.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/pytables_native2.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.0/0.16.0_x86_64_darwin_2.7.9.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack (100%) rename pandas/{io/tests => 
tests/io}/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.9.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_3.4.3.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_2.7.10.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_3.4.3.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_3.4.4.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_3.4.4.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_3.4.4.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_3.5.1.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_3.5.1.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_3.5.1.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_3.5.1.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_3.5.1.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.10.1/AMD64_windows_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.10.1/x86_64_linux_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.11.0/x86_64_linux_3.3.0.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.12.0/0.12.0_AMD64_windows_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_AMD64_windows_2.7.3.pickle (100%) rename 
pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.6.5.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_i686_linux_3.2.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.8.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_3.3.0.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.14.0/0.14.0_x86_64_linux_2.7.8.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.14.1/0.14.1_x86_64_darwin_2.7.12.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.14.1/0.14.1_x86_64_linux_2.7.8.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.15.0/0.15.0_x86_64_darwin_2.7.12.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.15.0/0.15.0_x86_64_linux_2.7.8.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.15.2/0.15.2_x86_64_darwin_2.7.9.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.0/0.16.0_x86_64_darwin_2.7.9.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.9.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_3.4.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_2.7.10.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_3.4.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_3.4.4.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_3.4.4.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_3.4.4.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.1_AMD64_windows_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.1/0.17.1_AMD64_windows_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.1/0.17.1_x86_64_darwin_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_3.5.1.pickle (100%) rename pandas/{io/tests => 
tests/io}/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_3.5.1.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle (100%) rename pandas/{io/tests => tests/io}/data/macau.html (100%) rename pandas/{io/tests => tests/io}/data/nyse_wsj.html (100%) rename pandas/{io/tests => tests/io}/data/spam.html (100%) rename pandas/{io/tests => tests/io}/data/stata10_115.dta (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/stata10_117.dta (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/stata11_115.dta (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/stata11_117.dta (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/stata12_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata14_118.dta (100%) rename pandas/{io/tests => tests/io}/data/stata15.dta (100%) rename pandas/{io/tests => tests/io}/data/stata1_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata1_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata1_encoding.dta (100%) rename pandas/{io/tests => tests/io}/data/stata2_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata2_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata2_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata2_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata3.csv (100%) rename pandas/{io/tests => tests/io}/data/stata3_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata3_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata3_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata3_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata4_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata4_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata4_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata4_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata5.csv (100%) rename pandas/{io/tests => tests/io}/data/stata5_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata5_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata5_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata5_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata6.csv (100%) rename pandas/{io/tests => tests/io}/data/stata6_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata6_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata6_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata6_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata7_111.dta (100%) rename pandas/{io/tests => tests/io}/data/stata7_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata7_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata8_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata8_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata8_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata9_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata9_117.dta (100%) rename pandas/{io/tests => tests/io}/data/test1.csv (100%) rename pandas/{io/tests => tests/io}/data/test1.xls (100%) rename pandas/{io/tests => tests/io}/data/test1.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test1.xlsx (100%) rename 
pandas/{io/tests => tests/io}/data/test2.xls (100%) rename pandas/{io/tests => tests/io}/data/test2.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test2.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test3.xls (100%) rename pandas/{io/tests => tests/io}/data/test3.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test3.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test4.xls (100%) rename pandas/{io/tests => tests/io}/data/test4.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test4.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test5.xls (100%) rename pandas/{io/tests => tests/io}/data/test5.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test5.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test_converters.xls (100%) rename pandas/{io/tests => tests/io}/data/test_converters.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test_converters.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test_index_name_pre17.xls (100%) rename pandas/{io/tests => tests/io}/data/test_index_name_pre17.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test_index_name_pre17.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test_mmap.csv (100%) rename pandas/{io/tests => tests/io}/data/test_multisheet.xls (100%) rename pandas/{io/tests => tests/io}/data/test_multisheet.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test_multisheet.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test_squeeze.xls (100%) rename pandas/{io/tests => tests/io}/data/test_squeeze.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test_squeeze.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test_types.xls (100%) rename pandas/{io/tests => tests/io}/data/test_types.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test_types.xlsx (100%) rename pandas/{io/tests => tests/io}/data/testdateoverflow.xls (100%) rename pandas/{io/tests => tests/io}/data/testdateoverflow.xlsm (100%) rename pandas/{io/tests => tests/io}/data/testdateoverflow.xlsx (100%) rename pandas/{io/tests => tests/io}/data/testdtype.xls (100%) rename pandas/{io/tests => tests/io}/data/testdtype.xlsm (100%) rename pandas/{io/tests => tests/io}/data/testdtype.xlsx (100%) rename pandas/{io/tests => tests/io}/data/testmultiindex.xls (100%) rename pandas/{io/tests => tests/io}/data/testmultiindex.xlsm (100%) rename pandas/{io/tests => tests/io}/data/testmultiindex.xlsx (100%) rename pandas/{io/tests => tests/io}/data/testskiprows.xls (100%) rename pandas/{io/tests => tests/io}/data/testskiprows.xlsm (100%) rename pandas/{io/tests => tests/io}/data/testskiprows.xlsx (100%) rename pandas/{io/tests => tests/io}/data/times_1900.xls (100%) rename pandas/{io/tests => tests/io}/data/times_1900.xlsm (100%) rename pandas/{io/tests => tests/io}/data/times_1900.xlsx (100%) rename pandas/{io/tests => tests/io}/data/times_1904.xls (100%) rename pandas/{io/tests => tests/io}/data/times_1904.xlsm (100%) rename pandas/{io/tests => tests/io}/data/times_1904.xlsx (100%) rename pandas/{io/tests => tests/io}/data/tips.csv (100%) rename pandas/{io/tests => tests/io}/data/valid_markup.html (100%) rename pandas/{io/tests => tests/io}/data/wikipedia_states.html (100%) rename pandas/{io/tests => tests/io}/generate_legacy_storage_files.py (100%) rename pandas/{io/tests => tests/io}/json/__init__.py (100%) rename pandas/{io/tests => tests/io}/json/data/tsframe_iso_v012.json (100%) rename pandas/{io/tests => tests/io}/json/data/tsframe_v012.json (100%) rename pandas/{io/tests => tests/io}/json/test_normalize.py 
(100%) rename pandas/{io/tests => tests/io}/json/test_pandas.py (100%) rename pandas/{io/tests => tests/io}/json/test_ujson.py (100%) rename pandas/{io/tests => tests/io}/parser/__init__.py (100%) rename pandas/{io/tests => tests/io}/parser/c_parser_only.py (100%) rename pandas/{io/tests => tests/io}/parser/comment.py (100%) rename pandas/{io/tests => tests/io}/parser/common.py (100%) rename pandas/{io/tests => tests/io}/parser/compression.py (100%) rename pandas/{io/tests => tests/io}/parser/converters.py (100%) rename pandas/{io/tests => tests/io}/parser/data/iris.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/salaries.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/salaries.csv.bz2 (100%) rename pandas/{io/tests => tests/io}/parser/data/salaries.csv.gz (100%) rename pandas/{io/tests => tests/io}/parser/data/salaries.csv.xz (100%) rename pandas/{io/tests => tests/io}/parser/data/salaries.csv.zip (100%) rename pandas/{io/tests => tests/io}/parser/data/sauron.SHIFT_JIS.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/test1.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/test1.csv.bz2 (100%) rename pandas/{io/tests => tests/io}/parser/data/test1.csv.gz (100%) rename pandas/{io/tests => tests/io}/parser/data/test2.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/test_mmap.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/tips.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/unicode_series.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/utf16_ex.txt (100%) rename pandas/{io/tests => tests/io}/parser/dialect.py (100%) rename pandas/{io/tests => tests/io}/parser/dtypes.py (100%) rename pandas/{io/tests => tests/io}/parser/header.py (100%) rename pandas/{io/tests => tests/io}/parser/index_col.py (100%) rename pandas/{io/tests => tests/io}/parser/multithread.py (100%) rename pandas/{io/tests => tests/io}/parser/na_values.py (100%) rename pandas/{io/tests => tests/io}/parser/parse_dates.py (100%) rename pandas/{io/tests => tests/io}/parser/python_parser_only.py (100%) rename pandas/{io/tests => tests/io}/parser/quoting.py (100%) rename pandas/{io/tests => tests/io}/parser/skiprows.py (100%) rename pandas/{io/tests => tests/io}/parser/test_network.py (100%) rename pandas/{io/tests => tests/io}/parser/test_parsers.py (100%) rename pandas/{io/tests => tests/io}/parser/test_read_fwf.py (100%) rename pandas/{io/tests => tests/io}/parser/test_textreader.py (100%) rename pandas/{io/tests => tests/io}/parser/test_unsupported.py (100%) rename pandas/{io/tests => tests/io}/parser/usecols.py (100%) rename pandas/{io/tests => tests/io}/sas/data/DEMO_G.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/DEMO_G.xpt (100%) rename pandas/{io/tests => tests/io}/sas/data/DRXFCD_G.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/DRXFCD_G.xpt (100%) rename pandas/{io/tests => tests/io}/sas/data/SSHSV1_A.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/SSHSV1_A.xpt (100%) rename pandas/{io/tests => tests/io}/sas/data/airline.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/airline.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/paxraw_d_short.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/paxraw_d_short.xpt (100%) rename pandas/{io/tests => tests/io}/sas/data/productsales.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/productsales.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test1.sas7bdat (100%) rename pandas/{io/tests => 
tests/io}/sas/data/test10.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test11.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test12.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test13.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test14.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test15.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test16.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test2.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test3.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test4.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test5.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test6.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test7.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test8.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test9.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test_12659.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/test_12659.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test_sas7bdat_1.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/test_sas7bdat_2.csv (100%) rename pandas/{io/tests => tests/io}/sas/test_sas.py (100%) rename pandas/{io/tests => tests/io}/sas/test_sas7bdat.py (100%) rename pandas/{io/tests => tests/io}/sas/test_xport.py (100%) rename pandas/{io/tests => tests/io}/test_clipboard.py (100%) rename pandas/{io/tests => tests/io}/test_common.py (100%) rename pandas/{io/tests => tests/io}/test_date_converters.py (100%) rename pandas/{io/tests => tests/io}/test_excel.py (100%) rename pandas/{io/tests => tests/io}/test_feather.py (100%) rename pandas/{io/tests => tests/io}/test_gbq.py (100%) rename pandas/{io/tests => tests/io}/test_html.py (100%) rename pandas/{io/tests => tests/io}/test_packers.py (99%) rename pandas/{io/tests => tests/io}/test_pickle.py (99%) rename pandas/{io/tests => tests/io}/test_pytables.py (100%) rename pandas/{io/tests => tests/io}/test_s3.py (100%) rename pandas/{io/tests => tests/io}/test_sql.py (100%) rename pandas/{io/tests => tests/io}/test_stata.py (100%) rename pandas/{sparse/tests => tests/msgpack}/__init__.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_buffer.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_case.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_except.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_extension.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_format.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_limits.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_newspec.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_obj.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_pack.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_read_size.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_seq.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_sequnpack.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_subtype.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_unpack.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_unpack_raw.py (100%) rename pandas/tests/{test_msgpack => sparse}/__init__.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_arithmetics.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_array.py (100%) rename 
pandas/{sparse/tests => tests/sparse}/test_combine_concat.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_format.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_frame.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_groupby.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_indexing.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_libsparse.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_list.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_pivot.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_series.py (100%) rename pandas/{tools/tests => tests/tools}/__init__.py (100%) rename pandas/{tools/tests => tests/tools}/data/allow_exact_matches.csv (100%) rename pandas/{tools/tests => tests/tools}/data/allow_exact_matches_and_tolerance.csv (100%) rename pandas/{tools/tests => tests/tools}/data/asof.csv (100%) rename pandas/{tools/tests => tests/tools}/data/asof2.csv (100%) rename pandas/{tools/tests => tests/tools}/data/cut_data.csv (100%) rename pandas/{tools/tests => tests/tools}/data/quotes.csv (100%) rename pandas/{tools/tests => tests/tools}/data/quotes2.csv (100%) rename pandas/{tools/tests => tests/tools}/data/tolerance.csv (100%) rename pandas/{tools/tests => tests/tools}/data/trades.csv (100%) rename pandas/{tools/tests => tests/tools}/data/trades2.csv (100%) rename pandas/{tools/tests => tests/tools}/test_concat.py (100%) rename pandas/{tools/tests => tests/tools}/test_hashing.py (100%) rename pandas/{tools/tests => tests/tools}/test_join.py (99%) rename pandas/{tools/tests => tests/tools}/test_merge.py (100%) rename pandas/{tools/tests => tests/tools}/test_merge_asof.py (100%) rename pandas/{tools/tests => tests/tools}/test_merge_ordered.py (100%) rename pandas/{tools/tests => tests/tools}/test_pivot.py (100%) rename pandas/{tools/tests => tests/tools}/test_tile.py (100%) rename pandas/{tools/tests => tests/tools}/test_util.py (100%) diff --git a/pandas/api/tests/__init__.py b/pandas/tests/api/__init__.py similarity index 100% rename from pandas/api/tests/__init__.py rename to pandas/tests/api/__init__.py diff --git a/pandas/api/tests/test_api.py b/pandas/tests/api/test_api.py similarity index 99% rename from pandas/api/tests/test_api.py rename to pandas/tests/api/test_api.py index 05cf5dc4b7e7b..90a0c1d5c9347 100644 --- a/pandas/api/tests/test_api.py +++ b/pandas/tests/api/test_api.py @@ -133,7 +133,7 @@ def test_api(self): class TestApi(Base, tm.TestCase): - allowed = ['tests', 'types'] + allowed = ['types'] def test_api(self): diff --git a/pandas/computation/tests/__init__.py b/pandas/tests/computation/__init__.py similarity index 100% rename from pandas/computation/tests/__init__.py rename to pandas/tests/computation/__init__.py diff --git a/pandas/computation/tests/test_compat.py b/pandas/tests/computation/test_compat.py similarity index 100% rename from pandas/computation/tests/test_compat.py rename to pandas/tests/computation/test_compat.py diff --git a/pandas/computation/tests/test_eval.py b/pandas/tests/computation/test_eval.py similarity index 100% rename from pandas/computation/tests/test_eval.py rename to pandas/tests/computation/test_eval.py diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 63bf07ec041d3..9a968a42c4247 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -1245,7 +1245,8 @@ def test_shift(self): self.assertEqual(shifted[0], self.rng[0]) 
self.assertEqual(shifted.offset, self.rng.offset) - with tm.assert_produces_warning(PerformanceWarning): + # PerformanceWarning + with warnings.catch_warnings(record=True): rng = date_range(START, END, freq=BMonthEnd()) shifted = rng.shift(1, freq=CDay()) self.assertEqual(shifted[0], rng[0] + CDay()) diff --git a/pandas/io/tests/__init__.py b/pandas/tests/io/__init__.py similarity index 100% rename from pandas/io/tests/__init__.py rename to pandas/tests/io/__init__.py diff --git a/pandas/io/tests/data/S4_EDUC1.dta b/pandas/tests/io/data/S4_EDUC1.dta similarity index 100% rename from pandas/io/tests/data/S4_EDUC1.dta rename to pandas/tests/io/data/S4_EDUC1.dta diff --git a/pandas/io/tests/data/banklist.csv b/pandas/tests/io/data/banklist.csv similarity index 100% rename from pandas/io/tests/data/banklist.csv rename to pandas/tests/io/data/banklist.csv diff --git a/pandas/io/tests/data/banklist.html b/pandas/tests/io/data/banklist.html similarity index 100% rename from pandas/io/tests/data/banklist.html rename to pandas/tests/io/data/banklist.html diff --git a/pandas/io/tests/data/blank.xls b/pandas/tests/io/data/blank.xls old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/blank.xls rename to pandas/tests/io/data/blank.xls diff --git a/pandas/io/tests/data/blank.xlsm b/pandas/tests/io/data/blank.xlsm old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/blank.xlsm rename to pandas/tests/io/data/blank.xlsm diff --git a/pandas/io/tests/data/blank.xlsx b/pandas/tests/io/data/blank.xlsx old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/blank.xlsx rename to pandas/tests/io/data/blank.xlsx diff --git a/pandas/io/tests/data/blank_with_header.xls b/pandas/tests/io/data/blank_with_header.xls old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/blank_with_header.xls rename to pandas/tests/io/data/blank_with_header.xls diff --git a/pandas/io/tests/data/blank_with_header.xlsm b/pandas/tests/io/data/blank_with_header.xlsm old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/blank_with_header.xlsm rename to pandas/tests/io/data/blank_with_header.xlsm diff --git a/pandas/io/tests/data/blank_with_header.xlsx b/pandas/tests/io/data/blank_with_header.xlsx old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/blank_with_header.xlsx rename to pandas/tests/io/data/blank_with_header.xlsx diff --git a/pandas/io/tests/data/categorical_0_14_1.pickle b/pandas/tests/io/data/categorical_0_14_1.pickle similarity index 100% rename from pandas/io/tests/data/categorical_0_14_1.pickle rename to pandas/tests/io/data/categorical_0_14_1.pickle diff --git a/pandas/io/tests/data/categorical_0_15_2.pickle b/pandas/tests/io/data/categorical_0_15_2.pickle similarity index 100% rename from pandas/io/tests/data/categorical_0_15_2.pickle rename to pandas/tests/io/data/categorical_0_15_2.pickle diff --git a/pandas/io/tests/data/computer_sales_page.html b/pandas/tests/io/data/computer_sales_page.html similarity index 100% rename from pandas/io/tests/data/computer_sales_page.html rename to pandas/tests/io/data/computer_sales_page.html diff --git a/pandas/io/tests/data/gbq_fake_job.txt b/pandas/tests/io/data/gbq_fake_job.txt similarity index 100% rename from pandas/io/tests/data/gbq_fake_job.txt rename to pandas/tests/io/data/gbq_fake_job.txt diff --git a/pandas/io/tests/data/html_encoding/chinese_utf-16.html 
b/pandas/tests/io/data/html_encoding/chinese_utf-16.html similarity index 100% rename from pandas/io/tests/data/html_encoding/chinese_utf-16.html rename to pandas/tests/io/data/html_encoding/chinese_utf-16.html diff --git a/pandas/io/tests/data/html_encoding/chinese_utf-32.html b/pandas/tests/io/data/html_encoding/chinese_utf-32.html similarity index 100% rename from pandas/io/tests/data/html_encoding/chinese_utf-32.html rename to pandas/tests/io/data/html_encoding/chinese_utf-32.html diff --git a/pandas/io/tests/data/html_encoding/chinese_utf-8.html b/pandas/tests/io/data/html_encoding/chinese_utf-8.html similarity index 100% rename from pandas/io/tests/data/html_encoding/chinese_utf-8.html rename to pandas/tests/io/data/html_encoding/chinese_utf-8.html diff --git a/pandas/io/tests/data/html_encoding/letz_latin1.html b/pandas/tests/io/data/html_encoding/letz_latin1.html similarity index 100% rename from pandas/io/tests/data/html_encoding/letz_latin1.html rename to pandas/tests/io/data/html_encoding/letz_latin1.html diff --git a/pandas/io/tests/data/iris.csv b/pandas/tests/io/data/iris.csv similarity index 100% rename from pandas/io/tests/data/iris.csv rename to pandas/tests/io/data/iris.csv diff --git a/pandas/io/tests/data/legacy_hdf/datetimetz_object.h5 b/pandas/tests/io/data/legacy_hdf/datetimetz_object.h5 similarity index 100% rename from pandas/io/tests/data/legacy_hdf/datetimetz_object.h5 rename to pandas/tests/io/data/legacy_hdf/datetimetz_object.h5 diff --git a/pandas/io/tests/data/legacy_hdf/legacy.h5 b/pandas/tests/io/data/legacy_hdf/legacy.h5 similarity index 100% rename from pandas/io/tests/data/legacy_hdf/legacy.h5 rename to pandas/tests/io/data/legacy_hdf/legacy.h5 diff --git a/pandas/io/tests/data/legacy_hdf/legacy_0.10.h5 b/pandas/tests/io/data/legacy_hdf/legacy_0.10.h5 similarity index 100% rename from pandas/io/tests/data/legacy_hdf/legacy_0.10.h5 rename to pandas/tests/io/data/legacy_hdf/legacy_0.10.h5 diff --git a/pandas/io/tests/data/legacy_hdf/legacy_table.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table.h5 similarity index 100% rename from pandas/io/tests/data/legacy_hdf/legacy_table.h5 rename to pandas/tests/io/data/legacy_hdf/legacy_table.h5 diff --git a/pandas/io/tests/data/legacy_hdf/legacy_table_0.11.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table_0.11.h5 similarity index 100% rename from pandas/io/tests/data/legacy_hdf/legacy_table_0.11.h5 rename to pandas/tests/io/data/legacy_hdf/legacy_table_0.11.h5 diff --git a/pandas/io/tests/data/legacy_hdf/pytables_native.h5 b/pandas/tests/io/data/legacy_hdf/pytables_native.h5 similarity index 100% rename from pandas/io/tests/data/legacy_hdf/pytables_native.h5 rename to pandas/tests/io/data/legacy_hdf/pytables_native.h5 diff --git a/pandas/io/tests/data/legacy_hdf/pytables_native2.h5 b/pandas/tests/io/data/legacy_hdf/pytables_native2.h5 similarity index 100% rename from pandas/io/tests/data/legacy_hdf/pytables_native2.h5 rename to pandas/tests/io/data/legacy_hdf/pytables_native2.h5 diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.0/0.16.0_x86_64_darwin_2.7.9.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.0/0.16.0_x86_64_darwin_2.7.9.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.0/0.16.0_x86_64_darwin_2.7.9.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.0/0.16.0_x86_64_darwin_2.7.9.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack 
similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.9.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.9.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.9.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.9.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_3.4.3.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_3.4.3.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_3.4.3.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_3.4.3.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_2.7.10.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_2.7.10.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_2.7.10.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_2.7.10.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_3.4.3.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_3.4.3.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_3.4.3.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_3.4.3.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_2.7.11.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_2.7.11.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_2.7.11.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_3.4.4.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_3.4.4.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_3.4.4.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_3.4.4.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_2.7.11.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_2.7.11.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_2.7.11.msgpack diff 
diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_3.4.4.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_3.4.4.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_3.4.4.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_3.4.4.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_2.7.11.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_2.7.11.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_2.7.11.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_3.4.4.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_3.4.4.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_3.4.4.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_3.4.4.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_2.7.11.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_2.7.11.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_2.7.11.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_3.5.1.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_3.5.1.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_3.5.1.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_3.5.1.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_2.7.11.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_2.7.11.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_2.7.11.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_3.5.1.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_3.5.1.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_3.5.1.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_3.5.1.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_2.7.11.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_2.7.11.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_2.7.11.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_3.5.1.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_3.5.1.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_3.5.1.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_3.5.1.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_2.7.11.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_2.7.11.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_2.7.11.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_3.5.1.msgpack b/pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_3.5.1.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_3.5.1.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_3.5.1.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_2.7.11.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_2.7.11.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_2.7.11.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_3.5.1.msgpack b/pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_3.5.1.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_3.5.1.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_3.5.1.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack b/pandas/tests/io/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack
diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack b/pandas/tests/io/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack
similarity index 100%
rename from pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack
rename to pandas/tests/io/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack
diff --git a/pandas/io/tests/data/legacy_pickle/0.10.1/AMD64_windows_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.10.1/AMD64_windows_2.7.3.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.10.1/AMD64_windows_2.7.3.pickle
rename to pandas/tests/io/data/legacy_pickle/0.10.1/AMD64_windows_2.7.3.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.10.1/x86_64_linux_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.10.1/x86_64_linux_2.7.3.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.10.1/x86_64_linux_2.7.3.pickle
rename to pandas/tests/io/data/legacy_pickle/0.10.1/x86_64_linux_2.7.3.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle b/pandas/tests/io/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle
rename to pandas/tests/io/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle
rename to pandas/tests/io/data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_3.3.0.pickle b/pandas/tests/io/data/legacy_pickle/0.11.0/x86_64_linux_3.3.0.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_3.3.0.pickle
rename to pandas/tests/io/data/legacy_pickle/0.11.0/x86_64_linux_3.3.0.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_AMD64_windows_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.12.0/0.12.0_AMD64_windows_2.7.3.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_AMD64_windows_2.7.3.pickle
rename to pandas/tests/io/data/legacy_pickle/0.12.0/0.12.0_AMD64_windows_2.7.3.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle
rename to pandas/tests/io/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_AMD64_windows_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_AMD64_windows_2.7.3.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_AMD64_windows_2.7.3.pickle
rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_AMD64_windows_2.7.3.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.6.5.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.6.5.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.6.5.pickle
rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.6.5.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.7.3.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.7.3.pickle
rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.7.3.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_3.2.3.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_i686_linux_3.2.3.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_3.2.3.pickle
rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_i686_linux_3.2.3.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle
rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle
rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.3.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.3.pickle
rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.3.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.8.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.8.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.8.pickle
rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.8.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_3.3.0.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_3.3.0.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_3.3.0.pickle
rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_3.3.0.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle b/pandas/tests/io/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle
rename to pandas/tests/io/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_linux_2.7.8.pickle b/pandas/tests/io/data/legacy_pickle/0.14.0/0.14.0_x86_64_linux_2.7.8.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_linux_2.7.8.pickle
rename to pandas/tests/io/data/legacy_pickle/0.14.0/0.14.0_x86_64_linux_2.7.8.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.14.1/0.14.1_x86_64_darwin_2.7.12.pickle b/pandas/tests/io/data/legacy_pickle/0.14.1/0.14.1_x86_64_darwin_2.7.12.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.14.1/0.14.1_x86_64_darwin_2.7.12.pickle
rename to pandas/tests/io/data/legacy_pickle/0.14.1/0.14.1_x86_64_darwin_2.7.12.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.14.1/0.14.1_x86_64_linux_2.7.8.pickle b/pandas/tests/io/data/legacy_pickle/0.14.1/0.14.1_x86_64_linux_2.7.8.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.14.1/0.14.1_x86_64_linux_2.7.8.pickle
rename to pandas/tests/io/data/legacy_pickle/0.14.1/0.14.1_x86_64_linux_2.7.8.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.15.0/0.15.0_x86_64_darwin_2.7.12.pickle b/pandas/tests/io/data/legacy_pickle/0.15.0/0.15.0_x86_64_darwin_2.7.12.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.15.0/0.15.0_x86_64_darwin_2.7.12.pickle
rename to pandas/tests/io/data/legacy_pickle/0.15.0/0.15.0_x86_64_darwin_2.7.12.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.15.0/0.15.0_x86_64_linux_2.7.8.pickle b/pandas/tests/io/data/legacy_pickle/0.15.0/0.15.0_x86_64_linux_2.7.8.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.15.0/0.15.0_x86_64_linux_2.7.8.pickle
rename to pandas/tests/io/data/legacy_pickle/0.15.0/0.15.0_x86_64_linux_2.7.8.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.15.2/0.15.2_x86_64_darwin_2.7.9.pickle b/pandas/tests/io/data/legacy_pickle/0.15.2/0.15.2_x86_64_darwin_2.7.9.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.15.2/0.15.2_x86_64_darwin_2.7.9.pickle
rename to pandas/tests/io/data/legacy_pickle/0.15.2/0.15.2_x86_64_darwin_2.7.9.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.16.0/0.16.0_x86_64_darwin_2.7.9.pickle b/pandas/tests/io/data/legacy_pickle/0.16.0/0.16.0_x86_64_darwin_2.7.9.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.16.0/0.16.0_x86_64_darwin_2.7.9.pickle
rename to pandas/tests/io/data/legacy_pickle/0.16.0/0.16.0_x86_64_darwin_2.7.9.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle b/pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle
rename to pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle b/pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle
rename to pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle b/pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle
rename to pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.9.pickle b/pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.9.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.9.pickle
rename to pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.9.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_3.4.3.pickle b/pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_3.4.3.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_3.4.3.pickle
rename to pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_3.4.3.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_2.7.10.pickle b/pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_2.7.10.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_2.7.10.pickle
rename to pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_2.7.10.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_3.4.3.pickle b/pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_3.4.3.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_3.4.3.pickle
rename to pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_3.4.3.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_2.7.11.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_2.7.11.pickle
rename to pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_2.7.11.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_3.4.4.pickle b/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_3.4.4.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_3.4.4.pickle
rename to pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_3.4.4.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_2.7.11.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_2.7.11.pickle
rename to pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_2.7.11.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_3.4.4.pickle b/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_3.4.4.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_3.4.4.pickle
rename to pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_3.4.4.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_2.7.11.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_2.7.11.pickle
rename to pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_2.7.11.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_3.4.4.pickle b/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_3.4.4.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_3.4.4.pickle
rename to pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_3.4.4.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.17.0/0.17.1_AMD64_windows_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.1_AMD64_windows_2.7.11.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.17.0/0.17.1_AMD64_windows_2.7.11.pickle
rename to pandas/tests/io/data/legacy_pickle/0.17.0/0.17.1_AMD64_windows_2.7.11.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.17.1/0.17.1_AMD64_windows_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.17.1/0.17.1_AMD64_windows_2.7.11.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.17.1/0.17.1_AMD64_windows_2.7.11.pickle
rename to pandas/tests/io/data/legacy_pickle/0.17.1/0.17.1_AMD64_windows_2.7.11.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.17.1/0.17.1_x86_64_darwin_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.17.1/0.17.1_x86_64_darwin_2.7.11.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.17.1/0.17.1_x86_64_darwin_2.7.11.pickle
rename to pandas/tests/io/data/legacy_pickle/0.17.1/0.17.1_x86_64_darwin_2.7.11.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_2.7.11.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_2.7.11.pickle
rename to pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_2.7.11.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_3.5.1.pickle b/pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_3.5.1.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_3.5.1.pickle
rename to pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_3.5.1.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_2.7.11.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_2.7.11.pickle
rename to pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_2.7.11.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_3.5.1.pickle b/pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_3.5.1.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_3.5.1.pickle
rename to pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_3.5.1.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle b/pandas/tests/io/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle
rename to pandas/tests/io/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle
diff --git a/pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle b/pandas/tests/io/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle
similarity index 100%
rename from pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle
rename to pandas/tests/io/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle
diff --git a/pandas/io/tests/data/macau.html b/pandas/tests/io/data/macau.html
similarity index 100%
rename from pandas/io/tests/data/macau.html
rename to pandas/tests/io/data/macau.html
diff --git a/pandas/io/tests/data/nyse_wsj.html b/pandas/tests/io/data/nyse_wsj.html
similarity index 100%
rename from pandas/io/tests/data/nyse_wsj.html
rename to pandas/tests/io/data/nyse_wsj.html
diff --git a/pandas/io/tests/data/spam.html b/pandas/tests/io/data/spam.html
similarity index 100%
rename from pandas/io/tests/data/spam.html
rename to pandas/tests/io/data/spam.html
diff --git a/pandas/io/tests/data/stata10_115.dta b/pandas/tests/io/data/stata10_115.dta
old mode 100755
new mode 100644
similarity index 100%
rename from pandas/io/tests/data/stata10_115.dta
rename to pandas/tests/io/data/stata10_115.dta
diff --git a/pandas/io/tests/data/stata10_117.dta b/pandas/tests/io/data/stata10_117.dta
old mode 100755
new mode 100644
similarity index 100%
rename from pandas/io/tests/data/stata10_117.dta
rename to pandas/tests/io/data/stata10_117.dta
diff --git a/pandas/io/tests/data/stata11_115.dta b/pandas/tests/io/data/stata11_115.dta
old mode 100755
new mode 100644
similarity index 100%
rename from pandas/io/tests/data/stata11_115.dta
rename to pandas/tests/io/data/stata11_115.dta
diff --git a/pandas/io/tests/data/stata11_117.dta b/pandas/tests/io/data/stata11_117.dta
old mode 100755
new mode 100644
similarity index 100%
rename from pandas/io/tests/data/stata11_117.dta
rename to pandas/tests/io/data/stata11_117.dta
diff --git a/pandas/io/tests/data/stata12_117.dta b/pandas/tests/io/data/stata12_117.dta
similarity index 100%
rename from pandas/io/tests/data/stata12_117.dta
rename to pandas/tests/io/data/stata12_117.dta
diff --git a/pandas/io/tests/data/stata14_118.dta b/pandas/tests/io/data/stata14_118.dta
similarity index 100%
rename from pandas/io/tests/data/stata14_118.dta
rename to pandas/tests/io/data/stata14_118.dta
diff --git a/pandas/io/tests/data/stata15.dta b/pandas/tests/io/data/stata15.dta
similarity index 100%
rename from pandas/io/tests/data/stata15.dta
rename to pandas/tests/io/data/stata15.dta
diff --git a/pandas/io/tests/data/stata1_114.dta b/pandas/tests/io/data/stata1_114.dta
similarity index 100%
rename from pandas/io/tests/data/stata1_114.dta
rename to pandas/tests/io/data/stata1_114.dta
diff --git a/pandas/io/tests/data/stata1_117.dta b/pandas/tests/io/data/stata1_117.dta
similarity index 100%
rename from pandas/io/tests/data/stata1_117.dta
rename to pandas/tests/io/data/stata1_117.dta
diff --git a/pandas/io/tests/data/stata1_encoding.dta b/pandas/tests/io/data/stata1_encoding.dta
similarity index 100%
rename from pandas/io/tests/data/stata1_encoding.dta
rename to pandas/tests/io/data/stata1_encoding.dta
diff --git a/pandas/io/tests/data/stata2_113.dta b/pandas/tests/io/data/stata2_113.dta
similarity index 100%
rename from pandas/io/tests/data/stata2_113.dta
rename to pandas/tests/io/data/stata2_113.dta
diff --git a/pandas/io/tests/data/stata2_114.dta b/pandas/tests/io/data/stata2_114.dta
similarity index 100%
rename from pandas/io/tests/data/stata2_114.dta
rename to pandas/tests/io/data/stata2_114.dta
diff --git a/pandas/io/tests/data/stata2_115.dta b/pandas/tests/io/data/stata2_115.dta
similarity index 100%
rename from pandas/io/tests/data/stata2_115.dta
rename to pandas/tests/io/data/stata2_115.dta
diff --git a/pandas/io/tests/data/stata2_117.dta b/pandas/tests/io/data/stata2_117.dta
similarity index 100%
rename from pandas/io/tests/data/stata2_117.dta
rename to pandas/tests/io/data/stata2_117.dta
diff --git a/pandas/io/tests/data/stata3.csv b/pandas/tests/io/data/stata3.csv
similarity index 100%
rename from pandas/io/tests/data/stata3.csv
rename to pandas/tests/io/data/stata3.csv
diff --git a/pandas/io/tests/data/stata3_113.dta b/pandas/tests/io/data/stata3_113.dta
similarity index 100%
rename from pandas/io/tests/data/stata3_113.dta
rename to pandas/tests/io/data/stata3_113.dta
diff --git a/pandas/io/tests/data/stata3_114.dta b/pandas/tests/io/data/stata3_114.dta
similarity index 100%
rename from pandas/io/tests/data/stata3_114.dta
rename to pandas/tests/io/data/stata3_114.dta
diff --git a/pandas/io/tests/data/stata3_115.dta b/pandas/tests/io/data/stata3_115.dta
similarity index 100%
rename from pandas/io/tests/data/stata3_115.dta
rename to pandas/tests/io/data/stata3_115.dta
diff --git a/pandas/io/tests/data/stata3_117.dta b/pandas/tests/io/data/stata3_117.dta
similarity index 100%
rename from pandas/io/tests/data/stata3_117.dta
rename to pandas/tests/io/data/stata3_117.dta
diff --git a/pandas/io/tests/data/stata4_113.dta b/pandas/tests/io/data/stata4_113.dta
similarity index 100%
rename from pandas/io/tests/data/stata4_113.dta
rename to pandas/tests/io/data/stata4_113.dta
diff --git a/pandas/io/tests/data/stata4_114.dta b/pandas/tests/io/data/stata4_114.dta
similarity index 100%
rename from pandas/io/tests/data/stata4_114.dta
rename to pandas/tests/io/data/stata4_114.dta
diff --git a/pandas/io/tests/data/stata4_115.dta b/pandas/tests/io/data/stata4_115.dta
similarity index 100%
rename from pandas/io/tests/data/stata4_115.dta
rename to pandas/tests/io/data/stata4_115.dta
diff --git a/pandas/io/tests/data/stata4_117.dta b/pandas/tests/io/data/stata4_117.dta
similarity index 100%
rename from pandas/io/tests/data/stata4_117.dta
rename to pandas/tests/io/data/stata4_117.dta
diff --git a/pandas/io/tests/data/stata5.csv b/pandas/tests/io/data/stata5.csv
similarity index 100%
rename from pandas/io/tests/data/stata5.csv
rename to pandas/tests/io/data/stata5.csv
diff --git a/pandas/io/tests/data/stata5_113.dta b/pandas/tests/io/data/stata5_113.dta
similarity index 100%
rename from pandas/io/tests/data/stata5_113.dta
rename to pandas/tests/io/data/stata5_113.dta
diff --git a/pandas/io/tests/data/stata5_114.dta b/pandas/tests/io/data/stata5_114.dta
similarity index 100%
rename from pandas/io/tests/data/stata5_114.dta
rename to pandas/tests/io/data/stata5_114.dta
diff --git a/pandas/io/tests/data/stata5_115.dta b/pandas/tests/io/data/stata5_115.dta
similarity index 100%
rename from pandas/io/tests/data/stata5_115.dta
rename to pandas/tests/io/data/stata5_115.dta
diff --git a/pandas/io/tests/data/stata5_117.dta b/pandas/tests/io/data/stata5_117.dta
similarity index 100%
rename from pandas/io/tests/data/stata5_117.dta
rename to pandas/tests/io/data/stata5_117.dta
diff --git a/pandas/io/tests/data/stata6.csv b/pandas/tests/io/data/stata6.csv
similarity index 100%
rename from pandas/io/tests/data/stata6.csv
rename to pandas/tests/io/data/stata6.csv
diff --git a/pandas/io/tests/data/stata6_113.dta b/pandas/tests/io/data/stata6_113.dta
similarity index 100%
rename from pandas/io/tests/data/stata6_113.dta
rename to pandas/tests/io/data/stata6_113.dta
diff --git a/pandas/io/tests/data/stata6_114.dta b/pandas/tests/io/data/stata6_114.dta
similarity index 100%
rename from pandas/io/tests/data/stata6_114.dta
rename to pandas/tests/io/data/stata6_114.dta
diff --git a/pandas/io/tests/data/stata6_115.dta b/pandas/tests/io/data/stata6_115.dta
similarity index 100%
rename from pandas/io/tests/data/stata6_115.dta
rename to pandas/tests/io/data/stata6_115.dta
diff --git a/pandas/io/tests/data/stata6_117.dta b/pandas/tests/io/data/stata6_117.dta
similarity index 100%
rename from pandas/io/tests/data/stata6_117.dta
rename to pandas/tests/io/data/stata6_117.dta
diff --git a/pandas/io/tests/data/stata7_111.dta b/pandas/tests/io/data/stata7_111.dta
similarity index 100%
rename from pandas/io/tests/data/stata7_111.dta
rename to pandas/tests/io/data/stata7_111.dta
diff --git a/pandas/io/tests/data/stata7_115.dta b/pandas/tests/io/data/stata7_115.dta
similarity index 100%
rename from pandas/io/tests/data/stata7_115.dta
rename to pandas/tests/io/data/stata7_115.dta
diff --git a/pandas/io/tests/data/stata7_117.dta b/pandas/tests/io/data/stata7_117.dta
similarity index 100%
rename from pandas/io/tests/data/stata7_117.dta
rename to pandas/tests/io/data/stata7_117.dta
diff --git a/pandas/io/tests/data/stata8_113.dta b/pandas/tests/io/data/stata8_113.dta
similarity index 100%
rename from pandas/io/tests/data/stata8_113.dta
rename to pandas/tests/io/data/stata8_113.dta
diff --git a/pandas/io/tests/data/stata8_115.dta b/pandas/tests/io/data/stata8_115.dta
similarity index 100%
rename from pandas/io/tests/data/stata8_115.dta
rename to pandas/tests/io/data/stata8_115.dta
diff --git a/pandas/io/tests/data/stata8_117.dta b/pandas/tests/io/data/stata8_117.dta
similarity index 100%
rename from pandas/io/tests/data/stata8_117.dta
rename to pandas/tests/io/data/stata8_117.dta
diff --git a/pandas/io/tests/data/stata9_115.dta b/pandas/tests/io/data/stata9_115.dta
similarity index 100%
rename from pandas/io/tests/data/stata9_115.dta
rename to pandas/tests/io/data/stata9_115.dta
diff --git a/pandas/io/tests/data/stata9_117.dta b/pandas/tests/io/data/stata9_117.dta
similarity index 100%
rename from pandas/io/tests/data/stata9_117.dta
rename to pandas/tests/io/data/stata9_117.dta
diff --git a/pandas/io/tests/data/test1.csv b/pandas/tests/io/data/test1.csv
similarity index 100%
rename from pandas/io/tests/data/test1.csv
rename to pandas/tests/io/data/test1.csv
diff --git a/pandas/io/tests/data/test1.xls b/pandas/tests/io/data/test1.xls
similarity index 100%
rename from pandas/io/tests/data/test1.xls
rename to pandas/tests/io/data/test1.xls
diff --git a/pandas/io/tests/data/test1.xlsm b/pandas/tests/io/data/test1.xlsm
similarity index 100%
rename from pandas/io/tests/data/test1.xlsm
rename to pandas/tests/io/data/test1.xlsm
diff --git a/pandas/io/tests/data/test1.xlsx b/pandas/tests/io/data/test1.xlsx
similarity index 100%
rename from pandas/io/tests/data/test1.xlsx
rename to pandas/tests/io/data/test1.xlsx
diff --git a/pandas/io/tests/data/test2.xls b/pandas/tests/io/data/test2.xls
similarity index 100%
rename from pandas/io/tests/data/test2.xls
rename to pandas/tests/io/data/test2.xls
diff --git a/pandas/io/tests/data/test2.xlsm b/pandas/tests/io/data/test2.xlsm
similarity index 100%
rename from pandas/io/tests/data/test2.xlsm
rename to pandas/tests/io/data/test2.xlsm
diff --git a/pandas/io/tests/data/test2.xlsx b/pandas/tests/io/data/test2.xlsx
similarity index 100%
rename from pandas/io/tests/data/test2.xlsx
rename to pandas/tests/io/data/test2.xlsx
diff --git a/pandas/io/tests/data/test3.xls b/pandas/tests/io/data/test3.xls
similarity index 100%
rename from pandas/io/tests/data/test3.xls
rename to pandas/tests/io/data/test3.xls
diff --git a/pandas/io/tests/data/test3.xlsm b/pandas/tests/io/data/test3.xlsm
similarity index 100%
rename from pandas/io/tests/data/test3.xlsm
rename to pandas/tests/io/data/test3.xlsm
diff --git a/pandas/io/tests/data/test3.xlsx b/pandas/tests/io/data/test3.xlsx
similarity index 100%
rename from pandas/io/tests/data/test3.xlsx
rename to pandas/tests/io/data/test3.xlsx
diff --git a/pandas/io/tests/data/test4.xls b/pandas/tests/io/data/test4.xls
similarity index 100%
rename from pandas/io/tests/data/test4.xls
rename to pandas/tests/io/data/test4.xls
diff --git a/pandas/io/tests/data/test4.xlsm b/pandas/tests/io/data/test4.xlsm
similarity index 100%
rename from pandas/io/tests/data/test4.xlsm
rename to pandas/tests/io/data/test4.xlsm
diff --git a/pandas/io/tests/data/test4.xlsx b/pandas/tests/io/data/test4.xlsx
similarity index 100%
rename from pandas/io/tests/data/test4.xlsx
rename to pandas/tests/io/data/test4.xlsx
diff --git a/pandas/io/tests/data/test5.xls b/pandas/tests/io/data/test5.xls
similarity index 100%
rename from pandas/io/tests/data/test5.xls
rename to pandas/tests/io/data/test5.xls
diff --git a/pandas/io/tests/data/test5.xlsm b/pandas/tests/io/data/test5.xlsm
similarity index 100%
rename from pandas/io/tests/data/test5.xlsm
rename to pandas/tests/io/data/test5.xlsm
diff --git a/pandas/io/tests/data/test5.xlsx b/pandas/tests/io/data/test5.xlsx
similarity index 100%
rename from pandas/io/tests/data/test5.xlsx
rename to pandas/tests/io/data/test5.xlsx
diff --git a/pandas/io/tests/data/test_converters.xls b/pandas/tests/io/data/test_converters.xls
similarity index 100%
rename from pandas/io/tests/data/test_converters.xls
rename to pandas/tests/io/data/test_converters.xls
diff --git a/pandas/io/tests/data/test_converters.xlsm b/pandas/tests/io/data/test_converters.xlsm
similarity index 100%
rename from pandas/io/tests/data/test_converters.xlsm
rename to pandas/tests/io/data/test_converters.xlsm
diff --git a/pandas/io/tests/data/test_converters.xlsx b/pandas/tests/io/data/test_converters.xlsx
similarity index 100%
rename from pandas/io/tests/data/test_converters.xlsx
rename to pandas/tests/io/data/test_converters.xlsx
diff --git a/pandas/io/tests/data/test_index_name_pre17.xls b/pandas/tests/io/data/test_index_name_pre17.xls
similarity index 100%
rename from pandas/io/tests/data/test_index_name_pre17.xls
rename to pandas/tests/io/data/test_index_name_pre17.xls
diff --git a/pandas/io/tests/data/test_index_name_pre17.xlsm b/pandas/tests/io/data/test_index_name_pre17.xlsm
similarity index 100%
rename from pandas/io/tests/data/test_index_name_pre17.xlsm
rename to pandas/tests/io/data/test_index_name_pre17.xlsm
diff --git a/pandas/io/tests/data/test_index_name_pre17.xlsx b/pandas/tests/io/data/test_index_name_pre17.xlsx
similarity index 100%
rename from pandas/io/tests/data/test_index_name_pre17.xlsx
rename to pandas/tests/io/data/test_index_name_pre17.xlsx
diff --git a/pandas/io/tests/data/test_mmap.csv b/pandas/tests/io/data/test_mmap.csv
similarity index 100%
rename from pandas/io/tests/data/test_mmap.csv
rename to pandas/tests/io/data/test_mmap.csv
diff --git a/pandas/io/tests/data/test_multisheet.xls b/pandas/tests/io/data/test_multisheet.xls
similarity index 100%
rename from pandas/io/tests/data/test_multisheet.xls
rename to pandas/tests/io/data/test_multisheet.xls
diff --git a/pandas/io/tests/data/test_multisheet.xlsm b/pandas/tests/io/data/test_multisheet.xlsm
similarity index 100%
rename from pandas/io/tests/data/test_multisheet.xlsm
rename to pandas/tests/io/data/test_multisheet.xlsm
diff --git a/pandas/io/tests/data/test_multisheet.xlsx b/pandas/tests/io/data/test_multisheet.xlsx
similarity index 100%
rename from pandas/io/tests/data/test_multisheet.xlsx
rename to pandas/tests/io/data/test_multisheet.xlsx
diff --git a/pandas/io/tests/data/test_squeeze.xls b/pandas/tests/io/data/test_squeeze.xls
similarity index 100%
rename from pandas/io/tests/data/test_squeeze.xls
rename to pandas/tests/io/data/test_squeeze.xls
diff --git a/pandas/io/tests/data/test_squeeze.xlsm b/pandas/tests/io/data/test_squeeze.xlsm
similarity index 100%
rename from pandas/io/tests/data/test_squeeze.xlsm
rename to pandas/tests/io/data/test_squeeze.xlsm
diff --git a/pandas/io/tests/data/test_squeeze.xlsx b/pandas/tests/io/data/test_squeeze.xlsx
similarity index 100%
rename from pandas/io/tests/data/test_squeeze.xlsx
rename to pandas/tests/io/data/test_squeeze.xlsx
diff --git a/pandas/io/tests/data/test_types.xls b/pandas/tests/io/data/test_types.xls
similarity index 100%
rename from pandas/io/tests/data/test_types.xls
rename to pandas/tests/io/data/test_types.xls
diff --git a/pandas/io/tests/data/test_types.xlsm b/pandas/tests/io/data/test_types.xlsm
similarity index 100%
rename from pandas/io/tests/data/test_types.xlsm
rename to pandas/tests/io/data/test_types.xlsm
diff --git a/pandas/io/tests/data/test_types.xlsx b/pandas/tests/io/data/test_types.xlsx
similarity index 100%
rename from pandas/io/tests/data/test_types.xlsx
rename to pandas/tests/io/data/test_types.xlsx
diff --git a/pandas/io/tests/data/testdateoverflow.xls b/pandas/tests/io/data/testdateoverflow.xls
similarity index 100%
rename from pandas/io/tests/data/testdateoverflow.xls
rename to pandas/tests/io/data/testdateoverflow.xls
diff --git a/pandas/io/tests/data/testdateoverflow.xlsm b/pandas/tests/io/data/testdateoverflow.xlsm
similarity index 100%
rename from pandas/io/tests/data/testdateoverflow.xlsm
rename to pandas/tests/io/data/testdateoverflow.xlsm
diff --git a/pandas/io/tests/data/testdateoverflow.xlsx b/pandas/tests/io/data/testdateoverflow.xlsx
similarity index 100%
rename from pandas/io/tests/data/testdateoverflow.xlsx
rename to pandas/tests/io/data/testdateoverflow.xlsx
diff --git a/pandas/io/tests/data/testdtype.xls b/pandas/tests/io/data/testdtype.xls
similarity index 100%
rename from pandas/io/tests/data/testdtype.xls
rename to pandas/tests/io/data/testdtype.xls
diff --git a/pandas/io/tests/data/testdtype.xlsm b/pandas/tests/io/data/testdtype.xlsm
similarity index 100%
rename from pandas/io/tests/data/testdtype.xlsm
rename to pandas/tests/io/data/testdtype.xlsm
diff --git a/pandas/io/tests/data/testdtype.xlsx b/pandas/tests/io/data/testdtype.xlsx
similarity index 100%
rename from pandas/io/tests/data/testdtype.xlsx
rename to pandas/tests/io/data/testdtype.xlsx
diff --git a/pandas/io/tests/data/testmultiindex.xls b/pandas/tests/io/data/testmultiindex.xls
similarity index 100%
rename from pandas/io/tests/data/testmultiindex.xls
rename to pandas/tests/io/data/testmultiindex.xls
diff --git a/pandas/io/tests/data/testmultiindex.xlsm b/pandas/tests/io/data/testmultiindex.xlsm
similarity index 100%
rename from pandas/io/tests/data/testmultiindex.xlsm
rename to pandas/tests/io/data/testmultiindex.xlsm
diff --git a/pandas/io/tests/data/testmultiindex.xlsx b/pandas/tests/io/data/testmultiindex.xlsx
similarity index 100%
rename from pandas/io/tests/data/testmultiindex.xlsx
rename to pandas/tests/io/data/testmultiindex.xlsx
diff --git a/pandas/io/tests/data/testskiprows.xls b/pandas/tests/io/data/testskiprows.xls
similarity index 100%
rename from pandas/io/tests/data/testskiprows.xls
rename to pandas/tests/io/data/testskiprows.xls
diff --git a/pandas/io/tests/data/testskiprows.xlsm b/pandas/tests/io/data/testskiprows.xlsm
similarity index 100%
rename from pandas/io/tests/data/testskiprows.xlsm
rename to pandas/tests/io/data/testskiprows.xlsm
diff --git a/pandas/io/tests/data/testskiprows.xlsx b/pandas/tests/io/data/testskiprows.xlsx
similarity index 100%
rename from pandas/io/tests/data/testskiprows.xlsx
rename to pandas/tests/io/data/testskiprows.xlsx
diff --git a/pandas/io/tests/data/times_1900.xls b/pandas/tests/io/data/times_1900.xls
similarity index 100%
rename from pandas/io/tests/data/times_1900.xls
rename to pandas/tests/io/data/times_1900.xls
diff --git a/pandas/io/tests/data/times_1900.xlsm b/pandas/tests/io/data/times_1900.xlsm
similarity index 100%
rename from pandas/io/tests/data/times_1900.xlsm
rename to pandas/tests/io/data/times_1900.xlsm
diff --git a/pandas/io/tests/data/times_1900.xlsx b/pandas/tests/io/data/times_1900.xlsx
similarity index 100%
rename from pandas/io/tests/data/times_1900.xlsx
rename to pandas/tests/io/data/times_1900.xlsx
diff --git a/pandas/io/tests/data/times_1904.xls b/pandas/tests/io/data/times_1904.xls
similarity index 100%
rename from pandas/io/tests/data/times_1904.xls
rename to pandas/tests/io/data/times_1904.xls
diff --git a/pandas/io/tests/data/times_1904.xlsm b/pandas/tests/io/data/times_1904.xlsm
similarity index 100%
rename from pandas/io/tests/data/times_1904.xlsm
rename to pandas/tests/io/data/times_1904.xlsm
diff --git a/pandas/io/tests/data/times_1904.xlsx b/pandas/tests/io/data/times_1904.xlsx
similarity index 100%
rename from pandas/io/tests/data/times_1904.xlsx
rename to pandas/tests/io/data/times_1904.xlsx
diff --git a/pandas/io/tests/data/tips.csv b/pandas/tests/io/data/tips.csv
similarity index 100%
rename from pandas/io/tests/data/tips.csv
rename to pandas/tests/io/data/tips.csv
diff --git a/pandas/io/tests/data/valid_markup.html b/pandas/tests/io/data/valid_markup.html
similarity index 100%
rename from pandas/io/tests/data/valid_markup.html
rename to pandas/tests/io/data/valid_markup.html
diff --git a/pandas/io/tests/data/wikipedia_states.html b/pandas/tests/io/data/wikipedia_states.html
similarity index 100%
rename from pandas/io/tests/data/wikipedia_states.html
rename to pandas/tests/io/data/wikipedia_states.html
diff --git a/pandas/io/tests/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py
similarity index 100%
rename from pandas/io/tests/generate_legacy_storage_files.py
rename to pandas/tests/io/generate_legacy_storage_files.py
diff --git a/pandas/io/tests/json/__init__.py b/pandas/tests/io/json/__init__.py
similarity index 100%
rename from pandas/io/tests/json/__init__.py
rename to pandas/tests/io/json/__init__.py
diff --git a/pandas/io/tests/json/data/tsframe_iso_v012.json b/pandas/tests/io/json/data/tsframe_iso_v012.json
similarity index 100%
rename from pandas/io/tests/json/data/tsframe_iso_v012.json
rename to pandas/tests/io/json/data/tsframe_iso_v012.json
diff --git a/pandas/io/tests/json/data/tsframe_v012.json b/pandas/tests/io/json/data/tsframe_v012.json
similarity index 100%
rename from pandas/io/tests/json/data/tsframe_v012.json
rename to pandas/tests/io/json/data/tsframe_v012.json
diff --git a/pandas/io/tests/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
similarity index 100%
rename from pandas/io/tests/json/test_normalize.py
rename to pandas/tests/io/json/test_normalize.py
diff --git a/pandas/io/tests/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
similarity index 100%
rename from pandas/io/tests/json/test_pandas.py
rename to pandas/tests/io/json/test_pandas.py
diff --git a/pandas/io/tests/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py
similarity index 100%
rename from pandas/io/tests/json/test_ujson.py
rename to pandas/tests/io/json/test_ujson.py
diff --git a/pandas/io/tests/parser/__init__.py b/pandas/tests/io/parser/__init__.py
similarity index 100%
rename from pandas/io/tests/parser/__init__.py
rename to pandas/tests/io/parser/__init__.py
diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py
similarity index 100%
rename from pandas/io/tests/parser/c_parser_only.py
rename to pandas/tests/io/parser/c_parser_only.py
diff --git a/pandas/io/tests/parser/comment.py b/pandas/tests/io/parser/comment.py
similarity index 100%
rename from pandas/io/tests/parser/comment.py
rename to pandas/tests/io/parser/comment.py
diff --git a/pandas/io/tests/parser/common.py b/pandas/tests/io/parser/common.py
similarity index 100%
rename from pandas/io/tests/parser/common.py
rename to pandas/tests/io/parser/common.py
diff --git a/pandas/io/tests/parser/compression.py b/pandas/tests/io/parser/compression.py
similarity index 100%
rename from pandas/io/tests/parser/compression.py
rename to pandas/tests/io/parser/compression.py
diff --git a/pandas/io/tests/parser/converters.py b/pandas/tests/io/parser/converters.py
similarity index 100%
rename from pandas/io/tests/parser/converters.py
rename to pandas/tests/io/parser/converters.py
diff --git a/pandas/io/tests/parser/data/iris.csv b/pandas/tests/io/parser/data/iris.csv
similarity index 100%
rename from pandas/io/tests/parser/data/iris.csv
rename to pandas/tests/io/parser/data/iris.csv
diff --git a/pandas/io/tests/parser/data/salaries.csv b/pandas/tests/io/parser/data/salaries.csv
similarity index 100%
rename from pandas/io/tests/parser/data/salaries.csv
rename to pandas/tests/io/parser/data/salaries.csv
diff --git a/pandas/io/tests/parser/data/salaries.csv.bz2 b/pandas/tests/io/parser/data/salaries.csv.bz2
similarity index 100%
rename from pandas/io/tests/parser/data/salaries.csv.bz2
rename to pandas/tests/io/parser/data/salaries.csv.bz2
diff --git a/pandas/io/tests/parser/data/salaries.csv.gz b/pandas/tests/io/parser/data/salaries.csv.gz
similarity index 100%
rename from pandas/io/tests/parser/data/salaries.csv.gz
rename to pandas/tests/io/parser/data/salaries.csv.gz
diff --git a/pandas/io/tests/parser/data/salaries.csv.xz b/pandas/tests/io/parser/data/salaries.csv.xz
similarity index 100%
rename from pandas/io/tests/parser/data/salaries.csv.xz
rename to pandas/tests/io/parser/data/salaries.csv.xz
diff --git a/pandas/io/tests/parser/data/salaries.csv.zip b/pandas/tests/io/parser/data/salaries.csv.zip
similarity index 100%
rename from pandas/io/tests/parser/data/salaries.csv.zip
rename to pandas/tests/io/parser/data/salaries.csv.zip
diff --git a/pandas/io/tests/parser/data/sauron.SHIFT_JIS.csv b/pandas/tests/io/parser/data/sauron.SHIFT_JIS.csv
similarity index 100%
rename from pandas/io/tests/parser/data/sauron.SHIFT_JIS.csv
rename to pandas/tests/io/parser/data/sauron.SHIFT_JIS.csv
diff --git a/pandas/io/tests/parser/data/test1.csv b/pandas/tests/io/parser/data/test1.csv
similarity index 100%
rename from pandas/io/tests/parser/data/test1.csv
rename to pandas/tests/io/parser/data/test1.csv
diff --git a/pandas/io/tests/parser/data/test1.csv.bz2 b/pandas/tests/io/parser/data/test1.csv.bz2
similarity index 100%
rename from pandas/io/tests/parser/data/test1.csv.bz2
rename to pandas/tests/io/parser/data/test1.csv.bz2
diff --git a/pandas/io/tests/parser/data/test1.csv.gz b/pandas/tests/io/parser/data/test1.csv.gz
similarity index 100%
rename from pandas/io/tests/parser/data/test1.csv.gz
rename to pandas/tests/io/parser/data/test1.csv.gz
diff --git a/pandas/io/tests/parser/data/test2.csv b/pandas/tests/io/parser/data/test2.csv
similarity index 100%
rename from pandas/io/tests/parser/data/test2.csv
rename to pandas/tests/io/parser/data/test2.csv
diff --git a/pandas/io/tests/parser/data/test_mmap.csv b/pandas/tests/io/parser/data/test_mmap.csv
similarity index 100%
rename from pandas/io/tests/parser/data/test_mmap.csv
rename to pandas/tests/io/parser/data/test_mmap.csv
diff --git a/pandas/io/tests/parser/data/tips.csv b/pandas/tests/io/parser/data/tips.csv
similarity index 100%
rename from pandas/io/tests/parser/data/tips.csv
rename to pandas/tests/io/parser/data/tips.csv
diff --git a/pandas/io/tests/parser/data/unicode_series.csv b/pandas/tests/io/parser/data/unicode_series.csv
similarity index 100%
rename from pandas/io/tests/parser/data/unicode_series.csv
rename to pandas/tests/io/parser/data/unicode_series.csv
diff --git a/pandas/io/tests/parser/data/utf16_ex.txt b/pandas/tests/io/parser/data/utf16_ex.txt
similarity index 100%
rename from pandas/io/tests/parser/data/utf16_ex.txt
rename to pandas/tests/io/parser/data/utf16_ex.txt
diff --git a/pandas/io/tests/parser/dialect.py b/pandas/tests/io/parser/dialect.py
similarity index 100%
rename from pandas/io/tests/parser/dialect.py
rename to pandas/tests/io/parser/dialect.py
diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py
similarity index 100%
rename from pandas/io/tests/parser/dtypes.py
rename to pandas/tests/io/parser/dtypes.py
diff --git a/pandas/io/tests/parser/header.py b/pandas/tests/io/parser/header.py
similarity index 100%
rename from pandas/io/tests/parser/header.py
rename to pandas/tests/io/parser/header.py
diff --git a/pandas/io/tests/parser/index_col.py b/pandas/tests/io/parser/index_col.py
similarity index 100%
rename from pandas/io/tests/parser/index_col.py
rename to pandas/tests/io/parser/index_col.py
diff --git a/pandas/io/tests/parser/multithread.py b/pandas/tests/io/parser/multithread.py
similarity index 100%
rename from pandas/io/tests/parser/multithread.py
rename to pandas/tests/io/parser/multithread.py
diff --git a/pandas/io/tests/parser/na_values.py b/pandas/tests/io/parser/na_values.py
similarity index 100%
rename from pandas/io/tests/parser/na_values.py
rename to pandas/tests/io/parser/na_values.py
diff --git a/pandas/io/tests/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py
similarity index 100%
rename from pandas/io/tests/parser/parse_dates.py
rename to pandas/tests/io/parser/parse_dates.py
diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py
similarity index 100%
rename from pandas/io/tests/parser/python_parser_only.py
rename to pandas/tests/io/parser/python_parser_only.py
diff --git a/pandas/io/tests/parser/quoting.py b/pandas/tests/io/parser/quoting.py
similarity index 100%
rename from pandas/io/tests/parser/quoting.py
rename to pandas/tests/io/parser/quoting.py
diff --git a/pandas/io/tests/parser/skiprows.py b/pandas/tests/io/parser/skiprows.py
similarity index 100%
rename from pandas/io/tests/parser/skiprows.py
rename to pandas/tests/io/parser/skiprows.py
diff --git a/pandas/io/tests/parser/test_network.py b/pandas/tests/io/parser/test_network.py
similarity index 100%
rename from pandas/io/tests/parser/test_network.py
rename to pandas/tests/io/parser/test_network.py
diff --git a/pandas/io/tests/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py
similarity index 100%
rename from pandas/io/tests/parser/test_parsers.py
rename to pandas/tests/io/parser/test_parsers.py
diff --git a/pandas/io/tests/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
similarity index 100%
rename from pandas/io/tests/parser/test_read_fwf.py
rename to pandas/tests/io/parser/test_read_fwf.py
diff --git a/pandas/io/tests/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py
similarity index 100%
rename from pandas/io/tests/parser/test_textreader.py
rename to pandas/tests/io/parser/test_textreader.py
diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py
similarity index 100%
rename from pandas/io/tests/parser/test_unsupported.py
rename to pandas/tests/io/parser/test_unsupported.py
diff --git a/pandas/io/tests/parser/usecols.py b/pandas/tests/io/parser/usecols.py
similarity index 100%
rename from pandas/io/tests/parser/usecols.py
rename to pandas/tests/io/parser/usecols.py
diff --git a/pandas/io/tests/sas/data/DEMO_G.csv b/pandas/tests/io/sas/data/DEMO_G.csv
similarity index 100%
rename from pandas/io/tests/sas/data/DEMO_G.csv
rename to pandas/tests/io/sas/data/DEMO_G.csv
diff --git a/pandas/io/tests/sas/data/DEMO_G.xpt b/pandas/tests/io/sas/data/DEMO_G.xpt
similarity index 100%
rename from pandas/io/tests/sas/data/DEMO_G.xpt
rename to pandas/tests/io/sas/data/DEMO_G.xpt
diff --git a/pandas/io/tests/sas/data/DRXFCD_G.csv b/pandas/tests/io/sas/data/DRXFCD_G.csv
similarity index 100%
rename from pandas/io/tests/sas/data/DRXFCD_G.csv
rename to pandas/tests/io/sas/data/DRXFCD_G.csv
diff --git a/pandas/io/tests/sas/data/DRXFCD_G.xpt b/pandas/tests/io/sas/data/DRXFCD_G.xpt
similarity index 100%
rename from pandas/io/tests/sas/data/DRXFCD_G.xpt
rename to pandas/tests/io/sas/data/DRXFCD_G.xpt
diff --git a/pandas/io/tests/sas/data/SSHSV1_A.csv b/pandas/tests/io/sas/data/SSHSV1_A.csv
similarity index 100%
rename from pandas/io/tests/sas/data/SSHSV1_A.csv
rename to pandas/tests/io/sas/data/SSHSV1_A.csv
diff --git a/pandas/io/tests/sas/data/SSHSV1_A.xpt b/pandas/tests/io/sas/data/SSHSV1_A.xpt
similarity index 100%
rename from pandas/io/tests/sas/data/SSHSV1_A.xpt
rename to pandas/tests/io/sas/data/SSHSV1_A.xpt
diff --git a/pandas/io/tests/sas/data/airline.csv b/pandas/tests/io/sas/data/airline.csv
similarity index 100%
rename from pandas/io/tests/sas/data/airline.csv
rename to pandas/tests/io/sas/data/airline.csv
diff --git a/pandas/io/tests/sas/data/airline.sas7bdat b/pandas/tests/io/sas/data/airline.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/airline.sas7bdat
rename to pandas/tests/io/sas/data/airline.sas7bdat
diff --git a/pandas/io/tests/sas/data/paxraw_d_short.csv b/pandas/tests/io/sas/data/paxraw_d_short.csv
similarity index 100%
rename from pandas/io/tests/sas/data/paxraw_d_short.csv
rename to pandas/tests/io/sas/data/paxraw_d_short.csv
diff --git a/pandas/io/tests/sas/data/paxraw_d_short.xpt b/pandas/tests/io/sas/data/paxraw_d_short.xpt
similarity index 100%
rename from pandas/io/tests/sas/data/paxraw_d_short.xpt
rename to pandas/tests/io/sas/data/paxraw_d_short.xpt
diff --git a/pandas/io/tests/sas/data/productsales.csv b/pandas/tests/io/sas/data/productsales.csv
similarity index 100%
rename from pandas/io/tests/sas/data/productsales.csv
rename to pandas/tests/io/sas/data/productsales.csv
diff --git a/pandas/io/tests/sas/data/productsales.sas7bdat b/pandas/tests/io/sas/data/productsales.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/productsales.sas7bdat
rename to pandas/tests/io/sas/data/productsales.sas7bdat
diff --git a/pandas/io/tests/sas/data/test1.sas7bdat b/pandas/tests/io/sas/data/test1.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test1.sas7bdat
rename to pandas/tests/io/sas/data/test1.sas7bdat
diff --git a/pandas/io/tests/sas/data/test10.sas7bdat b/pandas/tests/io/sas/data/test10.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test10.sas7bdat
rename to pandas/tests/io/sas/data/test10.sas7bdat
diff --git a/pandas/io/tests/sas/data/test11.sas7bdat b/pandas/tests/io/sas/data/test11.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test11.sas7bdat
rename to pandas/tests/io/sas/data/test11.sas7bdat
diff --git a/pandas/io/tests/sas/data/test12.sas7bdat b/pandas/tests/io/sas/data/test12.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test12.sas7bdat
rename to pandas/tests/io/sas/data/test12.sas7bdat
diff --git a/pandas/io/tests/sas/data/test13.sas7bdat b/pandas/tests/io/sas/data/test13.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test13.sas7bdat
rename to pandas/tests/io/sas/data/test13.sas7bdat
diff --git a/pandas/io/tests/sas/data/test14.sas7bdat b/pandas/tests/io/sas/data/test14.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test14.sas7bdat
rename to pandas/tests/io/sas/data/test14.sas7bdat
diff --git a/pandas/io/tests/sas/data/test15.sas7bdat b/pandas/tests/io/sas/data/test15.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test15.sas7bdat
rename to pandas/tests/io/sas/data/test15.sas7bdat
diff --git a/pandas/io/tests/sas/data/test16.sas7bdat b/pandas/tests/io/sas/data/test16.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test16.sas7bdat
rename to pandas/tests/io/sas/data/test16.sas7bdat
diff --git a/pandas/io/tests/sas/data/test2.sas7bdat b/pandas/tests/io/sas/data/test2.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test2.sas7bdat
rename to pandas/tests/io/sas/data/test2.sas7bdat
diff --git a/pandas/io/tests/sas/data/test3.sas7bdat b/pandas/tests/io/sas/data/test3.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test3.sas7bdat
rename to pandas/tests/io/sas/data/test3.sas7bdat
diff --git a/pandas/io/tests/sas/data/test4.sas7bdat b/pandas/tests/io/sas/data/test4.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test4.sas7bdat
rename to pandas/tests/io/sas/data/test4.sas7bdat
diff --git a/pandas/io/tests/sas/data/test5.sas7bdat b/pandas/tests/io/sas/data/test5.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test5.sas7bdat
rename to pandas/tests/io/sas/data/test5.sas7bdat
diff --git a/pandas/io/tests/sas/data/test6.sas7bdat b/pandas/tests/io/sas/data/test6.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test6.sas7bdat
rename to pandas/tests/io/sas/data/test6.sas7bdat
diff --git a/pandas/io/tests/sas/data/test7.sas7bdat b/pandas/tests/io/sas/data/test7.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test7.sas7bdat
rename to pandas/tests/io/sas/data/test7.sas7bdat
diff --git a/pandas/io/tests/sas/data/test8.sas7bdat b/pandas/tests/io/sas/data/test8.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test8.sas7bdat
rename to pandas/tests/io/sas/data/test8.sas7bdat
diff --git a/pandas/io/tests/sas/data/test9.sas7bdat b/pandas/tests/io/sas/data/test9.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test9.sas7bdat
rename to pandas/tests/io/sas/data/test9.sas7bdat
diff --git a/pandas/io/tests/sas/data/test_12659.csv b/pandas/tests/io/sas/data/test_12659.csv
similarity index 100%
rename from pandas/io/tests/sas/data/test_12659.csv
rename to pandas/tests/io/sas/data/test_12659.csv
diff --git a/pandas/io/tests/sas/data/test_12659.sas7bdat b/pandas/tests/io/sas/data/test_12659.sas7bdat
similarity index 100%
rename from pandas/io/tests/sas/data/test_12659.sas7bdat
rename to pandas/tests/io/sas/data/test_12659.sas7bdat
diff --git a/pandas/io/tests/sas/data/test_sas7bdat_1.csv b/pandas/tests/io/sas/data/test_sas7bdat_1.csv
similarity index 100%
rename from pandas/io/tests/sas/data/test_sas7bdat_1.csv
rename to pandas/tests/io/sas/data/test_sas7bdat_1.csv
diff --git a/pandas/io/tests/sas/data/test_sas7bdat_2.csv b/pandas/tests/io/sas/data/test_sas7bdat_2.csv
similarity index 100%
rename from pandas/io/tests/sas/data/test_sas7bdat_2.csv
rename to pandas/tests/io/sas/data/test_sas7bdat_2.csv
diff --git a/pandas/io/tests/sas/test_sas.py b/pandas/tests/io/sas/test_sas.py
similarity index 100%
rename from pandas/io/tests/sas/test_sas.py
rename to pandas/tests/io/sas/test_sas.py
diff --git a/pandas/io/tests/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py
similarity index 100%
rename from pandas/io/tests/sas/test_sas7bdat.py
rename to pandas/tests/io/sas/test_sas7bdat.py
diff --git a/pandas/io/tests/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py
similarity index 100%
rename from pandas/io/tests/sas/test_xport.py
rename to pandas/tests/io/sas/test_xport.py
diff --git a/pandas/io/tests/test_clipboard.py b/pandas/tests/io/test_clipboard.py
similarity index 100%
rename from pandas/io/tests/test_clipboard.py
pandas/tests/io/test_clipboard.py diff --git a/pandas/io/tests/test_common.py b/pandas/tests/io/test_common.py similarity index 100% rename from pandas/io/tests/test_common.py rename to pandas/tests/io/test_common.py diff --git a/pandas/io/tests/test_date_converters.py b/pandas/tests/io/test_date_converters.py similarity index 100% rename from pandas/io/tests/test_date_converters.py rename to pandas/tests/io/test_date_converters.py diff --git a/pandas/io/tests/test_excel.py b/pandas/tests/io/test_excel.py similarity index 100% rename from pandas/io/tests/test_excel.py rename to pandas/tests/io/test_excel.py diff --git a/pandas/io/tests/test_feather.py b/pandas/tests/io/test_feather.py similarity index 100% rename from pandas/io/tests/test_feather.py rename to pandas/tests/io/test_feather.py diff --git a/pandas/io/tests/test_gbq.py b/pandas/tests/io/test_gbq.py similarity index 100% rename from pandas/io/tests/test_gbq.py rename to pandas/tests/io/test_gbq.py diff --git a/pandas/io/tests/test_html.py b/pandas/tests/io/test_html.py similarity index 100% rename from pandas/io/tests/test_html.py rename to pandas/tests/io/test_html.py diff --git a/pandas/io/tests/test_packers.py b/pandas/tests/io/test_packers.py similarity index 99% rename from pandas/io/tests/test_packers.py rename to pandas/tests/io/test_packers.py index 4bb6f4a69bab3..911cd8164571d 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -795,7 +795,7 @@ class TestMsgpack(): @classmethod def setup_class(cls): - from pandas.io.tests.generate_legacy_storage_files import ( + from pandas.tests.io.generate_legacy_storage_files import ( create_msgpack_data, create_data) cls.data = create_msgpack_data() cls.all_data = create_data() diff --git a/pandas/io/tests/test_pickle.py b/pandas/tests/io/test_pickle.py similarity index 99% rename from pandas/io/tests/test_pickle.py rename to pandas/tests/io/test_pickle.py index 588b2d5f04888..5445c506b050c 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -33,7 +33,7 @@ class TestPickle(): @classmethod def setup_class(cls): - from pandas.io.tests.generate_legacy_storage_files import ( + from pandas.tests.io.generate_legacy_storage_files import ( create_pickle_data) cls.data = create_pickle_data() cls.path = u('__%s__.pickle' % tm.rands(10)) diff --git a/pandas/io/tests/test_pytables.py b/pandas/tests/io/test_pytables.py similarity index 100% rename from pandas/io/tests/test_pytables.py rename to pandas/tests/io/test_pytables.py diff --git a/pandas/io/tests/test_s3.py b/pandas/tests/io/test_s3.py similarity index 100% rename from pandas/io/tests/test_s3.py rename to pandas/tests/io/test_s3.py diff --git a/pandas/io/tests/test_sql.py b/pandas/tests/io/test_sql.py similarity index 100% rename from pandas/io/tests/test_sql.py rename to pandas/tests/io/test_sql.py diff --git a/pandas/io/tests/test_stata.py b/pandas/tests/io/test_stata.py similarity index 100% rename from pandas/io/tests/test_stata.py rename to pandas/tests/io/test_stata.py diff --git a/pandas/sparse/tests/__init__.py b/pandas/tests/msgpack/__init__.py similarity index 100% rename from pandas/sparse/tests/__init__.py rename to pandas/tests/msgpack/__init__.py diff --git a/pandas/tests/test_msgpack/test_buffer.py b/pandas/tests/msgpack/test_buffer.py similarity index 100% rename from pandas/tests/test_msgpack/test_buffer.py rename to pandas/tests/msgpack/test_buffer.py diff --git a/pandas/tests/test_msgpack/test_case.py b/pandas/tests/msgpack/test_case.py similarity index 
100% rename from pandas/tests/test_msgpack/test_case.py rename to pandas/tests/msgpack/test_case.py diff --git a/pandas/tests/test_msgpack/test_except.py b/pandas/tests/msgpack/test_except.py similarity index 100% rename from pandas/tests/test_msgpack/test_except.py rename to pandas/tests/msgpack/test_except.py diff --git a/pandas/tests/test_msgpack/test_extension.py b/pandas/tests/msgpack/test_extension.py similarity index 100% rename from pandas/tests/test_msgpack/test_extension.py rename to pandas/tests/msgpack/test_extension.py diff --git a/pandas/tests/test_msgpack/test_format.py b/pandas/tests/msgpack/test_format.py similarity index 100% rename from pandas/tests/test_msgpack/test_format.py rename to pandas/tests/msgpack/test_format.py diff --git a/pandas/tests/test_msgpack/test_limits.py b/pandas/tests/msgpack/test_limits.py similarity index 100% rename from pandas/tests/test_msgpack/test_limits.py rename to pandas/tests/msgpack/test_limits.py diff --git a/pandas/tests/test_msgpack/test_newspec.py b/pandas/tests/msgpack/test_newspec.py similarity index 100% rename from pandas/tests/test_msgpack/test_newspec.py rename to pandas/tests/msgpack/test_newspec.py diff --git a/pandas/tests/test_msgpack/test_obj.py b/pandas/tests/msgpack/test_obj.py similarity index 100% rename from pandas/tests/test_msgpack/test_obj.py rename to pandas/tests/msgpack/test_obj.py diff --git a/pandas/tests/test_msgpack/test_pack.py b/pandas/tests/msgpack/test_pack.py similarity index 100% rename from pandas/tests/test_msgpack/test_pack.py rename to pandas/tests/msgpack/test_pack.py diff --git a/pandas/tests/test_msgpack/test_read_size.py b/pandas/tests/msgpack/test_read_size.py similarity index 100% rename from pandas/tests/test_msgpack/test_read_size.py rename to pandas/tests/msgpack/test_read_size.py diff --git a/pandas/tests/test_msgpack/test_seq.py b/pandas/tests/msgpack/test_seq.py similarity index 100% rename from pandas/tests/test_msgpack/test_seq.py rename to pandas/tests/msgpack/test_seq.py diff --git a/pandas/tests/test_msgpack/test_sequnpack.py b/pandas/tests/msgpack/test_sequnpack.py similarity index 100% rename from pandas/tests/test_msgpack/test_sequnpack.py rename to pandas/tests/msgpack/test_sequnpack.py diff --git a/pandas/tests/test_msgpack/test_subtype.py b/pandas/tests/msgpack/test_subtype.py similarity index 100% rename from pandas/tests/test_msgpack/test_subtype.py rename to pandas/tests/msgpack/test_subtype.py diff --git a/pandas/tests/test_msgpack/test_unpack.py b/pandas/tests/msgpack/test_unpack.py similarity index 100% rename from pandas/tests/test_msgpack/test_unpack.py rename to pandas/tests/msgpack/test_unpack.py diff --git a/pandas/tests/test_msgpack/test_unpack_raw.py b/pandas/tests/msgpack/test_unpack_raw.py similarity index 100% rename from pandas/tests/test_msgpack/test_unpack_raw.py rename to pandas/tests/msgpack/test_unpack_raw.py diff --git a/pandas/tests/test_msgpack/__init__.py b/pandas/tests/sparse/__init__.py similarity index 100% rename from pandas/tests/test_msgpack/__init__.py rename to pandas/tests/sparse/__init__.py diff --git a/pandas/sparse/tests/test_arithmetics.py b/pandas/tests/sparse/test_arithmetics.py similarity index 100% rename from pandas/sparse/tests/test_arithmetics.py rename to pandas/tests/sparse/test_arithmetics.py diff --git a/pandas/sparse/tests/test_array.py b/pandas/tests/sparse/test_array.py similarity index 100% rename from pandas/sparse/tests/test_array.py rename to pandas/tests/sparse/test_array.py diff --git 
a/pandas/sparse/tests/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py similarity index 100% rename from pandas/sparse/tests/test_combine_concat.py rename to pandas/tests/sparse/test_combine_concat.py diff --git a/pandas/sparse/tests/test_format.py b/pandas/tests/sparse/test_format.py similarity index 100% rename from pandas/sparse/tests/test_format.py rename to pandas/tests/sparse/test_format.py diff --git a/pandas/sparse/tests/test_frame.py b/pandas/tests/sparse/test_frame.py similarity index 100% rename from pandas/sparse/tests/test_frame.py rename to pandas/tests/sparse/test_frame.py diff --git a/pandas/sparse/tests/test_groupby.py b/pandas/tests/sparse/test_groupby.py similarity index 100% rename from pandas/sparse/tests/test_groupby.py rename to pandas/tests/sparse/test_groupby.py diff --git a/pandas/sparse/tests/test_indexing.py b/pandas/tests/sparse/test_indexing.py similarity index 100% rename from pandas/sparse/tests/test_indexing.py rename to pandas/tests/sparse/test_indexing.py diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py similarity index 100% rename from pandas/sparse/tests/test_libsparse.py rename to pandas/tests/sparse/test_libsparse.py diff --git a/pandas/sparse/tests/test_list.py b/pandas/tests/sparse/test_list.py similarity index 100% rename from pandas/sparse/tests/test_list.py rename to pandas/tests/sparse/test_list.py diff --git a/pandas/sparse/tests/test_pivot.py b/pandas/tests/sparse/test_pivot.py similarity index 100% rename from pandas/sparse/tests/test_pivot.py rename to pandas/tests/sparse/test_pivot.py diff --git a/pandas/sparse/tests/test_series.py b/pandas/tests/sparse/test_series.py similarity index 100% rename from pandas/sparse/tests/test_series.py rename to pandas/tests/sparse/test_series.py diff --git a/pandas/tools/tests/__init__.py b/pandas/tests/tools/__init__.py similarity index 100% rename from pandas/tools/tests/__init__.py rename to pandas/tests/tools/__init__.py diff --git a/pandas/tools/tests/data/allow_exact_matches.csv b/pandas/tests/tools/data/allow_exact_matches.csv similarity index 100% rename from pandas/tools/tests/data/allow_exact_matches.csv rename to pandas/tests/tools/data/allow_exact_matches.csv diff --git a/pandas/tools/tests/data/allow_exact_matches_and_tolerance.csv b/pandas/tests/tools/data/allow_exact_matches_and_tolerance.csv similarity index 100% rename from pandas/tools/tests/data/allow_exact_matches_and_tolerance.csv rename to pandas/tests/tools/data/allow_exact_matches_and_tolerance.csv diff --git a/pandas/tools/tests/data/asof.csv b/pandas/tests/tools/data/asof.csv similarity index 100% rename from pandas/tools/tests/data/asof.csv rename to pandas/tests/tools/data/asof.csv diff --git a/pandas/tools/tests/data/asof2.csv b/pandas/tests/tools/data/asof2.csv similarity index 100% rename from pandas/tools/tests/data/asof2.csv rename to pandas/tests/tools/data/asof2.csv diff --git a/pandas/tools/tests/data/cut_data.csv b/pandas/tests/tools/data/cut_data.csv similarity index 100% rename from pandas/tools/tests/data/cut_data.csv rename to pandas/tests/tools/data/cut_data.csv diff --git a/pandas/tools/tests/data/quotes.csv b/pandas/tests/tools/data/quotes.csv similarity index 100% rename from pandas/tools/tests/data/quotes.csv rename to pandas/tests/tools/data/quotes.csv diff --git a/pandas/tools/tests/data/quotes2.csv b/pandas/tests/tools/data/quotes2.csv similarity index 100% rename from pandas/tools/tests/data/quotes2.csv rename to 
pandas/tests/tools/data/quotes2.csv diff --git a/pandas/tools/tests/data/tolerance.csv b/pandas/tests/tools/data/tolerance.csv similarity index 100% rename from pandas/tools/tests/data/tolerance.csv rename to pandas/tests/tools/data/tolerance.csv diff --git a/pandas/tools/tests/data/trades.csv b/pandas/tests/tools/data/trades.csv similarity index 100% rename from pandas/tools/tests/data/trades.csv rename to pandas/tests/tools/data/trades.csv diff --git a/pandas/tools/tests/data/trades2.csv b/pandas/tests/tools/data/trades2.csv similarity index 100% rename from pandas/tools/tests/data/trades2.csv rename to pandas/tests/tools/data/trades2.csv diff --git a/pandas/tools/tests/test_concat.py b/pandas/tests/tools/test_concat.py similarity index 100% rename from pandas/tools/tests/test_concat.py rename to pandas/tests/tools/test_concat.py diff --git a/pandas/tools/tests/test_hashing.py b/pandas/tests/tools/test_hashing.py similarity index 100% rename from pandas/tools/tests/test_hashing.py rename to pandas/tests/tools/test_hashing.py diff --git a/pandas/tools/tests/test_join.py b/pandas/tests/tools/test_join.py similarity index 99% rename from pandas/tools/tests/test_join.py rename to pandas/tests/tools/test_join.py index fe5821a637205..ab42b1212301b 100644 --- a/pandas/tools/tests/test_join.py +++ b/pandas/tests/tools/test_join.py @@ -11,7 +11,7 @@ import pandas._join as _join import pandas.util.testing as tm -from pandas.tools.tests.test_merge import get_test_data, N, NGROUPS +from pandas.tests.tools.test_merge import get_test_data, N, NGROUPS a_ = np.array diff --git a/pandas/tools/tests/test_merge.py b/pandas/tests/tools/test_merge.py similarity index 100% rename from pandas/tools/tests/test_merge.py rename to pandas/tests/tools/test_merge.py diff --git a/pandas/tools/tests/test_merge_asof.py b/pandas/tests/tools/test_merge_asof.py similarity index 100% rename from pandas/tools/tests/test_merge_asof.py rename to pandas/tests/tools/test_merge_asof.py diff --git a/pandas/tools/tests/test_merge_ordered.py b/pandas/tests/tools/test_merge_ordered.py similarity index 100% rename from pandas/tools/tests/test_merge_ordered.py rename to pandas/tests/tools/test_merge_ordered.py diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tests/tools/test_pivot.py similarity index 100% rename from pandas/tools/tests/test_pivot.py rename to pandas/tests/tools/test_pivot.py diff --git a/pandas/tools/tests/test_tile.py b/pandas/tests/tools/test_tile.py similarity index 100% rename from pandas/tools/tests/test_tile.py rename to pandas/tests/tools/test_tile.py diff --git a/pandas/tools/tests/test_util.py b/pandas/tests/tools/test_util.py similarity index 100% rename from pandas/tools/tests/test_util.py rename to pandas/tests/tools/test_util.py diff --git a/setup.py b/setup.py index edec53e9cefb0..cbcadce459c67 100755 --- a/setup.py +++ b/setup.py @@ -622,12 +622,10 @@ def pxd(name): version=versioneer.get_version(), packages=['pandas', 'pandas.api', - 'pandas.api.tests', 'pandas.api.types', 'pandas.compat', 'pandas.compat.numpy', 'pandas.computation', - 'pandas.computation.tests', 'pandas.core', 'pandas.indexes', 'pandas.io', @@ -635,59 +633,61 @@ def pxd(name): 'pandas.io.sas', 'pandas.formats', 'pandas.sparse', - 'pandas.sparse.tests', 'pandas.stats', 'pandas.util', 'pandas.tests', + 'pandas.tests.api', + 'pandas.tests.computation', 'pandas.tests.frame', 'pandas.tests.indexes', 'pandas.tests.indexes.datetimes', 'pandas.tests.indexes.timedeltas', 'pandas.tests.indexes.period', + 'pandas.tests.io', + 
'pandas.tests.io.json', + 'pandas.tests.io.parser', + 'pandas.tests.io.sas', 'pandas.tests.groupby', 'pandas.tests.series', 'pandas.tests.formats', + 'pandas.tests.msgpack', 'pandas.tests.scalar', + 'pandas.tests.sparse', 'pandas.tests.tseries', + 'pandas.tests.tools', 'pandas.tests.types', - 'pandas.tests.test_msgpack', 'pandas.tests.plotting', 'pandas.tools', - 'pandas.tools.tests', 'pandas.tseries', 'pandas.types', - 'pandas.io.tests', - 'pandas.io.tests.json', - 'pandas.io.tests.parser', - 'pandas.io.tests.sas', 'pandas.msgpack', 'pandas.util.clipboard' ], - package_data={'pandas.io': ['tests/data/legacy_hdf/*.h5', - 'tests/data/legacy_pickle/*/*.pickle', - 'tests/data/legacy_msgpack/*/*.msgpack', - 'tests/data/*.csv*', - 'tests/data/*.dta', - 'tests/data/*.pickle', - 'tests/data/*.txt', - 'tests/data/*.xls', - 'tests/data/*.xlsx', - 'tests/data/*.xlsm', - 'tests/data/*.table', - 'tests/parser/data/*.csv', - 'tests/parser/data/*.gz', - 'tests/parser/data/*.bz2', - 'tests/parser/data/*.txt', - 'tests/sas/data/*.csv', - 'tests/sas/data/*.xpt', - 'tests/sas/data/*.sas7bdat', - 'tests/data/*.html', - 'tests/data/html_encoding/*.html', - 'tests/json/data/*.json'], - 'pandas.tools': ['tests/data/*.csv'], - 'pandas.tests': ['data/*.csv'], + package_data={'pandas.tests': ['data/*.csv'], 'pandas.tests.formats': ['data/*.csv'], 'pandas.tests.indexes': ['data/*.pickle'], + 'pandas.tests.io': ['data/legacy_hdf/*.h5', + 'data/legacy_pickle/*/*.pickle', + 'data/legacy_msgpack/*/*.msgpack', + 'data/*.csv*', + 'data/*.dta', + 'data/*.pickle', + 'data/*.txt', + 'data/*.xls', + 'data/*.xlsx', + 'data/*.xlsm', + 'data/*.table', + 'parser/data/*.csv', + 'parser/data/*.gz', + 'parser/data/*.bz2', + 'parser/data/*.txt', + 'sas/data/*.csv', + 'sas/data/*.xpt', + 'sas/data/*.sas7bdat', + 'data/*.html', + 'data/html_encoding/*.html', + 'json/data/*.json'], + 'pandas.tests.tools': ['data/*.csv'], 'pandas.tests.tseries': ['data/*.pickle'] }, ext_modules=extensions, From 1bcc10da51c61886362d9d4d4eeafe604ab288ea Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Feb 2017 10:09:27 -0500 Subject: [PATCH 044/933] TST: fix locations for github based url tests --- pandas/tests/io/parser/common.py | 2 +- pandas/tests/io/test_excel.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 0671901fc170a..b667eed346355 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -617,7 +617,7 @@ def test_read_csv_parse_simple_list(self): def test_url(self): # HTTP(S) url = ('https://raw.github.com/pandas-dev/pandas/master/' - 'pandas/io/tests/parser/data/salaries.csv') + 'pandas/tests/io/parser/data/salaries.csv') url_table = self.read_table(url) dirpath = tm.get_data_path() localtable = os.path.join(dirpath, 'salaries.csv') diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index a22c89184f20d..0c2b443cffe52 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -581,7 +581,7 @@ def test_read_xlrd_Book(self): @tm.network def test_read_from_http_url(self): url = ('https://raw.github.com/pandas-dev/pandas/master/' - 'pandas/io/tests/data/test1' + self.ext) + 'pandas/tests/io/data/test1' + self.ext) url_table = read_excel(url) local_table = self.get_exceldf('test1') tm.assert_frame_equal(url_table, local_table) From f87db63d821f9b7bc347c3ed8e0f452859843081 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Feb 2017 10:31:47 -0500 Subject: [PATCH 
045/933] DOC: fix path in whatsnew --- doc/source/whatsnew/v0.20.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9f86c777c665d..aa620bce0df59 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -91,7 +91,7 @@ support for bz2 compression in the python 2 c-engine improved (:issue:`14874`). url = 'https://github.com/{repo}/raw/{branch}/{path}'.format( repo = 'pandas-dev/pandas', branch = 'master', - path = 'pandas/io/tests/parser/data/salaries.csv.bz2', + path = 'pandas/tests/io/parser/data/salaries.csv.bz2', ) df = pd.read_table(url, compression='infer') # default, infer compression df = pd.read_table(url, compression='bz2') # explicitly specify compression From 1190ac6e19a431a596980c766ec1a3405a7d554a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 11 Feb 2017 16:17:27 -0500 Subject: [PATCH 046/933] TST: use xdist for multiple cpu testing closes #15369 --- .travis.yml | 3 +- ci/script_multi.sh | 32 +++ ci/{script.sh => script_single.sh} | 10 +- pandas/tests/indexes/datetimes/test_ops.py | 244 +++++++++++---------- pandas/tests/io/test_clipboard.py | 1 + pandas/tests/io/test_pytables.py | 7 +- pandas/tests/io/test_sql.py | 19 +- pandas/tests/test_window.py | 83 ++++--- setup.cfg | 2 + test_fast.sh | 3 +- 10 files changed, 223 insertions(+), 181 deletions(-) create mode 100755 ci/script_multi.sh rename ci/{script.sh => script_single.sh} (63%) diff --git a/.travis.yml b/.travis.yml index 2ff5d508d0371..6b90e49b336b2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -320,7 +320,8 @@ before_script: script: - echo "script start" - ci/run_build_docs.sh - - ci/script.sh + - ci/script_single.sh + - ci/script_multi.sh - ci/lint.sh - echo "script done" diff --git a/ci/script_multi.sh b/ci/script_multi.sh new file mode 100755 index 0000000000000..83f8427cc57ad --- /dev/null +++ b/ci/script_multi.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +echo "[script multi]" + +source activate pandas + +# don't run the tests for the doc build +if [ x"$DOC_BUILD" != x"" ]; then + exit 0 +fi + +if [ -n "$LOCALE_OVERRIDE" ]; then + export LC_ALL="$LOCALE_OVERRIDE"; + echo "Setting LC_ALL to $LOCALE_OVERRIDE" + + pycmd='import pandas; print("pandas detected console encoding: %s" % pandas.get_option("display.encoding"))' + python -c "$pycmd" +fi + +if [ "$BUILD_TEST" ]; then + echo "We are not running pytest as this is simply a build test." +elif [ "$COVERAGE" ]; then + echo pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas + pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas +else + echo pytest -n 2 -m "not single" $TEST_ARGS pandas + pytest -n 2 -m "not single" $TEST_ARGS pandas # TODO: doctest +fi + +RET="$?" + +exit "$RET" diff --git a/ci/script.sh b/ci/script_single.sh similarity index 63% rename from ci/script.sh rename to ci/script_single.sh index c52fa0fdb33a3..38021fcac5721 100755 --- a/ci/script.sh +++ b/ci/script_single.sh @@ -1,6 +1,6 @@ #!/bin/bash -echo "inside $0" +echo "[script_single]" source activate pandas @@ -20,11 +20,11 @@ fi if [ "$BUILD_TEST" ]; then echo "We are not running pytest as this is simply a build test." 
elif [ "$COVERAGE" ]; then - echo pytest -s --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas - pytest -s --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas + echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas + pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas else - echo pytest $TEST_ARGS pandas - pytest $TEST_ARGS pandas # TODO: doctest + echo pytest -m "single" $TEST_ARGS pandas + pytest -m "single" $TEST_ARGS pandas # TODO: doctest fi RET="$?" diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 9a968a42c4247..8eb9128d8d1c8 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -1,7 +1,9 @@ +import pytest import warnings import numpy as np from datetime import timedelta +from itertools import product import pandas as pd import pandas.tslib as tslib import pandas.util.testing as tm @@ -958,134 +960,134 @@ def test_second(self): tm.assert_index_equal(r1, r2) -class TestDatetimeIndex(tm.TestCase): - - # GH 10699 - def test_datetime64_with_DateOffset(self): - for klass, assert_func in zip([Series, DatetimeIndex], - [self.assert_series_equal, - tm.assert_index_equal]): - s = klass(date_range('2000-01-01', '2000-01-31'), name='a') - result = s + pd.DateOffset(years=1) - result2 = pd.DateOffset(years=1) + s - exp = klass(date_range('2001-01-01', '2001-01-31'), name='a') +# GH 10699 +@pytest.mark.parametrize('klass,assert_func', zip([Series, DatetimeIndex], + [tm.assert_series_equal, + tm.assert_index_equal])) +def test_datetime64_with_DateOffset(klass, assert_func): + s = klass(date_range('2000-01-01', '2000-01-31'), name='a') + result = s + pd.DateOffset(years=1) + result2 = pd.DateOffset(years=1) + s + exp = klass(date_range('2001-01-01', '2001-01-31'), name='a') + assert_func(result, exp) + assert_func(result2, exp) + + result = s - pd.DateOffset(years=1) + exp = klass(date_range('1999-01-01', '1999-01-31'), name='a') + assert_func(result, exp) + + s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + pd.Timestamp('2000-02-15', tz='US/Central')], name='a') + result = s + pd.offsets.Day() + result2 = pd.offsets.Day() + s + exp = klass([Timestamp('2000-01-16 00:15:00', tz='US/Central'), + Timestamp('2000-02-16', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + pd.Timestamp('2000-02-15', tz='US/Central')], name='a') + result = s + pd.offsets.MonthEnd() + result2 = pd.offsets.MonthEnd() + s + exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'), + Timestamp('2000-02-29', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + # array of offsets - valid for Series only + if klass is Series: + with tm.assert_produces_warning(PerformanceWarning): + s = klass([Timestamp('2000-1-1'), Timestamp('2000-2-1')]) + result = s + Series([pd.offsets.DateOffset(years=1), + pd.offsets.MonthEnd()]) + exp = klass([Timestamp('2001-1-1'), Timestamp('2000-2-29') + ]) assert_func(result, exp) - assert_func(result2, exp) - result = s - pd.DateOffset(years=1) - exp = klass(date_range('1999-01-01', '1999-01-31'), name='a') + # same offset + result = s + Series([pd.offsets.DateOffset(years=1), + pd.offsets.DateOffset(years=1)]) + exp = klass([Timestamp('2001-1-1'), Timestamp('2001-2-1')]) assert_func(result, exp) - s = klass([Timestamp('2000-01-15 00:15:00', 
tz='US/Central'), - pd.Timestamp('2000-02-15', tz='US/Central')], name='a') - result = s + pd.offsets.Day() - result2 = pd.offsets.Day() + s - exp = klass([Timestamp('2000-01-16 00:15:00', tz='US/Central'), - Timestamp('2000-02-16', tz='US/Central')], name='a') - assert_func(result, exp) - assert_func(result2, exp) - - s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), - pd.Timestamp('2000-02-15', tz='US/Central')], name='a') - result = s + pd.offsets.MonthEnd() - result2 = pd.offsets.MonthEnd() + s - exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'), - Timestamp('2000-02-29', tz='US/Central')], name='a') - assert_func(result, exp) - assert_func(result2, exp) - - # array of offsets - valid for Series only - if klass is Series: - with tm.assert_produces_warning(PerformanceWarning): - s = klass([Timestamp('2000-1-1'), Timestamp('2000-2-1')]) - result = s + Series([pd.offsets.DateOffset(years=1), - pd.offsets.MonthEnd()]) - exp = klass([Timestamp('2001-1-1'), Timestamp('2000-2-29') - ]) - assert_func(result, exp) - - # same offset - result = s + Series([pd.offsets.DateOffset(years=1), - pd.offsets.DateOffset(years=1)]) - exp = klass([Timestamp('2001-1-1'), Timestamp('2001-2-1')]) - assert_func(result, exp) - - s = klass([Timestamp('2000-01-05 00:15:00'), + s = klass([Timestamp('2000-01-05 00:15:00'), + Timestamp('2000-01-31 00:23:00'), + Timestamp('2000-01-01'), + Timestamp('2000-03-31'), + Timestamp('2000-02-29'), + Timestamp('2000-12-31'), + Timestamp('2000-05-15'), + Timestamp('2001-06-15')]) + + # DateOffset relativedelta fastpath + relative_kwargs = [('years', 2), ('months', 5), ('days', 3), + ('hours', 5), ('minutes', 10), ('seconds', 2), + ('microseconds', 5)] + for i, kwd in enumerate(relative_kwargs): + op = pd.DateOffset(**dict([kwd])) + assert_func(klass([x + op for x in s]), s + op) + assert_func(klass([x - op for x in s]), s - op) + op = pd.DateOffset(**dict(relative_kwargs[:i + 1])) + assert_func(klass([x + op for x in s]), s + op) + assert_func(klass([x - op for x in s]), s - op) + + # assert these are equal on a piecewise basis + offsets = ['YearBegin', ('YearBegin', {'month': 5}), + 'YearEnd', ('YearEnd', {'month': 5}), + 'MonthBegin', 'MonthEnd', + 'SemiMonthEnd', 'SemiMonthBegin', + 'Week', ('Week', {'weekday': 3}), + 'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin', + 'CustomBusinessDay', 'CDay', 'CBMonthEnd', + 'CBMonthBegin', 'BMonthBegin', 'BMonthEnd', + 'BusinessHour', 'BYearBegin', 'BYearEnd', + 'BQuarterBegin', ('LastWeekOfMonth', {'weekday': 2}), + ('FY5253Quarter', {'qtr_with_extra_week': 1, + 'startingMonth': 1, + 'weekday': 2, + 'variation': 'nearest'}), + ('FY5253', {'weekday': 0, + 'startingMonth': 2, + 'variation': + 'nearest'}), + ('WeekOfMonth', {'weekday': 2, + 'week': 2}), + 'Easter', ('DateOffset', {'day': 4}), + ('DateOffset', {'month': 5})] + + with warnings.catch_warnings(record=True): + for normalize in (True, False): + for do in offsets: + if isinstance(do, tuple): + do, kwargs = do + else: + do = do + kwargs = {} + + for n in [0, 5]: + if (do in ['WeekOfMonth', 'LastWeekOfMonth', + 'FY5253Quarter', 'FY5253'] and n == 0): + continue + op = getattr(pd.offsets, do)(n, + normalize=normalize, + **kwargs) + assert_func(klass([x + op for x in s]), s + op) + assert_func(klass([x - op for x in s]), s - op) + assert_func(klass([op + x for x in s]), op + s) + + +@pytest.mark.parametrize('years,months', product([-1, 0, 1], [-2, 0, 2])) +def test_shift_months(years, months): + s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), 
Timestamp('2000-01-31 00:23:00'), Timestamp('2000-01-01'), - Timestamp('2000-03-31'), Timestamp('2000-02-29'), - Timestamp('2000-12-31'), - Timestamp('2000-05-15'), - Timestamp('2001-06-15')]) - - # DateOffset relativedelta fastpath - relative_kwargs = [('years', 2), ('months', 5), ('days', 3), - ('hours', 5), ('minutes', 10), ('seconds', 2), - ('microseconds', 5)] - for i, kwd in enumerate(relative_kwargs): - op = pd.DateOffset(**dict([kwd])) - assert_func(klass([x + op for x in s]), s + op) - assert_func(klass([x - op for x in s]), s - op) - op = pd.DateOffset(**dict(relative_kwargs[:i + 1])) - assert_func(klass([x + op for x in s]), s + op) - assert_func(klass([x - op for x in s]), s - op) - - # assert these are equal on a piecewise basis - offsets = ['YearBegin', ('YearBegin', {'month': 5}), 'YearEnd', - ('YearEnd', {'month': 5}), 'MonthBegin', 'MonthEnd', - 'SemiMonthEnd', 'SemiMonthBegin', - 'Week', ('Week', { - 'weekday': 3 - }), 'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin', - 'CustomBusinessDay', 'CDay', 'CBMonthEnd', - 'CBMonthBegin', 'BMonthBegin', 'BMonthEnd', - 'BusinessHour', 'BYearBegin', 'BYearEnd', - 'BQuarterBegin', ('LastWeekOfMonth', { - 'weekday': 2 - }), ('FY5253Quarter', {'qtr_with_extra_week': 1, - 'startingMonth': 1, - 'weekday': 2, - 'variation': 'nearest'}), - ('FY5253', {'weekday': 0, - 'startingMonth': 2, - 'variation': - 'nearest'}), ('WeekOfMonth', {'weekday': 2, - 'week': 2}), - 'Easter', ('DateOffset', {'day': 4}), - ('DateOffset', {'month': 5})] - - with warnings.catch_warnings(record=True): - for normalize in (True, False): - for do in offsets: - if isinstance(do, tuple): - do, kwargs = do - else: - do = do - kwargs = {} - - for n in [0, 5]: - if (do in ['WeekOfMonth', 'LastWeekOfMonth', - 'FY5253Quarter', 'FY5253'] and n == 0): - continue - op = getattr(pd.offsets, do)(n, - normalize=normalize, - **kwargs) - assert_func(klass([x + op for x in s]), s + op) - assert_func(klass([x - op for x in s]), s - op) - assert_func(klass([op + x for x in s]), op + s) - - def test_shift_months(self): - s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), Timestamp( - '2000-01-31 00:23:00'), Timestamp('2000-01-01'), Timestamp( - '2000-02-29'), Timestamp('2000-12-31')]) - for years in [-1, 0, 1]: - for months in [-2, 0, 2]: - actual = DatetimeIndex(tslib.shift_months(s.asi8, years * 12 + - months)) - expected = DatetimeIndex([x + offsets.DateOffset( - years=years, months=months) for x in s]) - tm.assert_index_equal(actual, expected) + Timestamp('2000-12-31')]) + actual = DatetimeIndex(tslib.shift_months(s.asi8, years * 12 + + months)) + expected = DatetimeIndex([x + offsets.DateOffset( + years=years, months=months) for x in s]) + tm.assert_index_equal(actual, expected) class TestBusinessDatetimeIndex(tm.TestCase): diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 3abd1093362f4..2e701143357e3 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -20,6 +20,7 @@ _DEPS_INSTALLED = 0 +@pytest.mark.single @pytest.mark.skipif(not _DEPS_INSTALLED, reason="clipboard primitives not installed") class TestClipboard(tm.TestCase): diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 3fa0eb2ef52dc..a840ff46aa845 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -36,12 +36,6 @@ from pandas import concat, Timestamp from pandas import compat from pandas.compat import range, lrange, u - -try: - import tables -except ImportError: - 
pytest.skip('no pytables') - from distutils.version import LooseVersion _default_compressor = ('blosc' if LooseVersion(tables.__version__) >= '2.2' @@ -165,6 +159,7 @@ def tearDown(self): pass +@pytest.mark.single class TestHDFStore(Base, tm.TestCase): def test_factory_fun(self): diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a6f4d96001021..78560611da7aa 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -18,13 +18,13 @@ """ from __future__ import print_function +import pytest import unittest import sqlite3 import csv import os import sys -import pytest import warnings import numpy as np import pandas as pd @@ -839,6 +839,7 @@ def test_unicode_column_name(self): df.to_sql('test_unicode', self.conn, index=False) +@pytest.mark.single class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi, unittest.TestCase): """ Test the public API as it would be used directly @@ -1024,10 +1025,12 @@ def tearDown(self): super(_EngineToConnMixin, self).tearDown() +@pytest.mark.single class TestSQLApiConn(_EngineToConnMixin, TestSQLApi, unittest.TestCase): pass +@pytest.mark.single class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi, unittest.TestCase): """ Test the public sqlite connection fallback API @@ -1875,30 +1878,36 @@ def test_schema_support(self): tm.assert_frame_equal(res1, res2) +@pytest.mark.single class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy, unittest.TestCase): pass +@pytest.mark.single class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn, unittest.TestCase): pass +@pytest.mark.single class TestPostgreSQLAlchemy(_TestPostgreSQLAlchemy, _TestSQLAlchemy, unittest.TestCase): pass +@pytest.mark.single class TestPostgreSQLAlchemyConn(_TestPostgreSQLAlchemy, _TestSQLAlchemyConn, unittest.TestCase): pass +@pytest.mark.single class TestSQLiteAlchemy(_TestSQLiteAlchemy, _TestSQLAlchemy, unittest.TestCase): pass +@pytest.mark.single class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn, unittest.TestCase): pass @@ -1907,6 +1916,7 @@ class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn, # ----------------------------------------------------------------------------- # -- Test Sqlite / MySQL fallback +@pytest.mark.single class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest, unittest.TestCase): """ Test the fallback mode against an in-memory sqlite database. 
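The ``single`` marker being attached to the I/O test classes here is the custom pytest marker registered in ``setup.cfg`` later in this diff (``single: mark a test as single cpu only``). ``ci/script_multi.sh`` then runs the bulk of the suite in parallel under pytest-xdist (``pytest -n 2 -m "not single"``) while ``ci/script_single.sh`` runs the marked tests serially (``pytest -m "single"``). A minimal sketch of the convention; the test names below are hypothetical:

    import pytest


    @pytest.mark.single
    class TestUsesSharedResource(object):
        # collected only by the serial job: pytest -m "single"

        def test_roundtrip(self):
            # e.g. a database, clipboard, or HDF5 file that two
            # xdist workers must not open at the same time
            assert True


    def test_pure_computation():
        # unmarked, so picked up by the parallel job:
        # pytest -n 2 -m "not single"
        assert 1 + 1 == 2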
@@ -2133,6 +2143,7 @@ def _skip_if_no_pymysql(): pytest.skip('pymysql not installed, skipping') +@pytest.mark.single class TestXSQLite(SQLiteMixIn, tm.TestCase): def setUp(self): @@ -2343,6 +2354,7 @@ def clean_up(test_table_to_drop): clean_up(table_name) +@pytest.mark.single class TestSQLFlavorDeprecation(tm.TestCase): """ gh-13611: test that the 'flavor' parameter @@ -2367,8 +2379,9 @@ def test_deprecated_flavor(self): getattr(sql, func)(self.con, flavor='sqlite') -@unittest.skip("gh-13611: there is no support for MySQL " - "if SQLAlchemy is not installed") +@pytest.mark.single +@pytest.mark.skip(reason="gh-13611: there is no support for MySQL " + "if SQLAlchemy is not installed") class TestXMySQL(MySQLMixIn, tm.TestCase): @classmethod diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 3add568c1ea99..1bb1f91423a9d 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -2,6 +2,7 @@ import pytest import sys import warnings +from warnings import catch_warnings from datetime import datetime from numpy.random import randn @@ -291,8 +292,7 @@ def test_how_compat(self): for op in ['mean', 'sum', 'std', 'var', 'kurt', 'skew']: for t in ['rolling', 'expanding']: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): dfunc = getattr(pd, "{0}_{1}".format(t, op)) if dfunc is None: @@ -526,7 +526,7 @@ def setUp(self): def test_deprecations(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): mom.rolling_mean(np.ones(10), 3, center=True, axis=0) mom.rolling_mean(Series(np.ones(10)), 3, center=True, axis=0) @@ -791,7 +791,7 @@ def test_cmov_mean(self): xp = np.array([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, 12.952, np.nan, np.nan]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): rs = mom.rolling_mean(vals, 5, center=True) tm.assert_almost_equal(xp, rs) @@ -808,7 +808,7 @@ def test_cmov_window(self): xp = np.array([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, 12.952, np.nan, np.nan]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): rs = mom.rolling_window(vals, 5, 'boxcar', center=True) tm.assert_almost_equal(xp, rs) @@ -823,19 +823,19 @@ def test_cmov_window_corner(self): # all nan vals = np.empty(10, dtype=float) vals.fill(np.nan) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): rs = mom.rolling_window(vals, 5, 'boxcar', center=True) self.assertTrue(np.isnan(rs).all()) # empty vals = np.array([]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): rs = mom.rolling_window(vals, 5, 'boxcar', center=True) self.assertEqual(len(rs), 0) # shorter than window vals = np.random.randn(5) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): rs = mom.rolling_window(vals, 10, 'boxcar') self.assertTrue(np.isnan(rs).all()) self.assertEqual(len(rs), 5) @@ -1014,16 +1014,16 @@ def test_cmov_window_special_linear_range(self): tm.assert_series_equal(xp, rs) def test_rolling_median(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self._check_moment_func(mom.rolling_median, np.median, name='median') def test_rolling_min(self): - with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self._check_moment_func(mom.rolling_min, np.min, name='min') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): a = np.array([1, 2, 3, 4, 5]) b = mom.rolling_min(a, window=100, min_periods=1) tm.assert_almost_equal(b, np.ones(len(a))) @@ -1033,10 +1033,10 @@ def test_rolling_min(self): def test_rolling_max(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self._check_moment_func(mom.rolling_max, np.max, name='max') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): a = np.array([1, 2, 3, 4, 5], dtype=np.float64) b = mom.rolling_max(a, window=100, min_periods=1) tm.assert_almost_equal(a, b) @@ -1102,11 +1102,11 @@ def test_rolling_apply_out_of_bounds(self): arr = np.arange(4) # it works! - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_apply(arr, 10, np.sum) self.assertTrue(isnull(result).all()) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_apply(arr, 10, np.sum, min_periods=1) tm.assert_almost_equal(result, result) @@ -1117,19 +1117,19 @@ def test_rolling_std(self): name='std', ddof=0) def test_rolling_std_1obs(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), 1, min_periods=1) expected = np.array([np.nan] * 5) tm.assert_almost_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), 1, min_periods=1, ddof=0) expected = np.zeros(5) tm.assert_almost_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_std(np.array([np.nan, np.nan, 3., 4., 5.]), 3, min_periods=2) self.assertTrue(np.isnan(result[2])) @@ -1142,11 +1142,11 @@ def test_rolling_std_neg_sqrt(self): a = np.array([0.0011448196318903589, 0.00028718669878572767, 0.00028718669878572767, 0.00028718669878572767, 0.00028718669878572767]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): b = mom.rolling_std(a, window=3) self.assertTrue(np.isfinite(b[2:]).all()) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): b = mom.ewmstd(a, span=3) self.assertTrue(np.isfinite(b[2:]).all()) @@ -1184,25 +1184,25 @@ def test_fperr_robustness(self): if sys.byteorder != "little": arr = arr.byteswap().newbyteorder() - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_sum(arr, 2) self.assertTrue((result[1:] >= 0).all()) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_mean(arr, 2) self.assertTrue((result[1:] >= 0).all()) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_var(arr, 2) self.assertTrue((result[1:] >= 0).all()) # #2527, ugh arr = np.array([0.00012456, 0.0003, 0]) - with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_mean(arr, 1) self.assertTrue(result[-1] >= 0) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_mean(-arr, 1) self.assertTrue(result[-1] <= 0) @@ -1327,15 +1327,13 @@ def get_result(obj, window, min_periods=None, freq=None, center=False): # catch a freq deprecation warning if freq is provided and not # None - w = FutureWarning if freq is not None else None - with tm.assert_produces_warning(w, check_stacklevel=False): + with catch_warnings(record=True): r = obj.rolling(window=window, min_periods=min_periods, freq=freq, center=center) return getattr(r, name)(**kwargs) # check via the moments API - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): return f(obj, window=window, min_periods=min_periods, freq=freq, center=center, **kwargs) @@ -1419,7 +1417,7 @@ def test_ewma(self): arr = np.zeros(1000) arr[5] = 1 - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.ewma(arr, span=100, adjust=False).sum() self.assertTrue(np.abs(result - 1) < 1e-2) @@ -1506,7 +1504,7 @@ def test_ewmvol(self): self._check_ew(mom.ewmvol, name='vol') def test_ewma_span_com_args(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): A = mom.ewma(self.arr, com=9.5) B = mom.ewma(self.arr, span=20) tm.assert_almost_equal(A, B) @@ -1515,7 +1513,7 @@ def test_ewma_span_com_args(self): self.assertRaises(ValueError, mom.ewma, self.arr) def test_ewma_halflife_arg(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): A = mom.ewma(self.arr, com=13.932726172912965) B = mom.ewma(self.arr, halflife=10.0) tm.assert_almost_equal(A, B) @@ -1530,7 +1528,7 @@ def test_ewma_halflife_arg(self): def test_ewma_alpha_old_api(self): # GH 10789 - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): a = mom.ewma(self.arr, alpha=0.61722699889169674) b = mom.ewma(self.arr, com=0.62014947789973052) c = mom.ewma(self.arr, span=2.240298955799461) @@ -1541,7 +1539,7 @@ def test_ewma_alpha_old_api(self): def test_ewma_alpha_arg_old_api(self): # GH 10789 - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self.assertRaises(ValueError, mom.ewma, self.arr) self.assertRaises(ValueError, mom.ewma, self.arr, com=10.0, alpha=0.5) @@ -1598,13 +1596,12 @@ def test_ew_empty_arrays(self): funcs = [mom.ewma, mom.ewmvol, mom.ewmvar] for f in funcs: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): result = f(arr, 3) tm.assert_almost_equal(result, arr) def _check_ew(self, func, name=None): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self._check_ew_ndarray(func, name=name) self._check_ew_structures(func, name=name) @@ -2870,7 +2867,7 @@ def test_rolling_max_gh6297(self): expected = Series([1.0, 2.0, 6.0, 4.0, 5.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): x = series.rolling(window=1, freq='D').max() tm.assert_series_equal(expected, x) @@ -2889,14 +2886,14 @@ 
def test_rolling_max_how_resample(self): # Default how should be max expected = Series([0.0, 1.0, 2.0, 3.0, 20.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): x = series.rolling(window=1, freq='D').max() tm.assert_series_equal(expected, x) # Now specify median (10.0) expected = Series([0.0, 1.0, 2.0, 3.0, 10.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): x = series.rolling(window=1, freq='D').max(how='median') tm.assert_series_equal(expected, x) @@ -2904,7 +2901,7 @@ def test_rolling_max_how_resample(self): v = (4.0 + 10.0 + 20.0) / 3.0 expected = Series([0.0, 1.0, 2.0, 3.0, v], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): x = series.rolling(window=1, freq='D').max(how='mean') tm.assert_series_equal(expected, x) @@ -2923,7 +2920,7 @@ def test_rolling_min_how_resample(self): # Default how should be min expected = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): r = series.rolling(window=1, freq='D') tm.assert_series_equal(expected, r.min()) @@ -2942,7 +2939,7 @@ def test_rolling_median_how_resample(self): # Default how should be median expected = Series([0.0, 1.0, 2.0, 3.0, 10], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): x = series.rolling(window=1, freq='D').median() tm.assert_series_equal(expected, x) diff --git a/setup.cfg b/setup.cfg index 45d98dd733f1f..b9de7a3532209 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,3 +25,5 @@ split_penalty_logical_operator = 30 # Silencing the warning until then addopts = --disable-pytest-warnings testpaths = pandas +markers = + single: mark a test as single cpu only diff --git a/test_fast.sh b/test_fast.sh index 0b394cffa3d74..43eb376f879cd 100755 --- a/test_fast.sh +++ b/test_fast.sh @@ -1,2 +1 @@ -# nosetests -A "not slow and not network" pandas --with-id $* -pytest pandas --skip-slow +pytest pandas --skip-slow --skip-network -m "not single" -n 4 From 0915857cc9209548d9c26122e822eaef841c6b24 Mon Sep 17 00:00:00 2001 From: Andrew Kittredge Date: Sun, 12 Feb 2017 12:37:13 -0500 Subject: [PATCH 047/933] Typo (#15377) --- doc/source/advanced.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 8833d73cb0a84..b6f015c15606d 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -59,7 +59,7 @@ Creating a MultiIndex (hierarchical index) object The ``MultiIndex`` object is the hierarchical analogue of the standard ``Index`` object which typically stores the axis labels in pandas objects. You -can think of ``MultiIndex`` an array of tuples where each tuple is unique. A +can think of ``MultiIndex`` as an array of tuples where each tuple is unique. 
A ``MultiIndex`` can be created from a list of arrays (using ``MultiIndex.from_arrays``), an array of tuples (using ``MultiIndex.from_tuples``), or a crossed set of iterables (using From a0f7fc061ca37ab992e320bd3d1b7b130e500469 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Feb 2017 11:46:48 -0500 Subject: [PATCH 048/933] TST: control skipping of numexpr tests if its installed / used --- pandas/tests/test_expressions.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 0318757f76a11..3032a288032a2 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -20,9 +20,6 @@ import pandas.util.testing as tm -if not expr._USE_NUMEXPR: - numexpr = pytest.importorskip('numexpr') - _frame = DataFrame(randn(10000, 4), columns=list('ABCD'), dtype='float64') _frame2 = DataFrame(randn(100, 4), columns=list('ABCD'), dtype='float64') _mixed = DataFrame({'A': _frame['A'].copy(), @@ -50,6 +47,7 @@ _mixed2_panel = Panel(dict(ItemA=_mixed2, ItemB=(_mixed2 + 3))) +@pytest.mark.skipif(not expr._USE_NUMEXPR, reason='not using numexpr') class TestExpressions(tm.TestCase): def setUp(self): From dda3c4292b28d4dbead8bb6ae9927373aea9fe23 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Feb 2017 12:51:11 -0500 Subject: [PATCH 049/933] TST: make test_gbq single cpu --- pandas/tests/io/test_gbq.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 0868edd2147b5..0317ebc49ad2c 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -253,6 +253,7 @@ def test_generate_bq_schema_deprecated(): gbq.generate_bq_schema(df) +@pytest.mark.single class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase): def setUp(self): @@ -298,6 +299,7 @@ def test_get_application_default_credentials_returns_credentials(self): self.assertTrue(isinstance(credentials, GoogleCredentials)) +@pytest.mark.single class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase): def setUp(self): _setup_common() @@ -329,6 +331,7 @@ def test_should_be_able_to_get_results_from_query(self): self.assertTrue(pages is not None) +@pytest.mark.single class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase): def setUp(self): _setup_common() @@ -360,6 +363,7 @@ def test_should_be_able_to_get_results_from_query(self): self.assertTrue(pages is not None) +@pytest.mark.single class GBQUnitTests(tm.TestCase): def setUp(self): @@ -446,6 +450,7 @@ def test_read_gbq_with_corrupted_private_key_json_should_fail(self): private_key=re.sub('[a-z]', '9', _get_private_key_contents())) +@pytest.mark.single class TestReadGBQIntegration(tm.TestCase): @classmethod @@ -499,6 +504,7 @@ def test_should_read_as_service_account_with_key_contents(self): tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) +@pytest.mark.single class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): @classmethod @@ -901,6 +907,7 @@ def test_configuration_without_query(self): configuration=config) +@pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. 
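For context on the documentation sentence that PATCH 047 fixes above: it lists the standard ``MultiIndex`` constructors, and the truncated third item is presumably ``MultiIndex.from_product``. A short illustration, using only plain pandas API, of all three producing the same index:

    import pandas as pd

    arrays = [['bar', 'bar', 'baz', 'baz'],
              ['one', 'two', 'one', 'two']]

    # from a list of arrays
    mi1 = pd.MultiIndex.from_arrays(arrays, names=['first', 'second'])

    # from an array of tuples
    mi2 = pd.MultiIndex.from_tuples(list(zip(*arrays)),
                                    names=['first', 'second'])

    # from the cross product of iterables
    mi3 = pd.MultiIndex.from_product([['bar', 'baz'], ['one', 'two']],
                                     names=['first', 'second'])

    assert mi1.equals(mi2) and mi1.equals(mi3)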
@@ -1215,6 +1222,7 @@ def test_dataset_does_not_exist(self): DATASET_ID + "_not_found"), 'Expected dataset not to exist') +@pytest.mark.single class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. @@ -1272,6 +1280,7 @@ def test_upload_data(self): self.assertEqual(result['num_rows'][0], test_size) +@pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. From 010393c4cb650b78e3e51af417e7037737e8d3b6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Feb 2017 21:43:50 -0500 Subject: [PATCH 050/933] ENH: expose Int64VectorData in hashtable.pxd --- pandas/hashtable.pxd | 14 ++++++++++++++ pandas/src/hashtable_class_helper.pxi.in | 12 +++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/pandas/hashtable.pxd b/pandas/hashtable.pxd index cd06b938310a8..cabfa43a76f26 100644 --- a/pandas/hashtable.pxd +++ b/pandas/hashtable.pxd @@ -1,5 +1,6 @@ from khash cimport (kh_int64_t, kh_uint64_t, kh_float64_t, kh_pymap_t, kh_str_t, uint64_t, int64_t, float64_t) +from numpy cimport ndarray # prototypes for sharing @@ -35,3 +36,16 @@ cdef class StringHashTable(HashTable): cpdef get_item(self, object val) cpdef set_item(self, object key, Py_ssize_t val) + +cdef struct Int64VectorData: + int64_t *data + size_t n, m + +cdef class Int64Vector: + cdef Int64VectorData *data + cdef ndarray ao + + cdef resize(self) + cpdef to_array(self) + cdef inline void append(self, int64_t x) + cdef extend(self, int64_t[:] x) diff --git a/pandas/src/hashtable_class_helper.pxi.in b/pandas/src/hashtable_class_helper.pxi.in index 74c38dfdb393e..ef385ba7dca1c 100644 --- a/pandas/src/hashtable_class_helper.pxi.in +++ b/pandas/src/hashtable_class_helper.pxi.in @@ -24,10 +24,14 @@ dtypes = [('Float64', 'float64', 'float64_t'), {{for name, dtype, arg in dtypes}} +{{if dtype != 'int64'}} + ctypedef struct {{name}}VectorData: {{arg}} *data size_t n, m +{{endif}} + @cython.wraparound(False) @cython.boundscheck(False) @@ -65,9 +69,11 @@ dtypes = [('Float64', 'float64', 'float64_t', 'np.float64'), cdef class {{name}}Vector: + {{if dtype != 'int64'}} cdef: {{name}}VectorData *data ndarray ao + {{endif}} def __cinit__(self): self.data = <{{name}}VectorData *>PyMem_Malloc( @@ -92,7 +98,7 @@ cdef class {{name}}Vector: def __len__(self): return self.data.n - def to_array(self): + cpdef to_array(self): self.ao.resize(self.data.n) self.data.m = self.data.n return self.ao @@ -104,6 +110,10 @@ cdef class {{name}}Vector: append_data_{{dtype}}(self.data, x) + cdef extend(self, {{arg}}[:] x): + for i in range(len(x)): + self.append(x[i]) + {{endfor}} cdef class StringVector: From d9e75c7e724e5f7449c8c57624ce9395c9ffe11a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Feb 2017 21:54:11 -0500 Subject: [PATCH 051/933] TST: xfail most test_gbq tests for now --- pandas/tests/io/test_gbq.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 0317ebc49ad2c..316afaf306011 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -253,7 +253,7 @@ def test_generate_bq_schema_deprecated(): gbq.generate_bq_schema(df) -@pytest.mark.single +@pytest.mark.xfail(run=False, 
reason="flaky tests") class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase): def setUp(self): @@ -299,7 +299,7 @@ def test_get_application_default_credentials_returns_credentials(self): self.assertTrue(isinstance(credentials, GoogleCredentials)) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="flaky tests") class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase): def setUp(self): _setup_common() @@ -331,7 +331,7 @@ def test_should_be_able_to_get_results_from_query(self): self.assertTrue(pages is not None) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="flaky tests") class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase): def setUp(self): _setup_common() @@ -363,7 +363,6 @@ def test_should_be_able_to_get_results_from_query(self): self.assertTrue(pages is not None) -@pytest.mark.single class GBQUnitTests(tm.TestCase): def setUp(self): @@ -450,7 +449,7 @@ def test_read_gbq_with_corrupted_private_key_json_should_fail(self): private_key=re.sub('[a-z]', '9', _get_private_key_contents())) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="flaky tests") class TestReadGBQIntegration(tm.TestCase): @classmethod @@ -504,7 +503,7 @@ def test_should_read_as_service_account_with_key_contents(self): tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="flaky tests") class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): @classmethod @@ -907,7 +906,7 @@ def test_configuration_without_query(self): configuration=config) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="flaky tests") class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. @@ -1022,8 +1021,6 @@ def test_upload_data_if_table_exists_append(self): def test_upload_data_if_table_exists_replace(self): - pytest.skip("buggy test") - destination_table = DESTINATION_TABLE + "4" test_size = 10 @@ -1222,7 +1219,7 @@ def test_dataset_does_not_exist(self): DATASET_ID + "_not_found"), 'Expected dataset not to exist') -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="flaky tests") class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. @@ -1280,7 +1277,7 @@ def test_upload_data(self): self.assertEqual(result['num_rows'][0], test_size) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="flaky tests") class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. From 86ca84d8ec79eba5fe31bf0d4cbb24ec78fc333a Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Tue, 14 Feb 2017 08:29:18 -0500 Subject: [PATCH 052/933] TST: Fix gbq integration tests. gbq._Dataset.dataset() would not return full results This PR resolves an issue where `gbq._Dataset.datasets()` would not return all datasets under a Google BigQuery project. If `'nextPageToken'` is populated, then another `datasets().list()` request should be sent with `'pageToken'` set to collect more results. 
In the past few days, additional datasets were added under the Google
BigQuery project id used by pandas as part of the following GitHub project:
https://github.com/pydata/pandas-gbq. The addition of datasets caused many
gbq unit tests to fail because in function `clean_gbq_environment()`, we
check to see if the dataset exists using the incomplete results from
`gbq._Dataset.datasets()` before we attempt to delete it.

Author: Anthonios Partheniou

Closes #15381 from parthea/fix-broken-gbq-unit-tests and squashes the following commits:

61bc1e7 [Anthonios Partheniou] TST: Fix gbq tests. gbq.datasets()/gbq.tables() would not return full results.

---
 pandas/io/gbq.py            | 67 ++++++++++++++++++++++++-------------
 pandas/tests/io/test_gbq.py | 16 ++++-----
 2 files changed, 52 insertions(+), 31 deletions(-)

diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py
index 169a2b1df9b4c..0ffb6b4bf8c05 100644
--- a/pandas/io/gbq.py
+++ b/pandas/io/gbq.py
@@ -1056,21 +1056,32 @@ def datasets(self):
             List of datasets under the specific project
         """
-        try:
-            list_dataset_response = self.service.datasets().list(
-                projectId=self.project_id).execute().get('datasets', None)
+        dataset_list = []
+        next_page_token = None
+        first_query = True
 
-            if not list_dataset_response:
-                return []
+        while first_query or next_page_token:
+            first_query = False
 
-            dataset_list = list()
+            try:
+                list_dataset_response = self.service.datasets().list(
+                    projectId=self.project_id,
+                    pageToken=next_page_token).execute()
 
-            for row_num, raw_row in enumerate(list_dataset_response):
-                dataset_list.append(raw_row['datasetReference']['datasetId'])
+                dataset_response = list_dataset_response.get('datasets')
+                next_page_token = list_dataset_response.get('nextPageToken')
 
-            return dataset_list
-        except self.http_error as ex:
-            self.process_http_error(ex)
+                if not dataset_response:
+                    return dataset_list
+
+                for row_num, raw_row in enumerate(dataset_response):
+                    dataset_list.append(
+                        raw_row['datasetReference']['datasetId'])
+
+            except self.http_error as ex:
+                self.process_http_error(ex)
+
+        return dataset_list
 
     def create(self, dataset_id):
         """ Create a dataset in Google BigQuery
@@ -1140,19 +1151,29 @@ def tables(self, dataset_id):
             List of tables under the specific dataset
         """
-        try:
-            list_table_response = self.service.tables().list(
-                projectId=self.project_id,
-                datasetId=dataset_id).execute().get('tables', None)
+        table_list = []
+        next_page_token = None
+        first_query = True
 
-            if not list_table_response:
-                return []
+        while first_query or next_page_token:
+            first_query = False
 
-            table_list = list()
+            try:
+                list_table_response = self.service.tables().list(
+                    projectId=self.project_id,
+                    datasetId=dataset_id,
+                    pageToken=next_page_token).execute()
 
-            for row_num, raw_row in enumerate(list_table_response):
-                table_list.append(raw_row['tableReference']['tableId'])
+                table_response = list_table_response.get('tables')
+                next_page_token = list_table_response.get('nextPageToken')
 
-            return table_list
-        except self.http_error as ex:
-            self.process_http_error(ex)
+                if not table_response:
+                    return table_list
+
+                for row_num, raw_row in enumerate(table_response):
+                    table_list.append(raw_row['tableReference']['tableId'])
+
+            except self.http_error as ex:
+                self.process_http_error(ex)
+
+        return table_list
diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py
index 316afaf306011..dfbf3ca69b111 100644
--- a/pandas/tests/io/test_gbq.py
+++ b/pandas/tests/io/test_gbq.py
@@ -253,7 +253,7 @@ def test_generate_bq_schema_deprecated():
gbq.generate_bq_schema(df) -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase): def setUp(self): @@ -299,7 +299,7 @@ def test_get_application_default_credentials_returns_credentials(self): self.assertTrue(isinstance(credentials, GoogleCredentials)) -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase): def setUp(self): _setup_common() @@ -331,7 +331,7 @@ def test_should_be_able_to_get_results_from_query(self): self.assertTrue(pages is not None) -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase): def setUp(self): _setup_common() @@ -449,7 +449,7 @@ def test_read_gbq_with_corrupted_private_key_json_should_fail(self): private_key=re.sub('[a-z]', '9', _get_private_key_contents())) -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestReadGBQIntegration(tm.TestCase): @classmethod @@ -503,7 +503,7 @@ def test_should_read_as_service_account_with_key_contents(self): tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): @classmethod @@ -906,7 +906,7 @@ def test_configuration_without_query(self): configuration=config) -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. @@ -1219,7 +1219,7 @@ def test_dataset_does_not_exist(self): DATASET_ID + "_not_found"), 'Expected dataset not to exist') -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. @@ -1277,7 +1277,7 @@ def test_upload_data(self): self.assertEqual(result['num_rows'][0], test_size) -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. 
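
To make the failure mode fixed above concrete: the test-suite helper
`clean_gbq_environment()` only deletes leftover datasets whose ids show up
in `_Dataset.datasets()`. A condensed, hypothetical sketch of that check
(the module-level names and the `delete()` method are assumed to match
pandas/tests/io/test_gbq.py and pandas/io/gbq.py, not copied from them):

    dataset = gbq._Dataset(_get_project_id())
    for i in range(1, 10):
        dataset_id = DATASET_ID + str(i)
        # a truncated listing can miss an existing dataset here, so it
        # is never deleted and later test runs collide with it
        if dataset_id in dataset.datasets():
            dataset.delete(dataset_id)

With paging in place, `datasets()` returns the complete listing and this
membership test becomes reliable.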
From ff0deecbc8f8e9ae3d274e5e7cd7c0056de1a6c2 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 14 Feb 2017 08:33:34 -0500 Subject: [PATCH 053/933] Bug: Raise ValueError with interpolate & fillna limit = 0 (#9217) closes #9217 Author: Matt Roeschke Closes #14994 from mroeschke/fix_9217 and squashes the following commits: c1790ee [Matt Roeschke] Unify ValueError message and correct cython limits 6f041e6 [Matt Roeschke] Bug: Raise ValueError with interpolate limit = 0 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/generic.py | 6 ++--- pandas/core/internals.py | 4 +++ pandas/core/missing.py | 8 ++++-- pandas/src/algos_common_helper.pxi.in | 36 ++++++++++++++++++--------- pandas/tests/series/test_missing.py | 18 ++++++++++++++ 6 files changed, 56 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index aa620bce0df59..d76e33caffbf1 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -421,6 +421,7 @@ Other API Changes - ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`) - ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`) - ``.loc`` has compat with ``.ix`` for accepting iterators, and NamedTuples (:issue:`15120`) +- ``interpolate()`` and ``fillna()`` will raise a ``ValueError`` if the ``limit`` keyword argument is not greater than 0. (:issue:`9217`) - ``pd.read_csv()`` will now issue a ``ParserWarning`` whenever there are conflicting values provided by the ``dialect`` parameter and the user (:issue:`14898`) - ``pd.read_csv()`` will now raise a ``ValueError`` for the C engine if the quote character is larger than than one byte (:issue:`11592`) - ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 228dd2acd2124..20e6e027dbf09 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3262,7 +3262,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, a gap with more than this number of consecutive NaNs, it will only be partially filled. If method is not specified, this is the maximum number of entries along the entire axis where NaNs will be - filled. + filled. Must be greater than 0 if not None. downcast : dict, default is None a dict of item->dtype of what to downcast if possible, or the string 'infer' which will try to downcast to an appropriate @@ -3281,6 +3281,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): inplace = validate_bool_kwarg(inplace, 'inplace') + if isinstance(value, (list, tuple)): raise TypeError('"value" parameter must be a scalar or dict, but ' 'you passed a "{0}"'.format(type(value).__name__)) @@ -3292,7 +3293,6 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, axis = 0 axis = self._get_axis_number(axis) method = missing.clean_fill_method(method) - from pandas import DataFrame if value is None: if method is None: @@ -3687,7 +3687,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, * 0: fill column-by-column * 1: fill row-by-row limit : int, default None. - Maximum number of consecutive NaNs to fill. + Maximum number of consecutive NaNs to fill. Must be greater than 0. 
limit_direction : {'forward', 'backward', 'both'}, default 'forward' If limit is specified, consecutive NaNs will be filled in this direction. diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f0b1516d786c6..6cd5eceed5f2a 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -372,6 +372,10 @@ def fillna(self, value, limit=None, inplace=False, downcast=None, original_value = value mask = isnull(self.values) if limit is not None: + if not is_integer(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') if self.ndim > 2: raise NotImplementedError("number of dimensions for 'fillna' " "is currently limited to 2") diff --git a/pandas/core/missing.py b/pandas/core/missing.py index e83a0518d97f6..ffd0423572f5e 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -12,7 +12,7 @@ is_float_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_integer_dtype, _ensure_float64, is_scalar, - needs_i8_conversion) + needs_i8_conversion, is_integer) from pandas.types.missing import isnull @@ -169,7 +169,11 @@ def _interp_limit(invalid, fw_limit, bw_limit): # the beginning (see issues #9218 and #10420) violate_limit = sorted(start_nans) - if limit: + if limit is not None: + if not is_integer(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') if limit_direction == 'forward': violate_limit = sorted(start_nans | set(_interp_limit(invalid, limit, 0))) diff --git a/pandas/src/algos_common_helper.pxi.in b/pandas/src/algos_common_helper.pxi.in index 5e87528943005..42089f9520ab6 100644 --- a/pandas/src/algos_common_helper.pxi.in +++ b/pandas/src/algos_common_helper.pxi.in @@ -83,8 +83,10 @@ def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, if limit is None: lim = nright else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit if nleft == 0 or nright == 0 or new[nright - 1] < old[0]: @@ -146,8 +148,10 @@ def pad_inplace_{{name}}(ndarray[{{c_type}}] values, if limit is None: lim = N else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit val = values[0] @@ -180,8 +184,10 @@ def pad_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values, if limit is None: lim = N else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit for j in range(K): @@ -240,8 +246,10 @@ def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, if limit is None: lim = nright else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]: @@ -304,8 +312,10 @@ def backfill_inplace_{{name}}(ndarray[{{c_type}}] values, if limit is None: lim = N else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit 
must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit val = values[N - 1] @@ -338,8 +348,10 @@ def backfill_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values, if limit is None: lim = N else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit for j in range(K): diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 405d6c98a5d37..23eb6a40f5f1d 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -295,6 +295,13 @@ def test_fillna_raise(self): self.assertRaises(TypeError, s.fillna, [1, 2]) self.assertRaises(TypeError, s.fillna, (1, 2)) + # related GH 9217, make sure limit is an int and greater than 0 + s = Series([1, 2, 3, None]) + for limit in [-1, 0, 1., 2.]: + for method in ['backfill', 'bfill', 'pad', 'ffill', None]: + with tm.assertRaises(ValueError): + s.fillna(1, limit=limit, method=method) + def test_fillna_nat(self): series = Series([0, 1, 2, tslib.iNaT], dtype='M8[ns]') @@ -865,6 +872,17 @@ def test_interp_limit(self): result = s.interpolate(method='linear', limit=2) assert_series_equal(result, expected) + # GH 9217, make sure limit is an int and greater than 0 + methods = ['linear', 'time', 'index', 'values', 'nearest', 'zero', + 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', + 'polynomial', 'spline', 'piecewise_polynomial', None, + 'from_derivatives', 'pchip', 'akima'] + s = pd.Series([1, 2, np.nan, np.nan, 5]) + for limit in [-1, 0, 1., 2.]: + for method in methods: + with tm.assertRaises(ValueError): + s.interpolate(limit=limit, method=method) + def test_interp_limit_forward(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) From 5959fe1fffe4b5749de63d6a26ac64349bc791ac Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 14 Feb 2017 17:35:05 -0500 Subject: [PATCH 054/933] CLN: create core/sorting.py just a small reorg to put sorting / grouping utilities into a separate area Author: Jeff Reback Closes #15402 from jreback/sorting and squashes the following commits: fdcf9a1 [Jeff Reback] change a couple of sorting.py functions to be non-private (public to pandas internals) 90ff22d [Jeff Reback] split up some value_counts groupby tests a bit 18ea902 [Jeff Reback] CLN: create core/sorting.py 92dcb07 [Jeff Reback] CLN: remove numpy_groupby as not used --- pandas/core/frame.py | 26 +- pandas/core/groupby.py | 376 +--------------------- pandas/core/reshape.py | 13 +- pandas/core/series.py | 10 +- pandas/core/sorting.py | 357 ++++++++++++++++++++ pandas/indexes/multi.py | 12 +- pandas/tests/groupby/test_filters.py | 21 -- pandas/tests/groupby/test_groupby.py | 169 ---------- pandas/tests/groupby/test_misc.py | 101 ------ pandas/tests/groupby/test_value_counts.py | 60 ++++ pandas/tests/test_sorting.py | 339 +++++++++++++++++++ pandas/tests/tools/test_merge.py | 135 +------- pandas/tools/merge.py | 4 +- 13 files changed, 802 insertions(+), 821 deletions(-) create mode 100644 pandas/core/sorting.py delete mode 100644 pandas/tests/groupby/test_misc.py create mode 100644 pandas/tests/groupby/test_value_counts.py create mode 100644 pandas/tests/test_sorting.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index aa03bfb9a54b9..16f8d4658dc20 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3141,7 +3141,7 @@ def duplicated(self, subset=None, 
keep='first'): ------- duplicated : Series """ - from pandas.core.groupby import get_group_index + from pandas.core.sorting import get_group_index from pandas.hashtable import duplicated_int64, _SIZE_HINT_LIMIT def f(vals): @@ -3179,7 +3179,7 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, raise ValueError('Length of ascending (%d) != length of by (%d)' % (len(ascending), len(by))) if len(by) > 1: - from pandas.core.groupby import _lexsort_indexer + from pandas.core.sorting import lexsort_indexer def trans(v): if needs_i8_conversion(v): @@ -3193,11 +3193,11 @@ def trans(v): raise ValueError('Cannot sort by duplicate column %s' % str(x)) keys.append(trans(k)) - indexer = _lexsort_indexer(keys, orders=ascending, - na_position=na_position) + indexer = lexsort_indexer(keys, orders=ascending, + na_position=na_position) indexer = _ensure_platform_int(indexer) else: - from pandas.core.groupby import _nargsort + from pandas.core.sorting import nargsort by = by[0] k = self.xs(by, axis=other_axis).values @@ -3214,8 +3214,8 @@ def trans(v): if isinstance(ascending, (tuple, list)): ascending = ascending[0] - indexer = _nargsort(k, kind=kind, ascending=ascending, - na_position=na_position) + indexer = nargsort(k, kind=kind, ascending=ascending, + na_position=na_position) new_data = self._data.take(indexer, axis=self._get_block_manager_axis(axis), @@ -3300,17 +3300,17 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, sort_remaining=sort_remaining) elif isinstance(labels, MultiIndex): - from pandas.core.groupby import _lexsort_indexer + from pandas.core.sorting import lexsort_indexer # make sure that the axis is lexsorted to start # if not we need to reconstruct to get the correct indexer if not labels.is_lexsorted(): labels = MultiIndex.from_tuples(labels.values) - indexer = _lexsort_indexer(labels.labels, orders=ascending, - na_position=na_position) + indexer = lexsort_indexer(labels.labels, orders=ascending, + na_position=na_position) else: - from pandas.core.groupby import _nargsort + from pandas.core.sorting import nargsort # GH11080 - Check monotonic-ness before sort an index # if monotonic (already sorted), return None or copy() according @@ -3322,8 +3322,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, else: return self.copy() - indexer = _nargsort(labels, kind=kind, ascending=ascending, - na_position=na_position) + indexer = nargsort(labels, kind=kind, ascending=ascending, + na_position=na_position) new_data = self._data.take(indexer, axis=self._get_block_manager_axis(axis), diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index a228861270aea..23c835318b0e6 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -7,7 +7,7 @@ import copy from pandas.compat import ( - zip, range, long, lzip, + zip, range, lzip, callable, map ) from pandas import compat @@ -47,6 +47,9 @@ from pandas.core.internals import BlockManager, make_block from pandas.core.series import Series from pandas.core.panel import Panel +from pandas.core.sorting import (get_group_index_sorter, get_group_index, + compress_group_index, get_flattened_iterator, + decons_obs_group_ids, get_indexer_dict) from pandas.util.decorators import (cache_readonly, Substitution, Appender, make_signature, deprecate_kwarg) from pandas.formats.printing import pprint_thing @@ -59,7 +62,6 @@ from pandas.lib import Timestamp import pandas.tslib as tslib import pandas.algos as _algos -import pandas.hashtable as _hash _doc_template = """ @@ -729,7 +731,7 @@ def 
_cumcount_array(self, ascending=True): (though the default is sort=True) for groupby in general """ ids, _, ngroups = self.grouper.group_info - sorter = _get_group_index_sorter(ids, ngroups) + sorter = get_group_index_sorter(ids, ngroups) ids, count = ids[sorter], len(ids) if count == 0: @@ -1616,9 +1618,12 @@ def _get_group_keys(self): return self.levels[0] else: comp_ids, _, ngroups = self.group_info + # provide "flattened" iterator for multi-group setting - mapper = _KeyMapper(comp_ids, ngroups, self.labels, self.levels) - return [mapper.get_key(i) for i in range(ngroups)] + return get_flattened_iterator(comp_ids, + ngroups, + self.levels, + self.labels) def apply(self, f, data, axis=0): mutated = self.mutated @@ -1662,7 +1667,7 @@ def indices(self): label_list = [ping.labels for ping in self.groupings] keys = [_values_from_object(ping.group_index) for ping in self.groupings] - return _get_indices_dict(label_list, keys) + return get_indexer_dict(label_list, keys) @property def labels(self): @@ -1726,7 +1731,7 @@ def _get_compressed_labels(self): if len(all_labels) > 1: group_index = get_group_index(all_labels, self.shape, sort=True, xnull=True) - return _compress_group_index(group_index, sort=self.sort) + return compress_group_index(group_index, sort=self.sort) ping = self.groupings[0] return ping.labels, np.arange(len(ping.group_index)) @@ -2027,7 +2032,7 @@ def _aggregate_series_fast(self, obj, func): # avoids object / Series creation overhead dummy = obj._get_values(slice(None, 0)).to_dense() - indexer = _get_group_index_sorter(group_index, ngroups) + indexer = get_group_index_sorter(group_index, ngroups) obj = obj.take(indexer, convert=False) group_index = algos.take_nd(group_index, indexer, allow_fill=False) grouper = lib.SeriesGrouper(obj, func, group_index, ngroups, @@ -2424,7 +2429,6 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, a BaseGrouper. """ - group_axis = obj._get_axis(axis) # validate that the passed level is compatible with the passed @@ -4206,7 +4210,7 @@ def slabels(self): @cache_readonly def sort_idx(self): # Counting sort indexer - return _get_group_index_sorter(self.labels, self.ngroups) + return get_group_index_sorter(self.labels, self.ngroups) def __iter__(self): sdata = self._get_sorted_data() @@ -4302,355 +4306,3 @@ def get_splitter(data, *args, **kwargs): klass = NDFrameSplitter return klass(data, *args, **kwargs) - - -# ---------------------------------------------------------------------- -# Misc utilities - - -def get_group_index(labels, shape, sort, xnull): - """ - For the particular label_list, gets the offsets into the hypothetical list - representing the totally ordered cartesian product of all possible label - combinations, *as long as* this space fits within int64 bounds; - otherwise, though group indices identify unique combinations of - labels, they cannot be deconstructed. - - If `sort`, rank of returned ids preserve lexical ranks of labels. - i.e. returned id's can be used to do lexical sort on labels; - - If `xnull` nulls (-1 labels) are passed through. - - Parameters - ---------- - labels: sequence of arrays - Integers identifying levels at each location - shape: sequence of ints same length as labels - Number of unique levels at each location - sort: boolean - If the ranks of returned ids should match lexical ranks of labels - xnull: boolean - If true nulls are excluded. i.e. 
-1 values in the labels are - passed through - Returns - ------- - An array of type int64 where two elements are equal if their corresponding - labels are equal at all location. - """ - def _int64_cut_off(shape): - acc = long(1) - for i, mul in enumerate(shape): - acc *= long(mul) - if not acc < _INT64_MAX: - return i - return len(shape) - - def loop(labels, shape): - # how many levels can be done without overflow: - nlev = _int64_cut_off(shape) - - # compute flat ids for the first `nlev` levels - stride = np.prod(shape[1:nlev], dtype='i8') - out = stride * labels[0].astype('i8', subok=False, copy=False) - - for i in range(1, nlev): - if shape[i] == 0: - stride = 0 - else: - stride //= shape[i] - out += labels[i] * stride - - if xnull: # exclude nulls - mask = labels[0] == -1 - for lab in labels[1:nlev]: - mask |= lab == -1 - out[mask] = -1 - - if nlev == len(shape): # all levels done! - return out - - # compress what has been done so far in order to avoid overflow - # to retain lexical ranks, obs_ids should be sorted - comp_ids, obs_ids = _compress_group_index(out, sort=sort) - - labels = [comp_ids] + labels[nlev:] - shape = [len(obs_ids)] + shape[nlev:] - - return loop(labels, shape) - - def maybe_lift(lab, size): # pormote nan values - return (lab + 1, size + 1) if (lab == -1).any() else (lab, size) - - labels = map(_ensure_int64, labels) - if not xnull: - labels, shape = map(list, zip(*map(maybe_lift, labels, shape))) - - return loop(list(labels), list(shape)) - - -_INT64_MAX = np.iinfo(np.int64).max - - -def _int64_overflow_possible(shape): - the_prod = long(1) - for x in shape: - the_prod *= long(x) - - return the_prod >= _INT64_MAX - - -def decons_group_index(comp_labels, shape): - # reconstruct labels - if _int64_overflow_possible(shape): - # at some point group indices are factorized, - # and may not be deconstructed here! wrong path! - raise ValueError('cannot deconstruct factorized group indices!') - - label_list = [] - factor = 1 - y = 0 - x = comp_labels - for i in reversed(range(len(shape))): - labels = (x - y) % (factor * shape[i]) // factor - np.putmask(labels, comp_labels < 0, -1) - label_list.append(labels) - y = labels * factor - factor *= shape[i] - return label_list[::-1] - - -def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull): - """ - reconstruct labels from observed group ids - - Parameters - ---------- - xnull: boolean, - if nulls are excluded; i.e. -1 labels are passed through - """ - from pandas.hashtable import unique_label_indices - - if not xnull: - lift = np.fromiter(((a == -1).any() for a in labels), dtype='i8') - shape = np.asarray(shape, dtype='i8') + lift - - if not _int64_overflow_possible(shape): - # obs ids are deconstructable! take the fast route! 
- out = decons_group_index(obs_ids, shape) - return out if xnull or not lift.any() \ - else [x - y for x, y in zip(out, lift)] - - i = unique_label_indices(comp_ids) - i8copy = lambda a: a.astype('i8', subok=False, copy=True) - return [i8copy(lab[i]) for lab in labels] - - -def _indexer_from_factorized(labels, shape, compress=True): - ids = get_group_index(labels, shape, sort=True, xnull=False) - - if not compress: - ngroups = (ids.size and ids.max()) + 1 - else: - ids, obs = _compress_group_index(ids, sort=True) - ngroups = len(obs) - - return _get_group_index_sorter(ids, ngroups) - - -def _lexsort_indexer(keys, orders=None, na_position='last'): - labels = [] - shape = [] - if isinstance(orders, bool): - orders = [orders] * len(keys) - elif orders is None: - orders = [True] * len(keys) - - for key, order in zip(keys, orders): - - # we are already a Categorical - if is_categorical_dtype(key): - c = key - - # create the Categorical - else: - c = Categorical(key, ordered=True) - - if na_position not in ['last', 'first']: - raise ValueError('invalid na_position: {!r}'.format(na_position)) - - n = len(c.categories) - codes = c.codes.copy() - - mask = (c.codes == -1) - if order: # ascending - if na_position == 'last': - codes = np.where(mask, n, codes) - elif na_position == 'first': - codes += 1 - else: # not order means descending - if na_position == 'last': - codes = np.where(mask, n, n - codes - 1) - elif na_position == 'first': - codes = np.where(mask, 0, n - codes) - if mask.any(): - n += 1 - - shape.append(n) - labels.append(codes) - - return _indexer_from_factorized(labels, shape) - - -def _nargsort(items, kind='quicksort', ascending=True, na_position='last'): - """ - This is intended to be a drop-in replacement for np.argsort which - handles NaNs. It adds ascending and na_position parameters. - GH #6399, #5231 - """ - - # specially handle Categorical - if is_categorical_dtype(items): - return items.argsort(ascending=ascending) - - items = np.asanyarray(items) - idx = np.arange(len(items)) - mask = isnull(items) - non_nans = items[~mask] - non_nan_idx = idx[~mask] - nan_idx = np.nonzero(mask)[0] - if not ascending: - non_nans = non_nans[::-1] - non_nan_idx = non_nan_idx[::-1] - indexer = non_nan_idx[non_nans.argsort(kind=kind)] - if not ascending: - indexer = indexer[::-1] - # Finally, place the NaNs at the end or the beginning according to - # na_position - if na_position == 'last': - indexer = np.concatenate([indexer, nan_idx]) - elif na_position == 'first': - indexer = np.concatenate([nan_idx, indexer]) - else: - raise ValueError('invalid na_position: {!r}'.format(na_position)) - return indexer - - -class _KeyMapper(object): - - """ - Ease my suffering. 
Map compressed group id -> key tuple - """ - - def __init__(self, comp_ids, ngroups, labels, levels): - self.levels = levels - self.labels = labels - self.comp_ids = comp_ids.astype(np.int64) - - self.k = len(labels) - self.tables = [_hash.Int64HashTable(ngroups) for _ in range(self.k)] - - self._populate_tables() - - def _populate_tables(self): - for labs, table in zip(self.labels, self.tables): - table.map(self.comp_ids, labs.astype(np.int64)) - - def get_key(self, comp_id): - return tuple(level[table.get_item(comp_id)] - for table, level in zip(self.tables, self.levels)) - - -def _get_indices_dict(label_list, keys): - shape = list(map(len, keys)) - - group_index = get_group_index(label_list, shape, sort=True, xnull=True) - ngroups = ((group_index.size and group_index.max()) + 1) \ - if _int64_overflow_possible(shape) \ - else np.prod(shape, dtype='i8') - - sorter = _get_group_index_sorter(group_index, ngroups) - - sorted_labels = [lab.take(sorter) for lab in label_list] - group_index = group_index.take(sorter) - - return lib.indices_fast(sorter, group_index, keys, sorted_labels) - - -# ---------------------------------------------------------------------- -# sorting levels...cleverly? - -def _get_group_index_sorter(group_index, ngroups): - """ - _algos.groupsort_indexer implements `counting sort` and it is at least - O(ngroups), where - ngroups = prod(shape) - shape = map(len, keys) - that is, linear in the number of combinations (cartesian product) of unique - values of groupby keys. This can be huge when doing multi-key groupby. - np.argsort(kind='mergesort') is O(count x log(count)) where count is the - length of the data-frame; - Both algorithms are `stable` sort and that is necessary for correctness of - groupby operations. e.g. consider: - df.groupby(key)[col].transform('first') - """ - count = len(group_index) - alpha = 0.0 # taking complexities literally; there may be - beta = 1.0 # some room for fine-tuning these parameters - do_groupsort = (count > 0 and ((alpha + beta * ngroups) < - (count * np.log(count)))) - if do_groupsort: - sorter, _ = _algos.groupsort_indexer(_ensure_int64(group_index), - ngroups) - return _ensure_platform_int(sorter) - else: - return group_index.argsort(kind='mergesort') - - -def _compress_group_index(group_index, sort=True): - """ - Group_index is offsets into cartesian product of all possible labels. This - space can be huge, so this function compresses it, by computing offsets - (comp_ids) into the list of unique labels (obs_group_ids). 
- """ - - size_hint = min(len(group_index), _hash._SIZE_HINT_LIMIT) - table = _hash.Int64HashTable(size_hint) - - group_index = _ensure_int64(group_index) - - # note, group labels come out ascending (ie, 1,2,3 etc) - comp_ids, obs_group_ids = table.get_labels_groupby(group_index) - - if sort and len(obs_group_ids) > 0: - obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids) - - return comp_ids, obs_group_ids - - -def _reorder_by_uniques(uniques, labels): - # sorter is index where elements ought to go - sorter = uniques.argsort() - - # reverse_indexer is where elements came from - reverse_indexer = np.empty(len(sorter), dtype=np.int64) - reverse_indexer.put(sorter, np.arange(len(sorter))) - - mask = labels < 0 - - # move labels to right locations (ie, unsort ascending labels) - labels = algos.take_nd(reverse_indexer, labels, allow_fill=False) - np.putmask(labels, mask, -1) - - # sort observed ids - uniques = algos.take_nd(uniques, sorter, allow_fill=False) - - return uniques, labels - - -def numpy_groupby(data, labels, axis=0): - s = np.argsort(labels) - keys, inv = np.unique(labels, return_inverse=True) - i = inv.take(s) - groups_at = np.where(i != np.concatenate(([-1], i[:-1])))[0] - ordered_data = data.take(s, axis=axis) - group_sums = np.add.reduceat(ordered_data, groups_at, axis=axis) - - return group_sums diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index cebaf4e3fd89b..5fc0d590a6885 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -20,7 +20,8 @@ from pandas._sparse import IntIndex from pandas.core.categorical import Categorical, _factorize_from_iterable -from pandas.core.groupby import get_group_index, _compress_group_index +from pandas.core.sorting import (get_group_index, compress_group_index, + decons_obs_group_ids) import pandas.core.algorithms as algos import pandas.algos as _algos @@ -156,7 +157,7 @@ def get_result(self): # filter out missing levels if values.shape[1] > 0: - col_inds, obs_ids = _compress_group_index(self.sorted_labels[-1]) + col_inds, obs_ids = compress_group_index(self.sorted_labels[-1]) # rare case, level values not observed if len(obs_ids) < self.full_shape[1]: inds = (value_mask.sum(0) > 0).nonzero()[0] @@ -245,8 +246,6 @@ def get_new_index(self): def _unstack_multiple(data, clocs): - from pandas.core.groupby import decons_obs_group_ids - if len(clocs) == 0: return data @@ -268,7 +267,7 @@ def _unstack_multiple(data, clocs): shape = [len(x) for x in clevels] group_index = get_group_index(clabels, shape, sort=False, xnull=False) - comp_ids, obs_ids = _compress_group_index(group_index, sort=False) + comp_ids, obs_ids = compress_group_index(group_index, sort=False) recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels, xnull=False) @@ -459,10 +458,8 @@ def _unstack_frame(obj, level, fill_value=None): def get_compressed_ids(labels, sizes): - from pandas.core.groupby import get_group_index - ids = get_group_index(labels, sizes, sort=True, xnull=False) - return _compress_group_index(ids, sort=True) + return compress_group_index(ids, sort=True) def stack(frame, level=-1, dropna=True): diff --git a/pandas/core/series.py b/pandas/core/series.py index e1eac8f66017e..da47ab5dfb003 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1786,12 +1786,12 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, new_index, indexer = index.sortlevel(level, ascending=ascending, sort_remaining=sort_remaining) elif isinstance(index, MultiIndex): - from pandas.core.groupby import 
_lexsort_indexer - indexer = _lexsort_indexer(index.labels, orders=ascending) + from pandas.core.sorting import lexsort_indexer + indexer = lexsort_indexer(index.labels, orders=ascending) else: - from pandas.core.groupby import _nargsort - indexer = _nargsort(index, kind=kind, ascending=ascending, - na_position=na_position) + from pandas.core.sorting import nargsort + indexer = nargsort(index, kind=kind, ascending=ascending, + na_position=na_position) indexer = _ensure_platform_int(indexer) new_index = index.take(indexer) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py new file mode 100644 index 0000000000000..71314da7745c0 --- /dev/null +++ b/pandas/core/sorting.py @@ -0,0 +1,357 @@ +""" miscellaneous sorting / groupby utilities """ + +import numpy as np +from pandas.compat import long +from pandas.core.categorical import Categorical +from pandas.types.common import (_ensure_platform_int, + _ensure_int64, + is_categorical_dtype) +from pandas.types.missing import isnull +import pandas.core.algorithms as algos +import pandas.algos as _algos +import pandas.hashtable as _hash +from pandas import lib + + +_INT64_MAX = np.iinfo(np.int64).max + + +def get_group_index(labels, shape, sort, xnull): + """ + For the particular label_list, gets the offsets into the hypothetical list + representing the totally ordered cartesian product of all possible label + combinations, *as long as* this space fits within int64 bounds; + otherwise, though group indices identify unique combinations of + labels, they cannot be deconstructed. + - If `sort`, rank of returned ids preserve lexical ranks of labels. + i.e. returned id's can be used to do lexical sort on labels; + - If `xnull` nulls (-1 labels) are passed through. + + Parameters + ---------- + labels: sequence of arrays + Integers identifying levels at each location + shape: sequence of ints same length as labels + Number of unique levels at each location + sort: boolean + If the ranks of returned ids should match lexical ranks of labels + xnull: boolean + If true nulls are excluded. i.e. -1 values in the labels are + passed through + Returns + ------- + An array of type int64 where two elements are equal if their corresponding + labels are equal at all location. + """ + def _int64_cut_off(shape): + acc = long(1) + for i, mul in enumerate(shape): + acc *= long(mul) + if not acc < _INT64_MAX: + return i + return len(shape) + + def loop(labels, shape): + # how many levels can be done without overflow: + nlev = _int64_cut_off(shape) + + # compute flat ids for the first `nlev` levels + stride = np.prod(shape[1:nlev], dtype='i8') + out = stride * labels[0].astype('i8', subok=False, copy=False) + + for i in range(1, nlev): + if shape[i] == 0: + stride = 0 + else: + stride //= shape[i] + out += labels[i] * stride + + if xnull: # exclude nulls + mask = labels[0] == -1 + for lab in labels[1:nlev]: + mask |= lab == -1 + out[mask] = -1 + + if nlev == len(shape): # all levels done! 
+            return out
+
+        # compress what has been done so far in order to avoid overflow
+        # to retain lexical ranks, obs_ids should be sorted
+        comp_ids, obs_ids = compress_group_index(out, sort=sort)
+
+        labels = [comp_ids] + labels[nlev:]
+        shape = [len(obs_ids)] + shape[nlev:]
+
+        return loop(labels, shape)
+
+    def maybe_lift(lab, size):  # promote nan values
+        return (lab + 1, size + 1) if (lab == -1).any() else (lab, size)
+
+    labels = map(_ensure_int64, labels)
+    if not xnull:
+        labels, shape = map(list, zip(*map(maybe_lift, labels, shape)))
+
+    return loop(list(labels), list(shape))
+
+
+def is_int64_overflow_possible(shape):
+    the_prod = long(1)
+    for x in shape:
+        the_prod *= long(x)
+
+    return the_prod >= _INT64_MAX
+
+
+def decons_group_index(comp_labels, shape):
+    # reconstruct labels
+    if is_int64_overflow_possible(shape):
+        # at some point group indices are factorized,
+        # and may not be deconstructed here! wrong path!
+        raise ValueError('cannot deconstruct factorized group indices!')
+
+    label_list = []
+    factor = 1
+    y = 0
+    x = comp_labels
+    for i in reversed(range(len(shape))):
+        labels = (x - y) % (factor * shape[i]) // factor
+        np.putmask(labels, comp_labels < 0, -1)
+        label_list.append(labels)
+        y = labels * factor
+        factor *= shape[i]
+    return label_list[::-1]
+
+
+def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull):
+    """
+    reconstruct labels from observed group ids
+
+    Parameters
+    ----------
+    xnull: boolean,
+        if nulls are excluded; i.e. -1 labels are passed through
+    """
+    from pandas.hashtable import unique_label_indices
+
+    if not xnull:
+        lift = np.fromiter(((a == -1).any() for a in labels), dtype='i8')
+        shape = np.asarray(shape, dtype='i8') + lift
+
+    if not is_int64_overflow_possible(shape):
+        # obs ids are deconstructable! take the fast route!
+ out = decons_group_index(obs_ids, shape) + return out if xnull or not lift.any() \ + else [x - y for x, y in zip(out, lift)] + + i = unique_label_indices(comp_ids) + i8copy = lambda a: a.astype('i8', subok=False, copy=True) + return [i8copy(lab[i]) for lab in labels] + + +def indexer_from_factorized(labels, shape, compress=True): + ids = get_group_index(labels, shape, sort=True, xnull=False) + + if not compress: + ngroups = (ids.size and ids.max()) + 1 + else: + ids, obs = compress_group_index(ids, sort=True) + ngroups = len(obs) + + return get_group_index_sorter(ids, ngroups) + + +def lexsort_indexer(keys, orders=None, na_position='last'): + labels = [] + shape = [] + if isinstance(orders, bool): + orders = [orders] * len(keys) + elif orders is None: + orders = [True] * len(keys) + + for key, order in zip(keys, orders): + + # we are already a Categorical + if is_categorical_dtype(key): + c = key + + # create the Categorical + else: + c = Categorical(key, ordered=True) + + if na_position not in ['last', 'first']: + raise ValueError('invalid na_position: {!r}'.format(na_position)) + + n = len(c.categories) + codes = c.codes.copy() + + mask = (c.codes == -1) + if order: # ascending + if na_position == 'last': + codes = np.where(mask, n, codes) + elif na_position == 'first': + codes += 1 + else: # not order means descending + if na_position == 'last': + codes = np.where(mask, n, n - codes - 1) + elif na_position == 'first': + codes = np.where(mask, 0, n - codes) + if mask.any(): + n += 1 + + shape.append(n) + labels.append(codes) + + return indexer_from_factorized(labels, shape) + + +def nargsort(items, kind='quicksort', ascending=True, na_position='last'): + """ + This is intended to be a drop-in replacement for np.argsort which + handles NaNs. It adds ascending and na_position parameters. + GH #6399, #5231 + """ + + # specially handle Categorical + if is_categorical_dtype(items): + return items.argsort(ascending=ascending) + + items = np.asanyarray(items) + idx = np.arange(len(items)) + mask = isnull(items) + non_nans = items[~mask] + non_nan_idx = idx[~mask] + nan_idx = np.nonzero(mask)[0] + if not ascending: + non_nans = non_nans[::-1] + non_nan_idx = non_nan_idx[::-1] + indexer = non_nan_idx[non_nans.argsort(kind=kind)] + if not ascending: + indexer = indexer[::-1] + # Finally, place the NaNs at the end or the beginning according to + # na_position + if na_position == 'last': + indexer = np.concatenate([indexer, nan_idx]) + elif na_position == 'first': + indexer = np.concatenate([nan_idx, indexer]) + else: + raise ValueError('invalid na_position: {!r}'.format(na_position)) + return indexer + + +class _KeyMapper(object): + + """ + Ease my suffering. 
Map compressed group id -> key tuple
+    """
+
+    def __init__(self, comp_ids, ngroups, levels, labels):
+        self.levels = levels
+        self.labels = labels
+        self.comp_ids = comp_ids.astype(np.int64)
+
+        self.k = len(labels)
+        self.tables = [_hash.Int64HashTable(ngroups) for _ in range(self.k)]
+
+        self._populate_tables()
+
+    def _populate_tables(self):
+        for labs, table in zip(self.labels, self.tables):
+            table.map(self.comp_ids, labs.astype(np.int64))
+
+    def get_key(self, comp_id):
+        return tuple(level[table.get_item(comp_id)]
+                     for table, level in zip(self.tables, self.levels))
+
+
+def get_flattened_iterator(comp_ids, ngroups, levels, labels):
+    # provide "flattened" iterator for multi-group setting
+    mapper = _KeyMapper(comp_ids, ngroups, levels, labels)
+    return [mapper.get_key(i) for i in range(ngroups)]
+
+
+def get_indexer_dict(label_list, keys):
+    """ return a dictionary of {labels} -> {indexers} """
+    shape = list(map(len, keys))
+
+    group_index = get_group_index(label_list, shape, sort=True, xnull=True)
+    ngroups = ((group_index.size and group_index.max()) + 1) \
+        if is_int64_overflow_possible(shape) \
+        else np.prod(shape, dtype='i8')
+
+    sorter = get_group_index_sorter(group_index, ngroups)
+
+    sorted_labels = [lab.take(sorter) for lab in label_list]
+    group_index = group_index.take(sorter)
+
+    return lib.indices_fast(sorter, group_index, keys, sorted_labels)
+
+
+# ----------------------------------------------------------------------
+# sorting levels...cleverly?
+
+def get_group_index_sorter(group_index, ngroups):
+    """
+    _algos.groupsort_indexer implements `counting sort` and it is at least
+    O(ngroups), where
+        ngroups = prod(shape)
+        shape = map(len, keys)
+    that is, linear in the number of combinations (cartesian product) of unique
+    values of groupby keys. This can be huge when doing multi-key groupby.
+    np.argsort(kind='mergesort') is O(count x log(count)) where count is the
+    length of the data-frame;
+    Both algorithms are `stable` sort and that is necessary for correctness of
+    groupby operations. e.g. consider:
+        df.groupby(key)[col].transform('first')
+    """
+    count = len(group_index)
+    alpha = 0.0  # taking complexities literally; there may be
+    beta = 1.0  # some room for fine-tuning these parameters
+    do_groupsort = (count > 0 and ((alpha + beta * ngroups) <
+                                   (count * np.log(count))))
+    if do_groupsort:
+        sorter, _ = _algos.groupsort_indexer(_ensure_int64(group_index),
+                                             ngroups)
+        return _ensure_platform_int(sorter)
+    else:
+        return group_index.argsort(kind='mergesort')
+
+
+def compress_group_index(group_index, sort=True):
+    """
+    Group_index is offsets into cartesian product of all possible labels. This
+    space can be huge, so this function compresses it, by computing offsets
+    (comp_ids) into the list of unique labels (obs_group_ids).
+ """ + + size_hint = min(len(group_index), _hash._SIZE_HINT_LIMIT) + table = _hash.Int64HashTable(size_hint) + + group_index = _ensure_int64(group_index) + + # note, group labels come out ascending (ie, 1,2,3 etc) + comp_ids, obs_group_ids = table.get_labels_groupby(group_index) + + if sort and len(obs_group_ids) > 0: + obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids) + + return comp_ids, obs_group_ids + + +def _reorder_by_uniques(uniques, labels): + # sorter is index where elements ought to go + sorter = uniques.argsort() + + # reverse_indexer is where elements came from + reverse_indexer = np.empty(len(sorter), dtype=np.int64) + reverse_indexer.put(sorter, np.arange(len(sorter))) + + mask = labels < 0 + + # move labels to right locations (ie, unsort ascending labels) + labels = algos.take_nd(reverse_indexer, labels, allow_fill=False) + np.putmask(labels, mask, -1) + + # sort observed ids + uniques = algos.take_nd(uniques, sorter, allow_fill=False) + + return uniques, labels diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 9ab07d87fd13b..653ba1fee5691 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -663,7 +663,7 @@ def is_unique(self): False: 'first'}) @Appender(base._shared_docs['duplicated'] % ibase._index_doc_kwargs) def duplicated(self, keep='first'): - from pandas.core.groupby import get_group_index + from pandas.core.sorting import get_group_index from pandas.hashtable import duplicated_int64 shape = map(len, self.levels) @@ -1405,7 +1405,7 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): Indices of output values in original index """ - from pandas.core.groupby import _indexer_from_factorized + from pandas.core.sorting import indexer_from_factorized if isinstance(level, (compat.string_types, int)): level = [level] @@ -1417,8 +1417,8 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): if not len(level) == len(ascending): raise ValueError("level must have same length as ascending") - from pandas.core.groupby import _lexsort_indexer - indexer = _lexsort_indexer(self.labels, orders=ascending) + from pandas.core.sorting import lexsort_indexer + indexer = lexsort_indexer(self.labels, orders=ascending) # level ordering else: @@ -1436,8 +1436,8 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): else: sortorder = level[0] - indexer = _indexer_from_factorized(primary, primshp, - compress=False) + indexer = indexer_from_factorized(primary, primshp, + compress=False) if not ascending: indexer = indexer[::-1] diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 1640858802047..46ddb5a5318fb 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -616,24 +616,3 @@ def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): expected = f(df.groupby(tups)[field]) for k, v in compat.iteritems(expected): assert (result[k] == v) - - -def test_decons(): - from pandas.core.groupby import decons_group_index, get_group_index - - def testit(label_list, shape): - group_index = get_group_index(label_list, shape, sort=True, xnull=True) - label_list2 = decons_group_index(group_index, shape) - - for a, b in zip(label_list, label_list2): - assert (np.array_equal(a, b)) - - shape = (4, 5, 6) - label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( - [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( - [5, 1, 0, 2, 3, 0, 5, 4], 100)] - testit(label_list, shape) - - shape = (10000, 10000) - label_list = 
[np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] - testit(label_list, shape) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d625fa07d932c..3a6a9eaaa8e72 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1510,59 +1510,6 @@ def check_nunique(df, keys, as_index=True): check_nunique(frame, ['jim'], as_index=False) check_nunique(frame, ['jim', 'joe'], as_index=False) - def test_series_groupby_value_counts(self): - from itertools import product - np.random.seed(1234) - - def rebuild_index(df): - arr = list(map(df.index.get_level_values, range(df.index.nlevels))) - df.index = MultiIndex.from_arrays(arr, names=df.index.names) - return df - - def check_value_counts(df, keys, bins): - for isort, normalize, sort, ascending, dropna \ - in product((False, True), repeat=5): - - kwargs = dict(normalize=normalize, sort=sort, - ascending=ascending, dropna=dropna, bins=bins) - - gr = df.groupby(keys, sort=isort) - left = gr['3rd'].value_counts(**kwargs) - - gr = df.groupby(keys, sort=isort) - right = gr['3rd'].apply(Series.value_counts, **kwargs) - right.index.names = right.index.names[:-1] + ['3rd'] - - # have to sort on index because of unstable sort on values - left, right = map(rebuild_index, (left, right)) # xref GH9212 - assert_series_equal(left.sort_index(), right.sort_index()) - - def loop(df): - bins = None, np.arange(0, max(5, df['3rd'].max()) + 1, 2) - keys = '1st', '2nd', ('1st', '2nd') - for k, b in product(keys, bins): - check_value_counts(df, k, b) - - days = date_range('2015-08-24', periods=10) - - for n, m in product((100, 1000), (5, 20)): - frame = DataFrame({ - '1st': np.random.choice( - list('abcd'), n), - '2nd': np.random.choice(days, n), - '3rd': np.random.randint(1, m + 1, n) - }) - - loop(frame) - - frame.loc[1::11, '1st'] = nan - frame.loc[3::17, '2nd'] = nan - frame.loc[7::19, '3rd'] = nan - frame.loc[8::19, '3rd'] = nan - frame.loc[9::19, '3rd'] = nan - - loop(frame) - def test_multiindex_passthru(self): # GH 7997 @@ -3071,22 +3018,6 @@ def test_panel_groupby(self): agged = grouped.mean() self.assert_index_equal(agged.minor_axis, Index([0, 1])) - def test_numpy_groupby(self): - from pandas.core.groupby import numpy_groupby - - data = np.random.randn(100, 100) - labels = np.random.randint(0, 10, size=100) - - df = DataFrame(data) - - result = df.groupby(labels).sum().values - expected = numpy_groupby(data, labels) - assert_almost_equal(result, expected) - - result = df.groupby(labels, axis=1).sum().values - expected = numpy_groupby(data, labels, axis=1) - assert_almost_equal(result, expected) - def test_groupby_2d_malformed(self): d = DataFrame(index=lrange(2)) d['group'] = ['g1', 'g2'] @@ -3112,85 +3043,6 @@ def test_int32_overflow(self): right = df.groupby(['D', 'C', 'B', 'A']).sum() self.assertEqual(len(left), len(right)) - def test_int64_overflow(self): - from pandas.core.groupby import _int64_overflow_possible - - B = np.concatenate((np.arange(1000), np.arange(1000), np.arange(500))) - A = np.arange(2500) - df = DataFrame({'A': A, - 'B': B, - 'C': A, - 'D': B, - 'E': A, - 'F': B, - 'G': A, - 'H': B, - 'values': np.random.randn(2500)}) - - lg = df.groupby(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']) - rg = df.groupby(['H', 'G', 'F', 'E', 'D', 'C', 'B', 'A']) - - left = lg.sum()['values'] - right = rg.sum()['values'] - - exp_index, _ = left.index.sortlevel() - self.assert_index_equal(left.index, exp_index) - - exp_index, _ = right.index.sortlevel(0) - 
self.assert_index_equal(right.index, exp_index) - - tups = list(map(tuple, df[['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H' - ]].values)) - tups = com._asarray_tuplesafe(tups) - - expected = df.groupby(tups).sum()['values'] - - for k, v in compat.iteritems(expected): - self.assertEqual(left[k], right[k[::-1]]) - self.assertEqual(left[k], v) - self.assertEqual(len(left), len(right)) - - # GH9096 - values = range(55109) - data = pd.DataFrame.from_dict({'a': values, - 'b': values, - 'c': values, - 'd': values}) - grouped = data.groupby(['a', 'b', 'c', 'd']) - self.assertEqual(len(grouped), len(values)) - - arr = np.random.randint(-1 << 12, 1 << 12, (1 << 15, 5)) - i = np.random.choice(len(arr), len(arr) * 4) - arr = np.vstack((arr, arr[i])) # add sume duplicate rows - - i = np.random.permutation(len(arr)) - arr = arr[i] # shuffle rows - - df = DataFrame(arr, columns=list('abcde')) - df['jim'], df['joe'] = np.random.randn(2, len(df)) * 10 - gr = df.groupby(list('abcde')) - - # verify this is testing what it is supposed to test! - self.assertTrue(_int64_overflow_possible(gr.grouper.shape)) - - # mannually compute groupings - jim, joe = defaultdict(list), defaultdict(list) - for key, a, b in zip(map(tuple, arr), df['jim'], df['joe']): - jim[key].append(a) - joe[key].append(b) - - self.assertEqual(len(gr), len(jim)) - mi = MultiIndex.from_tuples(jim.keys(), names=list('abcde')) - - def aggr(func): - f = lambda a: np.fromiter(map(func, a), dtype='f8') - arr = np.vstack((f(jim.values()), f(joe.values()))).T - res = DataFrame(arr, columns=['jim', 'joe'], index=mi) - return res.sort_index() - - assert_frame_equal(gr.mean(), aggr(np.mean)) - assert_frame_equal(gr.median(), aggr(np.median)) - def test_groupby_sort_multi(self): df = DataFrame({'a': ['foo', 'bar', 'baz'], 'b': [3, 2, 1], @@ -4451,24 +4303,3 @@ def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): expected = f(df.groupby(tups)[field]) for k, v in compat.iteritems(expected): assert (result[k] == v) - - -def test_decons(): - from pandas.core.groupby import decons_group_index, get_group_index - - def testit(label_list, shape): - group_index = get_group_index(label_list, shape, sort=True, xnull=True) - label_list2 = decons_group_index(group_index, shape) - - for a, b in zip(label_list, label_list2): - assert (np.array_equal(a, b)) - - shape = (4, 5, 6) - label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( - [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( - [5, 1, 0, 2, 3, 0, 5, 4], 100)] - testit(label_list, shape) - - shape = (10000, 10000) - label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] - testit(label_list, shape) diff --git a/pandas/tests/groupby/test_misc.py b/pandas/tests/groupby/test_misc.py deleted file mode 100644 index 9395304385681..0000000000000 --- a/pandas/tests/groupby/test_misc.py +++ /dev/null @@ -1,101 +0,0 @@ -""" misc non-groupby routines, as they are defined in core/groupby.py """ - -import pytest -import numpy as np -from numpy import nan -from pandas.util import testing as tm -from pandas.core.groupby import _nargsort, _lexsort_indexer - - -class TestSorting(tm.TestCase): - - def test_lexsort_indexer(self): - keys = [[nan] * 5 + list(range(100)) + [nan] * 5] - # orders=True, na_position='last' - result = _lexsort_indexer(keys, orders=True, na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=True, na_position='first' - result = _lexsort_indexer(keys, orders=True, 
na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=False, na_position='last' - result = _lexsort_indexer(keys, orders=False, na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=False, na_position='first' - result = _lexsort_indexer(keys, orders=False, na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - def test_nargsort(self): - # np.argsort(items) places NaNs last - items = [nan] * 5 + list(range(100)) + [nan] * 5 - # np.argsort(items2) may not place NaNs first - items2 = np.array(items, dtype='O') - - try: - # GH 2785; due to a regression in NumPy1.6.2 - np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) - np.argsort(items2, kind='mergesort') - except TypeError: - pytest.skip('requested sort not available for type') - - # mergesort is the most difficult to get right because we want it to be - # stable. - - # According to numpy/core/tests/test_multiarray, """The number of - # sorted items must be greater than ~50 to check the actual algorithm - # because quick and merge sort fall over to insertion sort for small - # arrays.""" - - # mergesort, ascending=True, na_position='last' - result = _nargsort(items, kind='mergesort', ascending=True, - na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='first' - result = _nargsort(items, kind='mergesort', ascending=True, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='last' - result = _nargsort(items, kind='mergesort', ascending=False, - na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='first' - result = _nargsort(items, kind='mergesort', ascending=False, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='last' - result = _nargsort(items2, kind='mergesort', ascending=True, - na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='first' - result = _nargsort(items2, kind='mergesort', ascending=True, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='last' - result = _nargsort(items2, kind='mergesort', ascending=False, - na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='first' - result = _nargsort(items2, kind='mergesort', ascending=False, - na_position='first') - exp = list(range(5)) + list(range(105, 
110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py new file mode 100644 index 0000000000000..801d0da070112 --- /dev/null +++ b/pandas/tests/groupby/test_value_counts.py @@ -0,0 +1,60 @@ +import pytest + +from itertools import product +import numpy as np + +from pandas.util import testing as tm +from pandas import MultiIndex, DataFrame, Series, date_range + + +@pytest.mark.parametrize("n,m", product((100, 1000), (5, 20))) +def test_series_groupby_value_counts(n, m): + np.random.seed(1234) + + def rebuild_index(df): + arr = list(map(df.index.get_level_values, range(df.index.nlevels))) + df.index = MultiIndex.from_arrays(arr, names=df.index.names) + return df + + def check_value_counts(df, keys, bins): + for isort, normalize, sort, ascending, dropna \ + in product((False, True), repeat=5): + + kwargs = dict(normalize=normalize, sort=sort, + ascending=ascending, dropna=dropna, bins=bins) + + gr = df.groupby(keys, sort=isort) + left = gr['3rd'].value_counts(**kwargs) + + gr = df.groupby(keys, sort=isort) + right = gr['3rd'].apply(Series.value_counts, **kwargs) + right.index.names = right.index.names[:-1] + ['3rd'] + + # have to sort on index because of unstable sort on values + left, right = map(rebuild_index, (left, right)) # xref GH9212 + tm.assert_series_equal(left.sort_index(), right.sort_index()) + + def loop(df): + bins = None, np.arange(0, max(5, df['3rd'].max()) + 1, 2) + keys = '1st', '2nd', ('1st', '2nd') + for k, b in product(keys, bins): + check_value_counts(df, k, b) + + days = date_range('2015-08-24', periods=10) + + frame = DataFrame({ + '1st': np.random.choice( + list('abcd'), n), + '2nd': np.random.choice(days, n), + '3rd': np.random.randint(1, m + 1, n) + }) + + loop(frame) + + frame.loc[1::11, '1st'] = np.nan + frame.loc[3::17, '2nd'] = np.nan + frame.loc[7::19, '3rd'] = np.nan + frame.loc[8::19, '3rd'] = np.nan + frame.loc[9::19, '3rd'] = np.nan + + loop(frame) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py new file mode 100644 index 0000000000000..99361695b2371 --- /dev/null +++ b/pandas/tests/test_sorting.py @@ -0,0 +1,339 @@ +import pytest +from itertools import product +from collections import defaultdict + +import numpy as np +from numpy import nan +import pandas as pd +from pandas.core import common as com +from pandas import DataFrame, MultiIndex, merge, concat, Series, compat +from pandas.util import testing as tm +from pandas.util.testing import assert_frame_equal, assert_series_equal +from pandas.core.sorting import (is_int64_overflow_possible, + decons_group_index, + get_group_index, + nargsort, + lexsort_indexer) + + +class TestSorting(tm.TestCase): + + def test_int64_overflow(self): + + B = np.concatenate((np.arange(1000), np.arange(1000), np.arange(500))) + A = np.arange(2500) + df = DataFrame({'A': A, + 'B': B, + 'C': A, + 'D': B, + 'E': A, + 'F': B, + 'G': A, + 'H': B, + 'values': np.random.randn(2500)}) + + lg = df.groupby(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']) + rg = df.groupby(['H', 'G', 'F', 'E', 'D', 'C', 'B', 'A']) + + left = lg.sum()['values'] + right = rg.sum()['values'] + + exp_index, _ = left.index.sortlevel() + self.assert_index_equal(left.index, exp_index) + + exp_index, _ = right.index.sortlevel(0) + self.assert_index_equal(right.index, exp_index) + + tups = list(map(tuple, df[['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H' + ]].values)) + tups = com._asarray_tuplesafe(tups) + 
+ expected = df.groupby(tups).sum()['values'] + + for k, v in compat.iteritems(expected): + self.assertEqual(left[k], right[k[::-1]]) + self.assertEqual(left[k], v) + self.assertEqual(len(left), len(right)) + + # GH9096 + values = range(55109) + data = pd.DataFrame.from_dict({'a': values, + 'b': values, + 'c': values, + 'd': values}) + grouped = data.groupby(['a', 'b', 'c', 'd']) + self.assertEqual(len(grouped), len(values)) + + arr = np.random.randint(-1 << 12, 1 << 12, (1 << 15, 5)) + i = np.random.choice(len(arr), len(arr) * 4) + arr = np.vstack((arr, arr[i])) # add some duplicate rows + + i = np.random.permutation(len(arr)) + arr = arr[i] # shuffle rows + + df = DataFrame(arr, columns=list('abcde')) + df['jim'], df['joe'] = np.random.randn(2, len(df)) * 10 + gr = df.groupby(list('abcde')) + + # verify this is testing what it is supposed to test! + self.assertTrue(is_int64_overflow_possible(gr.grouper.shape)) + + # manually compute groupings + jim, joe = defaultdict(list), defaultdict(list) + for key, a, b in zip(map(tuple, arr), df['jim'], df['joe']): + jim[key].append(a) + joe[key].append(b) + + self.assertEqual(len(gr), len(jim)) + mi = MultiIndex.from_tuples(jim.keys(), names=list('abcde')) + + def aggr(func): + f = lambda a: np.fromiter(map(func, a), dtype='f8') + arr = np.vstack((f(jim.values()), f(joe.values()))).T + res = DataFrame(arr, columns=['jim', 'joe'], index=mi) + return res.sort_index() + + assert_frame_equal(gr.mean(), aggr(np.mean)) + assert_frame_equal(gr.median(), aggr(np.median)) + + def test_lexsort_indexer(self): + keys = [[nan] * 5 + list(range(100)) + [nan] * 5] + # orders=True, na_position='last' + result = lexsort_indexer(keys, orders=True, na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=True, na_position='first' + result = lexsort_indexer(keys, orders=True, na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=False, na_position='last' + result = lexsort_indexer(keys, orders=False, na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=False, na_position='first' + result = lexsort_indexer(keys, orders=False, na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + def test_nargsort(self): + # np.argsort(items) places NaNs last + items = [nan] * 5 + list(range(100)) + [nan] * 5 + # np.argsort(items2) may not place NaNs first + items2 = np.array(items, dtype='O') + + try: + # GH 2785; due to a regression in NumPy 1.6.2 + np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) + np.argsort(items2, kind='mergesort') + except TypeError: + pytest.skip('requested sort not available for type') + + # mergesort is the most difficult to get right because we want it to be + # stable. 
+ + # According to numpy/core/tests/test_multiarray, """The number of + # sorted items must be greater than ~50 to check the actual algorithm + # because quick and merge sort fall over to insertion sort for small + # arrays.""" + + # mergesort, ascending=True, na_position='last' + result = nargsort(items, kind='mergesort', ascending=True, + na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='first' + result = nargsort(items, kind='mergesort', ascending=True, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='last' + result = nargsort(items, kind='mergesort', ascending=False, + na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='first' + result = nargsort(items, kind='mergesort', ascending=False, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='last' + result = nargsort(items2, kind='mergesort', ascending=True, + na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='first' + result = nargsort(items2, kind='mergesort', ascending=True, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='last' + result = nargsort(items2, kind='mergesort', ascending=False, + na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='first' + result = nargsort(items2, kind='mergesort', ascending=False, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + +class TestMerge(tm.TestCase): + + @pytest.mark.slow + def test_int64_overflow_issues(self): + + # #2690, combinatorial explosion + df1 = DataFrame(np.random.randn(1000, 7), + columns=list('ABCDEF') + ['G1']) + df2 = DataFrame(np.random.randn(1000, 7), + columns=list('ABCDEF') + ['G2']) + + # it works! 
+ result = merge(df1, df2, how='outer') + self.assertTrue(len(result) == 2000) + + low, high, n = -1 << 10, 1 << 10, 1 << 20 + left = DataFrame(np.random.randint(low, high, (n, 7)), + columns=list('ABCDEFG')) + left['left'] = left.sum(axis=1) + + # one-2-one match + i = np.random.permutation(len(left)) + right = left.iloc[i].copy() + right.columns = right.columns[:-1].tolist() + ['right'] + right.index = np.arange(len(right)) + right['right'] *= -1 + + out = merge(left, right, how='outer') + self.assertEqual(len(out), len(left)) + assert_series_equal(out['left'], - out['right'], check_names=False) + result = out.iloc[:, :-2].sum(axis=1) + assert_series_equal(out['left'], result, check_names=False) + self.assertTrue(result.name is None) + + out.sort_values(out.columns.tolist(), inplace=True) + out.index = np.arange(len(out)) + for how in ['left', 'right', 'outer', 'inner']: + assert_frame_equal(out, merge(left, right, how=how, sort=True)) + + # check that left merge w/ sort=False maintains left frame order + out = merge(left, right, how='left', sort=False) + assert_frame_equal(left, out[left.columns.tolist()]) + + out = merge(right, left, how='left', sort=False) + assert_frame_equal(right, out[right.columns.tolist()]) + + # one-2-many/none match + n = 1 << 11 + left = DataFrame(np.random.randint(low, high, (n, 7)).astype('int64'), + columns=list('ABCDEFG')) + + # confirm that this is checking what it is supposed to check + shape = left.apply(Series.nunique).values + self.assertTrue(is_int64_overflow_possible(shape)) + + # add duplicates to left frame + left = concat([left, left], ignore_index=True) + + right = DataFrame(np.random.randint(low, high, (n // 2, 7)) + .astype('int64'), + columns=list('ABCDEFG')) + + # add duplicates & overlap with left to the right frame + i = np.random.choice(len(left), n) + right = concat([right, right, left.iloc[i]], ignore_index=True) + + left['left'] = np.random.randn(len(left)) + right['right'] = np.random.randn(len(right)) + + # shuffle left & right frames + i = np.random.permutation(len(left)) + left = left.iloc[i].copy() + left.index = np.arange(len(left)) + + i = np.random.permutation(len(right)) + right = right.iloc[i].copy() + right.index = np.arange(len(right)) + + # manually compute outer merge + ldict, rdict = defaultdict(list), defaultdict(list) + + for idx, row in left.set_index(list('ABCDEFG')).iterrows(): + ldict[idx].append(row['left']) + + for idx, row in right.set_index(list('ABCDEFG')).iterrows(): + rdict[idx].append(row['right']) + + vals = [] + for k, lval in ldict.items(): + rval = rdict.get(k, [np.nan]) + for lv, rv in product(lval, rval): + vals.append(k + tuple([lv, rv])) + + for k, rval in rdict.items(): + if k not in ldict: + for rv in rval: + vals.append(k + tuple([np.nan, rv])) + + def align(df): + df = df.sort_values(df.columns.tolist()) + df.index = np.arange(len(df)) + return df + + def verify_order(df): + kcols = list('ABCDEFG') + assert_frame_equal(df[kcols].copy(), + df[kcols].sort_values(kcols, kind='mergesort')) + + out = DataFrame(vals, columns=list('ABCDEFG') + ['left', 'right']) + out = align(out) + + jmask = {'left': out['left'].notnull(), + 'right': out['right'].notnull(), + 'inner': out['left'].notnull() & out['right'].notnull(), + 'outer': np.ones(len(out), dtype='bool')} + + for how in 'left', 'right', 'outer', 'inner': + mask = jmask[how] + frame = align(out[mask].copy()) + self.assertTrue(mask.all() ^ mask.any() or how == 'outer') + + for sort in [False, True]: + res = merge(left, right, how=how, sort=sort) + if 
sort: + verify_order(res) + + # as in GH9092 dtypes break with outer/right join + assert_frame_equal(frame, align(res), + check_dtype=how not in ('right', 'outer')) + + +def test_decons(): + + def testit(label_list, shape): + group_index = get_group_index(label_list, shape, sort=True, xnull=True) + label_list2 = decons_group_index(group_index, shape) + + for a, b in zip(label_list, label_list2): + assert (np.array_equal(a, b)) + + shape = (4, 5, 6) + label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( + [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( + [5, 1, 0, 2, 3, 0, 5, 4], 100)] + testit(label_list, shape) + + shape = (10000, 10000) + label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] + testit(label_list, shape) diff --git a/pandas/tests/tools/test_merge.py b/pandas/tests/tools/test_merge.py index d66cd793ec0be..472d8674f9f8d 100644 --- a/pandas/tests/tools/test_merge.py +++ b/pandas/tests/tools/test_merge.py @@ -10,9 +10,7 @@ from pandas.compat import lrange, lzip from pandas.tools.concat import concat from pandas.tools.merge import merge, MergeError -from pandas.util.testing import (assert_frame_equal, - assert_series_equal, - slow) +from pandas.util.testing import assert_frame_equal, assert_series_equal from pandas import DataFrame, Index, MultiIndex, Series, Categorical import pandas.util.testing as tm @@ -1092,137 +1090,6 @@ def test_merge_na_keys(self): tm.assert_frame_equal(result, expected) - @slow - def test_int64_overflow_issues(self): - from itertools import product - from collections import defaultdict - from pandas.core.groupby import _int64_overflow_possible - - # #2690, combinatorial explosion - df1 = DataFrame(np.random.randn(1000, 7), - columns=list('ABCDEF') + ['G1']) - df2 = DataFrame(np.random.randn(1000, 7), - columns=list('ABCDEF') + ['G2']) - - # it works! 
- result = merge(df1, df2, how='outer') - self.assertTrue(len(result) == 2000) - - low, high, n = -1 << 10, 1 << 10, 1 << 20 - left = DataFrame(np.random.randint(low, high, (n, 7)), - columns=list('ABCDEFG')) - left['left'] = left.sum(axis=1) - - # one-2-one match - i = np.random.permutation(len(left)) - right = left.iloc[i].copy() - right.columns = right.columns[:-1].tolist() + ['right'] - right.index = np.arange(len(right)) - right['right'] *= -1 - - out = merge(left, right, how='outer') - self.assertEqual(len(out), len(left)) - assert_series_equal(out['left'], - out['right'], check_names=False) - result = out.iloc[:, :-2].sum(axis=1) - assert_series_equal(out['left'], result, check_names=False) - self.assertTrue(result.name is None) - - out.sort_values(out.columns.tolist(), inplace=True) - out.index = np.arange(len(out)) - for how in ['left', 'right', 'outer', 'inner']: - assert_frame_equal(out, merge(left, right, how=how, sort=True)) - - # check that left merge w/ sort=False maintains left frame order - out = merge(left, right, how='left', sort=False) - assert_frame_equal(left, out[left.columns.tolist()]) - - out = merge(right, left, how='left', sort=False) - assert_frame_equal(right, out[right.columns.tolist()]) - - # one-2-many/none match - n = 1 << 11 - left = DataFrame(np.random.randint(low, high, (n, 7)).astype('int64'), - columns=list('ABCDEFG')) - - # confirm that this is checking what it is supposed to check - shape = left.apply(Series.nunique).values - self.assertTrue(_int64_overflow_possible(shape)) - - # add duplicates to left frame - left = concat([left, left], ignore_index=True) - - right = DataFrame(np.random.randint(low, high, (n // 2, 7)) - .astype('int64'), - columns=list('ABCDEFG')) - - # add duplicates & overlap with left to the right frame - i = np.random.choice(len(left), n) - right = concat([right, right, left.iloc[i]], ignore_index=True) - - left['left'] = np.random.randn(len(left)) - right['right'] = np.random.randn(len(right)) - - # shuffle left & right frames - i = np.random.permutation(len(left)) - left = left.iloc[i].copy() - left.index = np.arange(len(left)) - - i = np.random.permutation(len(right)) - right = right.iloc[i].copy() - right.index = np.arange(len(right)) - - # manually compute outer merge - ldict, rdict = defaultdict(list), defaultdict(list) - - for idx, row in left.set_index(list('ABCDEFG')).iterrows(): - ldict[idx].append(row['left']) - - for idx, row in right.set_index(list('ABCDEFG')).iterrows(): - rdict[idx].append(row['right']) - - vals = [] - for k, lval in ldict.items(): - rval = rdict.get(k, [np.nan]) - for lv, rv in product(lval, rval): - vals.append(k + tuple([lv, rv])) - - for k, rval in rdict.items(): - if k not in ldict: - for rv in rval: - vals.append(k + tuple([np.nan, rv])) - - def align(df): - df = df.sort_values(df.columns.tolist()) - df.index = np.arange(len(df)) - return df - - def verify_order(df): - kcols = list('ABCDEFG') - assert_frame_equal(df[kcols].copy(), - df[kcols].sort_values(kcols, kind='mergesort')) - - out = DataFrame(vals, columns=list('ABCDEFG') + ['left', 'right']) - out = align(out) - - jmask = {'left': out['left'].notnull(), - 'right': out['right'].notnull(), - 'inner': out['left'].notnull() & out['right'].notnull(), - 'outer': np.ones(len(out), dtype='bool')} - - for how in 'left', 'right', 'outer', 'inner': - mask = jmask[how] - frame = align(out[mask].copy()) - self.assertTrue(mask.all() ^ mask.any() or how == 'outer') - - for sort in [False, True]: - res = merge(left, right, how=how, sort=sort) - if 
sort: - verify_order(res) - - # as in GH9092 dtypes break with outer/right join - assert_frame_equal(frame, align(res), - check_dtype=how not in ('right', 'outer')) - def test_join_multi_levels(self): # GH 3662 diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index d938c2eeacbef..e82e702cb6e55 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -34,6 +34,7 @@ concatenate_block_managers) from pandas.util.decorators import Appender, Substitution +from pandas.core.sorting import is_int64_overflow_possible import pandas.core.algorithms as algos import pandas.core.common as com @@ -1397,10 +1398,9 @@ def _sort_labels(uniques, left, right): def _get_join_keys(llab, rlab, shape, sort): - from pandas.core.groupby import _int64_overflow_possible # how many levels can be done without overflow - pred = lambda i: not _int64_overflow_possible(shape[:i]) + pred = lambda i: not is_int64_overflow_possible(shape[:i]) nlev = next(filter(pred, range(len(shape), 0, -1))) # get keys for the first `nlev` levels From 4b97db4caa94690691316df6303092f4954e7e6f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 14 Feb 2017 19:57:51 -0500 Subject: [PATCH 055/933] TST: disable gbq tests again --- pandas/tests/io/test_gbq.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index dfbf3ca69b111..0a76267054ee6 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -253,7 +253,7 @@ def test_generate_bq_schema_deprecated(): gbq.generate_bq_schema(df) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase): def setUp(self): @@ -299,7 +299,7 @@ def test_get_application_default_credentials_returns_credentials(self): self.assertTrue(isinstance(credentials, GoogleCredentials)) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase): def setUp(self): _setup_common() @@ -331,7 +331,7 @@ def test_should_be_able_to_get_results_from_query(self): self.assertTrue(pages is not None) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase): def setUp(self): _setup_common() @@ -449,7 +449,7 @@ def test_read_gbq_with_corrupted_private_key_json_should_fail(self): private_key=re.sub('[a-z]', '9', _get_private_key_contents())) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestReadGBQIntegration(tm.TestCase): @classmethod @@ -503,7 +503,7 @@ def test_should_read_as_service_account_with_key_contents(self): tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): @classmethod @@ -906,7 +906,7 @@ def test_configuration_without_query(self): configuration=config) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. 
@@ -1219,7 +1219,7 @@ def test_dataset_does_not_exist(self): DATASET_ID + "_not_found"), 'Expected dataset not to exist') -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. @@ -1277,7 +1277,7 @@ def test_upload_data(self): self.assertEqual(result['num_rows'][0], test_size) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. From 25fb173dcaff5401f2b496e17beba28d14d54c66 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 14 Feb 2017 20:15:20 -0500 Subject: [PATCH 056/933] TST: fix incorrect url in compressed url network tests in parser --- pandas/tests/io/parser/test_network.py | 53 ++++++++++---------------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 4d75b59b09560..6e762368f82c5 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -7,7 +7,6 @@ import os import pytest -import functools from itertools import product import pandas.util.testing as tm @@ -15,42 +14,32 @@ from pandas.io.parsers import read_csv, read_table -class TestCompressedUrl(object): +@pytest.fixture(scope='module') +def salaries_table(): + path = os.path.join(tm.get_data_path(), 'salaries.csv') + return read_table(path) - compression_to_extension = { - 'gzip': '.gz', - 'bz2': '.bz2', - 'zip': '.zip', - 'xz': '.xz', - } - def setup(self): - path = os.path.join(tm.get_data_path(), 'salaries.csv') - self.local_table = read_table(path) - self.base_url = ('https://github.com/pandas-dev/pandas/raw/master/' - 'pandas/io/tests/parser/data/salaries.csv') +@tm.network +@pytest.mark.parametrize( + "compression,extension", [('gzip', '.gz'), ('bz2', '.bz2'), + ('zip', '.zip'), ('xz', '.xz')]) +def test_compressed_urls(salaries_table, compression, extension): + # test reading compressed urls with various engines and + # extension inference + base_url = ('https://github.com/pandas-dev/pandas/raw/master/' + 'pandas/tests/io/parser/data/salaries.csv') + + url = base_url + extension + + # args is a (compression, engine) tuple + for (c, engine) in product([compression, 'infer'], ['python', 'c']): - @tm.network - def test_compressed_urls(self): - # Test reading compressed tables from URL. - msg = ('Test reading {}-compressed tables from URL: ' - 'compression="{}", engine="{}"') - - for compression, extension in self.compression_to_extension.items(): - url = self.base_url + extension - # args is a (compression, engine) tuple - for args in product([compression, 'infer'], ['python', 'c']): - # test_fxn is a workaround for more descriptive nose reporting. - # See http://stackoverflow.com/a/37393684/4651668. 
- test_fxn = functools.partial(self.check_table) - test_fxn.description = msg.format(compression, *args) - yield (test_fxn, url) + args - - def check_table(self, url, compression, engine): if url.endswith('.xz'): tm._skip_if_no_lzma() - url_table = read_table(url, compression=compression, engine=engine) - tm.assert_frame_equal(url_table, self.local_table) + + url_table = read_table(url, compression=c, engine=engine) + tm.assert_frame_equal(url_table, salaries_table) class TestS3(tm.TestCase): From 03bb9003b3b3db92f3c20a60e88fd2001d6b3948 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 14 Feb 2017 20:44:44 -0500 Subject: [PATCH 057/933] TST: incorrect skip when --skip-network is run closes #15407 --- pandas/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index b3683de3a173b..623feb99e9cdc 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -17,5 +17,5 @@ def pytest_runtest_setup(item): if 'slow' not in item.keywords and item.config.getoption("--only-slow"): pytest.skip("skipping due to --only-slow") - if 'skip' in item.keywords and item.config.getoption("--skip-network"): + if 'network' in item.keywords and item.config.getoption("--skip-network"): pytest.skip("skipping due to --skip-network") From bbb583c30bcee83ed3a2e9a3acfc83535f270632 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 14 Feb 2017 22:25:23 -0500 Subject: [PATCH 058/933] TST: fix test_network.py fixture under py27 --- pandas/tests/io/parser/test_network.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 6e762368f82c5..721d447262149 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -20,11 +20,15 @@ def salaries_table(): return read_table(path) -@tm.network @pytest.mark.parametrize( "compression,extension", [('gzip', '.gz'), ('bz2', '.bz2'), ('zip', '.zip'), ('xz', '.xz')]) def test_compressed_urls(salaries_table, compression, extension): + check_compressed_urls(salaries_table, compression, extension) + + +@tm.network +def check_compressed_urls(salaries_table, compression, extension): # test reading compressed urls with various engines and # extension inference base_url = ('https://github.com/pandas-dev/pandas/raw/master/' From 2372d275b4b2565b4c406d3dfc7c4b4993f1e625 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Wed, 15 Feb 2017 10:20:27 -0500 Subject: [PATCH 059/933] BLD: Numexpr 2.4.6 required closes #15213 Author: Francesc Alted Closes #15383 from FrancescAlted/numexpr-2.4.6 and squashes the following commits: c417fe2 [Francesc Alted] Simplify and remove UserWarning testing on numexpr import e1b34a9 [Francesc Alted] Force a reload of pd.computation for actually triggering the UserWarning c081199 [Francesc Alted] Relax the exact message for the ImportError 73f0319 [Francesc Alted] numexpr requisite raised to 2.4.6 0d4ab9a [Francesc Alted] Restored the old numexpr version dependencies to adjust for old requirements c1aae19 [Francesc Alted] Fixed a lint error 7575ba2 [Francesc Alted] Using constants instead of literals for numexpr version 7a275ce [Francesc Alted] Fixed a typo 93f54aa [Francesc Alted] numexpr section moved to Other API changes section 3b6e58b [Francesc Alted] Removed recommendation for numexpr 2.6.2 f225598 [Francesc Alted] Updated test_compat for numexpr 2.4.6 8bd4ed1 [Francesc Alted] numexpr 2.4.6 requirement moved to other enhancements section e45b742 
[Francesc Alted] Moved pinned versions in CI folder to 2.4.6 6e12e29 [Francesc Alted] Added a notice on the recommended numexpr version ac62653 [Francesc Alted] Require numexpr 2.4.6 ab79c54 [Francesc Alted] Require numexpr 2.6.2 --- ci/requirements-3.4_SLOW.run | 2 +- doc/source/install.rst | 2 +- doc/source/whatsnew/v0.20.0.txt | 4 +++- pandas/computation/__init__.py | 17 +++++------------ pandas/tests/computation/test_compat.py | 15 ++++----------- 5 files changed, 14 insertions(+), 26 deletions(-) diff --git a/ci/requirements-3.4_SLOW.run b/ci/requirements-3.4_SLOW.run index 39018439a1223..90156f62c6e71 100644 --- a/ci/requirements-3.4_SLOW.run +++ b/ci/requirements-3.4_SLOW.run @@ -9,7 +9,7 @@ html5lib patsy beautiful-soup scipy -numexpr=2.4.4 +numexpr=2.4.6 pytables matplotlib lxml diff --git a/doc/source/install.rst b/doc/source/install.rst index 1c7cbc9326614..80a5d7e7d375b 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -226,7 +226,7 @@ Recommended Dependencies * `numexpr `__: for accelerating certain numerical operations. ``numexpr`` uses multiple cores as well as smart chunking and caching to achieve large speedups. - If installed, must be Version 2.1 or higher (excluding a buggy 2.4.4). Version 2.4.6 or higher is highly recommended. + If installed, must be Version 2.4.6 or higher. * `bottleneck `__: for accelerating certain types of ``nan`` evaluations. ``bottleneck`` uses specialized cython routines to achieve large speedups. diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index d76e33caffbf1..26006083d81b4 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1,6 +1,6 @@ .. _whatsnew_0200: -v0.20.0 (????, 2016) +v0.20.0 (????, 2017) -------------------- This is a major release from 0.19 and includes a small number of API changes, several new features, @@ -158,6 +158,7 @@ Other enhancements .. _whatsnew_0200.api_breaking: + Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -429,6 +430,7 @@ Other API Changes - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`) - The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision lost for integers greather than 2**53. Furthermore ``FLOAT`` columns with values above 10**4 are no more casted to ``int64`` which also caused precision lost (:issue: `14064`, :issue:`14305`). - Reorganization of timeseries development tests (:issue:`14854`) +- ``numexpr`` version is now required to be >= 2.4.6 and it will not be used at all if this requisite is not fulfilled (:issue:`15213`). .. 
_whatsnew_0200.deprecations: diff --git a/pandas/computation/__init__.py b/pandas/computation/__init__.py index 9e94215eecf62..e13faf890d1f8 100644 --- a/pandas/computation/__init__.py +++ b/pandas/computation/__init__.py @@ -3,26 +3,19 @@ from distutils.version import LooseVersion _NUMEXPR_INSTALLED = False +_MIN_NUMEXPR_VERSION = "2.4.6" try: import numexpr as ne ver = ne.__version__ - _NUMEXPR_INSTALLED = ver >= LooseVersion('2.1') + _NUMEXPR_INSTALLED = ver >= LooseVersion(_MIN_NUMEXPR_VERSION) - # we specifically disallow 2.4.4 as - # has some hard-to-diagnose bugs - if ver == LooseVersion('2.4.4'): - _NUMEXPR_INSTALLED = False - warnings.warn( - "The installed version of numexpr {ver} is not supported " - "in pandas and will be not be used\n".format(ver=ver), - UserWarning) - - elif not _NUMEXPR_INSTALLED: + if not _NUMEXPR_INSTALLED: warnings.warn( "The installed version of numexpr {ver} is not supported " "in pandas and will be not be used\nThe minimum supported " - "version is 2.1\n".format(ver=ver), UserWarning) + "version is {min_ver}\n".format( + ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning) except ImportError: # pragma: no cover pass diff --git a/pandas/tests/computation/test_compat.py b/pandas/tests/computation/test_compat.py index 599d0c10336dc..77994ac6d2f53 100644 --- a/pandas/tests/computation/test_compat.py +++ b/pandas/tests/computation/test_compat.py @@ -10,6 +10,7 @@ from pandas.computation.engines import _engines import pandas.computation.expr as expr +from pandas.computation import _MIN_NUMEXPR_VERSION ENGINES_PARSERS = list(product(_engines, expr._parsers)) @@ -21,15 +22,10 @@ def test_compat(): try: import numexpr as ne ver = ne.__version__ - if ver == LooseVersion('2.4.4'): + if ver < LooseVersion(_MIN_NUMEXPR_VERSION): assert not _NUMEXPR_INSTALLED - elif ver < LooseVersion('2.1'): - with tm.assert_produces_warning(UserWarning, - check_stacklevel=False): - assert not _NUMEXPR_INSTALLED else: assert _NUMEXPR_INSTALLED - except ImportError: pytest.skip("not testing numexpr version compat") @@ -51,12 +47,9 @@ def testit(): except ImportError: pytest.skip("no numexpr") else: - if ne.__version__ < LooseVersion('2.1'): - with tm.assertRaisesRegexp(ImportError, "'numexpr' version is " - ".+, must be >= 2.1"): + if ne.__version__ < LooseVersion(_MIN_NUMEXPR_VERSION): + with tm.assertRaises(ImportError): testit() - elif ne.__version__ == LooseVersion('2.4.4'): - pytest.skip("numexpr version==2.4.4") else: testit() else: From b261dfe38f114b57e358ad09051501684d88587f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 15 Feb 2017 10:23:36 -0500 Subject: [PATCH 060/933] TST: print skipped tests files xref #15341 Author: Jeff Reback Closes #15408 from jreback/skip and squashes the following commits: 547bee6 [Jeff Reback] TST: print skipped tests files --- .travis.yml | 3 ++- ci/install_travis.sh | 1 + ci/print_skipped.py | 7 ++++--- ci/script_multi.sh | 8 ++++---- ci/script_single.sh | 8 ++++---- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6b90e49b336b2..6245213cec06f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -332,5 +332,6 @@ after_script: - echo "after_script start" - ci/install_test.sh - source activate pandas && python -c "import pandas; pandas.show_versions();" - - ci/print_skipped.py /tmp/pytest.xml + - ci/print_skipped.py /tmp/single.xml + - ci/print_skipped.py /tmp/multiple.xml - echo "after_script done" diff --git a/ci/install_travis.sh b/ci/install_travis.sh index ad804b96a0d82..802d8c9f6b776 100755 
--- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -112,6 +112,7 @@ fi source activate pandas pip install pytest-xdist + if [ "$LINT" ]; then conda install flake8 pip install cpplint diff --git a/ci/print_skipped.py b/ci/print_skipped.py index 9fb05df64bcea..dd2180f6eeb19 100755 --- a/ci/print_skipped.py +++ b/ci/print_skipped.py @@ -30,20 +30,21 @@ def parse_results(filename): i += 1 assert i - 1 == len(skipped) assert i - 1 == len(skipped) - assert len(skipped) == int(root.attrib['skip']) + # assert len(skipped) == int(root.attrib['skip']) return '\n'.join(skipped) def main(args): print('SKIPPED TESTS:') - print(parse_results(args.filename)) + for fn in args.filename: + print(parse_results(fn)) return 0 def parse_args(): import argparse parser = argparse.ArgumentParser() - parser.add_argument('filename', help='XUnit file to parse') + parser.add_argument('filename', nargs='+', help='XUnit file to parse') return parser.parse_args() diff --git a/ci/script_multi.sh b/ci/script_multi.sh index 83f8427cc57ad..f5fbcbbc12f83 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -20,11 +20,11 @@ fi if [ "$BUILD_TEST" ]; then echo "We are not running pytest as this is simply a build test." elif [ "$COVERAGE" ]; then - echo pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas - pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas + echo pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/cov.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/cov.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas else - echo pytest -n 2 -m "not single" $TEST_ARGS pandas - pytest -n 2 -m "not single" $TEST_ARGS pandas # TODO: doctest + echo pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest fi RET="$?" diff --git a/ci/script_single.sh b/ci/script_single.sh index 38021fcac5721..2d7962352842b 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -20,11 +20,11 @@ fi if [ "$BUILD_TEST" ]; then echo "We are not running pytest as this is simply a build test." elif [ "$COVERAGE" ]; then - echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas - pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas + echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas else - echo pytest -m "single" $TEST_ARGS pandas - pytest -m "single" $TEST_ARGS pandas # TODO: doctest + echo pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas + pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest fi RET="$?" From e351ed0fd211a204f960b9116bc13f75ed1f97c4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 15 Feb 2017 10:24:45 -0500 Subject: [PATCH 061/933] PERF: high memory in MI closes #13904 Creates an efficient MultiIndexHashTable in cython. This allows us to efficiently store a multi-index for fast indexing (.get_loc() and .get_indexer()), replacing the current tuple-based (and GIL-holding) use of the PyObject hash table. This uses the pandas.tools.hashing routines to hash each of the 'values' of a MI to a single uint64. 
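As a rough illustration of the scheme (a minimal sketch, not the cython implementation: the example index, the plain python dict standing in for the khash table, and the names table / target / loc are all illustrative), using the hash_tuples helper that this patch itself calls:

    import pandas as pd
    from pandas.tools.hashing import hash_tuples

    mi = pd.MultiIndex.from_product([range(3), list('ab')])
    hashed = hash_tuples(mi)              # one uint64 per MultiIndex entry

    # the cython engine stores these in a uint64 -> position khash table;
    # a plain dict shows the shape of that mapping
    table = {h: i for i, h in enumerate(hashed)}

    target = pd.MultiIndex.from_tuples([(1, 'b')])
    loc = table[hash_tuples(target)[0]]   # candidate position for the key
    assert mi[loc] == (1, 'b')            # re-check the actual value

Because two distinct keys could in principle hash to the same uint64, the engine re-validates every lookup against the actual index values in the same way (see _check_for_collisions in the diff below).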
So this makes MI more memory friendly and much more efficient. You get these speedups, because the creation of the hashtable is now much more efficient. Author: Jeff Reback Closes #15245 from jreback/mi and squashes the following commits: 7df6c34 [Jeff Reback] PERF: high memory in MI --- asv_bench/benchmarks/indexing.py | 30 +++- asv_bench/benchmarks/reindex.py | 4 +- doc/source/whatsnew/v0.20.0.txt | 4 +- pandas/core/algorithms.py | 3 +- pandas/core/frame.py | 3 +- pandas/hashtable.pxd | 8 + pandas/index.pyx | 39 ++++- pandas/indexes/base.py | 5 +- pandas/indexes/multi.py | 203 ++++++++++++++++++---- pandas/io/pytables.py | 4 +- pandas/src/algos_common_helper.pxi.in | 4 +- pandas/src/hashtable_class_helper.pxi.in | 152 +++++++++++++--- pandas/tests/frame/test_mutate_columns.py | 29 +++- pandas/tests/frame/test_repr_info.py | 32 ++++ pandas/tests/groupby/test_groupby.py | 2 +- pandas/tests/indexes/test_multi.py | 136 +++++++++++++-- pandas/tests/indexing/test_multiindex.py | 3 +- pandas/tests/test_multilevel.py | 4 +- pandas/tests/tools/test_hashing.py | 12 ++ pandas/tests/tools/test_join.py | 6 +- pandas/tools/hashing.py | 44 +++-- pandas/types/cast.py | 3 +- 22 files changed, 605 insertions(+), 125 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 27cd320c661e0..d938cc6a6dc4d 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -88,7 +88,7 @@ def setup(self): def time_getitem_scalar(self): self.ts[self.dt] - + class DataFrameIndexing(object): goal_time = 0.2 @@ -189,6 +189,15 @@ def setup(self): self.eps_C = 5 self.eps_D = 5000 self.mdt2 = self.mdt.set_index(['A', 'B', 'C', 'D']).sortlevel() + self.miint = MultiIndex.from_product( + [np.arange(1000), + np.arange(1000)], names=['one', 'two']) + + import string + self.mistring = MultiIndex.from_product( + [np.arange(1000), + np.arange(20), list(string.ascii_letters)], + names=['one', 'two', 'three']) def time_series_xs_mi_ix(self): self.s.ix[999] @@ -197,7 +206,24 @@ def time_frame_xs_mi_ix(self): self.df.ix[999] def time_multiindex_slicers(self): - self.mdt2.loc[self.idx[(self.test_A - self.eps_A):(self.test_A + self.eps_A), (self.test_B - self.eps_B):(self.test_B + self.eps_B), (self.test_C - self.eps_C):(self.test_C + self.eps_C), (self.test_D - self.eps_D):(self.test_D + self.eps_D)], :] + self.mdt2.loc[self.idx[ + (self.test_A - self.eps_A):(self.test_A + self.eps_A), + (self.test_B - self.eps_B):(self.test_B + self.eps_B), + (self.test_C - self.eps_C):(self.test_C + self.eps_C), + (self.test_D - self.eps_D):(self.test_D + self.eps_D)], :] + + def time_multiindex_get_indexer(self): + self.miint.get_indexer( + np.array([(0, 10), (0, 11), (0, 12), + (0, 13), (0, 14), (0, 15), + (0, 16), (0, 17), (0, 18), + (0, 19)], dtype=object)) + + def time_multiindex_string_get_loc(self): + self.mistring.get_loc((999, 19, 'Z')) + + def time_is_monotonic(self): + self.miint.is_monotonic class PanelIndexing(object): diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py index 8db0cd7629332..6fe6c32a96df9 100644 --- a/asv_bench/benchmarks/reindex.py +++ b/asv_bench/benchmarks/reindex.py @@ -16,8 +16,8 @@ def setup(self): data=np.random.rand(10000, 30), columns=range(30)) # multi-index - N = 1000 - K = 20 + N = 5000 + K = 200 level1 = tm.makeStringIndex(N).values.repeat(K) level2 = np.tile(tm.makeStringIndex(K).values, N) index = MultiIndex.from_arrays([level1, level2]) diff --git a/doc/source/whatsnew/v0.20.0.txt 
b/doc/source/whatsnew/v0.20.0.txt index 26006083d81b4..4708abe4d592e 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -472,7 +472,7 @@ Performance Improvements - Improved performance of timeseries plotting with an irregular DatetimeIndex (or with ``compat_x=True``) (:issue:`15073`). - Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`) - +- Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. @@ -502,6 +502,8 @@ Bug Fixes - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`) + +- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) - Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. (:issue:`14956`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 05cfb1bd9ec27..c922ac21e12eb 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1250,7 +1250,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, indexer = np.arange(arr.shape[axis], dtype=np.int64) dtype, fill_value = arr.dtype, arr.dtype.type() else: - indexer = _ensure_int64(indexer) + indexer = _ensure_int64(indexer, copy=False) if not allow_fill: dtype, fill_value = arr.dtype, arr.dtype.type() mask_info = None, False @@ -1303,7 +1303,6 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info) - indexer = _ensure_int64(indexer) func(arr, indexer, out, fill_value) if flip_order: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 16f8d4658dc20..9c66f6dbb273e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1752,7 +1752,8 @@ def _sizeof_fmt(num, size_qualifier): # all cases (e.g., it misses categorical data even with object # categories) deep = False - if 'object' in counts or is_object_dtype(self.index): + if ('object' in counts or + self.index._is_memory_usage_qualified()): size_qualifier = '+' mem_usage = self.memory_usage(index=True, deep=deep).sum() lines.append("memory usage: %s\n" % diff --git a/pandas/hashtable.pxd b/pandas/hashtable.pxd index cabfa43a76f26..9b352ae1c003b 100644 --- a/pandas/hashtable.pxd +++ b/pandas/hashtable.pxd @@ -31,6 +31,14 @@ cdef class PyObjectHashTable(HashTable): cpdef get_item(self, object val) cpdef set_item(self, object key, Py_ssize_t val) +cdef class MultiIndexHashTable(HashTable): + cdef: + kh_uint64_t *table + object mi + + cpdef get_item(self, object val) + cpdef set_item(self, object key, Py_ssize_t val) + cdef class StringHashTable(HashTable): cdef kh_str_t *table diff --git a/pandas/index.pyx b/pandas/index.pyx index 0c975d1775a03..37fe7d90bebe0 100644 --- a/pandas/index.pyx +++ b/pandas/index.pyx @@ -182,7 +182,7 @@ cdef class IndexEngine: Py_ssize_t i, n int last_true - values = self._get_index_values() + values = np.array(self._get_index_values(), copy=False) n = len(values) result = np.empty(n, dtype=bool) @@ -284,7 +284,6 @@ cdef class IndexEngine: if not self.is_mapping_populated: values = self._get_index_values() - self.mapping = 
self._make_hash_table(len(values)) self.mapping.map_locations(values) @@ -322,7 +321,7 @@ cdef class IndexEngine: Py_ssize_t i, j, n, n_t, n_alloc self._ensure_mapping_populated() - values = self._get_index_values() + values = np.array(self._get_index_values(), copy=False) stargets = set(targets) n = len(values) n_t = len(targets) @@ -554,5 +553,39 @@ cdef inline bint _is_utc(object tz): return tz is UTC or isinstance(tz, _du_utc) +cdef class MultiIndexEngine(IndexEngine): + + def _call_monotonic(self, object mi): + # defer these back to the mi itself + return (mi.is_monotonic_increasing, + mi.is_monotonic_decreasing, + mi.is_unique) + + def get_backfill_indexer(self, other, limit=None): + # we coerce to ndarray-of-tuples + values = np.array(self._get_index_values()) + return algos.backfill_object(values, other, limit=limit) + + def get_pad_indexer(self, other, limit=None): + # we coerce to ndarray-of-tuples + values = np.array(self._get_index_values()) + return algos.pad_object(values, other, limit=limit) + + cpdef get_loc(self, object val): + if is_definitely_invalid_key(val): + raise TypeError("'{val}' is an invalid key".format(val=val)) + + self._ensure_mapping_populated() + if not self.unique: + return self._get_loc_duplicates(val) + + try: + return self.mapping.get_item(val) + except TypeError: + raise KeyError(val) + + cdef _make_hash_table(self, n): + return _hash.MultiIndexHashTable(n) + # Generated from template. include "index_class_helper.pxi" diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index bb2941a121452..c483fb0764a4c 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1431,6 +1431,10 @@ def inferred_type(self): """ return a string of the type inferred from the values """ return lib.infer_dtype(self) + def _is_memory_usage_qualified(self): + """ return a boolean if we need a qualified .info display """ + return self.is_object() + def is_type_compatible(self, kind): return kind == self.inferred_type @@ -2446,7 +2450,6 @@ def _get_fill_indexer_searchsorted(self, target, method, limit=None): 'if index and target are monotonic' % method) side = 'left' if method == 'pad' else 'right' - target = np.asarray(target) # find exact matches first (this simplifies the algorithm) indexer = self.get_indexer(target) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 653ba1fee5691..57739548a17d6 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -14,7 +14,6 @@ from pandas.compat.numpy import function as nv from pandas import compat - from pandas.types.common import (_ensure_int64, _ensure_platform_int, is_object_dtype, @@ -73,6 +72,7 @@ class MultiIndex(Index): _levels = FrozenList() _labels = FrozenList() _comparables = ['names'] + _engine_type = _index.MultiIndexEngine rename = Index.set_names def __new__(cls, levels=None, labels=None, sortorder=None, names=None, @@ -114,7 +114,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, result._verify_integrity() if _set_identity: result._reset_identity() - return result def _verify_integrity(self, labels=None, levels=None): @@ -429,6 +428,12 @@ def _shallow_copy(self, values=None, **kwargs): def dtype(self): return np.dtype('O') + def _is_memory_usage_qualified(self): + """ return a boolean if we need a qualified .info display """ + def f(l): + return 'mixed' in l or 'string' in l or 'unicode' in l + return any([f(l) for l in self._inferred_type_levels]) + @Appender(Index.memory_usage.__doc__) def memory_usage(self, deep=False): # we are overwriting 
our base class to avoid @@ -619,6 +624,10 @@ def _get_level_number(self, level): _tuples = None + @cache_readonly + def _engine(self): + return self._engine_type(lambda: self, len(self)) + @property def values(self): if self._tuples is not None: @@ -655,10 +664,95 @@ def _has_complex_internals(self): # to disable groupby tricks return True + @cache_readonly + def is_monotonic(self): + """ + return if the index is monotonic increasing (only equal or + increasing) values. + """ + return self.is_monotonic_increasing + + @cache_readonly + def is_monotonic_increasing(self): + """ + return if the index is monotonic increasing (only equal or + increasing) values. + """ + + # reversed() because lexsort() wants the most significant key last. + values = [self._get_level_values(i) + for i in reversed(range(len(self.levels)))] + try: + sort_order = np.lexsort(values) + return Index(sort_order).is_monotonic + except TypeError: + + # we have mixed types and np.lexsort is not happy + return Index(self.values).is_monotonic + + @property + def is_monotonic_decreasing(self): + """ + return if the index is monotonic decreasing (only equal or + decreasing) values. + """ + return False + @cache_readonly def is_unique(self): return not self.duplicated().any() + @cache_readonly + def _have_mixed_levels(self): + """ return a boolean list indicating if we have mixed levels """ + return ['mixed' in l for l in self._inferred_type_levels] + + @cache_readonly + def _inferred_type_levels(self): + """ return a list of the inferred types, one for each level """ + return [i.inferred_type for i in self.levels] + + @cache_readonly + def _hashed_values(self): + """ return a uint64 ndarray of my hashed values """ + from pandas.tools.hashing import hash_tuples + return hash_tuples(self) + + def _hashed_indexing_key(self, key): + """ + validate and return the hash for the provided key + + *this is internal, for use by the cython routines* + + Parameters + ---------- + key : string or tuple + + Returns + ------- + np.uint64 + + Notes + ----- + we need to stringify if we have mixed levels + + """ + from pandas.tools.hashing import hash_tuples + + if not isinstance(key, tuple): + return hash_tuples(key) + + if not len(key) == self.nlevels: + raise KeyError + + def f(k, stringify): + if stringify and not isinstance(k, compat.string_types): + k = str(k) + return k + key = tuple([f(k, stringify) + for k, stringify in zip(key, self._have_mixed_levels)]) + return hash_tuples(key) + @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) @Appender(base._shared_docs['duplicated'] % ibase._index_doc_kwargs) @@ -748,26 +842,44 @@ def _try_mi(k): raise InvalidIndexError(key) - def get_level_values(self, level): + def _get_level_values(self, level): """ - Return vector of label values for requested level, equal to the length - of the index + Return vector of label values for requested level, + equal to the length of the index + + **this is an internal method** Parameters ---------- - level : int or level name + level : int level Returns ------- values : ndarray """ - num = self._get_level_number(level) - unique = self.levels[num] # .values - labels = self.labels[num] - filled = algos.take_1d(unique.values, labels, + + unique = self.levels[level] + labels = self.labels[level] + filled = algos.take_1d(unique._values, labels, fill_value=unique._na_value) - values = unique._shallow_copy(filled) - return values + return filled + + def get_level_values(self, level): + """ + Return vector of label values for requested level, + 
equal to the length of the index + + Parameters + ---------- + level : int or level name + + Returns + ------- + values : Index + """ + level = self._get_level_number(level) + values = self._get_level_values(level) + return self.levels[level]._shallow_copy(values) def format(self, space=2, sparsify=None, adjoin=True, names=False, na_rep=None, formatter=None): @@ -852,7 +964,8 @@ def to_frame(self, index=True): from pandas import DataFrame result = DataFrame({(name or level): self.get_level_values(level) for name, level in - zip(self.names, range(len(self.levels)))}) + zip(self.names, range(len(self.levels)))}, + copy=False) if index: result.index = self return result @@ -1482,29 +1595,41 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) target = _ensure_index(target) - target_index = target - if isinstance(target, MultiIndex): - target_index = target._tuple_index + # empty indexer + if is_list_like(target) and not len(target): + return _ensure_platform_int(np.array([])) + + if not isinstance(target, MultiIndex): + try: + target = MultiIndex.from_tuples(target) + except (TypeError, ValueError): - if not is_object_dtype(target_index.dtype): - return np.ones(len(target_index)) * -1 + # let's instead try with a straight Index + if method is None: + return Index(self.values).get_indexer(target, + method=method, + limit=limit, + tolerance=tolerance) if not self.is_unique: raise Exception('Reindexing only valid with uniquely valued Index ' 'objects') - self_index = self._tuple_index - if method == 'pad' or method == 'backfill': if tolerance is not None: raise NotImplementedError("tolerance not implemented yet " 'for MultiIndex') - indexer = self_index._get_fill_indexer(target, method, limit) + indexer = self._get_fill_indexer(target, method, limit) elif method == 'nearest': raise NotImplementedError("method='nearest' not implemented yet " 'for MultiIndex; see GitHub issue 9365') else: + # we may not compare equally because of hashing if we + # don't have the same dtypes + if self._inferred_type_levels != target._inferred_type_levels: + return Index(self.values).get_indexer(target.values) + indexer = self._engine.get_indexer(target) return _ensure_platform_int(indexer) @@ -1571,17 +1696,6 @@ def reindex(self, target, method=None, level=None, limit=None, return target, indexer - @cache_readonly - def _tuple_index(self): - """ - Convert MultiIndex to an Index of tuples - - Returns - ------- - index : Index - """ - return Index(self._values) - def get_slice_bound(self, label, side, kind): if not isinstance(label, tuple): @@ -1828,8 +1942,9 @@ def partial_selection(key, indexer=None): key = tuple(self[indexer].tolist()[0]) - return (self._engine.get_loc(_values_from_object(key)), - None) + return (self._engine.get_loc( + _values_from_object(key)), None) + else: return partial_selection(key) else: @@ -2115,10 +2230,24 @@ def equals(self, other): return False for i in range(self.nlevels): + slabels = self.labels[i] + slabels = slabels[slabels != -1] svalues = algos.take_nd(np.asarray(self.levels[i]._values), - self.labels[i], allow_fill=False) + slabels, allow_fill=False) + + olabels = other.labels[i] + olabels = olabels[olabels != -1] ovalues = algos.take_nd(np.asarray(other.levels[i]._values), - other.labels[i], allow_fill=False) + olabels, allow_fill=False) + + # since we use NaT for both datetime64 and timedelta64 + # we can have a situation where a level is typed say 
+ # timedelta64 in self (IOW it has other values than NaT) + # but is typed datetime64 in other (where it's all NaT) + # but these are equivalent + if len(svalues) == 0 and len(ovalues) == 0: + continue + if not array_equivalent(svalues, ovalues): return False diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9224f7d3d9a94..d8de1dcd61977 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3787,9 +3787,9 @@ def read(self, where=None, columns=None, **kwargs): lp = DataFrame(c.data, index=long_index, columns=c.values) # need a better algorithm - tuple_index = long_index._tuple_index + tuple_index = long_index.values - unique_tuples = lib.fast_unique(tuple_index.values) + unique_tuples = lib.fast_unique(tuple_index) unique_tuples = _asarray_tuplesafe(unique_tuples) indexer = match(unique_tuples, tuple_index) diff --git a/pandas/src/algos_common_helper.pxi.in b/pandas/src/algos_common_helper.pxi.in index 42089f9520ab6..b83dec1d26242 100644 --- a/pandas/src/algos_common_helper.pxi.in +++ b/pandas/src/algos_common_helper.pxi.in @@ -579,12 +579,12 @@ def get_dispatch(dtypes): {{for name, c_type, dtype in get_dispatch(dtypes)}} -cpdef ensure_{{name}}(object arr): +cpdef ensure_{{name}}(object arr, copy=True): if util.is_array(arr): if ( arr).descr.type_num == NPY_{{c_type}}: return arr else: - return arr.astype(np.{{dtype}}) + return arr.astype(np.{{dtype}}, copy=copy) else: return np.array(arr, dtype=np.{{dtype}}) diff --git a/pandas/src/hashtable_class_helper.pxi.in b/pandas/src/hashtable_class_helper.pxi.in index ef385ba7dca1c..3ce82dace40a9 100644 --- a/pandas/src/hashtable_class_helper.pxi.in +++ b/pandas/src/hashtable_class_helper.pxi.in @@ -262,13 +262,6 @@ cdef class {{name}}HashTable(HashTable): else: raise KeyError(val) - def get_iter_test(self, {{dtype}}_t key, Py_ssize_t iterations): - cdef Py_ssize_t i, val=0 - for i in range(iterations): - k = kh_get_{{dtype}}(self.table, val) - if k != self.table.n_buckets: - val = self.table.vals[k] - cpdef set_item(self, {{dtype}}_t key, Py_ssize_t val): cdef: khiter_t k @@ -501,18 +494,6 @@ cdef class StringHashTable(HashTable): else: raise KeyError(val) - def get_iter_test(self, object key, Py_ssize_t iterations): - cdef: - Py_ssize_t i, val - char *v - - v = util.get_c_string(key) - - for i in range(iterations): - k = kh_get_str(self.table, v) - if k != self.table.n_buckets: - val = self.table.vals[k] - cpdef set_item(self, object key, Py_ssize_t val): cdef: khiter_t k @@ -755,15 +736,6 @@ cdef class PyObjectHashTable(HashTable): else: raise KeyError(val) - def get_iter_test(self, object key, Py_ssize_t iterations): - cdef Py_ssize_t i, val - if key != key or key is None: - key = na_sentinel - for i in range(iterations): - k = kh_get_pymap(self.table, key) - if k != self.table.n_buckets: - val = self.table.vals[k] - cpdef set_item(self, object key, Py_ssize_t val): cdef: khiter_t k @@ -874,3 +846,127 @@ cdef class PyObjectHashTable(HashTable): count += 1 return np.asarray(labels) + + +cdef class MultiIndexHashTable(HashTable): + + def __init__(self, size_hint=1): + self.table = kh_init_uint64() + self.mi = None + kh_resize_uint64(self.table, size_hint) + + def __dealloc__(self): + if self.table is not NULL: + kh_destroy_uint64(self.table) + self.table = NULL + + def __len__(self): + return self.table.size + + def sizeof(self, deep=False): + """ return the size of my table in bytes """ + return self.table.n_buckets * (sizeof(uint64_t) + # keys + sizeof(size_t) + # vals + sizeof(uint32_t)) # flags + + def 
_check_for_collisions(self, int64_t[:] locs, object mi): + # validate that the locs map to the actual values + # provided in the mi + # we can only check if we *don't* have any missing values + # :< + cdef: + ndarray[int64_t] alocs + + alocs = np.asarray(locs) + if (alocs != -1).all(): + + result = self.mi.take(locs) + if isinstance(mi, tuple): + from pandas import Index + mi = Index([mi]) + if not result.equals(mi): + raise AssertionError( + "hash collision\nlocs:\n{}\n" + "result:\n{}\nmi:\n{}".format(alocs, result, mi)) + + def __contains__(self, object key): + try: + self.get_item(key) + return True + except (KeyError, ValueError, TypeError): + return False + + cpdef get_item(self, object key): + cdef: + khiter_t k + uint64_t value + int64_t[:] locs + Py_ssize_t loc + + value = self.mi._hashed_indexing_key(key) + k = kh_get_uint64(self.table, value) + if k != self.table.n_buckets: + loc = self.table.vals[k] + locs = np.array([loc], dtype=np.int64) + self._check_for_collisions(locs, key) + return loc + else: + raise KeyError(key) + + cpdef set_item(self, object key, Py_ssize_t val): + raise NotImplementedError + + @cython.boundscheck(False) + def map_locations(self, object mi): + cdef: + Py_ssize_t i, n + ndarray[uint64_t] values + uint64_t val + int ret = 0 + khiter_t k + + self.mi = mi + n = len(mi) + values = mi._hashed_values + + with nogil: + for i in range(n): + val = values[i] + k = kh_put_uint64(self.table, val, &ret) + self.table.vals[k] = i + + @cython.boundscheck(False) + def lookup(self, object mi): + # look up with a target mi + cdef: + Py_ssize_t i, n + ndarray[uint64_t] values + int ret = 0 + uint64_t val + khiter_t k + int64_t[:] locs + + n = len(mi) + values = mi._hashed_values + + locs = np.empty(n, dtype=np.int64) + + with nogil: + for i in range(n): + val = values[i] + k = kh_get_uint64(self.table, val) + if k != self.table.n_buckets: + locs[i] = self.table.vals[k] + else: + locs[i] = -1 + + self._check_for_collisions(locs, mi) + return np.asarray(locs) + + def unique(self, object mi): + raise NotImplementedError + + def get_labels(self, object mi, ObjectVector uniques, + Py_ssize_t count_prior, int64_t na_sentinel, + bint check_null=True): + raise NotImplementedError diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 6b4c56747c981..fe3f3c554a9b5 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- from __future__ import print_function - +import pytest from pandas.compat import range, lrange import numpy as np -from pandas import DataFrame, Series, Index +from pandas import DataFrame, Series, Index, MultiIndex from pandas.util.testing import (assert_series_equal, assert_frame_equal, @@ -165,6 +165,31 @@ def test_delitem(self): del self.frame['A'] self.assertNotIn('A', self.frame) + def test_delitem_multiindex(self): + midx = MultiIndex.from_product([['A', 'B'], [1, 2]]) + df = DataFrame(np.random.randn(4, 4), columns=midx) + assert len(df.columns) == 4 + assert ('A', ) in df.columns + assert 'A' in df.columns + + result = df['A'] + assert isinstance(result, DataFrame) + del df['A'] + + assert len(df.columns) == 2 + + # A still in the levels, BUT get a KeyError if trying + # to delete + assert ('A', ) not in df.columns + with pytest.raises(KeyError): + del df[('A',)] + + # xref: https://github.com/pandas-dev/pandas/issues/2770 + # the 'A' is STILL in the columns! 
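# (A minimal standalone sketch of the GH 2770 quirk these assertions pin
# down, assuming only numpy and pandas: deleting a top-level label from
# MultiIndex columns drops it from the visible labels but not from the
# stored levels, so membership still reports True while a second delete
# raises KeyError.)
import numpy as np
import pandas as pd

midx = pd.MultiIndex.from_product([['A', 'B'], [1, 2]])
df = pd.DataFrame(np.random.randn(4, 4), columns=midx)
del df['A']
assert df.columns.tolist() == [('B', 1), ('B', 2)]  # 'A' gone from labels
assert 'A' in df.columns.levels[0]                  # ...but kept in levels
try:
    del df['A']        # the label now lives only in the levels
except KeyError:
    pass               # raises, exactly as the assertions here expect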
+        assert 'A' in df.columns
+        with pytest.raises(KeyError):
+            del df['A']
+
     def test_pop(self):
         self.frame.columns.name = 'baz'

diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py
index 2df297d03bcdf..024e11e63a924 100644
--- a/pandas/tests/frame/test_repr_info.py
+++ b/pandas/tests/frame/test_repr_info.py
@@ -301,10 +301,12 @@ def test_info_memory_usage(self):
             data[i] = np.random.randint(2, size=n).astype(dtype)
         df = DataFrame(data)
         buf = StringIO()
+
         # display memory usage case
         df.info(buf=buf, memory_usage=True)
         res = buf.getvalue().splitlines()
         self.assertTrue("memory usage: " in res[-1])
+
         # do not display memory usage case
         df.info(buf=buf, memory_usage=False)
         res = buf.getvalue().splitlines()
@@ -312,11 +314,13 @@ def test_info_memory_usage(self):

         df.info(buf=buf, memory_usage=True)
         res = buf.getvalue().splitlines()
+
         # memory usage is a lower bound, so print it as XYZ+ MB
         self.assertTrue(re.match(r"memory usage: [^+]+\+", res[-1]))

         df.iloc[:, :5].info(buf=buf, memory_usage=True)
         res = buf.getvalue().splitlines()
+
         # excluded column with object dtype, so estimate is accurate
         self.assertFalse(re.match(r"memory usage: [^+]+\+", res[-1]))

@@ -380,6 +384,34 @@ def test_info_memory_usage(self):
         diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df)
         self.assertTrue(abs(diff) < 100)

+    def test_info_memory_usage_qualified(self):
+
+        buf = StringIO()
+        df = DataFrame(1, columns=list('ab'),
+                       index=[1, 2, 3])
+        df.info(buf=buf)
+        self.assertFalse('+' in buf.getvalue())
+
+        buf = StringIO()
+        df = DataFrame(1, columns=list('ab'),
+                       index=list('ABC'))
+        df.info(buf=buf)
+        self.assertTrue('+' in buf.getvalue())
+
+        buf = StringIO()
+        df = DataFrame(1, columns=list('ab'),
+                       index=pd.MultiIndex.from_product(
+                           [range(3), range(3)]))
+        df.info(buf=buf)
+        self.assertFalse('+' in buf.getvalue())
+
+        buf = StringIO()
+        df = DataFrame(1, columns=list('ab'),
+                       index=pd.MultiIndex.from_product(
+                           [range(3), ['foo', 'bar']]))
+        df.info(buf=buf)
+        self.assertTrue('+' in buf.getvalue())
+
     def test_info_memory_usage_bug_on_multiindex(self):
         # GH 14308
         # memory usage introspection should not materialize .values

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 3a6a9eaaa8e72..d53446870beb1 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1588,7 +1588,7 @@ def test_groupby_as_index_cython(self):
         result = grouped.mean()
         expected = data.groupby(['A', 'B']).mean()

-        arrays = lzip(*expected.index._tuple_index)
+        arrays = lzip(*expected.index.values)
         expected.insert(0, 'A', arrays[0])
         expected.insert(1, 'B', arrays[1])
         expected.index = np.arange(len(expected))

diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 702c4758da245..5611492b4af1b 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -1046,6 +1046,21 @@ def test_contains(self):
         self.assertNotIn(('bar', 'two'), self.index)
         self.assertNotIn(None, self.index)

+    def test_contains_top_level(self):
+        midx = MultiIndex.from_product([['A', 'B'], [1, 2]])
+        assert 'A' in midx
+        assert 'A' not in midx._engine
+
+    def test_contains_with_nat(self):
+        # MI with a NaT
+        mi = MultiIndex(levels=[['C'],
+                                pd.date_range('2012-01-01', periods=5)],
+                        labels=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
+                        names=[None, 'B'])
+        self.assertTrue(('C', pd.Timestamp('2012-01-01')) in mi)
+        for val in mi.values:
+            self.assertTrue(val in mi)
+
     def test_is_all_dates(self):
self.assertFalse(self.index.is_all_dates) @@ -1102,6 +1117,17 @@ def test_get_loc_duplicates(self): xp = 0 assert (rs == xp) + def test_get_value_duplicates(self): + index = MultiIndex(levels=[['D', 'B', 'C'], + [0, 26, 27, 37, 57, 67, 75, 82]], + labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=['tag', 'day']) + + assert index.get_loc('D') == slice(0, 3) + with pytest.raises(KeyError): + index._engine.get_value(np.array([]), 'D') + def test_get_loc_level(self): index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( @@ -1294,7 +1320,7 @@ def test_get_indexer(self): assert_almost_equal(r1, rbfill1) # pass non-MultiIndex - r1 = idx1.get_indexer(idx2._tuple_index) + r1 = idx1.get_indexer(idx2.values) rexp1 = idx1.get_indexer(idx2) assert_almost_equal(r1, rexp1) @@ -1316,6 +1342,19 @@ def test_get_indexer_nearest(self): with tm.assertRaises(NotImplementedError): midx.get_indexer(['a'], method='pad', tolerance=2) + def test_hash_collisions(self): + # non-smoke test that we don't get hash collisions + + index = MultiIndex.from_product([np.arange(1000), np.arange(1000)], + names=['one', 'two']) + result = index.get_indexer(index.values) + self.assert_numpy_array_equal(result, + np.arange(len(index), dtype='int64')) + + for i in [0, 1, len(index) - 2, len(index) - 1]: + result = index.get_loc(index[i]) + self.assertEqual(result, i) + def test_format(self): self.index.format() self.index[:0].format() @@ -1420,12 +1459,13 @@ def test_bounds(self): self.index._bounds def test_equals_multi(self): - self.assertTrue(self.index.equals(self.index)) - self.assertTrue(self.index.equal_levels(self.index)) - - self.assertFalse(self.index.equals(self.index[:-1])) + assert self.index.equals(self.index) + assert not self.index.equals(self.index.values) + assert self.index.equals(Index(self.index.values)) - self.assertTrue(self.index.equals(self.index._tuple_index)) + assert self.index.equal_levels(self.index) + assert not self.index.equals(self.index[:-1]) + assert not self.index.equals(self.index[-1]) # different number of levels index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( @@ -1433,8 +1473,8 @@ def test_equals_multi(self): [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1]) - self.assertFalse(index.equals(index2)) - self.assertFalse(index.equal_levels(index2)) + assert not index.equals(index2) + assert not index.equal_levels(index2) # levels are different major_axis = Index(lrange(4)) @@ -1445,8 +1485,8 @@ def test_equals_multi(self): index = MultiIndex(levels=[major_axis, minor_axis], labels=[major_labels, minor_labels]) - self.assertFalse(self.index.equals(index)) - self.assertFalse(self.index.equal_levels(index)) + assert not self.index.equals(index) + assert not self.index.equal_levels(index) # some of the labels are different major_axis = Index(['foo', 'bar', 'baz', 'qux']) @@ -1457,7 +1497,16 @@ def test_equals_multi(self): index = MultiIndex(levels=[major_axis, minor_axis], labels=[major_labels, minor_labels]) - self.assertFalse(self.index.equals(index)) + assert not self.index.equals(index) + + def test_equals_missing_values(self): + # make sure take is not using -1 + i = pd.MultiIndex.from_tuples([(0, pd.NaT), + (0, pd.Timestamp('20130101'))]) + result = i[0:1].equals(i[0]) + self.assertFalse(result) + result = i[1:2].equals(i[1]) + self.assertFalse(result) def 
test_identical(self): mi = self.index.copy() @@ -1510,7 +1559,7 @@ def test_union(self): the_union = piece1 | piece2 - tups = sorted(self.index._tuple_index) + tups = sorted(self.index.values) expected = MultiIndex.from_tuples(tups) self.assertTrue(the_union.equals(expected)) @@ -1523,7 +1572,7 @@ def test_union(self): self.assertIs(the_union, self.index) # won't work in python 3 - # tuples = self.index._tuple_index + # tuples = self.index.values # result = self.index[:4] | tuples[4:] # self.assertTrue(result.equals(tuples)) @@ -1543,7 +1592,7 @@ def test_intersection(self): piece2 = self.index[3:] the_int = piece1 & piece2 - tups = sorted(self.index[3:5]._tuple_index) + tups = sorted(self.index[3:5].values) expected = MultiIndex.from_tuples(tups) self.assertTrue(the_int.equals(expected)) @@ -1557,7 +1606,7 @@ def test_intersection(self): self.assertTrue(empty.equals(expected)) # can't do in python 3 - # tuples = self.index._tuple_index + # tuples = self.index.values # result = self.index & tuples # self.assertTrue(result.equals(tuples)) @@ -1616,7 +1665,7 @@ def test_difference(self): self.assertEqual(len(result), 0) # raise Exception called with non-MultiIndex - result = first.difference(first._tuple_index) + result = first.difference(first.values) self.assertTrue(result.equals(first[:0])) # name from empty array @@ -1642,7 +1691,7 @@ def test_from_tuples(self): def test_argsort(self): result = self.index.argsort() - expected = self.index._tuple_index.argsort() + expected = self.index.values.argsort() tm.assert_numpy_array_equal(result, expected) def test_sortlevel(self): @@ -2297,11 +2346,60 @@ def test_level_setting_resets_attributes(self): ind = MultiIndex.from_arrays([ ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] ]) - assert ind.is_monotonic + self.assertTrue(ind.is_monotonic) ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], inplace=True) + # if this fails, probably didn't reset the cache correctly. 
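# (A standalone restatement of the invariant behind this check, assuming a
# pandas build from this era: ``is_monotonic`` is cached on the index, so
# an in-place ``set_levels`` has to invalidate that cache or the stale
# True from the first lookup would survive the mutation.)
import pandas as pd

ind = pd.MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'],
                                 [1, 2, 1, 2, 3]])
assert ind.is_monotonic                    # first lookup fills the cache
ind.set_levels([['A', 'B', 'A', 'A', 'B'],
                [2, 1, 3, -2, 5]], inplace=True)
assert not ind.is_monotonic                # only holds if the cache was reset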
- assert not ind.is_monotonic + self.assertFalse(ind.is_monotonic) + + def test_is_monotonic(self): + i = MultiIndex.from_product([np.arange(10), + np.arange(10)], names=['one', 'two']) + self.assertTrue(i.is_monotonic) + self.assertTrue(Index(i.values).is_monotonic) + + i = MultiIndex.from_product([np.arange(10, 0, -1), + np.arange(10)], names=['one', 'two']) + self.assertFalse(i.is_monotonic) + self.assertFalse(Index(i.values).is_monotonic) + + i = MultiIndex.from_product([np.arange(10), + np.arange(10, 0, -1)], + names=['one', 'two']) + self.assertFalse(i.is_monotonic) + self.assertFalse(Index(i.values).is_monotonic) + + i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']]) + self.assertFalse(i.is_monotonic) + self.assertFalse(Index(i.values).is_monotonic) + + # string ordering + i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + self.assertFalse(i.is_monotonic) + self.assertFalse(Index(i.values).is_monotonic) + + i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], + ['mom', 'next', 'zenith']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + self.assertTrue(i.is_monotonic) + self.assertTrue(Index(i.values).is_monotonic) + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[[1, 2, 3, 4], ['gb00b03mlx29', 'lu0197800237', + 'nl0000289783', + 'nl0000289965', 'nl0000301109']], + labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=['household_id', 'asset_id']) + + self.assertFalse(i.is_monotonic) def test_isin(self): values = [('foo', 2), ('bar', 3), ('quux', 4)] diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index 1e6ecbbcdc756..b6b9ac93b234c 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -413,9 +413,10 @@ def f(): df.loc[idx[:, :, 'Stock'], 'price'] *= 2 tm.assert_frame_equal(df, expected) - def test_getitem_multiindex(self): + def test_getitem_duplicates_multiindex(self): # GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise # the appropriate error, only in PY3 of course! 
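# (A compact standalone illustration of the rule the monotonicity cases
# above encode, assuming only numpy and pandas: a MultiIndex is monotonic
# exactly when its tuples are non-decreasing, so with ``from_product``
# every input level must itself be sorted, and the answer must agree with
# the equivalent flat Index of tuples.)
import numpy as np
import pandas as pd

inc = pd.MultiIndex.from_product([np.arange(10), np.arange(10)])
dec = pd.MultiIndex.from_product([np.arange(10, 0, -1), np.arange(10)])
assert inc.is_monotonic and not dec.is_monotonic
assert pd.Index(inc.values).is_monotonic   # tuple Index gives the same answer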
+ index = MultiIndex(levels=[['D', 'B', 'C'], [0, 26, 27, 37, 57, 67, 75, 82]], labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 8e0628eefa392..0f36af2c8c4e7 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1469,7 +1469,7 @@ def test_frame_getitem_not_sorted(self): df = self.frame.T df['foo', 'four'] = 'foo' - arrays = [np.array(x) for x in zip(*df.columns._tuple_index)] + arrays = [np.array(x) for x in zip(*df.columns.values)] result = df['foo'] result2 = df.loc[:, 'foo'] @@ -1493,7 +1493,7 @@ def test_series_getitem_not_sorted(self): index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) - arrays = [np.array(x) for x in zip(*index._tuple_index)] + arrays = [np.array(x) for x in zip(*index.values)] result = s['qux'] result2 = s.loc['qux'] diff --git a/pandas/tests/tools/test_hashing.py b/pandas/tests/tools/test_hashing.py index 05a352f259e8b..9bed0d428bc41 100644 --- a/pandas/tests/tools/test_hashing.py +++ b/pandas/tests/tools/test_hashing.py @@ -152,6 +152,18 @@ def test_categorical_consistency(self): tm.assert_series_equal(h1, h2) tm.assert_series_equal(h1, h3) + def test_categorical_with_nan_consistency(self): + c = pd.Categorical.from_codes( + [-1, 0, 1, 2, 3, 4], + categories=pd.date_range('2012-01-01', periods=5, name='B')) + expected = hash_array(c, categorize=False) + c = pd.Categorical.from_codes( + [-1, 0], + categories=[pd.Timestamp('2012-01-01')]) + result = hash_array(c, categorize=False) + assert result[0] in expected + assert result[1] in expected + def test_pandas_errors(self): for obj in [pd.Timestamp('20130101'), tm.makePanel()]: diff --git a/pandas/tests/tools/test_join.py b/pandas/tests/tools/test_join.py index ab42b1212301b..ee6b3d57b852d 100644 --- a/pandas/tests/tools/test_join.py +++ b/pandas/tests/tools/test_join.py @@ -7,7 +7,7 @@ from pandas.compat import lrange import pandas.compat as compat from pandas.util.testing import assert_frame_equal -from pandas import DataFrame, MultiIndex, Series, merge, concat +from pandas import DataFrame, MultiIndex, Series, Index, merge, concat import pandas._join as _join import pandas.util.testing as tm @@ -368,7 +368,7 @@ def test_join_multiindex(self): df2 = df2.sort_index(level=0) joined = df1.join(df2, how='outer') - ex_index = index1._tuple_index.union(index2._tuple_index) + ex_index = Index(index1.values).union(Index(index2.values)) expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) expected.index.names = index1.names assert_frame_equal(joined, expected) @@ -378,7 +378,7 @@ def test_join_multiindex(self): df2 = df2.sort_index(level=1) joined = df1.join(df2, how='outer').sort_index(level=0) - ex_index = index1._tuple_index.union(index2._tuple_index) + ex_index = Index(index1.values).union(Index(index2.values)) expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) expected.index.names = index1.names diff --git a/pandas/tools/hashing.py b/pandas/tools/hashing.py index 800e0b8815443..ef863510cdd87 100644 --- a/pandas/tools/hashing.py +++ b/pandas/tools/hashing.py @@ -5,7 +5,6 @@ import numpy as np from pandas import _hash, Series, factorize, Categorical, Index, MultiIndex -import pandas.core.algorithms as algos from pandas.lib import is_bool_array from pandas.types.generic import ABCIndexClass, ABCSeries, ABCDataFrame from pandas.types.common import (is_categorical_dtype, is_numeric_dtype, @@ -142,20 +141,18 @@ def hash_tuples(vals, encoding='utf8', hash_key=None): if not 
isinstance(vals, MultiIndex): vals = MultiIndex.from_tuples(vals) - # create a list-of-ndarrays - def get_level_values(num): - unique = vals.levels[num] # .values - labels = vals.labels[num] - filled = algos.take_1d(unique._values, labels, - fill_value=unique._na_value) - return filled - - vals = [get_level_values(level) + # create a list-of-Categoricals + vals = [Categorical(vals.labels[level], + vals.levels[level], + ordered=False, + fastpath=True) for level in range(vals.nlevels)] # hash the list-of-ndarrays - hashes = (hash_array(l, encoding=encoding, hash_key=hash_key) - for l in vals) + hashes = (_hash_categorical(cat, + encoding=encoding, + hash_key=hash_key) + for cat in vals) h = _combine_hash_arrays(hashes, len(vals)) if is_tuple: h = h[0] @@ -178,9 +175,26 @@ def _hash_categorical(c, encoding, hash_key): ------- ndarray of hashed values array, same size as len(c) """ - cat_hashed = hash_array(c.categories.values, encoding, hash_key, - categorize=False).astype(np.uint64, copy=False) - return c.rename_categories(cat_hashed).astype(np.uint64, copy=False) + hashed = hash_array(c.categories.values, encoding, hash_key, + categorize=False) + + # we have uint64, as we don't directly support missing values + # we don't want to use take_nd which will coerce to float + # instead, directly construt the result with a + # max(np.uint64) as the missing value indicator + # + # TODO: GH 15362 + + mask = c.isnull() + if len(hashed): + result = hashed.take(c.codes) + else: + result = np.zeros(len(mask), dtype='uint64') + + if mask.any(): + result[mask] = np.iinfo(np.uint64).max + + return result def hash_array(vals, encoding='utf8', hash_key=None, categorize=True): diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 6b1c3f9c00351..b1a17df64aecf 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -12,7 +12,8 @@ is_datetime64tz_dtype, is_datetime64_dtype, is_timedelta64_dtype, is_dtype_equal, is_float_dtype, is_complex_dtype, - is_integer_dtype, is_datetime_or_timedelta_dtype, + is_integer_dtype, + is_datetime_or_timedelta_dtype, is_bool_dtype, is_scalar, _string_dtypes, _coerce_to_dtype, From 93f5e3a0c11c82ad6b7365e83637d133c1a6e8a5 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 15 Feb 2017 12:59:11 -0500 Subject: [PATCH 062/933] STYLE: flake8 upgraded to 3.3 on conda (#15412) fixes for E305, 2 blank lines after a class definition --- pandas/compat/numpy/__init__.py | 1 + pandas/compat/numpy/function.py | 7 +++++++ pandas/computation/expr.py | 1 + pandas/core/algorithms.py | 2 ++ pandas/core/config.py | 1 + pandas/core/config_init.py | 2 ++ pandas/core/frame.py | 2 +- pandas/core/indexing.py | 2 ++ pandas/formats/format.py | 2 +- pandas/indexes/numeric.py | 3 +++ pandas/indexes/range.py | 1 + pandas/io/common.py | 2 ++ pandas/io/excel.py | 5 +++++ pandas/io/gbq.py | 1 + pandas/io/packers.py | 2 ++ pandas/io/parsers.py | 2 ++ pandas/io/pytables.py | 4 ++++ pandas/io/sql.py | 1 + pandas/io/stata.py | 1 + pandas/msgpack/__init__.py | 1 + pandas/sparse/frame.py | 1 + pandas/sparse/series.py | 1 + pandas/stats/moments.py | 3 +++ pandas/tests/sparse/test_libsparse.py | 2 +- pandas/tests/test_generic.py | 1 + pandas/tools/merge.py | 5 +++++ pandas/tools/plotting.py | 1 + pandas/tseries/frequencies.py | 2 ++ pandas/tseries/holiday.py | 2 ++ pandas/tseries/index.py | 1 + pandas/tseries/interval.py | 3 --- pandas/tseries/offsets.py | 4 ++++ pandas/tseries/resample.py | 4 ++++ pandas/tseries/timedeltas.py | 1 + pandas/types/generic.py | 1 + pandas/util/print_versions.py | 1 + 
pandas/util/terminal.py | 1 + pandas/util/testing.py | 3 +++ 38 files changed, 74 insertions(+), 6 deletions(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index bfd770d7af2c6..4a9a2647ece0f 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -67,6 +67,7 @@ def np_array_datetime64_compat(arr, *args, **kwargs): return np.array(arr, *args, **kwargs) + __all__ = ['np', '_np_version_under1p8', '_np_version_under1p9', diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index eb9e9ecc359b2..4053994efa005 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -55,6 +55,7 @@ def __call__(self, args, kwargs, fname=None, raise ValueError("invalid validation method " "'{method}'".format(method=method)) + ARGMINMAX_DEFAULTS = dict(out=None) validate_argmin = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmin', method='both', max_fname_arg_count=1) @@ -97,6 +98,7 @@ def validate_argmax_with_skipna(skipna, args, kwargs): validate_argmax(args, kwargs) return skipna + ARGSORT_DEFAULTS = OrderedDict() ARGSORT_DEFAULTS['axis'] = -1 ARGSORT_DEFAULTS['kind'] = 'quicksort' @@ -121,6 +123,7 @@ def validate_argsort_with_ascending(ascending, args, kwargs): validate_argsort(args, kwargs, max_fname_arg_count=1) return ascending + CLIP_DEFAULTS = dict(out=None) validate_clip = CompatValidator(CLIP_DEFAULTS, fname='clip', method='both', max_fname_arg_count=3) @@ -141,6 +144,7 @@ def validate_clip_with_axis(axis, args, kwargs): validate_clip(args, kwargs) return axis + COMPRESS_DEFAULTS = OrderedDict() COMPRESS_DEFAULTS['axis'] = None COMPRESS_DEFAULTS['out'] = None @@ -170,6 +174,7 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name): validate_cum_func(args, kwargs, fname=name) return skipna + LOGICAL_FUNC_DEFAULTS = dict(out=None) validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method='kwargs') @@ -236,6 +241,7 @@ def validate_take_with_convert(convert, args, kwargs): validate_take(args, kwargs, max_fname_arg_count=3, method='both') return convert + TRANSPOSE_DEFAULTS = dict(axes=None) validate_transpose = CompatValidator(TRANSPOSE_DEFAULTS, fname='transpose', method='both', max_fname_arg_count=0) @@ -318,6 +324,7 @@ def validate_groupby_func(name, args, kwargs, allowed=None): "with groupby. Use .groupby(...)." 
"{func}() instead".format(func=name))) + RESAMPLER_NUMPY_OPS = ('min', 'max', 'sum', 'prod', 'mean', 'std', 'var') diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index f1cf210754d12..a782287175327 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -669,6 +669,7 @@ def visitor(x, y): operands = node.values return reduce(visitor, operands) + # ast.Call signature changed on 3.5, # conditionally change which methods is named # visit_Call depending on Python version, #11097 diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c922ac21e12eb..4ae46fe33a5cc 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -926,6 +926,7 @@ def _finalize_nsmallest(arr, kth_val, n, keep, narr): else: return inds + _dtype_map = {'datetime64[ns]': 'int64', 'timedelta64[ns]': 'int64'} @@ -959,6 +960,7 @@ def _hashtable_algo(f, values, return_dtype=None): # use Object return f(htable.PyObjectHashTable, _ensure_object) + _hashtables = { 'float64': (htable.Float64HashTable, htable.Float64Vector), 'uint64': (htable.UInt64HashTable, htable.UInt64Vector), diff --git a/pandas/core/config.py b/pandas/core/config.py index ed63c865ebfb4..1c0eb60b8ec2f 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -804,6 +804,7 @@ def inner(x): return inner + # common type validators, for convenience # usage: register_option(... , validator = is_int) is_int = is_type_factory(int) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index fe47391c9ff81..d3db633f3aa04 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -278,6 +278,7 @@ def mpl_style_cb(key): return val + with cf.config_prefix('display'): cf.register_option('precision', 6, pc_precision_doc, validator=is_int) cf.register_option('float_format', None, float_format_doc, @@ -380,6 +381,7 @@ def use_inf_as_null_cb(key): from pandas.types.missing import _use_inf_as_null _use_inf_as_null(key) + with cf.config_prefix('mode'): cf.register_option('use_inf_as_null', False, use_inf_as_null_doc, cb=use_inf_as_null_cb) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9c66f6dbb273e..f7c306ea7ce95 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5741,9 +5741,9 @@ def _from_nested_dict(data): def _put_str(s, space): return ('%s' % s)[:space].ljust(space) + # ---------------------------------------------------------------------- # Add plotting methods to DataFrame - DataFrame.plot = base.AccessorProperty(gfx.FramePlotMethods, gfx.FramePlotMethods) DataFrame.hist = gfx.hist_frame diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6bb2d1c479844..66510a7708e64 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -36,6 +36,7 @@ def get_indexers_list(): ('iat', _iAtIndexer), ] + # "null slice" _NS = slice(None, None) @@ -1850,6 +1851,7 @@ def _convert_key(self, key, is_setter=False): "indexers") return key + # 32-bit floating point machine epsilon _eps = np.finfo('f4').eps diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 1a7a06199ad8a..6b235b5e1bc33 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -2479,9 +2479,9 @@ def _has_names(index): else: return index.name is not None + # ----------------------------------------------------------------------------- # Global formatting options - _initial_defencoding = None diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 0b9b337731d7f..00ddf5b0c918d 100644 --- a/pandas/indexes/numeric.py 
+++ b/pandas/indexes/numeric.py @@ -159,6 +159,7 @@ def _assert_safe_casting(cls, data, subarr): raise TypeError('Unsafe NumPy casting, you must ' 'explicitly cast') + Int64Index._add_numeric_methods() Int64Index._add_logical_methods() @@ -238,6 +239,7 @@ def _assert_safe_casting(cls, data, subarr): raise TypeError('Unsafe NumPy casting, you must ' 'explicitly cast') + UInt64Index._add_numeric_methods() UInt64Index._add_logical_methods() @@ -391,5 +393,6 @@ def isin(self, values, level=None): return lib.ismember_nans(np.array(self), value_set, isnull(list(value_set)).any()) + Float64Index._add_numeric_methods() Float64Index._add_logical_methods_disabled() diff --git a/pandas/indexes/range.py b/pandas/indexes/range.py index 7a7902b503bd6..cc78361f843bf 100644 --- a/pandas/indexes/range.py +++ b/pandas/indexes/range.py @@ -652,5 +652,6 @@ def _evaluate_numeric_binop(self, other): reversed=True, step=operator.div) + RangeIndex._add_numeric_methods() RangeIndex._add_logical_methods() diff --git a/pandas/io/common.py b/pandas/io/common.py index b24acb256c4a9..74c51b74ca18a 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -75,6 +75,7 @@ class ParserError(ValueError): """ pass + # gh-12665: Alias for now and remove later. CParserError = ParserError @@ -116,6 +117,7 @@ def __iter__(self): def __next__(self): raise AbstractMethodError(self) + if not compat.PY3: BaseIterator.next = lambda self: self.__next__() diff --git a/pandas/io/excel.py b/pandas/io/excel.py index f34ba65cf7b51..2821983213646 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -886,12 +886,14 @@ def _convert_to_style(cls, style_dict): return xls_style + register_writer(_Openpyxl1Writer) class _OpenpyxlWriter(_Openpyxl1Writer): engine = 'openpyxl' + register_writer(_OpenpyxlWriter) @@ -1368,6 +1370,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): for k, v in style_kwargs.items(): setattr(xcell, k, v) + register_writer(_Openpyxl22Writer) @@ -1491,6 +1494,7 @@ def _convert_to_style(cls, style_dict, num_format_str=None): return style + register_writer(_XlwtWriter) @@ -1603,4 +1607,5 @@ def _convert_to_style(self, style_dict, num_format_str=None): return xl_format + register_writer(_XlsxWriter) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 0ffb6b4bf8c05..a5558866937cf 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -58,6 +58,7 @@ def _test_google_api_imports(): raise ImportError("Missing module required for Google BigQuery " "support: {0}".format(str(e))) + logger = logging.getLogger('pandas.io.gbq') logger.setLevel(logging.ERROR) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index ab44e46c96b77..3f4be6ad459d8 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -217,6 +217,7 @@ def read(fh): raise ValueError('path_or_buf needs to be a string file path or file-like') + dtype_dict = {21: np.dtype('M8[ns]'), u('datetime64[ns]'): np.dtype('M8[ns]'), u('datetime64[us]'): np.dtype('M8[us]'), @@ -237,6 +238,7 @@ def dtype_for(t): return dtype_dict[t] return np.typeDict.get(t, t) + c2f_dict = {'complex': np.float64, 'complex128': np.float64, 'complex64': np.float32} diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index f8905dfa315c4..88d0c6c12c04f 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -409,6 +409,7 @@ def _read(filepath_or_buffer, kwds): return data + _parser_defaults = { 'delimiter': None, @@ -655,6 +656,7 @@ def parser_f(filepath_or_buffer, return parser_f + read_csv = _make_parser_function('read_csv', sep=',') 
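# (Context for this mechanical commit: flake8 3.3 introduces check E305,
# "expected 2 blank lines after class or function definition", so every
# hunk here just opens a second blank line before module-level code such
# as the ``read_csv`` wiring below. A minimal compliant shape, with
# hypothetical names, looks like:)
def _make_reader(sep):
    # stand-in for a factory in the style of _make_parser_function
    def reader(text):
        return [row.split(sep) for row in text.splitlines()]
    return reader


csv_reader = _make_reader(',')   # two blank lines above satisfy E305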
read_csv = Appender(_read_csv_doc)(read_csv) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index d8de1dcd61977..65ac4e5654dce 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -74,6 +74,7 @@ def _ensure_encoding(encoding): encoding = _default_encoding return encoding + Term = Expr @@ -112,6 +113,7 @@ class ClosedFileError(Exception): class IncompatibilityWarning(Warning): pass + incompatibility_doc = """ where criteria is being ignored as this version [%s] is too old (or not-defined), read the file in and write it out to a new file to upgrade (with @@ -122,6 +124,7 @@ class IncompatibilityWarning(Warning): class AttributeConflictWarning(Warning): pass + attribute_conflict_doc = """ the [%s] attribute of the existing index is [%s] which conflicts with the new [%s], resetting the attribute to None @@ -131,6 +134,7 @@ class AttributeConflictWarning(Warning): class DuplicateWarning(Warning): pass + duplicate_doc = """ duplicate entries in table, taking most recently appended """ diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 9fa01c413aca8..55e145b493dd9 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -495,6 +495,7 @@ def has_table(table_name, con, flavor=None, schema=None): pandas_sql = pandasSQL_builder(con, flavor=flavor, schema=schema) return pandas_sql.has_table(table_name) + table_exists = has_table diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 2be7657883e88..1698ade4c0102 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -459,6 +459,7 @@ class PossiblePrecisionLoss(Warning): class ValueLabelTypeMismatch(Warning): pass + value_label_mismatch_doc = """ Stata value labels (pandas categories) must be strings. Column {0} contains non-string labels which will be converted to strings. Please check that the diff --git a/pandas/msgpack/__init__.py b/pandas/msgpack/__init__.py index 33d60a12ef0a3..4d6e241171281 100644 --- a/pandas/msgpack/__init__.py +++ b/pandas/msgpack/__init__.py @@ -41,6 +41,7 @@ def packb(o, **kwargs): """ return Packer(**kwargs).pack(o) + # alias for compatibility to simplejson/marshal/pickle. 
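# (The alias being spaced out below follows the stdlib serializer surface:
# json, marshal and pickle all expose load/loads/dump/dumps, so callers can
# swap serializers without renaming calls. A tiny standalone analogue of
# the idiom, using json as a stand-in for msgpack:)
import json

def packb(obj):
    # pack ``obj`` into bytes (plays the role of msgpack's packb)
    return json.dumps(obj).encode('utf8')

def unpackb(packed):
    # inverse of packb
    return json.loads(packed.decode('utf8'))

dumps = packb      # aliases for compatibility, mirroring the hunk below
loads = unpackb
assert loads(dumps({'a': 1})) == {'a': 1}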
load = unpack loads = unpackb diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 1fc93a967bdbb..61b8434b0ea09 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -863,6 +863,7 @@ def homogenize(series_dict): return output + # use unaccelerated ops for sparse objects ops.add_flex_arithmetic_methods(SparseDataFrame, use_numexpr=False, **ops.frame_flex_funcs) diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 2d3a9effe6939..dfdbb3c89814a 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -832,6 +832,7 @@ def from_coo(cls, A, dense_index=False): """ return _coo_to_sparse_series(A, dense_index=dense_index) + # overwrite series methods with unaccelerated versions ops.add_special_arithmetic_methods(SparseSeries, use_numexpr=False, **ops.series_special_funcs) diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 95b209aee0b0c..914c4c08863a2 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -385,6 +385,7 @@ def ewmstd(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0, bias=bias, func_kw=['bias']) + ewmvol = ewmstd @@ -476,6 +477,7 @@ def f(arg, window, min_periods=None, freq=None, center=False, **kwargs) return f + rolling_max = _rolling_func('max', 'Moving maximum.', how='max') rolling_min = _rolling_func('min', 'Moving minimum.', how='min') rolling_sum = _rolling_func('sum', 'Moving sum.') @@ -683,6 +685,7 @@ def f(arg, min_periods=1, freq=None, **kwargs): **kwargs) return f + expanding_max = _expanding_func('max', 'Expanding maximum.') expanding_min = _expanding_func('min', 'Expanding minimum.') expanding_sum = _expanding_func('sum', 'Expanding sum.') diff --git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py index 4d5a93d77cf14..0435b732911da 100644 --- a/pandas/tests/sparse/test_libsparse.py +++ b/pandas/tests/sparse/test_libsparse.py @@ -560,8 +560,8 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): check_cases(_check_case) -# too cute? oh but how I abhor code duplication +# too cute? 
oh but how I abhor code duplication check_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv'] diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 28f1dc61533c1..b087ca21d3c25 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1588,6 +1588,7 @@ def test_to_xarray(self): # non-convertible self.assertRaises(ValueError, lambda: result.to_pandas()) + # run all the tests, but wrap each in a warning catcher for t in ['test_rename', 'test_rename_axis', 'test_get_numeric_data', 'test_get_default', 'test_nonzero', diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index e82e702cb6e55..ba53d42fccec7 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -53,6 +53,7 @@ def wrapper(*args, **kwargs): return pd.concat(*args, **kwargs) return wrapper + concat = concat_wrap() @@ -66,6 +67,8 @@ def merge(left, right, how='inner', on=None, left_on=None, right_on=None, right_index=right_index, sort=sort, suffixes=suffixes, copy=copy, indicator=indicator) return op.get_result() + + if __debug__: merge.__doc__ = _merge_doc % '\nleft : DataFrame' @@ -264,6 +267,7 @@ def _merger(x, y): result = _merger(left, right) return result + ordered_merge.__doc__ = merge_ordered.__doc__ @@ -1334,6 +1338,7 @@ def _right_outer_join(x, y, max_groups): right_indexer, left_indexer = _join.left_outer_join(y, x, max_groups) return left_indexer, right_indexer + _join_functions = { 'inner': _join.inner_join, 'left': _join.left_outer_join, diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 0b1ced97d2b81..b2050d7d8d81e 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -149,6 +149,7 @@ def _mpl_ge_2_0_0(): except ImportError: return False + if _mpl_ge_1_5_0(): # Compat with mp 1.5, which uses cycler. 
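# (The guarded version probe this plotting module relies on, restated as a
# small self-contained sketch -- the real helpers differ in detail: check
# the installed matplotlib once, return False cleanly when it is absent,
# and branch on the result. matplotlib >= 1.5 moved the color cycle out
# into the separate ``cycler`` package, hence the import just below.)
from distutils.version import LooseVersion

def _mpl_ge_1_5_0():
    try:
        import matplotlib
        return LooseVersion(matplotlib.__version__) >= LooseVersion('1.5')
    except ImportError:
        return False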
import cycler diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index e0c602bf5a037..957a934d13f09 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -660,6 +660,7 @@ def get_standard_freq(freq): warnings.warn(msg, FutureWarning, stacklevel=2) return to_offset(freq).rule_code + # --------------------------------------------------------------------- # Period codes @@ -795,6 +796,7 @@ def infer_freq(index, warn=True): inferer = _FrequencyInferer(index, warn=warn) return inferer.get_freq() + _ONE_MICRO = long(1000) _ONE_MILLI = _ONE_MICRO * 1000 _ONE_SECOND = _ONE_MILLI * 1000 diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 31e40c6bcbb2c..d3d936693c266 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -286,6 +286,7 @@ def _apply_rule(self, dates): dates += offset return dates + holiday_calendars = {} @@ -461,6 +462,7 @@ def merge(self, other, inplace=False): else: return holidays + USMemorialDay = Holiday('MemorialDay', month=5, day=31, offset=DateOffset(weekday=MO(-1))) USLaborDay = Holiday('Labor Day', month=9, day=1, diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 6cbb696783e09..5f00e8b648689 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -138,6 +138,7 @@ def _ensure_datetime64(other): return other raise TypeError('%s type object %s' % (type(other), str(other))) + _midnight = time(0, 0) diff --git a/pandas/tseries/interval.py b/pandas/tseries/interval.py index 6698c7e924758..22801318a1853 100644 --- a/pandas/tseries/interval.py +++ b/pandas/tseries/interval.py @@ -33,6 +33,3 @@ def __new__(self, starts, ends): def dtype(self): return self.values.dtype - -if __name__ == '__main__': - pass diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 370dd00762896..79227f6de90a5 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1652,6 +1652,7 @@ class WeekDay(object): SAT = 5 SUN = 6 + _int_to_weekday = { WeekDay.MON: 'MON', WeekDay.TUE: 'TUE', @@ -1924,6 +1925,7 @@ def onOffset(self, dt): modMonth = (dt.month - self.startingMonth) % 3 return BMonthEnd().onOffset(dt) and modMonth == 0 + _int_to_month = tslib._MONTH_ALIASES _month_to_int = dict((v, k) for k, v in _int_to_month.items()) @@ -2799,6 +2801,7 @@ def _delta_to_tick(delta): else: # pragma: no cover return Nano(nanos) + _delta_to_nanoseconds = tslib._delta_to_nanoseconds @@ -2931,6 +2934,7 @@ def generate_range(start=None, end=None, periods=None, raise ValueError('Offset %s did not decrement date' % offset) cur = next_date + prefix_mapping = dict((offset._prefix, offset) for offset in [ YearBegin, # 'AS' YearEnd, # 'A' diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 5692d6c5cabde..a6a10c08966d6 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -552,6 +552,8 @@ def var(self, ddof=1, *args, **kwargs): """ nv.validate_resampler_func('var', args, kwargs) return self._downsample('var', ddof=ddof) + + Resampler._deprecated_valids += dir(Resampler) # downsample methods @@ -969,6 +971,8 @@ def resample(obj, kind=None, **kwds): """ create a TimeGrouper and return our resampler """ tg = TimeGrouper(**kwds) return tg._get_resampler(obj, kind=kind) + + resample.__doc__ = Resampler.__doc__ diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py index 9bf39652a4e00..5a5d1533bfa91 100644 --- a/pandas/tseries/timedeltas.py +++ b/pandas/tseries/timedeltas.py @@ -87,6 +87,7 @@ def to_timedelta(arg, 
unit='ns', box=True, errors='raise'): return _coerce_scalar_to_timedelta_type(arg, unit=unit, box=box, errors=errors) + _unit_map = { 'Y': 'Y', 'y': 'Y', diff --git a/pandas/types/generic.py b/pandas/types/generic.py index 756fb47596700..e7b54ccc6f25e 100644 --- a/pandas/types/generic.py +++ b/pandas/types/generic.py @@ -57,4 +57,5 @@ class _ABCGeneric(type): def __instancecheck__(cls, inst): return hasattr(inst, "_data") + ABCGeneric = _ABCGeneric("ABCGeneric", tuple(), {}) diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py index 7c5148caf7e74..b0f5d3994ed64 100644 --- a/pandas/util/print_versions.py +++ b/pandas/util/print_versions.py @@ -153,5 +153,6 @@ def main(): return 0 + if __name__ == "__main__": sys.exit(main()) diff --git a/pandas/util/terminal.py b/pandas/util/terminal.py index 6b8428ff75806..dadd09ae74ea4 100644 --- a/pandas/util/terminal.py +++ b/pandas/util/terminal.py @@ -115,6 +115,7 @@ def ioctl_GWINSZ(fd): return None return int(cr[1]), int(cr[0]) + if __name__ == "__main__": sizex, sizey = get_terminal_size() print('width = %s height = %s' % (sizex, sizey)) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 566ceec027b2b..cda386781e2ec 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -74,6 +74,7 @@ def reset_testing_mode(): if 'deprecate' in testing_mode: warnings.simplefilter('ignore', _testing_mode_warnings) + set_testing_mode() @@ -1381,6 +1382,7 @@ def assert_panelnd_equal(left, right, for i, item in enumerate(right._get_axis(0)): assert item in left, "non-matching item (left) '%s'" % item + # TODO: strangely check_names fails in py3 ? _panel_frame_equal = partial(assert_frame_equal, check_names=False) assert_panel_equal = partial(assert_panelnd_equal, @@ -2076,6 +2078,7 @@ def dec(f): return wrapper + # skip tests on exceptions with this message _network_error_messages = ( # 'urlopen error timed out', From 86ef3ca3ff7c836c5b7c01eb918201ec7c44c000 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 15 Feb 2017 13:00:36 -0500 Subject: [PATCH 063/933] DOC: use shared_docs for Index.get_indexer, get_indexer_non_unique (#15411) * STYLE: flake8 upgraded to 3.3 on conda fixes for E305, 2 blank lines after a class definition * DOC: use shared_docs for Index.get_indexer, get_indexer_non_unique fix non-populated doc-strings for some methods in Index (take) --- pandas/indexes/base.py | 41 +++++++++++++++++++++++++++++--------- pandas/indexes/category.py | 40 +++++++------------------------------ pandas/indexes/multi.py | 40 ++++++++++--------------------------- pandas/tseries/period.py | 5 +++++ 4 files changed, 55 insertions(+), 71 deletions(-) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index c483fb0764a4c..e51824e72a2a0 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -65,6 +65,7 @@ _unsortable_types = frozenset(('mixed', 'mixed-integer')) _index_doc_kwargs = dict(klass='Index', inplace='', + target_klass='Index', unique='Index', duplicated='np.ndarray') _index_shared_docs = dict() @@ -1605,7 +1606,7 @@ def _append_same_dtype(self, to_concat, name): numpy.ndarray.take """ - @Appender(_index_shared_docs['take']) + @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) @@ -2350,15 +2351,14 @@ def get_level_values(self, level): self._validate_index_level(level) return self - def get_indexer(self, target, method=None, limit=None, tolerance=None): - """ + 
_index_shared_docs['get_indexer'] = """ Compute indexer and mask for new index given the current index. The indexer should be then used as an input to ndarray.take to align the current data to the new index. Parameters ---------- - target : Index + target : %(target_klass)s method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional * default: exact matches only. * pad / ffill: find the PREVIOUS index value if no exact match. @@ -2387,6 +2387,9 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): positions matches the corresponding target values. Missing values in the target are marked by -1. """ + + @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs) + def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) target = _ensure_index(target) if tolerance is not None: @@ -2496,11 +2499,28 @@ def _filter_indexer_tolerance(self, target, indexer, tolerance): indexer = np.where(distance <= tolerance, indexer, -1) return indexer + _index_shared_docs['get_indexer_non_unique'] = """ + Compute indexer and mask for new index given the current index. The + indexer should be then used as an input to ndarray.take to align the + current data to the new index. + + Parameters + ---------- + target : %(target_klass)s + + Returns + ------- + indexer : ndarray of int + Integers from 0 to n - 1 indicating that the index at these + positions matches the corresponding target values. Missing values + in the target are marked by -1. + missing : ndarray of int + An indexer into the target of the values not found. + These correspond to the -1 in the indexer array + """ + + @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs) def get_indexer_non_unique(self, target): - """ return an indexer suitable for taking from a non unique index - return the labels in the same order as the target, and - return a missing indexer into the target (missing are marked as -1 - in the indexer); target must be an iterable """ target = _ensure_index(target) pself, ptarget = self._possibly_promote(target) if pself is not self or ptarget is not target: @@ -2516,7 +2536,10 @@ def get_indexer_non_unique(self, target): return Index(indexer), missing def get_indexer_for(self, target, **kwargs): - """ guaranteed return of an indexer even when non-unique """ + """ + guaranteed return of an indexer even when non-unique + This dispatches to get_indexer or get_indexer_nonunique as appropriate + """ if self.is_unique: return self.get_indexer(target, **kwargs) indexer, _ = self.get_indexer_non_unique(target, **kwargs) diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index e2e0fd056b111..acb2758641a62 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -18,6 +18,8 @@ import pandas.core.base as base import pandas.core.missing as missing import pandas.indexes.base as ibase +_index_doc_kwargs = dict(ibase._index_doc_kwargs) +_index_doc_kwargs.update(dict(target_klass='CategoricalIndex')) class CategoricalIndex(Index, base.PandasDelegate): @@ -289,7 +291,7 @@ def _engine(self): def is_unique(self): return not self.duplicated().any() - @Appender(base._shared_docs['unique'] % ibase._index_doc_kwargs) + @Appender(base._shared_docs['unique'] % _index_doc_kwargs) def unique(self): result = base.IndexOpsMixin.unique(self) # CategoricalIndex._shallow_copy uses keeps original categories @@ -299,7 +301,7 @@ def unique(self): @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', 
False: 'first'}) - @Appender(base._shared_docs['duplicated'] % ibase._index_doc_kwargs) + @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): from pandas.hashtable import duplicated_int64 codes = self.codes.astype('i8') @@ -425,34 +427,8 @@ def _reindex_non_unique(self, target): return new_target, indexer, new_indexer + @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): - """ - Compute indexer and mask for new index given the current index. The - indexer should be then used as an input to ndarray.take to align the - current data to the new index. The mask determines whether labels are - found or not in the current index - - Parameters - ---------- - target : MultiIndex or Index (of tuples) - method : {'pad', 'ffill', 'backfill', 'bfill'} - pad / ffill: propagate LAST valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap - - Notes - ----- - This is a low-level method and probably should be used at your own risk - - Examples - -------- - >>> indexer, mask = index.get_indexer(new_index) - >>> new_values = cur_values.take(indexer) - >>> new_values[-mask] = np.nan - - Returns - ------- - (indexer, mask) : (ndarray, ndarray) - """ method = missing.clean_reindex_fill_method(method) target = ibase._ensure_index(target) @@ -472,10 +448,8 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): return _ensure_platform_int(indexer) + @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs) def get_indexer_non_unique(self, target): - """ this is the same for a CategoricalIndex for get_indexer; the API - returns the missing values as well - """ target = ibase._ensure_index(target) if isinstance(target, CategoricalIndex): @@ -497,7 +471,7 @@ def _convert_list_indexer(self, keyarr, kind=None): return None - @Appender(_index_shared_docs['take']) + @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 57739548a17d6..18e1da7303d6d 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -43,6 +43,10 @@ _get_na_value, InvalidIndexError, _index_shared_docs) import pandas.indexes.base as ibase +_index_doc_kwargs = dict(ibase._index_doc_kwargs) +_index_doc_kwargs.update( + dict(klass='MultiIndex', + target_klass='MultiIndex or list of tuples')) class MultiIndex(Index): @@ -755,7 +759,7 @@ def f(k, stringify): @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) - @Appender(base._shared_docs['duplicated'] % ibase._index_doc_kwargs) + @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): from pandas.core.sorting import get_group_index from pandas.hashtable import duplicated_int64 @@ -1244,7 +1248,7 @@ def __getitem__(self, key): names=self.names, sortorder=sortorder, verify_integrity=False) - @Appender(_index_shared_docs['take']) + @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) @@ -1564,34 +1568,8 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): return new_index, indexer + @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, 
tolerance=None): - """ - Compute indexer and mask for new index given the current index. The - indexer should be then used as an input to ndarray.take to align the - current data to the new index. The mask determines whether labels are - found or not in the current index - - Parameters - ---------- - target : MultiIndex or Index (of tuples) - method : {'pad', 'ffill', 'backfill', 'bfill'} - pad / ffill: propagate LAST valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap - - Notes - ----- - This is a low-level method and probably should be used at your own risk - - Examples - -------- - >>> indexer, mask = index.get_indexer(new_index) - >>> new_values = cur_values.take(indexer) - >>> new_values[-mask] = np.nan - - Returns - ------- - (indexer, mask) : (ndarray, ndarray) - """ method = missing.clean_reindex_fill_method(method) target = _ensure_index(target) @@ -1633,6 +1611,10 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): return _ensure_platform_int(indexer) + @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs) + def get_indexer_non_unique(self, target): + return super(MultiIndex, self).get_indexer_non_unique(target) + def reindex(self, target, method=None, level=None, limit=None, tolerance=None): """ diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 98151d5b6130c..8a6b0c153bb50 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -44,6 +44,10 @@ from pandas.lib import infer_dtype import pandas.tslib as tslib from pandas.compat import zip, u +import pandas.indexes.base as ibase +_index_doc_kwargs = dict(ibase._index_doc_kwargs) +_index_doc_kwargs.update( + dict(target_klass='PeriodIndex or list of Periods')) def _field_accessor(name, alias, docstring=None): @@ -759,6 +763,7 @@ def get_value(self, series, key): return com._maybe_box(self, self._engine.get_value(s, key), series, key) + @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): target = _ensure_index(target) From d6f8b460325fd79faa90858e2743878a7cc74dec Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 15 Feb 2017 15:20:52 -0500 Subject: [PATCH 064/933] BLD: use latest conda version with latest miniconda installer on appveyor change 3.6 build to use numpy=1.12 & add back xlwt (was not on defaults for a while) Author: Jeff Reback Closes #15415 from jreback/appveyor and squashes the following commits: 2019f37 [Jeff Reback] force numpy version f82877b [Jeff Reback] remove extra conda list 3ace9f2 [Jeff Reback] CI: use numpy=1.12 on appveyor 6855a7b [Jeff Reback] BLD: use latest conda version with latest miniconda installer on appveyor --- appveyor.yml | 15 ++++++--------- ci/requirements-3.5-64.run | 2 +- ci/requirements-3.6-64.run | 4 ++-- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 42c3be13af809..d96e1dfcf76de 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -18,19 +18,19 @@ environment: matrix: - - CONDA_ROOT: "C:\\Miniconda3.5_64" + - CONDA_ROOT: "C:\\Miniconda3_64" PYTHON_VERSION: "3.6" PYTHON_ARCH: "64" CONDA_PY: "36" - CONDA_NPY: "111" + CONDA_NPY: "112" - - CONDA_ROOT: "C:\\Miniconda3.5_64" + - CONDA_ROOT: "C:\\Miniconda3_64" PYTHON_VERSION: "2.7" PYTHON_ARCH: "64" CONDA_PY: "27" CONDA_NPY: "110" - - CONDA_ROOT: "C:\\Miniconda3.5_64" + - CONDA_ROOT: "C:\\Miniconda3_64" PYTHON_VERSION: "3.5" PYTHON_ARCH: "64" CONDA_PY: "35" @@ -66,8 +66,7 @@ install: # 
install our build environment - cmd: conda config --set show_channel_urls true --set always_yes true --set changeps1 false - #- cmd: conda update -q conda - - cmd: conda install conda=4.2.15 + - cmd: conda update -q conda - cmd: conda config --set ssl_verify false # add the pandas channel *before* defaults to have defaults take priority @@ -83,7 +82,7 @@ install: - cmd: '%CMD_IN_ENV% conda build ci\appveyor.recipe -q' # create our env - - cmd: conda create -q -n pandas python=%PYTHON_VERSION% nose pytest + - cmd: conda create -q -n pandas python=%PYTHON_VERSION% pytest - cmd: activate pandas - SET REQ=ci\requirements-%PYTHON_VERSION%-%PYTHON_ARCH%.run - cmd: echo "installing requirements from %REQ%" @@ -95,7 +94,5 @@ install: test_script: # tests - cmd: activate pandas - - cmd: conda list - cmd: cd \ - cmd: python -c "import pandas; pandas.test(['--skip-slow', '--skip-network'])" - diff --git a/ci/requirements-3.5-64.run b/ci/requirements-3.5-64.run index 905c2ff3625bd..ad66f578d702a 100644 --- a/ci/requirements-3.5-64.run +++ b/ci/requirements-3.5-64.run @@ -1,6 +1,6 @@ python-dateutil pytz -numpy +numpy=1.11* openpyxl xlsxwriter xlrd diff --git a/ci/requirements-3.6-64.run b/ci/requirements-3.6-64.run index 58ba103504b2c..840d2867e9297 100644 --- a/ci/requirements-3.6-64.run +++ b/ci/requirements-3.6-64.run @@ -1,10 +1,10 @@ python-dateutil pytz -numpy +numpy=1.12* openpyxl xlsxwriter xlrd -#xlwt +xlwt scipy feather-format numexpr From f2246cfa215d01b68aebd2da4afb836d912d248d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 16 Feb 2017 09:12:16 -0500 Subject: [PATCH 065/933] TST: convert yield based test_pickle.py to parametrized to remove warnings xref #15341 Author: Jeff Reback Closes #15416 from jreback/warn and squashes the following commits: a6af576 [Jeff Reback] TST: convert yield based test_pickle.py to parametrized to remove warnings xref #15341 --- pandas/tests/io/test_pickle.py | 535 +++++++++++++++++---------------- 1 file changed, 277 insertions(+), 258 deletions(-) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 5445c506b050c..1e3816c1556f6 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -1,6 +1,17 @@ # pylint: disable=E1101,E1103,W0232 -""" manage legacy pickle tests """ +""" +manage legacy pickle tests + +How to add pickle tests: + +1. Install pandas version intended to output the pickle. + +2. Execute "generate_legacy_storage_files.py" to create the pickle. +$ python generate_legacy_storage_files.py pickle + +3. Move the created pickle to "data/legacy_pickle/" directory. 
+""" import pytest import os @@ -9,277 +20,285 @@ import pandas as pd from pandas import Index -from pandas.compat import u, is_platform_little_endian +from pandas.compat import is_platform_little_endian import pandas import pandas.util.testing as tm from pandas.tseries.offsets import Day, MonthEnd -class TestPickle(): - """ - How to add pickle tests: +@pytest.fixture(scope='module') +def current_pickle_data(): + # our current version pickle data + from pandas.tests.io.generate_legacy_storage_files import ( + create_pickle_data) + return create_pickle_data() + + +# --------------------- +# comparision functions +# --------------------- +def compare_element(result, expected, typ, version=None): + if isinstance(expected, Index): + tm.assert_index_equal(expected, result) + return + + if typ.startswith('sp_'): + comparator = getattr(tm, "assert_%s_equal" % typ) + comparator(result, expected, exact_indices=False) + elif typ == 'timestamp': + if expected is pd.NaT: + assert result is pd.NaT + else: + tm.assert_equal(result, expected) + tm.assert_equal(result.freq, expected.freq) + else: + comparator = getattr(tm, "assert_%s_equal" % + typ, tm.assert_almost_equal) + comparator(result, expected) + + +def compare(data, vf, version): + + # py3 compat when reading py2 pickle + try: + data = pandas.read_pickle(vf) + except (ValueError) as e: + if 'unsupported pickle protocol:' in str(e): + # trying to read a py3 pickle in py2 + return + else: + raise + + m = globals() + for typ, dv in data.items(): + for dt, result in dv.items(): + try: + expected = data[typ][dt] + except (KeyError): + if version in ('0.10.1', '0.11.0') and dt == 'reg': + break + else: + raise + + # use a specific comparator + # if available + comparator = "compare_{typ}_{dt}".format(typ=typ, dt=dt) + + comparator = m.get(comparator, m['compare_element']) + comparator(result, expected, typ, version) + return data + + +def compare_sp_series_ts(res, exp, typ, version): + # SparseTimeSeries integrated into SparseSeries in 0.12.0 + # and deprecated in 0.17.0 + if version and LooseVersion(version) <= "0.12.0": + tm.assert_sp_series_equal(res, exp, check_series_type=False) + else: + tm.assert_sp_series_equal(res, exp) + + +def compare_series_ts(result, expected, typ, version): + # GH 7748 + tm.assert_series_equal(result, expected) + tm.assert_equal(result.index.freq, expected.index.freq) + tm.assert_equal(result.index.freq.normalize, False) + tm.assert_series_equal(result > 0, expected > 0) + + # GH 9291 + freq = result.index.freq + tm.assert_equal(freq + Day(1), Day(2)) + + res = freq + pandas.Timedelta(hours=1) + tm.assert_equal(isinstance(res, pandas.Timedelta), True) + tm.assert_equal(res, pandas.Timedelta(days=1, hours=1)) + + res = freq + pandas.Timedelta(nanoseconds=1) + tm.assert_equal(isinstance(res, pandas.Timedelta), True) + tm.assert_equal(res, pandas.Timedelta(days=1, nanoseconds=1)) + + +def compare_series_dt_tz(result, expected, typ, version): + # 8260 + # dtype is object < 0.17.0 + if LooseVersion(version) < '0.17.0': + expected = expected.astype(object) + tm.assert_series_equal(result, expected) + else: + tm.assert_series_equal(result, expected) - 1. Install pandas version intended to output the pickle. - 2. Execute "generate_legacy_storage_files.py" to create the pickle. 
- $ python generate_legacy_storage_files.py pickle +def compare_series_cat(result, expected, typ, version): + # Categorical dtype is added in 0.15.0 + # ordered is changed in 0.16.0 + if LooseVersion(version) < '0.15.0': + tm.assert_series_equal(result, expected, check_dtype=False, + check_categorical=False) + elif LooseVersion(version) < '0.16.0': + tm.assert_series_equal(result, expected, check_categorical=False) + else: + tm.assert_series_equal(result, expected) - 3. Move the created pickle to "data/legacy_pickle/" directory. - NOTE: TestPickle can't be a subclass of tm.Testcase to use test generator. - http://stackoverflow.com/questions/6689537/ - nose-test-generators-inside-class - """ +def compare_frame_dt_mixed_tzs(result, expected, typ, version): + # 8260 + # dtype is object < 0.17.0 + if LooseVersion(version) < '0.17.0': + expected = expected.astype(object) + tm.assert_frame_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) - @classmethod - def setup_class(cls): - from pandas.tests.io.generate_legacy_storage_files import ( - create_pickle_data) - cls.data = create_pickle_data() - cls.path = u('__%s__.pickle' % tm.rands(10)) - def compare_element(self, result, expected, typ, version=None): - if isinstance(expected, Index): - tm.assert_index_equal(expected, result) - return +def compare_frame_cat_onecol(result, expected, typ, version): + # Categorical dtype is added in 0.15.0 + # ordered is changed in 0.16.0 + if LooseVersion(version) < '0.15.0': + tm.assert_frame_equal(result, expected, check_dtype=False, + check_categorical=False) + elif LooseVersion(version) < '0.16.0': + tm.assert_frame_equal(result, expected, check_categorical=False) + else: + tm.assert_frame_equal(result, expected) - if typ.startswith('sp_'): - comparator = getattr(tm, "assert_%s_equal" % typ) - comparator(result, expected, exact_indices=False) - elif typ == 'timestamp': - if expected is pd.NaT: - assert result is pd.NaT - else: - tm.assert_equal(result, expected) - tm.assert_equal(result.freq, expected.freq) - else: - comparator = getattr(tm, "assert_%s_equal" % - typ, tm.assert_almost_equal) - comparator(result, expected) - - def compare(self, vf, version): - - # py3 compat when reading py2 pickle - try: - data = pandas.read_pickle(vf) - except (ValueError) as e: - if 'unsupported pickle protocol:' in str(e): - # trying to read a py3 pickle in py2 - return - else: - raise - - for typ, dv in data.items(): - for dt, result in dv.items(): - try: - expected = self.data[typ][dt] - except (KeyError): - if version in ('0.10.1', '0.11.0') and dt == 'reg': - break - else: - raise - - # use a specific comparator - # if available - comparator = "compare_{typ}_{dt}".format(typ=typ, dt=dt) - comparator = getattr(self, comparator, self.compare_element) - comparator(result, expected, typ, version) - return data - - def compare_sp_series_ts(self, res, exp, typ, version): - # SparseTimeSeries integrated into SparseSeries in 0.12.0 - # and deprecated in 0.17.0 - if version and LooseVersion(version) <= "0.12.0": - tm.assert_sp_series_equal(res, exp, check_series_type=False) - else: - tm.assert_sp_series_equal(res, exp) - def compare_series_ts(self, result, expected, typ, version): - # GH 7748 - tm.assert_series_equal(result, expected) - tm.assert_equal(result.index.freq, expected.index.freq) - tm.assert_equal(result.index.freq.normalize, False) - tm.assert_series_equal(result > 0, expected > 0) - - # GH 9291 - freq = result.index.freq - tm.assert_equal(freq + Day(1), Day(2)) - - res = freq + 
pandas.Timedelta(hours=1) - tm.assert_equal(isinstance(res, pandas.Timedelta), True) - tm.assert_equal(res, pandas.Timedelta(days=1, hours=1)) - - res = freq + pandas.Timedelta(nanoseconds=1) - tm.assert_equal(isinstance(res, pandas.Timedelta), True) - tm.assert_equal(res, pandas.Timedelta(days=1, nanoseconds=1)) - - def compare_series_dt_tz(self, result, expected, typ, version): - # 8260 - # dtype is object < 0.17.0 - if LooseVersion(version) < '0.17.0': - expected = expected.astype(object) - tm.assert_series_equal(result, expected) - else: - tm.assert_series_equal(result, expected) - - def compare_series_cat(self, result, expected, typ, version): - # Categorical dtype is added in 0.15.0 - # ordered is changed in 0.16.0 - if LooseVersion(version) < '0.15.0': - tm.assert_series_equal(result, expected, check_dtype=False, - check_categorical=False) - elif LooseVersion(version) < '0.16.0': - tm.assert_series_equal(result, expected, check_categorical=False) - else: - tm.assert_series_equal(result, expected) - - def compare_frame_dt_mixed_tzs(self, result, expected, typ, version): - # 8260 - # dtype is object < 0.17.0 - if LooseVersion(version) < '0.17.0': - expected = expected.astype(object) - tm.assert_frame_equal(result, expected) - else: - tm.assert_frame_equal(result, expected) - - def compare_frame_cat_onecol(self, result, expected, typ, version): - # Categorical dtype is added in 0.15.0 - # ordered is changed in 0.16.0 - if LooseVersion(version) < '0.15.0': - tm.assert_frame_equal(result, expected, check_dtype=False, - check_categorical=False) - elif LooseVersion(version) < '0.16.0': - tm.assert_frame_equal(result, expected, check_categorical=False) - else: - tm.assert_frame_equal(result, expected) - - def compare_frame_cat_and_float(self, result, expected, typ, version): - self.compare_frame_cat_onecol(result, expected, typ, version) - - def compare_index_period(self, result, expected, typ, version): - tm.assert_index_equal(result, expected) - tm.assertIsInstance(result.freq, MonthEnd) - tm.assert_equal(result.freq, MonthEnd()) - tm.assert_equal(result.freqstr, 'M') - tm.assert_index_equal(result.shift(2), expected.shift(2)) - - def compare_sp_frame_float(self, result, expected, typ, version): - if LooseVersion(version) <= '0.18.1': - tm.assert_sp_frame_equal(result, expected, exact_indices=False, - check_dtype=False) - else: - tm.assert_sp_frame_equal(result, expected) - - def read_pickles(self, version): - if not is_platform_little_endian(): - pytest.skip("known failure on non-little endian") - - pth = tm.get_data_path('legacy_pickle/{0}'.format(str(version))) - n = 0 - for f in os.listdir(pth): - vf = os.path.join(pth, f) - data = self.compare(vf, version) - - if data is None: - continue - n += 1 - assert n > 0, 'Pickle files are not tested' - - def test_pickles(self): - pickle_path = tm.get_data_path('legacy_pickle') - n = 0 - for v in os.listdir(pickle_path): - pth = os.path.join(pickle_path, v) - if os.path.isdir(pth): - yield self.read_pickles, v - n += 1 - assert n > 0, 'Pickle files are not tested' - - def test_round_trip_current(self): - - try: - import cPickle as c_pickle - - def c_pickler(obj, path): - with open(path, 'wb') as fh: - c_pickle.dump(obj, fh, protocol=-1) - - def c_unpickler(path): - with open(path, 'rb') as fh: - fh.seek(0) - return c_pickle.load(fh) - except: - c_pickler = None - c_unpickler = None - - import pickle as python_pickle - - def python_pickler(obj, path): +def compare_frame_cat_and_float(result, expected, typ, version): + 
compare_frame_cat_onecol(result, expected, typ, version) + + +def compare_index_period(result, expected, typ, version): + tm.assert_index_equal(result, expected) + tm.assertIsInstance(result.freq, MonthEnd) + tm.assert_equal(result.freq, MonthEnd()) + tm.assert_equal(result.freqstr, 'M') + tm.assert_index_equal(result.shift(2), expected.shift(2)) + + +def compare_sp_frame_float(result, expected, typ, version): + if LooseVersion(version) <= '0.18.1': + tm.assert_sp_frame_equal(result, expected, exact_indices=False, + check_dtype=False) + else: + tm.assert_sp_frame_equal(result, expected) + + +# --------------------- +# tests +# --------------------- +def legacy_pickle_versions(): + # yield the pickle versions + pickle_path = tm.get_data_path('legacy_pickle') + for v in os.listdir(pickle_path): + pth = os.path.join(pickle_path, v) + if os.path.isdir(pth): + yield v + + +@pytest.mark.parametrize('version', legacy_pickle_versions()) +def test_pickles(current_pickle_data, version): + if not is_platform_little_endian(): + pytest.skip("known failure on non-little endian") + + pth = tm.get_data_path('legacy_pickle/{0}'.format(version)) + n = 0 + for f in os.listdir(pth): + vf = os.path.join(pth, f) + data = compare(current_pickle_data, vf, version) + + if data is None: + continue + n += 1 + assert n > 0, 'Pickle files are not tested' + + +def test_round_trip_current(current_pickle_data): + + try: + import cPickle as c_pickle + + def c_pickler(obj, path): with open(path, 'wb') as fh: - python_pickle.dump(obj, fh, protocol=-1) + c_pickle.dump(obj, fh, protocol=-1) - def python_unpickler(path): + def c_unpickler(path): with open(path, 'rb') as fh: fh.seek(0) - return python_pickle.load(fh) - - for typ, dv in self.data.items(): - for dt, expected in dv.items(): - - for writer in [pd.to_pickle, c_pickler, python_pickler]: - if writer is None: - continue - - with tm.ensure_clean(self.path) as path: - - # test writing with each pickler - writer(expected, path) - - # test reading with each unpickler - result = pd.read_pickle(path) - self.compare_element(result, expected, typ) - - if c_unpickler is not None: - result = c_unpickler(path) - self.compare_element(result, expected, typ) - - result = python_unpickler(path) - self.compare_element(result, expected, typ) - - def test_pickle_v0_14_1(self): - - # we have the name warning - # 10482 - with tm.assert_produces_warning(UserWarning): - cat = pd.Categorical(values=['a', 'b', 'c'], - categories=['a', 'b', 'c', 'd'], - name='foobar', ordered=False) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_14_1.pickle') - # This code was executed once on v0.14.1 to generate the pickle: - # - # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], - # name='foobar') - # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) - # - tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) - - def test_pickle_v0_15_2(self): - # ordered -> _ordered - # GH 9347 - - # we have the name warning - # 10482 - with tm.assert_produces_warning(UserWarning): - cat = pd.Categorical(values=['a', 'b', 'c'], - categories=['a', 'b', 'c', 'd'], - name='foobar', ordered=False) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_15_2.pickle') - # This code was executed once on v0.15.2 to generate the pickle: - # - # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], - # name='foobar') - # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) - # - tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) + return 
c_pickle.load(fh) + except: + c_pickler = None + c_unpickler = None + + import pickle as python_pickle + + def python_pickler(obj, path): + with open(path, 'wb') as fh: + python_pickle.dump(obj, fh, protocol=-1) + + def python_unpickler(path): + with open(path, 'rb') as fh: + fh.seek(0) + return python_pickle.load(fh) + + data = current_pickle_data + for typ, dv in data.items(): + for dt, expected in dv.items(): + + for writer in [pd.to_pickle, c_pickler, python_pickler]: + if writer is None: + continue + + with tm.ensure_clean() as path: + + # test writing with each pickler + writer(expected, path) + + # test reading with each unpickler + result = pd.read_pickle(path) + compare_element(result, expected, typ) + + if c_unpickler is not None: + result = c_unpickler(path) + compare_element(result, expected, typ) + + result = python_unpickler(path) + compare_element(result, expected, typ) + + +def test_pickle_v0_14_1(): + + # we have the name warning + # 10482 + with tm.assert_produces_warning(UserWarning): + cat = pd.Categorical(values=['a', 'b', 'c'], + categories=['a', 'b', 'c', 'd'], + name='foobar', ordered=False) + pickle_path = os.path.join(tm.get_data_path(), + 'categorical_0_14_1.pickle') + # This code was executed once on v0.14.1 to generate the pickle: + # + # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], + # name='foobar') + # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) + # + tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) + + +def test_pickle_v0_15_2(): + # ordered -> _ordered + # GH 9347 + + # we have the name warning + # 10482 + with tm.assert_produces_warning(UserWarning): + cat = pd.Categorical(values=['a', 'b', 'c'], + categories=['a', 'b', 'c', 'd'], + name='foobar', ordered=False) + pickle_path = os.path.join(tm.get_data_path(), + 'categorical_0_15_2.pickle') + # This code was executed once on v0.15.2 to generate the pickle: + # + # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], + # name='foobar') + # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) + # + tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) From ddb22f578b7c7147fd8bcd9fb7c8504a8053e313 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Thu, 16 Feb 2017 09:13:42 -0500 Subject: [PATCH 066/933] TST: Parametrize simple yield tests xref #15341 Author: Elliott Sales de Andrade Closes #15406 from QuLogic/pytest-simple-yield and squashes the following commits: b002752 [Elliott Sales de Andrade] TST: Set PYTHONHASHSEED so xdist doesn't break. 8368772 [Elliott Sales de Andrade] TST: Use fixtures for engine/parser where possible. c6cd346 [Elliott Sales de Andrade] TST: Parametrize remaining simple yield tests. 47bf1a1 [Elliott Sales de Andrade] TST: Replace ENGINES_PARSERS by parametrize. 
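The conversion follows one pattern throughout; a rough sketch of the
before/after shape (a minimal illustration only -- the test name and
parameter values below are placeholders, not taken from this diff):

    import pytest

    # old nose-style generator test: the runner collected one test per
    # yielded (check_function, args) tuple
    # def test_ops():
    #     for engine in ['python', 'numexpr']:
    #         yield check_op, engine

    # new pytest style: parametrize generates one named test case per value,
    # with per-case collection and reporting
    @pytest.mark.parametrize('engine', ['python', 'numexpr'])
    def test_ops(engine):
        assert engine in ('python', 'numexpr')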
--- ci/script_multi.sh | 6 + pandas/tests/computation/test_compat.py | 11 +- pandas/tests/computation/test_eval.py | 233 ++++++------------------ pandas/tests/io/parser/test_network.py | 26 +-- pandas/util/testing.py | 15 +- 5 files changed, 92 insertions(+), 199 deletions(-) diff --git a/ci/script_multi.sh b/ci/script_multi.sh index f5fbcbbc12f83..41f71fd21f63f 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -17,6 +17,12 @@ if [ -n "$LOCALE_OVERRIDE" ]; then python -c "$pycmd" fi +# Workaround for pytest-xdist flaky collection order +# https://github.com/pytest-dev/pytest/issues/920 +# https://github.com/pytest-dev/pytest/issues/1075 +export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') +echo PYTHONHASHSEED=$PYTHONHASHSEED + if [ "$BUILD_TEST" ]; then echo "We are not running pytest as this is simply a build test." elif [ "$COVERAGE" ]; then diff --git a/pandas/tests/computation/test_compat.py b/pandas/tests/computation/test_compat.py index 77994ac6d2f53..59bdde83aedd8 100644 --- a/pandas/tests/computation/test_compat.py +++ b/pandas/tests/computation/test_compat.py @@ -12,8 +12,6 @@ import pandas.computation.expr as expr from pandas.computation import _MIN_NUMEXPR_VERSION -ENGINES_PARSERS = list(product(_engines, expr._parsers)) - def test_compat(): # test we have compat with our version of nu @@ -30,12 +28,9 @@ def test_compat(): pytest.skip("not testing numexpr version compat") -def test_invalid_numexpr_version(): - for engine, parser in ENGINES_PARSERS: - yield check_invalid_numexpr_version, engine, parser - - -def check_invalid_numexpr_version(engine, parser): +@pytest.mark.parametrize('engine', _engines) +@pytest.mark.parametrize('parser', expr._parsers) +def test_invalid_numexpr_version(engine, parser): def testit(): a, b = 1, 2 res = pd.eval('a + b', engine=engine, parser=parser) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index ada714c8ac52e..b42f79fe5009b 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -20,6 +20,7 @@ from pandas.computation import pytables from pandas.computation.engines import _engines, NumExprClobberingError from pandas.computation.expr import PythonExprVisitor, PandasExprVisitor +from pandas.computation.expressions import _USE_NUMEXPR, _NUMEXPR_INSTALLED from pandas.computation.ops import (_binary_ops_dict, _special_case_arith_ops_syms, _arith_ops_syms, _bool_ops_syms, @@ -38,6 +39,23 @@ _scalar_skip = 'in', 'not in' +@pytest.fixture(params=( + pytest.mark.skipif(engine == 'numexpr' and not _USE_NUMEXPR, + reason='numexpr enabled->{enabled}, ' + 'installed->{installed}'.format( + enabled=_USE_NUMEXPR, + installed=_NUMEXPR_INSTALLED))(engine) + for engine in _engines +)) +def engine(request): + return request.param + + +@pytest.fixture(params=expr._parsers) +def parser(request): + return request.param + + def engine_has_neg_frac(engine): return _engines[engine].has_neg_frac @@ -774,17 +792,17 @@ def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): f = lambda *args, **kwargs: np.random.randn() -ENGINES_PARSERS = list(product(_engines, expr._parsers)) - #------------------------------------- # typecasting rules consistency with python # issue #12388 class TestTypeCasting(object): - - def check_binop_typecasting(self, engine, parser, op, dt): - tm.skip_if_no_ne(engine) + @pytest.mark.parametrize('op', ['+', '-', '*', '**', '/']) + # maybe someday... 
numexpr has too many upcasting rules now + # chain(*(np.sctypes[x] for x in ['uint', 'int', 'float'])) + @pytest.mark.parametrize('dt', [np.float32, np.float64]) + def test_binop_typecasting(self, engine, parser, op, dt): df = mkdf(5, 3, data_gen_f=f, dtype=dt) s = 'df {} 3'.format(op) res = pd.eval(s, engine=engine, parser=parser) @@ -798,15 +816,6 @@ def check_binop_typecasting(self, engine, parser, op, dt): assert res.values.dtype == dt assert_frame_equal(res, eval(s)) - def test_binop_typecasting(self): - for engine, parser in ENGINES_PARSERS: - for op in ['+', '-', '*', '**', '/']: - # maybe someday... numexpr has too many upcasting rules now - # for dt in chain(*(np.sctypes[x] for x in ['uint', 'int', - # 'float'])): - for dt in [np.float32, np.float64]: - yield self.check_binop_typecasting, engine, parser, op, dt - #------------------------------------- # basic and complex alignment @@ -826,19 +835,13 @@ class TestAlignment(object): index_types = 'i', 'u', 'dt' lhs_index_types = index_types + ('s',) # 'p' - def check_align_nested_unary_op(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_align_nested_unary_op(self, engine, parser): s = 'df * ~2' df = mkdf(5, 3, data_gen_f=f) res = pd.eval(s, engine=engine, parser=parser) assert_frame_equal(res, df * ~2) - def test_align_nested_unary_op(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_align_nested_unary_op, engine, parser - - def check_basic_frame_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_basic_frame_alignment(self, engine, parser): args = product(self.lhs_index_types, self.index_types, self.index_types) with warnings.catch_warnings(record=True): @@ -856,12 +859,7 @@ def check_basic_frame_alignment(self, engine, parser): res = pd.eval('df + df2', engine=engine, parser=parser) assert_frame_equal(res, df + df2) - def test_basic_frame_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_basic_frame_alignment, engine, parser - - def check_frame_comparison(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_frame_comparison(self, engine, parser): args = product(self.lhs_index_types, repeat=2) for r_idx_type, c_idx_type in args: df = mkdf(10, 10, data_gen_f=f, r_idx_type=r_idx_type, @@ -874,12 +872,8 @@ def check_frame_comparison(self, engine, parser): res = pd.eval('df < df3', engine=engine, parser=parser) assert_frame_equal(res, df < df3) - def test_frame_comparison(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_frame_comparison, engine, parser - - def check_medium_complex_frame_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) + @slow + def test_medium_complex_frame_alignment(self, engine, parser): args = product(self.lhs_index_types, self.index_types, self.index_types, self.index_types) @@ -899,14 +893,7 @@ def check_medium_complex_frame_alignment(self, engine, parser): engine=engine, parser=parser) assert_frame_equal(res, df + df2 + df3) - @slow - def test_medium_complex_frame_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_medium_complex_frame_alignment, engine, parser - - def check_basic_frame_series_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) - + def test_basic_frame_series_alignment(self, engine, parser): def testit(r_idx_type, c_idx_type, index_name): df = mkdf(10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type) @@ -932,13 +919,7 @@ def testit(r_idx_type, c_idx_type, index_name): for r_idx_type, c_idx_type, index_name in args: testit(r_idx_type, 
c_idx_type, index_name) - def test_basic_frame_series_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_basic_frame_series_alignment, engine, parser - - def check_basic_series_frame_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) - + def test_basic_series_frame_alignment(self, engine, parser): def testit(r_idx_type, c_idx_type, index_name): df = mkdf(10, 7, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type) @@ -968,12 +949,7 @@ def testit(r_idx_type, c_idx_type, index_name): for r_idx_type, c_idx_type, index_name in args: testit(r_idx_type, c_idx_type, index_name) - def test_basic_series_frame_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_basic_series_frame_alignment, engine, parser - - def check_series_frame_commutativity(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_series_frame_commutativity(self, engine, parser): args = product(self.lhs_index_types, self.index_types, ('+', '*'), ('index', 'columns')) @@ -1000,13 +976,8 @@ def check_series_frame_commutativity(self, engine, parser): if engine == 'numexpr': assert_frame_equal(a, b) - def test_series_frame_commutativity(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_series_frame_commutativity, engine, parser - - def check_complex_series_frame_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) - + @slow + def test_complex_series_frame_alignment(self, engine, parser): import random args = product(self.lhs_index_types, self.index_types, self.index_types, self.index_types) @@ -1050,13 +1021,7 @@ def check_complex_series_frame_alignment(self, engine, parser): tm.assert_equal(res.shape, expected.shape) assert_frame_equal(res, expected) - @slow - def test_complex_series_frame_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_complex_series_frame_alignment, engine, parser - - def check_performance_warning_for_poor_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_performance_warning_for_poor_alignment(self, engine, parser): df = DataFrame(randn(1000, 10)) s = Series(randn(10000)) if engine == 'numexpr': @@ -1098,11 +1063,6 @@ def check_performance_warning_for_poor_alignment(self, engine, parser): "".format(1, 'df', np.log10(s.size - df.shape[1]))) tm.assert_equal(msg, expected) - def test_performance_warning_for_poor_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield (self.check_performance_warning_for_poor_alignment, engine, - parser) - #------------------------------------ # slightly more complex ops @@ -1762,18 +1722,12 @@ def setUpClass(cls): class TestScope(object): - def check_global_scope(self, e, engine, parser): - tm.skip_if_no_ne(engine) + def test_global_scope(self, engine, parser): + e = '_var_s * 2' tm.assert_numpy_array_equal(_var_s * 2, pd.eval(e, engine=engine, parser=parser)) - def test_global_scope(self): - e = '_var_s * 2' - for engine, parser in product(_engines, expr._parsers): - yield self.check_global_scope, e, engine, parser - - def check_no_new_locals(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_no_new_locals(self, engine, parser): x = 1 lcls = locals().copy() pd.eval('x + 1', local_dict=lcls, engine=engine, parser=parser) @@ -1781,22 +1735,13 @@ def check_no_new_locals(self, engine, parser): lcls2.pop('lcls') tm.assert_equal(lcls, lcls2) - def test_no_new_locals(self): - for engine, parser in product(_engines, expr._parsers): - yield self.check_no_new_locals, engine, parser - - def check_no_new_globals(self, engine, 
parser): - tm.skip_if_no_ne(engine) + def test_no_new_globals(self, engine, parser): x = 1 gbls = globals().copy() pd.eval('x + 1', engine=engine, parser=parser) gbls2 = globals().copy() tm.assert_equal(gbls, gbls2) - def test_no_new_globals(self): - for engine, parser in product(_engines, expr._parsers): - yield self.check_no_new_globals, engine, parser - def test_invalid_engine(): tm.skip_if_no_ne() @@ -1816,7 +1761,9 @@ def test_invalid_parser(): 'pandas': PandasExprVisitor} -def check_disallowed_nodes(engine, parser): +@pytest.mark.parametrize('engine', _parsers) +@pytest.mark.parametrize('parser', _parsers) +def test_disallowed_nodes(engine, parser): tm.skip_if_no_ne(engine) VisitorClass = _parsers[parser] uns_ops = VisitorClass.unsupported_nodes @@ -1827,38 +1774,19 @@ def check_disallowed_nodes(engine, parser): getattr(inst, ops)() -def test_disallowed_nodes(): - for engine, visitor in product(_parsers, repeat=2): - yield check_disallowed_nodes, engine, visitor - - -def check_syntax_error_exprs(engine, parser): - tm.skip_if_no_ne(engine) +def test_syntax_error_exprs(engine, parser): e = 's +' with pytest.raises(SyntaxError): pd.eval(e, engine=engine, parser=parser) -def test_syntax_error_exprs(): - for engine, parser in ENGINES_PARSERS: - yield check_syntax_error_exprs, engine, parser - - -def check_name_error_exprs(engine, parser): - tm.skip_if_no_ne(engine) +def test_name_error_exprs(engine, parser): e = 's + t' with tm.assertRaises(NameError): pd.eval(e, engine=engine, parser=parser) -def test_name_error_exprs(): - for engine, parser in ENGINES_PARSERS: - yield check_name_error_exprs, engine, parser - - -def check_invalid_local_variable_reference(engine, parser): - tm.skip_if_no_ne(engine) - +def test_invalid_local_variable_reference(engine, parser): a, b = 1, 2 exprs = 'a + @b', '@a + b', '@a + @b' for expr in exprs: @@ -1870,13 +1798,7 @@ def check_invalid_local_variable_reference(engine, parser): pd.eval(exprs, engine=engine, parser=parser) -def test_invalid_local_variable_reference(): - for engine, parser in ENGINES_PARSERS: - yield check_invalid_local_variable_reference, engine, parser - - -def check_numexpr_builtin_raises(engine, parser): - tm.skip_if_no_ne(engine) +def test_numexpr_builtin_raises(engine, parser): sin, dotted_line = 1, 2 if engine == 'numexpr': with tm.assertRaisesRegexp(NumExprClobberingError, @@ -1887,51 +1809,35 @@ def check_numexpr_builtin_raises(engine, parser): tm.assert_equal(res, sin + dotted_line) -def test_numexpr_builtin_raises(): - for engine, parser in ENGINES_PARSERS: - yield check_numexpr_builtin_raises, engine, parser - - -def check_bad_resolver_raises(engine, parser): - tm.skip_if_no_ne(engine) +def test_bad_resolver_raises(engine, parser): cannot_resolve = 42, 3.0 with tm.assertRaisesRegexp(TypeError, 'Resolver of type .+'): pd.eval('1 + 2', resolvers=cannot_resolve, engine=engine, parser=parser) -def test_bad_resolver_raises(): - for engine, parser in ENGINES_PARSERS: - yield check_bad_resolver_raises, engine, parser - - -def check_empty_string_raises(engine, parser): +def test_empty_string_raises(engine, parser): # GH 13139 - tm.skip_if_no_ne(engine) with tm.assertRaisesRegexp(ValueError, 'expr cannot be an empty string'): pd.eval('', engine=engine, parser=parser) -def test_empty_string_raises(): - for engine, parser in ENGINES_PARSERS: - yield check_empty_string_raises, engine, parser - - -def check_more_than_one_expression_raises(engine, parser): - tm.skip_if_no_ne(engine) +def test_more_than_one_expression_raises(engine, parser): with 
tm.assertRaisesRegexp(SyntaxError, 'only a single expression is allowed'): pd.eval('1 + 1; 2 + 2', engine=engine, parser=parser) -def test_more_than_one_expression_raises(): - for engine, parser in ENGINES_PARSERS: - yield check_more_than_one_expression_raises, engine, parser +@pytest.mark.parametrize('cmp', ('and', 'or')) +@pytest.mark.parametrize('lhs', (int, float)) +@pytest.mark.parametrize('rhs', (int, float)) +def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser): + gen = {int: lambda: np.random.randint(10), float: np.random.randn} + mid = gen[lhs]() + lhs = gen[lhs]() + rhs = gen[rhs]() -def check_bool_ops_fails_on_scalars(gen, lhs, cmp, rhs, engine, parser): - tm.skip_if_no_ne(engine) - mid = gen[type(lhs)]() ex1 = 'lhs {0} mid {1} rhs'.format(cmp, cmp) ex2 = 'lhs {0} mid and mid {1} rhs'.format(cmp, cmp) ex3 = '(lhs {0} mid) & (mid {1} rhs)'.format(cmp, cmp) @@ -1940,32 +1846,14 @@ def check_bool_ops_fails_on_scalars(gen, lhs, cmp, rhs, engine, parser): pd.eval(ex, engine=engine, parser=parser) -def test_bool_ops_fails_on_scalars(): - _bool_ops_syms = 'and', 'or' - dtypes = int, float - gen = {int: lambda: np.random.randint(10), float: np.random.randn} - for engine, parser, dtype1, cmp, dtype2 in product(_engines, expr._parsers, - dtypes, _bool_ops_syms, - dtypes): - yield (check_bool_ops_fails_on_scalars, gen, gen[dtype1](), cmp, - gen[dtype2](), engine, parser) - - -def check_inf(engine, parser): - tm.skip_if_no_ne(engine) +def test_inf(engine, parser): s = 'inf + 1' expected = np.inf result = pd.eval(s, engine=engine, parser=parser) tm.assert_equal(result, expected) -def test_inf(): - for engine, parser in ENGINES_PARSERS: - yield check_inf, engine, parser - - -def check_negate_lt_eq_le(engine, parser): - tm.skip_if_no_ne(engine) +def test_negate_lt_eq_le(engine, parser): df = pd.DataFrame([[0, 10], [1, 20]], columns=['cat', 'count']) expected = df[~(df.cat > 0)] @@ -1980,11 +1868,6 @@ def check_negate_lt_eq_le(engine, parser): tm.assert_frame_equal(result, expected) -def test_negate_lt_eq_le(): - for engine, parser in product(_engines, expr._parsers): - yield check_negate_lt_eq_le, engine, parser - - class TestValidate(tm.TestCase): def test_validate_bool_args(self): diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 721d447262149..4d6b6c7daa3c6 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -7,7 +7,6 @@ import os import pytest -from itertools import product import pandas.util.testing as tm from pandas import DataFrame @@ -21,14 +20,18 @@ def salaries_table(): @pytest.mark.parametrize( - "compression,extension", [('gzip', '.gz'), ('bz2', '.bz2'), - ('zip', '.zip'), ('xz', '.xz')]) -def test_compressed_urls(salaries_table, compression, extension): - check_compressed_urls(salaries_table, compression, extension) + "compression,extension", + [('gzip', '.gz'), ('bz2', '.bz2'), ('zip', '.zip'), + tm._mark_skipif_no_lzma(('xz', '.xz'))]) +@pytest.mark.parametrize('mode', ['explicit', 'infer']) +@pytest.mark.parametrize('engine', ['python', 'c']) +def test_compressed_urls(salaries_table, compression, extension, mode, engine): + check_compressed_urls(salaries_table, compression, extension, mode, engine) @tm.network -def check_compressed_urls(salaries_table, compression, extension): +def check_compressed_urls(salaries_table, compression, extension, mode, + engine): # test reading compressed urls with various engines and # extension inference base_url = 
('https://github.com/pandas-dev/pandas/raw/master/' @@ -36,14 +39,11 @@ def check_compressed_urls(salaries_table, compression, extension): url = base_url + extension - # args is a (compression, engine) tuple - for (c, engine) in product([compression, 'infer'], ['python', 'c']): + if mode != 'explicit': + compression = mode - if url.endswith('.xz'): - tm._skip_if_no_lzma() - - url_table = read_table(url, compression=c, engine=engine) - tm.assert_frame_equal(url_table, salaries_table) + url_table = read_table(url, compression=compression, engine=engine) + tm.assert_frame_equal(url_table, salaries_table) class TestS3(tm.TestCase): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index cda386781e2ec..1bd539469dbe3 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -307,12 +307,21 @@ def _skip_if_scipy_0_17(): pytest.skip("scipy 0.17") -def _skip_if_no_lzma(): +def _check_if_lzma(): try: return compat.import_lzma() except ImportError: - import pytest - pytest.skip('need backports.lzma to run') + return False + + +def _skip_if_no_lzma(): + return _check_if_lzma() or pytest.skip('need backports.lzma to run') + + +_mark_skipif_no_lzma = pytest.mark.skipif( + not _check_if_lzma(), + reason='need backports.lzma to run' +) def _skip_if_no_xarray(): From 5a8883b965610234366150897fe8963abffd6a7c Mon Sep 17 00:00:00 2001 From: Diego Fernandez Date: Thu, 16 Feb 2017 09:21:03 -0500 Subject: [PATCH 067/933] BUG: Ensure the right values are set in SeriesGroupBy.nunique closes #13453 Author: Diego Fernandez Closes #15418 from aiguofer/gh_13453 and squashes the following commits: c53bd70 [Diego Fernandez] Add test for #13453 in test_resample and add note to whatsnew 0daab80 [Diego Fernandez] Ensure the right values are set in SeriesGroupBy.nunique --- doc/source/whatsnew/v0.20.0.txt | 7 ++++--- pandas/core/groupby.py | 2 +- pandas/tests/groupby/test_groupby.py | 13 +++++++++++++ pandas/tests/tseries/test_resample.py | 20 ++++++++++++++++++++ 4 files changed, 38 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4708abe4d592e..09551cfc0bcf8 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -418,6 +418,7 @@ New Behavior: Other API Changes ^^^^^^^^^^^^^^^^^ +- ``numexpr`` version is now required to be >= 2.4.6 and it will not be used at all if this requisite is not fulfilled (:issue:`15213`). - ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`) - ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`) - ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`) @@ -428,9 +429,8 @@ Other API Changes - ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`) - ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype`` - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`) -- The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision lost for integers greather than 2**53. 
Furthermore ``FLOAT`` columns with values above 10**4 are no more casted to ``int64`` which also caused precision lost (:issue: `14064`, :issue:`14305`).
+- The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision loss for integers greater than 2**53. Furthermore ``FLOAT`` columns with values above 10**4 are no longer casted to ``int64`` which also caused precision loss (:issue:`14064`, :issue:`14305`).
 - Reorganization of timeseries development tests (:issue:`14854`)
-- ``numexpr`` version is now required to be >= 2.4.6 and it will not be used at all if this requisite is not fulfilled (:issue:`15213`).
 
 .. _whatsnew_0200.deprecations:
 
@@ -473,7 +473,7 @@ Performance Improvements
 (or with ``compat_x=True``) (:issue:`15073`).
 - Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`)
 - Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`)
-- When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object.
+- When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. (:issue:`14947`)
 
 
 
@@ -553,6 +553,7 @@ Bug Fixes
 
 - Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`)
 - Bug in creating a ``MultiIndex`` with tuples and not passing a list of names; this will now raise ``ValueError`` (:issue:`15110`)
+- Bug in ``groupby().nunique()`` with a datetimelike-grouper where bin counts were incorrect (:issue:`13453`)
 - Bug in catching an overflow in ``Timestamp`` + ``Timedelta/Offset`` operations (:issue:`15126`)
 - Bug in the HTML display with with a ``MultiIndex`` and truncation (:issue:`14882`)
 
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 23c835318b0e6..ba2de295fa0a9 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -3032,7 +3032,7 @@ def nunique(self, dropna=True):
 
         # we might have duplications among the bins
         if len(res) != len(ri):
             res, out = np.zeros(len(ri), dtype=out.dtype), res
-            res[ids] = out
+            res[ids[idx]] = out
 
         return Series(res,
                       index=ri,
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index d53446870beb1..59cbcab23b9e7 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -4159,6 +4159,19 @@ def test_nunique_with_empty_series(self):
         expected = pd.Series(name='name', dtype='int64')
         tm.assert_series_equal(result, expected)
 
+    def test_nunique_with_timegrouper(self):
+        # GH 13453
+        test = pd.DataFrame({
+            'time': [Timestamp('2016-06-28 09:35:35'),
+                     Timestamp('2016-06-28 16:09:30'),
+                     Timestamp('2016-06-28 16:46:28')],
+            'data': ['1', '2', '3']}).set_index('time')
+        result = test.groupby(pd.TimeGrouper(freq='h'))['data'].nunique()
+        expected = test.groupby(
+            pd.TimeGrouper(freq='h')
+        )['data'].apply(pd.Series.nunique)
+        tm.assert_series_equal(result, expected)
+
     def test_numpy_compat(self):
         # see gh-12811
         df = pd.DataFrame({'A': [1, 2, 1], 'B': [1, 2, 3]})
diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py
index afb44887fe7d1..45bbc88ef711d 100755
--- a/pandas/tests/tseries/test_resample.py
+++ b/pandas/tests/tseries/test_resample.py
@@ -1939,6 +1939,26 @@ def test_resample_nunique(self):
         result = 
df.ID.groupby(pd.Grouper(freq='D')).nunique() assert_series_equal(result, expected) + def test_resample_nunique_with_date_gap(self): + # GH 13453 + index = pd.date_range('1-1-2000', '2-15-2000', freq='h') + index2 = pd.date_range('4-15-2000', '5-15-2000', freq='h') + index3 = index.append(index2) + s = pd.Series(range(len(index3)), index=index3) + r = s.resample('M') + + # Since all elements are unique, these should all be the same + results = [ + r.count(), + r.nunique(), + r.agg(pd.Series.nunique), + r.agg('nunique') + ] + + assert_series_equal(results[0], results[1]) + assert_series_equal(results[0], results[2]) + assert_series_equal(results[0], results[3]) + def test_resample_group_info(self): # GH10914 for n, k in product((10000, 100000), (10, 100, 1000)): dr = date_range(start='2015-08-27', periods=n // 10, freq='T') From c7300ea9ccf6c8b4eeb5a4ae59dc2419753c9b18 Mon Sep 17 00:00:00 2001 From: abaldenko Date: Thu, 16 Feb 2017 12:39:27 -0500 Subject: [PATCH 068/933] BUG: Concat with inner join and empty DataFrame closes #15328 Author: abaldenko Closes #15397 from abaldenko/concat_empty_dataframe and squashes the following commits: 47c8735 [abaldenko] BUG: Concat with inner join and empty DataFrame fc473b7 [abaldenko] BUG: Concat with inner join and empty DataFrame b86dcb6 [abaldenko] BUG: Concat with inner join and empty DataFrame --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/tests/tools/test_concat.py | 10 ++++++++++ pandas/tests/tools/test_merge.py | 8 ++++++++ pandas/tools/concat.py | 4 +++- 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 09551cfc0bcf8..ddb9088035d89 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -576,7 +576,7 @@ Bug Fixes - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - +- Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index 87a0dda34a525..2a28fccdc9b94 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -1825,6 +1825,16 @@ def test_concat_bug_3602(self): result = concat([df1, df2], axis=1) assert_frame_equal(result, expected) + def test_concat_inner_join_empty(self): + # GH 15328 + df_empty = pd.DataFrame() + df_a = pd.DataFrame({'a': [1, 2]}, index=[0, 1], dtype='int64') + df_expected = pd.DataFrame({'a': []}, index=[], dtype='int64') + + for how, expected in [('inner', df_expected), ('outer', df_a)]: + result = pd.concat([df_a, df_empty], axis=1, join=how) + assert_frame_equal(result, expected) + def test_concat_series_axis1_same_names_ignore_index(self): dates = date_range('01-Jan-2013', '01-Jan-2014', freq='MS')[0:-1] s1 = Series(randn(len(dates)), index=dates, name='value') diff --git a/pandas/tests/tools/test_merge.py b/pandas/tests/tools/test_merge.py index 472d8674f9f8d..b3b5e7e29319b 100644 --- a/pandas/tests/tools/test_merge.py +++ b/pandas/tests/tools/test_merge.py @@ -52,6 +52,14 @@ def setUp(self): self.right = DataFrame({'v2': np.random.randn(4)}, index=['d', 'b', 'c', 'a']) + def test_merge_inner_join_empty(self): + # GH 15328 + df_empty = pd.DataFrame() + df_a = pd.DataFrame({'a': [1, 
2]}, index=[0, 1], dtype='int64') + result = pd.merge(df_empty, df_a, left_index=True, right_index=True) + expected = pd.DataFrame({'a': []}, index=[], dtype='int64') + assert_frame_equal(result, expected) + def test_merge_common(self): joined = merge(self.df, self.df2) exp = merge(self.df, self.df2, on=['key1', 'key2']) diff --git a/pandas/tools/concat.py b/pandas/tools/concat.py index dbbc831b19d1d..31d7a9eb9a01a 100644 --- a/pandas/tools/concat.py +++ b/pandas/tools/concat.py @@ -284,7 +284,9 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, if sum(obj.shape) > 0 or isinstance(obj, Series)] if (len(non_empties) and (keys is None and names is None and - levels is None and join_axes is None)): + levels is None and + join_axes is None and + not self.intersect)): objs = non_empties sample = objs[0] From 9b5d8488e8184da0507c09482f23ebfff34ecc43 Mon Sep 17 00:00:00 2001 From: Jeff Carey Date: Thu, 16 Feb 2017 12:45:29 -0500 Subject: [PATCH 069/933] ENH: Added ability to freeze panes from DataFrame.to_excel() (#15160) closes #15160 Author: Jeff Carey Closes #15291 from jeffcarey/enh-15160 and squashes the following commits: cef8fce [Jeff Carey] ENH: Added ability to freeze panes from DataFrame.to_excel() --- doc/source/io.rst | 13 +++++++++++++ doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 19 ++++++++++++++++-- pandas/core/generic.py | 7 ++++++- pandas/io/excel.py | 34 ++++++++++++++++++++++++++------- pandas/tests/io/test_excel.py | 12 ++++++++++-- 6 files changed, 74 insertions(+), 12 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 22eac33a715ba..2d6ddf98437e5 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2777,6 +2777,7 @@ Added support for Openpyxl >= 2.2 ``'xlsxwriter'`` will produce an Excel 2007-format workbook (xlsx). If omitted, an Excel 2007-formatted workbook is produced. + .. _io.excel.writers: Excel writer engines @@ -2823,6 +2824,18 @@ argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are: df.to_excel('path_to_file.xlsx', sheet_name='Sheet1') +.. _io.excel.style: + +Style and Formatting +'''''''''''''''''''' + +The look and feel of Excel worksheets created from pandas can be modified using the following parameters on the ``DataFrame``'s ``to_excel`` method. + +- ``float_format`` : Format string for floating point numbers (default None) +- ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will +freeze the first row and first column (default None) + + .. _io.clipboard: Clipboard diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ddb9088035d89..75a8752c9bfa4 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -153,6 +153,7 @@ Other enhancements - ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`). - ``pandas.tools.hashing`` has gained a ``hash_tuples`` routine, and ``hash_pandas_object`` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`) - ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) +- ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) .. 
_ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f7c306ea7ce95..3ebdf72a5cde9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1390,7 +1390,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None, - merge_cells=True, encoding=None, inf_rep='inf', verbose=True): + merge_cells=True, encoding=None, inf_rep='inf', verbose=True, + freeze_panes=None): from pandas.io.excel import ExcelWriter need_save = False if encoding is None: @@ -1406,12 +1407,26 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', index_label=index_label, merge_cells=merge_cells, inf_rep=inf_rep) + formatted_cells = formatter.get_formatted_cells() + freeze_panes = self._validate_freeze_panes(freeze_panes) excel_writer.write_cells(formatted_cells, sheet_name, - startrow=startrow, startcol=startcol) + startrow=startrow, startcol=startcol, + freeze_panes=freeze_panes) if need_save: excel_writer.save() + def _validate_freeze_panes(self, freeze_panes): + if freeze_panes is not None: + if ( + len(freeze_panes) == 2 and + all(isinstance(item, int) for item in freeze_panes) + ): + return freeze_panes + + raise ValueError("freeze_panes must be of form (row, column)" + " where row and column are integers") + def to_stata(self, fname, convert_dates=None, write_index=True, encoding="latin-1", byteorder=None, time_stamp=None, data_label=None, variable_labels=None): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 20e6e027dbf09..204cd91ebfab0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1033,7 +1033,7 @@ def __setstate__(self, state): # I/O Methods _shared_docs['to_excel'] = """ - Write %(klass)s to a excel sheet + Write %(klass)s to an excel sheet %(versionadded_to_excel)s Parameters ---------- @@ -1072,6 +1072,11 @@ def __setstate__(self, state): inf_rep : string, default 'inf' Representation for infinity (there is no native representation for infinity in Excel) + freeze_panes : tuple of integer (length 2), default None + Specifies the bottommost row and rightmost column that + is to be frozen + + .. versionadded:: 0.20.0 Notes ----- diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 2821983213646..37a61b7dc9ab5 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -693,7 +693,8 @@ def engine(self): pass @abc.abstractmethod - def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, + freeze_panes=None): """ Write given formated cells into Excel an excel sheet @@ -705,6 +706,8 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): Name of Excel sheet, if None, then use self.cur_sheet startrow: upper left cell row to dump data frame startcol: upper left cell column to dump data frame + freeze_panes: integer tuple of length 2 + contains the bottom-most row and right-most column to freeze """ pass @@ -804,7 +807,8 @@ def save(self): """ return self.book.save(self.path) - def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, + freeze_panes=None): # Write the frame cells using openpyxl. 
from openpyxl.cell import get_column_letter @@ -904,7 +908,8 @@ class _Openpyxl20Writer(_Openpyxl1Writer): engine = 'openpyxl20' openpyxl_majorver = 2 - def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, + freeze_panes=None): # Write the frame cells using openpyxl. from openpyxl.cell import get_column_letter @@ -1311,7 +1316,8 @@ class _Openpyxl22Writer(_Openpyxl20Writer): engine = 'openpyxl22' openpyxl_majorver = 2 - def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, + freeze_panes=None): # Write the frame cells using openpyxl. sheet_name = self._get_sheet_name(sheet_name) @@ -1324,6 +1330,10 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): wks.title = sheet_name self.sheets[sheet_name] = wks + if freeze_panes is not None: + wks.freeze_panes = wks.cell(row=freeze_panes[0] + 1, + column=freeze_panes[1] + 1) + for cell in cells: xcell = wks.cell( row=startrow + cell.row + 1, @@ -1396,7 +1406,8 @@ def save(self): """ return self.book.save(self.path) - def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, + freeze_panes=None): # Write the frame cells using xlwt. sheet_name = self._get_sheet_name(sheet_name) @@ -1407,6 +1418,11 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): wks = self.book.add_sheet(sheet_name) self.sheets[sheet_name] = wks + if freeze_panes is not None: + wks.set_panes_frozen(True) + wks.set_horz_split_pos(freeze_panes[0]) + wks.set_vert_split_pos(freeze_panes[1]) + style_dict = {} for cell in cells: @@ -1518,11 +1534,12 @@ def save(self): """ Save workbook to disk. """ + return self.book.close() - def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, + freeze_panes=None): # Write the frame cells using xlsxwriter. 
- sheet_name = self._get_sheet_name(sheet_name) if sheet_name in self.sheets: @@ -1533,6 +1550,9 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): style_dict = {} + if freeze_panes is not None: + wks.freeze_panes(*(freeze_panes)) + for cell in cells: val = _conv_value(cell.val) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 0c2b443cffe52..b66cb24bf44d8 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1836,6 +1836,14 @@ def test_true_and_false_value_options(self): false_values=['bar']) tm.assert_frame_equal(read_frame, expected) + def test_freeze_panes(self): + # GH15160 + expected = DataFrame([[1, 2], [3, 4]], columns=['col1', 'col2']) + with ensure_clean(self.ext) as path: + expected.to_excel(path, "Sheet1", freeze_panes=(1, 1)) + result = read_excel(path) + tm.assert_frame_equal(expected, result) + def raise_wrapper(major_ver): def versioned_raise_wrapper(orig_method): @@ -1873,7 +1881,7 @@ class OpenpyxlTests(ExcelWriterBase, tm.TestCase): def test_to_excel_styleconverter(self): _skip_if_no_openpyxl() if not openpyxl_compat.is_compat(major_ver=1): - pytest.skip('incompatiable openpyxl version') + pytest.skip('incompatible openpyxl version') import openpyxl @@ -2095,7 +2103,7 @@ def test_to_excel_styleconverter(self): def test_write_cells_merge_styled(self): if not openpyxl_compat.is_compat(major_ver=2): - pytest.skip('incompatiable openpyxl version') + pytest.skip('incompatible openpyxl version') from pandas.formats.format import ExcelCell From c588dd1d0b7ea2dffb4e9906b8455739c9055037 Mon Sep 17 00:00:00 2001 From: Jeff Carey Date: Fri, 17 Feb 2017 00:17:38 -0800 Subject: [PATCH 070/933] Documents touch-up for DataFrame.to_excel() freeze_panes option (#15436) --- doc/source/io.rst | 4 ++-- pandas/core/generic.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 2d6ddf98437e5..55ef2c09d43e4 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2832,8 +2832,8 @@ Style and Formatting The look and feel of Excel worksheets created from pandas can be modified using the following parameters on the ``DataFrame``'s ``to_excel`` method. - ``float_format`` : Format string for floating point numbers (default None) -- ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will -freeze the first row and first column (default None) +- ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will freeze the first row and first column (default None) + .. _io.clipboard: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 204cd91ebfab0..26b9a880dd2c7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1073,7 +1073,7 @@ def __setstate__(self, state): Representation for infinity (there is no native representation for infinity in Excel) freeze_panes : tuple of integer (length 2), default None - Specifies the bottommost row and rightmost column that + Specifies the one-based bottommost row and rightmost column that is to be frozen .. 
versionadded:: 0.20.0 From f4e672ccc46da0a358c4729714b6343e39fafd7b Mon Sep 17 00:00:00 2001 From: Peter Date: Fri, 17 Feb 2017 13:09:20 +0000 Subject: [PATCH 071/933] BUG: to_sql convert index name to string (#15404) (#15423) * Converted index name to string to fix issue #15404 - BUG: to_sql errors with numeric index name - needs conversion to string * Additional int to string conversion added. Associated test cases added. * PEP 8 compliance edits * Removed extraneous brackets --- pandas/io/sql.py | 5 +++-- pandas/tests/io/test_sql.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 55e145b493dd9..bace43e785dff 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -750,7 +750,8 @@ def _get_column_names_and_types(self, dtype_mapper): for i, idx_label in enumerate(self.index): idx_type = dtype_mapper( self.frame.index.get_level_values(i)) - column_names_and_types.append((idx_label, idx_type, True)) + column_names_and_types.append((text_type(idx_label), + idx_type, True)) column_names_and_types += [ (text_type(self.frame.columns[i]), @@ -1220,7 +1221,7 @@ def _create_sql_schema(self, frame, table_name, keys=None, dtype=None): def _get_unicode_name(name): try: - uname = name.encode("utf-8", "strict").decode("utf-8") + uname = text_type(name).encode("utf-8", "strict").decode("utf-8") except UnicodeError: raise ValueError("Cannot convert identifier to UTF-8: '%s'" % name) return uname diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 78560611da7aa..890f52e8c65e9 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -709,6 +709,21 @@ def test_to_sql_index_label(self): self.assertEqual(frame.columns[0], 'other_label', "Specified index_label not written to database") + # index name is integer + temp_frame.index.name = 0 + sql.to_sql(temp_frame, 'test_index_label', self.conn, + if_exists='replace') + frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) + self.assertEqual(frame.columns[0], '0', + "Integer index label not written to database") + + temp_frame.index.name = None + sql.to_sql(temp_frame, 'test_index_label', self.conn, + if_exists='replace', index_label=0) + frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) + self.assertEqual(frame.columns[0], '0', + "Integer index label not written to database") + def test_to_sql_index_label_multiindex(self): temp_frame = DataFrame({'col1': range(4)}, index=MultiIndex.from_product( From 54b6c6e1c443b992a1df3443669a59dbe430271f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 17 Feb 2017 14:12:01 +0100 Subject: [PATCH 072/933] DOC: add whatsnew for #15423 --- doc/source/whatsnew/v0.20.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 75a8752c9bfa4..c68af842a4f0c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -522,7 +522,7 @@ Bug Fixes - Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`) - +- Bug in ``to_sql`` when writing a DataFrame with numeric index names (:issue:`15404`). - Bug in ``Series.iloc`` where a ``Categorical`` object for list-like indexes input was returned, where a ``Series`` was expected. 
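As a quick illustration of the ``to_sql`` fix (a sketch only; the in-memory SQLite engine via SQLAlchemy is an assumption of this example, not part of the patch):

```python
# Sketch: a numeric index name is now converted to its string form
# ('0') instead of raising when writing with to_sql (GH15404).
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('sqlite://')          # throwaway in-memory DB
df = pd.DataFrame({'col1': range(4)})
df.index.name = 0                            # integer index name
df.to_sql('test_index_label', engine, if_exists='replace')
print(pd.read_sql_query('SELECT * FROM test_index_label',
                        engine).columns[0])  # '0'
```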
(:issue:`14580`) From 763f42f7bba78acc0bf22f66281d1221b49c7238 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Feb 2017 09:51:46 -0500 Subject: [PATCH 073/933] TST: remove yielding tests from test_msgpacks.py (#15427) --- pandas/tests/io/test_packers.py | 88 ++++++++++++++++++--------------- pandas/tests/io/test_pickle.py | 8 +-- 2 files changed, 52 insertions(+), 44 deletions(-) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 911cd8164571d..097c03937ca68 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -41,6 +41,22 @@ _ZLIB_INSTALLED = True +@pytest.fixture(scope='module') +def current_packers_data(): + # our current version packers data + from pandas.tests.io.generate_legacy_storage_files import ( + create_msgpack_data) + return create_msgpack_data() + + +@pytest.fixture(scope='module') +def all_packers_data(): + # our all of our current version packers data + from pandas.tests.io.generate_legacy_storage_files import ( + create_data) + return create_data() + + def check_arbitrary(a, b): if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)): @@ -778,7 +794,16 @@ def test_default_encoding(self): assert_frame_equal(result, frame) -class TestMsgpack(): +def legacy_packers_versions(): + # yield the packers versions + path = tm.get_data_path('legacy_msgpack') + for v in os.listdir(path): + p = os.path.join(path, v) + if os.path.isdir(p): + yield v + + +class TestMsgpack(object): """ How to add msgpack tests: @@ -788,48 +813,38 @@ class TestMsgpack(): $ python generate_legacy_storage_files.py msgpack 3. Move the created pickle to "data/legacy_msgpack/" directory. - - NOTE: TestMsgpack can't be a subclass of tm.Testcase to use test generator. - http://stackoverflow.com/questions/6689537/nose-test-generators-inside-class """ - @classmethod - def setup_class(cls): - from pandas.tests.io.generate_legacy_storage_files import ( - create_msgpack_data, create_data) - cls.data = create_msgpack_data() - cls.all_data = create_data() - cls.path = u('__%s__.msgpack' % tm.rands(10)) - cls.minimum_structure = {'series': ['float', 'int', 'mixed', - 'ts', 'mi', 'dup'], - 'frame': ['float', 'int', 'mixed', 'mi'], - 'panel': ['float'], - 'index': ['int', 'date', 'period'], - 'mi': ['reg2']} - - def check_min_structure(self, data): + minimum_structure = {'series': ['float', 'int', 'mixed', + 'ts', 'mi', 'dup'], + 'frame': ['float', 'int', 'mixed', 'mi'], + 'panel': ['float'], + 'index': ['int', 'date', 'period'], + 'mi': ['reg2']} + + def check_min_structure(self, data, version): for typ, v in self.minimum_structure.items(): assert typ in data, '"{0}" not found in unpacked data'.format(typ) for kind in v: msg = '"{0}" not found in data["{1}"]'.format(kind, typ) assert kind in data[typ], msg - def compare(self, vf, version): + def compare(self, current_data, all_data, vf, version): # GH12277 encoding default used to be latin-1, now utf-8 if LooseVersion(version) < '0.18.0': data = read_msgpack(vf, encoding='latin-1') else: data = read_msgpack(vf) - self.check_min_structure(data) + self.check_min_structure(data, version) for typ, dv in data.items(): - assert typ in self.all_data, ('unpacked data contains ' - 'extra key "{0}"' - .format(typ)) + assert typ in all_data, ('unpacked data contains ' + 'extra key "{0}"' + .format(typ)) for dt, result in dv.items(): - assert dt in self.all_data[typ], ('data["{0}"] contains extra ' - 'key "{1}"'.format(typ, dt)) + assert dt in current_data[typ], ('data["{0}"] contains extra ' + 'key 
"{1}"'.format(typ, dt)) try: - expected = self.data[typ][dt] + expected = current_data[typ][dt] except KeyError: continue @@ -862,9 +877,11 @@ def compare_frame_dt_mixed_tzs(self, result, expected, typ, version): else: tm.assert_frame_equal(result, expected) - def read_msgpacks(self, version): + @pytest.mark.parametrize('version', legacy_packers_versions()) + def test_msgpacks_legacy(self, current_packers_data, all_packers_data, + version): - pth = tm.get_data_path('legacy_msgpack/{0}'.format(str(version))) + pth = tm.get_data_path('legacy_msgpack/{0}'.format(version)) n = 0 for f in os.listdir(pth): # GH12142 0.17 files packed in P2 can't be read in P3 @@ -873,19 +890,10 @@ def read_msgpacks(self, version): continue vf = os.path.join(pth, f) try: - self.compare(vf, version) + self.compare(current_packers_data, all_packers_data, + vf, version) except ImportError: # blosc not installed continue n += 1 assert n > 0, 'Msgpack files are not tested' - - def test_msgpack(self): - msgpack_path = tm.get_data_path('legacy_msgpack') - n = 0 - for v in os.listdir(msgpack_path): - pth = os.path.join(msgpack_path, v) - if os.path.isdir(pth): - yield self.read_msgpacks, v - n += 1 - assert n > 0, 'Msgpack files are not tested' diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 1e3816c1556f6..c736ec829808a 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -187,10 +187,10 @@ def compare_sp_frame_float(result, expected, typ, version): # --------------------- def legacy_pickle_versions(): # yield the pickle versions - pickle_path = tm.get_data_path('legacy_pickle') - for v in os.listdir(pickle_path): - pth = os.path.join(pickle_path, v) - if os.path.isdir(pth): + path = tm.get_data_path('legacy_pickle') + for v in os.listdir(path): + p = os.path.join(path, v) + if os.path.isdir(p): yield v From f65a6415f15d438432cc6954ead61b052c5d4d60 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Fri, 17 Feb 2017 10:07:11 -0500 Subject: [PATCH 074/933] ENH: Don't add rowspan/colspan if it's 1. Just a small thing I noticed in a [footnote here](https://danluu.com/web-bloat/#appendix-irony). Probably can't do much about the extra classes, but rowspan/colspan seem like easy fixes to save a few bytes per row/col and it's already done in the other code path. Author: Elliott Sales de Andrade Closes #15403 from QuLogic/no-extra-span and squashes the following commits: 9a8fcee [Elliott Sales de Andrade] Don't add rowspan/colspan if it's 1. --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/formats/style.py | 55 ++++++++++++++++-------------- pandas/tests/formats/test_style.py | 38 +++++++-------------- 3 files changed, 43 insertions(+), 51 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c68af842a4f0c..8e48dbbb083e8 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -154,6 +154,7 @@ Other enhancements - ``pandas.tools.hashing`` has gained a ``hash_tuples`` routine, and ``hash_pandas_object`` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`) - ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) +- HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) .. 
_ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/formats/style.py b/pandas/formats/style.py index b3e0f0f6c7462..89712910a22e1 100644 --- a/pandas/formats/style.py +++ b/pandas/formats/style.py @@ -251,21 +251,23 @@ def format_attr(pair): "class": " ".join(cs), "is_visible": True}) - for c in range(len(clabels[0])): + for c, value in enumerate(clabels[r]): cs = [COL_HEADING_CLASS, "level%s" % r, "col%s" % c] cs.extend(cell_context.get( "col_headings", {}).get(r, {}).get(c, [])) - value = clabels[r][c] - row_es.append({"type": "th", - "value": value, - "display_value": value, - "class": " ".join(cs), - "is_visible": _is_visible(c, r, col_lengths), - "attributes": [ - format_attr({"key": "colspan", - "value": col_lengths.get( - (r, c), 1)}) - ]}) + es = { + "type": "th", + "value": value, + "display_value": value, + "class": " ".join(cs), + "is_visible": _is_visible(c, r, col_lengths), + } + colspan = col_lengths.get((r, c), 0) + if colspan > 1: + es["attributes"] = [ + format_attr({"key": "colspan", "value": colspan}) + ] + row_es.append(es) head.append(row_es) if self.data.index.names and not all(x is None @@ -289,19 +291,22 @@ def format_attr(pair): body = [] for r, idx in enumerate(self.data.index): - # cs.extend( - # cell_context.get("row_headings", {}).get(r, {}).get(c, [])) - row_es = [{"type": "th", - "is_visible": _is_visible(r, c, idx_lengths), - "attributes": [ - format_attr({"key": "rowspan", - "value": idx_lengths.get((c, r), 1)}) - ], - "value": rlabels[r][c], - "class": " ".join([ROW_HEADING_CLASS, "level%s" % c, - "row%s" % r]), - "display_value": rlabels[r][c]} - for c in range(len(rlabels[r]))] + row_es = [] + for c, value in enumerate(rlabels[r]): + es = { + "type": "th", + "is_visible": _is_visible(r, c, idx_lengths), + "value": value, + "display_value": value, + "class": " ".join([ROW_HEADING_CLASS, "level%s" % c, + "row%s" % r]), + } + rowspan = idx_lengths.get((c, r), 0) + if rowspan > 1: + es["attributes"] = [ + format_attr({"key": "rowspan", "value": rowspan}) + ] + row_es.append(es) for c, col in enumerate(self.data.columns): cs = [DATA_CLASS, "row%s" % r, "col%s" % c] diff --git a/pandas/tests/formats/test_style.py b/pandas/tests/formats/test_style.py index 53bb3f9010f7e..44af0b8ebb085 100644 --- a/pandas/tests/formats/test_style.py +++ b/pandas/tests/formats/test_style.py @@ -141,21 +141,18 @@ def test_empty_index_name_doesnt_display(self): 'type': 'th', 'value': 'A', 'is_visible': True, - 'attributes': ["colspan=1"], }, {'class': 'col_heading level0 col1', 'display_value': 'B', 'type': 'th', 'value': 'B', 'is_visible': True, - 'attributes': ["colspan=1"], }, {'class': 'col_heading level0 col2', 'display_value': 'C', 'type': 'th', 'value': 'C', 'is_visible': True, - 'attributes': ["colspan=1"], }]] self.assertEqual(result['head'], expected) @@ -168,11 +165,9 @@ def test_index_name(self): expected = [[{'class': 'blank level0', 'type': 'th', 'value': '', 'display_value': '', 'is_visible': True}, {'class': 'col_heading level0 col0', 'type': 'th', - 'value': 'B', 'display_value': 'B', - 'is_visible': True, 'attributes': ['colspan=1']}, + 'value': 'B', 'display_value': 'B', 'is_visible': True}, {'class': 'col_heading level0 col1', 'type': 'th', - 'value': 'C', 'display_value': 'C', - 'is_visible': True, 'attributes': ['colspan=1']}], + 'value': 'C', 'display_value': 'C', 'is_visible': True}], [{'class': 'index_name level0', 'type': 'th', 'value': 'A'}, {'class': 'blank', 'type': 'th', 'value': ''}, @@ -191,9 +186,7 @@ def 
test_multiindex_name(self): {'class': 'blank level0', 'type': 'th', 'value': '', 'display_value': '', 'is_visible': True}, {'class': 'col_heading level0 col0', 'type': 'th', - 'value': 'C', 'display_value': 'C', - 'is_visible': True, 'attributes': ['colspan=1'], - }], + 'value': 'C', 'display_value': 'C', 'is_visible': True}], [{'class': 'index_name level0', 'type': 'th', 'value': 'A'}, {'class': 'index_name level1', 'type': 'th', @@ -618,16 +611,14 @@ def test_mi_sparse(self): body_1 = result['body'][0][1] expected_1 = { "value": 0, "display_value": 0, "is_visible": True, - "type": "th", "attributes": ["rowspan=1"], - "class": "row_heading level1 row0", + "type": "th", "class": "row_heading level1 row0", } tm.assert_dict_equal(body_1, expected_1) body_10 = result['body'][1][0] expected_10 = { "value": 'a', "display_value": 'a', "is_visible": False, - "type": "th", "attributes": ["rowspan=1"], - "class": "row_heading level0 row1", + "type": "th", "class": "row_heading level0 row1", } tm.assert_dict_equal(body_10, expected_10) @@ -637,9 +628,8 @@ def test_mi_sparse(self): 'is_visible': True, "display_value": ''}, {'type': 'th', 'class': 'blank level0', 'value': '', 'is_visible': True, 'display_value': ''}, - {'attributes': ['colspan=1'], 'class': 'col_heading level0 col0', - 'is_visible': True, 'type': 'th', 'value': 'A', - 'display_value': 'A'}] + {'type': 'th', 'class': 'col_heading level0 col0', 'value': 'A', + 'is_visible': True, 'display_value': 'A'}] self.assertEqual(head, expected) def test_mi_sparse_disabled(self): @@ -650,7 +640,7 @@ def test_mi_sparse_disabled(self): result = df.style._translate() body = result['body'] for row in body: - self.assertEqual(row[0]['attributes'], ['rowspan=1']) + assert 'attributes' not in row[0] def test_mi_sparse_index_names(self): df = pd.DataFrame({'A': [1, 2]}, index=pd.MultiIndex.from_arrays( @@ -686,28 +676,24 @@ def test_mi_sparse_column_names(self): 'type': 'th', 'is_visible': True}, {'class': 'index_name level1', 'value': 'col_1', 'display_value': 'col_1', 'is_visible': True, 'type': 'th'}, - {'attributes': ['colspan=1'], - 'class': 'col_heading level1 col0', + {'class': 'col_heading level1 col0', 'display_value': 1, 'is_visible': True, 'type': 'th', 'value': 1}, - {'attributes': ['colspan=1'], - 'class': 'col_heading level1 col1', + {'class': 'col_heading level1 col1', 'display_value': 0, 'is_visible': True, 'type': 'th', 'value': 0}, - {'attributes': ['colspan=1'], - 'class': 'col_heading level1 col2', + {'class': 'col_heading level1 col2', 'display_value': 1, 'is_visible': True, 'type': 'th', 'value': 1}, - {'attributes': ['colspan=1'], - 'class': 'col_heading level1 col3', + {'class': 'col_heading level1 col3', 'display_value': 0, 'is_visible': True, 'type': 'th', From a17a03a404649c0672b75983432759e8a29e0804 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 18 Feb 2017 11:52:01 +0100 Subject: [PATCH 075/933] DOC: correct rpy2 examples (GH15142) (#15450) --- doc/source/r_interface.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/source/r_interface.rst b/doc/source/r_interface.rst index b5d699cad69d5..88634d7f75c63 100644 --- a/doc/source/r_interface.rst +++ b/doc/source/r_interface.rst @@ -41,15 +41,17 @@ In the remainder of this page, a few examples of explicit conversion is given. 
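For readers following along, the corrected rpy2 workflow in one runnable sketch (assumes an rpy2 installation with R and the 'iris' dataset available):

```python
# Sketch of the documented round trip: activate the converter once,
# after which R objects such as 'iris' arrive as pandas DataFrames.
from rpy2.robjects import r, pandas2ri

pandas2ri.activate()
r.data('iris')
print(r['iris'].head())

# Without activation, the explicit converter does the same job:
# df_iris = pandas2ri.ri2py(r['iris'])
```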
Transferring R data sets into Python
------------------------------------
-The ``pandas2ri.ri2py`` function retrieves an R data set and converts it to the
-appropriate pandas object (most likely a DataFrame):
+Once the pandas conversion is activated (``pandas2ri.activate()``), many conversions
+of R to pandas objects will be done automatically. For example, to obtain the 'iris' dataset as a pandas DataFrame:

 .. ipython:: python

    r.data('iris')
-   df_iris = pandas2ri.ri2py(r['iris'])
-   df_iris.head()
+   r['iris'].head()

+If the pandas conversion was not activated, the above could also be accomplished
+by explicitly converting it with the ``pandas2ri.ri2py`` function
+(``pandas2ri.ri2py(r['iris'])``).

 Converting DataFrames into R objects
 ------------------------------------
@@ -65,7 +67,6 @@ DataFrames into the equivalent R object (that is, **data.frame**):

    print(type(r_dataframe))
    print(r_dataframe)

-
 The DataFrame's index is stored as the ``rownames`` attribute of the
 data.frame instance.

From 29aeffb8d77f56c3a3862a6bfaee993aa7660500 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Sat, 18 Feb 2017 04:08:54 -0800
Subject: [PATCH 076/933] BUG: rolling not accepting Timedelta-like window
 args (#15443)

Remove unnecessary pd.Timedelta
---
 doc/source/whatsnew/v0.20.0.txt |  1 +
 pandas/core/window.py           |  4 +++-
 pandas/tests/test_window.py     | 20 +++++++++++++++++++-
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 8e48dbbb083e8..ae4a3d3c3d97f 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -551,6 +551,7 @@ Bug Fixes
 - Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`)
 - Bug in ``.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`)
 - Bug in ``.rolling/expanding()`` functions where ``count()`` was not counting ``np.Inf``, nor handling ``object`` dtypes (:issue:`12541`)
+- Bug in ``.rolling()`` where ``pd.Timedelta`` or ``datetime.timedelta`` was not accepted as a ``window`` argument (:issue:`15440`)

 - Bug in ``DataFrame.resample().median()`` if duplicate column names are present (:issue:`14233`)
 - Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`)
diff --git a/pandas/core/window.py b/pandas/core/window.py
index 50de6b84d7cba..3f9aa2b0ff392 100644
--- a/pandas/core/window.py
+++ b/pandas/core/window.py
@@ -10,6 +10,7 @@
 import warnings
 import numpy as np
 from collections import defaultdict
+from datetime import timedelta

 from pandas.types.generic import (ABCSeries,
                                   ABCDataFrame,
@@ -1014,7 +1015,8 @@ def validate(self):

         # we allow rolling on a datetimelike index
         if (self.is_datetimelike and
-                isinstance(self.window, (compat.string_types, DateOffset))):
+                isinstance(self.window, (compat.string_types, DateOffset,
+                                         timedelta))):

             self._validate_monotonic()
             freq = self._validate_freq()
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index 1bb1f91423a9d..452e8999ab13f 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -4,7 +4,7 @@
 import warnings
 from warnings import catch_warnings

-from datetime import datetime
+from datetime import datetime, timedelta
 from numpy.random import randn
 import numpy as np
 from distutils.version import LooseVersion
@@ -401,6 +401,24 @@ def test_constructor_with_win_type(self):
             with self.assertRaises(ValueError):
                 c(-1,
win_type='boxcar') + def test_constructor_with_timedelta_window(self): + # GH 15440 + n = 10 + df = pd.DataFrame({'value': np.arange(n)}, + index=pd.date_range('2015-12-24', + periods=n, + freq="D")) + expected_data = np.append([0., 1.], np.arange(3., 27., 3)) + for window in [timedelta(days=3), pd.Timedelta(days=3)]: + result = df.rolling(window=window).sum() + expected = pd.DataFrame({'value': expected_data}, + index=pd.date_range('2015-12-24', + periods=n, + freq="D")) + tm.assert_frame_equal(result, expected) + expected = df.rolling('3D').sum() + tm.assert_frame_equal(result, expected) + def test_numpy_compat(self): # see gh-12811 r = rwindow.Rolling(Series([2, 4, 6]), window=2) From be4a63fe791e27c2f8a9ae4f3a419ccc255c1b5b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 18 Feb 2017 12:04:48 -0500 Subject: [PATCH 077/933] BUG: testing on windows - we are passing builds which actually have an error - fix the small dtype issues Author: Jeff Reback Closes #15445 from jreback/windows and squashes the following commits: a5b7fb3 [Jeff Reback] change integer to power comparisions eab15c4 [Jeff Reback] don't force remove pandas cf3b9bd [Jeff Reback] more windows fixing efe6a76 [Jeff Reback] add cython to build 8194e63 [Jeff Reback] don't use appveyor recipe, just build inplace e064825 [Jeff Reback] TST: resample dtype issue xref #15418 10d9b26 [Jeff Reback] TST: run windows tests so failures show up in appeveyor --- appveyor.yml | 12 ++++---- ci/appveyor.recipe/bld.bat | 2 -- ci/appveyor.recipe/build.sh | 2 -- ci/appveyor.recipe/meta.yaml | 37 ------------------------- pandas/tests/indexing/test_timedelta.py | 3 +- pandas/tests/test_expressions.py | 10 +++---- pandas/tests/tseries/test_resample.py | 2 +- test.bat | 3 +- 8 files changed, 13 insertions(+), 58 deletions(-) delete mode 100644 ci/appveyor.recipe/bld.bat delete mode 100644 ci/appveyor.recipe/build.sh delete mode 100644 ci/appveyor.recipe/meta.yaml diff --git a/appveyor.yml b/appveyor.yml index d96e1dfcf76de..1c14698430996 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -78,21 +78,19 @@ install: # this is now the downloaded conda... 
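Stepping back to the rolling fix above, its effect in a short sketch (it mirrors the new test; the data values are arbitrary):

```python
# Sketch: timedelta-like windows are now accepted and behave like the
# equivalent offset-string window on a datetime-like index.
from datetime import timedelta
import numpy as np
import pandas as pd

df = pd.DataFrame({'value': np.arange(10)},
                  index=pd.date_range('2015-12-24', periods=10, freq='D'))

r1 = df.rolling(window=timedelta(days=3)).sum()
r2 = df.rolling(window=pd.Timedelta(days=3)).sum()
r3 = df.rolling('3D').sum()
assert r1.equals(r2) and r1.equals(r3)       # all three agree
```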
- cmd: conda info -a - # build em using the local source checkout in the correct windows env - - cmd: '%CMD_IN_ENV% conda build ci\appveyor.recipe -q' - # create our env - - cmd: conda create -q -n pandas python=%PYTHON_VERSION% pytest + - cmd: conda create -q -n pandas python=%PYTHON_VERSION% cython pytest - cmd: activate pandas - SET REQ=ci\requirements-%PYTHON_VERSION%-%PYTHON_ARCH%.run - cmd: echo "installing requirements from %REQ%" - cmd: conda install -n pandas -q --file=%REQ% - cmd: conda list -n pandas - cmd: echo "installing requirements from %REQ% - done" - - ps: conda install -n pandas (conda build ci\appveyor.recipe -q --output) + + # build em using the local source checkout in the correct windows env + - cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace' test_script: # tests - cmd: activate pandas - - cmd: cd \ - - cmd: python -c "import pandas; pandas.test(['--skip-slow', '--skip-network'])" + - cmd: test.bat diff --git a/ci/appveyor.recipe/bld.bat b/ci/appveyor.recipe/bld.bat deleted file mode 100644 index 284926fae8c04..0000000000000 --- a/ci/appveyor.recipe/bld.bat +++ /dev/null @@ -1,2 +0,0 @@ -@echo off -%PYTHON% setup.py install diff --git a/ci/appveyor.recipe/build.sh b/ci/appveyor.recipe/build.sh deleted file mode 100644 index f341bce6fcf96..0000000000000 --- a/ci/appveyor.recipe/build.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -$PYTHON setup.py install diff --git a/ci/appveyor.recipe/meta.yaml b/ci/appveyor.recipe/meta.yaml deleted file mode 100644 index 777fd9d682d48..0000000000000 --- a/ci/appveyor.recipe/meta.yaml +++ /dev/null @@ -1,37 +0,0 @@ -package: - name: pandas - version: 0.20.0 - -build: - number: {{environ.get('APPVEYOR_BUILD_NUMBER', 0)}} # [win] - string: np{{ environ.get('CONDA_NPY') }}py{{ environ.get('CONDA_PY') }}_{{ environ.get('APPVEYOR_BUILD_NUMBER', 0) }} # [win] - -source: - - # conda-build needs a full clone - # rather than a shallow git_url type clone - # https://github.com/conda/conda-build/issues/780 - path: ../../ - -requirements: - build: - - python - - cython - - numpy x.x - - setuptools - - pytz - - python-dateutil - - run: - - python - - numpy x.x - - python-dateutil - - pytz - -test: - imports: - - pandas - -about: - home: http://pandas.pydata.org - license: BSD diff --git a/pandas/tests/indexing/test_timedelta.py b/pandas/tests/indexing/test_timedelta.py index e5ccd72cac20a..5f0088382ce57 100644 --- a/pandas/tests/indexing/test_timedelta.py +++ b/pandas/tests/indexing/test_timedelta.py @@ -13,8 +13,7 @@ def test_boolean_indexing(self): [0, 1, 2, 10, 4, 5, 6, 7, 8, 9], [10, 10, 10, 3, 4, 5, 6, 7, 8, 9]] for cond, data in zip(conditions, expected_data): - result = df.copy() - result.loc[cond, 'x'] = 10 + result = df.assign(x=df.mask(cond, 10).astype('int64')) expected = pd.DataFrame(data, index=pd.to_timedelta(range(10), unit='s'), columns=['x']) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 3032a288032a2..f669ebe371f9d 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -12,7 +12,7 @@ from pandas.core.api import DataFrame, Panel from pandas.computation import expressions as expr -from pandas import compat, _np_version_under1p12 +from pandas import compat, _np_version_under1p11 from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_panel4d_equal, slow) @@ -70,10 +70,10 @@ def run_arithmetic(self, df, other, assert_func, check_dtype=False, operations.append('div') for arith in 
operations: - # numpy >= 1.12 doesn't handle integers + # numpy >= 1.11 doesn't handle integers # raised to integer powers # https://github.com/pandas-dev/pandas/issues/15363 - if arith == 'pow' and not _np_version_under1p12: + if arith == 'pow' and not _np_version_under1p11: continue operator_name = arith @@ -272,10 +272,10 @@ def testit(): for op, op_str in [('add', '+'), ('sub', '-'), ('mul', '*'), ('div', '/'), ('pow', '**')]: - # numpy >= 1.12 doesn't handle integers + # numpy >= 1.11 doesn't handle integers # raised to integer powers # https://github.com/pandas-dev/pandas/issues/15363 - if op == 'pow' and not _np_version_under1p12: + if op == 'pow' and not _np_version_under1p11: continue if op == 'div': diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py index 45bbc88ef711d..6e999c5b1d276 100755 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/tseries/test_resample.py @@ -1944,7 +1944,7 @@ def test_resample_nunique_with_date_gap(self): index = pd.date_range('1-1-2000', '2-15-2000', freq='h') index2 = pd.date_range('4-15-2000', '5-15-2000', freq='h') index3 = index.append(index2) - s = pd.Series(range(len(index3)), index=index3) + s = pd.Series(range(len(index3)), index=index3, dtype='int64') r = s.resample('M') # Since all elements are unique, these should all be the same diff --git a/test.bat b/test.bat index 7f9244abb2bc8..2c5f25c24a637 100644 --- a/test.bat +++ b/test.bat @@ -1,4 +1,3 @@ :: test on windows -:: nosetests --exe -A "not slow and not network and not disabled" pandas %* -pytest pandas +pytest --skip-slow --skip-network pandas From 821be3991cca866a5cc9cf3407cd9f68c66c0306 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Mon, 20 Feb 2017 09:36:19 -0500 Subject: [PATCH 078/933] BUG: MultiIndex indexing with passed Series/DataFrame/ndarray as indexers closes #15424 closes #15434 Author: Pietro Battiston Closes #15425 from toobaz/mi_indexing and squashes the following commits: 2ba2d5d [Pietro Battiston] Updated comment 900e3ce [Pietro Battiston] whatsnew 8467b57 [Pietro Battiston] Tests for previous commit 17209f3 [Pietro Battiston] BUG: support indexing MultiIndex with 1-D array 7606114 [Pietro Battiston] Whatsnew 0b719f5 [Pietro Battiston] Test for previous commit 1f2f385 [Pietro Battiston] BUG: Fix indexing MultiIndex with Series with 0 not index --- doc/source/whatsnew/v0.20.0.txt | 3 ++- pandas/core/indexing.py | 25 ++++++++++++----- pandas/tests/indexing/test_multiindex.py | 34 ++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ae4a3d3c3d97f..9e71b9a11c8eb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -501,7 +501,8 @@ Bug Fixes - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. 
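The behaviour being fixed here, sketched from the new tests added further down (values are arbitrary):

```python
# Sketch: .loc on a MultiIndex now accepts a 1-D ndarray or a Series,
# using the indexer's *values* rather than its index.
import numpy as np
import pandas as pd

index = pd.MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
x = pd.Series(np.arange(9, dtype='float64'), index=index)

print(x.loc[np.array([1, 3])])                   # keys 1 and 3 (GH15434)
print(x.loc[pd.Series([1, 3], index=[1, 2])])    # same rows (GH15424)
```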
(:issue:`15021`) -- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`) +- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`) +- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 66510a7708e64..6f490875742ca 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1521,15 +1521,28 @@ def _getitem_axis(self, key, axis=0): return self._getbool_axis(key, axis=axis) elif is_list_like_indexer(key): - # GH 7349 - # possibly convert a list-like into a nested tuple - # but don't convert a list-like of tuples + # convert various list-like indexers + # to a list of keys + # we will use the *values* of the object + # and NOT the index if its a PandasObject if isinstance(labels, MultiIndex): + + if isinstance(key, (ABCSeries, np.ndarray)) and key.ndim <= 1: + # Series, or 0,1 ndim ndarray + # GH 14730 + key = list(key) + elif isinstance(key, ABCDataFrame): + # GH 15438 + raise NotImplementedError("Indexing a MultiIndex with a " + "DataFrame key is not " + "implemented") + elif hasattr(key, 'ndim') and key.ndim > 1: + raise NotImplementedError("Indexing a MultiIndex with a " + "multidimensional key is not " + "implemented") + if (not isinstance(key, tuple) and len(key) > 1 and not isinstance(key[0], tuple)): - if isinstance(key, ABCSeries): - # GH 14730 - key = list(key) key = tuple([key]) # an iterable multi-selection diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index b6b9ac93b234c..b40f0b8cd9976 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -158,12 +158,46 @@ def test_loc_getitem_series(self): result = x.loc[[1, 3]] tm.assert_series_equal(result, expected) + # GH15424 + y1 = Series([1, 3], index=[1, 2]) + result = x.loc[y1] + tm.assert_series_equal(result, expected) + empty = Series(data=[], dtype=np.float64) expected = Series([], index=MultiIndex( levels=index.levels, labels=[[], []], dtype=np.float64)) result = x.loc[empty] tm.assert_series_equal(result, expected) + def test_loc_getitem_array(self): + # GH15434 + # passing an array as a key with a MultiIndex + index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']]) + x = Series(index=index, data=range(9), dtype=np.float64) + y = np.array([1, 3]) + expected = Series( + data=[0, 1, 2, 6, 7, 8], + index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]), + dtype=np.float64) + result = x.loc[y] + tm.assert_series_equal(result, expected) + + # empty array: + empty = np.array([]) + expected = Series([], index=MultiIndex( + levels=index.levels, labels=[[], []], dtype=np.float64)) + result = x.loc[empty] + tm.assert_series_equal(result, expected) + + # 0-dim array (scalar): + scalar = np.int64(1) + expected = Series( + data=[0, 1, 2], + index=['A', 'B', 'C'], + dtype=np.float64) + result = x.loc[scalar] + tm.assert_series_equal(result, expected) + def test_iloc_getitem_multiindex(self): mi_labels = DataFrame(np.random.randn(4, 3), columns=[['i', 'i', 'j'], ['A', 'A', 'B']], From 12f2c6a101cf866527df5dac4184a8803792fd78 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 20 Feb 2017 10:13:38 -0500 Subject: [PATCH 079/933] TST: make sure test_fash uses the same seed for launching processes --- test_fast.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test_fast.sh b/test_fast.sh index 
43eb376f879cd..30ac7f84cbe8b 100755 --- a/test_fast.sh +++ b/test_fast.sh @@ -1 +1,8 @@ +#!/bin/bash + +# Workaround for pytest-xdist flaky collection order +# https://github.com/pytest-dev/pytest/issues/920 +# https://github.com/pytest-dev/pytest/issues/1075 +export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') + pytest pandas --skip-slow --skip-network -m "not single" -n 4 From 0b4fdf988e3125f7c55aaf6e08a2dfa7d9e2e8a0 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Mon, 20 Feb 2017 14:12:28 -0500 Subject: [PATCH 080/933] ENH: Add __copy__ and __deepcopy__ to NDFrame closes #15370 Author: Brian McFee Author: Jeff Reback Closes #15444 from bmcfee/deepcopy-ndframe and squashes the following commits: bf36f35 [Jeff Reback] TST: skip the panel4d deepcopy tests d58b1f6 [Brian McFee] added tests for copy and deepcopy 35f3e0f [Brian McFee] relocated Index.__deepcopy__ to live near __copy__ 1aea940 [Brian McFee] switched deepcopy test to using generic comparator 7e67e7d [Brian McFee] ndframe and index __copy__ are now proper methods 820664c [Brian McFee] moved deepcopy test to generic.py 9721041 [Brian McFee] added copy/deepcopy to ndframe, fixes #15370 --- doc/source/whatsnew/v0.20.0.txt | 3 ++- pandas/core/generic.py | 8 ++++++++ pandas/indexes/base.py | 13 +++++++------ pandas/tests/test_generic.py | 24 ++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9e71b9a11c8eb..40b068547c360 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -433,6 +433,7 @@ Other API Changes - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`) - The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision lost for integers greather than 2**53. Furthermore ``FLOAT`` columns with values above 10**4 are no longer casted to ``int64`` which also caused precision loss (:issue:`14064`, :issue:`14305`). - Reorganization of timeseries development tests (:issue:`14854`) +- Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`) .. _whatsnew_0200.deprecations: @@ -500,7 +501,7 @@ Bug Fixes - Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. 
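The user-visible effect of this patch in a sketch (it mirrors the new empty-frame test added further down):

```python
# Sketch: copy.copy() and copy.deepcopy() now route through
# NDFrame.copy(), so empty frames with columns survive (GH15370).
from copy import copy, deepcopy
import pandas as pd

empty = pd.DataFrame(data=[], index=[], columns=['A'])
shallow, deep = copy(empty), deepcopy(empty)

assert shallow is not empty and deep is not empty
assert deep.columns.equals(empty.columns)
```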
(:issue:`15021`) - +- Bug in using ``__deepcopy__`` on empty NDFrame objects (:issue:`15370`) - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`) - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 26b9a880dd2c7..76fbb9884753d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3161,6 +3161,14 @@ def copy(self, deep=True): data = self._data.copy(deep=deep) return self._constructor(data).__finalize__(self) + def __copy__(self, deep=True): + return self.copy(deep=deep) + + def __deepcopy__(self, memo=None): + if memo is None: + memo = {} + return self.copy(deep=True) + def _convert(self, datetime=False, numeric=False, timedelta=False, coerce=False, copy=True): """ diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index e51824e72a2a0..f1f37622b2a74 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -724,7 +724,13 @@ def copy(self, name=None, deep=False, dtype=None, **kwargs): new_index = new_index.astype(dtype) return new_index - __copy__ = copy + def __copy__(self, **kwargs): + return self.copy(**kwargs) + + def __deepcopy__(self, memo=None): + if memo is None: + memo = {} + return self.copy(deep=True) def _validate_names(self, name=None, names=None, deep=False): """ @@ -1480,11 +1486,6 @@ def __setstate__(self, state): _unpickle_compat = __setstate__ - def __deepcopy__(self, memo=None): - if memo is None: - memo = {} - return self.copy(deep=True) - def __nonzero__(self): raise ValueError("The truth value of a {0} is ambiguous. " "Use a.empty, a.bool(), a.item(), a.any() or a.all()." diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index b087ca21d3c25..40cdbe083acd7 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -2,6 +2,7 @@ # pylint: disable-msg=E1101,W0612 from operator import methodcaller +from copy import copy, deepcopy import pytest import numpy as np from numpy import nan @@ -675,6 +676,18 @@ def test_validate_bool_args(self): with self.assertRaises(ValueError): super(DataFrame, df).mask(cond=df.a > 2, inplace=value) + def test_copy_and_deepcopy(self): + # GH 15444 + for shape in [0, 1, 2]: + obj = self._construct(shape) + for func in [copy, + deepcopy, + lambda x: x.copy(deep=False), + lambda x: x.copy(deep=True)]: + obj_copy = func(obj) + self.assertIsNot(obj_copy, obj) + self._compare(obj_copy, obj) + class TestSeries(tm.TestCase, Generic): _typ = Series @@ -1539,6 +1552,14 @@ def test_to_xarray(self): expected, check_index_type=False) + def test_deepcopy_empty(self): + # This test covers empty frame copying with non-empty column sets + # as reported in issue GH15370 + empty_frame = DataFrame(data=[], index=[], columns=['A']) + empty_frame_copy = deepcopy(empty_frame) + + self._compare(empty_frame_copy, empty_frame) + class TestPanel(tm.TestCase, Generic): _typ = Panel @@ -1569,6 +1590,9 @@ class TestPanel4D(tm.TestCase, Generic): def test_sample(self): pytest.skip("sample on Panel4D") + def test_copy_and_deepcopy(self): + pytest.skip("copy_and_deepcopy on Panel4D") + def test_to_xarray(self): tm._skip_if_no_xarray() From e1d54074ce8448bfcc69dc08d8a800ef9ef918ff Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 20 Feb 2017 21:39:10 -0500 Subject: [PATCH 081/933] CI: add circle ci support this adds support for using CircleCI; configured to put 4 of our builds (3.4, 3.4-slow, 2.7 compat, and 3.5 ascii), they 
are still on Travis ATM. They are built/tested simultaneously on CircleCI (as we get 4 containers for open-source). Author: Jeff Reback Closes #15464 from jreback/circle and squashes the following commits: 3756674 [Jeff Reback] CI: add circle ci support --- .travis.yml | 2 +- ci/install_circle.sh | 88 ++++++++++++++++++++++ ci/install_db_circle.sh | 8 ++ ci/{install_db.sh => install_db_travis.sh} | 0 ci/run_circle.sh | 9 +++ ci/show_circle.sh | 8 ++ circle.yml | 35 +++++++++ 7 files changed, 149 insertions(+), 1 deletion(-) create mode 100755 ci/install_circle.sh create mode 100755 ci/install_db_circle.sh rename ci/{install_db.sh => install_db_travis.sh} (100%) create mode 100755 ci/run_circle.sh create mode 100755 ci/show_circle.sh create mode 100644 circle.yml diff --git a/.travis.yml b/.travis.yml index 6245213cec06f..bb96ab210c088 100644 --- a/.travis.yml +++ b/.travis.yml @@ -315,7 +315,7 @@ install: before_script: - source activate pandas && pip install codecov - - ci/install_db.sh + - ci/install_db_travis.sh script: - echo "script start" diff --git a/ci/install_circle.sh b/ci/install_circle.sh new file mode 100755 index 0000000000000..485586e9d4f49 --- /dev/null +++ b/ci/install_circle.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash + +home_dir=$(pwd) +echo "[home_dir: $home_dir]" + +echo "[ls -ltr]" +ls -ltr + +echo "[Using clean Miniconda install]" +rm -rf "$MINICONDA_DIR" + +# install miniconda +wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -q -O miniconda.sh || exit 1 +bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 + +export PATH="$MINICONDA_DIR/bin:$PATH" + +echo "[update conda]" +conda config --set ssl_verify false || exit 1 +conda config --set always_yes true --set changeps1 false || exit 1 +conda update -q conda + +# add the pandas channel to take priority +# to add extra packages +echo "[add channels]" +conda config --add channels pandas || exit 1 +conda config --remove channels defaults || exit 1 +conda config --add channels defaults || exit 1 + +# Useful for debugging any issues with conda +conda info -a || exit 1 + +# support env variables passed +export ENVS_FILE=".envs" + +# make sure that the .envs file exists. 
it is ok if it is empty +touch $ENVS_FILE + +# assume all command line arguments are environmental variables +for var in "$@" +do + echo "export $var" >> $ENVS_FILE +done + +echo "[environmental variable file]" +cat $ENVS_FILE +source $ENVS_FILE + +export REQ_BUILD=ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build +export REQ_RUN=ci/requirements-${PYTHON_VERSION}${JOB_TAG}.run +export REQ_PIP=ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip + +# edit the locale override if needed +if [ -n "$LOCALE_OVERRIDE" ]; then + echo "[Adding locale to the first line of pandas/__init__.py]" + rm -f pandas/__init__.pyc + sedc="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LOCALE_OVERRIDE')\n" + sed -i "$sedc" pandas/__init__.py + echo "[head -4 pandas/__init__.py]" + head -4 pandas/__init__.py + echo +fi + +# create new env +echo "[create env]" +time conda create -q -n pandas python=${PYTHON_VERSION} pytest || exit 1 + +source activate pandas + +# build deps +echo "[build installs: ${REQ_BUILD}]" +time conda install -q --file=${REQ_BUILD} || exit 1 + +# build but don't install +echo "[build em]" +time python setup.py build_ext --inplace || exit 1 + +# we may have run installations +echo "[conda installs: ${REQ_RUN}]" +if [ -e ${REQ_RUN} ]; then + time conda install -q --file=${REQ_RUN} || exit 1 +fi + +# we may have additional pip installs +echo "[pip installs: ${REQ_PIP}]" +if [ -e ${REQ_PIP} ]; then + pip install -q -r $REQ_PIP +fi diff --git a/ci/install_db_circle.sh b/ci/install_db_circle.sh new file mode 100755 index 0000000000000..a00f74f009f54 --- /dev/null +++ b/ci/install_db_circle.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +echo "installing dbs" +mysql -e 'create database pandas_nosetest;' +psql -c 'create database pandas_nosetest;' -U postgres + +echo "done" +exit 0 diff --git a/ci/install_db.sh b/ci/install_db_travis.sh similarity index 100% rename from ci/install_db.sh rename to ci/install_db_travis.sh diff --git a/ci/run_circle.sh b/ci/run_circle.sh new file mode 100755 index 0000000000000..0e46d28ab6fc4 --- /dev/null +++ b/ci/run_circle.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +echo "[running tests]" +export PATH="$MINICONDA_DIR/bin:$PATH" + +source activate pandas + +echo "pytest --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml $@ pandas" +pytest --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml $@ pandas diff --git a/ci/show_circle.sh b/ci/show_circle.sh new file mode 100755 index 0000000000000..bfaa65c1d84f2 --- /dev/null +++ b/ci/show_circle.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +echo "[installed versions]" + +export PATH="$MINICONDA_DIR/bin:$PATH" +source activate pandas + +python -c "import pandas; pandas.show_versions();" diff --git a/circle.yml b/circle.yml new file mode 100644 index 0000000000000..97136d368ae6f --- /dev/null +++ b/circle.yml @@ -0,0 +1,35 @@ +machine: + environment: + # these are globally set + MINICONDA_DIR: /home/ubuntu/miniconda3 + +database: + override: + - ./ci/install_db_circle.sh + +checkout: + post: + # since circleci does a shallow fetch + # we need to populate our tags + - git fetch --depth=1000 + - git fetch --tags + +dependencies: + override: + - > + case $CIRCLE_NODE_INDEX in + 0) + sudo apt-get install language-pack-it && ./ci/install_circle.sh PYTHON_VERSION=2.7 JOB_TAG="_COMPAT" LOCALE_OVERRIDE="it_IT.UTF-8" ;; + 1) + sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh PYTHON_VERSION=3.4 JOB_TAG="_SLOW" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; + 2) + sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh PYTHON_VERSION=3.4 
JOB_TAG="" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; + 3) + ./ci/install_circle.sh PYTHON_VERSION=3.5 JOB_TAG="_ASCII" LOCALE_OVERRIDE="C" ;; + esac + - ./ci/show_circle.sh + +test: + override: + - case $CIRCLE_NODE_INDEX in 0) ./ci/run_circle.sh --skip-slow --skip-network ;; 1) ./ci/run_circle.sh --only-slow --skip-network ;; 2) ./ci/run_circle.sh --skip-slow --skip-network ;; 3) ./ci/run_circle.sh --skip-slow --skip-network ;; esac: + parallel: true From bb2144a32cb30bc7428b117389a280b2515e9cf1 Mon Sep 17 00:00:00 2001 From: tzinckgraf Date: Tue, 21 Feb 2017 08:29:55 -0500 Subject: [PATCH 082/933] BUG: Bug on reset_index for a MultiIndex of all NaNs closes #6322 Author: tzinckgraf Closes #15466 from tzinckgraf/GH6322 and squashes the following commits: 35f97f4 [tzinckgraf] GH6322, Bug on reset_index for a MultiIndex of all NaNs --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 15 +++++++++++---- pandas/tests/frame/test_alter_axes.py | 27 +++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 40b068547c360..86f916bc0acfb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -575,6 +575,7 @@ Bug Fixes - Incorrect dtyped ``Series`` was returned by comparison methods (e.g., ``lt``, ``gt``, ...) against a constant for an empty ``DataFrame`` (:issue:`15077`) - Bug in ``Series.dt.round`` inconsistent behaviour on NAT's with different arguments (:issue:`14940`) - Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) +- Bug in ``.reset_index()`` when an all ``NaN`` level of a ``MultiIndex`` would fail (:issue:`6322`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3ebdf72a5cde9..bfef2cfbd0d51 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2973,10 +2973,17 @@ def _maybe_casted_values(index, labels=None): # if we have the labels, extract the values with a mask if labels is not None: mask = labels == -1 - values = values.take(labels) - if mask.any(): - values, changed = _maybe_upcast_putmask(values, mask, - np.nan) + + # we can have situations where the whole mask is -1, + # meaning there is nothing found in labels, so make all nan's + if mask.all(): + values = np.empty(len(mask)) + values.fill(np.nan) + else: + values = values.take(labels) + if mask.any(): + values, changed = _maybe_upcast_putmask(values, mask, + np.nan) return values new_index = _default_index(len(new_obj)) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index e84bb6407fafc..e52bfdbd4f837 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -624,6 +624,33 @@ def test_reset_index_multiindex_col(self): ['a', 'mean', 'median', 'mean']]) assert_frame_equal(rs, xp) + def test_reset_index_multiindex_nan(self): + # GH6322, testing reset_index on MultiIndexes + # when we have a nan or all nan + df = pd.DataFrame({'A': ['a', 'b', 'c'], + 'B': [0, 1, np.nan], + 'C': np.random.rand(3)}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) + + df = pd.DataFrame({'A': [np.nan, 'b', 'c'], + 'B': [0, 1, 2], + 'C': np.random.rand(3)}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) + + df = pd.DataFrame({'A': ['a', 'b', 'c'], + 'B': [0, 1, 2], + 'C': [np.nan, 1.1, 2.2]}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) + + df = 
pd.DataFrame({'A': ['a', 'b', 'c'], + 'B': [np.nan, np.nan, np.nan], + 'C': np.random.rand(3)}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) + def test_reset_index_with_datetimeindex_cols(self): # GH5818 # From f62e8f242d90d849e4854f4fe82e9ebb5b731d74 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 21 Feb 2017 17:26:03 -0500 Subject: [PATCH 083/933] DOC: Link CONTRIBUTING.md to contributing.rst (#15451) Previously, we were trying to maintain two different copies of the documentation, one in the ".github" directory, and the other in the "docs/," which just imposes greater maintenance burden in the long-run. We now use the ".github" to refer to different portions of the "docs/" version with short summaries for ease of navigation. Closes gh-15349. --- .github/CONTRIBUTING.md | 519 ++-------------------------------------- 1 file changed, 14 insertions(+), 505 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 7898822e0e11d..95729f845ff5c 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,515 +1,24 @@ Contributing to pandas ====================== -Where to start? ---------------- - -All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome. - -If you are simply looking to start working with the *pandas* codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [Difficulty Novice](https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+is%3Aissue+label%3A%22Difficulty+Novice%22) where you could start out. - -Or maybe through using *pandas* you have an idea of you own or are looking for something in the documentation and thinking 'this can be improved'...you can do something about it! - -Feel free to ask questions on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas). - -Bug reports and enhancement requests ------------------------------------- - -Bug reports are an important part of making *pandas* more stable. Having a complete bug report will allow others to reproduce the bug and provide insight into fixing. Because many versions of *pandas* are supported, knowing version information will also identify improvements made since previous versions. Trying the bug-producing code out on the *master* branch is often a worthwhile exercise to confirm the bug still exists. It is also worth searching existing bug reports and pull requests to see if the issue has already been reported and/or fixed. - -Bug reports must: - -1. Include a short, self-contained Python snippet reproducing the problem. You can format the code nicely by using [GitHub Flavored Markdown](http://github.github.com/github-flavored-markdown/): - - ```python - >>> from pandas import DataFrame - >>> df = DataFrame(...) - ... - ``` - -2. Include the full version string of *pandas* and its dependencies. In versions of *pandas* after 0.12 you can use a built in function: - - >>> from pandas.util.print_versions import show_versions - >>> show_versions() - - and in *pandas* 0.13.1 onwards: - - >>> pd.show_versions() - -3. Explain why the current behavior is wrong/not desired and what you expect instead. - -The issue will then show up to the *pandas* community and be open to comments/ideas from others. 
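As a concrete instance of the "short, self-contained snippet" these guidelines ask for, here is what such a report could look like (a hypothetical report, reusing the reset_index fix from the patch above):

```python
# A self-contained reproduction in the requested style: before GH6322
# was fixed, reset_index failed on an all-NaN MultiIndex level.
import numpy as np
import pandas as pd

df = pd.DataFrame({'A': ['a', 'b', 'c'],
                   'B': [np.nan, np.nan, np.nan],
                   'C': np.random.rand(3)})
print(df.set_index(['A', 'B']).reset_index())
```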
- -Working with the code ---------------------- - -Now that you have an issue you want to fix, enhancement to add, or documentation to improve, you need to learn how to work with GitHub and the *pandas* code base. - -### Version control, Git, and GitHub - -To the new user, working with Git is one of the more daunting aspects of contributing to *pandas*. It can very quickly become overwhelming, but sticking to the guidelines below will help keep the process straightforward and mostly trouble free. As always, if you are having difficulties please feel free to ask for help. - -The code is hosted on [GitHub](https://www.github.com/pandas-dev/pandas). To contribute you will need to sign up for a [free GitHub account](https://github.com/signup/free). We use [Git](http://git-scm.com/) for version control to allow many people to work together on the project. - -Some great resources for learning Git: - -- the [GitHub help pages](http://help.github.com/). -- the [NumPy's documentation](http://docs.scipy.org/doc/numpy/dev/index.html). -- Matthew Brett's [Pydagogue](http://matthew-brett.github.com/pydagogue/). - -### Getting started with Git - -[GitHub has instructions](http://help.github.com/set-up-git-redirect) for installing git, setting up your SSH key, and configuring git. All these steps need to be completed before you can work seamlessly between your local repository and GitHub. - -### Forking - -You will need your own fork to work on the code. Go to the [pandas project page](https://github.com/pandas-dev/pandas) and hit the `Fork` button. You will want to clone your fork to your machine: - - git clone git@github.com:your-user-name/pandas.git pandas-yourname - cd pandas-yourname - git remote add upstream git://github.com/pandas-dev/pandas.git - -This creates the directory pandas-yourname and connects your repository to the upstream (main project) *pandas* repository. - -The testing suite will run automatically on Travis-CI once your pull request is submitted. However, if you wish to run the test suite on a branch prior to submitting the pull request, then Travis-CI needs to be hooked up to your GitHub repository. Instructions for doing so are [here](http://about.travis-ci.org/docs/user/getting-started/). - -### Creating a branch - -You want your master branch to reflect only production-ready code, so create a feature branch for making your changes. For example: - - git branch shiny-new-feature - git checkout shiny-new-feature - -The above can be simplified to: - - git checkout -b shiny-new-feature - -This changes your working directory to the shiny-new-feature branch. Keep any changes in this branch specific to one bug or feature so it is clear what the branch brings to *pandas*. You can have many shiny-new-features and switch in between them using the git checkout command. - -To update this branch, you need to retrieve the changes from the master branch: - - git fetch upstream - git rebase upstream/master - -This will replay your commits on top of the lastest pandas git master. If this leads to merge conflicts, you must resolve these before submitting your pull request. If you have uncommitted changes, you will need to `stash` them prior to updating. This will effectively store your changes and they can be reapplied after updating. - -### Creating a development environment - -An easy way to create a *pandas* development environment is as follows. 
- -- Install either Anaconda <install.anaconda> or miniconda <install.miniconda> -- Make sure that you have cloned the repository <contributing.forking> -- `cd` to the *pandas* source directory - -Tell conda to create a new environment, named `pandas_dev`, or any other name you would like for this environment, by running: - - conda create -n pandas_dev --file ci/requirements_dev.txt - -For a python 3 environment: - - conda create -n pandas_dev python=3 --file ci/requirements_dev.txt - -If you are on Windows, then you will also need to install the compiler linkages: - - conda install -n pandas_dev libpython - -This will create the new environment, and not touch any of your existing environments, nor any existing python installation. It will install all of the basic dependencies of *pandas*, as well as the development and testing tools. If you would like to install other dependencies, you can install them as follows: - - conda install -n pandas_dev -c pandas pytables scipy - -To install *all* pandas dependencies you can do the following: - - conda install -n pandas_dev -c pandas --file ci/requirements_all.txt - -To work in this environment, Windows users should `activate` it as follows: - - activate pandas_dev - -Mac OSX and Linux users should use: - - source activate pandas_dev - -You will then see a confirmation message to indicate you are in the new development environment. - -To view your environments: - - conda info -e - -To return to you home root environment: - - deactivate - -See the full conda docs [here](http://conda.pydata.org/docs). - -At this point you can easily do an *in-place* install, as detailed in the next section. - -### Making changes - -Before making your code changes, it is often necessary to build the code that was just checked out. There are two primary methods of doing this. - -1. The best way to develop *pandas* is to build the C extensions in-place by running: - - python setup.py build_ext --inplace - - If you startup the Python interpreter in the *pandas* source directory you will call the built C extensions - -2. Another very common option is to do a `develop` install of *pandas*: - - python setup.py develop - - This makes a symbolic link that tells the Python interpreter to import *pandas* from your development directory. Thus, you can always be using the development version on your system without being inside the clone directory. - -Contributing to the documentation ---------------------------------- - -If you're not the developer type, contributing to the documentation is still of huge value. You don't even have to be an expert on *pandas* to do so! Something as simple as rewriting small passages for clarity as you reference the docs is a simple but effective way to contribute. The next person to read that passage will be in your debt! - -In fact, there are sections of the docs that are worse off after being written by experts. If something in the docs doesn't make sense to you, updating the relevant section after you figure it out is a simple way to ensure it will help the next person. - -### About the *pandas* documentation - -The documentation is written in **reStructuredText**, which is almost like writing in plain English, and built using [Sphinx](http://sphinx.pocoo.org/). The Sphinx Documentation has an excellent [introduction to reST](http://sphinx.pocoo.org/rest.html). Review the Sphinx docs to perform more complex changes to the documentation as well. 
-
-Contributing to the documentation
----------------------------------
-
-If you're not the developer type, contributing to the documentation is still of huge value. You don't even have to be an expert on *pandas* to do so! Something as simple as rewriting small passages for clarity as you reference the docs is a simple but effective way to contribute. The next person to read that passage will be in your debt!
-
-In fact, there are sections of the docs that are worse off after being written by experts. If something in the docs doesn't make sense to you, updating the relevant section after you figure it out is a simple way to ensure it will help the next person.
-
-### About the *pandas* documentation
-
-The documentation is written in **reStructuredText**, which is almost like writing in plain English, and built using [Sphinx](http://sphinx.pocoo.org/). The Sphinx Documentation has an excellent [introduction to reST](http://sphinx.pocoo.org/rest.html). Review the Sphinx docs to perform more complex changes to the documentation as well.
-
-Some other important things to know about the docs:
-
-- The *pandas* documentation consists of two parts: the docstrings in the code itself and the docs in this folder `pandas/doc/`.
-
-    The docstrings provide a clear explanation of the usage of the individual functions, while the documentation in this folder consists of tutorial-like overviews per topic together with some other information (what's new, installation, etc.).
-
-- The docstrings follow the **Numpy Docstring Standard**, which is used widely in the Scientific Python community. This standard specifies the format of the different sections of the docstring. See [this document](https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt) for a detailed explanation, or look at some of the existing functions and extend their docstrings in a similar manner; a minimal sketch is also shown at the end of this section.
-- The tutorials make heavy use of the [ipython directive](http://matplotlib.org/sampledoc/ipython_directive.html) sphinx extension. This directive lets you put code in the documentation which will be run during the doc build. For example:
-
-        .. ipython:: python
-
-            x = 2
-            x**3
-
-    will be rendered as:
-
-        In [1]: x = 2
-
-        In [2]: x**3
-        Out[2]: 8
-
-    Almost all code examples in the docs are run (and the output saved) during the doc build. This approach means that code examples will always be up to date, but it does make the doc building a bit more complex.
-
-> **note**
->
-> The `.rst` files are used to automatically generate Markdown and HTML versions of the docs. For this reason, please do not edit `CONTRIBUTING.md` directly, but instead make any changes to `doc/source/contributing.rst`. Then, to generate `CONTRIBUTING.md`, use [pandoc](http://johnmacfarlane.net/pandoc/) with the following command:
->
->     pandoc doc/source/contributing.rst -t markdown_github > CONTRIBUTING.md
-
-The utility script `scripts/api_rst_coverage.py` can be used to compare the list of methods documented in `doc/source/api.rst` (which is used to generate the [API Reference](http://pandas.pydata.org/pandas-docs/stable/api.html) page) and the actual public methods. This will identify methods documented in `doc/source/api.rst` that are not actually class methods, and existing methods that are not documented in `doc/source/api.rst`.
-
-### How to build the *pandas* documentation
-
-#### Requirements
-
-To build the *pandas* docs there are some extra requirements: you will need to have `sphinx` and `ipython` installed. [numpydoc](https://github.com/numpy/numpydoc) is used to parse the docstrings that follow the Numpy Docstring Standard (see above), but you don't need to install this because a local copy of numpydoc is included in the *pandas* source code.
-
-It is easiest to create a development environment <contributing.dev\_env>, then install:
-
-    conda install -n pandas_dev sphinx ipython
-
-Furthermore, it is recommended to have all [optional dependencies](http://pandas.pydata.org/pandas-docs/dev/install.html#optional-dependencies) installed. This is not strictly necessary, but be aware that you will see some error messages when building the docs. This happens because all the code in the documentation is executed during the doc build, and so code examples using optional dependencies will generate errors. Run `pd.show_versions()` to get an overview of the installed version of all dependencies.
-
-> **warning**
->
-> You need to have `sphinx` version 1.2.2 or newer, but older than version 1.3. Versions before 1.1.3 should also work.
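-
-As an illustration of the Numpy Docstring Standard mentioned above, a minimal docstring might look like the following. This is only a sketch with a hypothetical function; see the document linked above and existing functions for the authoritative format:
-
-    def add_one(values):
-        """
-        Add one to each element of the input.
-
-        Parameters
-        ----------
-        values : list of int
-            The values to increment.
-
-        Returns
-        -------
-        list of int
-            A new list with one added to each element.
-
-        Examples
-        --------
-        >>> add_one([1, 2, 3])
-        [2, 3, 4]
-        """
-        return [v + 1 for v in values]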
-
-#### Building the documentation
-
-So how do you build the docs? Navigate to your local `pandas/doc/` directory in the console and run:
-
-    python make.py html
-
-Then you can find the HTML output in the folder `pandas/doc/build/html/`.
-
-The first time you build the docs, it will take quite a while because it has to run all the code examples and build all the generated docstring pages. In subsequent invocations, sphinx will try to only build the pages that have been modified.
-
-If you want to do a full clean build, do:
-
-    python make.py clean
-    python make.py build
-
-Starting with *pandas* 0.13.1 you can tell `make.py` to compile only a single section of the docs, greatly reducing the turn-around time for checking your changes. You will be prompted to delete `.rst` files that aren't required. This is okay because the prior versions of these files can be checked out from git. However, you must make sure not to commit the file deletions to your Git repository!
-
-    # omit autosummary and API section
-    python make.py clean
-    python make.py --no-api
-
-    # compile the docs with only a single
-    # section, that which is in indexing.rst
-    python make.py clean
-    python make.py --single indexing
-
-For comparison, a full documentation build may take 10 minutes, a `--no-api` build may take 3 minutes, and a single section may take 15 seconds. Subsequent builds, which only process portions you have changed, will be faster. Open the following file in a web browser to see the full documentation you just built:
-
-    pandas/doc/build/html/index.html
+Whether you are a novice or experienced software developer, all contributions and suggestions are welcome!
-
-And you'll have the satisfaction of seeing your new and improved documentation!
+
+Our main contribution docs can be found [here](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst), but if you do not want to read them in their entirety, we will summarize the main ways in which you can contribute and point to relevant places in the docs for further information.
-
-#### Building master branch documentation
-
-When pull requests are merged into the *pandas* `master` branch, the main parts of the documentation are also built by Travis-CI. These docs are then hosted [here](http://pandas-docs.github.io/pandas-docs-travis).
-
-Contributing to the code base
------------------------------
-
-### Code standards
-
-*pandas* uses the [PEP8](http://www.python.org/dev/peps/pep-0008/) standard. There are several tools to ensure you abide by this standard.
-
-We've written a tool to check that your commits are PEP8 compliant, [pip install pep8radius](https://github.com/hayd/pep8radius). Look at PEP8 fixes in your branch vs master with:
-
-    pep8radius master --diff
-
-and make these changes with:
-
-    pep8radius master --diff --in-place
-
-Alternatively, use the [flake8](http://pypi.python.org/pypi/flake8) tool for checking the style of your code. Additional standards are outlined on the [code style wiki page](https://github.com/pandas-dev/pandas/wiki/Code-Style-and-Conventions).
-
-Please try to maintain backward compatibility. *pandas* has lots of users with lots of existing code, so don't break it if at all possible. If you think breakage is required, clearly state why as part of the pull request. Also, be careful when changing method signatures and add deprecation warnings where needed.
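-
-For example, a deprecated keyword argument is typically kept working for a while and flagged with a warning. The snippet below is only a minimal sketch with a hypothetical function and keyword, not actual *pandas* code:
-
-    import warnings
-
-    def describe(data, percentile_width=None):
-        # hypothetical keyword being phased out: warn, but keep accepting it
-        if percentile_width is not None:
-            warnings.warn("the 'percentile_width' keyword is deprecated "
-                          "and will be removed in a future version",
-                          FutureWarning, stacklevel=2)
-        return min(data), max(data)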
-
-### Test-driven development/code writing
-
-*pandas* is serious about testing and strongly encourages contributors to embrace [test-driven development (TDD)](http://en.wikipedia.org/wiki/Test-driven_development). This development process "relies on the repetition of a very short development cycle: first the developer writes an (initially failing) automated test case that defines a desired improvement or new function, then produces the minimum amount of code to pass that test." So, before actually writing any code, you should write your tests. Often the test can be taken from the original GitHub issue. However, it is always worth considering additional use cases and writing corresponding tests.
-
-Adding tests is one of the most common requests after code is pushed to *pandas*. Therefore, it is worth getting in the habit of writing tests ahead of time so this is never an issue.
-
-Like many packages, *pandas* uses the [Nose testing system](https://nose.readthedocs.io/en/latest/index.html) and the convenient extensions in [numpy.testing](http://docs.scipy.org/doc/numpy/reference/routines.testing.html).
-
-#### Writing tests
-
-All tests should go into the `tests` subdirectory of the specific package. This folder contains many current examples of tests, and we suggest looking to these for inspiration. If your test requires working with files or network connectivity, there is more information on the [testing page](https://github.com/pandas-dev/pandas/wiki/Testing) of the wiki.
-
-The `pandas.util.testing` module has many special `assert` functions that make it easier to make statements about whether Series or DataFrame objects are equivalent. The easiest way to verify that your code is correct is to explicitly construct the result you expect, then compare the actual result to the expected correct result:
-
-    def test_pivot(self):
-        data = {
-            'index' : ['A', 'B', 'C', 'C', 'B', 'A'],
-            'columns' : ['One', 'One', 'One', 'Two', 'Two', 'Two'],
-            'values' : [1., 2., 3., 3., 2., 1.]
-        }
-
-        frame = DataFrame(data)
-        pivoted = frame.pivot(index='index', columns='columns', values='values')
-
-        expected = DataFrame({
-            'One' : {'A' : 1., 'B' : 2., 'C' : 3.},
-            'Two' : {'A' : 1., 'B' : 2., 'C' : 3.}
-        })
-
-        assert_frame_equal(pivoted, expected)
-
-#### Running the test suite
-
-The tests can then be run directly inside your Git clone (without having to install *pandas*) by typing:
-
-    nosetests pandas
-
-The test suite is exhaustive and takes around 20 minutes to run. It is often worth first running only a subset of tests around your changes before running the entire suite. This is done using one of the following constructs:
-
-    nosetests pandas/tests/[test-module].py
-    nosetests pandas/tests/[test-module].py:[TestClass]
-    nosetests pandas/tests/[test-module].py:[TestClass].[test_method]
-
-#### Running the performance test suite
-
-Performance matters and it is worth considering whether your code has introduced performance regressions. *pandas* is in the process of migrating to the [asv library](https://github.com/spacetelescope/asv) to enable easy monitoring of the performance of critical *pandas* operations. These benchmarks are all found in the `pandas/asv_bench` directory. asv supports both python2 and python3.
-
-> **note**
->
-> The asv benchmark suite was translated from the previous framework, vbench, so many stylistic issues are likely a result of automated transformation of the code.
-
-To use asv you will need either `conda` or `virtualenv`. For more details please check the [asv installation webpage](https://asv.readthedocs.io/en/latest/installing.html).
-
-To install asv:
-
-    pip install git+https://github.com/spacetelescope/asv
-
-If you need to run a benchmark, change your directory to `asv_bench/` and run the following if you have been developing on `master`:
-
-    asv continuous master
-
-If you are working on another branch, either of the following can be used:
-
-    asv continuous master HEAD
-    asv continuous master your_branch
-
-This will check out the master revision and run the suite on both master and your commit. Running the full test suite can take up to one hour and use up to 3GB of RAM. Usually it is sufficient to paste only a subset of the results into the pull request to show that the committed changes do not cause unexpected performance regressions.
-
-You can run specific benchmarks using the `-b` flag, which takes a regular expression. For example, this will only run tests from a `pandas/asv_bench/benchmarks/groupby.py` file:
-
-    asv continuous master -b groupby
-
-If you want to run only a specific group of tests from a file, you can do it using `.` as a separator. For example:
-
-    asv continuous master -b groupby.groupby_agg_builtins1
-
-will run only the `groupby_agg_builtins1` test defined in the `groupby` file.
-
-It can also be useful to run tests in your current environment, with:
-
-    asv dev
-
-This command is equivalent to:
-
-    asv run --quick --show-stderr --python=same
-
-This will run each benchmark only once, display stderr from the benchmarks, and use your local `python` that comes from your `$PATH`.
-
-Information on how to write a benchmark can be found in the [asv documentation](https://asv.readthedocs.io/en/latest/writing_benchmarks.html). In outline, a benchmark looks like the sketch below.
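-
-The following is only a rough illustration with hypothetical names; refer to the asv docs and the existing files in `pandas/asv_bench/benchmarks/` for the actual conventions. asv calls `setup()` before timing, and times every method whose name starts with `time_`:
-
-    import numpy as np
-    import pandas as pd
-
-    class GroupByAggSketch(object):
-
-        def setup(self):
-            # build the data once per benchmark run, outside the timed section
-            n = 10000
-            self.df = pd.DataFrame({'key': np.random.randint(0, 100, n),
-                                    'value': np.random.randn(n)})
-
-        def time_groupby_sum(self):
-            # only this body is timed
-            self.df.groupby('key')['value'].sum()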
-
-#### Running the vbench performance test suite (phasing out)
-
-Historically, *pandas* used [vbench library](https://github.com/pydata/vbench) to enable easy monitoring of the performance of critical *pandas* operations. These benchmarks are all found in the `pandas/vb_suite` directory. vbench currently only works on python2.
-
-To install vbench:
-
-    pip install git+https://github.com/pydata/vbench
-
-Vbench also requires `sqlalchemy`, `gitpython`, and `psutil`, which can all be installed using pip. If you need to run a benchmark, change your directory to the *pandas* root and run:
-
-    ./test_perf.sh -b master -t HEAD
-
-This will check out the master revision and run the suite on both master and your commit. Running the full test suite can take up to one hour and use up to 3GB of RAM. Usually it is sufficient to paste a subset of the results into the Pull Request to show that the committed changes do not cause unexpected performance regressions.
-
-You can run specific benchmarks using the `-r` flag, which takes a regular expression.
-
-See the [performance testing wiki](https://github.com/pandas-dev/pandas/wiki/Performance-Testing) for information on how to write a benchmark.
-
-### Documenting your code
-
-Changes should be reflected in the release notes located in `doc/source/whatsnew/vx.y.z.txt`. This file contains an ongoing change log for each release. Add an entry to this file to document your fix, enhancement or (unavoidable) breaking change. Make sure to include the GitHub issue number when adding your entry (using `` :issue:`1234` `` where 1234 is the issue/pull request number).
-
-If your code is an enhancement, it is most likely necessary to add usage examples to the existing documentation. This can be done following the section regarding documentation above <contributing.documentation>. Further, to let users know when this feature was added, the `versionadded` directive is used. The sphinx syntax for that is:
-
-``` sourceCode
-.. versionadded:: 0.17.0
-```
-
-This will put the text *New in version 0.17.0* wherever you put the sphinx directive. This should also be put in the docstring when adding a new function or method ([example](https://github.com/pandas-dev/pandas/blob/v0.16.2/pandas/core/generic.py#L1959)) or a new keyword argument ([example](https://github.com/pandas-dev/pandas/blob/v0.16.2/pandas/core/frame.py#L1171)).
-
-Contributing your changes to *pandas*
--------------------------------------
-
-### Committing your code
-
-Keep style fixes to a separate commit to make your pull request more readable.
-
-Once you've made changes, you can see them by typing:
-
-    git status
-
-If you have created a new file, it is not being tracked by git. Add it by typing:
-
-    git add path/to/file-to-be-added.py
-
-Running `git status` again should give something like:
-
-    # On branch shiny-new-feature
-    #
-    #       new file:   /relative/path/to/file-you-added.py
-    #
-
-Finally, commit your changes to your local repository with an explanatory message. *Pandas* uses a convention for commit message prefixes and layout. Here are some common prefixes along with general guidelines for when to use them:
-
-> - ENH: Enhancement, new functionality
-> - BUG: Bug fix
-> - DOC: Additions/updates to documentation
-> - TST: Additions/updates to tests
-> - BLD: Updates to the build process/scripts
-> - PERF: Performance improvement
-> - CLN: Code cleanup
-
-The following defines how a commit message should be structured. Please reference the relevant GitHub issues in your commit message using GH1234 or \#1234. Either style is fine, but the former is generally preferred:
-
-> - a subject line with < 80 chars.
-> - One blank line.
-> - Optionally, a commit message body.
-
-Now you can commit your changes in your local repository:
-
-    git commit -m "your commit message"
-
-### Combining commits
-
-If you have multiple commits, you may want to combine them into one commit, often referred to as "squashing" or "rebasing". This is a common request from package maintainers when you submit a pull request, as it keeps the commit history more compact. To rebase your commits:
-
-    git rebase -i HEAD~#
-
-Where \# is the number of commits you want to combine. Then you can pick the relevant commit message and discard others.
-
-To squash to the master branch do:
-
-    git rebase -i master
-
-Use the `s` option on a commit to `squash`, meaning to keep that commit's message for editing, or `f` to `fixup`, meaning to discard its message and merge it into the previous commit.
-
-Then you will need to push the branch (see below) forcefully to replace the current commits with the new ones:
-
-    git push origin shiny-new-feature -f
-
-### Pushing your changes
-
-When you want your changes to appear publicly on your GitHub page, push your forked feature branch's commits:
-
-    git push origin shiny-new-feature
-
-Here `origin` is the default name given to your remote repository on GitHub.
-You can see the remote repositories:
-
-    git remote -v
-
-If you added the upstream repository as described above you will see something like:
-
-    origin  git@github.com:yourname/pandas.git (fetch)
-    origin  git@github.com:yourname/pandas.git (push)
-    upstream        git://github.com/pandas-dev/pandas.git (fetch)
-    upstream        git://github.com/pandas-dev/pandas.git (push)
-
-Now your code is on GitHub, but it is not yet a part of the *pandas* project. For that to happen, a pull request needs to be submitted on GitHub.
-
-### Review your code
-
-When you're ready to ask for a code review, file a pull request. Before you do, once again make sure that you have followed all the guidelines outlined in this document regarding code style, tests, performance tests, and documentation. You should also double-check your branch changes against the branch it was based on:
-
-1. Navigate to your repository on GitHub
-2. Click on `Branches`
-3. Click on the `Compare` button for your feature branch
-4. Select the `base` and `compare` branches, if necessary. This will be `master` and `shiny-new-feature`, respectively.
-
-### Finally, make the pull request
-
-If everything looks good, you are ready to make a pull request. A pull request is how code from a local repository becomes available to the GitHub community and can be looked at and eventually merged into the master version. This pull request and its associated changes will eventually be committed to the master branch and available in the next release. To submit a pull request:
-
-1. Navigate to your repository on GitHub
-2. Click on the `Pull Request` button
-3. You can then click on `Commits` and `Files Changed` to make sure everything looks okay one last time
-4. Write a description of your changes in the `Preview Discussion` tab
-5. Click `Send Pull Request`.
-
-This request then goes to the repository maintainers, and they will review the code. If you need to make more changes, you can make them in your branch, push them to GitHub, and the pull request will be automatically updated. Pushing them to GitHub again is done by:
-
-    git push -f origin shiny-new-feature
-
-This will automatically update your pull request with the latest code and restart the Travis-CI tests. Note that `-f` (force push) is only needed if you have rewritten history, for example by squashing your commits as described above.
-
-### Delete your merged branch (optional)
-
-Once your feature branch is accepted into upstream, you'll probably want to get rid of the branch. First, update the master branch from upstream so git knows it is safe to delete your branch:
-
-    git fetch upstream
-    git checkout master
-    git merge upstream/master
+Getting Started
+---------------
+If you are looking to contribute to the *pandas* codebase, the best place to start is the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues). This is also a great place for filing bug reports and making suggestions for ways in which we can improve the code and documentation.
-
-Then you can just do:
+
+If you have additional questions, feel free to ask them on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas). Further information can also be found in the [Getting Started](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#where-to-start) section of our main contribution doc.
-    git branch -d shiny-new-feature
+Filing Issues
+-------------
+If you notice a bug in the code or in the docs, or have suggestions for how we can improve either, feel free to create an issue on the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) using [GitHub's "issue" form](https://github.com/pandas-dev/pandas/issues/new). The form contains some questions that will help us best address your issue. For more information regarding how to file issues against *pandas*, please refer to the [Bug reports and enhancement requests](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#bug-reports-and-enhancement-requests) section of our main contribution doc.
-
-Make sure you use a lower-case `-d`, or else git won't warn you if your feature branch has not actually been merged.
+
+Contributing to the Codebase
+----------------------------
+The code is hosted on [GitHub](https://www.github.com/pandas-dev/pandas), so you will need to use [Git](http://git-scm.com/) to clone the project and make changes to the codebase. Once you have obtained a copy of the code, you should create a development environment that is separate from your existing Python environment so that you can make and test changes without compromising your own work environment. For more information, please refer to the [Working with the code](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#working-with-the-code) section of our main contribution docs.
-
-The branch will still exist on GitHub, so to delete it there do:
+
+Before submitting your changes for review, make sure to check that your changes do not break any tests. More information about our test suites can be found [here](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#test-driven-development-code-writing). We also have guidelines regarding coding style that will be enforced during testing. Details about coding style can be found [here](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#code-standards).
-
-    git push origin --delete shiny-new-feature
+
+Once your changes are ready to be submitted, make sure to push them to GitHub before creating a pull request. Details about how to do that can be found in the [Contributing your changes to pandas](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#contributing-your-changes-to-pandas) section of our main contribution docs. We will review your changes, and you will most likely be asked to make additional changes before your pull request is finally ready to merge. However, once it's ready, we will merge it, and you will have successfully contributed to the codebase!
From 4136c0c75359705a565a414e60b94dfdfb571a6d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Feb 2017 10:01:48 -0500 Subject: [PATCH 084/933] TST: remove 4 builds from travis that are on circleci (#15465) --- .travis.yml | 109 ---------------------------------------------------- circle.yml | 5 ++- 2 files changed, 4 insertions(+), 110 deletions(-) diff --git a/.travis.yml b/.travis.yml index bb96ab210c088..97bf881f3b6fc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -91,21 +91,6 @@ matrix: packages: - libatlas-base-dev - gfortran -# In allow_failures - - python: 2.7 - env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_nslow_nnet_COMPAT" - - TEST_ARGS="--skip-slow --skip-network" - - LOCALE_OVERRIDE="it_IT.UTF-8" - - INSTALL_TEST=true - - JOB_TAG=_COMPAT - - CACHE_NAME="27_nslow_nnet_COMPAT" - - USE_CACHE=true - addons: - apt: - packages: - - language-pack-it # In allow_failures - python: 2.7 env: @@ -127,37 +112,6 @@ matrix: - BUILD_TEST=true - CACHE_NAME="27_build_test_conda" - USE_CACHE=true -# In allow_failures - - python: 3.4 - env: - - PYTHON_VERSION=3.4 - - JOB_NAME: "34_nslow" - - LOCALE_OVERRIDE="zh_CN.UTF-8" - - TEST_ARGS="--skip-slow" - - FULL_DEPS=true - - CLIPBOARD=xsel - - CACHE_NAME="34_nslow" - - USE_CACHE=true - addons: - apt: - packages: - - xsel - - language-pack-zh-hans -# In allow_failures - - python: 3.4 - env: - - PYTHON_VERSION=3.4 - - JOB_NAME: "34_slow" - - JOB_TAG=_SLOW - - TEST_ARGS="--only-slow --skip-network" - - FULL_DEPS=true - - CLIPBOARD=xsel - - CACHE_NAME="34_slow" - - USE_CACHE=true - addons: - apt: - packages: - - xsel # In allow_failures - python: 3.5 env: @@ -173,16 +127,6 @@ matrix: packages: - libatlas-base-dev - gfortran -# In allow_failures - - python: 3.5 - env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_ascii" - - JOB_TAG=_ASCII - - TEST_ARGS="--skip-slow --skip-network" - - LOCALE_OVERRIDE="C" - - CACHE_NAME="35_ascii" - - USE_CACHE=true # In allow_failures - python: 3.5 env: @@ -203,20 +147,6 @@ matrix: - FULL_DEPS=true - CACHE_NAME="27_slow" - USE_CACHE=true - - python: 3.4 - env: - - PYTHON_VERSION=3.4 - - JOB_NAME: "34_slow" - - JOB_TAG=_SLOW - - TEST_ARGS="--only-slow --skip-network" - - FULL_DEPS=true - - CLIPBOARD=xsel - - CACHE_NAME="34_slow" - - USE_CACHE=true - addons: - apt: - packages: - - xsel - python: 2.7 env: - PYTHON_VERSION=2.7 @@ -227,21 +157,6 @@ matrix: - BUILD_TEST=true - CACHE_NAME="27_build_test_conda" - USE_CACHE=true - - python: 3.4 - env: - - PYTHON_VERSION=3.4 - - JOB_NAME: "34_nslow" - - LOCALE_OVERRIDE="zh_CN.UTF-8" - - TEST_ARGS="--skip-slow" - - FULL_DEPS=true - - CLIPBOARD=xsel - - CACHE_NAME="34_nslow" - - USE_CACHE=true - addons: - apt: - packages: - - xsel - - language-pack-zh-hans - python: 3.5 env: - PYTHON_VERSION=3.5 @@ -256,29 +171,6 @@ matrix: packages: - libatlas-base-dev - gfortran - - python: 2.7 - env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_nslow_nnet_COMPAT" - - TEST_ARGS="--skip-slow --skip-network" - - LOCALE_OVERRIDE="it_IT.UTF-8" - - INSTALL_TEST=true - - JOB_TAG=_COMPAT - - CACHE_NAME="27_nslow_nnet_COMPAT" - - USE_CACHE=true - addons: - apt: - packages: - - language-pack-it - - python: 3.5 - env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_ascii" - - JOB_TAG=_ASCII - - TEST_ARGS="--skip-slow --skip-network" - - LOCALE_OVERRIDE="C" - - CACHE_NAME="35_ascii" - - USE_CACHE=true - python: 3.5 env: - PYTHON_VERSION=3.5 @@ -299,7 +191,6 @@ before_install: - pwd - uname -a - python -V -# git info & get tags - git --version - git tag - ci/before_install_travis.sh diff --git a/circle.yml b/circle.yml 
index 97136d368ae6f..046af6e9e1389 100644 --- a/circle.yml +++ b/circle.yml @@ -3,16 +3,18 @@ machine: # these are globally set MINICONDA_DIR: /home/ubuntu/miniconda3 + database: override: - ./ci/install_db_circle.sh + checkout: post: # since circleci does a shallow fetch # we need to populate our tags - git fetch --depth=1000 - - git fetch --tags + dependencies: override: @@ -29,6 +31,7 @@ dependencies: esac - ./ci/show_circle.sh + test: override: - case $CIRCLE_NODE_INDEX in 0) ./ci/run_circle.sh --skip-slow --skip-network ;; 1) ./ci/run_circle.sh --only-slow --skip-network ;; 2) ./ci/run_circle.sh --skip-slow --skip-network ;; 3) ./ci/run_circle.sh --skip-slow --skip-network ;; esac: From f9d774263af3e0cafbdec5ff82d086ff97e41bd6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Feb 2017 10:40:47 -0500 Subject: [PATCH 085/933] update README.md for badges (circleci and fix anaconda cloud pointer) --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4293d7294d5e0..195b76f64b37f 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ - + @@ -30,6 +30,15 @@ + + + + + From b94186d4c58ee055656a84f55618be537db0095a Mon Sep 17 00:00:00 2001 From: Peter Csizsek Date: Thu, 23 Feb 2017 08:23:11 -0500 Subject: [PATCH 093/933] BUG: The roll_quantile function now throws an exception instead of causing a segfault when quantile is out of range closes #15463 Author: Peter Csizsek Closes #15476 from csizsek/fix-rolling-quantile-segfault and squashes the following commits: e31e5be [Peter Csizsek] Correctly catching exception in the test for Rolling.quantile. 4eea34a [Peter Csizsek] Refactored and moved exception throwing test to a new function for Rolling.quantile(). 8b1e020 [Peter Csizsek] Added a note about the Rolling.quantile bug fix to the changelog. f39b122 [Peter Csizsek] Added a new test case to roll_quantile_test to trigger a TypeError when called with a string. 
f736ca2 [Peter Csizsek] The roll_quantile function in window.pyx now raises a ValueError when the quantile value is not in [0.0, 1.0] --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/tests/test_window.py | 14 +++++++++++++- pandas/window.pyx | 7 +++++-- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index e65276fe51fe8..fa24c973a7549 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -539,7 +539,7 @@ Bug Fixes - Bug in using ``__deepcopy__`` on empty NDFrame objects (:issue:`15370`) - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`) - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`) - +- Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 452e8999ab13f..3f2973a9834ca 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1063,7 +1063,7 @@ def test_rolling_max(self): window=3, min_periods=5) def test_rolling_quantile(self): - qs = [.1, .5, .9] + qs = [0.0, .1, .5, .9, 1.0] def scoreatpercentile(a, per): values = np.sort(a, axis=0) @@ -1084,6 +1084,18 @@ def alt(x): self._check_moment_func(f, alt, name='quantile', quantile=q) + def test_rolling_quantile_param(self): + ser = Series([0.0, .1, .5, .9, 1.0]) + + with self.assertRaises(ValueError): + ser.rolling(3).quantile(-0.1) + + with self.assertRaises(ValueError): + ser.rolling(3).quantile(10.0) + + with self.assertRaises(TypeError): + ser.rolling(3).quantile('foo') + def test_rolling_apply(self): # suppress warnings about empty slices, as we are deliberately testing # with a 0-length Series diff --git a/pandas/window.pyx b/pandas/window.pyx index 8235d68e2a88b..005d42c9f68be 100644 --- a/pandas/window.pyx +++ b/pandas/window.pyx @@ -134,8 +134,8 @@ cdef class WindowIndexer: bint is_variable def get_data(self): - return (self.start, self.end, self.N, - self.win, self.minp, + return (self.start, self.end, self.N, + self.win, self.minp, self.is_variable) @@ -1285,6 +1285,9 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, ndarray[int64_t] start, end ndarray[double_t] output + if quantile < 0.0 or quantile > 1.0: + raise ValueError("quantile value {0} not in [0, 1]".format(quantile)) + # we use the Fixed/Variable Indexer here as the # actual skiplist ops outweigh any window computation costs start, end, N, win, minp, is_variable = get_window_indexer( From 2819478d3e199e8760684b30642fe41bee547173 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 24 Feb 2017 07:05:11 -0500 Subject: [PATCH 094/933] TST: add pytest to asv conf --- asv_bench/asv.conf.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 155deb5bdbd1f..4fc6f9f634426 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -50,7 +50,8 @@ "openpyxl": [], "xlsxwriter": [], "xlrd": [], - "xlwt": [] + "xlwt": [], + "pytest": [], }, // Combinations of libraries/python versions can be excluded/included From 81c57e20da278494dfebc2f1043f5ff361a234f3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 24 Feb 2017 
07:13:46 -0500 Subject: [PATCH 095/933] CLN: split off frozen (immutable) data structures into pandas/indexes/frozen.py should make it a bit easier to work with these; and no reason to be in pandas/core/base.py Author: Jeff Reback Closes #15477 from jreback/frozen and squashes the following commits: 2a64a4f [Jeff Reback] CLN: split off frozen (immutable) data structures into pandas/indexes/frozen.py --- pandas/compat/pickle_compat.py | 34 +++++++- pandas/core/base.py | 105 ----------------------- pandas/indexes/base.py | 13 +-- pandas/indexes/frozen.py | 126 ++++++++++++++++++++++++++++ pandas/indexes/multi.py | 6 +- pandas/tests/indexes/test_frozen.py | 68 +++++++++++++++ pandas/tests/test_base.py | 68 +-------------- 7 files changed, 231 insertions(+), 189 deletions(-) create mode 100644 pandas/indexes/frozen.py create mode 100644 pandas/tests/indexes/test_frozen.py diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 1cdf8afd563c6..240baa848adbc 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -52,12 +52,40 @@ def load_reduce(self): stack[-1] = value + +# if classes are moved, provide compat here +_class_locations_map = { + + # 15477 + ('pandas.core.base', 'FrozenNDArray'): ('pandas.indexes.frozen', 'FrozenNDArray'), + ('pandas.core.base', 'FrozenList'): ('pandas.indexes.frozen', 'FrozenList') + } + + +# our Unpickler sub-class to override methods and some dispatcher +# functions for compat + if compat.PY3: class Unpickler(pkl._Unpickler): - pass + + def find_class(self, module, name): + # override superclass + key = (module, name) + module, name = _class_locations_map.get(key, key) + return super(Unpickler, self).find_class(module, name) + else: + class Unpickler(pkl.Unpickler): - pass + + def find_class(self, module, name): + # override superclass + key = (module, name) + module, name = _class_locations_map.get(key, key) + __import__(module) + mod = sys.modules[module] + klass = getattr(mod, name) + return klass Unpickler.dispatch = copy.copy(Unpickler.dispatch) Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce @@ -76,8 +104,6 @@ def load_newobj(self): self.stack[-1] = obj Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj -# py3 compat - def load_newobj_ex(self): kwargs = self.stack.pop() diff --git a/pandas/core/base.py b/pandas/core/base.py index 92ec6bb3d73e6..55149198b0dbf 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -17,7 +17,6 @@ from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError -from pandas.formats.printing import pprint_thing _shared_docs = dict() _indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='', @@ -694,110 +693,6 @@ def _gotitem(self, key, ndim, subset=None): return self -class FrozenList(PandasObject, list): - - """ - Container that doesn't allow setting item *but* - because it's technically non-hashable, will be used - for lookups, appropriately, etc. 
- """ - # Sidenote: This has to be of type list, otherwise it messes up PyTables - # typechecks - - def __add__(self, other): - if isinstance(other, tuple): - other = list(other) - return self.__class__(super(FrozenList, self).__add__(other)) - - __iadd__ = __add__ - - # Python 2 compat - def __getslice__(self, i, j): - return self.__class__(super(FrozenList, self).__getslice__(i, j)) - - def __getitem__(self, n): - # Python 3 compat - if isinstance(n, slice): - return self.__class__(super(FrozenList, self).__getitem__(n)) - return super(FrozenList, self).__getitem__(n) - - def __radd__(self, other): - if isinstance(other, tuple): - other = list(other) - return self.__class__(other + list(self)) - - def __eq__(self, other): - if isinstance(other, (tuple, FrozenList)): - other = list(other) - return super(FrozenList, self).__eq__(other) - - __req__ = __eq__ - - def __mul__(self, other): - return self.__class__(super(FrozenList, self).__mul__(other)) - - __imul__ = __mul__ - - def __reduce__(self): - return self.__class__, (list(self),) - - def __hash__(self): - return hash(tuple(self)) - - def _disabled(self, *args, **kwargs): - """This method will not function because object is immutable.""" - raise TypeError("'%s' does not support mutable operations." % - self.__class__.__name__) - - def __unicode__(self): - return pprint_thing(self, quote_strings=True, - escape_chars=('\t', '\r', '\n')) - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, - str(self)) - - __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled - pop = append = extend = remove = sort = insert = _disabled - - -class FrozenNDArray(PandasObject, np.ndarray): - - # no __array_finalize__ for now because no metadata - def __new__(cls, data, dtype=None, copy=False): - if copy is None: - copy = not isinstance(data, FrozenNDArray) - res = np.array(data, dtype=dtype, copy=copy).view(cls) - return res - - def _disabled(self, *args, **kwargs): - """This method will not function because object is immutable.""" - raise TypeError("'%s' does not support mutable operations." % - self.__class__) - - __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled - put = itemset = fill = _disabled - - def _shallow_copy(self): - return self.view() - - def values(self): - """returns *copy* of underlying array""" - arr = self.view(np.ndarray).copy() - return arr - - def __unicode__(self): - """ - Return a string representation for this object. - - Invoked by unicode(df) in py2 only. Yields a Unicode String in both - py2/py3. 
- """ - prepr = pprint_thing(self, escape_chars=('\t', '\r', '\n'), - quote_strings=True) - return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype) - - class IndexOpsMixin(object): """ common ops mixin to support a unified inteface / docs for Series / Index diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index f1f37622b2a74..4837fc0d7438c 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -35,16 +35,15 @@ needs_i8_conversion, is_iterator, is_list_like, is_scalar) -from pandas.types.cast import _coerce_indexer_dtype from pandas.core.common import (is_bool_indexer, _values_from_object, _asarray_tuplesafe) -from pandas.core.base import (PandasObject, FrozenList, FrozenNDArray, - IndexOpsMixin) +from pandas.core.base import PandasObject, IndexOpsMixin import pandas.core.base as base from pandas.util.decorators import (Appender, Substitution, cache_readonly, deprecate, deprecate_kwarg) +from pandas.indexes.frozen import FrozenList import pandas.core.common as com import pandas.types.concat as _concat import pandas.core.missing as missing @@ -3844,14 +3843,6 @@ def _get_na_value(dtype): np.timedelta64: tslib.NaT}.get(dtype, np.nan) -def _ensure_frozen(array_like, categories, copy=False): - array_like = _coerce_indexer_dtype(array_like, categories) - array_like = array_like.view(FrozenNDArray) - if copy: - array_like = array_like.copy() - return array_like - - def _ensure_has_len(seq): """If seq is an iterator, put its values into a list.""" try: diff --git a/pandas/indexes/frozen.py b/pandas/indexes/frozen.py new file mode 100644 index 0000000000000..e043ba64bbad7 --- /dev/null +++ b/pandas/indexes/frozen.py @@ -0,0 +1,126 @@ +""" +frozen (immutable) data structures to support MultiIndexing + +These are used for: + +- .names (FrozenList) +- .levels & .labels (FrozenNDArray) + +""" + +import numpy as np +from pandas.core.base import PandasObject +from pandas.types.cast import _coerce_indexer_dtype +from pandas.formats.printing import pprint_thing + + +class FrozenList(PandasObject, list): + + """ + Container that doesn't allow setting item *but* + because it's technically non-hashable, will be used + for lookups, appropriately, etc. + """ + # Sidenote: This has to be of type list, otherwise it messes up PyTables + # typechecks + + def __add__(self, other): + if isinstance(other, tuple): + other = list(other) + return self.__class__(super(FrozenList, self).__add__(other)) + + __iadd__ = __add__ + + # Python 2 compat + def __getslice__(self, i, j): + return self.__class__(super(FrozenList, self).__getslice__(i, j)) + + def __getitem__(self, n): + # Python 3 compat + if isinstance(n, slice): + return self.__class__(super(FrozenList, self).__getitem__(n)) + return super(FrozenList, self).__getitem__(n) + + def __radd__(self, other): + if isinstance(other, tuple): + other = list(other) + return self.__class__(other + list(self)) + + def __eq__(self, other): + if isinstance(other, (tuple, FrozenList)): + other = list(other) + return super(FrozenList, self).__eq__(other) + + __req__ = __eq__ + + def __mul__(self, other): + return self.__class__(super(FrozenList, self).__mul__(other)) + + __imul__ = __mul__ + + def __reduce__(self): + return self.__class__, (list(self),) + + def __hash__(self): + return hash(tuple(self)) + + def _disabled(self, *args, **kwargs): + """This method will not function because object is immutable.""" + raise TypeError("'%s' does not support mutable operations." 
% + self.__class__.__name__) + + def __unicode__(self): + return pprint_thing(self, quote_strings=True, + escape_chars=('\t', '\r', '\n')) + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, + str(self)) + + __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled + pop = append = extend = remove = sort = insert = _disabled + + +class FrozenNDArray(PandasObject, np.ndarray): + + # no __array_finalize__ for now because no metadata + def __new__(cls, data, dtype=None, copy=False): + if copy is None: + copy = not isinstance(data, FrozenNDArray) + res = np.array(data, dtype=dtype, copy=copy).view(cls) + return res + + def _disabled(self, *args, **kwargs): + """This method will not function because object is immutable.""" + raise TypeError("'%s' does not support mutable operations." % + self.__class__) + + __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled + put = itemset = fill = _disabled + + def _shallow_copy(self): + return self.view() + + def values(self): + """returns *copy* of underlying array""" + arr = self.view(np.ndarray).copy() + return arr + + def __unicode__(self): + """ + Return a string representation for this object. + + Invoked by unicode(df) in py2 only. Yields a Unicode String in both + py2/py3. + """ + prepr = pprint_thing(self, escape_chars=('\t', '\r', '\n'), + quote_strings=True) + return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype) + + +def _ensure_frozen(array_like, categories, copy=False): + array_like = _coerce_indexer_dtype(array_like, categories) + array_like = array_like.view(FrozenNDArray) + if copy: + array_like = array_like.copy() + return array_like diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 18e1da7303d6d..ec30d2c44efd7 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -28,7 +28,6 @@ UnsortedIndexError) -from pandas.core.base import FrozenList import pandas.core.base as base from pandas.util.decorators import (Appender, cache_readonly, deprecate, deprecate_kwarg) @@ -39,9 +38,10 @@ from pandas.core.config import get_option -from pandas.indexes.base import (Index, _ensure_index, _ensure_frozen, +from pandas.indexes.base import (Index, _ensure_index, _get_na_value, InvalidIndexError, _index_shared_docs) +from pandas.indexes.frozen import FrozenNDArray, FrozenList, _ensure_frozen import pandas.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( @@ -1276,7 +1276,7 @@ def _assert_take_fillable(self, values, indices, allow_fill=True, for new_label in taken: label_values = new_label.values() label_values[mask] = na_value - masked.append(base.FrozenNDArray(label_values)) + masked.append(FrozenNDArray(label_values)) taken = masked else: taken = [lab.take(indices) for lab in self.labels] diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py new file mode 100644 index 0000000000000..a82409fbf9513 --- /dev/null +++ b/pandas/tests/indexes/test_frozen.py @@ -0,0 +1,68 @@ +import numpy as np +from pandas.util import testing as tm +from pandas.tests.test_base import CheckImmutable, CheckStringMixin +from pandas.indexes.frozen import FrozenList, FrozenNDArray +from pandas.compat import u + + +class TestFrozenList(CheckImmutable, CheckStringMixin, tm.TestCase): + mutable_methods = ('extend', 'pop', 'remove', 'insert') + unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"]) + + def setUp(self): + self.lst = [1, 2, 3, 4, 5] + self.container = FrozenList(self.lst) + self.klass = 
FrozenList + + def test_add(self): + result = self.container + (1, 2, 3) + expected = FrozenList(self.lst + [1, 2, 3]) + self.check_result(result, expected) + + result = (1, 2, 3) + self.container + expected = FrozenList([1, 2, 3] + self.lst) + self.check_result(result, expected) + + def test_inplace(self): + q = r = self.container + q += [5] + self.check_result(q, self.lst + [5]) + # other shouldn't be mutated + self.check_result(r, self.lst) + + +class TestFrozenNDArray(CheckImmutable, CheckStringMixin, tm.TestCase): + mutable_methods = ('put', 'itemset', 'fill') + unicode_container = FrozenNDArray([u("\u05d0"), u("\u05d1"), "c"]) + + def setUp(self): + self.lst = [3, 5, 7, -2] + self.container = FrozenNDArray(self.lst) + self.klass = FrozenNDArray + + def test_shallow_copying(self): + original = self.container.copy() + self.assertIsInstance(self.container.view(), FrozenNDArray) + self.assertFalse(isinstance( + self.container.view(np.ndarray), FrozenNDArray)) + self.assertIsNot(self.container.view(), self.container) + self.assert_numpy_array_equal(self.container, original) + # shallow copy should be the same too + self.assertIsInstance(self.container._shallow_copy(), FrozenNDArray) + + # setting should not be allowed + def testit(container): + container[0] = 16 + + self.check_mutable_error(testit, self.container) + + def test_values(self): + original = self.container.view(np.ndarray).copy() + n = original[0] + 15 + vals = self.container.values() + self.assert_numpy_array_equal(original, vals) + self.assertIsNot(original, vals) + vals[0] = n + self.assertIsInstance(self.container, FrozenNDArray) + self.assert_numpy_array_equal(self.container.values(), original) + self.assertEqual(vals[0], n) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 473f1d81c9532..8264ad33950f9 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -14,10 +14,9 @@ import pandas.util.testing as tm from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, Timedelta) -from pandas.compat import u, StringIO +from pandas.compat import StringIO from pandas.compat.numpy import np_array_datetime64_compat -from pandas.core.base import (FrozenList, FrozenNDArray, PandasDelegate, - NoNewAttributesMixin) +from pandas.core.base import PandasDelegate, NoNewAttributesMixin from pandas.tseries.base import DatetimeIndexOpsMixin @@ -83,69 +82,6 @@ def check_result(self, result, expected, klass=None): self.assertEqual(result, expected) -class TestFrozenList(CheckImmutable, CheckStringMixin, tm.TestCase): - mutable_methods = ('extend', 'pop', 'remove', 'insert') - unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"]) - - def setUp(self): - self.lst = [1, 2, 3, 4, 5] - self.container = FrozenList(self.lst) - self.klass = FrozenList - - def test_add(self): - result = self.container + (1, 2, 3) - expected = FrozenList(self.lst + [1, 2, 3]) - self.check_result(result, expected) - - result = (1, 2, 3) + self.container - expected = FrozenList([1, 2, 3] + self.lst) - self.check_result(result, expected) - - def test_inplace(self): - q = r = self.container - q += [5] - self.check_result(q, self.lst + [5]) - # other shouldn't be mutated - self.check_result(r, self.lst) - - -class TestFrozenNDArray(CheckImmutable, CheckStringMixin, tm.TestCase): - mutable_methods = ('put', 'itemset', 'fill') - unicode_container = FrozenNDArray([u("\u05d0"), u("\u05d1"), "c"]) - - def setUp(self): - self.lst = [3, 5, 7, -2] - self.container = FrozenNDArray(self.lst) - self.klass = 
FrozenNDArray - - def test_shallow_copying(self): - original = self.container.copy() - self.assertIsInstance(self.container.view(), FrozenNDArray) - self.assertFalse(isinstance( - self.container.view(np.ndarray), FrozenNDArray)) - self.assertIsNot(self.container.view(), self.container) - self.assert_numpy_array_equal(self.container, original) - # shallow copy should be the same too - self.assertIsInstance(self.container._shallow_copy(), FrozenNDArray) - - # setting should not be allowed - def testit(container): - container[0] = 16 - - self.check_mutable_error(testit, self.container) - - def test_values(self): - original = self.container.view(np.ndarray).copy() - n = original[0] + 15 - vals = self.container.values() - self.assert_numpy_array_equal(original, vals) - self.assertIsNot(original, vals) - vals[0] = n - self.assertIsInstance(self.container, pd.core.base.FrozenNDArray) - self.assert_numpy_array_equal(self.container.values(), original) - self.assertEqual(vals[0], n) - - class TestPandasDelegate(tm.TestCase): class Delegator(object): From 924c16667ee3db5d025c0963f99a778de8aad398 Mon Sep 17 00:00:00 2001 From: Fumito Hamamura Date: Sat, 25 Feb 2017 04:53:43 +0900 Subject: [PATCH 096/933] DOC: Fix to docstrings of is_type_factory and is_instance_factory (#15499) Closes #15485 --- pandas/core/config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/config.py b/pandas/core/config.py index 1c0eb60b8ec2f..39ed2f9545266 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -747,8 +747,8 @@ def is_type_factory(_type): Returns ------- - validator - a function of a single argument x , which returns the - True if type(x) is equal to `_type` + validator - a function of a single argument x , which raises + ValueError if type(x) is not equal to `_type` """ @@ -768,8 +768,8 @@ def is_instance_factory(_type): Returns ------- - validator - a function of a single argument x , which returns the - True if x is an instance of `_type` + validator - a function of a single argument x , which raises + ValueError if x is not an instance of `_type` """ if isinstance(_type, (tuple, list)): From 3fe85afef47e9e079a0fa24f826bb6faaa2341d5 Mon Sep 17 00:00:00 2001 From: Prasanjit Prakash Date: Fri, 24 Feb 2017 14:56:09 -0500 Subject: [PATCH 097/933] BUG: incorrect ranking in an ordered categorical check for categorical, and then pass the underlying integer codes. 
closes #15420 Author: Prasanjit Prakash Closes #15422 from ikilledthecat/rank_categorical and squashes the following commits: a7e573b [Prasanjit Prakash] moved test for categorical, in rank, to top 3ba4e3a [Prasanjit Prakash] corrections after rebasing c43a029 [Prasanjit Prakash] using if/else construct to pick sorting function for categoricals f8ec019 [Prasanjit Prakash] ask Categorical for ranking function 40d88c1 [Prasanjit Prakash] return values for rank from categorical object 049c0fc [Prasanjit Prakash] GH#15420 added support for na_option when ranking categorical 5e5bbeb [Prasanjit Prakash] BUG: GH#15420 rank for categoricals ef999c3 [Prasanjit Prakash] merged with upstream master fbaba1b [Prasanjit Prakash] return values for rank from categorical object fa0b4c2 [Prasanjit Prakash] BUG: GH15420 - _rank private method on Categorical 9a6b5cd [Prasanjit Prakash] BUG: GH15420 - _rank private method on Categorical 4220e56 [Prasanjit Prakash] BUG: GH15420 - _rank private method on Categorical 6b70921 [Prasanjit Prakash] GH#15420 move rank inside categoricals bf4e36c [Prasanjit Prakash] GH#15420 added support for na_option when ranking categorical ce90207 [Prasanjit Prakash] BUG: GH#15420 rank for categoricals 85b267a [Prasanjit Prakash] Added support for categorical datatype in rank - issue#15420 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/algorithms.py | 5 +- pandas/core/categorical.py | 22 ++++++++ pandas/tests/series/test_analytics.py | 78 +++++++++++++++++++++++++++ 4 files changed, 105 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fa24c973a7549..0b501adba5039 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -578,6 +578,7 @@ Bug Fixes +- Bug in ``.rank()`` which incorrectly ranks ordered categories (:issue:`15420`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4ae46fe33a5cc..b11927a80fb2e 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -973,6 +973,10 @@ def _hashtable_algo(f, values, return_dtype=None): def _get_data_algo(values, func_map): f = None + + if is_categorical_dtype(values): + values = values._values_for_rank() + if is_float_dtype(values): f = func_map['float64'] values = _ensure_float64(values) @@ -988,7 +992,6 @@ def _get_data_algo(values, func_map): elif is_unsigned_integer_dtype(values): f = func_map['uint64'] values = _ensure_uint64(values) - else: values = _ensure_object(values) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index b6898f11ffa74..b88a6b171b316 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1404,6 +1404,28 @@ def sort_values(self, inplace=False, ascending=True, na_position='last'): return self._constructor(values=codes, categories=self.categories, ordered=self.ordered, fastpath=True) + def _values_for_rank(self): + """ + For correctly ranking ordered categorical data. See GH#15420 + + Ordered categorical data should be ranked on the basis of + codes with -1 translated to NaN. + + Returns + ------- + numpy array + + """ + if self.ordered: + values = self.codes + mask = values == -1 + if mask.any(): + values = values.astype('float64') + values[mask] = np.nan + else: + values = np.array(self) + return values + def order(self, inplace=False, ascending=True, na_position='last'): """ DEPRECATED: use :meth:`Categorical.sort_values`. 
That function diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 222165e9d3633..b092e4f084767 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1057,6 +1057,84 @@ def test_rank(self): iranks = iseries.rank() assert_series_equal(iranks, exp) + def test_rank_categorical(self): + # GH issue #15420 rank incorrectly orders ordered categories + + # Test ascending/descending ranking for ordered categoricals + exp = pd.Series([1., 2., 3., 4., 5., 6.]) + exp_desc = pd.Series([6., 5., 4., 3., 2., 1.]) + ordered = pd.Series( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'] + ).astype('category', ).cat.set_categories( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], + ordered=True + ) + assert_series_equal(ordered.rank(), exp) + assert_series_equal(ordered.rank(ascending=False), exp_desc) + + # Unordered categoricals should be ranked as objects + unordered = pd.Series( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], + ).astype('category').cat.set_categories( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], + ordered=False + ) + exp_unordered = pd.Series([2., 4., 6., 3., 1., 5.]) + res = unordered.rank() + assert_series_equal(res, exp_unordered) + + # Test na_option for rank data + na_ser = pd.Series( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN] + ).astype('category', ).cat.set_categories( + [ + 'first', 'second', 'third', 'fourth', + 'fifth', 'sixth', 'seventh' + ], + ordered=True + ) + + exp_top = pd.Series([2., 3., 4., 5., 6., 7., 1.]) + exp_bot = pd.Series([1., 2., 3., 4., 5., 6., 7.]) + exp_keep = pd.Series([1., 2., 3., 4., 5., 6., np.NaN]) + + assert_series_equal(na_ser.rank(na_option='top'), exp_top) + assert_series_equal(na_ser.rank(na_option='bottom'), exp_bot) + assert_series_equal(na_ser.rank(na_option='keep'), exp_keep) + + # Test na_option for rank data with ascending False + exp_top = pd.Series([7., 6., 5., 4., 3., 2., 1.]) + exp_bot = pd.Series([6., 5., 4., 3., 2., 1., 7.]) + exp_keep = pd.Series([6., 5., 4., 3., 2., 1., np.NaN]) + + assert_series_equal( + na_ser.rank(na_option='top', ascending=False), + exp_top + ) + assert_series_equal( + na_ser.rank(na_option='bottom', ascending=False), + exp_bot + ) + assert_series_equal( + na_ser.rank(na_option='keep', ascending=False), + exp_keep + ) + + # Test with pct=True + na_ser = pd.Series( + ['first', 'second', 'third', 'fourth', np.NaN], + ).astype('category').cat.set_categories( + ['first', 'second', 'third', 'fourth'], + ordered=True + ) + exp_top = pd.Series([0.4, 0.6, 0.8, 1., 0.2]) + exp_bot = pd.Series([0.2, 0.4, 0.6, 0.8, 1.]) + exp_keep = pd.Series([0.25, 0.5, 0.75, 1., np.NaN]) + + assert_series_equal(na_ser.rank(na_option='top', pct=True), exp_top) + assert_series_equal(na_ser.rank(na_option='bottom', pct=True), exp_bot) + assert_series_equal(na_ser.rank(na_option='keep', pct=True), exp_keep) + def test_rank_signature(self): s = Series([0, 1]) s.rank(method='average') From 7e0a71b02d77a8efbadf2e8c804dbff59639061e Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 24 Feb 2017 14:59:04 -0500 Subject: [PATCH 098/933] BUG: Accept Generic Array-Like for .where Author: gfyoung Closes #15414 from gfyoung/generic-where-gen-array and squashes the following commits: 5037932 [gfyoung] BUG: Accept generic array-like in .where --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/generic.py | 42 +++++----- pandas/indexes/base.py | 2 +- pandas/tests/frame/test_indexing.py | 89 
++++++++++++++++++++++ pandas/tests/indexes/common.py | 12 +++ pandas/tests/indexes/period/test_period.py | 11 ++- pandas/tests/indexes/test_category.py | 12 ++- pandas/tests/indexes/test_multi.py | 9 +++ pandas/tests/series/test_indexing.py | 54 +++++++++++++ 9 files changed, 211 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 0b501adba5039..4b3a65780f939 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -548,6 +548,7 @@ Bug Fixes +- Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) - Bug in ``Series`` construction with a datetimetz (:issue:`14928`) - Bug in compat for passing long integers to ``Timestamp.replace`` (:issue:`15030`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 76fbb9884753d..921fa2fb1bd48 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4726,25 +4726,37 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, """ inplace = validate_bool_kwarg(inplace, 'inplace') + # align the cond to same shape as myself cond = com._apply_if_callable(cond, self) - if isinstance(cond, NDFrame): cond, _ = cond.align(self, join='right', broadcast_axis=1) else: if not hasattr(cond, 'shape'): - raise ValueError('where requires an ndarray like object for ' - 'its condition') + cond = np.asanyarray(cond) if cond.shape != self.shape: raise ValueError('Array conditional must be same shape as ' 'self') cond = self._constructor(cond, **self._construct_axes_dict()) - if inplace: - cond = -(cond.fillna(True).astype(bool)) + # make sure we are boolean + fill_value = True if inplace else False + cond = cond.fillna(fill_value) + + msg = "Boolean array expected for the condition, not {dtype}" + + if not isinstance(cond, pd.DataFrame): + # This is a single-dimensional object. + if not is_bool_dtype(cond): + raise ValueError(msg.format(dtype=cond.dtype)) else: - cond = cond.fillna(False).astype(bool) + for dt in cond.dtypes: + if not is_bool_dtype(dt): + raise ValueError(msg.format(dtype=dt)) - # try to align + cond = cond.astype(bool, copy=False) + cond = -cond if inplace else cond + + # try to align with other try_quick = True if hasattr(other, 'align'): @@ -4891,26 +4903,20 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, Parameters ---------- - cond : boolean %(klass)s, array or callable + cond : boolean %(klass)s, array-like, or callable If cond is callable, it is computed on the %(klass)s and - should return boolean %(klass)s or array. - The callable must not change input %(klass)s - (though pandas doesn't check it). + should return boolean %(klass)s or array. The callable must + not change input %(klass)s (though pandas doesn't check it). .. versionadded:: 0.18.1 - A callable can be used as cond. - other : scalar, %(klass)s, or callable If other is callable, it is computed on the %(klass)s and - should return scalar or %(klass)s. - The callable must not change input %(klass)s - (though pandas doesn't check it). + should return scalar or %(klass)s. The callable must not + change input %(klass)s (though pandas doesn't check it). .. versionadded:: 0.18.1 - A callable can be used as other. 
- inplace : boolean, default False Whether to perform the operation in place on the data axis : alignment axis if needed, default None diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 4837fc0d7438c..dcbcccdfcd610 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -573,7 +573,7 @@ def repeat(self, repeats, *args, **kwargs): Parameters ---------- - cond : boolean same length as self + cond : boolean array-like with the same length as self other : scalar, or array-like """ diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index c06faa75ed346..18fb17b98570a 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2479,6 +2479,95 @@ def _check_set(df, cond, check_dtypes=True): expected = df[df['a'] == 1].reindex(df.index) assert_frame_equal(result, expected) + def test_where_array_like(self): + # see gh-15414 + klasses = [list, tuple, np.array] + + df = DataFrame({'a': [1, 2, 3]}) + cond = [[False], [True], [True]] + expected = DataFrame({'a': [np.nan, 2, 3]}) + + for klass in klasses: + result = df.where(klass(cond)) + assert_frame_equal(result, expected) + + df['b'] = 2 + expected['b'] = [2, np.nan, 2] + cond = [[False, True], [True, False], [True, True]] + + for klass in klasses: + result = df.where(klass(cond)) + assert_frame_equal(result, expected) + + def test_where_invalid_input(self): + # see gh-15414: only boolean arrays accepted + df = DataFrame({'a': [1, 2, 3]}) + msg = "Boolean array expected for the condition" + + conds = [ + [[1], [0], [1]], + Series([[2], [5], [7]]), + DataFrame({'a': [2, 5, 7]}), + [["True"], ["False"], ["True"]], + [[Timestamp("2017-01-01")], + [pd.NaT], [Timestamp("2017-01-02")]] + ] + + for cond in conds: + with tm.assertRaisesRegexp(ValueError, msg): + df.where(cond) + + df['b'] = 2 + conds = [ + [[0, 1], [1, 0], [1, 1]], + Series([[0, 2], [5, 0], [4, 7]]), + [["False", "True"], ["True", "False"], + ["True", "True"]], + DataFrame({'a': [2, 5, 7], 'b': [4, 8, 9]}), + [[pd.NaT, Timestamp("2017-01-01")], + [Timestamp("2017-01-02"), pd.NaT], + [Timestamp("2017-01-03"), Timestamp("2017-01-03")]] + ] + + for cond in conds: + with tm.assertRaisesRegexp(ValueError, msg): + df.where(cond) + + def test_where_dataframe_col_match(self): + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + cond = DataFrame([[True, False, True], [False, False, True]]) + + out = df.where(cond) + expected = DataFrame([[1.0, np.nan, 3], [np.nan, np.nan, 6]]) + tm.assert_frame_equal(out, expected) + + cond.columns = ["a", "b", "c"] # Columns no longer match. 
+ msg = "Boolean array expected for the condition" + with tm.assertRaisesRegexp(ValueError, msg): + df.where(cond) + + def test_where_ndframe_align(self): + msg = "Array conditional must be same shape as self" + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + + cond = [True] + with tm.assertRaisesRegexp(ValueError, msg): + df.where(cond) + + expected = DataFrame([[1, 2, 3], [np.nan, np.nan, np.nan]]) + + out = df.where(Series(cond)) + tm.assert_frame_equal(out, expected) + + cond = np.array([False, True, False, True]) + with tm.assertRaisesRegexp(ValueError, msg): + df.where(cond) + + expected = DataFrame([[np.nan, np.nan, np.nan], [4, 5, 6]]) + + out = df.where(Series(cond)) + tm.assert_frame_equal(out, expected) + def test_where_bug(self): # GH 2793 diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 81ad0524807f3..7b39a33266ffa 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -497,6 +497,18 @@ def test_where(self): result = i.where(cond) tm.assert_index_equal(result, expected) + def test_where_array_like(self): + i = self.create_index() + + _nan = i._na_value + cond = [False] + [True] * (len(i) - 1) + klasses = [list, tuple, np.array, pd.Series] + expected = pd.Index([_nan] + i[1:].tolist(), dtype=i.dtype) + + for klass in klasses: + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + def test_setops_errorcases(self): for name, idx in compat.iteritems(self.indices): # # non-iterable input diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 6a8128bb8985f..b80ab6feeeb23 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -89,13 +89,22 @@ def test_where(self): expected = i tm.assert_index_equal(result, expected) - i2 = i.copy() i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), freq='D') result = i.where(notnull(i2)) expected = i2 tm.assert_index_equal(result, expected) + def test_where_array_like(self): + i = self.create_index() + cond = [False] + [True] * (len(i) - 1) + klasses = [list, tuple, np.array, Series] + expected = pd.PeriodIndex([pd.NaT] + i[1:].tolist(), freq='D') + + for klass in klasses: + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + def test_where_other(self): i = self.create_index() diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 6b6885c082533..64a0e71bd5ace 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -240,13 +240,23 @@ def test_where(self): expected = i tm.assert_index_equal(result, expected) - i2 = i.copy() i2 = pd.CategoricalIndex([np.nan, np.nan] + i[2:].tolist(), categories=i.categories) result = i.where(notnull(i2)) expected = i2 tm.assert_index_equal(result, expected) + def test_where_array_like(self): + i = self.create_index() + cond = [False] + [True] * (len(i) - 1) + klasses = [list, tuple, np.array, pd.Series] + expected = pd.CategoricalIndex([np.nan] + i[1:].tolist(), + categories=i.categories) + + for klass in klasses: + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + def test_append(self): ci = self.create_index() diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 5611492b4af1b..80ff67ab3d043 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -88,6 +88,15 @@ def f(): self.assertRaises(NotImplementedError, f) + def 
test_where_array_like(self): + i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + klasses = [list, tuple, np.array, pd.Series] + cond = [False, True] + + for klass in klasses: + f = lambda: i.where(klass(cond)) + self.assertRaises(NotImplementedError, f) + def test_repeat(self): reps = 2 numbers = [1, 2, 3] diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index a20cb8324d2a3..8a2cc53b42938 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1193,6 +1193,60 @@ def f(): expected = Series(np.nan, index=[9]) assert_series_equal(result, expected) + def test_where_array_like(self): + # see gh-15414 + s = Series([1, 2, 3]) + cond = [False, True, True] + expected = Series([np.nan, 2, 3]) + klasses = [list, tuple, np.array, Series] + + for klass in klasses: + result = s.where(klass(cond)) + assert_series_equal(result, expected) + + def test_where_invalid_input(self): + # see gh-15414: only boolean arrays accepted + s = Series([1, 2, 3]) + msg = "Boolean array expected for the condition" + + conds = [ + [1, 0, 1], + Series([2, 5, 7]), + ["True", "False", "True"], + [Timestamp("2017-01-01"), + pd.NaT, Timestamp("2017-01-02")] + ] + + for cond in conds: + with tm.assertRaisesRegexp(ValueError, msg): + s.where(cond) + + msg = "Array conditional must be same shape as self" + with tm.assertRaisesRegexp(ValueError, msg): + s.where([True]) + + def test_where_ndframe_align(self): + msg = "Array conditional must be same shape as self" + s = Series([1, 2, 3]) + + cond = [True] + with tm.assertRaisesRegexp(ValueError, msg): + s.where(cond) + + expected = Series([1, np.nan, np.nan]) + + out = s.where(Series(cond)) + tm.assert_series_equal(out, expected) + + cond = np.array([False, True, False, True]) + with tm.assertRaisesRegexp(ValueError, msg): + s.where(cond) + + expected = Series([np.nan, 2, np.nan]) + + out = s.where(Series(cond)) + tm.assert_series_equal(out, expected) + def test_where_setitem_invalid(self): # GH 2702 From 595580464a256fb883e8baa5b6e62f2013f0cf1a Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Fri, 24 Feb 2017 15:07:25 -0500 Subject: [PATCH 099/933] BUG: GH #12223, GH #15262. Allow ints for names in MultiIndex closes #12223 closes #15262 Author: Dr-Irv Closes #15478 from Dr-Irv/Issue15262 and squashes the following commits: 15d8433 [Dr-Irv] Address jreback comments 10667a3 [Dr-Irv] Fix types for test 8935068 [Dr-Irv] resolve conflicts 385ca3e [Dr-Irv] BUG: GH #12223, GH #15262. 
Allow ints for names in MultiIndex --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 6 +++--- pandas/core/groupby.py | 6 +++--- pandas/core/reshape.py | 3 ++- pandas/formats/format.py | 2 +- pandas/indexes/base.py | 10 ++++++---- pandas/indexes/multi.py | 14 ++++++++------ pandas/io/sql.py | 2 +- pandas/tests/frame/test_combine_concat.py | 18 ++++++++++++++++++ pandas/util/doctools.py | 6 +++--- 10 files changed, 46 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4b3a65780f939..7426b5ca2a69d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -550,6 +550,7 @@ Bug Fixes - Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) - Bug in ``Series`` construction with a datetimetz (:issue:`14928`) +- Bug in output formatting of a ``MultiIndex`` when names are integers (:issue:`12223`, :issue:`15262`) - Bug in compat for passing long integers to ``Timestamp.replace`` (:issue:`15030`) - Bug in ``.loc`` that would not return the correct dtype for scalar access for a DataFrame (:issue:`11617`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bfef2cfbd0d51..ce3481fc17c5b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2876,7 +2876,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False, names = [x for x in self.index.names] if isinstance(self.index, MultiIndex): for i in range(self.index.nlevels): - arrays.append(self.index.get_level_values(i)) + arrays.append(self.index._get_level_values(i)) else: arrays.append(self.index) @@ -2886,9 +2886,9 @@ def set_index(self, keys, drop=True, append=False, inplace=False, # append all but the last column so we don't have to modify # the end of this loop for n in range(col.nlevels - 1): - arrays.append(col.get_level_values(n)) + arrays.append(col._get_level_values(n)) - level = col.get_level_values(col.nlevels - 1) + level = col._get_level_values(col.nlevels - 1) names.extend(col.names) elif isinstance(col, Series): level = col._values diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 0b3fcba1c1ba5..831ca3886773e 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -291,8 +291,8 @@ def _set_grouper(self, obj, sort=False): # equivalent to the axis name if isinstance(ax, MultiIndex): level = ax._get_level_number(level) - ax = Index(ax.get_level_values( - level), name=ax.names[level]) + ax = Index(ax._get_level_values(level), + name=ax.names[level]) else: if level not in (0, ax.name): @@ -761,7 +761,7 @@ def _index_with_as_index(self, b): gp = self.grouper levels = chain((gp.levels[i][gp.labels[i][b]] for i in range(len(gp.groupings))), - (original.get_level_values(i)[b] + (original._get_level_values(i)[b] for i in range(original.nlevels))) new = MultiIndex.from_arrays(list(levels)) new.names = gp.names + original.names diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 5fc0d590a6885..87cb088c2e91e 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -811,7 +811,8 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, mdata[value_name] = frame.values.ravel('F') for i, col in enumerate(var_name): # asanyarray will keep the columns as an Index - mdata[col] = np.asanyarray(frame.columns.get_level_values(i)).repeat(N) + mdata[col] = np.asanyarray(frame.columns + ._get_level_values(i)).repeat(N) return DataFrame(mdata, columns=mcolumns) diff --git a/pandas/formats/format.py 
b/pandas/formats/format.py index 6b235b5e1bc33..4c081770e0125 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -1566,7 +1566,7 @@ def _save_header(self): if isinstance(index_label, list) and len(index_label) > 1: col_line.extend([''] * (len(index_label) - 1)) - col_line.extend(columns.get_level_values(i)) + col_line.extend(columns._get_level_values(i)) writer.writerow(col_line) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index dcbcccdfcd610..5d43d2d32af67 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2334,9 +2334,9 @@ def set_value(self, arr, key, value): self._engine.set_value(_values_from_object(arr), _values_from_object(key), value) - def get_level_values(self, level): + def _get_level_values(self, level): """ - Return vector of label values for requested level, equal to the length + Return an Index of values for requested level, equal to the length of the index Parameters @@ -2345,12 +2345,14 @@ def get_level_values(self, level): Returns ------- - values : ndarray + values : Index """ - # checks that level number is actually just 1 + self._validate_index_level(level) return self + get_level_values = _get_level_values + _index_shared_docs['get_indexer'] = """ Compute indexer and mask for new index given the current index. The indexer should be then used as an input to ndarray.take to align the diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index ec30d2c44efd7..23a42265a149b 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -684,7 +684,7 @@ def is_monotonic_increasing(self): """ # reversed() because lexsort() wants the most significant key last. - values = [self._get_level_values(i) + values = [self._get_level_values(i).values for i in reversed(range(len(self.levels)))] try: sort_order = np.lexsort(values) @@ -866,7 +866,8 @@ def _get_level_values(self, level): labels = self.labels[level] filled = algos.take_1d(unique._values, labels, fill_value=unique._na_value) - return filled + values = unique._shallow_copy(filled) + return values def get_level_values(self, level): """ @@ -883,7 +884,7 @@ def get_level_values(self, level): """ level = self._get_level_number(level) values = self._get_level_values(level) - return self.levels[level]._shallow_copy(values) + return values def format(self, space=2, sparsify=None, adjoin=True, names=False, na_rep=None, formatter=None): @@ -966,7 +967,8 @@ def to_frame(self, index=True): """ from pandas import DataFrame - result = DataFrame({(name or level): self.get_level_values(level) + result = DataFrame({(name or level): + self._get_level_values(level) for name, level in zip(self.names, range(len(self.levels)))}, copy=False) @@ -1301,8 +1303,8 @@ def append(self, other): for o in other): arrays = [] for i in range(self.nlevels): - label = self.get_level_values(i) - appended = [o.get_level_values(i) for o in other] + label = self._get_level_values(i) + appended = [o._get_level_values(i) for o in other] arrays.append(label.append(appended)) return MultiIndex.from_arrays(arrays, names=self.names) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index bace43e785dff..2ab642b3af0c7 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -749,7 +749,7 @@ def _get_column_names_and_types(self, dtype_mapper): if self.index is not None: for i, idx_label in enumerate(self.index): idx_type = dtype_mapper( - self.frame.index.get_level_values(i)) + self.frame.index._get_level_values(i)) column_names_and_types.append((text_type(idx_label), idx_type, True)) diff --git 
a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index eed4d6261d6e8..6f06a55ad065e 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -422,6 +422,24 @@ def test_concat_axis_parameter(self): with assertRaisesRegexp(ValueError, 'No axis named'): pd.concat([series1, series2], axis='something') + def test_concat_numerical_names(self): + # #15262 # #12223 + df = pd.DataFrame({'col': range(9)}, + dtype='int32', + index=(pd.MultiIndex + .from_product([['A0', 'A1', 'A2'], + ['B0', 'B1', 'B2']], + names=[1, 2]))) + result = pd.concat((df.iloc[:2, :], df.iloc[-2:, :])) + expected = pd.DataFrame({'col': [0, 1, 7, 8]}, + dtype='int32', + index=pd.MultiIndex.from_tuples([('A0', 'B0'), + ('A0', 'B1'), + ('A2', 'B1'), + ('A2', 'B2')], + names=[1, 2])) + tm.assert_frame_equal(result, expected) + class TestDataFrameCombineFirst(tm.TestCase, TestData): diff --git a/pandas/util/doctools.py b/pandas/util/doctools.py index 62dcba1405581..6df6444aeafab 100644 --- a/pandas/util/doctools.py +++ b/pandas/util/doctools.py @@ -113,12 +113,12 @@ def _insert_index(self, data): else: for i in range(idx_nlevels): data.insert(i, 'Index{0}'.format(i), - data.index.get_level_values(i)) + data.index._get_level_values(i)) col_nlevels = data.columns.nlevels if col_nlevels > 1: - col = data.columns.get_level_values(0) - values = [data.columns.get_level_values(i).values + col = data.columns._get_level_values(0) + values = [data.columns._get_level_values(i).values for i in range(1, col_nlevels)] col_df = pd.DataFrame(values) data.columns = col_df.columns From d80275dfaa6a8ad50bc49dbaef9eacd5509008dc Mon Sep 17 00:00:00 2001 From: Arco Bast Date: Fri, 24 Feb 2017 15:37:18 -0500 Subject: [PATCH 100/933] BUG: msgpack supports CategoricalIndex closes #15487 Author: Arco Bast Closes #15493 from abast/CategoricalIndex_msgpack and squashes the following commits: c1c68e4 [Arco Bast] corrections 3c1f2e7 [Arco Bast] whatsnew 215c2aa [Arco Bast] improve tests cd9354f [Arco Bast] improve tests 7895c16 [Arco Bast] flake8 f3f492a [Arco Bast] fix test 91d85cb [Arco Bast] msgpack supports CategoricalIndex --- doc/source/whatsnew/v0.20.0.txt | 2 ++ pandas/io/packers.py | 2 +- pandas/tests/io/test_packers.py | 8 ++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7426b5ca2a69d..c94429b469641 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -615,6 +615,7 @@ Bug Fixes - Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) - Bug in ``.reset_index()`` when an all ``NaN`` level of a ``MultiIndex`` would fail (:issue:`6322`) +- Bug in ``pd.read_msgpack`` when deserializing a ``CategoricalIndex`` (:issue:`15487`) - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) @@ -630,3 +631,4 @@ Bug Fixes - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) +- Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) diff --git 
a/pandas/io/packers.py b/pandas/io/packers.py index 3f4be6ad459d8..7afe8a06b6af1 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -54,7 +54,7 @@ from pandas import (Timestamp, Period, Series, DataFrame, # noqa Index, MultiIndex, Float64Index, Int64Index, Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT, - Categorical) + Categorical, CategoricalIndex) from pandas.tslib import NaTType from pandas.sparse.api import SparseSeries, SparseDataFrame from pandas.sparse.array import BlockIndex, IntIndex
diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 097c03937ca68..251c6ae8b4dec 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -311,6 +311,7 @@ def setUp(self): 'period': Index(period_range('2012-1-1', freq='M', periods=3)), 'date2': Index(date_range('2013-01-1', periods=10)), 'bdate': Index(bdate_range('2013-01-02', periods=10)), + 'cat': tm.makeCategoricalIndex(100) } self.mi = { @@ -349,6 +350,13 @@ def test_unicode(self): i_rec = self.encode_decode(i) self.assert_index_equal(i, i_rec) + def test_categorical_index(self): + # GH15487 + df = DataFrame(np.random.randn(10, 2)) + df = df.astype({0: 'category'}).set_index(0) + result = self.encode_decode(df) + tm.assert_frame_equal(result, df) + class TestSeries(TestPackers):

From 303541eba0797f30c6f10084acbd522220cbc56a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 25 Feb 2017 22:38:56 +0100 Subject: [PATCH 101/933] DOC: fix doc build warnings (#15505) --- doc/source/advanced.rst | 1 + doc/source/basics.rst | 2 +- doc/source/contributing.rst | 6 +++--- doc/source/install.rst | 2 +- doc/source/io.rst | 8 ++++---- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/generic.py | 1 + pandas/io/html.py | 2 +- pandas/io/json/normalize.py | 9 ++++----- pandas/io/parsers.py | 10 +++++----- 10 files changed, 22 insertions(+), 21 deletions(-)
diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index b6f015c15606d..f380070ddac79 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -965,6 +965,7 @@ The different indexing operation can potentially change the dtype of a ``Series` res .. ipython:: python + series2 = pd.Series([True]) series2.dtype res = series2.reindex_like(series1)
diff --git a/doc/source/basics.rst b/doc/source/basics.rst index f5f7c73223595..f649b3fd8a9a3 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1889,7 +1889,7 @@ gotchas Performing selection operations on ``integer`` type data can easily upcast the data to ``floating``. The dtype of the input data will be preserved in cases where ``nans`` are not introduced (starting in 0.11.0) -See also :ref:`Support for integer ``NA`` ` +See also :ref:`Support for integer NA ` .. ipython:: python
diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 5c2bb9b73d618..2f838a3ab2386 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -461,7 +461,7 @@ C (cpplint) *pandas* uses the `Google `_ standard. Google provides an open source style checker called ``cpplint``, but we -use a fork of it that can be found `here `__. Here are *some* of the more common ``cpplint`` issues: - we restrict line-length to 80 characters to promote readability @@ -479,7 +479,7 @@ You can also run this command on an entire directory if necessary:: To make your commits compliant with this standard, you can install the `ClangFormat `_ tool, which can be -downloaded `here `_.
To configure, in your home directory, +downloaded `here `__. To configure, in your home directory, run the following command:: clang-format style=google -dump-config > .clang-format @@ -611,7 +611,7 @@ Or with one of the following constructs:: pytest pandas/tests/[test-module].py::[TestClass] pytest pandas/tests/[test-module].py::[TestClass]::[test_method] -For more, see the `pytest`_ documentation. +For more, see the `pytest `_ documentation. .. versionadded:: 0.20.0 diff --git a/doc/source/install.rst b/doc/source/install.rst index 80a5d7e7d375b..8b0fec6a3dac3 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -282,7 +282,7 @@ Optional Dependencies okay.) * `BeautifulSoup4`_ and `lxml`_ * `BeautifulSoup4`_ and `html5lib`_ and `lxml`_ - * Only `lxml`_, although see :ref:`HTML Table Parsing ` + * Only `lxml`_, although see :ref:`HTML Table Parsing ` for reasons as to why you should probably **not** take this approach. .. warning:: diff --git a/doc/source/io.rst b/doc/source/io.rst index 55ef2c09d43e4..35e8b77782183 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2043,7 +2043,7 @@ Reading HTML Content .. warning:: - We **highly encourage** you to read the :ref:`HTML Table Parsing gotchas` + We **highly encourage** you to read the :ref:`HTML Table Parsing gotchas ` below regarding the issues surrounding the BeautifulSoup4/html5lib/lxml parsers. .. versionadded:: 0.12.0 @@ -4681,7 +4681,7 @@ The key functions are: Supported Data Types -++++++++++++++++++++ +'''''''''''''''''''' Pandas supports all these `BigQuery data types `__: ``STRING``, ``INTEGER`` (64bit), ``FLOAT`` (64 bit), ``BOOLEAN`` and @@ -4689,7 +4689,7 @@ Pandas supports all these `BigQuery data types `. + HTML parsing libraries `. Expect to do some cleanup after you call this function. For example, you might need to manually assign column names if the column names are diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index d684441c5974d..f29472155da17 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -106,11 +106,10 @@ def json_normalize(data, record_path=None, meta=None, path to records is ['foo', 'bar'] meta_prefix : string, default None errors : {'raise', 'ignore'}, default 'raise' - - * ignore : will ignore KeyError if keys listed in meta are not - always present - * raise : will raise KeyError if keys listed in meta are not - always present + * 'ignore' : will ignore KeyError if keys listed in meta are not + always present + * 'raise' : will raise KeyError if keys listed in meta are not + always present .. versionadded:: 0.20.0 diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 88d0c6c12c04f..78c5247818970 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -181,7 +181,7 @@ If True and parse_dates is enabled, pandas will attempt to infer the format of the datetime strings in the columns, and if it can be inferred, switch to a faster method of parsing them. In some cases this can increase the - parsing speed by ~5-10x. + parsing speed by 5-10x. keep_date_col : boolean, default False If True and parse_dates specifies combining multiple columns then keep the original columns. @@ -200,10 +200,10 @@ Return TextFileReader object for iteration or getting chunks with ``get_chunk()``. chunksize : int, default None - Return TextFileReader object for iteration. `See IO Tools docs for more - information - `_ on - ``iterator`` and ``chunksize``. + Return TextFileReader object for iteration. 
+ See the `IO Tools docs + `_ + for more information on ``iterator`` and ``chunksize``. compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer' For on-the-fly decompression of on-disk data. If 'infer', then use gzip, bz2, zip or xz if filepath_or_buffer is a string ending in '.gz', '.bz2',

From b3ae4c7698de4623e1279d579b46192ef79250d1 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 25 Feb 2017 17:26:06 -0500 Subject: [PATCH 102/933] DOC: Fix versionadded for cond in .where (#15509) [ci skip] --- pandas/core/generic.py | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 85c7130ca2827..cdc37e00f70e0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4910,6 +4910,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, not change input %(klass)s (though pandas doesn't check it). .. versionadded:: 0.18.1 + A callable can be used as cond. other : scalar, %(klass)s, or callable If other is callable, it is computed on the %(klass)s and @@ -4917,6 +4918,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, change input %(klass)s (though pandas doesn't check it). .. versionadded:: 0.18.1 + A callable can be used as other. inplace : boolean, default False Whether to perform the operation in place on the data

From fb7dc7dcbde1d81dea28b1b83e1c3bd171a7e73d Mon Sep 17 00:00:00 2001 From: Stephen Rauch Date: Mon, 27 Feb 2017 09:39:27 -0500 Subject: [PATCH 103/933] BUG: Parse two date columns broken in read_csv with multiple headers In `io/parsers/_try_convert_dates()` when selecting columns based on a column index from a set of columns with multi-level names, the column `name` was converted to a string. This appears to be a bug since the `name` was a tuple before the conversion.
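This causes problems downstream when there is an attempt to use this name to look up a column, and that lookup fails because the desired column is keyed from the tuple, not its string representation.

closes #15376 Author: Stephen Rauch Closes #15378 from stephenrauch/fix_read_csv_merge_datetime and squashes the following commits: 030f5ec [Stephen Rauch] BUG: Parse two date columns broken in read_csv with multiple headers

For reference, a minimal sketch of the fixed behaviour, adapted from the test added below (the data and column names are illustrative):

    from pandas.compat import StringIO
    import pandas as pd
    import pandas.io.date_converters as conv

    data = ("D,T,A,B\n"
            "date,time,a,b\n"
            "2001-01-05, 09:00:00, 0.0, 10.\n"
            "2001-01-06, 00:00:00, 1.0, 11.\n")

    # With two header rows the column names are tuples such as ('D', 'date');
    # positions 0 and 1 must resolve to those tuples, not to their string form.
    df = pd.read_csv(StringIO(data), header=[0, 1],
                     parse_dates={'date_time': [0, 1]},
                     date_parser=conv.parse_date_time)
    print(df.columns)  # Index(['date_time', ('A', 'a'), ('B', 'b')], ...)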
--- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/parsers.py | 2 +- pandas/tests/io/parser/parse_dates.py | 19 +++++++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 123fc346441cb..be487e165c602 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -625,6 +625,7 @@ Bug Fixes +- Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`)
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 78c5247818970..811844ec35deb 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2858,7 +2858,7 @@ def _try_convert_dates(parser, colspec, data_dict, columns): if c in colset: colnames.append(c) elif isinstance(c, int) and c not in columns: - colnames.append(str(columns[c])) + colnames.append(columns[c]) else: colnames.append(c)
diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index 6197d07d4eafa..b1960159bb41d 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -18,6 +18,7 @@ import pandas.tseries.tools as tools import pandas.util.testing as tm +import pandas.io.date_converters as conv from pandas import DataFrame, Series, Index, DatetimeIndex from pandas import compat from pandas.compat import parse_date, StringIO, lrange @@ -491,3 +492,21 @@ def test_parse_dates_noconvert_thousands(self): result = self.read_csv(StringIO(data), index_col=[0, 1], parse_dates=True, thousands='.') tm.assert_frame_equal(result, expected) + + def test_parse_date_time_multi_level_column_name(self): + data = """\ +D,T,A,B +date, time,a,b +2001-01-05, 09:00:00, 0.0, 10. +2001-01-06, 00:00:00, 1.0, 11.
+""" + datecols = {'date_time': [0, 1]} + result = self.read_csv(StringIO(data), sep=',', header=[0, 1], + parse_dates=datecols, + date_parser=conv.parse_date_time) + + expected_data = [[datetime(2001, 1, 5, 9, 0, 0), 0., 10.], + [datetime(2001, 1, 6, 0, 0, 0), 1., 11.]] + expected = DataFrame(expected_data, + columns=['date_time', ('A', 'a'), ('B', 'b')]) + tm.assert_frame_equal(result, expected) From 6c17f67aafd7de8af96032aa415fc798fa3b73ca Mon Sep 17 00:00:00 2001 From: Stephen Rauch Date: Mon, 27 Feb 2017 10:41:56 -0500 Subject: [PATCH 104/933] BUG: GH15426 timezone lost in groupby-agg with cython functions closes #15426 Author: Stephen Rauch Closes #15433 from stephenrauch/tz-lost-in-groupby-agg and squashes the following commits: 64a84ca [Stephen Rauch] BUG: GH15426 timezone lost in groupby-agg with cython functions --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/groupby/test_aggregate.py | 31 +++++++++++++++++++++++++- pandas/tests/types/test_cast.py | 12 +++++++++- pandas/types/cast.py | 3 ++- 4 files changed, 44 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index be487e165c602..f337d4404abfc 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -622,6 +622,7 @@ Bug Fixes - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) +- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index a1fc97eb8d780..cb739546a2312 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -6,7 +6,7 @@ """ from __future__ import print_function -from datetime import datetime +from datetime import datetime, timedelta from functools import partial import numpy as np @@ -738,3 +738,32 @@ def test_agg_over_numpy_arrays(self): columns=expected_column) assert_frame_equal(result, expected) + + def test_agg_timezone_round_trip(self): + # GH 15426 + ts = pd.Timestamp("2016-01-01 12:00:00", tz='US/Pacific') + df = pd.DataFrame({'a': 1, 'b': [ts + timedelta(minutes=nn) + for nn in range(10)]}) + + result1 = df.groupby('a')['b'].agg(np.min).iloc[0] + result2 = df.groupby('a')['b'].agg(lambda x: np.min(x)).iloc[0] + result3 = df.groupby('a')['b'].min().iloc[0] + + assert result1 == ts + assert result2 == ts + assert result3 == ts + + dates = [pd.Timestamp("2016-01-0%d 12:00:00" % i, tz='US/Pacific') + for i in range(1, 5)] + df = pd.DataFrame({'A': ['a', 'b'] * 2, 'B': dates}) + grouped = df.groupby('A') + + ts = df['B'].iloc[0] + assert ts == grouped.nth(0)['B'].iloc[0] + assert ts == grouped.head(1)['B'].iloc[0] + assert ts == grouped.first()['B'].iloc[0] + assert ts == grouped.apply(lambda x: x.iloc[0])[0] + + ts = df['B'].iloc[2] + assert ts == grouped.last()['B'].iloc[0] + assert ts == grouped.apply(lambda x: x.iloc[-1])[0] diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index 497130b117289..70f69cc7d5701 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -8,7 +8,7 @@ from datetime import datetime import numpy as np -from pandas import Timedelta, Timestamp +from pandas import Timedelta, Timestamp, DatetimeIndex from pandas.types.cast 
diff --git a/pandas/types/cast.py b/pandas/types/cast.py index b1a17df64aecf..8cc3fe41f73c8 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -133,7 +133,8 @@ def trans(x): # noqa if dtype.tz: # convert to datetime and change timezone from pandas import to_datetime - result = to_datetime(result).tz_localize(dtype.tz) + result = to_datetime(result).tz_localize('utc') + result = result.tz_convert(dtype.tz) except: pass

From 25dcff597162a12dbe419da2ae23d9b0d6322bee Mon Sep 17 00:00:00 2001 From: Alexis Mignon Date: Thu, 16 Jun 2016 15:11:46 +0200 Subject: [PATCH 105/933] BUG: Fix a bug occurring when using DataFrame.to_records with unicode column names in python 2. closes #11879 closes #13462
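To illustrate the fix, a small sketch (the accented column name mirrors the test added below; under Python 2 the call previously raised from inside numpy):

    # -*- coding: utf-8 -*-
    import pandas as pd

    df = pd.DataFrame({u"accented_name_é": [1.0]})
    rec = df.to_records()

    # The record-array dtype is now built as a dict of names/formats,
    # which numpy accepts for unicode field names; a list of
    # (name, dtype) tuples did not.
    print(rec.dtype.names)  # ('index', u'accented_name_é')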
--- doc/source/whatsnew/v0.20.0.txt | 3 ++- pandas/core/frame.py | 12 ++++++++---- pandas/tests/frame/test_convert_to.py | 15 +++++++++++++++ 3 files changed, 25 insertions(+), 5 deletions(-)
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f337d4404abfc..947a114f1ce95 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -615,7 +615,8 @@ Bug Fixes - Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) - Bug in ``.reset_index()`` when an all ``NaN`` level of a ``MultiIndex`` would fail (:issue:`6322`) -- Bug in ``pd.read_msgpack`` when deserializing a ``CategoricalIndex`` (:issue:`15487`) +- Bug in ``pd.read_msgpack()`` when deserializing a ``CategoricalIndex`` (:issue:`15487`) +- Bug in ``pd.DataFrame.to_records()`` which failed with unicode characters in column names (:issue:`11879`) - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ce3481fc17c5b..adf397e63984f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1105,13 +1105,17 @@ def to_records(self, index=True, convert_datetime64=True): count += 1 elif index_names[0] is None: index_names = ['index'] - names = lmap(str, index_names) + lmap(str, self.columns) + names = (lmap(compat.text_type, index_names) + + lmap(compat.text_type, self.columns)) else: arrays = [self[c].get_values() for c in self.columns] - names = lmap(str, self.columns) + names = lmap(compat.text_type, self.columns) - dtype = np.dtype([(x, v.dtype) for x, v in zip(names, arrays)]) - return np.rec.fromarrays(arrays, dtype=dtype, names=names) + formats = [v.dtype for v in arrays] + return np.rec.fromarrays( + arrays, + dtype={'names': names, 'formats': formats} + ) @classmethod def from_items(cls, items, columns=None, orient='columns'):
diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 1bc8313726d0c..0dde113dd5147 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -177,3 +177,18 @@ def test_to_records_with_unicode_index(self): .to_records() expected = np.rec.array([('x', 'y')], dtype=[('a', 'O'), ('b', 'O')]) tm.assert_almost_equal(result, expected) + + def test_to_records_with_unicode_column_names(self): + # xref issue: https://github.com/numpy/numpy/issues/2407 + # Issue #11879. to_records used to raise an exception when used + # with column names containing non-ascii characters in Python 2 + result = DataFrame(data={u"accented_name_é": [1.0]}).to_records() + + # Note that numpy allows for unicode field names but dtypes need + # to be specified using a dictionary instead of a list of tuples. + expected = np.rec.array( + [(0, 1.0)], + dtype={"names": ["index", u"accented_name_é"], + "formats": ['<i8', '<f8']} + ) + tm.assert_almost_equal(result, expected)

From Mon Sep 17 00:00:00 2001 From: jojomdt Date: Mon, 27 Feb 2017 14:26:07 -0500 Subject: [PATCH 106/933] BUG: reindex_like after shape comparison in assert_frame_equal With check_like, the former code called reindex_like before the shape comparison. For example: if left.shape=(2,2) and right.shape=(2,0), then after reindex_like both shapes become (2,0), so the shape comparison cannot detect that the two dataframes are different and assert_frame_equal does not raise an assertion error, although it should. Author: jojomdt Closes #15496 from jojomdt/master and squashes the following commits: 7b3437b [jojomdt] fix test_frame_equal_message error 0340b5c [jojomdt] change check_like description c03e0af [jojomdt] add test for TestAssertFrameEqual 470dbaa [jojomdt] combine row and column shape comparison ce7bd74 [jojomdt] reindex_like after shape comparison --- pandas/tests/test_testing.py | 32 +++++++++++++++++--------------- pandas/util/testing.py | 25 ++++++++----------------- 2 files changed, 25 insertions(+), 32 deletions(-)
diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 07bfdc8fc9078..2fb58ef70e3cb 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -13,8 +13,6 @@ RNGContext) from pandas.compat import is_platform_windows -# let's get meta.
- class TestAssertAlmostEqual(tm.TestCase): @@ -594,6 +592,20 @@ def _assert_not_equal(self, a, b, **kwargs): self.assertRaises(AssertionError, assert_frame_equal, a, b, **kwargs) self.assertRaises(AssertionError, assert_frame_equal, b, a, **kwargs) + def test_equal_with_different_row_order(self): + # check_like=True ignores row-column orderings + df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, + index=['a', 'b', 'c']) + df2 = pd.DataFrame({'A': [3, 2, 1], 'B': [6, 5, 4]}, + index=['c', 'b', 'a']) + + self._assert_equal(df1, df2, check_like=True) + self._assert_not_equal(df1, df2) + + def test_not_equal_with_different_shape(self): + self._assert_not_equal(pd.DataFrame({'A': [1, 2, 3]}), + pd.DataFrame({'A': [1, 2, 3, 4]})) + def test_index_dtype(self): df1 = DataFrame.from_records( {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a']) @@ -621,19 +633,9 @@ def test_frame_equal_message(self): expected = """DataFrame are different -DataFrame shape \\(number of rows\\) are different -\\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\) -\\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)""" - - with assertRaisesRegexp(AssertionError, expected): - assert_frame_equal(pd.DataFrame({'A': [1, 2, 3]}), - pd.DataFrame({'A': [1, 2, 3, 4]})) - - expected = """DataFrame are different - -DataFrame shape \\(number of columns\\) are different -\\[left\\]: 2, Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) -\\[right\\]: 1, Index\\(\\[u?'A'\\], dtype='object'\\)""" +DataFrame shape mismatch +\\[left\\]: \\(3, 2\\) +\\[right\\]: \\(3, 1\\)""" with assertRaisesRegexp(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 1bd539469dbe3..e4b10488c69b2 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1254,7 +1254,7 @@ def assert_frame_equal(left, right, check_dtype=True, check_categorical : bool, default True Whether to compare internal Categorical exactly. check_like : bool, default False - If true, then reindex_like operands + If true, ignore the order of rows & columns obj : str, default 'DataFrame' Specify object name being compared, internally used to show appropriate assertion message @@ -1270,25 +1270,16 @@ def assert_frame_equal(left, right, check_dtype=True, assertIsInstance(left, type(right)) # assert_class_equal(left, right, obj=obj) + # shape comparison + if left.shape != right.shape: + raise_assert_detail(obj, + 'DataFrame shape mismatch', + '({0}, {1})'.format(*left.shape), + '({0}, {1})'.format(*right.shape)) + if check_like: left, right = left.reindex_like(right), right - # shape comparison (row) - if left.shape[0] != right.shape[0]: - raise_assert_detail(obj, - 'DataFrame shape (number of rows) are different', - '{0}, {1}'.format(left.shape[0], left.index), - '{0}, {1}'.format(right.shape[0], right.index)) - # shape comparison (columns) - if left.shape[1] != right.shape[1]: - raise_assert_detail(obj, - 'DataFrame shape (number of columns) ' - 'are different', - '{0}, {1}'.format(left.shape[1], - left.columns), - '{0}, {1}'.format(right.shape[1], - right.columns)) - # index comparison assert_index_equal(left.index, right.index, exact=check_index_type, check_names=check_names, From fed1827afaabb2ed2988643aba2d2be627634cf9 Mon Sep 17 00:00:00 2001 From: Aleksey Bilogur Date: Mon, 27 Feb 2017 15:26:00 -0500 Subject: [PATCH 107/933] TST: DataFrame.hist() does not get along with matplotlib.pyplot.tight_layout() (#15515) * Add unit test for #9351 * Tweaks. 
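* add _check_plot_works; rm aux method * Add whatsnew entry.

An illustrative sketch of the scenario the new test covers (assumes matplotlib >= 2.0 and a non-interactive backend):

    import matplotlib
    matplotlib.use("Agg")  # headless backend for this sketch
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(100, 2))
    df.hist()
    plt.tight_layout()  # used to raise AttributeError, see GH 9351
    plt.close("all")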
--- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/tests/plotting/test_hist_method.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 947a114f1ce95..f13b584a4ee13 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -629,7 +629,7 @@ Bug Fixes - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - +- Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.0``) (:issue:`9351`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`)
diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 4f64f66bd3c4d..22de7055e3cea 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -238,6 +238,16 @@ def test_hist_layout(self): with tm.assertRaises(ValueError): df.hist(layout=(-1, -1)) + @slow + # GH 9351 + def test_tight_layout(self): + if self.mpl_ge_2_0_0: + df = DataFrame(randn(100, 2)) + _check_plot_works(df.hist) + self.plt.tight_layout() + + tm.close() + @tm.mplskip class TestDataFrameGroupByPlots(TestPlotBase):

From e15de4d484dbff8f941c9d5cc31869d503d9c020 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Feb 2017 15:47:10 -0500 Subject: [PATCH 108/933] CLN: remove pandas/io/gbq.py and tests and replace with pandas-gbq closes #15347 Author: Jeff Reback Closes #15484 from jreback/gbq and squashes the following commits: 0fd8d06 [Jeff Reback] wip 3222de1 [Jeff Reback] CLN: remove pandas/io/gbq.py and tests and replace with pandas-gbq --- ci/requirements-2.7.pip | 5 +- ci/requirements-3.4.pip | 3 - ci/requirements-3.4_SLOW.pip | 3 - ci/requirements-3.5.pip | 1 + doc/source/io.rst | 289 +------ doc/source/whatsnew/v0.20.0.txt | 9 + pandas/core/frame.py | 8 +- pandas/io/gbq.py | 1192 +---------------------------- pandas/tests/io/test_gbq.py | 1242 +------------------------------ pandas/util/decorators.py | 38 +- pandas/util/print_versions.py | 3 +- 11 files changed, 116 insertions(+), 2677 deletions(-) delete mode 100644 ci/requirements-3.4_SLOW.pip
diff --git a/ci/requirements-2.7.pip b/ci/requirements-2.7.pip index d16b932c8be4f..08240184f2934 100644 --- a/ci/requirements-2.7.pip +++ b/ci/requirements-2.7.pip @@ -1,8 +1,5 @@ blosc -httplib2 -google-api-python-client==1.2 -python-gflags==2.0 -oauth2client==1.5.0 +pandas-gbq pathlib backports.lzma py
diff --git a/ci/requirements-3.4.pip b/ci/requirements-3.4.pip index 55986a0220bf0..4e5fe52d56cf1 100644 --- a/ci/requirements-3.4.pip +++ b/ci/requirements-3.4.pip @@ -1,5 +1,2 @@ python-dateutil==2.2 blosc -httplib2 -google-api-python-client -oauth2client
diff --git a/ci/requirements-3.4_SLOW.pip b/ci/requirements-3.4_SLOW.pip deleted file mode 100644 index 05c938abcbab6..0000000000000 --- a/ci/requirements-3.4_SLOW.pip +++ /dev/null @@ -1,3 +0,0 @@ -httplib2 -google-api-python-client -oauth2client
diff --git a/ci/requirements-3.5.pip b/ci/requirements-3.5.pip index 0d9e44cf39fa4..6e4f7b65f9728 100644 --- a/ci/requirements-3.5.pip +++ b/ci/requirements-3.5.pip @@ -1 +1,2 @@ xarray==0.9.1
+pandas-gbq diff --git a/doc/source/io.rst b/doc/source/io.rst index 35e8b77782183..b36ae8c2ed450 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4652,293 +4652,18 @@ And then issue the following queries: Google BigQuery --------------- -.. versionadded:: 0.13.0 - -The :mod:`pandas.io.gbq` module provides a wrapper for Google's BigQuery -analytics web service to simplify retrieving results from BigQuery tables -using SQL-like queries. Result sets are parsed into a pandas -DataFrame with a shape and data types derived from the source table. -Additionally, DataFrames can be inserted into new BigQuery tables or appended -to existing tables. - -.. warning:: - - To use this module, you will need a valid BigQuery account. Refer to the - `BigQuery Documentation `__ - for details on the service itself. - -The key functions are: - -.. currentmodule:: pandas.io.gbq - -.. autosummary:: - :toctree: generated/ - - read_gbq - to_gbq - -.. currentmodule:: pandas - - -Supported Data Types -'''''''''''''''''''' - -Pandas supports all these `BigQuery data types `__: -``STRING``, ``INTEGER`` (64bit), ``FLOAT`` (64 bit), ``BOOLEAN`` and -``TIMESTAMP`` (microsecond precision). Data types ``BYTES`` and ``RECORD`` -are not supported. - -Integer and boolean ``NA`` handling -''''''''''''''''''''''''''''''''''' - -.. versionadded:: 0.20 - -Since all columns in BigQuery queries are nullable, and NumPy lacks of ``NA`` -support for integer and boolean types, this module will store ``INTEGER`` or -``BOOLEAN`` columns with at least one ``NULL`` value as ``dtype=object``. -Otherwise those columns will be stored as ``dtype=int64`` or ``dtype=bool`` -respectively. - -This is opposite to default pandas behaviour which will promote integer -type to float in order to store NAs. See the :ref:`gotchas` -for detailed explaination. - -While this trade-off works well for most cases, it breaks down for storing -values greater than 2**53. Such values in BigQuery can represent identifiers -and unnoticed precision lost for identifier is what we want to avoid. - -.. _io.bigquery_deps: - -Dependencies -'''''''''''' - -This module requires following additional dependencies: - -- `httplib2 `__: HTTP client -- `google-api-python-client `__: Google's API client -- `oauth2client `__: authentication and authorization for Google's API - -.. _io.bigquery_authentication: - -Authentication -'''''''''''''' - -.. versionadded:: 0.18.0 - -Authentication to the Google ``BigQuery`` service is via ``OAuth 2.0``. -Is possible to authenticate with either user account credentials or service account credentials. - -Authenticating with user account credentials is as simple as following the prompts in a browser window -which will be automatically opened for you. You will be authenticated to the specified -``BigQuery`` account using the product name ``pandas GBQ``. It is only possible on local host. -The remote authentication using user account credentials is not currently supported in pandas. -Additional information on the authentication mechanism can be found -`here `__. - -Authentication with service account credentials is possible via the `'private_key'` parameter. This method -is particularly useful when working on remote servers (eg. jupyter iPython notebook on remote host). -Additional information on service accounts can be found -`here `__. - -Authentication via ``application default credentials`` is also possible. This is only valid -if the parameter ``private_key`` is not provided. 
This method also requires that -the credentials can be fetched from the environment the code is running in. -Otherwise, the OAuth2 client-side authentication is used. -Additional information on -`application default credentials `__. - -.. versionadded:: 0.19.0 - -.. note:: - - The `'private_key'` parameter can be set to either the file path of the service account key - in JSON format, or key contents of the service account key in JSON format. - -.. note:: - - A private key can be obtained from the Google developers console by clicking - `here `__. Use JSON key type. - -.. _io.bigquery_reader: - -Querying -'''''''' - -Suppose you want to load all data from an existing BigQuery table : `test_dataset.test_table` -into a DataFrame using the :func:`~pandas.io.gbq.read_gbq` function. - -.. code-block:: python - - # Insert your BigQuery Project ID Here - # Can be found in the Google web console - projectid = "xxxxxxxx" - - data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', projectid) - - -You can define which column from BigQuery to use as an index in the -destination DataFrame as well as a preferred column order as follows: - -.. code-block:: python - - data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', - index_col='index_column_name', - col_order=['col1', 'col2', 'col3'], projectid) - - -Starting with 0.20.0, you can specify the query config as parameter to use additional options of your job. -For more information about query configuration parameters see -`here `__. - -.. code-block:: python - - configuration = { - 'query': { - "useQueryCache": False - } - } - data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', - configuration=configuration, projectid) - - -.. note:: - - You can find your project id in the `Google developers console `__. - - -.. note:: - - You can toggle the verbose output via the ``verbose`` flag which defaults to ``True``. - -.. note:: - - The ``dialect`` argument can be used to indicate whether to use BigQuery's ``'legacy'`` SQL - or BigQuery's ``'standard'`` SQL (beta). The default value is ``'legacy'``. For more information - on BigQuery's standard SQL, see `BigQuery SQL Reference - `__ - -.. _io.bigquery_writer: - -Writing DataFrames -'''''''''''''''''' - -Assume we want to write a DataFrame ``df`` into a BigQuery table using :func:`~pandas.DataFrame.to_gbq`. - -.. ipython:: python - - df = pd.DataFrame({'my_string': list('abc'), - 'my_int64': list(range(1, 4)), - 'my_float64': np.arange(4.0, 7.0), - 'my_bool1': [True, False, True], - 'my_bool2': [False, True, False], - 'my_dates': pd.date_range('now', periods=3)}) - - df - df.dtypes - -.. code-block:: python - - df.to_gbq('my_dataset.my_table', projectid) - -.. note:: - - The destination table and destination dataset will automatically be created if they do not already exist. - -The ``if_exists`` argument can be used to dictate whether to ``'fail'``, ``'replace'`` -or ``'append'`` if the destination table already exists. The default value is ``'fail'``. - -For example, assume that ``if_exists`` is set to ``'fail'``. The following snippet will raise -a ``TableCreationError`` if the destination table already exists. - -.. code-block:: python - - df.to_gbq('my_dataset.my_table', projectid, if_exists='fail') - -.. note:: - - If the ``if_exists`` argument is set to ``'append'``, the destination dataframe will - be written to the table using the defined table schema and column types. The - dataframe must match the destination table in structure and data types. 
-   If the ``if_exists`` argument is set to ``'replace'``, and the existing table has a
-   different schema, a delay of 2 minutes will be forced to ensure that the new schema
-   has propagated in the Google environment. See
-   `Google BigQuery issue 191 `__.
-
-Writing large DataFrames can result in errors due to size limitations being exceeded.
-This can be avoided by setting the ``chunksize`` argument when calling :func:`~pandas.DataFrame.to_gbq`.
-For example, the following writes ``df`` to a BigQuery table in batches of 10000 rows at a time:
-
-.. code-block:: python
-
-   df.to_gbq('my_dataset.my_table', projectid, chunksize=10000)
-
-You can also see the progress of your post via the ``verbose`` flag which defaults to ``True``.
-For example:
-
-.. code-block:: python
-
-   In [8]: df.to_gbq('my_dataset.my_table', projectid, chunksize=10000, verbose=True)
-
-           Streaming Insert is 10% Complete
-           Streaming Insert is 20% Complete
-           Streaming Insert is 30% Complete
-           Streaming Insert is 40% Complete
-           Streaming Insert is 50% Complete
-           Streaming Insert is 60% Complete
-           Streaming Insert is 70% Complete
-           Streaming Insert is 80% Complete
-           Streaming Insert is 90% Complete
-           Streaming Insert is 100% Complete
-
-.. note::
-
-   If an error occurs while streaming data to BigQuery, see
-   `Troubleshooting BigQuery Errors `__.
-
-.. note::
-
-   The BigQuery SQL query language has some oddities, see the
-   `BigQuery Query Reference Documentation `__.
-
-.. note::
-
-   While BigQuery uses SQL-like syntax, it has some important differences from traditional
-   databases both in functionality, API limitations (size and quantity of queries or uploads),
-   and how Google charges for use of the service. You should refer to `Google BigQuery documentation `__
-   often as the service seems to be changing and evolving. BiqQuery is best for analyzing large
-   sets of data quickly, but it is not a direct replacement for a transactional database.
-
-.. _io.bigquery_create_tables:
-
-Creating BigQuery Tables
-''''''''''''''''''''''''
-
 .. warning::
 
-   As of 0.17, the function :func:`~pandas.io.gbq.generate_bq_schema` has been deprecated and will be
-   removed in a future version.
-
-As of 0.15.2, the gbq module has a function :func:`~pandas.io.gbq.generate_bq_schema` which will
-produce the dictionary representation schema of the specified pandas DataFrame.
-
-.. code-block:: ipython
-
-   In [10]: gbq.generate_bq_schema(df, default_type='STRING')
+   Starting in 0.20.0, pandas has split off Google BigQuery support into the
+   separate package ``pandas-gbq``. You can ``pip install pandas-gbq`` to get it.
 
-   Out[10]: {'fields': [{'name': 'my_bool1', 'type': 'BOOLEAN'},
-            {'name': 'my_bool2', 'type': 'BOOLEAN'},
-            {'name': 'my_dates', 'type': 'TIMESTAMP'},
-            {'name': 'my_float64', 'type': 'FLOAT'},
-            {'name': 'my_int64', 'type': 'INTEGER'},
-            {'name': 'my_string', 'type': 'STRING'}]}
-
-.. note::
+The ``pandas-gbq`` package provides functionality to read from and write to Google BigQuery.
 
-   If you delete and re-create a BigQuery table with the same name, but different table schema,
-   you must wait 2 minutes before streaming data into the table. As a workaround, consider creating
-   the new table with a different name. Refer to
-   `Google BigQuery issue 191 `__.
+pandas integrates with this external package. If ``pandas-gbq`` is installed, you can
+use the pandas methods ``pd.read_gbq`` and ``DataFrame.to_gbq``, which will call the
+respective functions from ``pandas-gbq``.
 
+Full documentation can be found `here `__
 
 .. _io.stata:
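[Editor's note] For orientation, this is what the hand-off looks like from the user's side once the external package is installed; the pandas entry points are unchanged. A minimal sketch: the project ID and table names below are hypothetical placeholders, not values taken from this patch.

.. code-block:: python

   import pandas as pd

   # hypothetical placeholders -- substitute your own project and dataset
   projectid = "my-project-id"

   # pd.read_gbq forwards to pandas_gbq.read_gbq
   df = pd.read_gbq('SELECT * FROM test_dataset.test_table', projectid)

   # DataFrame.to_gbq forwards to pandas_gbq.to_gbq; the if_exists and
   # chunksize semantics documented above are unchanged
   df.to_gbq('test_dataset.test_table2', projectid,
             if_exists='append', chunksize=10000)

If ``pandas-gbq`` is missing, these calls raise an ``ImportError`` pointing at ``pip install pandas-gbq``; see the new ``_try_import`` helper in the ``pandas/io/gbq.py`` hunk below.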
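[Editor's note] The deferred-import-plus-lazy-docstring pattern in the hunks that follow is worth spelling out: ``pandas_gbq`` is imported only on first call, and ``docstring_wrapper`` lets the pandas stubs expose the external package's docstrings without importing it at module load. Below is a self-contained sketch of the idea; ``some_pkg`` and ``read_thing`` are hypothetical stand-ins, and this ``DocstringWrapper`` only approximates what ``pandas.util.decorators.docstring_wrapper`` actually does.

.. code-block:: python

   def _try_import():
       # import the optional dependency only on first use, so merely
       # importing the host library never requires it
       try:
           import some_pkg  # hypothetical optional dependency
       except ImportError:
           raise ImportError("some_pkg is required for this feature\n"
                             "you can install it via: pip install some_pkg")
       return some_pkg


   class DocstringWrapper(object):
       # delegate calls to ``func``, but compute ``__doc__`` lazily so the
       # docstring can be borrowed from the optional package on demand
       def __init__(self, func, creator):
           self._func = func
           self._creator = creator

       def __call__(self, *args, **kwargs):
           return self._func(*args, **kwargs)

       @property
       def __doc__(self):
           return self._creator()


   def read_thing(query, **kwargs):
       # thin stub that defers all real work to the optional package
       return _try_import().read_thing(query, **kwargs)

   read_thing = DocstringWrapper(read_thing,
                                 lambda: _try_import().read_thing.__doc__)

With this arrangement ``read_thing.__doc__`` resolves to the optional package's docstring when it is installed; when it is not, the first call (or docstring access) raises the install hint instead, while a bare import of the host library stays cheap.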
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index f13b584a4ee13..f0e4176472861 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -360,6 +360,15 @@ New Behavior:
     In [5]: df['a']['2011-12-31 23:59:59']
     Out[5]: 1
 
+.. _whatsnew_0200.api_breaking.gbq:
+
+Pandas Google BigQuery support has moved
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+pandas has split off Google BigQuery support into a separate package ``pandas-gbq``. You can ``pip install pandas-gbq`` to get it.
+The functionality of ``pd.read_gbq()`` and ``.to_gbq()`` remains the same with the currently released version of ``pandas-gbq=0.1.2``. (:issue:`15347`)
+Documentation is now hosted `here `__
+
 .. _whatsnew_0200.api_breaking.memory_usage:
 
 Memory Usage for Index is more Accurate
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index adf397e63984f..7b02926ea8837 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -77,7 +77,8 @@
                                 OrderedDict, raise_with_traceback)
 from pandas import compat
 from pandas.compat.numpy import function as nv
-from pandas.util.decorators import deprecate_kwarg, Appender, Substitution
+from pandas.util.decorators import (deprecate_kwarg, Appender,
+                                    Substitution, docstring_wrapper)
 from pandas.util.validators import validate_bool_kwarg
 from pandas.tseries.period import PeriodIndex
 
@@ -941,6 +942,11 @@ def to_gbq(self, destination_table, project_id, chunksize=10000,
                    chunksize=chunksize, verbose=verbose, reauth=reauth,
                    if_exists=if_exists, private_key=private_key)
 
+    def _f():
+        from pandas.io.gbq import _try_import
+        return _try_import().to_gbq.__doc__
+    to_gbq = docstring_wrapper(to_gbq, _f)
+
     @classmethod
     def from_records(cls, data, index=None, exclude=None, columns=None,
                      coerce_float=False, nrows=None):
diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py
index a5558866937cf..3407f51af5e83 100644
--- a/pandas/io/gbq.py
+++ b/pandas/io/gbq.py
@@ -1,1180 +1,52 @@
-import warnings
-from datetime import datetime
-import json
-import logging
-from time import sleep
-import uuid
-import time
-import sys
+""" Google BigQuery support """
 
-import numpy as np
+from pandas.util.decorators import docstring_wrapper
 
-from distutils.version import StrictVersion
-from pandas import compat, DataFrame, concat
-from pandas.core.common import PandasError
-from pandas.compat import lzip, bytes_to_str
-
-
-def _check_google_client_version():
+def _try_import():
+    # since pandas is a dependency of pandas-gbq
+    # we need to import on first use
     try:
-        import pkg_resources
-
+        import pandas_gbq
     except ImportError:
-        raise ImportError('Could not import pkg_resources (setuptools).')
-
-    if compat.PY3:
-        google_api_minimum_version = '1.4.1'
-    else:
-        google_api_minimum_version = '1.2.0'
-
-    _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution(
-        'google-api-python-client').version
-
-    if (StrictVersion(_GOOGLE_API_CLIENT_VERSION) <
-            StrictVersion(google_api_minimum_version)):
-        raise ImportError("pandas requires google-api-python-client >= {0} "
-                          "for Google BigQuery support, "
-                          "current version {1}"
-                          .format(google_api_minimum_version,
-                                  _GOOGLE_API_CLIENT_VERSION))
-
-
-def _test_google_api_imports():
-
-    try:
-        import httplib2  # noqa
-        try:
-            from googleapiclient.discovery import build  # noqa
-            from googleapiclient.errors import HttpError  # noqa
-        except:
-            from apiclient.discovery import build  # noqa
-            from apiclient.errors import HttpError  # noqa
-        from oauth2client.client import AccessTokenRefreshError  # noqa
-        from
oauth2client.client import OAuth2WebServerFlow # noqa - from oauth2client.file import Storage # noqa - from oauth2client.tools import run_flow, argparser # noqa - except ImportError as e: - raise ImportError("Missing module required for Google BigQuery " - "support: {0}".format(str(e))) - - -logger = logging.getLogger('pandas.io.gbq') -logger.setLevel(logging.ERROR) - - -class InvalidPrivateKeyFormat(PandasError, ValueError): - """ - Raised when provided private key has invalid format. - """ - pass - - -class AccessDenied(PandasError, ValueError): - """ - Raised when invalid credentials are provided, or tokens have expired. - """ - pass - - -class DatasetCreationError(PandasError, ValueError): - """ - Raised when the create dataset method fails - """ - pass - - -class GenericGBQException(PandasError, ValueError): - """ - Raised when an unrecognized Google API Error occurs. - """ - pass - - -class InvalidColumnOrder(PandasError, ValueError): - """ - Raised when the provided column order for output - results DataFrame does not match the schema - returned by BigQuery. - """ - pass - - -class InvalidPageToken(PandasError, ValueError): - """ - Raised when Google BigQuery fails to return, - or returns a duplicate page token. - """ - pass - - -class InvalidSchema(PandasError, ValueError): - """ - Raised when the provided DataFrame does - not match the schema of the destination - table in BigQuery. - """ - pass - - -class NotFoundException(PandasError, ValueError): - """ - Raised when the project_id, table or dataset provided in the query could - not be found. - """ - pass - - -class StreamingInsertError(PandasError, ValueError): - """ - Raised when BigQuery reports a streaming insert error. - For more information see `Streaming Data Into BigQuery - `__ - """ - - -class TableCreationError(PandasError, ValueError): - """ - Raised when the create table method fails - """ - pass - - -class GbqConnector(object): - scope = 'https://www.googleapis.com/auth/bigquery' - - def __init__(self, project_id, reauth=False, verbose=False, - private_key=None, dialect='legacy'): - _check_google_client_version() - _test_google_api_imports() - self.project_id = project_id - self.reauth = reauth - self.verbose = verbose - self.private_key = private_key - self.dialect = dialect - self.credentials = self.get_credentials() - self.service = self.get_service() - - def get_credentials(self): - if self.private_key: - return self.get_service_account_credentials() - else: - # Try to retrieve Application Default Credentials - credentials = self.get_application_default_credentials() - if not credentials: - credentials = self.get_user_account_credentials() - return credentials - - def get_application_default_credentials(self): - """ - This method tries to retrieve the "default application credentials". - This could be useful for running code on Google Cloud Platform. - - .. versionadded:: 0.19.0 - - Parameters - ---------- - None - - Returns - ------- - - GoogleCredentials, - If the default application credentials can be retrieved - from the environment. The retrieved credentials should also - have access to the project (self.project_id) on BigQuery. - - OR None, - If default application credentials can not be retrieved - from the environment. Or, the retrieved credentials do not - have access to the project (self.project_id) on BigQuery. 
- """ - import httplib2 - try: - from googleapiclient.discovery import build - except ImportError: - from apiclient.discovery import build - try: - from oauth2client.client import GoogleCredentials - except ImportError: - return None - - try: - credentials = GoogleCredentials.get_application_default() - except: - return None - - http = httplib2.Http() - try: - http = credentials.authorize(http) - bigquery_service = build('bigquery', 'v2', http=http) - # Check if the application has rights to the BigQuery project - jobs = bigquery_service.jobs() - job_data = {'configuration': {'query': {'query': 'SELECT 1'}}} - jobs.insert(projectId=self.project_id, body=job_data).execute() - return credentials - except: - return None - - def get_user_account_credentials(self): - from oauth2client.client import OAuth2WebServerFlow - from oauth2client.file import Storage - from oauth2client.tools import run_flow, argparser - - flow = OAuth2WebServerFlow( - client_id=('495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd' - '.apps.googleusercontent.com'), - client_secret='kOc9wMptUtxkcIFbtZCcrEAc', - scope=self.scope, - redirect_uri='urn:ietf:wg:oauth:2.0:oob') - - storage = Storage('bigquery_credentials.dat') - credentials = storage.get() - - if credentials is None or credentials.invalid or self.reauth: - credentials = run_flow(flow, storage, argparser.parse_args([])) - - return credentials - - def get_service_account_credentials(self): - # Bug fix for https://github.com/pandas-dev/pandas/issues/12572 - # We need to know that a supported version of oauth2client is installed - # Test that either of the following is installed: - # - SignedJwtAssertionCredentials from oauth2client.client - # - ServiceAccountCredentials from oauth2client.service_account - # SignedJwtAssertionCredentials is available in oauthclient < 2.0.0 - # ServiceAccountCredentials is available in oauthclient >= 2.0.0 - oauth2client_v1 = True - oauth2client_v2 = True - - try: - from oauth2client.client import SignedJwtAssertionCredentials - except ImportError: - oauth2client_v1 = False - - try: - from oauth2client.service_account import ServiceAccountCredentials - except ImportError: - oauth2client_v2 = False - - if not oauth2client_v1 and not oauth2client_v2: - raise ImportError("Missing oauth2client required for BigQuery " - "service account support") - - from os.path import isfile - - try: - if isfile(self.private_key): - with open(self.private_key) as f: - json_key = json.loads(f.read()) - else: - # ugly hack: 'private_key' field has new lines inside, - # they break json parser, but we need to preserve them - json_key = json.loads(self.private_key.replace('\n', ' ')) - json_key['private_key'] = json_key['private_key'].replace( - ' ', '\n') - - if compat.PY3: - json_key['private_key'] = bytes( - json_key['private_key'], 'UTF-8') - - if oauth2client_v1: - return SignedJwtAssertionCredentials( - json_key['client_email'], - json_key['private_key'], - self.scope, - ) - else: - return ServiceAccountCredentials.from_json_keyfile_dict( - json_key, - self.scope) - except (KeyError, ValueError, TypeError, AttributeError): - raise InvalidPrivateKeyFormat( - "Private key is missing or invalid. It should be service " - "account private key JSON (file path or string contents) " - "with at least two keys: 'client_email' and 'private_key'. " - "Can be obtained from: https://console.developers.google." 
- "com/permissions/serviceaccounts") - - def _print(self, msg, end='\n'): - if self.verbose: - sys.stdout.write(msg + end) - sys.stdout.flush() - - def _start_timer(self): - self.start = time.time() - - def get_elapsed_seconds(self): - return round(time.time() - self.start, 2) - - def print_elapsed_seconds(self, prefix='Elapsed', postfix='s.', - overlong=7): - sec = self.get_elapsed_seconds() - if sec > overlong: - self._print('{} {} {}'.format(prefix, sec, postfix)) - - # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size - @staticmethod - def sizeof_fmt(num, suffix='b'): - fmt = "%3.1f %s%s" - for unit in ['', 'k', 'M', 'G', 'T', 'P', 'E', 'Z']: - if abs(num) < 1024.0: - return fmt % (num, unit, suffix) - num /= 1024.0 - return fmt % (num, 'Y', suffix) - - def get_service(self): - import httplib2 - try: - from googleapiclient.discovery import build - except: - from apiclient.discovery import build - - http = httplib2.Http() - http = self.credentials.authorize(http) - bigquery_service = build('bigquery', 'v2', http=http) - - return bigquery_service - - @staticmethod - def process_http_error(ex): - # See `BigQuery Troubleshooting Errors - # `__ - - status = json.loads(bytes_to_str(ex.content))['error'] - errors = status.get('errors', None) - - if errors: - for error in errors: - reason = error['reason'] - message = error['message'] - - raise GenericGBQException( - "Reason: {0}, Message: {1}".format(reason, message)) - - raise GenericGBQException(errors) - - def process_insert_errors(self, insert_errors): - for insert_error in insert_errors: - row = insert_error['index'] - errors = insert_error.get('errors', None) - for error in errors: - reason = error['reason'] - message = error['message'] - location = error['location'] - error_message = ('Error at Row: {0}, Reason: {1}, ' - 'Location: {2}, Message: {3}' - .format(row, reason, location, message)) - - # Report all error messages if verbose is set - if self.verbose: - self._print(error_message) - else: - raise StreamingInsertError(error_message + - '\nEnable verbose logging to ' - 'see all errors') - - raise StreamingInsertError - - def run_query(self, query, **kwargs): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - from oauth2client.client import AccessTokenRefreshError - - _check_google_client_version() - - job_collection = self.service.jobs() - - job_config = { - 'query': { - 'query': query, - 'useLegacySql': self.dialect == 'legacy' - # 'allowLargeResults', 'createDisposition', - # 'preserveNulls', destinationTable, useQueryCache - } - } - config = kwargs.get('configuration') - if config is not None: - if len(config) != 1: - raise ValueError("Only one job type must be specified, but " - "given {}".format(','.join(config.keys()))) - if 'query' in config: - if 'query' in config['query'] and query is not None: - raise ValueError("Query statement can't be specified " - "inside config while it is specified " - "as parameter") - - job_config['query'].update(config['query']) - else: - raise ValueError("Only 'query' job type is supported") - - job_data = { - 'configuration': job_config - } - - self._start_timer() - try: - self._print('Requesting query... 
', end="") - query_reply = job_collection.insert( - projectId=self.project_id, body=job_data).execute() - self._print('ok.\nQuery running...') - except (AccessTokenRefreshError, ValueError): - if self.private_key: - raise AccessDenied( - "The service account credentials are not valid") - else: - raise AccessDenied( - "The credentials have been revoked or expired, " - "please re-run the application to re-authorize") - except HttpError as ex: - self.process_http_error(ex) - - job_reference = query_reply['jobReference'] - - while not query_reply.get('jobComplete', False): - self.print_elapsed_seconds(' Elapsed', 's. Waiting...') - try: - query_reply = job_collection.getQueryResults( - projectId=job_reference['projectId'], - jobId=job_reference['jobId']).execute() - except HttpError as ex: - self.process_http_error(ex) - - if self.verbose: - if query_reply['cacheHit']: - self._print('Query done.\nCache hit.\n') - else: - bytes_processed = int(query_reply.get( - 'totalBytesProcessed', '0')) - self._print('Query done.\nProcessed: {}\n'.format( - self.sizeof_fmt(bytes_processed))) - - self._print('Retrieving results...') - - total_rows = int(query_reply['totalRows']) - result_pages = list() - seen_page_tokens = list() - current_row = 0 - # Only read schema on first page - schema = query_reply['schema'] - - # Loop through each page of data - while 'rows' in query_reply and current_row < total_rows: - page = query_reply['rows'] - result_pages.append(page) - current_row += len(page) - - self.print_elapsed_seconds( - ' Got page: {}; {}% done. Elapsed'.format( - len(result_pages), - round(100.0 * current_row / total_rows))) - - if current_row == total_rows: - break - - page_token = query_reply.get('pageToken', None) - - if not page_token and current_row < total_rows: - raise InvalidPageToken("Required pageToken was missing. 
" - "Received {0} of {1} rows" - .format(current_row, total_rows)) - - elif page_token in seen_page_tokens: - raise InvalidPageToken("A duplicate pageToken was returned") - - seen_page_tokens.append(page_token) - - try: - query_reply = job_collection.getQueryResults( - projectId=job_reference['projectId'], - jobId=job_reference['jobId'], - pageToken=page_token).execute() - except HttpError as ex: - self.process_http_error(ex) - - if current_row < total_rows: - raise InvalidPageToken() - - # print basic query stats - self._print('Got {} rows.\n'.format(total_rows)) - - return schema, result_pages - - def load_data(self, dataframe, dataset_id, table_id, chunksize): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - - job_id = uuid.uuid4().hex - rows = [] - remaining_rows = len(dataframe) - - total_rows = remaining_rows - self._print("\n\n") - - for index, row in dataframe.reset_index(drop=True).iterrows(): - row_dict = dict() - row_dict['json'] = json.loads(row.to_json(force_ascii=False, - date_unit='s', - date_format='iso')) - row_dict['insertId'] = job_id + str(index) - rows.append(row_dict) - remaining_rows -= 1 - - if (len(rows) % chunksize == 0) or (remaining_rows == 0): - self._print("\rStreaming Insert is {0}% Complete".format( - ((total_rows - remaining_rows) * 100) / total_rows)) - - body = {'rows': rows} - - try: - response = self.service.tabledata().insertAll( - projectId=self.project_id, - datasetId=dataset_id, - tableId=table_id, - body=body).execute() - except HttpError as ex: - self.process_http_error(ex) - - # For streaming inserts, even if you receive a success HTTP - # response code, you'll need to check the insertErrors property - # of the response to determine if the row insertions were - # successful, because it's possible that BigQuery was only - # partially successful at inserting the rows. See the `Success - # HTTP Response Codes - # `__ - # section - - insert_errors = response.get('insertErrors', None) - if insert_errors: - self.process_insert_errors(insert_errors) - - sleep(1) # Maintains the inserts "per second" rate per API - rows = [] - - self._print("\n") - - def verify_schema(self, dataset_id, table_id, schema): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - - try: - remote_schema = self.service.tables().get( - projectId=self.project_id, - datasetId=dataset_id, - tableId=table_id).execute()['schema'] - - fields_remote = set([json.dumps(field_remote) - for field_remote in remote_schema['fields']]) - fields_local = set(json.dumps(field_local) - for field_local in schema['fields']) - - return fields_remote == fields_local - except HttpError as ex: - self.process_http_error(ex) - - def delete_and_recreate_table(self, dataset_id, table_id, table_schema): - delay = 0 - - # Changes to table schema may take up to 2 minutes as of May 2015 See - # `Issue 191 - # `__ - # Compare previous schema with new schema to determine if there should - # be a 120 second delay - - if not self.verify_schema(dataset_id, table_id, table_schema): - self._print('The existing table has a different schema. Please ' - 'wait 2 minutes. 
See Google BigQuery issue #191') - delay = 120 - - table = _Table(self.project_id, dataset_id, - private_key=self.private_key) - table.delete(table_id) - table.create(table_id, table_schema) - sleep(delay) - - -def _parse_data(schema, rows): - # see: - # http://pandas.pydata.org/pandas-docs/dev/missing_data.html - # #missing-data-casting-rules-and-indexing - dtype_map = {'FLOAT': np.dtype(float), - 'TIMESTAMP': 'M8[ns]'} - - fields = schema['fields'] - col_types = [field['type'] for field in fields] - col_names = [str(field['name']) for field in fields] - col_dtypes = [dtype_map.get(field['type'], object) for field in fields] - page_array = np.zeros((len(rows),), dtype=lzip(col_names, col_dtypes)) - for row_num, raw_row in enumerate(rows): - entries = raw_row.get('f', []) - for col_num, field_type in enumerate(col_types): - field_value = _parse_entry(entries[col_num].get('v', ''), - field_type) - page_array[row_num][col_num] = field_value - - return DataFrame(page_array, columns=col_names) + # give a nice error message + raise ImportError("Load data from Google BigQuery\n" + "\n" + "the pandas-gbq package is not installed\n" + "see the docs: https://pandas-gbq.readthedocs.io\n" + "\n" + "you can install via:\n" + "pip install pandas-gbq\n") -def _parse_entry(field_value, field_type): - if field_value is None or field_value == 'null': - return None - if field_type == 'INTEGER': - return int(field_value) - elif field_type == 'FLOAT': - return float(field_value) - elif field_type == 'TIMESTAMP': - timestamp = datetime.utcfromtimestamp(float(field_value)) - return np.datetime64(timestamp) - elif field_type == 'BOOLEAN': - return field_value == 'true' - return field_value + return pandas_gbq def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, private_key=None, dialect='legacy', **kwargs): - r"""Load data from Google BigQuery. + pandas_gbq = _try_import() + return pandas_gbq.read_gbq( + query, project_id=project_id, + index_col=index_col, col_order=col_order, + reauth=reauth, verbose=verbose, + private_key=private_key, + dialect=dialect, + **kwargs) - THIS IS AN EXPERIMENTAL LIBRARY - The main method a user calls to execute a Query in Google BigQuery - and read results into a pandas DataFrame. - - Google BigQuery API Client Library v2 for Python is used. - Documentation is available at - https://developers.google.com/api-client-library/python/apis/bigquery/v2 - - Authentication to the Google BigQuery service is via OAuth 2.0. - - - If "private_key" is not provided: - - By default "application default credentials" are used. - - .. versionadded:: 0.19.0 - - If default application credentials are not found or are restrictive, - user account credentials are used. In this case, you will be asked to - grant permissions for product name 'pandas GBQ'. - - - If "private_key" is provided: - - Service account credentials will be used to authenticate. - - Parameters - ---------- - query : str - SQL-Like Query to return data values - project_id : str - Google BigQuery Account project ID. - index_col : str (optional) - Name of result column to use for index in results DataFrame - col_order : list(str) (optional) - List of BigQuery column names in the desired order for results - DataFrame - reauth : boolean (default False) - Force Google BigQuery to reauthenticate the user. This is useful - if multiple accounts are used. - verbose : boolean (default True) - Verbose output - private_key : str (optional) - Service account private key in JSON format. 
Can be file path - or string contents. This is useful for remote server - authentication (eg. jupyter iPython notebook on remote host) - - .. versionadded:: 0.18.1 - - dialect : {'legacy', 'standard'}, default 'legacy' - 'legacy' : Use BigQuery's legacy SQL dialect. - 'standard' : Use BigQuery's standard SQL (beta), which is - compliant with the SQL 2011 standard. For more information - see `BigQuery SQL Reference - `__ - - .. versionadded:: 0.19.0 - - **kwargs : Arbitrary keyword arguments - configuration (dict): query config parameters for job processing. - For example: - - configuration = {'query': {'useQueryCache': False}} - - For more information see `BigQuery SQL Reference - ` - - .. versionadded:: 0.20.0 - - Returns - ------- - df: DataFrame - DataFrame representing results of query - - """ - - if not project_id: - raise TypeError("Missing required parameter: project_id") - - if dialect not in ('legacy', 'standard'): - raise ValueError("'{0}' is not valid for dialect".format(dialect)) - - connector = GbqConnector(project_id, reauth=reauth, verbose=verbose, - private_key=private_key, - dialect=dialect) - schema, pages = connector.run_query(query, **kwargs) - dataframe_list = [] - while len(pages) > 0: - page = pages.pop() - dataframe_list.append(_parse_data(schema, page)) - - if len(dataframe_list) > 0: - final_df = concat(dataframe_list, ignore_index=True) - else: - final_df = _parse_data(schema, []) - - # Reindex the DataFrame on the provided column - if index_col is not None: - if index_col in final_df.columns: - final_df.set_index(index_col, inplace=True) - else: - raise InvalidColumnOrder( - 'Index column "{0}" does not exist in DataFrame.' - .format(index_col) - ) - - # Change the order of columns in the DataFrame based on provided list - if col_order is not None: - if sorted(col_order) == sorted(final_df.columns): - final_df = final_df[col_order] - else: - raise InvalidColumnOrder( - 'Column order does not match this DataFrame.' - ) - - # cast BOOLEAN and INTEGER columns from object to bool/int - # if they dont have any nulls - type_map = {'BOOLEAN': bool, 'INTEGER': int} - for field in schema['fields']: - if field['type'] in type_map and \ - final_df[field['name']].notnull().all(): - final_df[field['name']] = \ - final_df[field['name']].astype(type_map[field['type']]) - - connector.print_elapsed_seconds( - 'Total time taken', - datetime.now().strftime('s.\nFinished at %Y-%m-%d %H:%M:%S.'), - 0 - ) - - return final_df +read_gbq = docstring_wrapper(read_gbq, + lambda: _try_import().read_gbq.__doc__) def to_gbq(dataframe, destination_table, project_id, chunksize=10000, verbose=True, reauth=False, if_exists='fail', private_key=None): - """Write a DataFrame to a Google BigQuery table. - - THIS IS AN EXPERIMENTAL LIBRARY - - The main method a user calls to export pandas DataFrame contents to - Google BigQuery table. - - Google BigQuery API Client Library v2 for Python is used. - Documentation is available at - https://developers.google.com/api-client-library/python/apis/bigquery/v2 - - Authentication to the Google BigQuery service is via OAuth 2.0. - - - If "private_key" is not provided: - - By default "application default credentials" are used. - - .. versionadded:: 0.19.0 - - If default application credentials are not found or are restrictive, - user account credentials are used. In this case, you will be asked to - grant permissions for product name 'pandas GBQ'. - - - If "private_key" is provided: - - Service account credentials will be used to authenticate. 
- - Parameters - ---------- - dataframe : DataFrame - DataFrame to be written - destination_table : string - Name of table to be written, in the form 'dataset.tablename' - project_id : str - Google BigQuery Account project ID. - chunksize : int (default 10000) - Number of rows to be inserted in each chunk from the dataframe. - verbose : boolean (default True) - Show percentage complete - reauth : boolean (default False) - Force Google BigQuery to reauthenticate the user. This is useful - if multiple accounts are used. - if_exists : {'fail', 'replace', 'append'}, default 'fail' - 'fail': If table exists, do nothing. - 'replace': If table exists, drop it, recreate it, and insert data. - 'append': If table exists, insert data. Create if does not exist. - private_key : str (optional) - Service account private key in JSON format. Can be file path - or string contents. This is useful for remote server - authentication (eg. jupyter iPython notebook on remote host) - """ - - if if_exists not in ('fail', 'replace', 'append'): - raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) - - if '.' not in destination_table: - raise NotFoundException( - "Invalid Table Name. Should be of the form 'datasetId.tableId' ") - - connector = GbqConnector(project_id, reauth=reauth, verbose=verbose, - private_key=private_key) - dataset_id, table_id = destination_table.rsplit('.', 1) - - table = _Table(project_id, dataset_id, reauth=reauth, - private_key=private_key) - - table_schema = _generate_bq_schema(dataframe) - - # If table exists, check if_exists parameter - if table.exists(table_id): - if if_exists == 'fail': - raise TableCreationError("Could not create the table because it " - "already exists. " - "Change the if_exists parameter to " - "append or replace data.") - elif if_exists == 'replace': - connector.delete_and_recreate_table( - dataset_id, table_id, table_schema) - elif if_exists == 'append': - if not connector.verify_schema(dataset_id, table_id, table_schema): - raise InvalidSchema("Please verify that the structure and " - "data types in the DataFrame match the " - "schema of the destination table.") - else: - table.create(table_id, table_schema) - - connector.load_data(dataframe, dataset_id, table_id, chunksize) - - -def generate_bq_schema(df, default_type='STRING'): - # deprecation TimeSeries, #11121 - warnings.warn("generate_bq_schema is deprecated and will be removed in " - "a future version", FutureWarning, stacklevel=2) - - return _generate_bq_schema(df, default_type=default_type) - - -def _generate_bq_schema(df, default_type='STRING'): - """ Given a passed df, generate the associated Google BigQuery schema. - - Parameters - ---------- - df : DataFrame - default_type : string - The default big query type in case the type of the column - does not exist in the schema. 
- """ - - type_mapping = { - 'i': 'INTEGER', - 'b': 'BOOLEAN', - 'f': 'FLOAT', - 'O': 'STRING', - 'S': 'STRING', - 'U': 'STRING', - 'M': 'TIMESTAMP' - } - - fields = [] - for column_name, dtype in df.dtypes.iteritems(): - fields.append({'name': column_name, - 'type': type_mapping.get(dtype.kind, default_type)}) - - return {'fields': fields} - - -class _Table(GbqConnector): - - def __init__(self, project_id, dataset_id, reauth=False, verbose=False, - private_key=None): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - self.http_error = HttpError - self.dataset_id = dataset_id - super(_Table, self).__init__(project_id, reauth, verbose, private_key) - - def exists(self, table_id): - """ Check if a table exists in Google BigQuery - - .. versionadded:: 0.17.0 - - Parameters - ---------- - table : str - Name of table to be verified - - Returns - ------- - boolean - true if table exists, otherwise false - """ - - try: - self.service.tables().get( - projectId=self.project_id, - datasetId=self.dataset_id, - tableId=table_id).execute() - return True - except self.http_error as ex: - if ex.resp.status == 404: - return False - else: - self.process_http_error(ex) - - def create(self, table_id, schema): - """ Create a table in Google BigQuery given a table and schema - - .. versionadded:: 0.17.0 - - Parameters - ---------- - table : str - Name of table to be written - schema : str - Use the generate_bq_schema to generate your table schema from a - dataframe. - """ - - if self.exists(table_id): - raise TableCreationError( - "The table could not be created because it already exists") - - if not _Dataset(self.project_id, - private_key=self.private_key).exists(self.dataset_id): - _Dataset(self.project_id, - private_key=self.private_key).create(self.dataset_id) - - body = { - 'schema': schema, - 'tableReference': { - 'tableId': table_id, - 'projectId': self.project_id, - 'datasetId': self.dataset_id - } - } - - try: - self.service.tables().insert( - projectId=self.project_id, - datasetId=self.dataset_id, - body=body).execute() - except self.http_error as ex: - self.process_http_error(ex) - - def delete(self, table_id): - """ Delete a table in Google BigQuery - - .. versionadded:: 0.17.0 - - Parameters - ---------- - table : str - Name of table to be deleted - """ - - if not self.exists(table_id): - raise NotFoundException("Table does not exist") - - try: - self.service.tables().delete( - datasetId=self.dataset_id, - projectId=self.project_id, - tableId=table_id).execute() - except self.http_error as ex: - self.process_http_error(ex) - - -class _Dataset(GbqConnector): - - def __init__(self, project_id, reauth=False, verbose=False, - private_key=None): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - self.http_error = HttpError - super(_Dataset, self).__init__(project_id, reauth, verbose, - private_key) - - def exists(self, dataset_id): - """ Check if a dataset exists in Google BigQuery - - .. versionadded:: 0.17.0 - - Parameters - ---------- - dataset_id : str - Name of dataset to be verified - - Returns - ------- - boolean - true if dataset exists, otherwise false - """ - - try: - self.service.datasets().get( - projectId=self.project_id, - datasetId=dataset_id).execute() - return True - except self.http_error as ex: - if ex.resp.status == 404: - return False - else: - self.process_http_error(ex) - - def datasets(self): - """ Return a list of datasets in Google BigQuery - - .. 
versionadded:: 0.17.0 - - Parameters - ---------- - None - - Returns - ------- - list - List of datasets under the specific project - """ - - dataset_list = [] - next_page_token = None - first_query = True - - while first_query or next_page_token: - first_query = False - - try: - list_dataset_response = self.service.datasets().list( - projectId=self.project_id, - pageToken=next_page_token).execute() - - dataset_response = list_dataset_response.get('datasets') - next_page_token = list_dataset_response.get('nextPageToken') - - if not dataset_response: - return dataset_list - - for row_num, raw_row in enumerate(dataset_response): - dataset_list.append( - raw_row['datasetReference']['datasetId']) - - except self.http_error as ex: - self.process_http_error(ex) - - return dataset_list - - def create(self, dataset_id): - """ Create a dataset in Google BigQuery - - .. versionadded:: 0.17.0 - - Parameters - ---------- - dataset : str - Name of dataset to be written - """ - - if self.exists(dataset_id): - raise DatasetCreationError( - "The dataset could not be created because it already exists") - - body = { - 'datasetReference': { - 'projectId': self.project_id, - 'datasetId': dataset_id - } - } - - try: - self.service.datasets().insert( - projectId=self.project_id, - body=body).execute() - except self.http_error as ex: - self.process_http_error(ex) - - def delete(self, dataset_id): - """ Delete a dataset in Google BigQuery - - .. versionadded:: 0.17.0 - - Parameters - ---------- - dataset : str - Name of dataset to be deleted - """ - - if not self.exists(dataset_id): - raise NotFoundException( - "Dataset {0} does not exist".format(dataset_id)) - - try: - self.service.datasets().delete( - datasetId=dataset_id, - projectId=self.project_id).execute() - - except self.http_error as ex: - self.process_http_error(ex) - - def tables(self, dataset_id): - """ List tables in the specific dataset in Google BigQuery - - .. 
versionadded:: 0.17.0 - - Parameters - ---------- - dataset : str - Name of dataset to list tables for - - Returns - ------- - list - List of tables under the specific dataset - """ - - table_list = [] - next_page_token = None - first_query = True - - while first_query or next_page_token: - first_query = False - - try: - list_table_response = self.service.tables().list( - projectId=self.project_id, - datasetId=dataset_id, - pageToken=next_page_token).execute() - - table_response = list_table_response.get('tables') - next_page_token = list_table_response.get('nextPageToken') - - if not table_response: - return table_list - - for row_num, raw_row in enumerate(table_response): - table_list.append(raw_row['tableReference']['tableId']) + pandas_gbq = _try_import() + pandas_gbq.to_gbq(dataframe, destination_table, project_id, + chunksize=chunksize, + verbose=verbose, reauth=reauth, + if_exists=if_exists, private_key=private_key) - except self.http_error as ex: - self.process_http_error(ex) - return table_list +to_gbq = docstring_wrapper(to_gbq, + lambda: _try_import().to_gbq.__doc__) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 0a76267054ee6..13529e7b54714 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -1,23 +1,18 @@ -import re -from datetime import datetime import pytest +from datetime import datetime import pytz import platform from time import sleep import os -import logging import numpy as np +import pandas as pd +from pandas import compat, DataFrame -from distutils.version import StrictVersion -from pandas import compat - -from pandas import NaT -from pandas.compat import u, range -from pandas.core.frame import DataFrame -import pandas.io.gbq as gbq +from pandas.compat import range import pandas.util.testing as tm -from pandas.compat.numpy import np_datetime64_compat + +pandas_gbq = pytest.importorskip('pandas_gbq') PROJECT_ID = None PRIVATE_KEY_JSON_PATH = None @@ -33,12 +28,6 @@ VERSION = platform.python_version() -_IMPORTS = False -_GOOGLE_API_CLIENT_INSTALLED = False -_GOOGLE_API_CLIENT_VALID_VERSION = False -_HTTPLIB2_INSTALLED = False -_SETUPTOOLS_INSTALLED = False - def _skip_if_no_project_id(): if not _get_project_id(): @@ -46,23 +35,12 @@ def _skip_if_no_project_id(): "Cannot run integration tests without a project id") -def _skip_local_auth_if_in_travis_env(): - if _in_travis_environment(): - pytest.skip("Cannot run local auth in travis environment") - - def _skip_if_no_private_key_path(): if not _get_private_key_path(): pytest.skip("Cannot run integration tests without a " "private key json file path") -def _skip_if_no_private_key_contents(): - if not _get_private_key_contents(): - pytest.skip("Cannot run integration tests without a " - "private key json contents") - - def _in_travis_environment(): return 'TRAVIS_BUILD_DIR' in os.environ and \ 'GBQ_PROJECT_ID' in os.environ @@ -83,146 +61,15 @@ def _get_private_key_path(): return PRIVATE_KEY_JSON_PATH -def _get_private_key_contents(): - if _in_travis_environment(): - with open(os.path.join(*[os.environ.get('TRAVIS_BUILD_DIR'), 'ci', - 'travis_gbq.json'])) as f: - return f.read() - else: - return PRIVATE_KEY_JSON_CONTENTS - - -def _test_imports(): - global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \ - _HTTPLIB2_INSTALLED, _SETUPTOOLS_INSTALLED - - try: - import pkg_resources - _SETUPTOOLS_INSTALLED = True - except ImportError: - _SETUPTOOLS_INSTALLED = False - - if compat.PY3: - google_api_minimum_version = '1.4.1' - else: - 
google_api_minimum_version = '1.2.0' - - if _SETUPTOOLS_INSTALLED: - try: - try: - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - except: - from apiclient.discovery import build # noqa - from apiclient.errors import HttpError # noqa - - from oauth2client.client import OAuth2WebServerFlow # noqa - from oauth2client.client import AccessTokenRefreshError # noqa - - from oauth2client.file import Storage # noqa - from oauth2client.tools import run_flow # noqa - _GOOGLE_API_CLIENT_INSTALLED = True - _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution( - 'google-api-python-client').version - - if (StrictVersion(_GOOGLE_API_CLIENT_VERSION) >= - StrictVersion(google_api_minimum_version)): - _GOOGLE_API_CLIENT_VALID_VERSION = True - - except ImportError: - _GOOGLE_API_CLIENT_INSTALLED = False - - try: - import httplib2 # noqa - _HTTPLIB2_INSTALLED = True - except ImportError: - _HTTPLIB2_INSTALLED = False - - if not _SETUPTOOLS_INSTALLED: - raise ImportError('Could not import pkg_resources (setuptools).') - - if not _GOOGLE_API_CLIENT_INSTALLED: - raise ImportError('Could not import Google API Client.') - - if not _GOOGLE_API_CLIENT_VALID_VERSION: - raise ImportError("pandas requires google-api-python-client >= {0} " - "for Google BigQuery support, " - "current version {1}" - .format(google_api_minimum_version, - _GOOGLE_API_CLIENT_VERSION)) - - if not _HTTPLIB2_INSTALLED: - raise ImportError( - "pandas requires httplib2 for Google BigQuery support") - - # Bug fix for https://github.com/pandas-dev/pandas/issues/12572 - # We need to know that a supported version of oauth2client is installed - # Test that either of the following is installed: - # - SignedJwtAssertionCredentials from oauth2client.client - # - ServiceAccountCredentials from oauth2client.service_account - # SignedJwtAssertionCredentials is available in oauthclient < 2.0.0 - # ServiceAccountCredentials is available in oauthclient >= 2.0.0 - oauth2client_v1 = True - oauth2client_v2 = True - - try: - from oauth2client.client import SignedJwtAssertionCredentials # noqa - except ImportError: - oauth2client_v1 = False - - try: - from oauth2client.service_account import ServiceAccountCredentials # noqa - except ImportError: - oauth2client_v2 = False - - if not oauth2client_v1 and not oauth2client_v2: - raise ImportError("Missing oauth2client required for BigQuery " - "service account support") - - -def _setup_common(): - try: - _test_imports() - except (ImportError, NotImplementedError) as import_exception: - pytest.skip(import_exception) - - if _in_travis_environment(): - logging.getLogger('oauth2client').setLevel(logging.ERROR) - logging.getLogger('apiclient').setLevel(logging.ERROR) - - -def _check_if_can_get_correct_default_credentials(): - # Checks if "Application Default Credentials" can be fetched - # from the environment the tests are running in. 
- # See Issue #13577 - - import httplib2 - try: - from googleapiclient.discovery import build - except ImportError: - from apiclient.discovery import build - try: - from oauth2client.client import GoogleCredentials - credentials = GoogleCredentials.get_application_default() - http = httplib2.Http() - http = credentials.authorize(http) - bigquery_service = build('bigquery', 'v2', http=http) - jobs = bigquery_service.jobs() - job_data = {'configuration': {'query': {'query': 'SELECT 1'}}} - jobs.insert(projectId=_get_project_id(), body=job_data).execute() - return True - except: - return False - - def clean_gbq_environment(private_key=None): - dataset = gbq._Dataset(_get_project_id(), private_key=private_key) + dataset = pandas_gbq.gbq._Dataset(_get_project_id(), + private_key=private_key) for i in range(1, 10): if DATASET_ID + str(i) in dataset.datasets(): dataset_id = DATASET_ID + str(i) - table = gbq._Table(_get_project_id(), dataset_id, - private_key=private_key) + table = pandas_gbq.gbq._Table(_get_project_id(), dataset_id, + private_key=private_key) for j in range(1, 20): if TABLE_ID + str(j) in dataset.tables(dataset_id): table.delete(TABLE_ID + str(j)) @@ -246,673 +93,8 @@ def make_mixed_dataframe_v2(test_size): index=range(test_size)) -def test_generate_bq_schema_deprecated(): - # 11121 Deprecation of generate_bq_schema - with tm.assert_produces_warning(FutureWarning): - df = make_mixed_dataframe_v2(10) - gbq.generate_bq_schema(df) - - -@pytest.mark.xfail(run=False, reason="intermittent failures") -class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase): - - def setUp(self): - _setup_common() - _skip_if_no_project_id() - _skip_local_auth_if_in_travis_env() - - self.sut = gbq.GbqConnector(_get_project_id()) - - def test_should_be_able_to_make_a_connector(self): - self.assertTrue(self.sut is not None, - 'Could not create a GbqConnector') - - def test_should_be_able_to_get_valid_credentials(self): - credentials = self.sut.get_credentials() - self.assertFalse(credentials.invalid, 'Returned credentials invalid') - - def test_should_be_able_to_get_a_bigquery_service(self): - bigquery_service = self.sut.get_service() - self.assertTrue(bigquery_service is not None, 'No service returned') - - def test_should_be_able_to_get_schema_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(schema is not None) - - def test_should_be_able_to_get_results_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(pages is not None) - - def test_get_application_default_credentials_does_not_throw_error(self): - if _check_if_can_get_correct_default_credentials(): - pytest.skip("Can get default_credentials " - "from the environment!") - credentials = self.sut.get_application_default_credentials() - self.assertIsNone(credentials) - - def test_get_application_default_credentials_returns_credentials(self): - if not _check_if_can_get_correct_default_credentials(): - pytest.skip("Cannot get default_credentials " - "from the environment!") - from oauth2client.client import GoogleCredentials - credentials = self.sut.get_application_default_credentials() - self.assertTrue(isinstance(credentials, GoogleCredentials)) - - -@pytest.mark.xfail(run=False, reason="intermittent failures") -class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase): - def setUp(self): - _setup_common() - - _skip_if_no_project_id() - _skip_if_no_private_key_path() - - self.sut = gbq.GbqConnector(_get_project_id(), - private_key=_get_private_key_path()) - - 
def test_should_be_able_to_make_a_connector(self): - self.assertTrue(self.sut is not None, - 'Could not create a GbqConnector') - - def test_should_be_able_to_get_valid_credentials(self): - credentials = self.sut.get_credentials() - self.assertFalse(credentials.invalid, 'Returned credentials invalid') - - def test_should_be_able_to_get_a_bigquery_service(self): - bigquery_service = self.sut.get_service() - self.assertTrue(bigquery_service is not None, 'No service returned') - - def test_should_be_able_to_get_schema_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(schema is not None) - - def test_should_be_able_to_get_results_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(pages is not None) - - -@pytest.mark.xfail(run=False, reason="intermittent failures") -class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase): - def setUp(self): - _setup_common() - - _skip_if_no_project_id() - _skip_if_no_private_key_contents() - - self.sut = gbq.GbqConnector(_get_project_id(), - private_key=_get_private_key_contents()) - - def test_should_be_able_to_make_a_connector(self): - self.assertTrue(self.sut is not None, - 'Could not create a GbqConnector') - - def test_should_be_able_to_get_valid_credentials(self): - credentials = self.sut.get_credentials() - self.assertFalse(credentials.invalid, 'Returned credentials invalid') - - def test_should_be_able_to_get_a_bigquery_service(self): - bigquery_service = self.sut.get_service() - self.assertTrue(bigquery_service is not None, 'No service returned') - - def test_should_be_able_to_get_schema_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(schema is not None) - - def test_should_be_able_to_get_results_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(pages is not None) - - -class GBQUnitTests(tm.TestCase): - - def setUp(self): - _setup_common() - - def test_import_google_api_python_client(self): - if compat.PY2: - with tm.assertRaises(ImportError): - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - from apiclient.discovery import build # noqa - from apiclient.errors import HttpError # noqa - else: - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - - def test_should_return_bigquery_integers_as_python_ints(self): - result = gbq._parse_entry(1, 'INTEGER') - tm.assert_equal(result, int(1)) - - def test_should_return_bigquery_floats_as_python_floats(self): - result = gbq._parse_entry(1, 'FLOAT') - tm.assert_equal(result, float(1)) - - def test_should_return_bigquery_timestamps_as_numpy_datetime(self): - result = gbq._parse_entry('0e9', 'TIMESTAMP') - tm.assert_equal(result, np_datetime64_compat('1970-01-01T00:00:00Z')) - - def test_should_return_bigquery_booleans_as_python_booleans(self): - result = gbq._parse_entry('false', 'BOOLEAN') - tm.assert_equal(result, False) - - def test_should_return_bigquery_strings_as_python_strings(self): - result = gbq._parse_entry('STRING', 'STRING') - tm.assert_equal(result, 'STRING') - - def test_to_gbq_should_fail_if_invalid_table_name_passed(self): - with tm.assertRaises(gbq.NotFoundException): - gbq.to_gbq(DataFrame(), 'invalid_table_name', project_id="1234") - - def test_to_gbq_with_no_project_id_given_should_fail(self): - with tm.assertRaises(TypeError): - gbq.to_gbq(DataFrame(), 'dataset.tablename') - - def 
test_read_gbq_with_no_project_id_given_should_fail(self): - with tm.assertRaises(TypeError): - gbq.read_gbq('SELECT 1') - - def test_that_parse_data_works_properly(self): - test_schema = {'fields': [ - {'mode': 'NULLABLE', 'name': 'valid_string', 'type': 'STRING'}]} - test_page = [{'f': [{'v': 'PI'}]}] - - test_output = gbq._parse_data(test_schema, test_page) - correct_output = DataFrame({'valid_string': ['PI']}) - tm.assert_frame_equal(test_output, correct_output) - - def test_read_gbq_with_invalid_private_key_json_should_fail(self): - with tm.assertRaises(gbq.InvalidPrivateKeyFormat): - gbq.read_gbq('SELECT 1', project_id='x', private_key='y') - - def test_read_gbq_with_empty_private_key_json_should_fail(self): - with tm.assertRaises(gbq.InvalidPrivateKeyFormat): - gbq.read_gbq('SELECT 1', project_id='x', private_key='{}') - - def test_read_gbq_with_private_key_json_wrong_types_should_fail(self): - with tm.assertRaises(gbq.InvalidPrivateKeyFormat): - gbq.read_gbq( - 'SELECT 1', project_id='x', - private_key='{ "client_email" : 1, "private_key" : True }') - - def test_read_gbq_with_empty_private_key_file_should_fail(self): - with tm.ensure_clean() as empty_file_path: - with tm.assertRaises(gbq.InvalidPrivateKeyFormat): - gbq.read_gbq('SELECT 1', project_id='x', - private_key=empty_file_path) - - def test_read_gbq_with_corrupted_private_key_json_should_fail(self): - _skip_if_no_private_key_contents() - - with tm.assertRaises(gbq.InvalidPrivateKeyFormat): - gbq.read_gbq( - 'SELECT 1', project_id='x', - private_key=re.sub('[a-z]', '9', _get_private_key_contents())) - - -@pytest.mark.xfail(run=False, reason="intermittent failures") -class TestReadGBQIntegration(tm.TestCase): - - @classmethod - def setUpClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. - - _skip_if_no_project_id() - - _setup_common() - - def setUp(self): - # - PER-TEST FIXTURES - - # put here any instruction you want to be run *BEFORE* *EVERY* test is - # executed. - pass - - @classmethod - def tearDownClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - pass - - def tearDown(self): - # - PER-TEST FIXTURES - - # put here any instructions you want to be run *AFTER* *EVERY* test is - # executed. 
- pass - - def test_should_read_as_user_account(self): - _skip_local_auth_if_in_travis_env() - - query = 'SELECT "PI" AS valid_string' - df = gbq.read_gbq(query, project_id=_get_project_id()) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - - def test_should_read_as_service_account_with_key_path(self): - _skip_if_no_private_key_path() - query = 'SELECT "PI" AS valid_string' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - - def test_should_read_as_service_account_with_key_contents(self): - _skip_if_no_private_key_contents() - query = 'SELECT "PI" AS valid_string' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_contents()) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - - -@pytest.mark.xfail(run=False, reason="intermittent failures") -class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): - - @classmethod - def setUpClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. - - _skip_if_no_project_id() - _skip_if_no_private_key_path() - - _setup_common() - - def setUp(self): - # - PER-TEST FIXTURES - - # put here any instruction you want to be run *BEFORE* *EVERY* test is - # executed. - pass - - @classmethod - def tearDownClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - pass - - def tearDown(self): - # - PER-TEST FIXTURES - - # put here any instructions you want to be run *AFTER* *EVERY* test is - # executed. - pass - - def test_should_properly_handle_valid_strings(self): - query = 'SELECT "PI" AS valid_string' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - - def test_should_properly_handle_empty_strings(self): - query = 'SELECT "" AS empty_string' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'empty_string': [""]})) - - def test_should_properly_handle_null_strings(self): - query = 'SELECT STRING(NULL) AS null_string' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'null_string': [None]})) - - def test_should_properly_handle_valid_integers(self): - query = 'SELECT INTEGER(3) AS valid_integer' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'valid_integer': [3]})) - - def test_should_properly_handle_nullable_integers(self): - query = '''SELECT * FROM - (SELECT 1 AS nullable_integer), - (SELECT NULL AS nullable_integer)''' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'nullable_integer': [1, None]}).astype(object)) - - def test_should_properly_handle_valid_longs(self): - query = 'SELECT 1 << 62 AS valid_long' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'valid_long': [1 << 62]})) - - def test_should_properly_handle_nullable_longs(self): - query = '''SELECT * FROM - (SELECT 1 << 62 AS nullable_long), - (SELECT NULL AS nullable_long)''' - df = 
gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'nullable_long': [1 << 62, None]}).astype(object)) - - def test_should_properly_handle_null_integers(self): - query = 'SELECT INTEGER(NULL) AS null_integer' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'null_integer': [None]})) - - def test_should_properly_handle_valid_floats(self): - from math import pi - query = 'SELECT PI() AS valid_float' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame( - {'valid_float': [pi]})) - - def test_should_properly_handle_nullable_floats(self): - from math import pi - query = '''SELECT * FROM - (SELECT PI() AS nullable_float), - (SELECT NULL AS nullable_float)''' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'nullable_float': [pi, None]})) - - def test_should_properly_handle_valid_doubles(self): - from math import pi - query = 'SELECT PI() * POW(10, 307) AS valid_double' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame( - {'valid_double': [pi * 10 ** 307]})) - - def test_should_properly_handle_nullable_doubles(self): - from math import pi - query = '''SELECT * FROM - (SELECT PI() * POW(10, 307) AS nullable_double), - (SELECT NULL AS nullable_double)''' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'nullable_double': [pi * 10 ** 307, None]})) - - def test_should_properly_handle_null_floats(self): - query = 'SELECT FLOAT(NULL) AS null_float' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'null_float': [np.nan]})) - - def test_should_properly_handle_timestamp_unix_epoch(self): - query = 'SELECT TIMESTAMP("1970-01-01 00:00:00") AS unix_epoch' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame( - {'unix_epoch': [np.datetime64('1970-01-01T00:00:00.000000Z')]})) - - def test_should_properly_handle_arbitrary_timestamp(self): - query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") AS valid_timestamp' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({ - 'valid_timestamp': [np.datetime64('2004-09-15T05:00:00.000000Z')] - })) - - def test_should_properly_handle_null_timestamp(self): - query = 'SELECT TIMESTAMP(NULL) AS null_timestamp' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'null_timestamp': [NaT]})) - - def test_should_properly_handle_true_boolean(self): - query = 'SELECT BOOLEAN(TRUE) AS true_boolean' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'true_boolean': [True]})) - - def test_should_properly_handle_false_boolean(self): - query = 'SELECT BOOLEAN(FALSE) AS false_boolean' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'false_boolean': [False]})) - - def 
test_should_properly_handle_null_boolean(self): - query = 'SELECT BOOLEAN(NULL) AS null_boolean' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'null_boolean': [None]})) - - def test_should_properly_handle_nullable_booleans(self): - query = '''SELECT * FROM - (SELECT BOOLEAN(TRUE) AS nullable_boolean), - (SELECT NULL AS nullable_boolean)''' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'nullable_boolean': [True, None]}).astype(object)) - - def test_unicode_string_conversion_and_normalization(self): - correct_test_datatype = DataFrame( - {'unicode_string': [u("\xe9\xfc")]} - ) - - unicode_string = "\xc3\xa9\xc3\xbc" - - if compat.PY3: - unicode_string = unicode_string.encode('latin-1').decode('utf8') - - query = 'SELECT "{0}" AS unicode_string'.format(unicode_string) - - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, correct_test_datatype) - - def test_index_column(self): - query = "SELECT 'a' AS string_1, 'b' AS string_2" - result_frame = gbq.read_gbq(query, project_id=_get_project_id(), - index_col="string_1", - private_key=_get_private_key_path()) - correct_frame = DataFrame( - {'string_1': ['a'], 'string_2': ['b']}).set_index("string_1") - tm.assert_equal(result_frame.index.name, correct_frame.index.name) - - def test_column_order(self): - query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" - col_order = ['string_3', 'string_1', 'string_2'] - result_frame = gbq.read_gbq(query, project_id=_get_project_id(), - col_order=col_order, - private_key=_get_private_key_path()) - correct_frame = DataFrame({'string_1': ['a'], 'string_2': [ - 'b'], 'string_3': ['c']})[col_order] - tm.assert_frame_equal(result_frame, correct_frame) - - def test_column_order_plus_index(self): - query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" - col_order = ['string_3', 'string_2'] - result_frame = gbq.read_gbq(query, project_id=_get_project_id(), - index_col='string_1', col_order=col_order, - private_key=_get_private_key_path()) - correct_frame = DataFrame( - {'string_1': ['a'], 'string_2': ['b'], 'string_3': ['c']}) - correct_frame.set_index('string_1', inplace=True) - correct_frame = correct_frame[col_order] - tm.assert_frame_equal(result_frame, correct_frame) - - def test_malformed_query(self): - with tm.assertRaises(gbq.GenericGBQException): - gbq.read_gbq("SELCET * FORM [publicdata:samples.shakespeare]", - project_id=_get_project_id(), - private_key=_get_private_key_path()) - - def test_bad_project_id(self): - with tm.assertRaises(gbq.GenericGBQException): - gbq.read_gbq("SELECT 1", project_id='001', - private_key=_get_private_key_path()) - - def test_bad_table_name(self): - with tm.assertRaises(gbq.GenericGBQException): - gbq.read_gbq("SELECT * FROM [publicdata:samples.nope]", - project_id=_get_project_id(), - private_key=_get_private_key_path()) - - def test_download_dataset_larger_than_200k_rows(self): - test_size = 200005 - # Test for known BigQuery bug in datasets larger than 100k rows - # http://stackoverflow.com/questions/19145587/bq-py-not-paging-results - df = gbq.read_gbq("SELECT id FROM [publicdata:samples.wikipedia] " - "GROUP EACH BY id ORDER BY id ASC LIMIT {0}" - .format(test_size), - project_id=_get_project_id(), - private_key=_get_private_key_path()) - self.assertEqual(len(df.drop_duplicates()), test_size) - - def 
test_zero_rows(self): - # Bug fix for https://github.com/pandas-dev/pandas/issues/10273 - df = gbq.read_gbq("SELECT title, id, is_bot, " - "SEC_TO_TIMESTAMP(timestamp) ts " - "FROM [publicdata:samples.wikipedia] " - "WHERE timestamp=-9999999", - project_id=_get_project_id(), - private_key=_get_private_key_path()) - page_array = np.zeros( - (0,), dtype=[('title', object), ('id', np.dtype(int)), - ('is_bot', np.dtype(bool)), ('ts', 'M8[ns]')]) - expected_result = DataFrame( - page_array, columns=['title', 'id', 'is_bot', 'ts']) - self.assert_frame_equal(df, expected_result) - - def test_legacy_sql(self): - legacy_sql = "SELECT id FROM [publicdata.samples.wikipedia] LIMIT 10" - - # Test that a legacy sql statement fails when - # setting dialect='standard' - with tm.assertRaises(gbq.GenericGBQException): - gbq.read_gbq(legacy_sql, project_id=_get_project_id(), - dialect='standard', - private_key=_get_private_key_path()) - - # Test that a legacy sql statement succeeds when - # setting dialect='legacy' - df = gbq.read_gbq(legacy_sql, project_id=_get_project_id(), - dialect='legacy', - private_key=_get_private_key_path()) - self.assertEqual(len(df.drop_duplicates()), 10) - - def test_standard_sql(self): - standard_sql = "SELECT DISTINCT id FROM " \ - "`publicdata.samples.wikipedia` LIMIT 10" - - # Test that a standard sql statement fails when using - # the legacy SQL dialect (default value) - with tm.assertRaises(gbq.GenericGBQException): - gbq.read_gbq(standard_sql, project_id=_get_project_id(), - private_key=_get_private_key_path()) - - # Test that a standard sql statement succeeds when - # setting dialect='standard' - df = gbq.read_gbq(standard_sql, project_id=_get_project_id(), - dialect='standard', - private_key=_get_private_key_path()) - self.assertEqual(len(df.drop_duplicates()), 10) - - def test_invalid_option_for_sql_dialect(self): - sql_statement = "SELECT DISTINCT id FROM " \ - "`publicdata.samples.wikipedia` LIMIT 10" - - # Test that an invalid option for `dialect` raises ValueError - with tm.assertRaises(ValueError): - gbq.read_gbq(sql_statement, project_id=_get_project_id(), - dialect='invalid', - private_key=_get_private_key_path()) - - # Test that a correct option for dialect succeeds - # to make sure ValueError was due to invalid dialect - gbq.read_gbq(sql_statement, project_id=_get_project_id(), - dialect='standard', private_key=_get_private_key_path()) - - def test_query_with_parameters(self): - sql_statement = "SELECT @param1 + @param2 AS valid_result" - config = { - 'query': { - "useLegacySql": False, - "parameterMode": "named", - "queryParameters": [ - { - "name": "param1", - "parameterType": { - "type": "INTEGER" - }, - "parameterValue": { - "value": 1 - } - }, - { - "name": "param2", - "parameterType": { - "type": "INTEGER" - }, - "parameterValue": { - "value": 2 - } - } - ] - } - } - # Test that a query that relies on parameters fails - # when parameters are not supplied via configuration - with tm.assertRaises(ValueError): - gbq.read_gbq(sql_statement, project_id=_get_project_id(), - private_key=_get_private_key_path()) - - # Test that the query is successful because we have supplied - # the correct query parameters via the 'config' option - df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) - tm.assert_frame_equal(df, DataFrame({'valid_result': [3]})) - - def test_query_inside_configuration(self): - query_no_use = 'SELECT "PI_WRONG" AS valid_string' - query = 'SELECT "PI" AS valid_string' - 
config = { - 'query': { - "query": query, - "useQueryCache": False, - } - } - # Test that it can't pass query both - # inside config and as parameter - with tm.assertRaises(ValueError): - gbq.read_gbq(query_no_use, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) - - df = gbq.read_gbq(None, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - - def test_configuration_without_query(self): - sql_statement = 'SELECT 1' - config = { - 'copy': { - "sourceTable": { - "projectId": _get_project_id(), - "datasetId": "publicdata:samples", - "tableId": "wikipedia" - }, - "destinationTable": { - "projectId": _get_project_id(), - "datasetId": "publicdata:samples", - "tableId": "wikipedia_copied" - }, - } - } - # Test that only 'query' configurations are supported - # nor 'copy','load','extract' - with tm.assertRaises(ValueError): - gbq.read_gbq(sql_statement, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) - - -@pytest.mark.xfail(run=False, reason="intermittent failures") +@pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): - # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 - # As a workaround to this issue, each test should use a unique table name. - # Make sure to modify the for loop range in the tearDownClass when a new - # test is added See `Issue 191 - # `__ @classmethod def setUpClass(cls): @@ -923,24 +105,10 @@ def setUpClass(cls): _skip_if_no_project_id() _skip_if_no_private_key_path() - _setup_common() clean_gbq_environment(_get_private_key_path()) - - gbq._Dataset(_get_project_id(), - private_key=_get_private_key_path() - ).create(DATASET_ID + "1") - - def setUp(self): - # - PER-TEST FIXTURES - - # put here any instruction you want to be run *BEFORE* *EVERY* test is - # executed. - - self.dataset = gbq._Dataset(_get_project_id(), - private_key=_get_private_key_path()) - self.table = gbq._Table(_get_project_id(), DATASET_ID + "1", - private_key=_get_private_key_path()) - self.sut = gbq.GbqConnector(_get_project_id(), - private_key=_get_private_key_path()) + pandas_gbq.gbq._Dataset(_get_project_id(), + private_key=_get_private_key_path() + ).create(DATASET_ID + "1") @classmethod def tearDownClass(cls): @@ -950,387 +118,19 @@ def tearDownClass(cls): clean_gbq_environment(_get_private_key_path()) - def tearDown(self): - # - PER-TEST FIXTURES - - # put here any instructions you want to be run *AFTER* *EVERY* test is - # executed. - pass - - def test_upload_data(self): + def test_roundtrip(self): destination_table = DESTINATION_TABLE + "1" test_size = 20001 df = make_mixed_dataframe_v2(test_size) - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_path()) - - sleep(30) # <- Curses Google!!! 
- - result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" - .format(destination_table), - project_id=_get_project_id(), - private_key=_get_private_key_path()) - self.assertEqual(result['num_rows'][0], test_size) - - def test_upload_data_if_table_exists_fail(self): - destination_table = DESTINATION_TABLE + "2" - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - self.table.create(TABLE_ID + "2", gbq._generate_bq_schema(df)) - - # Test the default value of if_exists is 'fail' - with tm.assertRaises(gbq.TableCreationError): - gbq.to_gbq(df, destination_table, _get_project_id(), - private_key=_get_private_key_path()) - - # Test the if_exists parameter with value 'fail' - with tm.assertRaises(gbq.TableCreationError): - gbq.to_gbq(df, destination_table, _get_project_id(), - if_exists='fail', private_key=_get_private_key_path()) - - def test_upload_data_if_table_exists_append(self): - destination_table = DESTINATION_TABLE + "3" - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - df_different_schema = tm.makeMixedDataFrame() - - # Initialize table with sample data - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_path()) - - # Test the if_exists parameter with value 'append' - gbq.to_gbq(df, destination_table, _get_project_id(), - if_exists='append', private_key=_get_private_key_path()) - - sleep(30) # <- Curses Google!!! - - result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" - .format(destination_table), - project_id=_get_project_id(), - private_key=_get_private_key_path()) - self.assertEqual(result['num_rows'][0], test_size * 2) - - # Try inserting with a different schema, confirm failure - with tm.assertRaises(gbq.InvalidSchema): - gbq.to_gbq(df_different_schema, destination_table, - _get_project_id(), if_exists='append', - private_key=_get_private_key_path()) - - def test_upload_data_if_table_exists_replace(self): - - destination_table = DESTINATION_TABLE + "4" - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - df_different_schema = tm.makeMixedDataFrame() - - # Initialize table with sample data - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_path()) - - # Test the if_exists parameter with the value 'replace'. - gbq.to_gbq(df_different_schema, destination_table, - _get_project_id(), if_exists='replace', - private_key=_get_private_key_path()) - - sleep(30) # <- Curses Google!!! 
- - result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" - .format(destination_table), - project_id=_get_project_id(), - private_key=_get_private_key_path()) - self.assertEqual(result['num_rows'][0], 5) - - @tm.slow - def test_google_upload_errors_should_raise_exception(self): - destination_table = DESTINATION_TABLE + "5" - - test_timestamp = datetime.now(pytz.timezone('US/Arizona')) - bad_df = DataFrame({'bools': [False, False], 'flts': [0.0, 1.0], - 'ints': [0, '1'], 'strs': ['a', 1], - 'times': [test_timestamp, test_timestamp]}, - index=range(2)) - - with tm.assertRaises(gbq.StreamingInsertError): - gbq.to_gbq(bad_df, destination_table, _get_project_id(), - verbose=True, private_key=_get_private_key_path()) - - def test_generate_schema(self): - df = tm.makeMixedDataFrame() - schema = gbq._generate_bq_schema(df) - - test_schema = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - - self.assertEqual(schema, test_schema) - - def test_create_table(self): - destination_table = TABLE_ID + "6" - test_schema = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - self.table.create(destination_table, test_schema) - self.assertTrue(self.table.exists(destination_table), - 'Expected table to exist') - - def test_table_does_not_exist(self): - self.assertTrue(not self.table.exists(TABLE_ID + "7"), - 'Expected table not to exist') - - def test_delete_table(self): - destination_table = TABLE_ID + "8" - test_schema = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - self.table.create(destination_table, test_schema) - self.table.delete(destination_table) - self.assertTrue(not self.table.exists( - destination_table), 'Expected table not to exist') - - def test_list_table(self): - destination_table = TABLE_ID + "9" - test_schema = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - self.table.create(destination_table, test_schema) - self.assertTrue( - destination_table in self.dataset.tables(DATASET_ID + "1"), - 'Expected table list to contain table {0}' - .format(destination_table)) - - def test_verify_schema_allows_flexible_column_order(self): - destination_table = TABLE_ID + "10" - test_schema_1 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - test_schema_2 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - - self.table.create(destination_table, test_schema_1) - self.assertTrue(self.sut.verify_schema( - DATASET_ID + "1", destination_table, test_schema_2), - 'Expected schema to match') - - def test_verify_schema_fails_different_data_type(self): - destination_table = TABLE_ID + "11" - test_schema_1 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - test_schema_2 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'STRING'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - - self.table.create(destination_table, test_schema_1) - 
self.assertFalse(self.sut.verify_schema( - DATASET_ID + "1", destination_table, test_schema_2), - 'Expected different schema') - - def test_verify_schema_fails_different_structure(self): - destination_table = TABLE_ID + "12" - test_schema_1 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - test_schema_2 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B2', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - - self.table.create(destination_table, test_schema_1) - self.assertFalse(self.sut.verify_schema( - DATASET_ID + "1", destination_table, test_schema_2), - 'Expected different schema') - - def test_upload_data_flexible_column_order(self): - destination_table = DESTINATION_TABLE + "13" - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - - # Initialize table with sample data - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_path()) - - df_columns_reversed = df[df.columns[::-1]] - - gbq.to_gbq(df_columns_reversed, destination_table, _get_project_id(), - if_exists='append', private_key=_get_private_key_path()) - - def test_list_dataset(self): - dataset_id = DATASET_ID + "1" - self.assertTrue(dataset_id in self.dataset.datasets(), - 'Expected dataset list to contain dataset {0}' - .format(dataset_id)) - - def test_list_table_zero_results(self): - dataset_id = DATASET_ID + "2" - self.dataset.create(dataset_id) - table_list = gbq._Dataset(_get_project_id(), - private_key=_get_private_key_path() - ).tables(dataset_id) - self.assertEqual(len(table_list), 0, - 'Expected gbq.list_table() to return 0') - - def test_create_dataset(self): - dataset_id = DATASET_ID + "3" - self.dataset.create(dataset_id) - self.assertTrue(dataset_id in self.dataset.datasets(), - 'Expected dataset to exist') - - def test_delete_dataset(self): - dataset_id = DATASET_ID + "4" - self.dataset.create(dataset_id) - self.dataset.delete(dataset_id) - self.assertTrue(dataset_id not in self.dataset.datasets(), - 'Expected dataset not to exist') - - def test_dataset_exists(self): - dataset_id = DATASET_ID + "5" - self.dataset.create(dataset_id) - self.assertTrue(self.dataset.exists(dataset_id), - 'Expected dataset to exist') - - def create_table_data_dataset_does_not_exist(self): - dataset_id = DATASET_ID + "6" - table_id = TABLE_ID + "1" - table_with_new_dataset = gbq._Table(_get_project_id(), dataset_id) - df = make_mixed_dataframe_v2(10) - table_with_new_dataset.create(table_id, gbq._generate_bq_schema(df)) - self.assertTrue(self.dataset.exists(dataset_id), - 'Expected dataset to exist') - self.assertTrue(table_with_new_dataset.exists( - table_id), 'Expected dataset to exist') - - def test_dataset_does_not_exist(self): - self.assertTrue(not self.dataset.exists( - DATASET_ID + "_not_found"), 'Expected dataset not to exist') - - -@pytest.mark.xfail(run=False, reason="intermittent failures") -class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase): - # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 - # As a workaround to this issue, each test should use a unique table name. - # Make sure to modify the for loop range in the tearDownClass when a new - # test is added - # See `Issue 191 - # `__ - - @classmethod - def setUpClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. 
- - _skip_if_no_project_id() - _skip_local_auth_if_in_travis_env() - - _setup_common() - clean_gbq_environment() - - def setUp(self): - # - PER-TEST FIXTURES - - # put here any instruction you want to be run *BEFORE* *EVERY* test - # is executed. - pass - - @classmethod - def tearDownClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - - clean_gbq_environment() - - def tearDown(self): - # - PER-TEST FIXTURES - - # put here any instructions you want to be run *AFTER* *EVERY* test - # is executed. - pass - - def test_upload_data(self): - destination_table = "{0}.{1}".format(DATASET_ID + "2", TABLE_ID + "1") - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000) - - sleep(30) # <- Curses Google!!! - - result = gbq.read_gbq( - "SELECT COUNT(*) AS num_rows FROM {0}".format(destination_table), - project_id=_get_project_id()) - - self.assertEqual(result['num_rows'][0], test_size) - - -@pytest.mark.xfail(run=False, reason="intermittent failures") -class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase): - # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 - # As a workaround to this issue, each test should use a unique table name. - # Make sure to modify the for loop range in the tearDownClass when a new - # test is added - # See `Issue 191 - # `__ - - @classmethod - def setUpClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. - - _setup_common() - _skip_if_no_project_id() - _skip_if_no_private_key_contents() - - clean_gbq_environment(_get_private_key_contents()) - - def setUp(self): - # - PER-TEST FIXTURES - - # put here any instruction you want to be run *BEFORE* *EVERY* test - # is executed. - pass - - @classmethod - def tearDownClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - - clean_gbq_environment(_get_private_key_contents()) - - def tearDown(self): - # - PER-TEST FIXTURES - - # put here any instructions you want to be run *AFTER* *EVERY* test - # is executed. - pass - - def test_upload_data(self): - destination_table = "{0}.{1}".format(DATASET_ID + "3", TABLE_ID + "1") - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_contents()) + df.to_gbq(destination_table, _get_project_id(), chunksize=10000, + private_key=_get_private_key_path()) sleep(30) # <- Curses Google!!! 
- - result = gbq.read_gbq( - "SELECT COUNT(*) AS num_rows FROM {0}".format(destination_table), - project_id=_get_project_id(), - private_key=_get_private_key_contents()) + result = pd.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" + .format(destination_table), + project_id=_get_project_id(), + private_key=_get_private_key_path()) self.assertEqual(result['num_rows'][0], test_size) diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 1b501eb1d9bda..d966d6b7a1b32 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -3,7 +3,7 @@ import sys import warnings from textwrap import dedent -from functools import wraps +from functools import wraps, update_wrapper def deprecate(name, alternative, alt_name=None): @@ -233,3 +233,39 @@ def make_signature(func): if spec.keywords: args.append('**' + spec.keywords) return args, spec.args + + +class docstring_wrapper(object): + """ + decorator to wrap a function, + provide a dynamically evaluated doc-string + + Parameters + ---------- + func : callable + creator : callable + return the doc-string + default : str, optional + return this doc-string on error + """ + _attrs = ['__module__', '__name__', + '__qualname__', '__annotations__'] + + def __init__(self, func, creator, default=None): + self.func = func + self.creator = creator + self.default = default + update_wrapper( + self, func, [attr for attr in self._attrs + if hasattr(func, attr)]) + + def __call__(self, func, *args, **kwargs): + return self.func(*args, **kwargs) + + @property + def __doc__(self): + try: + return self.creator() + except Exception as exc: + msg = self.default or str(exc) + return msg diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py index b0f5d3994ed64..ca75d4d02e927 100644 --- a/pandas/util/print_versions.py +++ b/pandas/util/print_versions.py @@ -88,13 +88,12 @@ def show_versions(as_json=False): ("lxml", lambda mod: mod.etree.__version__), ("bs4", lambda mod: mod.__version__), ("html5lib", lambda mod: mod.__version__), - ("httplib2", lambda mod: mod.__version__), - ("apiclient", lambda mod: mod.__version__), ("sqlalchemy", lambda mod: mod.__version__), ("pymysql", lambda mod: mod.__version__), ("psycopg2", lambda mod: mod.__version__), ("jinja2", lambda mod: mod.__version__), ("s3fs", lambda mod: mod.__version__), + ("pandas_gbq", lambda mod: mod.__version__), ("pandas_datareader", lambda mod: mod.__version__) ] From 251826f0861159160bd1d51eafadb6e0b4161f77 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Feb 2017 14:51:20 -0500 Subject: [PATCH 109/933] BUG: GH15429 transform result of timedelta from datetime The transform() operation needs to return a like-indexed Series. To facilitate this, transform starts with a copy of the original series. Then, after the computation for each group, it sets the appropriate elements of the copied series equal to the result. At that point it does a type comparison, and discovers that the timedelta is not castable to a datetime.
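A minimal reproduction, adapted from the regression test added in this patch (the frame contents are illustrative):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'A': pd.Timestamp('20130101'), 'B': np.arange(5)})

    # the UDF returns a timedelta64 result from the datetime64 column;
    # before this fix, transform coerced that result back to datetime64
    result = df.groupby('A')['A'].transform(lambda x: x.max() - x.min())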
closes #10972 Author: Jeff Reback Author: Stephen Rauch Closes #15430 from stephenrauch/group-by-transform-timedelta-from-datetime and squashes the following commits: c3b0dd0 [Jeff Reback] PEP fix 2f48549 [Jeff Reback] fixup slow transforms cc43503 [Stephen Rauch] BUG: GH15429 transform result of timedelta from datetime --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/groupby.py | 34 +++++++++++----------- pandas/tests/groupby/test_filters.py | 1 + pandas/tests/groupby/test_transform.py | 39 +++++++++++++++++++++++++- 4 files changed, 57 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f0e4176472861..7b32cee7f7064 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -637,6 +637,7 @@ Bug Fixes - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) +- Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`) - Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 831ca3886773e..2c61a73d6814e 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2890,32 +2890,32 @@ def transform(self, func, *args, **kwargs): lambda: getattr(self, func)(*args, **kwargs)) # reg transform - dtype = self._selected_obj.dtype - result = self._selected_obj.values.copy() - + klass = self._selected_obj.__class__ + results = [] wrapper = lambda x: func(x, *args, **kwargs) - for i, (name, group) in enumerate(self): + for name, group in self: object.__setattr__(group, 'name', name) res = wrapper(group) if hasattr(res, 'values'): res = res.values - # may need to astype - try: - common_type = np.common_type(np.array(res), result) - if common_type != result.dtype: - result = result.astype(common_type) - except: - pass - indexer = self._get_index(name) - result[indexer] = res + s = klass(res, indexer) + results.append(s) - result = _possibly_downcast_to_dtype(result, dtype) - return self._selected_obj.__class__(result, - index=self._selected_obj.index, - name=self._selected_obj.name) + from pandas.tools.concat import concat + result = concat(results).sort_index() + + # we will only try to coerce the result type if + # we have a numeric dtype + dtype = self._selected_obj.dtype + if is_numeric_dtype(dtype): + result = _possibly_downcast_to_dtype(result, dtype) + + result.name = self._selected_obj.name + result.index = self._selected_obj.index + return result def _transform_fast(self, func): """ diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 46ddb5a5318fb..de6757786a363 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -216,6 +216,7 @@ def test_filter_against_workaround(self): grouper = s.apply(lambda x: np.round(x, -1)) grouped = s.groupby(grouper) f = lambda x: x.mean() > 10 + old_way = s[grouped.transform(f).astype('bool')] new_way = grouped.filter(f) assert_series_equal(new_way.sort_values(), old_way.sort_values()) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index cf5e9eb26ff13..51920ec642705 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -3,7 +3,7 @@ import numpy as np
import pandas as pd from pandas.util import testing as tm -from pandas import Series, DataFrame, Timestamp, MultiIndex, concat +from pandas import Series, DataFrame, Timestamp, MultiIndex, concat, date_range from pandas.types.common import _ensure_platform_int from .common import MixIn, assert_fp_equal @@ -190,6 +190,43 @@ def test_transform_bug(self): expected = Series(np.arange(5, 0, step=-1), name='B') assert_series_equal(result, expected) + def test_transform_datetime_to_timedelta(self): + # GH 15429 + # transforming a datetime to timedelta + df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5))) + expected = pd.Series([ + Timestamp('20130101') - Timestamp('20130101')] * 5, name='A') + + # this does date math without changing result type in transform + base_time = df['A'][0] + result = df.groupby('A')['A'].transform( + lambda x: x.max() - x.min() + base_time) - base_time + assert_series_equal(result, expected) + + # this does date math and causes the transform to return timedelta + result = df.groupby('A')['A'].transform(lambda x: x.max() - x.min()) + assert_series_equal(result, expected) + + def test_transform_datetime_to_numeric(self): + # GH 10972 + # convert dt to float + df = DataFrame({ + 'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')}) + result = df.groupby('a').b.transform( + lambda x: x.dt.dayofweek - x.dt.dayofweek.mean()) + + expected = Series([-0.5, 0.5], name='b') + assert_series_equal(result, expected) + + # convert dt to int + df = DataFrame({ + 'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')}) + result = df.groupby('a').b.transform( + lambda x: x.dt.dayofweek - x.dt.dayofweek.min()) + + expected = Series([0, 1], name='b') + assert_series_equal(result, expected) + def test_transform_multiple(self): grouped = self.ts.groupby([lambda x: x.year, lambda x: x.month]) From 61fa8bed7c3aa620828923098c64af5610e0f9e3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Feb 2017 15:07:49 -0500 Subject: [PATCH 110/933] BUG: fix groupby.aggregate resulting dtype coercion, xref #11444, #13046 make sure .size includes the name of the grouped --- doc/source/whatsnew/v0.20.0.txt | 4 ++-- pandas/core/groupby.py | 23 ++++++++++++++------ pandas/tests/groupby/test_aggregate.py | 23 ++++++++++++++++++++ pandas/tests/groupby/test_transform.py | 29 +++++++++++++++++++++++++- pandas/tests/tseries/test_resample.py | 6 ++---- 5 files changed, 72 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7b32cee7f7064..9b4e6fbe3be10 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -632,12 +632,12 @@ Bug Fixes - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) -- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`) +- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) -- Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`) +- Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) - Bug in 
``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 2c61a73d6814e..3828e5dac5729 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -767,11 +767,14 @@ def _index_with_as_index(self, b): new.names = gp.names + original.names return new - def _try_cast(self, result, obj): + def _try_cast(self, result, obj, numeric_only=False): """ try to cast the result to our obj original type, we may have roundtripped through object in the meantime + if numeric_only is True, then only try to cast numerics + and not datetimelikes + """ if obj.ndim > 1: dtype = obj.values.dtype @@ -779,7 +782,8 @@ def _try_cast(self, result, obj): dtype = obj.dtype if not is_scalar(result): - result = _possibly_downcast_to_dtype(result, dtype) + if numeric_only and is_numeric_dtype(dtype) or not numeric_only: + result = _possibly_downcast_to_dtype(result, dtype) return result @@ -830,7 +834,7 @@ def _python_agg_general(self, func, *args, **kwargs): for name, obj in self._iterate_slices(): try: result, counts = self.grouper.agg_series(obj, f) - output[name] = self._try_cast(result, obj) + output[name] = self._try_cast(result, obj, numeric_only=True) except TypeError: continue @@ -1117,7 +1121,11 @@ def sem(self, ddof=1): @Appender(_doc_template) def size(self): """Compute group sizes""" - return self.grouper.size() + result = self.grouper.size() + + if isinstance(self.obj, Series): + result.name = getattr(self, 'name', None) + return result sum = _groupby_function('sum', 'add', np.sum) prod = _groupby_function('prod', 'prod', np.prod) @@ -1689,7 +1697,9 @@ def size(self): ids, _, ngroup = self.group_info ids = _ensure_platform_int(ids) out = np.bincount(ids[ids != -1], minlength=ngroup or None) - return Series(out, index=self.result_index, dtype='int64') + return Series(out, + index=self.result_index, + dtype='int64') @cache_readonly def _max_groupsize(self): @@ -2908,7 +2918,8 @@ def transform(self, func, *args, **kwargs): result = concat(results).sort_index() # we will only try to coerce the result type if - # we have a numeric dtype + # we have a numeric dtype, as these are *always* udfs + # the cython takes a different path (and casting) dtype = self._selected_obj.dtype if is_numeric_dtype(dtype): result = _possibly_downcast_to_dtype(result, dtype) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index cb739546a2312..52b35048b6762 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -154,6 +154,29 @@ def test_agg_dict_parameter_cast_result_dtypes(self): assert_series_equal(grouped.time.last(), exp['time']) assert_series_equal(grouped.time.agg('last'), exp['time']) + # count + exp = pd.Series([2, 2, 2, 2], + index=Index(list('ABCD'), name='class'), + name='time') + assert_series_equal(grouped.time.agg(len), exp) + assert_series_equal(grouped.time.size(), exp) + + exp = pd.Series([0, 1, 1, 2], + index=Index(list('ABCD'), name='class'), + name='time') + assert_series_equal(grouped.time.count(), exp) + + def test_agg_cast_results_dtypes(self): + # similar to GH12821 + # xref #11444 + u = [datetime(2015, x + 1, 1) for x in range(12)] + v = list('aaabbbbbbccd') + df = pd.DataFrame({'X': v, 'Y': u}) + + result = df.groupby('X')['Y'].agg(len) + expected =
df.groupby('X')['Y'].count() + assert_series_equal(result, expected) + def test_agg_must_agg(self): grouped = self.df.groupby('A')['C'] self.assertRaises(Exception, grouped.agg, lambda x: x.describe()) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 51920ec642705..2d21eab5822fe 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -4,7 +4,8 @@ import pandas as pd from pandas.util import testing as tm from pandas import Series, DataFrame, Timestamp, MultiIndex, concat, date_range -from pandas.types.common import _ensure_platform_int +from pandas.types.common import _ensure_platform_int, is_timedelta64_dtype +from pandas.compat import StringIO from .common import MixIn, assert_fp_equal from pandas.util.testing import assert_frame_equal, assert_series_equal @@ -227,6 +228,32 @@ def test_transform_datetime_to_numeric(self): expected = Series([0, 1], name='b') assert_series_equal(result, expected) + def test_transform_casting(self): + # 13046 + data = """ + idx A ID3 DATETIME + 0 B-028 b76cd912ff "2014-10-08 13:43:27" + 1 B-054 4a57ed0b02 "2014-10-08 14:26:19" + 2 B-076 1a682034f8 "2014-10-08 14:29:01" + 3 B-023 b76cd912ff "2014-10-08 18:39:34" + 4 B-023 f88g8d7sds "2014-10-08 18:40:18" + 5 B-033 b76cd912ff "2014-10-08 18:44:30" + 6 B-032 b76cd912ff "2014-10-08 18:46:00" + 7 B-037 b76cd912ff "2014-10-08 18:52:15" + 8 B-046 db959faf02 "2014-10-08 18:59:59" + 9 B-053 b76cd912ff "2014-10-08 19:17:48" + 10 B-065 b76cd912ff "2014-10-08 19:21:38" + """ + df = pd.read_csv(StringIO(data), sep='\s+', + index_col=[0], parse_dates=['DATETIME']) + + result = df.groupby('ID3')['DATETIME'].transform(lambda x: x.diff()) + assert is_timedelta64_dtype(result.dtype) + + result = df[['ID3', 'DATETIME']].groupby('ID3').transform( + lambda x: x.diff()) + assert is_timedelta64_dtype(result.DATETIME.dtype) + def test_transform_multiple(self): grouped = self.ts.groupby([lambda x: x.year, lambda x: x.month]) diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py index 6e999c5b1d276..1535bd665fe8b 100755 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/tseries/test_resample.py @@ -757,10 +757,8 @@ def test_resample_empty_series(self): freq in ['M', 'D']): # GH12871 - TODO: name should propagate, but currently # doesn't on lower / same frequency with PeriodIndex - assert_series_equal(result, expected, check_dtype=False, - check_names=False) - # this assert will break when fixed - self.assertTrue(result.name is None) + assert_series_equal(result, expected, check_dtype=False) + else: assert_series_equal(result, expected, check_dtype=False) From e0647ba059e57cd391cba1296d7cc039e7b2fc7e Mon Sep 17 00:00:00 2001 From: "Dr. 
Irv" Date: Mon, 27 Feb 2017 16:43:01 -0500 Subject: [PATCH 111/933] DOC: Update contributing for test_fast, fix doc Windows build (#15523) * DOC: Update contributing for test_fast, fix doc Windows build * add pip install for xdist --- doc/make.py | 4 ++-- doc/source/contributing.rst | 29 ++++++++++++++++++++--------- test_fast.bat | 3 +++ 3 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 test_fast.bat diff --git a/doc/make.py b/doc/make.py index d46be2611ce3d..8a6d4e5df24f0 100755 --- a/doc/make.py +++ b/doc/make.py @@ -202,8 +202,8 @@ def html(): raise SystemExit("Building HTML failed.") try: # remove stale file - os.system('rm source/html-styling.html') - os.system('cd build; rm -f html/pandas.zip;') + os.remove('source/html-styling.html') + os.remove('build/html/pandas.zip') except: pass diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 2f838a3ab2386..83f99b4f01b26 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -520,15 +520,6 @@ submitting code to run the check yourself on the diff:: git diff master | flake8 --diff -Furthermore, we've written a tool to check that your commits are PEP8 great, `pip install pep8radius -`_. Look at PEP8 fixes in your branch vs master with:: - - pep8radius master --diff - -and make these changes with:: - - pep8radius master --diff --in-place - Backwards Compatibility ~~~~~~~~~~~~~~~~~~~~~~~ @@ -611,6 +602,26 @@ Or with one of the following constructs:: pytest pandas/tests/[test-module].py::[TestClass] pytest pandas/tests/[test-module].py::[TestClass]::[test_method] +Using `pytest-xdist `_, one can +speed up local testing on multicore machines. To use this feature, you will +need to install `pytest-xdist` via:: + + pip install pytest-xdist + +Two scripts are provided to assist with this. These scripts distribute +testing across 4 threads. + +On Unix variants, one can type:: + + test_fast.sh + +On Windows, one can type:: + + test_fast.bat + +This can significantly reduce the time it takes to locally run tests before +submitting a pull request. + For more, see the `pytest `_ documentation. .. 
versionadded:: 0.20.0 diff --git a/test_fast.bat b/test_fast.bat new file mode 100644 index 0000000000000..17dc54b580137 --- /dev/null +++ b/test_fast.bat @@ -0,0 +1,3 @@ +:: test on windows +set PYTHONHASHSEED=314159265 +pytest --skip-slow --skip-network -m "not single" -n 4 pandas From edd29390403baf9fc3de577871d6472c52a6ca80 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Feb 2017 20:18:21 -0500 Subject: [PATCH 112/933] BUG: fix to_gbq calling convention; now it's a bound method of DataFrame xref #15484 --- pandas/core/frame.py | 16 +++++++++++----- pandas/util/decorators.py | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7b02926ea8837..0963d14762ce5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -942,11 +942,6 @@ def to_gbq(self, destination_table, project_id, chunksize=10000, chunksize=chunksize, verbose=verbose, reauth=reauth, if_exists=if_exists, private_key=private_key) - def _f(): - from pandas.io.gbq import _try_import - return _try_import().to_gbq.__doc__ - to_gbq = docstring_wrapper(to_gbq, _f) - @classmethod def from_records(cls, data, index=None, exclude=None, columns=None, coerce_float=False, nrows=None): @@ -5430,6 +5425,17 @@ def combineMult(self, other): _EMPTY_SERIES = Series([]) +# patch in the doc-string for to_gbq +# and bind this method +def _f(): + from pandas.io.gbq import _try_import + return _try_import().to_gbq.__doc__ + + +DataFrame.to_gbq = types.MethodType(docstring_wrapper(DataFrame.to_gbq, _f), + DataFrame) + + def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None): """ Segregate Series based on type and coerce into matrices. diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index d966d6b7a1b32..d1ca480f7a568 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -259,7 +259,7 @@ def __init__(self, func, creator, default=None): self, func, [attr for attr in self._attrs if hasattr(func, attr)]) - def __call__(self, func, *args, **kwargs): + def __call__(self, *args, **kwargs): return self.func(*args, **kwargs) From 23889d3ec8396925269210d6d5782574e61769bd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Feb 2017 23:43:16 -0500 Subject: [PATCH 113/933] BUG: fix calling convention for to_gbq, take 2 --- pandas/core/frame.py | 3 +-- pandas/util/decorators.py | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0963d14762ce5..c47490bfbede4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5432,8 +5432,7 @@ def _f(): return _try_import().to_gbq.__doc__ -DataFrame.to_gbq = types.MethodType(docstring_wrapper(DataFrame.to_gbq, _f), - DataFrame) +DataFrame.to_gbq = docstring_wrapper(DataFrame.to_gbq, _f) def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None): diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index d1ca480f7a568..ee7e2f4302b10 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -1,5 +1,6 @@ from pandas.compat import StringIO, callable, signature from pandas.lib import cache_readonly # noqa +import types import sys import warnings from textwrap import dedent @@ -259,6 +260,10 @@ def __init__(self, func, creator, default=None): self, func, [attr for attr in self._attrs if hasattr(func, attr)]) + def __get__(self, instance, cls=None): + # we want to return the actual passed instance + return types.MethodType(self, instance) + def __call__(self, *args, **kwargs): return self.func(*args, **kwargs) From 7b84eb603d3a3d62f0a7cf9483acac5c168b7533 Mon Sep 17 00:00:00 2001 From: Sam Foo Date: Tue, 28 Feb 2017 02:43:22 -0800 Subject: [PATCH 114/933] DEPR: rename consolidate to _consolidate and create deprecation warning (#15501) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/generic.py | 14 +++++++++++--- pandas/core/groupby.py | 4 ++-- pandas/io/pytables.py | 6 +++--- pandas/tests/frame/test_block_internals.py | 11 ++++++++--- pandas/tests/frame/test_nonunique_indexes.py | 2 +- pandas/tests/io/test_pytables.py | 14 +++++++------- pandas/tests/test_generic.py | 2 +- pandas/tests/test_panel4d.py | 2 +- pandas/tools/concat.py | 2 +- pandas/tseries/resample.py | 2 +- 11 files changed, 37 insertions(+), 23 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9b4e6fbe3be10..f91ffcdb81f9b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -493,6 +493,7 @@ Deprecations - ``DataFrame.astype()`` has deprecated the ``raise_on_error`` parameter in favor of ``errors`` (:issue:`14878`) - ``Series.sortlevel`` and ``DataFrame.sortlevel`` have been deprecated in favor of ``Series.sort_index`` and ``DataFrame.sort_index`` (:issue:`15099`) - importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explicit imports (:issue:`15358`) +- ``Series/DataFrame/Panel.consolidate()`` has been deprecated as a public method. (:issue:`15483`) .. _whatsnew_0200.prior_deprecations: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cdc37e00f70e0..127aac970fbc1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2875,11 +2875,10 @@ def f(): self._protect_consolidate(f) - def consolidate(self, inplace=False): + def _consolidate(self, inplace=False): """ Compute NDFrame with "consolidated" internals (data of each dtype - grouped together in a single ndarray). Mainly an internal API function, - but available here to the savvy user + grouped together in a single ndarray). Parameters ---------- @@ -2898,6 +2897,15 @@ def f(): cons_data = self._protect_consolidate(f) return self._constructor(cons_data).__finalize__(self) + def consolidate(self, inplace=False): + """ + DEPRECATED: consolidate will be an internal implementation only.
+ """ + # 15483 + warnings.warn("consolidate is deprecated and will be removed in a " + "future release.", FutureWarning, stacklevel=2) + return self._consolidate(inplace) + @property def _is_mixed_type(self): f = lambda: self._data.is_mixed_type diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 3828e5dac5729..381a8edcb5192 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -3904,7 +3904,7 @@ def _wrap_aggregated_output(self, output, names=None): if not self.as_index: result = DataFrame(output, columns=output_keys) self._insert_inaxis_grouper_inplace(result) - result = result.consolidate() + result = result._consolidate() else: index = self.grouper.result_index result = DataFrame(output, index=index, columns=output_keys) @@ -3924,7 +3924,7 @@ def _wrap_agged_blocks(self, items, blocks): result = DataFrame(mgr) self._insert_inaxis_grouper_inplace(result) - result = result.consolidate() + result = result._consolidate() else: index = self.grouper.result_index mgr = BlockManager(blocks, [items, index]) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 65ac4e5654dce..06154a86f95fa 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -835,7 +835,7 @@ def func(_start, _stop, _where): # concat and return return concat(objs, axis=axis, - verify_integrity=False).consolidate() + verify_integrity=False)._consolidate() # create the iterator it = TableIterator(self, s, func, where=where, nrows=nrows, @@ -3442,7 +3442,7 @@ def get_blk_items(mgr, blocks): return [mgr.items.take(blk.mgr_locs) for blk in blocks] # figure out data_columns and get out blocks - block_obj = self.get_object(obj).consolidate() + block_obj = self.get_object(obj)._consolidate() blocks = block_obj._data.blocks blk_items = get_blk_items(block_obj._data, blocks) if len(self.non_index_axes): @@ -3809,7 +3809,7 @@ def read(self, where=None, columns=None, **kwargs): if len(objs) == 1: wp = objs[0] else: - wp = concat(objs, axis=0, verify_integrity=False).consolidate() + wp = concat(objs, axis=0, verify_integrity=False)._consolidate() # apply the selection filters & axis orderings wp = self.process_axes(wp, columns=columns) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 7b64dea8c102d..accd3ddeb03d7 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -40,19 +40,24 @@ def test_cast_internals(self): def test_consolidate(self): self.frame['E'] = 7. - consolidated = self.frame.consolidate() + consolidated = self.frame._consolidate() self.assertEqual(len(consolidated._data.blocks), 1) # Ensure copy, do I want this? - recons = consolidated.consolidate() + recons = consolidated._consolidate() self.assertIsNot(recons, consolidated) assert_frame_equal(recons, consolidated) self.frame['F'] = 8. 
self.assertEqual(len(self.frame._data.blocks), 3) - self.frame.consolidate(inplace=True) + self.frame._consolidate(inplace=True) self.assertEqual(len(self.frame._data.blocks), 1) + def test_consolidate_deprecation(self): + self.frame['E'] = 7 + with tm.assert_produces_warning(FutureWarning): + self.frame.consolidate() + def test_consolidate_inplace(self): frame = self.frame.copy() # noqa diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 4ad88a12a2625..d6bcb85e01910 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -87,7 +87,7 @@ def check(result, expected=None): check(df, expected) # consolidate - df = df.consolidate() + df = df._consolidate() expected = DataFrame([[1, 1, 'bah', 3], [1, 2, 'bah', 3], [2, 3, 'bah', 3]], columns=['foo', 'foo', 'string', 'foo2']) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index a840ff46aa845..d5a8b380d01f9 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -418,7 +418,7 @@ def test_repr(self): df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0) df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0) df.loc[3:6, ['obj1']] = np.nan - df = df.consolidate()._convert(datetime=True) + df = df._consolidate()._convert(datetime=True) warnings.filterwarnings('ignore', category=PerformanceWarning) store['df'] = df @@ -762,7 +762,7 @@ def test_put_mixed_type(self): df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0) df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0) df.loc[3:6, ['obj1']] = np.nan - df = df.consolidate()._convert(datetime=True) + df = df._consolidate()._convert(datetime=True) with ensure_clean_store(self.path) as store: _maybe_remove(store, 'df') @@ -2077,7 +2077,7 @@ def test_table_mixed_dtypes(self): df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0) df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0) df.loc[3:6, ['obj1']] = np.nan - df = df.consolidate()._convert(datetime=True) + df = df._consolidate()._convert(datetime=True) with ensure_clean_store(self.path) as store: store.append('df1_mixed', df) @@ -2091,7 +2091,7 @@ def test_table_mixed_dtypes(self): wp['bool2'] = wp['ItemB'] > 0 wp['int1'] = 1 wp['int2'] = 2 - wp = wp.consolidate() + wp = wp._consolidate() with ensure_clean_store(self.path) as store: store.append('p1_mixed', wp) @@ -2106,7 +2106,7 @@ def test_table_mixed_dtypes(self): wp['bool2'] = wp['l2'] > 0 wp['int1'] = 1 wp['int2'] = 2 - wp = wp.consolidate() + wp = wp._consolidate() with ensure_clean_store(self.path) as store: store.append('p4d_mixed', wp) @@ -2134,7 +2134,7 @@ def test_unimplemented_dtypes_table_columns(self): df['obj1'] = 'foo' df['obj2'] = 'bar' df['datetime1'] = datetime.date(2001, 1, 2) - df = df.consolidate()._convert(datetime=True) + df = df._consolidate()._convert(datetime=True) with ensure_clean_store(self.path) as store: # this fails because we have a date in the object block...... 
@@ -2949,7 +2949,7 @@ def _make_one(): df['bool2'] = df['B'] > 0 df['int1'] = 1 df['int2'] = 2 - return df.consolidate() + return df._consolidate() df1 = _make_one() df2 = _make_one() diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 40cdbe083acd7..a2329e2d1768e 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -658,7 +658,7 @@ def test_validate_bool_args(self): super(DataFrame, df).sort_index(inplace=value) with self.assertRaises(ValueError): - super(DataFrame, df).consolidate(inplace=value) + super(DataFrame, df)._consolidate(inplace=value) with self.assertRaises(ValueError): super(DataFrame, df).fillna(value=0, inplace=value) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 902b42e7d77d7..2491bac2a7f19 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -677,7 +677,7 @@ def test_consolidate(self): self.panel4d['foo'] = 1. self.assertFalse(self.panel4d._data.is_consolidated()) - panel4d = self.panel4d.consolidate() + panel4d = self.panel4d._consolidate() self.assertTrue(panel4d._data.is_consolidated()) def test_ctor_dict(self): diff --git a/pandas/tools/concat.py b/pandas/tools/concat.py index 31d7a9eb9a01a..6405106118472 100644 --- a/pandas/tools/concat.py +++ b/pandas/tools/concat.py @@ -263,7 +263,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, raise TypeError("cannot concatenate a non-NDFrame object") # consolidate - obj.consolidate(inplace=True) + obj._consolidate(inplace=True) ndims.add(obj.ndim) # get the sample diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index a6a10c08966d6..75e550a065fd2 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -221,7 +221,7 @@ def _convert_obj(self, obj): ------- obj : converted object """ - obj = obj.consolidate() + obj = obj._consolidate() return obj def _get_binner_for_time(self): From dd368eb574f7f62f8e8e8d667d68b5d06ae241de Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 28 Feb 2017 09:17:08 -0500 Subject: [PATCH 115/933] DEPR: remove pd.TimeSeries & Series.is_time_series xref #10890 Author: Jeff Reback Closes #15098 from jreback/time_series and squashes the following commits: d9101bc [Jeff Reback] fix back-compat for < 0.13 ed57bd5 [Jeff Reback] DEPR: remove legacy pd.TimeSeries class in favor of pd.Series --- doc/source/whatsnew/v0.20.0.txt | 42 ++++++++++++++++++++- pandas/compat/pickle_compat.py | 5 ++- pandas/core/api.py | 2 +- pandas/core/series.py | 17 --------- pandas/io/pytables.py | 4 -- pandas/tests/api/test_api.py | 2 +- pandas/tests/indexes/data/s1-0.12.0.pickle | Bin 862 -> 0 bytes pandas/tests/indexes/data/s2-0.12.0.pickle | Bin 814 -> 0 bytes pandas/tests/indexes/test_base.py | 11 ------ pandas/tests/io/data/legacy_hdf/legacy.h5 | Bin 14928 -> 0 bytes pandas/tests/io/test_pytables.py | 9 ----- pandas/tests/series/test_alter_axes.py | 3 -- pandas/tests/series/test_constructors.py | 15 -------- pandas/tests/series/test_timeseries.py | 2 - 14 files changed, 47 insertions(+), 65 deletions(-) delete mode 100644 pandas/tests/indexes/data/s1-0.12.0.pickle delete mode 100644 pandas/tests/indexes/data/s2-0.12.0.pickle delete mode 100644 pandas/tests/io/data/legacy_hdf/legacy.h5 diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f91ffcdb81f9b..671df5760fb84 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -246,6 +246,46 @@ Using ``.iloc``. 
Here we will get the location of the 'A' column, then use *posi

    df.iloc[[0, 2], df.columns.get_loc('A')]

+.. _whatsnew.api_breaking.io_compat:
+
+Possible incompat for HDF5 formats for pandas < 0.13.0
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``pd.TimeSeries`` was deprecated officially in 0.17.0, though has only been an alias since 0.13.0. It has
+been dropped in favor of ``pd.Series`` (:issue:`15098`).
+
+This *may* cause HDF5 files that were created in prior versions to become unreadable if ``pd.TimeSeries``
+was used. This is most likely to be the case for pandas < 0.13.0. If you find yourself in this situation,
+you can use a recent prior version of pandas to read in your HDF5 files,
+then write them out again after applying the procedure below.
+
+.. code-block:: ipython
+
+   In [2]: s = pd.TimeSeries([1,2,3], index=pd.date_range('20130101', periods=3))
+
+   In [3]: s
+   Out[3]:
+   2013-01-01    1
+   2013-01-02    2
+   2013-01-03    3
+   Freq: D, dtype: int64
+
+   In [4]: type(s)
+   Out[4]: pandas.core.series.TimeSeries
+
+   In [5]: s = pd.Series(s)
+
+   In [6]: s
+   Out[6]:
+   2013-01-01    1
+   2013-01-02    2
+   2013-01-03    3
+   Freq: D, dtype: int64
+
+   In [7]: type(s)
+   Out[7]: pandas.core.series.Series
+
+
 .. _whatsnew_0200.api_breaking.index_map:

 Map on Index types now return other Index types
@@ -507,7 +547,7 @@ Removal of prior version deprecations/changes
   Similar functionality can be found in the `Google2Pandas `__ package.
 - ``pd.to_datetime`` and ``pd.to_timedelta`` have dropped the ``coerce`` parameter in favor of ``errors`` (:issue:`13602`)
 - ``pandas.stats.fama_macbeth``, ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var``, as well as the top-level ``pandas.fama_macbeth`` and ``pandas.ols`` routines are removed. Similar functionality can be found in the `statsmodels `__ package. (:issue:`11898`)
-
+- ``Series.is_time_series`` is dropped in favor of ``Series.index.is_all_dates`` (:issue:``)

 .. _whatsnew_0200.performance:

diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py
index 240baa848adbc..b8ccd13c153d4 100644
--- a/pandas/compat/pickle_compat.py
+++ b/pandas/compat/pickle_compat.py
@@ -58,7 +58,10 @@ def load_reduce(self):

     # 15477
     ('pandas.core.base', 'FrozenNDArray'): ('pandas.indexes.frozen', 'FrozenNDArray'),
-    ('pandas.core.base', 'FrozenList'): ('pandas.indexes.frozen', 'FrozenList')
+    ('pandas.core.base', 'FrozenList'): ('pandas.indexes.frozen', 'FrozenList'),
+
+    # 10890
+    ('pandas.core.series', 'TimeSeries'): ('pandas.core.series', 'Series')
 }

diff --git a/pandas/core/api.py b/pandas/core/api.py
index 177e7b31cbd4f..eaebf45a038a0 100644
--- a/pandas/core/api.py
+++ b/pandas/core/api.py
@@ -13,7 +13,7 @@
                           UInt64Index, RangeIndex, Float64Index,
                           MultiIndex)

-from pandas.core.series import Series, TimeSeries
+from pandas.core.series import Series
 from pandas.core.frame import DataFrame
 from pandas.core.panel import Panel, WidePanel
 from pandas.core.panel4d import Panel4D

diff --git a/pandas/core/series.py b/pandas/core/series.py
index da47ab5dfb003..ffe1be26fda54 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -277,13 +277,6 @@ def _constructor_expanddim(self):
     def _can_hold_na(self):
         return self._data._can_hold_na

-    @property
-    def is_time_series(self):
-        warnings.warn("is_time_series is deprecated.
Please use " - "Series.index.is_all_dates", FutureWarning, stacklevel=2) - # return self._subtyp in ['time_series', 'sparse_time_series'] - return self.index.is_all_dates - _index = None def _set_axis(self, axis, labels, fastpath=False): @@ -2985,16 +2978,6 @@ def create_from_value(value, index, dtype): return subarr -# backwards compatiblity -class TimeSeries(Series): - - def __init__(self, *args, **kwargs): - # deprecation TimeSeries, #10890 - warnings.warn("TimeSeries is deprecated. Please use Series", - FutureWarning, stacklevel=2) - - super(TimeSeries, self).__init__(*args, **kwargs) - # ---------------------------------------------------------------------- # Add plotting methods to Series diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 06154a86f95fa..9ad53db305b59 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -23,8 +23,6 @@ from pandas.types.missing import array_equivalent import numpy as np - -import pandas as pd from pandas import (Series, DataFrame, Panel, Panel4D, Index, MultiIndex, Int64Index, isnull, concat, SparseSeries, SparseDataFrame, PeriodIndex, @@ -166,7 +164,6 @@ class DuplicateWarning(Warning): Series: u('series'), SparseSeries: u('sparse_series'), - pd.TimeSeries: u('series'), DataFrame: u('frame'), SparseDataFrame: u('sparse_frame'), Panel: u('wide'), @@ -175,7 +172,6 @@ class DuplicateWarning(Warning): # storer class map _STORER_MAP = { - u('TimeSeries'): 'LegacySeriesFixed', u('Series'): 'LegacySeriesFixed', u('DataFrame'): 'LegacyFrameFixed', u('DataMatrix'): 'LegacyFrameFixed', diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 90a0c1d5c9347..8ca369f8df83a 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -57,7 +57,7 @@ class TestPDApi(Base, tm.TestCase): 'TimedeltaIndex', 'Timestamp'] # these are already deprecated; awaiting removal - deprecated_classes = ['TimeSeries', 'WidePanel', + deprecated_classes = ['WidePanel', 'SparseTimeSeries', 'Panel4D', 'SparseList'] diff --git a/pandas/tests/indexes/data/s1-0.12.0.pickle b/pandas/tests/indexes/data/s1-0.12.0.pickle deleted file mode 100644 index 0ce9cfdf3aa94fdfd9f8ad6ea00e72fa7eda6552..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 862 zcmZo*O3o|IEvVE>&M!*U%Pq|*$xJLNO049HFG@|$&nqq|DorloDr8J9NX$z~EQTm6 zPA$qzE#?Zz%uNl3FbkQy8CpXbliGs{nKir_z2#aV#&V^UR2HOi6|#gfrCE40cryYO zuxfZShcepu`T2SM2LdqR%}|om8T!FSUEoT>tcUxr)s_8Ijs3LWI_;WmV#&?@vK#N0 zoL>KQf8a;{X^E~M_s?K#x%;8zsr`i_ZuK`-fA@cJS*24K`*Oe8y*l?PA3p8(`ei62 zbnS%w5$TfaS1jJ`*NZZ%)|&BS|JvJ~j)$H<-hVEx)^DEoqx~v>jx4H;Il6z+xzj(_ zHvYH2f1A@KgW>9a)gQa(NFF=8|Ec5GEr(Bh*?<3;k-CNYJNvscUe7tHch>&PvcCaG zj~}sj+&X>D?sxa?tt`$aH9h;b|8i~p|IC=r_VS!ZE_W{fxSv5`anj?~XZ!izWbc?U z_u77j@IL~N8t(6RE7W>y+IiN#Bm*9>C6GYX%gjqjt>AJ=EJ-Z^2CpZWSI7nrU3N(5 zW&i`!7Z#u#8s03J0a~jO%9K|Oj0+$vX#)o@1H;3TA1{@885j)MdM#9dbp3S2b`X6| zW7|~_ExXH0MHNVM<#7n80qMy9bK*esiV22mLG*XUW4}SP+vX@Qbs%k=E@7qtq?>-M zX#vp}I~^~BXa(tOikd(=e@#{ah>l6nh|vPlt3U2p1)_g71^xulo+8iOwSn}ErB%~F zbZewVlMax+^6Jb55UpI3D6b2oi+I1~g6Mh<-JF)+2r15n#=YH_g(Fr^oAYBm;f)fRGVgmUKP z7v(1AWLBjX@^pr>K~xv=CZ|B7g9{WVDXBRniCl$zz~l^szy!@7%9354Qd}rd8_JmH R%~+b`*WQ-o*VdM#2LML=S0Vra diff --git a/pandas/tests/indexes/data/s2-0.12.0.pickle b/pandas/tests/indexes/data/s2-0.12.0.pickle deleted file mode 100644 index 2318be2d9978bd2edefc4fe1afb2244e0f4c4601..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 814 zcmZY7Z%7ki9KiA0G;?Q5kzN!cDrru$iA6FbI2KcLS0k+>lx5wvGnekR`!h;wro&Cv 
zs|e9i=8K{d(<1sqr0`HpGi+k`r@(CBaFL0)GE-C5b1r?;J#e2p`0>2>@~ao?X{*Un zBiEZ*N^Y`N^G1?o$r|(wOX9fc?+Ad{*{T=rTjKyZwHh~7?*j!)rvISJi}9740r_w|xsf(d z7f}_-Q#{OAwEo9LZC2bGu2>1f9oq;OEyE-K4`-7RVw!`^;o+U@84+Yi_IxZ=iXe$E z)v0Mvl#Y(u11$};l?!%U%jp*UoznkDu;59VIvsl8+|FDX)V|c!DEPsp91%=T7*EN7 zn!Cn+Dzz!U~i}3ioJMQuxLl1a4*z-x3)#lQL3nVSi_Bq=86iA;yLuNl{3;5 z$Eo1vGLno22DLFUGD1R|Srb`ptfiQ3E+Q&C%}Dizf7wx?y@9IcNtD?R*ApJNps_?` Y)dd5`#MuZDjf<>0O_NinaXNMKziLWDZU6uP diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 2f5b98d145e57..79d10cbda565e 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -9,8 +9,6 @@ from pandas.compat import (range, lrange, lzip, u, text_type, zip, PY3, PY36) import operator -import os - import numpy as np from pandas import (period_range, date_range, Series, @@ -381,15 +379,6 @@ def test_view_with_args(self): # with arguments ind.view('i8') - def test_legacy_pickle_identity(self): - - # GH 8431 - pth = tm.get_data_path() - s1 = pd.read_pickle(os.path.join(pth, 's1-0.12.0.pickle')) - s2 = pd.read_pickle(os.path.join(pth, 's2-0.12.0.pickle')) - self.assertFalse(s1.index.identical(s2.index)) - self.assertFalse(s1.index.equals(s2.index)) - def test_astype(self): casted = self.intIndex.astype('i8') diff --git a/pandas/tests/io/data/legacy_hdf/legacy.h5 b/pandas/tests/io/data/legacy_hdf/legacy.h5 deleted file mode 100644 index 38b822dd169945b5f5022a219cc784a4c9951320..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14928 zcmeHN2|Sfq8^0(aNlls-)tzdjME0fVxU%M2vRxdOIp-4O%o+G)mJFVB)2b> zR!SU1C>2wKk%ViP>|4lp-|f3TrP24zeA85~pZ9;3=RNN^&w0-Cod0=msJ^!D6rnjn zB;*nhAWa~NqDkU5j9;cJu@WLr)H~n_9vt`K$l}EkJS2V+Qa1}P7scBr^I-)Ic)5j{ zjt+?xV&MLp&<5Sx#dQlO6X@v4 zboJ-jvnC<%foN`QqRqpJUbwlE1SY({0m44XV(*tD`rjx1emOE;okBGHe({gCmWa=V zGq5h2;yva1qmRSi@o+pO>H~2{Nc3A2alo7lu`T4{<5&bDf)pd0oQZgzmzNiPQh?(x zkpv%)=wuFXuH|@(SZ+FQQAnYL8L#nZJ&g3@h=~&qQ_RdLHk>?H{%t>wm?$AHFQdT8 zjg%jsp3#RmH_ZNd|84&%#6lnhMj>IQ`*3}Ys{U-VOdXnvA;WUrCc2Wbs;$4VqShJ} zIjX5?fRhr{z{Fpkrf2DE%kXtIwAHtuu3=dC2T-?K(3q;$dQMs@6cq;3NZU8qc8i|5 zk-s5b(NLKYpcD|`tK{WG*L77=Hc{T>>`Hf)w=&Z6u-37qF^z1Tf&y*b6cv2dIT^?) zY+7UKq2;ckX1>nD*2%!%YK@Y=v5m67vBD->McqJy&1tzUG$3 zI*yK;186Q@pNf@cND*qJh?G2|6krD7KXgv^Qm=)}oqlUGwV+&J~O5cICj* zZ3za2T@7%CnqcS7NhPrKzVO7_;w)%hQd0SAPAL#ns~O-CZ-)!geN}8D?f{aDII|pC z85vqzQ52`|L~%>R9u%M62t~0$G7?2?lNc0ZnLndA7|lYFmUR(DPo*>z^VVmfxOjIi zin~|^D3(7eLa~ifg(B6x4n@|%W)vsfXhYHASvQIS#uJ|-{VNO*LUDQYR227R&OouI zem07Iwo6g8_)Z4Jqyz;Ng&(P+=-#D)Am6H*nxpxH@cG52z&*pcu*kXnY4D6bSjueJ zb|fepyf|qhq?%F&9d15}kIHxfLlQ#r{F|Zy%cxH*n%w{rCy*q>O5ecU*L-{u=)VDx z74z!y5*lE)Mc>BRU)O-C8fUdv);)s9&UA0DkS>FEGn4E#oahG^=1(r`yD$Xp?b3gU zF{^?_#mfd(@TCF%FvI%P^-(}Tl=n>X+-D#(M_R_ZI~59xSlOOl(+LNbP{rBOU2x@G z+bP~bselrebh>&OoVZ&L%t5fqn zyZNPPt|b3$z!~kUW{Ol4H5i_JP$EU5=nCbhFrcP zjJCsY^mcLd-xv_ga76`>u!K+vB0g$61~}^aIeKBWNcp?@3Rwngf4qF9bOk0)?DMeP zm;2|U0b~t~94imb_jz!`n~zt1sNHkrksYT1X)}lLAWb25J#wGuAK*v#c0=r05Vdtx z{JmrKgAd=YP9phZ(;jV#XpOmW^R zOp--MjPM3zKHlYTM$U`5s~;WXrSJALm0x2D9Iu}_F&mR7_T9Mpzrbgrfw9g-_ThF! 
zsNs)37a@3YtT^@t8vY{gy86(#kI9eZy(8oPe|vt(Lz;}mdffjJ{jQvYb(qWNMNv8A zT$1pAh{V`_Z}}9gg};~I8_nnU{mur3@GG>r*l54pF%66rN4fStzPQ4be>=G5fA2AhT7S)Q2Cm-{EDfzffDjE~R(wvRLg{NYV{=NJk7T1x{Vwdb9JTV4fdmIwU zfjou)UQGC(6N}$3_o>wWb#Yw?wNn}%SVs2)r=9=t-3bvLdw)8J+cEJZHbb8pFX#HD zxA#vYc{lew`c^5s8AQHx5|XJbfk|X(E&H4% zaQ^ZJT`QLs&e?tU9owU?p|l+TuR8(4zu83%;mnN(w*>z zaonK#&H->cQAuqmGXs{{x5*7|dkJ@_UcILGT_sd^lrpEfJq2+hZ#vKK?Sm}WBX{eL zS3>&4V;&tBs=!sZZjJ2kSztwE((`2YSs+81)*r!}1`gD0$l4{526@W;4d!_~ftUFQ zXY2F6fT;^3ykgqw;NI=5r$JAj1Njv$p356`EsK<~ot!xElTJ1qTrc8xes44MovI^g*j@^L zig-kQl2QQV*H;&a=rzFRy^^p_fQwL(Q+hY~rA_posTSZ9zgWq+Jd@pTpuLmc6>8NE#b;eRs2JS{dG#3a*_R4HB14?^t9~UoK>eO&|N1eI1pSuP@!0*L2D|tF*LH-l$TM=GQ2^FkY8tclco|j2e>lmA(87q>#g^u6I^Lnad?YafwghNv2`3pFSH*77_Tt zG`tdiZr=_G-d2fkbXd}uEBjF1hJ5!ia^xJ66B|XYNBoldqJhsw17q>bJbYadiukj5 zCT$t^1{(e%?tbp$=9y$Utiy!+_;8--z7%WW@69t$;CjHP?=2vBW*+|i{4D{z0NaIg zGT6`6#uEEv%184%f_HH3e|+bdT=}>8&%4i$|7f0xetPE23&EJMDf)l&9IhTd)NlOn zdVV+0#J;<0{+rKZ7XHWc%tOkU#{XKL`Dg3@lUev*2J)9^pky^Kxc}@KX#Tm8-d*E6 zUGwW;!?CP=Cpmnt9+t-J>}ml&9&epOmP-J!?hcGxk8+s4%P_<(z7x)EbW$*w*9aay zc~u>@{yw<+u(sc30vpEMuav1=R0XNM$>Jt^TEY3;_Wc*<6oPuWc7fyO9bms{_T7rs zSFlt3)PtNDrw9I9t3jdI$><+ezJ`19^|XfVs-V=JoCVvGdO?=N#YHyWzW{qpSiF2e zEszYk{b2A|Jx~ysrt3{@0z6UIQqGd^fmTJv-lMv$;2)i~X@{;S0K343@8m3dfpht` zqmifYf(=!V8`xXeFvF&g{5_=#lz7s7ww-SW`aAk}2(Ntw*{-HLX!onYy!Gl^4((_F zvZu~IIrQ=gbU&T_;^d)PxKvtVi|*lWu(@jcw~>!{GCDTNOpZVI3W$6iQhM064SYp) z6MQ;ltuv5t^$X0lVP*>1@ebc!d_0x?i9g773V>!UxKr z+ghXP*Y|Wo+5@RW^$%{t#i?E4TCg4%T=jc(di^y(qu5^d=^cQASsFlS#ubp@F!P>y zOB?JPvba}t^(;K2bP6O!6v81dp`FrlQK0X5wm_(CCS2^Uus7g!4M^O*Dk!XY7`UYQ z(+xY*;R*5liEW_|U_|hip)A!dC{etOA?QrXXg+Ls>e!2VsC{f1Fwd_52f{br-TpKa z9L;fy6mzWrb`Q7aR;yP7i+Hn+21Pd1F7mLf%pCyYB7&6zce0_Lg44AMhblOHxZ;5Q z<3iY4U(@HY_9f&yAtjX_(+FO=#b!&LZUzZ`hWl?l=mJ-7?Dd;5aR8V+HVqNqT?bko x8?i3QR6}yy+Rzlh2C-)qD{|JB!30;^z>@kW@LtB&8lx@sKrE{CS2Ol6@LyVSHP`?E diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index d5a8b380d01f9..821d9956a2dfa 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4454,15 +4454,6 @@ def test_pytables_native2_read(self): d1 = store['detector'] self.assertIsInstance(d1, DataFrame) - def test_legacy_read(self): - with ensure_clean_store( - tm.get_data_path('legacy_hdf/legacy.h5'), - mode='r') as store: - store['a'] - store['b'] - store['c'] - store['d'] - def test_legacy_table_read(self): # legacy table types with ensure_clean_store( diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 6473dbeeaa1bc..5997b91097cbc 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -107,9 +107,6 @@ def test_set_index_makes_timeseries(self): s = Series(lrange(10)) s.index = idx - - with tm.assert_produces_warning(FutureWarning): - self.assertTrue(s.is_time_series) self.assertTrue(s.index.is_all_dates) def test_reset_index(self): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index aef4c9269bc62..c15171f331df3 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -37,22 +37,11 @@ def test_scalar_conversion(self): self.assertEqual(int(Series([1.])), 1) self.assertEqual(long(Series([1.])), 1) - def test_TimeSeries_deprecation(self): - - # deprecation TimeSeries, #10890 - with tm.assert_produces_warning(FutureWarning): - pd.TimeSeries(1, index=date_range('20130101', periods=3)) - def test_constructor(self): - # Recognize TimeSeries - with 
tm.assert_produces_warning(FutureWarning): - self.assertTrue(self.ts.is_time_series) self.assertTrue(self.ts.index.is_all_dates) # Pass in Series derived = Series(self.ts) - with tm.assert_produces_warning(FutureWarning): - self.assertTrue(derived.is_time_series) self.assertTrue(derived.index.is_all_dates) self.assertTrue(tm.equalContents(derived.index, self.ts.index)) @@ -64,11 +53,7 @@ def test_constructor(self): self.assertEqual(mixed.dtype, np.object_) self.assertIs(mixed[1], np.NaN) - with tm.assert_produces_warning(FutureWarning): - self.assertFalse(self.empty.is_time_series) self.assertFalse(self.empty.index.is_all_dates) - with tm.assert_produces_warning(FutureWarning): - self.assertFalse(Series({}).is_time_series) self.assertFalse(Series({}).index.is_all_dates) self.assertRaises(Exception, Series, np.random.randn(3, 3), index=np.arange(3)) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index e0db813e60c14..8c22b3f047210 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -383,8 +383,6 @@ def test_mpl_compat_hack(self): def test_timeseries_coercion(self): idx = tm.makeDateIndex(10000) ser = Series(np.random.randn(len(idx)), idx.astype(object)) - with tm.assert_produces_warning(FutureWarning): - self.assertTrue(ser.is_time_series) self.assertTrue(ser.index.is_all_dates) self.assertIsInstance(ser.index, DatetimeIndex) From d0a281fd60a2099c932151280af88d5392ea9a84 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 28 Feb 2017 09:26:10 -0500 Subject: [PATCH 116/933] BUG: DataFrame index & column returned by corr & cov are the same (#14617) closes #14617 Author: Matt Roeschke Closes #15528 from mroeschke/fix_14617 and squashes the following commits: 5a46f0a [Matt Roeschke] Bug:DataFrame index & column returned by corr & cov are the same (#14617) --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/frame.py | 6 ++++-- pandas/tests/frame/test_analytics.py | 9 +++++++++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 671df5760fb84..54df7514a882d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -631,7 +631,7 @@ Bug Fixes - Bug in ``.rank()`` which incorrectly ranks ordered categories (:issue:`15420`) - +- Bug in ``.corr()`` and ``.cov()`` where the column and index were the same object (:issue:`14617`) - Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c47490bfbede4..021ce59e3402b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4725,6 +4725,7 @@ def corr(self, method='pearson', min_periods=1): """ numeric_df = self._get_numeric_data() cols = numeric_df.columns + idx = cols.copy() mat = numeric_df.values if method == 'pearson': @@ -4757,7 +4758,7 @@ def corr(self, method='pearson', min_periods=1): correl[i, j] = c correl[j, i] = c - return self._constructor(correl, index=cols, columns=cols) + return self._constructor(correl, index=idx, columns=cols) def cov(self, min_periods=None): """ @@ -4780,6 +4781,7 @@ def cov(self, min_periods=None): """ numeric_df = self._get_numeric_data() cols = numeric_df.columns + idx = cols.copy() mat = numeric_df.values if notnull(mat).all(): @@ -4793,7 +4795,7 @@ def cov(self, min_periods=None): baseCov = _algos.nancorr(_ensure_float64(mat), cov=True, minp=min_periods) - return self._constructor(baseCov, 
index=cols, columns=cols) + return self._constructor(baseCov, index=idx, columns=cols) def corrwith(self, other, axis=0, drop=False): """ diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 1f0d16e959cd7..111195363beb2 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -118,6 +118,15 @@ def test_corr_int_and_boolean(self): for meth in ['pearson', 'kendall', 'spearman']: tm.assert_frame_equal(df.corr(meth), expected) + def test_corr_cov_independent_index_column(self): + # GH 14617 + df = pd.DataFrame(np.random.randn(4 * 10).reshape(10, 4), + columns=list("abcd")) + for method in ['cov', 'corr']: + result = getattr(df, method)() + assert result.index is not result.columns + assert result.index.equals(result.columns) + def test_cov(self): # min_periods no NAs (corner case) expected = self.frame.cov() From 2340fb8b97a3f65ce4f630075849e42fb256e3be Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 1 Mar 2017 14:16:50 -0500 Subject: [PATCH 117/933] BLD: fix 2.7_LOCALE build simplify install_travis.sh, always using a clean miniconda install bump matplotlib to 1.4.0 for _LOCALE, numpy to 1.8.2 So this removes our testing of mpl 1.2.1 with numpy 1.7.1. We *still* test elsewhere 1.7.1, and mpl 1.3.1 (with 1.8.2) Author: Jeff Reback Closes #15540 from jreback/build and squashes the following commits: 58b6f2f [Jeff Reback] BLD: fix 2.7_LOCALE build --- ci/install_travis.sh | 88 +++++++++++++++----------------- ci/requirements-2.7_LOCALE.build | 2 +- ci/requirements-2.7_LOCALE.run | 8 ++- 3 files changed, 44 insertions(+), 54 deletions(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 802d8c9f6b776..b337f6e443be2 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -32,58 +32,50 @@ edit_init home_dir=$(pwd) echo "[home_dir: $home_dir]" -MINICONDA_DIR="$HOME/miniconda3" +# install miniconda +echo "[Using clean Miniconda install]" -if [ -d "$MINICONDA_DIR" ] && [ -e "$MINICONDA_DIR/bin/conda" ] && [ "$USE_CACHE" ]; then - echo "[Miniconda install already present from cache: $MINICONDA_DIR]" - - conda config --set always_yes yes --set changeps1 no || exit 1 - echo "[update conda]" - conda update -q conda || exit 1 - - # Useful for debugging any issues with conda - conda info -a || exit 1 - - # set the compiler cache to work - if [ "${TRAVIS_OS_NAME}" == "linux" ]; then - echo "[Using ccache]" - export PATH=/usr/lib/ccache:/usr/lib64/ccache:$PATH - gcc=$(which gcc) - echo "[gcc: $gcc]" - ccache=$(which ccache) - echo "[ccache: $ccache]" - export CC='ccache gcc' - fi +MINICONDA_DIR="$HOME/miniconda3" +if [ -d "$MINICONDA_DIR" ]; then + rm -rf "$MINICONDA_DIR" +fi +# install miniconda +if [ "${TRAVIS_OS_NAME}" == "osx" ]; then + wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 +else + wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 +fi +bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 + +echo "[update conda]" +conda config --set ssl_verify false || exit 1 +conda config --set always_yes true --set changeps1 false || exit 1 +conda update -q conda + +# add the pandas channel to take priority +# to add extra packages +echo "[add channels]" +conda config --add channels pandas || exit 1 +conda config --remove channels defaults || exit 1 +conda config --add channels defaults || exit 1 + +conda install anaconda-client + +# Useful for debugging any issues with conda +conda info -a || exit 1 + +# 
set the compiler cache to work
+if [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then
+    echo "[Using ccache]"
+    export PATH=/usr/lib/ccache:/usr/lib64/ccache:$PATH
+    gcc=$(which gcc)
+    echo "[gcc: $gcc]"
+    ccache=$(which ccache)
+    echo "[ccache: $ccache]"
+    export CC='ccache gcc'
+else
-    echo "[Using clean Miniconda install]"
     echo "[Not using ccache]"
-    rm -rf "$MINICONDA_DIR"
-    # install miniconda
-    if [ "${TRAVIS_OS_NAME}" == "osx" ]; then
-        wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1
-    else
-        wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1
-    fi
-    bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1
-
-    echo "[update conda]"
-    conda config --set ssl_verify false || exit 1
-    conda config --set always_yes true --set changeps1 false || exit 1
-    conda update -q conda
-
-    # add the pandas channel to take priority
-    # to add extra packages
-    echo "[add channels]"
-    conda config --add channels pandas || exit 1
-    conda config --remove channels defaults || exit 1
-    conda config --add channels defaults || exit 1
-
-    conda install anaconda-client
-
-    # Useful for debugging any issues with conda
-    conda info -a || exit 1
-
 fi

 # may have installation instructions for this build
diff --git a/ci/requirements-2.7_LOCALE.build b/ci/requirements-2.7_LOCALE.build
index c17730b912651..28e2b96851eff 100644
--- a/ci/requirements-2.7_LOCALE.build
+++ b/ci/requirements-2.7_LOCALE.build
@@ -1,4 +1,4 @@
 python-dateutil
 pytz=2013b
-numpy=1.7.1
+numpy=1.8.2
 cython=0.23
diff --git a/ci/requirements-2.7_LOCALE.run b/ci/requirements-2.7_LOCALE.run
index 1a9b42d832b0b..5d7cc31b7d55e 100644
--- a/ci/requirements-2.7_LOCALE.run
+++ b/ci/requirements-2.7_LOCALE.run
@@ -1,16 +1,14 @@
 python-dateutil
 pytz=2013b
-numpy=1.7.1
+numpy=1.8.2
 xlwt=0.7.5
 openpyxl=1.6.2
 xlsxwriter=0.4.6
 xlrd=0.9.2
 bottleneck=0.8.0
-matplotlib=1.2.1
-patsy=0.1.0
+matplotlib=1.3.1
 sqlalchemy=0.8.1
 html5lib=1.0b2
 lxml=3.2.1
-scipy=0.11.0
+scipy
 beautiful-soup=4.2.1
-bigquery=2.0.17

From 1c106c8427513775c59e1e93a20829fc67a0a983 Mon Sep 17 00:00:00 2001
From: Prasanjit Prakash
Date: Wed, 1 Mar 2017 16:16:12 -0500
Subject: [PATCH 118/933] PERF: Rank categorical perf

closes #15498

Author: Prasanjit Prakash

Closes #15518 from ikilledthecat/rank_categorical_perf and squashes the following commits:

30b49b9 [Prasanjit Prakash] PERF: GH15498 - pep8 changes
ad38544 [Prasanjit Prakash] PERF: GH15498 - asv tests and whatsnew
1ebdb56 [Prasanjit Prakash] PERF: categorical rank GH#15498
a67cd85 [Prasanjit Prakash] PERF: categorical rank GH#15498
81df7df [Prasanjit Prakash] PERF: categorical rank GH#15498
45dd125 [Prasanjit Prakash] PERF: categorical rank GH#15498
33249b3 [Prasanjit Prakash] PERF: categorical rank GH#15498
---
 asv_bench/benchmarks/categoricals.py  | 34 +++++++++++++++++++++++++++
 doc/source/whatsnew/v0.20.0.txt       |  1 +
 pandas/core/algorithms.py             |  1 +
 pandas/core/categorical.py            |  9 ++++++-
 pandas/tests/series/test_analytics.py | 33 +++++++++++++++++++++------
 5 files changed, 69 insertions(+), 9 deletions(-)

diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index cca652c68cf15..153107911ca2c 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -63,3 +63,37 @@ def time_value_counts_dropna(self):

     def time_rendering(self):
         str(self.sel)
+
+
+class Categoricals3(object):
+    goal_time = 0.2
+
+    def setup(self):
+        N = 100000
+        ncats = 100
+
+        self.s1 =
Series(np.array(tm.makeCategoricalIndex(N, ncats))) + self.s1_cat = self.s1.astype('category') + self.s1_cat_ordered = self.s1.astype('category', ordered=True) + + self.s2 = Series(np.random.randint(0, ncats, size=N)) + self.s2_cat = self.s2.astype('category') + self.s2_cat_ordered = self.s2.astype('category', ordered=True) + + def time_rank_string(self): + self.s1.rank() + + def time_rank_string_cat(self): + self.s1_cat.rank() + + def time_rank_string_cat_ordered(self): + self.s1_cat_ordered.rank() + + def time_rank_int(self): + self.s2.rank() + + def time_rank_int_cat(self): + self.s2_cat.rank() + + def time_rank_int_cat_ordered(self): + self.s2_cat_ordered.rank() diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 54df7514a882d..6e9dfb92dfd90 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -562,6 +562,7 @@ Performance Improvements - Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`) - Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. (:issue:`14947`) +- Improved performance of `rank()` for categorical data (:issue:`15498`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b11927a80fb2e..55d404f05dd1d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -992,6 +992,7 @@ def _get_data_algo(values, func_map): elif is_unsigned_integer_dtype(values): f = func_map['uint64'] values = _ensure_uint64(values) + else: values = _ensure_object(values) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index b88a6b171b316..d5dce250275d9 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1416,14 +1416,21 @@ def _values_for_rank(self): numpy array """ + from pandas import Series if self.ordered: values = self.codes mask = values == -1 if mask.any(): values = values.astype('float64') values[mask] = np.nan - else: + elif self.categories.is_numeric(): values = np.array(self) + else: + # reorder the categories (so rank can use the float codes) + # instead of passing an object array to rank + values = np.array( + self.rename_categories(Series(self.categories).rank()) + ) return values def order(self, inplace=False, ascending=True, na_position='last'): diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index b092e4f084767..b6985abb64e40 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1065,8 +1065,10 @@ def test_rank_categorical(self): exp_desc = pd.Series([6., 5., 4., 3., 2., 1.]) ordered = pd.Series( ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'] - ).astype('category', ).cat.set_categories( - ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], + ).astype( + 'category', + categories=['first', 'second', 'third', + 'fourth', 'fifth', 'sixth'], ordered=True ) assert_series_equal(ordered.rank(), exp) @@ -1075,19 +1077,33 @@ def test_rank_categorical(self): # Unordered categoricals should be ranked as objects unordered = pd.Series( ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], - ).astype('category').cat.set_categories( - ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], + ).astype( + 'category', + categories=['first', 'second', 'third', + 'fourth', 'fifth', 'sixth'], ordered=False ) exp_unordered = 
pd.Series([2., 4., 6., 3., 1., 5.]) res = unordered.rank() assert_series_equal(res, exp_unordered) + unordered1 = pd.Series( + [1, 2, 3, 4, 5, 6], + ).astype( + 'category', + categories=[1, 2, 3, 4, 5, 6], + ordered=False + ) + exp_unordered1 = pd.Series([1., 2., 3., 4., 5., 6.]) + res1 = unordered1.rank() + assert_series_equal(res1, exp_unordered1) + # Test na_option for rank data na_ser = pd.Series( ['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN] - ).astype('category', ).cat.set_categories( - [ + ).astype( + 'category', + categories=[ 'first', 'second', 'third', 'fourth', 'fifth', 'sixth', 'seventh' ], @@ -1123,8 +1139,9 @@ def test_rank_categorical(self): # Test with pct=True na_ser = pd.Series( ['first', 'second', 'third', 'fourth', np.NaN], - ).astype('category').cat.set_categories( - ['first', 'second', 'third', 'fourth'], + ).astype( + 'category', + categories=['first', 'second', 'third', 'fourth'], ordered=True ) exp_top = pd.Series([0.4, 0.6, 0.8, 1., 0.2]) From 4121c75e5447a2983d2db9f40f196a6684e0a6b6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 1 Mar 2017 19:39:14 -0500 Subject: [PATCH 119/933] COMPAT: if docstring_wrapper is activated on a class, don't fail --- pandas/util/decorators.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index ee7e2f4302b10..62ff6ef14418a 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -261,6 +261,11 @@ def __init__(self, func, creator, default=None): if hasattr(func, attr)]) def __get__(self, instance, cls=None): + + # we are called with a class + if instance is None: + return self + # we want to return the actual passed instance return types.MethodType(self, instance) From 5441d39237e078308b45c65faca6a8355de8bd27 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 1 Mar 2017 19:54:04 -0500 Subject: [PATCH 120/933] DOC: add pandas-gbq to doc-build --- ci/requirements-3.5_DOC_BUILD.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/requirements-3.5_DOC_BUILD.sh b/ci/requirements-3.5_DOC_BUILD.sh index 25bc63acc96d1..1a5d4643edcf2 100644 --- a/ci/requirements-3.5_DOC_BUILD.sh +++ b/ci/requirements-3.5_DOC_BUILD.sh @@ -4,6 +4,8 @@ source activate pandas echo "[install DOC_BUILD deps]" +pip install pandas-gbq + conda install -n pandas -c conda-forge feather-format conda install -n pandas -c r r rpy2 --yes From 29d81f3df81eb0a4d077ae1317df74d509cdc446 Mon Sep 17 00:00:00 2001 From: Chris Date: Wed, 1 Mar 2017 21:19:55 -0500 Subject: [PATCH 121/933] DOC: Styler.set_table_attributes docstring Author: Chris Closes #15545 from chris-b1/styler-docstring and squashes the following commits: d77a9f1 [Chris] DOC: Style.set_table_attributes docstring --- pandas/formats/style.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/formats/style.py b/pandas/formats/style.py index 89712910a22e1..e712010a8b4f2 100644 --- a/pandas/formats/style.py +++ b/pandas/formats/style.py @@ -631,11 +631,17 @@ def set_table_attributes(self, attributes): Parameters ---------- - precision: int + attributes : string Returns ------- self : Styler + + Examples + -------- + >>> df = pd.DataFrame(np.random.randn(10, 4)) + >>> df.style.set_table_attributes('class="pure-table"') + # ...
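The expected output in the docstring example above is the rendered ``<table>`` tag carrying the new attribute (the literal HTML did not survive markup stripping here). A usage sketch of the documented method; the ``pure-table`` class name is illustrative only, and rendering requires the optional jinja2 dependency:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(10, 4))

    # attach arbitrary attributes to the opening <table> tag
    styler = df.style.set_table_attributes('class="pure-table"')

    # the rendered HTML now carries the attribute on its table element
    html = styler.render()
    print('class="pure-table"' in html)  # True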
[Extraction residue: a README.md diff whose HTML badge table lost its markup here, along with its commit header. The surviving fragments ("latest release", "Package Status") are badge-row labels, and the added lines introduce a "circleci build status" badge row to that table.]
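On the ``docstring_wrapper.__get__`` guard added a few patches above: when an attribute is looked up on the class itself rather than on an instance, Python's descriptor protocol passes ``instance=None``, and blindly binding a method to ``None`` is what used to fail. A self-contained sketch of that protocol point; the ``wrapper`` class is a distilled illustration, not the pandas implementation:

    import types


    class wrapper(object):
        """Callable descriptor in the spirit of docstring_wrapper."""

        def __init__(self, func):
            self.func = func

        def __call__(self, instance, *args, **kwargs):
            return self.func(instance, *args, **kwargs)

        def __get__(self, instance, cls=None):
            # class-level access, e.g. C.method: there is no instance
            # to bind to, so return the descriptor itself
            if instance is None:
                return self
            # instance-level access: bind the wrapper to the instance
            return types.MethodType(self, instance)


    class C(object):
        @wrapper
        def method(self):
            return 42


    C.method      # returns the wrapper object itself, without failing
    C().method()  # returns 42 via the bound wrapper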
From 1400305899d55bee21253952de9f6f0cf245b089 Mon Sep 17 00:00:00 2001
From: James Goppert
Date: Wed, 22 Feb 2017 10:58:41 -0500
Subject: [PATCH 086/933] ENH: Adds custom plot formatting for TimedeltaIndex.

Author: James Goppert

Closes #8711
Closes #15067 from jgoppert/tdi_plot_fix and squashes the following commits:

945ec14 [James Goppert] Merge branch 'master' into tdi_plot_fix
7db61ec [James Goppert] Create TimeSeries_TimedeltaFormatter.
232efe6 [James Goppert] Fix comment format and exception type for tdi plotting.
4eff697 [James Goppert] Add more time delta series plotting tests.
f5f32bc [James Goppert] Link time delta index docs to better matplotlib docs.
d588c2c [James Goppert] Fixes test for tdi w/o autofmt_xdate.
b6e6a81 [James Goppert] Disables autofmt_xdate testing.
c7851e3 [James Goppert] Adjusts tdi test draw calls to try to fix CI issue.
7d28842 [James Goppert] Switch to draw_idle to try to fix bug on xticks update.
3abc310 [James Goppert] Try plt.draw() instead of canvas.draw() to fix issue on osx 3.5.
91954bd [James Goppert] Finished unit test for timedelta plotting.
41ebc85 [James Goppert] Fixes for review comments from #15067.
f021cbd [James Goppert] Support nano-second level precision x-axis labels.
5ec65fa [James Goppert] Plot fix for tdi and added more comments.
b967d24 [James Goppert] flake8 fixes for tdi plotting.
efe5636 [James Goppert] Adds custom plot formatting for TimedeltaIndex.
---
 doc/source/visualization.rst               | 12 ++++
 doc/source/whatsnew/v0.20.0.txt            |  2 +-
 pandas/tests/plotting/test_datetimelike.py | 58 +++++++++++++++++
 pandas/tools/plotting.py                   |  2 +-
 pandas/tseries/converter.py                | 30 +++++++++
 pandas/tseries/plotting.py                 | 73 +++++++++++++++------
 6 files changed, 154 insertions(+), 23 deletions(-)

diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst
index 2b2012dbf0b8a..e8998bf6f6f5c 100644
--- a/doc/source/visualization.rst
+++ b/doc/source/visualization.rst
@@ -1245,6 +1245,18 @@ in ``pandas.plot_params`` can be used in a `with statement`:

     plt.close('all')

+Automatic Date Tick Adjustment
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 0.20.0
+
+``TimedeltaIndex`` now uses the native matplotlib
+tick locator methods, so it is useful to call the automatic
+date tick adjustment from matplotlib for figures whose ticklabels overlap.
+
+See the :meth:`autofmt_xdate ` method and the
+`matplotlib documentation `__ for more.
+
 Subplots
 ~~~~~~~~

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 86f916bc0acfb..9124929ee5665 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -155,7 +155,7 @@ Other enhancements
 - ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`)
 - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`)
 - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`)
-
+- ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`)

 .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations

 ..
_whatsnew_0200.api_breaking: diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 25568f7eb61dc..cdacded4d7f35 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -9,6 +9,7 @@ from pandas import Index, Series, DataFrame from pandas.tseries.index import date_range, bdate_range +from pandas.tseries.tdi import timedelta_range from pandas.tseries.offsets import DateOffset from pandas.tseries.period import period_range, Period, PeriodIndex from pandas.tseries.resample import DatetimeIndex @@ -1270,6 +1271,63 @@ def test_plot_outofbounds_datetime(self): values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)] self.plt.plot(values) + def test_format_timedelta_ticks_narrow(self): + + expected_labels = [ + '00:00:00.00000000{:d}'.format(i) + for i in range(10)] + + rng = timedelta_range('0', periods=10, freq='ns') + df = DataFrame(np.random.randn(len(rng), 3), rng) + ax = df.plot(fontsize=2) + fig = ax.get_figure() + fig.canvas.draw() + labels = ax.get_xticklabels() + self.assertEqual(len(labels), len(expected_labels)) + for l, l_expected in zip(labels, expected_labels): + self.assertEqual(l.get_text(), l_expected) + + def test_format_timedelta_ticks_wide(self): + + expected_labels = [ + '00:00:00', + '1 days 03:46:40', + '2 days 07:33:20', + '3 days 11:20:00', + '4 days 15:06:40', + '5 days 18:53:20', + '6 days 22:40:00', + '8 days 02:26:40', + '' + ] + + rng = timedelta_range('0', periods=10, freq='1 d') + df = DataFrame(np.random.randn(len(rng), 3), rng) + ax = df.plot(fontsize=2) + fig = ax.get_figure() + fig.canvas.draw() + labels = ax.get_xticklabels() + self.assertEqual(len(labels), len(expected_labels)) + for l, l_expected in zip(labels, expected_labels): + self.assertEqual(l.get_text(), l_expected) + + def test_timedelta_plot(self): + # test issue #8711 + s = Series(range(5), timedelta_range('1day', periods=5)) + _check_plot_works(s.plot) + + # test long period + index = timedelta_range('1 day 2 hr 30 min 10 s', + periods=10, freq='1 d') + s = Series(np.random.randn(len(index)), index) + _check_plot_works(s.plot) + + # test short period + index = timedelta_range('1 day 2 hr 30 min 10 s', + periods=10, freq='1 ns') + s = Series(np.random.randn(len(index)), index) + _check_plot_works(s.plot) + def _check_plot_works(f, freq=None, series=None, *args, **kwargs): import matplotlib.pyplot as plt diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index b2050d7d8d81e..d46c38c117445 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -1781,7 +1781,7 @@ def _ts_plot(cls, ax, x, data, style=None, **kwds): lines = cls._plot(ax, data.index, data.values, style=style, **kwds) # set date formatter, locators and rescale limits - format_dateaxis(ax, ax.freq) + format_dateaxis(ax, ax.freq, data.index) return lines def _get_stacking_id(self): diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 95ff9578fa3ee..db7049ebc89b3 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -1000,3 +1000,33 @@ def __call__(self, x, pos=0): else: fmt = self.formatdict.pop(x, '') return Period(ordinal=int(x), freq=self.freq).strftime(fmt) + + +class TimeSeries_TimedeltaFormatter(Formatter): + """ + Formats the ticks along an axis controlled by a :class:`TimedeltaIndex`. 
+ """ + + @staticmethod + def format_timedelta_ticks(x, pos, n_decimals): + """ + Convert seconds to 'D days HH:MM:SS.F' + """ + s, ns = divmod(x, 1e9) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + d, h = divmod(h, 24) + decimals = int(ns * 10**(n_decimals - 9)) + s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) + if n_decimals > 0: + s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) + if d != 0: + s = '{:d} days '.format(int(d)) + s + return s + + def __call__(self, x, pos=0): + (vmin, vmax) = tuple(self.axis.get_view_interval()) + n_decimals = int(np.ceil(np.log10(100 * 1e9 / (vmax - vmin)))) + if n_decimals > 9: + n_decimals = 9 + return self.format_timedelta_ticks(x, pos, n_decimals) diff --git a/pandas/tseries/plotting.py b/pandas/tseries/plotting.py index 89aecf2acc07e..4eddf54701889 100644 --- a/pandas/tseries/plotting.py +++ b/pandas/tseries/plotting.py @@ -12,11 +12,14 @@ from pandas.tseries.offsets import DateOffset import pandas.tseries.frequencies as frequencies from pandas.tseries.index import DatetimeIndex +from pandas.tseries.period import PeriodIndex +from pandas.tseries.tdi import TimedeltaIndex from pandas.formats.printing import pprint_thing import pandas.compat as compat from pandas.tseries.converter import (TimeSeries_DateLocator, - TimeSeries_DateFormatter) + TimeSeries_DateFormatter, + TimeSeries_TimedeltaFormatter) # --------------------------------------------------------------------- # Plotting functions and monkey patches @@ -49,7 +52,7 @@ def tsplot(series, plotf, ax=None, **kwargs): lines = plotf(ax, series.index._mpl_repr(), series.values, **kwargs) # set date formatter, locators and rescale limits - format_dateaxis(ax, ax.freq) + format_dateaxis(ax, ax.freq, series.index) return lines @@ -278,8 +281,24 @@ def _maybe_convert_index(ax, data): # Patch methods for subplot. Only format_dateaxis is currently used. # Do we need the rest for convenience? - -def format_dateaxis(subplot, freq): +def format_timedelta_ticks(x, pos, n_decimals): + """ + Convert seconds to 'D days HH:MM:SS.F' + """ + s, ns = divmod(x, 1e9) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + d, h = divmod(h, 24) + decimals = int(ns * 10**(n_decimals - 9)) + s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) + if n_decimals > 0: + s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) + if d != 0: + s = '{:d} days '.format(int(d)) + s + return s + + +def format_dateaxis(subplot, freq, index): """ Pretty-formats the date axis (x-axis). @@ -288,26 +307,38 @@ def format_dateaxis(subplot, freq): default, changing the limits of the x axis will intelligently change the positions of the ticks. """ - majlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, - minor_locator=False, - plot_obj=subplot) - minlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, - minor_locator=True, - plot_obj=subplot) - subplot.xaxis.set_major_locator(majlocator) - subplot.xaxis.set_minor_locator(minlocator) - - majformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + + # handle index specific formatting + # Note: DatetimeIndex does not use this + # interface. 
DatetimeIndex uses matplotlib.date directly + if isinstance(index, PeriodIndex): + + majlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, minor_locator=False, plot_obj=subplot) - minformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + minlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, minor_locator=True, plot_obj=subplot) - subplot.xaxis.set_major_formatter(majformatter) - subplot.xaxis.set_minor_formatter(minformatter) - - # x and y coord info - subplot.format_coord = lambda t, y: ( - "t = {0} y = {1:8f}".format(Period(ordinal=int(t), freq=freq), y)) + subplot.xaxis.set_major_locator(majlocator) + subplot.xaxis.set_minor_locator(minlocator) + + majformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + minor_locator=False, + plot_obj=subplot) + minformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + minor_locator=True, + plot_obj=subplot) + subplot.xaxis.set_major_formatter(majformatter) + subplot.xaxis.set_minor_formatter(minformatter) + + # x and y coord info + subplot.format_coord = lambda t, y: ( + "t = {0} y = {1:8f}".format(Period(ordinal=int(t), freq=freq), y)) + + elif isinstance(index, TimedeltaIndex): + subplot.xaxis.set_major_formatter( + TimeSeries_TimedeltaFormatter()) + else: + raise TypeError('index type not supported') pylab.draw_if_interactive() From 486e384a0525dc348ae8cfc30da3de6f1dc9c500 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Feb 2017 11:04:25 -0500 Subject: [PATCH 087/933] TST: skip some timedelta plotting tests on mac (on travis) for precision display issues xref #15067 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/plotting/test_datetimelike.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9124929ee5665..355756c6e605c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -156,6 +156,7 @@ Other enhancements - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) + .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations .. 
_whatsnew_0200.api_breaking:

diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py
index cdacded4d7f35..673c34903b259 100644
--- a/pandas/tests/plotting/test_datetimelike.py
+++ b/pandas/tests/plotting/test_datetimelike.py
@@ -7,7 +7,7 @@
 import numpy as np

 from pandas import Index, Series, DataFrame
-
+from pandas.compat import is_platform_mac
 from pandas.tseries.index import date_range, bdate_range
 from pandas.tseries.tdi import timedelta_range
 from pandas.tseries.offsets import DateOffset
@@ -1272,6 +1272,8 @@ def test_plot_outofbounds_datetime(self):
         self.plt.plot(values)

     def test_format_timedelta_ticks_narrow(self):
+        if is_platform_mac():
+            pytest.skip("skip on mac for precision display issue on older mpl")

         expected_labels = [
             '00:00:00.00000000{:d}'.format(i)
@@ -1288,6 +1290,8 @@ def test_format_timedelta_ticks_narrow(self):
             self.assertEqual(l.get_text(), l_expected)

     def test_format_timedelta_ticks_wide(self):
+        if is_platform_mac():
+            pytest.skip("skip on mac for precision display issue on older mpl")

         expected_labels = [
             '00:00:00',

From 14fee4f3925994f5fd9761bd455b42b5a97b7a38 Mon Sep 17 00:00:00 2001
From: Justin Solinsky
Date: Wed, 22 Feb 2017 11:21:27 -0500
Subject: [PATCH 088/933] ENH union_categoricals supports ignore_order GH13410

xref #13410 (ignore_order portion)

Author: Justin Solinsky

Closes #15219 from js3711/GH13410-ENHunion_categoricals and squashes the following commits:

e9d00de [Justin Solinsky] GH15219 Documentation fixes based on feedback
d278d62 [Justin Solinsky] ENH union_categoricals supports ignore_order GH13410
9b827ef [Justin Solinsky] ENH union_categoricals supports ignore_order GH13410
---
 doc/source/categorical.rst        | 11 +++++++
 doc/source/whatsnew/v0.20.0.txt   |  2 ++
 pandas/tests/tools/test_concat.py | 54 +++++++++++++++++++++++++++++++
 pandas/types/concat.py            | 16 ++++++---
 4 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst
index 18e429cfc92fa..db974922e1d76 100644
--- a/doc/source/categorical.rst
+++ b/doc/source/categorical.rst
@@ -693,6 +693,17 @@ The below raises ``TypeError`` because the categories are ordered and not identi
     Out[3]:
     TypeError: to union ordered Categoricals, all categories must be the same

+.. versionadded:: 0.20.0
+
+Ordered categoricals with different categories or orderings can be combined by
+using the ``ignore_order=True`` argument.
+
+.. ipython:: python
+
+    a = pd.Categorical(["a", "b", "c"], ordered=True)
+    b = pd.Categorical(["c", "b", "a"], ordered=True)
+    union_categoricals([a, b], ignore_order=True)
+
 ``union_categoricals`` also works with a
 ``CategoricalIndex``, or ``Series`` containing categorical data, but note that
 the resulting array will always be a plain ``Categorical``

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 355756c6e605c..bb5f19b301dc8 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -156,9 +156,11 @@ Other enhancements
 - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`)
 - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1.
(:issue:`15403`)
 - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`)
+- ``pd.types.concat.union_categoricals`` gained the ``ignore_order`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information.

 .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations

+
 .. _whatsnew_0200.api_breaking:

diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py
index 2a28fccdc9b94..6d40de465bff8 100644
--- a/pandas/tests/tools/test_concat.py
+++ b/pandas/tests/tools/test_concat.py
@@ -1662,6 +1662,60 @@ def test_union_categoricals_ordered(self):
         with tm.assertRaisesRegexp(TypeError, msg):
             union_categoricals([c1, c2])

+    def test_union_categoricals_ignore_order(self):
+        # GH 15219
+        c1 = Categorical([1, 2, 3], ordered=True)
+        c2 = Categorical([1, 2, 3], ordered=False)
+
+        res = union_categoricals([c1, c2], ignore_order=True)
+        exp = Categorical([1, 2, 3, 1, 2, 3])
+        tm.assert_categorical_equal(res, exp)
+
+        msg = 'Categorical.ordered must be the same'
+        with tm.assertRaisesRegexp(TypeError, msg):
+            union_categoricals([c1, c2], ignore_order=False)
+
+        res = union_categoricals([c1, c1], ignore_order=True)
+        exp = Categorical([1, 2, 3, 1, 2, 3])
+        tm.assert_categorical_equal(res, exp)
+
+        res = union_categoricals([c1, c1], ignore_order=False)
+        exp = Categorical([1, 2, 3, 1, 2, 3],
+                          categories=[1, 2, 3], ordered=True)
+        tm.assert_categorical_equal(res, exp)
+
+        c1 = Categorical([1, 2, 3, np.nan], ordered=True)
+        c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True)
+
+        res = union_categoricals([c1, c2], ignore_order=True)
+        exp = Categorical([1, 2, 3, np.nan, 3, 2])
+        tm.assert_categorical_equal(res, exp)
+
+        c1 = Categorical([1, 2, 3], ordered=True)
+        c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)
+
+        res = union_categoricals([c1, c2], ignore_order=True)
+        exp = Categorical([1, 2, 3, 1, 2, 3])
+        tm.assert_categorical_equal(res, exp)
+
+        res = union_categoricals([c2, c1], ignore_order=True,
+                                 sort_categories=True)
+        exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3])
+        tm.assert_categorical_equal(res, exp)
+
+        c1 = Categorical([1, 2, 3], ordered=True)
+        c2 = Categorical([4, 5, 6], ordered=True)
+        result = union_categoricals([c1, c2], ignore_order=True)
+        expected = Categorical([1, 2, 3, 4, 5, 6])
+        tm.assert_categorical_equal(result, expected)
+
+        msg = "to union ordered Categoricals, all categories must be the same"
+        with tm.assertRaisesRegexp(TypeError, msg):
+            union_categoricals([c1, c2], ignore_order=False)
+
+        with tm.assertRaisesRegexp(TypeError, msg):
+            union_categoricals([c1, c2])
+
     def test_union_categoricals_sort(self):
         # GH 13846
         c1 = Categorical(['x', 'y', 'z'])
diff --git a/pandas/types/concat.py b/pandas/types/concat.py
index 827eb160c452d..9e47a97dd621a 100644
--- a/pandas/types/concat.py
+++ b/pandas/types/concat.py
@@ -208,7 +208,7 @@ def _concat_asobject(to_concat):
     return _concat_asobject(to_concat)


-def union_categoricals(to_union, sort_categories=False):
+def union_categoricals(to_union, sort_categories=False, ignore_order=False):
     """
     Combine list-like of Categorical-like, unioning categories. All
     categories must have the same dtype.
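Following the new ``ignore_order`` parameter in the signature just above, a short usage sketch consistent with the tests added in this patch (expected values shown as comments; the import path matches this release's layout):

    import pandas as pd
    from pandas.types.concat import union_categoricals

    a = pd.Categorical(["a", "b"], ordered=True)
    b = pd.Categorical(["b", "a"], categories=["b", "a"], ordered=True)

    # differently-ordered ordered categoricals cannot be unioned directly;
    # union_categoricals([a, b]) raises TypeError

    # with ignore_order=True the ordered attribute is ignored and the
    # result is an unordered categorical
    res = union_categoricals([a, b], ignore_order=True)
    # [a, b, b, a]
    # Categories (2, object): [a, b]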
@@ -222,6 +222,11 @@ def union_categoricals(to_union, sort_categories=False): sort_categories : boolean, default False If true, resulting categories will be lexsorted, otherwise they will be ordered as they appear in the data. + ignore_order: boolean, default False + If true, the ordered attribute of the Categoricals will be ignored. + Results in an unordered categorical. + + .. versionadded:: 0.20.0 Returns ------- @@ -235,7 +240,7 @@ def union_categoricals(to_union, sort_categories=False): - all inputs are ordered and their categories are not identical - sort_categories=True and Categoricals are ordered ValueError - Emmpty list of categoricals passed + Empty list of categoricals passed """ from pandas import Index, Categorical, CategoricalIndex, Series @@ -264,7 +269,7 @@ def _maybe_unwrap(x): ordered = first.ordered new_codes = np.concatenate([c.codes for c in to_union]) - if sort_categories and ordered: + if sort_categories and not ignore_order and ordered: raise TypeError("Cannot use sort_categories=True with " "ordered Categoricals") @@ -272,7 +277,7 @@ def _maybe_unwrap(x): categories = categories.sort_values() indexer = categories.get_indexer(first.categories) new_codes = take_1d(indexer, new_codes, fill_value=-1) - elif all(not c.ordered for c in to_union): + elif ignore_order or all(not c.ordered for c in to_union): # different categories - union and recode cats = first.categories.append([c.categories for c in to_union[1:]]) categories = Index(cats.unique()) @@ -297,6 +302,9 @@ def _maybe_unwrap(x): else: raise TypeError('Categorical.ordered must be the same') + if ignore_order: + ordered = False + return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True) From 9ff3e52c53660269a4dcaaca25705139e4beade4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Feb 2017 11:29:28 -0500 Subject: [PATCH 089/933] TST: break out union_categoricals to separate test file --- pandas/tests/tools/test_concat.py | 332 ----------------- pandas/tests/tools/test_union_categoricals.py | 339 ++++++++++++++++++ 2 files changed, 339 insertions(+), 332 deletions(-) create mode 100644 pandas/tests/tools/test_union_categoricals.py diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index 6d40de465bff8..f292aeda8cbe0 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -8,7 +8,6 @@ read_csv, isnull, Series, date_range, Index, Panel, MultiIndex, Timestamp, DatetimeIndex, Categorical, CategoricalIndex) -from pandas.types.concat import union_categoricals from pandas.util import testing as tm from pandas.util.testing import (assert_frame_equal, makeCustomDataframe as mkdf, @@ -1511,337 +1510,6 @@ def test_concat_keys_with_none(self): keys=['b', 'c', 'd', 'e']) tm.assert_frame_equal(result, expected) - def test_union_categorical(self): - # GH 13361 - data = [ - (list('abc'), list('abd'), list('abcabd')), - ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]), - ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]), - - (['b', 'b', np.nan, 'a'], ['a', np.nan, 'c'], - ['b', 'b', np.nan, 'a', 'a', np.nan, 'c']), - - (pd.date_range('2014-01-01', '2014-01-05'), - pd.date_range('2014-01-06', '2014-01-07'), - pd.date_range('2014-01-01', '2014-01-07')), - - (pd.date_range('2014-01-01', '2014-01-05', tz='US/Central'), - pd.date_range('2014-01-06', '2014-01-07', tz='US/Central'), - pd.date_range('2014-01-01', '2014-01-07', tz='US/Central')), - - (pd.period_range('2014-01-01', '2014-01-05'), - pd.period_range('2014-01-06', '2014-01-07'), - 
pd.period_range('2014-01-01', '2014-01-07')), - ] - - for a, b, combined in data: - for box in [Categorical, CategoricalIndex, Series]: - result = union_categoricals([box(Categorical(a)), - box(Categorical(b))]) - expected = Categorical(combined) - tm.assert_categorical_equal(result, expected, - check_category_order=True) - - # new categories ordered by appearance - s = Categorical(['x', 'y', 'z']) - s2 = Categorical(['a', 'b', 'c']) - result = union_categoricals([s, s2]) - expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], - categories=['x', 'y', 'z', 'a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - s = Categorical([0, 1.2, 2], ordered=True) - s2 = Categorical([0, 1.2, 2], ordered=True) - result = union_categoricals([s, s2]) - expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True) - tm.assert_categorical_equal(result, expected) - - # must exactly match types - s = Categorical([0, 1.2, 2]) - s2 = Categorical([2, 3, 4]) - msg = 'dtype of categories must be the same' - with tm.assertRaisesRegexp(TypeError, msg): - union_categoricals([s, s2]) - - msg = 'No Categoricals to union' - with tm.assertRaisesRegexp(ValueError, msg): - union_categoricals([]) - - def test_union_categoricals_nan(self): - # GH 13759 - res = union_categoricals([pd.Categorical([1, 2, np.nan]), - pd.Categorical([3, 2, np.nan])]) - exp = Categorical([1, 2, np.nan, 3, 2, np.nan]) - tm.assert_categorical_equal(res, exp) - - res = union_categoricals([pd.Categorical(['A', 'B']), - pd.Categorical(['B', 'B', np.nan])]) - exp = Categorical(['A', 'B', 'B', 'B', np.nan]) - tm.assert_categorical_equal(res, exp) - - val1 = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-03-01'), - pd.NaT] - val2 = [pd.NaT, pd.Timestamp('2011-01-01'), - pd.Timestamp('2011-02-01')] - - res = union_categoricals([pd.Categorical(val1), pd.Categorical(val2)]) - exp = Categorical(val1 + val2, - categories=[pd.Timestamp('2011-01-01'), - pd.Timestamp('2011-03-01'), - pd.Timestamp('2011-02-01')]) - tm.assert_categorical_equal(res, exp) - - # all NaN - res = union_categoricals([pd.Categorical([np.nan, np.nan]), - pd.Categorical(['X'])]) - exp = Categorical([np.nan, np.nan, 'X']) - tm.assert_categorical_equal(res, exp) - - res = union_categoricals([pd.Categorical([np.nan, np.nan]), - pd.Categorical([np.nan, np.nan])]) - exp = Categorical([np.nan, np.nan, np.nan, np.nan]) - tm.assert_categorical_equal(res, exp) - - def test_union_categoricals_empty(self): - # GH 13759 - res = union_categoricals([pd.Categorical([]), - pd.Categorical([])]) - exp = Categorical([]) - tm.assert_categorical_equal(res, exp) - - res = union_categoricals([pd.Categorical([]), - pd.Categorical([1.0])]) - exp = Categorical([1.0]) - tm.assert_categorical_equal(res, exp) - - # to make dtype equal - nanc = pd.Categorical(np.array([np.nan], dtype=np.float64)) - res = union_categoricals([nanc, - pd.Categorical([])]) - tm.assert_categorical_equal(res, nanc) - - def test_union_categorical_same_category(self): - # check fastpath - c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) - c2 = Categorical([3, 2, 1, np.nan], categories=[1, 2, 3, 4]) - res = union_categoricals([c1, c2]) - exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan], - categories=[1, 2, 3, 4]) - tm.assert_categorical_equal(res, exp) - - c1 = Categorical(['z', 'z', 'z'], categories=['x', 'y', 'z']) - c2 = Categorical(['x', 'x', 'x'], categories=['x', 'y', 'z']) - res = union_categoricals([c1, c2]) - exp = Categorical(['z', 'z', 'z', 'x', 'x', 'x'], - categories=['x', 'y', 'z']) - 
tm.assert_categorical_equal(res, exp) - - def test_union_categoricals_ordered(self): - c1 = Categorical([1, 2, 3], ordered=True) - c2 = Categorical([1, 2, 3], ordered=False) - - msg = 'Categorical.ordered must be the same' - with tm.assertRaisesRegexp(TypeError, msg): - union_categoricals([c1, c2]) - - res = union_categoricals([c1, c1]) - exp = Categorical([1, 2, 3, 1, 2, 3], ordered=True) - tm.assert_categorical_equal(res, exp) - - c1 = Categorical([1, 2, 3, np.nan], ordered=True) - c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) - - res = union_categoricals([c1, c2]) - exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=True) - tm.assert_categorical_equal(res, exp) - - c1 = Categorical([1, 2, 3], ordered=True) - c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) - - msg = "to union ordered Categoricals, all categories must be the same" - with tm.assertRaisesRegexp(TypeError, msg): - union_categoricals([c1, c2]) - - def test_union_categoricals_ignore_order(self): - # GH 15219 - c1 = Categorical([1, 2, 3], ordered=True) - c2 = Categorical([1, 2, 3], ordered=False) - - res = union_categoricals([c1, c2], ignore_order=True) - exp = Categorical([1, 2, 3, 1, 2, 3]) - tm.assert_categorical_equal(res, exp) - - msg = 'Categorical.ordered must be the same' - with tm.assertRaisesRegexp(TypeError, msg): - union_categoricals([c1, c2], ignore_order=False) - - res = union_categoricals([c1, c1], ignore_order=True) - exp = Categorical([1, 2, 3, 1, 2, 3]) - tm.assert_categorical_equal(res, exp) - - res = union_categoricals([c1, c1], ignore_order=False) - exp = Categorical([1, 2, 3, 1, 2, 3], - categories=[1, 2, 3], ordered=True) - tm.assert_categorical_equal(res, exp) - - c1 = Categorical([1, 2, 3, np.nan], ordered=True) - c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) - - res = union_categoricals([c1, c2], ignore_order=True) - exp = Categorical([1, 2, 3, np.nan, 3, 2]) - tm.assert_categorical_equal(res, exp) - - c1 = Categorical([1, 2, 3], ordered=True) - c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) - - res = union_categoricals([c1, c2], ignore_order=True) - exp = Categorical([1, 2, 3, 1, 2, 3]) - tm.assert_categorical_equal(res, exp) - - res = union_categoricals([c2, c1], ignore_order=True, - sort_categories=True) - exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) - tm.assert_categorical_equal(res, exp) - - c1 = Categorical([1, 2, 3], ordered=True) - c2 = Categorical([4, 5, 6], ordered=True) - result = union_categoricals([c1, c2], ignore_order=True) - expected = Categorical([1, 2, 3, 4, 5, 6]) - tm.assert_categorical_equal(result, expected) - - msg = "to union ordered Categoricals, all categories must be the same" - with tm.assertRaisesRegexp(TypeError, msg): - union_categoricals([c1, c2], ignore_order=False) - - with tm.assertRaisesRegexp(TypeError, msg): - union_categoricals([c1, c2]) - - def test_union_categoricals_sort(self): - # GH 13846 - c1 = Categorical(['x', 'y', 'z']) - c2 = Categorical(['a', 'b', 'c']) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], - categories=['a', 'b', 'c', 'x', 'y', 'z']) - tm.assert_categorical_equal(result, expected) - - # fastpath - c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c']) - c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c']) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical(['a', 'b', 'b', 'c'], - categories=['a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - 
- c1 = Categorical(['a', 'b'], categories=['c', 'a', 'b']) - c2 = Categorical(['b', 'c'], categories=['c', 'a', 'b']) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical(['a', 'b', 'b', 'c'], - categories=['a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - # fastpath - skip resort - c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c']) - c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c']) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical(['a', 'b', 'b', 'c'], - categories=['a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical(['x', np.nan]) - c2 = Categorical([np.nan, 'b']) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical(['x', np.nan, np.nan, 'b'], - categories=['b', 'x']) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical([np.nan]) - c2 = Categorical([np.nan]) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical([np.nan, np.nan], categories=[]) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical([]) - c2 = Categorical([]) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical([]) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True) - c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True) - with tm.assertRaises(TypeError): - union_categoricals([c1, c2], sort_categories=True) - - def test_union_categoricals_sort_false(self): - # GH 13846 - c1 = Categorical(['x', 'y', 'z']) - c2 = Categorical(['a', 'b', 'c']) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], - categories=['x', 'y', 'z', 'a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - # fastpath - c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c']) - c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c']) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical(['a', 'b', 'b', 'c'], - categories=['b', 'a', 'c']) - tm.assert_categorical_equal(result, expected) - - # fastpath - skip resort - c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c']) - c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c']) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical(['a', 'b', 'b', 'c'], - categories=['a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical(['x', np.nan]) - c2 = Categorical([np.nan, 'b']) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical(['x', np.nan, np.nan, 'b'], - categories=['x', 'b']) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical([np.nan]) - c2 = Categorical([np.nan]) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical([np.nan, np.nan], categories=[]) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical([]) - c2 = Categorical([]) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical([]) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True) - c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical(['b', 'a', 'a', 'c'], - categories=['b', 'a', 'c'], ordered=True) - 
tm.assert_categorical_equal(result, expected) - - def test_union_categorical_unwrap(self): - # GH 14173 - c1 = Categorical(['a', 'b']) - c2 = pd.Series(['b', 'c'], dtype='category') - result = union_categoricals([c1, c2]) - expected = Categorical(['a', 'b', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - c2 = CategoricalIndex(c2) - result = union_categoricals([c1, c2]) - tm.assert_categorical_equal(result, expected) - - c1 = Series(c1) - result = union_categoricals([c1, c2]) - tm.assert_categorical_equal(result, expected) - - with tm.assertRaises(TypeError): - union_categoricals([c1, ['a', 'b', 'c']]) - def test_concat_bug_1719(self): ts1 = tm.makeTimeSeries() ts2 = tm.makeTimeSeries()[::2] diff --git a/pandas/tests/tools/test_union_categoricals.py b/pandas/tests/tools/test_union_categoricals.py new file mode 100644 index 0000000000000..299b60f2a00b0 --- /dev/null +++ b/pandas/tests/tools/test_union_categoricals.py @@ -0,0 +1,339 @@ +import numpy as np +import pandas as pd +from pandas import Categorical, Series, CategoricalIndex +from pandas.types.concat import union_categoricals +from pandas.util import testing as tm + + +class TestUnionCategoricals(tm.TestCase): + + def test_union_categorical(self): + # GH 13361 + data = [ + (list('abc'), list('abd'), list('abcabd')), + ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]), + ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]), + + (['b', 'b', np.nan, 'a'], ['a', np.nan, 'c'], + ['b', 'b', np.nan, 'a', 'a', np.nan, 'c']), + + (pd.date_range('2014-01-01', '2014-01-05'), + pd.date_range('2014-01-06', '2014-01-07'), + pd.date_range('2014-01-01', '2014-01-07')), + + (pd.date_range('2014-01-01', '2014-01-05', tz='US/Central'), + pd.date_range('2014-01-06', '2014-01-07', tz='US/Central'), + pd.date_range('2014-01-01', '2014-01-07', tz='US/Central')), + + (pd.period_range('2014-01-01', '2014-01-05'), + pd.period_range('2014-01-06', '2014-01-07'), + pd.period_range('2014-01-01', '2014-01-07')), + ] + + for a, b, combined in data: + for box in [Categorical, CategoricalIndex, Series]: + result = union_categoricals([box(Categorical(a)), + box(Categorical(b))]) + expected = Categorical(combined) + tm.assert_categorical_equal(result, expected, + check_category_order=True) + + # new categories ordered by appearance + s = Categorical(['x', 'y', 'z']) + s2 = Categorical(['a', 'b', 'c']) + result = union_categoricals([s, s2]) + expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], + categories=['x', 'y', 'z', 'a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + s = Categorical([0, 1.2, 2], ordered=True) + s2 = Categorical([0, 1.2, 2], ordered=True) + result = union_categoricals([s, s2]) + expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True) + tm.assert_categorical_equal(result, expected) + + # must exactly match types + s = Categorical([0, 1.2, 2]) + s2 = Categorical([2, 3, 4]) + msg = 'dtype of categories must be the same' + with tm.assertRaisesRegexp(TypeError, msg): + union_categoricals([s, s2]) + + msg = 'No Categoricals to union' + with tm.assertRaisesRegexp(ValueError, msg): + union_categoricals([]) + + def test_union_categoricals_nan(self): + # GH 13759 + res = union_categoricals([pd.Categorical([1, 2, np.nan]), + pd.Categorical([3, 2, np.nan])]) + exp = Categorical([1, 2, np.nan, 3, 2, np.nan]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([pd.Categorical(['A', 'B']), + pd.Categorical(['B', 'B', np.nan])]) + exp = Categorical(['A', 'B', 'B', 'B', np.nan]) + tm.assert_categorical_equal(res, exp) + 
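# NOTE (editor's aside, not part of the patch): the two failure/edge cases the
# surrounding tests pin down, sketched directly with hypothetical inputs.
# Category dtypes must match exactly, and NaN survives in the values but is
# never promoted to a category. Assumes the 0.20-era import path above.
import numpy as np
import pandas as pd
from pandas.types.concat import union_categoricals

try:
    # int64 categories vs. float64 categories -> TypeError
    union_categoricals([pd.Categorical([1, 2]), pd.Categorical([2.5, 3.5])])
except TypeError as err:
    print(err)  # dtype of categories must be the same

res = union_categoricals([pd.Categorical(['A', np.nan]), pd.Categorical(['B'])])
list(res)        # ['A', nan, 'B'] -- the NaN value is kept ...
res.categories   # Index(['A', 'B'], dtype='object') -- ... but is no category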
+ val1 = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-03-01'), + pd.NaT] + val2 = [pd.NaT, pd.Timestamp('2011-01-01'), + pd.Timestamp('2011-02-01')] + + res = union_categoricals([pd.Categorical(val1), pd.Categorical(val2)]) + exp = Categorical(val1 + val2, + categories=[pd.Timestamp('2011-01-01'), + pd.Timestamp('2011-03-01'), + pd.Timestamp('2011-02-01')]) + tm.assert_categorical_equal(res, exp) + + # all NaN + res = union_categoricals([pd.Categorical([np.nan, np.nan]), + pd.Categorical(['X'])]) + exp = Categorical([np.nan, np.nan, 'X']) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([pd.Categorical([np.nan, np.nan]), + pd.Categorical([np.nan, np.nan])]) + exp = Categorical([np.nan, np.nan, np.nan, np.nan]) + tm.assert_categorical_equal(res, exp) + + def test_union_categoricals_empty(self): + # GH 13759 + res = union_categoricals([pd.Categorical([]), + pd.Categorical([])]) + exp = Categorical([]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([pd.Categorical([]), + pd.Categorical([1.0])]) + exp = Categorical([1.0]) + tm.assert_categorical_equal(res, exp) + + # to make dtype equal + nanc = pd.Categorical(np.array([np.nan], dtype=np.float64)) + res = union_categoricals([nanc, + pd.Categorical([])]) + tm.assert_categorical_equal(res, nanc) + + def test_union_categorical_same_category(self): + # check fastpath + c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) + c2 = Categorical([3, 2, 1, np.nan], categories=[1, 2, 3, 4]) + res = union_categoricals([c1, c2]) + exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan], + categories=[1, 2, 3, 4]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical(['z', 'z', 'z'], categories=['x', 'y', 'z']) + c2 = Categorical(['x', 'x', 'x'], categories=['x', 'y', 'z']) + res = union_categoricals([c1, c2]) + exp = Categorical(['z', 'z', 'z', 'x', 'x', 'x'], + categories=['x', 'y', 'z']) + tm.assert_categorical_equal(res, exp) + + def test_union_categoricals_ordered(self): + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], ordered=False) + + msg = 'Categorical.ordered must be the same' + with tm.assertRaisesRegexp(TypeError, msg): + union_categoricals([c1, c2]) + + res = union_categoricals([c1, c1]) + exp = Categorical([1, 2, 3, 1, 2, 3], ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3, np.nan], ordered=True) + c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) + + res = union_categoricals([c1, c2]) + exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) + + msg = "to union ordered Categoricals, all categories must be the same" + with tm.assertRaisesRegexp(TypeError, msg): + union_categoricals([c1, c2]) + + def test_union_categoricals_ignore_order(self): + # GH 15219 + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], ordered=False) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + msg = 'Categorical.ordered must be the same' + with tm.assertRaisesRegexp(TypeError, msg): + union_categoricals([c1, c2], ignore_order=False) + + res = union_categoricals([c1, c1], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([c1, c1], ignore_order=False) + exp = Categorical([1, 2, 3, 1, 2, 3], + categories=[1, 2, 3], 
ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3, np.nan], ordered=True) + c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, np.nan, 3, 2]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([c2, c1], ignore_order=True, + sort_categories=True) + exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([4, 5, 6], ordered=True) + result = union_categoricals([c1, c2], ignore_order=True) + expected = Categorical([1, 2, 3, 4, 5, 6]) + tm.assert_categorical_equal(result, expected) + + msg = "to union ordered Categoricals, all categories must be the same" + with tm.assertRaisesRegexp(TypeError, msg): + union_categoricals([c1, c2], ignore_order=False) + + with tm.assertRaisesRegexp(TypeError, msg): + union_categoricals([c1, c2]) + + def test_union_categoricals_sort(self): + # GH 13846 + c1 = Categorical(['x', 'y', 'z']) + c2 = Categorical(['a', 'b', 'c']) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], + categories=['a', 'b', 'c', 'x', 'y', 'z']) + tm.assert_categorical_equal(result, expected) + + # fastpath + c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c']) + c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c']) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(['a', 'b', 'b', 'c'], + categories=['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(['a', 'b'], categories=['c', 'a', 'b']) + c2 = Categorical(['b', 'c'], categories=['c', 'a', 'b']) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(['a', 'b', 'b', 'c'], + categories=['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + # fastpath - skip resort + c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c']) + c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c']) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(['a', 'b', 'b', 'c'], + categories=['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(['x', np.nan]) + c2 = Categorical([np.nan, 'b']) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(['x', np.nan, np.nan, 'b'], + categories=['b', 'x']) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([np.nan]) + c2 = Categorical([np.nan]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical([np.nan, np.nan], categories=[]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([]) + c2 = Categorical([]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical([]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True) + c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True) + with tm.assertRaises(TypeError): + union_categoricals([c1, c2], sort_categories=True) + + def test_union_categoricals_sort_false(self): + # GH 13846 + c1 = Categorical(['x', 'y', 'z']) + 
c2 = Categorical(['a', 'b', 'c']) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], + categories=['x', 'y', 'z', 'a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + # fastpath + c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c']) + c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c']) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(['a', 'b', 'b', 'c'], + categories=['b', 'a', 'c']) + tm.assert_categorical_equal(result, expected) + + # fastpath - skip resort + c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c']) + c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c']) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(['a', 'b', 'b', 'c'], + categories=['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(['x', np.nan]) + c2 = Categorical([np.nan, 'b']) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(['x', np.nan, np.nan, 'b'], + categories=['x', 'b']) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([np.nan]) + c2 = Categorical([np.nan]) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical([np.nan, np.nan], categories=[]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([]) + c2 = Categorical([]) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical([]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True) + c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(['b', 'a', 'a', 'c'], + categories=['b', 'a', 'c'], ordered=True) + tm.assert_categorical_equal(result, expected) + + def test_union_categorical_unwrap(self): + # GH 14173 + c1 = Categorical(['a', 'b']) + c2 = pd.Series(['b', 'c'], dtype='category') + result = union_categoricals([c1, c2]) + expected = Categorical(['a', 'b', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + c2 = CategoricalIndex(c2) + result = union_categoricals([c1, c2]) + tm.assert_categorical_equal(result, expected) + + c1 = Series(c1) + result = union_categoricals([c1, c2]) + tm.assert_categorical_equal(result, expected) + + with tm.assertRaises(TypeError): + union_categoricals([c1, ['a', 'b', 'c']]) From f6385506dd668ae461581c9af564be5b98e6ff16 Mon Sep 17 00:00:00 2001 From: Kernc Date: Wed, 22 Feb 2017 13:41:16 -0500 Subject: [PATCH 090/933] BUG: Categorical.unique() preserves categories closes #13179 Author: Kernc Closes #15439 from kernc/Categorical.unique-nostrip-unused and squashes the following commits: 55733b8 [Kernc] fixup! BUG: Fix .groupby(categorical, sort=False) failing 2aec326 [Kernc] fixup! 
BUG: Fix .groupby(categorical, sort=False) failing
c813146 [Kernc] PERF: add asv for categorical grouping
0c550e6 [Kernc] BUG: Fix .groupby(categorical, sort=False) failing

---
 asv_bench/benchmarks/groupby.py          | 37 +++++++++++++++++++++
 doc/source/whatsnew/v0.20.0.txt          | 34 ++++++++++++++++++-
 pandas/core/categorical.py               | 42 ++++++++++++++++++++++++
 pandas/core/groupby.py                   | 18 +---------
 pandas/indexes/category.py               |  4 +++
 pandas/tests/groupby/test_categorical.py | 24 ++++++++++++++
 6 files changed, 141 insertions(+), 18 deletions(-)

diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 03ff62568b405..59f55914ea4d3 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -492,6 +492,43 @@ def time_groupby_sum(self):
         self.df.groupby(['a'])['b'].sum()


+class groupby_categorical(object):
+    goal_time = 0.2
+
+    def setup(self):
+        N = 100000
+        arr = np.random.random(N)
+
+        self.df = DataFrame(dict(
+            a=Categorical(np.random.randint(10000, size=N)),
+            b=arr))
+        self.df_ordered = DataFrame(dict(
+            a=Categorical(np.random.randint(10000, size=N), ordered=True),
+            b=arr))
+        self.df_extra_cat = DataFrame(dict(
+            a=Categorical(np.random.randint(100, size=N),
+                          categories=np.arange(10000)),
+            b=arr))
+
+    def time_groupby_sort(self):
+        self.df.groupby('a')['b'].count()
+
+    def time_groupby_nosort(self):
+        self.df.groupby('a', sort=False)['b'].count()
+
+    def time_groupby_ordered_sort(self):
+        self.df_ordered.groupby('a')['b'].count()
+
+    def time_groupby_ordered_nosort(self):
+        self.df_ordered.groupby('a', sort=False)['b'].count()
+
+    def time_groupby_extra_cat_sort(self):
+        self.df_extra_cat.groupby('a')['b'].count()
+
+    def time_groupby_extra_cat_nosort(self):
+        self.df_extra_cat.groupby('a', sort=False)['b'].count()
+
+
 class groupby_period(object):
     # GH 14338
     goal_time = 0.2
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index bb5f19b301dc8..e65276fe51fe8 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -120,6 +120,39 @@ Notably, a new numerical index, ``UInt64Index``, has been created (:issue:`14937
 - Bug in ``pd.unique()`` in which unsigned 64-bit integers were causing overflow (:issue:`14915`)
 - Bug in ``pd.value_counts()`` in which unsigned 64-bit integers were being erroneously truncated in the output (:issue:`14934`)

+.. _whatsnew_0200.enhancements.groupby_categorical:
+
+GroupBy on Categoricals
+^^^^^^^^^^^^^^^^^^^^^^^
+
+In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueError`` when grouping on a categorical series with some categories not appearing in the data (:issue:`13179`).
+
+.. ipython:: python
+
+   chromosomes = np.r_[np.arange(1, 23).astype(str), ['X', 'Y']]
+   df = pd.DataFrame({
+       'A': np.random.randint(100),
+       'B': np.random.randint(100),
+       'C': np.random.randint(100),
+       'chromosomes': pd.Categorical(np.random.choice(chromosomes, 100),
+                                     categories=chromosomes,
+                                     ordered=True)})
+   df
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+   In [3]: df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum()
+   ---------------------------------------------------------------------------
+   ValueError: items in new_categories are not the same as in old categories
+
+New Behavior:
+
+.. ipython:: python
+
+   df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum()
+
 .. _whatsnew_0200.enhancements.other:

 Other enhancements
@@ -163,7 +196,6 @@ Other enhancements

 .. _whatsnew_0200.api_breaking:
-
 Backwards incompatible API changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 491db2e080953..b6898f11ffa74 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -602,6 +602,46 @@ def _get_categories(self):
     categories = property(fget=_get_categories, fset=_set_categories,
                           doc=_categories_doc)

+    def _codes_for_groupby(self, sort):
+        """
+        If sort=False, return a copy of self, coded with categories as
+        returned by .unique(), followed by any categories not appearing in
+        the data. If sort=True, return self.
+
+        This method is needed solely to ensure the categorical index of the
+        GroupBy result has categories in the order of appearance in the data
+        (GH-8868).
+
+        Parameters
+        ----------
+        sort : boolean
+            The value of the sort parameter groupby was called with.
+
+        Returns
+        -------
+        Categorical
+            If sort=False, the new categories are set to the order of
+            appearance in codes (unless ordered=True, in which case the
+            original order is preserved), followed by any unrepresented
+            categories in the original order.
+        """
+
+        # Already sorted according to self.categories; all is fine
+        if sort:
+            return self
+
+        # sort=False should order groups in as-encountered order (GH-8868)
+        cat = self.unique()
+
+        # But for groupby to work, all categories should be present,
+        # including those missing from the data (GH-13179), which .unique()
+        # above dropped
+        cat.add_categories(
+            self.categories[~self.categories.isin(cat.categories)],
+            inplace=True)
+
+        return self.reorder_categories(cat.categories)
+
     _ordered = None

     def set_ordered(self, value, inplace=False):
@@ -1853,8 +1893,10 @@ def unique(self):
         # unlike np.unique, unique1d does not sort
         unique_codes = unique1d(self.codes)
         cat = self.copy()
+
         # keep nan in codes
         cat._codes = unique_codes
+
         # exclude nan from indexer for categories
         take_codes = unique_codes[unique_codes != -1]
         if self.ordered:
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index ba2de295fa0a9..0b3fcba1c1ba5 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -2300,23 +2300,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,

         # a passed Categorical
         elif is_categorical_dtype(self.grouper):

-            # must have an ordered categorical
-            if self.sort:
-                if not self.grouper.ordered:
-
-                    # technically we cannot group on an unordered
-                    # Categorical
-                    # but this a user convenience to do so; the ordering
-                    # is preserved and if it's a reduction it doesn't make
-                    # any difference
-                    pass
-
-            # fix bug #GH8868 sort=False being ignored in categorical
-            # groupby
-            else:
-                cat = self.grouper.unique()
-                self.grouper = self.grouper.reorder_categories(
-                    cat.categories)
+            self.grouper = self.grouper._codes_for_groupby(self.sort)

             # we make a CategoricalIndex out of the cat grouper
             # preserving the categories / ordered attributes
diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py
index acb2758641a62..5299a094156cd 100644
--- a/pandas/indexes/category.py
+++ b/pandas/indexes/category.py
@@ -550,6 +550,10 @@ def _append_same_dtype(self, to_concat, name):
         result.name = name
         return result

+    def _codes_for_groupby(self, sort):
+        """ Return a Categorical adjusted for groupby """
+        return self.values._codes_for_groupby(sort)
+
     @classmethod
     def _add_comparison_methods(cls):
         """ add in comparison methods """
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index
eebd0e0f490c1..cfcb531bedab8 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -284,6 +284,30 @@ def test_groupby_multi_categorical_as_index(self): tm.assert_frame_equal(result, expected, check_index_type=True) + def test_groupby_preserve_categories(self): + # GH-13179 + categories = list('abc') + + # ordered=True + df = DataFrame({'A': pd.Categorical(list('ba'), + categories=categories, + ordered=True)}) + index = pd.CategoricalIndex(categories, categories, ordered=True) + tm.assert_index_equal(df.groupby('A', sort=True).first().index, index) + tm.assert_index_equal(df.groupby('A', sort=False).first().index, index) + + # ordered=False + df = DataFrame({'A': pd.Categorical(list('ba'), + categories=categories, + ordered=False)}) + sort_index = pd.CategoricalIndex(categories, categories, ordered=False) + nosort_index = pd.CategoricalIndex(list('bac'), list('bac'), + ordered=False) + tm.assert_index_equal(df.groupby('A', sort=True).first().index, + sort_index) + tm.assert_index_equal(df.groupby('A', sort=False).first().index, + nosort_index) + def test_groupby_preserve_categorical_dtype(self): # GH13743, GH13854 df = DataFrame({'A': [1, 2, 1, 1, 2], From f4edb053e17e51e8c2bed7c16755c4f7f3222117 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Feb 2017 13:52:46 -0500 Subject: [PATCH 091/933] PEP: pep issue in pandas/tests/tools/test_concat.py --- pandas/tests/tools/test_concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index f292aeda8cbe0..a2b5773f551c9 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -7,7 +7,7 @@ from pandas import (DataFrame, concat, read_csv, isnull, Series, date_range, Index, Panel, MultiIndex, Timestamp, - DatetimeIndex, Categorical, CategoricalIndex) + DatetimeIndex) from pandas.util import testing as tm from pandas.util.testing import (assert_frame_equal, makeCustomDataframe as mkdf, From 03eca9dad6c911d7df12377839e8eb3bb6028d98 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 23 Feb 2017 08:19:24 -0500 Subject: [PATCH 092/933] CI: use correct circleci badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 195b76f64b37f..7bc350d1c6675 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ - circleci build status + circleci build status
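# NOTE (editor's aside, not part of any patch): the user-visible effect of the
# _codes_for_groupby change from PATCH 090, sketched as its own tests assert
# it. With sort=False the groups come back in order of appearance, and
# categories unobserved in the data ('c' here) are appended at the end instead
# of raising ValueError as before. Assumes 0.20-era pandas behavior.
import pandas as pd

df = pd.DataFrame({'A': pd.Categorical(['b', 'a'], categories=['a', 'b', 'c']),
                   'B': [1, 2]})
df.groupby('A', sort=False).first().index
# CategoricalIndex(['b', 'a', 'c'], categories=['b', 'a', 'c'],
#                  ordered=False, name='A', dtype='category')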
... """ self.table_attributes = attributes return self From 3f91d5a764f019f017fd7f0268d75fd6001b208f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 2 Mar 2017 03:39:34 -0500 Subject: [PATCH 122/933] TST: split tests/format/test_format.py (#15546) closes #15531 --- pandas/tests/formats/test_eng_formatting.py | 195 ++ pandas/tests/formats/test_format.py | 2610 +------------------ pandas/tests/formats/test_to_csv.py | 216 ++ pandas/tests/formats/test_to_html.py | 1861 +++++++++++++ pandas/tests/formats/test_to_latex.py | 351 +++ 5 files changed, 2653 insertions(+), 2580 deletions(-) create mode 100644 pandas/tests/formats/test_eng_formatting.py create mode 100644 pandas/tests/formats/test_to_csv.py create mode 100644 pandas/tests/formats/test_to_html.py create mode 100644 pandas/tests/formats/test_to_latex.py diff --git a/pandas/tests/formats/test_eng_formatting.py b/pandas/tests/formats/test_eng_formatting.py new file mode 100644 index 0000000000000..d2badd4fc160a --- /dev/null +++ b/pandas/tests/formats/test_eng_formatting.py @@ -0,0 +1,195 @@ +import numpy as np +import pandas as pd +from pandas import DataFrame +from pandas.compat import u +import pandas.formats.format as fmt +from pandas.util import testing as tm + + +class TestEngFormatter(tm.TestCase): + + def test_eng_float_formatter(self): + df = DataFrame({'A': [1.41, 141., 14100, 1410000.]}) + + fmt.set_eng_float_format() + result = df.to_string() + expected = (' A\n' + '0 1.410E+00\n' + '1 141.000E+00\n' + '2 14.100E+03\n' + '3 1.410E+06') + self.assertEqual(result, expected) + + fmt.set_eng_float_format(use_eng_prefix=True) + result = df.to_string() + expected = (' A\n' + '0 1.410\n' + '1 141.000\n' + '2 14.100k\n' + '3 1.410M') + self.assertEqual(result, expected) + + fmt.set_eng_float_format(accuracy=0) + result = df.to_string() + expected = (' A\n' + '0 1E+00\n' + '1 141E+00\n' + '2 14E+03\n' + '3 1E+06') + self.assertEqual(result, expected) + + self.reset_display_options() + + def compare(self, formatter, input, output): + formatted_input = formatter(input) + msg = ("formatting of %s results in '%s', expected '%s'" % + (str(input), formatted_input, output)) + self.assertEqual(formatted_input, output, msg) + + def compare_all(self, formatter, in_out): + """ + Parameters: + ----------- + formatter: EngFormatter under test + in_out: list of tuples. Each tuple = (number, expected_formatting) + + It is tested if 'formatter(number) == expected_formatting'. + *number* should be >= 0 because formatter(-number) == fmt is also + tested. 
*fmt* is derived from *expected_formatting* + """ + for input, output in in_out: + self.compare(formatter, input, output) + self.compare(formatter, -input, "-" + output[1:]) + + def test_exponents_with_eng_prefix(self): + formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) + f = np.sqrt(2) + in_out = [ + (f * 10 ** -24, " 1.414y"), (f * 10 ** -23, " 14.142y"), + (f * 10 ** -22, " 141.421y"), (f * 10 ** -21, " 1.414z"), + (f * 10 ** -20, " 14.142z"), (f * 10 ** -19, " 141.421z"), + (f * 10 ** -18, " 1.414a"), (f * 10 ** -17, " 14.142a"), + (f * 10 ** -16, " 141.421a"), (f * 10 ** -15, " 1.414f"), + (f * 10 ** -14, " 14.142f"), (f * 10 ** -13, " 141.421f"), + (f * 10 ** -12, " 1.414p"), (f * 10 ** -11, " 14.142p"), + (f * 10 ** -10, " 141.421p"), (f * 10 ** -9, " 1.414n"), + (f * 10 ** -8, " 14.142n"), (f * 10 ** -7, " 141.421n"), + (f * 10 ** -6, " 1.414u"), (f * 10 ** -5, " 14.142u"), + (f * 10 ** -4, " 141.421u"), (f * 10 ** -3, " 1.414m"), + (f * 10 ** -2, " 14.142m"), (f * 10 ** -1, " 141.421m"), + (f * 10 ** 0, " 1.414"), (f * 10 ** 1, " 14.142"), + (f * 10 ** 2, " 141.421"), (f * 10 ** 3, " 1.414k"), + (f * 10 ** 4, " 14.142k"), (f * 10 ** 5, " 141.421k"), + (f * 10 ** 6, " 1.414M"), (f * 10 ** 7, " 14.142M"), + (f * 10 ** 8, " 141.421M"), (f * 10 ** 9, " 1.414G"), + (f * 10 ** 10, " 14.142G"), (f * 10 ** 11, " 141.421G"), + (f * 10 ** 12, " 1.414T"), (f * 10 ** 13, " 14.142T"), + (f * 10 ** 14, " 141.421T"), (f * 10 ** 15, " 1.414P"), + (f * 10 ** 16, " 14.142P"), (f * 10 ** 17, " 141.421P"), + (f * 10 ** 18, " 1.414E"), (f * 10 ** 19, " 14.142E"), + (f * 10 ** 20, " 141.421E"), (f * 10 ** 21, " 1.414Z"), + (f * 10 ** 22, " 14.142Z"), (f * 10 ** 23, " 141.421Z"), + (f * 10 ** 24, " 1.414Y"), (f * 10 ** 25, " 14.142Y"), + (f * 10 ** 26, " 141.421Y")] + self.compare_all(formatter, in_out) + + def test_exponents_without_eng_prefix(self): + formatter = fmt.EngFormatter(accuracy=4, use_eng_prefix=False) + f = np.pi + in_out = [ + (f * 10 ** -24, " 3.1416E-24"), + (f * 10 ** -23, " 31.4159E-24"), + (f * 10 ** -22, " 314.1593E-24"), + (f * 10 ** -21, " 3.1416E-21"), + (f * 10 ** -20, " 31.4159E-21"), + (f * 10 ** -19, " 314.1593E-21"), + (f * 10 ** -18, " 3.1416E-18"), + (f * 10 ** -17, " 31.4159E-18"), + (f * 10 ** -16, " 314.1593E-18"), + (f * 10 ** -15, " 3.1416E-15"), + (f * 10 ** -14, " 31.4159E-15"), + (f * 10 ** -13, " 314.1593E-15"), + (f * 10 ** -12, " 3.1416E-12"), + (f * 10 ** -11, " 31.4159E-12"), + (f * 10 ** -10, " 314.1593E-12"), + (f * 10 ** -9, " 3.1416E-09"), + (f * 10 ** -8, " 31.4159E-09"), + (f * 10 ** -7, " 314.1593E-09"), + (f * 10 ** -6, " 3.1416E-06"), + (f * 10 ** -5, " 31.4159E-06"), + (f * 10 ** -4, " 314.1593E-06"), + (f * 10 ** -3, " 3.1416E-03"), + (f * 10 ** -2, " 31.4159E-03"), + (f * 10 ** -1, " 314.1593E-03"), + (f * 10 ** 0, " 3.1416E+00"), + (f * 10 ** 1, " 31.4159E+00"), + (f * 10 ** 2, " 314.1593E+00"), + (f * 10 ** 3, " 3.1416E+03"), + (f * 10 ** 4, " 31.4159E+03"), + (f * 10 ** 5, " 314.1593E+03"), + (f * 10 ** 6, " 3.1416E+06"), + (f * 10 ** 7, " 31.4159E+06"), + (f * 10 ** 8, " 314.1593E+06"), + (f * 10 ** 9, " 3.1416E+09"), + (f * 10 ** 10, " 31.4159E+09"), + (f * 10 ** 11, " 314.1593E+09"), + (f * 10 ** 12, " 3.1416E+12"), + (f * 10 ** 13, " 31.4159E+12"), + (f * 10 ** 14, " 314.1593E+12"), + (f * 10 ** 15, " 3.1416E+15"), + (f * 10 ** 16, " 31.4159E+15"), + (f * 10 ** 17, " 314.1593E+15"), + (f * 10 ** 18, " 3.1416E+18"), + (f * 10 ** 19, " 31.4159E+18"), + (f * 10 ** 20, " 314.1593E+18"), + (f * 10 ** 21, " 3.1416E+21"), + (f * 10 ** 
22, " 31.4159E+21"), + (f * 10 ** 23, " 314.1593E+21"), + (f * 10 ** 24, " 3.1416E+24"), + (f * 10 ** 25, " 31.4159E+24"), + (f * 10 ** 26, " 314.1593E+24")] + self.compare_all(formatter, in_out) + + def test_rounding(self): + formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) + in_out = [(5.55555, ' 5.556'), (55.5555, ' 55.556'), + (555.555, ' 555.555'), (5555.55, ' 5.556k'), + (55555.5, ' 55.556k'), (555555, ' 555.555k')] + self.compare_all(formatter, in_out) + + formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) + in_out = [(5.55555, ' 5.6'), (55.5555, ' 55.6'), (555.555, ' 555.6'), + (5555.55, ' 5.6k'), (55555.5, ' 55.6k'), (555555, ' 555.6k')] + self.compare_all(formatter, in_out) + + formatter = fmt.EngFormatter(accuracy=0, use_eng_prefix=True) + in_out = [(5.55555, ' 6'), (55.5555, ' 56'), (555.555, ' 556'), + (5555.55, ' 6k'), (55555.5, ' 56k'), (555555, ' 556k')] + self.compare_all(formatter, in_out) + + formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) + result = formatter(0) + self.assertEqual(result, u(' 0.000')) + + def test_nan(self): + # Issue #11981 + + formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) + result = formatter(np.nan) + self.assertEqual(result, u('NaN')) + + df = pd.DataFrame({'a': [1.5, 10.3, 20.5], + 'b': [50.3, 60.67, 70.12], + 'c': [100.2, 101.33, 120.33]}) + pt = df.pivot_table(values='a', index='b', columns='c') + fmt.set_eng_float_format(accuracy=1) + result = pt.to_string() + self.assertTrue('NaN' in result) + self.reset_display_options() + + def test_inf(self): + # Issue #11981 + + formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) + result = formatter(np.inf) + self.assertEqual(result, u('inf')) diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 476c6a636ae5a..ddf9d35841ce7 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -1,50 +1,41 @@ # -*- coding: utf-8 -*- +""" +test output formatting for Series/DataFrame +including to_string & reprs +""" + # TODO(wesm): lots of issues making flake8 hard # flake8: noqa from __future__ import print_function -from distutils.version import LooseVersion import re -from pandas.compat import (range, zip, lrange, StringIO, PY3, - u, lzip, is_platform_windows, - is_platform_32bit) -import pandas.compat as compat import itertools from operator import methodcaller import os import sys -from textwrap import dedent import warnings +from datetime import datetime -from numpy import nan -from numpy.random import randn -import numpy as np - -import codecs - -div_style = '' -try: - import IPython - if IPython.__version__ < LooseVersion('3.0.0'): - div_style = ' style="max-width:1500px;overflow:auto;"' -except (ImportError, AttributeError): - pass +import pytest -from pandas import DataFrame, Series, Index, Timestamp, MultiIndex, date_range, NaT +import numpy as np +import pandas as pd +from pandas import (DataFrame, Series, Index, Timestamp, MultiIndex, + date_range, NaT, read_table) +from pandas.compat import (range, zip, lrange, StringIO, PY3, + u, lzip, is_platform_windows, + is_platform_32bit) +import pandas.compat as compat import pandas.formats.format as fmt -import pandas.util.testing as tm -import pandas.core.common as com import pandas.formats.printing as printing + +import pandas.util.testing as tm from pandas.util.terminal import get_terminal_size -import pandas as pd from pandas.core.config import (set_option, get_option, option_context, reset_option) -from datetime import 
datetime - -import pytest use_32bit_repr = is_platform_windows() or is_platform_32bit() @@ -288,7 +279,7 @@ def test_repr_max_columns_max_rows(self): term_width, term_height = get_terminal_size() if term_width < 10 or term_height < 10: pytest.skip("terminal size too small, " - "{0} x {1}".format(term_width, term_height)) + "{0} x {1}".format(term_width, term_height)) def mkframe(n): index = ['%05d' % i for i in range(n)] @@ -829,1393 +820,6 @@ def test_datetimelike_frame(self): '[10 rows x 2 columns]') self.assertEqual(repr(df), expected) - def test_to_html_with_col_space(self): - def check_with_width(df, col_space): - import re - # check that col_space affects HTML generation - # and be very brittle about it. - html = df.to_html(col_space=col_space) - hdrs = [x for x in html.split(r"\n") if re.search(r"\s]", x)] - self.assertTrue(len(hdrs) > 0) - for h in hdrs: - self.assertTrue("min-width" in h) - self.assertTrue(str(col_space) in h) - - df = DataFrame(np.random.random(size=(1, 3))) - - check_with_width(df, 30) - check_with_width(df, 50) - - def test_to_html_with_empty_string_label(self): - # GH3547, to_html regards empty string labels as repeated labels - data = {'c1': ['a', 'b'], 'c2': ['a', ''], 'data': [1, 2]} - df = DataFrame(data).set_index(['c1', 'c2']) - res = df.to_html() - self.assertTrue("rowspan" not in res) - - def test_to_html_unicode(self): - df = DataFrame({u('\u03c3'): np.arange(10.)}) - expected = u'
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
\u03c3
00.0
11.0
22.0
33.0
44.0
55.0
66.0
77.0
88.0
99.0
' - self.assertEqual(df.to_html(), expected) - df = DataFrame({'A': [u('\u03c3')]}) - expected = u'\n \n \n \n \n \n \n \n \n \n \n \n \n
A
0\u03c3
' - self.assertEqual(df.to_html(), expected) - - def test_to_html_decimal(self): - # GH 12031 - df = DataFrame({'A': [6.0, 3.1, 2.2]}) - result = df.to_html(decimal=',') - expected = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
A
06,0
13,1
22,2
') - self.assertEqual(result, expected) - - def test_to_html_escaped(self): - a = 'str", - b: ""}, - 'co>l2': {a: "", - b: ""}} - rs = DataFrame(test_dict).to_html() - xp = """ - - - - - - - - - - - - - - - - - - - -
co<l1co>l2
str<ing1 &amp;<type 'str'><type 'str'>
stri>ng2 &amp;<type 'str'><type 'str'>
""" - - self.assertEqual(xp, rs) - - def test_to_html_escape_disabled(self): - a = 'strbold", - b: "bold"}, - 'co>l2': {a: "bold", - b: "bold"}} - rs = DataFrame(test_dict).to_html(escape=False) - xp = """ - - - - - - - - - - - - - - - - - -
co - co>l2
str - boldbold
stri>ng2 &boldbold
""" - - self.assertEqual(xp, rs) - - def test_to_html_multiindex_index_false(self): - # issue 8452 - df = DataFrame({ - 'a': range(2), - 'b': range(3, 5), - 'c': range(5, 7), - 'd': range(3, 5) - }) - df.columns = MultiIndex.from_product([['a', 'b'], ['c', 'd']]) - result = df.to_html(index=False) - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ab
cdcd
0353
1464
""" - - self.assertEqual(result, expected) - - df.index = Index(df.index.values, name='idx') - result = df.to_html(index=False) - self.assertEqual(result, expected) - - def test_to_html_multiindex_sparsify_false_multi_sparse(self): - with option_context('display.multi_sparse', False): - index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], - names=['foo', None]) - - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index) - - result = df.to_html() - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
01
foo
0001
0123
1045
1167
""" - - self.assertEqual(result, expected) - - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], - columns=index[::2], index=index) - - result = df.to_html() - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
foo01
00
foo
0001
0123
1045
1167
""" - - self.assertEqual(result, expected) - - def test_to_html_multiindex_sparsify(self): - index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], - names=['foo', None]) - - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index) - - result = df.to_html() - expected = """ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
01
foo
0001
123
1045
167
""" - - self.assertEqual(result, expected) - - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=index[::2], - index=index) - - result = df.to_html() - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
foo01
00
foo
0001
123
1045
167
""" - - self.assertEqual(result, expected) - - def test_to_html_multiindex_odd_even_truncate(self): - # GH 14882 - Issue on truncation with odd length DataFrame - mi = MultiIndex.from_product([[100, 200, 300], - [10, 20, 30], - [1, 2, 3, 4, 5, 6, 7]], - names=['a', 'b', 'c']) - df = DataFrame({'n': range(len(mi))}, index=mi) - result = df.to_html(max_rows=60) - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
n
abc
1001010
21
32
43
54
65
76
2017
28
39
410
511
612
713
30114
215
316
417
518
619
720
20010121
222
323
424
525
626
727
20128
229
......
633
734
30135
236
337
438
539
640
741
30010142
243
344
445
546
647
748
20149
250
351
452
553
654
755
30156
257
358
459
560
661
762
""" - self.assertEqual(result, expected) - - # Test that ... appears in a middle level - result = df.to_html(max_rows=56) - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
n
abc
1001010
21
32
43
54
65
76
2017
28
39
410
511
612
713
30114
215
316
417
518
619
720
20010121
222
323
424
525
626
727
.........
30135
236
337
438
539
640
741
30010142
243
344
445
546
647
748
20149
250
351
452
553
654
755
30156
257
358
459
560
661
762
""" - self.assertEqual(result, expected) - - def test_to_html_index_formatter(self): - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=['foo', None], - index=lrange(4)) - - f = lambda x: 'abcd' [x] - result = df.to_html(formatters={'__index__': f}) - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
fooNone
a01
b23
c45
d67
""" - - self.assertEqual(result, expected) - - def test_to_html_datetime64_monthformatter(self): - months = [datetime(2016, 1, 1), datetime(2016, 2, 2)] - x = DataFrame({'months': months}) - - def format_func(x): - return x.strftime('%Y-%m') - result = x.to_html(formatters={'months': format_func}) - expected = """\ - - - - - - - - - - - - - - - - - -
months
02016-01
12016-02
""" - self.assertEqual(result, expected) - - def test_to_html_datetime64_hourformatter(self): - - x = DataFrame({'hod': pd.to_datetime(['10:10:10.100', '12:12:12.120'], - format='%H:%M:%S.%f')}) - - def format_func(x): - return x.strftime('%H:%M') - result = x.to_html(formatters={'hod': format_func}) - expected = """\ - - - - - - - - - - - - - - - - - -
hod
010:10
112:12
""" - self.assertEqual(result, expected) - - def test_to_html_regression_GH6098(self): - df = DataFrame({u('clé1'): [u('a'), u('a'), u('b'), u('b'), u('a')], - u('clé2'): [u('1er'), u('2ème'), u('1er'), u('2ème'), - u('1er')], - 'données1': np.random.randn(5), - 'données2': np.random.randn(5)}) - # it works - df.pivot_table(index=[u('clé1')], columns=[u('clé2')])._repr_html_() - - def test_to_html_truncate(self): - pytest.skip("unreliable on travis") - index = pd.DatetimeIndex(start='20010101', freq='D', periods=20) - df = DataFrame(index=index, columns=range(20)) - fmt.set_option('display.max_rows', 8) - fmt.set_option('display.max_columns', 4) - result = df._repr_html_() - expected = '''\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
01...1819
2001-01-01NaNNaN...NaNNaN
2001-01-02NaNNaN...NaNNaN
2001-01-03NaNNaN...NaNNaN
2001-01-04NaNNaN...NaNNaN
..................
2001-01-17NaNNaN...NaNNaN
2001-01-18NaNNaN...NaNNaN
2001-01-19NaNNaN...NaNNaN
2001-01-20NaNNaN...NaNNaN
-

20 rows × 20 columns

-'''.format(div_style) - if compat.PY2: - expected = expected.decode('utf-8') - self.assertEqual(result, expected) - - def test_to_html_truncate_multi_index(self): - pytest.skip("unreliable on travis") - arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - df = DataFrame(index=arrays, columns=arrays) - fmt.set_option('display.max_rows', 7) - fmt.set_option('display.max_columns', 7) - result = df._repr_html_() - expected = '''\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
barbaz...fooqux
onetwoone...twoonetwo
baroneNaNNaNNaN...NaNNaNNaN
twoNaNNaNNaN...NaNNaNNaN
bazoneNaNNaNNaN...NaNNaNNaN
...........................
footwoNaNNaNNaN...NaNNaNNaN
quxoneNaNNaNNaN...NaNNaNNaN
twoNaNNaNNaN...NaNNaNNaN
-

8 rows × 8 columns

-'''.format(div_style) - if compat.PY2: - expected = expected.decode('utf-8') - self.assertEqual(result, expected) - - def test_to_html_truncate_multi_index_sparse_off(self): - pytest.skip("unreliable on travis") - arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - df = DataFrame(index=arrays, columns=arrays) - fmt.set_option('display.max_rows', 7) - fmt.set_option('display.max_columns', 7) - fmt.set_option('display.multi_sparse', False) - result = df._repr_html_() - expected = '''\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
barbarbaz...fooquxqux
onetwoone...twoonetwo
baroneNaNNaNNaN...NaNNaNNaN
bartwoNaNNaNNaN...NaNNaNNaN
bazoneNaNNaNNaN...NaNNaNNaN
footwoNaNNaNNaN...NaNNaNNaN
quxoneNaNNaNNaN...NaNNaNNaN
quxtwoNaNNaNNaN...NaNNaNNaN
-

8 rows × 8 columns

-'''.format(div_style) - if compat.PY2: - expected = expected.decode('utf-8') - self.assertEqual(result, expected) - - def test_to_html_border(self): - df = DataFrame({'A': [1, 2]}) - result = df.to_html() - assert 'border="1"' in result - - def test_to_html_border_option(self): - df = DataFrame({'A': [1, 2]}) - with pd.option_context('html.border', 0): - result = df.to_html() - self.assertTrue('border="0"' in result) - self.assertTrue('border="0"' in df._repr_html_()) - - def test_to_html_border_zero(self): - df = DataFrame({'A': [1, 2]}) - result = df.to_html(border=0) - self.assertTrue('border="0"' in result) - def test_nonunicode_nonascii_alignment(self): df = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]]) rep_str = df.to_string() @@ -2223,7 +827,7 @@ def test_nonunicode_nonascii_alignment(self): self.assertEqual(len(lines[1]), len(lines[2])) def test_unicode_problem_decoding_as_ascii(self): - dm = DataFrame({u('c/\u03c3'): Series({'test': np.NaN})}) + dm = DataFrame({u('c/\u03c3'): Series({'test': np.nan})}) compat.text_type(dm.to_string()) def test_string_repr_encoding(self): @@ -2271,7 +875,7 @@ def test_pprint_thing(self): # escape embedded tabs in string # GH #2038 - self.assertTrue(not "\t" in pp_t("a\tb", escape_chars=("\t", ))) + assert "\t" not in pp_t("a\tb", escape_chars=("\t", )) def test_wide_repr(self): with option_context('mode.sim_interactive', True, @@ -2294,7 +898,8 @@ def test_wide_repr(self): def test_wide_repr_wide_columns(self): with option_context('mode.sim_interactive', True): - df = DataFrame(randn(5, 3), columns=['a' * 90, 'b' * 90, 'c' * 90]) + df = DataFrame(np.random.randn(5, 3), + columns=['a' * 90, 'b' * 90, 'c' * 90]) rep_str = repr(df) self.assertEqual(len(rep_str.splitlines()), 20) @@ -2346,8 +951,8 @@ def test_wide_repr_multiindex_cols(self): with option_context('mode.sim_interactive', True): max_cols = get_option('display.max_columns') midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10))) - mcols = MultiIndex.from_arrays(tm.rands_array(3, size=(2, max_cols - - 1))) + mcols = MultiIndex.from_arrays( + tm.rands_array(3, size=(2, max_cols - 1))) df = DataFrame(tm.rands_array(25, (10, max_cols - 1)), index=midx, columns=mcols) df.index.names = ['Level 0', 'Level 1'] @@ -2465,16 +1070,14 @@ def test_index_with_nan(self): self.assertEqual(result, expected) def test_to_string(self): - from pandas import read_table - import re # big mixed - biggie = DataFrame({'A': randn(200), + biggie = DataFrame({'A': np.random.randn(200), 'B': tm.makeStringIndex(200)}, index=lrange(200)) - biggie.loc[:20, 'A'] = nan - biggie.loc[:20, 'B'] = nan + biggie.loc[:20, 'A'] = np.nan + biggie.loc[:20, 'B'] = np.nan s = biggie.to_string() buf = StringIO() @@ -2713,414 +1316,6 @@ def test_show_dimensions(self): self.assertFalse('5 rows' in str(df)) self.assertFalse('5 rows' in df._repr_html_()) - def test_to_html(self): - # big mixed - biggie = DataFrame({'A': randn(200), - 'B': tm.makeStringIndex(200)}, - index=lrange(200)) - - biggie.loc[:20, 'A'] = nan - biggie.loc[:20, 'B'] = nan - s = biggie.to_html() - - buf = StringIO() - retval = biggie.to_html(buf=buf) - self.assertIsNone(retval) - self.assertEqual(buf.getvalue(), s) - - tm.assertIsInstance(s, compat.string_types) - - biggie.to_html(columns=['B', 'A'], col_space=17) - biggie.to_html(columns=['B', 'A'], - formatters={'A': lambda x: '%.1f' % x}) - - biggie.to_html(columns=['B', 'A'], float_format=str) - biggie.to_html(columns=['B', 'A'], col_space=12, float_format=str) - - frame = 
DataFrame(index=np.arange(200)) - frame.to_html() - - def test_to_html_filename(self): - biggie = DataFrame({'A': randn(200), - 'B': tm.makeStringIndex(200)}, - index=lrange(200)) - - biggie.loc[:20, 'A'] = nan - biggie.loc[:20, 'B'] = nan - with tm.ensure_clean('test.html') as path: - biggie.to_html(path) - with open(path, 'r') as f: - s = biggie.to_html() - s2 = f.read() - self.assertEqual(s, s2) - - frame = DataFrame(index=np.arange(200)) - with tm.ensure_clean('test.html') as path: - frame.to_html(path) - with open(path, 'r') as f: - self.assertEqual(frame.to_html(), f.read()) - - def test_to_html_with_no_bold(self): - x = DataFrame({'x': randn(5)}) - ashtml = x.to_html(bold_rows=False) - self.assertFalse('")]) - - def test_to_html_columns_arg(self): - result = self.frame.to_html(columns=['A']) - self.assertNotIn('B', result) - - def test_to_html_multiindex(self): - columns = MultiIndex.from_tuples(list(zip(np.arange(2).repeat(2), - np.mod(lrange(4), 2))), - names=['CL0', 'CL1']) - df = DataFrame([list('abcd'), list('efgh')], columns=columns) - result = df.to_html(justify='left') - expected = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
CL001
CL10101
0abcd
1efgh
') - - self.assertEqual(result, expected) - - columns = MultiIndex.from_tuples(list(zip( - range(4), np.mod( - lrange(4), 2)))) - df = DataFrame([list('abcd'), list('efgh')], columns=columns) - - result = df.to_html(justify='right') - expected = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
0123
0101
0abcd
1efgh
') - - self.assertEqual(result, expected) - - def test_to_html_justify(self): - df = DataFrame({'A': [6, 30000, 2], - 'B': [1, 2, 70000], - 'C': [223442, 0, 1]}, - columns=['A', 'B', 'C']) - result = df.to_html(justify='left') - expected = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
061223442
13000020
22700001
') - self.assertEqual(result, expected) - - result = df.to_html(justify='right') - expected = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
061223442
13000020
22700001
') - self.assertEqual(result, expected) - - def test_to_html_index(self): - index = ['foo', 'bar', 'baz'] - df = DataFrame({'A': [1, 2, 3], - 'B': [1.2, 3.4, 5.6], - 'C': ['one', 'two', np.NaN]}, - columns=['A', 'B', 'C'], - index=index) - expected_with_index = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
foo11.2one
bar23.4two
baz35.6NaN
') - self.assertEqual(df.to_html(), expected_with_index) - - expected_without_index = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
11.2one
23.4two
35.6NaN
') - result = df.to_html(index=False) - for i in index: - self.assertNotIn(i, result) - self.assertEqual(result, expected_without_index) - df.index = Index(['foo', 'bar', 'baz'], name='idx') - expected_with_index = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
idx
foo11.2one
bar23.4two
baz35.6NaN
') - self.assertEqual(df.to_html(), expected_with_index) - self.assertEqual(df.to_html(index=False), expected_without_index) - - tuples = [('foo', 'car'), ('foo', 'bike'), ('bar', 'car')] - df.index = MultiIndex.from_tuples(tuples) - - expected_with_index = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
foocar11.2one
bike23.4two
barcar35.6NaN
') - self.assertEqual(df.to_html(), expected_with_index) - - result = df.to_html(index=False) - for i in ['foo', 'bar', 'car', 'bike']: - self.assertNotIn(i, result) - # must be the same result as normal index - self.assertEqual(result, expected_without_index) - - df.index = MultiIndex.from_tuples(tuples, names=['idx1', 'idx2']) - expected_with_index = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
idx1idx2
foocar11.2one
bike23.4two
barcar35.6NaN
') - self.assertEqual(df.to_html(), expected_with_index) - self.assertEqual(df.to_html(index=False), expected_without_index) - def test_repr_html(self): self.frame._repr_html_() @@ -3254,7 +1449,7 @@ def test_info_repr(self): def test_info_repr_max_cols(self): # GH #6939 - df = DataFrame(randn(10, 5)) + df = DataFrame(np.random.randn(10, 5)) with option_context('display.large_repr', 'info', 'display.max_columns', 1, 'display.max_info_columns', 4): @@ -3299,46 +1494,6 @@ def get_ipython(): self.reset_display_options() - def test_to_html_with_classes(self): - df = DataFrame() - result = df.to_html(classes="sortable draggable") - expected = dedent(""" - - - - - - - - - -
- - """).strip() - self.assertEqual(result, expected) - - result = df.to_html(classes=["sortable", "draggable"]) - self.assertEqual(result, expected) - - def test_to_html_no_index_max_rows(self): - # GH https://github.com/pandas-dev/pandas/issues/14998 - df = DataFrame({"A": [1, 2, 3, 4]}) - result = df.to_html(index=False, max_rows=1) - expected = dedent("""\ - - - - - - - - - - - -
A
1
""") - self.assertEqual(result, expected) - def test_pprint_pathological_object(self): """ if the test fails, the stack will overflow and nose crash, @@ -3373,541 +1528,6 @@ def test_dict_entries(self): self.assertTrue("'a': 1" in val) self.assertTrue("'b': 2" in val) - def test_to_latex_filename(self): - with tm.ensure_clean('test.tex') as path: - self.frame.to_latex(path) - - with open(path, 'r') as f: - self.assertEqual(self.frame.to_latex(), f.read()) - - # test with utf-8 and encoding option (GH 7061) - df = DataFrame([[u'au\xdfgangen']]) - with tm.ensure_clean('test.tex') as path: - df.to_latex(path, encoding='utf-8') - with codecs.open(path, 'r', encoding='utf-8') as f: - self.assertEqual(df.to_latex(), f.read()) - - # test with utf-8 without encoding option - if compat.PY3: # python3: pandas default encoding is utf-8 - with tm.ensure_clean('test.tex') as path: - df.to_latex(path) - with codecs.open(path, 'r', encoding='utf-8') as f: - self.assertEqual(df.to_latex(), f.read()) - else: - # python2 default encoding is ascii, so an error should be raised - with tm.ensure_clean('test.tex') as path: - self.assertRaises(UnicodeEncodeError, df.to_latex, path) - - def test_to_latex(self): - # it works! - self.frame.to_latex() - - df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) - withindex_result = df.to_latex() - withindex_expected = r"""\begin{tabular}{lrl} -\toprule -{} & a & b \\ -\midrule -0 & 1 & b1 \\ -1 & 2 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withindex_result, withindex_expected) - - withoutindex_result = df.to_latex(index=False) - withoutindex_expected = r"""\begin{tabular}{rl} -\toprule - a & b \\ -\midrule - 1 & b1 \\ - 2 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withoutindex_result, withoutindex_expected) - - def test_to_latex_format(self): - # GH Bug #9402 - self.frame.to_latex(column_format='ccc') - - df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) - withindex_result = df.to_latex(column_format='ccc') - withindex_expected = r"""\begin{tabular}{ccc} -\toprule -{} & a & b \\ -\midrule -0 & 1 & b1 \\ -1 & 2 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withindex_result, withindex_expected) - - def test_to_latex_with_formatters(self): - df = DataFrame({'int': [1, 2, 3], - 'float': [1.0, 2.0, 3.0], - 'object': [(1, 2), True, False], - 'datetime64': [datetime(2016, 1, 1), - datetime(2016, 2, 5), - datetime(2016, 3, 3)]}) - - formatters = {'int': lambda x: '0x%x' % x, - 'float': lambda x: '[% 4.1f]' % x, - 'object': lambda x: '-%s-' % str(x), - 'datetime64': lambda x: x.strftime('%Y-%m'), - '__index__': lambda x: 'index: %s' % x} - result = df.to_latex(formatters=dict(formatters)) - - expected = r"""\begin{tabular}{llrrl} -\toprule -{} & datetime64 & float & int & object \\ -\midrule -index: 0 & 2016-01 & [ 1.0] & 0x1 & -(1, 2)- \\ -index: 1 & 2016-02 & [ 2.0] & 0x2 & -True- \\ -index: 2 & 2016-03 & [ 3.0] & 0x3 & -False- \\ -\bottomrule -\end{tabular} -""" - self.assertEqual(result, expected) - - def test_to_latex_multiindex(self): - df = DataFrame({('x', 'y'): ['a']}) - result = df.to_latex() - expected = r"""\begin{tabular}{ll} -\toprule -{} & x \\ -{} & y \\ -\midrule -0 & a \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(result, expected) - - result = df.T.to_latex() - expected = r"""\begin{tabular}{lll} -\toprule - & & 0 \\ -\midrule -x & y & a \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(result, expected) - - df = DataFrame.from_dict({ - ('c1', 0): pd.Series(dict((x, x) for x in range(4))), - 
('c1', 1): pd.Series(dict((x, x + 4) for x in range(4))), - ('c2', 0): pd.Series(dict((x, x) for x in range(4))), - ('c2', 1): pd.Series(dict((x, x + 4) for x in range(4))), - ('c3', 0): pd.Series(dict((x, x) for x in range(4))), - }).T - result = df.to_latex() - expected = r"""\begin{tabular}{llrrrr} -\toprule - & & 0 & 1 & 2 & 3 \\ -\midrule -c1 & 0 & 0 & 1 & 2 & 3 \\ - & 1 & 4 & 5 & 6 & 7 \\ -c2 & 0 & 0 & 1 & 2 & 3 \\ - & 1 & 4 & 5 & 6 & 7 \\ -c3 & 0 & 0 & 1 & 2 & 3 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(result, expected) - - # GH 10660 - df = pd.DataFrame({'a': [0, 0, 1, 1], - 'b': list('abab'), - 'c': [1, 2, 3, 4]}) - result = df.set_index(['a', 'b']).to_latex() - expected = r"""\begin{tabular}{llr} -\toprule - & & c \\ -a & b & \\ -\midrule -0 & a & 1 \\ - & b & 2 \\ -1 & a & 3 \\ - & b & 4 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(result, expected) - - result = df.groupby('a').describe().to_latex() - expected = ('\\begin{tabular}{lrrrrrrrr}\n\\toprule\n{} & c & ' - ' & & & & & & ' - '\\\\\n{} & count & mean & std & min & 25\\% & ' - '50\\% & 75\\% & max \\\\\na & & & ' - ' & & & & & \\\\\n\\midrule\n0 ' - '& 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 ' - '& 2.0 \\\\\n1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 ' - '& 3.5 & 3.75 & 4.0 ' - '\\\\\n\\bottomrule\n\\end{tabular}\n') - - self.assertEqual(result, expected) - - def test_to_latex_escape(self): - a = 'a' - b = 'b' - - test_dict = {u('co^l1'): {a: "a", - b: "b"}, - u('co$e^x$'): {a: "a", - b: "b"}} - - unescaped_result = DataFrame(test_dict).to_latex(escape=False) - escaped_result = DataFrame(test_dict).to_latex( - ) # default: escape=True - - unescaped_expected = r'''\begin{tabular}{lll} -\toprule -{} & co$e^x$ & co^l1 \\ -\midrule -a & a & a \\ -b & b & b \\ -\bottomrule -\end{tabular} -''' - - escaped_expected = r'''\begin{tabular}{lll} -\toprule -{} & co\$e\textasciicircumx\$ & co\textasciicircuml1 \\ -\midrule -a & a & a \\ -b & b & b \\ -\bottomrule -\end{tabular} -''' - - self.assertEqual(unescaped_result, unescaped_expected) - self.assertEqual(escaped_result, escaped_expected) - - def test_to_latex_longtable(self): - self.frame.to_latex(longtable=True) - - df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) - withindex_result = df.to_latex(longtable=True) - withindex_expected = r"""\begin{longtable}{lrl} -\toprule -{} & a & b \\ -\midrule -\endhead -\midrule -\multicolumn{3}{r}{{Continued on next page}} \\ -\midrule -\endfoot - -\bottomrule -\endlastfoot -0 & 1 & b1 \\ -1 & 2 & b2 \\ -\end{longtable} -""" - - self.assertEqual(withindex_result, withindex_expected) - - withoutindex_result = df.to_latex(index=False, longtable=True) - withoutindex_expected = r"""\begin{longtable}{rl} -\toprule - a & b \\ -\midrule -\endhead -\midrule -\multicolumn{3}{r}{{Continued on next page}} \\ -\midrule -\endfoot - -\bottomrule -\endlastfoot - 1 & b1 \\ - 2 & b2 \\ -\end{longtable} -""" - - self.assertEqual(withoutindex_result, withoutindex_expected) - - def test_to_latex_escape_special_chars(self): - special_characters = ['&', '%', '$', '#', '_', '{', '}', '~', '^', - '\\'] - df = DataFrame(data=special_characters) - observed = df.to_latex() - expected = r"""\begin{tabular}{ll} -\toprule -{} & 0 \\ -\midrule -0 & \& \\ -1 & \% \\ -2 & \$ \\ -3 & \# \\ -4 & \_ \\ -5 & \{ \\ -6 & \} \\ -7 & \textasciitilde \\ -8 & \textasciicircum \\ -9 & \textbackslash \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(observed, expected) - - def test_to_latex_no_header(self): - # GH 7124 - df = DataFrame({'a': [1, 2], 
'b': ['b1', 'b2']}) - withindex_result = df.to_latex(header=False) - withindex_expected = r"""\begin{tabular}{lrl} -\toprule -0 & 1 & b1 \\ -1 & 2 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withindex_result, withindex_expected) - - withoutindex_result = df.to_latex(index=False, header=False) - withoutindex_expected = r"""\begin{tabular}{rl} -\toprule - 1 & b1 \\ - 2 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withoutindex_result, withoutindex_expected) - - def test_to_latex_decimal(self): - # GH 12031 - self.frame.to_latex() - df = DataFrame({'a': [1.0, 2.1], 'b': ['b1', 'b2']}) - withindex_result = df.to_latex(decimal=',') - print("WHAT THE") - withindex_expected = r"""\begin{tabular}{lrl} -\toprule -{} & a & b \\ -\midrule -0 & 1,0 & b1 \\ -1 & 2,1 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withindex_result, withindex_expected) - - def test_to_csv_quotechar(self): - df = DataFrame({'col': [1, 2]}) - expected = """\ -"","col" -"0","1" -"1","2" -""" - - with tm.ensure_clean('test.csv') as path: - df.to_csv(path, quoting=1) # 1=QUOTE_ALL - with open(path, 'r') as f: - self.assertEqual(f.read(), expected) - - expected = """\ -$$,$col$ -$0$,$1$ -$1$,$2$ -""" - - with tm.ensure_clean('test.csv') as path: - df.to_csv(path, quoting=1, quotechar="$") - with open(path, 'r') as f: - self.assertEqual(f.read(), expected) - - with tm.ensure_clean('test.csv') as path: - with tm.assertRaisesRegexp(TypeError, 'quotechar'): - df.to_csv(path, quoting=1, quotechar=None) - - def test_to_csv_doublequote(self): - df = DataFrame({'col': ['a"a', '"bb"']}) - expected = '''\ -"","col" -"0","a""a" -"1","""bb""" -''' - - with tm.ensure_clean('test.csv') as path: - df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL - with open(path, 'r') as f: - self.assertEqual(f.read(), expected) - - from _csv import Error - with tm.ensure_clean('test.csv') as path: - with tm.assertRaisesRegexp(Error, 'escapechar'): - df.to_csv(path, doublequote=False) # no escapechar set - - def test_to_csv_escapechar(self): - df = DataFrame({'col': ['a"a', '"bb"']}) - expected = '''\ -"","col" -"0","a\\"a" -"1","\\"bb\\"" -''' - - with tm.ensure_clean('test.csv') as path: # QUOTE_ALL - df.to_csv(path, quoting=1, doublequote=False, escapechar='\\') - with open(path, 'r') as f: - self.assertEqual(f.read(), expected) - - df = DataFrame({'col': ['a,a', ',bb,']}) - expected = """\ -,col -0,a\\,a -1,\\,bb\\, -""" - - with tm.ensure_clean('test.csv') as path: - df.to_csv(path, quoting=3, escapechar='\\') # QUOTE_NONE - with open(path, 'r') as f: - self.assertEqual(f.read(), expected) - - def test_csv_to_string(self): - df = DataFrame({'col': [1, 2]}) - expected = ',col\n0,1\n1,2\n' - self.assertEqual(df.to_csv(), expected) - - def test_to_csv_decimal(self): - # GH 781 - df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]}) - - expected_default = ',col1,col2,col3\n0,1,a,10.1\n' - self.assertEqual(df.to_csv(), expected_default) - - expected_european_excel = ';col1;col2;col3\n0;1;a;10,1\n' - self.assertEqual( - df.to_csv(decimal=',', sep=';'), expected_european_excel) - - expected_float_format_default = ',col1,col2,col3\n0,1,a,10.10\n' - self.assertEqual( - df.to_csv(float_format='%.2f'), expected_float_format_default) - - expected_float_format = ';col1;col2;col3\n0;1;a;10,10\n' - self.assertEqual( - df.to_csv(decimal=',', sep=';', - float_format='%.2f'), expected_float_format) - - # GH 11553: testing if decimal is taken into account for '0.0' - df = pd.DataFrame({'a': [0, 1.1], 'b': 
[2.2, 3.3], 'c': 1}) - expected = 'a,b,c\n0^0,2^2,1\n1^1,3^3,1\n' - self.assertEqual(df.to_csv(index=False, decimal='^'), expected) - - # same but for an index - self.assertEqual(df.set_index('a').to_csv(decimal='^'), expected) - - # same for a multi-index - self.assertEqual( - df.set_index(['a', 'b']).to_csv(decimal="^"), expected) - - def test_to_csv_float_format(self): - # testing if float_format is taken into account for the index - # GH 11553 - df = pd.DataFrame({'a': [0, 1], 'b': [2.2, 3.3], 'c': 1}) - expected = 'a,b,c\n0,2.20,1\n1,3.30,1\n' - self.assertEqual( - df.set_index('a').to_csv(float_format='%.2f'), expected) - - # same for a multi-index - self.assertEqual( - df.set_index(['a', 'b']).to_csv(float_format='%.2f'), expected) - - def test_to_csv_na_rep(self): - # testing if NaN values are correctly represented in the index - # GH 11553 - df = DataFrame({'a': [0, np.NaN], 'b': [0, 1], 'c': [2, 3]}) - expected = "a,b,c\n0.0,0,2\n_,1,3\n" - self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) - self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) - - # now with an index containing only NaNs - df = DataFrame({'a': np.NaN, 'b': [0, 1], 'c': [2, 3]}) - expected = "a,b,c\n_,0,2\n_,1,3\n" - self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) - self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) - - # check if na_rep parameter does not break anything when no NaN - df = DataFrame({'a': 0, 'b': [0, 1], 'c': [2, 3]}) - expected = "a,b,c\n0,0,2\n0,1,3\n" - self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) - self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) - - def test_to_csv_date_format(self): - # GH 10209 - df_sec = DataFrame({'A': pd.date_range('20130101', periods=5, freq='s') - }) - df_day = DataFrame({'A': pd.date_range('20130101', periods=5, freq='d') - }) - - expected_default_sec = ',A\n0,2013-01-01 00:00:00\n1,2013-01-01 00:00:01\n2,2013-01-01 00:00:02' + \ - '\n3,2013-01-01 00:00:03\n4,2013-01-01 00:00:04\n' - self.assertEqual(df_sec.to_csv(), expected_default_sec) - - expected_ymdhms_day = ',A\n0,2013-01-01 00:00:00\n1,2013-01-02 00:00:00\n2,2013-01-03 00:00:00' + \ - '\n3,2013-01-04 00:00:00\n4,2013-01-05 00:00:00\n' - self.assertEqual( - df_day.to_csv( - date_format='%Y-%m-%d %H:%M:%S'), expected_ymdhms_day) - - expected_ymd_sec = ',A\n0,2013-01-01\n1,2013-01-01\n2,2013-01-01\n3,2013-01-01\n4,2013-01-01\n' - self.assertEqual( - df_sec.to_csv(date_format='%Y-%m-%d'), expected_ymd_sec) - - expected_default_day = ',A\n0,2013-01-01\n1,2013-01-02\n2,2013-01-03\n3,2013-01-04\n4,2013-01-05\n' - self.assertEqual(df_day.to_csv(), expected_default_day) - self.assertEqual( - df_day.to_csv(date_format='%Y-%m-%d'), expected_default_day) - - # testing if date_format parameter is taken into account for - # multi-indexed dataframes (GH 7791) - df_sec['B'] = 0 - df_sec['C'] = 1 - expected_ymd_sec = 'A,B,C\n2013-01-01,0,1\n' - df_sec_grouped = df_sec.groupby([pd.Grouper(key='A', freq='1h'), 'B']) - self.assertEqual(df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d'), - expected_ymd_sec) - - def test_to_csv_multi_index(self): - # see gh-6618 - df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]])) - - exp = ",1\n,2\n0,1\n" - self.assertEqual(df.to_csv(), exp) - - exp = "1\n2\n1\n" - self.assertEqual(df.to_csv(index=False), exp) - - df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]), - index=pd.MultiIndex.from_arrays([[1], [2]])) - - exp = ",,1\n,,2\n1,2,1\n" - 
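# one header row per column level: the two leading commas are the
# empty cells sitting above the two unnamed row-index levels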
self.assertEqual(df.to_csv(), exp) - - exp = "1\n2\n1\n" - self.assertEqual(df.to_csv(index=False), exp) - - df = DataFrame( - [1], columns=pd.MultiIndex.from_arrays([['foo'], ['bar']])) - - exp = ",foo\n,bar\n0,1\n" - self.assertEqual(df.to_csv(), exp) - - exp = "foo\nbar\n1\n" - self.assertEqual(df.to_csv(index=False), exp) - def test_period(self): # GH 12615 df = pd.DataFrame({'A': pd.period_range('2013-01', @@ -4291,7 +1911,7 @@ def test_max_multi_index_display(self): ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] tuples = list(zip(*arrays)) index = MultiIndex.from_tuples(tuples, names=['first', 'second']) - s = Series(randn(8), index=index) + s = Series(np.random.randn(8), index=index) with option_context("display.max_rows", 10): self.assertEqual(len(str(s).split('\n')), 10) @@ -4305,7 +1925,7 @@ def test_max_multi_index_display(self): self.assertEqual(len(str(s).split('\n')), 10) # index - s = Series(randn(8), None) + s = Series(np.random.randn(8), None) with option_context("display.max_rows", 10): self.assertEqual(len(str(s).split('\n')), 9) @@ -4436,176 +2056,6 @@ def test_to_string_header(self): self.assertEqual(res, exp) -class TestEngFormatter(tm.TestCase): - - def test_eng_float_formatter(self): - df = DataFrame({'A': [1.41, 141., 14100, 1410000.]}) - - fmt.set_eng_float_format() - result = df.to_string() - expected = (' A\n' - '0 1.410E+00\n' - '1 141.000E+00\n' - '2 14.100E+03\n' - '3 1.410E+06') - self.assertEqual(result, expected) - - fmt.set_eng_float_format(use_eng_prefix=True) - result = df.to_string() - expected = (' A\n' - '0 1.410\n' - '1 141.000\n' - '2 14.100k\n' - '3 1.410M') - self.assertEqual(result, expected) - - fmt.set_eng_float_format(accuracy=0) - result = df.to_string() - expected = (' A\n' - '0 1E+00\n' - '1 141E+00\n' - '2 14E+03\n' - '3 1E+06') - self.assertEqual(result, expected) - - self.reset_display_options() - - def compare(self, formatter, input, output): - formatted_input = formatter(input) - msg = ("formatting of %s results in '%s', expected '%s'" % - (str(input), formatted_input, output)) - self.assertEqual(formatted_input, output, msg) - - def compare_all(self, formatter, in_out): - """ - Parameters: - ----------- - formatter: EngFormatter under test - in_out: list of tuples. Each tuple = (number, expected_formatting) - - It is tested if 'formatter(number) == expected_formatting'. - *number* should be >= 0 because formatter(-number) == fmt is also - tested. 
*fmt* is derived from *expected_formatting* - """ - for input, output in in_out: - self.compare(formatter, input, output) - self.compare(formatter, -input, "-" + output[1:]) - - def test_exponents_with_eng_prefix(self): - formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) - f = np.sqrt(2) - in_out = [(f * 10 ** -24, " 1.414y"), (f * 10 ** -23, " 14.142y"), - (f * 10 ** -22, " 141.421y"), (f * 10 ** -21, " 1.414z"), - (f * 10 ** -20, " 14.142z"), (f * 10 ** -19, " 141.421z"), - (f * 10 ** -18, " 1.414a"), (f * 10 ** -17, " 14.142a"), - (f * 10 ** -16, " 141.421a"), (f * 10 ** -15, " 1.414f"), - (f * 10 ** -14, " 14.142f"), (f * 10 ** -13, " 141.421f"), - (f * 10 ** -12, " 1.414p"), (f * 10 ** -11, " 14.142p"), - (f * 10 ** -10, " 141.421p"), (f * 10 ** -9, " 1.414n"), - (f * 10 ** -8, " 14.142n"), (f * 10 ** -7, " 141.421n"), - (f * 10 ** -6, " 1.414u"), (f * 10 ** -5, " 14.142u"), - (f * 10 ** -4, " 141.421u"), (f * 10 ** -3, " 1.414m"), - (f * 10 ** -2, " 14.142m"), (f * 10 ** -1, " 141.421m"), - (f * 10 ** 0, " 1.414"), (f * 10 ** 1, " 14.142"), - (f * 10 ** 2, " 141.421"), (f * 10 ** 3, " 1.414k"), - (f * 10 ** 4, " 14.142k"), (f * 10 ** 5, " 141.421k"), - (f * 10 ** 6, " 1.414M"), (f * 10 ** 7, " 14.142M"), - (f * 10 ** 8, " 141.421M"), (f * 10 ** 9, " 1.414G"), ( - f * 10 ** 10, " 14.142G"), (f * 10 ** 11, " 141.421G"), - (f * 10 ** 12, " 1.414T"), (f * 10 ** 13, " 14.142T"), ( - f * 10 ** 14, " 141.421T"), (f * 10 ** 15, " 1.414P"), ( - f * 10 ** 16, " 14.142P"), (f * 10 ** 17, " 141.421P"), ( - f * 10 ** 18, " 1.414E"), (f * 10 ** 19, " 14.142E"), - (f * 10 ** 20, " 141.421E"), (f * 10 ** 21, " 1.414Z"), ( - f * 10 ** 22, " 14.142Z"), (f * 10 ** 23, " 141.421Z"), ( - f * 10 ** 24, " 1.414Y"), (f * 10 ** 25, " 14.142Y"), ( - f * 10 ** 26, " 141.421Y")] - self.compare_all(formatter, in_out) - - def test_exponents_without_eng_prefix(self): - formatter = fmt.EngFormatter(accuracy=4, use_eng_prefix=False) - f = np.pi - in_out = [(f * 10 ** -24, " 3.1416E-24"), - (f * 10 ** -23, " 31.4159E-24"), - (f * 10 ** -22, " 314.1593E-24"), - (f * 10 ** -21, " 3.1416E-21"), - (f * 10 ** -20, " 31.4159E-21"), - (f * 10 ** -19, " 314.1593E-21"), - (f * 10 ** -18, " 3.1416E-18"), - (f * 10 ** -17, " 31.4159E-18"), - (f * 10 ** -16, " 314.1593E-18"), - (f * 10 ** -15, " 3.1416E-15"), - (f * 10 ** -14, " 31.4159E-15"), - (f * 10 ** -13, " 314.1593E-15"), - (f * 10 ** -12, " 3.1416E-12"), - (f * 10 ** -11, " 31.4159E-12"), - (f * 10 ** -10, " 314.1593E-12"), - (f * 10 ** -9, " 3.1416E-09"), (f * 10 ** -8, " 31.4159E-09"), - (f * 10 ** -7, " 314.1593E-09"), (f * 10 ** -6, " 3.1416E-06"), - (f * 10 ** -5, " 31.4159E-06"), (f * 10 ** -4, - " 314.1593E-06"), - (f * 10 ** -3, " 3.1416E-03"), (f * 10 ** -2, " 31.4159E-03"), - (f * 10 ** -1, " 314.1593E-03"), (f * 10 ** 0, " 3.1416E+00"), ( - f * 10 ** 1, " 31.4159E+00"), (f * 10 ** 2, " 314.1593E+00"), - (f * 10 ** 3, " 3.1416E+03"), (f * 10 ** 4, " 31.4159E+03"), ( - f * 10 ** 5, " 314.1593E+03"), (f * 10 ** 6, " 3.1416E+06"), - (f * 10 ** 7, " 31.4159E+06"), (f * 10 ** 8, " 314.1593E+06"), ( - f * 10 ** 9, " 3.1416E+09"), (f * 10 ** 10, " 31.4159E+09"), - (f * 10 ** 11, " 314.1593E+09"), (f * 10 ** 12, " 3.1416E+12"), - (f * 10 ** 13, " 31.4159E+12"), (f * 10 ** 14, " 314.1593E+12"), - (f * 10 ** 15, " 3.1416E+15"), (f * 10 ** 16, " 31.4159E+15"), - (f * 10 ** 17, " 314.1593E+15"), (f * 10 ** 18, " 3.1416E+18"), - (f * 10 ** 19, " 31.4159E+18"), (f * 10 ** 20, " 314.1593E+18"), - (f * 10 ** 21, " 3.1416E+21"), (f * 10 ** 22, " 31.4159E+21"), - (f 
* 10 ** 23, " 314.1593E+21"), (f * 10 ** 24, " 3.1416E+24"), - (f * 10 ** 25, " 31.4159E+24"), (f * 10 ** 26, " 314.1593E+24")] - self.compare_all(formatter, in_out) - - def test_rounding(self): - formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) - in_out = [(5.55555, ' 5.556'), (55.5555, ' 55.556'), - (555.555, ' 555.555'), (5555.55, ' 5.556k'), - (55555.5, ' 55.556k'), (555555, ' 555.555k')] - self.compare_all(formatter, in_out) - - formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) - in_out = [(5.55555, ' 5.6'), (55.5555, ' 55.6'), (555.555, ' 555.6'), - (5555.55, ' 5.6k'), (55555.5, ' 55.6k'), (555555, ' 555.6k')] - self.compare_all(formatter, in_out) - - formatter = fmt.EngFormatter(accuracy=0, use_eng_prefix=True) - in_out = [(5.55555, ' 6'), (55.5555, ' 56'), (555.555, ' 556'), - (5555.55, ' 6k'), (55555.5, ' 56k'), (555555, ' 556k')] - self.compare_all(formatter, in_out) - - formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) - result = formatter(0) - self.assertEqual(result, u(' 0.000')) - - def test_nan(self): - # Issue #11981 - - formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) - result = formatter(np.nan) - self.assertEqual(result, u('NaN')) - - df = pd.DataFrame({'a': [1.5, 10.3, 20.5], - 'b': [50.3, 60.67, 70.12], - 'c': [100.2, 101.33, 120.33]}) - pt = df.pivot_table(values='a', index='b', columns='c') - fmt.set_eng_float_format(accuracy=1) - result = pt.to_string() - self.assertTrue('NaN' in result) - self.reset_display_options() - - def test_inf(self): - # Issue #11981 - - formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) - result = formatter(np.inf) - self.assertEqual(result, u('inf')) - - def _three_digit_exp(): return '%.4g' % 1.7e8 == '1.7e+008' diff --git a/pandas/tests/formats/test_to_csv.py b/pandas/tests/formats/test_to_csv.py new file mode 100644 index 0000000000000..51295fd750602 --- /dev/null +++ b/pandas/tests/formats/test_to_csv.py @@ -0,0 +1,216 @@ +from pandas import DataFrame +import numpy as np +import pandas as pd +from pandas.util import testing as tm + + +class TestToCSV(tm.TestCase): + + def test_to_csv_quotechar(self): + df = DataFrame({'col': [1, 2]}) + expected = """\ +"","col" +"0","1" +"1","2" +""" + + with tm.ensure_clean('test.csv') as path: + df.to_csv(path, quoting=1) # 1=QUOTE_ALL + with open(path, 'r') as f: + self.assertEqual(f.read(), expected) + + expected = """\ +$$,$col$ +$0$,$1$ +$1$,$2$ +""" + + with tm.ensure_clean('test.csv') as path: + df.to_csv(path, quoting=1, quotechar="$") + with open(path, 'r') as f: + self.assertEqual(f.read(), expected) + + with tm.ensure_clean('test.csv') as path: + with tm.assertRaisesRegexp(TypeError, 'quotechar'): + df.to_csv(path, quoting=1, quotechar=None) + + def test_to_csv_doublequote(self): + df = DataFrame({'col': ['a"a', '"bb"']}) + expected = '''\ +"","col" +"0","a""a" +"1","""bb""" +''' + + with tm.ensure_clean('test.csv') as path: + df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL + with open(path, 'r') as f: + self.assertEqual(f.read(), expected) + + from _csv import Error + with tm.ensure_clean('test.csv') as path: + with tm.assertRaisesRegexp(Error, 'escapechar'): + df.to_csv(path, doublequote=False) # no escapechar set + + def test_to_csv_escapechar(self): + df = DataFrame({'col': ['a"a', '"bb"']}) + expected = '''\ +"","col" +"0","a\\"a" +"1","\\"bb\\"" +''' + + with tm.ensure_clean('test.csv') as path: # QUOTE_ALL + df.to_csv(path, quoting=1, doublequote=False, escapechar='\\') + with open(path, 'r') as f: + 
self.assertEqual(f.read(), expected) + + df = DataFrame({'col': ['a,a', ',bb,']}) + expected = """\ +,col +0,a\\,a +1,\\,bb\\, +""" + + with tm.ensure_clean('test.csv') as path: + df.to_csv(path, quoting=3, escapechar='\\') # QUOTE_NONE + with open(path, 'r') as f: + self.assertEqual(f.read(), expected) + + def test_csv_to_string(self): + df = DataFrame({'col': [1, 2]}) + expected = ',col\n0,1\n1,2\n' + self.assertEqual(df.to_csv(), expected) + + def test_to_csv_decimal(self): + # GH 781 + df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]}) + + expected_default = ',col1,col2,col3\n0,1,a,10.1\n' + self.assertEqual(df.to_csv(), expected_default) + + expected_european_excel = ';col1;col2;col3\n0;1;a;10,1\n' + self.assertEqual( + df.to_csv(decimal=',', sep=';'), expected_european_excel) + + expected_float_format_default = ',col1,col2,col3\n0,1,a,10.10\n' + self.assertEqual( + df.to_csv(float_format='%.2f'), expected_float_format_default) + + expected_float_format = ';col1;col2;col3\n0;1;a;10,10\n' + self.assertEqual( + df.to_csv(decimal=',', sep=';', + float_format='%.2f'), expected_float_format) + + # GH 11553: testing if decimal is taken into account for '0.0' + df = pd.DataFrame({'a': [0, 1.1], 'b': [2.2, 3.3], 'c': 1}) + expected = 'a,b,c\n0^0,2^2,1\n1^1,3^3,1\n' + self.assertEqual(df.to_csv(index=False, decimal='^'), expected) + + # same but for an index + self.assertEqual(df.set_index('a').to_csv(decimal='^'), expected) + + # same for a multi-index + self.assertEqual( + df.set_index(['a', 'b']).to_csv(decimal="^"), expected) + + def test_to_csv_float_format(self): + # testing if float_format is taken into account for the index + # GH 11553 + df = pd.DataFrame({'a': [0, 1], 'b': [2.2, 3.3], 'c': 1}) + expected = 'a,b,c\n0,2.20,1\n1,3.30,1\n' + self.assertEqual( + df.set_index('a').to_csv(float_format='%.2f'), expected) + + # same for a multi-index + self.assertEqual( + df.set_index(['a', 'b']).to_csv(float_format='%.2f'), expected) + + def test_to_csv_na_rep(self): + # testing if NaN values are correctly represented in the index + # GH 11553 + df = DataFrame({'a': [0, np.NaN], 'b': [0, 1], 'c': [2, 3]}) + expected = "a,b,c\n0.0,0,2\n_,1,3\n" + self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) + self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) + + # now with an index containing only NaNs + df = DataFrame({'a': np.NaN, 'b': [0, 1], 'c': [2, 3]}) + expected = "a,b,c\n_,0,2\n_,1,3\n" + self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) + self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) + + # check if na_rep parameter does not break anything when no NaN + df = DataFrame({'a': 0, 'b': [0, 1], 'c': [2, 3]}) + expected = "a,b,c\n0,0,2\n0,1,3\n" + self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) + self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) + + def test_to_csv_date_format(self): + # GH 10209 + df_sec = DataFrame({'A': pd.date_range('20130101', periods=5, freq='s') + }) + df_day = DataFrame({'A': pd.date_range('20130101', periods=5, freq='d') + }) + + expected_default_sec = (',A\n0,2013-01-01 00:00:00\n1,' + '2013-01-01 00:00:01\n2,2013-01-01 00:00:02' + '\n3,2013-01-01 00:00:03\n4,' + '2013-01-01 00:00:04\n') + self.assertEqual(df_sec.to_csv(), expected_default_sec) + + expected_ymdhms_day = (',A\n0,2013-01-01 00:00:00\n1,' + '2013-01-02 00:00:00\n2,2013-01-03 00:00:00' + '\n3,2013-01-04 00:00:00\n4,' + '2013-01-05 00:00:00\n') + self.assertEqual( + df_day.to_csv( + 
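# day-frequency stamps still pick up a 00:00:00 time component
# when the format string asks for %H:%M:%S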
date_format='%Y-%m-%d %H:%M:%S'), expected_ymdhms_day) + + expected_ymd_sec = (',A\n0,2013-01-01\n1,2013-01-01\n2,' + '2013-01-01\n3,2013-01-01\n4,2013-01-01\n') + self.assertEqual( + df_sec.to_csv(date_format='%Y-%m-%d'), expected_ymd_sec) + + expected_default_day = (',A\n0,2013-01-01\n1,2013-01-02\n2,' + '2013-01-03\n3,2013-01-04\n4,2013-01-05\n') + self.assertEqual(df_day.to_csv(), expected_default_day) + self.assertEqual( + df_day.to_csv(date_format='%Y-%m-%d'), expected_default_day) + + # testing if date_format parameter is taken into account for + # multi-indexed dataframes (GH 7791) + df_sec['B'] = 0 + df_sec['C'] = 1 + expected_ymd_sec = 'A,B,C\n2013-01-01,0,1\n' + df_sec_grouped = df_sec.groupby([pd.Grouper(key='A', freq='1h'), 'B']) + self.assertEqual(df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d'), + expected_ymd_sec) + + def test_to_csv_multi_index(self): + # see gh-6618 + df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]])) + + exp = ",1\n,2\n0,1\n" + self.assertEqual(df.to_csv(), exp) + + exp = "1\n2\n1\n" + self.assertEqual(df.to_csv(index=False), exp) + + df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]), + index=pd.MultiIndex.from_arrays([[1], [2]])) + + exp = ",,1\n,,2\n1,2,1\n" + self.assertEqual(df.to_csv(), exp) + + exp = "1\n2\n1\n" + self.assertEqual(df.to_csv(index=False), exp) + + df = DataFrame( + [1], columns=pd.MultiIndex.from_arrays([['foo'], ['bar']])) + + exp = ",foo\n,bar\n0,1\n" + self.assertEqual(df.to_csv(), exp) + + exp = "foo\nbar\n1\n" + self.assertEqual(df.to_csv(index=False), exp) diff --git a/pandas/tests/formats/test_to_html.py b/pandas/tests/formats/test_to_html.py new file mode 100644 index 0000000000000..771c66e84037c --- /dev/null +++ b/pandas/tests/formats/test_to_html.py @@ -0,0 +1,1861 @@ +# -*- coding: utf-8 -*- + +import re +from textwrap import dedent +from datetime import datetime +from distutils.version import LooseVersion + +import pytest +import numpy as np +import pandas as pd +from pandas import compat, DataFrame, MultiIndex, option_context, Index +from pandas.compat import u, lrange, StringIO +from pandas.util import testing as tm +import pandas.formats.format as fmt + +div_style = '' +try: + import IPython + if IPython.__version__ < LooseVersion('3.0.0'): + div_style = ' style="max-width:1500px;overflow:auto;"' +except (ImportError, AttributeError): + pass + + +class TestToHTML(tm.TestCase): + + def test_to_html_with_col_space(self): + def check_with_width(df, col_space): + # check that col_space affects HTML generation + # and be very brittle about it. + html = df.to_html(col_space=col_space) + hdrs = [x for x in html.split(r"\n") if re.search(r"\s]", x)] + self.assertTrue(len(hdrs) > 0) + for h in hdrs: + self.assertTrue("min-width" in h) + self.assertTrue(str(col_space) in h) + + df = DataFrame(np.random.random(size=(1, 3))) + + check_with_width(df, 30) + check_with_width(df, 50) + + def test_to_html_with_empty_string_label(self): + # GH3547, to_html regards empty string labels as repeated labels + data = {'c1': ['a', 'b'], 'c2': ['a', ''], 'data': [1, 2]} + df = DataFrame(data).set_index(['c1', 'c2']) + res = df.to_html() + self.assertTrue("rowspan" not in res) + + def test_to_html_unicode(self): + df = DataFrame({u('\u03c3'): np.arange(10.)}) + expected = u'\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
\u03c3
00.0
11.0
22.0
33.0
44.0
55.0
66.0
77.0
88.0
99.0
' # noqa + self.assertEqual(df.to_html(), expected) + df = DataFrame({'A': [u('\u03c3')]}) + expected = u'\n \n \n \n \n \n \n \n \n \n \n \n \n
A
0\u03c3
' # noqa + self.assertEqual(df.to_html(), expected) + + def test_to_html_decimal(self): + # GH 12031 + df = DataFrame({'A': [6.0, 3.1, 2.2]}) + result = df.to_html(decimal=',') + expected = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
A
06,0
13,1
22,2
') + self.assertEqual(result, expected) + + def test_to_html_escaped(self): + a = 'str", + b: ""}, + 'co>l2': {a: "", + b: ""}} + rs = DataFrame(test_dict).to_html() + xp = """ + + + + + + + + + + + + + + + + + + + +
co<l1co>l2
str<ing1 &amp;<type 'str'><type 'str'>
stri>ng2 &amp;<type 'str'><type 'str'>
""" + + self.assertEqual(xp, rs) + + def test_to_html_escape_disabled(self): + a = 'strbold", + b: "bold"}, + 'co>l2': {a: "bold", + b: "bold"}} + rs = DataFrame(test_dict).to_html(escape=False) + xp = """ + + + + + + + + + + + + + + + + + +
co + co>l2
str + boldbold
stri>ng2 &boldbold
""" + + self.assertEqual(xp, rs) + + def test_to_html_multiindex_index_false(self): + # issue 8452 + df = DataFrame({ + 'a': range(2), + 'b': range(3, 5), + 'c': range(5, 7), + 'd': range(3, 5) + }) + df.columns = MultiIndex.from_product([['a', 'b'], ['c', 'd']]) + result = df.to_html(index=False) + expected = """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ab
cdcd
0353
1464
""" + + self.assertEqual(result, expected) + + df.index = Index(df.index.values, name='idx') + result = df.to_html(index=False) + self.assertEqual(result, expected) + + def test_to_html_multiindex_sparsify_false_multi_sparse(self): + with option_context('display.multi_sparse', False): + index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], + names=['foo', None]) + + df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index) + + result = df.to_html() + expected = """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
01
foo
0001
0123
1045
1167
""" + + self.assertEqual(result, expected) + + df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], + columns=index[::2], index=index) + + result = df.to_html() + expected = """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
foo01
00
foo
0001
0123
1045
1167
""" + + self.assertEqual(result, expected) + + def test_to_html_multiindex_sparsify(self): + index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], + names=['foo', None]) + + df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index) + + result = df.to_html() + expected = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
01
foo
0001
123
1045
167
""" + + self.assertEqual(result, expected) + + df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=index[::2], + index=index) + + result = df.to_html() + expected = """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
foo01
00
foo
0001
123
1045
167
""" + + self.assertEqual(result, expected) + + def test_to_html_multiindex_odd_even_truncate(self): + # GH 14882 - Issue on truncation with odd length DataFrame + mi = MultiIndex.from_product([[100, 200, 300], + [10, 20, 30], + [1, 2, 3, 4, 5, 6, 7]], + names=['a', 'b', 'c']) + df = DataFrame({'n': range(len(mi))}, index=mi) + result = df.to_html(max_rows=60) + expected = """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
n
abc
1001010
21
32
43
54
65
76
2017
28
39
410
511
612
713
30114
215
316
417
518
619
720
20010121
222
323
424
525
626
727
20128
229
......
633
734
30135
236
337
438
539
640
741
30010142
243
344
445
546
647
748
20149
250
351
452
553
654
755
30156
257
358
459
560
661
762
""" + self.assertEqual(result, expected) + + # Test that ... appears in a middle level + result = df.to_html(max_rows=56) + expected = """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
n
abc
1001010
21
32
43
54
65
76
2017
28
39
410
511
612
713
30114
215
316
417
518
619
720
20010121
222
323
424
525
626
727
.........
30135
236
337
438
539
640
741
30010142
243
344
445
546
647
748
20149
250
351
452
553
654
755
30156
257
358
459
560
661
762
') + self.assertEqual(result, expected) + + def test_to_html_index_formatter(self): + df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=['foo', None], + index=lrange(4)) + + f = lambda x: 'abcd'[x] + result = df.to_html(formatters={'__index__': f}) + expected = """\
fooNone
a01
b23
c45
d67
""" + + self.assertEqual(result, expected) + + def test_to_html_datetime64_monthformatter(self): + months = [datetime(2016, 1, 1), datetime(2016, 2, 2)] + x = DataFrame({'months': months}) + + def format_func(x): + return x.strftime('%Y-%m') + result = x.to_html(formatters={'months': format_func}) + expected = """\ + + + + + + + + + + + + + + + + + +
months
02016-01
12016-02
""" + self.assertEqual(result, expected) + + def test_to_html_datetime64_hourformatter(self): + + x = DataFrame({'hod': pd.to_datetime(['10:10:10.100', '12:12:12.120'], + format='%H:%M:%S.%f')}) + + def format_func(x): + return x.strftime('%H:%M') + result = x.to_html(formatters={'hod': format_func}) + expected = """\ + + + + + + + + + + + + + + + + + +
hod
010:10
112:12
""" + self.assertEqual(result, expected) + + def test_to_html_regression_GH6098(self): + df = DataFrame({ + u('clé1'): [u('a'), u('a'), u('b'), u('b'), u('a')], + u('clé2'): [u('1er'), u('2ème'), u('1er'), u('2ème'), u('1er')], + 'données1': np.random.randn(5), + 'données2': np.random.randn(5)}) + + # it works + df.pivot_table(index=[u('clé1')], columns=[u('clé2')])._repr_html_() + + def test_to_html_truncate(self): + pytest.skip("unreliable on travis") + index = pd.DatetimeIndex(start='20010101', freq='D', periods=20) + df = DataFrame(index=index, columns=range(20)) + fmt.set_option('display.max_rows', 8) + fmt.set_option('display.max_columns', 4) + result = df._repr_html_() + expected = '''\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
01...1819
2001-01-01NaNNaN...NaNNaN
2001-01-02NaNNaN...NaNNaN
2001-01-03NaNNaN...NaNNaN
2001-01-04NaNNaN...NaNNaN
..................
2001-01-17NaNNaN...NaNNaN
2001-01-18NaNNaN...NaNNaN
2001-01-19NaNNaN...NaNNaN
2001-01-20NaNNaN...NaNNaN
+    </tr>
+  </tbody>
+</table>
+<p>20 rows × 20 columns</p>
+'''.format(div_style) + if compat.PY2: + expected = expected.decode('utf-8') + self.assertEqual(result, expected) + + def test_to_html_truncate_multi_index(self): + pytest.skip("unreliable on travis") + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + df = DataFrame(index=arrays, columns=arrays) + fmt.set_option('display.max_rows', 7) + fmt.set_option('display.max_columns', 7) + result = df._repr_html_() + expected = '''\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
barbaz...fooqux
onetwoone...twoonetwo
baroneNaNNaNNaN...NaNNaNNaN
twoNaNNaNNaN...NaNNaNNaN
bazoneNaNNaNNaN...NaNNaNNaN
...........................
footwoNaNNaNNaN...NaNNaNNaN
quxoneNaNNaNNaN...NaNNaNNaN
twoNaNNaNNaN...NaNNaNNaN
+    </tr>
+  </tbody>
+</table>
+<p>8 rows × 8 columns</p>
+'''.format(div_style) + if compat.PY2: + expected = expected.decode('utf-8') + self.assertEqual(result, expected) + + def test_to_html_truncate_multi_index_sparse_off(self): + pytest.skip("unreliable on travis") + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + df = DataFrame(index=arrays, columns=arrays) + fmt.set_option('display.max_rows', 7) + fmt.set_option('display.max_columns', 7) + fmt.set_option('display.multi_sparse', False) + result = df._repr_html_() + expected = '''\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
barbarbaz...fooquxqux
onetwoone...twoonetwo
baroneNaNNaNNaN...NaNNaNNaN
bartwoNaNNaNNaN...NaNNaNNaN
bazoneNaNNaNNaN...NaNNaNNaN
footwoNaNNaNNaN...NaNNaNNaN
quxoneNaNNaNNaN...NaNNaNNaN
quxtwoNaNNaNNaN...NaNNaNNaN
+    </tr>
+  </tbody>
+</table>
+<p>8 rows × 8 columns</p>
+'''.format(div_style) + if compat.PY2: + expected = expected.decode('utf-8') + self.assertEqual(result, expected) + + def test_to_html_border(self): + df = DataFrame({'A': [1, 2]}) + result = df.to_html() + assert 'border="1"' in result + + def test_to_html_border_option(self): + df = DataFrame({'A': [1, 2]}) + with pd.option_context('html.border', 0): + result = df.to_html() + self.assertTrue('border="0"' in result) + self.assertTrue('border="0"' in df._repr_html_()) + + def test_to_html_border_zero(self): + df = DataFrame({'A': [1, 2]}) + result = df.to_html(border=0) + self.assertTrue('border="0"' in result) + + def test_to_html(self): + # big mixed + biggie = DataFrame({'A': np.random.randn(200), + 'B': tm.makeStringIndex(200)}, + index=lrange(200)) + + biggie.loc[:20, 'A'] = np.nan + biggie.loc[:20, 'B'] = np.nan + s = biggie.to_html() + + buf = StringIO() + retval = biggie.to_html(buf=buf) + self.assertIsNone(retval) + self.assertEqual(buf.getvalue(), s) + + tm.assertIsInstance(s, compat.string_types) + + biggie.to_html(columns=['B', 'A'], col_space=17) + biggie.to_html(columns=['B', 'A'], + formatters={'A': lambda x: '%.1f' % x}) + + biggie.to_html(columns=['B', 'A'], float_format=str) + biggie.to_html(columns=['B', 'A'], col_space=12, float_format=str) + + frame = DataFrame(index=np.arange(200)) + frame.to_html() + + def test_to_html_filename(self): + biggie = DataFrame({'A': np.random.randn(200), + 'B': tm.makeStringIndex(200)}, + index=lrange(200)) + + biggie.loc[:20, 'A'] = np.nan + biggie.loc[:20, 'B'] = np.nan + with tm.ensure_clean('test.html') as path: + biggie.to_html(path) + with open(path, 'r') as f: + s = biggie.to_html() + s2 = f.read() + self.assertEqual(s, s2) + + frame = DataFrame(index=np.arange(200)) + with tm.ensure_clean('test.html') as path: + frame.to_html(path) + with open(path, 'r') as f: + self.assertEqual(frame.to_html(), f.read()) + + def test_to_html_with_no_bold(self): + x = DataFrame({'x': np.random.randn(5)}) + ashtml = x.to_html(bold_rows=False) + self.assertFalse('")]) + + def test_to_html_columns_arg(self): + frame = DataFrame(tm.getSeriesData()) + result = frame.to_html(columns=['A']) + self.assertNotIn('B', result) + + def test_to_html_multiindex(self): + columns = MultiIndex.from_tuples(list(zip(np.arange(2).repeat(2), + np.mod(lrange(4), 2))), + names=['CL0', 'CL1']) + df = DataFrame([list('abcd'), list('efgh')], columns=columns) + result = df.to_html(justify='left') + expected = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
CL001
CL10101
0abcd
1efgh
') + + self.assertEqual(result, expected) + + columns = MultiIndex.from_tuples(list(zip( + range(4), np.mod( + lrange(4), 2)))) + df = DataFrame([list('abcd'), list('efgh')], columns=columns) + + result = df.to_html(justify='right') + expected = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
0123
0101
0abcd
1efgh
') + + self.assertEqual(result, expected) + + def test_to_html_justify(self): + df = DataFrame({'A': [6, 30000, 2], + 'B': [1, 2, 70000], + 'C': [223442, 0, 1]}, + columns=['A', 'B', 'C']) + result = df.to_html(justify='left') + expected = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
061223442
13000020
22700001
') + self.assertEqual(result, expected) + + result = df.to_html(justify='right') + expected = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
061223442
13000020
22700001
') + self.assertEqual(result, expected) + + def test_to_html_index(self): + index = ['foo', 'bar', 'baz'] + df = DataFrame({'A': [1, 2, 3], + 'B': [1.2, 3.4, 5.6], + 'C': ['one', 'two', np.nan]}, + columns=['A', 'B', 'C'], + index=index) + expected_with_index = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
foo11.2one
bar23.4two
baz35.6NaN
') + self.assertEqual(df.to_html(), expected_with_index) + + expected_without_index = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
11.2one
23.4two
35.6NaN
') + result = df.to_html(index=False) + for i in index: + self.assertNotIn(i, result) + self.assertEqual(result, expected_without_index) + df.index = Index(['foo', 'bar', 'baz'], name='idx') + expected_with_index = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
idx
foo11.2one
bar23.4two
baz35.6NaN
') + self.assertEqual(df.to_html(), expected_with_index) + self.assertEqual(df.to_html(index=False), expected_without_index) + + tuples = [('foo', 'car'), ('foo', 'bike'), ('bar', 'car')] + df.index = MultiIndex.from_tuples(tuples) + + expected_with_index = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
foocar11.2one
bike23.4two
barcar35.6NaN
') + self.assertEqual(df.to_html(), expected_with_index) + + result = df.to_html(index=False) + for i in ['foo', 'bar', 'car', 'bike']: + self.assertNotIn(i, result) + # must be the same result as normal index + self.assertEqual(result, expected_without_index) + + df.index = MultiIndex.from_tuples(tuples, names=['idx1', 'idx2']) + expected_with_index = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
idx1idx2
foocar11.2one
bike23.4two
barcar35.6NaN
') + self.assertEqual(df.to_html(), expected_with_index) + self.assertEqual(df.to_html(index=False), expected_without_index) + + def test_to_html_with_classes(self): + df = DataFrame() + result = df.to_html(classes="sortable draggable") + expected = dedent(""" + + + + + + + + + +
+ + """).strip() + self.assertEqual(result, expected) + + result = df.to_html(classes=["sortable", "draggable"]) + self.assertEqual(result, expected) + + def test_to_html_no_index_max_rows(self): + # GH https://github.com/pandas-dev/pandas/issues/14998 + df = DataFrame({"A": [1, 2, 3, 4]}) + result = df.to_html(index=False, max_rows=1) + expected = dedent("""\ + + + + + + + + + + + +
A
1
""") + self.assertEqual(result, expected) diff --git a/pandas/tests/formats/test_to_latex.py b/pandas/tests/formats/test_to_latex.py new file mode 100644 index 0000000000000..89e18e1cec06e --- /dev/null +++ b/pandas/tests/formats/test_to_latex.py @@ -0,0 +1,351 @@ +from datetime import datetime + +import pytest + +import pandas as pd +from pandas import DataFrame, compat +from pandas.util import testing as tm +from pandas.compat import u +import codecs + + +@pytest.fixture +def frame(): + return DataFrame(tm.getSeriesData()) + + +class TestToLatex(object): + + def test_to_latex_filename(self, frame): + with tm.ensure_clean('test.tex') as path: + frame.to_latex(path) + + with open(path, 'r') as f: + assert frame.to_latex() == f.read() + + # test with utf-8 and encoding option (GH 7061) + df = DataFrame([[u'au\xdfgangen']]) + with tm.ensure_clean('test.tex') as path: + df.to_latex(path, encoding='utf-8') + with codecs.open(path, 'r', encoding='utf-8') as f: + assert df.to_latex() == f.read() + + # test with utf-8 without encoding option + if compat.PY3: # python3: pandas default encoding is utf-8 + with tm.ensure_clean('test.tex') as path: + df.to_latex(path) + with codecs.open(path, 'r', encoding='utf-8') as f: + assert df.to_latex() == f.read() + else: + # python2 default encoding is ascii, so an error should be raised + with tm.ensure_clean('test.tex') as path: + with pytest.raises(UnicodeEncodeError): + df.to_latex(path) + + def test_to_latex(self, frame): + # it works! + frame.to_latex() + + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex() + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(index=False) + withoutindex_expected = r"""\begin{tabular}{rl} +\toprule + a & b \\ +\midrule + 1 & b1 \\ + 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutindex_result == withoutindex_expected + + def test_to_latex_format(self, frame): + # GH Bug #9402 + frame.to_latex(column_format='ccc') + + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(column_format='ccc') + withindex_expected = r"""\begin{tabular}{ccc} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + def test_to_latex_with_formatters(self): + df = DataFrame({'int': [1, 2, 3], + 'float': [1.0, 2.0, 3.0], + 'object': [(1, 2), True, False], + 'datetime64': [datetime(2016, 1, 1), + datetime(2016, 2, 5), + datetime(2016, 3, 3)]}) + + formatters = {'int': lambda x: '0x%x' % x, + 'float': lambda x: '[% 4.1f]' % x, + 'object': lambda x: '-%s-' % str(x), + 'datetime64': lambda x: x.strftime('%Y-%m'), + '__index__': lambda x: 'index: %s' % x} + result = df.to_latex(formatters=dict(formatters)) + + expected = r"""\begin{tabular}{llrrl} +\toprule +{} & datetime64 & float & int & object \\ +\midrule +index: 0 & 2016-01 & [ 1.0] & 0x1 & -(1, 2)- \\ +index: 1 & 2016-02 & [ 2.0] & 0x2 & -True- \\ +index: 2 & 2016-03 & [ 3.0] & 0x3 & -False- \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + def test_to_latex_multiindex(self): + df = DataFrame({('x', 'y'): ['a']}) + result = df.to_latex() + expected = r"""\begin{tabular}{ll} +\toprule +{} & x \\ +{} & y \\ +\midrule +0 & a \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + result = df.T.to_latex() + expected = 
r"""\begin{tabular}{lll} +\toprule + & & 0 \\ +\midrule +x & y & a \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + df = DataFrame.from_dict({ + ('c1', 0): pd.Series(dict((x, x) for x in range(4))), + ('c1', 1): pd.Series(dict((x, x + 4) for x in range(4))), + ('c2', 0): pd.Series(dict((x, x) for x in range(4))), + ('c2', 1): pd.Series(dict((x, x + 4) for x in range(4))), + ('c3', 0): pd.Series(dict((x, x) for x in range(4))), + }).T + result = df.to_latex() + expected = r"""\begin{tabular}{llrrrr} +\toprule + & & 0 & 1 & 2 & 3 \\ +\midrule +c1 & 0 & 0 & 1 & 2 & 3 \\ + & 1 & 4 & 5 & 6 & 7 \\ +c2 & 0 & 0 & 1 & 2 & 3 \\ + & 1 & 4 & 5 & 6 & 7 \\ +c3 & 0 & 0 & 1 & 2 & 3 \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + # GH 10660 + df = pd.DataFrame({'a': [0, 0, 1, 1], + 'b': list('abab'), + 'c': [1, 2, 3, 4]}) + result = df.set_index(['a', 'b']).to_latex() + expected = r"""\begin{tabular}{llr} +\toprule + & & c \\ +a & b & \\ +\midrule +0 & a & 1 \\ + & b & 2 \\ +1 & a & 3 \\ + & b & 4 \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + result = df.groupby('a').describe().to_latex() + expected = ('\\begin{tabular}{lrrrrrrrr}\n\\toprule\n{} & c & ' + ' & & & & & & ' + '\\\\\n{} & count & mean & std & min & 25\\% & ' + '50\\% & 75\\% & max \\\\\na & & & ' + ' & & & & & \\\\\n\\midrule\n0 ' + '& 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 ' + '& 2.0 \\\\\n1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 ' + '& 3.5 & 3.75 & 4.0 ' + '\\\\\n\\bottomrule\n\\end{tabular}\n') + + assert result == expected + + def test_to_latex_escape(self): + a = 'a' + b = 'b' + + test_dict = {u('co^l1'): {a: "a", + b: "b"}, + u('co$e^x$'): {a: "a", + b: "b"}} + + unescaped_result = DataFrame(test_dict).to_latex(escape=False) + escaped_result = DataFrame(test_dict).to_latex( + ) # default: escape=True + + unescaped_expected = r'''\begin{tabular}{lll} +\toprule +{} & co$e^x$ & co^l1 \\ +\midrule +a & a & a \\ +b & b & b \\ +\bottomrule +\end{tabular} +''' + + escaped_expected = r'''\begin{tabular}{lll} +\toprule +{} & co\$e\textasciicircumx\$ & co\textasciicircuml1 \\ +\midrule +a & a & a \\ +b & b & b \\ +\bottomrule +\end{tabular} +''' + + assert unescaped_result == unescaped_expected + assert escaped_result == escaped_expected + + def test_to_latex_longtable(self, frame): + frame.to_latex(longtable=True) + + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(longtable=True) + withindex_expected = r"""\begin{longtable}{lrl} +\toprule +{} & a & b \\ +\midrule +\endhead +\midrule +\multicolumn{3}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\end{longtable} +""" + + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(index=False, longtable=True) + withoutindex_expected = r"""\begin{longtable}{rl} +\toprule + a & b \\ +\midrule +\endhead +\midrule +\multicolumn{3}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot + 1 & b1 \\ + 2 & b2 \\ +\end{longtable} +""" + + assert withoutindex_result == withoutindex_expected + + def test_to_latex_escape_special_chars(self): + special_characters = ['&', '%', '$', '#', '_', '{', '}', '~', '^', + '\\'] + df = DataFrame(data=special_characters) + observed = df.to_latex() + expected = r"""\begin{tabular}{ll} +\toprule +{} & 0 \\ +\midrule +0 & \& \\ +1 & \% \\ +2 & \$ \\ +3 & \# \\ +4 & \_ \\ +5 & \{ \\ +6 & \} \\ +7 & \textasciitilde \\ +8 & \textasciicircum \\ +9 & 
\textbackslash \\ +\bottomrule +\end{tabular} +""" + + assert observed == expected + + def test_to_latex_no_header(self): + # GH 7124 + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(header=False) + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(index=False, header=False) + withoutindex_expected = r"""\begin{tabular}{rl} +\toprule + 1 & b1 \\ + 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutindex_result == withoutindex_expected + + def test_to_latex_decimal(self, frame): + # GH 12031 + frame.to_latex() + + df = DataFrame({'a': [1.0, 2.1], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(decimal=',') + + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1,0 & b1 \\ +1 & 2,1 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected From 2eb6d38ed0563318cea5f419a6eb32b211d24ff1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 2 Mar 2017 07:50:49 -0500 Subject: [PATCH 123/933] CLN: remove deprecated irow, icol, iget, iget_value (GH10711) xref https://github.com/pandas-dev/pandas/issues/6581 Author: Joris Van den Bossche Closes #15547 from jorisvandenbossche/remove-irow-icol and squashes the following commits: 06ea1bb [Joris Van den Bossche] CLN: remove deprecated irow, icol, iget, iget_value (GH10711) --- doc/source/whatsnew/v0.20.0.txt | 2 ++ pandas/core/frame.py | 25 -------------------- pandas/core/groupby.py | 10 -------- pandas/core/series.py | 25 -------------------- pandas/tests/frame/test_indexing.py | 23 ++++-------------- pandas/tests/frame/test_nonunique_indexes.py | 2 +- pandas/tests/groupby/test_groupby.py | 16 +------------ pandas/tests/series/test_indexing.py | 16 ++----------- pandas/tests/sparse/test_frame.py | 3 +-- 9 files changed, 12 insertions(+), 110 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 6e9dfb92dfd90..dc8420080b50d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -548,6 +548,8 @@ Removal of prior version deprecations/changes - ``pd.to_datetime`` and ``pd.to_timedelta`` have dropped the ``coerce`` parameter in favor of ``errors`` (:issue:`13602`) - ``pandas.stats.fama_macbeth``, ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var``, as well as the top-level ``pandas.fama_macbeth`` and ``pandas.ols`` routines are removed. Similar functionaility can be found in the `statsmodels `__ package. (:issue:`11898`) - ``Series.is_time_series`` is dropped in favor of ``Series.index.is_all_dates`` (:issue:``) +- The deprecated ``irow``, ``icol``, ``iget`` and ``iget_value`` methods are removed + in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). .. _whatsnew_0200.performance: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 021ce59e3402b..0d14f00bee508 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1916,23 +1916,6 @@ def set_value(self, index, col, value, takeable=False): return self - def irow(self, i, copy=False): - """ - DEPRECATED. Use ``.iloc[i]`` instead - """ - - warnings.warn("irow(i) is deprecated. Please use .iloc[i]", - FutureWarning, stacklevel=2) - return self._ixs(i, axis=0) - - def icol(self, i): - """ - DEPRECATED. Use ``.iloc[:, i]`` instead - """ - warnings.warn("icol(i) is deprecated. 
Please use .iloc[:,i]", - FutureWarning, stacklevel=2) - return self._ixs(i, axis=1) - def _ixs(self, i, axis=0): """ i : int, slice, or sequence of integers @@ -2007,14 +1990,6 @@ def _ixs(self, i, axis=0): return result - def iget_value(self, i, j): - """ - DEPRECATED. Use ``.iat[i, j]`` instead - """ - warnings.warn("iget_value(i, j) is deprecated. Please use .iat[i, j]", - FutureWarning, stacklevel=2) - return self.iat[i, j] - def __getitem__(self, key): key = com._apply_if_callable(key, self) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 381a8edcb5192..578c334781d15 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1004,16 +1004,6 @@ class GroupBy(_GroupBy): """ _apply_whitelist = _common_apply_whitelist - def irow(self, i): - """ - DEPRECATED. Use ``.nth(i)`` instead - """ - - # 10177 - warnings.warn("irow(i) is deprecated. Please use .nth(i)", - FutureWarning, stacklevel=2) - return self.nth(i) - @Substitution(name='groupby') @Appender(_doc_template) def count(self): diff --git a/pandas/core/series.py b/pandas/core/series.py index ffe1be26fda54..1114590421fd8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -875,31 +875,6 @@ def reshape(self, *args, **kwargs): return self._values.reshape(shape, **kwargs) - def iget_value(self, i, axis=0): - """ - DEPRECATED. Use ``.iloc[i]`` or ``.iat[i]`` instead - """ - warnings.warn("iget_value(i) is deprecated. Please use .iloc[i] or " - ".iat[i]", FutureWarning, stacklevel=2) - return self._ixs(i) - - def iget(self, i, axis=0): - """ - DEPRECATED. Use ``.iloc[i]`` or ``.iat[i]`` instead - """ - - warnings.warn("iget(i) is deprecated. Please use .iloc[i] or .iat[i]", - FutureWarning, stacklevel=2) - return self._ixs(i) - - def irow(self, i, axis=0): - """ - DEPRECATED. Use ``.iloc[i]`` or ``.iat[i]`` instead - """ - warnings.warn("irow(i) is deprecated. 
Please use .iloc[i] or .iat[i]", - FutureWarning, stacklevel=2) - return self._ixs(i) - def get_value(self, label, takeable=False): """ Quickly retrieve single value at passed index label diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 18fb17b98570a..36c39ffba70b3 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1761,13 +1761,9 @@ def test_single_element_ix_dont_upcast(self): result = df.loc[[0], "b"] assert_series_equal(result, expected) - def test_irow(self): + def test_iloc_row(self): df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2)) - # 10711, deprecated - with tm.assert_produces_warning(FutureWarning): - df.irow(1) - result = df.iloc[1] exp = df.loc[2] assert_series_equal(result, exp) @@ -1795,14 +1791,10 @@ def f(): expected = df.reindex(df.index[[1, 2, 4, 6]]) assert_frame_equal(result, expected) - def test_icol(self): + def test_iloc_col(self): df = DataFrame(np.random.randn(4, 10), columns=lrange(0, 20, 2)) - # 10711, deprecated - with tm.assert_produces_warning(FutureWarning): - df.icol(1) - result = df.iloc[:, 1] exp = df.loc[:, 2] assert_series_equal(result, exp) @@ -1828,8 +1820,7 @@ def f(): expected = df.reindex(columns=df.columns[[1, 2, 4, 6]]) assert_frame_equal(result, expected) - def test_irow_icol_duplicates(self): - # 10711, deprecated + def test_iloc_duplicates(self): df = DataFrame(np.random.rand(3, 3), columns=list('ABC'), index=list('aab')) @@ -1874,16 +1865,12 @@ def test_irow_icol_duplicates(self): expected = df.take([0], axis=1) assert_frame_equal(result, expected) - def test_icol_sparse_propegate_fill_value(self): + def test_iloc_sparse_propegate_fill_value(self): from pandas.sparse.api import SparseDataFrame df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999) self.assertTrue(len(df['A'].sp_values) == len(df.iloc[:, 0].sp_values)) - def test_iget_value(self): - # 10711 deprecated - - with tm.assert_produces_warning(FutureWarning): - self.frame.iget_value(0, 0) + def test_iat(self): for i, row in enumerate(self.frame.index): for j, col in enumerate(self.frame.columns): diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index d6bcb85e01910..bb7c7c2bd012d 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -429,7 +429,7 @@ def test_columns_with_dups(self): self.assertEqual(len(df._data._blknos), len(df.columns)) self.assertEqual(len(df._data._blklocs), len(df.columns)) - # testing iget + # testing iloc for i in range(len(df.columns)): df.iloc[:, i] diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 59cbcab23b9e7..74e8c6c45946f 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3828,20 +3828,6 @@ def test_groupby_whitelist(self): 'mad', 'std', 'var', 'sem'] AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad'] - def test_groupby_whitelist_deprecations(self): - from string import ascii_lowercase - letters = np.array(list(ascii_lowercase)) - N = 10 - random_letters = letters.take(np.random.randint(0, 26, N)) - df = DataFrame({'floats': N / 10 * Series(np.random.random(N)), - 'letters': Series(random_letters)}) - - # 10711 deprecated - with tm.assert_produces_warning(FutureWarning): - df.groupby('letters').irow(0) - with tm.assert_produces_warning(FutureWarning): - df.groupby('letters').floats.irow(0) - def test_regression_whitelist_methods(self): # GH6944 @@ 
-3917,7 +3903,7 @@ def test_tab_completion(self): 'first', 'get_group', 'groups', 'hist', 'indices', 'last', 'max', 'mean', 'median', 'min', 'name', 'ngroups', 'nth', 'ohlc', 'plot', 'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count', - 'nunique', 'head', 'irow', 'describe', 'cummax', 'quantile', + 'nunique', 'head', 'describe', 'cummax', 'quantile', 'rank', 'cumprod', 'tail', 'resample', 'cummin', 'fillna', 'cumsum', 'cumcount', 'all', 'shift', 'skew', 'bfill', 'ffill', 'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 8a2cc53b42938..bb77550e01f11 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -164,22 +164,10 @@ def test_getitem_get(self): result = s.get(None) self.assertIsNone(result) - def test_iget(self): + def test_iloc(self): s = Series(np.random.randn(10), index=lrange(0, 20, 2)) - # 10711, deprecated - with tm.assert_produces_warning(FutureWarning): - s.iget(1) - - # 10711, deprecated - with tm.assert_produces_warning(FutureWarning): - s.irow(1) - - # 10711, deprecated - with tm.assert_produces_warning(FutureWarning): - s.iget_value(1) - for i in range(len(s)): result = s.iloc[i] exp = s[s.index[i]] @@ -199,7 +187,7 @@ def test_iget(self): expected = s.reindex(s.index[[0, 2, 3, 4, 5]]) assert_series_equal(result, expected) - def test_iget_nonunique(self): + def test_iloc_nonunique(self): s = Series([0, 1, 2], index=[0, 1, 0]) self.assertEqual(s.iloc[2], 2) diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index e3b865492c043..b2283364a1631 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -389,8 +389,7 @@ def test_getitem(self): self.assertRaises(Exception, sdf.__getitem__, ['a', 'd']) - def test_icol(self): - # 10711 deprecated + def test_iloc(self): # 2227 result = self.frame.iloc[:, 0] From d92a75962b6b772f0befb70762cedcfbf7aecb6e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 2 Mar 2017 08:00:14 -0500 Subject: [PATCH 124/933] DOC: revert gbq doc-strings to be in-line rather than wrapped --- pandas/core/frame.py | 35 ++++++++++++-------- pandas/io/gbq.py | 76 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 87 insertions(+), 24 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0d14f00bee508..ff5dcb3f544ec 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -78,7 +78,7 @@ from pandas import compat from pandas.compat.numpy import function as nv from pandas.util.decorators import (deprecate_kwarg, Appender, - Substitution, docstring_wrapper) + Substitution) from pandas.util.validators import validate_bool_kwarg from pandas.tseries.period import PeriodIndex @@ -908,7 +908,26 @@ def to_gbq(self, destination_table, project_id, chunksize=10000, verbose=True, reauth=False, if_exists='fail', private_key=None): """Write a DataFrame to a Google BigQuery table. - THIS IS AN EXPERIMENTAL LIBRARY + The main method a user calls to export pandas DataFrame contents to + Google BigQuery table. + + Google BigQuery API Client Library v2 for Python is used. + Documentation is available `here + `__ + + Authentication to the Google BigQuery service is via OAuth 2.0. + + - If "private_key" is not provided: + + By default "application default credentials" are used. + + If default application credentials are not found or are restrictive, + user account credentials are used. 
In this case, you will be asked to + grant permissions for product name 'pandas GBQ'. + + - If "private_key" is provided: + + Service account credentials will be used to authenticate. Parameters ---------- @@ -933,8 +952,6 @@ def to_gbq(self, destination_table, project_id, chunksize=10000, Service account private key in JSON format. Can be file path or string contents. This is useful for remote server authentication (eg. jupyter iPython notebook on remote host) - - .. versionadded:: 0.17.0 """ from pandas.io import gbq @@ -5402,16 +5419,6 @@ def combineMult(self, other): _EMPTY_SERIES = Series([]) -# patch in the doc-string for to_gbq -# and bind this method -def _f(): - from pandas.io.gbq import _try_import - return _try_import().to_gbq.__doc__ - - -DataFrame.to_gbq = docstring_wrapper(DataFrame.to_gbq, _f) - - def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None): """ Segregate Series based on type and coerce into matrices. diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 3407f51af5e83..9cfb27a92bfef 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -1,7 +1,5 @@ """ Google BigQuery support """ -from pandas.util.decorators import docstring_wrapper - def _try_import(): # since pandas is a dependency of pandas-gbq @@ -25,6 +23,72 @@ def _try_import(): def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, private_key=None, dialect='legacy', **kwargs): + r"""Load data from Google BigQuery. + + The main method a user calls to execute a Query in Google BigQuery + and read results into a pandas DataFrame. + + Google BigQuery API Client Library v2 for Python is used. + Documentation is available `here + `__ + + Authentication to the Google BigQuery service is via OAuth 2.0. + + - If "private_key" is not provided: + + By default "application default credentials" are used. + + If default application credentials are not found or are restrictive, + user account credentials are used. In this case, you will be asked to + grant permissions for product name 'pandas GBQ'. + + - If "private_key" is provided: + + Service account credentials will be used to authenticate. + + Parameters + ---------- + query : str + SQL-Like Query to return data values + project_id : str + Google BigQuery Account project ID. + index_col : str (optional) + Name of result column to use for index in results DataFrame + col_order : list(str) (optional) + List of BigQuery column names in the desired order for results + DataFrame + reauth : boolean (default False) + Force Google BigQuery to reauthenticate the user. This is useful + if multiple accounts are used. + verbose : boolean (default True) + Verbose output + private_key : str (optional) + Service account private key in JSON format. Can be file path + or string contents. This is useful for remote server + authentication (eg. jupyter iPython notebook on remote host) + + dialect : {'legacy', 'standard'}, default 'legacy' + 'legacy' : Use BigQuery's legacy SQL dialect. + 'standard' : Use BigQuery's standard SQL (beta), which is + compliant with the SQL 2011 standard. For more information + see `BigQuery SQL Reference + `__ + + **kwargs : Arbitrary keyword arguments + configuration (dict): query config parameters for job processing. 
+    For example:
+
+        configuration = {'query': {'useQueryCache': False}}
+
+        For more information see `BigQuery SQL Reference
+        `__
+
+    Returns
+    -------
+    df: DataFrame
+        DataFrame representing results of query
+
+    """
     pandas_gbq = _try_import()
     return pandas_gbq.read_gbq(
         query, project_id=project_id,
@@ -35,10 +99,6 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
         **kwargs)
 
 
-read_gbq = docstring_wrapper(read_gbq,
-                             lambda: _try_import().read_gbq.__doc__)
-
-
 def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
            verbose=True, reauth=False, if_exists='fail', private_key=None):
     pandas_gbq = _try_import()
@@ -46,7 +106,3 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
                           chunksize=chunksize,
                           verbose=verbose, reauth=reauth,
                           if_exists=if_exists, private_key=private_key)
-
-
-to_gbq = docstring_wrapper(to_gbq,
-                           lambda: _try_import().to_gbq.__doc__)

From 37fe2c4edddbec2c08d561667897a1ef5a18771c Mon Sep 17 00:00:00 2001
From: Ben Thayer
Date: Thu, 2 Mar 2017 08:16:48 -0500
Subject: [PATCH 125/933] ENH: Added FrozenList difference setop

closes #15475

Author: Ben Thayer
Author: bthayer2365

Closes #15506 from bthayer2365/frozen-index and squashes the following commits:

428a1b3 [Ben Thayer] Added __iadd__ test, fixed whatsnew
84ba405 [Ben Thayer] Merge branch 'master' of github.com:pandas-dev/pandas into frozen-index
8dbde1e [Ben Thayer] Rebased to upstream/master
6f6c140 [Ben Thayer] Added docstrings, depricated __iadd__, changed __add__ to use self.union()
66b3b91 [Ben Thayer] Fixed issue number
3d6cee5 [Ben Thayer] Depricated __add__ in favor of union
ccd75c7 [Ben Thayer] Changed __sub__ to difference
cd7de26 [Ben Thayer] Added versionadded tag in docs and renamed test_inplace to test_inplace_add for consistency
0ea8d21 [Ben Thayer] Added __isub__ and groupby example to docs
79dd958 [Ben Thayer] Updated whatsnew to reflect changes
0fc7e19 [Ben Thayer] Removed whitespace
73564ab [Ben Thayer] Added FrozenList subtraction
fee7a7d [bthayer2365] Merge branch 'master' into frozen-index
6a2b48d [Ben Thayer] Added docstrings, depricated __iadd__, changed __add__ to use self.union()
2ab85cb [Ben Thayer] Fixed issue number
cb95089 [Ben Thayer] Depricated __add__ in favor of union
2e43849 [Ben Thayer] Changed __sub__ to difference
fdcfbbb [Ben Thayer] Added versionadded tag in docs and renamed test_inplace to test_inplace_add for consistency
2fad2f7 [Ben Thayer] Added __isub__ and groupby example to docs
cd73faa [Ben Thayer] Updated whatsnew to reflect changes
f6381a8 [Ben Thayer] Removed whitespace
ada7cda [Ben Thayer] Added FrozenList subtraction
---
 doc/source/groupby.rst              | 10 +++++++++
 doc/source/whatsnew/v0.20.0.txt     |  2 ++
 pandas/indexes/frozen.py            | 24 ++++++++++++++++++---
 pandas/tests/indexes/test_frozen.py | 33 +++++++++++++++++++++--------
 4 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst
index 8484ccd69a983..2d406de7c0c9b 100644
--- a/doc/source/groupby.rst
+++ b/doc/source/groupby.rst
@@ -126,6 +126,16 @@ We could naturally group by either the ``A`` or ``B`` columns or both:
    grouped = df.groupby('A')
    grouped = df.groupby(['A', 'B'])
 
+.. versionadded:: 0.20
+
+If we also have a MultiIndex on columns ``A`` and ``B``, we can group by all
+but the specified columns.
+
+.. ipython:: python
+
+   df2 = df.set_index(['A', 'B'])
+   grouped = df2.groupby(level=df2.index.names.difference(['B']))
+
 These will split the DataFrame on its index (rows). We could also split by the
 columns:
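The documentation hunk above is also the quickest way to smoke-test this patch end to end. The following is a minimal sketch (an editorial illustration, not part of the patch itself), assuming pandas 0.20.0 or later so that ``FrozenList.union`` and ``FrozenList.difference`` exist::

    import pandas as pd

    df = pd.DataFrame({'A': [0, 0, 1, 1],
                       'B': list('abab'),
                       'C': [1, 2, 3, 4]}).set_index(['A', 'B'])

    # df.index.names is a FrozenList(['A', 'B']); difference() returns a
    # new FrozenList and never mutates the (immutable) original.
    keep = df.index.names.difference(['B'])   # FrozenList(['A'])

    # Group by every index level except 'B'.
    print(df.groupby(level=keep).sum())

    # union() is the replacement for the now-deprecated `+` concatenation.
    all_levels = keep.union(['B'])            # FrozenList(['A', 'B'])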
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index dc8420080b50d..cc33a4a7ce6c6 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -28,6 +28,7 @@ New features
 - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `.
 - ``.str.replace`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`)
+- ``FrozenList`` has gained the ``.difference()`` setop method (:issue:`15475`)
 
 
 
@@ -534,6 +535,7 @@ Deprecations
 - ``Series.sortlevel`` and ``DataFrame.sortlevel`` have been deprecated in favor of ``Series.sort_index`` and ``DataFrame.sort_index`` (:issue:`15099`)
 - importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explict imports (:issue:`15358`)
 - ``Series/DataFrame/Panel.consolidate()`` been deprecated as a public method. (:issue:`15483`)
+- ``FrozenList`` addition (new object and inplace) has been deprecated in favor of the ``.union()`` method. (:issue:`15475`)
 
 .. _whatsnew_0200.prior_deprecations:
 
diff --git a/pandas/indexes/frozen.py b/pandas/indexes/frozen.py
index e043ba64bbad7..47e2557333ec7 100644
--- a/pandas/indexes/frozen.py
+++ b/pandas/indexes/frozen.py
@@ -13,6 +13,8 @@
 from pandas.types.cast import _coerce_indexer_dtype
 from pandas.formats.printing import pprint_thing
 
+import warnings
+
 
 class FrozenList(PandasObject, list):
 
@@ -25,11 +27,14 @@ class FrozenList(PandasObject, list):
 
     # typechecks
 
     def __add__(self, other):
+        warnings.warn("__add__ is deprecated, use union(...)", FutureWarning)
+        return self.union(other)
+
+    def __iadd__(self, other):
+        warnings.warn("__iadd__ is deprecated, use union(...)", FutureWarning)
         if isinstance(other, tuple):
             other = list(other)
-        return self.__class__(super(FrozenList, self).__add__(other))
-
-    __iadd__ = __add__
 
+        return super(FrozenList, self).__iadd__(other)
 
     # Python 2 compat
     def __getslice__(self, i, j):
@@ -80,6 +85,19 @@ def __repr__(self):
     __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled
     pop = append = extend = remove = sort = insert = _disabled
 
+    def union(self, other):
+        """Returns a FrozenList with other concatenated to the end of self"""
+        if isinstance(other, tuple):
+            other = list(other)
+        return self.__class__(super(FrozenList, self).__add__(other))
+
+    def difference(self, other):
+        """Returns a new FrozenList with the elements of self that are
+        not in other."""
+        other = set(other)
+        temp = [x for x in self if x not in other]
+        return self.__class__(temp)
+
 
 class FrozenNDArray(PandasObject, np.ndarray):
 
diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py
index a82409fbf9513..a5fbf066adc83 100644
--- a/pandas/tests/indexes/test_frozen.py
+++ b/pandas/tests/indexes/test_frozen.py
@@ -15,20 +15,35 @@ def setUp(self):
         self.klass = FrozenList
 
     def test_add(self):
-        result = self.container + (1, 2, 3)
+        q = FrozenList([1])
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            q = q + [2, 3]
+        expected = FrozenList([1, 2, 3])
+        self.check_result(q, expected)
+
+    def test_iadd(self):
+        q = FrozenList([1])
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            q += [2, 3]
+        expected = FrozenList([1, 2, 3])
+        self.check_result(q, expected)
+
+    def test_union(self):
+        result = 
self.container.union((1, 2, 3)) expected = FrozenList(self.lst + [1, 2, 3]) self.check_result(result, expected) - result = (1, 2, 3) + self.container - expected = FrozenList([1, 2, 3] + self.lst) + def test_difference(self): + result = self.container.difference([2]) + expected = FrozenList([1, 3, 4, 5]) self.check_result(result, expected) - def test_inplace(self): - q = r = self.container - q += [5] - self.check_result(q, self.lst + [5]) - # other shouldn't be mutated - self.check_result(r, self.lst) + def test_difference_dupe(self): + result = FrozenList([1, 2, 3, 2]).difference([2]) + expected = FrozenList([1, 3]) + self.check_result(result, expected) class TestFrozenNDArray(CheckImmutable, CheckStringMixin, tm.TestCase): From f000a4eac361737c6524ca2273c158e8d3b04ab2 Mon Sep 17 00:00:00 2001 From: Amol Kahat Date: Thu, 2 Mar 2017 08:33:42 -0500 Subject: [PATCH 126/933] BUG: Fix index for datetime64 conversion. Fixes #13937 closes #13937 Author: Amol Kahat Closes #14446 from amolkahat/bug_fixes and squashes the following commits: 3806983 [Amol Kahat] Modify test cases. --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 4 ++-- pandas/tests/frame/test_convert_to.py | 24 ++++++++++++++++++------ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index cc33a4a7ce6c6..dca4f890e496b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -612,6 +612,7 @@ Bug Fixes - Bug in ``GroupBy.get_group()`` failing with a categorical grouper (:issue:`15155`) - Bug in ``pandas.tools.utils.cartesian_product()`` with large input can cause overflow on windows (:issue:`15265`) +- Bug in ``DataFrame.to_records()`` with converting a ``DatetimeIndex`` with a timezone (:issue:`13937`) - Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ff5dcb3f544ec..26a0a91094e7d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -36,7 +36,7 @@ is_object_dtype, is_extension_type, is_datetimetz, - is_datetime64_dtype, + is_datetime64_any_dtype, is_datetime64tz_dtype, is_bool_dtype, is_integer_dtype, @@ -1103,7 +1103,7 @@ def to_records(self, index=True, convert_datetime64=True): y : recarray """ if index: - if is_datetime64_dtype(self.index) and convert_datetime64: + if is_datetime64_any_dtype(self.index) and convert_datetime64: ix_vals = [self.index.to_pydatetime()] else: if isinstance(self.index, MultiIndex): diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 0dde113dd5147..8323d5ed9069f 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -1,8 +1,6 @@ # -*- coding: utf-8 -*- -from __future__ import print_function - -from numpy import nan +import pytest import numpy as np from pandas import compat @@ -10,7 +8,6 @@ date_range) import pandas.util.testing as tm - from pandas.tests.frame.common import TestData @@ -41,13 +38,13 @@ def test_to_dict(self): recons_data = DataFrame(test_data).to_dict("sp") expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'], - 'data': [[1.0, '1'], [2.0, '2'], [nan, '3']]} + 'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]} tm.assert_dict_equal(recons_data, expected_split) recons_data = DataFrame(test_data).to_dict("r") expected_records = [{'A': 1.0, 'B': '1'}, {'A': 2.0, 'B': '2'}, - {'A': nan, 'B': '3'}] + {'A': np.nan, 'B': '3'}] 
        tm.assertIsInstance(recons_data, list)
         self.assertEqual(len(recons_data), 3)
         for l, r in zip(recons_data, expected_records):
@@ -192,3 +189,18 @@ def test_to_records_with_unicode_column_names(self):
             "formats": [' Date: Thu, 2 Mar 2017 09:23:58 -0500
Subject: [PATCH 127/933] TST: remove deprecated usages of FrozenList.__add__
 from test code

xref #15506
---
 pandas/core/panel.py                      |  6 +++---
 pandas/core/reshape.py                    |  6 +++---
 pandas/core/strings.py                    |  2 +-
 pandas/tests/groupby/test_value_counts.py |  2 +-
 pandas/tools/concat.py                    |  2 +-
 test_fast.sh                              |  2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index 4a6c6cf291316..c5ea513223dce 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -940,9 +940,9 @@ def construct_index_parts(idx, major=True):
         minor_labels, minor_levels, minor_names = construct_index_parts(
             self.minor_axis, major=False)
 
-        levels = major_levels + minor_levels
-        labels = major_labels + minor_labels
-        names = major_names + minor_names
+        levels = list(major_levels) + list(minor_levels)
+        labels = list(major_labels) + list(minor_labels)
+        names = list(major_names) + list(minor_names)
 
         index = MultiIndex(levels=levels, labels=labels, names=names,
                            verify_integrity=False)
diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
index 87cb088c2e91e..faad6c500a21f 100644
--- a/pandas/core/reshape.py
+++ b/pandas/core/reshape.py
@@ -216,8 +216,8 @@ def get_new_columns(self):
         width = len(self.value_columns)
         propagator = np.repeat(np.arange(width), stride)
         if isinstance(self.value_columns, MultiIndex):
-            new_levels = self.value_columns.levels + (self.removed_level,)
-            new_names = self.value_columns.names + (self.removed_name,)
+            new_levels = self.value_columns.levels.union((self.removed_level,))
+            new_names = self.value_columns.names.union((self.removed_name,))
 
             new_labels = [lab.take(propagator)
                           for lab in self.value_columns.labels]
@@ -806,7 +806,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
     for col in id_vars:
         mdata[col] = np.tile(frame.pop(col).values, K)
 
-    mcolumns = id_vars + var_name + [value_name]
+    mcolumns = list(id_vars) + list(var_name) + list([value_name])
 
     mdata[value_name] = frame.values.ravel('F')
     for i, col in enumerate(var_name):
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index ac8d1db6a0bf3..51016926d6909 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -787,7 +787,7 @@ def str_extractall(arr, pat, flags=0):
     if 0 < len(index_list):
         from pandas import MultiIndex
         index = MultiIndex.from_tuples(
-            index_list, names=arr.index.names + ["match"])
+            index_list, names=arr.index.names.union(["match"]))
     else:
         index = None
     result = arr._constructor_expanddim(match_list, index=index,
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index 801d0da070112..ff01df2693c7c 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -28,7 +28,7 @@ def check_value_counts(df, keys, bins):
 
         gr = df.groupby(keys, sort=isort)
         right = gr['3rd'].apply(Series.value_counts, **kwargs)
-        right.index.names = right.index.names[:-1] + ['3rd']
+        right.index.names = right.index.names[:-1].union(['3rd'])
 
         # have to sort on index because of unstable sort on values
         left, right = map(rebuild_index, (left, right))  # xref GH9212
diff --git a/pandas/tools/concat.py b/pandas/tools/concat.py
index 6405106118472..ae9d7af9d98ff 100644
--- a/pandas/tools/concat.py
+++ b/pandas/tools/concat.py
@@ -574,7 +574,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
                             " not have the same number of levels")
 
     # also copies
-    names = names + _get_consensus_names(indexes)
+    names = list(names) + list(_get_consensus_names(indexes))
 
     return MultiIndex(levels=levels, labels=label_list, names=names,
                       verify_integrity=False)
diff --git a/test_fast.sh b/test_fast.sh
index 30ac7f84cbe8b..f22ab73277e8b 100755
--- a/test_fast.sh
+++ b/test_fast.sh
@@ -5,4 +5,4 @@
 # https://github.com/pytest-dev/pytest/issues/1075
 export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))')
 
-pytest pandas --skip-slow --skip-network -m "not single" -n 4
+pytest pandas --skip-slow --skip-network -m "not single" -n 4 $@
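The patch above standardizes on two migration idioms for ``FrozenList`` concatenation. A minimal sketch of both (an editorial illustration, not part of the patch), assuming pandas 0.20.0 or later where ``FrozenList.__add__`` emits a ``FutureWarning``::

    import pandas as pd

    idx = pd.MultiIndex.from_tuples([(0, 'a'), (1, 'b')], names=['x', 'y'])
    names = idx.names                      # FrozenList(['x', 'y'])

    # Either drop down to plain lists before concatenating...
    combined = list(names) + ['match']

    # ...or stay with FrozenList and use the new set operations,
    # which do not trigger the deprecation warning.
    combined = names.union(['match'])
    trimmed = names.difference(['y'])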
From 211ecd5d829e6ff9019261680f1d4e6f1b193a13 Mon Sep 17 00:00:00 2001
From: manuels
Date: Thu, 2 Mar 2017 18:20:40 -0500
Subject: [PATCH 128/933] Make Series.map() documentation a bit more verbose

Author: manuels

Closes #15235 from manuels/patch-1 and squashes the following commits:

c5113f2 [manuels] Make Series.map() documentation a bit more verbose
---
 pandas/core/series.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 1114590421fd8..626a4a81193cc 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2089,13 +2089,15 @@ def map(self, arg, na_action=None):
 
         Examples
         --------
-        Map inputs to outputs
+        Map inputs to outputs (both of type `Series`)
+
+        >>> x = pd.Series([1,2,3], index=['one', 'two', 'three'])
         >>> x
         one      1
         two      2
         three    3
+
+        >>> y = pd.Series(['foo', 'bar', 'baz'], index=[1,2,3])
         >>> y
         1    foo
         2    bar
@@ -2106,6 +2108,16 @@ def map(self, arg, na_action=None):
         two      bar
         three    baz
 
+        Mapping with a dictionary looks up the values of the Series as
+        keys in the dictionary, similar to mapping with a `Series`:
+
+        >>> z = {1: 'A', 2: 'B', 3: 'C'}
+
+        >>> x.map(z)
+        one      A
+        two      B
+        three    C
+
         Use na_action to control whether NA values are affected by the
         mapping function.
 
@@ -2127,6 +2139,11 @@ def map(self, arg, na_action=None):
         3      NaN
         dtype: object
 
+        See Also
+        --------
+        Series.apply: For applying more complex functions on a Series
+        DataFrame.apply: Apply a function row-/column-wise
+        DataFrame.applymap: Apply a function elementwise on a whole DataFrame
         """
 
         if is_extension_type(self.dtype):

From 24a2155eec4a24242cdecd9ddd7e61d02d8d6aeb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Gs=C3=A4nger?=
Date: Fri, 3 Mar 2017 10:16:45 +0100
Subject: [PATCH 129/933] ENH: Added multicolumn/multirow support for latex
 (#14184)

closes #13508

Print names of MultiIndex columns.
Added "multicolumn" and "multirow" flags to to_latex which trigger the
corresponding feature. "multicolumn_format" is used to select alignment.
Multirow adds clines to visually separate sections.
---
 doc/source/options.rst                | 295 +++++++++++++-------------
 doc/source/whatsnew/v0.20.0.txt       |   1 +
 pandas/core/config_init.py            |  31 ++-
 pandas/core/frame.py                  |  46 +++++-
 pandas/formats/format.py              | 115 +++++++++-
 pandas/tests/formats/test_to_latex.py | 115 +++++++++-
 6 files changed, 433 insertions(+), 170 deletions(-)

diff --git a/doc/source/options.rst b/doc/source/options.rst
index 77cac6d495d13..10a13ed36df8d 100644
--- a/doc/source/options.rst
+++ b/doc/source/options.rst
@@ -273,151 +273,156 @@ Options are 'right', and 'left'.
Available Options ----------------- -========================== ============ ================================== -Option Default Function -========================== ============ ================================== -display.chop_threshold None If set to a float value, all float - values smaller then the given - threshold will be displayed as - exactly 0 by repr and friends. -display.colheader_justify right Controls the justification of - column headers. used by DataFrameFormatter. -display.column_space 12 No description available. -display.date_dayfirst False When True, prints and parses dates - with the day first, eg 20/01/2005 -display.date_yearfirst False When True, prints and parses dates - with the year first, eg 2005/01/20 -display.encoding UTF-8 Defaults to the detected encoding - of the console. Specifies the encoding - to be used for strings returned by - to_string, these are generally strings - meant to be displayed on the console. -display.expand_frame_repr True Whether to print out the full DataFrame - repr for wide DataFrames across - multiple lines, `max_columns` is - still respected, but the output will - wrap-around across multiple "pages" - if its width exceeds `display.width`. -display.float_format None The callable should accept a floating - point number and return a string with - the desired format of the number. - This is used in some places like - SeriesFormatter. - See core.format.EngFormatter for an example. -display.height 60 Deprecated. Use `display.max_rows` instead. -display.large_repr truncate For DataFrames exceeding max_rows/max_cols, - the repr (and HTML repr) can show - a truncated table (the default from 0.13), - or switch to the view from df.info() - (the behaviour in earlier versions of pandas). - allowable settings, ['truncate', 'info'] -display.latex.repr False Whether to produce a latex DataFrame - representation for jupyter frontends - that support it. -display.latex.escape True Escapes special caracters in Dataframes, when - using the to_latex method. -display.latex.longtable False Specifies if the to_latex method of a Dataframe - uses the longtable format. -display.line_width 80 Deprecated. Use `display.width` instead. -display.max_columns 20 max_rows and max_columns are used - in __repr__() methods to decide if - to_string() or info() is used to - render an object to a string. In - case python/IPython is running in - a terminal this can be set to 0 and - pandas will correctly auto-detect - the width the terminal and swap to - a smaller format in case all columns - would not fit vertically. The IPython - notebook, IPython qtconsole, or IDLE - do not run in a terminal and hence - it is not possible to do correct - auto-detection. 'None' value means - unlimited. -display.max_colwidth 50 The maximum width in characters of - a column in the repr of a pandas - data structure. When the column overflows, - a "..." placeholder is embedded in - the output. -display.max_info_columns 100 max_info_columns is used in DataFrame.info - method to decide if per column information - will be printed. -display.max_info_rows 1690785 df.info() will usually show null-counts - for each column. For large frames - this can be quite slow. max_info_rows - and max_info_cols limit this null - check only to frames with smaller - dimensions then specified. -display.max_rows 60 This sets the maximum number of rows - pandas should output when printing - out various output. 
For example, - this value determines whether the - repr() for a dataframe prints out - fully or just a summary repr. - 'None' value means unlimited. -display.max_seq_items 100 when pretty-printing a long sequence, - no more then `max_seq_items` will - be printed. If items are omitted, - they will be denoted by the addition - of "..." to the resulting string. - If set to None, the number of items - to be printed is unlimited. -display.memory_usage True This specifies if the memory usage of - a DataFrame should be displayed when the - df.info() method is invoked. -display.multi_sparse True "Sparsify" MultiIndex display (don't - display repeated elements in outer - levels within groups) -display.notebook_repr_html True When True, IPython notebook will - use html representation for - pandas objects (if it is available). -display.pprint_nest_depth 3 Controls the number of nested levels - to process when pretty-printing -display.precision 6 Floating point output precision in - terms of number of places after the - decimal, for regular formatting as well - as scientific notation. Similar to - numpy's ``precision`` print option -display.show_dimensions truncate Whether to print out dimensions - at the end of DataFrame repr. - If 'truncate' is specified, only - print out the dimensions if the - frame is truncated (e.g. not display - all rows and/or columns) -display.width 80 Width of the display in characters. - In case python/IPython is running in - a terminal this can be set to None - and pandas will correctly auto-detect - the width. Note that the IPython notebook, - IPython qtconsole, or IDLE do not run in a - terminal and hence it is not possible - to correctly detect the width. -html.border 1 A ``border=value`` attribute is - inserted in the ```` tag - for the DataFrame HTML repr. -io.excel.xls.writer xlwt The default Excel writer engine for - 'xls' files. -io.excel.xlsm.writer openpyxl The default Excel writer engine for - 'xlsm' files. Available options: - 'openpyxl' (the default). -io.excel.xlsx.writer openpyxl The default Excel writer engine for - 'xlsx' files. -io.hdf.default_format None default format writing format, if - None, then put will default to - 'fixed' and append will default to - 'table' -io.hdf.dropna_table True drop ALL nan rows when appending - to a table -mode.chained_assignment warn Raise an exception, warn, or no - action if trying to use chained - assignment, The default is warn -mode.sim_interactive False Whether to simulate interactive mode - for purposes of testing -mode.use_inf_as_null False True means treat None, NaN, -INF, - INF as null (old way), False means - None and NaN are null, but INF, -INF - are not null (new way). -========================== ============ ================================== +=================================== ============ ================================== +Option Default Function +=================================== ============ ================================== +display.chop_threshold None If set to a float value, all float + values smaller then the given + threshold will be displayed as + exactly 0 by repr and friends. +display.colheader_justify right Controls the justification of + column headers. used by DataFrameFormatter. +display.column_space 12 No description available. 
+display.date_dayfirst               False        When True, prints and parses dates
+                                                 with the day first, eg 20/01/2005
+display.date_yearfirst              False        When True, prints and parses dates
+                                                 with the year first, eg 2005/01/20
+display.encoding                    UTF-8        Defaults to the detected encoding
+                                                 of the console. Specifies the encoding
+                                                 to be used for strings returned by
+                                                 to_string, these are generally strings
+                                                 meant to be displayed on the console.
+display.expand_frame_repr           True         Whether to print out the full DataFrame
+                                                 repr for wide DataFrames across
+                                                 multiple lines, `max_columns` is
+                                                 still respected, but the output will
+                                                 wrap-around across multiple "pages"
+                                                 if its width exceeds `display.width`.
+display.float_format                None         The callable should accept a floating
+                                                 point number and return a string with
+                                                 the desired format of the number.
+                                                 This is used in some places like
+                                                 SeriesFormatter.
+                                                 See core.format.EngFormatter for an example.
+display.height                      60           Deprecated. Use `display.max_rows` instead.
+display.large_repr                  truncate     For DataFrames exceeding max_rows/max_cols,
+                                                 the repr (and HTML repr) can show
+                                                 a truncated table (the default from 0.13),
+                                                 or switch to the view from df.info()
+                                                 (the behaviour in earlier versions of pandas).
+                                                 allowable settings, ['truncate', 'info']
+display.latex.repr                  False        Whether to produce a latex DataFrame
+                                                 representation for jupyter frontends
+                                                 that support it.
+display.latex.escape                True         Escapes special characters in Dataframes, when
+                                                 using the to_latex method.
+display.latex.longtable             False        Specifies if the to_latex method of a Dataframe
+                                                 uses the longtable format.
+display.latex.multicolumn           True         Combines columns when using a MultiIndex
+display.latex.multicolumn_format    'l'          Alignment of multicolumn labels
+display.latex.multirow              False        Combines rows when using a MultiIndex.
+                                                 Centered instead of top-aligned,
+                                                 separated by clines.
+display.line_width                  80           Deprecated. Use `display.width` instead.
+display.max_columns                 20           max_rows and max_columns are used
+                                                 in __repr__() methods to decide if
+                                                 to_string() or info() is used to
+                                                 render an object to a string. In
+                                                 case python/IPython is running in
+                                                 a terminal this can be set to 0 and
+                                                 pandas will correctly auto-detect
+                                                 the width the terminal and swap to
+                                                 a smaller format in case all columns
+                                                 would not fit vertically. The IPython
+                                                 notebook, IPython qtconsole, or IDLE
+                                                 do not run in a terminal and hence
+                                                 it is not possible to do correct
+                                                 auto-detection. 'None' value means
+                                                 unlimited.
+display.max_colwidth                50           The maximum width in characters of
+                                                 a column in the repr of a pandas
+                                                 data structure. When the column overflows,
+                                                 a "..." placeholder is embedded in
+                                                 the output.
+display.max_info_columns            100          max_info_columns is used in DataFrame.info
+                                                 method to decide if per column information
+                                                 will be printed.
+display.max_info_rows               1690785      df.info() will usually show null-counts
+                                                 for each column. For large frames
+                                                 this can be quite slow. max_info_rows
+                                                 and max_info_cols limit this null
+                                                 check only to frames with smaller
+                                                 dimensions than specified.
+display.max_rows                    60           This sets the maximum number of rows
+                                                 pandas should output when printing
+                                                 out various output. For example,
+                                                 this value determines whether the
+                                                 repr() for a dataframe prints out
+                                                 fully or just a summary repr.
+                                                 'None' value means unlimited.
+display.max_seq_items               100          when pretty-printing a long sequence,
+                                                 no more than `max_seq_items` will
+                                                 be printed. If items are omitted,
+                                                 they will be denoted by the addition
+                                                 of "..." to the resulting string.
+ If set to None, the number of items + to be printed is unlimited. +display.memory_usage True This specifies if the memory usage of + a DataFrame should be displayed when the + df.info() method is invoked. +display.multi_sparse True "Sparsify" MultiIndex display (don't + display repeated elements in outer + levels within groups) +display.notebook_repr_html True When True, IPython notebook will + use html representation for + pandas objects (if it is available). +display.pprint_nest_depth 3 Controls the number of nested levels + to process when pretty-printing +display.precision 6 Floating point output precision in + terms of number of places after the + decimal, for regular formatting as well + as scientific notation. Similar to + numpy's ``precision`` print option +display.show_dimensions truncate Whether to print out dimensions + at the end of DataFrame repr. + If 'truncate' is specified, only + print out the dimensions if the + frame is truncated (e.g. not display + all rows and/or columns) +display.width 80 Width of the display in characters. + In case python/IPython is running in + a terminal this can be set to None + and pandas will correctly auto-detect + the width. Note that the IPython notebook, + IPython qtconsole, or IDLE do not run in a + terminal and hence it is not possible + to correctly detect the width. +html.border 1 A ``border=value`` attribute is + inserted in the ``
<table>`` tag
+                                                 for the DataFrame HTML repr.
+io.excel.xls.writer                 xlwt         The default Excel writer engine for
+                                                 'xls' files.
+io.excel.xlsm.writer                openpyxl     The default Excel writer engine for
+                                                 'xlsm' files. Available options:
+                                                 'openpyxl' (the default).
+io.excel.xlsx.writer                openpyxl     The default Excel writer engine for
+                                                 'xlsx' files.
+io.hdf.default_format               None         default format writing format, if
+                                                 None, then put will default to
+                                                 'fixed' and append will default to
+                                                 'table'
+io.hdf.dropna_table                 True         drop ALL nan rows when appending
+                                                 to a table
+mode.chained_assignment             warn         Raise an exception, warn, or no
+                                                 action if trying to use chained
+                                                 assignment, The default is warn
+mode.sim_interactive                False        Whether to simulate interactive mode
+                                                 for purposes of testing
+mode.use_inf_as_null                False        True means treat None, NaN, -INF,
+                                                 INF as null (old way), False means
+                                                 None and NaN are null, but INF, -INF
+                                                 are not null (new way).
+=================================== ============ ==================================
 
 .. _basics.console_output:
 
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index dca4f890e496b..0991f3873b06f 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -182,6 +182,7 @@ Other enhancements
 - ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs ` (:issue:`15136`)
 - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
 - ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`)
+- The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements
 - ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`)
 - ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`).
 - ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`).
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index d3db633f3aa04..89616890e1de1 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -239,14 +239,35 @@
 : bool
     This specifies if the to_latex method of a Dataframe uses escapes special
     characters.
-    method. Valid values: False,True
+    Valid values: False,True
 """
 
 pc_latex_longtable = """
 :bool
     This specifies if the to_latex method of a Dataframe uses the longtable
     format.
-    method. Valid values: False,True
+    Valid values: False,True
+"""
+
+pc_latex_multicolumn = """
+: bool
+    This specifies if the to_latex method of a Dataframe uses multicolumns
+    to pretty-print MultiIndex columns.
+    Valid values: False,True
+"""
+
+pc_latex_multicolumn_format = """
+: string
+    This specifies the format for multicolumn headers.
+    Can be surrounded with '|'.
+    Valid values: 'l', 'c', 'r', 'p{}'
+"""
+
+pc_latex_multirow = """
+: bool
+    This specifies if the to_latex method of a Dataframe uses multirows
+    to pretty-print MultiIndex rows.
+    Valid values: False,True
 """
 
 style_backup = dict()
@@ -339,6 +360,12 @@ def mpl_style_cb(key):
                        validator=is_bool)
     cf.register_option('latex.longtable', False, pc_latex_longtable,
                        validator=is_bool)
+    cf.register_option('latex.multicolumn', True, pc_latex_multicolumn,
+                       validator=is_bool)
+    cf.register_option('latex.multicolumn_format', 'l',
+                       pc_latex_multicolumn_format, validator=is_text)
+    cf.register_option('latex.multirow', False, pc_latex_multirow,
+                       validator=is_bool)
 
     cf.deprecate_option('display.line_width',
                         msg=pc_line_width_deprecation_warning,
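With the options above registered, the feature can be driven either globally or per call. A minimal sketch (an editorial illustration, not part of the patch), assuming a pandas build with this patch applied (0.20.0) so that the ``display.latex.*`` keys and the new ``to_latex`` keywords exist::

    import pandas as pd

    # A small frame with MultiIndex columns to exercise \multicolumn.
    df = pd.DataFrame({('c1', 0): {0: 0, 1: 1},
                       ('c1', 1): {0: 5, 1: 6},
                       ('c2', 0): {0: 0, 1: 1}})

    # Session-wide defaults via the options registered above...
    pd.set_option('display.latex.multicolumn_format', 'c')
    pd.set_option('display.latex.multirow', True)

    # ...or per call, overriding the configured defaults.
    print(df.to_latex(multicolumn=True, multicolumn_format='c'))
    print(df.T.to_latex(multirow=True))   # MultiIndex rows -> \multirow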
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 26a0a91094e7d..b3e43edc3eb55 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1614,10 +1614,11 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
                  index=True, na_rep='NaN', formatters=None, float_format=None,
                  sparsify=None, index_names=True, bold_rows=True,
                  column_format=None, longtable=None, escape=None,
-                 encoding=None, decimal='.'):
-        """
+                 encoding=None, decimal='.', multicolumn=None,
+                 multicolumn_format=None, multirow=None):
+        r"""
         Render a DataFrame to a tabular environment table. You can splice
-        this into a LaTeX document. Requires \\usepackage{booktabs}.
+        this into a LaTeX document. Requires \usepackage{booktabs}.
 
         `to_latex`-specific options:
 
@@ -1628,27 +1629,54 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
             `__ e.g 'rcl' for 3 columns
         longtable : boolean, default will be read from the pandas config module
-            default: False
+            Default: False.
             Use a longtable environment instead of tabular. Requires adding
-            a \\usepackage{longtable} to your LaTeX preamble.
+            a \usepackage{longtable} to your LaTeX preamble.
         escape : boolean, default will be read from the pandas config module
-            default: True
+            Default: True.
             When set to False prevents from escaping latex special
             characters in column names.
         encoding : str, default None
             A string representing the encoding to use in the output file,
             defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
         decimal : string, default '.'
-            Character recognized as decimal separator, e.g. ',' in Europe
+            Character recognized as decimal separator, e.g. ',' in Europe.
 
             .. versionadded:: 0.18.0
 
+        multicolumn : boolean, default True
+            Use \multicolumn to enhance MultiIndex columns.
+            The default will be read from the config module.
+
+            .. versionadded:: 0.20.0
+
+        multicolumn_format : str, default 'l'
+            The alignment for multicolumns, similar to `column_format`
+            The default will be read from the config module.
+
+            .. versionadded:: 0.20.0
+
+        multirow : boolean, default False
+            Use \multirow to enhance MultiIndex rows.
+            Requires adding a \usepackage{multirow} to your LaTeX preamble.
+            Will print centered labels (instead of top-aligned)
+            across the contained rows, separating groups via clines.
+            The default will be read from the pandas config module.
+
+            .. 
versionadded:: 0.20.0 + """ # Get defaults from the pandas config if longtable is None: longtable = get_option("display.latex.longtable") if escape is None: escape = get_option("display.latex.escape") + if multicolumn is None: + multicolumn = get_option("display.latex.multicolumn") + if multicolumn_format is None: + multicolumn_format = get_option("display.latex.multicolumn_format") + if multirow is None: + multirow = get_option("display.latex.multirow") formatter = fmt.DataFrameFormatter(self, buf=buf, columns=columns, col_space=col_space, na_rep=na_rep, @@ -1660,7 +1688,9 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True, index_names=index_names, escape=escape, decimal=decimal) formatter.to_latex(column_format=column_format, longtable=longtable, - encoding=encoding) + encoding=encoding, multicolumn=multicolumn, + multicolumn_format=multicolumn_format, + multirow=multirow) if buf is None: return formatter.buf.getvalue() diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 4c081770e0125..9dde3b0001c31 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -650,13 +650,17 @@ def _join_multiline(self, *strcols): st = ed return '\n\n'.join(str_lst) - def to_latex(self, column_format=None, longtable=False, encoding=None): + def to_latex(self, column_format=None, longtable=False, encoding=None, + multicolumn=False, multicolumn_format=None, multirow=False): """ Render a DataFrame to a LaTeX tabular/longtable environment output. """ latex_renderer = LatexFormatter(self, column_format=column_format, - longtable=longtable) + longtable=longtable, + multicolumn=multicolumn, + multicolumn_format=multicolumn_format, + multirow=multirow) if encoding is None: encoding = 'ascii' if compat.PY2 else 'utf-8' @@ -824,11 +828,15 @@ class LatexFormatter(TableFormatter): HTMLFormatter """ - def __init__(self, formatter, column_format=None, longtable=False): + def __init__(self, formatter, column_format=None, longtable=False, + multicolumn=False, multicolumn_format=None, multirow=False): self.fmt = formatter self.frame = self.fmt.frame self.column_format = column_format self.longtable = longtable + self.multicolumn = multicolumn + self.multicolumn_format = multicolumn_format + self.multirow = multirow def write_result(self, buf): """ @@ -850,14 +858,21 @@ def get_col_type(dtype): else: return 'l' + # reestablish the MultiIndex that has been joined by _to_str_column if self.fmt.index and isinstance(self.frame.index, MultiIndex): clevels = self.frame.columns.nlevels strcols.pop(0) name = any(self.frame.index.names) + cname = any(self.frame.columns.names) + lastcol = self.frame.index.nlevels - 1 for i, lev in enumerate(self.frame.index.levels): lev2 = lev.format() blank = ' ' * len(lev2[0]) - lev3 = [blank] * clevels + # display column names in last index-column + if cname and i == lastcol: + lev3 = [x if x else '{}' for x in self.frame.columns.names] + else: + lev3 = [blank] * clevels if name: lev3.append(lev.name) for level_idx, group in itertools.groupby( @@ -885,10 +900,15 @@ def get_col_type(dtype): buf.write('\\begin{longtable}{%s}\n' % column_format) buf.write('\\toprule\n') - nlevels = self.frame.columns.nlevels + ilevels = self.frame.index.nlevels + clevels = self.frame.columns.nlevels + nlevels = clevels if any(self.frame.index.names): nlevels += 1 - for i, row in enumerate(zip(*strcols)): + strrows = list(zip(*strcols)) + self.clinebuf = [] + + for i, row in enumerate(strrows): if i == nlevels and self.fmt.header: buf.write('\\midrule\n') # End of 
header if self.longtable: @@ -910,8 +930,17 @@ def get_col_type(dtype): if x else '{}') for x in row] else: crow = [x if x else '{}' for x in row] + if i < clevels and self.fmt.header and self.multicolumn: + # sum up columns to multicolumns + crow = self._format_multicolumn(crow, ilevels) + if (i >= nlevels and self.fmt.index and self.multirow and + ilevels > 1): + # sum up rows to multirows + crow = self._format_multirow(crow, ilevels, i, strrows) buf.write(' & '.join(crow)) buf.write(' \\\\\n') + if self.multirow and i < len(strrows) - 1: + self._print_cline(buf, i, len(strcols)) if not self.longtable: buf.write('\\bottomrule\n') @@ -919,6 +948,80 @@ def get_col_type(dtype): else: buf.write('\\end{longtable}\n') + def _format_multicolumn(self, row, ilevels): + """ + Combine columns belonging to a group to a single multicolumn entry + according to self.multicolumn_format + + e.g.: + a & & & b & c & + will become + \multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c} + """ + row2 = list(row[:ilevels]) + ncol = 1 + coltext = '' + + def append_col(): + # write multicolumn if needed + if ncol > 1: + row2.append('\\multicolumn{{{0:d}}}{{{1:s}}}{{{2:s}}}' + .format(ncol, self.multicolumn_format, + coltext.strip())) + # don't modify where not needed + else: + row2.append(coltext) + for c in row[ilevels:]: + # if next col has text, write the previous + if c.strip(): + if coltext: + append_col() + coltext = c + ncol = 1 + # if not, add it to the previous multicolumn + else: + ncol += 1 + # write last column name + if coltext: + append_col() + return row2 + + def _format_multirow(self, row, ilevels, i, rows): + """ + Check following rows, whether row should be a multirow + + e.g.: becomes: + a & 0 & \multirow{2}{*}{a} & 0 & + & 1 & & 1 & + b & 0 & \cline{1-2} + b & 0 & + """ + for j in range(ilevels): + if row[j].strip(): + nrow = 1 + for r in rows[i + 1:]: + if not r[j].strip(): + nrow += 1 + else: + break + if nrow > 1: + # overwrite non-multirow entry + row[j] = '\\multirow{{{0:d}}}{{*}}{{{1:s}}}'.format( + nrow, row[j].strip()) + # save when to end the current block with \cline + self.clinebuf.append([i + nrow - 1, j + 1]) + return row + + def _print_cline(self, buf, i, icol): + """ + Print clines after multirow-blocks are finished + """ + for cl in self.clinebuf: + if cl[0] == i: + buf.write('\cline{{{0:d}-{1:d}}}\n'.format(cl[1], icol)) + # remove entries that have been written to buffer + self.clinebuf = [x for x in self.clinebuf if x[0] != i] + class HTMLFormatter(TableFormatter): diff --git a/pandas/tests/formats/test_to_latex.py b/pandas/tests/formats/test_to_latex.py index 89e18e1cec06e..17e1e18f03dd6 100644 --- a/pandas/tests/formats/test_to_latex.py +++ b/pandas/tests/formats/test_to_latex.py @@ -168,6 +168,24 @@ def test_to_latex_multiindex(self): assert result == expected + # GH 14184 + df = df.T + df.columns.names = ['a', 'b'] + result = df.to_latex() + expected = r"""\begin{tabular}{lrrrrr} +\toprule +a & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\ +b & 0 & 1 & 0 & 1 & 0 \\ +\midrule +0 & 0 & 4 & 0 & 4 & 0 \\ +1 & 1 & 5 & 1 & 5 & 1 \\ +2 & 2 & 6 & 2 & 6 & 2 \\ +3 & 3 & 7 & 3 & 7 & 3 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + # GH 10660 df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': list('abab'), @@ -189,16 +207,95 @@ def test_to_latex_multiindex(self): assert result == expected result = df.groupby('a').describe().to_latex() - expected = ('\\begin{tabular}{lrrrrrrrr}\n\\toprule\n{} & c & ' - ' & & & & & & ' - '\\\\\n{} & count & mean & std & min & 25\\% & 
' - '50\\% & 75\\% & max \\\\\na & & & ' - ' & & & & & \\\\\n\\midrule\n0 ' - '& 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 ' - '& 2.0 \\\\\n1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 ' - '& 3.5 & 3.75 & 4.0 ' - '\\\\\n\\bottomrule\n\\end{tabular}\n') + expected = r"""\begin{tabular}{lrrrrrrrr} +\toprule +{} & \multicolumn{8}{l}{c} \\ +{} & count & mean & std & min & 25\% & 50\% & 75\% & max \\ +a & & & & & & & & \\ +\midrule +0 & 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 & 2.0 \\ +1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 & 3.5 & 3.75 & 4.0 \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + def test_to_latex_multicolumnrow(self): + df = pd.DataFrame({ + ('c1', 0): dict((x, x) for x in range(5)), + ('c1', 1): dict((x, x + 5) for x in range(5)), + ('c2', 0): dict((x, x) for x in range(5)), + ('c2', 1): dict((x, x + 5) for x in range(5)), + ('c3', 0): dict((x, x) for x in range(5)) + }) + result = df.to_latex() + expected = r"""\begin{tabular}{lrrrrr} +\toprule +{} & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\ +{} & 0 & 1 & 0 & 1 & 0 \\ +\midrule +0 & 0 & 5 & 0 & 5 & 0 \\ +1 & 1 & 6 & 1 & 6 & 1 \\ +2 & 2 & 7 & 2 & 7 & 2 \\ +3 & 3 & 8 & 3 & 8 & 3 \\ +4 & 4 & 9 & 4 & 9 & 4 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + result = df.to_latex(multicolumn=False) + expected = r"""\begin{tabular}{lrrrrr} +\toprule +{} & c1 & & c2 & & c3 \\ +{} & 0 & 1 & 0 & 1 & 0 \\ +\midrule +0 & 0 & 5 & 0 & 5 & 0 \\ +1 & 1 & 6 & 1 & 6 & 1 \\ +2 & 2 & 7 & 2 & 7 & 2 \\ +3 & 3 & 8 & 3 & 8 & 3 \\ +4 & 4 & 9 & 4 & 9 & 4 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + result = df.T.to_latex(multirow=True) + expected = r"""\begin{tabular}{llrrrrr} +\toprule + & & 0 & 1 & 2 & 3 & 4 \\ +\midrule +\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ +\cline{1-7} +\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ +\cline{1-7} +c3 & 0 & 0 & 1 & 2 & 3 & 4 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + df.index = df.T.index + result = df.T.to_latex(multirow=True, multicolumn=True, + multicolumn_format='c') + expected = r"""\begin{tabular}{llrrrrr} +\toprule + & & \multicolumn{2}{c}{c1} & \multicolumn{2}{c}{c2} & c3 \\ + & & 0 & 1 & 0 & 1 & 0 \\ +\midrule +\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ +\cline{1-7} +\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ +\cline{1-7} +c3 & 0 & 0 & 1 & 2 & 3 & 4 \\ +\bottomrule +\end{tabular} +""" assert result == expected def test_to_latex_escape(self): From 524a9a06566295eef1d43450ff42859fe81081bf Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 3 Mar 2017 05:12:21 -0500 Subject: [PATCH 130/933] DEPR: deprecate some top-level non-used functions (#15538) closes #13790 pd.pnow pd.groupby pd.match pd.Term pd.Expr remove info.py --- doc/source/comparison_with_r.rst | 8 ----- doc/source/whatsnew/v0.20.0.txt | 6 ++++ pandas/__init__.py | 43 +++++++++++++++++++++++++- pandas/computation/api.py | 12 +++++++- pandas/core/api.py | 24 +++++++++++++-- pandas/info.py | 20 ------------ pandas/io/api.py | 14 ++++++++- pandas/tests/api/test_api.py | 49 ++++++++++++++++++++++-------- pandas/tests/scalar/test_period.py | 14 +++------ pandas/tseries/period.py | 8 ++++- 10 files changed, 141 insertions(+), 57 deletions(-) delete mode 100644 pandas/info.py diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst index aa0cbab4df10b..194e022e34c7c 100644 --- 
a/doc/source/comparison_with_r.rst +++ b/doc/source/comparison_with_r.rst @@ -206,14 +206,6 @@ of its first argument in its second: s <- 0:4 match(s, c(2,4)) -The :meth:`~pandas.core.groupby.GroupBy.apply` method can be used to replicate -this: - -.. ipython:: python - - s = pd.Series(np.arange(5),dtype=np.float32) - pd.Series(pd.match(s,[2,4],np.nan)) - For more details and examples see :ref:`the reshaping documentation `. diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 0991f3873b06f..fa5974ee84d34 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -537,6 +537,12 @@ Deprecations - importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explict imports (:issue:`15358`) - ``Series/DataFrame/Panel.consolidate()`` been deprecated as a public method. (:issue:`15483`) - ``FrozenList`` addition (new object and inplace) have been deprecated in favor of the ``.union()`` method. (:issue: `15475`) +- The following top-level pandas functions have been deprecated and will be removed in a future version (:issue:`13790`) + * ``pd.pnow()``, replaced by ``Period.now()`` + * ``pd.Term``, is removed, as it is not applicable to user code. Instead use in-line string expressions in the where clause when searching in HDFStore + * ``pd.Expr``, is removed, as it is not applicable to user code. + * ``pd.match()``, is removed. + * ``pd.groupby()``, replaced by using the ``.groupby()`` method directly on a ``Series/DataFrame`` .. _whatsnew_0200.prior_deprecations: diff --git a/pandas/__init__.py b/pandas/__init__.py index 70c547010f623..3bded89e6644a 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -33,7 +33,6 @@ "the C extensions first.".format(module)) from datetime import datetime -from pandas.info import __doc__ # let init-time option registration happen import pandas.core.config_init @@ -63,3 +62,45 @@ v = get_versions() __version__ = v.get('closest-tag', v['version']) del get_versions, v + +# module level doc-string +__doc__ = """ +pandas - a powerful data analysis and manipulation library for Python +===================================================================== + +**pandas** is a Python package providing fast, flexible, and expressive data +structures designed to make working with "relational" or "labeled" data both +easy and intuitive. It aims to be the fundamental high-level building block for +doing practical, **real world** data analysis in Python. Additionally, it has +the broader goal of becoming **the most powerful and flexible open source data +analysis / manipulation tool available in any language**. It is already well on +its way toward this goal. + +Main Features +------------- +Here are just a few of the things that pandas does well: + + - Easy handling of missing data in floating point as well as non-floating + point data + - Size mutability: columns can be inserted and deleted from DataFrame and + higher dimensional objects + - Automatic and explicit data alignment: objects can be explicitly aligned + to a set of labels, or the user can simply ignore the labels and let + `Series`, `DataFrame`, etc. 
automatically align the data for you in + computations + - Powerful, flexible group by functionality to perform split-apply-combine + operations on data sets, for both aggregating and transforming data + - Make it easy to convert ragged, differently-indexed data in other Python + and NumPy data structures into DataFrame objects + - Intelligent label-based slicing, fancy indexing, and subsetting of large + data sets + - Intuitive merging and joining data sets + - Flexible reshaping and pivoting of data sets + - Hierarchical labeling of axes (possible to have multiple labels per tick) + - Robust IO tools for loading data from flat files (CSV and delimited), + Excel files, databases, and saving/loading data from the ultrafast HDF5 + format + - Time series-specific functionality: date range generation and frequency + conversion, moving window statistics, moving window linear regressions, + date shifting and lagging, etc. +""" diff --git a/pandas/computation/api.py b/pandas/computation/api.py index e5814e08c4bbe..fe3dad015048e 100644 --- a/pandas/computation/api.py +++ b/pandas/computation/api.py @@ -1,4 +1,14 @@ # flake8: noqa from pandas.computation.eval import eval -from pandas.computation.expr import Expr + + +# deprecation, xref #13790 +def Expr(*args, **kwargs): + import warnings + + warnings.warn("pd.Expr is deprecated as it is not " + "applicable to user code", + FutureWarning, stacklevel=2) + from pandas.computation.expr import Expr + return Expr(*args, **kwargs) diff --git a/pandas/core/api.py b/pandas/core/api.py index eaebf45a038a0..65253dedb8b53 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -4,7 +4,7 @@ import numpy as np -from pandas.core.algorithms import factorize, match, unique, value_counts +from pandas.core.algorithms import factorize, unique, value_counts from pandas.types.missing import isnull, notnull from pandas.core.categorical import Categorical from pandas.core.groupby import Grouper @@ -17,7 +17,6 @@ from pandas.core.frame import DataFrame from pandas.core.panel import Panel, WidePanel from pandas.core.panel4d import Panel4D -from pandas.core.groupby import groupby from pandas.core.reshape import (pivot_simple as pivot, get_dummies, lreshape, wide_to_long) @@ -42,3 +41,24 @@ from pandas.core.config import (get_option, set_option, reset_option, describe_option, option_context, options) + + +# deprecation, xref #13790 +def match(*args, **kwargs): + import warnings + + warnings.warn("pd.match() is deprecated and will be removed " + "in a future version", + FutureWarning, stacklevel=2) + from pandas.core.algorithms import match + return match(*args, **kwargs) + + +def groupby(*args, **kwargs): + import warnings + + warnings.warn("pd.groupby() is deprecated and will be removed " + "Please use the Series.groupby() or " + "DataFrame.groupby() methods", + FutureWarning, stacklevel=2) + return args[0].groupby(*args[1:], **kwargs) diff --git a/pandas/info.py b/pandas/info.py deleted file mode 100644 index 57ecd91739eab..0000000000000 --- a/pandas/info.py +++ /dev/null @@ -1,20 +0,0 @@ -""" -pandas - a powerful data analysis and manipulation library for Python -===================================================================== - -See http://pandas.pydata.org/ for full documentation. 
Otherwise, see the -docstrings of the various objects in the pandas namespace: - -Series -DataFrame -Panel -Index -DatetimeIndex -HDFStore -bdate_range -date_range -read_csv -read_fwf -read_table -ols -""" diff --git a/pandas/io/api.py b/pandas/io/api.py index 0bd86c85b4b8b..1284b3cb222d6 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -7,7 +7,7 @@ from pandas.io.parsers import read_csv, read_table, read_fwf from pandas.io.clipboard import read_clipboard from pandas.io.excel import ExcelFile, ExcelWriter, read_excel -from pandas.io.pytables import HDFStore, Term, get_store, read_hdf +from pandas.io.pytables import HDFStore, get_store, read_hdf from pandas.io.json import read_json from pandas.io.html import read_html from pandas.io.sql import read_sql, read_sql_table, read_sql_query @@ -17,3 +17,15 @@ from pandas.io.pickle import read_pickle, to_pickle from pandas.io.packers import read_msgpack, to_msgpack from pandas.io.gbq import read_gbq + +# deprecation, xref #13790 +def Term(*args, **kwargs): + import warnings + + warnings.warn("pd.Term is deprecated as it is not " + "applicable to user code. Instead use in-line " + "string expressions in the where clause when " + "searching in HDFStore", + FutureWarning, stacklevel=2) + from pandas.io.pytables import Term + return Term(*args, **kwargs) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 8ca369f8df83a..f2f7a9c778e66 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -59,13 +59,10 @@ class TestPDApi(Base, tm.TestCase): # these are already deprecated; awaiting removal deprecated_classes = ['WidePanel', 'SparseTimeSeries', 'Panel4D', - 'SparseList'] + 'SparseList', 'Expr', 'Term'] # these should be deprecated in the future - deprecated_classes_in_future = ['Term', 'Panel'] - - # these should be removed from top-level namespace - remove_classes_from_top_level_namespace = ['Expr'] + deprecated_classes_in_future = ['Panel'] # external modules exposed in pandas namespace modules = ['np', 'datetime'] @@ -75,7 +72,7 @@ class TestPDApi(Base, tm.TestCase): 'date_range', 'eval', 'factorize', 'get_dummies', 'get_store', 'infer_freq', 'isnull', 'lreshape', - 'match', 'melt', 'notnull', 'offsets', + 'melt', 'notnull', 'offsets', 'merge', 'merge_ordered', 'merge_asof', 'period_range', 'pivot', 'pivot_table', 'plot_params', 'qcut', @@ -99,9 +96,6 @@ class TestPDApi(Base, tm.TestCase): funcs_to = ['to_datetime', 'to_msgpack', 'to_numeric', 'to_pickle', 'to_timedelta'] - # these should be deprecated in the future - deprecated_funcs_in_future = ['pnow', 'groupby', 'info'] - # these are already deprecated; awaiting removal deprecated_funcs = ['ewma', 'ewmcorr', 'ewmcov', 'ewmstd', 'ewmvar', 'ewmvol', 'expanding_apply', 'expanding_corr', @@ -114,7 +108,8 @@ class TestPDApi(Base, tm.TestCase): 'rolling_kurt', 'rolling_max', 'rolling_mean', 'rolling_median', 'rolling_min', 'rolling_quantile', 'rolling_skew', 'rolling_std', 'rolling_sum', - 'rolling_var', 'rolling_window', 'ordered_merge'] + 'rolling_var', 'rolling_window', 'ordered_merge', + 'pnow', 'match', 'groupby'] def test_api(self): @@ -123,11 +118,9 @@ def test_api(self): self.modules + self.deprecated_modules + self.classes + self.deprecated_classes + self.deprecated_classes_in_future + - self.remove_classes_from_top_level_namespace + self.funcs + self.funcs_option + self.funcs_read + self.funcs_to + - self.deprecated_funcs + - self.deprecated_funcs_in_future, + self.deprecated_funcs, self.ignored) @@ -225,3 +218,33 @@ def 
test_deprecation_access_obj(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): pd.datetools.monthEnd + + +class TestTopLevelDeprecations(tm.TestCase): + # top-level API deprecations + # GH 13790 + + def test_pnow(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.pnow(freq='M') + + def test_term(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.Term('index>=date') + + def test_expr(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.Expr('2>1') + + def test_match(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.match([1, 2, 3], [1]) + + def test_groupby(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.groupby(pd.Series([1, 2, 3]), [1, 1, 1]) diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index ffe00a4a62a0a..49aa44492fe81 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -864,17 +864,11 @@ def test_properties_nat(self): self.assertTrue(np.isnan(getattr(t_nat, f))) def test_pnow(self): - dt = datetime.now() - val = period.pnow('D') - exp = Period(dt, freq='D') - self.assertEqual(val, exp) - - val2 = period.pnow('2D') - exp2 = Period(dt, freq='2D') - self.assertEqual(val2, exp2) - self.assertEqual(val.ordinal, val2.ordinal) - self.assertEqual(val.ordinal, exp2.ordinal) + # deprecation, xref #13790 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + period.pnow('D') def test_constructor_corner(self): expected = Period('2007-01', freq='2M') diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 8a6b0c153bb50..6e499924730b3 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -1144,7 +1144,13 @@ def _make_field_arrays(*fields): def pnow(freq=None): - return Period(datetime.now(), freq=freq) + # deprecation, xref #13790 + import warnings + + warnings.warn("pd.pnow() and pandas.tseries.period.pnow() " + "are deprecated. Please use Period.now()", + FutureWarning, stacklevel=2) + return Period.now(freq=freq) def period_range(start=None, end=None, periods=None, freq='D', name=None): From 0b07b07da7d5de06a414af467f9f5667835c150e Mon Sep 17 00:00:00 2001 From: Sahil Dua Date: Fri, 3 Mar 2017 08:04:43 -0500 Subject: [PATCH 131/933] BUG: Set frequency for empty Series closes #14320 Author: Sahil Dua Closes #14458 from sahildua2305/frequency-series-fix and squashes the following commits: 384e666 [Sahil Dua] BUG: Set frequency for empty Series --- doc/source/whatsnew/v0.20.0.txt | 3 +++ pandas/tests/series/test_timeseries.py | 8 ++++++++ pandas/tseries/resample.py | 18 ++++++++++-------- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fa5974ee84d34..df259f4a42b86 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -605,6 +605,9 @@ Bug Fixes - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) + +- Bug in ``.asfreq()``, where frequency was not set for empty ``Series` (:issue:`14320`) + - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) - Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. 
(:issue:`14956`)

diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py
index 8c22b3f047210..d384460c3d030 100644
--- a/pandas/tests/series/test_timeseries.py
+++ b/pandas/tests/series/test_timeseries.py
@@ -260,6 +260,14 @@ def test_asfreq(self):
                           index=[-1.0, 2.0, 1.0, 0.0]).sort_index()
         assert_series_equal(result, expected)
 
+    def test_asfreq_datetimeindex_empty_series(self):
+        # GH 14320
+        expected = Series(index=pd.DatetimeIndex(
+            ["2016-09-29 11:00"])).asfreq('H')
+        result = Series(index=pd.DatetimeIndex(["2016-09-29 11:00"]),
+                        data=[3]).asfreq('H')
+        self.assert_index_equal(expected.index, result.index)
+
     def test_diff(self):
         # Just run the function
         self.ts.diff()
diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py
index 75e550a065fd2..21d7dc0c177b6 100755
--- a/pandas/tseries/resample.py
+++ b/pandas/tseries/resample.py
@@ -1382,16 +1382,18 @@ def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None):
         if how is None:
             how = 'E'
 
-        new_index = obj.index.asfreq(freq, how=how)
         new_obj = obj.copy()
-        new_obj.index = new_index
-        return new_obj
+        new_obj.index = obj.index.asfreq(freq, how=how)
+
+    elif len(obj.index) == 0:
+        new_obj = obj.copy()
+        new_obj.index = obj.index._shallow_copy(freq=to_offset(freq))
+
     else:
-        if len(obj.index) == 0:
-            return obj.copy()
         dti = date_range(obj.index[0], obj.index[-1], freq=freq)
         dti.name = obj.index.name
-        rs = obj.reindex(dti, method=method, fill_value=fill_value)
+        new_obj = obj.reindex(dti, method=method, fill_value=fill_value)
         if normalize:
-            rs.index = rs.index.normalize()
-        return rs
+            new_obj.index = new_obj.index.normalize()
+
+    return new_obj

From 04e116851337cd852b4255f8221d9be44829e0e1 Mon Sep 17 00:00:00 2001
From: Chris
Date: Fri, 3 Mar 2017 10:04:26 -0500
Subject: [PATCH 132/933] BUG: syntax error in hdf query with ts

closes #15492

Author: Chris

Closes #15544 from chris-b1/hdf-dt-error and squashes the following commits:

8288dca [Chris] lint
7c7100d [Chris] expand test cases
946a48e [Chris] ERR: more strict HDFStore string comparison
213585f [Chris] CLN: remove timetuple type check
cc977f0 [Chris] BUG: syntax error in hdf query with ts
---
 doc/source/whatsnew/v0.20.0.txt  | 31 +++++++++++++++++++++-
 pandas/computation/pytables.py   | 21 ++++++---------
 pandas/tests/io/test_pytables.py | 44 ++++++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index df259f4a42b86..782ae6082c1cf 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -501,6 +501,35 @@ New Behavior:
 
     df.groupby('A').agg([np.mean, np.std, np.min, np.max])
 
+.. _whatsnew_0200.api_breaking.hdfstore_where:
+
+HDFStore where string comparison
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In previous versions most types could be compared to a string column in a ``HDFStore``,
+usually resulting in an invalid comparison. These comparisons will now raise a
+``TypeError`` (:issue:`15492`)
+
+New Behavior:
+
+.. code-block:: ipython
+
+   In [15]: df = pd.DataFrame({'unparsed_date': ['2014-01-01', '2014-01-01']})
+
+   In [16]: df.dtypes
+   Out[16]:
+   unparsed_date    object
+   dtype: object
+
+   In [17]: df.to_hdf('store.h5', 'key', format='table', data_columns=True)
+
+   In [18]: ts = pd.Timestamp('2014-01-01')
+
+   In [19]: pd.read_hdf('store.h5', 'key', where='unparsed_date > ts')
+   TypeError: Cannot compare 2014-01-01 00:00:00 of
+              type to string column
+
+
 .. 
_whatsnew_0200.api: Other API Changes @@ -671,7 +700,7 @@ Bug Fixes - Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`) - +- Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`) - Bug in ``Series`` constructor when both ``copy=True`` and ``dtype`` arguments are provided (:issue:`15125`) diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 9dc18284ec22c..7c09ca8d38773 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -1,7 +1,6 @@ """ manage PyTables query interface via Expressions """ import ast -import time import warnings from functools import partial from datetime import datetime, timedelta @@ -188,10 +187,6 @@ def stringify(value): if v.tz is not None: v = v.tz_convert('UTC') return TermValue(v, v.value, kind) - elif (isinstance(v, datetime) or hasattr(v, 'timetuple') or - kind == u('date')): - v = time.mktime(v.timetuple()) - return TermValue(v, pd.Timestamp(v), kind) elif kind == u('timedelta64') or kind == u('timedelta'): v = _coerce_scalar_to_timedelta_type(v, unit='s').value return TermValue(int(v), v, kind) @@ -218,12 +213,13 @@ def stringify(value): else: v = bool(v) return TermValue(v, v, kind) - elif not isinstance(v, string_types): - v = stringify(v) + elif isinstance(v, string_types): + # string quoting return TermValue(v, stringify(v), u('string')) - - # string quoting - return TermValue(v, stringify(v), u('string')) + else: + raise TypeError(("Cannot compare {v} of type {typ}" + " to {kind} column").format(v=v, typ=type(v), + kind=kind)) def convert_values(self): pass @@ -558,9 +554,8 @@ def parse_back_compat(self, w, op=None, value=None): # stringify with quotes these values def convert(v): - if (isinstance(v, (datetime, np.datetime64, - timedelta, np.timedelta64)) or - hasattr(v, 'timetuple')): + if isinstance(v, (datetime, np.datetime64, + timedelta, np.timedelta64)): return "'{0}'".format(v) return v diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 821d9956a2dfa..9f1dea2094bc6 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5071,6 +5071,50 @@ def test_query_long_float_literal(self): expected = df.loc[[1], :] tm.assert_frame_equal(expected, result) + def test_query_compare_column_type(self): + # GH 15492 + df = pd.DataFrame({'date': ['2014-01-01', '2014-01-02'], + 'real_date': date_range('2014-01-01', periods=2), + 'float': [1.1, 1.2], + 'int': [1, 2]}, + columns=['date', 'real_date', 'float', 'int']) + + with ensure_clean_store(self.path) as store: + store.append('test', df, format='table', data_columns=True) + + ts = pd.Timestamp('2014-01-01') # noqa + result = store.select('test', where='real_date > ts') + expected = df.loc[[1], :] + tm.assert_frame_equal(expected, result) + + for op in ['<', '>', '==']: + # non strings to string column always fail + for v in [2.1, True, pd.Timestamp('2014-01-01'), + pd.Timedelta(1, 's')]: + query = 'date {op} v'.format(op=op) + with tm.assertRaises(TypeError): + result = store.select('test', where=query) + + # strings to other columns must be convertible to type + v = 'a' + for col in ['int', 'float', 'real_date']: + query = '{col} {op} v'.format(op=op, col=col) + with tm.assertRaises(ValueError): + result = store.select('test', where=query) + + for v, col in zip(['1', '1.1', '2014-01-01'], + ['int', 'float', 'real_date']): + query = '{col} 
{op} v'.format(op=op, col=col) + result = store.select('test', where=query) + + if op == '==': + expected = df.loc[[0], :] + elif op == '>': + expected = df.loc[[1], :] + else: + expected = df.loc[[], :] + tm.assert_frame_equal(expected, result) + class TestHDFComplexValues(Base): # GH10447 From 7ae4fd10b1581d77b4cfbf594e3b444af7456876 Mon Sep 17 00:00:00 2001 From: Kyle Kelley Date: Sat, 4 Mar 2017 03:09:45 -0800 Subject: [PATCH 133/933] BUG: handle empty lists in json_normalize (#15535) closes #15534 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/json/normalize.py | 3 +++ pandas/tests/io/json/test_normalize.py | 5 +++++ 3 files changed, 9 insertions(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 782ae6082c1cf..8b6c53a159ad8 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -192,6 +192,7 @@ Other enhancements - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. +- ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index f29472155da17..0e7d025e81851 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -157,6 +157,9 @@ def _pull_field(js, spec): return result + if isinstance(data, list) and len(data) is 0: + return DataFrame() + # A bit of a hackjob if isinstance(data, dict): data = [data] diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index c60b81ffe504d..f881f4dafe0f3 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -62,6 +62,11 @@ def test_simple_normalize(self): tm.assert_frame_equal(result, expected) + def test_empty_array(self): + result = json_normalize([]) + expected = DataFrame() + tm.assert_frame_equal(result, expected) + def test_more_deeply_nested(self): data = [{'country': 'USA', 'states': [{'name': 'California', From 07ac39e9556538e02b3684bd7f4493c5301f409c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 4 Mar 2017 05:50:04 -0600 Subject: [PATCH 134/933] ENH: Added to_json_schema (#14904) Lays the groundwork for https://github.com/pandas-dev/pandas/issues/14386 This handles the schema part of the request there. We'll still need to do the work to publish the data to the frontend, but that can be done as a followup. 
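As a rough illustration of the usage this patch enables (a hedged sketch, not
output copied from the implementation; the precise schema layout is defined by
the diff that follows):

    import pandas as pd

    df = pd.DataFrame({'A': [1, 2]}, index=pd.Index([0, 1], name='idx'))
    s = df.to_json(orient='table')
    # s is a JSON string with two top-level keys:
    #   "schema" - the Table Schema: per-field names and types, index
    #              levels included, plus a "primaryKey" entry when the
    #              index is unique
    #   "data"   - the frame serialized as with orient='records'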
Added publish to dataframe repr
---
 ci/requirements-2.7.pip                     |   2 +
 ci/requirements-3.5.run                     |   1 +
 ci/requirements-3.6.run                     |   1 +
 doc/source/api.rst                          |   1 +
 doc/source/io.rst                           | 120 +++++
 doc/source/options.rst                      |  21 +
 doc/source/whatsnew/v0.20.0.txt             |  35 ++
 pandas/core/config_init.py                  |  10 +
 pandas/core/generic.py                      |  86 +++-
 pandas/io/json/__init__.py                  |   3 +-
 pandas/io/json/json.py                      |  89 +++-
 pandas/io/json/table_schema.py              | 177 +++++++
 pandas/tests/formats/test_printing.py       |  61 +++
 .../tests/io/json/test_json_table_schema.py | 462 ++++++++++++++++++
 pandas/util/testing.py                      |  19 +
 15 files changed, 1072 insertions(+), 16 deletions(-)
 create mode 100644 pandas/io/json/table_schema.py
 create mode 100644 pandas/tests/io/json/test_json_table_schema.py

diff --git a/ci/requirements-2.7.pip b/ci/requirements-2.7.pip
index 08240184f2934..eb796368e7820 100644
--- a/ci/requirements-2.7.pip
+++ b/ci/requirements-2.7.pip
@@ -4,3 +4,5 @@ pathlib
 backports.lzma
 py
 PyCrypto
+mock
+ipython
diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run
index b07ce611c79a2..43e6814ed6c8e 100644
--- a/ci/requirements-3.5.run
+++ b/ci/requirements-3.5.run
@@ -18,3 +18,4 @@ pymysql
 psycopg2
 s3fs
 beautifulsoup4
+ipython
diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run
index 5d9cb05a7b402..9a6c1c7edbc5e 100644
--- a/ci/requirements-3.6.run
+++ b/ci/requirements-3.6.run
@@ -18,3 +18,4 @@ pymysql
 beautifulsoup4
 s3fs
 xarray
+ipython
diff --git a/doc/source/api.rst b/doc/source/api.rst
index 6c4a3cff5b4cf..33ac5fde651d4 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -60,6 +60,7 @@ JSON
    :toctree: generated/
 
    json_normalize
+   build_table_schema
 
 .. currentmodule:: pandas
 
diff --git a/doc/source/io.rst b/doc/source/io.rst
index b36ae8c2ed450..c34cc1ec17512 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -2033,6 +2033,126 @@ using Hadoop or Spark.
   df
   df.to_json(orient='records', lines=True)
 
+
+.. _io.table_schema:
+
+Table Schema
+''''''''''''
+
+.. versionadded:: 0.20.0
+
+`Table Schema`_ is a spec for describing tabular datasets as a JSON
+object. The JSON includes information on the field names, types, and
+other attributes. You can use the orient ``table`` to build
+a JSON string with two fields, ``schema`` and ``data``.
+
+.. ipython:: python
+
+   df = pd.DataFrame(
+       {'A': [1, 2, 3],
+        'B': ['a', 'b', 'c'],
+        'C': pd.date_range('2016-01-01', freq='d', periods=3),
+       }, index=pd.Index(range(3), name='idx'))
+   df
+   df.to_json(orient='table', date_format="iso")
+
+The ``schema`` field contains the ``fields`` key, which itself contains
+a list of column name to type pairs, including the ``Index`` or ``MultiIndex``
+(see below for a list of types).
+The ``schema`` field also contains a ``primaryKey`` field if the (Multi)index
+is unique.
+
+The second field, ``data``, contains the serialized data with the ``records``
+orient.
+The index is included, and any datetimes are ISO 8601 formatted, as required
+by the Table Schema spec.
+
+The full list of supported types is described in the Table Schema
+spec. This table shows the mapping from pandas types:
+
+=============== =================
+Pandas type     Table Schema type
+=============== =================
+int64           integer
+float64         number
+bool            boolean
+datetime64[ns]  datetime
+timedelta64[ns] duration
+categorical     any
+object          string
+=============== =================
+
+A few notes on the generated table schema:
+
+- The ``schema`` object contains a ``pandas_version`` field. This contains
+  the version of pandas' dialect of the schema, and will be incremented
+  with each revision.
+- All dates are converted to UTC when serializing. Even timezone naïve values,
+  which are treated as UTC with an offset of 0.
+
+  .. ipython:: python
+
+     from pandas.io.json import build_table_schema
+     s = pd.Series(pd.date_range('2016', periods=4))
+     build_table_schema(s)
+
+- datetimes with a timezone (before serializing), include an additional field
+  ``tz`` with the time zone name (e.g. ``'US/Central'``).
+
+  .. ipython:: python
+
+     s_tz = pd.Series(pd.date_range('2016', periods=12,
+                                    tz='US/Central'))
+     build_table_schema(s_tz)
+
+- Periods are converted to timestamps before serialization, and so have the
+  same behavior of being converted to UTC. In addition, periods will contain
+  an additional field ``freq`` with the period's frequency, e.g. ``'A-DEC'``
+
+  .. ipython:: python
+
+     s_per = pd.Series(1, index=pd.period_range('2016', freq='A-DEC',
+                                                periods=4))
+     build_table_schema(s_per)
+
+- Categoricals use the ``any`` type and an ``enum`` constraint listing
+  the set of possible values. Additionally, an ``ordered`` field is included
+
+  .. ipython:: python
+
+     s_cat = pd.Series(pd.Categorical(['a', 'b', 'a']))
+     build_table_schema(s_cat)
+
+- A ``primaryKey`` field, containing an array of labels, is included
+  *if the index is unique*:
+
+  .. ipython:: python
+
+     s_dupe = pd.Series([1, 2], index=[1, 1])
+     build_table_schema(s_dupe)
+
+- The ``primaryKey`` behavior is the same with MultiIndexes, but in this
+  case the ``primaryKey`` is an array:
+
+  .. ipython:: python
+
+     s_multi = pd.Series(1, index=pd.MultiIndex.from_product([('a', 'b'),
+                                                              (0, 1)]))
+     build_table_schema(s_multi)
+
+- The default naming roughly follows these rules:
+
+  + For series, the ``object.name`` is used. If that's ``None``, then the
+    name is ``values``
+  + For DataFrames, the stringified version of the column name is used
+  + For ``Index`` (not ``MultiIndex``), ``index.name`` is used, with a
+    fallback to ``index`` if that is None.
+  + For ``MultiIndex``, ``mi.names`` is used. If any level has no name,
+    then ``level_<i>`` is used.
+
+
+.. _Table Schema: http://specs.frictionlessdata.io/json-table-schema/
+
 HTML
 ----
 
diff --git a/doc/source/options.rst b/doc/source/options.rst
index 10a13ed36df8d..1a0e5cf6b7235 100644
--- a/doc/source/options.rst
+++ b/doc/source/options.rst
@@ -397,6 +397,9 @@ display.width 80 Width of the display in charact
                                      IPython qtconsole, or IDLE do not run in a
                                      terminal and hence it is not possible
                                      to correctly detect the width.
+display.html.table_schema  False     Whether to publish a Table Schema
+                                     representation for frontends that
+                                     support it.
 html.border                1         A ``border=value`` attribute is inserted
                                     in the ``<table>`` tag for the
                                     DataFrame HTML repr.
@@ -424,6 +427,7 @@ mode.use_inf_as_null False True means treat None, NaN, -IN
                                      are not null (new way).
 =================================== ============ ==================================
 
+
 .. _basics.console_output:
 
 Number Formatting
@@ -512,3 +516,20 @@ Enabling ``display.unicode.ambiguous_as_wide`` lets pandas to figure these chara
 
    pd.set_option('display.unicode.east_asian_width', False)
    pd.set_option('display.unicode.ambiguous_as_wide', False)
+
+.. _options.table_schema:
+
+Table Schema Display
+--------------------
+
+.. versionadded:: 0.20.0
+
+``DataFrame`` and ``Series`` can publish a Table Schema representation.
+This is disabled by default, and can be enabled globally with the
+``display.html.table_schema`` option:
+
+.. ipython:: python
+
+   pd.set_option('display.html.table_schema', True)
+
+Only the first ``display.max_rows`` rows are serialized and published.
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 8b6c53a159ad8..7b4538bd181d2 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -12,6 +12,7 @@ Highlights include:
 - Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`)
 - The ``.ix`` indexer has been deprecated, see :ref:`here `
 - Switched the test framework to `pytest`_ (:issue:`13097`)
+- A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec, see :ref:`here `
 
 .. _pytest: http://doc.pytest.org/en/latest/
 
@@ -154,6 +155,40 @@ New Behavior:
 
     df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum()
 
+.. _whatsnew_0200.enhancements.table_schema:
+
+Table Schema Output
+^^^^^^^^^^^^^^^^^^^
+
+The new orient ``'table'`` for :meth:`DataFrame.to_json`
+will generate a `Table Schema`_ compatible string representation of
+the data.
+
+.. ipython:: python
+
+   df = pd.DataFrame(
+       {'A': [1, 2, 3],
+        'B': ['a', 'b', 'c'],
+        'C': pd.date_range('2016-01-01', freq='d', periods=3),
+       }, index=pd.Index(range(3), name='idx'))
+   df
+   df.to_json(orient='table')
+
+
+See :ref:`IO: Table Schema <io.table_schema>` for more.
+
+Additionally, the repr for ``DataFrame`` and ``Series`` can now publish
+this JSON Table Schema representation of the Series or DataFrame if you are
+using IPython (or another frontend like `nteract`_ using the Jupyter messaging
+protocol).
+This gives frontends like the Jupyter notebook and `nteract`_
+more flexibility in how they display pandas objects, since they have
+more information about the data.
+You must enable this by setting the ``display.html.table_schema`` option to ``True``.
+
+.. _Table Schema: http://specs.frictionlessdata.io/json-table-schema/
+.. _nteract: http://nteract.io/
+
 .. _whatsnew_0200.enhancements.other:
 
 Other enhancements
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 89616890e1de1..931fe0661818d 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -164,6 +164,13 @@
     (default: False)
 """
 
+pc_table_schema_doc = """
+: boolean
+    Whether to publish a Table Schema representation for frontends
+    that support it. 
+ (default: False) +""" + pc_line_width_deprecation_warning = """\ line_width has been deprecated, use display.width instead (currently both are identical) @@ -366,6 +373,9 @@ def mpl_style_cb(key): validator=is_text) cf.register_option('latex.multirow', False, pc_latex_multirow, validator=is_bool) + cf.register_option('html.table_schema', False, pc_table_schema_doc, + validator=is_bool) + cf.deprecate_option('display.line_width', msg=pc_line_width_deprecation_warning, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 127aac970fbc1..298fa75779420 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4,6 +4,7 @@ import operator import weakref import gc +import json import numpy as np import pandas.lib as lib @@ -129,6 +130,37 @@ def __init__(self, data, axes=None, copy=False, dtype=None, object.__setattr__(self, '_data', data) object.__setattr__(self, '_item_cache', {}) + def _ipython_display_(self): + try: + from IPython.display import display + except ImportError: + return None + + # Series doesn't define _repr_html_ or _repr_latex_ + latex = self._repr_latex_() if hasattr(self, '_repr_latex_') else None + html = self._repr_html_() if hasattr(self, '_repr_html_') else None + table_schema = self._repr_table_schema_() + # We need the inital newline since we aren't going through the + # usual __repr__. See + # https://github.com/pandas-dev/pandas/pull/14904#issuecomment-277829277 + text = "\n" + repr(self) + + reprs = {"text/plain": text, "text/html": html, "text/latex": latex, + "application/vnd.dataresource+json": table_schema} + reprs = {k: v for k, v in reprs.items() if v} + display(reprs, raw=True) + + def _repr_table_schema_(self): + """ + Not a real Jupyter special repr method, but we use the same + naming convention. + """ + if config.get_option("display.html.table_schema"): + data = self.head(config.get_option('display.max_rows')) + payload = json.loads(data.to_json(orient='table'), + object_pairs_hook=collections.OrderedDict) + return payload + def _validate_dtype(self, dtype): """ validate the passed dtype """ @@ -1094,7 +1126,7 @@ def __setstate__(self, state): strings before writing. """ - def to_json(self, path_or_buf=None, orient=None, date_format='epoch', + def to_json(self, path_or_buf=None, orient=None, date_format=None, double_precision=10, force_ascii=True, date_unit='ms', default_handler=None, lines=False): """ @@ -1129,10 +1161,17 @@ def to_json(self, path_or_buf=None, orient=None, date_format='epoch', - index : dict like {index -> {column -> value}} - columns : dict like {column -> {index -> value}} - values : just the values array + - table : dict like {'schema': {schema}, 'data': {data}} + describing the data, and the data component is + like ``orient='records'``. - date_format : {'epoch', 'iso'} + .. versionchanged:: 0.20.0 + + date_format : {None, 'epoch', 'iso'} Type of date conversion. `epoch` = epoch milliseconds, - `iso`` = ISO8601, default is epoch. + `iso` = ISO8601. The default depends on the `orient`. For + `orient='table'`, the default is `'iso'`. For all other orients, + the default is `'epoch'`. double_precision : The number of decimal places to use when encoding floating point values, default 10. force_ascii : force encoded string to be ASCII, default True. @@ -1151,14 +1190,53 @@ def to_json(self, path_or_buf=None, orient=None, date_format='epoch', .. 
versionadded:: 0.19.0 - Returns ------- same type as input object with filtered info axis + See Also + -------- + pd.read_json + + Examples + -------- + + >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], + ... index=['row 1', 'row 2'], + ... columns=['col 1', 'col 2']) + >>> df.to_json(orient='split') + '{"columns":["col 1","col 2"], + "index":["row 1","row 2"], + "data":[["a","b"],["c","d"]]}' + + Encoding/decoding a Dataframe using ``'index'`` formatted JSON: + + >>> df.to_json(orient='index') + '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}' + + Encoding/decoding a Dataframe using ``'records'`` formatted JSON. + Note that index labels are not preserved with this encoding. + + >>> df.to_json(orient='records') + '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]' + + Encoding with Table Schema + + >>> df.to_json(orient='table') + '{"schema": {"fields": [{"name": "index", "type": "string"}, + {"name": "col 1", "type": "string"}, + {"name": "col 2", "type": "string"}], + "primaryKey": "index", + "pandas_version": "0.20.0"}, + "data": [{"index": "row 1", "col 1": "a", "col 2": "b"}, + {"index": "row 2", "col 1": "c", "col 2": "d"}]}' """ from pandas.io import json + if date_format is None and orient == 'table': + date_format = 'iso' + elif date_format is None: + date_format = 'epoch' return json.to_json(path_or_buf=path_or_buf, obj=self, orient=orient, date_format=date_format, double_precision=double_precision, diff --git a/pandas/io/json/__init__.py b/pandas/io/json/__init__.py index a9390a04cc2cd..32d110b3404a9 100644 --- a/pandas/io/json/__init__.py +++ b/pandas/io/json/__init__.py @@ -1,4 +1,5 @@ from .json import to_json, read_json, loads, dumps # noqa from .normalize import json_normalize # noqa +from .table_schema import build_table_schema # noqa -del json, normalize # noqa +del json, normalize, table_schema # noqa diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 6fc766081eefe..a00d3492e8a37 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -1,5 +1,4 @@ # pylint: disable-msg=E1101,W0613,W0603 - import os import numpy as np @@ -12,10 +11,14 @@ from pandas.core.common import AbstractMethodError from pandas.formats.printing import pprint_thing from .normalize import _convert_to_line_delimits +from .table_schema import build_table_schema +from pandas.types.common import is_period_dtype loads = _json.loads dumps = _json.dumps +TABLE_SCHEMA_VERSION = '0.20.0' + # interface to/from def to_json(path_or_buf, obj, orient=None, date_format='epoch', @@ -26,19 +29,22 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', raise ValueError( "'lines' keyword only valid when 'orient' is records") - if isinstance(obj, Series): - s = SeriesWriter( - obj, orient=orient, date_format=date_format, - double_precision=double_precision, ensure_ascii=force_ascii, - date_unit=date_unit, default_handler=default_handler).write() + if orient == 'table' and isinstance(obj, Series): + obj = obj.to_frame(name=obj.name or 'values') + if orient == 'table' and isinstance(obj, DataFrame): + writer = JSONTableWriter + elif isinstance(obj, Series): + writer = SeriesWriter elif isinstance(obj, DataFrame): - s = FrameWriter( - obj, orient=orient, date_format=date_format, - double_precision=double_precision, ensure_ascii=force_ascii, - date_unit=date_unit, default_handler=default_handler).write() + writer = FrameWriter else: raise NotImplementedError("'obj' should be a Series or a DataFrame") + s = writer( + obj, orient=orient, date_format=date_format, 
+        double_precision=double_precision, ensure_ascii=force_ascii,
+        date_unit=date_unit, default_handler=default_handler).write()
+
     if lines:
         s = _convert_to_line_delimits(s)
 
@@ -81,7 +87,8 @@ def write(self):
             ensure_ascii=self.ensure_ascii,
             date_unit=self.date_unit,
             iso_dates=self.date_format == 'iso',
-            default_handler=self.default_handler)
+            default_handler=self.default_handler
+        )
 
 
 class SeriesWriter(Writer):
@@ -108,6 +115,55 @@ def _format_axes(self):
                              "'%s'." % self.orient)
 
 
+class JSONTableWriter(FrameWriter):
+    _default_orient = 'records'
+
+    def __init__(self, obj, orient, date_format, double_precision,
+                 ensure_ascii, date_unit, default_handler=None):
+        """
+        Adds a `schema` attribute with the Table Schema, resets
+        the index (can't do this in the caller, because the schema
+        inference needs to know what the index is), forces orient
+        to 'records', and forces date_format to 'iso'.
+        """
+        super(JSONTableWriter, self).__init__(
+            obj, orient, date_format, double_precision, ensure_ascii,
+            date_unit, default_handler=default_handler)
+
+        if date_format != 'iso':
+            msg = ("Trying to write with `orient='table'` and "
+                   "`date_format='%s'`. Table Schema requires dates "
+                   "to be formatted with `date_format='iso'`" % date_format)
+            raise ValueError(msg)
+
+        self.schema = build_table_schema(obj)
+
+        # TODO: Do this timedelta properly in objToJSON.c See GH #15137
+        if ((obj.ndim == 1) and (obj.name in set(obj.index.names)) or
+                len(obj.columns & obj.index.names)):
+            msg = "Overlapping names between the index and columns"
+            raise ValueError(msg)
+
+        obj = obj.copy()
+        timedeltas = obj.select_dtypes(include=['timedelta']).columns
+        if len(timedeltas):
+            obj[timedeltas] = obj[timedeltas].applymap(
+                lambda x: x.isoformat())
+        # Convert PeriodIndex to datetimes before serializing
+        if is_period_dtype(obj.index):
+            obj.index = obj.index.to_timestamp()
+
+        self.obj = obj.reset_index()
+        self.date_format = 'iso'
+        self.orient = 'records'
+
+    def write(self):
+        data = super(JSONTableWriter, self).write()
+        serialized = '{{"schema": {}, "data": {}}}'.format(
+            dumps(self.schema), data)
+        return serialized
+
+
 def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
               convert_axes=True, convert_dates=True, keep_default_dates=True,
               numpy=False, precise_float=False, date_unit=None, encoding=None,
@@ -244,6 +300,17 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
     col 1 col 2
     0     a     b
     1     c     d
+
+    Encoding with Table Schema
+
+    >>> df.to_json(orient='table')
+    '{"schema": {"fields": [{"name": "index", "type": "string"},
+                            {"name": "col 1", "type": "string"},
+                            {"name": "col 2", "type": "string"}],
+                 "primaryKey": "index",
+                 "pandas_version": "0.20.0"},
+      "data": [{"index": "row 1", "col 1": "a", "col 2": "b"},
+               {"index": "row 2", "col 1": "c", "col 2": "d"}]}'
     """
 
     filepath_or_buffer, _, _ = get_filepath_or_buffer(path_or_buf,
diff --git a/pandas/io/json/table_schema.py b/pandas/io/json/table_schema.py
new file mode 100644
index 0000000000000..48f92d28baf61
--- /dev/null
+++ b/pandas/io/json/table_schema.py
@@ -0,0 +1,177 @@
+"""
+Table Schema builders
+
+http://specs.frictionlessdata.io/json-table-schema/
+"""
+from pandas.types.common import (
+    is_integer_dtype, is_timedelta64_dtype, is_numeric_dtype,
+    is_bool_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
+    is_categorical_dtype, is_period_dtype, is_string_dtype
+)
+
+
+def as_json_table_type(x):
+    """
+    Convert a NumPy / pandas type to its corresponding json_table type. 
+
+    Parameters
+    ----------
+    x : array or dtype
+
+    Returns
+    -------
+    t : str
+        the Table Schema data types
+
+    Notes
+    -----
+    This table shows the relationship between NumPy / pandas dtypes,
+    and Table Schema dtypes.
+
+    =============== =================
+    Pandas type     Table Schema type
+    =============== =================
+    int64           integer
+    float64         number
+    bool            boolean
+    datetime64[ns]  datetime
+    timedelta64[ns] duration
+    object          string
+    categorical     any
+    =============== =================
+    """
+    if is_integer_dtype(x):
+        return 'integer'
+    elif is_bool_dtype(x):
+        return 'boolean'
+    elif is_numeric_dtype(x):
+        return 'number'
+    elif (is_datetime64_dtype(x) or is_datetime64tz_dtype(x) or
+          is_period_dtype(x)):
+        return 'datetime'
+    elif is_timedelta64_dtype(x):
+        return 'duration'
+    elif is_categorical_dtype(x):
+        return 'any'
+    elif is_string_dtype(x):
+        return 'string'
+    else:
+        return 'any'
+
+
+def set_default_names(data):
+    """Sets index names to 'index' for regular, or 'level_x' for Multi"""
+    if all(name is not None for name in data.index.names):
+        return data
+
+    data = data.copy()
+    if data.index.nlevels > 1:
+        names = [name if name is not None else 'level_{}'.format(i)
+                 for i, name in enumerate(data.index.names)]
+        data.index.names = names
+    else:
+        data.index.name = data.index.name or 'index'
+    return data
+
+
+def make_field(arr, dtype=None):
+    dtype = dtype or arr.dtype
+    field = {'name': arr.name or 'values',
+             'type': as_json_table_type(dtype)}
+
+    if is_categorical_dtype(arr):
+        if hasattr(arr, 'categories'):
+            cats = arr.categories
+            ordered = arr.ordered
+        else:
+            cats = arr.cat.categories
+            ordered = arr.cat.ordered
+        field['constraints'] = {"enum": list(cats)}
+        field['ordered'] = ordered
+    elif is_period_dtype(arr):
+        field['freq'] = arr.freqstr
+    elif is_datetime64tz_dtype(arr):
+        if hasattr(arr, 'dt'):
+            field['tz'] = arr.dt.tz.zone
+        else:
+            field['tz'] = arr.tz.zone
+    return field
+
+
+def build_table_schema(data, index=True, primary_key=None, version=True):
+    """
+    Create a Table schema from ``data``.
+
+    Parameters
+    ----------
+    data : Series, DataFrame
+    index : bool, default True
+        Whether to include ``data.index`` in the schema.
+    primary_key : bool or None, default None
+        column names to designate as the primary key.
+        The default `None` will set `'primaryKey'` to the index
+        level or levels if the index is unique.
+    version : bool, default True
+        Whether to include a field `pandas_version` with the version
+        of pandas that generated the schema.
+
+    Returns
+    -------
+    schema : dict
+
+    Examples
+    --------
+    >>> df = pd.DataFrame(
+    ...     {'A': [1, 2, 3],
+    ...      'B': ['a', 'b', 'c'],
+    ...      'C': pd.date_range('2016-01-01', freq='d', periods=3),
+    ...     }, index=pd.Index(range(3), name='idx'))
+    >>> build_table_schema(df)
+    {'fields': [{'name': 'idx', 'type': 'integer'},
+                {'name': 'A', 'type': 'integer'},
+                {'name': 'B', 'type': 'string'},
+                {'name': 'C', 'type': 'datetime'}],
+     'pandas_version': '0.20.0',
+     'primaryKey': ['idx']}
+
+    Notes
+    -----
+    See `as_json_table_type` for conversion types.
+    Timedeltas are converted to ISO 8601 duration format, with
+    9 decimal places after the seconds field for nanosecond precision.
+
+    Categoricals are converted to the `any` dtype, and use the `enum` field
+    constraint to list the allowed values. The `ordered` attribute is included
+    in an `ordered` field. 
+ """ + if index is True: + data = set_default_names(data) + + schema = {} + fields = [] + + if index: + if data.index.nlevels > 1: + for level in data.index.levels: + fields.append(make_field(level)) + else: + fields.append(make_field(data.index)) + + if data.ndim > 1: + for column, s in data.iteritems(): + fields.append(make_field(s)) + else: + fields.append(make_field(data)) + + schema['fields'] = fields + if index and data.index.is_unique and primary_key is None: + if data.index.nlevels == 1: + schema['primaryKey'] = [data.index.name] + else: + schema['primaryKey'] = data.index.names + elif primary_key is not None: + schema['primaryKey'] = primary_key + + if version: + schema['pandas_version'] = '0.20.0' + return schema diff --git a/pandas/tests/formats/test_printing.py b/pandas/tests/formats/test_printing.py index 52f3e06c6cbd0..cacba2ad3f3ba 100644 --- a/pandas/tests/formats/test_printing.py +++ b/pandas/tests/formats/test_printing.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +import pytest from pandas import compat +import pandas as pd import pandas.formats.printing as printing import pandas.formats.format as fmt import pandas.util.testing as tm @@ -118,6 +120,65 @@ def test_ambiguous_width(self): self.assertEqual(adjoined, expected) +class TestTableSchemaRepr(tm.TestCase): + + @classmethod + def setUpClass(cls): + pytest.importorskip('IPython') + try: + import mock + except ImportError: + try: + from unittest import mock + except ImportError: + pytest.skip("Mock is not installed") + cls.mock = mock + + def test_publishes(self): + df = pd.DataFrame({"A": [1, 2]}) + objects = [df['A'], df, df] # dataframe / series + expected_keys = [ + {'text/plain', 'application/vnd.dataresource+json'}, + {'text/plain', 'text/html', 'application/vnd.dataresource+json'}, + ] + + make_patch = self.mock.patch('IPython.display.display') + opt = pd.option_context('display.html.table_schema', True) + for obj, expected in zip(objects, expected_keys): + with opt, make_patch as mock_display: + handle = obj._ipython_display_() + self.assertEqual(mock_display.call_count, 1) + self.assertIsNone(handle) + args, kwargs = mock_display.call_args + arg, = args # just one argument + + self.assertEqual(kwargs, {"raw": True}) + self.assertEqual(set(arg.keys()), expected) + + with_latex = pd.option_context('display.latex.repr', True) + + with opt, with_latex, make_patch as mock_display: + handle = obj._ipython_display_() + args, kwargs = mock_display.call_args + arg, = args + + expected = {'text/plain', 'text/html', 'text/latex', + 'application/vnd.dataresource+json'} + self.assertEqual(set(arg.keys()), expected) + + def test_config_on(self): + df = pd.DataFrame({"A": [1, 2]}) + with pd.option_context("display.html.table_schema", True): + result = df._repr_table_schema_() + self.assertIsNotNone(result) + + def test_config_default_off(self): + df = pd.DataFrame({"A": [1, 2]}) + with pd.option_context("display.html.table_schema", False): + result = df._repr_table_schema_() + self.assertIsNone(result) + + # TODO: fix this broken test # def test_console_encode(): diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py new file mode 100644 index 0000000000000..d1795f2816817 --- /dev/null +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -0,0 +1,462 @@ +"""Tests for Table Schema integration.""" +import json +from collections import OrderedDict + +import numpy as np +import pandas as pd +import pytest + +from pandas import DataFrame +from pandas.types.dtypes import 
PeriodDtype, CategoricalDtype, DatetimeTZDtype +import pandas.util.testing as tm +from pandas.io.json.table_schema import ( + as_json_table_type, build_table_schema, make_field, set_default_names +) + + +class TestBuildSchema(tm.TestCase): + + def setUp(self): + self.df = DataFrame( + {'A': [1, 2, 3, 4], + 'B': ['a', 'b', 'c', 'c'], + 'C': pd.date_range('2016-01-01', freq='d', periods=4), + 'D': pd.timedelta_range('1H', periods=4, freq='T'), + }, + index=pd.Index(range(4), name='idx')) + + def test_build_table_schema(self): + result = build_table_schema(self.df, version=False) + expected = { + 'fields': [{'name': 'idx', 'type': 'integer'}, + {'name': 'A', 'type': 'integer'}, + {'name': 'B', 'type': 'string'}, + {'name': 'C', 'type': 'datetime'}, + {'name': 'D', 'type': 'duration'}, + ], + 'primaryKey': ['idx'] + } + self.assertEqual(result, expected) + result = build_table_schema(self.df) + self.assertTrue("pandas_version" in result) + + def test_series(self): + s = pd.Series([1, 2, 3], name='foo') + result = build_table_schema(s, version=False) + expected = {'fields': [{'name': 'index', 'type': 'integer'}, + {'name': 'foo', 'type': 'integer'}], + 'primaryKey': ['index']} + self.assertEqual(result, expected) + result = build_table_schema(s) + self.assertTrue('pandas_version' in result) + + def tets_series_unnamed(self): + result = build_table_schema(pd.Series([1, 2, 3]), version=False) + expected = {'fields': [{'name': 'index', 'type': 'integer'}, + {'name': 'values', 'type': 'integer'}], + 'primaryKey': ['index']} + self.assertEqual(result, expected) + + def test_multiindex(self): + df = self.df.copy() + idx = pd.MultiIndex.from_product([('a', 'b'), (1, 2)]) + df.index = idx + + result = build_table_schema(df, version=False) + expected = { + 'fields': [{'name': 'level_0', 'type': 'string'}, + {'name': 'level_1', 'type': 'integer'}, + {'name': 'A', 'type': 'integer'}, + {'name': 'B', 'type': 'string'}, + {'name': 'C', 'type': 'datetime'}, + {'name': 'D', 'type': 'duration'}, + ], + 'primaryKey': ['level_0', 'level_1'] + } + self.assertEqual(result, expected) + + df.index.names = ['idx0', None] + expected['fields'][0]['name'] = 'idx0' + expected['primaryKey'] = ['idx0', 'level_1'] + result = build_table_schema(df, version=False) + self.assertEqual(result, expected) + + +class TestTableSchemaType(tm.TestCase): + + def test_as_json_table_type_int_data(self): + int_data = [1, 2, 3] + int_types = [np.int, np.int16, np.int32, np.int64] + for t in int_types: + self.assertEqual(as_json_table_type(np.array(int_data, dtype=t)), + 'integer') + + def test_as_json_table_type_float_data(self): + float_data = [1., 2., 3.] 
+        float_types = [np.float, np.float16, np.float32, np.float64]
+        for t in float_types:
+            self.assertEqual(as_json_table_type(np.array(float_data,
+                                                         dtype=t)),
+                             'number')
+
+    def test_as_json_table_type_bool_data(self):
+        bool_data = [True, False]
+        bool_types = [bool, np.bool]
+        for t in bool_types:
+            self.assertEqual(as_json_table_type(np.array(bool_data, dtype=t)),
+                             'boolean')
+
+    def test_as_json_table_type_date_data(self):
+        date_data = [pd.to_datetime(['2016']),
+                     pd.to_datetime(['2016'], utc=True),
+                     pd.Series(pd.to_datetime(['2016'])),
+                     pd.Series(pd.to_datetime(['2016'], utc=True)),
+                     pd.period_range('2016', freq='A', periods=3)]
+        for arr in date_data:
+            self.assertEqual(as_json_table_type(arr), 'datetime')
+
+    def test_as_json_table_type_string_data(self):
+        strings = [pd.Series(['a', 'b']), pd.Index(['a', 'b'])]
+        for t in strings:
+            self.assertEqual(as_json_table_type(t), 'string')
+
+    def test_as_json_table_type_categorical_data(self):
+        self.assertEqual(as_json_table_type(pd.Categorical(['a'])), 'any')
+        self.assertEqual(as_json_table_type(pd.Categorical([1])), 'any')
+        self.assertEqual(as_json_table_type(
+            pd.Series(pd.Categorical([1]))), 'any')
+        self.assertEqual(as_json_table_type(pd.CategoricalIndex([1])), 'any')
+        self.assertEqual(as_json_table_type(pd.Categorical([1])), 'any')
+
+    # ------
+    # dtypes
+    # ------
+    def test_as_json_table_type_int_dtypes(self):
+        integers = [np.int, np.int16, np.int32, np.int64]
+        for t in integers:
+            self.assertEqual(as_json_table_type(t), 'integer')
+
+    def test_as_json_table_type_float_dtypes(self):
+        floats = [np.float, np.float16, np.float32, np.float64]
+        for t in floats:
+            self.assertEqual(as_json_table_type(t), 'number')
+
+    def test_as_json_table_type_bool_dtypes(self):
+        bools = [bool, np.bool]
+        for t in bools:
+            self.assertEqual(as_json_table_type(t), 'boolean')
+
+    def test_as_json_table_type_date_dtypes(self):
+        # TODO: datedate.date? datetime.time?
+        dates = [np.datetime64, np.dtype("<M8[ns]"),

Date: Sat, 4 Mar 2017 15:14:36 +0100
Subject: [PATCH 135/933] DEPR/CLN: remove SparseTimeSeries class (follow-up
 GH15098) (#15567)

---
 doc/source/whatsnew/v0.20.0.txt    |  2 ++
 pandas/compat/pickle_compat.py     |  3 ++-
 pandas/sparse/api.py               |  2 +-
 pandas/sparse/series.py            | 11 -----------
 pandas/tests/api/test_api.py       |  3 +--
 pandas/tests/sparse/test_series.py |  6 ------
 6 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 7b4538bd181d2..eac187b52f65d 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -621,6 +621,8 @@ Removal of prior version deprecations/changes
   Similar functionality can be found in the `Google2Pandas `__ package.
 - ``pd.to_datetime`` and ``pd.to_timedelta`` have dropped the ``coerce`` parameter in favor of ``errors`` (:issue:`13602`)
 - ``pandas.stats.fama_macbeth``, ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var``, as well as the top-level ``pandas.fama_macbeth`` and ``pandas.ols`` routines are removed. Similar functionality can be found in the `statsmodels `__ package. (:issue:`11898`)
+- The ``TimeSeries`` and ``SparseTimeSeries`` classes, aliases of ``Series``
+  and ``SparseSeries``, are removed (:issue:`10890`, :issue:`15098`).
 - ``Series.is_time_series`` is dropped in favor of ``Series.index.is_all_dates`` (:issue:``)
 - The deprecated ``irow``, ``icol``, ``iget`` and ``iget_value`` methods are removed in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). 
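The pickle_compat hunk below keeps old pickles loadable by rewriting the
stored (module, class) path before it is resolved. A minimal sketch of the
mechanism, under the assumption of a simplified loader (`_resolve` here is
illustrative, not the actual function name):

    # old pickles record ('pandas.sparse.series', 'SparseTimeSeries');
    # the shim maps that key to the surviving class before importing it,
    # so old files unpickle as SparseSeries
    _class_locations_map = {
        ('pandas.sparse.series', 'SparseTimeSeries'):
            ('pandas.sparse.series', 'SparseSeries'),
    }

    def _resolve(module, name):  # illustrative only
        module, name = _class_locations_map.get((module, name),
                                                (module, name))
        mod = __import__(module, fromlist=[name])
        return getattr(mod, name)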
diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index b8ccd13c153d4..25a170c3eb121 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -61,7 +61,8 @@ def load_reduce(self): ('pandas.core.base', 'FrozenList'): ('pandas.indexes.frozen', 'FrozenList'), # 10890 - ('pandas.core.series', 'TimeSeries'): ('pandas.core.series', 'Series') + ('pandas.core.series', 'TimeSeries'): ('pandas.core.series', 'Series'), + ('pandas.sparse.series', 'SparseTimeSeries'): ('pandas.sparse.series', 'SparseSeries') } diff --git a/pandas/sparse/api.py b/pandas/sparse/api.py index 55841fbeffa2d..90be0a216535f 100644 --- a/pandas/sparse/api.py +++ b/pandas/sparse/api.py @@ -2,5 +2,5 @@ # flake8: noqa from pandas.sparse.array import SparseArray from pandas.sparse.list import SparseList -from pandas.sparse.series import SparseSeries, SparseTimeSeries +from pandas.sparse.series import SparseSeries from pandas.sparse.frame import SparseDataFrame diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index dfdbb3c89814a..a3b701169ce91 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -844,14 +844,3 @@ def from_coo(cls, A, dense_index=False): comp_method=_arith_method, bool_method=None, use_numexpr=False, force=True) - - -# backwards compatiblity -class SparseTimeSeries(SparseSeries): - - def __init__(self, *args, **kwargs): - # deprecation TimeSeries, #10890 - warnings.warn("SparseTimeSeries is deprecated. Please use " - "SparseSeries", FutureWarning, stacklevel=2) - - super(SparseTimeSeries, self).__init__(*args, **kwargs) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index f2f7a9c778e66..2f8ebc4cc1df4 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -57,8 +57,7 @@ class TestPDApi(Base, tm.TestCase): 'TimedeltaIndex', 'Timestamp'] # these are already deprecated; awaiting removal - deprecated_classes = ['WidePanel', - 'SparseTimeSeries', 'Panel4D', + deprecated_classes = ['WidePanel', 'Panel4D', 'SparseList', 'Expr', 'Term'] # these should be deprecated in the future diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index d4543b97af4dd..de6636162ff05 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -112,12 +112,6 @@ def test_iteration_and_str(self): [x for x in self.bseries] str(self.bseries) - def test_TimeSeries_deprecation(self): - - # deprecation TimeSeries, #10890 - with tm.assert_produces_warning(FutureWarning): - pd.SparseTimeSeries(1, index=pd.date_range('20130101', periods=3)) - def test_construct_DataFrame_with_sp_series(self): # it works! df = DataFrame({'col': self.bseries}) From d6524850c8dea36ab37536e439999fd121b95429 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 4 Mar 2017 10:37:58 -0500 Subject: [PATCH 136/933] Revert FrozenList changes (doc build slowdown, #15559) See #15559. This temporarily reverts #15506, to see if this fixes the doc build slowdown. 
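For reference, a hedged sketch of the FrozenList behaviour this revert
restores (based on the frozen.py hunk later in this patch):

    from pandas.indexes.frozen import FrozenList

    names = FrozenList(['a', 'b'])
    combined = names + ['c']   # concatenation returns a new FrozenList,
                               # with no FutureWarning after the revert
    # mutation remains disabled: append/pop/insert raise TypeError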
Author: Joris Van den Bossche Closes #15566 from jorisvandenbossche/revert and squashes the following commits: befd858 [Joris Van den Bossche] Revert "ENH: Added FrozenList difference setop" 527ded9 [Joris Van den Bossche] Revert "TST: remove deprecated usages of FrozenList.__add__ from test code" --- doc/source/groupby.rst | 10 ------- doc/source/whatsnew/v0.20.0.txt | 2 -- pandas/core/panel.py | 6 ++--- pandas/core/reshape.py | 6 ++--- pandas/core/strings.py | 2 +- pandas/indexes/frozen.py | 24 +++-------------- pandas/tests/groupby/test_value_counts.py | 2 +- pandas/tests/indexes/test_frozen.py | 33 +++++++---------------- pandas/tools/concat.py | 2 +- test_fast.sh | 2 +- 10 files changed, 22 insertions(+), 67 deletions(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 2d406de7c0c9b..8484ccd69a983 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -126,16 +126,6 @@ We could naturally group by either the ``A`` or ``B`` columns or both: grouped = df.groupby('A') grouped = df.groupby(['A', 'B']) -.. versionadded:: 0.20 - -If we also have a MultiIndex on columns ``A`` and ``B``, we can group by all -but the specified columns. - -.. ipython:: python - - df2 = df.set_index(['A', 'B']) - grouped = df2.groupby(level=df2.index.names.difference(['B']) - These will split the DataFrame on its index (rows). We could also split by the columns: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index eac187b52f65d..1ba327a4ea50c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -29,7 +29,6 @@ New features - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. - ``.str.replace`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) -- ``FrozenList`` has gained the ``.difference()`` setop method (:issue:`15475`) @@ -601,7 +600,6 @@ Deprecations - ``Series.sortlevel`` and ``DataFrame.sortlevel`` have been deprecated in favor of ``Series.sort_index`` and ``DataFrame.sort_index`` (:issue:`15099`) - importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explict imports (:issue:`15358`) - ``Series/DataFrame/Panel.consolidate()`` been deprecated as a public method. (:issue:`15483`) -- ``FrozenList`` addition (new object and inplace) have been deprecated in favor of the ``.union()`` method. (:issue: `15475`) - The following top-level pandas functions have been deprecated and will be removed in a future version (:issue:`13790`) * ``pd.pnow()``, replaced by ``Period.now()`` * ``pd.Term``, is removed, as it is not applicable to user code. 
Instead use in-line string expressions in the where clause when searching in HDFStore diff --git a/pandas/core/panel.py b/pandas/core/panel.py index c5ea513223dce..4a6c6cf291316 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -940,9 +940,9 @@ def construct_index_parts(idx, major=True): minor_labels, minor_levels, minor_names = construct_index_parts( self.minor_axis, major=False) - levels = list(major_levels) + list(minor_levels) - labels = list(major_labels) + list(minor_labels) - names = list(major_names) + list(minor_names) + levels = major_levels + minor_levels + labels = major_labels + minor_labels + names = major_names + minor_names index = MultiIndex(levels=levels, labels=labels, names=names, verify_integrity=False) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index faad6c500a21f..87cb088c2e91e 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -216,8 +216,8 @@ def get_new_columns(self): width = len(self.value_columns) propagator = np.repeat(np.arange(width), stride) if isinstance(self.value_columns, MultiIndex): - new_levels = self.value_columns.levels.union((self.removed_level,)) - new_names = self.value_columns.names.union((self.removed_name,)) + new_levels = self.value_columns.levels + (self.removed_level,) + new_names = self.value_columns.names + (self.removed_name,) new_labels = [lab.take(propagator) for lab in self.value_columns.labels] @@ -806,7 +806,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, for col in id_vars: mdata[col] = np.tile(frame.pop(col).values, K) - mcolumns = list(id_vars) + list(var_name) + list([value_name]) + mcolumns = id_vars + var_name + [value_name] mdata[value_name] = frame.values.ravel('F') for i, col in enumerate(var_name): diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 51016926d6909..ac8d1db6a0bf3 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -787,7 +787,7 @@ def str_extractall(arr, pat, flags=0): if 0 < len(index_list): from pandas import MultiIndex index = MultiIndex.from_tuples( - index_list, names=arr.index.names.union(["match"])) + index_list, names=arr.index.names + ["match"]) else: index = None result = arr._constructor_expanddim(match_list, index=index, diff --git a/pandas/indexes/frozen.py b/pandas/indexes/frozen.py index 47e2557333ec7..e043ba64bbad7 100644 --- a/pandas/indexes/frozen.py +++ b/pandas/indexes/frozen.py @@ -13,8 +13,6 @@ from pandas.types.cast import _coerce_indexer_dtype from pandas.formats.printing import pprint_thing -import warnings - class FrozenList(PandasObject, list): @@ -27,14 +25,11 @@ class FrozenList(PandasObject, list): # typechecks def __add__(self, other): - warnings.warn("__add__ is deprecated, use union(...)", FutureWarning) - return self.union(other) - - def __iadd__(self, other): - warnings.warn("__iadd__ is deprecated, use union(...)", FutureWarning) if isinstance(other, tuple): other = list(other) - return super(FrozenList, self).__iadd__(other) + return self.__class__(super(FrozenList, self).__add__(other)) + + __iadd__ = __add__ # Python 2 compat def __getslice__(self, i, j): @@ -85,19 +80,6 @@ def __repr__(self): __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled pop = append = extend = remove = sort = insert = _disabled - def union(self, other): - """Returns a FrozenList with other concatenated to the end of self""" - if isinstance(other, tuple): - other = list(other) - return self.__class__(super(FrozenList, self).__add__(other)) - - def difference(self, other): - """Returns 
a FrozenList with the same elements as self, but with elements - that are also in other removed.""" - other = set(other) - temp = [x for x in self if x not in other] - return self.__class__(temp) - class FrozenNDArray(PandasObject, np.ndarray): diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index ff01df2693c7c..801d0da070112 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -28,7 +28,7 @@ def check_value_counts(df, keys, bins): gr = df.groupby(keys, sort=isort) right = gr['3rd'].apply(Series.value_counts, **kwargs) - right.index.names = right.index.names[:-1].union(['3rd']) + right.index.names = right.index.names[:-1] + ['3rd'] # have to sort on index because of unstable sort on values left, right = map(rebuild_index, (left, right)) # xref GH9212 diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py index a5fbf066adc83..a82409fbf9513 100644 --- a/pandas/tests/indexes/test_frozen.py +++ b/pandas/tests/indexes/test_frozen.py @@ -15,35 +15,20 @@ def setUp(self): self.klass = FrozenList def test_add(self): - q = FrozenList([1]) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - q = q + [2, 3] - expected = FrozenList([1, 2, 3]) - self.check_result(q, expected) - - def test_iadd(self): - q = FrozenList([1]) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - q += [2, 3] - expected = FrozenList([1, 2, 3]) - self.check_result(q, expected) - - def test_union(self): - result = self.container.union((1, 2, 3)) + result = self.container + (1, 2, 3) expected = FrozenList(self.lst + [1, 2, 3]) self.check_result(result, expected) - def test_difference(self): - result = self.container.difference([2]) - expected = FrozenList([1, 3, 4, 5]) + result = (1, 2, 3) + self.container + expected = FrozenList([1, 2, 3] + self.lst) self.check_result(result, expected) - def test_difference_dupe(self): - result = FrozenList([1, 2, 3, 2]).difference([2]) - expected = FrozenList([1, 3]) - self.check_result(result, expected) + def test_inplace(self): + q = r = self.container + q += [5] + self.check_result(q, self.lst + [5]) + # other shouldn't be mutated + self.check_result(r, self.lst) class TestFrozenNDArray(CheckImmutable, CheckStringMixin, tm.TestCase): diff --git a/pandas/tools/concat.py b/pandas/tools/concat.py index ae9d7af9d98ff..6405106118472 100644 --- a/pandas/tools/concat.py +++ b/pandas/tools/concat.py @@ -574,7 +574,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): " not have the same number of levels") # also copies - names = list(names) + list(_get_consensus_names(indexes)) + names = names + _get_consensus_names(indexes) return MultiIndex(levels=levels, labels=label_list, names=names, verify_integrity=False) diff --git a/test_fast.sh b/test_fast.sh index f22ab73277e8b..30ac7f84cbe8b 100755 --- a/test_fast.sh +++ b/test_fast.sh @@ -5,4 +5,4 @@ # https://github.com/pytest-dev/pytest/issues/1075 export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') -pytest pandas --skip-slow --skip-network -m "not single" -n 4 $@ +pytest pandas --skip-slow --skip-network -m "not single" -n 4 From 5f0b69aee3622eed9392cef163e4b31ba742498e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 4 Mar 2017 13:10:27 -0500 Subject: [PATCH 137/933] DEPR: silence some deprecation warnings --- pandas/tests/test_multilevel.py | 2 +- pandas/tests/test_panel.py | 4 ++-- 2 files changed, 
3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 0f36af2c8c4e7..c809b39bb566e 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1646,7 +1646,7 @@ def test_multilevel_consolidate(self): 'bar', 'one'), ('bar', 'two')]) df = DataFrame(np.random.randn(4, 4), index=index, columns=index) df['Totals', ''] = df.sum(1) - df = df.consolidate() + df = df._consolidate() def test_ix_preserve_names(self): result = self.ymd.loc[2000] diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 2f329f241a5b8..373f590cbf9eb 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -688,7 +688,7 @@ def test_ix_setitem_slice_dataframe(self): def test_ix_align(self): from pandas import Series b = Series(np.random.randn(10), name=0) - b.sort() + b.sort_values() df_orig = Panel(np.random.randn(3, 10, 2)) df = df_orig.copy() @@ -1001,7 +1001,7 @@ def test_consolidate(self): self.panel['foo'] = 1. self.assertFalse(self.panel._data.is_consolidated()) - panel = self.panel.consolidate() + panel = self.panel._consolidate() self.assertTrue(panel._data.is_consolidated()) def test_ctor_dict(self): From ca6d88b7367de415770bf2c171887c5bece38d9f Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 4 Mar 2017 16:04:15 -0500 Subject: [PATCH 138/933] CLN: clean up PeriodIndex constructor closes #13232 Material clean up of PeriodIndex constructor, which was doing a few weird things (https://github.com/pydata/pandas/issues/13232#issuecomment-220788816), and generally getting messy. Author: Maximilian Roos Closes #13277 from MaximilianR/period-float and squashes the following commits: 5cae7aa [Maximilian Roos] @jreback changes 75ff54d [Maximilian Roos] _new_PeriodIndex for unpickling 240172f [Maximilian Roos] coerce freq object earlier for perf ba5133b [Maximilian Roos] documentation b0fc0a7 [Maximilian Roos] final changes fa0fa9d [Maximilian Roos] clean up PeriodIndex constructor --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/algorithms.py | 4 +- pandas/indexes/base.py | 5 + pandas/io/packers.py | 2 +- .../tests/indexes/period/test_construction.py | 9 +- pandas/tests/indexes/period/test_period.py | 6 + pandas/tseries/period.py | 156 +++++++++--------- setup.cfg | 1 + 8 files changed, 98 insertions(+), 86 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 1ba327a4ea50c..ca093eca30511 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -657,6 +657,7 @@ Bug Fixes - Bug in ``DataFrame.sort_values()`` when sorting by multiple columns where one column is of type ``int64`` and contains ``NaT`` (:issue:`14922`) - Bug in ``DataFrame.reindex()`` in which ``method`` was ignored when passing ``columns`` (:issue:`14992`) - Bug in ``pd.to_numeric()`` in which float and unsigned integer elements were being improperly casted (:issue:`14941`, :issue:`15005`) +- Cleaned up ``PeriodIndex`` constructor, including raising on floats more consistently (:issue:`13277`) - Bug in ``pd.read_csv()`` in which the ``dialect`` parameter was not being verified before processing (:issue:`14898`) - Bug in ``pd.read_fwf`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`) - Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 55d404f05dd1d..d37c98c9b9b90
100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -471,8 +471,8 @@ def _value_counts_arraylike(values, dropna=True): # dtype handling if is_datetimetz_type: keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz) - if is_period_type: - keys = PeriodIndex._simple_new(keys, freq=freq) + elif is_period_type: + keys = PeriodIndex._from_ordinals(keys, freq=freq) elif is_signed_integer_dtype(dtype): values = _ensure_int64(values) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 5d43d2d32af67..e441d9a88690d 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -88,6 +88,11 @@ def _new_Index(cls, d): """ This is called upon unpickling, rather than the default which doesn't have arguments and breaks __new__ """ + # required for backward compat, because PI can't be instantiated with + # ordinals through __new__ GH #13277 + if issubclass(cls, ABCPeriodIndex): + from pandas.tseries.period import _new_PeriodIndex + return _new_PeriodIndex(cls, **d) return cls.__new__(cls, **d) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 7afe8a06b6af1..39bc1a4ecf225 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -573,7 +573,7 @@ def decode(obj): elif typ == u'period_index': data = unconvert(obj[u'data'], np.int64, obj.get(u'compress')) d = dict(name=obj[u'name'], freq=obj[u'freq']) - return globals()[obj[u'klass']](data, **d) + return globals()[obj[u'klass']]._from_ordinals(data, **d) elif typ == u'datetime_index': data = unconvert(obj[u'data'], np.int64, obj.get(u'compress')) d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 228615829b5b8..f13a84f4f0e92 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -120,7 +120,7 @@ def test_constructor_fromarraylike(self): self.assertRaises(ValueError, PeriodIndex, idx._values) self.assertRaises(ValueError, PeriodIndex, list(idx._values)) - self.assertRaises(ValueError, PeriodIndex, + self.assertRaises(TypeError, PeriodIndex, data=Period('2007', freq='A')) result = PeriodIndex(iter(idx)) @@ -285,12 +285,15 @@ def test_constructor_simple_new_empty(self): result = idx._simple_new(idx, name='p', freq='M') tm.assert_index_equal(result, idx) - def test_constructor_simple_new_floats(self): + def test_constructor_floats(self): # GH13079 - for floats in [[1.1], np.array([1.1])]: + for floats in [[1.1, 2.1], np.array([1.1, 2.1])]: with self.assertRaises(TypeError): pd.PeriodIndex._simple_new(floats, freq='M') + with self.assertRaises(TypeError): + pd.PeriodIndex(floats, freq='M') + def test_constructor_nat(self): self.assertRaises(ValueError, period_range, start='NaT', end='2011-01-01', freq='M') diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index b80ab6feeeb23..1739211982b10 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -53,6 +53,12 @@ def test_astype_raises(self): def test_pickle_compat_construction(self): pass + def test_pickle_round_trip(self): + for freq in ['D', 'M', 'Y']: + idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') + result = self.round_trip_pickle(idx) + tm.assert_index_equal(result, idx) + def test_get_loc(self): idx = pd.period_range('2000-01-01', periods=3) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 
6e499924730b3..bfe7724a1cfaa 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -17,7 +17,6 @@ is_period_dtype, is_bool_dtype, pandas_dtype, - _ensure_int64, _ensure_object) from pandas.types.dtypes import PeriodDtype from pandas.types.generic import ABCSeries @@ -114,6 +113,13 @@ def wrapper(self, other): return wrapper +def _new_PeriodIndex(cls, **d): + # GH13277 for unpickling + if d['data'].dtype == 'int64': + values = d.pop('data') + return cls._from_ordinals(values=values, **d) + + class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray holding ordinal values indicating regular periods in @@ -209,17 +215,57 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, msg = 'specified freq and dtype are different' raise IncompatibleFrequency(msg) + # coerce freq to freq object, otherwise it can be coerced elementwise + # which is slow + if freq: + freq = Period._maybe_convert_freq(freq) + if data is None: if ordinal is not None: data = np.asarray(ordinal, dtype=np.int64) else: data, freq = cls._generate_range(start, end, periods, freq, kwargs) - else: - ordinal, freq = cls._from_arraylike(data, freq, tz) - data = np.array(ordinal, dtype=np.int64, copy=copy) + return cls._from_ordinals(data, name=name, freq=freq) - return cls._simple_new(data, name=name, freq=freq) + if isinstance(data, PeriodIndex): + if freq is None or freq == data.freq: # no freq change + freq = data.freq + data = data._values + else: + base1, _ = _gfc(data.freq) + base2, _ = _gfc(freq) + data = period.period_asfreq_arr(data._values, + base1, base2, 1) + return cls._simple_new(data, name=name, freq=freq) + + # not array / index + if not isinstance(data, (np.ndarray, PeriodIndex, + DatetimeIndex, Int64Index)): + if is_scalar(data) or isinstance(data, Period): + cls._scalar_data_error(data) + + # other iterable of some kind + if not isinstance(data, (list, tuple)): + data = list(data) + + data = np.asarray(data) + + # datetime other than period + if is_datetime64_dtype(data.dtype): + data = dt64arr_to_periodarr(data, freq, tz) + return cls._from_ordinals(data, name=name, freq=freq) + + # check not floats + if infer_dtype(data) == 'floating' and len(data) > 0: + raise TypeError("PeriodIndex does not allow " + "floating point in construction") + + # anything else, likely an array of strings or periods + data = _ensure_object(data) + freq = freq or period.extract_freq(data) + data = period.extract_ordinals(data, freq) + return cls._from_ordinals(data, name=name, freq=freq) @classmethod def _generate_range(cls, start, end, periods, freq, fields): @@ -240,77 +286,26 @@ def _generate_range(cls, start, end, periods, freq, fields): return subarr, freq - @classmethod - def _from_arraylike(cls, data, freq, tz): - if freq is not None: - freq = Period._maybe_convert_freq(freq) - - if not isinstance(data, (np.ndarray, PeriodIndex, - DatetimeIndex, Int64Index)): - if is_scalar(data) or isinstance(data, Period): - raise ValueError('PeriodIndex() must be called with a ' - 'collection of some kind, %s was passed' - % repr(data)) - - # other iterable of some kind - if not isinstance(data, (list, tuple)): - data = list(data) - - try: - data = _ensure_int64(data) - if freq is None: - raise ValueError('freq not specified') - data = np.array([Period(x, freq=freq) for x in data], - dtype=np.int64) - except (TypeError, ValueError): - data = _ensure_object(data) - - if freq is None: - freq = period.extract_freq(data) - data = period.extract_ordinals(data, freq) - else: - 
if isinstance(data, PeriodIndex): - if freq is None or freq == data.freq: - freq = data.freq - data = data._values - else: - base1, _ = _gfc(data.freq) - base2, _ = _gfc(freq) - data = period.period_asfreq_arr(data._values, - base1, base2, 1) - else: - if is_object_dtype(data): - inferred = infer_dtype(data) - if inferred == 'integer': - data = data.astype(np.int64) - - if freq is None and is_object_dtype(data): - # must contain Period instance and thus extract ordinals - freq = period.extract_freq(data) - data = period.extract_ordinals(data, freq) - - if freq is None: - msg = 'freq not specified and cannot be inferred' - raise ValueError(msg) - - if data.dtype != np.int64: - if np.issubdtype(data.dtype, np.datetime64): - data = dt64arr_to_periodarr(data, freq, tz) - else: - data = _ensure_object(data) - data = period.extract_ordinals(data, freq) - - return data, freq - @classmethod def _simple_new(cls, values, name=None, freq=None, **kwargs): - + """ + Values can be any type that can be coerced to Periods. + Ordinals in an ndarray are fastpath-ed to `_from_ordinals` + """ if not is_integer_dtype(values): values = np.array(values, copy=False) - if (len(values) > 0 and is_float_dtype(values)): + if len(values) > 0 and is_float_dtype(values): raise TypeError("PeriodIndex can't take floats") - else: - return cls(values, name=name, freq=freq, **kwargs) + return cls(values, name=name, freq=freq, **kwargs) + + return cls._from_ordinals(values, name, freq, **kwargs) + + @classmethod + def _from_ordinals(cls, values, name=None, freq=None, **kwargs): + """ + Values should be int ordinals + `__new__` & `_simple_new` coerce to ordinals and call this method + """ values = np.array(values, dtype='int64', copy=False) @@ -318,7 +313,7 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): result._data = values result.name = name if freq is None: - raise ValueError('freq is not specified') + raise ValueError('freq is not specified and cannot be inferred') result.freq = Period._maybe_convert_freq(freq) result._reset_identity() return result @@ -327,13 +322,13 @@ def _shallow_copy_with_infer(self, values=None, **kwargs): """ we always want to return a PeriodIndex """ return self._shallow_copy(values=values, **kwargs) - def _shallow_copy(self, values=None, **kwargs): - if kwargs.get('freq') is None: - # freq must be provided - kwargs['freq'] = self.freq + def _shallow_copy(self, values=None, freq=None, **kwargs): + if freq is None: + freq = self.freq if values is None: values = self._values - return super(PeriodIndex, self)._shallow_copy(values=values, **kwargs) + return super(PeriodIndex, self)._shallow_copy(values=values, + freq=freq, **kwargs) def _coerce_scalar_to_index(self, item): """ @@ -413,7 +408,7 @@ def __array_wrap__(self, result, context=None): return result # the result is object dtype array of Period # cannot pass _simple_new as it is - return PeriodIndex(result, freq=self.freq, name=self.name) + return self._shallow_copy(result, freq=self.freq, name=self.name) @property def _box_func(self): @@ -708,7 +703,7 @@ def shift(self, n): values = self._values + n * self.freq.n if self.hasnans: values[self._isnan] = tslib.iNaT - return PeriodIndex(data=values, name=self.name, freq=self.freq) + return self._shallow_copy(values=values) @cache_readonly def dtype(self): @@ -945,7 +940,8 @@ def _wrap_union_result(self, other, result): def _apply_meta(self, rawarr): if not isinstance(rawarr, PeriodIndex): - rawarr = PeriodIndex(rawarr, freq=self.freq) + rawarr = PeriodIndex._from_ordinals(rawarr,
freq=self.freq, + name=self.name) return rawarr def _format_native_types(self, na_rep=u('NaT'), date_format=None, diff --git a/setup.cfg b/setup.cfg index b9de7a3532209..8de4fc955bd50 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,6 +13,7 @@ parentdir_prefix = pandas- [flake8] ignore = E731,E402 +max-line-length = 79 [yapf] based_on_style = pep8 From ed2a2e49945478a170b97466bb94444a3353da21 Mon Sep 17 00:00:00 2001 From: Nicholas Ver Halen Date: Sat, 4 Mar 2017 16:38:35 -0500 Subject: [PATCH 139/933] BUG: pivot_table over Categorical Columns closes #15193 Author: Nicholas Ver Halen Closes #15511 from verhalenn/issue15193 and squashes the following commits: bf0fdeb [Nicholas Ver Halen] Added description to code change. adf8616 [Nicholas Ver Halen] Added whatsnew for issue 15193 a643267 [Nicholas Ver Halen] Added test for issue 15193 d605251 [Nicholas Ver Halen] Made sure pivot_table propped na columns --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/tools/test_pivot.py | 33 ++++++++++++++++++++++++++++++++ pandas/tools/pivot.py | 4 ++++ 3 files changed, 38 insertions(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ca093eca30511..f51ff4cd0c908 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -735,6 +735,7 @@ Bug Fixes - Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`) +- Bug in ``DataFrame.pivot_table()`` where ``dropna=True`` would not drop all-NaN columns when the columns was a ``category`` dtype (:issue:`15193`) - Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`) diff --git a/pandas/tests/tools/test_pivot.py b/pandas/tests/tools/test_pivot.py index f5d91d0088306..62863372dbd02 100644 --- a/pandas/tests/tools/test_pivot.py +++ b/pandas/tests/tools/test_pivot.py @@ -86,6 +86,39 @@ def test_pivot_table_dropna(self): tm.assert_index_equal(pv_col.columns, m) tm.assert_index_equal(pv_ind.index, m) + def test_pivot_table_dropna_categoricals(self): + # GH 15193 + categories = ['a', 'b', 'c', 'd'] + + df = DataFrame({'A': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c'], + 'B': [1, 2, 3, 1, 2, 3, 1, 2, 3], + 'C': range(0, 9)}) + + df['A'] = df['A'].astype('category', ordered=False, + categories=categories) + result_true = df.pivot_table(index='B', columns='A', values='C', + dropna=True) + expected_columns = Series(['a', 'b', 'c'], name='A') + expected_columns = expected_columns.astype('category', ordered=False, + categories=categories) + expected_index = Series([1, 2, 3], name='B') + expected_true = DataFrame([[0.0, 3.0, 6.0], + [1.0, 4.0, 7.0], + [2.0, 5.0, 8.0]], + index=expected_index, + columns=expected_columns,) + tm.assert_frame_equal(expected_true, result_true) + + result_false = df.pivot_table(index='B', columns='A', values='C', + dropna=False) + expected_columns = Series(['a', 'b', 'c', 'd'], name='A') + expected_false = DataFrame([[0.0, 3.0, 6.0, np.NaN], + [1.0, 4.0, 7.0, np.NaN], + [2.0, 5.0, 8.0, np.NaN]], + index=expected_index, + columns=expected_columns,) + tm.assert_frame_equal(expected_false, result_false) + def test_pass_array(self): result = self.data.pivot_table( 'D', index=self.data.A, columns=self.data.C) diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 41fc705691a96..e23beb8332fd4 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -175,6 +175,10 @@ def pivot_table(data, values=None, index=None, columns=None, 
aggfunc='mean', if len(index) == 0 and len(columns) > 0: table = table.T + # GH 15193 Make sure empty columns are removed if dropna=True + if isinstance(table, DataFrame) and dropna: + table = table.dropna(how='all', axis=1) + return table From 0b776806b78421e377bb6c305d1e3a752f24e358 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 4 Mar 2017 22:58:32 +0100 Subject: [PATCH 140/933] DOC: fix build_table_schema docs (#15571) --- doc/source/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index c34cc1ec17512..c7a68a0fe9fbb 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2090,7 +2090,7 @@ A few notes on the generated table schema: - All dates are converted to UTC when serializing. Even timezone naïve values, which are treated as UTC with an offset of 0. - .. ipython:: python: + .. ipython:: python from pandas.io.json import build_table_schema s = pd.Series(pd.date_range('2016', periods=4)) From c198e28e1cd187523d77386d607c5536bce024c5 Mon Sep 17 00:00:00 2001 From: chris-b1 Date: Sat, 4 Mar 2017 19:45:20 -0600 Subject: [PATCH 141/933] BUG: DataFrame.isin empty datetimelike (#15570) --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/ops.py | 2 +- pandas/tests/frame/test_analytics.py | 21 +++++++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f51ff4cd0c908..c29dfaba2604a 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -677,7 +677,7 @@ Bug Fixes - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) - Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes.
(:issue:`14956`) - +- Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) - Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) - Bug in ``Series`` construction with a datetimetz (:issue:`14928`) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 697a99f63f62f..6cc43cd9228f6 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1249,7 +1249,7 @@ def na_op(x, y): result = op(x, y) except TypeError: xrav = x.ravel() - result = np.empty(x.size, dtype=x.dtype) + result = np.empty(x.size, dtype=bool) if isinstance(y, (np.ndarray, ABCSeries)): yrav = y.ravel() mask = notnull(xrav) & notnull(yrav) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 111195363beb2..4758ee1323ca0 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1502,6 +1502,27 @@ def test_isin_multiIndex(self): result = df1.isin(df2) tm.assert_frame_equal(result, expected) + def test_isin_empty_datetimelike(self): + # GH 15473 + df1_ts = DataFrame({'date': + pd.to_datetime(['2014-01-01', '2014-01-02'])}) + df1_td = DataFrame({'date': + [pd.Timedelta(1, 's'), pd.Timedelta(2, 's')]}) + df2 = DataFrame({'date': []}) + df3 = DataFrame() + + expected = DataFrame({'date': [False, False]}) + + result = df1_ts.isin(df2) + tm.assert_frame_equal(result, expected) + result = df1_ts.isin(df3) + tm.assert_frame_equal(result, expected) + + result = df1_td.isin(df2) + tm.assert_frame_equal(result, expected) + result = df1_td.isin(df3) + tm.assert_frame_equal(result, expected) + # ---------------------------------------------------------------------- # Row deduplication From f5b7bcb4d6ccbc85450a8de3d443eeef11c57d93 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 4 Mar 2017 18:02:41 -0800 Subject: [PATCH 142/933] BUG: Groupby.cummin/max DataError on datetimes (#15561) (#15569) --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/groupby.py | 4 ++-- pandas/tests/groupby/test_groupby.py | 10 +++++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c29dfaba2604a..4e528daa6e876 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -635,7 +635,7 @@ Performance Improvements - Increased performance of ``pd.factorize()`` by releasing the GIL with ``object`` dtype when inferred as strings (:issue:`14859`) - Improved performance of timeseries plotting with an irregular DatetimeIndex (or with ``compat_x=True``) (:issue:`15073`). -- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`) +- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`) - Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. 
(:issue:`14947`) - Improved performance of `rank()` for categorical data (:issue:`15498`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 578c334781d15..43c57a88b4d19 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1442,7 +1442,7 @@ def cummin(self, axis=0, **kwargs): if axis != 0: return self.apply(lambda x: np.minimum.accumulate(x, axis)) - return self._cython_transform('cummin', **kwargs) + return self._cython_transform('cummin', numeric_only=False) @Substitution(name='groupby') @Appender(_doc_template) @@ -1451,7 +1451,7 @@ def cummax(self, axis=0, **kwargs): if axis != 0: return self.apply(lambda x: np.maximum.accumulate(x, axis)) - return self._cython_transform('cummax', **kwargs) + return self._cython_transform('cummax', numeric_only=False) @Substitution(name='groupby') @Appender(_doc_template) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 74e8c6c45946f..e846963732883 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1954,7 +1954,8 @@ def test_arg_passthru(self): for attr in ['cummin', 'cummax']: f = getattr(df.groupby('group'), attr) result = f() - tm.assert_index_equal(result.columns, expected_columns_numeric) + # GH 15561: numeric_only=False set by default like min/max + tm.assert_index_equal(result.columns, expected_columns) result = f(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) @@ -4295,6 +4296,13 @@ def test_cummin_cummax(self): result = base_df.groupby('A').B.apply(lambda x: x.cummax()).to_frame() tm.assert_frame_equal(expected, result) + # GH 15561 + df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(['2001']))) + expected = pd.Series(pd.to_datetime('2001'), index=[0], name='b') + for method in ['cummax', 'cummin']: + result = getattr(df.groupby('a')['b'], method)() + tm.assert_series_equal(expected, result) + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = lmap(tuple, df[keys].values) From 0159dc2fa9ea6a6b4c17e01712d61dc4772cc965 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 5 Mar 2017 11:28:57 +0100 Subject: [PATCH 143/933] DOC: reset table_schema option after example (#15572) --- doc/source/options.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/options.rst b/doc/source/options.rst index 1a0e5cf6b7235..1b219f640cc87 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -533,3 +533,9 @@ by default. False by default, this can be enabled globally with the pd.set_option('display.html.table_schema', True) Only ``'display.max_rows'`` are serialized and published. + + +.. ipython:: python + :suppress: + + pd.reset_option('display.html.table_schema') \ No newline at end of file From a00ad37c3c8c29f4dd13802e93af9fc4c2cd73a7 Mon Sep 17 00:00:00 2001 From: Rouz Azari Date: Sun, 5 Mar 2017 03:23:57 -0800 Subject: [PATCH 144/933] ENH: str.replace accepts a compiled expression (#15456) - Series.str.replace now accepts a compiled regular expression for `pat`. - Signature for .str.replace changed, but remains backwards compatible. 
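In short, a sketch of the new call pattern (mirroring the doc examples added below; illustrative only):

    import re
    import pandas as pd

    s = pd.Series(['foo', 'fuz'])
    pat = re.compile(r'FUZ', flags=re.IGNORECASE)  # flags live on the regex
    s.str.replace(pat, 'bar')                      # -> ['foo', 'bar']
    # combining a compiled pattern with case= or flags= raises ValueError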
See #15446 --- doc/source/text.rst | 21 +++++++++++ doc/source/whatsnew/v0.20.0.txt | 3 +- pandas/core/strings.py | 65 +++++++++++++++++++++++++-------- pandas/tests/test_strings.py | 59 ++++++++++++++++++++++++++++++ 4 files changed, 132 insertions(+), 16 deletions(-) diff --git a/doc/source/text.rst b/doc/source/text.rst index 52e05c5d511bc..2b2520cb6100f 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -164,6 +164,27 @@ positional argument (a regex object) and return a string. repl = lambda m: m.group('two').swapcase() pd.Series(['Foo Bar Baz', np.nan]).str.replace(pat, repl) +The ``replace`` method also accepts a compiled regular expression object +from :func:`re.compile` as a pattern. All flags should be included in the +compiled regular expression object. + +.. versionadded:: 0.20.0 + +.. ipython:: python + + import re + regex_pat = re.compile(r'^.a|dog', flags=re.IGNORECASE) + s3.str.replace(regex_pat, 'XX-XX ') + +Including a ``flags`` argument when calling ``replace`` with a compiled +regular expression object will raise a ``ValueError``. + +.. ipython:: + + @verbatim + In [1]: s3.str.replace(regex_pat, 'XX-XX ', flags=re.IGNORECASE) + --------------------------------------------------------------------------- + ValueError: case and flags cannot be set when pat is a compiled regex Indexing with ``.str`` ---------------------- diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4e528daa6e876..fe9035106e4af 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -28,7 +28,8 @@ New features ~~~~~~~~~~~~ - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. -- ``.str.replace`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) +- ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) +- ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index ac8d1db6a0bf3..46ba48b4cd846 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -9,7 +9,8 @@ is_string_like, is_list_like, is_scalar, - is_integer) + is_integer, + is_re) from pandas.core.common import _values_from_object from pandas.core.algorithms import take_1d @@ -303,7 +304,7 @@ def str_endswith(arr, pat, na=np.nan): return _na_map(f, arr, na, dtype=bool) -def str_replace(arr, pat, repl, n=-1, case=True, flags=0): +def str_replace(arr, pat, repl, n=-1, case=None, flags=0): """ Replace occurrences of pattern/regex in the Series/Index with some other string. Equivalent to :meth:`str.replace` or @@ -311,8 +312,12 @@ def str_replace(arr, pat, repl, n=-1, case=True, flags=0): Parameters ---------- - pat : string - Character sequence or regular expression + pat : string or compiled regex + String can be a character sequence or regular expression. + + .. versionadded:: 0.20.0 + `pat` also accepts a compiled regex. + repl : string or callable Replacement string or a callable. The callable is passed the regex match object and must return a replacement string to be used. 
@@ -323,15 +328,24 @@ def str_replace(arr, pat, repl, n=-1, case=True, flags=0): n : int, default -1 (all) Number of replacements to make from start - case : boolean, default True - If True, case sensitive + case : boolean, default None + - If True, case sensitive (the default if `pat` is a string) + - Set to False for case insensitive + - Cannot be set if `pat` is a compiled regex flags : int, default 0 (no flags) - re module flags, e.g. re.IGNORECASE + - re module flags, e.g. re.IGNORECASE + - Cannot be set if `pat` is a compiled regex Returns ------- replaced : Series/Index of objects + Notes + ----- + When `pat` is a compiled regex, all flags should be included in the + compiled regex. Use of `case` or `flags` with a compiled regex will + raise an error. + Examples -------- When `repl` is a string, every `pat` is replaced as with @@ -372,21 +386,42 @@ def str_replace(arr, pat, repl, n=-1, case=True, flags=0): 0 tWO 1 bAR dtype: object + + Using a compiled regex with flags + + >>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE) + >>> pd.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar') + 0 foo + 1 bar + 2 NaN + dtype: object """ # Check whether repl is valid (GH 13438, GH 15055) if not (is_string_like(repl) or callable(repl)): raise TypeError("repl must be a string or callable") - use_re = not case or len(pat) > 1 or flags or callable(repl) - if use_re: - if not case: + is_compiled_re = is_re(pat) + if is_compiled_re: + if (case is not None) or (flags != 0): + raise ValueError("case and flags cannot be set" + " when pat is a compiled regex") + else: + # not a compiled regex + # set default case + if case is None: + case = True + + # add case flag, if provided + if case is False: flags |= re.IGNORECASE - regex = re.compile(pat, flags=flags) - n = n if n >= 0 else 0 - def f(x): - return regex.sub(repl, x, count=n) + use_re = is_compiled_re or len(pat) > 1 or flags or callable(repl) + + if use_re: + n = n if n >= 0 else 0 + regex = re.compile(pat, flags=flags) + f = lambda x: regex.sub(repl=repl, string=x, count=n) else: f = lambda x: x.replace(pat, repl, n) @@ -1558,7 +1593,7 @@ def match(self, pat, case=True, flags=0, na=np.nan, as_indexer=False): return self._wrap_result(result) @copy(str_replace) - def replace(self, pat, repl, n=-1, case=True, flags=0): + def replace(self, pat, repl, n=-1, case=None, flags=0): result = str_replace(self._data, pat, repl, n=n, case=case, flags=flags) return self._wrap_result(result) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index ce97b09b7e3ca..f98cabbb70477 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -469,6 +469,65 @@ def test_replace_callable(self): exp = Series(['bAR', NA]) tm.assert_series_equal(result, exp) + def test_replace_compiled_regex(self): + # GH 15446 + values = Series(['fooBAD__barBAD', NA]) + + # test with compiled regex + pat = re.compile(r'BAD[_]*') + result = values.str.replace(pat, '') + exp = Series(['foobar', NA]) + tm.assert_series_equal(result, exp) + + # mixed + mixed = Series(['aBAD', NA, 'bBAD', True, datetime.today(), 'fooBAD', + None, 1, 2.]) + + rs = Series(mixed).str.replace(pat, '') + xp = Series(['a', NA, 'b', NA, NA, 'foo', NA, NA, NA]) + tm.assertIsInstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + # unicode + values = Series([u('fooBAD__barBAD'), NA]) + + result = values.str.replace(pat, '') + exp = Series([u('foobar'), NA]) + tm.assert_series_equal(result, exp) + + result = values.str.replace(pat, '', n=1) + exp = 
Series([u('foobarBAD'), NA]) + tm.assert_series_equal(result, exp) + + # flags + unicode + values = Series([b"abcd,\xc3\xa0".decode("utf-8")]) + exp = Series([b"abcd, \xc3\xa0".decode("utf-8")]) + pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE) + result = values.str.replace(pat, ", ") + tm.assert_series_equal(result, exp) + + # case and flags provided together with a compiled regex + # are not allowed and should raise + values = Series(['fooBAD__barBAD__bad', NA]) + pat = re.compile(r'BAD[_]*') + + with tm.assertRaisesRegexp(ValueError, "case and flags must be"): + result = values.str.replace(pat, '', flags=re.IGNORECASE) + + with tm.assertRaisesRegexp(ValueError, "case and flags must be"): + result = values.str.replace(pat, '', case=False) + + with tm.assertRaisesRegexp(ValueError, "case and flags must be"): + result = values.str.replace(pat, '', case=True) + + # test with callable + values = Series(['fooBAD__barBAD', NA]) + repl = lambda m: m.group(0).swapcase() + pat = re.compile('[a-z][A-Z]{2}') + result = values.str.replace(pat, repl, n=2) + exp = Series(['foObaD__baRbaD', NA]) + tm.assert_series_equal(result, exp) + def test_repeat(self): values = Series(['a', 'b', NA, 'c', NA, 'd']) From 84bbeae9f10d63fcd546c632649828621a80f64d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 5 Mar 2017 13:01:53 +0100 Subject: [PATCH 145/933] TST: fix test str_replace error message (#15456) --- pandas/tests/test_strings.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f98cabbb70477..f8ce0070b2c78 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -511,13 +511,13 @@ def test_replace_compiled_regex(self): values = Series(['fooBAD__barBAD__bad', NA]) pat = re.compile(r'BAD[_]*') - with tm.assertRaisesRegexp(ValueError, "case and flags must be"): + with tm.assertRaisesRegexp(ValueError, "case and flags cannot be"): result = values.str.replace(pat, '', flags=re.IGNORECASE) - with tm.assertRaisesRegexp(ValueError, "case and flags must be"): + with tm.assertRaisesRegexp(ValueError, "case and flags cannot be"): result = values.str.replace(pat, '', case=False) - with tm.assertRaisesRegexp(ValueError, "case and flags must be"): + with tm.assertRaisesRegexp(ValueError, "case and flags cannot be"): result = values.str.replace(pat, '', case=True) # test with callable From f4a03d97fd4e9af69cbd480df01f0172057a0ef1 Mon Sep 17 00:00:00 2001 From: Petio Petrov Date: Sun, 5 Mar 2017 11:21:14 -0500 Subject: [PATCH 146/933] Update dtypes.py (#15577) --- pandas/types/dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/types/dtypes.py b/pandas/types/dtypes.py index 5b6d7905d4095..43135ba94ab46 100644 --- a/pandas/types/dtypes.py +++ b/pandas/types/dtypes.py @@ -73,7 +73,7 @@ def __ne__(self, other): @classmethod def is_dtype(cls, dtype): - """ Return a boolean if we if the passed type is an actual dtype that + """ Return a boolean if the passed type is an actual dtype that we can match (via string or type) """ if hasattr(dtype, 'dtype'): From 5067708f0199a0b614586dbbc1a1536fa4442b65 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 5 Mar 2017 11:25:46 -0500 Subject: [PATCH 147/933] BUG: Floating point accuracy with DatetimeIndex.round (#14440) closes #14440 Employs @eoincondron's fix for floating point inaccuracies when rounding by milliseconds for `DatetimeIndex.round` and `Timestamp.round` Author: Matt Roeschke Closes #15568 from mroeschke/fix_14440
and squashes the following commits: c5a7cbc [Matt Roeschke] BUG: Floating point accuracy with DatetimeIndex.round (#14440) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/indexes/datetimes/test_ops.py | 11 +++++++++++ pandas/tests/scalar/test_timestamp.py | 9 +++++++++ pandas/tseries/base.py | 2 +- pandas/tslib.pyx | 3 ++- 5 files changed, 24 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fe9035106e4af..db803e6e7856b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -652,6 +652,7 @@ Bug Fixes - Bug in ``Index`` power operations with reversed operands (:issue:`14973`) - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) - Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`) +- Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds (:issue:`14440`) - Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises an error with ``astype()`` for Series and DataFrames (:issue:`14265`) - Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type decimal.Decimal. (:issue:`14827`) - Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 8eb9128d8d1c8..3a6402ae83ae2 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -175,6 +175,17 @@ def test_round(self): tm.assertRaisesRegexp(ValueError, msg, rng.round, freq='M') tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') + # GH 14440 + index = pd.DatetimeIndex(['2016-10-17 12:00:00.0015'], tz=tz) + result = index.round('ms') + expected = pd.DatetimeIndex(['2016-10-17 12:00:00.002000'], tz=tz) + tm.assert_index_equal(result, expected) + + index = pd.DatetimeIndex(['2016-10-17 12:00:00.00149'], tz=tz) + result = index.round('ms') + expected = pd.DatetimeIndex(['2016-10-17 12:00:00.001000'], tz=tz) + tm.assert_index_equal(result, expected) + def test_repeat_range(self): rng = date_range('1/1/2000', '1/1/2001') diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 2abc83ca6109c..ae278ebfa2533 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -732,6 +732,15 @@ def test_round(self): for freq in ['Y', 'M', 'foobar']: self.assertRaises(ValueError, lambda: dti.round(freq)) + # GH 14440 + result = pd.Timestamp('2016-10-17 12:00:00.0015').round('ms') + expected = pd.Timestamp('2016-10-17 12:00:00.002000') + self.assertEqual(result, expected) + + result = pd.Timestamp('2016-10-17 12:00:00.00149').round('ms') + expected = pd.Timestamp('2016-10-17 12:00:00.001000') + self.assertEqual(result, expected) + def test_class_ops_pytz(self): tm._skip_if_no_pytz() from pytz import timezone diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index ee9234d6c8237..5891481677ed2 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -83,7 +83,7 @@ def _round(self, freq, rounder): # round the local times values = _ensure_datetimelike_to_i8(self) - result = (unit * rounder(values / float(unit))).astype('i8') + result = (unit * rounder(values / float(unit)).astype('i8')) result = self._maybe_mask_results(result,
fill_value=tslib.NaT) attribs = self._get_attributes_dict() if 'freq' in attribs: diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index fc6e689a35d81..b96e9434e617a 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -421,7 +421,8 @@ class Timestamp(_Timestamp): value = self.tz_localize(None).value else: value = self.value - result = Timestamp(unit * rounder(value / float(unit)), unit='ns') + result = (unit * rounder(value / float(unit)).astype('i8')) + result = Timestamp(result, unit='ns') if self.tz is not None: result = result.tz_localize(self.tz) return result From 09360d80da730008a6a89f38f3780bb1d55f9e25 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 5 Mar 2017 17:09:17 -0500 Subject: [PATCH 148/933] PERF: faster unstacking closes #15503 Author: Jeff Reback Closes #15510 from jreback/reshape3 and squashes the following commits: ec29226 [Jeff Reback] PERF: faster unstacking --- asv_bench/benchmarks/reshape.py | 21 ++++++++ doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/reshape.py | 56 +++++++++++++++++---- pandas/src/reshape.pyx | 35 +++++++++++++ pandas/src/reshape_helper.pxi.in | 81 ++++++++++++++++++++++++++++++ pandas/tests/frame/test_reshape.py | 13 +++-- setup.py | 3 ++ 7 files changed, 196 insertions(+), 15 deletions(-) create mode 100644 pandas/src/reshape.pyx create mode 100644 pandas/src/reshape_helper.pxi.in diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index a3ecfff52c794..b9346c497b9ef 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -59,6 +59,27 @@ def time_reshape_unstack_simple(self): self.df.unstack(1) +class reshape_unstack_large_single_dtype(object): + goal_time = 0.2 + + def setup(self): + m = 100 + n = 1000 + + levels = np.arange(m) + index = pd.MultiIndex.from_product([levels]*2) + columns = np.arange(n) + values = np.arange(m*m*n).reshape(m*m, n) + self.df = pd.DataFrame(values, index, columns) + self.df2 = self.df.iloc[:-1] + + def time_unstack_full_product(self): + self.df.unstack() + + def time_unstack_with_mask(self): + self.df2.unstack() + + class unstack_sparse_keyspace(object): goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index db803e6e7856b..725dc7fc52ed0 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -640,7 +640,7 @@ Performance Improvements - Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. (:issue:`14947`) - Improved performance of `rank()` for categorical data (:issue:`15498`) - +- Improved performance when using ``.unstack()`` (:issue:`15503`) .. 
_whatsnew_0200.bug_fixes: diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 87cb088c2e91e..7bcd9f2d30b79 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -7,7 +7,9 @@ import numpy as np -from pandas.types.common import _ensure_platform_int, is_list_like +from pandas.types.common import (_ensure_platform_int, + is_list_like, is_bool_dtype, + needs_i8_conversion) from pandas.types.cast import _maybe_promote from pandas.types.missing import notnull import pandas.types.concat as _concat @@ -25,6 +27,7 @@ import pandas.core.algorithms as algos import pandas.algos as _algos +import pandas._reshape as _reshape from pandas.core.index import MultiIndex, _get_na_value @@ -182,9 +185,21 @@ def get_new_values(self): stride = values.shape[1] result_width = width * stride result_shape = (length, result_width) + mask = self.mask + mask_all = mask.all() + + # we can simply reshape if we don't have a mask + if mask_all and len(values): + new_values = (self.sorted_values + .reshape(length, width, stride) + .swapaxes(1, 2) + .reshape(result_shape) + ) + new_mask = np.ones(result_shape, dtype=bool) + return new_values, new_mask # if our mask is all True, then we can use our existing dtype - if self.mask.all(): + if mask_all: dtype = values.dtype new_values = np.empty(result_shape, dtype=dtype) else: @@ -194,13 +209,36 @@ def get_new_values(self): new_mask = np.zeros(result_shape, dtype=bool) - # is there a simpler / faster way of doing this? - for i in range(values.shape[1]): - chunk = new_values[:, i * width:(i + 1) * width] - mask_chunk = new_mask[:, i * width:(i + 1) * width] - - chunk.flat[self.mask] = self.sorted_values[:, i] - mask_chunk.flat[self.mask] = True + name = np.dtype(dtype).name + sorted_values = self.sorted_values + + # we need to convert to a basic dtype + # and possibly coerce an input to our output dtype + # e.g. 
ints -> floats + if needs_i8_conversion(values): + sorted_values = sorted_values.view('i8') + new_values = new_values.view('i8') + name = 'int64' + elif is_bool_dtype(values): + sorted_values = sorted_values.astype('object') + new_values = new_values.astype('object') + name = 'object' + else: + sorted_values = sorted_values.astype(name, copy=False) + + # fill in our values & mask + f = getattr(_reshape, "unstack_{}".format(name)) + f(sorted_values, + mask.view('u1'), + stride, + length, + width, + new_values, + new_mask.view('u1')) + + # reconstruct dtype if needed + if needs_i8_conversion(values): + new_values = new_values.view(values.dtype) return new_values, new_mask diff --git a/pandas/src/reshape.pyx b/pandas/src/reshape.pyx new file mode 100644 index 0000000000000..82851b7e80994 --- /dev/null +++ b/pandas/src/reshape.pyx @@ -0,0 +1,35 @@ +# cython: profile=False + +from numpy cimport * +cimport numpy as np +import numpy as np + +cimport cython + +import_array() + +cimport util + +from numpy cimport NPY_INT8 as NPY_int8 +from numpy cimport NPY_INT16 as NPY_int16 +from numpy cimport NPY_INT32 as NPY_int32 +from numpy cimport NPY_INT64 as NPY_int64 +from numpy cimport NPY_FLOAT16 as NPY_float16 +from numpy cimport NPY_FLOAT32 as NPY_float32 +from numpy cimport NPY_FLOAT64 as NPY_float64 + +from numpy cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, + uint32_t, uint64_t, float16_t, float32_t, float64_t) + +int8 = np.dtype(np.int8) +int16 = np.dtype(np.int16) +int32 = np.dtype(np.int32) +int64 = np.dtype(np.int64) +float16 = np.dtype(np.float16) +float32 = np.dtype(np.float32) +float64 = np.dtype(np.float64) + +cdef double NaN = np.NaN +cdef double nan = NaN + +include "reshape_helper.pxi" diff --git a/pandas/src/reshape_helper.pxi.in b/pandas/src/reshape_helper.pxi.in new file mode 100644 index 0000000000000..bb9a5977f8b45 --- /dev/null +++ b/pandas/src/reshape_helper.pxi.in @@ -0,0 +1,81 @@ +""" +Template for each `dtype` helper function for take + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +# ---------------------------------------------------------------------- +# reshape +# ---------------------------------------------------------------------- + +{{py: + +# name, c_type +dtypes = [('uint8', 'uint8_t'), + ('uint16', 'uint16_t'), + ('uint32', 'uint32_t'), + ('uint64', 'uint64_t'), + ('int8', 'int8_t'), + ('int16', 'int16_t'), + ('int32', 'int32_t'), + ('int64', 'int64_t'), + ('float32', 'float32_t'), + ('float64', 'float64_t'), + ('object', 'object')] +}} + +{{for dtype, c_type in dtypes}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +def unstack_{{dtype}}(ndarray[{{c_type}}, ndim=2] values, + ndarray[uint8_t, ndim=1] mask, + Py_ssize_t stride, + Py_ssize_t length, + Py_ssize_t width, + ndarray[{{c_type}}, ndim=2] new_values, + ndarray[uint8_t, ndim=2] new_mask): + """ + transform long sorted_values to wide new_values + + Parameters + ---------- + values : typed ndarray + mask : boolean ndarray + stride : int + length : int + width : int + new_values : typed ndarray + result array + new_mask : boolean ndarray + result mask + + """ + + cdef: + Py_ssize_t i, j, w, nulls, s, offset + + {{if dtype == 'object'}} + if True: + {{else}} + with nogil: + {{endif}} + + for i in range(stride): + + nulls = 0 + for j in range(length): + + for w in range(width): + + offset = j * width + w + + if mask[offset]: + s = i * width + w + new_values[j, s] = values[offset - nulls, i] + new_mask[j, s] = 1 + else: + nulls += 1 + +{{endfor}} diff 
--git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 1890b33e3dbaa..c8c7313ddd071 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -121,19 +121,22 @@ def test_pivot_index_none(self): assert_frame_equal(result, expected) def test_stack_unstack(self): - stacked = self.frame.stack() + f = self.frame.copy() + f[:] = np.arange(np.prod(f.shape)).reshape(f.shape) + + stacked = f.stack() stacked_df = DataFrame({'foo': stacked, 'bar': stacked}) unstacked = stacked.unstack() unstacked_df = stacked_df.unstack() - assert_frame_equal(unstacked, self.frame) - assert_frame_equal(unstacked_df['bar'], self.frame) + assert_frame_equal(unstacked, f) + assert_frame_equal(unstacked_df['bar'], f) unstacked_cols = stacked.unstack(0) unstacked_cols_df = stacked_df.unstack(0) - assert_frame_equal(unstacked_cols.T, self.frame) - assert_frame_equal(unstacked_cols_df['bar'].T, self.frame) + assert_frame_equal(unstacked_cols.T, f) + assert_frame_equal(unstacked_cols_df['bar'].T, f) def test_unstack_fill(self): diff --git a/setup.py b/setup.py index cbcadce459c67..525cbdf600c78 100755 --- a/setup.py +++ b/setup.py @@ -113,6 +113,7 @@ def is_platform_mac(): _pxi_dep_template = { 'algos': ['algos_common_helper.pxi.in', 'algos_groupby_helper.pxi.in', 'algos_take_helper.pxi.in', 'algos_rank_helper.pxi.in'], + '_reshape': ['reshape_helper.pxi.in'], '_join': ['join_helper.pxi.in', 'joins_func_helper.pxi.in'], 'hashtable': ['hashtable_class_helper.pxi.in', 'hashtable_func_helper.pxi.in'], @@ -496,6 +497,8 @@ def pxd(name): algos={'pyxfile': 'algos', 'pxdfiles': ['src/util', 'hashtable'], 'depends': _pxi_dep['algos']}, + _reshape={'pyxfile': 'src/reshape', + 'depends': _pxi_dep['_reshape']}, _join={'pyxfile': 'src/join', 'pxdfiles': ['src/util', 'hashtable'], 'depends': _pxi_dep['_join']}, From dc323507672ec0ceb4b2b0366445a794f3e92ee7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 6 Mar 2017 10:32:57 +0100 Subject: [PATCH 149/933] API: return Index instead of array from datetime field accessors (GH15022) --- pandas/tests/indexes/datetimes/test_misc.py | 4 ++-- pandas/tests/indexes/period/test_construction.py | 4 ++-- pandas/tests/indexes/period/test_period.py | 8 ++++---- pandas/tseries/index.py | 15 +++++++++------ pandas/tseries/period.py | 5 +++-- 5 files changed, 20 insertions(+), 16 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 6b0191edbda5a..8fcb26ab517bf 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -307,5 +307,5 @@ def test_datetimeindex_accessors(self): def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) - self.assert_numpy_array_equal(dti.nanosecond, - np.arange(10, dtype=np.int32)) + self.assert_index_equal(dti.nanosecond, + pd.Index(np.arange(10, dtype=np.int64))) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index f13a84f4f0e92..ab70ad59846e8 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -91,8 +91,8 @@ def test_constructor_arrays_negative_year(self): pindex = PeriodIndex(year=years, quarter=quarters) - self.assert_numpy_array_equal(pindex.year, years) - self.assert_numpy_array_equal(pindex.quarter, quarters) + self.assert_index_equal(pindex.year, pd.Index(years)) + self.assert_index_equal(pindex.quarter, pd.Index(quarters)) def 
test_constructor_invalid_quarters(self): self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004), diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 1739211982b10..16b8ce6569802 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -653,10 +653,10 @@ def test_pindex_fieldaccessor_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2012-03', '2012-04'], freq='D') - exp = np.array([2011, 2011, -1, 2012, 2012], dtype=np.int64) - self.assert_numpy_array_equal(idx.year, exp) - exp = np.array([1, 2, -1, 3, 4], dtype=np.int64) - self.assert_numpy_array_equal(idx.month, exp) + exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64) + self.assert_index_equal(idx.year, exp) + exp = Index([1, 2, -1, 3, 4], dtype=np.int64) + self.assert_index_equal(idx.month, exp) def test_pindex_qaccess(self): pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 5f00e8b648689..f9821c4b799e6 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -77,16 +77,19 @@ def f(self): result = tslib.get_start_end_field(values, field, self.freqstr, month_kw) + result = self._maybe_mask_results(result, convert='float64') + elif field in ['weekday_name']: result = tslib.get_date_name_field(values, field) - return self._maybe_mask_results(result) + result = self._maybe_mask_results(result) elif field in ['is_leap_year']: # no need to mask NaT - return tslib.get_date_field(values, field) + result = tslib.get_date_field(values, field) else: result = tslib.get_date_field(values, field) + result = self._maybe_mask_results(result, convert='float64') - return self._maybe_mask_results(result, convert='float64') + return Index(result) f.__name__ = name f.__doc__ = docstring @@ -1913,9 +1916,9 @@ def to_julian_date(self): """ # http://mysite.verizon.net/aesir_research/date/jdalg2.htm - year = self.year - month = self.month - day = self.day + year = np.asarray(self.year) + month = np.asarray(self.month) + day = np.asarray(self.day) testarr = month < 3 year[testarr] -= 1 month[testarr] += 12 diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index bfe7724a1cfaa..56f88b7ed800c 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -52,7 +52,8 @@ def _field_accessor(name, alias, docstring=None): def f(self): base, mult = _gfc(self.freq) - return get_period_field_arr(alias, self._values, base) + result = get_period_field_arr(alias, self._values, base) + return Index(result) f.__name__ = name f.__doc__ = docstring return property(f) @@ -585,7 +586,7 @@ def to_datetime(self, dayfirst=False): @property def is_leap_year(self): """ Logical indicating if the date belongs to a leap year """ - return tslib._isleapyear_arr(self.year) + return tslib._isleapyear_arr(np.asarray(self.year)) @property def start_time(self): From 0bf45320440e0d477f6b31ac8825f34e8212f152 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 6 Mar 2017 10:38:22 +0100 Subject: [PATCH 150/933] Revert "API: return Index instead of array from datetime field accessors (GH15022)" This reverts commit dc323507672ec0ceb4b2b0366445a794f3e92ee7. 
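A minimal sketch of the behavior difference at stake in PATCH 149 and this revert, reconstructed from the test_nanosecond_field changes on both sides (the example index values are illustrative):

    import numpy as np
    import pandas as pd

    dti = pd.DatetimeIndex(np.arange(10))

    # Behavior restored by this revert: datetime field accessors hand
    # back a plain NumPy array (int32 here, per the test change below).
    assert isinstance(dti.nanosecond, np.ndarray)

    # Under the reverted patch (GH15022), the same accessor returned an
    # Index with int64 values instead:
    #   dti.nanosecond == pd.Index(np.arange(10, dtype=np.int64))

The revert also lets to_julian_date drop its np.asarray wrappers: that method mutates year, month and day in place, which needs writable ndarrays rather than immutable Index objects.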
--- pandas/tests/indexes/datetimes/test_misc.py | 4 ++-- pandas/tests/indexes/period/test_construction.py | 4 ++-- pandas/tests/indexes/period/test_period.py | 8 ++++---- pandas/tseries/index.py | 15 ++++++--------- pandas/tseries/period.py | 5 ++--- 5 files changed, 16 insertions(+), 20 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 8fcb26ab517bf..6b0191edbda5a 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -307,5 +307,5 @@ def test_datetimeindex_accessors(self): def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) - self.assert_index_equal(dti.nanosecond, - pd.Index(np.arange(10, dtype=np.int64))) + self.assert_numpy_array_equal(dti.nanosecond, + np.arange(10, dtype=np.int32)) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index ab70ad59846e8..f13a84f4f0e92 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -91,8 +91,8 @@ def test_constructor_arrays_negative_year(self): pindex = PeriodIndex(year=years, quarter=quarters) - self.assert_index_equal(pindex.year, pd.Index(years)) - self.assert_index_equal(pindex.quarter, pd.Index(quarters)) + self.assert_numpy_array_equal(pindex.year, years) + self.assert_numpy_array_equal(pindex.quarter, quarters) def test_constructor_invalid_quarters(self): self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004), diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 16b8ce6569802..1739211982b10 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -653,10 +653,10 @@ def test_pindex_fieldaccessor_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2012-03', '2012-04'], freq='D') - exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64) - self.assert_index_equal(idx.year, exp) - exp = Index([1, 2, -1, 3, 4], dtype=np.int64) - self.assert_index_equal(idx.month, exp) + exp = np.array([2011, 2011, -1, 2012, 2012], dtype=np.int64) + self.assert_numpy_array_equal(idx.year, exp) + exp = np.array([1, 2, -1, 3, 4], dtype=np.int64) + self.assert_numpy_array_equal(idx.month, exp) def test_pindex_qaccess(self): pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index f9821c4b799e6..5f00e8b648689 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -77,19 +77,16 @@ def f(self): result = tslib.get_start_end_field(values, field, self.freqstr, month_kw) - result = self._maybe_mask_results(result, convert='float64') - elif field in ['weekday_name']: result = tslib.get_date_name_field(values, field) - result = self._maybe_mask_results(result) + return self._maybe_mask_results(result) elif field in ['is_leap_year']: # no need to mask NaT - result = tslib.get_date_field(values, field) + return tslib.get_date_field(values, field) else: result = tslib.get_date_field(values, field) - result = self._maybe_mask_results(result, convert='float64') - return Index(result) + return self._maybe_mask_results(result, convert='float64') f.__name__ = name f.__doc__ = docstring @@ -1916,9 +1913,9 @@ def to_julian_date(self): """ # http://mysite.verizon.net/aesir_research/date/jdalg2.htm - year = np.asarray(self.year) - month = np.asarray(self.month) - day = np.asarray(self.day) + year = 
self.year + month = self.month + day = self.day testarr = month < 3 year[testarr] -= 1 month[testarr] += 12 diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 56f88b7ed800c..bfe7724a1cfaa 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -52,8 +52,7 @@ def _field_accessor(name, alias, docstring=None): def f(self): base, mult = _gfc(self.freq) - result = get_period_field_arr(alias, self._values, base) - return Index(result) + return get_period_field_arr(alias, self._values, base) f.__name__ = name f.__doc__ = docstring return property(f) @@ -586,7 +585,7 @@ def to_datetime(self, dayfirst=False): @property def is_leap_year(self): """ Logical indicating if the date belongs to a leap year """ - return tslib._isleapyear_arr(np.asarray(self.year)) + return tslib._isleapyear_arr(self.year) @property def start_time(self): From c61b350c999f515c199bf9701c4bba3d610bc384 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 6 Mar 2017 17:12:44 -0500 Subject: [PATCH 151/933] DOC: updated badges --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7bc350d1c6675..8595043cf68c3 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,15 @@ + + + + From 7740231bbc0b92db55479cd02400b37c9470a4d2 Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Mon, 6 Mar 2017 16:15:10 -0600 Subject: [PATCH 152/933] DOC: remove wakari.io section (#15596) --- doc/source/install.rst | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/doc/source/install.rst b/doc/source/install.rst index 8b0fec6a3dac3..fe2a9fa4ba509 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -23,18 +23,6 @@ Officially Python 2.7, 3.4, 3.5, and 3.6 Installing pandas ----------------- -Trying out pandas, no installation required! -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The easiest way to start experimenting with pandas doesn't involve installing -pandas at all. - -`Wakari `__ is a free service that provides a hosted -`IPython Notebook `__ service in the cloud. - -Simply create an account, and have access to pandas from within your brower via -an `IPython Notebook `__ in a few minutes. - .. _install.anaconda: Installing pandas with Anaconda From e097bf596509779294b2ebf320a4b271deaec6ec Mon Sep 17 00:00:00 2001 From: Leon Yin Date: Mon, 6 Mar 2017 23:41:30 -0800 Subject: [PATCH 153/933] DOC: remove Panel4D from the API docs #15579 (#15598) --- doc/source/api.rst | 53 +------------------------------------ scripts/api_rst_coverage.py | 4 +-- 2 files changed, 3 insertions(+), 54 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 33ac5fde651d4..fbce64df84859 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1237,58 +1237,7 @@ Serialization / IO / Conversion Panel.to_frame Panel.to_xarray Panel.to_clipboard - -.. _api.panel4d: - -Panel4D -------- - -Constructor -~~~~~~~~~~~ -.. autosummary:: - :toctree: generated/ - - Panel4D - -Serialization / IO / Conversion -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autosummary:: - :toctree: generated/ - - Panel4D.to_xarray - -Attributes and underlying data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -**Axes** - - * **labels**: axis 1; each label corresponds to a Panel contained inside - * **items**: axis 2; each item corresponds to a DataFrame contained inside - * **major_axis**: axis 3; the index (rows) of each of the DataFrames - * **minor_axis**: axis 4; the columns of each of the DataFrames - -.. 
autosummary:: - :toctree: generated/ - - Panel4D.values - Panel4D.axes - Panel4D.ndim - Panel4D.size - Panel4D.shape - Panel4D.dtypes - Panel4D.ftypes - Panel4D.get_dtype_counts - Panel4D.get_ftype_counts - -Conversion -~~~~~~~~~~ -.. autosummary:: - :toctree: generated/ - - Panel4D.astype - Panel4D.copy - Panel4D.isnull - Panel4D.notnull - + .. _api.index: Index diff --git a/scripts/api_rst_coverage.py b/scripts/api_rst_coverage.py index cc456f03c02ec..6bb5383509be6 100644 --- a/scripts/api_rst_coverage.py +++ b/scripts/api_rst_coverage.py @@ -4,11 +4,11 @@ def main(): # classes whose members to check - classes = [pd.Series, pd.DataFrame, pd.Panel, pd.Panel4D] + classes = [pd.Series, pd.DataFrame, pd.Panel] def class_name_sort_key(x): if x.startswith('Series'): - # make sure Series precedes DataFrame, Panel, and Panel4D + # make sure Series precedes DataFrame, and Panel. return ' ' + x else: return x From 11239822b3ced16c28831f08d1ef62ed0c5c28ca Mon Sep 17 00:00:00 2001 From: Jeff Carey Date: Tue, 7 Mar 2017 08:21:32 -0500 Subject: [PATCH 154/933] CLN: Moved freeze_panes validation to io/excel.py (#15160) follow up to #15160 Author: Jeff Carey Closes #15592 from jeffcarey/enh-15160-touchup2 and squashes the following commits: 81cb86f [Jeff Carey] Cleaned up freeze_panes validation code a802fc7 [Jeff Carey] Moved freeze_panes validation to io/excel.py --- pandas/core/frame.py | 12 ------------ pandas/io/excel.py | 22 +++++++++++++++++++--- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b3e43edc3eb55..15179ac321076 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1431,24 +1431,12 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', inf_rep=inf_rep) formatted_cells = formatter.get_formatted_cells() - freeze_panes = self._validate_freeze_panes(freeze_panes) excel_writer.write_cells(formatted_cells, sheet_name, startrow=startrow, startcol=startcol, freeze_panes=freeze_panes) if need_save: excel_writer.save() - def _validate_freeze_panes(self, freeze_panes): - if freeze_panes is not None: - if ( - len(freeze_panes) == 2 and - all(isinstance(item, int) for item in freeze_panes) - ): - return freeze_panes - - raise ValueError("freeze_panes must be of form (row, column)" - " where row and column are integers") - def to_stata(self, fname, convert_dates=None, write_index=True, encoding="latin-1", byteorder=None, time_stamp=None, data_label=None, variable_labels=None): diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 37a61b7dc9ab5..00ec8bcf060ef 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -543,6 +543,22 @@ def __exit__(self, exc_type, exc_value, traceback): self.close() +def _validate_freeze_panes(freeze_panes): + if freeze_panes is not None: + if ( + len(freeze_panes) == 2 and + all(isinstance(item, int) for item in freeze_panes) + ): + return True + + raise ValueError("freeze_panes must be of form (row, column)" + " where row and column are integers") + + # freeze_panes wasn't specified, return False so it won't be applied + # to output sheet + return False + + def _trim_excel_header(row): # trim header row so auto-index inference works # xlrd uses '' , openpyxl None @@ -1330,7 +1346,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, wks.title = sheet_name self.sheets[sheet_name] = wks - if freeze_panes is not None: + if _validate_freeze_panes(freeze_panes): wks.freeze_panes = wks.cell(row=freeze_panes[0] + 1, column=freeze_panes[1] + 1) @@ -1418,7 
+1434,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, wks = self.book.add_sheet(sheet_name) self.sheets[sheet_name] = wks - if freeze_panes is not None: + if _validate_freeze_panes(freeze_panes): wks.set_panes_frozen(True) wks.set_horz_split_pos(freeze_panes[0]) wks.set_vert_split_pos(freeze_panes[1]) @@ -1550,7 +1566,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, style_dict = {} - if freeze_panes is not None: + if _validate_freeze_panes(freeze_panes): wks.freeze_panes(*(freeze_panes)) for cell in cells: From fdee92214dedf87f351f1ae0613d9f25061359b0 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 7 Mar 2017 08:23:18 -0500 Subject: [PATCH 155/933] BUG: Timestamp.round precision error for ns (#15578) closes #15578 Author: Matt Roeschke Closes #15588 from mroeschke/fix_15578 and squashes the following commits: af95baa [Matt Roeschke] BUG: Timestamp.round precision error for ns (#15578) --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/tests/indexes/datetimes/test_ops.py | 14 +++++++++++++- pandas/tests/scalar/test_timestamp.py | 13 ++++++++++++- pandas/tseries/base.py | 16 +++++++++++++--- pandas/tslib.pyx | 13 ++++++++++++- 5 files changed, 51 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 725dc7fc52ed0..f1df8f456159a 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -652,7 +652,7 @@ Bug Fixes - Bug in ``Index`` power operations with reversed operands (:issue:`14973`) - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) - Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`) -- Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds (:issue: `14440`) +- Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds or less (:issue: `14440`, :issue:`15578`) - Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises error now with ``astype()`` for Series and DataFrames (:issue:`14265`) - Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type decimal.Decimal. 
(:issue:`14827`) - Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 3a6402ae83ae2..312017eef3446 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -175,17 +175,29 @@ def test_round(self): tm.assertRaisesRegexp(ValueError, msg, rng.round, freq='M') tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') - # GH 14440 + # GH 14440 & 15578 index = pd.DatetimeIndex(['2016-10-17 12:00:00.0015'], tz=tz) result = index.round('ms') expected = pd.DatetimeIndex(['2016-10-17 12:00:00.002000'], tz=tz) tm.assert_index_equal(result, expected) + for freq in ['us', 'ns']: + tm.assert_index_equal(index, index.round(freq)) + index = pd.DatetimeIndex(['2016-10-17 12:00:00.00149'], tz=tz) result = index.round('ms') expected = pd.DatetimeIndex(['2016-10-17 12:00:00.001000'], tz=tz) tm.assert_index_equal(result, expected) + index = pd.DatetimeIndex(['2016-10-17 12:00:00.001501031']) + result = index.round('10ns') + expected = pd.DatetimeIndex(['2016-10-17 12:00:00.001501030']) + tm.assert_index_equal(result, expected) + + with tm.assert_produces_warning(): + ts = '2016-10-17 12:00:00.001501031' + pd.DatetimeIndex([ts]).round('1010ns') + def test_repeat_range(self): rng = date_range('1/1/2000', '1/1/2001') diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index ae278ebfa2533..bbcdce922f58a 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -732,7 +732,7 @@ def test_round(self): for freq in ['Y', 'M', 'foobar']: self.assertRaises(ValueError, lambda: dti.round(freq)) - # GH 14440 + # GH 14440 & 15578 result = pd.Timestamp('2016-10-17 12:00:00.0015').round('ms') expected = pd.Timestamp('2016-10-17 12:00:00.002000') self.assertEqual(result, expected) @@ -741,6 +741,17 @@ def test_round(self): expected = pd.Timestamp('2016-10-17 12:00:00.001000') self.assertEqual(result, expected) + ts = pd.Timestamp('2016-10-17 12:00:00.0015') + for freq in ['us', 'ns']: + self.assertEqual(ts, ts.round(freq)) + + result = pd.Timestamp('2016-10-17 12:00:00.001501031').round('10ns') + expected = pd.Timestamp('2016-10-17 12:00:00.001501030') + self.assertEqual(result, expected) + + with tm.assert_produces_warning(): + pd.Timestamp('2016-10-17 12:00:00.001501031').round('1010ns') + def test_class_ops_pytz(self): tm._skip_if_no_pytz() from pytz import timezone diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 5891481677ed2..2e22c35868cb3 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -1,6 +1,7 @@ """ Base and utility classes for tseries type pandas objects. 
""" +import warnings from datetime import datetime, timedelta @@ -79,11 +80,20 @@ def _round(self, freq, rounder): from pandas.tseries.frequencies import to_offset unit = to_offset(freq).nanos - # round the local times values = _ensure_datetimelike_to_i8(self) - - result = (unit * rounder(values / float(unit)).astype('i8')) + if unit < 1000 and unit % 1000 != 0: + # for nano rounding, work with the last 6 digits separately + # due to float precision + buff = 1000000 + result = (buff * (values // buff) + unit * + (rounder((values % buff) / float(unit))).astype('i8')) + elif unit >= 1000 and unit % 1000 != 0: + msg = 'Precision will be lost using frequency: {}' + warnings.warn(msg.format(freq)) + result = (unit * rounder(values / float(unit)).astype('i8')) + else: + result = (unit * rounder(values / float(unit)).astype('i8')) result = self._maybe_mask_results(result, fill_value=tslib.NaT) attribs = self._get_attributes_dict() if 'freq' in attribs: diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index b96e9434e617a..8ee92e9fb900d 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -421,7 +421,18 @@ class Timestamp(_Timestamp): value = self.tz_localize(None).value else: value = self.value - result = (unit * rounder(value / float(unit)).astype('i8')) + if unit < 1000 and unit % 1000 != 0: + # for nano rounding, work with the last 6 digits separately + # due to float precision + buff = 1000000 + result = (buff * (value // buff) + unit * + (rounder((value % buff) / float(unit))).astype('i8')) + elif unit >= 1000 and unit % 1000 != 0: + msg = 'Precision will be lost using frequency: {}' + warnings.warn(msg.format(freq)) + result = (unit * rounder(value / float(unit)).astype('i8')) + else: + result = (unit * rounder(value / float(unit)).astype('i8')) result = Timestamp(result, unit='ns') if self.tz is not None: result = result.tz_localize(self.tz) From 38a34be9108fc76b68e57860506f428d8d67e002 Mon Sep 17 00:00:00 2001 From: Kernc Date: Tue, 7 Mar 2017 08:27:38 -0500 Subject: [PATCH 156/933] BUG: repr SparseDataFrame after setting a value closes #15488 Author: Kernc Closes #15489 from kernc/sdf-repr and squashes the following commits: 2dc145c [Kernc] BUG: repr SparseDataFrame after setting a value --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/formats/format.py | 3 --- pandas/tests/sparse/test_format.py | 12 ++++++++++++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f1df8f456159a..e459c854dfab9 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -698,7 +698,7 @@ Bug Fixes - Bug in ``to_sql`` when writing a DataFrame with numeric index names (:issue:`15404`). - Bug in ``Series.iloc`` where a ``Categorical`` object for list-like indexes input was returned, where a ``Series`` was expected. 
(:issue:`14580`) - +- Bug in repr-formatting a ``SparseDataFrame`` after a value was set on (a copy of) one of its series (:issue:`15488`) - Bug in groupby operations with timedelta64 when passing ``numeric_only=False`` (:issue:`5724`) diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 9dde3b0001c31..622c4cd3bbcc7 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -716,9 +716,6 @@ def to_html(self, classes=None, notebook=False, border=None): def _get_formatted_column_labels(self, frame): from pandas.core.index import _sparsify - def is_numeric_dtype(dtype): - return issubclass(dtype.type, np.number) - columns = frame.columns if isinstance(columns, MultiIndex): diff --git a/pandas/tests/sparse/test_format.py b/pandas/tests/sparse/test_format.py index 0c0e773d19bb9..ba870a2c33801 100644 --- a/pandas/tests/sparse/test_format.py +++ b/pandas/tests/sparse/test_format.py @@ -116,3 +116,15 @@ def test_sparse_frame(self): with option_context("display.max_rows", 3): self.assertEqual(repr(sparse), repr(df)) + + def test_sparse_repr_after_set(self): + # GH 15488 + sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]]) + res = sdf.copy() + + # Ignore the warning + with pd.option_context('mode.chained_assignment', None): + sdf[0][1] = 2 # This line triggers the bug + + repr(sdf) + tm.assert_sp_frame_equal(sdf, res) From a347ecb574f4e53f43400ad50b507c481ce12edb Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Mar 2017 09:30:06 -0500 Subject: [PATCH 157/933] DOC/BUILD: Parallelize doc build closes #15591 a couple of minutes faster with -j 2. fixes some deprecated use of pd.Term Author: Jeff Reback Closes #15600 from jreback/docs and squashes the following commits: c19303d [Jeff Reback] DOC: parallel build for docs --- doc/make.py | 6 +++--- doc/source/io.rst | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/doc/make.py b/doc/make.py index 8a6d4e5df24f0..a2f5be5594e44 100755 --- a/doc/make.py +++ b/doc/make.py @@ -197,7 +197,7 @@ def html(): print(e) print("Failed to convert %s" % nb) - if os.system('sphinx-build -P -b html -d build/doctrees ' + if os.system('sphinx-build -j 2 -P -b html -d build/doctrees ' 'source build/html'): raise SystemExit("Building HTML failed.") try: @@ -222,7 +222,7 @@ def latex(): check_build() if sys.platform != 'win32': # LaTeX format. - if os.system('sphinx-build -b latex -d build/doctrees ' + if os.system('sphinx-build -j 2 -b latex -d build/doctrees ' 'source build/latex'): raise SystemExit("Building LaTeX failed.") # Produce pdf. @@ -245,7 +245,7 @@ def latex_forced(): check_build() if sys.platform != 'win32': # LaTeX format. - if os.system('sphinx-build -b latex -d build/doctrees ' + if os.system('sphinx-build -j 2 -b latex -d build/doctrees ' 'source build/latex'): raise SystemExit("Building LaTeX failed.") # Produce pdf. diff --git a/doc/source/io.rst b/doc/source/io.rst index c7a68a0fe9fbb..fa57d6d692152 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3758,7 +3758,7 @@ be data_columns # on-disk operations store.append('df_dc', df_dc, data_columns = ['B', 'C', 'string', 'string2']) - store.select('df_dc', [ pd.Term('B>0') ]) + store.select('df_dc', where='B>0') # getting creative store.select('df_dc', 'B > 0 & C > 0 & string == foo') @@ -4352,6 +4352,9 @@ HDFStore supports ``Panel4D`` storage. .. 
ipython:: python :okwarning: + wp = pd.Panel(randn(2, 5, 4), items=['Item1', 'Item2'], + major_axis=pd.date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) p4d = pd.Panel4D({ 'l1' : wp }) p4d store.append('p4d', p4d) @@ -4368,8 +4371,7 @@ object). This cannot be changed after table creation. :okwarning: store.append('p4d2', p4d, axes=['labels', 'major_axis', 'minor_axis']) - store - store.select('p4d2', [ pd.Term('labels=l1'), pd.Term('items=Item1'), pd.Term('minor_axis=A_big_strings') ]) + store.select('p4d2', where='labels=l1 and items=Item1 and minor_axis=A') .. ipython:: python :suppress: From c52ff68a536fafc0204c5afea57abb943a6c37ce Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 7 Mar 2017 16:14:53 -0500 Subject: [PATCH 158/933] BUG: fix SparseSeries reindex by using Series implementation closes #15447 Author: Pietro Battiston Closes #15461 from toobaz/drop_sparse_reindex and squashes the following commits: 9084246 [Pietro Battiston] Test SparseSeries.reindex with fill_value and nearest d6a46da [Pietro Battiston] Use _shared_docs for documentation 922c7b0 [Pietro Battiston] Test "copy" argument af99190 [Pietro Battiston] Whatsnew 7945cb4 [Pietro Battiston] Tests for .loc() and .reindex() on sparse series with MultiIndex 55b99f8 [Pietro Battiston] BUG: Drop faulty and redundant reindex() for SparseSeries --- doc/source/whatsnew/v0.20.0.txt | 4 +++ pandas/sparse/series.py | 24 +++---------- pandas/tests/sparse/test_indexing.py | 53 +++++++++++++++++++++++++++- 3 files changed, 61 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index e459c854dfab9..ece9ff4a1adff 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -672,6 +672,10 @@ Bug Fixes - Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) +- Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) + + + - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``.asfreq()``, where frequency was not set for empty ``Series` (:issue:`14320`) diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index a3b701169ce91..c3dd089e8409a 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -32,7 +32,7 @@ _coo_to_sparse_series) -_shared_doc_kwargs = dict(klass='SparseSeries', +_shared_doc_kwargs = dict(axes='index', klass='SparseSeries', axes_single_arg="{0, 'index'}") # ----------------------------------------------------------------------------- @@ -570,27 +570,13 @@ def copy(self, deep=True): return self._constructor(new_data, sparse_index=self.sp_index, fill_value=self.fill_value).__finalize__(self) + @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs) def reindex(self, index=None, method=None, copy=True, limit=None, **kwargs): - """ - Conform SparseSeries to new Index - - See Series.reindex docstring for general behavior - Returns - ------- - reindexed : SparseSeries - """ - new_index = _ensure_index(index) - - if self.index.equals(new_index): - if copy: - return self.copy() - else: - return self - return self._constructor(self._data.reindex(new_index, method=method, - limit=limit, copy=copy), - index=new_index).__finalize__(self) + return super(SparseSeries, self).reindex(index=index, method=method, + copy=copy, limit=limit, + **kwargs) def 
sparse_reindex(self, new_index): """ diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py index 357a7103f4027..1a0782c0a3db9 100644 --- a/pandas/tests/sparse/test_indexing.py +++ b/pandas/tests/sparse/test_indexing.py @@ -366,7 +366,7 @@ def test_reindex(self): exp = orig.reindex(['A', 'E', 'C', 'D']).to_sparse() tm.assert_sp_series_equal(res, exp) - def test_reindex_fill_value(self): + def test_fill_value_reindex(self): orig = pd.Series([1, np.nan, 0, 3, 0], index=list('ABCDE')) sparse = orig.to_sparse(fill_value=0) @@ -397,6 +397,23 @@ def test_reindex_fill_value(self): exp = orig.reindex(['A', 'E', 'C', 'D']).to_sparse(fill_value=0) tm.assert_sp_series_equal(res, exp) + def test_reindex_fill_value(self): + floats = pd.Series([1., 2., 3.]).to_sparse() + result = floats.reindex([1, 2, 3], fill_value=0) + expected = pd.Series([2., 3., 0], index=[1, 2, 3]).to_sparse() + tm.assert_sp_series_equal(result, expected) + + def test_reindex_nearest(self): + s = pd.Series(np.arange(10, dtype='float64')).to_sparse() + target = [0.1, 0.9, 1.5, 2.0] + actual = s.reindex(target, method='nearest') + expected = pd.Series(np.around(target), target).to_sparse() + tm.assert_sp_series_equal(expected, actual) + + actual = s.reindex(target, method='nearest', tolerance=0.2) + expected = pd.Series([0, 1, np.nan, 2], target).to_sparse() + tm.assert_sp_series_equal(expected, actual) + def tests_indexing_with_sparse(self): # GH 13985 @@ -504,6 +521,11 @@ def test_loc(self): exp = orig.loc[[1, 3, 4, 5]].to_sparse() tm.assert_sp_series_equal(result, exp) + # single element list (GH 15447) + result = sparse.loc[['A']] + exp = orig.loc[['A']].to_sparse() + tm.assert_sp_series_equal(result, exp) + # dense array result = sparse.loc[orig % 2 == 1] exp = orig.loc[orig % 2 == 1].to_sparse() @@ -537,6 +559,35 @@ def test_loc_slice(self): orig.loc['A':'B'].to_sparse()) tm.assert_sp_series_equal(sparse.loc[:'B'], orig.loc[:'B'].to_sparse()) + def test_reindex(self): + # GH 15447 + orig = self.orig + sparse = self.sparse + + res = sparse.reindex([('A', 0), ('C', 1)]) + exp = orig.reindex([('A', 0), ('C', 1)]).to_sparse() + tm.assert_sp_series_equal(res, exp) + + # On specific level: + res = sparse.reindex(['A', 'C', 'B'], level=0) + exp = orig.reindex(['A', 'C', 'B'], level=0).to_sparse() + tm.assert_sp_series_equal(res, exp) + + # single element list (GH 15447) + res = sparse.reindex(['A'], level=0) + exp = orig.reindex(['A'], level=0).to_sparse() + tm.assert_sp_series_equal(res, exp) + + with tm.assertRaises(TypeError): + # Incomplete keys are not accepted for reindexing: + sparse.reindex(['A', 'C']) + + # "copy" argument: + res = sparse.reindex(sparse.index, copy=True) + exp = orig.reindex(orig.index, copy=True).to_sparse() + tm.assert_sp_series_equal(res, exp) + self.assertIsNot(sparse, res) + class TestSparseDataFrameIndexing(tm.TestCase): From 648ae4f03622d8eafe1ca3b833bd6a99f56bece4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Mar 2017 18:21:18 -0500 Subject: [PATCH 159/933] BLD: consolidate remaining extensions moves extensions to pandas/_libs, which holds the extension code and also the generated builds (as its importable). pandas/_libs/src is now almost an includes dir, holding low-frequency changing code. This consolidates the import process making it more uniform and consistent throughout the codebase. Finally this cleans up the remaining top-level namespace (with some deprecations in place for example pandas.lib, pandas.tslib, pandas.json, pandas.parser. 
I listed all of the changes in the whatsnew, but I don't think worthwhile deprecating anything else. Author: Jeff Reback Closes #15537 from jreback/extensions3 and squashes the following commits: a6d6cfa [Jeff Reback] BLD: rename / move some extensions --- Makefile | 2 +- asv_bench/benchmarks/binary_ops.py | 2 +- asv_bench/benchmarks/pandas_vb_common.py | 14 +- asv_bench/benchmarks/panel_methods.py | 2 +- doc/source/whatsnew/v0.20.0.txt | 29 +++ pandas/__init__.py | 14 +- pandas/_libs/__init__.py | 8 + pandas/{ => _libs}/algos.pyx | 4 +- .../{src => _libs}/algos_common_helper.pxi.in | 2 +- .../algos_groupby_helper.pxi.in | 0 .../{src => _libs}/algos_rank_helper.pxi.in | 0 .../{src => _libs}/algos_take_helper.pxi.in | 0 pandas/{ => _libs}/hashtable.pxd | 0 pandas/{ => _libs}/hashtable.pyx | 2 +- .../hashtable_class_helper.pxi.in | 0 .../hashtable_func_helper.pxi.in | 0 pandas/{ => _libs}/index.pyx | 4 +- .../{src => _libs}/index_class_helper.pxi.in | 0 pandas/{src => _libs}/join.pyx | 4 +- .../join_func_helper.pxi.in} | 0 pandas/{src => _libs}/join_helper.pxi.in | 0 pandas/{ => _libs}/lib.pxd | 0 pandas/{ => _libs}/lib.pyx | 0 pandas/{src => _libs}/period.pyx | 44 ++-- pandas/{src => _libs}/reshape.pyx | 0 pandas/{src => _libs}/reshape_helper.pxi.in | 0 pandas/{ => _libs}/src/datetime.pxd | 0 pandas/{ => _libs}/src/datetime/np_datetime.c | 0 pandas/{ => _libs}/src/datetime/np_datetime.h | 0 .../src/datetime/np_datetime_strings.c | 0 .../src/datetime/np_datetime_strings.h | 0 pandas/{ => _libs}/src/datetime_helper.h | 0 pandas/{ => _libs}/src/headers/math.h | 0 pandas/{ => _libs}/src/headers/ms_inttypes.h | 0 pandas/{ => _libs}/src/headers/ms_stdint.h | 0 pandas/{ => _libs}/src/headers/portable.h | 0 pandas/{ => _libs}/src/headers/stdint.h | 0 pandas/{ => _libs}/src/helper.h | 0 pandas/{ => _libs}/src/inference.pyx | 0 pandas/{ => _libs}/src/khash.pxd | 0 pandas/{ => _libs}/src/klib/khash.h | 0 pandas/{ => _libs}/src/klib/khash_python.h | 0 pandas/{ => _libs}/src/klib/ktypes.h | 0 pandas/{ => _libs}/src/klib/kvec.h | 0 pandas/{ => _libs}/src/msgpack/pack.h | 0 .../{ => _libs}/src/msgpack/pack_template.h | 0 pandas/{ => _libs}/src/msgpack/sysdep.h | 0 pandas/{ => _libs}/src/msgpack/unpack.h | 0 .../{ => _libs}/src/msgpack/unpack_define.h | 0 .../{ => _libs}/src/msgpack/unpack_template.h | 0 pandas/{ => _libs}/src/numpy.pxd | 0 pandas/{ => _libs}/src/numpy_helper.h | 0 pandas/{ => _libs}/src/offsets.pyx | 0 pandas/{ => _libs}/src/parse_helper.h | 0 pandas/{ => _libs}/src/parser/.gitignore | 0 pandas/{ => _libs}/src/parser/Makefile | 0 pandas/{ => _libs}/src/parser/io.c | 0 pandas/{ => _libs}/src/parser/io.h | 0 pandas/{ => _libs}/src/parser/tokenizer.c | 0 pandas/{ => _libs}/src/parser/tokenizer.h | 0 pandas/{ => _libs}/src/period_helper.c | 0 pandas/{ => _libs}/src/period_helper.h | 0 pandas/{ => _libs}/src/properties.pyx | 0 pandas/{ => _libs}/src/reduce.pyx | 0 pandas/{ => _libs}/src/skiplist.h | 0 pandas/{ => _libs}/src/skiplist.pxd | 0 pandas/{ => _libs}/src/skiplist.pyx | 0 pandas/{ => _libs}/src/ujson/lib/ultrajson.h | 0 .../{ => _libs}/src/ujson/lib/ultrajsondec.c | 0 .../{ => _libs}/src/ujson/lib/ultrajsonenc.c | 0 .../{ => _libs}/src/ujson/python/JSONtoObj.c | 0 .../{ => _libs}/src/ujson/python/objToJSON.c | 2 +- .../{ => _libs}/src/ujson/python/py_defines.h | 0 pandas/{ => _libs}/src/ujson/python/ujson.c | 8 +- pandas/{ => _libs}/src/ujson/python/version.h | 0 pandas/{ => _libs}/src/util.pxd | 0 pandas/{ => _libs}/tslib.pxd | 0 pandas/{ => _libs}/tslib.pyx | 0 
pandas/compat/pickle_compat.py | 8 +- pandas/computation/scope.py | 8 +- pandas/core/algorithms.py | 9 +- pandas/core/base.py | 2 +- pandas/core/categorical.py | 10 +- pandas/core/common.py | 5 +- pandas/core/frame.py | 32 ++- pandas/core/generic.py | 6 +- pandas/core/groupby.py | 48 ++-- pandas/core/internals.py | 9 +- pandas/core/missing.py | 4 +- pandas/core/nanops.py | 3 +- pandas/core/ops.py | 39 +-- pandas/core/reshape.py | 5 +- pandas/core/series.py | 31 ++- pandas/core/sorting.py | 25 +- pandas/core/strings.py | 2 +- pandas/core/window.py | 9 +- pandas/{ => core}/window.pyx | 2 +- pandas/formats/format.py | 5 +- pandas/indexes/api.py | 2 +- pandas/indexes/base.py | 38 ++- pandas/indexes/category.py | 6 +- pandas/indexes/multi.py | 10 +- pandas/indexes/numeric.py | 49 ++-- pandas/indexes/range.py | 4 +- pandas/io/api.py | 2 +- pandas/io/date_converters.py | 2 +- pandas/io/excel.py | 6 +- pandas/io/json/json.py | 8 +- pandas/io/json/normalize.py | 2 +- pandas/{ => io}/msgpack/__init__.py | 8 +- pandas/{ => io}/msgpack/_packer.pyx | 6 +- pandas/{ => io}/msgpack/_unpacker.pyx | 8 +- pandas/{ => io}/msgpack/_version.py | 0 pandas/{ => io}/msgpack/exceptions.py | 0 pandas/io/packers.py | 4 +- pandas/io/parsers.py | 8 +- pandas/{parser.pyx => io/parsers.pyx} | 7 +- pandas/io/pytables.py | 4 +- pandas/io/sas/__init__.py | 1 + pandas/io/sas/{saslib.pyx => sas.pyx} | 0 pandas/io/sas/sas7bdat.py | 2 +- pandas/io/sql.py | 2 +- pandas/io/stata.py | 4 +- pandas/json.py | 7 + pandas/lib.py | 7 + pandas/parser.py | 8 + pandas/sparse/array.py | 8 +- pandas/sparse/list.py | 2 +- pandas/sparse/series.py | 6 +- pandas/{src => sparse}/sparse.pyx | 0 .../{src => sparse}/sparse_op_helper.pxi.in | 0 pandas/tests/api/test_api.py | 48 +++- pandas/tests/computation/test_eval.py | 2 +- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/frame/test_indexing.py | 4 +- pandas/tests/frame/test_to_csv.py | 2 +- pandas/tests/groupby/test_bin_groupby.py | 4 +- pandas/tests/groupby/test_transform.py | 13 +- pandas/tests/indexes/common.py | 9 +- .../indexes/datetimes/test_construction.py | 5 +- .../indexes/datetimes/test_date_range.py | 2 +- .../tests/indexes/datetimes/test_datetime.py | 2 +- pandas/tests/indexes/datetimes/test_ops.py | 2 +- pandas/tests/indexes/datetimes/test_setops.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 4 +- pandas/tests/indexes/period/test_indexing.py | 3 +- pandas/tests/indexes/period/test_ops.py | 2 +- pandas/tests/indexes/period/test_tools.py | 2 +- pandas/tests/indexes/test_base.py | 2 +- pandas/tests/indexes/test_multi.py | 9 +- pandas/tests/indexes/test_numeric.py | 2 +- .../indexes/timedeltas/test_construction.py | 4 +- pandas/tests/indexes/timedeltas/test_ops.py | 6 +- pandas/tests/indexes/timedeltas/test_tools.py | 5 +- pandas/tests/indexing/test_indexing.py | 2 +- pandas/tests/io/json/test_pandas.py | 36 +-- pandas/tests/io/json/test_ujson.py | 4 +- pandas/tests/{ => io}/msgpack/__init__.py | 0 pandas/tests/{ => io}/msgpack/test_buffer.py | 2 +- pandas/tests/{ => io}/msgpack/test_case.py | 2 +- pandas/tests/{ => io}/msgpack/test_except.py | 2 +- .../tests/{ => io}/msgpack/test_extension.py | 4 +- pandas/tests/{ => io}/msgpack/test_format.py | 2 +- pandas/tests/{ => io}/msgpack/test_limits.py | 2 +- pandas/tests/{ => io}/msgpack/test_newspec.py | 2 +- pandas/tests/{ => io}/msgpack/test_obj.py | 2 +- pandas/tests/{ => io}/msgpack/test_pack.py | 2 +- .../tests/{ => io}/msgpack/test_read_size.py | 2 +- pandas/tests/{ => io}/msgpack/test_seq.py | 2 +- .../tests/{ => 
io}/msgpack/test_sequnpack.py | 4 +- pandas/tests/{ => io}/msgpack/test_subtype.py | 2 +- pandas/tests/{ => io}/msgpack/test_unpack.py | 2 +- .../tests/{ => io}/msgpack/test_unpack_raw.py | 2 +- pandas/tests/io/parser/common.py | 2 +- pandas/tests/io/parser/converters.py | 2 +- pandas/tests/io/parser/parse_dates.py | 4 +- pandas/tests/io/parser/test_textreader.py | 4 +- pandas/tests/io/parser/usecols.py | 2 +- pandas/tests/io/test_html.py | 2 +- pandas/tests/io/test_packers.py | 5 +- pandas/tests/io/test_pytables.py | 2 +- pandas/tests/io/test_stata.py | 2 +- pandas/tests/scalar/test_period.py | 14 +- pandas/tests/scalar/test_timedelta.py | 13 +- pandas/tests/scalar/test_timestamp.py | 10 +- pandas/tests/series/test_constructors.py | 23 +- pandas/tests/series/test_dtypes.py | 2 +- pandas/tests/series/test_indexing.py | 6 +- pandas/tests/series/test_internals.py | 2 +- pandas/tests/series/test_missing.py | 27 +-- pandas/tests/series/test_replace.py | 2 +- pandas/tests/series/test_timeseries.py | 2 +- pandas/tests/sparse/test_array.py | 2 +- pandas/tests/sparse/test_frame.py | 2 +- pandas/tests/sparse/test_libsparse.py | 2 +- pandas/tests/sparse/test_series.py | 2 +- pandas/tests/test_algos.py | 41 ++-- pandas/tests/test_base.py | 7 +- pandas/tests/test_internals.py | 2 +- pandas/tests/test_join.py | 2 +- pandas/tests/test_lib.py | 2 +- pandas/tests/test_multilevel.py | 2 +- pandas/tests/test_take.py | 2 +- pandas/tests/tools/test_join.py | 8 +- pandas/tests/tseries/test_offsets.py | 4 +- pandas/tests/tseries/test_resample.py | 2 +- pandas/tests/tseries/test_timezones.py | 5 +- pandas/tests/types/test_inference.py | 14 +- pandas/tests/types/test_io.py | 4 +- pandas/tests/types/test_missing.py | 2 +- pandas/tools/hashing.py | 5 +- pandas/{src/hash.pyx => tools/hashing.pyx} | 0 pandas/tools/merge.py | 28 +-- pandas/tools/tile.py | 2 +- pandas/tools/util.py | 2 +- pandas/tseries/api.py | 2 +- pandas/tseries/base.py | 44 ++-- pandas/tseries/common.py | 3 +- pandas/tseries/converter.py | 2 +- pandas/tseries/frequencies.py | 6 +- pandas/tseries/index.py | 106 ++++----- pandas/tseries/offsets.py | 3 +- pandas/tseries/period.py | 12 +- pandas/tseries/resample.py | 7 +- pandas/tseries/tdi.py | 41 ++-- pandas/tseries/timedeltas.py | 2 +- pandas/tseries/tools.py | 3 +- pandas/tslib.py | 8 + pandas/types/cast.py | 10 +- pandas/types/common.py | 2 +- pandas/types/concat.py | 2 +- pandas/types/inference.py | 2 +- pandas/types/missing.py | 4 +- pandas/util/decorators.py | 2 +- pandas/util/depr_module.py | 30 ++- pandas/util/testing.py | 28 ++- pandas/{src => util}/testing.pyx | 0 scripts/bench_join.py | 2 +- scripts/bench_join_multi.py | 2 +- scripts/groupby_test.py | 2 +- scripts/roll_median_leak.py | 2 +- setup.py | 223 +++++++++--------- vb_suite/pandas_vb_common.py | 2 +- 243 files changed, 885 insertions(+), 771 deletions(-) create mode 100644 pandas/_libs/__init__.py rename pandas/{ => _libs}/algos.pyx (99%) rename pandas/{src => _libs}/algos_common_helper.pxi.in (99%) rename pandas/{src => _libs}/algos_groupby_helper.pxi.in (100%) rename pandas/{src => _libs}/algos_rank_helper.pxi.in (100%) rename pandas/{src => _libs}/algos_take_helper.pxi.in (100%) rename pandas/{ => _libs}/hashtable.pxd (100%) rename pandas/{ => _libs}/hashtable.pyx (99%) rename pandas/{src => _libs}/hashtable_class_helper.pxi.in (100%) rename pandas/{src => _libs}/hashtable_func_helper.pxi.in (100%) rename pandas/{ => _libs}/index.pyx (99%) rename pandas/{src => _libs}/index_class_helper.pxi.in (100%) rename pandas/{src => 
_libs}/join.pyx (98%) rename pandas/{src/joins_func_helper.pxi.in => _libs/join_func_helper.pxi.in} (100%) rename pandas/{src => _libs}/join_helper.pxi.in (100%) rename pandas/{ => _libs}/lib.pxd (100%) rename pandas/{ => _libs}/lib.pyx (100%) rename pandas/{src => _libs}/period.pyx (98%) rename pandas/{src => _libs}/reshape.pyx (100%) rename pandas/{src => _libs}/reshape_helper.pxi.in (100%) rename pandas/{ => _libs}/src/datetime.pxd (100%) rename pandas/{ => _libs}/src/datetime/np_datetime.c (100%) rename pandas/{ => _libs}/src/datetime/np_datetime.h (100%) rename pandas/{ => _libs}/src/datetime/np_datetime_strings.c (100%) rename pandas/{ => _libs}/src/datetime/np_datetime_strings.h (100%) rename pandas/{ => _libs}/src/datetime_helper.h (100%) rename pandas/{ => _libs}/src/headers/math.h (100%) rename pandas/{ => _libs}/src/headers/ms_inttypes.h (100%) rename pandas/{ => _libs}/src/headers/ms_stdint.h (100%) rename pandas/{ => _libs}/src/headers/portable.h (100%) rename pandas/{ => _libs}/src/headers/stdint.h (100%) rename pandas/{ => _libs}/src/helper.h (100%) rename pandas/{ => _libs}/src/inference.pyx (100%) rename pandas/{ => _libs}/src/khash.pxd (100%) rename pandas/{ => _libs}/src/klib/khash.h (100%) rename pandas/{ => _libs}/src/klib/khash_python.h (100%) rename pandas/{ => _libs}/src/klib/ktypes.h (100%) rename pandas/{ => _libs}/src/klib/kvec.h (100%) rename pandas/{ => _libs}/src/msgpack/pack.h (100%) rename pandas/{ => _libs}/src/msgpack/pack_template.h (100%) rename pandas/{ => _libs}/src/msgpack/sysdep.h (100%) rename pandas/{ => _libs}/src/msgpack/unpack.h (100%) rename pandas/{ => _libs}/src/msgpack/unpack_define.h (100%) rename pandas/{ => _libs}/src/msgpack/unpack_template.h (100%) rename pandas/{ => _libs}/src/numpy.pxd (100%) rename pandas/{ => _libs}/src/numpy_helper.h (100%) rename pandas/{ => _libs}/src/offsets.pyx (100%) rename pandas/{ => _libs}/src/parse_helper.h (100%) rename pandas/{ => _libs}/src/parser/.gitignore (100%) rename pandas/{ => _libs}/src/parser/Makefile (100%) rename pandas/{ => _libs}/src/parser/io.c (100%) rename pandas/{ => _libs}/src/parser/io.h (100%) rename pandas/{ => _libs}/src/parser/tokenizer.c (100%) rename pandas/{ => _libs}/src/parser/tokenizer.h (100%) rename pandas/{ => _libs}/src/period_helper.c (100%) rename pandas/{ => _libs}/src/period_helper.h (100%) rename pandas/{ => _libs}/src/properties.pyx (100%) rename pandas/{ => _libs}/src/reduce.pyx (100%) rename pandas/{ => _libs}/src/skiplist.h (100%) rename pandas/{ => _libs}/src/skiplist.pxd (100%) rename pandas/{ => _libs}/src/skiplist.pyx (100%) rename pandas/{ => _libs}/src/ujson/lib/ultrajson.h (100%) rename pandas/{ => _libs}/src/ujson/lib/ultrajsondec.c (100%) rename pandas/{ => _libs}/src/ujson/lib/ultrajsonenc.c (100%) rename pandas/{ => _libs}/src/ujson/python/JSONtoObj.c (100%) rename pandas/{ => _libs}/src/ujson/python/objToJSON.c (99%) rename pandas/{ => _libs}/src/ujson/python/py_defines.h (100%) rename pandas/{ => _libs}/src/ujson/python/ujson.c (95%) rename pandas/{ => _libs}/src/ujson/python/version.h (100%) rename pandas/{ => _libs}/src/util.pxd (100%) rename pandas/{ => _libs}/tslib.pxd (100%) rename pandas/{ => _libs}/tslib.pyx (100%) rename pandas/{ => core}/window.pyx (99%) rename pandas/{ => io}/msgpack/__init__.py (81%) rename pandas/{ => io}/msgpack/_packer.pyx (98%) rename pandas/{ => io}/msgpack/_unpacker.pyx (98%) rename pandas/{ => io}/msgpack/_version.py (100%) rename pandas/{ => io}/msgpack/exceptions.py (100%) rename pandas/{parser.pyx => 
io/parsers.pyx} (99%) rename pandas/io/sas/{saslib.pyx => sas.pyx} (100%) create mode 100644 pandas/json.py create mode 100644 pandas/lib.py create mode 100644 pandas/parser.py rename pandas/{src => sparse}/sparse.pyx (100%) rename pandas/{src => sparse}/sparse_op_helper.pxi.in (100%) rename pandas/tests/{ => io}/msgpack/__init__.py (100%) rename pandas/tests/{ => io}/msgpack/test_buffer.py (90%) rename pandas/tests/{ => io}/msgpack/test_case.py (98%) rename pandas/tests/{ => io}/msgpack/test_except.py (96%) rename pandas/tests/{ => io}/msgpack/test_extension.py (96%) rename pandas/tests/{ => io}/msgpack/test_format.py (98%) rename pandas/tests/{ => io}/msgpack/test_limits.py (97%) rename pandas/tests/{ => io}/msgpack/test_newspec.py (97%) rename pandas/tests/{ => io}/msgpack/test_obj.py (98%) rename pandas/tests/{ => io}/msgpack/test_pack.py (98%) rename pandas/tests/{ => io}/msgpack/test_read_size.py (96%) rename pandas/tests/{ => io}/msgpack/test_seq.py (96%) rename pandas/tests/{ => io}/msgpack/test_sequnpack.py (97%) rename pandas/tests/{ => io}/msgpack/test_subtype.py (90%) rename pandas/tests/{ => io}/msgpack/test_unpack.py (96%) rename pandas/tests/{ => io}/msgpack/test_unpack_raw.py (94%) rename pandas/{src/hash.pyx => tools/hashing.pyx} (100%) create mode 100644 pandas/tslib.py rename pandas/{src => util}/testing.pyx (100%) diff --git a/Makefile b/Makefile index 9a768932b8bea..90dcd16d955d6 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -tseries: pandas/lib.pyx pandas/tslib.pyx pandas/hashtable.pyx +tseries: pandas/_libs/lib.pyx pandas/_libs/tslib.pyx pandas/_libs/hashtable.pyx python setup.py build_ext --inplace .PHONY : develop build clean clean_pyc tseries doc diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py index 53cb1cf465698..72700c3de282e 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/binary_ops.py @@ -107,4 +107,4 @@ def setup(self): self.s = Series(date_range('20010101', periods=self.N, freq='T', tz='US/Eastern')) self.ts = self.s[self.halfway] - self.s2 = Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern')) \ No newline at end of file + self.s2 = Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern')) diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py index 25b0b5dd4d1b0..56ccc94c414fb 100644 --- a/asv_bench/benchmarks/pandas_vb_common.py +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -8,16 +8,22 @@ import random import numpy as np import threading +from importlib import import_module + try: from pandas.compat import range except ImportError: pass np.random.seed(1234) -try: - import pandas._tseries as lib -except: - import pandas.lib as lib + +# try em until it works! 
+for imp in ['pandas_tseries', 'pandas.lib', 'pandas._libs.lib']: + try: + lib = import_module(imp) + break + except: + pass try: Panel = Panel diff --git a/asv_bench/benchmarks/panel_methods.py b/asv_bench/benchmarks/panel_methods.py index ebe278f6e68b5..6609305502011 100644 --- a/asv_bench/benchmarks/panel_methods.py +++ b/asv_bench/benchmarks/panel_methods.py @@ -21,4 +21,4 @@ def time_shift(self): self.panel.shift(1) def time_shift_minor(self): - self.panel.shift(1, axis='minor') \ No newline at end of file + self.panel.shift(1, axis='minor') diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ece9ff4a1adff..8f2033de6c77f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -484,6 +484,35 @@ New Behavior: In [11]: index.memory_usage(deep=True) Out[11]: 260 +.. _whatsnew_0200.api_breaking.extensions: + +Extension Modules Moved +^^^^^^^^^^^^^^^^^^^^^^^ + +Some formerly public c/c++/cython extension modules have been moved and/or renamed. These are all removed from the public API. +If indicated, a deprecation warning will be issued if you reference that module. (:issue:`12588`) + +.. csv-table:: + :header: "Previous Location", "New Location", "Deprecated" + :widths: 30, 30, 4 + + "pandas.lib", "pandas._libs.lib", "X" + "pandas.tslib", "pandas._libs.tslib", "X" + "pandas._join", "pandas._libs.join", "" + "pandas._period", "pandas._libs.period", "" + "pandas.msgpack", "pandas.io.msgpack", "" + "pandas.index", "pandas._libs.index", "" + "pandas.algos", "pandas._libs.algos", "" + "pandas.hashtable", "pandas._libs.hashtable", "" + "pandas.json", "pandas.io.json.libjson", "X" + "pandas.parser", "pandas.io.libparsers", "X" + "pandas.io.sas.saslib", "pandas.io.sas.libsas", "" + "pandas._testing", "pandas.util.libtesting", "" + "pandas._sparse", "pandas.sparse.libsparse", "" + "pandas._hash", "pandas.tools.libhash", "" + "pandas._window", "pandas.core.libwindow", "" + + .. 
_whatsnew_0200.api_breaking.groupby_describe: Groupby Describe Formatting diff --git a/pandas/__init__.py b/pandas/__init__.py index 3bded89e6644a..5c7c9d44c5d10 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -23,7 +23,9 @@ from pandas.compat.numpy import * try: - from pandas import hashtable, tslib, lib + from pandas._libs import (hashtable as _hashtable, + lib as _lib, + tslib as _tslib) except ImportError as e: # pragma: no cover # hack but overkill to use re module = str(e).lstrip('cannot import name ') @@ -52,11 +54,17 @@ from pandas.tools.util import to_numeric from pandas.core.reshape import melt from pandas.util.print_versions import show_versions - from pandas.io.api import * - from pandas.util._tester import test +# extension module deprecations +from pandas.util.depr_module import _DeprecatedModule + +json = _DeprecatedModule(deprmod='pandas.json', deprmodto='pandas.io.json.libjson') +parser = _DeprecatedModule(deprmod='pandas.parser', deprmodto='pandas.io.libparsers') +lib = _DeprecatedModule(deprmod='pandas.lib', deprmodto='pandas._libs.lib') +tslib = _DeprecatedModule(deprmod='pandas.tslib', deprmodto='pandas._libs.tslib') + # use the closest tagged version if possible from ._version import get_versions v = get_versions() diff --git a/pandas/_libs/__init__.py b/pandas/_libs/__init__.py new file mode 100644 index 0000000000000..ab3832d0292ba --- /dev/null +++ b/pandas/_libs/__init__.py @@ -0,0 +1,8 @@ +# flake8: noqa + +from .tslib import iNaT, NaT, Timestamp, Timedelta, OutOfBoundsDatetime + +# TODO +# period is directly dependent on tslib and imports python +# modules, so exposing Period as an alias is currently not possible +# from period import Period diff --git a/pandas/algos.pyx b/pandas/_libs/algos.pyx similarity index 99% rename from pandas/algos.pyx rename to pandas/_libs/algos.pyx index 32955fd0f465b..7d3ce3280ec1e 100644 --- a/pandas/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -37,7 +37,7 @@ float64 = np.dtype(np.float64) cdef double NaN = np.NaN cdef double nan = NaN -cdef extern from "src/headers/math.h": +cdef extern from "../src/headers/math.h": double sqrt(double x) nogil double fabs(double) nogil @@ -46,7 +46,7 @@ from util cimport numeric, get_nat cimport lib from lib cimport is_null_datetimelike -from pandas import lib +from pandas._libs import lib cdef int64_t iNaT = get_nat() diff --git a/pandas/src/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in similarity index 99% rename from pandas/src/algos_common_helper.pxi.in rename to pandas/_libs/algos_common_helper.pxi.in index b83dec1d26242..336dd77ea9a89 100644 --- a/pandas/src/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -433,7 +433,7 @@ def arrmap_{{name}}(ndarray[{{c_type}}] index, object func): cdef ndarray[object] result = np.empty(length, dtype=np.object_) - from pandas.lib import maybe_convert_objects + from pandas._libs.lib import maybe_convert_objects for i in range(length): result[i] = func(index[i]) diff --git a/pandas/src/algos_groupby_helper.pxi.in b/pandas/_libs/algos_groupby_helper.pxi.in similarity index 100% rename from pandas/src/algos_groupby_helper.pxi.in rename to pandas/_libs/algos_groupby_helper.pxi.in diff --git a/pandas/src/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in similarity index 100% rename from pandas/src/algos_rank_helper.pxi.in rename to pandas/_libs/algos_rank_helper.pxi.in diff --git a/pandas/src/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in similarity index 100% rename from 
pandas/src/algos_take_helper.pxi.in rename to pandas/_libs/algos_take_helper.pxi.in diff --git a/pandas/hashtable.pxd b/pandas/_libs/hashtable.pxd similarity index 100% rename from pandas/hashtable.pxd rename to pandas/_libs/hashtable.pxd diff --git a/pandas/hashtable.pyx b/pandas/_libs/hashtable.pyx similarity index 99% rename from pandas/hashtable.pyx rename to pandas/_libs/hashtable.pyx index 276b0679070dc..eee287b2c157b 100644 --- a/pandas/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -22,7 +22,7 @@ cdef extern from "numpy/npy_math.h": cimport cython cimport numpy as cnp -from pandas.lib import checknull +from pandas._libs.lib import checknull cnp.import_array() cnp.import_ufunc() diff --git a/pandas/src/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in similarity index 100% rename from pandas/src/hashtable_class_helper.pxi.in rename to pandas/_libs/hashtable_class_helper.pxi.in diff --git a/pandas/src/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in similarity index 100% rename from pandas/src/hashtable_func_helper.pxi.in rename to pandas/_libs/hashtable_func_helper.pxi.in diff --git a/pandas/index.pyx b/pandas/_libs/index.pyx similarity index 99% rename from pandas/index.pyx rename to pandas/_libs/index.pyx index 37fe7d90bebe0..c7a537acf5d6f 100644 --- a/pandas/index.pyx +++ b/pandas/_libs/index.pyx @@ -17,8 +17,8 @@ import numpy as np cimport tslib from hashtable cimport * -from pandas import algos, tslib, hashtable as _hash -from pandas.tslib import Timestamp, Timedelta +from pandas._libs import tslib, algos, hashtable as _hash +from pandas._libs.tslib import Timestamp, Timedelta from datetime cimport (get_datetime64_value, _pydatetime_to_dts, pandas_datetimestruct) diff --git a/pandas/src/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in similarity index 100% rename from pandas/src/index_class_helper.pxi.in rename to pandas/_libs/index_class_helper.pxi.in diff --git a/pandas/src/join.pyx b/pandas/_libs/join.pyx similarity index 98% rename from pandas/src/join.pyx rename to pandas/_libs/join.pyx index 65c790beb5dbf..385a9762ed90d 100644 --- a/pandas/src/join.pyx +++ b/pandas/_libs/join.pyx @@ -32,10 +32,10 @@ float64 = np.dtype(np.float64) cdef double NaN = np.NaN cdef double nan = NaN -from pandas.algos import groupsort_indexer, ensure_platform_int +from pandas._libs.algos import groupsort_indexer, ensure_platform_int from pandas.core.algorithms import take_nd -include "joins_func_helper.pxi" +include "join_func_helper.pxi" def inner_join(ndarray[int64_t] left, ndarray[int64_t] right, diff --git a/pandas/src/joins_func_helper.pxi.in b/pandas/_libs/join_func_helper.pxi.in similarity index 100% rename from pandas/src/joins_func_helper.pxi.in rename to pandas/_libs/join_func_helper.pxi.in diff --git a/pandas/src/join_helper.pxi.in b/pandas/_libs/join_helper.pxi.in similarity index 100% rename from pandas/src/join_helper.pxi.in rename to pandas/_libs/join_helper.pxi.in diff --git a/pandas/lib.pxd b/pandas/_libs/lib.pxd similarity index 100% rename from pandas/lib.pxd rename to pandas/_libs/lib.pxd diff --git a/pandas/lib.pyx b/pandas/_libs/lib.pyx similarity index 100% rename from pandas/lib.pyx rename to pandas/_libs/lib.pyx diff --git a/pandas/src/period.pyx b/pandas/_libs/period.pyx similarity index 98% rename from pandas/src/period.pyx rename to pandas/_libs/period.pyx index 2d92b9f192328..f30035910a62f 100644 --- a/pandas/src/period.pyx +++ b/pandas/_libs/period.pyx @@ -16,19 +16,15 @@ cdef extern from 
"datetime_helper.h": from libc.stdlib cimport free from pandas import compat - -from pandas.tseries import offsets -from pandas.tseries.tools import parse_time_string +from pandas.compat import PY2 cimport cython from datetime cimport * -cimport util -cimport lib +cimport util, lib from lib cimport is_null_datetimelike, is_period -import lib -from pandas import tslib -from tslib import Timedelta, Timestamp, iNaT, NaT -from tslib import have_pytz, _get_utcoffset +from pandas._libs import tslib, lib +from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, + NaT, have_pytz, _get_utcoffset) from tslib cimport ( maybe_get_tz, _is_utc, @@ -37,12 +33,10 @@ from tslib cimport ( _nat_scalar_rules, ) +from pandas.tseries import offsets +from pandas.tseries.tools import parse_time_string from pandas.tseries import frequencies -from sys import version_info - -cdef bint PY2 = version_info[0] == 2 - cdef int64_t NPY_NAT = util.get_nat() cdef int RESO_US = frequencies.RESO_US @@ -474,7 +468,7 @@ def extract_ordinals(ndarray[object] values, freq): p = values[i] if is_null_datetimelike(p): - ordinals[i] = tslib.iNaT + ordinals[i] = iNaT else: try: ordinals[i] = p.ordinal @@ -485,9 +479,9 @@ def extract_ordinals(ndarray[object] values, freq): except AttributeError: p = Period(p, freq=freq) - if p is tslib.NaT: + if p is NaT: # input may contain NaT-like string - ordinals[i] = tslib.iNaT + ordinals[i] = iNaT else: ordinals[i] = p.ordinal @@ -716,8 +710,8 @@ cdef class _Period(object): """ Fast creation from an ordinal and freq that are already validated! """ - if ordinal == tslib.iNaT: - return tslib.NaT + if ordinal == iNaT: + return NaT else: self = _Period.__new__(cls) self.ordinal = ordinal @@ -730,7 +724,7 @@ cdef class _Period(object): msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) return PyObject_RichCompareBool(self.ordinal, other.ordinal, op) - elif other is tslib.NaT: + elif other is NaT: return _nat_scalar_rules[op] # index/series like elif hasattr(other, '_typ'): @@ -776,8 +770,8 @@ cdef class _Period(object): offsets.Tick, offsets.DateOffset, Timedelta)): return self._add_delta(other) - elif other is tslib.NaT: - return tslib.NaT + elif other is NaT: + return NaT elif lib.is_integer(other): ordinal = self.ordinal + other * self.freq.n return Period(ordinal=ordinal, freq=self.freq) @@ -808,8 +802,8 @@ cdef class _Period(object): else: # pragma: no cover return NotImplemented elif isinstance(other, Period): - if self is tslib.NaT: - return tslib.NaT + if self is NaT: + return NaT return NotImplemented else: return NotImplemented @@ -1164,7 +1158,7 @@ class Period(_Period): if (year is None and month is None and quarter is None and day is None and hour is None and minute is None and second is None): - ordinal = tslib.iNaT + ordinal = iNaT else: if freq is None: raise ValueError("If value is None, freq cannot be None") @@ -1190,7 +1184,7 @@ class Period(_Period): ordinal = converted.ordinal elif is_null_datetimelike(value) or value in tslib._nat_strings: - ordinal = tslib.iNaT + ordinal = iNaT elif isinstance(value, compat.string_types) or lib.is_integer(value): if lib.is_integer(value): diff --git a/pandas/src/reshape.pyx b/pandas/_libs/reshape.pyx similarity index 100% rename from pandas/src/reshape.pyx rename to pandas/_libs/reshape.pyx diff --git a/pandas/src/reshape_helper.pxi.in b/pandas/_libs/reshape_helper.pxi.in similarity index 100% rename from pandas/src/reshape_helper.pxi.in rename to pandas/_libs/reshape_helper.pxi.in diff --git 
a/pandas/src/datetime.pxd b/pandas/_libs/src/datetime.pxd similarity index 100% rename from pandas/src/datetime.pxd rename to pandas/_libs/src/datetime.pxd diff --git a/pandas/src/datetime/np_datetime.c b/pandas/_libs/src/datetime/np_datetime.c similarity index 100% rename from pandas/src/datetime/np_datetime.c rename to pandas/_libs/src/datetime/np_datetime.c diff --git a/pandas/src/datetime/np_datetime.h b/pandas/_libs/src/datetime/np_datetime.h similarity index 100% rename from pandas/src/datetime/np_datetime.h rename to pandas/_libs/src/datetime/np_datetime.h diff --git a/pandas/src/datetime/np_datetime_strings.c b/pandas/_libs/src/datetime/np_datetime_strings.c similarity index 100% rename from pandas/src/datetime/np_datetime_strings.c rename to pandas/_libs/src/datetime/np_datetime_strings.c diff --git a/pandas/src/datetime/np_datetime_strings.h b/pandas/_libs/src/datetime/np_datetime_strings.h similarity index 100% rename from pandas/src/datetime/np_datetime_strings.h rename to pandas/_libs/src/datetime/np_datetime_strings.h diff --git a/pandas/src/datetime_helper.h b/pandas/_libs/src/datetime_helper.h similarity index 100% rename from pandas/src/datetime_helper.h rename to pandas/_libs/src/datetime_helper.h diff --git a/pandas/src/headers/math.h b/pandas/_libs/src/headers/math.h similarity index 100% rename from pandas/src/headers/math.h rename to pandas/_libs/src/headers/math.h diff --git a/pandas/src/headers/ms_inttypes.h b/pandas/_libs/src/headers/ms_inttypes.h similarity index 100% rename from pandas/src/headers/ms_inttypes.h rename to pandas/_libs/src/headers/ms_inttypes.h diff --git a/pandas/src/headers/ms_stdint.h b/pandas/_libs/src/headers/ms_stdint.h similarity index 100% rename from pandas/src/headers/ms_stdint.h rename to pandas/_libs/src/headers/ms_stdint.h diff --git a/pandas/src/headers/portable.h b/pandas/_libs/src/headers/portable.h similarity index 100% rename from pandas/src/headers/portable.h rename to pandas/_libs/src/headers/portable.h diff --git a/pandas/src/headers/stdint.h b/pandas/_libs/src/headers/stdint.h similarity index 100% rename from pandas/src/headers/stdint.h rename to pandas/_libs/src/headers/stdint.h diff --git a/pandas/src/helper.h b/pandas/_libs/src/helper.h similarity index 100% rename from pandas/src/helper.h rename to pandas/_libs/src/helper.h diff --git a/pandas/src/inference.pyx b/pandas/_libs/src/inference.pyx similarity index 100% rename from pandas/src/inference.pyx rename to pandas/_libs/src/inference.pyx diff --git a/pandas/src/khash.pxd b/pandas/_libs/src/khash.pxd similarity index 100% rename from pandas/src/khash.pxd rename to pandas/_libs/src/khash.pxd diff --git a/pandas/src/klib/khash.h b/pandas/_libs/src/klib/khash.h similarity index 100% rename from pandas/src/klib/khash.h rename to pandas/_libs/src/klib/khash.h diff --git a/pandas/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h similarity index 100% rename from pandas/src/klib/khash_python.h rename to pandas/_libs/src/klib/khash_python.h diff --git a/pandas/src/klib/ktypes.h b/pandas/_libs/src/klib/ktypes.h similarity index 100% rename from pandas/src/klib/ktypes.h rename to pandas/_libs/src/klib/ktypes.h diff --git a/pandas/src/klib/kvec.h b/pandas/_libs/src/klib/kvec.h similarity index 100% rename from pandas/src/klib/kvec.h rename to pandas/_libs/src/klib/kvec.h diff --git a/pandas/src/msgpack/pack.h b/pandas/_libs/src/msgpack/pack.h similarity index 100% rename from pandas/src/msgpack/pack.h rename to pandas/_libs/src/msgpack/pack.h diff --git 
a/pandas/src/msgpack/pack_template.h b/pandas/_libs/src/msgpack/pack_template.h similarity index 100% rename from pandas/src/msgpack/pack_template.h rename to pandas/_libs/src/msgpack/pack_template.h diff --git a/pandas/src/msgpack/sysdep.h b/pandas/_libs/src/msgpack/sysdep.h similarity index 100% rename from pandas/src/msgpack/sysdep.h rename to pandas/_libs/src/msgpack/sysdep.h diff --git a/pandas/src/msgpack/unpack.h b/pandas/_libs/src/msgpack/unpack.h similarity index 100% rename from pandas/src/msgpack/unpack.h rename to pandas/_libs/src/msgpack/unpack.h diff --git a/pandas/src/msgpack/unpack_define.h b/pandas/_libs/src/msgpack/unpack_define.h similarity index 100% rename from pandas/src/msgpack/unpack_define.h rename to pandas/_libs/src/msgpack/unpack_define.h diff --git a/pandas/src/msgpack/unpack_template.h b/pandas/_libs/src/msgpack/unpack_template.h similarity index 100% rename from pandas/src/msgpack/unpack_template.h rename to pandas/_libs/src/msgpack/unpack_template.h diff --git a/pandas/src/numpy.pxd b/pandas/_libs/src/numpy.pxd similarity index 100% rename from pandas/src/numpy.pxd rename to pandas/_libs/src/numpy.pxd diff --git a/pandas/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h similarity index 100% rename from pandas/src/numpy_helper.h rename to pandas/_libs/src/numpy_helper.h diff --git a/pandas/src/offsets.pyx b/pandas/_libs/src/offsets.pyx similarity index 100% rename from pandas/src/offsets.pyx rename to pandas/_libs/src/offsets.pyx diff --git a/pandas/src/parse_helper.h b/pandas/_libs/src/parse_helper.h similarity index 100% rename from pandas/src/parse_helper.h rename to pandas/_libs/src/parse_helper.h diff --git a/pandas/src/parser/.gitignore b/pandas/_libs/src/parser/.gitignore similarity index 100% rename from pandas/src/parser/.gitignore rename to pandas/_libs/src/parser/.gitignore diff --git a/pandas/src/parser/Makefile b/pandas/_libs/src/parser/Makefile similarity index 100% rename from pandas/src/parser/Makefile rename to pandas/_libs/src/parser/Makefile diff --git a/pandas/src/parser/io.c b/pandas/_libs/src/parser/io.c similarity index 100% rename from pandas/src/parser/io.c rename to pandas/_libs/src/parser/io.c diff --git a/pandas/src/parser/io.h b/pandas/_libs/src/parser/io.h similarity index 100% rename from pandas/src/parser/io.h rename to pandas/_libs/src/parser/io.h diff --git a/pandas/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c similarity index 100% rename from pandas/src/parser/tokenizer.c rename to pandas/_libs/src/parser/tokenizer.c diff --git a/pandas/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h similarity index 100% rename from pandas/src/parser/tokenizer.h rename to pandas/_libs/src/parser/tokenizer.h diff --git a/pandas/src/period_helper.c b/pandas/_libs/src/period_helper.c similarity index 100% rename from pandas/src/period_helper.c rename to pandas/_libs/src/period_helper.c diff --git a/pandas/src/period_helper.h b/pandas/_libs/src/period_helper.h similarity index 100% rename from pandas/src/period_helper.h rename to pandas/_libs/src/period_helper.h diff --git a/pandas/src/properties.pyx b/pandas/_libs/src/properties.pyx similarity index 100% rename from pandas/src/properties.pyx rename to pandas/_libs/src/properties.pyx diff --git a/pandas/src/reduce.pyx b/pandas/_libs/src/reduce.pyx similarity index 100% rename from pandas/src/reduce.pyx rename to pandas/_libs/src/reduce.pyx diff --git a/pandas/src/skiplist.h b/pandas/_libs/src/skiplist.h similarity index 100% rename from pandas/src/skiplist.h 
rename to pandas/_libs/src/skiplist.h diff --git a/pandas/src/skiplist.pxd b/pandas/_libs/src/skiplist.pxd similarity index 100% rename from pandas/src/skiplist.pxd rename to pandas/_libs/src/skiplist.pxd diff --git a/pandas/src/skiplist.pyx b/pandas/_libs/src/skiplist.pyx similarity index 100% rename from pandas/src/skiplist.pyx rename to pandas/_libs/src/skiplist.pyx diff --git a/pandas/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h similarity index 100% rename from pandas/src/ujson/lib/ultrajson.h rename to pandas/_libs/src/ujson/lib/ultrajson.h diff --git a/pandas/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c similarity index 100% rename from pandas/src/ujson/lib/ultrajsondec.c rename to pandas/_libs/src/ujson/lib/ultrajsondec.c diff --git a/pandas/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c similarity index 100% rename from pandas/src/ujson/lib/ultrajsonenc.c rename to pandas/_libs/src/ujson/lib/ultrajsonenc.c diff --git a/pandas/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c similarity index 100% rename from pandas/src/ujson/python/JSONtoObj.c rename to pandas/_libs/src/ujson/python/JSONtoObj.c diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c similarity index 99% rename from pandas/src/ujson/python/objToJSON.c rename to pandas/_libs/src/ujson/python/objToJSON.c index e3c75d3b6e081..26a68b8a9ae3a 100644 --- a/pandas/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -180,7 +180,7 @@ void initObjToJSON(void) Py_DECREF(mod_pandas); } - mod_tslib = PyImport_ImportModule("pandas.tslib"); + mod_tslib = PyImport_ImportModule("pandas._libs.tslib"); if (mod_tslib) { cls_nat = (PyTypeObject *)PyObject_GetAttrString(mod_tslib, "NaTType"); Py_DECREF(mod_tslib); diff --git a/pandas/src/ujson/python/py_defines.h b/pandas/_libs/src/ujson/python/py_defines.h similarity index 100% rename from pandas/src/ujson/python/py_defines.h rename to pandas/_libs/src/ujson/python/py_defines.h diff --git a/pandas/src/ujson/python/ujson.c b/pandas/_libs/src/ujson/python/ujson.c similarity index 95% rename from pandas/src/ujson/python/ujson.c rename to pandas/_libs/src/ujson/python/ujson.c index 8c25975f12409..ec6720f16bc77 100644 --- a/pandas/src/ujson/python/ujson.c +++ b/pandas/_libs/src/ujson/python/ujson.c @@ -80,7 +80,7 @@ static PyMethodDef ujsonMethods[] = { static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, - "_pandasujson", + "_libjson", 0, /* m_doc */ -1, /* m_size */ ujsonMethods, /* m_methods */ @@ -90,14 +90,14 @@ static struct PyModuleDef moduledef = { NULL /* m_free */ }; -#define PYMODINITFUNC PyMODINIT_FUNC PyInit_json(void) +#define PYMODINITFUNC PyMODINIT_FUNC PyInit_libjson(void) #define PYMODULE_CREATE() PyModule_Create(&moduledef) #define MODINITERROR return NULL #else -#define PYMODINITFUNC PyMODINIT_FUNC initjson(void) -#define PYMODULE_CREATE() Py_InitModule("json", ujsonMethods) +#define PYMODINITFUNC PyMODINIT_FUNC initlibjson(void) +#define PYMODULE_CREATE() Py_InitModule("libjson", ujsonMethods) #define MODINITERROR return #endif diff --git a/pandas/src/ujson/python/version.h b/pandas/_libs/src/ujson/python/version.h similarity index 100% rename from pandas/src/ujson/python/version.h rename to pandas/_libs/src/ujson/python/version.h diff --git a/pandas/src/util.pxd b/pandas/_libs/src/util.pxd similarity index 100% rename from pandas/src/util.pxd rename to pandas/_libs/src/util.pxd diff --git 
a/pandas/tslib.pxd b/pandas/_libs/tslib.pxd similarity index 100% rename from pandas/tslib.pxd rename to pandas/_libs/tslib.pxd diff --git a/pandas/tslib.pyx b/pandas/_libs/tslib.pyx similarity index 100% rename from pandas/tslib.pyx rename to pandas/_libs/tslib.pyx diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 25a170c3eb121..279a82fea1cc2 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -62,7 +62,13 @@ def load_reduce(self): # 10890 ('pandas.core.series', 'TimeSeries'): ('pandas.core.series', 'Series'), - ('pandas.sparse.series', 'SparseTimeSeries'): ('pandas.sparse.series', 'SparseSeries') + ('pandas.sparse.series', 'SparseTimeSeries'): ('pandas.sparse.series', 'SparseSeries'), + + # 12588, extensions moving + ('pandas._sparse', 'BlockIndex'): ('pandas.sparse.libsparse', 'BlockIndex'), + ('pandas.tslib', 'Timestamp'): ('pandas._libs.tslib', 'Timestamp'), + ('pandas.tslib', '__nat_unpickle'): ('pandas._libs.tslib', '__nat_unpickle'), + ('pandas._period', 'Period'): ('pandas._libs.period', 'Period') } diff --git a/pandas/computation/scope.py b/pandas/computation/scope.py index 875aaa959b264..9ade755e0ff12 100644 --- a/pandas/computation/scope.py +++ b/pandas/computation/scope.py @@ -1,4 +1,5 @@ -"""Module for scope operations +""" +Module for scope operations """ import sys @@ -10,7 +11,8 @@ import numpy as np -import pandas as pd +import pandas +import pandas as pd # noqa from pandas.compat import DeepChainMap, map, StringIO from pandas.core.base import StringMixin import pandas.computation as compu @@ -46,7 +48,7 @@ def _raw_hex_id(obj): _DEFAULT_GLOBALS = { - 'Timestamp': pd.lib.Timestamp, + 'Timestamp': pandas._libs.lib.Timestamp, 'datetime': datetime.datetime, 'True': True, 'False': False, diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index d37c98c9b9b90..6937675603c10 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -6,7 +6,7 @@ from warnings import warn import numpy as np -from pandas import compat, lib, tslib, _np_version_under1p8 +from pandas import compat, _np_version_under1p8 from pandas.types.cast import _maybe_promote from pandas.types.generic import ABCSeries, ABCIndex from pandas.types.common import (is_unsigned_integer_dtype, @@ -34,10 +34,9 @@ from pandas.types.missing import isnull import pandas.core.common as com -import pandas.algos as algos -import pandas.hashtable as htable from pandas.compat import string_types -from pandas.tslib import iNaT +from pandas._libs import algos, lib, hashtable as htable +from pandas._libs.tslib import iNaT # --------------- # @@ -1412,7 +1411,7 @@ def diff(arr, n, axis=0): if needs_i8_conversion(arr): dtype = np.float64 arr = arr.view('i8') - na = tslib.iNaT + na = iNaT is_timedelta = True elif issubclass(dtype.type, np.integer): dtype = np.float64 diff --git a/pandas/core/base.py b/pandas/core/base.py index 55149198b0dbf..d7c9e35ab6a51 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -12,7 +12,7 @@ from pandas.core import common as com import pandas.core.nanops as nanops -import pandas.lib as lib +import pandas._libs.lib as lib from pandas.compat.numpy import function as nv from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index d5dce250275d9..47db86ce1e73e 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -4,9 +4,9 @@ from warnings import warn import types -from pandas 
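[editor note] The `pickle_compat` table above is what lets pickles written by older pandas versions, which recorded paths like `pandas.tslib.Timestamp`, resolve against the relocated modules. A self-contained Python 3 sketch of the same remapping idea using only the stdlib; the mapping shown is an illustrative subset:

import importlib
import pickle

_class_locations = {
    ('pandas.tslib', 'Timestamp'): ('pandas._libs.tslib', 'Timestamp'),
}

class CompatUnpickler(pickle.Unpickler):
    def find_class(self, module, name):
        # rewrite (module, name) pairs recorded by older versions
        module, name = _class_locations.get((module, name), (module, name))
        return getattr(importlib.import_module(module), name)

# usage:
# with open('old.pkl', 'rb') as fh:
#     obj = CompatUnpickler(fh).load()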
import compat, lib +from pandas import compat from pandas.compat import u, lzip -import pandas.algos as _algos +from pandas._libs import lib, algos as libalgos from pandas.types.generic import ABCSeries, ABCIndexClass, ABCCategoricalIndex from pandas.types.missing import isnull, notnull @@ -1817,8 +1817,8 @@ def _reverse_indexer(self): """ categories = self.categories - r, counts = _algos.groupsort_indexer(self.codes.astype('int64'), - categories.size) + r, counts = libalgos.groupsort_indexer(self.codes.astype('int64'), + categories.size) counts = counts.cumsum() result = [r[counts[indexer]:counts[indexer + 1]] for indexer in range(len(counts) - 1)] @@ -1897,7 +1897,7 @@ def mode(self): modes : `Categorical` (sorted) """ - import pandas.hashtable as htable + import pandas._libs.hashtable as htable good = self._codes != -1 values = sorted(htable.mode_int64(_ensure_int64(self._codes[good]))) result = self._constructor(values=values, categories=self.categories, diff --git a/pandas/core/common.py b/pandas/core/common.py index fddac1f29d454..93e24dce8b623 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -8,8 +8,8 @@ from functools import partial import numpy as np -import pandas.lib as lib -import pandas.tslib as tslib +from pandas._libs import lib, tslib + from pandas import compat from pandas.compat import long, zip, iteritems from pandas.core.config import get_option @@ -476,7 +476,6 @@ def _where_compat(mask, arr1, arr2): new_vals = np.where(mask, arr1.view('i8'), arr2.view('i8')) return new_vals.view(_NS_DTYPE) - import pandas.tslib as tslib if arr1.dtype == _NS_DTYPE: arr1 = tslib.ints_to_pydatetime(arr1.view('i8')) if arr2.dtype == _NS_DTYPE: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 15179ac321076..4e7a5ebdf6f67 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -71,7 +71,7 @@ from pandas.core.series import Series from pandas.core.categorical import Categorical import pandas.computation.expressions as expressions -import pandas.core.algorithms as algos +import pandas.core.algorithms as algorithms from pandas.computation.eval import eval as _eval from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u, OrderedDict, raise_with_traceback) @@ -93,8 +93,7 @@ from pandas.formats.printing import pprint_thing import pandas.tools.plotting as gfx -import pandas.lib as lib -import pandas.algos as _algos +from pandas._libs import lib, algos as libalgos from pandas.core.config import get_option @@ -2794,8 +2793,8 @@ def _reindex_multi(self, axes, copy, fill_value): if row_indexer is not None and col_indexer is not None: indexer = row_indexer, col_indexer - new_values = algos.take_2d_multi(self.values, indexer, - fill_value=fill_value) + new_values = algorithms.take_2d_multi(self.values, indexer, + fill_value=fill_value) return self._constructor(new_values, index=new_index, columns=new_columns) else: @@ -3180,12 +3179,11 @@ def duplicated(self, subset=None, keep='first'): duplicated : Series """ from pandas.core.sorting import get_group_index - from pandas.hashtable import duplicated_int64, _SIZE_HINT_LIMIT + from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT def f(vals): - labels, shape = algos.factorize(vals, - size_hint=min(len(self), - _SIZE_HINT_LIMIT)) + labels, shape = algorithms.factorize( + vals, size_hint=min(len(self), _SIZE_HINT_LIMIT)) return labels.astype('i8', copy=False), len(shape) if subset is None: @@ -3437,7 +3435,7 @@ def nlargest(self, n, columns, keep='first'): 1 10 b 2 2 8 d NaN """ - 
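[editor note] `_reverse_indexer` above leans on `groupsort_indexer` plus a cumulative sum to slice out, per category, the positions of its codes. A NumPy-only sketch of the same computation, assuming non-negative codes (the Cython version also buckets -1 for missing values):

import numpy as np

def reverse_indexer(codes, ncats):
    counts = np.bincount(codes, minlength=ncats)     # per-category sizes
    order = np.argsort(codes, kind='mergesort')      # stable, like groupsort_indexer
    bounds = np.concatenate(([0], counts.cumsum()))
    return [order[bounds[i]:bounds[i + 1]] for i in range(ncats)]

# reverse_indexer(np.array([1, 0, 1, 2]), 3)
# -> [array([1]), array([0, 2]), array([3])]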
return algos.select_n_frame(self, columns, n, 'nlargest', keep) + return algorithms.select_n_frame(self, columns, n, 'nlargest', keep) def nsmallest(self, n, columns, keep='first'): """Get the rows of a DataFrame sorted by the `n` smallest @@ -3471,7 +3469,7 @@ def nsmallest(self, n, columns, keep='first'): 0 1 a 1 2 8 d NaN """ - return algos.select_n_frame(self, columns, n, 'nsmallest', keep) + return algorithms.select_n_frame(self, columns, n, 'nsmallest', keep) def swaplevel(self, i=-2, j=-1, axis=0): """ @@ -4739,10 +4737,10 @@ def corr(self, method='pearson', min_periods=1): mat = numeric_df.values if method == 'pearson': - correl = _algos.nancorr(_ensure_float64(mat), minp=min_periods) + correl = libalgos.nancorr(_ensure_float64(mat), minp=min_periods) elif method == 'spearman': - correl = _algos.nancorr_spearman(_ensure_float64(mat), - minp=min_periods) + correl = libalgos.nancorr_spearman(_ensure_float64(mat), + minp=min_periods) else: if min_periods is None: min_periods = 1 @@ -4802,8 +4800,8 @@ def cov(self, min_periods=None): baseCov = np.cov(mat.T) baseCov = baseCov.reshape((len(cols), len(cols))) else: - baseCov = _algos.nancorr(_ensure_float64(mat), cov=True, - minp=min_periods) + baseCov = libalgos.nancorr(_ensure_float64(mat), cov=True, + minp=min_periods) return self._constructor(baseCov, index=idx, columns=cols) @@ -5669,7 +5667,7 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): indexer = indexer_cache[id(index)] = index.get_indexer(columns) values = _values_from_object(s) - aligned_values.append(algos.take_1d(values, indexer)) + aligned_values.append(algorithms.take_1d(values, indexer)) values = np.vstack(aligned_values) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 298fa75779420..ff58a2aa77447 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7,11 +7,9 @@ import json import numpy as np -import pandas.lib as lib - import pandas as pd - +from pandas._libs import tslib, lib from pandas.types.common import (_coerce_to_dtype, _ensure_int64, needs_i8_conversion, @@ -6115,7 +6113,7 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs): issubclass(y.dtype.type, (np.datetime64, np.timedelta64))): result = accum_func(y, axis) mask = isnull(self) - np.putmask(result, mask, pd.tslib.iNaT) + np.putmask(result, mask, tslib.iNaT) elif skipna and not issubclass(y.dtype.type, (np.integer, np.bool_)): mask = isnull(self) np.putmask(y, mask, mask_a) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 43c57a88b4d19..a10be078a8f96 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -55,13 +55,12 @@ from pandas.formats.printing import pprint_thing from pandas.util.validators import validate_kwargs -import pandas.core.algorithms as algos +import pandas.core.algorithms as algorithms import pandas.core.common as com from pandas.core.config import option_context -import pandas.lib as lib -from pandas.lib import Timestamp -import pandas.tslib as tslib -import pandas.algos as _algos + +from pandas._libs import lib, algos as libalgos, Timestamp, NaT, iNaT +from pandas._libs.lib import count_level_2d _doc_template = """ @@ -1474,11 +1473,11 @@ def shift(self, periods=1, freq=None, axis=0): # filled in by Cython indexer = np.zeros_like(labels) - _algos.group_shift_indexer(indexer, labels, ngroups, periods) + libalgos.group_shift_indexer(indexer, labels, ngroups, periods) output = {} for name, obj in self._iterate_slices(): - output[name] = algos.take_nd(obj.values, indexer) + output[name] 
= algorithms.take_nd(obj.values, indexer) return self._wrap_transformed_output(output) @@ -1815,13 +1814,13 @@ def _get_cython_function(self, kind, how, values, is_numeric): def get_func(fname): # see if there is a fused-type version of function # only valid for numeric - f = getattr(_algos, fname, None) + f = getattr(libalgos, fname, None) if f is not None and is_numeric: return f # otherwise find dtype-specific version, falling back to object for dt in [dtype_str, 'object']: - f = getattr(_algos, "%s_%s" % (fname, dtype_str), None) + f = getattr(libalgos, "%s_%s" % (fname, dtype_str), None) if f is not None: return f @@ -1901,7 +1900,7 @@ def _cython_operation(self, kind, values, how, axis): elif is_integer_dtype(values): # we use iNaT for the missing value on ints # so pre-convert to guard this condition - if (values == tslib.iNaT).any(): + if (values == iNaT).any(): values = _ensure_float64(values) else: values = values.astype('int64', copy=False) @@ -1943,7 +1942,7 @@ def _cython_operation(self, kind, values, how, axis): result, values, labels, func, is_numeric, is_datetimelike) if is_integer_dtype(result): - mask = result == tslib.iNaT + mask = result == iNaT if mask.any(): result = result.astype('float64') result[mask] = np.nan @@ -2034,7 +2033,8 @@ def _aggregate_series_fast(self, obj, func): dummy = obj._get_values(slice(None, 0)).to_dense() indexer = get_group_index_sorter(group_index, ngroups) obj = obj.take(indexer, convert=False) - group_index = algos.take_nd(group_index, indexer, allow_fill=False) + group_index = algorithms.take_nd( + group_index, indexer, allow_fill=False) grouper = lib.SeriesGrouper(obj, func, group_index, ngroups, dummy) result, counts = grouper.get_result() @@ -2132,7 +2132,7 @@ def groups(self): # GH 3881 result = {} for key, value in zip(self.binlabels, self.bins): - if key is not tslib.NaT: + if key is not NaT: result[key] = value return result @@ -2159,7 +2159,7 @@ def get_iterator(self, data, axis=0): start = 0 for edge, label in zip(self.bins, self.binlabels): - if label is not tslib.NaT: + if label is not NaT: yield label, slicer(start, edge) start = edge @@ -2173,7 +2173,7 @@ def indices(self): i = 0 for label, bin in zip(self.binlabels, self.bins): if i < bin: - if label is not tslib.NaT: + if label is not NaT: indices[label] = list(range(i, bin)) i = bin return indices @@ -2383,7 +2383,8 @@ def group_index(self): def _make_labels(self): if self._labels is None or self._group_index is None: - labels, uniques = algos.factorize(self.grouper, sort=self.sort) + labels, uniques = algorithms.factorize( + self.grouper, sort=self.sort) uniques = Index(uniques, name=self.name) self._labels = labels self._group_index = uniques @@ -2928,7 +2929,7 @@ def _transform_fast(self, func): ids, _, ngroup = self.grouper.group_info cast = (self.size().fillna(0) > 0).any() - out = algos.take_1d(func().values, ids) + out = algorithms.take_1d(func().values, ids) if cast: out = self._try_cast(out, self.obj) return Series(out, index=self.obj.index, name=self.obj.name) @@ -2985,7 +2986,7 @@ def nunique(self, dropna=True): except TypeError: # catches object dtypes assert val.dtype == object, \ 'val.dtype must be object, got %s' % val.dtype - val, _ = algos.factorize(val, sort=False) + val, _ = algorithms.factorize(val, sort=False) sorter = np.lexsort((val, ids)) _isnull = lambda a: a == -1 else: @@ -3069,7 +3070,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, ids, val = ids[mask], val[mask] if bins is None: - lab, lev = algos.factorize(val, 
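[editor note] The `get_func` fallback above tries a fused-type kernel first, then dtype-specific names, then the object path. Note that the loop body formats with `dtype_str` on every iteration even though it iterates `dt`; the intent is presumably `"%s_%s" % (fname, dt)`, which is what this sketch does. Names here are illustrative:

def get_cython_func(namespace, fname, dtype_str, is_numeric):
    # 1) fused-type kernel, valid only for numeric data
    f = getattr(namespace, fname, None)
    if f is not None and is_numeric:
        return f
    # 2) dtype-specific kernel, falling back to the object version
    for dt in (dtype_str, 'object'):
        f = getattr(namespace, '%s_%s' % (fname, dt), None)
        if f is not None:
            return f
    return None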
sort=True) + lab, lev = algorithms.factorize(val, sort=True) else: cat, bins = cut(val, bins, retbins=True) # bins[:-1] for backward compat; @@ -3108,7 +3109,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, if dropna: m = ids[lab == -1] if _np_version_under1p8: - mi, ml = algos.factorize(m) + mi, ml = algorithms.factorize(m) d[ml] = d[ml] - np.bincount(mi) else: np.add.at(d, m, -1) @@ -3130,7 +3131,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, out = _ensure_int64(out) return Series(out, index=mi, name=self.name) - # for compat. with algos.value_counts need to ensure every + # for compat. with libalgos.value_counts need to ensure every # bin is present at every index level, null filled with zeros diff = np.zeros(len(out), dtype='bool') for lab in labels[:-1]: @@ -3701,7 +3702,7 @@ def _transform_fast(self, result, obj): ids, _, ngroup = self.grouper.group_info output = [] for i, _ in enumerate(result.columns): - res = algos.take_1d(result.iloc[:, i].values, ids) + res = algorithms.take_1d(result.iloc[:, i].values, ids) if cast: res = self._try_cast(res, obj.iloc[:, i]) output.append(res) @@ -3995,7 +3996,6 @@ def _apply_to_column_groupbys(self, func): def count(self): """ Compute count of group, excluding missing values """ from functools import partial - from pandas.lib import count_level_2d from pandas.types.missing import _isnull_ndarraylike as isnull data, _ = self._get_data_to_aggregate() @@ -4190,7 +4190,7 @@ def __init__(self, data, labels, ngroups, axis=0): @cache_readonly def slabels(self): # Sorted labels - return algos.take_nd(self.labels, self.sort_idx, allow_fill=False) + return algorithms.take_nd(self.labels, self.sort_idx, allow_fill=False) @cache_readonly def sort_idx(self): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 6cd5eceed5f2a..4b43574f49820 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -53,18 +53,17 @@ import pandas.core.missing as missing from pandas.sparse.array import _maybe_to_sparse, SparseArray -import pandas.lib as lib -import pandas.tslib as tslib +from pandas._libs import lib, tslib +from pandas._libs.tslib import Timedelta +from pandas._libs.lib import BlockPlacement + import pandas.computation.expressions as expressions from pandas.util.decorators import cache_readonly from pandas.util.validators import validate_bool_kwarg -from pandas.tslib import Timedelta from pandas import compat, _np_version_under1p9 from pandas.compat import range, map, zip, u -from pandas.lib import BlockPlacement - class Block(PandasObject): """ diff --git a/pandas/core/missing.py b/pandas/core/missing.py index ffd0423572f5e..3b9bfe1de48e7 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -5,8 +5,8 @@ import numpy as np from distutils.version import LooseVersion -import pandas.algos as algos -import pandas.lib as lib +from pandas._libs import algos, lib + from pandas.compat import range, string_types from pandas.types.common import (is_numeric_v_string_like, is_float_dtype, is_datetime64_dtype, diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 0cc3a2d039b5e..bb6c9b4546d0f 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -9,7 +9,8 @@ except ImportError: # pragma: no cover _USE_BOTTLENECK = False -from pandas import compat, lib, algos, tslib +from pandas import compat +from pandas._libs import tslib, algos, lib from pandas.types.common import (_get_dtype, is_float, is_scalar, is_integer, is_complex, is_float_dtype, diff --git 
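[editor note] Both `_transform_fast` variants above follow the same pattern: aggregate once per group, then broadcast the per-group result back to row order with a take on the group ids. A compact sketch of that broadcast (the fancy-index on the last line is the job `take_1d` does above):

import numpy as np

def transform_fast(values, ids, agg):
    # one aggregated value per group ...
    per_group = np.array([agg(values[ids == g]) for g in range(ids.max() + 1)])
    # ... fanned back out to row order
    return per_group[ids]

# transform_fast(np.array([1., 2., 3., 4.]), np.array([0, 1, 0, 1]), np.mean)
# -> array([2., 3., 2., 3.])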
a/pandas/core/ops.py b/pandas/core/ops.py index 6cc43cd9228f6..fe83f8a352851 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -10,15 +10,17 @@ import numpy as np import pandas as pd import datetime -from pandas import compat, lib, tslib -import pandas.index as _index + +from pandas._libs import (lib, index as libindex, + tslib as libts, algos as libalgos, iNaT) + +from pandas import compat from pandas.util.decorators import Appender import pandas.computation.expressions as expressions -from pandas.lib import isscalar -from pandas.tslib import iNaT + from pandas.compat import bind_method import pandas.core.missing as missing -import pandas.algos as _algos + from pandas.core.common import (_values_from_object, _maybe_match_name, PerformanceWarning) from pandas.types.missing import notnull, isnull @@ -29,6 +31,7 @@ is_datetime64_dtype, is_datetime64tz_dtype, is_bool_dtype, is_datetimetz, is_list_like, + is_scalar, _ensure_object) from pandas.types.cast import _maybe_upcast_putmask, _find_common_type from pandas.types.generic import ABCSeries, ABCIndex, ABCPeriodIndex @@ -476,7 +479,7 @@ def _convert_to_array(self, values, name=None, other=None): values = values._values elif not (isinstance(values, (np.ndarray, ABCSeries)) and is_datetime64_dtype(values)): - values = tslib.array_to_datetime(values) + values = libts.array_to_datetime(values) elif inferred_type in ('timedelta', 'timedelta64'): # have a timedelta, convert to to ns here values = to_timedelta(values, errors='coerce', box=False) @@ -680,12 +683,12 @@ def safe_na_op(lvalues, rvalues): if isinstance(rvalues, ABCSeries): if is_object_dtype(rvalues): # if dtype is object, try elementwise op - return _algos.arrmap_object(rvalues, - lambda x: op(lvalues, x)) + return libalgos.arrmap_object(rvalues, + lambda x: op(lvalues, x)) else: if is_object_dtype(lvalues): - return _algos.arrmap_object(lvalues, - lambda x: op(x, rvalues)) + return libalgos.arrmap_object(lvalues, + lambda x: op(x, rvalues)) raise def wrapper(left, right, name=name, na_op=na_op): @@ -754,7 +757,7 @@ def na_op(x, y): # in either operand if is_categorical_dtype(x): return op(x, y) - elif is_categorical_dtype(y) and not isscalar(y): + elif is_categorical_dtype(y) and not is_scalar(y): return op(y, x) if is_object_dtype(x.dtype): @@ -770,7 +773,7 @@ def na_op(x, y): raise TypeError("invalid type comparison") # numpy does not like comparisons vs None - if isscalar(y) and isnull(y): + if is_scalar(y) and isnull(y): if name == '__ne__': return np.ones(len(x), dtype=bool) else: @@ -779,11 +782,11 @@ def na_op(x, y): # we have a datetime/timedelta and may need to convert mask = None if (needs_i8_conversion(x) or - (not isscalar(y) and needs_i8_conversion(y))): + (not is_scalar(y) and needs_i8_conversion(y))): - if isscalar(y): + if is_scalar(y): mask = isnull(x) - y = _index.convert_scalar(x, _values_from_object(y)) + y = libindex.convert_scalar(x, _values_from_object(y)) else: mask = isnull(x) | isnull(y) y = y.view('i8') @@ -819,7 +822,7 @@ def wrapper(self, other, axis=None): elif isinstance(other, (np.ndarray, pd.Index)): # do not check length of zerodim array # as it will broadcast - if (not lib.isscalar(lib.item_from_zerodim(other)) and + if (not is_scalar(lib.item_from_zerodim(other)) and len(self) != len(other)): raise ValueError('Lengths must match to compare') @@ -855,7 +858,7 @@ def wrapper(self, other, axis=None): with np.errstate(all='ignore'): res = na_op(values, other) - if isscalar(res): + if is_scalar(res): raise TypeError('Could not compare %s type 
with Series' % type(other)) @@ -1333,7 +1336,7 @@ def na_op(x, y): # work only for scalars def f(self, other): - if not isscalar(other): + if not is_scalar(other): raise ValueError('Simple arithmetic with %s can only be ' 'done with scalar values' % self._constructor.__name__) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 7bcd9f2d30b79..3279a8f2be39d 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -19,15 +19,14 @@ from pandas.core.sparse import SparseDataFrame, SparseSeries from pandas.sparse.array import SparseArray -from pandas._sparse import IntIndex +from pandas.sparse.libsparse import IntIndex from pandas.core.categorical import Categorical, _factorize_from_iterable from pandas.core.sorting import (get_group_index, compress_group_index, decons_obs_group_ids) import pandas.core.algorithms as algos -import pandas.algos as _algos -import pandas._reshape as _reshape +from pandas._libs import algos as _algos, reshape as _reshape from pandas.core.index import MultiIndex, _get_na_value diff --git a/pandas/core/series.py b/pandas/core/series.py index 626a4a81193cc..83036ffef0bed 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -60,7 +60,7 @@ from pandas.compat.numpy import function as nv import pandas.core.ops as ops -import pandas.core.algorithms as algos +import pandas.core.algorithms as algorithms import pandas.core.common as com import pandas.core.nanops as nanops @@ -68,10 +68,7 @@ from pandas.util.decorators import Appender, deprecate_kwarg, Substitution from pandas.util.validators import validate_bool_kwarg -import pandas.lib as lib -import pandas.tslib as tslib -import pandas.index as _index - +from pandas._libs import index as libindex, tslib as libts, lib, iNaT from pandas.core.config import get_option __all__ = ['Series'] @@ -294,7 +291,7 @@ def _set_axis(self, axis, labels, fastpath=False): # need to set here becuase we changed the index if fastpath: self._data.set_axis(axis, labels) - except (tslib.OutOfBoundsDatetime, ValueError): + except (libts.OutOfBoundsDatetime, ValueError): # labels may exceeds datetime bounds, # or not be a DatetimeIndex pass @@ -568,7 +565,7 @@ def _ixs(self, i, axis=0): # dispatch to the values if we need values = self._values if isinstance(values, np.ndarray): - return _index.get_value_at(values, i) + return libindex.get_value_at(values, i) else: return values[i] except IndexError: @@ -582,7 +579,7 @@ def _ixs(self, i, axis=0): if isinstance(label, Index): return self.take(i, axis=axis, convert=True) else: - return _index.get_value_at(self, i) + return libindex.get_value_at(self, i) @property def _is_mixed_type(self): @@ -733,7 +730,7 @@ def setitem(key, value): elif is_timedelta64_dtype(self.dtype): # reassign a null value to iNaT if isnull(value): - value = tslib.iNaT + value = iNaT try: self.index._engine.set_value(self._values, key, @@ -1202,7 +1199,7 @@ def mode(self): modes : Series (sorted) """ # TODO: Add option for bins like value_counts() - return algos.mode(self) + return algorithms.mode(self) @Appender(base._shared_docs['unique'] % _shared_doc_kwargs) def unique(self): @@ -1424,7 +1421,7 @@ def diff(self, periods=1): ------- diffed : Series """ - result = algos.diff(_values_from_object(self), periods) + result = algorithms.diff(_values_from_object(self), periods) return self._constructor(result, index=self.index).__finalize__(self) def autocorr(self, lag=1): @@ -1915,7 +1912,8 @@ def nlargest(self, n=5, keep='first'): >>> s = pd.Series(np.random.randn(1e6)) >>> s.nlargest(10) # only 
sorts up to the N requested """ - return algos.select_n_series(self, n=n, keep=keep, method='nlargest') + return algorithms.select_n_series(self, n=n, keep=keep, + method='nlargest') @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) @@ -1953,7 +1951,8 @@ def nsmallest(self, n=5, keep='first'): >>> s = pd.Series(np.random.randn(1e6)) >>> s.nsmallest(10) # only sorts up to the N requested """ - return algos.select_n_series(self, n=n, keep=keep, method='nsmallest') + return algorithms.select_n_series(self, n=n, keep=keep, + method='nsmallest') def sortlevel(self, level=0, ascending=True, sort_remaining=True): """ @@ -2166,7 +2165,7 @@ def map_f(values, f): arg = self._constructor(arg, index=arg.keys()) indexer = arg.index.get_indexer(values) - new_values = algos.take_1d(arg._values, indexer) + new_values = algorithms.take_1d(arg._values, indexer) else: new_values = map_f(values, arg) @@ -2324,7 +2323,7 @@ def _reindex_indexer(self, new_index, indexer, copy): return self # be subclass-friendly - new_values = algos.take_1d(self.get_values(), indexer) + new_values = algorithms.take_1d(self.get_values(), indexer) return self._constructor(new_values, index=new_index) def _needs_reindex_multi(self, axes, method, level): @@ -2484,7 +2483,7 @@ def isin(self, values): dtype: bool """ - result = algos.isin(_values_from_object(self), values) + result = algorithms.isin(_values_from_object(self), values) return self._constructor(result, index=self.index).__finalize__(self) def between(self, left, right, inclusive=True): diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 71314da7745c0..205d0d94d2ec3 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -7,10 +7,9 @@ _ensure_int64, is_categorical_dtype) from pandas.types.missing import isnull -import pandas.core.algorithms as algos -import pandas.algos as _algos -import pandas.hashtable as _hash -from pandas import lib +import pandas.core.algorithms as algorithms +from pandas._libs import lib, algos, hashtable +from pandas._libs.hashtable import unique_label_indices _INT64_MAX = np.iinfo(np.int64).max @@ -131,7 +130,6 @@ def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull): xnull: boolean, if nulls are excluded; i.e. 
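[editor note] The `nlargest`/`nsmallest` docstrings above promise that only about `n` elements get fully sorted. A NumPy sketch of that selection strategy via `argpartition` (an assumption for illustration; the real `select_n_series` kernel is pandas-internal):

import numpy as np

def nlargest_values(arr, n):
    idx = np.argpartition(arr, -n)[-n:]            # O(len(arr)): the n winners, unordered
    return arr[idx][np.argsort(arr[idx])[::-1]]    # sort only those n, descending

# nlargest_values(np.array([5., 1., 9., 3.]), 2) -> array([9., 5.])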
-1 labels are passed through """ - from pandas.hashtable import unique_label_indices if not xnull: lift = np.fromiter(((a == -1).any() for a in labels), dtype='i8') @@ -250,7 +248,8 @@ def __init__(self, comp_ids, ngroups, levels, labels): self.comp_ids = comp_ids.astype(np.int64) self.k = len(labels) - self.tables = [_hash.Int64HashTable(ngroups) for _ in range(self.k)] + self.tables = [hashtable.Int64HashTable(ngroups) + for _ in range(self.k)] self._populate_tables() @@ -291,7 +290,7 @@ def get_indexer_dict(label_list, keys): def get_group_index_sorter(group_index, ngroups): """ - _algos.groupsort_indexer implements `counting sort` and it is at least + algos.groupsort_indexer implements `counting sort` and it is at least O(ngroups), where ngroups = prod(shape) shape = map(len, keys) @@ -309,8 +308,8 @@ def get_group_index_sorter(group_index, ngroups): do_groupsort = (count > 0 and ((alpha + beta * ngroups) < (count * np.log(count)))) if do_groupsort: - sorter, _ = _algos.groupsort_indexer(_ensure_int64(group_index), - ngroups) + sorter, _ = algos.groupsort_indexer(_ensure_int64(group_index), + ngroups) return _ensure_platform_int(sorter) else: return group_index.argsort(kind='mergesort') @@ -323,8 +322,8 @@ def compress_group_index(group_index, sort=True): (comp_ids) into the list of unique labels (obs_group_ids). """ - size_hint = min(len(group_index), _hash._SIZE_HINT_LIMIT) - table = _hash.Int64HashTable(size_hint) + size_hint = min(len(group_index), hashtable._SIZE_HINT_LIMIT) + table = hashtable.Int64HashTable(size_hint) group_index = _ensure_int64(group_index) @@ -348,10 +347,10 @@ def _reorder_by_uniques(uniques, labels): mask = labels < 0 # move labels to right locations (ie, unsort ascending labels) - labels = algos.take_nd(reverse_indexer, labels, allow_fill=False) + labels = algorithms.take_nd(reverse_indexer, labels, allow_fill=False) np.putmask(labels, mask, -1) # sort observed ids - uniques = algos.take_nd(uniques, sorter, allow_fill=False) + uniques = algorithms.take_nd(uniques, sorter, allow_fill=False) return uniques, labels diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 46ba48b4cd846..b5b5d58235eaa 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -18,7 +18,7 @@ from pandas.core.base import AccessorProperty, NoNewAttributesMixin from pandas.util.decorators import Appender import re -import pandas.lib as lib +import pandas._libs.lib as lib import warnings import textwrap import codecs diff --git a/pandas/core/window.py b/pandas/core/window.py index 3f9aa2b0ff392..6fda60c449f42 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -24,13 +24,14 @@ needs_i8_conversion, is_timedelta64_dtype, is_list_like, - _ensure_float64) + _ensure_float64, + is_scalar) import pandas as pd -from pandas.lib import isscalar + from pandas.core.base import (PandasObject, SelectionMixin, GroupByMixin) import pandas.core.common as com -import pandas._window as _window +import pandas.core.libwindow as _window from pandas.tseries.offsets import DateOffset from pandas import compat from pandas.compat.numpy import function as nv @@ -154,7 +155,7 @@ def _gotitem(self, key, ndim, subset=None): self = self._shallow_copy(subset) self._reset_cache() if subset.ndim == 2: - if isscalar(key) and key in subset or is_list_like(key): + if is_scalar(key) and key in subset or is_list_like(key): self._selection = key return self diff --git a/pandas/window.pyx b/pandas/core/window.pyx similarity index 99% rename from pandas/window.pyx rename to 
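[editor note] `get_group_index_sorter` above picks between the Cython counting sort and numpy's mergesort with a linear cost model: counting sort does roughly `alpha + beta * ngroups` extra work, a comparison sort costs `count * log(count)`, and the cheaper bound wins. A pure-NumPy sketch of both halves, assuming non-negative labels (the real `groupsort_indexer` also buckets -1 first) and placeholder values for the empirically tuned constants:

import numpy as np

def counting_sort_indexer(labels, ngroups):
    # stable counting sort over small integer labels: O(count + ngroups)
    counts = np.bincount(labels, minlength=ngroups)
    where = np.concatenate(([0], counts.cumsum()[:-1]))   # start offset per group
    out = np.empty(len(labels), dtype=np.intp)
    for i, lab in enumerate(labels):
        out[where[lab]] = i
        where[lab] += 1
    return out

def group_index_sorter(labels, ngroups, alpha=0.0, beta=1.0):
    count = len(labels)
    if count > 0 and alpha + beta * ngroups < count * np.log(count):
        return counting_sort_indexer(labels, ngroups)
    return labels.argsort(kind='mergesort')   # stable comparison sort fallback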
pandas/core/window.pyx index 005d42c9f68be..a06e616002ee2 100644 --- a/pandas/window.pyx +++ b/pandas/core/window.pyx @@ -58,7 +58,7 @@ from util cimport numeric from skiplist cimport * -cdef extern from "src/headers/math.h": +cdef extern from "../src/headers/math.h": double sqrt(double x) nogil int signbit(double) nogil diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 622c4cd3bbcc7..d354911a825bc 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -33,8 +33,9 @@ from pandas.io.common import _get_handle, UnicodeWriter, _expand_user from pandas.formats.printing import adjoin, justify, pprint_thing import pandas.core.common as com -import pandas.lib as lib -from pandas.tslib import iNaT, Timestamp, Timedelta, format_array_from_datetime +import pandas._libs.lib as lib +from pandas._libs.tslib import (iNaT, Timestamp, Timedelta, + format_array_from_datetime) from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex import pandas as pd diff --git a/pandas/indexes/api.py b/pandas/indexes/api.py index 64992e46613e5..a38453e0d2ccc 100644 --- a/pandas/indexes/api.py +++ b/pandas/indexes/api.py @@ -8,7 +8,7 @@ from pandas.indexes.range import RangeIndex # noqa import pandas.core.common as com -import pandas.lib as lib +import pandas._libs.lib as lib # TODO: there are many places that rely on these private methods existing in # pandas.core.index diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index e441d9a88690d..607a463083fdd 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -3,12 +3,10 @@ import operator import numpy as np -import pandas.tslib as tslib -import pandas.lib as lib -import pandas._join as _join -import pandas.algos as _algos -import pandas.index as _index -from pandas.lib import Timestamp, Timedelta, is_datetime_array +from pandas._libs import (lib, index as libindex, tslib as libts, + algos as libalgos, join as libjoin, + Timestamp, Timedelta, ) +from pandas._libs.lib import is_datetime_array from pandas.compat import range, u from pandas.compat.numpy import function as nv @@ -120,11 +118,11 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): _join_precedence = 1 # Cython methods - _arrmap = _algos.arrmap_object - _left_indexer_unique = _join.left_join_indexer_unique_object - _left_indexer = _join.left_join_indexer_object - _inner_indexer = _join.inner_join_indexer_object - _outer_indexer = _join.outer_join_indexer_object + _arrmap = libalgos.arrmap_object + _left_indexer_unique = libjoin.left_join_indexer_unique_object + _left_indexer = libjoin.left_join_indexer_object + _inner_indexer = libjoin.inner_join_indexer_object + _outer_indexer = libjoin.outer_join_indexer_object _box_scalars = False _typ = 'index' @@ -144,7 +142,7 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): # used to infer integers as datetime-likes _infer_as_myclass = False - _engine_type = _index.ObjectEngine + _engine_type = libindex.ObjectEngine def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): @@ -285,7 +283,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, try: return DatetimeIndex(subarr, copy=copy, name=name, **kwargs) - except tslib.OutOfBoundsDatetime: + except libts.OutOfBoundsDatetime: pass elif inferred.startswith('timedelta'): @@ -2314,7 +2312,7 @@ def get_value(self, series, key): raise try: - return tslib.get_value_box(s, key) + return libts.get_value_box(s, key) except IndexError: raise 
except TypeError: @@ -2972,7 +2970,6 @@ def _join_level(self, other, level, how='left', return_indexers=False, order of the data indexed by the MultiIndex will not be changed; otherwise, it will tie out with `other`. """ - from pandas.algos import groupsort_indexer from .multi import MultiIndex def _get_leaf_sorter(labels): @@ -2985,7 +2982,7 @@ def _get_leaf_sorter(labels): if len(labels) == 1: lab = _ensure_int64(labels[0]) - sorter, _ = groupsort_indexer(lab, 1 + lab.max()) + sorter, _ = libalgos.groupsort_indexer(lab, 1 + lab.max()) return sorter # find indexers of begining of each set of @@ -3051,8 +3048,9 @@ def _get_leaf_sorter(labels): else: # tie out the order with other if level == 0: # outer most level, take the fast route ngroups = 1 + new_lev_labels.max() - left_indexer, counts = groupsort_indexer(new_lev_labels, - ngroups) + left_indexer, counts = libalgos.groupsort_indexer( + new_lev_labels, ngroups) + # missing values are placed first; drop them! left_indexer = left_indexer[counts[0]:] new_labels = [lab[left_indexer] for lab in new_labels] @@ -3846,8 +3844,8 @@ def _ensure_index(index_like, copy=False): def _get_na_value(dtype): - return {np.datetime64: tslib.NaT, - np.timedelta64: tslib.NaT}.get(dtype, np.nan) + return {np.datetime64: libts.NaT, + np.timedelta64: libts.NaT}.get(dtype, np.nan) def _ensure_has_len(seq): diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 5299a094156cd..3d8f76fc56b01 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -1,5 +1,5 @@ import numpy as np -import pandas.index as _index +from pandas._libs import index as libindex from pandas import compat from pandas.compat.numpy import function as nv @@ -45,7 +45,7 @@ class CategoricalIndex(Index, base.PandasDelegate): """ _typ = 'categoricalindex' - _engine_type = _index.Int64Engine + _engine_type = libindex.Int64Engine _attributes = ['name'] def __new__(cls, data=None, categories=None, ordered=None, dtype=None, @@ -303,7 +303,7 @@ def unique(self): False: 'first'}) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): - from pandas.hashtable import duplicated_int64 + from pandas._libs.hashtable import duplicated_int64 codes = self.codes.astype('i8') return duplicated_int64(codes, keep) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 23a42265a149b..bca1db83b6645 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -6,9 +6,7 @@ from sys import getsizeof import numpy as np -import pandas.lib as lib -import pandas.index as _index -from pandas.lib import Timestamp +from pandas._libs import index as libindex, lib, Timestamp from pandas.compat import range, zip, lrange, lzip, map from pandas.compat.numpy import function as nv @@ -76,7 +74,7 @@ class MultiIndex(Index): _levels = FrozenList() _labels = FrozenList() _comparables = ['names'] - _engine_type = _index.MultiIndexEngine + _engine_type = libindex.MultiIndexEngine rename = Index.set_names def __new__(cls, levels=None, labels=None, sortorder=None, names=None, @@ -762,7 +760,7 @@ def f(k, stringify): @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): from pandas.core.sorting import get_group_index - from pandas.hashtable import duplicated_int64 + from pandas._libs.hashtable import duplicated_int64 shape = map(len, self.levels) ids = get_group_index(self.labels, shape, sort=False, xnull=False) @@ -813,7 +811,7 @@ def _try_mi(k): pass try: - return _index.get_value_at(s, k) + 
return libindex.get_value_at(s, k) except IndexError: raise except TypeError: diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 00ddf5b0c918d..9bb70feb2501f 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -1,9 +1,6 @@ import numpy as np -import pandas.lib as lib -import pandas._join as _join -import pandas.algos as _algos -import pandas.index as _index - +from pandas._libs import (lib, index as libindex, + algos as libalgos, join as libjoin) from pandas.types.common import (is_dtype_equal, pandas_dtype, is_float_dtype, is_object_dtype, is_integer_dtype, is_scalar) @@ -114,16 +111,13 @@ class Int64Index(NumericIndex): __doc__ = _num_index_shared_docs['class_descr'] % _int64_descr_args _typ = 'int64index' - _arrmap = _algos.arrmap_int64 - _left_indexer_unique = _join.left_join_indexer_unique_int64 - _left_indexer = _join.left_join_indexer_int64 - _inner_indexer = _join.inner_join_indexer_int64 - _outer_indexer = _join.outer_join_indexer_int64 - + _arrmap = libalgos.arrmap_int64 + _left_indexer_unique = libjoin.left_join_indexer_unique_int64 + _left_indexer = libjoin.left_join_indexer_int64 + _inner_indexer = libjoin.inner_join_indexer_int64 + _outer_indexer = libjoin.outer_join_indexer_int64 _can_hold_na = False - - _engine_type = _index.Int64Engine - + _engine_type = libindex.Int64Engine _default_dtype = np.int64 @property @@ -175,17 +169,14 @@ class UInt64Index(NumericIndex): __doc__ = _num_index_shared_docs['class_descr'] % _uint64_descr_args _typ = 'uint64index' - _arrmap = _algos.arrmap_uint64 - _left_indexer_unique = _join.left_join_indexer_unique_uint64 - _left_indexer = _join.left_join_indexer_uint64 - _inner_indexer = _join.inner_join_indexer_uint64 - _outer_indexer = _join.outer_join_indexer_uint64 - + _arrmap = libalgos.arrmap_uint64 + _left_indexer_unique = libjoin.left_join_indexer_unique_uint64 + _left_indexer = libjoin.left_join_indexer_uint64 + _inner_indexer = libjoin.inner_join_indexer_uint64 + _outer_indexer = libjoin.outer_join_indexer_uint64 _can_hold_na = False _na_value = 0 - - _engine_type = _index.UInt64Engine - + _engine_type = libindex.UInt64Engine _default_dtype = np.uint64 @property @@ -255,12 +246,12 @@ class Float64Index(NumericIndex): __doc__ = _num_index_shared_docs['class_descr'] % _float64_descr_args _typ = 'float64index' - _engine_type = _index.Float64Engine - _arrmap = _algos.arrmap_float64 - _left_indexer_unique = _join.left_join_indexer_unique_float64 - _left_indexer = _join.left_join_indexer_float64 - _inner_indexer = _join.inner_join_indexer_float64 - _outer_indexer = _join.outer_join_indexer_float64 + _engine_type = libindex.Float64Engine + _arrmap = libalgos.arrmap_float64 + _left_indexer_unique = libjoin.left_join_indexer_unique_float64 + _left_indexer = libjoin.left_join_indexer_float64 + _inner_indexer = libjoin.inner_join_indexer_float64 + _outer_indexer = libjoin.outer_join_indexer_float64 _default_dtype = np.float64 diff --git a/pandas/indexes/range.py b/pandas/indexes/range.py index cc78361f843bf..103a3ac2fd5f4 100644 --- a/pandas/indexes/range.py +++ b/pandas/indexes/range.py @@ -2,7 +2,7 @@ import operator import numpy as np -import pandas.index as _index +from pandas._libs import index as libindex from pandas.types.common import (is_integer, is_scalar, @@ -39,7 +39,7 @@ class RangeIndex(Int64Index): """ _typ = 'rangeindex' - _engine_type = _index.Int64Engine + _engine_type = libindex.Int64Engine def __new__(cls, start=None, stop=None, step=None, name=None, dtype=None, fastpath=False, 
copy=False, **kwargs): diff --git a/pandas/io/api.py b/pandas/io/api.py index 1284b3cb222d6..e312e7bc2f300 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -11,7 +11,7 @@ from pandas.io.json import read_json from pandas.io.html import read_html from pandas.io.sql import read_sql, read_sql_table, read_sql_query -from pandas.io.sas.sasreader import read_sas +from pandas.io.sas import read_sas from pandas.io.feather_format import read_feather from pandas.io.stata import read_stata from pandas.io.pickle import read_pickle, to_pickle diff --git a/pandas/io/date_converters.py b/pandas/io/date_converters.py index 3ffcef4b21552..080d6c3e273a3 100644 --- a/pandas/io/date_converters.py +++ b/pandas/io/date_converters.py @@ -1,7 +1,7 @@ """This module is designed for community supported date conversion functions""" from pandas.compat import range, map import numpy as np -import pandas.lib as lib +import pandas._libs.lib as lib def parse_date_time(date_col, time_col): diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 00ec8bcf060ef..82ea2e8a46592 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -19,7 +19,7 @@ EmptyDataError, get_filepath_or_buffer, _NA_VALUES) from pandas.tseries.period import Period -from pandas import json +from pandas.io.json import libjson from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, string_types, OrderedDict) from pandas.core import config @@ -1450,7 +1450,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, elif isinstance(cell.val, date): num_format_str = self.date_format - stylekey = json.dumps(cell.style) + stylekey = libjson.dumps(cell.style) if num_format_str: stylekey += num_format_str @@ -1578,7 +1578,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, elif isinstance(cell.val, date): num_format_str = self.date_format - stylekey = json.dumps(cell.style) + stylekey = libjson.dumps(cell.style) if num_format_str: stylekey += num_format_str diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index a00d3492e8a37..114ec4bb2723e 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -2,8 +2,8 @@ import os import numpy as np -import pandas.json as _json -from pandas.tslib import iNaT +from pandas.io.json import libjson +from pandas._libs.tslib import iNaT from pandas.compat import StringIO, long, u from pandas import compat, isnull from pandas import Series, DataFrame, to_datetime @@ -14,8 +14,8 @@ from .table_schema import build_table_schema from pandas.types.common import is_period_dtype -loads = _json.loads -dumps = _json.dumps +loads = libjson.loads +dumps = libjson.dumps TABLE_SCHEMA_VERSION = '0.20.0' diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index 0e7d025e81851..4da4a6ad57850 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -5,7 +5,7 @@ from collections import defaultdict import numpy as np -from pandas.lib import convert_json_to_lines +from pandas._libs.lib import convert_json_to_lines from pandas import compat, DataFrame diff --git a/pandas/msgpack/__init__.py b/pandas/io/msgpack/__init__.py similarity index 81% rename from pandas/msgpack/__init__.py rename to pandas/io/msgpack/__init__.py index 4d6e241171281..984e90ee03e69 100644 --- a/pandas/msgpack/__init__.py +++ b/pandas/io/msgpack/__init__.py @@ -2,8 +2,8 @@ from collections import namedtuple -from pandas.msgpack.exceptions import * # noqa -from pandas.msgpack._version import version # noqa +from pandas.io.msgpack.exceptions import * 
# noqa +from pandas.io.msgpack._version import version # noqa class ExtType(namedtuple('ExtType', 'code data')): @@ -19,8 +19,8 @@ def __new__(cls, code, data): import os # noqa -from pandas.msgpack._packer import Packer # noqa -from pandas.msgpack._unpacker import unpack, unpackb, Unpacker # noqa +from pandas.io.msgpack._packer import Packer # noqa +from pandas.io.msgpack._unpacker import unpack, unpackb, Unpacker # noqa def pack(o, stream, **kwargs): diff --git a/pandas/msgpack/_packer.pyx b/pandas/io/msgpack/_packer.pyx similarity index 98% rename from pandas/msgpack/_packer.pyx rename to pandas/io/msgpack/_packer.pyx index 008dbe5541d50..ad7ce1fb2531a 100644 --- a/pandas/msgpack/_packer.pyx +++ b/pandas/io/msgpack/_packer.pyx @@ -6,11 +6,11 @@ from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * -from pandas.msgpack.exceptions import PackValueError -from pandas.msgpack import ExtType +from pandas.io.msgpack.exceptions import PackValueError +from pandas.io.msgpack import ExtType -cdef extern from "../src/msgpack/pack.h": +cdef extern from "../../src/msgpack/pack.h": struct msgpack_packer: char* buf size_t length diff --git a/pandas/msgpack/_unpacker.pyx b/pandas/io/msgpack/_unpacker.pyx similarity index 98% rename from pandas/msgpack/_unpacker.pyx rename to pandas/io/msgpack/_unpacker.pyx index 6f23a24adde6c..504bfed48df3c 100644 --- a/pandas/msgpack/_unpacker.pyx +++ b/pandas/io/msgpack/_unpacker.pyx @@ -11,12 +11,12 @@ from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * -from pandas.msgpack.exceptions import (BufferFull, OutOfData, - UnpackValueError, ExtraData) -from pandas.msgpack import ExtType +from pandas.io.msgpack.exceptions import (BufferFull, OutOfData, + UnpackValueError, ExtraData) +from pandas.io.msgpack import ExtType -cdef extern from "../src/msgpack/unpack.h": +cdef extern from "../../src/msgpack/unpack.h": ctypedef struct msgpack_user: bint use_list PyObject* object_hook diff --git a/pandas/msgpack/_version.py b/pandas/io/msgpack/_version.py similarity index 100% rename from pandas/msgpack/_version.py rename to pandas/io/msgpack/_version.py diff --git a/pandas/msgpack/exceptions.py b/pandas/io/msgpack/exceptions.py similarity index 100% rename from pandas/msgpack/exceptions.py rename to pandas/io/msgpack/exceptions.py diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 39bc1a4ecf225..404be758a7fbe 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -55,7 +55,7 @@ Index, MultiIndex, Float64Index, Int64Index, Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT, Categorical, CategoricalIndex) -from pandas.tslib import NaTType +from pandas._libs.tslib import NaTType from pandas.sparse.api import SparseSeries, SparseDataFrame from pandas.sparse.array import BlockIndex, IntIndex from pandas.core.generic import NDFrame @@ -64,7 +64,7 @@ from pandas.core.internals import BlockManager, make_block, _safe_reshape import pandas.core.internals as internals -from pandas.msgpack import Unpacker as _Unpacker, Packer as _Packer, ExtType +from pandas.io.msgpack import Unpacker as _Unpacker, Packer as _Packer, ExtType from pandas.util._move import ( BadMove as _BadMove, move_into_mutable_buffer as _move_into_mutable_buffer, diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 811844ec35deb..9aedddc811830 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -36,8 +36,8 @@ from pandas.util.decorators import Appender -import pandas.lib as lib -import pandas.parser as _parser +import 
pandas._libs.lib as lib +import pandas.io.libparsers as libparsers # BOM character (byte order mark) @@ -1415,7 +1415,7 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, if issubclass(cvals.dtype.type, np.integer) and self.compact_ints: cvals = lib.downcast_int64( - cvals, _parser.na_values, + cvals, libparsers.na_values, self.use_unsigned) result[c] = cvals @@ -1533,7 +1533,7 @@ def __init__(self, src, **kwds): # #2442 kwds['allow_leading_cols'] = self.index_col is not False - self._reader = _parser.TextReader(src, **kwds) + self._reader = libparsers.TextReader(src, **kwds) # XXX self.usecols, self.usecols_dtype = _validate_usecols_arg( diff --git a/pandas/parser.pyx b/pandas/io/parsers.pyx similarity index 99% rename from pandas/parser.pyx rename to pandas/io/parsers.pyx index 23aee860b3108..a5858accbb6f5 100644 --- a/pandas/parser.pyx +++ b/pandas/io/parsers.pyx @@ -13,11 +13,12 @@ from cpython cimport (PyObject, PyBytes_FromString, PyUnicode_Check, PyUnicode_AsUTF8String, PyErr_Occurred, PyErr_Fetch) from cpython.ref cimport PyObject, Py_XDECREF -from io.common import ParserError, DtypeWarning, EmptyDataError, ParserWarning +from pandas.io.common import (ParserError, DtypeWarning, + EmptyDataError, ParserWarning) # Import CParserError as alias of ParserError for backwards compatibility. # Ultimately, we want to remove this import. See gh-12665 and gh-14479. -from io.common import CParserError +from pandas.io.common import CParserError cdef extern from "Python.h": object PyUnicode_FromString(char *v) @@ -36,7 +37,7 @@ from numpy cimport ndarray, uint8_t, uint64_t import numpy as np cimport util -import pandas.lib as lib +import pandas._libs.lib as lib import pandas.compat as compat from pandas.types.common import (is_categorical_dtype, CategoricalDtype, is_integer_dtype, is_float_dtype, diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9ad53db305b59..72efc47a3c744 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -44,9 +44,7 @@ from pandas.core.config import get_option from pandas.computation.pytables import Expr, maybe_expression -import pandas.lib as lib -import pandas.algos as algos -import pandas.tslib as tslib +from pandas._libs import tslib, algos, lib from distutils.version import LooseVersion diff --git a/pandas/io/sas/__init__.py b/pandas/io/sas/__init__.py index e69de29bb2d1d..fa6b29a1a3fcc 100644 --- a/pandas/io/sas/__init__.py +++ b/pandas/io/sas/__init__.py @@ -0,0 +1 @@ +from .sasreader import read_sas # noqa diff --git a/pandas/io/sas/saslib.pyx b/pandas/io/sas/sas.pyx similarity index 100% rename from pandas/io/sas/saslib.pyx rename to pandas/io/sas/sas.pyx diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 91f417abc0502..d33cee2c5a1bc 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -20,7 +20,7 @@ import numpy as np import struct import pandas.io.sas.sas_constants as const -from pandas.io.sas.saslib import Parser +from pandas.io.sas.libsas import Parser class _subheader_pointer(object): diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 2ab642b3af0c7..b210baedaaf6d 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -11,7 +11,7 @@ import re import numpy as np -import pandas.lib as lib +import pandas._libs.lib as lib from pandas.types.missing import isnull from pandas.types.dtypes import DatetimeTZDtype from pandas.types.common import (is_list_like, is_dict_like, diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 1698ade4c0102..af4bc6a6b7ddb 100644 --- 
a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -30,8 +30,8 @@ import pandas as pd from pandas.io.common import get_filepath_or_buffer, BaseIterator -from pandas.lib import max_len_string_array, infer_dtype -from pandas.tslib import NaT, Timestamp +from pandas._libs.lib import max_len_string_array, infer_dtype +from pandas._libs.tslib import NaT, Timestamp _version_error = ("Version of given Stata file is not 104, 105, 108, " "111 (Stata 7SE), 113 (Stata 8/9), 114 (Stata 10/11), " diff --git a/pandas/json.py b/pandas/json.py new file mode 100644 index 0000000000000..5b1e395fa4b74 --- /dev/null +++ b/pandas/json.py @@ -0,0 +1,7 @@ +# flake8: noqa + +import warnings +warnings.warn("The pandas.json module is deprecated and will be " + "removed in a future version. Please import from " + "pandas.io.json instead", FutureWarning, stacklevel=2) +from pandas.io.json.libjson import dumps, loads diff --git a/pandas/lib.py b/pandas/lib.py new file mode 100644 index 0000000000000..6c26627a97de3 --- /dev/null +++ b/pandas/lib.py @@ -0,0 +1,7 @@ +# flake8: noqa + +import warnings +warnings.warn("The pandas.lib module is deprecated and will be " + "removed in a future version. Please import from " + "pandas._libs.lib instead", FutureWarning, stacklevel=2) +from pandas._libs.lib import * diff --git a/pandas/parser.py b/pandas/parser.py new file mode 100644 index 0000000000000..af203c3df8cc9 --- /dev/null +++ b/pandas/parser.py @@ -0,0 +1,8 @@ +# flake8: noqa + +import warnings +warnings.warn("The pandas.parser module is deprecated and will be " + "removed in a future version. Please import from " + "pandas.io.parsers instead", FutureWarning, stacklevel=2) +from pandas.io.libparsers import na_values +from pandas.io.common import CParserError diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index c65e0dd5c9f7b..762b6d869eae0 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -25,9 +25,9 @@ _astype_nansafe, _find_common_type) from pandas.types.missing import isnull, notnull, na_value_for_dtype -from pandas._sparse import SparseIndex, BlockIndex, IntIndex -import pandas._sparse as splib -import pandas.index as _index +from pandas.sparse import libsparse as splib +from pandas.sparse.libsparse import SparseIndex, BlockIndex, IntIndex +from pandas._libs import index as libindex import pandas.core.algorithms as algos import pandas.core.ops as ops import pandas.formats.printing as printing @@ -447,7 +447,7 @@ def _get_val_at(self, loc): if sp_loc == -1: return self.fill_value else: - return _index.get_value_at(self, sp_loc) + return libindex.get_value_at(self, sp_loc) @Appender(_index_shared_docs['take'] % _sparray_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, diff --git a/pandas/sparse/list.py b/pandas/sparse/list.py index d294e65bbf10c..54ebf5e51045d 100644 --- a/pandas/sparse/list.py +++ b/pandas/sparse/list.py @@ -6,7 +6,7 @@ from pandas.types.common import is_scalar from pandas.sparse.array import SparseArray from pandas.util.validators import validate_bool_kwarg -import pandas._sparse as splib +import pandas.sparse.libsparse as splib class SparseList(PandasObject): diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index c3dd089e8409a..7ec42f02c3998 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -20,13 +20,13 @@ from pandas.core import generic import pandas.core.common as com import pandas.core.ops as ops -import pandas.index as _index +import pandas._libs.index as _index from pandas.util.decorators import Appender
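# NOTE: the three stub modules added above (pandas/json.py, pandas/lib.py,
# pandas/parser.py) all follow the same deprecation-shim pattern: warn once
# at import time, then re-export the names from their new home. A minimal
# sketch of that pattern, using hypothetical names ("old_module" and
# "pandas.new_home" are illustrative, not part of this patch):
#
#     # old_module.py
#     import warnings
#     warnings.warn("old_module is deprecated and will be removed in a "
#                   "future version. Please import from pandas.new_home "
#                   "instead", FutureWarning, stacklevel=2)
#     from pandas.new_home import *  # noqa  (re-export the moved names)
#
# stacklevel=2 points the warning at the caller's import statement rather
# than at the shim module itself.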
from pandas.sparse.array import (make_sparse, _sparse_array_op, SparseArray, _make_index) -from pandas._sparse import BlockIndex, IntIndex -import pandas._sparse as splib +from pandas.sparse.libsparse import BlockIndex, IntIndex +import pandas.sparse.libsparse as splib from pandas.sparse.scipy_sparse import (_sparse_series_to_coo, _coo_to_sparse_series) diff --git a/pandas/src/sparse.pyx b/pandas/sparse/sparse.pyx similarity index 100% rename from pandas/src/sparse.pyx rename to pandas/sparse/sparse.pyx diff --git a/pandas/src/sparse_op_helper.pxi.in b/pandas/sparse/sparse_op_helper.pxi.in similarity index 100% rename from pandas/src/sparse_op_helper.pxi.in rename to pandas/sparse/sparse_op_helper.pxi.in diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 2f8ebc4cc1df4..db92210478182 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +from warnings import catch_warnings import numpy as np import pandas as pd @@ -33,16 +34,12 @@ class TestPDApi(Base, tm.TestCase): # top-level sub-packages lib = ['api', 'compat', 'computation', 'core', 'indexes', 'formats', 'pandas', - 'test', 'tools', 'tseries', + 'test', 'tools', 'tseries', 'sparse', 'types', 'util', 'options', 'io'] - # top-level packages that are c-imports, should rename to _* - # to avoid naming conflicts - lib_to_rename = ['algos', 'hashtable', 'tslib', 'msgpack', 'sparse', - 'json', 'lib', 'index', 'parser'] - # these are already deprecated; awaiting removal - deprecated_modules = ['stats', 'datetools'] + deprecated_modules = ['stats', 'datetools', 'parser', + 'json', 'lib', 'tslib'] # misc misc = ['IndexSlice', 'NaT'] @@ -113,7 +110,7 @@ class TestPDApi(Base, tm.TestCase): def test_api(self): self.check(pd, - self.lib + self.lib_to_rename + self.misc + + self.lib + self.misc + self.modules + self.deprecated_modules + self.classes + self.deprecated_classes + self.deprecated_classes_in_future + @@ -206,7 +203,7 @@ def test_removed_from_core_common(self): self.assertRaises(AttributeError, lambda: getattr(com, t)) -class TestDatetools(tm.TestCase): +class TestDatetoolsDeprecation(tm.TestCase): def test_deprecation_access_func(self): with tm.assert_produces_warning(FutureWarning, @@ -247,3 +244,36 @@ def test_groupby(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): pd.groupby(pd.Series([1, 2, 3]), [1, 1, 1]) + + +class TestJson(tm.TestCase): + + def test_deprecation_access_func(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.json.dumps([]) + + +class TestParser(tm.TestCase): + + def test_deprecation_access_func(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.parser.na_values + + +class TestLib(tm.TestCase): + + def test_deprecation_access_func(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.lib.infer_dtype + + +class TestTSLib(tm.TestCase): + + def test_deprecation_access_func(self): + # some libraries may be imported before we + # test and could show the warning + with catch_warnings(record=True): + pd.tslib.Timestamp diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index b42f79fe5009b..ed6006440441e 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -28,7 +28,7 @@ import pandas.computation.expr as expr import pandas.util.testing as tm -import pandas.lib as lib +import pandas._libs.lib as lib from 
pandas.util.testing import (assert_frame_equal, randbool, assertRaisesRegexp, assert_numpy_array_equal, assert_produces_warning, assert_series_equal, diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 76eb61bd81110..ba7e45d7e66fb 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -23,7 +23,7 @@ from pandas.core.common import PandasError import pandas as pd import pandas.core.common as com -import pandas.lib as lib +import pandas._libs.lib as lib import pandas.util.testing as tm from pandas.tests.frame.common import TestData diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 36c39ffba70b3..f0dfc4553886b 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -18,6 +18,7 @@ date_range) import pandas as pd +from pandas._libs.tslib import iNaT from pandas.tseries.offsets import BDay from pandas.types.common import (is_float_dtype, is_integer, @@ -1491,8 +1492,7 @@ def test_setitem_single_column_mixed_datetime(self): assert_series_equal(result, expected) # set an allowable datetime64 type - from pandas import tslib - df.loc['b', 'timestamp'] = tslib.iNaT + df.loc['b', 'timestamp'] = iNaT self.assertTrue(isnull(df.loc['b', 'timestamp'])) # allow this syntax diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 471fc536a90f6..e49dfffc48803 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -8,7 +8,7 @@ import numpy as np from pandas.compat import (lmap, range, lrange, StringIO, u) -from pandas.parser import ParserError +from pandas.io.common import ParserError from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp, date_range, read_csv, compat, to_datetime) import pandas as pd diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 51a10f4141ab5..77c5bde332cff 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -7,8 +7,8 @@ from pandas import Index, isnull from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm -import pandas.lib as lib -import pandas.algos as algos +import pandas._libs.lib as lib +import pandas._libs.algos as algos def test_series_grouper(): diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 2d21eab5822fe..4acf9dd4755f4 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -6,6 +6,7 @@ from pandas import Series, DataFrame, Timestamp, MultiIndex, concat, date_range from pandas.types.common import _ensure_platform_int, is_timedelta64_dtype from pandas.compat import StringIO +from pandas._libs import algos from .common import MixIn, assert_fp_equal from pandas.util.testing import assert_frame_equal, assert_series_equal @@ -417,8 +418,8 @@ def test_cython_group_transform_algos(self): dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32, np.uint64, np.float32, np.float64] - ops = [(pd.algos.group_cumprod_float64, np.cumproduct, [np.float64]), - (pd.algos.group_cumsum, np.cumsum, dtypes)] + ops = [(algos.group_cumprod_float64, np.cumproduct, [np.float64]), + (algos.group_cumsum, np.cumsum, dtypes)] is_datetimelike = False for pd_op, np_op, dtypes in ops: @@ -436,13 +437,13 @@ def test_cython_group_transform_algos(self): data = np.array([[1], [2], [3], [np.nan], [4]], dtype='float64') 
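# A pure-Python sketch of the semantics the Cython kernels exercised above
# implement (illustrative only, not the actual implementation): values
# accumulate per group, and a NaN is emitted at its own position but
# skipped by the running accumulator, which is why the cumsum expectation
# below reads [1, 3, 6, nan, 10]:
#
#     import numpy as np
#
#     def naive_group_cumsum(values, labels):
#         out = np.empty(len(values), dtype='float64')
#         totals = {}
#         for i, (v, lab) in enumerate(zip(values, labels)):
#             if np.isnan(v):
#                 out[i] = np.nan  # emit NaN but do not accumulate it
#                 continue
#             totals[lab] = totals.get(lab, 0.0) + v
#             out[i] = totals[lab]
#         return out
#
#     naive_group_cumsum([1., 2., 3., np.nan, 4.], [0] * 5)
#     # -> array([ 1.,  3.,  6., nan, 10.])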
actual = np.zeros_like(data) actual.fill(np.nan) - pd.algos.group_cumprod_float64(actual, data, labels, is_datetimelike) + algos.group_cumprod_float64(actual, data, labels, is_datetimelike) expected = np.array([1, 2, 6, np.nan, 24], dtype='float64') self.assert_numpy_array_equal(actual[:, 0], expected) actual = np.zeros_like(data) actual.fill(np.nan) - pd.algos.group_cumsum(actual, data, labels, is_datetimelike) + algos.group_cumsum(actual, data, labels, is_datetimelike) expected = np.array([1, 3, 6, np.nan, 10], dtype='float64') self.assert_numpy_array_equal(actual[:, 0], expected) @@ -450,8 +451,8 @@ def test_cython_group_transform_algos(self): is_datetimelike = True data = np.array([np.timedelta64(1, 'ns')] * 5, dtype='m8[ns]')[:, None] actual = np.zeros_like(data, dtype='int64') - pd.algos.group_cumsum(actual, data.view('int64'), labels, - is_datetimelike) + algos.group_cumsum(actual, data.view('int64'), labels, + is_datetimelike) expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64( 2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'), np.timedelta64(5, 'ns')]) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 7b39a33266ffa..3581f894e53a3 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -10,6 +10,7 @@ TimedeltaIndex, PeriodIndex, notnull, isnull) from pandas.types.common import needs_i8_conversion from pandas.util.testing import assertRaisesRegexp +from pandas._libs.tslib import iNaT import pandas.util.testing as tm @@ -322,7 +323,7 @@ def test_get_unique_index(self): if needs_i8_conversion(ind): vals = ind.asi8[[0] * 5] - vals[0] = pd.tslib.iNaT + vals[0] = iNaT else: vals = ind.values[[0] * 5] vals[0] = np.nan @@ -407,7 +408,7 @@ def test_numpy_argsort(self): # pandas compatibility input validation - the # rest already perform separate (or no) such # validation via their 'values' attribute as - # defined in pandas/indexes/base.py - they + # defined in pandas.indexes.base - they # cannot be changed at the moment due to # backwards compatibility concerns if isinstance(type(ind), (CategoricalIndex, RangeIndex)): @@ -836,7 +837,7 @@ def test_hasnans_isnans(self): if len(index) == 0: continue elif isinstance(index, pd.tseries.base.DatetimeIndexOpsMixin): - values[1] = pd.tslib.iNaT + values[1] = iNaT elif isinstance(index, (Int64Index, UInt64Index)): continue else: @@ -876,7 +877,7 @@ def test_fillna(self): values = idx.values if isinstance(index, pd.tseries.base.DatetimeIndexOpsMixin): - values[1] = pd.tslib.iNaT + values[1] = iNaT elif isinstance(index, (Int64Index, UInt64Index)): continue else: diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 772d76305cff2..16881de6e8c39 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -2,9 +2,10 @@ from datetime import timedelta import pandas as pd -from pandas import tslib, offsets, lib +from pandas import offsets import pandas.util.testing as tm -from pandas.tslib import OutOfBoundsDatetime +from pandas._libs import tslib, lib +from pandas._libs.tslib import OutOfBoundsDatetime from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range, to_datetime) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 80664ce246bf8..67e82e5c71d75 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++
b/pandas/tests/indexes/datetimes/test_date_range.py @@ -350,7 +350,7 @@ def test_range_tz_dateutil(self): # GH 2906 tm._skip_if_no_dateutil() # Use maybe_get_tz to fix filename in tz under dateutil. - from pandas.tslib import maybe_get_tz + from pandas._libs.tslib import maybe_get_tz tz = lambda x: maybe_get_tz('dateutil/' + x) start = datetime(2011, 1, 1, tzinfo=tz('US/Eastern')) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 2c87c48bcda11..78c37f773547a 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -117,7 +117,7 @@ def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): def test_time_loc(self): # GH8667 from datetime import time - from pandas.index import _SIZE_CUTOFF + from pandas._libs.index import _SIZE_CUTOFF ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64) key = time(15, 11, 30) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 312017eef3446..4abc282252559 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -5,7 +5,7 @@ from itertools import product import pandas as pd -import pandas.tslib as tslib +import pandas._libs.tslib as tslib import pandas.util.testing as tm from pandas.core.common import PerformanceWarning from pandas.tseries.index import cdate_range diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 8d05a4016ba45..a1ad147f84aff 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -326,7 +326,7 @@ def test_month_range_union_tz_pytz(self): def test_month_range_union_tz_dateutil(self): tm._skip_if_windows_python_3() tm._skip_if_no_dateutil() - from pandas.tslib import _dateutil_gettz as timezone + from pandas._libs.tslib import _dateutil_gettz as timezone tz = timezone('US/Eastern') early_start = datetime(2011, 1, 1) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 1b67ffce63b10..512a3e1c38629 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -9,7 +9,7 @@ from distutils.version import LooseVersion import pandas as pd -from pandas import tslib +from pandas._libs import tslib, lib from pandas.tseries import tools from pandas.tseries.tools import normalize_date from pandas.compat import lmap @@ -19,7 +19,7 @@ from pandas.util.testing import assert_series_equal, _skip_if_has_locale from pandas import (isnull, to_datetime, Timestamp, Series, DataFrame, Index, DatetimeIndex, NaT, date_range, bdate_range, - compat, lib) + compat) class TimeConversionFormats(tm.TestCase): diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 8d9e26406defc..ff83b50a2a7b2 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -4,8 +4,9 @@ import pandas as pd from pandas.util import testing as tm from pandas.compat import lrange +from pandas._libs import tslib from pandas import (PeriodIndex, Series, DatetimeIndex, - period_range, Period, tslib, _np_version_under1p9) + period_range, Period, _np_version_under1p9) class TestGetItem(tm.TestCase): diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 82a881d7c65bc..4533428cf1514 
100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -2,7 +2,7 @@ from datetime import timedelta import pandas as pd -import pandas.tslib as tslib +import pandas._libs.tslib as tslib import pandas.util.testing as tm import pandas.tseries.period as period from pandas import (DatetimeIndex, PeriodIndex, period_range, Series, Period, diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index e09d405afd375..f9a1df3d824f1 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -6,7 +6,7 @@ import pandas.tseries.period as period from pandas.compat import lrange from pandas.tseries.frequencies import get_freq, MONTHS -from pandas._period import period_ordinal, period_asfreq +from pandas._libs.period import period_ordinal, period_asfreq from pandas import (PeriodIndex, Period, DatetimeIndex, Timestamp, Series, date_range, to_datetime, period_range) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 79d10cbda565e..8c0a399cb58b3 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -24,7 +24,7 @@ from pandas.tseries.index import _to_m8 import pandas as pd -from pandas.lib import Timestamp +from pandas._libs.lib import Timestamp class TestIndex(Base, tm.TestCase): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 80ff67ab3d043..f67231e78983c 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -17,7 +17,8 @@ from pandas.compat import PY3, long, lrange, lzip, range, u from pandas.core.common import PerformanceWarning, UnsortedIndexError from pandas.indexes.base import InvalidIndexError -from pandas.lib import Timestamp +from pandas._libs import lib +from pandas._libs.lib import Timestamp import pandas.util.testing as tm @@ -851,7 +852,7 @@ def test_from_product_invalid_input(self): def test_from_product_datetimeindex(self): dt_index = date_range('2000-01-01', periods=2) mi = pd.MultiIndex.from_product([[1, 2], dt_index]) - etalon = pd.lib.list_to_object_array([(1, pd.Timestamp( + etalon = lib.list_to_object_array([(1, pd.Timestamp( '2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp( '2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) tm.assert_numpy_array_equal(mi.values, etalon) @@ -878,7 +879,7 @@ def test_values_boxed(self): (3, pd.Timestamp('2000-01-03'))] mi = pd.MultiIndex.from_tuples(tuples) tm.assert_numpy_array_equal(mi.values, - pd.lib.list_to_object_array(tuples)) + lib.list_to_object_array(tuples)) # Check that code branches for boxed values produce identical results tm.assert_numpy_array_equal(mi.values[:4], mi[:4].values) @@ -2181,7 +2182,7 @@ def check(nlevels, with_nulls): for keep in ['first', 'last', False]: left = mi.duplicated(keep=keep) - right = pd.hashtable.duplicated_object(mi.values, keep=keep) + right = pd._libs.hashtable.duplicated_object(mi.values, keep=keep) tm.assert_numpy_array_equal(left, right) # GH5873 diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 1bf9a10628542..e23e7c19ed799 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -11,7 +11,7 @@ import pandas.util.testing as tm import pandas as pd -from pandas.lib import Timestamp +from pandas._libs.lib import Timestamp from pandas.tests.indexes.common import Base diff --git 
a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index 0810b13eb0f53..9a3dd1c6bca71 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -3,9 +3,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas import TimedeltaIndex, timedelta_range, tslib, to_timedelta - -iNaT = tslib.iNaT +from pandas import TimedeltaIndex, timedelta_range, to_timedelta class TestTimedeltaIndex(tm.TestCase): diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 406a5bdbf3bcd..8c7b88a9cf2ca 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -8,8 +8,8 @@ from pandas.util.testing import assert_series_equal, assert_frame_equal from pandas import (Series, Timedelta, DataFrame, Timestamp, TimedeltaIndex, timedelta_range, date_range, DatetimeIndex, Int64Index, - _np_version_under1p10, Float64Index, Index, tslib) - + _np_version_under1p10, Float64Index, Index) +from pandas._libs.tslib import iNaT from pandas.tests.test_base import Ops @@ -772,7 +772,7 @@ def test_nat_new(self): tm.assert_index_equal(result, exp) result = idx._nat_new(box=False) - exp = np.array([tslib.iNaT] * 5, dtype=np.int64) + exp = np.array([iNaT] * 5, dtype=np.int64) tm.assert_numpy_array_equal(result, exp) def test_shift(self): diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index 2442051547312..ade9366c7e994 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -4,8 +4,9 @@ import pandas as pd import pandas.util.testing as tm from pandas.util.testing import assert_series_equal -from pandas import (Series, Timedelta, to_timedelta, tslib, isnull, +from pandas import (Series, Timedelta, to_timedelta, isnull, TimedeltaIndex) +from pandas._libs.tslib import iNaT class TestTimedeltas(tm.TestCase): @@ -26,7 +27,7 @@ def conv(v): # empty string result = to_timedelta('', box=False) - self.assertEqual(result.astype('int64'), tslib.iNaT) + self.assertEqual(result.astype('int64'), iNaT) result = to_timedelta(['', '']) self.assertTrue(isnull(result).all()) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index f7a4af711bbb8..4502e0171dfbe 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -9,7 +9,7 @@ is_float_dtype, is_scalar) from pandas.compat import range, lrange, lzip, StringIO, lmap -from pandas.tslib import NaT +from pandas._libs.tslib import NaT from numpy import nan from numpy.random import randn import numpy as np diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index c298b3841096c..7dbcf25c60b45 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -637,13 +637,14 @@ def test_convert_dates(self): def test_convert_dates_infer(self): # GH10747 + from pandas.io.json import dumps infer_words = ['trade_time', 'date', 'datetime', 'sold_at', 'modified', 'timestamp', 'timestamps'] for infer_word in infer_words: data = [{'id': 1, infer_word: 1036713600000}, {'id': 2}] expected = DataFrame([[1, Timestamp('2002-11-08')], [2, pd.NaT]], columns=['id', infer_word]) - result = read_json(pd.json.dumps(data))[['id', infer_word]] + result = read_json(dumps(data))[['id', infer_word]] assert_frame_equal(result, expected) def 
test_date_format_frame(self): @@ -910,50 +911,53 @@ def test_sparse(self): self.assertEqual(expected, ss.to_json()) def test_tz_is_utc(self): + from pandas.io.json import dumps exp = '"2013-01-10T05:00:00.000Z"' ts = Timestamp('2013-01-10 05:00:00Z') - self.assertEqual(exp, pd.json.dumps(ts, iso_dates=True)) + self.assertEqual(exp, dumps(ts, iso_dates=True)) dt = ts.to_pydatetime() - self.assertEqual(exp, pd.json.dumps(dt, iso_dates=True)) + self.assertEqual(exp, dumps(dt, iso_dates=True)) ts = Timestamp('2013-01-10 00:00:00', tz='US/Eastern') - self.assertEqual(exp, pd.json.dumps(ts, iso_dates=True)) + self.assertEqual(exp, dumps(ts, iso_dates=True)) dt = ts.to_pydatetime() - self.assertEqual(exp, pd.json.dumps(dt, iso_dates=True)) + self.assertEqual(exp, dumps(dt, iso_dates=True)) ts = Timestamp('2013-01-10 00:00:00-0500') - self.assertEqual(exp, pd.json.dumps(ts, iso_dates=True)) + self.assertEqual(exp, dumps(ts, iso_dates=True)) dt = ts.to_pydatetime() - self.assertEqual(exp, pd.json.dumps(dt, iso_dates=True)) + self.assertEqual(exp, dumps(dt, iso_dates=True)) def test_tz_range_is_utc(self): + from pandas.io.json import dumps + exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]' dfexp = ('{"DT":{' '"0":"2013-01-01T05:00:00.000Z",' '"1":"2013-01-02T05:00:00.000Z"}}') tz_range = pd.date_range('2013-01-01 05:00:00Z', periods=2) - self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True)) + self.assertEqual(exp, dumps(tz_range, iso_dates=True)) dti = pd.DatetimeIndex(tz_range) - self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True)) + self.assertEqual(exp, dumps(dti, iso_dates=True)) df = DataFrame({'DT': dti}) - self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True)) + self.assertEqual(dfexp, dumps(df, iso_dates=True)) tz_range = pd.date_range('2013-01-01 00:00:00', periods=2, tz='US/Eastern') - self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True)) + self.assertEqual(exp, dumps(tz_range, iso_dates=True)) dti = pd.DatetimeIndex(tz_range) - self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True)) + self.assertEqual(exp, dumps(dti, iso_dates=True)) df = DataFrame({'DT': dti}) - self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True)) + self.assertEqual(dfexp, dumps(df, iso_dates=True)) tz_range = pd.date_range('2013-01-01 00:00:00-0500', periods=2) - self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True)) + self.assertEqual(exp, dumps(tz_range, iso_dates=True)) dti = pd.DatetimeIndex(tz_range) - self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True)) + self.assertEqual(exp, dumps(dti, iso_dates=True)) df = DataFrame({'DT': dti}) - self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True)) + self.assertEqual(dfexp, dumps(df, iso_dates=True)) def test_read_jsonl(self): # GH9180 diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 6a986710ae444..e66721beed288 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -17,7 +17,7 @@ import decimal from functools import partial from pandas.compat import range, zip, StringIO, u -import pandas.json as ujson +import pandas.io.json.libjson as ujson import pandas.compat as compat import numpy as np @@ -400,7 +400,7 @@ def test_npy_nat(self): assert ujson.encode(input) == 'null', "Expected null" def test_datetime_units(self): - from pandas.lib import Timestamp + from pandas._libs.lib import Timestamp val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504) stamp = Timestamp(val) diff --git a/pandas/tests/msgpack/__init__.py 
b/pandas/tests/io/msgpack/__init__.py similarity index 100% rename from pandas/tests/msgpack/__init__.py rename to pandas/tests/io/msgpack/__init__.py diff --git a/pandas/tests/msgpack/test_buffer.py b/pandas/tests/io/msgpack/test_buffer.py similarity index 90% rename from pandas/tests/msgpack/test_buffer.py rename to pandas/tests/io/msgpack/test_buffer.py index caaa22bfd08fc..5a2dc3dba5dfa 100644 --- a/pandas/tests/msgpack/test_buffer.py +++ b/pandas/tests/io/msgpack/test_buffer.py @@ -1,6 +1,6 @@ # coding: utf-8 -from pandas.msgpack import packb, unpackb +from pandas.io.msgpack import packb, unpackb def test_unpack_buffer(): diff --git a/pandas/tests/msgpack/test_case.py b/pandas/tests/io/msgpack/test_case.py similarity index 98% rename from pandas/tests/msgpack/test_case.py rename to pandas/tests/io/msgpack/test_case.py index a8a45b5b37eb0..3927693a94dd8 100644 --- a/pandas/tests/msgpack/test_case.py +++ b/pandas/tests/io/msgpack/test_case.py @@ -1,6 +1,6 @@ # coding: utf-8 -from pandas.msgpack import packb, unpackb +from pandas.io.msgpack import packb, unpackb def check(length, obj): diff --git a/pandas/tests/msgpack/test_except.py b/pandas/tests/io/msgpack/test_except.py similarity index 96% rename from pandas/tests/msgpack/test_except.py rename to pandas/tests/io/msgpack/test_except.py index 76b91bb375bbc..4bcef3607bfa4 100644 --- a/pandas/tests/msgpack/test_except.py +++ b/pandas/tests/io/msgpack/test_except.py @@ -1,7 +1,7 @@ # coding: utf-8 import unittest -from pandas.msgpack import packb, unpackb +from pandas.io.msgpack import packb, unpackb class DummyException(Exception): diff --git a/pandas/tests/msgpack/test_extension.py b/pandas/tests/io/msgpack/test_extension.py similarity index 96% rename from pandas/tests/msgpack/test_extension.py rename to pandas/tests/io/msgpack/test_extension.py index 97f0962a753d9..a5a111efbb835 100644 --- a/pandas/tests/msgpack/test_extension.py +++ b/pandas/tests/io/msgpack/test_extension.py @@ -1,7 +1,7 @@ from __future__ import print_function import array -import pandas.msgpack as msgpack -from pandas.msgpack import ExtType +import pandas.io.msgpack as msgpack +from pandas.io.msgpack import ExtType def test_pack_ext_type(): diff --git a/pandas/tests/msgpack/test_format.py b/pandas/tests/io/msgpack/test_format.py similarity index 98% rename from pandas/tests/msgpack/test_format.py rename to pandas/tests/io/msgpack/test_format.py index a4b309ebb657d..3659602e1381f 100644 --- a/pandas/tests/msgpack/test_format.py +++ b/pandas/tests/io/msgpack/test_format.py @@ -1,6 +1,6 @@ # coding: utf-8 -from pandas.msgpack import unpackb +from pandas.io.msgpack import unpackb def check(src, should, use_list=0): diff --git a/pandas/tests/msgpack/test_limits.py b/pandas/tests/io/msgpack/test_limits.py similarity index 97% rename from pandas/tests/msgpack/test_limits.py rename to pandas/tests/io/msgpack/test_limits.py index 9c08f328b90dd..a908ee3547634 100644 --- a/pandas/tests/msgpack/test_limits.py +++ b/pandas/tests/io/msgpack/test_limits.py @@ -3,7 +3,7 @@ unicode_literals) import pandas.util.testing as tm -from pandas.msgpack import packb, unpackb, Packer, Unpacker, ExtType +from pandas.io.msgpack import packb, unpackb, Packer, Unpacker, ExtType class TestLimits(tm.TestCase): diff --git a/pandas/tests/msgpack/test_newspec.py b/pandas/tests/io/msgpack/test_newspec.py similarity index 97% rename from pandas/tests/msgpack/test_newspec.py rename to pandas/tests/io/msgpack/test_newspec.py index 4eb9a0425c57b..783bfc1b364f8 100644 --- 
a/pandas/tests/msgpack/test_newspec.py +++ b/pandas/tests/io/msgpack/test_newspec.py @@ -1,6 +1,6 @@ # coding: utf-8 -from pandas.msgpack import packb, unpackb, ExtType +from pandas.io.msgpack import packb, unpackb, ExtType def test_str8(): diff --git a/pandas/tests/msgpack/test_obj.py b/pandas/tests/io/msgpack/test_obj.py similarity index 98% rename from pandas/tests/msgpack/test_obj.py rename to pandas/tests/io/msgpack/test_obj.py index bcc76929fe8f8..b067dacb84494 100644 --- a/pandas/tests/msgpack/test_obj.py +++ b/pandas/tests/io/msgpack/test_obj.py @@ -1,7 +1,7 @@ # coding: utf-8 import unittest -from pandas.msgpack import packb, unpackb +from pandas.io.msgpack import packb, unpackb class DecodeError(Exception): diff --git a/pandas/tests/msgpack/test_pack.py b/pandas/tests/io/msgpack/test_pack.py similarity index 98% rename from pandas/tests/msgpack/test_pack.py rename to pandas/tests/io/msgpack/test_pack.py index 005352691d908..6f9a271cbd326 100644 --- a/pandas/tests/msgpack/test_pack.py +++ b/pandas/tests/io/msgpack/test_pack.py @@ -5,7 +5,7 @@ import struct from pandas import compat from pandas.compat import u, OrderedDict -from pandas.msgpack import packb, unpackb, Unpacker, Packer +from pandas.io.msgpack import packb, unpackb, Unpacker, Packer class TestPack(unittest.TestCase): diff --git a/pandas/tests/msgpack/test_read_size.py b/pandas/tests/io/msgpack/test_read_size.py similarity index 96% rename from pandas/tests/msgpack/test_read_size.py rename to pandas/tests/io/msgpack/test_read_size.py index 965e97a7007de..ef521fa345637 100644 --- a/pandas/tests/msgpack/test_read_size.py +++ b/pandas/tests/io/msgpack/test_read_size.py @@ -1,5 +1,5 @@ """Test Unpacker's read_array_header and read_map_header methods""" -from pandas.msgpack import packb, Unpacker, OutOfData +from pandas.io.msgpack import packb, Unpacker, OutOfData UnexpectedTypeException = ValueError diff --git a/pandas/tests/msgpack/test_seq.py b/pandas/tests/io/msgpack/test_seq.py similarity index 96% rename from pandas/tests/msgpack/test_seq.py rename to pandas/tests/io/msgpack/test_seq.py index 927c2622419a6..5f203e8997ccb 100644 --- a/pandas/tests/msgpack/test_seq.py +++ b/pandas/tests/io/msgpack/test_seq.py @@ -1,7 +1,7 @@ # coding: utf-8 import io -import pandas.msgpack as msgpack +import pandas.io.msgpack as msgpack binarydata = bytes(bytearray(range(256))) diff --git a/pandas/tests/msgpack/test_sequnpack.py b/pandas/tests/io/msgpack/test_sequnpack.py similarity index 97% rename from pandas/tests/msgpack/test_sequnpack.py rename to pandas/tests/io/msgpack/test_sequnpack.py index fe089ccda1c7f..c9c979c4e0e44 100644 --- a/pandas/tests/msgpack/test_sequnpack.py +++ b/pandas/tests/io/msgpack/test_sequnpack.py @@ -3,8 +3,8 @@ import unittest from pandas import compat -from pandas.msgpack import Unpacker, BufferFull -from pandas.msgpack import OutOfData +from pandas.io.msgpack import Unpacker, BufferFull +from pandas.io.msgpack import OutOfData class TestPack(unittest.TestCase): diff --git a/pandas/tests/msgpack/test_subtype.py b/pandas/tests/io/msgpack/test_subtype.py similarity index 90% rename from pandas/tests/msgpack/test_subtype.py rename to pandas/tests/io/msgpack/test_subtype.py index d6dd72c4d9850..e27ec66c63e1f 100644 --- a/pandas/tests/msgpack/test_subtype.py +++ b/pandas/tests/io/msgpack/test_subtype.py @@ -1,6 +1,6 @@ # coding: utf-8 -from pandas.msgpack import packb +from pandas.io.msgpack import packb from collections import namedtuple diff --git a/pandas/tests/msgpack/test_unpack.py 
b/pandas/tests/io/msgpack/test_unpack.py similarity index 96% rename from pandas/tests/msgpack/test_unpack.py rename to pandas/tests/io/msgpack/test_unpack.py index ae8227ab276fb..24a8e885d19d6 100644 --- a/pandas/tests/msgpack/test_unpack.py +++ b/pandas/tests/io/msgpack/test_unpack.py @@ -1,6 +1,6 @@ from io import BytesIO import sys -from pandas.msgpack import Unpacker, packb, OutOfData, ExtType +from pandas.io.msgpack import Unpacker, packb, OutOfData, ExtType import pandas.util.testing as tm import pytest diff --git a/pandas/tests/msgpack/test_unpack_raw.py b/pandas/tests/io/msgpack/test_unpack_raw.py similarity index 94% rename from pandas/tests/msgpack/test_unpack_raw.py rename to pandas/tests/io/msgpack/test_unpack_raw.py index c6bf747c8d992..a261bf4cbbcd7 100644 --- a/pandas/tests/msgpack/test_unpack_raw.py +++ b/pandas/tests/io/msgpack/test_unpack_raw.py @@ -1,7 +1,7 @@ """Tests for cases where the user seeks to obtain packed msgpack objects""" import io -from pandas.msgpack import Unpacker, packb +from pandas.io.msgpack import Unpacker, packb def test_write_bytes(): diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index b667eed346355..df75d14e9702d 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -11,7 +11,7 @@ import pytest import numpy as np -from pandas.lib import Timestamp +from pandas._libs.lib import Timestamp import pandas as pd import pandas.util.testing as tm diff --git a/pandas/tests/io/parser/converters.py b/pandas/tests/io/parser/converters.py index 859d2e19bd56a..2659d977ea747 100644 --- a/pandas/tests/io/parser/converters.py +++ b/pandas/tests/io/parser/converters.py @@ -13,7 +13,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas.lib import Timestamp +from pandas._libs.lib import Timestamp from pandas import DataFrame, Index from pandas.compat import parse_date, StringIO, lmap diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index b1960159bb41d..4cba9276a9d1e 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -10,8 +10,8 @@ import pytest import numpy as np -import pandas.lib as lib -from pandas.lib import Timestamp +import pandas._libs.lib as lib +from pandas._libs.lib import Timestamp import pandas as pd import pandas.io.parsers as parsers diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index 0e91ca806e8fe..b6a9900b0b087 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -20,8 +20,8 @@ import pandas.util.testing as tm -from pandas.parser import TextReader -import pandas.parser as parser +from pandas.io.libparsers import TextReader +import pandas.io.libparsers as parser class TestTextReader(tm.TestCase): diff --git a/pandas/tests/io/parser/usecols.py b/pandas/tests/io/parser/usecols.py index 95df077dae997..0cf642983e8d3 100644 --- a/pandas/tests/io/parser/usecols.py +++ b/pandas/tests/io/parser/usecols.py @@ -11,7 +11,7 @@ import pandas.util.testing as tm from pandas import DataFrame, Index -from pandas.lib import Timestamp +from pandas._libs.lib import Timestamp from pandas.compat import StringIO diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 232e68a87f16e..c1a2a4545a6f9 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -23,7 +23,7 @@ is_platform_windows) from pandas.io.common import URLError, urlopen, file_path_to_url 
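# The msgpack test modules renamed into pandas/tests/io/msgpack/ above all
# exercise the packb/unpackb pair; a minimal round trip under the new
# import path looks roughly like this (a sketch, assuming the default
# Packer/Unpacker options):
#
#     from pandas.io.msgpack import packb, unpackb
#
#     payload = {b'key': [1, 2, 3]}
#     buf = packb(payload)             # serialize to msgpack bytes
#     assert unpackb(buf) == payload   # deserialize losslessly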
from pandas.io.html import read_html -from pandas.parser import ParserError +from pandas.io.libparsers import ParserError import pandas.util.testing as tm from pandas.util.testing import makeCustomDataframe as mkdf, network diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 251c6ae8b4dec..efa8587d64657 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -22,7 +22,8 @@ from pandas.tests.test_panel import assert_panel_equal import pandas -from pandas import Timestamp, NaT, tslib +from pandas import Timestamp, NaT +from pandas._libs.tslib import iNaT nan = np.nan @@ -373,7 +374,7 @@ def setUp(self): s.name = 'object' self.d['object'] = s - s = Series(tslib.iNaT, dtype='M8[ns]', index=range(5)) + s = Series(iNaT, dtype='M8[ns]', index=range(5)) self.d['date'] = s data = { diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 9f1dea2094bc6..5592c564e51df 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5282,7 +5282,7 @@ def test_append_with_timezones_dateutil(self): # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows # filename issues. - from pandas.tslib import maybe_get_tz + from pandas._libs.tslib import maybe_get_tz gettz = lambda x: maybe_get_tz('dateutil/' + x) # as columns diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index ae09e671dbca3..5188adf54b887 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -19,7 +19,7 @@ from pandas.io.parsers import read_csv from pandas.io.stata import (read_stata, StataReader, InvalidColumnName, PossiblePrecisionLoss, StataMissingValue) -from pandas.tslib import NaT +from pandas._libs.tslib import NaT from pandas.types.common import is_categorical_dtype diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index 49aa44492fe81..3128e90695324 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -6,7 +6,9 @@ import pandas.tseries.period as period from pandas.compat import text_type, iteritems from pandas.compat.numpy import np_datetime64_compat -from pandas import Period, Timestamp, tslib, offsets, _period + +from pandas._libs import tslib, period as libperiod +from pandas import Period, Timestamp, offsets from pandas.tseries.frequencies import DAYS, MONTHS @@ -256,8 +258,8 @@ def test_timestamp_tz_arg(self): self.assertEqual(p.tz, exp.tz) def test_timestamp_tz_arg_dateutil(self): - from pandas.tslib import _dateutil_gettz as gettz - from pandas.tslib import maybe_get_tz + from pandas._libs.tslib import _dateutil_gettz as gettz + from pandas._libs.tslib import maybe_get_tz for case in ['dateutil/Europe/Brussels', 'dateutil/Asia/Tokyo', 'dateutil/US/Pacific']: p = Period('1/1/2005', freq='M').to_timestamp( @@ -275,7 +277,7 @@ def test_timestamp_tz_arg_dateutil(self): self.assertEqual(p.tz, exp.tz) def test_timestamp_tz_arg_dateutil_from_string(self): - from pandas.tslib import _dateutil_gettz as gettz + from pandas._libs.tslib import _dateutil_gettz as gettz p = Period('1/1/2005', freq='M').to_timestamp(tz='dateutil/Europe/Brussels') self.assertEqual(p.tz, gettz('Europe/Brussels')) @@ -939,10 +941,10 @@ def test_round_trip(self): class TestPeriodField(tm.TestCase): def test_get_period_field_raises_on_out_of_range(self): - self.assertRaises(ValueError, _period.get_period_field, -1, 0, 0) + self.assertRaises(ValueError, libperiod.get_period_field, -1, 0, 0) def 
test_get_period_field_array_raises_on_out_of_range(self): - self.assertRaises(ValueError, _period.get_period_field_arr, -1, + self.assertRaises(ValueError, libperiod.get_period_field_arr, -1, np.empty(1), 0) diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index c5a828bf2e912..7c5caa9506ca2 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -6,9 +6,8 @@ import pandas.util.testing as tm from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type as ct from pandas import (Timedelta, TimedeltaIndex, timedelta_range, Series, - to_timedelta, tslib, compat, isnull) - -iNaT = tslib.iNaT + to_timedelta, compat, isnull) +from pandas._libs.tslib import iNaT, NaTType class TestTimedeltas(tm.TestCase): @@ -301,9 +300,9 @@ def check(value): def test_nat_converters(self): self.assertEqual(to_timedelta( - 'nat', box=False).astype('int64'), tslib.iNaT) + 'nat', box=False).astype('int64'), iNaT) self.assertEqual(to_timedelta( - 'nan', box=False).astype('int64'), tslib.iNaT) + 'nan', box=False).astype('int64'), iNaT) def testit(unit, transform): @@ -589,7 +588,7 @@ def test_implementation_limits(self): # Beyond lower limit, a NAT before the Overflow self.assertIsInstance(min_td - Timedelta(1, 'ns'), - pd.tslib.NaTType) + NaTType) with tm.assertRaises(OverflowError): min_td - Timedelta(2, 'ns') @@ -599,7 +598,7 @@ def test_implementation_limits(self): # Same tests using the internal nanosecond values td = Timedelta(min_td.value - 1, 'ns') - self.assertIsInstance(td, pd.tslib.NaTType) + self.assertIsInstance(td, NaTType) with tm.assertRaises(OverflowError): Timedelta(min_td.value - 2, 'ns') diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index bbcdce922f58a..d5d92dcf96eab 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -9,13 +9,15 @@ import pandas as pd import pandas.util.testing as tm -import pandas._period as period + from pandas.tseries import offsets, frequencies -from pandas.tslib import get_timezone, iNaT +from pandas._libs import tslib, period +from pandas._libs.tslib import get_timezone, iNaT + from pandas.compat import lrange, long from pandas.util.testing import assert_series_equal from pandas.compat.numpy import np_datetime64_compat -from pandas import (Timestamp, date_range, Period, Timedelta, tslib, compat, +from pandas import (Timestamp, date_range, Period, Timedelta, compat, Series, NaT, isnull, DataFrame, DatetimeIndex) from pandas.tseries.frequencies import (RESO_DAY, RESO_HR, RESO_MIN, RESO_US, RESO_MS, RESO_SEC) @@ -1482,7 +1484,7 @@ def test_timestamp_to_datetime_explicit_pytz(self): def test_timestamp_to_datetime_explicit_dateutil(self): tm._skip_if_windows_python_3() tm._skip_if_no_dateutil() - from pandas.tslib import _dateutil_gettz as gettz + from pandas._libs.tslib import _dateutil_gettz as gettz rng = date_range('20090415', '20090519', tz=gettz('US/Eastern')) stamp = rng[0] diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index c15171f331df3..24e4355fa9f9a 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -14,7 +14,8 @@ from pandas.core.index import MultiIndex from pandas.tseries.index import Timestamp, DatetimeIndex -from pandas import lib, tslib +from pandas._libs import lib +from pandas._libs.tslib import iNaT from pandas.compat import lrange, range, zip, OrderedDict, long from pandas 
import compat
@@ -200,14 +201,14 @@ def test_constructor_maskedarray(self):
         data = ma.masked_all((3, ), dtype='M8[ns]')
         result = Series(data)
-        expected = Series([tslib.iNaT, tslib.iNaT, tslib.iNaT], dtype='M8[ns]')
+        expected = Series([iNaT, iNaT, iNaT], dtype='M8[ns]')
         assert_series_equal(result, expected)
 
         data[0] = datetime(2001, 1, 1)
         data[2] = datetime(2001, 1, 3)
         index = ['a', 'b', 'c']
         result = Series(data, index=index)
-        expected = Series([datetime(2001, 1, 1), tslib.iNaT,
+        expected = Series([datetime(2001, 1, 1), iNaT,
                            datetime(2001, 1, 3)], index=index, dtype='M8[ns]')
         assert_series_equal(result, expected)
 
@@ -327,20 +328,19 @@ def test_constructor_datelike_coercion(self):
         self.assertTrue(result.dtype == object)
 
     def test_constructor_dtype_datetime64(self):
-        import pandas.tslib as tslib
 
-        s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5))
+        s = Series(iNaT, dtype='M8[ns]', index=lrange(5))
         self.assertTrue(isnull(s).all())
 
         # in theory this should be all nulls, but since
         # we are not specifying a dtype is ambiguous
-        s = Series(tslib.iNaT, index=lrange(5))
+        s = Series(iNaT, index=lrange(5))
         self.assertFalse(isnull(s).all())
 
         s = Series(nan, dtype='M8[ns]', index=lrange(5))
         self.assertTrue(isnull(s).all())
 
-        s = Series([datetime(2001, 1, 2, 0, 0), tslib.iNaT], dtype='M8[ns]')
+        s = Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype='M8[ns]')
         self.assertTrue(isnull(s[1]))
         self.assertEqual(s.dtype, 'M8[ns]')
 
@@ -732,8 +732,7 @@ def test_constructor_dtype_timedelta64(self):
         self.assertEqual(td.dtype, 'timedelta64[ns]')
 
         # mixed with NaT
-        from pandas import tslib
-        td = Series([timedelta(days=1), tslib.NaT], dtype='m8[ns]')
+        td = Series([timedelta(days=1), NaT], dtype='m8[ns]')
         self.assertEqual(td.dtype, 'timedelta64[ns]')
 
         td = Series([timedelta(days=1), np.nan], dtype='m8[ns]')
@@ -744,11 +743,11 @@ def test_constructor_dtype_timedelta64(self):
 
         # improved inference
         # GH5689
-        td = Series([np.timedelta64(300000000), pd.NaT])
+        td = Series([np.timedelta64(300000000), NaT])
         self.assertEqual(td.dtype, 'timedelta64[ns]')
 
         # because iNaT is int, not coerced to timedelta
-        td = Series([np.timedelta64(300000000), tslib.iNaT])
+        td = Series([np.timedelta64(300000000), iNaT])
         self.assertEqual(td.dtype, 'object')
 
         td = Series([np.timedelta64(300000000), np.nan])
@@ -791,7 +790,7 @@ def f():
         self.assertEqual(s.dtype, 'timedelta64[ns]')
 
     def test_NaT_scalar(self):
-        series = Series([0, 1000, 2000, tslib.iNaT], dtype='M8[ns]')
+        series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]')
 
         val = series[3]
         self.assertTrue(isnull(val))
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
index 13375ab886d8d..a2aaff25516ae 100644
--- a/pandas/tests/series/test_dtypes.py
+++ b/pandas/tests/series/test_dtypes.py
@@ -62,7 +62,7 @@ def test_astype_cast_object_int(self):
         self.assert_series_equal(result, Series(np.arange(1, 5)))
 
     def test_astype_datetimes(self):
-        import pandas.tslib as tslib
+        import pandas._libs.tslib as tslib
 
         s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5))
         s = s.astype('O')
diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py
index bb77550e01f11..9d93d9f01b161 100644
--- a/pandas/tests/series/test_indexing.py
+++ b/pandas/tests/series/test_indexing.py
@@ -7,14 +7,14 @@
 import numpy as np
 import pandas as pd
 
-import pandas.index as _index
+import pandas._libs.index as _index
 from pandas.types.common import is_integer, is_scalar
 from pandas import (Index, Series, DataFrame, isnull, date_range, NaT,
                     MultiIndex, Timestamp, DatetimeIndex, Timedelta)
 from pandas.core.indexing import IndexingError
 from pandas.tseries.offsets import BDay
-from pandas import lib, tslib
+from pandas._libs import tslib, lib
 
 from pandas.compat import lrange, range
 from pandas import compat
@@ -375,7 +375,7 @@ def test_getitem_setitem_datetime_tz_pytz(self):
     def test_getitem_setitem_datetime_tz_dateutil(self):
         tm._skip_if_no_dateutil()
         from dateutil.tz import tzutc
-        from pandas.tslib import _dateutil_gettz as gettz
+        from pandas._libs.tslib import _dateutil_gettz as gettz
 
         tz = lambda x: tzutc() if x == 'UTC' else gettz(
             x)  # handle special case for utc in dateutil
diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py
index a3b13ba9b993a..4b1c303200739 100644
--- a/pandas/tests/series/test_internals.py
+++ b/pandas/tests/series/test_internals.py
@@ -8,7 +8,7 @@
 
 from pandas import Series
 from pandas.tseries.index import Timestamp
-import pandas.lib as lib
+import pandas._libs.lib as lib
 
 from pandas.util.testing import assert_series_equal
 import pandas.util.testing as tm
diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py
index 23eb6a40f5f1d..87cfcf32229b4 100644
--- a/pandas/tests/series/test_missing.py
+++ b/pandas/tests/series/test_missing.py
@@ -9,9 +9,9 @@
 
 import pandas as pd
 from pandas import (Series, DataFrame, isnull, date_range,
-                    MultiIndex, Index, Timestamp)
+                    MultiIndex, Index, Timestamp, NaT)
 from pandas.compat import range
-from pandas import tslib
+from pandas._libs.tslib import iNaT
 from pandas.util.testing import assert_series_equal, assert_frame_equal
 import pandas.util.testing as tm
 
@@ -69,9 +69,8 @@ def test_timedelta_fillna(self):
                            timedelta(days=1, seconds=9 * 3600 + 60 + 1)])
         assert_series_equal(result, expected)
 
-        from pandas import tslib
-        result = td.fillna(tslib.NaT)
-        expected = Series([tslib.NaT, timedelta(0), timedelta(1),
+        result = td.fillna(NaT)
+        expected = Series([NaT, timedelta(0), timedelta(1),
                            timedelta(days=1, seconds=9 * 3600 + 60 + 1)],
                           dtype='m8[ns]')
         assert_series_equal(result, expected)
@@ -102,8 +101,7 @@ def test_datetime64_fillna(self):
             '20130101'), Timestamp('20130104'), Timestamp('20130103 9:01:01')])
         assert_series_equal(result, expected)
 
-        from pandas import tslib
-        result = s.fillna(tslib.NaT)
+        result = s.fillna(NaT)
         expected = s
         assert_series_equal(result, expected)
 
@@ -303,7 +301,7 @@ def test_fillna_raise(self):
                 s.fillna(1, limit=limit, method=method)
 
     def test_fillna_nat(self):
-        series = Series([0, 1000, 2000, tslib.iNaT], dtype='M8[ns]')
+        series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]')
 
         filled = series.fillna(method='pad')
         filled2 = series.fillna(value=series.values[2])
@@ -321,7 +319,7 @@ def test_fillna_nat(self):
         assert_frame_equal(filled, expected)
         assert_frame_equal(filled2, expected)
 
-        series = Series([tslib.iNaT, 0, 1, 2], dtype='M8[ns]')
+        series = Series([iNaT, 0, 1, 2], dtype='M8[ns]')
 
         filled = series.fillna(method='bfill')
         filled2 = series.fillna(value=series[1])
@@ -460,26 +458,25 @@ def test_bfill(self):
 
     def test_timedelta64_nan(self):
-        from pandas import tslib
         td = Series([timedelta(days=i) for i in range(10)])
 
         # nan ops on timedeltas
         td1 = td.copy()
         td1[0] = np.nan
         self.assertTrue(isnull(td1[0]))
-        self.assertEqual(td1[0].value, tslib.iNaT)
+        self.assertEqual(td1[0].value, iNaT)
         td1[0] = td[0]
         self.assertFalse(isnull(td1[0]))
 
-        td1[1] = tslib.iNaT
+        td1[1] = iNaT
         self.assertTrue(isnull(td1[1]))
-        self.assertEqual(td1[1].value, tslib.iNaT)
+        self.assertEqual(td1[1].value, iNaT)
         td1[1] = td[1]
         self.assertFalse(isnull(td1[1]))
 
-        td1[2] = tslib.NaT
+        td1[2] = NaT
         self.assertTrue(isnull(td1[2]))
-        self.assertEqual(td1[2].value, tslib.iNaT)
+        self.assertEqual(td1[2].value, iNaT)
         td1[2] = td[2]
         self.assertFalse(isnull(td1[2]))
diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py
index 7fe31bab87537..0acd03316339e 100644
--- a/pandas/tests/series/test_replace.py
+++ b/pandas/tests/series/test_replace.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 import pandas as pd
-import pandas.lib as lib
+import pandas._libs.lib as lib
 import pandas.util.testing as tm
 
 from .common import TestData
diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py
index d384460c3d030..ce7d5a573bfab 100644
--- a/pandas/tests/series/test_timeseries.py
+++ b/pandas/tests/series/test_timeseries.py
@@ -6,7 +6,7 @@
 
 import pandas as pd
 import pandas.util.testing as tm
-from pandas.tslib import iNaT
+from pandas._libs.tslib import iNaT
 from pandas.compat import lrange, StringIO, product
 from pandas.tseries.tdi import TimedeltaIndex
 from pandas.tseries.index import DatetimeIndex
diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py
index 70aaea5b5b1f0..15531cecfe79b 100644
--- a/pandas/tests/sparse/test_array.py
+++ b/pandas/tests/sparse/test_array.py
@@ -8,7 +8,7 @@
 
 from pandas import _np_version_under1p8
 from pandas.sparse.api import SparseArray, SparseSeries
-from pandas._sparse import IntIndex
+from pandas.sparse.libsparse import IntIndex
 from pandas.util.testing import assert_almost_equal, assertRaisesRegexp
 import pandas.util.testing as tm
diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py
index b2283364a1631..a7dd7f2e81033 100644
--- a/pandas/tests/sparse/test_frame.py
+++ b/pandas/tests/sparse/test_frame.py
@@ -14,7 +14,7 @@
 from pandas import compat
 import pandas.sparse.frame as spf
 
-from pandas._sparse import BlockIndex, IntIndex
+from pandas.sparse.libsparse import BlockIndex, IntIndex
 from pandas.sparse.api import SparseSeries, SparseDataFrame, SparseArray
 from pandas.tests.frame.test_misc_api import SharedWithSparse
diff --git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py
index 0435b732911da..b6ab99dc66cda 100644
--- a/pandas/tests/sparse/test_libsparse.py
+++ b/pandas/tests/sparse/test_libsparse.py
@@ -8,7 +8,7 @@
 
 from pandas import compat
 from pandas.sparse.array import IntIndex, BlockIndex, _make_index
-import pandas._sparse as splib
+import pandas.sparse.libsparse as splib
 
 TEST_LENGTH = 20
diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py
index de6636162ff05..8aa85a5b7f396 100644
--- a/pandas/tests/sparse/test_series.py
+++ b/pandas/tests/sparse/test_series.py
@@ -16,7 +16,7 @@
 
 import pandas.sparse.frame as spf
 
-from pandas._sparse import BlockIndex, IntIndex
+from pandas.sparse.libsparse import BlockIndex, IntIndex
 from pandas.sparse.api import SparseSeries
 from pandas.tests.series.test_misc_api import SharedWithSparse
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index fab04f7fa4bf2..7a3cc3e2c3cd7 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -10,11 +10,11 @@
 
 import pandas as pd
 from pandas import compat
-import pandas.algos as _algos
+from pandas._libs import algos as libalgos, hashtable
+from pandas._libs.hashtable import unique_label_indices
 from pandas.compat import lrange
 import pandas.core.algorithms as algos
 import pandas.util.testing as tm
-import pandas.hashtable as hashtable
 from pandas.compat.numpy import np_array_datetime64_compat
 from pandas.util.testing import assert_almost_equal
 
@@ -972,7 +972,6 @@ def test_quantile():
 
 
 def test_unique_label_indices():
-    from pandas.hashtable import unique_label_indices
 
     a = np.random.randint(1, 1 << 10, 1 << 15).astype('i8')
 
@@ -998,7 +997,7 @@ def test_scipy_compat(self):
         def _check(arr):
             mask = ~np.isfinite(arr)
             arr = arr.copy()
-            result = _algos.rank_1d_float64(arr)
+            result = libalgos.rank_1d_float64(arr)
             arr[mask] = np.inf
             exp = rankdata(arr)
             exp[mask] = nan
@@ -1034,26 +1033,26 @@ def test_pad_backfill_object_segfault():
     old = np.array([], dtype='O')
     new = np.array([datetime(2010, 12, 31)], dtype='O')
 
-    result = _algos.pad_object(old, new)
+    result = libalgos.pad_object(old, new)
     expected = np.array([-1], dtype=np.int64)
     assert (np.array_equal(result, expected))
 
-    result = _algos.pad_object(new, old)
+    result = libalgos.pad_object(new, old)
    expected = np.array([], dtype=np.int64)
     assert (np.array_equal(result, expected))
 
-    result = _algos.backfill_object(old, new)
+    result = libalgos.backfill_object(old, new)
     expected = np.array([-1], dtype=np.int64)
     assert (np.array_equal(result, expected))
 
-    result = _algos.backfill_object(new, old)
+    result = libalgos.backfill_object(new, old)
     expected = np.array([], dtype=np.int64)
     assert (np.array_equal(result, expected))
 
 
 def test_arrmap():
     values = np.array(['foo', 'foo', 'bar', 'bar', 'baz', 'qux'], dtype='O')
-    result = _algos.arrmap_object(values, lambda x: x in ['foo', 'bar'])
+    result = libalgos.arrmap_object(values, lambda x: x in ['foo', 'bar'])
     assert (result.dtype == np.bool_)
 
@@ -1078,7 +1077,7 @@ def test_backfill(self):
         old = Index([1, 5, 10])
         new = Index(lrange(12))
 
-        filler = _algos.backfill_int64(old.values, new.values)
+        filler = libalgos.backfill_int64(old.values, new.values)
 
         expect_filler = np.array([0, 0, 1, 1, 1, 1,
                                   2, 2, 2, 2, 2, -1], dtype=np.int64)
@@ -1087,7 +1086,7 @@ def test_backfill(self):
         # corner case
         old = Index([1, 4])
         new = Index(lrange(5, 10))
-        filler = _algos.backfill_int64(old.values, new.values)
+        filler = libalgos.backfill_int64(old.values, new.values)
 
         expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64)
         self.assert_numpy_array_equal(filler, expect_filler)
@@ -1096,7 +1095,7 @@ def test_pad(self):
         old = Index([1, 5, 10])
         new = Index(lrange(12))
 
-        filler = _algos.pad_int64(old.values, new.values)
+        filler = libalgos.pad_int64(old.values, new.values)
 
         expect_filler = np.array([-1, 0, 0, 0, 0, 1,
                                   1, 1, 1, 1, 2, 2], dtype=np.int64)
@@ -1105,7 +1104,7 @@ def test_pad(self):
         # corner case
         old = Index([5, 10])
         new = Index(lrange(5))
-        filler = _algos.pad_int64(old.values, new.values)
+        filler = libalgos.pad_int64(old.values, new.values)
         expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64)
         self.assert_numpy_array_equal(filler, expect_filler)
 
@@ -1137,7 +1136,7 @@ def test_is_lexsorted():
                            6, 5, 4, 3, 2, 1, 0])]
 
-    assert (not _algos.is_lexsorted(failure))
+    assert (not libalgos.is_lexsorted(failure))
 
 # def test_get_group_index():
 #     a = np.array([0, 1, 2, 0, 2, 1, 0, 0], dtype=np.int64)
@@ -1153,7 +1152,7 @@ def test_groupsort_indexer():
     a = np.random.randint(0, 1000, 100).astype(np.int64)
     b = np.random.randint(0, 1000, 100).astype(np.int64)
 
-    result = _algos.groupsort_indexer(a, 1000)[0]
+    result = libalgos.groupsort_indexer(a, 1000)[0]
 
     # need to use a stable sort
     expected = np.argsort(a, kind='mergesort')
@@ -1161,7 +1160,7 @@ def test_groupsort_indexer():
 
     # compare with lexsort
     key = a * 1000 + b
-    result = _algos.groupsort_indexer(key, 1000000)[0]
+    result = libalgos.groupsort_indexer(key, 1000000)[0]
     expected = np.lexsort((b, a))
     assert (np.array_equal(result, expected))
 
@@ -1172,8 +1171,8 @@ def test_infinity_sort():
     # itself.  Instead, let's give our infinities a self-consistent
     # ordering, but outside the float extended real line.
 
-    Inf = _algos.Infinity()
-    NegInf = _algos.NegInfinity()
+    Inf = libalgos.Infinity()
+    NegInf = libalgos.NegInfinity()
 
     ref_nums = [NegInf, float("-inf"), -1e100, 0, 1e100, float("inf"), Inf]
 
@@ -1191,14 +1190,14 @@ def test_infinity_sort():
         assert sorted(perm) == ref_nums
 
     # smoke tests
-    np.array([_algos.Infinity()] * 32).argsort()
-    np.array([_algos.NegInfinity()] * 32).argsort()
+    np.array([libalgos.Infinity()] * 32).argsort()
+    np.array([libalgos.NegInfinity()] * 32).argsort()
 
 
 def test_ensure_platform_int():
     arr = np.arange(100, dtype=np.intp)
 
-    result = _algos.ensure_platform_int(arr)
+    result = libalgos.ensure_platform_int(arr)
     assert (result is arr)
 
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index 8264ad33950f9..1d4dddf6477df 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -18,6 +18,7 @@
 from pandas.compat.numpy import np_array_datetime64_compat
 from pandas.core.base import PandasDelegate, NoNewAttributesMixin
 from pandas.tseries.base import DatetimeIndexOpsMixin
+from pandas._libs.tslib import iNaT
 
 
 class CheckStringMixin(object):
@@ -451,15 +452,15 @@ def test_value_counts_unique_nunique_null(self):
             if is_datetimetz(o):
                 if isinstance(o, DatetimeIndex):
                     v = o.asi8
-                    v[0:2] = pd.tslib.iNaT
+                    v[0:2] = iNaT
                     values = o._shallow_copy(v)
                 else:
                     o = o.copy()
-                    o[0:2] = pd.tslib.iNaT
+                    o[0:2] = iNaT
                     values = o._values
 
             elif needs_i8_conversion(o):
-                values[0:2] = pd.tslib.iNaT
+                values[0:2] = iNaT
                 values = o._shallow_copy(values)
             else:
                 values[0:2] = null_obj
diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py
index f086935df6dc8..5ab2bbc4ac6ba 100644
--- a/pandas/tests/test_internals.py
+++ b/pandas/tests/test_internals.py
@@ -17,7 +17,7 @@
 import pandas.core.algorithms as algos
 import pandas.util.testing as tm
 import pandas as pd
-from pandas import lib
+from pandas._libs import lib
 from pandas.util.testing import (assert_almost_equal,
                                  assert_frame_equal, randn,
                                  assert_series_equal)
 from pandas.compat import zip, u
diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py
index 2a16d7663b0cf..6723494d1529b 100644
--- a/pandas/tests/test_join.py
+++ b/pandas/tests/test_join.py
@@ -3,7 +3,7 @@
 import numpy as np
 
 from pandas import Index
-import pandas._join as _join
+from pandas._libs import join as _join
 import pandas.util.testing as tm
 from pandas.util.testing import assert_almost_equal
diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py
index 2381c52ef14b6..a925cf13900e9 100644
--- a/pandas/tests/test_lib.py
+++ b/pandas/tests/test_lib.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 import pandas as pd
-import pandas.lib as lib
+import pandas._libs.lib as lib
 import pandas.util.testing as tm
 
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index c809b39bb566e..d1b7fdadce6ae 100755
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -20,7 +20,7 @@
                            cart_product, zip)
 import pandas as pd
-import pandas.index as _index
+import pandas._libs.index as _index
 
 
 class TestMultiLevel(tm.TestCase):
diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py
index 3aed22c140ffe..0bc1d0dcd0532 100644
--- a/pandas/tests/test_take.py
+++ b/pandas/tests/test_take.py
@@ -6,7 +6,7 @@
 from pandas.compat import long
 import pandas.core.algorithms as algos
 import pandas.util.testing as tm
-from pandas.tslib import iNaT
+from pandas._libs.tslib import iNaT
 
 
 class TestTake(tm.TestCase):
diff --git a/pandas/tests/tools/test_join.py b/pandas/tests/tools/test_join.py
index ee6b3d57b852d..b65f800802bca 100644
--- a/pandas/tests/tools/test_join.py
+++ b/pandas/tests/tools/test_join.py
@@ -9,7 +9,7 @@
 from pandas.util.testing import assert_frame_equal
 from pandas import DataFrame, MultiIndex, Series, Index, merge, concat
 
-import pandas._join as _join
+from pandas._libs import join as libjoin
 import pandas.util.testing as tm
 from pandas.tests.tools.test_merge import get_test_data, N, NGROUPS
 
@@ -46,7 +46,7 @@ def test_cython_left_outer_join(self):
         right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64)
         max_group = 5
 
-        ls, rs = _join.left_outer_join(left, right, max_group)
+        ls, rs = libjoin.left_outer_join(left, right, max_group)
 
         exp_ls = left.argsort(kind='mergesort')
         exp_rs = right.argsort(kind='mergesort')
@@ -70,7 +70,7 @@ def test_cython_right_outer_join(self):
         right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64)
         max_group = 5
 
-        rs, ls = _join.left_outer_join(right, left, max_group)
+        rs, ls = libjoin.left_outer_join(right, left, max_group)
 
         exp_ls = left.argsort(kind='mergesort')
         exp_rs = right.argsort(kind='mergesort')
@@ -96,7 +96,7 @@ def test_cython_inner_join(self):
         right = a_([1, 1, 0, 4, 2, 2, 1, 4], dtype=np.int64)
         max_group = 5
 
-        ls, rs = _join.inner_join(left, right, max_group)
+        ls, rs = libjoin.inner_join(left, right, max_group)
 
         exp_ls = left.argsort(kind='mergesort')
         exp_rs = right.argsort(kind='mergesort')
diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py
index dfa1e94e4dc11..f644c353982f6 100644
--- a/pandas/tests/tseries/test_offsets.py
+++ b/pandas/tests/tseries/test_offsets.py
@@ -31,8 +31,8 @@
                     to_datetime, DateParseError)
 import pandas.tseries.offsets as offsets
 from pandas.io.pickle import read_pickle
-from pandas.tslib import normalize_date, NaT, Timestamp, Timedelta
-import pandas.tslib as tslib
+from pandas._libs.tslib import normalize_date, NaT, Timestamp, Timedelta
+import pandas._libs.tslib as tslib
 from pandas.util.testing import assertRaisesRegexp
 import pandas.util.testing as tm
 from pandas.tseries.holiday import USFederalHolidayCalendar
diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py
index 1535bd665fe8b..57a655b0b7610 100755
--- a/pandas/tests/tseries/test_resample.py
+++ b/pandas/tests/tseries/test_resample.py
@@ -26,7 +26,7 @@
 from pandas.tseries.tdi import timedelta_range, TimedeltaIndex
 from pandas.util.testing import (assert_series_equal, assert_almost_equal,
                                  assert_frame_equal, assert_index_equal)
-from pandas._period import IncompatibleFrequency
+from pandas._libs.period import IncompatibleFrequency
 
 bday = BDay()
diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py
index 771fb2f50c410..1ccc1652d2719 100644
--- a/pandas/tests/tseries/test_timezones.py
+++ b/pandas/tests/tseries/test_timezones.py
@@ -11,7 +11,8 @@
 from pandas.compat import lrange, zip
 from pandas.tseries.index import bdate_range, date_range
 from pandas.types.dtypes import DatetimeTZDtype
-from pandas import (Index, Series, DataFrame, isnull, Timestamp, tslib, NaT,
+from pandas._libs import tslib
+from pandas import (Index, Series, DataFrame, isnull, Timestamp, NaT,
                     DatetimeIndex, to_datetime)
 from pandas.util.testing import (assert_frame_equal, assert_series_equal,
                                  set_timezone)
@@ -924,7 +925,7 @@ def test_utc_with_system_utc(self):
         # Skipped on win32 due to dateutil bug
         tm._skip_if_windows()
 
-        from pandas.tslib import maybe_get_tz
+        from pandas._libs.tslib import maybe_get_tz
 
         # from system utc to real utc
         ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC'))
diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py
index 629aa63f4a0ae..a36a77a70f9ad 100644
--- a/pandas/tests/types/test_inference.py
+++ b/pandas/tests/types/test_inference.py
@@ -13,7 +13,7 @@
 import pytz
 import pandas as pd
 
-from pandas import lib, tslib
+from pandas._libs import tslib, lib
 from pandas import (Series, Index, DataFrame, Timedelta,
                     DatetimeIndex, TimedeltaIndex, Timestamp,
                     Panel, Period, Categorical)
@@ -517,28 +517,28 @@ def test_infer_dtype_period(self):
         # GH 13664
         arr = np.array([pd.Period('2011-01', freq='D'),
                         pd.Period('2011-02', freq='D')])
-        self.assertEqual(pd.lib.infer_dtype(arr), 'period')
+        self.assertEqual(lib.infer_dtype(arr), 'period')
 
         arr = np.array([pd.Period('2011-01', freq='D'),
                         pd.Period('2011-02', freq='M')])
-        self.assertEqual(pd.lib.infer_dtype(arr), 'period')
+        self.assertEqual(lib.infer_dtype(arr), 'period')
 
         # starts with nan
         for n in [pd.NaT, np.nan]:
             arr = np.array([n, pd.Period('2011-01', freq='D')])
-            self.assertEqual(pd.lib.infer_dtype(arr), 'period')
+            self.assertEqual(lib.infer_dtype(arr), 'period')
 
             arr = np.array([n, pd.Period('2011-01', freq='D'), n])
-            self.assertEqual(pd.lib.infer_dtype(arr), 'period')
+            self.assertEqual(lib.infer_dtype(arr), 'period')
 
         # different type of nat
         arr = np.array([np.datetime64('nat'), pd.Period('2011-01', freq='M')],
                        dtype=object)
-        self.assertEqual(pd.lib.infer_dtype(arr), 'mixed')
+        self.assertEqual(lib.infer_dtype(arr), 'mixed')
 
         arr = np.array([pd.Period('2011-01', freq='M'), np.datetime64('nat')],
                        dtype=object)
-        self.assertEqual(pd.lib.infer_dtype(arr), 'mixed')
+        self.assertEqual(lib.infer_dtype(arr), 'mixed')
 
     def test_infer_dtype_all_nan_nat_like(self):
         arr = np.array([np.nan, np.nan])
diff --git a/pandas/tests/types/test_io.py b/pandas/tests/types/test_io.py
index ce8e23342bf5a..b6c10394dd232 100644
--- a/pandas/tests/types/test_io.py
+++ b/pandas/tests/types/test_io.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 import numpy as np
-import pandas.lib as lib
+import pandas._libs.lib as lib
 import pandas.util.testing as tm
 
 from pandas.compat import long, u
@@ -73,7 +73,7 @@ def test_convert_sql_column_decimals(self):
         self.assert_numpy_array_equal(result, expected)
 
     def test_convert_downcast_int64(self):
-        from pandas.parser import na_values
+        from pandas.io.libparsers import na_values
 
         arr = np.array([1, 2, 7, 8, 10], dtype=np.int64)
         expected = np.array([1, 2, 7, 8, 10], dtype=np.int8)
diff --git a/pandas/tests/types/test_missing.py b/pandas/tests/types/test_missing.py
index cab44f1122ae1..2e35f5c1badbb 100644
--- a/pandas/tests/types/test_missing.py
+++ b/pandas/tests/types/test_missing.py
@@ -7,7 +7,7 @@
 import pandas as pd
 from pandas.core import config as cf
 from pandas.compat import u
-from pandas.tslib import iNaT
+from pandas._libs.tslib import iNaT
 from pandas import (NaT, Float64Index, Series,
                     DatetimeIndex, TimedeltaIndex, date_range)
 from pandas.types.dtypes import DatetimeTZDtype
diff --git a/pandas/tools/hashing.py b/pandas/tools/hashing.py
index ef863510cdd87..85ceb439435ee 100644
--- a/pandas/tools/hashing.py
+++ b/pandas/tools/hashing.py
@@ -4,8 +4,9 @@
 import itertools
 
 import numpy as np
-from pandas import _hash, Series, factorize, Categorical, Index, MultiIndex
-from pandas.lib import is_bool_array
+from pandas import Series, factorize, Categorical, Index, MultiIndex
+from pandas.tools import libhashing as _hash
+from pandas._libs.lib import is_bool_array
 from pandas.types.generic import ABCIndexClass, ABCSeries, ABCDataFrame
 from pandas.types.common import (is_categorical_dtype, is_numeric_dtype,
                                  is_datetime64_dtype, is_timedelta64_dtype,
diff --git a/pandas/src/hash.pyx b/pandas/tools/hashing.pyx
similarity index 100%
rename from pandas/src/hash.pyx
rename to pandas/tools/hashing.pyx
diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
index ba53d42fccec7..3f1e7640ba538 100644
--- a/pandas/tools/merge.py
+++ b/pandas/tools/merge.py
@@ -37,9 +37,7 @@
 from pandas.core.sorting import is_int64_overflow_possible
 import pandas.core.algorithms as algos
 import pandas.core.common as com
-
-import pandas._join as _join
-import pandas.hashtable as _hash
+from pandas._libs import hashtable as libhashtable, join as libjoin
 
 
 # back-compat of pseudo-public API
@@ -1005,8 +1003,8 @@ def get_result(self):
                                                  rdata.items, rsuf)
 
         if self.fill_method == 'ffill':
-            left_join_indexer = _join.ffill_indexer(left_indexer)
-            right_join_indexer = _join.ffill_indexer(right_indexer)
+            left_join_indexer = libjoin.ffill_indexer(left_indexer)
+            right_join_indexer = libjoin.ffill_indexer(right_indexer)
         else:
             left_join_indexer = left_indexer
             right_join_indexer = right_indexer
@@ -1030,11 +1028,11 @@ def get_result(self):
 
 
 def _asof_function(direction, on_type):
-    return getattr(_join, 'asof_join_%s_%s' % (direction, on_type), None)
+    return getattr(libjoin, 'asof_join_%s_%s' % (direction, on_type), None)
 
 
 def _asof_by_function(direction, on_type, by_type):
-    return getattr(_join, 'asof_join_%s_%s_by_%s' %
+    return getattr(libjoin, 'asof_join_%s_%s_by_%s' %
                    (direction, on_type, by_type), None)
 
 
@@ -1294,13 +1292,13 @@ def _get_multiindex_indexer(join_keys, index, sort):
     # factorize keys to a dense i8 space
     lkey, rkey, count = fkeys(lkey, rkey)
 
-    return _join.left_outer_join(lkey, rkey, count, sort=sort)
+    return libjoin.left_outer_join(lkey, rkey, count, sort=sort)
 
 
 def _get_single_indexer(join_key, index, sort=False):
     left_key, right_key, count = _factorize_keys(join_key, index, sort=sort)
 
-    left_indexer, right_indexer = _join.left_outer_join(
+    left_indexer, right_indexer = libjoin.left_outer_join(
         _ensure_int64(left_key),
         _ensure_int64(right_key),
         count, sort=sort)
@@ -1335,15 +1333,15 @@ def _left_join_on_index(left_ax, right_ax, join_keys, sort=False):
 
 
 def _right_outer_join(x, y, max_groups):
-    right_indexer, left_indexer = _join.left_outer_join(y, x, max_groups)
+    right_indexer, left_indexer = libjoin.left_outer_join(y, x, max_groups)
     return left_indexer, right_indexer
 
 
 _join_functions = {
-    'inner': _join.inner_join,
-    'left': _join.left_outer_join,
+    'inner': libjoin.inner_join,
+    'left': libjoin.left_outer_join,
     'right': _right_outer_join,
-    'outer': _join.full_outer_join,
+    'outer': libjoin.full_outer_join,
 }
 
 
@@ -1352,11 +1350,11 @@ def _factorize_keys(lk, rk, sort=True):
         lk = lk.values
         rk = rk.values
     if is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):
-        klass = _hash.Int64Factorizer
+        klass = libhashtable.Int64Factorizer
         lk = _ensure_int64(com._values_from_object(lk))
         rk = _ensure_int64(com._values_from_object(rk))
     else:
-        klass = _hash.Factorizer
+        klass = libhashtable.Factorizer
         lk = _ensure_object(lk)
         rk = _ensure_object(rk)
 
diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py
index feb4d4bfd5044..9b21e542f153c 100644
--- a/pandas/tools/tile.py
+++ b/pandas/tools/tile.py
@@ -13,7 +13,7 @@
 from pandas.compat import zip
 from pandas import to_timedelta, to_datetime
 from pandas.types.common import is_datetime64_dtype, is_timedelta64_dtype
-from pandas.lib import infer_dtype
+from pandas._libs.lib import infer_dtype
 
 import numpy as np
diff --git a/pandas/tools/util.py b/pandas/tools/util.py
index 8ec074fbf5950..bf78a9dfb65cc 100644
--- a/pandas/tools/util.py
+++ b/pandas/tools/util.py
@@ -1,5 +1,5 @@
 import numpy as np
-import pandas.lib as lib
+import pandas._libs.lib as lib
 
 from pandas.types.common import (is_number,
                                  is_numeric_dtype,
diff --git a/pandas/tseries/api.py b/pandas/tseries/api.py
index 9a07983b4d951..a00ccf99e1b96 100644
--- a/pandas/tseries/api.py
+++ b/pandas/tseries/api.py
@@ -10,5 +10,5 @@
 from pandas.tseries.period import Period, PeriodIndex, period_range, pnow
 from pandas.tseries.resample import TimeGrouper
 from pandas.tseries.timedeltas import to_timedelta
-from pandas.lib import NaT
+from pandas._libs.lib import NaT
 import pandas.tseries.offsets as offsets
diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py
index 2e22c35868cb3..ae40c2f66a590 100644
--- a/pandas/tseries/base.py
+++ b/pandas/tseries/base.py
@@ -21,9 +21,10 @@
 from pandas.core.common import AbstractMethodError
 
 import pandas.formats.printing as printing
-import pandas.tslib as tslib
-import pandas._period as prlib
-import pandas.lib as lib
+from pandas._libs import (tslib as libts, lib,
+                          Timedelta, Timestamp, iNaT, NaT)
+from pandas._libs.period import Period
+
 from pandas.core.index import Index
 from pandas.indexes.base import _index_shared_docs
 from pandas.util.decorators import Appender, cache_readonly
@@ -94,7 +95,8 @@ def _round(self, freq, rounder):
             result = (unit * rounder(values / float(unit)).astype('i8'))
         else:
             result = (unit * rounder(values / float(unit)).astype('i8'))
-        result = self._maybe_mask_results(result, fill_value=tslib.NaT)
+        result = self._maybe_mask_results(result, fill_value=NaT)
+
         attribs = self._get_attributes_dict()
         if 'freq' in attribs:
             attribs['freq'] = None
@@ -196,7 +198,7 @@ def _evaluate_compare(self, other, op):
                     result[mask] = False
                 return result
             try:
-                result[mask] = tslib.iNaT
+                result[mask] = iNaT
                 return Index(result)
             except TypeError:
                 return result
@@ -327,7 +329,7 @@ def _nat_new(self, box=True):
             - If False returns ndarray of np.int64.
         """
         result = np.zeros(len(self), dtype=np.int64)
-        result.fill(tslib.iNaT)
+        result.fill(iNaT)
         if not box:
             return result
 
@@ -392,7 +394,7 @@ def take(self, indices, axis=0, allow_fill=True,
         taken = self._assert_take_fillable(self.asi8, indices,
                                            allow_fill=allow_fill,
                                            fill_value=fill_value,
-                                           na_value=tslib.iNaT)
+                                           na_value=iNaT)
 
         # keep freq in PeriodIndex, reset otherwise
         freq = self.freq if isinstance(self, ABCPeriodIndex) else None
@@ -404,13 +406,13 @@ def get_duplicates(self):
 
     _can_hold_na = True
 
-    _na_value = tslib.NaT
+    _na_value = NaT
     """The expected NA value to use with this index."""
 
     @cache_readonly
     def _isnan(self):
         """ return if each value is nan"""
-        return (self.asi8 == tslib.iNaT)
+        return (self.asi8 == iNaT)
 
     @property
     def asobject(self):
@@ -424,7 +426,7 @@ def asobject(self):
 
     def _convert_tolerance(self, tolerance):
         try:
-            return tslib.Timedelta(tolerance).to_timedelta64()
+            return Timedelta(tolerance).to_timedelta64()
         except ValueError:
             raise ValueError('tolerance argument for %s must be convertible '
                              'to Timedelta: %r'
@@ -477,7 +479,7 @@ def min(self, axis=None, *args, **kwargs):
 
             # quick check
             if len(i8) and self.is_monotonic:
-                if i8[0] != tslib.iNaT:
+                if i8[0] != iNaT:
                     return self._box_func(i8[0])
 
             if self.hasnans:
@@ -525,7 +527,7 @@ def max(self, axis=None, *args, **kwargs):
 
             # quick check
             if len(i8) and self.is_monotonic:
-                if i8[-1] != tslib.iNaT:
+                if i8[-1] != iNaT:
                     return self._box_func(i8[-1])
 
             if self.hasnans:
@@ -643,11 +645,11 @@ def __add__(self, other):
                                 .format(typ1=type(self).__name__,
                                         typ2=type(other).__name__))
         elif isinstance(other, (DateOffset, timedelta, np.timedelta64,
-                                tslib.Timedelta)):
+                                Timedelta)):
             return self._add_delta(other)
         elif is_integer(other):
             return self.shift(other)
-        elif isinstance(other, (tslib.Timestamp, datetime)):
+        elif isinstance(other, (Timestamp, datetime)):
             return self._add_datelike(other)
         else:  # pragma: no cover
             return NotImplemented
@@ -673,13 +675,13 @@ def __sub__(self, other):
                                 .format(typ1=type(self).__name__,
                                         typ2=type(other).__name__))
         elif isinstance(other, (DateOffset, timedelta, np.timedelta64,
-                                tslib.Timedelta)):
+                                Timedelta)):
             return self._add_delta(-other)
         elif is_integer(other):
             return self.shift(-other)
-        elif isinstance(other, (tslib.Timestamp, datetime)):
+        elif isinstance(other, (Timestamp, datetime)):
             return self._sub_datelike(other)
-        elif isinstance(other, prlib.Period):
+        elif isinstance(other, Period):
             return self._sub_period(other)
         else:  # pragma: no cover
             return NotImplemented
@@ -699,11 +701,11 @@ def _add_delta_td(self, other):
         # add a delta of a timedeltalike
         # return the i8 result view
 
-        inc = tslib._delta_to_nanoseconds(other)
+        inc = libts._delta_to_nanoseconds(other)
         new_values = checked_add_with_arr(self.asi8, inc,
                                           arr_mask=self._isnan).view('i8')
         if self.hasnans:
-            new_values[self._isnan] = tslib.iNaT
+            new_values[self._isnan] = iNaT
         return new_values.view('i8')
 
     def _add_delta_tdi(self, other):
@@ -721,7 +723,7 @@ def _add_delta_tdi(self, other):
                                           b_mask=other._isnan)
         if self.hasnans or other.hasnans:
             mask = (self._isnan) | (other._isnan)
-            new_values[mask] = tslib.iNaT
+            new_values[mask] = iNaT
         return new_values.view(self.dtype)
 
     def isin(self, values):
@@ -849,7 +851,7 @@ def _append_same_dtype(self, to_concat, name):
 def _ensure_datetimelike_to_i8(other):
     """ helper for coercing an input scalar or array to i8 """
     if lib.isscalar(other) and isnull(other):
-        other = tslib.iNaT
+        other = iNaT
     elif isinstance(other, ABCIndexClass):
         # convert tz if needed
         if getattr(other, 'tz', None) is not None:
diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py
index 46e8bd43e8ff8..82fcdbcd0d367 100644
--- a/pandas/tseries/common.py
+++ b/pandas/tseries/common.py
@@ -13,10 +13,9 @@
 
 from pandas.core.base import PandasDelegate, NoNewAttributesMixin
 from pandas.tseries.index import DatetimeIndex
-from pandas._period import IncompatibleFrequency  # flake8: noqa
+from pandas._libs.period import IncompatibleFrequency  # flake8: noqa
 from pandas.tseries.period import PeriodIndex
 from pandas.tseries.tdi import TimedeltaIndex
-from pandas import tslib
 from pandas.core.algorithms import take_1d
 
diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py
index db7049ebc89b3..1f99e88ce86d6 100644
--- a/pandas/tseries/converter.py
+++ b/pandas/tseries/converter.py
@@ -20,7 +20,7 @@
 
 from pandas.compat import lrange
 import pandas.compat as compat
-import pandas.lib as lib
+import pandas._libs.lib as lib
 import pandas.core.common as com
 from pandas.core.index import Index
 
diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py
index 957a934d13f09..8013947babc5a 100644
--- a/pandas/tseries/frequencies.py
+++ b/pandas/tseries/frequencies.py
@@ -17,9 +17,9 @@
 from pandas.tseries.offsets import DateOffset
 from pandas.util.decorators import cache_readonly, deprecate_kwarg
 import pandas.tseries.offsets as offsets
-import pandas.lib as lib
-import pandas.tslib as tslib
-from pandas.tslib import Timedelta
+
+from pandas._libs import lib, tslib
+from pandas._libs.tslib import Timedelta
 
 from pytz import AmbiguousTimeError
 
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
index 5f00e8b648689..f80618ef34373 100644
--- a/pandas/tseries/index.py
+++ b/pandas/tseries/index.py
@@ -44,13 +44,9 @@
 import pandas.tseries.offsets as offsets
 import pandas.tseries.tools as tools
 
-from pandas.lib import Timestamp
-import pandas.lib as lib
-import pandas.tslib as tslib
-import pandas._period as period
-import pandas._join as _join
-import pandas.algos as _algos
-import pandas.index as _index
+from pandas._libs import (lib, index as libindex, tslib as libts,
+                          algos as libalgos, join as libjoin,
+                          Timestamp, period as libperiod)
 
 
 def _utc():
@@ -75,16 +71,16 @@ def f(self):
                             self.freq.kwds.get('month', 12))
                            if self.freq else 12)
-            result = tslib.get_start_end_field(values, field, self.freqstr,
+            result = libts.get_start_end_field(values, field, self.freqstr,
                                                month_kw)
         elif field in ['weekday_name']:
-            result = tslib.get_date_name_field(values, field)
+            result = libts.get_date_name_field(values, field)
             return self._maybe_mask_results(result)
         elif field in ['is_leap_year']:
             # no need to mask NaT
-            return tslib.get_date_field(values, field)
+            return libts.get_date_field(values, field)
         else:
-            result = tslib.get_date_field(values, field)
+            result = libts.get_date_field(values, field)
 
         return self._maybe_mask_results(result, convert='float64')
 
@@ -115,9 +111,9 @@ def wrapper(self, other):
             result = _values_from_object(result)
 
             if isinstance(other, Index):
-                o_mask = other.values.view('i8') == tslib.iNaT
+                o_mask = other.values.view('i8') == libts.iNaT
             else:
-                o_mask = other.view('i8') == tslib.iNaT
+                o_mask = other.view('i8') == libts.iNaT
 
             if o_mask.any():
                 result[o_mask] = nat_result
@@ -211,11 +207,11 @@ def _join_i8_wrapper(joinf, **kwargs):
         return DatetimeIndexOpsMixin._join_i8_wrapper(joinf, dtype='M8[ns]',
                                                       **kwargs)
 
-    _inner_indexer = _join_i8_wrapper(_join.inner_join_indexer_int64)
-    _outer_indexer = _join_i8_wrapper(_join.outer_join_indexer_int64)
-    _left_indexer = _join_i8_wrapper(_join.left_join_indexer_int64)
+    _inner_indexer = _join_i8_wrapper(libjoin.inner_join_indexer_int64)
+    _outer_indexer = _join_i8_wrapper(libjoin.outer_join_indexer_int64)
+    _left_indexer = _join_i8_wrapper(libjoin.left_join_indexer_int64)
     _left_indexer_unique = _join_i8_wrapper(
-        _join.left_join_indexer_unique_int64, with_indexers=False)
+        libjoin.left_join_indexer_unique_int64, with_indexers=False)
     _arrmap = None
 
     __eq__ = _dt_index_cmp('__eq__')
@@ -225,7 +221,7 @@ def _join_i8_wrapper(joinf, **kwargs):
     __le__ = _dt_index_cmp('__le__')
     __ge__ = _dt_index_cmp('__ge__')
 
-    _engine_type = _index.DatetimeEngine
+    _engine_type = libindex.DatetimeEngine
 
     tz = None
     offset = None
@@ -340,7 +336,7 @@ def __new__(cls, data=None,
                 verify_integrity = False
             else:
                 if data.dtype != _NS_DTYPE:
-                    subarr = tslib.cast_to_nanoseconds(data)
+                    subarr = libts.cast_to_nanoseconds(data)
                 else:
                     subarr = data
         else:
@@ -356,13 +352,13 @@ def __new__(cls, data=None,
                 tz = subarr.tz
         else:
             if tz is not None:
-                tz = tslib.maybe_get_tz(tz)
+                tz = libts.maybe_get_tz(tz)
 
                 if (not isinstance(data, DatetimeIndex) or
                         getattr(data, 'tz', None) is None):
                     # Convert tz-naive to UTC
                     ints = subarr.view('i8')
-                    subarr = tslib.tz_localize_to_utc(ints, tz,
+                    subarr = libts.tz_localize_to_utc(ints, tz,
                                                       ambiguous=ambiguous)
 
                 subarr = subarr.view(_NS_DTYPE)
@@ -430,17 +426,17 @@ def _generate(cls, start, end, periods, name, offset,
                 raise TypeError('Start and end cannot both be tz-aware with '
                                 'different timezones')
 
-            inferred_tz = tslib.maybe_get_tz(inferred_tz)
+            inferred_tz = libts.maybe_get_tz(inferred_tz)
 
             # these may need to be localized
-            tz = tslib.maybe_get_tz(tz)
+            tz = libts.maybe_get_tz(tz)
             if tz is not None:
                 date = start or end
                 if date.tzinfo is not None and hasattr(tz, 'localize'):
                     tz = tz.localize(date.replace(tzinfo=None)).tzinfo
 
         if tz is not None and inferred_tz is not None:
-            if not tslib.get_timezone(inferred_tz) == tslib.get_timezone(tz):
+            if not libts.get_timezone(inferred_tz) == libts.get_timezone(tz):
                 raise AssertionError("Inferred time zone not equal to passed "
                                      "time zone")
 
@@ -507,7 +503,7 @@ def _generate(cls, start, end, periods, name, offset,
                 index = _generate_regular_range(start, end, periods, offset)
 
         if tz is not None and getattr(index, 'tz', None) is None:
-            index = tslib.tz_localize_to_utc(_ensure_int64(index), tz,
+            index = libts.tz_localize_to_utc(_ensure_int64(index), tz,
                                              ambiguous=ambiguous)
             index = index.view(_NS_DTYPE)
 
@@ -539,11 +535,11 @@ def _local_timestamps(self):
         utc = _utc()
 
         if self.is_monotonic:
-            return tslib.tz_convert(self.asi8, utc, self.tz)
+            return libts.tz_convert(self.asi8, utc, self.tz)
         else:
             values = self.asi8
             indexer = values.argsort()
-            result = tslib.tz_convert(values.take(indexer), utc, self.tz)
+            result = libts.tz_convert(values.take(indexer), utc, self.tz)
 
             n = len(indexer)
             reverse = np.empty(n, dtype=np.int_)
@@ -576,7 +572,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None,
         result._data = values
         result.name = name
         result.offset = freq
-        result.tz = tslib.maybe_get_tz(tz)
+        result.tz = libts.maybe_get_tz(tz)
         result._reset_identity()
         return result
 
@@ -590,7 +586,7 @@ def tzinfo(self):
     @cache_readonly
     def _timezone(self):
         """ Comparable timezone both for pytz / dateutil"""
-        return tslib.get_timezone(self.tzinfo)
+        return libts.get_timezone(self.tzinfo)
 
     def _has_same_tz(self, other):
         zzone = self._timezone
@@ -599,7 +595,7 @@ def _has_same_tz(self, other):
         if isinstance(other, np.datetime64):
             # convert to Timestamp as np.datetime64 doesn't have tz attr
             other = Timestamp(other)
-        vzone = tslib.get_timezone(getattr(other, 'tzinfo', '__no_tz__'))
+        vzone = libts.get_timezone(getattr(other, 'tzinfo', '__no_tz__'))
         return zzone == vzone
 
     @classmethod
@@ -671,7 +667,7 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None,
 
     def _mpl_repr(self):
         # how to represent ourselves to matplotlib
-        return tslib.ints_to_pydatetime(self.asi8, self.tz)
+        return libts.ints_to_pydatetime(self.asi8, self.tz)
 
     @cache_readonly
     def _is_dates_only(self):
@@ -728,7 +724,7 @@ def __setstate__(self, state):
 
     def _add_datelike(self, other):
         # adding a timedeltaindex to a datetimelike
-        if other is tslib.NaT:
+        if other is libts.NaT:
             return self._nat_new(box=True)
         raise TypeError("cannot add a datelike to a DatetimeIndex")
 
@@ -741,9 +737,9 @@ def _sub_datelike(self, other):
                 raise TypeError("DatetimeIndex subtraction must have the same "
                                 "timezones or no timezones")
             result = self._sub_datelike_dti(other)
-        elif isinstance(other, (tslib.Timestamp, datetime)):
+        elif isinstance(other, (libts.Timestamp, datetime)):
             other = Timestamp(other)
-            if other is tslib.NaT:
+            if other is libts.NaT:
                 result = self._nat_new(box=False)
             # require tz compat
             elif not self._has_same_tz(other):
@@ -753,7 +749,7 @@ def _sub_datelike(self, other):
                 i8 = self.asi8
                 result = i8 - other.value
                 result = self._maybe_mask_results(result,
-                                                  fill_value=tslib.iNaT)
+                                                  fill_value=libts.iNaT)
         else:
             raise TypeError("cannot subtract DatetimeIndex and {typ}"
                             .format(typ=type(other).__name__))
@@ -769,7 +765,7 @@ def _sub_datelike_dti(self, other):
         new_values = self_i8 - other_i8
         if self.hasnans or other.hasnans:
             mask = (self._isnan) | (other._isnan)
-            new_values[mask] = tslib.iNaT
+            new_values[mask] = libts.iNaT
         return new_values.view('i8')
 
     def _maybe_update_attributes(self, attrs):
@@ -822,7 +818,7 @@ def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs):
         from pandas.formats.format import _get_format_datetime64_from_values
         format = _get_format_datetime64_from_values(self, date_format)
 
-        return tslib.format_array_from_datetime(self.asi8,
+        return libts.format_array_from_datetime(self.asi8,
                                                 tz=self.tz,
                                                 format=format,
                                                 na_rep=na_rep)
@@ -855,7 +851,7 @@ def _get_time_micros(self):
         values = self.asi8
         if self.tz is not None and self.tz is not utc:
             values = self._local_timestamps()
-        return tslib.get_time_micros(values)
+        return libts.get_time_micros(values)
 
     def to_series(self, keep_tz=False):
         """
@@ -908,7 +904,7 @@ def to_pydatetime(self):
         -------
         datetimes : ndarray
         """
-        return tslib.ints_to_pydatetime(self.asi8, tz=self.tz)
+        return libts.ints_to_pydatetime(self.asi8, tz=self.tz)
 
     def to_period(self, freq=None):
         """
@@ -1160,7 +1156,7 @@ def __iter__(self):
         for i in range(chunks):
             start_i = i * chunksize
             end_i = min((i + 1) * chunksize, l)
-            converted = tslib.ints_to_pydatetime(data[start_i:end_i],
+            converted = libts.ints_to_pydatetime(data[start_i:end_i],
                                                  tz=self.tz, freq=self.freq,
                                                  box=True)
             for v in converted:
@@ -1248,14 +1244,14 @@ def _parsed_string_to_bounds(self, reso, parsed):
                     Timestamp(datetime(parsed.year, 12, 31, 23,
                                        59, 59, 999999), tz=self.tz))
         elif reso == 'month':
-            d = tslib.monthrange(parsed.year, parsed.month)[1]
+            d = libts.monthrange(parsed.year, parsed.month)[1]
             return (Timestamp(datetime(parsed.year, parsed.month, 1),
                               tz=self.tz),
                     Timestamp(datetime(parsed.year, parsed.month, d, 23,
                                        59, 59, 999999), tz=self.tz))
         elif reso == 'quarter':
             qe = (((parsed.month - 1) + 2) % 12) + 1  # two months ahead
-            d = tslib.monthrange(parsed.year, qe)[1]   # at end of month
+            d = libts.monthrange(parsed.year, qe)[1]   # at end of month
             return (Timestamp(datetime(parsed.year, parsed.month, 1),
                               tz=self.tz),
                     Timestamp(datetime(parsed.year, qe, d, 23, 59,
@@ -1594,9 +1590,9 @@ def time(self):
         """
         Returns numpy array of datetime.time. The time part of the Timestamps.
         """
-        return self._maybe_mask_results(_algos.arrmap_object(
+        return self._maybe_mask_results(libalgos.arrmap_object(
             self.asobject.values,
-            lambda x: np.nan if x is tslib.NaT else x.time()))
+            lambda x: np.nan if x is libts.NaT else x.time()))
 
     @property
     def date(self):
@@ -1604,7 +1600,7 @@ def date(self):
         Returns numpy array of python datetime.date objects (namely, the date
         part of Timestamps without timezone information).
         """
-        return self._maybe_mask_results(_algos.arrmap_object(
+        return self._maybe_mask_results(libalgos.arrmap_object(
             self.asobject.values, lambda x: x.date()))
 
     def normalize(self):
@@ -1615,7 +1611,7 @@ def normalize(self):
         -------
         normalized : DatetimeIndex
         """
-        new_values = tslib.date_normalize(self.asi8, self.tz)
+        new_values = libts.date_normalize(self.asi8, self.tz)
         return DatetimeIndex(new_values, freq='infer', name=self.name,
                              tz=self.tz)
 
@@ -1654,11 +1650,11 @@ def is_normalized(self):
         """
         Returns True if all of the dates are at midnight ("no time")
         """
-        return tslib.dates_normalized(self.asi8, self.tz)
+        return libts.dates_normalized(self.asi8, self.tz)
 
     @cache_readonly
     def _resolution(self):
-        return period.resolution(self.asi8, self.tz)
+        return libperiod.resolution(self.asi8, self.tz)
 
     def insert(self, loc, item):
         """
@@ -1695,7 +1691,7 @@ def insert(self, loc, item):
                 new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
                                             self[loc:].asi8))
                 if self.tz is not None:
-                    new_dates = tslib.tz_convert(new_dates, 'UTC', self.tz)
+                    new_dates = libts.tz_convert(new_dates, 'UTC', self.tz)
                 return DatetimeIndex(new_dates, name=self.name, freq=freq,
                                      tz=self.tz)
 
@@ -1735,7 +1731,7 @@ def delete(self, loc):
                 freq = self.freq
 
         if self.tz is not None:
-            new_dates = tslib.tz_convert(new_dates, 'UTC', self.tz)
+            new_dates = libts.tz_convert(new_dates, 'UTC', self.tz)
         return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz)
 
     def tz_convert(self, tz):
@@ -1759,7 +1755,7 @@ def tz_convert(self, tz):
         TypeError
             If DatetimeIndex is tz-naive.
         """
-        tz = tslib.maybe_get_tz(tz)
+        tz = libts.maybe_get_tz(tz)
 
         if self.tz is None:
             # tz naive, use tz_localize
@@ -1814,14 +1810,14 @@ def tz_localize(self, tz, ambiguous='raise', errors='raise'):
         """
         if self.tz is not None:
             if tz is None:
-                new_dates = tslib.tz_convert(self.asi8, 'UTC', self.tz)
+                new_dates = libts.tz_convert(self.asi8, 'UTC', self.tz)
             else:
                 raise TypeError("Already tz-aware, use tz_convert to convert.")
         else:
-            tz = tslib.maybe_get_tz(tz)
+            tz = libts.maybe_get_tz(tz)
             # Convert to UTC
 
-            new_dates = tslib.tz_localize_to_utc(self.asi8, tz,
+            new_dates = libts.tz_localize_to_utc(self.asi8, tz,
                                                  ambiguous=ambiguous,
                                                  errors=errors)
             new_dates = new_dates.view(_NS_DTYPE)
@@ -2134,7 +2130,7 @@ def _to_m8(key, tz=None):
         # this also converts strings
         key = Timestamp(key, tz=tz)
 
-    return np.int64(tslib.pydt_to_i8(key)).view(_NS_DTYPE)
+    return np.int64(libts.pydt_to_i8(key)).view(_NS_DTYPE)
 
 
 _CACHE_START = Timestamp(datetime(1950, 1, 1))
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
index 79227f6de90a5..2b6a684fc39dd 100644
--- a/pandas/tseries/offsets.py
+++ b/pandas/tseries/offsets.py
@@ -10,8 +10,7 @@
 # import after tools, dateutil check
 from dateutil.relativedelta import relativedelta, weekday
 from dateutil.easter import easter
-import pandas.tslib as tslib
-from pandas.tslib import Timestamp, OutOfBoundsDatetime, Timedelta
+from pandas._libs import tslib, Timestamp, OutOfBoundsDatetime, Timedelta
 
 import functools
 import operator
diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py
index bfe7724a1cfaa..f7e9ba9eaa9b1 100644
--- a/pandas/tseries/period.py
+++ b/pandas/tseries/period.py
@@ -29,10 +29,11 @@
 from pandas.tseries.tools import parse_time_string
 import pandas.tseries.offsets as offsets
 
-import pandas._period as period
-from pandas._period import (Period, IncompatibleFrequency,
-                            get_period_field_arr, _validate_end_alias,
-                            _quarter_to_myear)
+from pandas._libs.lib import infer_dtype
+from pandas._libs import tslib, period
+from pandas._libs.period import (Period, IncompatibleFrequency,
+                                 get_period_field_arr, _validate_end_alias,
+                                 _quarter_to_myear)
 
 from pandas.core.base import _shared_docs
 from pandas.indexes.base import _index_shared_docs, _ensure_index
@@ -40,9 +41,8 @@
 from pandas import compat
 from pandas.util.decorators import (Appender, Substitution, cache_readonly,
                                     deprecate_kwarg)
-from pandas.lib import infer_dtype
-import pandas.tslib as tslib
 from pandas.compat import zip, u
+
 import pandas.indexes.base as ibase
 _index_doc_kwargs = dict(ibase._index_doc_kwargs)
 _index_doc_kwargs.update(
diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py
index 21d7dc0c177b6..2856b54ad9a8c 100755
--- a/pandas/tseries/resample.py
+++ b/pandas/tseries/resample.py
@@ -20,10 +20,9 @@
 import pandas.compat as compat
 from pandas.compat.numpy import function as nv
 
-from pandas.lib import Timestamp
-from pandas._period import IncompatibleFrequency
-import pandas.lib as lib
-import pandas.tslib as tslib
+from pandas._libs import lib, tslib
+from pandas._libs.lib import Timestamp
+from pandas._libs.period import IncompatibleFrequency
 
 from pandas.util.decorators import Appender
 from pandas.core.generic import _shared_docs
diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py
index c62e3fc40d4af..f47d80a31b174 100644
--- a/pandas/tseries/tdi.py
+++ b/pandas/tseries/tdi.py
@@ -30,13 +30,8 @@
 from pandas.tseries.timedeltas import (to_timedelta,
                                        _coerce_scalar_to_timedelta_type)
 from pandas.tseries.offsets import Tick, DateOffset
-
-import pandas.lib as lib
-import pandas.tslib as tslib
-import pandas._join as _join
-import pandas.index as _index
-
-Timedelta = tslib.Timedelta
+from pandas._libs import (lib, index as libindex, tslib as libts,
+                          join as libjoin, Timedelta, NaT, iNaT)
 
 
 def _td_index_cmp(opname, nat_result=False):
@@ -47,7 +42,7 @@ def _td_index_cmp(opname, nat_result=False):
     def wrapper(self, other):
         msg = "cannot compare a TimedeltaIndex with type {0}"
         func = getattr(super(TimedeltaIndex, self), opname)
-        if _is_convertible_to_td(other) or other is tslib.NaT:
+        if _is_convertible_to_td(other) or other is NaT:
             try:
                 other = _to_m8(other)
             except ValueError:
@@ -65,9 +60,9 @@ def wrapper(self, other):
             result = _values_from_object(result)
 
             if isinstance(other, Index):
-                o_mask = other.values.view('i8') == tslib.iNaT
+                o_mask = other.values.view('i8') == iNaT
             else:
-                o_mask = other.view('i8') == tslib.iNaT
+                o_mask = other.view('i8') == iNaT
 
             if o_mask.any():
                 result[o_mask] = nat_result
@@ -126,11 +121,11 @@ def _join_i8_wrapper(joinf, **kwargs):
         return DatetimeIndexOpsMixin._join_i8_wrapper(
             joinf, dtype='m8[ns]', **kwargs)
 
-    _inner_indexer = _join_i8_wrapper(_join.inner_join_indexer_int64)
-    _outer_indexer = _join_i8_wrapper(_join.outer_join_indexer_int64)
-    _left_indexer = _join_i8_wrapper(_join.left_join_indexer_int64)
+    _inner_indexer = _join_i8_wrapper(libjoin.inner_join_indexer_int64)
+    _outer_indexer = _join_i8_wrapper(libjoin.outer_join_indexer_int64)
+    _left_indexer = _join_i8_wrapper(libjoin.left_join_indexer_int64)
     _left_indexer_unique = _join_i8_wrapper(
-        _join.left_join_indexer_unique_int64, with_indexers=False)
+        libjoin.left_join_indexer_unique_int64, with_indexers=False)
     _arrmap = None
     _datetimelike_ops = ['days', 'seconds', 'microseconds', 'nanoseconds',
                          'freq', 'components']
@@ -142,7 +137,7 @@ def _join_i8_wrapper(joinf, **kwargs):
     __le__ = _td_index_cmp('__le__')
     __ge__ = _td_index_cmp('__ge__')
 
-    _engine_type = _index.TimedeltaEngine
+    _engine_type = libindex.TimedeltaEngine
 
     _comparables = ['name', 'freq']
     _attributes = ['name', 'freq']
@@ -274,7 +269,7 @@ def _box_func(self):
     def _simple_new(cls, values, name=None, freq=None, **kwargs):
         values = np.array(values, copy=False)
         if values.dtype == np.object_:
-            values = tslib.array_to_timedelta64(values)
+            values = libts.array_to_timedelta64(values)
         if values.dtype != _TD_DTYPE:
             values = _ensure_int64(values).view(_TD_DTYPE)
 
@@ -341,18 +336,18 @@ def _evaluate_with_timedelta_like(self, other, op, opstr):
     def _add_datelike(self, other):
         # adding a timedeltaindex to a datetimelike
         from pandas import Timestamp, DatetimeIndex
-        if other is tslib.NaT:
+        if other is NaT:
             result = self._nat_new(box=False)
         else:
             other = Timestamp(other)
             i8 = self.asi8
             result = checked_add_with_arr(i8, other.value)
-            result = self._maybe_mask_results(result, fill_value=tslib.iNaT)
+            result = self._maybe_mask_results(result, fill_value=iNaT)
         return DatetimeIndex(result, name=self.name, copy=False)
 
     def _sub_datelike(self, other):
         from pandas import DatetimeIndex
-        if other is tslib.NaT:
+        if other is NaT:
             result = self._nat_new(box=False)
         else:
             raise TypeError("cannot subtract a datelike from a TimedeltaIndex")
@@ -452,7 +447,7 @@ def to_pytimedelta(self):
         -------
         datetimes : ndarray
         """
-        return tslib.ints_to_pytimedelta(self.asi8)
+        return libts.ints_to_pytimedelta(self.asi8)
 
     @Appender(_index_shared_docs['astype'])
     def astype(self, dtype, copy=True):
@@ -677,7 +672,7 @@ def get_loc(self, key, method=None, tolerance=None):
             raise TypeError
 
         if isnull(key):
-            key = tslib.NaT
+            key = NaT
 
         if tolerance is not None:
             # try converting tolerance now, so errors don't get swallowed by
@@ -736,7 +731,7 @@ def _maybe_cast_slice_bound(self, label, side, kind):
 
     def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
         freq = getattr(self, 'freqstr',
                        getattr(self, 'inferred_freq', None))
-        if is_integer(key) or is_float(key) or key is tslib.NaT:
+        if is_integer(key) or is_float(key) or key is NaT:
             self._invalid_indexer('slice', key)
         loc = self._partial_td_slice(key, freq, use_lhs=use_lhs,
                                      use_rhs=use_rhs)
@@ -837,7 +832,7 @@ def insert(self, loc, item):
             pass
 
         freq = None
-        if isinstance(item, (Timedelta, tslib.NaTType)):
+        if isinstance(item, (Timedelta, libts.NaTType)):
 
             # check freq can be preserved on edge cases
             if self.freq is not None:
diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py
index 5a5d1533bfa91..ead602ee80e32 100644
--- a/pandas/tseries/timedeltas.py
+++ b/pandas/tseries/timedeltas.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 import pandas as pd
-import pandas.tslib as tslib
+import pandas._libs.tslib as tslib
 
 from pandas.types.common import (_ensure_object,
                                  is_integer_dtype,
diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py
index f746409aadfc9..093331e861fa7 100644
--- a/pandas/tseries/tools.py
+++ b/pandas/tseries/tools.py
@@ -2,8 +2,7 @@
 import numpy as np
 from collections import MutableMapping
 
-import pandas.lib as lib
-import pandas.tslib as tslib
+from pandas._libs import lib, tslib
 
 from pandas.types.common import (_ensure_object,
                                  is_datetime64_ns_dtype,
diff --git a/pandas/tslib.py b/pandas/tslib.py
new file mode 100644
index 0000000000000..3ecbffa20700d
--- /dev/null
+++ b/pandas/tslib.py
@@ -0,0 +1,8 @@
+# flake8: noqa
+
+import warnings
+warnings.warn("The pandas.tslib module is deprecated and will be "
+              "removed in a future version. Please import from "
+              "the pandas._libs.tslib instead", FutureWarning, stacklevel=2)
+from pandas._libs.tslib import (Timestamp, Timedelta,
+                                NaT, OutOfBoundsDatetime)
diff --git a/pandas/types/cast.py b/pandas/types/cast.py
index 8cc3fe41f73c8..1cd55274b9b49 100644
--- a/pandas/types/cast.py
+++ b/pandas/types/cast.py
@@ -2,8 +2,8 @@
 
 from datetime import datetime, timedelta
 import numpy as np
-from pandas import lib, tslib
-from pandas.tslib import iNaT
+from pandas._libs import tslib, lib
+from pandas._libs.tslib import iNaT
 from pandas.compat import string_types, text_type, PY3
 from .common import (_ensure_object, is_bool, is_integer, is_float,
                      is_complex, is_datetimetz, is_categorical_dtype,
@@ -807,14 +807,14 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'):
                                  "dtype [%s]" % dtype)
 
             if is_scalar(value):
-                if value == tslib.iNaT or isnull(value):
-                    value = tslib.iNaT
+                if value == iNaT or isnull(value):
+                    value = iNaT
             else:
                 value = np.array(value, copy=False)
 
                 # have a scalar array-like (e.g. NaT)
                 if value.ndim == 0:
-                    value = tslib.iNaT
+                    value = iNaT
 
                 # we have an array of datetime or timedeltas & nulls
                 elif np.prod(value.shape) or not is_dtype_equal(value.dtype,
diff --git a/pandas/types/common.py b/pandas/types/common.py
index e58e0826ea49a..1be5b5f6f1368 100644
--- a/pandas/types/common.py
+++ b/pandas/types/common.py
@@ -3,7 +3,7 @@
 import numpy as np
 from pandas.compat import (string_types, text_type, binary_type,
                            PY3, PY36)
-from pandas import lib, algos
+from pandas._libs import algos, lib
 from .dtypes import (CategoricalDtype, CategoricalDtypeType,
                      DatetimeTZDtype, DatetimeTZDtypeType,
                      PeriodDtype, PeriodDtypeType,
diff --git a/pandas/types/concat.py b/pandas/types/concat.py
index 9e47a97dd621a..b098bbb75d984 100644
--- a/pandas/types/concat.py
+++ b/pandas/types/concat.py
@@ -3,7 +3,7 @@
 """
 
 import numpy as np
-import pandas.tslib as tslib
+import pandas._libs.tslib as tslib
 from pandas import compat
 from pandas.core.algorithms import take_1d
 from .common import (is_categorical_dtype,
diff --git a/pandas/types/inference.py b/pandas/types/inference.py
index d2a2924b27659..d8e3b3ee7329b 100644
--- a/pandas/types/inference.py
+++ b/pandas/types/inference.py
@@ -6,7 +6,7 @@
 from numbers import Number
 from pandas.compat import (string_types, text_type,
                            string_and_binary_types)
-from pandas import lib
+from pandas._libs import lib
 
 is_bool = lib.is_bool
 
diff --git a/pandas/types/missing.py b/pandas/types/missing.py
index e6791b79bf3bd..cc8b5edc27542 100644
--- a/pandas/types/missing.py
+++ b/pandas/types/missing.py
@@ -2,8 +2,8 @@
 missing types & inference
 """
 import numpy as np
-from pandas import lib
-from pandas.tslib import NaT, iNaT
+from pandas._libs import lib
+from pandas._libs.tslib import NaT, iNaT
 from .generic import (ABCMultiIndex, ABCSeries,
                       ABCIndexClass, ABCGeneric)
 from .common import (is_string_dtype, is_datetimelike,
diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py
index 62ff6ef14418a..4e1719958e8b7 100644
--- a/pandas/util/decorators.py
+++ b/pandas/util/decorators.py
@@ -1,5 +1,5 @@
 from pandas.compat import StringIO, callable, signature
-from pandas.lib import cache_readonly  # noqa
+from pandas._libs.lib import cache_readonly  # noqa
 import types
 import sys
 import warnings
diff --git a/pandas/util/depr_module.py b/pandas/util/depr_module.py
index cf8b0f7960f17..b181c4627b1e1 100644
--- a/pandas/util/depr_module.py
+++ b/pandas/util/depr_module.py
@@ -13,12 +13,15 @@ class _DeprecatedModule(object):
     Parameters
     ----------
     deprmod : name of module to be deprecated.
+    deprmodto : name of module as a replacement, optional
+                if not givent will __module__
     removals : objects or methods in module that will no longer be
                accessible once module is removed.
     """
 
-    def __init__(self, deprmod, removals=None):
+    def __init__(self, deprmod, deprmodto=None, removals=None):
         self.deprmod = deprmod
+        self.deprmodto = deprmodto
         self.removals = removals
         if self.removals is not None:
             self.removals = frozenset(self.removals)
@@ -40,7 +43,15 @@ def __getattr__(self, name):
         if name in self.self_dir:
             return object.__getattribute__(self, name)
 
-        deprmodule = self._import_deprmod()
+        try:
+            deprmodule = self._import_deprmod(self.deprmod)
+        except ImportError:
+            if self.deprmodto is None:
+                raise
+
+            # a rename
+            deprmodule = self._import_deprmod(self.deprmodto)
+
         obj = getattr(deprmodule, name)
 
         if self.removals is not None and name in self.removals:
@@ -49,17 +60,24 @@ def __getattr__(self, name):
                 "a future version.".format(deprmod=self.deprmod, name=name),
                 FutureWarning, stacklevel=2)
         else:
+            deprmodto = self.deprmodto
+            if deprmodto is None:
+                deprmodto = "{modname}.{name}".format(
+                    modname=obj.__module__, name=name)
             # The object is actually located in another module.
             warnings.warn(
                 "{deprmod}.{name} is deprecated. Please use "
-                "{modname}.{name} instead.".format(
-                    deprmod=self.deprmod, modname=obj.__module__, name=name),
+                "{deprmodto}.{name} instead.".format(
+                    deprmod=self.deprmod, name=name, deprmodto=deprmodto),
                 FutureWarning, stacklevel=2)
 
         return obj
 
-    def _import_deprmod(self):
+    def _import_deprmod(self, mod=None):
+        if mod is None:
+            mod = self.deprmod
+
         with warnings.catch_warnings():
             warnings.filterwarnings('ignore', category=FutureWarning)
-            deprmodule = importlib.import_module(self.deprmod)
+            deprmodule = importlib.import_module(mod)
             return deprmodule
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index c5e5df9037daa..b68bf55a347b2 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -47,7 +47,7 @@
                     TimedeltaIndex, PeriodIndex, RangeIndex, Index, MultiIndex,
                     Series, DataFrame, Panel, Panel4D)
 from pandas.util.decorators import deprecate
-from pandas import _testing
+from pandas.util import libtesting
 from pandas.io.common import urlopen
 
 slow = pytest.mark.slow
@@ -173,7 +173,7 @@ def assert_almost_equal(left, right, check_exact=False,
             else:
                 obj = 'Input'
             assert_class_equal(left, right, obj=obj)
-    return _testing.assert_almost_equal(
+    return libtesting.assert_almost_equal(
         left, right,
         check_dtype=check_dtype,
         check_less_precise=check_less_precise,
@@ -185,7 +185,7 @@ def assert_dict_equal(left, right, compare_keys=True):
     assertIsInstance(left, dict, '[dict] ')
     assertIsInstance(right, dict, '[dict] ')
 
-    return _testing.assert_dict_equal(left, right, compare_keys=compare_keys)
+    return libtesting.assert_dict_equal(left, right, compare_keys=compare_keys)
 
 
 def randbool(size=(), p=0.5):
@@ -833,10 +833,10 @@ def _get_ilevel_values(index, level):
                    .format(obj, np.round(diff, 5))
             raise_assert_detail(obj, msg, left, right)
     else:
-        _testing.assert_almost_equal(left.values, right.values,
-                                     check_less_precise=check_less_precise,
-                                     check_dtype=exact,
-                                     obj=obj, lobj=left, robj=right)
+        libtesting.assert_almost_equal(left.values, right.values,
+                                       check_less_precise=check_less_precise,
+                                       check_dtype=exact,
+                                       obj=obj, lobj=left, robj=right)
 
     # metadata comparison
     if check_names:
@@ -1213,10 +1213,10 @@ def assert_series_equal(left, right, check_dtype=True,
         assert_numpy_array_equal(left.get_values(), right.get_values(),
                                  check_dtype=check_dtype)
     else:
-        _testing.assert_almost_equal(left.get_values(), right.get_values(),
-                                     check_less_precise=check_less_precise,
-                                     check_dtype=check_dtype,
-                                     obj='{0}'.format(obj))
+        libtesting.assert_almost_equal(left.get_values(), right.get_values(),
+                                       check_less_precise=check_less_precise,
+                                       check_dtype=check_dtype,
+                                       obj='{0}'.format(obj))
 
     # metadata comparison
     if check_names:
@@ -1432,8 +1432,10 @@ def assert_sp_array_equal(left, right, check_dtype=True):
                              check_dtype=check_dtype)
 
     # SparseIndex comparison
-    assertIsInstance(left.sp_index, pd._sparse.SparseIndex, '[SparseIndex]')
-    assertIsInstance(right.sp_index, pd._sparse.SparseIndex, '[SparseIndex]')
+    assertIsInstance(left.sp_index,
+                     pd.sparse.libsparse.SparseIndex, '[SparseIndex]')
+    assertIsInstance(right.sp_index,
+                     pd.sparse.libsparse.SparseIndex, '[SparseIndex]')
 
     if not left.sp_index.equals(right.sp_index):
         raise_assert_detail('SparseArray.index', 'index are not equal',
diff --git a/pandas/src/testing.pyx b/pandas/util/testing.pyx
similarity index 100%
rename from pandas/src/testing.pyx
rename to pandas/util/testing.pyx
diff --git a/scripts/bench_join.py b/scripts/bench_join.py
index 1ce5c94130e85..f9d43772766d8 100644
--- a/scripts/bench_join.py
+++ b/scripts/bench_join.py
@@ -1,6 +1,6 @@
 from pandas.compat import range, lrange
 import numpy as np
-import pandas.lib as lib
+import pandas._libs.lib as lib
 from pandas import *
 from copy import deepcopy
 import time
diff --git a/scripts/bench_join_multi.py b/scripts/bench_join_multi.py
index 7b93112b7f869..b19da6a2c47d8 100644
--- a/scripts/bench_join_multi.py
+++ b/scripts/bench_join_multi.py
@@ -3,7 +3,7 @@
 import numpy as np
 from pandas.compat import zip, range, lzip
 from pandas.util.testing import rands
-import pandas.lib as lib
+import pandas._libs.lib as lib
 
 N = 100000
 
diff --git a/scripts/groupby_test.py b/scripts/groupby_test.py
index 5acf7da7534a3..f640a6ed79503 100644
--- a/scripts/groupby_test.py
+++ b/scripts/groupby_test.py
@@ -5,7 +5,7 @@
 
 from pandas import *
 
-import pandas.lib as tseries
+import pandas._libs.lib as tseries
 import pandas.core.groupby as gp
 import pandas.util.testing as tm
 from pandas.compat import range
diff --git a/scripts/roll_median_leak.py b/scripts/roll_median_leak.py
index 07161cc6499bf..03f39e2b18372 100644
--- a/scripts/roll_median_leak.py
+++ b/scripts/roll_median_leak.py
@@ -7,7 +7,7 @@
 from vbench.api import Benchmark
 from pandas.util.testing import rands
 from pandas.compat import range
-import pandas.lib as lib
+import pandas._libs.lib as lib
 import pandas._sandbox as sbx
 import time
 
diff --git a/setup.py b/setup.py
index 525cbdf600c78..e257b2376060b 100755
--- a/setup.py
+++ b/setup.py
@@ -109,21 +109,21 @@ def is_platform_mac():
 
 from os.path import join as pjoin
 
-_pxipath = pjoin('pandas', 'src')
 _pxi_dep_template = {
-    'algos': ['algos_common_helper.pxi.in', 'algos_groupby_helper.pxi.in',
-              'algos_take_helper.pxi.in', 'algos_rank_helper.pxi.in'],
-    '_reshape': ['reshape_helper.pxi.in'],
-    '_join': ['join_helper.pxi.in', 'joins_func_helper.pxi.in'],
-    'hashtable': ['hashtable_class_helper.pxi.in',
-                  'hashtable_func_helper.pxi.in'],
-    'index': ['index_class_helper.pxi.in'],
-    '_sparse': ['sparse_op_helper.pxi.in']
+    'algos': ['_libs/algos_common_helper.pxi.in', '_libs/algos_groupby_helper.pxi.in',
+              '_libs/algos_take_helper.pxi.in', '_libs/algos_rank_helper.pxi.in'],
+    'join': ['_libs/join_helper.pxi.in', '_libs/join_func_helper.pxi.in'],
+    'reshape': ['_libs/reshape_helper.pxi.in'],
+    'hashtable': ['_libs/hashtable_class_helper.pxi.in',
+                  '_libs/hashtable_func_helper.pxi.in'],
+    'index': ['_libs/index_class_helper.pxi.in'],
+    'sparse': ['sparse/sparse_op_helper.pxi.in'],
 }
+
 
 _pxifiles = []
 _pxi_dep = {}
 for module,
files in _pxi_dep_template.items(): - pxi_files = [pjoin(_pxipath, x) for x in files] + pxi_files = [pjoin('pandas', x) for x in files] _pxifiles.extend(pxi_files) _pxi_dep[module] = pxi_files @@ -261,7 +261,7 @@ def initialize_options(self): self._clean_me = [] self._clean_trees = [] - base = pjoin('pandas','src') + base = pjoin('pandas','_libs', 'src') dt = pjoin(base,'datetime') src = base util = pjoin('pandas','util') @@ -327,19 +327,19 @@ def run(self): class CheckSDist(sdist_class): """Custom sdist that ensures Cython has compiled all pyx files to c.""" - _pyxfiles = ['pandas/lib.pyx', - 'pandas/hashtable.pyx', - 'pandas/tslib.pyx', - 'pandas/index.pyx', - 'pandas/algos.pyx', - 'pandas/join.pyx', - 'pandas/window.pyx', - 'pandas/parser.pyx', - 'pandas/src/period.pyx', - 'pandas/src/sparse.pyx', - 'pandas/src/testing.pyx', - 'pandas/src/hash.pyx', - 'pandas/io/sas/saslib.pyx'] + _pyxfiles = ['pandas/_libs/lib.pyx', + 'pandas/_libs/hashtable.pyx', + 'pandas/_libs/tslib.pyx', + 'pandas/_libs/period.pyx', + 'pandas/_libs/index.pyx', + 'pandas/_libs/algos.pyx', + 'pandas/_libs/join.pyx', + 'pandas/core/window.pyx', + 'pandas/sparse/sparse.pyx', + 'pandas/util/testing.pyx', + 'pandas/tools/hash.pyx', + 'pandas/io/parsers.pyx', + 'pandas/io/sas/sas.pyx'] def initialize_options(self): sdist_class.initialize_options(self) @@ -374,6 +374,7 @@ def check_cython_extensions(self, extensions): for ext in extensions: for src in ext.sources: if not os.path.exists(src): + print("{}: -> [{}]".format(ext.name, ext.sources)) raise Exception("""Cython-generated file '%s' not found. Cython is required to compile pandas from a development branch. Please install Cython or download a release package of pandas. @@ -440,12 +441,12 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): if suffix == '.pyx': lib_depends = [srcpath(f, suffix='.pyx') for f in lib_depends] - lib_depends.append('pandas/src/util.pxd') + lib_depends.append('pandas/_libs/src/util.pxd') else: lib_depends = [] plib_depends = [] -common_include = ['pandas/src/klib', 'pandas/src'] +common_include = ['pandas/_libs/src/klib', 'pandas/_libs/src'] def pxd(name): @@ -457,71 +458,70 @@ def pxd(name): else: extra_compile_args=['-Wno-unused-function'] -lib_depends = lib_depends + ['pandas/src/numpy_helper.h', - 'pandas/src/parse_helper.h'] +lib_depends = lib_depends + ['pandas/_libs/src/numpy_helper.h', + 'pandas/_libs/src/parse_helper.h'] -tseries_depends = ['pandas/src/datetime/np_datetime.h', - 'pandas/src/datetime/np_datetime_strings.h', - 'pandas/src/datetime_helper.h', - 'pandas/src/period_helper.h', - 'pandas/src/datetime.pxd'] +tseries_depends = ['pandas/_libs/src/datetime/np_datetime.h', + 'pandas/_libs/src/datetime/np_datetime_strings.h', + 'pandas/_libs/src/datetime_helper.h', + 'pandas/_libs/src/period_helper.h', + 'pandas/_libs/src/datetime.pxd'] # some linux distros require it libraries = ['m'] if not is_platform_windows() else [] -ext_data = dict( - lib={'pyxfile': 'lib', - 'pxdfiles': [], - 'depends': lib_depends}, - hashtable={'pyxfile': 'hashtable', - 'pxdfiles': ['hashtable'], - 'depends': (['pandas/src/klib/khash_python.h'] - + _pxi_dep['hashtable'])}, - tslib={'pyxfile': 'tslib', - 'depends': tseries_depends, - 'sources': ['pandas/src/datetime/np_datetime.c', - 'pandas/src/datetime/np_datetime_strings.c', - 'pandas/src/period_helper.c']}, - _period={'pyxfile': 'src/period', - 'depends': tseries_depends, - 'sources': ['pandas/src/datetime/np_datetime.c', - 'pandas/src/datetime/np_datetime_strings.c', - 
'pandas/src/period_helper.c']}, - index={'pyxfile': 'index', - 'sources': ['pandas/src/datetime/np_datetime.c', - 'pandas/src/datetime/np_datetime_strings.c'], - 'pxdfiles': ['src/util', 'hashtable'], - 'depends': _pxi_dep['index']}, - algos={'pyxfile': 'algos', - 'pxdfiles': ['src/util', 'hashtable'], - 'depends': _pxi_dep['algos']}, - _reshape={'pyxfile': 'src/reshape', - 'depends': _pxi_dep['_reshape']}, - _join={'pyxfile': 'src/join', - 'pxdfiles': ['src/util', 'hashtable'], - 'depends': _pxi_dep['_join']}, - _window={'pyxfile': 'window', - 'pxdfiles': ['src/skiplist', 'src/util'], - 'depends': ['pandas/src/skiplist.pyx', - 'pandas/src/skiplist.h']}, - parser={'pyxfile': 'parser', - 'depends': ['pandas/src/parser/tokenizer.h', - 'pandas/src/parser/io.h', - 'pandas/src/numpy_helper.h'], - 'sources': ['pandas/src/parser/tokenizer.c', - 'pandas/src/parser/io.c']}, - _sparse={'pyxfile': 'src/sparse', - 'depends': ([srcpath('sparse', suffix='.pyx')] + - _pxi_dep['_sparse'])}, - _testing={'pyxfile': 'src/testing', - 'depends': [srcpath('testing', suffix='.pyx')]}, - _hash={'pyxfile': 'src/hash', - 'depends': [srcpath('hash', suffix='.pyx')]}, -) - -ext_data["io.sas.saslib"] = {'pyxfile': 'io/sas/saslib'} +ext_data = { + '_libs.lib': {'pyxfile': '_libs/lib', + 'pxdfiles': [], + 'depends': lib_depends}, + '_libs.hashtable': {'pyxfile': '_libs/hashtable', + 'pxdfiles': ['_libs/hashtable'], + 'depends': (['pandas/_libs/src/klib/khash_python.h'] + + _pxi_dep['hashtable'])}, + '_libs.tslib': {'pyxfile': '_libs/tslib', + 'depends': tseries_depends, + 'sources': ['pandas/_libs/src/datetime/np_datetime.c', + 'pandas/_libs/src/datetime/np_datetime_strings.c', + 'pandas/_libs/src/period_helper.c']}, + '_libs.period': {'pyxfile': '_libs/period', + 'depends': tseries_depends, + 'sources': ['pandas/_libs/src/datetime/np_datetime.c', + 'pandas/_libs/src/datetime/np_datetime_strings.c', + 'pandas/_libs/src/period_helper.c']}, + '_libs.index': {'pyxfile': '_libs/index', + 'sources': ['pandas/_libs/src/datetime/np_datetime.c', + 'pandas/_libs/src/datetime/np_datetime_strings.c'], + 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'depends': _pxi_dep['index']}, + '_libs.algos': {'pyxfile': '_libs/algos', + 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'depends': _pxi_dep['algos']}, + '_libs.join': {'pyxfile': '_libs/join', + 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'depends': _pxi_dep['join']}, + '_libs.reshape': {'pyxfile': '_libs/reshape', + 'depends': _pxi_dep['reshape']}, + 'core.libwindow': {'pyxfile': 'core/window', + 'pxdfiles': ['_libs/src/skiplist', '_libs/src/util'], + 'depends': ['pandas/_libs/src/skiplist.pyx', + 'pandas/_libs/src/skiplist.h']}, + 'io.libparsers': {'pyxfile': 'io/parsers', + 'depends': ['pandas/_libs/src/parser/tokenizer.h', + 'pandas/_libs/src/parser/io.h', + 'pandas/_libs/src/numpy_helper.h'], + 'sources': ['pandas/_libs/src/parser/tokenizer.c', + 'pandas/_libs/src/parser/io.c']}, + 'sparse.libsparse': {'pyxfile': 'sparse/sparse', + 'depends': (['pandas/sparse/sparse.pyx'] + + _pxi_dep['sparse'])}, + 'util.libtesting': {'pyxfile': 'util/testing', + 'depends': ['pandas/util/testing.pyx']}, + 'tools.libhashing': {'pyxfile': 'tools/hashing', + 'depends': ['pandas/tools/hashing.pyx']}, + 'io.sas.libsas': {'pyxfile': 'io/sas/sas'}, + } extensions = [] @@ -552,25 +552,25 @@ def pxd(name): else: macros = [('__LITTLE_ENDIAN__', '1')] -packer_ext = Extension('pandas.msgpack._packer', - depends=['pandas/src/msgpack/pack.h', - 'pandas/src/msgpack/pack_template.h'], 
+packer_ext = Extension('pandas.io.msgpack._packer', + depends=['pandas/_libs/src/msgpack/pack.h', + 'pandas/_libs/src/msgpack/pack_template.h'], sources = [srcpath('_packer', suffix=suffix if suffix == '.pyx' else '.cpp', - subdir='msgpack')], + subdir='io/msgpack')], language='c++', - include_dirs=['pandas/src/msgpack'] + common_include, + include_dirs=['pandas/_libs/src/msgpack'] + common_include, define_macros=macros, extra_compile_args=extra_compile_args) -unpacker_ext = Extension('pandas.msgpack._unpacker', - depends=['pandas/src/msgpack/unpack.h', - 'pandas/src/msgpack/unpack_define.h', - 'pandas/src/msgpack/unpack_template.h'], +unpacker_ext = Extension('pandas.io.msgpack._unpacker', + depends=['pandas/_libs/src/msgpack/unpack.h', + 'pandas/_libs/src/msgpack/unpack_define.h', + 'pandas/_libs/src/msgpack/unpack_template.h'], sources = [srcpath('_unpacker', suffix=suffix if suffix == '.pyx' else '.cpp', - subdir='msgpack')], + subdir='io/msgpack')], language='c++', - include_dirs=['pandas/src/msgpack'] + common_include, + include_dirs=['pandas/_libs/src/msgpack'] + common_include, define_macros=macros, extra_compile_args=extra_compile_args) extensions.append(packer_ext) @@ -586,20 +586,20 @@ def pxd(name): root, _ = os.path.splitext(ext.sources[0]) ext.sources[0] = root + suffix -ujson_ext = Extension('pandas.json', - depends=['pandas/src/ujson/lib/ultrajson.h', - 'pandas/src/datetime_helper.h', - 'pandas/src/numpy_helper.h'], - sources=['pandas/src/ujson/python/ujson.c', - 'pandas/src/ujson/python/objToJSON.c', - 'pandas/src/ujson/python/JSONtoObj.c', - 'pandas/src/ujson/lib/ultrajsonenc.c', - 'pandas/src/ujson/lib/ultrajsondec.c', - 'pandas/src/datetime/np_datetime.c', - 'pandas/src/datetime/np_datetime_strings.c'], - include_dirs=['pandas/src/ujson/python', - 'pandas/src/ujson/lib', - 'pandas/src/datetime'] + common_include, +ujson_ext = Extension('pandas.io.json.libjson', + depends=['pandas/_libs/src/ujson/lib/ultrajson.h', + 'pandas/_libs/src/datetime_helper.h', + 'pandas/_libs/src/numpy_helper.h'], + sources=['pandas/_libs/src/ujson/python/ujson.c', + 'pandas/_libs/src/ujson/python/objToJSON.c', + 'pandas/_libs/src/ujson/python/JSONtoObj.c', + 'pandas/_libs/src/ujson/lib/ultrajsonenc.c', + 'pandas/_libs/src/ujson/lib/ultrajsondec.c', + 'pandas/_libs/src/datetime/np_datetime.c', + 'pandas/_libs/src/datetime/np_datetime_strings.c'], + include_dirs=['pandas/_libs/src/ujson/python', + 'pandas/_libs/src/ujson/lib', + 'pandas/_libs/src/datetime'] + common_include, extra_compile_args=['-D_GNU_SOURCE'] + extra_compile_args) @@ -634,6 +634,8 @@ def pxd(name): 'pandas.io', 'pandas.io.json', 'pandas.io.sas', + 'pandas.io.msgpack', + 'pandas._libs', 'pandas.formats', 'pandas.sparse', 'pandas.stats', @@ -650,10 +652,10 @@ def pxd(name): 'pandas.tests.io.json', 'pandas.tests.io.parser', 'pandas.tests.io.sas', + 'pandas.tests.io.msgpack', 'pandas.tests.groupby', 'pandas.tests.series', 'pandas.tests.formats', - 'pandas.tests.msgpack', 'pandas.tests.scalar', 'pandas.tests.sparse', 'pandas.tests.tseries', @@ -663,7 +665,6 @@ def pxd(name): 'pandas.tools', 'pandas.tseries', 'pandas.types', - 'pandas.msgpack', 'pandas.util.clipboard' ], package_data={'pandas.tests': ['data/*.csv'], diff --git a/vb_suite/pandas_vb_common.py b/vb_suite/pandas_vb_common.py index a1326d63a112a..bd2e8a1c1d504 100644 --- a/vb_suite/pandas_vb_common.py +++ b/vb_suite/pandas_vb_common.py @@ -16,7 +16,7 @@ try: import pandas._tseries as lib except: - import pandas.lib as lib + import pandas._libs.lib as lib try: Panel = 
WidePanel
From 9da0e0b233c79311c19dad6b151a157c4e47e109 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Mar 2017 19:31:22 -0500 Subject: [PATCH 160/933] CLN: clean up Makefile & fix lib.pyx deps --- Makefile | 3 --- setup.py | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/Makefile b/Makefile index 90dcd16d955d6..194a8861715b7 100644 --- a/Makefile +++ b/Makefile @@ -9,9 +9,6 @@ clean: clean_pyc: -find . -name '*.py[co]' -exec rm {} \; -sparse: pandas/src/sparse.pyx - python setup.py build_ext --inplace - build: clean_pyc python setup.py build_ext --inplace
diff --git a/setup.py b/setup.py index e257b2376060b..3e0a6b41152dc 100755 --- a/setup.py +++ b/setup.py @@ -440,7 +440,7 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): return pjoin('pandas', subdir, name + suffix) if suffix == '.pyx': - lib_depends = [srcpath(f, suffix='.pyx') for f in lib_depends] + lib_depends = [srcpath(f, suffix='.pyx', subdir='_libs/src') for f in lib_depends] lib_depends.append('pandas/_libs/src/util.pxd') else: lib_depends = [] @@ -474,13 +474,13 @@ def pxd(name): ext_data = { '_libs.lib': {'pyxfile': '_libs/lib', - 'pxdfiles': [], - 'depends': lib_depends}, + 'depends': lib_depends + tseries_depends}, '_libs.hashtable': {'pyxfile': '_libs/hashtable', 'pxdfiles': ['_libs/hashtable'], 'depends': (['pandas/_libs/src/klib/khash_python.h'] + _pxi_dep['hashtable'])}, '_libs.tslib': {'pyxfile': '_libs/tslib', + 'pxdfiles': ['_libs/src/util', '_libs/lib'], 'depends': tseries_depends, 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c',
From 8daf677b04b6797e7db894b85da7f6e5a4d356c5 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 8 Mar 2017 08:12:10 -0500 Subject: [PATCH 161/933] DOC: fix appveyor badge to point to pandas-dev account --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md index 8595043cf68c3..e05f1405419fc 100644 --- a/README.md +++ b/README.md @@ -41,8 +41,8 @@
From c9d4e0b01f8b47e7c04fb132081dce607c05757c Mon Sep 17 00:00:00 2001 From: manu Date: Wed, 8 Mar 2017 08:24:38 -0500 Subject: [PATCH 162/933] BUG: make Series.sort_values(ascending=[False]) behave as ascending=False (#15604) closes #15604 Author: manu Closes #15607 from MLopez-Ibanez/series-ascending and squashes the following commits: 6678574 [manu] BUG: make Series.sort_values(ascending=[False]) behave as ascending=False (#15604) --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/series.py | 10 ++++++++++ pandas/tests/series/test_sorting.py | 19 +++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 8f2033de6c77f..a7169640759e3 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -228,7 +228,7 @@ Other enhancements - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - +- ``Series.sort_values`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values`` (:issue:`15604`) ..
_ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/core/series.py b/pandas/core/series.py index 83036ffef0bed..f23e90effdabf 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -14,6 +14,7 @@ import numpy.ma as ma from pandas.types.common import (_coerce_to_dtype, is_categorical_dtype, + is_bool, is_integer, is_integer_dtype, is_float_dtype, is_extension_type, is_datetimetz, @@ -1719,6 +1720,15 @@ def _try_kind_sort(arr): argsorted = _try_kind_sort(arr[good]) + if is_list_like(ascending): + if len(ascending) != 1: + raise ValueError('Length of ascending (%d) must be 1 ' + 'for Series' % (len(ascending))) + ascending = ascending[0] + + if not is_bool(ascending): + raise ValueError('ascending must be boolean') + if not ascending: argsorted = argsorted[::-1] diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index db506f12a2293..590a530a847bd 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -64,6 +64,25 @@ def test_sort_values(self): ordered = ts.sort_values(ascending=False, na_position='first') assert_almost_equal(expected, ordered.valid().values) + # ascending=[False] should behave the same as ascending=False + ordered = ts.sort_values(ascending=[False]) + expected = ts.sort_values(ascending=False) + assert_series_equal(expected, ordered) + ordered = ts.sort_values(ascending=[False], na_position='first') + expected = ts.sort_values(ascending=False, na_position='first') + assert_series_equal(expected, ordered) + + self.assertRaises(ValueError, + lambda: ts.sort_values(ascending=None)) + self.assertRaises(ValueError, + lambda: ts.sort_values(ascending=[])) + self.assertRaises(ValueError, + lambda: ts.sort_values(ascending=[1, 2, 3])) + self.assertRaises(ValueError, + lambda: ts.sort_values(ascending=[False, False])) + self.assertRaises(ValueError, + lambda: ts.sort_values(ascending='foobar')) + # inplace=True ts = self.ts.copy() ts.sort_values(ascending=False, inplace=True) From 11c947997e0f7f91a4170ad7ddcc90124b7f5f2a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 8 Mar 2017 08:30:59 -0500 Subject: [PATCH 163/933] DOC: remove gbq references / clean some whatsnew --- doc/source/whatsnew/v0.20.0.txt | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a7169640759e3..92daf29efe71f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -12,7 +12,7 @@ Highlights include: - Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`) - The ``.ix`` indexer has been deprecated, see :ref:`here ` - Switched the test framework to `pytest`_ (:issue:`13097`) -- A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec, see :ref: `here ` +- A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec, see :ref:`here ` .. _pytest: http://doc.pytest.org/en/latest/ @@ -27,11 +27,6 @@ Check the :ref:`API Changes ` and :ref:`deprecations New features ~~~~~~~~~~~~ -- Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. -- ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) -- ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) - - .. 
_whatsnew_0200.enhancements.dataio_dtype: @@ -193,6 +188,11 @@ You must enable this by setting the ``display.html.table_schema`` option to True Other enhancements ^^^^^^^^^^^^^^^^^^ +- Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. +- ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) +- ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) + + - ``Series.sort_index`` accepts parameters ``kind`` and ``na_position`` (:issue:`13589`, :issue:`14444`) - ``DataFrame`` has gained a ``nunique()`` method to count the distinct values over an axis (:issue:`14336`). @@ -201,7 +201,6 @@ Other enhancements - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) - Multiple offset aliases with decimal points are now supported (e.g. '0.5min' is parsed as '30s') (:issue:`8419`) - ``.isnull()`` and ``.notnull()`` have been added to ``Index`` object to make them more consistent with the ``Series`` API (:issue:`15300`) -- ``pd.read_gbq`` method now allows query configuration preferences (:issue:`14742`) - New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack @@ -228,7 +227,7 @@ Other enhancements - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) -- ``Series.sort_values`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values`` (:issue:`15604`) + .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations @@ -444,7 +443,7 @@ Pandas Google BigQuery support has moved ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ pandas has split off Google BigQuery support into a separate package ``pandas-gbq``. You can ``pip install pandas-gbq`` to get it. -The functionality of ``pd.read_gbq()`` and ``.to_gbq()`` remains the same with the currently released version of ``pandas-gbq=0.1.2``. (:issue:`15347`) +The functionality of ``pd.read_gbq()`` and ``.to_gbq()`` remains the same with the currently released version of ``pandas-gbq=0.1.3``. (:issue:`15347`) Documentation is now hosted `here `__ .. _whatsnew_0200.api_breaking.memory_usage: @@ -611,9 +610,9 @@ Other API Changes - ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`) - ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype`` - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`) -- The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision lost for integers greather than 2**53. 
Furthermore ``FLOAT`` columns with values above 10**4 are no longer casted to ``int64`` which also caused precision loss (:issue:`14064`, :issue:`14305`). - Reorganization of timeseries development tests (:issue:`14854`) - Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`) +- ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`) .. _whatsnew_0200.deprecations: @@ -651,7 +650,7 @@ Removal of prior version deprecations/changes - ``pandas.stats.fama_macbeth``, ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var``, as well as the top-level ``pandas.fama_macbeth`` and ``pandas.ols`` routines are removed. Similar functionaility can be found in the `statsmodels `__ package. (:issue:`11898`) - The ``TimeSeries`` and ``SparseTimeSeries`` classes, aliases of ``Series`` and ``SparseSeries``, are removed (:issue:`10890`, :issue:`15098`). -- ``Series.is_time_series`` is dropped in favor of ``Series.index.is_all_dates`` (:issue:``) +- ``Series.is_time_series`` is dropped in favor of ``Series.index.is_all_dates`` (:issue:`15098`) - The deprecated ``irow``, ``icol``, ``iget`` and ``iget_value`` methods are removed in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). @@ -681,7 +680,7 @@ Bug Fixes - Bug in ``Index`` power operations with reversed operands (:issue:`14973`) - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) - Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`) -- Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds or less (:issue: `14440`, :issue:`15578`) +- Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds or less (:issue:`14440`, :issue:`15578`) - Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises error now with ``astype()`` for Series and DataFrames (:issue:`14265`) - Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type decimal.Decimal. 
(:issue:`14827`) - Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`)
From d32acaa7fbe95a96a7118a32324beea1e2e8ae32 Mon Sep 17 00:00:00 2001 From: Luca Scarabello Date: Wed, 8 Mar 2017 08:38:43 -0500 Subject: [PATCH 164/933] BUG: pd.cut with bins=1 and input all 0s The special case of running pd.cut() with bins=1 and an input containing all 0s raises a ValueError closes #15428 closes #15431 Author: Luca Scarabello Author: Luca Closes #15437 from luca-s/issue_15428 and squashes the following commits: 1248987 [Luca] rebased on master def84ba [Luca] Yet another implementation attempt 692503a [Luca Scarabello] Improved solution: using same approach as pd.cut b7d92dc [Luca] Added 'allow' duplicates option to _bins_to_cuts f56a27f [Luca Scarabello] Issue #15431 55806cf [Luca Scarabello] BUG: pd.cut with bins=1 and input all 0s --- doc/source/whatsnew/v0.20.0.txt | 4 +- pandas/tests/tools/test_tile.py | 81 +++++++++++++++++++++++++++++++-- pandas/tools/tile.py | 6 +-- 3 files changed, 83 insertions(+), 8 deletions(-)
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 92daf29efe71f..bf778f6065010 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -698,8 +698,8 @@ Bug Fixes - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`) - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`) - Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) - - +- Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`) +- Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) - Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`)
diff --git a/pandas/tests/tools/test_tile.py b/pandas/tests/tools/test_tile.py index de44eadc15751..11b242bc06e15 100644 --- a/pandas/tests/tools/test_tile.py +++ b/pandas/tests/tools/test_tile.py @@ -3,7 +3,7 @@ import numpy as np from pandas.compat import zip -from pandas import Series, Index +from pandas import Series, Index, Categorical import pandas.util.testing as tm from pandas.util.testing import assertRaisesRegexp import pandas.core.common as com @@ -239,7 +239,6 @@ def test_qcut_binning_issues(self): self.assertTrue(ep <= sn) def test_cut_return_categorical(self): - from pandas import Categorical s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) res = cut(s, 3) exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2], @@ -249,7 +248,6 @@ def test_cut_return_categorical(self): tm.assert_series_equal(res, exp) def test_qcut_return_categorical(self): - from pandas import Categorical s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) res = qcut(s, [0, 0.333, 0.666, 1]) exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2], @@ -285,6 +283,60 @@ def test_qcut_duplicates_bin(self): # invalid self.assertRaises(ValueError, qcut, values, 3, duplicates='foo') + def test_single_quantile(self): + # issue 15431 + expected = Series([0, 0]) + + s = Series([9., 9.]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0, 0], ["[9, 9]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + + s = Series([-9., -9.]) + result = qcut(s, 1, labels=False) + 
tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0, 0], ["[-9, -9]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + + s = Series([0., 0.]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0, 0], ["[0, 0]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + + expected = Series([0]) + + s = Series([9]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0], ["[9, 9]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + + s = Series([-9]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0], ["[-9, -9]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + + s = Series([0]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0], ["[0, 0]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + def test_single_bin(self): # issue 14652 expected = Series([0, 0]) @@ -297,6 +349,29 @@ def test_single_bin(self): result = cut(s, 1, labels=False) tm.assert_series_equal(result, expected) + expected = Series([0]) + + s = Series([9]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + + s = Series([-9]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + + # issue 15428 + expected = Series([0, 0]) + + s = Series([0., 0.]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + + expected = Series([0]) + + s = Series([0]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + def test_datetime_cut(self): # GH 14714 # testing for time data to be present as series diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index 9b21e542f153c..ccd8c2478e8a5 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -104,8 +104,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, mn, mx = [mi + 0.0 for mi in rng] if mn == mx: # adjust end points before binning - mn -= .001 * abs(mn) - mx += .001 * abs(mx) + mn -= .001 * abs(mn) if mn != 0 else .001 + mx += .001 * abs(mx) if mx != 0 else .001 bins = np.linspace(mn, mx, bins + 1, endpoint=True) else: # adjust end points after binning bins = np.linspace(mn, mx, bins + 1, endpoint=True) @@ -206,7 +206,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, "valid options are: raise, drop") unique_bins = algos.unique(bins) - if len(unique_bins) < len(bins): + if len(unique_bins) < len(bins) and len(bins) != 2: if duplicates == 'raise': raise ValueError("Bin edges must be unique: {}.\nYou " "can drop duplicate edges by setting " From b508a0486d0091c550964718d22b0d4292272587 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 8 Mar 2017 09:52:29 -0500 Subject: [PATCH 165/933] BLD: fix linting wrt to #15537, changes in location of pandas/src (#15614) --- ci/lint.sh | 8 ++++---- pandas/_libs/src/datetime/np_datetime.h | 6 +++--- pandas/_libs/src/datetime/np_datetime_strings.h | 6 +++--- pandas/_libs/src/datetime_helper.h | 6 +++--- pandas/_libs/src/helper.h | 6 +++--- pandas/_libs/src/numpy_helper.h | 6 +++--- pandas/_libs/src/parse_helper.h | 6 +++--- pandas/_libs/src/parser/io.h | 6 +++--- pandas/_libs/src/parser/tokenizer.h | 6 +++--- 
pandas/_libs/src/period_helper.h | 6 +++--- pandas/_libs/src/skiplist.h | 6 +++--- pandas/_libs/src/ujson/lib/ultrajson.h | 6 +++--- pandas/_libs/src/ujson/python/py_defines.h | 8 ++++---- pandas/_libs/src/ujson/python/version.h | 8 ++++---- test.bat | 2 +- test_fast.sh | 2 +- 16 files changed, 47 insertions(+), 47 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index 2ffc68e5eb139..ed3af2568811c 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -8,9 +8,9 @@ RET=0 if [ "$LINT" ]; then - # pandas/src is C code, so no need to search there. + # pandas/_libs/src is C code, so no need to search there. echo "Linting *.py" - flake8 pandas --filename=*.py --exclude pandas/src + flake8 pandas --filename=*.py --exclude pandas/_libs/src if [ $? -ne "0" ]; then RET=1 fi @@ -46,8 +46,8 @@ if [ "$LINT" ]; then echo "Linting *.c and *.h" for path in '*.h' 'period_helper.c' 'datetime' 'parser' 'ujson' do - echo "linting -> pandas/src/$path" - cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/$path + echo "linting -> pandas/_libs/src/$path" + cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/_libs/src/$path if [ $? -ne "0" ]; then RET=1 fi diff --git a/pandas/_libs/src/datetime/np_datetime.h b/pandas/_libs/src/datetime/np_datetime.h index 3445fc3e48376..97ec5782b625b 100644 --- a/pandas/_libs/src/datetime/np_datetime.h +++ b/pandas/_libs/src/datetime/np_datetime.h @@ -14,8 +14,8 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt */ -#ifndef PANDAS_SRC_DATETIME_NP_DATETIME_H_ -#define PANDAS_SRC_DATETIME_NP_DATETIME_H_ +#ifndef PANDAS__LIBS_SRC_DATETIME_NP_DATETIME_H_ +#define PANDAS__LIBS_SRC_DATETIME_NP_DATETIME_H_ #include @@ -124,4 +124,4 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta, PANDAS_DATETIMEUNIT get_datetime64_unit(PyObject *obj); -#endif // PANDAS_SRC_DATETIME_NP_DATETIME_H_ +#endif // PANDAS__LIBS_SRC_DATETIME_NP_DATETIME_H_ diff --git a/pandas/_libs/src/datetime/np_datetime_strings.h b/pandas/_libs/src/datetime/np_datetime_strings.h index 1114ec5eae064..833c1869c1664 100644 --- a/pandas/_libs/src/datetime/np_datetime_strings.h +++ b/pandas/_libs/src/datetime/np_datetime_strings.h @@ -19,8 +19,8 @@ This file implements string parsing and creation for NumPy datetime. */ -#ifndef PANDAS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ -#define PANDAS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ +#ifndef PANDAS__LIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ +#define PANDAS__LIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ /* * Parses (almost) standard ISO 8601 date strings. The differences are: @@ -103,4 +103,4 @@ make_iso_8601_datetime(pandas_datetimestruct *dts, char *outstr, int outlen, int local, PANDAS_DATETIMEUNIT base, int tzoffset, NPY_CASTING casting); -#endif // PANDAS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ +#endif // PANDAS__LIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ diff --git a/pandas/_libs/src/datetime_helper.h b/pandas/_libs/src/datetime_helper.h index bef4b4266c824..8023285f85b9b 100644 --- a/pandas/_libs/src/datetime_helper.h +++ b/pandas/_libs/src/datetime_helper.h @@ -7,8 +7,8 @@ Distributed under the terms of the BSD Simplified License. The full license is in the LICENSE file, distributed with this software. 
*/ -#ifndef PANDAS_SRC_DATETIME_HELPER_H_ -#define PANDAS_SRC_DATETIME_HELPER_H_ +#ifndef PANDAS__LIBS_SRC_DATETIME_HELPER_H_ +#define PANDAS__LIBS_SRC_DATETIME_HELPER_H_ #include #include "datetime.h" @@ -33,4 +33,4 @@ npy_float64 total_seconds(PyObject *td) { return (microseconds + (seconds + days_in_seconds) * 1000000.0) / 1000000.0; } -#endif // PANDAS_SRC_DATETIME_HELPER_H_ +#endif // PANDAS__LIBS_SRC_DATETIME_HELPER_H_ diff --git a/pandas/_libs/src/helper.h b/pandas/_libs/src/helper.h index 39bcf27e074df..26b4d033b963b 100644 --- a/pandas/_libs/src/helper.h +++ b/pandas/_libs/src/helper.h @@ -7,8 +7,8 @@ Distributed under the terms of the BSD Simplified License. The full license is in the LICENSE file, distributed with this software. */ -#ifndef PANDAS_SRC_HELPER_H_ -#define PANDAS_SRC_HELPER_H_ +#ifndef PANDAS__LIBS_SRC_HELPER_H_ +#define PANDAS__LIBS_SRC_HELPER_H_ #ifndef PANDAS_INLINE #if defined(__GNUC__) @@ -22,4 +22,4 @@ The full license is in the LICENSE file, distributed with this software. #endif #endif -#endif // PANDAS_SRC_HELPER_H_ +#endif // PANDAS__LIBS_SRC_HELPER_H_ diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h index 809edb2e99fa2..5f4db5b2f55d3 100644 --- a/pandas/_libs/src/numpy_helper.h +++ b/pandas/_libs/src/numpy_helper.h @@ -7,8 +7,8 @@ Distributed under the terms of the BSD Simplified License. The full license is in the LICENSE file, distributed with this software. */ -#ifndef PANDAS_SRC_NUMPY_HELPER_H_ -#define PANDAS_SRC_NUMPY_HELPER_H_ +#ifndef PANDAS__LIBS_SRC_NUMPY_HELPER_H_ +#define PANDAS__LIBS_SRC_NUMPY_HELPER_H_ #include "Python.h" #include "helper.h" @@ -159,4 +159,4 @@ PANDAS_INLINE PyObject* unbox_if_zerodim(PyObject* arr) { } } -#endif // PANDAS_SRC_NUMPY_HELPER_H_ +#endif // PANDAS__LIBS_SRC_NUMPY_HELPER_H_ diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h index 5d2a0dad3da17..6dd8b66eab33d 100644 --- a/pandas/_libs/src/parse_helper.h +++ b/pandas/_libs/src/parse_helper.h @@ -7,8 +7,8 @@ Distributed under the terms of the BSD Simplified License. The full license is in the LICENSE file, distributed with this software. */ -#ifndef PANDAS_SRC_PARSE_HELPER_H_ -#define PANDAS_SRC_PARSE_HELPER_H_ +#ifndef PANDAS__LIBS_SRC_PARSE_HELPER_H_ +#define PANDAS__LIBS_SRC_PARSE_HELPER_H_ #include #include @@ -270,4 +270,4 @@ static double xstrtod(const char *str, char **endptr, char decimal, char sci, return number; } -#endif // PANDAS_SRC_PARSE_HELPER_H_ +#endif // PANDAS__LIBS_SRC_PARSE_HELPER_H_ diff --git a/pandas/_libs/src/parser/io.h b/pandas/_libs/src/parser/io.h index 5a0c2b2b5e4a4..77121e9a169c1 100644 --- a/pandas/_libs/src/parser/io.h +++ b/pandas/_libs/src/parser/io.h @@ -7,8 +7,8 @@ Distributed under the terms of the BSD Simplified License. The full license is in the LICENSE file, distributed with this software. 
*/ -#ifndef PANDAS_SRC_PARSER_IO_H_ -#define PANDAS_SRC_PARSER_IO_H_ +#ifndef PANDAS__LIBS_SRC_PARSER_IO_H_ +#define PANDAS__LIBS_SRC_PARSER_IO_H_ #include "Python.h" #include "tokenizer.h" @@ -83,4 +83,4 @@ void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read, void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, int *status); -#endif // PANDAS_SRC_PARSER_IO_H_ +#endif // PANDAS__LIBS_SRC_PARSER_IO_H_ diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h index 6c1bc630ab547..9853b5149bee3 100644 --- a/pandas/_libs/src/parser/tokenizer.h +++ b/pandas/_libs/src/parser/tokenizer.h @@ -9,8 +9,8 @@ See LICENSE for the license */ -#ifndef PANDAS_SRC_PARSER_TOKENIZER_H_ -#define PANDAS_SRC_PARSER_TOKENIZER_H_ +#ifndef PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_ +#define PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_ #include #include @@ -276,4 +276,4 @@ double round_trip(const char *p, char **q, char decimal, char sci, char tsep, int skip_trailing); int to_boolean(const char *item, uint8_t *val); -#endif // PANDAS_SRC_PARSER_TOKENIZER_H_ +#endif // PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_ diff --git a/pandas/_libs/src/period_helper.h b/pandas/_libs/src/period_helper.h index 601717692ff6d..45afc074cab72 100644 --- a/pandas/_libs/src/period_helper.h +++ b/pandas/_libs/src/period_helper.h @@ -11,8 +11,8 @@ Cython to pandas. This primarily concerns interval representation and frequency conversion routines. */ -#ifndef PANDAS_SRC_PERIOD_HELPER_H_ -#define PANDAS_SRC_PERIOD_HELPER_H_ +#ifndef PANDAS__LIBS_SRC_PERIOD_HELPER_H_ +#define PANDAS__LIBS_SRC_PERIOD_HELPER_H_ #include #include "headers/stdint.h" @@ -188,4 +188,4 @@ int get_yq(npy_int64 ordinal, int freq, int *quarter, int *year); void initialize_daytime_conversion_factor_matrix(void); -#endif // PANDAS_SRC_PERIOD_HELPER_H_ +#endif // PANDAS__LIBS_SRC_PERIOD_HELPER_H_ diff --git a/pandas/_libs/src/skiplist.h b/pandas/_libs/src/skiplist.h index 013516a49fa2f..f9527e72f577e 100644 --- a/pandas/_libs/src/skiplist.h +++ b/pandas/_libs/src/skiplist.h @@ -13,8 +13,8 @@ Port of Wes McKinney's Cython version of Raymond Hettinger's original pure Python recipe (http://rhettinger.wordpress.com/2010/02/06/lost-knowledge/) */ -#ifndef PANDAS_SRC_SKIPLIST_H_ -#define PANDAS_SRC_SKIPLIST_H_ +#ifndef PANDAS__LIBS_SRC_SKIPLIST_H_ +#define PANDAS__LIBS_SRC_SKIPLIST_H_ #include #include @@ -287,4 +287,4 @@ PANDAS_INLINE int skiplist_remove(skiplist_t *skp, double value) { return 1; } -#endif // PANDAS_SRC_SKIPLIST_H_ +#endif // PANDAS__LIBS_SRC_SKIPLIST_H_ diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h index 3bfb4b26c0095..d0588348baa44 100644 --- a/pandas/_libs/src/ujson/lib/ultrajson.h +++ b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -49,8 +49,8 @@ tree doesn't have cyclic references. 
*/ -#ifndef PANDAS_SRC_UJSON_LIB_ULTRAJSON_H_ -#define PANDAS_SRC_UJSON_LIB_ULTRAJSON_H_ +#ifndef PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ +#define PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ #include #include @@ -307,4 +307,4 @@ EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, size_t cbBuffer); EXPORTFUNCTION void encode(JSOBJ, JSONObjectEncoder *, const char *, size_t); -#endif // PANDAS_SRC_UJSON_LIB_ULTRAJSON_H_ +#endif // PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ diff --git a/pandas/_libs/src/ujson/python/py_defines.h b/pandas/_libs/src/ujson/python/py_defines.h index b32285766c86a..82385fdd48a3b 100644 --- a/pandas/_libs/src/ujson/python/py_defines.h +++ b/pandas/_libs/src/ujson/python/py_defines.h @@ -16,7 +16,7 @@ modification, are permitted provided that the following conditions are met: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND @@ -35,8 +35,8 @@ Numeric decoder derived from from TCL library * Copyright (c) 1994 Sun Microsystems, Inc. */ -#ifndef PANDAS_SRC_UJSON_PYTHON_PY_DEFINES_H_ -#define PANDAS_SRC_UJSON_PYTHON_PY_DEFINES_H_ +#ifndef PANDAS__LIBS_SRC_UJSON_PYTHON_PY_DEFINES_H_ +#define PANDAS__LIBS_SRC_UJSON_PYTHON_PY_DEFINES_H_ #include @@ -55,4 +55,4 @@ Numeric decoder derived from from TCL library #endif -#endif // PANDAS_SRC_UJSON_PYTHON_PY_DEFINES_H_ +#endif // PANDAS__LIBS_SRC_UJSON_PYTHON_PY_DEFINES_H_ diff --git a/pandas/_libs/src/ujson/python/version.h b/pandas/_libs/src/ujson/python/version.h index c074ef572101d..ef6d28bf3a1f7 100644 --- a/pandas/_libs/src/ujson/python/version.h +++ b/pandas/_libs/src/ujson/python/version.h @@ -16,7 +16,7 @@ modification, are permitted provided that the following conditions are met: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND @@ -35,9 +35,9 @@ Numeric decoder derived from from TCL library * Copyright (c) 1994 Sun Microsystems, Inc. 
*/ -#ifndef PANDAS_SRC_UJSON_PYTHON_VERSION_H_ -#define PANDAS_SRC_UJSON_PYTHON_VERSION_H_ +#ifndef PANDAS__LIBS_SRC_UJSON_PYTHON_VERSION_H_ +#define PANDAS__LIBS_SRC_UJSON_PYTHON_VERSION_H_ #define UJSON_VERSION "1.33" -#endif // PANDAS_SRC_UJSON_PYTHON_VERSION_H_ +#endif // PANDAS__LIBS_SRC_UJSON_PYTHON_VERSION_H_ diff --git a/test.bat b/test.bat index 2c5f25c24a637..080a1cc163a05 100644 --- a/test.bat +++ b/test.bat @@ -1,3 +1,3 @@ :: test on windows -pytest --skip-slow --skip-network pandas +pytest --skip-slow --skip-network pandas %* diff --git a/test_fast.sh b/test_fast.sh index 30ac7f84cbe8b..9b984156a796c 100755 --- a/test_fast.sh +++ b/test_fast.sh @@ -5,4 +5,4 @@ # https://github.com/pytest-dev/pytest/issues/1075 export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') -pytest pandas --skip-slow --skip-network -m "not single" -n 4 +pytest pandas --skip-slow --skip-network -m "not single" -n 4 "$@" From 3d699884e26120618bf0bb8869bc07f1e51a2935 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 8 Mar 2017 10:07:24 -0500 Subject: [PATCH 166/933] DOC: more whatsnew fixing --- doc/source/whatsnew/v0.20.0.txt | 84 ++++++++++++++++----------------- 1 file changed, 41 insertions(+), 43 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index bf778f6065010..34358a193b360 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -289,7 +289,7 @@ Possible incompat for HDF5 formats for pandas < 0.13.0 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ``pd.TimeSeries`` was deprecated officially in 0.17.0, though has only been an alias since 0.13.0. It has -been dropped in favor of ``pd.Series``. (:issue:``15098). +been dropped in favor of ``pd.Series``. (:issue:`15098`). This *may* cause HDF5 files that were created in prior versions to become unreadable if ``pd.TimeSeries`` was used. This is most likely to be for pandas < 0.13.0. If you find yourself in this situation. @@ -328,68 +328,66 @@ then write them out again after applying the procedure below. Map on Index types now return other Index types ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- ``map`` on an ``Index`` now returns an ``Index``, not a numpy array (:issue:`12766`) +``map`` on an ``Index`` now returns an ``Index``, not a numpy array (:issue:`12766`) - .. ipython:: python - - idx = Index([1, 2]) - idx - mi = MultiIndex.from_tuples([(1, 2), (2, 4)]) - mi - - Previous Behavior: +.. ipython:: python - .. code-block:: ipython + idx = Index([1, 2]) + idx + mi = MultiIndex.from_tuples([(1, 2), (2, 4)]) + mi - In [5]: idx.map(lambda x: x * 2) - Out[5]: array([2, 4]) +Previous Behavior: - In [6]: idx.map(lambda x: (x, x * 2)) - Out[6]: array([(1, 2), (2, 4)], dtype=object) +.. code-block:: ipython - In [7]: mi.map(lambda x: x) - Out[7]: array([(1, 2), (2, 4)], dtype=object) + In [5]: idx.map(lambda x: x * 2) + Out[5]: array([2, 4]) - In [8]: mi.map(lambda x: x[0]) - Out[8]: array([1, 2]) + In [6]: idx.map(lambda x: (x, x * 2)) + Out[6]: array([(1, 2), (2, 4)], dtype=object) - New Behavior: + In [7]: mi.map(lambda x: x) + Out[7]: array([(1, 2), (2, 4)], dtype=object) - .. ipython:: python + In [8]: mi.map(lambda x: x[0]) + Out[8]: array([1, 2]) - idx.map(lambda x: x * 2) +New Behavior: - idx.map(lambda x: (x, x * 2)) +.. 
ipython:: python - mi.map(lambda x: x) + idx.map(lambda x: x * 2) + idx.map(lambda x: (x, x * 2)) - mi.map(lambda x: x[0]) + mi.map(lambda x: x) + mi.map(lambda x: x[0]) -- ``map`` on a ``Series`` with ``datetime64`` values may return ``int64`` dtypes rather than ``int32`` - .. ipython:: python +``map`` on a ``Series`` with ``datetime64`` values may return ``int64`` dtypes rather than ``int32`` - s = Series(date_range('2011-01-02T00:00', '2011-01-02T02:00', freq='H').tz_localize('Asia/Tokyo')) - s +.. ipython:: python - Previous Behavior: + s = Series(date_range('2011-01-02T00:00', '2011-01-02T02:00', freq='H').tz_localize('Asia/Tokyo')) + s - .. code-block:: ipython +Previous Behavior: - In [9]: s.map(lambda x: x.hour) - Out[9]: - 0 0 - 1 1 - 2 2 - dtype: int32 +.. code-block:: ipython + In [9]: s.map(lambda x: x.hour) + Out[9]: + 0 0 + 1 1 + 2 2 + dtype: int32 - New Behavior: +New Behavior: - .. ipython:: python +.. ipython:: python - s.map(lambda x: x.hour) + s.map(lambda x: x.hour) .. _whatsnew_0200.api_breaking.s3: @@ -443,8 +441,8 @@ Pandas Google BigQuery support has moved ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ pandas has split off Google BigQuery support into a separate package ``pandas-gbq``. You can ``pip install pandas-gbq`` to get it. -The functionality of ``pd.read_gbq()`` and ``.to_gbq()`` remains the same with the currently released version of ``pandas-gbq=0.1.3``. (:issue:`15347`) -Documentation is now hosted `here `__ +The functionality of :func:`read_gbq` and :meth:`DataFrame.to_gbq` remain the same with the currently released version of ``pandas-gbq=0.1.3``. +Documentation is now hosted `here `__ (:issue:`15347`) .. _whatsnew_0200.api_breaking.memory_usage: @@ -667,7 +665,7 @@ Performance Improvements - Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`) - Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. (:issue:`14947`) -- Improved performance of `rank()` for categorical data (:issue:`15498`) +- Improved performance of ``.rank()`` for categorical data (:issue:`15498`) - Improved performance when using ``.unstack()`` (:issue:`15503`) From 54e71a74c81ecefb55bb35934b75f4cd1fb3ded1 Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Wed, 8 Mar 2017 19:46:50 +0100 Subject: [PATCH 167/933] DOC: fix link to offset strings in resample method (#15619) --- pandas/core/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ff58a2aa77447..c45cf57152599 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4360,6 +4360,8 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, .. versionadded:: 0.19.0 + Notes + ----- To learn more about the offset strings, please see `this link `__. 
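A minimal doctest-style sketch of what such an offset string does may be useful here (it is not taken from any patch in this series; the toy Series and the ``'2T'`` alias, meaning 2-minute bins, are assumptions chosen purely for illustration):

>>> import pandas as pd
>>> s = pd.Series([1, 2, 3, 4],
...               index=pd.date_range('1/1/2000', periods=4, freq='T'))
>>> s.resample('2T').sum()  # '2T' groups the minutely data into 2-minute bins
2000-01-01 00:00:00    3
2000-01-01 00:02:00    7
Freq: 2T, dtype: int64
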
From 1a75f495271dd2e8ab55065ccc5594ee0469a17d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 9 Mar 2017 08:51:09 +0100 Subject: [PATCH 168/933] DOC: make it possible to run doctests (#15626) --- pandas/conftest.py | 11 +++++++++++ 1 file changed, 11 insertions(+)
diff --git a/pandas/conftest.py b/pandas/conftest.py index 623feb99e9cdc..e0a15f740688b 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1,5 +1,8 @@ import pytest +import numpy +import pandas + def pytest_addoption(parser): parser.addoption("--skip-slow", action="store_true", @@ -19,3 +22,11 @@ def pytest_runtest_setup(item): if 'network' in item.keywords and item.config.getoption("--skip-network"): pytest.skip("skipping due to --skip-network") + + +# For running doctests: make np and pd names available + +@pytest.fixture(autouse=True) +def add_imports(doctest_namespace): + doctest_namespace['np'] = numpy + doctest_namespace['pd'] = pandas
From 2229c26442ea28b7d69819e0b52b9bbc45afae4a Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Thu, 9 Mar 2017 08:57:27 +0100 Subject: [PATCH 169/933] DOC: add example for DataFrame.resample: keywords on and level (#15627) --- pandas/core/generic.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c45cf57152599..84a48c9be8fd9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4462,6 +4462,30 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, 2000-01-01 00:06:00 26 Freq: 3T, dtype: int64 + For DataFrame objects, the keyword ``on`` can be used to specify the + column instead of the index for resampling. + + >>> df = pd.DataFrame(data=9*[range(4)], columns=['a', 'b', 'c', 'd']) + >>> df['time'] = pd.date_range('1/1/2000', periods=9, freq='T') + >>> df.resample('3T', on='time').sum() + a b c d + time + 2000-01-01 00:00:00 0 3 6 9 + 2000-01-01 00:03:00 0 3 6 9 + 2000-01-01 00:06:00 0 3 6 9 + + For a DataFrame with MultiIndex, the keyword ``level`` can be used to + specify on which level the resampling needs to take place. + + >>> time = pd.date_range('1/1/2000', periods=5, freq='T') + >>> df2 = pd.DataFrame(data=10*[range(4)], + columns=['a', 'b', 'c', 'd'], + index=pd.MultiIndex.from_product([time, [1, 2]]) + ) + >>> df2.resample('3T', level=0).sum() + a b c d + 2000-01-01 00:00:00 0 6 12 18 + 2000-01-01 00:03:00 0 4 8 12
From a1d3ff3e3ec407915adb9d37107cd64a2028dd76 Mon Sep 17 00:00:00 2001 From: Michiel Stock Date: Thu, 9 Mar 2017 09:21:30 +0100 Subject: [PATCH 170/933] DOC: resolved mistakes in examples series (#15625) --- pandas/core/generic.py | 11 ++++--- pandas/core/series.py | 71 +++++++++++++++++++++++++++++------------- 2 files changed, 56 insertions(+), 26 deletions(-)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 84a48c9be8fd9..606906bfcd7c4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -668,6 +668,7 @@ def swaplevel(self, i=-2, j=-1, axis=0): dtype: int64 >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) >>> df.rename(2) + Traceback (most recent call last): ... TypeError: 'int' object is not callable >>> df.rename(index=str, columns={"A": "a", "B": "c"}) @@ -1115,7 +1116,7 @@ def __setstate__(self, state): to the existing workbook. This can be used to save different DataFrames to one workbook: 
This can be used to save different DataFrames to one workbook: - >>> writer = ExcelWriter('output.xlsx') + >>> writer = pd.ExcelWriter('output.xlsx') >>> df1.to_excel(writer,'Sheet1') >>> df2.to_excel(writer,'Sheet2') >>> writer.save() @@ -2260,7 +2261,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}, ... index=index) >>> df - http_status response_time + http_status response_time Firefox 200 0.04 Chrome 200 0.02 Safari 404 0.07 @@ -2275,11 +2276,11 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, ... 'Chrome'] >>> df.reindex(new_index) http_status response_time - Safari 404 0.07 + Safari 404.0 0.07 Iceweasel NaN NaN Comodo Dragon NaN NaN - IE10 404 0.08 - Chrome 200 0.02 + IE10 404.0 0.08 + Chrome 200.0 0.02 We can fill in the missing values by passing a value to the keyword ``fill_value``. Because the index is not monotonically diff --git a/pandas/core/series.py b/pandas/core/series.py index f23e90effdabf..cfa25ca1299eb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -369,10 +369,10 @@ def values(self): Timezone aware datetime data is converted to UTC: >>> pd.Series(pd.date_range('20130101', periods=3, - tz='US/Eastern')).values - array(['2013-01-01T00:00:00.000000000-0500', - '2013-01-02T00:00:00.000000000-0500', - '2013-01-03T00:00:00.000000000-0500'], dtype='datetime64[ns]') + ... tz='US/Eastern')).values + array(['2013-01-01T05:00:00.000000000', + '2013-01-02T05:00:00.000000000', + '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]') """ return self._data.external_values() @@ -1550,6 +1550,8 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): With `verify_integrity` set to True: >>> s1.append(s2, verify_integrity=True) + Traceback (most recent call last): + ... ValueError: Indexes have overlapping values: [0, 1, 2] @@ -1919,8 +1921,19 @@ def nlargest(self, n=5, keep='first'): -------- >>> import pandas as pd >>> import numpy as np - >>> s = pd.Series(np.random.randn(1e6)) + >>> s = pd.Series(np.random.randn(10**6)) >>> s.nlargest(10) # only sorts up to the N requested + 219921 4.644710 + 82124 4.608745 + 421689 4.564644 + 425277 4.447014 + 718691 4.414137 + 43154 4.403520 + 283187 4.313922 + 595519 4.273635 + 503969 4.250236 + 121637 4.240952 + dtype: float64 """ return algorithms.select_n_series(self, n=n, keep=keep, method='nlargest') @@ -1958,8 +1971,19 @@ def nsmallest(self, n=5, keep='first'): -------- >>> import pandas as pd >>> import numpy as np - >>> s = pd.Series(np.random.randn(1e6)) + >>> s = pd.Series(np.random.randn(10**6)) >>> s.nsmallest(10) # only sorts up to the N requested + 288532 -4.954580 + 732345 -4.835960 + 64803 -4.812550 + 446457 -4.609998 + 501225 -4.483945 + 669476 -4.472935 + 973615 -4.401699 + 621279 -4.355126 + 773916 -4.347355 + 359919 -4.331927 + dtype: float64 """ return algorithms.select_n_series(self, n=n, keep=keep, method='nsmallest') @@ -2052,21 +2076,24 @@ def unstack(self, level=-1, fill_value=None): Examples -------- + >>> s = pd.Series([1, 2, 3, 4], + ... index=pd.MultiIndex.from_product([['one', 'two'], ['a', 'b']])) >>> s - one a 1. - one b 2. - two a 3. - two b 4. + one a 1 + b 2 + two a 3 + b 4 + dtype: int64 >>> s.unstack(level=-1) - a b - one 1. 2. - two 3. 4. + a b + one 1 2 + two 3 4 >>> s.unstack(level=0) one two - a 1. 2. - b 3. 4. 
+ a 1 3 + b 2 4 Returns ------- @@ -2102,15 +2129,16 @@ def map(self, arg, na_action=None): >>> x = pd.Series([1,2,3], index=['one', 'two', 'three']) >>> x - one 1 - two 2 - three 3 + one 1 + two 2 + three 3 + dtype: int64 >>> y = pd.Series(['foo', 'bar', 'baz'], index=[1,2,3]) >>> y - 1 foo - 2 bar - 3 baz + 1 foo + 2 bar + 3 baz >>> x.map(y) one foo @@ -2215,6 +2243,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): >>> import numpy as np >>> series = pd.Series([20, 21, 12], index=['London', ... 'New York','Helsinki']) + >>> series London 20 New York 21 Helsinki 12 From ae0a92a68b985e845465a11a8fb0ec589001d6a9 Mon Sep 17 00:00:00 2001 From: mcocdawc Date: Thu, 9 Mar 2017 11:58:48 +0100 Subject: [PATCH 171/933] ENH: to_string/to_latex now accept list-like header arg for overwriting column names (#15548) closes #15536 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 5 +++ pandas/formats/format.py | 38 ++++++++++++---------- pandas/tests/formats/test_format.py | 11 +++++++ pandas/tests/formats/test_to_latex.py | 45 +++++++++++++++++++++++++++ 5 files changed, 84 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 34358a193b360..ad7571662b8f4 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -227,6 +227,7 @@ Other enhancements - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) +- ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now allow optional header aliases. (:issue:`15536`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4e7a5ebdf6f67..2062f301b9e0e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1516,6 +1516,8 @@ def to_feather(self, fname): from pandas.io.feather_format import to_feather to_feather(self, fname) + @Substitution(header='Write out column names. If a list of strings is given, \ +it is assumed to be aliases for the column names') @Appender(fmt.docstring_to_string, indents=1) def to_string(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, @@ -1543,6 +1545,7 @@ def to_string(self, buf=None, columns=None, col_space=None, header=True, result = formatter.buf.getvalue() return result + @Substitution(header='whether to print column labels, default True') @Appender(fmt.docstring_to_string, indents=1) def to_html(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, @@ -1596,6 +1599,8 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True, if buf is None: return formatter.buf.getvalue() + @Substitution(header='Write out column names.
If a list of strings is given, \ +it is assumed to be aliases for the column names.') @Appender(fmt.common_docstring + fmt.return_docstring, indents=1) def to_latex(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, diff --git a/pandas/formats/format.py b/pandas/formats/format.py index d354911a825bc..2665f5aea145d 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -20,9 +20,9 @@ is_float, is_numeric_dtype, is_datetime64_dtype, - is_timedelta64_dtype) + is_timedelta64_dtype, + is_list_like) from pandas.types.generic import ABCSparseArray - from pandas.core.base import PandasObject from pandas.core.index import Index, MultiIndex, _ensure_index from pandas import compat @@ -54,7 +54,7 @@ col_space : int, optional the minimum width of each column header : bool, optional - whether to print column labels, default True + %(header)s index : bool, optional whether to print index (row) labels, default True na_rep : string, optional @@ -488,32 +488,38 @@ def _to_str_columns(self): # may include levels names also str_index = self._get_formatted_index(frame) - str_columns = self._get_formatted_column_labels(frame) - if self.header: + if not is_list_like(self.header) and not self.header: stringified = [] for i, c in enumerate(frame): - cheader = str_columns[i] - max_colwidth = max(self.col_space or 0, *(self.adj.len(x) - for x in cheader)) fmt_values = self._format_col(i) fmt_values = _make_fixed_width(fmt_values, self.justify, - minimum=max_colwidth, + minimum=(self.col_space or 0), adj=self.adj) - - max_len = max(np.max([self.adj.len(x) for x in fmt_values]), - max_colwidth) - cheader = self.adj.justify(cheader, max_len, mode=self.justify) - stringified.append(cheader + fmt_values) + stringified.append(fmt_values) else: + if is_list_like(self.header): + if len(self.header) != len(self.columns): + raise ValueError(('Writing %d cols but got %d aliases' + % (len(self.columns), len(self.header)))) + str_columns = [[label] for label in self.header] + else: + str_columns = self._get_formatted_column_labels(frame) + stringified = [] for i, c in enumerate(frame): + cheader = str_columns[i] + header_colwidth = max(self.col_space or 0, + *(self.adj.len(x) for x in cheader)) fmt_values = self._format_col(i) fmt_values = _make_fixed_width(fmt_values, self.justify, - minimum=(self.col_space or 0), + minimum=header_colwidth, adj=self.adj) - stringified.append(fmt_values) + max_len = max(np.max([self.adj.len(x) for x in fmt_values]), + header_colwidth) + cheader = self.adj.justify(cheader, max_len, mode=self.justify) + stringified.append(cheader + fmt_values) strcols = stringified if self.index: diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index ddf9d35841ce7..b1f163ccf9429 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -1125,6 +1125,17 @@ def test_to_string_no_header(self): self.assertEqual(df_s, expected) + def test_to_string_specified_header(self): + df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) + + df_s = df.to_string(header=['X', 'Y']) + expected = ' X Y\n0 1 4\n1 2 5\n2 3 6' + + self.assertEqual(df_s, expected) + + with tm.assertRaises(ValueError): + df.to_string(header=['X']) + def test_to_string_no_index(self): df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) diff --git a/pandas/tests/formats/test_to_latex.py b/pandas/tests/formats/test_to_latex.py index 17e1e18f03dd6..29ead83f3bcd9 ---
a/pandas/tests/formats/test_to_latex.py +++ b/pandas/tests/formats/test_to_latex.py @@ -428,6 +428,51 @@ def test_to_latex_no_header(self): assert withoutindex_result == withoutindex_expected + def test_to_latex_specified_header(self): + # GH 7124 + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(header=['AA', 'BB']) + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +{} & AA & BB \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(header=['AA', 'BB'], index=False) + withoutindex_expected = r"""\begin{tabular}{rl} +\toprule +AA & BB \\ +\midrule + 1 & b1 \\ + 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutindex_result == withoutindex_expected + + withoutescape_result = df.to_latex(header=['$A$', '$B$'], escape=False) + withoutescape_expected = r"""\begin{tabular}{lrl} +\toprule +{} & $A$ & $B$ \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutescape_result == withoutescape_expected + + with tm.assertRaises(ValueError): + df.to_latex(header=['A']) + def test_to_latex_decimal(self, frame): # GH 12031 frame.to_latex() From 27b0ba70c7a62965af1f669f91162f01a2c7e2f5 Mon Sep 17 00:00:00 2001 From: Jim Date: Thu, 9 Mar 2017 13:05:47 +0100 Subject: [PATCH 172/933] DOC: add documentation to IndexSlice (#15623) --- doc/source/api.rst | 1 + pandas/core/indexing.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/doc/source/api.rst b/doc/source/api.rst index fbce64df84859..f126e478f424d 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1405,6 +1405,7 @@ MultiIndex :toctree: generated/ MultiIndex + IndexSlice MultiIndex Components ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6f490875742ca..546cbd8337e7e 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -43,6 +43,36 @@ def get_indexers_list(): # the public IndexSlicerMaker class _IndexSlice(object): + """ + Create an object to more easily perform multi-index slicing + + Examples + -------- + + >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']]) + >>> columns = ['foo', 'bar'] + >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))), + ... index=midx, columns=columns) + + Using the default slice command: + + >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :] + foo bar + A0 B0 0 1 + B1 2 3 + A1 B0 8 9 + B1 10 11 + + Using the IndexSlice class for a more intuitive command: + + >>> idx = pd.IndexSlice + >>> dfmi.loc[idx[:, 'B0':'B1'], :] + foo bar + A0 B0 0 1 + B1 2 3 + A1 B0 8 9 + B1 10 11 + """ def __getitem__(self, arg): return arg From df6783f68df903a58c65dc45857ef4e16440f9ee Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 9 Mar 2017 09:08:17 -0500 Subject: [PATCH 173/933] DOC: remove to_gbq from api.rst as not directly callable (DataFrame.to_gbq) is the entry point --- doc/source/api.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index f126e478f424d..7e297a15055a0 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -118,7 +118,6 @@ Google BigQuery :toctree: generated/ read_gbq - to_gbq .. currentmodule:: pandas @@ -1237,7 +1236,7 @@ Serialization / IO / Conversion Panel.to_frame Panel.to_xarray Panel.to_clipboard - + ..
_api.index: Index From a4bba287d40e5e362a20543a302b09ec90254c5f Mon Sep 17 00:00:00 2001 From: chaimdemulder Date: Thu, 9 Mar 2017 15:26:56 +0100 Subject: [PATCH 174/933] DOC: use mathjax on sphinx - #15469 Exponentially Weighted Windows page now shows formulas (#15618) --- doc/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 1e82dfca87d17..6840f76866d2c 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -46,7 +46,7 @@ 'ipython_sphinxext.ipython_console_highlighting', 'sphinx.ext.intersphinx', 'sphinx.ext.coverage', - 'sphinx.ext.pngmath', + 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig', 'sphinx.ext.linkcode', ] From 0cfc95055ca78ae0ba5189dd84f9319d175586a8 Mon Sep 17 00:00:00 2001 From: goldenbull Date: Thu, 9 Mar 2017 09:43:14 -0500 Subject: [PATCH 175/933] ENH: add compression support for 'read_pickle' and 'to_pickle' closes #11666 Author: goldenbull Author: Chen Jinniu Closes #13317 from goldenbull/pickle_io_compression and squashes the following commits: e9c5fd2 [goldenbull] docs update d50e430 [goldenbull] update docs. re-write all tests to avoid round-trip read/write comparison. 86afd25 [goldenbull] change test to new pytest parameterized style 945e7bb [goldenbull] Merge remote-tracking branch 'origin/master' into pickle_io_compression ccbeaa9 [goldenbull] move pickle compression tests into a new class 9a07250 [goldenbull] Remove prepared compressed data. _get_handle will take care of compressed I/O 1cb810b [goldenbull] add zip decompression support. refactor using lambda. b8c4175 [goldenbull] add compressed pickle data file to io/tests 6df6611 [goldenbull] pickle compression code update 81d55a0 [Chen Jinniu] Merge branch 'master' into pickle_io_compression 025a0cd [goldenbull] add compression support for pickle --- doc/source/io.rst | 39 +++++++ doc/source/whatsnew/v0.20.0.txt | 34 ++++++ pandas/core/generic.py | 8 +- pandas/io/common.py | 14 ++- pandas/io/pickle.py | 52 +++++++-- pandas/tests/io/test_pickle.py | 196 +++++++++++++++++++++++++++++++- 6 files changed, 324 insertions(+), 19 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index fa57d6d692152..67491c8b30de7 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3046,6 +3046,45 @@ any pickled pandas object (or any other pickled object) from file: These methods were previously ``pd.save`` and ``pd.load``, prior to 0.12.0, and are now deprecated. +.. _io.pickle.compression: + +Read/Write compressed pickle files +'''''''''''''' + +.. versionadded:: 0.20.0 + +:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` can read +and write compressed pickle files. Compression types of ``gzip``, ``bz2``, ``xz`` supports +both read and write. ``zip`` file supports read only and must contain only one data file +to be read in. +Compression type can be an explicitely parameter or be inferred from the file extension. +If 'infer', then use ``gzip``, ``bz2``, ``zip``, or ``xz`` if filename ends in ``'.gz'``, ``'.bz2'``, ``'.zip'``, or +``'.xz'``, respectively. + ..
ipython:: python + + df = pd.DataFrame({ + 'A': np.random.randn(1000), + 'B': np.random.randn(1000), + 'C': np.random.randn(1000)}) + df.to_pickle("data.pkl.compress", compression="gzip") # explicit compression type + df.to_pickle("data.pkl.xz", compression="infer") # infer compression type from extension + df.to_pickle("data.pkl.gz") # default, using "infer" + df["A"].to_pickle("s1.pkl.bz2") + + df = pd.read_pickle("data.pkl.compress", compression="gzip") + df = pd.read_pickle("data.pkl.xz", compression="infer") + df = pd.read_pickle("data.pkl.gz") + s = pd.read_pickle("s1.pkl.bz2") + +.. ipython:: python + :suppress: + import os + os.remove("data.pkl.compress") + os.remove("data.pkl.xz") + os.remove("data.pkl.gz") + os.remove("s1.pkl.bz2") + .. _io.msgpack: msgpack diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ad7571662b8f4..4b320d21fe738 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -94,6 +94,40 @@ support for bz2 compression in the python 2 c-engine improved (:issue:`14874`). df = pd.read_table(url, compression='bz2') # explicitly specify compression df.head(2) +.. _whatsnew_0200.enhancements.pickle_compression: + +Pickle file I/O now supports compression +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` +can now read from and write to compressed pickle files. Compression methods +can be an explicit parameter or be inferred from the file extension. +See :ref:`Read/Write compressed pickle files <io.pickle.compression>` + +.. ipython:: python + + df = pd.DataFrame({ + 'A': np.random.randn(1000), + 'B': np.random.randn(1000), + 'C': np.random.randn(1000)}) + df.to_pickle("data.pkl.compress", compression="gzip") # explicit compression type + df.to_pickle("data.pkl.xz", compression="infer") # infer compression type from extension + df.to_pickle("data.pkl.gz") # default, using "infer" + df["A"].to_pickle("s1.pkl.bz2") + + df = pd.read_pickle("data.pkl.compress", compression="gzip") + df = pd.read_pickle("data.pkl.xz", compression="infer") + df = pd.read_pickle("data.pkl.gz") + s = pd.read_pickle("s1.pkl.bz2") + +.. ipython:: python + :suppress: + import os + os.remove("data.pkl.compress") + os.remove("data.pkl.xz") + os.remove("data.pkl.gz") + os.remove("s1.pkl.bz2") + .. _whatsnew_0200.enhancements.uint64_support: UInt64 Support Improved diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 606906bfcd7c4..a0111cb9ef7ec 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1355,7 +1355,7 @@ def to_sql(self, name, con, flavor=None, schema=None, if_exists='fail', if_exists=if_exists, index=index, index_label=index_label, chunksize=chunksize, dtype=dtype) - def to_pickle(self, path): + def to_pickle(self, path, compression='infer'): """ Pickle (serialize) object to input file path. @@ -1363,9 +1363,13 @@ def to_pickle(self, path): ---------- path : string File path + compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer' + a string representing the compression to use in the output file + + ..
versionadded:: 0.20.0 """ from pandas.io.pickle import to_pickle - return to_pickle(self, path) + return to_pickle(self, path, compression=compression) def to_clipboard(self, excel=None, sep=None, **kwargs): """ diff --git a/pandas/io/common.py b/pandas/io/common.py index 74c51b74ca18a..e42d218d7925f 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -305,7 +305,7 @@ def _infer_compression(filepath_or_buffer, compression): def _get_handle(path_or_buf, mode, encoding=None, compression=None, - memory_map=False): + memory_map=False, is_text=True): """ Get file handle for given path/buffer and mode. @@ -320,7 +320,9 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, Supported compression protocols are gzip, bz2, zip, and xz memory_map : boolean, default False See parsers._parser_params for more information. - + is_text : boolean, default True + whether file/buffer is in text format (csv, json, etc.), or in binary + mode (pickle, etc.) Returns ------- f : file-like @@ -394,13 +396,17 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, elif encoding: # Python 3 and encoding f = open(path_or_buf, mode, encoding=encoding) - else: + elif is_text: # Python 3 and no explicit encoding f = open(path_or_buf, mode, errors='replace') + else: + # Python 3 and binary mode + f = open(path_or_buf, mode) handles.append(f) # in Python 3, convert BytesIO or fileobjects passed with an encoding - if compat.PY3 and (compression or isinstance(f, need_text_wrapping)): + if compat.PY3 and is_text and\ + (compression or isinstance(f, need_text_wrapping)): from io import TextIOWrapper f = TextIOWrapper(f, encoding=encoding) handles.append(f) diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 2358c296f782e..969a2a51cb15d 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -4,9 +4,10 @@ from numpy.lib.format import read_array, write_array from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc, PY3 from pandas.types.common import is_datetime64_dtype, _NS_DTYPE +from pandas.io.common import _get_handle, _infer_compression -def to_pickle(obj, path): +def to_pickle(obj, path, compression='infer'): """ Pickle (serialize) object to input file path @@ -15,12 +16,23 @@ def to_pickle(obj, path): obj : any object path : string File path + compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer' + a string representing the compression to use in the output file + + .. versionadded:: 0.20.0 """ - with open(path, 'wb') as f: + inferred_compression = _infer_compression(path, compression) + f, fh = _get_handle(path, 'wb', + compression=inferred_compression, + is_text=False) + try: pkl.dump(obj, f, protocol=pkl.HIGHEST_PROTOCOL) + finally: + for _f in fh: + _f.close() -def read_pickle(path): +def read_pickle(path, compression='infer'): """ Load pickled pandas object (or any other pickled object) from the specified file path @@ -32,12 +44,32 @@ ---------- path : string File path + compression : {'infer', 'gzip', 'bz2', 'xz', 'zip', None}, default 'infer' + For on-the-fly decompression of on-disk data. If 'infer', then use + gzip, bz2, xz or zip if path is a string ending in '.gz', '.bz2', '.xz', + or '.zip' respectively, and no decompression otherwise. + Set to None for no decompression. + + ..
versionadded:: 0.20.0 Returns ------- unpickled : type of object stored in file """ + inferred_compression = _infer_compression(path, compression) + + def read_wrapper(func): + # wrapper file handle open/close operation + f, fh = _get_handle(path, 'rb', + compression=inferred_compression, + is_text=False) + try: + return func(f) + finally: + for _f in fh: + _f.close() + def try_read(path, encoding=None): # try with cPickle # try with current pickle, if we have a Type Error then @@ -48,19 +80,16 @@ def try_read(path, encoding=None): # cpickle # GH 6899 try: - with open(path, 'rb') as fh: - return pkl.load(fh) + return read_wrapper(lambda f: pkl.load(f)) except Exception: # reg/patched pickle try: - with open(path, 'rb') as fh: - return pc.load(fh, encoding=encoding, compat=False) - + return read_wrapper( + lambda f: pc.load(f, encoding=encoding, compat=False)) # compat pickle except: - with open(path, 'rb') as fh: - return pc.load(fh, encoding=encoding, compat=True) - + return read_wrapper( + lambda f: pc.load(f, encoding=encoding, compat=True)) try: return try_read(path) except: @@ -68,6 +97,7 @@ def try_read(path, encoding=None): return try_read(path, encoding='latin1') raise + # compat with sparse pickle / unpickle diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index c736ec829808a..2fffc3c39ec26 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -15,15 +15,14 @@ import pytest import os - from distutils.version import LooseVersion - import pandas as pd from pandas import Index from pandas.compat import is_platform_little_endian import pandas import pandas.util.testing as tm from pandas.tseries.offsets import Day, MonthEnd +import shutil @pytest.fixture(scope='module') @@ -302,3 +301,196 @@ def test_pickle_v0_15_2(): # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) # tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) + + +# --------------------- +# test pickle compression +# --------------------- +_compression_to_extension = { + None: ".none", + 'gzip': '.gz', + 'bz2': '.bz2', + 'zip': '.zip', + 'xz': '.xz', +} + + +def get_random_path(): + return u'__%s__.pickle' % tm.rands(10) + + +def compress_file(src_path, dest_path, compression): + if compression is None: + shutil.copyfile(src_path, dest_path) + return + + if compression == 'gzip': + import gzip + f = gzip.open(dest_path, "w") + elif compression == 'bz2': + import bz2 + f = bz2.BZ2File(dest_path, "w") + elif compression == 'zip': + import zipfile + zip_file = zipfile.ZipFile(dest_path, "w", + compression=zipfile.ZIP_DEFLATED) + zip_file.write(src_path, os.path.basename(src_path)) + elif compression == 'xz': + lzma = pandas.compat.import_lzma() + f = lzma.LZMAFile(dest_path, "w") + else: + msg = 'Unrecognized compression type: {}'.format(compression) + raise ValueError(msg) + + if compression != "zip": + f.write(open(src_path, "rb").read()) + f.close() + + +def decompress_file(src_path, dest_path, compression): + if compression is None: + shutil.copyfile(src_path, dest_path) + return + + if compression == 'gzip': + import gzip + f = gzip.open(src_path, "r") + elif compression == 'bz2': + import bz2 + f = bz2.BZ2File(src_path, "r") + elif compression == 'zip': + import zipfile + zip_file = zipfile.ZipFile(src_path) + zip_names = zip_file.namelist() + if len(zip_names) == 1: + f = zip_file.open(zip_names.pop()) + else: + raise ValueError('ZIP file {} error. Only one file per ZIP.' 
+ .format(src_path)) + elif compression == 'xz': + lzma = pandas.compat.import_lzma() + f = lzma.LZMAFile(src_path, "r") + else: + msg = 'Unrecognized compression type: {}'.format(compression) + raise ValueError(msg) + + open(dest_path, "wb").write(f.read()) + f.close() + + +@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz']) +def test_write_explicit(compression): + # issue 11666 + if compression == 'xz': + tm._skip_if_no_lzma() + + base = get_random_path() + path1 = base + ".compressed" + path2 = base + ".raw" + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to compressed file + df.to_pickle(p1, compression=compression) + + # decompress + decompress_file(p1, p2, compression=compression) + + # read decompressed file + df2 = pd.read_pickle(p2, compression=None) + + tm.assert_frame_equal(df, df2) + + +@pytest.mark.parametrize('compression', ['', 'None', 'bad', '7z']) +def test_write_explicit_bad(compression): + with tm.assertRaisesRegexp(ValueError, + "Unrecognized compression type"): + with tm.ensure_clean(get_random_path()) as path: + df = tm.makeDataFrame() + df.to_pickle(path, compression=compression) + + +@pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.no_compress']) +def test_write_infer(ext): + if ext == '.xz': + tm._skip_if_no_lzma() + + base = get_random_path() + path1 = base + ext + path2 = base + ".raw" + compression = None + for c in _compression_to_extension: + if _compression_to_extension[c] == ext: + compression = c + break + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to compressed file by inferred compression method + df.to_pickle(p1) + + # decompress + decompress_file(p1, p2, compression=compression) + + # read decompressed file + df2 = pd.read_pickle(p2, compression=None) + + tm.assert_frame_equal(df, df2) + + +@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz', "zip"]) +def test_read_explicit(compression): + # issue 11666 + if compression == 'xz': + tm._skip_if_no_lzma() + + base = get_random_path() + path1 = base + ".raw" + path2 = base + ".compressed" + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to uncompressed file + df.to_pickle(p1, compression=None) + + # compress + compress_file(p1, p2, compression=compression) + + # read compressed file + df2 = pd.read_pickle(p2, compression=compression) + + tm.assert_frame_equal(df, df2) + + +@pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.zip', + '.no_compress']) +def test_read_infer(ext): + if ext == '.xz': + tm._skip_if_no_lzma() + + base = get_random_path() + path1 = base + ".raw" + path2 = base + ext + compression = None + for c in _compression_to_extension: + if _compression_to_extension[c] == ext: + compression = c + break + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to uncompressed file + df.to_pickle(p1, compression=None) + + # compress + compress_file(p1, p2, compression=compression) + + # read compressed file by inferred compression method + df2 = pd.read_pickle(p2) + + tm.assert_frame_equal(df, df2) From 5667a3ad0489815c1239cba785300952c9799000 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 9 Mar 2017 09:50:04 -0500 Subject: [PATCH 176/933] TST: fix up compression tests / docs --- doc/source/io.rst | 55 +++--- doc/source/whatsnew/v0.20.0.txt | 40 +++-- pandas/tests/io/test_pickle.py | 289 
++++++++++++++++++++---------- 3 files changed, 208 insertions(+), 176 deletions(-) diff --git a/doc/source/io.rst index 67491c8b30de7..fdd33ab4625f3 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3042,22 +3042,19 @@ any pickled pandas object (or any other pickled object) from file: See `this question `__ for a detailed explanation. -.. note:: - - These methods were previously ``pd.save`` and ``pd.load``, prior to 0.12.0, and are now deprecated. .. _io.pickle.compression: -Read/Write compressed pickle files -'''''''''''''' +Compressed pickle files +''''''''''''''''''''''' .. versionadded:: 0.20.0 :func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` can read -and write compressed pickle files. Compression types of ``gzip``, ``bz2``, ``xz`` supports -both read and write. ``zip`` file supports read only and must contain only one data file +and write compressed pickle files. The compression types of ``gzip``, ``bz2``, ``xz`` are supported for reading and writing. +``zip`` file supports read only and must contain only one data file +to be read in. + +The compression type can be an explicit parameter or be inferred from the file extension. If 'infer', then use ``gzip``, ``bz2``, ``zip``, or ``xz`` if filename ends in ``'.gz'``, ``'.bz2'``, ``'.zip'``, or ``'.xz'``, respectively. @@ -3065,17 +3062,37 @@ If 'infer', then use ``gzip``, ``bz2``, ``zip``, or ``xz`` if filename ends in ` df = pd.DataFrame({ 'A': np.random.randn(1000), - 'B': np.random.randn(1000), - 'C': np.random.randn(1000)}) - df.to_pickle("data.pkl.compress", compression="gzip") # explicit compression type - df.to_pickle("data.pkl.xz", compression="infer") # infer compression type from extension - df.to_pickle("data.pkl.gz") # default, using "infer" - df["A"].to_pickle("s1.pkl.bz2") + 'B': 'foo', + 'C': pd.date_range('20130101', periods=1000, freq='s')}) + df + +Using an explicit compression type + +.. ipython:: python - df = pd.read_pickle("data.pkl.compress", compression="gzip") - df = pd.read_pickle("data.pkl.xz", compression="infer") - df = pd.read_pickle("data.pkl.gz") - s = pd.read_pickle("s1.pkl.bz2") + df.to_pickle("data.pkl.compress", compression="gzip") + rt = pd.read_pickle("data.pkl.compress", compression="gzip") + rt + +Inferring compression type from the extension + +.. ipython:: python + + df.to_pickle("data.pkl.xz", compression="infer") + rt = pd.read_pickle("data.pkl.xz", compression="infer") + rt + +The default is to 'infer' + +.. ipython:: python + + df.to_pickle("data.pkl.gz") + rt = pd.read_pickle("data.pkl.gz") + rt + + df["A"].to_pickle("s1.pkl.bz2") + rt = pd.read_pickle("s1.pkl.bz2") + rt .. ipython:: python :suppress: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4b320d21fe738..8f671062464f0 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -102,23 +102,41 @@ Pickle file I/O now supports compression :func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` can now read from and write to compressed pickle files. Compression methods can be an explicit parameter or be inferred from the file extension. -See :ref:`Read/Write compressed pickle files <io.pickle.compression>` +See :ref:`the docs here <io.pickle.compression>` + ..
ipython:: python df = pd.DataFrame({ 'A': np.random.randn(1000), - 'B': np.random.randn(1000), - 'C': np.random.randn(1000)}) - df.to_pickle("data.pkl.compress", compression="gzip") # explicit compression type - df.to_pickle("data.pkl.xz", compression="infer") # infer compression type from extension - df.to_pickle("data.pkl.gz") # default, using "infer" - df["A"].to_pickle("s1.pkl.bz2") + 'B': 'foo', + 'C': pd.date_range('20130101', periods=1000, freq='s')}) + +Using an explicit compression type + +.. ipython:: python - df = pd.read_pickle("data.pkl.compress", compression="gzip") - df = pd.read_pickle("data.pkl.xz", compression="infer") - df = pd.read_pickle("data.pkl.gz") - s = pd.read_pickle("s1.pkl.bz2") + df.to_pickle("data.pkl.compress", compression="gzip") + rt = pd.read_pickle("data.pkl.compress", compression="gzip") + rt + +Inferring compression type from the extension + +.. ipython:: python + + df.to_pickle("data.pkl.xz", compression="infer") + rt = pd.read_pickle("data.pkl.xz", compression="infer") + rt + +The default is to 'infer' + +.. ipython:: python + + df.to_pickle("data.pkl.gz") + rt = pd.read_pickle("data.pkl.gz") + rt + df["A"].to_pickle("s1.pkl.bz2") + rt = pd.read_pickle("s1.pkl.bz2") + rt .. ipython:: python :suppress: diff --git a/pandas/tests/io/test_pickle.py index 2fffc3c39ec26..91e70e942089c 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -306,191 +306,188 @@ def test_pickle_v0_15_2(): # --------------------- # test pickle compression # --------------------- -@pytest.fixture def get_random_path(): return u'__%s__.pickle' % tm.rands(10) -def compress_file(src_path, dest_path, compression): - if compression is None: - shutil.copyfile(src_path, dest_path) - return - - if compression == 'gzip': - import gzip - f = gzip.open(dest_path, "w") - elif compression == 'bz2': - import bz2 - f = bz2.BZ2File(dest_path, "w") - elif compression == 'zip': - import zipfile - zip_file = zipfile.ZipFile(dest_path, "w", - compression=zipfile.ZIP_DEFLATED) - zip_file.write(src_path, os.path.basename(src_path)) - elif compression == 'xz': - lzma = pandas.compat.import_lzma() - f = lzma.LZMAFile(dest_path, "w") - else: - msg = 'Unrecognized compression type: {}'.format(compression) - raise ValueError(msg) - - if compression != "zip": - f.write(open(src_path, "rb").read()) - f.close() +class TestCompression(object): + _compression_to_extension = { + None: ".none", + 'gzip': '.gz', + 'bz2': '.bz2', + 'zip': '.zip', + 'xz': '.xz', + } -def decompress_file(src_path, dest_path, compression): - if compression is None: - shutil.copyfile(src_path, dest_path) - return + def compress_file(self, src_path, dest_path, compression): + if compression is None: + shutil.copyfile(src_path, dest_path) + return - if compression == 'gzip': - import gzip - f = gzip.open(src_path, "r") - elif compression == 'bz2': - import bz2 - f = bz2.BZ2File(src_path, "r") - elif compression == 'zip': - import zipfile - zip_file = zipfile.ZipFile(src_path) - zip_names = zip_file.namelist() - if len(zip_names) == 1: - f = zip_file.open(zip_names.pop()) + if compression == 'gzip': + import gzip + f = gzip.open(dest_path, "w") + elif compression == 'bz2': + import bz2 + f = bz2.BZ2File(dest_path, "w") + elif compression == 'zip': + import zipfile + zip_file = zipfile.ZipFile(dest_path, "w", + compression=zipfile.ZIP_DEFLATED) +
zip_file.write(src_path, os.path.basename(src_path)) + elif compression == 'xz': + lzma = pandas.compat.import_lzma() + f = lzma.LZMAFile(dest_path, "w") else: - raise ValueError('ZIP file {} error. Only one file per ZIP.' - .format(src_path)) - elif compression == 'xz': - lzma = pandas.compat.import_lzma() - f = lzma.LZMAFile(src_path, "r") - else: - msg = 'Unrecognized compression type: {}'.format(compression) - raise ValueError(msg) - - open(dest_path, "wb").write(f.read()) - f.close() + msg = 'Unrecognized compression type: {}'.format(compression) + raise ValueError(msg) + if compression != "zip": + f.write(open(src_path, "rb").read()) + f.close() -@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz']) -def test_write_explicit(compression): - # issue 11666 - if compression == 'xz': - tm._skip_if_no_lzma() + def decompress_file(self, src_path, dest_path, compression): + if compression is None: + shutil.copyfile(src_path, dest_path) + return - base = get_random_path() - path1 = base + ".compressed" - path2 = base + ".raw" + if compression == 'gzip': + import gzip + f = gzip.open(src_path, "r") + elif compression == 'bz2': + import bz2 + f = bz2.BZ2File(src_path, "r") + elif compression == 'zip': + import zipfile + zip_file = zipfile.ZipFile(src_path) + zip_names = zip_file.namelist() + if len(zip_names) == 1: + f = zip_file.open(zip_names.pop()) + else: + raise ValueError('ZIP file {} error. Only one file per ZIP.' + .format(src_path)) + elif compression == 'xz': + lzma = pandas.compat.import_lzma() + f = lzma.LZMAFile(src_path, "r") + else: + msg = 'Unrecognized compression type: {}'.format(compression) + raise ValueError(msg) - with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: - df = tm.makeDataFrame() + open(dest_path, "wb").write(f.read()) + f.close() - # write to compressed file - df.to_pickle(p1, compression=compression) + @pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz']) + def test_write_explicit(self, compression, get_random_path): + # issue 11666 + if compression == 'xz': + tm._skip_if_no_lzma() - # decompress - decompress_file(p1, p2, compression=compression) + base = get_random_path + path1 = base + ".compressed" + path2 = base + ".raw" - # read decompressed file - df2 = pd.read_pickle(p2, compression=None) + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() - tm.assert_frame_equal(df, df2) + # write to compressed file + df.to_pickle(p1, compression=compression) + # decompress + self.decompress_file(p1, p2, compression=compression) -@pytest.mark.parametrize('compression', ['', 'None', 'bad', '7z']) -def test_write_explicit_bad(compression): - with tm.assertRaisesRegexp(ValueError, - "Unrecognized compression type"): - with tm.ensure_clean(get_random_path()) as path: - df = tm.makeDataFrame() - df.to_pickle(path, compression=compression) + # read decompressed file + df2 = pd.read_pickle(p2, compression=None) + tm.assert_frame_equal(df, df2) -@pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.no_compress']) -def test_write_infer(ext): - if ext == '.xz': - tm._skip_if_no_lzma() + @pytest.mark.parametrize('compression', ['', 'None', 'bad', '7z']) + def test_write_explicit_bad(self, compression, get_random_path): + with tm.assertRaisesRegexp(ValueError, + "Unrecognized compression type"): + with tm.ensure_clean(get_random_path) as path: + df = tm.makeDataFrame() + df.to_pickle(path, compression=compression) - base = get_random_path() - path1 = base + ext - path2 = base + ".raw" - 
compression = None - for c in _compression_to_extension: - if _compression_to_extension[c] == ext: - compression = c - break + @pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.no_compress']) + def test_write_infer(self, ext, get_random_path): + if ext == '.xz': + tm._skip_if_no_lzma() - with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: - df = tm.makeDataFrame() + base = get_random_path + path1 = base + ext + path2 = base + ".raw" + compression = None + for c in self._compression_to_extension: + if self._compression_to_extension[c] == ext: + compression = c + break - # write to compressed file by inferred compression method - df.to_pickle(p1) - - # decompress - decompress_file(p1, p2, compression=compression) + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() - # read decompressed file - df2 = pd.read_pickle(p2, compression=None) + # write to compressed file by inferred compression method + df.to_pickle(p1) - tm.assert_frame_equal(df, df2) + # decompress + self.decompress_file(p1, p2, compression=compression) + # read decompressed file + df2 = pd.read_pickle(p2, compression=None) -@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz', "zip"]) -def test_read_explicit(compression): - # issue 11666 - if compression == 'xz': - tm._skip_if_no_lzma() + tm.assert_frame_equal(df, df2) - base = get_random_path() - path1 = base + ".raw" - path2 = base + ".compressed" + @pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz', "zip"]) + def test_read_explicit(self, compression, get_random_path): + # issue 11666 + if compression == 'xz': + tm._skip_if_no_lzma() - with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: - df = tm.makeDataFrame() + base = get_random_path + path1 = base + ".raw" + path2 = base + ".compressed" - # write to uncompressed file - df.to_pickle(p1, compression=None) + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() - # compress - compress_file(p1, p2, compression=compression) + # write to uncompressed file + df.to_pickle(p1, compression=None) - # read compressed file - df2 = pd.read_pickle(p2, compression=compression) + # compress + self.compress_file(p1, p2, compression=compression) - tm.assert_frame_equal(df, df2) + # read compressed file + df2 = pd.read_pickle(p2, compression=compression) + tm.assert_frame_equal(df, df2) -@pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.zip', - '.no_compress']) -def test_read_infer(ext): - if ext == '.xz': - tm._skip_if_no_lzma() + @pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.zip', + '.no_compress']) + def test_read_infer(self, ext, get_random_path): + if ext == '.xz': + tm._skip_if_no_lzma() - base = get_random_path() - path1 = base + ".raw" - path2 = base + ext - compression = None - for c in _compression_to_extension: - if _compression_to_extension[c] == ext: - compression = c - break + base = get_random_path + path1 = base + ".raw" + path2 = base + ext + compression = None + for c in self._compression_to_extension: + if self._compression_to_extension[c] == ext: + compression = c + break - with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: - df = tm.makeDataFrame() + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() - # write to uncompressed file - df.to_pickle(p1, compression=None) + # write to uncompressed file + df.to_pickle(p1, compression=None) - # compress - compress_file(p1, p2, compression=compression) + # 
compress + self.compress_file(p1, p2, compression=compression) - # read compressed file by inferred compression method - df2 = pd.read_pickle(p2) + # read compressed file by inferred compression method + df2 = pd.read_pickle(p2) - tm.assert_frame_equal(df, df2) + tm.assert_frame_equal(df, df2) From 470c3276479925a198f38f9c0aacd745ef3a64bd Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 10 Mar 2017 00:15:03 +0100 Subject: [PATCH 177/933] DOC: remove latex and parallel building (#15637) --- ci/build_docs.sh | 3 --- doc/make.py | 2 +- doc/source/io.rst | 5 +++-- doc/source/whatsnew/v0.20.0.txt | 1 + 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 5dc649a91c4f7..bfe7a1eed756b 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -23,9 +23,6 @@ if [ x"$DOC_BUILD" != x"" ]; then source activate pandas - # install sudo deps - time sudo apt-get $APT_ARGS install dvipng texlive-latex-base texlive-latex-extra - mv "$TRAVIS_BUILD_DIR"/doc /tmp cd /tmp/doc diff --git a/doc/make.py b/doc/make.py index a2f5be5594e44..30cd2ad8b61c9 100755 --- a/doc/make.py +++ b/doc/make.py @@ -197,7 +197,7 @@ def html(): print(e) print("Failed to convert %s" % nb) - if os.system('sphinx-build -j 2 -P -b html -d build/doctrees ' + if os.system('sphinx-build -P -b html -d build/doctrees ' 'source build/html'): raise SystemExit("Building HTML failed.") try: diff --git a/doc/source/io.rst b/doc/source/io.rst index fdd33ab4625f3..a702efdc6aaf9 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2070,9 +2070,9 @@ by the Table Schema spec. The full list of types supported are described in the Table Schema spec. This table shows the mapping from pandas types: -============== ================= +=============== ================= Pandas type Table Schema type -============== ================= +=============== ================= int64 integer float64 number bool boolean @@ -3096,6 +3096,7 @@ The default is to 'infer' .. ipython:: python :suppress: + import os os.remove("data.pkl.compress") os.remove("data.pkl.xz") diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 8f671062464f0..cf3dddc3a2933 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -140,6 +140,7 @@ The default is to 'infer' .. ipython:: python :suppress: + import os os.remove("data.pkl.compress") os.remove("data.pkl.xz") From a703abcb6328b105bfa0b30895b8893f7f52f88f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 10 Mar 2017 03:24:36 -0500 Subject: [PATCH 178/933] DOC: increase recursion limit on sphinx builds (#15641) --- doc/source/conf.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/source/conf.py b/doc/source/conf.py index 6840f76866d2c..0b0de16411e9b 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -16,6 +16,14 @@ import inspect from pandas.compat import u, PY3 +# https://github.com/sphinx-doc/sphinx/pull/2325/files +# Workaround for sphinx-build recursion limit overflow: +# pickle.dump(doctree, f, pickle.HIGHEST_PROTOCOL) +# RuntimeError: maximum recursion depth exceeded while pickling an object +# +# Python's default allowed recursion depth is 1000. +sys.setrecursionlimit(5000) + # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here.
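The two pickle-compression patches above boil down to one rule: ``to_pickle``/``read_pickle`` take a ``compression`` argument whose default ``'infer'`` maps the ``.gz``, ``.bz2``, ``.xz`` and ``.zip`` extensions to the matching codec, with ``zip`` supported for reading only. A minimal round-trip sketch of that behavior, assuming pandas >= 0.20.0 (the file name here is illustrative)::

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'A': np.random.randn(10), 'B': 'foo'})

    # explicit codec on both sides of the round trip
    df.to_pickle('frame.pkl.gz', compression='gzip')
    rt = pd.read_pickle('frame.pkl.gz', compression='gzip')

    # compression='infer' is the default: the '.gz' suffix selects gzip,
    # so the reader needs no explicit argument
    rt2 = pd.read_pickle('frame.pkl.gz')

    assert rt.equals(df) and rt2.equals(df)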
From 22038081aa1546a1f269e7393f0b5f0d294283c5 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 10 Mar 2017 06:20:01 -0500 Subject: [PATCH 179/933] DEPR: remove more .ix warnings from tests Author: Jeff Reback Closes #15638 from jreback/indexing and squashes the following commits: 8b82bd6 [Jeff Reback] CLN: split test_indexing.py 23e82eb [Jeff Reback] DEPR: remove more .ix warnings from tests --- pandas/tests/indexing/common.py | 257 ++ .../indexing/test_chaining_and_caching.py | 96 +- pandas/tests/indexing/test_iloc.py | 590 ++++ pandas/tests/indexing/test_indexing.py | 2811 +---------------- pandas/tests/indexing/test_ix.py | 333 ++ pandas/tests/indexing/test_loc.py | 630 ++++ pandas/tests/indexing/test_multiindex.py | 225 +- pandas/tests/indexing/test_panel.py | 12 +- pandas/tests/indexing/test_partial.py | 587 ++++ pandas/tests/indexing/test_scalar.py | 156 + 10 files changed, 2906 insertions(+), 2791 deletions(-) create mode 100644 pandas/tests/indexing/test_iloc.py create mode 100644 pandas/tests/indexing/test_ix.py create mode 100644 pandas/tests/indexing/test_loc.py create mode 100644 pandas/tests/indexing/test_partial.py create mode 100644 pandas/tests/indexing/test_scalar.py diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index 73167393cf35d..c7637a00910c6 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -1,5 +1,262 @@ """ common utilities """ +import itertools +from warnings import catch_warnings +import numpy as np + +from pandas.compat import lrange +from pandas.types.common import is_scalar +from pandas import Series, DataFrame, Panel, date_range, UInt64Index +from pandas.util import testing as tm +from pandas.formats.printing import pprint_thing + +_verbose = False + def _mklbl(prefix, n): return ["%s%s" % (prefix, i) for i in range(n)] + + +def _axify(obj, key, axis): + # create a tuple accessor + axes = [slice(None)] * obj.ndim + axes[axis] = key + return tuple(axes) + + +class Base(object): + """ indexing comprehensive base class """ + + _objs = set(['series', 'frame', 'panel']) + _typs = set(['ints', 'uints', 'labels', 'mixed', + 'ts', 'floats', 'empty', 'ts_rev']) + + def setUp(self): + + self.series_ints = Series(np.random.rand(4), index=lrange(0, 8, 2)) + self.frame_ints = DataFrame(np.random.randn(4, 4), + index=lrange(0, 8, 2), + columns=lrange(0, 12, 3)) + self.panel_ints = Panel(np.random.rand(4, 4, 4), + items=lrange(0, 8, 2), + major_axis=lrange(0, 12, 3), + minor_axis=lrange(0, 16, 4)) + + self.series_uints = Series(np.random.rand(4), + index=UInt64Index(lrange(0, 8, 2))) + self.frame_uints = DataFrame(np.random.randn(4, 4), + index=UInt64Index(lrange(0, 8, 2)), + columns=UInt64Index(lrange(0, 12, 3))) + self.panel_uints = Panel(np.random.rand(4, 4, 4), + items=UInt64Index(lrange(0, 8, 2)), + major_axis=UInt64Index(lrange(0, 12, 3)), + minor_axis=UInt64Index(lrange(0, 16, 4))) + + self.series_labels = Series(np.random.randn(4), index=list('abcd')) + self.frame_labels = DataFrame(np.random.randn(4, 4), + index=list('abcd'), columns=list('ABCD')) + self.panel_labels = Panel(np.random.randn(4, 4, 4), + items=list('abcd'), + major_axis=list('ABCD'), + minor_axis=list('ZYXW')) + + self.series_mixed = Series(np.random.randn(4), index=[2, 4, 'null', 8]) + self.frame_mixed = DataFrame(np.random.randn(4, 4), + index=[2, 4, 'null', 8]) + self.panel_mixed = Panel(np.random.randn(4, 4, 4), + items=[2, 4, 'null', 8]) + + self.series_ts = Series(np.random.randn(4), + index=date_range('20130101', 
periods=4)) + self.frame_ts = DataFrame(np.random.randn(4, 4), + index=date_range('20130101', periods=4)) + self.panel_ts = Panel(np.random.randn(4, 4, 4), + items=date_range('20130101', periods=4)) + + dates_rev = (date_range('20130101', periods=4) + .sort_values(ascending=False)) + self.series_ts_rev = Series(np.random.randn(4), + index=dates_rev) + self.frame_ts_rev = DataFrame(np.random.randn(4, 4), + index=dates_rev) + self.panel_ts_rev = Panel(np.random.randn(4, 4, 4), + items=dates_rev) + + self.frame_empty = DataFrame({}) + self.series_empty = Series({}) + self.panel_empty = Panel({}) + + # form agglomerates + for o in self._objs: + + d = dict() + for t in self._typs: + d[t] = getattr(self, '%s_%s' % (o, t), None) + + setattr(self, o, d) + + def generate_indices(self, f, values=False): + """ generate the indices + if values is True, use the axis values + if False, use the range + """ + + axes = f.axes + if values: + axes = [lrange(len(a)) for a in axes] + + return itertools.product(*axes) + + def get_result(self, obj, method, key, axis): + """ return the result for this obj with this key and this axis """ + + if isinstance(key, dict): + key = key[axis] + + # use an artificial conversion to map the key as integers to the labels + # so ix can work for comparisons + if method == 'indexer': + method = 'ix' + key = obj._get_axis(axis)[key] + + # in case we actually want 0 index slicing + try: + with catch_warnings(record=True): + xp = getattr(obj, method).__getitem__(_axify(obj, key, axis)) + except: + xp = getattr(obj, method).__getitem__(key) + + return xp + + def get_value(self, f, i, values=False): + """ return the value for the location i """ + + # check against values + if values: + return f.values[i] + + # this is equiv of f[col][row].....
+ # v = f + # for a in reversed(i): + # v = v.__getitem__(a) + # return v + with catch_warnings(record=True): + return f.ix[i] + + def check_values(self, f, func, values=False): + + if f is None: + return + axes = f.axes + indices = itertools.product(*axes) + + for i in indices: + result = getattr(f, func)[i] + + # check against values + if values: + expected = f.values[i] + else: + expected = f + for a in reversed(i): + expected = expected.__getitem__(a) + + tm.assert_almost_equal(result, expected) + + def check_result(self, name, method1, key1, method2, key2, typs=None, + objs=None, axes=None, fails=None): + def _eq(t, o, a, obj, k1, k2): + """ compare equal for these 2 keys """ + + if a is not None and a > obj.ndim - 1: + return + + def _print(result, error=None): + if error is not None: + error = str(error) + v = ("%-16.16s [%-16.16s]: [typ->%-8.8s,obj->%-8.8s," + "key1->(%-4.4s),key2->(%-4.4s),axis->%s] %s" % + (name, result, t, o, method1, method2, a, error or '')) + if _verbose: + pprint_thing(v) + + try: + rs = getattr(obj, method1).__getitem__(_axify(obj, k1, a)) + + try: + xp = self.get_result(obj, method2, k2, a) + except: + result = 'no comp' + _print(result) + return + + detail = None + + try: + if is_scalar(rs) and is_scalar(xp): + self.assertEqual(rs, xp) + elif xp.ndim == 1: + tm.assert_series_equal(rs, xp) + elif xp.ndim == 2: + tm.assert_frame_equal(rs, xp) + elif xp.ndim == 3: + tm.assert_panel_equal(rs, xp) + result = 'ok' + except AssertionError as e: + detail = str(e) + result = 'fail' + + # reverse the checks + if fails is True: + if result == 'fail': + result = 'ok (fail)' + + _print(result) + if not result.startswith('ok'): + raise AssertionError(detail) + + except AssertionError: + raise + except Exception as detail: + + # if we are in fails, then ok, otherwise raise it + if fails is not None: + if isinstance(detail, fails): + result = 'ok (%s)' % type(detail).__name__ + _print(result) + return + + result = type(detail).__name__ + raise AssertionError(_print(result, error=detail)) + + if typs is None: + typs = self._typs + + if objs is None: + objs = self._objs + + if axes is not None: + if not isinstance(axes, (tuple, list)): + axes = [axes] + else: + axes = list(axes) + else: + axes = [0, 1, 2] + + # check + for o in objs: + if o not in self._objs: + continue + + d = getattr(self, o) + for a in axes: + for t in typs: + if t not in self._typs: + continue + + obj = d[t] + if obj is not None: + obj = obj.copy() + + k2 = key2 + _eq(t, o, a, obj, key1, k2) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 0e921aaf826f9..72e704537ba3f 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -1,3 +1,5 @@ +from warnings import catch_warnings + import numpy as np import pandas as pd from pandas.core import common as com @@ -41,13 +43,13 @@ def test_setitem_cache_updating(self): # ref the cache if do_ref: - df.ix[0, "c"] + df.loc[0, "c"] # set it - df.ix[7, 'c'] = 1 + df.loc[7, 'c'] = 1 - self.assertEqual(df.ix[0, 'c'], 0.0) - self.assertEqual(df.ix[7, 'c'], 1.0) + self.assertEqual(df.loc[0, 'c'], 0.0) + self.assertEqual(df.loc[7, 'c'], 1.0) # GH 7084 # not updating cache on series setting with slices @@ -226,21 +228,21 @@ def random_text(nobs=100): # explicity copy indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.ix[indexer].copy() + df = df.loc[indexer].copy() self.assertIsNone(df.is_copy) df['letters'] =
df['letters'].apply(str.lower) # implicity take df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.ix[indexer] + df = df.loc[indexer] self.assertIsNotNone(df.is_copy) df['letters'] = df['letters'].apply(str.lower) # implicity take 2 df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.ix[indexer] + df = df.loc[indexer] self.assertIsNotNone(df.is_copy) df.loc[:, 'letters'] = df['letters'].apply(str.lower) @@ -251,7 +253,8 @@ def random_text(nobs=100): df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) - df.ix[indexer, 'letters'] = df.ix[indexer, 'letters'].apply(str.lower) + df.loc[indexer, 'letters'] = ( + df.loc[indexer, 'letters'].apply(str.lower)) # an identical take, so no copy df = DataFrame({'a': [1]}).dropna() @@ -312,12 +315,12 @@ def f(): D=list('abcde'))) def f(): - df.ix[2]['D'] = 'foo' + df.loc[2]['D'] = 'foo' self.assertRaises(com.SettingWithCopyError, f) def f(): - df.ix[2]['C'] = 'foo' + df.loc[2]['C'] = 'foo' self.assertRaises(com.SettingWithCopyError, f) @@ -356,3 +359,76 @@ def test_detect_chained_assignment_warnings(self): with tm.assert_produces_warning( expected_warning=com.SettingWithCopyWarning): df.loc[0]['A'] = 111 + + def test_chained_getitem_with_lists(self): + + # GH6394 + # Regression in chained getitem indexing with embedded list-like from + # 0.12 + def check(result, expected): + tm.assert_numpy_array_equal(result, expected) + tm.assertIsInstance(result, np.ndarray) + + df = DataFrame({'A': 5 * [np.zeros(3)], 'B': 5 * [np.ones(3)]}) + expected = df['A'].iloc[2] + result = df.loc[2, 'A'] + check(result, expected) + result2 = df.iloc[2]['A'] + check(result2, expected) + result3 = df['A'].loc[2] + check(result3, expected) + result4 = df['A'].iloc[2] + check(result4, expected) + + def test_cache_updating(self): + # GH 4939, make sure to update the cache on setitem + + df = tm.makeDataFrame() + df['A'] # cache series + with catch_warnings(record=True): + df.ix["Hello Friend"] = df.ix[0] + self.assertIn("Hello Friend", df['A'].index) + self.assertIn("Hello Friend", df['B'].index) + + with catch_warnings(record=True): + panel = tm.makePanel() + panel.ix[0] # get first item into cache + panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1 + self.assertIn("A+1", panel.ix[0].columns) + self.assertIn("A+1", panel.ix[1].columns) + + # 5216 + # make sure that we don't try to set a dead cache + a = np.random.rand(10, 3) + df = DataFrame(a, columns=['x', 'y', 'z']) + tuples = [(i, j) for i in range(5) for j in range(2)] + index = MultiIndex.from_tuples(tuples) + df.index = index + + # setting via chained assignment + # but actually works, since everything is a view + df.loc[0]['z'].iloc[0] = 1. + result = df.loc[(0, 0), 'z'] + self.assertEqual(result, 1) + + # correct setting + df.loc[(0, 0), 'z'] = 2 + result = df.loc[(0, 0), 'z'] + self.assertEqual(result, 2) + + # 10264 + df = DataFrame(np.zeros((5, 5), dtype='int64'), columns=[ + 'a', 'b', 'c', 'd', 'e'], index=range(5)) + df['f'] = 0 + df.f.values[3] = 1 + + # TODO(wesm): unused? 
+ # y = df.iloc[np.arange(2, len(df))] + + df.f.values[3] = 2 + expected = DataFrame(np.zeros((5, 6), dtype='int64'), columns=[ + 'a', 'b', 'c', 'd', 'e', 'f'], index=range(5)) + expected.at[3, 'f'] = 2 + tm.assert_frame_equal(df, expected) + expected = Series([0, 0, 0, 2, 0], name='f') + tm.assert_series_equal(df.f, expected) diff --git a/pandas/tests/indexing/test_iloc.py new file mode 100644 index 0000000000000..517194835ca73 --- /dev/null +++ b/pandas/tests/indexing/test_iloc.py @@ -0,0 +1,590 @@ +""" test positional based indexing with iloc """ + +from warnings import catch_warnings +import numpy as np + +import pandas as pd +from pandas.compat import lrange, lmap +from pandas import Series, DataFrame, date_range, concat, isnull +from pandas.util import testing as tm +from pandas.tests.indexing.common import Base + + +class TestiLoc(Base, tm.TestCase): + + def test_iloc_exceeds_bounds(self): + + # GH6296 + # iloc should allow indexers that exceed the bounds + df = DataFrame(np.random.random_sample((20, 5)), columns=list('ABCDE')) + expected = df + + # lists of positions should raise IndexError! + with tm.assertRaisesRegexp(IndexError, + 'positional indexers are out-of-bounds'): + df.iloc[:, [0, 1, 2, 3, 4, 5]] + self.assertRaises(IndexError, lambda: df.iloc[[1, 30]]) + self.assertRaises(IndexError, lambda: df.iloc[[1, -30]]) + self.assertRaises(IndexError, lambda: df.iloc[[100]]) + + s = df['A'] + self.assertRaises(IndexError, lambda: s.iloc[[100]]) + self.assertRaises(IndexError, lambda: s.iloc[[-100]]) + + # still raise on a single indexer + msg = 'single positional indexer is out-of-bounds' + with tm.assertRaisesRegexp(IndexError, msg): + df.iloc[30] + self.assertRaises(IndexError, lambda: df.iloc[-30]) + + # GH10779 + # single positive/negative indexer exceeding Series bounds should raise + # an IndexError + with tm.assertRaisesRegexp(IndexError, msg): + s.iloc[30] + self.assertRaises(IndexError, lambda: s.iloc[-30]) + + # slices are ok + result = df.iloc[:, 4:10] # 0 < start < len < stop + expected = df.iloc[:, 4:] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -4:-10] # stop < 0 < start < len + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down) + expected = df.iloc[:, :4:-1] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down) + expected = df.iloc[:, 4::-1] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -10:4] # start < 0 < stop < len + expected = df.iloc[:, :4] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:4] # 0 < stop < len < start + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len (down) + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:11] # 0 < len < start < stop + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + # slice bounds exceeding is ok + result = s.iloc[18:30] + expected = s.iloc[18:] + tm.assert_series_equal(result, expected) + + result = s.iloc[30:] + expected = s.iloc[:0] + tm.assert_series_equal(result, expected) + + result = s.iloc[30::-1] + expected = s.iloc[::-1] + tm.assert_series_equal(result, expected) + + # doc example + def check(result, expected): + str(result) + result.dtypes + tm.assert_frame_equal(result, expected) + + dfl = DataFrame(np.random.randn(5, 2),
columns=list('AB')) + check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index)) + check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) + check(dfl.iloc[4:6], dfl.iloc[[4]]) + + self.assertRaises(IndexError, lambda: dfl.iloc[[4, 5, 6]]) + self.assertRaises(IndexError, lambda: dfl.iloc[:, 4]) + + def test_iloc_getitem_int(self): + + # integer + self.check_result('integer', 'iloc', 2, 'ix', + {0: 4, 1: 6, 2: 8}, typs=['ints', 'uints']) + self.check_result('integer', 'iloc', 2, 'indexer', 2, + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + def test_iloc_getitem_neg_int(self): + + # neg integer + self.check_result('neg int', 'iloc', -1, 'ix', + {0: 6, 1: 9, 2: 12}, typs=['ints', 'uints']) + self.check_result('neg int', 'iloc', -1, 'indexer', -1, + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + def test_iloc_getitem_list_int(self): + + # list of ints + self.check_result('list int', 'iloc', [0, 1, 2], 'ix', + {0: [0, 2, 4], 1: [0, 3, 6], 2: [0, 4, 8]}, + typs=['ints', 'uints']) + self.check_result('list int', 'iloc', [2], 'ix', + {0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints']) + self.check_result('list int', 'iloc', [0, 1, 2], 'indexer', [0, 1, 2], + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + # array of ints (GH5006), make sure that a single indexer is returning + # the correct type + self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'ix', + {0: [0, 2, 4], + 1: [0, 3, 6], + 2: [0, 4, 8]}, typs=['ints', 'uints']) + self.check_result('array int', 'iloc', np.array([2]), 'ix', + {0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints']) + self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'indexer', + [0, 1, 2], + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + def test_iloc_getitem_neg_int_can_reach_first_index(self): + # GH10547 and GH10779 + # negative integers should be able to reach index 0 + df = DataFrame({'A': [2, 3, 5], 'B': [7, 11, 13]}) + s = df['A'] + + expected = df.iloc[0] + result = df.iloc[-3] + tm.assert_series_equal(result, expected) + + expected = df.iloc[[0]] + result = df.iloc[[-3]] + tm.assert_frame_equal(result, expected) + + expected = s.iloc[0] + result = s.iloc[-3] + self.assertEqual(result, expected) + + expected = s.iloc[[0]] + result = s.iloc[[-3]] + tm.assert_series_equal(result, expected) + + # check the length 1 Series case highlighted in GH10547 + expected = pd.Series(['a'], index=['A']) + result = expected.iloc[[-1]] + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_dups(self): + + # no dups in panel (bug?) 
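+
+        # illustrative sketch (not from the original change): a list
+        # indexer with repeated positions repeats the selected rows
+        s_dup = Series([1, 2, 3])
+        tm.assert_series_equal(s_dup.iloc[[0, 0, 2]],
+                               Series([1, 1, 3], index=[0, 0, 2]))
+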
+ self.check_result('list int (dups)', 'iloc', [0, 1, 1, 3], 'ix', + {0: [0, 2, 2, 6], 1: [0, 3, 3, 9]}, + objs=['series', 'frame'], typs=['ints', 'uints']) + + # GH 6766 + df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}]) + df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}]) + df = concat([df1, df2], axis=1) + + # cross-sectional indexing + result = df.iloc[0, 0] + self.assertTrue(isnull(result)) + + result = df.iloc[0, :] + expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'], + name=0) + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_array(self): + + # array like + s = Series(index=lrange(1, 4)) + self.check_result('array like', 'iloc', s.index, 'ix', + {0: [2, 4, 6], 1: [3, 6, 9], 2: [4, 8, 12]}, + typs=['ints', 'uints']) + + def test_iloc_getitem_bool(self): + + # boolean indexers + b = [True, False, True, False, ] + self.check_result('bool', 'iloc', b, 'ix', b, typs=['ints', 'uints']) + self.check_result('bool', 'iloc', b, 'ix', b, + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + def test_iloc_getitem_slice(self): + + # slices + self.check_result('slice', 'iloc', slice(1, 3), 'ix', + {0: [2, 4], 1: [3, 6], 2: [4, 8]}, + typs=['ints', 'uints']) + self.check_result('slice', 'iloc', slice(1, 3), 'indexer', + slice(1, 3), + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + def test_iloc_getitem_slice_dups(self): + + df1 = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B']) + df2 = DataFrame(np.random.randint(0, 10, size=20).reshape(10, 2), + columns=['A', 'C']) + + # axis=1 + df = concat([df1, df2], axis=1) + tm.assert_frame_equal(df.iloc[:, :4], df1) + tm.assert_frame_equal(df.iloc[:, 4:], df2) + + df = concat([df2, df1], axis=1) + tm.assert_frame_equal(df.iloc[:, :2], df2) + tm.assert_frame_equal(df.iloc[:, 2:], df1) + + exp = concat([df2, df1.iloc[:, [0]]], axis=1) + tm.assert_frame_equal(df.iloc[:, 0:3], exp) + + # axis=0 + df = concat([df, df], axis=0) + tm.assert_frame_equal(df.iloc[0:10, :2], df2) + tm.assert_frame_equal(df.iloc[0:10, 2:], df1) + tm.assert_frame_equal(df.iloc[10:, :2], df2) + tm.assert_frame_equal(df.iloc[10:, 2:], df1) + + def test_iloc_setitem(self): + df = self.frame_ints + + df.iloc[1, 1] = 1 + result = df.iloc[1, 1] + self.assertEqual(result, 1) + + df.iloc[:, 2:3] = 0 + expected = df.iloc[:, 2:3] + result = df.iloc[:, 2:3] + tm.assert_frame_equal(result, expected) + + # GH5771 + s = Series(0, index=[4, 5, 6]) + s.iloc[1:2] += 1 + expected = Series([0, 1, 0], index=[4, 5, 6]) + tm.assert_series_equal(s, expected) + + def test_iloc_setitem_list(self): + + # setitem with an iloc list + df = DataFrame(np.arange(9).reshape((3, 3)), index=["A", "B", "C"], + columns=["A", "B", "C"]) + df.iloc[[0, 1], [1, 2]] + df.iloc[[0, 1], [1, 2]] += 100 + + expected = DataFrame( + np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)), + index=["A", "B", "C"], columns=["A", "B", "C"]) + tm.assert_frame_equal(df, expected) + + def test_iloc_setitem_dups(self): + + # GH 6766 + # iloc with a mask aligning from another iloc + df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}]) + df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}]) + df = concat([df1, df2], axis=1) + + expected = df.fillna(3) + expected['A'] = expected['A'].astype('float64') + inds = np.isnan(df.iloc[:, 0]) + mask = inds[inds].index + df.iloc[mask, 0] = df.iloc[mask, 2] + tm.assert_frame_equal(df, expected) + + # del a dup column across blocks + expected = DataFrame({0: [1, 2], 1: [3, 4]}) + 
expected.columns = ['B', 'B']
+        del df['A']
+        tm.assert_frame_equal(df, expected)
+
+        # assign back to self
+        df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]]
+        tm.assert_frame_equal(df, expected)
+
+        # reversed x 2
+        df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(
+            drop=True)
+        df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(
+            drop=True)
+        tm.assert_frame_equal(df, expected)
+
+    def test_iloc_getitem_frame(self):
+        df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2),
+                       columns=lrange(0, 8, 2))
+
+        result = df.iloc[2]
+        with catch_warnings(record=True):
+            exp = df.ix[4]
+        tm.assert_series_equal(result, exp)
+
+        result = df.iloc[2, 2]
+        with catch_warnings(record=True):
+            exp = df.ix[4, 4]
+        self.assertEqual(result, exp)
+
+        # slice
+        result = df.iloc[4:8]
+        with catch_warnings(record=True):
+            expected = df.ix[8:14]
+        tm.assert_frame_equal(result, expected)
+
+        result = df.iloc[:, 2:3]
+        with catch_warnings(record=True):
+            expected = df.ix[:, 4:5]
+        tm.assert_frame_equal(result, expected)
+
+        # list of integers
+        result = df.iloc[[0, 1, 3]]
+        with catch_warnings(record=True):
+            expected = df.ix[[0, 2, 6]]
+        tm.assert_frame_equal(result, expected)
+
+        result = df.iloc[[0, 1, 3], [0, 1]]
+        with catch_warnings(record=True):
+            expected = df.ix[[0, 2, 6], [0, 2]]
+        tm.assert_frame_equal(result, expected)
+
+        # neg indices
+        result = df.iloc[[-1, 1, 3], [-1, 1]]
+        with catch_warnings(record=True):
+            expected = df.ix[[18, 2, 6], [6, 2]]
+        tm.assert_frame_equal(result, expected)
+
+        # dup indices
+        result = df.iloc[[-1, -1, 1, 3], [-1, 1]]
+        with catch_warnings(record=True):
+            expected = df.ix[[18, 18, 2, 6], [6, 2]]
+        tm.assert_frame_equal(result, expected)
+
+        # with index-like
+        s = Series(index=lrange(1, 5))
+        result = df.iloc[s.index]
+        with catch_warnings(record=True):
+            expected = df.ix[[2, 4, 6, 8]]
+        tm.assert_frame_equal(result, expected)
+
+    def test_iloc_getitem_labelled_frame(self):
+        # try with labelled frame
+        df = DataFrame(np.random.randn(10, 4),
+                       index=list('abcdefghij'), columns=list('ABCD'))
+
+        result = df.iloc[1, 1]
+        exp = df.loc['b', 'B']
+        self.assertEqual(result, exp)
+
+        result = df.iloc[:, 2:3]
+        expected = df.loc[:, ['C']]
+        tm.assert_frame_equal(result, expected)
+
+        # negative indexing
+        result = df.iloc[-1, -1]
+        exp = df.loc['j', 'D']
+        self.assertEqual(result, exp)
+
+        # out-of-bounds exception
+        self.assertRaises(IndexError, df.iloc.__getitem__, tuple([10, 5]))
+
+        # trying to use a label
+        self.assertRaises(ValueError, df.iloc.__getitem__, tuple(['j', 'D']))
+
+    def test_iloc_getitem_doc_issue(self):
+
+        # multi axis slicing issue with single block
+        # surfaced in GH 6059
+
+        arr = np.random.randn(6, 4)
+        index = date_range('20130101', periods=6)
+        columns = list('ABCD')
+        df = DataFrame(arr, index=index, columns=columns)
+
+        # defines ref_locs
+        df.describe()
+
+        result = df.iloc[3:5, 0:2]
+        str(result)
+        result.dtypes
+
+        expected = DataFrame(arr[3:5, 0:2], index=index[3:5],
+                             columns=columns[0:2])
+        tm.assert_frame_equal(result, expected)
+
+        # for dups
+        df.columns = list('aaaa')
+        result = df.iloc[3:5, 0:2]
+        str(result)
+        result.dtypes
+
+        expected = DataFrame(arr[3:5, 0:2], index=index[3:5],
+                             columns=list('aa'))
+        tm.assert_frame_equal(result, expected)
+
+        # related
+        arr = np.random.randn(6, 4)
+        index = list(range(0, 12, 2))
+        columns = list(range(0, 8, 2))
+        df = DataFrame(arr, index=index, columns=columns)
+
+        df._data.blocks[0].mgr_locs
+        result = df.iloc[1:5, 2:4]
+        str(result)
+        result.dtypes
+
expected = DataFrame(arr[1:5, 2:4], index=index[1:5], + columns=columns[2:4]) + tm.assert_frame_equal(result, expected) + + def test_iloc_setitem_series(self): + df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'), + columns=list('ABCD')) + + df.iloc[1, 1] = 1 + result = df.iloc[1, 1] + self.assertEqual(result, 1) + + df.iloc[:, 2:3] = 0 + expected = df.iloc[:, 2:3] + result = df.iloc[:, 2:3] + tm.assert_frame_equal(result, expected) + + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) + + s.iloc[1] = 1 + result = s.iloc[1] + self.assertEqual(result, 1) + + s.iloc[:4] = 0 + expected = s.iloc[:4] + result = s.iloc[:4] + tm.assert_series_equal(result, expected) + + s = Series([-1] * 6) + s.iloc[0::2] = [0, 2, 4] + s.iloc[1::2] = [1, 3, 5] + result = s + expected = Series([0, 1, 2, 3, 4, 5]) + tm.assert_series_equal(result, expected) + + def test_iloc_setitem_list_of_lists(self): + + # GH 7551 + # list-of-list is set incorrectly in mixed vs. single dtyped frames + df = DataFrame(dict(A=np.arange(5, dtype='int64'), + B=np.arange(5, 10, dtype='int64'))) + df.iloc[2:4] = [[10, 11], [12, 13]] + expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9])) + tm.assert_frame_equal(df, expected) + + df = DataFrame( + dict(A=list('abcde'), B=np.arange(5, 10, dtype='int64'))) + df.iloc[2:4] = [['x', 11], ['y', 13]] + expected = DataFrame(dict(A=['a', 'b', 'x', 'y', 'e'], + B=[5, 6, 11, 13, 9])) + tm.assert_frame_equal(df, expected) + + def test_iloc_mask(self): + + # GH 3631, iloc with a mask (of a series) should raise + df = DataFrame(lrange(5), list('ABCDE'), columns=['a']) + mask = (df.a % 2 == 0) + self.assertRaises(ValueError, df.iloc.__getitem__, tuple([mask])) + mask.index = lrange(len(mask)) + self.assertRaises(NotImplementedError, df.iloc.__getitem__, + tuple([mask])) + + # ndarray ok + result = df.iloc[np.array([True] * len(mask), dtype=bool)] + tm.assert_frame_equal(result, df) + + # the possibilities + locs = np.arange(4) + nums = 2 ** locs + reps = lmap(bin, nums) + df = DataFrame({'locs': locs, 'nums': nums}, reps) + + expected = { + (None, ''): '0b1100', + (None, '.loc'): '0b1100', + (None, '.iloc'): '0b1100', + ('index', ''): '0b11', + ('index', '.loc'): '0b11', + ('index', '.iloc'): ('iLocation based boolean indexing ' + 'cannot use an indexable as a mask'), + ('locs', ''): 'Unalignable boolean Series provided as indexer ' + '(index of the boolean Series and of the indexed ' + 'object do not match', + ('locs', '.loc'): 'Unalignable boolean Series provided as indexer ' + '(index of the boolean Series and of the ' + 'indexed object do not match', + ('locs', '.iloc'): ('iLocation based boolean indexing on an ' + 'integer type is not available'), + } + + # UserWarnings from reindex of a boolean mask + with catch_warnings(record=True): + result = dict() + for idx in [None, 'index', 'locs']: + mask = (df.nums > 2).values + if idx: + mask = Series(mask, list(reversed(getattr(df, idx)))) + for method in ['', '.loc', '.iloc']: + try: + if method: + accessor = getattr(df, method[1:]) + else: + accessor = df + ans = str(bin(accessor[mask]['nums'].sum())) + except Exception as e: + ans = str(e) + + key = tuple([idx, method]) + r = expected.get(key) + if r != ans: + raise AssertionError( + "[%s] does not match [%s], received [%s]" + % (key, ans, r)) + + def test_iloc_non_unique_indexing(self): + + # GH 4017, non-unique indexing (on the axis) + df = DataFrame({'A': [0.1] * 3000, 'B': [1] * 3000}) + idx = np.array(lrange(30)) * 99 + expected = df.iloc[idx] + + df3 = 
pd.concat([df, 2 * df, 3 * df]) + result = df3.iloc[idx] + + tm.assert_frame_equal(result, expected) + + df2 = DataFrame({'A': [0.1] * 1000, 'B': [1] * 1000}) + df2 = pd.concat([df2, 2 * df2, 3 * df2]) + + sidx = df2.index.to_series() + expected = df2.iloc[idx[idx <= sidx.max()]] + + new_list = [] + for r, s in expected.iterrows(): + new_list.append(s) + new_list.append(s * 2) + new_list.append(s * 3) + + expected = DataFrame(new_list) + expected = pd.concat([expected, DataFrame(index=idx[idx > sidx.max()]) + ]) + result = df2.loc[idx] + tm.assert_frame_equal(result, expected, check_index_type=False) + + def test_iloc_empty_list_indexer_is_ok(self): + from pandas.util.testing import makeCustomDataframe as mkdf + df = mkdf(5, 2) + # vertical empty + tm.assert_frame_equal(df.iloc[:, []], df.iloc[:, :0], + check_index_type=True, check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.iloc[[], :], df.iloc[:0, :], + check_index_type=True, check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.iloc[[]], df.iloc[:0, :], + check_index_type=True, + check_column_type=True) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 4502e0171dfbe..0d6ca383a1be1 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1,1648 +1,64 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=W0612,E1101 -import itertools -import warnings + +""" test fancy indexing & misc """ + from warnings import catch_warnings from datetime import datetime from pandas.types.common import (is_integer_dtype, - is_float_dtype, - is_scalar) -from pandas.compat import range, lrange, lzip, StringIO, lmap -from pandas._libs.tslib import NaT -from numpy import nan -from numpy.random import randn + is_float_dtype) +from pandas.compat import range, lrange, lzip, StringIO import numpy as np import pandas as pd -from pandas import option_context from pandas.core.indexing import _non_reducing_slice, _maybe_numeric_slice -from pandas.core.api import (DataFrame, Index, Series, Panel, isnull, - MultiIndex, Timestamp, Timedelta, UInt64Index) -from pandas.formats.printing import pprint_thing -from pandas import concat -from pandas.core.common import PerformanceWarning -from pandas.tests.indexing.common import _mklbl +from pandas import NaT, DataFrame, Index, Series, MultiIndex import pandas.util.testing as tm -from pandas import date_range +from pandas.tests.indexing.common import Base, _mklbl -_verbose = False # ------------------------------------------------------------------------ # Indexing test cases -def _generate_indices(f, values=False): - """ generate the indicies - if values is True , use the axis values - is False, use the range - """ - - axes = f.axes - if values: - axes = [lrange(len(a)) for a in axes] - - return itertools.product(*axes) - - -def _get_value(f, i, values=False): - """ return the value for the location i """ - - # check agains values - if values: - return f.values[i] - - # this is equiv of f[col][row]..... 
- # v = f - # for a in reversed(i): - # v = v.__getitem__(a) - # return v - with catch_warnings(record=True): - return f.ix[i] - - -def _get_result(obj, method, key, axis): - """ return the result for this obj with this key and this axis """ - - if isinstance(key, dict): - key = key[axis] - - # use an artifical conversion to map the key as integers to the labels - # so ix can work for comparisions - if method == 'indexer': - method = 'ix' - key = obj._get_axis(axis)[key] - - # in case we actually want 0 index slicing - try: - xp = getattr(obj, method).__getitem__(_axify(obj, key, axis)) - except: - xp = getattr(obj, method).__getitem__(key) - - return xp - - -def _axify(obj, key, axis): - # create a tuple accessor - axes = [slice(None)] * obj.ndim - axes[axis] = key - return tuple(axes) - - -class TestIndexing(tm.TestCase): - - _objs = set(['series', 'frame', 'panel']) - _typs = set(['ints', 'uints', 'labels', 'mixed', - 'ts', 'floats', 'empty', 'ts_rev']) - - def setUp(self): - - self.series_ints = Series(np.random.rand(4), index=lrange(0, 8, 2)) - self.frame_ints = DataFrame(np.random.randn(4, 4), - index=lrange(0, 8, 2), - columns=lrange(0, 12, 3)) - self.panel_ints = Panel(np.random.rand(4, 4, 4), - items=lrange(0, 8, 2), - major_axis=lrange(0, 12, 3), - minor_axis=lrange(0, 16, 4)) - - self.series_uints = Series(np.random.rand(4), - index=UInt64Index(lrange(0, 8, 2))) - self.frame_uints = DataFrame(np.random.randn(4, 4), - index=UInt64Index(lrange(0, 8, 2)), - columns=UInt64Index(lrange(0, 12, 3))) - self.panel_uints = Panel(np.random.rand(4, 4, 4), - items=UInt64Index(lrange(0, 8, 2)), - major_axis=UInt64Index(lrange(0, 12, 3)), - minor_axis=UInt64Index(lrange(0, 16, 4))) - - self.series_labels = Series(np.random.randn(4), index=list('abcd')) - self.frame_labels = DataFrame(np.random.randn(4, 4), - index=list('abcd'), columns=list('ABCD')) - self.panel_labels = Panel(np.random.randn(4, 4, 4), - items=list('abcd'), - major_axis=list('ABCD'), - minor_axis=list('ZYXW')) - - self.series_mixed = Series(np.random.randn(4), index=[2, 4, 'null', 8]) - self.frame_mixed = DataFrame(np.random.randn(4, 4), - index=[2, 4, 'null', 8]) - self.panel_mixed = Panel(np.random.randn(4, 4, 4), - items=[2, 4, 'null', 8]) - - self.series_ts = Series(np.random.randn(4), - index=date_range('20130101', periods=4)) - self.frame_ts = DataFrame(np.random.randn(4, 4), - index=date_range('20130101', periods=4)) - self.panel_ts = Panel(np.random.randn(4, 4, 4), - items=date_range('20130101', periods=4)) - - dates_rev = (date_range('20130101', periods=4) - .sort_values(ascending=False)) - self.series_ts_rev = Series(np.random.randn(4), - index=dates_rev) - self.frame_ts_rev = DataFrame(np.random.randn(4, 4), - index=dates_rev) - self.panel_ts_rev = Panel(np.random.randn(4, 4, 4), - items=dates_rev) - - self.frame_empty = DataFrame({}) - self.series_empty = Series({}) - self.panel_empty = Panel({}) - - # form agglomerates - for o in self._objs: - - d = dict() - for t in self._typs: - d[t] = getattr(self, '%s_%s' % (o, t), None) - - setattr(self, o, d) - - def check_values(self, f, func, values=False): - - if f is None: - return - axes = f.axes - indicies = itertools.product(*axes) - - for i in indicies: - result = getattr(f, func)[i] - - # check agains values - if values: - expected = f.values[i] - else: - expected = f - for a in reversed(i): - expected = expected.__getitem__(a) - - tm.assert_almost_equal(result, expected) - - def check_result(self, name, method1, key1, method2, key2, typs=None, - objs=None, 
axes=None, fails=None): - def _eq(t, o, a, obj, k1, k2): - """ compare equal for these 2 keys """ - - if a is not None and a > obj.ndim - 1: - return - - def _print(result, error=None): - if error is not None: - error = str(error) - v = ("%-16.16s [%-16.16s]: [typ->%-8.8s,obj->%-8.8s," - "key1->(%-4.4s),key2->(%-4.4s),axis->%s] %s" % - (name, result, t, o, method1, method2, a, error or '')) - if _verbose: - pprint_thing(v) - - try: - rs = getattr(obj, method1).__getitem__(_axify(obj, k1, a)) - - try: - xp = _get_result(obj, method2, k2, a) - except: - result = 'no comp' - _print(result) - return - - detail = None - - try: - if is_scalar(rs) and is_scalar(xp): - self.assertEqual(rs, xp) - elif xp.ndim == 1: - tm.assert_series_equal(rs, xp) - elif xp.ndim == 2: - tm.assert_frame_equal(rs, xp) - elif xp.ndim == 3: - tm.assert_panel_equal(rs, xp) - result = 'ok' - except AssertionError as e: - detail = str(e) - result = 'fail' - - # reverse the checks - if fails is True: - if result == 'fail': - result = 'ok (fail)' - - _print(result) - if not result.startswith('ok'): - raise AssertionError(detail) - - except AssertionError: - raise - except Exception as detail: - - # if we are in fails, the ok, otherwise raise it - if fails is not None: - if isinstance(detail, fails): - result = 'ok (%s)' % type(detail).__name__ - _print(result) - return - - result = type(detail).__name__ - raise AssertionError(_print(result, error=detail)) - - if typs is None: - typs = self._typs - - if objs is None: - objs = self._objs - - if axes is not None: - if not isinstance(axes, (tuple, list)): - axes = [axes] - else: - axes = list(axes) - else: - axes = [0, 1, 2] - - # check - for o in objs: - if o not in self._objs: - continue - - d = getattr(self, o) - for a in axes: - for t in typs: - if t not in self._typs: - continue - - obj = d[t] - if obj is not None: - obj = obj.copy() - - k2 = key2 - _eq(t, o, a, obj, key1, k2) - - def test_ix_deprecation(self): - # GH 15114 - - df = DataFrame({'A': [1, 2, 3]}) - with tm.assert_produces_warning(DeprecationWarning, - check_stacklevel=False): - df.ix[1, 'A'] - - def test_indexer_caching(self): - # GH5727 - # make sure that indexers are in the _internal_names_set - n = 1000001 - arrays = [lrange(n), lrange(n)] - index = MultiIndex.from_tuples(lzip(*arrays)) - s = Series(np.zeros(n), index=index) - str(s) - - # setitem - expected = Series(np.ones(n), index=index) - s = Series(np.zeros(n), index=index) - s[s == 0] = 1 - tm.assert_series_equal(s, expected) - - def test_at_and_iat_get(self): - def _check(f, func, values=False): - - if f is not None: - indicies = _generate_indices(f, values) - for i in indicies: - result = getattr(f, func)[i] - expected = _get_value(f, i, values) - tm.assert_almost_equal(result, expected) - - for o in self._objs: - - d = getattr(self, o) - - # iat - for f in [d['ints'], d['uints']]: - _check(f, 'iat', values=True) - - for f in [d['labels'], d['ts'], d['floats']]: - if f is not None: - self.assertRaises(ValueError, self.check_values, f, 'iat') - - # at - for f in [d['ints'], d['uints'], d['labels'], - d['ts'], d['floats']]: - _check(f, 'at') - - def test_at_and_iat_set(self): - def _check(f, func, values=False): - - if f is not None: - indicies = _generate_indices(f, values) - for i in indicies: - getattr(f, func)[i] = 1 - expected = _get_value(f, i, values) - tm.assert_almost_equal(expected, 1) - - for t in self._objs: - - d = getattr(self, t) - - # iat - for f in [d['ints'], d['uints']]: - _check(f, 'iat', values=True) - - for f in [d['labels'], 
d['ts'], d['floats']]: - if f is not None: - self.assertRaises(ValueError, _check, f, 'iat') - - # at - for f in [d['ints'], d['uints'], d['labels'], - d['ts'], d['floats']]: - _check(f, 'at') - - def test_at_iat_coercion(self): - - # as timestamp is not a tuple! - dates = date_range('1/1/2000', periods=8) - df = DataFrame(randn(8, 4), index=dates, columns=['A', 'B', 'C', 'D']) - s = df['A'] - - result = s.at[dates[5]] - xp = s.values[5] - self.assertEqual(result, xp) - - # GH 7729 - # make sure we are boxing the returns - s = Series(['2014-01-01', '2014-02-02'], dtype='datetime64[ns]') - expected = Timestamp('2014-02-02') - - for r in [lambda: s.iat[1], lambda: s.iloc[1]]: - result = r() - self.assertEqual(result, expected) - - s = Series(['1 days', '2 days'], dtype='timedelta64[ns]') - expected = Timedelta('2 days') - - for r in [lambda: s.iat[1], lambda: s.iloc[1]]: - result = r() - self.assertEqual(result, expected) - - def test_iat_invalid_args(self): - pass - - def test_imethods_with_dups(self): - - # GH6493 - # iat/iloc with dups - - s = Series(range(5), index=[1, 1, 2, 2, 3], dtype='int64') - result = s.iloc[2] - self.assertEqual(result, 2) - result = s.iat[2] - self.assertEqual(result, 2) - - self.assertRaises(IndexError, lambda: s.iat[10]) - self.assertRaises(IndexError, lambda: s.iat[-10]) - - result = s.iloc[[2, 3]] - expected = Series([2, 3], [2, 2], dtype='int64') - tm.assert_series_equal(result, expected) - - df = s.to_frame() - result = df.iloc[2] - expected = Series(2, index=[0], name=2) - tm.assert_series_equal(result, expected) - - result = df.iat[2, 0] - expected = 2 - self.assertEqual(result, 2) - - def test_repeated_getitem_dups(self): - # GH 5678 - # repeated gettitems on a dup index returing a ndarray - df = DataFrame( - np.random.random_sample((20, 5)), - index=['ABCDE' [x % 5] for x in range(20)]) - expected = df.loc['A', 0] - result = df.loc[:, 0].loc['A'] - tm.assert_series_equal(result, expected) - - def test_iloc_exceeds_bounds(self): - - # GH6296 - # iloc should allow indexers that exceed the bounds - df = DataFrame(np.random.random_sample((20, 5)), columns=list('ABCDE')) - expected = df - - # lists of positions should raise IndexErrror! 
- with tm.assertRaisesRegexp(IndexError, - 'positional indexers are out-of-bounds'): - df.iloc[:, [0, 1, 2, 3, 4, 5]] - self.assertRaises(IndexError, lambda: df.iloc[[1, 30]]) - self.assertRaises(IndexError, lambda: df.iloc[[1, -30]]) - self.assertRaises(IndexError, lambda: df.iloc[[100]]) - - s = df['A'] - self.assertRaises(IndexError, lambda: s.iloc[[100]]) - self.assertRaises(IndexError, lambda: s.iloc[[-100]]) - - # still raise on a single indexer - msg = 'single positional indexer is out-of-bounds' - with tm.assertRaisesRegexp(IndexError, msg): - df.iloc[30] - self.assertRaises(IndexError, lambda: df.iloc[-30]) - - # GH10779 - # single positive/negative indexer exceeding Series bounds should raise - # an IndexError - with tm.assertRaisesRegexp(IndexError, msg): - s.iloc[30] - self.assertRaises(IndexError, lambda: s.iloc[-30]) - - # slices are ok - result = df.iloc[:, 4:10] # 0 < start < len < stop - expected = df.iloc[:, 4:] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, -4:-10] # stop < 0 < start < len - expected = df.iloc[:, :0] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down) - expected = df.iloc[:, :4:-1] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down) - expected = df.iloc[:, 4::-1] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, -10:4] # start < 0 < stop < len - expected = df.iloc[:, :4] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 10:4] # 0 < stop < len < start - expected = df.iloc[:, :0] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len (down) - expected = df.iloc[:, :0] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 10:11] # 0 < len < start < stop - expected = df.iloc[:, :0] - tm.assert_frame_equal(result, expected) - - # slice bounds exceeding is ok - result = s.iloc[18:30] - expected = s.iloc[18:] - tm.assert_series_equal(result, expected) - - result = s.iloc[30:] - expected = s.iloc[:0] - tm.assert_series_equal(result, expected) - - result = s.iloc[30::-1] - expected = s.iloc[::-1] - tm.assert_series_equal(result, expected) - - # doc example - def check(result, expected): - str(result) - result.dtypes - tm.assert_frame_equal(result, expected) - - dfl = DataFrame(np.random.randn(5, 2), columns=list('AB')) - check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index)) - check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) - check(dfl.iloc[4:6], dfl.iloc[[4]]) - - self.assertRaises(IndexError, lambda: dfl.iloc[[4, 5, 6]]) - self.assertRaises(IndexError, lambda: dfl.iloc[:, 4]) - - def test_iloc_getitem_int(self): - - # integer - self.check_result('integer', 'iloc', 2, 'ix', - {0: 4, 1: 6, 2: 8}, typs=['ints', 'uints']) - self.check_result('integer', 'iloc', 2, 'indexer', 2, - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - def test_iloc_getitem_neg_int(self): - - # neg integer - self.check_result('neg int', 'iloc', -1, 'ix', - {0: 6, 1: 9, 2: 12}, typs=['ints', 'uints']) - self.check_result('neg int', 'iloc', -1, 'indexer', -1, - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - def test_iloc_getitem_list_int(self): - - # list of ints - self.check_result('list int', 'iloc', [0, 1, 2], 'ix', - {0: [0, 2, 4], 1: [0, 3, 6], 2: [0, 4, 8]}, - typs=['ints', 'uints']) - self.check_result('list int', 'iloc', [2], 'ix', - {0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints']) - self.check_result('list int', 'iloc', 
[0, 1, 2], 'indexer', [0, 1, 2], - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - # array of ints (GH5006), make sure that a single indexer is returning - # the correct type - self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'ix', - {0: [0, 2, 4], - 1: [0, 3, 6], - 2: [0, 4, 8]}, typs=['ints', 'uints']) - self.check_result('array int', 'iloc', np.array([2]), 'ix', - {0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints']) - self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'indexer', - [0, 1, 2], - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - def test_iloc_getitem_neg_int_can_reach_first_index(self): - # GH10547 and GH10779 - # negative integers should be able to reach index 0 - df = DataFrame({'A': [2, 3, 5], 'B': [7, 11, 13]}) - s = df['A'] - - expected = df.iloc[0] - result = df.iloc[-3] - tm.assert_series_equal(result, expected) - - expected = df.iloc[[0]] - result = df.iloc[[-3]] - tm.assert_frame_equal(result, expected) - - expected = s.iloc[0] - result = s.iloc[-3] - self.assertEqual(result, expected) - - expected = s.iloc[[0]] - result = s.iloc[[-3]] - tm.assert_series_equal(result, expected) - - # check the length 1 Series case highlighted in GH10547 - expected = pd.Series(['a'], index=['A']) - result = expected.iloc[[-1]] - tm.assert_series_equal(result, expected) - - def test_iloc_getitem_dups(self): - - # no dups in panel (bug?) - self.check_result('list int (dups)', 'iloc', [0, 1, 1, 3], 'ix', - {0: [0, 2, 2, 6], 1: [0, 3, 3, 9]}, - objs=['series', 'frame'], typs=['ints', 'uints']) - - # GH 6766 - df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}]) - df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}]) - df = concat([df1, df2], axis=1) - - # cross-sectional indexing - result = df.iloc[0, 0] - self.assertTrue(isnull(result)) - - result = df.iloc[0, :] - expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'], - name=0) - tm.assert_series_equal(result, expected) - - def test_iloc_getitem_array(self): - - # array like - s = Series(index=lrange(1, 4)) - self.check_result('array like', 'iloc', s.index, 'ix', - {0: [2, 4, 6], 1: [3, 6, 9], 2: [4, 8, 12]}, - typs=['ints', 'uints']) - - def test_iloc_getitem_bool(self): - - # boolean indexers - b = [True, False, True, False, ] - self.check_result('bool', 'iloc', b, 'ix', b, typs=['ints', 'uints']) - self.check_result('bool', 'iloc', b, 'ix', b, - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - def test_iloc_getitem_slice(self): - - # slices - self.check_result('slice', 'iloc', slice(1, 3), 'ix', - {0: [2, 4], 1: [3, 6], 2: [4, 8]}, - typs=['ints', 'uints']) - self.check_result('slice', 'iloc', slice(1, 3), 'indexer', - slice(1, 3), - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - def test_iloc_getitem_slice_dups(self): - - df1 = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B']) - df2 = DataFrame(np.random.randint(0, 10, size=20).reshape(10, 2), - columns=['A', 'C']) - - # axis=1 - df = concat([df1, df2], axis=1) - tm.assert_frame_equal(df.iloc[:, :4], df1) - tm.assert_frame_equal(df.iloc[:, 4:], df2) - - df = concat([df2, df1], axis=1) - tm.assert_frame_equal(df.iloc[:, :2], df2) - tm.assert_frame_equal(df.iloc[:, 2:], df1) - - exp = concat([df2, df1.iloc[:, [0]]], axis=1) - tm.assert_frame_equal(df.iloc[:, 0:3], exp) - - # axis=0 - df = concat([df, df], axis=0) - tm.assert_frame_equal(df.iloc[0:10, :2], df2) - tm.assert_frame_equal(df.iloc[0:10, 2:], df1) - 
tm.assert_frame_equal(df.iloc[10:, :2], df2) - tm.assert_frame_equal(df.iloc[10:, 2:], df1) - - def test_iloc_setitem(self): - df = self.frame_ints - - df.iloc[1, 1] = 1 - result = df.iloc[1, 1] - self.assertEqual(result, 1) - - df.iloc[:, 2:3] = 0 - expected = df.iloc[:, 2:3] - result = df.iloc[:, 2:3] - tm.assert_frame_equal(result, expected) - - # GH5771 - s = Series(0, index=[4, 5, 6]) - s.iloc[1:2] += 1 - expected = Series([0, 1, 0], index=[4, 5, 6]) - tm.assert_series_equal(s, expected) - - def test_loc_setitem_slice(self): - # GH10503 - - # assigning the same type should not change the type - df1 = DataFrame({'a': [0, 1, 1], - 'b': Series([100, 200, 300], dtype='uint32')}) - ix = df1['a'] == 1 - newb1 = df1.loc[ix, 'b'] + 1 - df1.loc[ix, 'b'] = newb1 - expected = DataFrame({'a': [0, 1, 1], - 'b': Series([100, 201, 301], dtype='uint32')}) - tm.assert_frame_equal(df1, expected) - - # assigning a new type should get the inferred type - df2 = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]}, - dtype='uint64') - ix = df1['a'] == 1 - newb2 = df2.loc[ix, 'b'] - df1.loc[ix, 'b'] = newb2 - expected = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]}, - dtype='uint64') - tm.assert_frame_equal(df2, expected) - - def test_ix_loc_setitem_consistency(self): - - # GH 5771 - # loc with slice and series - s = Series(0, index=[4, 5, 6]) - s.loc[4:5] += 1 - expected = Series([1, 1, 0], index=[4, 5, 6]) - tm.assert_series_equal(s, expected) - - # GH 5928 - # chained indexing assignment - df = DataFrame({'a': [0, 1, 2]}) - expected = df.copy() - with catch_warnings(record=True): - expected.ix[[0, 1, 2], 'a'] = -expected.ix[[0, 1, 2], 'a'] - - with catch_warnings(record=True): - df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]] - tm.assert_frame_equal(df, expected) - - df = DataFrame({'a': [0, 1, 2], 'b': [0, 1, 2]}) - with catch_warnings(record=True): - df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]].astype( - 'float64') + 0.5 - expected = DataFrame({'a': [0.5, -0.5, -1.5], 'b': [0, 1, 2]}) - tm.assert_frame_equal(df, expected) - - # GH 8607 - # ix setitem consistency - df = DataFrame({'timestamp': [1413840976, 1413842580, 1413760580], - 'delta': [1174, 904, 161], - 'elapsed': [7673, 9277, 1470]}) - expected = DataFrame({'timestamp': pd.to_datetime( - [1413840976, 1413842580, 1413760580], unit='s'), - 'delta': [1174, 904, 161], - 'elapsed': [7673, 9277, 1470]}) - - df2 = df.copy() - df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s') - tm.assert_frame_equal(df2, expected) - - df2 = df.copy() - df2.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp'], unit='s') - tm.assert_frame_equal(df2, expected) - - df2 = df.copy() - with catch_warnings(record=True): - df2.ix[:, 2] = pd.to_datetime(df['timestamp'], unit='s') - tm.assert_frame_equal(df2, expected) - - def test_ix_loc_consistency(self): - - # GH 8613 - # some edge cases where ix/loc should return the same - # this is not an exhaustive case - - def compare(result, expected): - if is_scalar(expected): - self.assertEqual(result, expected) - else: - self.assertTrue(expected.equals(result)) - - # failure cases for .loc, but these work for .ix - df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD')) - for key in [slice(1, 3), tuple([slice(0, 2), slice(0, 2)]), - tuple([slice(0, 2), df.columns[0:2]])]: - - for index in [tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex, - tm.makeTimedeltaIndex]: - df.index = index(len(df.index)) - with catch_warnings(record=True): - df.ix[key] - - self.assertRaises(TypeError, lambda: 
df.loc[key]) - - df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD'), - index=pd.date_range('2012-01-01', periods=5)) - - for key in ['2012-01-03', - '2012-01-31', - slice('2012-01-03', '2012-01-03'), - slice('2012-01-03', '2012-01-04'), - slice('2012-01-03', '2012-01-06', 2), - slice('2012-01-03', '2012-01-31'), - tuple([[True, True, True, False, True]]), ]: - - # getitem - - # if the expected raises, then compare the exceptions - try: - with catch_warnings(record=True): - expected = df.ix[key] - except KeyError: - self.assertRaises(KeyError, lambda: df.loc[key]) - continue - - result = df.loc[key] - compare(result, expected) - - # setitem - df1 = df.copy() - df2 = df.copy() - - with catch_warnings(record=True): - df1.ix[key] = 10 - df2.loc[key] = 10 - compare(df2, df1) - - # edge cases - s = Series([1, 2, 3, 4], index=list('abde')) - - result1 = s['a':'c'] - with catch_warnings(record=True): - result2 = s.ix['a':'c'] - result3 = s.loc['a':'c'] - tm.assert_series_equal(result1, result2) - tm.assert_series_equal(result1, result3) - - # now work rather than raising KeyError - s = Series(range(5), [-2, -1, 1, 2, 3]) - - with catch_warnings(record=True): - result1 = s.ix[-10:3] - result2 = s.loc[-10:3] - tm.assert_series_equal(result1, result2) - - with catch_warnings(record=True): - result1 = s.ix[0:3] - result2 = s.loc[0:3] - tm.assert_series_equal(result1, result2) - - def test_loc_setitem_dups(self): - - # GH 6541 - df_orig = DataFrame( - {'me': list('rttti'), - 'foo': list('aaade'), - 'bar': np.arange(5, dtype='float64') * 1.34 + 2, - 'bar2': np.arange(5, dtype='float64') * -.34 + 2}).set_index('me') - - indexer = tuple(['r', ['bar', 'bar2']]) - df = df_orig.copy() - df.loc[indexer] *= 2.0 - tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) - - indexer = tuple(['r', 'bar']) - df = df_orig.copy() - df.loc[indexer] *= 2.0 - self.assertEqual(df.loc[indexer], 2.0 * df_orig.loc[indexer]) - - indexer = tuple(['t', ['bar', 'bar2']]) - df = df_orig.copy() - df.loc[indexer] *= 2.0 - tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) - - def test_iloc_setitem_dups(self): - - # GH 6766 - # iloc with a mask aligning from another iloc - df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}]) - df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}]) - df = concat([df1, df2], axis=1) - - expected = df.fillna(3) - expected['A'] = expected['A'].astype('float64') - inds = np.isnan(df.iloc[:, 0]) - mask = inds[inds].index - df.iloc[mask, 0] = df.iloc[mask, 2] - tm.assert_frame_equal(df, expected) - - # del a dup column across blocks - expected = DataFrame({0: [1, 2], 1: [3, 4]}) - expected.columns = ['B', 'B'] - del df['A'] - tm.assert_frame_equal(df, expected) - - # assign back to self - df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]] - tm.assert_frame_equal(df, expected) - - # reversed x 2 - df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index( - drop=True) - df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index( - drop=True) - tm.assert_frame_equal(df, expected) - - def test_chained_getitem_with_lists(self): - - # GH6394 - # Regression in chained getitem indexing with embedded list-like from - # 0.12 - def check(result, expected): - tm.assert_numpy_array_equal(result, expected) - tm.assertIsInstance(result, np.ndarray) - - df = DataFrame({'A': 5 * [np.zeros(3)], 'B': 5 * [np.ones(3)]}) - expected = df['A'].iloc[2] - result = df.loc[2, 'A'] - check(result, expected) - result2 = df.iloc[2]['A'] - check(result2, expected) - result3 = 
df['A'].loc[2] - check(result3, expected) - result4 = df['A'].iloc[2] - check(result4, expected) - - def test_loc_getitem_int(self): - - # int label - self.check_result('int label', 'loc', 2, 'ix', 2, - typs=['ints', 'uints'], axes=0) - self.check_result('int label', 'loc', 3, 'ix', 3, - typs=['ints', 'uints'], axes=1) - self.check_result('int label', 'loc', 4, 'ix', 4, - typs=['ints', 'uints'], axes=2) - self.check_result('int label', 'loc', 2, 'ix', 2, - typs=['label'], fails=KeyError) - - def test_loc_getitem_label(self): - - # label - self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['labels'], - axes=0) - self.check_result('label', 'loc', 'null', 'ix', 'null', typs=['mixed'], - axes=0) - self.check_result('label', 'loc', 8, 'ix', 8, typs=['mixed'], axes=0) - self.check_result('label', 'loc', Timestamp('20130102'), 'ix', 1, - typs=['ts'], axes=0) - self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['empty'], - fails=KeyError) - - def test_loc_getitem_label_out_of_range(self): - - # out of range label - self.check_result('label range', 'loc', 'f', 'ix', 'f', - typs=['ints', 'uints', 'labels', 'mixed', 'ts'], - fails=KeyError) - self.check_result('label range', 'loc', 'f', 'ix', 'f', - typs=['floats'], fails=TypeError) - self.check_result('label range', 'loc', 20, 'ix', 20, - typs=['ints', 'uints', 'mixed'], fails=KeyError) - self.check_result('label range', 'loc', 20, 'ix', 20, - typs=['labels'], fails=TypeError) - self.check_result('label range', 'loc', 20, 'ix', 20, typs=['ts'], - axes=0, fails=TypeError) - self.check_result('label range', 'loc', 20, 'ix', 20, typs=['floats'], - axes=0, fails=TypeError) - - def test_loc_getitem_label_list(self): - - # list of labels - self.check_result('list lbl', 'loc', [0, 2, 4], 'ix', [0, 2, 4], - typs=['ints', 'uints'], axes=0) - self.check_result('list lbl', 'loc', [3, 6, 9], 'ix', [3, 6, 9], - typs=['ints', 'uints'], axes=1) - self.check_result('list lbl', 'loc', [4, 8, 12], 'ix', [4, 8, 12], - typs=['ints', 'uints'], axes=2) - self.check_result('list lbl', 'loc', ['a', 'b', 'd'], 'ix', - ['a', 'b', 'd'], typs=['labels'], axes=0) - self.check_result('list lbl', 'loc', ['A', 'B', 'C'], 'ix', - ['A', 'B', 'C'], typs=['labels'], axes=1) - self.check_result('list lbl', 'loc', ['Z', 'Y', 'W'], 'ix', - ['Z', 'Y', 'W'], typs=['labels'], axes=2) - self.check_result('list lbl', 'loc', [2, 8, 'null'], 'ix', - [2, 8, 'null'], typs=['mixed'], axes=0) - self.check_result('list lbl', 'loc', - [Timestamp('20130102'), Timestamp('20130103')], 'ix', - [Timestamp('20130102'), Timestamp('20130103')], - typs=['ts'], axes=0) - - self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2], - typs=['empty'], fails=KeyError) - self.check_result('list lbl', 'loc', [0, 2, 3], 'ix', [0, 2, 3], - typs=['ints', 'uints'], axes=0, fails=KeyError) - self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7], - typs=['ints', 'uints'], axes=1, fails=KeyError) - self.check_result('list lbl', 'loc', [4, 8, 10], 'ix', [4, 8, 10], - typs=['ints', 'uints'], axes=2, fails=KeyError) - - def test_loc_getitem_label_list_fails(self): - # fails - self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], - typs=['ints', 'uints'], axes=1, fails=KeyError) - self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], - typs=['ints', 'uints'], axes=2, fails=KeyError) - - def test_loc_getitem_label_array_like(self): - # array like - self.check_result('array like', 'loc', Series(index=[0, 2, 4]).index, - 'ix', [0, 2, 4], typs=['ints', 'uints'], 
axes=0) - self.check_result('array like', 'loc', Series(index=[3, 6, 9]).index, - 'ix', [3, 6, 9], typs=['ints', 'uints'], axes=1) - self.check_result('array like', 'loc', Series(index=[4, 8, 12]).index, - 'ix', [4, 8, 12], typs=['ints', 'uints'], axes=2) - - def test_loc_getitem_bool(self): - # boolean indexers - b = [True, False, True, False] - self.check_result('bool', 'loc', b, 'ix', b, - typs=['ints', 'uints', 'labels', - 'mixed', 'ts', 'floats']) - self.check_result('bool', 'loc', b, 'ix', b, typs=['empty'], - fails=KeyError) - - def test_loc_getitem_int_slice(self): - - # ok - self.check_result('int slice2', 'loc', slice(2, 4), 'ix', [2, 4], - typs=['ints', 'uints'], axes=0) - self.check_result('int slice2', 'loc', slice(3, 6), 'ix', [3, 6], - typs=['ints', 'uints'], axes=1) - self.check_result('int slice2', 'loc', slice(4, 8), 'ix', [4, 8], - typs=['ints', 'uints'], axes=2) - - # GH 3053 - # loc should treat integer slices like label slices - from itertools import product - - index = MultiIndex.from_tuples([t for t in product( - [6, 7, 8], ['a', 'b'])]) - df = DataFrame(np.random.randn(6, 6), index, index) - result = df.loc[6:8, :] - with catch_warnings(record=True): - expected = df.ix[6:8, :] - tm.assert_frame_equal(result, expected) - - index = MultiIndex.from_tuples([t - for t in product( - [10, 20, 30], ['a', 'b'])]) - df = DataFrame(np.random.randn(6, 6), index, index) - result = df.loc[20:30, :] - with catch_warnings(record=True): - expected = df.ix[20:30, :] - tm.assert_frame_equal(result, expected) - - # doc examples - result = df.loc[10, :] - with catch_warnings(record=True): - expected = df.ix[10, :] - tm.assert_frame_equal(result, expected) - - result = df.loc[:, 10] - # expected = df.ix[:,10] (this fails) - expected = df[10] - tm.assert_frame_equal(result, expected) - - def test_loc_to_fail(self): - - # GH3449 - df = DataFrame(np.random.random((3, 3)), - index=['a', 'b', 'c'], - columns=['e', 'f', 'g']) - - # raise a KeyError? 
- self.assertRaises(KeyError, df.loc.__getitem__, - tuple([[1, 2], [1, 2]])) - - # GH 7496 - # loc should not fallback - - s = Series() - s.loc[1] = 1 - s.loc['a'] = 2 - - self.assertRaises(KeyError, lambda: s.loc[-1]) - self.assertRaises(KeyError, lambda: s.loc[[-1, -2]]) - - self.assertRaises(KeyError, lambda: s.loc[['4']]) - - s.loc[-1] = 3 - result = s.loc[[-1, -2]] - expected = Series([3, np.nan], index=[-1, -2]) - tm.assert_series_equal(result, expected) - - s['a'] = 2 - self.assertRaises(KeyError, lambda: s.loc[[-2]]) - - del s['a'] - - def f(): - s.loc[[-2]] = 0 - - self.assertRaises(KeyError, f) - - # inconsistency between .loc[values] and .loc[values,:] - # GH 7999 - df = DataFrame([['a'], ['b']], index=[1, 2], columns=['value']) - - def f(): - df.loc[[3], :] - - self.assertRaises(KeyError, f) - - def f(): - df.loc[[3]] - - self.assertRaises(KeyError, f) - - def test_at_to_fail(self): - # at should not fallback - # GH 7814 - s = Series([1, 2, 3], index=list('abc')) - result = s.at['a'] - self.assertEqual(result, 1) - self.assertRaises(ValueError, lambda: s.at[0]) - - df = DataFrame({'A': [1, 2, 3]}, index=list('abc')) - result = df.at['a', 'A'] - self.assertEqual(result, 1) - self.assertRaises(ValueError, lambda: df.at['a', 0]) - - s = Series([1, 2, 3], index=[3, 2, 1]) - result = s.at[1] - self.assertEqual(result, 3) - self.assertRaises(ValueError, lambda: s.at['a']) - - df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) - result = df.at[1, 0] - self.assertEqual(result, 3) - self.assertRaises(ValueError, lambda: df.at['a', 0]) - - # GH 13822, incorrect error string with non-unique columns when missing - # column is accessed - df = DataFrame({'x': [1.], 'y': [2.], 'z': [3.]}) - df.columns = ['x', 'x', 'z'] - - # Check that we get the correct value in the KeyError - self.assertRaisesRegexp(KeyError, r"\['y'\] not in index", - lambda: df[['x', 'y', 'z']]) - - def test_loc_getitem_label_slice(self): - - # label slices (with ints) - self.check_result('lab slice', 'loc', slice(1, 3), - 'ix', slice(1, 3), - typs=['labels', 'mixed', 'empty', 'ts', 'floats'], - fails=TypeError) - - # real label slices - self.check_result('lab slice', 'loc', slice('a', 'c'), - 'ix', slice('a', 'c'), typs=['labels'], axes=0) - self.check_result('lab slice', 'loc', slice('A', 'C'), - 'ix', slice('A', 'C'), typs=['labels'], axes=1) - self.check_result('lab slice', 'loc', slice('W', 'Z'), - 'ix', slice('W', 'Z'), typs=['labels'], axes=2) - - self.check_result('ts slice', 'loc', slice('20130102', '20130104'), - 'ix', slice('20130102', '20130104'), - typs=['ts'], axes=0) - self.check_result('ts slice', 'loc', slice('20130102', '20130104'), - 'ix', slice('20130102', '20130104'), - typs=['ts'], axes=1, fails=TypeError) - self.check_result('ts slice', 'loc', slice('20130102', '20130104'), - 'ix', slice('20130102', '20130104'), - typs=['ts'], axes=2, fails=TypeError) - - # GH 14316 - self.check_result('ts slice rev', 'loc', slice('20130104', '20130102'), - 'indexer', [0, 1, 2], typs=['ts_rev'], axes=0) - - self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), - typs=['mixed'], axes=0, fails=TypeError) - self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), - typs=['mixed'], axes=1, fails=KeyError) - self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), - typs=['mixed'], axes=2, fails=KeyError) - - self.check_result('mixed slice', 'loc', slice(2, 4, 2), 'ix', slice( - 2, 4, 2), typs=['mixed'], axes=0, fails=TypeError) - - def test_loc_general(self): - - df = 
DataFrame( - np.random.rand(4, 4), columns=['A', 'B', 'C', 'D'], - index=['A', 'B', 'C', 'D']) - - # want this to work - result = df.loc[:, "A":"B"].iloc[0:2, :] - self.assertTrue((result.columns == ['A', 'B']).all()) - self.assertTrue((result.index == ['A', 'B']).all()) - - # mixed type - result = DataFrame({'a': [Timestamp('20130101')], 'b': [1]}).iloc[0] - expected = Series([Timestamp('20130101'), 1], index=['a', 'b'], name=0) - tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, object) - - def test_loc_setitem_consistency(self): - # GH 6149 - # coerce similary for setitem and loc when rows have a null-slice - expected = DataFrame({'date': Series(0, index=range(5), - dtype=np.int64), - 'val': Series(range(5), dtype=np.int64)}) - - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series( - range(5), dtype=np.int64)}) - df.loc[:, 'date'] = 0 - tm.assert_frame_equal(df, expected) - - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series(range(5), dtype=np.int64)}) - df.loc[:, 'date'] = np.array(0, dtype=np.int64) - tm.assert_frame_equal(df, expected) - - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series(range(5), dtype=np.int64)}) - df.loc[:, 'date'] = np.array([0, 0, 0, 0, 0], dtype=np.int64) - tm.assert_frame_equal(df, expected) - - expected = DataFrame({'date': Series('foo', index=range(5)), - 'val': Series(range(5), dtype=np.int64)}) - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series(range(5), dtype=np.int64)}) - df.loc[:, 'date'] = 'foo' - tm.assert_frame_equal(df, expected) - - expected = DataFrame({'date': Series(1.0, index=range(5)), - 'val': Series(range(5), dtype=np.int64)}) - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series(range(5), dtype=np.int64)}) - df.loc[:, 'date'] = 1.0 - tm.assert_frame_equal(df, expected) - - def test_loc_setitem_consistency_empty(self): - # empty (essentially noops) - expected = DataFrame(columns=['x', 'y']) - expected['x'] = expected['x'].astype(np.int64) - df = DataFrame(columns=['x', 'y']) - df.loc[:, 'x'] = 1 - tm.assert_frame_equal(df, expected) - - df = DataFrame(columns=['x', 'y']) - df['x'] = 1 - tm.assert_frame_equal(df, expected) - - def test_loc_setitem_consistency_slice_column_len(self): - # .loc[:,column] setting with slice == len of the column - # GH10408 - data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat -Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse -Region,Site,RespondentID,,,,, -Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes, -Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes -Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes, -Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No""" - - df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2]) - df.loc[:, ('Respondent', 'StartDate')] = pd.to_datetime(df.loc[:, ( - 'Respondent', 'StartDate')]) - df.loc[:, ('Respondent', 'EndDate')] = pd.to_datetime(df.loc[:, ( - 'Respondent', 'EndDate')]) - df.loc[:, ('Respondent', 'Duration')] = df.loc[:, ( - 'Respondent', 'EndDate')] - df.loc[:, ('Respondent', 'StartDate')] - - df.loc[:, ('Respondent', 'Duration')] = df.loc[:, ( - 'Respondent', 'Duration')].astype('timedelta64[s]') - expected = Series([1380, 720, 840, 2160.], index=df.index, - name=('Respondent', 'Duration')) - tm.assert_series_equal(df[('Respondent', 'Duration')], expected) - - def test_loc_setitem_frame(self): - df = 
self.frame_labels - - result = df.iloc[0, 0] - - df.loc['a', 'A'] = 1 - result = df.loc['a', 'A'] - self.assertEqual(result, 1) - - result = df.iloc[0, 0] - self.assertEqual(result, 1) - - df.loc[:, 'B':'D'] = 0 - expected = df.loc[:, 'B':'D'] - with catch_warnings(record=True): - result = df.ix[:, 1:] - tm.assert_frame_equal(result, expected) - - # GH 6254 - # setting issue - df = DataFrame(index=[3, 5, 4], columns=['A']) - df.loc[[4, 3, 5], 'A'] = np.array([1, 2, 3], dtype='int64') - expected = DataFrame(dict(A=Series( - [1, 2, 3], index=[4, 3, 5]))).reindex(index=[3, 5, 4]) - tm.assert_frame_equal(df, expected) - - # GH 6252 - # setting with an empty frame - keys1 = ['@' + str(i) for i in range(5)] - val1 = np.arange(5, dtype='int64') - - keys2 = ['@' + str(i) for i in range(4)] - val2 = np.arange(4, dtype='int64') - - index = list(set(keys1).union(keys2)) - df = DataFrame(index=index) - df['A'] = nan - df.loc[keys1, 'A'] = val1 - - df['B'] = nan - df.loc[keys2, 'B'] = val2 - - expected = DataFrame(dict(A=Series(val1, index=keys1), B=Series( - val2, index=keys2))).reindex(index=index) - tm.assert_frame_equal(df, expected) - - # GH 8669 - # invalid coercion of nan -> int - df = DataFrame({'A': [1, 2, 3], 'B': np.nan}) - df.loc[df.B > df.A, 'B'] = df.A - expected = DataFrame({'A': [1, 2, 3], 'B': np.nan}) - tm.assert_frame_equal(df, expected) - - # GH 6546 - # setting with mixed labels - df = DataFrame({1: [1, 2], 2: [3, 4], 'a': ['a', 'b']}) - - result = df.loc[0, [1, 2]] - expected = Series([1, 3], index=[1, 2], dtype=object, name=0) - tm.assert_series_equal(result, expected) - - expected = DataFrame({1: [5, 2], 2: [6, 4], 'a': ['a', 'b']}) - df.loc[0, [1, 2]] = [5, 6] - tm.assert_frame_equal(df, expected) - - def test_loc_setitem_frame_multiples(self): - # multiple setting - df = DataFrame({'A': ['foo', 'bar', 'baz'], - 'B': Series( - range(3), dtype=np.int64)}) - rhs = df.loc[1:2] - rhs.index = df.index[0:2] - df.loc[0:1] = rhs - expected = DataFrame({'A': ['bar', 'baz', 'baz'], - 'B': Series( - [1, 2, 2], dtype=np.int64)}) - tm.assert_frame_equal(df, expected) - - # multiple setting with frame on rhs (with M8) - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series( - range(5), dtype=np.int64)}) - expected = DataFrame({'date': [Timestamp('20000101'), Timestamp( - '20000102'), Timestamp('20000101'), Timestamp('20000102'), - Timestamp('20000103')], - 'val': Series( - [0, 1, 0, 1, 2], dtype=np.int64)}) - rhs = df.loc[0:2] - rhs.index = df.index[2:5] - df.loc[2:4] = rhs - tm.assert_frame_equal(df, expected) - - def test_iloc_getitem_frame(self): - df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2), - columns=lrange(0, 8, 2)) - - result = df.iloc[2] - with catch_warnings(record=True): - exp = df.ix[4] - tm.assert_series_equal(result, exp) - - result = df.iloc[2, 2] - with catch_warnings(record=True): - exp = df.ix[4, 4] - self.assertEqual(result, exp) - - # slice - result = df.iloc[4:8] - with catch_warnings(record=True): - expected = df.ix[8:14] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 2:3] - with catch_warnings(record=True): - expected = df.ix[:, 4:5] - tm.assert_frame_equal(result, expected) - - # list of integers - result = df.iloc[[0, 1, 3]] - with catch_warnings(record=True): - expected = df.ix[[0, 2, 6]] - tm.assert_frame_equal(result, expected) - - result = df.iloc[[0, 1, 3], [0, 1]] - with catch_warnings(record=True): - expected = df.ix[[0, 2, 6], [0, 2]] - tm.assert_frame_equal(result, expected) - - # neg indicies - 
result = df.iloc[[-1, 1, 3], [-1, 1]] - with catch_warnings(record=True): - expected = df.ix[[18, 2, 6], [6, 2]] - tm.assert_frame_equal(result, expected) - - # dups indicies - result = df.iloc[[-1, -1, 1, 3], [-1, 1]] - with catch_warnings(record=True): - expected = df.ix[[18, 18, 2, 6], [6, 2]] - tm.assert_frame_equal(result, expected) - - # with index-like - s = Series(index=lrange(1, 5)) - result = df.iloc[s.index] - with catch_warnings(record=True): - expected = df.ix[[2, 4, 6, 8]] - tm.assert_frame_equal(result, expected) - - def test_iloc_getitem_labelled_frame(self): - # try with labelled frame - df = DataFrame(np.random.randn(10, 4), - index=list('abcdefghij'), columns=list('ABCD')) - - result = df.iloc[1, 1] - exp = df.loc['b', 'B'] - self.assertEqual(result, exp) - - result = df.iloc[:, 2:3] - expected = df.loc[:, ['C']] - tm.assert_frame_equal(result, expected) - - # negative indexing - result = df.iloc[-1, -1] - exp = df.loc['j', 'D'] - self.assertEqual(result, exp) - - # out-of-bounds exception - self.assertRaises(IndexError, df.iloc.__getitem__, tuple([10, 5])) - - # trying to use a label - self.assertRaises(ValueError, df.iloc.__getitem__, tuple(['j', 'D'])) - - def test_iloc_getitem_doc_issue(self): - - # multi axis slicing issue with single block - # surfaced in GH 6059 - - arr = np.random.randn(6, 4) - index = date_range('20130101', periods=6) - columns = list('ABCD') - df = DataFrame(arr, index=index, columns=columns) - - # defines ref_locs - df.describe() - - result = df.iloc[3:5, 0:2] - str(result) - result.dtypes - - expected = DataFrame(arr[3:5, 0:2], index=index[3:5], - columns=columns[0:2]) - tm.assert_frame_equal(result, expected) - - # for dups - df.columns = list('aaaa') - result = df.iloc[3:5, 0:2] - str(result) - result.dtypes - - expected = DataFrame(arr[3:5, 0:2], index=index[3:5], - columns=list('aa')) - tm.assert_frame_equal(result, expected) - - # related - arr = np.random.randn(6, 4) - index = list(range(0, 12, 2)) - columns = list(range(0, 8, 2)) - df = DataFrame(arr, index=index, columns=columns) - - df._data.blocks[0].mgr_locs - result = df.iloc[1:5, 2:4] - str(result) - result.dtypes - expected = DataFrame(arr[1:5, 2:4], index=index[1:5], - columns=columns[2:4]) - tm.assert_frame_equal(result, expected) +class TestFancy(Base, tm.TestCase): + """ pure get/set item & fancy indexing """ def test_setitem_ndarray_1d(self): # GH5508 - # len of indexer vs length of the 1d ndarray - df = DataFrame(index=Index(lrange(1, 11))) - df['foo'] = np.zeros(10, dtype=np.float64) - df['bar'] = np.zeros(10, dtype=np.complex) - - # invalid - def f(): - with catch_warnings(record=True): - df.ix[2:5, 'bar'] = np.array([2.33j, 1.23 + 0.1j, 2.2]) - - self.assertRaises(ValueError, f) - - def f(): - df.loc[df.index[2:5], 'bar'] = np.array([2.33j, 1.23 + 0.1j, - 2.2, 1.0]) - - self.assertRaises(ValueError, f) - - # valid - df.loc[df.index[2:6], 'bar'] = np.array([2.33j, 1.23 + 0.1j, - 2.2, 1.0]) - - result = df.loc[df.index[2:6], 'bar'] - expected = Series([2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6], - name='bar') - tm.assert_series_equal(result, expected) - - # dtype getting changed? 
- df = DataFrame(index=Index(lrange(1, 11))) - df['foo'] = np.zeros(10, dtype=np.float64) - df['bar'] = np.zeros(10, dtype=np.complex) - - def f(): - df[2:5] = np.arange(1, 4) * 1j - - self.assertRaises(ValueError, f) - - def test_iloc_setitem_series(self): - df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'), - columns=list('ABCD')) - - df.iloc[1, 1] = 1 - result = df.iloc[1, 1] - self.assertEqual(result, 1) - - df.iloc[:, 2:3] = 0 - expected = df.iloc[:, 2:3] - result = df.iloc[:, 2:3] - tm.assert_frame_equal(result, expected) - - s = Series(np.random.randn(10), index=lrange(0, 20, 2)) - - s.iloc[1] = 1 - result = s.iloc[1] - self.assertEqual(result, 1) - - s.iloc[:4] = 0 - expected = s.iloc[:4] - result = s.iloc[:4] - tm.assert_series_equal(result, expected) - - s = Series([-1] * 6) - s.iloc[0::2] = [0, 2, 4] - s.iloc[1::2] = [1, 3, 5] - result = s - expected = Series([0, 1, 2, 3, 4, 5]) - tm.assert_series_equal(result, expected) - - def test_iloc_setitem_list_of_lists(self): - - # GH 7551 - # list-of-list is set incorrectly in mixed vs. single dtyped frames - df = DataFrame(dict(A=np.arange(5, dtype='int64'), - B=np.arange(5, 10, dtype='int64'))) - df.iloc[2:4] = [[10, 11], [12, 13]] - expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9])) - tm.assert_frame_equal(df, expected) - - df = DataFrame( - dict(A=list('abcde'), B=np.arange(5, 10, dtype='int64'))) - df.iloc[2:4] = [['x', 11], ['y', 13]] - expected = DataFrame(dict(A=['a', 'b', 'x', 'y', 'e'], - B=[5, 6, 11, 13, 9])) - tm.assert_frame_equal(df, expected) - - def test_ix_general(self): - - # ix general issues - - # GH 2817 - data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, - 'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, - 'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}} - df = DataFrame(data).set_index(keys=['col', 'year']) - key = 4.0, 2012 - - # emits a PerformanceWarning, ok - with self.assert_produces_warning(PerformanceWarning): - tm.assert_frame_equal(df.loc[key], df.iloc[2:]) - - # this is ok - df.sort_index(inplace=True) - res = df.loc[key] - - # col has float dtype, result should be Float64Index - index = MultiIndex.from_arrays([[4.] 
* 3, [2012] * 3], - names=['col', 'year']) - expected = DataFrame({'amount': [222, 333, 444]}, index=index) - tm.assert_frame_equal(res, expected) - - def test_ix_weird_slicing(self): - # http://stackoverflow.com/q/17056560/1240268 - df = DataFrame({'one': [1, 2, 3, np.nan, np.nan], - 'two': [1, 2, 3, 4, 5]}) - df.loc[df['one'] > 1, 'two'] = -df['two'] - - expected = DataFrame({'one': {0: 1.0, - 1: 2.0, - 2: 3.0, - 3: nan, - 4: nan}, - 'two': {0: 1, - 1: -2, - 2: -3, - 3: 4, - 4: 5}}) - tm.assert_frame_equal(df, expected) - - def test_loc_coerceion(self): - - # 12411 - df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'), - pd.NaT]}) - expected = df.dtypes - - result = df.iloc[[0]] - tm.assert_series_equal(result.dtypes, expected) + # len of indexer vs length of the 1d ndarray + df = DataFrame(index=Index(lrange(1, 11))) + df['foo'] = np.zeros(10, dtype=np.float64) + df['bar'] = np.zeros(10, dtype=np.complex) - result = df.iloc[[1]] - tm.assert_series_equal(result.dtypes, expected) + # invalid + def f(): + df.loc[df.index[2:5], 'bar'] = np.array([2.33j, 1.23 + 0.1j, + 2.2, 1.0]) - # 12045 - import datetime - df = DataFrame({'date': [datetime.datetime(2012, 1, 1), - datetime.datetime(1012, 1, 2)]}) - expected = df.dtypes + self.assertRaises(ValueError, f) - result = df.iloc[[0]] - tm.assert_series_equal(result.dtypes, expected) + # valid + df.loc[df.index[2:6], 'bar'] = np.array([2.33j, 1.23 + 0.1j, + 2.2, 1.0]) - result = df.iloc[[1]] - tm.assert_series_equal(result.dtypes, expected) + result = df.loc[df.index[2:6], 'bar'] + expected = Series([2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6], + name='bar') + tm.assert_series_equal(result, expected) - # 11594 - df = DataFrame({'text': ['some words'] + [None] * 9}) - expected = df.dtypes + # dtype getting changed? 
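As context for the dtype checks in this hunk: the test_loc_coerceion cases
removed above assert that selecting single rows positionally preserves a
frame's dtypes, even around missing values. A standalone sketch of that
invariant, using the same tz-aware/NaT data as the removed test:

    import pandas as pd

    df = pd.DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'),
                                pd.NaT]})

    # positional row selection should not change the column dtypes
    assert (df.iloc[[0]].dtypes == df.dtypes).all()
    assert (df.iloc[[1]].dtypes == df.dtypes).all()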
+ df = DataFrame(index=Index(lrange(1, 11))) + df['foo'] = np.zeros(10, dtype=np.float64) + df['bar'] = np.zeros(10, dtype=np.complex) - result = df.iloc[0:2] - tm.assert_series_equal(result.dtypes, expected) + def f(): + df[2:5] = np.arange(1, 4) * 1j - result = df.iloc[3:] - tm.assert_series_equal(result.dtypes, expected) + self.assertRaises(ValueError, f) def test_setitem_dtype_upcast(self): @@ -1683,19 +99,6 @@ def test_setitem_dtype_upcast(self): self.assertTrue(is_float_dtype(left['foo'])) self.assertTrue(is_float_dtype(left['baz'])) - def test_setitem_iloc(self): - - # setitem with an iloc list - df = DataFrame(np.arange(9).reshape((3, 3)), index=["A", "B", "C"], - columns=["A", "B", "C"]) - df.iloc[[0, 1], [1, 2]] - df.iloc[[0, 1], [1, 2]] += 100 - - expected = DataFrame( - np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)), - index=["A", "B", "C"], columns=["A", "B", "C"]) - tm.assert_frame_equal(df, expected) - def test_dups_fancy_indexing(self): # GH 3455 @@ -1757,23 +160,24 @@ def test_dups_fancy_indexing(self): # inconsistent returns for unique/duplicate indices when values are # missing - df = DataFrame(randn(4, 3), index=list('ABCD')) - expected = df.ix[['E']] + df = DataFrame(np.random.randn(4, 3), index=list('ABCD')) + expected = df.reindex(['E']) - dfnu = DataFrame(randn(5, 3), index=list('AABCD')) - result = dfnu.ix[['E']] + dfnu = DataFrame(np.random.randn(5, 3), index=list('AABCD')) + with catch_warnings(record=True): + result = dfnu.ix[['E']] tm.assert_frame_equal(result, expected) # ToDo: check_index_type can be True after GH 11497 # GH 4619; duplicate indexer with missing label df = DataFrame({"A": [0, 1, 2]}) - result = df.ix[[0, 8, 0]] + result = df.loc[[0, 8, 0]] expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) df = DataFrame({"A": list('abc')}) - result = df.ix[[0, 8, 0]] + result = df.loc[[0, 8, 0]] expected = DataFrame({"A": ['a', np.nan, 'a']}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) @@ -1781,7 +185,7 @@ def test_dups_fancy_indexing(self): df = DataFrame({'test': [5, 7, 9, 11]}, index=['A', 'A', 'B', 'C']) expected = DataFrame( {'test': [5, 7, 5, 7, np.nan]}, index=['A', 'A', 'A', 'A', 'E']) - result = df.ix[['A', 'A', 'E']] + result = df.loc[['A', 'A', 'E']] tm.assert_frame_equal(result, expected) # GH 5835 @@ -1790,9 +194,9 @@ def test_dups_fancy_indexing(self): np.random.randn(5, 5), columns=['A', 'B', 'B', 'B', 'A']) expected = pd.concat( - [df.ix[:, ['A', 'B']], DataFrame(np.nan, columns=['C'], - index=df.index)], axis=1) - result = df.ix[:, ['A', 'B', 'C']] + [df.loc[:, ['A', 'B']], DataFrame(np.nan, columns=['C'], + index=df.index)], axis=1) + result = df.loc[:, ['A', 'B', 'C']] tm.assert_frame_equal(result, expected) # GH 6504, multi-axis indexing @@ -1822,8 +226,8 @@ def test_indexing_mixed_frame_bug(self): # this does not work, ie column test is not changed idx = df['test'] == '_' - temp = df.ix[idx, 'a'].apply(lambda x: '-----' if x == 'aaa' else x) - df.ix[idx, 'test'] = temp + temp = df.loc[idx, 'a'].apply(lambda x: '-----' if x == 'aaa' else x) + df.loc[idx, 'test'] = temp self.assertEqual(df.iloc[0, 2], '-----') # if I look at df, then element [0,2] equals '_'. 
If instead I type @@ -1859,17 +263,17 @@ def test_set_index_nan(self): 'QC': {17: 0.0, 18: 0.0, 19: 0.0, - 20: nan, - 21: nan, - 22: nan, - 23: nan, + 20: np.nan, + 21: np.nan, + 22: np.nan, + 23: np.nan, 24: 1.0, - 25: nan, - 26: nan, - 27: nan, - 28: nan, - 29: nan, - 30: nan}, + 25: np.nan, + 26: np.nan, + 27: np.nan, + 28: np.nan, + 29: np.nan, + 30: np.nan}, 'data': {17: 7.9544899999999998, 18: 8.0142609999999994, 19: 7.8591520000000008, @@ -1925,14 +329,14 @@ def test_multi_assign(self): 'PF': [0, 0, 0, 0, 1, 1], 'col1': lrange(6), 'col2': lrange(6, 12)}) - df.ix[1, 0] = np.nan + df.iloc[1, 0] = np.nan df2 = df.copy() mask = ~df2.FC.isnull() cols = ['col1', 'col2'] dft = df2 * 2 - dft.ix[3, 3] = np.nan + dft.iloc[3, 3] = np.nan expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'], 'PF': [0, 0, 0, 0, 1, 1], @@ -1940,17 +344,17 @@ def test_multi_assign(self): 'col2': [12, 7, 16, np.nan, 20, 22]}) # frame on rhs - df2.ix[mask, cols] = dft.ix[mask, cols] + df2.loc[mask, cols] = dft.loc[mask, cols] tm.assert_frame_equal(df2, expected) - df2.ix[mask, cols] = dft.ix[mask, cols] + df2.loc[mask, cols] = dft.loc[mask, cols] tm.assert_frame_equal(df2, expected) # with an ndarray on rhs df2 = df.copy() - df2.ix[mask, cols] = dft.ix[mask, cols].values + df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) - df2.ix[mask, cols] = dft.ix[mask, cols].values + df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) # broadcasting on the rhs is required @@ -1965,79 +369,18 @@ def test_multi_assign(self): df.loc[df['A'] == 0, ['A', 'B']] = df['D'] tm.assert_frame_equal(df, expected) - def test_ix_assign_column_mixed(self): - # GH #1142 - df = DataFrame(tm.getSeriesData()) - df['foo'] = 'bar' - - orig = df.ix[:, 'B'].copy() - df.ix[:, 'B'] = df.ix[:, 'B'] + 1 - tm.assert_series_equal(df.B, orig + 1) - - # GH 3668, mixed frame with series value - df = DataFrame({'x': lrange(10), 'y': lrange(10, 20), 'z': 'bar'}) - expected = df.copy() - - for i in range(5): - indexer = i * 2 - v = 1000 + i * 200 - expected.ix[indexer, 'y'] = v - self.assertEqual(expected.ix[indexer, 'y'], v) - - df.ix[df.x % 2 == 0, 'y'] = df.ix[df.x % 2 == 0, 'y'] * 100 - tm.assert_frame_equal(df, expected) - - # GH 4508, making sure consistency of assignments - df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]}) - df.ix[[0, 2, ], 'b'] = [100, -100] - expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]}) - tm.assert_frame_equal(df, expected) - - df = pd.DataFrame({'a': lrange(4)}) - df['b'] = np.nan - df.ix[[1, 3], 'b'] = [100, -100] - expected = DataFrame({'a': [0, 1, 2, 3], - 'b': [np.nan, 100, np.nan, -100]}) - tm.assert_frame_equal(df, expected) - - # ok, but chained assignments are dangerous - # if we turn off chained assignement it will work - with option_context('chained_assignment', None): - df = pd.DataFrame({'a': lrange(4)}) - df['b'] = np.nan - df['b'].ix[[1, 3]] = [100, -100] - tm.assert_frame_equal(df, expected) - - def test_ix_get_set_consistency(self): - - # GH 4544 - # ix/loc get/set not consistent when - # a mixed int/string index - df = DataFrame(np.arange(16).reshape((4, 4)), - columns=['a', 'b', 8, 'c'], - index=['e', 7, 'f', 'g']) - - self.assertEqual(df.ix['e', 8], 2) - self.assertEqual(df.loc['e', 8], 2) - - df.ix['e', 8] = 42 - self.assertEqual(df.ix['e', 8], 42) - self.assertEqual(df.loc['e', 8], 42) - - df.loc['e', 8] = 45 - self.assertEqual(df.ix['e', 8], 45) - self.assertEqual(df.loc['e', 8], 45) - def test_setitem_list(self): # GH 6043 
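Before the GH 6043 case continues below, a quick standalone illustration of the
masked multi-column assignment idiom that test_multi_assign above migrates from
.ix to .loc (the frame contents are illustrative only):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'FC': ['a', np.nan, 'a', 'b'],
                       'col1': [0, 1, 2, 3],
                       'col2': [6, 7, 8, 9]})
    mask = ~df['FC'].isnull()

    # rows where the mask holds take the doubled values; others are untouched
    df.loc[mask, ['col1', 'col2']] = df.loc[mask, ['col1', 'col2']] * 2
    print(df)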
# ix with a list df = DataFrame(index=[0, 1], columns=[0]) - df.ix[1, 0] = [1, 2, 3] - df.ix[1, 0] = [1, 2] + with catch_warnings(record=True): + df.ix[1, 0] = [1, 2, 3] + df.ix[1, 0] = [1, 2] result = DataFrame(index=[0, 1], columns=[0]) - result.ix[1, 0] = [1, 2] + with catch_warnings(record=True): + result.ix[1, 0] = [1, 2] tm.assert_frame_equal(result, df) @@ -2059,187 +402,25 @@ def view(self): return self df = DataFrame(index=[0, 1], columns=[0]) - df.ix[1, 0] = TO(1) - df.ix[1, 0] = TO(2) + with catch_warnings(record=True): + df.ix[1, 0] = TO(1) + df.ix[1, 0] = TO(2) result = DataFrame(index=[0, 1], columns=[0]) - result.ix[1, 0] = TO(2) + with catch_warnings(record=True): + result.ix[1, 0] = TO(2) tm.assert_frame_equal(result, df) # remains object dtype even after setting it back df = DataFrame(index=[0, 1], columns=[0]) - df.ix[1, 0] = TO(1) - df.ix[1, 0] = np.nan + with catch_warnings(record=True): + df.ix[1, 0] = TO(1) + df.ix[1, 0] = np.nan result = DataFrame(index=[0, 1], columns=[0]) tm.assert_frame_equal(result, df) - def test_iloc_mask(self): - - # GH 3631, iloc with a mask (of a series) should raise - df = DataFrame(lrange(5), list('ABCDE'), columns=['a']) - mask = (df.a % 2 == 0) - self.assertRaises(ValueError, df.iloc.__getitem__, tuple([mask])) - mask.index = lrange(len(mask)) - self.assertRaises(NotImplementedError, df.iloc.__getitem__, - tuple([mask])) - - # ndarray ok - result = df.iloc[np.array([True] * len(mask), dtype=bool)] - tm.assert_frame_equal(result, df) - - # the possibilities - locs = np.arange(4) - nums = 2 ** locs - reps = lmap(bin, nums) - df = DataFrame({'locs': locs, 'nums': nums}, reps) - - expected = { - (None, ''): '0b1100', - (None, '.loc'): '0b1100', - (None, '.iloc'): '0b1100', - ('index', ''): '0b11', - ('index', '.loc'): '0b11', - ('index', '.iloc'): ('iLocation based boolean indexing ' - 'cannot use an indexable as a mask'), - ('locs', ''): 'Unalignable boolean Series provided as indexer ' - '(index of the boolean Series and of the indexed ' - 'object do not match', - ('locs', '.loc'): 'Unalignable boolean Series provided as indexer ' - '(index of the boolean Series and of the ' - 'indexed object do not match', - ('locs', '.iloc'): ('iLocation based boolean indexing on an ' - 'integer type is not available'), - } - - # UserWarnings from reindex of a boolean mask - with warnings.catch_warnings(record=True): - result = dict() - for idx in [None, 'index', 'locs']: - mask = (df.nums > 2).values - if idx: - mask = Series(mask, list(reversed(getattr(df, idx)))) - for method in ['', '.loc', '.iloc']: - try: - if method: - accessor = getattr(df, method[1:]) - else: - accessor = df - ans = str(bin(accessor[mask]['nums'].sum())) - except Exception as e: - ans = str(e) - - key = tuple([idx, method]) - r = expected.get(key) - if r != ans: - raise AssertionError( - "[%s] does not match [%s], received [%s]" - % (key, ans, r)) - - def test_ix_slicing_strings(self): - # GH3836 - data = {'Classification': - ['SA EQUITY CFD', 'bbb', 'SA EQUITY', 'SA SSF', 'aaa'], - 'Random': [1, 2, 3, 4, 5], - 'X': ['correct', 'wrong', 'correct', 'correct', 'wrong']} - df = DataFrame(data) - x = df[~df.Classification.isin(['SA EQUITY CFD', 'SA EQUITY', 'SA SSF' - ])] - df.ix[x.index, 'X'] = df['Classification'] - - expected = DataFrame({'Classification': {0: 'SA EQUITY CFD', - 1: 'bbb', - 2: 'SA EQUITY', - 3: 'SA SSF', - 4: 'aaa'}, - 'Random': {0: 1, - 1: 2, - 2: 3, - 3: 4, - 4: 5}, - 'X': {0: 'correct', - 1: 'bbb', - 2: 'correct', - 3: 'correct', - 4: 'aaa'}}) # bug was 4: 
'bbb' - - tm.assert_frame_equal(df, expected) - - def test_non_unique_loc(self): - # GH3659 - # non-unique indexer with loc slice - # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs - - # these are going to raise becuase the we are non monotonic - df = DataFrame({'A': [1, 2, 3, 4, 5, 6], - 'B': [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3]) - self.assertRaises(KeyError, df.loc.__getitem__, - tuple([slice(1, None)])) - self.assertRaises(KeyError, df.loc.__getitem__, - tuple([slice(0, None)])) - self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1, 2)])) - - # monotonic are ok - df = DataFrame({'A': [1, 2, 3, 4, 5, 6], - 'B': [3, 4, 5, 6, 7, 8]}, - index=[0, 1, 0, 1, 2, 3]).sort_index(axis=0) - result = df.loc[1:] - expected = DataFrame({'A': [2, 4, 5, 6], 'B': [4, 6, 7, 8]}, - index=[1, 1, 2, 3]) - tm.assert_frame_equal(result, expected) - - result = df.loc[0:] - tm.assert_frame_equal(result, df) - - result = df.loc[1:2] - expected = DataFrame({'A': [2, 4, 5], 'B': [4, 6, 7]}, - index=[1, 1, 2]) - tm.assert_frame_equal(result, expected) - - def test_loc_name(self): - # GH 3880 - df = DataFrame([[1, 1], [1, 1]]) - df.index.name = 'index_name' - result = df.iloc[[0, 1]].index.name - self.assertEqual(result, 'index_name') - - result = df.ix[[0, 1]].index.name - self.assertEqual(result, 'index_name') - - result = df.loc[[0, 1]].index.name - self.assertEqual(result, 'index_name') - - def test_iloc_non_unique_indexing(self): - - # GH 4017, non-unique indexing (on the axis) - df = DataFrame({'A': [0.1] * 3000, 'B': [1] * 3000}) - idx = np.array(lrange(30)) * 99 - expected = df.iloc[idx] - - df3 = pd.concat([df, 2 * df, 3 * df]) - result = df3.iloc[idx] - - tm.assert_frame_equal(result, expected) - - df2 = DataFrame({'A': [0.1] * 1000, 'B': [1] * 1000}) - df2 = pd.concat([df2, 2 * df2, 3 * df2]) - - sidx = df2.index.to_series() - expected = df2.iloc[idx[idx <= sidx.max()]] - - new_list = [] - for r, s in expected.iterrows(): - new_list.append(s) - new_list.append(s * 2) - new_list.append(s * 3) - - expected = DataFrame(new_list) - expected = pd.concat([expected, DataFrame(index=idx[idx > sidx.max()]) - ]) - result = df2.loc[idx] - tm.assert_frame_equal(result, expected, check_index_type=False) - def test_string_slice(self): # GH 14424 # string indexing against datetimelike with object @@ -2300,43 +481,6 @@ def test_mi_access(self): result = df2['A']['B2'] tm.assert_frame_equal(result, expected) - def test_non_unique_loc_memory_error(self): - - # GH 4280 - # non_unique index with a large selection triggers a memory error - - columns = list('ABCDEFG') - - def gen_test(l, l2): - return pd.concat([DataFrame(randn(l, len(columns)), - index=lrange(l), columns=columns), - DataFrame(np.ones((l2, len(columns))), - index=[0] * l2, columns=columns)]) - - def gen_expected(df, mask): - l = len(mask) - return pd.concat([df.take([0], convert=False), - DataFrame(np.ones((l, len(columns))), - index=[0] * l, - columns=columns), - df.take(mask[1:], convert=False)]) - - df = gen_test(900, 100) - self.assertFalse(df.index.is_unique) - - mask = np.arange(100) - result = df.loc[mask] - expected = gen_expected(df, mask) - tm.assert_frame_equal(result, expected) - - df = gen_test(900000, 100000) - self.assertFalse(df.index.is_unique) - - mask = np.arange(100000) - result = df.loc[mask] - expected = gen_expected(df, mask) - tm.assert_frame_equal(result, expected) - def test_astype_assignment(self): # GH4312 (iloc) @@ -2395,745 +539,79 @@ def test_astype_assignment_with_dups(self): # result = 
df.get_dtype_counts().sort_index() # expected = Series({'float64': 2, 'object': 1}).sort_index() - def test_dups_loc(self): - - # GH4726 - # dup indexing with iloc/loc - df = DataFrame([[1, 2, 'foo', 'bar', Timestamp('20130101')]], - columns=['a', 'a', 'a', 'a', 'a'], index=[1]) - expected = Series([1, 2, 'foo', 'bar', Timestamp('20130101')], - index=['a', 'a', 'a', 'a', 'a'], name=1) - - result = df.iloc[0] - tm.assert_series_equal(result, expected) - - result = df.loc[1] - tm.assert_series_equal(result, expected) - - def test_partial_setting(self): - - # GH2578, allow ix and friends to partially set - - # series - s_orig = Series([1, 2, 3]) - - s = s_orig.copy() - s[5] = 5 - expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) - - s = s_orig.copy() - s.loc[5] = 5 - expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) - - s = s_orig.copy() - s[5] = 5. - expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) - - s = s_orig.copy() - s.loc[5] = 5. - expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) - - # iloc/iat raise - s = s_orig.copy() - - def f(): - s.iloc[3] = 5. - - self.assertRaises(IndexError, f) - - def f(): - s.iat[3] = 5. - - self.assertRaises(IndexError, f) - - # ## frame ## - - df_orig = DataFrame( - np.arange(6).reshape(3, 2), columns=['A', 'B'], dtype='int64') - - # iloc/iat raise - df = df_orig.copy() - - def f(): - df.iloc[4, 2] = 5. - - self.assertRaises(IndexError, f) - - def f(): - df.iat[4, 2] = 5. - - self.assertRaises(IndexError, f) - - # row setting where it exists - expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]})) - df = df_orig.copy() - df.iloc[1] = df.iloc[2] - tm.assert_frame_equal(df, expected) - - expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]})) - df = df_orig.copy() - df.loc[1] = df.loc[2] - tm.assert_frame_equal(df, expected) - - # like 2578, partial setting with dtype preservation - expected = DataFrame(dict({'A': [0, 2, 4, 4], 'B': [1, 3, 5, 5]})) - df = df_orig.copy() - df.loc[3] = df.loc[2] - tm.assert_frame_equal(df, expected) - - # single dtype frame, overwrite - expected = DataFrame(dict({'A': [0, 2, 4], 'B': [0, 2, 4]})) - df = df_orig.copy() - df.ix[:, 'B'] = df.ix[:, 'A'] - tm.assert_frame_equal(df, expected) - - # mixed dtype frame, overwrite - expected = DataFrame(dict({'A': [0, 2, 4], 'B': Series([0, 2, 4])})) - df = df_orig.copy() - df['B'] = df['B'].astype(np.float64) - df.ix[:, 'B'] = df.ix[:, 'A'] - tm.assert_frame_equal(df, expected) - - # single dtype frame, partial setting - expected = df_orig.copy() - expected['C'] = df['A'] - df = df_orig.copy() - df.ix[:, 'C'] = df.ix[:, 'A'] - tm.assert_frame_equal(df, expected) - - # mixed frame, partial setting - expected = df_orig.copy() - expected['C'] = df['A'] - df = df_orig.copy() - df.ix[:, 'C'] = df.ix[:, 'A'] - tm.assert_frame_equal(df, expected) - - # ## panel ## - p_orig = Panel(np.arange(16).reshape(2, 4, 2), - items=['Item1', 'Item2'], - major_axis=pd.date_range('2001/1/12', periods=4), - minor_axis=['A', 'B'], dtype='float64') - - # panel setting via item - p_orig = Panel(np.arange(16).reshape(2, 4, 2), - items=['Item1', 'Item2'], - major_axis=pd.date_range('2001/1/12', periods=4), - minor_axis=['A', 'B'], dtype='float64') - expected = p_orig.copy() - expected['Item3'] = expected['Item1'] - p = p_orig.copy() - p.loc['Item3'] = p['Item1'] - tm.assert_panel_equal(p, expected) - - # panel with aligned series - expected 
= p_orig.copy() - expected = expected.transpose(2, 1, 0) - expected['C'] = DataFrame({'Item1': [30, 30, 30, 30], - 'Item2': [32, 32, 32, 32]}, - index=p_orig.major_axis) - expected = expected.transpose(2, 1, 0) - p = p_orig.copy() - p.loc[:, :, 'C'] = Series([30, 32], index=p_orig.items) - tm.assert_panel_equal(p, expected) - - # GH 8473 - dates = date_range('1/1/2000', periods=8) - df_orig = DataFrame(np.random.randn(8, 4), index=dates, - columns=['A', 'B', 'C', 'D']) - - expected = pd.concat([df_orig, DataFrame( - {'A': 7}, index=[dates[-1] + 1])]) - df = df_orig.copy() - df.loc[dates[-1] + 1, 'A'] = 7 - tm.assert_frame_equal(df, expected) - df = df_orig.copy() - df.at[dates[-1] + 1, 'A'] = 7 - tm.assert_frame_equal(df, expected) - - exp_other = DataFrame({0: 7}, index=[dates[-1] + 1]) - expected = pd.concat([df_orig, exp_other], axis=1) - - df = df_orig.copy() - df.loc[dates[-1] + 1, 0] = 7 - tm.assert_frame_equal(df, expected) - df = df_orig.copy() - df.at[dates[-1] + 1, 0] = 7 - tm.assert_frame_equal(df, expected) - - def test_partial_setting_mixed_dtype(self): - - # in a mixed dtype environment, try to preserve dtypes - # by appending - df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"]) - - s = df.loc[1].copy() - s.name = 2 - expected = df.append(s) - - df.loc[2] = df.loc[1] - tm.assert_frame_equal(df, expected) - - # columns will align - df = DataFrame(columns=['A', 'B']) - df.loc[0] = Series(1, index=range(4)) - tm.assert_frame_equal(df, DataFrame(columns=['A', 'B'], index=[0])) - - # columns will align - df = DataFrame(columns=['A', 'B']) - df.loc[0] = Series(1, index=['B']) - - exp = DataFrame([[np.nan, 1]], columns=['A', 'B'], - index=[0], dtype='float64') - tm.assert_frame_equal(df, exp) - - # list-like must conform - df = DataFrame(columns=['A', 'B']) - - def f(): - df.loc[0] = [1, 2, 3] - - self.assertRaises(ValueError, f) - - # these are coerced to float unavoidably (as its a list-like to begin) - df = DataFrame(columns=['A', 'B']) - df.loc[3] = [6, 7] - - exp = DataFrame([[6, 7]], index=[3], columns=['A', 'B'], - dtype='float64') - tm.assert_frame_equal(df, exp) - - def test_series_partial_set(self): - # partial set with new index - # Regression from GH4825 - ser = Series([0.1, 0.2], index=[1, 2]) - - # loc - expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) - result = ser.loc[[3, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, 'x']) - result = ser.loc[[3, 2, 3, 'x']] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1]) - result = ser.loc[[2, 2, 1]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, 'x', 1]) - result = ser.loc[[2, 2, 'x', 1]] - tm.assert_series_equal(result, expected, check_index_type=True) - - # raises as nothing in in the index - self.assertRaises(KeyError, lambda: ser.loc[[3, 3, 3]]) - - expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) - result = ser.loc[[2, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) - result = Series([0.1, 0.2, 0.3], index=[1, 2, 3]).loc[[3, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[1, 2, 3, 4]).loc[[5, 3, 3]] - tm.assert_series_equal(result, 
expected, check_index_type=True) - - expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[1, 2, 3, 4]).loc[[5, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[4, 5, 6, 7]).loc[[7, 2, 2]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[1, 2, 3, 4]).loc[[4, 5, 5]] - tm.assert_series_equal(result, expected, check_index_type=True) - - # iloc - expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1]) - result = ser.iloc[[1, 1, 0, 0]] - tm.assert_series_equal(result, expected, check_index_type=True) - - def test_series_partial_set_with_name(self): - # GH 11497 - - idx = Index([1, 2], dtype='int64', name='idx') - ser = Series([0.1, 0.2], index=idx, name='s') - - # loc - exp_idx = Index([3, 2, 3], dtype='int64', name='idx') - expected = Series([np.nan, 0.2, np.nan], index=exp_idx, name='s') - result = ser.loc[[3, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([3, 2, 3, 'x'], dtype='object', name='idx') - expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx, - name='s') - result = ser.loc[[3, 2, 3, 'x']] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([2, 2, 1], dtype='int64', name='idx') - expected = Series([0.2, 0.2, 0.1], index=exp_idx, name='s') - result = ser.loc[[2, 2, 1]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([2, 2, 'x', 1], dtype='object', name='idx') - expected = Series([0.2, 0.2, np.nan, 0.1], index=exp_idx, name='s') - result = ser.loc[[2, 2, 'x', 1]] - tm.assert_series_equal(result, expected, check_index_type=True) - - # raises as nothing in in the index - self.assertRaises(KeyError, lambda: ser.loc[[3, 3, 3]]) - - exp_idx = Index([2, 2, 3], dtype='int64', name='idx') - expected = Series([0.2, 0.2, np.nan], index=exp_idx, name='s') - result = ser.loc[[2, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([3, 4, 4], dtype='int64', name='idx') - expected = Series([0.3, np.nan, np.nan], index=exp_idx, name='s') - idx = Index([1, 2, 3], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3], index=idx, name='s').loc[[3, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([5, 3, 3], dtype='int64', name='idx') - expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name='s') - idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[5, 3, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([5, 4, 4], dtype='int64', name='idx') - expected = Series([np.nan, 0.4, 0.4], index=exp_idx, name='s') - idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[5, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([7, 2, 2], dtype='int64', name='idx') - expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') - idx = Index([4, 5, 6, 7], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[7, 2, 2]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([4, 5, 5], dtype='int64', 
name='idx') - expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') - idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[4, 5, 5]] - tm.assert_series_equal(result, expected, check_index_type=True) - - # iloc - exp_idx = Index([2, 2, 1, 1], dtype='int64', name='idx') - expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name='s') - result = ser.iloc[[1, 1, 0, 0]] - tm.assert_series_equal(result, expected, check_index_type=True) - - def test_partial_set_invalid(self): - - # GH 4940 - # allow only setting of 'valid' values - - orig = tm.makeTimeDataFrame() - df = orig.copy() - - # don't allow not string inserts - def f(): - df.loc[100.0, :] = df.ix[0] - - self.assertRaises(TypeError, f) - - def f(): - df.loc[100, :] = df.ix[0] - - self.assertRaises(TypeError, f) - - def f(): - df.ix[100.0, :] = df.ix[0] - - self.assertRaises(TypeError, f) - - def f(): - df.ix[100, :] = df.ix[0] - - self.assertRaises(ValueError, f) - - # allow object conversion here - df = orig.copy() - df.loc['a', :] = df.ix[0] - exp = orig.append(pd.Series(df.ix[0], name='a')) - tm.assert_frame_equal(df, exp) - tm.assert_index_equal(df.index, - pd.Index(orig.index.tolist() + ['a'])) - self.assertEqual(df.index.dtype, 'object') - - def test_partial_set_empty_series(self): - - # GH5226 - - # partially set with an empty object series - s = Series() - s.loc[1] = 1 - tm.assert_series_equal(s, Series([1], index=[1])) - s.loc[3] = 3 - tm.assert_series_equal(s, Series([1, 3], index=[1, 3])) - - s = Series() - s.loc[1] = 1. - tm.assert_series_equal(s, Series([1.], index=[1])) - s.loc[3] = 3. - tm.assert_series_equal(s, Series([1., 3.], index=[1, 3])) - - s = Series() - s.loc['foo'] = 1 - tm.assert_series_equal(s, Series([1], index=['foo'])) - s.loc['bar'] = 3 - tm.assert_series_equal(s, Series([1, 3], index=['foo', 'bar'])) - s.loc[3] = 4 - tm.assert_series_equal(s, Series([1, 3, 4], index=['foo', 'bar', 3])) - - def test_partial_set_empty_frame(self): - - # partially set with an empty object - # frame - df = DataFrame() - - def f(): - df.loc[1] = 1 - - self.assertRaises(ValueError, f) - - def f(): - df.loc[1] = Series([1], index=['foo']) - - self.assertRaises(ValueError, f) - - def f(): - df.loc[:, 1] = 1 - - self.assertRaises(ValueError, f) - - # these work as they don't really change - # anything but the index - # GH5632 - expected = DataFrame(columns=['foo'], index=pd.Index( - [], dtype='int64')) - - def f(): - df = DataFrame() - df['foo'] = Series([], dtype='object') - return df - - tm.assert_frame_equal(f(), expected) - - def f(): - df = DataFrame() - df['foo'] = Series(df.index) - return df - - tm.assert_frame_equal(f(), expected) - - def f(): - df = DataFrame() - df['foo'] = df.index - return df - - tm.assert_frame_equal(f(), expected) - - expected = DataFrame(columns=['foo'], - index=pd.Index([], dtype='int64')) - expected['foo'] = expected['foo'].astype('float64') - - def f(): - df = DataFrame() - df['foo'] = [] - return df - - tm.assert_frame_equal(f(), expected) - - def f(): - df = DataFrame() - df['foo'] = Series(range(len(df))) - return df - - tm.assert_frame_equal(f(), expected) - - def f(): - df = DataFrame() - tm.assert_index_equal(df.index, pd.Index([], dtype='object')) - df['foo'] = range(len(df)) - return df - - expected = DataFrame(columns=['foo'], - index=pd.Index([], dtype='int64')) - expected['foo'] = expected['foo'].astype('float64') - tm.assert_frame_equal(f(), expected) - - df = DataFrame() - 
tm.assert_index_equal(df.columns, pd.Index([], dtype=object)) - df2 = DataFrame() - df2[1] = Series([1], index=['foo']) - df.loc[:, 1] = Series([1], index=['foo']) - tm.assert_frame_equal(df, DataFrame([[1]], index=['foo'], columns=[1])) - tm.assert_frame_equal(df, df2) - - # no index to start - expected = DataFrame({0: Series(1, index=range(4))}, - columns=['A', 'B', 0]) - - df = DataFrame(columns=['A', 'B']) - df[0] = Series(1, index=range(4)) - df.dtypes - str(df) - tm.assert_frame_equal(df, expected) - - df = DataFrame(columns=['A', 'B']) - df.loc[:, 0] = Series(1, index=range(4)) - df.dtypes - str(df) - tm.assert_frame_equal(df, expected) - - def test_partial_set_empty_frame_row(self): - # GH5720, GH5744 - # don't create rows when empty - expected = DataFrame(columns=['A', 'B', 'New'], - index=pd.Index([], dtype='int64')) - expected['A'] = expected['A'].astype('int64') - expected['B'] = expected['B'].astype('float64') - expected['New'] = expected['New'].astype('float64') - - df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) - y = df[df.A > 5] - y['New'] = np.nan - tm.assert_frame_equal(y, expected) - # tm.assert_frame_equal(y,expected) - - expected = DataFrame(columns=['a', 'b', 'c c', 'd']) - expected['d'] = expected['d'].astype('int64') - df = DataFrame(columns=['a', 'b', 'c c']) - df['d'] = 3 - tm.assert_frame_equal(df, expected) - tm.assert_series_equal(df['c c'], Series(name='c c', dtype=object)) - - # reindex columns is ok - df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) - y = df[df.A > 5] - result = y.reindex(columns=['A', 'B', 'C']) - expected = DataFrame(columns=['A', 'B', 'C'], - index=pd.Index([], dtype='int64')) - expected['A'] = expected['A'].astype('int64') - expected['B'] = expected['B'].astype('float64') - expected['C'] = expected['C'].astype('float64') - tm.assert_frame_equal(result, expected) - - def test_partial_set_empty_frame_set_series(self): - # GH 5756 - # setting with empty Series - df = DataFrame(Series()) - tm.assert_frame_equal(df, DataFrame({0: Series()})) - - df = DataFrame(Series(name='foo')) - tm.assert_frame_equal(df, DataFrame({'foo': Series()})) - - def test_partial_set_empty_frame_empty_copy_assignment(self): - # GH 5932 - # copy on empty with assignment fails - df = DataFrame(index=[0]) - df = df.copy() - df['a'] = 0 - expected = DataFrame(0, index=[0], columns=['a']) - tm.assert_frame_equal(df, expected) - - def test_partial_set_empty_frame_empty_consistencies(self): - # GH 6171 - # consistency on empty frames - df = DataFrame(columns=['x', 'y']) - df['x'] = [1, 2] - expected = DataFrame(dict(x=[1, 2], y=[np.nan, np.nan])) - tm.assert_frame_equal(df, expected, check_dtype=False) + def test_index_type_coercion(self): - df = DataFrame(columns=['x', 'y']) - df['x'] = ['1', '2'] - expected = DataFrame( - dict(x=['1', '2'], y=[np.nan, np.nan]), dtype=object) - tm.assert_frame_equal(df, expected) + with catch_warnings(record=True): - df = DataFrame(columns=['x', 'y']) - df.loc[0, 'x'] = 1 - expected = DataFrame(dict(x=[1], y=[np.nan])) - tm.assert_frame_equal(df, expected, check_dtype=False) - - def test_cache_updating(self): - # GH 4939, make sure to update the cache on setitem - - df = tm.makeDataFrame() - df['A'] # cache series - df.ix["Hello Friend"] = df.ix[0] - self.assertIn("Hello Friend", df['A'].index) - self.assertIn("Hello Friend", df['B'].index) - - panel = tm.makePanel() - panel.ix[0] # get first item into cache - panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1 - self.assertIn("A+1", panel.ix[0].columns) - self.assertIn("A+1", 
panel.ix[1].columns) - - # 5216 - # make sure that we don't try to set a dead cache - a = np.random.rand(10, 3) - df = DataFrame(a, columns=['x', 'y', 'z']) - tuples = [(i, j) for i in range(5) for j in range(2)] - index = MultiIndex.from_tuples(tuples) - df.index = index - - # setting via chained assignment - # but actually works, since everything is a view - df.loc[0]['z'].iloc[0] = 1. - result = df.loc[(0, 0), 'z'] - self.assertEqual(result, 1) - - # correct setting - df.loc[(0, 0), 'z'] = 2 - result = df.loc[(0, 0), 'z'] - self.assertEqual(result, 2) - - # 10264 - df = DataFrame(np.zeros((5, 5), dtype='int64'), columns=[ - 'a', 'b', 'c', 'd', 'e'], index=range(5)) - df['f'] = 0 - df.f.values[3] = 1 + # GH 11836 + # if we have an index type and set it with something that looks + # to numpy like the same, but is actually, not + # (e.g. setting with a float or string '0') + # then we need to coerce to object - # TODO(wesm): unused? - # y = df.iloc[np.arange(2, len(df))] + # integer indexes + for s in [Series(range(5)), + Series(range(5), index=range(1, 6))]: - df.f.values[3] = 2 - expected = DataFrame(np.zeros((5, 6), dtype='int64'), columns=[ - 'a', 'b', 'c', 'd', 'e', 'f'], index=range(5)) - expected.at[3, 'f'] = 2 - tm.assert_frame_equal(df, expected) - expected = Series([0, 0, 0, 2, 0], name='f') - tm.assert_series_equal(df.f, expected) - - def test_set_ix_out_of_bounds_axis_0(self): - df = pd.DataFrame( - randn(2, 5), index=["row%s" % i for i in range(2)], - columns=["col%s" % i for i in range(5)]) - self.assertRaises(ValueError, df.ix.__setitem__, (2, 0), 100) - - def test_set_ix_out_of_bounds_axis_1(self): - df = pd.DataFrame( - randn(5, 2), index=["row%s" % i for i in range(5)], - columns=["col%s" % i for i in range(2)]) - self.assertRaises(ValueError, df.ix.__setitem__, (0, 2), 100) - - def test_iloc_empty_list_indexer_is_ok(self): - from pandas.util.testing import makeCustomDataframe as mkdf - df = mkdf(5, 2) - # vertical empty - tm.assert_frame_equal(df.iloc[:, []], df.iloc[:, :0], - check_index_type=True, check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.iloc[[], :], df.iloc[:0, :], - check_index_type=True, check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.iloc[[]], df.iloc[:0, :], - check_index_type=True, - check_column_type=True) - - def test_loc_empty_list_indexer_is_ok(self): - from pandas.util.testing import makeCustomDataframe as mkdf - df = mkdf(5, 2) - # vertical empty - tm.assert_frame_equal(df.loc[:, []], df.iloc[:, :0], - check_index_type=True, check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.loc[[], :], df.iloc[:0, :], - check_index_type=True, check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.loc[[]], df.iloc[:0, :], - check_index_type=True, - check_column_type=True) - - def test_ix_empty_list_indexer_is_ok(self): - from pandas.util.testing import makeCustomDataframe as mkdf - df = mkdf(5, 2) - # vertical empty - tm.assert_frame_equal(df.ix[:, []], df.iloc[:, :0], - check_index_type=True, - check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.ix[[], :], df.iloc[:0, :], - check_index_type=True, - check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.ix[[]], df.iloc[:0, :], - check_index_type=True, - check_column_type=True) + self.assertTrue(s.index.is_integer()) - def test_index_type_coercion(self): + for indexer in [lambda x: x.ix, + lambda x: x.loc, + lambda x: x]: + s2 = s.copy() + indexer(s2)[0.1] = 0 + 
self.assertTrue(s2.index.is_floating()) + self.assertTrue(indexer(s2)[0.1] == 0) - # GH 11836 - # if we have an index type and set it with something that looks - # to numpy like the same, but is actually, not - # (e.g. setting with a float or string '0') - # then we need to coerce to object + s2 = s.copy() + indexer(s2)[0.0] = 0 + exp = s.index + if 0 not in s: + exp = Index(s.index.tolist() + [0]) + tm.assert_index_equal(s2.index, exp) - # integer indexes - for s in [Series(range(5)), - Series(range(5), index=range(1, 6))]: + s2 = s.copy() + indexer(s2)['0'] = 0 + self.assertTrue(s2.index.is_object()) - self.assertTrue(s.index.is_integer()) + for s in [Series(range(5), index=np.arange(5.))]: - for indexer in [lambda x: x.ix, - lambda x: x.loc, - lambda x: x]: - s2 = s.copy() - indexer(s2)[0.1] = 0 - self.assertTrue(s2.index.is_floating()) - self.assertTrue(indexer(s2)[0.1] == 0) + self.assertTrue(s.index.is_floating()) - s2 = s.copy() - indexer(s2)[0.0] = 0 - exp = s.index - if 0 not in s: - exp = Index(s.index.tolist() + [0]) - tm.assert_index_equal(s2.index, exp) + for idxr in [lambda x: x.ix, + lambda x: x.loc, + lambda x: x]: - s2 = s.copy() - indexer(s2)['0'] = 0 - self.assertTrue(s2.index.is_object()) + s2 = s.copy() + idxr(s2)[0.1] = 0 + self.assertTrue(s2.index.is_floating()) + self.assertTrue(idxr(s2)[0.1] == 0) - for s in [Series(range(5), index=np.arange(5.))]: + s2 = s.copy() + idxr(s2)[0.0] = 0 + tm.assert_index_equal(s2.index, s.index) - self.assertTrue(s.index.is_floating()) + s2 = s.copy() + idxr(s2)['0'] = 0 + self.assertTrue(s2.index.is_object()) - for idxr in [lambda x: x.ix, - lambda x: x.loc, - lambda x: x]: - s2 = s.copy() - idxr(s2)[0.1] = 0 - self.assertTrue(s2.index.is_floating()) - self.assertTrue(idxr(s2)[0.1] == 0) +class TestMisc(Base, tm.TestCase): - s2 = s.copy() - idxr(s2)[0.0] = 0 - tm.assert_index_equal(s2.index, s.index) + def test_indexer_caching(self): + # GH5727 + # make sure that indexers are in the _internal_names_set + n = 1000001 + arrays = [lrange(n), lrange(n)] + index = MultiIndex.from_tuples(lzip(*arrays)) + s = Series(np.zeros(n), index=index) + str(s) - s2 = s.copy() - idxr(s2)['0'] = 0 - self.assertTrue(s2.index.is_object()) + # setitem + expected = Series(np.ones(n), index=index) + s = Series(np.zeros(n), index=index) + s[s == 0] = 1 + tm.assert_series_equal(s, expected) def test_float_index_to_mixed(self): df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)}) @@ -3143,13 +621,6 @@ def test_float_index_to_mixed(self): 'a': [10] * 10}), df) - def test_duplicate_ix_returns_series(self): - df = DataFrame(np.random.randn(3, 3), index=[0.1, 0.2, 0.2], - columns=list('abc')) - r = df.ix[0.2, 'a'] - e = df.loc[0.2, 'a'] - tm.assert_series_equal(r, e) - def test_float_index_non_scalar_assignment(self): df = DataFrame({'a': [1, 2, 3], 'b': [3, 4, 5]}, index=[1., 2., 3.]) df.loc[df.index[:2]] = 1 @@ -3185,15 +656,18 @@ def run_tests(df, rhs, right): tm.assert_frame_equal(left, right) left = df.copy() - left.ix[s, l] = rhs + with catch_warnings(record=True): + left.ix[s, l] = rhs tm.assert_frame_equal(left, right) left = df.copy() - left.ix[i, j] = rhs + with catch_warnings(record=True): + left.ix[i, j] = rhs tm.assert_frame_equal(left, right) left = df.copy() - left.ix[r, c] = rhs + with catch_warnings(record=True): + left.ix[r, c] = rhs tm.assert_frame_equal(left, right) xs = np.arange(20).reshape(5, 4) @@ -3226,7 +700,7 @@ def assert_slices_equivalent(l_slc, i_slc): if not idx.is_integer: # For integer indices, ix and plain getitem 
are position-based. tm.assert_series_equal(s[l_slc], s.iloc[i_slc]) - tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc]) + tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) for idx in [_mklbl('A', 20), np.arange(20) + 100, np.linspace(100, 150, 20)]: @@ -3243,8 +717,9 @@ def test_slice_with_zero_step_raises(self): lambda: s[::0]) self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', lambda: s.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: s.ix[::0]) + with catch_warnings(record=True): + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: s.ix[::0]) def test_indexing_assignment_dict_already_exists(self): df = pd.DataFrame({'x': [1, 2, 6], @@ -3259,11 +734,13 @@ def test_indexing_assignment_dict_already_exists(self): def test_indexing_dtypes_on_empty(self): # Check that .iloc and .ix return correct dtypes GH9983 df = DataFrame({'a': [1, 2, 3], 'b': ['b', 'b2', 'b3']}) - df2 = df.ix[[], :] + with catch_warnings(record=True): + df2 = df.ix[[], :] self.assertEqual(df2.loc[:, 'a'].dtype, np.int64) tm.assert_series_equal(df2.loc[:, 'a'], df2.iloc[:, 0]) - tm.assert_series_equal(df2.loc[:, 'a'], df2.ix[:, 0]) + with catch_warnings(record=True): + tm.assert_series_equal(df2.loc[:, 'a'], df2.ix[:, 0]) def test_range_in_series_indexing(self): # range can cause an indexing error diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py new file mode 100644 index 0000000000000..e68e8015a2f39 --- /dev/null +++ b/pandas/tests/indexing/test_ix.py @@ -0,0 +1,333 @@ +""" test indexing with ix """ + +from warnings import catch_warnings + +import numpy as np +import pandas as pd + +from pandas.types.common import is_scalar +from pandas.compat import lrange +from pandas import Series, DataFrame, option_context, MultiIndex +from pandas.util import testing as tm +from pandas.core.common import PerformanceWarning + + +class TestIX(tm.TestCase): + + def test_ix_deprecation(self): + # GH 15114 + + df = DataFrame({'A': [1, 2, 3]}) + with tm.assert_produces_warning(DeprecationWarning, + check_stacklevel=False): + df.ix[1, 'A'] + + def test_ix_loc_setitem_consistency(self): + + # GH 5771 + # loc with slice and series + s = Series(0, index=[4, 5, 6]) + s.loc[4:5] += 1 + expected = Series([1, 1, 0], index=[4, 5, 6]) + tm.assert_series_equal(s, expected) + + # GH 5928 + # chained indexing assignment + df = DataFrame({'a': [0, 1, 2]}) + expected = df.copy() + with catch_warnings(record=True): + expected.ix[[0, 1, 2], 'a'] = -expected.ix[[0, 1, 2], 'a'] + + with catch_warnings(record=True): + df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]] + tm.assert_frame_equal(df, expected) + + df = DataFrame({'a': [0, 1, 2], 'b': [0, 1, 2]}) + with catch_warnings(record=True): + df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]].astype( + 'float64') + 0.5 + expected = DataFrame({'a': [0.5, -0.5, -1.5], 'b': [0, 1, 2]}) + tm.assert_frame_equal(df, expected) + + # GH 8607 + # ix setitem consistency + df = DataFrame({'timestamp': [1413840976, 1413842580, 1413760580], + 'delta': [1174, 904, 161], + 'elapsed': [7673, 9277, 1470]}) + expected = DataFrame({'timestamp': pd.to_datetime( + [1413840976, 1413842580, 1413760580], unit='s'), + 'delta': [1174, 904, 161], + 'elapsed': [7673, 9277, 1470]}) + + df2 = df.copy() + df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s') + tm.assert_frame_equal(df2, expected) + + df2 = df.copy() + df2.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp'], unit='s') + tm.assert_frame_equal(df2, expected) + + df2 
= df.copy() + with catch_warnings(record=True): + df2.ix[:, 2] = pd.to_datetime(df['timestamp'], unit='s') + tm.assert_frame_equal(df2, expected) + + def test_ix_loc_consistency(self): + + # GH 8613 + # some edge cases where ix/loc should return the same + # this is not an exhaustive case + + def compare(result, expected): + if is_scalar(expected): + self.assertEqual(result, expected) + else: + self.assertTrue(expected.equals(result)) + + # failure cases for .loc, but these work for .ix + df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD')) + for key in [slice(1, 3), tuple([slice(0, 2), slice(0, 2)]), + tuple([slice(0, 2), df.columns[0:2]])]: + + for index in [tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makePeriodIndex, + tm.makeTimedeltaIndex]: + df.index = index(len(df.index)) + with catch_warnings(record=True): + df.ix[key] + + self.assertRaises(TypeError, lambda: df.loc[key]) + + df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD'), + index=pd.date_range('2012-01-01', periods=5)) + + for key in ['2012-01-03', + '2012-01-31', + slice('2012-01-03', '2012-01-03'), + slice('2012-01-03', '2012-01-04'), + slice('2012-01-03', '2012-01-06', 2), + slice('2012-01-03', '2012-01-31'), + tuple([[True, True, True, False, True]]), ]: + + # getitem + + # if the expected raises, then compare the exceptions + try: + with catch_warnings(record=True): + expected = df.ix[key] + except KeyError: + self.assertRaises(KeyError, lambda: df.loc[key]) + continue + + result = df.loc[key] + compare(result, expected) + + # setitem + df1 = df.copy() + df2 = df.copy() + + with catch_warnings(record=True): + df1.ix[key] = 10 + df2.loc[key] = 10 + compare(df2, df1) + + # edge cases + s = Series([1, 2, 3, 4], index=list('abde')) + + result1 = s['a':'c'] + with catch_warnings(record=True): + result2 = s.ix['a':'c'] + result3 = s.loc['a':'c'] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + + # now work rather than raising KeyError + s = Series(range(5), [-2, -1, 1, 2, 3]) + + with catch_warnings(record=True): + result1 = s.ix[-10:3] + result2 = s.loc[-10:3] + tm.assert_series_equal(result1, result2) + + with catch_warnings(record=True): + result1 = s.ix[0:3] + result2 = s.loc[0:3] + tm.assert_series_equal(result1, result2) + + def test_ix_weird_slicing(self): + # http://stackoverflow.com/q/17056560/1240268 + df = DataFrame({'one': [1, 2, 3, np.nan, np.nan], + 'two': [1, 2, 3, 4, 5]}) + df.loc[df['one'] > 1, 'two'] = -df['two'] + + expected = DataFrame({'one': {0: 1.0, + 1: 2.0, + 2: 3.0, + 3: np.nan, + 4: np.nan}, + 'two': {0: 1, + 1: -2, + 2: -3, + 3: 4, + 4: 5}}) + tm.assert_frame_equal(df, expected) + + def test_ix_general(self): + + # ix general issues + + # GH 2817 + data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, + 'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, + 'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}} + df = DataFrame(data).set_index(keys=['col', 'year']) + key = 4.0, 2012 + + # emits a PerformanceWarning, ok + with self.assert_produces_warning(PerformanceWarning): + tm.assert_frame_equal(df.loc[key], df.iloc[2:]) + + # this is ok + df.sort_index(inplace=True) + res = df.loc[key] + + # col has float dtype, result should be Float64Index + index = MultiIndex.from_arrays([[4.] 
* 3, [2012] * 3],
+                                        names=['col', 'year'])
+        expected = DataFrame({'amount': [222, 333, 444]}, index=index)
+        tm.assert_frame_equal(res, expected)
+
+    def test_ix_assign_column_mixed(self):
+        # GH #1142
+        df = DataFrame(tm.getSeriesData())
+        df['foo'] = 'bar'
+
+        orig = df.loc[:, 'B'].copy()
+        df.loc[:, 'B'] = df.loc[:, 'B'] + 1
+        tm.assert_series_equal(df.B, orig + 1)
+
+        # GH 3668, mixed frame with series value
+        df = DataFrame({'x': lrange(10), 'y': lrange(10, 20), 'z': 'bar'})
+        expected = df.copy()
+
+        for i in range(5):
+            indexer = i * 2
+            v = 1000 + i * 200
+            expected.loc[indexer, 'y'] = v
+            self.assertEqual(expected.loc[indexer, 'y'], v)
+
+        df.loc[df.x % 2 == 0, 'y'] = df.loc[df.x % 2 == 0, 'y'] * 100
+        tm.assert_frame_equal(df, expected)
+
+        # GH 4508, making sure consistency of assignments
+        df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]})
+        df.loc[[0, 2, ], 'b'] = [100, -100]
+        expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]})
+        tm.assert_frame_equal(df, expected)
+
+        df = pd.DataFrame({'a': lrange(4)})
+        df['b'] = np.nan
+        df.loc[[1, 3], 'b'] = [100, -100]
+        expected = DataFrame({'a': [0, 1, 2, 3],
+                              'b': [np.nan, 100, np.nan, -100]})
+        tm.assert_frame_equal(df, expected)
+
+        # ok, but chained assignments are dangerous
+        # if we turn off chained assignment it will work
+        with option_context('chained_assignment', None):
+            df = pd.DataFrame({'a': lrange(4)})
+            df['b'] = np.nan
+            df['b'].loc[[1, 3]] = [100, -100]
+            tm.assert_frame_equal(df, expected)
+
+    def test_ix_get_set_consistency(self):
+
+        # GH 4544
+        # ix/loc get/set not consistent when
+        # a mixed int/string index
+        df = DataFrame(np.arange(16).reshape((4, 4)),
+                       columns=['a', 'b', 8, 'c'],
+                       index=['e', 7, 'f', 'g'])
+
+        with catch_warnings(record=True):
+            self.assertEqual(df.ix['e', 8], 2)
+        self.assertEqual(df.loc['e', 8], 2)
+
+        with catch_warnings(record=True):
+            df.ix['e', 8] = 42
+            self.assertEqual(df.ix['e', 8], 42)
+        self.assertEqual(df.loc['e', 8], 42)
+
+        df.loc['e', 8] = 45
+        with catch_warnings(record=True):
+            self.assertEqual(df.ix['e', 8], 45)
+        self.assertEqual(df.loc['e', 8], 45)
+
+    def test_ix_slicing_strings(self):
+        # GH3836
+        data = {'Classification':
+                ['SA EQUITY CFD', 'bbb', 'SA EQUITY', 'SA SSF', 'aaa'],
+                'Random': [1, 2, 3, 4, 5],
+                'X': ['correct', 'wrong', 'correct', 'correct', 'wrong']}
+        df = DataFrame(data)
+        x = df[~df.Classification.isin(['SA EQUITY CFD', 'SA EQUITY', 'SA SSF'
+                                        ])]
+        with catch_warnings(record=True):
+            df.ix[x.index, 'X'] = df['Classification']
+
+        expected = DataFrame({'Classification': {0: 'SA EQUITY CFD',
+                                                 1: 'bbb',
+                                                 2: 'SA EQUITY',
+                                                 3: 'SA SSF',
+                                                 4: 'aaa'},
+                              'Random': {0: 1,
+                                         1: 2,
+                                         2: 3,
+                                         3: 4,
+                                         4: 5},
+                              'X': {0: 'correct',
+                                    1: 'bbb',
+                                    2: 'correct',
+                                    3: 'correct',
+                                    4: 'aaa'}})  # bug was 4: 'bbb'
+
+        tm.assert_frame_equal(df, expected)
+
+    def test_ix_setitem_out_of_bounds_axis_0(self):
+        df = pd.DataFrame(
+            np.random.randn(2, 5), index=["row%s" % i for i in range(2)],
+            columns=["col%s" % i for i in range(5)])
+        with catch_warnings(record=True):
+            self.assertRaises(ValueError, df.ix.__setitem__, (2, 0), 100)
+
+    def test_ix_setitem_out_of_bounds_axis_1(self):
+        df = pd.DataFrame(
+            np.random.randn(5, 2), index=["row%s" % i for i in range(5)],
+            columns=["col%s" % i for i in range(2)])
+        with catch_warnings(record=True):
+            self.assertRaises(ValueError, df.ix.__setitem__, (0, 2), 100)
+
+    def test_ix_empty_list_indexer_is_ok(self):
+        with catch_warnings(record=True):
+            from pandas.util.testing import makeCustomDataframe as mkdf
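A standalone sketch of the empty-list-indexer contract that the test continuing
below exercises: an empty list selects zero rows or columns while leaving the
other axis and the dtypes intact (the frame here is illustrative):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2, 3], 'b': list('xyz')})

    # empty row selection: zero rows, both columns preserved
    assert df.iloc[[], :].shape == (0, 2)

    # empty column selection: all rows, zero columns
    assert df.iloc[:, []].shape == (3, 0)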
+            df = mkdf(5, 2)
+            # vertical empty
+            tm.assert_frame_equal(df.ix[:, []], df.iloc[:, :0],
+                                  check_index_type=True,
+                                  check_column_type=True)
+            # horizontal empty
+            tm.assert_frame_equal(df.ix[[], :], df.iloc[:0, :],
+                                  check_index_type=True,
+                                  check_column_type=True)
+            # horizontal empty
+            tm.assert_frame_equal(df.ix[[]], df.iloc[:0, :],
+                                  check_index_type=True,
+                                  check_column_type=True)
+
+    def test_ix_duplicate_returns_series(self):
+        df = DataFrame(np.random.randn(3, 3), index=[0.1, 0.2, 0.2],
+                       columns=list('abc'))
+        with catch_warnings(record=True):
+            r = df.ix[0.2, 'a']
+        e = df.loc[0.2, 'a']
+        tm.assert_series_equal(r, e)
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
new file mode 100644
index 0000000000000..af9d3ffdf6671
--- /dev/null
+++ b/pandas/tests/indexing/test_loc.py
@@ -0,0 +1,630 @@
+""" test label based indexing with loc """
+
+import itertools
+from warnings import catch_warnings
+import numpy as np
+
+import pandas as pd
+from pandas.compat import lrange, StringIO
+from pandas import (Series, DataFrame, Timestamp,
+                    date_range, MultiIndex)
+from pandas.util import testing as tm
+from pandas.tests.indexing.common import Base
+
+
+class TestLoc(Base, tm.TestCase):
+
+    def test_loc_getitem_dups(self):
+        # GH 5678
+        # repeated getitems on a dup index returning an ndarray
+        df = DataFrame(
+            np.random.random_sample((20, 5)),
+            index=['ABCDE' [x % 5] for x in range(20)])
+        expected = df.loc['A', 0]
+        result = df.loc[:, 0].loc['A']
+        tm.assert_series_equal(result, expected)
+
+    def test_loc_getitem_dups2(self):
+
+        # GH4726
+        # dup indexing with iloc/loc
+        df = DataFrame([[1, 2, 'foo', 'bar', Timestamp('20130101')]],
+                       columns=['a', 'a', 'a', 'a', 'a'], index=[1])
+        expected = Series([1, 2, 'foo', 'bar', Timestamp('20130101')],
+                          index=['a', 'a', 'a', 'a', 'a'], name=1)
+
+        result = df.iloc[0]
+        tm.assert_series_equal(result, expected)
+
+        result = df.loc[1]
+        tm.assert_series_equal(result, expected)
+
+    def test_loc_setitem_dups(self):
+
+        # GH 6541
+        df_orig = DataFrame(
+            {'me': list('rttti'),
+             'foo': list('aaade'),
+             'bar': np.arange(5, dtype='float64') * 1.34 + 2,
+             'bar2': np.arange(5, dtype='float64') * -.34 + 2}).set_index('me')
+
+        indexer = tuple(['r', ['bar', 'bar2']])
+        df = df_orig.copy()
+        df.loc[indexer] *= 2.0
+        tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
+
+        indexer = tuple(['r', 'bar'])
+        df = df_orig.copy()
+        df.loc[indexer] *= 2.0
+        self.assertEqual(df.loc[indexer], 2.0 * df_orig.loc[indexer])
+
+        indexer = tuple(['t', ['bar', 'bar2']])
+        df = df_orig.copy()
+        df.loc[indexer] *= 2.0
+        tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
+
+    def test_loc_setitem_slice(self):
+        # GH10503
+
+        # assigning the same type should not change the type
+        df1 = DataFrame({'a': [0, 1, 1],
+                         'b': Series([100, 200, 300], dtype='uint32')})
+        ix = df1['a'] == 1
+        newb1 = df1.loc[ix, 'b'] + 1
+        df1.loc[ix, 'b'] = newb1
+        expected = DataFrame({'a': [0, 1, 1],
+                              'b': Series([100, 201, 301], dtype='uint32')})
+        tm.assert_frame_equal(df1, expected)
+
+        # assigning a new type should get the inferred type
+        df2 = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
+                        dtype='uint64')
+        ix = df1['a'] == 1
+        newb2 = df2.loc[ix, 'b']
+        df1.loc[ix, 'b'] = newb2
+        expected = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
+                             dtype='uint64')
+        tm.assert_frame_equal(df2, expected)
+
+    def test_loc_getitem_int(self):
+
+        # int label
+        self.check_result('int label', 'loc', 2, 'ix', 2,
+                          typs=['ints', 'uints'], axes=0)
+
self.check_result('int label', 'loc', 3, 'ix', 3, + typs=['ints', 'uints'], axes=1) + self.check_result('int label', 'loc', 4, 'ix', 4, + typs=['ints', 'uints'], axes=2) + self.check_result('int label', 'loc', 2, 'ix', 2, + typs=['label'], fails=KeyError) + + def test_loc_getitem_label(self): + + # label + self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['labels'], + axes=0) + self.check_result('label', 'loc', 'null', 'ix', 'null', typs=['mixed'], + axes=0) + self.check_result('label', 'loc', 8, 'ix', 8, typs=['mixed'], axes=0) + self.check_result('label', 'loc', Timestamp('20130102'), 'ix', 1, + typs=['ts'], axes=0) + self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['empty'], + fails=KeyError) + + def test_loc_getitem_label_out_of_range(self): + + # out of range label + self.check_result('label range', 'loc', 'f', 'ix', 'f', + typs=['ints', 'uints', 'labels', 'mixed', 'ts'], + fails=KeyError) + self.check_result('label range', 'loc', 'f', 'ix', 'f', + typs=['floats'], fails=TypeError) + self.check_result('label range', 'loc', 20, 'ix', 20, + typs=['ints', 'uints', 'mixed'], fails=KeyError) + self.check_result('label range', 'loc', 20, 'ix', 20, + typs=['labels'], fails=TypeError) + self.check_result('label range', 'loc', 20, 'ix', 20, typs=['ts'], + axes=0, fails=TypeError) + self.check_result('label range', 'loc', 20, 'ix', 20, typs=['floats'], + axes=0, fails=TypeError) + + def test_loc_getitem_label_list(self): + + # list of labels + self.check_result('list lbl', 'loc', [0, 2, 4], 'ix', [0, 2, 4], + typs=['ints', 'uints'], axes=0) + self.check_result('list lbl', 'loc', [3, 6, 9], 'ix', [3, 6, 9], + typs=['ints', 'uints'], axes=1) + self.check_result('list lbl', 'loc', [4, 8, 12], 'ix', [4, 8, 12], + typs=['ints', 'uints'], axes=2) + self.check_result('list lbl', 'loc', ['a', 'b', 'd'], 'ix', + ['a', 'b', 'd'], typs=['labels'], axes=0) + self.check_result('list lbl', 'loc', ['A', 'B', 'C'], 'ix', + ['A', 'B', 'C'], typs=['labels'], axes=1) + self.check_result('list lbl', 'loc', ['Z', 'Y', 'W'], 'ix', + ['Z', 'Y', 'W'], typs=['labels'], axes=2) + self.check_result('list lbl', 'loc', [2, 8, 'null'], 'ix', + [2, 8, 'null'], typs=['mixed'], axes=0) + self.check_result('list lbl', 'loc', + [Timestamp('20130102'), Timestamp('20130103')], 'ix', + [Timestamp('20130102'), Timestamp('20130103')], + typs=['ts'], axes=0) + + self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2], + typs=['empty'], fails=KeyError) + self.check_result('list lbl', 'loc', [0, 2, 3], 'ix', [0, 2, 3], + typs=['ints', 'uints'], axes=0, fails=KeyError) + self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7], + typs=['ints', 'uints'], axes=1, fails=KeyError) + self.check_result('list lbl', 'loc', [4, 8, 10], 'ix', [4, 8, 10], + typs=['ints', 'uints'], axes=2, fails=KeyError) + + def test_loc_getitem_label_list_fails(self): + # fails + self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], + typs=['ints', 'uints'], axes=1, fails=KeyError) + self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], + typs=['ints', 'uints'], axes=2, fails=KeyError) + + def test_loc_getitem_label_array_like(self): + # array like + self.check_result('array like', 'loc', Series(index=[0, 2, 4]).index, + 'ix', [0, 2, 4], typs=['ints', 'uints'], axes=0) + self.check_result('array like', 'loc', Series(index=[3, 6, 9]).index, + 'ix', [3, 6, 9], typs=['ints', 'uints'], axes=1) + self.check_result('array like', 'loc', Series(index=[4, 8, 12]).index, + 'ix', [4, 8, 12], typs=['ints', 
'uints'], axes=2) + + def test_loc_getitem_bool(self): + # boolean indexers + b = [True, False, True, False] + self.check_result('bool', 'loc', b, 'ix', b, + typs=['ints', 'uints', 'labels', + 'mixed', 'ts', 'floats']) + self.check_result('bool', 'loc', b, 'ix', b, typs=['empty'], + fails=KeyError) + + def test_loc_getitem_int_slice(self): + + # ok + self.check_result('int slice2', 'loc', slice(2, 4), 'ix', [2, 4], + typs=['ints', 'uints'], axes=0) + self.check_result('int slice2', 'loc', slice(3, 6), 'ix', [3, 6], + typs=['ints', 'uints'], axes=1) + self.check_result('int slice2', 'loc', slice(4, 8), 'ix', [4, 8], + typs=['ints', 'uints'], axes=2) + + # GH 3053 + # loc should treat integer slices like label slices + + index = MultiIndex.from_tuples([t for t in itertools.product( + [6, 7, 8], ['a', 'b'])]) + df = DataFrame(np.random.randn(6, 6), index, index) + result = df.loc[6:8, :] + expected = df + tm.assert_frame_equal(result, expected) + + index = MultiIndex.from_tuples([t + for t in itertools.product( + [10, 20, 30], ['a', 'b'])]) + df = DataFrame(np.random.randn(6, 6), index, index) + result = df.loc[20:30, :] + expected = df.iloc[2:] + tm.assert_frame_equal(result, expected) + + # doc examples + result = df.loc[10, :] + expected = df.iloc[0:2] + expected.index = ['a', 'b'] + tm.assert_frame_equal(result, expected) + + result = df.loc[:, 10] + # expected = df.ix[:,10] (this fails) + expected = df[10] + tm.assert_frame_equal(result, expected) + + def test_loc_to_fail(self): + + # GH3449 + df = DataFrame(np.random.random((3, 3)), + index=['a', 'b', 'c'], + columns=['e', 'f', 'g']) + + # raise a KeyError? + self.assertRaises(KeyError, df.loc.__getitem__, + tuple([[1, 2], [1, 2]])) + + # GH 7496 + # loc should not fallback + + s = Series() + s.loc[1] = 1 + s.loc['a'] = 2 + + self.assertRaises(KeyError, lambda: s.loc[-1]) + self.assertRaises(KeyError, lambda: s.loc[[-1, -2]]) + + self.assertRaises(KeyError, lambda: s.loc[['4']]) + + s.loc[-1] = 3 + result = s.loc[[-1, -2]] + expected = Series([3, np.nan], index=[-1, -2]) + tm.assert_series_equal(result, expected) + + s['a'] = 2 + self.assertRaises(KeyError, lambda: s.loc[[-2]]) + + del s['a'] + + def f(): + s.loc[[-2]] = 0 + + self.assertRaises(KeyError, f) + + # inconsistency between .loc[values] and .loc[values,:] + # GH 7999 + df = DataFrame([['a'], ['b']], index=[1, 2], columns=['value']) + + def f(): + df.loc[[3], :] + + self.assertRaises(KeyError, f) + + def f(): + df.loc[[3]] + + self.assertRaises(KeyError, f) + + def test_loc_getitem_label_slice(self): + + # label slices (with ints) + self.check_result('lab slice', 'loc', slice(1, 3), + 'ix', slice(1, 3), + typs=['labels', 'mixed', 'empty', 'ts', 'floats'], + fails=TypeError) + + # real label slices + self.check_result('lab slice', 'loc', slice('a', 'c'), + 'ix', slice('a', 'c'), typs=['labels'], axes=0) + self.check_result('lab slice', 'loc', slice('A', 'C'), + 'ix', slice('A', 'C'), typs=['labels'], axes=1) + self.check_result('lab slice', 'loc', slice('W', 'Z'), + 'ix', slice('W', 'Z'), typs=['labels'], axes=2) + + self.check_result('ts slice', 'loc', slice('20130102', '20130104'), + 'ix', slice('20130102', '20130104'), + typs=['ts'], axes=0) + self.check_result('ts slice', 'loc', slice('20130102', '20130104'), + 'ix', slice('20130102', '20130104'), + typs=['ts'], axes=1, fails=TypeError) + self.check_result('ts slice', 'loc', slice('20130102', '20130104'), + 'ix', slice('20130102', '20130104'), + typs=['ts'], axes=2, fails=TypeError) + + # GH 14316 + self.check_result('ts 
slice rev', 'loc', slice('20130104', '20130102'), + 'indexer', [0, 1, 2], typs=['ts_rev'], axes=0) + + self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), + typs=['mixed'], axes=0, fails=TypeError) + self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), + typs=['mixed'], axes=1, fails=KeyError) + self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), + typs=['mixed'], axes=2, fails=KeyError) + + self.check_result('mixed slice', 'loc', slice(2, 4, 2), 'ix', slice( + 2, 4, 2), typs=['mixed'], axes=0, fails=TypeError) + + def test_loc_general(self): + + df = DataFrame( + np.random.rand(4, 4), columns=['A', 'B', 'C', 'D'], + index=['A', 'B', 'C', 'D']) + + # want this to work + result = df.loc[:, "A":"B"].iloc[0:2, :] + self.assertTrue((result.columns == ['A', 'B']).all()) + self.assertTrue((result.index == ['A', 'B']).all()) + + # mixed type + result = DataFrame({'a': [Timestamp('20130101')], 'b': [1]}).iloc[0] + expected = Series([Timestamp('20130101'), 1], index=['a', 'b'], name=0) + tm.assert_series_equal(result, expected) + self.assertEqual(result.dtype, object) + + def test_loc_setitem_consistency(self): + # GH 6149 + # coerce similarly for setitem and loc when rows have a null-slice + expected = DataFrame({'date': Series(0, index=range(5), + dtype=np.int64), + 'val': Series(range(5), dtype=np.int64)}) + + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series( + range(5), dtype=np.int64)}) + df.loc[:, 'date'] = 0 + tm.assert_frame_equal(df, expected) + + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series(range(5), dtype=np.int64)}) + df.loc[:, 'date'] = np.array(0, dtype=np.int64) + tm.assert_frame_equal(df, expected) + + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series(range(5), dtype=np.int64)}) + df.loc[:, 'date'] = np.array([0, 0, 0, 0, 0], dtype=np.int64) + tm.assert_frame_equal(df, expected) + + expected = DataFrame({'date': Series('foo', index=range(5)), + 'val': Series(range(5), dtype=np.int64)}) + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series(range(5), dtype=np.int64)}) + df.loc[:, 'date'] = 'foo' + tm.assert_frame_equal(df, expected) + + expected = DataFrame({'date': Series(1.0, index=range(5)), + 'val': Series(range(5), dtype=np.int64)}) + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series(range(5), dtype=np.int64)}) + df.loc[:, 'date'] = 1.0 + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_empty(self): + # empty (essentially noops) + expected = DataFrame(columns=['x', 'y']) + expected['x'] = expected['x'].astype(np.int64) + df = DataFrame(columns=['x', 'y']) + df.loc[:, 'x'] = 1 + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=['x', 'y']) + df['x'] = 1 + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_slice_column_len(self): + # .loc[:,column] setting with slice == len of the column + # GH10408 + data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat +Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse +Region,Site,RespondentID,,,,, +Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes, +Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes +Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes, +Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No""" + + df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2]) + 
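# parse the two date columns, then derive a Duration column and cast it to seconds + 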
df.loc[:, ('Respondent', 'StartDate')] = pd.to_datetime(df.loc[:, ( + 'Respondent', 'StartDate')]) + df.loc[:, ('Respondent', 'EndDate')] = pd.to_datetime(df.loc[:, ( + 'Respondent', 'EndDate')]) + df.loc[:, ('Respondent', 'Duration')] = df.loc[:, ( + 'Respondent', 'EndDate')] - df.loc[:, ('Respondent', 'StartDate')] + + df.loc[:, ('Respondent', 'Duration')] = df.loc[:, ( + 'Respondent', 'Duration')].astype('timedelta64[s]') + expected = Series([1380, 720, 840, 2160.], index=df.index, + name=('Respondent', 'Duration')) + tm.assert_series_equal(df[('Respondent', 'Duration')], expected) + + def test_loc_setitem_frame(self): + df = self.frame_labels + + result = df.iloc[0, 0] + + df.loc['a', 'A'] = 1 + result = df.loc['a', 'A'] + self.assertEqual(result, 1) + + result = df.iloc[0, 0] + self.assertEqual(result, 1) + + df.loc[:, 'B':'D'] = 0 + expected = df.loc[:, 'B':'D'] + result = df.iloc[:, 1:] + tm.assert_frame_equal(result, expected) + + # GH 6254 + # setting issue + df = DataFrame(index=[3, 5, 4], columns=['A']) + df.loc[[4, 3, 5], 'A'] = np.array([1, 2, 3], dtype='int64') + expected = DataFrame(dict(A=Series( + [1, 2, 3], index=[4, 3, 5]))).reindex(index=[3, 5, 4]) + tm.assert_frame_equal(df, expected) + + # GH 6252 + # setting with an empty frame + keys1 = ['@' + str(i) for i in range(5)] + val1 = np.arange(5, dtype='int64') + + keys2 = ['@' + str(i) for i in range(4)] + val2 = np.arange(4, dtype='int64') + + index = list(set(keys1).union(keys2)) + df = DataFrame(index=index) + df['A'] = np.nan + df.loc[keys1, 'A'] = val1 + + df['B'] = np.nan + df.loc[keys2, 'B'] = val2 + + expected = DataFrame(dict(A=Series(val1, index=keys1), B=Series( + val2, index=keys2))).reindex(index=index) + tm.assert_frame_equal(df, expected) + + # GH 8669 + # invalid coercion of nan -> int + df = DataFrame({'A': [1, 2, 3], 'B': np.nan}) + df.loc[df.B > df.A, 'B'] = df.A + expected = DataFrame({'A': [1, 2, 3], 'B': np.nan}) + tm.assert_frame_equal(df, expected) + + # GH 6546 + # setting with mixed labels + df = DataFrame({1: [1, 2], 2: [3, 4], 'a': ['a', 'b']}) + + result = df.loc[0, [1, 2]] + expected = Series([1, 3], index=[1, 2], dtype=object, name=0) + tm.assert_series_equal(result, expected) + + expected = DataFrame({1: [5, 2], 2: [6, 4], 'a': ['a', 'b']}) + df.loc[0, [1, 2]] = [5, 6] + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame_multiples(self): + # multiple setting + df = DataFrame({'A': ['foo', 'bar', 'baz'], + 'B': Series( + range(3), dtype=np.int64)}) + rhs = df.loc[1:2] + rhs.index = df.index[0:2] + df.loc[0:1] = rhs + expected = DataFrame({'A': ['bar', 'baz', 'baz'], + 'B': Series( + [1, 2, 2], dtype=np.int64)}) + tm.assert_frame_equal(df, expected) + + # multiple setting with frame on rhs (with M8) + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series( + range(5), dtype=np.int64)}) + expected = DataFrame({'date': [Timestamp('20000101'), Timestamp( + '20000102'), Timestamp('20000101'), Timestamp('20000102'), + Timestamp('20000103')], + 'val': Series( + [0, 1, 0, 1, 2], dtype=np.int64)}) + rhs = df.loc[0:2] + rhs.index = df.index[2:5] + df.loc[2:4] = rhs + tm.assert_frame_equal(df, expected) + + def test_loc_coercion(self): + + # 12411 + df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'), + pd.NaT]}) + expected = df.dtypes + + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) + + # 12045 + import datetime + 
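# year 1012 is outside datetime64[ns] bounds, so the column below stays object dtype + 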
df = DataFrame({'date': [datetime.datetime(2012, 1, 1), + datetime.datetime(1012, 1, 2)]}) + expected = df.dtypes + + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) + + # 11594 + df = DataFrame({'text': ['some words'] + [None] * 9}) + expected = df.dtypes + + result = df.iloc[0:2] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[3:] + tm.assert_series_equal(result.dtypes, expected) + + def test_loc_non_unique(self): + # GH3659 + # non-unique indexer with loc slice + # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs + + # these are going to raise because we are non-monotonic + df = DataFrame({'A': [1, 2, 3, 4, 5, 6], + 'B': [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3]) + self.assertRaises(KeyError, df.loc.__getitem__, + tuple([slice(1, None)])) + self.assertRaises(KeyError, df.loc.__getitem__, + tuple([slice(0, None)])) + self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1, 2)])) + + # monotonic are ok + df = DataFrame({'A': [1, 2, 3, 4, 5, 6], + 'B': [3, 4, 5, 6, 7, 8]}, + index=[0, 1, 0, 1, 2, 3]).sort_index(axis=0) + result = df.loc[1:] + expected = DataFrame({'A': [2, 4, 5, 6], 'B': [4, 6, 7, 8]}, + index=[1, 1, 2, 3]) + tm.assert_frame_equal(result, expected) + + result = df.loc[0:] + tm.assert_frame_equal(result, df) + + result = df.loc[1:2] + expected = DataFrame({'A': [2, 4, 5], 'B': [4, 6, 7]}, + index=[1, 1, 2]) + tm.assert_frame_equal(result, expected) + + def test_loc_non_unique_memory_error(self): + + # GH 4280 + # non_unique index with a large selection triggers a memory error + + columns = list('ABCDEFG') + + def gen_test(l, l2): + return pd.concat([ + DataFrame(np.random.randn(l, len(columns)), + index=lrange(l), columns=columns), + DataFrame(np.ones((l2, len(columns))), + index=[0] * l2, columns=columns)]) + + def gen_expected(df, mask): + l = len(mask) + return pd.concat([df.take([0], convert=False), + DataFrame(np.ones((l, len(columns))), + index=[0] * l, + columns=columns), + df.take(mask[1:], convert=False)]) + + df = gen_test(900, 100) + self.assertFalse(df.index.is_unique) + + mask = np.arange(100) + result = df.loc[mask] + expected = gen_expected(df, mask) + tm.assert_frame_equal(result, expected) + + df = gen_test(900000, 100000) + self.assertFalse(df.index.is_unique) + + mask = np.arange(100000) + result = df.loc[mask] + expected = gen_expected(df, mask) + tm.assert_frame_equal(result, expected) + + def test_loc_name(self): + # GH 3880 + df = DataFrame([[1, 1], [1, 1]]) + df.index.name = 'index_name' + result = df.iloc[[0, 1]].index.name + self.assertEqual(result, 'index_name') + + with catch_warnings(record=True): + result = df.ix[[0, 1]].index.name + self.assertEqual(result, 'index_name') + + result = df.loc[[0, 1]].index.name + self.assertEqual(result, 'index_name') + + def test_loc_empty_list_indexer_is_ok(self): + from pandas.util.testing import makeCustomDataframe as mkdf + df = mkdf(5, 2) + # vertical empty + tm.assert_frame_equal(df.loc[:, []], df.iloc[:, :0], + check_index_type=True, check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.loc[[], :], df.iloc[:0, :], + check_index_type=True, check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.loc[[]], df.iloc[:0, :], + check_index_type=True, + check_column_type=True) diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index b40f0b8cd9976..ed943202872a7 100644 --- 
a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -46,101 +46,103 @@ def test_iloc_getitem_multiindex2(self): tm.assert_frame_equal(rs, xp) def test_setitem_multiindex(self): - for index_fn in ('ix', 'loc'): - - def check(target, indexers, value, compare_fn, expected=None): - fn = getattr(target, index_fn) - fn.__setitem__(indexers, value) - result = fn.__getitem__(indexers) - if expected is None: - expected = value - compare_fn(result, expected) - # GH7190 - index = pd.MultiIndex.from_product([np.arange(0, 100), - np.arange(0, 80)], - names=['time', 'firm']) - t, n = 0, 2 - df = DataFrame(np.nan, columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, indexers=((t, n), 'X'), value=0, - compare_fn=self.assertEqual) - - df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, indexers=((t, n), 'X'), value=1, - compare_fn=self.assertEqual) - - df = DataFrame(columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, indexers=((t, n), 'X'), value=2, - compare_fn=self.assertEqual) - - # GH 7218, assinging with 0-dim arrays - df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, - indexers=((t, n), 'X'), - value=np.array(3), - compare_fn=self.assertEqual, - expected=3, ) - - # GH5206 - df = pd.DataFrame(np.arange(25).reshape(5, 5), - columns='A,B,C,D,E'.split(','), dtype=float) - df['F'] = 99 - row_selection = df['A'] % 2 == 0 - col_selection = ['B', 'C'] - with catch_warnings(record=True): - df.ix[row_selection, col_selection] = df['F'] - output = pd.DataFrame(99., index=[0, 2, 4], columns=['B', 'C']) - with catch_warnings(record=True): - tm.assert_frame_equal(df.ix[row_selection, col_selection], - output) - check(target=df, - indexers=(row_selection, col_selection), - value=df['F'], - compare_fn=tm.assert_frame_equal, - expected=output, ) - - # GH11372 - idx = pd.MultiIndex.from_product([ - ['A', 'B', 'C'], - pd.date_range('2015-01-01', '2015-04-01', freq='MS')]) - cols = pd.MultiIndex.from_product([ - ['foo', 'bar'], - pd.date_range('2016-01-01', '2016-02-01', freq='MS')]) - - df = pd.DataFrame(np.random.random((12, 4)), - index=idx, columns=cols) - - subidx = pd.MultiIndex.from_tuples( - [('A', pd.Timestamp('2015-01-01')), - ('A', pd.Timestamp('2015-02-01'))]) - subcols = pd.MultiIndex.from_tuples( - [('foo', pd.Timestamp('2016-01-01')), - ('foo', pd.Timestamp('2016-02-01'))]) - - vals = pd.DataFrame(np.random.random((2, 2)), - index=subidx, columns=subcols) - check(target=df, - indexers=(subidx, subcols), - value=vals, - compare_fn=tm.assert_frame_equal, ) - # set all columns - vals = pd.DataFrame( - np.random.random((2, 4)), index=subidx, columns=cols) - check(target=df, - indexers=(subidx, slice(None, None, None)), - value=vals, - compare_fn=tm.assert_frame_equal, ) - # identity - copy = df.copy() - check(target=df, indexers=(df.index, df.columns), value=df, - compare_fn=tm.assert_frame_equal, expected=copy) + with catch_warnings(record=True): + + for index_fn in ('ix', 'loc'): + + def check(target, indexers, value, compare_fn, expected=None): + fn = getattr(target, index_fn) + fn.__setitem__(indexers, value) + result = fn.__getitem__(indexers) + if expected is None: + expected = value + compare_fn(result, expected) + # GH7190 + index = pd.MultiIndex.from_product([np.arange(0, 100), + np.arange(0, 80)], + names=['time', 'firm']) + t, n = 0, 2 + df = DataFrame(np.nan, 
columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=0, + compare_fn=self.assertEqual) + + df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=1, + compare_fn=self.assertEqual) + + df = DataFrame(columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=2, + compare_fn=self.assertEqual) + + # GH 7218, assigning with 0-dim arrays + df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, + indexers=((t, n), 'X'), + value=np.array(3), + compare_fn=self.assertEqual, + expected=3, ) + + # GH5206 + df = pd.DataFrame(np.arange(25).reshape(5, 5), + columns='A,B,C,D,E'.split(','), dtype=float) + df['F'] = 99 + row_selection = df['A'] % 2 == 0 + col_selection = ['B', 'C'] + with catch_warnings(record=True): + df.ix[row_selection, col_selection] = df['F'] + output = pd.DataFrame(99., index=[0, 2, 4], columns=['B', 'C']) + with catch_warnings(record=True): + tm.assert_frame_equal(df.ix[row_selection, col_selection], + output) + check(target=df, + indexers=(row_selection, col_selection), + value=df['F'], + compare_fn=tm.assert_frame_equal, + expected=output, ) + + # GH11372 + idx = pd.MultiIndex.from_product([ + ['A', 'B', 'C'], + pd.date_range('2015-01-01', '2015-04-01', freq='MS')]) + cols = pd.MultiIndex.from_product([ + ['foo', 'bar'], + pd.date_range('2016-01-01', '2016-02-01', freq='MS')]) + + df = pd.DataFrame(np.random.random((12, 4)), + index=idx, columns=cols) + + subidx = pd.MultiIndex.from_tuples( + [('A', pd.Timestamp('2015-01-01')), + ('A', pd.Timestamp('2015-02-01'))]) + subcols = pd.MultiIndex.from_tuples( + [('foo', pd.Timestamp('2016-01-01')), + ('foo', pd.Timestamp('2016-02-01'))]) + + vals = pd.DataFrame(np.random.random((2, 2)), + index=subidx, columns=subcols) + check(target=df, + indexers=(subidx, subcols), + value=vals, + compare_fn=tm.assert_frame_equal, ) + # set all columns + vals = pd.DataFrame( + np.random.random((2, 4)), index=subidx, columns=cols) + check(target=df, + indexers=(subidx, slice(None, None, None)), + value=vals, + compare_fn=tm.assert_frame_equal, ) + # identity + copy = df.copy() + check(target=df, indexers=(df.index, df.columns), value=df, + compare_fn=tm.assert_frame_equal, expected=copy) def test_loc_getitem_series(self): # GH14730 @@ -559,32 +561,37 @@ def test_multiindex_assignment(self): df['d'] = np.nan arr = np.array([0., 1.]) - df.ix[4, 'd'] = arr - tm.assert_series_equal(df.ix[4, 'd'], - Series(arr, index=[8, 10], name='d')) + with catch_warnings(record=True): + df.ix[4, 'd'] = arr + tm.assert_series_equal(df.ix[4, 'd'], + Series(arr, index=[8, 10], name='d')) # single dtype df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3), columns=list('abc'), index=[[4, 4, 8], [8, 10, 12]]) - df.ix[4, 'c'] = arr - exp = Series(arr, index=[8, 10], name='c', dtype='float64') - tm.assert_series_equal(df.ix[4, 'c'], exp) + with catch_warnings(record=True): + df.ix[4, 'c'] = arr + exp = Series(arr, index=[8, 10], name='c', dtype='float64') + tm.assert_series_equal(df.ix[4, 'c'], exp) # scalar ok - df.ix[4, 'c'] = 10 - exp = Series(10, index=[8, 10], name='c', dtype='float64') - tm.assert_series_equal(df.ix[4, 'c'], exp) + with catch_warnings(record=True): + df.ix[4, 'c'] = 10 + exp = Series(10, index=[8, 10], name='c', dtype='float64') + tm.assert_series_equal(df.ix[4, 'c'], exp) + # invalid assignments
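# the assigned value must conform: df.ix[4, 'c'] selects two rows, so only length-2 values fit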
def f(): - df.ix[4, 'c'] = [0, 1, 2, 3] + with catch_warnings(record=True): + df.ix[4, 'c'] = [0, 1, 2, 3] self.assertRaises(ValueError, f) def f(): - df.ix[4, 'c'] = [0] + with catch_warnings(record=True): + df.ix[4, 'c'] = [0] self.assertRaises(ValueError, f) @@ -614,7 +621,8 @@ def f(name, df2): # but in this case, that's ok for name, df2 in grp: new_vals = np.arange(df2.shape[0]) - df.ix[name, 'new_col'] = new_vals + with catch_warnings(record=True): + df.ix[name, 'new_col'] = new_vals def test_multiindex_label_slicing_with_negative_step(self): s = Series(np.arange(20), @@ -624,7 +632,8 @@ def test_multiindex_label_slicing_with_negative_step(self): def assert_slices_equivalent(l_slc, i_slc): tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) tm.assert_series_equal(s[l_slc], s.iloc[i_slc]) - tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc]) + with catch_warnings(record=True): + tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc]) assert_slices_equivalent(SLC[::-1], SLC[::-1]) diff --git a/pandas/tests/indexing/test_panel.py b/pandas/tests/indexing/test_panel.py index 5ec3076af599a..0677ea498c282 100644 --- a/pandas/tests/indexing/test_panel.py +++ b/pandas/tests/indexing/test_panel.py @@ -1,3 +1,5 @@ +from warnings import catch_warnings + import numpy as np from pandas.util import testing as tm from pandas import Panel, date_range, DataFrame @@ -112,8 +114,8 @@ def test_panel_getitem(self): len(ind), 5), index=ind, columns=list('ABCDE')) panel = Panel(dict([('frame_' + c, df) for c in list('ABC')])) - test2 = panel.ix[:, "2002":"2002-12-31"] - test1 = panel.ix[:, "2002"] + test2 = panel.loc[:, "2002":"2002-12-31"] + test1 = panel.loc[:, "2002"] tm.assert_panel_equal(test1, test2) # GH8710 @@ -134,10 +136,8 @@ def test_panel_getitem(self): result = panel.loc['ItemA':'ItemB'] tm.assert_panel_equal(result, expected) - result = panel.ix['ItemA':'ItemB'] - tm.assert_panel_equal(result, expected) - - result = panel.ix[['ItemA', 'ItemB']] + with catch_warnings(record=True): + result = panel.ix[['ItemA', 'ItemB']] tm.assert_panel_equal(result, expected) # with an object-like diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py new file mode 100644 index 0000000000000..a00f880ff6591 --- /dev/null +++ b/pandas/tests/indexing/test_partial.py @@ -0,0 +1,587 @@ +""" +test setting *parts* of objects both positionally and label based + +TODO: these should be split among the indexer tests +""" +from warnings import catch_warnings +import numpy as np + +import pandas as pd +from pandas import Series, DataFrame, Panel, Index, date_range +from pandas.util import testing as tm + + +class TestPartialSetting(tm.TestCase): + + def test_partial_setting(self): + + # GH2578, allow ix and friends to partially set + + # series + s_orig = Series([1, 2, 3]) + + s = s_orig.copy() + s[5] = 5 + expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.loc[5] = 5 + expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s[5] = 5. + expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.loc[5] = 5. + expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + # iloc/iat raise + s = s_orig.copy() + + def f(): + s.iloc[3] = 5. + + self.assertRaises(IndexError, f) + + def f(): + s.iat[3] = 5. 
+ + self.assertRaises(IndexError, f) + + # ## frame ## + + df_orig = DataFrame( + np.arange(6).reshape(3, 2), columns=['A', 'B'], dtype='int64') + + # iloc/iat raise + df = df_orig.copy() + + def f(): + df.iloc[4, 2] = 5. + + self.assertRaises(IndexError, f) + + def f(): + df.iat[4, 2] = 5. + + self.assertRaises(IndexError, f) + + # row setting where it exists + expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]})) + df = df_orig.copy() + df.iloc[1] = df.iloc[2] + tm.assert_frame_equal(df, expected) + + expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]})) + df = df_orig.copy() + df.loc[1] = df.loc[2] + tm.assert_frame_equal(df, expected) + + # like 2578, partial setting with dtype preservation + expected = DataFrame(dict({'A': [0, 2, 4, 4], 'B': [1, 3, 5, 5]})) + df = df_orig.copy() + df.loc[3] = df.loc[2] + tm.assert_frame_equal(df, expected) + + # single dtype frame, overwrite + expected = DataFrame(dict({'A': [0, 2, 4], 'B': [0, 2, 4]})) + df = df_orig.copy() + with catch_warnings(record=True): + df.ix[:, 'B'] = df.ix[:, 'A'] + tm.assert_frame_equal(df, expected) + + # mixed dtype frame, overwrite + expected = DataFrame(dict({'A': [0, 2, 4], 'B': Series([0, 2, 4])})) + df = df_orig.copy() + df['B'] = df['B'].astype(np.float64) + with catch_warnings(record=True): + df.ix[:, 'B'] = df.ix[:, 'A'] + tm.assert_frame_equal(df, expected) + + # single dtype frame, partial setting + expected = df_orig.copy() + expected['C'] = df['A'] + df = df_orig.copy() + with catch_warnings(record=True): + df.ix[:, 'C'] = df.ix[:, 'A'] + tm.assert_frame_equal(df, expected) + + # mixed frame, partial setting + expected = df_orig.copy() + expected['C'] = df['A'] + df = df_orig.copy() + with catch_warnings(record=True): + df.ix[:, 'C'] = df.ix[:, 'A'] + tm.assert_frame_equal(df, expected) + + # ## panel ## + p_orig = Panel(np.arange(16).reshape(2, 4, 2), + items=['Item1', 'Item2'], + major_axis=pd.date_range('2001/1/12', periods=4), + minor_axis=['A', 'B'], dtype='float64') + + # panel setting via item + p_orig = Panel(np.arange(16).reshape(2, 4, 2), + items=['Item1', 'Item2'], + major_axis=pd.date_range('2001/1/12', periods=4), + minor_axis=['A', 'B'], dtype='float64') + expected = p_orig.copy() + expected['Item3'] = expected['Item1'] + p = p_orig.copy() + p.loc['Item3'] = p['Item1'] + tm.assert_panel_equal(p, expected) + + # panel with aligned series + expected = p_orig.copy() + expected = expected.transpose(2, 1, 0) + expected['C'] = DataFrame({'Item1': [30, 30, 30, 30], + 'Item2': [32, 32, 32, 32]}, + index=p_orig.major_axis) + expected = expected.transpose(2, 1, 0) + p = p_orig.copy() + p.loc[:, :, 'C'] = Series([30, 32], index=p_orig.items) + tm.assert_panel_equal(p, expected) + + # GH 8473 + dates = date_range('1/1/2000', periods=8) + df_orig = DataFrame(np.random.randn(8, 4), index=dates, + columns=['A', 'B', 'C', 'D']) + + expected = pd.concat([df_orig, DataFrame( + {'A': 7}, index=[dates[-1] + 1])]) + df = df_orig.copy() + df.loc[dates[-1] + 1, 'A'] = 7 + tm.assert_frame_equal(df, expected) + df = df_orig.copy() + df.at[dates[-1] + 1, 'A'] = 7 + tm.assert_frame_equal(df, expected) + + exp_other = DataFrame({0: 7}, index=[dates[-1] + 1]) + expected = pd.concat([df_orig, exp_other], axis=1) + + df = df_orig.copy() + df.loc[dates[-1] + 1, 0] = 7 + tm.assert_frame_equal(df, expected) + df = df_orig.copy() + df.at[dates[-1] + 1, 0] = 7 + tm.assert_frame_equal(df, expected) + + def test_partial_setting_mixed_dtype(self): + + # in a mixed dtype environment, try to preserve dtypes + # by 
appending + df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"]) + + s = df.loc[1].copy() + s.name = 2 + expected = df.append(s) + + df.loc[2] = df.loc[1] + tm.assert_frame_equal(df, expected) + + # columns will align + df = DataFrame(columns=['A', 'B']) + df.loc[0] = Series(1, index=range(4)) + tm.assert_frame_equal(df, DataFrame(columns=['A', 'B'], index=[0])) + + # columns will align + df = DataFrame(columns=['A', 'B']) + df.loc[0] = Series(1, index=['B']) + + exp = DataFrame([[np.nan, 1]], columns=['A', 'B'], + index=[0], dtype='float64') + tm.assert_frame_equal(df, exp) + + # list-like must conform + df = DataFrame(columns=['A', 'B']) + + def f(): + df.loc[0] = [1, 2, 3] + + self.assertRaises(ValueError, f) + + # these are coerced to float unavoidably (as it's a list-like to begin with) + df = DataFrame(columns=['A', 'B']) + df.loc[3] = [6, 7] + + exp = DataFrame([[6, 7]], index=[3], columns=['A', 'B'], + dtype='float64') + tm.assert_frame_equal(df, exp) + + def test_series_partial_set(self): + # partial set with new index + # Regression from GH4825 + ser = Series([0.1, 0.2], index=[1, 2]) + + # loc + expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) + result = ser.loc[[3, 2, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, 'x']) + result = ser.loc[[3, 2, 3, 'x']] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1]) + result = ser.loc[[2, 2, 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, 'x', 1]) + result = ser.loc[[2, 2, 'x', 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + # raises as nothing is in the index + self.assertRaises(KeyError, lambda: ser.loc[[3, 3, 3]]) + + expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) + result = ser.loc[[2, 2, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) + result = Series([0.1, 0.2, 0.3], index=[1, 2, 3]).loc[[3, 4, 4]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) + result = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]).loc[[5, 3, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) + result = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]).loc[[5, 4, 4]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) + result = Series([0.1, 0.2, 0.3, 0.4], + index=[4, 5, 6, 7]).loc[[7, 2, 2]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) + result = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]).loc[[4, 5, 5]] + tm.assert_series_equal(result, expected, check_index_type=True) + + # iloc + expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1]) + result = ser.iloc[[1, 1, 0, 0]] + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_series_partial_set_with_name(self): + # GH 11497 + + idx = Index([1, 2], dtype='int64', name='idx') + ser = Series([0.1, 0.2], index=idx, name='s') + + # loc + exp_idx = Index([3, 2, 3], dtype='int64', name='idx') + expected = Series([np.nan, 0.2, np.nan], index=exp_idx, name='s') + result = ser.loc[[3, 2, 3]] + 
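# labels missing from the index come back as NaN, and the index name is kept + 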
tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([3, 2, 3, 'x'], dtype='object', name='idx') + expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx, + name='s') + result = ser.loc[[3, 2, 3, 'x']] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([2, 2, 1], dtype='int64', name='idx') + expected = Series([0.2, 0.2, 0.1], index=exp_idx, name='s') + result = ser.loc[[2, 2, 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([2, 2, 'x', 1], dtype='object', name='idx') + expected = Series([0.2, 0.2, np.nan, 0.1], index=exp_idx, name='s') + result = ser.loc[[2, 2, 'x', 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + # raises as nothing is in the index + self.assertRaises(KeyError, lambda: ser.loc[[3, 3, 3]]) + + exp_idx = Index([2, 2, 3], dtype='int64', name='idx') + expected = Series([0.2, 0.2, np.nan], index=exp_idx, name='s') + result = ser.loc[[2, 2, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([3, 4, 4], dtype='int64', name='idx') + expected = Series([0.3, np.nan, np.nan], index=exp_idx, name='s') + idx = Index([1, 2, 3], dtype='int64', name='idx') + result = Series([0.1, 0.2, 0.3], index=idx, name='s').loc[[3, 4, 4]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([5, 3, 3], dtype='int64', name='idx') + expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name='s') + idx = Index([1, 2, 3, 4], dtype='int64', name='idx') + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[5, 3, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([5, 4, 4], dtype='int64', name='idx') + expected = Series([np.nan, 0.4, 0.4], index=exp_idx, name='s') + idx = Index([1, 2, 3, 4], dtype='int64', name='idx') + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[5, 4, 4]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([7, 2, 2], dtype='int64', name='idx') + expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') + idx = Index([4, 5, 6, 7], dtype='int64', name='idx') + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[7, 2, 2]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([4, 5, 5], dtype='int64', name='idx') + expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') + idx = Index([1, 2, 3, 4], dtype='int64', name='idx') + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[4, 5, 5]] + tm.assert_series_equal(result, expected, check_index_type=True) + + # iloc + exp_idx = Index([2, 2, 1, 1], dtype='int64', name='idx') + expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name='s') + result = ser.iloc[[1, 1, 0, 0]] + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_partial_set_invalid(self): + + # GH 4940 + # allow only setting of 'valid' values + + orig = tm.makeTimeDataFrame() + df = orig.copy() + + # don't allow non-string inserts + def f(): + with catch_warnings(record=True): + df.loc[100.0, :] = df.ix[0] + + self.assertRaises(TypeError, f) + + def f(): + with catch_warnings(record=True): + df.loc[100, :] = df.ix[0] + + self.assertRaises(TypeError, f) + + def f(): + with catch_warnings(record=True): + df.ix[100.0, :] = df.ix[0] + + self.assertRaises(TypeError, f) + + def f(): + with catch_warnings(record=True): + df.ix[100, :] = df.ix[0] + + 
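# unlike the .loc cases above, .ix with a plain integer raises ValueError + 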
self.assertRaises(ValueError, f) + + # allow object conversion here + df = orig.copy() + with catch_warnings(record=True): + df.loc['a', :] = df.ix[0] + exp = orig.append(pd.Series(df.ix[0], name='a')) + tm.assert_frame_equal(df, exp) + tm.assert_index_equal(df.index, + pd.Index(orig.index.tolist() + ['a'])) + self.assertEqual(df.index.dtype, 'object') + + def test_partial_set_empty_series(self): + + # GH5226 + + # partially set with an empty object series + s = Series() + s.loc[1] = 1 + tm.assert_series_equal(s, Series([1], index=[1])) + s.loc[3] = 3 + tm.assert_series_equal(s, Series([1, 3], index=[1, 3])) + + s = Series() + s.loc[1] = 1. + tm.assert_series_equal(s, Series([1.], index=[1])) + s.loc[3] = 3. + tm.assert_series_equal(s, Series([1., 3.], index=[1, 3])) + + s = Series() + s.loc['foo'] = 1 + tm.assert_series_equal(s, Series([1], index=['foo'])) + s.loc['bar'] = 3 + tm.assert_series_equal(s, Series([1, 3], index=['foo', 'bar'])) + s.loc[3] = 4 + tm.assert_series_equal(s, Series([1, 3, 4], index=['foo', 'bar', 3])) + + def test_partial_set_empty_frame(self): + + # partially set with an empty object + # frame + df = DataFrame() + + def f(): + df.loc[1] = 1 + + self.assertRaises(ValueError, f) + + def f(): + df.loc[1] = Series([1], index=['foo']) + + self.assertRaises(ValueError, f) + + def f(): + df.loc[:, 1] = 1 + + self.assertRaises(ValueError, f) + + # these work as they don't really change + # anything but the index + # GH5632 + expected = DataFrame(columns=['foo'], index=pd.Index( + [], dtype='int64')) + + def f(): + df = DataFrame() + df['foo'] = Series([], dtype='object') + return df + + tm.assert_frame_equal(f(), expected) + + def f(): + df = DataFrame() + df['foo'] = Series(df.index) + return df + + tm.assert_frame_equal(f(), expected) + + def f(): + df = DataFrame() + df['foo'] = df.index + return df + + tm.assert_frame_equal(f(), expected) + + expected = DataFrame(columns=['foo'], + index=pd.Index([], dtype='int64')) + expected['foo'] = expected['foo'].astype('float64') + + def f(): + df = DataFrame() + df['foo'] = [] + return df + + tm.assert_frame_equal(f(), expected) + + def f(): + df = DataFrame() + df['foo'] = Series(range(len(df))) + return df + + tm.assert_frame_equal(f(), expected) + + def f(): + df = DataFrame() + tm.assert_index_equal(df.index, pd.Index([], dtype='object')) + df['foo'] = range(len(df)) + return df + + expected = DataFrame(columns=['foo'], + index=pd.Index([], dtype='int64')) + expected['foo'] = expected['foo'].astype('float64') + tm.assert_frame_equal(f(), expected) + + df = DataFrame() + tm.assert_index_equal(df.columns, pd.Index([], dtype=object)) + df2 = DataFrame() + df2[1] = Series([1], index=['foo']) + df.loc[:, 1] = Series([1], index=['foo']) + tm.assert_frame_equal(df, DataFrame([[1]], index=['foo'], columns=[1])) + tm.assert_frame_equal(df, df2) + + # no index to start + expected = DataFrame({0: Series(1, index=range(4))}, + columns=['A', 'B', 0]) + + df = DataFrame(columns=['A', 'B']) + df[0] = Series(1, index=range(4)) + df.dtypes + str(df) + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=['A', 'B']) + df.loc[:, 0] = Series(1, index=range(4)) + df.dtypes + str(df) + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame_row(self): + # GH5720, GH5744 + # don't create rows when empty + expected = DataFrame(columns=['A', 'B', 'New'], + index=pd.Index([], dtype='int64')) + expected['A'] = expected['A'].astype('int64') + expected['B'] = expected['B'].astype('float64') + expected['New'] = 
expected['New'].astype('float64') + + df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) + y = df[df.A > 5] + y['New'] = np.nan + tm.assert_frame_equal(y, expected) + # tm.assert_frame_equal(y,expected) + + expected = DataFrame(columns=['a', 'b', 'c c', 'd']) + expected['d'] = expected['d'].astype('int64') + df = DataFrame(columns=['a', 'b', 'c c']) + df['d'] = 3 + tm.assert_frame_equal(df, expected) + tm.assert_series_equal(df['c c'], Series(name='c c', dtype=object)) + + # reindex columns is ok + df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) + y = df[df.A > 5] + result = y.reindex(columns=['A', 'B', 'C']) + expected = DataFrame(columns=['A', 'B', 'C'], + index=pd.Index([], dtype='int64')) + expected['A'] = expected['A'].astype('int64') + expected['B'] = expected['B'].astype('float64') + expected['C'] = expected['C'].astype('float64') + tm.assert_frame_equal(result, expected) + + def test_partial_set_empty_frame_set_series(self): + # GH 5756 + # setting with empty Series + df = DataFrame(Series()) + tm.assert_frame_equal(df, DataFrame({0: Series()})) + + df = DataFrame(Series(name='foo')) + tm.assert_frame_equal(df, DataFrame({'foo': Series()})) + + def test_partial_set_empty_frame_empty_copy_assignment(self): + # GH 5932 + # copy on empty with assignment fails + df = DataFrame(index=[0]) + df = df.copy() + df['a'] = 0 + expected = DataFrame(0, index=[0], columns=['a']) + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame_empty_consistencies(self): + # GH 6171 + # consistency on empty frames + df = DataFrame(columns=['x', 'y']) + df['x'] = [1, 2] + expected = DataFrame(dict(x=[1, 2], y=[np.nan, np.nan])) + tm.assert_frame_equal(df, expected, check_dtype=False) + + df = DataFrame(columns=['x', 'y']) + df['x'] = ['1', '2'] + expected = DataFrame( + dict(x=['1', '2'], y=[np.nan, np.nan]), dtype=object) + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=['x', 'y']) + df.loc[0, 'x'] = 1 + expected = DataFrame(dict(x=[1], y=[np.nan])) + tm.assert_frame_equal(df, expected, check_dtype=False) diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py new file mode 100644 index 0000000000000..4e81cd01cd5d2 --- /dev/null +++ b/pandas/tests/indexing/test_scalar.py @@ -0,0 +1,156 @@ +""" test scalar indexing, including at and iat """ + +import numpy as np + +from pandas import (Series, DataFrame, Timestamp, + Timedelta, date_range) +from pandas.util import testing as tm +from pandas.tests.indexing.common import Base + + +class TestScalar(Base, tm.TestCase): + + def test_at_and_iat_get(self): + def _check(f, func, values=False): + + if f is not None: + indices = self.generate_indices(f, values) + for i in indices: + result = getattr(f, func)[i] + expected = self.get_value(f, i, values) + tm.assert_almost_equal(result, expected) + + for o in self._objs: + + d = getattr(self, o) + + # iat + for f in [d['ints'], d['uints']]: + _check(f, 'iat', values=True) + + for f in [d['labels'], d['ts'], d['floats']]: + if f is not None: + self.assertRaises(ValueError, self.check_values, f, 'iat') + + # at + for f in [d['ints'], d['uints'], d['labels'], + d['ts'], d['floats']]: + _check(f, 'at') + + def test_at_and_iat_set(self): + def _check(f, func, values=False): + + if f is not None: + indices = self.generate_indices(f, values) + for i in indices: + getattr(f, func)[i] = 1 + expected = self.get_value(f, i, values) + tm.assert_almost_equal(expected, 1) + + for t in self._objs: + + d = getattr(self, t) + + # iat + for f in 
[d['ints'], d['uints']]: + _check(f, 'iat', values=True) + + for f in [d['labels'], d['ts'], d['floats']]: + if f is not None: + self.assertRaises(ValueError, _check, f, 'iat') + + # at + for f in [d['ints'], d['uints'], d['labels'], + d['ts'], d['floats']]: + _check(f, 'at') + + def test_at_iat_coercion(self): + + # as timestamp is not a tuple! + dates = date_range('1/1/2000', periods=8) + df = DataFrame(np.random.randn(8, 4), + index=dates, + columns=['A', 'B', 'C', 'D']) + s = df['A'] + + result = s.at[dates[5]] + xp = s.values[5] + self.assertEqual(result, xp) + + # GH 7729 + # make sure we are boxing the returns + s = Series(['2014-01-01', '2014-02-02'], dtype='datetime64[ns]') + expected = Timestamp('2014-02-02') + + for r in [lambda: s.iat[1], lambda: s.iloc[1]]: + result = r() + self.assertEqual(result, expected) + + s = Series(['1 days', '2 days'], dtype='timedelta64[ns]') + expected = Timedelta('2 days') + + for r in [lambda: s.iat[1], lambda: s.iloc[1]]: + result = r() + self.assertEqual(result, expected) + + def test_iat_invalid_args(self): + pass + + def test_imethods_with_dups(self): + + # GH6493 + # iat/iloc with dups + + s = Series(range(5), index=[1, 1, 2, 2, 3], dtype='int64') + result = s.iloc[2] + self.assertEqual(result, 2) + result = s.iat[2] + self.assertEqual(result, 2) + + self.assertRaises(IndexError, lambda: s.iat[10]) + self.assertRaises(IndexError, lambda: s.iat[-10]) + + result = s.iloc[[2, 3]] + expected = Series([2, 3], [2, 2], dtype='int64') + tm.assert_series_equal(result, expected) + + df = s.to_frame() + result = df.iloc[2] + expected = Series(2, index=[0], name=2) + tm.assert_series_equal(result, expected) + + result = df.iat[2, 0] + expected = 2 + self.assertEqual(result, 2) + + def test_at_to_fail(self): + # at should not fallback + # GH 7814 + s = Series([1, 2, 3], index=list('abc')) + result = s.at['a'] + self.assertEqual(result, 1) + self.assertRaises(ValueError, lambda: s.at[0]) + + df = DataFrame({'A': [1, 2, 3]}, index=list('abc')) + result = df.at['a', 'A'] + self.assertEqual(result, 1) + self.assertRaises(ValueError, lambda: df.at['a', 0]) + + s = Series([1, 2, 3], index=[3, 2, 1]) + result = s.at[1] + self.assertEqual(result, 3) + self.assertRaises(ValueError, lambda: s.at['a']) + + df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) + result = df.at[1, 0] + self.assertEqual(result, 3) + self.assertRaises(ValueError, lambda: df.at['a', 0]) + + # GH 13822, incorrect error string with non-unique columns when missing + # column is accessed + df = DataFrame({'x': [1.], 'y': [2.], 'z': [3.]}) + df.columns = ['x', 'x', 'z'] + + # Check that we get the correct value in the KeyError + self.assertRaisesRegexp(KeyError, r"\['y'\] not in index", + lambda: df[['x', 'y', 'z']]) From 4ce9c0c9b9ef0c6665a0d9ead1afbfb05a864252 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 10 Mar 2017 06:25:22 -0500 Subject: [PATCH 180/933] BUG: Incorrect value updating for groupby.cummin/max (#15635) closes #15635 Author: Matt Roeschke Closes #15642 from mroeschke/fix_15635 and squashes the following commits: b92b81a [Matt Roeschke] BUG: Incorrect value updating for groupby.cummin/max (#15635) --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/_libs/algos_groupby_helper.pxi.in | 20 ++++++++++---------- pandas/tests/groupby/test_groupby.py | 11 +++++++++++ 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index cf3dddc3a2933..47aa4450b897f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ 
b/doc/source/whatsnew/v0.20.0.txt @@ -716,7 +716,7 @@ Performance Improvements - Increased performance of ``pd.factorize()`` by releasing the GIL with ``object`` dtype when inferred as strings (:issue:`14859`) - Improved performance of timeseries plotting with an irregular DatetimeIndex (or with ``compat_x=True``) (:issue:`15073`). -- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`) +- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`, :issue:`15635`) - Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. (:issue:`14947`) - Improved performance of ``.rank()`` for categorical data (:issue:`15498`) diff --git a/pandas/_libs/algos_groupby_helper.pxi.in b/pandas/_libs/algos_groupby_helper.pxi.in index 9552b4299fe6a..e2c263f49b110 100644 --- a/pandas/_libs/algos_groupby_helper.pxi.in +++ b/pandas/_libs/algos_groupby_helper.pxi.in @@ -603,7 +603,7 @@ def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, size - {{dest_type2}} val, min_val = 0 + {{dest_type2}} val, mval ndarray[{{dest_type2}}, ndim=2] accum int64_t lab @@ -628,10 +628,10 @@ def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, {{else}} if val == val: {{endif}} - if val < accum[lab, j]: - min_val = val - accum[lab, j] = min_val - out[i, j] = accum[lab, j] + mval = accum[lab, j] + if val < mval: + accum[lab, j] = mval = val + out[i, j] = mval @cython.boundscheck(False) @@ -645,7 +645,7 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, size - {{dest_type2}} val, max_val = 0 + {{dest_type2}} val, mval ndarray[{{dest_type2}}, ndim=2] accum int64_t lab @@ -669,10 +669,10 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, {{else}} if val == val: {{endif}} - if val > accum[lab, j]: - max_val = val - accum[lab, j] = max_val - out[i, j] = accum[lab, j] + mval = accum[lab, j] + if val > mval: + accum[lab, j] = mval = val + out[i, j] = mval {{endfor}} diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e846963732883..d7fa3beda0abf 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -4303,6 +4303,17 @@ def test_cummin_cummax(self): result = getattr(df.groupby('a')['b'], method)() tm.assert_series_equal(expected, result) + # GH 15635 + df = pd.DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1])) + result = df.groupby('a').b.cummax() + expected = pd.Series([2, 1, 2], name='b') + tm.assert_series_equal(result, expected) + + df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2])) + result = df.groupby('a').b.cummin() + expected = pd.Series([1, 2, 1], name='b') + tm.assert_series_equal(result, expected) + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = lmap(tuple, df[keys].values) From a37c610c3c0759a0b587e6776df8ab8a55f6a266 Mon Sep 17 00:00:00 2001 From: linebp Date: Fri, 10 Mar 2017 13:07:10 +0100 Subject: [PATCH 181/933] DOC GH15643 Removed pytest-xdist from requirements_dev.txt file (#15646) --- ci/requirements_dev.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt index b0a8adc8df5cb..1e051802ec9f8 100644 --- a/ci/requirements_dev.txt +++ b/ci/requirements_dev.txt @@ 
-4,5 +4,4 @@ numpy cython pytest pytest-cov -pytest-xdist flake8 From 94c6c0ce46ceae8ceb3a81f17014e6551902e653 Mon Sep 17 00:00:00 2001 From: JennaVergeynst Date: Fri, 10 Mar 2017 14:44:26 +0100 Subject: [PATCH 182/933] DOC: add examples to DataFrame.dropna (#15620) --- pandas/core/frame.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2062f301b9e0e..987eb10101f12 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3084,6 +3084,50 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, Returns ------- dropped : DataFrame + + Examples + -------- + >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0], [3, 4, np.nan, 1], + ... [np.nan, np.nan, np.nan, 5]], + ... columns=list('ABCD')) + >>> df + A B C D + 0 NaN 2.0 NaN 0 + 1 3.0 4.0 NaN 1 + 2 NaN NaN NaN 5 + + Drop the columns where all elements are nan: + + >>> df.dropna(axis=1, how='all') + A B D + 0 NaN 2.0 0 + 1 3.0 4.0 1 + 2 NaN NaN 5 + + Drop the columns where any of the elements is nan + + >>> df.dropna(axis=1, how='any') + D + 0 0 + 1 1 + 2 5 + + Drop the rows where all of the elements are nan + (there is no row to drop, so df stays the same): + + >>> df.dropna(axis=0, how='all') + A B C D + 0 NaN 2.0 NaN 0 + 1 3.0 4.0 NaN 1 + 2 NaN NaN NaN 5 + + Keep only the rows with at least 2 non-na values: + + >>> df.dropna(thresh=2) + A B C D + 0 NaN 2.0 NaN 0 + 1 3.0 4.0 NaN 1 + """ inplace = validate_bool_kwarg(inplace, 'inplace') if isinstance(axis, (tuple, list)): From 67d529a00066c6f6278d9971048c69a22febe0cc Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 10 Mar 2017 09:24:46 -0500 Subject: [PATCH 183/933] DOC: doc warnings Author: Jeff Reback Closes #15647 from jreback/doc and squashes the following commits: 6afb394 [Jeff Reback] gbq install adjustment 0fd4499 [Jeff Reback] maybe d5ec228 [Jeff Reback] DOC: fixup some doc-links b7ea898 [Jeff Reback] DOC: some deprecation warnings removed --- doc/source/index.rst.template | 1 - doc/source/install.rst | 2 +- doc/source/whatsnew/v0.10.0.txt | 5 ++--- doc/source/whatsnew/v0.10.1.txt | 4 ++-- doc/source/whatsnew/v0.17.0.txt | 2 +- doc/source/whatsnew/v0.18.0.txt | 2 +- doc/source/whatsnew/v0.19.0.txt | 2 +- doc/source/whatsnew/v0.20.0.txt | 6 +++--- pandas/io/gbq.py | 5 +++-- 9 files changed, 14 insertions(+), 15 deletions(-) diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template index 67072ff9fb224..0bfb2b635f53a 100644 --- a/doc/source/index.rst.template +++ b/doc/source/index.rst.template @@ -116,7 +116,6 @@ See the package overview for more detail about what's in the library. whatsnew install contributing - faq overview 10min tutorials diff --git a/doc/source/install.rst b/doc/source/install.rst index fe2a9fa4ba509..578caae605471 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -260,7 +260,7 @@ Optional Dependencies `__, or `xclip `__: necessary to use :func:`~pandas.read_clipboard`. Most package managers on Linux distributions will have ``xclip`` and/or ``xsel`` immediately available for installation. -* For Google BigQuery I/O - see :ref:`here `. +* For Google BigQuery I/O - see `here `__ * `Backports.lzma `__: Only for Python 2, for writing to and/or reading from an xz compressed DataFrame in CSV; Python 3 support is built into the standard library. 
* One of the following combinations of libraries is needed to use the diff --git a/doc/source/whatsnew/v0.10.0.txt b/doc/source/whatsnew/v0.10.0.txt index fed3ba3ce3a84..cf5369466308c 100644 --- a/doc/source/whatsnew/v0.10.0.txt +++ b/doc/source/whatsnew/v0.10.0.txt @@ -303,11 +303,10 @@ Updated PyTables Support store.append('wp',wp) # selecting via A QUERY - store.select('wp', - [ Term('major_axis>20000102'), Term('minor_axis', '=', ['A','B']) ]) + store.select('wp', "major_axis>20000102 and minor_axis=['A','B']") # removing data from tables - store.remove('wp', Term('major_axis>20000103')) + store.remove('wp', "major_axis>20000103") store.select('wp') # deleting a store diff --git a/doc/source/whatsnew/v0.10.1.txt b/doc/source/whatsnew/v0.10.1.txt index edc628fe85027..d5880e44e46c6 100644 --- a/doc/source/whatsnew/v0.10.1.txt +++ b/doc/source/whatsnew/v0.10.1.txt @@ -58,7 +58,7 @@ perform queries on a table, by passing a list to ``data_columns`` # on-disk operations store.append('df', df, data_columns = ['B','C','string','string2']) - store.select('df',[ 'B > 0', 'string == foo' ]) + store.select('df', "B>0 and string=='foo'") # this is in-memory version of this type of selection df[(df.B > 0) & (df.string == 'foo')] @@ -110,7 +110,7 @@ columns, this is equivalent to passing a store.select('mi') # the levels are automatically included as data columns - store.select('mi', Term('foo=bar')) + store.select('mi', "foo='bar'") Multi-table creation via ``append_to_multiple`` and selection via ``select_as_multiple`` can create/select from multiple tables and return a diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 9cb299593076d..a3bbaf73c01ca 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -329,7 +329,7 @@ has been changed to make this keyword unnecessary - the change is shown below. Google BigQuery Enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - Added ability to automatically create a table/dataset using the :func:`pandas.io.gbq.to_gbq` function if the destination table/dataset does not exist. (:issue:`8325`, :issue:`11121`). -- Added ability to replace an existing table and schema when calling the :func:`pandas.io.gbq.to_gbq` function via the ``if_exists`` argument. See the :ref:`docs ` for more details (:issue:`8325`). +- Added ability to replace an existing table and schema when calling the :func:`pandas.io.gbq.to_gbq` function via the ``if_exists`` argument. See the `docs `__ for more details (:issue:`8325`). - ``InvalidColumnOrder`` and ``InvalidPageToken`` in the gbq module will raise ``ValueError`` instead of ``IOError``. - The ``generate_bq_schema()`` function is now deprecated and will be removed in a future version (:issue:`11121`) - The gbq module will now support Python 3 (:issue:`11094`). diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 893922b719b34..4b27cf706f9b2 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -518,7 +518,7 @@ Other enhancements - Added ``DataFrame.style.format`` for more flexible formatting of cell values (:issue:`11692`) - ``DataFrame.select_dtypes`` now allows the ``np.float16`` typecode (:issue:`11990`) - ``pivot_table()`` now accepts most iterables for the ``values`` parameter (:issue:`12017`) -- Added Google ``BigQuery`` service account authentication support, which enables authentication on remote servers. (:issue:`11881`, :issue:`12572`). 
For further details see :ref:`here ` +- Added Google ``BigQuery`` service account authentication support, which enables authentication on remote servers. (:issue:`11881`, :issue:`12572`). For further details see `here `__ - ``HDFStore`` is now iterable: ``for k in store`` is equivalent to ``for k in store.keys()`` (:issue:`12221`). - Add missing methods/fields to ``.dt`` for ``Period`` (:issue:`8848`) - The entire codebase has been ``PEP``-ified (:issue:`12096`) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 8e7e95c071ea4..9b003034aa94a 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -377,7 +377,7 @@ For ``MultiIndex``, values are dropped if any level is missing by default. Speci Google BigQuery Enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- The :func:`read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL or BigQuery's standard SQL. See the :ref:`docs ` for more details (:issue:`13615`). +- The :func:`read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL or BigQuery's standard SQL. See the `docs `__ for more details (:issue:`13615`). - The :func:`~DataFrame.to_gbq` method now allows the DataFrame column order to differ from the destination table schema (:issue:`11359`). .. _whatsnew_0190.errstate: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 47aa4450b897f..7b24264cd09db 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -203,7 +203,7 @@ New Behavior: df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum() -.. _whatsnew_0200.enhancements.table_schema +.. _whatsnew_0200.enhancements.table_schema: Table Schema Output ^^^^^^^^^^^^^^^^^^^ @@ -337,7 +337,7 @@ Using ``.iloc``. Here we will get the location of the 'A' column, then use *posi df.iloc[[0, 2], df.columns.get_loc('A')] -.. _whatsnew.api_breaking.io_compat +.. _whatsnew.api_breaking.io_compat: Possible incompat for HDF5 formats for pandas < 0.13.0 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -758,7 +758,7 @@ Bug Fixes - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) -- Bug in ``.asfreq()``, where frequency was not set for empty ``Series` (:issue:`14320`) +- Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) - Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. 
(:issue:`14956`) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 9cfb27a92bfef..b4dc9173f11ba 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -14,8 +14,9 @@ def _try_import(): "the pandas-gbq package is not installed\n" "see the docs: https://pandas-gbq.readthedocs.io\n" "\n" - "you can install via:\n" - "pip install pandas-gbq\n") + "you can install via pip or conda:\n" + "pip install pandas-gbq\n" + "conda install pandas-gbq -c conda-forge\n") return pandas_gbq From 1be66ac975d89be9c5b695ce34a4a18ffed355ec Mon Sep 17 00:00:00 2001 From: Kernc Date: Fri, 10 Mar 2017 09:27:45 -0500 Subject: [PATCH 184/933] ENH: Native conversion from/to scipy.sparse matrix to SparseDataFrame closes #4343 Author: Kernc Closes #15497 from kernc/scipy-sparse and squashes the following commits: a0f2208 [Kernc] DOC: Fix some whatsnew/v0.20.0.txt sphinx warnings e72e594 [Kernc] ENH: Native conversion from/to scipy.sparse matrix to SparseDataFrame --- doc/source/api.rst | 11 ++- doc/source/sparse.rst | 32 +++++++- doc/source/whatsnew/v0.20.0.txt | 29 +++++++- pandas/sparse/array.py | 9 ++- pandas/sparse/frame.py | 107 ++++++++++++++++++++++----- pandas/tests/sparse/common.py | 10 +++ pandas/tests/sparse/test_frame.py | 62 ++++++++++++++++ pandas/tests/types/test_inference.py | 9 +++ pandas/types/common.py | 14 ++++ pandas/util/testing.py | 5 ++ 10 files changed, 266 insertions(+), 22 deletions(-) create mode 100644 pandas/tests/sparse/common.py diff --git a/doc/source/api.rst b/doc/source/api.rst index 7e297a15055a0..f6bf480bebcfc 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -711,8 +711,8 @@ Serialization / IO / Conversion Series.to_string Series.to_clipboard -Sparse methods -~~~~~~~~~~~~~~ +Sparse +~~~~~~ .. autosummary:: :toctree: generated/ @@ -1030,6 +1030,13 @@ Serialization / IO / Conversion DataFrame.to_string DataFrame.to_clipboard +Sparse +~~~~~~ +.. autosummary:: + :toctree: generated/ + + SparseDataFrame.to_coo + .. _api.panel: Panel diff --git a/doc/source/sparse.rst b/doc/source/sparse.rst index 2bc5d3f6dd0f5..b4884cf1c4141 100644 --- a/doc/source/sparse.rst +++ b/doc/source/sparse.rst @@ -186,7 +186,37 @@ the correct dense result. Interaction with scipy.sparse ----------------------------- -Experimental api to transform between sparse pandas and scipy.sparse structures. +SparseDataFrame +~~~~~~~~~~~~~~~ + +.. versionadded:: 0.20.0 + +Pandas supports creating sparse dataframes directly from ``scipy.sparse`` matrices. + +.. ipython:: python + + from scipy.sparse import csr_matrix + + arr = np.random.random(size=(1000, 5)) + arr[arr < .9] = 0 + + sp_arr = csr_matrix(arr) + sp_arr + + sdf = pd.SparseDataFrame(sp_arr) + sdf + +All sparse formats are supported, but matrices that are not in :mod:`COOrdinate ` format will be converted, copying data as needed. +To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you can use the :meth:`SparseDataFrame.to_coo` method: + +.. ipython:: python + + sdf.to_coo() + +SparseSeries +~~~~~~~~~~~~ + +.. versionadded:: 0.16.0 A :meth:`SparseSeries.to_coo` method is implemented for transforming a ``SparseSeries`` indexed by a ``MultiIndex`` to a ``scipy.sparse.coo_matrix``. diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7b24264cd09db..3c82e533dd158 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -237,10 +237,37 @@ You must enable this by setting the ``display.html.table_schema`` option to True .. 
_Table Schema: http://specs.frictionlessdata.io/json-table-schema/ .. _nteract: http://nteract.io/ +.. _whatsnew_0200.enhancements.scipy_sparse: + +SciPy sparse matrix from/to SparseDataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas now supports creating sparse dataframes directly from ``scipy.sparse.spmatrix`` instances. +See the :ref:`documentation ` for more information. (:issue:`4343`) + +All sparse formats are supported, but matrices that are not in :mod:`COOrdinate ` format will be converted, copying data as needed. + +.. ipython:: python + + from scipy.sparse import csr_matrix + arr = np.random.random(size=(1000, 5)) + arr[arr < .9] = 0 + sp_arr = csr_matrix(arr) + sp_arr + sdf = pd.SparseDataFrame(sp_arr) + sdf + +To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you can use: + +.. ipython:: python + + sdf.to_coo() + .. _whatsnew_0200.enhancements.other: Other enhancements ^^^^^^^^^^^^^^^^^^ + - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. - ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) - ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) @@ -752,7 +779,6 @@ Bug Fixes - Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) - Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`) - Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) -- Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) @@ -783,6 +809,7 @@ Bug Fixes - Bug in ``to_sql`` when writing a DataFrame with numeric index names (:issue:`15404`). - Bug in ``Series.iloc`` where a ``Categorical`` object for list-like indexes input was returned, where a ``Series`` was expected. 
(:issue:`14580`) - Bug in repr-formatting a ``SparseDataFrame`` after a value was set on (a copy of) one of its series (:issue:`15488`) +- Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) - Bug in groupby operations with timedelta64 when passing ``numeric_only=False`` (:issue:`5724`) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 762b6d869eae0..5f4c07971d37e 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -20,6 +20,7 @@ is_integer_dtype, is_bool_dtype, is_list_like, + is_string_dtype, is_scalar, is_dtype_equal) from pandas.types.cast import (_possibly_convert_platform, _maybe_promote, _astype_nansafe, _find_common_type) @@ -769,6 +770,12 @@ def make_sparse(arr, kind='block', fill_value=None): if isnull(fill_value): mask = notnull(arr) else: + # For str arrays in NumPy 1.12.0, operator!= below isn't + # element-wise but just returns False if fill_value is not str, + # so cast to object comparison to be safe + if is_string_dtype(arr): + arr = arr.astype(object) + mask = arr != fill_value length = len(arr) @@ -776,7 +783,7 @@ def make_sparse(arr, kind='block', fill_value=None): # the arr is a SparseArray indices = mask.sp_index.indices else: - indices = np.arange(length, dtype=np.int32)[mask] + indices = mask.nonzero()[0].astype(np.int32) index = _make_index(length, indices, kind) sparsified_values = arr[mask] diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 61b8434b0ea09..a21f64f524a0a 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -11,8 +11,8 @@ import numpy as np from pandas.types.missing import isnull, notnull -from pandas.types.cast import _maybe_upcast -from pandas.types.common import _ensure_platform_int +from pandas.types.cast import _maybe_upcast, _find_common_type +from pandas.types.common import _ensure_platform_int, is_scipy_sparse from pandas.core.common import _try_sort from pandas.compat.numpy import function as nv @@ -25,6 +25,7 @@ create_block_manager_from_arrays) import pandas.core.generic as generic from pandas.sparse.series import SparseSeries, SparseArray +from pandas.sparse.libsparse import BlockIndex, get_blocks from pandas.util.decorators import Appender import pandas.core.ops as ops @@ -39,15 +40,15 @@ class SparseDataFrame(DataFrame): Parameters ---------- - data : same types as can be passed to DataFrame + data : same types as can be passed to DataFrame or scipy.sparse.spmatrix index : array-like, optional column : array-like, optional default_kind : {'block', 'integer'}, default 'block' Default sparse kind for converting Series to SparseSeries. Will not override SparseSeries passed into constructor default_fill_value : float - Default fill_value for converting Series to SparseSeries. Will not - override SparseSeries passed in + Default fill_value for converting Series to SparseSeries + (default: nan). Will not override SparseSeries passed in. 
""" _constructor_sliced = SparseSeries _subtyp = 'sparse_frame' @@ -84,22 +85,19 @@ def __init__(self, data=None, index=None, columns=None, default_kind=None, self._default_kind = default_kind self._default_fill_value = default_fill_value - if isinstance(data, dict): - mgr = self._init_dict(data, index, columns) - if dtype is not None: - mgr = mgr.astype(dtype) + if is_scipy_sparse(data): + mgr = self._init_spmatrix(data, index, columns, dtype=dtype, + fill_value=default_fill_value) + elif isinstance(data, dict): + mgr = self._init_dict(data, index, columns, dtype=dtype) elif isinstance(data, (np.ndarray, list)): - mgr = self._init_matrix(data, index, columns) - if dtype is not None: - mgr = mgr.astype(dtype) + mgr = self._init_matrix(data, index, columns, dtype=dtype) elif isinstance(data, SparseDataFrame): mgr = self._init_mgr(data._data, dict(index=index, columns=columns), dtype=dtype, copy=copy) elif isinstance(data, DataFrame): - mgr = self._init_dict(data, data.index, data.columns) - if dtype is not None: - mgr = mgr.astype(dtype) + mgr = self._init_dict(data, data.index, data.columns, dtype=dtype) elif isinstance(data, BlockManager): mgr = self._init_mgr(data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy) @@ -174,7 +172,43 @@ def _init_dict(self, data, index, columns, dtype=None): return to_manager(sdict, columns, index) def _init_matrix(self, data, index, columns, dtype=None): + """ Init self from ndarray or list of lists """ data = _prep_ndarray(data, copy=False) + index, columns = self._prep_index(data, index, columns) + data = dict([(idx, data[:, i]) for i, idx in enumerate(columns)]) + return self._init_dict(data, index, columns, dtype) + + def _init_spmatrix(self, data, index, columns, dtype=None, + fill_value=None): + """ Init self from scipy.sparse matrix """ + index, columns = self._prep_index(data, index, columns) + data = data.tocoo() + N = len(index) + + # Construct a dict of SparseSeries + sdict = {} + values = Series(data.data, index=data.row, copy=False) + for col, rowvals in values.groupby(data.col): + # get_blocks expects int32 row indices in sorted order + rows = rowvals.index.values.astype(np.int32) + rows.sort() + blocs, blens = get_blocks(rows) + + sdict[columns[col]] = SparseSeries( + rowvals.values, index=index, + fill_value=fill_value, + sparse_index=BlockIndex(N, blocs, blens)) + + # Add any columns that were empty and thus not grouped on above + sdict.update({column: SparseSeries(index=index, + fill_value=fill_value, + sparse_index=BlockIndex(N, [], [])) + for column in columns + if column not in sdict}) + + return self._init_dict(sdict, index, columns, dtype) + + def _prep_index(self, data, index, columns): N, K = data.shape if index is None: index = _default_index(N) @@ -187,9 +221,48 @@ def _init_matrix(self, data, index, columns, dtype=None): if len(index) != N: raise ValueError('Index length mismatch: %d vs. %d' % (len(index), N)) + return index, columns - data = dict([(idx, data[:, i]) for i, idx in enumerate(columns)]) - return self._init_dict(data, index, columns, dtype) + def to_coo(self): + """ + Return the contents of the frame as a sparse SciPy COO matrix. + + .. versionadded:: 0.20.0 + + Returns + ------- + coo_matrix : scipy.sparse.spmatrix + If the caller is heterogeneous and contains booleans or objects, + the result will be of dtype=object. See Notes. 
+ + Notes + ----- + The dtype will be the lowest-common-denominator type (implicit + upcasting); that is to say if the dtypes (even of numeric types) + are mixed, the one that accommodates all will be chosen. + + e.g. If the dtypes are float16 and float32, dtype will be upcast to + float32. By numpy.find_common_type convention, mixing int64 and + uint64 will result in a float64 dtype. + """ + try: + from scipy.sparse import coo_matrix + except ImportError: + raise ImportError('Scipy is not installed') + + dtype = _find_common_type(self.dtypes) + cols, rows, datas = [], [], [] + for col, name in enumerate(self): + s = self[name] + row = s.sp_index.to_int_index().indices + cols.append(np.repeat(col, len(row))) + rows.append(row) + datas.append(s.sp_values.astype(dtype, copy=False)) + + cols = np.concatenate(cols) + rows = np.concatenate(rows) + datas = np.concatenate(datas) + return coo_matrix((datas, (rows, cols)), shape=self.shape) def __array_wrap__(self, result): return self._constructor( diff --git a/pandas/tests/sparse/common.py b/pandas/tests/sparse/common.py new file mode 100644 index 0000000000000..3aeef8d436e1a --- /dev/null +++ b/pandas/tests/sparse/common.py @@ -0,0 +1,10 @@ +import pytest + +import pandas.util.testing as tm + + +@pytest.fixture(params=['bsr', 'coo', 'csc', 'csr', 'dia', 'dok', 'lil']) +def spmatrix(request): + tm._skip_if_no_scipy() + from scipy import sparse + return getattr(sparse, request.param + '_matrix') diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index a7dd7f2e81033..4cd5a643ce4be 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -2,11 +2,17 @@ import operator +import pytest + from numpy import nan import numpy as np import pandas as pd from pandas import Series, DataFrame, bdate_range, Panel +from pandas.types.common import (is_bool_dtype, + is_float_dtype, + is_object_dtype, + is_float) from pandas.tseries.index import DatetimeIndex from pandas.tseries.offsets import BDay import pandas.util.testing as tm @@ -18,6 +24,8 @@ from pandas.sparse.api import SparseSeries, SparseDataFrame, SparseArray from pandas.tests.frame.test_misc_api import SharedWithSparse +from pandas.tests.sparse.common import spmatrix # noqa: F401 + class TestSparseDataFrame(tm.TestCase, SharedWithSparse): @@ -1118,6 +1126,60 @@ def test_isnotnull(self): tm.assert_frame_equal(res.to_dense(), exp) +@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811 +@pytest.mark.parametrize('columns', [None, list('cd')]) +@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) +@pytest.mark.parametrize('dtype', [object, bool, int, float, np.uint16]) +def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): + # GH 4343 + tm._skip_if_no_scipy() + + # Make one ndarray and from it one sparse matrix, both to be used for + # constructing frames and comparing results + arr = np.eye(2, dtype=dtype) + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = pd.SparseDataFrame(spm, index=index, columns=columns, + default_fill_value=fill_value) + + # Expected result construction is kind of tricky for all + # dtype-fill_value combinations; easiest to cast to something generic + # and except later on + rarr = arr.astype(object) + rarr[arr == 0] = np.nan + expected =
pd.SparseDataFrame(rarr, index=index, columns=columns).fillna( + fill_value if fill_value is not None else np.nan) + + # Assert frame is as expected + sdf_obj = sdf.astype(object) + tm.assert_sp_frame_equal(sdf_obj, expected) + tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) + + # Assert spmatrices equal + tm.assert_equal(dict(sdf.to_coo().todok()), dict(spm.todok())) + + # Ensure dtype is preserved if possible + was_upcast = ((fill_value is None or is_float(fill_value)) and + not is_object_dtype(dtype) and + not is_float_dtype(dtype)) + res_dtype = (bool if is_bool_dtype(dtype) else + float if was_upcast else + dtype) + tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) + tm.assert_equal(sdf.to_coo().dtype, res_dtype) + + # However, adding a str column results in an upcast to object + sdf['strings'] = np.arange(len(sdf)).astype(str) + tm.assert_equal(sdf.to_coo().dtype, np.object_) + + class TestSparseDataFrameArithmetic(tm.TestCase): def test_numeric_op_scalar(self): diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py index a36a77a70f9ad..b41df0da45234 100644 --- a/pandas/tests/types/test_inference.py +++ b/pandas/tests/types/test_inference.py @@ -30,11 +30,14 @@ is_float, is_bool, is_scalar, + is_scipy_sparse, _ensure_int32, _ensure_categorical) from pandas.types.missing import isnull from pandas.util import testing as tm +from pandas.tests.sparse.test_frame import spmatrix # noqa: F401 + def test_is_sequence(): is_seq = inference.is_sequence @@ -946,6 +949,12 @@ def test_nan_to_nat_conversions(): assert (s[8].value == np.datetime64('NaT').astype(np.int64)) +def test_is_scipy_sparse(spmatrix): # noqa: F811 + tm._skip_if_no_scipy() + assert is_scipy_sparse(spmatrix([[0, 1]])) + assert not is_scipy_sparse(np.array([1])) + + def test_ensure_int32(): values = np.arange(10, dtype=np.int32) result = _ensure_int32(values) diff --git a/pandas/types/common.py b/pandas/types/common.py index 1be5b5f6f1368..a1f03e59a5e6e 100644 --- a/pandas/types/common.py +++ b/pandas/types/common.py @@ -23,6 +23,9 @@ _TD_DTYPE = np.dtype('m8[ns]') _INT64_DTYPE = np.dtype(np.int64) +# oh the troubles to reduce import time +_is_scipy_sparse = None + _ensure_float64 = algos.ensure_float64 _ensure_float32 = algos.ensure_float32 @@ -59,6 +62,17 @@ def is_sparse(array): return isinstance(array, (ABCSparseArray, ABCSparseSeries)) +def is_scipy_sparse(array): + """ return if we are a scipy.sparse.spmatrix """ + global _is_scipy_sparse + if _is_scipy_sparse is None: + try: + from scipy.sparse import issparse as _is_scipy_sparse + except ImportError: + _is_scipy_sparse = lambda _: False + return _is_scipy_sparse(array) + + def is_categorical(array): """ return if we are a categorical possibility """ return isinstance(array, ABCCategorical) or is_categorical_dtype(array) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index b68bf55a347b2..ec30a9376a9da 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -297,6 +297,11 @@ def _skip_if_no_scipy(): except ImportError: import pytest pytest.skip('scipy.interpolate missing') + try: + import scipy.sparse # noqa + except ImportError: + import pytest + pytest.skip('scipy.sparse missing') def _skip_if_scipy_0_17(): From 15e8e9a53d036f8e436ae5ad4eff66fc48f67d30 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 10 Mar 2017 09:38:25 -0500 Subject: [PATCH 185/933] BUG: Error when specifying int index containing NaN xref #15187. 
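As a minimal sketch of the behavior this patch establishes (illustrative only, not part of the patch itself; it mirrors the new test_constructor_int_dtype_nan test added below and assumes a build that includes this change):

    import numpy as np
    import pandas as pd

    # An integer dtype cannot hold NaN, so construction now raises
    # a ValueError instead of building an incorrect index:
    try:
        pd.Index([np.nan], dtype='int64')
    except ValueError as err:
        print(err)  # cannot convert float NaN to integer

    # NaN is a float, so requesting a float dtype still works:
    pd.Index([np.nan], dtype='float')  # Float64Index([nan], dtype='float64')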
Author: gfyoung Closes #15616 from gfyoung/nan-int-index and squashes the following commits: 195b830 [gfyoung] BUG: Error when specifying int index containing NaN --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/indexes/base.py | 27 +++++++++++++++++++++++---- pandas/tests/indexes/test_base.py | 17 +++++++++++++++++ pandas/tests/indexes/test_numeric.py | 27 ++++++++++++++++++++++++++- pandas/tests/indexes/test_range.py | 28 +++++++++++++++++++++++++++- 5 files changed, 94 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 3c82e533dd158..dd081ea605c01 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -792,6 +792,7 @@ Bug Fixes - Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) - Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) +- Bug in ``Index`` construction with ``NaN`` elements and integer dtype specified (:issue:`15187`) - Bug in ``Series`` construction with a datetimetz (:issue:`14928`) - Bug in output formatting of a ``MultiIndex`` when names are integers (:issue:`12223`, :issue:`15262`) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 607a463083fdd..7f46f437489a1 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -203,6 +203,9 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if inferred == 'integer': data = np.array(data, copy=copy, dtype=dtype) elif inferred in ['floating', 'mixed-integer-float']: + if isnull(data).any(): + raise ValueError('cannot convert float ' + 'NaN to integer') # If we are actually all equal to integers, # then coerce to integer. @@ -230,8 +233,10 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, else: data = np.array(data, dtype=dtype, copy=copy) - except (TypeError, ValueError): - pass + except (TypeError, ValueError) as e: + msg = str(e) + if 'cannot convert float' in msg: + raise # maybe coerce to a sub-class from pandas.tseries.period import (PeriodIndex, @@ -585,7 +590,14 @@ def where(self, cond, other=None): if other is None: other = self._na_value values = np.where(cond, self.values, other) - return self._shallow_copy_with_infer(values, dtype=self.dtype) + + dtype = self.dtype + if self._is_numeric_dtype and np.any(isnull(values)): + # We can't coerce to the numeric dtype of "self" (unless + # it's float) if there are NaN values in our output. + dtype = None + + return self._shallow_copy_with_infer(values, dtype=dtype) def ravel(self, order='C'): """ @@ -689,7 +701,14 @@ def _coerce_scalar_to_index(self, item): ---------- item : scalar item to coerce """ - return Index([item], dtype=self.dtype, **self._get_attributes_dict()) + dtype = self.dtype + + if self._is_numeric_dtype and isnull(item): + # We can't coerce to the numeric dtype of "self" (unless + # it's float) if there are NaN values in our output. + dtype = None + + return Index([item], dtype=dtype, **self._get_attributes_dict()) _index_shared_docs['copy'] = """ Make a copy of this object. 
Name and dtype sets those attributes on diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 8c0a399cb58b3..05d3478ab0705 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -199,6 +199,23 @@ def __array__(self, dtype=None): result = pd.Index(ArrayLike(array)) self.assert_index_equal(result, expected) + def test_constructor_int_dtype_nan(self): + # see gh-15187 + data = [np.nan] + msg = "cannot convert" + + with tm.assertRaisesRegexp(ValueError, msg): + Index(data, dtype='int64') + + with tm.assertRaisesRegexp(ValueError, msg): + Index(data, dtype='uint64') + + # This, however, should not break + # because NaN is float. + expected = Float64Index(data) + result = Index(data, dtype='float') + tm.assert_index_equal(result, expected) + def test_index_ctor_infer_nan_nat(self): # GH 13467 exp = pd.Float64Index([np.nan, np.nan]) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index e23e7c19ed799..d0ce34169f79e 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -5,7 +5,7 @@ import numpy as np -from pandas import (date_range, Series, Index, Float64Index, +from pandas import (date_range, notnull, Series, Index, Float64Index, Int64Index, UInt64Index, RangeIndex) import pandas.util.testing as tm @@ -686,6 +686,31 @@ def test_coerce_list(self): arr = Index([1, 2, 3, 4], dtype=object) tm.assertIsInstance(arr, Index) + def test_where(self): + i = self.create_index() + result = i.where(notnull(i)) + expected = i + tm.assert_index_equal(result, expected) + + _nan = i._na_value + cond = [False] + [True] * len(i[1:]) + expected = pd.Index([_nan] + i[1:].tolist()) + + result = i.where(cond) + tm.assert_index_equal(result, expected) + + def test_where_array_like(self): + i = self.create_index() + + _nan = i._na_value + cond = [False] + [True] * (len(i) - 1) + klasses = [list, tuple, np.array, pd.Series] + expected = pd.Index([_nan] + i[1:].tolist()) + + for klass in klasses: + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + def test_get_indexer(self): target = Int64Index(np.arange(10)) indexer = self.index.get_indexer(target) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 38e715fce2720..53c88897d6764 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -8,7 +8,8 @@ import numpy as np -from pandas import (Series, Index, Float64Index, Int64Index, RangeIndex) +from pandas import (notnull, Series, Index, Float64Index, + Int64Index, RangeIndex) from pandas.util.testing import assertRaisesRegexp import pandas.util.testing as tm @@ -915,3 +916,28 @@ def test_len_specialised(self): i = RangeIndex(0, 5, step) self.assertEqual(len(i), 0) + + def test_where(self): + i = self.create_index() + result = i.where(notnull(i)) + expected = i + tm.assert_index_equal(result, expected) + + _nan = i._na_value + cond = [False] + [True] * len(i[1:]) + expected = pd.Index([_nan] + i[1:].tolist()) + + result = i.where(cond) + tm.assert_index_equal(result, expected) + + def test_where_array_like(self): + i = self.create_index() + + _nan = i._na_value + cond = [False] + [True] * (len(i) - 1) + klasses = [list, tuple, np.array, pd.Series] + expected = pd.Index([_nan] + i[1:].tolist()) + + for klass in klasses: + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) From 5dee1f18ac2a06d38e4bb3800eee11424ec25ca1 Mon Sep 17 00:00:00 2001 From: gfyoung Date: 
Fri, 10 Mar 2017 16:25:29 -0500 Subject: [PATCH 186/933] API: Drop DataFrame.iterkv() Deprecated since 0.17.0 xref #10711 Author: gfyoung Closes #15650 from gfyoung/df-iterkv-remove and squashes the following commits: e40fc9e [gfyoung] API: Drop DataFrame.iterkv() --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/generic.py | 10 ---------- pandas/tests/frame/test_misc_api.py | 6 +----- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index dd081ea605c01..f42dfb80924e0 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -692,6 +692,7 @@ Other API Changes - Reorganization of timeseries development tests (:issue:`14854`) - Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`) - ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`) +- ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) .. _whatsnew_0200.deprecations: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a0111cb9ef7ec..1db9677659ca3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -899,16 +899,6 @@ def iteritems(self): for h in self._info_axis: yield h, self[h] - # originally used to get around 2to3's changes to iteritems. - # Now unnecessary. Sidenote: don't want to deprecate this for a while, - # otherwise libraries that use 2to3 will have issues. - def iterkv(self, *args, **kwargs): - "iteritems alias used to get around 2to3. Deprecated" - warnings.warn("iterkv is deprecated and will be removed in a future " - "release, use ``iteritems`` instead.", FutureWarning, - stacklevel=2) - return self.iteritems(*args, **kwargs) - def __len__(self): """Returns length of info axis""" return len(self._info_axis) diff --git a/pandas/tests/frame/test_misc_api.py b/pandas/tests/frame/test_misc_api.py index 674202980807a..321d46739b24c 100644 --- a/pandas/tests/frame/test_misc_api.py +++ b/pandas/tests/frame/test_misc_api.py @@ -389,11 +389,7 @@ def test_repr_with_mi_nat(self): exp = ' X\nNaT a 1\n2013-01-01 b 2' self.assertEqual(res, exp) - def test_iterkv_deprecation(self): - with tm.assert_produces_warning(FutureWarning): - self.mixed_float.iterkv() - - def test_iterkv_names(self): + def test_iteritems_names(self): for k, v in compat.iteritems(self.mixed_frame): self.assertEqual(v.name, k) From 026e748e4ff558de80c92c04986a78754b430902 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 10 Mar 2017 18:04:41 -0500 Subject: [PATCH 187/933] BUG/API: .merge() and .join() on category dtype columns will now preserve category dtype closes #10409 Author: Jeff Reback Closes #15321 from jreback/merge_cat and squashes the following commits: 3671dad [Jeff Reback] DOC: merge docs a4b2ee6 [Jeff Reback] BUG/API: .merge() and .join() on category dtype columns will now preserve the category dtype when possible --- asv_bench/benchmarks/join_merge.py | 36 +++++- doc/source/categorical.rst | 3 + doc/source/merging.rst | 73 +++++++++++ doc/source/whatsnew/v0.20.0.txt | 4 +- pandas/core/internals.py | 2 + pandas/tests/test_categorical.py | 3 + pandas/tests/tools/test_merge.py | 177 +++++++++++++++++++++----- pandas/tests/tools/test_merge_asof.py | 1 + pandas/tests/types/test_common.py | 50 ++++++-- pandas/tools/merge.py | 86 ++++++++++--- 10 files changed, 364 insertions(+), 71 deletions(-) diff --git 
a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index d9c631fa92efd..776316343e009 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -6,7 +6,7 @@ from pandas import ordered_merge as merge_ordered -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Append class Append(object): @@ -35,7 +35,7 @@ def time_append_mixed(self): self.mdf1.append(self.mdf2) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Concat class Concat(object): @@ -120,7 +120,7 @@ def time_f_ordered_axis1(self): concat(self.frames_f, axis=1, ignore_index=True) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Joins class Join(object): @@ -202,7 +202,7 @@ def time_join_non_unique_equal(self): (self.fracofday * self.temp[self.fracofday.index]) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Merges class Merge(object): @@ -257,7 +257,31 @@ def time_i8merge(self): merge(self.left, self.right, how='outer') -#---------------------------------------------------------------------- +class MergeCategoricals(object): + goal_time = 0.2 + + def setup(self): + self.left_object = pd.DataFrame( + {'X': np.random.choice(range(0, 10), size=(10000,)), + 'Y': np.random.choice(['one', 'two', 'three'], size=(10000,))}) + + self.right_object = pd.DataFrame( + {'X': np.random.choice(range(0, 10), size=(10000,)), + 'Z': np.random.choice(['jjj', 'kkk', 'sss'], size=(10000,))}) + + self.left_cat = self.left_object.assign( + Y=self.left_object['Y'].astype('category')) + self.right_cat = self.right_object.assign( + Z=self.right_object['Z'].astype('category')) + + def time_merge_object(self): + merge(self.left_object, self.right_object, on='X') + + def time_merge_cat(self): + merge(self.left_cat, self.right_cat, on='X') + + +# ---------------------------------------------------------------------- # Ordered merge class MergeOrdered(object): @@ -332,7 +356,7 @@ def time_multiby(self): merge_asof(self.df1e, self.df2e, on='time', by=['key', 'key2']) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # data alignment class Align(object): diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index db974922e1d76..6d85e1a6560b0 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -646,6 +646,9 @@ In this case the categories are not the same and so an error is raised: The same applies to ``df.append(df_different)``. +See also the section on :ref:`merge dtypes` for notes about preserving merge dtypes and performance. + + .. _categorical.union: Unioning diff --git a/doc/source/merging.rst b/doc/source/merging.rst index f732f0a4cc749..70d2ce5b1a664 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -746,6 +746,79 @@ The ``indicator`` argument will also accept string arguments, in which case the pd.merge(df1, df2, on='col1', how='outer', indicator='indicator_column') +.. _merging.dtypes: + +Merge Dtypes +~~~~~~~~~~~~ + +.. versionadded:: 0.19.0 + +Merging will preserve the dtype of the join keys. + +.. 
ipython:: python + + left = pd.DataFrame({'key': [1], 'v1': [10]}) + left + right = pd.DataFrame({'key': [1, 2], 'v1': [20, 30]}) + right + +We are able to preserve the join keys + +.. ipython:: python + + pd.merge(left, right, how='outer') + pd.merge(left, right, how='outer').dtypes + +Of course if you have missing values that are introduced, then the +resulting dtype will be upcast. + +.. ipython:: python + + pd.merge(left, right, how='outer', on='key') + pd.merge(left, right, how='outer', on='key').dtypes + +.. versionadded:: 0.20.0 + +Merging will preserve ``category`` dtypes of the mergands. + +The left frame. + +.. ipython:: python + + X = pd.Series(np.random.choice(['foo', 'bar'], size=(10,))) + X = X.astype('category', categories=['foo', 'bar']) + + left = DataFrame({'X': X, + 'Y': np.random.choice(['one', 'two', 'three'], size=(10,))}) + left + left.dtypes + +The right frame. + +.. ipython:: python + + right = DataFrame({'X': Series(['foo', 'bar']).astype('category', categories=['foo', 'bar']), + 'Z': [1, 2]}) + right + right.dtypes + +The merged result + +.. ipython:: python + + result = pd.merge(left, right, how='outer') + result + result.dtypes + +.. note:: + + The category dtypes must be *exactly* the same, meaning the same categories and the ordered attribute. + Otherwise the result will coerce to ``object`` dtype. + +.. note:: + + Merging on ``category`` dtypes that are the same can be quite performant compared to ``object`` dtype merging. + .. _merging.join.index: Joining on index diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f42dfb80924e0..e392023423eb0 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -692,7 +692,7 @@ Other API Changes - Reorganization of timeseries development tests (:issue:`14854`) - Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`) - ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`) -- ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) +- ``.merge()`` and ``.join()`` on ``category`` dtype columns will now preserve the category dtype when possible (:issue:`10409`) .. _whatsnew_0200.deprecations: @@ -733,6 +733,7 @@ Removal of prior version deprecations/changes - ``Series.is_time_series`` is dropped in favor of ``Series.index.is_all_dates`` (:issue:`15098`) - The deprecated ``irow``, ``icol``, ``iget`` and ``iget_value`` methods are removed in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). +- The deprecated ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) .. _whatsnew_0200.performance: @@ -749,6 +750,7 @@ Performance Improvements - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. (:issue:`14947`) - Improved performance of ``.rank()`` for categorical data (:issue:`15498`) - Improved performance when using ``.unstack()`` (:issue:`15503`) +- Improved performance of merge/join on ``category`` columns (:issue:`10409`) .. 
_whatsnew_0200.bug_fixes: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 4b43574f49820..aa954fbee9a60 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -5227,6 +5227,8 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): # External code requested filling/upcasting, bool values must # be upcasted to object to avoid being upcasted to numeric. values = self.block.astype(np.object_).values + elif self.block.is_categorical: + values = self.block.values else: # No dtype upcasting is done here, it will be performed during # concatenation itself. diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index cc99cf0f830aa..2d5e98d49e152 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -4097,9 +4097,12 @@ def test_merge(self): expected = df.copy() # object-cat + # note that we propagate the category + # because we don't have any matching rows cright = right.copy() cright['d'] = cright['d'].astype('category') result = pd.merge(left, cright, how='left', left_on='b', right_on='c') + expected['d'] = expected['d'].astype('category', categories=['null']) tm.assert_frame_equal(result, expected) # cat-object diff --git a/pandas/tests/tools/test_merge.py b/pandas/tests/tools/test_merge.py index b3b5e7e29319b..ff27500355998 100644 --- a/pandas/tests/tools/test_merge.py +++ b/pandas/tests/tools/test_merge.py @@ -1,5 +1,6 @@ # pylint: disable=E1103 +import pytest from datetime import datetime from numpy.random import randn from numpy import nan @@ -11,6 +12,8 @@ from pandas.tools.concat import concat from pandas.tools.merge import merge, MergeError from pandas.util.testing import assert_frame_equal, assert_series_equal +from pandas.types.dtypes import CategoricalDtype +from pandas.types.common import is_categorical_dtype, is_object_dtype from pandas import DataFrame, Index, MultiIndex, Series, Categorical import pandas.util.testing as tm @@ -1024,38 +1027,6 @@ def test_left_join_index_multi_match(self): expected.index = np.arange(len(expected)) tm.assert_frame_equal(result, expected) - def test_join_multi_dtypes(self): - - # test with multi dtypes in the join index - def _test(dtype1, dtype2): - left = DataFrame({'k1': np.array([0, 1, 2] * 8, dtype=dtype1), - 'k2': ['foo', 'bar'] * 12, - 'v': np.array(np.arange(24), dtype=np.int64)}) - - index = MultiIndex.from_tuples([(2, 'bar'), (1, 'foo')]) - right = DataFrame( - {'v2': np.array([5, 7], dtype=dtype2)}, index=index) - - result = left.join(right, on=['k1', 'k2']) - - expected = left.copy() - - if dtype2.kind == 'i': - dtype2 = np.dtype('float64') - expected['v2'] = np.array(np.nan, dtype=dtype2) - expected.loc[(expected.k1 == 2) & (expected.k2 == 'bar'), 'v2'] = 5 - expected.loc[(expected.k1 == 1) & (expected.k2 == 'foo'), 'v2'] = 7 - - tm.assert_frame_equal(result, expected) - - result = left.join(right, on=['k1', 'k2'], sort=True) - expected.sort_values(['k1', 'k2'], kind='mergesort', inplace=True) - tm.assert_frame_equal(result, expected) - - for d1 in [np.int64, np.int32, np.int16, np.int8, np.uint8]: - for d2 in [np.int64, np.float64, np.float32, np.float16]: - _test(np.dtype(d1), np.dtype(d2)) - def test_left_merge_na_buglet(self): left = DataFrame({'id': list('abcde'), 'v1': randn(5), 'v2': randn(5), 'dummy': list('abcde'), @@ -1242,3 +1213,145 @@ def f(): def f(): household.join(log_return, how='outer') self.assertRaises(NotImplementedError, f) + + +@pytest.fixture +def df(): + return DataFrame( + {'A': ['foo', 'bar'], + 'B':
Series(['foo', 'bar']).astype('category'), + 'C': [1, 2], + 'D': [1.0, 2.0], + 'E': Series([1, 2], dtype='uint64'), + 'F': Series([1, 2], dtype='int32')}) + + +class TestMergeDtypes(object): + + def test_different(self, df): + + # we expect differences by kind + # to be ok, while other differences should return object + + left = df + for col in df.columns: + right = DataFrame({'A': df[col]}) + result = pd.merge(left, right, on='A') + assert is_object_dtype(result.A.dtype) + + @pytest.mark.parametrize('d1', [np.int64, np.int32, + np.int16, np.int8, np.uint8]) + @pytest.mark.parametrize('d2', [np.int64, np.float64, + np.float32, np.float16]) + def test_join_multi_dtypes(self, d1, d2): + + dtype1 = np.dtype(d1) + dtype2 = np.dtype(d2) + + left = DataFrame({'k1': np.array([0, 1, 2] * 8, dtype=dtype1), + 'k2': ['foo', 'bar'] * 12, + 'v': np.array(np.arange(24), dtype=np.int64)}) + + index = MultiIndex.from_tuples([(2, 'bar'), (1, 'foo')]) + right = DataFrame({'v2': np.array([5, 7], dtype=dtype2)}, index=index) + + result = left.join(right, on=['k1', 'k2']) + + expected = left.copy() + + if dtype2.kind == 'i': + dtype2 = np.dtype('float64') + expected['v2'] = np.array(np.nan, dtype=dtype2) + expected.loc[(expected.k1 == 2) & (expected.k2 == 'bar'), 'v2'] = 5 + expected.loc[(expected.k1 == 1) & (expected.k2 == 'foo'), 'v2'] = 7 + + tm.assert_frame_equal(result, expected) + + result = left.join(right, on=['k1', 'k2'], sort=True) + expected.sort_values(['k1', 'k2'], kind='mergesort', inplace=True) + tm.assert_frame_equal(result, expected) + + +@pytest.fixture +def left(): + np.random.seed(1234) + return DataFrame( + {'X': Series(np.random.choice( + ['foo', 'bar'], + size=(10,))).astype('category', categories=['foo', 'bar']), + 'Y': np.random.choice(['one', 'two', 'three'], size=(10,))}) + + +@pytest.fixture +def right(): + np.random.seed(1234) + return DataFrame( + {'X': Series(['foo', 'bar']).astype('category', + categories=['foo', 'bar']), + 'Z': [1, 2]}) + + +class TestMergeCategorical(object): + + def test_identical(self, left): + # merging on the same, should preserve dtypes + merged = pd.merge(left, left, on='X') + result = merged.dtypes.sort_index() + expected = Series([CategoricalDtype(), + np.dtype('O'), + np.dtype('O')], + index=['X', 'Y_x', 'Y_y']) + assert_series_equal(result, expected) + + def test_basic(self, left, right): + # we have matching Categorical dtypes in X + # so should preserve the merged column + merged = pd.merge(left, right, on='X') + result = merged.dtypes.sort_index() + expected = Series([CategoricalDtype(), + np.dtype('O'), + np.dtype('int64')], + index=['X', 'Y', 'Z']) + assert_series_equal(result, expected) + + def test_other_columns(self, left, right): + # non-merge columns should preserve if possible + right = right.assign(Z=right.Z.astype('category')) + + merged = pd.merge(left, right, on='X') + result = merged.dtypes.sort_index() + expected = Series([CategoricalDtype(), + np.dtype('O'), + CategoricalDtype()], + index=['X', 'Y', 'Z']) + assert_series_equal(result, expected) + + # categories are preserved + assert left.X.values.is_dtype_equal(merged.X.values) + assert right.Z.values.is_dtype_equal(merged.Z.values) + + @pytest.mark.parametrize( + 'change', [lambda x: x, + lambda x: x.astype('category', + categories=['bar', 'foo']), + lambda x: x.astype('category', + categories=['foo', 'bar', 'bah']), + lambda x: x.astype('category', ordered=True)]) + @pytest.mark.parametrize('how', ['inner', 'outer', 'left', 'right']) + def test_dtype_on_merged_different(self, 
change, how, left, right): + # our merging columns, X now has 2 different dtypes + # so we must be object as a result + + X = change(right.X.astype('object')) + right = right.assign(X=X) + assert is_categorical_dtype(left.X.values) + assert not left.X.values.is_dtype_equal(right.X.values) + + merged = pd.merge(left, right, on='X', how=how) + + result = merged.dtypes.sort_index() + expected = Series([np.dtype('O'), + np.dtype('O'), + np.dtype('int64')], + index=['X', 'Y', 'Z']) + assert_series_equal(result, expected) diff --git a/pandas/tests/tools/test_merge_asof.py b/pandas/tests/tools/test_merge_asof.py index 76798b3c895ea..cdff8f0349c15 100644 --- a/pandas/tests/tools/test_merge_asof.py +++ b/pandas/tests/tools/test_merge_asof.py @@ -147,6 +147,7 @@ def test_basic_categorical(self): trades.ticker = trades.ticker.astype('category') quotes = self.quotes.copy() quotes.ticker = quotes.ticker.astype('category') + expected.ticker = expected.ticker.astype('category') result = merge_asof(trades, quotes, on='time', diff --git a/pandas/tests/types/test_common.py b/pandas/tests/types/test_common.py index 4667bbd47ad18..c15f219c8fad6 100644 --- a/pandas/tests/types/test_common.py +++ b/pandas/tests/types/test_common.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +import pytest import numpy as np from pandas.types.dtypes import DatetimeTZDtype, PeriodDtype, CategoricalDtype @@ -38,17 +39,44 @@ def test_period_dtype(self): self.assertEqual(pandas_dtype(dtype), dtype) -def test_dtype_equal(): - assert is_dtype_equal(np.int64, np.int64) - assert not is_dtype_equal(np.int64, np.float64) +dtypes = dict(datetime_tz=pandas_dtype('datetime64[ns, US/Eastern]'), + datetime=pandas_dtype('datetime64[ns]'), + timedelta=pandas_dtype('timedelta64[ns]'), + period=PeriodDtype('D'), + integer=np.dtype(np.int64), + float=np.dtype(np.float64), + object=np.dtype(np.object), + category=pandas_dtype('category')) - p1 = PeriodDtype('D') - p2 = PeriodDtype('D') - assert is_dtype_equal(p1, p2) - assert not is_dtype_equal(np.int64, p1) - p3 = PeriodDtype('2D') - assert not is_dtype_equal(p1, p3) +@pytest.mark.parametrize('name1,dtype1', + list(dtypes.items()), + ids=lambda x: str(x)) +@pytest.mark.parametrize('name2,dtype2', + list(dtypes.items()), + ids=lambda x: str(x)) +def test_dtype_equal(name1, dtype1, name2, dtype2): - assert not DatetimeTZDtype.is_dtype(np.int64) - assert not PeriodDtype.is_dtype(np.int64) + # match equal to self, but not equal to other + assert is_dtype_equal(dtype1, dtype1) + if name1 != name2: + assert not is_dtype_equal(dtype1, dtype2) + + +def test_dtype_equal_strict(): + + # we are strict on kind equality + for dtype in [np.int8, np.int16, np.int32]: + assert not is_dtype_equal(np.int64, dtype) + + for dtype in [np.float32]: + assert not is_dtype_equal(np.float64, dtype) + + # strict w.r.t. PeriodDtype + assert not is_dtype_equal(PeriodDtype('D'), + PeriodDtype('2D')) + + # strict w.r.t. 
datetime64 + assert not is_dtype_equal( + pandas_dtype('datetime64[ns, US/Eastern]'), + pandas_dtype('datetime64[ns, CET]')) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 3f1e7640ba538..d02f4c5b26c86 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -18,8 +18,10 @@ is_datetime64_dtype, needs_i8_conversion, is_int64_dtype, + is_categorical_dtype, is_integer_dtype, is_float_dtype, + is_numeric_dtype, is_integer, is_int_or_datetime_dtype, is_dtype_equal, @@ -37,7 +39,7 @@ from pandas.core.sorting import is_int64_overflow_possible import pandas.core.algorithms as algos import pandas.core.common as com -from pandas._libs import hashtable as libhashtable, join as libjoin +from pandas._libs import hashtable as libhashtable, join as libjoin, lib # back-compat of pseudo-public API @@ -570,6 +572,10 @@ def __init__(self, left, right, how='inner', on=None, self.right_join_keys, self.join_names) = self._get_merge_keys() + # validate the merge keys dtypes. We may need to coerce + # to avoid incompat dtypes + self._maybe_coerce_merge_keys() + def get_result(self): if self.indicator: self.left, self.right = self._indicator_pre_merge( @@ -760,26 +766,6 @@ def _get_join_info(self): join_index = join_index.astype(object) return join_index, left_indexer, right_indexer - def _get_merge_data(self): - """ - Handles overlapping column names etc. - """ - ldata, rdata = self.left._data, self.right._data - lsuf, rsuf = self.suffixes - - llabels, rlabels = items_overlap_with_suffix( - ldata.items, lsuf, rdata.items, rsuf) - - if not llabels.equals(ldata.items): - ldata = ldata.copy(deep=False) - ldata.set_axis(0, llabels) - - if not rlabels.equals(rdata.items): - rdata = rdata.copy(deep=False) - rdata.set_axis(0, rlabels) - - return ldata, rdata - def _get_merge_keys(self): """ Note: has side effects (copy/delete key columns) @@ -891,6 +877,51 @@ def _get_merge_keys(self): return left_keys, right_keys, join_names + def _maybe_coerce_merge_keys(self): + # we have valid merge keys, but we may have to further + # coerce these if they are originally incompatible types + # + # for example if these are categorical, but are not dtype_equal + # or if we have object and integer dtypes + + for lk, rk, name in zip(self.left_join_keys, + self.right_join_keys, + self.join_names): + if (len(lk) and not len(rk)) or (not len(lk) and len(rk)): + continue + + # if either left or right is a categorical + # then they must match exactly in categories & ordered + if is_categorical_dtype(lk) and is_categorical_dtype(rk): + if lk.is_dtype_equal(rk): + continue + elif is_categorical_dtype(lk) or is_categorical_dtype(rk): + pass + + elif is_dtype_equal(lk.dtype, rk.dtype): + continue + + # if we are numeric, then allow differing + # kinds to proceed, eg. int64 and int8 + # further if we are object, but we infer to + # the same, then proceed + if (is_numeric_dtype(lk) and is_numeric_dtype(rk)): + if lk.dtype.kind == rk.dtype.kind: + continue + + # let's infer and see if we are ok + if lib.infer_dtype(lk) == lib.infer_dtype(rk): + continue + + # Houston, we have a problem! + # let's coerce to object + if name in self.left.columns: + self.left = self.left.assign( + **{name: self.left[name].astype(object)}) + if name in self.right.columns: + self.right = self.right.assign( + **{name: self.right[name].astype(object)}) + def _validate_specification(self): # Hm, any way to make this logic less complicated??
if self.on is None and self.left_on is None and self.right_on is None: @@ -942,9 +973,15 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner', Parameters ---------- + left_keys: ndarray, Index, Series + right_keys: ndarray, Index, Series + sort: boolean, default False + how: string {'inner', 'outer', 'left', 'right'}, default 'inner' Returns ------- + tuple of (left_indexer, right_indexer) + indexers into the left_keys, right_keys """ from functools import partial @@ -1349,6 +1386,13 @@ def _factorize_keys(lk, rk, sort=True): if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk): lk = lk.values rk = rk.values + + # if we exactly match in categories, allow us to use codes + if (is_categorical_dtype(lk) and + is_categorical_dtype(rk) and + lk.is_dtype_equal(rk)): + return lk.codes, rk.codes, len(lk.categories) + if is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk): klass = libhashtable.Int64Factorizer lk = _ensure_int64(com._values_from_object(lk)) From aa53e4fb2de3219173f4d304795f185d5bf934c5 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 11 Mar 2017 12:20:46 -0500 Subject: [PATCH 188/933] API: Drop the name parameter from Categorical Deprecated in 0.17.0 xref #10632 Author: gfyoung Closes #15654 from gfyoung/categorical-name-drop and squashes the following commits: 7e1e7d8 [gfyoung] API: Drop the name parameter from Categorical --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/categorical.py | 17 ++--------------- pandas/io/packers.py | 3 +-- pandas/tests/io/test_pickle.py | 16 ++++------------ pandas/tests/test_categorical.py | 11 +---------- 5 files changed, 9 insertions(+), 40 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index e392023423eb0..f6d5e3df814fc 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -734,7 +734,7 @@ Removal of prior version deprecations/changes - The deprecated ``irow``, ``icol``, ``iget`` and ``iget_value`` methods are removed in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). - The deprecated ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) - +- The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`) .. _whatsnew_0200.performance: diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 47db86ce1e73e..c1e5904693d1c 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -231,8 +231,7 @@ class Categorical(PandasObject): __array_priority__ = 1000 _typ = 'categorical' - def __init__(self, values, categories=None, ordered=False, - name=None, fastpath=False): + def __init__(self, values, categories=None, ordered=False, fastpath=False): self._validate_ordered(ordered) @@ -244,12 +243,6 @@ def __init__(self, values, categories=None, ordered=False, self._ordered = ordered return - if name is not None: - msg = ("the 'name' keyword is removed, use 'name' with consumers " - "of the categorical instead (e.g. 'Series(cat, " - "name=\"something\")'") - warn(msg, UserWarning, stacklevel=2) - # sanitize input if is_categorical_dtype(values): @@ -431,7 +424,7 @@ def from_array(cls, data, **kwargs): return cls(data, **kwargs) @classmethod - def from_codes(cls, codes, categories, ordered=False, name=None): + def from_codes(cls, codes, categories, ordered=False): """ Make a Categorical type from codes and categories arrays. @@ -454,12 +447,6 @@ def from_codes(cls, codes, categories, ordered=False, name=None): categorical. 
If not given, the resulting categorical will be unordered. """ - if name is not None: - msg = ("the 'name' keyword is removed, use 'name' with consumers " - "of the categorical instead (e.g. 'Series(cat, " - "name=\"something\")'") - warn(msg, UserWarning, stacklevel=2) - try: codes = np.asarray(codes, np.int64) except: diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 404be758a7fbe..4662e8b635d3f 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -589,8 +589,7 @@ def decode(obj): from_codes = globals()[obj[u'klass']].from_codes return from_codes(codes=obj[u'codes'], categories=obj[u'categories'], - ordered=obj[u'ordered'], - name=obj[u'name']) + ordered=obj[u'ordered']) elif typ == u'series': dtype = dtype_for(obj[u'dtype']) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 91e70e942089c..fad6237d851fb 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -265,12 +265,8 @@ def python_unpickler(path): def test_pickle_v0_14_1(): - # we have the name warning - # 10482 - with tm.assert_produces_warning(UserWarning): - cat = pd.Categorical(values=['a', 'b', 'c'], - categories=['a', 'b', 'c', 'd'], - name='foobar', ordered=False) + cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False, + categories=['a', 'b', 'c', 'd']) pickle_path = os.path.join(tm.get_data_path(), 'categorical_0_14_1.pickle') # This code was executed once on v0.14.1 to generate the pickle: @@ -286,12 +282,8 @@ def test_pickle_v0_15_2(): # ordered -> _ordered # GH 9347 - # we have the name warning - # 10482 - with tm.assert_produces_warning(UserWarning): - cat = pd.Categorical(values=['a', 'b', 'c'], - categories=['a', 'b', 'c', 'd'], - name='foobar', ordered=False) + cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False, + categories=['a', 'b', 'c', 'd']) pickle_path = os.path.join(tm.get_data_path(), 'categorical_0_15_2.pickle') # This code was executed once on v0.15.2 to generate the pickle: diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 2d5e98d49e152..6c8aeba704c7b 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -682,7 +682,7 @@ def test_print(self): def test_big_print(self): factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ['a', 'b', 'c'], - name='cat', fastpath=True) + fastpath=True) expected = ["[a, b, c, a, b, ..., b, c, a, b, c]", "Length: 600", "Categories (3, object): [a, b, c]"] expected = "\n".join(expected) @@ -1635,15 +1635,6 @@ def test_deprecated_from_array(self): with tm.assert_produces_warning(FutureWarning): Categorical.from_array([0, 1]) - def test_removed_names_produces_warning(self): - - # 10482 - with tm.assert_produces_warning(UserWarning): - Categorical([0, 1], name="a") - - with tm.assert_produces_warning(UserWarning): - Categorical.from_codes([1, 2], ["a", "b", "c"], name="a") - def test_datetime_categorical_comparison(self): dt_cat = pd.Categorical( pd.date_range('2014-01-01', periods=3), ordered=True) From 5eac08a4f7fb2416fd7d3470e111d203a9a23feb Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 11 Mar 2017 12:24:10 -0500 Subject: [PATCH 189/933] MAINT: Remove testing.assert_isinstance (#15652) Deprecated in 0.17.0 xref gh-10458 --- pandas/tests/test_testing.py | 3 --- pandas/util/testing.py | 5 ----- 2 files changed, 8 deletions(-) diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 2fb58ef70e3cb..e5cb953cb35a5 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -765,9 
+765,6 @@ def test_warning(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): self.assertNotAlmostEquals(1, 2) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - tm.assert_isinstance(Series([1, 2]), Series, msg='xxx') - class TestLocale(tm.TestCase): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index ec30a9376a9da..74ff480a9c198 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -991,11 +991,6 @@ def assertIsInstance(obj, cls, msg=''): raise AssertionError(err_msg.format(msg, cls, type(obj))) -def assert_isinstance(obj, class_type_or_tuple, msg=''): - return deprecate('assert_isinstance', assertIsInstance)( - obj, class_type_or_tuple, msg=msg) - - def assertNotIsInstance(obj, cls, msg=''): """Test that obj is not an instance of cls (which can be a class or a tuple of classes, From e0b37f9bb40e2d27629c573bb985d75360282cd4 Mon Sep 17 00:00:00 2001 From: Rouz Azari Date: Sun, 12 Mar 2017 11:15:17 -0400 Subject: [PATCH 190/933] CLN: Cleanup tests for .rank() closes #15640 Author: Rouz Azari Closes #15658 from rouzazari/GH15640 and squashes the following commits: d0a2abc [Rouz Azari] Fixed linting error with datetime.datetime import 9580af0 [Rouz Azari] CLN: Cleanup tests for .rank() --- pandas/tests/frame/test_analytics.py | 169 +------------- pandas/tests/frame/test_rank.py | 268 +++++++++++++++++++++ pandas/tests/series/test_analytics.py | 201 ---------------- pandas/tests/series/test_rank.py | 323 ++++++++++++++++++++++++++ pandas/tests/test_stats.py | 185 --------------- 5 files changed, 592 insertions(+), 554 deletions(-) create mode 100644 pandas/tests/frame/test_rank.py create mode 100644 pandas/tests/series/test_rank.py delete mode 100644 pandas/tests/test_stats.py diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 4758ee1323ca0..6c917444f9f43 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -2,7 +2,7 @@ from __future__ import print_function -from datetime import timedelta, datetime +from datetime import timedelta from distutils.version import LooseVersion import sys import pytest @@ -642,173 +642,6 @@ def test_cumprod(self): df.cumprod(0) df.cumprod(1) - def test_rank(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata - - self.frame['A'][::2] = np.nan - self.frame['B'][::3] = np.nan - self.frame['C'][::4] = np.nan - self.frame['D'][::5] = np.nan - - ranks0 = self.frame.rank() - ranks1 = self.frame.rank(1) - mask = np.isnan(self.frame.values) - - fvals = self.frame.fillna(np.inf).values - - exp0 = np.apply_along_axis(rankdata, 0, fvals) - exp0[mask] = np.nan - - exp1 = np.apply_along_axis(rankdata, 1, fvals) - exp1[mask] = np.nan - - tm.assert_almost_equal(ranks0.values, exp0) - tm.assert_almost_equal(ranks1.values, exp1) - - # integers - df = DataFrame(np.random.randint(0, 5, size=40).reshape((10, 4))) - - result = df.rank() - exp = df.astype(float).rank() - tm.assert_frame_equal(result, exp) - - result = df.rank(1) - exp = df.astype(float).rank(1) - tm.assert_frame_equal(result, exp) - - def test_rank2(self): - df = DataFrame([[1, 3, 2], [1, 2, 3]]) - expected = DataFrame([[1.0, 3.0, 2.0], [1, 2, 3]]) / 3.0 - result = df.rank(1, pct=True) - tm.assert_frame_equal(result, expected) - - df = DataFrame([[1, 3, 2], [1, 2, 3]]) - expected = df.rank(0) / 2.0 - result = df.rank(0, pct=True) - tm.assert_frame_equal(result, expected) - - df = DataFrame([['b', 'c', 'a'], ['a', 'c', 'b']]) - expected = 
DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]]) - result = df.rank(1, numeric_only=False) - tm.assert_frame_equal(result, expected) - - expected = DataFrame([[2.0, 1.5, 1.0], [1, 1.5, 2]]) - result = df.rank(0, numeric_only=False) - tm.assert_frame_equal(result, expected) - - df = DataFrame([['b', np.nan, 'a'], ['a', 'c', 'b']]) - expected = DataFrame([[2.0, nan, 1.0], [1.0, 3.0, 2.0]]) - result = df.rank(1, numeric_only=False) - tm.assert_frame_equal(result, expected) - - expected = DataFrame([[2.0, nan, 1.0], [1.0, 1.0, 2.0]]) - result = df.rank(0, numeric_only=False) - tm.assert_frame_equal(result, expected) - - # f7u12, this does not work without extensive workaround - data = [[datetime(2001, 1, 5), nan, datetime(2001, 1, 2)], - [datetime(2000, 1, 2), datetime(2000, 1, 3), - datetime(2000, 1, 1)]] - df = DataFrame(data) - - # check the rank - expected = DataFrame([[2., nan, 1.], - [2., 3., 1.]]) - result = df.rank(1, numeric_only=False, ascending=True) - tm.assert_frame_equal(result, expected) - - expected = DataFrame([[1., nan, 2.], - [2., 1., 3.]]) - result = df.rank(1, numeric_only=False, ascending=False) - tm.assert_frame_equal(result, expected) - - # mixed-type frames - self.mixed_frame['datetime'] = datetime.now() - self.mixed_frame['timedelta'] = timedelta(days=1, seconds=1) - - result = self.mixed_frame.rank(1) - expected = self.mixed_frame.rank(1, numeric_only=True) - tm.assert_frame_equal(result, expected) - - df = DataFrame({"a": [1e-20, -5, 1e-20 + 1e-40, 10, - 1e60, 1e80, 1e-30]}) - exp = DataFrame({"a": [3.5, 1., 3.5, 5., 6., 7., 2.]}) - tm.assert_frame_equal(df.rank(), exp) - - def test_rank_na_option(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata - - self.frame['A'][::2] = np.nan - self.frame['B'][::3] = np.nan - self.frame['C'][::4] = np.nan - self.frame['D'][::5] = np.nan - - # bottom - ranks0 = self.frame.rank(na_option='bottom') - ranks1 = self.frame.rank(1, na_option='bottom') - - fvals = self.frame.fillna(np.inf).values - - exp0 = np.apply_along_axis(rankdata, 0, fvals) - exp1 = np.apply_along_axis(rankdata, 1, fvals) - - tm.assert_almost_equal(ranks0.values, exp0) - tm.assert_almost_equal(ranks1.values, exp1) - - # top - ranks0 = self.frame.rank(na_option='top') - ranks1 = self.frame.rank(1, na_option='top') - - fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values - fval1 = self.frame.T - fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T - fval1 = fval1.fillna(np.inf).values - - exp0 = np.apply_along_axis(rankdata, 0, fval0) - exp1 = np.apply_along_axis(rankdata, 1, fval1) - - tm.assert_almost_equal(ranks0.values, exp0) - tm.assert_almost_equal(ranks1.values, exp1) - - # descending - - # bottom - ranks0 = self.frame.rank(na_option='top', ascending=False) - ranks1 = self.frame.rank(1, na_option='top', ascending=False) - - fvals = self.frame.fillna(np.inf).values - - exp0 = np.apply_along_axis(rankdata, 0, -fvals) - exp1 = np.apply_along_axis(rankdata, 1, -fvals) - - tm.assert_almost_equal(ranks0.values, exp0) - tm.assert_almost_equal(ranks1.values, exp1) - - # descending - - # top - ranks0 = self.frame.rank(na_option='bottom', ascending=False) - ranks1 = self.frame.rank(1, na_option='bottom', ascending=False) - - fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values - fval1 = self.frame.T - fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T - fval1 = fval1.fillna(np.inf).values - - exp0 = np.apply_along_axis(rankdata, 0, -fval0) - exp1 = np.apply_along_axis(rankdata, 1, -fval1) - - 
tm.assert_numpy_array_equal(ranks0.values, exp0) - tm.assert_numpy_array_equal(ranks1.values, exp1) - - def test_rank_axis(self): - # check if using axes' names gives the same result - df = pd.DataFrame([[2, 1], [4, 3]]) - tm.assert_frame_equal(df.rank(axis=0), df.rank(axis='index')) - tm.assert_frame_equal(df.rank(axis=1), df.rank(axis='columns')) - def test_sem(self): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) self._check_stat_op('sem', alt) diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py new file mode 100644 index 0000000000000..151a89888c329 --- /dev/null +++ b/pandas/tests/frame/test_rank.py @@ -0,0 +1,268 @@ +# -*- coding: utf-8 -*- +from datetime import timedelta, datetime +from distutils.version import LooseVersion +from numpy import nan +import numpy as np + +from pandas import Series, DataFrame + +from pandas.compat import product +from pandas.util.testing import assert_frame_equal +import pandas.util.testing as tm +from pandas.tests.frame.common import TestData + + +class TestRank(tm.TestCase, TestData): + s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]) + df = DataFrame({'A': s, 'B': s}) + + results = { + 'average': np.array([1.5, 5.5, 7.0, 3.5, nan, + 3.5, 1.5, 8.0, nan, 5.5]), + 'min': np.array([1, 5, 7, 3, nan, 3, 1, 8, nan, 5]), + 'max': np.array([2, 6, 7, 4, nan, 4, 2, 8, nan, 6]), + 'first': np.array([1, 5, 7, 3, nan, 4, 2, 8, nan, 6]), + 'dense': np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]), + } + + def test_rank(self): + tm._skip_if_no_scipy() + from scipy.stats import rankdata + + self.frame['A'][::2] = np.nan + self.frame['B'][::3] = np.nan + self.frame['C'][::4] = np.nan + self.frame['D'][::5] = np.nan + + ranks0 = self.frame.rank() + ranks1 = self.frame.rank(1) + mask = np.isnan(self.frame.values) + + fvals = self.frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fvals) + exp0[mask] = np.nan + + exp1 = np.apply_along_axis(rankdata, 1, fvals) + exp1[mask] = np.nan + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # integers + df = DataFrame(np.random.randint(0, 5, size=40).reshape((10, 4))) + + result = df.rank() + exp = df.astype(float).rank() + tm.assert_frame_equal(result, exp) + + result = df.rank(1) + exp = df.astype(float).rank(1) + tm.assert_frame_equal(result, exp) + + def test_rank2(self): + df = DataFrame([[1, 3, 2], [1, 2, 3]]) + expected = DataFrame([[1.0, 3.0, 2.0], [1, 2, 3]]) / 3.0 + result = df.rank(1, pct=True) + tm.assert_frame_equal(result, expected) + + df = DataFrame([[1, 3, 2], [1, 2, 3]]) + expected = df.rank(0) / 2.0 + result = df.rank(0, pct=True) + tm.assert_frame_equal(result, expected) + + df = DataFrame([['b', 'c', 'a'], ['a', 'c', 'b']]) + expected = DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]]) + result = df.rank(1, numeric_only=False) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[2.0, 1.5, 1.0], [1, 1.5, 2]]) + result = df.rank(0, numeric_only=False) + tm.assert_frame_equal(result, expected) + + df = DataFrame([['b', np.nan, 'a'], ['a', 'c', 'b']]) + expected = DataFrame([[2.0, nan, 1.0], [1.0, 3.0, 2.0]]) + result = df.rank(1, numeric_only=False) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[2.0, nan, 1.0], [1.0, 1.0, 2.0]]) + result = df.rank(0, numeric_only=False) + tm.assert_frame_equal(result, expected) + + # f7u12, this does not work without extensive workaround + data = [[datetime(2001, 1, 5), nan, datetime(2001, 1, 2)], + [datetime(2000, 1, 2), datetime(2000, 1, 3), + datetime(2000, 
1, 1)]] + df = DataFrame(data) + + # check the rank + expected = DataFrame([[2., nan, 1.], + [2., 3., 1.]]) + result = df.rank(1, numeric_only=False, ascending=True) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[1., nan, 2.], + [2., 1., 3.]]) + result = df.rank(1, numeric_only=False, ascending=False) + tm.assert_frame_equal(result, expected) + + # mixed-type frames + self.mixed_frame['datetime'] = datetime.now() + self.mixed_frame['timedelta'] = timedelta(days=1, seconds=1) + + result = self.mixed_frame.rank(1) + expected = self.mixed_frame.rank(1, numeric_only=True) + tm.assert_frame_equal(result, expected) + + df = DataFrame({"a": [1e-20, -5, 1e-20 + 1e-40, 10, + 1e60, 1e80, 1e-30]}) + exp = DataFrame({"a": [3.5, 1., 3.5, 5., 6., 7., 2.]}) + tm.assert_frame_equal(df.rank(), exp) + + def test_rank_na_option(self): + tm._skip_if_no_scipy() + from scipy.stats import rankdata + + self.frame['A'][::2] = np.nan + self.frame['B'][::3] = np.nan + self.frame['C'][::4] = np.nan + self.frame['D'][::5] = np.nan + + # bottom + ranks0 = self.frame.rank(na_option='bottom') + ranks1 = self.frame.rank(1, na_option='bottom') + + fvals = self.frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fvals) + exp1 = np.apply_along_axis(rankdata, 1, fvals) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # top + ranks0 = self.frame.rank(na_option='top') + ranks1 = self.frame.rank(1, na_option='top') + + fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values + fval1 = self.frame.T + fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T + fval1 = fval1.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fval0) + exp1 = np.apply_along_axis(rankdata, 1, fval1) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # descending + + # bottom + ranks0 = self.frame.rank(na_option='top', ascending=False) + ranks1 = self.frame.rank(1, na_option='top', ascending=False) + + fvals = self.frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, -fvals) + exp1 = np.apply_along_axis(rankdata, 1, -fvals) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # descending + + # top + ranks0 = self.frame.rank(na_option='bottom', ascending=False) + ranks1 = self.frame.rank(1, na_option='bottom', ascending=False) + + fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values + fval1 = self.frame.T + fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T + fval1 = fval1.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, -fval0) + exp1 = np.apply_along_axis(rankdata, 1, -fval1) + + tm.assert_numpy_array_equal(ranks0.values, exp0) + tm.assert_numpy_array_equal(ranks1.values, exp1) + + def test_rank_axis(self): + # check if using axes' names gives the same result + df = DataFrame([[2, 1], [4, 3]]) + tm.assert_frame_equal(df.rank(axis=0), df.rank(axis='index')) + tm.assert_frame_equal(df.rank(axis=1), df.rank(axis='columns')) + + def test_rank_methods_frame(self): + tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata') + import scipy + from scipy.stats import rankdata + + xs = np.random.randint(0, 21, (100, 26)) + xs = (xs - 10.0) / 10.0 + cols = [chr(ord('z') - i) for i in range(xs.shape[1])] + + for vals in [xs, xs + 1e6, xs * 1e-6]: + df = DataFrame(vals, columns=cols) + + for ax in [0, 1]: + for m in ['average', 'min', 'max', 'first', 'dense']: + result = df.rank(axis=ax, method=m) + sprank = 
np.apply_along_axis( + rankdata, ax, vals, + m if m != 'first' else 'ordinal') + sprank = sprank.astype(np.float64) + expected = DataFrame(sprank, columns=cols) + + if LooseVersion(scipy.__version__) >= '0.17.0': + expected = expected.astype('float64') + tm.assert_frame_equal(result, expected) + + def test_rank_descending(self): + dtypes = ['O', 'f8', 'i8'] + + for dtype, method in product(dtypes, self.results): + if 'i' in dtype: + df = self.df.dropna() + else: + df = self.df.astype(dtype) + + res = df.rank(ascending=False) + expected = (df.max() - df).rank() + assert_frame_equal(res, expected) + + if method == 'first' and dtype == 'O': + continue + + expected = (df.max() - df).rank(method=method) + + if dtype != 'O': + res2 = df.rank(method=method, ascending=False, + numeric_only=True) + assert_frame_equal(res2, expected) + + res3 = df.rank(method=method, ascending=False, + numeric_only=False) + assert_frame_equal(res3, expected) + + def test_rank_2d_tie_methods(self): + df = self.df + + def _check2d(df, expected, method='average', axis=0): + exp_df = DataFrame({'A': expected, 'B': expected}) + + if axis == 1: + df = df.T + exp_df = exp_df.T + + result = df.rank(method=method, axis=axis) + assert_frame_equal(result, exp_df) + + dtypes = [None, object] + disabled = set([(object, 'first')]) + results = self.results + + for method, axis, dtype in product(results, [0, 1], dtypes): + if (dtype, method) in disabled: + continue + frame = df if dtype is None else df.astype(dtype) + _check2d(frame, results[method], method=method, axis=axis) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index b6985abb64e40..c2543581dca50 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -969,207 +969,6 @@ def test_drop_duplicates(self): sc.drop_duplicates(keep=False, inplace=True) assert_series_equal(sc, s[~expected]) - def test_rank(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata - - self.ts[::2] = np.nan - self.ts[:10][::3] = 4. 
- - ranks = self.ts.rank() - oranks = self.ts.astype('O').rank() - - assert_series_equal(ranks, oranks) - - mask = np.isnan(self.ts) - filled = self.ts.fillna(np.inf) - - # rankdata returns a ndarray - exp = Series(rankdata(filled), index=filled.index, name='ts') - exp[mask] = np.nan - - tm.assert_series_equal(ranks, exp) - - iseries = Series(np.arange(5).repeat(2)) - - iranks = iseries.rank() - exp = iseries.astype(float).rank() - assert_series_equal(iranks, exp) - iseries = Series(np.arange(5)) + 1.0 - exp = iseries / 5.0 - iranks = iseries.rank(pct=True) - - assert_series_equal(iranks, exp) - - iseries = Series(np.repeat(1, 100)) - exp = Series(np.repeat(0.505, 100)) - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - iseries[1] = np.nan - exp = Series(np.repeat(50.0 / 99.0, 100)) - exp[1] = np.nan - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - iseries = Series(np.arange(5)) + 1.0 - iseries[4] = np.nan - exp = iseries / 4.0 - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - iseries = Series(np.repeat(np.nan, 100)) - exp = iseries.copy() - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - iseries = Series(np.arange(5)) + 1 - iseries[4] = np.nan - exp = iseries / 4.0 - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - rng = date_range('1/1/1990', periods=5) - iseries = Series(np.arange(5), rng) + 1 - iseries.iloc[4] = np.nan - exp = iseries / 4.0 - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1]) - exp = Series([2, 1, 3, 5, 4, 6.0]) - iranks = iseries.rank() - assert_series_equal(iranks, exp) - - # GH 5968 - iseries = Series(['3 day', '1 day 10m', '-2 day', pd.NaT], - dtype='m8[ns]') - exp = Series([3, 2, 1, np.nan]) - iranks = iseries.rank() - assert_series_equal(iranks, exp) - - values = np.array( - [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40 - ], dtype='float64') - random_order = np.random.permutation(len(values)) - iseries = Series(values[random_order]) - exp = Series(random_order + 1.0, dtype='float64') - iranks = iseries.rank() - assert_series_equal(iranks, exp) - - def test_rank_categorical(self): - # GH issue #15420 rank incorrectly orders ordered categories - - # Test ascending/descending ranking for ordered categoricals - exp = pd.Series([1., 2., 3., 4., 5., 6.]) - exp_desc = pd.Series([6., 5., 4., 3., 2., 1.]) - ordered = pd.Series( - ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'] - ).astype( - 'category', - categories=['first', 'second', 'third', - 'fourth', 'fifth', 'sixth'], - ordered=True - ) - assert_series_equal(ordered.rank(), exp) - assert_series_equal(ordered.rank(ascending=False), exp_desc) - - # Unordered categoricals should be ranked as objects - unordered = pd.Series( - ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], - ).astype( - 'category', - categories=['first', 'second', 'third', - 'fourth', 'fifth', 'sixth'], - ordered=False - ) - exp_unordered = pd.Series([2., 4., 6., 3., 1., 5.]) - res = unordered.rank() - assert_series_equal(res, exp_unordered) - - unordered1 = pd.Series( - [1, 2, 3, 4, 5, 6], - ).astype( - 'category', - categories=[1, 2, 3, 4, 5, 6], - ordered=False - ) - exp_unordered1 = pd.Series([1., 2., 3., 4., 5., 6.]) - res1 = unordered1.rank() - assert_series_equal(res1, exp_unordered1) - - # Test na_option for rank data - na_ser = pd.Series( - ['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN] - 
).astype( - 'category', - categories=[ - 'first', 'second', 'third', 'fourth', - 'fifth', 'sixth', 'seventh' - ], - ordered=True - ) - - exp_top = pd.Series([2., 3., 4., 5., 6., 7., 1.]) - exp_bot = pd.Series([1., 2., 3., 4., 5., 6., 7.]) - exp_keep = pd.Series([1., 2., 3., 4., 5., 6., np.NaN]) - - assert_series_equal(na_ser.rank(na_option='top'), exp_top) - assert_series_equal(na_ser.rank(na_option='bottom'), exp_bot) - assert_series_equal(na_ser.rank(na_option='keep'), exp_keep) - - # Test na_option for rank data with ascending False - exp_top = pd.Series([7., 6., 5., 4., 3., 2., 1.]) - exp_bot = pd.Series([6., 5., 4., 3., 2., 1., 7.]) - exp_keep = pd.Series([6., 5., 4., 3., 2., 1., np.NaN]) - - assert_series_equal( - na_ser.rank(na_option='top', ascending=False), - exp_top - ) - assert_series_equal( - na_ser.rank(na_option='bottom', ascending=False), - exp_bot - ) - assert_series_equal( - na_ser.rank(na_option='keep', ascending=False), - exp_keep - ) - - # Test with pct=True - na_ser = pd.Series( - ['first', 'second', 'third', 'fourth', np.NaN], - ).astype( - 'category', - categories=['first', 'second', 'third', 'fourth'], - ordered=True - ) - exp_top = pd.Series([0.4, 0.6, 0.8, 1., 0.2]) - exp_bot = pd.Series([0.2, 0.4, 0.6, 0.8, 1.]) - exp_keep = pd.Series([0.25, 0.5, 0.75, 1., np.NaN]) - - assert_series_equal(na_ser.rank(na_option='top', pct=True), exp_top) - assert_series_equal(na_ser.rank(na_option='bottom', pct=True), exp_bot) - assert_series_equal(na_ser.rank(na_option='keep', pct=True), exp_keep) - - def test_rank_signature(self): - s = Series([0, 1]) - s.rank(method='average') - self.assertRaises(ValueError, s.rank, 'average') - - def test_rank_inf(self): - pytest.skip('DataFrame.rank does not currently rank ' - 'np.inf and -np.inf properly') - - values = np.array( - [-np.inf, -50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, - 2, 40, np.inf], dtype='float64') - random_order = np.random.permutation(len(values)) - iseries = Series(values[random_order]) - exp = Series(random_order + 1.0, dtype='float64') - iranks = iseries.rank() - assert_series_equal(iranks, exp) - def test_clip(self): val = self.ts.median() diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py new file mode 100644 index 0000000000000..99257b343310f --- /dev/null +++ b/pandas/tests/series/test_rank.py @@ -0,0 +1,323 @@ +# -*- coding: utf-8 -*- +from pandas import compat + +import pytest + +from distutils.version import LooseVersion +from numpy import nan +import numpy as np + +from pandas import (Series, date_range, NaT) + +from pandas.compat import product +from pandas.util.testing import assert_series_equal +import pandas.util.testing as tm +from pandas.tests.series.common import TestData + + +class TestSeriesRank(tm.TestCase, TestData): + s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]) + + results = { + 'average': np.array([1.5, 5.5, 7.0, 3.5, nan, + 3.5, 1.5, 8.0, nan, 5.5]), + 'min': np.array([1, 5, 7, 3, nan, 3, 1, 8, nan, 5]), + 'max': np.array([2, 6, 7, 4, nan, 4, 2, 8, nan, 6]), + 'first': np.array([1, 5, 7, 3, nan, 4, 2, 8, nan, 6]), + 'dense': np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]), + } + + def test_rank(self): + tm._skip_if_no_scipy() + from scipy.stats import rankdata + + self.ts[::2] = np.nan + self.ts[:10][::3] = 4. 
+ + ranks = self.ts.rank() + oranks = self.ts.astype('O').rank() + + assert_series_equal(ranks, oranks) + + mask = np.isnan(self.ts) + filled = self.ts.fillna(np.inf) + + # rankdata returns a ndarray + exp = Series(rankdata(filled), index=filled.index, name='ts') + exp[mask] = np.nan + + tm.assert_series_equal(ranks, exp) + + iseries = Series(np.arange(5).repeat(2)) + + iranks = iseries.rank() + exp = iseries.astype(float).rank() + assert_series_equal(iranks, exp) + iseries = Series(np.arange(5)) + 1.0 + exp = iseries / 5.0 + iranks = iseries.rank(pct=True) + + assert_series_equal(iranks, exp) + + iseries = Series(np.repeat(1, 100)) + exp = Series(np.repeat(0.505, 100)) + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + iseries[1] = np.nan + exp = Series(np.repeat(50.0 / 99.0, 100)) + exp[1] = np.nan + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + iseries = Series(np.arange(5)) + 1.0 + iseries[4] = np.nan + exp = iseries / 4.0 + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + iseries = Series(np.repeat(np.nan, 100)) + exp = iseries.copy() + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + iseries = Series(np.arange(5)) + 1 + iseries[4] = np.nan + exp = iseries / 4.0 + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + rng = date_range('1/1/1990', periods=5) + iseries = Series(np.arange(5), rng) + 1 + iseries.iloc[4] = np.nan + exp = iseries / 4.0 + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1]) + exp = Series([2, 1, 3, 5, 4, 6.0]) + iranks = iseries.rank() + assert_series_equal(iranks, exp) + + # GH 5968 + iseries = Series(['3 day', '1 day 10m', '-2 day', NaT], + dtype='m8[ns]') + exp = Series([3, 2, 1, np.nan]) + iranks = iseries.rank() + assert_series_equal(iranks, exp) + + values = np.array( + [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40 + ], dtype='float64') + random_order = np.random.permutation(len(values)) + iseries = Series(values[random_order]) + exp = Series(random_order + 1.0, dtype='float64') + iranks = iseries.rank() + assert_series_equal(iranks, exp) + + def test_rank_categorical(self): + # GH issue #15420 rank incorrectly orders ordered categories + + # Test ascending/descending ranking for ordered categoricals + exp = Series([1., 2., 3., 4., 5., 6.]) + exp_desc = Series([6., 5., 4., 3., 2., 1.]) + ordered = Series( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'] + ).astype( + 'category', + categories=['first', 'second', 'third', + 'fourth', 'fifth', 'sixth'], + ordered=True + ) + assert_series_equal(ordered.rank(), exp) + assert_series_equal(ordered.rank(ascending=False), exp_desc) + + # Unordered categoricals should be ranked as objects + unordered = Series( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], + ).astype( + 'category', + categories=['first', 'second', 'third', + 'fourth', 'fifth', 'sixth'], + ordered=False + ) + exp_unordered = Series([2., 4., 6., 3., 1., 5.]) + res = unordered.rank() + assert_series_equal(res, exp_unordered) + + unordered1 = Series( + [1, 2, 3, 4, 5, 6], + ).astype( + 'category', + categories=[1, 2, 3, 4, 5, 6], + ordered=False + ) + exp_unordered1 = Series([1., 2., 3., 4., 5., 6.]) + res1 = unordered1.rank() + assert_series_equal(res1, exp_unordered1) + + # Test na_option for rank data + na_ser = Series( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN] + ).astype( + 'category', + 
categories=[ + 'first', 'second', 'third', 'fourth', + 'fifth', 'sixth', 'seventh' + ], + ordered=True + ) + + exp_top = Series([2., 3., 4., 5., 6., 7., 1.]) + exp_bot = Series([1., 2., 3., 4., 5., 6., 7.]) + exp_keep = Series([1., 2., 3., 4., 5., 6., np.NaN]) + + assert_series_equal(na_ser.rank(na_option='top'), exp_top) + assert_series_equal(na_ser.rank(na_option='bottom'), exp_bot) + assert_series_equal(na_ser.rank(na_option='keep'), exp_keep) + + # Test na_option for rank data with ascending False + exp_top = Series([7., 6., 5., 4., 3., 2., 1.]) + exp_bot = Series([6., 5., 4., 3., 2., 1., 7.]) + exp_keep = Series([6., 5., 4., 3., 2., 1., np.NaN]) + + assert_series_equal( + na_ser.rank(na_option='top', ascending=False), + exp_top + ) + assert_series_equal( + na_ser.rank(na_option='bottom', ascending=False), + exp_bot + ) + assert_series_equal( + na_ser.rank(na_option='keep', ascending=False), + exp_keep + ) + + # Test with pct=True + na_ser = Series( + ['first', 'second', 'third', 'fourth', np.NaN], + ).astype( + 'category', + categories=['first', 'second', 'third', 'fourth'], + ordered=True + ) + exp_top = Series([0.4, 0.6, 0.8, 1., 0.2]) + exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.]) + exp_keep = Series([0.25, 0.5, 0.75, 1., np.NaN]) + + assert_series_equal(na_ser.rank(na_option='top', pct=True), exp_top) + assert_series_equal(na_ser.rank(na_option='bottom', pct=True), exp_bot) + assert_series_equal(na_ser.rank(na_option='keep', pct=True), exp_keep) + + def test_rank_signature(self): + s = Series([0, 1]) + s.rank(method='average') + self.assertRaises(ValueError, s.rank, 'average') + + def test_rank_inf(self): + pytest.skip('DataFrame.rank does not currently rank ' + 'np.inf and -np.inf properly') + + values = np.array( + [-np.inf, -50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, + 2, 40, np.inf], dtype='float64') + random_order = np.random.permutation(len(values)) + iseries = Series(values[random_order]) + exp = Series(random_order + 1.0, dtype='float64') + iranks = iseries.rank() + assert_series_equal(iranks, exp) + + def test_rank_tie_methods(self): + s = self.s + + def _check(s, expected, method='average'): + result = s.rank(method=method) + tm.assert_series_equal(result, Series(expected)) + + dtypes = [None, object] + disabled = set([(object, 'first')]) + results = self.results + + for method, dtype in product(results, dtypes): + if (dtype, method) in disabled: + continue + series = s if dtype is None else s.astype(dtype) + _check(series, results[method], method=method) + + def test_rank_methods_series(self): + tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata') + import scipy + from scipy.stats import rankdata + + xs = np.random.randn(9) + xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates + np.random.shuffle(xs) + + index = [chr(ord('a') + i) for i in range(len(xs))] + + for vals in [xs, xs + 1e6, xs * 1e-6]: + ts = Series(vals, index=index) + + for m in ['average', 'min', 'max', 'first', 'dense']: + result = ts.rank(method=m) + sprank = rankdata(vals, m if m != 'first' else 'ordinal') + expected = Series(sprank, index=index) + + if LooseVersion(scipy.__version__) >= '0.17.0': + expected = expected.astype('float64') + tm.assert_series_equal(result, expected) + + def test_rank_dense_method(self): + dtypes = ['O', 'f8', 'i8'] + in_out = [([1], [1]), + ([2], [1]), + ([0], [1]), + ([2, 2], [1, 1]), + ([1, 2, 3], [1, 2, 3]), + ([4, 2, 1], [3, 2, 1],), + ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]), + ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5])] + + for ser, exp 
in in_out: + for dtype in dtypes: + s = Series(ser).astype(dtype) + result = s.rank(method='dense') + expected = Series(exp).astype(result.dtype) + assert_series_equal(result, expected) + + def test_rank_descending(self): + dtypes = ['O', 'f8', 'i8'] + + for dtype, method in product(dtypes, self.results): + if 'i' in dtype: + s = self.s.dropna() + else: + s = self.s.astype(dtype) + + res = s.rank(ascending=False) + expected = (s.max() - s).rank() + assert_series_equal(res, expected) + + if method == 'first' and dtype == 'O': + continue + + expected = (s.max() - s).rank(method=method) + res2 = s.rank(method=method, ascending=False) + assert_series_equal(res2, expected) + + def test_rank_int(self): + s = self.s.dropna().astype('i8') + + for method, res in compat.iteritems(self.results): + result = s.rank(method=method) + expected = Series(res).dropna() + expected.index = result.index + assert_series_equal(result, expected) + + def test_rank_object_bug(self): + # GH 13445 + + # smoke tests + Series([np.nan] * 32).astype(object).rank(ascending=True) + Series([np.nan] * 32).astype(object).rank(ascending=False) diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py deleted file mode 100644 index 118c4147a2019..0000000000000 --- a/pandas/tests/test_stats.py +++ /dev/null @@ -1,185 +0,0 @@ -# -*- coding: utf-8 -*- -from pandas import compat - -from distutils.version import LooseVersion -from numpy import nan -import numpy as np - -from pandas import Series, DataFrame - -from pandas.compat import product -from pandas.util.testing import (assert_frame_equal, assert_series_equal) -import pandas.util.testing as tm - - -class TestRank(tm.TestCase): - s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]) - df = DataFrame({'A': s, 'B': s}) - - results = { - 'average': np.array([1.5, 5.5, 7.0, 3.5, nan, - 3.5, 1.5, 8.0, nan, 5.5]), - 'min': np.array([1, 5, 7, 3, nan, 3, 1, 8, nan, 5]), - 'max': np.array([2, 6, 7, 4, nan, 4, 2, 8, nan, 6]), - 'first': np.array([1, 5, 7, 3, nan, 4, 2, 8, nan, 6]), - 'dense': np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]), - } - - def test_rank_tie_methods(self): - s = self.s - - def _check(s, expected, method='average'): - result = s.rank(method=method) - tm.assert_series_equal(result, Series(expected)) - - dtypes = [None, object] - disabled = set([(object, 'first')]) - results = self.results - - for method, dtype in product(results, dtypes): - if (dtype, method) in disabled: - continue - series = s if dtype is None else s.astype(dtype) - _check(series, results[method], method=method) - - def test_rank_methods_series(self): - tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata') - import scipy - from scipy.stats import rankdata - - xs = np.random.randn(9) - xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates - np.random.shuffle(xs) - - index = [chr(ord('a') + i) for i in range(len(xs))] - - for vals in [xs, xs + 1e6, xs * 1e-6]: - ts = Series(vals, index=index) - - for m in ['average', 'min', 'max', 'first', 'dense']: - result = ts.rank(method=m) - sprank = rankdata(vals, m if m != 'first' else 'ordinal') - expected = Series(sprank, index=index) - - if LooseVersion(scipy.__version__) >= '0.17.0': - expected = expected.astype('float64') - tm.assert_series_equal(result, expected) - - def test_rank_methods_frame(self): - tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata') - import scipy - from scipy.stats import rankdata - - xs = np.random.randint(0, 21, (100, 26)) - xs = (xs - 10.0) / 10.0 - cols = [chr(ord('z') - i) for i in 
range(xs.shape[1])] - - for vals in [xs, xs + 1e6, xs * 1e-6]: - df = DataFrame(vals, columns=cols) - - for ax in [0, 1]: - for m in ['average', 'min', 'max', 'first', 'dense']: - result = df.rank(axis=ax, method=m) - sprank = np.apply_along_axis( - rankdata, ax, vals, - m if m != 'first' else 'ordinal') - sprank = sprank.astype(np.float64) - expected = DataFrame(sprank, columns=cols) - - if LooseVersion(scipy.__version__) >= '0.17.0': - expected = expected.astype('float64') - tm.assert_frame_equal(result, expected) - - def test_rank_dense_method(self): - dtypes = ['O', 'f8', 'i8'] - in_out = [([1], [1]), - ([2], [1]), - ([0], [1]), - ([2, 2], [1, 1]), - ([1, 2, 3], [1, 2, 3]), - ([4, 2, 1], [3, 2, 1],), - ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]), - ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5])] - - for ser, exp in in_out: - for dtype in dtypes: - s = Series(ser).astype(dtype) - result = s.rank(method='dense') - expected = Series(exp).astype(result.dtype) - assert_series_equal(result, expected) - - def test_rank_descending(self): - dtypes = ['O', 'f8', 'i8'] - - for dtype, method in product(dtypes, self.results): - if 'i' in dtype: - s = self.s.dropna() - df = self.df.dropna() - else: - s = self.s.astype(dtype) - df = self.df.astype(dtype) - - res = s.rank(ascending=False) - expected = (s.max() - s).rank() - assert_series_equal(res, expected) - - res = df.rank(ascending=False) - expected = (df.max() - df).rank() - assert_frame_equal(res, expected) - - if method == 'first' and dtype == 'O': - continue - - expected = (s.max() - s).rank(method=method) - res2 = s.rank(method=method, ascending=False) - assert_series_equal(res2, expected) - - expected = (df.max() - df).rank(method=method) - - if dtype != 'O': - res2 = df.rank(method=method, ascending=False, - numeric_only=True) - assert_frame_equal(res2, expected) - - res3 = df.rank(method=method, ascending=False, - numeric_only=False) - assert_frame_equal(res3, expected) - - def test_rank_2d_tie_methods(self): - df = self.df - - def _check2d(df, expected, method='average', axis=0): - exp_df = DataFrame({'A': expected, 'B': expected}) - - if axis == 1: - df = df.T - exp_df = exp_df.T - - result = df.rank(method=method, axis=axis) - assert_frame_equal(result, exp_df) - - dtypes = [None, object] - disabled = set([(object, 'first')]) - results = self.results - - for method, axis, dtype in product(results, [0, 1], dtypes): - if (dtype, method) in disabled: - continue - frame = df if dtype is None else df.astype(dtype) - _check2d(frame, results[method], method=method, axis=axis) - - def test_rank_int(self): - s = self.s.dropna().astype('i8') - - for method, res in compat.iteritems(self.results): - result = s.rank(method=method) - expected = Series(res).dropna() - expected.index = result.index - assert_series_equal(result, expected) - - def test_rank_object_bug(self): - # GH 13445 - - # smoke tests - Series([np.nan] * 32).astype(object).rank(ascending=True) - Series([np.nan] * 32).astype(object).rank(ascending=False) From a212738f07e89a3a6f5905399c3531090d471021 Mon Sep 17 00:00:00 2001 From: "Adam J. 
Stewart" Date: Sun, 12 Mar 2017 15:55:32 -0500 Subject: [PATCH 191/933] DOC: fix typo in timeseries documentation (#15666) --- doc/source/timeseries.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index e09d240ed91b7..c0c178ad2fb49 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -610,7 +610,7 @@ There are several time/date properties that one can access from ``Timestamp`` or dayofweek,"The numer of the day of the week with Monday=0, Sunday=6" weekday,"The number of the day of the week with Monday=0, Sunday=6" weekday_name,"The name of the day in a week (ex: Friday)" - quarter,"Quarter of the date: Jan=Mar = 1, Apr-Jun = 2, etc." + quarter,"Quarter of the date: Jan-Mar = 1, Apr-Jun = 2, etc." days_in_month,"The number of days in the month of the datetime" is_month_start,"Logical indicating if first day of month (defined by frequency)" is_month_end,"Logical indicating if last day of month (defined by frequency)" From 7c5ebd50bbf5b659c6b40205bea5b42dbc892699 Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Sun, 12 Mar 2017 17:40:05 -0500 Subject: [PATCH 192/933] Fix another typo in the timeseries documentation (#15667) --- doc/source/timeseries.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index c0c178ad2fb49..7136b15a7633a 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -607,7 +607,7 @@ There are several time/date properties that one can access from ``Timestamp`` or dayofyear,"The ordinal day of year" weekofyear,"The week ordinal of the year" week,"The week ordinal of the year" - dayofweek,"The numer of the day of the week with Monday=0, Sunday=6" + dayofweek,"The number of the day of the week with Monday=0, Sunday=6" weekday,"The number of the day of the week with Monday=0, Sunday=6" weekday_name,"The name of the day in a week (ex: Friday)" quarter,"Quarter of the date: Jan-Mar = 1, Apr-Jun = 2, etc." 
From 35109568489401dd2172fb76fd38c1c212355227 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Mar 2017 14:01:40 -0400 Subject: [PATCH 193/933] BLD: make 3.6 use *only* conda-forge channels --- .travis.yml | 1 + ci/install_travis.sh | 7 +++++-- ci/requirements-3.6.run | 1 + ci/requirements-3.6.sh | 7 ------- 4 files changed, 7 insertions(+), 9 deletions(-) delete mode 100644 ci/requirements-3.6.sh diff --git a/.travis.yml b/.travis.yml index 97bf881f3b6fc..b0331941e2a1e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -86,6 +86,7 @@ matrix: - JOB_NAME: "36" - TEST_ARGS="--skip-slow --skip-network" - PANDAS_TESTING_MODE="deprecate" + - CONDA_FORGE=true addons: apt: packages: diff --git a/ci/install_travis.sh b/ci/install_travis.sh index b337f6e443be2..12202b4ceee70 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -53,14 +53,17 @@ conda config --set ssl_verify false || exit 1 conda config --set always_yes true --set changeps1 false || exit 1 conda update -q conda +echo "[add channels]" # add the pandas channel to take priority # to add extra packages -echo "[add channels]" conda config --add channels pandas || exit 1 conda config --remove channels defaults || exit 1 conda config --add channels defaults || exit 1 -conda install anaconda-client +if [ "$CONDA_FORGE" ]; then + # add conda-forge channel as priority + conda config --add channels conda-forge || exit 1 +fi # Useful for debugging any issues with conda conda info -a || exit 1 diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 9a6c1c7edbc5e..41c9680ce1b7e 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -14,6 +14,7 @@ html5lib jinja2 sqlalchemy pymysql +feather-format # psycopg2 (not avail on defaults ATM) beautifulsoup4 s3fs diff --git a/ci/requirements-3.6.sh b/ci/requirements-3.6.sh deleted file mode 100644 index 7d88ede751ec8..0000000000000 --- a/ci/requirements-3.6.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -source activate pandas - -echo "install 36" - -conda install -n pandas -c conda-forge feather-format From 56b5a30937e79335029b1727a80f109b7eb0840a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Mar 2017 19:27:30 -0400 Subject: [PATCH 194/933] TST: skip scipy tests for >= 0.19.0 as needed in interpolation / window / sparse closes #15668 --- pandas/tests/frame/test_missing.py | 3 +- pandas/tests/frame/test_rank.py | 3 +- pandas/tests/series/test_missing.py | 7 +++-- pandas/tests/series/test_rank.py | 3 +- pandas/tests/sparse/test_frame.py | 49 +++++++++++++++++++++++++++-- pandas/tests/test_nanops.py | 14 +++------ pandas/tests/test_window.py | 10 +++--- pandas/util/testing.py | 28 ++++++++--------- 8 files changed, 80 insertions(+), 37 deletions(-) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 80ea01d3a05aa..923ed2e7c3444 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -548,7 +548,8 @@ def test_interp_nan_idx(self): df.interpolate(method='values') def test_interp_various(self): - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy', max_version='0.19.0') + df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], 'C': [1, 2, 3, 5, 8, 13, 21]}) df = df.set_index('C') diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py index 151a89888c329..b115218d76958 100644 --- a/pandas/tests/frame/test_rank.py +++ b/pandas/tests/frame/test_rank.py @@ -193,7 +193,8 @@ def test_rank_axis(self): tm.assert_frame_equal(df.rank(axis=1), df.rank(axis='columns')) 
def test_rank_methods_frame(self): - tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata') + tm.skip_if_no_package('scipy', min_version='0.13', + app='scipy.stats.rankdata') import scipy from scipy.stats import rankdata diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 87cfcf32229b4..9e997da517bf6 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -827,7 +827,8 @@ def test_interp_quad(self): assert_series_equal(result, expected) def test_interp_scipy_basic(self): - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy', max_version='0.19.0') + s = Series([1, 3, np.nan, 12, np.nan, 25]) # slinear expected = Series([1., 3., 7.5, 12., 18.5, 25.]) @@ -1027,8 +1028,8 @@ def test_spline(self): def test_spline_extrapolate(self): tm.skip_if_no_package( - 'scipy', '0.15', - 'setting ext on scipy.interpolate.UnivariateSpline') + 'scipy', min_version='0.15', + app='setting ext on scipy.interpolate.UnivariateSpline') s = Series([1, 2, 3, 4, np.nan, 6, np.nan]) result3 = s.interpolate(method='spline', order=1, ext=3) expected3 = Series([1., 2., 3., 4., 5., 6., 6.]) diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py index 99257b343310f..f47eae3adc3ae 100644 --- a/pandas/tests/series/test_rank.py +++ b/pandas/tests/series/test_rank.py @@ -246,7 +246,8 @@ def _check(s, expected, method='average'): _check(series, results[method], method=method) def test_rank_methods_series(self): - tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata') + tm.skip_if_no_package('scipy', min_version='0.13', + app='scipy.stats.rankdata') import scipy from scipy.stats import rankdata diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 4cd5a643ce4be..c0c678c184ee8 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -1129,10 +1129,10 @@ def test_isnotnull(self): @pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811 @pytest.mark.parametrize('columns', [None, list('cd')]) @pytest.mark.parametrize('fill_value', [None, 0, np.nan]) -@pytest.mark.parametrize('dtype', [object, bool, int, float, np.uint16]) +@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16]) def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): # GH 4343 - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy') # Make one ndarray and from it one sparse matrix, both to be used for # constructing frames and comparing results @@ -1180,6 +1180,51 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): tm.assert_equal(sdf.to_coo().dtype, np.object_) +@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811 +def test_from_to_scipy_object(spmatrix, fill_value): + # GH 4343 + dtype = object + columns = list('cd') + index = list('ab') + tm.skip_if_no_package('scipy', max_version='0.19.0') + + # Make one ndarray and from it one sparse matrix, both to be used for + # constructing frames and comparing results + arr = np.eye(2, dtype=dtype) + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = pd.SparseDataFrame(spm, index=index, columns=columns, + default_fill_value=fill_value) + + # Expected result construction is kind of tricky for all + # dtype-fill_value 
combinations; easiest to cast to something generic + # and except later on + rarr = arr.astype(object) + rarr[arr == 0] = np.nan + expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna( + fill_value if fill_value is not None else np.nan) + + # Assert frame is as expected + sdf_obj = sdf.astype(object) + tm.assert_sp_frame_equal(sdf_obj, expected) + tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) + + # Assert spmatrices equal + tm.assert_equal(dict(sdf.to_coo().todok()), dict(spm.todok())) + + # Ensure dtype is preserved if possible + res_dtype = object + tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) + tm.assert_equal(sdf.to_coo().dtype, res_dtype) + + class TestSparseDataFrameArithmetic(tm.TestCase): def test_numeric_op_scalar(self): diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 937c20d009b6b..75a7555d58ca5 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -5,7 +5,7 @@ import warnings import numpy as np -from pandas import Series, isnull +from pandas import Series, isnull, _np_version_under1p9 from pandas.types.common import is_integer_dtype import pandas.core.nanops as nanops import pandas.util.testing as tm @@ -338,8 +338,7 @@ def test_nanmean_overflow(self): # is now consistent with numpy # numpy < 1.9.0 is not computing this correctly - from distutils.version import LooseVersion - if LooseVersion(np.__version__) >= '1.9.0': + if not _np_version_under1p9: for a in [2 ** 55, -2 ** 55, 20150515061816532]: s = Series(a, index=range(500), dtype=np.int64) result = s.mean() @@ -388,8 +387,7 @@ def test_nanstd(self): allow_tdelta=True, allow_obj='convert') def test_nansem(self): - tm.skip_if_no_package('scipy.stats') - tm._skip_if_scipy_0_17() + tm.skip_if_no_package('scipy', min_version='0.17.0') from scipy.stats import sem self.check_funs_ddof(nanops.nansem, sem, allow_complex=False, allow_str=False, allow_date=False, @@ -448,16 +446,14 @@ def _skew_kurt_wrap(self, values, axis=None, func=None): return result def test_nanskew(self): - tm.skip_if_no_package('scipy.stats') - tm._skip_if_scipy_0_17() + tm.skip_if_no_package('scipy', min_version='0.17.0') from scipy.stats import skew func = partial(self._skew_kurt_wrap, func=skew) self.check_funs(nanops.nanskew, func, allow_complex=False, allow_str=False, allow_date=False, allow_tdelta=False) def test_nankurt(self): - tm.skip_if_no_package('scipy.stats') - tm._skip_if_scipy_0_17() + tm.skip_if_no_package('scipy', min_version='0.17.0') from scipy.stats import kurtosis func1 = partial(kurtosis, fisher=True) func = partial(self._skew_kurt_wrap, func=func1) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 3f2973a9834ca..b7164d31b2a5e 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -905,7 +905,7 @@ def test_cmov_window_na_min_periods(self): def test_cmov_window_regular(self): # GH 8238 - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy', max_version='0.19.0') win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', 'blackmanharris', 'nuttall', 'barthann'] @@ -938,7 +938,7 @@ def test_cmov_window_regular(self): def test_cmov_window_regular_linear_range(self): # GH 8238 - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy', max_version='0.19.0') win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', 'blackmanharris', 'nuttall', 'barthann'] @@ -955,7 +955,7 @@ def test_cmov_window_regular_linear_range(self): def test_cmov_window_regular_missing_data(self): # GH 
8238 - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy', max_version='0.19.0') win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', 'blackmanharris', 'nuttall', 'barthann'] @@ -988,7 +988,7 @@ def test_cmov_window_regular_missing_data(self): def test_cmov_window_special(self): # GH 8238 - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy', max_version='0.19.0') win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian'] kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., @@ -1015,7 +1015,7 @@ def test_cmov_window_special(self): def test_cmov_window_special_linear_range(self): # GH 8238 - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy', max_version='0.19.0') win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian'] kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 74ff480a9c198..529ecef3e2d6a 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -304,14 +304,6 @@ def _skip_if_no_scipy(): pytest.skip('scipy.sparse missing') -def _skip_if_scipy_0_17(): - import scipy - v = scipy.__version__ - if v >= LooseVersion("0.17.0"): - import pytest - pytest.skip("scipy 0.17") - - def _check_if_lzma(): try: return compat.import_lzma() @@ -2020,15 +2012,18 @@ def __init__(self, *args, **kwargs): # Dependency checks. Copied this from Nipy/Nipype (Copyright of # respective developers, license: BSD-3) -def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion): - """Check that the minimal version of the required package is installed. +def package_check(pkg_name, min_version=None, max_version=None, app='pandas', + checker=LooseVersion): + """Check that the min/max version of the required package is installed. Parameters ---------- pkg_name : string Name of the required package. - version : string, optional + min_version : string, optional Minimal version number for required package. + max_version : string, optional + Max version number for required package. app : string, optional Application that is performing the check. 
For instance, the name of the tutorial being executed that depends on specific @@ -2040,7 +2035,6 @@ def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion): Examples -------- package_check('numpy', '1.3') - package_check('networkx', '1.0', 'tutorial1') """ @@ -2049,8 +2043,10 @@ msg = '%s requires %s' % (app, pkg_name) else: msg = 'module requires %s' % pkg_name - if version: - msg += ' with version >= %s' % (version,) + if min_version: + msg += ' with version >= %s' % (min_version,) + if max_version: + msg += ' with version < %s' % (max_version,) try: mod = __import__(pkg_name) except ImportError: @@ -2059,7 +2055,9 @@ have_version = mod.__version__ except AttributeError: pytest.skip('Cannot find version for %s' % pkg_name) - if version and checker(have_version) < checker(version): + if min_version and checker(have_version) < checker(min_version): + pytest.skip(msg) + if max_version and checker(have_version) >= checker(max_version): pytest.skip(msg) From 7d04391dd3240c2d7cc80d638a39ad06b1ab679a Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 12 Mar 2017 20:54:52 -0400 Subject: [PATCH 195/933] COMPAT: free parser memory at close() for non-refcnt gc Relying on __dealloc__ to clean up malloc()-ed memory can lead to a perceived "leak" on PyPy, since the garbage collector will not necessarily collect the object as soon as its refcnt reaches 0. Instead, pre-emptively release memory when close() is called. The code still maintains backward compatibility for the case where close() is never called. Author: mattip Closes #15665 from mattip/pypy-compat and squashes the following commits: eaf50fe [mattip] COMPAT: free parser memory at close() for non-refcnt gc --- pandas/_libs/src/parser/tokenizer.c | 4 ++++ pandas/_libs/src/parser/tokenizer.h | 2 ++ pandas/io/parsers.pyx | 18 ++++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index 916f06d357473..6b0775e54da0c 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -162,6 +162,7 @@ int parser_cleanup(parser_t *self) { if (self->cb_cleanup(self->source) < 0) { status = -1; } + self->cb_cleanup = NULL; } return status; @@ -239,6 +240,9 @@ int parser_init(parser_t *self) { void parser_free(parser_t *self) { // opposite of parser_init parser_cleanup(self); +} + +void parser_del(parser_t *self) { free(self); } diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h index 9853b5149bee3..b4344e8a6c070 100644 --- a/pandas/_libs/src/parser/tokenizer.h +++ b/pandas/_libs/src/parser/tokenizer.h @@ -243,6 +243,8 @@ int parser_set_skipfirstnrows(parser_t *self, int64_t nrows); void parser_free(parser_t *self); +void parser_del(parser_t *self); + void parser_set_default_options(parser_t *self); void debug_print_parser(parser_t *self); diff --git a/pandas/io/parsers.pyx b/pandas/io/parsers.pyx index a5858accbb6f5..3728cda559050 100644 --- a/pandas/io/parsers.pyx +++ b/pandas/io/parsers.pyx @@ -214,6 +214,7 @@ cdef extern from "parser/tokenizer.h": int parser_init(parser_t *self) nogil void parser_free(parser_t *self) nogil + void parser_del(parser_t *self) nogil int parser_add_skiprow(parser_t *self, int64_t row) int parser_set_skipfirstnrows(parser_t *self, int64_t nrows) @@ -573,8 +574,13 @@ cdef class TextReader:
def __dealloc__(self): parser_free(self.parser) - kh_destroy_str(self.true_set) - kh_destroy_str(self.false_set) + if self.true_set: + kh_destroy_str(self.true_set) + self.true_set = NULL + if self.false_set: + kh_destroy_str(self.false_set) + self.false_set = NULL + parser_del(self.parser) def close(self): # we need to properly close an open derived @@ -584,6 +590,14 @@ cdef class TextReader: self.handle.close() except: pass + # also preemptively free all allocated memory + parser_free(self.parser) + if self.true_set: + kh_destroy_str(self.true_set) + self.true_set = NULL + if self.false_set: + kh_destroy_str(self.false_set) + self.false_set = NULL def set_error_bad_lines(self, int status): self.parser.error_bad_lines = status From 97c065ebbba0760685343b16e1759bf77f0f9ce0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 13 Mar 2017 10:08:01 -0400 Subject: [PATCH 196/933] DOC: typo in merge.rst --- doc/source/categorical.rst | 1 + doc/source/merging.rst | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 6d85e1a6560b0..2203737ecd7b5 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -617,6 +617,7 @@ Assigning a `Categorical` to parts of a column of other types will use the value df df.dtypes +.. _categorical.merge: Merging ~~~~~~~ diff --git a/doc/source/merging.rst b/doc/source/merging.rst index 70d2ce5b1a664..0b7f9f18190a4 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -779,7 +779,7 @@ resulting dtype will be upcast. .. versionadded:: 0.20.0 -Merging will preserve ``category`` dtypes of the mergands. +Merging will preserve ``category`` dtypes of the mergands. See also the section on :ref:`categoricals <categorical.merge>` The left frame. .. ipython:: python X = pd.Series(np.random.choice(['foo', 'bar'], size=(10,))) X = X.astype('category', categories=['foo', 'bar']) - left = DataFrame({'X': X, - 'Y': np.random.choice(['one', 'two', 'three'], size=(10,))}) + left = pd.DataFrame({'X': X, + 'Y': np.random.choice(['one', 'two', 'three'], size=(10,))}) left left.dtypes @@ -797,8 +797,8 @@ The right frame. ..
ipython:: python - right = DataFrame({'X': Series(['foo', 'bar']).astype('category', categories=['foo', 'bar']), - 'Z': [1, 2]}) + right = pd.DataFrame({'X': Series(['foo', 'bar']).astype('category', categories=['foo', 'bar']), + 'Z': [1, 2]}) right right.dtypes From 03dca9610b6ad91538c1cd1da71fb5196d7bb3f7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 13 Mar 2017 15:47:49 +0100 Subject: [PATCH 197/933] DOC: correct whatsnew note of #15515 --- doc/source/whatsnew/v0.20.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f6d5e3df814fc..8a4f2f47b9853 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -880,7 +880,7 @@ Bug Fixes - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) -- Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 0.2.0``) (:issue:`9351`) +- Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) From 32df1e6ae452f7ddd31dc41fa613992493eb51c4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 13 Mar 2017 11:10:38 -0400 Subject: [PATCH 198/933] DOC: typo in merge.rst --- doc/source/merging.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/merging.rst b/doc/source/merging.rst index 0b7f9f18190a4..fb020727d077e 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -797,7 +797,7 @@ The right frame. .. 
ipython:: python - right = pd.DataFrame({'X': Series(['foo', 'bar']).astype('category', categories=['foo', 'bar']), + right = pd.DataFrame({'X': pd.Series(['foo', 'bar']).astype('category', categories=['foo', 'bar']), 'Z': [1, 2]}) right right.dtypes From 998c801f76256990b98d3f0d2ad885ae27c955a1 Mon Sep 17 00:00:00 2001 From: Aleksey Bilogur Date: Mon, 13 Mar 2017 19:04:39 -0400 Subject: [PATCH 199/933] TST: fix errant tight_layout test (#15671) closes #9351 --- pandas/tests/plotting/common.py | 1 + pandas/tests/plotting/test_hist_method.py | 4 ++-- pandas/tools/plotting.py | 8 ++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 92e2dc7b5d934..c31d8b539ae6f 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -53,6 +53,7 @@ def setUp(self): self.mpl_ge_1_4_0 = plotting._mpl_ge_1_4_0() self.mpl_ge_1_5_0 = plotting._mpl_ge_1_5_0() self.mpl_ge_2_0_0 = plotting._mpl_ge_2_0_0() + self.mpl_ge_2_0_1 = plotting._mpl_ge_2_0_1() if self.mpl_ge_1_4_0: self.bp_n_objects = 7 diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 22de7055e3cea..380bdc12abce4 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -241,8 +241,8 @@ def test_hist_layout(self): @slow # GH 9351 def test_tight_layout(self): - if self.mpl_ge_2_0_0: - df = DataFrame(randn(100, 2)) + if self.mpl_ge_2_0_1: + df = DataFrame(randn(100, 3)) _check_plot_works(df.hist) self.plt.tight_layout() diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index d46c38c117445..d311b0e6d83eb 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -150,6 +150,14 @@ def _mpl_ge_2_0_0(): return False +def _mpl_ge_2_0_1(): + try: + import matplotlib + return matplotlib.__version__ >= LooseVersion('2.0.1') + except ImportError: + return False + + if _mpl_ge_1_5_0(): # Compat with mp 1.5, which uses cycler. 
import cycler From 05d70f4e617a274813bdb02db69143b5554aa106 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 13 Mar 2017 19:49:42 -0400 Subject: [PATCH 200/933] DOC: use shared docs on Index._convert_list_indexer (#15678) CLN: push key coercion to the indexes themselves to simplify a bit --- pandas/core/indexing.py | 86 ++++++++++---------------------------- pandas/indexes/base.py | 37 ++++++++++++++++ pandas/indexes/category.py | 19 ++++++--- pandas/indexes/multi.py | 33 +++++++++++++++ pandas/indexes/numeric.py | 1 + 5 files changed, 106 insertions(+), 70 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 546cbd8337e7e..19b7771251da3 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -7,7 +7,6 @@ from pandas.types.generic import ABCDataFrame, ABCPanel, ABCSeries from pandas.types.common import (is_integer_dtype, is_integer, is_float, - is_categorical_dtype, is_list_like, is_sequence, is_iterator, @@ -1087,51 +1086,24 @@ def _getitem_iterable(self, key, axis=0): inds, = key.nonzero() return self.obj.take(inds, axis=axis, convert=False) else: - if isinstance(key, Index): - keyarr = labels._convert_index_indexer(key) - else: - keyarr = _asarray_tuplesafe(key) - keyarr = labels._convert_arr_indexer(keyarr) - - if is_categorical_dtype(labels): - keyarr = labels._shallow_copy(keyarr) - - # have the index handle the indexer and possibly return - # an indexer or raising - indexer = labels._convert_list_indexer(keyarr, kind=self.name) + # Have the index compute an indexer or return None + # if it cannot handle + indexer, keyarr = labels._convert_listlike_indexer( + key, kind=self.name) if indexer is not None: return self.obj.take(indexer, axis=axis) - # this is not the most robust, but... - if (isinstance(labels, MultiIndex) and len(keyarr) and - not isinstance(keyarr[0], tuple)): - level = 0 - else: - level = None - # existing labels are unique and indexer are unique if labels.is_unique and Index(keyarr).is_unique: try: - result = self.obj.reindex_axis(keyarr, axis=axis, - level=level) - - # this is an error as we are trying to find - # keys in a multi-index that don't exist - if isinstance(labels, MultiIndex) and level is not None: - if (hasattr(result, 'ndim') and - not np.prod(result.shape) and len(keyarr)): - raise KeyError("cannot index a multi-index axis " - "with these keys") - - return result - + return self.obj.reindex_axis(keyarr, axis=axis) except AttributeError: # Series if axis != 0: raise AssertionError('axis must be 0') - return self.obj.reindex(keyarr, level=level) + return self.obj.reindex(keyarr) # existing labels are non-unique else: @@ -1225,49 +1197,33 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): if is_nested_tuple(obj, labels): return labels.get_locs(obj) + elif is_list_like_indexer(obj): + if is_bool_indexer(obj): obj = check_bool_indexer(labels, obj) inds, = obj.nonzero() return inds else: - if isinstance(obj, Index): - # want Index objects to pass through untouched - objarr = obj - else: - objarr = _asarray_tuplesafe(obj) - # The index may want to handle a list indexer differently - # by returning an indexer or raising - indexer = labels._convert_list_indexer(objarr, kind=self.name) + # Have the index compute an indexer or return None + # if it cannot handle + indexer, objarr = labels._convert_listlike_indexer( + obj, kind=self.name) if indexer is not None: return indexer - # this is not the most robust, but... 
- if (isinstance(labels, MultiIndex) and - not isinstance(objarr[0], tuple)): - level = 0 - _, indexer = labels.reindex(objarr, level=level) + # unique index + if labels.is_unique: + indexer = check = labels.get_indexer(objarr) - # take all - if indexer is None: - indexer = np.arange(len(labels)) - - check = labels.levels[0].get_indexer(objarr) + # non-unique (dups) else: - level = None - - # unique index - if labels.is_unique: - indexer = check = labels.get_indexer(objarr) - - # non-unique (dups) - else: - (indexer, - missing) = labels.get_indexer_non_unique(objarr) - # 'indexer' has dupes, create 'check' using 'missing' - check = np.zeros_like(objarr) - check[missing] = -1 + (indexer, + missing) = labels.get_indexer_non_unique(objarr) + # 'indexer' has dupes, create 'check' using 'missing' + check = np.zeros_like(objarr) + check[missing] = -1 mask = check == -1 if mask.any(): diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 7f46f437489a1..5b942e2565c29 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1339,6 +1339,27 @@ def is_int(v): return indexer + def _convert_listlike_indexer(self, keyarr, kind=None): + """ + Parameters + ---------- + keyarr : list-like + Indexer to convert. + + Returns + ------- + tuple (indexer, keyarr) + indexer is an ndarray or None if cannot convert + keyarr are tuple-safe keys + """ + if isinstance(keyarr, Index): + keyarr = self._convert_index_indexer(keyarr) + else: + keyarr = self._convert_arr_indexer(keyarr) + + indexer = self._convert_list_indexer(keyarr, kind=kind) + return indexer, keyarr + _index_shared_docs['_convert_arr_indexer'] = """ Convert an array-like indexer to the appropriate dtype. @@ -1354,6 +1375,7 @@ def is_int(v): @Appender(_index_shared_docs['_convert_arr_indexer']) def _convert_arr_indexer(self, keyarr): + keyarr = _asarray_tuplesafe(keyarr) return keyarr _index_shared_docs['_convert_index_indexer'] = """ @@ -1373,6 +1395,21 @@ def _convert_arr_indexer(self, keyarr): def _convert_index_indexer(self, keyarr): return keyarr + _index_shared_docs['_convert_list_indexer'] = """ + Convert a list-like indexer to the appropriate dtype. + + Parameters + ---------- + keyarr : Index (or sub-class) + Indexer to convert. + kind : iloc, ix, loc, optional + + Returns + ------- + positional indexer or None + """ + + @Appender(_index_shared_docs['_convert_list_indexer']) def _convert_list_indexer(self, keyarr, kind=None): """ passed a key that is tuplesafe that is integer based diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 3d8f76fc56b01..923dd4ec785c5 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -18,6 +18,8 @@ import pandas.core.base as base import pandas.core.missing as missing import pandas.indexes.base as ibase +from pandas.core.common import _asarray_tuplesafe + _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update(dict(target_klass='CategoricalIndex')) @@ -458,12 +460,10 @@ def get_indexer_non_unique(self, target): codes = self.categories.get_indexer(target) return self._engine.get_indexer_non_unique(codes) + @Appender(_index_shared_docs['_convert_list_indexer']) def _convert_list_indexer(self, keyarr, kind=None): - """ - we are passed a list indexer. 
- Return our indexer or raise if all of the values are not included in - the categories - """ + # Return our indexer or raise if all of the values are not included in + # the categories codes = self.categories.get_indexer(keyarr) if (codes == -1).any(): raise KeyError("a list-indexer must only include values that are " @@ -471,6 +471,15 @@ def _convert_list_indexer(self, keyarr, kind=None): return None + @Appender(_index_shared_docs['_convert_arr_indexer']) + def _convert_arr_indexer(self, keyarr): + keyarr = _asarray_tuplesafe(keyarr) + return self._shallow_copy(keyarr) + + @Appender(_index_shared_docs['_convert_index_indexer']) + def _convert_index_indexer(self, keyarr): + return self._shallow_copy(keyarr) + @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index bca1db83b6645..1c1609fed1dd1 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -1568,6 +1568,39 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): return new_index, indexer + def _convert_listlike_indexer(self, keyarr, kind=None): + """ + Parameters + ---------- + keyarr : list-like + Indexer to convert. + + Returns + ------- + tuple (indexer, keyarr) + indexer is an ndarray or None if cannot convert + keyarr are tuple-safe keys + """ + indexer, keyarr = super(MultiIndex, self)._convert_listlike_indexer( + keyarr, kind=kind) + + # are we indexing a specific level + if indexer is None and len(keyarr) and not isinstance(keyarr[0], + tuple): + level = 0 + _, indexer = self.reindex(keyarr, level=level) + + # take all + if indexer is None: + indexer = np.arange(len(self)) + + check = self.levels[0].get_indexer(keyarr) + mask = check == -1 + if mask.any(): + raise KeyError('%s not in index' % keyarr[mask]) + + return indexer, keyarr + @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 9bb70feb2501f..2f897c81975c2 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -203,6 +203,7 @@ def _convert_arr_indexer(self, keyarr): # Cast the indexer to uint64 if possible so # that the values returned from indexing are # also uint64. 
+ keyarr = _asarray_tuplesafe(keyarr) if is_integer_dtype(keyarr): return _asarray_tuplesafe(keyarr, dtype=np.uint64) return keyarr From 7d34d4d5c2d2c6c68b4124076571cfab9c3b4aee Mon Sep 17 00:00:00 2001 From: Jaehoon Hwang Date: Tue, 14 Mar 2017 08:28:05 -0400 Subject: [PATCH 201/933] BUG: upcasting on reshaping ops #13247 Original work done by @jennolsen84, in #13337 closes #13247 Author: Jaehoon Hwang Author: Jae Closes #15594 from jaehoonhwang/Bug13247 and squashes the following commits: 3cd1734 [Jaehoon Hwang] Pass the non-related tests in test_partial and test_reshape 1fa578b [Jaehoon Hwang] Applying request changes removing unnecessary test and renameing 6744636 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' into Bug13247 5bb72c7 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' into Bug13247 a1d5d40 [Jaehoon Hwang] Completed pytest 8122359 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' into Bug13247 0e52b74 [Jaehoon Hwang] Working: Except for pytest 8fec07c [Jaehoon Hwang] Fix: test_concat.py and internals.py 4f6c03e [Jaehoon Hwang] Fix: is_float_dtypes and is_numeric_dtype wrong place d3476c0 [Jaehoon Hwang] Merge branch 'master' into Bug13247 b977615 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' 4b1e5c6 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' into Bug13247 45f7ae9 [Jaehoon Hwang] Added pytest function 468baee [Jae] BUG: upcasting on reshaping ops #13247 --- doc/source/whatsnew/v0.20.0.txt | 2 ++ pandas/core/internals.py | 20 ++++++++++++++++---- pandas/tests/indexing/test_partial.py | 2 +- pandas/tests/test_internals.py | 2 +- pandas/tests/test_reshape.py | 1 + pandas/tests/tools/test_concat.py | 14 ++++++++++++++ 6 files changed, 35 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 8a4f2f47b9853..097efdd097eec 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -886,3 +886,5 @@ Bug Fixes - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) - Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) + +- Concating multiple objects will no longer result in automatically upcast to `float64`, and instead try to find the smallest `dtype` that would suffice (:issue:`13247`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index aa954fbee9a60..1c070b3ed34a9 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -21,6 +21,7 @@ is_datetime64tz_dtype, is_object_dtype, is_datetimelike_v_numeric, + is_float_dtype, is_numeric_dtype, is_numeric_v_string_like, is_extension_type, is_list_like, is_re, @@ -4522,6 +4523,8 @@ def _interleaved_dtype(blocks): return np.dtype('int%s' % (lcd.itemsize * 8 * 2)) return lcd + elif have_int and have_float and not have_complex: + return np.dtype('float64') elif have_complex: return np.dtype('c16') else: @@ -4891,6 +4894,8 @@ def get_empty_dtype_and_na(join_units): upcast_cls = 'datetime' elif is_timedelta64_dtype(dtype): upcast_cls = 'timedelta' + elif is_float_dtype(dtype) or is_numeric_dtype(dtype): + upcast_cls = dtype.name else: upcast_cls = 'float' @@ -4915,8 +4920,6 @@ def get_empty_dtype_and_na(join_units): return np.dtype(np.bool_), None elif 'category' in upcast_classes: return 
np.dtype(np.object_), np.nan - elif 'float' in upcast_classes: - return np.dtype(np.float64), np.nan elif 'datetimetz' in upcast_classes: dtype = upcast_classes['datetimetz'] return dtype[0], tslib.iNaT @@ -4925,7 +4928,17 @@ def get_empty_dtype_and_na(join_units): elif 'timedelta' in upcast_classes: return np.dtype('m8[ns]'), tslib.iNaT else: # pragma - raise AssertionError("invalid dtype determination in get_concat_dtype") + g = np.find_common_type(upcast_classes, []) + if is_float_dtype(g): + return g, g.type(np.nan) + elif is_numeric_dtype(g): + if has_none_blocks: + return np.float64, np.nan + else: + return g, None + else: + msg = "invalid dtype determination in get_concat_dtype" + raise AssertionError(msg) def concatenate_join_units(join_units, concat_axis, copy): @@ -5190,7 +5203,6 @@ def is_null(self): return True def get_reindexed_values(self, empty_dtype, upcasted_na): - if upcasted_na is None: # No upcasting is necessary fill_value = self.block.fill_value diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index a00f880ff6591..b92ffbfb6fe59 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -210,7 +210,7 @@ def f(): df.loc[3] = [6, 7] exp = DataFrame([[6, 7]], index=[3], columns=['A', 'B'], - dtype='float64') + dtype='object') tm.assert_frame_equal(df, exp) def test_series_partial_set(self): diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 5ab2bbc4ac6ba..df5e843097514 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -651,7 +651,7 @@ def test_interleave(self): mgr = create_mgr('a: f8; b: i8') self.assertEqual(mgr.as_matrix().dtype, 'f8') mgr = create_mgr('a: f4; b: i8') - self.assertEqual(mgr.as_matrix().dtype, 'f4') + self.assertEqual(mgr.as_matrix().dtype, 'f8') mgr = create_mgr('a: f4; b: i8; d: object') self.assertEqual(mgr.as_matrix().dtype, 'object') mgr = create_mgr('a: bool; b: i8') diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index d587e4ea6a1fa..24e26be15a44b 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -250,6 +250,7 @@ def test_basic_types(self): self.assertEqual(type(r), exp_df_type) r = get_dummies(s_df, sparse=self.sparse, columns=['a']) + exp_blk_type = pd.core.internals.IntBlock self.assertEqual(type(r[['a_0']]._data.blocks[0]), exp_blk_type) self.assertEqual(type(r[['a_1']]._data.blocks[0]), exp_blk_type) self.assertEqual(type(r[['a_2']]._data.blocks[0]), exp_blk_type) diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index a2b5773f551c9..a0b22892e74c5 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -13,6 +13,8 @@ makeCustomDataframe as mkdf, assert_almost_equal) +import pytest + class ConcatenateBase(tm.TestCase): @@ -1899,3 +1901,15 @@ def test_concat_multiindex_dfs_with_deepcopy(self): tm.assert_frame_equal(result_copy, expected) result_no_copy = pd.concat(example_dict, names=['testname']) tm.assert_frame_equal(result_no_copy, expected) + + +@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) +@pytest.mark.parametrize('dt', np.sctypes['float']) +def test_concat_no_unnecessary_upcast(dt, pdt): + # GH 13247 + dims = pdt().ndim + dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)), + pdt(np.array([np.nan], dtype=dt, ndmin=dims)), + pdt(np.array([5], dtype=dt, ndmin=dims))] + x = pd.concat(dfs) + assert x.values.dtype == dt From 
c7c74ad7b2fc33f68e59a7a4f677ce48c2829b18 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 14 Mar 2017 08:35:38 -0400 Subject: [PATCH 202/933] DOC/TST: clean up docs & tests, xref #15594 BUG: default_fill_value for get_dummies will be 0 --- doc/source/whatsnew/v0.20.0.txt | 37 +++++++++++++++++++++-- pandas/core/internals.py | 6 ++-- pandas/core/reshape.py | 3 +- pandas/tests/indexing/test_partial.py | 2 +- pandas/tests/test_reshape.py | 42 +++++++++++++++------------ pandas/tests/tools/test_concat.py | 11 +++++++ 6 files changed, 74 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 097efdd097eec..a509e45b13d9a 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -516,6 +516,39 @@ New Behavior: In [5]: df['a']['2011-12-31 23:59:59'] Out[5]: 1 +.. _whatsnew_0200.api_breaking.concat_dtypes: + +Concat of different float dtypes will not automatically upcast +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, ``concat`` of multiple objects with different ``float`` dtypes would automatically upcast results to a dtype of ``float64``. +Now the smallest acceptable dtype will be used (:issue:`13247`) + +.. ipython:: python + + df1 = pd.DataFrame(np.array([1.0], dtype=np.float32, ndmin=2)) + df1.dtypes + +.. ipython:: python + + df2 = pd.DataFrame(np.array([np.nan], dtype=np.float32, ndmin=2)) + df2.dtypes + +Previous Behavior: + +.. code-block:: ipython + + In [7]: pd.concat([df1,df2]).dtypes + Out[7]: + 0 float64 + dtype: object + +New Behavior: + +.. ipython:: python + + pd.concat([df1,df2]).dtypes + .. _whatsnew_0200.api_breaking.gbq: Pandas Google BigQuery support has moved @@ -693,6 +726,7 @@ Other API Changes - Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`) - ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`) - ``.merge()`` and ``.join()`` on ``category`` dtype columns will now preserve the category dtype when possible (:issue:`10409`) +- ``SparseDataFrame.default_fill_value`` will be 0, previously was ``nan`` in the return from ``pd.get_dummies(..., sparse=True)`` (:issue:`15594`) .. 
_whatsnew_0200.deprecations: @@ -784,7 +818,6 @@ Bug Fixes - Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) - - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) @@ -886,5 +919,3 @@ Bug Fixes - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) - Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) - -- Concating multiple objects will no longer result in automatically upcast to `float64`, and instead try to find the smallest `dtype` that would suffice (:issue:`13247`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1c070b3ed34a9..0e6c176d950a1 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -4936,9 +4936,9 @@ def get_empty_dtype_and_na(join_units): return np.float64, np.nan else: return g, None - else: - msg = "invalid dtype determination in get_concat_dtype" - raise AssertionError(msg) + + msg = "invalid dtype determination in get_concat_dtype" + raise AssertionError(msg) def concatenate_join_units(join_units, concat_axis, copy): diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 3279a8f2be39d..1e685ae6895ad 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -1308,7 +1308,7 @@ def get_empty_Frame(data, sparse): if not sparse: return DataFrame(index=index) else: - return SparseDataFrame(index=index) + return SparseDataFrame(index=index, default_fill_value=0) # if all NaN if not dummy_na and len(levels) == 0: @@ -1357,6 +1357,7 @@ def get_empty_Frame(data, sparse): sparse_series[col] = SparseSeries(data=sarr, index=index) out = SparseDataFrame(sparse_series, index=index, columns=dummy_cols, + default_fill_value=0, dtype=np.uint8) return out diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index b92ffbfb6fe59..31fadcc88583c 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -205,7 +205,7 @@ def f(): self.assertRaises(ValueError, f) - # these are coerced to float unavoidably (as its a list-like to begin) + # TODO: #15657, these are left as object and not coerced df = DataFrame(columns=['A', 'B']) df.loc[3] = [6, 7] diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 24e26be15a44b..7ba743a6c425c 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -2,7 +2,6 @@ # pylint: disable-msg=W0612,E1101 from pandas import DataFrame, Series -from pandas.core.sparse import SparseDataFrame import pandas as pd from numpy import nan @@ -234,26 +233,31 @@ def test_basic_types(self): 'b': ['A', 'A', 'B', 'C', 'C'], 'c': [2, 3, 3, 3, 2]}) + expected = DataFrame({'a': [1, 0, 0], + 'b': [0, 1, 0], + 'c': [0, 0, 1]}, + dtype='uint8', + columns=list('abc')) if not self.sparse: - exp_df_type = DataFrame - exp_blk_type = pd.core.internals.IntBlock + compare = tm.assert_frame_equal else: - exp_df_type = SparseDataFrame - exp_blk_type = pd.core.internals.SparseBlock - - self.assertEqual( - type(get_dummies(s_list, sparse=self.sparse)), exp_df_type) - self.assertEqual( - type(get_dummies(s_series, sparse=self.sparse)), 
exp_df_type) - - r = get_dummies(s_df, sparse=self.sparse, columns=s_df.columns) - self.assertEqual(type(r), exp_df_type) - - r = get_dummies(s_df, sparse=self.sparse, columns=['a']) - exp_blk_type = pd.core.internals.IntBlock - self.assertEqual(type(r[['a_0']]._data.blocks[0]), exp_blk_type) - self.assertEqual(type(r[['a_1']]._data.blocks[0]), exp_blk_type) - self.assertEqual(type(r[['a_2']]._data.blocks[0]), exp_blk_type) + expected = expected.to_sparse(fill_value=0, kind='integer') + compare = tm.assert_sp_frame_equal + + result = get_dummies(s_list, sparse=self.sparse) + compare(result, expected) + + result = get_dummies(s_series, sparse=self.sparse) + compare(result, expected) + + result = get_dummies(s_df, sparse=self.sparse, columns=s_df.columns) + tm.assert_series_equal(result.get_dtype_counts(), + Series({'uint8': 8})) + + result = get_dummies(s_df, sparse=self.sparse, columns=['a']) + expected = Series({'uint8': 3, 'int64': 1, 'object': 1}).sort_values() + tm.assert_series_equal(result.get_dtype_counts().sort_values(), + expected) def test_just_na(self): just_na_list = [np.nan] diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index a0b22892e74c5..392036a99a297 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -1913,3 +1913,14 @@ def test_concat_no_unnecessary_upcast(dt, pdt): pdt(np.array([5], dtype=dt, ndmin=dims))] x = pd.concat(dfs) assert x.values.dtype == dt + + +@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) +@pytest.mark.parametrize('dt', np.sctypes['int']) +def test_concat_will_upcast(dt, pdt): + dims = pdt().ndim + dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)), + pdt(np.array([np.nan], ndmin=dims)), + pdt(np.array([5], dtype=dt, ndmin=dims))] + x = pd.concat(dfs) + assert x.values.dtype == 'float64' From 2621b31c7dbd68126867266d2b2e32d3e5e222d5 Mon Sep 17 00:00:00 2001 From: "Christopher C. Aycock" Date: Tue, 14 Mar 2017 10:05:38 -0400 Subject: [PATCH 203/933] BUG: Allow multiple 'by' parameters in merge_asof() when DataFrames are indexed (#15676) closes #15676 Author: Christopher C. Aycock Closes #15679 from chrisaycock/GH15676 and squashes the following commits: 965caf2 [Christopher C. Aycock] Verify that 'by' parameters are the same length 4a2cc09 [Christopher C. 
Aycock] BUG: Allow multiple 'by' parameters in merge_asof() when DataFrames are indexed (#15676) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/tools/test_merge_asof.py | 35 +++++++++++++++++++++++++++ pandas/tools/merge.py | 25 +++++++++++++------ 3 files changed, 54 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a509e45b13d9a..3548cbf6eb4a7 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -884,6 +884,7 @@ Bug Fixes - Bug in the HTML display with with a ``MultiIndex`` and truncation (:issue:`14882`) +- Bug in ``pd.merge_asof()`` where ``left_index`` or ``right_index`` caused a failure when multiple ``by`` was specified (:issue:`15676`) - Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`) - Bug in ``DataFrame.pivot_table()`` where ``dropna=True`` would not drop all-NaN columns when the columns was a ``category`` dtype (:issue:`15193`) diff --git a/pandas/tests/tools/test_merge_asof.py b/pandas/tests/tools/test_merge_asof.py index cdff8f0349c15..c9460cc74c94a 100644 --- a/pandas/tests/tools/test_merge_asof.py +++ b/pandas/tests/tools/test_merge_asof.py @@ -368,6 +368,41 @@ def test_multiby_heterogeneous_types(self): by=['ticker', 'exch']) assert_frame_equal(result, expected) + def test_multiby_indexed(self): + # GH15676 + left = pd.DataFrame([ + [pd.to_datetime('20160602'), 1, 'a'], + [pd.to_datetime('20160602'), 2, 'a'], + [pd.to_datetime('20160603'), 1, 'b'], + [pd.to_datetime('20160603'), 2, 'b']], + columns=['time', 'k1', 'k2']).set_index('time') + + right = pd.DataFrame([ + [pd.to_datetime('20160502'), 1, 'a', 1.0], + [pd.to_datetime('20160502'), 2, 'a', 2.0], + [pd.to_datetime('20160503'), 1, 'b', 3.0], + [pd.to_datetime('20160503'), 2, 'b', 4.0]], + columns=['time', 'k1', 'k2', 'value']).set_index('time') + + expected = pd.DataFrame([ + [pd.to_datetime('20160602'), 1, 'a', 1.0], + [pd.to_datetime('20160602'), 2, 'a', 2.0], + [pd.to_datetime('20160603'), 1, 'b', 3.0], + [pd.to_datetime('20160603'), 2, 'b', 4.0]], + columns=['time', 'k1', 'k2', 'value']).set_index('time') + + result = pd.merge_asof(left, + right, + left_index=True, + right_index=True, + by=['k1', 'k2']) + + assert_frame_equal(expected, result) + + with self.assertRaises(MergeError): + pd.merge_asof(left, right, left_index=True, right_index=True, + left_by=['k1', 'k2'], right_by=['k1']) + def test_basic2(self): expected = self.read_data('asof2.csv') diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index d02f4c5b26c86..261884bba54bd 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1165,7 +1165,7 @@ def _validate_specification(self): if self.left_by is not None and self.right_by is None: raise MergeError('missing right_by') - # add by to our key-list so we can have it in the + # add 'by' to our key-list so we can have it in the # output as a key if self.left_by is not None: if not is_list_like(self.left_by): @@ -1173,6 +1173,9 @@ def _validate_specification(self): if not is_list_like(self.right_by): self.right_by = [self.right_by] + if len(self.left_by) != len(self.right_by): + raise MergeError('left_by and right_by must be same length') + self.left_on = self.left_by + list(self.left_on) self.right_on = self.right_by + list(self.right_on) @@ -1264,13 +1267,21 @@ def flip(xs): # a "by" parameter requires special handling if self.left_by is not None: - if len(self.left_join_keys) > 2: - # get tuple 
representation of values if more than one
-                left_by_values = flip(self.left_join_keys[0:-1])
-                right_by_values = flip(self.right_join_keys[0:-1])
+            # remove 'on' parameter from values if one existed
+            if self.left_index and self.right_index:
+                left_by_values = self.left_join_keys
+                right_by_values = self.right_join_keys
+            else:
+                left_by_values = self.left_join_keys[0:-1]
+                right_by_values = self.right_join_keys[0:-1]
+
+            # get tuple representation of values if more than one
+            if len(left_by_values) == 1:
+                left_by_values = left_by_values[0]
+                right_by_values = right_by_values[0]
             else:
-                left_by_values = self.left_join_keys[0]
-                right_by_values = self.right_join_keys[0]
+                left_by_values = flip(left_by_values)
+                right_by_values = flip(right_by_values)
 
             # upcast 'by' parameter because HashTable is limited
             by_type = _get_cython_type_upcast(left_by_values.dtype)

From 2cad4dd0b48946add99d3d90e3dba958f2885349 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Tue, 14 Mar 2017 11:06:20 -0400
Subject: [PATCH 204/933] DOC: elevate deprecations / removals to top-level of
 whatsnew doc to promote visibility

---
 doc/source/whatsnew/v0.20.0.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 3548cbf6eb4a7..9c6f5d3e0596d 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -731,7 +731,7 @@ Other API Changes
 .. _whatsnew_0200.deprecations:
 
 Deprecations
-^^^^^^^^^^^^
+~~~~~~~~~~~~
 
 - ``SparseArray.to_dense()`` has deprecated the ``fill`` parameter, as that parameter was not being respected (:issue:`14647`)
 - ``SparseSeries.to_dense()`` has deprecated the ``sparse_only`` parameter (:issue:`14647`)
@@ -753,7 +753,7 @@ Deprecations
 .. _whatsnew_0200.prior_deprecations:
 
 Removal of prior version deprecations/changes
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 - The ``pandas.rpy`` module is removed. Similar functionality can be accessed
   through the `rpy2 `__ project.
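A brief aside on the ``merge_asof()`` fix in [PATCH 203/933] above: the sketch
below (hypothetical data, mirroring the new ``test_multiby_indexed`` test)
shows the call pattern that previously raised — both frames indexed on the
as-of key, with a list-valued ``by`` — and that now matches exactly on the
``(k1, k2)`` pairs before matching as-of on the index:

    import pandas as pd

    # Both sides are indexed on 'time'; the as-of match runs on the index
    # while the (k1, k2) pairs are matched exactly.
    left = pd.DataFrame(
        {"time": pd.to_datetime(["20160602", "20160602",
                                 "20160603", "20160603"]),
         "k1": [1, 2, 1, 2],
         "k2": ["a", "a", "b", "b"]}).set_index("time")

    right = pd.DataFrame(
        {"time": pd.to_datetime(["20160502", "20160502",
                                 "20160503", "20160503"]),
         "k1": [1, 2, 1, 2],
         "k2": ["a", "a", "b", "b"],
         "value": [1.0, 2.0, 3.0, 4.0]}).set_index("time")

    # Before the patch, combining left_index/right_index with multiple
    # 'by' columns failed; afterwards each (k1, k2) group picks up the
    # most recent 'value' as of its timestamp.
    result = pd.merge_asof(left, right, left_index=True, right_index=True,
                           by=["k1", "k2"])

Note that ``left_by``/``right_by`` must also be the same length, which the
patch now enforces with an explicit ``MergeError``.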
From 76e5185a5ad07672688b096acc94ad5a8a2ec18d Mon Sep 17 00:00:00 2001 From: Yimeng Zhang Date: Wed, 15 Mar 2017 09:26:37 -0400 Subject: [PATCH 205/933] compatibility with scipy 0.19 fix #15662 Author: Yimeng Zhang Closes #15689 from zym1010/fix_scipy019 and squashes the following commits: 3cc6528 [Yimeng Zhang] doc and PEP8 9ed7524 [Yimeng Zhang] fix interpolation related issue with scipy 0.19 ca09705 [Yimeng Zhang] get symmetric window --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/window.py | 3 ++- pandas/tests/frame/test_missing.py | 33 ++++++++++++++++++++--------- pandas/tests/series/test_missing.py | 17 +++++++++++++-- pandas/tests/test_window.py | 10 ++++----- 5 files changed, 46 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9c6f5d3e0596d..2a6c8a1e26955 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -816,6 +816,7 @@ Bug Fixes - Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) - Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`) - Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) +- Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) diff --git a/pandas/core/window.py b/pandas/core/window.py index 6fda60c449f42..9c9f861451309 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -544,7 +544,8 @@ def _pop_args(win_type, arg_names, kwargs): return all_args win_type = _validate_win_type(self.win_type, kwargs) - return sig.get_window(win_type, window).astype(float) + # GH #15662. `False` makes symmetric window, rather than periodic. + return sig.get_window(win_type, window, False).astype(float) def _apply_window(self, mean=True, how=None, **kwargs): """ diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 923ed2e7c3444..93c3ba78a0abf 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -19,6 +19,13 @@ from pandas.tests.frame.common import TestData, _check_mixed_float +try: + import scipy + _is_scipy_ge_0190 = scipy.__version__ >= LooseVersion('0.19.0') +except: + _is_scipy_ge_0190 = False + + def _skip_if_no_pchip(): try: from scipy.interpolate import pchip_interpolate # noqa @@ -548,7 +555,7 @@ def test_interp_nan_idx(self): df.interpolate(method='values') def test_interp_various(self): - tm.skip_if_no_package('scipy', max_version='0.19.0') + tm._skip_if_no_scipy() df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], 'C': [1, 2, 3, 5, 8, 13, 21]}) @@ -561,8 +568,15 @@ def test_interp_various(self): assert_frame_equal(result, expected) result = df.interpolate(method='cubic') - expected.A.loc[3] = 2.81621174 - expected.A.loc[13] = 5.64146581 + # GH #15662. + # new cubic and quadratic interpolation algorithms from scipy 0.19.0. + # previously `splmake` was used. 
See scipy/scipy#6710 + if _is_scipy_ge_0190: + expected.A.loc[3] = 2.81547781 + expected.A.loc[13] = 5.52964175 + else: + expected.A.loc[3] = 2.81621174 + expected.A.loc[13] = 5.64146581 assert_frame_equal(result, expected) result = df.interpolate(method='nearest') @@ -571,8 +585,12 @@ def test_interp_various(self): assert_frame_equal(result, expected, check_dtype=False) result = df.interpolate(method='quadratic') - expected.A.loc[3] = 2.82533638 - expected.A.loc[13] = 6.02817974 + if _is_scipy_ge_0190: + expected.A.loc[3] = 2.82150771 + expected.A.loc[13] = 6.12648668 + else: + expected.A.loc[3] = 2.82533638 + expected.A.loc[13] = 6.02817974 assert_frame_equal(result, expected) result = df.interpolate(method='slinear') @@ -585,11 +603,6 @@ def test_interp_various(self): expected.A.loc[13] = 5 assert_frame_equal(result, expected, check_dtype=False) - result = df.interpolate(method='quadratic') - expected.A.loc[3] = 2.82533638 - expected.A.loc[13] = 6.02817974 - assert_frame_equal(result, expected) - def test_interp_alt_scipy(self): tm._skip_if_no_scipy() df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 9e997da517bf6..7174283494fe7 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -4,6 +4,7 @@ import pytz from datetime import timedelta, datetime +from distutils.version import LooseVersion from numpy import nan import numpy as np import pandas as pd @@ -17,6 +18,12 @@ from .common import TestData +try: + import scipy + _is_scipy_ge_0190 = scipy.__version__ >= LooseVersion('0.19.0') +except: + _is_scipy_ge_0190 = False + def _skip_if_no_pchip(): try: @@ -827,7 +834,7 @@ def test_interp_quad(self): assert_series_equal(result, expected) def test_interp_scipy_basic(self): - tm.skip_if_no_package('scipy', max_version='0.19.0') + tm._skip_if_no_scipy() s = Series([1, 3, np.nan, 12, np.nan, 25]) # slinear @@ -852,7 +859,13 @@ def test_interp_scipy_basic(self): result = s.interpolate(method='zero', downcast='infer') assert_series_equal(result, expected) # quadratic - expected = Series([1, 3., 6.769231, 12., 18.230769, 25.]) + # GH #15662. + # new cubic and quadratic interpolation algorithms from scipy 0.19.0. + # previously `splmake` was used. 
See scipy/scipy#6710
+        if _is_scipy_ge_0190:
+            expected = Series([1, 3., 6.823529, 12., 18.058824, 25.])
+        else:
+            expected = Series([1, 3., 6.769231, 12., 18.230769, 25.])
         result = s.interpolate(method='quadratic')
         assert_series_equal(result, expected)

diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index b7164d31b2a5e..3f2973a9834ca 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -905,7 +905,7 @@ def test_cmov_window_na_min_periods(self):
 
     def test_cmov_window_regular(self):
         # GH 8238
-        tm.skip_if_no_package('scipy', max_version='0.19.0')
+        tm._skip_if_no_scipy()
 
         win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
                      'blackmanharris', 'nuttall', 'barthann']
@@ -938,7 +938,7 @@ def test_cmov_window_regular(self):
 
     def test_cmov_window_regular_linear_range(self):
         # GH 8238
-        tm.skip_if_no_package('scipy', max_version='0.19.0')
+        tm._skip_if_no_scipy()
 
         win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
                      'blackmanharris', 'nuttall', 'barthann']
@@ -955,7 +955,7 @@ def test_cmov_window_regular_linear_range(self):
 
     def test_cmov_window_regular_missing_data(self):
         # GH 8238
-        tm.skip_if_no_package('scipy', max_version='0.19.0')
+        tm._skip_if_no_scipy()
 
         win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
                      'blackmanharris', 'nuttall', 'barthann']
@@ -988,7 +988,7 @@ def test_cmov_window_regular_missing_data(self):
 
     def test_cmov_window_special(self):
         # GH 8238
-        tm.skip_if_no_package('scipy', max_version='0.19.0')
+        tm._skip_if_no_scipy()
 
         win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian']
         kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2.,
@@ -1015,7 +1015,7 @@ def test_cmov_window_special(self):
 
     def test_cmov_window_special_linear_range(self):
         # GH 8238
-        tm.skip_if_no_package('scipy', max_version='0.19.0')
+        tm._skip_if_no_scipy()
 
         win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian']
         kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2.,

From 68212918a65accffb33e0db6d986ad8f080e67ed Mon Sep 17 00:00:00 2001
From: John Zwinck
Date: Wed, 15 Mar 2017 12:04:12 -0400
Subject: [PATCH 206/933] ENH: use constant f32 eps, not np.finfo() during
 import

NumPy docs for np.finfo() say not to call it during import (at module
scope). It's a relatively expensive call, and it modifies the GIL state.
Now we just hard-code it, because it is always the same value anyway.
This avoids touching the GIL at import, which helps avoid deadlocks in
practice.
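As a sanity check (illustrative only — nothing in the patch itself runs
this), the hard-coded literal is the shortest decimal that round-trips to
float32 machine epsilon, so it agrees exactly with what the removed
``np.finfo('f4').eps`` call computed:

    import numpy as np

    # repr(np.finfo(np.float32).eps) == '1.1920929e-07'; converting the
    # literal back to float32 recovers 2**-23 exactly.
    assert np.float32(1.1920929e-07) == np.finfo(np.float32).eps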
closes #14641 Author: John Zwinck Closes #15691 from jzwinck/patch-1 and squashes the following commits: dadb97c [John Zwinck] DOC: mention #14641 in 0.20.0 whatsnew e565230 [John Zwinck] ENH: use constant f32 eps, not np.finfo() during import --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/indexing.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 2a6c8a1e26955..41b6519eb740f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -905,6 +905,7 @@ Bug Fixes - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) +- Avoid use of ``np.finfo()`` during ``import pandas`` removed to mitigate deadlock on Python GIL misuse (:issue:`14641`) - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 19b7771251da3..c80e8c34aa88f 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1852,7 +1852,7 @@ def _convert_key(self, key, is_setter=False): # 32-bit floating point machine epsilon -_eps = np.finfo('f4').eps +_eps = 1.1920929e-07 def length_of_indexer(indexer, target=None): From e7956c45e11244cb1346f088697f3c494612bae4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 15 Mar 2017 19:15:36 -0400 Subject: [PATCH 207/933] TST: reorg tests_multilevel.py tests --- pandas/tests/test_multilevel.py | 813 ++++++++++++++++---------------- 1 file changed, 411 insertions(+), 402 deletions(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index d1b7fdadce6ae..d7b115d808312 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -12,18 +12,15 @@ from pandas import Panel, DataFrame, Series, notnull, isnull, Timestamp from pandas.types.common import is_float_dtype, is_integer_dtype -from pandas.util.testing import (assert_almost_equal, assert_series_equal, - assert_frame_equal, assertRaisesRegexp) import pandas.core.common as com import pandas.util.testing as tm from pandas.compat import (range, lrange, StringIO, lzip, u, product as cart_product, zip) import pandas as pd - import pandas._libs.index as _index -class TestMultiLevel(tm.TestCase): +class Base(object): def setUp(self): @@ -58,6 +55,9 @@ def setUp(self): inplace=True) self.ymd.index.set_names(['year', 'month', 'day'], inplace=True) + +class TestMultiLevel(Base, tm.TestCase): + def test_append(self): a, b = self.frame[:5], self.frame[5:] @@ -87,19 +87,19 @@ def test_append_index(self): (1.2, datetime.datetime(2011, 1, 2, tzinfo=tz)), (1.3, datetime.datetime(2011, 1, 3, tzinfo=tz))] expected = Index([1.1, 1.2, 1.3] + expected_tuples) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) result = midx_lv2.append(idx1) expected = Index(expected_tuples + [1.1, 1.2, 1.3]) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) result = midx_lv2.append(midx_lv2) expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)]) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) result = midx_lv2.append(midx_lv3) - self.assert_index_equal(result, expected) + 
tm.assert_index_equal(result, expected) result = midx_lv3.append(midx_lv2) expected = Index._simple_new( @@ -107,7 +107,7 @@ def test_append_index(self): (1.2, datetime.datetime(2011, 1, 2, tzinfo=tz), 'B'), (1.3, datetime.datetime(2011, 1, 3, tzinfo=tz), 'C')] + expected_tuples), None) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) def test_dataframe_constructor(self): multi = DataFrame(np.random.randn(4, 4), @@ -139,18 +139,18 @@ def test_reindex_level(self): result = month_sums.reindex(self.ymd.index, level=1) expected = self.ymd.groupby(level='month').transform(np.sum) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Series result = month_sums['A'].reindex(self.ymd.index, level=1) expected = self.ymd['A'].groupby(level='month').transform(np.sum) - assert_series_equal(result, expected, check_names=False) + tm.assert_series_equal(result, expected, check_names=False) # axis=1 month_sums = self.ymd.T.sum(axis=1, level='month') result = month_sums.reindex(columns=self.ymd.index, level=1) expected = self.ymd.groupby(level='month').transform(np.sum).T - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_binops_level(self): def _check_op(opname): @@ -160,7 +160,7 @@ def _check_op(opname): broadcasted = self.ymd.groupby(level='month').transform(np.sum) expected = op(self.ymd, broadcasted) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Series op = getattr(Series, opname) @@ -169,7 +169,7 @@ def _check_op(opname): np.sum) expected = op(self.ymd['A'], broadcasted) expected.name = 'A' - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) _check_op('sub') _check_op('add') @@ -179,7 +179,7 @@ def _check_op(opname): def test_pickle(self): def _test_roundtrip(frame): unpickled = self.round_trip_pickle(frame) - assert_frame_equal(frame, unpickled) + tm.assert_frame_equal(frame, unpickled) _test_roundtrip(self.frame) _test_roundtrip(self.frame.T) @@ -189,11 +189,11 @@ def _test_roundtrip(frame): def test_reindex(self): expected = self.frame.iloc[[0, 3]] reindexed = self.frame.loc[[('foo', 'one'), ('bar', 'one')]] - assert_frame_equal(reindexed, expected) + tm.assert_frame_equal(reindexed, expected) with catch_warnings(record=True): reindexed = self.frame.ix[[('foo', 'one'), ('bar', 'one')]] - assert_frame_equal(reindexed, expected) + tm.assert_frame_equal(reindexed, expected) def test_reindex_preserve_levels(self): new_index = self.ymd.index[::10] @@ -214,50 +214,6 @@ def test_reindex_preserve_levels(self): chunk = ymdT.loc[:, new_index] self.assertIs(chunk.columns, new_index) - def test_sort_index_preserve_levels(self): - result = self.frame.sort_index() - self.assertEqual(result.index.names, self.frame.index.names) - - def test_sorting_repr_8017(self): - - np.random.seed(0) - data = np.random.randn(3, 4) - - for gen, extra in [([1., 3., 2., 5.], 4.), ([1, 3, 2, 5], 4), - ([Timestamp('20130101'), Timestamp('20130103'), - Timestamp('20130102'), Timestamp('20130105')], - Timestamp('20130104')), - (['1one', '3one', '2one', '5one'], '4one')]: - columns = MultiIndex.from_tuples([('red', i) for i in gen]) - df = DataFrame(data, index=list('def'), columns=columns) - df2 = pd.concat([df, - DataFrame('world', index=list('def'), - columns=MultiIndex.from_tuples( - [('red', extra)]))], axis=1) - - # check that the repr is good - # make sure that we have a correct sparsified repr - # e.g. 
only 1 header of read - self.assertEqual(str(df2).splitlines()[0].split(), ['red']) - - # GH 8017 - # sorting fails after columns added - - # construct single-dtype then sort - result = df.copy().sort_index(axis=1) - expected = df.iloc[:, [0, 2, 1, 3]] - assert_frame_equal(result, expected) - - result = df2.sort_index(axis=1) - expected = df2.iloc[:, [0, 2, 1, 4, 3]] - assert_frame_equal(result, expected) - - # setitem then sort - result = df.copy() - result[('red', extra)] = 'world' - result = result.sort_index(axis=1) - assert_frame_equal(result, expected) - def test_repr_to_string(self): repr(self.frame) repr(self.ymd) @@ -283,9 +239,11 @@ def test_getitem_simple(self): df = self.frame.T col = df['foo', 'one'] - assert_almost_equal(col.values, df.values[:, 0]) - self.assertRaises(KeyError, df.__getitem__, ('foo', 'four')) - self.assertRaises(KeyError, df.__getitem__, 'foobar') + tm.assert_almost_equal(col.values, df.values[:, 0]) + with pytest.raises(KeyError): + df[('foo', 'four')] + with pytest.raises(KeyError): + df['foobar'] def test_series_getitem(self): s = self.ymd['A'] @@ -297,7 +255,7 @@ def test_series_getitem(self): expected = s.reindex(s.index[42:65]) expected.index = expected.index.droplevel(0).droplevel(0) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = s[2000, 3, 10] expected = s[49] @@ -306,11 +264,11 @@ def test_series_getitem(self): # fancy expected = s.reindex(s.index[49:51]) result = s.loc[[(2000, 3, 10), (2000, 3, 13)]] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) with catch_warnings(record=True): result = s.ix[[(2000, 3, 10), (2000, 3, 13)]] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # key error self.assertRaises(KeyError, s.__getitem__, (2000, 3, 4)) @@ -325,7 +283,7 @@ def test_series_getitem_corner(self): # generator result = s[(x > 0 for x in s)] expected = s[s > 0] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_series_setitem(self): s = self.ymd['A'] @@ -347,29 +305,29 @@ def test_frame_getitem_setitem_boolean(self): result = df[df > 0] expected = df.where(df > 0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) df[df > 0] = 5 values[values > 0] = 5 - assert_almost_equal(df.values, values) + tm.assert_almost_equal(df.values, values) df[df == 5] = 0 values[values == 5] = 0 - assert_almost_equal(df.values, values) + tm.assert_almost_equal(df.values, values) # a df that needs alignment first df[df[:-1] < 0] = 2 np.putmask(values[:-1], values[:-1] < 0, 2) - assert_almost_equal(df.values, values) + tm.assert_almost_equal(df.values, values) - with assertRaisesRegexp(TypeError, 'boolean values only'): + with tm.assertRaisesRegexp(TypeError, 'boolean values only'): df[df * 0] = 2 def test_frame_getitem_setitem_slice(self): # getitem result = self.frame.iloc[:4] expected = self.frame[:4] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # setitem cp = self.frame.copy() @@ -385,25 +343,25 @@ def test_frame_getitem_setitem_multislice(self): df = DataFrame({'value': [1, 2, 3, 7, 8]}, index=midx) result = df.loc[:, 'value'] - assert_series_equal(df['value'], result) + tm.assert_series_equal(df['value'], result) with catch_warnings(record=True): result = df.ix[:, 'value'] - assert_series_equal(df['value'], result) + tm.assert_series_equal(df['value'], result) result = df.loc[df.index[1:3], 'value'] - assert_series_equal(df['value'][1:3], result) + 
tm.assert_series_equal(df['value'][1:3], result) result = df.loc[:, :] - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) result = df df.loc[:, 'value'] = 10 result['value'] = 10 - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) df.loc[:, :] = 10 - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) def test_frame_getitem_multicolumn_empty_level(self): f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) @@ -413,7 +371,7 @@ def test_frame_getitem_multicolumn_empty_level(self): result = f['level1 item1'] expected = DataFrame([['1'], ['2'], ['3']], index=f.index, columns=['level3 item1']) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_frame_setitem_multi_column(self): df = DataFrame(randn(10, 4), columns=[['a', 'a', 'b', 'b'], @@ -421,12 +379,12 @@ def test_frame_setitem_multi_column(self): cp = df.copy() cp['a'] = cp['b'] - assert_frame_equal(cp['a'], cp['b']) + tm.assert_frame_equal(cp['a'], cp['b']) # set with ndarray cp = df.copy() cp['a'] = cp['b'].values - assert_frame_equal(cp['a'], cp['b']) + tm.assert_frame_equal(cp['a'], cp['b']) # --------------------------------------- # #1803 @@ -444,8 +402,8 @@ def test_frame_setitem_multi_column(self): sliced_a1 = df['A', '1'] sliced_a2 = df['A', '2'] sliced_b1 = df['B', '1'] - assert_series_equal(sliced_a1, sliced_b1, check_names=False) - assert_series_equal(sliced_a2, sliced_b1, check_names=False) + tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False) + tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False) self.assertEqual(sliced_a1.name, ('A', '1')) self.assertEqual(sliced_a2.name, ('A', '2')) self.assertEqual(sliced_b1.name, ('B', '1')) @@ -465,9 +423,9 @@ def test_getitem_tuple_plus_slice(self): with catch_warnings(record=True): expected3 = idf.ix[0, 0] - assert_series_equal(result, expected) - assert_series_equal(result, expected2) - assert_series_equal(result, expected3) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected2) + tm.assert_series_equal(result, expected3) def test_getitem_setitem_tuple_plus_columns(self): # GH #1013 @@ -476,26 +434,14 @@ def test_getitem_setitem_tuple_plus_columns(self): result = df.loc[(2000, 1, 6), ['A', 'B', 'C']] expected = df.loc[2000, 1, 6][['A', 'B', 'C']] - assert_series_equal(result, expected) - - def test_getitem_multilevel_index_tuple_unsorted(self): - index_columns = list("abc") - df = DataFrame([[0, 1, 0, "x"], [0, 0, 1, "y"]], - columns=index_columns + ["data"]) - df = df.set_index(index_columns) - query_index = df.index[:1] - rs = df.loc[query_index, "data"] - - xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=['a', 'b', 'c']) - xp = Series(['x'], index=xp_idx, name='data') - assert_series_equal(rs, xp) + tm.assert_series_equal(result, expected) def test_xs(self): xs = self.frame.xs(('bar', 'two')) xs2 = self.frame.loc[('bar', 'two')] - assert_series_equal(xs, xs2) - assert_almost_equal(xs.values, self.frame.values[4]) + tm.assert_series_equal(xs, xs2) + tm.assert_almost_equal(xs.values, self.frame.values[4]) # GH 6574 # missing values in returned index should be preserrved @@ -514,18 +460,18 @@ def test_xs(self): ['xbcde', np.nan, 'zbcde', 'ybcde'], name='a2')) result = df.xs('z', level='a1') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_xs_partial(self): result = self.frame.xs('foo') result2 = self.frame.loc['foo'] expected = self.frame.T['foo'].T - assert_frame_equal(result, expected) - 
assert_frame_equal(result, result2) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, result2) result = self.ymd.xs((2000, 4)) expected = self.ymd.loc[2000, 4] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # ex from #1796 index = MultiIndex(levels=[['foo', 'bar'], ['one', 'two'], [-1, 1]], @@ -537,14 +483,14 @@ def test_xs_partial(self): result = df.xs(['foo', 'one']) expected = df.loc['foo', 'one'] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_xs_level(self): result = self.frame.xs('two', level='second') expected = self.frame[self.frame.index.get_level_values(1) == 'two'] expected.index = expected.index.droplevel(1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) index = MultiIndex.from_tuples([('x', 'y', 'z'), ('a', 'b', 'c'), ( 'p', 'q', 'r')]) @@ -552,7 +498,7 @@ def test_xs_level(self): result = df.xs('c', level=2) expected = df[1:2] expected.index = expected.index.droplevel(2) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # this is a copy in 0.14 result = self.frame.xs('two', level='second') @@ -576,7 +522,7 @@ def test_xs_level_multiple(self): result = df.xs(('a', 4), level=['one', 'four']) expected = df.xs('a').xs(4, level='four') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # this is a copy in 0.14 result = df.xs(('a', 4), level=['one', 'four']) @@ -597,7 +543,7 @@ def f(x): rs = df.xs(20111201, level='date') xp = df.loc[20111201, :] - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) def test_xs_level0(self): from pandas import read_table @@ -612,18 +558,18 @@ def test_xs_level0(self): result = df.xs('a', level=0) expected = df.xs('a') self.assertEqual(len(result), 2) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_xs_level_series(self): s = self.frame['A'] result = s[:, 'two'] expected = self.frame.xs('two', level=1)['A'] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) s = self.ymd['A'] result = s[2000, 5] expected = self.ymd.loc[2000, 5]['A'] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # not implementing this for now @@ -633,7 +579,7 @@ def test_xs_level_series(self): # lv =s.index.get_level_values(1) # expected = s[(lv == 3) | (lv == 4)] # expected.index = expected.index.droplevel(0) - # assert_series_equal(result, expected) + # tm.assert_series_equal(result, expected) # can do this though @@ -649,15 +595,15 @@ def test_getitem_toplevel(self): result = df['foo'] expected = df.reindex(columns=df.columns[:3]) expected.columns = expected.columns.droplevel(0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df['bar'] result2 = df.loc[:, 'bar'] expected = df.reindex(columns=df.columns[3:5]) expected.columns = expected.columns.droplevel(0) - assert_frame_equal(result, expected) - assert_frame_equal(result, result2) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, result2) def test_getitem_setitem_slice_integers(self): index = MultiIndex(levels=[[0, 1, 2], [0, 2]], @@ -667,7 +613,7 @@ def test_getitem_setitem_slice_integers(self): columns=['a', 'b', 'c', 'd']) res = frame.loc[1:2] exp = frame.reindex(frame.index[2:]) - assert_frame_equal(res, exp) + tm.assert_frame_equal(res, exp) frame.loc[1:2] = 7 self.assertTrue((frame.loc[1:2] == 7).values.all()) @@ -676,7 +622,7 @@ def 
test_getitem_setitem_slice_integers(self): res = series.loc[1:2] exp = series.reindex(series.index[2:]) - assert_series_equal(res, exp) + tm.assert_series_equal(res, exp) series.loc[1:2] = 7 self.assertTrue((series.loc[1:2] == 7).values.all()) @@ -691,7 +637,7 @@ def test_getitem_int(self): result = frame.loc[1] expected = frame[-3:] expected.index = expected.index.droplevel(0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # raises exception self.assertRaises(KeyError, frame.loc.__getitem__, 3) @@ -699,7 +645,7 @@ def test_getitem_int(self): # however this will work result = self.frame.iloc[2] expected = self.frame.xs(self.frame.index[2]) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_getitem_partial(self): ymd = self.ymd.T @@ -707,25 +653,17 @@ def test_getitem_partial(self): expected = ymd.reindex(columns=ymd.columns[ymd.columns.labels[1] == 1]) expected.columns = expected.columns.droplevel(0).droplevel(0) - assert_frame_equal(result, expected) - - def test_getitem_slice_not_sorted(self): - df = self.frame.sort_index(level=1).T - - # buglet with int typechecking - result = df.iloc[:, :np.int32(3)] - expected = df.reindex(columns=df.columns[:3]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_setitem_change_dtype(self): dft = self.frame.T s = dft['foo', 'two'] dft['foo', 'two'] = s > s.median() - assert_series_equal(dft['foo', 'two'], s > s.median()) + tm.assert_series_equal(dft['foo', 'two'], s > s.median()) # tm.assertIsInstance(dft._data.blocks[1].items, MultiIndex) reindexed = dft.reindex(columns=[('foo', 'two')]) - assert_series_equal(reindexed['foo', 'two'], s > s.median()) + tm.assert_series_equal(reindexed['foo', 'two'], s > s.median()) def test_frame_setitem_ix(self): self.frame.loc[('bar', 'two'), 'B'] = 5 @@ -746,12 +684,12 @@ def test_frame_setitem_ix(self): def test_fancy_slice_partial(self): result = self.frame.loc['bar':'baz'] expected = self.frame[3:7] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.ymd.loc[(2000, 2):(2000, 4)] lev = self.ymd.index.labels[1] expected = self.ymd[(lev >= 1) & (lev <= 3)] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_getitem_partial_column_select(self): idx = MultiIndex(labels=[[0, 0, 0], [0, 1, 1], [1, 0, 1]], @@ -760,55 +698,19 @@ def test_getitem_partial_column_select(self): result = df.loc[('a', 'y'), :] expected = df.loc[('a', 'y')] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.loc[('a', 'y'), [1, 0]] expected = df.loc[('a', 'y')][[1, 0]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) with catch_warnings(record=True): result = df.ix[('a', 'y'), [1, 0]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) self.assertRaises(KeyError, df.loc.__getitem__, (('a', 'foo'), slice(None, None))) - def test_sort_index_level(self): - df = self.frame.copy() - df.index = np.arange(len(df)) - - # axis=1 - - # series - a_sorted = self.frame['A'].sort_index(level=0) - - # preserve names - self.assertEqual(a_sorted.index.names, self.frame.index.names) - - # inplace - rs = self.frame.copy() - rs.sort_index(level=0, inplace=True) - assert_frame_equal(rs, self.frame.sort_index(level=0)) - - def test_sort_index_level_large_cardinality(self): - - # #2684 (int64) - index = MultiIndex.from_arrays([np.arange(4000)] * 3) - df = 
DataFrame(np.random.randn(4000), index=index, dtype=np.int64) - - # it works! - result = df.sort_index(level=0) - self.assertTrue(result.index.lexsort_depth == 3) - - # #2684 (int32) - index = MultiIndex.from_arrays([np.arange(4000)] * 3) - df = DataFrame(np.random.randn(4000), index=index, dtype=np.int32) - - # it works! - result = df.sort_index(level=0) - self.assertTrue((result.dtypes.values == df.dtypes.values).all()) - self.assertTrue(result.index.lexsort_depth == 3) - def test_delevel_infer_dtype(self): tuples = [tuple for tuple in cart_product( @@ -832,28 +734,6 @@ def test_reset_index_with_drop(self): deleveled = self.series.reset_index(drop=True) tm.assertIsInstance(deleveled, Series) - def test_sort_index_level_by_name(self): - self.frame.index.names = ['first', 'second'] - result = self.frame.sort_index(level='second') - expected = self.frame.sort_index(level=1) - assert_frame_equal(result, expected) - - def test_sort_index_level_mixed(self): - sorted_before = self.frame.sort_index(level=1) - - df = self.frame.copy() - df['foo'] = 'bar' - sorted_after = df.sort_index(level=1) - assert_frame_equal(sorted_before, sorted_after.drop(['foo'], axis=1)) - - dft = self.frame.T - sorted_before = dft.sort_index(level=1, axis=1) - dft['foo', 'three'] = 'bar' - - sorted_after = dft.sort_index(level=1, axis=1) - assert_frame_equal(sorted_before.drop([('foo', 'three')], axis=1), - sorted_after.drop([('foo', 'three')], axis=1)) - def test_count_level(self): def _check_counts(frame, axis=0): index = frame._get_axis(axis) @@ -861,7 +741,7 @@ def _check_counts(frame, axis=0): result = frame.count(axis=axis, level=i) expected = frame.groupby(axis=axis, level=i).count() expected = expected.reindex_like(result).astype('i8') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) self.frame.iloc[1, [1, 2]] = np.nan self.frame.iloc[7, [0, 1]] = np.nan @@ -875,7 +755,7 @@ def _check_counts(frame, axis=0): # can't call with level on regular DataFrame df = tm.makeTimeDataFrame() - assertRaisesRegexp(TypeError, 'hierarchical', df.count, level=0) + tm.assertRaisesRegexp(TypeError, 'hierarchical', df.count, level=0) self.frame['D'] = 'foo' result = self.frame.count(level=0, numeric_only=True) @@ -891,30 +771,30 @@ def test_count_level_series(self): result = s.count(level=0) expected = s.groupby(level=0).count() - assert_series_equal(result.astype('f8'), - expected.reindex(result.index).fillna(0)) + tm.assert_series_equal( + result.astype('f8'), expected.reindex(result.index).fillna(0)) result = s.count(level=1) expected = s.groupby(level=1).count() - assert_series_equal(result.astype('f8'), - expected.reindex(result.index).fillna(0)) + tm.assert_series_equal( + result.astype('f8'), expected.reindex(result.index).fillna(0)) def test_count_level_corner(self): s = self.frame['A'][:0] result = s.count(level=0) expected = Series(0, index=s.index.levels[0], name='A') - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) df = self.frame[:0] result = df.count(level=0) expected = DataFrame({}, index=s.index.levels[0], columns=df.columns).fillna(0).astype(np.int64) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_get_level_number_out_of_bounds(self): - with assertRaisesRegexp(IndexError, "Too many levels"): + with tm.assertRaisesRegexp(IndexError, "Too many levels"): self.frame.index._get_level_number(2) - with assertRaisesRegexp(IndexError, "not a valid level number"): + with tm.assertRaisesRegexp(IndexError, "not a valid 
level number"): self.frame.index._get_level_number(-3) def test_unstack(self): @@ -936,56 +816,56 @@ def test_unstack_multiple_no_empty_columns(self): unstacked = s.unstack([1, 2]) expected = unstacked.dropna(axis=1, how='all') - assert_frame_equal(unstacked, expected) + tm.assert_frame_equal(unstacked, expected) def test_stack(self): # regular roundtrip unstacked = self.ymd.unstack() restacked = unstacked.stack() - assert_frame_equal(restacked, self.ymd) + tm.assert_frame_equal(restacked, self.ymd) unlexsorted = self.ymd.sort_index(level=2) unstacked = unlexsorted.unstack(2) restacked = unstacked.stack() - assert_frame_equal(restacked.sort_index(level=0), self.ymd) + tm.assert_frame_equal(restacked.sort_index(level=0), self.ymd) unlexsorted = unlexsorted[::-1] unstacked = unlexsorted.unstack(1) restacked = unstacked.stack().swaplevel(1, 2) - assert_frame_equal(restacked.sort_index(level=0), self.ymd) + tm.assert_frame_equal(restacked.sort_index(level=0), self.ymd) unlexsorted = unlexsorted.swaplevel(0, 1) unstacked = unlexsorted.unstack(0).swaplevel(0, 1, axis=1) restacked = unstacked.stack(0).swaplevel(1, 2) - assert_frame_equal(restacked.sort_index(level=0), self.ymd) + tm.assert_frame_equal(restacked.sort_index(level=0), self.ymd) # columns unsorted unstacked = self.ymd.unstack() unstacked = unstacked.sort_index(axis=1, ascending=False) restacked = unstacked.stack() - assert_frame_equal(restacked, self.ymd) + tm.assert_frame_equal(restacked, self.ymd) # more than 2 levels in the columns unstacked = self.ymd.unstack(1).unstack(1) result = unstacked.stack(1) expected = self.ymd.unstack() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = unstacked.stack(2) expected = self.ymd.unstack(1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = unstacked.stack(0) expected = self.ymd.stack().unstack(1).unstack(1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # not all levels present in each echelon unstacked = self.ymd.unstack(2).loc[:, ::3] stacked = unstacked.stack().stack() ymd_stacked = self.ymd.stack() - assert_series_equal(stacked, ymd_stacked.reindex(stacked.index)) + tm.assert_series_equal(stacked, ymd_stacked.reindex(stacked.index)) # stack with negative number result = self.ymd.unstack(0).stack(-2) @@ -993,7 +873,7 @@ def test_stack(self): # GH10417 def check(left, right): - assert_series_equal(left, right) + tm.assert_series_equal(left, right) self.assertFalse(left.index.is_unique) li, ri = left.index, right.index tm.assert_index_equal(li, ri) @@ -1049,7 +929,7 @@ def test_unstack_odd_failure(self): result = df.unstack(2) recons = result.stack() - assert_frame_equal(recons, df) + tm.assert_frame_equal(recons, df) def test_stack_mixed_dtype(self): df = self.frame.T @@ -1058,7 +938,7 @@ def test_stack_mixed_dtype(self): stacked = df.stack() result = df['foo'].stack() - assert_series_equal(stacked['foo'], result, check_names=False) + tm.assert_series_equal(stacked['foo'], result, check_names=False) self.assertIs(result.name, None) self.assertEqual(stacked['bar'].dtype, np.float_) @@ -1074,8 +954,8 @@ def test_unstack_bug(self): unstacked = result.unstack() restacked = unstacked.stack() - assert_series_equal(restacked, - result.reindex(restacked.index).astype(float)) + tm.assert_series_equal( + restacked, result.reindex(restacked.index).astype(float)) def test_stack_unstack_preserve_names(self): unstacked = self.frame.unstack() @@ -1088,59 +968,59 @@ def 
test_stack_unstack_preserve_names(self): def test_unstack_level_name(self): result = self.frame.unstack('second') expected = self.frame.unstack(level=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_stack_level_name(self): unstacked = self.frame.unstack('second') result = unstacked.stack('exp') expected = self.frame.unstack().stack(0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame.stack('exp') expected = self.frame.stack() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_stack_unstack_multiple(self): unstacked = self.ymd.unstack(['year', 'month']) expected = self.ymd.unstack('year').unstack('month') - assert_frame_equal(unstacked, expected) + tm.assert_frame_equal(unstacked, expected) self.assertEqual(unstacked.columns.names, expected.columns.names) # series s = self.ymd['A'] s_unstacked = s.unstack(['year', 'month']) - assert_frame_equal(s_unstacked, expected['A']) + tm.assert_frame_equal(s_unstacked, expected['A']) restacked = unstacked.stack(['year', 'month']) restacked = restacked.swaplevel(0, 1).swaplevel(1, 2) restacked = restacked.sort_index(level=0) - assert_frame_equal(restacked, self.ymd) + tm.assert_frame_equal(restacked, self.ymd) self.assertEqual(restacked.index.names, self.ymd.index.names) # GH #451 unstacked = self.ymd.unstack([1, 2]) expected = self.ymd.unstack(1).unstack(1).dropna(axis=1, how='all') - assert_frame_equal(unstacked, expected) + tm.assert_frame_equal(unstacked, expected) unstacked = self.ymd.unstack([2, 1]) expected = self.ymd.unstack(2).unstack(1).dropna(axis=1, how='all') - assert_frame_equal(unstacked, expected.loc[:, unstacked.columns]) + tm.assert_frame_equal(unstacked, expected.loc[:, unstacked.columns]) def test_stack_names_and_numbers(self): unstacked = self.ymd.unstack(['year', 'month']) # Can't use mixture of names and numbers to stack - with assertRaisesRegexp(ValueError, "level should contain"): + with tm.assertRaisesRegexp(ValueError, "level should contain"): unstacked.stack([0, 'month']) def test_stack_multiple_out_of_bounds(self): # nlevels == 3 unstacked = self.ymd.unstack(['year', 'month']) - with assertRaisesRegexp(IndexError, "Too many levels"): + with tm.assertRaisesRegexp(IndexError, "Too many levels"): unstacked.stack([2, 3]) - with assertRaisesRegexp(IndexError, "not a valid level number"): + with tm.assertRaisesRegexp(IndexError, "not a valid level number"): unstacked.stack([-4, -3]) def test_unstack_period_series(self): @@ -1163,9 +1043,9 @@ def test_unstack_period_series(self): columns=['A', 'B']) expected.columns.name = 'str' - assert_frame_equal(result1, expected) - assert_frame_equal(result2, expected) - assert_frame_equal(result3, expected.T) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected.T) idx1 = pd.PeriodIndex(['2013-01', '2013-01', '2013-02', '2013-02', '2013-03', '2013-03'], freq='M', name='period1') @@ -1189,9 +1069,9 @@ def test_unstack_period_series(self): [6, 5, np.nan, np.nan, np.nan, np.nan]], index=e_idx, columns=e_cols) - assert_frame_equal(result1, expected) - assert_frame_equal(result2, expected) - assert_frame_equal(result3, expected.T) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected.T) def test_unstack_period_frame(self): # GH 4342 @@ -1216,8 +1096,8 @@ def test_unstack_period_frame(self): expected = DataFrame([[5, 
1, 6, 2, 6, 1], [4, 2, 3, 3, 5, 4]], index=e_1, columns=e_cols) - assert_frame_equal(result1, expected) - assert_frame_equal(result2, expected) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) e_1 = pd.PeriodIndex(['2014-01', '2014-02', '2014-01', '2014-02'], freq='M', name='period1') @@ -1227,7 +1107,7 @@ def test_unstack_period_frame(self): expected = DataFrame([[5, 4, 2, 3], [1, 2, 6, 5], [6, 3, 1, 4]], index=e_2, columns=e_cols) - assert_frame_equal(result3, expected) + tm.assert_frame_equal(result3, expected) def test_stack_multiple_bug(self): """ bug when some uniques are not present in the data #3170""" @@ -1245,7 +1125,7 @@ def test_stack_multiple_bug(self): rs = down.stack('ID') xp = unst.loc[:, ['VAR1']].resample('W-THU').mean().stack('ID') xp.columns.name = 'Params' - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) def test_stack_dropna(self): # GH #3997 @@ -1256,7 +1136,7 @@ def test_stack_dropna(self): self.assertTrue(len(stacked) > len(stacked.dropna())) stacked = df.unstack().stack(dropna=True) - assert_frame_equal(stacked, stacked.dropna()) + tm.assert_frame_equal(stacked, stacked.dropna()) def test_unstack_multiple_hierarchical(self): df = DataFrame(index=[[0, 0, 0, 0, 1, 1, 1, 1], @@ -1279,7 +1159,7 @@ def test_groupby_transform(self): applied = grouped.apply(lambda x: x * 2) expected = grouped.transform(lambda x: x * 2) result = applied.reindex(expected.index) - assert_series_equal(result, expected, check_names=False) + tm.assert_series_equal(result, expected, check_names=False) def test_unstack_sparse_keyspace(self): # memory problems with naive impl #2278 @@ -1311,7 +1191,7 @@ def test_unstack_unobserved_keys(self): self.assertEqual(len(result.columns), 4) recons = result.stack() - assert_frame_equal(recons, df) + tm.assert_frame_equal(recons, df) def test_groupby_corner(self): midx = MultiIndex(levels=[['foo'], ['bar'], ['baz']], @@ -1344,8 +1224,8 @@ def test_join(self): self.assertFalse(np.isnan(joined.values).all()) - assert_frame_equal(joined, expected, check_names=False - ) # TODO what should join do with names ? + # TODO what should join do with names ? 
+ tm.assert_frame_equal(joined, expected, check_names=False) def test_swaplevel(self): swapped = self.frame['A'].swaplevel() @@ -1353,23 +1233,23 @@ def test_swaplevel(self): swapped3 = self.frame['A'].swaplevel(0, 1) swapped4 = self.frame['A'].swaplevel('first', 'second') self.assertFalse(swapped.index.equals(self.frame.index)) - assert_series_equal(swapped, swapped2) - assert_series_equal(swapped, swapped3) - assert_series_equal(swapped, swapped4) + tm.assert_series_equal(swapped, swapped2) + tm.assert_series_equal(swapped, swapped3) + tm.assert_series_equal(swapped, swapped4) back = swapped.swaplevel() back2 = swapped.swaplevel(0) back3 = swapped.swaplevel(0, 1) back4 = swapped.swaplevel('second', 'first') self.assertTrue(back.index.equals(self.frame.index)) - assert_series_equal(back, back2) - assert_series_equal(back, back3) - assert_series_equal(back, back4) + tm.assert_series_equal(back, back2) + tm.assert_series_equal(back, back3) + tm.assert_series_equal(back, back4) ft = self.frame.T swapped = ft.swaplevel('first', 'second', axis=1) exp = self.frame.swaplevel('first', 'second').T - assert_frame_equal(swapped, exp) + tm.assert_frame_equal(swapped, exp) def test_swaplevel_panel(self): panel = Panel({'ItemA': self.frame, 'ItemB': self.frame * 2}) @@ -1384,20 +1264,20 @@ def test_swaplevel_panel(self): def test_reorder_levels(self): result = self.ymd.reorder_levels(['month', 'day', 'year']) expected = self.ymd.swaplevel(0, 1).swaplevel(1, 2) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.ymd['A'].reorder_levels(['month', 'day', 'year']) expected = self.ymd['A'].swaplevel(0, 1).swaplevel(1, 2) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = self.ymd.T.reorder_levels(['month', 'day', 'year'], axis=1) expected = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) - with assertRaisesRegexp(TypeError, 'hierarchical axis'): + with tm.assertRaisesRegexp(TypeError, 'hierarchical axis'): self.ymd.reorder_levels([1, 2], axis=1) - with assertRaisesRegexp(IndexError, 'Too many levels'): + with tm.assertRaisesRegexp(IndexError, 'Too many levels'): self.ymd.index.reorder_levels([1, 2, 3]) def test_insert_index(self): @@ -1416,29 +1296,13 @@ def test_alignment(self): res = x - y exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) - assert_series_equal(res, exp) + tm.assert_series_equal(res, exp) # hit non-monotonic code path res = x[::-1] - y[::-1] exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) - assert_series_equal(res, exp) - - def test_is_lexsorted(self): - levels = [[0, 1], [0, 1, 2]] - - index = MultiIndex(levels=levels, - labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) - self.assertTrue(index.is_lexsorted()) - - index = MultiIndex(levels=levels, - labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]) - self.assertFalse(index.is_lexsorted()) - - index = MultiIndex(levels=levels, - labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) - self.assertFalse(index.is_lexsorted()) - self.assertEqual(index.lexsort_depth, 0) + tm.assert_series_equal(res, exp) def test_frame_getitem_view(self): df = self.frame.T.copy() @@ -1465,66 +1329,29 @@ def f(): pass self.assertTrue((df['foo', 'one'] == 0).all()) - def test_frame_getitem_not_sorted(self): - df = self.frame.T - df['foo', 'four'] = 'foo' - - arrays = [np.array(x) for x in zip(*df.columns.values)] - - result = 
df['foo'] - result2 = df.loc[:, 'foo'] - expected = df.reindex(columns=df.columns[arrays[0] == 'foo']) - expected.columns = expected.columns.droplevel(0) - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) - - df = df.T - result = df.xs('foo') - result2 = df.loc['foo'] - expected = df.reindex(df.index[arrays[0] == 'foo']) - expected.index = expected.index.droplevel(0) - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) - - def test_series_getitem_not_sorted(self): - arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - tuples = lzip(*arrays) - index = MultiIndex.from_tuples(tuples) - s = Series(randn(8), index=index) - - arrays = [np.array(x) for x in zip(*index.values)] - - result = s['qux'] - result2 = s.loc['qux'] - expected = s[arrays[0] == 'qux'] - expected.index = expected.index.droplevel(0) - assert_series_equal(result, expected) - assert_series_equal(result2, expected) - def test_count(self): frame = self.frame.copy() frame.index.names = ['a', 'b'] result = frame.count(level='b') expect = self.frame.count(level=1) - assert_frame_equal(result, expect, check_names=False) + tm.assert_frame_equal(result, expect, check_names=False) result = frame.count(level='a') expect = self.frame.count(level=0) - assert_frame_equal(result, expect, check_names=False) + tm.assert_frame_equal(result, expect, check_names=False) series = self.series.copy() series.index.names = ['a', 'b'] result = series.count(level='b') expect = self.series.count(level=1) - assert_series_equal(result, expect, check_names=False) + tm.assert_series_equal(result, expect, check_names=False) self.assertEqual(result.index.name, 'b') result = series.count(level='a') expect = self.series.count(level=0) - assert_series_equal(result, expect, check_names=False) + tm.assert_series_equal(result, expect, check_names=False) self.assertEqual(result.index.name, 'a') self.assertRaises(KeyError, series.count, 'x') @@ -1541,7 +1368,7 @@ def test_series_group_min_max(self): # skipna=True leftside = grouped.agg(aggf) rightside = getattr(self.series, op)(level=level, skipna=skipna) - assert_series_equal(leftside, rightside) + tm.assert_series_equal(leftside, rightside) def test_frame_group_ops(self): self.frame.iloc[1, [1, 2]] = np.nan @@ -1550,6 +1377,7 @@ def test_frame_group_ops(self): for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, lrange(2), lrange(2), [False, True]): + if axis == 0: frame = self.frame else: @@ -1570,17 +1398,17 @@ def aggf(x): # for good measure, groupby detail level_index = frame._get_axis(axis).levels[level] - self.assert_index_equal(leftside._get_axis(axis), level_index) - self.assert_index_equal(rightside._get_axis(axis), level_index) + tm.assert_index_equal(leftside._get_axis(axis), level_index) + tm.assert_index_equal(rightside._get_axis(axis), level_index) - assert_frame_equal(leftside, rightside) + tm.assert_frame_equal(leftside, rightside) def test_stat_op_corner(self): obj = Series([10.0], index=MultiIndex.from_tuples([(2, 3)])) result = obj.sum(level=0) expected = Series([10.0], index=[2]) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_frame_any_all_group(self): df = DataFrame( @@ -1591,11 +1419,11 @@ def test_frame_any_all_group(self): result = df.any(level=0) ex = DataFrame({'data': [False, True]}, index=['one', 'two']) - assert_frame_equal(result, ex) + tm.assert_frame_equal(result, ex) result = df.all(level=0) ex = 
DataFrame({'data': [False, False]}, index=['one', 'two']) - assert_frame_equal(result, ex) + tm.assert_frame_equal(result, ex) def test_std_var_pass_ddof(self): index = MultiIndex.from_arrays([np.arange(5).repeat(10), np.tile( @@ -1608,20 +1436,20 @@ def test_std_var_pass_ddof(self): result = getattr(df[0], meth)(level=0, ddof=ddof) expected = df[0].groupby(level=0).agg(alt) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = getattr(df, meth)(level=0, ddof=ddof) expected = df.groupby(level=0).agg(alt) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_frame_series_agg_multiple_levels(self): result = self.ymd.sum(level=['year', 'month']) expected = self.ymd.groupby(level=['year', 'month']).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.ymd['A'].sum(level=['year', 'month']) expected = self.ymd['A'].groupby(level=['year', 'month']).sum() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_groupby_multilevel(self): result = self.ymd.groupby(level=[0, 1]).mean() @@ -1631,12 +1459,12 @@ def test_groupby_multilevel(self): expected = self.ymd.groupby([k1, k2]).mean() - assert_frame_equal(result, expected, check_names=False - ) # TODO groupby with level_values drops names + # TODO groupby with level_values drops names + tm.assert_frame_equal(result, expected, check_names=False) self.assertEqual(result.index.names, self.ymd.index.names[:2]) result2 = self.ymd.groupby(level=self.ymd.index.names[:2]).mean() - assert_frame_equal(result, result2) + tm.assert_frame_equal(result, result2) def test_groupby_multilevel_with_transform(self): pass @@ -1665,15 +1493,15 @@ def test_partial_set(self): exp = self.ymd.copy() df.loc[2000, 4] = 0 exp.loc[2000, 4].values[:] = 0 - assert_frame_equal(df, exp) + tm.assert_frame_equal(df, exp) df['A'].loc[2000, 4] = 1 exp['A'].loc[2000, 4].values[:] = 1 - assert_frame_equal(df, exp) + tm.assert_frame_equal(df, exp) df.loc[2000] = 5 exp.loc[2000].values[:] = 5 - assert_frame_equal(df, exp) + tm.assert_frame_equal(df, exp) # this works...for now df['A'].iloc[14] = 5 @@ -1702,7 +1530,7 @@ def test_unstack_group_index_overflow(self): # test roundtrip stacked = result.stack() - assert_series_equal(s, stacked.reindex(s.index)) + tm.assert_series_equal(s, stacked.reindex(s.index)) # put it at beginning index = MultiIndex(levels=[[0, 1]] + [level] * 8, @@ -1737,7 +1565,7 @@ def test_partial_ix_missing(self): result = self.ymd.loc[2000, 0] expected = self.ymd.loc[2000]['A'] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # need to put in some work here @@ -1767,8 +1595,8 @@ def test_level_with_tuples(self): result2 = series.loc[('foo', 'bar', 0)] expected = series[:2] expected.index = expected.index.droplevel(0) - assert_series_equal(result, expected) - assert_series_equal(result2, expected) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) self.assertRaises(KeyError, series.__getitem__, (('foo', 'bar', 0), 2)) @@ -1776,8 +1604,8 @@ def test_level_with_tuples(self): result2 = frame.xs(('foo', 'bar', 0)) expected = frame[:2] expected.index = expected.index.droplevel(0) - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) index = MultiIndex(levels=[[('foo', 'bar'), ('foo', 'baz'), ( 'foo', 'qux')], [0, 1]], @@ -1790,30 +1618,30 @@ 
def test_level_with_tuples(self): result2 = series.loc[('foo', 'bar')] expected = series[:2] expected.index = expected.index.droplevel(0) - assert_series_equal(result, expected) - assert_series_equal(result2, expected) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) result = frame.loc[('foo', 'bar')] result2 = frame.xs(('foo', 'bar')) expected = frame[:2] expected.index = expected.index.droplevel(0) - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) def test_int_series_slicing(self): s = self.ymd['A'] result = s[5:] expected = s.reindex(s.index[5:]) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) exp = self.ymd['A'].copy() s[5:] = 0 exp.values[5:] = 0 - self.assert_numpy_array_equal(s.values, exp.values) + tm.assert_numpy_array_equal(s.values, exp.values) result = self.ymd[5:] expected = self.ymd.reindex(s.index[5:]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_mixed_depth_get(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], @@ -1826,12 +1654,12 @@ def test_mixed_depth_get(self): result = df['a'] expected = df['a', '', ''] - assert_series_equal(result, expected, check_names=False) + tm.assert_series_equal(result, expected, check_names=False) self.assertEqual(result.name, 'a') result = df['routine1', 'result1'] expected = df['routine1', 'result1', ''] - assert_series_equal(result, expected, check_names=False) + tm.assert_series_equal(result, expected, check_names=False) self.assertEqual(result.name, ('routine1', 'result1')) def test_mixed_depth_insert(self): @@ -1847,7 +1675,7 @@ def test_mixed_depth_insert(self): expected = df.copy() result['b'] = [1, 2, 3, 4] expected['b', '', ''] = [1, 2, 3, 4] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_mixed_depth_drop(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], @@ -1860,16 +1688,16 @@ def test_mixed_depth_drop(self): result = df.drop('a', axis=1) expected = df.drop([('a', '', '')], axis=1) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) result = df.drop(['top'], axis=1) expected = df.drop([('top', 'OD', 'wx')], axis=1) expected = expected.drop([('top', 'OD', 'wy')], axis=1) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) result = df.drop(('top', 'OD', 'wx'), axis=1) expected = df.drop([('top', 'OD', 'wx')], axis=1) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) expected = df.drop([('top', 'OD', 'wy')], axis=1) expected = df.drop('top', axis=1) @@ -1877,7 +1705,7 @@ def test_mixed_depth_drop(self): result = df.drop('result1', level=1, axis=1) expected = df.drop([('routine1', 'result1', ''), ('routine2', 'result1', '')], axis=1) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) def test_drop_nonunique(self): df = DataFrame([["x-a", "x", "a", 1.5], ["x-a", "x", "a", 1.2], @@ -1898,7 +1726,7 @@ def test_drop_nonunique(self): result.index = expected.index - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_mixed_depth_pop(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], @@ -1913,32 +1741,32 @@ def test_mixed_depth_pop(self): df2 = df.copy() result = df1.pop('a') expected = df2.pop(('a', '', '')) - assert_series_equal(expected, result, 
check_names=False) - assert_frame_equal(df1, df2) + tm.assert_series_equal(expected, result, check_names=False) + tm.assert_frame_equal(df1, df2) self.assertEqual(result.name, 'a') expected = df1['top'] df1 = df1.drop(['top'], axis=1) result = df2.pop('top') - assert_frame_equal(expected, result) - assert_frame_equal(df1, df2) + tm.assert_frame_equal(expected, result) + tm.assert_frame_equal(df1, df2) def test_reindex_level_partial_selection(self): result = self.frame.reindex(['foo', 'qux'], level=0) expected = self.frame.iloc[[0, 1, 2, 7, 8, 9]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame.T.reindex_axis(['foo', 'qux'], axis=1, level=0) - assert_frame_equal(result, expected.T) + tm.assert_frame_equal(result, expected.T) result = self.frame.loc[['foo', 'qux']] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame['A'].loc[['foo', 'qux']] - assert_series_equal(result, expected['A']) + tm.assert_series_equal(result, expected['A']) result = self.frame.T.loc[:, ['foo', 'qux']] - assert_frame_equal(result, expected.T) + tm.assert_frame_equal(result, expected.T) def test_setitem_multiple_partial(self): expected = self.frame.copy() @@ -1946,45 +1774,45 @@ def test_setitem_multiple_partial(self): result.loc[['foo', 'bar']] = 0 expected.loc['foo'] = 0 expected.loc['bar'] = 0 - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = self.frame.copy() result = self.frame.copy() result.loc['foo':'bar'] = 0 expected.loc['foo'] = 0 expected.loc['bar'] = 0 - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = self.frame['A'].copy() result = self.frame['A'].copy() result.loc[['foo', 'bar']] = 0 expected.loc['foo'] = 0 expected.loc['bar'] = 0 - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) expected = self.frame['A'].copy() result = self.frame['A'].copy() result.loc['foo':'bar'] = 0 expected.loc['foo'] = 0 expected.loc['bar'] = 0 - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_drop_level(self): result = self.frame.drop(['bar', 'qux'], level='first') expected = self.frame.iloc[[0, 1, 2, 5, 6]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame.drop(['two'], level='second') expected = self.frame.iloc[[0, 2, 3, 6, 7, 9]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame.T.drop(['bar', 'qux'], axis=1, level='first') expected = self.frame.iloc[[0, 1, 2, 5, 6]].T - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame.T.drop(['two'], axis=1, level='second') expected = self.frame.iloc[[0, 2, 3, 6, 7, 9]].T - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_drop_level_nonunique_datetime(self): # GH 12701 @@ -2003,7 +1831,7 @@ def test_drop_level_nonunique_datetime(self): result = df.drop(ts, level='tstamp') expected = df.loc[idx != 4] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_drop_preserve_names(self): index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], @@ -2089,7 +1917,7 @@ def test_indexing_ambiguity_bug_1678(self): result = frame.iloc[:, 1] exp = frame.loc[:, ('Ohio', 'Red')] tm.assertIsInstance(result, Series) - assert_series_equal(result, exp) + tm.assert_series_equal(result, exp) def test_nonunique_assignment_1750(self): df = 
DataFrame([[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], @@ -2181,7 +2009,7 @@ def test_duplicate_mi(self): ['foo', 'bar', 5.0, 5]], columns=list('ABCD')).set_index(['A', 'B']) result = df.loc[('foo', 'bar')] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_duplicated_drop_duplicates(self): # GH 4060 @@ -2242,8 +2070,8 @@ def test_datetimeindex(self): expected1 = pd.DatetimeIndex(['2013-04-01 9:00', '2013-04-02 9:00', '2013-04-03 9:00'], tz='Asia/Tokyo') - self.assert_index_equal(idx.levels[0], expected1) - self.assert_index_equal(idx.levels[1], idx2) + tm.assert_index_equal(idx.levels[0], expected1) + tm.assert_index_equal(idx.levels[1], idx2) # from datetime combos # GH 7888 @@ -2289,14 +2117,14 @@ def test_set_index_datetime(self): expected = expected.tz_localize('UTC').tz_convert('US/Pacific') df = df.set_index('label', append=True) - self.assert_index_equal(df.index.levels[0], expected) - self.assert_index_equal(df.index.levels[1], - pd.Index(['a', 'b'], name='label')) + tm.assert_index_equal(df.index.levels[0], expected) + tm.assert_index_equal(df.index.levels[1], + pd.Index(['a', 'b'], name='label')) df = df.swaplevel(0, 1) - self.assert_index_equal(df.index.levels[0], - pd.Index(['a', 'b'], name='label')) - self.assert_index_equal(df.index.levels[1], expected) + tm.assert_index_equal(df.index.levels[0], + pd.Index(['a', 'b'], name='label')) + tm.assert_index_equal(df.index.levels[1], expected) df = DataFrame(np.random.random(6)) idx1 = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00', @@ -2319,14 +2147,14 @@ def test_set_index_datetime(self): expected2 = pd.DatetimeIndex(['2012-04-01 09:00', '2012-04-02 09:00'], tz='US/Eastern') - self.assert_index_equal(df.index.levels[0], expected1) - self.assert_index_equal(df.index.levels[1], expected2) - self.assert_index_equal(df.index.levels[2], idx3) + tm.assert_index_equal(df.index.levels[0], expected1) + tm.assert_index_equal(df.index.levels[1], expected2) + tm.assert_index_equal(df.index.levels[2], idx3) # GH 7092 - self.assert_index_equal(df.index.get_level_values(0), idx1) - self.assert_index_equal(df.index.get_level_values(1), idx2) - self.assert_index_equal(df.index.get_level_values(2), idx3) + tm.assert_index_equal(df.index.get_level_values(0), idx1) + tm.assert_index_equal(df.index.get_level_values(1), idx2) + tm.assert_index_equal(df.index.get_level_values(2), idx3) def test_reset_index_datetime(self): # GH 3950 @@ -2351,7 +2179,7 @@ def test_reset_index_datetime(self): expected['idx1'] = expected['idx1'].apply( lambda d: pd.Timestamp(d, tz=tz)) - assert_frame_equal(df.reset_index(), expected) + tm.assert_frame_equal(df.reset_index(), expected) idx3 = pd.date_range('1/1/2012', periods=5, freq='MS', tz='Europe/Paris', name='idx3') @@ -2378,7 +2206,7 @@ def test_reset_index_datetime(self): lambda d: pd.Timestamp(d, tz=tz)) expected['idx3'] = expected['idx3'].apply( lambda d: pd.Timestamp(d, tz='Europe/Paris')) - assert_frame_equal(df.reset_index(), expected) + tm.assert_frame_equal(df.reset_index(), expected) # GH 7793 idx = pd.MultiIndex.from_product([['a', 'b'], pd.date_range( @@ -2396,7 +2224,7 @@ def test_reset_index_datetime(self): columns=['level_0', 'level_1', 'a']) expected['level_1'] = expected['level_1'].apply( lambda d: pd.Timestamp(d, freq='D', tz=tz)) - assert_frame_equal(df.reset_index(), expected) + tm.assert_frame_equal(df.reset_index(), expected) def test_reset_index_period(self): # GH 7746 @@ -2415,7 +2243,7 @@ def test_reset_index_period(self): 
'feature': ['a', 'b', 'c'] * 3,
             'a': np.arange(9, dtype='int64')
         }, columns=['month', 'feature', 'a'])
-        assert_frame_equal(df.reset_index(), expected)
+        tm.assert_frame_equal(df.reset_index(), expected)
 
     def test_set_index_period(self):
         # GH 6631
@@ -2433,13 +2261,13 @@ def test_set_index_period(self):
         expected1 = pd.period_range('2011-01-01', periods=3, freq='M')
         expected2 = pd.period_range('2013-01-01 09:00', periods=2, freq='H')
 
-        self.assert_index_equal(df.index.levels[0], expected1)
-        self.assert_index_equal(df.index.levels[1], expected2)
-        self.assert_index_equal(df.index.levels[2], idx3)
+        tm.assert_index_equal(df.index.levels[0], expected1)
+        tm.assert_index_equal(df.index.levels[1], expected2)
+        tm.assert_index_equal(df.index.levels[2], idx3)
 
-        self.assert_index_equal(df.index.get_level_values(0), idx1)
-        self.assert_index_equal(df.index.get_level_values(1), idx2)
-        self.assert_index_equal(df.index.get_level_values(2), idx3)
+        tm.assert_index_equal(df.index.get_level_values(0), idx1)
+        tm.assert_index_equal(df.index.get_level_values(1), idx2)
+        tm.assert_index_equal(df.index.get_level_values(2), idx3)
 
     def test_repeat(self):
         # GH 9361
@@ -2475,4 +2303,185 @@ def test_iloc_mi(self):
         result = pd.DataFrame([[df_mi.iloc[r, c] for c in range(2)]
                                for r in range(5)])
 
-        assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result, expected)
+
+
+class TestSorted(Base, tm.TestCase):
+    """ everything you wanted to test about sorting """
+
+    def test_sort_index_preserve_levels(self):
+        result = self.frame.sort_index()
+        self.assertEqual(result.index.names, self.frame.index.names)
+
+    def test_sorting_repr_8017(self):
+
+        np.random.seed(0)
+        data = np.random.randn(3, 4)
+
+        for gen, extra in [([1., 3., 2., 5.], 4.), ([1, 3, 2, 5], 4),
+                           ([Timestamp('20130101'), Timestamp('20130103'),
+                             Timestamp('20130102'), Timestamp('20130105')],
+                            Timestamp('20130104')),
+                           (['1one', '3one', '2one', '5one'], '4one')]:
+            columns = MultiIndex.from_tuples([('red', i) for i in gen])
+            df = DataFrame(data, index=list('def'), columns=columns)
+            df2 = pd.concat([df,
+                             DataFrame('world', index=list('def'),
+                                       columns=MultiIndex.from_tuples(
+                                           [('red', extra)]))], axis=1)
+
+            # check that the repr is good
+            # make sure that we have a correct sparsified repr
+            # e.g. only 1 header of 'red'
+            self.assertEqual(str(df2).splitlines()[0].split(), ['red'])
+
+            # GH 8017
+            # sorting fails after columns added
+
+            # construct single-dtype then sort
+            result = df.copy().sort_index(axis=1)
+            expected = df.iloc[:, [0, 2, 1, 3]]
+            tm.assert_frame_equal(result, expected)
+
+            result = df2.sort_index(axis=1)
+            expected = df2.iloc[:, [0, 2, 1, 4, 3]]
+            tm.assert_frame_equal(result, expected)
+
+            # setitem then sort
+            result = df.copy()
+            result[('red', extra)] = 'world'
+
+            result = result.sort_index(axis=1)
+            tm.assert_frame_equal(result, expected)
+
+    def test_sort_index_level(self):
+        df = self.frame.copy()
+        df.index = np.arange(len(df))
+
+        # axis=1
+
+        # series
+        a_sorted = self.frame['A'].sort_index(level=0)
+
+        # preserve names
+        self.assertEqual(a_sorted.index.names, self.frame.index.names)
+
+        # inplace
+        rs = self.frame.copy()
+        rs.sort_index(level=0, inplace=True)
+        tm.assert_frame_equal(rs, self.frame.sort_index(level=0))
+
+    def test_sort_index_level_large_cardinality(self):
+
+        # #2684 (int64)
+        index = MultiIndex.from_arrays([np.arange(4000)] * 3)
+        df = DataFrame(np.random.randn(4000), index=index, dtype=np.int64)
+
+        # it works!
+ result = df.sort_index(level=0) + self.assertTrue(result.index.lexsort_depth == 3) + + # #2684 (int32) + index = MultiIndex.from_arrays([np.arange(4000)] * 3) + df = DataFrame(np.random.randn(4000), index=index, dtype=np.int32) + + # it works! + result = df.sort_index(level=0) + self.assertTrue((result.dtypes.values == df.dtypes.values).all()) + self.assertTrue(result.index.lexsort_depth == 3) + + def test_sort_index_level_by_name(self): + self.frame.index.names = ['first', 'second'] + result = self.frame.sort_index(level='second') + expected = self.frame.sort_index(level=1) + tm.assert_frame_equal(result, expected) + + def test_sort_index_level_mixed(self): + sorted_before = self.frame.sort_index(level=1) + + df = self.frame.copy() + df['foo'] = 'bar' + sorted_after = df.sort_index(level=1) + tm.assert_frame_equal(sorted_before, + sorted_after.drop(['foo'], axis=1)) + + dft = self.frame.T + sorted_before = dft.sort_index(level=1, axis=1) + dft['foo', 'three'] = 'bar' + + sorted_after = dft.sort_index(level=1, axis=1) + tm.assert_frame_equal(sorted_before.drop([('foo', 'three')], axis=1), + sorted_after.drop([('foo', 'three')], axis=1)) + + def test_is_lexsorted(self): + levels = [[0, 1], [0, 1, 2]] + + index = MultiIndex(levels=levels, + labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) + self.assertTrue(index.is_lexsorted()) + + index = MultiIndex(levels=levels, + labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]) + self.assertFalse(index.is_lexsorted()) + + index = MultiIndex(levels=levels, + labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) + self.assertFalse(index.is_lexsorted()) + self.assertEqual(index.lexsort_depth, 0) + + def test_getitem_multilevel_index_tuple_not_sorted(self): + index_columns = list("abc") + df = DataFrame([[0, 1, 0, "x"], [0, 0, 1, "y"]], + columns=index_columns + ["data"]) + df = df.set_index(index_columns) + query_index = df.index[:1] + rs = df.loc[query_index, "data"] + + xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=['a', 'b', 'c']) + xp = Series(['x'], index=xp_idx, name='data') + tm.assert_series_equal(rs, xp) + + def test_getitem_slice_not_sorted(self): + df = self.frame.sort_index(level=1).T + + # buglet with int typechecking + result = df.iloc[:, :np.int32(3)] + expected = df.reindex(columns=df.columns[:3]) + tm.assert_frame_equal(result, expected) + + def test_frame_getitem_not_sorted(self): + df = self.frame.T + df['foo', 'four'] = 'foo' + + arrays = [np.array(x) for x in zip(*df.columns.values)] + + result = df['foo'] + result2 = df.loc[:, 'foo'] + expected = df.reindex(columns=df.columns[arrays[0] == 'foo']) + expected.columns = expected.columns.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + df = df.T + result = df.xs('foo') + result2 = df.loc['foo'] + expected = df.reindex(df.index[arrays[0] == 'foo']) + expected.index = expected.index.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + def test_series_getitem_not_sorted(self): + arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + tuples = lzip(*arrays) + index = MultiIndex.from_tuples(tuples) + s = Series(randn(8), index=index) + + arrays = [np.array(x) for x in zip(*index.values)] + + result = s['qux'] + result2 = s.loc['qux'] + expected = s[arrays[0] == 'qux'] + expected.index = expected.index.droplevel(0) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) From 
37e5f78b4e9ff03cbff4dea928445cc3b1f707c8 Mon Sep 17 00:00:00 2001
From: Greg Williams
Date: Thu, 16 Mar 2017 07:56:46 -0400
Subject: [PATCH 208/933] BUG: Group-by numeric type-coercion with datetime

closes #14423
closes #15421
closes #15670

During a group-by/apply on a DataFrame, in the presence of one or more
DateTime-like columns, Pandas would incorrectly coerce the type of all
other columns to numeric. E.g. a String column would be coerced to
numeric, producing NaNs.

Author: Greg Williams

Closes #15680 from gwpdt/bugfix14423 and squashes the following commits:

e1ed104 [Greg Williams] TST: Rename and expand test_numeric_coercion
0a15674 [Greg Williams] CLN: move import, add whatsnew entry
c8844e0 [Greg Williams] CLN: PEP8 (whitespace fixes)
46d12c2 [Greg Williams] BUG: Group-by numeric type-coercion with datetime
---
 doc/source/whatsnew/v0.20.0.txt      |  3 +-
 pandas/core/groupby.py               |  5 ++-
 pandas/tests/groupby/test_groupby.py | 48 ++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 41b6519eb740f..a56212328f5c3 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -850,7 +850,8 @@ Bug Fixes
 
 - Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`)
 
-- Bug in groupby operations with timedelta64 when passing ``numeric_only=False`` (:issue:`5724`)
+- Bug in groupby operations with timedelta64 when passing ``numeric_only=False`` (:issue:`5724`)
+- Bug in ``groupby.apply()`` coercing ``object`` dtypes to numeric types, when not all values were numeric (:issue:`14423`, :issue:`15421`, :issue:`15670`)
 - Bug in ``DataFrame.to_html`` with ``index=False`` and ``max_rows`` raising in ``IndexError`` (:issue:`14998`)
 
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index a10be078a8f96..7a017ffae284c 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -10,6 +10,7 @@
     zip, range, lzip,
     callable, map
 )
+
 from pandas import compat
 from pandas.compat.numpy import function as nv
 from pandas.compat.numpy import _np_version_under1p8
@@ -3424,6 +3425,7 @@ def _decide_output_index(self, output, labels):
 
     def _wrap_applied_output(self, keys, values, not_indexed_same=False):
         from pandas.core.index import _all_indexes_same
+        from pandas.tools.util import to_numeric
 
         if len(keys) == 0:
             return DataFrame(index=keys)
@@ -3566,7 +3568,8 @@ def first_non_None_value(values):
                     # as we are stacking can easily have object dtypes here
                     so = self._selected_obj
                     if (so.ndim == 2 and so.dtypes.apply(is_datetimelike).any()):
-                        result = result._convert(numeric=True)
+                        result = result.apply(
+                            lambda x: to_numeric(x, errors='ignore'))
                         date_cols = self._selected_obj.select_dtypes(
                             include=['datetime', 'timedelta']).columns
                         date_cols = date_cols.intersection(result.columns)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index d7fa3beda0abf..c25974c94bfd1 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -4314,6 +4314,54 @@ def test_cummin_cummax(self):
         expected = pd.Series([1, 2, 1], name='b')
         tm.assert_series_equal(result, expected)
 
+    def test_apply_numeric_coercion_when_datetime(self):
+        # In the past, group-by/apply operations have been over-eager
+        # in converting dtypes to numeric, in the presence of datetime
+        # columns. Various GH issues were filed, the reproductions
+        # for which are here.
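+        # Sketch of the failure mode (a rough summary of those issues):
+        # once a frame gained a datetime-like column, an apply such as
+        #   df.groupby('Number').apply(lambda x: x.iloc[0])
+        # pushed every other column through numeric conversion, so object
+        # values like "foo" could silently come back as NaN.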
+
+        # GH 15670
+        df = pd.DataFrame({'Number': [1, 2],
+                           'Date': ["2017-03-02"] * 2,
+                           'Str': ["foo", "inf"]})
+        expected = df.groupby(['Number']).apply(lambda x: x.iloc[0])
+        df.Date = pd.to_datetime(df.Date)
+        result = df.groupby(['Number']).apply(lambda x: x.iloc[0])
+        tm.assert_series_equal(result['Str'], expected['Str'])
+
+        # GH 15421
+        df = pd.DataFrame({'A': [10, 20, 30],
+                           'B': ['foo', '3', '4'],
+                           'T': [pd.Timestamp("12:31:22")] * 3})
+
+        def get_B(g):
+            return g.iloc[0][['B']]
+        result = df.groupby('A').apply(get_B)['B']
+        expected = df.B
+        expected.index = df.A
+        tm.assert_series_equal(result, expected)
+
+        # GH 14423
+        def predictions(tool):
+            out = pd.Series(index=['p1', 'p2', 'useTime'], dtype=object)
+            if 'step1' in list(tool.State):
+                out['p1'] = str(tool[tool.State == 'step1'].Machine.values[0])
+            if 'step2' in list(tool.State):
+                out['p2'] = str(tool[tool.State == 'step2'].Machine.values[0])
+                out['useTime'] = str(
+                    tool[tool.State == 'step2'].oTime.values[0])
+            return out
+        df1 = pd.DataFrame({'Key': ['B', 'B', 'A', 'A'],
+                            'State': ['step1', 'step2', 'step1', 'step2'],
+                            'oTime': ['', '2016-09-19 05:24:33',
+                                      '', '2016-09-19 23:59:04'],
+                            'Machine': ['23', '36L', '36R', '36R']})
+        df2 = df1.copy()
+        df2.oTime = pd.to_datetime(df2.oTime)
+        expected = df1.groupby('Key').apply(predictions).p1
+        result = df2.groupby('Key').apply(predictions).p1
+        tm.assert_series_equal(expected, result)
+
 
 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
     tups = lmap(tuple, df[keys].values)

From fe15466cff9184e38ecee16639c1eefaa45c3c92 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Thu, 16 Mar 2017 12:20:37 -0400
Subject: [PATCH 209/933] CI: remove dev-scipy from testing on numpy-dev build as really old wheels (#15699)

closes #15696
---
 ci/requirements-3.5_NUMPY_DEV.build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/requirements-3.5_NUMPY_DEV.build.sh b/ci/requirements-3.5_NUMPY_DEV.build.sh
index 91fa15491bbf7..b6c8a477e6f5e 100644
--- a/ci/requirements-3.5_NUMPY_DEV.build.sh
+++ b/ci/requirements-3.5_NUMPY_DEV.build.sh
@@ -8,6 +8,6 @@ echo "install numpy master wheel"
 pip uninstall numpy -y
 
 # install numpy wheel from master
-pip install --pre --upgrade --no-index --timeout=60 --trusted-host travis-dev-wheels.scipy.org -f http://travis-dev-wheels.scipy.org/ numpy scipy
+pip install --pre --upgrade --no-index --timeout=60 --trusted-host travis-dev-wheels.scipy.org -f http://travis-dev-wheels.scipy.org/ numpy
 
 true

From 3cac2d5a74c50a2728e1b977e2ee6593b391c9b1 Mon Sep 17 00:00:00 2001
From: Matthew Brett
Date: Thu, 16 Mar 2017 12:12:08 -0700
Subject: [PATCH 210/933] MAINT: test with manylinux numpy/scipy pre-release (#15702)

Numpy is switching to daily manylinux wheels of trunk, instead of
building wheels specific to Ubuntu 12.04 for every commit. Use these
new wheels for numpy pre-release testing.
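
As a rough sketch, the install step this switches to amounts to the
following (the wheel host is the one hard-coded in
ci/requirements-3.5_NUMPY_DEV.build.sh; the final version check is added
here only for illustration):

    PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com"
    pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS numpy scipy
    # a '.dev' suffix in the reported version confirms a pre-release wheel
    python -c "import numpy; print(numpy.__version__)"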
--- .travis.yml | 10 ---------- ci/requirements-3.5_NUMPY_DEV.build.sh | 3 ++- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index b0331941e2a1e..ee093e5bf0e60 100644 --- a/.travis.yml +++ b/.travis.yml @@ -123,11 +123,6 @@ matrix: - PANDAS_TESTING_MODE="deprecate" - CACHE_NAME="35_numpy_dev" - USE_CACHE=true - addons: - apt: - packages: - - libatlas-base-dev - - gfortran # In allow_failures - python: 3.5 env: @@ -167,11 +162,6 @@ matrix: - PANDAS_TESTING_MODE="deprecate" - CACHE_NAME="35_numpy_dev" - USE_CACHE=true - addons: - apt: - packages: - - libatlas-base-dev - - gfortran - python: 3.5 env: - PYTHON_VERSION=3.5 diff --git a/ci/requirements-3.5_NUMPY_DEV.build.sh b/ci/requirements-3.5_NUMPY_DEV.build.sh index b6c8a477e6f5e..4af1307f26a18 100644 --- a/ci/requirements-3.5_NUMPY_DEV.build.sh +++ b/ci/requirements-3.5_NUMPY_DEV.build.sh @@ -8,6 +8,7 @@ echo "install numpy master wheel" pip uninstall numpy -y # install numpy wheel from master -pip install --pre --upgrade --no-index --timeout=60 --trusted-host travis-dev-wheels.scipy.org -f http://travis-dev-wheels.scipy.org/ numpy +PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com" +pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS numpy scipy true From acb9d0132bb824052adc2c13a34b88700a735a45 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 16 Mar 2017 16:46:21 -0400 Subject: [PATCH 211/933] TST: missing __init__.py file in pandas/tests/io/sas --- pandas/tests/io/sas/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 pandas/tests/io/sas/__init__.py diff --git a/pandas/tests/io/sas/__init__.py b/pandas/tests/io/sas/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d From 61f6f6333fb7bb2dedf82736aee6c9878382a06f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 16 Mar 2017 18:28:56 -0400 Subject: [PATCH 212/933] TST: report the exit code on pandas.test() exit --- pandas/util/_tester.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/util/_tester.py b/pandas/util/_tester.py index 8d9701e0b4672..aeb4259a9edae 100644 --- a/pandas/util/_tester.py +++ b/pandas/util/_tester.py @@ -2,6 +2,7 @@ Entrypoint for testing from the top-level namespace """ import os +import sys PKG = os.path.dirname(os.path.dirname(__file__)) @@ -20,7 +21,7 @@ def test(extra_args=None): cmd = extra_args cmd += [PKG] print("running: pytest {}".format(' '.join(cmd))) - pytest.main(cmd) + sys.exit(pytest.main(cmd)) __all__ = ['test'] From d313808337cca3969ec1a323dc3c1dbc21956608 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 08:37:10 -0400 Subject: [PATCH 213/933] CI: re-enable miniconda cache --- ci/install_travis.sh | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 12202b4ceee70..aad87ea37439f 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -33,20 +33,26 @@ home_dir=$(pwd) echo "[home_dir: $home_dir]" # install miniconda -echo "[Using clean Miniconda install]" - MINICONDA_DIR="$HOME/miniconda3" -if [ -d "$MINICONDA_DIR" ]; then - rm -rf "$MINICONDA_DIR" -fi -# install miniconda -if [ "${TRAVIS_OS_NAME}" == "osx" ]; then - wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 +if [ "$USE_CACHE" ] && [ -d "$MINICONDA_DIR" ]; then + echo "[Using cached Miniconda install]" + else - wget 
http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 + echo "[Using clean Miniconda install]" + + if [ -d "$MINICONDA_DIR" ]; then + rm -rf "$MINICONDA_DIR" + fi + + # install miniconda + if [ "${TRAVIS_OS_NAME}" == "osx" ]; then + wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 + else + wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 + fi + bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 fi -bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 echo "[update conda]" conda config --set ssl_verify false || exit 1 From 087c2f1143e3f67663c121c81f722b8d18029fa4 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 17 Mar 2017 08:50:27 -0400 Subject: [PATCH 214/933] TST: Replace check_package with skip_if_no_package (#15709) check_package literally just called skip_if_no_package with no additional decorations. --- pandas/tests/io/test_pytables.py | 3 ++- pandas/util/testing.py | 27 ++++++++++----------------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 5592c564e51df..8ea8088a297b8 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -728,7 +728,8 @@ def test_put_compression(self): format='fixed', complib='zlib') def test_put_compression_blosc(self): - tm.skip_if_no_package('tables', '2.2', app='blosc support') + tm.skip_if_no_package('tables', min_version='2.2', + app='blosc support') if skip_compression: pytest.skip("skipping on windows/PY3") diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 529ecef3e2d6a..154476ce8340a 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2010,12 +2010,16 @@ def __init__(self, *args, **kwargs): dict.__init__(self, *args, **kwargs) -# Dependency checks. Copied this from Nipy/Nipype (Copyright of -# respective developers, license: BSD-3) -def package_check(pkg_name, min_version=None, max_version=None, app='pandas', - checker=LooseVersion): +# Dependency checker when running tests. +# +# Copied this from nipy/nipype +# Copyright of respective developers, License: BSD-3 +def skip_if_no_package(pkg_name, min_version=None, max_version=None, + app='pandas', checker=LooseVersion): """Check that the min/max version of the required package is installed. + If the package check fails, the test is automatically skipped. + Parameters ---------- pkg_name : string @@ -2025,11 +2029,11 @@ def package_check(pkg_name, min_version=None, max_version=None, app='pandas', max_version : string, optional Max version number for required package. app : string, optional - Application that is performing the check. For instance, the + Application that is performing the check. For instance, the name of the tutorial being executed that depends on specific packages. checker : object, optional - The class that will perform the version checking. Default is + The class that will perform the version checking. Default is distutils.version.LooseVersion. 
Examples @@ -2061,17 +2065,6 @@ def package_check(pkg_name, min_version=None, max_version=None, app='pandas', pytest.skip(msg) -def skip_if_no_package(*args, **kwargs): - """pytest.skip() if package_check fails - - Parameters - ---------- - *args Positional parameters passed to `package_check` - *kwargs Keyword parameters passed to `package_check` - """ - package_check(*args, **kwargs) - - def optional_args(decorator): """allows a decorator to take optional positional and keyword arguments. Assumes that taking a single, callable, positional argument means that From b69c8775b64a1d2fa5382f04b209888d989030c0 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 17 Mar 2017 08:50:55 -0400 Subject: [PATCH 215/933] DOC: Add gotcha about flake8-ing diff The `flake8`-ing the diff will not catch any import style errors. I put an alternative check that is more comprehensive but will take longer to run since you will be checking entire files instead of the diff. Author: gfyoung Closes #15712 from gfyoung/pep8-diff-gotcha and squashes the following commits: 42c13de [gfyoung] DOC: Add gotcha about flake8-ing diff --- doc/source/contributing.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 83f99b4f01b26..7961780d0c79b 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -520,6 +520,15 @@ submitting code to run the check yourself on the diff:: git diff master | flake8 --diff +This command will catch any stylistic errors in your changes specifically, but +be beware it may not catch all of them. For example, if you delete the only +usage of an imported function, it is stylistically incorrect to import an +unused function. However, style-checking the diff will not catch this because +the actual import is not part of the diff. 
Thus, for completeness, you should +run this command, though it will take longer:: + + git diff master --name-only -- '*.py' | grep 'pandas' | xargs -r flake8 + Backwards Compatibility ~~~~~~~~~~~~~~~~~~~~~~~ From 3ba68a72f12dd7b1361f1a3ac60720ddb6fd7a34 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 09:06:27 -0400 Subject: [PATCH 216/933] TST: don't catch, but supress warnings in panel4d/panelnd (#15705) --- pandas/core/categorical.py | 4 +- pandas/io/pytables.py | 18 ++- pandas/tests/io/test_pytables.py | 221 ++++++++++++------------------ pandas/tests/test_panel.py | 3 +- pandas/tests/test_panel4d.py | 187 +++++++++++++------------ pandas/tests/test_panelnd.py | 7 +- pandas/tests/tools/test_concat.py | 5 +- 7 files changed, 210 insertions(+), 235 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index c1e5904693d1c..af51c7f2e2dc1 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -550,8 +550,8 @@ def _validate_categories(cls, categories, fastpath=False): # we don't allow NaNs in the categories themselves if categories.hasnans: - # NaNs in cats deprecated in 0.17, - # remove in 0.18 or 0.19 GH 10748 + # NaNs in cats deprecated in 0.17 + # GH 10748 msg = ('\nSetting NaNs in `categories` is deprecated and ' 'will be removed in a future version of pandas.') warn(msg, FutureWarning, stacklevel=3) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 72efc47a3c744..b3b253f151541 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2094,7 +2094,17 @@ def convert(self, values, nan_rep, encoding): # we have a categorical categories = self.metadata - self.data = Categorical.from_codes(self.data.ravel(), + codes = self.data.ravel() + + # if we have stored a NaN in the categories + # then strip it; in theory we could have BOTH + # -1s in the codes and nulls :< + mask = isnull(categories) + if mask.any(): + categories = categories[~mask] + codes[codes != -1] -= mask.astype(int).cumsum().values + + self.data = Categorical.from_codes(codes, categories=categories, ordered=self.ordered) @@ -3404,10 +3414,12 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, if existing_table is not None: indexer = len(self.non_index_axes) exist_axis = existing_table.non_index_axes[indexer][1] - if append_axis != exist_axis: + if not array_equivalent(np.array(append_axis), + np.array(exist_axis)): # ahah! 
-> reindex - if sorted(append_axis) == sorted(exist_axis): + if array_equivalent(np.array(sorted(append_axis)), + np.array(sorted(exist_axis))): append_axis = exist_axis # the non_index_axes info diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 8ea8088a297b8..40866b8702fe2 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -1,11 +1,12 @@ import pytest import sys import os -import warnings +from warnings import catch_warnings import tempfile from contextlib import contextmanager import datetime +from datetime import timedelta import numpy as np import pandas @@ -22,7 +23,7 @@ from pandas.io.pytables import TableIterator from pandas.io.pytables import (HDFStore, get_store, Term, read_hdf, IncompatibilityWarning, PerformanceWarning, - AttributeConflictWarning, DuplicateWarning, + AttributeConflictWarning, PossibleDataLossError, ClosedFileError) from pandas.io import pytables as pytables @@ -31,7 +32,6 @@ assert_panel_equal, assert_frame_equal, assert_series_equal, - assert_produces_warning, set_timezone) from pandas import concat, Timestamp from pandas import compat @@ -123,17 +123,6 @@ def _maybe_remove(store, key): pass -@contextmanager -def compat_assert_produces_warning(w): - """ don't produce a warning under PY3 """ - if compat.PY3: - yield - else: - with tm.assert_produces_warning(expected_warning=w, - check_stacklevel=False): - yield - - class Base(tm.TestCase): @classmethod @@ -151,8 +140,6 @@ def tearDownClass(cls): tm.set_testing_mode() def setUp(self): - warnings.filterwarnings(action='ignore', category=FutureWarning) - self.path = 'tmp.__%s__.h5' % tm.rands(10) def tearDown(self): @@ -420,9 +407,9 @@ def test_repr(self): df.loc[3:6, ['obj1']] = np.nan df = df._consolidate()._convert(datetime=True) - warnings.filterwarnings('ignore', category=PerformanceWarning) - store['df'] = df - warnings.filterwarnings('always', category=PerformanceWarning) + # PerformanceWarning + with catch_warnings(record=True): + store['df'] = df # make a random group in hdf space store._handle.create_group(store._handle.root, 'bah') @@ -455,9 +442,9 @@ def test_contains(self): self.assertNotIn('bar', store) # GH 2694 - warnings.filterwarnings( - 'ignore', category=tables.NaturalNameWarning) - store['node())'] = tm.makeDataFrame() + # tables.NaturalNameWarning + with catch_warnings(record=True): + store['node())'] = tm.makeDataFrame() self.assertIn('node())', store) def test_versioning(self): @@ -768,11 +755,8 @@ def test_put_mixed_type(self): with ensure_clean_store(self.path) as store: _maybe_remove(store, 'df') - # cannot use assert_produces_warning here for some reason - # a PendingDeprecationWarning is also raised? 
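# A minimal stdlib-only sketch of why the catch_warnings(record=True)
# pattern adopted below silences warnings: it installs a collector that
# appends each warning to a list instead of displaying it, and restores
# the prior filters on exit (nothing pandas-specific is assumed here):
#
#     import warnings
#
#     with warnings.catch_warnings(record=True) as w:
#         warnings.simplefilter('always')
#         warnings.warn('noisy', FutureWarning)  # captured into w, not shown
#     assert issubclass(w[-1].category, FutureWarning)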
-            warnings.filterwarnings('ignore', category=PerformanceWarning)
-            store.put('df', df)
-            warnings.filterwarnings('always', category=PerformanceWarning)
+            with catch_warnings(record=True):
+                store.put('df', df)

             expected = store.get('df')
             tm.assert_frame_equal(expected, df)
@@ -797,8 +781,8 @@ def test_append(self):
             tm.assert_frame_equal(store['df3'], df)

             # this is allowed, but you almost always don't want to do it
-            with tm.assert_produces_warning(
-                    expected_warning=tables.NaturalNameWarning):
+            # tables.NaturalNameWarning):
+            with catch_warnings(record=True):
                 _maybe_remove(store, '/df3 foo')
                 store.append('/df3 foo', df[:10])
                 store.append('/df3 foo', df[10:])
@@ -812,8 +796,7 @@ def test_append(self):
             assert_panel_equal(store['wp1'], wp)

             # ndim
-            with tm.assert_produces_warning(FutureWarning,
-                                            check_stacklevel=False):
+            with catch_warnings(record=True):
                 p4d = tm.makePanel4D()
                 _maybe_remove(store, 'p4d')
                 store.append('p4d', p4d.iloc[:, :, :10, :])
@@ -901,12 +884,12 @@ def test_append_series(self):

             # select on the values
             expected = ns[ns > 60]
-            result = store.select('ns', Term('foo>60'))
+            result = store.select('ns', 'foo>60')
             tm.assert_series_equal(result, expected)

             # select on the index and values
             expected = ns[(ns > 70) & (ns.index < 90)]
-            result = store.select('ns', [Term('foo>70'), Term('index<90')])
+            result = store.select('ns', 'foo>70 and index<90')
             tm.assert_series_equal(result, expected)

             # multi-index
@@ -1228,7 +1211,7 @@ def test_append_with_different_block_ordering(self):
     def test_ndim_indexables(self):
         # test using ndim tables in new ways

-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+        with catch_warnings(record=True):

             with ensure_clean_store(self.path) as store:

                 p4d = tm.makePanel4D()
@@ -1888,8 +1871,7 @@ def test_append_misc(self):

         with ensure_clean_store(self.path) as store:

-            with tm.assert_produces_warning(FutureWarning,
-                                            check_stacklevel=False):
+            with catch_warnings(record=True):

                 # unsupported data types for non-tables
                 p4d = tm.makePanel4D()
@@ -1930,7 +1912,7 @@ def check(obj, comparator):
             p = tm.makePanel()
             check(p, assert_panel_equal)

-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+        with catch_warnings(record=True):
             p4d = tm.makePanel4D()
             check(p4d, assert_panel4d_equal)

@@ -2058,8 +2040,8 @@ def test_table_values_dtypes_roundtrip(self):
             expected = Series({'float32': 2, 'float64': 1, 'int32': 1,
                                'bool': 1, 'int16': 1, 'int8': 1,
                                'int64': 1, 'object': 1, 'datetime64[ns]': 2})
-            result.sort()
-            expected.sort()
+            result = result.sort_index()
+            expected = expected.sort_index()
             tm.assert_series_equal(result, expected)

     def test_table_mixed_dtypes(self):
@@ -2098,7 +2080,8 @@ def test_table_mixed_dtypes(self):
             store.append('p1_mixed', wp)
             assert_panel_equal(store.select('p1_mixed'), wp)

-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+        with catch_warnings(record=True):
+
             # ndim
             wp = tm.makePanel4D()
             wp['obj1'] = 'foo'
@@ -2170,7 +2153,6 @@ def test_append_with_timedelta(self):
         # GH 3577
         # append timedelta

-        from datetime import timedelta
         df = DataFrame(dict(A=Timestamp('20130101'), B=[Timestamp(
             '20130101') + timedelta(days=i, seconds=10) for i in range(10)]))
         df['C'] = df['A'] - df['B']
@@ -2184,12 +2166,9 @@ def test_append_with_timedelta(self):
             result = store.select('df')
             assert_frame_equal(result, df)

-            result = store.select('df', Term("C<100000"))
+            result = store.select('df', "C<100000")
             assert_frame_equal(result, df)

-            result = store.select('df', Term("C", "<", -3 * 86400))
-
assert_frame_equal(result, df.iloc[3:]) - result = store.select('df', "C<'-3D'") assert_frame_equal(result, df.iloc[3:]) @@ -2432,7 +2411,7 @@ def test_invalid_terms(self): with ensure_clean_store(self.path) as store: - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): df = tm.makeTimeDataFrame() df['string'] = 'foo' @@ -2490,7 +2469,7 @@ def test_terms(self): 0: tm.makeDataFrame(), 1: tm.makeDataFrame()}) - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): p4d = tm.makePanel4D() store.put('p4d', p4d, format='table') @@ -2499,39 +2478,23 @@ def test_terms(self): store.put('wpneg', wpneg, format='table') # panel - result = store.select('wp', [Term( - 'major_axis<"20000108"'), Term("minor_axis=['A', 'B']")]) + result = store.select( + 'wp', "major_axis<'20000108' and minor_axis=['A', 'B']") expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) assert_panel_equal(result, expected) - # with deprecation - result = store.select('wp', [Term( - 'major_axis', '<', "20000108"), Term("minor_axis=['A', 'B']")]) - expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) - tm.assert_panel_equal(result, expected) - # p4d - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): result = store.select('p4d', - [Term('major_axis<"20000108"'), - Term("minor_axis=['A', 'B']"), - Term("items=['ItemA', 'ItemB']")]) + ("major_axis<'20000108' and " + "minor_axis=['A', 'B'] and " + "items=['ItemA', 'ItemB']")) expected = p4d.truncate(after='20000108').reindex( minor=['A', 'B'], items=['ItemA', 'ItemB']) assert_panel4d_equal(result, expected) - # back compat invalid terms - terms = [dict(field='major_axis', op='>', value='20121114'), - [dict(field='major_axis', op='>', value='20121114')], - ["minor_axis=['A','B']", - dict(field='major_axis', op='>', value='20121114')]] - for t in terms: - with tm.assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): - Term(t) - - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): # valid terms terms = [('major_axis=20121114'), @@ -2582,13 +2545,13 @@ def test_term_compat(self): minor_axis=['A', 'B', 'C', 'D']) store.append('wp', wp) - result = store.select('wp', [Term('major_axis>20000102'), - Term('minor_axis', '=', ['A', 'B'])]) + result = store.select( + 'wp', "major_axis>20000102 and minor_axis=['A', 'B']") expected = wp.loc[:, wp.major_axis > Timestamp('20000102'), ['A', 'B']] assert_panel_equal(result, expected) - store.remove('wp', Term('major_axis>20000103')) + store.remove('wp', 'major_axis>20000103') result = store.select('wp') expected = wp.loc[:, wp.major_axis <= Timestamp('20000103'), :] assert_panel_equal(result, expected) @@ -2602,25 +2565,23 @@ def test_term_compat(self): # stringified datetimes result = store.select( - 'wp', [Term('major_axis', '>', datetime.datetime(2000, 1, 2))]) + 'wp', "major_axis>datetime.datetime(2000, 1, 2)") expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] assert_panel_equal(result, expected) result = store.select( - 'wp', [Term('major_axis', '>', - datetime.datetime(2000, 1, 2, 0, 0))]) + 'wp', "major_axis>datetime.datetime(2000, 1, 2, 0, 0)") expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] assert_panel_equal(result, expected) result = store.select( - 'wp', [Term('major_axis', '=', - [datetime.datetime(2000, 1, 2, 0, 0), - datetime.datetime(2000, 1, 3, 0, 0)])]) + 'wp', ("major_axis=[datetime.datetime(2000, 1, 2, 0, 0), " + 
"datetime.datetime(2000, 1, 3, 0, 0)]")) expected = wp.loc[:, [Timestamp('20000102'), Timestamp('20000103')]] assert_panel_equal(result, expected) - result = store.select('wp', [Term('minor_axis', '=', ['A', 'B'])]) + result = store.select('wp', "minor_axis=['A', 'B']") expected = wp.loc[:, :, ['A', 'B']] assert_panel_equal(result, expected) @@ -2631,8 +2592,7 @@ def test_backwards_compat_without_term_object(self): major_axis=date_range('1/1/2000', periods=5), minor_axis=['A', 'B', 'C', 'D']) store.append('wp', wp) - with assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): result = store.select('wp', [('major_axis>20000102'), ('minor_axis', '=', ['A', 'B'])]) expected = wp.loc[:, @@ -2653,24 +2613,21 @@ def test_backwards_compat_without_term_object(self): store.append('wp', wp) # stringified datetimes - with assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): result = store.select('wp', [('major_axis', '>', datetime.datetime(2000, 1, 2))]) expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] assert_panel_equal(result, expected) - with assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): result = store.select('wp', [('major_axis', '>', datetime.datetime(2000, 1, 2, 0, 0))]) expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] assert_panel_equal(result, expected) - with assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): result = store.select('wp', [('major_axis', '=', @@ -2769,9 +2726,7 @@ def test_tuple_index(self): data = np.random.randn(30).reshape((3, 10)) DF = DataFrame(data, index=idx, columns=col) - expected_warning = Warning if PY35 else PerformanceWarning - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): + with catch_warnings(record=True): self._check_roundtrip(DF, tm.assert_frame_equal) def test_index_types(self): @@ -2783,30 +2738,23 @@ def test_index_types(self): check_index_type=True, check_series_type=True) - # nose has a deprecation warning in 3.5 - expected_warning = Warning if PY35 else PerformanceWarning - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): + with catch_warnings(record=True): ser = Series(values, [0, 'y']) self._check_roundtrip(ser, func) - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): + with catch_warnings(record=True): ser = Series(values, [datetime.datetime.today(), 0]) self._check_roundtrip(ser, func) - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): + with catch_warnings(record=True): ser = Series(values, ['y', 0]) self._check_roundtrip(ser, func) - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): + with catch_warnings(record=True): ser = Series(values, [datetime.date.today(), 'a']) self._check_roundtrip(ser, func) - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): + with catch_warnings(record=True): ser = Series(values, [1.23, 'b']) self._check_roundtrip(ser, func) @@ -3054,7 +3002,7 @@ def test_wide_table_dups(self): store.put('panel', wp, format='table') store.put('panel', wp, format='table', append=True) - with tm.assert_produces_warning(expected_warning=DuplicateWarning): + with 
catch_warnings(record=True): recons = store['panel'] assert_panel_equal(recons, wp) @@ -3648,6 +3596,7 @@ def test_retain_index_attributes(self): def test_retain_index_attributes2(self): with ensure_clean_path(self.path) as path: + expected_warning = Warning if PY35 else AttributeConflictWarning with tm.assert_produces_warning(expected_warning=expected_warning, check_stacklevel=False): @@ -3805,15 +3754,10 @@ def test_frame_select_complex2(self): hist.to_hdf(hh, 'df', mode='w', format='table') - expected = read_hdf(hh, 'df', where=Term('l1', '=', [2, 3, 4])) - - # list like - result = read_hdf(hh, 'df', where=Term( - 'l1', '=', selection.index.tolist())) - assert_frame_equal(result, expected) - l = selection.index.tolist() # noqa + expected = read_hdf(hh, 'df', where="l1=[2, 3, 4]") # sccope with list like + l = selection.index.tolist() # noqa store = HDFStore(hh) result = store.select('df', where='l1=l') assert_frame_equal(result, expected) @@ -3882,12 +3826,12 @@ def test_string_select(self): store.append('df', df, data_columns=['x']) - result = store.select('df', Term('x=none')) + result = store.select('df', 'x=none') expected = df[df.x == 'none'] assert_frame_equal(result, expected) try: - result = store.select('df', Term('x!=none')) + result = store.select('df', 'x!=none') expected = df[df.x != 'none'] assert_frame_equal(result, expected) except Exception as detail: @@ -3899,7 +3843,7 @@ def test_string_select(self): df2.loc[df2.x == '', 'x'] = np.nan store.append('df2', df2, data_columns=['x']) - result = store.select('df2', Term('x!=none')) + result = store.select('df2', 'x!=none') expected = df2[isnull(df2.x)] assert_frame_equal(result, expected) @@ -3909,11 +3853,11 @@ def test_string_select(self): store.append('df3', df, data_columns=['int']) - result = store.select('df3', Term('int=2')) + result = store.select('df3', 'int=2') expected = df[df.int == 2] assert_frame_equal(result, expected) - result = store.select('df3', Term('int!=2')) + result = store.select('df3', 'int!=2') expected = df[df.int != 2] assert_frame_equal(result, expected) @@ -4179,8 +4123,8 @@ def test_select_as_multiple(self): tm.assert_frame_equal(result, expected) # multiple (diff selector) - result = store.select_as_multiple(['df1', 'df2'], where=[Term( - 'index>df2.index[4]')], selector='df2') + result = store.select_as_multiple( + ['df1', 'df2'], where='index>df2.index[4]', selector='df2') expected = concat([df1, df2], axis=1) expected = expected[5:] tm.assert_frame_equal(result, expected) @@ -4222,13 +4166,13 @@ def test_start_stop_table(self): store.append('df', df) result = store.select( - 'df', [Term("columns=['A']")], start=0, stop=5) + 'df', "columns=['A']", start=0, stop=5) expected = df.loc[0:4, ['A']] tm.assert_frame_equal(result, expected) # out of range result = store.select( - 'df', [Term("columns=['A']")], start=30, stop=40) + 'df', "columns=['A']", start=30, stop=40) self.assertTrue(len(result) == 0) expected = df.loc[30:40, ['A']] tm.assert_frame_equal(result, expected) @@ -4288,11 +4232,11 @@ def test_select_filter_corner(self): with ensure_clean_store(self.path) as store: store.put('frame', df, format='table') - crit = Term('columns=df.columns[:75]') + crit = 'columns=df.columns[:75]' result = store.select('frame', [crit]) tm.assert_frame_equal(result, df.loc[:, df.columns[:75]]) - crit = Term('columns=df.columns[:75:2]') + crit = 'columns=df.columns[:75:2]' result = store.select('frame', [crit]) tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]]) @@ -4471,16 +4415,16 @@ def 
test_legacy_table_read(self): with tm.assert_produces_warning( expected_warning=IncompatibilityWarning): self.assertRaises( - Exception, store.select, 'wp1', Term('minor_axis=B')) + Exception, store.select, 'wp1', 'minor_axis=B') df2 = store.select('df2') - result = store.select('df2', Term('index>df2.index[2]')) + result = store.select('df2', 'index>df2.index[2]') expected = df2[df2.index > df2.index[2]] assert_frame_equal(expected, result) def test_legacy_0_10_read(self): # legacy from 0.10 - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): path = tm.get_data_path('legacy_hdf/legacy_0.10.h5') with ensure_clean_store(path, mode='r') as store: str(store) @@ -4504,7 +4448,7 @@ def test_legacy_0_11_read(self): def test_copy(self): - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): def do_copy(f=None, new_f=None, keys=None, propindexes=True, **kwargs): @@ -4646,7 +4590,8 @@ def test_unicode_index(self): unicode_values = [u('\u03c3'), u('\u03c3\u03c3')] - with compat_assert_produces_warning(PerformanceWarning): + # PerformanceWarning + with catch_warnings(record=True): s = Series(np.random.randn(len(unicode_values)), unicode_values) self._check_roundtrip(s, tm.assert_series_equal) @@ -4914,15 +4859,19 @@ def test_to_hdf_with_object_column_names(self): with self.assertRaises( ValueError, msg=("cannot have non-object label " "DataIndexableCol")): - df.to_hdf(path, 'df', format='table', data_columns=True) + with catch_warnings(record=True): + df.to_hdf(path, 'df', + format='table', + data_columns=True) for index in types_should_run: df = DataFrame(np.random.randn(10, 2), columns=index(2)) with ensure_clean_path(self.path) as path: - df.to_hdf(path, 'df', format='table', data_columns=True) - result = pd.read_hdf( - path, 'df', where="index = [{0}]".format(df.index[0])) - assert(len(result)) + with catch_warnings(record=True): + df.to_hdf(path, 'df', format='table', data_columns=True) + result = pd.read_hdf( + path, 'df', where="index = [{0}]".format(df.index[0])) + assert(len(result)) def test_read_hdf_open_store(self): # GH10330 @@ -5187,7 +5136,7 @@ def test_complex_mixed_table(self): with ensure_clean_store(self.path) as store: store.append('df', df, data_columns=['A', 'B']) - result = store.select('df', where=Term('A>2')) + result = store.select('df', where='A>2') assert_frame_equal(df.loc[df.A > 2], result) with ensure_clean_path(self.path) as path: @@ -5216,7 +5165,7 @@ def test_complex_across_dimensions(self): df = DataFrame({'A': s, 'B': s}) p = Panel({'One': df, 'Two': df}) - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): p4d = pd.Panel4D({'i': p, 'ii': p}) objs = [df, p, p4d] @@ -5300,7 +5249,7 @@ def test_append_with_timezones_dateutil(self): # select with tz aware expected = df[df.A >= df.A[3]] - result = store.select('df_tz', where=Term('A>=df.A[3]')) + result = store.select('df_tz', where='A>=df.A[3]') self._compare_with_tz(result, expected) # ensure we include dates in DST and STD time here. @@ -5371,7 +5320,7 @@ def test_append_with_timezones_pytz(self): # select with tz aware self._compare_with_tz(store.select( - 'df_tz', where=Term('A>=df.A[3]')), df[df.A >= df.A[3]]) + 'df_tz', where='A>=df.A[3]'), df[df.A >= df.A[3]]) _maybe_remove(store, 'df_tz') # ensure we include dates in DST and STD time here. 
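The hunks above repeatedly swap hand-built ``Term`` objects for plain query
strings, which ``HDFStore`` parses into the same expression tree. A minimal
sketch of the string form, assuming a throwaway path ``tmp.h5`` and a
table-format store with data column ``A``:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'A': np.arange(10), 'B': np.random.randn(10)})
    with pd.HDFStore('tmp.h5', mode='w') as store:
        store.append('df', df, data_columns=['A'])
        # same query the old Term('A', '>', 5) spelling would build
        result = store.select('df', 'A>5')

As the ``l = selection.index.tolist()`` / ``where='l1=l'`` test above shows,
the query string can also reference variables from the enclosing scope.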
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 373f590cbf9eb..ab0322abbcf06 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # pylint: disable=W0612,E1101 +from warnings import catch_warnings from datetime import datetime import operator @@ -1272,7 +1273,7 @@ def test_apply_slabs(self): f = lambda x: ((x.T - x.mean(1)) / x.std(1)).T # make sure that we don't trigger any warnings - with tm.assert_produces_warning(False): + with catch_warnings(record=True): result = self.panel.apply(f, axis=['items', 'major_axis']) expected = Panel(dict([(ax, f(self.panel.loc[:, :, ax])) for ax in self.panel.minor_axis])) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 2491bac2a7f19..c0511581cd299 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -3,7 +3,7 @@ from pandas.compat import range, lrange import operator import pytest - +from warnings import catch_warnings import numpy as np from pandas.types.common import is_float_dtype @@ -129,17 +129,21 @@ def skipna_wrapper(x): def wrapper(x): return alternative(np.asarray(x)) - for i in range(obj.ndim): - result = f(axis=i, skipna=False) - assert_panel_equal(result, obj.apply(wrapper, axis=i)) + with catch_warnings(record=True): + for i in range(obj.ndim): + result = f(axis=i, skipna=False) + expected = obj.apply(wrapper, axis=i) + assert_panel_equal(result, expected) else: skipna_wrapper = alternative wrapper = alternative - for i in range(obj.ndim): - result = f(axis=i) - if not tm._incompat_bottleneck_version(name): - assert_panel_equal(result, obj.apply(skipna_wrapper, axis=i)) + with catch_warnings(record=True): + for i in range(obj.ndim): + result = f(axis=i) + if not tm._incompat_bottleneck_version(name): + expected = obj.apply(skipna_wrapper, axis=i) + assert_panel_equal(result, expected) self.assertRaises(Exception, f, axis=obj.ndim) @@ -161,32 +165,33 @@ def test_get_axis(self): assert(self.panel4d._get_axis(3) is self.panel4d.minor_axis) def test_set_axis(self): - new_labels = Index(np.arange(len(self.panel4d.labels))) + with catch_warnings(record=True): + new_labels = Index(np.arange(len(self.panel4d.labels))) - # TODO: unused? - # new_items = Index(np.arange(len(self.panel4d.items))) + # TODO: unused? + # new_items = Index(np.arange(len(self.panel4d.items))) - new_major = Index(np.arange(len(self.panel4d.major_axis))) - new_minor = Index(np.arange(len(self.panel4d.minor_axis))) + new_major = Index(np.arange(len(self.panel4d.major_axis))) + new_minor = Index(np.arange(len(self.panel4d.minor_axis))) - # ensure propagate to potentially prior-cached items too + # ensure propagate to potentially prior-cached items too - # TODO: unused? - # label = self.panel4d['l1'] + # TODO: unused? 
+ # label = self.panel4d['l1'] - self.panel4d.labels = new_labels + self.panel4d.labels = new_labels - if hasattr(self.panel4d, '_item_cache'): - self.assertNotIn('l1', self.panel4d._item_cache) - self.assertIs(self.panel4d.labels, new_labels) + if hasattr(self.panel4d, '_item_cache'): + self.assertNotIn('l1', self.panel4d._item_cache) + self.assertIs(self.panel4d.labels, new_labels) - self.panel4d.major_axis = new_major - self.assertIs(self.panel4d[0].major_axis, new_major) - self.assertIs(self.panel4d.major_axis, new_major) + self.panel4d.major_axis = new_major + self.assertIs(self.panel4d[0].major_axis, new_major) + self.assertIs(self.panel4d.major_axis, new_major) - self.panel4d.minor_axis = new_minor - self.assertIs(self.panel4d[0].minor_axis, new_minor) - self.assertIs(self.panel4d.minor_axis, new_minor) + self.panel4d.minor_axis = new_minor + self.assertIs(self.panel4d[0].minor_axis, new_minor) + self.assertIs(self.panel4d.minor_axis, new_minor) def test_get_axis_number(self): self.assertEqual(self.panel4d._get_axis_number('labels'), 0) @@ -201,7 +206,7 @@ def test_get_axis_name(self): self.assertEqual(self.panel4d._get_axis_name(3), 'minor_axis') def test_arith(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self._test_op(self.panel4d, operator.add) self._test_op(self.panel4d, operator.sub) self._test_op(self.panel4d, operator.mul) @@ -233,16 +238,16 @@ def test_iteritems(self): len(self.panel4d.labels)) def test_combinePanel4d(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = self.panel4d.add(self.panel4d) self.assert_panel4d_equal(result, self.panel4d * 2) def test_neg(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self.assert_panel4d_equal(-self.panel4d, self.panel4d * -1) def test_select(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p = self.panel4d @@ -283,7 +288,7 @@ def test_get_value(self): def test_abs(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = self.panel4d.abs() expected = np.abs(self.panel4d) self.assert_panel4d_equal(result, expected) @@ -306,7 +311,7 @@ def test_getitem(self): def test_delitem_and_pop(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): expected = self.panel4d['l2'] result = self.panel4d.pop('l2') assert_panel_equal(expected, result) @@ -351,40 +356,38 @@ def test_delitem_and_pop(self): assert_panel_equal(panel4dc[0], panel4d[0]) def test_setitem(self): - # LongPanel with one item - # lp = self.panel.filter(['ItemA', 'ItemB']).to_frame() - # self.assertRaises(Exception, self.panel.__setitem__, - # 'ItemE', lp) + with catch_warnings(record=True): - # Panel - p = Panel(dict( - ItemA=self.panel4d['l1']['ItemA'][2:].filter(items=['A', 'B']))) - self.panel4d['l4'] = p - self.panel4d['l5'] = p + # Panel + p = Panel(dict( + ItemA=self.panel4d['l1']['ItemA'][2:].filter( + items=['A', 'B']))) + self.panel4d['l4'] = p + self.panel4d['l5'] = p - p2 = self.panel4d['l4'] + p2 = self.panel4d['l4'] - assert_panel_equal(p, p2.reindex(items=p.items, - major_axis=p.major_axis, - minor_axis=p.minor_axis)) + assert_panel_equal(p, p2.reindex(items=p.items, + major_axis=p.major_axis, + minor_axis=p.minor_axis)) - # scalar - self.panel4d['lG'] = 1 - 
self.panel4d['lE'] = True - self.assertEqual(self.panel4d['lG'].values.dtype, np.int64) - self.assertEqual(self.panel4d['lE'].values.dtype, np.bool_) + # scalar + self.panel4d['lG'] = 1 + self.panel4d['lE'] = True + self.assertEqual(self.panel4d['lG'].values.dtype, np.int64) + self.assertEqual(self.panel4d['lE'].values.dtype, np.bool_) - # object dtype - self.panel4d['lQ'] = 'foo' - self.assertEqual(self.panel4d['lQ'].values.dtype, np.object_) + # object dtype + self.panel4d['lQ'] = 'foo' + self.assertEqual(self.panel4d['lQ'].values.dtype, np.object_) - # boolean dtype - self.panel4d['lP'] = self.panel4d['l1'] > 0 - self.assertEqual(self.panel4d['lP'].values.dtype, np.bool_) + # boolean dtype + self.panel4d['lP'] = self.panel4d['l1'] > 0 + self.assertEqual(self.panel4d['lP'].values.dtype, np.bool_) def test_setitem_by_indexer(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): # Panel panel4dc = self.panel4d.copy() @@ -419,7 +422,7 @@ def func(): def test_setitem_by_indexer_mixed_type(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): # GH 8702 self.panel4d['foo'] = 'bar' @@ -433,7 +436,7 @@ def test_setitem_by_indexer_mixed_type(self): self.assertTrue((panel4dc.iloc[2].values == 'foo').all()) def test_comparisons(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p1 = tm.makePanel4D() p2 = tm.makePanel4D() @@ -467,7 +470,8 @@ def test_major_xs(self): ref = self.panel4d['l1']['ItemA'] idx = self.panel4d.major_axis[5] - xs = self.panel4d.major_xs(idx) + with catch_warnings(record=True): + xs = self.panel4d.major_xs(idx) assert_series_equal(xs['l1'].T['ItemA'], ref.xs(idx), check_names=False) @@ -478,15 +482,17 @@ def test_major_xs(self): def test_major_xs_mixed(self): self.panel4d['l4'] = 'foo' - xs = self.panel4d.major_xs(self.panel4d.major_axis[0]) + with catch_warnings(record=True): + xs = self.panel4d.major_xs(self.panel4d.major_axis[0]) self.assertEqual(xs['l1']['A'].dtype, np.float64) self.assertEqual(xs['l4']['A'].dtype, np.object_) def test_minor_xs(self): ref = self.panel4d['l1']['ItemA'] - idx = self.panel4d.minor_axis[1] - xs = self.panel4d.minor_xs(idx) + with catch_warnings(record=True): + idx = self.panel4d.minor_axis[1] + xs = self.panel4d.minor_xs(idx) assert_series_equal(xs['l1'].T['ItemA'], ref[idx], check_names=False) @@ -496,7 +502,8 @@ def test_minor_xs(self): def test_minor_xs_mixed(self): self.panel4d['l4'] = 'foo' - xs = self.panel4d.minor_xs('D') + with catch_warnings(record=True): + xs = self.panel4d.minor_xs('D') self.assertEqual(xs['l1'].T['ItemA'].dtype, np.float64) self.assertEqual(xs['l4'].T['ItemA'].dtype, np.object_) @@ -512,11 +519,12 @@ def test_xs(self): # mixed-type self.panel4d['strings'] = 'foo' - result = self.panel4d.xs('D', axis=3) + with catch_warnings(record=True): + result = self.panel4d.xs('D', axis=3) self.assertIsNotNone(result.is_copy) def test_getitem_fancy_labels(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): panel4d = self.panel4d labels = panel4d.labels[[1, 0]] @@ -572,7 +580,7 @@ def test_get_value(self): def test_set_value(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): for label in self.panel4d.labels: for item in self.panel4d.items: @@ -603,13 +611,13 @@ def assert_panel4d_equal(cls, x, y): 
assert_panel4d_equal(x, y) def setUp(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self.panel4d = tm.makePanel4D(nper=8) add_nans(self.panel4d) def test_constructor(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): panel4d = Panel4D(self.panel4d._data) self.assertIs(panel4d._data, self.panel4d._data) @@ -649,7 +657,7 @@ def test_constructor(self): assert_panel4d_equal(panel4d, expected) def test_constructor_cast(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): zero_filled = self.panel4d.fillna(0) casted = Panel4D(zero_filled._data, dtype=int) @@ -671,7 +679,7 @@ def test_constructor_cast(self): self.assertRaises(ValueError, Panel, data, dtype=float) def test_consolidate(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self.assertTrue(self.panel4d._data.is_consolidated()) self.panel4d['foo'] = 1. @@ -681,7 +689,7 @@ def test_consolidate(self): self.assertTrue(panel4d._data.is_consolidated()) def test_ctor_dict(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): l1 = self.panel4d['l1'] l2 = self.panel4d['l2'] @@ -694,7 +702,7 @@ def test_ctor_dict(self): :, :]['ItemB']) def test_constructor_dict_mixed(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): data = dict((k, v.values) for k, v in self.panel4d.iteritems()) result = Panel4D(data) @@ -721,7 +729,7 @@ def test_constructor_dict_mixed(self): self.assertRaises(Exception, Panel4D, data) def test_constructor_resize(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): data = self.panel4d._data labels = self.panel4d.labels[:-1] items = self.panel4d.items[:-1] @@ -747,16 +755,19 @@ def test_constructor_resize(self): assert_panel4d_equal(result, expected) def test_conform(self): + with catch_warnings(record=True): - p = self.panel4d['l1'].filter(items=['ItemA', 'ItemB']) - conformed = self.panel4d.conform(p) + p = self.panel4d['l1'].filter(items=['ItemA', 'ItemB']) + conformed = self.panel4d.conform(p) - tm.assert_index_equal(conformed.items, self.panel4d.labels) - tm.assert_index_equal(conformed.major_axis, self.panel4d.major_axis) - tm.assert_index_equal(conformed.minor_axis, self.panel4d.minor_axis) + tm.assert_index_equal(conformed.items, self.panel4d.labels) + tm.assert_index_equal(conformed.major_axis, + self.panel4d.major_axis) + tm.assert_index_equal(conformed.minor_axis, + self.panel4d.minor_axis) def test_reindex(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): ref = self.panel4d['l2'] # labels @@ -810,14 +821,14 @@ def test_reindex(self): self.assertTrue(result is self.panel4d) def test_not_hashable(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p4D_empty = Panel4D() self.assertRaises(TypeError, hash, p4D_empty) self.assertRaises(TypeError, hash, self.panel4d) def test_reindex_like(self): # reindex_like - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): smaller = self.panel4d.reindex(labels=self.panel4d.labels[:-1], items=self.panel4d.items[:-1], major=self.panel4d.major_axis[:-1], 
@@ -826,7 +837,7 @@ def test_reindex_like(self): assert_panel4d_equal(smaller, smaller_like) def test_sort_index(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): import random rlabels = list(self.panel4d.labels) @@ -844,7 +855,7 @@ def test_sort_index(self): def test_fillna(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self.assertFalse(np.isfinite(self.panel4d.values).all()) filled = self.panel4d.fillna(0) self.assertTrue(np.isfinite(filled.values).all()) @@ -853,7 +864,7 @@ def test_fillna(self): self.panel4d.fillna, method='pad') def test_swapaxes(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = self.panel4d.swapaxes('labels', 'items') self.assertIs(result.items, self.panel4d.labels) @@ -880,7 +891,7 @@ def test_swapaxes(self): def test_update(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p4d = Panel4D([[[[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], @@ -913,12 +924,12 @@ def test_dtypes(self): assert_series_equal(result, expected) def test_repr_empty(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): empty = Panel4D() repr(empty) def test_rename(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): mapper = {'l1': 'foo', 'l2': 'bar', diff --git a/pandas/tests/test_panelnd.py b/pandas/tests/test_panelnd.py index 6a578d85d3ee3..7ecc773cd7bea 100644 --- a/pandas/tests/test_panelnd.py +++ b/pandas/tests/test_panelnd.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from warnings import catch_warnings from pandas.core import panelnd from pandas.core.panel import Panel @@ -13,7 +14,7 @@ def setUp(self): def test_4d_construction(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): # create a 4D Panel4D = panelnd.create_nd_panel_factory( @@ -29,7 +30,7 @@ def test_4d_construction(self): def test_4d_construction_alt(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): # create a 4D Panel4D = panelnd.create_nd_panel_factory( @@ -61,7 +62,7 @@ def test_4d_construction_error(self): def test_5d_construction(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): # create a 4D Panel4D = panelnd.create_nd_panel_factory( diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index 392036a99a297..c41924a7987bd 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -1,3 +1,4 @@ +from warnings import catch_warnings import numpy as np from numpy.random import randn @@ -1373,7 +1374,7 @@ def df(): concat([panel1, panel3], axis=1, verify_integrity=True) def test_panel4d_concat(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p4d = tm.makePanel4D() p1 = p4d.iloc[:, :, :5, :] @@ -1389,7 +1390,7 @@ def test_panel4d_concat(self): tm.assert_panel4d_equal(result, p4d) def test_panel4d_concat_mixed_type(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p4d = tm.makePanel4D() # if things are a bit misbehaved From 
de17fd9bfb170e7c8ef3c6706bbc7d2630ab893c Mon Sep 17 00:00:00 2001 From: "Christopher C. Aycock" Date: Fri, 17 Mar 2017 09:08:52 -0400 Subject: [PATCH 217/933] BUG: TZ-aware Series.where() appropriately handles default other=nan (#15701) closes #15701 Author: Christopher C. Aycock Closes #15711 from chrisaycock/GH15701 and squashes the following commits: b77f5ed [Christopher C. Aycock] BUG: TZ-aware Series.where() appropriately handles default other=nan (#15701) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/internals.py | 3 ++- pandas/tests/series/test_indexing.py | 8 ++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a56212328f5c3..29d05ddcfb497 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -829,6 +829,7 @@ Bug Fixes - Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) - Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) +- Bug in ``Series.where()`` where TZ-aware data was converted to float representation (:issue:`15701`) - Bug in ``Index`` construction with ``NaN`` elements and integer dtype specified (:issue:`15187`) - Bug in ``Series`` construction with a datetimetz (:issue:`14928`) - Bug in output formatting of a ``MultiIndex`` when names are integers (:issue:`12223`, :issue:`15262`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 0e6c176d950a1..9db01713b05ed 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2440,7 +2440,8 @@ def _try_coerce_args(self, values, other): if isinstance(other, bool): raise TypeError - elif is_null_datelike_scalar(other): + elif (is_null_datelike_scalar(other) or + (is_scalar(other) and isnull(other))): other = tslib.iNaT other_mask = True elif isinstance(other, self._holder): diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 9d93d9f01b161..0b6c0c601ac72 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1385,6 +1385,14 @@ def test_where_datetime(self): expected = Series([10, None], dtype='datetime64[ns]') assert_series_equal(rs, expected) + # GH 15701 + timestamps = ['2016-12-31 12:00:04+00:00', + '2016-12-31 12:00:04.010000+00:00'] + s = Series([pd.Timestamp(t) for t in timestamps]) + rs = s.where(Series([False, True])) + expected = Series([pd.NaT, s[1]]) + assert_series_equal(rs, expected) + def test_where_timedelta(self): s = Series([1, 2], dtype='timedelta64[ns]') expected = Series([10, 10], dtype='timedelta64[ns]') From a73e4518cf3d10fd239cdbd1be3bcda43443bf2a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 10:08:48 -0400 Subject: [PATCH 218/933] TST: remove rest of yield warnings (#15708) * TST: remove yield warnings from test_internals.py * TST: remove yield warnings from test_windows.py --- pandas/tests/formats/test_format.py | 38 +- pandas/tests/test_internals.py | 567 ++++++++++++++-------------- pandas/tests/test_window.py | 2 +- pandas/util/testing.py | 14 +- setup.cfg | 1 - 5 files changed, 322 insertions(+), 300 deletions(-) diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index b1f163ccf9429..44a7f2b45e759 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -1392,24 +1392,26 @@ def test_repr_html_long(self): assert u('2 columns') in long_repr def test_repr_html_float(self): - 
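# The rewritten body below pins ``display.max_rows`` instead of assuming the
# environment default. A minimal sketch of the ``option_context`` manager it
# relies on (the previous value is restored when the block exits):
#
#     import pandas as pd
#
#     with pd.option_context('display.max_rows', 60):
#         assert pd.get_option('display.max_rows') == 60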
max_rows = get_option('display.max_rows') - h = max_rows - 1 - df = DataFrame({'idx': np.linspace(-10, 10, h), - 'A': np.arange(1, 1 + h), - 'B': np.arange(41, 41 + h)}).set_index('idx') - reg_repr = df._repr_html_() - assert '..' not in reg_repr - assert str(40 + h) in reg_repr - - h = max_rows + 1 - df = DataFrame({'idx': np.linspace(-10, 10, h), - 'A': np.arange(1, 1 + h), - 'B': np.arange(41, 41 + h)}).set_index('idx') - long_repr = df._repr_html_() - assert '..' in long_repr - assert '31' not in long_repr - assert u('%d rows ') % h in long_repr - assert u('2 columns') in long_repr + with option_context('display.max_rows', 60): + + max_rows = get_option('display.max_rows') + h = max_rows - 1 + df = DataFrame({'idx': np.linspace(-10, 10, h), + 'A': np.arange(1, 1 + h), + 'B': np.arange(41, 41 + h)}).set_index('idx') + reg_repr = df._repr_html_() + assert '..' not in reg_repr + assert str(40 + h) in reg_repr + + h = max_rows + 1 + df = DataFrame({'idx': np.linspace(-10, 10, h), + 'A': np.arange(1, 1 + h), + 'B': np.arange(41, 41 + h)}).set_index('idx') + long_repr = df._repr_html_() + assert '..' in long_repr + assert '31' not in long_repr + assert u('%d rows ') % h in long_repr + assert u('2 columns') in long_repr def test_repr_html_long_multiindex(self): max_rows = get_option('display.max_rows') diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index df5e843097514..29920b165d3f6 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -23,11 +23,19 @@ from pandas.compat import zip, u +@pytest.fixture +def mgr(): + return create_mgr( + 'a: f8; b: object; c: f8; d: object; e: f8;' + 'f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;' + 'k: M8[ns, US/Eastern]; l: M8[ns, CET];') + + def assert_block_equal(left, right): tm.assert_numpy_array_equal(left.values, right.values) - assert (left.dtype == right.dtype) - tm.assertIsInstance(left.mgr_locs, lib.BlockPlacement) - tm.assertIsInstance(right.mgr_locs, lib.BlockPlacement) + assert left.dtype == right.dtype + assert isinstance(left.mgr_locs, lib.BlockPlacement) + assert isinstance(right.mgr_locs, lib.BlockPlacement) tm.assert_numpy_array_equal(left.mgr_locs.as_array, right.mgr_locs.as_array) @@ -197,11 +205,11 @@ def setUp(self): def test_constructor(self): int32block = create_block('i4', [0]) - self.assertEqual(int32block.dtype, np.int32) + assert int32block.dtype == np.int32 def test_pickle(self): def _check(blk): - assert_block_equal(self.round_trip_pickle(blk), blk) + assert_block_equal(tm.round_trip_pickle(blk), blk) _check(self.fblock) _check(self.cblock) @@ -209,14 +217,14 @@ def _check(blk): _check(self.bool_block) def test_mgr_locs(self): - tm.assertIsInstance(self.fblock.mgr_locs, lib.BlockPlacement) + assert isinstance(self.fblock.mgr_locs, lib.BlockPlacement) tm.assert_numpy_array_equal(self.fblock.mgr_locs.as_array, np.array([0, 2, 4], dtype=np.int64)) def test_attrs(self): - self.assertEqual(self.fblock.shape, self.fblock.values.shape) - self.assertEqual(self.fblock.dtype, self.fblock.values.dtype) - self.assertEqual(len(self.fblock), len(self.fblock.values)) + assert self.fblock.shape == self.fblock.values.shape + assert self.fblock.dtype == self.fblock.values.dtype + assert len(self.fblock) == len(self.fblock.values) def test_merge(self): avals = randn(2, 10) @@ -251,26 +259,27 @@ def test_insert(self): def test_delete(self): newb = self.fblock.copy() newb.delete(0) - tm.assertIsInstance(newb.mgr_locs, lib.BlockPlacement) + assert isinstance(newb.mgr_locs, 
lib.BlockPlacement) tm.assert_numpy_array_equal(newb.mgr_locs.as_array, np.array([2, 4], dtype=np.int64)) - self.assertTrue((newb.values[0] == 1).all()) + assert (newb.values[0] == 1).all() newb = self.fblock.copy() newb.delete(1) - tm.assertIsInstance(newb.mgr_locs, lib.BlockPlacement) + assert isinstance(newb.mgr_locs, lib.BlockPlacement) tm.assert_numpy_array_equal(newb.mgr_locs.as_array, np.array([0, 4], dtype=np.int64)) - self.assertTrue((newb.values[1] == 2).all()) + assert (newb.values[1] == 2).all() newb = self.fblock.copy() newb.delete(2) tm.assert_numpy_array_equal(newb.mgr_locs.as_array, np.array([0, 2], dtype=np.int64)) - self.assertTrue((newb.values[1] == 1).all()) + assert (newb.values[1] == 1).all() newb = self.fblock.copy() - self.assertRaises(Exception, newb.delete, 3) + with pytest.raises(Exception): + newb.delete(3) def test_split_block_at(self): @@ -279,21 +288,21 @@ def test_split_block_at(self): pytest.skip("skipping for now") bs = list(self.fblock.split_block_at('a')) - self.assertEqual(len(bs), 1) - self.assertTrue(np.array_equal(bs[0].items, ['c', 'e'])) + assert len(bs) == 1 + assert np.array_equal(bs[0].items, ['c', 'e']) bs = list(self.fblock.split_block_at('c')) - self.assertEqual(len(bs), 2) - self.assertTrue(np.array_equal(bs[0].items, ['a'])) - self.assertTrue(np.array_equal(bs[1].items, ['e'])) + assert len(bs) == 2 + assert np.array_equal(bs[0].items, ['a']) + assert np.array_equal(bs[1].items, ['e']) bs = list(self.fblock.split_block_at('e')) - self.assertEqual(len(bs), 1) - self.assertTrue(np.array_equal(bs[0].items, ['a', 'c'])) + assert len(bs) == 1 + assert np.array_equal(bs[0].items, ['a', 'c']) # bblock = get_bool_ex(['f']) # bs = list(bblock.split_block_at('f')) - # self.assertEqual(len(bs), 0) + # assert len(bs), 0) class TestDatetimeBlock(tm.TestCase): @@ -303,50 +312,44 @@ def test_try_coerce_arg(self): # coerce None none_coerced = block._try_coerce_args(block.values, None)[2] - self.assertTrue(pd.Timestamp(none_coerced) is pd.NaT) + assert pd.Timestamp(none_coerced) is pd.NaT # coerce different types of date bojects vals = (np.datetime64('2010-10-10'), datetime(2010, 10, 10), date(2010, 10, 10)) for val in vals: coerced = block._try_coerce_args(block.values, val)[2] - self.assertEqual(np.int64, type(coerced)) - self.assertEqual(pd.Timestamp('2010-10-10'), pd.Timestamp(coerced)) - + assert np.int64 == type(coerced) + assert pd.Timestamp('2010-10-10') == pd.Timestamp(coerced) -class TestBlockManager(tm.TestCase): - def setUp(self): - self.mgr = create_mgr( - 'a: f8; b: object; c: f8; d: object; e: f8;' - 'f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;' - 'k: M8[ns, US/Eastern]; l: M8[ns, CET];') +class TestBlockManager(object): def test_constructor_corner(self): pass def test_attrs(self): mgr = create_mgr('a,b,c: f8-1; d,e,f: f8-2') - self.assertEqual(mgr.nblocks, 2) - self.assertEqual(len(mgr), 6) + assert mgr.nblocks == 2 + assert len(mgr) == 6 def test_is_mixed_dtype(self): - self.assertFalse(create_mgr('a,b:f8').is_mixed_type) - self.assertFalse(create_mgr('a:f8-1; b:f8-2').is_mixed_type) + assert not create_mgr('a,b:f8').is_mixed_type + assert not create_mgr('a:f8-1; b:f8-2').is_mixed_type - self.assertTrue(create_mgr('a,b:f8; c,d: f4').is_mixed_type) - self.assertTrue(create_mgr('a,b:f8; c,d: object').is_mixed_type) + assert create_mgr('a,b:f8; c,d: f4').is_mixed_type + assert create_mgr('a,b:f8; c,d: object').is_mixed_type def test_is_indexed_like(self): mgr1 = create_mgr('a,b: f8') mgr2 = create_mgr('a:i8; b:bool') mgr3 = 
create_mgr('a,b,c: f8') - self.assertTrue(mgr1._is_indexed_like(mgr1)) - self.assertTrue(mgr1._is_indexed_like(mgr2)) - self.assertTrue(mgr1._is_indexed_like(mgr3)) + assert mgr1._is_indexed_like(mgr1) + assert mgr1._is_indexed_like(mgr2) + assert mgr1._is_indexed_like(mgr3) - self.assertFalse(mgr1._is_indexed_like(mgr1.get_slice( - slice(-1), axis=1))) + assert not mgr1._is_indexed_like(mgr1.get_slice( + slice(-1), axis=1)) def test_duplicate_ref_loc_failure(self): tmp_mgr = create_mgr('a:bool; a: f8') @@ -355,61 +358,63 @@ def test_duplicate_ref_loc_failure(self): blocks[0].mgr_locs = np.array([0]) blocks[1].mgr_locs = np.array([0]) + # test trying to create block manager with overlapping ref locs - self.assertRaises(AssertionError, BlockManager, blocks, axes) + with pytest.raises(AssertionError): + BlockManager(blocks, axes) blocks[0].mgr_locs = np.array([0]) blocks[1].mgr_locs = np.array([1]) mgr = BlockManager(blocks, axes) mgr.iget(1) - def test_contains(self): - self.assertIn('a', self.mgr) - self.assertNotIn('baz', self.mgr) + def test_contains(self, mgr): + assert 'a' in mgr + assert 'baz' not in mgr - def test_pickle(self): + def test_pickle(self, mgr): - mgr2 = self.round_trip_pickle(self.mgr) - assert_frame_equal(DataFrame(self.mgr), DataFrame(mgr2)) + mgr2 = tm.round_trip_pickle(mgr) + assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) # share ref_items # self.assertIs(mgr2.blocks[0].ref_items, mgr2.blocks[1].ref_items) # GH2431 - self.assertTrue(hasattr(mgr2, "_is_consolidated")) - self.assertTrue(hasattr(mgr2, "_known_consolidated")) + assert hasattr(mgr2, "_is_consolidated") + assert hasattr(mgr2, "_known_consolidated") # reset to False on load - self.assertFalse(mgr2._is_consolidated) - self.assertFalse(mgr2._known_consolidated) + assert not mgr2._is_consolidated + assert not mgr2._known_consolidated def test_non_unique_pickle(self): mgr = create_mgr('a,a,a:f8') - mgr2 = self.round_trip_pickle(mgr) + mgr2 = tm.round_trip_pickle(mgr) assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) mgr = create_mgr('a: f8; a: i8') - mgr2 = self.round_trip_pickle(mgr) + mgr2 = tm.round_trip_pickle(mgr) assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) def test_categorical_block_pickle(self): mgr = create_mgr('a: category') - mgr2 = self.round_trip_pickle(mgr) + mgr2 = tm.round_trip_pickle(mgr) assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) smgr = create_single_mgr('category') - smgr2 = self.round_trip_pickle(smgr) + smgr2 = tm.round_trip_pickle(smgr) assert_series_equal(Series(smgr), Series(smgr2)) - def test_get_scalar(self): - for item in self.mgr.items: - for i, index in enumerate(self.mgr.axes[1]): - res = self.mgr.get_scalar((item, index)) - exp = self.mgr.get(item, fastpath=False)[i] - self.assertEqual(res, exp) - exp = self.mgr.get(item).internal_values()[i] - self.assertEqual(res, exp) + def test_get_scalar(self, mgr): + for item in mgr.items: + for i, index in enumerate(mgr.axes[1]): + res = mgr.get_scalar((item, index)) + exp = mgr.get(item, fastpath=False)[i] + assert res == exp + exp = mgr.get(item).internal_values()[i] + assert res == exp def test_get(self): cols = Index(list('abc')) @@ -438,30 +443,21 @@ def test_set(self): tm.assert_numpy_array_equal(mgr.get('d').internal_values(), np.array(['foo'] * 3, dtype=np.object_)) - def test_insert(self): - self.mgr.insert(0, 'inserted', np.arange(N)) - - self.assertEqual(self.mgr.items[0], 'inserted') - assert_almost_equal(self.mgr.get('inserted'), np.arange(N)) + def test_set_change_dtype(self, mgr): + mgr.set('baz', 
np.zeros(N, dtype=bool)) - for blk in self.mgr.blocks: - yield self.assertIs, self.mgr.items, blk.ref_items + mgr.set('baz', np.repeat('foo', N)) + assert mgr.get('baz').dtype == np.object_ - def test_set_change_dtype(self): - self.mgr.set('baz', np.zeros(N, dtype=bool)) - - self.mgr.set('baz', np.repeat('foo', N)) - self.assertEqual(self.mgr.get('baz').dtype, np.object_) - - mgr2 = self.mgr.consolidate() + mgr2 = mgr.consolidate() mgr2.set('baz', np.repeat('foo', N)) - self.assertEqual(mgr2.get('baz').dtype, np.object_) + assert mgr2.get('baz').dtype == np.object_ mgr2.set('quux', randn(N).astype(int)) - self.assertEqual(mgr2.get('quux').dtype, np.int_) + assert mgr2.get('quux').dtype == np.int_ mgr2.set('quux', randn(N)) - self.assertEqual(mgr2.get('quux').dtype, np.float_) + assert mgr2.get('quux').dtype == np.float_ def test_set_change_dtype_slice(self): # GH8850 cols = MultiIndex.from_tuples([('1st', 'a'), ('2nd', 'b'), ('3rd', 'c') @@ -469,70 +465,69 @@ def test_set_change_dtype_slice(self): # GH8850 df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols) df['2nd'] = df['2nd'] * 2.0 - self.assertEqual(sorted(df.blocks.keys()), ['float64', 'int64']) + assert sorted(df.blocks.keys()) == ['float64', 'int64'] assert_frame_equal(df.blocks['float64'], DataFrame( [[1.0, 4.0], [4.0, 10.0]], columns=cols[:2])) assert_frame_equal(df.blocks['int64'], DataFrame( [[3], [6]], columns=cols[2:])) - def test_copy(self): - cp = self.mgr.copy(deep=False) - for blk, cp_blk in zip(self.mgr.blocks, cp.blocks): + def test_copy(self, mgr): + cp = mgr.copy(deep=False) + for blk, cp_blk in zip(mgr.blocks, cp.blocks): # view assertion - self.assertTrue(cp_blk.equals(blk)) - self.assertTrue(cp_blk.values.base is blk.values.base) + assert cp_blk.equals(blk) + assert cp_blk.values.base is blk.values.base - cp = self.mgr.copy(deep=True) - for blk, cp_blk in zip(self.mgr.blocks, cp.blocks): + cp = mgr.copy(deep=True) + for blk, cp_blk in zip(mgr.blocks, cp.blocks): # copy assertion we either have a None for a base or in case of # some blocks it is an array (e.g. datetimetz), but was copied - self.assertTrue(cp_blk.equals(blk)) + assert cp_blk.equals(blk) if cp_blk.values.base is not None and blk.values.base is not None: - self.assertFalse(cp_blk.values.base is blk.values.base) + assert cp_blk.values.base is not blk.values.base else: - self.assertTrue(cp_blk.values.base is None and blk.values.base - is None) + assert cp_blk.values.base is None and blk.values.base is None def test_sparse(self): mgr = create_mgr('a: sparse-1; b: sparse-2') # what to test here? - self.assertEqual(mgr.as_matrix().dtype, np.float64) + assert mgr.as_matrix().dtype == np.float64 def test_sparse_mixed(self): mgr = create_mgr('a: sparse-1; b: sparse-2; c: f8') - self.assertEqual(len(mgr.blocks), 3) - self.assertIsInstance(mgr, BlockManager) + assert len(mgr.blocks) == 3 + assert isinstance(mgr, BlockManager) # what to test here? 
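# The methods of this class now receive ``mgr`` through pytest fixture
# injection (see the ``@pytest.fixture`` at the top of this file) rather
# than from ``setUp``. A minimal sketch of the mechanism -- the argument
# name matches the fixture function, so pytest builds and passes it:
#
#     import pytest
#
#     @pytest.fixture
#     def mgr():
#         return create_mgr('a: f8; b: object')
#
#     def test_uses_fixture(mgr):
#         assert 'a' in mgr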
def test_as_matrix_float(self): mgr = create_mgr('c: f4; d: f2; e: f8') - self.assertEqual(mgr.as_matrix().dtype, np.float64) + assert mgr.as_matrix().dtype == np.float64 mgr = create_mgr('c: f4; d: f2') - self.assertEqual(mgr.as_matrix().dtype, np.float32) + assert mgr.as_matrix().dtype == np.float32 def test_as_matrix_int_bool(self): mgr = create_mgr('a: bool-1; b: bool-2') - self.assertEqual(mgr.as_matrix().dtype, np.bool_) + assert mgr.as_matrix().dtype == np.bool_ mgr = create_mgr('a: i8-1; b: i8-2; c: i4; d: i2; e: u1') - self.assertEqual(mgr.as_matrix().dtype, np.int64) + assert mgr.as_matrix().dtype == np.int64 mgr = create_mgr('c: i4; d: i2; e: u1') - self.assertEqual(mgr.as_matrix().dtype, np.int32) + assert mgr.as_matrix().dtype == np.int32 def test_as_matrix_datetime(self): mgr = create_mgr('h: datetime-1; g: datetime-2') - self.assertEqual(mgr.as_matrix().dtype, 'M8[ns]') + assert mgr.as_matrix().dtype == 'M8[ns]' def test_as_matrix_datetime_tz(self): mgr = create_mgr('h: M8[ns, US/Eastern]; g: M8[ns, CET]') - self.assertEqual(mgr.get('h').dtype, 'datetime64[ns, US/Eastern]') - self.assertEqual(mgr.get('g').dtype, 'datetime64[ns, CET]') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.get('h').dtype == 'datetime64[ns, US/Eastern]' + assert mgr.get('g').dtype == 'datetime64[ns, CET]' + assert mgr.as_matrix().dtype == 'object' def test_astype(self): # coerce all @@ -540,9 +535,9 @@ def test_astype(self): for t in ['float16', 'float32', 'float64', 'int32', 'int64']: t = np.dtype(t) tmgr = mgr.astype(t) - self.assertEqual(tmgr.get('c').dtype.type, t) - self.assertEqual(tmgr.get('d').dtype.type, t) - self.assertEqual(tmgr.get('e').dtype.type, t) + assert tmgr.get('c').dtype.type == t + assert tmgr.get('d').dtype.type == t + assert tmgr.get('e').dtype.type == t # mixed mgr = create_mgr('a,b: object; c: bool; d: datetime;' @@ -550,24 +545,24 @@ def test_astype(self): for t in ['float16', 'float32', 'float64', 'int32', 'int64']: t = np.dtype(t) tmgr = mgr.astype(t, errors='ignore') - self.assertEqual(tmgr.get('c').dtype.type, t) - self.assertEqual(tmgr.get('e').dtype.type, t) - self.assertEqual(tmgr.get('f').dtype.type, t) - self.assertEqual(tmgr.get('g').dtype.type, t) + assert tmgr.get('c').dtype.type == t + assert tmgr.get('e').dtype.type == t + assert tmgr.get('f').dtype.type == t + assert tmgr.get('g').dtype.type == t - self.assertEqual(tmgr.get('a').dtype.type, np.object_) - self.assertEqual(tmgr.get('b').dtype.type, np.object_) + assert tmgr.get('a').dtype.type == np.object_ + assert tmgr.get('b').dtype.type == np.object_ if t != np.int64: - self.assertEqual(tmgr.get('d').dtype.type, np.datetime64) + assert tmgr.get('d').dtype.type == np.datetime64 else: - self.assertEqual(tmgr.get('d').dtype.type, t) + assert tmgr.get('d').dtype.type == t def test_convert(self): def _compare(old_mgr, new_mgr): """ compare the blocks, numeric compare ==, object don't """ old_blocks = set(old_mgr.blocks) new_blocks = set(new_mgr.blocks) - self.assertEqual(len(old_blocks), len(new_blocks)) + assert len(old_blocks) == len(new_blocks) # compare non-numeric for b in old_blocks: @@ -576,7 +571,7 @@ def _compare(old_mgr, new_mgr): if (b.values == nb.values).all(): found = True break - self.assertTrue(found) + assert found for b in new_blocks: found = False @@ -584,7 +579,7 @@ def _compare(old_mgr, new_mgr): if (b.values == ob.values).all(): found = True break - self.assertTrue(found) + assert found # noops mgr = create_mgr('f: i8; g: f8') @@ -601,11 +596,11 @@ def _compare(old_mgr, 
new_mgr): mgr.set('b', np.array(['2.'] * N, dtype=np.object_)) mgr.set('foo', np.array(['foo.'] * N, dtype=np.object_)) new_mgr = mgr.convert(numeric=True) - self.assertEqual(new_mgr.get('a').dtype, np.int64) - self.assertEqual(new_mgr.get('b').dtype, np.float64) - self.assertEqual(new_mgr.get('foo').dtype, np.object_) - self.assertEqual(new_mgr.get('f').dtype, np.int64) - self.assertEqual(new_mgr.get('g').dtype, np.float64) + assert new_mgr.get('a').dtype == np.int64 + assert new_mgr.get('b').dtype == np.float64 + assert new_mgr.get('foo').dtype == np.object_ + assert new_mgr.get('f').dtype == np.int64 + assert new_mgr.get('g').dtype == np.float64 mgr = create_mgr('a,b,foo: object; f: i4; bool: bool; dt: datetime;' 'i: i8; g: f8; h: f2') @@ -613,15 +608,15 @@ def _compare(old_mgr, new_mgr): mgr.set('b', np.array(['2.'] * N, dtype=np.object_)) mgr.set('foo', np.array(['foo.'] * N, dtype=np.object_)) new_mgr = mgr.convert(numeric=True) - self.assertEqual(new_mgr.get('a').dtype, np.int64) - self.assertEqual(new_mgr.get('b').dtype, np.float64) - self.assertEqual(new_mgr.get('foo').dtype, np.object_) - self.assertEqual(new_mgr.get('f').dtype, np.int32) - self.assertEqual(new_mgr.get('bool').dtype, np.bool_) - self.assertEqual(new_mgr.get('dt').dtype.type, np.datetime64) - self.assertEqual(new_mgr.get('i').dtype, np.int64) - self.assertEqual(new_mgr.get('g').dtype, np.float64) - self.assertEqual(new_mgr.get('h').dtype, np.float16) + assert new_mgr.get('a').dtype == np.int64 + assert new_mgr.get('b').dtype == np.float64 + assert new_mgr.get('foo').dtype == np.object_ + assert new_mgr.get('f').dtype == np.int32 + assert new_mgr.get('bool').dtype == np.bool_ + assert new_mgr.get('dt').dtype.type == np.datetime64 + assert new_mgr.get('i').dtype == np.int64 + assert new_mgr.get('g').dtype == np.float64 + assert new_mgr.get('h').dtype == np.float16 def test_interleave(self): @@ -629,49 +624,49 @@ def test_interleave(self): for dtype in ['f8', 'i8', 'object', 'bool', 'complex', 'M8[ns]', 'm8[ns]']: mgr = create_mgr('a: {0}'.format(dtype)) - self.assertEqual(mgr.as_matrix().dtype, dtype) + assert mgr.as_matrix().dtype == dtype mgr = create_mgr('a: {0}; b: {0}'.format(dtype)) - self.assertEqual(mgr.as_matrix().dtype, dtype) + assert mgr.as_matrix().dtype == dtype # will be converted according the actual dtype of the underlying mgr = create_mgr('a: category') - self.assertEqual(mgr.as_matrix().dtype, 'i8') + assert mgr.as_matrix().dtype == 'i8' mgr = create_mgr('a: category; b: category') - self.assertEqual(mgr.as_matrix().dtype, 'i8'), + assert mgr.as_matrix().dtype == 'i8' mgr = create_mgr('a: category; b: category2') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: category2') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: category2; b: category2') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' # combinations mgr = create_mgr('a: f8') - self.assertEqual(mgr.as_matrix().dtype, 'f8') + assert mgr.as_matrix().dtype == 'f8' mgr = create_mgr('a: f8; b: i8') - self.assertEqual(mgr.as_matrix().dtype, 'f8') + assert mgr.as_matrix().dtype == 'f8' mgr = create_mgr('a: f4; b: i8') - self.assertEqual(mgr.as_matrix().dtype, 'f8') + assert mgr.as_matrix().dtype == 'f8' mgr = create_mgr('a: f4; b: i8; d: object') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: bool; b:
i8') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: complex') - self.assertEqual(mgr.as_matrix().dtype, 'complex') + assert mgr.as_matrix().dtype == 'complex' mgr = create_mgr('a: f8; b: category') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: M8[ns]; b: category') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: M8[ns]; b: bool') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: M8[ns]; b: i8') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: m8[ns]; b: bool') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: m8[ns]; b: i8') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: M8[ns]; b: m8[ns]') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' def test_interleave_non_unique_cols(self): df = DataFrame([ @@ -682,26 +677,26 @@ def test_interleave_non_unique_cols(self): df_unique = df.copy() df_unique.columns = ['x', 'y'] - self.assertEqual(df_unique.values.shape, df.values.shape) + assert df_unique.values.shape == df.values.shape tm.assert_numpy_array_equal(df_unique.values[0], df.values[0]) tm.assert_numpy_array_equal(df_unique.values[1], df.values[1]) def test_consolidate(self): pass - def test_consolidate_ordering_issues(self): - self.mgr.set('f', randn(N)) - self.mgr.set('d', randn(N)) - self.mgr.set('b', randn(N)) - self.mgr.set('g', randn(N)) - self.mgr.set('h', randn(N)) - - # we have datetime/tz blocks in self.mgr - cons = self.mgr.consolidate() - self.assertEqual(cons.nblocks, 4) - cons = self.mgr.consolidate().get_numeric_data() - self.assertEqual(cons.nblocks, 1) - tm.assertIsInstance(cons.blocks[0].mgr_locs, lib.BlockPlacement) + def test_consolidate_ordering_issues(self, mgr): + mgr.set('f', randn(N)) + mgr.set('d', randn(N)) + mgr.set('b', randn(N)) + mgr.set('g', randn(N)) + mgr.set('h', randn(N)) + + # we have datetime/tz blocks in mgr + cons = mgr.consolidate() + assert cons.nblocks == 4 + cons = mgr.consolidate().get_numeric_data() + assert cons.nblocks == 1 + assert isinstance(cons.blocks[0].mgr_locs, lib.BlockPlacement) tm.assert_numpy_array_equal(cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.int64)) @@ -714,7 +709,7 @@ def test_reindex_items(self): 'f: bool; g: f8-2') reindexed = mgr.reindex_axis(['g', 'c', 'a', 'd'], axis=0) - self.assertEqual(reindexed.nblocks, 2) + assert reindexed.nblocks == 2 tm.assert_index_equal(reindexed.items, pd.Index(['g', 'c', 'a', 'd'])) assert_almost_equal( mgr.get('g', fastpath=False), reindexed.get('g', fastpath=False)) @@ -748,9 +743,9 @@ def test_multiindex_xs(self): mgr.set_axis(1, index) result = mgr.xs('bar', axis=1) - self.assertEqual(result.shape, (6, 2)) - self.assertEqual(result.axes[1][0], ('bar', 'one')) - self.assertEqual(result.axes[1][1], ('bar', 'two')) + assert result.shape == (6, 2) + assert result.axes[1][0] == ('bar', 'one') + assert result.axes[1][1] == ('bar', 'two') def test_get_numeric_data(self): mgr = create_mgr('int: int; float: float; complex: complex;' @@ -826,11 +821,11 @@ def test_equals(self): # unique items bm1 = create_mgr('a,b,c: i8-1; d,e,f: i8-2') bm2 = 
BlockManager(bm1.blocks[::-1], bm1.axes) - self.assertTrue(bm1.equals(bm2)) + assert bm1.equals(bm2) bm1 = create_mgr('a,a,a: i8-1; b,b,b: i8-2') bm2 = BlockManager(bm1.blocks[::-1], bm1.axes) - self.assertTrue(bm1.equals(bm2)) + assert bm1.equals(bm2) def test_equals_block_order_different_dtypes(self): # GH 9330 @@ -848,19 +843,19 @@ def test_equals_block_order_different_dtypes(self): block_perms = itertools.permutations(bm.blocks) for bm_perm in block_perms: bm_this = BlockManager(bm_perm, bm.axes) - self.assertTrue(bm.equals(bm_this)) - self.assertTrue(bm_this.equals(bm)) + assert bm.equals(bm_this) + assert bm_this.equals(bm) def test_single_mgr_ctor(self): mgr = create_single_mgr('f8', num_rows=5) - self.assertEqual(mgr.as_matrix().tolist(), [0., 1., 2., 3., 4.]) + assert mgr.as_matrix().tolist() == [0., 1., 2., 3., 4.] def test_validate_bool_args(self): invalid_values = [1, "True", [1, 2, 3], 5.0] bm1 = create_mgr('a,b,c: i8-1; d,e,f: i8-2') for value in invalid_values: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): bm1.replace_list([1], [2], inplace=value) @@ -918,32 +913,37 @@ def assert_slice_ok(mgr, axis, slobj): for mgr in self.MANAGERS: for ax in range(mgr.ndim): # slice - yield assert_slice_ok, mgr, ax, slice(None) - yield assert_slice_ok, mgr, ax, slice(3) - yield assert_slice_ok, mgr, ax, slice(100) - yield assert_slice_ok, mgr, ax, slice(1, 4) - yield assert_slice_ok, mgr, ax, slice(3, 0, -2) + assert_slice_ok(mgr, ax, slice(None)) + assert_slice_ok(mgr, ax, slice(3)) + assert_slice_ok(mgr, ax, slice(100)) + assert_slice_ok(mgr, ax, slice(1, 4)) + assert_slice_ok(mgr, ax, slice(3, 0, -2)) # boolean mask - yield assert_slice_ok, mgr, ax, np.array([], dtype=np.bool_) - yield (assert_slice_ok, mgr, ax, - np.ones(mgr.shape[ax], dtype=np.bool_)) - yield (assert_slice_ok, mgr, ax, - np.zeros(mgr.shape[ax], dtype=np.bool_)) + assert_slice_ok( + mgr, ax, np.array([], dtype=np.bool_)) + assert_slice_ok( + mgr, ax, + np.ones(mgr.shape[ax], dtype=np.bool_)) + assert_slice_ok( + mgr, ax, + np.zeros(mgr.shape[ax], dtype=np.bool_)) if mgr.shape[ax] >= 3: - yield (assert_slice_ok, mgr, ax, - np.arange(mgr.shape[ax]) % 3 == 0) - yield (assert_slice_ok, mgr, ax, np.array( - [True, True, False], dtype=np.bool_)) + assert_slice_ok( + mgr, ax, + np.arange(mgr.shape[ax]) % 3 == 0) + assert_slice_ok( + mgr, ax, np.array( + [True, True, False], dtype=np.bool_)) # fancy indexer - yield assert_slice_ok, mgr, ax, [] - yield assert_slice_ok, mgr, ax, lrange(mgr.shape[ax]) + assert_slice_ok(mgr, ax, []) + assert_slice_ok(mgr, ax, lrange(mgr.shape[ax])) if mgr.shape[ax] >= 3: - yield assert_slice_ok, mgr, ax, [0, 1, 2] - yield assert_slice_ok, mgr, ax, [-1, -2, -3] + assert_slice_ok(mgr, ax, [0, 1, 2]) + assert_slice_ok(mgr, ax, [-1, -2, -3]) def test_take(self): def assert_take_ok(mgr, axis, indexer): @@ -957,13 +957,13 @@ def assert_take_ok(mgr, axis, indexer): for mgr in self.MANAGERS: for ax in range(mgr.ndim): # take/fancy indexer - yield assert_take_ok, mgr, ax, [] - yield assert_take_ok, mgr, ax, [0, 0, 0] - yield assert_take_ok, mgr, ax, lrange(mgr.shape[ax]) + assert_take_ok(mgr, ax, []) + assert_take_ok(mgr, ax, [0, 0, 0]) + assert_take_ok(mgr, ax, lrange(mgr.shape[ax])) if mgr.shape[ax] >= 3: - yield assert_take_ok, mgr, ax, [0, 1, 2] - yield assert_take_ok, mgr, ax, [-1, -2, -3] + assert_take_ok(mgr, ax, [0, 1, 2]) + assert_take_ok(mgr, ax, [-1, -2, -3]) def test_reindex_axis(self): def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value): @@ -981,25 +981,33 
@@ def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value): for mgr in self.MANAGERS: for ax in range(mgr.ndim): for fill_value in (None, np.nan, 100.): - yield (assert_reindex_axis_is_ok, mgr, ax, - pd.Index([]), fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, mgr.axes[ax], - fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, - mgr.axes[ax][[0, 0, 0]], fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, - pd.Index(['foo', 'bar', 'baz']), fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, - pd.Index(['foo', mgr.axes[ax][0], 'baz']), - fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + pd.Index([]), fill_value) + assert_reindex_axis_is_ok( + mgr, ax, mgr.axes[ax], + fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + mgr.axes[ax][[0, 0, 0]], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + pd.Index(['foo', 'bar', 'baz']), fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + pd.Index(['foo', mgr.axes[ax][0], 'baz']), + fill_value) if mgr.shape[ax] >= 3: - yield (assert_reindex_axis_is_ok, mgr, ax, - mgr.axes[ax][:-3], fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, - mgr.axes[ax][-3::-1], fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, - mgr.axes[ax][[0, 1, 2, 0, 1, 2]], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + mgr.axes[ax][:-3], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + mgr.axes[ax][-3::-1], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + mgr.axes[ax][[0, 1, 2, 0, 1, 2]], fill_value) def test_reindex_indexer(self): @@ -1018,33 +1026,41 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, for mgr in self.MANAGERS: for ax in range(mgr.ndim): for fill_value in (None, np.nan, 100.): - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index([]), [], fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - mgr.axes[ax], np.arange(mgr.shape[ax]), fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index(['foo'] * mgr.shape[ax]), - np.arange(mgr.shape[ax]), fill_value) - - yield (assert_reindex_indexer_is_ok, mgr, ax, - mgr.axes[ax][::-1], np.arange(mgr.shape[ax]), - fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, mgr.axes[ax], - np.arange(mgr.shape[ax])[::-1], fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index(['foo', 'bar', 'baz']), - [0, 0, 0], fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index(['foo', 'bar', 'baz']), - [-1, 0, -1], fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index(['foo', mgr.axes[ax][0], 'baz']), - [-1, -1, -1], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index([]), [], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + mgr.axes[ax], np.arange(mgr.shape[ax]), fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index(['foo'] * mgr.shape[ax]), + np.arange(mgr.shape[ax]), fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + mgr.axes[ax][::-1], np.arange(mgr.shape[ax]), + fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, mgr.axes[ax], + np.arange(mgr.shape[ax])[::-1], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index(['foo', 'bar', 'baz']), + [0, 0, 0], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index(['foo', 'bar', 'baz']), + [-1, 0, -1], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index(['foo', mgr.axes[ax][0], 'baz']), + [-1, -1, -1], fill_value) if mgr.shape[ax] >= 3: - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index(['foo', 'bar', 'baz']), - [0, 1, 2], fill_value) + 
assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index(['foo', 'bar', 'baz']), + [0, 1, 2], fill_value) # test_get_slice(slice_like, axis) # take(indexer, axis) @@ -1055,21 +1071,23 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, class TestBlockPlacement(tm.TestCase): def test_slice_len(self): - self.assertEqual(len(BlockPlacement(slice(0, 4))), 4) - self.assertEqual(len(BlockPlacement(slice(0, 4, 2))), 2) - self.assertEqual(len(BlockPlacement(slice(0, 3, 2))), 2) + assert len(BlockPlacement(slice(0, 4))) == 4 + assert len(BlockPlacement(slice(0, 4, 2))) == 2 + assert len(BlockPlacement(slice(0, 3, 2))) == 2 - self.assertEqual(len(BlockPlacement(slice(0, 1, 2))), 1) - self.assertEqual(len(BlockPlacement(slice(1, 0, -1))), 1) + assert len(BlockPlacement(slice(0, 1, 2))) == 1 + assert len(BlockPlacement(slice(1, 0, -1))) == 1 def test_zero_step_raises(self): - self.assertRaises(ValueError, BlockPlacement, slice(1, 1, 0)) - self.assertRaises(ValueError, BlockPlacement, slice(1, 2, 0)) + with pytest.raises(ValueError): + BlockPlacement(slice(1, 1, 0)) + with pytest.raises(ValueError): + BlockPlacement(slice(1, 2, 0)) def test_unbounded_slice_raises(self): def assert_unbounded_slice_error(slc): - self.assertRaisesRegexp(ValueError, "unbounded slice", - lambda: BlockPlacement(slc)) + tm.assertRaisesRegexp(ValueError, "unbounded slice", + lambda: BlockPlacement(slc)) assert_unbounded_slice_error(slice(None, None)) assert_unbounded_slice_error(slice(10, None)) @@ -1087,7 +1105,7 @@ def assert_unbounded_slice_error(slc): def test_not_slice_like_slices(self): def assert_not_slice_like(slc): - self.assertTrue(not BlockPlacement(slc).is_slice_like) + assert not BlockPlacement(slc).is_slice_like assert_not_slice_like(slice(0, 0)) assert_not_slice_like(slice(100, 0)) @@ -1095,12 +1113,12 @@ def assert_not_slice_like(slc): assert_not_slice_like(slice(100, 100, -1)) assert_not_slice_like(slice(0, 100, -1)) - self.assertTrue(not BlockPlacement(slice(0, 0)).is_slice_like) - self.assertTrue(not BlockPlacement(slice(100, 100)).is_slice_like) + assert not BlockPlacement(slice(0, 0)).is_slice_like + assert not BlockPlacement(slice(100, 100)).is_slice_like def test_array_to_slice_conversion(self): def assert_as_slice_equals(arr, slc): - self.assertEqual(BlockPlacement(arr).as_slice, slc) + assert BlockPlacement(arr).as_slice == slc assert_as_slice_equals([0], slice(0, 1, 1)) assert_as_slice_equals([100], slice(100, 101, 1)) @@ -1115,7 +1133,7 @@ def assert_as_slice_equals(arr, slc): def test_not_slice_like_arrays(self): def assert_not_slice_like(arr): - self.assertTrue(not BlockPlacement(arr).is_slice_like) + assert not BlockPlacement(arr).is_slice_like assert_not_slice_like([]) assert_not_slice_like([-1]) @@ -1128,13 +1146,12 @@ def assert_not_slice_like(arr): assert_not_slice_like([1, 1, 1]) def test_slice_iter(self): - self.assertEqual(list(BlockPlacement(slice(0, 3))), [0, 1, 2]) - self.assertEqual(list(BlockPlacement(slice(0, 0))), []) - self.assertEqual(list(BlockPlacement(slice(3, 0))), []) + assert list(BlockPlacement(slice(0, 3))) == [0, 1, 2] + assert list(BlockPlacement(slice(0, 0))) == [] + assert list(BlockPlacement(slice(3, 0))) == [] - self.assertEqual(list(BlockPlacement(slice(3, 0, -1))), [3, 2, 1]) - self.assertEqual(list(BlockPlacement(slice(3, None, -1))), - [3, 2, 1, 0]) + assert list(BlockPlacement(slice(3, 0, -1))) == [3, 2, 1] + assert list(BlockPlacement(slice(3, None, -1))) == [3, 2, 1, 0] def test_slice_to_array_conversion(self): def assert_as_array_equals(slc, 
asarray): @@ -1152,13 +1169,13 @@ def assert_as_array_equals(slc, asarray): def test_blockplacement_add(self): bpl = BlockPlacement(slice(0, 5)) - self.assertEqual(bpl.add(1).as_slice, slice(1, 6, 1)) - self.assertEqual(bpl.add(np.arange(5)).as_slice, slice(0, 10, 2)) - self.assertEqual(list(bpl.add(np.arange(5, 0, -1))), [5, 5, 5, 5, 5]) + assert bpl.add(1).as_slice == slice(1, 6, 1) + assert bpl.add(np.arange(5)).as_slice == slice(0, 10, 2) + assert list(bpl.add(np.arange(5, 0, -1))) == [5, 5, 5, 5, 5] def test_blockplacement_add_int(self): def assert_add_equals(val, inc, result): - self.assertEqual(list(BlockPlacement(val).add(inc)), result) + assert list(BlockPlacement(val).add(inc)) == result assert_add_equals(slice(0, 0), 0, []) assert_add_equals(slice(1, 4), 0, [1, 2, 3]) @@ -1177,9 +1194,9 @@ def assert_add_equals(val, inc, result): assert_add_equals(slice(3, 0, -1), -1, [2, 1, 0]) assert_add_equals([1, 2, 4], -1, [0, 1, 3]) - self.assertRaises(ValueError, - lambda: BlockPlacement(slice(1, 4)).add(-10)) - self.assertRaises(ValueError, - lambda: BlockPlacement([1, 2, 4]).add(-10)) - self.assertRaises(ValueError, - lambda: BlockPlacement(slice(2, None, -1)).add(-1)) + with pytest.raises(ValueError): + BlockPlacement(slice(1, 4)).add(-10) + with pytest.raises(ValueError): + BlockPlacement([1, 2, 4]).add(-10) + with pytest.raises(ValueError): + BlockPlacement(slice(2, None, -1)).add(-1) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 3f2973a9834ca..fe03d7886e661 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -646,7 +646,7 @@ def test_dtypes(self): f = self.funcs[f_name] d = self.data[d_name] exp = self.expects[d_name][f_name] - yield self.check_dtypes, f, f_name, d, d_name, exp + self.check_dtypes(f, f_name, d, d_name, exp) def check_dtypes(self, f, f_name, d, d_name, exp): roll = d.rolling(window=self.window) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 154476ce8340a..cf76f4ead77e3 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -93,11 +93,7 @@ def reset_display_options(self): pd.reset_option('^display.', silent=True) def round_trip_pickle(self, obj, path=None): - if path is None: - path = u('__%s__.pickle' % rands(10)) - with ensure_clean(path) as path: - pd.to_pickle(obj, path) - return pd.read_pickle(path) + return round_trip_pickle(obj, path=path) # https://docs.python.org/3/library/unittest.html#deprecated-aliases def assertEquals(self, *args, **kwargs): @@ -121,6 +117,14 @@ def assertNotAlmostEquals(self, *args, **kwargs): self.assertNotAlmostEqual)(*args, **kwargs) +def round_trip_pickle(obj, path=None): + if path is None: + path = u('__%s__.pickle' % rands(10)) + with ensure_clean(path) as path: + pd.to_pickle(obj, path) + return pd.read_pickle(path) + + def assert_almost_equal(left, right, check_exact=False, check_dtype='equiv', check_less_precise=False, **kwargs): diff --git a/setup.cfg b/setup.cfg index 8de4fc955bd50..8b32f0f62fe28 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,7 +24,6 @@ split_penalty_logical_operator = 30 [tool:pytest] # TODO: Change all yield-based (nose-style) fixutures to pytest fixtures # Silencing the warning until then -addopts = --disable-pytest-warnings testpaths = pandas markers = single: mark a test as single cpu only From 2c3f808e55d8dc61959b2fd33a103575f2fb85ef Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 10:43:35 -0400 Subject: [PATCH 219/933] CI: actually use the miniconda cache :> --- .travis.yml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ee093e5bf0e60..af3098b3fc715 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ language: python # The cash directories will be deleted if anything in ci/ changes in a commit cache: directories: - - $HOME/miniconda # miniconda cache + - $HOME/miniconda3 # miniconda cache - $HOME/.cache # cython cache - $HOME/.ccache # compiler cache From d96826024aad3f08c365bd74a43a684677982a89 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 10:58:34 -0400 Subject: [PATCH 220/933] TST: only catch deprecation warnings for top-level module imports (#15718) --- pandas/tests/api/test_api.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index db92210478182..73222c246fc70 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -249,31 +249,26 @@ def test_groupby(self): class TestJson(tm.TestCase): def test_deprecation_access_func(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): pd.json.dumps([]) class TestParser(tm.TestCase): def test_deprecation_access_func(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): pd.parser.na_values class TestLib(tm.TestCase): def test_deprecation_access_func(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): pd.lib.infer_dtype class TestTSLib(tm.TestCase): def test_deprecation_access_func(self): - # some libraries may be imported before we - # test and could show the warning with catch_warnings(record=True): pd.tslib.Timestamp From f0533e4f72bc3d98c2aa54f6bb8bcec157c4db41 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 11:08:38 -0400 Subject: [PATCH 221/933] CI: fix cache again --- ci/install_travis.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index aad87ea37439f..67b94da120d90 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -35,7 +35,7 @@ echo "[home_dir: $home_dir]" # install miniconda MINICONDA_DIR="$HOME/miniconda3" -if [ "$USE_CACHE" ] && [ -d "$MINICONDA_DIR" ]; then +if [ "$USE_CACHE" ] && [ -d "$MINICONDA_DIR/bin" ]; then echo "[Using cached Miniconda install]" else @@ -54,6 +54,9 @@ else bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 fi +echo "[show conds]" +which conda + echo "[update conda]" conda config --set ssl_verify false || exit 1 conda config --set always_yes true --set changeps1 false || exit 1 From 0d9d27cba5381bec0fea8385ed26a836f82d9520 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 11:28:44 -0400 Subject: [PATCH 222/933] CI: typo in using ccache --- ci/install_travis.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 67b94da120d90..f0f4bc0873e05 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -54,7 +54,7 @@ else bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 fi -echo "[show conds]" +echo "[show conda]" which conda echo "[update conda]" @@ -78,7 +78,7 @@ fi conda info -a || exit 1 # set the compiler cache to work -if [ "$USE_CACHE" ] && "${TRAVIS_OS_NAME}" == "linux" ]; then +if [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then echo "[Using ccache]" export PATH=/usr/lib/ccache:/usr/lib64/ccache:$PATH gcc=$(which gcc) 
From 0ad89761df376d52eaee90b52b9b15eb0f06af54 Mon Sep 17 00:00:00 2001 From: Lorenzo Cestaro Date: Fri, 17 Mar 2017 11:41:26 -0400 Subject: [PATCH 223/933] DOC: Update broken link in cookbook.rst #15605 closes #15605 Author: Lorenzo Cestaro Closes #15720 from LorenzoCestaro/fix-15605 and squashes the following commits: 006eefa [Lorenzo Cestaro] DOC: Update broken link in cookbook.rst #15605 --- doc/source/cookbook.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 841195de3da47..8fa1283ffc924 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -905,7 +905,7 @@ CSV The :ref:`CSV ` docs -`read_csv in action `__ +`read_csv in action `__ `appending to a csv `__ From ad3d88600825d02f4540e2c2614f0c7a93e4af35 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 12:35:27 -0400 Subject: [PATCH 224/933] CI: don't fail if our env already exists in caching --- ci/install_travis.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index f0f4bc0873e05..8bf6de3efe7c4 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -47,11 +47,11 @@ else # install miniconda if [ "${TRAVIS_OS_NAME}" == "osx" ]; then - wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 + time wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 else - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 + time wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 fi - bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 + time bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 fi echo "[show conda]" @@ -90,13 +90,16 @@ else echo "[Not using ccache]" fi +echo "[create env]" + # may have installation instructions for this build INSTALL="ci/install-${PYTHON_VERSION}${JOB_TAG}.sh" if [ -e ${INSTALL} ]; then time bash $INSTALL || exit 1 else # create new env - time conda create -n pandas python=$PYTHON_VERSION pytest || exit 1 + # this may already exist, in which case our caching worked + time conda create -n pandas python=$PYTHON_VERSION pytest fi # build deps From a1b118cf46dc0a92fc16f2268b07731e27ed00d3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 13:11:55 -0400 Subject: [PATCH 225/933] CI: remove caching for miniconda itself (#15722) --- .travis.yml | 1 - ci/install_travis.sh | 25 ++++++++++--------------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/.travis.yml b/.travis.yml index af3098b3fc715..c1419dd0c5d3b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,6 @@ language: python # The cash directories will be deleted if anything in ci/ changes in a commit cache: directories: - - $HOME/miniconda3 # miniconda cache - $HOME/.cache # cython cache - $HOME/.ccache # compiler cache diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 8bf6de3efe7c4..e59502b810975 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -35,24 +35,19 @@ echo "[home_dir: $home_dir]" # install miniconda MINICONDA_DIR="$HOME/miniconda3" -if [ "$USE_CACHE" ] && [ -d "$MINICONDA_DIR/bin" ]; then - echo "[Using cached Miniconda install]" +echo "[Using clean Miniconda install]" -else - echo "[Using clean Miniconda install]" - - if [ -d "$MINICONDA_DIR" ]; then - rm -rf
"$MINICONDA_DIR" +fi - # install miniconda - if [ "${TRAVIS_OS_NAME}" == "osx" ]; then - time wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 - else - time wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 - fi - time bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 +# install miniconda +if [ "${TRAVIS_OS_NAME}" == "osx" ]; then + time wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 +else + time wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 fi +time bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 echo "[show conda]" which conda From 043efa6c94e6abdd033293ba55cd8da7e3763d16 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 15:24:35 -0400 Subject: [PATCH 226/933] CI: remove miniconda from actual cache scripts --- ci/check_cache.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ci/check_cache.sh b/ci/check_cache.sh index cd7a6e8f6b6f9..1c9de7b017569 100755 --- a/ci/check_cache.sh +++ b/ci/check_cache.sh @@ -12,14 +12,12 @@ else ci_changes=$(git diff PR_HEAD~2 --numstat | grep -E "ci/"| wc -l) fi -MINICONDA_DIR="$HOME/miniconda/" CACHE_DIR="$HOME/.cache/" CCACHE_DIR="$HOME/.ccache/" if [ $ci_changes -ne 0 ] then echo "Files have changed in ci/ deleting all caches" - rm -rf "$MINICONDA_DIR" rm -rf "$CACHE_DIR" rm -rf "$CCACHE_DIR" -fi \ No newline at end of file +fi From 5e96fb050afc192d464904f8f4a2a6e07723ee37 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 15:39:53 -0400 Subject: [PATCH 227/933] CI: install ccache on osx --- ci/install_travis.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index e59502b810975..610e6255e6832 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -32,6 +32,11 @@ edit_init home_dir=$(pwd) echo "[home_dir: $home_dir]" +if [ "${TRAVIS_OS_NAME}" == "osx" ]; then + echo "[install ccache]" + time brew install ccache +fi + # install miniconda MINICONDA_DIR="$HOME/miniconda3" From a9c823922c08305b3cfd12cca52e3302f831429e Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 17 Mar 2017 20:11:31 -0400 Subject: [PATCH 228/933] MAINT: Drop take_last kwarg from method signatures Affected methods: 1) nlargest 2) nsmallest 3) duplicated 4) drop_duplicates xref #10236, #10792, #10920. 
Author: gfyoung Closes #15710 from gfyoung/create-last-kw-drop and squashes the following commits: b416290 [gfyoung] MAINT: Drop take_last kwarg from method signatures --- asv_bench/benchmarks/series_methods.py | 12 ++--- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/base.py | 6 --- pandas/core/frame.py | 9 +--- pandas/core/groupby.py | 28 +++------- pandas/core/series.py | 10 ---- pandas/indexes/base.py | 4 -- pandas/indexes/category.py | 5 +- pandas/indexes/multi.py | 2 - pandas/tests/frame/test_analytics.py | 75 -------------------------- pandas/tests/groupby/test_groupby.py | 7 +-- pandas/tests/series/test_analytics.py | 33 ------------ pandas/tests/test_base.py | 16 ------ pandas/tests/test_multilevel.py | 11 ---- vb_suite/series_methods.py | 16 +++--- 15 files changed, 26 insertions(+), 209 deletions(-) diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index 413c4e044fd3a..c66654ee1e006 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -68,8 +68,8 @@ def setup(self): self.s4 = self.s3.astype('object') def time_series_nlargest1(self): - self.s1.nlargest(3, take_last=True) - self.s1.nlargest(3, take_last=False) + self.s1.nlargest(3, keep='last') + self.s1.nlargest(3, keep='first') class series_nlargest2(object): @@ -83,8 +83,8 @@ def setup(self): self.s4 = self.s3.astype('object') def time_series_nlargest2(self): - self.s2.nlargest(3, take_last=True) - self.s2.nlargest(3, take_last=False) + self.s2.nlargest(3, keep='last') + self.s2.nlargest(3, keep='first') class series_nsmallest2(object): @@ -98,8 +98,8 @@ def setup(self): self.s4 = self.s3.astype('object') def time_series_nsmallest2(self): - self.s2.nsmallest(3, take_last=True) - self.s2.nsmallest(3, take_last=False) + self.s2.nsmallest(3, keep='last') + self.s2.nsmallest(3, keep='first') class series_dropna_int64(object): diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 29d05ddcfb497..9cf53300f8cca 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -769,6 +769,7 @@ Removal of prior version deprecations/changes in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). - The deprecated ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) - The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`) +- The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) .. _whatsnew_0200.performance: diff --git a/pandas/core/base.py b/pandas/core/base.py index d7c9e35ab6a51..bde60be3ddcff 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1065,7 +1065,6 @@ def searchsorted(self, value, side='left', sorter=None): - ``first`` : Drop duplicates except for the first occurrence. - ``last`` : Drop duplicates except for the last occurrence. - False : Drop all duplicates. 
- take_last : deprecated %(inplace)s Returns @@ -1073,8 +1072,6 @@ def searchsorted(self, value, side='left', sorter=None): deduplicated : %(klass)s """) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(_shared_docs['drop_duplicates'] % _indexops_doc_kwargs) def drop_duplicates(self, keep='first', inplace=False): inplace = validate_bool_kwarg(inplace, 'inplace') @@ -1100,15 +1097,12 @@ def drop_duplicates(self, keep='first', inplace=False): - ``last`` : Mark duplicates as ``True`` except for the last occurrence. - False : Mark all duplicates as ``True``. - take_last : deprecated Returns ------- duplicated : %(duplicated)s """) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(_shared_docs['duplicated'] % _indexops_doc_kwargs) def duplicated(self, keep='first'): from pandas.core.algorithms import duplicated diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 987eb10101f12..3696051b269e3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -77,8 +77,7 @@ OrderedDict, raise_with_traceback) from pandas import compat from pandas.compat.numpy import function as nv -from pandas.util.decorators import (deprecate_kwarg, Appender, - Substitution) +from pandas.util.decorators import Appender, Substitution from pandas.util.validators import validate_bool_kwarg from pandas.tseries.period import PeriodIndex @@ -3169,8 +3168,6 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, else: return result - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) def drop_duplicates(self, subset=None, keep='first', inplace=False): """ Return DataFrame with duplicate rows removed, optionally only @@ -3185,7 +3182,6 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False): - ``first`` : Drop duplicates except for the first occurrence. - ``last`` : Drop duplicates except for the last occurrence. - False : Drop all duplicates. - take_last : deprecated inplace : boolean, default False Whether to drop duplicates in place or to return a copy @@ -3203,8 +3199,6 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False): else: return self[-duplicated] - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) def duplicated(self, subset=None, keep='first'): """ Return boolean Series denoting duplicate rows, optionally only @@ -3221,7 +3215,6 @@ def duplicated(self, subset=None, keep='first'): - ``last`` : Mark duplicates as ``True`` except for the last occurrence. - False : Mark all duplicates as ``True``. 
- take_last : deprecated Returns ------- diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 7a017ffae284c..4095a14aa5970 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -51,8 +51,8 @@ from pandas.core.sorting import (get_group_index_sorter, get_group_index, compress_group_index, get_flattened_iterator, decons_obs_group_ids, get_indexer_dict) -from pandas.util.decorators import (cache_readonly, Substitution, Appender, - make_signature, deprecate_kwarg) +from pandas.util.decorators import (cache_readonly, Substitution, + Appender, make_signature) from pandas.formats.printing import pprint_thing from pandas.util.validators import validate_kwargs @@ -94,12 +94,12 @@ 'corr', 'cov', 'diff', ]) | _plotting_methods -_series_apply_whitelist = \ - (_common_apply_whitelist - set(['boxplot'])) | \ - frozenset(['dtype', 'unique']) +_series_apply_whitelist = ((_common_apply_whitelist | + {'nlargest', 'nsmallest'}) - + {'boxplot'}) | frozenset(['dtype', 'unique']) -_dataframe_apply_whitelist = \ - _common_apply_whitelist | frozenset(['dtypes', 'corrwith']) +_dataframe_apply_whitelist = (_common_apply_whitelist | + frozenset(['dtypes', 'corrwith'])) _cython_transforms = frozenset(['cumprod', 'cumsum', 'shift', 'cummin', 'cummax']) @@ -3025,20 +3025,6 @@ def nunique(self, dropna=True): index=ri, name=self.name) - @deprecate_kwarg('take_last', 'keep', - mapping={True: 'last', False: 'first'}) - @Appender(Series.nlargest.__doc__) - def nlargest(self, n=5, keep='first'): - # ToDo: When we remove deprecate_kwargs, we can remote these methods - # and include nlargest and nsmallest to _series_apply_whitelist - return self.apply(lambda x: x.nlargest(n=n, keep=keep)) - - @deprecate_kwarg('take_last', 'keep', - mapping={True: 'last', False: 'first'}) - @Appender(Series.nsmallest.__doc__) - def nsmallest(self, n=5, keep='first'): - return self.apply(lambda x: x.nsmallest(n=n, keep=keep)) - @Appender(Series.describe.__doc__) def describe(self, **kwargs): self._set_group_selection() diff --git a/pandas/core/series.py b/pandas/core/series.py index cfa25ca1299eb..7ee3b3e8fb519 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1211,14 +1211,10 @@ def unique(self): return result.asobject.values return result - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['drop_duplicates'] % _shared_doc_kwargs) def drop_duplicates(self, keep='first', inplace=False): return super(Series, self).drop_duplicates(keep=keep, inplace=inplace) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['duplicated'] % _shared_doc_kwargs) def duplicated(self, keep='first'): return super(Series, self).duplicated(keep=keep) @@ -1888,8 +1884,6 @@ def argsort(self, axis=0, kind='quicksort', order=None): np.argsort(values, kind=kind), index=self.index, dtype='int64').__finalize__(self) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) def nlargest(self, n=5, keep='first'): """Return the largest `n` elements. @@ -1901,7 +1895,6 @@ def nlargest(self, n=5, keep='first'): Where there are duplicate values: - ``first`` : take the first occurrence. - ``last`` : take the last occurrence. 
- take_last : deprecated Returns ------- @@ -1938,8 +1931,6 @@ def nlargest(self, n=5, keep='first'): return algorithms.select_n_series(self, n=n, keep=keep, method='nlargest') - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) def nsmallest(self, n=5, keep='first'): """Return the smallest `n` elements. @@ -1951,7 +1942,6 @@ def nsmallest(self, n=5, keep='first'): Where there are duplicate values: - ``first`` : take the first occurrence. - ``last`` : take the last occurrence. - take_last : deprecated Returns ------- diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 5b942e2565c29..381e4d5caa8ac 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -3500,14 +3500,10 @@ def unique(self): result = super(Index, self).unique() return self._shallow_copy(result) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs) def drop_duplicates(self, keep='first'): return super(Index, self).drop_duplicates(keep=keep) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): return super(Index, self).duplicated(keep=keep) diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 923dd4ec785c5..7cfc95de5f538 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -11,8 +11,7 @@ from pandas.types.missing import array_equivalent -from pandas.util.decorators import (Appender, cache_readonly, - deprecate_kwarg) +from pandas.util.decorators import Appender, cache_readonly from pandas.core.config import get_option from pandas.indexes.base import Index, _index_shared_docs import pandas.core.base as base @@ -301,8 +300,6 @@ def unique(self): return self._shallow_copy(result, categories=result.categories, ordered=result.ordered) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): from pandas._libs.hashtable import duplicated_int64 diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 1c1609fed1dd1..978492131ca89 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -755,8 +755,6 @@ def f(k, stringify): for k, stringify in zip(key, self._have_mixed_levels)]) return hash_tuples(key) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): from pandas.core.sorting import get_group_index diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 6c917444f9f43..4fb1d2222fa06 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1381,12 +1381,6 @@ def test_drop_duplicates(self): tm.assert_frame_equal(result, expected) self.assertEqual(len(result), 0) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates('AAA', take_last=True) - expected = df.loc[[6, 7]] - tm.assert_frame_equal(result, expected) - # multi column expected = df.loc[[0, 1, 2, 3]] result = df.drop_duplicates(np.array(['AAA', 'B'])) @@ -1402,12 +1396,6 @@ def test_drop_duplicates(self): expected = df.loc[[0]] tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = 
df.drop_duplicates(('AAA', 'B'), take_last=True) - expected = df.loc[[0, 5, 6, 7]] - tm.assert_frame_equal(result, expected) - # consider everything df2 = df.loc[:, ['AAA', 'B', 'C']] @@ -1424,13 +1412,6 @@ def test_drop_duplicates(self): expected = df2.drop_duplicates(['AAA', 'B'], keep=False) tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df2.drop_duplicates(take_last=True) - with tm.assert_produces_warning(FutureWarning): - expected = df2.drop_duplicates(['AAA', 'B'], take_last=True) - tm.assert_frame_equal(result, expected) - # integers result = df.drop_duplicates('C') expected = df.iloc[[0, 2]] @@ -1529,12 +1510,6 @@ def test_drop_duplicates_tuple(self): self.assertEqual(len(result), 0) tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates(('AA', 'AB'), take_last=True) - expected = df.loc[[6, 7]] - tm.assert_frame_equal(result, expected) - # multi column expected = df.loc[[0, 1, 2, 3]] result = df.drop_duplicates((('AA', 'AB'), 'B')) @@ -1563,12 +1538,6 @@ def test_drop_duplicates_NA(self): tm.assert_frame_equal(result, expected) self.assertEqual(len(result), 0) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates('A', take_last=True) - expected = df.loc[[1, 6, 7]] - tm.assert_frame_equal(result, expected) - # multi column result = df.drop_duplicates(['A', 'B']) expected = df.loc[[0, 2, 3, 6]] @@ -1582,12 +1551,6 @@ def test_drop_duplicates_NA(self): expected = df.loc[[6]] tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates(['A', 'B'], take_last=True) - expected = df.loc[[1, 5, 6, 7]] - tm.assert_frame_equal(result, expected) - # nan df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'bar', 'foo'], @@ -1610,12 +1573,6 @@ def test_drop_duplicates_NA(self): tm.assert_frame_equal(result, expected) self.assertEqual(len(result), 0) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates('C', take_last=True) - expected = df.loc[[3, 7]] - tm.assert_frame_equal(result, expected) - # multi column result = df.drop_duplicates(['C', 'B']) expected = df.loc[[0, 1, 2, 4]] @@ -1629,12 +1586,6 @@ def test_drop_duplicates_NA(self): expected = df.loc[[1]] tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates(['C', 'B'], take_last=True) - expected = df.loc[[1, 3, 6, 7]] - tm.assert_frame_equal(result, expected) - def test_drop_duplicates_NA_for_take_all(self): # none df = DataFrame({'A': [None, None, 'foo', 'bar', @@ -1697,14 +1648,6 @@ def test_drop_duplicates_inplace(self): tm.assert_frame_equal(result, expected) self.assertEqual(len(df), 0) - # deprecate take_last - df = orig.copy() - with tm.assert_produces_warning(FutureWarning): - df.drop_duplicates('A', take_last=True, inplace=True) - expected = orig.loc[[6, 7]] - result = df - tm.assert_frame_equal(result, expected) - # multi column df = orig.copy() df.drop_duplicates(['A', 'B'], inplace=True) @@ -1724,14 +1667,6 @@ def test_drop_duplicates_inplace(self): result = df tm.assert_frame_equal(result, expected) - # deprecate take_last - df = orig.copy() - with tm.assert_produces_warning(FutureWarning): - df.drop_duplicates(['A', 'B'], take_last=True, inplace=True) - expected = orig.loc[[0, 5, 6, 
7]] - result = df - tm.assert_frame_equal(result, expected) - # consider everything orig2 = orig.loc[:, ['A', 'B', 'C']].copy() @@ -1754,17 +1689,7 @@ def test_drop_duplicates_inplace(self): result = df2 tm.assert_frame_equal(result, expected) - # deprecate take_last - df2 = orig2.copy() - with tm.assert_produces_warning(FutureWarning): - df2.drop_duplicates(take_last=True, inplace=True) - with tm.assert_produces_warning(FutureWarning): - expected = orig2.drop_duplicates(['A', 'B'], take_last=True) - result = df2 - tm.assert_frame_equal(result, expected) - # Rounding - def test_round(self): # GH 2665 diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index c25974c94bfd1..a355dca3029c7 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3816,7 +3816,8 @@ def test_groupby_whitelist(self): 'cov', 'diff', 'unique', - # 'nlargest', 'nsmallest', + 'nlargest', + 'nsmallest', ]) for obj, whitelist in zip((df, s), (df_whitelist, s_whitelist)): @@ -4025,8 +4026,6 @@ def test_nlargest(self): 3, 2, 1, 3, 3, 2 ], index=MultiIndex.from_arrays([list('aaabbb'), [2, 3, 1, 6, 5, 7]])) assert_series_equal(gb.nlargest(3, keep='last'), e) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(gb.nlargest(3, take_last=True), e) def test_nsmallest(self): a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) @@ -4044,8 +4043,6 @@ def test_nsmallest(self): 0, 1, 1, 0, 1, 2 ], index=MultiIndex.from_arrays([list('aaabbb'), [4, 1, 0, 9, 8, 7]])) assert_series_equal(gb.nsmallest(3, keep='last'), e) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(gb.nsmallest(3, take_last=True), e) def test_transform_doesnt_clobber_ints(self): # GH 7972 diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index c2543581dca50..dc71fafb1094f 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -917,17 +917,6 @@ def test_drop_duplicates(self): sc.drop_duplicates(keep='last', inplace=True) assert_series_equal(sc, s[~expected]) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(s.duplicated(take_last=True), expected) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.drop_duplicates(take_last=True), s[~expected]) - sc = s.copy() - with tm.assert_produces_warning(FutureWarning): - sc.drop_duplicates(take_last=True, inplace=True) - assert_series_equal(sc, s[~expected]) - expected = Series([False, False, True, True]) assert_series_equal(s.duplicated(keep=False), expected) assert_series_equal(s.drop_duplicates(keep=False), s[~expected]) @@ -951,17 +940,6 @@ def test_drop_duplicates(self): sc.drop_duplicates(keep='last', inplace=True) assert_series_equal(sc, s[~expected]) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(s.duplicated(take_last=True), expected) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.drop_duplicates(take_last=True), s[~expected]) - sc = s.copy() - with tm.assert_produces_warning(FutureWarning): - sc.drop_duplicates(take_last=True, inplace=True) - assert_series_equal(sc, s[~expected]) - expected = Series([False, True, True, False, True, True, False]) assert_series_equal(s.duplicated(keep=False), expected) assert_series_equal(s.drop_duplicates(keep=False), s[~expected]) @@ -1443,18 +1421,7 @@ def test_nsmallest_nlargest(self): for s in s_list: assert_series_equal(s.nsmallest(2), 
s.iloc[[2, 1]]) - assert_series_equal(s.nsmallest(2, keep='last'), s.iloc[[2, 3]]) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.nsmallest(2, take_last=True), s.iloc[[2, 3]]) - - assert_series_equal(s.nlargest(3), s.iloc[[4, 0, 1]]) - - assert_series_equal(s.nlargest(3, keep='last'), s.iloc[[4, 0, 3]]) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.nlargest(3, take_last=True), s.iloc[[4, 0, 3]]) empty = s.iloc[0:0] assert_series_equal(s.nsmallest(0), empty) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 1d4dddf6477df..68db0d19344b9 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -816,15 +816,6 @@ def test_duplicated_drop_duplicates_index(self): result = idx.drop_duplicates(keep='last') tm.assert_index_equal(result, idx[~expected]) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - duplicated = idx.duplicated(take_last=True) - tm.assert_numpy_array_equal(duplicated, expected) - self.assertTrue(duplicated.dtype == bool) - with tm.assert_produces_warning(FutureWarning): - result = idx.drop_duplicates(take_last=True) - tm.assert_index_equal(result, idx[~expected]) - base = [False] * len(original) + [True, True] base[3] = True base[5] = True @@ -867,13 +858,6 @@ def test_duplicated_drop_duplicates_index(self): tm.assert_series_equal(s.drop_duplicates(keep='last'), s[~np.array(base)]) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - tm.assert_series_equal( - s.duplicated(take_last=True), expected) - with tm.assert_produces_warning(FutureWarning): - tm.assert_series_equal(s.drop_duplicates(take_last=True), - s[~np.array(base)]) base = [False] * len(original) + [True, True] base[3] = True base[5] = True diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index d7b115d808312..fd5421abc89ad 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2037,17 +2037,6 @@ def test_duplicated_drop_duplicates(self): expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])) tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) - # deprecate take_last - expected = np.array([True, False, False, False, False, False]) - with tm.assert_produces_warning(FutureWarning): - duplicated = idx.duplicated(take_last=True) - tm.assert_numpy_array_equal(duplicated, expected) - self.assertTrue(duplicated.dtype == bool) - expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])) - with tm.assert_produces_warning(FutureWarning): - tm.assert_index_equal( - idx.drop_duplicates(take_last=True), expected) - def test_multiindex_set_index(self): # segfault in #3308 d = {'t1': [2, 2.5, 3], 't2': [4, 5, 6]} diff --git a/vb_suite/series_methods.py b/vb_suite/series_methods.py index cd8688495fa09..c545f419c2dec 100644 --- a/vb_suite/series_methods.py +++ b/vb_suite/series_methods.py @@ -12,22 +12,22 @@ s4 = s3.astype('object') """ -series_nlargest1 = Benchmark('s1.nlargest(3, take_last=True);' - 's1.nlargest(3, take_last=False)', +series_nlargest1 = Benchmark("s1.nlargest(3, keep='last');" + "s1.nlargest(3, keep='first')", setup, start_date=datetime(2014, 1, 25)) -series_nlargest2 = Benchmark('s2.nlargest(3, take_last=True);' - 's2.nlargest(3, take_last=False)', +series_nlargest2 = Benchmark("s2.nlargest(3, keep='last');" + "s2.nlargest(3, keep='first')", setup, start_date=datetime(2014, 1, 25)) -series_nsmallest2 = Benchmark('s1.nsmallest(3, take_last=True);' - 's1.nsmallest(3, 
take_last=False)', +series_nsmallest2 = Benchmark("s1.nsmallest(3, keep='last');" + "s1.nsmallest(3, keep='first')", setup, start_date=datetime(2014, 1, 25)) -series_nsmallest2 = Benchmark('s2.nsmallest(3, take_last=True);' - 's2.nsmallest(3, take_last=False)', +series_nsmallest2 = Benchmark("s2.nsmallest(3, keep='last');" + "s2.nsmallest(3, keep='first')", setup, start_date=datetime(2014, 1, 25)) From ee19222c98175a99ec47b1359973141bb9f1dc50 Mon Sep 17 00:00:00 2001 From: Jaehoon Hwang Date: Fri, 17 Mar 2017 21:55:38 -0400 Subject: [PATCH 229/933] TST: move pandas/tests/io/test_date_converters.py to pandas/tests/io/parsers/parse_dates.py closes #15519 Author: Jaehoon Hwang Closes #15707 from jaehoonhwang/TST15519 and squashes the following commits: 0b309d3 [Jaehoon Hwang] Fixed frame email and PEP8 ef6e8fa [Jaehoon Hwang] Fixing up few lines and imports e019e95 [Jaehoon Hwang] Imported read_table and using self.readcsv 3eb63c5 [Jaehoon Hwang] TST15519 Moving Unit tests to appropriate file 9b20caa [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' b977615 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/io/parser/parse_dates.py | 148 ++++++++++++++++++++++- pandas/tests/io/test_date_converters.py | 150 ------------------------ 3 files changed, 147 insertions(+), 152 deletions(-) delete mode 100644 pandas/tests/io/test_date_converters.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9cf53300f8cca..4949b68d46723 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -723,6 +723,7 @@ Other API Changes - ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype`` - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`) - Reorganization of timeseries development tests (:issue:`14854`) +- Reorganization of date converter tests (:issue:`15707`) - Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`) - ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`) - ``.merge()`` and ``.join()`` on ``category`` dtype columns will now preserve the category dtype when possible (:issue:`10409`) diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index 4cba9276a9d1e..de4e3fbc0d943 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -6,7 +6,7 @@ """ from distutils.version import LooseVersion -from datetime import datetime +from datetime import datetime, date import pytest import numpy as np @@ -19,9 +19,10 @@ import pandas.util.testing as tm import pandas.io.date_converters as conv -from pandas import DataFrame, Series, Index, DatetimeIndex +from pandas import DataFrame, Series, Index, DatetimeIndex, MultiIndex from pandas import compat from pandas.compat import parse_date, StringIO, lrange +from pandas.compat.numpy import np_array_datetime64_compat from pandas.tseries.index import date_range @@ -510,3 +511,146 @@ def test_parse_date_time_multi_level_column_name(self): expected = DataFrame(expected_data, columns=['date_time', ('A', 'a'), ('B', 'b')]) tm.assert_frame_equal(result, expected) + + def test_parse_date_time(self): + dates = np.array(['2007/1/3', '2008/2/4'], dtype=object) + 
times = np.array(['05:07:09', '06:08:00'], dtype=object) + expected = np.array([datetime(2007, 1, 3, 5, 7, 9), + datetime(2008, 2, 4, 6, 8, 0)]) + + result = conv.parse_date_time(dates, times) + self.assertTrue((result == expected).all()) + + data = """\ +date, time, a, b +2001-01-05, 10:00:00, 0.0, 10. +2001-01-05, 00:00:00, 1., 11. +""" + datecols = {'date_time': [0, 1]} + df = self.read_csv(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=conv.parse_date_time) + self.assertIn('date_time', df) + self.assertEqual(df.date_time.loc[0], datetime(2001, 1, 5, 10, 0, 0)) + + data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" + "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" + "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" + "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" + "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" + "KORD,19990127, 23:00:00, 22:56:00, -0.5900") + + date_spec = {'nominal': [1, 2], 'actual': [1, 3]} + df = self.read_csv(StringIO(data), header=None, parse_dates=date_spec, + date_parser=conv.parse_date_time) + + def test_parse_date_fields(self): + years = np.array([2007, 2008]) + months = np.array([1, 2]) + days = np.array([3, 4]) + result = conv.parse_date_fields(years, months, days) + expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)]) + self.assertTrue((result == expected).all()) + + data = ("year, month, day, a\n 2001 , 01 , 10 , 10.\n" + "2001 , 02 , 1 , 11.") + datecols = {'ymd': [0, 1, 2]} + df = self.read_csv(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=conv.parse_date_fields) + self.assertIn('ymd', df) + self.assertEqual(df.ymd.loc[0], datetime(2001, 1, 10)) + + def test_datetime_six_col(self): + years = np.array([2007, 2008]) + months = np.array([1, 2]) + days = np.array([3, 4]) + hours = np.array([5, 6]) + minutes = np.array([7, 8]) + seconds = np.array([9, 0]) + expected = np.array([datetime(2007, 1, 3, 5, 7, 9), + datetime(2008, 2, 4, 6, 8, 0)]) + + result = conv.parse_all_fields(years, months, days, + hours, minutes, seconds) + + self.assertTrue((result == expected).all()) + + data = """\ +year, month, day, hour, minute, second, a, b +2001, 01, 05, 10, 00, 0, 0.0, 10. +2001, 01, 5, 10, 0, 00, 1., 11. +""" + datecols = {'ymdHMS': [0, 1, 2, 3, 4, 5]} + df = self.read_csv(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=conv.parse_all_fields) + self.assertIn('ymdHMS', df) + self.assertEqual(df.ymdHMS.loc[0], datetime(2001, 1, 5, 10, 0, 0)) + + def test_datetime_fractional_seconds(self): + data = """\ +year, month, day, hour, minute, second, a, b +2001, 01, 05, 10, 00, 0.123456, 0.0, 10. +2001, 01, 5, 10, 0, 0.500000, 1., 11. +""" + datecols = {'ymdHMS': [0, 1, 2, 3, 4, 5]} + df = self.read_csv(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=conv.parse_all_fields) + self.assertIn('ymdHMS', df) + self.assertEqual(df.ymdHMS.loc[0], datetime(2001, 1, 5, 10, 0, 0, + microsecond=123456)) + self.assertEqual(df.ymdHMS.loc[1], datetime(2001, 1, 5, 10, 0, 0, + microsecond=500000)) + + def test_generic(self): + data = "year, month, day, a\n 2001, 01, 10, 10.\n 2001, 02, 1, 11." 
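# A hedged aside on the pattern these moved tests exercise: read_csv can
# fuse several columns into one via any callable date_parser. Sketch on
# made-up data (it mirrors this diff's own fixtures, not new API):
#
#     from datetime import date
#     from pandas.compat import StringIO
#     import pandas as pd
#
#     data = "year, month, a\n2001, 01, 10.\n2001, 02, 11."
#     df = pd.read_csv(StringIO(data), header=0,
#                      parse_dates={'ym': [0, 1]},
#                      date_parser=lambda y, m: date(int(y), int(m), 1))
#     df.ym.loc[0]   # -> datetime.date(2001, 1, 1)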
+ datecols = {'ym': [0, 1]} + dateconverter = lambda y, m: date(year=int(y), month=int(m), day=1) + df = self.read_csv(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=dateconverter) + self.assertIn('ym', df) + self.assertEqual(df.ym.loc[0], date(2001, 1, 1)) + + def test_dateparser_resolution_if_not_ns(self): + # GH 10245 + data = """\ +date,time,prn,rxstatus +2013-11-03,19:00:00,126,00E80000 +2013-11-03,19:00:00,23,00E80000 +2013-11-03,19:00:00,13,00E80000 +""" + + def date_parser(date, time): + datetime = np_array_datetime64_compat( + date + 'T' + time + 'Z', dtype='datetime64[s]') + return datetime + + df = self.read_csv(StringIO(data), date_parser=date_parser, + parse_dates={'datetime': ['date', 'time']}, + index_col=['datetime', 'prn']) + + datetimes = np_array_datetime64_compat(['2013-11-03T19:00:00Z'] * 3, + dtype='datetime64[s]') + df_correct = DataFrame(data={'rxstatus': ['00E80000'] * 3}, + index=MultiIndex.from_tuples( + [(datetimes[0], 126), + (datetimes[1], 23), + (datetimes[2], 13)], + names=['datetime', 'prn'])) + tm.assert_frame_equal(df, df_correct) + + def test_parse_date_column_with_empty_string(self): + # GH 6428 + data = """case,opdate + 7,10/18/2006 + 7,10/18/2008 + 621, """ + result = self.read_csv(StringIO(data), parse_dates=['opdate']) + expected_data = [[7, '10/18/2006'], + [7, '10/18/2008'], + [621, ' ']] + expected = DataFrame(expected_data, columns=['case', 'opdate']) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_date_converters.py b/pandas/tests/io/test_date_converters.py deleted file mode 100644 index 5b54925c65fbd..0000000000000 --- a/pandas/tests/io/test_date_converters.py +++ /dev/null @@ -1,150 +0,0 @@ -from pandas.compat import StringIO -from datetime import date, datetime - -import numpy as np - -from pandas import DataFrame, MultiIndex -from pandas.io.parsers import (read_csv, read_table) -from pandas.util.testing import assert_frame_equal -import pandas.io.date_converters as conv -import pandas.util.testing as tm -from pandas.compat.numpy import np_array_datetime64_compat - - -class TestConverters(tm.TestCase): - - def setUp(self): - self.years = np.array([2007, 2008]) - self.months = np.array([1, 2]) - self.days = np.array([3, 4]) - self.hours = np.array([5, 6]) - self.minutes = np.array([7, 8]) - self.seconds = np.array([9, 0]) - self.dates = np.array(['2007/1/3', '2008/2/4'], dtype=object) - self.times = np.array(['05:07:09', '06:08:00'], dtype=object) - self.expected = np.array([datetime(2007, 1, 3, 5, 7, 9), - datetime(2008, 2, 4, 6, 8, 0)]) - - def test_parse_date_time(self): - result = conv.parse_date_time(self.dates, self.times) - self.assertTrue((result == self.expected).all()) - - data = """\ -date, time, a, b -2001-01-05, 10:00:00, 0.0, 10. -2001-01-05, 00:00:00, 1., 11. 
-""" - datecols = {'date_time': [0, 1]} - df = read_table(StringIO(data), sep=',', header=0, - parse_dates=datecols, date_parser=conv.parse_date_time) - self.assertIn('date_time', df) - self.assertEqual(df.date_time.loc[0], datetime(2001, 1, 5, 10, 0, 0)) - - data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" - "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" - "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" - "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" - "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" - "KORD,19990127, 23:00:00, 22:56:00, -0.5900") - - date_spec = {'nominal': [1, 2], 'actual': [1, 3]} - df = read_csv(StringIO(data), header=None, parse_dates=date_spec, - date_parser=conv.parse_date_time) - - def test_parse_date_fields(self): - result = conv.parse_date_fields(self.years, self.months, self.days) - expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)]) - self.assertTrue((result == expected).all()) - - data = ("year, month, day, a\n 2001 , 01 , 10 , 10.\n" - "2001 , 02 , 1 , 11.") - datecols = {'ymd': [0, 1, 2]} - df = read_table(StringIO(data), sep=',', header=0, - parse_dates=datecols, - date_parser=conv.parse_date_fields) - self.assertIn('ymd', df) - self.assertEqual(df.ymd.loc[0], datetime(2001, 1, 10)) - - def test_datetime_six_col(self): - result = conv.parse_all_fields(self.years, self.months, self.days, - self.hours, self.minutes, self.seconds) - self.assertTrue((result == self.expected).all()) - - data = """\ -year, month, day, hour, minute, second, a, b -2001, 01, 05, 10, 00, 0, 0.0, 10. -2001, 01, 5, 10, 0, 00, 1., 11. -""" - datecols = {'ymdHMS': [0, 1, 2, 3, 4, 5]} - df = read_table(StringIO(data), sep=',', header=0, - parse_dates=datecols, - date_parser=conv.parse_all_fields) - self.assertIn('ymdHMS', df) - self.assertEqual(df.ymdHMS.loc[0], datetime(2001, 1, 5, 10, 0, 0)) - - def test_datetime_fractional_seconds(self): - data = """\ -year, month, day, hour, minute, second, a, b -2001, 01, 05, 10, 00, 0.123456, 0.0, 10. -2001, 01, 5, 10, 0, 0.500000, 1., 11. -""" - datecols = {'ymdHMS': [0, 1, 2, 3, 4, 5]} - df = read_table(StringIO(data), sep=',', header=0, - parse_dates=datecols, - date_parser=conv.parse_all_fields) - self.assertIn('ymdHMS', df) - self.assertEqual(df.ymdHMS.loc[0], datetime(2001, 1, 5, 10, 0, 0, - microsecond=123456)) - self.assertEqual(df.ymdHMS.loc[1], datetime(2001, 1, 5, 10, 0, 0, - microsecond=500000)) - - def test_generic(self): - data = "year, month, day, a\n 2001, 01, 10, 10.\n 2001, 02, 1, 11." 
- datecols = {'ym': [0, 1]} - dateconverter = lambda y, m: date(year=int(y), month=int(m), day=1) - df = read_table(StringIO(data), sep=',', header=0, - parse_dates=datecols, - date_parser=dateconverter) - self.assertIn('ym', df) - self.assertEqual(df.ym.loc[0], date(2001, 1, 1)) - - def test_dateparser_resolution_if_not_ns(self): - # issue 10245 - data = """\ -date,time,prn,rxstatus -2013-11-03,19:00:00,126,00E80000 -2013-11-03,19:00:00,23,00E80000 -2013-11-03,19:00:00,13,00E80000 -""" - - def date_parser(date, time): - datetime = np_array_datetime64_compat( - date + 'T' + time + 'Z', dtype='datetime64[s]') - return datetime - - df = read_csv(StringIO(data), date_parser=date_parser, - parse_dates={'datetime': ['date', 'time']}, - index_col=['datetime', 'prn']) - - datetimes = np_array_datetime64_compat(['2013-11-03T19:00:00Z'] * 3, - dtype='datetime64[s]') - df_correct = DataFrame(data={'rxstatus': ['00E80000'] * 3}, - index=MultiIndex.from_tuples( - [(datetimes[0], 126), - (datetimes[1], 23), - (datetimes[2], 13)], - names=['datetime', 'prn'])) - assert_frame_equal(df, df_correct) - - def test_parse_date_column_with_empty_string(self): - # GH 6428 - data = """case,opdate - 7,10/18/2006 - 7,10/18/2008 - 621, """ - result = read_csv(StringIO(data), parse_dates=['opdate']) - expected_data = [[7, '10/18/2006'], - [7, '10/18/2008'], - [621, ' ']] - expected = DataFrame(expected_data, columns=['case', 'opdate']) - assert_frame_equal(result, expected) From 492b8f7cd652267a1aeab6485abd354930db95d2 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 22:51:36 -0400 Subject: [PATCH 230/933] CI: install nomkl to speed building (#15728) CI: use cache on all builds --- .travis.yml | 17 +++++++++-------- ci/install_travis.sh | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index c1419dd0c5d3b..cafe46059e6c0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -74,7 +74,7 @@ matrix: - CLIPBOARD=xsel - COVERAGE=true - CACHE_NAME="35_nslow" -# - USE_CACHE=true # Don't use cache for 35_nslow + - USE_CACHE=true addons: apt: packages: @@ -86,6 +86,7 @@ matrix: - TEST_ARGS="--skip-slow --skip-network" - PANDAS_TESTING_MODE="deprecate" - CONDA_FORGE=true + - USE_CACHE=true addons: apt: packages: @@ -154,13 +155,13 @@ matrix: - USE_CACHE=true - python: 3.5 env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_numpy_dev" - - JOB_TAG=_NUMPY_DEV - - TEST_ARGS="--skip-slow --skip-network" - - PANDAS_TESTING_MODE="deprecate" - - CACHE_NAME="35_numpy_dev" - - USE_CACHE=true + - PYTHON_VERSION=3.5 + - JOB_NAME: "35_numpy_dev" + - JOB_TAG=_NUMPY_DEV + - TEST_ARGS="--skip-slow --skip-network" + - PANDAS_TESTING_MODE="deprecate" + - CACHE_NAME="35_numpy_dev" + - USE_CACHE=true - python: 3.5 env: - PYTHON_VERSION=3.5 diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 610e6255e6832..de3b3fb6a464e 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -99,7 +99,7 @@ if [ -e ${INSTALL} ]; then else # create new env # this may already exists, in which case our caching worked - time conda create -n pandas python=$PYTHON_VERSION pytest + time conda create -n pandas python=$PYTHON_VERSION pytest nomkl fi # build deps From be2dad17a7e0c39ecb7ed03ed0384856b018bdc3 Mon Sep 17 00:00:00 2001 From: "Christopher C. 
Aycock" Date: Fri, 17 Mar 2017 23:32:46 -0400 Subject: [PATCH 231/933] DOC: Fix typos in merge_asof() docstring (#15729) --- pandas/tools/merge.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 261884bba54bd..60d523a8ea539 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -295,7 +295,7 @@ def merge_asof(left, right, on=None, - A "nearest" search selects the row in the right DataFrame whose 'on' key is closest in absolute distance to the left's key. - The default is "backward" and is the compatible in versions below 0.20.0. + The default is "backward" and is compatible in versions below 0.20.0. The direction parameter was added in version 0.20.0 and introduces "forward" and "nearest". @@ -340,13 +340,13 @@ def merge_asof(left, right, on=None, suffixes : 2-length sequence (tuple, list, ...) Suffix to apply to overlapping column names in the left and right - side, respectively + side, respectively. tolerance : integer or Timedelta, optional, default None - select asof tolerance within this range; must be compatible - to the merge index. + Select asof tolerance within this range; must be compatible + with the merge index. allow_exact_matches : boolean, default True - - If True, allow matching the same 'on' value + - If True, allow matching with the same 'on' value (i.e. less-than-or-equal-to / greater-than-or-equal-to) - If False, don't match the same 'on' value (i.e., stricly less-than / strictly greater-than) From 6a52c15a4ac7b2228e7f8ca45412cacfe301b040 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 18 Mar 2017 12:01:19 -0400 Subject: [PATCH 232/933] TST: move conftest.py to top-level (#15731) --- pandas/conftest.py => conftest.py | 0 pandas/tests/api/test_api.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename pandas/conftest.py => conftest.py (100%) diff --git a/pandas/conftest.py b/conftest.py similarity index 100% rename from pandas/conftest.py rename to conftest.py diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 73222c246fc70..2972427f1b245 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -29,7 +29,7 @@ class TestPDApi(Base, tm.TestCase): # these are optionally imported based on testing # & need to be ignored - ignored = ['tests', 'locale', 'conftest'] + ignored = ['tests', 'locale'] # top-level sub-packages lib = ['api', 'compat', 'computation', 'core', From fe8420ae1a108b6ad3fc14209f8bf7623bb5016f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 18 Mar 2017 12:02:09 -0400 Subject: [PATCH 233/933] CI: remove 3.5 appveyor build (#15730) --- appveyor.yml | 7 ------- ci/requirements-3.5-64.run | 13 ------------- 2 files changed, 20 deletions(-) delete mode 100644 ci/requirements-3.5-64.run diff --git a/appveyor.yml b/appveyor.yml index 1c14698430996..5d748ddf1a108 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -30,13 +30,6 @@ environment: CONDA_PY: "27" CONDA_NPY: "110" - - CONDA_ROOT: "C:\\Miniconda3_64" - PYTHON_VERSION: "3.5" - PYTHON_ARCH: "64" - CONDA_PY: "35" - CONDA_NPY: "111" - - # We always use a 64-bit machine, but can build x86 distributions # with the PYTHON_ARCH variable (which is used by CMD_IN_ENV). 
platform: diff --git a/ci/requirements-3.5-64.run b/ci/requirements-3.5-64.run deleted file mode 100644 index ad66f578d702a..0000000000000 --- a/ci/requirements-3.5-64.run +++ /dev/null @@ -1,13 +0,0 @@ -python-dateutil -pytz -numpy=1.11* -openpyxl -xlsxwriter -xlrd -xlwt -scipy -feather-format -numexpr -pytables -matplotlib -blosc From 63334122acccee705d834e05f394eb38e37f6392 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 18 Mar 2017 12:50:26 -0400 Subject: [PATCH 234/933] CI: turn on cache for osx (#15733) --- .travis.yml | 1 + ci/submit_cython_cache.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index cafe46059e6c0..88e1655363a4e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,6 +28,7 @@ matrix: os: osx compiler: clang osx_image: xcode6.4 + cache: ccache env: - PYTHON_VERSION=3.5 - JOB_NAME: "35_osx" diff --git a/ci/submit_cython_cache.sh b/ci/submit_cython_cache.sh index cfbced4988357..b87acef0ba11c 100755 --- a/ci/submit_cython_cache.sh +++ b/ci/submit_cython_cache.sh @@ -9,7 +9,7 @@ rm -rf $PYX_CACHE_DIR home_dir=$(pwd) -mkdir $PYX_CACHE_DIR +mkdir -p $PYX_CACHE_DIR rsync -Rv $pyx_file_list $PYX_CACHE_DIR echo "pyx files:" From 59b88ab6123338e1dfcc0f77dfb3e5c4c511889a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 18 Mar 2017 16:44:46 -0400 Subject: [PATCH 235/933] Revert "TST: move conftest.py to top-level (#15731)" This reverts commit 6a52c15a4ac7b2228e7f8ca45412cacfe301b040. --- conftest.py => pandas/conftest.py | 0 pandas/tests/api/test_api.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename conftest.py => pandas/conftest.py (100%) diff --git a/conftest.py b/pandas/conftest.py similarity index 100% rename from conftest.py rename to pandas/conftest.py diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 2972427f1b245..73222c246fc70 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -29,7 +29,7 @@ class TestPDApi(Base, tm.TestCase): # these are optionally imported based on testing # & need to be ignored - ignored = ['tests', 'locale'] + ignored = ['tests', 'locale', 'conftest'] # top-level sub-packages lib = ['api', 'compat', 'computation', 'core', From 5f4a5b4fba87c96a583dda57cff864dea7333759 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 18 Mar 2017 22:00:00 -0400 Subject: [PATCH 236/933] TST: clean up build testing Author: Jeff Reback Closes #15734 from jreback/build and squashes the following commits: a99b713 [Jeff Reback] suppress the import json warning when generating _version ed7c526 [Jeff Reback] modify install tests 1cc5b67 [Jeff Reback] TST: have the build test exercise pandas.test() --- .travis.yml | 9 ++++---- ci/install_test.sh | 17 --------------- ci/install_travis.sh | 52 +++++++++++++++++++++++++------------------- ci/script_multi.sh | 3 ++- versioneer.py | 4 +++- 5 files changed, 39 insertions(+), 46 deletions(-) delete mode 100755 ci/install_test.sh diff --git a/.travis.yml b/.travis.yml index 88e1655363a4e..705b2380ac697 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,7 +33,6 @@ matrix: - PYTHON_VERSION=3.5 - JOB_NAME: "35_osx" - TEST_ARGS="--skip-slow --skip-network" - - BUILD_TYPE=conda - JOB_TAG=_OSX - TRAVIS_PYTHON_VERSION=3.5 - CACHE_NAME="35_osx" @@ -107,12 +106,12 @@ matrix: - python: 2.7 env: - PYTHON_VERSION=2.7 - - JOB_NAME: "27_build_test_conda" + - JOB_NAME: "27_build_test" - JOB_TAG=_BUILD_TEST - TEST_ARGS="--skip-slow" - FULL_DEPS=true - BUILD_TEST=true - - CACHE_NAME="27_build_test_conda" + - 
CACHE_NAME="27_build_test" - USE_CACHE=true # In allow_failures - python: 3.5 @@ -147,12 +146,12 @@ matrix: - python: 2.7 env: - PYTHON_VERSION=2.7 - - JOB_NAME: "27_build_test_conda" + - JOB_NAME: "27_build_test" - JOB_TAG=_BUILD_TEST - TEST_ARGS="--skip-slow" - FULL_DEPS=true - BUILD_TEST=true - - CACHE_NAME="27_build_test_conda" + - CACHE_NAME="27_build_test" - USE_CACHE=true - python: 3.5 env: diff --git a/ci/install_test.sh b/ci/install_test.sh deleted file mode 100755 index 9ace633d7f39d..0000000000000 --- a/ci/install_test.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -echo "inside $0" - -if [ "$INSTALL_TEST" ]; then - source activate pandas - echo "Starting installation test." - conda uninstall cython || exit 1 - python "$TRAVIS_BUILD_DIR"/setup.py sdist --formats=zip,gztar || exit 1 - pip install "$TRAVIS_BUILD_DIR"/dist/*tar.gz || exit 1 - pytest pandas/tests/test_series.py --junitxml=/tmp/pytest_install.xml -else - echo "Skipping installation test." -fi -RET="$?" - -exit "$RET" diff --git a/ci/install_travis.sh b/ci/install_travis.sh index de3b3fb6a464e..053a2d15a287c 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -131,10 +131,13 @@ fi if [ "$BUILD_TEST" ]; then - # build testing - pip uninstall --yes cython - pip install cython==0.23 - ( python setup.py build_ext --inplace && python setup.py develop ) || true + # build & install testing + echo ["Starting installation test."] + python setup.py clean + python setup.py build_ext --inplace + python setup.py sdist --formats=gztar + conda uninstall cython + pip install dist/*tar.gz || exit 1 else @@ -142,26 +145,31 @@ else echo "[build em]" time python setup.py build_ext --inplace || exit 1 - # we may have run installations - echo "[conda installs]" - REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.run" - if [ -e ${REQ} ]; then - time conda install -n pandas --file=${REQ} || exit 1 - fi +fi - # we may have additional pip installs - echo "[pip installs]" - REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip" - if [ -e ${REQ} ]; then - pip install -r $REQ - fi +# we may have run installations +echo "[conda installs]" +REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.run" +if [ -e ${REQ} ]; then + time conda install -n pandas --file=${REQ} || exit 1 +fi - # may have addtl installation instructions for this build - echo "[addtl installs]" - REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.sh" - if [ -e ${REQ} ]; then - time bash $REQ || exit 1 - fi +# we may have additional pip installs +echo "[pip installs]" +REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip" +if [ -e ${REQ} ]; then + pip install -r $REQ +fi + +# may have addtl installation instructions for this build +echo "[addtl installs]" +REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.sh" +if [ -e ${REQ} ]; then + time bash $REQ || exit 1 +fi + +# finish install if we are not doing a build-testk +if [ -z "$BUILD_TEST" ]; then # remove any installed pandas package # w/o removing anything else diff --git a/ci/script_multi.sh b/ci/script_multi.sh index 41f71fd21f63f..2d1211b2f7b96 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -24,7 +24,8 @@ export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 429496 echo PYTHONHASHSEED=$PYTHONHASHSEED if [ "$BUILD_TEST" ]; then - echo "We are not running pytest as this is simply a build test." 
+ cd /tmp + python -c "import pandas; pandas.test(['-n 2'])" elif [ "$COVERAGE" ]; then echo pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/cov.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/cov.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas diff --git a/versioneer.py b/versioneer.py index c010f63e3ead8..104e8e97c6bd6 100644 --- a/versioneer.py +++ b/versioneer.py @@ -1130,7 +1130,9 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. -import json +from warnings import catch_warnings +with catch_warnings(record=True): + import json import sys version_json = ''' From 9ab57dc522a41d42cb230272a3a0df0ad8a7eb27 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 18 Mar 2017 22:02:20 -0400 Subject: [PATCH 237/933] MAINT: Drop order and sort from pandas objects (#15735) Affect classes: 1) Index 2) Series 2) DataFrame xref gh-10726 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 50 -------------------- pandas/core/series.py | 71 ---------------------------- pandas/indexes/base.py | 11 ----- pandas/tests/frame/test_analytics.py | 20 -------- pandas/tests/frame/test_sorting.py | 6 +-- pandas/tests/indexes/common.py | 6 --- pandas/tests/indexes/test_base.py | 15 ------ pandas/tests/series/test_sorting.py | 15 +----- 9 files changed, 4 insertions(+), 191 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4949b68d46723..680aefc4041fb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -771,6 +771,7 @@ Removal of prior version deprecations/changes - The deprecated ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) - The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`) - The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) +- ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`) .. _whatsnew_0200.performance: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3696051b269e3..732d88b47ae2a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3304,56 +3304,6 @@ def trans(v): else: return self._constructor(new_data).__finalize__(self) - def sort(self, columns=None, axis=0, ascending=True, inplace=False, - kind='quicksort', na_position='last', **kwargs): - """ - DEPRECATED: use :meth:`DataFrame.sort_values` - - Sort DataFrame either by labels (along either axis) or by the values in - column(s) - - Parameters - ---------- - columns : object - Column name(s) in frame. Accepts a column name or a list - for a nested sort. A tuple will be interpreted as the - levels of a multi-index. - ascending : boolean or list, default True - Sort ascending vs. descending. Specify list for multiple sort - orders - axis : {0 or 'index', 1 or 'columns'}, default 0 - Sort index/rows versus columns - inplace : boolean, default False - Sort the DataFrame without creating a new instance - kind : {'quicksort', 'mergesort', 'heapsort'}, optional - This option is only applied when sorting on a single column or - label. 
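# The removed DataFrame.sort was only a deprecation shim over sort_index /
# sort_values, as the deleted body below shows. A minimal migration sketch
# on a hypothetical frame (not taken from this diff):
#
#     df = pd.DataFrame({'A': [2, 1], 'B': [1, 2]})
#     df.sort_values(by='A')   # replaces df.sort(columns='A')
#     df.sort_index()          # replaces df.sort()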
- na_position : {'first', 'last'} (optional, default='last') - 'first' puts NaNs at the beginning - 'last' puts NaNs at the end - - Examples - -------- - >>> result = df.sort(['A', 'B'], ascending=[1, 0]) - - Returns - ------- - sorted : DataFrame - """ - nv.validate_sort(tuple(), kwargs) - - if columns is None: - warnings.warn("sort(....) is deprecated, use sort_index(.....)", - FutureWarning, stacklevel=2) - return self.sort_index(axis=axis, ascending=ascending, - inplace=inplace) - - warnings.warn("sort(columns=....) is deprecated, use " - "sort_values(by=.....)", FutureWarning, stacklevel=2) - return self.sort_values(by=columns, axis=axis, ascending=ascending, - inplace=inplace, kind=kind, - na_position=na_position) - @Appender(_shared_docs['sort_index'] % _shared_doc_kwargs) def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, diff --git a/pandas/core/series.py b/pandas/core/series.py index 7ee3b3e8fb519..4c51ced1845fe 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1777,77 +1777,6 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, else: return result.__finalize__(self) - def sort(self, axis=0, ascending=True, kind='quicksort', - na_position='last', inplace=True): - """ - DEPRECATED: use :meth:`Series.sort_values(inplace=True)` for INPLACE - sorting - - Sort values and index labels by value. This is an inplace sort by - default. Series.order is the equivalent but returns a new Series. - - Parameters - ---------- - axis : int (can only be zero) - ascending : boolean, default True - Sort ascending. Passing False sorts descending - kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort' - Choice of sorting algorithm. See np.sort for more - information. 'mergesort' is the only stable algorithm - na_position : {'first', 'last'} (optional, default='last') - 'first' puts NaNs at the beginning - 'last' puts NaNs at the end - inplace : boolean, default True - Do operation in place. - - See Also - -------- - Series.sort_values - """ - warnings.warn("sort is deprecated, use sort_values(inplace=True) for " - "INPLACE sorting", FutureWarning, stacklevel=2) - - return self.sort_values(ascending=ascending, kind=kind, - na_position=na_position, inplace=inplace) - - def order(self, na_last=None, ascending=True, kind='quicksort', - na_position='last', inplace=False): - """ - DEPRECATED: use :meth:`Series.sort_values` - - Sorts Series object, by value, maintaining index-value link. - This will return a new Series by default. Series.sort is the equivalent - but as an inplace method. - - Parameters - ---------- - na_last : boolean (optional, default=True)--DEPRECATED; use na_position - Put NaN's at beginning or end - ascending : boolean, default True - Sort ascending. Passing False sorts descending - kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort' - Choice of sorting algorithm. See np.sort for more - information. 'mergesort' is the only stable algorithm - na_position : {'first', 'last'} (optional, default='last') - 'first' puts NaNs at the beginning - 'last' puts NaNs at the end - inplace : boolean, default False - Do operation in place. 
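# Likewise for Series: the removed sort()/order() pair maps onto
# sort_values, per the deprecation warnings in the deleted bodies below.
# Hypothetical sketch:
#
#     s = pd.Series([3, 1, 2])
#     s.sort_values()              # replaces s.order()
#     s.sort_values(inplace=True)  # replaces s.sort()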
- - Returns - ------- - y : Series - - See Also - -------- - Series.sort_values - """ - warnings.warn("order is deprecated, use sort_values(...)", - FutureWarning, stacklevel=2) - - return self.sort_values(ascending=ascending, kind=kind, - na_position=na_position, inplace=inplace) - def argsort(self, axis=0, kind='quicksort', order=None): """ Overrides ndarray.argsort. Argsorts the value, omitting NA/null values, diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 381e4d5caa8ac..d262ecd818f1d 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1912,17 +1912,6 @@ def sort_values(self, return_indexer=False, ascending=True): else: return sorted_index - def order(self, return_indexer=False, ascending=True): - """ - Return sorted copy of Index - - DEPRECATED: use :meth:`Index.sort_values` - """ - warnings.warn("order is deprecated, use sort_values(...)", - FutureWarning, stacklevel=2) - return self.sort_values(return_indexer=return_indexer, - ascending=ascending) - def sort(self, *args, **kwargs): raise TypeError("cannot sort an Index object in-place, use " "sort_values instead") diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 4fb1d2222fa06..735d3786e6a54 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -660,26 +660,6 @@ def test_sem(self): self.assertFalse((result < 0).any()) nanops._USE_BOTTLENECK = True - def test_sort_invalid_kwargs(self): - df = DataFrame([1, 2, 3], columns=['a']) - - msg = r"sort\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, df.sort, foo=2) - - # Neither of these should raise an error because they - # are explicit keyword arguments in the signature and - # hence should not be swallowed by the kwargs parameter - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - df.sort(axis=1) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - df.sort(kind='mergesort') - - msg = "the 'order' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, df.sort, order=2) - def test_skew(self): tm._skip_if_no_scipy() from scipy.stats import skew diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 7779afdc47b48..5108fc6080866 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -62,11 +62,7 @@ def test_sort(self): frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], columns=['A', 'B', 'C', 'D']) - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - frame.sort(columns='A') - with tm.assert_produces_warning(FutureWarning): - frame.sort() + # see gh-9816 with tm.assert_produces_warning(FutureWarning): frame.sortlevel() diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 3581f894e53a3..b1e6bd7520c69 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -346,12 +346,6 @@ def test_sort(self): for ind in self.indices.values(): self.assertRaises(TypeError, ind.sort) - def test_order(self): - for ind in self.indices.values(): - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - ind.order() - def test_mutability(self): for ind in self.indices.values(): if not len(ind): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 05d3478ab0705..7199a38bb7a80 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1808,21 +1808,6 @@ 
def setUp(self): def create_index(self): return self.mixedIndex - def test_order(self): - idx = self.create_index() - # 9816 deprecated - if PY36: - with tm.assertRaisesRegexp(TypeError, "'>' not supported"): - with tm.assert_produces_warning(FutureWarning): - idx.order() - elif PY3: - with tm.assertRaisesRegexp(TypeError, "unorderable types"): - with tm.assert_produces_warning(FutureWarning): - idx.order() - else: - with tm.assert_produces_warning(FutureWarning): - idx.order() - def test_argsort(self): idx = self.create_index() if PY36: diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index 590a530a847bd..66ecba960ae0b 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -13,24 +13,13 @@ class TestSeriesSorting(TestData, tm.TestCase): - def test_sort(self): - + def test_sortlevel_deprecated(self): ts = self.ts.copy() - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - ts.sort() # sorts inplace - self.assert_series_equal(ts, self.ts.sort_values()) + # see gh-9816 with tm.assert_produces_warning(FutureWarning): ts.sortlevel() - def test_order(self): - - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - result = self.ts.order() - self.assert_series_equal(result, self.ts.sort_values()) - def test_sort_values(self): # check indexes are reordered corresponding with the values From bd24926bff3cac204a4d459488a9a64c4e8eece1 Mon Sep 17 00:00:00 2001 From: John Zwinck Date: Mon, 20 Mar 2017 16:13:52 +0800 Subject: [PATCH 238/933] DOC: Fix typo in docstring param name (#15739) --- pandas/tseries/holiday.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index d3d936693c266..9acb52ebe0e9f 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -365,7 +365,7 @@ def holidays(self, start=None, end=None, return_name=False): ---------- start : starting date, datetime-like, optional end : ending date, datetime-like, optional - return_names : bool, optional + return_name : bool, optional If True, return a series that has dates and holiday names. False will only return a DatetimeIndex of dates. From b1e29dba26ff86b826fe0f866182466ae42c0bc5 Mon Sep 17 00:00:00 2001 From: Pankaj Pandey Date: Mon, 20 Mar 2017 09:44:29 -0400 Subject: [PATCH 239/933] BUG: Fix linux clipboard QApplication() creation closes #14372 A Qt application cannot instantiate multiple `QApplication` instances, so we create a new `QApplication` only when the global `QApplication.instance()` is None. Author: Pankaj Pandey Closes #14815 from pankajp/patch-2 and squashes the following commits: 40d70f9 [Pankaj Pandey] BUG: Fix linux clipboard QApplication() creation --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/util/clipboard/clipboards.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 680aefc4041fb..af0d0d7b04475 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -828,7 +828,7 @@ Bug Fixes - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) - Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. 
(:issue:`14956`) - +- Bug in interactions with ``Qt`` when a ``QtApplication`` already exists (:issue:`14372`) - Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) - Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) diff --git a/pandas/util/clipboard/clipboards.py b/pandas/util/clipboard/clipboards.py index f73f4f191d577..bd5528334168f 100644 --- a/pandas/util/clipboard/clipboards.py +++ b/pandas/util/clipboard/clipboards.py @@ -50,7 +50,8 @@ def init_qt_clipboard(): # $DISPLAY should exist from PyQt4.QtGui import QApplication - app = QApplication([]) + # use the global instance if it exists + app = QApplication.instance() or QApplication([]) def copy_qt(text): cb = app.clipboard() From 8bde21a9f01c4a09d6e305906e81794b45935d5e Mon Sep 17 00:00:00 2001 From: sinhrks Date: Mon, 20 Mar 2017 10:19:18 -0400 Subject: [PATCH 240/933] BUG: replace coerces incorrect dtype closes #12747 Author: sinhrks This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #12780 from sinhrks/replace_type and squashes the following commits: f9154e8 [sinhrks] remove unnecessary comments 279fdf6 [sinhrks] remove import failure de44877 [sinhrks] BUG: replace coerces incorrect dtype --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/internals.py | 20 +++++++++-- pandas/tests/indexing/test_coercion.py | 50 ++++++++++++++++++++------ pandas/tests/series/test_replace.py | 4 +-- pandas/types/cast.py | 37 ++++++++++++++----- 5 files changed, 88 insertions(+), 24 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index af0d0d7b04475..7c78132232077 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -823,6 +823,7 @@ Bug Fixes - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) +- Bug in ``.replace()`` may result in incorrect dtypes. 
(:issue:`12747`) - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 9db01713b05ed..60684a929889b 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1894,8 +1894,11 @@ def convert(self, *args, **kwargs): blocks.append(newb) else: - values = fn( - self.values.ravel(), **fn_kwargs).reshape(self.values.shape) + values = fn(self.values.ravel(), **fn_kwargs) + try: + values = values.reshape(self.values.shape) + except NotImplementedError: + pass blocks.append(make_block(values, ndim=self.ndim, placement=self.mgr_locs)) @@ -3238,6 +3241,16 @@ def comp(s): return _possibly_compare(values, getattr(s, 'asm8', s), operator.eq) + def _cast_scalar(block, scalar): + dtype, val = _infer_dtype_from_scalar(scalar, pandas_dtype=True) + if not is_dtype_equal(block.dtype, dtype): + dtype = _find_common_type([block.dtype, dtype]) + block = block.astype(dtype) + # use original value + val = scalar + + return block, val + masks = [comp(s) for i, s in enumerate(src_list)] result_blocks = [] @@ -3260,7 +3273,8 @@ def comp(s): # particular block m = masks[i][b.mgr_locs.indexer] if m.any(): - new_rb.extend(b.putmask(m, d, inplace=True)) + b, val = _cast_scalar(b, d) + new_rb.extend(b.putmask(m, val, inplace=True)) else: new_rb.append(b) rb = new_rb diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 38f8bb5355a69..df95f563c0832 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -1153,12 +1153,27 @@ def setUp(self): self.rep['float64'] = [1.1, 2.2] self.rep['complex128'] = [1 + 1j, 2 + 2j] self.rep['bool'] = [True, False] + self.rep['datetime64[ns]'] = [pd.Timestamp('2011-01-01'), + pd.Timestamp('2011-01-03')] + + for tz in ['UTC', 'US/Eastern']: + # to test tz => different tz replacement + key = 'datetime64[ns, {0}]'.format(tz) + self.rep[key] = [pd.Timestamp('2011-01-01', tz=tz), + pd.Timestamp('2011-01-03', tz=tz)] + + self.rep['timedelta64[ns]'] = [pd.Timedelta('1 day'), + pd.Timedelta('2 day')] def _assert_replace_conversion(self, from_key, to_key, how): index = pd.Index([3, 4], name='xxx') obj = pd.Series(self.rep[from_key], index=index, name='yyy') self.assertEqual(obj.dtype, from_key) + if (from_key.startswith('datetime') and to_key.startswith('datetime')): + # different tz, currently mask_missing raises SystemError + return + if how == 'dict': replacer = dict(zip(self.rep[from_key], self.rep[to_key])) elif how == 'series': @@ -1175,17 +1190,12 @@ def _assert_replace_conversion(self, from_key, to_key, how): pytest.skip("windows platform buggy: {0} -> {1}".format (from_key, to_key)) - if ((from_key == 'float64' and - to_key in ('bool', 'int64')) or - + if ((from_key == 'float64' and to_key in ('bool', 'int64')) or (from_key == 'complex128' and to_key in ('bool', 'int64', 'float64')) or - (from_key == 'int64' and - to_key in ('bool')) or - - # TODO_GH12747 The result must be int? - (from_key == 'bool' and to_key == 'int64')): + # GH12747 The result must be int? 
+ (from_key == 'int64' and to_key in ('bool'))): # buggy on 32-bit if tm.is_platform_32bit(): @@ -1248,13 +1258,31 @@ def test_replace_series_bool(self): self._assert_replace_conversion(from_key, to_key, how='series') def test_replace_series_datetime64(self): - pass + from_key = 'datetime64[ns]' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='dict') + + from_key = 'datetime64[ns]' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='series') def test_replace_series_datetime64tz(self): - pass + from_key = 'datetime64[ns, US/Eastern]' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='dict') + + from_key = 'datetime64[ns, US/Eastern]' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='series') def test_replace_series_timedelta64(self): - pass + from_key = 'timedelta64[ns]' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='dict') + + from_key = 'timedelta64[ns]' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='series') def test_replace_series_period(self): pass diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 0acd03316339e..f5a25e93cc82d 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -132,8 +132,8 @@ def check_replace(to_rep, val, expected): tm.assert_series_equal(expected, r) tm.assert_series_equal(expected, sc) - # should NOT upcast to float - e = pd.Series([0, 1, 2, 3, 4]) + # MUST upcast to float + e = pd.Series([0., 1., 2., 3., 4.]) tr, v = [3], [3.0] check_replace(tr, v, e) diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 1cd55274b9b49..11a837dd21159 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -21,7 +21,7 @@ _ensure_int32, _ensure_int64, _NS_DTYPE, _TD_DTYPE, _INT64_DTYPE, _POSSIBLY_CAST_DTYPES) -from .dtypes import ExtensionDtype +from .dtypes import ExtensionDtype, DatetimeTZDtype, PeriodDtype from .generic import ABCDatetimeIndex, ABCPeriodIndex, ABCSeries from .missing import isnull, notnull from .inference import is_list_like @@ -312,8 +312,17 @@ def _maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value -def _infer_dtype_from_scalar(val): - """ interpret the dtype from a scalar """ +def _infer_dtype_from_scalar(val, pandas_dtype=False): + """ + interpret the dtype from a scalar + + Parameters + ---------- + pandas_dtype : bool, default False + whether to infer dtype including pandas extension types. 
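# Sketch of what the new pandas_dtype flag toggles (the helper is internal;
# the values below follow this patch's logic and are illustrative only):
#
#     from pandas.types.cast import _infer_dtype_from_scalar
#     ts = pd.Timestamp('2011-01-01', tz='US/Eastern')
#     _infer_dtype_from_scalar(ts)                     # (np.object_, ts)
#     _infer_dtype_from_scalar(ts, pandas_dtype=True)  # (DatetimeTZDtype, ts.value)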
+ If False, scalar belongs to pandas extension types is inferred as + object + """ dtype = np.object_ @@ -336,13 +345,20 @@ def _infer_dtype_from_scalar(val): dtype = np.object_ - elif isinstance(val, (np.datetime64, - datetime)) and getattr(val, 'tzinfo', None) is None: - val = lib.Timestamp(val).value - dtype = np.dtype('M8[ns]') + elif isinstance(val, (np.datetime64, datetime)): + val = tslib.Timestamp(val) + if val is tslib.NaT or val.tz is None: + dtype = np.dtype('M8[ns]') + else: + if pandas_dtype: + dtype = DatetimeTZDtype(unit='ns', tz=val.tz) + else: + # return datetimetz as object + return np.object_, val + val = val.value elif isinstance(val, (np.timedelta64, timedelta)): - val = lib.Timedelta(val).value + val = tslib.Timedelta(val).value dtype = np.dtype('m8[ns]') elif is_bool(val): @@ -363,6 +379,11 @@ def _infer_dtype_from_scalar(val): elif is_complex(val): dtype = np.complex_ + elif pandas_dtype: + if lib.is_period(val): + dtype = PeriodDtype(freq=val.freq) + val = val.ordinal + return dtype, val From 771e36c32f922c6a0c4a147f08fef32a011d534f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 20 Mar 2017 13:45:49 -0400 Subject: [PATCH 241/933] BUG: tz aware Timestamp field accessors returns local values (#13303) closes #13303 Previously, calling a date/time attribute with Timestamp that's tz aware (e.g. `Timestamp('...', tz='...').dayofyear`) would return the attribute in UTC instead of the local tz. Author: Matt Roeschke Closes #15740 from mroeschke/fix_13303 and squashes the following commits: b78b333 [Matt Roeschke] BUG: tz aware Timestamp field accessors returns local values (#13303) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/_libs/tslib.pyx | 10 +- pandas/tests/indexes/datetimes/test_misc.py | 158 ++++++++++---------- pandas/tests/scalar/test_timestamp.py | 26 ++++ 4 files changed, 117 insertions(+), 78 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7c78132232077..98407aacb993b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -796,6 +796,7 @@ Bug Fixes ~~~~~~~~~ - Bug in ``Timestamp.replace`` now raises ``TypeError`` when incorrect argument names are given; previously this raised ``ValueError`` (:issue:`15240`) +- Bug in ``Timestamp`` returning UTC based time/date attributes when a timezone was provided (:issue:`13303`) - Bug in ``Index`` power operations with reversed operands (:issue:`14973`) - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) - Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 8ee92e9fb900d..055534bbdb7ee 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1233,7 +1233,10 @@ cdef class _Timestamp(datetime): return datetime.__sub__(self, other) cpdef _get_field(self, field): - out = get_date_field(np.array([self.value], dtype=np.int64), field) + val = self.value + if self.tz is not None and not _is_utc(self.tz): + val = tz_convert_single(self.value, 'UTC', self.tz) + out = get_date_field(np.array([val], dtype=np.int64), field) return int(out[0]) cpdef _get_start_end_field(self, field): @@ -1241,8 +1244,11 @@ cdef class _Timestamp(datetime): 'startingMonth', self.freq.kwds.get( 'month', 12)) if self.freq else 12 freqstr = self.freqstr if self.freq else None + val = self.value + if self.tz is not None and not _is_utc(self.tz): + val = tz_convert_single(self.value, 'UTC', 
self.tz) out = get_start_end_field( - np.array([self.value], dtype=np.int64), field, freqstr, month_kw) + np.array([val], dtype=np.int64), field, freqstr, month_kw) return out[0] property _repr_base: diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 6b0191edbda5a..e99f1d46637c2 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -172,82 +172,88 @@ def test_normalize(self): class TestDatetime64(tm.TestCase): def test_datetimeindex_accessors(self): - dti = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), periods=365) - - self.assertEqual(dti.year[0], 1998) - self.assertEqual(dti.month[0], 1) - self.assertEqual(dti.day[0], 1) - self.assertEqual(dti.hour[0], 0) - self.assertEqual(dti.minute[0], 0) - self.assertEqual(dti.second[0], 0) - self.assertEqual(dti.microsecond[0], 0) - self.assertEqual(dti.dayofweek[0], 3) - - self.assertEqual(dti.dayofyear[0], 1) - self.assertEqual(dti.dayofyear[120], 121) - - self.assertEqual(dti.weekofyear[0], 1) - self.assertEqual(dti.weekofyear[120], 18) - - self.assertEqual(dti.quarter[0], 1) - self.assertEqual(dti.quarter[120], 2) - - self.assertEqual(dti.days_in_month[0], 31) - self.assertEqual(dti.days_in_month[90], 30) - - self.assertEqual(dti.is_month_start[0], True) - self.assertEqual(dti.is_month_start[1], False) - self.assertEqual(dti.is_month_start[31], True) - self.assertEqual(dti.is_quarter_start[0], True) - self.assertEqual(dti.is_quarter_start[90], True) - self.assertEqual(dti.is_year_start[0], True) - self.assertEqual(dti.is_year_start[364], False) - self.assertEqual(dti.is_month_end[0], False) - self.assertEqual(dti.is_month_end[30], True) - self.assertEqual(dti.is_month_end[31], False) - self.assertEqual(dti.is_month_end[364], True) - self.assertEqual(dti.is_quarter_end[0], False) - self.assertEqual(dti.is_quarter_end[30], False) - self.assertEqual(dti.is_quarter_end[89], True) - self.assertEqual(dti.is_quarter_end[364], True) - self.assertEqual(dti.is_year_end[0], False) - self.assertEqual(dti.is_year_end[364], True) - - # GH 11128 - self.assertEqual(dti.weekday_name[4], u'Monday') - self.assertEqual(dti.weekday_name[5], u'Tuesday') - self.assertEqual(dti.weekday_name[6], u'Wednesday') - self.assertEqual(dti.weekday_name[7], u'Thursday') - self.assertEqual(dti.weekday_name[8], u'Friday') - self.assertEqual(dti.weekday_name[9], u'Saturday') - self.assertEqual(dti.weekday_name[10], u'Sunday') - - self.assertEqual(Timestamp('2016-04-04').weekday_name, u'Monday') - self.assertEqual(Timestamp('2016-04-05').weekday_name, u'Tuesday') - self.assertEqual(Timestamp('2016-04-06').weekday_name, u'Wednesday') - self.assertEqual(Timestamp('2016-04-07').weekday_name, u'Thursday') - self.assertEqual(Timestamp('2016-04-08').weekday_name, u'Friday') - self.assertEqual(Timestamp('2016-04-09').weekday_name, u'Saturday') - self.assertEqual(Timestamp('2016-04-10').weekday_name, u'Sunday') - - self.assertEqual(len(dti.year), 365) - self.assertEqual(len(dti.month), 365) - self.assertEqual(len(dti.day), 365) - self.assertEqual(len(dti.hour), 365) - self.assertEqual(len(dti.minute), 365) - self.assertEqual(len(dti.second), 365) - self.assertEqual(len(dti.microsecond), 365) - self.assertEqual(len(dti.dayofweek), 365) - self.assertEqual(len(dti.dayofyear), 365) - self.assertEqual(len(dti.weekofyear), 365) - self.assertEqual(len(dti.quarter), 365) - self.assertEqual(len(dti.is_month_start), 365) - self.assertEqual(len(dti.is_month_end), 365) - 
self.assertEqual(len(dti.is_quarter_start), 365) - self.assertEqual(len(dti.is_quarter_end), 365) - self.assertEqual(len(dti.is_year_start), 365) - self.assertEqual(len(dti.is_year_end), 365) - self.assertEqual(len(dti.weekday_name), 365) + dti_naive = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), + periods=365) + # GH 13303 + dti_tz = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), + periods=365, tz='US/Eastern') + for dti in [dti_naive, dti_tz]: + + self.assertEqual(dti.year[0], 1998) + self.assertEqual(dti.month[0], 1) + self.assertEqual(dti.day[0], 1) + self.assertEqual(dti.hour[0], 0) + self.assertEqual(dti.minute[0], 0) + self.assertEqual(dti.second[0], 0) + self.assertEqual(dti.microsecond[0], 0) + self.assertEqual(dti.dayofweek[0], 3) + + self.assertEqual(dti.dayofyear[0], 1) + self.assertEqual(dti.dayofyear[120], 121) + + self.assertEqual(dti.weekofyear[0], 1) + self.assertEqual(dti.weekofyear[120], 18) + + self.assertEqual(dti.quarter[0], 1) + self.assertEqual(dti.quarter[120], 2) + + self.assertEqual(dti.days_in_month[0], 31) + self.assertEqual(dti.days_in_month[90], 30) + + self.assertEqual(dti.is_month_start[0], True) + self.assertEqual(dti.is_month_start[1], False) + self.assertEqual(dti.is_month_start[31], True) + self.assertEqual(dti.is_quarter_start[0], True) + self.assertEqual(dti.is_quarter_start[90], True) + self.assertEqual(dti.is_year_start[0], True) + self.assertEqual(dti.is_year_start[364], False) + self.assertEqual(dti.is_month_end[0], False) + self.assertEqual(dti.is_month_end[30], True) + self.assertEqual(dti.is_month_end[31], False) + self.assertEqual(dti.is_month_end[364], True) + self.assertEqual(dti.is_quarter_end[0], False) + self.assertEqual(dti.is_quarter_end[30], False) + self.assertEqual(dti.is_quarter_end[89], True) + self.assertEqual(dti.is_quarter_end[364], True) + self.assertEqual(dti.is_year_end[0], False) + self.assertEqual(dti.is_year_end[364], True) + + # GH 11128 + self.assertEqual(dti.weekday_name[4], u'Monday') + self.assertEqual(dti.weekday_name[5], u'Tuesday') + self.assertEqual(dti.weekday_name[6], u'Wednesday') + self.assertEqual(dti.weekday_name[7], u'Thursday') + self.assertEqual(dti.weekday_name[8], u'Friday') + self.assertEqual(dti.weekday_name[9], u'Saturday') + self.assertEqual(dti.weekday_name[10], u'Sunday') + + self.assertEqual(Timestamp('2016-04-04').weekday_name, u'Monday') + self.assertEqual(Timestamp('2016-04-05').weekday_name, u'Tuesday') + self.assertEqual(Timestamp('2016-04-06').weekday_name, + u'Wednesday') + self.assertEqual(Timestamp('2016-04-07').weekday_name, u'Thursday') + self.assertEqual(Timestamp('2016-04-08').weekday_name, u'Friday') + self.assertEqual(Timestamp('2016-04-09').weekday_name, u'Saturday') + self.assertEqual(Timestamp('2016-04-10').weekday_name, u'Sunday') + + self.assertEqual(len(dti.year), 365) + self.assertEqual(len(dti.month), 365) + self.assertEqual(len(dti.day), 365) + self.assertEqual(len(dti.hour), 365) + self.assertEqual(len(dti.minute), 365) + self.assertEqual(len(dti.second), 365) + self.assertEqual(len(dti.microsecond), 365) + self.assertEqual(len(dti.dayofweek), 365) + self.assertEqual(len(dti.dayofyear), 365) + self.assertEqual(len(dti.weekofyear), 365) + self.assertEqual(len(dti.quarter), 365) + self.assertEqual(len(dti.is_month_start), 365) + self.assertEqual(len(dti.is_month_end), 365) + self.assertEqual(len(dti.is_quarter_start), 365) + self.assertEqual(len(dti.is_quarter_end), 365) + self.assertEqual(len(dti.is_year_start), 365) + self.assertEqual(len(dti.is_year_end), 
365) + self.assertEqual(len(dti.weekday_name), 365) dti = DatetimeIndex(freq='BQ-FEB', start=datetime(1998, 1, 1), periods=4) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index d5d92dcf96eab..082f0fa9c40d5 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -550,6 +550,32 @@ def check(value, equal): check(ts.daysinmonth, 31) check(ts.daysinmonth, 31) + # GH 13303 + ts = Timestamp('2014-12-31 23:59:00-05:00', tz='US/Eastern') + check(ts.year, 2014) + check(ts.month, 12) + check(ts.day, 31) + check(ts.hour, 23) + check(ts.minute, 59) + check(ts.second, 0) + self.assertRaises(AttributeError, lambda: ts.millisecond) + check(ts.microsecond, 0) + check(ts.nanosecond, 0) + check(ts.dayofweek, 2) + check(ts.quarter, 4) + check(ts.dayofyear, 365) + check(ts.week, 1) + check(ts.daysinmonth, 31) + + ts = Timestamp('2014-01-01 00:00:00+01:00') + starts = ['is_month_start', 'is_quarter_start', 'is_year_start'] + for start in starts: + self.assertTrue(getattr(ts, start)) + ts = Timestamp('2014-12-31 23:59:59+01:00') + ends = ['is_month_end', 'is_year_end', 'is_quarter_end'] + for end in ends: + self.assertTrue(getattr(ts, end)) + def test_nat_fields(self): # GH 10050 ts = Timestamp('NaT') From 2b45e448458f5176d78147e6449ba595bc785973 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 20 Mar 2017 15:38:35 -0400 Subject: [PATCH 242/933] DOC: Patch new flake8 command grep The grep was initially matching to "pandas," which is incorrect because that was also matching files containing "pandas" in the name but that were not in the main `pandas` directory (e.g. performance test code). This change enforces that we match to any Python files in the main `pandas` directory. Also picked up compatibility issue with OSX, in which the `-r` flag does not exist. However, `xargs` terminates if the argument list is empty, which was the whole point of passing in `-r` in the first place. Follow-up to #15712 Author: gfyoung Closes #15749 from gfyoung/flake8-diff-patch and squashes the following commits: d1543b5 [gfyoung] COMPAT: Do not run xargs with -r on OSX da57857 [gfyoung] DOC: Patch new flake8 command grep --- doc/source/contributing.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 7961780d0c79b..7ad5916a8809d 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -527,7 +527,12 @@ unused function. However, style-checking the diff will not catch this because the actual import is not part of the diff. Thus, for completeness, you should run this command, though it will take longer:: - git diff master --name-only -- '*.py' | grep 'pandas' | xargs -r flake8 + git diff master --name-only -- '*.py' | grep 'pandas/' | xargs -r flake8 + +Note that on OSX, the ``-r`` flag is not available, so you have to omit it and +run this slightly modified command:: + + git diff master --name-only -- '*.py' | grep 'pandas/' | xargs flake8 Backwards Compatibility ~~~~~~~~~~~~~~~~~~~~~~~ From bff47f2302a0be4dcbf7e5055e525d5652e08fb5 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 20 Mar 2017 15:47:48 -0400 Subject: [PATCH 243/933] MAINT: Remove Long and WidePanel (#15748) Deprecated since 0.17.0. 
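(For reference, a sketch of the migration on made-up data, not taken from
this patch: the removed classes were thin aliases, so

    import numpy as np
    import pandas as pd

    wp = pd.Panel(np.random.randn(2, 3, 4))   # instead of pd.WidePanel(...)
    lp = wp.to_frame()                        # instead of pd.LongPanel(...)

WidePanel subclassed Panel and LongPanel subclassed DataFrame, as the
deleted code below shows.)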
xref gh-10892 --- asv_bench/benchmarks/pandas_vb_common.py | 5 ---- bench/bench_join_panel.py | 4 +-- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/api.py | 2 +- pandas/core/panel.py | 23 ---------------- pandas/tests/api/test_api.py | 3 +- pandas/tests/io/test_pytables.py | 3 -- pandas/tests/test_panel.py | 35 ++++++++---------------- vb_suite/pandas_vb_common.py | 5 ---- 9 files changed, 17 insertions(+), 64 deletions(-) diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py index 56ccc94c414fb..a7e530e7f5ef1 100644 --- a/asv_bench/benchmarks/pandas_vb_common.py +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -25,11 +25,6 @@ except: pass -try: - Panel = Panel -except Exception: - Panel = WidePanel - # didn't add to namespace until later try: from pandas.core.index import MultiIndex diff --git a/bench/bench_join_panel.py b/bench/bench_join_panel.py index f3c3f8ba15f70..113b317dd8ff8 100644 --- a/bench/bench_join_panel.py +++ b/bench/bench_join_panel.py @@ -45,8 +45,8 @@ def reindex_on_axis(panels, axis, axis_reindex): return p -# does the job but inefficient (better to handle like you read a table in -# pytables...e.g create a LongPanel then convert to Wide) +# Does the job but inefficient. It is better to handle +# this like you read a table in pytables. def create_panels_join(cls, panels): """ given an array of panels's, create a single panel """ panels = [a for a in panels if a is not None] diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 98407aacb993b..ebdd4060f0588 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -772,6 +772,7 @@ Removal of prior version deprecations/changes - The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`) - The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) - ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`) +- The ``LongPanel`` and ``WidePanel`` classes have been removed (:issue:`10892`) .. _whatsnew_0200.performance: diff --git a/pandas/core/api.py b/pandas/core/api.py index 65253dedb8b53..5018de39ca907 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -15,7 +15,7 @@ from pandas.core.series import Series from pandas.core.frame import DataFrame -from pandas.core.panel import Panel, WidePanel +from pandas.core.panel import Panel from pandas.core.panel4d import Panel4D from pandas.core.reshape import (pivot_simple as pivot, get_dummies, lreshape, wide_to_long) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 4a6c6cf291316..5c7b66a2d1356 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -4,8 +4,6 @@ # pylint: disable=E1103,W0231,W0212,W0621 from __future__ import division -import warnings - import numpy as np from pandas.types.cast import (_infer_dtype_from_scalar, @@ -1556,24 +1554,3 @@ def f(self, other, axis=0): ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs) Panel._add_aggregate_operations() Panel._add_numeric_operations() - - -# legacy -class WidePanel(Panel): - - def __init__(self, *args, **kwargs): - # deprecation, #10892 - warnings.warn("WidePanel is deprecated. 
Please use Panel", - FutureWarning, stacklevel=2) - - super(WidePanel, self).__init__(*args, **kwargs) - - -class LongPanel(DataFrame): - - def __init__(self, *args, **kwargs): - # deprecation, #10892 - warnings.warn("LongPanel is deprecated. Please use DataFrame", - FutureWarning, stacklevel=2) - - super(LongPanel, self).__init__(*args, **kwargs) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 73222c246fc70..2c7dcf2501f32 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -54,8 +54,7 @@ class TestPDApi(Base, tm.TestCase): 'TimedeltaIndex', 'Timestamp'] # these are already deprecated; awaiting removal - deprecated_classes = ['WidePanel', 'Panel4D', - 'SparseList', 'Expr', 'Term'] + deprecated_classes = ['Panel4D', 'SparseList', 'Expr', 'Term'] # these should be deprecated in the future deprecated_classes_in_future = ['Panel'] diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 40866b8702fe2..324160d5b1ae6 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -3017,9 +3017,6 @@ def _check(left, right): # empty # self._check_roundtrip(wp.to_frame()[:0], _check) - def test_longpanel(self): - pass - def test_overwrite_node(self): with ensure_clean_store(self.path) as store: diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index ab0322abbcf06..13e16f3b90730 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -178,10 +178,6 @@ def wrapper(x): class SafeForSparse(object): - @classmethod - def assert_panel_equal(cls, x, y): - assert_panel_equal(x, y) - def test_get_axis(self): assert (self.panel._get_axis(0) is self.panel.items) assert (self.panel._get_axis(1) is self.panel.major_axis) @@ -346,10 +342,10 @@ def check_op(op, name): def test_combinePanel(self): result = self.panel.add(self.panel) - self.assert_panel_equal(result, self.panel * 2) + assert_panel_equal(result, self.panel * 2) def test_neg(self): - self.assert_panel_equal(-self.panel, self.panel * -1) + assert_panel_equal(-self.panel, self.panel * -1) # issue 7692 def test_raise_when_not_implemented(self): @@ -369,22 +365,22 @@ def test_select(self): # select items result = p.select(lambda x: x in ('ItemA', 'ItemC'), axis='items') expected = p.reindex(items=['ItemA', 'ItemC']) - self.assert_panel_equal(result, expected) + assert_panel_equal(result, expected) # select major_axis result = p.select(lambda x: x >= datetime(2000, 1, 15), axis='major') new_major = p.major_axis[p.major_axis >= datetime(2000, 1, 15)] expected = p.reindex(major=new_major) - self.assert_panel_equal(result, expected) + assert_panel_equal(result, expected) # select minor_axis result = p.select(lambda x: x in ('D', 'A'), axis=2) expected = p.reindex(minor=['A', 'D']) - self.assert_panel_equal(result, expected) + assert_panel_equal(result, expected) # corner case, empty thing result = p.select(lambda x: x in ('foo', ), axis='items') - self.assert_panel_equal(result, p.reindex(items=[])) + assert_panel_equal(result, p.reindex(items=[])) def test_get_value(self): for item in self.panel.items: @@ -399,8 +395,8 @@ def test_abs(self): result = self.panel.abs() result2 = abs(self.panel) expected = np.abs(self.panel) - self.assert_panel_equal(result, expected) - self.assert_panel_equal(result2, expected) + assert_panel_equal(result, expected) + assert_panel_equal(result2, expected) df = self.panel['ItemA'] result = df.abs() @@ -867,10 +863,6 @@ def test_set_value(self): class TestPanel(tm.TestCase, 
PanelTests, CheckIndexing, SafeForLongAndSparse, SafeForSparse): - @classmethod - def assert_panel_equal(cls, x, y): - assert_panel_equal(x, y) - def setUp(self): self.panel = _panel.copy() self.panel.major_axis.name = None @@ -1967,7 +1959,7 @@ def test_round(self): major_axis=pd.date_range('1/1/2000', periods=5), minor_axis=['A', 'B']) result = p.round() - self.assert_panel_equal(expected, result) + assert_panel_equal(expected, result) def test_numpy_round(self): values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], @@ -1983,7 +1975,7 @@ def test_numpy_round(self): major_axis=pd.date_range('1/1/2000', periods=5), minor_axis=['A', 'B']) result = np.round(p) - self.assert_panel_equal(expected, result) + assert_panel_equal(expected, result) msg = "the 'out' parameter is not supported" tm.assertRaisesRegexp(ValueError, msg, np.round, p, out=p) @@ -2270,15 +2262,12 @@ def test_all_any_unhandled(self): self.assertRaises(NotImplementedError, self.panel.any, bool_only=True) -class TestLongPanel(tm.TestCase): +class TestPanelFrame(tm.TestCase): """ - LongPanel no longer exists, but... + Check that conversions to and from Panel to DataFrame work. """ def setUp(self): - import warnings - warnings.filterwarnings(action='ignore', category=FutureWarning) - panel = tm.makePanel() tm.add_nans(panel) diff --git a/vb_suite/pandas_vb_common.py b/vb_suite/pandas_vb_common.py index bd2e8a1c1d504..41e43d6ab10e5 100644 --- a/vb_suite/pandas_vb_common.py +++ b/vb_suite/pandas_vb_common.py @@ -18,11 +18,6 @@ except: import pandas._libs.lib as lib -try: - Panel = WidePanel -except Exception: - pass - # didn't add to namespace until later try: from pandas.core.index import MultiIndex From f2e942e185da9369f2c1f4d3b38f57af7b4243bd Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 20 Mar 2017 18:45:19 -0400 Subject: [PATCH 244/933] PERF: Improve drop_duplicates for bool columns (#12963) closes #12963 Author: Matt Roeschke Closes #15738 from mroeschke/fix_12963 and squashes the following commits: a020c10 [Matt Roeschke] PERF: Improve drop_duplicates for bool columns (#12963) --- asv_bench/benchmarks/reindex.py | 5 +++++ doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/algorithms.py | 7 ++++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py index 6fe6c32a96df9..537d275e7c727 100644 --- a/asv_bench/benchmarks/reindex.py +++ b/asv_bench/benchmarks/reindex.py @@ -132,6 +132,9 @@ def setup(self): self.K = 10000 self.key1 = np.random.randint(0, self.K, size=self.N) self.df_int = DataFrame({'key1': self.key1}) + self.df_bool = DataFrame({i: np.random.randint(0, 2, size=self.K, + dtype=bool) + for i in range(10)}) def time_frame_drop_dups(self): self.df.drop_duplicates(['key1', 'key2']) @@ -154,6 +157,8 @@ def time_series_drop_dups_string(self): def time_frame_drop_dups_int(self): self.df_int.drop_duplicates() + def time_frame_drop_dups_bool(self): + self.df_bool.drop_duplicates() #---------------------------------------------------------------------- # blog "pandas escaped the zoo" diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ebdd4060f0588..d036049e3ffdb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -789,6 +789,7 @@ Performance Improvements - Improved performance of ``.rank()`` for categorical data (:issue:`15498`) - Improved performance when using ``.unstack()`` (:issue:`15503`) - Improved performance of merge/join on ``category`` columns (:issue:`10409`) +- 
Improved performance of ``drop_duplicates()`` on ``bool`` columns (:issue:`12963`) .. _whatsnew_0200.bug_fixes: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 6937675603c10..f9d4c9107d7cd 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -19,6 +19,7 @@ is_period_dtype, is_period_arraylike, is_float_dtype, + is_bool_dtype, needs_i8_conversion, is_categorical, is_datetime64_dtype, @@ -325,8 +326,9 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): """ from pandas import Index, Series, DatetimeIndex, PeriodIndex - # handling two possibilities here + # handling possibilities here # - for a numpy datetimelike simply view as i8 then cast back + # - bool handled as uint8 then cast back # - for an extension datetimelike view as i8 then # reconstruct from boxed values to transfer metadata dtype = None @@ -341,6 +343,9 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): # numpy dtype dtype = values.dtype vals = values.view(np.int64) + elif is_bool_dtype(values): + dtype = bool + vals = np.asarray(values).view('uint8') else: vals = np.asarray(values) From 92239f5dcfb02f97b5b1eed651895fe70dfd7eb1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 20 Mar 2017 19:38:01 -0400 Subject: [PATCH 245/933] CI: trying for osx cache again --- .travis.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 705b2380ac697..67b37f1d58931 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,7 +28,11 @@ matrix: os: osx compiler: clang osx_image: xcode6.4 - cache: ccache + cache: + ccache: true + directories: + - $HOME/.cache # cython cache + - $HOME/.ccache # compiler cache env: - PYTHON_VERSION=3.5 - JOB_NAME: "35_osx" From 8c80b6bb939a77ffb0ed11d468f22925abcd555a Mon Sep 17 00:00:00 2001 From: Wiktor Tomczak Date: Tue, 21 Mar 2017 09:52:37 +0100 Subject: [PATCH 246/933] Fix num_days in PandasAutoDateLocator (#14716) --- pandas/tseries/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 1f99e88ce86d6..8aea14a2688d1 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -261,7 +261,7 @@ def get_locator(self, dmin, dmax): 'Pick the best locator based on a distance.' delta = relativedelta(dmax, dmin) - num_days = ((delta.years * 12.0) + delta.months * 31.0) + delta.days + num_days = (delta.years * 12.0 + delta.months) * 31.0 + delta.days num_sec = (delta.hours * 60.0 + delta.minutes) * 60.0 + delta.seconds tot_sec = num_days * 86400. 
+ num_sec From 783ae69f0edc350bdf7b20932351c79a5a1fad3c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 21 Mar 2017 08:21:25 -0400 Subject: [PATCH 247/933] CI: set path for osx ccache --- ci/install_travis.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 053a2d15a287c..c940083f5ae9e 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -32,11 +32,6 @@ edit_init home_dir=$(pwd) echo "[home_dir: $home_dir]" -if [ "${TRAVIS_OS_NAME}" == "osx" ]; then - echo "[install ccache]" - time brew install ccache -fi - # install miniconda MINICONDA_DIR="$HOME/miniconda3" @@ -86,6 +81,14 @@ if [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then ccache=$(which ccache) echo "[ccache: $ccache]" export CC='ccache gcc' +elif [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "osx" ]; then + echo "[Using ccache]" + time brew install ccache + export PATH=/usr/local/opt/ccache/libexec:$PATH + gcc=$(which gcc) + echo "[gcc: $gcc]" + ccache=$(which ccache) + echo "[ccache: $ccache]" else echo "[Not using ccache]" fi From 1e753d7ce9dca129d7ec5383612f874ee1393788 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 21 Mar 2017 10:49:46 -0400 Subject: [PATCH 248/933] CLN: replace _interleave_dtype with _find_common_type xref #15736 xref #12780 Author: Jeff Reback Closes #15765 from jreback/common_types and squashes the following commits: d472646 [Jeff Reback] try removing restriction on windows 8d07cae [Jeff Reback] CLN: replace _interleave_dtype with _find_common_type --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/internals.py | 59 ++++---------------------- pandas/tests/indexing/test_coercion.py | 14 +----- pandas/tests/series/test_replace.py | 4 +- pandas/tests/types/test_cast.py | 14 ++++++ pandas/types/cast.py | 28 +++++++++++- 6 files changed, 55 insertions(+), 66 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index d036049e3ffdb..e0d15c218ec85 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -826,7 +826,7 @@ Bug Fixes - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) -- Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`) +- Bug in ``.replace()`` may result in incorrect dtypes. 
(:issue:`12747`, :issue:`15765`) - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 60684a929889b..6487c2108028e 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -9,7 +9,8 @@ from pandas.core.base import PandasObject -from pandas.types.dtypes import DatetimeTZDtype, CategoricalDtype +from pandas.types.dtypes import (ExtensionDtype, DatetimeTZDtype, + CategoricalDtype) from pandas.types.common import (_TD_DTYPE, _NS_DTYPE, _ensure_int64, _ensure_platform_int, is_integer, @@ -4496,55 +4497,13 @@ def _interleaved_dtype(blocks): if not len(blocks): return None - counts = defaultdict(list) - for x in blocks: - counts[type(x)].append(x) - - have_int = len(counts[IntBlock]) > 0 - have_bool = len(counts[BoolBlock]) > 0 - have_object = len(counts[ObjectBlock]) > 0 - have_float = len(counts[FloatBlock]) > 0 - have_complex = len(counts[ComplexBlock]) > 0 - have_dt64 = len(counts[DatetimeBlock]) > 0 - have_dt64_tz = len(counts[DatetimeTZBlock]) > 0 - have_td64 = len(counts[TimeDeltaBlock]) > 0 - have_cat = len(counts[CategoricalBlock]) > 0 - # TODO: have_sparse is not used - have_sparse = len(counts[SparseBlock]) > 0 # noqa - have_numeric = have_float or have_complex or have_int - has_non_numeric = have_dt64 or have_dt64_tz or have_td64 or have_cat - - if (have_object or - (have_bool and - (have_numeric or have_dt64 or have_dt64_tz or have_td64)) or - (have_numeric and has_non_numeric) or have_cat or have_dt64 or - have_dt64_tz or have_td64): - return np.dtype(object) - elif have_bool: - return np.dtype(bool) - elif have_int and not have_float and not have_complex: - # if we are mixing unsigned and signed, then return - # the next biggest int type (if we can) - lcd = _find_common_type([b.dtype for b in counts[IntBlock]]) - kinds = set([i.dtype.kind for i in counts[IntBlock]]) - if len(kinds) == 1: - return lcd - - if lcd == 'uint64' or lcd == 'int64': - return np.dtype('int64') - - # return 1 bigger on the itemsize if unsinged - if lcd.kind == 'u': - return np.dtype('int%s' % (lcd.itemsize * 8 * 2)) - return lcd - - elif have_int and have_float and not have_complex: - return np.dtype('float64') - elif have_complex: - return np.dtype('c16') - else: - introspection_blks = counts[FloatBlock] + counts[SparseBlock] - return _find_common_type([b.dtype for b in introspection_blks]) + dtype = _find_common_type([b.dtype for b in blocks]) + + # only numpy compat + if isinstance(dtype, ExtensionDtype): + dtype = np.object + + return dtype def _consolidate(blocks): diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index df95f563c0832..7216c05657102 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -1183,19 +1183,9 @@ def _assert_replace_conversion(self, from_key, to_key, how): result = obj.replace(replacer) - # buggy on windows for bool/int64 - if (from_key == 'bool' and - to_key == 'int64' and - tm.is_platform_windows()): - pytest.skip("windows platform buggy: {0} -> {1}".format - (from_key, to_key)) - - if ((from_key == 'float64' and to_key in ('bool', 'int64')) or + if ((from_key == 'float64' and to_key in ('int64')) or (from_key == 'complex128' and - to_key in ('bool', 'int64', 'float64')) or - - # GH12747 The result must be int? 
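The practical effect of delegating ``_interleaved_dtype`` to ``_find_common_type`` (an illustrative sketch, consistent with the ``test_cast.py`` expectations added below): interleaving ``bool`` with numeric blocks now yields ``object`` instead of silently upcasting the booleans::

    import pandas as pd

    # one bool column plus one int column: the common dtype is object
    df = pd.DataFrame({'a': [True, False], 'b': [1, 2]})
    assert df.values.dtype == object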
- (from_key == 'int64' and to_key in ('bool'))): + to_key in ('int64', 'float64'))): # buggy on 32-bit if tm.is_platform_32bit(): diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index f5a25e93cc82d..0a53581e24ba5 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -152,8 +152,8 @@ def check_replace(to_rep, val, expected): tr, v = [3, 4], [3.5, pd.Timestamp('20130101')] check_replace(tr, v, e) - # casts to float - e = pd.Series([0, 1, 2, 3.5, 1]) + # casts to object + e = pd.Series([0, 1, 2, 3.5, True], dtype='object') tr, v = [3, 4], [3.5, True] check_replace(tr, v, e) diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index 70f69cc7d5701..d7b086daea1e3 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -238,6 +238,20 @@ def test_numpy_dtypes(self): ((np.object, np.float32), np.object), ((np.object, np.int16), np.object), + # bool with int + ((np.dtype('bool'), np.int64), np.object), + ((np.dtype('bool'), np.int32), np.object), + ((np.dtype('bool'), np.int16), np.object), + ((np.dtype('bool'), np.int8), np.object), + ((np.dtype('bool'), np.uint64), np.object), + ((np.dtype('bool'), np.uint32), np.object), + ((np.dtype('bool'), np.uint16), np.object), + ((np.dtype('bool'), np.uint8), np.object), + + # bool with float + ((np.dtype('bool'), np.float64), np.object), + ((np.dtype('bool'), np.float32), np.object), + ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ns]')), np.dtype('datetime64[ns]')), ((np.dtype('timedelta64[ns]'), np.dtype('timedelta64[ns]')), diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 11a837dd21159..0e26cd085db5a 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -892,12 +892,28 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'): def _find_common_type(types): - """Find a common data type among the given dtypes.""" + """ + Find a common data type among the given dtypes. 
+ + Parameters + ---------- + types : list of dtypes + + Returns + ------- + pandas extension or numpy dtype + + See Also + -------- + numpy.find_common_type + + """ if len(types) == 0: raise ValueError('no types given') first = types[0] + # workaround for find_common_type([np.dtype('datetime64[ns]')] * 2) # => object if all(is_dtype_equal(first, t) for t in types[1:]): @@ -912,4 +928,14 @@ def _find_common_type(types): if all(is_timedelta64_dtype(t) for t in types): return np.dtype('timedelta64[ns]') + # don't mix bool / int or float or complex + # this is different from numpy, which casts bool with float/int as int + has_bools = any(is_bool_dtype(t) for t in types) + if has_bools: + has_ints = any(is_integer_dtype(t) for t in types) + has_floats = any(is_float_dtype(t) for t in types) + has_complex = any(is_complex_dtype(t) for t in types) + if has_ints or has_floats or has_complex: + return np.object + return np.find_common_type(types, []) From aa9d0cf7fa0061058125d79d22d86f82f69c9185 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 21 Mar 2017 13:39:55 -0400 Subject: [PATCH 249/933] BUG: various 32bit compat issues closes #14866 xref #14183 Author: Jeff Reback Closes #15766 from jreback/32bit and squashes the following commits: 93c03e3 [Jeff Reback] BUG: 32bit compat for .get_indexer 4163918 [Jeff Reback] BUG: fix isin for 32bit platform issues 1bb2f60 [Jeff Reback] BUG: cut/qcut should always return int64 bins --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/algorithms.py | 44 +++++++++++++++++++----------- pandas/tests/indexes/test_multi.py | 2 +- pandas/tests/tools/test_tile.py | 4 +-- pandas/tools/tile.py | 4 +-- 5 files changed, 34 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index e0d15c218ec85..55e3d979b07dd 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -824,6 +824,7 @@ Bug Fixes - Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) - Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`) +- Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``.replace()`` may result in incorrect dtypes. 
(:issue:`12747`, :issue:`15765`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f9d4c9107d7cd..00a3264e6c74a 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -169,33 +169,45 @@ def isin(comps, values): raise TypeError("only list-like objects are allowed to be passed" " to isin(), you passed a " "[{0}]".format(type(comps).__name__)) - comps = np.asarray(comps) if not is_list_like(values): raise TypeError("only list-like objects are allowed to be passed" " to isin(), you passed a " "[{0}]".format(type(values).__name__)) - if not isinstance(values, np.ndarray): - values = list(values) + + from pandas import DatetimeIndex, PeriodIndex + + if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): + values = np.array(list(values), dtype='object') + + if needs_i8_conversion(comps): + if is_period_dtype(values): + comps = PeriodIndex(comps) + values = PeriodIndex(values) + else: + comps = DatetimeIndex(comps) + values = DatetimeIndex(values) + + values = values.asi8 + comps = comps.asi8 + elif is_bool_dtype(comps): + + try: + comps = np.asarray(comps).view('uint8') + values = np.asarray(values).view('uint8') + except TypeError: + # object array conversion will fail + pass + else: + comps = np.asarray(comps) + values = np.asarray(values) # GH11232 # work-around for numpy < 1.8 and comparisions on py3 # faster for larger cases to use np.in1d if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000: f = lambda x, y: np.in1d(x, np.asarray(list(y))) - else: - f = lambda x, y: lib.ismember_int64(x, set(y)) - - # may need i8 conversion for proper membership testing - if is_datetime64_dtype(comps): - from pandas.tseries.tools import to_datetime - values = to_datetime(values)._values.view('i8') - comps = comps.view('i8') - elif is_timedelta64_dtype(comps): - from pandas.tseries.timedeltas import to_timedelta - values = to_timedelta(values)._values.view('i8') - comps = comps.view('i8') elif is_int64_dtype(comps): - pass + f = lambda x, y: lib.ismember_int64(x, set(y)) else: f = lambda x, y: lib.ismember(x, set(values)) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index f67231e78983c..0c274b2f6c4ff 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1359,7 +1359,7 @@ def test_hash_collisions(self): names=['one', 'two']) result = index.get_indexer(index.values) self.assert_numpy_array_equal(result, - np.arange(len(index), dtype='int64')) + np.arange(len(index), dtype='intp')) for i in [0, 1, len(index) - 2, len(index) - 1]: result = index.get_loc(index[i]) diff --git a/pandas/tests/tools/test_tile.py b/pandas/tests/tools/test_tile.py index 11b242bc06e15..cc80c1ff5db29 100644 --- a/pandas/tests/tools/test_tile.py +++ b/pandas/tests/tools/test_tile.py @@ -19,8 +19,8 @@ class TestCut(tm.TestCase): def test_simple(self): data = np.ones(5) result = cut(data, 4, labels=False) - desired = np.array([1, 1, 1, 1, 1]) - tm.assert_numpy_array_equal(result, desired, + expected = np.array([1, 1, 1, 1, 1]) + tm.assert_numpy_array_equal(result, expected, check_dtype=False) def test_bins(self): diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index ccd8c2478e8a5..4a3d452228e01 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -4,7 +4,7 @@ from pandas.types.missing import isnull from pandas.types.common import (is_float, is_integer, - is_scalar) + is_scalar, _ensure_int64) from pandas.core.api import Series from pandas.core.categorical import Categorical @@ -215,7 
+215,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, bins = unique_bins side = 'left' if right else 'right' - ids = bins.searchsorted(x, side=side) + ids = _ensure_int64(bins.searchsorted(x, side=side)) if include_lowest: ids[x == bins[0]] = 1 From 19c8032bc4f05bf79ae927d5235ac63bf6b33ebe Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 21 Mar 2017 14:00:10 -0400 Subject: [PATCH 250/933] DOC: Ensure basic flake8 diff checks only Python (#15769) Follow-up to gh-15749 --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- doc/source/contributing.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 918d427ee4f4c..9281c51059087 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,4 @@ - [ ] closes #xxxx - [ ] tests added / passed - - [ ] passes ``git diff upstream/master | flake8 --diff`` + - [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff`` - [ ] whatsnew entry diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 7ad5916a8809d..5e551a7fd5349 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -518,7 +518,7 @@ Travis-CI will run the `flake8 `_ tool and report any stylistic errors in your code. Therefore, it is helpful before submitting code to run the check yourself on the diff:: - git diff master | flake8 --diff + git diff master --name-only -- '*.py' | flake8 --diff This command will catch any stylistic errors in your changes specifically, but be beware it may not catch all of them. For example, if you delete the only From 163d18ed0d46eeb375f8170f1044808ff40b2a65 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 21 Mar 2017 14:01:32 -0400 Subject: [PATCH 251/933] ENH: support "nrows" and "chunksize" together closes #15755 Author: Pietro Battiston Closes #15756 from toobaz/nrows_chunksize and squashes the following commits: d0288e3 [Pietro Battiston] ENH: support "nrows" and "chunksize" together --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/parsers.py | 24 ++++++++----------- pandas/tests/io/parser/common.py | 27 ++++++++++++++++++++++ pandas/tests/io/parser/test_unsupported.py | 9 -------- 4 files changed, 37 insertions(+), 24 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 55e3d979b07dd..44f0752fc3df4 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -291,6 +291,7 @@ Other enhancements - ``Series`` provides a ``to_excel`` method to output Excel files (:issue:`8825`) - The ``usecols`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`14154`) - The ``skiprows`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`10882`) +- The ``nrows`` and ``chunksize`` arguments in ``pd.read_csv()`` are supported if both are passed (:issue:`6774`, :issue:`15755`) - ``pd.DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`) - ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) - ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. 
See the :ref:`Timedelta docs ` (:issue:`15136`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9aedddc811830..18343670fb39e 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -384,29 +384,18 @@ def _read(filepath_or_buffer, kwds): # Extract some of the arguments (pass chunksize on). iterator = kwds.get('iterator', False) chunksize = kwds.get('chunksize', None) - nrows = _validate_nrows(kwds.pop('nrows', None)) + nrows = _validate_nrows(kwds.get('nrows', None)) # Create the parser. parser = TextFileReader(filepath_or_buffer, **kwds) - if (nrows is not None) and (chunksize is not None): - raise NotImplementedError("'nrows' and 'chunksize' cannot be used" - " together yet.") - elif nrows is not None: - try: - data = parser.read(nrows) - finally: - parser.close() - return data - - elif chunksize or iterator: + if chunksize or iterator: return parser try: - data = parser.read() + data = parser.read(nrows) finally: parser.close() - return data @@ -445,7 +434,7 @@ def _read(filepath_or_buffer, kwds): 'usecols': None, - # 'nrows': None, + 'nrows': None, # 'iterator': False, 'chunksize': None, 'verbose': False, @@ -749,6 +738,7 @@ def __init__(self, f, engine=None, **kwds): options = self._get_options_with_defaults(engine) self.chunksize = options.pop('chunksize', None) + self.nrows = options.pop('nrows', None) self.squeeze = options.pop('squeeze', False) # might mutate self.engine @@ -1009,6 +999,10 @@ def _create_index(self, ret): def get_chunk(self, size=None): if size is None: size = self.chunksize + if self.nrows is not None: + if self._currow >= self.nrows: + raise StopIteration + size = min(size, self.nrows - self._currow) return self.read(nrows=size) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index df75d14e9702d..24d15dcb96fe7 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -402,6 +402,33 @@ def test_read_chunksize(self): tm.assert_frame_equal(chunks[1], df[2:4]) tm.assert_frame_equal(chunks[2], df[4:]) + def test_read_chunksize_and_nrows(self): + + # gh-15755 + # With nrows + reader = self.read_csv(StringIO(self.data1), index_col=0, + chunksize=2, nrows=5) + df = self.read_csv(StringIO(self.data1), index_col=0, nrows=5) + + tm.assert_frame_equal(pd.concat(reader), df) + + # chunksize > nrows + reader = self.read_csv(StringIO(self.data1), index_col=0, + chunksize=8, nrows=5) + df = self.read_csv(StringIO(self.data1), index_col=0, nrows=5) + + tm.assert_frame_equal(pd.concat(reader), df) + + # with changing "size": + reader = self.read_csv(StringIO(self.data1), index_col=0, + chunksize=8, nrows=5) + df = self.read_csv(StringIO(self.data1), index_col=0, nrows=5) + + tm.assert_frame_equal(reader.get_chunk(size=2), df.iloc[:2]) + tm.assert_frame_equal(reader.get_chunk(size=4), df.iloc[2:5]) + with tm.assertRaises(StopIteration): + reader.get_chunk(size=3) + def test_read_chunksize_named(self): reader = self.read_csv( StringIO(self.data1), index_col='index', chunksize=2) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 999db47cf2eaf..48dd5d4ba506b 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -29,15 +29,6 @@ def test_mangle_dupe_cols_false(self): read_csv(StringIO(data), engine=engine, mangle_dupe_cols=False) - def test_nrows_and_chunksize(self): - data = 'a b c' - msg = "cannot be used together yet" - - for engine in ('c', 'python'): - with 
tm.assertRaisesRegexp(NotImplementedError, msg): - read_csv(StringIO(data), engine=engine, - nrows=10, chunksize=5) - def test_c_engine(self): # see gh-6607 data = 'a b c\n1 2 3' From 1c9d46a3bb8737c877b0a15aaea15dfb0172ac1c Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Tue, 21 Mar 2017 17:52:18 -0400 Subject: [PATCH 252/933] BUG: Enforce correct encoding in stata Ensure StataReader and StataWriter have the correct encoding. Standardized default encoding to 'latin-1' closes #15723 Author: Kevin Sheppard Closes #15768 from bashtage/limit-stata-encoding and squashes the following commits: 8278be7 [Kevin Sheppard] BUG: Fix limited key range on 32-bit platforms 2f02697 [Kevin Sheppard] BUG: Enforce correct encoding in stata --- doc/source/whatsnew/v0.20.0.txt | 3 +++ pandas/io/stata.py | 29 ++++++++++++++++++++++------- pandas/tests/io/test_stata.py | 7 +++++++ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 44f0752fc3df4..eeb568c2e2558 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -919,6 +919,8 @@ Bug Fixes - Avoid use of ``np.finfo()`` during ``import pandas`` removed to mitigate deadlock on Python GIL misuse (:issue:`14641`) - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) +- Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`) + - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) - Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) @@ -933,3 +935,4 @@ Bug Fixes - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) - Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) + diff --git a/pandas/io/stata.py b/pandas/io/stata.py index af4bc6a6b7ddb..1d2951da68086 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -33,6 +33,9 @@ from pandas._libs.lib import max_len_string_array, infer_dtype from pandas._libs.tslib import NaT, Timestamp +VALID_ENCODINGS = ('ascii', 'us-ascii', 'latin-1', 'latin_1', 'iso-8859-1', + 'iso8859-1', '8859', 'cp819', 'latin', 'latin1', 'L1') + _version_error = ("Version of given Stata file is not 104, 105, 108, " "111 (Stata 7SE), 113 (Stata 8/9), 114 (Stata 10/11), " "115 (Stata 12), 117 (Stata 13), or 118 (Stata 14)") @@ -45,7 +48,7 @@ _encoding_params = """\ encoding : string, None or encoding - Encoding used to parse the files. None defaults to iso-8859-1.""" + Encoding used to parse the files. None defaults to latin-1.""" _statafile_processing_params2 = """\ index : identifier of index column @@ -816,9 +819,14 @@ def get_base_missing_value(cls, dtype): class StataParser(object): - _default_encoding = 'iso-8859-1' + _default_encoding = 'latin-1' def __init__(self, encoding): + if encoding is not None: + if encoding not in VALID_ENCODINGS: + raise ValueError('Unknown encoding. Only latin-1 and ascii ' + 'supported.') + self._encoding = encoding # type code.
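With the validation added to ``StataParser.__init__`` above, an unsupported encoding now fails fast with a ``ValueError`` instead of writing a potentially corrupt file. An illustrative sketch, mirroring the new ``test_invalid_encoding`` test further down::

    import pandas as pd

    df = pd.DataFrame({'x': [1.0, 2.0]})
    try:
        df.to_stata('out.dta', encoding='utf-8')  # not in VALID_ENCODINGS
    except ValueError as err:
        print(err)  # Unknown encoding. Only latin-1 and ascii supported.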
@@ -936,7 +944,7 @@ def __init__(self, path_or_buf, convert_dates=True, convert_categoricals=True, index=None, convert_missing=False, preserve_dtypes=True, columns=None, order_categoricals=True, - encoding='iso-8859-1', chunksize=None): + encoding='latin-1', chunksize=None): super(StataReader, self).__init__(encoding) self.col_sizes = () @@ -949,6 +957,10 @@ def __init__(self, path_or_buf, convert_dates=True, self._preserve_dtypes = preserve_dtypes self._columns = columns self._order_categoricals = order_categoricals + if encoding is not None: + if encoding not in VALID_ENCODINGS: + raise ValueError('Unknown encoding. Only latin-1 and ascii ' + 'supported.') self._encoding = encoding self._chunksize = chunksize @@ -1362,7 +1374,8 @@ def _read_value_labels(self): def _read_strls(self): self.path_or_buf.seek(self.seek_strls) - self.GSO = {0: ''} + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + self.GSO = {'0': ''} while True: if self.path_or_buf.read(3) != b'GSO': break @@ -1387,7 +1400,8 @@ def _read_strls(self): if self.format_version == 117: encoding = self._encoding or self._default_encoding va = va[0:-1].decode(encoding) - self.GSO[v_o] = va + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + self.GSO[str(v_o)] = va # legacy @Appender('DEPRECATED: ' + _data_method_doc) @@ -1623,7 +1637,8 @@ def _insert_strls(self, data): for i, typ in enumerate(self.typlist): if typ != 'Q': continue - data.iloc[:, i] = [self.GSO[k] for k in data.iloc[:, i]] + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + data.iloc[:, i] = [self.GSO[str(k)] for k in data.iloc[:, i]] return data def _do_select_columns(self, data, columns): @@ -1855,7 +1870,7 @@ class StataWriter(StataParser): write_index : bool Write the index to Stata dataset. encoding : str - Default is latin-1. Unicode is not supported + Default is latin-1. Only latin-1 and ascii are supported. byteorder : str Can be ">", "<", "little", or "big". 
default is `sys.byteorder` time_stamp : datetime diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 5188adf54b887..db594889c91ee 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1276,3 +1276,10 @@ def test_out_of_range_float(self): original.to_stata(path) tm.assertTrue('ColumnTooBig' in cm.exception) tm.assertTrue('infinity' in cm.exception) + + def test_invalid_encoding(self): + # GH15723, validate encoding + original = self.read_csv(self.csv3) + with tm.assertRaises(ValueError): + with tm.ensure_clean() as path: + original.to_stata(path, encoding='utf-8') From 32dd92912f15a5c66035f5674c116d23f21bdbca Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 21 Mar 2017 19:19:25 -0400 Subject: [PATCH 253/933] CLN: relocate lib.ismember* to hashtable space - fixes .isin on 32-bit (hopefully) - perf about 30% better - releases GIL Author: Jeff Reback Closes #15773 from jreback/ismember and squashes the following commits: a7dfe51 [Jeff Reback] CLN: relocate lib.ismember* to hashtable space --- pandas/_libs/hashtable_func_helper.pxi.in | 98 ++++++++++++++++++++--- pandas/_libs/lib.pyx | 72 +---------------- pandas/core/algorithms.py | 32 ++++++-- pandas/core/frame.py | 4 +- pandas/indexes/multi.py | 5 +- pandas/indexes/numeric.py | 8 +- pandas/io/parsers.py | 6 +- pandas/tests/indexes/test_base.py | 5 +- pandas/tseries/tools.py | 3 +- 9 files changed, 131 insertions(+), 102 deletions(-) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index fa373905ef08a..0608af8f8504b 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -11,14 +11,14 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in {{py: # dtype, ttype -dtypes = [('float64', 'float64'), - ('uint64', 'uint64'), - ('object', 'pymap'), - ('int64', 'int64')] +dtypes = [('float64', 'float64', 'float64_t'), + ('uint64', 'uint64', 'uint64_t'), + ('object', 'pymap', 'object'), + ('int64', 'int64', 'int64_t')] }} -{{for dtype, ttype in dtypes}} +{{for dtype, ttype, scalar in dtypes}} @cython.wraparound(False) @@ -34,9 +34,7 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values, khiter_t k Py_ssize_t i, n = len(values) - {{if dtype != 'object'}} - {{dtype}}_t val - {{endif}} + {{scalar}} val int ret = 0 @@ -79,7 +77,7 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values, {{if dtype == 'object'}} cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna): {{else}} -cpdef value_count_{{dtype}}({{dtype}}_t[:] values, bint dropna): +cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna): {{endif}} cdef: Py_ssize_t i=0 @@ -130,12 +128,11 @@ cpdef value_count_{{dtype}}({{dtype}}_t[:] values, bint dropna): @cython.boundscheck(False) {{if dtype == 'object'}} - def duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'): {{else}} -def duplicated_{{dtype}}({{dtype}}_t[:] values, object keep='first'): +def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'): {{endif}} cdef: int ret = 0 @@ -203,8 +200,87 @@ def duplicated_{{dtype}}({{dtype}}_t[:] values, object keep='first'): kh_destroy_{{ttype}}(table) return out + +#---------------------------------------------------------------------- +# Membership +#---------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if dtype == 'object'}} + +def ismember_{{dtype}}(ndarray[{{scalar}}] arr, ndarray[{{scalar}}] 
values, bint hasnans=0): +{{else}} + +def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values, bint hasnans=0): +{{endif}} + + """ + Return boolean of values in arr on an + element by-element basis + + Parameters + ---------- + arr : {{dtype}} ndarray + values : {{dtype}} ndarray + hasnans : bint, optional + + Returns + ------- + boolean ndarry len of (arr) + """ + cdef: + Py_ssize_t i, n, k + int ret = 0 + ndarray[uint8_t] result + {{scalar}} val + kh_{{ttype}}_t * table = kh_init_{{ttype}}() + + + # construct the table + n = len(values) + kh_resize_{{ttype}}(table, min(n, len(values))) + + {{if dtype == 'object'}} + for i in range(n): + kh_put_{{ttype}}(table, values[i], &ret) + {{else}} + with nogil: + for i in range(n): + kh_put_{{ttype}}(table, values[i], &ret) + {{endif}} + + # test membership + n = len(arr) + result = np.empty(n, dtype=np.uint8) + + {{if dtype == 'object'}} + for i in range(n): + val = arr[i] + k = kh_get_{{ttype}}(table, val) + if k != table.n_buckets: + result[i] = 1 + else: + result[i] = hasnans and val != val + {{else}} + with nogil: + for i in range(n): + val = arr[i] + k = kh_get_{{ttype}}(table, val) + if k != table.n_buckets: + result[i] = 1 + else: + result[i] = hasnans and val != val + {{endif}} + + kh_destroy_{{ttype}}(table) + return result.view(np.bool_) + {{endfor}} + + #---------------------------------------------------------------------- # Mode Computations #---------------------------------------------------------------------- diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index b4724bc3dd59b..f78040e5a52f2 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -13,6 +13,7 @@ cdef extern from "numpy/arrayobject.h": cdef enum NPY_TYPES: NPY_intp "NPY_INTP" +from libc.stdlib cimport malloc, free from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem, PyDict_Contains, PyDict_Keys, @@ -111,77 +112,6 @@ cpdef map_indices_list(list index): return result -from libc.stdlib cimport malloc, free - - -def ismember_nans(float64_t[:] arr, set values, bint hasnans): - cdef: - Py_ssize_t i, n - ndarray[uint8_t] result - float64_t val - - n = len(arr) - result = np.empty(n, dtype=np.uint8) - for i in range(n): - val = arr[i] - result[i] = val in values or hasnans and isnan(val) - - return result.view(np.bool_) - - -def ismember(ndarray arr, set values): - """ - Checks whether - - Parameters - ---------- - arr : ndarray - values : set - - Returns - ------- - ismember : ndarray (boolean dtype) - """ - cdef: - Py_ssize_t i, n - ndarray[uint8_t] result - object val - - n = len(arr) - result = np.empty(n, dtype=np.uint8) - for i in range(n): - val = util.get_value_at(arr, i) - result[i] = val in values - - return result.view(np.bool_) - - -def ismember_int64(ndarray[int64_t] arr, set values): - """ - Checks whether - - Parameters - ---------- - arr : ndarray of int64 - values : set - - Returns - ------- - ismember : ndarray (boolean dtype) - """ - cdef: - Py_ssize_t i, n - ndarray[uint8_t] result - int64_t v - - n = len(arr) - result = np.empty(n, dtype=np.uint8) - for i in range(n): - result[i] = arr[i] in values - - return result.view(np.bool_) - - @cython.wraparound(False) @cython.boundscheck(False) def memory_usage_of_objects(ndarray[object, ndim=1] arr): diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 00a3264e6c74a..9a8d0a779105e 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -12,12 +12,12 @@ from pandas.types.common import (is_unsigned_integer_dtype, is_signed_integer_dtype, 
is_integer_dtype, - is_int64_dtype, is_categorical_dtype, is_extension_type, is_datetimetz, is_period_dtype, is_period_arraylike, + is_numeric_dtype, is_float_dtype, is_bool_dtype, needs_i8_conversion, @@ -197,19 +197,37 @@ def isin(comps, values): except TypeError: # object array conversion will fail pass - else: + elif is_numeric_dtype(comps): comps = np.asarray(comps) values = np.asarray(values) + else: + comps = np.asarray(comps).astype(object) + values = np.asarray(values).astype(object) # GH11232 # work-around for numpy < 1.8 and comparisions on py3 # faster for larger cases to use np.in1d + f = lambda x, y: htable.ismember_object(x, values) if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000: - f = lambda x, y: np.in1d(x, np.asarray(list(y))) - elif is_int64_dtype(comps): - f = lambda x, y: lib.ismember_int64(x, set(y)) - else: - f = lambda x, y: lib.ismember(x, set(values)) + f = lambda x, y: np.in1d(x, y) + elif is_integer_dtype(comps): + try: + values = values.astype('int64', copy=False) + comps = comps.astype('int64', copy=False) + f = lambda x, y: htable.ismember_int64(x, y) + except (TypeError, ValueError): + values = values.astype(object) + comps = comps.astype(object) + + elif is_float_dtype(comps): + try: + values = values.astype('float64', copy=False) + comps = comps.astype('float64', copy=False) + checknull = isnull(values).any() + f = lambda x, y: htable.ismember_float64(x, y, checknull) + except (TypeError, ValueError): + values = values.astype(object) + comps = comps.astype(object) return f(comps, values) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 732d88b47ae2a..b49aa926d1923 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5358,8 +5358,8 @@ def isin(self, values): "you passed a " "{0!r}".format(type(values).__name__)) return DataFrame( - lib.ismember(self.values.ravel(), - set(values)).reshape(self.shape), self.index, + algorithms.isin(self.values.ravel(), + values).reshape(self.shape), self.index, self.columns) # ---------------------------------------------------------------------- diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 978492131ca89..e6ae0605d4758 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -1392,7 +1392,7 @@ def _drop_from_level(self, labels, level): index = self.levels[i] values = index.get_indexer(labels) - mask = ~lib.ismember(self.labels[i], set(values)) + mask = ~algos.isin(self.labels[i], values) return self[mask] @@ -2463,7 +2463,8 @@ def _wrap_joined_index(self, joined, other): @Appender(Index.isin.__doc__) def isin(self, values, level=None): if level is None: - return lib.ismember(np.array(self), set(values)) + return algos.isin(self.values, + MultiIndex.from_tuples(values).values) else: num = self._get_level_number(level) levs = self.levels[num] diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 2f897c81975c2..31258c785d9e8 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -1,13 +1,13 @@ import numpy as np -from pandas._libs import (lib, index as libindex, +from pandas._libs import (index as libindex, algos as libalgos, join as libjoin) from pandas.types.common import (is_dtype_equal, pandas_dtype, is_float_dtype, is_object_dtype, is_integer_dtype, is_scalar) -from pandas.types.missing import isnull from pandas.core.common import _asarray_tuplesafe, _values_from_object from pandas import compat +from pandas.core import algorithms from pandas.indexes.base import Index, InvalidIndexError, _index_shared_docs 
from pandas.util.decorators import Appender, cache_readonly import pandas.indexes.base as ibase @@ -379,11 +379,9 @@ def is_unique(self): @Appender(Index.isin.__doc__) def isin(self, values, level=None): - value_set = set(values) if level is not None: self._validate_index_level(level) - return lib.ismember_nans(np.array(self), value_set, - isnull(list(value_set)).any()) + return algorithms.isin(np.array(self), values) Float64Index._add_numeric_methods() diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 18343670fb39e..90d72c0bceeb7 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -26,6 +26,7 @@ from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.categorical import Categorical +from pandas.core import algorithms from pandas.core.common import AbstractMethodError from pandas.io.date_converters import generic_parser from pandas.io.common import (get_filepath_or_buffer, _validate_header_arg, @@ -1388,7 +1389,8 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, try: values = lib.map_infer(values, conv_f) except ValueError: - mask = lib.ismember(values, na_values).view(np.uint8) + mask = algorithms.isin( + values, list(na_values)).view(np.uint8) values = lib.map_infer_mask(values, conv_f, mask) cvals, na_count = self._infer_types( @@ -1436,7 +1438,7 @@ def _infer_types(self, values, na_values, try_num_bool=True): na_count = 0 if issubclass(values.dtype.type, (np.number, np.bool_)): - mask = lib.ismember(values, na_values) + mask = algorithms.isin(values, list(na_values)) na_count = mask.sum() if na_count > 0: if is_integer_dtype(values): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 7199a38bb7a80..c4dc10d8174cc 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1363,14 +1363,17 @@ def test_isin_nan(self): np.array([False, False])) tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([pd.NaT]), np.array([False, False])) + # Float64Index overrides isin, so must be checked separately tm.assert_numpy_array_equal(Float64Index([1.0, np.nan]).isin([np.nan]), np.array([False, True])) tm.assert_numpy_array_equal( Float64Index([1.0, np.nan]).isin([float('nan')]), np.array([False, True])) + + # we cannot compare NaT with NaN tm.assert_numpy_array_equal(Float64Index([1.0, np.nan]).isin([pd.NaT]), - np.array([False, True])) + np.array([False, False])) def test_isin_level_kwarg(self): def check_idx(idx): diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index 093331e861fa7..5dc9746c6d6f9 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -13,6 +13,7 @@ from pandas.types.generic import (ABCIndexClass, ABCSeries, ABCDataFrame) from pandas.types.missing import notnull +from pandas.core import algorithms import pandas.compat as compat @@ -577,7 +578,7 @@ def calc_with_mask(carg, mask): # string with NaN-like try: - mask = ~lib.ismember(arg, tslib._nat_strings) + mask = ~algorithms.isin(arg, list(tslib._nat_strings)) return calc_with_mask(arg, mask) except: pass From a20009f7fe93c17c13447ba0aff9756b2b5d4863 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Wed, 22 Mar 2017 09:03:32 +0100 Subject: [PATCH 254/933] BUG: Check that values for "nrows" and "chunksize" are valid (#15774) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/parsers.py | 32 +++++++++++++++++++++----------- pandas/tests/io/parser/common.py | 17 ++++++++++++++++- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git 
a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index eeb568c2e2558..5ac7624856040 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -815,6 +815,7 @@ Bug Fixes - Bug in ``pd.read_fwf`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`) - Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`) - Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`) +- Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. (:issue:`15021`) - Bug in using ``__deepcopy__`` on empty NDFrame objects (:issue:`15370`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 90d72c0bceeb7..af57cc3ce7950 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -345,24 +345,34 @@ """ % (_parser_params % (_fwf_widths, '')) -def _validate_nrows(nrows): +def _validate_integer(name, val, min_val=0): """ - Checks whether the 'nrows' parameter for parsing is either + Checks whether the 'name' parameter for parsing is either an integer OR float that can SAFELY be cast to an integer without losing accuracy. Raises a ValueError if that is not the case. + + Parameters + ---------- + name : string + Parameter name (used for error reporting) + val : int or float + The value to check + min_val : int + Minimum allowed value (val < min_val will result in a ValueError) """ - msg = "'nrows' must be an integer" + msg = "'{name:s}' must be an integer >={min_val:d}".format(name=name, + min_val=min_val) - if nrows is not None: - if is_float(nrows): - if int(nrows) != nrows: + if val is not None: + if is_float(val): + if int(val) != val: raise ValueError(msg) - nrows = int(nrows) - elif not is_integer(nrows): + val = int(val) + elif not (is_integer(val) and val >= min_val): raise ValueError(msg) - return nrows + return val def _read(filepath_or_buffer, kwds): @@ -384,8 +394,8 @@ def _read(filepath_or_buffer, kwds): # Extract some of the arguments (pass chunksize on). iterator = kwds.get('iterator', False) - chunksize = kwds.get('chunksize', None) - nrows = _validate_nrows(kwds.get('nrows', None)) + chunksize = _validate_integer('chunksize', kwds.get('chunksize', None), 1) + nrows = _validate_integer('nrows', kwds.get('nrows', None)) # Create the parser. 
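# An illustrative sketch of what the _validate_integer checks above reject
# (mirroring the updated tests further down); each of these now raises
# ValueError("'nrows' must be an integer >=0"):
#
#   pd.read_csv(StringIO(data), nrows=-1)
#   pd.read_csv(StringIO(data), nrows=1.2)
#   pd.read_csv(StringIO(data), nrows='foo')
#
# (here data is any CSV string, as in the tests)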
parser = TextFileReader(filepath_or_buffer, **kwds) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 24d15dcb96fe7..2c8bca490f274 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -384,7 +384,7 @@ def test_read_nrows(self): df = self.read_csv(StringIO(self.data1), nrows=3.0) tm.assert_frame_equal(df, expected) - msg = "must be an integer" + msg = r"'nrows' must be an integer >=0" with tm.assertRaisesRegexp(ValueError, msg): self.read_csv(StringIO(self.data1), nrows=1.2) @@ -392,6 +392,9 @@ def test_read_nrows(self): with tm.assertRaisesRegexp(ValueError, msg): self.read_csv(StringIO(self.data1), nrows='foo') + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(self.data1), nrows=-1) + def test_read_chunksize(self): reader = self.read_csv(StringIO(self.data1), index_col=0, chunksize=2) df = self.read_csv(StringIO(self.data1), index_col=0) @@ -402,6 +405,18 @@ def test_read_chunksize(self): tm.assert_frame_equal(chunks[1], df[2:4]) tm.assert_frame_equal(chunks[2], df[4:]) + # with invalid chunksize value: + msg = r"'chunksize' must be an integer >=1" + + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(self.data1), chunksize=1.3) + + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(self.data1), chunksize='foo') + + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(self.data1), chunksize=0) + def test_read_chunksize_and_nrows(self): # gh-15755 From bc1235e8277568b4d095b5fd4a5d7990a7fafefd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Mar 2017 07:53:46 -0400 Subject: [PATCH 255/933] COMPAT: 32-bit skips (#15776) closes #14183 --- pandas/tests/indexes/common.py | 1 - pandas/tests/indexes/period/test_period.py | 9 ++++++++- pandas/tests/test_algos.py | 4 +++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index b1e6bd7520c69..e9122f7a17359 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -121,7 +121,6 @@ def test_reindex_base(self): idx.get_indexer(idx, method='invalid') def test_ndarray_compat_properties(self): - idx = self.create_index() self.assertTrue(idx.T.equals(idx)) self.assertTrue(idx.transpose().equals(idx)) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 1739211982b10..4fbadfca06ede 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np from numpy.random import randn from datetime import timedelta @@ -6,7 +8,7 @@ from pandas.util import testing as tm from pandas import (PeriodIndex, period_range, notnull, DatetimeIndex, NaT, Index, Period, Int64Index, Series, DataFrame, date_range, - offsets) + offsets, compat) from ..datetimelike import DatetimeLike @@ -626,6 +628,11 @@ def test_shift_nat(self): tm.assert_index_equal(result, expected) self.assertEqual(result.name, expected.name) + def test_ndarray_compat_properties(self): + if compat.is_platform_32bit(): + pytest.skip("skipping on 32bit") + super(TestPeriodIndex, self).test_ndarray_compat_properties() + def test_shift_ndarray(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M', name='idx') diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 7a3cc3e2c3cd7..ce925f756edb7 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -648,7 
+648,9 @@ def test_value_counts_uint64(self): expected = Series([1, 1], index=[-1, 2**63]) result = algos.value_counts(arr) - tm.assert_series_equal(result, expected) + # 32-bit linux has a different ordering + if not compat.is_platform_32bit(): + tm.assert_series_equal(result, expected) class TestDuplicated(tm.TestCase): From 2a3b05a3a7167c7b384375e9442c350f740e9629 Mon Sep 17 00:00:00 2001 From: Dominik Stanczak Date: Wed, 22 Mar 2017 07:55:29 -0400 Subject: [PATCH 256/933] CLN/INT: Rename _possibly to _maybe (GH15764) Also rename "private" functions in pandas.types.cast closes #15764 Author: Dominik Stanczak Closes #15771 from StanczakDominik/rename-possibly and squashes the following commits: 486b932 [Dominik Stanczak] Cleanup missed linting errors 188c48b [Dominik Stanczak] CLN/INT: Rename _possibly to _maybe --- pandas/computation/expr.py | 34 +++++----- pandas/core/algorithms.py | 6 +- pandas/core/categorical.py | 21 +++--- pandas/core/frame.py | 54 +++++++-------- pandas/core/generic.py | 6 +- pandas/core/groupby.py | 6 +- pandas/core/internals.py | 67 +++++++++--------- pandas/core/nanops.py | 4 +- pandas/core/ops.py | 10 +-- pandas/core/panel.py | 12 ++-- pandas/core/reshape.py | 4 +- pandas/core/series.py | 20 +++--- pandas/indexes/base.py | 6 +- pandas/indexes/frozen.py | 4 +- pandas/io/parsers.py | 6 +- pandas/sparse/array.py | 12 ++-- pandas/sparse/frame.py | 6 +- pandas/tests/types/test_cast.py | 116 ++++++++++++++++---------------- pandas/tools/util.py | 5 +- pandas/tseries/index.py | 2 +- pandas/tseries/tdi.py | 2 +- pandas/types/cast.py | 56 +++++++-------- 22 files changed, 228 insertions(+), 231 deletions(-) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index a782287175327..e78806b38c667 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -348,7 +348,7 @@ def _rewrite_membership_op(self, node, left, right): op = self.visit(op_instance) return op, op_instance, left, right - def _possibly_transform_eq_ne(self, node, left=None, right=None): + def _maybe_transform_eq_ne(self, node, left=None, right=None): if left is None: left = self.visit(node.left, side='left') if right is None: @@ -357,7 +357,7 @@ def _possibly_transform_eq_ne(self, node, left=None, right=None): right) return op, op_class, left, right - def _possibly_downcast_constants(self, left, right): + def _maybe_downcast_constants(self, left, right): f32 = np.dtype(np.float32) if left.isscalar and not right.isscalar and right.return_type == f32: # right is a float32 array, left is a scalar @@ -370,7 +370,7 @@ def _possibly_downcast_constants(self, left, right): return left, right - def _possibly_eval(self, binop, eval_in_python): + def _maybe_eval(self, binop, eval_in_python): # eval `in` and `not in` (for now) in "partial" python space # things that can be evaluated in "eval" space will be turned into # temporary variables.
for example, @@ -380,10 +380,10 @@ def _possibly_eval(self, binop, eval_in_python): return binop.evaluate(self.env, self.engine, self.parser, self.term_type, eval_in_python) - def _possibly_evaluate_binop(self, op, op_class, lhs, rhs, - eval_in_python=('in', 'not in'), - maybe_eval_in_python=('==', '!=', '<', '>', - '<=', '>=')): + def _maybe_evaluate_binop(self, op, op_class, lhs, rhs, + eval_in_python=('in', 'not in'), + maybe_eval_in_python=('==', '!=', '<', '>', + '<=', '>=')): res = op(lhs, rhs) if res.has_invalid_return_type: @@ -397,24 +397,24 @@ def _possibly_evaluate_binop(self, op, op_class, lhs, rhs, getattr(rhs, 'is_datetime', False)): # all date ops must be done in python bc numexpr doesn't work # well with NaT - return self._possibly_eval(res, self.binary_ops) + return self._maybe_eval(res, self.binary_ops) if res.op in eval_in_python: # "in"/"not in" ops are always evaluated in python - return self._possibly_eval(res, eval_in_python) + return self._maybe_eval(res, eval_in_python) elif self.engine != 'pytables': if (getattr(lhs, 'return_type', None) == object or getattr(rhs, 'return_type', None) == object): # evaluate "==" and "!=" in python if either of our operands # has an object return type - return self._possibly_eval(res, eval_in_python + - maybe_eval_in_python) + return self._maybe_eval(res, eval_in_python + + maybe_eval_in_python) return res def visit_BinOp(self, node, **kwargs): - op, op_class, left, right = self._possibly_transform_eq_ne(node) - left, right = self._possibly_downcast_constants(left, right) - return self._possibly_evaluate_binop(op, op_class, left, right) + op, op_class, left, right = self._maybe_transform_eq_ne(node) + left, right = self._maybe_downcast_constants(left, right) + return self._maybe_evaluate_binop(op, op_class, left, right) def visit_Div(self, node, **kwargs): truediv = self.env.scope['truediv'] @@ -662,9 +662,9 @@ def visitor(x, y): lhs = self._try_visit_binop(x) rhs = self._try_visit_binop(y) - op, op_class, lhs, rhs = self._possibly_transform_eq_ne(node, lhs, - rhs) - return self._possibly_evaluate_binop(op, node.op, lhs, rhs) + op, op_class, lhs, rhs = self._maybe_transform_eq_ne( + node, lhs, rhs) + return self._maybe_evaluate_binop(op, node.op, lhs, rhs) operands = node.values return reduce(visitor, operands) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 9a8d0a779105e..3b77bda6f69f0 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -7,7 +7,7 @@ import numpy as np from pandas import compat, _np_version_under1p8 -from pandas.types.cast import _maybe_promote +from pandas.types.cast import maybe_promote from pandas.types.generic import ABCSeries, ABCIndex from pandas.types.common import (is_unsigned_integer_dtype, is_signed_integer_dtype, @@ -1297,7 +1297,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, else: # check for promotion based on types only (do this first because # it's faster than computing a mask) - dtype, fill_value = _maybe_promote(arr.dtype, fill_value) + dtype, fill_value = maybe_promote(arr.dtype, fill_value) if dtype != arr.dtype and (out is None or out.dtype != dtype): # check if promotion is actually required based on indexer if mask_info is not None: @@ -1380,7 +1380,7 @@ def take_2d_multi(arr, indexer, out=None, fill_value=np.nan, mask_info=None, else: # check for promotion based on types only (do this first because # it's faster than computing a mask) - dtype, fill_value = _maybe_promote(arr.dtype, fill_value) + dtype, 
fill_value = maybe_promote(arr.dtype, fill_value) if dtype != arr.dtype and (out is None or out.dtype != dtype): # check if promotion is actually required based on indexer if mask_info is not None: diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index af51c7f2e2dc1..0e58c18631588 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -10,8 +10,8 @@ from pandas.types.generic import ABCSeries, ABCIndexClass, ABCCategoricalIndex from pandas.types.missing import isnull, notnull -from pandas.types.cast import (_possibly_infer_to_datetimelike, - _coerce_indexer_dtype) +from pandas.types.cast import (maybe_infer_to_datetimelike, + coerce_indexer_dtype) from pandas.types.dtypes import CategoricalDtype from pandas.types.common import (_ensure_int64, _ensure_object, @@ -237,7 +237,7 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False): if fastpath: # fast path - self._codes = _coerce_indexer_dtype(values, categories) + self._codes = coerce_indexer_dtype(values, categories) self._categories = self._validate_categories( categories, fastpath=isinstance(categories, ABCIndexClass)) self._ordered = ordered @@ -266,8 +266,7 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False): # correctly no need here this is an issue because _sanitize_array # also coerces np.nan to a string under certain versions of numpy # as well - values = _possibly_infer_to_datetimelike(values, - convert_dates=True) + values = maybe_infer_to_datetimelike(values, convert_dates=True) if not isinstance(values, np.ndarray): values = _convert_to_list_like(values) from pandas.core.series import _sanitize_array @@ -324,7 +323,7 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False): self.set_ordered(ordered or False, inplace=True) self._categories = categories - self._codes = _coerce_indexer_dtype(codes, categories) + self._codes = coerce_indexer_dtype(codes, categories) @property def _constructor(self): @@ -877,7 +876,7 @@ def add_categories(self, new_categories, inplace=False): new_categories = list(self._categories) + list(new_categories) cat = self if inplace else self.copy() cat._categories = self._validate_categories(new_categories) - cat._codes = _coerce_indexer_dtype(cat._codes, new_categories) + cat._codes = coerce_indexer_dtype(cat._codes, new_categories) if not inplace: return cat @@ -961,7 +960,7 @@ def remove_unused_categories(self, inplace=False): idx, inv = idx[1:], inv - 1 cat._categories = cat.categories.take(idx) - cat._codes = _coerce_indexer_dtype(inv, self._categories) + cat._codes = coerce_indexer_dtype(inv, self._categories) if not inplace: return cat @@ -1065,8 +1064,8 @@ def __setstate__(self, state): state['_categories'] = self._validate_categories(state.pop( '_levels')) if '_codes' not in state and 'labels' in state: - state['_codes'] = _coerce_indexer_dtype(state.pop('labels'), - state['_categories']) + state['_codes'] = coerce_indexer_dtype( + state.pop('labels'), state['_categories']) # 0.16.0 ordered change if '_ordered' not in state: @@ -2062,7 +2061,7 @@ def _get_codes_for_values(values, categories): (_, _), cats = _get_data_algo(categories, _hashtables) t = hash_klass(len(cats)) t.map_locations(cats) - return _coerce_indexer_dtype(t.lookup(vals), cats) + return coerce_indexer_dtype(t.lookup(vals), cats) def _convert_to_list_like(list_like): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b49aa926d1923..6b5e8e0799421 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ 
-23,15 +23,15 @@ import numpy as np import numpy.ma as ma -from pandas.types.cast import (_maybe_upcast, _infer_dtype_from_scalar, - _possibly_cast_to_datetime, - _possibly_infer_to_datetimelike, - _possibly_convert_platform, - _possibly_downcast_to_dtype, - _invalidate_string_dtypes, - _coerce_to_dtypes, - _maybe_upcast_putmask, - _find_common_type) +from pandas.types.cast import (maybe_upcast, infer_dtype_from_scalar, + maybe_cast_to_datetime, + maybe_infer_to_datetimelike, + maybe_convert_platform, + maybe_downcast_to_dtype, + invalidate_string_dtypes, + coerce_to_dtypes, + maybe_upcast_putmask, + find_common_type) from pandas.types.common import (is_categorical_dtype, is_object_dtype, is_extension_type, @@ -275,7 +275,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, else: mask = ma.getmaskarray(data) if mask.any(): - data, fill_value = _maybe_upcast(data, copy=True) + data, fill_value = maybe_upcast(data, copy=True) data[mask] = fill_value else: data = data.copy() @@ -335,7 +335,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, if isinstance(data, compat.string_types) and dtype is None: dtype = np.object_ if dtype is None: - dtype, data = _infer_dtype_from_scalar(data) + dtype, data = infer_dtype_from_scalar(data) values = np.empty((len(index), len(columns)), dtype=dtype) values.fill(data) @@ -469,7 +469,7 @@ def _get_axes(N, K, index=index, columns=columns): # on the entire block; this is to convert if we have datetimelike's # embedded in an object type if dtype is None and is_object_dtype(values): - values = _possibly_infer_to_datetimelike(values) + values = maybe_infer_to_datetimelike(values) return create_block_manager_from_blocks([values], [columns, index]) @@ -2359,7 +2359,7 @@ def select_dtypes(self, include=None, exclude=None): include, exclude = map( lambda x: frozenset(map(_get_dtype_from_object, x)), selection) for dtypes in (include, exclude): - _invalidate_string_dtypes(dtypes) + invalidate_string_dtypes(dtypes) # can't both include AND exclude! 
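# e.g. df.select_dtypes(include=['float64'], exclude=['float64']) asks for a
# dtype to be both kept and dropped, so it must raise (illustrative example,
# not part of this diff)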
if not include.isdisjoint(exclude): @@ -2659,7 +2659,7 @@ def reindexer(value): value = _sanitize_index(value, self.index, copy=False) if not isinstance(value, (np.ndarray, Index)): if isinstance(value, list) and len(value) > 0: - value = _possibly_convert_platform(value) + value = maybe_convert_platform(value) else: value = com._asarray_tuplesafe(value) elif value.ndim == 2: @@ -2671,13 +2671,13 @@ def reindexer(value): # possibly infer to datetimelike if is_object_dtype(value.dtype): - value = _possibly_infer_to_datetimelike(value) + value = maybe_infer_to_datetimelike(value) else: # upcast the scalar - dtype, value = _infer_dtype_from_scalar(value) + dtype, value = infer_dtype_from_scalar(value) value = np.repeat(value, len(self.index)).astype(dtype) - value = _possibly_cast_to_datetime(value, dtype) + value = maybe_cast_to_datetime(value, dtype) # return internal types directly if is_extension_type(value): @@ -3000,8 +3000,8 @@ def _maybe_casted_values(index, labels=None): else: values = values.take(labels) if mask.any(): - values, changed = _maybe_upcast_putmask(values, mask, - np.nan) + values, changed = maybe_upcast_putmask( + values, mask, np.nan) return values new_index = _default_index(len(new_obj)) @@ -3722,7 +3722,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): # if we have different dtypes, possibily promote new_dtype = this_dtype if not is_dtype_equal(this_dtype, other_dtype): - new_dtype = _find_common_type([this_dtype, other_dtype]) + new_dtype = find_common_type([this_dtype, other_dtype]) if not is_dtype_equal(this_dtype, new_dtype): series = series.astype(new_dtype) if not is_dtype_equal(other_dtype, new_dtype): @@ -3743,13 +3743,13 @@ def combine(self, other, func, fill_value=None, overwrite=True): # try to downcast back to the original dtype if needs_i8_conversion_i: # ToDo: This conversion should be handled in - # _possibly_cast_to_datetime but the change affects lot... + # _maybe_cast_to_datetime but the change affects lot... 
if is_datetime64tz_dtype(new_dtype): arr = DatetimeIndex._simple_new(arr, tz=new_dtype.tz) else: - arr = _possibly_cast_to_datetime(arr, new_dtype) + arr = maybe_cast_to_datetime(arr, new_dtype) else: - arr = _possibly_downcast_to_dtype(arr, this_dtype) + arr = maybe_downcast_to_dtype(arr, this_dtype) result[col] = arr @@ -5003,7 +5003,7 @@ def f(x): # try to coerce to the original dtypes item by item if we can if axis == 0: - result = _coerce_to_dtypes(result, self.dtypes) + result = coerce_to_dtypes(result, self.dtypes) return Series(result, index=labels) @@ -5505,7 +5505,7 @@ def _prep_ndarray(values, copy=True): return np.empty((0, 0), dtype=object) def convert(v): - return _possibly_convert_platform(v) + return maybe_convert_platform(v) # we could have a 1-dim or 2-dim list here # this is equiv of np.asarray, but does object conversion @@ -5601,7 +5601,7 @@ def _masked_rec_array_to_mgr(data, index, columns, dtype, copy): for fv, arr, col in zip(fill_value, arrays, arr_columns): mask = ma.getmaskarray(data[col]) if mask.any(): - arr, fv = _maybe_upcast(arr, fill_value=fv, copy=True) + arr, fv = maybe_upcast(arr, fill_value=fv, copy=True) arr[mask] = fv new_arrays.append(arr) @@ -5699,7 +5699,7 @@ def _convert_object_array(content, columns, coerce_float=False, dtype=None): def convert(arr): if dtype != object and dtype != np.object: arr = lib.maybe_convert_objects(arr, try_float=coerce_float) - arr = _possibly_cast_to_datetime(arr, dtype) + arr = maybe_cast_to_datetime(arr, dtype) return arr arrays = [convert(arr) for arr in content] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1db9677659ca3..87052800b8fb5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -23,7 +23,7 @@ is_list_like, is_dict_like, is_re_compilable) -from pandas.types.cast import _maybe_promote, _maybe_upcast_putmask +from pandas.types.cast import maybe_promote, maybe_upcast_putmask from pandas.types.missing import isnull, notnull from pandas.types.generic import ABCSeries, ABCPanel @@ -4956,10 +4956,10 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, # or not try_quick if not try_quick: - dtype, fill_value = _maybe_promote(other.dtype) + dtype, fill_value = maybe_promote(other.dtype) new_other = np.empty(len(icond), dtype=dtype) new_other.fill(fill_value) - _maybe_upcast_putmask(new_other, icond, other) + maybe_upcast_putmask(new_other, icond, other) other = new_other else: diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 4095a14aa5970..0a63981290df3 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -32,7 +32,7 @@ _ensure_object, _ensure_categorical, _ensure_float) -from pandas.types.cast import _possibly_downcast_to_dtype +from pandas.types.cast import maybe_downcast_to_dtype from pandas.types.missing import isnull, notnull, _maybe_fill from pandas.core.common import (_values_from_object, AbstractMethodError, @@ -783,7 +783,7 @@ def _try_cast(self, result, obj, numeric_only=False): if not is_scalar(result): if numeric_only and is_numeric_dtype(dtype) or not numeric_only: - result = _possibly_downcast_to_dtype(result, dtype) + result = maybe_downcast_to_dtype(result, dtype) return result @@ -2914,7 +2914,7 @@ def transform(self, func, *args, **kwargs): # the cython take a different path (and casting) dtype = self._selected_obj.dtype if is_numeric_dtype(dtype): - result = _possibly_downcast_to_dtype(result, dtype) + result = maybe_downcast_to_dtype(result, dtype) result.name = self._selected_obj.name result.index = 
self._selected_obj.index diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 6487c2108028e..8db801f8e7212 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -29,15 +29,15 @@ is_re_compilable, is_scalar, _get_dtype) -from pandas.types.cast import (_possibly_downcast_to_dtype, - _maybe_convert_string_to_object, - _maybe_upcast, - _maybe_convert_scalar, _maybe_promote, - _infer_dtype_from_scalar, - _soft_convert_objects, - _possibly_convert_objects, - _astype_nansafe, - _find_common_type) +from pandas.types.cast import (maybe_downcast_to_dtype, + maybe_convert_string_to_object, + maybe_upcast, + maybe_convert_scalar, maybe_promote, + infer_dtype_from_scalar, + soft_convert_objects, + maybe_convert_objects, + astype_nansafe, + find_common_type) from pandas.types.missing import (isnull, array_equivalent, _is_na_compat, is_null_datelike_scalar) @@ -429,7 +429,7 @@ def downcast(self, dtypes=None, mgr=None): if dtypes is None: dtypes = 'infer' - nv = _possibly_downcast_to_dtype(values, dtypes) + nv = maybe_downcast_to_dtype(values, dtypes) return self.make_block(nv, fastpath=True) # ndim > 1 @@ -455,7 +455,7 @@ def downcast(self, dtypes=None, mgr=None): if dtype is None: nv = _block_shape(values[i], ndim=self.ndim) else: - nv = _possibly_downcast_to_dtype(values[i], dtype) + nv = maybe_downcast_to_dtype(values[i], dtype) nv = _block_shape(nv, ndim=self.ndim) blocks.append(self.make_block(nv, fastpath=True, placement=[rl])) @@ -514,7 +514,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, values = self.get_values(dtype=dtype) # _astype_nansafe works fine with 1-d only - values = _astype_nansafe(values.ravel(), dtype, copy=True) + values = astype_nansafe(values.ravel(), dtype, copy=True) values = values.reshape(self.shape) newb = make_block(values, placement=self.mgr_locs, dtype=dtype, @@ -578,7 +578,7 @@ def _try_cast_result(self, result, dtype=None): return result # may need to change the dtype here - return _possibly_downcast_to_dtype(result, dtype) + return maybe_downcast_to_dtype(result, dtype) def _try_operate(self, values): """ return a version to operate on as the input """ @@ -684,7 +684,7 @@ def setitem(self, indexer, value, mgr=None): # cast the values to a type that can hold nan (if necessary) if not self._can_hold_element(value): - dtype, _ = _maybe_promote(arr_value.dtype) + dtype, _ = maybe_promote(arr_value.dtype) values = values.astype(dtype) transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x) @@ -758,7 +758,7 @@ def _is_empty_indexer(indexer): value.dtype): dtype = value.dtype elif is_scalar(value): - dtype, _ = _infer_dtype_from_scalar(value) + dtype, _ = infer_dtype_from_scalar(value) else: dtype = 'infer' values = self._try_coerce_and_cast_result(values, dtype) @@ -871,7 +871,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, n = np.array(new) # type of the new block - dtype, _ = _maybe_promote(n.dtype) + dtype, _ = maybe_promote(n.dtype) # we need to explicitly astype here to make a copy n = n.astype(dtype) @@ -1066,7 +1066,7 @@ def shift(self, periods, axis=0, mgr=None): # convert integer to float if necessary. 
need to do a lot more than # that, handle boolean etc also - new_values, fill_value = _maybe_upcast(self.values) + new_values, fill_value = maybe_upcast(self.values) # make sure array sent to np.roll is c_contiguous f_ordered = new_values.flags.f_contiguous @@ -1250,8 +1250,8 @@ def where(self, other, cond, align=True, raise_on_error=True, raise ValueError("where must have a condition that is ndarray " "like") - other = _maybe_convert_string_to_object(other) - other = _maybe_convert_scalar(other) + other = maybe_convert_string_to_object(other) + other = maybe_convert_scalar(other) # our where function def func(cond, values, other): @@ -1864,10 +1864,10 @@ def convert(self, *args, **kwargs): new_style |= kw in kwargs if new_style: - fn = _soft_convert_objects + fn = soft_convert_objects fn_inputs = new_inputs else: - fn = _possibly_convert_objects + fn = maybe_convert_objects fn_inputs = ['convert_dates', 'convert_numeric', 'convert_timedeltas'] fn_inputs += ['copy'] @@ -2643,7 +2643,7 @@ def shift(self, periods, axis=0, mgr=None): new_values = self.values.to_dense().take(indexer) # convert integer to float if necessary. need to do a lot more than # that, handle boolean etc also - new_values, fill_value = _maybe_upcast(new_values) + new_values, fill_value = maybe_upcast(new_values) if periods > 0: new_values[:periods] = fill_value else: @@ -3239,13 +3239,12 @@ def replace_list(self, src_list, dest_list, inplace=False, regex=False, def comp(s): if isnull(s): return isnull(values) - return _possibly_compare(values, getattr(s, 'asm8', s), - operator.eq) + return _maybe_compare(values, getattr(s, 'asm8', s), operator.eq) def _cast_scalar(block, scalar): - dtype, val = _infer_dtype_from_scalar(scalar, pandas_dtype=True) + dtype, val = infer_dtype_from_scalar(scalar, pandas_dtype=True) if not is_dtype_equal(block.dtype, dtype): - dtype = _find_common_type([block.dtype, dtype]) + dtype = find_common_type([block.dtype, dtype]) block = block.astype(dtype) # use original value val = scalar @@ -3920,7 +3919,7 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))] elif not allow_fill or self.ndim == 1: if allow_fill and fill_tuple[0] is None: - _, fill_value = _maybe_promote(blk.dtype) + _, fill_value = maybe_promote(blk.dtype) fill_tuple = (fill_value, ) return [blk.take_nd(slobj, axis=0, @@ -3978,7 +3977,7 @@ def _make_na_block(self, placement, fill_value=None): block_shape = list(self.shape) block_shape[0] = len(placement) - dtype, fill_value = _infer_dtype_from_scalar(fill_value) + dtype, fill_value = infer_dtype_from_scalar(fill_value) block_values = np.empty(block_shape, dtype=dtype) block_values.fill(fill_value) return make_block(block_values, placement=placement) @@ -4497,7 +4496,7 @@ def _interleaved_dtype(blocks): if not len(blocks): return None - dtype = _find_common_type([b.dtype for b in blocks]) + dtype = find_common_type([b.dtype for b in blocks]) # only numpy compat if isinstance(dtype, ExtensionDtype): @@ -4587,7 +4586,7 @@ def _vstack(to_stack, dtype): return np.vstack(to_stack) -def _possibly_compare(a, b, op): +def _maybe_compare(a, b, op): is_a_array = isinstance(a, np.ndarray) is_b_array = isinstance(b, np.ndarray) @@ -4637,7 +4636,7 @@ def _block2d_to_blocknd(values, placement, shape, labels, ref_items): if mask.all(): pvalues = np.empty(panel_shape, dtype=values.dtype) else: - dtype, fill_value = _maybe_promote(values.dtype) + dtype, fill_value = maybe_promote(values.dtype) pvalues = 
np.empty(panel_shape, dtype=dtype) pvalues.fill(fill_value) @@ -4786,7 +4785,7 @@ def _putmask_smart(v, m, n): pass # change the dtype - dtype, _ = _maybe_promote(n.dtype) + dtype, _ = maybe_promote(n.dtype) if is_extension_type(v.dtype) and is_object_dtype(dtype): nv = v.get_values(dtype) @@ -5142,8 +5141,8 @@ def dtype(self): if not self.needs_filling: return self.block.dtype else: - return _get_dtype(_maybe_promote(self.block.dtype, - self.block.fill_value)[0]) + return _get_dtype(maybe_promote(self.block.dtype, + self.block.fill_value)[0]) return self._dtype diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index bb6c9b4546d0f..6ec94e69740a2 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -20,7 +20,7 @@ is_datetime64_dtype, is_timedelta64_dtype, is_datetime_or_timedelta_dtype, is_int_or_datetime_dtype, is_any_int_dtype) -from pandas.types.cast import _int64_max, _maybe_upcast_putmask +from pandas.types.cast import _int64_max, maybe_upcast_putmask from pandas.types.missing import isnull, notnull from pandas.core.common import _values_from_object @@ -200,7 +200,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, # promote if needed else: - values, changed = _maybe_upcast_putmask(values, mask, fill_value) + values, changed = maybe_upcast_putmask(values, mask, fill_value) elif copy: values = values.copy() diff --git a/pandas/core/ops.py b/pandas/core/ops.py index fe83f8a352851..5dac8a7e4d2da 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -33,7 +33,7 @@ is_list_like, is_scalar, _ensure_object) -from pandas.types.cast import _maybe_upcast_putmask, _find_common_type +from pandas.types.cast import maybe_upcast_putmask, find_common_type from pandas.types.generic import ABCSeries, ABCIndex, ABCPeriodIndex # ----------------------------------------------------------------------------- @@ -657,7 +657,7 @@ def na_op(x, y): raise_on_error=True, **eval_kwargs) except TypeError: if isinstance(y, (np.ndarray, ABCSeries, pd.Index)): - dtype = _find_common_type([x.dtype, y.dtype]) + dtype = find_common_type([x.dtype, y.dtype]) result = np.empty(x.size, dtype=dtype) mask = notnull(x) & notnull(y) result[mask] = op(x[mask], _values_from_object(y[mask])) @@ -670,7 +670,7 @@ def na_op(x, y): "{op}".format(typ=type(x).__name__, op=str_rep)) - result, changed = _maybe_upcast_putmask(result, ~mask, np.nan) + result, changed = maybe_upcast_putmask(result, ~mask, np.nan) result = missing.fill_zeros(result, x, y, name, fill_zeros) return result @@ -1204,7 +1204,7 @@ def na_op(x, y): "objects of type {x} and {y}".format( op=name, x=type(x), y=type(y))) - result, changed = _maybe_upcast_putmask(result, ~mask, np.nan) + result, changed = maybe_upcast_putmask(result, ~mask, np.nan) result = result.reshape(x.shape) result = missing.fill_zeros(result, x, y, name, fill_zeros) @@ -1329,7 +1329,7 @@ def na_op(x, y): result = np.empty(len(x), dtype=x.dtype) mask = notnull(x) result[mask] = op(x[mask], y) - result, changed = _maybe_upcast_putmask(result, ~mask, np.nan) + result, changed = maybe_upcast_putmask(result, ~mask, np.nan) result = missing.fill_zeros(result, x, y, name, fill_zeros) return result diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 5c7b66a2d1356..50ddc24ac9656 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -6,8 +6,8 @@ import numpy as np -from pandas.types.cast import (_infer_dtype_from_scalar, - _possibly_cast_item) +from pandas.types.cast import (infer_dtype_from_scalar, + maybe_cast_item) from 
pandas.types.common import (is_integer, is_list_like, is_string_like, is_scalar) from pandas.types.missing import notnull @@ -165,7 +165,7 @@ def _init_data(self, data, copy, dtype, **kwargs): dtype = None elif is_scalar(data) and all(x is not None for x in passed_axes): if dtype is None: - dtype, data = _infer_dtype_from_scalar(data) + dtype, data = infer_dtype_from_scalar(data) values = np.empty([len(x) for x in passed_axes], dtype=dtype) values.fill(data) mgr = self._init_matrix(values, passed_axes, dtype=dtype, @@ -533,11 +533,11 @@ def set_value(self, *args, **kwargs): d = self._construct_axes_dict_from(self, axes, copy=False) result = self.reindex(**d) args = list(args) - likely_dtype, args[-1] = _infer_dtype_from_scalar(args[-1]) + likely_dtype, args[-1] = infer_dtype_from_scalar(args[-1]) made_bigger = not np.array_equal(axes[0], self._info_axis) # how to make this logic simpler? if made_bigger: - _possibly_cast_item(result, args[0], likely_dtype) + maybe_cast_item(result, args[0], likely_dtype) return result.set_value(*args) @@ -568,7 +568,7 @@ def __setitem__(self, key, value): shape[1:], tuple(map(int, value.shape)))) mat = np.asarray(value) elif is_scalar(value): - dtype, value = _infer_dtype_from_scalar(value) + dtype, value = infer_dtype_from_scalar(value) mat = np.empty(shape[1:], dtype=dtype) mat.fill(value) else: diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 1e685ae6895ad..2822d98b7c906 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -10,7 +10,7 @@ from pandas.types.common import (_ensure_platform_int, is_list_like, is_bool_dtype, needs_i8_conversion) -from pandas.types.cast import _maybe_promote +from pandas.types.cast import maybe_promote from pandas.types.missing import notnull import pandas.types.concat as _concat @@ -202,7 +202,7 @@ def get_new_values(self): dtype = values.dtype new_values = np.empty(result_shape, dtype=dtype) else: - dtype, fill_value = _maybe_promote(values.dtype, self.fill_value) + dtype, fill_value = maybe_promote(values.dtype, self.fill_value) new_values = np.empty(result_shape, dtype=dtype) new_values.fill(fill_value) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4c51ced1845fe..0913592e055cd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -29,9 +29,9 @@ _is_unorderable_exception, _ensure_platform_int) from pandas.types.generic import ABCSparseArray, ABCDataFrame -from pandas.types.cast import (_maybe_upcast, _infer_dtype_from_scalar, - _possibly_convert_platform, - _possibly_cast_to_datetime, _possibly_castable) +from pandas.types.cast import (maybe_upcast, infer_dtype_from_scalar, + maybe_convert_platform, + maybe_cast_to_datetime, maybe_castable) from pandas.types.missing import isnull, notnull from pandas.core.common import (is_bool_indexer, @@ -2794,7 +2794,7 @@ def _sanitize_array(data, index, dtype=None, copy=False, if isinstance(data, ma.MaskedArray): mask = ma.getmaskarray(data) if mask.any(): - data, fill_value = _maybe_upcast(data, copy=True) + data, fill_value = maybe_upcast(data, copy=True) data[mask] = fill_value else: data = data.copy() @@ -2803,11 +2803,11 @@ def _try_cast(arr, take_fast_path): # perf shortcut as this is the most common case if take_fast_path: - if _possibly_castable(arr) and not copy and dtype is None: + if maybe_castable(arr) and not copy and dtype is None: return arr try: - subarr = _possibly_cast_to_datetime(arr, dtype) + subarr = maybe_cast_to_datetime(arr, dtype) if not is_extension_type(subarr): subarr = np.array(subarr, 
dtype=dtype, copy=copy) except (ValueError, TypeError): @@ -2863,9 +2863,9 @@ def _try_cast(arr, take_fast_path): subarr = lib.maybe_convert_objects(subarr) else: - subarr = _possibly_convert_platform(data) + subarr = maybe_convert_platform(data) - subarr = _possibly_cast_to_datetime(subarr, dtype) + subarr = maybe_cast_to_datetime(subarr, dtype) else: subarr = _try_cast(data, False) @@ -2894,10 +2894,10 @@ def create_from_value(value, index, dtype): # figure out the dtype from the value (upcast if necessary) if dtype is None: - dtype, value = _infer_dtype_from_scalar(value) + dtype, value = infer_dtype_from_scalar(value) else: # need to possibly convert the value here - value = _possibly_cast_to_datetime(value, dtype) + value = maybe_cast_to_datetime(value, dtype) subarr = create_from_value(value, index, dtype) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index d262ecd818f1d..54f73a2466286 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2445,7 +2445,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): if tolerance is not None: tolerance = self._convert_tolerance(tolerance) - pself, ptarget = self._possibly_promote(target) + pself, ptarget = self._maybe_promote(target) if pself is not self or ptarget is not target: return pself.get_indexer(ptarget, method=method, limit=limit, tolerance=tolerance) @@ -2572,7 +2572,7 @@ def _filter_indexer_tolerance(self, target, indexer, tolerance): @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs) def get_indexer_non_unique(self, target): target = _ensure_index(target) - pself, ptarget = self._possibly_promote(target) + pself, ptarget = self._maybe_promote(target) if pself is not self or ptarget is not target: return pself.get_indexer_non_unique(ptarget) @@ -2595,7 +2595,7 @@ def get_indexer_for(self, target, **kwargs): indexer, _ = self.get_indexer_non_unique(target, **kwargs) return indexer - def _possibly_promote(self, other): + def _maybe_promote(self, other): # A hack, but it works from pandas.tseries.index import DatetimeIndex if self.inferred_type == 'date' and isinstance(other, DatetimeIndex): diff --git a/pandas/indexes/frozen.py b/pandas/indexes/frozen.py index e043ba64bbad7..97a1a3ea99e65 100644 --- a/pandas/indexes/frozen.py +++ b/pandas/indexes/frozen.py @@ -10,7 +10,7 @@ import numpy as np from pandas.core.base import PandasObject -from pandas.types.cast import _coerce_indexer_dtype +from pandas.types.cast import coerce_indexer_dtype from pandas.formats.printing import pprint_thing @@ -119,7 +119,7 @@ def __unicode__(self): def _ensure_frozen(array_like, categories, copy=False): - array_like = _coerce_indexer_dtype(array_like, categories) + array_like = coerce_indexer_dtype(array_like, categories) array_like = array_like.view(FrozenNDArray) if copy: array_like = array_like.copy() diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index af57cc3ce7950..f7b2d75c19304 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -21,7 +21,7 @@ is_object_dtype, is_string_dtype, is_scalar, is_categorical_dtype) from pandas.types.missing import isnull -from pandas.types.cast import _astype_nansafe +from pandas.types.cast import astype_nansafe from pandas.core.index import Index, MultiIndex, RangeIndex from pandas.core.series import Series from pandas.core.frame import DataFrame @@ -1498,11 +1498,11 @@ def _cast_types(self, values, cast_type, column): # c-parser which parses all categories # as strings if not is_object_dtype(values): - values = 
_astype_nansafe(values, str) + values = astype_nansafe(values, str) values = Categorical(values) else: try: - values = _astype_nansafe(values, cast_type, copy=True) + values = astype_nansafe(values, cast_type, copy=True) except ValueError: raise ValueError("Unable to convert column %s to " "type %s" % (column, cast_type)) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 5f4c07971d37e..f149e724c19c3 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -22,8 +22,8 @@ is_list_like, is_string_dtype, is_scalar, is_dtype_equal) -from pandas.types.cast import (_possibly_convert_platform, _maybe_promote, - _astype_nansafe, _find_common_type) +from pandas.types.cast import (maybe_convert_platform, maybe_promote, + astype_nansafe, find_common_type) from pandas.types.missing import isnull, notnull, na_value_for_dtype from pandas.sparse import libsparse as splib @@ -93,7 +93,7 @@ def _sparse_array_op(left, right, op, name, series=False): # dtype used to find corresponding sparse method if not is_dtype_equal(left.dtype, right.dtype): - dtype = _find_common_type([left.dtype, right.dtype]) + dtype = find_common_type([left.dtype, right.dtype]) left = left.astype(dtype) right = right.astype(dtype) else: @@ -370,7 +370,7 @@ def fill_value(self, value): if not is_scalar(value): raise ValueError('fill_value must be a scalar') # if the specified value triggers type promotion, raise ValueError - new_dtype, fill_value = _maybe_promote(self.dtype, value) + new_dtype, fill_value = maybe_promote(self.dtype, value) if is_dtype_equal(self.dtype, new_dtype): self._fill_value = fill_value else: @@ -532,7 +532,7 @@ def __setslice__(self, i, j, value): def astype(self, dtype=None, copy=True): dtype = np.dtype(dtype) - sp_values = _astype_nansafe(self.sp_values, dtype, copy=copy) + sp_values = astype_nansafe(self.sp_values, dtype, copy=copy) try: if is_bool_dtype(dtype): # to avoid np.bool_ dtype @@ -736,7 +736,7 @@ def _sanitize_values(arr): pass elif is_list_like(arr) and len(arr) > 0: - arr = _possibly_convert_platform(arr) + arr = maybe_convert_platform(arr) else: arr = np.asarray(arr) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index a21f64f524a0a..41f301f263374 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -11,7 +11,7 @@ import numpy as np from pandas.types.missing import isnull, notnull -from pandas.types.cast import _maybe_upcast, _find_common_type +from pandas.types.cast import maybe_upcast, find_common_type from pandas.types.common import _ensure_platform_int, is_scipy_sparse from pandas.core.common import _try_sort @@ -250,7 +250,7 @@ def to_coo(self): except ImportError: raise ImportError('Scipy is not installed') - dtype = _find_common_type(self.dtypes) + dtype = find_common_type(self.dtypes) cols, rows, datas = [], [], [] for col, name in enumerate(self): s = self[name] @@ -635,7 +635,7 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan, new = new.values # convert integer to float if necessary. 
need to do a lot # more than that, handle boolean etc also - new, fill_value = _maybe_upcast(new, fill_value=fill_value) + new, fill_value = maybe_upcast(new, fill_value=fill_value) np.putmask(new, mask, fill_value) new_series[col] = new diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index d7b086daea1e3..dd4ea3bb02be9 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -9,33 +9,33 @@ import numpy as np from pandas import Timedelta, Timestamp, DatetimeIndex -from pandas.types.cast import (_possibly_downcast_to_dtype, - _possibly_convert_objects, - _infer_dtype_from_scalar, - _maybe_convert_string_to_object, - _maybe_convert_scalar, - _find_common_type) +from pandas.types.cast import (maybe_downcast_to_dtype, + maybe_convert_objects, + infer_dtype_from_scalar, + maybe_convert_string_to_object, + maybe_convert_scalar, + find_common_type) from pandas.types.dtypes import (CategoricalDtype, DatetimeTZDtype, PeriodDtype) from pandas.util import testing as tm -class TestPossiblyDowncast(tm.TestCase): +class TestMaybeDowncast(tm.TestCase): def test_downcast_conv(self): # test downcasting arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]) - result = _possibly_downcast_to_dtype(arr, 'infer') + result = maybe_downcast_to_dtype(arr, 'infer') assert (np.array_equal(result, arr)) arr = np.array([8., 8., 8., 8., 8.9999999999995]) - result = _possibly_downcast_to_dtype(arr, 'infer') + result = maybe_downcast_to_dtype(arr, 'infer') expected = np.array([8, 8, 8, 8, 9]) assert (np.array_equal(result, expected)) arr = np.array([8., 8., 8., 8., 9.0000000000005]) - result = _possibly_downcast_to_dtype(arr, 'infer') + result = maybe_downcast_to_dtype(arr, 'infer') expected = np.array([8, 8, 8, 8, 9]) assert (np.array_equal(result, expected)) @@ -44,41 +44,41 @@ def test_downcast_conv(self): expected = np.array([1, 2]) for dtype in [np.float64, object, np.int64]: arr = np.array([1.0, 2.0], dtype=dtype) - result = _possibly_downcast_to_dtype(arr, 'infer') + result = maybe_downcast_to_dtype(arr, 'infer') tm.assert_almost_equal(result, expected, check_dtype=False) for dtype in [np.float64, object]: expected = np.array([1.0, 2.0, np.nan], dtype=dtype) arr = np.array([1.0, 2.0, np.nan], dtype=dtype) - result = _possibly_downcast_to_dtype(arr, 'infer') + result = maybe_downcast_to_dtype(arr, 'infer') tm.assert_almost_equal(result, expected) # empties for dtype in [np.int32, np.float64, np.float32, np.bool_, np.int64, object]: arr = np.array([], dtype=dtype) - result = _possibly_downcast_to_dtype(arr, 'int64') + result = maybe_downcast_to_dtype(arr, 'int64') tm.assert_almost_equal(result, np.array([], dtype=np.int64)) assert result.dtype == np.int64 def test_datetimelikes_nan(self): arr = np.array([1, 2, np.nan]) exp = np.array([1, 2, np.datetime64('NaT')], dtype='datetime64[ns]') - res = _possibly_downcast_to_dtype(arr, 'datetime64[ns]') + res = maybe_downcast_to_dtype(arr, 'datetime64[ns]') tm.assert_numpy_array_equal(res, exp) exp = np.array([1, 2, np.timedelta64('NaT')], dtype='timedelta64[ns]') - res = _possibly_downcast_to_dtype(arr, 'timedelta64[ns]') + res = maybe_downcast_to_dtype(arr, 'timedelta64[ns]') tm.assert_numpy_array_equal(res, exp) def test_datetime_with_timezone(self): # GH 15426 ts = Timestamp("2016-01-01 12:00:00", tz='US/Pacific') exp = DatetimeIndex([ts, ts]) - res = _possibly_downcast_to_dtype(exp, exp.dtype) + res = maybe_downcast_to_dtype(exp, exp.dtype) tm.assert_index_equal(res, exp) - res = 
_possibly_downcast_to_dtype(exp.asi8, exp.dtype) + res = maybe_downcast_to_dtype(exp.asi8, exp.dtype) tm.assert_index_equal(res, exp) @@ -91,121 +91,121 @@ def test_infer_dtype_from_scalar(self): for dtypec in [np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64]: data = dtypec(12) - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, type(data)) data = 12 - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, np.int64) for dtypec in [np.float16, np.float32, np.float64]: data = dtypec(12) - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, dtypec) data = np.float(12) - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, np.float64) for data in [True, False]: - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, np.bool_) for data in [np.complex64(1), np.complex128(1)]: - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, np.complex_) import datetime for data in [np.datetime64(1, 'ns'), Timestamp(1), datetime.datetime(2000, 1, 1, 0, 0)]: - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, 'M8[ns]') for data in [np.timedelta64(1, 'ns'), Timedelta(1), datetime.timedelta(1)]: - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, 'm8[ns]') for data in [datetime.date(2000, 1, 1), Timestamp(1, tz='US/Eastern'), 'foo']: - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, np.object_) class TestMaybe(tm.TestCase): def test_maybe_convert_string_to_array(self): - result = _maybe_convert_string_to_object('x') + result = maybe_convert_string_to_object('x') tm.assert_numpy_array_equal(result, np.array(['x'], dtype=object)) self.assertTrue(result.dtype == object) - result = _maybe_convert_string_to_object(1) + result = maybe_convert_string_to_object(1) self.assertEqual(result, 1) arr = np.array(['x', 'y'], dtype=str) - result = _maybe_convert_string_to_object(arr) + result = maybe_convert_string_to_object(arr) tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object)) self.assertTrue(result.dtype == object) # unicode arr = np.array(['x', 'y']).astype('U') - result = _maybe_convert_string_to_object(arr) + result = maybe_convert_string_to_object(arr) tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object)) self.assertTrue(result.dtype == object) # object arr = np.array(['x', 2], dtype=object) - result = _maybe_convert_string_to_object(arr) + result = maybe_convert_string_to_object(arr) tm.assert_numpy_array_equal(result, np.array(['x', 2], dtype=object)) self.assertTrue(result.dtype == object) def test_maybe_convert_scalar(self): # pass thru - result = _maybe_convert_scalar('x') + result = maybe_convert_scalar('x') self.assertEqual(result, 'x') - result = _maybe_convert_scalar(np.array([1])) + result = maybe_convert_scalar(np.array([1])) self.assertEqual(result, np.array([1])) # leave scalar dtype - result = _maybe_convert_scalar(np.int64(1)) + result = maybe_convert_scalar(np.int64(1)) self.assertEqual(result, np.int64(1)) - result = _maybe_convert_scalar(np.int32(1)) + result = 
maybe_convert_scalar(np.int32(1)) self.assertEqual(result, np.int32(1)) - result = _maybe_convert_scalar(np.float32(1)) + result = maybe_convert_scalar(np.float32(1)) self.assertEqual(result, np.float32(1)) - result = _maybe_convert_scalar(np.int64(1)) + result = maybe_convert_scalar(np.int64(1)) self.assertEqual(result, np.float64(1)) # coerce - result = _maybe_convert_scalar(1) + result = maybe_convert_scalar(1) self.assertEqual(result, np.int64(1)) - result = _maybe_convert_scalar(1.0) + result = maybe_convert_scalar(1.0) self.assertEqual(result, np.float64(1)) - result = _maybe_convert_scalar(Timestamp('20130101')) + result = maybe_convert_scalar(Timestamp('20130101')) self.assertEqual(result, Timestamp('20130101').value) - result = _maybe_convert_scalar(datetime(2013, 1, 1)) + result = maybe_convert_scalar(datetime(2013, 1, 1)) self.assertEqual(result, Timestamp('20130101').value) - result = _maybe_convert_scalar(Timedelta('1 day 1 min')) + result = maybe_convert_scalar(Timedelta('1 day 1 min')) self.assertEqual(result, Timedelta('1 day 1 min').value) class TestConvert(tm.TestCase): - def test_possibly_convert_objects_copy(self): + def test_maybe_convert_objects_copy(self): values = np.array([1, 2]) - out = _possibly_convert_objects(values, copy=False) + out = maybe_convert_objects(values, copy=False) self.assertTrue(values is out) - out = _possibly_convert_objects(values, copy=True) + out = maybe_convert_objects(values, copy=True) self.assertTrue(values is not out) values = np.array(['apply', 'banana']) - out = _possibly_convert_objects(values, copy=False) + out = maybe_convert_objects(values, copy=False) self.assertTrue(values is out) - out = _possibly_convert_objects(values, copy=True) + out = maybe_convert_objects(values, copy=True) self.assertTrue(values is not out) @@ -267,34 +267,34 @@ def test_numpy_dtypes(self): ((np.dtype('datetime64[ns]'), np.int64), np.object) ) for src, common in testcases: - self.assertEqual(_find_common_type(src), common) + self.assertEqual(find_common_type(src), common) with tm.assertRaises(ValueError): # empty - _find_common_type([]) + find_common_type([]) def test_categorical_dtype(self): dtype = CategoricalDtype() - self.assertEqual(_find_common_type([dtype]), 'category') - self.assertEqual(_find_common_type([dtype, dtype]), 'category') - self.assertEqual(_find_common_type([np.object, dtype]), np.object) + self.assertEqual(find_common_type([dtype]), 'category') + self.assertEqual(find_common_type([dtype, dtype]), 'category') + self.assertEqual(find_common_type([np.object, dtype]), np.object) def test_datetimetz_dtype(self): dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern') - self.assertEqual(_find_common_type([dtype, dtype]), + self.assertEqual(find_common_type([dtype, dtype]), 'datetime64[ns, US/Eastern]') for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'), np.dtype('datetime64[ns]'), np.object, np.int64]: - self.assertEqual(_find_common_type([dtype, dtype2]), np.object) - self.assertEqual(_find_common_type([dtype2, dtype]), np.object) + self.assertEqual(find_common_type([dtype, dtype2]), np.object) + self.assertEqual(find_common_type([dtype2, dtype]), np.object) def test_period_dtype(self): dtype = PeriodDtype(freq='D') - self.assertEqual(_find_common_type([dtype, dtype]), 'period[D]') + self.assertEqual(find_common_type([dtype, dtype]), 'period[D]') for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'), PeriodDtype(freq='2D'), PeriodDtype(freq='H'), np.dtype('datetime64[ns]'), np.object, np.int64]: - 
self.assertEqual(_find_common_type([dtype, dtype2]), np.object) - self.assertEqual(_find_common_type([dtype2, dtype]), np.object) + self.assertEqual(find_common_type([dtype, dtype2]), np.object) + self.assertEqual(find_common_type([dtype2, dtype]), np.object) diff --git a/pandas/tools/util.py b/pandas/tools/util.py index bf78a9dfb65cc..263d2f16a4216 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -9,7 +9,7 @@ is_decimal, is_scalar as isscalar) -from pandas.types.cast import _possibly_downcast_to_dtype +from pandas.types.cast import maybe_downcast_to_dtype import pandas as pd from pandas.compat import reduce @@ -226,8 +226,7 @@ def to_numeric(arg, errors='raise', downcast=None): # from smallest to largest for dtype in typecodes: if np.dtype(dtype).itemsize <= values.dtype.itemsize: - values = _possibly_downcast_to_dtype( - values, dtype) + values = maybe_downcast_to_dtype(values, dtype) # successful conversion if values.dtype == dtype: diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index f80618ef34373..983c1a4cd9de9 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1329,7 +1329,7 @@ def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True): # try to find a the dates return (lhs_mask & rhs_mask).nonzero()[0] - def _possibly_promote(self, other): + def _maybe_promote(self, other): if other.inferred_type == 'date': other = DatetimeIndex(other) return self, other diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index f47d80a31b174..13d844bb6a399 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -623,7 +623,7 @@ def intersection(self, other): left_chunk = left.values[lslice] return self._shallow_copy(left_chunk) - def _possibly_promote(self, other): + def _maybe_promote(self, other): if other.inferred_type == 'timedelta': other = TimedeltaIndex(other) return self, other diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 0e26cd085db5a..91c7d287d6d46 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -32,7 +32,7 @@ _int64_max = np.iinfo(np.int64).max -def _possibly_convert_platform(values): +def maybe_convert_platform(values): """ try to do platform conversion, allow ndarray or list here """ if isinstance(values, (list, tuple)): @@ -45,7 +45,7 @@ def _possibly_convert_platform(values): return values -def _possibly_downcast_to_dtype(result, dtype): +def maybe_downcast_to_dtype(result, dtype): """ try to cast to the specified dtype (e.g. convert back to bool/int or could be an astype of float64->float32 """ @@ -142,7 +142,7 @@ def trans(x): # noqa return result -def _maybe_upcast_putmask(result, mask, other): +def maybe_upcast_putmask(result, mask, other): """ A safe version of putmask that potentially upcasts the result @@ -193,7 +193,7 @@ def changeit(): # we are forced to change the dtype of the result as the input # isn't compatible - r, _ = _maybe_upcast(result, fill_value=other, copy=True) + r, _ = maybe_upcast(result, fill_value=other, copy=True) np.place(r, mask, other) return r, True @@ -203,7 +203,7 @@ def changeit(): # upcast (possibly), otherwise we DON't want to upcast (e.g. 
if we # have values, say integers, in the success portion then it's ok to not # upcast) - new_dtype, _ = _maybe_promote(result.dtype, other) + new_dtype, _ = maybe_promote(result.dtype, other) if new_dtype != result.dtype: # we have a scalar or len 0 ndarray @@ -227,7 +227,7 @@ def changeit(): return result, False -def _maybe_promote(dtype, fill_value=np.nan): +def maybe_promote(dtype, fill_value=np.nan): # if we passed an array here, determine the fill value by dtype if isinstance(fill_value, np.ndarray): @@ -312,7 +312,7 @@ def _maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value -def _infer_dtype_from_scalar(val, pandas_dtype=False): +def infer_dtype_from_scalar(val, pandas_dtype=False): """ interpret the dtype from a scalar @@ -387,7 +387,7 @@ def _infer_dtype_from_scalar(val, pandas_dtype=False): return dtype, val -def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): +def maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): """ provide explict type promotion and coercion Parameters @@ -404,7 +404,7 @@ def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): else: if dtype is None: dtype = values.dtype - new_dtype, fill_value = _maybe_promote(dtype, fill_value) + new_dtype, fill_value = maybe_promote(dtype, fill_value) if new_dtype != values.dtype: values = values.astype(new_dtype) elif copy: @@ -413,7 +413,7 @@ def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): return values, fill_value -def _possibly_cast_item(obj, item, dtype): +def maybe_cast_item(obj, item, dtype): chunk = obj[item] if chunk.values.dtype != dtype: @@ -423,7 +423,7 @@ def _possibly_cast_item(obj, item, dtype): raise ValueError("Unexpected dtype encountered: %s" % dtype) -def _invalidate_string_dtypes(dtype_set): +def invalidate_string_dtypes(dtype_set): """Change string like dtypes to object for ``DataFrame.select_dtypes()``. """ @@ -432,7 +432,7 @@ def _invalidate_string_dtypes(dtype_set): raise TypeError("string dtypes are not allowed, use 'object' instead") -def _maybe_convert_string_to_object(values): +def maybe_convert_string_to_object(values): """ Convert string-like and string-like array to convert object dtype. @@ -446,13 +446,13 @@ def _maybe_convert_string_to_object(values): return values -def _maybe_convert_scalar(values): +def maybe_convert_scalar(values): """ Convert a python scalar to the appropriate numpy dtype if possible This avoids numpy directly converting according to platform preferences """ if is_scalar(values): - dtype, values = _infer_dtype_from_scalar(values) + dtype, values = infer_dtype_from_scalar(values) try: values = dtype(values) except TypeError: @@ -460,7 +460,7 @@ def _maybe_convert_scalar(values): return values -def _coerce_indexer_dtype(indexer, categories): +def coerce_indexer_dtype(indexer, categories): """ coerce the indexer input array to the smallest dtype possible """ l = len(categories) if l < _int8_max: @@ -472,7 +472,7 @@ def _coerce_indexer_dtype(indexer, categories): return _ensure_int64(indexer) -def _coerce_to_dtypes(result, dtypes): +def coerce_to_dtypes(result, dtypes): """ given a dtypes and a result set, coerce the result elements to the dtypes @@ -507,7 +507,7 @@ def conv(r, dtype): return [conv(r, dtype) for r, dtype in zip(result, dtypes)] -def _astype_nansafe(arr, dtype, copy=True): +def astype_nansafe(arr, dtype, copy=True): """ return a view if copy is False, but need to be very careful as the result shape could change! 
""" if not isinstance(dtype, np.dtype): @@ -564,8 +564,8 @@ def _astype_nansafe(arr, dtype, copy=True): return arr.view(dtype) -def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, - convert_timedeltas=True, copy=True): +def maybe_convert_objects(values, convert_dates=True, convert_numeric=True, + convert_timedeltas=True, copy=True): """ if we have an object dtype, try to coerce dates and/or numbers """ # if we have passed in a list or scalar @@ -579,8 +579,8 @@ def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, # we take an aggressive stance and convert to datetime64[ns] if convert_dates == 'coerce': - new_values = _possibly_cast_to_datetime(values, 'M8[ns]', - errors='coerce') + new_values = maybe_cast_to_datetime( + values, 'M8[ns]', errors='coerce') # if we are all nans then leave me alone if not isnull(new_values).all(): @@ -627,8 +627,8 @@ def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, return values -def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, - coerce=False, copy=True): +def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, + coerce=False, copy=True): """ if we have an object dtype, try to coerce dates and/or numbers """ conversion_count = sum((datetime, numeric, timedelta)) @@ -683,7 +683,7 @@ def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, return values -def _possibly_castable(arr): +def maybe_castable(arr): # return False to force a non-fastpath # check datetime64[ns]/timedelta64[ns] are valid @@ -695,7 +695,7 @@ def _possibly_castable(arr): return arr.dtype.name not in _POSSIBLY_CAST_DTYPES -def _possibly_infer_to_datetimelike(value, convert_dates=False): +def maybe_infer_to_datetimelike(value, convert_dates=False): """ we might have a array (or single object) that is datetime like, and no dtype is passed don't change the value unless we find a @@ -788,7 +788,7 @@ def _try_timedelta(v): return value -def _possibly_cast_to_datetime(value, dtype, errors='raise'): +def maybe_cast_to_datetime(value, dtype, errors='raise'): """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """ @@ -886,12 +886,12 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'): # conversion elif not (is_array and not (issubclass(value.dtype.type, np.integer) or value.dtype == np.object_)): - value = _possibly_infer_to_datetimelike(value) + value = maybe_infer_to_datetimelike(value) return value -def _find_common_type(types): +def find_common_type(types): """ Find a common data type among the given dtypes. 
From fb7af6e257d5ca162487ea417eae675e3edbe271 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Mar 2017 07:58:28 -0400 Subject: [PATCH 257/933] CLN: move groupby algos separate cython lib - separate out groupby algorithms to separate lib - release GIL on median - release GIL on is_lexsorted / fix memory leak - release GIL on nancorr Author: Jeff Reback Closes #15775 from jreback/groupby and squashes the following commits: 4e2bfec [Jeff Reback] release GIL on median release GIL on is_lexsorted / fix memory leak release GIL on nancorr ce28bb5 [Jeff Reback] CLN: separate out groupby algorithms to separate lib --- pandas/_libs/algos.pxd | 13 + pandas/_libs/algos.pyx | 530 +++++------------- pandas/_libs/groupby.pyx | 291 ++++++++++ ...by_helper.pxi.in => groupby_helper.pxi.in} | 18 +- pandas/core/groupby.py | 10 +- pandas/tests/groupby/test_bin_groupby.py | 5 +- pandas/tests/groupby/test_transform.py | 14 +- pandas/tests/test_algos.py | 7 +- setup.py | 8 +- 9 files changed, 474 insertions(+), 422 deletions(-) create mode 100644 pandas/_libs/algos.pxd create mode 100644 pandas/_libs/groupby.pyx rename pandas/_libs/{algos_groupby_helper.pxi.in => groupby_helper.pxi.in} (98%) diff --git a/pandas/_libs/algos.pxd b/pandas/_libs/algos.pxd new file mode 100644 index 0000000000000..6d80e6f0073eb --- /dev/null +++ b/pandas/_libs/algos.pxd @@ -0,0 +1,13 @@ +from util cimport numeric +from numpy cimport float64_t, double_t + +cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil + +cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil: + cdef numeric t + + # cython doesn't allow pointer dereference so use array syntax + t = a[0] + a[0] = b[0] + b[0] = t + return 0 diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 7d3ce3280ec1e..897a60e0c2f21 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -96,22 +96,94 @@ class NegInfinity(object): __ge__ = lambda self, other: self is other -cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil except -1: - cdef numeric t +@cython.wraparound(False) +@cython.boundscheck(False) +def is_lexsorted(list list_of_arrays): + cdef: + int i + Py_ssize_t n, nlevels + int64_t k, cur, pre + ndarray arr + bint result = True + + nlevels = len(list_of_arrays) + n = len(list_of_arrays[0]) + + cdef int64_t **vecs = <int64_t**> malloc(nlevels * sizeof(int64_t*)) + for i in range(nlevels): + arr = list_of_arrays[i] + vecs[i] = <int64_t*> arr.data + + # Assume uniqueness?? + with nogil: + for i in range(n): + for k in range(nlevels): + cur = vecs[k][i] + pre = vecs[k][i -1] + if cur == pre: + continue + elif cur > pre: + break + else: + result = False + break + free(vecs) + return result + + +@cython.boundscheck(False) +@cython.wraparound(False) +def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups): + """ + compute a 1-d indexer that is an ordering of the passed index, + ordered by the groups. This is a reverse of the label + factorization process.
+ + Parameters + ---------- + index: int64 ndarray + mappings from group -> position + ngroups: int64 + number of groups + + return a tuple of (1-d indexer ordered by groups, group counts) + """ + + cdef: + Py_ssize_t i, loc, label, n + ndarray[int64_t] counts, where, result + + counts = np.zeros(ngroups + 1, dtype=np.int64) + n = len(index) + result = np.zeros(n, dtype=np.int64) + where = np.zeros(ngroups + 1, dtype=np.int64) + + with nogil: + + # count group sizes, location 0 for NA + for i in range(n): + counts[index[i] + 1] += 1 - # cython doesn't allow pointer dereference so use array syntax - t = a[0] - a[0] = b[0] - b[0] = t - return 0 + # mark the start of each contiguous group of like-indexed data + for i in range(1, ngroups + 1): + where[i] = where[i - 1] + counts[i - 1] + + # this is our indexer + for i in range(n): + label = index[i] + 1 + result[where[label]] = i + where[label] += 1 + + return result, counts @cython.boundscheck(False) @cython.wraparound(False) -cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k): +cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil: cdef: - Py_ssize_t i, j, l, m, n = a.size + Py_ssize_t i, j, l, m, n = a.shape[0] numeric x + with nogil: l = 0 m = n - 1 @@ -132,32 +204,6 @@ cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k): if j < k: l = i if k < i: m = j - return a[k] - - -cdef inline kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n): - cdef: - Py_ssize_t i, j, l, m - double_t x, t - - l = 0 - m = n -1 - while (l < m): - x = a[k] - i = l - j = m - - while 1: - while a[i] < x: i += 1 - while x < a[j]: j -= 1 - if i <= j: - swap(&a[i], &a[j]) - i += 1; j -= 1 - - if i > j: break - - if j < k: l = i - if k < i: m = j return a[k] @@ -181,6 +227,8 @@ cpdef numeric median(numeric[:] arr): # -------------- Min, Max subsequence +@cython.boundscheck(False) +@cython.wraparound(False) def max_subseq(ndarray[double_t] arr): cdef: Py_ssize_t i=0, s=0, e=0, T, n @@ -195,21 +243,24 @@ def max_subseq(ndarray[double_t] arr): S = m T = 0 - for i in range(1, n): - # S = max { S + A[i], A[i] ) - if (S > 0): - S = S + arr[i] - else: - S = arr[i] - T = i - if S > m: - s = T - e = i - m = S + with nogil: + for i in range(1, n): + # S = max { S + A[i], A[i] ) + if (S > 0): + S = S + arr[i] + else: + S = arr[i] + T = i + if S > m: + s = T + e = i + m = S return (s, e, m) +@cython.boundscheck(False) +@cython.wraparound(False) def min_subseq(ndarray[double_t] arr): cdef: Py_ssize_t s, e @@ -225,9 +276,10 @@ def min_subseq(ndarray[double_t] arr): @cython.boundscheck(False) @cython.wraparound(False) -def nancorr(ndarray[float64_t, ndim=2] mat, cov=False, minp=None): +def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None): cdef: Py_ssize_t i, j, xi, yi, N, K + bint minpv ndarray[float64_t, ndim=2] result ndarray[uint8_t, ndim=2] mask int64_t nobs = 0 @@ -236,46 +288,49 @@ def nancorr(ndarray[float64_t, ndim=2] mat, cov=False, minp=None): N, K = (<object> mat).shape if minp is None: - minp = 1 + minpv = 1 + else: + minpv = minp result = np.empty((K, K), dtype=np.float64) mask = np.isfinite(mat).view(np.uint8) - for xi in range(K): - for yi in range(xi + 1): - nobs = sumxx = sumyy = sumx = sumy = 0 - for i in range(N): - if mask[i, xi] and mask[i, yi]: - vx = mat[i, xi] - vy = mat[i, yi] - nobs += 1 - sumx += vx - sumy += vy - - if nobs < minp: - result[xi, yi] = result[yi, xi] = np.NaN - else: - meanx = sumx / nobs - meany = sumy / nobs - - # now the cov numerator - sumx = 0 - + with nogil: + for xi in range(K): + for yi in range(xi + 1): + nobs = sumxx = sumyy = sumx = sumy = 0 + for i in range(N): + if mask[i, xi] and mask[i, yi]: + vx = mat[i, xi] +
vy = mat[i, yi] + nobs += 1 + sumx += vx + sumy += vy + + if nobs < minpv: + result[xi, yi] = result[yi, xi] = NaN + else: + meanx = sumx / nobs + meany = sumy / nobs - sumx += vx * vy - sumxx += vx * vx - sumyy += vy * vy + # now the cov numerator + sumx = 0 - divisor = (nobs - 1.0) if cov else sqrt(sumxx * sumyy) + for i in range(N): + if mask[i, xi] and mask[i, yi]: + vx = mat[i, xi] - meanx + vy = mat[i, yi] - meany - if divisor != 0: - result[xi, yi] = result[yi, xi] = sumx / divisor - else: - result[xi, yi] = result[yi, xi] = np.NaN + sumx += vx * vy + sumxx += vx * vx + sumyy += vy * vy + + divisor = (nobs - 1.0) if cov else sqrt(sumxx * sumyy) + + if divisor != 0: + result[xi, yi] = result[yi, xi] = sumx / divisor + else: + result[xi, yi] = result[yi, xi] = NaN return result @@ -308,7 +363,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1): nobs += 1 if nobs < minp: - result[xi, yi] = result[yi, xi] = np.NaN + result[xi, yi] = result[yi, xi] = NaN else: maskedx = np.empty(nobs, dtype=np.float64) maskedy = np.empty(nobs, dtype=np.float64) @@ -339,326 +394,11 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1): if divisor != 0: result[xi, yi] = result[yi, xi] = sumx / divisor else: - result[xi, yi] = result[yi, xi] = np.NaN + result[xi, yi] = result[yi, xi] = NaN return result -#---------------------------------------------------------------------- -# group operations - - -@cython.wraparound(False) -@cython.boundscheck(False) -def is_lexsorted(list list_of_arrays): - cdef: - int i - Py_ssize_t n, nlevels - int64_t k, cur, pre - ndarray arr - - nlevels = len(list_of_arrays) - n = len(list_of_arrays[0]) - - cdef int64_t **vecs = <int64_t**> malloc(nlevels * sizeof(int64_t*)) - for i from 0 <= i < nlevels: - arr = list_of_arrays[i] - vecs[i] = <int64_t*> arr.data - - # Assume uniqueness?? - for i from 1 <= i < n: - for k from 0 <= k < nlevels: - cur = vecs[k][i] - pre = vecs[k][i -1] - if cur == pre: - continue - elif cur > pre: - break - else: - return False - free(vecs) - return True - - -@cython.boundscheck(False) -@cython.wraparound(False) -def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups): - """ - compute a 1-d indexer that is an ordering of the passed index, - ordered by the groups. This is a reverse of the label - factorization process.
- - Parameters - ---------- - index: int64 ndarray - mappings from group -> position - ngroups: int64 - number of groups - - return a tuple of (1-d indexer ordered by groups, group counts) - """ - - cdef: - Py_ssize_t i, loc, label, n - ndarray[int64_t] counts, where, result - - counts = np.zeros(ngroups + 1, dtype=np.int64) - n = len(index) - result = np.zeros(n, dtype=np.int64) - where = np.zeros(ngroups + 1, dtype=np.int64) - - with nogil: - - # count group sizes, location 0 for NA - for i from 0 <= i < n: - counts[index[i] + 1] += 1 - - # mark the start of each contiguous group of like-indexed data - for i from 1 <= i < ngroups + 1: - where[i] = where[i - 1] + counts[i - 1] - - # this is our indexer - for i from 0 <= i < n: - label = index[i] + 1 - result[where[label]] = i - where[label] += 1 - - return result, counts - -# TODO: aggregate multiple columns in single pass -#---------------------------------------------------------------------- -# first, nth, last - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_nth_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] labels, - int64_t rank): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, lab - object val - float64_t count - ndarray[int64_t, ndim=2] nobs - ndarray[object, ndim=2] resx - - nobs = np.zeros((<object> out).shape, dtype=np.int64) - resx = np.empty((<object> out).shape, dtype=object) - - N, K = (<object> values).shape - - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - if nobs[lab, j] == rank: - resx[lab, j] = val - - for i in range(len(counts)): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan - else: - out[i, j] = resx[i, j] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_nth_bin_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] bins, int64_t rank): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, ngroups, b - object val - float64_t count - ndarray[object, ndim=2] resx - ndarray[float64_t, ndim=2] nobs - - nobs = np.zeros((<object> out).shape, dtype=np.float64) - resx = np.empty((<object> out).shape, dtype=object) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = (<object> values).shape - - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - if nobs[b, j] == rank: - resx[b, j] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan - else: - out[i, j] = resx[i, j] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_last_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] labels): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, lab - object val - float64_t count - ndarray[object, ndim=2] resx - ndarray[int64_t, ndim=2] nobs - - nobs = np.zeros((<object> out).shape, dtype=np.int64) - resx = np.empty((<object> out).shape, dtype=object) - - N, K = (<object> values).shape - - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - resx[lab, j] = val - - for
i in range(len(counts)): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan - else: - out[i, j] = resx[i, j] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_last_bin_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] bins): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, ngroups, b - object val - float64_t count - ndarray[object, ndim=2] resx - ndarray[float64_t, ndim=2] nobs - - nobs = np.zeros((<object> out).shape, dtype=np.float64) - resx = np.empty((<object> out).shape, dtype=object) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = (<object> values).shape - - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - resx[b, j] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan - else: - out[i, j] = resx[i, j] - -cdef inline float64_t _median_linear(float64_t* a, int n): - cdef int i, j, na_count = 0 - cdef float64_t result - cdef float64_t* tmp - - if n == 0: - return NaN - - # count NAs - for i in range(n): - if a[i] != a[i]: - na_count += 1 - - if na_count: - if na_count == n: - return NaN - - tmp = <float64_t*> malloc((n - na_count) * sizeof(float64_t)) - - j = 0 - for i in range(n): - if a[i] == a[i]: - tmp[j] = a[i] - j += 1 - - a = tmp - n -= na_count - - if n % 2: - result = kth_smallest_c(<float64_t*> a, n / 2, n) - else: - result = (kth_smallest_c(a, n / 2, n) + - kth_smallest_c(a, n / 2 - 1, n)) / 2 - - if na_count: - free(a) - - return result - - # generated from template include "algos_common_helper.pxi" -include "algos_groupby_helper.pxi" include "algos_rank_helper.pxi" include "algos_take_helper.pxi" diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx new file mode 100644 index 0000000000000..c6ff602cfef1c --- /dev/null +++ b/pandas/_libs/groupby.pyx @@ -0,0 +1,291 @@ +# cython: profile=False + +from numpy cimport * +cimport numpy as np +import numpy as np + +cimport cython + +import_array() + +cimport util + +from numpy cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, + uint32_t, uint64_t, float16_t, float32_t, float64_t) + +from libc.stdlib cimport malloc, free + +from util cimport numeric, get_nat +from algos cimport swap +from algos import take_2d_axis1_float64_float64, groupsort_indexer + +cdef int64_t iNaT = get_nat() + +cdef double NaN = np.NaN +cdef double nan = NaN + + +# TODO: aggregate multiple columns in single pass +#---------------------------------------------------------------------- +# first, nth, last + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_nth_object(ndarray[object, ndim=2] out, + ndarray[int64_t] counts, + ndarray[object, ndim=2] values, + ndarray[int64_t] labels, + int64_t rank): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab + object val + float64_t count + ndarray[int64_t, ndim=2] nobs + ndarray[object, ndim=2] resx + + nobs = np.zeros((<object> out).shape, dtype=np.int64) + resx = np.empty((<object> out).shape, dtype=object) + + N, K = (<object> values).shape + + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val + + for i in range(len(counts)): + for j in range(K): + if nobs[i, j]
== 0: + out[i, j] = nan + else: + out[i, j] = resx[i, j] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_nth_bin_object(ndarray[object, ndim=2] out, + ndarray[int64_t] counts, + ndarray[object, ndim=2] values, + ndarray[int64_t] bins, int64_t rank): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, ngroups, b + object val + float64_t count + ndarray[object, ndim=2] resx + ndarray[float64_t, ndim=2] nobs + + nobs = np.zeros((<object> out).shape, dtype=np.float64) + resx = np.empty((<object> out).shape, dtype=object) + + if len(bins) == 0: + return + if bins[len(bins) - 1] == len(values): + ngroups = len(bins) + else: + ngroups = len(bins) + 1 + + N, K = (<object> values).shape + + b = 0 + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 + + counts[b] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[b, j] += 1 + if nobs[b, j] == rank: + resx[b, j] = val + + for i in range(ngroups): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = resx[i, j] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_last_object(ndarray[object, ndim=2] out, + ndarray[int64_t] counts, + ndarray[object, ndim=2] values, + ndarray[int64_t] labels): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab + object val + float64_t count + ndarray[object, ndim=2] resx + ndarray[int64_t, ndim=2] nobs + + nobs = np.zeros((<object> out).shape, dtype=np.int64) + resx = np.empty((<object> out).shape, dtype=object) + + N, K = (<object> values).shape + + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + resx[lab, j] = val + + for i in range(len(counts)): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = resx[i, j] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_last_bin_object(ndarray[object, ndim=2] out, + ndarray[int64_t] counts, + ndarray[object, ndim=2] values, + ndarray[int64_t] bins): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, ngroups, b + object val + float64_t count + ndarray[object, ndim=2] resx + ndarray[float64_t, ndim=2] nobs + + nobs = np.zeros((<object> out).shape, dtype=np.float64) + resx = np.empty((<object> out).shape, dtype=object) + + if len(bins) == 0: + return + if bins[len(bins) - 1] == len(values): + ngroups = len(bins) + else: + ngroups = len(bins) + 1 + + N, K = (<object> values).shape + + b = 0 + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 + + counts[b] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[b, j] += 1 + resx[b, j] = val + + for i in range(ngroups): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = resx[i, j] + + +cdef inline float64_t _median_linear(float64_t* a, int n) nogil: + cdef int i, j, na_count = 0 + cdef float64_t result + cdef float64_t* tmp + + if n == 0: + return NaN + + # count NAs + for i in range(n): + if a[i] != a[i]: + na_count += 1 + + if na_count: + if na_count == n: + return NaN + + tmp = <float64_t*> malloc((n - na_count) * sizeof(float64_t)) + + j = 0 + for i in range(n): + if a[i] == a[i]: + tmp[j] = a[i] + j += 1 + + a = tmp + n -= na_count + + if n % 2: + result = kth_smallest_c(<float64_t*> a, n / 2, n) + else: + result = (kth_smallest_c(a, n / 2, n) + + kth_smallest_c(a, n / 2 - 1, n)) / 2 + + if na_count: + free(a) + + return result + + +cdef inline float64_t kth_smallest_c(float64_t* a, +
Py_ssize_t k, + Py_ssize_t n) nogil: + cdef: + Py_ssize_t i, j, l, m + double_t x, t + + l = 0 + m = n -1 + while (l < m): + x = a[k] + i = l + j = m + + while 1: + while a[i] < x: i += 1 + while x < a[j]: j -= 1 + if i <= j: + swap(&a[i], &a[j]) + i += 1; j -= 1 + + if i > j: break + + if j < k: l = i + if k < i: m = j + return a[k] + + +# generated from template +include "groupby_helper.pxi" diff --git a/pandas/_libs/algos_groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in similarity index 98% rename from pandas/_libs/algos_groupby_helper.pxi.in rename to pandas/_libs/groupby_helper.pxi.in index e2c263f49b110..d38b677df321c 100644 --- a/pandas/_libs/algos_groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -681,6 +681,8 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, #---------------------------------------------------------------------- +@cython.boundscheck(False) +@cython.wraparound(False) def group_median_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, @@ -704,13 +706,15 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, take_2d_axis1_float64_float64(values.T, indexer, out=data) - for i in range(K): - # exclude NA group - ptr += _counts[0] - for j in range(ngroups): - size = _counts[j + 1] - out[j, i] = _median_linear(ptr, size) - ptr += size + with nogil: + + for i in range(K): + # exclude NA group + ptr += _counts[0] + for j in range(ngroups): + size = _counts[j + 1] + out[j, i] = _median_linear(ptr, size) + ptr += size @cython.boundscheck(False) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 0a63981290df3..727af8b8cd3eb 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -60,7 +60,7 @@ import pandas.core.common as com from pandas.core.config import option_context -from pandas._libs import lib, algos as libalgos, Timestamp, NaT, iNaT +from pandas._libs import lib, groupby as libgroupby, Timestamp, NaT, iNaT from pandas._libs.lib import count_level_2d _doc_template = """ @@ -1474,7 +1474,7 @@ def shift(self, periods=1, freq=None, axis=0): # filled in by Cython indexer = np.zeros_like(labels) - libalgos.group_shift_indexer(indexer, labels, ngroups, periods) + libgroupby.group_shift_indexer(indexer, labels, ngroups, periods) output = {} for name, obj in self._iterate_slices(): @@ -1815,13 +1815,13 @@ def _get_cython_function(self, kind, how, values, is_numeric): def get_func(fname): # see if there is a fused-type version of function # only valid for numeric - f = getattr(libalgos, fname, None) + f = getattr(libgroupby, fname, None) if f is not None and is_numeric: return f # otherwise find dtype-specific version, falling back to object for dt in [dtype_str, 'object']: - f = getattr(libalgos, "%s_%s" % (fname, dtype_str), None) + f = getattr(libgroupby, "%s_%s" % (fname, dtype_str), None) if f is not None: return f @@ -3118,7 +3118,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, out = _ensure_int64(out) return Series(out, index=mi, name=self.name) - # for compat.
with libgroupby.value_counts need to ensure every # bin is present at every index level, null filled with zeros diff = np.zeros(len(out), dtype='bool') for lab in labels[:-1]: diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 77c5bde332cff..02c7933e020ea 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -7,8 +7,7 @@ from pandas import Index, isnull from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm -import pandas._libs.lib as lib -import pandas._libs.algos as algos +from pandas._libs import lib, groupby def test_series_grouper(): @@ -92,7 +91,7 @@ def _check(dtype): labels = _ensure_int64(np.repeat(np.arange(3), np.diff(np.r_[0, bins]))) - func = getattr(algos, 'group_ohlc_%s' % dtype) + func = getattr(groupby, 'group_ohlc_%s' % dtype) func(out, counts, obj[:, None], labels) def _ohlc(group): diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 4acf9dd4755f4..3b85fadda6cfe 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -6,7 +6,7 @@ from pandas import Series, DataFrame, Timestamp, MultiIndex, concat, date_range from pandas.types.common import _ensure_platform_int, is_timedelta64_dtype from pandas.compat import StringIO -from pandas._libs import algos +from pandas._libs import groupby from .common import MixIn, assert_fp_equal from pandas.util.testing import assert_frame_equal, assert_series_equal @@ -418,8 +418,8 @@ def test_cython_group_transform_algos(self): dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32, np.uint64, np.float32, np.float64] - ops = [(algos.group_cumprod_float64, np.cumproduct, [np.float64]), - (algos.group_cumsum, np.cumsum, dtypes)] + ops = [(groupby.group_cumprod_float64, np.cumproduct, [np.float64]), + (groupby.group_cumsum, np.cumsum, dtypes)] is_datetimelike = False for pd_op, np_op, dtypes in ops: @@ -437,13 +437,13 @@ def test_cython_group_transform_algos(self): data = np.array([[1], [2], [3], [np.nan], [4]], dtype='float64') actual = np.zeros_like(data) actual.fill(np.nan) - algos.group_cumprod_float64(actual, data, labels, is_datetimelike) + groupby.group_cumprod_float64(actual, data, labels, is_datetimelike) expected = np.array([1, 2, 6, np.nan, 24], dtype='float64') self.assert_numpy_array_equal(actual[:, 0], expected) actual = np.zeros_like(data) actual.fill(np.nan) - algos.group_cumsum(actual, data, labels, is_datetimelike) + groupby.group_cumsum(actual, data, labels, is_datetimelike) expected = np.array([1, 3, 6, np.nan, 10], dtype='float64') self.assert_numpy_array_equal(actual[:, 0], expected) @@ -451,8 +451,8 @@ def test_cython_group_transform_algos(self): is_datetimelike = True data = np.array([np.timedelta64(1, 'ns')] * 5, dtype='m8[ns]')[:, None] actual = np.zeros_like(data, dtype='int64') - algos.group_cumsum(actual, data.view('int64'), labels, - is_datetimelike) + groupby.group_cumsum(actual, data.view('int64'), labels, + is_datetimelike) expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64( 2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'), np.timedelta64(5, 'ns')]) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index ce925f756edb7..f8eac7a8911ad 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -10,7 +10,8 @@ import pandas as pd from pandas import compat -from pandas._libs import algos as libalgos, hashtable +from pandas._libs 
import (groupby as libgroupby, algos as libalgos, + hashtable) from pandas._libs.hashtable import unique_label_indices from pandas.compat import lrange import pandas.core.algorithms as algos @@ -891,7 +892,7 @@ def test_group_var_constant(self): class TestGroupVarFloat64(tm.TestCase, GroupVarTestMixin): __test__ = True - algo = algos.algos.group_var_float64 + algo = libgroupby.group_var_float64 dtype = np.float64 rtol = 1e-5 @@ -914,7 +915,7 @@ def test_group_var_large_inputs(self): class TestGroupVarFloat32(tm.TestCase, GroupVarTestMixin): __test__ = True - algo = algos.algos.group_var_float32 + algo = libgroupby.group_var_float32 dtype = np.float32 rtol = 1e-2 diff --git a/setup.py b/setup.py index 3e0a6b41152dc..8e690f05b818c 100755 --- a/setup.py +++ b/setup.py @@ -110,8 +110,9 @@ def is_platform_mac(): _pxi_dep_template = { - 'algos': ['_libs/algos_common_helper.pxi.in', '_libs/algos_groupby_helper.pxi.in', + 'algos': ['_libs/algos_common_helper.pxi.in', '_libs/algos_take_helper.pxi.in', '_libs/algos_rank_helper.pxi.in'], + 'groupby': ['_libs/groupby_helper.pxi.in'], 'join': ['_libs/join_helper.pxi.in', '_libs/join_func_helper.pxi.in'], 'reshape': ['_libs/reshape_helper.pxi.in'], 'hashtable': ['_libs/hashtable_class_helper.pxi.in', @@ -496,8 +497,11 @@ def pxd(name): 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], 'depends': _pxi_dep['index']}, '_libs.algos': {'pyxfile': '_libs/algos', - 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'pxdfiles': ['_libs/src/util', '_libs/algos', '_libs/hashtable'], 'depends': _pxi_dep['algos']}, + '_libs.groupby': {'pyxfile': '_libs/groupby', + 'pxdfiles': ['_libs/src/util', '_libs/algos'], + 'depends': _pxi_dep['groupby']}, '_libs.join': {'pyxfile': '_libs/join', 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], 'depends': _pxi_dep['join']}, From 79581ffe6fb73089dfa8394c2f4e44677acfe1ce Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Mar 2017 09:56:58 -0400 Subject: [PATCH 258/933] travis deduping on prs closes #12438 Author: Jeff Reback Closes #15780 from jreback/dedupe and squashes the following commits: 64f217e [Jeff Reback] replace . 
by space b6f2a62 [Jeff Reback] formatting 0c33d9b [Jeff Reback] tests commit 24f6ae6 [Jeff Reback] CI: fast finish travis builds for the same PR --- .travis.yml | 5 ++- ci/install_travis.sh | 41 +++++++++++---------- ci/travis_fast_finish.py | 77 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 21 deletions(-) create mode 100755 ci/travis_fast_finish.py diff --git a/.travis.yml b/.travis.yml index 67b37f1d58931..270f8c2fc76c3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -177,15 +177,14 @@ matrix: - USE_CACHE=true before_install: + - echo "Checking to see if this build is outdated" + - ci/travis_fast_finish.py || { echo "Failing outdated build to end it."; exit 1; } - echo "before_install" - source ci/travis_process_gbq_encryption.sh - - echo $VIRTUAL_ENV - export PATH="$HOME/miniconda3/bin:$PATH" - df -h - - date - pwd - uname -a - - python -V - git --version - git tag - ci/before_install_travis.sh diff --git a/ci/install_travis.sh b/ci/install_travis.sh index c940083f5ae9e..66633c0592748 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -1,18 +1,6 @@ #!/bin/bash -# There are 2 distinct pieces that get zipped and cached -# - The venv site-packages dir including the installed dependencies -# - The pandas build artifacts, using the build cache support via -# scripts/use_build_cache.py -# -# if the user opted in to use the cache and we're on a whitelisted fork -# - if the server doesn't hold a cached version of venv/pandas build, -# do things the slow way, and put the results on the cache server -# for the next time. -# - if the cache files are available, instal some necessaries via apt -# (no compiling needed), then directly goto script and collect 200$. -# - +# edit the locale file if needed function edit_init() { if [ -n "$LOCALE_OVERRIDE" ]; then @@ -26,15 +14,18 @@ function edit_init() fi } +echo echo "[install_travis]" edit_init home_dir=$(pwd) -echo "[home_dir: $home_dir]" +echo +echo "[home_dir]: $home_dir" # install miniconda MINICONDA_DIR="$HOME/miniconda3" +echo echo "[Using clean Miniconda install]" if [ -d "$MINICONDA_DIR" ]; then @@ -49,14 +40,17 @@ else fi time bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 +echo echo "[show conda]" which conda +echo echo "[update conda]" conda config --set ssl_verify false || exit 1 conda config --set always_yes true --set changeps1 false || exit 1 conda update -q conda +echo echo "[add channels]" # add the pandas channel to take priority # to add extra packages @@ -73,26 +67,28 @@ fi conda info -a || exit 1 # set the compiler cache to work +echo if [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then echo "[Using ccache]" export PATH=/usr/lib/ccache:/usr/lib64/ccache:$PATH gcc=$(which gcc) - echo "[gcc: $gcc]" + echo "[gcc]: $gcc" ccache=$(which ccache) - echo "[ccache: $ccache]" + echo "[ccache]: $ccache" export CC='ccache gcc' elif [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "osx" ]; then echo "[Using ccache]" time brew install ccache export PATH=/usr/local/opt/ccache/libexec:$PATH gcc=$(which gcc) - echo "[gcc: $gcc]" + echo "[gcc]: $gcc" ccache=$(which ccache) - echo "[ccache: $ccache]" + echo "[ccache]: $ccache" else echo "[Not using ccache]" fi +echo echo "[create env]" # may have installation instructions for this build @@ -106,6 +102,7 @@ else fi # build deps +echo echo "[build installs]" REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build" if [ -e ${REQ} ]; then @@ -113,6 +110,7 @@ if [ -e ${REQ} ]; then fi # may have addtl installation instructions for this build +echo echo 
"[build addtl installs]" REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build.sh" if [ -e ${REQ} ]; then @@ -132,6 +130,7 @@ if [ "$COVERAGE" ]; then pip install coverage pytest-cov fi +echo if [ "$BUILD_TEST" ]; then # build & install testing @@ -151,6 +150,7 @@ else fi # we may have run installations +echo echo "[conda installs]" REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.run" if [ -e ${REQ} ]; then @@ -158,6 +158,7 @@ if [ -e ${REQ} ]; then fi # we may have additional pip installs +echo echo "[pip installs]" REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip" if [ -e ${REQ} ]; then @@ -165,6 +166,7 @@ if [ -e ${REQ} ]; then fi # may have addtl installation instructions for this build +echo echo "[addtl installs]" REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.sh" if [ -e ${REQ} ]; then @@ -176,14 +178,17 @@ if [ -z "$BUILD_TEST" ]; then # remove any installed pandas package # w/o removing anything else + echo echo "[removing installed pandas]" conda remove pandas --force # install our pandas + echo echo "[running setup.py develop]" python setup.py develop || exit 1 fi +echo echo "[done]" exit 0 diff --git a/ci/travis_fast_finish.py b/ci/travis_fast_finish.py new file mode 100755 index 0000000000000..c2e2a9159918b --- /dev/null +++ b/ci/travis_fast_finish.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +# script to cancel previous travis builds for the same PR +# originally from +# https://github.com/conda-forge/staged-recipes/pull/2257 + +try: + from future_builtins import ( + map, + filter, + ) +except ImportError: + pass + +import codecs +import contextlib +import json +import os + +try: + from urllib.request import ( + Request, + urlopen, + ) +except ImportError: + from urllib2 import ( + Request, + urlopen, + ) + + +def check_latest_pr_build(repo, pr, build_num): + # Not a PR so it is latest. + if pr is None: + return True + + headers = { + "Accept": "application/vnd.travis-ci.2+json", + } + url = "https://api.travis-ci.org/repos/{repo}/builds?event_type=pull_request" + + request = Request(url.format(repo=repo), headers=headers) + with contextlib.closing(urlopen(request)) as response: + reader = codecs.getreader("utf-8") + data = json.load(reader(response)) + + # Parse the response to get a list of build numbers for this PR. + builds = data["builds"] + pr_builds = filter(lambda b: b["pull_request_number"] == pr, builds) + pr_build_nums = sorted(map(lambda b: int(b["number"]), pr_builds)) + + print("build_num: {}".format(build_num)) + print("pr_build_nums: {}".format(','.join([str(n) for n in pr_build_nums]))) + + # Check if our build number is the latest (largest) + # out of all of the builds for this PR. 
+ if build_num < max(pr_build_nums): + return False + else: + return True + + +def main(): + repo = os.environ["TRAVIS_REPO_SLUG"] + + pr = os.environ["TRAVIS_PULL_REQUEST"] + pr = None if pr == "false" else int(pr) + build_num = int(os.environ["TRAVIS_BUILD_NUMBER"]) + + print("checking for fast_finish: {}-{}-{}".format(repo, pr, build_num)) + + return int(check_latest_pr_build(repo, pr, build_num) is False) + + +if __name__ == "__main__": + import sys + sys.exit(main()) From 1a266ee5809990244f1fe6daeb717878d06cf783 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 22 Mar 2017 14:42:52 -0400 Subject: [PATCH 259/933] API: return Index instead of array from DatetimeIndex field accessors (GH15022) closes #15022 Author: Joris Van den Bossche Closes #15589 from jorisvandenbossche/api-dt-fields-index and squashes the following commits: ffacd38 [Joris Van den Bossche] doc fixes 41728a9 [Joris Van den Bossche] FIX: boolean fields should still return array 6317b6b [Joris Van den Bossche] Add whatsnew 96ed069 [Joris Van den Bossche] Preserve name for PeriodIndex field accessors cdf6cae [Joris Van den Bossche] Preserve name for DatetimeIndex field accessors f2831e2 [Joris Van den Bossche] Update timedelta accessors 52f9008 [Joris Van den Bossche] Fix tests 41008c7 [Joris Van den Bossche] API: return Index instead of array from datetime field accessors (GH15022) --- doc/source/whatsnew/v0.20.0.txt | 33 ++++++++- pandas/tests/indexes/datetimes/test_misc.py | 33 ++++++++- .../tests/indexes/period/test_construction.py | 4 +- pandas/tests/indexes/period/test_period.py | 10 +-- .../indexes/timedeltas/test_timedelta.py | 24 ++++--- pandas/tests/scalar/test_timestamp.py | 13 +++- pandas/tests/tools/test_pivot.py | 2 +- pandas/tests/tools/test_util.py | 8 +-- pandas/tests/tseries/test_timezones.py | 70 +++++++++---------- pandas/tseries/common.py | 2 + pandas/tseries/converter.py | 2 +- pandas/tseries/index.py | 19 +++-- pandas/tseries/period.py | 5 +- pandas/tseries/tdi.py | 5 +- pandas/tseries/util.py | 4 +- 15 files changed, 156 insertions(+), 78 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 5ac7624856040..6d951af139b42 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -471,6 +471,38 @@ New Behavior: s.map(lambda x: x.hour) + +.. _whatsnew_0200.api_breaking.index_dt_field: + +Accessing datetime fields of Index now return Index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The datetime-related attributes (see :ref:`here <timeseries.components>` +for an overview) of ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex`` previously +returned numpy arrays. They will now return a new ``Index`` object, except +in the case of a boolean field, where the result will still be a boolean ndarray. (:issue:`15022`) + +Previous behaviour: + +.. code-block:: ipython + + In [1]: idx = pd.date_range("2015-01-01", periods=5, freq='10H') + + In [2]: idx.hour + Out[2]: array([ 0, 10, 20, 6, 16], dtype=int32) + +New Behavior: + +.. ipython:: python + + idx = pd.date_range("2015-01-01", periods=5, freq='10H') + idx.hour + +This has the advantage that specific ``Index`` methods are still available on the +result. On the other hand, this might have backward incompatibilities: e.g. +compared to numpy arrays, ``Index`` objects are not mutable. To get the original +ndarray, you can always convert explicitly using ``np.asarray(idx.hour)``. + ..
_whatsnew_0200.api_breaking.s3: S3 File Handling @@ -936,4 +968,3 @@ Bug Fixes - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) - Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) - diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index e99f1d46637c2..ef24c493f5090 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -172,6 +172,7 @@ def test_normalize(self): class TestDatetime64(tm.TestCase): def test_datetimeindex_accessors(self): + dti_naive = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), periods=365) # GH 13303 @@ -255,6 +256,34 @@ def test_datetimeindex_accessors(self): self.assertEqual(len(dti.is_year_end), 365) self.assertEqual(len(dti.weekday_name), 365) + dti.name = 'name' + + # non boolean accessors -> return Index + for accessor in ['year', 'month', 'day', 'hour', 'minute', + 'second', 'microsecond', 'nanosecond', + 'dayofweek', 'dayofyear', 'weekofyear', + 'quarter', 'weekday_name']: + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, Index) + assert res.name == 'name' + + # boolean accessors -> return array + for accessor in ['is_month_start', 'is_month_end', + 'is_quarter_start', 'is_quarter_end', + 'is_year_start', 'is_year_end']: + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, np.ndarray) + + # test boolean indexing + res = dti[dti.is_quarter_start] + exp = dti[[0, 90, 181, 273]] + tm.assert_index_equal(res, exp) + res = dti[dti.is_leap_year] + exp = DatetimeIndex([], freq='D', tz=dti.tz, name='name') + tm.assert_index_equal(res, exp) + dti = DatetimeIndex(freq='BQ-FEB', start=datetime(1998, 1, 1), periods=4) @@ -313,5 +342,5 @@ def test_datetimeindex_accessors(self): def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) - self.assert_numpy_array_equal(dti.nanosecond, - np.arange(10, dtype=np.int32)) + self.assert_index_equal(dti.nanosecond, + pd.Index(np.arange(10, dtype=np.int64))) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index f13a84f4f0e92..ab70ad59846e8 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -91,8 +91,8 @@ def test_constructor_arrays_negative_year(self): pindex = PeriodIndex(year=years, quarter=quarters) - self.assert_numpy_array_equal(pindex.year, years) - self.assert_numpy_array_equal(pindex.quarter, quarters) + self.assert_index_equal(pindex.year, pd.Index(years)) + self.assert_index_equal(pindex.quarter, pd.Index(quarters)) def test_constructor_invalid_quarters(self): self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004), diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 4fbadfca06ede..6a6c0ab49b15d 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -658,12 +658,12 @@ def test_negative_ordinals(self): def test_pindex_fieldaccessor_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2012-03', '2012-04'], freq='D') + '2012-03', '2012-04'], freq='D', name='name') - exp = np.array([2011, 2011, -1, 2012, 2012], dtype=np.int64) - 
self.assert_numpy_array_equal(idx.year, exp) - exp = np.array([1, 2, -1, 3, 4], dtype=np.int64) - self.assert_numpy_array_equal(idx.month, exp) + exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name='name') + self.assert_index_equal(idx.year, exp) + exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name='name') + self.assert_index_equal(idx.month, exp) def test_pindex_qaccess(self): pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 4c8571e4f08f9..3abc2d8422fd3 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -424,7 +424,7 @@ def test_total_seconds(self): freq='s') expt = [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9, 1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456. / 1e9] - tm.assert_almost_equal(rng.total_seconds(), np.array(expt)) + tm.assert_almost_equal(rng.total_seconds(), Index(expt)) # test Series s = Series(rng) @@ -486,16 +486,16 @@ def test_append_numpy_bug_1681(self): def test_fields(self): rng = timedelta_range('1 days, 10:11:12.100123456', periods=2, freq='s') - self.assert_numpy_array_equal(rng.days, np.array( - [1, 1], dtype='int64')) - self.assert_numpy_array_equal( + self.assert_index_equal(rng.days, Index([1, 1], dtype='int64')) + self.assert_index_equal( rng.seconds, - np.array([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], - dtype='int64')) - self.assert_numpy_array_equal(rng.microseconds, np.array( - [100 * 1000 + 123, 100 * 1000 + 123], dtype='int64')) - self.assert_numpy_array_equal(rng.nanoseconds, np.array( - [456, 456], dtype='int64')) + Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], + dtype='int64')) + self.assert_index_equal( + rng.microseconds, + Index([100 * 1000 + 123, 100 * 1000 + 123], dtype='int64')) + self.assert_index_equal(rng.nanoseconds, + Index([456, 456], dtype='int64')) self.assertRaises(AttributeError, lambda: rng.hours) self.assertRaises(AttributeError, lambda: rng.minutes) @@ -509,6 +509,10 @@ def test_fields(self): tm.assert_series_equal(s.dt.seconds, Series( [10 * 3600 + 11 * 60 + 12, np.nan], index=[0, 1])) + # preserve name (GH15589) + rng.name = 'name' + assert rng.days.name == 'name' + def test_freq_conversion(self): # doc example diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 082f0fa9c40d5..bbf33c4db5ad7 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -597,9 +597,20 @@ def test_nat_fields(self): def test_nat_vector_field_access(self): idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) + # non boolean fields fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 'is_leap_year'] + 'days_in_month'] + + for field in fields: + result = getattr(idx, field) + expected = [getattr(x, field) for x in idx] + self.assert_index_equal(result, pd.Index(expected)) + + # boolean fields + fields = ['is_leap_year'] + # other boolean fields like 'is_month_start' and 'is_month_end' + # not yet supported by NaT for field in fields: result = getattr(idx, field) diff --git a/pandas/tests/tools/test_pivot.py b/pandas/tests/tools/test_pivot.py index 62863372dbd02..4502f232c6d9c 100644 --- a/pandas/tests/tools/test_pivot.py +++ b/pandas/tests/tools/test_pivot.py @@ -1367,7 +1367,7 @@ def test_daily(self): with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): annual = pivot_annual(ts, 'D') - doy = ts.index.dayofyear + doy = np.asarray(ts.index.dayofyear) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1 diff --git a/pandas/tests/tools/test_util.py b/pandas/tests/tools/test_util.py index 2672db13a959f..ed64e8f42d84b 100644 --- a/pandas/tests/tools/test_util.py +++ b/pandas/tests/tools/test_util.py @@ -31,10 +31,10 @@ def test_datetimeindex(self): # make sure that the ordering on datetimeindex is consistent x = date_range('2000-01-01', periods=2) result1, result2 = [Index(y).day for y in cartesian_product([x, x])] - expected1 = np.array([1, 1, 2, 2], dtype=np.int32) - expected2 = np.array([1, 2, 1, 2], dtype=np.int32) - tm.assert_numpy_array_equal(result1, expected1) - tm.assert_numpy_array_equal(result2, expected2) + expected1 = Index([1, 1, 2, 2]) + expected2 = Index([1, 2, 1, 2]) + tm.assert_index_equal(result1, expected1) + tm.assert_index_equal(result2, expected2) def test_empty(self): # product of empty factors diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 1ccc1652d2719..1fc0e1b73df6b 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -358,8 +358,8 @@ def test_field_access_localize(self): dr = date_range('2011-10-02 00:00', freq='h', periods=10, tz=self.tzstr('America/Atikokan')) - expected = np.arange(10, dtype=np.int32) - self.assert_numpy_array_equal(dr.hour, expected) + expected = Index(np.arange(10, dtype=np.int64)) + self.assert_index_equal(dr.hour, expected) def test_with_tz(self): tz = self.tz('US/Central') @@ -947,8 +947,8 @@ def test_tz_convert_hour_overflow_dst(self): '2009-05-12 09:50:32'] tt = to_datetime(ts).tz_localize('US/Eastern') ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([13, 14, 13]) + self.assert_index_equal(ut.hour, expected) # sorted case UTC -> US/Eastern ts = ['2008-05-12 13:50:00', @@ -956,8 +956,8 @@ def test_tz_convert_hour_overflow_dst(self): '2009-05-12 13:50:32'] tt = to_datetime(ts).tz_localize('UTC') ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([9, 9, 9]) + self.assert_index_equal(ut.hour, expected) # unsorted case US/Eastern -> UTC ts = ['2008-05-12 09:50:00', @@ -965,8 +965,8 @@ def test_tz_convert_hour_overflow_dst(self): '2008-05-12 09:50:32'] tt = to_datetime(ts).tz_localize('US/Eastern') ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([13, 14, 13]) + self.assert_index_equal(ut.hour, expected) # unsorted case UTC -> US/Eastern ts = ['2008-05-12 13:50:00', @@ -974,8 +974,8 @@ def test_tz_convert_hour_overflow_dst(self): '2008-05-12 13:50:32'] tt = to_datetime(ts).tz_localize('UTC') ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([9, 9, 9]) + self.assert_index_equal(ut.hour, expected) def test_tz_convert_hour_overflow_dst_timestamps(self): # Regression test for: @@ -989,8 +989,8 @@ def test_tz_convert_hour_overflow_dst_timestamps(self): Timestamp('2009-05-12 09:50:32', tz=tz)] tt = to_datetime(ts) ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], 
dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([13, 14, 13]) + self.assert_index_equal(ut.hour, expected) # sorted case UTC -> US/Eastern ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'), @@ -998,8 +998,8 @@ def test_tz_convert_hour_overflow_dst_timestamps(self): Timestamp('2009-05-12 13:50:32', tz='UTC')] tt = to_datetime(ts) ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([9, 9, 9]) + self.assert_index_equal(ut.hour, expected) # unsorted case US/Eastern -> UTC ts = [Timestamp('2008-05-12 09:50:00', tz=tz), @@ -1007,8 +1007,8 @@ def test_tz_convert_hour_overflow_dst_timestamps(self): Timestamp('2008-05-12 09:50:32', tz=tz)] tt = to_datetime(ts) ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([13, 14, 13]) + self.assert_index_equal(ut.hour, expected) # unsorted case UTC -> US/Eastern ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'), @@ -1016,8 +1016,8 @@ def test_tz_convert_hour_overflow_dst_timestamps(self): Timestamp('2008-05-12 13:50:32', tz='UTC')] tt = to_datetime(ts) ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([9, 9, 9]) + self.assert_index_equal(ut.hour, expected) def test_tslib_tz_convert_trans_pos_plus_1__bug(self): # Regression test for tslib.tz_convert(vals, tz1, tz2). @@ -1028,9 +1028,8 @@ def test_tslib_tz_convert_trans_pos_plus_1__bug(self): idx = idx.tz_localize('UTC') idx = idx.tz_convert('Europe/Moscow') - expected = np.repeat(np.array([3, 4, 5], dtype=np.int32), - np.array([n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) + expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) + self.assert_index_equal(idx.hour, Index(expected)) def test_tslib_tz_convert_dst(self): for freq, n in [('H', 1), ('T', 60), ('S', 3600)]: @@ -1039,62 +1038,57 @@ def test_tslib_tz_convert_dst(self): tz='UTC') idx = idx.tz_convert('US/Eastern') expected = np.repeat(np.array([18, 19, 20, 21, 22, 23, - 0, 1, 3, 4, 5], dtype=np.int32), + 0, 1, 3, 4, 5]), np.array([n, n, n, n, n, n, n, n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) + self.assert_index_equal(idx.hour, Index(expected)) idx = date_range('2014-03-08 18:00', '2014-03-09 05:00', freq=freq, tz='US/Eastern') idx = idx.tz_convert('UTC') - expected = np.repeat(np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], - dtype=np.int32), + expected = np.repeat(np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), np.array([n, n, n, n, n, n, n, n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) + self.assert_index_equal(idx.hour, Index(expected)) # End DST idx = date_range('2014-11-01 23:00', '2014-11-02 09:00', freq=freq, tz='UTC') idx = idx.tz_convert('US/Eastern') expected = np.repeat(np.array([19, 20, 21, 22, 23, - 0, 1, 1, 2, 3, 4], dtype=np.int32), + 0, 1, 1, 2, 3, 4]), np.array([n, n, n, n, n, n, n, n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) + self.assert_index_equal(idx.hour, Index(expected)) idx = date_range('2014-11-01 18:00', '2014-11-02 05:00', freq=freq, tz='US/Eastern') idx = idx.tz_convert('UTC') expected = np.repeat(np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10], dtype=np.int32), + 7, 8, 9, 10]), np.array([n, n, n, n, n, n, n, n, n, n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) + 
self.assert_index_equal(idx.hour, Index(expected)) # daily # Start DST idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', tz='UTC') idx = idx.tz_convert('US/Eastern') - self.assert_numpy_array_equal(idx.hour, - np.array([19, 19], dtype=np.int32)) + self.assert_index_equal(idx.hour, Index([19, 19])) idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', tz='US/Eastern') idx = idx.tz_convert('UTC') - self.assert_numpy_array_equal(idx.hour, - np.array([5, 5], dtype=np.int32)) + self.assert_index_equal(idx.hour, Index([5, 5])) # End DST idx = date_range('2014-11-01 00:00', '2014-11-02 00:00', freq='D', tz='UTC') idx = idx.tz_convert('US/Eastern') - self.assert_numpy_array_equal(idx.hour, - np.array([20, 20], dtype=np.int32)) + self.assert_index_equal(idx.hour, Index([20, 20])) idx = date_range('2014-11-01 00:00', '2014-11-02 000:00', freq='D', tz='US/Eastern') idx = idx.tz_convert('UTC') - self.assert_numpy_array_equal(idx.hour, - np.array([4, 4], dtype=np.int32)) + self.assert_index_equal(idx.hour, Index([4, 4])) def test_tzlocal(self): # GH 13583 diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index 82fcdbcd0d367..f9fd27176487c 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -105,6 +105,8 @@ def _delegate_property_get(self, name): elif not is_list_like(result): return result + result = np.asarray(result) + # blow up if we operate on categories if self.orig is not None: result = take_1d(result, self.orig.cat.codes) diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 8aea14a2688d1..bc768a8bc5b58 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -455,7 +455,7 @@ def period_break(dates, period): """ current = getattr(dates, period) previous = getattr(dates - 1, period) - return (current - previous).nonzero()[0] + return np.nonzero(current - previous)[0] def has_level_label(label_flags, vmin): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 983c1a4cd9de9..11d2d29597fc0 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -64,6 +64,7 @@ def f(self): if self.tz is not utc: values = self._local_timestamps() + # boolean accessors -> return array if field in ['is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end']: @@ -73,16 +74,20 @@ def f(self): result = libts.get_start_end_field(values, field, self.freqstr, month_kw) - elif field in ['weekday_name']: - result = libts.get_date_name_field(values, field) - return self._maybe_mask_results(result) + return self._maybe_mask_results(result, convert='float64') elif field in ['is_leap_year']: # no need to mask NaT return libts.get_date_field(values, field) + + # non-boolean accessors -> return Index + elif field in ['weekday_name']: + result = libts.get_date_name_field(values, field) + result = self._maybe_mask_results(result) else: result = libts.get_date_field(values, field) + result = self._maybe_mask_results(result, convert='float64') - return self._maybe_mask_results(result, convert='float64') + return Index(result, name=self.name) f.__name__ = name f.__doc__ = docstring @@ -1909,9 +1914,9 @@ def to_julian_date(self): """ # http://mysite.verizon.net/aesir_research/date/jdalg2.htm - year = self.year - month = self.month - day = self.day + year = np.asarray(self.year) + month = np.asarray(self.month) + day = np.asarray(self.day) testarr = month < 3 year[testarr] -= 1 month[testarr] += 12 diff --git a/pandas/tseries/period.py 
b/pandas/tseries/period.py index f7e9ba9eaa9b1..c279d5a9342e8 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -52,7 +52,8 @@ def _field_accessor(name, alias, docstring=None): def f(self): base, mult = _gfc(self.freq) - return get_period_field_arr(alias, self._values, base) + result = get_period_field_arr(alias, self._values, base) + return Index(result, name=self.name) f.__name__ = name f.__doc__ = docstring return property(f) @@ -585,7 +586,7 @@ def to_datetime(self, dayfirst=False): @property def is_leap_year(self): """ Logical indicating if the date belongs to a leap year """ - return tslib._isleapyear_arr(self.year) + return tslib._isleapyear_arr(np.asarray(self.year)) @property def start_time(self): diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 13d844bb6a399..55333890640c1 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -374,7 +374,7 @@ def _get_field(self, m): else: result = np.array([getattr(Timedelta(val), m) for val in values], dtype='int64') - return result + return Index(result, name=self.name) @property def days(self): @@ -437,7 +437,8 @@ def total_seconds(self): .. versionadded:: 0.17.0 """ - return self._maybe_mask_results(1e-9 * self.asi8) + return Index(self._maybe_mask_results(1e-9 * self.asi8), + name=self.name) def to_pytimedelta(self): """ diff --git a/pandas/tseries/util.py b/pandas/tseries/util.py index dc460dee8415b..da3bb075dd02c 100644 --- a/pandas/tseries/util.py +++ b/pandas/tseries/util.py @@ -54,7 +54,7 @@ def pivot_annual(series, freq=None): if freq == 'D': width = 366 - offset = index.dayofyear - 1 + offset = np.asarray(index.dayofyear) - 1 # adjust for leap year offset[(~isleapyear(year)) & (offset >= 59)] += 1 @@ -63,7 +63,7 @@ def pivot_annual(series, freq=None): # todo: strings like 1/1, 1/25, etc.? elif freq in ('M', 'BM'): width = 12 - offset = index.month - 1 + offset = np.asarray(index.month) - 1 columns = lrange(1, 13) elif freq == 'H': width = 8784 From 94720d951b4e804bab72abc33dffeb2186ecb310 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 22 Mar 2017 14:48:38 -0400 Subject: [PATCH 260/933] API: change default behaviour of str.match from deprecated extract to match (GH5224) This PR changes the default behaviour of `str.match` from extracting groups to just a match (True/False). The previous default behaviour was deprecated since 0.13.0 (https://github.com/pandas-dev/pandas/pull/5224) Author: Joris Van den Bossche Closes #15257 from jorisvandenbossche/str-match and squashes the following commits: 0ab36b6 [Joris Van den Bossche] Raise FutureWarning instead of UserWarning for as_indexer a2bae51 [Joris Van den Bossche] raise error in case of regex with groups and as_indexer=False 87446c3 [Joris Van den Bossche] fix test 0788de2 [Joris Van den Bossche] API: change default behaviour of str.match from deprecated extract to match (GH5224) --- doc/source/text.rst | 12 ------- doc/source/whatsnew/v0.20.0.txt | 7 ++++ pandas/core/strings.py | 59 +++++++++--------------------- pandas/tests/test_strings.py | 63 ++++++++++++--------------------- 4 files changed, 46 insertions(+), 95 deletions(-) diff --git a/doc/source/text.rst b/doc/source/text.rst index 2b2520cb6100f..b110ef2167a03 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -385,18 +385,6 @@ or match a pattern: The distinction between ``match`` and ``contains`` is strictness: ``match`` relies on strict ``re.match``, while ``contains`` relies on ``re.search``. -.. 
warning:: - - In previous versions, ``match`` was for *extracting* groups, - returning a not-so-convenient Series of tuples. The new method ``extract`` - (described in the previous section) is now preferred. - - This old, deprecated behavior of ``match`` is still the default. As - demonstrated above, use the new behavior by setting ``as_indexer=True``. - In this mode, ``match`` is analogous to ``contains``, returning a boolean - Series. The new behavior will become the default behavior in a future - release. - Methods like ``match``, ``contains``, ``startswith``, and ``endswith`` take an extra ``na`` argument so missing values can be considered True or False: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 6d951af139b42..37a70435ed6ff 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -761,6 +761,12 @@ Other API Changes - ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`) - ``.merge()`` and ``.join()`` on ``category`` dtype columns will now preserve the category dtype when possible (:issue:`10409`) - ``SparseDataFrame.default_fill_value`` will be 0, previously was ``nan`` in the return from ``pd.get_dummies(..., sparse=True)`` (:issue:`15594`) +- The default behaviour of ``Series.str.match`` has changed from extracting + groups to matching the pattern. The extracting behaviour was deprecated + since pandas version 0.13.0 and can be done with the ``Series.str.extract`` + method (:issue:`5224`). As a consequence, the ``as_indexer`` keyword is + ignored (no longer needed to specify the new behaviour) and is deprecated. + .. _whatsnew_0200.deprecations: @@ -777,6 +783,7 @@ Deprecations - ``Series.sortlevel`` and ``DataFrame.sortlevel`` have been deprecated in favor of ``Series.sort_index`` and ``DataFrame.sort_index`` (:issue:`15099`) - importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explicit imports (:issue:`15358`) - ``Series/DataFrame/Panel.consolidate()`` has been deprecated as a public method. (:issue:`15483`) +- The ``as_indexer`` keyword of ``Series.str.match()`` has been deprecated (ignored keyword) (:issue:`15257`). - The following top-level pandas functions have been deprecated and will be removed in a future version (:issue:`13790`) * ``pd.pnow()``, replaced by ``Period.now()`` * ``pd.Term``, is removed, as it is not applicable to user code. Instead use in-line string expressions in the where clause when searching in HDFStore diff --git a/pandas/core/strings.py b/pandas/core/strings.py index b5b5d58235eaa..504d3dd47cc21 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -464,11 +464,9 @@ def rep(x, r): return result -def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=False): +def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=None): """ - Deprecated: Find groups in each string in the Series/Index - using passed regular expression. - If as_indexer=True, determine if each string matches a regular expression. + Determine if each string matches a regular expression. Parameters ---------- @@ -479,60 +477,37 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=False): flags : int, default 0 (no flags) re module flags, e.g. re.IGNORECASE na : default NaN, fill value for missing values. 
- as_indexer : False, by default, gives deprecated behavior better achieved - using str_extract. True return boolean indexer. + as_indexer : DEPRECATED Returns ------- Series/array of boolean values - if as_indexer=True - Series/Index of tuples - if as_indexer=False, default but deprecated See Also -------- contains : analogous, but less strict, relying on re.search instead of re.match - extract : now preferred to the deprecated usage of match (as_indexer=False) + extract : extract matched groups - Notes - ----- - To extract matched groups, which is the deprecated behavior of match, use - str.extract. """ - if not case: flags |= re.IGNORECASE regex = re.compile(pat, flags=flags) - if (not as_indexer) and regex.groups > 0: - # Do this first, to make sure it happens even if the re.compile - # raises below. - warnings.warn("In future versions of pandas, match will change to" - " always return a bool indexer.", FutureWarning, - stacklevel=3) - - if as_indexer and regex.groups > 0: - warnings.warn("This pattern has match groups. To actually get the" - " groups, use str.extract.", UserWarning, stacklevel=3) + if (as_indexer is False) and (regex.groups > 0): + raise ValueError("as_indexer=False with a pattern with groups is no " + "longer supported. Use '.str.extract(pat)' instead") + elif as_indexer is not None: + # Previously, this keyword was used for changing the default but + # deprecated behaviour. This keyword is now no longer needed. + warnings.warn("'as_indexer' keyword was specified but is ignored " + "(match now returns a boolean indexer by default), " + "and will be removed in a future version.", + FutureWarning, stacklevel=3) - # If not as_indexer and regex.groups == 0, this returns empty lists - # and is basically useless, so we will not warn. - - if (not as_indexer) and regex.groups > 0: - dtype = object - - def f(x): - m = regex.match(x) - if m: - return m.groups() - else: - return [] - else: - # This is the new behavior of str_match. 
- dtype = bool - f = lambda x: bool(regex.match(x)) + dtype = bool + f = lambda x: bool(regex.match(x)) return _na_map(f, arr, na, dtype=dtype) @@ -1587,7 +1562,7 @@ def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): return self._wrap_result(result) @copy(str_match) - def match(self, pat, case=True, flags=0, na=np.nan, as_indexer=False): + def match(self, pat, case=True, flags=0, na=np.nan, as_indexer=None): result = str_match(self._data, pat, case=case, flags=flags, na=na, as_indexer=as_indexer) return self._wrap_result(result) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f8ce0070b2c78..7a68ec8f368ae 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -559,64 +559,44 @@ def test_repeat(self): exp = Series([u('a'), u('bb'), NA, u('cccc'), NA, u('dddddd')]) tm.assert_series_equal(result, exp) - def test_deprecated_match(self): - # Old match behavior, deprecated (but still default) in 0.13 + def test_match(self): + # New match behavior introduced in 0.13 values = Series(['fooBAD__barBAD', NA, 'foo']) - - with tm.assert_produces_warning(): - result = values.str.match('.*(BAD[_]+).*(BAD)') - exp = Series([('BAD__', 'BAD'), NA, []]) - tm.assert_series_equal(result, exp) - - # mixed - mixed = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(), - 'foo', None, 1, 2.]) - - with tm.assert_produces_warning(): - rs = Series(mixed).str.match('.*(BAD[_]+).*(BAD)') - xp = Series([('BAD_', 'BAD'), NA, ('BAD_', 'BAD'), - NA, NA, [], NA, NA, NA]) - tm.assertIsInstance(rs, Series) - tm.assert_series_equal(rs, xp) - - # unicode - values = Series([u('fooBAD__barBAD'), NA, u('foo')]) - - with tm.assert_produces_warning(): - result = values.str.match('.*(BAD[_]+).*(BAD)') - exp = Series([(u('BAD__'), u('BAD')), NA, []]) + result = values.str.match('.*(BAD[_]+).*(BAD)') + exp = Series([True, NA, False]) tm.assert_series_equal(result, exp) - def test_match(self): - # New match behavior introduced in 0.13 values = Series(['fooBAD__barBAD', NA, 'foo']) - with tm.assert_produces_warning(): - result = values.str.match('.*(BAD[_]+).*(BAD)', as_indexer=True) + result = values.str.match('.*BAD[_]+.*BAD') exp = Series([True, NA, False]) tm.assert_series_equal(result, exp) - # If no groups, use new behavior even when as_indexer is False. - # (Old behavior is pretty much useless in this case.) 
+ # test passing as_indexer still works but is ignored values = Series(['fooBAD__barBAD', NA, 'foo']) - result = values.str.match('.*BAD[_]+.*BAD', as_indexer=False) exp = Series([True, NA, False]) + with tm.assert_produces_warning(FutureWarning): + result = values.str.match('.*BAD[_]+.*BAD', as_indexer=True) + tm.assert_series_equal(result, exp) + with tm.assert_produces_warning(FutureWarning): + result = values.str.match('.*BAD[_]+.*BAD', as_indexer=False) tm.assert_series_equal(result, exp) + with tm.assert_produces_warning(FutureWarning): + result = values.str.match('.*(BAD[_]+).*(BAD)', as_indexer=True) + tm.assert_series_equal(result, exp) + self.assertRaises(ValueError, values.str.match, '.*(BAD[_]+).*(BAD)', + as_indexer=False) # mixed mixed = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(), 'foo', None, 1, 2.]) - - with tm.assert_produces_warning(): - rs = Series(mixed).str.match('.*(BAD[_]+).*(BAD)', as_indexer=True) + rs = Series(mixed).str.match('.*(BAD[_]+).*(BAD)') xp = Series([True, NA, True, NA, NA, False, NA, NA, NA]) tm.assertIsInstance(rs, Series) tm.assert_series_equal(rs, xp) # unicode values = Series([u('fooBAD__barBAD'), NA, u('foo')]) - - with tm.assert_produces_warning(): - result = values.str.match('.*(BAD[_]+).*(BAD)', as_indexer=True) + result = values.str.match('.*(BAD[_]+).*(BAD)') exp = Series([True, NA, False]) tm.assert_series_equal(result, exp) @@ -2610,10 +2590,11 @@ def test_match_findall_flags(self): pat = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})' - with tm.assert_produces_warning(FutureWarning): - result = data.str.match(pat, flags=re.IGNORECASE) + result = data.str.extract(pat, flags=re.IGNORECASE, expand=True) + self.assertEqual(result.iloc[0].tolist(), ['dave', 'google', 'com']) - self.assertEqual(result[0], ('dave', 'google', 'com')) + result = data.str.match(pat, flags=re.IGNORECASE) + self.assertEqual(result[0], True) result = data.str.findall(pat, flags=re.IGNORECASE) self.assertEqual(result[0][0], ('dave', 'google', 'com')) From 7fa77527c747f2d91b6c16fe512cd05a7a072ec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0ediv=C3=BD?= Date: Wed, 22 Mar 2017 22:36:53 +0100 Subject: [PATCH 261/933] Update testing.py (#15784) Docs typo fix --- pandas/util/testing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index cf76f4ead77e3..9a9f3c6c6b945 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1151,7 +1151,7 @@ def assert_series_equal(left, right, check_dtype=True, Whether to compare number exactly. check_names : bool, default True Whether to check the Series and Index names attribute. - check_dateteimelike_compat : bool, default False + check_datetimelike_compat : bool, default False Compare datetime-like which is comparable ignoring dtype. check_categorical : bool, default True Whether to compare internal Categorical exactly. @@ -1264,7 +1264,7 @@ def assert_frame_equal(left, right, check_dtype=True, If True, compare by blocks. check_exact : bool, default False Whether to compare number exactly. - check_dateteimelike_compat : bool, default False + check_datetimelike_compat : bool, default False Compare datetime-like which is comparable ignoring dtype. check_categorical : bool, default True Whether to compare internal Categorical exactly. 
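An illustrative aside (not part of the patch series): the net effect of PATCH 260 on ``Series.str.match`` can be sketched as follows, assuming pandas >= 0.20 and made-up data:

>>> import pandas as pd
>>> s = pd.Series(['fooBAD__barBAD', 'foo'])
>>> s.str.match('.*(BAD[_]+).*(BAD)')                  # now a plain boolean indexer: [True, False]
>>> s.str.extract('.*(BAD[_]+).*(BAD)', expand=True)   # group extraction lives here: ('BAD__', 'BAD') / (NaN, NaN)
>>> s.str.match('.*BAD[_]+.*BAD', as_indexer=True)     # as_indexer is ignored and emits a FutureWarning

In other words, ``match`` is now a strict (``re.match``-based) counterpart of ``contains``, and the old tuple-extracting behaviour is reached only through ``extract``.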
From 1bcb671877287be731ce677aaf96686278b69f9a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 23 Mar 2017 15:06:49 -0400 Subject: [PATCH 262/933] CI: remove travis dedupe as enabled auto-cancellation xref https://github.com/pandas-dev/pandas/commit/79581ffe6fb73089dfa8394c2f4e44677acfe1ce Of course Travis just announced auto-cancellation / it looks good when I enabled it, so removing this :< Author: Jeff Reback Closes #15783 from jreback/cancel and squashes the following commits: 8286d70 [Jeff Reback] CI: remove travis dedupe as enabled auto-cancellation --- .travis.yml | 2 -- ci/travis_fast_finish.py | 77 ---------------------------------------- 2 files changed, 79 deletions(-) delete mode 100755 ci/travis_fast_finish.py diff --git a/.travis.yml b/.travis.yml index 270f8c2fc76c3..eb2a58b0616ef 100644 --- a/.travis.yml +++ b/.travis.yml @@ -177,8 +177,6 @@ matrix: - USE_CACHE=true before_install: - - echo "Checking to see if this build is outdated" - - ci/travis_fast_finish.py || { echo "Failing outdated build to end it."; exit 1; } - echo "before_install" - source ci/travis_process_gbq_encryption.sh - export PATH="$HOME/miniconda3/bin:$PATH" diff --git a/ci/travis_fast_finish.py b/ci/travis_fast_finish.py deleted file mode 100755 index c2e2a9159918b..0000000000000 --- a/ci/travis_fast_finish.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python - -# script to cancel previous travis builds for the same PR -# originally from -# https://github.com/conda-forge/staged-recipes/pull/2257 - -try: - from future_builtins import ( - map, - filter, - ) -except ImportError: - pass - -import codecs -import contextlib -import json -import os - -try: - from urllib.request import ( - Request, - urlopen, - ) -except ImportError: - from urllib2 import ( - Request, - urlopen, - ) - - -def check_latest_pr_build(repo, pr, build_num): - # Not a PR so it is latest. - if pr is None: - return True - - headers = { - "Accept": "application/vnd.travis-ci.2+json", - } - url = "https://api.travis-ci.org/repos/{repo}/builds?event_type=pull_request" - - request = Request(url.format(repo=repo), headers=headers) - with contextlib.closing(urlopen(request)) as response: - reader = codecs.getreader("utf-8") - data = json.load(reader(response)) - - # Parse the response to get a list of build numbers for this PR. - builds = data["builds"] - pr_builds = filter(lambda b: b["pull_request_number"] == pr, builds) - pr_build_nums = sorted(map(lambda b: int(b["number"]), pr_builds)) - - print("build_num: {}".format(build_num)) - print("pr_build_nums: {}".format(','.join([str(n) for n in pr_build_nums]))) - - # Check if our build number is the latest (largest) - # out of all of the builds for this PR. 
- if build_num < max(pr_build_nums): - return False - else: - return True - - -def main(): - repo = os.environ["TRAVIS_REPO_SLUG"] - - pr = os.environ["TRAVIS_PULL_REQUEST"] - pr = None if pr == "false" else int(pr) - build_num = int(os.environ["TRAVIS_BUILD_NUMBER"]) - - print("checking for fast_finish: {}-{}-{}".format(repo, pr, build_num)) - - return int(check_latest_pr_build(repo, pr, build_num) is False) - - -if __name__ == "__main__": - import sys - sys.exit(main()) From 56ccad8229824584678e22815f4f180a91309c9d Mon Sep 17 00:00:00 2001 From: Kernc Date: Thu, 23 Mar 2017 18:42:20 +0100 Subject: [PATCH 263/933] DOC: .groupby() aligns Series, accepts ndarray closes #15789 closes #15244 --- pandas/core/generic.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 87052800b8fb5..134840728d931 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4129,11 +4129,14 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, Parameters ---------- - by : mapping function / list of functions, dict, Series, or tuple / - list of column names or index level names. + by : mapping function / list of functions, dict, Series, ndarray, + or tuple / list of column names or index level names or + Series or ndarrays Called on each element of the object index to determine the groups. If a dict or Series is passed, the Series or dict VALUES will be - used to determine the groups + used to determine the groups (the Series' values are first + aligned; see ``.align()`` method). If ndarray is passed, the + values as-is determine the groups. axis : int, default 0 level : int, level name, or sequence of such, default None If the axis is a MultiIndex (hierarchical), group by a particular From 39a46fff8d73751dab5f5abfd50cbe221a2f91d4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 23 Mar 2017 15:11:07 -0400 Subject: [PATCH 264/933] COMPAT: 3.6.1 compat for change in PySlice_GetIndicesEx This doesn't actually matter to any tests except for some internal consistency ones. Bonus is that it eliminates a warning :< Note that we aren't actually testing this (yet) on Travis as our 3.6 build uses conda-forge and 3.6.1 is not there as of yet. It's in defaults though (and shows up on appveyor build). 
Author: Jeff Reback Closes #15790 from jreback/py361 and squashes the following commits: 42ddddc [Jeff Reback] change to version < 3 d36902c [Jeff Reback] COMPAT: 3.6.1 compat for change in PySlice_GetIndices_Ex --- pandas/_libs/lib.pyx | 19 ++++++++-------- pandas/_libs/src/compat_helper.h | 37 ++++++++++++++++++++++++++++++++ pandas/tests/test_internals.py | 36 ++++++++++++++++++++----------- setup.py | 3 ++- 4 files changed, 73 insertions(+), 22 deletions(-) create mode 100644 pandas/_libs/src/compat_helper.h diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f78040e5a52f2..f902422b0916d 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -3,6 +3,7 @@ cimport numpy as np cimport cython import numpy as np import sys + cdef bint PY3 = (sys.version_info[0] >= 3) from numpy cimport * @@ -26,7 +27,8 @@ from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem, PyObject_SetAttrString, PyObject_RichCompareBool, PyBytes_GET_SIZE, - PyUnicode_GET_SIZE) + PyUnicode_GET_SIZE, + PyObject) try: from cpython cimport PyString_GET_SIZE @@ -36,11 +38,10 @@ except ImportError: cdef extern from "Python.h": Py_ssize_t PY_SSIZE_T_MAX - ctypedef struct PySliceObject: - pass +cdef extern from "compat_helper.h": - cdef int PySlice_GetIndicesEx( - PySliceObject* s, Py_ssize_t length, + cdef int slice_get_indices( + PyObject* s, Py_ssize_t length, Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step, Py_ssize_t *slicelength) except -1 @@ -1658,8 +1659,8 @@ cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): if slc is None: raise TypeError("slc should be a slice") - PySlice_GetIndicesEx(slc, objlen, - &start, &stop, &step, &length) + slice_get_indices(slc, objlen, + &start, &stop, &step, &length) return start, stop, step, length @@ -1683,8 +1684,8 @@ cpdef Py_ssize_t slice_len( if slc is None: raise TypeError("slc must be slice") - PySlice_GetIndicesEx(slc, objlen, - &start, &stop, &step, &length) + slice_get_indices(slc, objlen, + &start, &stop, &step, &length) return length diff --git a/pandas/_libs/src/compat_helper.h b/pandas/_libs/src/compat_helper.h new file mode 100644 index 0000000000000..e3c40d2ca65f4 --- /dev/null +++ b/pandas/_libs/src/compat_helper.h @@ -0,0 +1,37 @@ +/* +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. 
+*/ + +#ifndef PANDAS__LIBS_SRC_COMPAT_HELPER_H_ +#define PANDAS__LIBS_SRC_COMPAT_HELPER_H_ + +#include "Python.h" +#include "numpy_helper.h" + +/* +PySlice_GetIndicesEx changes signature in PY3 +but 3.6.1 in particular changes the behavior of this function slightly +https://bugs.python.org/issue27867 +*/ + +PANDAS_INLINE int slice_get_indices(PyObject *s, + Py_ssize_t length, + Py_ssize_t *start, + Py_ssize_t *stop, + Py_ssize_t *step, + Py_ssize_t *slicelength) { +#if PY_VERSION_HEX >= 0x03000000 + return PySlice_GetIndicesEx(s, length, start, stop, + step, slicelength); +#else + return PySlice_GetIndicesEx((PySliceObject *)s, length, start, + stop, step, slicelength); +#endif +} + +#endif // PANDAS__LIBS_SRC_COMPAT_HELPER_H_ diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 29920b165d3f6..af7c584249416 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -2,11 +2,12 @@ # pylint: disable=W0102 from datetime import datetime, date - +import sys import pytest import numpy as np import re +from distutils.version import LooseVersion import itertools from pandas import (Index, MultiIndex, DataFrame, DatetimeIndex, Series, Categorical) @@ -22,6 +23,9 @@ randn, assert_series_equal) from pandas.compat import zip, u +# in 3.6.1 a c-api slicing function changed, see src/compat_helper.h +PY361 = sys.version >= LooseVersion('3.6.1') + @pytest.fixture def mgr(): @@ -1128,8 +1132,10 @@ def assert_as_slice_equals(arr, slc): assert_as_slice_equals([0, 100], slice(0, 200, 100)) assert_as_slice_equals([2, 1], slice(2, 0, -1)) - assert_as_slice_equals([2, 1, 0], slice(2, None, -1)) - assert_as_slice_equals([100, 0], slice(100, None, -100)) + + if not PY361: + assert_as_slice_equals([2, 1, 0], slice(2, None, -1)) + assert_as_slice_equals([100, 0], slice(100, None, -100)) def test_not_slice_like_arrays(self): def assert_not_slice_like(arr): @@ -1150,8 +1156,9 @@ def test_slice_iter(self): assert list(BlockPlacement(slice(0, 0))) == [] assert list(BlockPlacement(slice(3, 0))) == [] - assert list(BlockPlacement(slice(3, 0, -1))) == [3, 2, 1] - assert list(BlockPlacement(slice(3, None, -1))) == [3, 2, 1, 0] + if not PY361: + assert list(BlockPlacement(slice(3, 0, -1))) == [3, 2, 1] + assert list(BlockPlacement(slice(3, None, -1))) == [3, 2, 1, 0] def test_slice_to_array_conversion(self): def assert_as_array_equals(slc, asarray): @@ -1164,8 +1171,10 @@ def assert_as_array_equals(slc, asarray): assert_as_array_equals(slice(3, 0), []) assert_as_array_equals(slice(3, 0, -1), [3, 2, 1]) - assert_as_array_equals(slice(3, None, -1), [3, 2, 1, 0]) - assert_as_array_equals(slice(31, None, -10), [31, 21, 11, 1]) + + if not PY361: + assert_as_array_equals(slice(3, None, -1), [3, 2, 1, 0]) + assert_as_array_equals(slice(31, None, -10), [31, 21, 11, 1]) def test_blockplacement_add(self): bpl = BlockPlacement(slice(0, 5)) @@ -1180,23 +1189,26 @@ def assert_add_equals(val, inc, result): assert_add_equals(slice(0, 0), 0, []) assert_add_equals(slice(1, 4), 0, [1, 2, 3]) assert_add_equals(slice(3, 0, -1), 0, [3, 2, 1]) - assert_add_equals(slice(2, None, -1), 0, [2, 1, 0]) assert_add_equals([1, 2, 4], 0, [1, 2, 4]) assert_add_equals(slice(0, 0), 10, []) assert_add_equals(slice(1, 4), 10, [11, 12, 13]) assert_add_equals(slice(3, 0, -1), 10, [13, 12, 11]) - assert_add_equals(slice(2, None, -1), 10, [12, 11, 10]) assert_add_equals([1, 2, 4], 10, [11, 12, 14]) assert_add_equals(slice(0, 0), -1, []) assert_add_equals(slice(1, 4), -1, [0, 1, 2]) - assert_add_equals(slice(3, 
0, -1), -1, [2, 1, 0]) assert_add_equals([1, 2, 4], -1, [0, 1, 3]) with pytest.raises(ValueError): BlockPlacement(slice(1, 4)).add(-10) with pytest.raises(ValueError): BlockPlacement([1, 2, 4]).add(-10) - with pytest.raises(ValueError): - BlockPlacement(slice(2, None, -1)).add(-1) + + if not PY361: + assert_add_equals(slice(3, 0, -1), -1, [2, 1, 0]) + assert_add_equals(slice(2, None, -1), 0, [2, 1, 0]) + assert_add_equals(slice(2, None, -1), 10, [12, 11, 10]) + + with pytest.raises(ValueError): + BlockPlacement(slice(2, None, -1)).add(-1) diff --git a/setup.py b/setup.py index 8e690f05b818c..1b471f76ac5e6 100755 --- a/setup.py +++ b/setup.py @@ -460,7 +460,8 @@ def pxd(name): extra_compile_args=['-Wno-unused-function'] lib_depends = lib_depends + ['pandas/_libs/src/numpy_helper.h', - 'pandas/_libs/src/parse_helper.h'] + 'pandas/_libs/src/parse_helper.h', + 'pandas/_libs/src/compat_helper.h'] tseries_depends = ['pandas/_libs/src/datetime/np_datetime.h', From 9d3554c26aa85d66cbfe6f481464f1a357af5e12 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 23 Mar 2017 15:32:31 -0400 Subject: [PATCH 265/933] CI: tweaks in codecov --- codecov.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/codecov.yml b/codecov.yml index 45a6040c6a50d..b4552563deeaa 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,3 +1,6 @@ +codecov: + branch: master + coverage: status: project: @@ -6,4 +9,3 @@ coverage: patch: default: target: '50' - branches: null From 5d28f26bb3b2a4fa7adc0808be54d49a70b1589b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Mar 2017 11:56:18 -0400 Subject: [PATCH 266/933] CI: fix coverage file location CI: clean up some unused env variables Author: Jeff Reback Closes #15792 from jreback/ci and squashes the following commits: 8100d6d [Jeff Reback] CI: fix coverage file location --- .travis.yml | 33 +++++++-------------------------- ci/script_multi.sh | 4 ++-- ci/script_single.sh | 4 ++-- 3 files changed, 11 insertions(+), 30 deletions(-) diff --git a/.travis.yml b/.travis.yml index eb2a58b0616ef..d78e4dab31fbe 100644 --- a/.travis.yml +++ b/.travis.yml @@ -39,7 +39,6 @@ matrix: - TEST_ARGS="--skip-slow --skip-network" - JOB_TAG=_OSX - TRAVIS_PYTHON_VERSION=3.5 - - CACHE_NAME="35_osx" - USE_CACHE=true - python: 2.7 env: @@ -47,9 +46,7 @@ matrix: - JOB_NAME: "27_slow_nnet_LOCALE" - TEST_ARGS="--only-slow --skip-network" - LOCALE_OVERRIDE="zh_CN.UTF-8" - - FULL_DEPS=true - JOB_TAG=_LOCALE - - CACHE_NAME="27_slow_nnet_LOCALE" - USE_CACHE=true addons: apt: @@ -60,10 +57,8 @@ matrix: - PYTHON_VERSION=2.7 - JOB_NAME: "27_nslow" - TEST_ARGS="--skip-slow" - - FULL_DEPS=true - CLIPBOARD_GUI=gtk2 - LINT=true - - CACHE_NAME="27_nslow" - USE_CACHE=true addons: apt: @@ -74,10 +69,8 @@ matrix: - PYTHON_VERSION=3.5 - JOB_NAME: "35_nslow" - TEST_ARGS="--skip-slow --skip-network" - - FULL_DEPS=true - CLIPBOARD=xsel - COVERAGE=true - - CACHE_NAME="35_nslow" - USE_CACHE=true addons: apt: @@ -96,28 +89,24 @@ matrix: packages: - libatlas-base-dev - gfortran -# In allow_failures + # In allow_failures - python: 2.7 env: - PYTHON_VERSION=2.7 - JOB_NAME: "27_slow" - JOB_TAG=_SLOW - TEST_ARGS="--only-slow --skip-network" - - FULL_DEPS=true - - CACHE_NAME="27_slow" - USE_CACHE=true -# In allow_failures + # In allow_failures - python: 2.7 env: - PYTHON_VERSION=2.7 - JOB_NAME: "27_build_test" - JOB_TAG=_BUILD_TEST - TEST_ARGS="--skip-slow" - - FULL_DEPS=true - BUILD_TEST=true - - CACHE_NAME="27_build_test" - USE_CACHE=true -# In allow_failures + # In allow_failures - python: 3.5 env: - 
PYTHON_VERSION=3.5 @@ -125,17 +114,14 @@ matrix: - JOB_TAG=_NUMPY_DEV - TEST_ARGS="--skip-slow --skip-network" - PANDAS_TESTING_MODE="deprecate" - - CACHE_NAME="35_numpy_dev" - USE_CACHE=true -# In allow_failures + # In allow_failures - python: 3.5 env: - PYTHON_VERSION=3.5 - JOB_NAME: "doc_build" - - FULL_DEPS=true - DOC_BUILD=true - JOB_TAG=_DOC_BUILD - - CACHE_NAME="doc_build" - USE_CACHE=true allow_failures: - python: 2.7 @@ -144,8 +130,6 @@ matrix: - JOB_NAME: "27_slow" - JOB_TAG=_SLOW - TEST_ARGS="--only-slow --skip-network" - - FULL_DEPS=true - - CACHE_NAME="27_slow" - USE_CACHE=true - python: 2.7 env: @@ -153,9 +137,7 @@ matrix: - JOB_NAME: "27_build_test" - JOB_TAG=_BUILD_TEST - TEST_ARGS="--skip-slow" - - FULL_DEPS=true - BUILD_TEST=true - - CACHE_NAME="27_build_test" - USE_CACHE=true - python: 3.5 env: @@ -164,16 +146,13 @@ matrix: - JOB_TAG=_NUMPY_DEV - TEST_ARGS="--skip-slow --skip-network" - PANDAS_TESTING_MODE="deprecate" - - CACHE_NAME="35_numpy_dev" - USE_CACHE=true - python: 3.5 env: - PYTHON_VERSION=3.5 - JOB_NAME: "doc_build" - - FULL_DEPS=true - DOC_BUILD=true - JOB_TAG=_DOC_BUILD - - CACHE_NAME="doc_build" - USE_CACHE=true before_install: @@ -209,7 +188,9 @@ script: - echo "script done" after_success: - - source activate pandas && codecov + - if [ "$COVERAGE" ]; then + source activate pandas && codecov --file /tmp/cov-single.xml /tmp/cov-multiple.xml; + fi after_script: - echo "after_script start" diff --git a/ci/script_multi.sh b/ci/script_multi.sh index 2d1211b2f7b96..f0fbb8c54bf2a 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -27,8 +27,8 @@ if [ "$BUILD_TEST" ]; then cd /tmp python -c "import pandas; pandas.test(['-n 2'])" elif [ "$COVERAGE" ]; then - echo pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/cov.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas - pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/cov.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas else echo pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest diff --git a/ci/script_single.sh b/ci/script_single.sh index 2d7962352842b..86e822cb57653 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -20,8 +20,8 @@ fi if [ "$BUILD_TEST" ]; then echo "We are not running pytest as this is simply a build test." 
elif [ "$COVERAGE" ]; then - echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas - pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas else echo pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest From 59f977f366d1560b3600d7fb1fdb36ffd189c151 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 25 Mar 2017 11:57:16 -0400 Subject: [PATCH 267/933] MAINT: Enforce string type for where parameter Deprecated in 0.11.0. xref #12027. Author: gfyoung Closes #15798 from gfyoung/where-string-enforce and squashes the following commits: 06adda1 [gfyoung] MAINT: Enforce string type for where parameter --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/computation/pytables.py | 89 +++++++++++--------------------- pandas/tests/io/test_pytables.py | 53 ------------------- 3 files changed, 30 insertions(+), 113 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 37a70435ed6ff..dee1a5750eeeb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -812,6 +812,7 @@ Removal of prior version deprecations/changes - The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`) - The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) - ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`) +- Where clauses in ``pytables`` are only accepted as strings and expressions types and not other data-types (:issue:`12027`) - The ``LongPanel`` and ``WidePanel`` classes have been removed (:issue:`10892`) .. _whatsnew_0200.performance: diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 7c09ca8d38773..2a5056963fe8d 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -1,9 +1,7 @@ """ manage PyTables query interface via Expressions """ import ast -import warnings from functools import partial -from datetime import datetime, timedelta import numpy as np import pandas as pd @@ -452,6 +450,32 @@ def _rewrite_membership_op(self, node, left, right): return self.visit(node.op), node.op, left, right +def _validate_where(w): + """ + Validate that the where statement is of the right type. + + The type may either be String, Expr, or list-like of Exprs. + + Parameters + ---------- + w : String term expression, Expr, or list-like of Exprs. + + Returns + ------- + where : The original where clause if the check was successful. + + Raises + ------ + TypeError : An invalid data type was passed in for w (e.g. dict). 
+ """ + + if not (isinstance(w, (Expr, string_types)) or is_list_like(w)): + raise TypeError("where must be passed as a string, Expr, " + "or list-like of Exprs") + + return w + + class Expr(expr.Expr): """ hold a pytables like expression, comprised of possibly multiple 'terms' @@ -481,11 +505,9 @@ class Expr(expr.Expr): "major_axis>=20130101" """ - def __init__(self, where, op=None, value=None, queryables=None, - encoding=None, scope_level=0): + def __init__(self, where, queryables=None, encoding=None, scope_level=0): - # try to be back compat - where = self.parse_back_compat(where, op, value) + where = _validate_where(where) self.encoding = encoding self.condition = None @@ -505,7 +527,7 @@ def __init__(self, where, op=None, value=None, queryables=None, if isinstance(w, Expr): local_dict = w.env.scope else: - w = self.parse_back_compat(w) + w = _validate_where(w) where[idx] = w where = ' & ' .join(["(%s)" % w for w in where]) # noqa @@ -519,59 +541,6 @@ def __init__(self, where, op=None, value=None, queryables=None, encoding=encoding) self.terms = self.parse() - def parse_back_compat(self, w, op=None, value=None): - """ allow backward compatibility for passed arguments """ - - if isinstance(w, dict): - w, op, value = w.get('field'), w.get('op'), w.get('value') - if not isinstance(w, string_types): - raise TypeError( - "where must be passed as a string if op/value are passed") - warnings.warn("passing a dict to Expr is deprecated, " - "pass the where as a single string", - FutureWarning, stacklevel=10) - if isinstance(w, tuple): - if len(w) == 2: - w, value = w - op = '==' - elif len(w) == 3: - w, op, value = w - warnings.warn("passing a tuple into Expr is deprecated, " - "pass the where as a single string", - FutureWarning, stacklevel=10) - - if op is not None: - if not isinstance(w, string_types): - raise TypeError( - "where must be passed as a string if op/value are passed") - - if isinstance(op, Expr): - raise TypeError("invalid op passed, must be a string") - w = "{0}{1}".format(w, op) - if value is not None: - if isinstance(value, Expr): - raise TypeError("invalid value passed, must be a string") - - # stringify with quotes these values - def convert(v): - if isinstance(v, (datetime, np.datetime64, - timedelta, np.timedelta64)): - return "'{0}'".format(v) - return v - - if isinstance(value, (list, tuple)): - value = [convert(v) for v in value] - else: - value = convert(value) - - w = "{0}{1}".format(w, value) - - warnings.warn("passing multiple values to Expr is deprecated, " - "pass the where as a single string", - FutureWarning, stacklevel=10) - - return w - def __unicode__(self): if self.terms is not None: return pprint_thing(self.terms) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 324160d5b1ae6..2d62cb2d6944d 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2585,59 +2585,6 @@ def test_term_compat(self): expected = wp.loc[:, :, ['A', 'B']] assert_panel_equal(result, expected) - def test_backwards_compat_without_term_object(self): - with ensure_clean_store(self.path) as store: - - wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - store.append('wp', wp) - with catch_warnings(record=True): - result = store.select('wp', [('major_axis>20000102'), - ('minor_axis', '=', ['A', 'B'])]) - expected = wp.loc[:, - wp.major_axis > Timestamp('20000102'), - ['A', 'B']] - assert_panel_equal(result, expected) - - 
store.remove('wp', ('major_axis>20000103')) - result = store.select('wp') - expected = wp.loc[:, wp.major_axis <= Timestamp('20000103'), :] - assert_panel_equal(result, expected) - - with ensure_clean_store(self.path) as store: - - wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - store.append('wp', wp) - - # stringified datetimes - with catch_warnings(record=True): - result = store.select('wp', - [('major_axis', - '>', - datetime.datetime(2000, 1, 2))]) - expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] - assert_panel_equal(result, expected) - with catch_warnings(record=True): - result = store.select('wp', - [('major_axis', - '>', - datetime.datetime(2000, 1, 2, 0, 0))]) - expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] - assert_panel_equal(result, expected) - with catch_warnings(record=True): - result = store.select('wp', - [('major_axis', - '=', - [datetime.datetime(2000, 1, 2, 0, 0), - datetime.datetime(2000, 1, 3, 0, 0)])] - ) - expected = wp.loc[:, [Timestamp('20000102'), - Timestamp('20000103')]] - assert_panel_equal(result, expected) - def test_same_name_scoping(self): with ensure_clean_store(self.path) as store: From 8c8dd8881107ba353c675ea65774ae409e6aea35 Mon Sep 17 00:00:00 2001 From: Joe Jevnik Date: Sat, 25 Mar 2017 12:07:09 -0400 Subject: [PATCH 268/933] PERF: add the 'name' attribute to dataframes that go through apply_frame_axis0 Previously, if you did `group.name` in the applied function, it would fail and fall back to the slower path because the attribute did not exist; `shape_before` was unused. Author: Joe Jevnik This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #15062 from llllllllll/add-name-in-apply-inference-call and squashes the following commits: 722a945 [Joe Jevnik] DOC: update whatsnew for groupby perf change 7e75635 [Joe Jevnik] DEV: add groupby asv benchmark 710528a [Joe Jevnik] BUG: add the 'name' attribute to dataframes that go through apply_frame_axis0 --- asv_bench/benchmarks/groupby.py | 32 ++++++++++++++++++++++------ doc/source/whatsnew/v0.20.0.txt | 3 +++ pandas/_libs/src/reduce.pyx | 2 +- pandas/tests/groupby/test_groupby.py | 16 ++++++++++++++ 4 files changed, 45 insertions(+), 8 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 59f55914ea4d3..b8d8e8b7912d7 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -108,16 +108,34 @@ def setup(self): self.N = 10000 self.labels = np.random.randint(0, 2000, size=self.N) self.labels2 = np.random.randint(0, 3, size=self.N) - self.df = DataFrame({'key': self.labels, 'key2': self.labels2, 'value1': randn(self.N), 'value2': (['foo', 'bar', 'baz', 'qux'] * (self.N / 4)), }) - - def f(self, g): + self.df = DataFrame({ + 'key': self.labels, + 'key2': self.labels2, + 'value1': np.random.randn(self.N), + 'value2': (['foo', 'bar', 'baz', 'qux'] * (self.N // 4)), + }) + + @staticmethod + def scalar_function(g): return 1 - def time_groupby_frame_apply(self): - self.df.groupby(['key', 'key2']).apply(self.f) + def time_groupby_frame_apply_scalar_function(self): + self.df.groupby(['key', 'key2']).apply(self.scalar_function) + + def time_groupby_frame_apply_scalar_function_overhead(self): + self.df.groupby('key').apply(self.scalar_function) + + @staticmethod + def df_copy_function(g): + # ensure that the group name is available (see GH #15062) + g.name + return g.copy() + + def 
time_groupby_frame_df_copy_function(self): + self.df.groupby(['key', 'key2']).apply(self.df_copy_function) - def time_groupby_frame_apply_overhead(self): - self.df.groupby('key').apply(self.f) + def time_groupby_frame_apply_df_copy_overhead(self): + self.df.groupby('key').apply(self.df_copy_function) #---------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index dee1a5750eeeb..64bfeb3307e17 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -831,6 +831,9 @@ Performance Improvements - Improved performance when using ``.unstack()`` (:issue:`15503`) - Improved performance of merge/join on ``category`` columns (:issue:`10409`) - Improved performance of ``drop_duplicates()`` on ``bool`` columns (:issue:`12963`) +- Improve performance of ``pd.core.groupby.GroupBy.apply`` when the applied + function used the ``.name`` attribute of the group DataFrame (:issue:`15062`). + .. _whatsnew_0200.bug_fixes: diff --git a/pandas/_libs/src/reduce.pyx b/pandas/_libs/src/reduce.pyx index 1cd3e53494a72..2bba07256305a 100644 --- a/pandas/_libs/src/reduce.pyx +++ b/pandas/_libs/src/reduce.pyx @@ -497,7 +497,7 @@ def apply_frame_axis0(object frame, object f, object names, # Need to infer if our low-level mucking is going to cause a segfault if n > 0: chunk = frame.iloc[starts[0]:ends[0]] - shape_before = chunk.shape + object.__setattr__(chunk, 'name', names[0]) try: result = f(chunk) if result is chunk: diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index a355dca3029c7..9f5a7f404e2be 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3244,6 +3244,22 @@ def _check_all(grouped): _check_all(self.df.groupby('A')) _check_all(self.df.groupby(['A', 'B'])) + def test_group_name_available_in_inference_pass(self): + # gh-15062 + df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)}) + + names = [] + + def f(group): + names.append(group.name) + return group.copy() + + df.groupby('a', sort=False, group_keys=False).apply(f) + # we expect 2 zeros because we call ``f`` once to see if a faster route + # can be used. + expected_names = [0, 0, 1, 2] + tm.assert_equal(names, expected_names) + def test_no_dummy_key_names(self): # GH #1291 From 80f30b44e3c79f26b20fada91995c1874c2e5cdf Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Sat, 25 Mar 2017 14:04:46 -0400 Subject: [PATCH 269/933] DOC: Add details to DataFrame groupby transform closes #13543 Author: Kevin Sheppard Closes #14388 from bashtage/groupby-transform-doc-string and squashes the following commits: ef1ff13 [Kevin Sheppard] DOC: Add details to DataFrame groupby transform --- doc/source/groupby.rst | 38 +++++++++++++++++++++++++++++++++----- pandas/core/groupby.py | 15 +++++++++++++++ 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 8484ccd69a983..cbe3588104439 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -580,9 +580,21 @@ Transformation -------------- The ``transform`` method returns an object that is indexed the same (same size) -as the one being grouped. Thus, the passed transform function should return a -result that is the same size as the group chunk. For example, suppose we wished -to standardize the data within each group: +as the one being grouped. 
The transform function must: + +* Return a result that is either the same size as the group chunk or + broadcastable to the size of the group chunk (e.g., a scalar, + ``grouped.transform(lambda x: x.iloc[-1])``). +* Operate column-by-column on the group chunk. The transform is applied to + the first group chunk using chunk.apply. +* Not perform in-place operations on the group chunk. Group chunks should + be treated as immutable, and changes to a group chunk may produce unexpected + results. For example, when using ``fillna``, ``inplace`` must be ``False`` + (``grouped.transform(lambda x: x.fillna(inplace=False))``). +* (Optionally) operates on the entire group chunk. If this is supported, a + fast path is used starting from the *second* chunk. + +For example, suppose we wished to standardize the data within each group: .. ipython:: python @@ -620,6 +632,21 @@ We can also visually compare the original and transformed data sets. @savefig groupby_transform_plot.png compare.plot() +Transformation functions that have lower dimension outputs are broadcast to +match the shape of the input array. + +.. ipython:: python + + data_range = lambda x: x.max() - x.min() + ts.groupby(key).transform(data_range) + +Alternatively, the built-in methods can be used to produce the same +outputs. + +.. ipython:: python + + ts.groupby(key).transform('max') - ts.groupby(key).transform('min') + Another common data transform is to replace missing data with the group mean. .. ipython:: python @@ -664,8 +691,9 @@ and that the transformed data contains no NAs. .. note:: - Some functions when applied to a groupby object will automatically transform the input, returning - an object of the same shape as the original. Passing ``as_index=False`` will not affect these transformation methods. + Some functions when applied to a groupby object will automatically transform + the input, returning an object of the same shape as the original. Passing + ``as_index=False`` will not affect these transformation methods. For example: ``fillna, ffill, bfill, shift``. diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 727af8b8cd3eb..64e116df88b88 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -3649,10 +3649,25 @@ def transform(self, func, *args, **kwargs): Each subframe is endowed the attribute 'name' in case you need to know which group you are working on. + The current implementation imposes three requirements on f: + + * f must return a value that either has the same shape as the input + subframe or can be broadcast to the shape of the input subframe. + For example, if f returns a scalar it will be broadcast to have the + same shape as the input subframe. + * f must support application column-by-column in the subframe. If f + also supports application to the entire subframe, then a fast path + is used starting from the second chunk. + * f must not mutate subframes. Mutation is not supported and may + produce unexpected results. 
+ Examples -------- >>> grouped = df.groupby(lambda x: mapping[x]) + # Same shape >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) + # Broadcastable + >>> grouped.transform(lambda x: x.max() - x.min()) """ # optimized transforms From 83e24ca97b71e72a54ab360a44dc7a00f17ea429 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Mar 2017 14:15:51 -0400 Subject: [PATCH 270/933] DOC: template groupby.transform doc-string --- pandas/core/groupby.py | 109 +++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 54 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 64e116df88b88..dded55114ab6f 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -72,6 +72,55 @@ pandas.Panel.%(name)s """ +_transform_template = """ +Call function producing a like-indexed %(klass)s on each group and +return a %(klass)s having the same indexes as the original object +filled with the transformed values + +Parameters +---------- +f : function + Function to apply to each group + +Notes +----- +Each group is endowed the attribute 'name' in case you need to know +which group you are working on. + +The current implementation imposes three requirements on f: + +* f must return a value that either has the same shape as the input + subframe or can be broadcast to the shape of the input subframe. + For example, f returns a scalar it will be broadcast to have the + same shape as the input subframe. +* if this is a DataFrame, f must support application column-by-column + in the subframe. If f also supports application to the entire subframe, + then a fast path is used starting from the second chunk. +* f must not mutate groups. Mutation is not supported and may + produce unexpected results. + +Returns +------- +%(klass)s + +See also +-------- +aggregate, transform + +Examples +-------- +>>> df = pd.DataFrame(np.repeat(np.arange(10), 3).reshape(-1, 3), + columns=list('ABC')) +>>> grouped = df.groupby(df.index // 3) + +# Same shape +>>> grouped.%(selected)stransform(lambda x: (x - x.mean()) / x.std()) + +# Broadcastable +>>> grouped.%(selected)stransform(lambda x: x.max() - x.min()) + +""" + # special case to prevent duplicate plots when catching exceptions when # forwarding methods from NDFrames _plotting_methods = frozenset(['plot', 'boxplot', 'hist']) @@ -2860,25 +2909,9 @@ def _aggregate_named(self, func, *args, **kwargs): return result + @Substitution(klass='Series', selected='A.') + @Appender(_transform_template) def transform(self, func, *args, **kwargs): - """ - Call function producing a like-indexed Series on each group and return - a Series with the transformed values - - Parameters - ---------- - func : function - To apply to each group. 
Should return a Series with the same index - - Examples - -------- - >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) - - Returns - ------- - transformed : Series - """ - func = self._is_cython_func(func) or func # if string function @@ -3633,42 +3666,9 @@ def _transform_general(self, func, *args, **kwargs): axis=self.axis, verify_integrity=False) return self._set_result_index_ordered(concatenated) + @Substitution(klass='DataFrame', selected='') + @Appender(_transform_template) def transform(self, func, *args, **kwargs): - """ - Call function producing a like-indexed DataFrame on each group and - return a DataFrame having the same indexes as the original object - filled with the transformed values - - Parameters - ---------- - f : function - Function to apply to each subframe - - Notes - ----- - Each subframe is endowed the attribute 'name' in case you need to know - which group you are working on. - - The current implementation imposes three requirements on f: - - * f must return a value that either has the same shape as the input - subframe or can be broadcast to the shape of the input subframe. - For example, f returns a scalar it will be broadcast to have the - same shape as the input subframe. - * f must support application column-by-column in the subframe. If f - also supports application to the entire subframe, then a fast path - is used starting from the second chunk. - * f must not mutate subframes. Mutation is not supported and may - produce unexpected results. - - Examples - -------- - >>> grouped = df.groupby(lambda x: mapping[x]) - # Same shape - >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) - # Broadcastable - >>> grouped.transform(lambda x: x.max() - x.min()) - """ # optimized transforms func = self._is_cython_func(func) or func @@ -3784,7 +3784,8 @@ def filter(self, func, dropna=True, *args, **kwargs): # noqa Examples -------- - >>> grouped = df.groupby(lambda x: mapping[x]) + >>> df = pd.DataFrame(np.random.randn(10, 3), columns=list('ABC')) + >>> grouped = df.groupby(df.index % 3) >>> grouped.filter(lambda x: x['A'].sum() + x['B'].sum() > 0) """ From 7a42240fd38404092049ea5006561b69fa5b0d88 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Mar 2017 15:18:00 -0400 Subject: [PATCH 271/933] DOC: whatsnew fixes --- doc/source/whatsnew/v0.20.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 64bfeb3307e17..c5bf943cebca7 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -785,6 +785,7 @@ Deprecations - ``Series/DataFrame/Panel.consolidate()`` been deprecated as a public method. (:issue:`15483`) - The ``as_indexer`` keyword of ``Series.str.match()`` has been deprecated (ignored keyword) (:issue:`15257`). - The following top-level pandas functions have been deprecated and will be removed in a future version (:issue:`13790`) + * ``pd.pnow()``, replaced by ``Period.now()`` * ``pd.Term``, is removed, as it is not applicable to user code. Instead use in-line string expressions in the where clause when searching in HDFStore * ``pd.Expr``, is removed, as it is not applicable to user code. 
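To make the transform contract documented in PATCH 269/270 above concrete, here is a small, self-contained sketch (illustrative names and data, not taken from the patches; assumes pandas >= 0.20):

>>> import pandas as pd
>>> df = pd.DataFrame({'key': ['a', 'a', 'b', 'b'],
...                    'val': [1., 3., 2., 6.]})
>>> g = df.groupby('key')['val']
>>> g.transform(lambda x: (x - x.mean()) / x.std())   # same-shape result: standardize within each group
>>> g.transform(lambda x: x.max() - x.min())          # scalar result, broadcast per group: [2., 2., 4., 4.]
>>> g.transform('max') - g.transform('min')           # built-in (cython fast-path) methods give the same answer

Because the result of the second lambda is a scalar per group, it is broadcast back to each group's rows, which is exactly the behaviour the new docstring template describes.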
From c577c19d22ac8ec7ea05630576c379e3108248af Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Mar 2017 15:41:58 -0400 Subject: [PATCH 272/933] CI: only print skipped if not on doc-build --- .travis.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index d78e4dab31fbe..ab83a37f25905 100644 --- a/.travis.yml +++ b/.travis.yml @@ -196,6 +196,10 @@ after_script: - echo "after_script start" - ci/install_test.sh - source activate pandas && python -c "import pandas; pandas.show_versions();" - - ci/print_skipped.py /tmp/single.xml - - ci/print_skipped.py /tmp/multiple.xml + - if [ "$DOC_BUILD"]; then + ci/print_skipped.py /tmp/single.xml; + fi + - if [ "$DOC_BUILD"]; then + ci/print_skipped.py /tmp/multiple.xml; + fi - echo "after_script done" From 156bfd2ed5db2837fe740ec2934a782f56e99864 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Mar 2017 18:42:14 -0400 Subject: [PATCH 273/933] CI: typo in .travis.yml for print_skipped CI: linted .travis.yml CI: removed CLIPBOARD env variables as not used closes #15803 --- .travis.yml | 95 +++++++++-------------------------------------------- 1 file changed, 16 insertions(+), 79 deletions(-) diff --git a/.travis.yml b/.travis.yml index ab83a37f25905..bb3388734229e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,63 +27,37 @@ matrix: - language: objective-c os: osx compiler: clang - osx_image: xcode6.4 cache: ccache: true directories: - $HOME/.cache # cython cache - $HOME/.ccache # compiler cache env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_osx" - - TEST_ARGS="--skip-slow --skip-network" - - JOB_TAG=_OSX - - TRAVIS_PYTHON_VERSION=3.5 - - USE_CACHE=true + - PYTHON_VERSION=3.5 JOB_NAME="35_osx" TEST_ARGS="--skip-slow --skip-network" JOB_TAG="_OSX" TRAVIS_PYTHON_VERSION=3.5 USE_CACHE=true - python: 2.7 env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_slow_nnet_LOCALE" - - TEST_ARGS="--only-slow --skip-network" - - LOCALE_OVERRIDE="zh_CN.UTF-8" - - JOB_TAG=_LOCALE - - USE_CACHE=true + - PYTHON_VERSION=2.7 JOB_NAME="27_slow_nnet_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8" JOB_TAG="_LOCALE" USE_CACHE=true addons: apt: packages: - language-pack-zh-hans - python: 2.7 env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_nslow" - - TEST_ARGS="--skip-slow" - - CLIPBOARD_GUI=gtk2 - - LINT=true - - USE_CACHE=true + - PYTHON_VERSION=2.7 JOB_NAME="27_nslow" TEST_ARGS="--skip-slow" LINT=true USE_CACHE=true addons: apt: packages: - python-gtk2 - python: 3.5 env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_nslow" - - TEST_ARGS="--skip-slow --skip-network" - - CLIPBOARD=xsel - - COVERAGE=true - - USE_CACHE=true + - PYTHON_VERSION=3.5 JOB_NAME="35_nslow" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true USE_CACHE=true addons: apt: packages: - xsel - python: 3.6 env: - - PYTHON_VERSION=3.6 - - JOB_NAME: "36" - - TEST_ARGS="--skip-slow --skip-network" - - PANDAS_TESTING_MODE="deprecate" - - CONDA_FORGE=true - - USE_CACHE=true + - PYTHON_VERSION=3.6 JOB_NAME="36" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true USE_CACHE=true addons: apt: packages: @@ -92,68 +66,32 @@ matrix: # In allow_failures - python: 2.7 env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_slow" - - JOB_TAG=_SLOW - - TEST_ARGS="--only-slow --skip-network" - - USE_CACHE=true + - PYTHON_VERSION=2.7 JOB_NAME="27_slow" JOB_TAG="_SLOW" TEST_ARGS="--only-slow --skip-network" USE_CACHE=true # In allow_failures - python: 2.7 env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_build_test" - - 
JOB_TAG=_BUILD_TEST - - TEST_ARGS="--skip-slow" - - BUILD_TEST=true - - USE_CACHE=true + - PYTHON_VERSION=2.7 JOB_NAME="27_build_test" JOB_TAG="_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true USE_CACHE=true # In allow_failures - python: 3.5 env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_numpy_dev" - - JOB_TAG=_NUMPY_DEV - - TEST_ARGS="--skip-slow --skip-network" - - PANDAS_TESTING_MODE="deprecate" - - USE_CACHE=true + - PYTHON_VERSION=3.5 JOB_NAME="35_numpy_dev" JOB_TAG="_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" USE_CACHE=true # In allow_failures - python: 3.5 env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "doc_build" - - DOC_BUILD=true - - JOB_TAG=_DOC_BUILD - - USE_CACHE=true + - PYTHON_VERSION=3.5 JOB_NAME="doc_build" DOC_BUILD=true JOB_TAG="_DOC_BUILD" USE_CACHE=true allow_failures: - python: 2.7 env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_slow" - - JOB_TAG=_SLOW - - TEST_ARGS="--only-slow --skip-network" - - USE_CACHE=true + - PYTHON_VERSION=2.7 JOB_NAME="27_slow" JOB_TAG="_SLOW" TEST_ARGS="--only-slow --skip-network" USE_CACHE=true - python: 2.7 env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_build_test" - - JOB_TAG=_BUILD_TEST - - TEST_ARGS="--skip-slow" - - BUILD_TEST=true - - USE_CACHE=true + - PYTHON_VERSION=2.7 JOB_NAME="27_build_test" JOB_TAG="_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true USE_CACHE=true - python: 3.5 env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_numpy_dev" - - JOB_TAG=_NUMPY_DEV - - TEST_ARGS="--skip-slow --skip-network" - - PANDAS_TESTING_MODE="deprecate" - - USE_CACHE=true + - PYTHON_VERSION=3.5 JOB_NAME="35_numpy_dev" JOB_TAG="_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" USE_CACHE=true - python: 3.5 env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "doc_build" - - DOC_BUILD=true - - JOB_TAG=_DOC_BUILD - - USE_CACHE=true + - PYTHON_VERSION=3.5 JOB_NAME="doc_build" DOC_BUILD=true JOB_TAG="_DOC_BUILD" USE_CACHE=true before_install: - echo "before_install" @@ -165,7 +103,7 @@ before_install: - git --version - git tag - ci/before_install_travis.sh - - export DISPLAY=:99.0 + - export DISPLAY=":99.0" install: - echo "install start" @@ -194,12 +132,11 @@ after_success: after_script: - echo "after_script start" - - ci/install_test.sh - source activate pandas && python -c "import pandas; pandas.show_versions();" - - if [ "$DOC_BUILD"]; then + - if [ -e /tmp/single.xml ]; then ci/print_skipped.py /tmp/single.xml; fi - - if [ "$DOC_BUILD"]; then + - if [ -e /tmp/multiple.xml ]; then ci/print_skipped.py /tmp/multiple.xml; fi - echo "after_script done" From 22f9d0ddbefccbf9a3e4000ad17dd12db9bddba9 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Mar 2017 22:16:31 -0400 Subject: [PATCH 274/933] Revert "MAINT: Remove Long and WidePanel (#15748)" (#15802) This reverts commit bff47f2302a0be4dcbf7e5055e525d5652e08fb5. 
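For reference — an illustrative sketch, not part of the revert itself — the deprecation shims restored by the diff below behave like this (assumes the post-revert source tree):

>>> import numpy as np
>>> import pandas as pd
>>> wp = pd.WidePanel(np.random.randn(2, 5, 4))  # emits FutureWarning: "WidePanel is deprecated. Please use Panel"
>>> isinstance(wp, pd.Panel)                     # WidePanel is a thin, deprecated subclass of Panel
True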
--- asv_bench/benchmarks/pandas_vb_common.py | 5 ++++ bench/bench_join_panel.py | 4 +-- doc/source/whatsnew/v0.20.0.txt | 1 - pandas/core/api.py | 2 +- pandas/core/panel.py | 23 ++++++++++++++++ pandas/tests/api/test_api.py | 3 +- pandas/tests/io/test_pytables.py | 3 ++ pandas/tests/test_panel.py | 35 ++++++++++++++++-------- vb_suite/pandas_vb_common.py | 5 ++++ 9 files changed, 64 insertions(+), 17 deletions(-) diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py index a7e530e7f5ef1..56ccc94c414fb 100644 --- a/asv_bench/benchmarks/pandas_vb_common.py +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -25,6 +25,11 @@ except: pass +try: + Panel = Panel +except Exception: + Panel = WidePanel + # didn't add to namespace until later try: from pandas.core.index import MultiIndex diff --git a/bench/bench_join_panel.py b/bench/bench_join_panel.py index 113b317dd8ff8..f3c3f8ba15f70 100644 --- a/bench/bench_join_panel.py +++ b/bench/bench_join_panel.py @@ -45,8 +45,8 @@ def reindex_on_axis(panels, axis, axis_reindex): return p -# Does the job but inefficient. It is better to handle -# this like you read a table in pytables. +# does the job but inefficient (better to handle like you read a table in +# pytables...e.g create a LongPanel then convert to Wide) def create_panels_join(cls, panels): """ given an array of panels's, create a single panel """ panels = [a for a in panels if a is not None] diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c5bf943cebca7..ca6541256f1d2 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -814,7 +814,6 @@ Removal of prior version deprecations/changes - The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) - ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`) - Where clauses in ``pytables`` are only accepted as strings and expressions types and not other data-types (:issue:`12027`) -- The ``LongPanel`` and ``WidePanel`` classes have been removed (:issue:`10892`) .. _whatsnew_0200.performance: diff --git a/pandas/core/api.py b/pandas/core/api.py index 5018de39ca907..65253dedb8b53 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -15,7 +15,7 @@ from pandas.core.series import Series from pandas.core.frame import DataFrame -from pandas.core.panel import Panel +from pandas.core.panel import Panel, WidePanel from pandas.core.panel4d import Panel4D from pandas.core.reshape import (pivot_simple as pivot, get_dummies, lreshape, wide_to_long) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 50ddc24ac9656..5ab3c44b175fe 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -4,6 +4,8 @@ # pylint: disable=E1103,W0231,W0212,W0621 from __future__ import division +import warnings + import numpy as np from pandas.types.cast import (infer_dtype_from_scalar, @@ -1554,3 +1556,24 @@ def f(self, other, axis=0): ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs) Panel._add_aggregate_operations() Panel._add_numeric_operations() + + +# legacy +class WidePanel(Panel): + + def __init__(self, *args, **kwargs): + # deprecation, #10892 + warnings.warn("WidePanel is deprecated. 
Please use Panel", + FutureWarning, stacklevel=2) + + super(WidePanel, self).__init__(*args, **kwargs) + + +class LongPanel(DataFrame): + + def __init__(self, *args, **kwargs): + # deprecation, #10892 + warnings.warn("LongPanel is deprecated. Please use DataFrame", + FutureWarning, stacklevel=2) + + super(LongPanel, self).__init__(*args, **kwargs) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 2c7dcf2501f32..73222c246fc70 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -54,7 +54,8 @@ class TestPDApi(Base, tm.TestCase): 'TimedeltaIndex', 'Timestamp'] # these are already deprecated; awaiting removal - deprecated_classes = ['Panel4D', 'SparseList', 'Expr', 'Term'] + deprecated_classes = ['WidePanel', 'Panel4D', + 'SparseList', 'Expr', 'Term'] # these should be deprecated in the future deprecated_classes_in_future = ['Panel'] diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 2d62cb2d6944d..82a98f5d08488 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2964,6 +2964,9 @@ def _check(left, right): # empty # self._check_roundtrip(wp.to_frame()[:0], _check) + def test_longpanel(self): + pass + def test_overwrite_node(self): with ensure_clean_store(self.path) as store: diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 13e16f3b90730..ab0322abbcf06 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -178,6 +178,10 @@ def wrapper(x): class SafeForSparse(object): + @classmethod + def assert_panel_equal(cls, x, y): + assert_panel_equal(x, y) + def test_get_axis(self): assert (self.panel._get_axis(0) is self.panel.items) assert (self.panel._get_axis(1) is self.panel.major_axis) @@ -342,10 +346,10 @@ def check_op(op, name): def test_combinePanel(self): result = self.panel.add(self.panel) - assert_panel_equal(result, self.panel * 2) + self.assert_panel_equal(result, self.panel * 2) def test_neg(self): - assert_panel_equal(-self.panel, self.panel * -1) + self.assert_panel_equal(-self.panel, self.panel * -1) # issue 7692 def test_raise_when_not_implemented(self): @@ -365,22 +369,22 @@ def test_select(self): # select items result = p.select(lambda x: x in ('ItemA', 'ItemC'), axis='items') expected = p.reindex(items=['ItemA', 'ItemC']) - assert_panel_equal(result, expected) + self.assert_panel_equal(result, expected) # select major_axis result = p.select(lambda x: x >= datetime(2000, 1, 15), axis='major') new_major = p.major_axis[p.major_axis >= datetime(2000, 1, 15)] expected = p.reindex(major=new_major) - assert_panel_equal(result, expected) + self.assert_panel_equal(result, expected) # select minor_axis result = p.select(lambda x: x in ('D', 'A'), axis=2) expected = p.reindex(minor=['A', 'D']) - assert_panel_equal(result, expected) + self.assert_panel_equal(result, expected) # corner case, empty thing result = p.select(lambda x: x in ('foo', ), axis='items') - assert_panel_equal(result, p.reindex(items=[])) + self.assert_panel_equal(result, p.reindex(items=[])) def test_get_value(self): for item in self.panel.items: @@ -395,8 +399,8 @@ def test_abs(self): result = self.panel.abs() result2 = abs(self.panel) expected = np.abs(self.panel) - assert_panel_equal(result, expected) - assert_panel_equal(result2, expected) + self.assert_panel_equal(result, expected) + self.assert_panel_equal(result2, expected) df = self.panel['ItemA'] result = df.abs() @@ -863,6 +867,10 @@ def test_set_value(self): class TestPanel(tm.TestCase, 
PanelTests, CheckIndexing, SafeForLongAndSparse, SafeForSparse): + @classmethod + def assert_panel_equal(cls, x, y): + assert_panel_equal(x, y) + def setUp(self): self.panel = _panel.copy() self.panel.major_axis.name = None @@ -1959,7 +1967,7 @@ def test_round(self): major_axis=pd.date_range('1/1/2000', periods=5), minor_axis=['A', 'B']) result = p.round() - assert_panel_equal(expected, result) + self.assert_panel_equal(expected, result) def test_numpy_round(self): values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], @@ -1975,7 +1983,7 @@ def test_numpy_round(self): major_axis=pd.date_range('1/1/2000', periods=5), minor_axis=['A', 'B']) result = np.round(p) - assert_panel_equal(expected, result) + self.assert_panel_equal(expected, result) msg = "the 'out' parameter is not supported" tm.assertRaisesRegexp(ValueError, msg, np.round, p, out=p) @@ -2262,12 +2270,15 @@ def test_all_any_unhandled(self): self.assertRaises(NotImplementedError, self.panel.any, bool_only=True) -class TestPanelFrame(tm.TestCase): +class TestLongPanel(tm.TestCase): """ - Check that conversions to and from Panel to DataFrame work. + LongPanel no longer exists, but... """ def setUp(self): + import warnings + warnings.filterwarnings(action='ignore', category=FutureWarning) + panel = tm.makePanel() tm.add_nans(panel) diff --git a/vb_suite/pandas_vb_common.py b/vb_suite/pandas_vb_common.py index 41e43d6ab10e5..bd2e8a1c1d504 100644 --- a/vb_suite/pandas_vb_common.py +++ b/vb_suite/pandas_vb_common.py @@ -18,6 +18,11 @@ except: import pandas._libs.lib as lib +try: + Panel = WidePanel +except Exception: + pass + # didn't add to namespace until later try: from pandas.core.index import MultiIndex From d2f32a0362bbb90c4ab32a454962912901e32080 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Sat, 25 Mar 2017 22:19:58 -0400 Subject: [PATCH 275/933] BUG: Series.asof fails for all NaN Series (GH15713) closes bug #15713. Added a test for a Series that is all NaNs, and added code that checks for that case: if so, the expected all-NaN output is returned. Author: Carlos Souza Closes #15758 from ucals/bug-fix-15713 and squashes the following commits: 0765108 [Carlos Souza] First simplification, code-block in the same place bb63964 [Carlos Souza] Propagating Series name af9a29b [Carlos Souza] Setting name of asof result when scalar input and all nan b8f078a [Carlos Souza] Small code standard change 7448b96 [Carlos Souza] Fixing scalar input a080b9b [Carlos Souza] Making scalar input return in a Series 04b7306 [Carlos Souza] Removing .values and formating code PEP8 3f9c7fd [Carlos Souza] Minor comments 70c958f [Carlos Souza] Added tests for non-default indexes, scalar and multiple inputs, and results preserve columns 6b745af [Carlos Souza] Adding DataFrame tests & support, and optimizing the code 89fb6cf [Carlos Souza] BUG #15713 fixing failing tests 17d1d77 [Carlos Souza] BUG #15713 Series.asof return nan when series is all nans! 4e26ab8 [Carlos Souza] BUG #15713 Series.asof return nan when series is all nans.
c78d687 [Carlos Souza] BUG #15713 Series.asof return nan when series is all nans 676a4e5 [Carlos Souza] Test --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/generic.py | 10 +++++++ pandas/tests/frame/test_asof.py | 47 ++++++++++++++++++++++++-------- pandas/tests/series/test_asof.py | 26 ++++++++++++++++++ 4 files changed, 73 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ca6541256f1d2..f96fc41c73f15 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -868,6 +868,7 @@ Bug Fixes - Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`) - Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) - Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`) +- Bug in ``Series.asof`` which raised if the series contained all ``np.nan`` (:issue:`15713`) - Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 134840728d931..ad56ea44a0dc6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3972,6 +3972,16 @@ def asof(self, where, subset=None): where = Index(where) if is_list else Index([where]) nulls = self.isnull() if is_series else self[subset].isnull().any(1) + if nulls.all(): + if is_series: + return self._constructor(np.nan, index=where, name=self.name) + elif is_list: + from pandas import DataFrame + return DataFrame(np.nan, index=where, columns=self.columns) + else: + from pandas import Series + return Series(np.nan, index=self.columns, name=where[0]) + locs = self.index.asof_locs(where, ~(nulls.values)) # mask the missing diff --git a/pandas/tests/frame/test_asof.py b/pandas/tests/frame/test_asof.py index 8bb26d3d7474c..dd03f8f7cb7a9 100644 --- a/pandas/tests/frame/test_asof.py +++ b/pandas/tests/frame/test_asof.py @@ -4,22 +4,19 @@ from pandas import (DataFrame, date_range, Timestamp, Series, to_datetime) -from pandas.util.testing import assert_frame_equal, assert_series_equal import pandas.util.testing as tm from .common import TestData class TestFrameAsof(TestData, tm.TestCase): - def setUp(self): self.N = N = 50 - rng = date_range('1/1/1990', periods=N, freq='53s') + self.rng = date_range('1/1/1990', periods=N, freq='53s') self.df = DataFrame({'A': np.arange(N), 'B': np.arange(N)}, - index=rng) + index=self.rng) def test_basic(self): - df = self.df.copy() df.loc[15:30, 'A'] = np.nan dates = date_range('1/1/1990', periods=self.N * 3, @@ -39,7 +36,6 @@ def test_basic(self): self.assertTrue((rs == 14).all(1).all()) def test_subset(self): - N = 10 rng = date_range('1/1/1990', periods=N, freq='53s') df = DataFrame({'A': np.arange(N), 'B': np.arange(N)}, @@ -51,19 +47,19 @@ def test_subset(self): # with a subset of A should be the same result = df.asof(dates, subset='A') expected = df.asof(dates) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # same with A/B result = df.asof(dates, subset=['A', 'B']) expected = df.asof(dates) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # B gives self.df.asof result = df.asof(dates, subset='B') expected = df.resample('25s', closed='right').ffill().reindex(dates) expected.iloc[20:] = 9 - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_missing(self): # GH 15118 @@ -75,9 +71,38 @@ def test_missing(self): result = df.asof('1989-12-31') expected = 
Series(index=['A', 'B'], name=Timestamp('1989-12-31')) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = df.asof(to_datetime(['1989-12-31'])) expected = DataFrame(index=to_datetime(['1989-12-31']), columns=['A', 'B'], dtype='float64') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) + + def test_all_nans(self): + # GH 15713 + # DataFrame is all nans + result = DataFrame([np.nan]).asof([0]) + expected = DataFrame([np.nan]) + tm.assert_frame_equal(result, expected) + + # testing non-default indexes, multiple inputs + dates = date_range('1/1/1990', periods=self.N * 3, freq='25s') + result = DataFrame(np.nan, index=self.rng, columns=['A']).asof(dates) + expected = DataFrame(np.nan, index=dates, columns=['A']) + tm.assert_frame_equal(result, expected) + + # testing multiple columns + dates = date_range('1/1/1990', periods=self.N * 3, freq='25s') + result = DataFrame(np.nan, index=self.rng, + columns=['A', 'B', 'C']).asof(dates) + expected = DataFrame(np.nan, index=dates, columns=['A', 'B', 'C']) + tm.assert_frame_equal(result, expected) + + # testing scalar input + result = DataFrame(np.nan, index=[1, 2], columns=['A', 'B']).asof([3]) + expected = DataFrame(np.nan, index=[3], columns=['A', 'B']) + tm.assert_frame_equal(result, expected) + + result = DataFrame(np.nan, index=[1, 2], columns=['A', 'B']).asof(3) + expected = Series(np.nan, index=['A', 'B'], name=3) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/test_asof.py index d2fd8858e7647..82914a99e2f6c 100644 --- a/pandas/tests/series/test_asof.py +++ b/pandas/tests/series/test_asof.py @@ -148,3 +148,29 @@ def test_errors(self): s = Series(np.random.randn(N), index=rng) with self.assertRaises(ValueError): s.asof(s.index[0], subset='foo') + + def test_all_nans(self): + # GH 15713 + # series is all nans + result = Series([np.nan]).asof([0]) + expected = Series([np.nan]) + tm.assert_series_equal(result, expected) + + # testing non-default indexes + N = 50 + rng = date_range('1/1/1990', periods=N, freq='53s') + + dates = date_range('1/1/1990', periods=N * 3, freq='25s') + result = Series(np.nan, index=rng).asof(dates) + expected = Series(np.nan, index=dates) + tm.assert_series_equal(result, expected) + + # testing scalar input + date = date_range('1/1/1990', periods=N * 3, freq='25s')[0] + result = Series(np.nan, index=rng).asof(date) + assert isnull(result) + + # test name is propagated + result = Series(np.nan, index=[1, 2, 3, 4], name='test').asof([4, 5]) + expected = Series(np.nan, index=[4, 5], name='test') + tm.assert_series_equal(result, expected)
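With the fix above, ``asof`` on an all-NaN object returns NaN values of the expected shape instead of raising, exactly as the new tests assert. A small usage sketch of the fixed behavior (editorial illustration, assuming a pandas build that includes this patch):

    import numpy as np
    import pandas as pd

    rng = pd.date_range('1/1/1990', periods=4, freq='53s')
    s = pd.Series(np.nan, index=rng, name='test')

    s.asof(rng[-1])   # scalar lookup -> nan (previously raised)
    s.asof(rng[-2:])  # list lookup -> all-NaN Series; name is preserved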
From 10589887016f4c9280fdeec01f9fcdbe9cea4dfa Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 26 Mar 2017 09:41:19 -0400 Subject: [PATCH 276/933] CI: simplify ci setup a bit closes #15807 --- .travis.yml | 37 +++++++++---------- appveyor.yml | 6 +-- ci/check_cache.sh | 4 ++ ci/install_circle.sh | 19 ++++------ ci/install_travis.sh | 37 ++++++------------- ci/prep_cython_cache.sh | 8 ++-- ci/requirements-2.7.build | 2 + ci/requirements-2.7_BUILD_TEST.build | 2 + ci/requirements-2.7_COMPAT.build | 1 + ci/requirements-2.7_LOCALE.build | 1 + ci/requirements-2.7_SLOW.build | 1 + ...ts-2.7-64.run => requirements-2.7_WIN.run} | 0 ci/requirements-3.4-64.run | 12 ------ ci/requirements-3.4.build | 1 + ci/requirements-3.4_SLOW.build | 2 + ci/requirements-3.5.build | 2 + ci/requirements-3.5_ASCII.build | 2 + ci/requirements-3.5_DOC_BUILD.build | 2 +
ci/requirements-3.5_OSX.build | 2 + ci/requirements-3.6.build | 2 + ...build => requirements-3.6_NUMPY_DEV.build} | 1 + ...sh => requirements-3.6_NUMPY_DEV.build.sh} | 0 ...DEV.run => requirements-3.6_NUMPY_DEV.run} | 0 ...ts-3.6-64.run => requirements-3.6_WIN.run} | 0 circle.yml | 8 ++-- 25 files changed, 74 insertions(+), 78 deletions(-) rename ci/{requirements-2.7-64.run => requirements-2.7_WIN.run} (100%) delete mode 100644 ci/requirements-3.4-64.run rename ci/{requirements-3.5_NUMPY_DEV.build => requirements-3.6_NUMPY_DEV.build} (70%) rename ci/{requirements-3.5_NUMPY_DEV.build.sh => requirements-3.6_NUMPY_DEV.build.sh} (100%) rename ci/{requirements-3.5_NUMPY_DEV.run => requirements-3.6_NUMPY_DEV.run} (100%) rename ci/{requirements-3.6-64.run => requirements-3.6_WIN.run} (100%) diff --git a/.travis.yml b/.travis.yml index bb3388734229e..d9dbdf96ff976 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,9 @@ sudo: false language: python -# To turn off cached miniconda, cython files and compiler cache comment out the -# USE_CACHE=true line for the build in the matrix below. To delete caches go to -# https://travis-ci.org/OWNER/REPOSITORY/caches or run +# To turn off cached cython files and compiler cache +# set NOCACHE-true +# To delete caches go to https://travis-ci.org/OWNER/REPOSITORY/caches or run # travis cache --delete inside the project directory from the travis command line client # The cash directories will be deleted if anything in ci/ changes in a commit cache: @@ -33,31 +33,31 @@ matrix: - $HOME/.cache # cython cache - $HOME/.ccache # compiler cache env: - - PYTHON_VERSION=3.5 JOB_NAME="35_osx" TEST_ARGS="--skip-slow --skip-network" JOB_TAG="_OSX" TRAVIS_PYTHON_VERSION=3.5 USE_CACHE=true + - JOB="3.5_OSX" TEST_ARGS="--skip-slow --skip-network" TRAVIS_PYTHON_VERSION=3.5 - python: 2.7 env: - - PYTHON_VERSION=2.7 JOB_NAME="27_slow_nnet_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8" JOB_TAG="_LOCALE" USE_CACHE=true + - JOB="2.7_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8" addons: apt: packages: - language-pack-zh-hans - python: 2.7 env: - - PYTHON_VERSION=2.7 JOB_NAME="27_nslow" TEST_ARGS="--skip-slow" LINT=true USE_CACHE=true + - JOB="2.7" TEST_ARGS="--skip-slow" LINT=true addons: apt: packages: - python-gtk2 - python: 3.5 env: - - PYTHON_VERSION=3.5 JOB_NAME="35_nslow" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true USE_CACHE=true + - JOB="3.5" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true addons: apt: packages: - xsel - python: 3.6 env: - - PYTHON_VERSION=3.6 JOB_NAME="36" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true USE_CACHE=true + - JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true @@ -66,32 +66,32 @@ matrix: # In allow_failures - python: 2.7 env: - - PYTHON_VERSION=2.7 JOB_NAME="27_slow" JOB_TAG="_SLOW" TEST_ARGS="--only-slow --skip-network" USE_CACHE=true + - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" # In allow_failures - python: 2.7 env: - - PYTHON_VERSION=2.7 JOB_NAME="27_build_test" JOB_TAG="_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true USE_CACHE=true + - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true # In allow_failures - - python: 3.5 + - python: 3.6 env: - - PYTHON_VERSION=3.5 JOB_NAME="35_numpy_dev" JOB_TAG="_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" USE_CACHE=true + - JOB="3.6_NUMPY_DEV" 
TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" # In allow_failures - python: 3.5 env: - - PYTHON_VERSION=3.5 JOB_NAME="doc_build" DOC_BUILD=true JOB_TAG="_DOC_BUILD" USE_CACHE=true + - JOB="3.5_DOC_BUILD" DOC_BUILD=true allow_failures: - python: 2.7 env: - - PYTHON_VERSION=2.7 JOB_NAME="27_slow" JOB_TAG="_SLOW" TEST_ARGS="--only-slow --skip-network" USE_CACHE=true + - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" - python: 2.7 env: - - PYTHON_VERSION=2.7 JOB_NAME="27_build_test" JOB_TAG="_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true USE_CACHE=true - - python: 3.5 + - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true + - python: 3.6 env: - - PYTHON_VERSION=3.5 JOB_NAME="35_numpy_dev" JOB_TAG="_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" USE_CACHE=true + - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" - python: 3.5 env: - - PYTHON_VERSION=3.5 JOB_NAME="doc_build" DOC_BUILD=true JOB_TAG="_DOC_BUILD" USE_CACHE=true + - JOB="3.5_DOC_BUILD" DOC_BUILD=true before_install: - echo "before_install" @@ -107,7 +107,6 @@ before_install: install: - echo "install start" - - ci/check_cache.sh - ci/prep_cython_cache.sh - ci/install_travis.sh - ci/submit_cython_cache.sh diff --git a/appveyor.yml b/appveyor.yml index 5d748ddf1a108..db729b3005be6 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -72,11 +72,11 @@ install: - cmd: conda info -a # create our env - - cmd: conda create -q -n pandas python=%PYTHON_VERSION% cython pytest + - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest - cmd: activate pandas - - SET REQ=ci\requirements-%PYTHON_VERSION%-%PYTHON_ARCH%.run + - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run - cmd: echo "installing requirements from %REQ%" - - cmd: conda install -n pandas -q --file=%REQ% + - cmd: conda install -n pandas --file=%REQ% - cmd: conda list -n pandas - cmd: echo "installing requirements from %REQ% - done" diff --git a/ci/check_cache.sh b/ci/check_cache.sh index 1c9de7b017569..b83144fc45ef4 100755 --- a/ci/check_cache.sh +++ b/ci/check_cache.sh @@ -1,5 +1,9 @@ #!/bin/bash +# currently not used +# script to make sure that cache is clean +# Travis CI now handles this + if [ "$TRAVIS_PULL_REQUEST" == "false" ] then echo "Not a PR: checking for changes in ci/ from last 2 commits" diff --git a/ci/install_circle.sh b/ci/install_circle.sh index 485586e9d4f49..00e14b10ebbd6 100755 --- a/ci/install_circle.sh +++ b/ci/install_circle.sh @@ -46,9 +46,9 @@ echo "[environmental variable file]" cat $ENVS_FILE source $ENVS_FILE -export REQ_BUILD=ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build -export REQ_RUN=ci/requirements-${PYTHON_VERSION}${JOB_TAG}.run -export REQ_PIP=ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip +export REQ_BUILD=ci/requirements-${JOB}.build +export REQ_RUN=ci/requirements-${JOB}.run +export REQ_PIP=ci/requirements-${JOB}.pip # edit the locale override if needed if [ -n "$LOCALE_OVERRIDE" ]; then @@ -61,16 +61,13 @@ if [ -n "$LOCALE_OVERRIDE" ]; then echo fi -# create new env -echo "[create env]" -time conda create -q -n pandas python=${PYTHON_VERSION} pytest || exit 1 +# create envbuild deps +echo "[create env: ${REQ_BUILD}]" +time conda create -n pandas -q --file=${REQ_BUILD} || exit 1 +time conda install -n pandas pytest || exit 1 source activate pandas -# build deps -echo "[build installs: ${REQ_BUILD}]" -time conda install -q --file=${REQ_BUILD} || exit 1 - # build but don't install echo "[build em]" time python 
setup.py build_ext --inplace || exit 1 @@ -84,5 +81,5 @@ fi # we may have additional pip installs echo "[pip installs: ${REQ_PIP}]" if [ -e ${REQ_PIP} ]; then - pip install -q -r $REQ_PIP + pip install -r $REQ_PIP fi diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 66633c0592748..ac7bb2c2f3764 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -68,7 +68,7 @@ conda info -a || exit 1 # set the compiler cache to work echo -if [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then +if [ -z "$NOCACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then echo "[Using ccache]" export PATH=/usr/lib/ccache:/usr/lib64/ccache:$PATH gcc=$(which gcc) @@ -76,7 +76,7 @@ if [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then ccache=$(which ccache) echo "[ccache]: $ccache" export CC='ccache gcc' -elif [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "osx" ]; then +elif [ -z "$NOCACHE" ] && [ "${TRAVIS_OS_NAME}" == "osx" ]; then echo "[Using ccache]" time brew install ccache export PATH=/usr/local/opt/ccache/libexec:$PATH @@ -91,35 +91,22 @@ fi echo echo "[create env]" -# may have installation instructions for this build -INSTALL="ci/install-${PYTHON_VERSION}${JOB_TAG}.sh" -if [ -e ${INSTALL} ]; then - time bash $INSTALL || exit 1 -else - # create new env - # this may already exists, in which case our caching worked - time conda create -n pandas python=$PYTHON_VERSION pytest nomkl -fi +# create our environment +REQ="ci/requirements-${JOB}.build" +time conda create -n pandas --file=${REQ} || exit 1 -# build deps -echo -echo "[build installs]" -REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build" -if [ -e ${REQ} ]; then - time conda install -n pandas --file=${REQ} || exit 1 -fi +source activate pandas # may have addtl installation instructions for this build echo echo "[build addtl installs]" -REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build.sh" +REQ="ci/requirements-${JOB}.build.sh" if [ -e ${REQ} ]; then time bash $REQ || exit 1 fi -source activate pandas - -pip install pytest-xdist +time conda install -n pandas pytest +time pip install pytest-xdist if [ "$LINT" ]; then conda install flake8 @@ -152,7 +139,7 @@ fi # we may have run installations echo echo "[conda installs]" -REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.run" +REQ="ci/requirements-${JOB}.run" if [ -e ${REQ} ]; then time conda install -n pandas --file=${REQ} || exit 1 fi @@ -160,7 +147,7 @@ fi # we may have additional pip installs echo echo "[pip installs]" -REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip" +REQ="ci/requirements-${JOB}.pip" if [ -e ${REQ} ]; then pip install -r $REQ fi @@ -168,7 +155,7 @@ fi # may have addtl installation instructions for this build echo echo "[addtl installs]" -REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.sh" +REQ="ci/requirements-${JOB}.sh" if [ -e ${REQ} ]; then time bash $REQ || exit 1 fi diff --git a/ci/prep_cython_cache.sh b/ci/prep_cython_cache.sh index e091bb00ccedc..18d9388327ddc 100755 --- a/ci/prep_cython_cache.sh +++ b/ci/prep_cython_cache.sh @@ -22,7 +22,7 @@ fi home_dir=$(pwd) -if [ -f "$CACHE_File" ] && [ "$USE_CACHE" ] && [ -d "$PYX_CACHE_DIR" ]; then +if [ -f "$CACHE_File" ] && [ -z "$NOCACHE" ] && [ -d "$PYX_CACHE_DIR" ]; then echo "Cache available - checking pyx diff" @@ -57,16 +57,16 @@ if [ -f "$CACHE_File" ] && [ "$USE_CACHE" ] && [ -d "$PYX_CACHE_DIR" ]; then fi -if [ $clear_cache -eq 0 ] && [ "$USE_CACHE" ] +if [ $clear_cache -eq 0 ] && [ -z "$NOCACHE" ] then - # No and use_cache is set + # No and nocache is not set echo "Will reuse 
cached cython file" cd / tar xvmf $CACHE_File cd $home_dir else echo "Rebuilding cythonized files" - echo "Use cache (Blank if not set) = $USE_CACHE" + echo "No cache = $NOCACHE" echo "Clear cache (1=YES) = $clear_cache" fi diff --git a/ci/requirements-2.7.build b/ci/requirements-2.7.build index 836385671d603..415df13179fcf 100644 --- a/ci/requirements-2.7.build +++ b/ci/requirements-2.7.build @@ -1,4 +1,6 @@ +python=2.7* python-dateutil=2.4.1 pytz=2013b +nomkl numpy cython=0.23 diff --git a/ci/requirements-2.7_BUILD_TEST.build b/ci/requirements-2.7_BUILD_TEST.build index faf1e3559f7f1..aadec00cb7ebf 100644 --- a/ci/requirements-2.7_BUILD_TEST.build +++ b/ci/requirements-2.7_BUILD_TEST.build @@ -1,4 +1,6 @@ +python=2.7* dateutil pytz +nomkl numpy cython diff --git a/ci/requirements-2.7_COMPAT.build b/ci/requirements-2.7_COMPAT.build index 95e3da03f161b..0e1ccf9eac9bf 100644 --- a/ci/requirements-2.7_COMPAT.build +++ b/ci/requirements-2.7_COMPAT.build @@ -1,3 +1,4 @@ +python=2.7* numpy=1.7.1 cython=0.23 dateutil=1.5 diff --git a/ci/requirements-2.7_LOCALE.build b/ci/requirements-2.7_LOCALE.build index 28e2b96851eff..4a37ce8fbe161 100644 --- a/ci/requirements-2.7_LOCALE.build +++ b/ci/requirements-2.7_LOCALE.build @@ -1,3 +1,4 @@ +python=2.7* python-dateutil pytz=2013b numpy=1.8.2 diff --git a/ci/requirements-2.7_SLOW.build b/ci/requirements-2.7_SLOW.build index 664e8b418def7..0f4a2c6792e6b 100644 --- a/ci/requirements-2.7_SLOW.build +++ b/ci/requirements-2.7_SLOW.build @@ -1,3 +1,4 @@ +python=2.7* python-dateutil pytz numpy=1.8.2 diff --git a/ci/requirements-2.7-64.run b/ci/requirements-2.7_WIN.run similarity index 100% rename from ci/requirements-2.7-64.run rename to ci/requirements-2.7_WIN.run diff --git a/ci/requirements-3.4-64.run b/ci/requirements-3.4-64.run deleted file mode 100644 index 106cc5b7168ba..0000000000000 --- a/ci/requirements-3.4-64.run +++ /dev/null @@ -1,12 +0,0 @@ -python-dateutil -pytz -numpy=1.9* -openpyxl -xlsxwriter -xlrd -xlwt -scipy -numexpr -pytables -bottleneck -jinja2=2.8 diff --git a/ci/requirements-3.4.build b/ci/requirements-3.4.build index e6e59dcba63fe..e8a957f70d40e 100644 --- a/ci/requirements-3.4.build +++ b/ci/requirements-3.4.build @@ -1,3 +1,4 @@ +python=3.4* numpy=1.8.1 cython=0.24.1 libgfortran=1.0 diff --git a/ci/requirements-3.4_SLOW.build b/ci/requirements-3.4_SLOW.build index c05a68a14b402..88212053af472 100644 --- a/ci/requirements-3.4_SLOW.build +++ b/ci/requirements-3.4_SLOW.build @@ -1,4 +1,6 @@ +python=3.4* python-dateutil pytz +nomkl numpy=1.10* cython diff --git a/ci/requirements-3.5.build b/ci/requirements-3.5.build index 2fc2053e64fe9..76227e106e1fd 100644 --- a/ci/requirements-3.5.build +++ b/ci/requirements-3.5.build @@ -1,4 +1,6 @@ +python=3.5* python-dateutil pytz +nomkl numpy=1.11.3 cython diff --git a/ci/requirements-3.5_ASCII.build b/ci/requirements-3.5_ASCII.build index 9558cf00ddf5c..f7befe3b31865 100644 --- a/ci/requirements-3.5_ASCII.build +++ b/ci/requirements-3.5_ASCII.build @@ -1,4 +1,6 @@ +python=3.5* python-dateutil pytz +nomkl numpy cython diff --git a/ci/requirements-3.5_DOC_BUILD.build b/ci/requirements-3.5_DOC_BUILD.build index 9558cf00ddf5c..f7befe3b31865 100644 --- a/ci/requirements-3.5_DOC_BUILD.build +++ b/ci/requirements-3.5_DOC_BUILD.build @@ -1,4 +1,6 @@ +python=3.5* python-dateutil pytz +nomkl numpy cython diff --git a/ci/requirements-3.5_OSX.build b/ci/requirements-3.5_OSX.build index a201be352b8e4..f5bc01b67a20a 100644 --- a/ci/requirements-3.5_OSX.build +++ b/ci/requirements-3.5_OSX.build @@ -1,2 +1,4 @@ 
+python=3.5* +nomkl numpy=1.10.4 cython diff --git a/ci/requirements-3.6.build b/ci/requirements-3.6.build index 9558cf00ddf5c..1c4b46aea3865 100644 --- a/ci/requirements-3.6.build +++ b/ci/requirements-3.6.build @@ -1,4 +1,6 @@ +python=3.6* python-dateutil pytz +nomkl numpy cython diff --git a/ci/requirements-3.5_NUMPY_DEV.build b/ci/requirements-3.6_NUMPY_DEV.build similarity index 70% rename from ci/requirements-3.5_NUMPY_DEV.build rename to ci/requirements-3.6_NUMPY_DEV.build index d15edbfa3d2c1..738366867a217 100644 --- a/ci/requirements-3.5_NUMPY_DEV.build +++ b/ci/requirements-3.6_NUMPY_DEV.build @@ -1,3 +1,4 @@ +python=3.6* python-dateutil pytz cython diff --git a/ci/requirements-3.5_NUMPY_DEV.build.sh b/ci/requirements-3.6_NUMPY_DEV.build.sh similarity index 100% rename from ci/requirements-3.5_NUMPY_DEV.build.sh rename to ci/requirements-3.6_NUMPY_DEV.build.sh diff --git a/ci/requirements-3.5_NUMPY_DEV.run b/ci/requirements-3.6_NUMPY_DEV.run similarity index 100% rename from ci/requirements-3.5_NUMPY_DEV.run rename to ci/requirements-3.6_NUMPY_DEV.run diff --git a/ci/requirements-3.6-64.run b/ci/requirements-3.6_WIN.run similarity index 100% rename from ci/requirements-3.6-64.run rename to ci/requirements-3.6_WIN.run diff --git a/circle.yml b/circle.yml index 046af6e9e1389..fa2da0680f388 100644 --- a/circle.yml +++ b/circle.yml @@ -21,13 +21,13 @@ dependencies: - > case $CIRCLE_NODE_INDEX in 0) - sudo apt-get install language-pack-it && ./ci/install_circle.sh PYTHON_VERSION=2.7 JOB_TAG="_COMPAT" LOCALE_OVERRIDE="it_IT.UTF-8" ;; + sudo apt-get install language-pack-it && ./ci/install_circle.sh JOB="2.7_COMPAT" LOCALE_OVERRIDE="it_IT.UTF-8" ;; 1) - sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh PYTHON_VERSION=3.4 JOB_TAG="_SLOW" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; + sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh JOB="3.4_SLOW" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; 2) - sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh PYTHON_VERSION=3.4 JOB_TAG="" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; + sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh JOB="3.4" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; 3) - ./ci/install_circle.sh PYTHON_VERSION=3.5 JOB_TAG="_ASCII" LOCALE_OVERRIDE="C" ;; + ./ci/install_circle.sh JOB="3.5_ASCII" LOCALE_OVERRIDE="C" ;; esac - ./ci/show_circle.sh From c80bd19e7c866762c8cfdbb11d2608e4acd6c2f8 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 26 Mar 2017 11:12:18 -0400 Subject: [PATCH 277/933] DOC: remove warnings for .sort / .order deprecation removals (#15808) --- doc/source/whatsnew/v0.13.1.txt | 2 +- doc/source/whatsnew/v0.15.0.txt | 2 +- doc/source/whatsnew/v0.20.0.txt | 5 +++++ doc/source/whatsnew/v0.7.3.txt | 2 +- doc/source/whatsnew/v0.9.1.txt | 15 +++++++++++---- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.13.1.txt b/doc/source/whatsnew/v0.13.1.txt index d5d54ba43b622..5e5653945fefa 100644 --- a/doc/source/whatsnew/v0.13.1.txt +++ b/doc/source/whatsnew/v0.13.1.txt @@ -125,7 +125,7 @@ API changes df = DataFrame({'col':['foo', 0, np.nan]}) df2 = DataFrame({'col':[np.nan, 0, 'foo']}, index=[2,1,0]) df.equals(df2) - df.equals(df2.sort()) + df.equals(df2.sort_index()) import pandas.core.common as com com.array_equivalent(np.array([0, np.nan]), np.array([0, np.nan])) diff --git a/doc/source/whatsnew/v0.15.0.txt b/doc/source/whatsnew/v0.15.0.txt index aff8ec9092cdc..6282f15b6faeb 100644 --- a/doc/source/whatsnew/v0.15.0.txt +++ b/doc/source/whatsnew/v0.15.0.txt 
@@ -80,7 +80,7 @@ For full docs, see the :ref:`categorical introduction ` and the # Reorder the categories and simultaneously add the missing categories df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", "good", "very good"]) df["grade"] - df.sort("grade") + df.sort_values("grade") df.groupby("grade").size() - ``pandas.core.group_agg`` and ``pandas.core.factor_agg`` were removed. As an alternative, construct diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f96fc41c73f15..38109d5442751 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -35,6 +35,11 @@ New features The ``dtype`` keyword argument in the :func:`read_csv` function for specifying the types of parsed columns is now supported with the ``'python'`` engine (:issue:`14295`). See the :ref:`io docs ` for more information. +.. ipython:: python + :suppress: + + from pandas.compat import StringIO + .. ipython:: python data = "a,b\n1,2\n3,4" diff --git a/doc/source/whatsnew/v0.7.3.txt b/doc/source/whatsnew/v0.7.3.txt index 21aa16e5fcb06..6b5199c55cbf5 100644 --- a/doc/source/whatsnew/v0.7.3.txt +++ b/doc/source/whatsnew/v0.7.3.txt @@ -93,4 +93,4 @@ Series, to be more consistent with the ``groupby`` behavior with DataFrame: df grouped = df.groupby('A')['C'] grouped.describe() - grouped.apply(lambda x: x.order()[-2:]) # top 2 values + grouped.apply(lambda x: x.sort_values()[-2:]) # top 2 values diff --git a/doc/source/whatsnew/v0.9.1.txt b/doc/source/whatsnew/v0.9.1.txt index 9dd29a5fe7bf7..4faf38219ebee 100644 --- a/doc/source/whatsnew/v0.9.1.txt +++ b/doc/source/whatsnew/v0.9.1.txt @@ -20,13 +20,20 @@ New features - `Series.sort`, `DataFrame.sort`, and `DataFrame.sort_index` can now be specified in a per-column manner to support multiple sort orders (:issue:`928`) - .. ipython:: python - :okwarning: + .. code-block:: ipython - df = DataFrame(np.random.randint(0, 2, (6, 3)), columns=['A', 'B', 'C']) + In [2]: df = DataFrame(np.random.randint(0, 2, (6, 3)), columns=['A', 'B', 'C']) - df.sort(['A', 'B'], ascending=[1, 0]) + In [3]: df.sort(['A', 'B'], ascending=[1, 0]) + Out[3]: + A B C + 3 0 1 1 + 4 0 1 1 + 2 0 0 1 + 0 1 0 0 + 1 1 0 0 + 5 1 0 0 - `DataFrame.rank` now supports additional argument values for the `na_option` parameter so missing values can be assigned either the largest From 18ac0b7752b531daa105a73ef3c211f83bc5c8f7 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 26 Mar 2017 12:58:58 -0400 Subject: [PATCH 278/933] MAINT: Remove combineAdd and combineMult (#15805) Deprecated in 0.17.0. xref gh-10735 --- doc/source/10min.rst | 43 +++++++++--------- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 56 ----------------------- pandas/tests/frame/test_operators.py | 68 ---------------------------- 4 files changed, 22 insertions(+), 146 deletions(-) diff --git a/doc/source/10min.rst b/doc/source/10min.rst index 0612e86134cf2..8482eef552c17 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -84,29 +84,28 @@ will be completed: @verbatim In [1]: df2. 
- df2.A df2.boxplot - df2.abs df2.C - df2.add df2.clip - df2.add_prefix df2.clip_lower - df2.add_suffix df2.clip_upper - df2.align df2.columns - df2.all df2.combine - df2.any df2.combineAdd + df2.A df2.bool + df2.abs df2.boxplot + df2.add df2.C + df2.add_prefix df2.clip + df2.add_suffix df2.clip_lower + df2.align df2.clip_upper + df2.all df2.columns + df2.any df2.combine df2.append df2.combine_first - df2.apply df2.combineMult - df2.applymap df2.compound - df2.as_blocks df2.consolidate - df2.asfreq df2.convert_objects - df2.as_matrix df2.copy - df2.astype df2.corr - df2.at df2.corrwith - df2.at_time df2.count - df2.axes df2.cov - df2.B df2.cummax - df2.between_time df2.cummin - df2.bfill df2.cumprod - df2.blocks df2.cumsum - df2.bool df2.D + df2.apply df2.compound + df2.applymap df2.consolidate + df2.as_blocks df2.convert_objects + df2.asfreq df2.copy + df2.as_matrix df2.corr + df2.astype df2.corrwith + df2.at df2.count + df2.at_time df2.cov + df2.axes df2.cummax + df2.B df2.cummin + df2.between_time df2.cumprod + df2.bfill df2.cumsum + df2.blocks df2.D As you can see, the columns ``A``, ``B``, ``C``, and ``D`` are automatically tab completed. ``E`` is there as well; the rest of the attributes have been diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 38109d5442751..358d66653fb9c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -819,6 +819,7 @@ Removal of prior version deprecations/changes - The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) - ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`) - Where clauses in ``pytables`` are only accepted as strings and expressions types and not other data-types (:issue:`12027`) +- ``DataFrame`` has dropped the ``combineAdd`` and ``combineMult`` methods in favor of ``add`` and ``mul`` respectively (:issue:`10735`) .. _whatsnew_0200.performance: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6b5e8e0799421..90c49a9c85133 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5362,62 +5362,6 @@ def isin(self, values): values).reshape(self.shape), self.index, self.columns) - # ---------------------------------------------------------------------- - # Deprecated stuff - - def combineAdd(self, other): - """ - DEPRECATED. Use ``DataFrame.add(other, fill_value=0.)`` instead. - - Add two DataFrame objects and do not propagate - NaN values, so if for a (column, time) one frame is missing a - value, it will default to the other frame's value (which might - be NaN as well) - - Parameters - ---------- - other : DataFrame - - Returns - ------- - DataFrame - - See also - -------- - DataFrame.add - - """ - warnings.warn("'combineAdd' is deprecated. Use " - "'DataFrame.add(other, fill_value=0.)' instead", - FutureWarning, stacklevel=2) - return self.add(other, fill_value=0.) - - def combineMult(self, other): - """ - DEPRECATED. Use ``DataFrame.mul(other, fill_value=1.)`` instead. - - Multiply two DataFrame objects and do not propagate NaN values, so if - for a (column, time) one frame is missing a value, it will default to - the other frame's value (which might be NaN as well) - - Parameters - ---------- - other : DataFrame - - Returns - ------- - DataFrame - - See also - -------- - DataFrame.mul - - """ - warnings.warn("'combineMult' is deprecated. 
Use " - "'DataFrame.mul(other, fill_value=1.)' instead", - FutureWarning, stacklevel=2) - return self.mul(other, fill_value=1.) - DataFrame._setup_axes(['index', 'columns'], info_axis=1, stat_axis=0, axes_are_reversed=True, aliases={'rows': 0}) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index d6a3592446fd5..268854fe6b62d 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -1038,74 +1038,6 @@ def test_boolean_comparison(self): self.assertRaises(ValueError, lambda: df == (2, 2)) self.assertRaises(ValueError, lambda: df == [2, 2]) - def test_combineAdd(self): - - with tm.assert_produces_warning(FutureWarning): - # trivial - comb = self.frame.combineAdd(self.frame) - assert_frame_equal(comb, self.frame * 2) - - # more rigorous - a = DataFrame([[1., nan, nan, 2., nan]], - columns=np.arange(5)) - b = DataFrame([[2., 3., nan, 2., 6., nan]], - columns=np.arange(6)) - expected = DataFrame([[3., 3., nan, 4., 6., nan]], - columns=np.arange(6)) - - with tm.assert_produces_warning(FutureWarning): - result = a.combineAdd(b) - assert_frame_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning): - result2 = a.T.combineAdd(b.T) - assert_frame_equal(result2, expected.T) - - expected2 = a.combine(b, operator.add, fill_value=0.) - assert_frame_equal(expected, expected2) - - # corner cases - with tm.assert_produces_warning(FutureWarning): - comb = self.frame.combineAdd(self.empty) - assert_frame_equal(comb, self.frame) - - with tm.assert_produces_warning(FutureWarning): - comb = self.empty.combineAdd(self.frame) - assert_frame_equal(comb, self.frame) - - # integer corner case - df1 = DataFrame({'x': [5]}) - df2 = DataFrame({'x': [1]}) - df3 = DataFrame({'x': [6]}) - - with tm.assert_produces_warning(FutureWarning): - comb = df1.combineAdd(df2) - assert_frame_equal(comb, df3) - - # mixed type GH2191 - df1 = DataFrame({'A': [1, 2], 'B': [3, 4]}) - df2 = DataFrame({'A': [1, 2], 'C': [5, 6]}) - with tm.assert_produces_warning(FutureWarning): - rs = df1.combineAdd(df2) - xp = DataFrame({'A': [2, 4], 'B': [3, 4.], 'C': [5, 6.]}) - assert_frame_equal(xp, rs) - - # TODO: test integer fill corner? - - def test_combineMult(self): - with tm.assert_produces_warning(FutureWarning): - # trivial - comb = self.frame.combineMult(self.frame) - - assert_frame_equal(comb, self.frame ** 2) - - # corner cases - comb = self.frame.combineMult(self.empty) - assert_frame_equal(comb, self.frame) - - comb = self.empty.combineMult(self.frame) - assert_frame_equal(comb, self.frame) - def test_combine_generic(self): df1 = self.frame df2 = self.frame.loc[self.frame.index[:-5], ['A', 'B', 'C']]
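With ``combineAdd`` and ``combineMult`` removed, the replacement idiom is the one the old deprecation messages pointed to: ``add``/``mul`` with a ``fill_value``. A short sketch (editorial illustration; the frames here are made up for the example):

    import numpy as np
    import pandas as pd

    df1 = pd.DataFrame({'x': [5.0, np.nan]})
    df2 = pd.DataFrame({'x': [1.0, 2.0]})

    df1.add(df2, fill_value=0.)  # was: df1.combineAdd(df2)
    df1.mul(df2, fill_value=1.)  # was: df1.combineMult(df2)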
From 056c0a666f0cb83ee15e793376361b916e7b364c Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 26 Mar 2017 13:02:45 -0400 Subject: [PATCH 279/933] DOC: Explain differences further for sep parameter (#15804) [ci skip] --- doc/source/io.rst | 11 ++++++----- pandas/io/parsers.py | 10 ++++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index a702efdc6aaf9..faeea9d448cf2 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -91,11 +91,12 @@ filepath_or_buffer : various locations), or any object with a ``read()`` method (such as an open file or :class:`~python:io.StringIO`). sep : str, defaults to ``','`` for :func:`read_csv`, ``\t`` for :func:`read_table` - Delimiter to use. If sep is ``None``, - will try to automatically determine this.
Separators longer than 1 character - and different from ``'\s+'`` will be interpreted as regular expressions, will - force use of the python parsing engine and will ignore quotes in the data. - Regex example: ``'\\r\\t'``. + Delimiter to use. If sep is ``None``, the C engine cannot automatically detect + the separator, but the Python parsing engine can, meaning the latter will be + used automatically. In addition, separators longer than 1 character and + different from ``'\s+'`` will be interpreted as regular expressions and + will also force the use of the Python parsing engine. Note that regex + delimiters are prone to ignoring quoted data. Regex example: ``'\\r\\t'``. delimiter : str, default ``None`` Alternative argument name for sep. delim_whitespace : boolean, default False diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index f7b2d75c19304..45c62b224ef4e 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -305,10 +305,12 @@ currently more feature-complete.""" _sep_doc = r"""sep : str, default {default} - Delimiter to use. If sep is None, will try to automatically determine - this. Separators longer than 1 character and different from ``'\s+'`` will - be interpreted as regular expressions, will force use of the python parsing - engine and will ignore quotes in the data. Regex example: ``'\r\t'``""" + Delimiter to use. If sep is None, the C engine cannot automatically detect + the separator, but the Python parsing engine can, meaning the latter will + be used automatically. In addition, separators longer than 1 character and + different from ``'\s+'`` will be interpreted as regular expressions and + will also force the use of the Python parsing engine. Note that regex + delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``""" _read_csv_doc = """ Read CSV (comma-separated) file into DataFrame From 179363765110611ad10883bab55d79785369da9b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 26 Mar 2017 14:26:49 -0400 Subject: [PATCH 280/933] TST: suppress some numpy warnings (#15811) * BUG: incorrect conversion on isin algos with m8 * TST: suppress some warnings --- pandas/core/algorithms.py | 5 ++++- pandas/tests/test_algos.py | 2 ++ pandas/tests/test_categorical.py | 6 ++++-- pandas/tests/test_nanops.py | 19 ++++++++++++------- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3b77bda6f69f0..a62d290277443 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -174,7 +174,7 @@ def isin(comps, values): " to isin(), you passed a " "[{0}]".format(type(values).__name__)) - from pandas import DatetimeIndex, PeriodIndex + from pandas import DatetimeIndex, TimedeltaIndex, PeriodIndex if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): values = np.array(list(values), dtype='object') @@ -183,6 +183,9 @@ def isin(comps, values): if is_period_dtype(values): comps = PeriodIndex(comps) values = PeriodIndex(values) + elif is_timedelta64_dtype(comps): + comps = TimedeltaIndex(comps) + values = TimedeltaIndex(values) else: comps = DatetimeIndex(comps) values = DatetimeIndex(values) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index f8eac7a8911ad..5d69746034346 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -431,6 +431,8 @@ def test_basic(self): expected = np.array([False, False]) tm.assert_numpy_array_equal(result, expected) + def test_i8(self): + arr = pd.date_range('20130101', periods=3).values result = 
algos.isin(arr, [arr[0]]) expected = np.array([True, False, False]) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 6c8aeba704c7b..479f0e4566b8d 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -2913,10 +2913,12 @@ def test_info(self): df['category'] = Series(np.array(list('abcdefghij')).take( np.random.randint(0, 10, size=n))).astype('category') df.isnull() - df.info() + buf = compat.StringIO() + df.info(buf=buf) df2 = df[df['category'] == 'd'] - df2.info() + buf = compat.StringIO() + df2.info(buf=buf) def test_groupby_sort(self): diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 75a7555d58ca5..54de8c1e34031 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -389,9 +389,10 @@ def test_nanstd(self): def test_nansem(self): tm.skip_if_no_package('scipy', min_version='0.17.0') from scipy.stats import sem - self.check_funs_ddof(nanops.nansem, sem, allow_complex=False, - allow_str=False, allow_date=False, - allow_tdelta=True, allow_obj='convert') + with np.errstate(invalid='ignore'): + self.check_funs_ddof(nanops.nansem, sem, allow_complex=False, + allow_str=False, allow_date=False, + allow_tdelta=False, allow_obj='convert') def _minmax_wrap(self, value, axis=None, func=None): res = func(value, axis) @@ -449,16 +450,20 @@ def test_nanskew(self): tm.skip_if_no_package('scipy', min_version='0.17.0') from scipy.stats import skew func = partial(self._skew_kurt_wrap, func=skew) - self.check_funs(nanops.nanskew, func, allow_complex=False, - allow_str=False, allow_date=False, allow_tdelta=False) + with np.errstate(invalid='ignore'): + self.check_funs(nanops.nanskew, func, allow_complex=False, + allow_str=False, allow_date=False, + allow_tdelta=False) def test_nankurt(self): tm.skip_if_no_package('scipy', min_version='0.17.0') from scipy.stats import kurtosis func1 = partial(kurtosis, fisher=True) func = partial(self._skew_kurt_wrap, func=func1) - self.check_funs(nanops.nankurt, func, allow_complex=False, - allow_str=False, allow_date=False, allow_tdelta=False) + with np.errstate(invalid='ignore'): + self.check_funs(nanops.nankurt, func, allow_complex=False, + allow_str=False, allow_date=False, + allow_tdelta=False) def test_nanprod(self): self.check_funs(nanops.nanprod, np.prod, allow_str=False, From da92411485d7fbe766d12e5a78910ff7aaa45c12 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 26 Mar 2017 14:53:16 -0400 Subject: [PATCH 281/933] DOC: remove as_indexer from text.rst example --- doc/source/text.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/text.rst b/doc/source/text.rst index b110ef2167a03..4992f132ce815 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -146,8 +146,8 @@ following code will cause trouble because of the regular expression meaning of # We need to escape the special character (for >1 len patterns) dollars.str.replace(r'-\$', '-') -The ``replace`` method can also take a callable as replacement. It is called -on every ``pat`` using :func:`re.sub`. The callable should expect one +The ``replace`` method can also take a callable as replacement. It is called +on every ``pat`` using :func:`re.sub`. The callable should expect one positional argument (a regex object) and return a string. .. versionadded:: 0.20.0 @@ -380,7 +380,7 @@ or match a pattern: .. 
ipython:: python - pd.Series(['1', '2', '3a', '3b', '03c']).str.match(pattern, as_indexer=True) + pd.Series(['1', '2', '3a', '3b', '03c']).str.match(pattern) The distinction between ``match`` and ``contains`` is strictness: ``match`` relies on strict ``re.match``, while ``contains`` relies on ``re.search``. From 7e3dd90d0e4744c29da08cb158a10c37b6610ef0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Mar 2017 07:56:56 -0400 Subject: [PATCH 282/933] DOC: small fixes in text.rst --- doc/source/text.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/source/text.rst b/doc/source/text.rst index 4992f132ce815..e3e4b24d17f44 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -372,12 +372,11 @@ You can check whether elements contain a pattern: .. ipython:: python - pattern = r'[a-z][0-9]' + pattern = r'[0-9][a-z]' pd.Series(['1', '2', '3a', '3b', '03c']).str.contains(pattern) or match a pattern: - .. ipython:: python pd.Series(['1', '2', '3a', '3b', '03c']).str.match(pattern) @@ -386,7 +385,7 @@ The distinction between ``match`` and ``contains`` is strictness: ``match`` relies on strict ``re.match``, while ``contains`` relies on ``re.search``. Methods like ``match``, ``contains``, ``startswith``, and ``endswith`` take - an extra ``na`` argument so missing values can be considered True or False: +an extra ``na`` argument so missing values can be considered True or False: .. ipython:: python From aff78d91f5895f5645cf254f03a3538829e73687 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 26 Mar 2017 15:21:05 -0400 Subject: [PATCH 283/933] CI: remove more cruft --- .travis.yml | 6 ------ ci/install_travis.sh | 3 ++- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index d9dbdf96ff976..777280e3c4a25 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,7 +26,6 @@ matrix: include: - language: objective-c os: osx - compiler: clang cache: ccache: true directories: @@ -58,11 +57,6 @@ matrix: - python: 3.6 env: - JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true - addons: - apt: - packages: - - libatlas-base-dev - - gfortran # In allow_failures - python: 2.7 env: diff --git a/ci/install_travis.sh b/ci/install_travis.sh index ac7bb2c2f3764..f71df979c9df0 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -77,8 +77,9 @@ if [ -z "$NOCACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then echo "[ccache]: $ccache" export CC='ccache gcc' elif [ -z "$NOCACHE" ] && [ "${TRAVIS_OS_NAME}" == "osx" ]; then + echo "[Install ccache]" + brew install ccache > /dev/null 2>&1 echo "[Using ccache]" - time brew install ccache export PATH=/usr/local/opt/ccache/libexec:$PATH gcc=$(which gcc) echo "[gcc]: $gcc" From 056c0a666f0cb83ee15e793376361b916e7b364c Mon Sep 17 00:00:00 2001 From: Wes Turner Date: Mon, 27 Mar 2017 09:06:53 -0400 Subject: [PATCH 284/933] DOC: ecosystem.rst: QtPandas xref draperjames/qtpandas#36 Author: Wes Turner Closes #15813 from westurner/patch-5 and squashes the following commits: a97084e [Wes Turner] DOC: ecosystem.rst: QtPandas --- doc/source/ecosystem.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index 5a7d6a11d293d..ee0ea60c6f220 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -93,8 +93,8 @@ targets the IPython Notebook environment. `Plotly’s `__ `Python API `__ enables interactive figures and web shareability. 
Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js `__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn `__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks `__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `cloud `__, `offline `__, or `on-premise `__ accounts for private use. -Visualizing Data in Qt applications -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +`QtPandas `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Spun off from the main pandas library, the `qtpandas `__ library enables DataFrame visualization and manipulation in PyQt4 and PySide applications. From 80280ec576ab8077ba0cc6664c6a358f0b1e671e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Mar 2017 10:45:17 -0400 Subject: [PATCH 285/933] DEPR: Drop support for NaN categories in Categorical Deprecated in 0.17.0. xref #10748 xref #13648 Author: Jeff Reback Author: gfyoung Closes #15806 from gfyoung/categories-nan-drop and squashes the following commits: 318175b [Jeff Reback] TST: test pd.NaT with correct dtype 4dce349 [gfyoung] Drop support for NaN categories in Categorical --- doc/source/categorical.rst | 9 ++ doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/categorical.py | 13 +- pandas/tests/indexes/test_category.py | 14 -- pandas/tests/test_categorical.py | 207 ++++---------------------- 5 files changed, 41 insertions(+), 203 deletions(-) diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 2203737ecd7b5..411f973e9a71f 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -230,6 +230,15 @@ Categories must be unique or a `ValueError` is raised: except ValueError as e: print("ValueError: " + str(e)) +Categories must also not be ``NaN`` or a `ValueError` is raised: + +.. ipython:: python + + try: + s.cat.categories = [1,2,np.nan] + except ValueError as e: + print("ValueError: " + str(e)) + Appending new categories ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 358d66653fb9c..a0b2b47c4bac3 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -816,6 +816,7 @@ Removal of prior version deprecations/changes in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). 
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 358d66653fb9c..a0b2b47c4bac3 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -816,6 +816,7 @@ Removal of prior version deprecations/changes
   in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`).
 - The deprecated ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`)
 - The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`)
+- ``Categorical`` has dropped support for ``NaN`` categories (:issue:`10748`)
 - The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`)
 - ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`)
 - Where clauses in ``pytables`` are only accepted as strings and expressions types and not other data-types (:issue:`12027`)

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 0e58c18631588..632c24c33feb7 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -545,18 +545,11 @@ def _validate_categories(cls, categories, fastpath=False):

         if not fastpath:

-            # check properties of the categories
-            # we don't allow NaNs in the categories themselves
-
+            # Categories cannot contain NaN.
             if categories.hasnans:
-                # NaNs in cats deprecated in 0.17
-                # GH 10748
-                msg = ('\nSetting NaNs in `categories` is deprecated and '
-                       'will be removed in a future version of pandas.')
-                warn(msg, FutureWarning, stacklevel=3)
-
-            # categories must be unique
+                raise ValueError('Categorical categories cannot be null')

+            # Categories must be unique.
             if not categories.is_unique:
                 raise ValueError('Categorical categories must be unique')

diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index 64a0e71bd5ace..ef1be7e60e0e8 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -183,11 +183,6 @@ def test_contains(self):
         self.assertFalse(0 in ci)
         self.assertFalse(1 in ci)

-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            ci = CategoricalIndex(
-                list('aabbca'), categories=list('cabdef') + [np.nan])
-        self.assertFalse(np.nan in ci)
-
         ci = CategoricalIndex(
             list('aabbca') + [np.nan], categories=list('cabdef'))
         self.assertTrue(np.nan in ci)
@@ -541,7 +536,6 @@ def test_ensure_copied_data(self):
             self.assertIs(_base(index.values), _base(result.values))

     def test_equals_categorical(self):
-
         ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
         ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'],
                                ordered=True)
@@ -579,14 +573,6 @@ def test_equals_categorical(self):
         self.assertFalse(ci.equals(CategoricalIndex(list('aabca'))))
         self.assertTrue(ci.equals(ci.copy()))

-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            ci = CategoricalIndex(list('aabca'),
-                                  categories=['c', 'a', 'b', np.nan])
-        self.assertFalse(ci.equals(list('aabca')))
-        self.assertFalse(ci.equals(CategoricalIndex(list('aabca'))))
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            self.assertTrue(ci.equals(ci.copy()))
-
         ci = CategoricalIndex(list('aabca') + [np.nan],
                               categories=['c', 'a', 'b'])
         self.assertFalse(ci.equals(list('aabca')))

diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 479f0e4566b8d..8fd3c6324d48c 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # pylint: disable=E1101,E1103,W0232

+import pytest
 import sys
 from datetime import datetime
 from distutils.version import LooseVersion
@@ -17,7 +18,8 @@
 import pandas.compat as compat
 import pandas.util.testing as
tm from pandas import (Categorical, Index, Series, DataFrame, PeriodIndex, - Timestamp, CategoricalIndex, isnull) + Timestamp, CategoricalIndex, DatetimeIndex, + isnull, NaT) from pandas.compat import range, lrange, u, PY3 from pandas.core.config import option_context @@ -160,12 +162,6 @@ def f(): self.assertRaises(ValueError, f) - def f(): - with tm.assert_produces_warning(FutureWarning): - Categorical([1, 2], [1, 2, np.nan, np.nan]) - - self.assertRaises(ValueError, f) - # The default should be unordered c1 = Categorical(["a", "b", "c", "a"]) self.assertFalse(c1.ordered) @@ -222,29 +218,12 @@ def f(): cat = pd.Categorical([np.nan, 1., 2., 3.]) self.assertTrue(is_float_dtype(cat.categories)) - # Deprecating NaNs in categoires (GH #10748) - # preserve int as far as possible by converting to object if NaN is in - # categories - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([np.nan, 1, 2, 3], - categories=[np.nan, 1, 2, 3]) - self.assertTrue(is_object_dtype(cat.categories)) - # This doesn't work -> this would probably need some kind of "remember # the original type" feature to try to cast the array interface result # to... # vals = np.asarray(cat[cat.notnull()]) # self.assertTrue(is_integer_dtype(vals)) - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([np.nan, "a", "b", "c"], - categories=[np.nan, "a", "b", "c"]) - self.assertTrue(is_object_dtype(cat.categories)) - # but don't do it for floats - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([np.nan, 1., 2., 3.], - categories=[np.nan, 1., 2., 3.]) - self.assertTrue(is_float_dtype(cat.categories)) # corner cases cat = pd.Categorical([1]) @@ -295,6 +274,22 @@ def f(): c = Categorical(np.array([], dtype='int64'), # noqa categories=[3, 2, 1], ordered=True) + def test_constructor_with_null(self): + + # Cannot have NaN in categories + with pytest.raises(ValueError): + pd.Categorical([np.nan, "a", "b", "c"], + categories=[np.nan, "a", "b", "c"]) + + with pytest.raises(ValueError): + pd.Categorical([None, "a", "b", "c"], + categories=[None, "a", "b", "c"]) + + with pytest.raises(ValueError): + pd.Categorical(DatetimeIndex(['nat', '20160101']), + categories=[NaT, Timestamp('20160101')]) + + def test_constructor_with_index(self): ci = CategoricalIndex(list('aabbca'), categories=list('cab')) tm.assert_categorical_equal(ci.values, Categorical(ci)) @@ -418,6 +413,12 @@ def f(): self.assertRaises(ValueError, f) + # NaN categories included + def f(): + Categorical.from_codes([0, 1, 2], ["a", "b", np.nan]) + + self.assertRaises(ValueError, f) + # too negative def f(): Categorical.from_codes([-2, 1, 2], ["a", "b", "c"]) @@ -649,30 +650,6 @@ def test_describe(self): name='categories')) tm.assert_frame_equal(desc, expected) - # NA as a category - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical(["a", "c", "c", np.nan], - categories=["b", "a", "c", np.nan]) - result = cat.describe() - - expected = DataFrame([[0, 0], [1, 0.25], [2, 0.5], [1, 0.25]], - columns=['counts', 'freqs'], - index=pd.CategoricalIndex(['b', 'a', 'c', np.nan], - name='categories')) - tm.assert_frame_equal(result, expected, check_categorical=False) - - # NA as an unused category - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical(["a", "c", "c"], - categories=["b", "a", "c", np.nan]) - result = cat.describe() - - exp_idx = pd.CategoricalIndex( - ['b', 'a', 'c', np.nan], name='categories') - expected = DataFrame([[0, 0], [1, 1 / 3.], [2, 2 / 3.], [0, 0]], - columns=['counts', 
'freqs'], index=exp_idx) - tm.assert_frame_equal(result, expected, check_categorical=False) - def test_print(self): expected = ["[a, b, b, a, a, c, c, c]", "Categories (3, object): [a < b < c]"] @@ -1119,90 +1096,18 @@ def test_nan_handling(self): self.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0], dtype=np.int8)) - # If categories have nan included, the code should point to that - # instead - with tm.assert_produces_warning(FutureWarning): - c = Categorical(["a", "b", np.nan, "a"], - categories=["a", "b", np.nan]) - self.assert_index_equal(c.categories, Index(["a", "b", np.nan])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 1, 2, 0], dtype=np.int8)) - c[1] = np.nan - self.assert_index_equal(c.categories, Index(["a", "b", np.nan])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 2, 2, 0], dtype=np.int8)) - - # Changing categories should also make the replaced category np.nan - c = Categorical(["a", "b", "c", "a"]) - with tm.assert_produces_warning(FutureWarning): - c.categories = ["a", "b", np.nan] # noqa - - self.assert_index_equal(c.categories, Index(["a", "b", np.nan])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 1, 2, 0], dtype=np.int8)) - # Adding nan to categories should make assigned nan point to the # category! c = Categorical(["a", "b", np.nan, "a"]) self.assert_index_equal(c.categories, Index(["a", "b"])) self.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8)) - with tm.assert_produces_warning(FutureWarning): - c.set_categories(["a", "b", np.nan], rename=True, inplace=True) - - self.assert_index_equal(c.categories, Index(["a", "b", np.nan])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 1, -1, 0], dtype=np.int8)) - c[1] = np.nan - self.assert_index_equal(c.categories, Index(["a", "b", np.nan])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 2, -1, 0], dtype=np.int8)) - - # Remove null categories (GH 10156) - cases = [([1.0, 2.0, np.nan], [1.0, 2.0]), - (['a', 'b', None], ['a', 'b']), - ([pd.Timestamp('2012-05-01'), pd.NaT], - [pd.Timestamp('2012-05-01')])] - - null_values = [np.nan, None, pd.NaT] - - for with_null, without in cases: - with tm.assert_produces_warning(FutureWarning): - base = Categorical([], with_null) - expected = Categorical([], without) - - for nullval in null_values: - result = base.remove_categories(nullval) - self.assert_categorical_equal(result, expected) - - # Different null values are indistinguishable - for i, j in [(0, 1), (0, 2), (1, 2)]: - nulls = [null_values[i], null_values[j]] - - def f(): - with tm.assert_produces_warning(FutureWarning): - Categorical([], categories=nulls) - - self.assertRaises(ValueError, f) def test_isnull(self): exp = np.array([False, False, True]) c = Categorical(["a", "b", np.nan]) res = c.isnull() - self.assert_numpy_array_equal(res, exp) - with tm.assert_produces_warning(FutureWarning): - c = Categorical(["a", "b", np.nan], categories=["a", "b", np.nan]) - res = c.isnull() - self.assert_numpy_array_equal(res, exp) - - # test both nan in categories and as -1 - exp = np.array([True, False, True]) - c = Categorical(["a", "b", np.nan]) - with tm.assert_produces_warning(FutureWarning): - c.set_categories(["a", "b", np.nan], rename=True, inplace=True) - c[0] = np.nan - res = c.isnull() self.assert_numpy_array_equal(res, exp) def test_codes_immutable(self): @@ -1487,45 +1392,10 @@ def test_slicing_directly(self): def test_set_item_nan(self): cat = pd.Categorical([1, 2, 3]) - exp = pd.Categorical([1, np.nan, 3], categories=[1, 2, 3]) cat[1] = 
np.nan - tm.assert_categorical_equal(cat, exp) - # if nan in categories, the proper code should be set! - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) - cat[1] = np.nan - exp = np.array([0, 3, 2, -1], dtype=np.int8) - self.assert_numpy_array_equal(cat.codes, exp) - - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) - cat[1:3] = np.nan - exp = np.array([0, 3, 3, -1], dtype=np.int8) - self.assert_numpy_array_equal(cat.codes, exp) - - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) - cat[1:3] = [np.nan, 1] - exp = np.array([0, 3, 0, -1], dtype=np.int8) - self.assert_numpy_array_equal(cat.codes, exp) - - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) - cat[1:3] = [np.nan, np.nan] - exp = np.array([0, 3, 3, -1], dtype=np.int8) - self.assert_numpy_array_equal(cat.codes, exp) - - cat = pd.Categorical([1, 2, np.nan, 3], categories=[1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) - cat[pd.isnull(cat)] = np.nan - exp = np.array([0, 1, 3, 2], dtype=np.int8) - self.assert_numpy_array_equal(cat.codes, exp) + exp = pd.Categorical([1, np.nan, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(cat, exp) def test_shift(self): # GH 9416 @@ -2026,33 +1896,12 @@ def test_sideeffects_free(self): def test_nan_handling(self): - # Nans are represented as -1 in labels + # NaNs are represented as -1 in labels s = Series(Categorical(["a", "b", np.nan, "a"])) self.assert_index_equal(s.cat.categories, Index(["a", "b"])) self.assert_numpy_array_equal(s.values.codes, np.array([0, 1, -1, 0], dtype=np.int8)) - # If categories have nan included, the label should point to that - # instead - with tm.assert_produces_warning(FutureWarning): - s2 = Series(Categorical(["a", "b", np.nan, "a"], - categories=["a", "b", np.nan])) - - exp_cat = Index(["a", "b", np.nan]) - self.assert_index_equal(s2.cat.categories, exp_cat) - self.assert_numpy_array_equal(s2.values.codes, - np.array([0, 1, 2, 0], dtype=np.int8)) - - # Changing categories should also make the replaced category np.nan - s3 = Series(Categorical(["a", "b", "c", "a"])) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - s3.cat.categories = ["a", "b", np.nan] - - exp_cat = Index(["a", "b", np.nan]) - self.assert_index_equal(s3.cat.categories, exp_cat) - self.assert_numpy_array_equal(s3.values.codes, - np.array([0, 1, 2, 0], dtype=np.int8)) - def test_cat_accessor(self): s = Series(Categorical(["a", "b", np.nan, "a"])) self.assert_index_equal(s.cat.categories, Index(["a", "b"])) From 686e9e00aaac6747bf0c8e340f5c3eedd893d702 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Mar 2017 10:06:32 -0400 Subject: [PATCH 286/933] CI: make generic 3.5 builds CI: rename BUILD_DOC -> DOC --- .travis.yml | 45 ++++++++++--------- ci/build_docs.sh | 2 +- ...BUILD.build => requirements-3.5_DOC.build} | 1 - ...DOC_BUILD.run => requirements-3.5_DOC.run} | 0 ...5_DOC_BUILD.sh => requirements-3.5_DOC.sh} | 0 ci/script_multi.sh | 7 +-- ci/script_single.sh | 9 ++-- 7 
files changed, 29 insertions(+), 35 deletions(-) rename ci/{requirements-3.5_DOC_BUILD.build => requirements-3.5_DOC.build} (88%) rename ci/{requirements-3.5_DOC_BUILD.run => requirements-3.5_DOC.run} (100%) rename ci/{requirements-3.5_DOC_BUILD.sh => requirements-3.5_DOC.sh} (100%) diff --git a/.travis.yml b/.travis.yml index 777280e3c4a25..1053f8925ebd7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,7 @@ sudo: false language: python +# Default Python version is usually 2.7 +python: 3.5 # To turn off cached cython files and compiler cache # set NOCACHE-true @@ -7,6 +9,7 @@ language: python # travis cache --delete inside the project directory from the travis command line client # The cash directories will be deleted if anything in ci/ changes in a commit cache: + ccache: true directories: - $HOME/.cache # cython cache - $HOME/.ccache # compiler cache @@ -23,69 +26,67 @@ git: matrix: fast_finish: true + exclude: + # Exclude the default Python 3.5 build + - python: 3.5 include: - - language: objective-c - os: osx - cache: - ccache: true - directories: - - $HOME/.cache # cython cache - - $HOME/.ccache # compiler cache + - os: osx + language: generic env: - - JOB="3.5_OSX" TEST_ARGS="--skip-slow --skip-network" TRAVIS_PYTHON_VERSION=3.5 - - python: 2.7 + - JOB="3.5_OSX" TEST_ARGS="--skip-slow --skip-network" + - os: linux env: - JOB="2.7_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8" addons: apt: packages: - language-pack-zh-hans - - python: 2.7 + - os: linux env: - JOB="2.7" TEST_ARGS="--skip-slow" LINT=true addons: apt: packages: - python-gtk2 - - python: 3.5 + - os: linux env: - JOB="3.5" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true addons: apt: packages: - xsel - - python: 3.6 + - os: linux env: - JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true # In allow_failures - - python: 2.7 + - os: linux env: - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" # In allow_failures - - python: 2.7 + - os: linux env: - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true # In allow_failures - - python: 3.6 + - os: linux env: - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" # In allow_failures - - python: 3.5 + - os: linux env: - - JOB="3.5_DOC_BUILD" DOC_BUILD=true + - JOB="3.5_DOC" DOC=true allow_failures: - - python: 2.7 + - os: linux env: - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" - - python: 2.7 + - os: linux env: - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true - - python: 3.6 + - os: linux env: - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" - - python: 3.5 + - os: linux env: - - JOB="3.5_DOC_BUILD" DOC_BUILD=true + - JOB="3.5_DOC" DOC=true before_install: - echo "before_install" diff --git a/ci/build_docs.sh b/ci/build_docs.sh index bfe7a1eed756b..1356d097025c9 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -17,7 +17,7 @@ if [ "$?" 
!= "0" ]; then fi -if [ x"$DOC_BUILD" != x"" ]; then +if [ "$DOC" ]; then echo "Will build docs" diff --git a/ci/requirements-3.5_DOC_BUILD.build b/ci/requirements-3.5_DOC.build similarity index 88% rename from ci/requirements-3.5_DOC_BUILD.build rename to ci/requirements-3.5_DOC.build index f7befe3b31865..73aeb3192242f 100644 --- a/ci/requirements-3.5_DOC_BUILD.build +++ b/ci/requirements-3.5_DOC.build @@ -1,6 +1,5 @@ python=3.5* python-dateutil pytz -nomkl numpy cython diff --git a/ci/requirements-3.5_DOC_BUILD.run b/ci/requirements-3.5_DOC.run similarity index 100% rename from ci/requirements-3.5_DOC_BUILD.run rename to ci/requirements-3.5_DOC.run diff --git a/ci/requirements-3.5_DOC_BUILD.sh b/ci/requirements-3.5_DOC.sh similarity index 100% rename from ci/requirements-3.5_DOC_BUILD.sh rename to ci/requirements-3.5_DOC.sh diff --git a/ci/script_multi.sh b/ci/script_multi.sh index f0fbb8c54bf2a..88ecaf344a410 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -4,11 +4,6 @@ echo "[script multi]" source activate pandas -# don't run the tests for the doc build -if [ x"$DOC_BUILD" != x"" ]; then - exit 0 -fi - if [ -n "$LOCALE_OVERRIDE" ]; then export LC_ALL="$LOCALE_OVERRIDE"; echo "Setting LC_ALL to $LOCALE_OVERRIDE" @@ -26,6 +21,8 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED if [ "$BUILD_TEST" ]; then cd /tmp python -c "import pandas; pandas.test(['-n 2'])" +elif [ "$DOC" ]; then + echo "We are not running pytest as this is a doc-build" elif [ "$COVERAGE" ]; then echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas diff --git a/ci/script_single.sh b/ci/script_single.sh index 86e822cb57653..db637679f0e0f 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -4,11 +4,6 @@ echo "[script_single]" source activate pandas -# don't run the tests for the doc build -if [ x"$DOC_BUILD" != x"" ]; then - exit 0 -fi - if [ -n "$LOCALE_OVERRIDE" ]; then export LC_ALL="$LOCALE_OVERRIDE"; echo "Setting LC_ALL to $LOCALE_OVERRIDE" @@ -18,7 +13,9 @@ if [ -n "$LOCALE_OVERRIDE" ]; then fi if [ "$BUILD_TEST" ]; then - echo "We are not running pytest as this is simply a build test." + echo "We are not running pytest as this is a build test." 
+elif [ "$DOC" ]; then + echo "We are not running pytest as this is a doc-build" elif [ "$COVERAGE" ]; then echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas From 71f621fe11b499a6e1420737faf375ba99bb619b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Mar 2017 15:22:22 -0400 Subject: [PATCH 287/933] API: NaT boolean accessors now return False (#15782) TST: add pandas/tests/scalar/test_nat TST: revise testing of tseries accessors closes #15781 --- doc/source/whatsnew/v0.20.0.txt | 3 +- pandas/_libs/tslib.pyx | 53 +++- pandas/tests/indexes/datetimes/test_misc.py | 9 +- pandas/tests/indexes/datetimes/test_ops.py | 13 +- pandas/tests/indexes/period/test_ops.py | 9 +- pandas/tests/indexes/period/test_period.py | 4 +- pandas/tests/indexes/timedeltas/test_ops.py | 6 +- pandas/tests/scalar/test_nat.py | 248 +++++++++++++++++ pandas/tests/scalar/test_period.py | 25 -- pandas/tests/scalar/test_timedelta.py | 15 +- pandas/tests/scalar/test_timestamp.py | 294 +++----------------- pandas/tests/series/test_datetime_values.py | 20 +- pandas/tests/test_base.py | 2 +- pandas/tests/test_categorical.py | 28 +- pandas/tseries/common.py | 12 +- pandas/tseries/index.py | 61 ++-- pandas/tseries/period.py | 18 +- pandas/tseries/tdi.py | 11 +- 18 files changed, 429 insertions(+), 402 deletions(-) create mode 100644 pandas/tests/scalar/test_nat.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a0b2b47c4bac3..3ab69e1ff409b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -771,7 +771,8 @@ Other API Changes since pandas version 0.13.0 and can be done with the ``Series.str.extract`` method (:issue:`5224`). As a consequence, the ``as_indexer`` keyword is ignored (no longer needed to specify the new behaviour) and is deprecated. - +- ``NaT`` will now correctly report ``False`` for datetimelike boolean operations such as ``is_month_start`` (:issue:`15781`) +- ``NaT`` will now correctly return ``np.nan`` for ``Timedelta`` and ``Period`` accessors such as ``days`` and ``quarter`` (:issue:`15782`) .. 
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index 055534bbdb7ee..d441f1ec4759b 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -849,6 +849,30 @@ class NaTType(_NaT):
     def is_leap_year(self):
         return False

+    @property
+    def is_month_start(self):
+        return False
+
+    @property
+    def is_quarter_start(self):
+        return False
+
+    @property
+    def is_year_start(self):
+        return False
+
+    @property
+    def is_month_end(self):
+        return False
+
+    @property
+    def is_quarter_end(self):
+        return False
+
+    @property
+    def is_year_end(self):
+        return False
+
     def __rdiv__(self, other):
         return _nat_rdivide_op(self, other)

@@ -3799,8 +3823,9 @@ def array_strptime(ndarray[object] values, object fmt,
 # these by definition return np.nan
 fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', 'second',
           'millisecond', 'microsecond', 'nanosecond',
-          'week', 'dayofyear', 'days_in_month', 'daysinmonth', 'dayofweek',
-          'weekday_name']
+          'week', 'dayofyear', 'weekofyear', 'days_in_month', 'daysinmonth',
+          'dayofweek', 'weekday_name', 'days', 'seconds', 'microseconds',
+          'nanoseconds', 'qyear']
 for field in fields:
     prop = property(fget=lambda self: np.nan)
     setattr(NaTType, field, prop)

@@ -4810,7 +4835,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field,
     if field == 'is_month_start':
         if is_business:
             for i in range(count):
-                if dtindex[i] == NPY_NAT: out[i] = -1; continue
+                if dtindex[i] == NPY_NAT: out[i] = 0; continue

                 pandas_datetime_to_datetimestruct(
                     dtindex[i], PANDAS_FR_ns, &dts)
@@ -4823,7 +4848,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field,
             return out.view(bool)
         else:
             for i in range(count):
-                if dtindex[i] == NPY_NAT: out[i] = -1; continue
+                if dtindex[i] == NPY_NAT: out[i] = 0; continue

                 pandas_datetime_to_datetimestruct(
                     dtindex[i], PANDAS_FR_ns, &dts)
@@ -4836,7 +4861,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field,
     elif field == 'is_month_end':
         if is_business:
             for i in range(count):
-                if dtindex[i] == NPY_NAT: out[i] = -1; continue
+                if dtindex[i] == NPY_NAT: out[i] = 0; continue

                 pandas_datetime_to_datetimestruct(
                     dtindex[i], PANDAS_FR_ns, &dts)
@@ -4854,7 +4879,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field,
             return out.view(bool)
         else:
             for i in range(count):
-                if dtindex[i] == NPY_NAT: out[i] = -1; continue
+                if dtindex[i] == NPY_NAT: out[i] = 0; continue

                 pandas_datetime_to_datetimestruct(
                     dtindex[i], PANDAS_FR_ns, &dts)
@@ -4871,7 +4896,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field,
     elif field == 'is_quarter_start':
         if is_business:
             for i in range(count):
-                if dtindex[i] == NPY_NAT: out[i] = -1; continue
+                if dtindex[i] == NPY_NAT: out[i] = 0; continue

                 pandas_datetime_to_datetimestruct(
                     dtindex[i], PANDAS_FR_ns, &dts)
@@ -4885,7 +4910,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field,
             return out.view(bool)
         else:
             for i in range(count):
-                if dtindex[i] == NPY_NAT: out[i] = -1; continue
+                if dtindex[i] == NPY_NAT: out[i] = 0; continue

                 pandas_datetime_to_datetimestruct(
                     dtindex[i], PANDAS_FR_ns, &dts)
@@ -4898,7 +4923,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field,
     elif field == 'is_quarter_end':
         if is_business:
             for i in range(count):
-                if dtindex[i] == NPY_NAT: out[i] = -1; continue
+                if dtindex[i] == NPY_NAT: out[i] = 0; continue

                 pandas_datetime_to_datetimestruct(
                     dtindex[i], PANDAS_FR_ns, &dts)
@@ -4917,7 +4942,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field,
             return out.view(bool)
         else:
             for i in range(count):
-                if dtindex[i] == NPY_NAT: out[i] = -1; continue
+                if dtindex[i] == NPY_NAT: out[i] = 0; continue

                 pandas_datetime_to_datetimestruct(
                     dtindex[i], PANDAS_FR_ns, &dts)
@@ -4934,7 +4959,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field,
     elif field == 'is_year_start':
         if is_business:
             for i in range(count):
-                if dtindex[i] == NPY_NAT: out[i] = -1; continue
+                if dtindex[i] == NPY_NAT: out[i] = 0; continue

                 pandas_datetime_to_datetimestruct(
                     dtindex[i], PANDAS_FR_ns, &dts)
@@ -4948,7 +4973,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field,
             return out.view(bool)
         else:
             for i in range(count):
-                if dtindex[i] == NPY_NAT: out[i] = -1; continue
+                if dtindex[i] == NPY_NAT: out[i] = 0; continue

                 pandas_datetime_to_datetimestruct(
                     dtindex[i], PANDAS_FR_ns, &dts)
@@ -4961,7 +4986,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field,
     elif field == 'is_year_end':
         if is_business:
             for i in range(count):
-                if dtindex[i] == NPY_NAT: out[i] = -1; continue
+                if dtindex[i] == NPY_NAT: out[i] = 0; continue

                 pandas_datetime_to_datetimestruct(
                     dtindex[i], PANDAS_FR_ns, &dts)
@@ -4980,7 +5005,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field,
             return out.view(bool)
         else:
             for i in range(count):
-                if dtindex[i] == NPY_NAT: out[i] = -1; continue
+                if dtindex[i] == NPY_NAT: out[i] = 0; continue

                 pandas_datetime_to_datetimestruct(
                     dtindex[i], PANDAS_FR_ns, &dts)
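A minimal sketch of the resulting index-level behaviour (illustrative only, not part of the diff): with ``NaT`` slots written as 0 rather than -1, the boolean fields no longer need NaN masking and come back as a plain boolean array:

    import pandas as pd

    idx = pd.DatetimeIndex(['2017-03-01', pd.NaT])

    # the NaT position is simply False now
    print(idx.is_month_start)  # [ True False]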
diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py
index ef24c493f5090..76a26b09ed131 100644
--- a/pandas/tests/indexes/datetimes/test_misc.py
+++ b/pandas/tests/indexes/datetimes/test_misc.py
@@ -259,19 +259,14 @@ def test_datetimeindex_accessors(self):
         dti.name = 'name'

         # non boolean accessors -> return Index
-        for accessor in ['year', 'month', 'day', 'hour', 'minute',
-                         'second', 'microsecond', 'nanosecond',
-                         'dayofweek', 'dayofyear', 'weekofyear',
-                         'quarter', 'weekday_name']:
+        for accessor in DatetimeIndex._field_ops:
             res = getattr(dti, accessor)
             assert len(res) == 365
             assert isinstance(res, Index)
             assert res.name == 'name'

         # boolean accessors -> return array
-        for accessor in ['is_month_start', 'is_month_end',
-                         'is_quarter_start', 'is_quarter_end',
-                         'is_year_start', 'is_year_end']:
+        for accessor in DatetimeIndex._bool_ops:
             res = getattr(dti, accessor)
             assert len(res) == 365
             assert isinstance(res, np.ndarray)

diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py
index 4abc282252559..4681879d708c4 100644
--- a/pandas/tests/indexes/datetimes/test_ops.py
+++ b/pandas/tests/indexes/datetimes/test_ops.py
@@ -31,15 +31,10 @@ def setUp(self):
         self.not_valid_objs = [o for o in self.objs if not mask(o)]

     def test_ops_properties(self):
-        self.check_ops_properties(
-            ['year', 'month', 'day', 'hour', 'minute', 'second', 'weekofyear',
-             'week', 'dayofweek', 'dayofyear', 'quarter'])
-        self.check_ops_properties(['date', 'time', 'microsecond', 'nanosecond',
-                                   'is_month_start', 'is_month_end',
-                                   'is_quarter_start',
-                                   'is_quarter_end', 'is_year_start',
-                                   'is_year_end', 'weekday_name'],
-                                  lambda x: isinstance(x, DatetimeIndex))
+        f = lambda x: isinstance(x, DatetimeIndex)
+        self.check_ops_properties(DatetimeIndex._field_ops, f)
+        self.check_ops_properties(DatetimeIndex._object_ops, f)
+        self.check_ops_properties(DatetimeIndex._bool_ops, f)

     def test_ops_properties_basic(self):

diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py
index 4533428cf1514..3b94992f2fe9f 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -21,11 +21,10 @@ def setUp(self): self.not_valid_objs = [o for o in self.objs if not mask(o)] def test_ops_properties(self): - self.check_ops_properties( - ['year', 'month', 'day', 'hour', 'minute', 'second', 'weekofyear', - 'week', 'dayofweek', 'dayofyear', 'quarter']) - self.check_ops_properties(['qyear'], - lambda x: isinstance(x, PeriodIndex)) + f = lambda x: isinstance(x, PeriodIndex) + self.check_ops_properties(PeriodIndex._field_ops, f) + self.check_ops_properties(PeriodIndex._object_ops, f) + self.check_ops_properties(PeriodIndex._bool_ops, f) def test_asobject_tolist(self): idx = pd.period_range(start='2013-01-01', periods=4, freq='M', diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 6a6c0ab49b15d..6639fcd985ac4 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -394,8 +394,8 @@ def test_fields(self): def _check_all_fields(self, periodindex): fields = ['year', 'month', 'day', 'hour', 'minute', 'second', - 'weekofyear', 'week', 'dayofweek', 'weekday', 'dayofyear', - 'quarter', 'qyear', 'days_in_month', 'is_leap_year'] + 'weekofyear', 'week', 'dayofweek', 'dayofyear', + 'quarter', 'qyear', 'days_in_month'] periods = list(periodindex) s = pd.Series(periodindex) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 8c7b88a9cf2ca..2e9f11297dc83 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -21,9 +21,9 @@ def setUp(self): self.not_valid_objs = [] def test_ops_properties(self): - self.check_ops_properties(['days', 'hours', 'minutes', 'seconds', - 'milliseconds']) - self.check_ops_properties(['microseconds', 'nanoseconds']) + f = lambda x: isinstance(x, TimedeltaIndex) + self.check_ops_properties(TimedeltaIndex._field_ops, f) + self.check_ops_properties(TimedeltaIndex._object_ops, f) def test_asobject_tolist(self): idx = timedelta_range(start='1 days', periods=4, freq='D', name='idx') diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py new file mode 100644 index 0000000000000..ce2ed237f5559 --- /dev/null +++ b/pandas/tests/scalar/test_nat.py @@ -0,0 +1,248 @@ +import pytest + +from datetime import datetime, timedelta +import pytz + +import numpy as np +from pandas import (NaT, Index, Timestamp, Timedelta, Period, + DatetimeIndex, PeriodIndex, + TimedeltaIndex, Series, isnull) +from pandas.util import testing as tm +from pandas._libs.tslib import iNaT + + +@pytest.mark.parametrize('nat, idx', [(Timestamp('NaT'), DatetimeIndex), + (Timedelta('NaT'), TimedeltaIndex), + (Period('NaT', freq='M'), PeriodIndex)]) +def test_nat_fields(nat, idx): + + for field in idx._field_ops: + + # weekday is a property of DTI, but a method + # on NaT/Timestamp for compat with datetime + if field == 'weekday': + continue + + result = getattr(NaT, field) + assert np.isnan(result) + + result = getattr(nat, field) + assert np.isnan(result) + + for field in idx._bool_ops: + + result = getattr(NaT, field) + assert result is False + + result = getattr(nat, field) + assert result is False + + +def test_nat_vector_field_access(): + idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) + + for field in DatetimeIndex._field_ops: + # weekday is a property of DTI, but a method + # on NaT/Timestamp for compat with datetime + if 
field == 'weekday': + continue + + result = getattr(idx, field) + expected = Index([getattr(x, field) for x in idx]) + tm.assert_index_equal(result, expected) + + s = Series(idx) + + for field in DatetimeIndex._field_ops: + + # weekday is a property of DTI, but a method + # on NaT/Timestamp for compat with datetime + if field == 'weekday': + continue + + result = getattr(s.dt, field) + expected = [getattr(x, field) for x in idx] + tm.assert_series_equal(result, Series(expected)) + + for field in DatetimeIndex._bool_ops: + result = getattr(s.dt, field) + expected = [getattr(x, field) for x in idx] + tm.assert_series_equal(result, Series(expected)) + + +@pytest.mark.parametrize('klass', [Timestamp, Timedelta, Period]) +def test_identity(klass): + assert klass(None) is NaT + + result = klass(np.nan) + assert result is NaT + + result = klass(None) + assert result is NaT + + result = klass(iNaT) + assert result is NaT + + result = klass(np.nan) + assert result is NaT + + result = klass(float('nan')) + assert result is NaT + + result = klass(NaT) + assert result is NaT + + result = klass('NaT') + assert result is NaT + + assert isnull(klass('nat')) + + +@pytest.mark.parametrize('klass', [Timestamp, Timedelta, Period]) +def test_equality(klass): + + # nat + if klass is not Period: + klass('').value == iNaT + klass('nat').value == iNaT + klass('NAT').value == iNaT + klass(None).value == iNaT + klass(np.nan).value == iNaT + assert isnull(klass('nat')) + + +@pytest.mark.parametrize('klass', [Timestamp, Timedelta]) +def test_round_nat(klass): + # GH14940 + ts = klass('nat') + for method in ["round", "floor", "ceil"]: + round_method = getattr(ts, method) + for freq in ["s", "5s", "min", "5min", "h", "5h"]: + assert round_method(freq) is ts + + +def test_NaT_methods(): + # GH 9513 + raise_methods = ['astimezone', 'combine', 'ctime', 'dst', + 'fromordinal', 'fromtimestamp', 'isocalendar', + 'strftime', 'strptime', 'time', 'timestamp', + 'timetuple', 'timetz', 'toordinal', 'tzname', + 'utcfromtimestamp', 'utcnow', 'utcoffset', + 'utctimetuple'] + nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today'] + nan_methods = ['weekday', 'isoweekday'] + + for method in raise_methods: + if hasattr(NaT, method): + with pytest.raises(ValueError): + getattr(NaT, method)() + + for method in nan_methods: + if hasattr(NaT, method): + assert np.isnan(getattr(NaT, method)()) + + for method in nat_methods: + if hasattr(NaT, method): + # see gh-8254 + exp_warning = None + if method == 'to_datetime': + exp_warning = FutureWarning + with tm.assert_produces_warning( + exp_warning, check_stacklevel=False): + assert getattr(NaT, method)() is NaT + + # GH 12300 + assert NaT.isoformat() == 'NaT' + + +@pytest.mark.parametrize('klass', [Timestamp, Timedelta]) +def test_isoformat(klass): + + result = klass('NaT').isoformat() + expected = 'NaT' + assert result == expected + + +def test_nat_arithmetic(): + # GH 6873 + i = 2 + f = 1.5 + + for (left, right) in [(NaT, i), (NaT, f), (NaT, np.nan)]: + assert left / right is NaT + assert left * right is NaT + assert right * left is NaT + with pytest.raises(TypeError): + right / left + + # Timestamp / datetime + t = Timestamp('2014-01-01') + dt = datetime(2014, 1, 1) + for (left, right) in [(NaT, NaT), (NaT, t), (NaT, dt)]: + # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT + assert right + left is NaT + assert left + right is NaT + assert left - right is NaT + assert right - left is NaT + + # timedelta-like + # offsets are tested in test_offsets.py + + delta = 
timedelta(3600) + td = Timedelta('5s') + + for (left, right) in [(NaT, delta), (NaT, td)]: + # NaT + timedelta-like returns NaT + assert right + left is NaT + assert left + right is NaT + assert right - left is NaT + assert left - right is NaT + + # GH 11718 + t_utc = Timestamp('2014-01-01', tz='UTC') + t_tz = Timestamp('2014-01-01', tz='US/Eastern') + dt_tz = pytz.timezone('Asia/Tokyo').localize(dt) + + for (left, right) in [(NaT, t_utc), (NaT, t_tz), + (NaT, dt_tz)]: + # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT + assert right + left is NaT + assert left + right is NaT + assert left - right is NaT + assert right - left is NaT + + # int addition / subtraction + for (left, right) in [(NaT, 2), (NaT, 0), (NaT, -3)]: + assert right + left is NaT + assert left + right is NaT + assert left - right is NaT + assert right - left is NaT + + +def test_nat_arithmetic_index(): + # GH 11718 + + dti = DatetimeIndex(['2011-01-01', '2011-01-02'], name='x') + exp = DatetimeIndex([NaT, NaT], name='x') + tm.assert_index_equal(dti + NaT, exp) + tm.assert_index_equal(NaT + dti, exp) + + dti_tz = DatetimeIndex(['2011-01-01', '2011-01-02'], + tz='US/Eastern', name='x') + exp = DatetimeIndex([NaT, NaT], name='x', tz='US/Eastern') + tm.assert_index_equal(dti_tz + NaT, exp) + tm.assert_index_equal(NaT + dti_tz, exp) + + exp = TimedeltaIndex([NaT, NaT], name='x') + for (left, right) in [(NaT, dti), (NaT, dti_tz)]: + tm.assert_index_equal(left - right, exp) + tm.assert_index_equal(right - left, exp) + + # timedelta + tdi = TimedeltaIndex(['1 day', '2 day'], name='x') + exp = DatetimeIndex([NaT, NaT], name='x') + for (left, right) in [(NaT, tdi)]: + tm.assert_index_equal(left + right, exp) + tm.assert_index_equal(right + left, exp) + tm.assert_index_equal(left - right, exp) + tm.assert_index_equal(right - left, exp) diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index 3128e90695324..7a15600d6041e 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -110,20 +110,6 @@ def test_period_cons_nat(self): p = Period(tslib.iNaT) self.assertIs(p, pd.NaT) - def test_cons_null_like(self): - # check Timestamp compat - self.assertIs(Timestamp('NaT'), pd.NaT) - self.assertIs(Period('NaT'), pd.NaT) - - self.assertIs(Timestamp(None), pd.NaT) - self.assertIs(Period(None), pd.NaT) - - self.assertIs(Timestamp(float('nan')), pd.NaT) - self.assertIs(Period(float('nan')), pd.NaT) - - self.assertIs(Timestamp(np.nan), pd.NaT) - self.assertIs(Period(np.nan), pd.NaT) - def test_period_cons_mult(self): p1 = Period('2011-01', freq='3M') p2 = Period('2011-01', freq='M') @@ -854,17 +840,6 @@ def test_properties_secondly(self): self.assertEqual(Period(freq='Min', year=2012, month=2, day=1, hour=0, minute=0, second=0).days_in_month, 29) - def test_properties_nat(self): - p_nat = Period('NaT', freq='M') - t_nat = pd.Timestamp('NaT') - self.assertIs(p_nat, t_nat) - - # confirm Period('NaT') work identical with Timestamp('NaT') - for f in ['year', 'month', 'day', 'hour', 'minute', 'second', 'week', - 'dayofyear', 'quarter', 'days_in_month']: - self.assertTrue(np.isnan(getattr(p_nat, f))) - self.assertTrue(np.isnan(getattr(t_nat, f))) - def test_pnow(self): # deprecation, xref #13790 diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index 7c5caa9506ca2..c2b895925b685 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -6,7 +6,7 @@ import pandas.util.testing as tm from 
pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type as ct from pandas import (Timedelta, TimedeltaIndex, timedelta_range, Series, - to_timedelta, compat, isnull) + to_timedelta, compat) from pandas._libs.tslib import iNaT, NaTType @@ -151,14 +151,6 @@ def test_construction(self): 500, 'ms').astype('m8[ns]').view('i8') self.assertEqual(Timedelta(10.5, unit='s').value, expected) - # nat - self.assertEqual(Timedelta('').value, iNaT) - self.assertEqual(Timedelta('nat').value, iNaT) - self.assertEqual(Timedelta('NAT').value, iNaT) - self.assertEqual(Timedelta(None).value, iNaT) - self.assertEqual(Timedelta(np.nan).value, iNaT) - self.assertTrue(isnull(Timedelta('nat'))) - # offset self.assertEqual(to_timedelta(pd.offsets.Hour(2)), Timedelta('0 days, 02:00:00')) @@ -686,11 +678,6 @@ def test_isoformat(self): expected = 'P0DT0H0M0.001S' self.assertEqual(result, expected) - # NaT - result = Timedelta('NaT').isoformat() - expected = 'NaT' - self.assertEqual(result, expected) - # don't strip every 0 result = Timedelta(minutes=1).isoformat() expected = 'P0DT0H1M0S' diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index bbf33c4db5ad7..e39375141ad5f 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -7,23 +7,19 @@ from datetime import datetime, timedelta from distutils.version import LooseVersion -import pandas as pd import pandas.util.testing as tm - from pandas.tseries import offsets, frequencies from pandas._libs import tslib, period -from pandas._libs.tslib import get_timezone, iNaT +from pandas._libs.tslib import get_timezone from pandas.compat import lrange, long from pandas.util.testing import assert_series_equal from pandas.compat.numpy import np_datetime64_compat from pandas import (Timestamp, date_range, Period, Timedelta, compat, - Series, NaT, isnull, DataFrame, DatetimeIndex) + Series, NaT, DataFrame, DatetimeIndex) from pandas.tseries.frequencies import (RESO_DAY, RESO_HR, RESO_MIN, RESO_US, RESO_MS, RESO_SEC) -randn = np.random.randn - class TestTimestamp(tm.TestCase): @@ -202,8 +198,6 @@ def test_constructor_positional(self): repr(Timestamp(2015, 11, 12, 1, 2, 3, 999999)), repr(Timestamp('2015-11-12 01:02:03.999999'))) - self.assertIs(Timestamp(None), pd.NaT) - def test_constructor_keyword(self): # GH 10758 with tm.assertRaises(TypeError): @@ -235,7 +229,7 @@ def test_constructor_fromordinal(self): self.assertEqual(base.toordinal(), ts.toordinal()) ts = Timestamp.fromordinal(base.toordinal(), tz='US/Eastern') - self.assertEqual(pd.Timestamp('2000-01-01', tz='US/Eastern'), ts) + self.assertEqual(Timestamp('2000-01-01', tz='US/Eastern'), ts) self.assertEqual(base.toordinal(), ts.toordinal()) def test_constructor_offset_depr(self): @@ -260,7 +254,7 @@ def test_constructor_offset_depr_fromordinal(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): ts = Timestamp.fromordinal(base.toordinal(), offset='D') - self.assertEqual(pd.Timestamp('2000-01-01'), ts) + self.assertEqual(Timestamp('2000-01-01'), ts) self.assertEqual(ts.freq, 'D') self.assertEqual(base.toordinal(), ts.toordinal()) @@ -422,12 +416,12 @@ def test_tz_localize_nonexistent(self): self.assertRaises(NonExistentTimeError, ts.tz_localize, tz, errors='raise') self.assertIs(ts.tz_localize(tz, errors='coerce'), - pd.NaT) + NaT) def test_tz_localize_errors_ambiguous(self): # See issue 13057 from pytz.exceptions import AmbiguousTimeError - ts = pd.Timestamp('2015-11-1 01:00') + ts = Timestamp('2015-11-1 
01:00') self.assertRaises(AmbiguousTimeError, ts.tz_localize, 'US/Pacific', errors='coerce') @@ -576,94 +570,6 @@ def check(value, equal): for end in ends: self.assertTrue(getattr(ts, end)) - def test_nat_fields(self): - # GH 10050 - ts = Timestamp('NaT') - self.assertTrue(np.isnan(ts.year)) - self.assertTrue(np.isnan(ts.month)) - self.assertTrue(np.isnan(ts.day)) - self.assertTrue(np.isnan(ts.hour)) - self.assertTrue(np.isnan(ts.minute)) - self.assertTrue(np.isnan(ts.second)) - self.assertTrue(np.isnan(ts.microsecond)) - self.assertTrue(np.isnan(ts.nanosecond)) - self.assertTrue(np.isnan(ts.dayofweek)) - self.assertTrue(np.isnan(ts.quarter)) - self.assertTrue(np.isnan(ts.dayofyear)) - self.assertTrue(np.isnan(ts.week)) - self.assertTrue(np.isnan(ts.daysinmonth)) - self.assertTrue(np.isnan(ts.days_in_month)) - - def test_nat_vector_field_access(self): - idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) - - # non boolean fields - fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', - 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month'] - - for field in fields: - result = getattr(idx, field) - expected = [getattr(x, field) for x in idx] - self.assert_index_equal(result, pd.Index(expected)) - - # boolean fields - fields = ['is_leap_year'] - # other boolean fields like 'is_month_start' and 'is_month_end' - # not yet supported by NaT - - for field in fields: - result = getattr(idx, field) - expected = [getattr(x, field) for x in idx] - self.assert_numpy_array_equal(result, np.array(expected)) - - s = pd.Series(idx) - - for field in fields: - result = getattr(s.dt, field) - expected = [getattr(x, field) for x in idx] - self.assert_series_equal(result, pd.Series(expected)) - - def test_nat_scalar_field_access(self): - fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', - 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 'daysinmonth', 'dayofweek', 'weekday_name'] - for field in fields: - result = getattr(NaT, field) - self.assertTrue(np.isnan(result)) - - def test_NaT_methods(self): - # GH 9513 - raise_methods = ['astimezone', 'combine', 'ctime', 'dst', - 'fromordinal', 'fromtimestamp', 'isocalendar', - 'strftime', 'strptime', 'time', 'timestamp', - 'timetuple', 'timetz', 'toordinal', 'tzname', - 'utcfromtimestamp', 'utcnow', 'utcoffset', - 'utctimetuple'] - nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today'] - nan_methods = ['weekday', 'isoweekday'] - - for method in raise_methods: - if hasattr(NaT, method): - self.assertRaises(ValueError, getattr(NaT, method)) - - for method in nan_methods: - if hasattr(NaT, method): - self.assertTrue(np.isnan(getattr(NaT, method)())) - - for method in nat_methods: - if hasattr(NaT, method): - # see gh-8254 - exp_warning = None - if method == 'to_datetime': - exp_warning = FutureWarning - with tm.assert_produces_warning( - exp_warning, check_stacklevel=False): - self.assertIs(getattr(NaT, method)(), NaT) - - # GH 12300 - self.assertEqual(NaT.isoformat(), 'NaT') - def test_pprint(self): # GH12622 import pprint @@ -772,24 +678,40 @@ def test_round(self): self.assertRaises(ValueError, lambda: dti.round(freq)) # GH 14440 & 15578 - result = pd.Timestamp('2016-10-17 12:00:00.0015').round('ms') - expected = pd.Timestamp('2016-10-17 12:00:00.002000') + result = Timestamp('2016-10-17 12:00:00.0015').round('ms') + expected = Timestamp('2016-10-17 12:00:00.002000') self.assertEqual(result, expected) - result = pd.Timestamp('2016-10-17 12:00:00.00149').round('ms') - expected = 
pd.Timestamp('2016-10-17 12:00:00.001000') + result = Timestamp('2016-10-17 12:00:00.00149').round('ms') + expected = Timestamp('2016-10-17 12:00:00.001000') self.assertEqual(result, expected) - ts = pd.Timestamp('2016-10-17 12:00:00.0015') + ts = Timestamp('2016-10-17 12:00:00.0015') for freq in ['us', 'ns']: self.assertEqual(ts, ts.round(freq)) - result = pd.Timestamp('2016-10-17 12:00:00.001501031').round('10ns') - expected = pd.Timestamp('2016-10-17 12:00:00.001501030') + result = Timestamp('2016-10-17 12:00:00.001501031').round('10ns') + expected = Timestamp('2016-10-17 12:00:00.001501030') self.assertEqual(result, expected) with tm.assert_produces_warning(): - pd.Timestamp('2016-10-17 12:00:00.001501031').round('1010ns') + Timestamp('2016-10-17 12:00:00.001501031').round('1010ns') + + def test_round_misc(self): + stamp = Timestamp('2000-01-05 05:09:15.13') + + def _check_round(freq, expected): + result = stamp.round(freq=freq) + self.assertEqual(result, expected) + + for freq, expected in [('D', Timestamp('2000-01-05 00:00:00')), + ('H', Timestamp('2000-01-05 05:00:00')), + ('S', Timestamp('2000-01-05 05:09:15'))]: + _check_round(freq, expected) + + msg = frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + stamp.round('foo') def test_class_ops_pytz(self): tm._skip_if_no_pytz() @@ -906,48 +828,30 @@ def check(val, unit=None, h=1, s=1, us=0): check(val / 1000000000.0 + 0.5, unit='s', us=500000) check(days + 0.5, unit='D', h=12) - # nan - result = Timestamp(np.nan) - self.assertIs(result, NaT) - - result = Timestamp(None) - self.assertIs(result, NaT) - - result = Timestamp(iNaT) - self.assertIs(result, NaT) - - result = Timestamp(NaT) - self.assertIs(result, NaT) - - result = Timestamp('NaT') - self.assertIs(result, NaT) - - self.assertTrue(isnull(Timestamp('nat'))) - def test_roundtrip(self): # test value to string and back conversions # further test accessors base = Timestamp('20140101 00:00:00') - result = Timestamp(base.value + pd.Timedelta('5ms').value) + result = Timestamp(base.value + Timedelta('5ms').value) self.assertEqual(result, Timestamp(str(base) + ".005000")) self.assertEqual(result.microsecond, 5000) - result = Timestamp(base.value + pd.Timedelta('5us').value) + result = Timestamp(base.value + Timedelta('5us').value) self.assertEqual(result, Timestamp(str(base) + ".000005")) self.assertEqual(result.microsecond, 5) - result = Timestamp(base.value + pd.Timedelta('5ns').value) + result = Timestamp(base.value + Timedelta('5ns').value) self.assertEqual(result, Timestamp(str(base) + ".000000005")) self.assertEqual(result.nanosecond, 5) self.assertEqual(result.microsecond, 0) - result = Timestamp(base.value + pd.Timedelta('6ms 5us').value) + result = Timestamp(base.value + Timedelta('6ms 5us').value) self.assertEqual(result, Timestamp(str(base) + ".006005")) self.assertEqual(result.microsecond, 5 + 6 * 1000) - result = Timestamp(base.value + pd.Timedelta('200ms 5us').value) + result = Timestamp(base.value + Timedelta('200ms 5us').value) self.assertEqual(result, Timestamp(str(base) + ".200005")) self.assertEqual(result.microsecond, 5 + 200 * 1000) @@ -1004,9 +908,9 @@ def test_compare_invalid(self): self.assertTrue(val != np.int64(1)) # ops testing - df = DataFrame(randn(5, 2)) + df = DataFrame(np.random.randn(5, 2)) a = df[0] - b = Series(randn(5)) + b = Series(np.random.randn(5)) b.name = Timestamp('2000-01-01') tm.assert_series_equal(a / b, 1 / (b / a)) @@ -1149,8 +1053,8 @@ def test_timestamp_compare_series(self): s = 
Series(date_range('20010101', periods=10), name='dates') s_nat = s.copy(deep=True) - s[0] = pd.Timestamp('nat') - s[3] = pd.Timestamp('nat') + s[0] = Timestamp('nat') + s[3] = Timestamp('nat') ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} @@ -1194,18 +1098,6 @@ def test_is_leap_year(self): dt = Timestamp('2100-01-01 00:00:00', tz=tz) self.assertFalse(dt.is_leap_year) - self.assertFalse(pd.NaT.is_leap_year) - self.assertIsInstance(pd.NaT.is_leap_year, bool) - - def test_round_nat(self): - # GH14940 - ts = Timestamp('nat') - print(dir(ts)) - for method in ["round", "floor", "ceil"]: - round_method = getattr(ts, method) - for freq in ["s", "5s", "min", "5min", "h", "5h"]: - self.assertIs(round_method(freq), ts) - class TestTimestampNsOperations(tm.TestCase): @@ -1293,95 +1185,6 @@ def test_nanosecond_timestamp(self): self.assertEqual(t.value, expected) self.assertEqual(t.nanosecond, 10) - def test_nat_arithmetic(self): - # GH 6873 - i = 2 - f = 1.5 - - for (left, right) in [(pd.NaT, i), (pd.NaT, f), (pd.NaT, np.nan)]: - self.assertIs(left / right, pd.NaT) - self.assertIs(left * right, pd.NaT) - self.assertIs(right * left, pd.NaT) - with tm.assertRaises(TypeError): - right / left - - # Timestamp / datetime - t = Timestamp('2014-01-01') - dt = datetime(2014, 1, 1) - for (left, right) in [(pd.NaT, pd.NaT), (pd.NaT, t), (pd.NaT, dt)]: - # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(left - right, pd.NaT) - self.assertIs(right - left, pd.NaT) - - # timedelta-like - # offsets are tested in test_offsets.py - - delta = timedelta(3600) - td = Timedelta('5s') - - for (left, right) in [(pd.NaT, delta), (pd.NaT, td)]: - # NaT + timedelta-like returns NaT - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(right - left, pd.NaT) - self.assertIs(left - right, pd.NaT) - - # GH 11718 - tm._skip_if_no_pytz() - import pytz - - t_utc = Timestamp('2014-01-01', tz='UTC') - t_tz = Timestamp('2014-01-01', tz='US/Eastern') - dt_tz = pytz.timezone('Asia/Tokyo').localize(dt) - - for (left, right) in [(pd.NaT, t_utc), (pd.NaT, t_tz), - (pd.NaT, dt_tz)]: - # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(left - right, pd.NaT) - self.assertIs(right - left, pd.NaT) - - # int addition / subtraction - for (left, right) in [(pd.NaT, 2), (pd.NaT, 0), (pd.NaT, -3)]: - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(left - right, pd.NaT) - self.assertIs(right - left, pd.NaT) - - def test_nat_arithmetic_index(self): - # GH 11718 - - # datetime - tm._skip_if_no_pytz() - - dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], name='x') - exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x') - self.assert_index_equal(dti + pd.NaT, exp) - self.assert_index_equal(pd.NaT + dti, exp) - - dti_tz = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], - tz='US/Eastern', name='x') - exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x', tz='US/Eastern') - self.assert_index_equal(dti_tz + pd.NaT, exp) - self.assert_index_equal(pd.NaT + dti_tz, exp) - - exp = pd.TimedeltaIndex([pd.NaT, pd.NaT], name='x') - for (left, right) in [(pd.NaT, dti), (pd.NaT, dti_tz)]: - self.assert_index_equal(left - right, exp) - self.assert_index_equal(right - left, exp) - - # timedelta - tdi = pd.TimedeltaIndex(['1 day', '2 day'], name='x') - exp = pd.DatetimeIndex([pd.NaT, 
pd.NaT], name='x') - for (left, right) in [(pd.NaT, tdi)]: - self.assert_index_equal(left + right, exp) - self.assert_index_equal(right + left, exp) - self.assert_index_equal(left - right, exp) - self.assert_index_equal(right - left, exp) - class TestTimestampOps(tm.TestCase): @@ -1722,22 +1525,3 @@ def test_to_datetime_bijective(self): self.assertEqual( Timestamp(Timestamp.min.to_pydatetime()).value / 1000, Timestamp.min.value / 1000) - - -class TestTslib(tm.TestCase): - - def test_round(self): - stamp = Timestamp('2000-01-05 05:09:15.13') - - def _check_round(freq, expected): - result = stamp.round(freq=freq) - self.assertEqual(result, expected) - - for freq, expected in [('D', Timestamp('2000-01-05 00:00:00')), - ('H', Timestamp('2000-01-05 05:00:00')), - ('S', Timestamp('2000-01-05 05:09:15'))]: - _check_round(freq, expected) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - stamp.round('foo') diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 4c697c7e52bb8..89f972a33a630 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -8,10 +8,8 @@ from pandas.types.common import is_integer_dtype, is_list_like from pandas import (Index, Series, DataFrame, bdate_range, - date_range, period_range, timedelta_range) -from pandas.tseries.period import PeriodIndex -from pandas.tseries.index import Timestamp, DatetimeIndex -from pandas.tseries.tdi import TimedeltaIndex + date_range, period_range, timedelta_range, + PeriodIndex, Timestamp, DatetimeIndex, TimedeltaIndex) import pandas.core.common as com from pandas.util.testing import assert_series_equal @@ -27,21 +25,13 @@ def test_dt_namespace_accessor(self): # GH 7207, 11128 # test .dt namespace accessor - ok_for_base = ['year', 'month', 'day', 'hour', 'minute', 'second', - 'weekofyear', 'week', 'dayofweek', 'weekday', - 'dayofyear', 'quarter', 'freq', 'days_in_month', - 'daysinmonth', 'is_leap_year'] - ok_for_period = ok_for_base + ['qyear', 'start_time', 'end_time'] + ok_for_period = PeriodIndex._datetimelike_ops ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq'] - ok_for_dt = ok_for_base + ['date', 'time', 'microsecond', 'nanosecond', - 'is_month_start', 'is_month_end', - 'is_quarter_start', 'is_quarter_end', - 'is_year_start', 'is_year_end', 'tz', - 'weekday_name'] + ok_for_dt = DatetimeIndex._datetimelike_ops ok_for_dt_methods = ['to_period', 'to_pydatetime', 'tz_localize', 'tz_convert', 'normalize', 'strftime', 'round', 'floor', 'ceil', 'weekday_name'] - ok_for_td = ['days', 'seconds', 'microseconds', 'nanoseconds'] + ok_for_td = TimedeltaIndex._datetimelike_ops ok_for_td_methods = ['components', 'to_pytimedelta', 'total_seconds', 'round', 'floor', 'ceil'] diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 68db0d19344b9..032e3a186b84a 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -219,7 +219,7 @@ def check_ops_properties(self, props, filter=None, ignore_failures=False): self.assertEqual(result, expected) # freq raises AttributeError on an Int64Index because its not - # defined we mostly care about Series hwere anyhow + # defined we mostly care about Series here anyhow if not ignore_failures: for o in self.not_valid_objs: diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 8fd3c6324d48c..b4072d04dfd81 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py 
@@ -17,9 +17,11 @@ import pandas as pd import pandas.compat as compat import pandas.util.testing as tm -from pandas import (Categorical, Index, Series, DataFrame, PeriodIndex, - Timestamp, CategoricalIndex, DatetimeIndex, - isnull, NaT) +from pandas import (Categorical, Index, Series, DataFrame, + Timestamp, CategoricalIndex, isnull, + date_range, DatetimeIndex, + period_range, PeriodIndex, + timedelta_range, TimedeltaIndex, NaT) from pandas.compat import range, lrange, u, PY3 from pandas.core.config import option_context @@ -4299,9 +4301,6 @@ def test_str_accessor_api_for_categorical(self): def test_dt_accessor_api_for_categorical(self): # https://github.com/pandas-dev/pandas/issues/10661 from pandas.tseries.common import Properties - from pandas.tseries.index import date_range, DatetimeIndex - from pandas.tseries.period import period_range, PeriodIndex - from pandas.tseries.tdi import timedelta_range, TimedeltaIndex s_dr = Series(date_range('1/1/2015', periods=5, tz="MET")) c_dr = s_dr.astype("category") @@ -4312,10 +4311,14 @@ def test_dt_accessor_api_for_categorical(self): s_tdr = Series(timedelta_range('1 days', '10 days')) c_tdr = s_tdr.astype("category") + # only testing field (like .day) + # and bool (is_month_start) + get_ops = lambda x: x._datetimelike_ops + test_data = [ - ("Datetime", DatetimeIndex._datetimelike_ops, s_dr, c_dr), - ("Period", PeriodIndex._datetimelike_ops, s_pr, c_pr), - ("Timedelta", TimedeltaIndex._datetimelike_ops, s_tdr, c_tdr)] + ("Datetime", get_ops(DatetimeIndex), s_dr, c_dr), + ("Period", get_ops(PeriodIndex), s_pr, c_pr), + ("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr)] self.assertIsInstance(c_dr.dt, Properties) @@ -4325,12 +4328,13 @@ def test_dt_accessor_api_for_categorical(self): ('round', ("D",), {}), ('floor', ("D",), {}), ('ceil', ("D",), {}), + ('asfreq', ("D",), {}), # ('tz_localize', ("UTC",), {}), ] _special_func_names = [f[0] for f in special_func_defs] # the series is already localized - _ignore_names = ['tz_localize'] + _ignore_names = ['tz_localize', 'components'] for name, attr_names, s, c in test_data: func_names = [f @@ -4352,7 +4356,7 @@ def test_dt_accessor_api_for_categorical(self): elif isinstance(res, pd.Series): tm.assert_series_equal(res, exp) else: - tm.assert_numpy_array_equal(res, exp) + tm.assert_almost_equal(res, exp) for attr in attr_names: try: @@ -4367,7 +4371,7 @@ def test_dt_accessor_api_for_categorical(self): elif isinstance(res, pd.Series): tm.assert_series_equal(res, exp) else: - tm.assert_numpy_array_equal(res, exp) + tm.assert_almost_equal(res, exp) invalid = Series([1, 2, 3]).astype('category') with tm.assertRaisesRegexp( diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index f9fd27176487c..7940efc7e1b59 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -168,8 +168,7 @@ def to_pydatetime(self): typ='property') DatetimeProperties._add_delegate_accessors( delegate=DatetimeIndex, - accessors=["to_period", "tz_localize", "tz_convert", - "normalize", "strftime", "round", "floor", "ceil"], + accessors=DatetimeIndex._datetimelike_methods, typ='method') @@ -208,7 +207,7 @@ def components(self): typ='property') TimedeltaProperties._add_delegate_accessors( delegate=TimedeltaIndex, - accessors=["to_pytimedelta", "total_seconds", "round", "floor", "ceil"], + accessors=TimedeltaIndex._datetimelike_methods, typ='method') @@ -230,9 +229,10 @@ class PeriodProperties(Properties): delegate=PeriodIndex, accessors=PeriodIndex._datetimelike_ops, typ='property') 
-PeriodProperties._add_delegate_accessors(delegate=PeriodIndex, - accessors=["strftime"], - typ='method') +PeriodProperties._add_delegate_accessors( + delegate=PeriodIndex, + accessors=PeriodIndex._datetimelike_methods, + typ='method') class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 11d2d29597fc0..1992e177556cc 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -64,25 +64,26 @@ def f(self): if self.tz is not utc: values = self._local_timestamps() - # boolean accessors -> return array - if field in ['is_month_start', 'is_month_end', - 'is_quarter_start', 'is_quarter_end', - 'is_year_start', 'is_year_end']: - month_kw = (self.freq.kwds.get('startingMonth', - self.freq.kwds.get('month', 12)) - if self.freq else 12) - - result = libts.get_start_end_field(values, field, self.freqstr, - month_kw) - return self._maybe_mask_results(result, convert='float64') - elif field in ['is_leap_year']: - # no need to mask NaT - return libts.get_date_field(values, field) - - # non-boolean accessors -> return Index - elif field in ['weekday_name']: + if field in self._bool_ops: + if field in ['is_month_start', 'is_month_end', + 'is_quarter_start', 'is_quarter_end', + 'is_year_start', 'is_year_end']: + month_kw = (self.freq.kwds.get('startingMonth', + self.freq.kwds.get('month', 12)) + if self.freq else 12) + + result = libts.get_start_end_field(values, field, self.freqstr, + month_kw) + else: + result = libts.get_date_field(values, field) + + # these return a boolean by-definition + return result + + if field in self._object_ops: result = libts.get_date_name_field(values, field) result = self._maybe_mask_results(result) + else: result = libts.get_date_field(values, field) result = self._maybe_mask_results(result, convert='float64') @@ -232,14 +233,24 @@ def _join_i8_wrapper(joinf, **kwargs): offset = None _comparables = ['name', 'freqstr', 'tz'] _attributes = ['name', 'freq', 'tz'] - _datetimelike_ops = ['year', 'month', 'day', 'hour', 'minute', 'second', - 'weekofyear', 'week', 'dayofweek', 'weekday', - 'dayofyear', 'quarter', 'days_in_month', - 'daysinmonth', 'date', 'time', 'microsecond', - 'nanosecond', 'is_month_start', 'is_month_end', - 'is_quarter_start', 'is_quarter_end', 'is_year_start', - 'is_year_end', 'tz', 'freq', 'weekday_name', - 'is_leap_year'] + + # define my properties & methods for delegation + _bool_ops = ['is_month_start', 'is_month_end', + 'is_quarter_start', 'is_quarter_end', 'is_year_start', + 'is_year_end', 'is_leap_year'] + _object_ops = ['weekday_name', 'freq', 'tz'] + _field_ops = ['year', 'month', 'day', 'hour', 'minute', 'second', + 'weekofyear', 'week', 'weekday', 'dayofweek', + 'dayofyear', 'quarter', 'days_in_month', + 'daysinmonth', 'microsecond', + 'nanosecond'] + _other_ops = ['date', 'time'] + _datetimelike_ops = _field_ops + _object_ops + _bool_ops + _other_ops + _datetimelike_methods = ['to_period', 'tz_localize', + 'tz_convert', + 'normalize', 'strftime', 'round', 'floor', + 'ceil'] + _is_numeric_dtype = False _infer_as_myclass = True diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index c279d5a9342e8..1e1496bbe9c27 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -174,12 +174,18 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): _box_scalars = True _typ = 'periodindex' _attributes = ['name', 'freq'] - _datetimelike_ops = ['year', 'month', 'day', 'hour', 'minute', 'second', - 'weekofyear', 
'week', 'dayofweek', 'weekday', - 'dayofyear', 'quarter', 'qyear', 'freq', - 'days_in_month', 'daysinmonth', - 'to_timestamp', 'asfreq', 'start_time', 'end_time', - 'is_leap_year'] + + # define my properties & methods for delegation + _other_ops = [] + _bool_ops = ['is_leap_year'] + _object_ops = ['start_time', 'end_time', 'freq'] + _field_ops = ['year', 'month', 'day', 'hour', 'minute', 'second', + 'weekofyear', 'weekday', 'week', 'dayofweek', + 'dayofyear', 'quarter', 'qyear', + 'days_in_month', 'daysinmonth'] + _datetimelike_ops = _field_ops + _object_ops + _bool_ops + _datetimelike_methods = ['strftime', 'to_timestamp', 'asfreq'] + _is_numeric_dtype = False _infer_as_myclass = True diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 55333890640c1..5d062dd38f9fc 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -127,8 +127,15 @@ def _join_i8_wrapper(joinf, **kwargs): _left_indexer_unique = _join_i8_wrapper( libjoin.left_join_indexer_unique_int64, with_indexers=False) _arrmap = None - _datetimelike_ops = ['days', 'seconds', 'microseconds', 'nanoseconds', - 'freq', 'components'] + + # define my properties & methods for delegation + _other_ops = [] + _bool_ops = [] + _object_ops = ['freq'] + _field_ops = ['days', 'seconds', 'microseconds', 'nanoseconds'] + _datetimelike_ops = _field_ops + _object_ops + _bool_ops + _datetimelike_methods = ["to_pytimedelta", "total_seconds", + "round", "floor", "ceil"] __eq__ = _td_index_cmp('__eq__') __ne__ = _td_index_cmp('__ne__', nat_result=True) From 7e43c78a4e310955f3f214f58d1b77dc03c2ec0d Mon Sep 17 00:00:00 2001 From: "Graham R. Jeffries" Date: Mon, 27 Mar 2017 13:36:19 -0400 Subject: [PATCH 288/933] Remove NotImplementedError for parse_dates keyword in read_excel Rebase and update of PR https://github.com/pydata/pandas/pull/12051 Author: Joris Van den Bossche Author: Graham R. Jeffries This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #14326 from jorisvandenbossche/pr/12051 and squashes the following commits: 0b65a7a [Joris Van den Bossche] update wording 656ec44 [Joris Van den Bossche] Fix detection to raise warning b1c7f87 [Joris Van den Bossche] add whatsnew 925ce1b [Joris Van den Bossche] Update tests 0e10a9d [Graham R. Jeffries] remove read_excel kwd NotImplemented error, update documentation #11544 --- doc/source/io.rst | 14 +++++++++++++ doc/source/whatsnew/v0.19.0.txt | 4 ++++ pandas/io/excel.py | 9 +++------ pandas/tests/io/test_excel.py | 36 ++++++++++++++++++------------- 4 files changed, 43 insertions(+), 20 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index faeea9d448cf2..e72224c6fa1fe 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2767,6 +2767,20 @@ indices to be parsed. read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3]) + +Parsing Dates ++++++++++++++ + +Datetime-like values are normally automatically converted to the appropriate +dtype when reading the Excel file. But if you have a column of strings that +*look* like dates (but are not actually formatted as dates in Excel), you can +use the ``parse_dates`` keyword to parse those strings to datetimes: + +..
code-block:: python + + read_excel('path_to_file.xls', 'Sheet1', parse_dates=['date_strings']) + + Cell Converters +++++++++++++++ diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 9b003034aa94a..11df0afb144ea 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -517,6 +517,7 @@ Other enhancements - The ``pd.read_json`` and ``DataFrame.to_json`` has gained support for reading and writing json lines with ``lines`` option see :ref:`Line delimited json ` (:issue:`9180`) - :func:`read_excel` now supports the true_values and false_values keyword arguments (:issue:`13347`) - ``groupby()`` will now accept a scalar and a single-element list for specifying ``level`` on a non-``MultiIndex`` grouper. (:issue:`13907`) +<<<<<<< HEAD - Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`). - ``pd.Timedelta(None)`` is now accepted and will return ``NaT``, mirroring ``pd.Timestamp`` (:issue:`13687`) - ``pd.read_stata()`` can now handle some format 111 files, which are produced by SAS when generating Stata dta files (:issue:`11526`) @@ -524,6 +525,9 @@ Other enhancements series or indices. This behaves like a standard binary operator with regards to broadcasting rules (:issue:`14208`). +======= +- Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`) +>>>>>>> PR_TOOL_MERGE_PR_14326 .. _whatsnew_0190.api: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 82ea2e8a46592..e7a8b71a5f6c9 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -343,13 +343,10 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None, if 'chunksize' in kwds: raise NotImplementedError("chunksize keyword of read_excel " "is not implemented") - if parse_dates: - raise NotImplementedError("parse_dates keyword of read_excel " - "is not implemented") - if date_parser is not None: - raise NotImplementedError("date_parser keyword of read_excel " - "is not implemented") + if parse_dates is True and not index_col: + warn("The 'parse_dates=True' keyword of read_excel was provided" + " without an 'index_col' keyword value.") import xlrd from xlrd import (xldate, XL_CELL_DATE, diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index b66cb24bf44d8..df77708232dd2 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -924,17 +924,27 @@ def test_read_excel_chunksize(self): chunksize=100) def test_read_excel_parse_dates(self): - # GH 11544 - with tm.assertRaises(NotImplementedError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), - parse_dates=True) + # GH 11544, 12051 - def test_read_excel_date_parser(self): - # GH 11544 - with tm.assertRaises(NotImplementedError): - dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%d %H:%M:%S') - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), - date_parser=dateparse) + df = DataFrame( + {'col': [1, 2, 3], + 'date_strings': pd.date_range('2012-01-01', periods=3)}) + df2 = df.copy() + df2['date_strings'] = df2['date_strings'].dt.strftime('%m/%d/%Y') + + with ensure_clean(self.ext) as pth: + df2.to_excel(pth) + + res = read_excel(pth) + tm.assert_frame_equal(df2, res) + + res = read_excel(pth, parse_dates=['date_strings']) + tm.assert_frame_equal(df, res) + + dateparser = lambda x: pd.datetime.strptime(x, '%m/%d/%Y') + res = read_excel(pth, 
parse_dates=['date_strings'], + date_parser=dateparser) + tm.assert_frame_equal(df, res) def test_read_excel_skiprows_list(self): # GH 4903 @@ -1382,8 +1392,7 @@ def test_to_excel_multiindex(self): # round trip frame.to_excel(path, 'test1', merge_cells=self.merge_cells) reader = ExcelFile(path) - df = read_excel(reader, 'test1', index_col=[0, 1], - parse_dates=False) + df = read_excel(reader, 'test1', index_col=[0, 1]) tm.assert_frame_equal(frame, df) # GH13511 @@ -1424,8 +1433,7 @@ def test_to_excel_multiindex_cols(self): frame.to_excel(path, 'test1', merge_cells=self.merge_cells) reader = ExcelFile(path) df = read_excel(reader, 'test1', header=header, - index_col=[0, 1], - parse_dates=False) + index_col=[0, 1]) if not self.merge_cells: fm = frame.columns.format(sparsify=False, adjoin=False, names=False) From 1dab800b412be3613e8f666eb1be88458b631312 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Mar 2017 13:37:32 -0400 Subject: [PATCH 289/933] BUG: index_names can be None when processing date conversions closes #15820 closes #11544 --- doc/source/whatsnew/v0.19.0.txt | 4 ---- doc/source/whatsnew/v0.20.0.txt | 3 ++- pandas/io/excel.py | 2 +- pandas/io/parsers.py | 13 ++++++++++--- pandas/tests/io/test_excel.py | 9 +++++++-- 5 files changed, 20 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 11df0afb144ea..9b003034aa94a 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -517,7 +517,6 @@ Other enhancements - The ``pd.read_json`` and ``DataFrame.to_json`` has gained support for reading and writing json lines with ``lines`` option see :ref:`Line delimited json ` (:issue:`9180`) - :func:`read_excel` now supports the true_values and false_values keyword arguments (:issue:`13347`) - ``groupby()`` will now accept a scalar and a single-element list for specifying ``level`` on a non-``MultiIndex`` grouper. (:issue:`13907`) -<<<<<<< HEAD - Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`). - ``pd.Timedelta(None)`` is now accepted and will return ``NaT``, mirroring ``pd.Timestamp`` (:issue:`13687`) - ``pd.read_stata()`` can now handle some format 111 files, which are produced by SAS when generating Stata dta files (:issue:`11526`) @@ -525,9 +524,6 @@ Other enhancements series or indices. This behaves like a standard binary operator with regards to broadcasting rules (:issue:`14208`). -======= -- Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`) ->>>>>>> PR_TOOL_MERGE_PR_14326 .. _whatsnew_0190.api: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 3ab69e1ff409b..fdf34e0d11572 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -270,7 +270,7 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you .. _whatsnew_0200.enhancements.other: -Other enhancements +Other Enhancements ^^^^^^^^^^^^^^^^^^ - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. @@ -314,6 +314,7 @@ Other enhancements - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. 
- ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now allow optional header aliases. (:issue:`15536`) +- Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/io/excel.py b/pandas/io/excel.py index e7a8b71a5f6c9..d324855bc2f4d 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -344,7 +344,7 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None, raise NotImplementedError("chunksize keyword of read_excel " "is not implemented") - if parse_dates is True and not index_col: + if parse_dates is True and index_col is None: warn("The 'parse_dates=True' keyword of read_excel was provided" " without an 'index_col' keyword value.") diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 45c62b224ef4e..30b88de91ef76 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1176,13 +1176,18 @@ def _should_parse_dates(self, i): if isinstance(self.parse_dates, bool): return self.parse_dates else: - name = self.index_names[i] + if self.index_names is not None: + name = self.index_names[i] + else: + name = None j = self.index_col[i] if is_scalar(self.parse_dates): - return (j == self.parse_dates) or (name == self.parse_dates) + return ((j == self.parse_dates) or + (name is not None and name == self.parse_dates)) else: - return (j in self.parse_dates) or (name in self.parse_dates) + return ((j in self.parse_dates) or + (name is not None and name in self.parse_dates)) def _extract_multi_indexer_columns(self, header, index_names, col_names, passed_names=False): @@ -1352,6 +1357,7 @@ def _get_name(icol): def _agg_index(self, index, try_parse_dates=True): arrays = [] + for i, arr in enumerate(index): if (try_parse_dates and self._should_parse_dates(i)): @@ -1512,6 +1518,7 @@ def _cast_types(self, values, cast_type, column): def _do_date_conversions(self, names, data): # returns data, columns + if self.parse_dates is not None: data, names = _process_date_conversion( data, self._date_conv, self.parse_dates, self.index_col, diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index df77708232dd2..256a37e922177 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -938,12 +938,17 @@ def test_read_excel_parse_dates(self): res = read_excel(pth) tm.assert_frame_equal(df2, res) - res = read_excel(pth, parse_dates=['date_strings']) + # no index_col specified when parse_dates is True + with tm.assert_produces_warning(): + res = read_excel(pth, parse_dates=True) + tm.assert_frame_equal(df2, res) + + res = read_excel(pth, parse_dates=['date_strings'], index_col=0) tm.assert_frame_equal(df, res) dateparser = lambda x: pd.datetime.strptime(x, '%m/%d/%Y') res = read_excel(pth, parse_dates=['date_strings'], - date_parser=dateparser) + date_parser=dateparser, index_col=0) tm.assert_frame_equal(df, res) def test_read_excel_skiprows_list(self): From a9406057b5f48d579d9a9136a183a594c4b1f758 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 28 Mar 2017 12:48:41 -0400 Subject: [PATCH 290/933] BUG: bug in .at/.loc indexing with a tz-aware columns closes #15822 Author: Jeff Reback Closes #15827 from jreback/at and squashes the following commits: 4fcd2c6 [Jeff Reback] BUG: bug in .at/.loc indexing with a tz-aware columns --- doc/source/whatsnew/v0.20.0.txt | 1 + 
pandas/core/frame.py | 11 ++++++++++- pandas/tests/indexing/test_scalar.py | 15 +++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fdf34e0d11572..51c3d5578ae6c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -881,6 +881,7 @@ Bug Fixes - Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) +- Bug in ``.at`` when selecting from a tz-aware column (:issue:`15822`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`, :issue:`15765`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 90c49a9c85133..90baa1aff4857 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1918,7 +1918,16 @@ def get_value(self, index, col, takeable=False): series = self._get_item_cache(col) engine = self.index._engine - return engine.get_value(series.get_values(), index) + + try: + return engine.get_value(series._values, index) + except TypeError: + + # we cannot handle direct indexing + # use positional + col = self.columns.get_loc(col) + index = self.index.get_loc(index) + return self.get_value(index, col, takeable=True) def set_value(self, index, col, value, takeable=False): """ diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 4e81cd01cd5d2..0eeaec3e00fa6 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -154,3 +154,18 @@ def test_at_to_fail(self): # Check that we get the correct value in the KeyError self.assertRaisesRegexp(KeyError, r"\['y'\] not in index", lambda: df[['x', 'y', 'z']]) + + def test_at_with_tz(self): + # gh-15822 + df = DataFrame({'name': ['John', 'Anderson'], + 'date': [Timestamp(2017, 3, 13, 13, 32, 56), + Timestamp(2017, 2, 16, 12, 10, 3)]}) + df['date'] = df['date'].dt.tz_localize('Asia/Shanghai') + + expected = Timestamp('2017-03-13 13:32:56+0800', tz='Asia/Shanghai') + + result = df.loc[0, 'date'] + assert result == expected + + result = df.at[0, 'date'] + assert result == expected From 66fb0a3e0c25cf10988ce0d14a7efee437aa94d6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 28 Mar 2017 13:11:17 -0400 Subject: [PATCH 291/933] TST: consistency of indexing with a tz-aware scalar xref #12938 --- pandas/tests/indexing/test_datetime.py | 31 ++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 1c4e5772d316f..eeef41ad6dbb2 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -63,6 +63,37 @@ def f(): df.loc[df.new_col == 'new', 'time'] = v tm.assert_series_equal(df.loc[df.new_col == 'new', 'time'], v) + def test_consistency_with_tz_aware_scalar(self): + # xref gh-12938 + # various ways of indexing the same tz-aware scalar + df = Series([Timestamp('2016-03-30 14:35:25', + tz='Europe/Brussels')]).to_frame() + + df = pd.concat([df, df]).reset_index(drop=True) + expected = Timestamp('2016-03-30 14:35:25+0200', + tz='Europe/Brussels') + + result = df[0][0] + assert result == expected + + result = df.iloc[0, 0] + assert result == expected + + result = df.loc[0, 0] + assert result == expected + + result = df.iat[0, 0] + assert result == expected + + result = df.at[0, 0] + assert result ==
expected + + result = df[0].loc[0] + assert result == expected + + result = df[0].at[0] + assert result == expected + def test_indexing_with_datetimeindex_tz(self): # GH 12050 From d96ff291cc7446ad36ae7d8db05b0cc588ccd7ec Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Wed, 8 Mar 2017 20:25:51 +0100 Subject: [PATCH 292/933] DOC: Make example running example closes #15624 --- pandas/core/groupby.py | 58 +++++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index dded55114ab6f..2cc68bcabdd22 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -109,15 +109,33 @@ Examples -------- ->>> df = pd.DataFrame(np.repeat(np.arange(10), 3).reshape(-1, 3), - columns=list('ABC')) ->>> grouped = df.groupby(df.index // 3) # Same shape ->>> grouped.%(selected)stransform(lambda x: (x - x.mean()) / x.std()) +>>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', +... 'foo', 'bar'], +... 'B' : ['one', 'one', 'two', 'three', +... 'two', 'two'], +... 'C' : [1, 5, 5, 2, 5, 5], +... 'D' : [2.0, 5., 8., 1., 2., 9.]}) +>>> grouped = df.groupby('A') +>>> grouped.transform(lambda x: (x - x.mean()) / x.std()) + C D +0 -1.154701 -0.577350 +1 0.577350 0.000000 +2 0.577350 1.154701 +3 -1.154701 -1.000000 +4 0.577350 -0.577350 +5 0.577350 1.000000 # Broadcastable ->>> grouped.%(selected)stransform(lambda x: x.max() - x.min()) +>>> grouped.transform(lambda x: x.max() - x.min()) + C D +0 4 6.0 +1 3 8.0 +2 4 6.0 +3 3 8.0 +4 4 6.0 +5 3 8.0 """ @@ -2982,7 +3000,17 @@ def filter(self, func, dropna=True, *args, **kwargs): # noqa Examples -------- - >>> grouped.filter(lambda x: x.mean() > 0) + >>> import pandas as pd + >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', + ... 'foo', 'bar'], + ... 'B' : [1, 2, 3, 4, 5, 6], + ... 'C' : [2.0, 5., 8., 1., 2., 9.]}) + >>> grouped = df.groupby('A') + >>> df.groupby('A').B.filter(lambda x: x.mean() > 3.) + 1 2 + 3 4 + 5 6 + Name: B, dtype: int64 Returns ------- @@ -3784,9 +3812,21 @@ def filter(self, func, dropna=True, *args, **kwargs): # noqa Examples -------- - >>> df = pd.DataFrame(np.random.randn(10, 3), columns=list('ABC')) - >>> grouped = df.groupby(df.index % 3) - >>> grouped.filter(lambda x: x['A'].sum() + x['B'].sum() > 0) + >>> import pandas as pd + >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', + ... 'foo', 'bar'], + ... 'B' : [1, 2, 3, 4, 5, 6], + ... 'C' : [2.0, 5., 8., 1., 2., 9.]}) + >>> grouped = df.groupby('A') + >>> grouped.filter(lambda x: x['B'].mean() > 3.) 
+ A B C + 1 bar 2 5.0 + 3 bar 4 1.0 + 5 bar 6 9.0 + + Returns + ------- + filtered : DataFrame """ indices = [] From 6f789e15cdd91cc02af2005405026355e6fae69e Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Tue, 28 Mar 2017 14:23:44 -0400 Subject: [PATCH 293/933] BUG: replace of numeric by string / dtype conversion (GH15743) closes #15743 Author: Carlos Souza Author: Jeff Reback Closes #15812 from ucals/bug-fix-15743 and squashes the following commits: e6e4971 [Carlos Souza] Adding replace unicode with number and replace mixed types with string tests bd31b2b [Carlos Souza] Resolving merge conflict by incorporating @jreback suggestions 73805ce [Jeff Reback] CLN: add infer_dtype_from_array 45e67e4 [Carlos Souza] Fixing PEP8 line indent 0a98557 [Carlos Souza] BUG: replace of numeric by string fixed 97e1f18 [Carlos Souza] Test e62763c [Carlos Souza] Fixing PEP8 line indent 080c71e [Carlos Souza] BUG: replace of numeric by string fixed 8b463cb [Carlos Souza] Merge remote-tracking branch 'upstream/master' 9fc617b [Carlos Souza] Merge remote-tracking branch 'upstream/master' e12bca7 [Carlos Souza] Sync fork 676a4e5 [Carlos Souza] Test --- RELEASE.md | 2 +- doc/source/whatsnew/v0.20.0.txt | 3 +- pandas/core/missing.py | 26 ++++++++------- pandas/tests/frame/test_replace.py | 25 +++++++++------ pandas/tests/series/test_replace.py | 22 ++++++++++++- pandas/tests/types/test_cast.py | 50 ++++++++++++++++++++--------- pandas/types/cast.py | 44 +++++++++++++++++++++++++ 7 files changed, 132 insertions(+), 40 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index a181412be2719..efd075dabcba9 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,6 +1,6 @@ Release Notes ============= -The list of changes to pandas between each release can be found +The list of changes to Pandas between each release can be found [here](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html). For full details, see the commit logs at http://github.com/pandas-dev/pandas. diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 51c3d5578ae6c..1aebfc140284d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -884,6 +884,8 @@ Bug Fixes - Bug in ``.at`` when selecting from a tz-aware column (:issue:`15822`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``.replace()`` may result in incorrect dtypes.
(:issue:`12747`, :issue:`15765`) +- Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) +- Bug in ``Series.replace`` which replaced a numeric by string (:issue:`15743`) - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) @@ -986,7 +988,6 @@ Bug Fixes - Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) -- Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) - Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 3b9bfe1de48e7..91039f3270af2 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -9,10 +9,16 @@ from pandas.compat import range, string_types from pandas.types.common import (is_numeric_v_string_like, - is_float_dtype, is_datetime64_dtype, - is_datetime64tz_dtype, is_integer_dtype, - _ensure_float64, is_scalar, - needs_i8_conversion, is_integer) + is_float_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_integer_dtype, + is_scalar, + is_integer, + needs_i8_conversion, + _ensure_float64) + +from pandas.types.cast import infer_dtype_from_array from pandas.types.missing import isnull @@ -21,11 +27,11 @@ def mask_missing(arr, values_to_mask): Return a masking array of same size/shape as arr with entries equaling any member of values_to_mask set to True """ - if not isinstance(values_to_mask, (list, np.ndarray)): - values_to_mask = [values_to_mask] + dtype, values_to_mask = infer_dtype_from_array(values_to_mask) try: - values_to_mask = np.array(values_to_mask, dtype=arr.dtype) + values_to_mask = np.array(values_to_mask, dtype=dtype) + except Exception: values_to_mask = np.array(values_to_mask, dtype=object) @@ -409,7 +415,7 @@ def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, if axis != 0: # pragma: no cover raise AssertionError("cannot interpolate on a ndim == 1 with " "axis != 0") - values = values.reshape(tuple((1, ) + values.shape)) + values = values.reshape(tuple((1,) + values.shape)) if fill_value is None: mask = None @@ -447,7 +453,6 @@ def wrapper(arr, mask, limit=None): def pad_1d(values, limit=None, mask=None, dtype=None): - if dtype is None: dtype = values.dtype _method = None @@ -472,7 +477,6 @@ def pad_1d(values, limit=None, mask=None, dtype=None): def backfill_1d(values, limit=None, mask=None, dtype=None): - if dtype is None: dtype = values.dtype _method = None @@ -498,7 +502,6 @@ def backfill_1d(values, limit=None, mask=None, dtype=None): def pad_2d(values, limit=None, mask=None, dtype=None): - if dtype is None: dtype = values.dtype _method = None @@ -528,7 +531,6 @@ def pad_2d(values, limit=None, mask=None, dtype=None): def backfill_2d(values, limit=None, mask=None, dtype=None): - if dtype is None: dtype = values.dtype _method = None diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index 8b50036cd50f8..fce59e10bf4bd 100644 --- a/pandas/tests/frame/test_replace.py +++ 
b/pandas/tests/frame/test_replace.py @@ -795,7 +795,7 @@ def test_replace_dtypes(self): expected = DataFrame({'datetime64': Index([now] * 3)}) assert_frame_equal(result, expected) - def test_replace_input_formats(self): + def test_replace_input_formats_listlike(self): # both dicts to_rep = {'A': np.nan, 'B': 0, 'C': ''} values = {'A': 0, 'B': -1, 'C': 'missing'} @@ -812,15 +812,6 @@ def test_replace_input_formats(self): 'C': ['', 'asdf', 'fd']}) assert_frame_equal(result, expected) - # dict to scalar - filled = df.replace(to_rep, 0) - expected = {} - for k, v in compat.iteritems(df): - expected[k] = v.replace(to_rep[k], 0) - assert_frame_equal(filled, DataFrame(expected)) - - self.assertRaises(TypeError, df.replace, to_rep, [np.nan, 0, '']) - # scalar to dict values = {'A': 0, 'B': -1, 'C': 'missing'} df = DataFrame({'A': [np.nan, 0, np.nan], 'B': [0, 2, 5], @@ -842,6 +833,20 @@ def test_replace_input_formats(self): self.assertRaises(ValueError, df.replace, to_rep, values[1:]) + def test_replace_input_formats_scalar(self): + df = DataFrame({'A': [np.nan, 0, np.inf], 'B': [0, 2, 5], + 'C': ['', 'asdf', 'fd']}) + + # dict to scalar + to_rep = {'A': np.nan, 'B': 0, 'C': ''} + filled = df.replace(to_rep, 0) + expected = {} + for k, v in compat.iteritems(df): + expected[k] = v.replace(to_rep[k], 0) + assert_frame_equal(filled, DataFrame(expected)) + + self.assertRaises(TypeError, df.replace, to_rep, [np.nan, 0, '']) + # list to scalar to_rep = [np.nan, 0, ''] result = df.replace(to_rep, -1) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 0a53581e24ba5..5190eb110f4cf 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -10,7 +10,6 @@ class TestSeriesReplace(TestData, tm.TestCase): - def test_replace(self): N = 100 ser = pd.Series(np.random.randn(N)) @@ -227,3 +226,24 @@ def test_replace_with_empty_dictlike(self): s = pd.Series(list('abcd')) tm.assert_series_equal(s, s.replace(dict())) tm.assert_series_equal(s, s.replace(pd.Series([]))) + + def test_replace_string_with_number(self): + # GH 15743 + s = pd.Series([1, 2, 3]) + result = s.replace('2', np.nan) + expected = pd.Series([1, 2, 3]) + tm.assert_series_equal(expected, result) + + def test_replace_unicode_with_number(self): + # GH 15743 + s = pd.Series([1, 2, 3]) + result = s.replace(u'2', np.nan) + expected = pd.Series([1, 2, 3]) + tm.assert_series_equal(expected, result) + + def test_replace_mixed_types_with_string(self): + # Testing mixed + s = pd.Series([1, 2, 3, '4', 4, 5]) + result = s.replace([2, '4'], np.nan) + expected = pd.Series([1, np.nan, 3, np.nan, 4, 5]) + tm.assert_series_equal(expected, result) diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index dd4ea3bb02be9..de6ef7af9d7f9 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -5,13 +5,15 @@ """ -from datetime import datetime +import pytest +from datetime import datetime, timedelta, date import numpy as np from pandas import Timedelta, Timestamp, DatetimeIndex from pandas.types.cast import (maybe_downcast_to_dtype, maybe_convert_objects, infer_dtype_from_scalar, + infer_dtype_from_array, maybe_convert_string_to_object, maybe_convert_scalar, find_common_type) @@ -82,7 +84,7 @@ def test_datetime_with_timezone(self): tm.assert_index_equal(res, exp) -class TestInferDtype(tm.TestCase): +class TestInferDtype(object): def test_infer_dtype_from_scalar(self): # Test that _infer_dtype_from_scalar is returning correct dtype for int @@ 
-92,44 +94,62 @@ def test_infer_dtype_from_scalar(self): np.int32, np.uint64, np.int64]: data = dtypec(12) dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, type(data)) + assert dtype == type(data) data = 12 dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.int64) + assert dtype == np.int64 for dtypec in [np.float16, np.float32, np.float64]: data = dtypec(12) dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, dtypec) + assert dtype == dtypec data = np.float(12) dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.float64) + assert dtype == np.float64 for data in [True, False]: dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.bool_) + assert dtype == np.bool_ for data in [np.complex64(1), np.complex128(1)]: dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.complex_) + assert dtype == np.complex_ - import datetime for data in [np.datetime64(1, 'ns'), Timestamp(1), - datetime.datetime(2000, 1, 1, 0, 0)]: + datetime(2000, 1, 1, 0, 0)]: dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, 'M8[ns]') + assert dtype == 'M8[ns]' for data in [np.timedelta64(1, 'ns'), Timedelta(1), - datetime.timedelta(1)]: + timedelta(1)]: dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, 'm8[ns]') + assert dtype == 'm8[ns]' - for data in [datetime.date(2000, 1, 1), + for data in [date(2000, 1, 1), Timestamp(1, tz='US/Eastern'), 'foo']: dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.object_) + assert dtype == np.object_ + + @pytest.mark.parametrize( + "arr, expected", + [('foo', np.object_), + (b'foo', np.object_), + (1, np.int_), + (1.5, np.float_), + ([1], np.int_), + (np.array([1]), np.int_), + ([np.nan, 1, ''], np.object_), + (np.array([[1.0, 2.0]]), np.float_), + (Timestamp('20160101'), np.object_), + (np.datetime64('2016-01-01'), np.dtype('<M8[ns]'))]) + def test_infer_dtype_from_array(self, arr, expected): + # these infer specifically to numpy dtypes + dtype, _ = infer_dtype_from_array(arr) + assert dtype == expected diff --git a/pandas/types/cast.py b/pandas/types/cast.py --- a/pandas/types/cast.py +++ b/pandas/types/cast.py +def infer_dtype_from_array(arr): + """ + infer the dtype from a scalar or array + + Parameters + ---------- + arr : scalar or array + + Returns + ------- + tuple (numpy-compat dtype, array) + + Examples + -------- + >>> np.asarray([1, '1']) array(['1', '1'], dtype='<U21') + + >>> infer_dtype_from_array([1, '1']) (numpy.object_, [1, '1']) """ if isinstance(arr, np.ndarray): return arr.dtype, arr if not is_list_like(arr): arr = [arr] # don't force numpy coerce with nan's inferred = lib.infer_dtype(arr) if inferred in ['string', 'bytes', 'unicode', 'mixed', 'mixed-integer']: return (np.object_, arr) arr = np.asarray(arr) return arr.dtype, arr + + def maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): """ provide explicit type promotion and coercion From ec84ae3d6f73633aee5058148ea76fdd79f74ac4 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Tue, 28 Mar 2017 15:03:27 -0400 Subject: [PATCH 294/933] ENH: Add empty property to Index. Previously, attempting to evaluate an Index in a boolean context printed an error message listing various alternatives, one of which is `.empty`, which was not actually implemented on `Index`. Author: Scott Sanderson This patch had conflicts when merged, resolved by Committer: Jeff Reback closes #13207 Closes #15270 from ssanderson/add-empty-to-index and squashes the following commits: bb0126f [Scott Sanderson] ENH: Add empty property to Index.
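For illustration, a minimal doctest-style sketch of the new behavior (a hypothetical session, not part of the patch; it assumes this commit is applied and relies only on `empty` being defined as `not self.size`):

>>> import pandas as pd
>>> pd.Index([1, 2, 3]).empty
False
>>> pd.Index([]).empty
True

Since `size` exists on every Index subclass, the same check works unchanged for DatetimeIndex, CategoricalIndex, and the rest, which is what the accompanying test in tests/indexes/common.py exercises.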
--- doc/source/api.rst | 1 + doc/source/whatsnew/v0.20.0.txt | 2 ++ pandas/core/base.py | 4 ++++ pandas/tests/indexes/common.py | 6 ++++++ 4 files changed, 13 insertions(+) diff --git a/doc/source/api.rst b/doc/source/api.rst index f6bf480bebcfc..dfeaf8e60feb1 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1277,6 +1277,7 @@ Attributes Index.nbytes Index.ndim Index.size + Index.empty Index.strides Index.itemsize Index.base diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 1aebfc140284d..15566d207e31f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -310,11 +310,13 @@ Other Enhancements - ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) + - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now allow optional header aliases. (:issue:`15536`) - Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`) +- Added ``.empty`` property to subclasses of ``Index``. (:issue:`15270`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/core/base.py b/pandas/core/base.py index bde60be3ddcff..3401c7c59cb56 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -774,6 +774,10 @@ def _values(self): """ the internal implementation """ return self.values + @property + def empty(self): + return not self.size + def max(self): """ The maximum value of the object """ return nanops.nanmax(self.values) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index e9122f7a17359..ba76945834aff 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -910,3 +910,9 @@ def test_nulls(self): result = isnull(index) self.assert_numpy_array_equal(index.isnull(), result) self.assert_numpy_array_equal(index.notnull(), ~result) + + def test_empty(self): + # GH 15270 + index = self.create_index() + self.assertFalse(index.empty) + self.assertTrue(index[:0].empty) From 34c6bd0fb7ad58b579ba940d4248ebab0aa758bf Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 28 Mar 2017 17:43:38 -0400 Subject: [PATCH 295/933] ENH: GH14883: json_normalize now takes a user-specified separator closes #14883 Author: Jeff Reback Author: John Owens Closes #14950 from jowens/json_normalize-separator and squashes the following commits: 0327dd1 [Jeff Reback] compare sorted columns bc5aae8 [Jeff Reback] CLN: fixup json_normalize with sep 8edc40e [John Owens] ENH: json_normalize now takes a user-specified separator --- doc/source/whatsnew/v0.20.0.txt | 8 +- pandas/io/json/normalize.py | 37 +++++-- pandas/tests/io/json/test_normalize.py | 141 ++++++++++++++----------- 3 files changed, 114 insertions(+), 72 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 
15566d207e31f..638044cee67bb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -300,9 +300,9 @@ Other Enhancements - ``pd.DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`) - ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) - ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs ` (:issue:`15136`) -- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) - ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`) - The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements + - ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`) - ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`). - ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`). @@ -313,11 +313,15 @@ Other Enhancements - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. -- ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now allow optional header aliases. (:issue:`15536`) - Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`) - Added ``.empty`` property to subclasses of ``Index``. (:issue:`15270`) +- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) +- ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) +- ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts ``str`` to separate joined fields; the default is ".", which is backward compatible. (:issue:`14883`) + + .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index 4da4a6ad57850..518e0bc2064e2 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -21,7 +21,7 @@ def _convert_to_line_delimits(s): return convert_json_to_lines(s) -def nested_to_record(ds, prefix="", level=0): +def nested_to_record(ds, prefix="", sep=".", level=0): """a simplified json_normalize converts a nested dict into a flat dict ("record"), unlike json_normalize, @@ -31,6 +31,12 @@ def nested_to_record(ds, prefix="", level=0): ---------- ds : dict or list of dicts prefix: the prefix, optional, default: "" + sep : string, default '.' + Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + + .. 
versionadded:: 0.20.0 level: the number of levels in the json string, optional, default: 0 Returns @@ -66,7 +72,7 @@ if level == 0: newkey = k else: - newkey = prefix + '.' + k + newkey = prefix + sep + k # only dicts get recurse-flattened # only at level>1 do we rename the rest of the keys @@ -77,7 +83,7 @@ continue else: v = new_d.pop(k) - new_d.update(nested_to_record(v, newkey, level + 1)) + new_d.update(nested_to_record(v, newkey, sep, level + 1)) new_ds.append(new_d) if singleton: @@ -88,7 +94,8 @@ def json_normalize(data, record_path=None, meta=None, meta_prefix=None, record_prefix=None, - errors='raise'): + errors='raise', + sep='.'): """ "Normalize" semi-structured JSON data into a flat table @@ -106,13 +113,21 @@ path to records is ['foo', 'bar'] meta_prefix : string, default None errors : {'raise', 'ignore'}, default 'raise' - * 'ignore' : will ignore KeyError if keys listed in meta are not - always present - * 'raise' : will raise KeyError if keys listed in meta are not - always present + + * ignore : will ignore KeyError if keys listed in meta are not + always present + * raise : will raise KeyError if keys listed in meta are not + always present .. versionadded:: 0.20.0 + sep : string, default '.' + Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + + .. versionadded:: 0.20.0 + + Returns ------- frame : DataFrame @@ -173,7 +188,7 @@ def _pull_field(js, spec): # # TODO: handle record value which are lists, at least error # reasonably - data = nested_to_record(data) + data = nested_to_record(data, sep=sep) return DataFrame(data) elif not isinstance(record_path, list): record_path = [record_path] @@ -192,7 +207,9 @@ def _pull_field(js, spec): lengths = [] meta_vals = defaultdict(list) - meta_keys = ['.'.join(val) for val in meta] + if not isinstance(sep, compat.string_types): + sep = str(sep) + meta_keys = [sep.join(val) for val in meta] def _recursive_extract(data, path, seen_meta, level=0): if len(path) > 1: diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index f881f4dafe0f3..ee79859e9b71a 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -1,36 +1,60 @@ -from pandas import DataFrame +import pytest import numpy as np import json import pandas.util.testing as tm -from pandas import compat +from pandas import compat, Index, DataFrame from pandas.io.json import json_normalize from pandas.io.json.normalize import nested_to_record -def _assert_equal_data(left, right): - if not left.columns.equals(right.columns): - left = left.reindex(columns=right.columns) +@pytest.fixture +def deep_nested(): + # deeply nested data + return [{'country': 'USA', + 'states': [{'name': 'California', + 'cities': [{'name': 'San Francisco', + 'pop': 12345}, + {'name': 'Los Angeles', + 'pop': 12346}] + }, + {'name': 'Ohio', + 'cities': [{'name': 'Columbus', + 'pop': 1234}, + {'name': 'Cleveland', + 'pop': 1236}]} + ] + }, + {'country': 'Germany', + 'states': [{'name': 'Bayern', + 'cities': [{'name': 'Munich', 'pop': 12347}] + }, + {'name': 'Nordrhein-Westfalen', + 'cities': [{'name': 'Duesseldorf', 'pop': 1238}, + {'name': 'Koeln', 'pop': 1239}]} + ] + } + ] - tm.assert_frame_equal(left, right) +@pytest.fixture +def state_data(): + return [ + {'counties':
[{'name': 'Dade', 'population': 12345}, + {'name': 'Broward', 'population': 40000}, + {'name': 'Palm Beach', 'population': 60000}], + 'info': {'governor': 'Rick Scott'}, + 'shortname': 'FL', + 'state': 'Florida'}, + {'counties': [{'name': 'Summit', 'population': 1234}, + {'name': 'Cuyahoga', 'population': 1337}], + 'info': {'governor': 'John Kasich'}, + 'shortname': 'OH', + 'state': 'Ohio'}] -class TestJSONNormalize(tm.TestCase): - def setUp(self): - self.state_data = [ - {'counties': [{'name': 'Dade', 'population': 12345}, - {'name': 'Broward', 'population': 40000}, - {'name': 'Palm Beach', 'population': 60000}], - 'info': {'governor': 'Rick Scott'}, - 'shortname': 'FL', - 'state': 'Florida'}, - {'counties': [{'name': 'Summit', 'population': 1234}, - {'name': 'Cuyahoga', 'population': 1337}], - 'info': {'governor': 'John Kasich'}, - 'shortname': 'OH', - 'state': 'Ohio'}] +class TestJSONNormalize(object): def test_simple_records(self): recs = [{'a': 1, 'b': 2, 'c': 3}, @@ -43,21 +67,21 @@ def test_simple_records(self): tm.assert_frame_equal(result, expected) - def test_simple_normalize(self): - result = json_normalize(self.state_data[0], 'counties') - expected = DataFrame(self.state_data[0]['counties']) + def test_simple_normalize(self, state_data): + result = json_normalize(state_data[0], 'counties') + expected = DataFrame(state_data[0]['counties']) tm.assert_frame_equal(result, expected) - result = json_normalize(self.state_data, 'counties') + result = json_normalize(state_data, 'counties') expected = [] - for rec in self.state_data: + for rec in state_data: expected.extend(rec['counties']) expected = DataFrame(expected) tm.assert_frame_equal(result, expected) - result = json_normalize(self.state_data, 'counties', meta='state') + result = json_normalize(state_data, 'counties', meta='state') expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2]) tm.assert_frame_equal(result, expected) @@ -67,33 +91,30 @@ def test_empty_array(self): expected = DataFrame() tm.assert_frame_equal(result, expected) - def test_more_deeply_nested(self): - data = [{'country': 'USA', - 'states': [{'name': 'California', - 'cities': [{'name': 'San Francisco', - 'pop': 12345}, - {'name': 'Los Angeles', - 'pop': 12346}] - }, - {'name': 'Ohio', - 'cities': [{'name': 'Columbus', - 'pop': 1234}, - {'name': 'Cleveland', - 'pop': 1236}]} - ] - }, - {'country': 'Germany', - 'states': [{'name': 'Bayern', - 'cities': [{'name': 'Munich', 'pop': 12347}] - }, - {'name': 'Nordrhein-Westfalen', - 'cities': [{'name': 'Duesseldorf', 'pop': 1238}, - {'name': 'Koeln', 'pop': 1239}]} - ] - } - ] + def test_simple_normalize_with_separator(self, deep_nested): + # GH 14883 + result = json_normalize({'A': {'A': 1, 'B': 2}}) + expected = DataFrame([[1, 2]], columns=['A.A', 'A.B']) + tm.assert_frame_equal(result.reindex_like(expected), expected) + + result = json_normalize({'A': {'A': 1, 'B': 2}}, sep='_') + expected = DataFrame([[1, 2]], columns=['A_A', 'A_B']) + tm.assert_frame_equal(result.reindex_like(expected), expected) + + result = json_normalize({'A': {'A': 1, 'B': 2}}, sep=u'\u03c3') + expected = DataFrame([[1, 2]], columns=[u'A\u03c3A', u'A\u03c3B']) + tm.assert_frame_equal(result.reindex_like(expected), expected) + + result = json_normalize(deep_nested, ['states', 'cities'], + meta=['country', ['states', 'name']], + sep='_') + expected = Index(['name', 'pop', + 'country', 'states_name']).sort_values() + assert result.columns.sort_values().equals(expected) + + def test_more_deeply_nested(self, deep_nested): - result = 
json_normalize(data, ['states', 'cities'], + result = json_normalize(deep_nested, ['states', 'cities'], meta=['country', ['states', 'name']]) # meta_prefix={'states': 'state_'}) @@ -143,26 +164,26 @@ def test_meta_name_conflict(self): 'data': [{'foo': 'something', 'bar': 'else'}, {'foo': 'something2', 'bar': 'else2'}]}] - self.assertRaises(ValueError, json_normalize, data, - 'data', meta=['foo', 'bar']) + with pytest.raises(ValueError): + json_normalize(data, 'data', meta=['foo', 'bar']) result = json_normalize(data, 'data', meta=['foo', 'bar'], meta_prefix='meta') for val in ['metafoo', 'metabar', 'foo', 'bar']: - self.assertTrue(val in result) + assert val in result - def test_record_prefix(self): - result = json_normalize(self.state_data[0], 'counties') - expected = DataFrame(self.state_data[0]['counties']) + def test_record_prefix(self, state_data): + result = json_normalize(state_data[0], 'counties') + expected = DataFrame(state_data[0]['counties']) tm.assert_frame_equal(result, expected) - result = json_normalize(self.state_data, 'counties', + result = json_normalize(state_data, 'counties', meta='state', record_prefix='county_') expected = [] - for rec in self.state_data: + for rec in state_data: expected.extend(rec['counties']) expected = DataFrame(expected) expected = expected.rename(columns=lambda x: 'county_' + x) From 2e646147349bafa7f0b2e9ce26d1e27e3015d3b3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 28 Mar 2017 18:42:31 -0400 Subject: [PATCH 296/933] COMPAT: Fix indent level bug preventing wrapper function rename Original code intends to rename the wrapper function f using the provided name, but this isn't happening because code is incorrectly indented an extra level. from pandas.core.groupby import GroupBy GroupBy.sum.__name__ Should be 'sum'. Author: Jeff Reback Author: Matt Hagy Author: Matt Hagy Closes #14620 from matthagy/patch-1 and squashes the following commits: db3c6e4 [Jeff Reback] clean/reorg tests 205489b [Jeff Reback] doc 8b185b4 [Jeff Reback] PEP 781b9b3 [Jeff Reback] Move _groupby_function inside GroupBy 68013bf [Matt Hagy] Added a test for known inconsistent attribute/method names 3bf8993 [Matt Hagy] Revise attribute/method consistency check to skip known inconsistencies 033e42d [Matt Hagy] Test for consistency of attribute and method names 2a54b77 [Matt Hagy] Test renaming of _groupby_function wrapper function a492b5a [Matt Hagy] Fix indent level bug preventing wrapper function rename --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/groupby.py | 143 ++++++------ pandas/tests/groupby/common.py | 36 +-- pandas/tests/groupby/test_groupby.py | 223 ------------------ pandas/tests/groupby/test_whitelist.py | 301 +++++++++++++++++++++++++ 5 files changed, 400 insertions(+), 304 deletions(-) create mode 100644 pandas/tests/groupby/test_whitelist.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 638044cee67bb..787857095044a 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -887,6 +887,7 @@ Bug Fixes - Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) +- Properly set ``__name__`` and ``__qualname__`` for ``Groupby.*`` functions (:issue:`14620`) - Bug in ``.at`` when selecting from a tz-aware column (:issue:`15822`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``.replace()`` may result in incorrect dtypes. 
(:issue:`12747`, :issue:`15765`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 2cc68bcabdd22..fe764a099bb63 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -12,8 +12,8 @@ ) from pandas import compat -from pandas.compat.numpy import function as nv -from pandas.compat.numpy import _np_version_under1p8 +from pandas.compat.numpy import function as nv, _np_version_under1p8 +from pandas.compat import set_function_name from pandas.types.common import (is_numeric_dtype, is_timedelta64_dtype, is_datetime64_dtype, @@ -172,64 +172,6 @@ 'cummin', 'cummax']) -def _groupby_function(name, alias, npfunc, numeric_only=True, - _convert=False): - - _local_template = "Compute %(f)s of group values" - - @Substitution(name='groupby', f=name) - @Appender(_doc_template) - @Appender(_local_template) - def f(self, **kwargs): - if 'numeric_only' not in kwargs: - kwargs['numeric_only'] = numeric_only - self._set_group_selection() - try: - return self._cython_agg_general(alias, alt=npfunc, **kwargs) - except AssertionError as e: - raise SpecificationError(str(e)) - except Exception: - result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) - if _convert: - result = result._convert(datetime=True) - return result - - f.__name__ = name - - return f - - -def _first_compat(x, axis=0): - - def _first(x): - - x = np.asarray(x) - x = x[notnull(x)] - if len(x) == 0: - return np.nan - return x[0] - - if isinstance(x, DataFrame): - return x.apply(_first, axis=axis) - else: - return _first(x) - - -def _last_compat(x, axis=0): - def _last(x): - - x = np.asarray(x) - x = x[notnull(x)] - if len(x) == 0: - return np.nan - return x[-1] - - if isinstance(x, DataFrame): - return x.apply(_last, axis=axis) - else: - return _last(x) - - class Grouper(object): """ A Grouper allows the user to specify a groupby instruction for a target @@ -1184,14 +1126,76 @@ def size(self): result.name = getattr(self, 'name', None) return result - sum = _groupby_function('sum', 'add', np.sum) - prod = _groupby_function('prod', 'prod', np.prod) - min = _groupby_function('min', 'min', np.min, numeric_only=False) - max = _groupby_function('max', 'max', np.max, numeric_only=False) - first = _groupby_function('first', 'first', _first_compat, - numeric_only=False, _convert=True) - last = _groupby_function('last', 'last', _last_compat, numeric_only=False, - _convert=True) + @classmethod + def _add_numeric_operations(cls): + """ add numeric operations to the GroupBy generically """ + + def groupby_function(name, alias, npfunc, + numeric_only=True, _convert=False): + + _local_template = "Compute %(f)s of group values" + + @Substitution(name='groupby', f=name) + @Appender(_doc_template) + @Appender(_local_template) + def f(self, **kwargs): + if 'numeric_only' not in kwargs: + kwargs['numeric_only'] = numeric_only + self._set_group_selection() + try: + return self._cython_agg_general( + alias, alt=npfunc, **kwargs) + except AssertionError as e: + raise SpecificationError(str(e)) + except Exception: + result = self.aggregate( + lambda x: npfunc(x, axis=self.axis)) + if _convert: + result = result._convert(datetime=True) + return result + + set_function_name(f, name, cls) + + return f + + def first_compat(x, axis=0): + + def first(x): + + x = np.asarray(x) + x = x[notnull(x)] + if len(x) == 0: + return np.nan + return x[0] + + if isinstance(x, DataFrame): + return x.apply(first, axis=axis) + else: + return first(x) + + def last_compat(x, axis=0): + + def last(x): + + x = np.asarray(x) + x = x[notnull(x)] + if len(x) == 0: + 
return np.nan + return x[-1] + + if isinstance(x, DataFrame): + return x.apply(last, axis=axis) + else: + return last(x) + + cls.sum = groupby_function('sum', 'add', np.sum) + cls.prod = groupby_function('prod', 'prod', np.prod) + cls.min = groupby_function('min', 'min', np.min, numeric_only=False) + cls.max = groupby_function('max', 'max', np.max, numeric_only=False) + cls.first = groupby_function('first', 'first', first_compat, + numeric_only=False, _convert=True) + cls.last = groupby_function('last', 'last', last_compat, + numeric_only=False, _convert=True) @Substitution(name='groupby') @Appender(_doc_template) @@ -1604,6 +1608,9 @@ def tail(self, n=5): return self._selected_obj[mask] +GroupBy._add_numeric_operations() + + @Appender(GroupBy.__doc__) def groupby(obj, by, **kwds): if isinstance(obj, Series): diff --git a/pandas/tests/groupby/common.py b/pandas/tests/groupby/common.py index 8a70777d08682..f3dccf473f53a 100644 --- a/pandas/tests/groupby/common.py +++ b/pandas/tests/groupby/common.py @@ -1,10 +1,31 @@ """ Base setup """ +import pytest import numpy as np from pandas.util import testing as tm from pandas import DataFrame, MultiIndex +@pytest.fixture +def mframe(): + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + return DataFrame(np.random.randn(10, 3), index=index, + columns=['A', 'B', 'C']) + + +@pytest.fixture +def df(): + return DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + class MixIn(object): def setUp(self): @@ -15,12 +36,7 @@ def setUp(self): self.frame = DataFrame(self.seriesd) self.tsframe = DataFrame(self.tsd) - self.df = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - + self.df = df() self.df_mixed_floats = DataFrame( {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], @@ -28,13 +44,7 @@ def setUp(self): 'D': np.array( np.random.randn(8), dtype='float32')}) - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', - 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - self.mframe = DataFrame(np.random.randn(10, 3), index=index, - columns=['A', 'B', 'C']) + self.mframe = mframe() self.three_group = DataFrame( {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 9f5a7f404e2be..83502434e6053 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3706,229 +3706,6 @@ def test_index_label_overlaps_location(self): expected = ser.take([1, 3, 4]) assert_series_equal(actual, expected) - def test_groupby_selection_with_methods(self): - # some methods which require DatetimeIndex - rng = pd.date_range('2014', periods=len(self.df)) - self.df.index = rng - - g = self.df.groupby(['A'])[['C']] - g_exp = self.df[['C']].groupby(self.df['A']) - # TODO check groupby with > 1 col ? 
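# A minimal sketch of the behaviour this patch fixes, assuming a build with
# the commit applied (editor's illustration only, not part of the diff; the
# frame columns 'key'/'val' are invented example data). The generated
# aggregation wrappers now carry their public names via set_function_name
# instead of the generic wrapper name 'f':
from pandas import DataFrame
from pandas.core.groupby import GroupBy

assert GroupBy.sum.__name__ == 'sum'  # previously reported the wrapper name 'f'

grouped = DataFrame({'key': ['a', 'a', 'b'], 'val': [1, 2, 3]}).groupby('key')
assert grouped.sum.__name__ == 'sum'  # the bound method reports the same name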
- - # methods which are called as .foo() - methods = ['count', - 'corr', - 'cummax', - 'cummin', - 'cumprod', - 'describe', - 'rank', - 'quantile', - 'diff', - 'shift', - 'all', - 'any', - 'idxmin', - 'idxmax', - 'ffill', - 'bfill', - 'pct_change', - 'tshift'] - - for m in methods: - res = getattr(g, m)() - exp = getattr(g_exp, m)() - assert_frame_equal(res, exp) # should always be frames! - - # methods which aren't just .foo() - assert_frame_equal(g.fillna(0), g_exp.fillna(0)) - assert_frame_equal(g.dtypes, g_exp.dtypes) - assert_frame_equal(g.apply(lambda x: x.sum()), - g_exp.apply(lambda x: x.sum())) - - assert_frame_equal(g.resample('D').mean(), g_exp.resample('D').mean()) - assert_frame_equal(g.resample('D').ohlc(), - g_exp.resample('D').ohlc()) - - assert_frame_equal(g.filter(lambda x: len(x) == 3), - g_exp.filter(lambda x: len(x) == 3)) - - def test_groupby_whitelist(self): - from string import ascii_lowercase - letters = np.array(list(ascii_lowercase)) - N = 10 - random_letters = letters.take(np.random.randint(0, 26, N)) - df = DataFrame({'floats': N / 10 * Series(np.random.random(N)), - 'letters': Series(random_letters)}) - s = df.floats - - df_whitelist = frozenset([ - 'last', - 'first', - 'mean', - 'sum', - 'min', - 'max', - 'head', - 'tail', - 'cumcount', - 'resample', - 'rank', - 'quantile', - 'fillna', - 'mad', - 'any', - 'all', - 'take', - 'idxmax', - 'idxmin', - 'shift', - 'tshift', - 'ffill', - 'bfill', - 'pct_change', - 'skew', - 'plot', - 'boxplot', - 'hist', - 'median', - 'dtypes', - 'corrwith', - 'corr', - 'cov', - 'diff', - ]) - s_whitelist = frozenset([ - 'last', - 'first', - 'mean', - 'sum', - 'min', - 'max', - 'head', - 'tail', - 'cumcount', - 'resample', - 'rank', - 'quantile', - 'fillna', - 'mad', - 'any', - 'all', - 'take', - 'idxmax', - 'idxmin', - 'shift', - 'tshift', - 'ffill', - 'bfill', - 'pct_change', - 'skew', - 'plot', - 'hist', - 'median', - 'dtype', - 'corr', - 'cov', - 'diff', - 'unique', - 'nlargest', - 'nsmallest', - ]) - - for obj, whitelist in zip((df, s), (df_whitelist, s_whitelist)): - gb = obj.groupby(df.letters) - self.assertEqual(whitelist, gb._apply_whitelist) - for m in whitelist: - getattr(type(gb), m) - - AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', - 'mad', 'std', 'var', 'sem'] - AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad'] - - def test_regression_whitelist_methods(self): - - # GH6944 - # explicity test the whitelest methods - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', - 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - raw_frame = DataFrame(np.random.randn(10, 3), index=index, - columns=Index(['A', 'B', 'C'], name='exp')) - raw_frame.iloc[1, [1, 2]] = np.nan - raw_frame.iloc[7, [0, 1]] = np.nan - - for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, - lrange(2), lrange(2), - [True, False]): - - if axis == 0: - frame = raw_frame - else: - frame = raw_frame.T - - if op in self.AGG_FUNCTIONS_WITH_SKIPNA: - grouped = frame.groupby(level=level, axis=axis) - result = getattr(grouped, op)(skipna=skipna) - expected = getattr(frame, op)(level=level, axis=axis, - skipna=skipna) - assert_frame_equal(result, expected) - else: - grouped = frame.groupby(level=level, axis=axis) - result = getattr(grouped, op)() - expected = getattr(frame, op)(level=level, axis=axis) - assert_frame_equal(result, expected) - - def test_groupby_blacklist(self): - from string import ascii_lowercase - letters = 
np.array(list(ascii_lowercase)) - N = 10 - random_letters = letters.take(np.random.randint(0, 26, N)) - df = DataFrame({'floats': N / 10 * Series(np.random.random(N)), - 'letters': Series(random_letters)}) - s = df.floats - - blacklist = [ - 'eval', 'query', 'abs', 'where', - 'mask', 'align', 'groupby', 'clip', 'astype', - 'at', 'combine', 'consolidate', 'convert_objects', - ] - to_methods = [method for method in dir(df) if method.startswith('to_')] - - blacklist.extend(to_methods) - - # e.g., to_csv - defined_but_not_allowed = ("(?:^Cannot.+{0!r}.+{1!r}.+try using the " - "'apply' method$)") - - # e.g., query, eval - not_defined = "(?:^{1!r} object has no attribute {0!r}$)" - fmt = defined_but_not_allowed + '|' + not_defined - for bl in blacklist: - for obj in (df, s): - gb = obj.groupby(df.letters) - msg = fmt.format(bl, type(gb).__name__) - with tm.assertRaisesRegexp(AttributeError, msg): - getattr(gb, bl) - - def test_tab_completion(self): - grp = self.mframe.groupby(level='second') - results = set([v for v in dir(grp) if not v.startswith('_')]) - expected = set( - ['A', 'B', 'C', 'agg', 'aggregate', 'apply', 'boxplot', 'filter', - 'first', 'get_group', 'groups', 'hist', 'indices', 'last', 'max', - 'mean', 'median', 'min', 'name', 'ngroups', 'nth', 'ohlc', 'plot', - 'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count', - 'nunique', 'head', 'describe', 'cummax', 'quantile', - 'rank', 'cumprod', 'tail', 'resample', 'cummin', 'fillna', - 'cumsum', 'cumcount', 'all', 'shift', 'skew', 'bfill', 'ffill', - 'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', - 'cov', 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin', - 'ffill', 'bfill', 'pad', 'backfill', 'rolling', 'expanding']) - self.assertEqual(results, expected) - def test_lower_int_prec_count(self): df = DataFrame({'a': np.array( [0, 1, 2, 100], np.int8), diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py new file mode 100644 index 0000000000000..d566f34b7eae8 --- /dev/null +++ b/pandas/tests/groupby/test_whitelist.py @@ -0,0 +1,301 @@ +""" +test methods relating to generic function evaluation +the so-called white/black lists +""" + +import pytest +from string import ascii_lowercase +import numpy as np +from pandas import DataFrame, Series, compat, date_range, Index, MultiIndex +from pandas.util import testing as tm +from pandas.compat import lrange, product + +AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', + 'mad', 'std', 'var', 'sem'] +AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad'] + +df_whitelist = frozenset([ + 'last', + 'first', + 'mean', + 'sum', + 'min', + 'max', + 'head', + 'tail', + 'cumcount', + 'resample', + 'rank', + 'quantile', + 'fillna', + 'mad', + 'any', + 'all', + 'take', + 'idxmax', + 'idxmin', + 'shift', + 'tshift', + 'ffill', + 'bfill', + 'pct_change', + 'skew', + 'plot', + 'boxplot', + 'hist', + 'median', + 'dtypes', + 'corrwith', + 'corr', + 'cov', + 'diff', +]) + +s_whitelist = frozenset([ + 'last', + 'first', + 'mean', + 'sum', + 'min', + 'max', + 'head', + 'tail', + 'cumcount', + 'resample', + 'rank', + 'quantile', + 'fillna', + 'mad', + 'any', + 'all', + 'take', + 'idxmax', + 'idxmin', + 'shift', + 'tshift', + 'ffill', + 'bfill', + 'pct_change', + 'skew', + 'plot', + 'hist', + 'median', + 'dtype', + 'corr', + 'cov', + 'diff', + 'unique', + 'nlargest', + 'nsmallest', +]) + + +@pytest.fixture +def mframe(): + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 
3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + return DataFrame(np.random.randn(10, 3), index=index, + columns=['A', 'B', 'C']) + + +@pytest.fixture +def df(): + return DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + +@pytest.fixture +def df_letters(): + letters = np.array(list(ascii_lowercase)) + N = 10 + random_letters = letters.take(np.random.randint(0, 26, N)) + df = DataFrame({'floats': N / 10 * Series(np.random.random(N)), + 'letters': Series(random_letters)}) + return df + + +@pytest.mark.parametrize( + "obj, whitelist", zip((df_letters(), df_letters().floats), + (df_whitelist, s_whitelist))) +def test_groupby_whitelist(df_letters, obj, whitelist): + df = df_letters + + # these are aliases so ok to have the alias __name__ + alias = {'bfill': 'backfill', + 'ffill': 'pad', + 'boxplot': None} + + gb = obj.groupby(df.letters) + + assert whitelist == gb._apply_whitelist + for m in whitelist: + + m = alias.get(m, m) + if m is None: + continue + + f = getattr(type(gb), m) + + # name + try: + n = f.__name__ + except AttributeError: + continue + assert n == m + + # qualname + if compat.PY3: + try: + n = f.__qualname__ + except AttributeError: + continue + assert n.endswith(m) + + +@pytest.fixture +def raw_frame(): + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + raw_frame = DataFrame(np.random.randn(10, 3), index=index, + columns=Index(['A', 'B', 'C'], name='exp')) + raw_frame.iloc[1, [1, 2]] = np.nan + raw_frame.iloc[7, [0, 1]] = np.nan + return raw_frame + + +@pytest.mark.parametrize( + "op, level, axis, skipna", + product(AGG_FUNCTIONS, + lrange(2), lrange(2), + [True, False])) +def test_regression_whitelist_methods(raw_frame, op, level, axis, skipna): + # GH6944 + # explicitly test the whitelisted methods + + if axis == 0: + frame = raw_frame + else: + frame = raw_frame.T + + if op in AGG_FUNCTIONS_WITH_SKIPNA: + grouped = frame.groupby(level=level, axis=axis) + result = getattr(grouped, op)(skipna=skipna) + expected = getattr(frame, op)(level=level, axis=axis, + skipna=skipna) + tm.assert_frame_equal(result, expected) + else: + grouped = frame.groupby(level=level, axis=axis) + result = getattr(grouped, op)() + expected = getattr(frame, op)(level=level, axis=axis) + tm.assert_frame_equal(result, expected) + + +def test_groupby_blacklist(df_letters): + df = df_letters + s = df_letters.floats + + blacklist = [ + 'eval', 'query', 'abs', 'where', + 'mask', 'align', 'groupby', 'clip', 'astype', + 'at', 'combine', 'consolidate', 'convert_objects', + ] + to_methods = [method for method in dir(df) if method.startswith('to_')] + + blacklist.extend(to_methods) + + # e.g., to_csv + defined_but_not_allowed = ("(?:^Cannot.+{0!r}.+{1!r}.+try using the " + "'apply' method$)") + + # e.g., query, eval + not_defined = "(?:^{1!r} object has no attribute {0!r}$)" + fmt = defined_but_not_allowed + '|' + not_defined + for bl in blacklist: + for obj in (df, s): + gb = obj.groupby(df.letters) + msg = fmt.format(bl, type(gb).__name__) + with tm.assertRaisesRegexp(AttributeError, msg): + getattr(gb, bl) + + +def test_tab_completion(mframe): + grp = mframe.groupby(level='second') + results = set([v for v in dir(grp) if not v.startswith('_')]) + expected = set( + ['A', 'B', 'C', 'agg', 'aggregate', 
'apply', 'boxplot', 'filter', + 'first', 'get_group', 'groups', 'hist', 'indices', 'last', 'max', + 'mean', 'median', 'min', 'name', 'ngroups', 'nth', 'ohlc', 'plot', + 'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count', + 'nunique', 'head', 'describe', 'cummax', 'quantile', + 'rank', 'cumprod', 'tail', 'resample', 'cummin', 'fillna', + 'cumsum', 'cumcount', 'all', 'shift', 'skew', + 'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', + 'cov', 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin', + 'ffill', 'bfill', 'pad', 'backfill', 'rolling', 'expanding']) + assert results == expected + + +def test_groupby_function_rename(mframe): + grp = mframe.groupby(level='second') + for name in ['sum', 'prod', 'min', 'max', 'first', 'last']: + f = getattr(grp, name) + assert f.__name__ == name + + +def test_groupby_selection_with_methods(df): + # some methods which require DatetimeIndex + rng = date_range('2014', periods=len(df)) + df.index = rng + + g = df.groupby(['A'])[['C']] + g_exp = df[['C']].groupby(df['A']) + # TODO check groupby with > 1 col ? + + # methods which are called as .foo() + methods = ['count', + 'corr', + 'cummax', + 'cummin', + 'cumprod', + 'describe', + 'rank', + 'quantile', + 'diff', + 'shift', + 'all', + 'any', + 'idxmin', + 'idxmax', + 'ffill', + 'bfill', + 'pct_change', + 'tshift'] + + for m in methods: + res = getattr(g, m)() + exp = getattr(g_exp, m)() + + # should always be frames! + tm.assert_frame_equal(res, exp) + + # methods which aren't just .foo() + tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0)) + tm.assert_frame_equal(g.dtypes, g_exp.dtypes) + tm.assert_frame_equal(g.apply(lambda x: x.sum()), + g_exp.apply(lambda x: x.sum())) + + tm.assert_frame_equal(g.resample('D').mean(), g_exp.resample('D').mean()) + tm.assert_frame_equal(g.resample('D').ohlc(), + g_exp.resample('D').ohlc()) + + tm.assert_frame_equal(g.filter(lambda x: len(x) == 3), + g_exp.filter(lambda x: len(x) == 3)) From bd169dc0a91f50031f6c2240075ff84d6b296576 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Tue, 28 Mar 2017 20:17:41 -0400 Subject: [PATCH 297/933] BUG: Fix index order for Index.intersection() closes #15582 Author: Albert Villanova del Moral Author: Jeff Reback Closes #15583 from albertvillanova/fix-15582 and squashes the following commits: 2d4e143 [Albert Villanova del Moral] Fix pytest fixture name collision 64e86a4 [Albert Villanova del Moral] Fix test on right join 73df69e [Albert Villanova del Moral] Address requested changes 8d2e9cc [Albert Villanova del Moral] Address requested changes 968c7f1 [Jeff Reback] DOC/TST: change to use parameterization 9e39794 [Albert Villanova del Moral] Address requested changes 5bf1508 [Albert Villanova del Moral] Address requested changes 654288b [Albert Villanova del Moral] Fix Travis errors 33eb740 [Albert Villanova del Moral] Address requested changes 3c200fe [Albert Villanova del Moral] Add new tests ef2581e [Albert Villanova del Moral] Fix Travis error f0d9d03 [Albert Villanova del Moral] Add whatsnew c96306d [Albert Villanova del Moral] Add sort argument to Index.join 047b513 [Albert Villanova del Moral] Address requested changes ec836bd [Albert Villanova del Moral] Fix Travis errors b977278 [Albert Villanova del Moral] Address requested changes 784fe75 [Albert Villanova del Moral] Fix error: line too long 1197b99 [Albert Villanova del Moral] Fix DataFrame column order when read from HDF file d9e29f8 [Albert Villanova del Moral] Create new DatetimeIndex from the Index.intersection result e7bcd28 [Albert 
Villanova del Moral] Fix typo in documentation a4ead99 [Albert Villanova del Moral] Fix typo c2a8dc3 [Albert Villanova del Moral] Implement tests c12bb3f [Albert Villanova del Moral] BUG: Fix index order for Index.intersection() --- doc/source/whatsnew/v0.20.0.txt | 57 +++++++++++ pandas/core/frame.py | 23 +++-- pandas/indexes/base.py | 27 ++++-- pandas/indexes/range.py | 27 ++---- pandas/io/pytables.py | 2 +- pandas/tests/frame/test_join.py | 140 ++++++++++++++++++++++++++++ pandas/tests/frame/test_misc_api.py | 86 ----------------- pandas/tests/indexes/test_base.py | 19 ++-- pandas/tests/tools/test_merge.py | 48 ++++++++++ pandas/tools/merge.py | 3 +- pandas/tseries/index.py | 14 +-- 11 files changed, 309 insertions(+), 137 deletions(-) create mode 100644 pandas/tests/frame/test_join.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 787857095044a..2e822729873ad 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -750,6 +750,62 @@ New Behavior: TypeError: Cannot compare 2014-01-01 00:00:00 of type to string column +.. _whatsnew_0200.api_breaking.index_order: + +Index.intersection and inner join now preserve the order of the left Index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`Index.intersection` now preserves the order of the calling ``Index`` (left) +instead of the other ``Index`` (right) (:issue:`15582`). This affects inner +joins (:meth:`DataFrame.join` and :func:`merge`) and the ``.align`` methods. + +- ``Index.intersection`` + + .. ipython:: python + + left = pd.Index([2, 1, 0]) + left + right = pd.Index([1, 2, 3]) + right + + Previous Behavior: + + .. code-block:: ipython + + In [4]: left.intersection(right) + Out[4]: Int64Index([1, 2], dtype='int64') + + New Behavior: + + .. ipython:: python + + left.intersection(right) + +- ``DataFrame.join`` and ``pd.merge`` + + .. ipython:: python + + left = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) + left + right = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3]) + right + + Previous Behavior: + + .. code-block:: ipython + + In [4]: left.join(right, how='inner') + Out[4]: + a b + 1 10 100 + 2 20 200 + + New Behavior: + + .. ipython:: python + + left.join(right, how='inner') + .. 
_whatsnew_0200.api: @@ -984,6 +1040,7 @@ Bug Fixes - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`) +- Bug with ``sort=True`` in ``DataFrame.join`` and ``pd.merge`` when joining on indexes (:issue:`15582`) - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) - Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 90baa1aff4857..03f93f1e53cc8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -124,10 +124,14 @@ ----------%s right : DataFrame how : {'left', 'right', 'outer', 'inner'}, default 'inner' - * left: use only keys from left frame (SQL: left outer join) - * right: use only keys from right frame (SQL: right outer join) - * outer: use union of keys from both frames (SQL: full outer join) - * inner: use intersection of keys from both frames (SQL: inner join) + * left: use only keys from left frame, similar to a SQL left outer join; + preserve key order + * right: use only keys from right frame, similar to a SQL right outer join; + preserve key order + * outer: use union of keys from both frames, similar to a SQL full outer + join; sort keys lexicographically + * inner: use intersection of keys from both frames, similar to a SQL inner + join; preserve the order of the left keys on : label or list Field names to join on. Must be found in both DataFrames. If on is None and not merging on indexes, then it merges on the intersection of @@ -147,7 +151,8 @@ Use the index from the right DataFrame as the join key. Same caveats as left_index sort : boolean, default False - Sort the join keys lexicographically in the result DataFrame + Sort the join keys lexicographically in the result DataFrame. If False, + the order of the join keys depends on the join type (how keyword) suffixes : 2-length sequence (tuple, list, ...) Suffix to apply to overlapping column names in the left and right side, respectively @@ -4472,16 +4477,18 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', * left: use calling frame's index (or column if on is specified) * right: use other frame's index * outer: form union of calling frame's index (or column if on is - specified) with other frame's index + specified) with other frame's index, and sort it + lexicographically * inner: form intersection of calling frame's index (or column if - on is specified) with other frame's index + on is specified) with other frame's index, preserving the order + of the calling frame's index lsuffix : string Suffix to use from left frame's overlapping columns rsuffix : string Suffix to use from right frame's overlapping columns sort : boolean, default False Order result DataFrame lexicographically by the join key. If False, - preserves the index order of the calling (left) DataFrame + the order of the join key depends on the join type (how keyword) Notes ----- diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 54f73a2466286..7f0de963e5c56 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2089,8 +2089,8 @@ def intersection(self, other): """ Form the intersection of two Index objects. - This returns a new Index with elements common to the index and `other`. 
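# A minimal sketch of the reordered ``Index.intersection`` described in this
# hunk, reusing the whatsnew example above; assumes a build with this patch
# applied (editor's illustration only, not part of the diff):
import pandas as pd

left = pd.Index([2, 1, 0])
right = pd.Index([1, 2, 3])

# The result now follows the order of the calling (left) index ...
assert list(left.intersection(right)) == [2, 1]
# ... where it previously followed `other` and came back as [1, 2].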
- Sortedness of the result is not guaranteed. + This returns a new Index with elements common to the index and `other`, + preserving the order of the calling index. Parameters ---------- @@ -2128,15 +2128,15 @@ def intersection(self, other): pass try: - indexer = Index(self._values).get_indexer(other._values) + indexer = Index(other._values).get_indexer(self._values) indexer = indexer.take((indexer != -1).nonzero()[0]) except: # duplicates - indexer = Index(self._values).get_indexer_non_unique( - other._values)[0].unique() + indexer = Index(other._values).get_indexer_non_unique( + self._values)[0].unique() indexer = indexer[indexer != -1] - taken = self.take(indexer) + taken = other.take(indexer) if self.name != other.name: taken.name = None return taken @@ -2831,8 +2831,7 @@ def _reindex_non_unique(self, target): new_index = self._shallow_copy_with_infer(new_labels, freq=None) return new_index, indexer, new_indexer - def join(self, other, how='left', level=None, return_indexers=False): - """ + _index_shared_docs['join'] = """ *this is an internal non-public method* Compute join_index and indexers to conform data @@ -2844,11 +2843,20 @@ def join(self, other, how='left', level=None, return_indexers=False): how : {'left', 'right', 'inner', 'outer'} level : int or level name, default None return_indexers : boolean, default False + sort : boolean, default False + Sort the join keys lexicographically in the result Index. If False, + the order of the join keys depends on the join type (how keyword) + + .. versionadded:: 0.20.0 Returns ------- join_index, (left_indexer, right_indexer) """ + + @Appender(_index_shared_docs['join']) + def join(self, other, how='left', level=None, return_indexers=False, + sort=False): from .multi import MultiIndex self_is_mi = isinstance(self, MultiIndex) other_is_mi = isinstance(other, MultiIndex) @@ -2929,6 +2937,9 @@ def join(self, other, how='left', level=None, return_indexers=False): elif how == 'outer': join_index = self.union(other) + if sort: + join_index = join_index.sort_values() + if return_indexers: if join_index is self: lindexer = None diff --git a/pandas/indexes/range.py b/pandas/indexes/range.py index 103a3ac2fd5f4..be68c97fb7890 100644 --- a/pandas/indexes/range.py +++ b/pandas/indexes/range.py @@ -431,29 +431,16 @@ def union(self, other): return self._int64index.union(other) - def join(self, other, how='left', level=None, return_indexers=False): - """ - *this is an internal non-public method* - - Compute join_index and indexers to conform data - structures to the new index. 
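# A minimal sketch of the new ``sort`` keyword threaded through ``join`` in
# this patch, reusing the fixtures from the tests it adds; assumes a build
# with the patch applied (editor's illustration only, not part of the diff):
import pandas as pd

left = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0])
right = pd.DataFrame({'b': [300, 100, 200]}, index=[3, 1, 2])

# sort=False keeps the calling (left) index order on an inner join ...
assert left.join(right, how='inner', sort=False).index.tolist() == [2, 1]
# ... while sort=True sorts the join keys lexicographically.
assert left.join(right, how='inner', sort=True).index.tolist() == [1, 2]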
- - Parameters - ---------- - other : Index - how : {'left', 'right', 'inner', 'outer'} - level : int or level name, default None - return_indexers : boolean, default False - - Returns - ------- - join_index, (left_indexer, right_indexer) - """ + @Appender(_index_shared_docs['join']) + def join(self, other, how='left', level=None, return_indexers=False, + sort=False): if how == 'outer' and self is not other: # note: could return RangeIndex in more circumstances - return self._int64index.join(other, how, level, return_indexers) + return self._int64index.join(other, how, level, return_indexers, + sort) - return super(RangeIndex, self).join(other, how, level, return_indexers) + return super(RangeIndex, self).join(other, how, level, return_indexers, + sort) def __len__(self): """ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index b3b253f151541..f75a4761e0948 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4321,7 +4321,7 @@ def _reindex_axis(obj, axis, labels, other=None): labels = _ensure_index(labels.unique()) if other is not None: - labels = labels & _ensure_index(other.unique()) + labels = _ensure_index(other.unique()) & labels if not labels.equals(ax): slicer = [slice(None, None)] * obj.ndim slicer[axis] = labels diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py new file mode 100644 index 0000000000000..f7a510023ca07 --- /dev/null +++ b/pandas/tests/frame/test_join.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- + +import pytest +import numpy as np + +from pandas import DataFrame, Index +from pandas.tests.frame.common import TestData +import pandas.util.testing as tm + + +@pytest.fixture +def frame(): + return TestData().frame + + +@pytest.fixture +def left(): + return DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) + + +@pytest.fixture +def right(): + return DataFrame({'b': [300, 100, 200]}, index=[3, 1, 2]) + + +@pytest.mark.parametrize( + "how, sort, expected", + [('inner', False, DataFrame({'a': [20, 10], + 'b': [200, 100]}, + index=[2, 1])), + ('inner', True, DataFrame({'a': [10, 20], + 'b': [100, 200]}, + index=[1, 2])), + ('left', False, DataFrame({'a': [20, 10, 0], + 'b': [200, 100, np.nan]}, + index=[2, 1, 0])), + ('left', True, DataFrame({'a': [0, 10, 20], + 'b': [np.nan, 100, 200]}, + index=[0, 1, 2])), + ('right', False, DataFrame({'a': [np.nan, 10, 20], + 'b': [300, 100, 200]}, + index=[3, 1, 2])), + ('right', True, DataFrame({'a': [10, 20, np.nan], + 'b': [100, 200, 300]}, + index=[1, 2, 3])), + ('outer', False, DataFrame({'a': [0, 10, 20, np.nan], + 'b': [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3])), + ('outer', True, DataFrame({'a': [0, 10, 20, np.nan], + 'b': [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3]))]) +def test_join(left, right, how, sort, expected): + + result = left.join(right, how=how, sort=sort) + tm.assert_frame_equal(result, expected) + + +def test_join_index(frame): + # left / right + + f = frame.loc[frame.index[:10], ['A', 'B']] + f2 = frame.loc[frame.index[5:], ['C', 'D']].iloc[::-1] + + joined = f.join(f2) + tm.assert_index_equal(f.index, joined.index) + expected_columns = Index(['A', 'B', 'C', 'D']) + tm.assert_index_equal(joined.columns, expected_columns) + + joined = f.join(f2, how='left') + tm.assert_index_equal(joined.index, f.index) + tm.assert_index_equal(joined.columns, expected_columns) + + joined = f.join(f2, how='right') + tm.assert_index_equal(joined.index, f2.index) + tm.assert_index_equal(joined.columns, expected_columns) + + # inner + + joined = f.join(f2, how='inner') + 
tm.assert_index_equal(joined.index, f.index[5:10]) + tm.assert_index_equal(joined.columns, expected_columns) + + # outer + + joined = f.join(f2, how='outer') + tm.assert_index_equal(joined.index, frame.index.sort_values()) + tm.assert_index_equal(joined.columns, expected_columns) + + tm.assertRaisesRegexp(ValueError, 'join method', f.join, f2, how='foo') + + # corner case - overlapping columns + for how in ('outer', 'left', 'inner'): + with tm.assertRaisesRegexp(ValueError, 'columns overlap but ' + 'no suffix'): + frame.join(frame, how=how) + + +def test_join_index_more(frame): + af = frame.loc[:, ['A', 'B']] + bf = frame.loc[::2, ['C', 'D']] + + expected = af.copy() + expected['C'] = frame['C'][::2] + expected['D'] = frame['D'][::2] + + result = af.join(bf) + tm.assert_frame_equal(result, expected) + + result = af.join(bf, how='right') + tm.assert_frame_equal(result, expected[::2]) + + result = bf.join(af, how='right') + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + +def test_join_index_series(frame): + df = frame.copy() + s = df.pop(frame.columns[-1]) + joined = df.join(s) + + # TODO should this check_names ? + tm.assert_frame_equal(joined, frame, check_names=False) + + s.name = None + tm.assertRaisesRegexp(ValueError, 'must have a name', df.join, s) + + +def test_join_overlap(frame): + df1 = frame.loc[:, ['A', 'B', 'C']] + df2 = frame.loc[:, ['B', 'C', 'D']] + + joined = df1.join(df2, lsuffix='_df1', rsuffix='_df2') + df1_suf = df1.loc[:, ['B', 'C']].add_suffix('_df1') + df2_suf = df2.loc[:, ['B', 'C']].add_suffix('_df2') + + no_overlap = frame.loc[:, ['A', 'D']] + expected = df1_suf.join(df2_suf).join(no_overlap) + + # column order not necessarily sorted + tm.assert_frame_equal(joined, expected.loc[:, joined.columns]) diff --git a/pandas/tests/frame/test_misc_api.py b/pandas/tests/frame/test_misc_api.py index 321d46739b24c..42427df90401d 100644 --- a/pandas/tests/frame/test_misc_api.py +++ b/pandas/tests/frame/test_misc_api.py @@ -57,92 +57,6 @@ def test_get_value(self): expected = self.frame[col][idx] tm.assert_almost_equal(result, expected) - def test_join_index(self): - # left / right - - f = self.frame.reindex(columns=['A', 'B'])[:10] - f2 = self.frame.reindex(columns=['C', 'D']) - - joined = f.join(f2) - self.assert_index_equal(f.index, joined.index) - self.assertEqual(len(joined.columns), 4) - - joined = f.join(f2, how='left') - self.assert_index_equal(joined.index, f.index) - self.assertEqual(len(joined.columns), 4) - - joined = f.join(f2, how='right') - self.assert_index_equal(joined.index, f2.index) - self.assertEqual(len(joined.columns), 4) - - # inner - - f = self.frame.reindex(columns=['A', 'B'])[:10] - f2 = self.frame.reindex(columns=['C', 'D']) - - joined = f.join(f2, how='inner') - self.assert_index_equal(joined.index, f.index.intersection(f2.index)) - self.assertEqual(len(joined.columns), 4) - - # outer - - f = self.frame.reindex(columns=['A', 'B'])[:10] - f2 = self.frame.reindex(columns=['C', 'D']) - - joined = f.join(f2, how='outer') - self.assertTrue(tm.equalContents(self.frame.index, joined.index)) - self.assertEqual(len(joined.columns), 4) - - assertRaisesRegexp(ValueError, 'join method', f.join, f2, how='foo') - - # corner case - overlapping columns - for how in ('outer', 'left', 'inner'): - with assertRaisesRegexp(ValueError, 'columns overlap but ' - 'no suffix'): - self.frame.join(self.frame, how=how) - - def test_join_index_more(self): - af = self.frame.loc[:, ['A', 'B']] - bf = self.frame.loc[::2, ['C', 'D']] - - expected = af.copy() - 
expected['C'] = self.frame['C'][::2] - expected['D'] = self.frame['D'][::2] - - result = af.join(bf) - assert_frame_equal(result, expected) - - result = af.join(bf, how='right') - assert_frame_equal(result, expected[::2]) - - result = bf.join(af, how='right') - assert_frame_equal(result, expected.loc[:, result.columns]) - - def test_join_index_series(self): - df = self.frame.copy() - s = df.pop(self.frame.columns[-1]) - joined = df.join(s) - - # TODO should this check_names ? - assert_frame_equal(joined, self.frame, check_names=False) - - s.name = None - assertRaisesRegexp(ValueError, 'must have a name', df.join, s) - - def test_join_overlap(self): - df1 = self.frame.loc[:, ['A', 'B', 'C']] - df2 = self.frame.loc[:, ['B', 'C', 'D']] - - joined = df1.join(df2, lsuffix='_df1', rsuffix='_df2') - df1_suf = df1.loc[:, ['B', 'C']].add_suffix('_df1') - df2_suf = df2.loc[:, ['B', 'C']].add_suffix('_df2') - - no_overlap = self.frame.loc[:, ['A', 'D']] - expected = df1_suf.join(df2_suf).join(no_overlap) - - # column order not necessarily sorted - assert_frame_equal(joined, expected.loc[:, joined.columns]) - def test_add_prefix_suffix(self): with_prefix = self.frame.add_prefix('foo#') expected = pd.Index(['foo#%s' % c for c in self.frame.columns]) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c4dc10d8174cc..a8197b070b032 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -626,14 +626,14 @@ def test_intersection(self): # non monotonic idx1 = Index([5, 3, 2, 4, 1], name='idx') idx2 = Index([4, 7, 6, 5, 3], name='idx') - result2 = idx1.intersection(idx2) - self.assertTrue(tm.equalContents(result2, expected2)) - self.assertEqual(result2.name, expected2.name) + expected = Index([5, 3, 4], name='idx') + result = idx1.intersection(idx2) + self.assert_index_equal(result, expected) - idx3 = Index([4, 7, 6, 5, 3], name='other') - result3 = idx1.intersection(idx3) - self.assertTrue(tm.equalContents(result3, expected3)) - self.assertEqual(result3.name, expected3.name) + idx2 = Index([4, 7, 6, 5, 3], name='other') + expected = Index([5, 3, 4], name=None) + result = idx1.intersection(idx2) + self.assert_index_equal(result, expected) # non-monotonic non-unique idx1 = Index(['A', 'B', 'A', 'C']) @@ -642,6 +642,11 @@ def test_intersection(self): result = idx1.intersection(idx2) self.assert_index_equal(result, expected) + idx2 = Index(['B', 'D', 'A']) + expected = Index(['A', 'B', 'A'], dtype='object') + result = idx1.intersection(idx2) + self.assert_index_equal(result, expected) + # preserve names first = self.strIndex[5:20] second = self.strIndex[:10] diff --git a/pandas/tests/tools/test_merge.py b/pandas/tests/tools/test_merge.py index ff27500355998..8011bc4a1cfc2 100644 --- a/pandas/tests/tools/test_merge.py +++ b/pandas/tests/tools/test_merge.py @@ -1355,3 +1355,51 @@ def test_dtype_on_merged_different(self, change, how, left, right): np.dtype('int64')], index=['X', 'Y', 'Z']) assert_series_equal(result, expected) + + +@pytest.fixture +def left_df(): + return DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) + + +@pytest.fixture +def right_df(): + return DataFrame({'b': [300, 100, 200]}, index=[3, 1, 2]) + + +class TestMergeOnIndexes(object): + + @pytest.mark.parametrize( + "how, sort, expected", + [('inner', False, DataFrame({'a': [20, 10], + 'b': [200, 100]}, + index=[2, 1])), + ('inner', True, DataFrame({'a': [10, 20], + 'b': [100, 200]}, + index=[1, 2])), + ('left', False, DataFrame({'a': [20, 10, 0], + 'b': [200, 100, np.nan]}, + 
index=[2, 1, 0])), + ('left', True, DataFrame({'a': [0, 10, 20], + 'b': [np.nan, 100, 200]}, + index=[0, 1, 2])), + ('right', False, DataFrame({'a': [np.nan, 10, 20], + 'b': [300, 100, 200]}, + index=[3, 1, 2])), + ('right', True, DataFrame({'a': [10, 20, np.nan], + 'b': [100, 200, 300]}, + index=[1, 2, 3])), + ('outer', False, DataFrame({'a': [0, 10, 20, np.nan], + 'b': [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3])), + ('outer', True, DataFrame({'a': [0, 10, 20, np.nan], + 'b': [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3]))]) + def test_merge_on_indexes(self, left_df, right_df, how, sort, expected): + + result = pd.merge(left_df, right_df, + left_index=True, + right_index=True, + how=how, + sort=sort) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 60d523a8ea539..7de2549cadfc7 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -733,7 +733,8 @@ def _get_join_info(self): if self.left_index and self.right_index and self.how != 'asof': join_index, left_indexer, right_indexer = \ - left_ax.join(right_ax, how=self.how, return_indexers=True) + left_ax.join(right_ax, how=self.how, return_indexers=True, + sort=self.sort) elif self.right_index and self.how == 'left': join_index, left_indexer, right_indexer = \ _left_join_on_index(left_ax, right_ax, self.left_join_keys, diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 1992e177556cc..9123131a6dccf 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1048,7 +1048,8 @@ def union_many(self, others): this.offset = to_offset(this.inferred_freq) return this - def join(self, other, how='left', level=None, return_indexers=False): + def join(self, other, how='left', level=None, return_indexers=False, + sort=False): """ See Index.join """ @@ -1062,7 +1063,7 @@ def join(self, other, how='left', level=None, return_indexers=False): this, other = self._maybe_utc_convert(other) return Index.join(this, other, how=how, level=level, - return_indexers=return_indexers) + return_indexers=return_indexers, sort=sort) def _maybe_utc_convert(self, other): this = self @@ -1214,9 +1215,10 @@ def intersection(self, other): not other.offset.isAnchored() or (not self.is_monotonic or not other.is_monotonic)): result = Index.intersection(self, other) - if isinstance(result, DatetimeIndex): - if result.freq is None: - result.offset = to_offset(result.inferred_freq) + result = self._shallow_copy(result._values, name=result.name, + tz=result.tz, freq=None) + if result.freq is None: + result.offset = to_offset(result.inferred_freq) return result if len(self) == 0: @@ -1539,7 +1541,7 @@ def _get_freq(self): def _set_freq(self, value): self.offset = value freq = property(fget=_get_freq, fset=_set_freq, - doc="get/set the frequncy of the Index") + doc="get/set the frequency of the Index") year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', From f3e3cfeddac934fdb60ef9f54ea4e06af687014f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 29 Mar 2017 12:21:02 -0400 Subject: [PATCH 298/933] CI: upload coverage with flags --- .travis.yml | 5 +---- ci/install_travis.sh | 2 +- ci/upload_coverage.sh | 11 +++++++++++ 3 files changed, 13 insertions(+), 5 deletions(-) create mode 100755 ci/upload_coverage.sh diff --git a/.travis.yml b/.travis.yml index 1053f8925ebd7..d864b755541de 100644 --- a/.travis.yml +++ b/.travis.yml @@ -108,7 +108,6 @@ install: - echo "install done" before_script: - - source activate pandas && pip install 
codecov - ci/install_db_travis.sh script: @@ -120,9 +119,7 @@ script: - echo "script done" after_success: - - if [ "$COVERAGE" ]; then - source activate pandas && codecov --file /tmp/cov-single.xml /tmp/cov-multiple.xml; - fi + - ci/upload_coverage.sh after_script: - echo "after_script start" diff --git a/ci/install_travis.sh b/ci/install_travis.sh index f71df979c9df0..10556ccffa55d 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -115,7 +115,7 @@ if [ "$LINT" ]; then fi if [ "$COVERAGE" ]; then - pip install coverage pytest-cov + pip install coverage pytest-cov codecov fi echo diff --git a/ci/upload_coverage.sh b/ci/upload_coverage.sh new file mode 100755 index 0000000000000..0da8e46a15de1 --- /dev/null +++ b/ci/upload_coverage.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +if [ -z "$COVERAGE" ]; then + echo "no upload of coverage is needed" + exit 0 +fi + +source activate pandas + +codecov --file -c -F single /tmp/cov-single.xml +codecov --file -c -F multiple /tmp/cov-multiple.xml From abf16978cfcb1188ca856819204e6bc7500af179 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 29 Mar 2017 13:02:46 -0400 Subject: [PATCH 299/933] CI: reconfig coverage uploading --- ci/install_travis.sh | 2 +- ci/upload_coverage.sh | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 10556ccffa55d..f71df979c9df0 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -115,7 +115,7 @@ if [ "$LINT" ]; then fi if [ "$COVERAGE" ]; then - pip install coverage pytest-cov codecov + pip install coverage pytest-cov fi echo diff --git a/ci/upload_coverage.sh b/ci/upload_coverage.sh index 0da8e46a15de1..a7ef2fa908079 100755 --- a/ci/upload_coverage.sh +++ b/ci/upload_coverage.sh @@ -1,11 +1,12 @@ #!/bin/bash if [ -z "$COVERAGE" ]; then - echo "no upload of coverage is needed" + echo "coverage is not selected for this build" exit 0 fi source activate pandas -codecov --file -c -F single /tmp/cov-single.xml -codecov --file -c -F multiple /tmp/cov-multiple.xml +echo "uploading coverage" +bash <(curl -s https://codecov.io/bash) -Z -c -F single -f /tmp/cov-single.xml +bash <(curl -s https://codecov.io/bash) -Z -c -F multiple -f /tmp/cov-multiple.xml From ecaeea17f2cba89f41630527c8bf8801447fa09e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 29 Mar 2017 18:15:43 -0400 Subject: [PATCH 300/933] DOC: update io.rst Author: Jeff Reback Closes #15840 from jreback/io and squashes the following commits: b4ee5dd [Jeff Reback] DOC: update io.rst --- doc/source/io.rst | 50 +++++++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index e72224c6fa1fe..90167e7c6183f 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -29,36 +29,26 @@ IO Tools (Text, CSV, HDF5, ...) =============================== The pandas I/O API is a set of top level ``reader`` functions accessed like ``pd.read_csv()`` that generally return a ``pandas`` -object. 
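(A minimal illustration of the reader/writer pairing this io.rst rewrite documents: ``pd.read_csv()`` is a top-level reader function and ``DataFrame.to_csv()`` is its writer method. The example data is invented and the snippet is not part of the patch.)

.. code-block:: python

   import pandas as pd

   df = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})
   df.to_csv('tmp.csv', index=False)   # writer: a method on the object
   roundtrip = pd.read_csv('tmp.csv')  # reader: a top-level function
   assert roundtrip.equals(df)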
- - * :ref:`read_csv` - * :ref:`read_excel` - * :ref:`read_hdf` - * :ref:`read_feather` - * :ref:`read_sql` - * :ref:`read_json` - * :ref:`read_msgpack` - * :ref:`read_html` - * :ref:`read_gbq` - * :ref:`read_stata` - * :ref:`read_sas` - * :ref:`read_clipboard` - * :ref:`read_pickle` - -The corresponding ``writer`` functions are object methods that are accessed like ``df.to_csv()`` - - * :ref:`to_csv` - * :ref:`to_excel` - * :ref:`to_hdf` - * :ref:`to_feather` - * :ref:`to_sql` - * :ref:`to_json` - * :ref:`to_msgpack` - * :ref:`to_html` - * :ref:`to_gbq` - * :ref:`to_stata` - * :ref:`to_clipboard` - * :ref:`to_pickle` +object. The corresponding ``writer`` functions are object methods that are accessed like ``df.to_csv()`` + +.. csv-table:: + :header: "Format Type", "Data Description", "Reader", "Writer" + :widths: 30, 100, 60, 60 + :delim: ; + + text;`CSV `__;:ref:`read_csv`;:ref:`to_csv` + text;`JSON `__;:ref:`read_json`;:ref:`to_json` + text;`HTML `__;:ref:`read_html`;:ref:`to_html` + text; Local clipboard;:ref:`read_clipboard`;:ref:`to_clipboard` + binary;`MS Excel `__;:ref:`read_excel`;:ref:`to_excel` + binary;`HDF5 Format `__;:ref:`read_hdf`;:ref:`to_hdf` + binary;`Feather Format `__;:ref:`read_feather`;:ref:`to_feather` + binary;`Msgpack `__;:ref:`read_msgpack`;:ref:`to_msgpack` + binary;`Stata `__;:ref:`read_stata`;:ref:`to_stata` + binary;`SAS `__;:ref:`read_sas`; + binary;`Python Pickle Format `__;:ref:`read_pickle`;:ref:`to_pickle` + SQL;`SQL `__;:ref:`read_sql`;:ref:`to_sql` + SQL;`Google Big Query `__;:ref:`read_gbq`;:ref:`to_gbq` :ref:`Here ` is an informal performance comparison for some of these IO methods. From 0ab081345eb191937fd4152eba48b8c9692b02bf Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 29 Mar 2017 19:24:40 -0400 Subject: [PATCH 301/933] ENH: read_html() handles tables with multiple header rows #13434 closes #13434 Author: Brian Author: S. Brian Huey Closes #15242 from brianhuey/thead-improvement and squashes the following commits: fc1c80e [S. Brian Huey] Merge branch 'master' into thead-improvement b54aa0c [Brian] removed duplicate test case 6ae2860 [Brian] updated docstring and io.rst 41fe8cd [Brian] review changes 873ea58 [Brian] switched from range to lrange cd70225 [Brian] ENH:read_html() handles tables with multiple header rows #13434 --- doc/source/io.rst | 7 ++++--- doc/source/whatsnew/v0.20.0.txt | 13 +++++++------ pandas/io/html.py | 31 ++++++++++++++++++++----------- pandas/tests/io/test_html.py | 12 ++++++++++++ 4 files changed, 43 insertions(+), 20 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 90167e7c6183f..5cec27c329a7f 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2222,9 +2222,10 @@ Read a URL and match a table that contains specific text match = 'Metcalf Bank' df_list = pd.read_html(url, match=match) -Specify a header row (by default ```` are used to form the column index, if multiple rows are contained within +```` then a multiindex is created); if specified, the header row is taken +from the data minus the parsed header elements (``', result) + assert '' not in result def test_to_html_multiindex(self): columns = MultiIndex.from_tuples(list(zip(np.arange(2).repeat(2), @@ -1660,7 +1660,7 @@ def test_to_html_index(self): ' \n' ' \n' '
Conda - conda downloads + conda default downloads + +
Conda-forge + + conda-forge downloads
- - appveyor build status + + appveyor build status
`` elements are used to form the column -index); if specified, the header row is taken from the data minus the parsed -header elements (```` elements). +Specify a header row (by default ```` or ```` elements located within a +``
`` elements). .. code-block:: python diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 2e822729873ad..65635edb82163 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -283,7 +283,7 @@ Other Enhancements - ``DataFrame`` has gained a ``nunique()`` method to count the distinct values over an axis (:issue:`14336`). - ``DataFrame.groupby()`` has gained a ``.nunique()`` method to count the distinct values for all columns within each group (:issue:`14336`, :issue:`15197`). -- ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) +- ``pd.read_excel()`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) - Multiple offset aliases with decimal points are now supported (e.g. '0.5min' is parsed as '30s') (:issue:`8419`) - ``.isnull()`` and ``.notnull()`` have been added to ``Index`` object to make them more consistent with the ``Series`` API (:issue:`15300`) @@ -294,8 +294,8 @@ Other Enhancements - ``pd.cut`` and ``pd.qcut`` now support datetime64 and timedelta64 dtypes (:issue:`14714`, :issue:`14798`) - ``pd.qcut`` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`7751`) - ``Series`` provides a ``to_excel`` method to output Excel files (:issue:`8825`) -- The ``usecols`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`14154`) -- The ``skiprows`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`10882`) +- The ``usecols`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`14154`) +- The ``skiprows`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`10882`) - The ``nrows`` and ``chunksize`` arguments in ``pd.read_csv()`` are supported if both are passed (:issue:`6774`, :issue:`15755`) - ``pd.DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`) - ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) @@ -309,6 +309,7 @@ Other Enhancements - ``pandas.tools.hashing`` has gained a ``hash_tuples`` routine, and ``hash_pandas_object`` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`) - ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) +- ``pd.read_html()`` will parse multiple header rows, creating a multiindex header. (:issue:`13434`). - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) @@ -813,7 +814,7 @@ Other API Changes ^^^^^^^^^^^^^^^^^ - ``numexpr`` version is now required to be >= 2.4.6 and it will not be used at all if this requisite is not fulfilled (:issue:`15213`). 
-- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`) +- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv()`` and will be removed in the future (:issue:`12665`) - ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`) - ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`) - ``.loc`` has compat with ``.ix`` for accepting iterators, and NamedTuples (:issue:`15120`) @@ -926,7 +927,7 @@ Bug Fixes - Bug in ``pd.to_numeric()`` in which float and unsigned integer elements were being improperly casted (:issue:`14941`, :issue:`15005`) - Cleaned up ``PeriodIndex`` constructor, including raising on floats more consistently (:issue:`13277`) - Bug in ``pd.read_csv()`` in which the ``dialect`` parameter was not being verified before processing (:issue:`14898`) -- Bug in ``pd.read_fwf`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`) +- Bug in ``pd.read_fwf()`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`) - Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`) - Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`) - Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`) @@ -1054,4 +1055,4 @@ Bug Fixes - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) -- Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) +- Bug in ``pd.read_msgpack()`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) diff --git a/pandas/io/html.py b/pandas/io/html.py index 53595b94eb94d..8a3709dba2176 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -355,9 +355,12 @@ def _parse_raw_thead(self, table): thead = self._parse_thead(table) res = [] if thead: - res = lmap(self._text_getter, self._parse_th(thead[0])) - return np.atleast_1d( - np.array(res).squeeze()) if res and len(res) == 1 else res + trs = self._parse_tr(thead[0]) + for tr in trs: + cols = lmap(self._text_getter, self._parse_td(tr)) + if any([col != '' for col in cols]): + res.append(cols) + return res def _parse_raw_tfoot(self, table): tfoot = self._parse_tfoot(table) @@ -591,9 +594,17 @@ def _parse_tfoot(self, table): return table.xpath('.//tfoot') def _parse_raw_thead(self, table): - expr = './/thead//th' - return [_remove_whitespace(x.text_content()) for x in - table.xpath(expr)] + expr = './/thead' + thead = table.xpath(expr) + res = [] + if thead: + trs = self._parse_tr(thead[0]) + for tr in trs: + cols = [_remove_whitespace(x.text_content()) for x in + self._parse_td(tr)] + if any([col != '' for col in cols]): + res.append(cols) + return res def _parse_raw_tfoot(self, table): expr = './/tfoot//th|//tfoot//td' @@ -615,19 +626,17 @@ def _data_to_frame(**kwargs): head, body, foot = 
kwargs.pop('data') header = kwargs.pop('header') kwargs['skiprows'] = _get_skiprows(kwargs['skiprows']) - if head: - body = [head] + body - + rows = lrange(len(head)) + body = head + body if header is None: # special case when a table has elements - header = 0 + header = 0 if rows == [0] else rows if foot: body += [foot] # fill out elements of body that are "ragged" _expand_elements(body) - tp = TextParser(body, header=header, **kwargs) df = tp.read() return df diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index c1a2a4545a6f9..4aa85c0f63a68 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -760,6 +760,18 @@ def test_keep_default_na(self): html_df = read_html(html_data, keep_default_na=True)[0] tm.assert_frame_equal(expected_df, html_df) + def test_multiple_header_rows(self): + # Issue #13434 + expected_df = DataFrame(data=[("Hillary", 68, "D"), + ("Bernie", 74, "D"), + ("Donald", 69, "R")]) + expected_df.columns = [["Unnamed: 0_level_0", "Age", "Party"], + ["Name", "Unnamed: 1_level_1", + "Unnamed: 2_level_1"]] + html = expected_df.to_html(index=False) + html_df = read_html(html, )[0] + tm.assert_frame_equal(expected_df, html_df) + def _lang_enc(filename): return os.path.splitext(os.path.basename(filename))[0].split('_') From de589c23e0ae79d9cae59674259dc2707513795f Mon Sep 17 00:00:00 2001 From: Robin Date: Wed, 29 Mar 2017 19:45:05 -0400 Subject: [PATCH 302/933] BUG: Return mode even if single value (#15714) Author: Robin This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #15744 from buyology/issue-15714-fix-mode and squashes the following commits: 8c08cd5 [Robin] Added multi-test and whatsnew note 5f36395 [Robin] Fixed flake issues, removed duplicate test, inserted GH issue number reference 5f829e1 [Robin] Merge conflict 0e2dec0 [Robin] Fixed tests 26db131 [Robin] Return mode even if single value (#15714) 44dbbb2 [Robin] Return mode even if single value (#15714) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/_libs/hashtable_func_helper.pxi.in | 2 +- pandas/core/categorical.py | 3 +- pandas/core/frame.py | 5 ++-- pandas/core/series.py | 3 +- pandas/tests/frame/test_analytics.py | 35 +++++++++++------------ pandas/tests/series/test_analytics.py | 13 +++++---- pandas/tests/test_algos.py | 33 ++++++++++++++++----- pandas/tests/test_categorical.py | 6 ++-- 9 files changed, 58 insertions(+), 43 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 65635edb82163..023d41763baee 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1050,6 +1050,7 @@ Bug Fixes - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) +- Bug in ``.mode()`` where ``mode`` was not returned if was only a single value (:issue:`15714`) - Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 0608af8f8504b..c97639481f12c 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -309,7 +309,7 @@ def mode_{{dtype}}(ndarray[{{ctype}}] values): def 
mode_{{dtype}}({{ctype}}[:] values): {{endif}} cdef: - int count, max_count = 2 + int count, max_count = 1 int j = -1 # so you can do += Py_ssize_t k kh_{{table_type}}_t *table diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 632c24c33feb7..0fcf8664e755d 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1868,8 +1868,7 @@ def mode(self): """ Returns the mode(s) of the Categorical. - Empty if nothing occurs at least 2 times. Always returns `Categorical` - even if only one value. + Always returns `Categorical` even if only one value. Returns ------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 03f93f1e53cc8..ffae22447cc65 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5127,9 +5127,8 @@ def _get_agg_axis(self, axis_num): def mode(self, axis=0, numeric_only=False): """ - Gets the mode(s) of each element along the axis selected. Empty if - nothing has 2+ occurrences. Adds a row for each mode per label, fills - in gaps with nan. + Gets the mode(s) of each element along the axis selected. Adds a row + for each mode per label, fills in gaps with nan. Note that there could be multiple values returned for the selected axis (when more than one item share the maximum frequency), which is diff --git a/pandas/core/series.py b/pandas/core/series.py index 0913592e055cd..bcc1ed272b081 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1192,8 +1192,7 @@ def count(self, level=None): def mode(self): """Return the mode(s) of the dataset. - Empty if nothing occurs at least 2 times. Always returns Series even - if only one value is returned. + Always returns Series even if only one value is returned. Returns ------- diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 735d3786e6a54..aa15e9fbab4cc 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -789,18 +789,23 @@ def test_mode(self): "E": [8, 8, 1, 1, 3, 3]}) tm.assert_frame_equal(df[["A"]].mode(), pd.DataFrame({"A": [12]})) - expected = pd.Series([], dtype='int64', name='D').to_frame() + expected = pd.Series([0, 1, 2, 3, 4, 5], dtype='int64', name='D').\ + to_frame() tm.assert_frame_equal(df[["D"]].mode(), expected) expected = pd.Series([1, 3, 8], dtype='int64', name='E').to_frame() tm.assert_frame_equal(df[["E"]].mode(), expected) tm.assert_frame_equal(df[["A", "B"]].mode(), pd.DataFrame({"A": [12], "B": [10.]})) tm.assert_frame_equal(df.mode(), - pd.DataFrame({"A": [12, np.nan, np.nan], - "B": [10, np.nan, np.nan], - "C": [8, 9, np.nan], - "D": [np.nan, np.nan, np.nan], - "E": [1, 3, 8]})) + pd.DataFrame({"A": [12, np.nan, np.nan, np.nan, + np.nan, np.nan], + "B": [10, np.nan, np.nan, np.nan, + np.nan, np.nan], + "C": [8, 9, np.nan, np.nan, np.nan, + np.nan], + "D": [0, 1, 2, 3, 4, 5], + "E": [1, 3, 8, np.nan, np.nan, + np.nan]})) # outputs in sorted order df["C"] = list(reversed(df["C"])) @@ -817,20 +822,12 @@ def test_mode(self): df = pd.DataFrame({"A": np.arange(6, dtype='int64'), "B": pd.date_range('2011', periods=6), "C": list('abcdef')}) - exp = pd.DataFrame({"A": pd.Series([], dtype=df["A"].dtype), - "B": pd.Series([], dtype=df["B"].dtype), - "C": pd.Series([], dtype=df["C"].dtype)}) - tm.assert_frame_equal(df.mode(), exp) - - # and also when not empty - df.loc[1, "A"] = 0 - df.loc[4, "B"] = df.loc[3, "B"] - df.loc[5, "C"] = 'e' - exp = pd.DataFrame({"A": pd.Series([0], dtype=df["A"].dtype), - "B": pd.Series([df.loc[3, "B"]], + exp = pd.DataFrame({"A": 
pd.Series(np.arange(6, dtype='int64'), + dtype=df["A"].dtype), + "B": pd.Series(pd.date_range('2011', periods=6), dtype=df["B"].dtype), - "C": pd.Series(['e'], dtype=df["C"].dtype)}) - + "C": pd.Series(list('abcdef'), + dtype=df["C"].dtype)}) tm.assert_frame_equal(df.mode(), exp) def test_operators_timedelta64(self): diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index dc71fafb1094f..b747a680c17dd 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -130,10 +130,10 @@ def test_mode(self): exp = Series([], dtype=np.float64) tm.assert_series_equal(Series([]).mode(), exp) - exp = Series([], dtype=np.int64) + exp = Series([1], dtype=np.int64) tm.assert_series_equal(Series([1]).mode(), exp) - exp = Series([], dtype=np.object) + exp = Series(['a', 'b', 'c'], dtype=np.object) tm.assert_series_equal(Series(['a', 'b', 'c']).mode(), exp) # Test numerical data types. @@ -169,7 +169,8 @@ def test_mode(self): tm.assert_series_equal(s.mode(), exp) # Test datetime types. - exp = Series([], dtype="M8[ns]") + exp = Series(['1900-05-03', '2011-01-03', + '2013-01-02'], dtype='M8[ns]') s = Series(['2011-01-03', '2013-01-02', '1900-05-03'], dtype='M8[ns]') tm.assert_series_equal(s.mode(), exp) @@ -180,7 +181,7 @@ def test_mode(self): tm.assert_series_equal(s.mode(), exp) # gh-5986: Test timedelta types. - exp = Series([], dtype='timedelta64[ns]') + exp = Series(['-1 days', '0 days', '1 days'], dtype='timedelta64[ns]') s = Series(['1 days', '-1 days', '0 days'], dtype='timedelta64[ns]') tm.assert_series_equal(s.mode(), exp) @@ -200,13 +201,13 @@ def test_mode(self): s = Series([1, 2**63, 2**63], dtype=np.uint64) tm.assert_series_equal(s.mode(), exp) - exp = Series([], dtype=np.uint64) + exp = Series([1, 2**63], dtype=np.uint64) s = Series([1, 2**63], dtype=np.uint64) tm.assert_series_equal(s.mode(), exp) # Test category dtype. 
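        # Illustrative sketch (an addition for exposition, not a line from
        # the patch itself): under the new rule a single occurrence already
        # counts as a mode, so the two singleton categories below are both
        # returned instead of an empty Categorical.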
c = Categorical([1, 2]) - exp = Categorical([], categories=[1, 2]) + exp = Categorical([1, 2], categories=[1, 2]) exp = Series(exp, dtype='category') tm.assert_series_equal(Series(c).mode(), exp) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 5d69746034346..ac3a42c3cf122 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1261,10 +1261,27 @@ def test_no_mode(self): exp = Series([], dtype=np.float64) tm.assert_series_equal(algos.mode([]), exp) - exp = Series([], dtype=np.int) + # GH 15714 + def test_mode_single(self): + exp_single = [1] + data_single = [1] + + exp_multi = [1] + data_multi = [1, 1] + + for dt in np.typecodes['AllInteger'] + np.typecodes['Float']: + s = Series(data_single, dtype=dt) + exp = Series(exp_single, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) + + s = Series(data_multi, dtype=dt) + exp = Series(exp_multi, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) + + exp = Series([1], dtype=np.int) tm.assert_series_equal(algos.mode([1]), exp) - exp = Series([], dtype=np.object) + exp = Series(['a', 'b', 'c'], dtype=np.object) tm.assert_series_equal(algos.mode(['a', 'b', 'c']), exp) def test_number_mode(self): @@ -1300,7 +1317,8 @@ def test_strobj_mode(self): tm.assert_series_equal(algos.mode(s), exp) def test_datelike_mode(self): - exp = Series([], dtype="M8[ns]") + exp = Series(['1900-05-03', '2011-01-03', + '2013-01-02'], dtype="M8[ns]") s = Series(['2011-01-03', '2013-01-02', '1900-05-03'], dtype='M8[ns]') tm.assert_series_equal(algos.mode(s), exp) @@ -1311,7 +1329,8 @@ def test_datelike_mode(self): tm.assert_series_equal(algos.mode(s), exp) def test_timedelta_mode(self): - exp = Series([], dtype='timedelta64[ns]') + exp = Series(['-1 days', '0 days', '1 days'], + dtype='timedelta64[ns]') s = Series(['1 days', '-1 days', '0 days'], dtype='timedelta64[ns]') tm.assert_series_equal(algos.mode(s), exp) @@ -1331,13 +1350,13 @@ def test_uint64_overflow(self): s = Series([1, 2**63, 2**63], dtype=np.uint64) tm.assert_series_equal(algos.mode(s), exp) - exp = Series([], dtype=np.uint64) + exp = Series([1, 2**63], dtype=np.uint64) s = Series([1, 2**63], dtype=np.uint64) tm.assert_series_equal(algos.mode(s), exp) def test_categorical(self): c = Categorical([1, 2]) - exp = Series([], dtype=np.int64) + exp = Series([1, 2], dtype=np.int64) tm.assert_series_equal(algos.mode(c), exp) c = Categorical([1, 'a', 'a']) @@ -1350,7 +1369,7 @@ def test_categorical(self): def test_index(self): idx = Index([1, 2, 3]) - exp = Series([], dtype=np.int64) + exp = Series([1, 2, 3], dtype=np.int64) tm.assert_series_equal(algos.mode(idx), exp) idx = Index([1, 'a', 'a']) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index b4072d04dfd81..ea2697ec19df3 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1279,13 +1279,13 @@ def test_mode(self): s = Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], ordered=True) res = s.mode() - exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True) + exp = Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True) tm.assert_categorical_equal(res, exp) # NaN should not become the mode! 
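        # Illustrative note (not a line from the patch itself): missing
        # values are dropped before counting, so although ``np.nan`` occurs
        # three times below, the modes are the singleton categories 5 and 4.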
s = Categorical([np.nan, np.nan, np.nan, 4, 5], categories=[5, 4, 3, 2, 1], ordered=True) res = s.mode() - exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True) + exp = Categorical([5, 4], categories=[5, 4, 3, 2, 1], ordered=True) tm.assert_categorical_equal(res, exp) s = Categorical([np.nan, np.nan, np.nan, 4, 5, 4], categories=[5, 4, 3, 2, 1], ordered=True) @@ -2833,7 +2833,7 @@ def test_mode(self): s = Series(Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], ordered=True)) res = s.mode() - exp = Series(Categorical([], categories=[5, 4, 3, 2, 1], ordered=True)) + exp = Series(Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True)) tm.assert_series_equal(res, exp) def test_value_counts(self): From 046d3be54970bb7ff99d7ebfd307d93e41eeb7ee Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 30 Mar 2017 07:53:11 -0400 Subject: [PATCH 303/933] CLN: Remove "flake8: noqa" from files Just some minor house-cleaning to cut down on the number of search results found here. Author: gfyoung Closes #15842 from gfyoung/flake8-noqa-clean and squashes the following commits: 5d1edeb [gfyoung] CLN: Make test_compat.py flake8-able f9079ff [gfyoung] CLN: Make exceptions.py flake8-able 0e236f5 [gfyoung] CLN: Make test_format.py flake8-able --- pandas/tests/computation/test_compat.py | 6 +- pandas/tests/formats/test_format.py | 255 ++++++++++++++++-------- pandas/util/clipboard/exceptions.py | 1 - 3 files changed, 177 insertions(+), 85 deletions(-) diff --git a/pandas/tests/computation/test_compat.py b/pandas/tests/computation/test_compat.py index 59bdde83aedd8..56a7cab730f1f 100644 --- a/pandas/tests/computation/test_compat.py +++ b/pandas/tests/computation/test_compat.py @@ -1,8 +1,4 @@ - -# flake8: noqa - import pytest -from itertools import product from distutils.version import LooseVersion import pandas as pd @@ -32,7 +28,7 @@ def test_compat(): @pytest.mark.parametrize('parser', expr._parsers) def test_invalid_numexpr_version(engine, parser): def testit(): - a, b = 1, 2 + a, b = 1, 2 # noqa res = pd.eval('a + b', engine=engine, parser=parser) tm.assert_equal(res, 3) diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 44a7f2b45e759..83458c82a3d7c 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -1,13 +1,9 @@ # -*- coding: utf-8 -*- """ -test output formatting for Series/DataFrame -including to_string & reprs +Test output formatting for Series/DataFrame, including to_string & reprs """ -# TODO(wesm): lots of issues making flake8 hard -# flake8: noqa - from __future__ import print_function import re @@ -57,8 +53,14 @@ def has_info_repr(df): def has_non_verbose_info_repr(df): has_info = has_info_repr(df) r = repr(df) - nv = len(r.split( - '\n')) == 6 # 1. , 2. Index, 3. Columns, 4. dtype, 5. memory usage, 6. trailing newline + + # 1. + # 2. Index + # 3. Columns + # 4. dtype + # 5. memory usage + # 6. 
trailing newline + nv = len(r.split('\n')) == 6 return has_info and nv @@ -477,7 +479,7 @@ def test_east_asian_unicode_frame(self): if PY3: _rep = repr else: - _rep = unicode + _rep = unicode # noqa # not alighned properly because of east asian width @@ -529,27 +531,39 @@ def test_east_asian_unicode_frame(self): # index name df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=pd.Index([u'あ', u'い', u'うう', u'え'], name=u'おおおお')) - expected = (u" a b\nおおおお \nあ あああああ あ\n" - u"い い いいい\nうう う う\nえ えええ ええええええ" - ) + index=pd.Index([u'あ', u'い', u'うう', u'え'], + name=u'おおおお')) + expected = (u" a b\n" + u"おおおお \n" + u"あ あああああ あ\n" + u"い い いいい\n" + u"うう う う\n" + u"え えええ ええええええ") self.assertEqual(_rep(df), expected) # all df = DataFrame({u'あああ': [u'あああ', u'い', u'う', u'えええええ'], u'いいいいい': [u'あ', u'いいい', u'う', u'ええ']}, - index=pd.Index([u'あ', u'いいい', u'うう', u'え'], name=u'お')) - expected = (u" あああ いいいいい\nお \nあ あああ あ\n" - u"いいい い いいい\nうう う う\nえ えええええ ええ") + index=pd.Index([u'あ', u'いいい', u'うう', u'え'], + name=u'お')) + expected = (u" あああ いいいいい\n" + u"お \n" + u"あ あああ あ\n" + u"いいい い いいい\n" + u"うう う う\n" + u"え えええええ ええ") self.assertEqual(_rep(df), expected) # MultiIndex idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( u'おおお', u'かかかか'), (u'き', u'くく')]) df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=idx) - expected = (u" a b\nあ いい あああああ あ\n" - u"う え い いいい\nおおお かかかか う う\n" + 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, + index=idx) + expected = (u" a b\n" + u"あ いい あああああ あ\n" + u"う え い いいい\n" + u"おおお かかかか う う\n" u"き くく えええ ええええええ") self.assertEqual(_rep(df), expected) @@ -597,18 +611,21 @@ def test_east_asian_unicode_frame(self): df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=['a', 'bb', 'c', 'ddd']) - expected = (u" a b\na あああああ あ\n" - u"bb い いいい\nc う う\n" - u"ddd えええ ええええええ" - "") + expected = (u" a b\n" + u"a あああああ あ\n" + u"bb い いいい\n" + u"c う う\n" + u"ddd えええ ええええええ") self.assertEqual(_rep(df), expected) # column name df = DataFrame({u'あああああ': [1, 222, 33333, 4], 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=['a', 'bb', 'c', 'ddd']) - expected = (u" b あああああ\na あ 1\n" - u"bb いいい 222\nc う 33333\n" + expected = (u" b あああああ\n" + u"a あ 1\n" + u"bb いいい 222\n" + u"c う 33333\n" u"ddd ええええええ 4") self.assertEqual(_rep(df), expected) @@ -616,37 +633,49 @@ def test_east_asian_unicode_frame(self): df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=[u'あああ', u'いいいいいい', u'うう', u'え']) - expected = (u" a b\nあああ あああああ あ\n" - u"いいいいいい い いいい\nうう う う\n" + expected = (u" a b\n" + u"あああ あああああ あ\n" + u"いいいいいい い いいい\n" + u"うう う う\n" u"え えええ ええええええ") self.assertEqual(_rep(df), expected) # index name df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=pd.Index([u'あ', u'い', u'うう', u'え'], name=u'おおおお')) - expected = (u" a b\nおおおお \n" - u"あ あああああ あ\nい い いいい\n" - u"うう う う\nえ えええ ええええええ" - ) + index=pd.Index([u'あ', u'い', u'うう', u'え'], + name=u'おおおお')) + expected = (u" a b\n" + u"おおおお \n" + u"あ あああああ あ\n" + u"い い いいい\n" + u"うう う う\n" + u"え えええ ええええええ") self.assertEqual(_rep(df), expected) # all df = DataFrame({u'あああ': [u'あああ', u'い', u'う', u'えええええ'], u'いいいいい': [u'あ', u'いいい', u'う', u'ええ']}, - index=pd.Index([u'あ', u'いいい', u'うう', u'え'], name=u'お')) - expected = (u" あああ いいいいい\nお \n" - u"あ あああ あ\nいいい い いいい\n" - u"うう う う\nえ えええええ ええ") + index=pd.Index([u'あ', u'いいい', u'うう', u'え'], + name=u'お')) + expected 
= (u" あああ いいいいい\n" + u"お \n" + u"あ あああ あ\n" + u"いいい い いいい\n" + u"うう う う\n" + u"え えええええ ええ") self.assertEqual(_rep(df), expected) # MultiIndex idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( u'おおお', u'かかかか'), (u'き', u'くく')]) df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=idx) - expected = (u" a b\nあ いい あああああ あ\n" - u"う え い いいい\nおおお かかかか う う\n" + 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, + index=idx) + expected = (u" a b\n" + u"あ いい あああああ あ\n" + u"う え い いいい\n" + u"おおお かかかか う う\n" u"き くく えええ ええええええ") self.assertEqual(_rep(df), expected) @@ -660,14 +689,18 @@ def test_east_asian_unicode_frame(self): u'ああああ': [u'さ', u'し', u'す', u'せ']}, columns=['a', 'b', 'c', u'ああああ']) - expected = (u" a ... ああああ\n0 あああああ ... さ\n" - u".. ... ... ...\n3 えええ ... せ\n" + expected = (u" a ... ああああ\n" + u"0 あああああ ... さ\n" + u".. ... ... ...\n" + u"3 えええ ... せ\n" u"\n[4 rows x 4 columns]") self.assertEqual(_rep(df), expected) df.index = [u'あああ', u'いいいい', u'う', 'aaa'] - expected = (u" a ... ああああ\nあああ あああああ ... さ\n" - u"... ... ... ...\naaa えええ ... せ\n" + expected = (u" a ... ああああ\n" + u"あああ あああああ ... さ\n" + u"... ... ... ...\n" + u"aaa えええ ... せ\n" u"\n[4 rows x 4 columns]") self.assertEqual(_rep(df), expected) @@ -675,8 +708,10 @@ def test_east_asian_unicode_frame(self): df = DataFrame({u'あああああ': [1, 222, 33333, 4], 'b': [u'あ', u'いいい', u'¡¡', u'ええええええ']}, index=['a', 'bb', 'c', '¡¡¡']) - expected = (u" b あああああ\na あ 1\n" - u"bb いいい 222\nc ¡¡ 33333\n" + expected = (u" b あああああ\n" + u"a あ 1\n" + u"bb いいい 222\n" + u"c ¡¡ 33333\n" u"¡¡¡ ええええええ 4") self.assertEqual(_rep(df), expected) @@ -753,7 +788,8 @@ def test_truncate_with_different_dtypes(self): # 11594 import datetime s = Series([datetime.datetime(2012, 1, 1)] * 10 + - [datetime.datetime(1012, 1, 2)] + [datetime.datetime(2012, 1, 3)] * 10) + [datetime.datetime(1012, 1, 2)] + [ + datetime.datetime(2012, 1, 3)] * 10) with pd.option_context('display.max_rows', 8): result = str(s) @@ -762,7 +798,8 @@ def test_truncate_with_different_dtypes(self): # 12045 df = DataFrame({'text': ['some words'] + [None] * 9}) - with pd.option_context('display.max_rows', 8, 'display.max_columns', 3): + with pd.option_context('display.max_rows', 8, + 'display.max_columns', 3): result = str(df) self.assertTrue('None' in result) self.assertFalse('NaN' in result) @@ -771,7 +808,8 @@ def test_datetimelike_frame(self): # GH 12211 df = DataFrame( - {'date': [pd.Timestamp('20130101').tz_localize('UTC')] + [pd.NaT] * 5}) + {'date': [pd.Timestamp('20130101').tz_localize('UTC')] + + [pd.NaT] * 5}) with option_context("display.max_rows", 5): result = str(df) @@ -1019,21 +1057,24 @@ def test_index_with_nan(self): y = df.set_index(['id1', 'id2', 'id3']) result = y.to_string() expected = u( - ' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64') + ' value\nid1 id2 id3 \n' + '1a3 NaN 78d 123\n9h4 d67 79d 64') self.assertEqual(result, expected) # index y = df.set_index('id2') result = y.to_string() expected = u( - ' id1 id3 value\nid2 \nNaN 1a3 78d 123\nd67 9h4 79d 64') + ' id1 id3 value\nid2 \n' + 'NaN 1a3 78d 123\nd67 9h4 79d 64') self.assertEqual(result, expected) # with append (this failed in 0.12) y = df.set_index(['id1', 'id2']).set_index('id3', append=True) result = y.to_string() expected = u( - ' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64') + ' value\nid1 id2 id3 \n' + '1a3 NaN 78d 123\n9h4 d67 79d 64') self.assertEqual(result, expected) # all-nan in mi @@ -1042,7 +1083,8 @@ def test_index_with_nan(self): y = 
df2.set_index('id2') result = y.to_string() expected = u( - ' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64') + ' id1 id3 value\nid2 \n' + 'NaN 1a3 78d 123\nNaN 9h4 79d 64') self.assertEqual(result, expected) # partial nan in mi @@ -1051,7 +1093,8 @@ def test_index_with_nan(self): y = df2.set_index(['id2', 'id3']) result = y.to_string() expected = u( - ' id1 value\nid2 id3 \nNaN 78d 1a3 123\n 79d 9h4 64') + ' id1 value\nid2 id3 \n' + 'NaN 78d 1a3 123\n 79d 9h4 64') self.assertEqual(result, expected) df = DataFrame({'id1': {0: np.nan, @@ -1066,7 +1109,8 @@ def test_index_with_nan(self): y = df.set_index(['id1', 'id2', 'id3']) result = y.to_string() expected = u( - ' value\nid1 id2 id3 \nNaN NaN NaN 123\n9h4 d67 79d 64') + ' value\nid1 id2 id3 \n' + 'NaN NaN NaN 123\n9h4 d67 79d 64') self.assertEqual(result, expected) def test_to_string(self): @@ -1660,8 +1704,8 @@ def test_east_asian_unicode_series(self): if PY3: _rep = repr else: - _rep = unicode - # not alighned properly because of east asian width + _rep = unicode # noqa + # not aligned properly because of east asian width # unicode index s = Series(['a', 'bb', 'CCC', 'D'], @@ -1686,7 +1730,8 @@ def test_east_asian_unicode_series(self): # unicode footer s = Series([u'あ', u'いい', u'ううう', u'ええええ'], - index=[u'ああ', u'いいいい', u'う', u'えええ'], name=u'おおおおおおお') + index=[u'ああ', u'いいいい', u'う', u'えええ'], + name=u'おおおおおおお') expected = (u"ああ あ\nいいいい いい\nう ううう\n" u"えええ ええええ\nName: おおおおおおお, dtype: object") self.assertEqual(_rep(s), expected) @@ -1695,7 +1740,9 @@ def test_east_asian_unicode_series(self): idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( u'おおお', u'かかかか'), (u'き', u'くく')]) s = Series([1, 22, 3333, 44444], index=idx) - expected = (u"あ いい 1\nう え 22\nおおお かかかか 3333\n" + expected = (u"あ いい 1\n" + u"う え 22\n" + u"おおお かかかか 3333\n" u"き くく 44444\ndtype: int64") self.assertEqual(_rep(s), expected) @@ -1708,14 +1755,16 @@ def test_east_asian_unicode_series(self): # object dtype, longer than unicode repr s = Series([1, 22, 3333, 44444], index=[1, 'AB', pd.Timestamp('2011-01-01'), u'あああ']) - expected = (u"1 1\nAB 22\n" - u"2011-01-01 00:00:00 3333\nあああ 44444\ndtype: int64" - ) + expected = (u"1 1\n" + u"AB 22\n" + u"2011-01-01 00:00:00 3333\n" + u"あああ 44444\ndtype: int64") self.assertEqual(_rep(s), expected) # truncate with option_context('display.max_rows', 3): - s = Series([u'あ', u'いい', u'ううう', u'ええええ'], name=u'おおおおおおお') + s = Series([u'あ', u'いい', u'ううう', u'ええええ'], + name=u'おおおおおおお') expected = (u"0 あ\n ... 
\n" u"3 ええええ\nName: おおおおおおお, dtype: object") @@ -1746,23 +1795,32 @@ def test_east_asian_unicode_series(self): # both s = Series([u'あ', u'いい', u'ううう', u'ええええ'], index=[u'ああ', u'いいいい', u'う', u'えええ']) - expected = (u"ああ あ\nいいいい いい\nう ううう\n" + expected = (u"ああ あ\n" + u"いいいい いい\n" + u"う ううう\n" u"えええ ええええ\ndtype: object") self.assertEqual(_rep(s), expected) # unicode footer s = Series([u'あ', u'いい', u'ううう', u'ええええ'], - index=[u'ああ', u'いいいい', u'う', u'えええ'], name=u'おおおおおおお') - expected = (u"ああ あ\nいいいい いい\nう ううう\n" - u"えええ ええええ\nName: おおおおおおお, dtype: object") + index=[u'ああ', u'いいいい', u'う', u'えええ'], + name=u'おおおおおおお') + expected = (u"ああ あ\n" + u"いいいい いい\n" + u"う ううう\n" + u"えええ ええええ\n" + u"Name: おおおおおおお, dtype: object") self.assertEqual(_rep(s), expected) # MultiIndex idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( u'おおお', u'かかかか'), (u'き', u'くく')]) s = Series([1, 22, 3333, 44444], index=idx) - expected = (u"あ いい 1\nう え 22\nおおお かかかか 3333\n" - u"き くく 44444\ndtype: int64") + expected = (u"あ いい 1\n" + u"う え 22\n" + u"おおお かかかか 3333\n" + u"き くく 44444\n" + u"dtype: int64") self.assertEqual(_rep(s), expected) # object dtype, shorter than unicode repr @@ -1774,27 +1832,33 @@ def test_east_asian_unicode_series(self): # object dtype, longer than unicode repr s = Series([1, 22, 3333, 44444], index=[1, 'AB', pd.Timestamp('2011-01-01'), u'あああ']) - expected = (u"1 1\nAB 22\n" - u"2011-01-01 00:00:00 3333\nあああ 44444\ndtype: int64" - ) + expected = (u"1 1\n" + u"AB 22\n" + u"2011-01-01 00:00:00 3333\n" + u"あああ 44444\ndtype: int64") self.assertEqual(_rep(s), expected) # truncate with option_context('display.max_rows', 3): - s = Series([u'あ', u'いい', u'ううう', u'ええええ'], name=u'おおおおおおお') + s = Series([u'あ', u'いい', u'ううう', u'ええええ'], + name=u'おおおおおおお') expected = (u"0 あ\n ... \n" u"3 ええええ\nName: おおおおおおお, dtype: object") self.assertEqual(_rep(s), expected) s.index = [u'ああ', u'いいいい', u'う', u'えええ'] - expected = (u"ああ あ\n ... \n" - u"えええ ええええ\nName: おおおおおおお, dtype: object") + expected = (u"ああ あ\n" + u" ... 
\n" + u"えええ ええええ\n" + u"Name: おおおおおおお, dtype: object") self.assertEqual(_rep(s), expected) # ambiguous unicode s = Series([u'¡¡', u'い¡¡', u'ううう', u'ええええ'], index=[u'ああ', u'¡¡¡¡いい', u'¡¡', u'えええ']) - expected = (u"ああ ¡¡\n¡¡¡¡いい い¡¡\n¡¡ ううう\n" + expected = (u"ああ ¡¡\n" + u"¡¡¡¡いい い¡¡\n" + u"¡¡ ううう\n" u"えええ ええええ\ndtype: object") self.assertEqual(_rep(s), expected) @@ -2099,15 +2163,48 @@ def test_output_significant_digits(self): expected_output = { (0, 6): - ' col1\n0 9.999000e-08\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07', + ' col1\n' + '0 9.999000e-08\n' + '1 1.000000e-07\n' + '2 1.000100e-07\n' + '3 2.000000e-07\n' + '4 4.999000e-07\n' + '5 5.000000e-07', (1, 6): - ' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07', + ' col1\n' + '1 1.000000e-07\n' + '2 1.000100e-07\n' + '3 2.000000e-07\n' + '4 4.999000e-07\n' + '5 5.000000e-07', (1, 8): - ' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07\n6 5.000100e-07\n7 6.000000e-07', + ' col1\n' + '1 1.000000e-07\n' + '2 1.000100e-07\n' + '3 2.000000e-07\n' + '4 4.999000e-07\n' + '5 5.000000e-07\n' + '6 5.000100e-07\n' + '7 6.000000e-07', (8, 16): - ' col1\n8 9.999000e-07\n9 1.000000e-06\n10 1.000100e-06\n11 2.000000e-06\n12 4.999000e-06\n13 5.000000e-06\n14 5.000100e-06\n15 6.000000e-06', + ' col1\n' + '8 9.999000e-07\n' + '9 1.000000e-06\n' + '10 1.000100e-06\n' + '11 2.000000e-06\n' + '12 4.999000e-06\n' + '13 5.000000e-06\n' + '14 5.000100e-06\n' + '15 6.000000e-06', (9, 16): - ' col1\n9 0.000001\n10 0.000001\n11 0.000002\n12 0.000005\n13 0.000005\n14 0.000005\n15 0.000006' + ' col1\n' + '9 0.000001\n' + '10 0.000001\n' + '11 0.000002\n' + '12 0.000005\n' + '13 0.000005\n' + '14 0.000005\n' + '15 0.000006' } for (start, stop), v in expected_output.items(): diff --git a/pandas/util/clipboard/exceptions.py b/pandas/util/clipboard/exceptions.py index f42d263a02993..413518e53660a 100644 --- a/pandas/util/clipboard/exceptions.py +++ b/pandas/util/clipboard/exceptions.py @@ -1,4 +1,3 @@ -# flake8: noqa import ctypes From 3d6c5a8723ae69c55a43d444dcb64fc532358714 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 30 Mar 2017 04:55:17 -0700 Subject: [PATCH 304/933] DOC: Fix up _DeprecatedModule parameters doc (#15843) [ci skip] --- pandas/util/depr_module.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/util/depr_module.py b/pandas/util/depr_module.py index b181c4627b1e1..af7faf9dd96c8 100644 --- a/pandas/util/depr_module.py +++ b/pandas/util/depr_module.py @@ -13,8 +13,9 @@ class _DeprecatedModule(object): Parameters ---------- deprmod : name of module to be deprecated. - deprmodto : name of module as a replacement, optional - if not givent will __module__ + deprmodto : name of module as a replacement, optional. + If not given, the __module__ attribute will + be used when needed. removals : objects or methods in module that will no longer be accessible once module is removed. 
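
    Examples
    --------
    A minimal sketch; ``pandas.oldmod`` and ``pandas.newmod`` are
    hypothetical module names used for illustration, not modules touched
    by this patch::

        oldmod = _DeprecatedModule(deprmod='pandas.oldmod',
                                   deprmodto='pandas.newmod')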
""" From 48749ce4a774fba73ea38501cd99820537549d5a Mon Sep 17 00:00:00 2001 From: Baurzhan Muftakhidinov Date: Thu, 30 Mar 2017 16:57:00 +0500 Subject: [PATCH 305/933] Fix a typo (#15844) --- pandas/sparse/sparse.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/sparse/sparse.pyx b/pandas/sparse/sparse.pyx index 7ab29414499fc..00d317c42b18d 100644 --- a/pandas/sparse/sparse.pyx +++ b/pandas/sparse/sparse.pyx @@ -71,7 +71,7 @@ cdef class IntIndex(SparseIndex): def check_integrity(self): """ Only need be strictly ascending and nothing less than 0 or greater than - totall ength + total length """ pass From 1e0fbd2b86985e11d1869ceff688214f3ca64055 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Thu, 30 Mar 2017 08:03:45 -0400 Subject: [PATCH 306/933] BUG: SparseDataFrame construction with lists not coercing to dtype (GH 15682) closes #15682 Author: Carlos Souza Closes #15834 from ucals/bug-fix-15682 and squashes the following commits: 04fba8d [Carlos Souza] Adding test_rename test cases (were missing) 483bb2c [Carlos Souza] Doing adjustments as per @jreback requests cc4c15b [Carlos Souza] Fixing coersion bug at SparseDataFrame construction faa5c5c [Carlos Souza] Merge remote-tracking branch 'upstream/master' 43456a5 [Carlos Souza] Merge remote-tracking branch 'upstream/master' 8b463cb [Carlos Souza] Merge remote-tracking branch 'upstream/master' 9fc617b [Carlos Souza] Merge remote-tracking branch 'upstream/master' e12bca7 [Carlos Souza] Sync fork 676a4e5 [Carlos Souza] Test --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/sparse/frame.py | 2 +- pandas/tests/sparse/test_frame.py | 36 +++++++++++++++++++++++-------- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 023d41763baee..1ecdd6dd8fbef 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1046,7 +1046,7 @@ Bug Fixes - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) - Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) - +- Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`) - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 41f301f263374..455d120cca640 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -142,7 +142,7 @@ def _init_dict(self, data, index, columns, dtype=None): sp_maker = lambda x: SparseArray(x, kind=self._default_kind, fill_value=self._default_fill_value, - copy=True) + copy=True, dtype=dtype) sdict = DataFrame() for k, v in compat.iteritems(data): if isinstance(v, Series): diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index c0c678c184ee8..ae1a1e35f1859 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -28,7 +28,6 @@ class TestSparseDataFrame(tm.TestCase, SharedWithSparse): - klass = SparseDataFrame def setUp(self): @@ -237,6 +236,18 @@ def test_constructor_nan_dataframe(self): dtype=float) tm.assert_sp_frame_equal(result, expected) + def test_type_coercion_at_construction(self): + # GH 15682 + result = pd.SparseDataFrame( + {'a': [1, 0, 0], 'b': [0, 1, 0], 
'c': [0, 0, 1]}, dtype='uint8', + default_fill_value=0) + expected = pd.SparseDataFrame( + {'a': pd.SparseSeries([1, 0, 0], dtype='uint8'), + 'b': pd.SparseSeries([0, 1, 0], dtype='uint8'), + 'c': pd.SparseSeries([0, 0, 1], dtype='uint8')}, + default_fill_value=0) + tm.assert_sp_frame_equal(result, expected) + def test_dtypes(self): df = DataFrame(np.random.randn(10000, 4)) df.loc[:9998] = np.nan @@ -756,9 +767,18 @@ def test_sparse_frame_fillna_limit(self): tm.assert_frame_equal(result, expected) def test_rename(self): - # just check this works - renamed = self.frame.rename(index=str) # noqa - renamed = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) # noqa + result = self.frame.rename(index=str) + expected = SparseDataFrame(self.data, index=self.dates.strftime( + "%Y-%m-%d %H:%M:%S")) + tm.assert_sp_frame_equal(result, expected) + + result = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) + data = {'A1': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], + 'B1': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], + 'C1': np.arange(10, dtype=np.float64), + 'D1': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} + expected = SparseDataFrame(data, index=self.dates) + tm.assert_sp_frame_equal(result, expected) def test_corr(self): res = self.frame.corr() @@ -967,7 +987,6 @@ def _check(frame, orig): def test_shift(self): def _check(frame, orig): - shifted = frame.shift(0) exp = orig.shift(0) tm.assert_frame_equal(shifted.to_dense(), exp) @@ -1060,7 +1079,7 @@ def test_sparse_pow_issue(self): df = SparseDataFrame({'A': [nan, 0, 1]}) # note that 2 ** df works fine, also df ** 1 - result = 1**df + result = 1 ** df r1 = result.take([0], 1)['A'] r2 = result['A'] @@ -1126,7 +1145,7 @@ def test_isnotnull(self): tm.assert_frame_equal(res.to_dense(), exp) -@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811 +@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811 @pytest.mark.parametrize('columns', [None, list('cd')]) @pytest.mark.parametrize('fill_value', [None, 0, np.nan]) @pytest.mark.parametrize('dtype', [bool, int, float, np.uint16]) @@ -1180,7 +1199,7 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): tm.assert_equal(sdf.to_coo().dtype, np.object_) -@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811 +@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811 def test_from_to_scipy_object(spmatrix, fill_value): # GH 4343 dtype = object @@ -1255,7 +1274,6 @@ def test_comparison_op_scalar(self): class TestSparseDataFrameAnalytics(tm.TestCase): - def setUp(self): self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], From 9c98e13172dd5decd99496f7f381568c547f6ba3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 30 Mar 2017 08:46:29 -0400 Subject: [PATCH 307/933] DOC: prettify bug fixes section (#15846) --- doc/source/whatsnew/v0.20.0.txt | 196 ++++++++++++++++---------------- 1 file changed, 95 insertions(+), 101 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 1ecdd6dd8fbef..399f91fc60810 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -913,147 +913,141 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +Conversion +^^^^^^^^^^ + - Bug in ``Timestamp.replace`` now raises ``TypeError`` when incorrect argument names are given; previously this raised ``ValueError`` (:issue:`15240`) +- Bug in ``Timestamp.replace`` with compat for passing long integers (:issue:`15030`) - Bug in ``Timestamp`` returning UTC based 
time/date attributes when a timezone was provided (:issue:`13303`) -- Bug in ``Index`` power operations with reversed operands (:issue:`14973`) - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) - Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`) +- Bug in catching an overflow in ``Timestamp`` + ``Timedelta/Offset`` operations (:issue:`15126`) - Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds or less (:issue:`14440`, :issue:`15578`) - Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises error now with ``astype()`` for Series and DataFrames (:issue:`14265`) - Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type decimal.Decimal. (:issue:`14827`) - Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`) -- Bug in ``DataFrame.sort_values()`` when sorting by multiple columns where one column is of type ``int64`` and contains ``NaT`` (:issue:`14922`) -- Bug in ``DataFrame.reindex()`` in which ``method`` was ignored when passing ``columns`` (:issue:`14992`) -- Bug in ``pd.to_numeric()`` in which float and unsigned integer elements were being improperly casted (:issue:`14941`, :issue:`15005`) - Cleaned up ``PeriodIndex`` constructor, including raising on floats more consistently (:issue:`13277`) -- Bug in ``pd.read_csv()`` in which the ``dialect`` parameter was not being verified before processing (:issue:`14898`) -- Bug in ``pd.read_fwf()`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`) -- Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`) -- Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`) -- Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`) -- Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) -- Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. 
(:issue:`15021`) - Bug in using ``__deepcopy__`` on empty NDFrame objects (:issue:`15370`) -- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`) -- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`) -- Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) -- Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`) -- Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) -- Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`) -- Bug in ``Series.asof`` which raised if the series contained all ``np.nan`` (:issue:`15713`) - -- Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) - -- Properly set ``__name__`` and ``__qualname__`` for ``Groupby.*`` functions (:issue:`14620`) -- Bug in ``.at`` when selecting from a tz-aware column (:issue:`15822`) -- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`, :issue:`15765`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) - Bug in ``Series.replace`` which replaced a numeric by string (:issue:`15743`) - +- Bug in ``Index`` construction with ``NaN`` elements and integer dtype specified (:issue:`15187`) +- Bug in ``Series`` construction with a datetimetz (:issue:`14928`) +- Bug in ``Series.dt.round()`` inconsistent behaviour on ``NaT`` 's with different arguments (:issue:`14940`) +- Bug in ``Series`` constructor when both ``copy=True`` and ``dtype`` arguments are provided (:issue:`15125`) +- Incorrect dtyped ``Series`` was returned by comparison methods (e.g., ``lt``, ``gt``, ...) against a constant for an empty ``DataFrame`` (:issue:`15077`) +- Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. (:issue:`14956`) +- Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) -- Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) -- Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. 
(:issue:`14956`) -- Bug in interactions with ``Qt`` when a ``QtApplication`` already exists (:issue:`14372`) -- Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) +Indexing +^^^^^^^^ +- Bug in ``Index`` power operations with reversed operands (:issue:`14973`) +- Bug in ``DataFrame.sort_values()`` when sorting by multiple columns where one column is of type ``int64`` and contains ``NaT`` (:issue:`14922`) +- Bug in ``DataFrame.reindex()`` in which ``method`` was ignored when passing ``columns`` (:issue:`14992`) +- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`) +- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`) +- Bug in ``Series.asof`` which raised if the series contained all ``np.nan`` (:issue:`15713`) +- Bug in ``.at`` when selecting from a tz-aware column (:issue:`15822`) - Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) - Bug in ``Series.where()`` where TZ-aware data was converted to float representation (:issue:`15701`) -- Bug in ``Index`` construction with ``NaN`` elements and integer dtype specified (:issue:`15187`) -- Bug in ``Series`` construction with a datetimetz (:issue:`14928`) +- Bug in ``.loc`` that would not return the correct dtype for scalar access for a DataFrame (:issue:`11617`) - Bug in output formatting of a ``MultiIndex`` when names are integers (:issue:`12223`, :issue:`15262`) +- Bug in ``Categorical.searchsorted()`` where alphabetical instead of the provided categorical order was used (:issue:`14522`) +- Bug in ``Series.iloc`` where a ``Categorical`` object for list-like indexes input was returned, where a ``Series`` was expected. 
(:issue:`14580`) +- Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) +- Bug in ``.reset_index()`` when an all ``NaN`` level of a ``MultiIndex`` would fail (:issue:`6322`) +- Bug in creating a ``MultiIndex`` with tuples and not passing a list of names; this will now raise ``ValueError`` (:issue:`15110`) +- Bug in the HTML display with with a ``MultiIndex`` and truncation (:issue:`14882`) +- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) -- Bug in compat for passing long integers to ``Timestamp.replace`` (:issue:`15030`) -- Bug in ``.loc`` that would not return the correct dtype for scalar access for a DataFrame (:issue:`11617`) -- Bug in ``GroupBy.get_group()`` failing with a categorical grouper (:issue:`15155`) -- Bug in ``pandas.tools.utils.cartesian_product()`` with large input can cause overflow on windows (:issue:`15265`) +I/O +^^^ +- Bug in ``pd.to_numeric()`` in which float and unsigned integer elements were being improperly casted (:issue:`14941`, :issue:`15005`) +- Bug in ``pd.read_fwf()`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`) +- Bug in ``pd.read_csv()`` in which the ``dialect`` parameter was not being verified before processing (:issue:`14898`) +- Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`) +- Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`) +- Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`) +- Bug in ``pd.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) +- Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) +- Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`) +- Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) +- Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) +- Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) +- Bug in ``.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`) +- Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) +- Bug in ``pd.read_msgpack()`` which did not allow loading of a dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) +- Bug in ``pd.read_msgpack()`` when deserializing a ``CategoricalIndex`` (:issue:`15487`) - Bug in ``DataFrame.to_records()`` with converting a ``DatetimeIndex`` with a timezone (:issue:`13937`) +- Bug in ``DataFrame.to_records()`` which failed with unicode characters in column names (:issue:`11879`) +- Bug in ``.to_sql()`` when writing a DataFrame with numeric index names (:issue:`15404`). 
+- Bug in ``DataFrame.to_html()`` with ``index=False`` and ``max_rows`` raising in ``IndexError`` (:issue:`14998`) +- Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`) +- Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) +- Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`) +Plotting +^^^^^^^^ -- Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`) - - -- Bug in ``to_sql`` when writing a DataFrame with numeric index names (:issue:`15404`). -- Bug in ``Series.iloc`` where a ``Categorical`` object for list-like indexes input was returned, where a ``Series`` was expected. (:issue:`14580`) -- Bug in repr-formatting a ``SparseDataFrame`` after a value was set on (a copy of) one of its series (:issue:`15488`) -- Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) +- Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`) +- Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) +Groupby/Resample/Rolling +^^^^^^^^^^^^^^^^^^^^^^^^ +- Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. (:issue:`15021`) +- Properly set ``__name__`` and ``__qualname__`` for ``Groupby.*`` functions (:issue:`14620`) +- Bug in ``GroupBy.get_group()`` failing with a categorical grouper (:issue:`15155`) +- Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`) - Bug in groupby operations with timedelta64 when passing ``numeric_only=False`` (:issue:`5724`) - Bug in ``groupby.apply()`` coercing ``object`` dtypes to numeric types, when not all values were numeric (:issue:`14423`, :issue:`15421`, :issue:`15670`) - - -- Bug in ``DataFrame.to_html`` with ``index=False`` and ``max_rows`` raising in ``IndexError`` (:issue:`14998`) - -- Bug in ``Categorical.searchsorted()`` where alphabetical instead of the provided categorical order was used (:issue:`14522`) - - - -- Bug in ``resample``, where a non-string ```loffset`` argument would not be applied when resampling a timeseries (:issue:`13218`) - - - -- Bug in ``.rank()`` which incorrectly ranks ordered categories (:issue:`15420`) -- Bug in ``.corr()`` and ``.cov()`` where the column and index were the same object (:issue:`14617`) - - -- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`) -- Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`) - -- Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) -- Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) -- Bug in ``.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`) +- Bug in ``resample``, where a non-string ``loffset`` argument would not be applied when resampling a timeseries (:issue:`13218`) +- Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`) +- Bug in ``groupby().nunique()`` with a datetimelike-grouper where bins counts were incorrect (:issue:`13453`) +- Bug in ``groupby.transform()`` that would 
coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) +- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) - Bug in ``.rolling/expanding()`` functions where ``count()`` was not counting ``np.Inf``, nor handling ``object`` dtypes (:issue:`12541`) - Bug in ``.rolling()`` where ``pd.Timedelta`` or ``datetime.timedelta`` was not accepted as a ``window`` argument (:issue:`15440`) +- Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) - Bug in ``DataFrame.resample().median()`` if duplicate column names are present (:issue:`14233`) -- Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`) -- Bug in creating a ``MultiIndex`` with tuples and not passing a list of names; this will now raise ``ValueError`` (:issue:`15110`) -- Bug in ``groupby().nunique()`` with a datetimelike-grouper where bins counts were incorrect (:issue:`13453`) +Sparse +^^^^^^ -- Bug in catching an overflow in ``Timestamp`` + ``Timedelta/Offset`` operations (:issue:`15126`) -- Bug in the HTML display with with a ``MultiIndex`` and truncation (:issue:`14882`) +- Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) +- Bug in repr-formatting a ``SparseDataFrame`` after a value was set on (a copy of) one of its series (:issue:`15488`) +- Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`) +Reshaping +^^^^^^^^^ - Bug in ``pd.merge_asof()`` where ``left_index`` or ``right_index`` caused a failure when multiple ``by`` was specified (:issue:`15676`) - Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`) - Bug in ``DataFrame.pivot_table()`` where ``dropna=True`` would not drop all-NaN columns when the columns was a ``category`` dtype (:issue:`15193`) - - -- Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`) - - -- Bug in ``Series`` constructor when both ``copy=True`` and ``dtype`` arguments are provided (:issue:`15125`) -- Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`) -- Incorrect dtyped ``Series`` was returned by comparison methods (e.g., ``lt``, ``gt``, ...) 
against a constant for an empty ``DataFrame`` (:issue:`15077`) -- Bug in ``Series.dt.round`` inconsistent behaviour on NAT's with different arguments (:issue:`14940`) -- Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) -- Bug in ``.reset_index()`` when an all ``NaN`` level of a ``MultiIndex`` would fail (:issue:`6322`) - -- Bug in ``pd.read_msgpack()`` when deserializing a ``CategoricalIndex`` (:issue:`15487`) -- Bug in ``pd.DataFrame.to_records()`` which failed with unicode characters in column names (:issue:`11879`) - - -- Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) -- Avoid use of ``np.finfo()`` during ``import pandas`` removed to mitigate deadlock on Python GIL misuse (:issue:`14641`) - -- Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) -- Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`) -- Bug with ``sort=True`` in ``DataFrame.join`` and ``pd.merge`` when joining on indexes (:issue:`15582`) - +- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) +- Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`) - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) -- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) +- Bug with ``sort=True`` in ``DataFrame.join`` and ``pd.merge`` when joining on indexes (:issue:`15582`) -- Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`) +Numeric +^^^^^^^ -- Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) -- Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) +- Bug in ``.rank()`` which incorrectly ranks ordered categories (:issue:`15420`) +- Bug in ``.corr()`` and ``.cov()`` where the column and index were the same object (:issue:`14617`) - Bug in ``.mode()`` where ``mode`` was not returned if was only a single value (:issue:`15714`) - -- Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`) -- Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) -- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) +- Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`) +- Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) +- Bug in ``pandas.tools.utils.cartesian_product()`` with large input can cause overflow on windows (:issue:`15265`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) -- Bug in ``pd.read_msgpack()`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) + +Other +^^^^^ + +- Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`) +- Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) +- Require at least 
0.23 version of cython to avoid problems with character encodings (:issue:`14699`) +- Bug in interactions with ``Qt`` when a ``QtApplication`` already exists (:issue:`14372`) +- Avoid use of ``np.finfo()`` during ``import pandas`` removed to mitigate deadlock on Python GIL misuse (:issue:`14641`) From 1f8906078c723cc9b7000cdd552c03769cc4c5ca Mon Sep 17 00:00:00 2001 From: Tong SHEN Date: Thu, 30 Mar 2017 21:20:51 +0800 Subject: [PATCH 308/933] CLN: Fix a typo in comment (#15847) --- pandas/_libs/src/ujson/lib/ultrajson.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h index d0588348baa44..4f51fa8b3eb38 100644 --- a/pandas/_libs/src/ujson/lib/ultrajson.h +++ b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -233,7 +233,7 @@ typedef struct __JSONObjectEncoder { int recursionMax; /* - Configuration for max decimals of double floating poiunt numbers to encode (0-9) */ + Configuration for max decimals of double floating point numbers to encode (0-9) */ int doublePrecision; /* From b6d405d695249980aa2f93d58998412b4b81dcf3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 30 Mar 2017 16:42:23 -0400 Subject: [PATCH 309/933] TST: incorrect localization in append testing and when ``pytz`` version changes our tests break because of this incorrect (old) method, which works when you *dont'* have a tz change, but fails when the tz's actually change. Author: Jeff Reback Closes #15849 from jreback/localize and squashes the following commits: d43d088 [Jeff Reback] TST: incorrect localization in append testing --- pandas/tests/test_multilevel.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index fd5421abc89ad..5584c1ac6a239 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -83,9 +83,9 @@ def test_append_index(self): # GH 7112 import pytz tz = pytz.timezone('Asia/Tokyo') - expected_tuples = [(1.1, datetime.datetime(2011, 1, 1, tzinfo=tz)), - (1.2, datetime.datetime(2011, 1, 2, tzinfo=tz)), - (1.3, datetime.datetime(2011, 1, 3, tzinfo=tz))] + expected_tuples = [(1.1, tz.localize(datetime.datetime(2011, 1, 1))), + (1.2, tz.localize(datetime.datetime(2011, 1, 2))), + (1.3, tz.localize(datetime.datetime(2011, 1, 3)))] expected = Index([1.1, 1.2, 1.3] + expected_tuples) tm.assert_index_equal(result, expected) @@ -103,9 +103,9 @@ def test_append_index(self): result = midx_lv3.append(midx_lv2) expected = Index._simple_new( - np.array([(1.1, datetime.datetime(2011, 1, 1, tzinfo=tz), 'A'), - (1.2, datetime.datetime(2011, 1, 2, tzinfo=tz), 'B'), - (1.3, datetime.datetime(2011, 1, 3, tzinfo=tz), 'C')] + + np.array([(1.1, tz.localize(datetime.datetime(2011, 1, 1)), 'A'), + (1.2, tz.localize(datetime.datetime(2011, 1, 2)), 'B'), + (1.3, tz.localize(datetime.datetime(2011, 1, 3)), 'C')] + expected_tuples), None) tm.assert_index_equal(result, expected) From a1086517818039dc4461a526b19c4b7c917b9afe Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 30 Mar 2017 17:02:14 -0400 Subject: [PATCH 310/933] COMPAT: add 0.19.2 msgpack/pickle files (#15848) * COMPAT: add 0.19.2 msgpack/pickle files * show error * add in 2.7 pickles --- .../0.19.2/0.19.2_x86_64_darwin_2.7.12.msgpack | Bin 0 -> 12325 bytes .../0.19.2/0.19.2_x86_64_darwin_3.6.1.msgpack | Bin 0 -> 119196 bytes .../0.19.2/0.19.2_x86_64_darwin_2.7.12.pickle | Bin 0 -> 127525 bytes .../0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle | Bin 0 -> 125349 
From a1086517818039dc4461a526b19c4b7c917b9afe Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Thu, 30 Mar 2017 17:02:14 -0400
Subject: [PATCH 310/933] COMPAT: add 0.19.2 msgpack/pickle files (#15848)

* COMPAT: add 0.19.2 msgpack/pickle files

* show error

* add in 2.7 pickles
---
 .../0.19.2/0.19.2_x86_64_darwin_2.7.12.msgpack   | Bin 0 -> 12325 bytes
 .../0.19.2/0.19.2_x86_64_darwin_3.6.1.msgpack    | Bin 0 -> 119196 bytes
 .../0.19.2/0.19.2_x86_64_darwin_2.7.12.pickle    | Bin 0 -> 127525 bytes
 .../0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle     | Bin 0 -> 125349 bytes
 pandas/tests/io/test_pickle.py                   |   3 ++-
 5 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 pandas/tests/io/data/legacy_msgpack/0.19.2/0.19.2_x86_64_darwin_2.7.12.msgpack
 create mode 100644 pandas/tests/io/data/legacy_msgpack/0.19.2/0.19.2_x86_64_darwin_3.6.1.msgpack
 create mode 100644 pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_2.7.12.pickle
 create mode 100644 pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle

diff --git a/pandas/tests/io/data/legacy_msgpack/0.19.2/0.19.2_x86_64_darwin_2.7.12.msgpack b/pandas/tests/io/data/legacy_msgpack/0.19.2/0.19.2_x86_64_darwin_2.7.12.msgpack
new file mode 100644
index 0000000000000000000000000000000000000000..f2dc38766025e8b5d2d24d2587783c127686973c
GIT binary patch
literal 12325
[base85-encoded binary payload omitted]
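For context, these binary fixtures back pandas' legacy round-trip tests:
``test_pickle.py`` loads each stored file with the current version and compares
it against freshly constructed objects. A rough, hypothetical sketch of that
check follows; the path is real per the diffstat above, but the dict keys and
expected values are illustrative only, not the actual test code:

# Hypothetical sketch of a legacy-compat check, assuming the stored pickle
# holds a dict of {type_name: {case_name: object}} as written by an older
# pandas; the keys and expected values below are illustrative only.
import pandas as pd
import pandas.util.testing as tm  # testing helpers at their 2017 location

path = ('pandas/tests/io/data/legacy_pickle/'
        '0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle')
data = pd.read_pickle(path)

# Compare one stored object against an equivalent built with today's pandas.
expected = pd.Series([1, 2, 3])              # illustrative expected value
tm.assert_series_equal(data['series']['int'], expected)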
zfC;foLTr)*_NM_eff#6lDZq4KmV^q1{(O(W;QJGH^Z#N2PDmJ2fBsnV_Y3iVqv}sJ z5ra=cqfXfV&l>Sx>1qB4eu8A-|Aui?J)h9CMe+%@Sh)Fe|lz4cy|8x#^2zv(EI()LwWp& zyDVX5txls~1Vw+PEhMzQU)86$Ioj>Rn|tq&@kjn^23GxIR-ts+I!*s54fhE6x|1%Q@|7imLb;jbqJMAz# zp)Um!hUx?phU5ejhWZ2(hV=Kn<4=&_fBOFk=Sdj)6V8(`jWLd(h(AA%@q45F->4e@ zWT^I60t6y{ndFaa#6J?{J^ziW@yn|4%c4t&0u!RV6eZ$MlnP9U`VykLgeWl~3d{n> zWdn17Il)|DZZHp+7t9Cd2Md4&!9rkRun1TbqRVmUKd5SdBE0+?74GMK(LeKq@Q?IG z|M2+tTT`F^X&>^dJ_G*`3i6*F5r1SUVbn~J!2b1(kyRG_3EBHQn!l^^Z+h;?>X47w_-=`4VsJR9WBXQ2*As;s>X{Ka({%egf-{X0l=* zN8;zU;(mOe+v?D=dVjRX}0w!V-CSwYwVj8An24-RwW@8TKVjh+VON=GKl48lQ4p7tRPkhD~uJviekmE;#di+ zBvuM5jg`U5V&$;%SOu&iRtc+&Rl%xa)v)SV4Xh?s3#*OQ!Rli5u=-d7tRdD2Ym7C) znqtkc=2#2N2WyG>Vt$xE)(UHlwZQ_gwpcr?Jr;-sVZm4k)&UE}!mx0xBi0G)jCH}f zV%@OrSP!fx)(h*6^}+gL{jmPn0Bj&O2pfzI!G>bPu;JJUY$P@c8;y;@#$plJIBYyN z0h@?T!X{%=u&LNIY&tdrn~BZBW@C}q94rcp#$vF!*gR}Lwg6j*Ey5OKOR%NbGHf}v z0$Yi#!d7Ezu(jAaY(2IC+lXz#He*|`t=KkfJGKMciS5F6V|%c@*gkAOb^tqw9l{P{ zN3f&VG3+>Y0y~MF!cJpnu(Q}X>^yb>yNF%FE@M})tJpQ{I(7rQiQU3(V|TE-*gfn% z_5gc`J;EMiPq3%hGweC`0(*(Q!d_!^=4Y`-pwQK4V|7uh_TP;>U3UM>vU7 zIE^zni*q=S3%H0&xQr{fifg!z8@P#ExQ#owi+gw?JTaaGPl_kQljAAylz1vUHJ%1f zi>JfW;~DUbcqTkEo(0c}XT!7OIq;l#E<87$2hWS=!}H?>@Pc?Dyf9t_FNzn#i{mBm zl6Wb+G+qWTi@P>FJyfNMc zZ;Cg=o8v8TAG{^*i~HgJcq_a$-UbiA+v4r;_IMy3ga_jxcn3Tb55vRpj(8`$Gu{R7 zig&}i<2~@6crUy+-UsiC_rv?+1Mq?PAbc=B1RshI!-wM|@R9f^d^A1=AB#uekOuj1G6>-Y`)CVmUQjo-oV;`i|T z_yhbQ{s@1JKf#~k&+zB?3;ZSi3V)5i!QbNV@b~x!{3HGe|BQdZzvACw8$V7E1R}`T z=P8b9Cvp%uiCjc(A`g+5$VcQS3J?W}LPTMr2vL+MMieJX z5G9FHL}{W7QI;r2lqV_>6^TkjWugjEm8eEkCu$HiiCRQ$q7G4)s7KT%8W0VMMnq$x z3DJ~jMl>f{5I#gp!k6$P{E1dXYoZMiK(r;=5$%aUB8UhkLWmATC=o`46CH_8L}#K4 z(Us^%bSHWcJ&9gKZ=w&;m*_|ICk7A$i9y6*VhAym7)A^yMi3*3QN(Cs3^A67AjT2n zi3!9+ViGZ#m_ke?rV-PL8N^It7BQQMB<2uNL^KgY%q8X#^N9t-LShlIm{>wAC6*D( zi50|3VimEPSVOEO))DK84a7!b6S0}tLTn|r5!;C!#7<%tv76XK>?QUQ`-ua@LE;c` zm^eZlC5{oti4(+0;uLY3I76Hz&JpK{3&cg@5^5EVj&Q4v%W6+^{Q2~-l5LZwj| zR2G#(`Z zB46Z({81~^8nrVQH~7z#%nQ76VKs`|})Eo6d zeNjKu9}PeQ(I7M!4M9WEFf<&EKqJv8G#ZUTV^IVehsL7`Xd;?~CZj26Dw>9-qZw!> znuTVgNHhmUp=cC?=AwCMK3aelqD5#iT7s6MWoS8CfmWhbXf;}c)}nQ2J=%aaqD^Qs z+Jd&CZD>2%fp(%@XgAt}_M&}gKRSR8qC@B~I)aX(W9T?Kfli`R=rlTm&Z2YZJi34` zqD$y9x`M8vYv?+NA&P}G0GoD{QHxa`02Uip9=YTIHLfHpajaG z0;*u_di3{?4jP~dTA&R&V8RlX2iu9j#9$IIDVPjQ4yFK8f~mmNU>Yzjm<~)2#w`iN zeKN)q8yCfYGJ{#btY9`UJD3B^3FZQGgL%NbU_LNESO6>t76J={MZlt9F|asT0xSuZ z0!xEsz_MUDusm1+tO!;DD}z9BcvlfGt5^&=2$nTY;^?HedkQ7HkK$2Lr(%Fc=I0JAk2J7#I$A1UrG9!7gA| zup8JN>;d)!dx5>dK44$4AJ`up01gBPfrG&z;81WFI2;@Sjs!=6qroxYSTF({2aX3P zfD^$<;AC(LI2D`*P6uazGr?KlY%mg>14e<-U<^1HoCnSa7k~@FMc`s^3Ahwo1}+Cz zfGfdO;A(IUxE5Rot_L@O8^KNBW^fC*72F1H2X}xw!Cl~Pa1Xc_+z0Ll4}b^3L*QZX z2zV4c1|A1bfG5FI;A!v-cosYdo(C_07r{&5W$+4k6}$#s2XBBk!CT;M@D6wvya(O~ zAAk?RN8n@d3HTIz20jO0fG@#U;A`*=_!fKzz6U>mAHh%HXYdR775oM!theKM{0;&{ zAPG_+4Kg4Lav%>1pa@E!3@V@sYM>4ppb1)_4LYC;dSD_jF_;8Q3MK=SgDJq2U@9;* zmrUTQ18NiHSCNMLY1w^uzhF~MG zG1vrb3N{0qgDpTGuqEgV`hosnE3h@#1`Gh(g6+WeU?3O-27@7B2QU;21H-|NU?;FM z*ahqgb_2VEJ;0t|FR(Y*2kZ;>1N(ymz=7Z(a4DtBG&lwv3r2wB z!13S&a3VMfoD5C@r-IYK>EH}-CO8Y64Mu`pi~;9@^T7Gw0&pR?2wV&<0hfZy zz~$fya3#13Tn(-P*MjT7_233@Be)6N3~m9pg4@9D;0|ynxC`73?g96L`@sF+0q`Jr z2s{iP0gr;mz~kTv@FaK&JPn=!&w}T`^WX*WB6tbB3|;}Rg4e+7;0^F5cniD@-U07| z_rUw$1MngE2z(4a0iS};z~|r#@Fn;Pd=0(<--7SJ_uvQcBlrpY41NK>g5SW{g@NxM z93(&l{?lt#GMrZdRZs(U&;U))0&UO%UC;v)fr-H+U{Wv{m>f(2rUX-gslha0S}+}$ z9?Sq{1T%q|!7N}_FdLX1%mL;EbAh?RJYZfhADAC302TxbfrY^$U{SCbSR5f9;^UX1S^4+!75-?uo_q$tO3>pYk{@FI$&L}9#|i205$|0fsMf?U{kOe z*c@yD`hYD#U(gTq2U~%y!8Tw3*cNODwg&^jATSsV0Xu-9U>FzdGO7&sgp0gePmfuq4O;8-vM90!gECx8>d 
zN#JB~3OE&<22KZOfHT2a;A}7woC8LI(O?WX7n}#q2N!?~!A0O=a0$2+Tm~)&SAZ+Q zRp4rH4Y(Fu2d)PS;!9(C-@CbMm zJO&;IPk<-EQ{ZXv40ski2c8EnfEU3_;AQX%con<`UI%Z0H^E!rZSW3w7rY1F2OoeB z!AIa@@Co=7d#4fqy(2fhbCfFHq6;Aij)_!ayH#=aI1KmLORh(Hpg zKpJE~7UV!46hIM_Kp9j(71Tf-G(Z!yKpS*G7xchHU}7){m=sI~CI?f1DZx}=YA_9$ z7EA}G2Qz>f!AxLgFbkL!%m!u$bAUO)Twrc6511Fs2j&M0fCa%qU}3NbSQIP<76(g! zCBaf)X|N1f7Ayyr2P=RT!Af9dunJfetOiyGYk)PuT3~Ik4pCf2YY}$!Cqi*un*W5><9J-2Y>^?LEvC;2sjiR1`Y>DfFr?C;An6RI2Mcm$ARO) z3E)I<5;z&00!{^|fz!bm;7o89I2()v=YUaQG#CTU1?PeD!3E$#a1po|Tmmiymx0T{ z72ry66}TE)1Fi+vf$PBy;6`v0xEb66ZUwi2+rb^+PH-2v8{7l#1^0pb!2{qy@DO+y zJOUmCkAcU*6W~ek6nGju1D*xXf#<;s;6?Bfcp1C`UInj#*TEa$P4E_Y8@vPF1@D1B z*5ZHT<-Xjx{`Sj#>DAwQxvyG^*ugvY{T}RpFZccL<-Y&D-1oni`~LTG-~V3j``^ob z|1ZDXcON}K578s^7(GEx(KGZMy+AL~EA$$@L2uDJ^d5acAJHfD8GS)t(YM%vA14VC zkt9ixG|7-G$&oxMkRmCOGO3U%sgXKqkS1x7HtCQq>5+-Z#AFgODVdB+PNpDJlBvkl zWEwIpnT||PW*{??naIp!7BVZDjm%EwAajzr$lPQeGB25r%uf~|3zCJ%!ekM$C|Qgw zPL?1`lBLMfWErw7S&l4ERv;^qmB`9u6|yQ>jjT@AAZwDf$l7EbvMyPVtWP!|8GKQQ>&LiiO3&@4!B62aggj`B4 zBbSpa$d%+Oay7YzTuZJa*OME_jpQbBGr5J_N^T>!lRL%5N`50T3a1DP zQ6xoCG{sOX#Zf#ZP$DH!GNn)|rBOO%P$p$jHsw$*K#8eV0DV2;$PNkqyQmLra zR2nKRm5xeJWuP)rnW)TE7Ah;1jml2tpmI{VsN7T@Dle6f%1;%b3Q~or!c-BeC{>Iq zPL-faQl+TUR2ix)RgNl8RiG+Tm8i;86{;##jjB%7plVXJsM=H=sxDQJs!uhb8d8m@ z##9rkDbPPL$XsFsv39j(jnpP;Gqr`NItRI!m3S&Qlkti_|6RGIfQzN?oI_Q#Yua)Gg{Zb%(l3 z-J|YP52%OKBkD2rgnCLnqn=YQsF&0$>NWL-dP}{d-cui_kJKmXGxde~N`0d+8m9>w z(IidLG|kW~&Cxt9&>}6-GOf@mtrmDV>Z?PN$$#(y8dw zbQ(G>osLdVXP`6Endr=P7CI}Pjm}QzpmWl>=-hN3Ixn4%&QBMh3(|$?!gLY3C|!&$ zPM4rd(xvFqbQ!uVU5+kKSD-7>mFUWJ6}l>2jjm4Dpli~#=-PB0x-MOhu1`0h8`6#F z#&i?9Dcy{2PPd?a=$5oE?MM65t?1Tt8#;h)OShxj(}8pl9ZZMN9q3Rxj1H$e(w*qe zbQiiS-Hq-}_n>>yz3AR_AG$BykM2(opa;@}=)v?5dMG`N9!`&-N7AF{(exO4EFD3Q zqsP+|=!x_sdNMtQo=Q)nr_(d≠4rHXTXNp`++%I)v&$^e%cgy@%dQ@1ytA2k3+J zA^I?Vgg#0iqmR=k=#%s*`ZRrpK1-jY&(jy^i}WS>GJS=Lgw^ey@}eTTkF z-=pu-59o*VBl`ZfKAeoMcj-_sxHkMt+{GyR4BN`Ip<24@Hc zF(gAVG{Z0~!!bM~Fd`!{GNUjmqcJ*TFeYO$HsdfZ<1vYt#7q(7A7l`jmggBU~)3KnA}VrCNGnZ$A#9hguij0tBtGM$*t zOc$mr(~arQ^k8~2y_nuiAEqzUkLk}0UE{naWIKrZY2`nanI^HWSIrVWOC5CWe{I%wy&=3z&t>B4#nOgjvcg zW0o^3n3c>bW;L^hS<9?r)-xNJjm#!yGqZ)+%4}n{Gdq}_%r0g(vxnKs>|^#b2bhD* zA?7f1ggMF_V~#T?n3K#Y<}`DLIm?`5&NCO7i_9hFGINEw%3NcvGdGx<%q`|NbBDRh z++*%D515C{Bjz#lgn7z5W1cfFn3v2e<~8$%dCR#>R0#B35aDVvN<&Zb~fvZ>hA zY#KH#n~qJ-W?(b2nb^#17B(xJjm^&HU~{s$*xYO$HZPlx&CeEK3$lgS!fX+?C|isz z&X!>&N=Dt=QIV8#aJ#%eG_Nvw>_78_b5V9oSGdj16ZyvYpt@ zY!|jG+l}qc_F#Lmz1ZGtAGR;skL}M6U@IdUyNBJ&?qm0}2iSw` zA@(qPggwe0V~?{Z*puuj_B4BjJ@D^-dxyQt z-ed2x57>w7Bla=-gni0BW1q7x*q7`p_BH#4eapUM-?JaskL)M*Gy8@8%6?-p4(A9C zaU@4^G{`)8!mur%eCX$bAenC7tDol9k@^~j0@*Fa-F!& zTooj z@Fs8ZHt+B*@9~NF#C#GyDW8l_&Zpp0@~QaL zd>TG2pN>z@XW%pPnfT0n7CtMVjnB^K;B)f1_}qLRJ};k-&(9a&3-X2d!h8|FC|`^( z&X?dz@}>CFd>OthUyd)&SKur1mH5hh6}~E8jjztv;A`@=_}Y9OzAj&nug^E&8}g0# z#(WdLDc_85&bQ!w_?Emc@5lS|t@ze_8$N(<%eUj(^MQO2AIyjF9r#c_j1T8K@}2n3 zd>6he-;M9i_uzZ-z4+dIAHFZ&kMGY9;0N-9_`&=TekebTAI^{9NAjci(fk;GEFZy- z_U(2uK*Yg|rjr=BlGrxu3%5USh^E>#R{4RbszlYz;@8kFL2l#{h zA^tFbgg?q3=r8{xpAvKg*xv&+`}fi~J@2GJl1?%3tHJ^EddL{4M@Ae}})z z-{bG|5BP`tBmOb}gn!CE|gzzMt{2%;bfvY-g6pb5HQ2&P~Ow%`b^;0cL@#6l7wsgO)aE~F4r3aNzD zLK-2hkWNT1WDqh6nS{(j79p#UO~@|f5ONB+gxo?NA+L~6$S)KS3JQgU!a@R1hi(m4wPd6``t7O{gx^5NZmwgxW$Kp{`I*s4p}S8VZes z#zGUJsnASlF0>GQgqDJ@;3xPCt%TM>8zDewE3^~Z3xPtA5G;fU9fVLJOb8b`3Y~<` zLKmT{&`szr^bmRqy@cLEAEB?%Pv|cU5C#f^gu%iPVW==n7%q$uMhc^Z(ZU#EtPmlL z6UGY@go(l=VX`nqm?}&YrVBHKnZhh#wh$@I5u$`>Ax4-h%oFAd3xtKjB4M$xL|7^; z6P61rgq6Z7VYRSESSzd()(abijlw2jv#>?jDr^(B3p<3J!Y*OAut(S{>=X722ZV#d zA>puaL^vuO6OIcfgpeY8;Xs@ z#$pq(sn|?xF18ST#FnD3=qLJ%t;E)18!}L|iH^ 
z6PJrC#FgSIakaQcTq~{<*NYp(jp8P8v$#dvDsB_Ei#x=f;x2KwxJTS8?i2Tm2gHNo zA@Q(yL_8`U6OW50#FOGF@w9kGJS(0P&x;qti{d5mvUo+jDqa(>i#NoZ;w|yEct^Y| z-V^VO55$M!Bk{5LM0_ef6Q7GO#FyeL@wNCyd@H^a--{o_kK!lsv-m~)Dt;3&36}^7 zNu)$cw8TiP#7Vp)NTMW3vZP3=q)EDDNTy^-w&X~zeNR8A@{Rgfx5m88m26{)IJO{y-{kZMY`q}oy)sjgH{sxLK=8cL0% z#!?fhsnkqrF13(+q?VGey`0ibX|gm$nkr3`rb{!VnbIt2wiGGNk)ot%DMp$r&6DO!3#5h8B5AR-L|Q5> zla@;>q?OVtX|=RQS}U!S)=L|tjnXD*v$RFpDs7XtOFN{U(k^MYv`5-2?UVLP2c(12 zA?dJmL^>)Rla5O#q?6Jq>9ll4IxC%%&Px}hi_#_OvUEkdDqWMVOE;vO(kbVs@? z-IMN152T0EBk8g9M0zSclb%a2q?ghw>9zDmdMmw?-b){(kJ2aUv-Cy!Dt(hM8J7te z$)rrlw9Lq?%*nhg$f7LCvaHCetjW4;$fj(`w(Q8R?8%Ab#BvfjshmtsE~k)F%Bkek zavC|UoK8+JXOJ_>ndHoJ7CEb&P0lXokaNnp?ixnt>o5n8#zF3E4P!|%Ykx`94v>(9pq3sOb(Yj%AMrS zau>O)+)eH-_mF$az2x3>Dyfvz zN*X1tl1@plWKc3HnUu^*7A32aP06n0P;x4{l-x=lC9jfC$*&Yp3Mz$^!b%aPs8UQR zu9Q$pDy5XtN*SfBQcfwaR8T4^m6Xa#6{V_DO{uQbP--f*l-f!irLIy>sjoCp8Y+#H z#!3^VsnSeouC!2ml$MIG;-~m4t(4YE8zn$#tF%+vD}hRo60C$M9h6WdObJ&yDxH+h zN*AT8(oN~E^iX;#y_DWcAEmF-PwB4=PzEZ4l)=gnWvDVt8Lo^_Mk=F}(aIQQtP-J& zQ^qS3l!?kDWwJ6wnW{`vrYkd)naV6>wi2n#QKFP+B}SR6%v0to3zUV*B4x3%L|LjV zQ{IqD2b6=# zA?2`gL^-M)Q;sVql#|LS<+O4}Ijfvg&MOy`i^?VCvT{Yas$5g9D>syz$}Q!#a!0wV z+*9r=50r<>BjvI3M0u(_Q=Tg?l$XjY<+bugd8@or-YXxJkIEZys;#A*^XshUhpuBK2^s;Sh} zY8o}InodoxW>7P#nbgc`7B#DyP0g<6P;;ue)ZA(wHLsda&94?v3#x_G!fFw)@e z#%dF_soG3!uC`Ep)RwBR>Zkgvt<=_P8#O>}tF}|ytAT2e8mxw>9n?@YObu5%s-4u% zY8SPu+D+}Q_E3AOz0}@nAGNRAPwlS`PzS1m)WPZyb*MT_9j=a0N2;UL(drm=tQw(? zQ^%_l)QRdOb+S4|ovKb#r>is6nd&Tcwi>CMnJ+x<}os?o;=x2h@Y= zA@#6&L_Mk=Q;(}B)RXEd^|X3MJ*%Em&#M>Ii|QryvU)|ms$NsCt2fk}>MixQdPlvh z-c#?Z57dY1BlWTRM1870Q=h9Z)R*cj^|ks&eXG7x->V34c7<_ zX{1JJw8m(x#%a7JXrd-*vZiRNrfIrnXr^Xqw&rNA=4pwv#99(9sg_JjuBFgYYN@o; zS{f~_mQG8rWzaHenY7GW7A>omP0Oz3&~j?IwA@-AEw7eO%dZvC3TlP4!delns8&oX zu9eVAYNfQ&S{bdZR!%FgRnRJGm9)xQ6|JgPO{=ce&}wS6wAxx7t*%y2tFJZC8fuNS z##$4tsn$$uuC>s7w3eE$=BN2KG&C}*<3$%sWB5kp@L|dvY z)0S&1w3XT_ZMC*WTdS?p)@vKIjoKz{v$jRss%_J@Ydf@^+AeLkwny8m?bG&a2egCQ zA?>hsL_4Y-(~fH=w3FH??X-4AJFA`3&TAL6i`pgavUWwgs$J8rYd5r;+AZz2c1OFb z-P7)C544BcBki&FM0=_|)1GTDw3pf|?X~tsd#k7-8Sw9e?P&gr}^=%Oy^vaaZ=uIajN=%#Mzw(jVz?&*p2#Cj4vsh&(vuBXsb>Z$b9 zdKx{go=#7%XV5e1ne@zh7Coz;P0z09&~xg!^xS$LJ+GcmI3+jdR!g>+Cs9sDj zu9whD>ZSD3dKtZ}UQREsSI{f!mGsJb6}_rnO|P!k&}-_o^xAqIy{=wQudg@I8|sbp z#(ERIsoqR)uD8&A^p?7>?x*|ft@PG<8$Cd8tGCnJ>w$WZ9;}Dx9rRE=Ob^#P>Yen? zdKbN`-c9eW_t1Olz4YFCAHA>MPw%e}&W(fSyDtRA6{ z)5q%*^ojZ;eX>49pQ=yOr|UEHnffe!wjQa^(WCTeJw~6a&(r7Y3-pEhB7L#GL|>{e z)0gWj^p*N5eYL(uU#qXv*XtYfjrt~iv%W>&s&CV`>pS$F`YwI9zDM7y@6-3|2lRvb zA^os^L_ew@(~s*X^ppB2{j`2YKdYb9&+8ZTi~1$~vVKLss$bKu>o@e9`Yrvoen-En z-_!5w5A=unBmJ@dM1QJ3)1T`v^q2Z8{k8r^f2+UK-|HXrkNPM5v;IZ@s(;fl12+f* z8KglOw80px!5O?E7@{E=vY{BNp&7bi7^Yzvw&57A;TegH#6}V$sgcY`Zlo|$8mWxb zMj9inkX&B$)#Fmf8XjNC>ZBd?Lq$Zr%d3L1rs!bTCJs8P%) zZj>-e8l{ZVMj4~5QO+oDR4^(Um5j?^e}oFy^P*QAEU3)&**OqFa{cfjKRhbW2iCA7;cO(MjE4x(Z(2KtPx?1 zGsYVejETl1W3n;Dm}*QjrW-SinZ_(*wh?K}F`|rUBgU9(%roX23yg)vB4e?!#8_%9 zGnN}GjFrYJW3{oySZk~^)*Bm)jm9Qpv$4h4YHTyM8#|1h#x7&GvB%hJ>@)To2aJQp zA>*)d#5igkGmaZ4jFZMG3FYJ4*=6E_JH znWRaXw8@yP$(g(6wYl#AXsRshP}7Zl*9(nyJjx zW*RfCna)gaW-v3Fnas>)7Bj1v&CG7*Fmsx@%-m)kGq0J?%x@Mj3z~(@!e$Y(s9DS` zZk8}hnx)LrW*M`rS1Xnw`wf zW*4)o+0E>3_Aq;zz0BTbAG5F7&+KmwFbA50%)#ambErAY9Bz&);%<}P!$xyRgV?lbqB2h4-! 
zA@i_##5`&qGmo1m%#-FR^R#)!JZqja&zl#_i{>TsvU$b4YF;z1n>Wmx<}LHKdB?nK z-ZSr;56p+=BlEHO#C&Q#GoPC;%$MdX^R@ZLd~3cl-NoA9js6*%nG+UTAi%U zRu`+Q)y?W|^{{$cy{z6=AFHp`&+2atum)O#tije0Yp6BM8g7lSMp~n+(bgDitQBF6 zv&LH!tclhnYqB-Pnrcn6rdu|8_jn*b>v$e(AYHhQ&TRW_s)-G$ewa40P?X&h<2dsnE zA?vVp#5!snvyNLQtdrI$>$G*oI%}P?&RZ9(i`FIUvUSC}YF)FgTQ{tm)-CI{b;r7E z-Lvjn53GmQBkQsC#CmEyvz}Wote4g+>$Ua9dTYJ2-di86kJcyav-QRLYJIaX8@CA? z*`!U`w9VM8&Dp#y*rF}jvaQ&vt=YP5*rsjSw(Z!i?b(U!#C8%psh!MDZl|zQ+Ntc+ zb{adaoz6~gXRtHcne5DV7CWn*&CYJ;uyfkE?A&%9JFlJ3&Tkj63)+S3!gdk6s9nr1 zZkMo2+NJE$b{V^@UCu6VSFkJEmF&uP6}zfk&8}|Ouxr}2?Amr6yRKc&u5UN68`_QR z#Csol(OZnv;~?3T8#?PvSjt?brz8#};mYqzu8+ktkF9c+i#9qdp$%nr9Z+MVpq zb{D&=-OcW9_pp1~z3kq0AG@#J&+cyzum{?M?7{XBd#F9k9&V4YN7|$8(e@a7tQ}#G zv&Y*L?1}ayd$K*no@!6Cr`t2^nf5GuwjF8Dv7_v0JI0=C&$H*-3+#pVB73pD#9nGI zvzOZ|?3MN^d$qmBUTd$j*V`NHjrJycv%SUMYHzc*+dJ%?_AYz3y~o~b@3Z&Y2ke9P zA^Wg>#6D^tvya;+?34B>`?P(=K5L(|&)XO5i}oe^vVFzAYG1Rj+c)f+_AUFieaF6Q z-?Q)A5A28bBm1%a#C~c&v!B~9?3eZ{`?dYXervz8-`gMTkM<|~v;D>XYJama2X_bu zIiy25w8J>8!#TVoIHDstvZFYvqdB@`IHqGcw&OUi<2i|(#7+_?sguk}?xb*1I;ou0 zP8uhzlg>%+WN6{o6G&8hCxaB4cWoZ3zur>;}asqZv!8aj=f z#!eHbsng79?zC`xoR*HS$G#)JAqD+6YPXI9h^`n%n5foI-Q)( zP8X-E)6MDb^l*AQy`0`oAE&R=&*|?Ba0WVqoWafzXQ(sG8Sad5MmnRM(asoWtP|mk zbH+OpoQcjPXRTGkiJ3E}6&Ms%Sv&Y%%>~r=z2b_b> zA?L7j#5w95bB;SFoRiKe=d^RiIqRHr&N~;Ji_RtIvUA0`>RfZKJ2#x0&MoJ*bH};s z+;i?b51fb2Bj>U6#ChsGbDlddoR`ik=e6_3dF#A$-a8+hkIpCOv-8FI>U?uB7k3F4 zxui?Ew9B}x%elNOxS}h$va7hNtGT*sxTb5lw(GdA>$!>C#BLHdshiAA?xt{4x~bgM zZW=eOo6b$|W^gmQncU277B{P#&CTxSaC5r3+}v&+H?Nz|&F>a)3%Z5e!fp|_s9Ve} z?v`*%x~1IGZW*_%Th1--R&Xo2mE6j16}PHe&8_a%aBI4?+}ds(x2{{yt?xE)8@i3$ z#%>e0soTtL?zV7!+?KAd>*xBrt=!gb8#lmh>$Y>-yMbTYwlyF1*S?k;z?yT{$@?sNCM2i$}1 zA@{I*#69XBbC0_x+>`Dp_q2P)J?oxx&$}1gi|!@&vU|n7>RxlNyEojM?k)GWd&j-& z-gED}58Q|DBlofU#C_^MbDz5}+?Vbv_qF@Ree1q+-@6~&kM1Y;v-`#U>V9)E5BCTU zd89{qw8wa?$9cRbc%mnHvZr{er+K<(c&2B0w&!@R=Xr^|#9k6Fsh7-4?xpZjda1nB zUK%g0m(EM?W$-e3nY_$i7B8!p&CBlP@N#;&yxd+MFRz!+%kLHN3VMaS!d?-ts8`G@ z?v?OLdZoP5UKy{fSI#T%Rq!f$mAuMc6|bsS&8zO!@M?OsyxLwJudY|mtM4`N8hVYq z#$FSzsn^VF?zQlIyq2D?=jZu*t-RJ=8!y0X>$UURdx2h%7wm<29lTI4%nSEAdY!z^ zUKg*c*Ujth_3(Ony}aIDAFr?1&+G3E@CJH=yuscOZ>TrS8}5zpMtY;X(cTzutQX;p z^TvATUD3dpo?H-Y##qx5wM-?eq3~2fTyc zA@8tv#5?L8^NxEbyp!H3@3eQuJL{eE&U+WUi{2&gvUkP1>Rt1$dpEqB-YxI8cgMTy z-Sh5y54?xoBk!^I#Cz&J^PYPzyqEtUY4_A6Mi;j2I#1F;M;#|Ui?eFh?AW$#+qP}n zwr$(CZQJI0$9m5-))?RJckdiO;jF5StHM8pe-8f={x$qt`1kN1;XlKFh5ru!6aF{+ zUw9<(pNK4?h!7D~L=(|P3=t|~idZ7Hh$G^PFcDA07YRf{kw_#KNkmeSOoR&|3}FgO z*uoL6kirwbP$CdoM2O@fg-9tvWe^>hsY^%iQFQO z$Sd-R{Gxy;C<=+fqKGIeiizT)geWOWiPEBsC@ac|@}h#MC@P7{qKc?0s)_2NhNvlO ziQ1x$s4MD;`l5koC>n{zqKRlKnu+G3g=i^SiPoZxXe-)@_M(I6C_0JGqKoJ%x{2U~m@DRq`C@@sC>DvuVu@HPmWkzJg;*(8iPd6_SS!|v^5U9VMkAAv*~nsKHL@AmjT}Z!BbSle$YbO+@)`M!0!BfjkWttuViYxs z8O4nfMoFWTQQ9bDlr_p3<&6qPMWd2Y*{EVvHL4lajT%Nxqn1(IsAJSM>KXNo21Y}p zkSk+IlVVk|Y58Ox0o#!6$AvD#Q;tTomd>x~V@Mq`t)+1O%i zHMSYsjUC2LW0$ep*kkN9_8I$)1I9t)ka5^JVjMM&8OMzi#!2IpaoRXzoHfoF=Zy=- zMdOlj*|=g{HLe-gjT^>IncW-2qanZ`_OrZdx<8O)4kCNr~{#ms7EGqamH%$#N}Gq;(?%xmT|^P2_Cf@UGJ zuvx?`Y8Eq#nzfVChGrwPvDw6IYBn>Qn=Q}&Qj`R`By+Mk#hhwR zGpCy~%$epabGA9hoNLZA=bH=6h2|o2vAM)tYA!REn=8zf<|=cwxyD>;t~1x08_bR7 zCUdj7#oTIcGq;;N%$?>gbGNz2+-vSL_nQaIgXSUguzAEhY92F>nGq0OB%$w#d^R{`%yldVw@0$O*BU%4hk*z3Jh!xd} zW<|GRSfN%-E0z`8ietsK!mM~!d@F&K&`M+_wvt#$tz=fXB`m`-Ez7bk$8s%cd6sV} zE3mW`VI{XxSShVkR%$DamDWmUrMEIz8LdoKW-E)8)yigNw{loHtz1@aE02}e%4g-b z3RnfLLRMj`h*i`oW)-(eSS77eR%xq@Rn{tJmA5Kb6|G8EWvhx+)v9Jyw`y25ty)%X ztBzIIs%O==8dwdjMpk31iPh9Sy)023P~FLDpbvh&9w2W(~JSSR<`b)@W;tHP#wujkhLP6RkyCBTx@XyP!<`e#M5|Fa|8 
zQS1;qsvXUaZpW}g?U;5fJGLFij%$b6@$C3^0z09d$WCl0v6I@#>~LGyhHcuGZQG9R z+S2xH-&S^DYdgYDZl|zQ+Ntc+b{adaoz6~gXRtHcne5DV7CWn*&CYJ;uyfkE?A&%9 zJFlJ3&Tkj63)+S3!gdk6s9nr1ZkMo2+NJE$b{V^@UCu6VSFkJEmF&uP6}zfk&8}|O zuxr}2?Amr6yRKc&u5UN68`_QR#Csol(OZnv;o+O6!?b{o5`-Og@rcd$F!o$Stb z7rU$7&F*gZuzT9Q?A~@CyRY5P?r#sU2ik+|!S)b)s6EUcZjZ1>+N12z_85DtJn zZlADE+NbQ(_8I%Eea=2_U$8IQm+Z^-75l1v&Ax8muy5M8?A!Jo`>uV@zHdLUAKH)X z$MzHZsr}4;ZojZ!+OO=__8a@H{my=Gf3QE=pX|@}7yGOI&Hirxuz%XW?BDhu`>*}a zj^zC3M0TP$Ax=~$niJiL;e33K8(@tp)tLMM@v*h%6fb&@&Zj&Kae zbS%eq9LIH}<2k;goWRjegp=G!;iPm@IjNmAPFg3OlitbTWOOn)nVl?7RwtX2-O1tP zbaFYlojgunC!dqwDc}@z3OR+HB2H1Km{Z&-;gobrIi;O4PFbg%Q{JiIRCFpim7OY1 zRi~O$-KpW!bZR-ZojOikr=C;aY2Y+;8aa)fCQehQnbX{9;k0yGIjx;GPFts))86Ue zbaXm7ot-XDSErlP-Ra@P;m7CDQZCC*Z3nX}wk z;jDC4IjfyD&RS=kv)j*Ip7?04mpRN zBhFFhm~-4Y;hc0%Ij5a7&ROT2bKbe&Ty!oumz^ulRp**>-MQi1bZ$AfojcB5=bm%l zdEh*B9yyPlC(cvnne*Iv;kq^&ieOI}GtKA4UxtqdG>85g1yJ_6CZaO!;o59WKW^yyTS=_8{ zHaEMQ!_DdDa&x6UU!yJg(6ZaKHSTfwd9 zR&p!5Rotp>HMhE3!>#Goa%;PF+`4W(x4zrJZRj>~8@o;1rfxI0x!b~R>9%rPyKUUI zZacTV+rjPVc5*wrUEHp2H@Can!|mzza(lad+`eu5g(oyJOt3?l^b6JHegkPI4!^Q{1WUG8^5DyKCIF?mBn9yTRS)ZgMxfTimVgHg~(b!`HxO>7q>7H^=yJy_9?m73od%?ZvUUDzHSKO=aHTSxE!@cR=a&NnL z+`H~Q_rCkUeds=NAG=T7r|vWNx%>ArGbyKmgL?mPFr`@#L_esVv%U)-R+u$>=hM43#luEE!wIk#S|1j3?vE1TvvaBooUd zGO0`^!=;dhG^HhN=}1>f=}BKI8AvT7WOA88rj)586BWN}$SmXxJrX<0^=mE~l4SwU8mm1JdE zMOKy7WOZ3X)|9nmZCOXwmGxwO*+4dwjbvllL^hSpWOLaMQ)Yb-L>`sL{fH7sU(lqI%K1 z=w1vj)QjoG@?v{&ytrPN7tf3DCGZk@iM+&K5-+Kj%nSE~XLzP(dA8?xt|vXu^F8GS zp7tWV7P zub@}RE9@2Vih9Mo;$8`_q*ux-?UnJ$dgZ+GUInkBSIMjFRq?8N)x7Fn4X>tG%d73x z@#=c@y!u`Puc6n-YwR`gntIK==3Wc0rPs=9?X~gRdhNXSUI(wE*U9Vbb@94--MsEz z53i@!%j@m+@%noGy#C$*Z=g5G8|)47hI+%i;ob;uq&LbN?TzuqdgHwD-UM%=H_4mq zP4T9B)4b{43~#14%bV@Z@#cE-y!qY&Z=tuyTkI|ImU_#)<=zT!rMJpk?XB_Fdh5LP z-Ue@@x5?Y=ZSl5x+q~`G4sWNo%iHbk@%DQAy#3w*@1S?cJM10tj(W$ulC-f8fiTxygQa_m=?hD`WP2ciu-|<~v z`kwFm$`5?)NBGJ86n;uSm7m&A?!e~>@eAL0-7hxx<(5&lSjlt0=Zn`Q!Zw z{zQM0KiQw+PxYty)BPF#On;U?+n?jl_2>EX{RRF)f04h~U*a$Im-);675++pmA~3w zPJfrb+u!5w_4oPv{R93%|B!#!KjI(tkNL;_ z6aGp6lz-Ykgff(=EM+T4 zxk@Te`AVrkX%(T8s}w4wN~Kb(G%Br1r_!qoDx=DzGOH{qtIDRbs~jq)%B6CvJSwls zr}C=;s-P;Q3acWjs4Aw4s}icDDy2%RGODa9r^>4es-mi-Dyu50s;Z`{s~W1Ns-^r`oFys-xZN+CKB}+kr~0b_YM>gV2CE@zs2Zk*s}X9X8l^_7F>0(Dr^c%ZYNDE?CaWoGs+y*z zs~Kvhnx$r|Iclz&r{=2#YN1-B7ON#{samF%s}*XcTBTO2HEOL|r`D?tYNOhuHmfaa ztJYzHL4yz;Ts5+*Os}t&^I;BplGwQ55r_QSj>Y}=& zE~_i*s=B7Gs~hU3x}|QbJL;~wr|zo<>Y;k19;+wnsd}cKs~75}dZk{gH|ni=r{1d% z>ZAIkKC3V4tNNzCs~_s8`lWuWKkBdgry>Ra1(Aa&K}Zlah!#W-Vg#W<%pg_}JBSm+ z4Z?zWLHr;=kT6ITBo2}UNrPlTcpw5JFas;F11E3;8F+yos2~V*5D_E~QUoc3R6*(> zO^`N77o-m|1Q~-&LFOP!kTu8_WDjx#IfGn5?jTQ)H^>*{4+;bYgF->!ph!?OC>9hC zN(3c?QbFmUOi(r`7nBbw1QmlyLFJ%IP&KF)R1azdHG^6~?VwIjH>elX4;lmwgGNE) zph?g)XcjaNS_CbFRzd5aP0%)I7qkyL1RaA;LFb@L&^725bPsw2J%e6B@1Rf6H|Q7i z4+aDSgF(UIU`Q}D7#0i|jnXH<%a94;BOqgGIsOU`envSQab~Rs<`9Rl(|DO|Ujt7pxC91RH}*!RBB~ur=5g zY!7w>JA+-p?qE-_H`o{K4-NzegG0gL;7D*ZI2IfaP6Q`|Q^D!rOmH?h7n~0+1Q&x# z!R6pea5cCVTn}ypH-lTj?ch#uH@Fwv4;};$gGa&R;7RZ_cosYlUIZ_LSHbJxP4G5& z7rYNX1RsM>!RO#h@HO}rd=GvEKZ9Sv@8D1HH~1Gs(*NnmI*Ja_QFSyOUB}R&I;M`L zW9v9Nt`5`jbbOsaC)9~_Vx2@M)yZ_Y7TVCJwzRDs?P{q#?Q5k2t#yP>u2blgI+aeX z)9AE1oldVa=!`m(&aAWOtU8;{u5;*|I+xC^^XR-fpU$rf=z_YCF06~_qPmzau1n~W zx|A-h%jmMYoGz~`=!&|MuB@x*s=Au4u50L;x|Xi3>*%_=p02MO=!UwHZmgT=rn;GK zu3PAqx|MFN+vv8soo=r?=#ILR?yS4$uDYA*=z)5W9;}Dx zp?a7eu1DyRdXyfm$LO(ooF1~ulk$*u7BvC`j`H# z|LDK^pNsJnOoM4L9j3<&m=QB!X3T_y7RM4;5=&ueEQ4jS9G1rlSP?5>Wvqf#u^Lv#8dwu+VQs8~ zb+I1S#|GFC8)0K?f=#g*Hpdp&5?f(wY=dpF9k#~~*bzHnXY7Jqu^V>B9@rCmVQ=h% zeX$?*#{oDH2jO5GfxDhwuX54~XaT{*O9k>&B;cnc6 
zdvPD`#{+l}58+`vf=BTf9>)`S5>Mf2JcDQP9G=Guco8q*WxRq{@fu#o8+a3M;cdKw zckv$H#|QWjAK_zsf=}@oKF1gM5?|qKe1mWC9lpm8_z^$hXZ(U+@f&`}ANUi0;cxtd zfAJsw&)Dq$d;ei%jDjH;6{BHvjDevT6Jud)jDvA84C7&ZOn?b75hlhYm=u#?I0`h- zLqLqPRxb5F%Ra& ze3%~#U_mT|g|P@0#bQ_-OJGSXg{83!mc?>d9xGr)tb~=Z3RcBxSRHF%O{|5ru@2V7 zdRQMDU_)$#jj;(f#b($XTVP9Ug{`p-w#9bX9y?%1?1Y`M3wFhB*d2RdPwa)gu@Cme ze%K!e;6NONgK-EB#bG!cN8m^tg`;r{j>T~}9w*>LoP?8c3QomoI2~u;Oq_+YaSqPK zc{m>z;6hx4i*X4q#bvl0SKvxqg{yH5uElk@9yj1d+=QEP3vR`2xE*)kPTYmNaS!gr zeYhVF;6Xfuhw%s=#bbCJPvA*Bg{Schp2c%`9xvcUyo8tW3SPx)cpY!xO}vG-@eba_ zdw3ro;6r?bkMRjU#b@{&U*Jo8g|G1qzQuR=9zWnm{DhzJ3x36K_#J=XPyB_y@elsR zfA~LB^Z(m_jEqq*1fya!jE*rd6k}p6jE!+HE{0(|jE@O0Atu7am;{qzG7Lw72AXK0 zjSjjf(L)~<2B%!rvVGiJf8m<_XI4$O(UFgNDGyqFL3 zV*xCPg|ILd!J=3Ui(?5aiKVbKmcg=E4$ET&tcaDcGFHK=SPiRV4XlZ^ur}7gx>yhE zV*_l6jj%B`!KT;@n_~-XiLJ0Tw!ya84%=e~?1-JPGj_qQ*bTd55A2D(us8O>zSs}@ z;{Y6pgK#ho!J#+|hvNtwiKB2dj=`}w4#(pJoQRWfGETv%I1Q)c44jFxa5m1txi}B! z;{sfWi*PY6!KJtim*WatiK}omuEDjq4%g!b+=!cSGj74HxDB`C4%~^ma5wJ3y|@qe z;{iN~hwv~S!J~K#kK+kEiKp;1p24$t4$tESyoi_ZGG4)}cnz=P4ZMlB@HXDTyLb=p z;{$w%kMJ=*!Ke5PpW_RBiLdZAzQMQn4&UPk{D`0MGk(FZ_zl0~5B!P0@HhU!zxWUT zzYqQYMfu<7fXEmHLog~v!{`_TLop`C!q^xG<6;=b!}yp06JjDvj7cylCc|(PXrPG} z+UTH*5Js)Gh-IairFwb=D?ho3v**0%!~Oj zKNi4(SO^Pa5iE+uusD{$l2{5$V;L-q<*+*1(!r3u|K?tc&%q zJ~qIH*a#bA6KsmjusOECme>kgV;gLX?XW#|z>e4nJ7X8@irug~_Q0Ol3wvW9?2G-d zKMufwI0y&h5FCoba5#>@kvIxR;}{%^<8VAqz==2sC*u^Hiqmj9&cK;C3uogToQv~t zJ}$t8xCj^H5?qSQa5=8PmADF5;~HFx>u^18z>T;GH{%xEira8I?!cY63wPrl+>85g zKOVq?cnA;U5j={=@Hn2plXwbG;~6}Q=kPpUz>9bZFXI)wir4Tu-oTr93vc5cyo>kn zK0d&Q_y`~46MTx#@HxJ~m-q@_;~RX7@9;f-z>oL|KjRntir?@%{=lF33xDGu{EPoE zQV8$=7#X8r2u8(d7#(9^D8|HC7#rhYTnxi_7#|a0LQI5-F$pHcWEhSD4K&e08y$2} zqK7^z3{YbPCdU+*5>sJnOoM4L9j3<&m=QB!X3T_y7RM4;5=&ueEQ4jS9G1rlSP?5>Wvqf#u^Lv#8dwu+VQs8~b+I1S#|GFC z8)0K?f=#g*Hpdp&5?f(wY=dpF9k#~~*bzHnXY7Jqu^V>B9@rCmVQ=h%eX$?*#{oDH z2jO5GfxDhwuX54~XaT{*O9k>&B;cnc6dvPD`#{+l} z58+`vf=BTf9>)`S5>Mf2JcDQP9G=Guco8q*WxRq{@fu#o8+a3M;cdKwckv$H#|QWj zAK_zsf=}@oKF1gM5?|qKe1mWC9lpm8_z^$hXZ(U+@f&`}ANUi0;cxtdfAJqiic0@6 zGDg7=jEd1PI>x|IjES)@HpaoY7>4mMJ|@6~mjwP@pmcr6l2FqeOERPkiB38o6SOu$MHLQ*`uqM{R+E@qcVm+*n4X`0L!p7JH zn_@F;jxDeyw!+rf2HRpgY>yqVBX+{h*af>{H|&l*uqXDy-q;8GVn6JU18^V?!ofHM zhvG0Cjw5g+j>6G62FKz!9FG%lB2L1|I0dKTG@Onza3;>e**FL1;yj#>3veMW!o|1* zm*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s!ozq3 zkK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A!pHao zpW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y;WOjs9a~jDjH; z6{BHvjDevT6Jud)jDvA84C7&ZOn?b75hlhYm=u#?I0`h-LqLqPRxb5F%Ra&e3%~#U_mT|g|P@0#bQ_- zOJGSXg{83!mc?>d9xGr)tb~=Z3RcBxSRHF%O{|5ru@2V7dRQMDU_)$#jj;(f#b($X zTVP9Ug{`p-w#9bX9y?%1?1Y`M3wFhB*d2RdPwa)gu@Cmee%K!e;6NONgK-EB#bG!c zN8m^tg`;r{j>T~}9w*>LoP?8c3QomoI2~u;Oq_+YaSqPKc{m>z;6hx4i*X4q#bvl0 zSKvxqg{yH5uElk@9yj1d+=QEP3vR`2xE*)kPTYmNaS!greYhVF;6Xfuhw%s=#bbCJ zPvA*Bg{Schp2c%`9xvcUyo8tW3SPx)cpY!xO}vG-@eba_dw3ro;6r?bkMRjU#b@{& zU*Jo8g|G1qzQuR=9zWnm{DhzJ3x36K_#J=XPyB_y@elsRe;6q`{l~}{1w$|@M#JbB z14A(;#=_Vb2jgNG#>4oS025*&OpHk|DJH{k6lkD{7TV~bixNHbQDJ}@BQQCpz?7H@ zQ)3!Ti|H^uX26V?2{U6B%!=7CJLbTgm;O(V-YNh#jrS*z>-)B zOJf-us$}xhS&%jV-swO&9FJPz?Rqw zTVoq+i|w#IcEFC<2|HsK?26s6JNCey*b94OAMA_$us;sKfj9^U;}9H*!*Do`z>zo# zN8=bAi{o%SPQZyc2`A$eoQl(MI?lkEI16Xv9Gr{ua6T@;g}4Y8;}Tqo%Wyfaz?HZP zSK}I7i|cSbZorMW2{+>w+=|<9JMO@pxC?jV9^8xja6cZvgLnuJ;}JZH$M86wz>|0i zPvaRpi|6n>UcifZ2`}Rnyo%TGI^MvWcnfdi9lVS8@IF4khxiB|;}d*}&+s|Ez?b+6 zU*j8mi|_C~e!!3T2|wc({EFZ3JO03*_zQpIAN-5|Fj5TqkC8D7hG0~ThS4zwhGI;N zg|RUX#>Fsc!U~)`>DKQnM#x$4~ z(_wndfEh6pX2vX-6|-S>%z-&E7v{!1m>2V5ek_0mu@Dxmq=6{}%&tbsML7S_f(SQqPIeQbaYu@N@LCfF34VRLMOEwL50#x~d% 
z+hKd`fE}?DcE&E)6}w?~?14S87xu^NPR1!X6{q2JoPjfO7S6^wI2Y&Pd|ZGFaS<-YCAbuq;c{GoD{&RB#x=MW z*Wr5HfE#fWZpJOR6}RDb+<`lB7w*PAxEJ@~emsB&@em%yBX|^#;c+~HC-D@X#xr;p z&*6EzfEV!+UdAhU6|doSyn#3I7T(4?co*;CeSClq@ew}8C-@Yf;d6X}FYy(=#y9vD z-{E`wfFJP_e#S5O6~Ezk{DD957yiaS_!s|Sq)_^gkueH}U{s8T(J=;wVoZ#Mu`v$D z#W0MA@i74=#6*}FlVDOzhT$mCKoc#r(Lom_dg!CV05wKna!i3KF%_o9G?*6CVS3Df z88H)P#w?f>vtf43fjKc3=Egjj7xQ6$EPw^E5EjNFSQLw4aV&u)u@siZGFTSNVR@{8 z6|oXl#wu79t6_DlfiY z6LAtw#wj=zr{Q#*firOy&c-=77w6%8T!0I45iZ6hxD=P+a$JEcaTTt{HMkbn;d@fE(tH~1Fc;d}gm zAMq1@#xM94zu|ZMfj{vV{>DG}7yn_TnDiebV-yU*s2B~SV+;(%m>3IVV;qc&VHgkN zV**Twi7+uH!K9cB!%?7tCR%8tgDy(+&_{&F!wSOQC8DJ+d;uq>9t@>l^YVkNAM zRj?{n!|GTAYho>|jdidt*2DVP02^W>Y>Z8?DK^9A*aBN(D{PHzur0R3_SgYCVkhj3 zU9c;5!|vDvdtxu_jeW2$_QU=-00-hA9E?M7C=SEnI08rFC>)Jra4e3)@i+k|;v}4m zQ*bIy!|6B!XW}fJjdO4=&cpe*02ksST#QR_DK5k1xB^$=DqM|ga4oLG^|%2y;wIdT zTW~9G!|k{Ocj7MGjeBq}?!*0f01x6JJd8*1C?3P(cmhx2DLjp5@GPFg^LPO-;w8L{ zSMVxc!|QkhZ{jVyjd$=a-oyL&03YHbe2h=_xJ%n;wSu! zU+^n_!|(V5f8sCvjeqbj{=-PI=s!lrC>VlKF&ak47#NB%F&4(gI2aehFdoLo1eg#L zVPZ^zNii9Qqd)^qw9rNeU6kmdj|v0S7=g(#1*XJQm>SbyT1i(0EQZCg1eU~7SQ^Vw}aN>~}IU{$P! z)v*TF#9CMz>tJ21hxM@mHpE8Q7@J^IY=+IT1-8Ui*c#hlTWp8zu>*F*PS_c{U{~yh z-LVJu#9r7N`(R(}hy8H?4#Yt?7>D3c9EQVj1dhZ}I2y;`SR9AraRN@nNjMp&;8dK3 z({TpQ#925S=ipqNhx2g(F2qH+7?_uyXKhx_pW9>ha<7?0plJch^d1fIlGcpA^(Sv-g5@d94NOL!Tt;8nba z*YO74#9Me9@8Dg$hxhRTKEy}(7@y!%e1^~Q1-`^r_!{5fTYQJ_@dJLuPxu+X;8*;H z-|+|j#9#Ou|KMNzhmm5_e~gS#Fa)DwG>nchFcf2AER2nDFfN8+JdBSCFd-(w#Fzw= zVloUzfd-mrp^Xl@DA7Y76$Yp=0+VA3Oo^#5HKxI|m=4op2F!?=Ff(Sste6e6V-C!T zxiB~8!MvCc^J4)lh=s5)7Qv!e42xq4EQzJCG?u}#SPsi$1+0jburgM`s#p!HV-2i{ zwXinU!Ma!v>th3Kh>fr@Ho>OY44Y#MY>BO~HMYUF*bduc2keNQurqeSuGkH`V-M_! zy|6d-!M@lJ`{Mu{h=Xu24#A-~42R*ZsI1b0-1e}PIa57H8sW=U%;|!dM zvv4-f!MQjO=i>rgh>LJBF2SX^442~yT#2i2HLk(6xDMCj2Hc37a5HYft+)-h;||=3 zyKpz|!M(T-_u~OPh==en9>Jq{43FapJc+09G@ik;cn;6w1-yut@G@S(t9T8s;|;ut zx9~RJ!Mk`5@8bh}h>!3wKEbE>44>l*e2K5{HNL^O_zvIW2mFYi@H2kFulNnW;}86a zzwkHy!N2$qBgLWr7#X8r2u8(d7#(9^D8|HC7#rhYTnxi_7#|a0LQI5-F$pHcWEhSD z4K&e08y$2}qK7^z3{YbPCdU+*5>sJnOoM4L9j3<&m=QB!X3T_y7RM4;5=&ueEQ4jS9G1rlSP?5>Wvqf#u^Lv#8dwu+VQs8~ zb+I1S#|GFC8)0K?f=#g*Hpdp&5?f(wY=dpF9k#~~*bzHnXY7Jqu^V>B9@rCmVQ=h% zeX$?*#{oDH2jO5GfxDhwuX54~XaT{*O9k>&B;cnc6 zdvPD`#{+l}58+`vf=BTf9>)`S5>Mf2JcDQP9G=Guco8q*WxRq{@fu#o8+a3M;cdKw zckv$H#|QWjAK_zsf=}@oKF1gM5?|qKe1mWC9lpm8_z^$hXZ(U+@f&`}ANUi0;cxtd zfAJqiic9}7GDg7=jEd1PI>x|IjES)@HpaoY7>4mMJ|@6~mjwP@pmcr6l2FqeOERPkiB38o6SOu$MHLQ*`uqM{R+E@qcVm+*n z4X`0L!p7JHn_@F;jxDeyw!+rf2HRpgY>yqVBX+{h*af>{H|&l*uqXDy-q;8GVn6JU z18^V?!ofHMhvG0Cjw5g+j>6G62FKz!9FG%lB2L1|I0dKTG@Onza3;>e**FL1;yj#> z3veMW!o|1*m*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du z2k;;s!ozq3kK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N z5AY#A!pHaopW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y;WO zM*lG~M!^t_iqSAS#=uaFiLo#?#=*E4hVd{yCcuQ42oqxxOp3`c90eL^qJ=g(=%Pdq zeN-5r#t2M~DKI6b!qk`s(_%VIj~Or{X2Q&v1+!u{%#JxQC+5Q3mKFp5=upkz~ z!dL{0Vlga^C9oux!qQj<%VIe!j}@>YR>I0y1*>8;td2FXCf35*SO@E3J*SeNC+@=CxCi&*KHQH7@E{(- z!*~Rb;xRmqC-5Ym!qa#L&*C{ej~DPFUc$?G1+U^YypA{UCf>r^cn9y|J-m+(@F70J z$M^)F;xl}XFYqP4!q@l)-{L!bk00&yZK`exYu?QB$ zVptqYU`Z^6rLhc_#d264D_}*egq5)hR>f*q9cy4stcA6)4%WqbSRWf;Lu`bNu?aTC zX4o8CU`uR;t+5TZ#dg>pJ77obgq^VqcExVk9eZF;?1jCt5B9}=*dGVrKpcdFaR?5@ zVK^K|;7A;Wqj3z5#c?Js)Gh-IairFwb=D?ho3v**0%!~OjKNi4(SO^Pa5iE+uusD{$ zl2{5$V;L-q<*+*1(!r3u|K?tc&%qJ~qIH*a#bA6KsmjusOEC zme>kgV;gLX?XW#|z>e4nJ7X8@irug~_Q0Ol3wvW9?2G-dKMufwI0y&h5FCoba5#>@ zkvIxR;}{%^<8VAqz==2sC*u^Hiqmj9&cK;C3uogToQv~tJ}$t8xCj^H5?qSQa5=8P zmADF5;~HFx>u^18z>T;GH{%xEira8I?!cY63wPrl+>85gKOVq?cnA;U5j={=@Hn2p 
zlXwbG;~6}Q=kPpUz>9bZFXI)wir4Tu-oTr93vc5cyo>knK0d&Q_y`~46MTx#@HxJ~ zm-q@_;~RX7@9;f-z>oL|KjRntir?@%{=lF33xDGu{EPoEQUdyqkueH}U{s8T(J=;w zVoZ#Mu`v$D#W0MA@i74=#6*}FlVDOzhT$mCKoc#r(Lom_dg!CV05wKna!i3KF%_o9 zG?*6CVS3Df88H)P#w?f>vtf43fjKc3=Egjj7xQ6$EPw^E5EjNFSQLw4aV&u)u@siZ zGFTSNVR@{86|oXl#wu79t6_DlfiY6LAtw#wj=zr{Q#*firOy&c-=77w6%8T!0I45iZ6hxD=P+a$JEcaTTt{ zHMkbn;d0vz>oaI z&-}u#{KoJ6!Jqua-~7YB{Ko*1^q&D4h=Cb|K^cs}8G<1hilG^XVHu9$8G#WQiIEwF zQ5lWV8G|tyi?JDpaT$;CnScqIh>4kmNtukvnSv>qim91~X_=1cnSmLZiJ6&&S(%O5 znS(i*i@BMHd6|#-S%3vuh=o~%MOlo+S%M{5ilteGWm%5pS%DQh8VP1%gi*@7+Eimlm(ZP||P*?}F|iJjSnUD=J@*@HdVi@n*0 zec6xwIe-H>h=VzVLphAYIf5fOilaG(V>yoFIe`;7iIX{nQ#p;(IfFAfi?cb0b2*Rm zxqu6~h>N*|OSz28xq>UXimSPXYq^f=xq%zGiJQ5FTe*$fxq~~oi@Ujpd%2JMd4LCb zh=+NEM|q6Ld4eZ-il=#oXL*k2d4U&siI;hWS9y)sd4o53i??})cX^NZ`G61kh>!V% zPx*|``GPO`im&;GZ~2bz`GFt#iJ$p}U-^yS`GY_Ci@*7YfBBCABI`c`G7tkZ2!k>h zgEIs}G898I48t-U!!rUSG7=**3ZpU_qca9$G8SVq4&yQ&<1+yhG7%Fq36nAzlQRWV zG8I!Z4bw6m(=!7zG7~d13$rpCvoi;CG8c0*5A!k~^Roa8vJeZi2#c~9i?akvvJ^|R z49l_{%d-M2vJxw^3ahdjtFs1cvKDKz4(qZW>$3qHvJo4z37fJRo3jO5vK3pi4coFE z+p_~ZvJ*SA3%jx#yR!#-vKM=^5Bsto`*Q#Xau5e|2#0bQhjRo+aui2%499XD$8!QF zauO$V3a4@!r*j5pau#QE4(DU62#@j@kMjgi@)S?=4A1f$&+`H=@)9re3a|1S zuk!|P@)mFN4)5|F@ACm4@(~~N37_&ApYsJ@@)ck64d3z|-}3`M@)JMv3%~Lkzw-xw z@)v*e5C8HX14Pk(24o-xW)KEtFa~D`hGZy)W*CNLIEH5gMr0&LW)wzcG)89(#$+tU zW*o+4JjQ1NCS)QeW)dc4GA3sVrerFnW*VktI;Lj^W@IL2W)@~;HfCoI=43ABW*+8c zKIUfu7Gxn7W)T);F&1YDmSicGW*L@cIhJPyR%9hsW))UtHCAU0)?_W#W*ydLJ=SLf zHe@3<{6&lIiBYQUgRZS<`rJ$HD2cp-sCOb<{jSUJ>KU7KI9`l<`X{UGd|}FzT_*u z<{Q4{JHF=!e&i>9<`;hDH-6_2{^T$I<{$p$KL&`Z{|v}L49p-5%3uu65Ddvs49zeM z%Ww?O2#m-`jLayE%4m$v7>vnSjLkTV%Xo~>1Wd?8Ow1%q%4AH=6imrfOwBY*%XCc7 z49v((%*-sz%52Qe9L&jF%*{N^%Y4kw0xZZvEX*P-%3>_e5-iD5EX^`3%W^Ew3arRV ztjsE`%4)368m!4$tj#*C%X+NO25iViY|JKX%4TfN7Hr8@Y|S=o%XVzf4(!NI?949g z%5Ln=9_-0p?9D#x%YN+70UXFd9Lymc%3&PN5gf@;9L+Ht%W)jf37p7DoXjbl%4wX= z8Jx*koXt6$%Xys71zgBQT+Ah0%4J;6613bt>Jj^3J%40mv6FkXNJk2va%X2)>3%tlnyv!@S%4@vN8@$O|yv;kj z%X_@f2Ykp!e9R|&%4dAe7ktTAe9bp}%XfUw5B$ha{LC->%5VJ6ANojI73xtN=In3wsOp9NTug;tLmw1_1c$L?9oi})sw|JX(c$fEhpAYzukNB8R_>|B1oG@KzxbPf_?Q0}AiDlDAOkTlgD@zAF*rjoBttPY!!RtvF+3wMA|o*} zqcAF?F*;)~CSx%+<1jAcF+LM8Armn%lQ1chF*#E(B~vjq(=aX5F+DRdBQr5GvoI^O zF*|cGCv!13^Dr;-F+U5iAPccDi?Aq*u{cYxBulY0%djlVu{##2Cu|6BHAsewVo3JUHu{m3?C0nsI+psO$u{}GmBRjD(yRa*}u{(RPCws9s z`>-$ju|EfJAO~?Uhj1u|aX3eCBu8;H$8apiaXcq*A}4V&r*JB#aXM#kCTDRr=Ws6P zaXuGtAs2BmmvAYUaXD9TC0B7Z*KjS@aXmM1BR6p~w{R=BaXWW#CwFl-_i!)waX%06 zAP?~{kMJmu@iV$^He++PU`w`QYqnuq zwqtvCU`KXhXLey%c4K$;U{Cg9Z}wqd_G5nz;6M)IU=HC>4&!i+;7E?*XpZ4nj^lVv z;6zU1WKQ8!PUCdW;7rcqY|i0a&f|P8;6g6qVlLrQF5_~p;7YFIYOdj0uH$-c;6`rZ zW^UnDZsT_D;7;!1Ztme;?&E$Q;6WbZVIJX89^-MI;7Ok1X`bO(p5u95;6+~IWnSS` zUgLG%;7#7*ZQkKs-s62f;6py*V?N{)#nep0v`okJ%)pGy#LUdXtjxyj%)y+@#oWxp zyv)b^EWm;+#KJ7XqAbSZEWwg2#nLRpvMk5)tiXz_#LBF~s;tK9tihVB#oDaHx~#|g zY`}(W#KvsGrfkOMY{8an#nx=Ywrt1t?7)uf#Ln!(uI$F{?7^Pw#op}0zU;^T9KeAb z#K9cGp&Z8H9Kn$s#nBwYu^h+ooWO~k#L1k(shq~?oWYr##o3(0xtz!OT)>4~#Kl~~ zrCi44T)~xG#noKHwOq&b+`x_8#Le8ot=z`#+`*mP#ogS)z1+wBJivoI#KSzoqddmr zJi(JZ#nU{)vpmQ1yugdR#LK+GtGveRyuq8i#oN5YyS&Hye87i%#K(NXr+miee8HD| z#n*hpw|vL<{J@X=#LxV~ul&aE{K236#ozqHzx>AlvGkt-8Hj-ygh3gM!5M-f8H%A9 zhG7|w;TeGu8Hte@g;5!e(HVm=8H=$QhjAH?@tJ@LnTUy*gh`o<$(e#FnTn~IhH06O z>6w8UnTeU1g;|-6*_nemnTxrZhk2Qg`B{JkS%`&Mghg45#aV(SS&F4uhGkifOm zghGRL7<2iv7If;`wg;P0= z(>a4PIg7J7hjTfP^SOWvxrmFogiE=M%ejIpxr(c~hHJTw>$!m&xrv*(g=Xrq_d5M>Kg;#lv*Lj0Cd5gDs zhj)38_xXSi`G}ACgira5&-sEc`HHXkhHv?f@A-ir`H7$TgrGYX?J8ly7?V=@+FGY;c29^*3s z6EYDKGYOM28Iv;wQ!*7(GY!)+9n&)dGcpr1GYhja8?!S9b21lmGY|7JAM>*S3$hRk 
zvj~f_7>lz6OR^M8vkc3!9Luu;E3y(RvkI%S8mqGgYqAz=vkvRB9_zCK8?q4_vk9BB z8Jn{OTe1~fvklv_9ow@5JF*iyvkSYj8@sayd$JdMvk&{SANz9v2XYVxa|nlW7>9EN zM{*QLa}39F9LIA4Cvp-ea|)+&8mDsxXL1&2a}MWn9_Mob7jh97a|xGn8JBYfS8^3s za}C#W9oKUMH*ym8n5#PZ}Jvz^A7Lw9`Ex3AMz0&^9i5w8K3h7U-A`S^9|qf9pCc< zKk^el^9#T78^7}hfASZ9^AG>>9|OeEe+FbA24)ZjWiSS32!>=RhGrOsWjKas1V&^e zMrIU7Wi&=-48~+E#%3JGWjw}b0w!c4CT0>QWilpb3Z`T#re+$ZWjdy324-X?W@Z*< zWj1DK4(4Po=4Kw|Wj^L-0TyH-7G@C^Wib|K36^9jmS!22WjU5-1y*DwR%R7eWi?i3 z4c25W)@B{nWj)qs12$wMHf9qxWivKs3$|n{wq_f)WjnTK2X z9LixF&Ji5RQ5?-N9LsSW z&k3B!Nu10noXTmO&KaD^S)9!|oXdHf&jnn_MO@4!T*_r!&J|qARb0(AT+4M_&kfwj zP29{a+{$g-&K=yzUEIw*+{=C3&jUQjLp;nQJj!D{&J#SzQ#{QxJj-)D&kMZBOT5f0 zyvl35&KtbRTfEIXyvuvM&j)iSA5Mke9L!y&ky{_PyEa;{K{|q z&L8~AU;NEK{L6m~5KsRZkbxMOK^T<57@Q#(lA#!yVHlR-7@iRrk&zggQ5coc7@aW~ zld%|^aTu5J7@rB4kcpU>Ntl$$n4Bq?lBt-QX_%Jjn4TG!k(rp8S(ugCn4LM8lew6i zd6<{^n4bk$kcC*7MOc)@SezwTlBHOhWmuNwSe_MFk(F4PRalkPSe-RkleJizby%16 zSf35pkd4@wP1uyp*qklclC9X9ZP=FW*q$BOk)7C?UD%b~*quGtlfBrReb|@%*q;M9 zkb^jwLpYSfIGiImlA}19V>p)MIGz(Yk&`%?Q#h5=IGr;%le0LRb2yjtIG+o+kc+sO zOSqKFxST7vlB>9yYq*x{xSkuhk(;=gTey|mxScz=le@T^d$^bTxSt1jkcW7fM|hOS zc$_DAlBal@XLy$9c%Bz{k(YRxS9q1zc%3(Rlec)AcX*fgc%KjWkdOG7PxzG2_?$2J zlCSuhZ}^t)_?{p5k)QaPU-*^Z_?Gav&oFoQ5CgE2TmFeF1U zG{Z0~!!bM~Fd`!{GNUjmqcJ*TFeYO$HsdfZ<1s!HFd-8$F_SPUlQB6{FeOtlHPbLH z(=k0WFe5WDGqW%&voSk!Feh^{H}fzr^D#dQupkSuFpID#i?KLMup~>dG|R9o%dtEw zup%q5GOMsEtFbz3uqJD#;r?upt|LM zGrO=WyRkcauqS)5H~X+J`>{U)0*Ks{Ja3eQyGq-Rnw{bgn za3^@Fs8Z zHt+B*@9{n#@F5@ZF`w`$pYb_g@FidIHQ(?p-|;;^@FPF*Gr#aFzwtYN@F#!qH~;W2 z|1m&<0RR2{9|mL~24)ZjWiSS32!>=RhGrOsWjKas1V&^eMrIU7Wi&=-48~+E#%3JG zWjw}b0w!c4CT0>QWilpb3Z`T#re+$ZWjdy324-X?W@Z*9LixF&Ji5RQ5?-N9LsSW&k3B!Nu10noXTmO&KaD^ zS)9!|oXdHf&jnn_MO@4!T*_r!&J|qARb0(AT+4M_&kfwjP29{a+{$g-&K=yzUEIw* z+{=C3&jUQjLp;nQJj!D{&J#SzQ#{QxJj-)D&kMZBOT5f0yvl35&KtbRTfEIXyvuvM z&j)iSA5Mke9L!y&ky{_PyEa;{K{|q&L8~AU;NEK{L6m~kVyX- zkbxMOK^T<57@Q#(lA#!yVHlR-7@iRrk&zggQ5coc7@aW~ld%|^aTu5J7@rB4kcpU> zNtl$$n4Bq?lBt-QX_%Jjn4TG!k(rp8S(ugCn4LM8lew6id6<{^n4bk$kcC*7MOc)@ zSezwTlBHOhWmuNwSe_MFk(F4PRalkPSe-RkleJizby%16Sf35pkd4@wP1uyp*qklc zlC9X9ZP=FW*q$BOk)7C?UD%b~*quGtlfBrReb|@%*q;M9kb^jwLpYSfIGiImlA}19 zV>p)MIGz(Yk&`%?Q#h5=IGr;%le0LRb2yjtIG+o+kc+sOOSqKFxST7vlB>9yYq*x{ zxSkuhk(;=gTey|mxScz=le@T^d$^bTxSt1jkcW7fM|hOSc$_DAlBal@XLy$9c%Bz{ zk(YRxS9q1zc%3(Rlec)AcX*fgc%KjWkdOG7PxzG2_?$2JlCSuhZ}^t)_?{p5k)QaP zU-*^Z_?dG|R9o%dtEwup%q5GOMsEtFbz3uqJD< zHtVo1>#;r?upt|LMGrO=WyRkcauqS)5H~X+J z`>{U)0*Ks{Ja3eQyGq-Rnw{bgna3^@Fs8ZHt+B*@9{n#@F5@ZF`w`$ zpYb_g@FidIHQ(?p-|;;^@FPF*Gr#aFzwtYN@F#!qH~;W2|1m%k{bxW1VqgYgPzGag zhG0mBVrYh8ScYSGMqornVq`{PR7PWT#$ZgwVr<4?T*hO3CSXD)Vqzv?QYK?^reI2@ zVrr&gTBc)qW?)8UVrFJxR%T;%=3q|dVs7SPUgl$d7GOaZVqq3xQ5IuymS9PiVriCP zS(amYR$xU|Vr5ogRaRql)?iK6Vr|x8UDjiLHef?GVq-R8Q#NCBwqQ%PVr#ZxTef3+ zc3?+#VrOdpRbJzD z-r!B%;%(mHUEbq;KHx(>;$uGHQ$FK!zTiu~;%mO)TfXCae&9!b;%9#0SAOGn{@_pk z;&1-pU;bl&r25Z*48*_;!k`Ss;0(c#48_n4!>|m;@QlESjKs){!l;bK=#0UbjK$cD z!?=vc_)NfrOvJ=Y!lX>ba4+1Y{k}W!?tY4_Uyop?8MIO!mjMb?(D&y?8V;f!@lgt{v5!89K^vK z!l4|-;T*w{9L3Qb!?7I4@tnYkoW#kT!l|6b>72otoWfJjBC1!lOLK<2=EW zJjK&I!?Qfc^Sr=|yu{1A!mGT->%766yv5tR!@Io4`+UHMe8k6m!l!)3=X}AJe8ty% z!?%3L_x!+*{KU`v!ms?s@BG1^{Ken=!@vB;0Lk>90U3ya8H7O@jKLX#AsLFH8HQmQ zj>{XhYtz1ClTMxXFKgGaag&Zs8Xx?h(TdJ3+BE6Zxj~!uLzcE}*Rf55)`J&y?$Q2# z#!I?3Xx*jBxd@*Z&-J>xz|WwuWap6T-U-Rkv5|FrtAGdp?^FMnMWlL2zaS*Jn}-Sv!lDu*Js$ech4E2Q zNs-A(pn7Om-~oC2c$icV>lZ}F#V5vw#zc3E^l(Fh$dL4D9zNVBsZS8#yF^6BB!>cz z5C8{20nnhv9x+G`NlA(fNsNpMO^)uAKEWdm3Bo#dj!5#zsX=gD0Gtw*rg5l836D#O zP3Y<$9-kQLADa@B937gN=oQ4fK0H1yDLFAEJQ;Y@^jW}bo3ytcEdUx21c!O|)BU_# 
zg8>;6B9gl%L;{Zygw!wX6$tiDF#Q6sAh>*BU|`)}-e1}!07~^*B`(Rk)ccn_Fga{U zP(uhh`1mSuL53i$TYQj@MljfXA6++n@>?H8{0mmO=%tNz?HT2xvl}+M>7%diJS}j^ zODiVz>h7Z>F!ksqIoYdYTxe{h#|lgt*w|wSq0w>49_P3D(_HIyaad?lWV(klJnQ;% zhSx&gXb1;&;*v`i&zO`c)sxxJt+-Y9-=@ftZVI`=XY~s}|5N)6eWA1;oK7Gl zgQ6J}%b<7$B{C?PL8%N%XVBy@ceeZ$QJe2^q@dNcH6O zbN`QlSwB85*j2E4KcyOWq8-Wc4~3%g_?f zphN~GGboio=?n@oD3d|i49aEDzm(3G5=172hsJ~^CVBFwdJ1?G$d?~U$)T|co`Sz! znw%699R`~IegQoGb&3=#QL;>#(!~q^{R4r4-oP#7&i;E@$iJ)xJcR?@K2sD)^%VUN zQxyA`De8arM8vnOw0U> zX?Q}{N!#&_%*?S@9qAn zV`1-hKCr2j>Z$x6+g173c0OZNbyL5&lqmfVnG1tJw@-+mCWUH*CM8EE#(ApMUA^Vk zFP|7Sykb=Mic!PMpk@#n7nf z8hZ6_lPtR)fDe?S zeuGlj{|HL8Gq5CsysngSQ#qYd8C1-mY6dkks9y%nS2?*>ug_50@3*ia>C%EUOJ%4Y z?$zv{2pZuN7D*E}x_CzYi2NUgO#?vIyNMSB{>ddOgG=O)Aj$`pKB!3c)vERb2Znhq z=yto@UZY3*xxLnQd+qH0+rk|}Q#~Dnh{FCwiu)Jw#H4y+y-6F|B{Inq=WY>1g!$Z@ zfq5n8e8SGUAJuCd^H}?xVYk$@#x}DSu#LS04>-R_^du>91-s#2Y3< zy+QSN9u3RLBX6hje<4ZSe`dn)f5fmi8le9Y<|FKvz`?=%Xz1-d`oRaF9>gP^$|D9;l z+$rv1=?UikS5o=fq_E_qj3k=w?&9_)Qc4izEg(^ezBX}2s%NJA&$^NJDx++8d;8Fu zzNx@7%WKTpsh&B$%wsb$FP+O=_y1HV{#_#Gc{$AY=H&vv|Ev*dEolZsE==_-`m@no z?DGnimRk*eWBQUbnC$WIHoKCy(G2!1^=sklF{`9qtEOG6rCqB>{uS`c(t~CBKZ0dN zs%PcDf@Rgef@Sr81j`zqSm56&zxIEb^4{+M|7^Otd z`;zPL-~Ybg{axU^xeNYPe>VEq!voyj`h)qJ`9I3jrhmNlk8a>ia$}{*ZIqKc@K9@O_V@$-4bdS$Ftm5^44E%C^Th%NvxwkB{Gi{U7_{`nzcV83TL&A>4nFecwM`gEZB= zvhVlG?hV!hUPN$v)BfO}X@4kV(GqV?A5Kr{(7(d`Ncy6qe=a)qk44AR5#aAdC(;+4 z{BzN%e=Itk4i$edI+MQW?4OIy{bSMjv_;kaUUVTc)pOB@-IsjWy}*avm;F*bSAK)H z*U?@JT=jzYhLlv#we(?cgr*H&_YQCI4&O*0Mgy?4;hWy!9p2$v>BC3>nl^mfJG>_) z($`|Sy9Cj`<~<#n?*wrn5s^`$-cyc{j^2LUb2kW2Yv?@pf|S4B{G%zm|CfJL!jgiJ z*l5p#RL{d8EHScuQO~3Q#O<{66HwoGijslek5fHQ{DO#>(6GpuB+t`e&oeI){qBg< zJ|xnxzGh;4x5&7_>KWbp^S`?H7e1R(=?!raZ?EMu(#!vb^xou1%VZcv5EM6GL<0E+Oui}Z>g*~usV z`(V$9jM)76r}IDgw(yoDUu=H%YvIF-u+T)$myEk^p08k{4kFgi1yo?jQl0)gd0g1 zPfU(ajEGE3#JmH(W1vLbdy^D|hKD9665iX?MACarI}A)NRy2|FKK#o)-g%Mj7-rkxv`{ z@8?5Jvj%!?=+zj3P%rf2UW6tg3ZfwZG2U*FgLp`QL`Z^UNP$#HgLKG%Ovr+4$bno? zCMYwM1wstMJCYD0CPy55ez9#kI+gc?8%p+-<+s0q{* z3WAzJ&7l@hOQ;pp8fpUtLv5iDs2vmvg+bv^1QZEHLG2+o6b*HNIzll}EEEUDLkW-v zN`#W2WGDsd1a*eGKwY73PaLit%O!VtD!Z}T4)`#9@+qHgf>B&p)Jr>XdAR0+5zo^c0s$LJeF&Kvln1m^qh5^jLEX=_?EWjcx!7{ACDy+deY``XL!8Yu`E;tjM z8O{P{g|org;T&*II2W87&I9|wdEtC;ez*W!5cY=)!G+->a8bAzTpTU|mxN2frQtGg zS-2cr9C*YItDfl#e20ja)gU`bk;EV7j_%eJ2z6xK1ufsRsoA538Hhc%Z z3*Uq9!w=wx@FVy!`~-dqKZBpcFW{H(EBH1127U{_gWtm+;E(Vp_%r+k{tADCzr#P^ zpYShl@_XSNK@kkW5dt9*3ZW5zFbIoq2#*Meh)9TxD2R$^h>jSDiCBn@IEV|$gk(mt zAX$-YNOmL#k`u{=A@z|!qyf?pX@oRJnjlS)Afy@6 z9BF~HL|P%Okv2#$(iRCp+99Dx7!r;|AdyHE(jIXm(MSiRBNBteB5_DOl7M)SL?j7G zMpBSYNN1!A(iQ23bVqt1sYp+x7t$N)gY-rEA^nj7$UtNeG8h?x3`K?^!;ullNMsZ; z8X1F(MaCiHkqO8|WD+tNnSxA3rXkaj8OTgz7BU-|gUm(dA@h+1$UH$B`4rN#qoA8aacUMb07TkqgL06bB~c2cQGhZii*hKB3aE%msEjJ8ifX8i8mNg{sEsz2XfWCq4ME$Xp=cNyjz*x7XcXEWb)(T}2ecy^gT|t9Xgr#LdeB5P2~9>* z&`xM)v8|{PkMf;)s(E;c{bPzfi9fA%;hoQsK5$H&C6gnCm zgN{YVq2tjB=tOi9IvJgUPDQ7o)6p5|Omr4H8=ZsBMdzXO(FN#2bP>83U4kw}m!Zqi z73fNI6}lQ-gRVu_q3h8N=tguCx*6SqZbi4D+tD59PIMQ#8{LEMMfaim(F5o~^bmR& zJ%S!ZkD5^bz_PeS$tkpP|pu7wAj$75W-|gT6)Iq3_WT=tuMu`WgL#enr2b-_alFPxP0! 
z@WU8_p%{kY7=e)(h0z$m7>va@jK>5_#3W3{6imf5OvenOCl9L$Af!ZKr7u&h`% zEIXD1%ZcT}a$|WgKP)en56h1gzzSmiSRt%1Rs<`G6~l^SC9slMDXcVB1}lq|!^&e7 zu!>kEtTI*wtBO^_s$(^&SQ{)DYm0?o?XXZR3=791ut+QlYmd3HXsiR)5sSfMu{bOqOTau>B9??DV<}iC ztTWaH>xy;5x??@CRIDe~3+s*b!TMtTu>RNpY#=rW8;lLXhGN68;n)alBsK~gjg7&^ zV&ky!*aU1MHVK=IO~IyO)3E8-3~VMg3!9D2!RBJ~u=&^mY$3J?TZ}EimSW4W<=6^r zCAJD%jjh4fV(YN=*amDPwh7yeZNau;+pz7}4s0j33)_wD!S-VNu>IHp>>zdsJB%H{ zj$+5K05p z>>>6DdyGB7o?_3i=hzGECH4w?jlIF%V(+l`*az$*_6hrpeZjtB->~o459}xQ3xjYN zM{pF!a2zLa5~pw)2RMVXIEVANfQz_<%eaE8xQ6Svft$F6+qi?f@Jx7SJPV!`&xU8m zbKp7gTzGCg5AKKO#q;6$@d9{3+#fH57siX=Me$;Ial8ax5-)|9#>?Pk@p5>1yaHYk zuY^~|tKe1fYIt?L23`}dh1bUG;C1l;ydGX355ybb4e>^JW4sC86c56i;mz?DcuTw$ z-WqR%2jgw=5WF28iihFhcmy7aN8#;pHy(|5z&qkGcq|@=$KwgO2T#P4@MJs%?}T^8 zyWm~%Zg_XR2cC-e#Czer@jiH8ydT~lAAk?U2jPS9A^1>y7(N^yfse#T;iK^}_*i@# zJ|3TdPsAtTlkq9|RD2pf9iM^E#Ao5N@j3Whd>%d@Uw|*f7vYQXCHPW&8NM7}fv?0@ z;j8gA_*#4&z8>FzZ^Sp@oAE99R(u=29p8cP#CPGl@jdund>_6aKY$;^58;RLBluDL z7=9c-fuF=r;ivI4_*wiMejdMoU&Jrrm+>q3Rs0%$9lwF!#BbrZ@jLik{2qQEe}F&4 zAK{PjC-_tR8U7r9fxpCG;ji&G_*?uP{vQ8;f5boGpYbpFSNt3P9shy<#DC!s0TT#; z5*UFK1VIuMK@)&r2$tXoo)8F;kO-Mj2$j$XoiGTKun3!Q2p5rw$V_A*vJ%;d>_iSC zCy|TDP2?f`h`dBTB0o`pC`kAdg^0pL5uzwjj3`c&AW9OYh|)wEqAXF4C{I)%DiW25 z%0v~SDp8H7PShZ3619lhL>;0o5kS-<>Jx!P1EL|(h-geSA(|3FL^Gl}(Sm46v?5v) zZHQo^EfGSrBSMKVBAkdIB8e!XJ>e#zi4H_ZB8G@1;)r-6f$$KCL=urqq!68m&O{fY zE76VUPV^vBiJn9+qBqfp=u7k?`V#|)fy5wUFfoJ}N(>{06C;R`#3*7kF@_jRj3dSq z6Nrh#Bw{i#g_ufABc>BGh?&GJVm2{{m`ltf<`WBug~TFaF|mYLN-QIm6Dx?7#42Jn zv4&VntRvPF8;Fg>CSo(Oh1g1LBeoMeh@HePVmGme*h}mq_7ew)gTx`?FmZ%9N*p7O z6DNq1#3|x5afUccoFmQ?7l@0*CE_x1g}6#wBd!xSh?~SM;x=)IxJ%q4?h_A)hr}b| zG4X_WN<1T;6EBFD#4F-8@rHOyyd&NdABc~{C*m{lh4@N*Bfb+qh@ZqS0wQ4&AyE<| zagrcOk|JpmkPOL^9LbXcDUuQ?lM1Pl8mW^8X_6LclMd-3Gm)9eEM!(P8=0NVLFOcL zk-5n{q#v1=%tz)Y3y=j#f3gr+m@GmTC5w^8$r5BqvJ_dGEJKzh%aP^D3S>pH5?Pt7 zLRKZKk=4l>WKFUbS(~gw)+GbTdSrbvkZeFUBpZ>9$tGk|GKg$OHYZz`2Ctv1A+>PbQEaGLcLolgSja6WN*ULUtv) zk=@B2WGdN{>_zq_`;dLfeq?`g06CBxL=Gm0kVDB~?xOkVna5;R7#_C%AicjqHM~cTvR41GnIwPN@b(6Q#q)dR4ytv zm51`9@>2Pz{8Ry|AmvXLq6$+*sG?LcsyJ1GDoK^1N>gR1vQ#;$JXL|JNL8XLQ&p&{ zR5hwPRfDQY)uL)sb*Q>j09B8wPX$s9sD@M{sxj4sYDxuB&8X&73#uj6ifT=@p@ONl zR0!3M3Z=rRa4Ld|q@t+yl$(mCI#3;{7%G;EqvELq%0nemNmMeGLUp1#Q(dU8R5z+S z)q_fT}L+0-0rE;WyuPc5JpQj4g?)DmhbwTxO$t)Ny?tEkn~8fq=I zj#^J`pf*yQsLj+CYAdyk+D`4Dc2c{j-P9gxFSU=_PaU8RQirI+)Dh|^b&NVrouE!q zr>N7^8R{%`jyg|Wpe|CEsLRw9>MC`Ox=!7oZc?|X+teNEE_IK(Pd%U>Qje&|)D!9{ z^^AH>y`WxFuc+758|p3fj(SghpgvNcsL#|F>MQk)`cD0zep0_Eh=yr|Mrn-3X@Vwc zil%8mGc-$cG*1h(NK3R#E3`^$v`!neNn5l{JG6_=L}#Y6&{^qhbapxios-T*=ce<} zeso?sADy2rKo_L_=|Xg2x(HpAE=CupOVB0hQgms$3|*EkN0+B7&=u)QbY;2#-x&hsgZbUbxo6t?^Ai5ddoNht4q+8Lg={9sQ-Ifla z+tHzP7#&VW(2;Z$-JW*S(R2s8BOOD>(s6V=oj`l&L^_F1rc>xnbZ5E?-IeY}cc**M zsdP`e7u}ogL-(co(f#QG^gwzLJ(wOs52c6E!|4(9NO}}KnjS-srN`0Z=?U~idJ;XE zo(evpA^g?Dsx6#|_9rR9m7rmR_L+_>c(fjEG^g;R%eV9H%AEl4c$LSOFN%|Ch znm$9HrO(ml=?nBl`VxJazCvH6uhG}(8}v>37JZw(L*J$E(f8>G^h5d){g{42Kc%11 z&*>NROZpZ4ntnsSrQgx-=@0Zr`V;+`{z8AHztP|6AM{W97YzXzAOHmzzySeBKmi&6 zzyKC-fCmB)fdpir02OFJ2L>>K1#I8|7sv!MgDfB`$Of{593Usi1#*Ktzz^gF`9OY9 z02BoNpb#hwih!b^7$^=(fRdmTC=JShvY;F&4=R9)pc1GIs(`AX8mJCxfSRBds153X zx*!151NA{5XaE|5MxZfh0-Ay#&ZunlYnJHSq`3+x7a zz+SKq><0(HL2w8h21meAa10y=C%{Q?3Y-RKz*%q(oCg=cMQ{mR23NpUa1C4sH^5DB z3)}{Gz+G?;+y@W9L+}VZ22a3K@C-Z$FThLi3cLnyz+3PRyayk^NAL-J24BEe@C|$i zKfq7$3qTCaAPmZ249*Y?$xsZ<0ES^$hGTd}U_?e@WJY0BMq_lwU`)nhY{p?+OeQ8X zlZDC3WMi^3IhdSGE+#jVhw)?bGWnSNOaZ1K~XVrnyWn7T{=Q;(_71TqbnhD;--G1G);$^Nw&gGpt2GQF7IOdqB%(~s%T3}6N_gP6h05N0Sdj2X_1U`8^d zn9HZq%-&CC{NE3=K+&g@`zGP{`F%pPVhvya)&9AFMI 
zhnU065#}g!j5*GnU`{fpnA6M|<}7oLInP{RE;5&x%ghz#Dszpw&fH*bGPju9%pK+~ zbC0>tJYXI&kC?~I6Xq%NjCszyU|uq>nAgl3<}LG%dCz=cJ~E$}&&(I*EAx%{&ir70 zGQSvzg;|6}S&YS5f+bmsrCGo-EX#5%&kC%_O03K(tjcPv&Kj)ATCB}Ftc%UWW@fXn zS=nrCb~Xo_lg-8EX7jLqY+g1Wo1ZPf7G(X|LTq8S2wRjb#ujHwuqD}2Y-zR(Tb3=y zmS-!l71>H`Wwr`im955BXKS!E*;;IEwhmjD4PfiB_1QqS0o#ym#5QJ|uua(@wi(-; zZNau=Td}R#Hf%84mJMOsv7u}j8_q_sk!%#(o^`X)YzMX@8^gx3acn%BzoMXM3=zY)`fq+nepf_GSCA{n-KRKz0y2m>t3nWrwlD*%9nWb`(3B z9m9@g$Fbwt3G7665<8il!cJwUvD4WZ>`ZnRJDZ)u&SmGZ^VtRLLUs|mm|emyWtXwb z*%j`rzUyPMs^?q&C}``H8RLG}=P zm_5QCWskAP*%RzZ_7r=XJ;R=5&#~v(3+zSq5__4w!d_*svDeuf>`nF-dz-z(-evEx z_t^*RL-rB-n0>-NWuLLn*%$0f_7(e@eZ#(G-?8u659~+w6Z@I{!hU7HvESJr>`(R= z3vn=qa43gyI7e_KM{zUjng@UGdYX1Ifrv`nYhec7A`B7 zjmysE;Bs=gxZGSG&X3E><>T^m1-OEoKUat=%oX8^a>cmfTnVluSBfjmmEp>A<+$=( z1+F4jiL1<2;i_`gxawRDt|nKDtIgHn>T&^GJ+3|%$Ti>^a*epgTobM-7sNH=nsY6< zmRu{YHP?m<=Gt;0Tstn53**AM2riO~;@Wd=E}HAWb>w2WST2r>=Mp#%m&he?$y^H8 ziR;XD;kt6&xb9pJE|u%a_2PPSeYn0{KdwJFfE&mS;s$d=xS`xIZa6oB8_A90Mss7h zvD`RrJU4-x$W7uVb5ppf+%#@FH-nqW&EjTrbGW(OJZ?U>fLq8d;udpDxTV}OZaKGt zTgk2BR(wcI*xJ-30|$Zg^_b6dEr+%|4Iw}acs?c#QGd$_&aK5jpEfIG+?;tq31 zxTD-L?l^aXJIS5mPIG6tv)noEJa>V+$X((tb62>l+%@hxcZ0jh-QsR@ceuOUJ?=jD zfP2V2;vREPxToAR?m72@d&#}xUUP4_x7<7KJ@cx+&At!_k;V%{o)`V z<`Ev{F&^g$p5!T><^j*}EYI;gFYqES@iMRQDzEW6Z}28>@iy=9E^S-u=!p0B`H zz8YVhuff;kYw@-DI(%I|fUn2b=L7i$d_%qw-cfy z#kc0$@WFgrK7?<_hw@>3I3K}B@=<(y-pxnz9r%uX3?IwK@$q~D@8J{qBtDr>;XCo2 z`7V4{z8l}2@4=_?J^5aIZ@v%Tm+!~-=Lhfu`9b_(eh5F5AI1;oNAM&0QT%9r3_q41 z$B*YH@Duq-{A7L#Kb4=xPv>XwGx=HkY<>PslG65DE(ZLLs5BP(&yy6cdUIC4`bfDWSAbMkp(k6Uqw}go;8X zp|Vg#s47$wstYxQnnEq1wopf?D+CDjg!)3D&_HM?G!hyMO@yXGkkCwMF0>F@3ay0J zLK`7iXe)#W?SxPvOb8bugh(MuXfL>hXrY79QHT*@g*YKzNDw?iqL3sc3n@Y;p|j9M z=qhv*x(hvoRH3KPOXw~15&8=Kg#N++VW2Qb7%U7Ch6=-k;lc=Eq%cYtEsPPy3gd+F z!USQWFiDs!OcACE(}d~53}L1)OPDRp5#|c>g!#e(VWF@{SS&0NmI}*+<-!VKrLam^ zEvymN3hRXR!UkcZuu0e~Y!S8!+l1}H4q>OTOV};!5%voEg#E$+;h=CxI4m3yjta+w z?C#;yNF%I zZen+_hnOn%6nlxi#Xe$Rv7gvq93T!92Z@8lA>vSRm^fSj5UA!UQ6mN;Q#XI6%@t$~Jd>}p)ABm5} zC*o7_nfP3MA-)t}iLb>s;#={Z_+I=VeiT26pT#fYSMi(pUHl>b6n}}31WSm7N|=O8 zghWb|L`y(oBv#@iUJ@ixk|bGDBvsNRT{0w7vLst_B$t#)$}DA(vP#*c>{1RXr<6;| zE#;B?q`Xo-DZf-eDk%9&g`~n#5vizDOe!vwkV;CWq|#CusjO5^Dlb)#DoT~4%2E}n zs#Hy?F4d4~O0}fgQXQ$T6d=`;>PvxA1F50ZNNOxKk(x?DQZuQ!)Iw@0wUSy(ZKPnS ztrQ})lR~91DO`$>BBdy)z2ugnr4CX@DMpHw;-q*fLGnn6Qj(M`rAVEm&QcettJF>E zF7=R7rJhnRskhWe>MQk=`bz_(fzlvpurx#(Dh-o{OCzL_(kN-PG)5XLjg!Vp6QqgK zBx$lVMVcy2lcq~Eq?ytzX|^;+nk&td=1U8th0-Evv9v^5DlLEfWCTX*@McOKDleSAcq@B_(X}7dT+AHmo_Dct(gVG`CuyjN^Djk!KODCk0 z(kbb*bVfQWos-T>7o>~QCF!zsMY<|oldelQq?^($>9%x7x+~q2?n@7(htebIvGhcG zDm{~)OE09C(ktn;^hSCsy_4QcAEb}cC+V~FMfxgzlfFwoq@U6+36fzMkx?0wahZ@w znUZN4$c)U&oXpFDEXtBB%ZjYZnykx)Y|55w%Z}`lGs&6dEOJ&ko19(FA?K8H$+_h` zvY(t+&L`)W3&;g!f4PucSS}(Lm5a&6C3UWocl3ZD?B3G5G z$<^f=a!t9GTwAUq*Ode0dUAa^P;MYMlpD#7CJW?JdkCw;CW94!3czJ?6QJy4E zmZ!*5ILd-;R>QT`-d{v|^StRM=iU<$4f3aL;E ztpJ5lScOw~MNmXVQe;I@R7F#C#ZXMeQf$RhTuLS-vyw&0s$^5LD>;;$N-ib0l1K4V z@+$e1{7M0(pyIC-QVJ_Yl%h&8rMOZ;DXElFN-JfQvPwCnyi!4_s8muaD^--LN;Rdr zQbVb!)KY3Ib(FeFfKpGXuLLR$l!i(prLodPX{rP%&6MU!3#FyfN@=aMQG%7WN{G@< z301(uhLKHuMAKIDua~4$`EC!GE5n+j8H}@qmek#8dNQG5IMO942RYE0IN~Kkx zGAgTbDz6Hvs7k7=Dyph#s;(NUsamS7I;u;}q-IvLs9Du)YIZe;np4fC=2r8lerjGd zpPFASpcYj9)k11vwTN0&Ev6P%OQbZMBYCR}E0>srA)BwSn4DZKO6y zm>RA|sF7-v+Fo_5(P{^^qZ*^es&Q((nxJ~rL^VlGR#VhYYG<{J+EwkQc2|3-scKKP zm)cwHqxMz%sr}Uf>OggnI#?Z|4poP#!_^V$NOhDtS{WD`8R|@RmO5LVqs~?5sq@tZ>Oysqx>#MJE>)MQ%heU?N_CaGT3w^ARoAKO)eY)K zb(6YT-J)()x2fCJ9qLYXm%3ZsqwZDrsr%If>Ou98dRRT89#xO2$JG<+N%fR^T0Ntl zRnMvC)eGuH^^$s7y`o-Kuc_D78|qE&gquy2TsrS_f>O=LB`dEFUK2@Ko&(#;| 
zOZAodT79FwRo|)a)eq`N^^^Kp{i1$Vzp3BVAL>u_mkMdHhG?jUX}Cscq(*791~f)v zHBRF-K@&AelQl(CHBHksLo+o?vo%L^X_>UlS{5y^E36gKifYBQ;#vuct+m!h3)b3dAzC{vR14F> zwFoUzi_+R_ZY^5tpmo$@v{)@pi`Nn~kCvz1`=K5JjJui7{5yY@r-sr}L*9o7*Y)iE8{37ym_oz{WQ=&a7^ zye{aXF6pwa=&G*ix^C#EZt1q}=q^2zo>|YLXVtUm+4UTHPCb{NThF8W>3Q{hdValt zUQqYf3+aXRB6?B1m|k2jp_kN4>815DdRe`kUS6-DSJW%%mGvrmRlS;CU9X|n)NARr z^*VZ8JwUIg*VhB}26{uik=|HuqBqrp^k#Z6`^+vvf1TRlW?r-$lcdbl2; zN9s{}d)=)^>mBrtdW;^c$LaBUg6`21^&~x6PtiN+o%JqySG}9wUGJf%>OJ*ddT+gt z-dFFZ_tyvL1NA}rV10-_R3D}f*GK3h^-=n0eT+UO+Mv&3WXl}GHS{kj4)-bgS!Mxv2qBpWG4C!@2`#pr5uGrAi+j8vni(aY#<^fCGx z{fz#`0Arvr$QW!4F@_q$jN!%zW27<47;TI(#v0>{@x}yWqA|&sY)mnx8q@oHl`;7g@0pp-?$T(~qF^(F?jN`@$8^Tq|^ zqH)Q%Y+Ny}8rO{L#tq}9am%=E+%fJN_l*0-1LL9b$ari#F`gRFjOWG+zH-T z0JEN1-wZSxm<`QFW@EF7+0+a&o0-kc7G_JcmD$>CV+NaT%@DJl8ES@^;bw#xX-1jt zO}80sb}&1dF=nh8XU3ZerpHV)lgwl@#q4BuHoKTz&2DCQvxk{#_B4B$z0E#mU$dXt z-yC2LGzXc3%^~JcbC@~Y9AS<$N13C|G3HovoH^c{U`{kAnUl>a=2UZSDCBLHRf7#ow?rJU~V)wnVZcm=2ml? zx!v4h?lgCqyUji3UUQ$h-#lO*G!L1F%_HVf^O$+uJYk+RPnoC9Gv-Vl39;EZ!0<(UL6L zQY_WdEZs6J)3Pkvax9mX$;xbHv9em(tn5|}E2ovq%5CMb{H(lIJ}bXfz$$3@TZOE` zRuQYHRm>`Gm9R=$rL59c8LO;S&MI$Juqs-WtjbmutEyGas&3V=YFf3d+EyK_t`%U_ zv+7%cRs*Y{)yQgWHL;pnK~^)Xxz)mIX|=LiTWzdhtF0AcwX;I4Fe}`Oup+G}tG(s6 zqOA^AM=QpPwc@OJE5Y(uiB^)8Y^7M8tj<;!tE<(`>TdP0Qmvj=FRQoJ$LeeKv-(>D ztbx`bYp^xM8fp!*hFc@7k=7_{v^B;WYmKwUTNA8_)+B4PHN~20O|zz3Gpw1`ENiwk z$C_)+v*ue1tcBJhYq7P&T52t`mRl>VmDVb2wYA1tYpt`^TN|v6)+TGSwZ+#%jiI%*xWj$0?Jlh!Hgv~|WhYn`*sTNkX0)+Ot* zb;Y`BU9+xRH>{i1E$g;*$GU6Xv+i3DtcTVk>#_C3dTKqho?9=hm)0xmwe`k&YrV7H zTOX{C)+g(;^~L&XeY3t>KdhhDFAK6^8?jLvvvHfSNt?218`zA^+MLbXf-TyTE!&E% z+M2D~hHcuGZQG9RvNPG4?JRayJDZ)|&SB@YbJ@A=Jhq>m*Uo3>w+q+>ZGXFvUDz&S z7qyGo#qAPyNxPI?+Ad?4waeM%?Fx29yOLemu3}fUtJ&4<8g@;)mR;MfW7o93C9(JnT)9z*Ww)@z9?S6KDdw@OA z9%K);huA~yVfJu)ggw$8WskPU*kkQ+_IP`OJ<*sUSuz}m)J|~W%hD=g}u^VWv{l^*lX=|_Ii7Rz0uxeZ??DCTkUQ3c6*1t z)81w8w)fb3?S1xs`+$AWK4c%ZkJv};WA<_TgniOJWuLas*k|o?_Idk)ebK&TU$(E< zSM6)|b^C^W)4pZjw(rR=A;5Dw{34($Mkaaf0Qct>zVM{;CGaa2ch zbjNT^$8v1Paa>L&C$p2q$?9ZtvO77PoK7w$x0A>5bMiX*ocvA!r=a8S6mkkXMVz8e zF{ijw!YS#La!Na8oU%?ir@T|aspwR4Dmzu2s!lbhx>Lic>C|#+J9V78PJmO-sqX|j z4V;EfBd4*`#A)gTInA8rP79}{)5>Y>v~hx+woZuC&IxtGoNyF*4120DYB z!Ojq8s58tN?u>9oI-{J?&KPH`GtL?BOmHSTlbp%U6lbb4&6)1ZaArEQoY~GCXRb5P zneQxc7CMWZ#m*9Esk6*k?yPWDI;))3&KhT}v(8!XY;ZO@o1D$g7H6xo&DrkkaCSPo zoZZeIXRou*+3y^14myXN!_E=ssB_FY?woK=I;Wh|&Kc*dbIv*MTyQQrmz>Ma73ZpR z&AIN}aBe!coZHSF=dN?lx$iu19y*Vl$IcVysq@Tv?!0hbI9Sn5%W=6}nOvD&SzK9N*<9IOIb1nixm>wjd0c+3ysmt%{H_A7f-ZkoAy;8n z5m!-HF;{U{30Fy1DOYJ%8CO|XIahgC1y@B^C0Auv6<1YPHCJ_44OdN9E!Y3U+C2qX zvb=A+pFOrcm8i_B>alIxwr$(CZQHhO+qTWKXaB#yn{#&}&U5prcXdQxtypia?uaLg zI>nsgP6?-^Q_3mrlyS;B<(%?P1*f7@$*JsAajH7ioa#;ur>0ZOsqNHp>N@qD`c4C< zq0`7|>@;zjI?bHsP79}{)5>Y>v~k)x?VR>b2dAUc$?5ELak@I)obFB!r>E1)>FxA! 
z`a1ob{>}hrpfkuB>+I>Vgd&Io6uGs+q5jB&;~@0DXI?J5p&I)Ixv&vcRtZ~*l>zwt@24|zQ$=U2| zake_!obApIXQ#8v+3oCc_B#8V{mudBpmWGM>>P29I>(&j&I#wFbILjGoN>-N=bZD- z1?Qr3$+_%YajrVooa@dF=caSZx$WF>?mG9J`_2RBq4UUj>^yOvI?tTv&I{+I^U8Vc zym8(-@0|C}2j`>n$@%PjalSg=obS#L=cn__`R)91LWzGwXc0z)72!m95kW*0kwjz> zMMM?RM062D#1yeaY!OGq74bxTkw7FAi9}+NL^whSDU{H{6-HR$310*vL?ji-L~@Zr zq!g({YLP~y73oBJkwIh>nM7uhMPwD(M0Sxw=e7iZm~z~ z75l_~aX=gths0rVL>v{z#Bp&#oD`?TX>mrJ73aixaY0-Zm&9dpMO+ov#C35)+!VLO zZE;8375Bt_@jyHjkHll~L_8JG#B=dNycDm*Ywr>!^!Y6f{Z95$;dK_j4Gqa=rV?kDPzglGLDQZi~%qp|V z>@tVUDRar(GLOtF^U3_OfGj8r$-=UTEGmo1;avEcDQn5vvW~1P>&g1Efov!n$;PsYY$}_{=CXxsDO<_bvW;vj+sXE_gX}0f z$?*s-?y`sMDSOG@vXAU5`^o-tfE*|X$-#1n94d#&;c|o=DM!iCa*P}+$I0<> zf}AKP$;onxoGPcu>2ijgDQC&qa*muU=gIkUfm|pT$;EPsTq>8z<#L5wDObtWa*bRo z*U9yAgWM=L$<1<$+$y)p?Q)0QDR;@;a*y0A_sRY8fIKJ<$;0x9JSvaL+*)YDR0T!@{YVK@5%e}fqW<*$;a}Ed@7&G=kkSo zDPPIg@{N2e-^us#gZwBz$)e5yztx~Ji8nsrfQ|r|RwNY(So7EPzRc%w-)ef~&?NYnd z9<^8PQ~T8cbx<8rht&~vR2@^t)d_V{ol>XO8Ff~jQ|HwMbx~bXm(>+@Rb5lp)eUu1 z-BP#J9d%dTQ}@*a^-w)hkJS_PR6SGA)eH4fy;85$8}(MbQ}5LW^-+CNpVb%jRee+6 z)erSk{ZhZx9~DafqeJU3I;;+-!|Mn-qK>2^>nJ*^j;5pQ7&@korDN+jIm=IILQAc*)~+_%YESz*&>=dhPNtLV6gs6&rBmxPI;~Ep)9Va6qt2u=>nu8} z&Ze{L96G1YrE}{%Ims_SE~bm?61t==rAzBFx~wjz%j*idqOPPX z>nggcuBNN&8oH*grEBXtx~{IL>+1%(p>Cub>n6IXZl;^-7P_TwrCaMZx~*=f+v^Ux zqwb_T>n^&h?xwrz9=fOQrF-i>y07l1`|AOEpdO?L>mhom9;S!u5qhK^rAO;AdaNF& z$Lk4tqMoED>nVDwo~Ebk8G5FkrDy9odajm_=rUZ$7p6?&y!rB~}U zdaYik*Xs>>qu!)9>n(b#-ln(f9eStUrFZK+davH6_v-`tpgyDz>m&N8KBkZB6Z)h+ zrBCZK`m8>u&+7~NqQ0ar>nr-IzNWA18~UccrElvy`mVmG@9PKpp?;(v>nHlDex{%6 z7y6}srC;ke`mKJa-|G+hqyD5n>o5AN{-(d{ANr^MrGM)`I+Xj58`=%yhIPZa;oS&s zL^qNf*^S~xb)&h_-5736H$$!g zxFK#*H<_EyUALZF-)-PFbQ`&i-6n2Rx0&1AZQ-_bTe+>>Hf~$Do!j2+;C6I7xt-lEZdbRP z+uiNq_H=u>z1==;U$>vz-yPr%bO*VE-68H!cbGfe9pR32N4cZjG45D*oIBo~;7)WW zxs%-~?o@Z0JKde(&U9zFv)wuFTz8&3-(BD?bQigc-6if)cbU7~UE!{DSGlX*HSSt> zox9%M;BIs`xtrZB?pAl3yWQR4?sRv#yWKtRUU#3n-#y?SbPu_Q-6QT%_n3R!J>i~o zPr0YvGwxaUoO|BA;9hhuxtHB5?p61id)>X^-gIxdx7|DLUH6`Q-+kadbRW5o-6!r- z_nG_Lec`@zU%9W{H||^ao%`PX;C^&Jxu4xH?pODl``!KF{&au2zuiA>DD#gAZNiwa zCY%XxBAAFKl8J1hn5ZV2iEd(;m?oBqZQ_`?CZ36J5}1T0kx6Wl7{>@BjWXJ}#u#fn zC(oFYiZR(i1 zrk<&98kmNrk!ft2n5L$gX>MAWmZp_yZQ7W&rk!bTI+%{8lj&@_n69Rq>27+Mo~D=S zZTgtLrl09=2AF|nkQr=-n4xBv8E!_Hk!F+`ZN`|fW}F#sCYXt4l9_C#n5kx(nQms7 znP!%mZRVJ{W}caE7MO)*ky&h(n5AZ!S#DOCm1dP$ZPu8zW}R7YHkgfOli6&xn5|}; z*=}~2oo1KWZT6VGW}n$_4w!@HkU4CQn4{*HIc`pvljf8;ZO)jp=A1ciE|`nvlDTZI zn5*WRxo&Qlo933eZSI)6=AOB49+-#bk$G&Mn5X8Md2U{qm*$muZQhu-=AC(OKA4Z@ zllg4En6KuW`EGugpXQhOZT^^0_8%MChOuF7I2+zZun}z}8`(y&QEfCE-NvvnZ7dtx z#<6j2JR9F8unBDHyV+_tbSZ7bW_ zwy|w(JKNrNupMnD+u3%pU2Qkp-S)6OZ7Wp# z?I=6ijuoLYhJK0XLQ|&Z6-OjKx?JPUn&ardtJUibmunX-XyVx$VOYJhd z+^(=I?JB$4uCZ(FI=kL(up8|ryV-8BTkSTx-R`hE?Jm39?y-CAKD*x@um|lSd)OYa zN9{3t+@7!}?J0ZOp0Q``IeXq-uovwmd)Z#GSM4=>-QKV_?JaxT-m!P>J$v6iun+Ac z``A9QPwg}N+`h0c?JN7*zOirZJNw>#upjLw``Lc6U+p*h-TtsY?JxV={;{FFf4tCM z7%!|B&I|8F@FIGVyvSY@FRB;Ki|)nnVtTQ>*j^kjt{2aX?7Pub@}RE9@2Vih9Mo;$8`_q*ux-?UnJ$dgZ+GUInkBSIMjFRq?8N z)x7Fn4X>tG%d73x@#=c@y!u`Puc6n-YwR`gntIK==3Wc0rPs=9?X~gRdhNXSUI(wE z*U9Vbb@94--MsEz53i@!%j@m+@%noGy#C$*Z=g5G8|)47hI+%i;ob;uq&LbN?Tzuq zdgHwD-UM%=H_4mqP4T9B)4b{43~#14%bV@Z@#cE-y!qY&Z=tuyTkI|ImU_#)<=zT! 
zrMJpk?XB_Fdh5LP-Ue@@x5?Y=ZSl5x+q~`G4sWNo%iHbk@%DQAy#3w*@1S?cJM10t zj(W$u0^^^I@{S%lYN~3Vubu zl3&@c;#c*n`PKazeoeoYU)!(a*Y)f9_5B8ZL%)&V*l*%D^_%(4{T6;pzm?zGZ{xT1 z+xhMN4t__!li%6z;&=7C`Q7~;!pLb`P2Ow{!D+CKii+<&-Lf|^Zf<>LVuCJ*k9r= z^_Tg}{T2R7f0e)5U*oU!*ZJ%H4gN-dlfT*D;&1i0`P=;+{!V|FzuVvA@Adcj`~3s{ zLI03{*gxVQ^^f_-{S*F4|CE2)KjWYE&-v&53;sp_l7HF1;$QWz`Pcm${!Rauf7`#~ z-}UeL_x%U{L;sQg*ni?b^`H6A{TKdA|CRsRf8)RP-}&$T5B^90lmFTO;(ztO`QQB? z{!jmx|J(oLhYJ1)LI+`jutB&Wd=Mds7(@yp2T_8kL9`%x5F>~g#0p{uae}x(ydZv% zAV?S_3K9oN0w)lG3{;>4H!y(>yuc5FAS6f{Bny%UDT0(isvvccCP*8k3(^M}f{a0? zAajr<$Qon|vIjYWoI$Q2caSH@8{`Y}2L*zHL7|{NtArUx^EnZc}Jb}%QH8_Wyl2MdCQ z!J=Ssuq0R-EDM$gD}t55s$g}nCRiJ+3)Tl4f{nqZU~{k~*cxmLwg)?cox!ePcd#ee z8|(}A2M2QCO8|M3(f}@f{VeW;Bs&!xEfpwt_L@Q zo58K%c5o-S8{7--2M>aW!K2`D@FaK|JPV!&FM^lBtKfC;CU_gX3*HAGf{($c;B)XL z_!@i*z6U>opTV!-ckm|&74lC==#VfWVMD@&gb#@j5-}uFNaT=wkl+(!afWqF4J^|33Y{s`c8({`(rWbb46D zzFGgB$^V{c+5fkK?*H=t^-hAn-Ijk7<^TAv#`y1JH0J0S17qU9jm-b$-^(2SOB{@g z@i0Cnz=W6x6Jru|P@qJG8eKGK(L)~t48f$B43lFDOo^#5HKxJ;)u*OoPLCNdBWA+P zm<9h=XPb>VJLbTgm;O(V-fscHFh!P;#dMpVk!KuT~}9w*>LoP?8c3QomoI2~u;Oq_+YaSqPKc{m>z;6hx4i*X4q#bvl0SKvxqg{yH5 zuElk@9yj1d+=QEP3vR`2xE*)kPTYmNaS!greYhVF;6Xfuhw%s=#bbCJPvA*Bg{Sch zp2c%`9xvcUyo8tW3SPx)cpY!xO}vG-@eba_dw3ro;6r?bkMRjU#b@{&U*Jo8g|G1q zzQuR=9zWnm{DhzJ3x36K_#J=XPyB_y@elrO*!`Qo-MWACw^sLW{`Tqq&EFc`zxmsu z`!|0pbpPgWiSFP0?a%$2zwNkx^S2)NZ~hkK{>|TN+`swTi2FBx8*u;T@7d*V{vKHV z=I?RkZ~h)p{w5B_#dsJW6JSD2go!Z;Iw(-0LX9pOwCJIa0ft~wOoquZ1*XJQm>Sby zT1ZzFARfZQcm$8)F+7eZ z@FbqX(|88Y;yFBz7w{rp!pnFCui`bljyLco-oo2>2k+uNypIp?AwI&#_ynKgGklIO z@Fl*&*Z2nC;yZkgAMhi7!q4~xzv4Iijz91x{=(n*2mf~o;Xk4OTf3n`V;KDZ`})4< z{N5NC6Jud)jDvA89>&K6m=F_TVoZV#3Y4f&ql*SDdgx<-A(#}CVRB4?DKQnM#x$4~ z(_wndfEh6pX2vX-6|-S>%z-&E7v{!1m>2V5ek_0mu@Dxmq=6{}%&tbsML7S_f(SQqPIeQbaYu@N@LCfF34VRLMOEwL50#x~d% z+hKd`fE}?DcE&E)6}w?~?14S87xu^NPR1!X6{q2JoPjfO7S6^wI2Y&Pd|ZGFaS<-YCAbuq;c{GoD{&RB#x=MW z*Wr5HfE#fWZpJOR6}RDb+<`lB7w*PAxEJ@~emsB&@em%yBX|^#;c+~HC-D@X#xr;p z&*6EzfEV!+UdAhU6|doSyn#3I7T(4?co*;CeSClq@ew}8C-@Yf;d6X}FYy(=#y9vD z-{E`wfFJP_e#S5O6~Ezk{DD957yiaS_`g5&{}YD&k74k?`^^6RQ8?!C7y%<!;vgK1LvSb#!{Imr zN8%_Pjbm^uj>GXd0Vm=loQzX&Do(@cI0I+mES!yVa4ycn`M3ZV;v!s(OK>SJ!{xXF zSK=yMjcaf%uEX`X0XO0%+>BdrD{jN>xC3|MF5HcKa4+t|{dfQm;vqbYNAM^f!{c}Y zPvR*&jc4#Ip2PEa0Wabuyo^`yDqh3ucmr?ZExe6)@GjoN`}hDK;v;;FPw*)|!{_({ zU*ao#jc@QRzQgzU0YBm={ET1lD}KZ8_yd39FZ_*v@V`qw|NZ>@CoKCP!(dnphv6{- zM#M-M8KYoSjE2!M2FAo#7#rhYT#SeDF##sTM3@+ppo0P>D%9wrL5m*x7+?q{#blTq zQ(#I=g{d(Orp0ua9y4G@%!HXS3ueV^m>qLqPRxb5F%Ra&e3%~#U_mT|g|P@0#bQ_- zOJGSXg{83!mc?>d9xGr)tb~=Z3RcBxSRHF%O{|5ru@2V7dRQMDU_)$#jj;(f#b($X zTVP9Ug{`p-w#9bX9y?%1?1Y`M3wFhB*d2RdPwa)gu@Cmee%K!e;6NONgK-EB#bG!c zN8m^tg`;r{j>T~}9w*>LoP?8c3QomoI2~u;Oq_+YaSqPKc{m>z;6hx4i*X4q#bvl0 zSKvxqg{yH5uElk@9yj1d+=QEP3vR`2xE*)kPTYmNaS!greYhVF;6Xfuhw%s=#bbCJ zPvA*Bg{Schp2c%`9xvcUyo8tW3SPx)cpY!xO}vG-@eba_dw3ro;6r?bkMRjU#b@{& zU*Jo8g|G1qzQuR=9zWnm{DhzJ3x36K_#J=XPyB_y@elrc&+LEs2SZ~R42$6~JVwBX z7zra|6pV_|FgnJ-m>3IVV;qc&@i0Cnz=W6x6Jru|P@qJG8eKGK(L)~t48f$B43lFD zOo^#5HKxI|m=4op2F!?=Ff(Sste6e6V-C!TxiB~8!MvCc^J4)lh=s5)7Qv!e42xq4 zEQzJCG?u}#SPsi$1+0jburgM`|8BAV_Z?h~xjNRsnpg{KV;!uE^{_rRz=qfe8)Fk} zip{V&w!oIy3R`0vY>Vx%J$As3*acz=gO77vmCK zipy|0uE3SJ3RmMAT#M^)J#N5_xCuAo7Tk*4a69h6owy5k;~w0L`*1%Vz=L=Q591L$ zipTIcp1_lM3Qyx1Jd5Y>JYK+ycnL4#6}*bq@H*bWn|KRv;~l(<_wYVGz=!wv>U|0-? 
z;V}Y6#7Gz!qhM5whS4zw#>7|{8{=SHjEC_t0Vc#mm>83wg90Tg)aar?iyrzIU&yZK`exYu?QB$ zVptqYU`Z^6rLhc_#d264D_}*egq5)hR>f*q9cy4stcA6)4%WqbSRWf;Lu`bNu?aTC zX4o8CU`uR;t+5TZ#dg>pJ77obgq^VqcExVk9eZF;?1jCt5B9}=*dGVrKpcdFaR?5@ zVK^K|;7A;Wqj3z5#c?RW52E$@F437~o zB1Xc<7zLwZG>nchFeb*r*cb=nVmyqG2{0ih!o-*a9TX^0p+*-CTJ+Gz07Eb-Cd1^I z0#jltOpR$UEvCctm;p0lCd`akFe_%m?3e>{VlK>$c`z^L!~9qP3t}NGj76|07Q^CL z0!v~kERAKbESAIaSOF_yC9I59uqsx=>R1D7VlAwVb+9hh!}{0&8)74Dj7_j9HpAxF z0$XA$Y>jQOEw;n<*a16YC+v(}uq$@M?$`r+VlV8CeXuX~!~Qq`2jUa4Js2={N&t;w+qvb8s%s!}+)X7vdsZj7xASF2m)x z0$1WHT#ajREw01$xB)lfCftl$a4T-Z?YIMX;x62cdvGuA!~J*w58@#_j7RV&9>e2! z0#D*8JdJ1YES|&jcmXfsCA^GR@G4%z>v#ii;w`+5cknLW!~6IEAL1i?j8E_>KEvnu z0$<`Qe2s7LExyC|_yIrSC;W_G@GE}9@Aw0M;xGJ-e=t-;_CJQkFc=oYVR(#y5it@* z#wZvSqhWN6fiW=_#>O}p7vo`kOn?b75hlhY=%7G}3N^ZD(4vPv1{i`#F&QSu6qpiI zVQNf+X)zt9#|)SeGht@Tf>|*eX2%?u6LVp1%!7F`ALhpbSP%p5^R>vAx6Ki2@tb=v29@fVO*bp0GV{C#=u^BeU7T6M7 zVQXxIZLuA;#}3#LJ7H(+f?cs2cE=vr6MJEA?1O!=ANI!qI1mTnU>t%&aTpHA5jYY@ z;bUuCPRAKI6KCOUoP%?59?r)FxDXfNVqAhtaTzYh6}S>t z;c8riYjGW}#|^j-exUdJ1F6K~;dyn}b~9^S_X_z)lAV|;>7@fkkH7x)ri z;cI+@Z}AVx%J$As3*acz=gO77vmCKipy|0uE3SJ3RmMA zT#M^)J#N5_xCuAo7Tk*4a69h6owy5k;~w0L`*1%Vz=L=Q591L$ipTIcp1_lM3Qyx1 zJd5Y>JYK+ycnL4#6}*bq@H*bWn|KRv;~l(<_wYVGz=!wIFT9}{3gOoWLs2|6fHqC$->8no!4j{$~YQcQ-)F$Jc?RG1pmU|LLv z=`jOl#7vkOvtU-thS@O(=EPi>8}ndZ%!m2002ahTSQv|7Q7neVu>_XHQdkv02a#7(#vx8PRXhTCxm?!;ZV8~5N|+=u(|03O6cco>i1Q9Opn@dTd4Q+OKB;8{F} z=kWqw#7lS?ui#a@hS%{1-o#sY8}Hy_!ytyQ+$Tc@ddubSNIy=;9Go$ z@9_hE#83Dczu;H=hTriA{={GS8~@<{ihBQv@_*-m&=>~8VmJ(s5ilY~!pIl}qhd6S zjxjJM#=_Vb2jgNqjE@O0Atu7am;@aZC{dwC7Y$nU(8mBnFexU(SI818ZU}tc`WBF4n{P*Z>=1BW#RKuqigf=GX#TVk>NoZLlr2 z!}iz#J7Op7j9suRcEj%21AAgG?2Ub}FZRR!H~D z!}YiUH{vGTj9YLkZo}=k19##s+>Lv1FYd$rcmNOLAv}yn@F*U`<9Gs3;we0hXYeeZ z!}E9nFXAP8n18?Fjyp4D8F5biY_y8Z`BYccc@F_mS=lB9&;wyZOZ}2U? z!}s_BKjJ6+j9>68e#7th1ApQ#{EdI`e+B*jL}mYD7z~TyFg!-Uh!_bYV-$>v(J(s3 zz?c{dV`ChQi}5f%CcuQ42oqxxbWor~g&JKnXwgF-0}R2Wm<*F+3QUQqFg2#Zw3rUl zV+PEKnJ_bE!K|1Kvttg-iMcR0=E1y}5A$OIEQp1$Fc!h0SPY9}2`q`Fur!vzvRDqw zV+E{;m9R2a!Kzpdt78qUiM6mc*1@`159?zCY>17pF*d=b*bJLv3v7w4ur;>9w%88a zV+ZVrov<@@!LHa1yJHXRiM_Bl_QAf`5BuW)9EgK(Fb=_?I1Gp52pox{a5Rp=u{aLL z;{=?DlW;Ol!KpY6r{fHqiL-Dv&cV4j59i|oT!@QsF)qQSxD1!$3S5b+a5b*MwYUz~ z;|AP_n{YF3!L7Irx8n}niMwz&?!mpd5BK8%Jcx(zFdo69cnpu@2|S6X@HC#mvv>~A z;|08km+&%P!K-);uj388iMQ}J-od+g5AWjxe29Js)Gh-IairFwb=D?ho3v**0%!~OjKNi4(SO^Pa5iE+uusD{$l2{5$V;L-q<*+*1(!r3u|K?tc&%qJ~qIH*a#bA6KsmjusOECme>kgV;gLX?XW#| zz>e4nJ7X8@irug~_Q0Ol3wvW9?2G-dKMufwI0y&h5FCoba5#>@kvIxR;}{%^<8VAq zz==2sC*u^Hiqmj9&cK;C3uogToQv~tJ}$t8xCj^H5?qSQa5=8PmADF5;~HFx>u^18 zz>T;GH{%xEira8I?!cY63wPrl+>85gKOVq?cnA;U5j={=@Hn2plXwbG;~6}Q=kPpU zz>9bZFXI)wir4Tu-oTr93vc5cyo>knK0d&Q_y`~46MTx#@HxJ~m-q@_;~RX7@9;f- zz>oL|KjRntir?@%{=lF33xDGu3>BUK|6^zjgJCfohQ|mP5hGz_jDk@y8b-$$7!zY* zY>b0(F&@Up1eg#LVPZ^z4hod0P@{_mEqdrJeU{rVSX%t1+fqo#v)i0i(zprfhDmNmc}wz7RzCItbi4< z5?014SQV>bb*zCku@=_GI#?I$VSQ|X4Y3h6#wOSln_+Wofi1BWw#GKt7TaNa?0_Ay z6L!Wf*cH2BckF>Zu^0BnKG+xgVSgNe191=z#vwQqhv9G>fg^Dgj>a)K7RTXuoPZN? 
z5>Cb`I2EVibew@RaTdv(J(s3z?c{dV`ChQ zi}5f%CcuQ42oqxxbWor~g&JKnXwgF-0}R2Wm<*F+3QUQqFg2#Zw3rUlV+PEKnJ_bE z!K|1Kvttg-iMcR0=E1y}5A$OIEQp1$Fc!h0SPY9}2`q`Fur!vzvRDqwV+E{;m9R2a z!Kzpdt78qUiM6mc*1@`159?zCY>17pF*d=b*bJLv3v7w4ur;>9w%88aV+ZVrov<@@ z!LHa1yJHXRiM_Bl_QAf`5BuW)9EgK(Fb=_?I1Gp52pox{a5Rp=u{aLL;{=?DlW;Ol z!KpY6r{fHqiL-Dv&cV4j59i|oT!@QsF)qQSxD1!$3S5b+a5b*MwYUz~;|AP_n{YF3 z!L7Irx8n}niMwz&?!mpd5BK8%Jcx(zFdo69cnpu@2|S6X@HC#mvv>~A;|08km+&%P z!K-);uj388iMQ}J-od+g5AWjxe29D%9wrL5m*x7+?q{#blTqQ(#I=g{d(Orp0ua9y4G@%!HXS3ueV^ zm>qLqPRxb5F%Ra&e3%~#U_mT|g|P@0#bQ_-OJGSXg{83!mc?>d9xGr)tb~=Z3RcBx zSRHF%O{|5ru@2V7dRQMDU_)$#jj;(f#b($XTVP9Ug{`p-w#9bX9y?%1?1Y`M3wFhB z*d2RdPwa)gu@Cmee%K!e;6NONgK-EB#bG!cN8m^tg`;r{j>T~}9w*>LoP?8c3Qomo zI2~u;Oq_+YaSqPKc{m>z;6hx4i*X4q#bvl0SKvxqg{yH5uElk@9yj1d+=QEP3vR`2 zxE*)kPTYmNaS!greYhVF;6Xfuhw%s=#bbCJPvA*Bg{Schp2c%`9xvcUyo8tW3SPx) zcpY!xO}vG-@eba_dw3ro;6r?bkMRjU#b@{&U*Jo8g|G1qzQuR=9zWnm{DhzJ3x36K z_#J=XPyB_y@ehWI#s0_87zV>)I1G;wFd|06$QT8qVl<47F)${^!q^xG<6=CFj|ng# zCc?y+1RWG8QK3c`4O;Zj#{fexDJH|@m;zH`Dol-OFfFFT^q2uNVkXRtSuiVR!|a#? zb7C&cjd?IH=EMA001ILvER034C>F!wSOQC8DJ+d;uq>9t@>l^YVkNAMRj?{n!|GTA zYho>|jdidt*2DVP02^W>Y>Z8?DK^9A*aBN(D{PHzur0R3_SgYCVkhj3U9c;5!|vDv zdtxu_jeW2$_QU=-00-hA9E?M7C=SEnI08rFC>)Jra4e3)@i+k|;v}4mQ*bIy!|6B! zXW}fJjdO4=&cpe*02ksST#QR_DK5k1xB^$=DqM|ga4oLG^|%2y;wIdTTW~9G!|k{O zcj7MGjeBq}?!*0f01x6JJd8*1C?3P(cmhx2DLjp5@GPFg^LPO-;w8L{SMVxc!|Qkh zZ{jVyjd$=a-oyL&03YHbe2h=_xJ%n;wSu!U+^n_!|(V5 zf8sCvjejsyZ1z8f#xNKb!(n)gfDthgM#d-@6{BHvjDayR7RJUn7#HJVd`y4|F%c%l zB|SQBeu zZLEWJu^!gP2G|fAVPkB9O|cm^#}?QUTVZQ#gKe=Lw#N?G5j$aL?1Ejf8+OMY*b{qU zZ|sA8u^;xw0XPr`;b0tsLva`m#}POZN8xB3gJW?Vj>ic&5hvkfoPtwv8cxR0*UCP4=U zN>r%PMS~VS^fAB?Op3`cIi|prm85)v!9&z?xVKYhxX( zi}kQRHo%712peM)Y>LgWIkv!-*a}-?8*Gd1uswFbj@Su1V;Ag--LO0Mz@FF(dt)E$ zi~X=a4#0sp2nXX39E!tmIF7)PI0{GO7#xe^a6C@Hi8u)-;}o2V({MV@z?nD;XX6~4 zi}P?kF2IGj2p8iLT#CzZIj+E!xC&R}8eEI(a6N9ojkpOn;}+bC+i*MXz@4}YcjF%1 zi~Ddt9>9Zm2oK{CJc`HgIG(_hcnVMB89a;U@H}3?i+Bky;}yJ$*YG;tz?*mrZ{r=j zi}&z8KEQ|g2p{7Ue2UNTIljP`_zGX+8+?oJ@I8LOkN62c;}`sj-|##Bz@PXFf8!qv z6_@>wp)m}G#c&uNBVa^~gpn}{M#X3t9b;fjjD@i=4#vfJ7#|a0LQI5-F$p>-P@+PO zE*iAxp^pKEU{Xwm$uR|{#8j9X(_mUmhv_i`X2eXG8M9zk%!b)92j;|Fm>ct8Ud)I2 zu>cmtLRc7!U{NfF#jymI#8Oxq%V1e7hvl&XR>VqJ8LMDbtcKOG2G+z{SR3nLU95-o zu>m&3M%WmeU{h>{&9Mcx#8%iE+hAL4hwZTgcEnED8M|Ot?1tU32lm8X*cY>oQBhJ2F}D;I2-5ST%3pV zaRDyGMYtH3;8I+M%W(y+#8tQ&*Wg-QhwE_zZp2Nv8Mok8+=kn62kyjOxEuH2UfhTK z@cNB9_@;8T2t&+!Gm#8>zl-{4z(hwt$Ne#B4s8Nc9H{D$B02mZug_#6LVsCevu z42@whEQZ7I7y%<>vBd{AMznj3n9V z7E57iEQ4jS9G1rlSP?5>Wvqf#u^Lv#8t8#Fu@=_GI#?I$VSQ|X4bc-DVPkB9O|cm^ z#}?>?EwL50MsM^%U-UzNY=dplhV9Uf?Xd%P!~hJ$PS_bu48mXx!7kVpyJ2_ifjzMo zI*ZsI1b0-1e}PIa57H8sW=U%;|!dM zvv4-f!MQjO=i>rgh>LJBF2SX^442~yT#2i2HLk(6xDMCj2Hc37a5HYft+)-h;||=3 zyKpz|!M(T-_u~OPh==en9>Jq{43FapJc+09G@ik;cn;6w1-yut@G@S(t9T8s;|;ut zx9~RJ!Mk`5@8bh}h>!3wKEbE>44>l*e2K5{HNL^O_zvIW2mFYi@H2kFulNnW;}86a zzwkHyK`jaQe-vn76m-L=7!9Li40Oks7z<-#9E^+cFg_;0gqR2uV-ie?$uK#lz?7H@ zQ)3!Ti|H^uX26V?2{U6B%!=7CJLbTgm;O(V-YNh#jrS*z>-)B zOJf-T~}9w*>LoP?8c3QomoI2~u;Oq_+Y zaSqPKc{m>z;6hx4i*X4q#bvl0SKvxqg{yH5uElk@9yj1d+=QEP3vR`2xE*)kPTYmN zaS!greYhVF;6Xfuhw%s=#bbCJPvA*Bg{Schp2c%`9xvcUyo8tW3SPx)cpY!xO}vG- z@eba_dw3ro;6r?bkMRjU#b@{&U*Jo8g|G1qzQuR=9zWnm{DhzJ3x36K_#J=XPyB_y z@egWAx&Na;1EZiDM#X3t9b=$7#>7|{8{=SHjEC_t0Vc#mm>82_QcQ-)F$Jc?RG1pm zU|LLv=`jOl#7vkOvtU-thS@O(=EPi>8}ndZ%!m2002ahTSQv|7Q7neVu>_XHQdkuVU|Y0dJG5ha?0_9H00XfTc19C}Fc?Fy3wFhB*d2RdPwa&b?2Ub} zFZRR!H~D!}YiUH{vGTj9YLkZo}=k19##s+>Lv1 zFYd$rcmNOLAv}yn@F*U`<9Gs3;we0hXYeeZ!}E9nFXAP8n18?Fjyp4D8 zF5biY_y8Z`BYccc@F_mS=lB9&;wyZOZ}2U?!}s_BKjJ6+j9>68e#7th1ApQ#{EdH5 
zOUC^l1sWIy-7qRf!{`_T-7zM{!q^xG<6=CFj|ng#Cc?y+1e0PiOpYlqC8omEmkzUYVk*aq984cnm|+hYgphyfUgov<^S7=*zXf?cpHcEj%21AAgGbYO4ngMG0d z_QwG@5C`F49D+k}7!Jn~I1)$UXdHuMaU71v2{;ia;bfeGQ*jzj#~C;iXW?v|gL82n z&c_9~5EtQMT!Kq+87{{axDr?4YFvYBaUHJ54Y(0E;bz=|TX7q1#~rv6cj0c_gL`ow z?#Bao5D(#DJc38@7#_zHcoI+HX*`2x@f@DV3wRMP;bpvnSMeHN#~XMPZ{cmcgLm;B z-p2>{5Fg=Ve1cE$89v7s_!3{?YkY%m@g2U$5BL#3;b;7UU-27$#~=6;f8lTZgIaR# z|0vMFDCmY!F&ak480d~MF&4(gI2ae>VSG%02{92S#w3^&lVNg9fhjQ+rp7dw7SmyR z%zzm&6K2LNm=&{OcFch}F&E~>JeU{rVSX%t1+fqo#v)i0i(zprfhDmNmc}wz7RzCI ztbi4<5?014SQV>bb*zCNSQBeuZLEWJu^!gP2G|fiu@N@LCfF34VRLMOUf2>_VQch8 zAM`~(^v5>X7H!xL?bseWU`GtVK#~#=dd!Yk+V;}5`{jfg{ zz=1dj2jdVNioc zz=gO77vmCKipy|0uE3SJ3RmMAT#M^)J#N5_xCuAo7Tk*4a69h6owy5k;~w0L`*1%V zz=L=Q591L$ipTIcp1_lM3Qyx1Jd5Y>JYK+ycnL4#6}*bq@H*bWn|KRv;~l(<_wYVG zz=!wtbEjES)@HpaoY7!TuP0!)aBFfk^7)R4Xa}f^uU@}3u|K?tc&%qJ~qIH=!uQ6F*d=b*bJLv3-rR4*a};tH~OG2 z`k_Cz!M13_c4)`;*a16Y00v?w?2IM`VK9bZ7wn4Nusim^p4bZ=*cY>oQBhJ2F}D;I2-5ST%3pVaRDyG zMYtH3;8I+M%W(y+#8tQ&*Wg-QhwE_zZp2Nv8Mok8+=kn62kyjOxEuH2UfhTK@c zNB9_@;8T2t&+!Gm#8>zl-{4z(hwt$Ne#B4s8Nc9H{D$B02mZug_#6MAmXiBF3N$bZ zx?xm|hS4zwx?@a?g|RUX#>IFT9}{3gOoWLs2`0s4m>g4JN=${RF%720beJA9U`EV@ znK27y#cY@zb6`%)g}E^g=EZ!N9}8eXEQE!z2o}X+SR6}WNi2n>u?&{Qa#$WKU`4Ei zm9Yv|#cEg`YoG_##9CMz>tJ21hxM@mHbhTsgpIKYHpOPx99y6lw!~K08okj6ebEp7 zu?@CG8@59`w#N?G5d$y~J7H%uF$jY(1iN5Y?1tU32lm8X=)m6C2m4|_?2iL*AP&O8 zI0T2{FdU8}a3qex(KrUj;y4_S6L2CZzFARfZQ zcm$8)F+7eZ@FbqX(|88Y;yFBz7w{rp!pnFCui`bljyLco-oo2>2k+uNypIp?AwI&# z_ynKgGklIO@Fl*&*Z2nC;yZkgAMhi7!q4~xzv4Iijz91x{=(n*2enk(|52cUQP2&e zVl<47G0+`jVl0e}aWF2%!}yp06JjDvj7cylCd1^I0#jltOpR$UEvCctm;p0lCd`ak zFe_%m?3e>{VlK>$c`z^L!~9qP3t}NGj76|07Q^CL0!v~kERAKbESAIaSOF_yC9I59 zuqsx=>R1CkuqM{R+E@qcVm+*n4X`14Vk2yfO|U68!{*omy|5*=!q(`GKIn^n=#Oo% zE!wah+Oa)$z>XMzf!GN88#yz+f_u+m#fCupq9>ybh z6p!I?Jb@?i6rRR2coxs$dAxuZ@e*FfD|i*J;dQ)$H}MwU#yfZy@8NxXfDiEzKE@~b z6rbU9e1R|V6~4wd_!i&cd;EYO@e_W=FZdO|;dlIjKk*m-#y_Z~=KhZY4UB?r7!{*o zbc})S7!zY*Y>b0(F&@Up1eg#LVPZ^zNii8F#}t?nQ(|=z%q{7S_f(SQqPIeQbaY(GweCV{C#=u^BeU7U+d7u@$yPZ}dT5^h1AagKg1< z?a+?xu>*F*01U)V*cnX>!e9)+F4z^jVR!6-J+T)$us8O>zSs}@;{Y6pgK#ho!J#+| zhvNtwiKB2dj=`}w4#(pJoQRWfGETv%I1Q)c44jFxa5m1txi}B!;{sfWi*PY6!KJti zm*WatiK}omuEDjq4%g!b+=!cSGj74HxDB`C4%~^ma5wJ3y|@qe;{iN~hwv~S!J~K# zkK+kEiKp;1p24$t4$tESyoi_ZGG4)}cnz=P4ZMlB@HXDTyLb=p;{$w%kMJ=*!Ke5P zpW_RBiLdZAzQMQn4&UPk{D`0MGk(FZ_zl0~5B!P0@HhTJEe-d76lh=+bi=3^4WnZW zbjO$&3u9v(jEnIwJ|@6~mJs)Gh-IairFwb z=D?ho3v**0%!~OjKNi4(SO^Pa5iE+uusD{$l2{5$V;L-q<*+ z)<6%eiM6mc*1@`159?zCY>1xN2peM)Y>LgWIkrGAY>BO~HF~2D`l28DV;gLXHf)D> zY>yqVBL-j~cEZkRVh{#n2zJ4)*bTd55A2D((1E?N5B9}=*dGVrKpcdFaR?5@VK^K| z;7A;Wqj3z5#c?SbyT1i(0EQZCg1eU~7SQ^Vw}aN>~}IU{$P!)v*S8 zU`?!rwXqJ?#d=sD8(>59#75W{n_yFHhRv}BdSOd!g{{#Web5*E&>!1iTeM+2v}1eh zfE_Ua1F;i!MiYZD7(=iNcExVk9eZF;?1c{OjeW2$_QU=-00-hA9E?M7C=SEnI08rF zC>)Jra4e3)@i+k|;v}4mQ*bIy!|6B!XW}fJjdO4=&cpe*02ksST#QR_DK5k1xB^$= zDqM|ga4oLG^|%2y;wIdTTW~9G!|k{Ocj7MGjeBq}?!*0f01x6JJd8*1C?3P(cmhx2 zDLjp5@GPFg^LPO-;w8L{SMVxc!|QkhZ{jVyjd$=a-oyL&03YHbe2h=_xJ%n;wSu!U+^n_!|(V5f8sCvjek%}$Ne7#8W;uLFe*mF=okatF($^s z*cb=nVmyqG2{0ih!o-*alVUPVjwvuDroz;i2Ge3XOph5bBWA+Pm<6+9Hq4GWFem21 z+?WURVm{1|1+X9%!opYti()Y>jwP@pmcr6l2FqeOERPkiB38o6SOu$MHLQ*`&;x5? 
zEv$`ourAia`q%&)q9-=O#@Ga#Vl!-xEzk>FVk>No-spqA=!gE;2HTxDhwuX54~XaT{*O9k>&B;cnc6dvPD`#{+l}58+`vf=BTf9>)`S5>Mf2 zJcDQP9G=Guco8q*WxRq{@fu#o8+a3M;cdKwckv$H#|QWjAK_zsf=}@oKF1gM5?|qK ze1mWC9lpm8_z^$hXZ(U+@f&`}ANUi0;cxtdT6)cT=_t^^DCmY!F&ak480d~MF&4(g zI2ae>VSG%02{92S#w3^&lVNg9fhjQ+rp7dw7SmyR%zzm&6K2LNm=&{OcFch}F&E~> zJeU{rVSX%t1+fqo#v)i0i(zprfhDmNmc}wz7RzCItbi4<5?014SQV>bb*zCNSQBeu zZLEWJu^!gP2G|fiu@N@LCfF34VRLMOUf2>_VQch8AM`~(^v5>X7H!xL?bseWU`GtV zK#~#=dd!Yk+V;}5`{jfg{z=1dj2jdVNiocz=gO77vmCKipy|0uE3SJ3RmMA zT#M^)J#N5_xCuAo7Tk*4a69h6owy5k;~w0L`*1%Vz=L=Q591L$ipTIcp1_lM3Qyx1 zJd5Y>JYK+ycnL4#6}*bq@H*bWn|KRv;~l(<_wYVGz=!wtbEjES)@HpaoY z7!TuP0!)aBFfk^7)R4Xa}f^uU@}3u|K? ztc&%qJ~qIH=!uQ6F*d=b*bJLv3-rR4*a};tH~OG2`k_Cz!M13_c4)`;*a16Y00v?w z?2IM`VK9bZ7wn4Nusim^p4bZ=*cY>oQBhJ2F}D;I2-5ST%3pVaRDyGMYtH3;8I+M%W(y+#8tQ&*Wg-Q zhwE_zZp2Nv8Mok8+=kn62kyjOxEuH2UfhTK@cNB9_@;8T2t&+!Gm#8>zl-{4z( zhwt$Ne#B4s8Nc9H{D$B02mZug_#6MAmXZ5E3N$bZx?xm|hS4zwx?@a?g|RUX#>IFT z9}{3gOoWLs2`0s4m>g4JN=${RF%720beJA9U`EV@nK27y#cY@zb6`%)g}E^g=EZ!N z9}8eXEQE!z2o}X+SR6}WNi2n>u?&{Qa#$WKU`4Eim9Yv|#cEg`YoG_##9CMz>tJ21 zhxM@mHbhTsgpIKYHpOPx99y6lw!~K08okj6ebEp7u?@CG8@59`w#N?G5d$y~J7H%u zF$jY(1iN5Y?1tU32lm8X=)m6C2m4|_?2iL*AP&O8I0T2{FdU8}a3qex(KrUj;y4_S z6L2CZzFARfZQcm$8)F+7eZ@FbqX(|88Y;yFBz z7w{rp!pnFCui`bljyLco-oo2>2k+uNypIp?AwI&#_ynKgGklIO@Fl*&*Z2nC;yZkg zAMhi7!q4~xzv4Iijz91x{=(n*2enMx|52cUQP2&eVl<47G0+`jVl0e}aWF2%!}yp0 z6JjDvj7cylCd1^I0#jltOpR$UEvCctm;p0lCd`akFe_%m?3e>{VlK>$c`z^L!~9qP z3t}NGj76|07Q^CL0!v~kERAKbESAIaSOF_yC9I59uqsx=>R1CkuqM{R+E@qcVm+*n z4X`14Vk2yfO|U68!{*omy|5*=!q(`GKIn^n=#Oo%E!wah+Oa)$z>XMzf!GN88#yz+f_u+m#fCupq9>ybh6p!I?Jb@?i6rRR2coxs$dAxuZ z@e*FfD|i*J;dQ)$H}MwU#yfZy@8NxXfDiEzKE@~b6rbU9e1R|V6~4wd_!i&cd;EYO z@e_W=FZdO|;dlIjKk*m-#y_ZK=KhZY4UB?r7!{*obc})S7!zY*Y>b0(F&@Up1eg#L zVPZ^zNii8F#}t?nQ(|=z%q{7S_f(SQqPIeQbaY z(GweCV{C#=u^BeU7U+d7u@$yPZ}dT5^h1AagKg1*F*01U)V*cnX>!e9)+ zF4z^jVR!6-J+T)$us8O>zSs}@;{Y6pgK#ho!J#+|hvNtwiKB2dj=`}w4#(pJoQRWf zGETv%I1Q)c44jFxa5m1txi}B!;{sfWi*PY6!KJtim*WatiK}omuEDjq4%g!b+=!cS zGj74HxDB`C4%~^ma5wJ3y|@qe;{iN~hwv~S!J~K#kK+kEiKp;1p24$t4$tESyoi_Z zGG4)}cnz=P4ZMlB@HXDTyLb=p;{$w%kMJ=*!Ke5PpW_RBiLdZAzQMQn4&UPk{D`0M zGk(FZ_zl0~5B!P0@HhTJEerR56lh=+bi=3^4WnZWbjO$&3u9v(jEnIwJ|@6~mJs)Gh-IairFwb=D?ho3v**0%!~OjKNi4(SO^Pa z5iE+uusD{$l2{5$V;L-q<*+)<6%eiM6mc*1@`159?zCY>1xN z2peM)Y>LgWIkrGAY>BO~HF~2D`l28DV;gLXHf)D>Y>yqVBL-j~cEZkRVh{#n2zJ4) z*bTd55A2D((1E?N5B9}=*dGVrKpcdFaR?5@VK^K|;7A;Wqj3z5#c?Jlc5@-pvL|S4kiI!AL zrX|->XeqT+T52tgmR3urrPnfO8MRDWW-W_sqHJ1rEr*s<%cbSk@@RRrd|H03fL2f| zq!reRXhpSRT5+v}R#GdamDb8=Wwmlzd98w0QLCg?)~aY#wQ5>*t%l~I)zoTfwY550 zU9Fy0Uu&Q>)I7CDT4Sw=)>LbzHP>2bURq17mDXDG)_gQy%}?{!+GuSxo7PUVYwfiT zT1PEF3)DJkoi$Sn(t@=Rt&7%G>!x+rdT2ehUYbMet@Y9RYW=kS+5l~!Hb@(+4bg^b z!?fYr2yLV`N*k?>(Z*`ywDH;mZK5_wo2*UIrfSo)>Dml!rZ!8Pt0%e3X%3T>sfN?Wb1(bj6~wDsBsZKJkH+pKNTwrbn7?b;4)r?yMmt?kkF zYWuYP+5zpLc1Sy{9np?z$F$?x3GJkIN;|Ec(avh;wDZ~p?V@%`yR2Q&u4>n`>)H+N zrglrat=-Y?YWKAJ+5_#O_DFlIJ<*-3+<)$N_(xn(cWtBwD;Nv?W6Wd`>cJ@ zzG~mJ@7fRTr}j(xt^JYRuj@iL^eDQU9#xN~N7rNM?s`l;mL6M=qsP_b>GAahdO|&s zo>)(!C)Jbb$@LU^NG|~ndO^LAURW=p7uAdD#q|<;NxhU_S}&uQ)ywJS^$L1Ly^>y8ucBAgtLfGC8oGyG zQ?I4h*6ZkX^?G`Jy@B3P_tYEdjrAsaQ@xqqTyLR!=`HnEdTZTV_tAZIKiyw%qqo&< zdOO{&x7R!99rXY`Q17I7)=fP~57tBUE_zqJo8DdTq4(5#=?=ZO-be4N_tX391N4FV zAbqetL?5aT(}(LL^pW}~eY8GCAFGek$LkaHiTWgcvOYzhs!!9W>ofG3`Ye66K1ZLc z&(r7Y3-pEhB7L#GL|>{e)0gWj^p*N5eYL(uU#qXv*XtYfjrt~iv%W>&s&CV`>pS$F z`YwI9zDM7y@6-3|2lRvbA^os^L_ew@(~s*X^ppB2{j`2YKdYb9&+8ZTi~1$~vVKLs zs$bKu>o@e9`Yrvoen-En-_!5w5A=unBmJ@dM1QJ3)1T`v^q2Z8{k8r^f2+UK-|HXr zkNPM5v;IZ@s(;hJ>p%3L`Y-*r{zp!JT?k=_D8fxd712a=5kt6(m?D;lE#io{BA$pZ 
z5{QH%kw`3(h@>K!NG?){lp>W#Ez*dzBArMtGKh>KlgKQxh^!);$S!h-oFbRVE%J!G zBA>`F3W$QDkSHvQh@zsHC@xBflA@F-Ey{?pqMRr%Du{}rlBg`Ih^nHRs4i*<4^dOp z617DgQCHLx^+f~GP|TqO&kXkO&qbqKoJ%x{22p7m@KA?(@VwG4e)`+!Yomek6h>c>C*ete)tzw(lE_R5WVwc!0_K3Y=pV%)Bh=byg zI4q8cqvDu2E>4J(;*>Zo&WN+(oH#Eoh>PNqxGb)StKyotE^dgM;+D8A?ufhMp13a_ zh=<~lcr2cXr{bA-E?$V2;+1$U-iWv2op>)kh>zlv_${>V$e zZV1CLq8M&QR3n-Z-H2hh8!?SoMr)JSF|H&PfWjZ{Wz zBaM;PNN1!sG8h?+Oh#rSi;>mHW@I;V7&(nxMs6dIk=Mv)hz77#>DVqn1(IsAJSM>KXNo21Y}} z(`aNgHkuesjb=u3qlMvRv@}{7tqpI($M7}$41c4I(bljT?F_ro-soU-Gy;r3qm$9u zFpVH1*a$JY7+sBSMt7r!(bMQy#AEU3)&**OqFa{cfjKRhbW2iCA7;cO(MjE4x z(Z(2KtTE0QZ%i;I8k3C4#uQ_!G0m86%rIsevy9os9AmCA&zNs4Fcun%jK#(hW2v#s zSZ=H^RvN2})y5iQt+CEnZ)`9&8k>yG#uj6%vCY_S>@ap3yNun&9%HYu&)9DqFb*1r zjKjteWJh5EEfyOoB-<879XRm=aTA zYD|N!Q%^@aJx2K1r+mV3z9K9KxgHDUlaKS29dlq#%!Roz59Y;um>&yZK`exYu?QB$ zVptqYU`Z^6rLhc_#d264D_}*egq5)hR>f*q9c!Qm*2G#^8|z?QtcUfn0X9TWY=n)m z2{y%M*c@A+7q-M!*c!dj2Yt~G{jm+UMH{w5JGRFT*bxIT5IbRKG%*N+F$B9{SL}w} zu?P0VUg*Hy*a!P!KkSbKa3BuC!8inm;xHVJBXA^+!qGSe$Kp5~j}verPQuAJ1*hUP zoQ^YaCeFgyI0xtAJe-dUa3LSeNC+@=CxCi&*KHQH7@E{(-!*~Rb;xRmqC-5Ym!qa#L&*C{ej~DPFUc$?G1+U^Y zypA{UCf>r^cn9y|J-m+(@F70J$M^)F;xl}XFYqP4!q@l)-{L!bk00{VlK>$c`z^L!~9qP3t}NGj76|07Q^CL0!v~kERAKbESAIaSOF_yC9I59 zuqsx=>R1CkuqM{R+E@qcVm+*n4X`14Vk2yfO|U68!{*omy|5*=!q(`GKIn^n=#Oo% zE!wah+Oa)$z>XMzf!GN88#yz+f_u+m#fCupq9>ybh z6p!I?Jb@?i6rRR2coxs$dAxuZ@e*FfD|i*J;dQ)$H}MwU#yfZy@8NxXfDiEzKE@~b z6rbU9e1R|V6~4wd_!i&cd;EYO@e_W=FZdO|;dlIjKk*m-#y=?k3}C%<6lh=+bi=3^ z4WnZWbjO$&3u9v(jEnIwJ|@6~mJs)Gh-Ia zirFwb=D?ho3v**0%!~OjKNi4(SO^Pa5iE+uusD{$l2{5$V;L-q<*+)<6%eiM6mc*1@`159?zCY>1xN2peM)Y>LgWIkrGAY>BO~HF~2D`l28DV;gLX zHf)D>Y>yqVBL-j~cEZkRVh{#n2zJ4)*bTd55A2D((1E?N5B9}=*dGVrKpcdFaR?5@ zVK^K|;7A;Wqj3z5#c?O}p7vo`kOn?b75hlhYm=u#?a!i3KF%_o9G?*6CVS3Df88H)P#w?f> zvtf43fjKc3=Egjj7xQ6$EPw^E5EjNFSQLw4aV&u)u@siZGFTSNVR@{86|oXl#wu79 zt6_DlfgV^BYhi7ygLSbU*2f0e5IwOGHpV8{6q{jlY=K_b5?f(w^hO`_ML+b%HrN(z z*beR39y?%148TC_gq_jEAPmM3?1Ejf8+OMY*b{r91AAj1?2G-dKMufwI0y&h5FCob za5#>@kvIxR;}{%^<8VAqz==2sC*u^Hiqmj9&cK;C3uogToQv~tJ}$t8xCj^H5?qSQ za5=8PmADF5;~HFx>u^18z>T;GH{%xEira8I?!cY63wPrl+>85gKOVq?cnA;U5j={= z@Hn2plXwbG;~6}Q=kPpUz>9bZFXI)wir4Tu-oTr93vc5cyo>knK0d&Q_y`~46a4%8 zycF^awfucv6#4tSVrEL2aQ!?lmE3lispWelKhH~}!XCOuv_ZDl!#tyDx-g=+MUD99 z{KXD4ttuGd&-v*>cV0Hk%S^A5p1QaE;h(4O>#6&B>i(X38&AEh|KFp`Aj^dKgm^{` z@;BT02bmdV+F@o=d)iX^cJdDJ^A5`68)*9HaTaiIV3l(>Gs`0Kw}x4)@>x}R&ls&b z`UiXac?Wx&*<`@W%r4(;$-Ql!wnnmO4pqp@%&CH@Y)u_zE)}pf_t0!DJan6vha10a z%q`>IUS=LuFzink^Q!W)ZQKUQA1iofb3UhNnv8~fMN9|#$jb7m%B)Y#uWE_pQwzwr zx5F%`3U+Xqg{)!$cI$8hz0ATg?J$d2QPURWp?SD@=<@fJMOFC_yE?WI?~owbu-$Ck z)rt7n9@%N6V%EOJ)xLJsGIpzBoUKwqMZL_D5_x=29cC#Nus!!OOIsU>Y%l-*cC(C% zI2%9?4mWw~x-LI3Y>~1Nis*6#1^f8h-g=qkRF++r?{*>I{B0k-%Ti!C->o0KR+AgDC#y@s4T~BwK&$wK85Yh8-P9KE$p%C*r)Q z^^p?oelUEBL6^LZs`pUQ*7JjN=*sF!VD({>u|MOw-wU0F{ z+NjE`Pi?DeiR4plGA>)Cohm3>#cmY~uv>>KTcy2BJIoGNG;FJMROLhL>ewQ*il>`j zaI4@RW`N9cwNIejc9@;y`@ikeSw+1}Q{p_*AQgz@NP}fuwoiyE=w^%RFuPa)|H0P2L)5SYd-IGc623fQ98rs+n=q{AF(1tT=w zD7DXjX}ZyBC)srFgKVT25Ir`My(rqUAk2=r9?#f-90xnbti_ezl`cX`=_TD?8{ z#wh33ZFF1t=HByMSEOZv`t($eijwH~mCz<0BR@wr@mO^@VQ0>`(9f6mS^4?SmyB93 z*VX?&UBjbNE*eJ17--F~@#;gZ;W$BkT4=jXlu3s<$qGhjx5;Xs|I%(#)K32o+HI=* z9692qsl$;YZo2wdc}28_f*f%(WZGfQw4%-t=WabI%u?k;?5cGmj<^i25jWe~caGZk zpAk1#MZL^<66cJVuL6&jdz^i1g^UPMut;o)7jf~5dU#kkrmS1NT3$R;F zB3pjFOgqdCRy6Ef->Axm*wy(*Xn9Y=FQk*XNoKfuV6)tIm|Nugzdf*3MZL^z5_z2R zA$+?EM7Z$nP_fWf-l>x69;%hM-YZz|6|MJ5&Uon4_%2yaHpFg8xSrdiDsk5NKfjmp zh_hE!W*zN5RZE1U-5*-B8WRUp(s@+Y`+q&!gR-#fjYAUE8;8}tavoUCBYWeBOgqe@ zRy1sH98={(?CP8%_J&)>cHR8_%;Pf4)gLG1w!=Ir-~a88Q!46Zo|ZVr%^4MlWJa8o 
zaoMitRKc(_;=C#!+E*7;vMQG;O@1fIQ$2ryA_WTOE0izH%e{7OIg~G|x_(qGM``B> zzNCWI-RiQcDv~3)BIB~nuBw8v&8}I+0_@f)$Tqt!(+=~76%E^FH&yu%yE@*8Z6^GJ z&08|=YOvdK+hN|3@BcQ~T^02*?@648d0z!0Im`z#E*tEjDj2rG9;x!q2D9$!|HE68 zHN79J%B;qDqH2lcQ=iJXY@BDRplqDyR}-TLlLA`v!I}qsoZu)?zInSjF8d$>o76YDaf7nk*r2 zD$%V9VyFtNd!Co+F1O{^=zDVEV1XAaA6|9z&6Cu7cLif>gA+DwA(O(5g`US>jxt9uj4ZHJjy?eK5+CQ)Jeq4JxW>|UX3 zk@Rj-RVG63CJWt99c*%|$hWY4n?i;>qqg$(_O&dzVu6WZCXIZ;5yWoH+(vcq2G3aN-{ zGY@01T53w>x+TbIV7+9%^^(0EL&oG7Ei6NFc}VT}?Lj%b2q_DnwF2 z#qbqWa#c`1w1Ubq=I=18NYvn}s`ikBOLemvT-8+CVOF=oYIMoDWR0#Gsz8Wcbw!B7 z^!S@EhrN|w)0wZ@J;YzFyUA~$DP3c&mW(>g+G;1~V|0YER!8M{nROM;v#V!qhHsww zDy9y|c|~cUl97+KhBC{ZTaEhmIWM@QE& zI7e21N=AMvfig>WM<<-mbsyj@n!wj;bVY?$(<%ihSx&G~rN3QM&v3BaB zc8at+x~d#6vzx+s4&ANI@ZHfv#X`HIr%FbC4!vZStmmo2bXfJsMb_TdM%YExJ}M%s zlZ*TRX_2*Wgd#%z*D`C{{jB=l+df#U+WoDR)%pXh&G1J)kb0QcgRCO*s%qUkZ12_5 z^I#csp8AkTDi}%y@_Mc=%fnRCTH$S?*0JR`VSj&fxC}bi+12BR?Sraygp7q*Sskg8 zUgjvZd$=JwT18}A$kwoi=-)qe{GW!P{HU<^u`yxmiR@1%GT(YdT*{HHD%XXqKSQPEQi~N1`)D#t6$r7bc9b`d*?;q0H*f#mHcn5vQ`$5SHN`=H3rtJ@^TEQwpU|dgVbSew4%--;QZ#Y zN#%#wRm-@pfS$4B61%m+AQ#$$%*`^(bxGVJw;kqI`5t+PZ&Nv5=5~ei)OJ{#;X8b% zipg%^!m>*x!=74bSMQctvL5+w*?S_XXD{{qhd*@6T$Nu749ndG;BSQ-R_t-|2$|%sOMSOdYsE#FXnz5n<&1fl_)U! zj&j(640#euuQuG$OPTet$BD%8Bh>ReY(45h;e`q`QGfGp6BwA^d?}NzQ~~opRNkzDv;|BeZT%z@H>(U{)DeUlh-`w_zSH-moYipghb6Y zL#k$*8s%!XMNw&o>1Kt)&bFv(hY-6OeR8%%`{9jDplws@nznH$Ms7I)WDm8smtAJh2TDzL8b~Q)sYHHfmj12R41N*;!9FQ%S zM(ytGy|mV5_})v$$1z#c%VfmKnnA{#6=aO0f=uBn$n2^h%w)~-w}PyZRFExv1=(E{ zgqf^4{#KAPk_vKB0nb0VRnj?IBM!GbGE2_cyb?8M^Qn?@&Z+^f=4^hIc9;dMaM(Fp zP!$NVt05=nT%o`Dt~p!SnXiVD%rEjc-!*58I`h>Kk@>~`<_nLg9zxF9;?8{4^{zQ9 z7rN!o;QTw7C1lWHmQ?##UlzhGZd*m%>#Bn8W+_=lt_ha5b|_=*kWD^#+1=$Zv2}CR zS5{`Q!d*_4k-x@LI6Jt4wHbbdR8%oJ(fB@5NhO^PYrQ-FR#!fBPFI!%oO8O0RolOF zy6WE&)gqK|Er?f_aXEHtsDfed*&eFAIu`jeI59-6qr7$9U7h~F`_AF~ygcQc=EMA0 z01ILvER034C>F!wSOQC8DJ<>$u2WMrjP+fomTHj*57xC^t>nBS*HOv;)?;;L0r_30 zo*_jSgRHGkk0MxemiQI{anQYQ4}$n>I4!?2WdO>~9Odf4hJ8x66>c z%(j=P@zg=>AjgyHWi_5UsR?d;sS0$~MvWIV52L5+ujQinOniaLjb Ub8&yL$`7%twsT!yLPGNVfBy>S&;S4c literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle b/pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle new file mode 100644 index 0000000000000000000000000000000000000000..6bb02672a4151c8d6536127fc94e68634e56c86d GIT binary patch literal 125349 zcmd?y1y~ea!|?H$Gk|~?*xiAJ-GySK*exQlxU7_f-Hoojc6WDocXxMpcYQOv{{>X^ z_V#(+_xrBvJ6`UOVRmO{PoFbqmR>ev@=uf{Ep#`z80{i-;eomcllHT1Kv1Ypq{)~p z%qPU(C&JY)G+gJ(exqGO%M(YN^7OVm+uPqK(#K>>8TZ+NA(6W95TBq3^(YMkL)r%E zDhGx7wX5S3;?q_aZZf~v#mi(&&Wi#={B@ml5w5;I5jvC6(K|%f(YvPhJN2o?6hAyE z+$W^1_N)e))@NxaZWH1YtTTBUZ6hLm!Xr%{lF=q2GBnKOVXOV1Q-6@72ACR{eAIVN z84?v7*4gsef}?^W1AW57eL9mS)UOsBm>1e8*r;~`u2D@M7T$mW1_`at}eY$$idS`tK^<(N&dFoSpNcuD$w)(WzPt@IHv@7IVxR`5U zbE4SzsIh6%CpOyoYVjUn@-qM0u7gidl(r+6(LT6sxVO4bO(A>Vs6aIWLew8^BZ30` zw9lZPzOCg0MEOKTsPAvI@m4>c`YzUt;NTssi}W$4y?Sb?eALs5@--@I%A53dmb{^N z&^xI=sR{kNQ&;z9)@Re=RG%YmjOH_*;4n2g2ir^1u%m}3_H*|1)Z*Bv1cZi~s*}Sc-JfJI zYt$HGqo(~IM2(14^V?4uBBoGBTJn>txdu>c3vO^So(l4*xkvx(6r zAY9kpWK0#4PUf1_H8da~LZ?QOQK_O(IF%>JlH2Fu#3(VZM13E(O;{g zjS0LhFH*~{8UyC2QWIQ@F@If zJgxO%{BhXS92=HAW-U&FET^Ci+M*pMKsD!?g9BQN;V8XUCF#e;|ISnX=h8txM?atX zX_1GlUmUGp(%qzAre7HwXZkg21nJijJL+C)h2gPTe=}L%@w-~``&vN+2gZ~2_0BPw z-<;{yX3yLJsV!O^ts$~zdM$(71?f8I)K=6>jVb4toMU~N+A^tG$WMJ?&5+0v#p1j$ zzGxL{p->Yv>5mC%$(}|fG(=}IX9u;izV(ZwQ7p$u^Q*+S9xJkAsO4BX^&|NBT0cT& zV^T7|xldtf?GsfC5AC80@vKrQPV@S6Q&2c=ldA6!UsIrFL0Oi~=a5pocG`jP&P?$<>_Ywz>_ar>RrO?xj(`+Y@= 
z*K6j~{AC>fKArTB{yS-;e`7w5PXAszo8Ly)zl>BvA~l*l4T;r9?a;AN8C#An#*`yN zO7&@f(26MjQDO0xC4Ca3?LTCEExDq!*3u|fG8yH{>VJ3je-)FVf%@iJ+ov_+bPRv4 zj{Z8e4Ib*@yQ@(UPaPL?PUdqkG*q8yXrw(f)*gO3ub8Ma)KvG!#$GFJ>z}(Pil5q_ z|G5$VsaK}Nb|}>@a_YEcRXu{PZKzt?_yn1Z3Ekst#k5%V-sX~|_Jg903B9c^(7Kz> zzl#d3PO&79;_DNxHiN!CT}=8Oanr?U=j|8juN~>9o}&6{%C#aSbJ>rIx33)Mqs2yM z)OV5DJvK6x-_`2=e-xK-I_F;ToVS(%)J9Gn1N>QMBeo7np=GJ~#{jfbhMC`et_885 z>-#?a_pOyWe(_ej1b>iCzW->yZpdRkV-}ywrBkaNr!32ZkLonPef3?f={e$^P&-ap zrernNKWptM{dogzvRH}ow8>LH*XUS3N*(L! z!mEb($MhZbmVS*fk$D`f4!`S!hD7R39!^?M$M9DDEFMk~QNA9XwEm&tgS!30!@n61 z818%*Q;+{BrlQnIhH!1LuH|LL=p47EvFtE9s2zTvYUU?dYZ!Gr>1cF{cP2sY9{sM4 z(b`mVbjDVSoy=XGH%WoU~7DJ(g7M zl*cv-8MQ4-qac2$$BYizh{+Pe`f6S#wU=AN;HW;|I$$>>wrr?l_Wv+S>;5K6;~Q64 zBUsHp-v#TodS2%6tL>Ny^1lsyvrycm zMWH!(n_k$qjX3bvdf zC^rA9Qyp5hA7$AQZrve!SnCSQj!xDciigd2ujpajAv~B(i?E}@AG7M55EG0>8eFDSPSxtsgzdJ@u?^OTDXd9^y*Xc~^*wCCaw7F1q zs@R{A?tg70B_v_)$m+fyNf}P6F>kn{K3ImP|DDoq8J+&Bl*U)u)e|s>ALjtHQu@DN z!s6ulZCcJ$t8DW8D{a$KXMdmPKUjCf=K1(@u%W7!ZT_x#jLA2#J&vD-bw7>2VmsDq zME;y6ilrtWRvOmB74$}cp?^p}YgQ*Sr;p(gz)Wkk%#SWOv2H}&b3 z9+RP``eU@Am-_hoCL*>{)9Rw{hqnf!_N$*7ZuKP$ZG|jprAr$I`}pa#UbA|w%Rew! zo#s{h@#Znj?-JyfA2UuWRQoO!8pWo9we0*N6%uKwpk62Rwp=v)%Y=xH9W5oy(;>g@ z^Tdwke^m=AYM)1~z0ybo^|0okmVI;3;`49PF}7UCv?+g-m_N@`7<6hRtK+e@D&}|?xa7J=TK_~h^-?n{eqF!9qKvLFNnYLG&-2CiFs=icYm2H{@o=l#<^XK_p3zi8d@W1+Zi-SJ~*`L1<`@UZg` z=6?4tN{f>^`(`OHkzFGGw!p-GEQ{TYFsJK^cWU7-iu{P(Y{TMUn_rIT?h;k$ozl|U zW_+E}YStaGc`N>$qE4Nuvt_!V_@>S*JB+sV%uk4&Hv5ZN9rdb8q%K&!UShVgdHig> zOrlS3NUPQ%YP`nNX;IgWs2N?mJ~o}E7OcHgtCAZ#(7N8mYV2&8<$3RWTKu=Kw;spZ z3$`BTU(dots7YjqR3GA2BXPP=L0S^VOc`~xCa4rMvbJ`itSJ~XYh)Q{=+#&J`+2z7 z4oOUz|2G4TpIXiDdb&TSuQ~>?rtfpo4}MISAJfqq|Nr?Y#PWu&ZWaq+VJw10u^1M| z5?B&TVQDObWw9KV{~sCsn7+$2^I|j2Potj^+CJ+%qhY(+vKam@$HcZ0<{Yg(#LdyM zr91vUlQsE&F)99IHn0|RYj&_|EtcOG$DcEtI(=z=$N1)qf9%vt1ncjus+HAu!TNix z75NvfgBDY9=iC0UxAWu6;OS^Ph=)W>i`%^B)K8|zwD z>*6%)rMHBZOK<@#Ee(w`*S?6nBFlJdHsPC$+5VTy0wyY2|M}^^q8Xum9G-~4i$%?5u?=4fy z`m@>uMYyhQ5pCh;A5PZBUI2+Rur?~~qdL*|FwcMf!HjK8^=4jrqLy%dD)X$V`K7;F z?_wV_*P_OQEDND1=IH)uVNBd7s_Tfff96``_j9!o>Pn+9Zi<{+@d9_xwde8TkEo02pZOaL%OWDkqcdaGNBgxMV zf|kD8+Tdc}EeMNuZLpe_lH{cRCoO-tGNvBM5UJ+vXm@*UwW3zcv?prAv;peBc-N7{ ziO|@qEnezDVtamVwJl36p5(jhuSUm!z@Q*+^CIlnIMAmuCh#R|s23227@EcwH#OM} zE!0h8GE!Vy;~7t~|GI`vVu`K4TMDbrGat)zpwb)lN?fMpUL!PvE4{p`x%uRP0&i6l z`ls)v`9Af9w(nG{F9=?%Ikt?jeXn}w(Ab8W#k}&()XeJqVCVYK=h#<|98>!KB-XWq z$A8*!V%_@LVp`*PW8K=PU z%a|x;cRO{=tj=_r@0C;6#6=j|tFf)Vtf6Zby^C^y`oHaWld8YC2&^85i@*ugyA~81TRW}fsJHK^hh&%>Q(#I=g{d(Orp0ua z9y4G@%!JmDo`vnKm<_XI4$O(UFgLnj9?XmRFh3SRS1gFunzk_8MX)Fq!{S&1OQN-% zEX{TqEQ{r^JXXLT?lhGu@%zf?j#aQKRzvI6L+hBZCcm$R9#|XepeNQv>l9*rwi{qW zY=n)m2{y%M*c@A6OKgQ+*c!dD4f>!j`k_DSFaX=49s|*U?Jx*~F$6;~4BKNkMqngH zVF&Dpov<@@!LHa1yJHXRiM_Bl_QAf`5BuW)9EgK38VBPLG~!SkhQo0Lj>J(o8cjF` z$Kp5~j}verPQuAJ1*hUPoQ^YaCeFgyI0xtAJe-dUa3LSeNC+@=CxCi&*KHQH7@E{(-!*~Rb;xRmqC-5Ym!qa#L z&*C{ej~DPFUc$?G1+U^YypA{UCf>r^cn9y|J-m+(@F70J$M^)F;xl}XFYqP4!q@l) z-{L!bk00EK}L0*nK@`TN@~FdZP5-BpglUEBPK*A)b0q>f_6`! 
z7LuU#UQ}nclVNg9fhjQ+rp7dw7SmyR%zzm&6K2LNm=&{OcFch}F&E}W7tDisF(2l~ z0_chbu@Dxt%*9E!tmIF7)PI0{Fj3CG}A9Eam^0#3w9I2otl zRGfy>aR$!BSvVW#;9Q)C^Kk(##6`Fmm*7%dhRbmUuEbTi8rR@jT!-s%18&4kxEZ(L zR@{c$aR=_iUAP0#-yftb~=(9jjnftcKOG2G+z{=z+Dd4tio;tcUfn0XD=& z*ch8&Q*4IKu?4n7?JhVicwuYw#y04KzUYVksKWqki+T)11GYo$wmU5ZV+e*~7`Df7 zjKD~Y!VcIGwVU*`&>6d6SL}w}QM-3f3q7$H_QpQg7yDs<9DoCH5Jux*9D+t1ioY>oQBhJ2F}D;I2-5ST%3pVaRDyGMYtH3;8I+M z%W(y+#8tQ&*Wg-QhwE_zZp2Nv8Mok8+=kn62kyjOxEuH2UfhTK@cNB9_@;8T2t z&+!Gm#8>zl-{4z(hwt$Ne#B4s8Nc9H{D#)cW3pnY7X&J3zCNn%vSr&26QDghpd%(k zCrpHiF$pF`XH164F$Jc?RG1pA-$Rv_?R1zPGoW^hp%yY>X3Ti(0EQZCg1eU~7SQ^Vw{^H>`w}(H*N`Rn%@0 z)IxQvfitTItfDKW*4N(h?u?aTCX4o8CU`uR;Uf3GFu?_m5FZ!WB zYIo{sApo^o_OzhKKr~=G48mXx!B7mt_85*47>QBX0Xt$R?2KKoD|SQe<~}X-z@FF( zdt)E$i~X=a4#0sp2%~W@4nZRh#bG!cN8m^tg`?4gV{j~v!|^x)C*mZWj8kwbPQ&Rq z183qaoQ-pEF3!XGxBwU8B3z71a49as<+uV@;woH?Yj7>D!}YiUH{vGTj9YLkZo}=k z19##s+>Lv1FYd$rcmNOLAv}yn@F*U`<9Gs3;we0hXYeeZ!}E9nFXAP8n z18?Fjyp4D8F5W})A{M?oF8=HHmdowha@8XLxLmdJ@dR2Sl_eo5>WUp(b$L$$$zF1h z9MuczPU=PU#8MI|spKprlafm*q?A%BDYcYFN-L$4(n}enj8Y~kvy?^3DrJ+hOF5*R zQZ6aCY9|Fr!BU76DuqeyrEn=iij<G)x*UjgUr4qomQ2Ng5-K zmBvZqr3unRX_7Qqnj%e=rb*MK8PZH?mNZ+MBh8iON%N%z(n4vGv{+gqEtQr@%cT|4 zN@5_C=x*}bbu1VLW8`4ebmULUXBi)tmN%y4( z(nINy^jLZ#J(ZqG&!rdAOX-#LT6!bBmEKA3r4Q0a>67$X`XYUm)C)7}H&_Z;k!@sK z*-lO%+sh8JqnuE7k`u{^Yd)TC@(OvSyh>gzuaVcv>*V$F26>~rN!~1Pk+;g*Vc`KEkJzAfL8 z@5=Y&`|<<%q5MdGEI*N-%FpEI@(cN;{7QZ;zmea{@8tLL2l=D?N&YNqM2weT8NgS zmGBa+g|}!Ue1xy?6aGRc0z_M(7lFbc+KC_$EJ8%62ovo^xQGyuB1&`+9YrV6S#%Ly zMK{r1^bkEoFVS1{5q(8J(O(P@1H~W_Ee4Ar!YGD{VPd!#Ax4T(Vze-cF=DJ3C&r5j zVxpKNCW|Rzs+cCGiy303m?dV5IbyDuC+3R>};-PpX9*Za9sdy%yix=Xh zcqLwoH{z{$C*F$>;-mN^K8r8nt564Kima$NpC~qptzxGnQ0x^4#ZgJ9I4Oyg#7Yt+ zsp708Q<5twl$1&;CAE@9Nvot&(kmI1j7laYvyw&0s$^5LD>;;$N-ib0;-chH@+$e1 z{7M1ERVk`Qbnn%R8y)e zHI$l4EyY8rt<+IGmAXnjrM}WYX{a<(8Y@kdrb;uVxza*uskBnOl-7#3(nj%7d=)>% zU(qQ6N?S#*1S$rlof4!3D@nfN@t~u(pBlEbXR&PJ(XTc zZ>5jYSLvtpR|Y5pl|f3hGFTa+7?q*QFlD$hLK&%yQbsE#WsEXb8K;a_CMXk?Ny=np ziZWH1rc766C^MB=%4}thGFO?W%vTmD3zbF6Vr7Z4R9U7hS5_!1l~u}WWsR~{S*NU5 zHYgjFP0D6vi?UVOrfgSsC_9y1%5G(kvRB!s>{kvb2bDw0VdaQ&R5_*`S57D=l~c-T z<&1JxIj5XgE+`k3OUh;CigH!Crd(HUC^wZ`%5CM2a#y)$zKe*}dPR%;`_+0$b6ZyH z^(tkZvX|5jf+hSgh&M0t)%Mu3-ww^a3iW4uwjIzB6QUC)!o-*alcF;w!{nF(Q(`Ji zjcG6~ro;4@0W)GI%#2wuD`vy&m;-ZSF3gQCmKFp5=&=m_}AuNnVuqYP8;#dMp zVks<*Ww0!k!}3@GE20}#!pi85Rj?{n!|GTAYho?*z}i>`J+UspqpiecCu!!ZIQF$z0iN9=^1 zu?u#^ZrB}rU{CCYy|EAW#eUcy2jD;)gwZ${hoBLM;xHVJBXA^+!qI5LF*p{-;dq>Y z6LAtw#wj=zr{Q#*firOy&c-=77w6%8T!0I45iZ6hxD=P+a$JEcaTTt{HMkbn;d@fE(tH~1Fc;d}gm zAMq1@#xM94zoE6^mu1T@1S)8QwrGb5&>kJo5fh>lCc?y+1e2mOCd1^I0#jltOpR$U zEvCctm;p0lCd`akFe_%m?3e>{VlK>$E|>@NVm{1|1<(}>Vj(PyMX)Fq!{S&1OQN+4 zSDNiISQg7+d8~jH(G4qMWpu|XSQV>bb*zCku@-t@ZLEWySQqPIeQbaYu@N@LCfF34 zVRLMOEwL4PVQcipHt2)C=!gEO!vJiHdJIGZw!+E7=e)(g&nXXcEZls z1-oK5?2bLKC-%bL*a!P!KkSbKa3BuCXdH|~(1=5E7!Jn~I1)$UXf)v%9E;;{JWjxg zI0+}?6r76Fa5~PwnK%n);~boe^Kd>cz=gO77vmCKipy|0uE3SJ3RmMAT#M^)J#N5_ zxCuAo7Tk*4a69h6owy5k;~w0L`*1%Vz=L=Q591L$ipTIcp1_lM3Qyx1Jd5Y>JYK+y zcnL4#6}*bq@H*bWn|KRv;~l(<_wYVGz=!wXR%59Y;um>&zED;C5;SQv|7Q7neVu>_XHQdkM;-v*baj*7(*}=!>~PuV+2NG6n4Ol*auVK^K|;7A;WqtS$8a4e3)@i+k|;v}4m zQ*bIy!|6B!XW}fJjdO4=&cpe*02ksST#QR_DK5k1xB^$=DqM|ga4oLG^|%2y;wIdT zTW~9G!|k{Ocj7MGjeBq}?!*0f01x6JJd8*1C?3P(cmhx2DLjp5@GPFg^LPO-;w8L{ zSMVxc!|QkhZ{jVyjd$=a-oyL&03YHbe2h=_xJ%n;wSu! 
zU+^n_Lv>YvrTs^N3fiD8+F=5;M+bDogy@8cFfk^Js)Gh-IairFwb=D?ho3v;6j=E1y}5A$OIbj5;L2n%BoEQ-ajIF`VYSPDyH87zzC zusl}4is*)wurj)16|9QYusYVjnpg`xur}5~Pppgeus$}xhS&%jV-swO&9FJPz?Rqw zy|6WUV;l5AU-UzN)L{U&MLhl^aE!o6jKU7s5j$aL?1Ejf8+OMY z*b{qUZ|sA8u^;xw0XPr`VKffLA!x**I1Gp52pox{a5S25435QdI36e9M4W_^aSBew zX*eBc;7pu_vvCg2#d$a%7vMr%go|+rF2!ZI99Q5rsL98cg$JcXz644%bvcpfj{MZAQU@d{qW zYj_=R;7z=RxA6|%#d~-kAK*iLgpctFKE-GF9ADr|e1)&^4Zg*9_#QvtNBo4J@e6*% zZ)m;1FWXr90Rk1YL0h!L1Za;A=!gl?2@_#rOoB<#8IxghOo1se6{f~Cm=@Dvddz?s zF%xFSESMFuq4pK~TF8MpF&E}W7tDisF(2l~0_chbu@DxzSs}@;{Y6pgD@Hg;}A6BP#lKCaRiRUQ8*e+ zI0nb!I2?}?a3W5^$v6e4;xwF&GjJx(!r3?n=i)q^j|*@iF2cpQ1efA6T#hSnC9cBN zxCYnaI$Vz%a3gNQ&A0`(;x^olJ8&oN!rizB_u@X>j|cD|9>T+T1drk|JdP*uB%Z?4 zcm~hnIXsUS@FHHq%XkH^;x)XEH}EFj!rOQU@8UhYj}P!6KElWN1fSwFe2y>hCBDMf z_y*tNJA98H@FRZ0&-ewu;y1J|?2~OR^@TtMZO|6&Fag@513F?tbizcK7?WU9bjDqLqPRxb5(FOBhUd)I2u>iVaK`exY zu?QB$VptqYU`Z^6rLhc_#d264D_})*!%A2g-LVQ*#cEg`YhX>Rg&tTN>!2sr#d=sD z8(>3hgpIKYYTswBg=W|swJ$c;LQ8Cg+PA4|p*4DA8}vb6^h1BtVF0#8JqDry+hGs} zV+e*~7`Df7j6m(Hw+=|<9JMO@pxC?jV9^8xja6cZvgLnuJ z;}JZH$M86wz>|0iPvaRpi|6n>UcifZ2`}Rnyo%TGI^MvWcnfdi9lVS8@IF4khxiB| z;}d*}&+s|Ez?b+6U*j8mi|_C~e!!3T2|wc({EFYu{B_@I{UY0$s~Jh4f;MQ2cBp;V zvKH*o0Ua?RI$4aJ93wCiqp$;Z#7@{5yI@!BhTX9T_QYP;8~b2i z?1%kv01m`K7>$E*2pVxH4#VL%0!QK~9E~O%gJW?Vj>ic&5hvkfoPtwv8cxR-(1U=A`0 zRL}-((GC-!`Hx(wM{r=<5fh>lCc?y+1e2mOCd1^I0#jltOpR$UEvCctm;p0lCd`ak zFe_%m?3e>{VlK>$E|>@NVm{1|1<(}>Vj(PyMX)Fq!{S&1OJXT3jb*Sbmc#N`0V|># zR>I2Yj#aQKR>SI818ZU}^uXF!2R*Sa*2DVP02^W>Y>Z8?DK^9A*aBN(EA+zF=#6dA z2Yt~G{ZWSj*cSB|hz4wjK^Tl77>Z%o9>XyLBQXj)U`OnPov{mc#ctRgdtguOg}t#4 z_QihK9|zz-9E8z07>A${hvG0Cjw5g+j>6Gs!ZA1&$KiOKfD>^NPR1!X6{q2JoPjfO z7S6^wI2Y&Pd|ZGFaS<-YCAbuq;c{GoD{&RB#x=MW*Wr5HfE#fWZpJOR6}RDb+<`lB z7w*PAxEJ@~emsB&@em%yBX|^#;c+~HC-D@X#xr;p&*6EzfEV!+UdAhU6|doSyn#3I z7T(4?co*;CeSClq@ew}8C-@Yf;d6X}FYy(=#y9vD-{E`wfFJP_e#S5O6~CeN27cMz z(hm@*pbgrh9VS3~bU;T;h)$RY6Jrugiq4n}lVb`@iK#F(roptB4%1@>%!rvVGiJf8 zm<_XI4$O(UFgLnj9?XmRFh3SRS1gEyurLth3Kh>fr@Ho>OY44Y#MY>BPV3tOW%wm~2C zML+aM9R^@q)MFqTupI_rFos|#hGBaQ#|VtXDC~e8u@iR2F4z^jVR!6-J+T+|#y;2= z`(b|^fCF(5M&n={f<_#Q!*Do`z>zo#N23YH;8+}o<8cB`#7Q_Ar{GkahSPBd&csv02a#7(#vx8PRXhTCxm?!;ZV z8~5N|+=u(|03O6cco>i1Q9Opn@dTd4Q+OKB;8{F}=kWqw#7lS?ui#a@hS%{1-o#sY z8}Hy_!ytyQ+$Tc@ddubSNIy=;9Go$@9_hE#83Dczu;H=hUzx~TE_n< zP(d5CMLSG@_UM3)m=K*X5hlhYm=v8c879XRm=aTAYD|M^F&(DI444r!VP?#NSuq=C z#~hdwb75|D!9183^I?80fUZ~&3t?d_f<>_y7RM4;5=&ueEQ4jS9G1rlSP|W@5>`fc ztb$ds8dk>|SQBfZ2iC?q=!tc)9@fVO*bp0GV{C#=u^BeU7T6M7p%=DBZ)}4;=!<^n zk2(y%wy4KIG+;Xn!e9)+Pz=NN7>*GbiBZ@AJ7Op7j9suRcEj%21AAgG?2Ub}FZRR! zH~|*eX2%?u z6LVp1biq8B7xQ6$EP$?95DQ^pEP_R`7#7D8SQ1NNX)J?fu^g7i3Rn@{uo6~AcdUX{ zu^Lv#8dwu+p$FE+I_QaYu^!gP2G|fAVPkB9O|cm^#}?QUTcH=WMsI9`KIn^n=#M%K zz_zHzKr~=G48mXx!B7mt_85*47>QBX0Xt$R?2KKoD|W-~*aLfFFYJwdurKz*{x|>! 
z;vkI1!8insI24EBa2$anaTJb56OO^LI1b0-1e}PIa57H8sW=U%;|!dMvv4-f!MQjO z=i>rgh>LJBF2SX^442~yT#2i2HLk(6xDMCj2Hc37a5HYft+)-h;||=3yKpz|!M(T- z_u~OPh==en9>Jq{43FapJc+09G@ik;cn;6w1-yut@G@S(t9T8s;|;utx9~RJ!Mk`5 z@8bh}h>!3wKEbE>44>l*e2K5{HNL^O_zvIW2mFYi@H2kFulNnszXD)s|52cVHfW1> zm;mk30Ua?RI$Yx?v@(jP6(kt70{* zjy13*)ta2uj}5RPHp0f(1e;q9kCALB@Y>nR727S;M{m>tE7=Ue2 zkAY~wb{K@g7=ob~hV3yNBQO%9umg6)PS_c{U{~yh-LVJu#9r7N`(R(}hy8H?4#Yti zje~Ir8gVEN!{ImrN8%_PjV2s}V{sgg#|bzQC*fqAf>UuCPRAKI6KCOUoP%?59?r)F zxDXfNVqAhtaTzYh6}S>t;c8riYjGW}#|^j-exUdJ1F6K~;dyn}b~9^S_X z_z)lAV|;>7@fkkH7x)ri;cI+@Z}A(*C1B1#Qq4?Jxn_ zqXRl(LUh7Jm>82_Qgp^-m>g4JN=${RF%720beJA9U`EV@nK27y#cY@zb6`%)g}KoM z^I%@ghxxGpx?({rgoUvP7R6#%97|wHEQO`943@=mSRN~2MRdbTSQ*{13RcBxSRHF% zO{|3;SR3o0C)UM!SRWf;Lu`bNu?aTCX4o8CU`uR;Uf3GFu?_m5FZ!WB>M#J?q8T033*eFd7Hr z5H#XY9EQVj1dhZ}I2uhj2FKz!9FG%lB2L1|I0dKTG@Onza3;>e**FL1;yj#>3veMW z!o|1*m*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s z!ozq3kK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A z!pHaopW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H&p)upr!psfePB7E!trMv_}VY z#DwUCi7+uH!KCPn$uK#lz?7H@Q)3!Ti|H^uX26V?2{U6B%!=7CJLbTgm4UGm9R3pV->85)v!9&z?xVK zJ+L;`K~JoU^{_rRz=qfe8)Fk}ip{V&w!oIy3cav3dSe^(L0|Mkf7D?BwnaS#q5<1s z5C&rihGH1D$8e0mNQ}Y`*bzHnXY7Jqu^V>B9@rCmVQ=h%eX$?*#{oDH2Vpc0#vy3L zp*ReO;|Lsyqi{5ua14&caX20);6$8+lW_`8#c4PlXW&eng|l%E&c%5+9~a<4T!f2p z2`Lkg}ZSN?!|q$9}nO`JcNhw z2p+{_cpOjQNj!z8@eH2Db9f#v;6=QIm+=Z-#cOySZ{SV5g}3nz-o<-(A0OaDe1wnj z2|mSV_#9v0OMHc|@eRJkclaJZ;79y~pYaQR#c!zo{a;J_j{+66L0h!L1Za;A=!gl? z2@_#rOoB<#8IxghOo1se6{f~Cm=@Dvddz?sF%xFSESMFuVRp=cIWZUJMi(j>fsq)69k3&I!p_(QyJ9!&jyw+=|<9JMO@pxC?jV9^8xja6cZvgLnuJ;}JZH z$M86wz>|0iPvaRpi|6n>UcifZ2`}Rnyo%TGI^MvWcnfdi9lVS8@IF4khxiB|;}d*} z&+s|Ez?b+6U*j8mi|_C~e!!3T2|wc({EFXDN<#aO0u{7DTeQOjXpau)hzZdN6JcUZ zf=SUClVNg9fhjQ+rp7dw7SmyR%zzm&6K2LNm=&{OcFch}F&E}W7tDisF(2l~0_chb zu@DxtTItfDN$`HpV8{6q{jlY=JGY6?$Q7^u{*mgTCm8{;0zMY>RpfL<6?NAPmM348<^P zkKq`Bkr;&?up@TD&e#RJVmIuLJ+LSC!rs^i`(i)rj{|TZ4#H?0j6=|fLva`m#}POZ zN8xBR;TRl?<8VAqz==2sC*u^Hiqmj9&cK;C3uogToQv~tJ}$t8xCj^H5?qSQa5=8P zmADF5;~HFx>u^18z>T;GH{%xEira8I?!cY63wPrl+>85gKOVq?cnA;U5j={=@Hn2p zlXwbG;~6}Q=kPpUz>9bZFXI)wir4Tu-oTr93vc5cyo>knK0d&Q_y`~46MTx#@HxJ~ zm-q@_;~RX7@9;f-z>oL|KjRntir-L5O8buj6|_NHw8I2wj}GXF3DF4?VPZ^zNzoaT zVRB4?DKQnM#x$4~(_wndfEh6pX2vX-6|-S>%z-&E7v@G6%!7F`ALhpb=!ylg5EjNF zSQLw4aV&u)u@siZGFTSNVR@{8710eVVP$m3Dp(b(VRfv5HL(_YU~Q~}o>&*_VSQ|X z4Y3h6#wOSln_+Wofi1BWdSPqy#y04KzUYVksKWqki+T)11Gd8;48{-)#V~A-;TVCD z7=<0MBX+{h*af>{H|&l*uqXDy-q;8GVn6JU18^V?!e|_fL(qsraTpHA5jYY@;b=7B z7#xe^a6C@Hi8u)-;}o2V({MV@z?nD;XX6~4i}P?kF2IGj2p8iLT#CzZIj+E!xC&R} z8eEI(a6N9ojkpOn;}+bC+i*MXz@4}YcjF%1i~Ddt9>9Zm2oK{CJc`HgIG(_hcnVMB z89a;U@H}3?i+Bky;}yJ$*YG;tz?*mrZ{r=ji}&z8KEQ|g2p{7Ue2UNTIljP`_zGX+ z8+?oJ@I8LOkN62c;}`sj-%xU<{YQZc+Mq4kVFI*A2Xw@Q=!A(dF($#J=#0rQIi|pr zmltur+#P8}vb6^h1BtVF0#8JqDry+hGs}V+e*~7`Df7jKD~Y!VcIG zJ7H(+f?cs2cE=vr6MJEA?1O!=ANI!qI1mS6G!DigXvCp742Rj>T~} z9w*>LoP?8c3QomoI2~u;Oq_+YaSqPKc{m>z;6hx4i*X4q#bvl0SKvxqg{yH5uElk@ z9yj1d+=QEP3vR`2xE*)kPTYmNaS!greYhVF;6Xfuhw%s=#bbCJPvA*Bg{Schp2c%` z9xvcUyo8tW3SPx)cpY!xO}vG-@eba_dw3ro;6r?bkMRjU#b@{&U*Jo8g|G1qzQuR= z9zWnm{DhzJ3x36KC?%u)M}Z33pe@>A0<=d5bi{<{go!XQCc&iWjL9%Lrofb#3R7bm zOpEC-J!Zg+m;O(V-YNh#jrS*z>-)B zOJf-friq)_>*1(!r3q7zl)LgW zIkv!-*b2R{HF{$k^g&)9w#RUcz(|b34%iVpVQ1`u zU9lT>#~#=ddtqs24#pv9#GyD0hvNtwiKB2dns5w`#c?4UGm9R3pV->85)v)^iVW}QtMcV-c3U3vA_gveyZQHheu5H^s*S2ljwr$&* zH#4h$zNAT$rcJxBB$mR`SO&{tIV_JAup(B%%2)-fVl}LeHLxbu!rE8|>ta2uj}5RP zHp0f(1e;q836kB2|Y>jQOEw;n<*a16YC+v(}uq$@M?$`r+VlV8CeXuX~!~Qq` z2jUe**FL1;yj#> 
z3veMW!o|1*m*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du z2k;;s!ozq3kK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N z5AY#A!pHaopW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y<*a z(tmW&MGpl^RH)I%01bvta2uj}5RPHp0f( z1e;q836kB2|Y>jQOEw;n<*a16YC+v(}uq$@M?$`r+VlV8CeXuX~!~Qq`2jUe**FL1;yj#>3veMW z!o|1*m*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s z!ozq3kK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A z!pHaopW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y<*a(SLN% zMGpl^RH)I%01bvta2uj}5RPHp0f(1e;q836kB2|Y>jQOEw;n<*a16YC+v(}uq$@M?$`r+VlV8CeXuX~!~Qq`2jUe**FL1;yj#>3veMW!o|1* zm*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s!ozq3 zkK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A!pHao zpW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y<*a(|>f(MGpl^ zRH)I%01bvta2uj}5RPHp0f(1e;q83 z6kB2|Y>jQOEw;n<*a16YC+v(}uq$@M?$`r+VlV8CeXuX~!~Qq`2jUe**FL1;yj#>3veMW!o|1*m*O&9 zjw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s!ozq3kK!>r zjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A!pHaopW-uo zjxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y<)v(0_E$MGpl^RH)I% z01bvta2uj}5RPHp0f(1e;q836kB2| zY>jQOEw;n<*a16YC+v(}uq$@M?$`r+VlV8CeXuX~!~Qq`2jUe**FL1;yj#>3veMW!o|1*m*O&9jw^5_ zuEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s!ozq3kK!>rjwkRW zp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A!pHaopW-uojxX>f zzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y<)v(tmW&MGpl^RH)I%01bv< zI1G;wFd|06$QT8qVl<47F)${^!q^xG<6=CFj|ng#Cc?y+1e0PiOpYlqC8omEmta2uj}5RPHp0f(1e;q836kB2|Y>jQO zEw;n<*a16YC+v(}uq$@M?$`r+VlV8CeXuX~!~Qq`2jUe**FL1;yj#>3veMW!o|1*m*O&9jw^5_uEN#0 z2G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s!ozq3kK!>rjwkRWp2E|3 z2G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A!pHaopW-uojxX>fzQWh| z2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y<)v(SLN%MGpl^RH)I%01bvta2uj}5RPHp0f(1e;q836kB2|Y>jQOEw;n< z*a16YC+v(}uq$@M?$`r+VlV8CeXuX~!~Qq`2jUe**FL1;yj#>3veMW!o|1*m*O&9jw^5_uEN#02G`;` zT#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s!ozq3kK!>rjwkRWp2E|32G8O- zJdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A!pHaopW-uojxX>fzQWh|2H)a4 ze2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y<)v(|>f(MGpl^RH)I%01bvta2uj}5RPHp0f(1e;q836kB2|Y>jQOEw;n<*a16Y zC+v(}uq$@M?$`r+VlV8CeXuX~!~Qq`2jUe**FL1;yj#>3veMW!o|1*m*O&9jw^5_uEN#02G`;`T#p-Y zBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s!ozq3kK!>rjwkRWp2E|32G8O-JdYRf zB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A!pHaopW-uojxX>fzQWh|2H)a4e2*XS zBYwiq_yxb>H~fx2@F)Jl-}ndr;y<+F(0_E$MGpl^RH)I%01bvta2uj}5RPHp0f(1e;q836kB2|Y>jQOEw;n<*a16YC+v(} zuq$@M?$`r+VlV8CeXuX~!~Qq`2jUe**FL1;yj#>3veMW!o|1*m*O&9jw^5_uEN#02G`;`T#p-YBW}XY zxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s!ozq3kK!>rjwkRWp2E|32G8O-JdYRfB3{DF zcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A!pHaopW-uojxX>fzQWh|2H)a4e2*XSBYwiq z_yxb>H~fx2@F)Jl-}ndr;y<+F(tmW&MGpl^RH)I%01bvta2uj}5RPHp0f(1e;q836kB2|Y>jQOEw;n<*a16YC+v(}uq$@M z?$`r+VlV8CeXuX~!~Qq`2jUe**FL1;yj#>3veMW!o|1*m*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3 zHr$Roa3}7<-M9z$;y&Du2k;;s!ozq3kK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QG zHN1{D@Fw2E+js}>;yt{N5AY#A!pHaopW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb> zH~fx2@F)Jl-}ndr;y<+F(SLN%MGpl^RH)I%01bvta2uj}5RPHp0f(1e;q836kB2|Y>jQOEw;n<*a16YC+v(}uq$@M?$`r+ zVlV8CeXuX~!~Qq`2jUe**FL1;yj#>3veMW!o|1*m*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Ro za3}7<-M9z$;y&Du2k;;s!ozq3kK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D z@Fw2E+js}>;yt{N5AY#A!pHaopW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2 z@F)Jl-}ndr;y<+F(|>f(MGpl^RH)I%01bvta2uj}5RPHp0f(1e;q836kB2|Y>jQOEw;n<*a16YC+v(}uq$@M?$`r+VlV8C zeXuX~!~Qq`2jUe z**FL1;yj#>3veMW!o|1*m*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7< 
z-M9z$;y&Du2k;;s!ozq3kK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E z+js}>;yt{N5AY#A!pHaopW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl z-}ndr;y<(!(0_E$MGpl^RH)I%01bvta2u zj}5RPHp0f(1e;q836kB2|Y>jQOEw;n<*a16YC+v(}uq$@M?$`r+VlV8CeXuX~ z!~Qq`2jUe**FL1 z;yj#>3veMW!o|1*m*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$ z;y&Du2k;;s!ozq3kK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}> z;yt{N5AY#A!pHaopW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr z;y<(!(tmW&MGpl^RH)I%01bvta2uj}5RP zHp0f(1e;q836kB2|Y>jQOEw;n<*a16YC+v(}uq$@M?$`r+VlV8CeXuX~!~Qq` z2jUe**FL1;yj#> z3veMW!o|1*m*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du z2k;;s!ozq3kK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N z5AY#A!pHaopW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y<(! zS^qa|bkIc)1xi$?(Z>J{hF~}hj}b5;M#9J#1*2j#jE*rdCdR_p7zg8GJdBSCFd-(w z#Fzw=VlqsQDKI6b!qk`s(_%VIj~Or{X2Q&v1+!u{%#JxQC+5Q3mKFp5=upkz~ z!dL{0Vlga^C9oux!qQj<%VIe!j}@>YR>I0y1*>8;td2FXCf35*SO@E3J*YVk>NoZLlr2!}iz#J7Op7j9suRcEj%21AAgG?2Ub}FZRR!H~ZzF zARfZQcm$8)F+7eZ@FbqX(|88Y;yFBz7w{rp!pnFCui`bljyLco-oo2>2k+uNypIp? zAwI&#_ynKgGklIO@Fl*&*Z2nC;yZkgAMhi7!q4~xzv4Iijz91x{=(n*2mj(fv=Y;Q zbkIc)1xi$?(Z>J{hF~}hj}b5;M#9J#1*2j#jE*rdCdR_p7zg8GJdBSCFd-(w#Fzw= zVlqsQDKI6b!qk`s(_%VIj~Or{X2Q&v1+!u{%#JxQC+5Q3mKFp5=upkz~!dL{0 zVlga^C9oux!qQj<%VIe!j}@>YR>I0y1*>8;td2FXCf35*SO@E3J*YVk>NoZLlr2!}iz#J7Op7j9suRcEj%21AAgG?2Ub}FZRR!H~ZzFARfZQ zcm$8)F+7eZ@FbqX(|88Y;yFBz7w{rp!pnFCui`bljyLco-oo2>2k+uNypIp?AwI&# z_ynKgGklIO@Fl*&*Z2nC;yZkgAMhi7!q4~xzv4Iijz91x{=(n*2mj(fw35(&bkIc) z1xi$?(Z>J{hF~}hj}b5;M#9J#1*2j#jE*rdCdR_p7zg8GJdBSCFd-(w#Fzw=VlqsQ zDKI6b!qk`s(_%VIj~Or{X2Q&v1+!u{%#JxQC+5Q3mKFp5=upkz~!dL{0Vlga^ zC9oux!qQj<%VIe!j}@>YR>I0y1*>8;td2FXCf35*SO@E3J*YVk>NoZLlr2!}iz#J7Op7j9suRcEj%21AAgG?2Ub}FZRR!H~ZzFARfZQcm$8) zF+7eZ@FbqX(|88Y;yFBz7w{rp!pnFCui`bljyLco-oo2>2k+uNypIp?AwI&#_ynKg zGklIO@Fl*&*Z2nC;yZkgAMhi7!q4~xzv4Iijz91x{=(n*2mj(fw35<)bkIc)1xi$? z(Z>J{hF~}hj}b5;M#9J#1*2j#jE*rdCdR_p7zg8GJdBSCFd-(w#Fzw=VlqsQDKI6b z!qk`s(_%VIj~Or{X2Q&v1+!u{%#JxQC+5Q3mKFp5=upkz~!dL{0Vlga^C9oux z!qQj<%VIe!j}@>YR>I0y1*>8;td2FXCf35*SO@E3J*Y zVk>NoZLlr2!}iz#J7Op7j9suRcEj%21AAgG?2Ub}FZRR!H~ZzFARfZQcm$8)F+7eZ z@FbqX(|88Y;yFBz7w{rp!pnFCui`bljyLco-oo2>2k+uNypIp?AwI&#_ynKgGklIO z@Fl*&*Z2nC;yZkgAMhi7!q4~xzv4Iijz91x{=(n*2mj(fw35+(bkIc)1xi$?(Z>J{ zhF~}hj}b5;M#9J#1*2j#jE*rdCdR_p7zg8GJdBSCFd-(w#Fzw=VlqsQDKI6b!qk`s z(_%VIj~Or{X2Q&v1+!u{%#JxQC+5Q3mKFp5=upkz~!dL{0Vlga^C9oux!qQj< z%VIe!j}@>YR>I0y1*>8;td2FXCf35*SO@E3J*YVk>No zZLlr2!}iz#J7Op7j9suRcEj%21AAgG?2Ub}FZRR!H~ZzFARfZQcm$8)F+7eZ@FbqX z(|88Y;yFBz7w{rp!pnFCui`bljyLco-oo2>2k+uNypIp?AwI&#_ynKgGklIO@Fl*& z*Z2nC;yZkgAMhi7!q4~xzv4Iijz91x{=(n*2mj(fw35?*bkIc)1xi$?(Z>J{hF~}h zj}b5;M#9J#1*2j#jE*rdCdR_p7zg8GJdBSCFd-(w#Fzw=VlqsQDKI6b!qk`s(_%VI zj~Or{X2Q&v1+!u{%#JxQC+5Q3mKFp5=upkz~!dL{0Vlga^C9oux!qQj<%VIe! 
zj}@>YR>I0y1*>8;td2FXCf35*SO@E3J*YVk>NoZLlr2 z!}iz#J7Op7j9suRcEj%21AAgG?2Ub}FZRR!H~ZzFARfZQcm$8)F+7eZ@FbqX(|88Y z;yFBz7w{rp!pnFCui`bljyLco-oo2>2k+uNypIp?AwI&#_ynKgGklIO@Fl*&*Z2nC z;yZkgAMhi7!q4~xzv4Iijz91x{=(n*2mj(fv{KN2bkIc)1xi$?(Z>J{hF~}hj}b7e zOIYJ6%_{u=CPJf^?>a23a#*V|wq=DT2u)nZ8X6XwbohT$Da+X9BBmG`*6M(?tdWHi zmjB-sw&hr^DvQ{~(yj8)fXjQT*TUD&8 zRyC`-Rl}-j)v{_^b*#EpJ*&Rez-nkUvKm`Wtfp2otGU&}3bk5Vt*q8o8>_9=&T4OU zusT|utj<;!tE<(`>TdP0dRo1#-c}#0uhq}$Zw;^pT7#^?)(~r`HOv}rjj%>qqpZ=^ z7;CIG&Khq`uqIlQtjSiGHN~20O|zz3Gpw1`ENiwk$C_)+v*ue1tcBJhYq7P&T52t` zmRl>VmDVb2wYA1tYpt`^TN|v6)+TGSwZ+#%jiI%*xWj$0?Jlh!Hgv~|WhYn`*sTNkX0)+Ot*b;Y`BU9+xRH>{i1E$g;*$GU6X zv+i3DtcTVk>#_C3dTKqho?9=hm)0xmwe`k&YrV7HTOX{C)+g(;^~L&XeY3t>KdhhD zFYCAU$NFpi`|tW~+p%5SvxO~fWoz5F1KZdkb~rn{9l?%hN3tW^QS7L8G&{N-!;Wdk zvSZtE?6`J3JHDO3PG~2x6WdAbq;@hpxt+pJX{WMN+iC2yb~-z~ox#p%XR(!9vTNIQ?7DV6yT0APZfG~M8{19nrgk&Cx!uAJwOiV)?ACT0 zyRF^MZf|$6JKCM>&UP2OtKH4+ZuhWz+P&=Fb|1U1-OuiC53mQ?gY3cf5PPUS%pPu! zut(aX?9uiZd#pXq9&b;uC)$(j$#$4M#hz+Uv!~lL?3wl~d$v8to@>vu=i3YHh4vzQ zvAx7zYA>^w+bisq_9}a|y~bW^ud~=~7wn7nCHt~{#lC7^v#;AX?3?y2 z`?h_@zH8sJ@7oXThxQ};vHiq;YCp4|+b`^w_AC3f{lY6{E=>EZNrdO5wFK2Be!pVQwN;0$yIIfI=c z&QNEVGu#>BjC4jhqn$C%SZACw-kIP`bS62IoiJyLGu4^qOm}8DGo4w^Y-f%$*O}+c zcNRDcokh-KXNj}aS>`NvRyZr2RnBT>jkDHS=d5=&I2)Z!&Sqzev(?$=YQ_gAUjC0mG=bU#gI2WBu&SmF{bJe-# zTz76bH=SF~ZRd`2*SY81cOEzookz}N=ZW*wdFDKKUN|qESI%qajq}!d=e&15I3Jx) z&S&R~^VRw0e0P30Kb>FBZ|9Hm*ZKF~*KfOy>$;vRTq45-2`qzH<6pzP2whXlex*=6mCj4m7Cg4_xP9GzZhv=x zJJ22E4t9sQL)~HSaCd||(jDcFcE`A5-Er=CcY-_7o#ak-!`vzERCk&?-JRjibZ5D< z-8t@Dcb+@nUEnTs7rBewCGJvpnY-Lw;jVO7xvSkZ?pk-9yWZX4Zge-fo82w$R(G4b z-QD5tba%PC-97GJcb~i8J>VX654nfkBkoc6n0wqk;huC)xu@MT?pgPod)~d^UUV`&mJ_zdSdeOY-UJNg$7t4$7#qr{L@x1t60xzML$V==c@sfJUyyRXAFQu2t zOYNoc(t7E<^j-!pqnF9c>}Bz?dfB|}UJfs(m&?oT@rruI zyy9L7ucTMXEA5r>%6jFz@?Hh6qF2eQ>{aoqdeyw@UJb9NSIevI)$!_j^}PCC1Fxai z$ZPC1@tS(gyyjjDFVt)4wenhfZM?Q#JFmUh!RzRC@;ZB6ysln1ue;a7>*@9KdV77m zzFt4Czc;`e=ne7)dqcdT-Y{>tH^Lj~jq*l&W4y86IB&c+!JFt!@+Ny>-V|@DH_e;w z&G2S=v%J~f9B-~S&ztWp@D_TDyv5!UZ>hJ;Tkfs!R(h+v)!rI!t+&ow?``lldYin> z-WG4Gx6Rw`?eKPbyS&}r9&fL=&)e@E@D6&1yu;oR@2GdoJMNwEPI{-j)7}~Htar{k z?_KaNdY8P*-WBhvcg?%*-SBRDx4hfl9q+Dp&%5tE@E&@PyvN=X@2U69d+xpPUV5** z*WMfNt@qA*?|tw-dY`<{-WTty_s#q6{qTNzzr5exAMdaC&$EOr9N`L22qA?MTKFOm zMudoPBD{zoB8o^NvWOz0ifAIbh#_K%SR%HFBjSp9BECo<5{g73u}C74iew_WNFh>+ zR3f!VBhrd=BE85UGKx$hv&bT{ifkgg$RToyTq3u~Bl3!TBEKjg3W`FauqYymiejR; zC?QIUQlhjdBg%?$qP(aeDvC;?vZx}eifW>|s3B^KTB5e7BkGEJqP}P#8j41uv1lTi zie{p@XdyyHOVLWS7HvdZ(N44%9YjabNpu!nL|4&GbQe8DPti;C7JWot(NFXj1H?cv zNDLN3#85Fz3>PEBNHI!`7GuO%F;0vZ6U0O@NlX@DVv3k5ritldhL|a4iP>U~m@DRq z`C@@sC>DvuVu@HPmWkzJg;*(8iPd6_SS!|v^T?CX>q)GNnuN zGN;TXbIUw3ugoX&%L1~XEF=rdBC@C~CX34wvZO2}OUp8{tSl$X%L=lhtRySTDzd7q zCacRDvZkyhYs)&auB<2P%LcNcY$O}YCbFq)CY#F^GE}ydtz>K2Mz)phWP8~`c9flD zXW2z|mEB}_*+ce}y<~6MNA{KdWPdq84wQrBU^zq%mBZw4IYN$$fopP7lE%(U1a-ZBU56FY^kUT7p$fNR@JT6bjlk${2 zEziia@|-*`FUX7XlDsUh$gA?2ye@CZoAQ>tE$_&?@}9geAIOLDk$fzl$fxp|d@f(e zm-3Z-E#Jtu@|}DyKgf^rll&~d$glF7{4RgUpYoUdE&s^B@}IPntsLblPYETJQd;>c zP)3EQa4NitpdzYBDzb{AqN->rx{9GvZ`zxpbDx&s<0}eimGC& zxGJGas#2=7Dx=D(a;m(lpem|LsbHs-xV!I}PN~!Cj5@2%sq^ZBx~MLx%j$}{s;;T)>V~?h zZmHYqj=HPvsr%}IdZ-?$$LfiCs-CIm>VWBKNeyQK;kNT_rDNEbh(XRHi&{8X{wXXwhbchb8!|Mn-qK>2^>nJ*^j;5pQ7&@ko zrDN+jIm)jnu8}&Ze{L z96G1YrE}{%Ims_SE~bm?61t==rAzBFx~wjz%j*idqOPPX>nggc zuBNN&8oH*grEBXtx~{IL>+1%(p>Cub>n6IXZl;^-7CKb7)U9-D-A1?7?R0zHL3h-h zbZ6Z~ch%i=ciltx)V*|X-ADJ;{d9jlKo8V|^k6+i57oo;a6Lkg)T8uhJw}h!nO?3}=#_evUai;Y zwR)XiuQ%w8dXwI)x9F{Uo8GQ>=$(3(-mUlOy?USCuMg;h`j9@XkLaWNm_Dvg=#%=C 
zKCRE_v-+GquP^9}`jWn^ujs4#n!c`Y=$rbMzOC=*yZWBKuOH}#`jLLDpXjIhnSQQc z=$HDHey!i=xB8uauRrLI`jh^wzv!>}oBpnU=%4zR{;mJ$zxtoHeA{(euy8=5AR3tBl?m2$bJ+*svpgd?#J+B`my}jejGopAJ32PC-4*ciTuQV5tDnu!?&t7x`nmkvejY!spU=! z{GNUJRgW`y>33{wRO6KgJ*HkMqa-6a0z(B!99W z=1=jb`qTXB{tSPnKg*x(&++H_^Zfb#0)L^u$Y1O)@t69`{N?@%f2F_5U+u5)*ZS-H z_5KEbqrb`D>~Hb6`rG{N{tkbqzsuk4@A3Eg`~3a>0so+X$Up2K@sIk){Nw%!|D=D) zKkc9K&-&;5^Zo_@qJPQ1>|gP(`q%vH{tf@8f6Kq^-|_GI_x$_*1OK7_$balV@t^w7 z{OA4)|E2%Rf9=2V-}>+T_x=a}qyNeO?0@mU`rrKT{ty4B|I7dF|MCC&|9mU311E3; zFA#wYRG(Sqnfj38zZD~KJ$3E~Fvg7`s#AYqUw zNE{>yk_O3wkSWL3GxQ{ zg8V^&pkPoaC>#_CiU!4k;z5a^WKb$79h3>m2IYeCL4}}VP${S!R0*mE)q?6lji6>w zE2tgR3F-#*g8D&&pkdG`XdE;Nng-2+=0S@fG-w&L3R(wkg0?}spncFG=ooYgItN{X zu0gk;d(b238T1Nz2YrIRLBF7XFd!Hh35h7CI?}`lwfKwEtnq62xbPeg4w~GU~Vukm>(<%76yxg#lezbX|OC<9;^sf2CIVA z!J1%gur631YzQ_6n}W^3mSAhJE!ZCH2zCa$g5ANMU~jN5*dH7S4hDyU!@-f@XmBhz z9-IhH2B(74!I|J}a4t9>TnH`(mx9Z|mEdY{Ew~=s2yO}(rnD(z%9?Vfys2O+no6d!sbZ>{YNooWVQQLMrnaeL z>Y93{zG+|@nntFvX=0k1W~RAmVM0wy)5^3qZA@F!&a^ijOh?nnbT(Z~SJTaOH$6;G z)64WWeN11|&-6C~%s?~93^qf|P&3R7HzUkQGs=uMW6W4H&Wtw`%tSNEOg3R=ikWJr zndxSRnQ3O3*=CNJYv!5xW`S8~7MaCniCJovndN4MS!q_8)n<)ZYu1_dW`o&iHkr+4 zi`iphYtEVT=7PCs zE}6^bin(g8nd|0;xoK{h+vbkBYwnr*=7D)=9+}7HiFsBBgvbyTqC@C3)ZV~w#epIJn6Lxo|&aAAZnQWzzS7RCr;g>k}oVS+GGm?TUVrU+AoX~J}2 zhA>l@CCnD)2y=yb!hB(YuuxbeEEbjsONC{^a$$wAQdlLd7S;%Bg>}MuVS}(y*d%Ng zwg_8=ZNhe8hp%Ar z;ev2cxFlQ_t_W9!Yr=KmhHz83CEOP72zP~h!hPX^@KAUpJQkh^Plac~bK!;XQg|i2 z7TySNg?GYx;e+r|_#}K5z6f81Z^C!shwxMQCHxlt2!DltLL3km!~^j`0+0|S0*OHq zkQ5{X$$fB*pzU?2e*C_n`o&;bDkFo6YZ-~bnRzy|>cK?EpZfP)y20;B|~Kx&W% zqy_0fdXNER1eri)kOgD~*+6!X1LOp`KyHu+2AS=770i9+(dnfQ4WY zSPYhcrC=FY4pxAbU=>&m)_}EO9as-GfQ?`i*bKIStzaA24t9W@U>Dd8_JF-$AJ`8L zfP>%=I1G+}qu>}g4o-lR;1oCw&VaMv95@dyfQ#S~xD2j)uizW_4t{{2;1~D} z{(!&WABY3v!gw$~OaK$YL@+T-0+YgIFgX+;fDj@mLJTD+Lj|f(gE}P8fF`t{4ISu0 z5Be~GA&ek}400F)Q^1rk6-*7&z_c(OOb;`_j4%_-470$jFdNJcbHJQ17t9Uwz`QUY z%nu8|g0K)Q42!^`uox^3OTdz_6f6zPz_PF$EDtNdim(!_46DGZuo|omYrvYY7OV~H zz`C#=tPdN&hOiNA44c5Fuo-L)V_^%}61IY^VH?;Mwu9|q2iOsIf}LR(*cEnz-C+;d z6ZV3=VISBR_JjT505}j1f`j1@I1~VP_;PN*~Lg1Vw^s5|O`dZJ#aH|m4>qJF498h{3(L1-`ff~KNrXgZpKW};bWHkyOxqIqaOT7VX!MQAZvf|jCXXgOMe zR-#pCHClt#qIGCJ+JH8qO=vUPg0`Y8*dVn6HN9Zwn zf}Wyh=s9|UUZPj%HF|^IqIc*$`hY&7Pv|rHg1(|}=sWs>exhIKH~NGAqJJok7*~uZ z#upQa3B^QWVlj!BR7@r&7X=ZBP(-3AVo?%hQ4v*96Lpb@hG>eGXp4^Mik|3;ff$OB zNJS=cF-A-wrW8|&sl_y6S}~oNUd$k76f=pL#Vlf0F`JlO%pv9!bBVddJYrrkpO{}P zAQlu0iG{@?Vo|Y}SX?Y2mJ~~grNuI0S+Sg0UaTNi6f239#VTS|v6@(2tRdDEYl*eR zI$~Y1o>*UOAT|^miH*f3VpFl1*j$ViTZk>iR$^N#YN&`af!H8TqZ6TSBNXcRpM%Kjks1^ zC$1Mah#SRC;%0G+xK-RHZWnimJH=h%ZgG#eSKKG=7Y~RB#Y5s@@rZa-JSH9&PlzYQ zQ{rjyjCfW&C!QBCh!@36;$`uQcvZY6UKekOH^p1xZSjtHSG*_Q7axcZ#Yf^}@rn3U zd?r2@Ux+WoSK@2&jrdl4C%zXyh#$pI;%D)T_*MKSeiwg;KgD0-Z}E@#SNtc&!Etds z93LmZ32`Ev7$?C=aWb473m9OC5f(AV5|*)oRjgqh6Kr4;TiC`9cCm+j9N-W~m|})G zj=?E#N}LL(#%XX`oDQeQ8E{6N31`Mxa8{fRXU92kPMizp#(8jFoDb*61#m%J2p7gh za8XTn?AV6>vpd30KBda8+ClSI0GQOu9*f7}@puBBh$rF6cnY41r{U>%2A+v$;n{c& zo{Q(<`FH_dh!^3-cnMyLm*M4j1zw3);njEzUW?b^^>_o`h&SQQcnjW&x8d!02i}Qy z;oW!--i!C){rCVrh!5ez_y|6VkKyC^1U`vR;nVmGK8w%c^Y{Y3h%e#G_zJ#?ui@+X z2EK`J;oJBQzKieS`}hHVh#%p{_z8ZBpW)~D1%8QN;n(;Lev9AX_xJ<;h(F=a_zV7u zzv1ur2mXnF;otZV{)_+NI8t0Go)lk7ASIL%Nr|N-Qc@|Ilw1-dAVCR9qJ$+$k|jk_ zB~8*LA{mk?S&}U|k}G+VF9lL4MG}>m#HAQ1g_KfCC8d_qNNJ^XQhF(alu^ngWtOr? 
zS*2`Jb}5IHQ_3afmhwn>rF>F;sen{aDkK$_ibzGJVp4Ibgj7;0C6$)SNM)sRQhBL@ zR8guVRhFtqRi$cDb*Y9_Q>rD^mg-1#rFv3*se#l`Y9uw5nn+EhW>Rx0R%#)&lv+ux zr8ZJqsh!kb>L7KLI!T?SE>c&io77$EA@!7cNxh{$QeUZ`)L$AP4U`5+gQX$TP-&Pn zTpA&bltxLTr7_Z2X`D1(njlS-CP|Z}DbiGFnlxRSA6~<4x*%PYE=iZAE7Dcznsi;dA>EX2 zNw=jt(p~AEbYFTPJ(M0vkEJKlQ|X!XTzVnBlwL`%r8m-B>7Ddm`XGIjK1rXYFVa`( zoAh1!A^ntoNx!8((qHMH6i1FL$CKmB3FL%wA~~^~L{2IvlatGW3}h%HS(LFX$+E1- zs;tSnOk_hgWlOeYM|NdT_T@kh;HpFS)neNA4^4ll#j9m&+^UmGUZiwY)}NE3cE+%Nyj4@+Nt+yhYwBZHP=Epzq=*VuBt=#f zMO8FKSBPRLreZ0!;wY}-DZUaYp%N)nVG37bloU!zC6$s|Nu#7y(kbbc3`#~Nlag7< zqGVOFDcO}AN=_x0l3U56cQ z3Q9$#l2TczqEuC?DbDfQd_B`)K%&!^_2!nL#2_@SZSg(RhlWyl~|>P(o$)q zv{u?EZIyOPd!>WYQR$>~R=Ox%m2OIRrH9f}>812m`Y3&seoB93fHF`SqzqPuC_|ND z%5Y_bGEy0(j8?`dW0i5rcx8ezQJJJnR;DOZm1)X!Wri|SnWfBD<|uQOdCGicfwE9p zq%2mJC`*-P%5r6evQk;4tX9@2Yn64%dS!#MQQ4$yR<QZcid0d>s-()Q zqN=K<>MBtU)l@CjRvpzt7+7Pb& zS=6j*HZ{ANL(Qq?Qgf?$)VyjwHNRRwEvOb!3#&!cqG~a>y!rfM^_xf-jsP+O|4 z)YfVnwXNDtZLfAvJF1=3&T1F6tJ+QNuJ%xSs=d_SY9F<)+E4AT4p0ZGgVe$55Ot_J zOdYO{P)Dkx)Y0k~b*ws09j{JMC#sXw$?6n!sya=buFgKt{hI!~RiE>IV$ zi`2#H5_PG%OkJ+7P*#V|&FU6)tGZ3yuI^BGs=L(P>K=8k zx=-D&9#9Xeht$LB5%s8gOg*liP*19-)YIx2^{jeMJ+EF+FRGW+%jy;Ns(MYmuHH~@ zs<+hJ>K*m2dQZKtK2RU3kJQKN6ZNV3Ont7tP+zLA)Ys}8^{x6&eXo8{KdPV9&*~TT ztNKm-uKrMes=w6V>L2y5`cI9c#ns|z@wEh6LM@S&SWBWM)skt+H9-R!)Q~1>Sd%na zQ#4i6G+iT_p_!Vc*_xxdny2|%poLnbQH^O_i_ubODYaBuYAubHR!gU)*D`1swM<%O zEsK^_%cf=5a%efVTv~1|kCs=LbzHP>Rb7FtWK zmDXBoqqWuAY3;QRT1Tyu)>-SKb=A6Q-L)QCPpy~MTkE6s)%t1uwE@~dZICut8=?)> zhH1mK5!y&?lr~x$qm9+ZY2&pC+C*)VHd&jZP1UAp)3q7eOl_7nTbrZJ)#hpQwFTNj zZIQNETcR!1mTAki71~N|m9|=2qpj7}Y3sEO+D2`YwprVvZPm7E+qE6qPHmUATic`U z)%I!owFBBg?T~g@JE9%cj%mlW6WU4bly+J>qn*{xY3H>I+C}Y>c3HckUDd8>*R>nk zP3@L;Tf3v()$VEcwFlZm?UD9ad!jwno@vju7urkhmG)YDqrKJMY45cU+DGk^_F4O) zebv5c-?bmwPwkiXTl=H^)&6O5^tgIFJ-(hmPpBu-6YELzqx!=Gny%|aH*{0CbX#|HSNC*Z5A;xvbgDC*>oIx?J*A#XPpzlX)9UH;^m+z8qn=66 ztY^`)>e=+{dJa9Oo=eZI=h5@(`SkpH0llDJNH44x(TnQE^x}F6y`)}BFRho+%j)Iy z@_GfmqFza_tXI*i>eck>dJVm%UQ4g7*U{_h_4N9B1HGZ%NN=n+(VObc^yYf3-a>Dw zx6)hdZS=N!JH5T$LGP${(mU&2^sah0y}RB+@2U6Fd+UAlzIs2szdk@8s1MQy>qGRR z`Y?UCK0+Ur3>d`Z9gFzCvHAuhLiRYxK4HI(@yqLEorv(l_f{^sV|feY?Ix->L7?ck6rf zz4|_VzkWbJs2|b~>qqpX`Z4{uenLN~pVCk3XY{lBIsLqTLBFV9(l6^*^sD+c{kncb zzp3BSZ|isTyZSx-zWzXes6Wyl>reEj`ZN8x{z8ALztUgpZ}hkNJN>=>LI0?K(m(58 z^so9i{k#4{|Ed4df9rqrzxqEt4v9r-BgjZHii{>>$XGIt zj3*PwL^6p?CR4~%GL1|pGssLbi_9i-$Xqgy%qI)TLb8Y~CQHatvWzSzE67T+imWDU z$Xc?FtS1}DMzV=)CR@l>vW;vfJIGG5i|i(Q$X>FK>?a4vL2`&3CP&Coa*P}&C&)>1 zikv2A$XRlZoF^B^MRJK;CRfN+a*bRkH^@zLi`*u6$X#-e+$RsnL-L3`CQryy@{Bwu zFUU*sio7Oo$XoJ`yeA*XNAih$CSS-`@{N2aKgdt=i~J^k$Y1i0#4+L;@r?LJ0wbZ3 z$VhA?F_Id|jO2!300SDx5DjcdhHNN?YG{UT5W_G`!!m5cFDKq%q1EZHzI-8sm)d#sp)cG0B*0OfjY! 
z(~RlH3}dD-%b0Dk!jQz#||ypa zdzrn>K4xFDpV{9WU=B0~nS;$C=1_B(Ioup!jx%`N6ubDO!{++prCcbU7*J?36>pSj;WU>-CNnTO3I=27#QdE7iT`P_VA zzBFH%ugy2+Tl1ay-uz&GG(VZ2%`fIx^PBnI{9*nyf0@6{KjvTapBcxBYsItTTM4X$ zRw660mBdPFC9{%Sf(0ySAxpHdC0VkiSgNI2x#&+@In3a!YZ7PGh& zW2LZCTB)qmRvIg?QtF_g}YHPK#+FKp0j#ekDv(?4wYIU=^TRp6v zRxhi!)yL{<^|Sh01FV78AZxHS#2RW1vxZwEtdZ6zYqT}S8f%TS##|8_ zjn*b>v$e(AYHhQ&TRW_s)-G$ewa40P?X&h<2dsnEA?vVp#5!snvyNLQtdrI$>$G*o zI%}P?&RZ9(i`FIUvUSC}YF)FgTQ{tm)-CI{b;r7E-Lvjn53GmQBkQsC#CmEyvz}Wo zte4g+>$Ua9dTYJ2-di86kJcyav-QRLYJIc5TR*Iy)-UU~^~d^a{j=iOaqW0^d^>@i z&`xA0wv*UN?PPXxTd;u*ZDfl!wk2D(6DqwbR+@?F@ECJCmK+&SGb^v)S409Cl7Smz~?rW9PN=+4=1Pc0s$4 zUDz&S7qyGo#qAPyNxPI?+Ad?4waeM%?Fx29yOLemu3}fUtJ&4<8g@;)mR;MfW7oCo z+4b!Pc0;?7-PmqoH?^DD&Fxsbh27F_Ww*B5*lq20c6+;n-O=u3cecCOUF~jmce{t( z)9z*Ww)@z9?S6KDdw@OA9%K);huA~yVfJu)ggw$8WskPU*kkQ+_IP`OJ<*sUSuz}m)J|~W%hD=g}u^VWv{l^*lX=|_Ii7R zz0uxeZ??DCTkUQ3c6*1t)81w8w)fb3?S1xs`+$AWK4c%ZkJv};WA<_TgniOJWuLas z*k|o?_Idk)ebK&TU$(ET#7+_?sguk}?g$QWpo1LI!H(p}j^e0}=I9P_499dV$95dYbv(y+0w;7LhdRvR zPK=YnN$I3=Qafp!v`#uFy_3Po=wxy-J6W8pPBtgIlf%jB|oT5%Kr?^wXDe07QN;_qovQ9atyi>uc=u~nlJ5`*jPBo{xQ^Tq0)N*P&b)33R zJ*U3Yz-j0-avD2LoTg4Qr@0gBv~XHFt(?|Q8>g+)&S~#-a5_4joX$=cr>oP=>F)G! zdOE$F-cBE?<{Z@I*Xje&Jt&-v&>oUtZ-I3tDM!&8fUGu&ROql za5g%doXyS_XREW#+3xIcb~?M9-Oe6oud~nD?;LOrI)|LY&JpLRbIdvJoN!J$r<~Ky z8Rx8X&N=T~a4tHRoXgG?=c;qfx$fL>ZaTM|+s+;5u5-`1?>ulGI***k&J*XU^UQhf zyl`GRubkJ;8|SU_&Ux>Aa6USpoX^e|=d1J0`R@F1emcLL-_9TBuk+7|}x~}K?Zs3M)LQlxGCLKZfZA;o7PR|rgt;A8Qn~7W;ctQ)y?K+cXPNo-CS;NH;@Nv8@LVKMs8!biQCj|<~Da@-4#Br&$;K_3+_etl6%>`;$C&Hx!2tr?oIcWd)vL^-gWP}_uU8XL-&#U*nQ$Yb)UJ< z-52gl_m%tFedE4$-?{JI5AH|zll$5I;(m3%x!>I%?oaoZ``i8F{&oMkalE)*JTJbN zz)R>Q@)CPVyrf<-FS#dpz=Iz0L=StCCwq#gdYY$u#4|k8vpm~#JlFF)-wV9Zi#+Nv zk9#p*3NNLX%1iB~@zQ$fy!2iMFQb>q%j{+GvU=IP>|PEprRt`6rdP|W?bY$> zdiA{eUIVY8*T`$^HSwBy&AjGbtk=S8>9z7&du_b7UOTV7*TL)Pb@DoUUA(SdH?O5cM6dtyZ@#y{Tj(wF7JEy)rQR}cxwpbw>87DXU zduP0}-Z}5Qcfq^pUGgq_SG=pAmt^dvCnA-aGHT_rd$-O@B4ut`jJn4 z=5s&BPvNKZQ~9a=G=5q?ouA&%;AiwR`I-GJepWx5pWV;l=k#;=x&1tTUO%6o-!I@7 z^b7fg{UUx*znEX#FX5N;OZlb!GJaXVoL}Cr;8*l3`IY@DepSDkU)`_a*Ys=owf#DN zUB8}R-*4bI^c(q&{U&}>znS0MkM&#lE&W!0Yrl=()^F#x_dEC<{Z4*ozl-11@8);+ zd-y&5UVd-CkKfnt=lAyq_yhex{$PKIKhz)Q5BEp-BmGhSXn%}9)*t7O_b2!h{Yn00 ze~LfVpXN{ZXZSPyS^jK)jz8C*=g;>S_zV3-{$hWLztmsmFZWmYEB#geYJZKt)?eqZ z_c!<({Z0O6e~Z7>-{x=kclbN~UH)!=kH6R7=kNCq_y_$%{$c-!f7CzbANNoAC;e0Y zY5$CW)<5T;_b>Pt{Y(C3|B8Rrzvf@}Z}>O;TmEhTj(^v`=im1q_z(R@{$u}%|I~lx zKlfkwFa1~kYyXY^)_>=}_doa_{ZIa9|BL_C|K@-9fA~NBU;c0ZkN?;IFTgU08^jCZ z2ML0NL82gWkR(VNBny%ULI467pg;_8AO&)u1ZtoKdO!jrFas;F11E3;FYtpP2!kk~ z0SkB#6Ql@I2C0J7L7E_KkS<6cWC$__nS#tgmLO}8Eyy0^2yzCwg4{u#Aa9T_$R894 z3I>IO!afMMf|fz6pmoqDXdAQ(+6NtijzOoObI>K|8gvV~ z2R(wGL9d{9&?o2{^b7h21A>9UpkQz?Bp4bD3x)?Hf|0?fV017h7#oZW#s?FEiNT~` zaxf*B8cYkO2Qz}1!K`3*FejKB%nRlR3xb8gqF`~bBv=|O3zi2ff|bFlV0Ex2SR1Sh z)(0Dcjlrg1bFd}Y8f*);2RnkD!LDF;uqW6X>2ZDpaq2O?EBsdxz3yudTf|J3i z;B;^%I2)V`&IcEQi@~Mfa&RTM8e9vm2RDM7!L8tSa3{DM+zajp4}yomqu_DyBzPJ; z3!Vorf|tRo;C1jOcpJP6-UlCokHM$lbMPhj8hi`B2S0+J!LQ(V@F(~i{1;#r#tq|z z@xugR!Z1;oI7|{I4U>h*Lm>nq3{fbCIFv#;R6;e>LOmp*5t^YD+MyG=p%?mL5QbqC z(vXEbj0sbODZ^A@>M%{1HcS_$4>N=r!%Si3FiV&<%ob)3bA&m=Tw(4oPnb8%7v>KO zgayMwVd1bySTrma77t5=CBsr->99;#HY^vG4=aQf!%AW0uu51ptQJ-eYlJn!T4C+5 zPFOdr7uF9Ogbl++VdJn#*feYwHVL@KAU-JQ5xakA=s>6XD75 zRCqc(6P^vvh3CTy;l=P$csaZhUJb8>*TWm(&G1%uJG>L#4ey2b!w2ER@KN|Ud=fql zpM}rE7vanBRror56TS`Kh3~@;;m7b(_&NL%eht5c-@_l_&+u3HJNy&=4gU+UiQ-1_ zqWDpQC}ET+N*pDLl19m*qi{qLfjpD0P%3N*kq%(nlGhj8Ud2bCe~@8fA;JM>(RLQLZR=lqbp?<%{x1 z1)_pcp{Q_FBq|yei;71jqLNXmsB}~&DjSuH%10HViczJga#SU%8dZy`M>V3FQLU(U 
literal 0
HcmV?d00001

diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index fad6237d851fb..f46f62e781006 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -207,7 +207,8 @@ def test_pickles(current_pickle_data, version):
         if data is None:
             continue
         n += 1
-    assert n > 0, 'Pickle files are not tested'
+    assert n > 0, ('Pickle files are not '
+                   'tested: {version}'.format(version=version))


 def test_round_trip_current(current_pickle_data):

From 1b53d8864af0ed936f84d0935e2cc360dc9f8de7 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Fri, 31 Mar 2017 08:40:58 +0200
Subject: [PATCH 311/933] Only call validation functions when args/kwargs are
 passed (#15850)

---
 pandas/compat/numpy/function.py | 35 +++++++++++++++++----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py
index 4053994efa005..f448a9aad04c6 100644
--- a/pandas/compat/numpy/function.py
+++ b/pandas/compat/numpy/function.py
@@ -37,23 +37,24 @@ def __init__(self, defaults, fname=None, method=None,

     def __call__(self, args, kwargs, fname=None,
                  max_fname_arg_count=None, method=None):
-        fname = self.fname if fname is None else fname
-        max_fname_arg_count = (self.max_fname_arg_count if
-                               max_fname_arg_count is None
-                               else max_fname_arg_count)
-        method = self.method if method is None else method
-
-        if method == 'args':
-            validate_args(fname, args, max_fname_arg_count, self.defaults)
-        elif method == 'kwargs':
-            validate_kwargs(fname, kwargs, self.defaults)
-        elif method == 'both':
-            validate_args_and_kwargs(fname, args, kwargs,
-                                     max_fname_arg_count,
-                                     self.defaults)
-        else:
-            raise ValueError("invalid validation method "
-                             "'{method}'".format(method=method))
+        if args or kwargs:
+            fname = self.fname if fname is None else fname
+            max_fname_arg_count = (self.max_fname_arg_count if
+                                   max_fname_arg_count is None
+                                   else max_fname_arg_count)
+            method = self.method if method is None else method
+
+            if method == 'args':
+                validate_args(fname, args, max_fname_arg_count, self.defaults)
+            elif method == 'kwargs':
+                validate_kwargs(fname, kwargs, self.defaults)
+            elif method == 'both':
+                validate_args_and_kwargs(fname, args, kwargs,
+                                         max_fname_arg_count,
+                                         self.defaults)
+            else:
+                raise ValueError("invalid validation method "
+                                 "'{method}'".format(method=method))


 ARGMINMAX_DEFAULTS = dict(out=None)
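A rough sketch of what the short-circuit above buys (``validate_take`` is
called here exactly the way ``Series.take`` calls it; the error text in the
last comment paraphrases the validator's actual message)::

    from pandas.compat.numpy.function import validate_take

    validate_take(tuple(), {})             # fast path: returns immediately
    validate_take(tuple(), {'out': None})  # checked: matches numpy's default
    validate_take(tuple(), {'out': 1})     # raises ValueError: the 'out'
                                           # parameter is not supported

These validators exist only to mimic numpy-compatible signatures, so in the
common case both ``args`` and ``kwargs`` are empty and the cheap truthiness
test skips all attribute lookups and dispatch on every hot-path call.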
From e7201ca1a9f5b3359a0e179ab1faf6a39cc9e2c7 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Sat, 1 Apr 2017 11:37:16 -0400
Subject: [PATCH 312/933] BLD: bug in building json compiled code on windows
 (#15857)

---
 pandas/_libs/src/ujson/python/objToJSON.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
index 26a68b8a9ae3a..f2c0b18d35131 100644
--- a/pandas/_libs/src/ujson/python/objToJSON.c
+++ b/pandas/_libs/src/ujson/python/objToJSON.c
@@ -401,7 +401,8 @@ static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue,

 static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
                              size_t *_outLen) {
-    PyObject *obj = (PyObject *)_obj;
+    PyObject *obj, *newObj;
+    obj = (PyObject *)_obj;

 #if (PY_VERSION_HEX >= 0x03030000)
     if (PyUnicode_IS_COMPACT_ASCII(obj)) {
@@ -412,8 +413,8 @@ static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
     }
 #endif

-    PyObject *newObj = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
-                                            PyUnicode_GET_SIZE(obj), NULL);
+    newObj = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
+                                  PyUnicode_GET_SIZE(obj), NULL);

     GET_TC(tc)->newObj = newObj;
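The two hunks above are the whole fix, presumably because MSVC compiles this
translation unit as C89, which (unlike C99) requires all declarations to
precede the first statement of a block: the mid-function
``PyObject *newObj = ...`` declaration failed to compile on Windows. Hoisting
``newObj`` into the declaration at the top of ``PyUnicodeToUTF8`` and
assigning it later keeps the file C89-clean without changing behaviour.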
From 57c7c87f695f6b133742978e4c7d04f4892eb991 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Sat, 1 Apr 2017 12:04:56 -0400
Subject: [PATCH 313/933] CI: use pytest-xdist on windows

---
 appveyor.yml | 2 +-
 test.bat     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/appveyor.yml b/appveyor.yml
index db729b3005be6..684b859c206b2 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -72,7 +72,7 @@ install:
   - cmd: conda info -a

   # create our env
-  - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest
+  - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest pytest-xdist
   - cmd: activate pandas
   - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run
   - cmd: echo "installing requirements from %REQ%"
diff --git a/test.bat b/test.bat
index 080a1cc163a05..6c69f83866ffd 100644
--- a/test.bat
+++ b/test.bat
@@ -1,3 +1,3 @@
 :: test on windows

-pytest --skip-slow --skip-network pandas %*
+pytest --skip-slow --skip-network pandas -n 2 %*

From a57e681aef4d5de5da1201d18009b5dbb4382a6d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sat, 1 Apr 2017 17:48:58 -0400
Subject: [PATCH 314/933] PERF: improve iloc list indexing

Author: Joris Van den Bossche

Closes #15504 from jorisvandenbossche/perf-iloc-list and squashes the following commits:

bf54a0b [Joris Van den Bossche] TST: edit test_take to preserve original dtype
74d45ae [Joris Van den Bossche] add whatsnew
3e537b6 [Joris Van den Bossche] small clean-up
6d2705c [Joris Van den Bossche] take method: only validate kwargs if there are kwargs
aacbaa8 [Joris Van den Bossche] PERF: improve iloc list indexing
---
 doc/source/whatsnew/v0.20.0.txt |  2 +-
 pandas/core/indexing.py         | 24 ++++++++++++++----------
 pandas/core/series.py           |  7 ++++---
 pandas/indexes/base.py          |  3 ++-
 pandas/tests/test_generic.py    |  2 +-
 5 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 399f91fc60810..a34b9feb2b2fa 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -905,7 +905,7 @@ Performance Improvements
 - Improved performance of ``drop_duplicates()`` on ``bool`` columns (:issue:`12963`)
 - Improve performance of ``pd.core.groupby.GroupBy.apply`` when the applied function used the ``.name`` attribute of the group DataFrame (:issue:`15062`).
-
+- Improved performance of ``iloc`` indexing with a list or array (:issue:`15504`).

 .. _whatsnew_0200.bug_fixes:

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index c80e8c34aa88f..61a847ccf1523 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1697,26 +1697,24 @@ def _get_slice_axis(self, slice_obj, axis=0):
         else:
             return self.obj.take(slice_obj, axis=axis, convert=False)

-    def _get_list_axis(self, key_list, axis=0):
+    def _get_list_axis(self, key, axis=0):
         """
         Return Series values by list or array of integers

         Parameters
         ----------
-        key_list : list-like positional indexer
+        key : list-like positional indexer
         axis : int (can only be zero)

         Returns
         -------
         Series object
         """
-
-        # validate list bounds
-        self._is_valid_list_like(key_list, axis)
-
-        # force an actual list
-        key_list = list(key_list)
-        return self.obj.take(key_list, axis=axis, convert=False)
+        try:
+            return self.obj.take(key, axis=axis, convert=False)
+        except IndexError:
+            # re-raise with different error message
+            raise IndexError("positional indexers are out-of-bounds")

     def _getitem_axis(self, key, axis=0):

@@ -1724,7 +1722,13 @@ def _getitem_axis(self, key, axis=0):
             self._has_valid_type(key, axis)
             return self._get_slice_axis(key, axis=axis)

-        elif is_bool_indexer(key):
+        if isinstance(key, list):
+            try:
+                key = np.asarray(key)
+            except TypeError:  # pragma: no cover
+                pass
+
+        if is_bool_indexer(key):
             self._has_valid_type(key, axis)
             return self._getbool_axis(key, axis=axis)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index bcc1ed272b081..bcd58ea791083 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2378,7 +2378,8 @@ def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs):
         --------
         numpy.ndarray.take
         """
-        nv.validate_take(tuple(), kwargs)
+        if kwargs:
+            nv.validate_take(tuple(), kwargs)

         # check/convert indicies here
         if convert:
@@ -2387,8 +2388,8 @@
         indices = _ensure_platform_int(indices)
         new_index = self.index.take(indices)
         new_values = self._values.take(indices)
-        return self._constructor(new_values,
-                                 index=new_index).__finalize__(self)
+        return (self._constructor(new_values, index=new_index, fastpath=True)
+                .__finalize__(self))

     def isin(self, values):
         """
diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py
index 7f0de963e5c56..91e2422873dd4 100644
--- a/pandas/indexes/base.py
+++ b/pandas/indexes/base.py
@@ -1668,7 +1668,8 @@ def _append_same_dtype(self, to_concat, name):
     @Appender(_index_shared_docs['take'] % _index_doc_kwargs)
     def take(self, indices, axis=0, allow_fill=True,
              fill_value=None, **kwargs):
-        nv.validate_take(tuple(), kwargs)
+        if kwargs:
+            nv.validate_take(tuple(), kwargs)
         indices = _ensure_platform_int(indices)
         if self._can_hold_na:
             taken = self._assert_take_fillable(self.values, indices,
diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py
index a2329e2d1768e..0e8e8dc43ff03 100644
--- a/pandas/tests/test_generic.py
+++ b/pandas/tests/test_generic.py
@@ -1870,7 +1870,7 @@ def test_take(self):
                        tm.makeObjectSeries()]:
             out = s.take(indices)
             expected = Series(data=s.values.take(indices),
-                              index=s.index.take(indices))
+                              index=s.index.take(indices), dtype=s.dtype)
             tm.assert_series_equal(out, expected)
         for df in [tm.makeTimeDataFrame()]:
             out = df.take(indices)
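A minimal sketch of the fast path this patch creates (the message in the
last comment is the one added in ``_get_list_axis`` above)::

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.arange(20).reshape(5, 4))

    # a list indexer is now coerced to an ndarray once and handed straight
    # to .take(), instead of being validated element by element
    subset = df.iloc[[0, 2, 4]]

    # out-of-bounds positions surface via the new except clause
    try:
        df.iloc[[0, 99]]
    except IndexError as err:
        print(err)  # "positional indexers are out-of-bounds"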
a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1870,7 +1870,7 @@ def test_take(self): tm.makeObjectSeries()]: out = s.take(indices) expected = Series(data=s.values.take(indices), - index=s.index.take(indices)) + index=s.index.take(indices), dtype=s.dtype) tm.assert_series_equal(out, expected) for df in [tm.makeTimeDataFrame()]: out = df.take(indices) From d1e1ba08ef259724ba71e0953c52e8e4ad81bd17 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 1 Apr 2017 20:09:03 -0400 Subject: [PATCH 315/933] CI: add jdcal to 3.6 build as openpyxl >= 2.4.5 is broken --- ci/requirements-3.6.run | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 41c9680ce1b7e..8f81c4620558e 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -2,7 +2,10 @@ python-dateutil pytz numpy scipy +# openpyxl >= 2.4.5 should be dependent on jdcal +# but is not for some reason openpyxl +jdcal xlsxwriter xlrd xlwt From 74f527ff0cbc8045b9f350382a4ad37694e8c5e6 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 2 Apr 2017 10:19:43 -0400 Subject: [PATCH 316/933] BUG: Check integrity of sparse int indices The check_integrity method of IntIndex in pandas.sparse was un- implemented despite having documentation. This PR implements the method and calls it when initializing `IntIndex`. xref #15844 (comment) Author: gfyoung Closes #15863 from gfyoung/sparse-pyx-refactor and squashes the following commits: f435d28 [gfyoung] BUG: Check integrity of sparse int indices --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/sparse/sparse.pyx | 48 +++++++++++++++++++++++---- pandas/tests/sparse/test_libsparse.py | 38 +++++++++++++++++++++ 3 files changed, 80 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a34b9feb2b2fa..230f39db67197 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1020,6 +1020,7 @@ Sparse - Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) - Bug in repr-formatting a ``SparseDataFrame`` after a value was set on (a copy of) one of its series (:issue:`15488`) - Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`) +- Bug in sparse array indexing in which indices were not being validated (:issue:`15863`) Reshaping ^^^^^^^^^ diff --git a/pandas/sparse/sparse.pyx b/pandas/sparse/sparse.pyx index 00d317c42b18d..0c2e056ead7fa 100644 --- a/pandas/sparse/sparse.pyx +++ b/pandas/sparse/sparse.pyx @@ -34,8 +34,9 @@ cdef inline int int_min(int a, int b): return a if a <= b else b cdef class SparseIndex: """ - Abstract superclass for sparse index types + Abstract superclass for sparse index types. """ + def __init__(self): raise NotImplementedError @@ -48,8 +49,9 @@ cdef class IntIndex(SparseIndex): ---------- length : integer indices : array-like - Contains integers corresponding to + Contains integers corresponding to the indices. 
""" + cdef readonly: Py_ssize_t length, npoints ndarray indices @@ -59,9 +61,11 @@ cdef class IntIndex(SparseIndex): self.indices = np.ascontiguousarray(indices, dtype=np.int32) self.npoints = len(self.indices) + self.check_integrity() + def __reduce__(self): args = (self.length, self.indices) - return (IntIndex, args) + return IntIndex, args def __repr__(self): output = 'IntIndex\n' @@ -70,10 +74,40 @@ cdef class IntIndex(SparseIndex): def check_integrity(self): """ - Only need be strictly ascending and nothing less than 0 or greater than - total length + Checks the following: + + - Indices are strictly ascending + - Number of indices is at most self.length + - Indices are at least 0 and at most the total length less one + + A ValueError is raised if any of these conditions is violated. """ - pass + + cdef: + int32_t index, prev = -1 + + if self.npoints > self.length: + msg = ("Too many indices. Expected " + "{exp} but found {act}").format( + exp=self.length, act=self.npoints) + raise ValueError(msg) + + # Indices are vacuously ordered and non-negative + # if the sequence of indices is empty. + if self.npoints == 0: + return + + if min(self.indices) < 0: + raise ValueError("No index can be less than zero") + + if max(self.indices) >= self.length: + raise ValueError("All indices must be less than the length") + + for index in self.indices: + if prev != -1 and index <= prev: + raise ValueError("Indices must be strictly increasing") + + prev = index def equals(self, other): if not isinstance(other, IntIndex): @@ -320,7 +354,7 @@ cdef class BlockIndex(SparseIndex): def __reduce__(self): args = (self.length, self.blocs, self.blengths) - return (BlockIndex, args) + return BlockIndex, args def __repr__(self): output = 'BlockIndex\n' diff --git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py index b6ab99dc66cda..696d2cf47f4c0 100644 --- a/pandas/tests/sparse/test_libsparse.py +++ b/pandas/tests/sparse/test_libsparse.py @@ -474,6 +474,44 @@ def test_to_block_index(self): class TestIntIndex(tm.TestCase): + def test_check_integrity(self): + + # Too many indices than specified in self.length + msg = "Too many indices" + + with tm.assertRaisesRegexp(ValueError, msg): + IntIndex(length=1, indices=[1, 2, 3]) + + # No index can be negative. + msg = "No index can be less than zero" + + with tm.assertRaisesRegexp(ValueError, msg): + IntIndex(length=5, indices=[1, -2, 3]) + + # No index can be negative. + msg = "No index can be less than zero" + + with tm.assertRaisesRegexp(ValueError, msg): + IntIndex(length=5, indices=[1, -2, 3]) + + # All indices must be less than the length. + msg = "All indices must be less than the length" + + with tm.assertRaisesRegexp(ValueError, msg): + IntIndex(length=5, indices=[1, 2, 5]) + + with tm.assertRaisesRegexp(ValueError, msg): + IntIndex(length=5, indices=[1, 2, 6]) + + # Indices must be strictly ascending. 
+        msg = "Indices must be strictly increasing"
+
+        with tm.assertRaisesRegexp(ValueError, msg):
+            IntIndex(length=5, indices=[1, 3, 2])
+
+        with tm.assertRaisesRegexp(ValueError, msg):
+            IntIndex(length=5, indices=[1, 3, 3])
+
     def test_int_internal(self):
         idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer')
         self.assertIsInstance(idx, IntIndex)

From a293d22ed25294c1753524903455ce5122319632 Mon Sep 17 00:00:00 2001
From: atbd
Date: Sun, 2 Apr 2017 23:57:01 +0200
Subject: [PATCH 317/933] COMPAT: NaT support tz_localize / tz_convert
 (#15830) (#15868)

---
 doc/source/whatsnew/v0.20.0.txt | 2 ++
 pandas/_libs/tslib.pyx          | 3 ++-
 pandas/tests/scalar/test_nat.py | 3 ++-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 230f39db67197..781a912555e14 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -837,6 +837,8 @@ Other API Changes
   ignored (no longer needed to specify the new behaviour) and is deprecated.
 - ``NaT`` will now correctly report ``False`` for datetimelike boolean operations such as ``is_month_start`` (:issue:`15781`)
 - ``NaT`` will now correctly return ``np.nan`` for ``Timedelta`` and ``Period`` accessors such as ``days`` and ``quarter`` (:issue:`15782`)
+- ``NaT`` will now return ``NaT`` for ``tz_localize`` and ``tz_convert``
+  methods (:issue:`15830`)

 .. _whatsnew_0200.deprecations:

diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index d441f1ec4759b..5aa8e15d0d087 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -3835,7 +3835,8 @@ for field in fields:
 # to the NaTType class; these can return NaT, np.nan
 # or raise respectively
 _nat_methods = ['date', 'now', 'replace', 'to_pydatetime',
-                'today', 'round', 'floor', 'ceil']
+                'today', 'round', 'floor', 'ceil', 'tz_convert',
+                'tz_localize']
 _nan_methods = ['weekday', 'isoweekday', 'total_seconds']
 _implemented_methods = ['to_datetime', 'to_datetime64', 'isoformat']
 _implemented_methods.extend(_nat_methods)

diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py
index ce2ed237f5559..0695fe2243947 100644
--- a/pandas/tests/scalar/test_nat.py
+++ b/pandas/tests/scalar/test_nat.py
@@ -129,7 +129,8 @@ def test_NaT_methods():
                      'timetuple', 'timetz', 'toordinal', 'tzname',
                      'utcfromtimestamp', 'utcnow', 'utcoffset',
                      'utctimetuple']
-    nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today']
+    nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today',
+                   'tz_convert', 'tz_localize']
     nan_methods = ['weekday', 'isoweekday']

     for method in raise_methods:

From 67cc0213def8b9f56c4d1f71bb95ebef22790b24 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Sun, 2 Apr 2017 17:59:15 -0400
Subject: [PATCH 318/933] CLN: Remove "flake8: noqa" from more files

Another round of house-cleaning that builds off #15842.

xref #12066 (comment): the issue remains unresolved, but it
does not seem entirely necessary to disable style-checking
on the entire file for that IMO.
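
The practical difference between the two suppression styles is roughly
the following (a minimal sketch for illustration, not lines taken from
the diff itself):

    # File-level: this single comment makes flake8 skip the whole module,
    # so any new violations introduced later are hidden as well.
    # flake8: noqa

    # Line-level: only the annotated line is exempt; the rest of the
    # module stays under style checking.
    from pandas._libs.period import IncompatibleFrequency  # noqa

The import above mirrors the line-level marker this patch keeps in
pandas/tseries/common.py.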
Author: gfyoung Closes #15867 from gfyoung/flake8-noqa-clean and squashes the following commits: 0c84926 [gfyoung] CLN: Make tseries/common.py flake8-able 7a799ff [gfyoung] CLN: Make _version.py flake8-able 7087b64 [gfyoung] CLN: Make test_categorical.py flake8-able 5d5abf8 [gfyoung] CLN: Make test_categorical.py flake8-able 6ace90b [gfyoung] CLN: Make test_eval.py flake8-able --- pandas/_version.py | 3 - pandas/tests/computation/test_eval.py | 95 ++++++++++---------- pandas/tests/indexes/test_category.py | 57 ++++++------ pandas/tests/test_categorical.py | 120 ++++++++++++-------------- pandas/tseries/common.py | 8 +- 5 files changed, 134 insertions(+), 149 deletions(-) diff --git a/pandas/_version.py b/pandas/_version.py index d764923fd7247..4695b512feff5 100644 --- a/pandas/_version.py +++ b/pandas/_version.py @@ -1,4 +1,3 @@ - # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build @@ -8,8 +7,6 @@ # This file is released into the public domain. Generated by # versioneer-0.15 (https://github.com/warner/python-versioneer) -# flake8: noqa - import errno import os import re diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index ed6006440441e..81e9b7c77a81b 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1,10 +1,6 @@ - -# flake8: noqa - import warnings import operator from itertools import product -from distutils.version import LooseVersion import pytest @@ -28,12 +24,11 @@ import pandas.computation.expr as expr import pandas.util.testing as tm -import pandas._libs.lib as lib from pandas.util.testing import (assert_frame_equal, randbool, assertRaisesRegexp, assert_numpy_array_equal, assert_produces_warning, assert_series_equal, slow) -from pandas.compat import PY3, u, reduce +from pandas.compat import PY3, reduce _series_frame_incompatible = _bool_ops_syms _scalar_skip = 'in', 'not in' @@ -43,9 +38,9 @@ pytest.mark.skipif(engine == 'numexpr' and not _USE_NUMEXPR, reason='numexpr enabled->{enabled}, ' 'installed->{installed}'.format( - enabled=_USE_NUMEXPR, - installed=_NUMEXPR_INSTALLED))(engine) - for engine in _engines + enabled=_USE_NUMEXPR, + installed=_NUMEXPR_INSTALLED))(engine) + for engine in _engines # noqa )) def engine(request): return request.param @@ -66,7 +61,8 @@ def _eval_single_bin(lhs, cmp1, rhs, engine): try: return c(lhs, rhs) except ValueError as e: - if str(e).startswith('negative number cannot be raised to a fractional power'): + if str(e).startswith('negative number cannot be ' + 'raised to a fractional power'): return np.nan raise return c(lhs, rhs) @@ -74,14 +70,14 @@ def _eval_single_bin(lhs, cmp1, rhs, engine): def _series_and_2d_ndarray(lhs, rhs): return ((isinstance(lhs, Series) and - isinstance(rhs, np.ndarray) and rhs.ndim > 1) - or (isinstance(rhs, Series) and - isinstance(lhs, np.ndarray) and lhs.ndim > 1)) + isinstance(rhs, np.ndarray) and rhs.ndim > 1) or + (isinstance(rhs, Series) and + isinstance(lhs, np.ndarray) and lhs.ndim > 1)) def _series_and_frame(lhs, rhs): - return ((isinstance(lhs, Series) and isinstance(rhs, DataFrame)) - or (isinstance(rhs, Series) and isinstance(lhs, DataFrame))) + return ((isinstance(lhs, Series) and isinstance(rhs, DataFrame)) or + (isinstance(rhs, Series) and isinstance(lhs, DataFrame))) def _bool_and_frame(lhs, rhs): @@ -228,19 +224,22 @@ def 
check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): else: lhs_new = _eval_single_bin(lhs, cmp1, rhs, self.engine) rhs_new = _eval_single_bin(lhs, cmp2, rhs, self.engine) - if (isinstance(lhs_new, Series) and isinstance(rhs_new, DataFrame) - and binop in _series_frame_incompatible): + if (isinstance(lhs_new, Series) and + isinstance(rhs_new, DataFrame) and + binop in _series_frame_incompatible): pass # TODO: the code below should be added back when left and right # hand side bool ops are fixed. - + # # try: - # self.assertRaises(Exception, pd.eval, ex, - #local_dict={'lhs': lhs, 'rhs': rhs}, - # engine=self.engine, parser=self.parser) + # self.assertRaises(Exception, pd.eval, ex, + # local_dict={'lhs': lhs, 'rhs': rhs}, + # engine=self.engine, parser=self.parser) # except AssertionError: - #import ipdb; ipdb.set_trace() - # raise + # import ipdb + # + # ipdb.set_trace() + # raise else: expected = _eval_single_bin( lhs_new, binop, rhs_new, self.engine) @@ -248,7 +247,6 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): self.check_equal(result, expected) def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): - skip_these = _scalar_skip def check_operands(left, right, cmp_op): return _eval_single_bin(left, cmp_op, right, self.engine) @@ -334,7 +332,8 @@ def get_expected_pow_result(self, lhs, rhs): try: expected = _eval_single_bin(lhs, '**', rhs, self.engine) except ValueError as e: - if str(e).startswith('negative number cannot be raised to a fractional power'): + if str(e).startswith('negative number cannot be ' + 'raised to a fractional power'): if self.engine == 'python': pytest.skip(str(e)) else: @@ -650,7 +649,7 @@ def test_disallow_scalar_bool_ops(self): exprs += '2 * x > 2 or 1 and 2', exprs += '2 * df > 3 and 1 or a', - x, a, b, df = np.random.randn(3), 1, 2, DataFrame(randn(3, 2)) + x, a, b, df = np.random.randn(3), 1, 2, DataFrame(randn(3, 2)) # noqa for ex in exprs: with tm.assertRaises(NotImplementedError): pd.eval(ex, engine=self.engine, parser=self.parser) @@ -682,7 +681,7 @@ def test_identical(self): tm.assert_numpy_array_equal(result, np.array([1.5])) self.assertEqual(result.shape, (1, )) - x = np.array([False]) + x = np.array([False]) # noqa result = pd.eval('x', engine=self.engine, parser=self.parser) tm.assert_numpy_array_equal(result, np.array([False])) self.assertEqual(result.shape, (1, )) @@ -792,9 +791,8 @@ def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): f = lambda *args, **kwargs: np.random.randn() -#------------------------------------- -# typecasting rules consistency with python -# issue #12388 +# ------------------------------------- +# gh-12388: Typecasting rules consistency with python class TestTypeCasting(object): @@ -817,8 +815,8 @@ def test_binop_typecasting(self, engine, parser, op, dt): assert_frame_equal(res, eval(s)) -#------------------------------------- -# basic and complex alignment +# ------------------------------------- +# Basic and complex alignment def _is_datetime(x): return issubclass(x.dtype.type, np.datetime64) @@ -1064,8 +1062,8 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): tm.assert_equal(msg, expected) -#------------------------------------ -# slightly more complex ops +# ------------------------------------ +# Slightly more complex ops class TestOperationsNumExprPandas(tm.TestCase): @@ -1156,7 +1154,7 @@ def test_single_variable(self): def test_truediv(self): s = np.array([1]) ex = 's / 1' - d = {'s': s} + d = {'s': s} # noqa if PY3: res = self.eval(ex, truediv=False) @@ -1204,7 
+1202,7 @@ def test_truediv(self): self.assertEqual(res, expec) def test_failing_subscript_with_name_error(self): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.randn(5, 3)) # noqa with tm.assertRaises(NameError): self.eval('df[x > 2] > 2') @@ -1501,7 +1499,7 @@ def setUpClass(cls): cls.arith_ops) def test_check_many_exprs(self): - a = 1 + a = 1 # noqa expr = ' * '.join('a' * 33) expected = 1 res = pd.eval(expr, engine=self.engine, parser=self.parser) @@ -1526,13 +1524,13 @@ def test_fails_not(self): engine=self.engine) def test_fails_ampersand(self): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.randn(5, 3)) # noqa ex = '(df + 2)[df > 1] > 0 & (df > 0)' with tm.assertRaises(NotImplementedError): pd.eval(ex, parser=self.parser, engine=self.engine) def test_fails_pipe(self): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.randn(5, 3)) # noqa ex = '(df + 2)[df > 1] > 0 | (df > 0)' with tm.assertRaises(NotImplementedError): pd.eval(ex, parser=self.parser, engine=self.engine) @@ -1728,7 +1726,7 @@ def test_global_scope(self, engine, parser): parser=parser)) def test_no_new_locals(self, engine, parser): - x = 1 + x = 1 # noqa lcls = locals().copy() pd.eval('x + 1', local_dict=lcls, engine=engine, parser=parser) lcls2 = locals().copy() @@ -1736,7 +1734,7 @@ def test_no_new_locals(self, engine, parser): tm.assert_equal(lcls, lcls2) def test_no_new_globals(self, engine, parser): - x = 1 + x = 1 # noqa gbls = globals().copy() pd.eval('x + 1', engine=engine, parser=parser) gbls2 = globals().copy() @@ -1787,15 +1785,16 @@ def test_name_error_exprs(engine, parser): def test_invalid_local_variable_reference(engine, parser): - a, b = 1, 2 + a, b = 1, 2 # noqa exprs = 'a + @b', '@a + b', '@a + @b' - for expr in exprs: + + for _expr in exprs: if parser != 'pandas': with tm.assertRaisesRegexp(SyntaxError, "The '@' prefix is only"): - pd.eval(exprs, engine=engine, parser=parser) + pd.eval(_expr, engine=engine, parser=parser) else: with tm.assertRaisesRegexp(SyntaxError, "The '@' prefix is not"): - pd.eval(exprs, engine=engine, parser=parser) + pd.eval(_expr, engine=engine, parser=parser) def test_numexpr_builtin_raises(engine, parser): @@ -1834,9 +1833,9 @@ def test_more_than_one_expression_raises(engine, parser): def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser): gen = {int: lambda: np.random.randint(10), float: np.random.randn} - mid = gen[lhs]() - lhs = gen[lhs]() - rhs = gen[rhs]() + mid = gen[lhs]() # noqa + lhs = gen[lhs]() # noqa + rhs = gen[rhs]() # noqa ex1 = 'lhs {0} mid {1} rhs'.format(cmp, cmp) ex2 = 'lhs {0} mid and mid {1} rhs'.format(cmp, cmp) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index ef1be7e60e0e8..0d75ba5f2bd46 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -1,8 +1,5 @@ # -*- coding: utf-8 -*- -# TODO(wesm): fix long line flake8 issues -# flake8: noqa - import pandas.util.testing as tm from pandas.indexes.api import Index, CategoricalIndex from .common import Base @@ -215,7 +212,8 @@ def test_map(self): # GH 12766: Return an index not an array tm.assert_index_equal(ci.map(lambda x: 1), - Index(np.array([1] * 5, dtype=np.int64), name='XXX')) + Index(np.array([1] * 5, dtype=np.int64), + name='XXX')) # change categories dtype ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'), @@ -225,7 +223,8 @@ def f(x): return {'A': 10, 'B': 20, 'C': 30}.get(x) result = ci.map(f) - exp = pd.CategoricalIndex([10, 20, 
10, 20, 30], categories=[20, 10, 30], + exp = pd.CategoricalIndex([10, 20, 10, 20, 30], + categories=[20, 10, 30], ordered=False) tm.assert_index_equal(result, exp) @@ -589,10 +588,10 @@ def test_string_categorical_index_repr(self): # short idx = pd.CategoricalIndex(['a', 'bb', 'ccc']) if PY3: - expected = u"""CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" + expected = u"""CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: - expected = u"""CategoricalIndex([u'a', u'bb', u'ccc'], categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" + expected = u"""CategoricalIndex([u'a', u'bb', u'ccc'], categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) # multiple lines @@ -601,7 +600,7 @@ def test_string_categorical_index_repr(self): expected = u"""CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], - categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" + categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: @@ -609,7 +608,7 @@ def test_string_categorical_index_repr(self): u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], - categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" + categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) @@ -619,7 +618,7 @@ def test_string_categorical_index_repr(self): expected = u"""CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', ... 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], - categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" + categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" # noqa self.assertEqual(repr(idx), expected) else: @@ -628,7 +627,7 @@ def test_string_categorical_index_repr(self): ... 
u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], - categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category', length=300)""" + categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category', length=300)""" # noqa self.assertEqual(unicode(idx), expected) @@ -637,23 +636,23 @@ def test_string_categorical_index_repr(self): if PY3: expected = u"""CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'm', 'o'], - categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')""" + categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: expected = u"""CategoricalIndex([u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', u'i', u'j', u'k', u'l', u'm', u'm', u'o'], - categories=[u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', ...], ordered=False, dtype='category')""" + categories=[u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', ...], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) # short idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう']) if PY3: - expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: - expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) # multiple lines @@ -662,7 +661,7 @@ def test_string_categorical_index_repr(self): expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: @@ -670,7 +669,7 @@ def test_string_categorical_index_repr(self): u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) @@ -680,7 +679,7 @@ def test_string_categorical_index_repr(self): expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', ... 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa self.assertEqual(repr(idx), expected) else: @@ -689,7 +688,7 @@ def test_string_categorical_index_repr(self): ... 
u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" # noqa self.assertEqual(unicode(idx), expected) @@ -698,13 +697,13 @@ def test_string_categorical_index_repr(self): if PY3: expected = u"""CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', 'す', 'せ', 'そ'], - categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" + categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', u'け', u'こ', u'さ', u'し', u'す', u'せ', u'そ'], - categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" + categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) @@ -714,10 +713,10 @@ def test_string_categorical_index_repr(self): # short idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう']) if PY3: - expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: - expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) # multiple lines @@ -727,7 +726,7 @@ def test_string_categorical_index_repr(self): 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: @@ -736,7 +735,7 @@ def test_string_categorical_index_repr(self): u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) @@ -748,7 +747,7 @@ def test_string_categorical_index_repr(self): ... 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa self.assertEqual(repr(idx), expected) else: @@ -757,7 +756,7 @@ def test_string_categorical_index_repr(self): ... 
u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" # noqa self.assertEqual(unicode(idx), expected) @@ -766,13 +765,13 @@ def test_string_categorical_index_repr(self): if PY3: expected = u"""CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', 'す', 'せ', 'そ'], - categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" + categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', u'け', u'こ', u'さ', u'し', u'す', u'せ', u'そ'], - categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" + categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index ea2697ec19df3..63c1ae70e35a6 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -10,7 +10,6 @@ from pandas.types.dtypes import CategoricalDtype from pandas.types.common import (is_categorical_dtype, - is_object_dtype, is_float_dtype, is_integer_dtype) @@ -25,9 +24,6 @@ from pandas.compat import range, lrange, u, PY3 from pandas.core.config import option_context -# GH 12066 -# flake8: noqa - class TestCategorical(tm.TestCase): @@ -291,7 +287,6 @@ def test_constructor_with_null(self): pd.Categorical(DatetimeIndex(['nat', '20160101']), categories=[NaT, Timestamp('20160101')]) - def test_constructor_with_index(self): ci = CategoricalIndex(list('aabbca'), categories=list('cab')) tm.assert_categorical_equal(ci.values, Categorical(ci)) @@ -710,8 +705,7 @@ def test_unicode_print(self): self.assertEqual(_rep(c), expected) - c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] - * 20) + c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) expected = u"""\ [ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] Length: 60 @@ -723,8 +717,7 @@ def test_unicode_print(self): # the repr width with option_context('display.unicode.east_asian_width', True): - c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] - * 20) + c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) expected = u"""[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] Length: 60 Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa @@ -1279,7 +1272,8 @@ def test_mode(self): s = Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], ordered=True) res = s.mode() - exp = Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True) + exp = Categorical([5, 4, 3, 2, 1], + categories=[5, 4, 3, 2, 1], ordered=True) tm.assert_categorical_equal(res, exp) # NaN should not become the mode! 
s = Categorical([np.nan, np.nan, np.nan, 4, 5], @@ -2233,7 +2227,7 @@ def test_categorical_repr_datetime_ordered(self): exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < - 2011-01-01 13:00:00-05:00]""" + 2011-01-01 13:00:00-05:00]""" # noqa self.assertEqual(repr(c), exp) @@ -2242,14 +2236,14 @@ def test_categorical_repr_period(self): c = pd.Categorical(idx) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, - 2011-01-01 13:00]""" + 2011-01-01 13:00]""" # noqa self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, - 2011-01-01 13:00]""" + 2011-01-01 13:00]""" # noqa self.assertEqual(repr(c), exp) @@ -2262,7 +2256,7 @@ def test_categorical_repr_period(self): c = pd.Categorical(idx.append(idx), categories=idx) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" # noqa self.assertEqual(repr(c), exp) @@ -2271,14 +2265,14 @@ def test_categorical_repr_period_ordered(self): c = pd.Categorical(idx, ordered=True) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < - 2011-01-01 13:00]""" + 2011-01-01 13:00]""" # noqa self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < - 2011-01-01 13:00]""" + 2011-01-01 13:00]""" # noqa self.assertEqual(repr(c), exp) @@ -2291,7 +2285,7 @@ def test_categorical_repr_period_ordered(self): c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" # noqa self.assertEqual(repr(c), exp) @@ -2305,7 +2299,7 @@ def test_categorical_repr_timedelta(self): c = pd.Categorical(idx.append(idx), categories=idx) exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] -Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" # noqa self.assertEqual(repr(c), exp) @@ -2315,7 
+2309,7 @@ def test_categorical_repr_timedelta(self): Length: 20 Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, - 18 days 01:00:00, 19 days 01:00:00]""" + 18 days 01:00:00, 19 days 01:00:00]""" # noqa self.assertEqual(repr(c), exp) @@ -2324,7 +2318,7 @@ def test_categorical_repr_timedelta(self): Length: 40 Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, - 18 days 01:00:00, 19 days 01:00:00]""" + 18 days 01:00:00, 19 days 01:00:00]""" # noqa self.assertEqual(repr(c), exp) @@ -2332,13 +2326,13 @@ def test_categorical_repr_timedelta_ordered(self): idx = pd.timedelta_range('1 days', periods=5) c = pd.Categorical(idx, ordered=True) exp = """[1 days, 2 days, 3 days, 4 days, 5 days] -Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] -Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa self.assertEqual(repr(c), exp) @@ -2348,7 +2342,7 @@ def test_categorical_repr_timedelta_ordered(self): Length: 20 Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < - 18 days 01:00:00 < 19 days 01:00:00]""" + 18 days 01:00:00 < 19 days 01:00:00]""" # noqa self.assertEqual(repr(c), exp) @@ -2357,7 +2351,7 @@ def test_categorical_repr_timedelta_ordered(self): Length: 40 Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < 3 days 01:00:00 ... 
16 days 01:00:00 < 17 days 01:00:00 < - 18 days 01:00:00 < 19 days 01:00:00]""" + 18 days 01:00:00 < 19 days 01:00:00]""" # noqa self.assertEqual(repr(c), exp) @@ -2423,7 +2417,7 @@ def test_categorical_series_repr_datetime(self): 4 2011-01-01 13:00:00 dtype: category Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, - 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" + 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" # noqa self.assertEqual(repr(s), exp) @@ -2438,7 +2432,7 @@ def test_categorical_series_repr_datetime(self): dtype: category Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, - 2011-01-01 13:00:00-05:00]""" + 2011-01-01 13:00:00-05:00]""" # noqa self.assertEqual(repr(s), exp) @@ -2452,7 +2446,7 @@ def test_categorical_series_repr_datetime_ordered(self): 4 2011-01-01 13:00:00 dtype: category Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < - 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa self.assertEqual(repr(s), exp) @@ -2467,7 +2461,7 @@ def test_categorical_series_repr_datetime_ordered(self): dtype: category Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < - 2011-01-01 13:00:00-05:00]""" + 2011-01-01 13:00:00-05:00]""" # noqa self.assertEqual(repr(s), exp) @@ -2481,7 +2475,7 @@ def test_categorical_series_repr_period(self): 4 2011-01-01 13:00 dtype: category Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, - 2011-01-01 13:00]""" + 2011-01-01 13:00]""" # noqa self.assertEqual(repr(s), exp) @@ -2507,7 +2501,7 @@ def test_categorical_series_repr_period_ordered(self): 4 2011-01-01 13:00 dtype: category Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < - 2011-01-01 13:00]""" + 2011-01-01 13:00]""" # noqa self.assertEqual(repr(s), exp) @@ -2551,7 +2545,7 @@ def test_categorical_series_repr_timedelta(self): dtype: category Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, - 8 days 01:00:00, 9 days 01:00:00]""" + 8 days 01:00:00, 9 days 01:00:00]""" # noqa self.assertEqual(repr(s), exp) @@ -2564,7 +2558,7 @@ def test_categorical_series_repr_timedelta_ordered(self): 3 4 days 4 5 days dtype: category -Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa self.assertEqual(repr(s), exp) @@ -2583,26 +2577,26 @@ def test_categorical_series_repr_timedelta_ordered(self): dtype: category Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < 3 days 01:00:00 ... 
6 days 01:00:00 < 7 days 01:00:00 < - 8 days 01:00:00 < 9 days 01:00:00]""" + 8 days 01:00:00 < 9 days 01:00:00]""" # noqa self.assertEqual(repr(s), exp) def test_categorical_index_repr(self): idx = pd.CategoricalIndex(pd.Categorical([1, 2, 3])) - exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" + exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), exp) i = pd.CategoricalIndex(pd.Categorical(np.arange(10))) - exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=False, dtype='category')""" + exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) def test_categorical_index_repr_ordered(self): i = pd.CategoricalIndex(pd.Categorical([1, 2, 3], ordered=True)) - exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" + exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) i = pd.CategoricalIndex(pd.Categorical(np.arange(10), ordered=True)) - exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=True, dtype='category')""" + exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) def test_categorical_index_repr_datetime(self): @@ -2611,7 +2605,7 @@ def test_categorical_index_repr_datetime(self): exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', '2011-01-01 11:00:00', '2011-01-01 12:00:00', '2011-01-01 13:00:00'], - categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" + categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) @@ -2621,7 +2615,7 @@ def test_categorical_index_repr_datetime(self): exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], - categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) @@ -2631,7 +2625,7 @@ def test_categorical_index_repr_datetime_ordered(self): exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', '2011-01-01 11:00:00', '2011-01-01 12:00:00', '2011-01-01 13:00:00'], - categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" + categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) @@ -2641,7 +2635,7 @@ def test_categorical_index_repr_datetime_ordered(self): exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 
10:00:00-05:00', '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], - categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) @@ -2651,7 +2645,7 @@ def test_categorical_index_repr_datetime_ordered(self): '2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], - categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) @@ -2659,24 +2653,24 @@ def test_categorical_index_repr_period(self): # test all length idx = pd.period_range('2011-01-01 09:00', freq='H', periods=1) i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" + exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) idx = pd.period_range('2011-01-01 09:00', freq='H', periods=2) i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) idx = pd.period_range('2011-01-01 09:00', freq='H', periods=3) i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) i = pd.CategoricalIndex(pd.Categorical(idx)) exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', '2011-01-01 13:00'], - categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) @@ -2685,13 +2679,13 @@ def test_categorical_index_repr_period(self): '2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', '2011-01-01 13:00'], - categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" + 
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) idx = pd.period_range('2011-01', freq='M', periods=5) i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" + exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) def test_categorical_index_repr_period_ordered(self): @@ -2699,19 +2693,19 @@ def test_categorical_index_repr_period_ordered(self): i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', '2011-01-01 13:00'], - categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) idx = pd.period_range('2011-01', freq='M', periods=5) i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) - exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" + exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) def test_categorical_index_repr_timedelta(self): idx = pd.timedelta_range('1 days', periods=5) i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=False, dtype='category')""" + exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) idx = pd.timedelta_range('1 hours', periods=10) @@ -2720,14 +2714,14 @@ def test_categorical_index_repr_timedelta(self): '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', '9 days 01:00:00'], - categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=False, dtype='category')""" + categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) def test_categorical_index_repr_timedelta_ordered(self): idx = pd.timedelta_range('1 days', periods=5) i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) - exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')""" + exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], 
categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) idx = pd.timedelta_range('1 hours', periods=10) @@ -2736,7 +2730,7 @@ def test_categorical_index_repr_timedelta_ordered(self): '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', '9 days 01:00:00'], - categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" + categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) @@ -2833,7 +2827,8 @@ def test_mode(self): s = Series(Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], ordered=True)) res = s.mode() - exp = Series(Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True)) + exp = Series(Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], + ordered=True)) tm.assert_series_equal(res, exp) def test_value_counts(self): @@ -4275,10 +4270,10 @@ def test_str_accessor_api_for_categorical(self): # * `translate` has different interfaces for py2 vs. py3 _ignore_names = ["get", "join", "translate"] - str_func_names = [f - for f in dir(s.str) - if not (f.startswith("_") or f in _special_func_names - or f in _ignore_names)] + str_func_names = [f for f in dir(s.str) if not ( + f.startswith("_") or + f in _special_func_names or + f in _ignore_names)] func_defs = [(f, (), {}) for f in str_func_names] func_defs.extend(special_func_defs) @@ -4418,10 +4413,3 @@ def test_map(self): self.assertIsInstance(res, tm.SubclassedCategorical) exp = Categorical(['A', 'B', 'C']) tm.assert_categorical_equal(res, exp) - - def test_map(self): - sc = tm.SubclassedCategorical(['a', 'b', 'c']) - res = sc.map(lambda x: x.upper()) - self.assertIsInstance(res, tm.SubclassedCategorical) - exp = Categorical(['A', 'B', 'C']) - tm.assert_categorical_equal(res, exp) diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index 7940efc7e1b59..955edce2591e6 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -4,8 +4,7 @@ import numpy as np -from pandas.types.common import (_NS_DTYPE, _TD_DTYPE, - is_period_arraylike, +from pandas.types.common import (is_period_arraylike, is_datetime_arraylike, is_integer_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, is_categorical_dtype, @@ -13,7 +12,7 @@ from pandas.core.base import PandasDelegate, NoNewAttributesMixin from pandas.tseries.index import DatetimeIndex -from pandas._libs.period import IncompatibleFrequency # flake8: noqa +from pandas._libs.period import IncompatibleFrequency # noqa from pandas.tseries.period import PeriodIndex from pandas.tseries.tdi import TimedeltaIndex from pandas.core.algorithms import take_1d @@ -162,6 +161,7 @@ class DatetimeProperties(Properties): def to_pydatetime(self): return self.values.to_pydatetime() + DatetimeProperties._add_delegate_accessors( delegate=DatetimeIndex, accessors=DatetimeIndex._datetimelike_ops, @@ -201,6 +201,7 @@ def components(self): """ return self.values.components.set_index(self.index) + TimedeltaProperties._add_delegate_accessors( delegate=TimedeltaIndex, accessors=TimedeltaIndex._datetimelike_ops, @@ -225,6 +226,7 @@ class PeriodProperties(Properties): Raises TypeError if the Series 
does not contain datetimelike values. """ + PeriodProperties._add_delegate_accessors( delegate=PeriodIndex, accessors=PeriodIndex._datetimelike_ops, From cd24fa95f1781b14d35eac4953bab02691fd9d04 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 2 Apr 2017 18:47:11 -0400 Subject: [PATCH 319/933] ENH: add origin to to_datetime closes #11276 closes #11745 superseded #11470 Author: Jeff Reback Author: Sumit Binnani Closes #15828 from jreback/datetime-unit and squashes the following commits: ebb4acd [Jeff Reback] doc fixes & cleanup 209591a [Jeff Reback] bug fix 56663a5 [Jeff Reback] add Timedelta floordiv ops a24e88c [Jeff Reback] rename epoch -> unix 6a8a779 [Jeff Reback] update docs / tests ad7356e [Sumit Binnani] BUG: Series creation with datetime64 with non-ns unit as object dtype --- doc/source/timeseries.rst | 26 ++++- doc/source/whatsnew/v0.20.0.txt | 24 +++- pandas/_libs/tslib.pyx | 39 ++++++- pandas/tests/indexes/datetimes/test_tools.py | 117 +++++++++++++++++++ pandas/tests/indexes/timedeltas/test_ops.py | 13 ++- pandas/tests/scalar/test_timedelta.py | 10 ++ pandas/tseries/tdi.py | 7 +- pandas/tseries/tools.py | 107 ++++++++++++++--- 8 files changed, 317 insertions(+), 26 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 7136b15a7633a..44c200e13b877 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -252,7 +252,8 @@ Epoch Timestamps It's also possible to convert integer or float epoch times. The default unit for these is nanoseconds (since these are how ``Timestamp`` s are stored). However, -often epochs are stored in another ``unit`` which can be specified: +often epochs are stored in another ``unit`` which can be specified. These are computed +from the starting point specified by the :ref:`Origin Parameter `. Typical epoch stored units @@ -276,6 +277,29 @@ These *work*, but the results may be unexpected. Epoch times will be rounded to the nearest nanosecond. +.. _timeseries.origin: + +Using the Origin Parameter +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.20.0 + +Using the ``origin`` parameter, one can specify an alternative starting point for creation +of a ``DatetimeIndex``. + +Start with 1960-01-01 as the starting date + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1960-01-01')) + +The default is set at ``origin='unix'``, which defaults to ``1970-01-01 00:00:00``. +Commonly called 'unix epoch' or POSIX time. + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit='D') + .. _timeseries.daterange: Generating Ranges of Timestamps diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 781a912555e14..ceb8f0f5fabe4 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -27,7 +27,6 @@ Check the :ref:`API Changes ` and :ref:`deprecations New features ~~~~~~~~~~~~ - .. _whatsnew_0200.enhancements.dataio_dtype: ``dtype`` keyword for data IO @@ -55,6 +54,27 @@ fixed-width text files, and :func:`read_excel` for parsing Excel files. pd.read_fwf(StringIO(data)).dtypes pd.read_fwf(StringIO(data), dtype={'a':'float64', 'b':'object'}).dtypes +.. _whatsnew_0120.enhancements.datetime_origin: + +``.to_datetime()`` has gained an ``origin`` parameter +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`to_datetime` has gained a new parameter, ``origin``, to define a reference date +from where to compute the resulting ``DatetimeIndex``. (:issue:`11276`, :issue:`11745`) + +Start with 1960-01-01 as the starting date + +.. 
ipython:: python + + pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1960-01-01')) + +The default is set at ``origin='unix'``, which defaults to ``1970-01-01 00:00:00``. +Commonly called 'unix epoch' or POSIX time. + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit='D') + .. _whatsnew_0200.enhancements.groupby_access: Groupby Enhancements @@ -317,7 +337,7 @@ Other Enhancements - ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now allow optional header aliases. (:issue:`15536`) - Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`) - Added ``.empty`` property to subclasses of ``Index``. (:issue:`15270`) - +- Enabled floor division for ``Timedelta`` and ``TimedeltaIndex`` (:issue:`15828`) - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts ``str`` to separate joined fields; the default is ".", which is backward compatible. (:issue:`14883`) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 5aa8e15d0d087..cc1439711c1d4 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -3073,6 +3073,7 @@ class Timedelta(_Timedelta): return np.timedelta64(self.value, 'ns') def _validate_ops_compat(self, other): + # return True if we are compat with operating if _checknull_with_nat(other): return True @@ -3179,11 +3180,41 @@ class Timedelta(_Timedelta): __div__ = __truediv__ __rdiv__ = __rtruediv__ - def _not_implemented(self, *args, **kwargs): - return NotImplemented + def __floordiv__(self, other): + + if hasattr(other, 'dtype'): + + # work with i8 + other = other.astype('m8[ns]').astype('i8') + + return self.value // other - __floordiv__ = _not_implemented - __rfloordiv__ = _not_implemented + # integers only + if is_integer_object(other): + return Timedelta(self.value // other, unit='ns') + + if not self._validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return np.nan + return self.value // other.value + + def __rfloordiv__(self, other): + if hasattr(other, 'dtype'): + + # work with i8 + other = other.astype('m8[ns]').astype('i8') + return other // self.value + + if not self._validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return NaT + return other.value // self.value def _op_unary_method(func, name): diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 512a3e1c38629..02630c76abb93 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1515,3 +1515,120 @@ def test_normalize_date(): result = normalize_date(value) assert (result == datetime(2012, 9, 7)) + + +@pytest.fixture(params=['D', 's', 'ms', 'us', 'ns']) +def units(request): + return request.param + + +@pytest.fixture +def epoch_1960(): + # for origin as 1960-01-01 + return Timestamp('1960-01-01') + + +@pytest.fixture +def units_from_epochs(): + return list(range(5)) + + +@pytest.fixture(params=[epoch_1960(), epoch_1960().to_datetime(), + epoch_1960().to_datetime64(), + str(epoch_1960())]) +def epochs(request): + return request.param + + +@pytest.fixture +def julian_dates(): + return pd.date_range('2014-1-1', 
periods=10).to_julian_date().values + + +class TestOrigin(object): + + def test_to_basic(self, julian_dates): + # gh-11276, gh-11745 + # for origin as julian + + result = Series(pd.to_datetime( + julian_dates, unit='D', origin='julian')) + expected = Series(pd.to_datetime( + julian_dates - pd.Timestamp(0).to_julian_date(), unit='D')) + assert_series_equal(result, expected) + + result = Series(pd.to_datetime( + [0, 1, 2], unit='D', origin='unix')) + expected = Series([Timestamp('1970-01-01'), + Timestamp('1970-01-02'), + Timestamp('1970-01-03')]) + assert_series_equal(result, expected) + + # default + result = Series(pd.to_datetime( + [0, 1, 2], unit='D')) + expected = Series([Timestamp('1970-01-01'), + Timestamp('1970-01-02'), + Timestamp('1970-01-03')]) + assert_series_equal(result, expected) + + def test_julian_round_trip(self): + result = pd.to_datetime(2456658, origin='julian', unit='D') + assert result.to_julian_date() == 2456658 + + # out-of-bounds + with pytest.raises(ValueError): + pd.to_datetime(1, origin="julian", unit='D') + + def test_invalid_unit(self, units, julian_dates): + + # checking for invalid combination of origin='julian' and unit != D + if units != 'D': + with pytest.raises(ValueError): + pd.to_datetime(julian_dates, unit=units, origin='julian') + + def test_invalid_origin(self): + + # need to have a numeric specified + with pytest.raises(ValueError): + pd.to_datetime("2005-01-01", origin="1960-01-01") + + with pytest.raises(ValueError): + pd.to_datetime("2005-01-01", origin="1960-01-01", unit='D') + + def test_epoch(self, units, epochs, epoch_1960, units_from_epochs): + + expected = Series( + [pd.Timedelta(x, unit=units) + + epoch_1960 for x in units_from_epochs]) + + result = Series(pd.to_datetime( + units_from_epochs, unit=units, origin=epochs)) + assert_series_equal(result, expected) + + @pytest.mark.parametrize("origin, exc", + [('random_string', ValueError), + ('epoch', ValueError), + ('13-24-1990', ValueError), + (datetime(1, 1, 1), tslib.OutOfBoundsDatetime)]) + def test_invalid_origins(self, origin, exc, units, units_from_epochs): + + with pytest.raises(exc): + pd.to_datetime(units_from_epochs, unit=units, + origin=origin) + + def test_processing_order(self): + # make sure we handle out-of-bounds *before* + # constructing the dates + + result = pd.to_datetime(200 * 365, unit='D') + expected = Timestamp('2169-11-13 00:00:00') + assert result == expected + + result = pd.to_datetime(200 * 365, unit='D', origin='1870-01-01') + expected = Timestamp('2069-11-13 00:00:00') + assert result == expected + + result = pd.to_datetime(300 * 365, unit='D', origin='1870-01-01') + expected = Timestamp('2169-10-20 00:00:00') + assert result == expected diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 2e9f11297dc83..36aac8cafecc1 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -284,6 +284,12 @@ def test_ops_compat(self): result = rng / offset tm.assert_index_equal(result, expected, exact=False) + # floor divide + expected = Int64Index((np.arange(10) + 1) * 12, name='foo') + for offset in offsets: + result = rng // offset + tm.assert_index_equal(result, expected, exact=False) + # divide with nats rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') expected = Float64Index([12, np.nan, 24], name='foo') @@ -867,10 +873,12 @@ def test_ops(self): self.assertEqual(td * 2, Timedelta(20, unit='d')) self.assertTrue((td * pd.NaT) is pd.NaT) 
self.assertEqual(td / 2, Timedelta(5, unit='d')) + self.assertEqual(td // 2, Timedelta(5, unit='d')) self.assertEqual(abs(td), td) self.assertEqual(abs(-td), td) self.assertEqual(td / td, 1) self.assertTrue((td / pd.NaT) is np.nan) + self.assertTrue((td // pd.NaT) is np.nan) # invert self.assertEqual(-td, Timedelta('-10d')) @@ -878,9 +886,6 @@ def test_ops(self): self.assertEqual(-1 * td, Timedelta('-10d')) self.assertEqual(abs(-td), Timedelta('10d')) - # invalid - self.assertRaises(TypeError, lambda: Timedelta(11, unit='d') // 2) - # invalid multiply with another timedelta self.assertRaises(TypeError, lambda: td * td) @@ -991,7 +996,7 @@ class Other: self.assertTrue(td.__sub__(other) is NotImplemented) self.assertTrue(td.__truediv__(other) is NotImplemented) self.assertTrue(td.__mul__(other) is NotImplemented) - self.assertTrue(td.__floordiv__(td) is NotImplemented) + self.assertTrue(td.__floordiv__(other) is NotImplemented) def test_ops_error_str(self): # GH 13624 diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index c2b895925b685..c22d1d2329fba 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -216,6 +216,7 @@ def test_conversion(self): def test_freq_conversion(self): + # truediv td = Timedelta('1 days 2 hours 3 ns') result = td / np.timedelta64(1, 'D') self.assertEqual(result, td.value / float(86400 * 1e9)) @@ -224,6 +225,15 @@ def test_freq_conversion(self): result = td / np.timedelta64(1, 'ns') self.assertEqual(result, td.value) + # floordiv + td = Timedelta('1 days 2 hours 3 ns') + result = td // np.timedelta64(1, 'D') + self.assertEqual(result, 1) + result = td // np.timedelta64(1, 's') + self.assertEqual(result, 93600) + result = td // np.timedelta64(1, 'ns') + self.assertEqual(result, td.value) + def test_fields(self): def check(value): # that we are int/long like diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 5d062dd38f9fc..d0f373fcc5a45 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -326,7 +326,7 @@ def _add_delta(self, delta): def _evaluate_with_timedelta_like(self, other, op, opstr): # allow division by a timedelta - if opstr in ['__div__', '__truediv__']: + if opstr in ['__div__', '__truediv__', '__floordiv__']: if _is_convertible_to_td(other): other = Timedelta(other) if isnull(other): @@ -334,7 +334,10 @@ def _evaluate_with_timedelta_like(self, other, op, opstr): "division by pd.NaT not implemented") i8 = self.asi8 - result = i8 / float(other.value) + if opstr in ['__floordiv__']: + result = i8 // other.value + else: + result = op(i8, float(other.value)) result = self._maybe_mask_results(result, convert='float64') return Index(result, name=self.name, copy=False) diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index 5dc9746c6d6f9..d0f1671f9e309 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -9,7 +9,11 @@ is_datetime64_dtype, is_datetime64tz_dtype, is_integer_dtype, - is_list_like) + is_integer, + is_float, + is_list_like, + is_scalar, + is_numeric_dtype) from pandas.types.generic import (ABCIndexClass, ABCSeries, ABCDataFrame) from pandas.types.missing import notnull @@ -177,7 +181,7 @@ def _guess_datetime_format_for_array(arr, **kwargs): def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, utc=None, box=True, format=None, exact=True, - unit=None, infer_datetime_format=False): + unit=None, infer_datetime_format=False, origin='unix'): """ Convert argument to datetime. 
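
To make the new keyword concrete: ``origin`` shifts the reference date that the
numeric values are counted from, and the offset is reduced to whole units with
the ``Timedelta`` floor division added in this same patch. A short sketch,
assuming a pandas build that includes this patch (reprs indicative):

    >>> import pandas as pd
    >>> # offset of the alternative origin from the unix epoch, in whole days,
    >>> # computed with the Timedelta floordiv introduced above
    >>> (pd.Timestamp('1960-01-01') - pd.Timestamp(0)) // pd.Timedelta(1, unit='D')
    -3653
    >>> # numeric values are then interpreted relative to the new origin
    >>> pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1960-01-01'))
    DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None)
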
@@ -229,13 +233,27 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
 
         - If False, allow the format to match anywhere in the target string.
 
     unit : string, default 'ns'
-        unit of the arg (D,s,ms,us,ns) denote the unit in epoch
-        (e.g. a unix timestamp), which is an integer/float number.
+        unit of the arg (D,s,ms,us,ns) denotes the unit, which is an
+        integer or float number. This will be based on the origin.
+        For example, with unit='ms' and origin='unix' (the default), this
+        would calculate the number of milliseconds to the unix epoch start.
     infer_datetime_format : boolean, default False
         If True and no `format` is given, attempt to infer the format of the
         datetime strings, and if it can be inferred, switch to a faster
        	method of parsing them. In some cases this can increase the parsing
        	speed by ~5-10x.
+    origin : scalar, default is 'unix'
+        Define the reference date. The numeric values would be parsed as the
+        number of units (defined by `unit`) since this reference date.
+
+        - If 'unix' (or POSIX) time, origin is set to 1970-01-01.
+        - If 'julian', unit must be 'D', and origin is set to the beginning of
+          the Julian Calendar. Julian day number 0 is assigned to the day
+          starting at noon on January 1, 4713 BC.
+        - If Timestamp convertible, origin is set to the Timestamp identified
+          by origin.
+
+        .. versionadded:: 0.20.0
 
     Returns
     -------
@@ -297,8 +315,15 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
     >>> %timeit pd.to_datetime(s,infer_datetime_format=False)
     1 loop, best of 3: 471 ms per loop
 
-    """
+    Using a non-unix epoch origin
+
+    >>> pd.to_datetime([1, 2, 3], unit='D',
+                       origin=pd.Timestamp('1960-01-01'))
+    0    1960-01-02
+    1    1960-01-03
+    2    1960-01-04
+    """
     from pandas.tseries.index import DatetimeIndex
 
     tz = 'utc' if utc else None
@@ -410,21 +435,77 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
                 raise e
 
     if arg is None:
-        return arg
-    elif isinstance(arg, tslib.Timestamp):
-        return arg
+        return None
+
+    # handle origin
+    if origin == 'julian':
+
+        original = arg
+        j0 = tslib.Timestamp(0).to_julian_date()
+        if unit != 'D':
+            raise ValueError("unit must be 'D' for origin='julian'")
+        try:
+            arg = arg - j0
+        except:
+            raise ValueError("incompatible 'arg' type for given "
+                             "'origin'='julian'")
+
+        # preemptively check this for a nice range
+        j_max = tslib.Timestamp.max.to_julian_date() - j0
+        j_min = tslib.Timestamp.min.to_julian_date() - j0
+        if np.any(arg > j_max) or np.any(arg < j_min):
+            raise tslib.OutOfBoundsDatetime(
+                "{original} is Out of Bounds for "
+                "origin='julian'".format(original=original))
+
+    elif origin not in ['unix', 'julian']:
+
+        # arg must be numeric
+        original = arg
+        if not ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or
+                is_numeric_dtype(np.asarray(arg))):
+            raise ValueError(
+                "'{arg}' is not compatible with origin='{origin}'; "
+                "it must be numeric with a unit specified ".format(
+                    arg=arg,
+                    origin=origin))
+
+        # we are going to offset back to unix / epoch time
+        try:
+            offset = tslib.Timestamp(origin) - tslib.Timestamp(0)
+        except tslib.OutOfBoundsDatetime:
+            raise tslib.OutOfBoundsDatetime(
+                "origin {} is Out of Bounds".format(origin))
+        except ValueError:
+            raise ValueError("origin {} cannot be converted "
+                             "to a Timestamp".format(origin))
+
+        # convert the offset to the unit of the arg
+        # this should be lossless in terms of precision
+        offset = offset // tslib.Timedelta(1, unit=unit)
+
+        # scalars & ndarray-like can handle the addition
+        if is_list_like(arg) and not isinstance(
+                arg, (ABCSeries,
ABCIndexClass, np.ndarray)): + arg = np.asarray(arg) + arg = arg + offset + + if isinstance(arg, tslib.Timestamp): + result = arg elif isinstance(arg, ABCSeries): from pandas import Series values = _convert_listlike(arg._values, False, format) - return Series(values, index=arg.index, name=arg.name) + result = Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, MutableMapping)): - return _assemble_from_unit_mappings(arg, errors=errors) + result = _assemble_from_unit_mappings(arg, errors=errors) elif isinstance(arg, ABCIndexClass): - return _convert_listlike(arg, box, format, name=arg.name) + result = _convert_listlike(arg, box, format, name=arg.name) elif is_list_like(arg): - return _convert_listlike(arg, box, format) + result = _convert_listlike(arg, box, format) + else: + result = _convert_listlike(np.array([arg]), box, format)[0] - return _convert_listlike(np.array([arg]), box, format)[0] + return result # mappings for assembling units From f49f9058d152efc9a309e01541762407e16dc953 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 3 Apr 2017 08:24:23 -0400 Subject: [PATCH 320/933] DOC: doc fix for feather_format error message text --- pandas/io/feather_format.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index ac74ac4823613..de6d04c105376 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -56,15 +56,16 @@ def to_feather(df, path): # raise on anything else as we don't serialize the index if not isinstance(df.index, Int64Index): - raise ValueError("feather does not serializing {} " + raise ValueError("feather does not support serializing {} " "for the index; you can .reset_index()" "to make the index into column(s)".format( type(df.index))) if not df.index.equals(RangeIndex.from_range(range(len(df)))): - raise ValueError("feather does not serializing a non-default index " - "for the index; you can .reset_index()" - "to make the index into column(s)") + raise ValueError("feather does not support serializing a " + "non-default index for the index; you " + "can .reset_index() to make the index " + "into column(s)") if df.index.name is not None: raise ValueError("feather does not serialize index meta-data on a " From 7059d898511a62710d6bd6487c8b40d7f535c1a1 Mon Sep 17 00:00:00 2001 From: funnycrab Date: Mon, 3 Apr 2017 08:41:29 -0400 Subject: [PATCH 321/933] BUG: Fix rollover handling in json encoding closes #15716 closes #15864 whenever the frac is incremented, there is a chance that its value may hit the value of pow10. 
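
A minimal illustration of the symptom, with values taken from the tests added
in this patch (assumes a pandas build that includes the fix):

    >>> import pandas as pd
    >>> # 0.95 at precision 1 hits the diff == 0.5, odd-frac branch: frac is
    >>> # bumped to 10 == pow10 and must roll over into the whole part
    >>> pd.DataFrame({'a_float': [0.95]}).to_json(double_precision=1)
    '{"a_float":{"0":1.0}}'
    >>> # the same rollover one digit deeper
    >>> pd.DataFrame({'a_float': [0.995]}).to_json(double_precision=2)
    '{"a_float":{"0":1.0}}'
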
Author: funnycrab Author: Funnycrab Closes #15865 from funnycrab/fix_rollover_handling_in_json_enc and squashes the following commits: c9710ee [funnycrab] add more tests for examples listed in issue #15716 and #15864 3cee6b3 [funnycrab] add whatsnew entry 9b0dff0 [funnycrab] remove additional blank line 75effb4 [funnycrab] add tests 6acb969 [funnycrab] fix for cpplint aec58e6 [Funnycrab] BUG: Fix rollover handling in json encoding --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/_libs/src/ujson/lib/ultrajsonenc.c | 12 ++++--- pandas/tests/io/json/test_pandas.py | 25 ++++++++++++++ pandas/tests/io/json/test_ujson.py | 42 +++++++++++++++++++++++ 4 files changed, 75 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ceb8f0f5fabe4..63aea96ef3369 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1010,6 +1010,7 @@ I/O - Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`) - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`) +- Bug in ``pd.to_json()`` for the C engine where rollover was not correctly handled for case where frac is odd and diff is exactly 0.5 (:issue:`15716`, :issue:`15864`) Plotting ^^^^^^^^ diff --git a/pandas/_libs/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c index 5a15071938c1a..6bf2297749006 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsonenc.c +++ b/pandas/_libs/src/ujson/lib/ultrajsonenc.c @@ -823,17 +823,19 @@ int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, if (diff > 0.5) { ++frac; - /* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */ - if (frac >= pow10) { - frac = 0; - ++whole; - } } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) { /* if halfway, round up if odd, OR if last digit is 0. That last part is strange */ ++frac; } + // handle rollover, e.g. 
+  // case 0.99 with prec 1 is 1.0 and case 0.95 with prec 1 is 1.0 as well
+  if (frac >= pow10) {
+    frac = 0;
+    ++whole;
+  }
+
   if (enc->doublePrecision == 0) {
     diff = value - whole;

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 7dbcf25c60b45..8fc8ecbdf8abc 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -380,6 +380,31 @@ def test_frame_from_json_nones(self):
         unser = read_json(df.to_json(), dtype=False)
         self.assertTrue(np.isnan(unser[2][0]))
 
+    def test_frame_to_json_float_precision(self):
+        df = pd.DataFrame([dict(a_float=0.95)])
+        encoded = df.to_json(double_precision=1)
+        self.assertEqual(encoded, '{"a_float":{"0":1.0}}')
+
+        df = pd.DataFrame([dict(a_float=1.95)])
+        encoded = df.to_json(double_precision=1)
+        self.assertEqual(encoded, '{"a_float":{"0":2.0}}')
+
+        df = pd.DataFrame([dict(a_float=-1.95)])
+        encoded = df.to_json(double_precision=1)
+        self.assertEqual(encoded, '{"a_float":{"0":-2.0}}')
+
+        df = pd.DataFrame([dict(a_float=0.995)])
+        encoded = df.to_json(double_precision=2)
+        self.assertEqual(encoded, '{"a_float":{"0":1.0}}')
+
+        df = pd.DataFrame([dict(a_float=0.9995)])
+        encoded = df.to_json(double_precision=3)
+        self.assertEqual(encoded, '{"a_float":{"0":1.0}}')
+
+        df = pd.DataFrame([dict(a_float=0.99999999999999944)])
+        encoded = df.to_json(double_precision=15)
+        self.assertEqual(encoded, '{"a_float":{"0":1.0}}')
+
     def test_frame_to_json_except(self):
         df = DataFrame([1, 2, 3])
         self.assertRaises(ValueError, df.to_json, orient="garbage")
diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py
index e66721beed288..c2cbbe1ca65ab 100644
--- a/pandas/tests/io/json/test_ujson.py
+++ b/pandas/tests/io/json/test_ujson.py
@@ -43,6 +43,48 @@ def test_encodeDecimal(self):
         decoded = ujson.decode(encoded)
         self.assertEqual(decoded, 1337.1337)
 
+        sut = decimal.Decimal("0.95")
+        encoded = ujson.encode(sut, double_precision=1)
+        self.assertEqual(encoded, "1.0")
+        decoded = ujson.decode(encoded)
+        self.assertEqual(decoded, 1.0)
+
+        sut = decimal.Decimal("0.94")
+        encoded = ujson.encode(sut, double_precision=1)
+        self.assertEqual(encoded, "0.9")
+        decoded = ujson.decode(encoded)
+        self.assertEqual(decoded, 0.9)
+
+        sut = decimal.Decimal("1.95")
+        encoded = ujson.encode(sut, double_precision=1)
+        self.assertEqual(encoded, "2.0")
+        decoded = ujson.decode(encoded)
+        self.assertEqual(decoded, 2.0)
+
+        sut = decimal.Decimal("-1.95")
+        encoded = ujson.encode(sut, double_precision=1)
+        self.assertEqual(encoded, "-2.0")
+        decoded = ujson.decode(encoded)
+        self.assertEqual(decoded, -2.0)
+
+        sut = decimal.Decimal("0.995")
+        encoded = ujson.encode(sut, double_precision=2)
+        self.assertEqual(encoded, "1.0")
+        decoded = ujson.decode(encoded)
+        self.assertEqual(decoded, 1.0)
+
+        sut = decimal.Decimal("0.9995")
+        encoded = ujson.encode(sut, double_precision=3)
+        self.assertEqual(encoded, "1.0")
+        decoded = ujson.decode(encoded)
+        self.assertEqual(decoded, 1.0)
+
+        sut = decimal.Decimal("0.99999999999999944")
+        encoded = ujson.encode(sut, double_precision=15)
+        self.assertEqual(encoded, "1.0")
+        decoded = ujson.decode(encoded)
+        self.assertEqual(decoded, 1.0)
+
     def test_encodeStringConversion(self):
         input = "A string \\ / \b \f \n \r \t &"
         not_html_encoded = ('"A string \\\\ \\/ \\b \\f \\n '

From 4cb730e95353f414bb47f571a69781746bd3e84b Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Mon, 3 Apr 2017 09:01:59 -0400
Subject: [PATCH 322/933] Revert "CI: add jdcal to 3.6 build as openpyxl >=
2.4.5 is broken" (#15875) This reverts commit d1e1ba08ef259724ba71e0953c52e8e4ad81bd17. closes #15861 --- ci/requirements-3.6.run | 3 --- 1 file changed, 3 deletions(-) diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 8f81c4620558e..41c9680ce1b7e 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -2,10 +2,7 @@ python-dateutil pytz numpy scipy -# openpyxl >= 2.4.5 should be dependent on jdcal -# but is not for some reason openpyxl -jdcal xlsxwriter xlrd xlwt From b199fbffe2590a148eec8ebc38751c43d5c7c361 Mon Sep 17 00:00:00 2001 From: Tong SHEN Date: Tue, 4 Apr 2017 00:06:46 +0800 Subject: [PATCH 323/933] DOC: Fix a typo in dsintro.rst (#15877) --- doc/source/dsintro.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index cc69367017aed..4fcb63c18757a 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -153,7 +153,7 @@ Vectorized operations and label alignment with Series ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When doing data analysis, as with raw NumPy arrays looping through Series -value-by-value is usually not necessary. Series can be also be passed into most +value-by-value is usually not necessary. Series can also be passed into most NumPy methods expecting an ndarray. From ed07df196e08a183c162c0e91a12f4f203d41041 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 3 Apr 2017 12:31:55 -0400 Subject: [PATCH 324/933] CLN: Remove "flake8: noqa" from even more files Another round of house-cleaning that builds off #15867. Likely to be the last one for now. Author: gfyoung Closes #15872 from gfyoung/flake8-noqa-clean and squashes the following commits: 3e610f5 [gfyoung] CLN: Make pickle_compat.py flake8-able 05e067a [gfyoung] CLN: Make windows.py flake8-able dc22c0a [gfyoung] CLN: Make clipboards.py flake8-able 90b00f0 [gfyoung] CLN: Make clipboard/__init__.py flake8-able ccb44cc [gfyoung] CLN: Make engines.py flake8-able --- pandas/compat/pickle_compat.py | 54 ++++++++++++++++++----------- pandas/computation/engines.py | 8 ++--- pandas/util/clipboard/__init__.py | 8 ++--- pandas/util/clipboard/clipboards.py | 4 +-- pandas/util/clipboard/windows.py | 1 - 5 files changed, 41 insertions(+), 34 deletions(-) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 279a82fea1cc2..5b4fcad252192 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -1,13 +1,13 @@ -""" support pre 0.12 series pickle compatibility """ - -# flake8: noqa +""" +Support pre-0.12 series pickle compatibility. +""" import sys -import pandas +import pandas # noqa import copy import pickle as pkl from pandas import compat, Index -from pandas.compat import u, string_types +from pandas.compat import u, string_types # noqa def load_reduce(self): @@ -16,17 +16,19 @@ def load_reduce(self): func = stack[-1] if type(args[0]) is type: - n = args[0].__name__ + n = args[0].__name__ # noqa try: stack[-1] = func(*args) return except Exception as e: - # if we have a deprecated function - # try to replace and try again + # If we have a deprecated function, + # try to replace and try again. 
+ + msg = '_reconstruct: First argument must be a sub-type of ndarray' - if '_reconstruct: First argument must be a sub-type of ndarray' in str(e): + if msg in str(e): try: cls = args[0] stack[-1] = object.__new__(cls) @@ -34,7 +36,7 @@ def load_reduce(self): except: pass - # try to reencode the arguments + # try to re-encode the arguments if getattr(self, 'encoding', None) is not None: args = tuple([arg.encode(self.encoding) if isinstance(arg, string_types) @@ -45,31 +47,37 @@ def load_reduce(self): except: pass + # unknown exception, re-raise if getattr(self, 'is_verbose', None): print(sys.exc_info()) print(func, args) raise - stack[-1] = value - -# if classes are moved, provide compat here +# If classes are moved, provide compat here. _class_locations_map = { # 15477 - ('pandas.core.base', 'FrozenNDArray'): ('pandas.indexes.frozen', 'FrozenNDArray'), - ('pandas.core.base', 'FrozenList'): ('pandas.indexes.frozen', 'FrozenList'), + ('pandas.core.base', 'FrozenNDArray'): + ('pandas.indexes.frozen', 'FrozenNDArray'), + ('pandas.core.base', 'FrozenList'): + ('pandas.indexes.frozen', 'FrozenList'), # 10890 - ('pandas.core.series', 'TimeSeries'): ('pandas.core.series', 'Series'), - ('pandas.sparse.series', 'SparseTimeSeries'): ('pandas.sparse.series', 'SparseSeries'), + ('pandas.core.series', 'TimeSeries'): + ('pandas.core.series', 'Series'), + ('pandas.sparse.series', 'SparseTimeSeries'): + ('pandas.sparse.series', 'SparseSeries'), # 12588, extensions moving - ('pandas._sparse', 'BlockIndex'): ('pandas.sparse.libsparse', 'BlockIndex'), - ('pandas.tslib', 'Timestamp'): ('pandas._libs.tslib', 'Timestamp'), - ('pandas.tslib', '__nat_unpickle'): ('pandas._libs.tslib', '__nat_unpickle'), + ('pandas._sparse', 'BlockIndex'): + ('pandas.sparse.libsparse', 'BlockIndex'), + ('pandas.tslib', 'Timestamp'): + ('pandas._libs.tslib', 'Timestamp'), + ('pandas.tslib', '__nat_unpickle'): + ('pandas._libs.tslib', '__nat_unpickle'), ('pandas._period', 'Period'): ('pandas._libs.period', 'Period') - } +} # our Unpickler sub-class to override methods and some dispatcher @@ -112,6 +120,8 @@ def load_newobj(self): obj = cls.__new__(cls, *args) self.stack[-1] = obj + + Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj @@ -126,6 +136,8 @@ def load_newobj_ex(self): else: obj = cls.__new__(cls, *args, **kwargs) self.append(obj) + + try: Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex except: diff --git a/pandas/computation/engines.py b/pandas/computation/engines.py index a3de78c2f2089..aebc5bb02d59d 100644 --- a/pandas/computation/engines.py +++ b/pandas/computation/engines.py @@ -1,13 +1,11 @@ -"""Engine classes for :func:`~pandas.eval` """ - -# flake8: noqa +Engine classes for :func:`~pandas.eval` +""" import abc from pandas import compat -from pandas.compat import DeepChainMap, map -import pandas.core.common as com +from pandas.compat import map import pandas.formats.printing as printing from pandas.computation.align import _align, _reconstruct_object from pandas.computation.ops import (UndefinedVariableError, diff --git a/pandas/util/clipboard/__init__.py b/pandas/util/clipboard/__init__.py index 9e2b2faf858db..4066a3be5e850 100644 --- a/pandas/util/clipboard/__init__.py +++ b/pandas/util/clipboard/__init__.py @@ -25,8 +25,6 @@ """ __version__ = '1.5.27' -# flake8: noqa - import platform import os import subprocess @@ -62,14 +60,16 @@ def determine_clipboard(): if HAS_DISPLAY: # Determine which command/module is installed, if any. 
try: - import gtk # check if gtk is installed + # Check if gtk is installed + import gtk # noqa except ImportError: pass else: return init_gtk_clipboard() try: - import PyQt4 # check if PyQt4 is installed + # Check if PyQt4 is installed + import PyQt4 # noqa except ImportError: pass else: diff --git a/pandas/util/clipboard/clipboards.py b/pandas/util/clipboard/clipboards.py index bd5528334168f..e32380a383374 100644 --- a/pandas/util/clipboard/clipboards.py +++ b/pandas/util/clipboard/clipboards.py @@ -1,5 +1,3 @@ -# flake8: noqa - import sys import subprocess from .exceptions import PyperclipException @@ -8,7 +6,7 @@ Pyperclip could not find a copy/paste mechanism for your system. For more information, please visit https://pyperclip.readthedocs.org """ PY2 = sys.version_info[0] == 2 -text_type = unicode if PY2 else str +text_type = unicode if PY2 else str # noqa def init_osx_clipboard(): diff --git a/pandas/util/clipboard/windows.py b/pandas/util/clipboard/windows.py index 5c9be9ddaf508..5fc23f7102f41 100644 --- a/pandas/util/clipboard/windows.py +++ b/pandas/util/clipboard/windows.py @@ -1,4 +1,3 @@ -# flake8: noqa """ This module implements clipboard handling on Windows using ctypes. """ From 456e729384c315c291ef92bb150bcc4f79a22bdf Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 3 Apr 2017 13:25:55 -0400 Subject: [PATCH 325/933] DOC: remove gbq_integration instructions from contributing.rst (#15879) DOC: remove vbench instructions from contributing.rst --- doc/source/contributing.rst | 83 +++---------------------------------- 1 file changed, 5 insertions(+), 78 deletions(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 5e551a7fd5349..83cc1777b35f6 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -616,23 +616,23 @@ Or with one of the following constructs:: pytest pandas/tests/[test-module].py::[TestClass] pytest pandas/tests/[test-module].py::[TestClass]::[test_method] -Using `pytest-xdist `_, one can +Using `pytest-xdist `_, one can speed up local testing on multicore machines. To use this feature, you will need to install `pytest-xdist` via:: pip install pytest-xdist - -Two scripts are provided to assist with this. These scripts distribute + +Two scripts are provided to assist with this. These scripts distribute testing across 4 threads. On Unix variants, one can type:: test_fast.sh - + On Windows, one can type:: test_fast.bat - + This can significantly reduce the time it takes to locally run tests before submitting a pull request. @@ -657,12 +657,6 @@ to enable easy monitoring of the performance of critical *pandas* operations. These benchmarks are all found in the ``pandas/asv_bench`` directory. asv supports both python2 and python3. -.. note:: - - The asv benchmark suite was translated from the previous framework, vbench, - so many stylistic issues are likely a result of automated transformation of the - code. - To use all features of asv, you will need either ``conda`` or ``virtualenv``. For more details please check the `asv installation webpage `_. @@ -722,73 +716,6 @@ This will display stderr from the benchmarks, and use your local Information on how to write a benchmark and how to use asv can be found in the `asv documentation `_. -.. 
_contributing.gbq_integration_tests: - -Running Google BigQuery Integration Tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You will need to create a Google BigQuery private key in JSON format in -order to run Google BigQuery integration tests on your local machine and -on Travis-CI. The first step is to create a `service account -`__. - -Integration tests for ``pandas.io.gbq`` are skipped in pull requests because -the credentials that are required for running Google BigQuery integration -tests are `encrypted `__ -on Travis-CI and are only accessible from the pandas-dev/pandas repository. The -credentials won't be available on forks of pandas. Here are the steps to run -gbq integration tests on a forked repository: - -#. Go to `Travis CI `__ and sign in with your GitHub - account. -#. Click on the ``+`` icon next to the ``My Repositories`` list and enable - Travis builds for your fork. -#. Click on the gear icon to edit your travis build, and add two environment - variables: - - - ``GBQ_PROJECT_ID`` with the value being the ID of your BigQuery project. - - - ``SERVICE_ACCOUNT_KEY`` with the value being the contents of the JSON key - that you downloaded for your service account. Use single quotes around - your JSON key to ensure that it is treated as a string. - - For both environment variables, keep the "Display value in build log" option - DISABLED. These variables contain sensitive data and you do not want their - contents being exposed in build logs. -#. Your branch should be tested automatically once it is pushed. You can check - the status by visiting your Travis branches page which exists at the - following location: https://travis-ci.org/your-user-name/pandas/branches . - Click on a build job for your branch. Expand the following line in the - build log: ``ci/print_skipped.py /tmp/pytest.xml`` . Search for the - term ``test_gbq`` and confirm that gbq integration tests are not skipped. - -Running the vbench performance test suite (phasing out) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Historically, *pandas* used `vbench library `_ -to enable easy monitoring of the performance of critical *pandas* operations. -These benchmarks are all found in the ``pandas/vb_suite`` directory. vbench -currently only works on python2. - -To install vbench:: - - pip install git+https://github.com/pydata/vbench - -Vbench also requires ``sqlalchemy``, ``gitpython``, and ``psutil``, which can all be installed -using pip. If you need to run a benchmark, change your directory to the *pandas* root and run:: - - ./test_perf.sh -b master -t HEAD - -This will check out the master revision and run the suite on both master and -your commit. Running the full test suite can take up to one hour and use up -to 3GB of RAM. Usually it is sufficient to paste a subset of the results into the Pull Request to show that the committed changes do not cause unexpected -performance regressions. - -You can run specific benchmarks using the ``-r`` flag, which takes a regular expression. - -See the `performance testing wiki `_ for information -on how to write a benchmark. 
-
 Documenting your code
 ---------------------
 

From ca7207f6472ec50424054e899fe51d012d950c1e Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Mon, 3 Apr 2017 14:26:36 -0400
Subject: [PATCH 326/933] DOC: update contributing.rst for ci (#15880)

* DOC: update contributing.rst for ci

* typos & auto-cancel links

* make it a note

* add back accid deleted section
---
 doc/source/_static/ci.png   | Bin 0 -> 224055 bytes
 doc/source/contributing.rst | 56 +++++++++++++++++++++++++-----------
 2 files changed, 39 insertions(+), 17 deletions(-)
 create mode 100644 doc/source/_static/ci.png

diff --git a/doc/source/_static/ci.png b/doc/source/_static/ci.png
new file mode 100644
index 0000000000000000000000000000000000000000..82985ff8c204abcae6eead66d676cb880f92d3af
GIT binary patch
literal 224055
[224055 bytes of base85-encoded PNG image data omitted]
zQzIY#yg8#YvS2gQ<%72i!0fgS`D5_9OIJ#I+a4sIr1%nq*9{}l57$&s{hHFL3XaL>4-#?$z!pr7A zTXJyy54D~ZWc}+7D?1At>%U|_vkLxI%dcYNWnrfyX=88U;QHK#Fgq^?m*77b{(rar zv&p})>i#DyA2-|IIsbO&KR5+h|I*-Z8vPTlf7CwXCHzW|^3{2>q-0YMZ&R#HsE3-MqH%jfMhnD-=G{3WGiOh64H zOCY2SPqI2vN1;&g<-2SZ#1aPku!6I^fplC+#Lt0ew8Yg|?-TpV0urUc^aM5Ejob2G z(pRa)AT?9I2&8(EbaM7!t#SW58hYGuBuuZ*ZkIJD|D6*5QVrAVl=#0<^j|b@zgr`sw!Lq;)A9PB zfcXbKYMU3<{{Y@UWlGi~ARE3m@MRMGcVIu$wW0o-4naV9uNQz=^}SLUKkMHi9e~Jc zi2QFsdClAzpe?CQfua2GI=C}{pz7bzh<^Q!Z`{e4R~8*y{GXWqpFub742S(YnjBHd zXR~@YKHdLY{l7QUe@b~aKfu4EQTc!7_upE?|E>9rB7Fu*tLk7*wX$Hl=VwlSs$%Xd zp{8>pSGD$%puQa)@Henx9r>-f`5*5EgTCyxgtD$AIsATVF_Q`4$!*bGl%w z@h9h#icut@mt_XHU?D}G92jMFj}*RY3z3n8uqzz}e;KT-zZU;4hbuOh(RNXrL8VapD*j?nd9*8_(Z}J%=VF>QZelJ?dc2ma_DF0T^P9m^>Lh>g+swe$ z6K3FB-*o2XOuoAoVQMRgXR)K`NUcP}eU2cS+~vb}-0yA5PUo!sDZz)?KfsemJR+ga zHrxBir=15&N7Ox$y<7g+QkvivakZK7;?v$1tTAyPmMQ7$g39j<>bU$7iFkqK5~lA0 zUDz=G?h`enpFL%%S;bk#dnM!JW!kmWOOzuQ-re9P!yWxA*BI0(H=7L+O1>WXPJl-V zlA*SDzQNpFbMbo>dNEl4qL_9LHf;w7+TlL1k%0Vn(WQWs)Xee(SYR#k5^_YP&(sgb zt>utZkg}?A8P;KfCL{4G^sF^ASiJJOOz3->`ZzyZbt!p&)|!tu7T%pX@umXPIg(>) zOZPyhiM&57fVPSMA>}K~nn+=&Jsfba=0?cYuZ&7|n)RY>K5_N8ac5aM6h}l;wO6b4 zsxm=zSEEkDexl}aDRdEBIox2PJAb_;bK&h5Byha2`D^Z->tbz&f(*nKou$uoH#vKS zK(k3*UpM8#SKL%{E8kW6zaZoI7c!oYX9w@aSLkaevU>T60>>W1y1q4Y#;VM$xlmNt8{1gSceOL(sfjB*G_EKDM=gsNy=Te zdt!A9oI{ZWv;K7Hd(HysO*$@sTG<1SG8O|6sW}q4FufQE3%@YkpXVo_l@@FObuwI7 zE&bMoA9i5EJtsXkuNcB<*wns$ZLH1sLJ#rwF+jzW9;nWrFN?ge1~u(nS-TR{+WAYE zXf{&2vi=to{QpA1pX?m7?3d_i(-4~%+*_Vu;hHRTaFCgKKk!K<3RMHOEl|tyH^-~D zLt{okLK6No(MhYv4jAhvo}0f2AYR_D+w|QXt5L6>4sCX2LE?KqU(*h;r5`eyrYOQZ zcAZGl!^l|K~&H>9^ zUHrc69|>hsSG7rn1d9aFshrn-g2{Mh=!&ocCeZT|uMxnAP5Ox5kXPBu9chZY+){`+ z*+8})!Zfwbd?rw^T8KnCTT9meDy&euuff{MfK;zOEQmPbr8haAHeysJlX%5^S|@IrpY zDwQi+-r#_6t~b87ot7eHI@cm8KjdzLPAgMw^794BS+r??$JKfShIb~f!!r4)%Tj68 z`&0V2I0L@ZA7*a7PqMNLY*y$ZX`pTS^izV4)&Z$+$&c)Y!vNZr z_?BcQ0?4ELd|QW|{#W_6yLI&DkbQo_7InboZP3&<5uaa$H@me@{8Z{;1|lHnQf+tj zou(HO$n#LFeR#ic2^5*m595xwAGI_g1XeLNC;QLRZN`{RhgC9eXRP>)LS(lfoGjY@ zeS_R%KT`$Q({hidT_U$+$p$%#;Fm5VknI3ghUJnxQ#)NQWZl+9==>DjIb;5+cSxdj<$;fgd%~5>+SLmfnsfQ0C+w_8 zHcLKjlH0wYUGKh`Ct*EgO;@Y>P+}P5Nn%PoBmEWAevb1H$GY~?J|Z>#MH`NY)@K0_&PF)P!M!*26X+1trv-@Ic;LaS1Gwjk0t6LhzBw77K+U&815^=an_8s^-q6h5|jg|m9*LVbON{k^ueuR^U2;P{L3VkkkLVLA#6pd`O0=ES?;lMXR1~h+j03D zYvW<)kUKfd<0$f8pigh~=mxHM#qlx0L2vmfpH+a#iZt6YtCvjMDEa;&Pr&;32?S$|wv^?FOKN*vFg7(V^N z+UGs^&2AE1LJJn!4Q%*$a`pa$ejpaGdLBjfO1HJ*ph=bh^CAFlY z%SVby-^ar~=f@A5cVW|&Rh2Hjx>XX6RLy^1iG8)DefEajWhz-p-_Yqho*XI9?G@V@ z(AJY2V9M{^=fyAVj<$Wy8cpH#Om=Z0*862dQqf91X~3as37$_VZA!kEEE5Eky$t(N z99;_UX+;QwcQKa0BY{}YrjHIt38){nif!p_v3tD#^^-gj-_b+F zTdo@37cEz2w-G2hKovkvBREgb^kPGGXMU0R=SfnbAz+tJBsQuk|Tl|ei-WeBgv~}_YvE_COg@STm2e(8t;M`54@}gu>dbPe{Bw-cu`63 z+nD>(2z`2G9 z)9P8tc}e`P*kLW{G#&-EO*Oz}ekQ&GFG*T4ohaA97t&N!bIFXU_yQX^o&} zYpc`MEOnPZQvchg|K6U#>kIaHj%r^qM?m*OuV8t~C){`~hOaD+qkZCz41kcr@;>}; z3@eF(pHObxQhhw^aAf-f9A+rhtC#hQc{-vAo?3{}4(1@}p9h7~PwhUgCGt3( z&*28ucNx^!?&>XDmHI@R3#NzQU$%XO2T%AGCqCY;3gvVJBfz`{m*WlC?vCnGuG_oo zm~vUO$fg+wnjgOCJd{)}b16*S)yxbR%xxUz5ECbsK2Xt2BZA?V|gBsR68H|Nx*`o7A>NA zpuR05gmHNJTTg3Vf$8eGfPn8TzNp;0b;RPiwiH+DQqt<Y%(hY~9y~-cI2C`-3dkebr zRX0tjlpJY5HVe9-RLp}r6~!BKDqBHmjNo98!LdwMz0e~r94d)n@Oz12*?hqGmR;&P zWB=k?O*E`L))kFhU~i9nfNdkMs0Hn%u38|v@PZv9v72mHS@#lb1+fI`rtHg)-RkAU zjj9MVf>QmgyMt3Ueguhw&#b#5NG}>>9|Q ze3Co*V+y`6D|Cty^98(G=}cFq_N{OXP*pA_PB_h~r%?V={*cmT(|X-<=4pQ;cwBE> zV*hdsS7eAOa#|@^3pPh=Z2(%93Pt0Xdh3@=;M0aBw7!%s?>gLE#=H8oB_-J(jML}Y zo1*E5lW*tsa^IsUzob^m2vfat$dB-TJ4+%@hpqrZYWds>?~NgV>EYdc 
zd7oufaT>vvjg_=iKRP{Ar4~xQS@=ik+`l9F;-NZe^r|G{xHA2{vxd0CQ;c%L_btcF!CLBi^&8(O=Na2(VqtNNBtHYYlgD8?bQY% z8Se*sPBe6n&4I-)6N58n9|r~{x25r$VZD((&x>Stft$s$q$MBW`uH3e=vh6(ewt-T zY9q74?pD33c0}h;vII?6k)-W-Jm;si!morGo4B#+SEnrVS;bvP0?xNmn7b?JpYRX+ zQv~8pzw)Y!U?_c+5|M3A`}>HA{rEh$7R-Fwfrv&(He*It z*?lQtb%1|Im#$;f&qM8_LsX(=#_vZRK7eo%W5>rP)V8bj;Hf#b-v-PD#eLtXgg&I? zV3{NbX&32()DxIjG|?ES`}38ph{)OfacZ;Eqq#Y_l1{^vcY^6C4#R0(hRr4>?ZTRh zb*TjzUH29bYzl6D?&!^r&bev;9bYK21M%niTB1ysj10w-#*yEDM69wb1cG3*B+V`G zCG^W4?!9?JNmgo#Erv5NF`S70Y4Mq z&hd0u()T2JPPQ;xh2+&)7oklUJnpWDiknYt^q9$m$tYl#;?UDk6B(^>^%Q^T#7m0$ zprk6vcvjp~x$4k=HiC%_$aXdjLDhHTWaPx7~mR{gF{q26570-x4}cUr@F7pk%j+zpm3Xq6)28U^6GLcP2mA z#pR5rG1YKnk!$&%x2q2s53$O5XDbnYt(>pU<0#lG>cwD#?tGmqKuc;LgH8ezd{O0~ ziCaIsuF#wRaN`^jT~8n}EnA@;mP0>*fBOA7(BXPL>B{wnJ&UFfHkjvvm3WzT0TQ*B zXnznjiMU651qTd*z(XGQ4OG|5-%d+ptdBhI9$4#QZWHNrU~L3RO8fiHuO91$Cv@yH z5-x~!?_3-YKxUXly54HNrd?LqImN}v$>ZH+2HClUr^YyLEi~lp?f@GzWz&)0ZV|xA z1HPGE26TME0}-8Pd;N6hK~PjwWEQT94LtsH7Me0nZD_PU|A&hIf!1K|BkZjHGa)>m z`Bl47_{CI--}U9u#^iVMG`_Zrhr-aKZ|F8MW0jsTbf5MM8gp-uJ5RSA(61L%DRlni zoIHFDl0W*ve&&b*^|XPCb+cDXqxpJC-e1oI&}~yUS3Iv6e0YAH+p~hd^kW_!k>k0+ z^oD&;4UTa{rXE#_j=2^dS*~@ep*W7dpO_LZv+C0zuKp+VALtsUjx&+>lU7m-jezZS z>4P0Y6W~8rPV|9Rnl{LW_AzHV6kNj16yvfI>>t?4uiz1fcg57}IG)s7rYu?tBw@i> z#%Ug+uJj@cF(Fcjj{YxQ(PTxp6E5)LRz;>^LX%pis{Yrm3lsMWc$XBs1QOl@;N%XH zxw<||eZC^%UmtH=Uh@~1f^$vTowZPpno({{fwV;o*PM6e#>eGw@7GLFTxHz;%T2XG zK$NZ|u&DfA2WgCZShFvVc*iE0R;-LpP>eAfG!5>5jJC-6b9h;Eo~bab;XRF5qjPsl zcN6?1v?uP-QJJz8Db&!_liUw!?#VY^Mm3KZP|11+rZG-g#U|t@ugm{55K}qcoO1oE zg<@>CB=PKvYb&)QGM+!*gn&=J*1X4K|5|M8y0gQvo`UzL#CpQgv2CY()O`SEL(8Ph zI4zMY9i|lBt!G~c4^y{FLwjOY)fN9_b|{$t{hc`Z%tBVzUNm+@_etj8{pPgnb7o{u zBV90g052wF%wz)BTTZUx2{Jpe4=8xZ>!_Ox8l1qT>B_prJFqe31K7BwvEdI1Mb6op zov`mJ@<1lj%f)_+w(*y?S}4!@$r>aDBr7Xp(KsP2R6!ep(y_+mB%0@2CH#O-rNb68 zoqB3MPis2n-XTlsAfca;Uu1)_#PpOst9x2o^~i0^jhG@2W)K32-yTqk4Xk2!&jDR0 z2^t}!cC0(D)v{~@yD9hZ-kjYdqb=T;EcU9W8svKKiz{P>e!h?%eJrIx$C44M-No$^ z!)WFLE6tEXD^0{&iuCWA!ErNqF{;EO%=?9p_xihC+|Z?k{a?`6r4(TsX zm6KpV+#xL=P*O(sX>C*;-JulreF8KJWRWN#X_l&O07x3{agKuf{uph$cuyp+XI=P9 z&gD&k&2$qF^(bLYr(n^*&NoY9#?uU#P!4&s20t~=b#N8uxRSrjmWfFRh*{$Ml87tK z*3~7~31sn8f4FUK4Nq0<@U~_Qt1HG}j_LS#LxI^4#|LZFbl`J8gP7!hHt`P^4tsX|(j5;n*B1WG*^M$JKE z2Ai#>_1pJ@Z7h#Ip#$yB=`hp&@%NI`_K^hzGq#PQSr0p=ZTHdyd)JM(b}{~DpM18< zz&6v5kc-SB;j#j^LZF|Hved0c6llWKcGve{)W3f@tMB4ck5bpylA-@f7gKm;`WIBA zJc~i7ekx$}^RBOJ+1!0m{Dr7q<%Q@(NVWcKRXWh0;Vb67@7&F5v(EJt_)aMAojS%7 zt?bG5FICH3qC{$;{Cc>0zlCA@J3gQb1!QUV!LZ=sWU)$besbb|U#adoc}`!(TRAEQ z9uJ2b*jMkYTm0%L1I$F5V!_=LD96==WXu`LGP+x8Tom<@Ch=76^pNCc{JnO8d#D|F zLHS3~-9`)F$A4TdQTQBb8n5Y3w3g&swHC~u-N|6Gg}h9FCUq{CHl6N9Z@)ft$<@V4 zl^?rtPb{cI4dXYxg~^Z0GG;^BK||+k-RFV{ho2YNE~2__H1Hd=*PmQtIS&ne=bjE) ztmZk;#an_N^a9uMR1`Fj!}iW)Lu?;cB5|A*CTz~@9t>CNf6X6cu zUO)J}Qkrw@xibytsYO^$P$b z^->RU(cdReQ?^G?KW}19$C76*gpGUxRFM?2!-O%@lA`(hzrVJw$yhhGd8<&1Qc+J9 zU7lp9dA#2-rWF)ysar0jQg2Y7oYv%6-J98Gl-~|2d#swUW(8kt$+HRp5GFF)cLr2< z*mL*SRJ8S6rd`~a&-p_77VvheFDb|6s(8})Q6kz5ZOz)|Z)|iON1)fl>OmjoeGDFB z?nFJgY^<@`W&vHtR-1Q0W&!F8=l@JnRD@@7)F%_~)MYf`#mIWGk-fR&ItO_jqZX20 zf#%bub_-=|)h+5h4`MnQkoAlH)}deJ+=E93a4qbe?1lc@{R zm1SILYf3K9{Opekud{U#>WVXnDA}OW#Bzmb#%VLiV^8zf?Jr`r_W{#Z8A7R#b*_9% zB@kET`DnA-^1sHr{5(r)>ZVpkLavC!In()}tm&=^)#j%*+mCU5RTo)V7B{Tuj)z0H zxEPjJYe7q+=WmDvc>F%S;Zt6!@UQ2J86xIZzGwSkiF{?VTQjb>TSFxPS(M^DD#3vP zuT}G6LxE6Mjv$1lVl&W{1f23~7Nl5AfD5q-Unqon-Q?CJ=#fSY(lz0#0h&tK0u5zF zLai)r)Gkv?;qrTrdrL;QO(A?-fVuWbre$isEX}7Z3i#P9{<8*pM?||qaKrGNkF4^# z2O>;r6RF`{sBH zuA_IBCwwJd$T)>bqEx8?tHf45AGJ-8Ms@j%nxTyDCaXsqJ5rF*+!nn2zzS<|aEk}b 
zZsx_|;JARVxV}fNht*k{>YDmrCU2GJ$=g<^M6gf>SH=5>3{J#gUEh8;ku9@HgDt`8 z5;~DiVMCbBf}ksjO?Fs2E2u(4Zg@$V@4a>z+0vL-4GJ$M<#6v@5o3NTG zE;zL*DSKGaJUU+{U#G^ilDx=+%bdJcdm1>9pmFuNj+9EcpTnyR zD=|t;Rz8|36(CEzJCzJuoERv1A;7dR|H@0Yb>-u3>P3`QisZVeD!YvJGU5_mYmWN% z%w5cvzM_ER(olOigS&`hG-^Xrrb}drI{?)tuvu78S``xe0dbV8n6lC=W~?J$)vp*N zv+SV~D6YOJMnzrWG16o`O}x}Xn8i3O+g!nS`&FP+EV4(Tg8FbR-BXSK3G69e6+1|^{W z+N62|4)>2ux`u~8bG?#oQQL+Q;_rteOfZ(Rr&bbPAQwo>u$@2d+|w%dUZD&|VQu6W z!up()tFSdpr+vT|s)jDo#2_%$b*<)eL!_R_98;P!OzC2b z^0Q3D)mw}9CY!9)_tOX+HQ853ZB?iY<%satv%h%-JpS^N1^AkWB0{#0`_5JzF zg0}bVuYgFe8FDUE=t~}Km027eiJ5V?PRcBVSo@`zi16h@vh;Q786GYg$F7)tsbeMx z_ou2ibnw9mcNFJdmO)=sw&}qxNitO#+<0#_NMgZQp-L99)M>zUi<1rw>A*cS)w%6_3O8!sGOm72rC1Zq*Cq)pPHl?on>(j=+W5q=j3LN^}XAhMZU?4Z^@7np?^ ze1ddn(L@SZFDf%1uO2g{MYK1$Xl?MZt6RoLbZbKnw!OhoC1Ib=x2!Lp>S~oS+-%09 zlMgg%6_&eHcxP?p7s64%R~ z$+|4$OS;rMF+`?%>6HpZ`New%GaXCuymS#Ts1v5!(>2*T1Xu`@Ad$kLLc7Oacix3Z zWJ7ur9)mq4iInUGA zAVSt(JYl`>D@$F$j+y+J@+R5wcMSH?mJX=B@3a6&!B`*$j4KwKOLy!i8I`*K^7vp@X4qI zbNCgPEK^@uwX0gz3C?VX)+{|a7_Jv~(8+T!+v^^{GJwqUN9j38YFC`@cZ*WqHq4Wu zGj)s9Vw;Yr_OBYj%wh#ANcQl6Wacr>O|YVjY6ybc$BLR!H7&Ek>m{BKIi6sU$xvAk zd*`HU$!YgQ*+EIAWmpbmV*vp18npwehY0xi1*K*$(@c&E7v$dAad;&w_$R zSQkPTHJbi6hcEhA)K7S$7-D+G$I!4F6<HCne2}ql*~aFm3tt+eVsdiCq4xr5 zK;%J*zSS%()iVrWKG)SXJ(mqRoi7~{IAJwgPZq)4#R(C&j#Mo=SMZ-cTnX#Ul8ld6 zjX(V!Co3T#yVT&XwE7@`7xv?9&8$i!8BIj6__0#~HMZ%Pcx!ArxW_VRLkcz03V-^?w@-#MyH zmt)`$ExNlG!eD3YW=rS1a}dh|OGEiBA#*!Uc=&nrT4&VZ?Aal<9p2EF%`}RWg5l|D ztyakNIvwOx-DgYYT-y^4pnQSIdP!(G7r}YjRu*|C$Ev0OF*|y6(~XXkOx|Dlj#S1V zcM3fDsJTXJ6ql^tJaQJYr)vrLmAu|aLMP$%I7s??NZ34Se5d1lq|0js2*uUEYrgKA zh^AX5IL*3s$A2N{Qahy555_qqGa5(lCqmA` zqNJhZWcETN2j_7NkxTH*Abpjp1@jJx4YJ>DQl{Bmrje1K37K<^?$LIpKQWj0O&Ku~fHjN{NEBMwY<2<+ss^#;3->2|Xhe zKD-FOo|jS}@6gZNkg;!Iu*4f%&|-E{lp&dZ-jjQcwTr_W@4-v!wzlgmW?VZQAClcn zIJ1SjT+YQ&y@POMFNeaeg7O!^%?4#Q8r5AaOAN1)^V`&09gx$s-S$Z_xp*;&nX~=6PR0U1K~OPa7uzP0u|N?_dT`LC9p2J})ohy}F{iA*$RPF$ zErK$`(};-QqXLuc{uiHHIS9*Eia%|5qrMk*LEiY%Bzq0mU{-i*@vfyM`uNorsrF`4LMuXFz8LL1cD_R)u zpr&I=^ovuZsGM23x>=@KP#RF9ZhX)#hLfaugOuhI+Z@%WoKf`|ac?b)hnk=)n;(&1 zAZ@(1P`*0JLIX8ov~h(%wq0zol)6tR)8rYkNNuByK&>Bp*twiDcOMpy@ zyT&OZYd+@Pec>A&UQp8F8@(}nLN;&gI7i0Zer(v%#HHivq?~1LjaqKb8eyIo$q??J~mW$Vhiw5MHiJOUDO{V{S)u zp1HF|-^ErJsw|*?Yx_)wmwkeb4DRq@?Vz(B{>H0kv%Ca*-&ndGTCPw#Totd~s z`4FW|ts#;$t5nBMj7F1QVZ!CtW?HFM)-T$aBHL@{;Q6a3dgxL@PmRs_6Ec8hZF`O9 zA#*1hqC%Q~5s-3|bb-0Qb3|lGK2WFOMqJs#5gG|a1>j21g1aVIE#Q>O8KZQ-fOmVY zYGg>$wc#AYKi5eO{f?Y3LCfsMmBUy?evauD934i?o6c=lkgLohzvvr~R_dnn6H85k zqVP|cmrMs;Lz_HdDay`K9qsjkAD`wPy$0feCN8T;YCO#IHN*qk28a00Yn_;t(2Zr=5CX0!sU9ZONeex#kR?h`#M@)GUE z;J_T;H0iu*+>wugpE28lCpN1PBlSuZ%55a^7ZDkf;j21fIS;|A6E9=SR zXOh!^*55UK#t6FxgRQ4R>O_CS`*e!?`1?`0IeE`X6H)mrjz09e_Mq=xp4GR)!p#%; z?Ww%Iy<1S2!aZ~f1@*?>ON5jFoEvn>?RU?Em8n76uT`3mi@-5qv@;{Z+1N}_(K~59 zTuF?uf`JU1A4y!czWvvP!E-oGNiKHMclj93qRP7}(H26|n-5bhystKPDlQJt+oD~m z3+tNyto)ewT9Eb@FL2We)N=8Kg0}2^>ASDMQj`r=ocTL}}UD&!)>#JYsaIU>(^|=@*iy1sYdrI*Se!p}*3RS@E{XJOuv!+@hla1mIhRD_{GgN|;xLdCo3rgp2c#`o_ zn+#RY@$5_vk;s`@#;-x^B1e72$L*1_x)i^grGtc2DDP<^dqDBz;_A2KKjewS^7Hqp zi`u<&f6|$37+(VLtE_$Z@C)*}_ZAm2d)-rT1+(w6*2fMw4!i0T_$?%2m@lOVYjbp?uZ*#G;HVqGQE|5VplBN0jyPoMO+^#!GPnIDH_F0;zYVE0*3Mt)--9G zZGf*n6#Tl6{is-45u?&J;7oEcTw1*ap}o0;9)TBK*FTKx1(-njyC zhOkr(z;(n)|LzfQA>Se4BJTc5{lR_JmUQg20v6s5*^OQ^v2)0tB`6vEVf|gu&c_u! 
z$xdi>P6O@D)~J}c1a;xI0dtjAVh4F#qtHk+_U6t!H43udG_h*0^vUi7jZQOkXLC*g zsb&{0YIjsRM|eHz=V^F%$&c&V*oNQ#bjepsP@#3?IW2jbEVMv(C9xOeN%b_J_UWoy zTPLvr6A;x*@pj9%@?gDcbxE1J=9)NW-|LJKPiBH-c=#cslJW3c#x=5w_DHfTd|lY; zb|FU-$!}5^zu&{|cbe}JxnG}@KagZ$`q~VDZ#t#FaH)x<=iZ+p_T$%8%O@_Ioufm& z8IYP#$nhDKd7*`I=BNSMV34}AHY1Tzc?o86Dat}3pnJLb#=NRV%?mpZpSmyWxp!F!)QRm( zGw^`D2J2?W1i*ryc)OS$k?F~5thK)ZIoyBKUS3p&OlmByBhIFEK>%sluI`}N{>pHl z1>MDsBZzdq(L)w&VJ&#r@@MRipPsnBBZK|BHU71yQT7|-M7%HY(}(2xVb>wDx*HY@ zWu~id5aI%|i3e0c%-nvT)VGdX^r6uEeySA>Z-~?}q*eBRi;?7BWiImIZ zW0o*j)hICL%C%;A-Rk*WSaWc2`BofB!R$k)Ir_zrUuGOX_)-4a}_F|8@OwMIzi>bzmQT|7LpdF(b#g^DJ~Ao0K0d#k9rwq;#7A&>+Q z?h@SHo!}5$ChqPM+#$HTyF+kycX#&y!6!O#?#w!8-#@Fhz4m_p+ih(=@Gux-RQ0N_ zs=nTqHjML^KCtJ?{DasP+K9lMm{-S}SXmD2a)Y1?Ch zAWeyGHgPD+AJjR{{>#4%ZKAn-6Hqb-E)Be3N-M2Hh8+fT+L5zAs-oMd%m{tG)W%}4 zp`6q-YXV%}o}E?6U7xmMGTD^(qB|tC)78B(9Ga~Dy0QOBrNfA(S#N52ZEUvjVs^!U zkXA_yg4E$!U|9Q*5folDm|kL{W$wLci#1W}?^rNP-k&EJC(7YC_R;#DOa_|;!Hk!; z)2q=^WkN*5#^Of<$qlG`!RLfNrGd}&rvv7c+Tn8nf-knIm{gTSSz~Oa1k1T^|LlXS zn;X@^VbS(_n@)MR>^ZA0CAjvVbu7pTz4AJXTl>s=Ug++&u`Jta0iqkydQEu2y-8~H zBzUfcM0hVzHGdfzdp}cCcRy1HSU!`G@GyyAb~V@F!DPD?zfXjBvCW@z#4S=W~BgrdI&H(F=XOJg07#)TKS% z6XZL$hzqkOc7(hRZV)gQbtaf+A7L}l?7Tx#t^F7meaxL0hBkGs&(}co&1z;-^!&W!HdHc6W^f>Ipi!meW&odT zPIBYg2)RFhEE>su9qGHVp~}ZyO0Bdz+giYWDv(8(ma3^?olf?)pFG@0pK(DthF}|O z5`7eu<0*cA(tLZ-!>p?Z`^-Nhj3x~x$+ImUV3!0ghphOE>2+DZFxmJ5Yv5vt1p@5 z(#n$=)?x9i`kH4pbhbl^KA(35L%L#tFQwAQh%s(>M$PhJ6=F=|{E%~Z9i7y5GispR__&($P5$Ki%XHgqTNE+m5y1 zyr%KSuHI`!L|k@&#*2)^1)xdP0*6_ygs-$!xymcA7nyWY=VPH>ySQxfs_F&VoOhWF z=`^C=1?=;!C@A$Nieu%gNn%9mY9|>E@i%AIW1ZI3kZ0U|n87$ueb=EL6~fQ&n`s_B zoCvCBy(%z?ZXJ||ZAN4cf1FC3I2R+`V%5EN_*KQN;pb+INao33-nfcFJXN32%bV2s zVVN02KcNVaXqSGgm4Bep2$3&URll^2PqmWde?l|ATx+5P=e7a*q!5b|? z$pM;20SFCpubvcrSM7P5$jv7lcP4WHAmn3`kjR?O{HZ2k$DdTjCu4II#(gYm;0`zia3&O+McaiISD08blyr#Ye`Oja4rmN} zjaY>Fu+K7DWvo=;hJ{qp_DA=*x6z&&%}m3SRPh@UHDd8!N%A)h4~XbUxtbZaaD+G!bCC zsR52#b7lE$|(9KPuHhYXo#mefg|DSyLtq*EkBg z`qH^MS@0-B{dm=@LKPJC7j;|DH|lra7|FUdZ@tFU1}b86Q$wtbwQy-OX_{Q!a~nqR zXg|?tL^Ti{yMR}hKr9V07$ho?6&t;2un}pw{_-TppXh{WNVfOcvrEoH7n)&E|9Uix zp)KeO&#OCgY;Wgb6V*QazX>qfA|FKQhso8gw=&*(_bdHgbpKjg*$A}KSWZ?pCs+UBMsNPXFo`@c3AD;Ey9*YK=rn9@k zps%=nD;QG`nQo+dT+Wic#MO>TC_&scPwc6k6V2@39mw|1y}{}IVc!d+ap{#SP_c8dQ;@%Db{i>tn7a%% z(-FJSJ9mTR*aig*dbGS>`EUj*+%N$}VREv8gk=_14ixSeb#2~MUH1PHF~O*_Pl&Ud=}(hPfWVQ^rux+w) zo=zD9+iSZNpEga)Jub-$y=bu6;pCR0@z!KTA3Vty?cnddV$N5h*PN4nYV(!sQEoC~ z_`6%*QECCpMlUnaJ6Gs?CY{A=yx$YSl@>IUO zh+lt4r|xc_Dpa8ER0GvUCYfy+jP4QZdJFUO$xhInYuLk`bp71T?V^M5K(Uq&`vW6? 
zz})bxs+PYI$~{KHH)LdX?Rdw{_#P?Oy-Si|;_sU{V~J&pdozgMz$IszL%G%`!e5m zHPAd=_ZVixzUmo+bFItI2*0b>^!I2oaLC7CRC7XR=(gR|Rab~s{&40^v0pCbFe#;Y z%qK@Vc*RxVR)l^q{M*mpAQA1~g|ZX6Q`HsQh@H<3rV%>P&Uz|T0_uKc>+IHLm6WT#LRZVuq#1WkR ztsov62GtH)25tR>u52IeZ18d_*dt#Jhbt7gBcJ};-S-O#;8s@4JN8b`ySc6Yb>K>~ zPQ!pLZeUxBXhlKi8HSeg@7eu)TIGEEgP>YC?JNVVS0fU3&$_Uq698+L2N81*niVi)8asjPm#^ z#rWcwJGL;4Z7paN+NNmn>h{R>7eT)ndPx&5{a#qVl(zn5Q1I*KaC*{UC~;JtN@hl! zpd$$@VWz%K4YIrZpg;mX%@|$Hg&wd4aR}6$ZYoe!2PZc zopz2$ce%|&lag%di$t+qy2_F#F;v%0o-d@o>FPhoj?w|l{v1ANVaGG1QP(|ZeKth+ zvG?@*6brNN=W^n#oy@C#il2 z{I_5Xb#7@?2BV~Y8cqDl^bvMiC~{T9?bKhcT^pJm?fvhlP~~e0>V6y|_IA7n&__(( z`?-HcsCq^v*4GZ8AKW-a7WvG9Z`@bNDKrky&olSLX#I7trRLLxAYOzZjw*dZVJ2g)FFR+ zun%KpSbQE0_N|_5qJq^Bm~G4;9rP1PoSFv#EpEe|hI+6d-p0j9?VO!(?Ef~AR#t=u zoliHV3ShsBV~Z9v?!iyn9%|2EgNm$j_D9MmG#{iWrw-W$dA9aXSJf1M`EqDXI+@iY zh(7J|$q|dLnlmzp5q3yud9zc+QYw*DXq{|W4)L;M#VcW>-pbb=t#XhEE&3gp!^I92 zEbfk%fBI*mg}@kyt20cj>iH`2zJBaGZ~UKI?na zZUl4Xg;|+T=iX7`aOA8qEGOUJUB`2_{imR~Sz>?_X&1-tuqE|X3V=>KU@IBtdRbNl z36tBL=a{8laX>7c4KM!s%hV&X6?`4?^_kk4W4B#8$flXz$!hM|%_>O-d!}g3KcZ5V z>fMBtG+rZlW;L280^8HOSpqiR>`~bX;J&$Ct~u#|-zLVpf+lg_=*!JduF#3Hjf0+1 z>8Z7%Cka8}@x_bZk_bM7|G*8>#cmZb@Sgo1qBx2qxW+%)O3g70KifB?q=-cYZ58d? zkgpO-Ah{$6H!5ydV5)C-cBUU0k~!7+Ehc8av{%|FOQLbBpyyZ~)++)~cf+hhx4RYo zHnJ5sUayk>(|w_;yJg5N`Yqj2D{m>@KT7>A?UmNE4trxt$x@n9eVpsoU#x|?pv8$LGVwG_oG;vTn@&8+}F2M9~Xvsk@&4+j6G2pd^v$$Q>V6MuL>fsdt zkV41%&30D1rP>B{u1YJcnc;k>Cb9S`#P~@A-R!bD{U81+N0-L$dG*d~lIpGCL{Y=3 zLV(v~wU^-Bz)z3APvz!3PQXhhRw1x`JWxsij{|t^y*}<)2Q#P8{p;68@Jf`3vNX4S1deqJK%;qIAmz&tVv#v|u ztwvo+=0XmiOa&MsbARR>t#`+YvFrH4DApRcK*Ptlxb?kO+q`&{j6~nVz84C=4g=^C&^(Zr4?a~)K z={(#(W&8W8{ZC@>$TzKHW0m66$JxcsgUmpyH_k@i0|65`YeD7uqV8Io{!C+e$oB4y zO3Z_KW>va>#KNRWhqftlH2k2f86q&dE8F?$&|^n<{e}-!w+uFv{wxZ+Kf46uip(1D zS{f-f%cQBM9{T^8y(Ejr3MZxgW~}MxvQAo!C_HXi@kf?3dN^6Wm0 zAC4`oAPP7G1Za0eu84gvkUje}=KJgSMI#R5=7I)&SZI9X6kE1{Dy_>FzW=Yt+=R{# z1qQEEw<4%XO7+~TXGEAJ9?l+wi*_I~2PqGO2B<`opHnoo?ZYr_4vUgv4s+hwoXvMe z;n=vs1Los3?5yAP<0n+mpn;KQ)71wEQEgxDoYkzrSLYi&%(MH^w1+XZBT5sIFmp4k__EY6`H5Y|!C-;D z)v)H&FdO6c>?Mr-kMzK@Y^!pHMRDw_5GLx4S2ft2VZ!$8QX|nzJdxxxmU%cHXjUC( z1M4ghY+6Z+w$v_Utl|*haZA|)QWqo~fPE)8gUaNcld)N;Q^6%F8gpfOSJ79S1~~%} zmnieBkcYqOjd4x|H#!=yE@WNxcWT|__P4x))O|8-qP}Cjla*|}mE>JS!s*jp0GR-x zHsd#kG|3@PA`6E4c*xF|vGH5In&=BR4tgbuQ2#~C>-ZcR8Lj?7lLhD9_^IV6uR=Wc zNVeBb;x!MlOS}QJm~*XANoPVDy*rs7!W|nqw2=)df47^q*E0t9d(72$#&_2PrExoX zS-m#MA5}O=BP|HKgY-S*;y5R?X(5|RN%4953SB`CUau-0rkBU2ya4syikC~q%*S02 z`6ad}1+KssQj2&IE`)IgD{(h>f>@0GTZn}GG-wd32Mx0EPc3M3~2Xxzkk1f$~26W!D z(KbH9ub=tEtU0e(|MDLzi;@KG1iPs5Cw62Uh^)a|y)`m{FbaZKrbm zBvq`_TSn_H*VD8Z3cL^&YWGcG$Z4K^dt$0ox!_`vmTq;|2R@3qeCcD+K2GPnYNGAbivA=y$9Tk~#zWdL`P>p=Bh z!UIov*OTnuLkSMf*;2fqih#wfTIzhW61jWv%Nf^)s#ZI1L_*|@mK zreK(O&-nH9A7HCWsdvz|8@x>!=QO{?%GBb)lxtA_UF{h5o2G6= z-vRVb-ytQpI;_mjfrN=V)7aU4pWP*sy>RZen@Iziieq``(lan#uhSqHxkmGToV%$HJ zLguT@;pF@FG`H)lnyC&r9{(xVFHzXBJxExa!hc(Wxh9>Pt9p7xeZ#UA-oL01s6;!B zzPq|6T@%{!A-Gsd?!2g9dMpV}YgW19oL?cICkvz`3cqJ*HokAbyvtt7u~6^6|Nkhg znBO1uRh?l(ihsJhl02T&#sc082=jVifZ6+5nl(Kq(n)&#RB>g$7N^)z*O;XNZ<5V# zsFgf4F5Q=eTS-tORhWck!&C~`=iI*kczSWFJ>ezzydH;%9=|zzfPNQz62v>m4 zjWQx4)>^U``!>jiHT$K%&fhkL{zaS$1;K>8?EkR$R?%@bNw%mZOSYJqvBYFCGc#Gt zlEq|OOcpaUGn2(sVrFJ$X694==X6g`PfyS5UF)8;^q_+LA~JJFRAyx4j`gMXQkLHC zF`8F?N2LXGVU2bsO%ZmOX1kMyHbyLro!w0{5TnJ@G0gM9RJGbNLwG5T5h4E3!t?2f zd=h2q8ivFz`6y(#Mh4?_f7w@^TC+7g2XOab`T=fw_wmE@60LQ9y>lTTdzh(4&`vs< zqHMv>vq7QSC_9fi0`2@NRU#fKL&X^0a{Q-P^3B2T*Z%Qy2_MR&s?lGb1c4K~^m(bt zFVG2mySr=S!!!cc$ZHWWAjSc(9+#FZaZu7urS%qaiG%nv94&3!W2_f~0}kZd%XP%< zG!5Hk^w<{#O|2`~EGB-6kMlXn?g-8hf-Ts)_9VAEgFIt&aZMT}Zv65*G|x?U0wDGo 
z15?|tJT*SG^6gGHF62M1XE<4cF}~jBzZ>9jX69|p+0)3vzWHmP8pazCis_YVD}ss? zv|3)trwJ(v@{J9EyG##jWqbZ|(?tiKw04GsMrFFen8J*O?5pzDn$7YQsi`$Fae81h zP3?7&SgZ0&Ez#XRL4hj57rO<8seiXGGUwwS(7uJ49MxvBfzaa9kH|Hx~5S)*&faro5qE1m7z!0|Kn1eU+j>2Fap?TSL3C`a)C|^{OOlVXB zKf;lsGN8O92h5TWzJ7h`L-O$G|tHAMnQ zWG~&TM3E>k$E8T@WPt9UICS~QUAf|o$Iov&d=l4BJW)JE6G5d33tnN$`fNtDph z_6&Xm?8TS326C=>#jlc;@+J0bAdR;Lnb0(M9Nn$mFjNvE0`L;UeKTzKapo6#)OJU%IpkzVRy5MBrh`p~!6hK@=SBA=} zrzgBWz*hN3x*;Pi6Ro<*-JF}W%Ja?p)5|S4MWHpERefU!561@?#B8BWl4uYsoks+j z_hSL^WvA;K%U=1|V}=hZ@t34LONE)0sDr9`-ryfGK?gaTmqvsbYVLNUhg6`0MO86G;yU!pQ* z#%xv5fG>v743{XN`rp<%f$7hYq{r9r1J!Jfw){}Tnb=4K@(KUQ8(HKCVptUIi&FYK z#V&|`kTYsf>-uu;eu=cbAcy?Rj3{!3#X(3SsWhj#CQEB*bZ7QDB&gUPwK^ zl9dHIf5PYV&rAN3lJbrej6AqD3@mBoPxqhP#yEC!6m=luY$A$Tci;Xjq;A-F^3&y`(kvtkqgCr8&EUs11keJfQtj$Q#(69ueaa zH@3CZxQ6xA@rpck_Z`~-=O2qcP(p&ZEAxNdqJGQYxJB(UolPY8LuSdnCw-5;albqk z-}F(9y(swJ#^M(ia%(LqcO}*<|6sRs|^QEWz8EB_}W?VtW;qy8Fym46)n>d#lv3|Gp;~1)C2Whjvd$|m7xeyy`f!JE}1HY8*itxZS(K3UwVE&26`|mH~(m@icYf49A zKzplLw&Ug<5G{Df2~`xsv+e5j{q)1udT;f9!b$xua^_3EgS95AS8Ek<%DnxT_TSrh zx=(7}umLrwNwHS>I`h8)d?)J)cVIuPIGUWr{<$yz*)@e|ka?<-tzOm0c9y=VPoY4^ zNr0&0f`dxj1GGU&Oc(owRgVvl$Uqjbp_u#~88imc(F3u{_oCo<5=YkH?|vn8?`^-9 z`9^&=mv{4UP3pd%)v7N-NWl6pln~sXekr=5_#gYLLJyj~_~DkaI?!5)hBJZG`wNI_ zT=kQg>hGYv!+0PoVd;+$@cOfMGKkI8R@|&?6yJ-R$Iyf4LGu$%d`(JWy0Fcw4ETdZ z^r$Zke;>tv{zsq}RK7y>GHf{a6DajSFq%z*NCZS!C&}O4!yWE4WDN2G)6aVB50es8@>*wcwB`6-uzI^#<;f}q; zRuo(*uiNo4b!@>UEUk^-7Ng2aEb&Pxf*8)@kkS8tgDI32*rO z(I$tXY@HR+euGX-?HsyOK;8o5X_!yvV^3*maNgy=p$IQ>{e#C=rptK}7~Qub#&8Jg zAW-*Uz@=;c&7hMh2>xk?wD6rxy0bLRdkPO4;t$a=7QQa&e`6v4PwCzz;-JZz#YOd^ z%h}kCgqfDe^7%u&PxPDR-*U#I|CwV*Z#vT&5Mu#06Z* zahCTAiTu!S=A|?mp(fFqaq)ZX#Tc^e(^umu-WV~TXuCu&e74Scoz6zmlAi-%S1f;> zA4WU9rCsyw^Nc4qC0?i8KQn>>Iv|ohpz4Fm{DS_^7q^igV0CXR<=fFFOWp>=M;rc5;gAA|b6u^Wb< zXY}tI&hUc-%(nAYsgYs;PZT$Q$aDyUl)XYyXB++(gYpk@|L?aGFabJWKLn46 z05b4x^_dPazT;c+e}lr`oBs7lM>+&YUN*P6>lg;CZidltLh>(o(7ahiT|oaYf?GkV zs(S8FCbSWL5Omix=Eu00kA)Tju{LWG;NDS zo2dWt>wiN9qqnQ74o#h>JYJu<< zL8tW%oDDQ<)q-6Ve}DMD#`0|nsNrq1+sshaMt;r)K48P;4}>$A691Kv{exIL3Xo^xpqq&i^3k zZ(ZMz18LS2Td;3cwXw@tZeu4r_w=_n7;|{YNvol#H5QpTzxzY7>CIFbGSyWbQuyesB~jK@C+aTg1!p zV0Ecs)(pTiC_t+2WB-8uuYLMQpZV7(PADMF(ph*-Lm?T)t4xMvIs!nDV3T-&`1ga8 zr3>m(sjgF`CMO0^1r68@!6Oe+b)0z*<$uB485u~k|7Vc@UpL6@CzY;93a>6i{+C~R zo}9i@Q%cUP)*)G0a|d&EG*5To0(N$(PbxYUu>Te)j(^<3XDMEuULQ5y6SNAYZE}!X z-9FXIHz6knSh<|@x;tIq{yaa53`*^`6IRE4Xt0VHR#m0s@&co%@#^+!dxk849km}{ zRm5U4;l|F>pzg#5?FD4KEcln&+_sf#CW4T& z&^HgRuQY}_Kxb&|OwY)&kD!F0!NbPG0)pF8L2cZBD+1JoZY&5|XVx&!EloBQ(9>CC zB3M{*fj~Uzw|Y1ax96?57TYTA*TX`0ZIZ|eSd(wU7LUJm%s$ZTmi2_|2pdqBca+rW zZ6vfg>A!s3J}KCbb)e8^OV-ahGee(&X_Gy+iKLqe5P+0{1avf@Yt~&Ahl7o6+kk&V zg^@v@A@4NXJ+|4lssupbbTbeAY(x6?K$PcDn!=Jk{lz;#zC$}$%GTD|jrFBq<{3i; z`B=(ubY0`Nt+dhSKm36mJSwBw#QJOaxveJ#5T`HcTW6`k4eES#G{~uzM*qy42!q8O zVtn`CzwF@EVaj@k-W1#O3e%~{A=3dWQk0ckeaA_z>#vEI7A?>oY3gs4Gwc<4Y zqSI3!iSTi4m2sCL*81uqxEYxvfe&*M)=51aPIPI$bRu!z==f7zL6dAh({nwRvHG$M-%*&~vkiD-8J^-3jN7dC2dE6H$LHLMU0YUF5{g z2;7-Pr&rLpuqZcm-cVc@TQd-$BTq8=qCPf2el}fTt-mz7@z$?*VqowG(_#Ic1G}a$ ztcicqz_V)e`^&~^SWni3{!b``qHJ``She)t&y{VV%4Am#58V)9gamtrea#f5BX4NT zi6&V|G7eZszIhA8C6h0e>%9ddHNWH;0f3$?VffyDAj4Yk-g_E3aXBx5?~zEzGD)u- z>U<_mK$V_Py$QXLntGD+%cuKSgnNZino-XG@QodO-Z1Xb-*JrOw6lk0FM4z6vCvjz zQhOPW zE+vk2by_~7&_Ak#w`i@c#r3@8eic&kT>m;dwmG50+!G>>-Iq?&QW z%LJiWqrJA)(o;e^IZXGBm8PxY(ef)!;T$qj|I7y*nZu*b&wrU%Br0V$1w(qL zkM;(ao7c}t`S!at*@>oB_JH*@9ADSZu0SyxhO8UBzEP?^im%0 zS@c?4igw8s$8G`{53>(%O}kn<8IOcENA~zK$M>6Y^CoZ}PY5UP@(7>Sm(bhP=yO-$ zttVUd9WX)z+r~wE=nS0AXWzSpQ46>Ga9h~u^$E;nAsqp{5xwhQiO?P|@`ITJ&A1 
zfMa&;{J|&T)P(I_2J^nMB|5m8m^~trg{TRcu36=3yIF6)QZhAvnj!Yct8~B*)*(~M zHnu<)ZZ#r%>4>!1RKFE9R^t1e0!a0n{GDA40iXFP99Uh4QN7eVI|3T-b$c0m!oL>9 z74+U;fXwWevEwc37M6r~?)A)Fr*8$0lL5ZQ`;M7HFZ=O7>p+!}oCHnXz|(wzW1!sT z$69yV-vCX{eA2C_;jXz}(i@-q(%LZyYg%Uwu**R4Kl?`HGOIY~g+J@Pk=5S8SF zqXNt@=0P-S7pHBgl$0%D+~POYwGS7Frglb&NX{-U28-+WhKuVlR3aq35<8>NU@4TI zVLfHOb7Y%nVwR!SSajFp&zD=F%lZkrv=&~Uzsci!e+5P)qE=)ys(5>t)BL_vfCL}< zrFaRAH9H!fV}vc+h&TsWc^;DyUNZjvxmk?8HEH&IgLz8kVA9x>gNA&&}F32nDejg|BnRWn!_icK0?zsXLKTHW=978W3S);0y~ z*qnVuD_@X8&xx{A*uy&(Wy)M%KTvBanQ@)c4vp<@m_@TN2_i@Nb|Fp50gKIba$(o7 z?ws*HO=xxADqi?#Et!~mzCIPQU z@?e=23*DcSBPuyqXEB5#E{+x}co24iMq=1!DFm)=lmboXdG$Nqt0Di;0UOjQKSa7u z9d}-ZkE+h~e4A`q&74v6;Mj4A0qF=ywyIUIbbm8_)q}W{eFFITWH;H z$6`7G)OBONoeJ~Zi-2|>#`izmeCFe&6Ycy~n!H|S{p=NAX zz@r1V=?y~erkj5t6o;UWlJlA8F(Co2yW_bVS+&9q?iE40ugO9TbkkJmv3x1tdjb@Y z()D%vwWNlWjeo12HYKvyRGXtl$iIj#wc-ID{dje7U=B)l_4dmIhrBU!9POq z;k+by^V}RV!cM%rx@xgpuXu%x0mO^mygvIA^=JvM<^^o{b=1*1OB&AOQml33X!aN8 zNWE>{Ss(cp+CI_{96Gk6THKy1oh54nWAB_+e(aAP*y()u!uLopA6VCy-QB*iBflT@ zqZ!Gz+!c$@yL=+gTshD79pkam-TLw(1^8s(cW)Lsj5)f{{!T%w-2|KaK7(q#45`G? zeSCSmczu95iH%9GvXN=dzhNt#sHgg2&j;`1#ftq7e<+~Ip8;B&59*k}yRG4_Gs^}) zeXiI24eks7diaQKgx1xkhfk!n9+a+k?eNhaC;P1X`2B5!u*NNX_{f}jurpjQ&SHG$ z%aFN!>)``7hO`+lI|PFOejjkFG>(th7NdRor+dXvtybWg4ua$K)h!`yX&-KIr^I3N z#dYbQ@Ad^8%kD)hvp*t#FGhOid&c8#&#zav`fN0D(ZXb*j)}`O8utV1vsZna=&3kt z<(dV>=y)8UKc$btZ|yrwLlr5yGHyUq;5})}faY@qZ*iuG+!rjJ6eLZS`UY#SB-x(L z(8}oI`o+)0>0OhRUJ##dtE^+KngH)#H`M3Y2NH!HNwZ+XOL1A8a#l!ZBEa+URRAtt z=xHmF<9u&vb5=3T{y%Yo+;YzZ=8+6^h-#qw-=vLpy9`A7sbddRDh+}qBF{hmz;=mS zMT)3EL+EPMIPrSpY#V0lRKq!E`Nn-0(NVN1j9j2Pnl$=7&CQwJ(eo01EUH1%?78B= z*#q+i{|k}<-OVZ7sV9xAqr%Xf9&9L<6oD#WOU{AgJKm#l>HcqVWL}XILh~{&-;qj$ z&4opwCVG|ej8-?p3&Kx1mY+RDW%&uk={LC^A`jK8yHhg0^R{rIcOUFFqqq*YQ{1-i z+LCuKoG)b_RkE8}bI)#GsLiZZ+MR?YtJeuhC{PNutoTWYm%-Iry6S1b*1KbM8vwkn z=yJGbcCm^0E%V&&Yj~f7E%_!$@M4YnNn~C~m2kA&+J*MAg$L>)1lO}74whRGc~Ni6 z^7iN2Nasv3X1U7l%b)BmtfQz*`FR<3>=BOqb$t(AgiW|S#Hbh#njBAPO5`zS`)Suv z+XPw8NJto+^$Gog#`vT%xZH8h5sS7X`pUTq>cD^AGtOE@`R;o<;sjSohG3N<%2V-? zq6J9t$3N5x^^T*lN1PSAo#7ccl^q>TD0)U1d)45&-4W!v&H8<1U-53l(&Sbyb@W|A z^^ahZr|<3YR5}n6PsQ&nErFnDe?>6$jBSh5%raTl|Jh6qo;?dj#T%OYIfA6J5pHd5 z?ZO;c;PF$I?}pBH5k^ItQwr1@mF720xSLk?y<9dSwyfs{JR?pOCl$O1*o`Urq6kvD1 zL3bH)-&U=WCZf$$wS0O1&Td@{5E%ExlIF|8uql1d;CbZ4LQLzJrqZ+P?YVU#WRM!) zWej8e8P?ZrmfrTAE%04yN!lgS=l ztm!}hb57tt!qP>F5D=Zi>zJa$+}TaJu6ui%n@H5-!?=lc%zet`1N5O{an7s7mXWO>D)oX&pmRftC(oB~@j!e@Vejs-_xjMw)WfH_Omn=P$z(dm+oZxX4mKr;n zo=Gr(WQ)KF!yI3%*Py~JqKA)9Z$|Rcs1hDFtYQCz>%`DYM2F)o8)6}^fK_wQ89U1d zo173bvcNZI6~F}wY19ZuZui~>o~DFrBuDI*Z%&21nlkvd(MlNdI2zTogUd#;?;r9Ca+Oh+ADZL|avxQA{Fz1Z{6wa`MMm`H2Vlq zn7h(^ZNj<0v2PD-6i1bDm_H=?zC@kZgMjWw+P#39(f$Pd6bWRi^_>5HY}~r}cEEi~ z-jg(MbmzLuEo3XX!=T4B115~jdxAC9>*zOq--Yq$CTLr3fY(F|r;$PmrE5*fJbz=P| z^$!5vFCSXo>QY4@t*7T0a)0;-{xlJ3<#NLosWuQQ}<9dn{$T za$V-3v|xIPBH`YRD0ab+pSa zOmcaVD9>cB7;qbAX{mau$%3Ei8C?_R@+H|%iT|-9!;U;**Gz9<-n>Ue27WQpMGIk3 z68~4Q{8&$t=hJ>nL}K$^8edB{ix45!ymYDDZ|Y~_kD*F@v*FnJGtrSFJ%q6J`jdrC zKZjT=zOQ|^0u|bFoM40n@*>c@#f2u%@LypmmPu!>VF{ba;1K{aRPN<%^de2L15Z{m zl~8cbYxeYw8_PS=-)~!-Ia#M|QaA7eu*XMxejUT-md+Vf(~tY2oVc2Bvf)?%AbjA! 
zte^TD7l1g?;iLCLT;tkSxO3gSzcRPC(_bMXHGctk2!@jJ-ZEtxLDFSXhc-LMLUG; zws@Y-6RYx8`->)eC^g-JDu6v2eHB+{DaWyCPn&}x`r}XXn4tS!X7)zj+T02_Oa#!e z2bdDCL>DMzdjL_V}QKaV&R;?ZT4Ugd@!T9YA*oKOFFzoK9Y> zVJs2JLCuyGZku1Jy(cc%cVm~v&Cb34__H?QPUw5XJz7EEN=U)ax9B9VtQw7wa5$B< z6^vobl}PEG&D-(OXUTmFpZDi>{_J8W=Hc6P?)Luh)k@*{F_w=~TK0pYFW2R^7lB|- zMmrJ|wilX8^LK=(Q-z4#K;DX-%wOXO01_FU_Y!b!%r)PKpZ%!0DYMxU&#vfCFxQ7m zBK0iTB+F5nXJ$(57LS1v1k=&q5^s)K%}t=(x_Sz}M*M6=oVqW_DRfxCZWX7+Sek5) z06g^5=0h(rB^Xcfk6FBxJ+~9%>X5Oh=gW=h%j-it0fT ztEcmlj!-mYuYU{`kw&CL(TG-s?hXMMR1sC z$`_6QJXCMwpShVm>8V%_ejpw)Zf5+77tZ?mnCGjqOl#9g2_u=BIucOZtg}ahh7q8h zhQ-idVe+GjYPn56F9qlcF&C5aHBs+-MZL64VFJTRk*+OS2w60vgG%E!TeX{rq4g%5 zn)-n*-9lu0b)KpQSqvQ<6m|^xnCYNfrDOd;4~$gQ1Viy+{Se^D0KP1TppaybullP> z-dS4i$0}MZVKlBG6-^(*8kGWea9_wt5u^$WvM*4Nc<59a?F^1!&?Q42Qv92KQDLsS zn)HF@e%0X)jsRzLLg$au^Y{}mcFk(coyWe5WC%%16=b%;6!o9OQiHLbj=SkH zSAZWv9f#Y!@l9|0;(BQqWhU}N+Y8w8kCu(OfiH}tRM37@UryB+?XJVYYk1!ZJv^WR%Ink*uL`moo zp*oKbJ0Q?iy8VQHBOTI|k#k*8=(d(!uKW=EaKEAYu#0@^Nk4>WPv(MpjLZM7q>(}^ zBr8)k@TXo`ZN?%=2pKZ%cqY}Z&Zs;FumW(pln6SdIbF1u11K1Q&!=pdfN1R@WKmiO zj+qFiUHPTf6t;Gx!lQNT-~h4EUV70%oY1)rCKX!mkcNeDu`9E7hu_hw?@YD_oE9H0 zXWki&JK%SURf9u9q#NBIL*gK0Q(Zg9$8pIELK*ddekd?H9H)cPP&TA3K3=!ygvc`> zCRw+`0!b#@MLt@+dKI<$76cg$%K>IV{-)(0BP>1PtC!%1{Ay}~|N!@(W%^=ydUyRcRsnyv*>WQ$?Z(BGi z9Bia?)`2mTSJ>#x{%ihpc@wHeNb=dGd9r#Dus)c(!6_+00}+5cI&GtiJDC2y%4x;{ z(5}D6nhKb5b=;?rv=I-JJGkPjRe}ucVF?&C{qpN!7rN0@Uv0ykZ3?sBa8)oLUEt)2 zH>h0|Riu`N{sbFPqiWC}GdqFMtNnn*^M0U^)aLP->d@7;GGlsC<+ z`4b*wnz*7=ts9`?2hiOYIqGo!rXe1ioNx$Cq(-CKdB+uCAy8Cx17=$0rKR`rF}f%* zCop6VuAhqL%m)z3J7H>zzC$Ukr^)-+5G#p=cX0_`oZ-g)YD7l9An#=Mc+)n9nkRCm zJeAJjlb(K(*&&WMbDs;bjOO%=P$4pU(jQsNo#nJxQtDZCqXDOn*FUfK-vP@He~v0X zj^Y&dJTvFiUT2QrrYcm?MWe&AI|-F#yU)hcRgyAk4D8(x`?=VR-OG(nDt<5ClI`sH z$-{QX-Jkpz{+>|8U{c|8zQWT9rxl!y2!bb!3z|8D8OWSsK2;qnK>XHn)9Ngp{zb{d zWZ*}hrl;YEq3cE^n<~N>M3K{IbSan22zf8A6vPEH>Btq!WANr6kiI~8F`zwywYFwM zr_Fh2(bDs?dR1R0?S<$K^xl=5G_^MAI=i84pRrEHXP4zfQYO-QrM8>#U&u^ouZAC1 zMFrmOv40Yt%(?VnKdX)oaQ}(ld5Z}OAf=VSnwwdlU)BxH-2o(yk`8a=?3k(RuF&%QoI+f+R+z&yzxV3Kk=MB4{NGyy zj%|`R<@q(iKfY`nPlwHYih8>zVW4MVA(yWxDWPeUd6&*iq}LG>-6fApRNLYyr%}|< zFd0Lat64JSyyBEYUULgm6HUGGHy#0i0A3 z%kEpt)7A0N1Y@VAQ_P|yPOh31i@$-JU=nTNu{42R(8`AtG-|4B#%~OzUC~o)iTv=f zi(JG592#->!3~Pt9&AF+ea5w^F#oSXIje-nAC+o=y=62d*EGaqVqMfE+ zL*smc2Wqo2!BTRrJLNc~NB=AwDXd zKrkjUN_ZyY!QKMD-31f$)&Sn>56S_<<67cCk&g(|P$szk$l>9-zp390=?dPDgRFF6 zG8?{Aa|QWNz7ao-c|877CCe`~-x|ta(liaIeG-24*jsgAnd8GWLH<$ub9bT2jE(J` zBy-XQY3jUew9=}s%yy~z*-QqT!41B|rBrTXcVYI*7_e$;dB|7etQ-^65F)~}*k1a1 zZmKyw;aWtPXr!@hMNEjEw$VvGwr*0p-LseX*{&@?Glx+V6S3z-LJ@3(o0LX+7GQ{p{7o&OE_AD zmZiESRe@@%$O-b}{otXHwrJ7)NjBEZ%^A%(P=yNqS-2mO`Nt-{)`4!g{gt?PM(GVo zuv9R{mFW-#4;Q>>2ES6tc~7$rrwYrS8e1QG4Ty_#N#ftPiJOv|DnBSsd`FMZmQ{d7 zsVq@YvFdxg2JB$QMgLL_$+_O8q$?MA?1gFe9K34Bba7&}TtaJWYpZokWY=uC6SdT| z=1#JzHQ$!c(4pH{pGxhi9A->q(BSDD8cm#>s>IIlw87MMbEs3ozxSAeFoN?>ewM3R zu)!&JV--!;R*@r=mKPer1VGd>g5gu>SCNS<_@SlxjC4;Ey}u}ZpITxuXGTiTb7|`- zTMOO~9E>FZEm=1n@9#+pQdP~WQx+Pi19)`eBpq7S$z|^fc-hoZSN#S}%Rh|)VoNjg z5>ArZh~?{xEmdN37_)4Bt31DB1eza>fE=Zka_6u*pNkS1gKS4p5Iu|B?B3EkQ+j8c z?ezx_Cv$Vj!ovkGq8V0;zg%U|6I1$=B09bKX?oZz*~Gz-z}5*RiYfXzj?sfy;oYtU zb~mBew7RwxeyC`eN+Fb=A2x zE7-4tm~ZZT$CGI;Sst{Pm{P;k1ok} z>a;NTDRit8ljIt76MtOStJSRc=9wfXT;~XoyBZ&gKub}%Ho_$d5Lo9fl{)si2H#yt znkAc{Yl54P{w9}?_*8zWTPTa_K-OtVRoPdvgo*n~@lG|n1LjD_v#vIsi4Cu|Nbm7@cJxzk2={z6F@C#ItWk&v?X zAQ4W}?Ns^E?;Q!H!te$>K5C}1bj`HcJts=U(!F=QXCQTVeXj3i%f9@twN~xk3YzBh zNRHFt5yKp)2t6pbl^2`O5L_nf61k{pgWOfqyi18mDT6OiQ;4G_&QuG(<-vXPvN-& zlmSDf@h30XDeh0YC}lM~V1vXvfZnc`OE`0Ucd;(sE94Zv81Gg 
z@%Eh{pI?EnOHLE$(XFNNu>LMJ5z~dWW>(?D51jBZ#7JOi^2w~w?n*9^#=zH)OC%IZ zW$86|Qa*40**3I}NTVdP}(`MkK%>Gz5yPgbh^m-C()5ns-xG_?%^zkpVjlbiTHbRPygPAs!XwMFMn}&iT z!1C5zZO)3%)(hK;j#6C>_X_1Yj;99z!Va{W^t5bgd9xoF(Xn8nJGy5dT z?eVPmAp2CyBJb)kdJfmdP3iJUoxRj-Q+O@pbwEpT5ii^c?WCZtAwzM#bt?Zyc0`EB z?sQzDuWx_Ec44dwCDo9)ATso^>G;M$XSW4*pTl7u5TQTKFo`cc{P=uMwqPPMWWz*{ zu?8Kh_NUk~Yis3>#aGfObre-*`m$38|3KE_+g$J_ipcGSDl6mtbU{x=;!6gBGmWSB z4g2s`13XW0A%ie#Gz)_AJ_XOdi>$GuLd;ix&H7{C1Gn>zEbos@4|^3*<|arT1G!AD z7sl!AaZ$_FxL@edldEFXjpcu>NLw~zTWKmSJ@@;2GCIduSdMF)VxJ+hS`}Au_!fTC z)8wZgt@ccVbzodVi5%ULyo{TT72-Mfe$~w_B(-;s#L;pDEj;STpmgN`DXildvSdj9 zgjuivDSLi8<~bojw8)c#!j^JkWGdS)E9?ixmZa2E7Y~;z(QHim_ z<0C?#8mz(^_MBshiNyT(PzWz1ycgj)iu7M!*S{}XitX0{0+`_d=b4aP=Y$@u5XjVS zG|-kU{uVB|YR(KIH;3UtTCTamZroj=QA=wTlyEbj5gBY@{F^qLChtn5^u7o+@^x|# z5;Js$bhG#{?t+R93e2Dn#W?ly&7DD}8naPow~XgGSg=#jMhPQ*ir$E~qH&H-AMGq& zsiCB&?t~0^Q}`;Dxl7LRbHnfCHLoR-rYcm(%0@|;e9GNa0V`m2bGK7y1&dlhoY#eT_i5Pzrk>5_^Ne_Ha#iu{;%5>@Ua4P5S7A(NH<)qvQnu>o1$x4 zh_Ux`jx>JrV)eg9#~Gi>zutIYXZ#k6_ppY6)_t`kbym&dD1;~+`1TUcL1$({@Xoo8E-;Bwe)V1`?NF9fV>C*T%+IThaI92g(|@Y-OBTX+q+!ZEHrcS7rbe4;>R{bhnP>W^>82=5MZ^9uS%fhs6D{2<_SN>!l z_3YyUrKTBb;B0FBAa^VPpCWMi8;dh?F!-+;N!{M4YnBRGJFXgov?R=4+I)nuDUO=* z9jCa0GdLad_G=aZ%J;KL|8y0L^7k;kWVsAHO=mVzR1b?siT)|ZWV(otxSKshBm;A4 z4c!O$ISmY2jIMNxr8T3Rq8D6}YYd2HYYcZMT2~3D;yWAasz{UhKZZIKsvqH|_T>9e zSgo=*o6jaFA?8&tauib0j(!GjsY_;yutt*VbbdmVqf57UKj`zB6{$0a(^X^x5AKy? z-R{y%$aPa#$9Wt89erIBgw9sZnlNPl9OL-vwdSp>n8Jut^CWy_b_(^nP;;yOU9jtKUTxOFWfg6ilzF7t9Z3x#~mbq$))`nDe(M zb*;7OuOl5PWok;8UWAmski(XKop5!{f5KxE=jsftNFeA8aIviF7i>eAzwdj1X2L?M z!2D{-1?X-2T^#F+I&>{Ca{LpR5~;tYY-yiz0=n<=fjaXW=!Z5M3G_po?_vmcgroZn zMvX%mTI~P_XM0I4Wff36=IZ44yu8Gj=zwEyn|v93O1_%h46AEg!9qwC8X9%5;dc7Q zXiFy2(3?e&Vpdq%qspb_?08~mP zJmo`RewGnbzS51U@5Abwm}1<_92u#+*h{svKM9;p5>ZCQpFraVj&O+jB=GwKtb~q# zqARNl+g`m;=Q0L-gVDmmk=hikh{DBmF_)Z-50y2+a^L(CpabH?C5!QpZ6hc5imLs!0#@L(g#wFtqB>ISi- zxIo}t&fxWB(riLSx*WDTvpOG~2S7I`xH@Aqo5N9CHTR$dz(OUdgPqvNH}e!alqp9H zxTl;uanqAhV~#b`DPid)9jgjV{H9f9&K(eDK0-!ti`qkq>)m5G)GIwhxq zzOnZ;BKpidePpp{DSqM$6WnUIR>0#szmt?`_w!Lehhr=%Np(O5j~dHfFbina)fU;H zznyAJdfnKiWh%rMq$_%SpqsdQ zOwqFZO1RJydVYS^O%7KLG)AO<1hRt!Ps)R7(BmoYKiGNcJQpzTl#dyH;)jYhjPwfquGl`^W= zF3s2*`Wds}OwA~9-p6wOhOy?il|>mC!l#Nc@2S+YCWUtlvr|F=FHDuK4*TQMM{wa$ zPjWfJ2bA}<8p8L2pLY z3|wk2ce;$cu=N@9ylMARhM2Tnf^53NF|bn;Dwgi z%74E<z@pGA1q&gV!55du!!=jXy9K9-KO|4*+RN|I7QHK(@5|}_oATrRnGF4 zjsRb<2_#v2z4vAbelVI9W%3|6-myK8|&gK3w+Tl#EHW3X5gMui!bi zDl1v7?D)J^>?@KKfTP|-`z>ugd&M+q#qRbHS?x7T0JPk#te3RxtXJBi9nWFqw+3XV zo?w?&>*p?kJs}jxsdz~X!L%Y~VWa2$?G0QbRzyT;RI7P(u0|6d;_!*NYttwXpRNP^ zmGC;t`r-^Jr4Xc}ZJF%IQ5O0%>$bM8M%G$Y3Wz2aMQgKCMKe1Ky^8#*%2WTv9}5J+ zSgfO^|A)P|3XW?@wm@xx1r{?iOBORTGh>UHnI(&vnOU-!C5xGv$zo=(7+;?=aA)Se zIX~~~McfE|w089F-d$O_vMMWcE#VyEhunHbchGv99;%e<-8J&slz`R`O5%otoO`T#ibACy>te4m_ zS){O3*76ys#siyjy=isO#P`)=A?h2R8qC0TrQT0|$^(gq0vNQ8C;!&i@#Z*e=c|Xo zGo^~1V>clAi-Zxw)h!6nN58PknP=5|;0 z8%@h#{TOMHzLgNA68@K_bb}s!r^MXqi1o7 zHHye(?6F)Gn9YLs*KQ)aXMI%%Mxurm-aKtJA$({Jnzml=16;p_YFTIcZHmA#6m>Hq z>?ahLsQ#UwwMPhr9~mR>Sb#x~;UQpi5&PYnk5`Q`TQrq#JV(pNx0W)@gw~os7B9R| zkV>X0RUOPPgmXSpPc(nD%mJA|e&lUA?=imuRD32px3|J=AX7RY24aINGYY(pVOhTH zXkX=fOcLfzJo`^)Zdb;-Z91c<%F_&HS-OR}B)v)fR5$A_EkKV=2Lz+sc~1OMCfD;$ zL@f!J@~-=%NA{v&{Etq{x^HCiuztZ*ZW0smf>@m{XSx(!P6|t-AvTfzU9M8??mOOD z%cP{?r+1LW!F=-+{Z+tTzj3!$s0McDFNdok`6BXP!KP_8_g>!3wy)>ty0-gE>wnTR z$8K!#NSx}x2v{lxO`@Uf7SMHx0eajem%#M%Dz6WJ^jz62qO7^#_g^c4r=qZY!WCy$ zVl|(rLf?GOT!d}&-)@HQL$;6UlR}tEbh3G3Zl{#HZmD5lf(_7&Sfr+CU(x&4uF(c| zw)RA!*^Jr)MtLl%8r!E6cSMtQGR1S{j923|xNe+lG8Wx% 
zA}8NBdlmz&W3YjxJMs;vq*vqf1hdZfQ1%e>xD%jB+4n}6)r?vM+lyH=!<|rG8lWlP zw~iPDk_D;PEYU06o&aUrMkJtsRy-w}&>`|_j^tYuAmQc6FO0M`Xp zu~m>yB%*R8q=uom~%)j`XKCTmaFaHU?7r@`X9c9Np4{H#u+L$ec!yhoDn%YD(P| zKQ@Hsi|$$mP*vOlgBYjSWd3`hVu#u|QZ5+9yAP`&$m<46I*tn!gyF z8s3Hk88hjhXX?i3Ar_`@MTo333{Se&-m)zW%t|E|V5YxB_{xFgZ`}d=3KMnJiKS5H zUvVL&$$f%#!@2p6(BSFzjWw@}G%PL6Sr}=2Iq|>J=%V^xl>oXtxPU7Mo<(QMF!1Wu z>D_ke++Z-QI^fFTZUVpAU|<{er`N!2JB61!#wp^o9Ws2xzD#QQA^g+hNXn=z*JYd4-^@%wQICvh2?mPca(p7exP6u!2T8QnIKHoyw07v%C{n{C8~LiFExf znWX};k8KPdk3^64b*`7ifh{`A=&vS2db{Lwda)=c6IOx;2PFNK@_?}tBY*NG*F?6# zR5oJ70GnJ)4M##oRoS64=9wlytUdQgxyq0`7SgV`ycQ9(#VN1wcxB!m2mf7jINPMv zytBc^&kNX9#4gzW@9p-|ruW4AY3fG^q5kMB^)@iD1|%zAF%v@P4~O!p*F9Q3o-nSJ z&`YyP(aL(kyaiN|hzy&5)OgBjg)}Vtl`RE59j=Fld5DhLPEiX@=uAZGGim|X7eZHz z6{uoLM#~gY11`*-h3{sp9wak%K7^=KVh^=VEg&mgsst9%gTfipA^NyvEgf-l$wcpy zc8Ny5L3@EoSk%;TI$T+d(~p=#Mt_{V-M(UIL>5lFq@5lT*-WTue(6h%r!^{(5mlbm zXOY<(cXqQk0OL{fs=(uLir~DyVhlD(gb^XF>T3`AqIsfVX9QtDKDp5N_d2;5#bTwk zMNJNw-Ee+`TBQfm?~RvI{%tKkMIeGvrJOtkw-(=TkKx;opF0SzUvai&`;OSU06EUWTJBUB*)x|K!{A z__F(SR~j0fYp-Jeg-eY)V&&MvkWzyFkDUE50;MyHb1W?DkuNSZwTl+1bpMj_RlakK z%Xn7QMAt~LXH;squ!qkP>&=ElKFbsh@-<`ug0ws@;3tHF1^B(){S;%0CS4P#zCmD-#D zR{aeaiJ5<8@MZwG^DMJ$VtRHYT!gF3>X&P|F*E9o2|tU%lK{^U^TglsVy7H>0OF$1 z3n}7x&o>nuB63y0PFQJ;`%c=6B^yXCmmg2 z>EMb~ZGpJC$vHTk+e5d(^ZA+tO4Sj-J?$b%NcDFgURnfYUlFvj!Bic`vfeSit+a;? zhbWanA*nTK9Uv`YDe)?^*P8~Iw#r;g$}o?f$Jst)G$9?_L1Oj2Lw~c?#G8 z-Cdzvjv;Ou`Bo-1{%5bN-xoQ4gmxSc6P?~tx+#KfYy0yGvSq1LD-p<3t1OQP5)U5% zTSjp$bw?B)*MOeWV~`*r?t);UU(eE}zer!-d9&|{pS{0@kZ5y|!q~qZLv+=t#C0=g zfsu{bGvR2hqh<|kx;ydgw|F^sRJK9`&_@!!4WEpl&Gg}W?s8GE)cQdCqj!iqom}eV ziu(o!;_(^vjO~W5D*~A;Yk+VYP0NHYy@@066MK2Ir&*rOX?&XkK**|P?(#Rl;A>=GsN~j2+4Y(&89i8}j{p=E&1}jm@n0FtG=(B9HxHbQruC(HOP>RL_uB7fxe> zz2d8|nE>+Bev+rM+tuuI9&=k*I&XIV9@d7MjLE^q#6DFR^mB}dyKJe2m1GAlnS^H0 zj*IkiGy^q)TJ6dgCjM{2WynDXf3yy`Np84aaW~Mp#VuPwT9;xt&BF~%{Sc%%26m-} ze;zZX&(FWiHDBW{EW*C<9!u9B43pC_DS;@qUDTUa-hC4g05LM`aXb4+CGzJpR>4#= zG_uhs2_0mp@7<>mTVYUf$tg}Zom0J@m=MYW`BljCRZU|>PJ4(_+2Bt#dI<1 zfqVm_ZqWE{IT*atVQz(cwJ(%=O4Xhr`a8s~ggamO_RiO$mUvM|n=pL^-S|en`=U!@ z@?ou47v$jSo)`1Ri|`LL{fQmBlQZhcoos)c7xXD>MtT@7AJt-T9C>Iz9&=e0U-_0) zrsry#;DXUb_-+wMH#JmTfk5Ip0$!NJu8o)}bV%yQ+~;;NcXiN|$4R3A$&vH=F#g<`U) zfI86Akv5sQm*MSNYp`q#{4hA5efEYBK~FsSRl4T6&}VP{c0+OqMs{-;^}A zwaZI4rFiQNx;%yNZvsJw{eyaNf~C~bWqq@N1ML^O_@*U*JwwNT-j&wOI*;voG^c*I z^4t7SLK~1kDZc8(s(x#qH`yu-N?9tG0TQ4{j2pz3SjAFIAFSx*WRh)cic6hCigmb5>*1J~r)-T5XEYAahT6 zNrC=R(3qge6%mUm&@AB?( zlB_uB?Bb&9`7$GZwD?2h6pS<1dc~3@52PgHk~Vs7Z21)-`-@-Ax}RX4P^^{nM$ME< zl!kQD+{{bnL>vVzG;>&(bH-nu=cmT;eZ42P5vF0hGhHY!O`#j;4oaO@z-}m=c~^7PyV9^dW{{?Le8Gzk;dc&EXxWy%lltrPxV) zPj_o@`;903keppJd+_@HOX3t`ka~BxXH}aO4{+X#=y36K_cGZ9IkuKZ!L0f>=kT4? 
z@<({WBqw6GO?{x9`HpZ4VWnZ*C-x3+mILFX0SF^L5e{N8tXcm`esI zucaY~v^u|d5J((7DDfQnBQ4drYx||!VnV+|GA7n^N^_xpsMzi_3_!dy98lFYrOcmjd7rGnmD> zWen6n>R5~9h#YF=bx<@lHN*_kNKHvChRz#gG$WV`G%RYwbw+KssvI+=5I%5}VvxSw zVa(|sldG(9iB!MAzSy0+oMA5+kvzn{)|Gfgjrqzh5^40x=sBtOx zv1UQ-QTw%5(jRPB^6*eV3E8g;1&6yEX-mZUxE(EqrfqIS=zU6~Wp8*Z*V?8q@b#Sy z70ZNi=Ja9GtJF|8K}kxS@y+OAmPTtaR&8}0$nC;Y%rgn6EJ)X6Ux_wKD_4$Mq@cTw ztP1Rc$INk=_B|&w6ybFYd&Y$H)~%Ak>DWMxfuwqveuG4A64uPYY*tqKFiAe#39eso zxtx^YuHT!^%>e09au0Z79PYks)veMSn!fx}e7a-lTBehz+aG9A=JFQ6aXe<)#X}O z3(X$``Fkjj&CKl+1&hN`T!%4+>vbaHD|rtmagvb7Y-qm^NcEK2j2-6Ws}kBF@dNgN zMO$Uf`b`1SM+0axTKx6Zas}=sW|YIfxAtr*WHe%!XNVs7a^2kGJ9HOjpg{N|mdQP3s*9Kd=)BCJbN8H$z_N)G5{b%L?g$v7(d+d9IdK zv{nMZIcMHc%(EEbLf)?VK)SWF*zXFOtzBA=0yBhs5lqz&14$z_7IQz8>9 zMJ-05J$Nnk zsTNLVS0fncNC1Ge`&F2%z1Dm4Me2+b>=)YZqM^<3v=i z+FTler422sv<&Ny8he!}v|V;a)q!)svPVlTW?;?mM9q)m)MY-1xk`O_T!)I$p0Qf$ ziUfX=?<8~2k^O(<=+O`@EJS{XW&UzpI;&OWJmx>#Hz z!pN!Fpb~t3fW9=P^YF{FPKD$|L#bzfECj=K%)a`)L$JvA*}FJ(QzU5guY%jh4`$X) za*iWj^hmlIdTn@U?lK+CJ1kyT5df0JXZ^u(_P~n*gaov_`S*_8d94phH^nAkZkMUe z;0*~{_eZ;etqhGj3(v#2vQs@|1-*>}dT`_%FQd?fG2m1{_ zn23i_U&6;7!V&Wfuia|lhcPr z&R;9Y+B>lUf*%RIgn5y=51=je*$u-zVWbO-E<5+s_NZEl^73*9snq^kETb4e&GN=t z!0j8EGK)iI@3;HsI*(F^ndC0B5fa}(5U^&;4)7G5^~_k)Vr_{olzkix@D-Kr z4cyKY)aVD{j0$TN<#UBI!i`hMv(} zGqrTIVg$2mzFGIXp3GxVTxu3Ir<&i4t1L%A+W{yMZ$XB)Q2S#tS{Z)q4csH6D^gv; z*}l7sQ){vs8w@Fn{e+wR*AonZ7Fhx&Z$DQ~D?vp7Xirx(@@Wm&+xt)U!C z&N(#LXt<=jhDtDWy4u4vT;V2P^Z*&*+GVGfsygJf?cCQQQnM9Sm|LHj%LbMFiW)F< zDr5cpB42vVX|1)36{@%#Z;+?1*#}>}Pu?2hIp;IAj_A{#YHHTh)Knx2F{?(!__6Qj zIy4#WWs})`;LHV?8)oaQ7#o1>N(WdTrOI4)N_G7MlFXK;~4JDmU_VtNsk`E<`*^6>ExHtrN!Yo?nLq_L0 z5Q@HYJjYV-*JY9oxtED2ry4R$kBom~ARGoe^2}ZKN_!WLUA(&y)T4Nd+B$Ki@;22e zyxcjMO{q!2wU2ycW%p*EN}GmGjDDiG(Rv+8hch_WwV6J!394JJYi~FC0#}cUWj6wJXr@*NJ9BhG zETu+oF9Ch|w0fn;V}_%|0NdiNNgZ&FtFQDPxs1#A9kzkf2fjK2!IrauzGLN5*k;} zxZ931OL%^R6mB}69Td$!%gUfRI7(`acP+G+npmK8eU3ItaXXwhe$}BZy zx2cCYCf$m@quvA*a0|$^PnU$jf77`L(rpbQVz>uLu?suylPaqMikpiww(oA~u3z(w z>T&`W1^ilL!&?U0Su#$9JWnif zBTro^2{BGBy;}g2P(N|9pj#gU%JgCK}>9>n4GaG0{NawXb4MN6K& zjVxL8kgZe@vnqEE^&8qsj0~;5lpb;$`?nBQ8mhsz%XV-& zI<>cHYzSQ&aO59dU{p0s$9VlE3hdby&AiJ$xljWH%VdV3V89$6toUEk=_rkeYE+8+ zDT5mjCZ*(iV7c7%_0NGR!@Xg|$-3Wva60=yfUIb&)wrBv0adxly@H7EgTP*$&-fc= zZ_hXLlC?>Y&Gf)%GirWkSq~?n<>lSA3Wh|ADCcZ_S%j>j1=?>3%dY_6c3@;j%FJE| zOcd_YzLF(p?H@eXTj@l98qekrW;{OoZ`e>kwks?NF-4{(o3-ea6w(5*DLa(uoh@rk z=7IgpO#}ghg-?$JN0R3qb&}#|t9FKvtet7$WHM#w6Ik#nHc-fB>R^9Fm&ugr)zi-u zw3GtBKi$k*t(WPu_*?$tTmOSTS6K=;lop5!5kM-?g$SE1No{H%^RIdAr937Jv5rKAJM{JXYHYg^Wq?=Q)8f<^?}y0`z}0@#x`9eKq18&naNfVzS5%CkX7ZEPQMBtX9eFamtfaw$f-J@V1+g%*!=GLP_rJM8-yk|g zM#on|%dMhSzaUCUkX244g){zQG(mVx_uTzBUyjH_&&cHnGvZLA-EOExU;Xt9`3>TB z=v*iyU+lp~QuS|rCI9ff`nXE<^AcW^Iyd594+LNsX-k0|;N z=L^Ri$NQF;yoIKfC?WF z?}i0&a%&No@nm#4-w1GrpX%Ymed2O3hOhP*v3i8h9AwDsB{%hgJ=9tLOx)!Q>*3vo z-1&UMeE;T(il_SqqS4GSDX^rtlkgr4sZu9 z=fTD$K)@L7d1+j~(tipS{ENy5fQUaNCB>sp1D&%ybL9#e>Zj~?IJ%`cMS}mXpYaKb zBWd$_E&egg#q6LIDs|QIl6K%>7ypu;`|J}Urf^P zFCLI2Rv9<|q^Q-Fy~kG$u%zH|{8qpV119}Qe|6UuDez71rEHnbd3|GPu+_2OPr-oS zic1Op7m78p{EAT1+eympRZt~;V=Rd%!XUN43#b{0QvX#<1Sm4{Jq2=ld%tMIZZQas z&Xl!X5Tpt?fySH*QN;hdQVl3f30-};y){KGQsC1NIQL}KXC6~=ly@Y z=9h$=uF60fkE#lUs}?0L{Dc&m^9g42>fI6?QdH(&zV*DmT4y6B&f{vQMWum2VX0tcV6=huW_r@~1k{=(ATUxAjs6Qt{3^nOm->%WwQ z{B1Up^lYt2ob6aqu^{3EK0r(FnhwPO>%0E*^%gkL(jIx}2B-gsf>nrHy+>G*&_sVR z_Fe{S~gIQjoqHmxs68M$sPFqI?EbET0ZZu=$bI4?JDQP0< zd~e8nKDxAODxdEPZ&d*I3y(8=u!DE!)zaZ!(4>Eb4#JSk;B7WU=M(dFz)h9Kl)_~s zN$kkig6$)Zo2yn)F~@;5d(rtNInGV#A+`2y_2~Z|!3Ze97+0z?Pzdl)B|}ogxPmQM zWI)n>(qol=_4T|mx8i-a(Bw204>`57P2oyCq5pQPLHX~K>R*F4KiR5thGF&0_5zM) 
zmbEOlpR^Xn$JNa5RL5Kx-OQV!s5FIZ4bz7HhANTA?I*Tk6?j$Ie|On`4Vu%hkVfe` z%};moxdT9eU&=OZn-e5RKpPC%@P)A~V=W0vP%0{L2-)(BxO8b9|NWoX`>&t1#px#v zw8F+C43c<=+81Xul?gA9v>N(FA7><``-y>j-?JN%E1%7y0C%3^{JXS6H$^@wk-vDh z21p}+on~`HTXjodq{Cp_5CMTD+PVv>eIDBdlF1S;YkB4hoJ%dp@MyUXAQx=SI z02URM9~`I_xUmTt5y_^)yyPBEMpI-JI?bR%(@5(thlYE(HV%o=DeyyZYQ*hdGtY*S z6Ed>MbY^GXl^4wSe|^bv&?0Yi*u1vDDTjTa(KIVigNi!_=a5e}c_m;jA`|kA8EFk1$cEoc%UKBiP6aAnuB$#1)kS}9cBH##ySumkG z8TH14SHKDQrz-IjQv^ zm|PJE2R$Y1#aP-!7Djg+5gE_F{NKWB!l-pc?5l%0XuKd)^A2T-!kP8DGi4~S<19H- z!r?nX_w&cRipp%%e;-)f<{1j>&p!6eJKpE*9CW7M*$6*mLhUd6xK!jD;G z`)&psRCAhNZ+oBF#%9*LfQ3F)#vOR1r;U`ywgAd|T1-hXAI0ajkVbHChPQ7jkMdUU zmXxi`J9)A5)oEq#_IPctYXp;;I#g&w+76|~CWK@KLxNxK5I9WbO%+uB4kF_4pXt_| z=Y;w*8fUlnCw%f$WfH%uq5osjo08lq0847S2V6_UU9+|;^;KMR>ab3IXTRCizxwHr ze3?f_RhV0B4`<-PKDb)_Fe0=`Zx)Lq1Li~YYzHbS*@joMY|HFdDv0=4GWkz9)(!>n zG3P7=4O_QmwzEjKc>Q*{?B*Ip(8Sb!U^&Z#^Hh0z?)&*G96&&i%htzgceV-U%Hl+- zY2ri@xecb{QXH*ZT_pp5A8hXSjcvdev*}PRgQ+4>NL!y~lUw!S+zX;Zm=l^Z$-n#3 zG2xad;1#Od<7KJ)lTnYNKE$7xGTCT^Lg6(;BLs_W~x8yGAui6<@_Fw z-4py7F}74Jn{$FhV`i=KvX8CqT#K^TFl2!N6+cN!^t@x!6!=`Ktb-?IHgGAu)t6lA z&0te7hifr!jN+*q_UTwfYMro$;mYh?yKpdIEDAKRk`wL`76W(;!zz1V^SyQZk?R%2`mV6 zu#iEsyN8pBIZR}$U^r-%o=IT3MZFqhnrzzme&5}y55l+!MOy!5&FE4^J~N3C1orQ!9J>wHZn89Cmnor?16N&`SEPqjgM78x z%kI3X4U95nIx{fbOoe^UscVK7@hlJ9Eto;6w~{OlRxB#J%R@~G!^b}u{C%{eF2~FI z{o~NVZNwMxmwot(Ux{CWicXVg3k+IUGpan4rZ{+q_*>m-07#I?(-qa=QcVV;*(TPa zAiJEmxuM*4aknYOuoENO`-jw?rq5looeuAkT#sjN_HG?W$I&+Nqpn%JNwz0T26@D9E@n1^1|_zBozRf{4>*Mx^I6*^}&v} zSdAW-kA}TE2s`f=>bZGn&)XEraO!kbnMe3DCrs3WZJY2V#VL-@L;Ol$ldQ&Yeo7(o zPUf9;V)=!AFA;@(mL-<{VndCUuyr8|_{Q+mVe-4wf?Hdf_HGmMIDnL?#187;JV~?k zh`7gB;kVZHd=|Z3$UFu-v}0be{*yd*Qi5o(^<$0{!~W0> z1?HrfWp7)VC0nZr`s14#tmBVM{O<2LsChB8xdK&7IIDh*)YP^5r(|bp`ST8~cS8?0 zIH5h8*nyU`bymqp0EwMN_gflDG*E9z=-qx`sLwk(YBJ4#gRVoV0Pmuv8Qg&YRMRp zWwHXnPa7TD-{l0pdMF37zqOtnUQ}sxsVw#FaIg)cSjyabNl^B{3yh-@dX{>uGp z*{Po6NqZQn95u9?7CfdTT+_Xi${g60OVMjD&UTNvYg2>NSPEeRxKXqNjBp=Q+O-(k zQ+{iganM+vF(s+LpB39_@(KZ5XFeP>48uexP9F?^&c7I+VP$f%f86mP3*YCPUo^>_4 zD3C@6jy+8uP7=Y?R95r0KN?}WEQE3!n!km>^N6nd_2_^~N1v|*hFgd1rQ0j4ddA(p zgaSU3M%A`y%wmkbo7h&;2n5xUVS4jePZ~MJ#_JAS^L&97dB`nbDzY>Ib)M&H$GTqY zo~;xMJau@gl&_VM01~=I-r4tCHyiBL09Ku1(HP+H3&u%zUlcn7a-ISrqmW}S^`APE zFIz5AfB7-o;LcDUxC(E%js?Ts@N{k`Z9PcyM`hY{&~JqcV0g{(!`QvzA}Xv*aKtNT zEBKIOb#1c_yAG^Ya=o5L_0vts_6bC-&A|!2Zl{el1SgbPvE8bU~Sf zedf6+B%Vt6$5P&i<}=-jGZUHtG)hSyHF&HBI8-2{o3XxC_>2zW;BmxFxWO+d%EQ~L zCs5D(XBqbDe1zRaD&GpN%Pud*CLWzoSyOddvwP39pEoKf4|N#I-YV+6r3j8qBBjI> ztuM>q>W-KE5x<8Os}Bm+f3Txu=;FtqU*d&xko6Tl9b6DgT}QH)ucpe4k%twwY1}J( zeYAO1E8$TH2DX0HDBB}F(TQ`GXT?uak@UHmQ4fCT#UmZ-BjfQhc&WA&X2M`8_n8*m zckCZ=SorKk3L{u$e}TbdVRDdlWWElQp|~h>XiMsb7{Q78C(o@A(F*B4+Mt%Bu(V5tJEa@lTUUA{yVH*)#w06JaqQzW|-E@ zagqe%R8ot+O(DCx&A{^==}}dWd6t^DPYVX$yIygH+TvzlN6o>6!!1-LqRp+}xvPb5 z#fo;IX)wwThb4C6)Szv$=^k6eKDqo);fW2I$KxMJXV5wJoUxlkQ)9M>r^|~DHvO0O zB`5CL|JX4T7(%%XRAtnDQYvJ}1yF)aScrEMY$XLE4JWv>7+Jd&-D>XB4^250_M!rob@>+zRBI;%9I>Hr5Uuxp{1{>aJ5L_3q6}xqsUUc4QJ!?4!v}xvV2wo@1@86t*|9 zORU*oO;_p+UT&ERI;-Y%(c21hansTz8-GRKzug?IBtt|^DBqhl4fbs2zN2mv8@Pq7 z@_{FF$Wc+uw=^MgF6BJI0jTQW@SXZ#V+0{*m#?VTxg&pZb#Y)|H`l`_77dh{9J(C5 zic&LA$`DSgsOnXAN(?!b-_)6SHrrW3`Fm>XlQ zwz?7|_t5WPZ*Rr!;`ns}D&r~GIN*U#xw%x&vbiJ%pb*)Z+I6#|&=z1ttN4(EQ7ah4 z%0tH))cnbb_s_&QGZYA*_LLPWRl*7kc*GniHAA9Dx)l^udlFHAaibZh(nxma{+q@4 zk9<=2ni1QDSyV1(<<280m)HeYahuxYx8-)wJ{7}6(d>~>vIm?Mis0c=G$E4KJiSre zpT$g9d4&yWzt5EWR7`O>%?d?CDS`&Eo5hinvb2E5m!NdssQSf_+;U# zb&;)#@p7{@R`XzqD-OxDIa1KF3uT{d8WmpEVL)+3r%>b&g@}Jr`8{k;a}X9Cl+I)P zbEjz_4mz=bw=a07J%CG^GP^g03r+teXQ)q|7|Feb<^DUxq~kVk``0U7Y*OzZqAfe* 
zO$G*hEU0=F_N2!QTv*3)EWJjNDIM*opszb;3hJ#~uZJW^PSwKxUy55gd|o*5a;#*vtwlqFfNacrl5qCI`^t0ky#0 z|C)!|tMCs%fzw%vHC>qv5@jpB`)h75gxxow5J*)V)ZaV1nC zC^PR+=Trv^lZy@^>}QoBHyi-e8>?Mr`3i>n3qb(%JrA|HJMjrvOg~|(E>3>a?-|+U zLy3mjd&~RFqJSwhgJI%(2$0sp{oNa1kbCPSl$!KL{iZWlHpTVJ$wrQyhI>-Kk^|SQ zER95_zCFM1EqM?iAp2IoN3ZW~khU9F@wfLi@%t@e3VSBvlv7$SVUIe0m+1<0oeo)! zxlWzNz~yuXpfB78Z#RuT9Qy3PFeL*VbPXJ7wGrX}d03VX`I&CnIRkREkTn3fdD|(n zL!w8%>h1lxQzPt%<272Lj{L%0I+lg8&cQcOS%>X<7OV)7g!9pT5d=;x_4dMwIk=oI zRIPUdPlB5(5?P5C#a-Ta+PJyl-)kH`)3xSirKMpco1{kiB@lb!3mD5pq6qA{n5mF1 zt4TFQ${P{LB#rJJ-Z-XKD6|M^7k`F+-z0~!{X-mSs{0UuQ=Ck`7t7F=Wm<&nilA%) zZTZXcKzx2}S3Ft?8r{1KPIz)!l)0*n?D;WM6Ux^?I1%z)D6^blk2)}shzhtqA_lBz zwaDNF^~ZI;Cgn(P67M}^>(ExBi9HTJ=ZKTkrgJ@Uu+| z7(~Z66?6%43mytDD=qlY2Z@IHwGR6Hu`IpL0&n*vnb9b=@BvV50BG?#Llk`l-^(a33-Xn5K>rf+K`f} zI2-m{#}2}Ylp22bSR3EqngU`f9G5FO;uW*gH=PNyiA|IA(+m0TK1^cY@N=}-M>-d= z;K5!+@F305TcvO&rlQdy^6y;3wJ(!nNSUCovm*K%cguTfTa%AwSJcPWPeF%jz1J_SsAF9IV!`E-JDW30=~OcOa8K;IG5)9@L?x{< zTLYE5OFIv*cnn&O;!-7!D4;pVUxu4eUWiY>^l-G`)#AZ&HvuKS$`{5*p`3w`^_<38 zsR#XdgYTolanqS@^IdjB;6y*yu~klFW{f8qK}kA5ClX!G7=W~_4Vwbc$(g#sYV4k6 zMrnyjKKHC7-v%IdSkzi(SRK%Qk=xz>-8kqPd@CY5orl71sl6Pa;0-NfzEbha>3pQc zyH?od4n6`37l`*Mq_e|T2(rRx%yNZO2(;h=w453UhFkZ}!L>`ToMh&{&lTBkm zLJty~@b{s(y8sEHh4uL0QG*&*81t&zxHuaLm?sM?4(H8maVf9&XI|JGvw*Q`c=qGo z5(Av0R5mvnoHCTd9%;v@_zfFd_70uBc)3BrDZ;V>fvSCp71HGS;y5iUv9KdN@3Z4U zzqwbmp~1+!RHK!YX&mpol;*vs%6q5WbdDMCQHzQX2{uR1##4Qg*&#*tCMIgQkQ7?z z-n~dAs>YJHQ@XgK2${Gd&fS`Xgw^ClYMkRTWvYBLvg0>kdU{mg@DeZpm(q2pre_X=o)6jQ@!OnLixjSN& z;K6zpg_n;r+p^yIfm_09u|K-cR5bZ&w?bSk)$e$o;x~0uU#QgOcT~l02U2%7leM@h zelZ@0@_W9*P}K8$X7)35Q%L$=Lv4uW%CPve!J1bb<3U5&XN{9Cn>#-rt5aeqXG|#R zE{(v6AH2<}K5dwWo*BZtYxzhlr^)8J0^bgUCpnUEV*Yv1o(}zIh=3k5Y66A`quC-W zq#6YAhhFuU1&LeF{Y@IpqRZW-YnI54E3{wsJu*I|+3$8d$=)?sCD*=DsurFfAN;3G zQ|1kskKq59+7tN8i;2#0p6BLOR(EE5X_24?p|eHa*c`^6!+!%AI$czilnd{LW1NDW z*8y~dEi9g+q?H*QAS3O%0a;8p{S-|k1}I-gBwyEUD3~0PkvIHQj@*?>iCQzbSvpp4 z=beu7ZEZO`q7~Qj&jJX8&IZX|l*R}(g0HUzOvh@a&gMifAHBXY~NJx(3udslznW5F*r+T3tn0J>S^;> zq{^Vr`bsT<^Vh!cA=bQNW4-k|LUeGXZx#;2!tk5J$y#9U+{+nJTB<3v~P z%Cr2)m_sosXe-SBg&#htFo(Z-f59YHjis_SSl;)HXv-l~?EOyWIl*iZ)%ESYH{Hkf z$`*7&dw~yTt6C%bgF;A-WuPnplu%y#ceM!~JICp(t7WE@xRY(mcdVD3#LEnK&t6WA z!y0Nwk_B!S!_Nwxj#0z+3To;2TMU^|YXd779xDf5&&)H0jFG)#79co7QaJMEN)_hN z?${!1Xwhlnj=I~o;7LRh?cI#ls4BdhS&!K7w*CLeM5!GO&9~iTxkMauRrWYeD$?=& zv&MuO7Ua0;lod97#1pXrE$2OcV^zXcjkWIzk2#dJ=Jwq?X%C?d#!CaU@=T2O(9RQjIueD*&62L zAc`|XpN<$@167m}5;)R(3dS=UJ1gA~dZ5K4W9h4r%CM=mmSgjIHrvoqJ?^}L|B3%d zJCY@;nkN{w=DSqeq@$)xd=I4GA8vHVbEdxenewn7h1<;KSXRUFw>B)9P{i@=S;bM% zQIh^wwk9_A05+#z}7+%Sw=?qSQD0S4&SN$`3GA zs2P18RnS($tXnPTwfY7}R>*3w<;poVb<(N^IzvkbT>4%N?rR0@d2Q6bA zk8f9gF8N$aiy&Uv9c7AJRpG>b^rZuv8?vx3{G_ME@d~oEuMJ4_Q_ubt% z%UJ%kmFY4*0iOxxSa?)>=tDzd#ehnquzv9=wF|sRs{oy@Tq`*7h!U4?WmA!zcvXMl zOcY|(y2-oZEU9;J&OdKs{`nIXU?2Pf?a?laPz=)jjGA_1CjuaMEW6)FZDS_NGP&0S z-v2m9wvOwzJS%{}d)F4EUQ%3f7LY_9e|)9{wN_R$h#DfK6tuMtcTkFd zCH@PQm-7p_UpE70&sQ1maO>YB)gNJfvTZ|E< zQG|bh?EQN~-o%x6Z&4Ch3iN7$2U?^X};9!79{rhARqTo z3X{^Bcby(on9aC!NVFkEs~T*Fg&yP<99iMeJ|xO8jv)FNY&?stwOp=yBoPv`5#rU* zR-89Wo_L`nM;;UJ$3FrByK(q7k4qJ;d`YWd53kH@x$OITC(H4!F-c#%b56>A8_mOW zenP3200q`uQ}x?S=B_XxYk7tM9>ap&)I;haNcM^d#e`Y_`os{vg8|JDN8bWz&hIE!&kjEu@?6|t zFW)_QQL1)c5@EbTjz|9V9 zeB^$-+;ODzWPj`~Sw#)ind4nL7 ziqK`9_k+-7Bf+DD$cCIGWbafripFhCQHGSj8=yUl18wR7g+rFB1R%}!(-aggL)1Y8 zg@7d2UY?z@hffaCHO92wItscn5uCEQQJclPZaNM%+_m+S-XRY)2EleDO#Jf~HFeX7 z4Y&P9cJQ+okzQ!HKx7UA&0rA=iCG>4+19Tax-d!0#8JdMnG5rk2w;lp7PL*JV^N(zhF z1yxXL6;rtK<%DvXR~wEW*lx$#c&kwt?1NCn6JPT7L!XTTZS&(N^B?;Cgv>r}1BU+D zYeVm?7Y()jU5tj`j7=)*N_ z{C?xP4mm(MAspIBJk#1lpX$m 
zp$1wS&hPr(oK2NIA1C1qemxVCTXqfs7?#btl{=PW@EkgkEOHK#Eh&GcoaAHNPwk6z zri2-Tn#*34p{7GpFGc+S-Pxf3XKylRYnrlF4AHXos;M_u{IfOLIVg6nEFf4xUl|aI zi4k_sUrw#d23*W16+#WhD=nZE+Fo1lv`l;H#eqvsAo(BCu6Gy*FSl-@w2YsQggsAt z6|`GmxFj`2h5jTW7T9>8ehnoF;d)}+FRucYzO3U5e!~W1Go!SMPBZMZ1Nyx-deQ&b9SZ z4QOZ-eUnRF+N+_ixD%Z{(wGaP{bQDt6(FGk@Z4RPn>+w+CMOOWHJC3YgVs`^QBX^; z8MZFQW-MVTXuG*M680GCMVHOTFyUJ#aD8Oo+Gxl8xcXW~1>aqs(d=W$y6d-W?9(e_HE+ur^TIpj&JY~j7|AAV1sze5}GEl|bB7d`=2qT=e$qi2oJ686a)W2>zXHFK-K z8;h(s2ix9}I2#Tv>u@8m%8E9n8@Gn00c@;5b2{U&d*A}#jUL>3RJq1ES6rZo01uIwO$hNqWOewlgY z2(DJBXl7(jpAtkPv-L*V#LPny53A}{ze9|}4c#{tmjbSv9}wNYo|S#J?s@M1`$1T{ zrRXKDgH8p6D^xptF*5wUI<>+y-FyID9Bl9{|9eoc*}8Bu)>c`V`G){Ft4}^V2W#Mx zq9Ab7`v*>BfC+W>`{?7$3nKFEo7GYSjF|yrq^re=mej#*F+Oyf29kPI?0x-;TIWuwGeQEwOG?*XoS#bxZFbEw#7=9=vN>vqo>(|m|MZdlW@$2UVgdm*MLQ+<&>yk zDGd6v>(muB-T;P?rC0~nntbVCn!iJKO5Fse)rZKwTGWcm5#~aC6our9G(Ot>kd5|* zP0wufKeYzp&$2A63go_eHQ}IX8a!4kHsSxIhw(@7nwxdZX=_B|4n4 zi@0UzZ(G_K9&YYW#PF==V|+K4t>+f4vkR-$CqB0KX2hINPxUi@l6YGv~f>J{(}I*<$wXH^u-pQcX7A z=2|(8#j3;4a?e#rLsLUS%AhfBtpX#ZZ@IMY;2BHbDkbOOEi6%vY50bc_u*KDytSyb zw+gV?pe}a}T2(Kv8K>hxGnq0g5f{-Wq~n??tG@m>thkZ>z=|+_qLP->aPJtw`WNU= zGSj-BuX4RnJEpN=)IDo*)R0iZ6Kvf*dh0FM>eNkisU8=72uW{zI&p(XC4t|BlBBCz zm^5}}uQi`&yWz9zzURE7B#F%utrmI(a_;mq_hCf@E16&=COwv z7e4Y;AA4-({WzU2TCx6I)lj&OUYU9r;nb@%^tBwdRSbdPK$n0I$Q2B*lgR@hz>Hn$fw%^fu+LzGS+Z?6VinQ8PLXmhJeUkQ2pP?<) ziT3D*2nYBVyqc>xJ|zlxOIc^Eh|)5iv+))Vi}jt(J?>Bi;6zTy_K6wzp%MF3<7eg7 z>RE=9VCBNtIIV4dkn1xS7n&Cp7k=KLH|*{4r$q3l+GHB7b}9?OR6gwo89<0GP5V8< zZR?_`6-u}jh`|#nH$BNvwf1ejdd$8?^K#)vXy?5<4iJAd7s67bnVMwdgagNv_FOjdxFWMlTGm5-gpNh0)nr8Ki9I$R zyJrtYe*gzc7X#BOSa|FVEBW2!WWj`SSW;f93%B1KS&H0qO-`bQ=z)u8A6=1&d;L5Q z=TjJbdu&k~q=ab)=Z{Xl=0z;(IF)*_S2v4kUkCb{v8R5{pvw&hS2y&{OB29Vmr6uRzdomP)AD|bSV`kki%{OSc68^l?gcFK9#D#wFXYa$E_5U#x%C};7 zn}TX{bQyaj<6u|v(d*W4Cej2)gM@cm`lDU00==D`^|x*mcnUnlXBQ;Sp-VJaTaTUWpV`f-@93ipibHF z#t_H4DFWzzA==Lv9lo%;Kv(l6s1vZ8+V zn_jP2w-K#21ln2r6j&SgV2NUYfx7jL5uVZ7Nh{;yuH;$DU;yDGvQ5|9?i3z8G3U)f3c6`M9YWZq}Zyi3yp9E4l_Gy{xFRNlFeH6s$kg(nK;n?*Mm?swR~Ubn2r zg@yzspR zfkBz**Lc3u#~Uex5ekfJ_q096c}Km*I|m1$E`6S9+M;t8+=8v94&(}^MuE3Qx;YmW z{8D=|$$iwwEJ1b3YIYa?cTZJ&53nad_j#}^Fi#ML28tQ(PC|Il-=4Q)#8$9HJO+Nsr z`EEK3Z1^-qHDcnRGug%U`zPJ%2d#(X#N(8z2C5yJlk`m7@NUkNcnPZW*>qm+UpeBg z^8Jii>!!qV=sp}O^v7gLJzSo3T z4}2kIUtV_oh1Z0MlbNOL#s)dcQNhgv$Hj~rM}%#1U*y8antlgY&Aug+(Bm*~iFch|csUJehlEmKU3Pk+xnSL?)2$6NyJi+l2$Hx2q7GF>B`l zj+8R&5~BBV8G%j*JhHHa9a62B^P$nH!Y0Y>BSd=^9WTfi|m8 z2n#ehz>kX0F&(z@HUoaYaw2)~)p(!c`IJ!9Z8sL(Fg&fzzqlh)CNu>_ps}u#kJNPw z{i<>71gLj*bcxCX6`GT}SXUmk+WY?_C=Iz(VPFw0rq(Y8)JW6ZQ;XR#R&_TEk<6*u zLNv6*lM}Mt$|PjZ{+7386~C2kFlZ=tly?`}J(1-z9;AOB^u8G%@?|mLYi_#|P9%|g zk&7o`9BScgT*m{VqTEK$Vh^zJ`CGDnY9ZWM=G_Q&fCUtO=E~t_1r?V@16Dq!fX6VegFEG`m|6>bf zDMM$EJ=P@uKq4q7MYkrfPX}2_Fd0UB=|88t@7e_?8fu+Q$UJn${ccFbIO#{LI22R^ zBUMxb1^4znSAweW>Ju z@4M;g^Ub^nFxf)4mF>LT;DPHM#iTLH5R!P4C%2d%Ur&k`G3ja|1}>nKQDo$rJ$Or> zYGxuMyQ~tY>Y=r}vev@8oa#M)n{Gj{J&iOgJ+E6v>Ooqdr#NLV^X7t+4vLC4XR+{s(~DyxR?8%A$DYi5PgO4scvhXFuCCeVQV1EhCCSpF zoeC~m-5GCP)8E58uQyoSk`X95m+gCNnO8c&7StVft|UufYR2{An{-t;?f6}v*LDp&S$@K*5jj~D|>_6GrVw&2I2H0n2zfXBQt$M0N8LW(G)3f^}qS>(0 z?+mSLbl+=+7|t8>gjwz~SLv7M&8DVn+_cJCoim)0bCUcpo-bk#DyH_U)l9OF^yzMS zt8cC@oDy(RZ?sk&uI6%zj2dou4e4+s4tHKG+uVb` z+#;gOU-Yp;!XZWD8e&ym0gTPkM7eu$qmJbT3(tXaoSR44=;%mZ*7`7OWwg|83k{P_ zX($lE9m}|OkA|;g->+BpSAO^5PjfS_)>^8o|7V0Jhbh{Qc!j|VY%^2wHKzWGa z^@xd)Y2xaK3875PeEhczSrr&9d=-$1t@FbNQd=d(hHQou)o<$$Cefs%FZ9w(qv@_7 zUDkvs=cIjIpBF7m7RJuU^NpViCT{cC9~5_iaKN}78Igj-q^MD(cR)z@cjdMIB@w># zod(X=W~+&jm}s?Dv!=o<``;ZFo~&WyX2PxgfLm0-Q;^MwF1w9#$|Mm@aczc=bGvZR 
zCv7{KCP51>WXXOaVv#~oj_ZnYPa-+mS4*6Aq2?ktC9tfGVlK{msdnZmJsJ$E;OKOa zQ+0u>l|_5pHN6l&aE$B0_m{+@KmwYjv!h*@?+&BY@tLVnH7(3<;6=XKJBQL9?1(`c zRwGkCu8Lx)*Pmv2_pn{dgTtMD2Ly>%)Cp@7^XFAldo4yYeS~ME!d%Bxi-7XbFlS40 z?ChJH1>N~d7s>nwLmhHF7M9KvL-!r|BFD%)eSHBVGWn`K)=jT2)A)V|#MNM|-wF{R z{K8{8{vGKFFKbBGf#^|Ze?Udo0-_v;U%B7N$N+O!++G|XK&xR!M*P#qbEYYTjSJ^o z3@n`_-VUqjCX#U$M_1cpy7B}CnkeBe9fTac-#^0{T$IfzKbNYT-O>`-NWkOiD{79WA^3-AGcHUi zX*yetp}Dz-F}qn)xlg2F(aMjp=SaE@OD1w~N<5$5&~`(!_7$OsNx))V+{#j`hfu6h zu^Jh$`nstWqGqS+k}sI<}tDhg#;P+I0yR`=TX0Gsdy(oZ;Q(x(+Wdu#!_ZrysqHhcSQ^ zYoM9_fds)gnrQ=KpU%r3dK(!z;A-tA*D8Q6K=@R3b>3BwmPDi?(TTxjF^G32W-}mt zp6hmj;*w}{S39!0zG3i#V&Ba8aSbm8f)YVlpmhU|BIMoK67vXv2r4_`wACW20nO!P zv-y3u>R32E^K(|VeFy8NjnbQ3kgbF8_tnRd;>5?})7Jq~%>q&r5H)}9sp}4EQw+`Z z`Dz2?_RXb(KpJ`Z%&Mb-W7$=$&N?NCyJFw(Cy4w_K94A|K4)hEF{5jlbaocT&79~tcN|BTF+Dj7Pplts5%Ix0zBndW&i)sf z~<^PYB8dTY(6f3Cpx_j~dGSyBprR}^cs zE%wx@sj}HQ`c*Dg$d}uiXR(~qq>$1f( zS+r|)K~DSaO>@2%8s!x!?#^%>FGoY)l)PHn?cb&H{~H4Txb_5po;_$L>r3Ffgr}m! z-kM7!Z#?ac%WgLq9^LvpeVqoA#+RLD2w6`v{qU);%Fqdy(V3L~=RdY4{KBvX>Kz@! z&jZ&X5kI_VO?~s6oedOlz3VmX>)|AM^>f&M^R)ZiVR)BM+3R1Q<^M-0>wtBVV=$YR zH&KcUrzW??7SafxSF(XH+MWnz)a^$5R|pb>USjC`dPwdo=KtNA^4~CMRr*=#ZgBiv zt~}V)Ge42v#%B9vYb(d2?U=mnQ(pyWKl_goIN+F{NXC?-%vYU$Vm9_P>Nb)raLG zbGV5JgKsO`vY0wX24z$1STvYdBMsZ;Q=DA%An8{`wsxTb`1mRH7llw2qs*YPL>0MH zs=xm4=e{>ncl&0nV`q(t>e2rhJ6k_cd_QPjp-3cSg@X>mV`f4Ajp@QabfI7bv+Ip` z_Hto&IOsUbZ7}^kA0w@%bDOjb101>iA5UI`-@a|q!u@3E)Uh2!Q+Fu(z{!MP4nZ`@ z8*SUi0}ZyoCtmPpFGRK15Vx#)W6($Pnbk?HM8D2HoJ~_k_&yy!NuJ}p0`NUC6J{e( z{(KJw(3R2+D$qS&_0bksS2-w3k{xYBj%Fi@L?0+)4?f7Ptbl~$i@w&amFkRt@teuT zX|+b~w66qE3w6YmT=hcO&4_sZJ?TFX7$-?FdC)9_u9#V4e_M;d&%med{nX~N9Qn^m zkJ+&CBNzR5MFdyT`-F9^No+^VX~7{4voAH8pJ9j?eyspq$Qt1L)f21Hv4|y1&DF1J zztfx&a&zNPNfr691wyRa{$ad_(KZTsh}h2UYswz;^`Oa$>MS)y#dARgRG*y$HK#Lk zn4c0Gqe(rOh@7zt3f&7%BgP)%LG|xRZ0O`Aw=8mE1-26-fE5}g2|&O9{6^l=#Duvo zZ_jzbLw28`McSk!#dmy2lBgH95}9ELT!xtcZ57wfXN^AH;VG!7uiD2i)uED0 z+okZit0FMCvuY5Vp+1QS1m}CsG@1OqN{>dwQ^-E&;}bDXFb1AnP?j<11p95Qwl+Rl zf<~U)-<1zvb7|-$8n2rJ!5)z72@`!wKaTa>kw&}Q;7>wh5I>adT#uFu-n*Y_7;9SW zepJY{m^i6vFxcFU*zGvNbB}hypiba#8?>=Uw&g+ zBE#((u5TZn>^tE**Y_7p?044pyP2!!xRsih7vD`+qYLv9&aVR?bm9{F@{~Ma# zpZ#SuaklFz#YJkkvWH?lfr`YF|3|pc0UR1X^xXT-PegiYChoHQ>yZid=$DczzW?MV zb?4E^;JzQq;BgaW@CrF_eh#rpVIU8}kWSF=IWPa`Y0VFXPh`?t@Qqi(8T&6kYZB&v zEQtKDY8V!-PZS#7noV$iGz`Y6jwHmVL!8*e;xvmqu?C&cEZIyXVH(O%$jAN%(Cpt@ z%DNlydwOcThTHKS67Emayy2M+s)8R_=Pvhz=8SyaA}WzVV#l7Ma*ZAlcrPbQECSO_@uhN2=51Hp|AK?9n16hxnk=vAYi*iH-A@Mx&+2^rW z&FQ$+7K5<WAYmu%kdXFIu^NNNg2E25>GV#52w*iwVJI~I`jR) zMiaGuFiu!o6~2_xRCc=r->rWK`GUVe@uMpmy6x(h4${)|A&13mY_`@sdH}&4fgkKm z$s^?stqRcI!o`msJs|YE^A^}a|8xCdhLjt*U+AXfrSnRRO2EBXYsa0idipi84(n%k zQg+E`X12Jtp`4C=^yA)&4zey8LOz`xaEPKodt`;d3l#xHH>so|!v_)#A&>Qk?&+cs z4&g!t<@Tn?ebPtZUPMXT@p>SJDEdCFpHrtq<8>TL?P1kr&@ncNrw@D7%e!ic(x~0> zO8x~p>u)oH#2>4p{g1RRN%s0Ov@XP%>niQgkii1CqxK}8S3fQae^|mfjd2O5%UDJI z#wR@w@euckp`6+Fq(`c|NV8lqO;W|d#sen}UpqzlN^cV3`>KmVP?aM7_2Oa2Q_Q%Q z(IB`9uZ`UX19;$@a``4By$V$b)TJYQx>lApUb1riiG1ggkI2RH%255)PQPIJLY9~X z@ErhzZ-iFz$ac<4qqIGT$hGq5exWuMF#lq8U-YFx4cdSrK+Z_IsXo1(*6-R*jXWd9 z>-4cj%JJiru++@gCAqtekH8EmpI5V=nRF1UP0_3)i$tpsXi@m@j>no!U&;nh`9w`mQ#11@mUm$tp5t6@g#}JL zA?81s84p{J%WjukNGTCgC-UaC>$W4B${+iflWDdBm4%3N=~p6F>>^c?Xg3>xo+Blt ziM)Ev6r~BnK8Ds~gH~6=AG|+U^ZX{(5)Lj_g<4Itv^E`TW@UVf+hi$&|#+ zajqM>OI*qc+>;Y9jy3Te-gm^6B`1m0kNDzXG;jK@)c{Pk1A|;2`v>(B!cdwbCIXj*jZ>c5%Ss_Rt=gcz$Jw1C0wF@; ze5$j))U_F6&{*uF1G5TG$K4U;y)WeT$n{{?k1j@r zeius@u?tnm?=|ve(CrBHhk{j7KpE)y5gvV{y~rzFAmPr0-d%|F<@L=vt$-<|D2bY*D 
z*7-o|!FRTA4hf5JpiW}Xk2-2Ho#|W&44%jy>^t*vhnEcG*8Sz1j*(Cq97&tb#Bu`eXQWv|+R;66FJraoDB-GG_OBdZM=XzBu~E!-xIg zgjX6Z97TKUe+*2k`Li1tgxrL;r!@GK0mWBOPZTKLB!U|Z@5TF5V^rj5r z;5$nQ5-kFU#Gw&c#L65=csh>bxN1<9QN0#Z>s$*7{qVF~I^;O!oc>+hBh%~&zjxha z#=tlS8wl`tPUhSC(m(;dCA!L_D{v00SGhEtW47FIoIF7JrijDM(U76xH@+y14?qGe zj+o*1tm91RG5%wHHw!5Jf#D2C%=)681BKu+;-!Izp>IkP`JFR?bgw(Ll(b9N%8mcKW&OWO=w>zuaF*WEc(FW*F_)_%wGdxMBM?L<=>Y z`bwH+wh7A^+syES^&XdQyZA1~FNzgIsXKg4#0*m>F?TfZ$5N;xclzT?8#>|7vXL$$ z>MYlk*=C10oV*5=dTdwff}tY~)s>IUQgO(}j%InO`5Z#uC5i}?G| zy$IXzK-Yk`%)EQMv1Rmu#pbLsmBvv=!!uz(LoCj>kRSQBDYRXKCcf&Zm*&g3be#n| zegQ1V_WO~VI+sNY?dTZn(c)#7iS%d3Y8m}c?@8on!-0$r-S5oy zL|xksX#*Vt#;?pk;ILb2hifw_ss_`{lC5eb4S}}xK5c>94+lsq;OQHtcO2`Qt`Cs; zqPsqy>&=|}B2*!7?~QTX-rct)4Err@PY>0{So3l2>_2Ua;ZDqg#8<4z(JSjn#ge;b zz__4JYWvQ$$5ly91Jcolf;>T-9qR{+i9Vg_qGVMf$1+)(->qJ`;}wt0D*N)aasIr9 zQVs9IeKaM(xqfCOPs0R#RGYht?4Ra0`boZ7e@^nz=!-BSSjhb}%-UGLcq71QbNU=et6T}9yA`>LqYA0xq9G=+p;^MN+!sGe%c zH!!bpI1Z^TqM*;@G-zN{Ii?2jx=n z?d;~)IR*|(qDLW(*LsH%upNPuE~a<2TzN_JU&^=?de+T~w69WF66+Q@y}WfwAg^ceIOWHb_f`tHasE$?!+^nmDis!2Rn01s3zk$1@K z^InyjXb&|qsrR~wV1J~US^js$yD$KuO*jCP_?$S-#MX+UH(7MQps1na^_rMdSA1|f zM>ox2$@{SCGkichsoFOujT3a2<<9Tu4lU%k1^(Uazv{0>JP$IYFQnJ-YL1Kx^N-&$ zkr{*ltCQvg@i98*R1(_-9x^VVxnKN+wN$^d5|Kv5DBpE)RExSR^>FT0s@X+v8HSj@ zRAj!7#QuwHsN&6E4#8r@ciopxY3+ii2rpD@=xLgdkCa`F5D8m4>Lss{BJOBZnRkyCeuoWMdPwX%LS%_lhoAkQRBlm)fQ;-;FAw%?Y zv^hRSvMoGgArvEwhlDvc_(S2@8t23#lnOJzf5dO%*yGimfg&g_G{rZd2h_WsK&u$q z*Xu>(&Ubr^eh-|`*-jqbBs1R0#1U+L9>o1o#dG6XDlO)>^n^PFpi-E3Y;6`ZMC9#^ z(KM@TgBAM+LZ9e?MmuQ7HD{vCV#6zV4`6ppXbK}1=VXkRc3u7pT%i|MssSAKWle3W z9s6KqQoTqg&zrRTA2XH2kx-YK?xB(PvOc51#0?D~uqP28*dx4bt{b)D!krizbcp-F@GtH#A^U3~qZ2z9LYey7To5 zXUt8yjf9vZl$kh=?lbOTM%axqh)GWqg>_x4tqu53bC7U4>gHP9YYd5I`}MrYbb~lP zB~1F9o+RgYB4I~l{~Gr!<8YEaWVi~ZF2F&sdHBG2B-$#NrH&}W4TCDDR{bpg)j=-Z zBKP%>&jlsnS6cvsE(WaAB)9kPcJ`u9FOy0FGC3NF|%aQI*IB6 z5J&6IBuAXgOXVf^*luV1>aF*3=*XKY7To>vr!cY+Z z_H+~obzJObzs4Ae8u)Dc&>{O|v;F>>%<1Y05UUQxj&It4(%58OVUmUH-Ab5GqhDv>NuvfS zZ3wLd_@HI`%42Wt<#8m)PT^ZgU5>a2xjySATTHu>!K`}0zL$pU6c@71VRJS|V4_VXIv6w z&rXFtmHQPIa34^q)Lt!;KXdvyIQgH;za|a$SeD|+Xoj2m7d8hD^0@sKjxZL{9OoFv zdbfFrVZYlzBH zIM5P0)x(8v(LS!8qw*Ks0Q_GSSB>jynP= z(PGHJ_v)uyMUEra(z z!Wd1IZ!%nGlC}Pmk*45-M*<-C*~d3<^T1yyQ`z%x*Sh9#nsqHctL4;8`DMk2COY;6 zeSfYgNX|~nn?pf~U@8RE~&B6GYP&NY7F%AFI!G{1~+Q zA(xj0dZdyJ@r$M^d^~YyNiIET$k*p4)m&^yqSXFEp=_>bHX zq+V{(|7ZC{`j4Y`*e_Wl9YE_uUj{F~*k4~?Av1)h*RrC32=(wG_b7<#_ECznG2)w+ z03lAoxg-H3C|zg+%+RPf;~!4e-Iq;q{34+E8!Fi3C5wT`EBt*JK9*H%TMtXHx`b2> zr}|h=S$jLk+A^AeWyIrFl##Jy*v)1zSdsBoGBOl7L2Fgi`%TcKRvJ}Sb@ixZh>Kpf z9rWCBRPb`{RFR@CZ#sswg?)olx^(bmbOP-63I(RLM=qg{ zUbyJq_^io7=SWS||CI5BeYNjFBJrVbifbJx;Xc=i*Iq=zu!Y)p;tY&tzSKv!P)%vv zJ=ynJtmJTVl#=k+QZB}>#@B5+2%_U&Jm8RP=$3<6N1xZ=<{yU8+k7Ek5_&Z|m|?O1 z?!ZSctXB9(#oUo<4&7!(2$gBbL4E^AW0K)}bG(HNAHIcp0EXRS!^ek^W0_jjHQ8no zL$p1YzRPzn6-k-bn+TdMTbVakU6)%MlAof=D7bY30KU1nE2|HNpN^&ZsatXU+N#U} zr*Ampg}I~edQ8{>Wz5hHf_VFADdKoF==vZ(^|93tmNznL}o z?SU1XpbCppn=h76r+N=vHH_>Ef<1KHSK}rE=&KeoEDwXlGKDI3vV$2f>1UE+d7Ap0 zk*=2c@!DEmPRs_2Yn8_ew?1Bk`PTl)abUMO(h>bW&NK3e!qn4Ls+D=X z`aIs(6PZ^l@@VNd_(E-VOfP7>ZO&TiUlA*uTkr}7iknn~j5Vi2f9xue=L99MokMW4 zuWpROTr8b`3DTgF!D|)A@)!Qs75Rw_CGVG=E_>DDCQfMEs&wtX>}jt%fT~Vib~bet z-YQK)yppkauIq6Ndw`v7MO3jqTy%Z!zE#cE@rPmf72UDKd}NiQa-^bWan-o0xaiO) zyNr;E4G4P}ZRYO@?cn(_o4)_UCjOt$P}@A|Iz4Q>d#bsjb5zCId3usEZ8|3Y5Ok^g z@i)iwQ>3h~FDR*UKEFisRrx?|uZ%@u$ZV~RL-`Ud zJ_by3Y#DHyOi+6IQOTi;n6U}YRXvF>iK@eBUD^(giL-Wi(S?c5 zWrs%6ol2w6N$s(6v^WQ~SYH1%&nM%hKqWLRe=t37O=C;{W8hc$&j`S`t;BvWDx`C8 z?7c1ww5SbyLM^d>91W~4Q%Ngu!rbDhM)!ku$jE{LQ4{FEJvdioEARYb-3*I1Qn-^) 
z@P?3W(v+N-SInV(X{qeo@c35syYwxi`EV1qU73tObzuE! zf-he$(b|I)y~!69i#W#&V>T)HBs(PXkql5_8SR4phRnP6P^NKd1`w5!<;aO3tyvdUM`L_yz$Sc4wjZK`N>7S`?Fv4D}60FB@m^Z1J6D zx>EDA$?d$H^iiE0--2kPXL7RB5^HMJazLyWqn`c3K5_9zei|x<4;D1$OWQa>=k;x0 zoT4j{{G{SI*2Cs^$*@Q-E%T7pC3bL!%n!X~a=ykFl`XDTjU6^V&BCr0IX>K<6kT^} zAF1B{1nm^`SI6y#LnKA&_V`e_k6gpW?nL!Nr*=Mm&6mY?_%_SW>^LOmKrkFPx=+8k zP=`)gx0~kSbS3tL^d?eqykxhO-&Ecp8#Ugac6IgD*#oa=N&xM#_<+sj71EKnYfAf- zULz`fP26p3>&wbEhf%!oTa@lWwpqoVyiUbzhvUV7-kzk9(AiKYUFW^8^V1$8VdKlz zA$qoCmXGd{MjAH5Ap62)ib83N^y4{EjhCtx@jzH2u6Cm-H8Qlm>*o_O>lGn0@9KXs zw|vh>p%8O*Tvg?h5@f?Hv^QdZaHemt^b*`|3!a||HLI;1I0$_MOaZ4;eIIXOm8W`K z@?WfU^dF)z4Y)L7x$8r=1zJ%LaP`{Zu8V*x#%Btv;9qQ$@<%cpk;!Q`;7zhB>1zW@QJ7!o+pR7=3zJ zY{P|AQjIn6cp+f3cAc=no#LQ+Jo`WECx_-9hdCtu3JYa(*tZ&it zWqa|Y?ls&jv~G8y|YFszRx{;ET#V%Y1PQ4Tg)xP z+Pqdl+?-n#>fs~ehmWF}B9~7%3qbu&D`2N0Jw{B~n%tWf0Gux~@VH{F`Mi7h%TC#f zIDHz;1MN6Sk~jQ^$55a4JtX*t!0+uX&=G2#pCx;3u?uS2iqY33zGTeO0yNsUtt2X7 zlS#oOlY=^gU(!5>22f3b0WOxI&^5rxtZV$DfzwLS!zy9bDrU7>pqA( zAPv3EbhR5rxj}|&6onOY*IW}TGbw3p3+Kxhwzi7iOZ+cvaD(kdsfkI9wH530E-XY zOYgQDSjEb>!C1KkGo|w1XnY0v)-(y*NJ-|!$t>6{_2Ar65mN28(y&8q%V)0-|CL9j z26hut^Y)Kph!Ltq!W!e@VtltAcHvlyZaU%7jAW)M9_NwqOGisIv=9F@FRVxnDkN?F zSyCc|7jqz8m#Q%%28qq%Wp^w)2Q*_|`&bjxV7_IQ;}y2QR{_+vHhLk?wU%c6yw2(F zVXv{Pty0nIFnoc&8`m1K_v!<4wi?Ti(dHFBY1d_*unO)Tg!5E3usAZi?xQvz)ZhJ* zwI+UJvFiV@_mx3$ZdtntfnW&)cSwN7CAho0duW2YJBv@*#wLyDUvhLl~u;%rt!8FtHb zTgUyVQ|Pra$QNrhSkbs|7}rHxlTb!wXb{`$wCnPKC?=qp)7*&GhD5aV(_f6+FXCS= zn(-rOJS96zk$8d&;G$o^_r1SWrMNMkXED_3tTj?T(x1&s;T3Ct@666Ft#XiYM>}jM zAXLG}w^}iq{*8UVFh&u@rmKq**s};3X^TIEmD-&v#}Qw3;#&}<6QcwPM=N5B7O6+sL!eUR*1y)e00A7vvfubvzx&6}VFpIkI`jOeV{KWHXKub)TB{ z9~*@@&7@?*r9>@P-Mcy!VyPbMQ@?z-+@;RvloYZLB7MS&RU{0b^^CPd>M;$GwY(jg z+aB`TG_;-cdgA(w-)0>Vn9yScCt|BT}h&F8x*}*TBS66>aOw3CWfOFD;`LOr&x`;_~ga$NyhvT zy;DCdq^c$VT_s?+%a{PZ=44!?<|GCk%|O;=XK=!lB@@kedipgMmg`kLhvC*5!|0$B zHM1?a6qg13iP>S7BqQEycnLfZSQctr%g``{{)O>-;)?)Jo)vGioRAL_H#v1=kT{Oh zsRyjNeRW<@s)DT`);Y&lI`S35#sbvbO#&P8R7tuGayz z5`1%y*TTG4HAMO*fYyx#)rg0Ah6rSOP!VIddHy_Pz4Y7FQi+{YigPcqMSZbMUFY6_0~S8$1e|X^4np%G(x&H1k?2qf^Ze}Y-{}%Nx zT!|0mN^vrk`cWvNX+LGLy`+R9+8PSO!7n1Npu-1VOzdK<19DcIc}%H_kO*`QEkz^Q zdy470-0n1EvyH%hWv$UVinUS5DN!Ol zn~8nfsproXBgZ1*qBF1{j{1d_T~SkNU&p^g(>8qQkf0M5Z9))#a?x`mWj3ze?w?Op zSW~lQOeDUJ$QC3?5R0FqR7*wrDZp23;0Z|DW)b?vLHJ>|>~JC7RdI-q4^hIwQL%D6 z8L;a(RN%!~(ua6g$-1eZ5m789HEo<0dfZF1a}hUjJltKRB*R*?L|VsLsy29^g@5th zS{~gZzscv;V0bAQu2G>E67*wHmH6I`{Fev`nr;T=Ujj_8f7>K4RK*ye>r5Jzg%>IW zwMh(an}!`q!x5TlhhF^pD7NcN8_JI>HJ&*}L+Ov$c&ItAi9RWj zv|EXNe<@>)XM-+YBHQ>mb+n(xot<$p`I^+1SGknf8D)Yn^90x%Q5`lRx`x$|{GL%@ z4JM@O7TRn-1&e3)XUhcBt*M%&ROg&JJYO#p9kCB%B8^gwa1Mkb&iG?1FjNDViXA@8MDwb|=`DM{2bFGIFs&INhR9ne^g+EZ!E}LxjHN@{op^60R~VZ=@ML1G)eK zCor)8d>PWed|qRLNt?Y2?xu3P>sF{^ehPaoqw5_;iX<{s!SCO2kWh^3?z3R@ko}f4iZm z8Y-vf!I@^pg3#4^6GArs=3fJpf1c>@4fycGUDxM(ieHkzgDCAQVW{c~5)PXDIq<)n zdmw)U-Lm)H)0nLf>y+Af?_daVUu{UT`~Ryh{yDSv`oGnZq@rML8&zO@=*zpcAGq%% zYJP8r|LG6v*IYC|*|EVf`rL-9zYc(kJ~EhJ?|i8X`QO0nz3*oXT!&Lv@mCy9-duBUuiOuy)UxEux~c~Y z{Q%ahOH5F<&Hj5Yfd3h4zx;kY7gh1^1fc0R+fSs2BpE*UzmUL7bOd%^nZ_tq`~if@ zk|R?jpxUB99m8knZ@~>`TcP|fthR>u`&Hrp0A1)e{v!`&L22Mmic&m9`i~SP2qn!w z*&}NW>p!yxl+XU;TER}*|ID>es`)1x^wapyGzcZ;e_|-O%Kywzzd7?yK1TxnXFmUJ zVf;z8n5{7Xk!pVnB!uE26b%0>kiS`@|J9KH1?0Nk{r>=2Su}`1M(;cP zm3hdAcHRfJFnMBB1pU9+<$hA{i?yB87T2sUtsS{+_-~p&v&WYIC69{!-j_!Di=PIT za|DgP zZVwwPIIiI?-V!@nj4^PHHCsdyo&F<~B<$4iNs4Oxpm&8OOlH*b-+B(lqR`Rf`O@te zP`M?re8yIQ;r1QEZUx@1THb7FmV zrQ&uclp3tT#7$y)BUx#{BKf`=LGcvbC-VVKtR?CLRGei^$0woFFXy!XjeA!F3xzhC zmmbt5(1awC*|0E7K0tBaZ+7fSpTR331hg7IV>&DW4XxBef0X%A-~M*%ezqjh-|X$b 
z(F548FQDYenecZXG3y+zQs}i5Sr_el+278kZx88bM7~glB^^S3ymJo&=NAqCZX?k6 zzJRNg{sIi6p>|7v!LR))XpX4byjCCJr(y2Z?+F#Cn!n)qC~x;yQJiCX6wKBV@s2#K zzsk9j;c{c@=tppLHq~02bEiL7YX!52VrG@Z8z}V7dqeb;M#u}EHt{O_mKM6 zgQA&KD3Y$fm-z$Kq+wpRfv!jp2YL-j)%agWK&}`A6`C6P(ZsA+XX)kPw>Vg7*{lh; z%8q0|jjJtgKtnp-Z}&Hg=3P}l#vilET!Dt$X)a|J{_69-v-*RE^#0-J|HCd&G>4_j z4)#lG%KPwtGa>C^gvu{Y0I2GtCqgARd4(OU5X{^~tKad)it{h>6%0hnT2s?6QYa=x z0u}~DCrf1+!P4lR-Y4kWU0r{gs4~hhw15h^L>Ribxa(NZo(R`tB^&q)|1J>W3;4rU z`?s?B;+GV&?j**i>BrdkF zpOfr_&IkG5&8PSGeC*2<@+qM65sr43$_jvmirJ$Vz11S=rvt@r)%GnB$}*c}>i(K{ z9m*G_MaYh_6C0$KYdV4S7iIE&kMSG+Pn7>|Q~$$a^_3=r?t9Eu*>z~}F5g*9UG^{W zjIV-!X!rb|+RfnB_E+&laxnehiAVQTTNHZanD}p!`MvG`CK;j5(vmBs1(ClD>oW-4!~0!}%bkSARjD#>*;t;Ntd+rbf5(oX9u8&zksP|PYhsV4rb40`_9SM7y= z*ZCr+&Vy&QjJ1S`jn_*v_vm?3*1jgw7P!Rp57qC2!)vN;P7B>!K(A-ud&ahBBCs*4 zD!LgJCrj4)`P|&DmNhNBo_6$x{}iPf!qnh`-^B$_=o<@x#LQg9GZXoEL10 z9a>(H_RqS%K+_b8UxU*QNwYkhS*^4=ymg|D^taEu2g))6ykCt9cAuj9-G+yxGovm( zJHN9;RTGIW)qbC!f0s({(mo^;(?N+*Sve|Eu3pcyJA|EJDoJfIDiH7Qw<$Ox?yOX5 za-|&q?hBoSLyY5Po?uaAP-Ka>5AusMgVZWhFi7)pPpIz51{_tX;IxAS#KLG8**?=Hej=nlGV)t(~2Ij@m`#zHa1Uj@_t zQ0shs^DCrus$+k+W$|oKx+Vs<*qryN^PAQC=Z(DNFN1G<3k&(G-5pR1JUMQ>xU4kkLPz6&3bYNWEth6RIXFVUvKp;MpQt~U6pN$}h zN=a(6j6&8LxOdmXt6AyCi5@U;oMc1PeyY@huwL?aV0+wHMZ`A%I{+7_`+0o0&ONN}$?EBY()Icu-d&v78+kXA5OnZ_ zYMV}porL4#WN!WxD*F@{{g5|lFSR;jMuqmP@RDXOdi};|)0sL???znnK9w2$ApT=B zGb+t3--cQtKK?8p(OaD<@szEu)QmFw@n6tbRzZ%L21|R8Wx5@b>&8e-n~DHh9PFXbyYPM z#rN%%bj)Rq22dI*(Ok(dkA%|WAQK5vTrSnzNl6w+Hlb_NoITD^Kqt z`jVEEHplU0+;30W?k)pwJfD6hRA4@6D*Yxm%)N=}+{OaMaBcd86iu=(K1MKyZIVezLmx(zSe^fw=)O`zHmBot=+}G-j&$C1 zjL)?;w*L0%*7|gg7dLYaC*&55CH~<&EpX+$Mh`Zq$L};gL^Ui}NmHnbTgM;(T|?Fq zJ|MmhUAvOTLj*RYD_OELPL&328)}DVnO%g%@9Fgdc`P3`iITwq#aJF1paL@gzNP{aHtsf&bJw8?!a- zm<*LN8urfUH)9Ov6Dg>jD)X$-6Rf`S(KL3+LUUZ{d~D-Q{5bUVjt{gF=`UUbt`^O> zKmJkJPJ5&S&1X&g>{qKM3f-oUa8uamRl3YLVS*%h8igvddphalvC;JDjY zHR@qe7#o=DTg0@KH8@d{i{*Oti5;YlXpg$dLPMjnRRy(`8{&$!;x#CVZ=+`ZgLI*G z_Vjk|%h!WQ$y7EDI5V!tn-Q3t)$oj-To?-8jVy=b>o~H#e5Otgd9(w1{>PRD!IP)& zMY9I^x~A~f6B_m_dX~FO^G|O>XE(wIbbIBd-1?d-|QCQ0k0vk%cSH9&AuoL&SsnOTXDNCiF9x z7~)OXo@LsS$anBvxe-4Cfr_1A&SuTMbnwZkaI5Pol5hImFuA_A7la8RRm1 zuY?#3kbWaT+`H19xze0-g4^M z91LUCXvr2m`dxQOk_r5k1>Rw`S9h_EBAzFdo@4DiXeUc9QV}zCO2>^985Dio)1s%) z&q8edR%6!aMM3&9{PIDgm>2EPQ9tPxwu!#ZD5cx_MW7~$JR!HDp9W53@!`7j;!w+% z+LNxfaz@eAl*sIc8R~cklpUYId_8{6*0;y!x|zY5Ak^0!EkEcuddd}ZE>O07!ml8! 
[GIT binary patch data for doc/source/_static/ci.png elided]
zh?3LmNN4iSL4v`kZ$JroBYE`qNB&0TKvKO2<;eL@RQv~V1^@}-9x;F6liwxc*Y-{#`98`ul)X)JMHLf1!rIrHx;jNIe7s)LN_c;J-^L0#Xc5Dk%Tk1F!hK z0!k!*$DHT?T`CHYlJ++j_;+Rg>YG+PAZIQr=l^#p1|So^$L_QKFMa^r;!gxXr!!vb rKf>+T?b5{m1K1yM`hNwO9|m_fkugx@?K`EC2rkE|$V` literal 0 HcmV?d00001 diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 83cc1777b35f6..82f9b18c1e2eb 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -113,13 +113,6 @@ want to clone your fork to your machine:: This creates the directory `pandas-yourname` and connects your repository to the upstream (main project) *pandas* repository. -The testing suite will run automatically on Travis-CI and Appveyor once your -pull request is submitted. However, if you wish to run the test suite on a -branch prior to submitting the pull request, then Travis-CI and/or AppVeyor -need to be hooked up to your GitHub repository. Instructions for doing so -are `here `__ for -Travis-CI and `here `__ for AppVeyor. - Creating a branch ----------------- @@ -432,7 +425,8 @@ Building master branch documentation When pull requests are merged into the *pandas* ``master`` branch, the main parts of the documentation are also built by Travis-CI. These docs are then hosted `here -`__. +`__, see also +the :ref:`Continuous Integration ` section. Contributing to the code base ============================= @@ -444,8 +438,9 @@ Code standards -------------- Writing good code is not just about what you write. It is also about *how* you -write it. During testing on Travis-CI, several tools will be run to check your -code for stylistic errors. Generating any warnings will cause the test to fail. +write it. During :ref:`Continuous Integration ` testing, several +tools will be run to check your code for stylistic errors. +Generating any warnings will cause the test to fail. Thus, good style is a requirement for submitting code to *pandas*. In addition, because a lot of people use our library, it is important that we @@ -467,7 +462,8 @@ Here are *some* of the more common ``cpplint`` issues: - we restrict line-length to 80 characters to promote readability - every header file must include a header guard to avoid name collisions if re-included -Travis-CI will run the `cpplint `_ tool +:ref:`Continuous Integration `. will run the +`cpplint `_ tool and report any stylistic errors in your code. Therefore, it is helpful before submitting code to run the check yourself:: @@ -514,7 +510,8 @@ the more common ``PEP8`` issues: - we restrict line-length to 79 characters to promote readability - passing arguments should have spaces after commas, e.g. ``foo(arg1, arg2, kw1='bar')`` -Travis-CI will run the `flake8 `_ tool +:ref:`Continuous Integration ` will run +the `flake8 `_ tool and report any stylistic errors in your code. Therefore, it is helpful before submitting code to run the check yourself on the diff:: @@ -542,6 +539,35 @@ existing code, so don't break it if at all possible. If you think breakage is r clearly state why as part of the pull request. Also, be careful when changing method signatures and add deprecation warnings where needed. +.. _contributing.ci: + +Testing Thru Continuous Integration +----------------------------------- + +The pandas testing suite will run automatically on Travis-CI, Appveyor, and Circle CI +continuous integration services, once your pull request is submitted. 
+However, if you wish to run the test suite on a branch prior to submitting the pull request, +then Travis-CI, Appveyor and/or CircleCI need to be hooked up to your GitHub repository. +Instructions for doing so are `here `__ for +Travis-CI, `here `__ for Appveyor, and +`here `__ for CircleCI. + +A pull-request will be considered for merging when you have an all 'green' build. See +this example. + +.. image:: _static/ci.png + + +.. note:: + + Pushing to *your* branch will cancel any non-currently-running tests for that + same pull-request for Appveyor. For Travis CI, you can enable the auto-cancel feature + `here `__ and + for CircleCI `here `__. + +.. _contributing.tdd: + + Test-driven development/code writing ------------------------------------ @@ -875,12 +901,8 @@ updated. Pushing them to GitHub again is done by:: git push -f origin shiny-new-feature This will automatically update your pull request with the latest code and restart the -Travis-CI tests. +:ref:`Continuous Integration ` tests. -If your pull request is related to the ``pandas.io.gbq`` module, please see -the section on :ref:`Running Google BigQuery Integration Tests -` to configure a Google BigQuery service -account for your pull request on Travis-CI. Delete your merged branch (optional) ------------------------------------ From cd51bdd27423fab8a69431dec5dabf4b6bf56c44 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 3 Apr 2017 15:49:00 -0400 Subject: [PATCH 327/933] DOC: add section on how to use parametrize to contributing.rst (#15883) closes #15608 --- doc/source/contributing.rst | 132 +++++++++++++++++++++++++++++------- 1 file changed, 108 insertions(+), 24 deletions(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 82f9b18c1e2eb..467d6456d60cd 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -51,14 +51,9 @@ Bug reports must: ... ``` -#. Include the full version string of *pandas* and its dependencies. In versions - of *pandas* after 0.12 you can use a built in function:: - - >>> from pandas.util.print_versions import show_versions - >>> show_versions() - - and in *pandas* 0.13.1 onwards:: +#. Include the full version string of *pandas* and its dependencies. You can use the built in function:: + >>> import pandas as pd >>> pd.show_versions() #. Explain why the current behavior is wrong/not desired and what you expect instead. @@ -209,7 +204,7 @@ At this point you can easily do an *in-place* install, as detailed in the next s Creating a Windows development environment ------------------------------------------ -To build on Windows, you need to have compilers installed to build the extensions. You will need to install the appropriate Visual Studio compilers, VS 2008 for Python 2.7, VS 2010 for 3.4, and VS 2015 for Python 3.5. +To build on Windows, you need to have compilers installed to build the extensions. You will need to install the appropriate Visual Studio compilers, VS 2008 for Python 2.7, VS 2010 for 3.4, and VS 2015 for Python 3.5 and 3.6. For Python 2.7, you can install the ``mingw`` compiler which will work equivalently to VS 2008:: @@ -219,7 +214,7 @@ or use the `Microsoft Visual Studio VC++ compiler for Python `__. Read the references below as there may be various gotchas during the installation. -For Python 3.5, you can download and install the `Visual Studio 2015 Community Edition `__. +For Python 3.5 and 3.6, you can download and install the `Visual Studio 2015 Community Edition `__. 
Here are some references and blogs:
@@ -544,26 +539,26 @@ signatures and add deprecation warnings where needed.
Testing Through Continuous Integration
--------------------------------------

-The pandas testing suite will run automatically on Travis-CI, Appveyor, and Circle CI
-continuous integration services, once your pull request is submitted.
+The *pandas* testing suite will run automatically on `Travis-CI `__,
+`Appveyor `__, and `Circle CI `__ continuous integration
+services, once your pull request is submitted.

However, if you wish to run the test suite on a branch prior to submitting the pull request,
-then Travis-CI, Appveyor and/or CircleCI need to be hooked up to your GitHub repository.
-Instructions for doing so are `here `__ for
-Travis-CI, `here `__ for Appveyor, and
-`here `__ for CircleCI.
+then the continuous integration services need to be hooked up to your GitHub repository. Instructions are here
+for `Travis-CI `__,
+`Appveyor `__, and `CircleCI `__.

-A pull-request will be considered for merging when you have an all 'green' build. See
-this example.
+A pull-request will be considered for merging when you have an all 'green' build. If any tests are failing,
+then you will get a red 'X', where you can click through to see the individual failed tests.
+This is an example of a green build.

.. image:: _static/ci.png

-
.. note::

-   Pushing to *your* branch will cancel any non-currently-running tests for that
-   same pull-request for Appveyor. For Travis CI, you can enable the auto-cancel feature
-   `here `__ and
-   for CircleCI `here `__.
+   Each time you push to *your* fork, a *new* run of the tests will trigger on the CI. Appveyor will auto-cancel
+   any non-currently-running tests for that same pull-request. You can enable the auto-cancel feature for
+   `Travis-CI here `__ and
+   for `CircleCI here `__.

.. _contributing.tdd:

@@ -620,8 +615,96 @@ the expected correct result::

        assert_frame_equal(pivoted, expected)

+How to use ``parametrize``
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+`pytest `__ has a nice feature `parametrize `__ to allow
+testing of many cases in a concise way that enables an easy-to-read syntax.
+
+.. note::
+
+   The existing *pandas* test structure is *mostly* class based, meaning that you will typically find tests wrapped in a class, inheriting from ``tm.TestCase``.
+
+   .. code-block:: python
+
+      class TestReallyCoolFeature(tm.TestCase):
+          ....
+
+   Going forward we are moving to a more *functional* style, please see below.
+
+
+Here is an example of a self-contained set of tests that illustrate multiple features that we like to use.
+
+- functional style: tests are like ``test_*`` and *only* take arguments that are either fixtures or parameters
+- using ``parametrize``: allow testing of multiple cases
+- ``fixture``, code for object construction, on a per-test basis
+- using bare ``assert`` for scalars and truth-testing
+- ``tm.assert_series_equal`` (and its counterpart ``tm.assert_frame_equal``), for pandas object comparisons.
+- the typical pattern of constructing an ``expected`` and comparing versus the ``result``
+
+We would name this file ``test_cool_feature.py`` and put it in an appropriate place in the ``pandas/tests/`` structure.
+
+.. code-block:: python
+
+    import pytest
+    import numpy as np
+    import pandas as pd
+    from pandas.util import testing as tm
+
+    @pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64'])
+    def test_dtypes(dtype):
+        assert str(np.dtype(dtype)) == dtype
+
+    @pytest.fixture
+    def series():
+        return pd.Series([1, 2, 3])
+
+    @pytest.fixture(params=['int8', 'int16', 'int32', 'int64'])
+    def dtype(request):
+        return request.param
+
+    def test_series(series, dtype):
+        result = series.astype(dtype)
+        assert result.dtype == dtype
+
+        expected = pd.Series([1, 2, 3], dtype=dtype)
+        tm.assert_series_equal(result, expected)
+
+
+A test run of this yields:
+
+.. code-block:: shell
+
+    (pandas) bash-3.2$ pytest test_cool_feature.py -v
+    =========================== test session starts ===========================
+    platform darwin -- Python 3.5.2, pytest-3.0.5, py-1.4.31, pluggy-0.4.0
+    collected 8 items
+
+    test_cool_feature.py::test_dtypes[int8] PASSED
+    test_cool_feature.py::test_dtypes[int16] PASSED
+    test_cool_feature.py::test_dtypes[int32] PASSED
+    test_cool_feature.py::test_dtypes[int64] PASSED
+    test_cool_feature.py::test_series[int8] PASSED
+    test_cool_feature.py::test_series[int16] PASSED
+    test_cool_feature.py::test_series[int32] PASSED
+    test_cool_feature.py::test_series[int64] PASSED
+
+Tests that we have ``parametrized`` are now accessible via the test name; for example, we could run these with ``-k int8`` to sub-select *only* those tests which match ``int8``.
+
+
+.. code-block:: shell
+
+    (pandas) bash-3.2$ pytest test_cool_feature.py -v -k int8
+    =========================== test session starts ===========================
+    platform darwin -- Python 3.5.2, pytest-3.0.5, py-1.4.31, pluggy-0.4.0
+    collected 8 items
+
+    test_cool_feature.py::test_dtypes[int8] PASSED
+    test_cool_feature.py::test_series[int8] PASSED
+
+
 Running the test suite
-~~~~~~~~~~~~~~~~~~~~~~
+----------------------

The tests can then be run directly inside your Git clone (without having to
install *pandas*) by typing::
@@ -675,7 +758,8 @@ Furthermore one can run with an imported pandas to run tests similarly.

 Running the performance test suite
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+----------------------------------
+
Performance matters and it is worth considering whether your code has introduced
performance regressions. *pandas* is in the process of migrating to
`asv benchmarks `__
From ff652a5abafee88cbd858c12cc06dd60e73a6647 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Mon, 3 Apr 2017 16:45:41 -0400
Subject: [PATCH 328/933] BUG: Patch handling no NA values in TextFileReader

When cleaning `na_values` during initialization of `TextFileReader`,
we return a `list` whenever we specify that `na_values` should be
empty. However, the rest of the code expects a `set`.

Closes #15835.
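A minimal sketch of the affected case, mirroring the regression test added
below (illustrative only; it assumes a pandas checkout with this patch
applied):

    import pandas as pd
    from pandas.compat import StringIO

    data = "a,1\nb,2"

    # With keep_default_na=False there are no NA values to clean;
    # _clean_na_values now returns an empty set() instead of a list,
    # matching what the rest of the parser expects, so reading with an
    # index column no longer fails.
    df = pd.read_csv(StringIO(data), keep_default_na=False, index_col=0)
    print(df)  # one column named '1', index ['b'] named 'a'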
Author: gfyoung Closes #15881 from gfyoung/keep-default-na-excel and squashes the following commits: 0bb6f64 [gfyoung] BUG: Patch handling no NA values in TextFileReader --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/parsers.py | 2 +- pandas/tests/io/parser/na_values.py | 11 ++++++++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 63aea96ef3369..fd7744158829f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -995,6 +995,7 @@ I/O - Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`) - Bug in ``pd.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) +- Bug in ``pd.read_csv()`` when an index was specified and no values were specified as null values (:issue:`15835`) - Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 30b88de91ef76..0080ded1ac03d 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2890,7 +2890,7 @@ def _clean_na_values(na_values, keep_default_na=True): if keep_default_na: na_values = _NA_VALUES else: - na_values = [] + na_values = set() na_fvalues = set() elif isinstance(na_values, dict): na_values = na_values.copy() # Prevent aliasing. diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index 2cbd7cdedf2ab..cf29dbdfef49d 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -11,7 +11,7 @@ import pandas.io.parsers as parsers import pandas.util.testing as tm -from pandas import DataFrame, MultiIndex +from pandas import DataFrame, Index, MultiIndex from pandas.compat import StringIO, range @@ -303,3 +303,12 @@ def test_na_values_uint64(self): expected = DataFrame([[str(2**63), 1], ['', 2]]) out = self.read_csv(StringIO(data), header=None) tm.assert_frame_equal(out, expected) + + def test_empty_na_values_no_default_with_index(self): + # see gh-15835 + data = "a,1\nb,2" + + expected = DataFrame({'1': [2]}, index=Index(["b"], name="a")) + out = self.read_csv(StringIO(data), keep_default_na=False, index_col=0) + + tm.assert_frame_equal(out, expected) From eedcc8fd493158be3d88cf2aa139914a7b21c349 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 3 Apr 2017 17:37:33 -0400 Subject: [PATCH 329/933] DOC: whatsnew cleaning --- doc/source/whatsnew/v0.20.0.txt | 40 ++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fd7744158829f..107b682a86d00 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1,7 +1,7 @@ .. 
_whatsnew_0200: -v0.20.0 (????, 2017) --------------------- +v0.20.0 (April ??, 2017) +------------------------ This is a major release from 0.19 and includes a small number of API changes, several new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all @@ -9,12 +9,13 @@ users upgrade to this version. Highlights include: -- Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`) - The ``.ix`` indexer has been deprecated, see :ref:`here ` -- Switched the test framework to `pytest`_ (:issue:`13097`) +- Improved user API when accessing levels in ``.groupby()``, see :ref:`here ` +- Improved support for UInt64 dtypes, see :ref:`here ` - A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec, see :ref:`here ` - -.. _pytest: http://doc.pytest.org/en/latest/ +- Support for S3 handling now uses ``s3fs``, see :ref:`here ` +- Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here ` +- Switched the test framework to use `pytest `__ (:issue:`13097`) Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. @@ -542,7 +543,7 @@ S3 File Handling ^^^^^^^^^^^^^^^^ pandas now uses `s3fs `_ for handling S3 connections. This shouldn't break -any code. However, since s3fs is not a required dependency, you will need to install it separately, like ``boto`` +any code. However, since ``s3fs`` is not a required dependency, you will need to install it separately, like ``boto`` in prior versions of pandas. (:issue:`11915`). .. _whatsnew_0200.api_breaking.partial_string_indexing: @@ -776,9 +777,9 @@ New Behavior: Index.intersection and inner join now preserve the order of the left Index ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -`:meth:Index.intersection` now preserves the order of the calling ``Index`` (left) +:meth:`Index.intersection` now preserves the order of the calling ``Index`` (left) instead of the other ``Index`` (right) (:issue:`15582`). This affects the inner -joins (`:meth:DataFrame.join` and `:func:merge`) and the ``.align`` methods. +joins, :meth:`DataFrame.join` and :func:`merge`, and the ``.align`` methods. - ``Index.intersection`` @@ -844,8 +845,6 @@ Other API Changes - ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`) - ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype`` - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`) -- Reorganization of timeseries development tests (:issue:`14854`) -- Reorganization of date converter tests (:issue:`15707`) - Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`) - ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`) - ``.merge()`` and ``.join()`` on ``category`` dtype columns will now preserve the category dtype when possible (:issue:`10409`) @@ -860,6 +859,16 @@ Other API Changes - ``NaT`` will now returns ``NaT`` for ``tz_localize`` and ``tz_convert`` methods (:issue:`15830`) +.. 
_whatsnew_0200.develop: + +Development Changes +~~~~~~~~~~~~~~~~~~~ + +- Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`) +- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`) +- Reorganization of timeseries tests (:issue:`14854`) +- Reorganization of date converter tests (:issue:`15707`) + .. _whatsnew_0200.deprecations: Deprecations @@ -915,7 +924,7 @@ Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Improved performance of ``pd.wide_to_long()`` (:issue:`14779`) -- Increased performance of ``pd.factorize()`` by releasing the GIL with ``object`` dtype when inferred as strings (:issue:`14859`) +- Improved performance of ``pd.factorize()`` by releasing the GIL with ``object`` dtype when inferred as strings (:issue:`14859`) - Improved performance of timeseries plotting with an irregular DatetimeIndex (or with ``compat_x=True``) (:issue:`15073`). - Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`, :issue:`15635`) @@ -1000,7 +1009,8 @@ I/O - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) - Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) -- Bug in ``.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`) +- Bug in ``.to_json()`` for the C engine where rollover was not correctly handled for case where frac is odd and diff is exactly 0.5 (:issue:`15716`, :issue:`15864`) +- Bug in ``pd.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`) - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) - Bug in ``pd.read_msgpack()`` which did not allow loading of a dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) - Bug in ``pd.read_msgpack()`` when deserializing a ``CategoricalIndex`` (:issue:`15487`) @@ -1011,7 +1021,6 @@ I/O - Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`) - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`) -- Bug in ``pd.to_json()`` for the C engine where rollover was not correctly handled for case where frac is odd and diff is exactly 0.5 (:issue:`15716`, :issue:`15864`) Plotting ^^^^^^^^ @@ -1026,7 +1035,7 @@ Groupby/Resample/Rolling - Properly set ``__name__`` and ``__qualname__`` for ``Groupby.*`` functions (:issue:`14620`) - Bug in ``GroupBy.get_group()`` failing with a categorical grouper (:issue:`15155`) - Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`) -- Bug in groupby operations with timedelta64 when passing ``numeric_only=False`` (:issue:`5724`) +- Bug in groupby operations with ``timedelta64`` when passing ``numeric_only=False`` (:issue:`5724`) - Bug in ``groupby.apply()`` coercing ``object`` dtypes to numeric types, when not all values were numeric (:issue:`14423`, :issue:`15421`, :issue:`15670`) - Bug in 
``resample``, where a non-string ``loffset`` argument would not be applied when resampling a timeseries (:issue:`13218`) - Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`) @@ -1073,6 +1082,5 @@ Other - Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`) - Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) -- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`) - Bug in interactions with ``Qt`` when a ``QtApplication`` already exists (:issue:`14372`) - Avoid use of ``np.finfo()`` during ``import pandas`` removed to mitigate deadlock on Python GIL misuse (:issue:`14641`) From da0523a346abd9575ab05746e242ec67c1c442d4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 3 Apr 2017 17:49:22 -0400 Subject: [PATCH 330/933] API: expose pandas.errors closes #14800 Author: Jeff Reback Closes #15541 from jreback/exceptions and squashes the following commits: e5fbdc8 [Jeff Reback] give nicer deprecation / message on infer_dtype moving ab4525b [Jeff Reback] typo on pandas.errors in whatsnew d636ef7 [Jeff Reback] document removed exceptions 3dc4b9a [Jeff Reback] more docs for exceptions 2bb1fbd [Jeff Reback] remove AmbiguousIndexError, completely unused 5754630 [Jeff Reback] fix doc-string 35d225f [Jeff Reback] more examples e91901d [Jeff Reback] DOC: better docs on infer_type 7e8432d [Jeff Reback] remove need for PandasError sub-class 92b2fdc [Jeff Reback] corrections 991fbb4 [Jeff Reback] API: expose pandas.errors eec40cd [Jeff Reback] add pandas.api.lib add infer_dtype to pandas.api.lib --- doc/source/whatsnew/v0.20.0.txt | 26 +++++ pandas/__init__.py | 3 +- pandas/_libs/src/inference.pyx | 110 +++++++++++++++--- pandas/api/lib/__init__.py | 5 + pandas/compat/numpy/function.py | 2 +- pandas/computation/align.py | 7 +- pandas/core/common.py | 30 +---- pandas/core/frame.py | 4 +- pandas/core/indexing.py | 2 +- pandas/core/ops.py | 4 +- pandas/core/panel.py | 8 +- pandas/errors/__init__.py | 57 +++++++++ pandas/indexes/multi.py | 6 +- pandas/io/common.py | 45 ++----- pandas/io/excel.py | 4 +- pandas/io/html.py | 3 +- pandas/io/packers.py | 2 +- pandas/io/parsers.py | 5 +- pandas/io/parsers.pyx | 6 +- pandas/io/pytables.py | 3 +- pandas/lib.py | 5 +- pandas/tests/api/test_api.py | 83 +------------ pandas/tests/api/test_lib.py | 10 ++ pandas/tests/api/test_types.py | 83 +++++++++++++ pandas/tests/computation/test_eval.py | 5 +- .../tests/frame/test_axis_select_reindex.py | 2 +- pandas/tests/frame/test_constructors.py | 8 +- pandas/tests/frame/test_to_csv.py | 2 +- pandas/tests/groupby/test_groupby.py | 4 +- pandas/tests/indexes/datetimes/test_ops.py | 2 +- pandas/tests/indexes/test_multi.py | 2 +- pandas/tests/indexing/test_ix.py | 2 +- pandas/tests/indexing/test_multiindex.py | 2 +- pandas/tests/io/parser/common.py | 3 +- pandas/tests/io/parser/dialect.py | 2 +- pandas/tests/io/parser/dtypes.py | 2 +- pandas/tests/io/parser/skiprows.py | 2 +- pandas/tests/io/parser/test_unsupported.py | 2 +- pandas/tests/io/test_common.py | 18 --- pandas/tests/io/test_packers.py | 2 +- pandas/tests/test_errors.py | 50 ++++++++ pandas/tests/test_window.py | 2 +- pandas/tests/tseries/test_resample.py | 2 +- pandas/tseries/index.py | 10 +- pandas/tslib.py | 2 +- pandas/util/depr_module.py | 15 ++- setup.py | 2 + 47 files changed, 420 insertions(+), 236 deletions(-) create mode 100644 pandas/api/lib/__init__.py create mode 100644 pandas/errors/__init__.py 
create mode 100644 pandas/tests/api/test_lib.py
create mode 100644 pandas/tests/api/test_types.py
create mode 100644 pandas/tests/test_errors.py

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 107b682a86d00..74fe7916523c5 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -76,6 +76,28 @@ Commonly called 'unix epoch' or POSIX time.

    pd.to_datetime([1, 2, 3], unit='D')

+.. _whatsnew_0200.enhancements.errors:
+
+pandas errors
+^^^^^^^^^^^^^
+
+We are adding a standard public location for all pandas exceptions & warnings, ``pandas.errors`` (:issue:`14800`). Previously
+these exceptions & warnings could be imported from ``pandas.core.common`` or ``pandas.io.common``. These exceptions and warnings
+will be removed from the ``*.common`` locations in a future release (:issue:`15541`).
+
+The following are now part of this API:
+
+.. code-block:: python
+
+   ['DtypeWarning',
+    'EmptyDataError',
+    'OutOfBoundsDatetime',
+    'ParserError',
+    'ParserWarning',
+    'PerformanceWarning',
+    'UnsortedIndexError',
+    'UnsupportedFunctionCall']
+
 .. _whatsnew_0200.enhancements.groupby_access:

 Groupby Enhancements
diff --git a/pandas/__init__.py b/pandas/__init__.py
index 5c7c9d44c5d10..1bc85899fb89f 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -62,7 +62,8 @@
 json = _DeprecatedModule(deprmod='pandas.json',
                          deprmodto='pandas.io.json.libjson')
 parser = _DeprecatedModule(deprmod='pandas.parser',
                            deprmodto='pandas.io.libparsers')
-lib = _DeprecatedModule(deprmod='pandas.lib', deprmodto='pandas._libs.lib')
+lib = _DeprecatedModule(deprmod='pandas.lib', deprmodto='pandas._libs.lib',
+                        moved={'infer_dtype': 'pandas.api.lib.infer_dtype'})
 tslib = _DeprecatedModule(deprmod='pandas.tslib',
                           deprmodto='pandas._libs.tslib')

 # use the closest tagged version if possible
diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx
index 933fc8fb1cc9b..b0fb7048f154c 100644
--- a/pandas/_libs/src/inference.pyx
+++ b/pandas/_libs/src/inference.pyx
@@ -218,9 +218,91 @@ cdef _try_infer_map(v):
     return None


-def infer_dtype(object _values):
+def infer_dtype(object value):
     """
-    we are coercing to an ndarray here
+    Efficiently infer the type of a passed value, or list-like
+    array of values. Return a string describing the type.
+
+    Parameters
+    ----------
+    value : scalar, list, ndarray, or pandas type
+
+    Returns
+    -------
+    string describing the common type of the input data.
+ Results can include: + + - string + - unicode + - bytes + - floating + - integer + - mixed-integer + - mixed-integer-float + - complex + - categorical + - boolean + - datetime64 + - datetime + - date + - timedelta64 + - timedelta + - time + - period + - mixed + + Raises + ------ + TypeError if ndarray-like but cannot infer the dtype + + Notes + ----- + - 'mixed' is the catchall for anything that is not otherwise + specialized + - 'mixed-integer-float' are floats and integers + - 'mixed-integer' are integers mixed with non-integers + + Examples + -------- + >>> infer_dtype(['foo', 'bar']) + 'string' + + >>> infer_dtype([b'foo', b'bar']) + 'bytes' + + >>> infer_dtype([1, 2, 3]) + 'integer' + + >>> infer_dtype([1, 2, 3.5]) + 'mixed-integer-float' + + >>> infer_dtype([1.0, 2.0, 3.5]) + 'floating' + + >>> infer_dtype(['a', 1]) + 'mixed-integer' + + >>> infer_dtype([True, False]) + 'boolean' + + >>> infer_dtype([True, False, np.nan]) + 'mixed' + + >>> infer_dtype([pd.Timestamp('20130101')]) + 'datetime' + + >>> infer_dtype([datetime.date(2013, 1, 1)]) + 'date' + + >>> infer_dtype([np.datetime64('2013-01-01')]) + 'datetime64' + + >>> infer_dtype([datetime.timedelta(0, 1, 1)]) + 'timedelta' + + >>> infer_dtype(pd.Series(list('aabc')).astype('category')) + 'categorical' + """ cdef: @@ -229,27 +311,27 @@ def infer_dtype(object _values): ndarray values bint seen_pdnat = False, seen_val = False - if isinstance(_values, np.ndarray): - values = _values - elif hasattr(_values, 'dtype'): + if isinstance(value, np.ndarray): + values = value + elif hasattr(value, 'dtype'): # this will handle ndarray-like # e.g. categoricals try: - values = getattr(_values, '_values', getattr( - _values, 'values', _values)) + values = getattr(value, '_values', getattr( + value, 'values', value)) except: - val = _try_infer_map(_values) - if val is not None: - return val + value = _try_infer_map(value) + if value is not None: + return value # its ndarray like but we can't handle - raise ValueError("cannot infer type for {0}".format(type(_values))) + raise ValueError("cannot infer type for {0}".format(type(value))) else: - if not isinstance(_values, list): - _values = list(_values) - values = list_to_object_array(_values) + if not isinstance(value, list): + value = list(value) + values = list_to_object_array(value) values = getattr(values, 'values', values) val = _try_infer_map(values) diff --git a/pandas/api/lib/__init__.py b/pandas/api/lib/__init__.py new file mode 100644 index 0000000000000..c86bfc6148655 --- /dev/null +++ b/pandas/api/lib/__init__.py @@ -0,0 +1,5 @@ +# flake8: noqa + +""" public toolkit API """ + +from pandas._libs.lib import infer_dtype diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index f448a9aad04c6..1dd22795533fc 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -21,7 +21,7 @@ from numpy import ndarray from pandas.util.validators import (validate_args, validate_kwargs, validate_args_and_kwargs) -from pandas.core.common import UnsupportedFunctionCall +from pandas.errors import UnsupportedFunctionCall from pandas.types.common import is_integer, is_bool from pandas.compat import OrderedDict diff --git a/pandas/computation/align.py b/pandas/computation/align.py index 4e12d58a4ab85..b4c80f4d493af 100644 --- a/pandas/computation/align.py +++ b/pandas/computation/align.py @@ -9,7 +9,8 @@ import pandas as pd from pandas import compat -import pandas.core.common as com +from pandas.errors import PerformanceWarning +from pandas.core.common 
import flatten from pandas.computation.common import _result_type_many @@ -101,7 +102,7 @@ def _align_core(terms): 'than an order of magnitude on term {1!r}, ' 'by more than {2:.4g}; performance may ' 'suffer'.format(axis, terms[i].name, ordm), - category=pd.core.common.PerformanceWarning, + category=PerformanceWarning, stacklevel=6) if transpose: @@ -121,7 +122,7 @@ def _align(terms): """Align a set of terms""" try: # flatten the parse tree (a nested list, really) - terms = list(com.flatten(terms)) + terms = list(flatten(terms)) except TypeError: # can't iterate so it must just be a constant or single variable if isinstance(terms.value, pd.core.generic.NDFrame): diff --git a/pandas/core/common.py b/pandas/core/common.py index 93e24dce8b623..bf4acf1fbf257 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -20,6 +20,10 @@ from pandas.api import types from pandas.types import common +# compat +from pandas.errors import ( # noqa + PerformanceWarning, UnsupportedFunctionCall, UnsortedIndexError) + # back-compat of public API # deprecate these functions m = sys.modules['pandas.core.common'] @@ -73,14 +77,6 @@ def array_equivalent(*args, **kwargs): return missing.array_equivalent(*args, **kwargs) -class PandasError(Exception): - pass - - -class PerformanceWarning(Warning): - pass - - class SettingWithCopyError(ValueError): pass @@ -89,24 +85,6 @@ class SettingWithCopyWarning(Warning): pass -class AmbiguousIndexError(PandasError, KeyError): - pass - - -class UnsupportedFunctionCall(ValueError): - pass - - -class UnsortedIndexError(KeyError): - """ Error raised when attempting to get a slice of a MultiIndex - and the index has not been lexsorted. Subclass of `KeyError`. - - .. versionadded:: 0.20.0 - - """ - pass - - class AbstractMethodError(NotImplementedError): """Raise this error instead of NotImplementedError for abstract methods while keeping compatibility with Python 2 and Python 3. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ffae22447cc65..237af0f85e866 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -56,7 +56,7 @@ is_named_tuple) from pandas.types.missing import isnull, notnull -from pandas.core.common import (PandasError, _try_sort, +from pandas.core.common import (_try_sort, _default_index, _values_from_object, _maybe_box_datetimelike, @@ -347,7 +347,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, mgr = self._init_ndarray(values, index, columns, dtype=dtype, copy=False) else: - raise PandasError('DataFrame constructor not properly called!') + raise ValueError('DataFrame constructor not properly called!') NDFrame.__init__(self, mgr, fastpath=True) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 61a847ccf1523..9e22bdd5facc4 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1140,7 +1140,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz) Going by Zen of Python? - "In the face of ambiguity, refuse the temptation to guess." + 'In the face of ambiguity, refuse the temptation to guess.' raise AmbiguousIndexError with integer labels? 
- No, prefer label-based indexing
        """
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index 5dac8a7e4d2da..9e777fd94de66 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -21,8 +21,8 @@
 from pandas.compat import bind_method
 import pandas.core.missing as missing

-from pandas.core.common import (_values_from_object, _maybe_match_name,
-                                PerformanceWarning)
+from pandas.errors import PerformanceWarning
+from pandas.core.common import _values_from_object, _maybe_match_name
 from pandas.types.missing import notnull, isnull
 from pandas.types.common import (needs_i8_conversion,
                                  is_datetimelike_v_numeric,
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index 5ab3c44b175fe..9e95023ccb359 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -21,7 +21,7 @@
 from pandas import compat
 from pandas.compat import (map, zip, range, u, OrderedDict, OrderedDefaultdict)
 from pandas.compat.numpy import function as nv
-from pandas.core.common import PandasError, _try_sort, _default_index
+from pandas.core.common import _try_sort, _default_index
 from pandas.core.frame import DataFrame
 from pandas.core.generic import NDFrame, _shared_docs
 from pandas.core.index import (Index, MultiIndex, _ensure_index,
@@ -174,7 +174,7 @@ def _init_data(self, data, copy, dtype, **kwargs):
                              copy=False)
             copy = False
         else:  # pragma: no cover
-            raise PandasError('Panel constructor not properly called!')
+            raise ValueError('Panel constructor not properly called!')

         NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)

@@ -1150,8 +1150,8 @@ def _construct_return_type(self, result, axes=None):
             return self._constructor_sliced(
                 result, **self._extract_axes_for_slice(self, axes))

-        raise PandasError('invalid _construct_return_type [self->%s] '
-                          '[result->%s]' % (self, result))
+        raise ValueError('invalid _construct_return_type [self->%s] '
+                         '[result->%s]' % (self, result))

     def _wrap_result(self, result, axis):
         axis = self._get_axis_name(axis)
diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py
new file mode 100644
index 0000000000000..f6719e7be421b
--- /dev/null
+++ b/pandas/errors/__init__.py
@@ -0,0 +1,57 @@
+# flake8: noqa
+
+""" expose public exceptions & warnings """
+
+from pandas._libs.tslib import OutOfBoundsDatetime
+
+
+class PerformanceWarning(Warning):
+    """
+    Warnings shown when there is a possible performance
+    impact.
+    """
+
+class UnsupportedFunctionCall(ValueError):
+    """
+    If attempting to call a numpy function on a pandas
+    object. For example using ``np.cumsum(groupby_object)``.
+    """
+
+class UnsortedIndexError(KeyError):
+    """
+    Error raised when attempting to get a slice of a MultiIndex
+    and the index has not been lexsorted. Subclass of `KeyError`.
+
+    .. versionadded:: 0.20.0
+
+    """
+
+
+class ParserError(ValueError):
+    """
+    Exception that is thrown when an error is encountered in `pd.read_csv`
+    """
+
+
+class DtypeWarning(Warning):
+    """
+    Warning that is raised for a dtype incompatibility. This
+    can happen whenever `pd.read_csv` encounters non-
+    uniform dtypes in a column(s) of a given CSV file
+    """
+
+
+class EmptyDataError(ValueError):
+    """
+    Exception that is thrown in `pd.read_csv` (by both the C and
+    Python engines) when empty data or header is encountered
+    """
+
+
+class ParserWarning(Warning):
+    """
+    Warning that is raised in `pd.read_csv` whenever it is necessary
+    to change parsers (generally from 'c' to 'python') contrary to the
+    one specified by the user due to lack of support or functionality for
+    parsing particular attributes of a CSV file with the requested engine
+    """
diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index e6ae0605d4758..f12b10ae682fa 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -19,12 +19,10 @@
     is_list_like,
     is_scalar)
 from pandas.types.missing import isnull, array_equivalent

+from pandas.errors import PerformanceWarning, UnsortedIndexError
 from pandas.core.common import (_values_from_object,
                                 is_bool_indexer,
-                                is_null_slice,
-                                PerformanceWarning,
-                                UnsortedIndexError)
-
+                                is_null_slice)
 import pandas.core.base as base
 from pandas.util.decorators import (Appender, cache_readonly,
diff --git a/pandas/io/common.py b/pandas/io/common.py
index e42d218d7925f..8bc7217db87f9 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -12,6 +12,14 @@
 from pandas.core.common import AbstractMethodError
 from pandas.types.common import is_number

+# compat
+from pandas.errors import (ParserError, DtypeWarning,  # noqa
+                           EmptyDataError, ParserWarning)
+
+# gh-12665: Alias for now and remove later.
+CParserError = ParserError
+
+
 try:
     from s3fs import S3File
     need_text_wrapping = (BytesIO, S3File)
@@ -69,43 +77,6 @@ def urlopen(*args, **kwargs):
 _VALID_URLS.discard('')


-class ParserError(ValueError):
-    """
-    Exception that is thrown by an error is encountered in `pd.read_csv`
-    """
-    pass
-
-
-# gh-12665: Alias for now and remove later.
-CParserError = ParserError
-
-
-class DtypeWarning(Warning):
-    """
-    Warning that is raised whenever `pd.read_csv` encounters non-
-    uniform dtypes in a column(s) of a given CSV file
-    """
-    pass
-
-
-class EmptyDataError(ValueError):
-    """
-    Exception that is thrown in `pd.read_csv` (by both the C and
-    Python engines) when empty data or header is encountered
-    """
-    pass
-
-
-class ParserWarning(Warning):
-    """
-    Warning that is raised in `pd.read_csv` whenever it is necessary
-    to change parsers (generally from 'c' to 'python') contrary to the
-    one specified by the user due to lack of support or functionality for
-    parsing particular attributes of a CSV file with the requsted engine
-    """
-    pass
-
-
 class BaseIterator(object):
     """Subclass this and provide a "__next__()" method to obtain an
     iterator. Useful only when the object being iterated is non-reusable (e.g.
OK for a diff --git a/pandas/io/excel.py b/pandas/io/excel.py index d324855bc2f4d..6d136869fc73f 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -15,9 +15,9 @@ from pandas.core.frame import DataFrame from pandas.io.parsers import TextParser +from pandas.errors import EmptyDataError from pandas.io.common import (_is_url, _urlopen, _validate_header_arg, - EmptyDataError, get_filepath_or_buffer, - _NA_VALUES) + get_filepath_or_buffer, _NA_VALUES) from pandas.tseries.period import Period from pandas.io.json import libjson from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, diff --git a/pandas/io/html.py b/pandas/io/html.py index 8a3709dba2176..7b58e612de2df 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -13,7 +13,8 @@ import numpy as np from pandas.types.common import is_list_like -from pandas.io.common import (EmptyDataError, _is_url, urlopen, +from pandas.errors import EmptyDataError +from pandas.io.common import (_is_url, urlopen, parse_url, _validate_header_arg) from pandas.io.parsers import TextParser from pandas.compat import (lrange, lmap, u, string_types, iteritems, diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 4662e8b635d3f..ca5a27ee5b68e 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -59,7 +59,7 @@ from pandas.sparse.api import SparseSeries, SparseDataFrame from pandas.sparse.array import BlockIndex, IntIndex from pandas.core.generic import NDFrame -from pandas.core.common import PerformanceWarning +from pandas.errors import PerformanceWarning from pandas.io.common import get_filepath_or_buffer from pandas.core.internals import BlockManager, make_block, _safe_reshape import pandas.core.internals as internals diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 0080ded1ac03d..b624d2cc0c7ad 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -29,10 +29,11 @@ from pandas.core import algorithms from pandas.core.common import AbstractMethodError from pandas.io.date_converters import generic_parser +from pandas.errors import ParserWarning, ParserError, EmptyDataError from pandas.io.common import (get_filepath_or_buffer, _validate_header_arg, _get_handle, UnicodeReader, UTF8Recoder, - BaseIterator, ParserError, EmptyDataError, - ParserWarning, _NA_VALUES, _infer_compression) + BaseIterator, + _NA_VALUES, _infer_compression) from pandas.tseries import tools from pandas.util.decorators import Appender diff --git a/pandas/io/parsers.pyx b/pandas/io/parsers.pyx index 3728cda559050..4053e726d0a04 100644 --- a/pandas/io/parsers.pyx +++ b/pandas/io/parsers.pyx @@ -13,12 +13,12 @@ from cpython cimport (PyObject, PyBytes_FromString, PyUnicode_Check, PyUnicode_AsUTF8String, PyErr_Occurred, PyErr_Fetch) from cpython.ref cimport PyObject, Py_XDECREF -from pandas.io.common import (ParserError, DtypeWarning, - EmptyDataError, ParserWarning) +from pandas.errors import (ParserError, DtypeWarning, + EmptyDataError, ParserWarning) # Import CParserError as alias of ParserError for backwards compatibility. # Ultimately, we want to remove this import. See gh-12665 and gh-14479. 
-from pandas.io.common import CParserError +CParserError = ParserError cdef extern from "Python.h": object PyUnicode_FromString(char *v) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f75a4761e0948..9b525b76b0f17 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -32,7 +32,8 @@ from pandas.sparse.array import BlockIndex, IntIndex from pandas.core.base import StringMixin from pandas.formats.printing import adjoin, pprint_thing -from pandas.core.common import _asarray_tuplesafe, PerformanceWarning +from pandas.errors import PerformanceWarning +from pandas.core.common import _asarray_tuplesafe from pandas.core.algorithms import match, unique from pandas.core.categorical import Categorical, _factorize_from_iterables from pandas.core.internals import (BlockManager, make_block, diff --git a/pandas/lib.py b/pandas/lib.py index 6c26627a97de3..859a78060fcc1 100644 --- a/pandas/lib.py +++ b/pandas/lib.py @@ -2,6 +2,7 @@ import warnings warnings.warn("The pandas.lib module is deprecated and will be " - "removed in a future version. Please import from " - "the pandas._libs.lib instead", FutureWarning, stacklevel=2) + "removed in a future version. These are private functions " + "and can be accessed from pandas._libs.lib instead", + FutureWarning, stacklevel=2) from pandas._libs.lib import * diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 73222c246fc70..7d1308d67668e 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -1,12 +1,9 @@ # -*- coding: utf-8 -*- from warnings import catch_warnings -import numpy as np import pandas as pd -from pandas.core import common as com from pandas import api -from pandas.api import types from pandas.util import testing as tm @@ -33,7 +30,7 @@ class TestPDApi(Base, tm.TestCase): # top-level sub-packages lib = ['api', 'compat', 'computation', 'core', - 'indexes', 'formats', 'pandas', + 'indexes', 'formats', 'errors', 'pandas', 'test', 'tools', 'tseries', 'sparse', 'types', 'util', 'options', 'io'] @@ -129,80 +126,6 @@ def test_api(self): self.check(api, self.allowed) -class TestTypes(Base, tm.TestCase): - - allowed = ['is_any_int_dtype', 'is_bool', 'is_bool_dtype', - 'is_categorical', 'is_categorical_dtype', 'is_complex', - 'is_complex_dtype', 'is_datetime64_any_dtype', - 'is_datetime64_dtype', 'is_datetime64_ns_dtype', - 'is_datetime64tz_dtype', 'is_datetimetz', 'is_dtype_equal', - 'is_extension_type', 'is_float', 'is_float_dtype', - 'is_floating_dtype', 'is_int64_dtype', 'is_integer', - 'is_integer_dtype', 'is_number', 'is_numeric_dtype', - 'is_object_dtype', 'is_scalar', 'is_sparse', - 'is_string_dtype', 'is_signed_integer_dtype', - 'is_timedelta64_dtype', 'is_timedelta64_ns_dtype', - 'is_unsigned_integer_dtype', 'is_period', - 'is_period_dtype', 'is_re', 'is_re_compilable', - 'is_dict_like', 'is_iterator', - 'is_list_like', 'is_hashable', - 'is_named_tuple', 'is_sequence', - 'pandas_dtype'] - - def test_types(self): - - self.check(types, self.allowed) - - def check_deprecation(self, fold, fnew): - with tm.assert_produces_warning(DeprecationWarning): - try: - result = fold('foo') - expected = fnew('foo') - self.assertEqual(result, expected) - except TypeError: - self.assertRaises(TypeError, - lambda: fnew('foo')) - except AttributeError: - self.assertRaises(AttributeError, - lambda: fnew('foo')) - - def test_deprecation_core_common(self): - - # test that we are in fact deprecating - # the pandas.core.common introspectors - for t in self.allowed: - 
self.check_deprecation(getattr(com, t), getattr(types, t)) - - def test_deprecation_core_common_array_equivalent(self): - - with tm.assert_produces_warning(DeprecationWarning): - com.array_equivalent(np.array([1, 2]), np.array([1, 2])) - - def test_deprecation_core_common_moved(self): - - # these are in pandas.types.common - l = ['is_datetime_arraylike', - 'is_datetime_or_timedelta_dtype', - 'is_datetimelike', - 'is_datetimelike_v_numeric', - 'is_datetimelike_v_object', - 'is_datetimetz', - 'is_int_or_datetime_dtype', - 'is_period_arraylike', - 'is_string_like', - 'is_string_like_dtype'] - - from pandas.types import common as c - for t in l: - self.check_deprecation(getattr(com, t), getattr(c, t)) - - def test_removed_from_core_common(self): - - for t in ['is_null_datelike_scalar', - 'ensure_float']: - self.assertRaises(AttributeError, lambda: getattr(com, t)) - - class TestDatetoolsDeprecation(tm.TestCase): def test_deprecation_access_func(self): @@ -264,11 +187,11 @@ class TestLib(tm.TestCase): def test_deprecation_access_func(self): with catch_warnings(record=True): - pd.lib.infer_dtype + pd.lib.infer_dtype('foo') class TestTSLib(tm.TestCase): def test_deprecation_access_func(self): with catch_warnings(record=True): - pd.tslib.Timestamp + pd.tslib.Timestamp('20160101') diff --git a/pandas/tests/api/test_lib.py b/pandas/tests/api/test_lib.py new file mode 100644 index 0000000000000..db2c68c6197d7 --- /dev/null +++ b/pandas/tests/api/test_lib.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- + +from warnings import catch_warnings +import pandas # noqa + + +def test_moved_infer_dtype(): + with catch_warnings(record=True): + e = pandas.lib.infer_dtype('foo') + assert e is not None diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py new file mode 100644 index 0000000000000..686de4a196034 --- /dev/null +++ b/pandas/tests/api/test_types.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- + +import numpy as np + +from pandas.core import common as com +from pandas.api import types +from pandas.util import testing as tm + +from .test_api import Base + + +class TestTypes(Base, tm.TestCase): + + allowed = ['is_any_int_dtype', 'is_bool', 'is_bool_dtype', + 'is_categorical', 'is_categorical_dtype', 'is_complex', + 'is_complex_dtype', 'is_datetime64_any_dtype', + 'is_datetime64_dtype', 'is_datetime64_ns_dtype', + 'is_datetime64tz_dtype', 'is_datetimetz', 'is_dtype_equal', + 'is_extension_type', 'is_float', 'is_float_dtype', + 'is_floating_dtype', 'is_int64_dtype', 'is_integer', + 'is_integer_dtype', 'is_number', 'is_numeric_dtype', + 'is_object_dtype', 'is_scalar', 'is_sparse', + 'is_string_dtype', 'is_signed_integer_dtype', + 'is_timedelta64_dtype', 'is_timedelta64_ns_dtype', + 'is_unsigned_integer_dtype', 'is_period', + 'is_period_dtype', 'is_re', 'is_re_compilable', + 'is_dict_like', 'is_iterator', + 'is_list_like', 'is_hashable', + 'is_named_tuple', 'is_sequence', + 'pandas_dtype'] + + def test_types(self): + + self.check(types, self.allowed) + + def check_deprecation(self, fold, fnew): + with tm.assert_produces_warning(DeprecationWarning): + try: + result = fold('foo') + expected = fnew('foo') + self.assertEqual(result, expected) + except TypeError: + self.assertRaises(TypeError, + lambda: fnew('foo')) + except AttributeError: + self.assertRaises(AttributeError, + lambda: fnew('foo')) + + def test_deprecation_core_common(self): + + # test that we are in fact deprecating + # the pandas.core.common introspectors + for t in self.allowed: + self.check_deprecation(getattr(com, t), 
getattr(types, t)) + + def test_deprecation_core_common_array_equivalent(self): + + with tm.assert_produces_warning(DeprecationWarning): + com.array_equivalent(np.array([1, 2]), np.array([1, 2])) + + def test_deprecation_core_common_moved(self): + + # these are in pandas.types.common + l = ['is_datetime_arraylike', + 'is_datetime_or_timedelta_dtype', + 'is_datetimelike', + 'is_datetimelike_v_numeric', + 'is_datetimelike_v_object', + 'is_datetimetz', + 'is_int_or_datetime_dtype', + 'is_period_arraylike', + 'is_string_like', + 'is_string_like_dtype'] + + from pandas.types import common as c + for t in l: + self.check_deprecation(getattr(com, t), getattr(c, t)) + + def test_removed_from_core_common(self): + + for t in ['is_null_datelike_scalar', + 'ensure_float']: + self.assertRaises(AttributeError, lambda: getattr(com, t)) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 81e9b7c77a81b..97ed88b1dc22b 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -10,6 +10,7 @@ from pandas.types.common import is_list_like, is_scalar import pandas as pd from pandas.core import common as com +from pandas.errors import PerformanceWarning from pandas import DataFrame, Series, Panel, date_range from pandas.util.testing import makeCustomDataframe as mkdf @@ -1023,7 +1024,7 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): df = DataFrame(randn(1000, 10)) s = Series(randn(10000)) if engine == 'numexpr': - seen = pd.core.common.PerformanceWarning + seen = PerformanceWarning else: seen = False @@ -1045,7 +1046,7 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): is_python_engine = engine == 'python' if not is_python_engine: - wrn = pd.core.common.PerformanceWarning + wrn = PerformanceWarning else: wrn = False diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 839ceb5368240..7ed2bfb601eb8 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -16,7 +16,7 @@ assert_frame_equal, assertRaisesRegexp) -from pandas.core.common import PerformanceWarning +from pandas.errors import PerformanceWarning import pandas.util.testing as tm from pandas.tests.frame.common import TestData diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index ba7e45d7e66fb..1ab292649a973 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -20,9 +20,7 @@ from pandas import (DataFrame, Index, Series, isnull, MultiIndex, Timedelta, Timestamp, date_range) -from pandas.core.common import PandasError import pandas as pd -import pandas.core.common as com import pandas._libs.lib as lib import pandas.util.testing as tm @@ -774,7 +772,7 @@ def test_constructor_more(self): # corner, silly # TODO: Fix this Exception to be better... 
- with tm.assertRaisesRegexp(PandasError, 'constructor not ' + with tm.assertRaisesRegexp(ValueError, 'constructor not ' 'properly called'): DataFrame((1, 2, 3)) @@ -1242,8 +1240,8 @@ def test_constructor_single_value(self): dtype=object), index=[1, 2], columns=['a', 'c'])) - self.assertRaises(com.PandasError, DataFrame, 'a', [1, 2]) - self.assertRaises(com.PandasError, DataFrame, 'a', columns=['a', 'c']) + self.assertRaises(ValueError, DataFrame, 'a', [1, 2]) + self.assertRaises(ValueError, DataFrame, 'a', columns=['a', 'c']) with tm.assertRaisesRegexp(TypeError, 'incompatible data and dtype'): DataFrame('a', [1, 2], ['a', 'c'], float) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index e49dfffc48803..927b9f6a48718 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -8,7 +8,7 @@ import numpy as np from pandas.compat import (lmap, range, lrange, StringIO, u) -from pandas.io.common import ParserError +from pandas.errors import ParserError from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp, date_range, read_csv, compat, to_datetime) import pandas as pd diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 83502434e6053..c17c98c5448be 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -8,7 +8,7 @@ from pandas import (date_range, bdate_range, Timestamp, isnull, Index, MultiIndex, DataFrame, Series, concat, Panel) -from pandas.core.common import UnsupportedFunctionCall +from pandas.errors import UnsupportedFunctionCall, PerformanceWarning from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal, assert_index_equal, assertRaisesRegexp) @@ -3475,7 +3475,7 @@ def test_groupby_multiindex_not_lexsorted(self): tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) expected = lexsorted_df.groupby('a').mean() - with tm.assert_produces_warning(com.PerformanceWarning): + with tm.assert_produces_warning(PerformanceWarning): result = not_lexsorted_df.groupby('a').mean() tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 4681879d708c4..4be9999982f12 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -7,7 +7,7 @@ import pandas as pd import pandas._libs.tslib as tslib import pandas.util.testing as tm -from pandas.core.common import PerformanceWarning +from pandas.errors import PerformanceWarning from pandas.tseries.index import cdate_range from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, date_range, TimedeltaIndex, _np_version_under1p10, Index, diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 0c274b2f6c4ff..470526043234f 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -15,7 +15,7 @@ from pandas import (CategoricalIndex, DataFrame, Index, MultiIndex, compat, date_range, period_range) from pandas.compat import PY3, long, lrange, lzip, range, u -from pandas.core.common import PerformanceWarning, UnsortedIndexError +from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.indexes.base import InvalidIndexError from pandas._libs import lib from pandas._libs.lib import Timestamp diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py index e68e8015a2f39..b12d1eb97f88b 100644 --- 
a/pandas/tests/indexing/test_ix.py +++ b/pandas/tests/indexing/test_ix.py @@ -9,7 +9,7 @@ from pandas.compat import lrange from pandas import Series, DataFrame, option_context, MultiIndex from pandas.util import testing as tm -from pandas.core.common import PerformanceWarning +from pandas.errors import PerformanceWarning class TestIX(tm.TestCase): diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index ed943202872a7..1fc0a87764b94 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -5,7 +5,7 @@ from pandas import (Panel, Series, MultiIndex, DataFrame, Timestamp, Index, date_range) from pandas.util import testing as tm -from pandas.core.common import PerformanceWarning, UnsortedIndexError +from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.tests.indexing.common import _mklbl diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 2c8bca490f274..7faf485b65d10 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -19,7 +19,8 @@ from pandas import compat from pandas.compat import (StringIO, BytesIO, PY3, range, lrange, u) -from pandas.io.common import DtypeWarning, EmptyDataError, URLError +from pandas.errors import DtypeWarning, EmptyDataError +from pandas.io.common import URLError from pandas.io.parsers import TextFileReader, TextParser diff --git a/pandas/tests/io/parser/dialect.py b/pandas/tests/io/parser/dialect.py index ee50cf812f72e..82871628e54d6 100644 --- a/pandas/tests/io/parser/dialect.py +++ b/pandas/tests/io/parser/dialect.py @@ -9,7 +9,7 @@ from pandas import DataFrame from pandas.compat import StringIO -from pandas.io.common import ParserWarning +from pandas.errors import ParserWarning import pandas.util.testing as tm diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py index fa95c18c4d7a9..8066718363803 100644 --- a/pandas/tests/io/parser/dtypes.py +++ b/pandas/tests/io/parser/dtypes.py @@ -12,7 +12,7 @@ from pandas import DataFrame, Series, Index, MultiIndex, Categorical from pandas.compat import StringIO from pandas.types.dtypes import CategoricalDtype -from pandas.io.common import ParserWarning +from pandas.errors import ParserWarning class DtypeTests(object): diff --git a/pandas/tests/io/parser/skiprows.py b/pandas/tests/io/parser/skiprows.py index c53e6a1579267..cb1b656e42be2 100644 --- a/pandas/tests/io/parser/skiprows.py +++ b/pandas/tests/io/parser/skiprows.py @@ -12,7 +12,7 @@ import pandas.util.testing as tm from pandas import DataFrame -from pandas.io.common import EmptyDataError +from pandas.errors import EmptyDataError from pandas.compat import StringIO, range, lrange diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 48dd5d4ba506b..14146a3ad1e9a 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -13,7 +13,7 @@ import pandas.util.testing as tm from pandas.compat import StringIO -from pandas.io.common import ParserError +from pandas.errors import ParserError from pandas.io.parsers import read_csv, read_table diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 3c980cae3351a..c08d235b07c9e 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -11,7 +11,6 @@ from pandas.compat import is_platform_windows, StringIO from pandas import read_csv, concat -import pandas as pd try: from 
pathlib import Path @@ -89,23 +88,6 @@ def test_iterator(self): tm.assert_frame_equal(first, expected.iloc[[0]]) tm.assert_frame_equal(concat(it), expected.iloc[1:]) - def test_error_rename(self): - # see gh-12665 - try: - raise common.CParserError() - except common.ParserError: - pass - - try: - raise common.ParserError() - except common.CParserError: - pass - - try: - raise common.ParserError() - except pd.parser.CParserError: - pass - class TestMMapWrapper(tm.TestCase): diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index efa8587d64657..1b6b0fc62f913 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -10,7 +10,7 @@ from pandas.compat import u, PY3 from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range, date_range, period_range, Index, Categorical) -from pandas.core.common import PerformanceWarning +from pandas.errors import PerformanceWarning from pandas.io.packers import to_msgpack, read_msgpack import pandas.util.testing as tm from pandas.util.testing import (ensure_clean, diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py new file mode 100644 index 0000000000000..aabce7ecb7066 --- /dev/null +++ b/pandas/tests/test_errors.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- + +import pytest +import pandas # noqa +import pandas as pd + + +@pytest.mark.parametrize( + "exc", ['UnsupportedFunctionCall', 'UnsortedIndexError', + 'OutOfBoundsDatetime', + 'ParserError', 'PerformanceWarning', 'DtypeWarning', + 'EmptyDataError', 'ParserWarning']) +def test_exception_importable(exc): + from pandas import errors + e = getattr(errors, exc) + assert e is not None + + # check that we can raise on them + with pytest.raises(e): + raise e() + + +def test_catch_oob(): + from pandas import errors + + try: + pd.Timestamp('15000101') + except errors.OutOfBoundsDatetime: + pass + + +def test_error_rename(): + # see gh-12665 + from pandas.errors import ParserError + from pandas.io.common import CParserError + + try: + raise CParserError() + except ParserError: + pass + + try: + raise ParserError() + except CParserError: + pass + + try: + raise ParserError() + except pd.parser.CParserError: + pass diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index fe03d7886e661..ceb12c6c03074 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -16,7 +16,7 @@ import pandas.core.window as rwindow import pandas.tseries.offsets as offsets from pandas.core.base import SpecificationError -from pandas.core.common import UnsupportedFunctionCall +from pandas.errors import UnsupportedFunctionCall import pandas.util.testing as tm from pandas.compat import range, zip, PY3 diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py index 57a655b0b7610..57e5a1631f8e8 100755 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/tseries/test_resample.py @@ -14,7 +14,7 @@ from pandas.types.generic import ABCSeries, ABCDataFrame from pandas.compat import range, lrange, zip, product, OrderedDict from pandas.core.base import SpecificationError -from pandas.core.common import UnsupportedFunctionCall +from pandas.errors import UnsupportedFunctionCall from pandas.core.groupby import DataError from pandas.tseries.frequencies import MONTHS, DAYS from pandas.tseries.frequencies import to_offset diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 9123131a6dccf..8fa842a836051 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -25,8 
+25,8 @@ from pandas.types.missing import isnull import pandas.types.concat as _concat -from pandas.core.common import (_values_from_object, _maybe_box, - PerformanceWarning) +from pandas.errors import PerformanceWarning +from pandas.core.common import _values_from_object, _maybe_box from pandas.core.index import Index, Int64Index, Float64Index from pandas.indexes.base import _index_shared_docs @@ -618,8 +618,7 @@ def _has_same_tz(self, other): def _cached_range(cls, start=None, end=None, periods=None, offset=None, name=None): if start is None and end is None: - # I somewhat believe this should never be raised externally and - # therefore should be a `PandasError` but whatever... + # I somewhat believe this should never be raised externally raise TypeError('Must specify either start or end.') if start is not None: start = Timestamp(start) @@ -630,8 +629,7 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None, 'Must either specify period or provide both start and end.') if offset is None: - # This can't happen with external-facing code, therefore - # PandasError + # This can't happen with external-facing code raise TypeError('Must provide offset.') drc = _daterange_cache diff --git a/pandas/tslib.py b/pandas/tslib.py index 3ecbffa20700d..3d96dc496c0de 100644 --- a/pandas/tslib.py +++ b/pandas/tslib.py @@ -3,6 +3,6 @@ import warnings warnings.warn("The pandas.tslib module is deprecated and will be " "removed in a future version. Please import from " - "the pandas._libs.tslib instead", FutureWarning, stacklevel=2) + "pandas or pandas.errors instead", FutureWarning, stacklevel=2) from pandas._libs.tslib import (Timestamp, Timedelta, NaT, OutOfBoundsDatetime) diff --git a/pandas/util/depr_module.py b/pandas/util/depr_module.py index af7faf9dd96c8..0885c81ce2757 100644 --- a/pandas/util/depr_module.py +++ b/pandas/util/depr_module.py @@ -18,14 +18,19 @@ class _DeprecatedModule(object): be used when needed. removals : objects or methods in module that will no longer be accessible once module is removed. + moved : dict, optional + dictionary of function name -> new location for moved + objects """ - def __init__(self, deprmod, deprmodto=None, removals=None): + def __init__(self, deprmod, deprmodto=None, removals=None, + moved=None): self.deprmod = deprmod self.deprmodto = deprmodto self.removals = removals if self.removals is not None: self.removals = frozenset(self.removals) + self.moved = moved # For introspection purposes. 
self.self_dir = frozenset(dir(self.__class__)) @@ -60,6 +65,14 @@ def __getattr__(self, name): "{deprmod}.{name} is deprecated and will be removed in " "a future version.".format(deprmod=self.deprmod, name=name), FutureWarning, stacklevel=2) + elif self.moved is not None and name in self.moved: + warnings.warn( + "{deprmod} is deprecated and will be removed in " + "a future version.\nYou can access {name} in {moved}".format( + deprmod=self.deprmod, + name=name, + moved=self.moved[name]), + FutureWarning, stacklevel=2) else: deprmodto = self.deprmodto if deprmodto is None: diff --git a/setup.py b/setup.py index 1b471f76ac5e6..96b25f7427370 100755 --- a/setup.py +++ b/setup.py @@ -631,11 +631,13 @@ def pxd(name): packages=['pandas', 'pandas.api', 'pandas.api.types', + 'pandas.api.lib', 'pandas.compat', 'pandas.compat.numpy', 'pandas.computation', 'pandas.core', 'pandas.indexes', + 'pandas.errors', 'pandas.io', 'pandas.io.json', 'pandas.io.sas', From faf6401dd41c1469be50e360cba071555205e219 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 4 Apr 2017 14:29:38 +0200 Subject: [PATCH 331/933] DOC fixes in contributing.rst (#15887) --- doc/source/contributing.rst | 14 +++++++------- pandas/core/series.py | 2 +- pandas/io/json/normalize.py | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 467d6456d60cd..8af7de688a2ae 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -536,10 +536,10 @@ signatures and add deprecation warnings where needed. .. _contributing.ci: -Testing Thru Continuous Integration +Testing With Continuous Integration ----------------------------------- -The *pandas* testing suite will run automatically on `Travis-CI `__, +The *pandas* test suite will run automatically on `Travis-CI `__, `Appveyor `__, and `Circle CI `__ continuous integration services, once your pull request is submitted. However, if you wish to run the test suite on a branch prior to submitting the pull request, @@ -548,14 +548,14 @@ for `Travis-CI `__, `Appveyor `__ , and `CircleCI `__. A pull-request will be considered for merging when you have an all 'green' build. If any tests are failing, -then you will get a red 'X', where you can click thru to see the individual failed tests. +then you will get a red 'X', where you can click through to see the individual failed tests. This is an example of a green build. .. image:: _static/ci.png .. note:: - Each time you push to *your* fork, a *new* run of the tests will trigger on the CI. Appveyor will auto-cancel + Each time you push to *your* fork, a *new* run of the tests will be triggered on the CI. Appveyor will auto-cancel any non-currently-running tests for that same pull-request. You can enable the auto-cancel feature for `Travis-CI here `__ and for `CircleCI here `__. @@ -623,12 +623,12 @@ testing of many cases in a concise way that enables an easy-to-read syntax. .. note:: - .. code-block:: python + *pandas* existing test structure is *mostly* classed based, meaning that you will typically find tests wrapped in a class, inheriting from ``tm.TestCase``. - *pandas* existing test structure is *mostly* classed based, meaning that you will typically find tests wrapped in a class, inheriting from ``tm.TestCase``. + .. code-block:: python class TestReallyCoolFeature(tm.TestCase): - .... + .... Going forward we are moving to a more *functional* style, please see below. 
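For reference, the *functional* style the contributing guide points to directly above looks roughly like the following sketch (illustrative only, not the guide's verbatim example; the feature being tested is hypothetical):

    import pytest
    import pandas as pd
    from pandas.util import testing as tm

    @pytest.mark.parametrize('dtype', ['int64', 'float64'])
    def test_really_cool_feature(dtype):
        # a plain function plus pytest.mark.parametrize replaces the
        # tm.TestCase class wrapper shown above
        result = pd.Series([1, 2, 3], dtype=dtype)
        expected = pd.Series([1, 2, 3], dtype=dtype)
        tm.assert_series_equal(result, expected)
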
diff --git a/pandas/core/series.py b/pandas/core/series.py index bcd58ea791083..1aaa106d2c68f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -80,7 +80,7 @@ If True, performs operation inplace and returns None.""", unique='np.ndarray', duplicated='Series', optional_by='', - versionadded_to_excel='\n.. versionadded:: 0.20.0\n') + versionadded_to_excel='\n .. versionadded:: 0.20.0\n') def _coerce_method(converter): diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index 518e0bc2064e2..401d8d9ead2b8 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -114,10 +114,10 @@ def json_normalize(data, record_path=None, meta=None, meta_prefix : string, default None errors : {'raise', 'ignore'}, default 'raise' - * ignore : will ignore KeyError if keys listed in meta are not - always present - * raise : will raise KeyError if keys listed in meta are not - always present + * 'ignore' : will ignore KeyError if keys listed in meta are not - always present + * 'raise' : will raise KeyError if keys listed in meta are not + always present .. versionadded:: 0.20.0 From e50d397efe31404802c55637793ea97850ac4e84 Mon Sep 17 00:00:00 2001 From: Aleksey Bilogur Date: Tue, 4 Apr 2017 13:34:11 -0400 Subject: [PATCH 332/933] API: add top-level melt function as method to DataFrame xref #12640 xref #14876 Author: Aleksey Bilogur Closes #15521 from ResidentMario/12640 and squashes the following commits: 1657246 [Aleksey Bilogur] two doc changes 28a38f2 [Aleksey Bilogur] tweak whatsnew entry. 5f306a9 [Aleksey Bilogur] +whatsnew ff895fe [Aleksey Bilogur] Add tests, update docs. 11f3fe4 [Aleksey Bilogur] rm stray debug. 3cbbed5 [Aleksey Bilogur] Melt docstring. d54dc2f [Aleksey Bilogur] +pd.DataFrame.melt. --- doc/source/api.rst | 1 + doc/source/reshaping.rst | 11 ++-- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 104 ++++++++++++++++++++++++++++++++ pandas/core/reshape.py | 96 ++--------------------------- pandas/tests/test_reshape.py | 102 +++++++++++++++++++------------ 6 files changed, 182 insertions(+), 133 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index dfeaf8e60feb1..24bad7d515305 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -933,6 +933,7 @@ Reshaping, sorting, transposing DataFrame.swaplevel DataFrame.stack DataFrame.unstack + DataFrame.melt DataFrame.T DataFrame.to_panel DataFrame.to_xarray diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index eccaa9474bf6d..2c5aae133d4d9 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -265,8 +265,8 @@ the right thing: Reshaping by Melt ----------------- -The :func:`~pandas.melt` function is useful to massage a -DataFrame into a format where one or more columns are identifier variables, +The top-level :func:`~pandas.melt` and :func:`~DataFrame.melt` functions are useful to +massage a DataFrame into a format where one or more columns are identifier variables, while all other columns, considered measured variables, are "unpivoted" to the row axis, leaving just two non-identifier columns, "variable" and "value". 
The names of those columns can be customized by supplying the ``var_name`` and @@ -281,10 +281,11 @@ For instance, 'height' : [5.5, 6.0], 'weight' : [130, 150]}) cheese - pd.melt(cheese, id_vars=['first', 'last']) - pd.melt(cheese, id_vars=['first', 'last'], var_name='quantity') + cheese.melt(id_vars=['first', 'last']) + cheese.melt(id_vars=['first', 'last'], var_name='quantity') -Another way to transform is to use the ``wide_to_long`` panel data convenience function. +Another way to transform is to use the ``wide_to_long`` panel data convenience +function. .. ipython:: python diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 74fe7916523c5..355dceba1b953 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -324,6 +324,7 @@ Other Enhancements - ``Series.sort_index`` accepts parameters ``kind`` and ``na_position`` (:issue:`13589`, :issue:`14444`) - ``DataFrame`` has gained a ``nunique()`` method to count the distinct values over an axis (:issue:`14336`). +- ``DataFrame`` has gained a ``melt()`` method, equivalent to ``pd.melt()``, for unpivoting from a wide to long format (:issue:`12640`). - ``DataFrame.groupby()`` has gained a ``.nunique()`` method to count the distinct values for all columns within each group (:issue:`14336`, :issue:`15197`). - ``pd.read_excel()`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 237af0f85e866..3980bf6cdbc09 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4051,6 +4051,110 @@ def unstack(self, level=-1, fill_value=None): from pandas.core.reshape import unstack return unstack(self, level, fill_value) + _shared_docs['melt'] = (""" + "Unpivots" a DataFrame from wide format to long format, optionally + leaving identifier variables set. + + This function is useful to massage a DataFrame into a format where one + or more columns are identifier variables (`id_vars`), while all other + columns, considered measured variables (`value_vars`), are "unpivoted" to + the row axis, leaving just two non-identifier columns, 'variable' and + 'value'. + + %(versionadded)s + Parameters + ---------- + frame : DataFrame + id_vars : tuple, list, or ndarray, optional + Column(s) to use as identifier variables. + value_vars : tuple, list, or ndarray, optional + Column(s) to unpivot. If not specified, uses all columns that + are not set as `id_vars`. + var_name : scalar + Name to use for the 'variable' column. If None it uses + ``frame.columns.name`` or 'variable'. + value_name : scalar, default 'value' + Name to use for the 'value' column. + col_level : int or string, optional + If columns are a MultiIndex then use this level to melt. + + See also + -------- + %(other)s + pivot_table + DataFrame.pivot + + Examples + -------- + >>> import pandas as pd + >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, + ... 'B': {0: 1, 1: 3, 2: 5}, + ... 'C': {0: 2, 1: 4, 2: 6}}) + >>> df + A B C + 0 a 1 2 + 1 b 3 4 + 2 c 5 6 + + >>> %(caller)sid_vars=['A'], value_vars=['B']) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + + >>> %(caller)sid_vars=['A'], value_vars=['B', 'C']) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + 3 a C 2 + 4 b C 4 + 5 c C 6 + + The names of 'variable' and 'value' columns can be customized: + + >>> %(caller)sid_vars=['A'], value_vars=['B'], + ... 
var_name='myVarname', value_name='myValname') + A myVarname myValname + 0 a B 1 + 1 b B 3 + 2 c B 5 + + If you have multi-index columns: + + >>> df.columns = [list('ABC'), list('DEF')] + >>> df + A B C + D E F + 0 a 1 2 + 1 b 3 4 + 2 c 5 6 + + >>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B']) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + + >>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')]) + (A, D) variable_0 variable_1 value + 0 a B E 1 + 1 b B E 3 + 2 c B E 5 + + """) + + @Appender(_shared_docs['melt'] % + dict(caller='df.melt(', + versionadded='.. versionadded:: 0.20.0\n', + other='melt')) + def melt(self, id_vars=None, value_vars=None, var_name=None, + value_name='value', col_level=None): + from pandas.core.reshape import melt + return melt(self, id_vars=id_vars, value_vars=value_vars, + var_name=var_name, value_name=value_name, + col_level=col_level) + # ---------------------------------------------------------------------- # Time series-related diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 2822d98b7c906..c7e06d63fbda9 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -28,6 +28,8 @@ import pandas.core.algorithms as algos from pandas._libs import algos as _algos, reshape as _reshape +from pandas.core.frame import _shared_docs +from pandas.util.decorators import Appender from pandas.core.index import MultiIndex, _get_na_value @@ -701,98 +703,12 @@ def _convert_level_number(level_num, columns): return result +@Appender(_shared_docs['melt'] % + dict(caller='pd.melt(df, ', + versionadded="", + other='DataFrame.melt')) def melt(frame, id_vars=None, value_vars=None, var_name=None, value_name='value', col_level=None): - """ - "Unpivots" a DataFrame from wide format to long format, optionally leaving - identifier variables set. - - This function is useful to massage a DataFrame into a format where one - or more columns are identifier variables (`id_vars`), while all other - columns, considered measured variables (`value_vars`), are "unpivoted" to - the row axis, leaving just two non-identifier columns, 'variable' and - 'value'. - - Parameters - ---------- - frame : DataFrame - id_vars : tuple, list, or ndarray, optional - Column(s) to use as identifier variables. - value_vars : tuple, list, or ndarray, optional - Column(s) to unpivot. If not specified, uses all columns that - are not set as `id_vars`. - var_name : scalar - Name to use for the 'variable' column. If None it uses - ``frame.columns.name`` or 'variable'. - value_name : scalar, default 'value' - Name to use for the 'value' column. - col_level : int or string, optional - If columns are a MultiIndex then use this level to melt. - - See also - -------- - pivot_table - DataFrame.pivot - - Examples - -------- - >>> import pandas as pd - >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, - ... 'B': {0: 1, 1: 3, 2: 5}, - ... 'C': {0: 2, 1: 4, 2: 6}}) - >>> df - A B C - 0 a 1 2 - 1 b 3 4 - 2 c 5 6 - - >>> pd.melt(df, id_vars=['A'], value_vars=['B']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - - >>> pd.melt(df, id_vars=['A'], value_vars=['B', 'C']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - 3 a C 2 - 4 b C 4 - 5 c C 6 - - The names of 'variable' and 'value' columns can be customized: - - >>> pd.melt(df, id_vars=['A'], value_vars=['B'], - ... 
var_name='myVarname', value_name='myValname') - A myVarname myValname - 0 a B 1 - 1 b B 3 - 2 c B 5 - - If you have multi-index columns: - - >>> df.columns = [list('ABC'), list('DEF')] - >>> df - A B C - D E F - 0 a 1 2 - 1 b 3 4 - 2 c 5 6 - - >>> pd.melt(df, col_level=0, id_vars=['A'], value_vars=['B']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - - >>> pd.melt(df, id_vars=[('A', 'D')], value_vars=[('B', 'E')]) - (A, D) variable_0 variable_1 value - 0 a B E 1 - 1 b B E 3 - 2 c B E 5 - - """ # TODO: what about the existing index? if id_vars is not None: if not is_list_like(id_vars): diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 7ba743a6c425c..ee255c1863b41 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -30,23 +30,46 @@ def setUp(self): self.df1.columns = [list('ABC'), list('abc')] self.df1.columns.names = ['CAP', 'low'] - def test_default_col_names(self): + def test_top_level_method(self): result = melt(self.df) self.assertEqual(result.columns.tolist(), ['variable', 'value']) - result1 = melt(self.df, id_vars=['id1']) + def test_method_signatures(self): + tm.assert_frame_equal(self.df.melt(), + melt(self.df)) + + tm.assert_frame_equal(self.df.melt(id_vars=['id1', 'id2'], + value_vars=['A', 'B']), + melt(self.df, + id_vars=['id1', 'id2'], + value_vars=['A', 'B'])) + + tm.assert_frame_equal(self.df.melt(var_name=self.var_name, + value_name=self.value_name), + melt(self.df, + var_name=self.var_name, + value_name=self.value_name)) + + tm.assert_frame_equal(self.df1.melt(col_level=0), + melt(self.df1, col_level=0)) + + def test_default_col_names(self): + result = self.df.melt() + self.assertEqual(result.columns.tolist(), ['variable', 'value']) + + result1 = self.df.melt(id_vars=['id1']) self.assertEqual(result1.columns.tolist(), ['id1', 'variable', 'value' ]) - result2 = melt(self.df, id_vars=['id1', 'id2']) + result2 = self.df.melt(id_vars=['id1', 'id2']) self.assertEqual(result2.columns.tolist(), ['id1', 'id2', 'variable', 'value']) def test_value_vars(self): - result3 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A') + result3 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A') self.assertEqual(len(result3), 10) - result4 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B']) + result4 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B']) expected4 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, 'variable': ['A'] * 10 + ['B'] * 10, @@ -65,8 +88,8 @@ def test_value_vars_types(self): columns=['id1', 'id2', 'variable', 'value']) for type_ in (tuple, list, np.array): - result = melt(self.df, id_vars=['id1', 'id2'], - value_vars=type_(('A', 'B'))) + result = self.df.melt(id_vars=['id1', 'id2'], + value_vars=type_(('A', 'B'))) tm.assert_frame_equal(result, expected) def test_vars_work_with_multiindex(self): @@ -77,7 +100,7 @@ def test_vars_work_with_multiindex(self): 'value': self.df1[('B', 'b')], }, columns=[('A', 'a'), 'CAP', 'low', 'value']) - result = melt(self.df1, id_vars=[('A', 'a')], value_vars=[('B', 'b')]) + result = self.df1.melt(id_vars=[('A', 'a')], value_vars=[('B', 'b')]) tm.assert_frame_equal(result, expected) def test_tuple_vars_fail_with_multiindex(self): @@ -92,26 +115,26 @@ def test_tuple_vars_fail_with_multiindex(self): for id_vars, value_vars in ((tuple_a, list_b), (list_a, tuple_b), (tuple_a, tuple_b)): with tm.assertRaisesRegexp(ValueError, r'MultiIndex'): - melt(self.df1, id_vars=id_vars, value_vars=value_vars) + 
self.df1.melt(id_vars=id_vars, value_vars=value_vars) def test_custom_var_name(self): - result5 = melt(self.df, var_name=self.var_name) + result5 = self.df.melt(var_name=self.var_name) self.assertEqual(result5.columns.tolist(), ['var', 'value']) - result6 = melt(self.df, id_vars=['id1'], var_name=self.var_name) + result6 = self.df.melt(id_vars=['id1'], var_name=self.var_name) self.assertEqual(result6.columns.tolist(), ['id1', 'var', 'value']) - result7 = melt(self.df, id_vars=['id1', 'id2'], var_name=self.var_name) + result7 = self.df.melt(id_vars=['id1', 'id2'], var_name=self.var_name) self.assertEqual(result7.columns.tolist(), ['id1', 'id2', 'var', 'value']) - result8 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A', - var_name=self.var_name) + result8 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A', + var_name=self.var_name) self.assertEqual(result8.columns.tolist(), ['id1', 'id2', 'var', 'value']) - result9 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'], - var_name=self.var_name) + result9 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'], + var_name=self.var_name) expected9 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, self.var_name: ['A'] * 10 + ['B'] * 10, @@ -121,24 +144,24 @@ def test_custom_var_name(self): tm.assert_frame_equal(result9, expected9) def test_custom_value_name(self): - result10 = melt(self.df, value_name=self.value_name) + result10 = self.df.melt(value_name=self.value_name) self.assertEqual(result10.columns.tolist(), ['variable', 'val']) - result11 = melt(self.df, id_vars=['id1'], value_name=self.value_name) + result11 = self.df.melt(id_vars=['id1'], value_name=self.value_name) self.assertEqual(result11.columns.tolist(), ['id1', 'variable', 'val']) - result12 = melt(self.df, id_vars=['id1', 'id2'], - value_name=self.value_name) + result12 = self.df.melt(id_vars=['id1', 'id2'], + value_name=self.value_name) self.assertEqual(result12.columns.tolist(), ['id1', 'id2', 'variable', 'val']) - result13 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A', - value_name=self.value_name) + result13 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A', + value_name=self.value_name) self.assertEqual(result13.columns.tolist(), ['id1', 'id2', 'variable', 'val']) - result14 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'], - value_name=self.value_name) + result14 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'], + value_name=self.value_name) expected14 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, 'variable': ['A'] * 10 + ['B'] * 10, @@ -150,26 +173,29 @@ def test_custom_value_name(self): def test_custom_var_and_value_name(self): - result15 = melt(self.df, var_name=self.var_name, - value_name=self.value_name) + result15 = self.df.melt(var_name=self.var_name, + value_name=self.value_name) self.assertEqual(result15.columns.tolist(), ['var', 'val']) - result16 = melt(self.df, id_vars=['id1'], var_name=self.var_name, - value_name=self.value_name) + result16 = self.df.melt(id_vars=['id1'], var_name=self.var_name, + value_name=self.value_name) self.assertEqual(result16.columns.tolist(), ['id1', 'var', 'val']) - result17 = melt(self.df, id_vars=['id1', 'id2'], - var_name=self.var_name, value_name=self.value_name) + result17 = self.df.melt(id_vars=['id1', 'id2'], + var_name=self.var_name, + value_name=self.value_name) self.assertEqual(result17.columns.tolist(), ['id1', 'id2', 'var', 'val' ]) - result18 = melt(self.df, id_vars=['id1', 'id2'], 
value_vars='A', - var_name=self.var_name, value_name=self.value_name) + result18 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A', + var_name=self.var_name, + value_name=self.value_name) self.assertEqual(result18.columns.tolist(), ['id1', 'id2', 'var', 'val' ]) - result19 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'], - var_name=self.var_name, value_name=self.value_name) + result19 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'], + var_name=self.var_name, + value_name=self.value_name) expected19 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, self.var_name: ['A'] * 10 + ['B'] * 10, @@ -181,17 +207,17 @@ def test_custom_var_and_value_name(self): df20 = self.df.copy() df20.columns.name = 'foo' - result20 = melt(df20) + result20 = df20.melt() self.assertEqual(result20.columns.tolist(), ['foo', 'value']) def test_col_level(self): - res1 = melt(self.df1, col_level=0) - res2 = melt(self.df1, col_level='CAP') + res1 = self.df1.melt(col_level=0) + res2 = self.df1.melt(col_level='CAP') self.assertEqual(res1.columns.tolist(), ['CAP', 'value']) self.assertEqual(res2.columns.tolist(), ['CAP', 'value']) def test_multiindex(self): - res = pd.melt(self.df1) + res = self.df1.melt() self.assertEqual(res.columns.tolist(), ['CAP', 'low', 'value']) From e0b60c07295a92eb760c38870c5f8c40e412f7dc Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 4 Apr 2017 18:02:28 -0400 Subject: [PATCH 333/933] BUG: Bug in DataFrame construction with nulls and datetimes in a list like closes #15869 Author: Jeff Reback Closes #15892 from jreback/construct and squashes the following commits: 6bf2148 [Jeff Reback] fix perf 7fcd4e5 [Jeff Reback] BUG: Bug in DataFrame construction with nulls and datetimes in a list-like --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/_libs/src/inference.pyx | 79 +++++++++++++++--- pandas/tests/frame/test_constructors.py | 9 ++ pandas/tests/frame/test_misc_api.py | 12 ++- pandas/tests/series/test_constructors.py | 8 ++ pandas/types/cast.py | 102 ++++++++++++----------- 6 files changed, 149 insertions(+), 62 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 355dceba1b953..2e1cc396287ce 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -997,6 +997,7 @@ Conversion - Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. 
(:issue:`14956`) - Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) +- Bug in ``DataFrame`` construction with nulls and datetimes in a list-like (:issue:`15869`) Indexing ^^^^^^^^ diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index b0fb7048f154c..33c05f302dd94 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -439,31 +439,86 @@ def infer_dtype(object value): return 'mixed' -cpdef bint is_possible_datetimelike_array(object arr): - # determine if we have a possible datetimelike (or null-like) array +cpdef object infer_datetimelike_array(object arr): + """ + infer if we have a datetime or timedelta array + - date: we have *only* date and maybe strings, nulls + - datetime: we have *only* datetimes and maybe strings, nulls + - timedelta: we have *only* timedeltas and maybe strings, nulls + - nat: we do not have *any* date, datetimes or timedeltas, but do have + at least a NaT + - mixed: other objects (strings or actual objects) + + Parameters + ---------- + arr : object array + + Returns + ------- + string: {datetime, timedelta, date, nat, mixed} + + """ + cdef: Py_ssize_t i, n = len(arr) - bint seen_timedelta = 0, seen_datetime = 0 + bint seen_timedelta = 0, seen_date = 0, seen_datetime = 0 + bint seen_nat = 0 + list objs = [] object v for i in range(n): v = arr[i] if util.is_string_object(v): - continue + objs.append(v) + + if len(objs) == 3: + break + elif util._checknull(v): - continue - elif is_datetime(v): - seen_datetime=1 - elif is_timedelta(v): - seen_timedelta=1 + # nan or None + pass + elif v is NaT: + seen_nat = 1 + elif is_datetime(v) or util.is_datetime64_object(v): + # datetime, or np.datetime64 + seen_datetime = 1 + elif is_date(v): + seen_date = 1 + elif is_timedelta(v) or util.is_timedelta64_object(v): + # timedelta, or timedelta64 + seen_timedelta = 1 else: - return False - return seen_datetime or seen_timedelta + return 'mixed' + + if seen_date and not (seen_datetime or seen_timedelta): + return 'date' + elif seen_datetime and not seen_timedelta: + return 'datetime' + elif seen_timedelta and not seen_datetime: + return 'timedelta' + elif seen_nat: + return 'nat' + + # short-circuit by trying to + # actually convert these strings + # this is for performance as we don't need to try + # convert *every* string array + if len(objs): + try: + tslib.array_to_datetime(objs, errors='raise') + return 'datetime' + except: + pass + + # we are *not* going to infer from strings + # for timedelta as too much ambiguity + + return 'mixed' cdef inline bint is_null_datetimelike(v): # determine if we have a null for a timedelta/datetime (or integer - # versions)x + # versions) if util._checknull(v): return True elif v is NaT: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 1ab292649a973..6d28d3b4dfcd5 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1366,6 +1366,15 @@ def test_constructor_with_datetimes(self): .reset_index(drop=True), 'b': i_no_tz}) tm.assert_frame_equal(df, expected) + def test_constructor_datetimes_with_nulls(self): + # gh-15869 + for arr in [np.array([None, None, None, None, + datetime.now(), None]), + np.array([None, None, datetime.now(), None])]: + result = DataFrame(arr).get_dtype_counts() + expected = Series({'datetime64[ns]': 1}) + 
tm.assert_series_equal(result, expected) + def test_constructor_for_list_with_dtypes(self): # TODO(wesm): unused intname = np.dtype(np.int_).name # noqa diff --git a/pandas/tests/frame/test_misc_api.py b/pandas/tests/frame/test_misc_api.py index 42427df90401d..50fa0dca6bf04 100644 --- a/pandas/tests/frame/test_misc_api.py +++ b/pandas/tests/frame/test_misc_api.py @@ -12,7 +12,7 @@ from numpy.random import randn import numpy as np -from pandas import DataFrame, Series +from pandas import DataFrame, Series, date_range, timedelta_range import pandas as pd from pandas.util.testing import (assert_almost_equal, @@ -328,6 +328,16 @@ def test_empty_nonzero(self): self.assertTrue(df.empty) self.assertTrue(df.T.empty) + def test_with_datetimelikes(self): + + df = DataFrame({'A': date_range('20130101', periods=10), + 'B': timedelta_range('1 day', periods=10)}) + t = df.T + + result = t.get_dtype_counts() + expected = Series({'object': 10}) + tm.assert_series_equal(result, expected) + def test_inplace_return_self(self): # re #1893 diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 24e4355fa9f9a..dbe2db67359f3 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -327,6 +327,14 @@ def test_constructor_datelike_coercion(self): result = df.loc['216'] self.assertTrue(result.dtype == object) + def test_constructor_datetimes_with_nulls(self): + # gh-15869 + for arr in [np.array([None, None, None, None, + datetime.now(), None]), + np.array([None, None, datetime.now(), None])]: + result = Series(arr) + assert result.dtype == 'M8[ns]' + def test_constructor_dtype_datetime64(self): s = Series(iNaT, dtype='M8[ns]', index=lrange(5)) diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 985e5b9f95831..580ce12de3333 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -748,8 +748,6 @@ def maybe_infer_to_datetimelike(value, convert_dates=False): this is pretty strict in that a datetime/timedelta is REQUIRED in addition to possible nulls/string likes - ONLY strings are NOT datetimelike - Parameters ---------- value : np.array / Series / Index / list-like @@ -770,64 +768,70 @@ def maybe_infer_to_datetimelike(value, convert_dates=False): if not is_list_like(v): v = [v] v = np.array(v, copy=False) + + # we only care about object dtypes + if not is_object_dtype(v): + return value + shape = v.shape if not v.ndim == 1: v = v.ravel() - if len(v): - - def _try_datetime(v): - # safe coerce to datetime64 - try: - v = tslib.array_to_datetime(v, errors='raise') - except ValueError: + if not len(v): + return value - # we might have a sequence of the same-datetimes with tz's - # if so coerce to a DatetimeIndex; if they are not the same, - # then these stay as object dtype - try: - from pandas import to_datetime - return to_datetime(v) - except: - pass + def try_datetime(v): + # safe coerce to datetime64 + try: + v = tslib.array_to_datetime(v, errors='raise') + except ValueError: + # we might have a sequence of the same-datetimes with tz's + # if so coerce to a DatetimeIndex; if they are not the same, + # then these stay as object dtype + try: + from pandas import to_datetime + return to_datetime(v) except: pass - return v.reshape(shape) + except: + pass - def _try_timedelta(v): - # safe coerce to timedelta64 + return v.reshape(shape) - # will try first with a string & object conversion - from pandas import to_timedelta - try: - return to_timedelta(v)._values.reshape(shape) - except: - return v - - # do a quick 
inference for perf - sample = v[:min(3, len(v))] - inferred_type = lib.infer_dtype(sample) - - if (inferred_type in ['datetime', 'datetime64'] or - (convert_dates and inferred_type in ['date'])): - value = _try_datetime(v) - elif inferred_type in ['timedelta', 'timedelta64']: - value = _try_timedelta(v) - - # It's possible to have nulls intermixed within the datetime or - # timedelta. These will in general have an inferred_type of 'mixed', - # so have to try both datetime and timedelta. - - # try timedelta first to avoid spurious datetime conversions - # e.g. '00:00:01' is a timedelta but technically is also a datetime - elif inferred_type in ['mixed']: - - if lib.is_possible_datetimelike_array(_ensure_object(v)): - value = _try_timedelta(v) - if lib.infer_dtype(value) in ['mixed']: - value = _try_datetime(v) + def try_timedelta(v): + # safe coerce to timedelta64 + + # will try first with a string & object conversion + from pandas import to_timedelta + try: + return to_timedelta(v)._values.reshape(shape) + except: + return v + + inferred_type = lib.infer_datetimelike_array(_ensure_object(v)) + + if inferred_type == 'date' and convert_dates: + value = try_datetime(v) + elif inferred_type == 'datetime': + value = try_datetime(v) + elif inferred_type == 'timedelta': + value = try_timedelta(v) + elif inferred_type == 'nat': + + # if all NaT, return as datetime + if isnull(v).all(): + value = try_datetime(v) + else: + + # We have at least a NaT and a string + # try timedelta first to avoid spurious datetime conversions + # e.g. '00:00:01' is a timedelta but + # technically is also a datetime + value = try_timedelta(v) + if lib.infer_dtype(value) in ['mixed']: + value = try_datetime(v) return value From ca8ef494df8c841ccfde779b5b120ffea218ed46 Mon Sep 17 00:00:00 2001 From: the-nose-knows Date: Tue, 4 Apr 2017 15:32:46 -0700 Subject: [PATCH 334/933] ENH: Citing source in README file (#15856) * Citing source in README file For GH users who strictly or heavily use the web-view instead of a local Git, having a direct link is handy, as it does not require downloading the PDF _if_ the user wanted to go to the source of it directly. It's an alternative that allows those interested in more uploads similar to this PDF from the same author(s). * jorisvandenbossche's feedback I re-read the PDF and made sure the wording reflected the content presented. I also changed the source-citing so that is more friendly for .TXT files instead of Markdown or unspecified. * Update README.txt * English enhancement Improved sentence structure for English speakers. --- doc/cheatsheet/README.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/cheatsheet/README.txt b/doc/cheatsheet/README.txt index e2f6ec042e9cc..d32fe5bcd05a6 100644 --- a/doc/cheatsheet/README.txt +++ b/doc/cheatsheet/README.txt @@ -2,3 +2,7 @@ The Pandas Cheat Sheet was created using Microsoft Powerpoint 2013. To create the PDF version, within Powerpoint, simply do a "Save As" and pick "PDF' as the format. +This cheat sheet was inspired by the RstudioData Wrangling Cheatsheet[1], written by Irv Lustig, Princeton Consultants[2]. 
+ +[1]: https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf +[2]: http://www.princetonoptimization.com/ From 0a3706780feb77f241715ffcdebb14ad7d678d3d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 5 Apr 2017 12:59:17 +0200 Subject: [PATCH 335/933] DEPR: correct locations to access public tslib objects (#15897) --- pandas/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 1bc85899fb89f..83ad85e3e292b 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -64,7 +64,11 @@ parser = _DeprecatedModule(deprmod='pandas.parser', deprmodto='pandas.io.libparsers') lib = _DeprecatedModule(deprmod='pandas.lib', deprmodto='pandas._libs.lib', moved={'infer_dtype': 'pandas.api.lib.infer_dtype'}) -tslib = _DeprecatedModule(deprmod='pandas.tslib', deprmodto='pandas._libs.tslib') +tslib = _DeprecatedModule(deprmod='pandas.tslib', deprmodto='pandas._libs.tslib', + moved={'Timestamp': 'pandas.Timestamp', + 'Timedelta': 'pandas.Timedelta', + 'NaT': 'pandas.NaT', + 'OutOfBoundsDatetime': 'pandas.errors.OutOfBoundsDatetime'}) # use the closest tagged version if possible from ._version import get_versions From dbc1654fb1604b99c1b4fe31a26b5548ea623565 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 5 Apr 2017 15:15:37 -0400 Subject: [PATCH 336/933] TST: better testing of Series.nlargest/nsmallest xref #15299 Author: Jeff Reback Closes #15902 from jreback/series_n and squashes the following commits: 657eac8 [Jeff Reback] TST: better testing of Series.nlargest/nsmallest --- pandas/core/algorithms.py | 56 ++++++-- pandas/tests/series/test_analytics.py | 180 +++++++++++++++----------- 2 files changed, 151 insertions(+), 85 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index a62d290277443..99ef76e0f4812 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -12,6 +12,7 @@ from pandas.types.common import (is_unsigned_integer_dtype, is_signed_integer_dtype, is_integer_dtype, + is_complex_dtype, is_categorical_dtype, is_extension_type, is_datetimetz, @@ -40,6 +41,44 @@ from pandas._libs.tslib import iNaT +# --------------- # +# dtype access # +# --------------- # + +def _ensure_data_view(values): + """ + helper routine to ensure that our data is of the correct + input dtype for lower-level routines + + Parameters + ---------- + values : array-like + """ + + if needs_i8_conversion(values): + values = values.view(np.int64) + elif is_period_arraylike(values): + from pandas.tseries.period import PeriodIndex + values = PeriodIndex(values).asi8 + elif is_categorical_dtype(values): + values = values.values.codes + elif isinstance(values, (ABCSeries, ABCIndex)): + values = values.values + + if is_signed_integer_dtype(values): + values = _ensure_int64(values) + elif is_unsigned_integer_dtype(values): + values = _ensure_uint64(values) + elif is_complex_dtype(values): + values = _ensure_float64(values) + elif is_float_dtype(values): + values = _ensure_float64(values) + else: + values = _ensure_object(values) + + return values + + # --------------- # # top-level algos # # --------------- # @@ -867,9 +906,7 @@ def nsmallest(arr, n, keep='first'): narr = len(arr) n = min(n, narr) - sdtype = str(arr.dtype) - arr = arr.view(_dtype_map.get(sdtype, sdtype)) - + arr = _ensure_data_view(arr) kth_val = algos.kth_smallest(arr.copy(), n - 1) return _finalize_nsmallest(arr, kth_val, n, keep, narr) @@ -880,8 +917,7 @@ def nlargest(arr, n, keep='first'): Note: 
Fails silently with NaN. """ - sdtype = str(arr.dtype) - arr = arr.view(_dtype_map.get(sdtype, sdtype)) + arr = _ensure_data_view(arr) return nsmallest(-arr, n, keep=keep) @@ -910,9 +946,10 @@ def select_n_series(series, n, keep, method): nordered : Series """ dtype = series.dtype - if not issubclass(dtype.type, (np.integer, np.floating, np.datetime64, - np.timedelta64)): - raise TypeError("Cannot use method %r with dtype %s" % (method, dtype)) + if not ((is_numeric_dtype(dtype) and not is_complex_dtype(dtype)) or + needs_i8_conversion(dtype)): + raise TypeError("Cannot use method '{method}' with " + "dtype {dtype}".format(method=method, dtype=dtype)) if keep not in ('first', 'last'): raise ValueError('keep must be either "first", "last"') @@ -964,9 +1001,6 @@ def _finalize_nsmallest(arr, kth_val, n, keep, narr): return inds -_dtype_map = {'datetime64[ns]': 'int64', 'timedelta64[ns]': 'int64'} - - # ------- # # helpers # # ------- # diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index b747a680c17dd..732142f1bce9a 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1381,80 +1381,6 @@ def test_is_monotonic(self): self.assertFalse(s.is_monotonic) self.assertTrue(s.is_monotonic_decreasing) - def test_nsmallest_nlargest(self): - # float, int, datetime64 (use i8), timedelts64 (same), - # object that are numbers, object that are strings - - base = [3, 2, 1, 2, 5] - - s_list = [ - Series(base, dtype='int8'), - Series(base, dtype='int16'), - Series(base, dtype='int32'), - Series(base, dtype='int64'), - Series(base, dtype='float32'), - Series(base, dtype='float64'), - Series(base, dtype='uint8'), - Series(base, dtype='uint16'), - Series(base, dtype='uint32'), - Series(base, dtype='uint64'), - Series(base).astype('timedelta64[ns]'), - Series(pd.to_datetime(['2003', '2002', '2001', '2002', '2005'])), - ] - - raising = [ - Series([3., 2, 1, 2, '5'], dtype='object'), - Series([3., 2, 1, 2, 5], dtype='object'), - # not supported on some archs - # Series([3., 2, 1, 2, 5], dtype='complex256'), - Series([3., 2, 1, 2, 5], dtype='complex128'), - ] - - for r in raising: - dt = r.dtype - msg = "Cannot use method 'n(larg|small)est' with dtype %s" % dt - args = 2, len(r), 0, -1 - methods = r.nlargest, r.nsmallest - for method, arg in product(methods, args): - with tm.assertRaisesRegexp(TypeError, msg): - method(arg) - - for s in s_list: - - assert_series_equal(s.nsmallest(2), s.iloc[[2, 1]]) - assert_series_equal(s.nsmallest(2, keep='last'), s.iloc[[2, 3]]) - - empty = s.iloc[0:0] - assert_series_equal(s.nsmallest(0), empty) - assert_series_equal(s.nsmallest(-1), empty) - assert_series_equal(s.nlargest(0), empty) - assert_series_equal(s.nlargest(-1), empty) - - assert_series_equal(s.nsmallest(len(s)), s.sort_values()) - assert_series_equal(s.nsmallest(len(s) + 1), s.sort_values()) - assert_series_equal(s.nlargest(len(s)), s.iloc[[4, 0, 1, 3, 2]]) - assert_series_equal(s.nlargest(len(s) + 1), - s.iloc[[4, 0, 1, 3, 2]]) - - s = Series([3., np.nan, 1, 2, 5]) - assert_series_equal(s.nlargest(), s.iloc[[4, 0, 3, 2]]) - assert_series_equal(s.nsmallest(), s.iloc[[2, 3, 0, 4]]) - - msg = 'keep must be either "first", "last"' - with tm.assertRaisesRegexp(ValueError, msg): - s.nsmallest(keep='invalid') - with tm.assertRaisesRegexp(ValueError, msg): - s.nlargest(keep='invalid') - - # GH 13412 - s = Series([1, 4, 3, 2], index=[0, 0, 1, 1]) - result = s.nlargest(3) - expected = s.sort_values(ascending=False).head(3) - 
assert_series_equal(result, expected)
-        result = s.nsmallest(3)
-        expected = s.sort_values().head(3)
-        assert_series_equal(result, expected)
-
     def test_sort_index_level(self):
         mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
         s = Series([1, 2], mi)
@@ -1729,3 +1655,109 @@ def test_value_counts_categorical_not_ordered(self):
                         index=exp_idx, name='xxx')
         tm.assert_series_equal(s.value_counts(normalize=True), exp)
         tm.assert_series_equal(idx.value_counts(normalize=True), exp)
+
+
+@pytest.fixture
+def s_main_dtypes():
+    df = pd.DataFrame(
+        {'datetime': pd.to_datetime(['2003', '2002',
+                                     '2001', '2002',
+                                     '2005']),
+         'datetimetz': pd.to_datetime(
+             ['2003', '2002',
+              '2001', '2002',
+              '2005']).tz_localize('US/Eastern'),
+         'timedelta': pd.to_timedelta(['3d', '2d', '1d',
+                                       '2d', '5d'])})
+
+    for dtype in ['int8', 'int16', 'int32', 'int64',
+                  'float32', 'float64',
+                  'uint8', 'uint16', 'uint32', 'uint64']:
+        df[dtype] = Series([3, 2, 1, 2, 5], dtype=dtype)
+
+    return df
+
+
+class TestNLargestNSmallest(object):
+
+    @pytest.mark.parametrize(
+        "r", [Series([3., 2, 1, 2, '5'], dtype='object'),
+              Series([3., 2, 1, 2, 5], dtype='object'),
+              # not supported on some archs
+              # Series([3., 2, 1, 2, 5], dtype='complex256'),
+              Series([3., 2, 1, 2, 5], dtype='complex128'),
+              Series(list('abcde'), dtype='category'),
+              Series(list('abcde'))])
+    def test_error(self, r):
+        dt = r.dtype
+        msg = ("Cannot use method 'n(larg|small)est' with "
+               "dtype {dt}".format(dt=dt))
+        args = 2, len(r), 0, -1
+        methods = r.nlargest, r.nsmallest
+        for method, arg in product(methods, args):
+            with tm.assertRaisesRegexp(TypeError, msg):
+                method(arg)
+
+    @pytest.mark.parametrize(
+        "s",
+        [v for k, v in s_main_dtypes().iteritems()])
+    def test_nsmallest_nlargest(self, s):
+        # float, int, datetime64 (use i8), timedelta64 (same),
+        # object that are numbers, object that are strings
+
+        assert_series_equal(s.nsmallest(2), s.iloc[[2, 1]])
+        assert_series_equal(s.nsmallest(2, keep='last'), s.iloc[[2, 3]])
+
+        empty = s.iloc[0:0]
+        assert_series_equal(s.nsmallest(0), empty)
+        assert_series_equal(s.nsmallest(-1), empty)
+        assert_series_equal(s.nlargest(0), empty)
+        assert_series_equal(s.nlargest(-1), empty)
+
+        assert_series_equal(s.nsmallest(len(s)), s.sort_values())
+        assert_series_equal(s.nsmallest(len(s) + 1), s.sort_values())
+        assert_series_equal(s.nlargest(len(s)), s.iloc[[4, 0, 1, 3, 2]])
+        assert_series_equal(s.nlargest(len(s) + 1),
+                            s.iloc[[4, 0, 1, 3, 2]])
+
+    def test_misc(self):
+
+        s = Series([3., np.nan, 1, 2, 5])
+        assert_series_equal(s.nlargest(), s.iloc[[4, 0, 3, 2]])
+        assert_series_equal(s.nsmallest(), s.iloc[[2, 3, 0, 4]])
+
+        msg = 'keep must be either "first", "last"'
+        with tm.assertRaisesRegexp(ValueError, msg):
+            s.nsmallest(keep='invalid')
+        with tm.assertRaisesRegexp(ValueError, msg):
+            s.nlargest(keep='invalid')
+
+        # GH 15297
+        s = Series([1] * 5, index=[1, 2, 3, 4, 5])
+        expected_first = Series([1] * 3, index=[1, 2, 3])
+        expected_last = Series([1] * 3, index=[5, 4, 3])
+
+        result = s.nsmallest(3)
+        assert_series_equal(result, expected_first)
+
+        result = s.nsmallest(3, keep='last')
+        assert_series_equal(result, expected_last)
+
+        result = s.nlargest(3)
+        assert_series_equal(result, expected_first)
+
+        result = s.nlargest(3, keep='last')
+        assert_series_equal(result, expected_last)
+
+    @pytest.mark.parametrize('n', range(1, 5))
+    def test_n(self, n):
+
+        # GH 13412
+        s = Series([1, 4, 3, 2], index=[0, 0, 1, 1])
+        result = s.nlargest(n)
+        expected = s.sort_values(ascending=False).head(n)
+
assert_series_equal(result, expected) + + result = s.nsmallest(n) + expected = s.sort_values().head(n) + assert_series_equal(result, expected) From e4e87ec55765d31e59e97d89c71ed5a3fa2f3d38 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 5 Apr 2017 15:16:40 -0400 Subject: [PATCH 337/933] ENH: Add file buffer validation to I/O ops 1) Allows for more uniform handling of invalid file buffers to our `read_*` functions. 2) Adds a ton of new documentation to `inference.py` Closes #15337. xref #15895. Author: gfyoung Closes #15894 from gfyoung/validate-file-like and squashes the following commits: 5a8f8da [gfyoung] DOC: Document all of inference.py 81103f7 [gfyoung] ENH: Add file buffer validation to I/O ops --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/common.py | 23 +- pandas/io/excel.py | 5 +- pandas/tests/api/test_types.py | 2 +- pandas/tests/io/parser/common.py | 17 ++ pandas/tests/types/test_inference.py | 16 +- pandas/types/api.py | 1 + pandas/types/inference.py | 328 +++++++++++++++++++++++++-- 8 files changed, 361 insertions(+), 32 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 2e1cc396287ce..cbb4d32cc5edb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1033,6 +1033,7 @@ I/O - Bug in ``pd.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) - Bug in ``pd.read_csv()`` when an index was specified and no values were specified as null values (:issue:`15835`) +- Bug in ``pd.read_csv()`` in which certain invalid file objects caused the Python interpreter to crash (:issue:`15337`) - Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. 
(:issue:`15143`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) diff --git a/pandas/io/common.py b/pandas/io/common.py index 8bc7217db87f9..8ee6ded67f790 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -10,7 +10,7 @@ from pandas import compat from pandas.formats.printing import pprint_thing from pandas.core.common import AbstractMethodError -from pandas.types.common import is_number +from pandas.types.common import is_number, is_file_like # compat from pandas.errors import (ParserError, DtypeWarning, # noqa @@ -197,9 +197,19 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, encoding=encoding, compression=compression) - # It is a pathlib.Path/py.path.local or string + # Convert pathlib.Path/py.path.local or string filepath_or_buffer = _stringify_path(filepath_or_buffer) - return _expand_user(filepath_or_buffer), None, compression + + if isinstance(filepath_or_buffer, (compat.string_types, + compat.binary_type, + mmap.mmap)): + return _expand_user(filepath_or_buffer), None, compression + + if not is_file_like(filepath_or_buffer): + msg = "Invalid file path or buffer object type: {_type}" + raise ValueError(msg.format(_type=type(filepath_or_buffer))) + + return filepath_or_buffer, None, compression def file_path_to_url(path): @@ -416,6 +426,9 @@ def __init__(self, f): def __getattr__(self, name): return getattr(self.mmap, name) + def __iter__(self): + return self + def __next__(self): newline = self.mmap.readline() @@ -433,6 +446,10 @@ def __next__(self): return newline +if not compat.PY3: + MMapWrapper.next = lambda self: self.__next__() + + class UTF8Recoder(BaseIterator): """ diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 6d136869fc73f..737141f11d7d1 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -243,9 +243,8 @@ def __init__(self, io, **kwds): # to get_filepath_or_buffer() if _is_url(io): io = _urlopen(io) - # Deal with S3 urls, path objects, etc. 
Will convert them to - # buffer or path string - io, _, _ = get_filepath_or_buffer(io) + elif not isinstance(io, (ExcelFile, xlrd.Book)): + io, _, _ = get_filepath_or_buffer(io) if engine == 'xlrd' and isinstance(io, xlrd.Book): self.book = io diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 686de4a196034..f3fd6332417a1 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -24,7 +24,7 @@ class TestTypes(Base, tm.TestCase): 'is_timedelta64_dtype', 'is_timedelta64_ns_dtype', 'is_unsigned_integer_dtype', 'is_period', 'is_period_dtype', 'is_re', 'is_re_compilable', - 'is_dict_like', 'is_iterator', + 'is_dict_like', 'is_iterator', 'is_file_like', 'is_list_like', 'is_hashable', 'is_named_tuple', 'is_sequence', 'pandas_dtype'] diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 7faf485b65d10..36d5f2dd5274b 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -1678,3 +1678,20 @@ def test_file_handles(self): if PY3: self.assertFalse(m.closed) m.close() + + def test_invalid_file_buffer(self): + # see gh-15337 + + class InvalidBuffer(object): + pass + + msg = "Invalid file path or buffer object type" + + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(InvalidBuffer()) + + if PY3: + from unittest import mock + + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(mock.Mock()) diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py index b41df0da45234..de3a2ca35a7f5 100644 --- a/pandas/tests/types/test_inference.py +++ b/pandas/tests/types/test_inference.py @@ -17,7 +17,7 @@ from pandas import (Series, Index, DataFrame, Timedelta, DatetimeIndex, TimedeltaIndex, Timestamp, Panel, Period, Categorical) -from pandas.compat import u, PY2, lrange +from pandas.compat import u, PY2, PY3, StringIO, lrange from pandas.types import inference from pandas.types.common import (is_timedelta64_dtype, is_timedelta64_ns_dtype, @@ -78,6 +78,20 @@ def test_is_dict_like(): assert not inference.is_dict_like(f) +def test_is_file_like(): + is_file = inference.is_file_like + + data = StringIO("data") + assert is_file(data) + + data = [1, 2, 3] + assert not is_file(data) + + if PY3: + from unittest import mock + assert not is_file(mock.Mock()) + + def test_is_named_tuple(): passes = (collections.namedtuple('Test', list('abc'))(1, 2, 3), ) fails = ((1, 2, 3), 'a', Series({'pi': 3.14})) diff --git a/pandas/types/api.py b/pandas/types/api.py index c809cb3614a8c..e78514ce77822 100644 --- a/pandas/types/api.py +++ b/pandas/types/api.py @@ -52,6 +52,7 @@ is_re_compilable, is_dict_like, is_iterator, + is_file_like, is_list_like, is_hashable, is_named_tuple, diff --git a/pandas/types/inference.py b/pandas/types/inference.py index d8e3b3ee7329b..91418677c6b19 100644 --- a/pandas/types/inference.py +++ b/pandas/types/inference.py @@ -4,7 +4,7 @@ import re import numpy as np from numbers import Number -from pandas.compat import (string_types, text_type, +from pandas.compat import (PY2, string_types, text_type, string_and_binary_types) from pandas._libs import lib @@ -22,28 +22,211 @@ def is_number(obj): + """ + Check if the object is a number. + + Parameters + ---------- + obj : The object to check. + + Returns + ------- + is_number : bool + Whether `obj` is a number or not. 
+
+    Examples
+    --------
+    >>> is_number(1)
+    True
+    >>> is_number("foo")
+    False
+    """
+
     return isinstance(obj, (Number, np.number))
 
 
 def is_string_like(obj):
+    """
+    Check if the object is a string.
+
+    Parameters
+    ----------
+    obj : The object to check.
+
+    Examples
+    --------
+    >>> is_string_like("foo")
+    True
+    >>> is_string_like(1)
+    False
+
+    Returns
+    -------
+    is_str_like : bool
+        Whether `obj` is a string or not.
+    """
+
     return isinstance(obj, (text_type, string_types))
 
 
-def _iterable_not_string(x):
-    return (isinstance(x, collections.Iterable) and
-            not isinstance(x, string_types))
+def _iterable_not_string(obj):
+    """
+    Check if the object is an iterable but not a string.
+
+    Parameters
+    ----------
+    obj : The object to check.
+
+    Returns
+    -------
+    is_iter_not_string : bool
+        Whether `obj` is a non-string iterable.
+
+    Examples
+    --------
+    >>> _iterable_not_string([1, 2, 3])
+    True
+    >>> _iterable_not_string("foo")
+    False
+    >>> _iterable_not_string(1)
+    False
+    """
+
+    return (isinstance(obj, collections.Iterable) and
+            not isinstance(obj, string_types))
 
 
 def is_iterator(obj):
-    # python 3 generators have __next__ instead of next
-    return hasattr(obj, 'next') or hasattr(obj, '__next__')
+    """
+    Check if the object is an iterator.
+
+    Generators and other iterator objects qualify, but lists,
+    strings, and datetime objects do not.
+
+    Parameters
+    ----------
+    obj : The object to check.
+
+    Returns
+    -------
+    is_iter : bool
+        Whether `obj` is an iterator.
+
+    Examples
+    --------
+    >>> is_iterator(iter([1, 2, 3]))
+    True
+    >>> is_iterator(datetime(2017, 1, 1))
+    False
+    >>> is_iterator("foo")
+    False
+    >>> is_iterator(1)
+    False
+    """
+
+    if not hasattr(obj, '__iter__'):
+        return False
+
+    if PY2:
+        return hasattr(obj, 'next')
+    else:
+        # Python 3 generators have
+        # __next__ instead of next
+        return hasattr(obj, '__next__')
+
+
+def is_file_like(obj):
+    """
+    Check if the object is a file-like object.
+
+    For objects to be considered file-like, they must
+    be an iterator AND have the following four methods:
+
+    1) read
+    2) write
+    3) seek
+    4) tell
+
+    Note: file-like objects must be iterable, but
+    iterable objects need not be file-like.
+
+    .. versionadded:: 0.20.0
+
+    Parameters
+    ----------
+    obj : The object to check.
+
+    Returns
+    -------
+    is_file_like : bool
+        Whether `obj` has file-like properties.
+
+    Examples
+    --------
+    >>> buffer = StringIO("data")
+    >>> is_file_like(buffer)
+    True
+    >>> is_file_like([1, 2, 3])
+    False
+    """
+
+    file_attrs = ('read', 'write', 'seek', 'tell')
+
+    for attr in file_attrs:
+        if not hasattr(obj, attr):
+            return False
+
+    if not is_iterator(obj):
+        return False
+
+    return True
 
 
 def is_re(obj):
+    """
+    Check if the object is a regex pattern instance.
+
+    Parameters
+    ----------
+    obj : The object to check.
+
+    Returns
+    -------
+    is_regex : bool
+        Whether `obj` is a regex pattern.
+
+    Examples
+    --------
+    >>> is_re(re.compile(".*"))
+    True
+    >>> is_re("foo")
+    False
+    """
+
     return isinstance(obj, re._pattern_type)
 
 
 def is_re_compilable(obj):
+    """
+    Check if the object can be compiled into a regex pattern instance.
+
+    Parameters
+    ----------
+    obj : The object to check.
+
+    Returns
+    -------
+    is_regex_compilable : bool
+        Whether `obj` can be compiled as a regex pattern.
+ + Examples + -------- + >>> is_re_compilable(".*") + True + >>> is_re_compilable(1) + False + """ + try: re.compile(obj) except TypeError: @@ -52,21 +235,95 @@ def is_re_compilable(obj): return True -def is_list_like(arg): - return (hasattr(arg, '__iter__') and - not isinstance(arg, string_and_binary_types)) +def is_list_like(obj): + """ + Check if the object is list-like. + + Objects that are considered list-like are for example Python + lists, tuples, sets, NumPy arrays, and Pandas Series. + + Strings and datetime objects, however, are not considered list-like. + + Parameters + ---------- + obj : The object to check. + + Returns + ------- + is_list_like : bool + Whether `obj` has list-like properties. + + Examples + -------- + >>> is_list_like([1, 2, 3]) + True + >>> is_list_like({1, 2, 3}) + True + >>> is_list_like(datetime(2017, 1, 1)) + False + >>> is_list_like("foo") + False + >>> is_list_like(1) + False + """ + + return (hasattr(obj, '__iter__') and + not isinstance(obj, string_and_binary_types)) + +def is_dict_like(obj): + """ + Check if the object is dict-like. -def is_dict_like(arg): - return hasattr(arg, '__getitem__') and hasattr(arg, 'keys') + Parameters + ---------- + obj : The object to check. + Returns + ------- + is_dict_like : bool + Whether `obj` has dict-like properties. -def is_named_tuple(arg): - return isinstance(arg, tuple) and hasattr(arg, '_fields') + Examples + -------- + >>> is_dict_like({1: 2}) + True + >>> is_dict_like([1, 2, 3]) + False + """ + + return hasattr(obj, '__getitem__') and hasattr(obj, 'keys') + + +def is_named_tuple(obj): + """ + Check if the object is a named tuple. + Parameters + ---------- + obj : The object to check. -def is_hashable(arg): - """Return True if hash(arg) will succeed, False otherwise. + Returns + ------- + is_named_tuple : bool + Whether `obj` is a named tuple. + + Examples + -------- + >>> Point = namedtuple("Point", ["x", "y"]) + >>> p = Point(1, 2) + >>> + >>> is_named_tuple(p) + True + >>> is_named_tuple((1, 2)) + False + """ + + return isinstance(obj, tuple) and hasattr(obj, '_fields') + + +def is_hashable(obj): + """Return True if hash(obj) will succeed, False otherwise. Some types will pass a test against collections.Hashable but fail when they are actually hashed with hash(). @@ -82,25 +339,48 @@ def is_hashable(arg): >>> is_hashable(a) False """ - # unfortunately, we can't use isinstance(arg, collections.Hashable), which - # can be faster than calling hash, because numpy scalars on Python 3 fail - # this test + # Unfortunately, we can't use isinstance(obj, collections.Hashable), which + # can be faster than calling hash. That is because numpy scalars on Python + # 3 fail this test. - # reconsider this decision once this numpy bug is fixed: + # Reconsider this decision once this numpy bug is fixed: # https://github.com/numpy/numpy/issues/5562 try: - hash(arg) + hash(obj) except TypeError: return False else: return True -def is_sequence(x): +def is_sequence(obj): + """ + Check if the object is a sequence of objects. + String types are not included as sequences here. + + Parameters + ---------- + obj : The object to check. + + Returns + ------- + is_sequence : bool + Whether `obj` is a sequence of objects. + + Examples + -------- + >>> l = [1, 2, 3] + >>> + >>> is_sequence(l) + True + >>> is_sequence(iter(l)) + False + """ + try: - iter(x) - len(x) # it has a length - return not isinstance(x, string_and_binary_types) + iter(obj) # Can iterate over it. + len(obj) # Has a length associated with it. 
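+        # strings and bytes also iterate and have a length, so the
+        # isinstance check below is what actually excludes them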
+ return not isinstance(obj, string_and_binary_types) except (TypeError, AttributeError): return False From ba30e3a2e376035549b009079d44ba5ca7a4c48f Mon Sep 17 00:00:00 2001 From: alexandercbooth Date: Wed, 5 Apr 2017 16:47:07 -0500 Subject: [PATCH 338/933] BUG: addresses #14855 by fixing color kwarg conflict - [x] closes #14855 - [x] tests passed - [x] passes ``git diff upstream/master | flake8 --diff`` Author: alexandercbooth This patch had conflicts when merged, resolved by Committer: Tom Augspurger Closes #14871 from alexandercbooth/fix-color-scatterm-bug and squashes the following commits: 3245f09b9 [alexandercbooth] DOC: moving whatsnew entry to 0.20.0 8ff5f51f1 [alexandercbooth] BUG: addresses #14855 by fixing color kwarg conflict --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/plotting/test_misc.py | 6 ++++++ pandas/tools/plotting.py | 8 +++----- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index cbb4d32cc5edb..ad190671cbbdc 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1056,6 +1056,7 @@ Plotting - Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) +- Bug in ``pd.scatter_matrix()`` could accept either ``color`` or ``c``, but not both (:issue:`14855`) Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 11f00386ec592..812f039f1a2c7 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -76,6 +76,12 @@ def scat(**kwds): _check_plot_works(scat, diagonal='hist') with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, range_padding=.1) + with tm.assert_produces_warning(UserWarning): + _check_plot_works(scat, color='rgb') + with tm.assert_produces_warning(UserWarning): + _check_plot_works(scat, c='rgb') + with tm.assert_produces_warning(UserWarning): + _check_plot_works(scat, facecolor='rgb') def scat2(x, y, by=None, ax=None, figsize=None): return plotting.scatter_plot(df, x, y, by, ax, figsize=None) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index d311b0e6d83eb..f70a2b0b22140 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -349,7 +349,6 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, >>> df = DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) >>> scatter_matrix(df, alpha=0.2) """ - import matplotlib.pyplot as plt df = frame._get_numeric_data() n = df.columns.size @@ -367,8 +366,8 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, hist_kwds = hist_kwds or {} density_kwds = density_kwds or {} - # workaround because `c='b'` is hardcoded in matplotlibs scatter method - kwds.setdefault('c', plt.rcParams['patch.facecolor']) + # GH 14855 + kwds.setdefault('edgecolors', 'none') boundaries_list = [] for a in df.columns: @@ -2864,8 +2863,7 @@ def scatter_plot(data, x, y, by=None, ax=None, figsize=None, grid=False, """ import matplotlib.pyplot as plt - # workaround because `c='b'` is hardcoded in matplotlibs scatter method - kwargs.setdefault('c', plt.rcParams['patch.facecolor']) + kwargs.setdefault('edgecolors', 'none') def plot_group(group, ax): xvals = group[x].values From 1fbdc23def1cc91280c508ac5b7806ced579b264 Mon Sep 17 00:00:00 
2001 From: Tong SHEN Date: Thu, 6 Apr 2017 15:07:58 +0800 Subject: [PATCH 339/933] DOC: Fix a typo in travis.yml (#15915) --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d864b755541de..e5e05ed26da56 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ python: 3.5 # set NOCACHE-true # To delete caches go to https://travis-ci.org/OWNER/REPOSITORY/caches or run # travis cache --delete inside the project directory from the travis command line client -# The cash directories will be deleted if anything in ci/ changes in a commit +# The cache directories will be deleted if anything in ci/ changes in a commit cache: ccache: true directories: From b070d519c94bda36e116327b6cf854d8e9888308 Mon Sep 17 00:00:00 2001 From: Baurzhan Muftakhidinov Date: Thu, 6 Apr 2017 16:34:24 +0500 Subject: [PATCH 340/933] Fix a docstring typo in _fill_mi_header (#15918) [ci skip] --- pandas/io/excel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 737141f11d7d1..7f2f0cf4943b8 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -571,7 +571,7 @@ def _fill_mi_header(row, control_row): ---------- row : list List of items in a single row. - constrol_row : list of boolean + control_row : list of boolean Helps to determine if particular column is in same parent index as the previous value. Used to stop propagation of empty cells between different indexes. From 763197c3422d46b8e4cc807d58a63c6be6a9a288 Mon Sep 17 00:00:00 2001 From: Tong SHEN Date: Thu, 6 Apr 2017 19:35:59 +0800 Subject: [PATCH 341/933] DOC: Fix a typo in indexing.rst (#15916) * DOC: Fix a typo in indexing.rst * more typos fixed --- doc/source/indexing.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index bc8997b313053..f988fb7cd6806 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -69,7 +69,7 @@ Different Choices for Indexing .. versionadded:: 0.11.0 Object selection has had a number of user-requested additions in order to -support more explicit location based indexing. pandas now supports three types +support more explicit location based indexing. Pandas now supports three types of multi-axis indexing. - ``.loc`` is primarily label based, but may also be used with a boolean array. ``.loc`` will raise ``KeyError`` when the items are not found. Allowed inputs are: @@ -401,7 +401,7 @@ Selection By Position This is sometimes called ``chained assignment`` and should be avoided. See :ref:`Returning a View versus Copy ` -pandas provides a suite of methods in order to get **purely integer based indexing**. The semantics follow closely python and numpy slicing. These are ``0-based`` indexing. When slicing, the start bounds is *included*, while the upper bound is *excluded*. Trying to use a non-integer, even a **valid** label will raise a ``IndexError``. +Pandas provides a suite of methods in order to get **purely integer based indexing**. The semantics follow closely python and numpy slicing. These are ``0-based`` indexing. When slicing, the start bounds is *included*, while the upper bound is *excluded*. Trying to use a non-integer, even a **valid** label will raise an ``IndexError``. The ``.iloc`` attribute is the primary access method. 
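For instance, on a hypothetical frame the rules above play out like this:

    import pandas as pd

    df = pd.DataFrame({'x': [1, 2, 3]}, index=['a', 'b', 'c'])
    df.iloc[0:2]   # positions 0 and 1; the upper bound is excluded
    df.iloc['a']   # raises: labels are not valid positional inputs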
The following are valid inputs: From a0b089e1feee4e132d274271215d867295fc091a Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 6 Apr 2017 09:31:31 -0400 Subject: [PATCH 342/933] BUG: Standardize malformed row handling in Python engine (#15913) Closes gh-15910. --- doc/source/whatsnew/v0.20.0.txt | 4 +- pandas/io/parsers.py | 87 +++++++++++--------- pandas/tests/io/parser/c_parser_only.py | 9 ++ pandas/tests/io/parser/python_parser_only.py | 18 ++-- 4 files changed, 72 insertions(+), 46 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ad190671cbbdc..462341d3d692d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -365,6 +365,7 @@ Other Enhancements - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts ``str`` to separate joined fields; the default is ".", which is backward compatible. (:issue:`14883`) +- ``pd.read_csv()`` will now raise a ``csv.Error`` error whenever an end-of-file character is encountered in the middle of a data row (:issue:`15913`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations @@ -1034,7 +1035,8 @@ I/O - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) - Bug in ``pd.read_csv()`` when an index was specified and no values were specified as null values (:issue:`15835`) - Bug in ``pd.read_csv()`` in which certain invalid file objects caused the Python interpreter to crash (:issue:`15337`) -- Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`) +- Bug in ``pd.read_csv()`` in which invalid values for ``nrows`` and ``chunksize`` were allowed (:issue:`15767`) +- Bug in ``pd.read_csv()`` for the Python engine in which unhelpful error messages were being raised when parsing errors occurred (:issue:`15910`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) - Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index b624d2cc0c7ad..a85f9cda50879 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2469,26 +2469,7 @@ def _next_line(self): next(self.data) while True: - try: - orig_line = next(self.data) - except csv.Error as e: - msg = str(e) - - if 'NULL byte' in str(e): - msg = ('NULL byte detected. This byte ' - 'cannot be processed in Python\'s ' - 'native csv library at the moment, ' - 'so please pass in engine=\'c\' instead') - - if self.skipfooter > 0: - reason = ('Error could possibly be due to ' - 'parsing errors in the skipped footer rows ' - '(the skipfooter keyword is only applied ' - 'after Python\'s csv library has parsed ' - 'all rows).') - msg += '. 
' + reason - - raise csv.Error(msg) + orig_line = self._next_iter_line() line = self._check_comments([orig_line])[0] self.pos += 1 if (not self.skip_blank_lines and @@ -2510,6 +2491,43 @@ def _next_line(self): self.buf.append(line) return line + def _next_iter_line(self, **kwargs): + """ + Wrapper around iterating through `self.data` (CSV source). + + When a CSV error is raised, we check for specific + error messages that allow us to customize the + error message displayed to the user. + + Parameters + ---------- + kwargs : Keyword arguments used to customize the error message. + """ + + try: + return next(self.data) + except csv.Error as e: + msg = str(e) + + if 'NULL byte' in msg: + msg = ('NULL byte detected. This byte ' + 'cannot be processed in Python\'s ' + 'native csv library at the moment, ' + 'so please pass in engine=\'c\' instead') + elif 'newline inside string' in msg: + msg = ('EOF inside string starting with ' + 'line ' + str(kwargs['row_num'])) + + if self.skipfooter > 0: + reason = ('Error could possibly be due to ' + 'parsing errors in the skipped footer rows ' + '(the skipfooter keyword is only applied ' + 'after Python\'s csv library has parsed ' + 'all rows).') + msg += '. ' + reason + + raise csv.Error(msg) + def _check_comments(self, lines): if self.comment is None: return lines @@ -2688,7 +2706,6 @@ def _rows_to_cols(self, content): return zipped_content def _get_lines(self, rows=None): - source = self.data lines = self.buf new_rows = None @@ -2703,14 +2720,14 @@ def _get_lines(self, rows=None): rows -= len(self.buf) if new_rows is None: - if isinstance(source, list): - if self.pos > len(source): + if isinstance(self.data, list): + if self.pos > len(self.data): raise StopIteration if rows is None: - new_rows = source[self.pos:] - new_pos = len(source) + new_rows = self.data[self.pos:] + new_pos = len(self.data) else: - new_rows = source[self.pos:self.pos + rows] + new_rows = self.data[self.pos:self.pos + rows] new_pos = self.pos + rows # Check for stop rows. n.b.: self.skiprows is a set. 
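A minimal sketch of the user-visible effect of these parser changes (hypothetical data, modeled on the tests added below): the Python engine now surfaces a ``csv.Error`` with a helpful message instead of a bare ``Exception``:

    import pandas as pd
    from pandas.compat import StringIO

    data = 'a,b,c\ncat,foo,bar\ndog,foo,"baz'  # EOF inside an open quote
    pd.read_csv(StringIO(data), engine='python', skipfooter=1)
    # raises csv.Error, pointing at the skipped footer rows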
@@ -2726,21 +2743,17 @@ def _get_lines(self, rows=None): try: if rows is not None: for _ in range(rows): - new_rows.append(next(source)) + new_rows.append(next(self.data)) lines.extend(new_rows) else: rows = 0 + while True: - try: - new_rows.append(next(source)) - rows += 1 - except csv.Error as inst: - if 'newline inside string' in str(inst): - row_num = str(self.pos + rows) - msg = ('EOF inside string starting with ' - 'line ' + row_num) - raise Exception(msg) - raise + new_row = self._next_iter_line( + row_num=self.pos + rows) + new_rows.append(new_row) + rows += 1 + except StopIteration: if self.skiprows: new_rows = [row for i, row in enumerate(new_rows) diff --git a/pandas/tests/io/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py index ffbd904843bfc..837b7a7922d75 100644 --- a/pandas/tests/io/parser/c_parser_only.py +++ b/pandas/tests/io/parser/c_parser_only.py @@ -408,3 +408,12 @@ def test_large_difference_in_columns(self): expected = DataFrame([row.split(',')[0] for row in rows]) tm.assert_frame_equal(result, expected) + + def test_data_after_quote(self): + # see gh-15910 + + data = 'a\n1\n"b"a' + result = self.read_csv(StringIO(data)) + expected = DataFrame({'a': ['1', 'ba']}) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index bd76070933c47..36356315419c4 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -225,15 +225,17 @@ def test_multi_char_sep_quotes(self): def test_skipfooter_bad_row(self): # see gh-13879 + # see gh-15910 - data = 'a,b,c\ncat,foo,bar\ndog,foo,"baz' msg = 'parsing errors in the skipped footer rows' - with tm.assertRaisesRegexp(csv.Error, msg): - self.read_csv(StringIO(data), skipfooter=1) - - # We expect no match, so there should be an assertion - # error out of the inner context manager. - with tm.assertRaises(AssertionError): + for data in ('a\n1\n"b"a', + 'a,b,c\ncat,foo,bar\ndog,foo,"baz'): with tm.assertRaisesRegexp(csv.Error, msg): - self.read_csv(StringIO(data)) + self.read_csv(StringIO(data), skipfooter=1) + + # We expect no match, so there should be an assertion + # error out of the inner context manager. 
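+            # (the bare read does not raise csv.Error here, so the
+            #  inner assertRaisesRegexp fails, as this test expects)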
+            with tm.assertRaises(AssertionError):
+                with tm.assertRaisesRegexp(csv.Error, msg):
+                    self.read_csv(StringIO(data))

From c1122523ede85340b042b83b629731db8176378f Mon Sep 17 00:00:00 2001
From: Roger Thomas
Date: Thu, 6 Apr 2017 09:38:11 -0400
Subject: [PATCH 343/933] BUG: Fix nsmallest/nlargest With Identical Values

closes #15297

Author: Roger Thomas

Closes #15299 from RogerThomas/fix_nsmallest_nlargest_with_n_identical_values and squashes the following commits:

d3964f8 [Roger Thomas] Fix nsmallest/nlargest With Identical Values
---
 doc/source/whatsnew/v0.20.0.txt | 1 +
 pandas/core/algorithms.py | 75 +++++++++-
 pandas/tests/frame/test_analytics.py | 199 ++++++++++++++++---------
 3 files changed, 200 insertions(+), 75 deletions(-)
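To make the fix concrete, a small sketch of the intended behavior (hypothetical data, mirroring the new tests below):

    import pandas as pd

    df = pd.DataFrame({'a': [1] * 5, 'b': [1, 2, 3, 4, 5]})

    # every value of 'a' ties; the result must still be exactly n rows
    df.nlargest(3, 'a')     # rows where b == 1, 2, 3
    df.nsmallest(3, 'a')    # rows where b == 1, 2, 3

The new implementation walks the requested columns one at a time, keeping non-duplicated hits and deferring tied keys to the next column, rather than merging on the first column (which duplicated rows for repeated keys).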
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 462341d3d692d..cb9e2496757ef 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -1097,6 +1097,7 @@ Reshaping
 - Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`)
 - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`)
 - Bug with ``sort=True`` in ``DataFrame.join`` and ``pd.merge`` when joining on indexes (:issue:`15582`)
+- Bug in ``DataFrame.nsmallest`` and ``DataFrame.nlargest`` where identical values resulted in duplicated rows (:issue:`15297`)
 
 Numeric
 ^^^^^^^
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 99ef76e0f4812..80664a9ba3019 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -931,6 +931,15 @@ def select_n_slow(dropped, n, keep, method):
 _select_methods = {'nsmallest': nsmallest, 'nlargest': nlargest}
 
 
+def _is_valid_dtype_n_method(dtype):
+    """
+    Helper function to determine if dtype is valid for
+    nsmallest/nlargest methods
+    """
+    return ((is_numeric_dtype(dtype) and not is_complex_dtype(dtype)) or
+            needs_i8_conversion(dtype))
+
+
 def select_n_series(series, n, keep, method):
     """Implement n largest/smallest for pandas Series
 
@@ -946,8 +955,7 @@ def select_n_series(series, n, keep, method):
     nordered : Series
     """
     dtype = series.dtype
-    if not ((is_numeric_dtype(dtype) and not is_complex_dtype(dtype)) or
-            needs_i8_conversion(dtype)):
+    if not _is_valid_dtype_n_method(dtype):
         raise TypeError("Cannot use method '{method}' with "
                         "dtype {dtype}".format(method=method, dtype=dtype))
 
@@ -981,14 +989,67 @@ def select_n_frame(frame, columns, n, method, keep):
     -------
     nordered : DataFrame
     """
-    from pandas.core.series import Series
+    from pandas import Int64Index
     if not is_list_like(columns):
         columns = [columns]
    columns = list(columns)
-    ser = getattr(frame[columns[0]], method)(n, keep=keep)
-    if isinstance(ser, Series):
-        ser = ser.to_frame()
-    return ser.merge(frame, on=columns[0], left_index=True)[frame.columns]
+    for column in columns:
+        dtype = frame[column].dtype
+        if not _is_valid_dtype_n_method(dtype):
+            raise TypeError((
+                "Column {column!r} has dtype {dtype}, cannot use method "
+                "{method!r} with this dtype"
+            ).format(column=column, dtype=dtype, method=method))
+
+    def get_indexer(current_indexer, other_indexer):
+        """Helper function to concat `current_indexer` and `other_indexer`
+        depending on `method`
+        """
+        if method == 'nsmallest':
+            return current_indexer.append(other_indexer)
+        else:
+            return other_indexer.append(current_indexer)
+
+    # Below we save and reset the index in case index contains duplicates
+    original_index = frame.index
+    cur_frame = frame = frame.reset_index(drop=True)
+    cur_n = n
+    indexer = Int64Index([])
+
+    for i, column in enumerate(columns):
+
+        # For each column we apply method to cur_frame[column]. If it is
+        # the last column in columns, or if the values returned are
+        # unique in frame[column], we save this index and break.
+        # Otherwise we must save the index of the non-duplicated values
+        # and set the next cur_frame to cur_frame filtered on all
+        # duplicated values (GH 15297)
+        series = cur_frame[column]
+        values = getattr(series, method)(cur_n, keep=keep)
+        is_last_column = len(columns) - 1 == i
+        if is_last_column or values.nunique() == series.isin(values).sum():
+
+            # Last column in columns or values are unique in series =>
+            # values is all that matters
+            indexer = get_indexer(indexer, values.index)
+            break
+
+        duplicated_filter = series.duplicated(keep=False)
+        duplicated = values[duplicated_filter]
+        non_duplicated = values[~duplicated_filter]
+        indexer = get_indexer(indexer, non_duplicated.index)
+
+        # Must set cur frame to include all duplicated values to consider
+        # for the next column; we can also reduce cur_n by the current
+        # length of the indexer
+        cur_frame = cur_frame[series.isin(duplicated)]
+        cur_n = n - len(indexer)
+
+    frame = frame.take(indexer)
+
+    # Restore the index on frame
+    frame.index = original_index.take(indexer)
+    return frame
 
 
 def _finalize_nsmallest(arr, kth_val, n, keep, narr):
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index aa15e9fbab4cc..dda52bbc536c9 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -7,11 +7,12 @@
 import sys
 import pytest
+from string import ascii_lowercase
 from numpy import nan
 from numpy.random import randn
 import numpy as np
-from pandas.compat import lrange
+from pandas.compat import lrange, product
 from pandas import (compat, isnull, notnull, DataFrame, Series,
                     MultiIndex, date_range, Timestamp)
 import pandas as pd
@@ -1120,73 +1121,6 @@ def __nonzero__(self):
         self.assertTrue(r1.all())
 
     # ----------------------------------------------------------------------
-    # Top / bottom
-
-    def test_nlargest(self):
-        # GH10393
-        from string import ascii_lowercase
-        df =
pd.DataFrame({'a': [1, 2, 3, 4], - 'b': [4, 3, 2, 1], - 'c': [0, 1, 2, 3]}, - index=[0, 0, 1, 1]) - result = df.nsmallest(4, 'a') - expected = df.sort_values('a').head(4) - tm.assert_frame_equal(result, expected) - - result = df.nlargest(4, 'a') - expected = df.sort_values('a', ascending=False).head(4) - tm.assert_frame_equal(result, expected) - - result = df.nsmallest(4, ['a', 'c']) - expected = df.sort_values(['a', 'c']).head(4) - tm.assert_frame_equal(result, expected) - - result = df.nsmallest(4, ['c', 'a']) - expected = df.sort_values(['c', 'a']).head(4) - tm.assert_frame_equal(result, expected) - - result = df.nlargest(4, ['a', 'c']) - expected = df.sort_values(['a', 'c'], ascending=False).head(4) - tm.assert_frame_equal(result, expected) - - result = df.nlargest(4, ['c', 'a']) - expected = df.sort_values(['c', 'a'], ascending=False).head(4) - tm.assert_frame_equal(result, expected) - # ---------------------------------------------------------------------- # Isin def test_isin(self): @@ -1965,3 +1899,132 @@ def test_dot(self): with tm.assertRaisesRegexp(ValueError, 'aligned'): df.dot(df2) + + +@pytest.fixture +def df_duplicates(): + return pd.DataFrame({'a': [1, 2, 3, 4, 4], + 'b': [1, 1, 1, 1, 1], + 'c': [0, 1, 2, 5, 4]}, + index=[0, 0, 1, 1, 1]) + + +@pytest.fixture +def df_strings(): + return pd.DataFrame({'a': np.random.permutation(10), + 'b': list(ascii_lowercase[:10]), + 'c': np.random.permutation(10).astype('float64')}) + + +@pytest.fixture +def df_main_dtypes(): + return pd.DataFrame( + {'group': [1, 1, 2], + 'int': [1, 2, 3], + 'float': [4., 5., 6.], + 'string': list('abc'), + 'category_string': pd.Series(list('abc')).astype('category'), + 'category_int': [7, 8, 9], + 'datetime': pd.date_range('20130101', periods=3), + 'datetimetz': pd.date_range('20130101', + periods=3, + tz='US/Eastern'), + 'timedelta': pd.timedelta_range('1 s', periods=3, freq='s')}, + columns=['group', 'int', 'float', 'string', + 'category_string', 'category_int', + 'datetime', 'datetimetz', + 'timedelta']) + + +class TestNLargestNSmallest(object): + + dtype_error_msg_template = ("Column {column!r} has dtype {dtype}, cannot " + "use method {method!r} with this dtype") + + # ---------------------------------------------------------------------- + # Top / bottom + @pytest.mark.parametrize( + 'method, n, order', + product(['nsmallest', 'nlargest'], range(1, 11), + [['a'], + ['c'], + ['a', 'b'], + ['a', 'c'], + ['b', 'a'], + ['b', 'c'], + ['a', 'b', 'c'], + ['c', 'a', 'b'], + ['c', 'b', 'a'], + ['b', 'c', 'a'], + ['b', 'a', 'c'], + + # dups! 
+ ['b', 'c', 'c'], + + ])) + def test_n(self, df_strings, method, n, order): + # GH10393 + df = df_strings + if 'b' in order: + + error_msg = self.dtype_error_msg_template.format( + column='b', method=method, dtype='object') + with tm.assertRaisesRegexp(TypeError, error_msg): + getattr(df, method)(n, order) + else: + ascending = method == 'nsmallest' + result = getattr(df, method)(n, order) + expected = df.sort_values(order, ascending=ascending).head(n) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + 'method, columns', + product(['nsmallest', 'nlargest'], + product(['group'], ['category_string', 'string']) + )) + def test_n_error(self, df_main_dtypes, method, columns): + df = df_main_dtypes + error_msg = self.dtype_error_msg_template.format( + column=columns[1], method=method, dtype=df[columns[1]].dtype) + with tm.assertRaisesRegexp(TypeError, error_msg): + getattr(df, method)(2, columns) + + def test_n_all_dtypes(self, df_main_dtypes): + df = df_main_dtypes + df.nsmallest(2, list(set(df) - {'category_string', 'string'})) + df.nlargest(2, list(set(df) - {'category_string', 'string'})) + + def test_n_identical_values(self): + # GH15297 + df = pd.DataFrame({'a': [1] * 5, 'b': [1, 2, 3, 4, 5]}) + + result = df.nlargest(3, 'a') + expected = pd.DataFrame( + {'a': [1] * 3, 'b': [1, 2, 3]}, index=[0, 1, 2] + ) + tm.assert_frame_equal(result, expected) + + result = df.nsmallest(3, 'a') + expected = pd.DataFrame({'a': [1] * 3, 'b': [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + 'n, order', + product([1, 2, 3, 4, 5], + [['a', 'b', 'c'], + ['c', 'b', 'a'], + ['a'], + ['b'], + ['a', 'b'], + ['c', 'b']])) + def test_n_duplicate_index(self, df_duplicates, n, order): + # GH 13412 + + df = df_duplicates + result = df.nsmallest(n, order) + expected = df.sort_values(order).head(n) + tm.assert_frame_equal(result, expected) + + result = df.nlargest(n, order) + expected = df.sort_values(order, ascending=False).head(n) + tm.assert_frame_equal(result, expected) From 4502e82083f4e253630588665a4fc6002c4f32ed Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 6 Apr 2017 12:41:07 -0400 Subject: [PATCH 344/933] TST: skip decimal conversion tests on 32-bit (#15922) xref #15865 --- pandas/tests/io/json/test_pandas.py | 5 ++++- pandas/tests/io/json/test_ujson.py | 26 ++++++-------------------- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 8fc8ecbdf8abc..a24e8cdaf0273 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=W0612,E1101 import pytest -from pandas.compat import range, lrange, StringIO, OrderedDict +from pandas.compat import (range, lrange, StringIO, + OrderedDict, is_platform_32bit) import os import numpy as np @@ -380,6 +381,8 @@ def test_frame_from_json_nones(self): unser = read_json(df.to_json(), dtype=False) self.assertTrue(np.isnan(unser[2][0])) + @pytest.mark.skipif(is_platform_32bit(), + reason="not compliant on 32-bit, xref #15865") def test_frame_to_json_float_precision(self): df = pd.DataFrame([dict(a_float=0.95)]) encoded = df.to_json(double_precision=1) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index c2cbbe1ca65ab..dcfa939f84d7e 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -8,8 +8,6 @@ import simplejson as json import math import pytest -import 
platform -import sys import time import datetime import calendar @@ -25,18 +23,14 @@ import pandas.util.testing as tm -def _skip_if_python_ver(skip_major, skip_minor=None): - major, minor = sys.version_info[:2] - if major == skip_major and (skip_minor is None or minor == skip_minor): - pytest.skip("skipping Python version %d.%d" % (major, minor)) - - json_unicode = (json.dumps if compat.PY3 else partial(json.dumps, encoding="utf-8")) class UltraJSONTests(TestCase): + @pytest.mark.skipif(compat.is_platform_32bit(), + reason="not compliant on 32-bit, xref #15865") def test_encodeDecimal(self): sut = decimal.Decimal("1337.1337") encoded = ujson.encode(sut, double_precision=15) @@ -153,10 +147,9 @@ def test_decimalDecodeTestPrecise(self): decoded = ujson.decode(encoded, precise_float=True) self.assertEqual(sut, decoded) + @pytest.mark.skipif(compat.is_platform_windows() and not compat.PY3, + reason="buggy on win-64 for py2") def test_encodeDoubleTinyExponential(self): - if compat.is_platform_windows() and not compat.PY3: - pytest.skip("buggy on win-64 for py2") - num = 1e-40 self.assertEqual(num, ujson.decode(ujson.encode(num))) num = 1e-100 @@ -275,8 +268,6 @@ def test_encodeUnicodeConversion2(self): self.assertEqual(dec, json.loads(enc)) def test_encodeUnicodeSurrogatePair(self): - _skip_if_python_ver(2, 5) - _skip_if_python_ver(2, 6) input = "\xf0\x90\x8d\x86" enc = ujson.encode(input) dec = ujson.decode(enc) @@ -285,8 +276,6 @@ def test_encodeUnicodeSurrogatePair(self): self.assertEqual(dec, json.loads(enc)) def test_encodeUnicode4BytesUTF8(self): - _skip_if_python_ver(2, 5) - _skip_if_python_ver(2, 6) input = "\xf0\x91\x80\xb0TRAILINGNORMAL" enc = ujson.encode(input) dec = ujson.decode(enc) @@ -295,8 +284,6 @@ def test_encodeUnicode4BytesUTF8(self): self.assertEqual(dec, json.loads(enc)) def test_encodeUnicode4BytesUTF8Highest(self): - _skip_if_python_ver(2, 5) - _skip_if_python_ver(2, 6) input = "\xf3\xbf\xbf\xbfTRAILINGNORMAL" enc = ujson.encode(input) @@ -462,7 +449,6 @@ def test_datetime_units(self): self.assertRaises(ValueError, ujson.encode, val, date_unit='foo') def test_encodeToUTF8(self): - _skip_if_python_ver(2, 5) input = "\xe6\x97\xa5\xd1\x88" enc = ujson.encode(input, ensure_ascii=False) dec = ujson.decode(enc) @@ -696,8 +682,8 @@ def test_decodeNumericIntNeg(self): input = "-31337" self.assertEqual(-31337, ujson.decode(input)) + @pytest.mark.skipif(compat.PY3, reason="only PY2") def test_encodeUnicode4BytesUTF8Fail(self): - _skip_if_python_ver(3) input = "\xfd\xbf\xbf\xbf\xbf\xbf" try: enc = ujson.encode(input) # noqa @@ -1029,7 +1015,7 @@ def testIntMax(self): num = np.uint32(np.iinfo(np.uint32).max) self.assertEqual(np.uint32(ujson.decode(ujson.encode(num))), num) - if platform.architecture()[0] != '32bit': + if not compat.is_platform_32bit(): num = np.int64(np.iinfo(np.int64).max) self.assertEqual(np.int64(ujson.decode(ujson.encode(num))), num) From 0cfc08cf4584e8442c84c30d53f1dceafeac5abf Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 6 Apr 2017 20:16:55 -0400 Subject: [PATCH 345/933] CLN: algos (#15929) * CLN: clean up select_n algos * CLN: clean ensure_data closes #15903 * return ndtype, so can eliminate special cases * unique * fixups --- pandas/core/algorithms.py | 942 ++++++++++++++---------------- pandas/core/frame.py | 10 +- pandas/core/series.py | 6 +- pandas/tests/test_algos.py | 21 +- pandas/tests/types/test_dtypes.py | 1 + pandas/types/common.py | 2 + pandas/types/dtypes.py | 2 + 7 files changed, 471 insertions(+), 513 deletions(-) diff --git 
a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 80664a9ba3019..244f882f2c103 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -8,30 +8,22 @@
 from pandas import compat, _np_version_under1p8
 from pandas.types.cast import maybe_promote
-from pandas.types.generic import ABCSeries, ABCIndex
-from pandas.types.common import (is_unsigned_integer_dtype,
-                                 is_signed_integer_dtype,
-                                 is_integer_dtype,
-                                 is_complex_dtype,
-                                 is_categorical_dtype,
-                                 is_extension_type,
-                                 is_datetimetz,
-                                 is_period_dtype,
-                                 is_period_arraylike,
-                                 is_numeric_dtype,
-                                 is_float_dtype,
-                                 is_bool_dtype,
-                                 needs_i8_conversion,
-                                 is_categorical,
-                                 is_datetime64_dtype,
-                                 is_timedelta64_dtype,
-                                 is_scalar,
-                                 _ensure_platform_int,
-                                 _ensure_object,
-                                 _ensure_float64,
-                                 _ensure_uint64,
-                                 _ensure_int64,
-                                 is_list_like)
+from pandas.types.generic import (ABCSeries, ABCIndex,
+                                  ABCIndexClass, ABCCategorical)
+from pandas.types.common import (
+    is_unsigned_integer_dtype, is_signed_integer_dtype,
+    is_integer_dtype, is_complex_dtype,
+    is_categorical_dtype, is_sparse,
+    is_period_dtype,
+    is_numeric_dtype, is_float_dtype,
+    is_bool_dtype, needs_i8_conversion,
+    is_categorical, is_datetimetz,
+    is_datetime64_any_dtype, is_datetime64tz_dtype,
+    is_timedelta64_dtype,
+    is_scalar, is_list_like,
+    _ensure_platform_int, _ensure_object,
+    _ensure_float64, _ensure_uint64,
+    _ensure_int64)
 from pandas.compat.numpy import _np_version_under1p10
 from pandas.types.missing import isnull
@@ -45,40 +37,190 @@
 # dtype access #
 # --------------- #
 
-def _ensure_data_view(values):
+def _ensure_data(values, dtype=None):
     """
-    helper routine to ensure that our data is of the correct
+    routine to ensure that our data is of the correct
     input dtype for lower-level routines
 
+    This will coerce:
+    - ints -> int64
+    - uint -> uint64
+    - bool -> uint64 (TODO this should be uint8)
+    - datetimelike -> i8
+    - datetime64tz -> i8 (in local tz)
+    - categorical -> codes
+
     Parameters
     ----------
     values : array-like
+    dtype : pandas_dtype, optional
+        coerce to this dtype
+
+    Returns
+    -------
+    (ndarray, pandas_dtype, algo dtype as a string)
+
     """
 
-    if needs_i8_conversion(values):
-        values = values.view(np.int64)
-    elif is_period_arraylike(values):
-        from pandas.tseries.period import PeriodIndex
-        values = PeriodIndex(values).asi8
-    elif is_categorical_dtype(values):
-        values = values.values.codes
-    elif isinstance(values, (ABCSeries, ABCIndex)):
-        values = values.values
-
-    if is_signed_integer_dtype(values):
+    if (needs_i8_conversion(values) or
+            is_period_dtype(dtype) or
+            is_datetime64_any_dtype(dtype) or
+            is_timedelta64_dtype(dtype)):
+        if is_period_dtype(values) or is_period_dtype(dtype):
+            from pandas import PeriodIndex
+            values = PeriodIndex(values)
+            dtype = values.dtype
+        elif is_timedelta64_dtype(values) or is_timedelta64_dtype(dtype):
+            from pandas import TimedeltaIndex
+            values = TimedeltaIndex(values)
+            dtype = values.dtype
+        else:
+            # Datetime
+            from pandas import DatetimeIndex
+            values = DatetimeIndex(values)
+            dtype = values.dtype
+
+        return values.asi8, dtype, 'int64'
+
+    elif is_categorical_dtype(values) or is_categorical_dtype(dtype):
+        values = getattr(values, 'values', values)
+        values = values.codes
+        dtype = 'category'
+
+        # we are actually coercing to int64
+        # until our algos support int* directly (not all do)
         values = _ensure_int64(values)
-    elif is_unsigned_integer_dtype(values):
-        values = _ensure_uint64(values)
-    elif is_complex_dtype(values):
-        values = _ensure_float64(values)
-    elif is_float_dtype(values):
-        values = _ensure_float64(values)
-    else:
-        values = _ensure_object(values)
-
-    return values
+
+        return values, dtype, 'int64'
+
+    values = np.asarray(values)
+
+    try:
+        if is_bool_dtype(values) or is_bool_dtype(dtype):
+            # we are actually coercing to uint64
+            # until our algos support uint8 directly (see TODO)
+            values = values.astype('uint64')
+            dtype = 'bool'
+            ndtype = 'uint64'
+        elif is_signed_integer_dtype(values) or is_signed_integer_dtype(dtype):
+            values = _ensure_int64(values)
+            ndtype = dtype = 'int64'
+        elif (is_unsigned_integer_dtype(values) or
+              is_unsigned_integer_dtype(dtype)):
+            values = _ensure_uint64(values)
+            ndtype = dtype = 'uint64'
+        elif is_complex_dtype(values) or is_complex_dtype(dtype):
+            values = _ensure_float64(values)
+            ndtype = dtype = 'float64'
+        elif is_float_dtype(values) or is_float_dtype(dtype):
+            values = _ensure_float64(values)
+            ndtype = dtype = 'float64'
+        else:
+            values = _ensure_object(values)
+            ndtype = dtype = 'object'
+
+    except (TypeError, ValueError):
+        # if we are trying to coerce to a dtype
+        # and it is incompatible, this will fall through to here
+        values = _ensure_object(values)
+        ndtype = dtype = 'object'
+
+    return values, dtype, ndtype
+
+
+def _reconstruct_data(values, dtype, original):
+    """
+    reverse of _ensure_data
+
+    Parameters
+    ----------
+    values : ndarray
+    dtype : pandas_dtype
+    original : ndarray-like
+
+    Returns
+    -------
+    Index for extension types, otherwise ndarray casted to dtype
+
+    """
+    from pandas import Index
+    if is_categorical_dtype(dtype):
+        pass
+    elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype):
+        values = Index(original)._shallow_copy(values, name=None)
+    elif dtype is not None:
+        values = values.astype(dtype)
+
+    return values
+
+
+def _ensure_arraylike(values):
+    """
+    ensure that we are arraylike if not already
+    """
+    if not isinstance(values, (np.ndarray, ABCCategorical,
+                               ABCIndexClass, ABCSeries)):
+        values = np.array(values)
+    return values
+
+
+_hashtables = {
+    'float64': (htable.Float64HashTable, htable.Float64Vector),
+    'uint64': (htable.UInt64HashTable, htable.UInt64Vector),
+    'int64': (htable.Int64HashTable, htable.Int64Vector),
+    'string': (htable.StringHashTable, htable.ObjectVector),
+    'object': (htable.PyObjectHashTable, htable.ObjectVector)
+}
+
+
+def _get_hashtable_algo(values):
+    """
+    Parameters
+    ----------
+    values : arraylike
+
+    Returns
+    -------
+    tuples(hashtable class,
+           vector class,
+           values,
+           dtype,
+           ndtype)
+    """
+    values, dtype, ndtype = _ensure_data(values)
+
+    if ndtype == 'object':
+
+        # it's cheaper to use a String Hash Table than Object
+        if lib.infer_dtype(values) in ['string']:
+            ndtype = 'string'
+        else:
+            ndtype = 'object'
+
+    htable, table = _hashtables[ndtype]
+    return (htable, table, values, dtype, ndtype)
+
+
+def _get_data_algo(values, func_map):
+
+    if is_categorical_dtype(values):
+        values = values._values_for_rank()
+
+    values, dtype, ndtype = _ensure_data(values)
+    if ndtype == 'object':
+
+        # it's cheaper to use a String Hash Table than Object
+        if lib.infer_dtype(values) in ['string']:
+            try:
+                f = func_map['string']
+            except KeyError:
+                pass
+
+    f = func_map.get(ndtype, func_map['object'])
+
+    return f, values
+
+
 # --------------- #
 # top-level algos #
 # --------------- #
@@ -104,92 +246,41 @@ def match(to_match, values, na_sentinel=-1):
     match : ndarray of integers
     """
     values = com._asarray_tuplesafe(values)
-    if issubclass(values.dtype.type, string_types):
-        values = np.array(values, dtype='O')
-
-    f = lambda htype, caster: _match_object(to_match, values, htype, caster)
-    result =
_hashtable_algo(f, values, np.int64) + htable, _, values, dtype, ndtype = _get_hashtable_algo(values) + to_match, _, _ = _ensure_data(to_match, dtype) + table = htable(min(len(to_match), 1000000)) + table.map_locations(values) + result = table.lookup(to_match) if na_sentinel != -1: # replace but return a numpy array # use a Series because it handles dtype conversions properly - from pandas.core.series import Series + from pandas import Series result = Series(result.ravel()).replace(-1, na_sentinel).values.\ reshape(result.shape) return result -def _match_object(values, index, table_type, type_caster): - values = type_caster(values) - index = type_caster(index) - table = table_type(min(len(index), 1000000)) - table.map_locations(index) - return table.lookup(values) - - -def unique(values): - """ - Compute unique values (not necessarily sorted) efficiently from input array - of values - - Parameters - ---------- - values : array-like - - Returns - ------- - uniques - """ - values = com._asarray_tuplesafe(values) - - f = lambda htype, caster: _unique_object(values, htype, caster) - return _hashtable_algo(f, values) - - -def _unique_object(values, table_type, type_caster): - values = type_caster(values) - table = table_type(min(len(values), 1000000)) - uniques = table.unique(values) - return type_caster(uniques) - - def unique1d(values): """ Hash table-based unique """ - if np.issubdtype(values.dtype, np.floating): - table = htable.Float64HashTable(len(values)) - uniques = np.array(table.unique(_ensure_float64(values)), - dtype=np.float64) - elif np.issubdtype(values.dtype, np.datetime64): - table = htable.Int64HashTable(len(values)) - uniques = table.unique(_ensure_int64(values)) - uniques = uniques.view('M8[ns]') - elif np.issubdtype(values.dtype, np.timedelta64): - table = htable.Int64HashTable(len(values)) - uniques = table.unique(_ensure_int64(values)) - uniques = uniques.view('m8[ns]') - elif np.issubdtype(values.dtype, np.signedinteger): - table = htable.Int64HashTable(len(values)) - uniques = table.unique(_ensure_int64(values)) - elif np.issubdtype(values.dtype, np.unsignedinteger): - table = htable.UInt64HashTable(len(values)) - uniques = table.unique(_ensure_uint64(values)) - else: - - # its cheaper to use a String Hash Table than Object - if lib.infer_dtype(values) in ['string']: - table = htable.StringHashTable(len(values)) - else: - table = htable.PyObjectHashTable(len(values)) + values = _ensure_arraylike(values) + original = values + htable, _, values, dtype, ndtype = _get_hashtable_algo(values) - uniques = table.unique(_ensure_object(values)) + table = htable(len(values)) + uniques = table.unique(values) + uniques = _reconstruct_data(uniques, dtype, original) return uniques +unique = unique1d + + def isin(comps, values): """ Compute the isin boolean array @@ -213,38 +304,11 @@ def isin(comps, values): " to isin(), you passed a " "[{0}]".format(type(values).__name__)) - from pandas import DatetimeIndex, TimedeltaIndex, PeriodIndex - if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): values = np.array(list(values), dtype='object') - if needs_i8_conversion(comps): - if is_period_dtype(values): - comps = PeriodIndex(comps) - values = PeriodIndex(values) - elif is_timedelta64_dtype(comps): - comps = TimedeltaIndex(comps) - values = TimedeltaIndex(values) - else: - comps = DatetimeIndex(comps) - values = DatetimeIndex(values) - - values = values.asi8 - comps = comps.asi8 - elif is_bool_dtype(comps): - - try: - comps = np.asarray(comps).view('uint8') - values = 
np.asarray(values).view('uint8') - except TypeError: - # object array conversion will fail - pass - elif is_numeric_dtype(comps): - comps = np.asarray(comps) - values = np.asarray(values) - else: - comps = np.asarray(comps).astype(object) - values = np.asarray(values).astype(object) + comps, dtype, _ = _ensure_data(comps) + values, _, _ = _ensure_data(values, dtype=dtype) # GH11232 # work-around for numpy < 1.8 and comparisions on py3 @@ -396,53 +460,32 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): note: an array of Periods will ignore sort as it returns an always sorted PeriodIndex """ - from pandas import Index, Series, DatetimeIndex, PeriodIndex - - # handling possibilities here - # - for a numpy datetimelike simply view as i8 then cast back - # - bool handled as uint8 then cast back - # - for an extension datetimelike view as i8 then - # reconstruct from boxed values to transfer metadata - dtype = None - if needs_i8_conversion(values): - if is_period_dtype(values): - values = PeriodIndex(values) - vals = values.asi8 - elif is_datetimetz(values): - values = DatetimeIndex(values) - vals = values.asi8 - else: - # numpy dtype - dtype = values.dtype - vals = values.view(np.int64) - elif is_bool_dtype(values): - dtype = bool - vals = np.asarray(values).view('uint8') - else: - vals = np.asarray(values) - (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables) + original = values + values, dtype, _ = _ensure_data(values) + (hash_klass, vec_klass), values = _get_data_algo(values, _hashtables) - table = hash_klass(size_hint or len(vals)) + table = hash_klass(size_hint or len(values)) uniques = vec_klass() - check_nulls = not is_integer_dtype(values) - labels = table.get_labels(vals, uniques, 0, na_sentinel, check_nulls) + check_nulls = not is_integer_dtype(original) + labels = table.get_labels(values, uniques, 0, na_sentinel, check_nulls) labels = _ensure_platform_int(labels) - uniques = uniques.to_array() if sort and len(uniques) > 0: uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel, assume_unique=True) - if dtype is not None: - uniques = uniques.astype(dtype) + uniques = _reconstruct_data(uniques, dtype, original) - if isinstance(values, Index): - uniques = values._shallow_copy(uniques, name=None) - elif isinstance(values, Series): + # return original tenor + if isinstance(original, ABCIndexClass): + uniques = original._shallow_copy(uniques, name=None) + elif isinstance(original, ABCSeries): + from pandas import Index uniques = Index(uniques) + return labels, uniques @@ -471,7 +514,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, value_counts : Series """ - from pandas.core.series import Series + from pandas.core.series import Series, Index name = getattr(values, 'name', None) if bins is not None: @@ -483,17 +526,16 @@ def value_counts(values, sort=True, ascending=False, normalize=False, raise TypeError("bins argument only works with numeric data.") values = cat.codes - if is_extension_type(values) and not is_datetimetz(values): + if is_categorical_dtype(values) or is_sparse(values): + # handle Categorical and sparse, - # datetime tz can be handeled in ndarray path result = Series(values).values.value_counts(dropna=dropna) result.name = name counts = result.values + else: - # ndarray path. 
pass original to handle DatetimeTzBlock - keys, counts = _value_counts_arraylike(values, dropna=dropna) + keys, counts = _value_counts_arraylike(values, dropna) - from pandas import Index, Series if not isinstance(keys, Index): keys = Index(keys) result = Series(counts, index=keys, name=name) @@ -513,60 +555,45 @@ def value_counts(values, sort=True, ascending=False, normalize=False, return result -def _value_counts_arraylike(values, dropna=True): - is_datetimetz_type = is_datetimetz(values) - is_period_type = (is_period_dtype(values) or - is_period_arraylike(values)) - - orig = values - - from pandas.core.series import Series - values = Series(values).values - dtype = values.dtype +def _value_counts_arraylike(values, dropna): + """ + Parameters + ---------- + values : arraylike + dropna : boolean - if needs_i8_conversion(dtype) or is_period_type: + Returns + ------- + (uniques, counts) - from pandas.tseries.index import DatetimeIndex - from pandas.tseries.period import PeriodIndex + """ + values = _ensure_arraylike(values) + original = values + values, dtype, ndtype = _ensure_data(values) - if is_period_type: - # values may be an object - values = PeriodIndex(values) - freq = values.freq + if needs_i8_conversion(dtype): + # i8 - values = values.view(np.int64) keys, counts = htable.value_count_int64(values, dropna) if dropna: msk = keys != iNaT keys, counts = keys[msk], counts[msk] - # convert the keys back to the dtype we came in - keys = keys.astype(dtype) - - # dtype handling - if is_datetimetz_type: - keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz) - elif is_period_type: - keys = PeriodIndex._from_ordinals(keys, freq=freq) - - elif is_signed_integer_dtype(dtype): - values = _ensure_int64(values) - keys, counts = htable.value_count_int64(values, dropna) - elif is_unsigned_integer_dtype(dtype): - values = _ensure_uint64(values) - keys, counts = htable.value_count_uint64(values, dropna) - elif is_float_dtype(dtype): - values = _ensure_float64(values) - keys, counts = htable.value_count_float64(values, dropna) else: - values = _ensure_object(values) - keys, counts = htable.value_count_object(values, dropna) + # ndarray like + + # TODO: handle uint8 + f = getattr(htable, "value_count_{dtype}".format(dtype=ndtype)) + keys, counts = f(values, dropna) mask = isnull(values) if not dropna and mask.any(): - keys = np.insert(keys, 0, np.NaN) - counts = np.insert(counts, 0, mask.sum()) + if not isnull(keys).any(): + keys = np.insert(keys, 0, np.NaN) + counts = np.insert(counts, 0, mask.sum()) + + keys = _reconstruct_data(keys, original.dtype, original) return keys, counts @@ -593,33 +620,9 @@ def duplicated(values, keep='first'): duplicated : ndarray """ - dtype = values.dtype - - # no need to revert to original type - if needs_i8_conversion(dtype): - values = values.view(np.int64) - elif is_period_arraylike(values): - from pandas.tseries.period import PeriodIndex - values = PeriodIndex(values).asi8 - elif is_categorical_dtype(dtype): - values = values.values.codes - elif isinstance(values, (ABCSeries, ABCIndex)): - values = values.values - - if is_signed_integer_dtype(dtype): - values = _ensure_int64(values) - duplicated = htable.duplicated_int64(values, keep=keep) - elif is_unsigned_integer_dtype(dtype): - values = _ensure_uint64(values) - duplicated = htable.duplicated_uint64(values, keep=keep) - elif is_float_dtype(dtype): - values = _ensure_float64(values) - duplicated = htable.duplicated_float64(values, keep=keep) - else: - values = _ensure_object(values) - duplicated = 
htable.duplicated_object(values, keep=keep) - - return duplicated + values, dtype, ndtype = _ensure_data(values) + f = getattr(htable, "duplicated_{dtype}".format(dtype=ndtype)) + return f(values, keep=keep) def mode(values): @@ -635,40 +638,34 @@ def mode(values): ------- mode : Series """ + from pandas import Series - # must sort because hash order isn't necessarily defined. - from pandas.core.series import Series + values = _ensure_arraylike(values) + original = values - if isinstance(values, Series): - constructor = values._constructor - values = values.values - else: - values = np.asanyarray(values) - constructor = Series + # categorical is a fast-path + if is_categorical_dtype(values): - dtype = values.dtype - if is_signed_integer_dtype(values): - values = _ensure_int64(values) - result = constructor(np.sort(htable.mode_int64(values)), dtype=dtype) - elif is_unsigned_integer_dtype(values): - values = _ensure_uint64(values) - result = constructor(np.sort(htable.mode_uint64(values)), dtype=dtype) - elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)): - dtype = values.dtype - values = values.view(np.int64) - result = constructor(np.sort(htable.mode_int64(values)), dtype=dtype) - elif is_categorical_dtype(values): - result = constructor(values.mode()) - else: + if isinstance(values, Series): + return Series(values.values.mode()) + return values.mode() + + values, dtype, ndtype = _ensure_data(values) + + # TODO: this should support float64 + if ndtype not in ['int64', 'uint64', 'object']: + ndtype = 'object' values = _ensure_object(values) - res = htable.mode_object(values) - try: - res = np.sort(res) - except TypeError as e: - warn("Unable to sort modes: %s" % e) - result = constructor(res, dtype=dtype) - return result + f = getattr(htable, "mode_{dtype}".format(dtype=ndtype)) + result = f(values) + try: + result = np.sort(result) + except TypeError as e: + warn("Unable to sort modes: %s" % e) + + result = _reconstruct_data(result, original.dtype, original) + return Series(result) def rank(values, axis=0, method='average', na_option='keep', @@ -859,6 +856,12 @@ def quantile(x, q, interpolation_method='fraction'): values = np.sort(x) + def _interpolate(a, b, fraction): + """Returns the point at the given fraction between a and b, where + 'fraction' must be between 0 and 1. + """ + return a + (b - a) * fraction + def _get_score(at): if len(values) == 0: return np.nan @@ -887,261 +890,186 @@ def _get_score(at): return algos.arrmap_float64(q, _get_score) -def _interpolate(a, b, fraction): - """Returns the point at the given fraction between a and b, where - 'fraction' must be between 0 and 1. - """ - return a + (b - a) * fraction - - -def nsmallest(arr, n, keep='first'): - """ - Find the indices of the n smallest values of a numpy array. - - Note: Fails silently with NaN. - """ - if keep == 'last': - arr = arr[::-1] - - narr = len(arr) - n = min(n, narr) - - arr = _ensure_data_view(arr) - kth_val = algos.kth_smallest(arr.copy(), n - 1) - return _finalize_nsmallest(arr, kth_val, n, keep, narr) - +# --------------- # +# select n # +# --------------- # -def nlargest(arr, n, keep='first'): - """ - Find the indices of the n largest values of a numpy array. +class SelectN(object): - Note: Fails silently with NaN. 
- """ - arr = _ensure_data_view(arr) - return nsmallest(-arr, n, keep=keep) + def __init__(self, obj, n, keep): + self.obj = obj + self.n = n + self.keep = keep + if self.keep not in ('first', 'last'): + raise ValueError('keep must be either "first", "last"') -def select_n_slow(dropped, n, keep, method): - reverse_it = (keep == 'last' or method == 'nlargest') - ascending = method == 'nsmallest' - slc = np.s_[::-1] if reverse_it else np.s_[:] - return dropped[slc].sort_values(ascending=ascending).head(n) + def nlargest(self): + return self.compute('nlargest') + def nsmallest(self): + return self.compute('nsmallest') -_select_methods = {'nsmallest': nsmallest, 'nlargest': nlargest} + @staticmethod + def is_valid_dtype_n_method(dtype): + """ + Helper function to determine if dtype is valid for + nsmallest/nlargest methods + """ + return ((is_numeric_dtype(dtype) and not is_complex_dtype(dtype)) or + needs_i8_conversion(dtype)) -def _is_valid_dtype_n_method(dtype): - """ - Helper function to determine if dtype is valid for - nsmallest/nlargest methods +class SelectNSeries(SelectN): """ - return ((is_numeric_dtype(dtype) and not is_complex_dtype(dtype)) or - needs_i8_conversion(dtype)) - - -def select_n_series(series, n, keep, method): - """Implement n largest/smallest for pandas Series + Implement n largest/smallest for Series Parameters ---------- - series : pandas.Series object + obj : Series n : int keep : {'first', 'last'}, default 'first' - method : str, {'nlargest', 'nsmallest'} Returns ------- nordered : Series """ - dtype = series.dtype - if not _is_valid_dtype_n_method(dtype): - raise TypeError("Cannot use method '{method}' with " - "dtype {dtype}".format(method=method, dtype=dtype)) - if keep not in ('first', 'last'): - raise ValueError('keep must be either "first", "last"') + def compute(self, method): + + n = self.n + dtype = self.obj.dtype + if not self.is_valid_dtype_n_method(dtype): + raise TypeError("Cannot use method '{method}' with " + "dtype {dtype}".format(method=method, + dtype=dtype)) + + if n <= 0: + return self.obj[[]] + + dropped = self.obj.dropna() + + # slow method + if n >= len(self.obj): - if n <= 0: - return series[[]] + reverse_it = (self.keep == 'last' or method == 'nlargest') + ascending = method == 'nsmallest' + slc = np.s_[::-1] if reverse_it else np.s_[:] + return dropped[slc].sort_values(ascending=ascending).head(n) - dropped = series.dropna() + # fast method + arr, _, _ = _ensure_data(dropped.values) + if method == 'nlargest': + arr = -arr - if n >= len(series): - return select_n_slow(dropped, n, keep, method) + if self.keep == 'last': + arr = arr[::-1] - inds = _select_methods[method](dropped.values, n, keep) - return dropped.iloc[inds] + narr = len(arr) + n = min(n, narr) + kth_val = algos.kth_smallest(arr.copy(), n - 1) + ns, = np.nonzero(arr <= kth_val) + inds = ns[arr[ns].argsort(kind='mergesort')][:n] + if self.keep == 'last': + # reverse indices + inds = narr - 1 - inds -def select_n_frame(frame, columns, n, method, keep): - """Implement n largest/smallest for pandas DataFrame + return dropped.iloc[inds] + + +class SelectNFrame(SelectN): + """ + Implement n largest/smallest for DataFrame Parameters ---------- - frame : pandas.DataFrame object - columns : list or str + obj : DataFrame n : int keep : {'first', 'last'}, default 'first' - method : str, {'nlargest', 'nsmallest'} + columns : list or str Returns ------- nordered : DataFrame """ - from pandas import Int64Index - if not is_list_like(columns): - columns = [columns] - columns = list(columns) - 
for column in columns: - dtype = frame[column].dtype - if not _is_valid_dtype_n_method(dtype): - raise TypeError(( - "Column {column!r} has dtype {dtype}, cannot use method " - "{method!r} with this dtype" - ).format(column=column, dtype=dtype, method=method)) - - def get_indexer(current_indexer, other_indexer): - """Helper function to concat `current_indexer` and `other_indexer` - depending on `method` - """ - if method == 'nsmallest': - return current_indexer.append(other_indexer) - else: - return other_indexer.append(current_indexer) - - # Below we save and reset the index in case index contains duplicates - original_index = frame.index - cur_frame = frame = frame.reset_index(drop=True) - cur_n = n - indexer = Int64Index([]) - - for i, column in enumerate(columns): - - # For each column we apply method to cur_frame[column]. If it is the - # last column in columns, or if the values returned are unique in - # frame[column] we save this index and break - # Otherwise we must save the index of the non duplicated values - # and set the next cur_frame to cur_frame filtered on all duplcicated - # values (#GH15297) - series = cur_frame[column] - values = getattr(series, method)(cur_n, keep=keep) - is_last_column = len(columns) - 1 == i - if is_last_column or values.nunique() == series.isin(values).sum(): - - # Last column in columns or values are unique in series => values - # is all that matters - indexer = get_indexer(indexer, values.index) - break - - duplicated_filter = series.duplicated(keep=False) - duplicated = values[duplicated_filter] - non_duplicated = values[~duplicated_filter] - indexer = get_indexer(indexer, non_duplicated.index) - - # Must set cur frame to include all duplicated values to consider for - # the next column, we also can reduce cur_n by the current length of - # the indexer - cur_frame = cur_frame[series.isin(duplicated)] - cur_n = n - len(indexer) - - frame = frame.take(indexer) - - # Restore the index on frame - frame.index = original_index.take(indexer) - return frame - - -def _finalize_nsmallest(arr, kth_val, n, keep, narr): - ns, = np.nonzero(arr <= kth_val) - inds = ns[arr[ns].argsort(kind='mergesort')][:n] - if keep == 'last': - # reverse indices - return narr - 1 - inds - else: - return inds - - -# ------- # -# helpers # -# ------- # - -def _hashtable_algo(f, values, return_dtype=None): - """ - f(HashTable, type_caster) -> result - """ - - dtype = values.dtype - if is_float_dtype(dtype): - return f(htable.Float64HashTable, _ensure_float64) - elif is_signed_integer_dtype(dtype): - return f(htable.Int64HashTable, _ensure_int64) - elif is_unsigned_integer_dtype(dtype): - return f(htable.UInt64HashTable, _ensure_uint64) - elif is_datetime64_dtype(dtype): - return_dtype = return_dtype or 'M8[ns]' - return f(htable.Int64HashTable, _ensure_int64).view(return_dtype) - elif is_timedelta64_dtype(dtype): - return_dtype = return_dtype or 'm8[ns]' - return f(htable.Int64HashTable, _ensure_int64).view(return_dtype) - - # its cheaper to use a String Hash Table than Object - if lib.infer_dtype(values) in ['string']: - return f(htable.StringHashTable, _ensure_object) - - # use Object - return f(htable.PyObjectHashTable, _ensure_object) - - -_hashtables = { - 'float64': (htable.Float64HashTable, htable.Float64Vector), - 'uint64': (htable.UInt64HashTable, htable.UInt64Vector), - 'int64': (htable.Int64HashTable, htable.Int64Vector), - 'string': (htable.StringHashTable, htable.ObjectVector), - 'object': (htable.PyObjectHashTable, htable.ObjectVector) -} - - -def 
_get_data_algo(values, func_map): - - f = None - - if is_categorical_dtype(values): - values = values._values_for_rank() - - if is_float_dtype(values): - f = func_map['float64'] - values = _ensure_float64(values) - - elif needs_i8_conversion(values): - f = func_map['int64'] - values = values.view('i8') - - elif is_signed_integer_dtype(values): - f = func_map['int64'] - values = _ensure_int64(values) - - elif is_unsigned_integer_dtype(values): - f = func_map['uint64'] - values = _ensure_uint64(values) - - else: - values = _ensure_object(values) - - # its cheaper to use a String Hash Table than Object - if lib.infer_dtype(values) in ['string']: - try: - f = func_map['string'] - except KeyError: - pass - - if f is None: - f = func_map['object'] - - return f, values - -# ---- # + def __init__(self, obj, n, keep, columns): + super(SelectNFrame, self).__init__(obj, n, keep) + if not is_list_like(columns): + columns = [columns] + columns = list(columns) + self.columns = columns + + def compute(self, method): + + from pandas import Int64Index + n = self.n + frame = self.obj + columns = self.columns + + for column in columns: + dtype = frame[column].dtype + if not self.is_valid_dtype_n_method(dtype): + raise TypeError(( + "Column {column!r} has dtype {dtype}, cannot use method " + "{method!r} with this dtype" + ).format(column=column, dtype=dtype, method=method)) + + def get_indexer(current_indexer, other_indexer): + """Helper function to concat `current_indexer` and `other_indexer` + depending on `method` + """ + if method == 'nsmallest': + return current_indexer.append(other_indexer) + else: + return other_indexer.append(current_indexer) + + # Below we save and reset the index in case index contains duplicates + original_index = frame.index + cur_frame = frame = frame.reset_index(drop=True) + cur_n = n + indexer = Int64Index([]) + + for i, column in enumerate(columns): + + # For each column we apply method to cur_frame[column]. 
+            # If it is the last column in columns, or if the values
+            # returned are unique in frame[column] we save this index
+            # and break
+            # Otherwise we must save the index of the non-duplicated values
+            # and set the next cur_frame to cur_frame filtered on all
+            # duplicated values (#GH15297)
+            series = cur_frame[column]
+            values = getattr(series, method)(cur_n, keep=self.keep)
+            is_last_column = len(columns) - 1 == i
+            if is_last_column or values.nunique() == series.isin(values).sum():
+
+                # Last column in columns, or values are unique in
+                # series => values is all that matters
+                indexer = get_indexer(indexer, values.index)
+                break
+
+            duplicated_filter = series.duplicated(keep=False)
+            duplicated = values[duplicated_filter]
+            non_duplicated = values[~duplicated_filter]
+            indexer = get_indexer(indexer, non_duplicated.index)
+
+            # Must set cur frame to include all duplicated values
+            # to consider for the next column, we also can reduce
+            # cur_n by the current length of the indexer
+            cur_frame = cur_frame[series.isin(duplicated)]
+            cur_n = n - len(indexer)
+
+        frame = frame.take(indexer)
+
+        # Restore the index on frame
+        frame.index = original_index.take(indexer)
+        return frame
+
+
 # ---- #
 # take #
 # ---- #
 
@@ -1534,23 +1462,41 @@ def func(arr, indexer, out, fill_value=np.nan):
 
 
 def diff(arr, n, axis=0):
-    """ difference of n between self,
-        analagoust to s-s.shift(n) """
+    """
+    difference of n between self,
+    analogous to s - s.shift(n)
+
+    Parameters
+    ----------
+    arr : ndarray
+    n : int
+        number of periods
+    axis : int
+        axis to shift on
+
+    Returns
+    -------
+    shifted
+
+    """
+
     n = int(n)
     na = np.nan
     dtype = arr.dtype
+
     is_timedelta = False
     if needs_i8_conversion(arr):
         dtype = np.float64
         arr = arr.view('i8')
         na = iNaT
         is_timedelta = True
-    elif issubclass(dtype.type, np.integer):
-        dtype = np.float64
-    elif issubclass(dtype.type, np.bool_):
+
+    elif is_bool_dtype(dtype):
         dtype = np.object_
 
+    elif is_integer_dtype(dtype):
+        dtype = np.float64
+
     dtype = np.dtype(dtype)
     out_arr = np.empty(arr.shape, dtype=dtype)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 3980bf6cdbc09..f6199be2d1fc9 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3441,7 +3441,10 @@ def nlargest(self, n, columns, keep='first'):
         1  10   b    2
         2   8   d  NaN
         """
-        return algorithms.select_n_frame(self, columns, n, 'nlargest', keep)
+        return algorithms.SelectNFrame(self,
+                                       n=n,
+                                       keep=keep,
+                                       columns=columns).nlargest()
 
     def nsmallest(self, n, columns, keep='first'):
         """Get the rows of a DataFrame sorted by the `n` smallest
@@ -3475,7 +3478,10 @@ def nsmallest(self, n, columns, keep='first'):
         0  1   a    1
         2  8   d  NaN
         """
-        return algorithms.select_n_frame(self, columns, n, 'nsmallest', keep)
+        return algorithms.SelectNFrame(self,
+                                       n=n,
+                                       keep=keep,
+                                       columns=columns).nsmallest()
 
     def swaplevel(self, i=-2, j=-1, axis=0):
         """
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 1aaa106d2c68f..d6a1a9d98faf4 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1856,8 +1856,7 @@ def nlargest(self, n=5, keep='first'):
         121637    4.240952
         dtype: float64
         """
-        return algorithms.select_n_series(self, n=n, keep=keep,
-                                          method='nlargest')
+        return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest()
 
     def nsmallest(self, n=5, keep='first'):
         """Return the smallest `n` elements. 
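
The fast path in ``SelectNSeries.compute`` above is a partition followed by a
stable sort. A minimal standalone NumPy sketch of the same idea; the helper
name ``nsmallest_indices`` is made up for illustration, and ``np.partition``
stands in for the Cython ``algos.kth_smallest`` used by the patch:

    import numpy as np

    def nsmallest_indices(arr, n):
        # value sitting at the n-th smallest rank (linear-time partition)
        n = min(n, len(arr))
        kth_val = np.partition(arr.copy(), n - 1)[n - 1]
        # candidate positions at or below that value; this picks up ties
        ns, = np.nonzero(arr <= kth_val)
        # stable mergesort keeps first-seen order among ties, then trim to n
        return ns[arr[ns].argsort(kind='mergesort')][:n]

    vals = np.array([3., 1., 4., 1., 5., 9., 2.])
    nsmallest_indices(vals, 3)  # array([1, 3, 6]): the two 1.0s, then 2.0
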
@@ -1903,8 +1902,7 @@ def nsmallest(self, n=5, keep='first'): 359919 -4.331927 dtype: float64 """ - return algorithms.select_n_series(self, n=n, keep=keep, - method='nsmallest') + return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest() def sortlevel(self, level=0, ascending=True, sort_remaining=True): """ diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index ac3a42c3cf122..d893183dae0ed 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -620,9 +620,9 @@ def test_dropna(self): # 32-bit linux has a different ordering if not compat.is_platform_32bit(): - tm.assert_series_equal( - pd.Series([10.3, 5., 5., None]).value_counts(dropna=False), - pd.Series([2, 1, 1], index=[5., 10.3, np.nan])) + result = pd.Series([10.3, 5., 5., None]).value_counts(dropna=False) + expected = pd.Series([2, 1, 1], index=[5., 10.3, np.nan]) + tm.assert_series_equal(result, expected) def test_value_counts_normalized(self): # GH12558 @@ -1356,16 +1356,19 @@ def test_uint64_overflow(self): def test_categorical(self): c = Categorical([1, 2]) - exp = Series([1, 2], dtype=np.int64) - tm.assert_series_equal(algos.mode(c), exp) + exp = c + tm.assert_categorical_equal(algos.mode(c), exp) + tm.assert_categorical_equal(c.mode(), exp) c = Categorical([1, 'a', 'a']) - exp = Series(['a'], dtype=object) - tm.assert_series_equal(algos.mode(c), exp) + exp = Categorical(['a'], categories=[1, 'a']) + tm.assert_categorical_equal(algos.mode(c), exp) + tm.assert_categorical_equal(c.mode(), exp) c = Categorical([1, 1, 2, 3, 3]) - exp = Series([1, 3], dtype=np.int64) - tm.assert_series_equal(algos.mode(c), exp) + exp = Categorical([1, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(algos.mode(c), exp) + tm.assert_categorical_equal(c.mode(), exp) def test_index(self): idx = Index([1, 2, 3]) diff --git a/pandas/tests/types/test_dtypes.py b/pandas/tests/types/test_dtypes.py index 8ef2868ae324f..e7b2edeb57714 100644 --- a/pandas/tests/types/test_dtypes.py +++ b/pandas/tests/types/test_dtypes.py @@ -149,6 +149,7 @@ def test_construction_from_string(self): lambda: DatetimeTZDtype.construct_from_string('foo')) def test_is_dtype(self): + self.assertFalse(DatetimeTZDtype.is_dtype(None)) self.assertTrue(DatetimeTZDtype.is_dtype(self.dtype)) self.assertTrue(DatetimeTZDtype.is_dtype('datetime64[ns, US/Eastern]')) self.assertFalse(DatetimeTZDtype.is_dtype('foo')) diff --git a/pandas/types/common.py b/pandas/types/common.py index a1f03e59a5e6e..017805673defe 100644 --- a/pandas/types/common.py +++ b/pandas/types/common.py @@ -359,6 +359,8 @@ def _coerce_to_dtype(dtype): def _get_dtype(arr_or_dtype): + if arr_or_dtype is None: + raise TypeError if isinstance(arr_or_dtype, np.dtype): return arr_or_dtype elif isinstance(arr_or_dtype, type): diff --git a/pandas/types/dtypes.py b/pandas/types/dtypes.py index 43135ba94ab46..c3494df93476b 100644 --- a/pandas/types/dtypes.py +++ b/pandas/types/dtypes.py @@ -82,6 +82,8 @@ def is_dtype(cls, dtype): return True elif isinstance(dtype, np.dtype): return False + elif dtype is None: + return False try: return cls.construct_from_string(dtype) is not None except: From 2f160f033f37d539e6799da61733ce38f9234119 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 7 Apr 2017 08:03:55 -0400 Subject: [PATCH 346/933] TST: suppress some warnings (#15932) --- pandas/core/algorithms.py | 8 ++++++-- pandas/tests/test_errors.py | 10 ++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 
244f882f2c103..9b88ea23483bd 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -3,7 +3,7 @@ intended for public consumption """ from __future__ import division -from warnings import warn +from warnings import warn, catch_warnings import numpy as np from pandas import compat, _np_version_under1p8 @@ -110,7 +110,11 @@ def _ensure_data(values, dtype=None): values = _ensure_uint64(values) ndtype = dtype = 'uint64' elif is_complex_dtype(values) or is_complex_dtype(dtype): - values = _ensure_float64(values) + + # ignore the fact that we are casting to float + # which discards complex parts + with catch_warnings(record=True): + values = _ensure_float64(values) ndtype = dtype = 'float64' elif is_float_dtype(values) or is_float_dtype(dtype): values = _ensure_float64(values) diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index aabce7ecb7066..4a0850734e134 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import pytest +from warnings import catch_warnings import pandas # noqa import pandas as pd @@ -44,7 +45,8 @@ def test_error_rename(): except CParserError: pass - try: - raise ParserError() - except pd.parser.CParserError: - pass + with catch_warnings(record=True): + try: + raise ParserError() + except pd.parser.CParserError: + pass From 3b53202d90e86a1bc0f5db7a9c2e66c164f909a9 Mon Sep 17 00:00:00 2001 From: Tong SHEN Date: Fri, 7 Apr 2017 20:08:57 +0800 Subject: [PATCH 347/933] DOC: Fix a typo in advanced.rst (#15933) --- doc/source/advanced.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index f380070ddac79..0b81bc6d934e1 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -46,7 +46,7 @@ data with an arbitrary number of dimensions in lower dimensional data structures like Series (1d) and DataFrame (2d). In this section, we will show what exactly we mean by "hierarchical" indexing -and how it integrates with the all of the pandas indexing functionality +and how it integrates with all of the pandas indexing functionality described above and in prior sections. 
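
The ``catch_warnings`` block added to ``_ensure_data`` above silences NumPy's
``ComplexWarning`` when complex values are coerced to float. A small
standalone sketch of the behavior being suppressed (the sample array is made
up):

    import warnings

    import numpy as np

    c = np.array([1 + 1j, 2 + 0j])

    # casting complex -> float discards the imaginary part, and numpy
    # emits a ComplexWarning; recording the warnings keeps them off stderr
    with warnings.catch_warnings(record=True):
        warnings.simplefilter('always')
        f = c.astype(np.float64)

    print(f)  # [ 1.  2.] -- imaginary parts dropped, no warning printed
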
Later, when discussing :ref:`group by
 <groupby>` and :ref:`pivoting and reshaping data <reshaping>`, we'll show
 non-trivial applications to illustrate how it aids in structuring data for

From f478e4f4b0a353fa48ddb19e70cb9abe5b36e1b5 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Fri, 7 Apr 2017 11:17:38 -0400
Subject: [PATCH 348/933] BUG: DataFrame.sort_index broken if not both
 lexsorted and monotonic in levels

closes #15622
closes #15687
closes #14015
closes #13431

Author: Jeff Reback

Closes #15694 from jreback/sort3 and squashes the following commits:

bd17d2b [Jeff Reback] rename sort_index_montonic -> _sort_index_monotonic
31097fc [Jeff Reback] add doc-strings, rename sort_monotonic -> sort_levels_monotonic
48249ab [Jeff Reback] add doc example
527c3a6 [Jeff Reback] simpler algo for remove_used_levels
520c9c1 [Jeff Reback] versionadded tags
f2ddc9c [Jeff Reback] replace _reconstruct with: sort_monotonic, and remove_unused_levels (public)
3c4ca22 [Jeff Reback] add degenerate test case
269cb3b [Jeff Reback] small doc updates
b234bdb [Jeff Reback] support for removing unused levels (internally)
7be8941 [Jeff Reback] incorrectly raising KeyError rather than UnsortedIndexError, caught by doc-example
47c67d6 [Jeff Reback] BUG: construct MultiIndex identically from levels/labels when concatting
---
 asv_bench/benchmarks/timeseries.py    |   5 +-
 doc/source/advanced.rst               |  63 +++++-----
 doc/source/api.rst                    |   1 +
 doc/source/whatsnew/v0.20.0.txt       |  70 +++++++++++-
 pandas/core/frame.py                  |  19 +--
 pandas/core/groupby.py                |   9 +-
 pandas/core/reshape.py                |   9 +-
 pandas/core/series.py                 |  18 ++-
 pandas/core/sorting.py                |  21 ++++
 pandas/indexes/multi.py               | 144 ++++++++++++++++++++++-
 pandas/tests/indexes/test_multi.py    |  98 ++++++++++++++++
 pandas/tests/series/test_analytics.py |   2 +-
 pandas/tests/test_multilevel.py       | 159 ++++++++++++++++++++++++++
 pandas/tests/tools/test_hashing.py    |  29 +++++
 pandas/tests/tools/test_pivot.py      |   3 +-
 15 files changed, 593 insertions(+), 57 deletions(-)

diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py
index 6e9ef4b10273c..dfe3f0ef87c11 100644
--- a/asv_bench/benchmarks/timeseries.py
+++ b/asv_bench/benchmarks/timeseries.py
@@ -292,7 +292,10 @@ def setup(self):
         self.rng3 = date_range(start='1/1/2000', periods=1500000, freq='S')
         self.ts3 = Series(1, index=self.rng3)
 
-    def time_sort_index(self):
+    def time_sort_index_monotonic(self):
+        self.ts2.sort_index()
+
+    def time_sort_index_non_monotonic(self):
         self.ts.sort_index()
 
     def time_timeseries_slice_minutely(self):
diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
index 0b81bc6d934e1..43373fc86c4d1 100644
--- a/doc/source/advanced.rst
+++ b/doc/source/advanced.rst
@@ -136,7 +136,7 @@ can find yourself working with hierarchically-indexed data without creating a
 may wish to generate your own ``MultiIndex`` when preparing the data set. Note
 that how the index is displayed can be controlled using the
-``multi_sparse`` option in ``pandas.set_printoptions``:
+``multi_sparse`` option in ``pandas.set_options()``:
 
 .. ipython:: python
 
@@ -175,35 +175,40 @@ completely analogous way to selecting a column in a regular DataFrame:
 See :ref:`Cross-section with hierarchical index <advanced.xs>` for how to select
 on a deeper level.
 
-.. note::
+.. _advanced.shown_levels:
+
+Defined Levels
+~~~~~~~~~~~~~~
+
+The repr of a ``MultiIndex`` shows ALL the defined levels of an index, even
+if they are not actually used. When slicing an index, you may notice this.
+For example: - The repr of a ``MultiIndex`` shows ALL the defined levels of an index, even - if the they are not actually used. When slicing an index, you may notice this. - For example: +.. ipython:: python - .. ipython:: python + # original multi-index + df.columns - # original multi-index - df.columns + # sliced + df[['foo','qux']].columns - # sliced - df[['foo','qux']].columns +This is done to avoid a recomputation of the levels in order to make slicing +highly performant. If you want to see the actual used levels. - This is done to avoid a recomputation of the levels in order to make slicing - highly performant. If you want to see the actual used levels. +.. ipython:: python - .. ipython:: python + df[['foo','qux']].columns.values - df[['foo','qux']].columns.values + # for a specific level + df[['foo','qux']].columns.get_level_values(0) - # for a specific level - df[['foo','qux']].columns.get_level_values(0) +To reconstruct the multiindex with only the used levels - To reconstruct the multiindex with only the used levels +.. versionadded:: 0.20.0 - .. ipython:: python +.. ipython:: python - pd.MultiIndex.from_tuples(df[['foo','qux']].columns.values) + df[['foo','qux']].columns.remove_unused_levels() Data alignment and using ``reindex`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -288,7 +293,7 @@ As usual, **both sides** of the slicers are included as this is label indexing. .. code-block:: python - df.loc[(slice('A1','A3'),.....),:] + df.loc[(slice('A1','A3'),.....), :] rather than this: @@ -317,43 +322,43 @@ Basic multi-index slicing using slices, lists, and labels. .. ipython:: python - dfmi.loc[(slice('A1','A3'),slice(None), ['C1','C3']),:] + dfmi.loc[(slice('A1','A3'), slice(None), ['C1', 'C3']), :] You can use a ``pd.IndexSlice`` to have a more natural syntax using ``:`` rather than using ``slice(None)`` .. ipython:: python idx = pd.IndexSlice - dfmi.loc[idx[:,:,['C1','C3']],idx[:,'foo']] + dfmi.loc[idx[:, :, ['C1', 'C3']], idx[:, 'foo']] It is possible to perform quite complicated selections using this method on multiple axes at the same time. .. ipython:: python - dfmi.loc['A1',(slice(None),'foo')] - dfmi.loc[idx[:,:,['C1','C3']],idx[:,'foo']] + dfmi.loc['A1', (slice(None), 'foo')] + dfmi.loc[idx[:, :, ['C1', 'C3']], idx[:, 'foo']] Using a boolean indexer you can provide selection related to the *values*. .. ipython:: python - mask = dfmi[('a','foo')]>200 - dfmi.loc[idx[mask,:,['C1','C3']],idx[:,'foo']] + mask = dfmi[('a', 'foo')] > 200 + dfmi.loc[idx[mask, :, ['C1', 'C3']], idx[:, 'foo']] You can also specify the ``axis`` argument to ``.loc`` to interpret the passed slicers on a single axis. .. ipython:: python - dfmi.loc(axis=0)[:,:,['C1','C3']] + dfmi.loc(axis=0)[:, :, ['C1', 'C3']] Furthermore you can *set* the values using these methods .. ipython:: python df2 = dfmi.copy() - df2.loc(axis=0)[:,:,['C1','C3']] = -10 + df2.loc(axis=0)[:, :, ['C1', 'C3']] = -10 df2 You can use a right-hand-side of an alignable object as well. @@ -361,7 +366,7 @@ You can use a right-hand-side of an alignable object as well. .. ipython:: python df2 = dfmi.copy() - df2.loc[idx[:,:,['C1','C3']],:] = df2*1000 + df2.loc[idx[:, :, ['C1', 'C3']], :] = df2 * 1000 df2 .. _advanced.xs: diff --git a/doc/source/api.rst b/doc/source/api.rst index 24bad7d515305..336b0b9b14c6c 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1432,6 +1432,7 @@ MultiIndex Components MultiIndex.droplevel MultiIndex.swaplevel MultiIndex.reorder_levels + MultiIndex.remove_unused_levels .. 
_api.datetimeindex:

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index cb9e2496757ef..21b259e7663ba 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -366,6 +366,8 @@ Other Enhancements
 - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`)
 - ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts ``str`` to separate joined fields; the default is ".", which is backward compatible. (:issue:`14883`)
 - ``pd.read_csv()`` will now raise a ``csv.Error`` error whenever an end-of-file character is encountered in the middle of a data row (:issue:`15913`)
+- :func:`MultiIndex.remove_unused_levels` has been added to facilitate :ref:`removing unused levels <advanced.shown_levels>`. (:issue:`15694`)
 
 .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
 
@@ -714,6 +716,72 @@ If indicated, a deprecation warning will be issued if you reference that module.
     "pandas._hash", "pandas.tools.libhash", ""
     "pandas._window", "pandas.core.libwindow", ""
 
+.. _whatsnew_0200.api_breaking.sort_index:
+
+DataFrame.sort_index changes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In certain cases, calling ``.sort_index()`` on a MultiIndexed DataFrame would return the *same* DataFrame without seeming to sort.
+This would happen with a ``lexsorted``, but non-monotonic, set of levels. (:issue:`15622`, :issue:`15687`, :issue:`14015`, :issue:`13431`)
+
+This behavior is unchanged between versions, but is shown here for illustration:
+
+.. ipython:: python
+
+   df = DataFrame(np.arange(6), columns=['value'], index=MultiIndex.from_product([list('BA'), range(3)]))
+   df
+
+.. ipython:: python
+
+   df.index.is_lexsorted()
+   df.index.is_monotonic
+
+Sorting works as expected:
+
+.. ipython:: python
+
+   df.sort_index()
+
+.. ipython:: python
+
+   df.sort_index().index.is_lexsorted()
+   df.sort_index().index.is_monotonic
+
+However, this example, which has a non-monotonic 2nd level,
+doesn't behave as desired.
+
+.. ipython:: python
+
+   df = pd.DataFrame(
+       {'value': [1, 2, 3, 4]},
+       index=pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
+                           labels=[[0, 0, 1, 1], [0, 1, 0, 1]]))
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+   In [11]: df.sort_index()
+   Out[11]:
+          value
+   a bb      1
+     aa      2
+   b bb      3
+     aa      4
+
+   In [14]: df.sort_index().index.is_lexsorted()
+   Out[14]: True
+
+   In [15]: df.sort_index().index.is_monotonic
+   Out[15]: False
+
+New Behavior:
+
+.. ipython:: python
+
+   df.sort_index()
+   df.sort_index().index.is_lexsorted()
+   df.sort_index().index.is_monotonic
+
 
 .. _whatsnew_0200.api_breaking.groupby_describe:
 
@@ -965,7 +1033,7 @@ Performance Improvements
 - Improve performance of ``pd.core.groupby.GroupBy.apply`` when the applied function used the ``.name`` attribute of the group DataFrame (:issue:`15062`).
 - Improved performance of ``iloc`` indexing with a list or array (:issue:`15504`).
-
+- Improved performance of ``Series.sort_index()`` with a monotonic index (:issue:`15694`)
 
 .. 
_whatsnew_0200.bug_fixes: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f6199be2d1fc9..c8c21b0c5fd7d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3322,6 +3322,10 @@ def trans(v): def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, by=None): + + # TODO: this can be combined with Series.sort_index impl as + # almost identical + inplace = validate_bool_kwarg(inplace, 'inplace') # 10726 if by is not None: @@ -3335,8 +3339,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, axis = self._get_axis_number(axis) labels = self._get_axis(axis) - # sort by the index - if level is not None: + if level: new_axis, indexer = labels.sortlevel(level, ascending=ascending, sort_remaining=sort_remaining) @@ -3346,17 +3349,14 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, # make sure that the axis is lexsorted to start # if not we need to reconstruct to get the correct indexer - if not labels.is_lexsorted(): - labels = MultiIndex.from_tuples(labels.values) - + labels = labels._sort_levels_monotonic() indexer = lexsort_indexer(labels.labels, orders=ascending, na_position=na_position) else: from pandas.core.sorting import nargsort - # GH11080 - Check monotonic-ness before sort an index - # if monotonic (already sorted), return None or copy() according - # to 'inplace' + # Check monotonic-ness before sort an index + # GH11080 if ((ascending and labels.is_monotonic_increasing) or (not ascending and labels.is_monotonic_decreasing)): if inplace: @@ -3367,8 +3367,9 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, indexer = nargsort(labels, kind=kind, ascending=ascending, na_position=na_position) + baxis = self._get_block_manager_axis(axis) new_data = self._data.take(indexer, - axis=self._get_block_manager_axis(axis), + axis=baxis, convert=False, verify=False) if inplace: diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index fe764a099bb63..add2987b8f452 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1882,6 +1882,13 @@ def get_group_levels(self): 'ohlc': lambda *args: ['open', 'high', 'low', 'close'] } + def _is_builtin_func(self, arg): + """ + if we define an builtin function for this argument, return it, + otherwise return the arg + """ + return SelectionMixin._builtin_table.get(arg, arg) + def _get_cython_function(self, kind, how, values, is_numeric): dtype_str = values.dtype.name @@ -2107,7 +2114,7 @@ def _aggregate_series_fast(self, obj, func): # avoids object / Series creation overhead dummy = obj._get_values(slice(None, 0)).to_dense() indexer = get_group_index_sorter(group_index, ngroups) - obj = obj.take(indexer, convert=False) + obj = obj.take(indexer, convert=False).to_dense() group_index = algorithms.take_nd( group_index, indexer, allow_fill=False) grouper = lib.SeriesGrouper(obj, func, group_index, ngroups, diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index c7e06d63fbda9..b03c3d77928c7 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -22,8 +22,8 @@ from pandas.sparse.libsparse import IntIndex from pandas.core.categorical import Categorical, _factorize_from_iterable -from pandas.core.sorting import (get_group_index, compress_group_index, - decons_obs_group_ids) +from pandas.core.sorting import (get_group_index, get_compressed_ids, + compress_group_index, decons_obs_group_ids) import pandas.core.algorithms as algos from pandas._libs import algos as 
_algos, reshape as _reshape @@ -496,11 +496,6 @@ def _unstack_frame(obj, level, fill_value=None): return unstacker.get_result() -def get_compressed_ids(labels, sizes): - ids = get_group_index(labels, sizes, sort=True, xnull=False) - return compress_group_index(ids, sort=True) - - def stack(frame, level=-1, dropna=True): """ Convert DataFrame to Series with multi-level Index. Columns become the diff --git a/pandas/core/series.py b/pandas/core/series.py index d6a1a9d98faf4..760abc20351cf 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1751,17 +1751,31 @@ def _try_kind_sort(arr): def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True): + # TODO: this can be combined with DataFrame.sort_index impl as + # almost identical inplace = validate_bool_kwarg(inplace, 'inplace') axis = self._get_axis_number(axis) index = self.index - if level is not None: + + if level: new_index, indexer = index.sortlevel(level, ascending=ascending, sort_remaining=sort_remaining) elif isinstance(index, MultiIndex): from pandas.core.sorting import lexsort_indexer - indexer = lexsort_indexer(index.labels, orders=ascending) + labels = index._sort_levels_monotonic() + indexer = lexsort_indexer(labels.labels, orders=ascending) else: from pandas.core.sorting import nargsort + + # Check monotonic-ness before sort an index + # GH11080 + if ((ascending and index.is_monotonic_increasing) or + (not ascending and index.is_monotonic_decreasing)): + if inplace: + return + else: + return self.copy() + indexer = nargsort(index, kind=kind, ascending=ascending, na_position=na_position) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 205d0d94d2ec3..e56a4f50de134 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -93,6 +93,27 @@ def maybe_lift(lab, size): # pormote nan values return loop(list(labels), list(shape)) +def get_compressed_ids(labels, sizes): + """ + + Group_index is offsets into cartesian product of all possible labels. This + space can be huge, so this function compresses it, by computing offsets + (comp_ids) into the list of unique labels (obs_group_ids). + + Parameters + ---------- + labels : list of label arrays + sizes : list of size of the levels + + Returns + ------- + tuple of (comp_ids, obs_group_ids) + + """ + ids = get_group_index(labels, sizes, sort=True, xnull=False) + return compress_group_index(ids, sort=True) + + def is_int64_overflow_possible(shape): the_prod = long(1) for x in shape: diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index f12b10ae682fa..96e0effbd7608 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -1171,9 +1171,142 @@ def from_product(cls, iterables, sortorder=None, names=None): labels, levels = _factorize_from_iterables(iterables) labels = cartesian_product(labels) + return MultiIndex(levels, labels, sortorder=sortorder, names=names) - return MultiIndex(levels=levels, labels=labels, sortorder=sortorder, - names=names) + def _sort_levels_monotonic(self): + """ + .. versionadded:: 0.20.0 + + This is an *internal* function. + + create a new MultiIndex from the current to monotonically sorted + items IN the levels. This does not actually make the entire MultiIndex + monotonic, JUST the levels. + + The resulting MultiIndex will have the same outward + appearance, meaning the same .values and ordering. It will also + be .equals() to the original. 
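+        (Internally, each non-monotonic level is argsorted and its labels
+        are remapped through the inverse permutation, so the observed
+        values and their order are unchanged.)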
+
+        Returns
+        -------
+        MultiIndex
+
+        Examples
+        --------
+
+        >>> i = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
+                              labels=[[0, 0, 1, 1], [0, 1, 0, 1]])
+        >>> i
+        MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
+                   labels=[[0, 0, 1, 1], [0, 1, 0, 1]])
+
+        >>> i._sort_levels_monotonic()
+        MultiIndex(levels=[['a', 'b'], ['aa', 'bb']],
+                   labels=[[0, 0, 1, 1], [1, 0, 1, 0]])
+
+        """
+
+        if self.is_lexsorted() and self.is_monotonic:
+            return self
+
+        new_levels = []
+        new_labels = []
+
+        for lev, lab in zip(self.levels, self.labels):
+
+            if lev.is_monotonic:
+                new_levels.append(lev)
+                new_labels.append(lab)
+                continue
+
+            # indexer to reorder the levels
+            indexer = lev.argsort()
+            lev = lev.take(indexer)
+
+            # indexer to reorder the labels
+            ri = lib.get_reverse_indexer(indexer, len(indexer))
+            lab = algos.take_1d(ri, lab)
+
+            new_levels.append(lev)
+            new_labels.append(lab)
+
+        return MultiIndex(new_levels, new_labels,
+                          names=self.names, sortorder=self.sortorder,
+                          verify_integrity=False)
+
+    def remove_unused_levels(self):
+        """
+        create a new MultiIndex from the current one, removing unused
+        levels, meaning levels that are not expressed in the labels
+
+        The resulting MultiIndex will have the same outward
+        appearance, meaning the same .values and ordering. It will also
+        be .equals() to the original.
+
+        .. versionadded:: 0.20.0
+
+        Returns
+        -------
+        MultiIndex
+
+        Examples
+        --------
+        >>> i = pd.MultiIndex.from_product([range(2), list('ab')])
+        MultiIndex(levels=[[0, 1], ['a', 'b']],
+                   labels=[[0, 0, 1, 1], [0, 1, 0, 1]])
+
+        >>> i[2:]
+        MultiIndex(levels=[[0, 1], ['a', 'b']],
+                   labels=[[1, 1], [0, 1]])
+
+        # the 0 from the first level is not represented
+        # and can be removed
+        >>> i[2:].remove_unused_levels()
+        MultiIndex(levels=[[1], ['a', 'b']],
+                   labels=[[0, 0], [0, 1]])
+
+        """
+
+        new_levels = []
+        new_labels = []
+
+        changed = np.ones(self.nlevels, dtype=bool)
+        for i, (lev, lab) in enumerate(zip(self.levels, self.labels)):
+
+            uniques = algos.unique(lab)
+
+            # nothing unused
+            if len(uniques) == len(lev):
+                new_levels.append(lev)
+                new_labels.append(lab)
+                changed[i] = False
+                continue
+
+            # set difference, then reverse sort
+            diff = Index(np.arange(len(lev))).difference(uniques)
+            unused = diff.sort_values(ascending=False)
+
+            # new levels are simple
+            lev = lev.take(uniques)
+
+            # new labels, we remove the unused levels
+            # by decrementing the labels for each removed value
+            # (there is probably a better way)
+            for u in unused:
+
+                lab = np.where(lab > u, lab - 1, lab)
+
+            new_levels.append(lev)
+            new_labels.append(lab)
+
+        # nothing changed
+        if not changed.any():
+            return self
+
+        return MultiIndex(new_levels, new_labels,
+                          names=self.names, sortorder=self.sortorder,
+                          verify_integrity=False)
 
     @property
     def nlevels(self):
@@ -1744,9 +1877,10 @@ def slice_locs(self, start=None, end=None, step=None, kind=None):
 
     def _partial_tup_index(self, tup, side='left'):
         if len(tup) > self.lexsort_depth:
-            raise KeyError('Key length (%d) was greater than MultiIndex'
-                           ' lexsort depth (%d)' %
-                           (len(tup), self.lexsort_depth))
+            raise UnsortedIndexError(
+                'Key length (%d) was greater than MultiIndex'
+                ' lexsort depth (%d)' %
+                (len(tup), self.lexsort_depth))
 
         n = len(tup)
         start, end = 0, len(self)
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 470526043234f..e93319a30d5d8 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -2411,6 +2411,80 @@ def test_is_monotonic(self):
 
         self.assertFalse(i.is_monotonic)
 
+    def test_reconstruct_sort(self):
+
+        
# starts off lexsorted & monotonic + mi = MultiIndex.from_arrays([ + ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] + ]) + assert mi.is_lexsorted() + assert mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert recons.is_lexsorted() + assert recons.is_monotonic + assert mi is recons + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'), + ('x', 'b'), ('y', 'a'), ('z', 'b')], + names=['one', 'two']) + assert not mi.is_lexsorted() + assert not mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert not recons.is_lexsorted() + assert not recons.is_monotonic + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]], + labels=[[0, 1, 0, 2], [2, 0, 0, 1]], + names=['col1', 'col2']) + assert not mi.is_lexsorted() + assert not mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert not recons.is_lexsorted() + assert not recons.is_monotonic + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + def test_reconstruct_remove_unused(self): + # xref to GH 2770 + df = DataFrame([['deleteMe', 1, 9], + ['keepMe', 2, 9], + ['keepMeToo', 3, 9]], + columns=['first', 'second', 'third']) + df2 = df.set_index(['first', 'second'], drop=False) + df2 = df2[df2['first'] != 'deleteMe'] + + # removed levels are there + expected = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'], + [1, 2, 3]], + labels=[[1, 2], [1, 2]], + names=['first', 'second']) + result = df2.index + tm.assert_index_equal(result, expected) + + expected = MultiIndex(levels=[['keepMe', 'keepMeToo'], + [2, 3]], + labels=[[0, 1], [0, 1]], + names=['first', 'second']) + result = df2.index.remove_unused_levels() + tm.assert_index_equal(result, expected) + + # idempotent + result2 = result.remove_unused_levels() + tm.assert_index_equal(result2, expected) + assert result2 is result + def test_isin(self): values = [('foo', 2), ('bar', 3), ('quux', 4)] @@ -2699,6 +2773,30 @@ def test_unsortedindex(self): with assertRaises(KeyError): df.loc(axis=0)['q', :] + def test_unsortedindex_doc_examples(self): + # http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex # noqa + dfm = DataFrame({'jim': [0, 0, 1, 1], + 'joe': ['x', 'x', 'z', 'y'], + 'jolie': np.random.rand(4)}) + + dfm = dfm.set_index(['jim', 'joe']) + with tm.assert_produces_warning(PerformanceWarning): + dfm.loc[(1, 'z')] + + with pytest.raises(UnsortedIndexError): + dfm.loc[(0, 'y'):(1, 'z')] + + assert not dfm.index.is_lexsorted() + assert dfm.index.lexsort_depth == 1 + + # sort it + dfm = dfm.sort_index() + dfm.loc[(1, 'z')] + dfm.loc[(0, 'y'):(1, 'z')] + + assert dfm.index.is_lexsorted() + assert dfm.index.lexsort_depth == 2 + def test_tuples_with_name_string(self): # GH 15110 and GH 14848 diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 732142f1bce9a..a682e8643d251 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1526,7 +1526,7 @@ def test_unstack(self): labels=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]]) expected = DataFrame({'bar': s.values}, index=exp_index).sort_index(level=0) - unstacked = s.unstack(0) + unstacked = s.unstack(0).sort_index() assert_frame_equal(unstacked, expected) # GH5873 diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py 
index 5584c1ac6a239..914d26fcafb4a 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -11,6 +11,7 @@ from pandas.core.index import Index, MultiIndex from pandas import Panel, DataFrame, Series, notnull, isnull, Timestamp +from pandas.core.common import UnsortedIndexError from pandas.types.common import is_float_dtype, is_integer_dtype import pandas.core.common as com import pandas.util.testing as tm @@ -2438,6 +2439,30 @@ def test_getitem_slice_not_sorted(self): expected = df.reindex(columns=df.columns[:3]) tm.assert_frame_equal(result, expected) + def test_frame_getitem_not_sorted2(self): + # 13431 + df = DataFrame({'col1': ['b', 'd', 'b', 'a'], + 'col2': [3, 1, 1, 2], + 'data': ['one', 'two', 'three', 'four']}) + + df2 = df.set_index(['col1', 'col2']) + df2_original = df2.copy() + + df2.index.set_levels(['b', 'd', 'a'], level='col1', inplace=True) + df2.index.set_labels([0, 1, 0, 2], level='col1', inplace=True) + assert not df2.index.is_lexsorted() + assert not df2.index.is_monotonic + + assert df2_original.index.equals(df2.index) + expected = df2.sort_index() + assert not expected.index.is_lexsorted() + assert expected.index.is_monotonic + + result = df2.sort_index(level=0) + assert not result.index.is_lexsorted() + assert result.index.is_monotonic + tm.assert_frame_equal(result, expected) + def test_frame_getitem_not_sorted(self): df = self.frame.T df['foo', 'four'] = 'foo' @@ -2474,3 +2499,137 @@ def test_series_getitem_not_sorted(self): expected.index = expected.index.droplevel(0) tm.assert_series_equal(result, expected) tm.assert_series_equal(result2, expected) + + def test_sort_index_and_reconstruction(self): + + # 15622 + # lexsortedness should be identical + # across MultiIndex consruction methods + + df = DataFrame([[1, 1], [2, 2]], index=list('ab')) + expected = DataFrame([[1, 1], [2, 2], [1, 1], [2, 2]], + index=MultiIndex.from_tuples([(0.5, 'a'), + (0.5, 'b'), + (0.8, 'a'), + (0.8, 'b')])) + assert expected.index.is_lexsorted() + + result = DataFrame( + [[1, 1], [2, 2], [1, 1], [2, 2]], + index=MultiIndex.from_product([[0.5, 0.8], list('ab')])) + result = result.sort_index() + assert result.index.is_lexsorted() + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + result = DataFrame( + [[1, 1], [2, 2], [1, 1], [2, 2]], + index=MultiIndex(levels=[[0.5, 0.8], ['a', 'b']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]])) + result = result.sort_index() + assert result.index.is_lexsorted() + + tm.assert_frame_equal(result, expected) + + concatted = pd.concat([df, df], keys=[0.8, 0.5]) + result = concatted.sort_index() + + # this will be monotonic, but not lexsorted! 
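+        # (``concat`` kept the ``keys`` order [0.8, 0.5] as the outer level,
+        #  so after sorting the level-0 labels are [1, 1, 0, 0]: monotonic
+        #  by value, but not lexsorted)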
+ assert not result.index.is_lexsorted() + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + # 14015 + df = DataFrame([[1, 2], [6, 7]], + columns=MultiIndex.from_tuples( + [(0, '20160811 12:00:00'), + (0, '20160809 12:00:00')], + names=['l1', 'Date'])) + + df.columns.set_levels(pd.to_datetime(df.columns.levels[1]), + level=1, + inplace=True) + assert not df.columns.is_lexsorted() + assert not df.columns.is_monotonic + result = df.sort_index(axis=1) + assert result.columns.is_lexsorted() + assert result.columns.is_monotonic + result = df.sort_index(axis=1, level=1) + assert result.columns.is_lexsorted() + assert result.columns.is_monotonic + + def test_sort_index_and_reconstruction_doc_example(self): + # doc example + df = DataFrame({'value': [1, 2, 3, 4]}, + index=MultiIndex( + levels=[['a', 'b'], ['bb', 'aa']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]])) + assert df.index.is_lexsorted() + assert not df.index.is_monotonic + + # sort it + expected = DataFrame({'value': [2, 1, 4, 3]}, + index=MultiIndex( + levels=[['a', 'b'], ['aa', 'bb']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]])) + result = df.sort_index() + assert not result.index.is_lexsorted() + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + # reconstruct + result = df.sort_index().copy() + result.index = result.index._sort_levels_monotonic() + assert result.index.is_lexsorted() + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + def test_sort_index_reorder_on_ops(self): + # 15687 + df = pd.DataFrame( + np.random.randn(8, 2), + index=MultiIndex.from_product( + [['a', 'b'], + ['big', 'small'], + ['red', 'blu']], + names=['letter', 'size', 'color']), + columns=['near', 'far']) + df = df.sort_index() + + def my_func(group): + group.index = ['newz', 'newa'] + return group + + result = df.groupby(level=['letter', 'size']).apply( + my_func).sort_index() + expected = MultiIndex.from_product( + [['a', 'b'], + ['big', 'small'], + ['newa', 'newz']], + names=['letter', 'size', None]) + + tm.assert_index_equal(result.index, expected) + + def test_sort_non_lexsorted(self): + # degenerate case where we sort but don't + # have a satisfying result :< + + idx = MultiIndex([['A', 'B', 'C'], + ['c', 'b', 'a']], + [[0, 1, 2, 0, 1, 2], + [0, 2, 1, 1, 0, 2]]) + + df = DataFrame({'col': range(len(idx))}, index=idx) + assert df.index.is_lexsorted() is False + assert df.index.is_monotonic is False + + result = df.sort_index() + assert result.index.is_lexsorted() is False + assert result.index.is_monotonic is True + + with pytest.raises(UnsortedIndexError): + result.loc[pd.IndexSlice['B':'C', 'a':'c'], :] diff --git a/pandas/tests/tools/test_hashing.py b/pandas/tests/tools/test_hashing.py index 9bed0d428bc41..864b5018abc75 100644 --- a/pandas/tests/tools/test_hashing.py +++ b/pandas/tests/tools/test_hashing.py @@ -87,6 +87,35 @@ def test_multiindex_unique(self): result = hash_pandas_object(mi) self.assertTrue(result.is_unique) + def test_multiindex_objects(self): + mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]], + labels=[[0, 1, 0, 2], [2, 0, 0, 1]], + names=['col1', 'col2']) + recons = mi._sort_levels_monotonic() + + # these are equal + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # _hashed_values and hash_pandas_object(..., index=False) + # equivalency + expected = hash_pandas_object( + mi, index=False).values + result = mi._hashed_values + tm.assert_numpy_array_equal(result, expected) + + expected = hash_pandas_object( + 
                recons, index=False).values
+        result = recons._hashed_values
+        tm.assert_numpy_array_equal(result, expected)
+
+        expected = mi._hashed_values
+        result = recons._hashed_values
+
+        # values should match, but in different order
+        tm.assert_numpy_array_equal(np.sort(result),
+                                    np.sort(expected))
+
     def test_hash_pandas_object(self):

         for obj in [Series([1, 2, 3]),
diff --git a/pandas/tests/tools/test_pivot.py b/pandas/tests/tools/test_pivot.py
index 4502f232c6d9c..c8dfaf5e29bc6 100644
--- a/pandas/tests/tools/test_pivot.py
+++ b/pandas/tests/tools/test_pivot.py
@@ -2,6 +2,7 @@

 import numpy as np

+from collections import OrderedDict
 import pandas as pd
 from pandas import (DataFrame, Series, Index, MultiIndex,
                     Grouper, date_range, concat)
@@ -513,7 +514,7 @@ def test_pivot_columns_lexsorted(self):
         self.assertTrue(pivoted.columns.is_monotonic)

     def test_pivot_complex_aggfunc(self):
-        f = {'D': ['std'], 'E': ['sum']}
+        f = OrderedDict([('D', ['std']), ('E', ['sum'])])
         expected = self.data.groupby(['A', 'B']).agg(f).unstack('B')
         result = self.data.pivot_table(index='A', columns='B', aggfunc=f)

From d9e00d2ac0803c561c9f7af1131f586aecfb8113 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Fri, 7 Apr 2017 11:21:04 -0400
Subject: [PATCH 349/933] DEPR: deprecate pd.get_store as not api consistent and cluttering (#15940)

---
 doc/source/whatsnew/v0.20.0.txt |  3 ++-
 pandas/io/pytables.py           |  7 +++++++
 pandas/tests/api/test_api.py    | 16 ++++++++++++++--
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 21b259e7663ba..31b0efa14a44d 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -981,13 +981,14 @@ Deprecations
 - importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explicit imports (:issue:`15358`)
 - ``Series/DataFrame/Panel.consolidate()`` has been deprecated as a public method. (:issue:`15483`)
 - The ``as_indexer`` keyword of ``Series.str.match()`` has been deprecated (ignored keyword) (:issue:`15257`).
-- The following top-level pandas functions have been deprecated and will be removed in a future version (:issue:`13790`)
+- The following top-level pandas functions have been deprecated and will be removed in a future version (:issue:`13790`, :issue:`15940`)

   * ``pd.pnow()``, replaced by ``Period.now()``
   * ``pd.Term``, is removed, as it is not applicable to user code. Instead use in-line string expressions in the where clause when searching in HDFStore
   * ``pd.Expr``, is removed, as it is not applicable to user code.
   * ``pd.match()``, is removed.
   * ``pd.groupby()``, replaced by using the ``.groupby()`` method directly on a ``Series/DataFrame``
+  * ``pd.get_store()``, replaced by a direct call to ``pd.HDFStore(...)``, as sketched below
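A minimal sketch of that last replacement (the ``data.h5`` path is a placeholder, and ``HDFStore`` needs the optional PyTables dependency installed):

.. code-block:: python

   import pandas as pd

   df = pd.DataFrame({'A': range(5)})

   # before: store = pd.get_store('data.h5')   # now emits a FutureWarning
   # after: construct the store directly; HDFStore is also a context
   # manager, so the file handle is closed on exiting the block
   with pd.HDFStore('data.h5') as store:
       store.put('df', df, format='table')
       roundtripped = store.select('df')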
.. _whatsnew_0200.prior_deprecations:

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 9b525b76b0f17..802f460ecba07 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -1323,6 +1323,13 @@ def _read_group(self, group, **kwargs):

 def get_store(path, **kwargs):
     """ Backwards compatible alias for ``HDFStore`` """
+    warnings.warn(
+        "get_store is deprecated and will be "
+        "removed in a future version\n"
+        "HDFStore(path, **kwargs) is the replacement",
+        FutureWarning,
+        stacklevel=6)
+
     return HDFStore(path, **kwargs)

diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py
index 7d1308d67668e..7301c87026114 100644
--- a/pandas/tests/api/test_api.py
+++ b/pandas/tests/api/test_api.py
@@ -2,6 +2,7 @@

 from warnings import catch_warnings

+import pytest
 import pandas as pd
 from pandas import api
 from pandas.util import testing as tm
@@ -63,7 +64,7 @@ class TestPDApi(Base, tm.TestCase):

     # top-level functions
     funcs = ['bdate_range', 'concat', 'crosstab', 'cut', 'date_range', 'eval',
-             'factorize', 'get_dummies', 'get_store',
+             'factorize', 'get_dummies',
              'infer_freq', 'isnull', 'lreshape', 'melt', 'notnull',
              'offsets', 'merge', 'merge_ordered', 'merge_asof',
@@ -102,7 +103,7 @@ class TestPDApi(Base, tm.TestCase):
                        'rolling_median', 'rolling_min', 'rolling_quantile',
                        'rolling_skew', 'rolling_std', 'rolling_sum',
                        'rolling_var', 'rolling_window', 'ordered_merge',
-                       'pnow', 'match', 'groupby']
+                       'pnow', 'match', 'groupby', 'get_store']

     def test_api(self):

@@ -140,6 +141,7 @@ def test_deprecation_access_obj(self):

 class TestTopLevelDeprecations(tm.TestCase):
+
     # top-level API deprecations
     # GH 13790

@@ -168,6 +170,16 @@ def test_groupby(self):
                                         check_stacklevel=False):
             pd.groupby(pd.Series([1, 2, 3]), [1, 1, 1])

+    # GH 15940
+
+    def test_get_store(self):
+        pytest.importorskip('tables')
+        with tm.ensure_clean() as path:
+            with tm.assert_produces_warning(FutureWarning,
+                                            check_stacklevel=False):
+                s = pd.get_store(path)
+                s.close()
+

 class TestJson(tm.TestCase):

From 88bed545067fe2b466eb3f2d04a1802b493c4909 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Fri, 7 Apr 2017 20:58:20 +0200
Subject: [PATCH 350/933] BLD: update merge script to update on github (#15917)

* BLD: update merge script to update on github

* adapt question
---
 scripts/{merge-py.py => merge-pr.py} | 55 +++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 2 deletions(-)
 rename scripts/{merge-py.py => merge-pr.py} (83%)

diff --git a/scripts/merge-py.py b/scripts/merge-pr.py
similarity index 83%
rename from scripts/merge-py.py
rename to scripts/merge-pr.py
index b9350f8feceb8..1fc4eef3d0583 100755
--- a/scripts/merge-py.py
+++ b/scripts/merge-pr.py
@@ -99,6 +99,14 @@ def continue_maybe(prompt):
         fail("Okay, exiting")


+def continue_maybe2(prompt):
+    result = input("\n%s (y/n): " % prompt)
+    if result.lower() != "y":
+        return False
+    else:
+        return True
+

 original_head = run_cmd("git rev-parse HEAD")[:8]


@@ -193,6 +201,40 @@ def merge_pr(pr_num, target_ref):
     return merge_hash


+def update_pr(pr_num, user_login, base_ref):
+
+    pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, pr_num)
+
+    run_cmd("git fetch %s pull/%s/head:%s" % (PR_REMOTE_NAME, pr_num,
+                                              pr_branch_name))
+    run_cmd("git checkout %s" % pr_branch_name)
+
+    continue_maybe("Update ready (local ref %s)? Push to %s/%s?"
% ( + pr_branch_name, user_login, base_ref)) + + push_user_remote = "https://github.com/%s/pandas.git" % user_login + + try: + run_cmd('git push %s %s:%s' % (push_user_remote, pr_branch_name, + base_ref)) + except Exception as e: + + if continue_maybe2("Force push?"): + try: + run_cmd( + 'git push -f %s %s:%s' % (push_user_remote, pr_branch_name, + base_ref)) + except Exception as e: + fail("Exception while pushing: %s" % e) + clean_up() + else: + fail("Exception while pushing: %s" % e) + clean_up() + + clean_up() + print("Pull request #%s updated!" % pr_num) + + def cherry_pick(pr_num, merge_hash, default_branch): pick_ref = input("Enter a branch name [%s]: " % default_branch) if pick_ref == "": @@ -257,8 +299,17 @@ def fix_version_from_branch(branch, versions): print("\n=== Pull Request #%s ===" % pr_num) print("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % (title, pr_repo_desc, target_ref, url)) -continue_maybe("Proceed with merging pull request #%s?" % pr_num) + + merged_refs = [target_ref] -merge_hash = merge_pr(pr_num, target_ref) +print("\nProceed with updating or merging pull request #%s?" % pr_num) +update = input("Update PR and push to remote (r), merge locally (l), " + "or do nothing (n) ?") +update = update.lower() + +if update == 'r': + merge_hash = update_pr(pr_num, user_login, base_ref) +elif update == 'l': + merge_hash = merge_pr(pr_num, target_ref) From c4dca36801a58d7ae5cdc395404d4e35f03d110d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 7 Apr 2017 21:04:31 +0200 Subject: [PATCH 351/933] DEPR: correct locations to access public json/parser objects in depr message (#15909) * DEPR: correct locations to access public json objects in depr message * Additional corrections --- pandas/__init__.py | 10 +++++++--- pandas/tslib.py | 3 +-- pandas/util/depr_module.py | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 83ad85e3e292b..529750cd97076 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -60,11 +60,15 @@ # extension module deprecations from pandas.util.depr_module import _DeprecatedModule -json = _DeprecatedModule(deprmod='pandas.json', deprmodto='pandas.io.json.libjson') -parser = _DeprecatedModule(deprmod='pandas.parser', deprmodto='pandas.io.libparsers') +json = _DeprecatedModule(deprmod='pandas.json', + moved={'dumps': 'pandas.io.json.dumps', + 'loads': 'pandas.io.json.loads'}) +parser = _DeprecatedModule(deprmod='pandas.parser', + removals=['na_values'], + moved={'CParserError': 'pandas.errors.ParserError'}) lib = _DeprecatedModule(deprmod='pandas.lib', deprmodto='pandas._libs.lib', moved={'infer_dtype': 'pandas.api.lib.infer_dtype'}) -tslib = _DeprecatedModule(deprmod='pandas.tslib', deprmodto='pandas._libs.tslib', +tslib = _DeprecatedModule(deprmod='pandas.tslib', moved={'Timestamp': 'pandas.Timestamp', 'Timedelta': 'pandas.Timedelta', 'NaT': 'pandas.NaT', diff --git a/pandas/tslib.py b/pandas/tslib.py index 3d96dc496c0de..f7d99538c2ea2 100644 --- a/pandas/tslib.py +++ b/pandas/tslib.py @@ -2,7 +2,6 @@ import warnings warnings.warn("The pandas.tslib module is deprecated and will be " - "removed in a future version. 
Please import from " - "the pandas or pandas.errors instead", FutureWarning, stacklevel=2) + "removed in a future version.", FutureWarning, stacklevel=2) from pandas._libs.tslib import (Timestamp, Timedelta, NaT, OutOfBoundsDatetime) diff --git a/pandas/util/depr_module.py b/pandas/util/depr_module.py index 0885c81ce2757..1f428198c19f3 100644 --- a/pandas/util/depr_module.py +++ b/pandas/util/depr_module.py @@ -68,7 +68,7 @@ def __getattr__(self, name): elif self.moved is not None and name in self.moved: warnings.warn( "{deprmod} is deprecated and will be removed in " - "a future version.\nYou can access {name} in {moved}".format( + "a future version.\nYou can access {name} as {moved}".format( deprmod=self.deprmod, name=name, moved=self.moved[name]), From c25fbde09272f369f280212e5216441d5975687c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 7 Apr 2017 15:09:09 -0400 Subject: [PATCH 352/933] DEPR: Panel deprecated closes #13563 on top of #15677 Author: Jeff Reback Closes #15601 from jreback/panel and squashes the following commits: 04104a7 [Jeff Reback] fine grained catching warnings in tests f8800dc [Jeff Reback] add numpy reference for searchsorted fa136dd [Jeff Reback] doc correction c39453a [Jeff Reback] add perf optimization in searchsorted for FrozenNDArray 0e9c4a4 [Jeff Reback] fix docs as per review & column name changes 3df0abe [Jeff Reback] remove Panel from doc-strings, catch internal warning on Panel construction 755606d [Jeff Reback] more docs d04db2e [Jeff Reback] add deprecate_panel section to docs 538b8e8 [Jeff Reback] pep fix 912d523 [Jeff Reback] TST: separate out test_append_to_multiple_dropna to two tests; when drop=False this is sometimes failing a2625ba [Jeff Reback] remove most Term references in test_pytables.py cd5b6b8 [Jeff Reback] DEPR: Panel deprecated 6b20ddc [Jeff Reback] fix names on return structure f41d3df [Jeff Reback] API: df.rolling(..).corr()/cov() when pairwise=True to return MI DataFrame 84e788b [Jeff Reback] BUG/PERF: handle a slice correctly in get_level_indexer --- doc/source/computation.rst | 20 +- doc/source/dsintro.rst | 55 + doc/source/whatsnew/v0.20.0.txt | 75 + pandas/core/panel.py | 16 +- pandas/core/window.py | 68 +- pandas/indexes/frozen.py | 24 + pandas/indexes/multi.py | 20 +- pandas/tests/io/test_pytables.py | 1462 ++++++------ pandas/tests/test_expressions.py | 32 +- pandas/tests/test_generic.py | 171 +- pandas/tests/test_panel.py | 3422 +++++++++++++++------------- pandas/tests/test_window.py | 441 ++-- pandas/tests/tools/test_concat.py | 91 +- pandas/tests/types/test_missing.py | 14 +- pandas/util/testing.py | 6 +- 15 files changed, 3193 insertions(+), 2724 deletions(-) diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 57480a244f308..2423f1a342994 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -505,13 +505,18 @@ two ``Series`` or any combination of ``DataFrame/Series`` or - ``DataFrame/DataFrame``: by default compute the statistic for matching column names, returning a DataFrame. If the keyword argument ``pairwise=True`` is passed then computes the statistic for each pair of columns, returning a - ``Panel`` whose ``items`` are the dates in question (see :ref:`the next section + ``MultiIndexed DataFrame`` whose ``index`` are the dates in question (see :ref:`the next section `). For example: .. 
ipython:: python + df = pd.DataFrame(np.random.randn(1000, 4), + index=pd.date_range('1/1/2000', periods=1000), + columns=['A', 'B', 'C', 'D']) + df = df.cumsum() + df2 = df[:20] df2.rolling(window=5).corr(df2['B']) @@ -520,11 +525,16 @@ For example: Computing rolling pairwise covariances and correlations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. warning:: + + Prior to version 0.20.0 if ``pairwise=True`` was passed, a ``Panel`` would be returned. + This will now return a 2-level MultiIndexed DataFrame, see the whatsnew :ref:`here ` + In financial data analysis and other fields it's common to compute covariance and correlation matrices for a collection of time series. Often one is also interested in moving-window covariance and correlation matrices. This can be done by passing the ``pairwise`` keyword argument, which in the case of -``DataFrame`` inputs will yield a ``Panel`` whose ``items`` are the dates in +``DataFrame`` inputs will yield a MultiIndexed ``DataFrame`` whose ``index`` are the dates in question. In the case of a single DataFrame argument the ``pairwise`` argument can even be omitted: @@ -539,12 +549,12 @@ can even be omitted: .. ipython:: python covs = df[['B','C','D']].rolling(window=50).cov(df[['A','B','C']], pairwise=True) - covs[df.index[-50]] + covs.loc['2002-09-22':] .. ipython:: python correls = df.rolling(window=50).corr() - correls[df.index[-50]] + correls.loc['2002-09-22':] You can efficiently retrieve the time series of correlations between two columns using ``.loc`` indexing: @@ -557,7 +567,7 @@ columns using ``.loc`` indexing: .. ipython:: python @savefig rolling_corr_pairwise_ex.png - correls.loc[:, 'A', 'C'].plot() + correls.loc[:, ('A', 'C')].plot() .. _stats.aggregate: diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 4fcb63c18757a..2b11b23b1d1c2 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -763,6 +763,11 @@ completion mechanism so they can be tab-completed: Panel ----- +.. warning:: + + In 0.20.0, ``Panel`` is deprecated and will be removed in + a future version. See the section :ref:`Deprecate Panel `. + Panel is a somewhat less-used, but still important container for 3-dimensional data. The term `panel data `__ is derived from econometrics and is partially responsible for the name pandas: @@ -783,6 +788,7 @@ From 3D ndarray with optional axis labels ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. ipython:: python + :okwarning: wp = pd.Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], major_axis=pd.date_range('1/1/2000', periods=5), @@ -794,6 +800,7 @@ From dict of DataFrame objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. ipython:: python + :okwarning: data = {'Item1' : pd.DataFrame(np.random.randn(4, 3)), 'Item2' : pd.DataFrame(np.random.randn(4, 2))} @@ -816,6 +823,7 @@ dictionary of DataFrames as above, and the following named parameters: For example, compare to the construction above: .. ipython:: python + :okwarning: pd.Panel.from_dict(data, orient='minor') @@ -824,6 +832,7 @@ DataFrame objects with mixed-type columns, all of the data will get upcasted to ``dtype=object`` unless you pass ``orient='minor'``: .. ipython:: python + :okwarning: df = pd.DataFrame({'a': ['foo', 'bar', 'baz'], 'b': np.random.randn(3)}) @@ -851,6 +860,7 @@ This method was introduced in v0.7 to replace ``LongPanel.to_long``, and convert a DataFrame with a two-level index to a Panel. .. 
ipython:: python
+   :okwarning:

    midx = pd.MultiIndex(levels=[['one', 'two'], ['x','y']],
                         labels=[[1,1,0,0],[1,0,1,0]])
    df = pd.DataFrame({'A' : [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=midx)
@@ -880,6 +890,7 @@ A Panel can be rearranged using its ``transpose`` method (which does not make a
 copy by default unless the data are heterogeneous):

 .. ipython:: python
+   :okwarning:

    wp.transpose(2, 0, 1)

@@ -909,6 +920,7 @@ Squeezing
 Another way to change the dimensionality of an object is to ``squeeze`` a 1-len
 object, similar to ``wp['Item1']``

 .. ipython:: python
+   :okwarning:

    wp.reindex(items=['Item1']).squeeze()
    wp.reindex(items=['Item1'], minor=['B']).squeeze()

@@ -923,12 +935,55 @@ for more on this.
 To convert a Panel to a DataFrame, use the ``to_frame`` method:

 .. ipython:: python
+   :okwarning:

    panel = pd.Panel(np.random.randn(3, 5, 4), items=['one', 'two', 'three'],
                     major_axis=pd.date_range('1/1/2000', periods=5),
                     minor_axis=['a', 'b', 'c', 'd'])
    panel.to_frame()

+
+.. _dsintro.deprecate_panel:
+
+Deprecate Panel
+---------------
+
+Over the last few years, pandas has increased in both breadth and depth, with new features,
+datatype support, and manipulation routines. As a result, supporting efficient indexing and functional
+routines for ``Series``, ``DataFrame`` and ``Panel`` has contributed to an increasingly fragmented and
+difficult-to-understand codebase.
+
+The 3-D structure of a ``Panel`` is much less common for many types of data analysis
+than the 1-D of the ``Series`` or the 2-D of the ``DataFrame``. Going forward, it makes sense for
+pandas to focus on these areas exclusively.
+
+Oftentimes, one can simply use a MultiIndex ``DataFrame`` to work easily with higher-dimensional data.
+
+In addition, the ``xarray`` package was built from the ground up specifically to
+support the multi-dimensional analysis that is one of ``Panel``'s main use cases.
+`Here is a link to the xarray panel-transition documentation `__.
+
+.. ipython:: python
+   :okwarning:
+
+   p = tm.makePanel()
+   p
+
+Convert to a MultiIndex DataFrame
+
+.. ipython:: python
+   :okwarning:
+
+   p.to_frame()
+
+Alternatively, one can convert to an xarray ``DataArray``.
+
+.. ipython:: python
+
+   p.to_xarray()
+
+You can see the full documentation for the `xarray package `__.
+
 .. _dsintro.panelnd:
 .. _dsintro.panel4d:

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 31b0efa14a44d..132f20cb73142 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -10,13 +10,16 @@ users upgrade to this version.
 Highlights include:

 - The ``.ix`` indexer has been deprecated, see :ref:`here `
+- ``Panel`` has been deprecated, see :ref:`here `
 - Improved user API when accessing levels in ``.groupby()``, see :ref:`here `
 - Improved support for UInt64 dtypes, see :ref:`here `
 - A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec, see :ref:`here `
+- Window Binary Corr/Cov operations return a MultiIndexed ``DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, see :ref:`here `
 - Support for S3 handling now uses ``s3fs``, see :ref:`here `
 - Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here `
 - Switched the test framework to use `pytest `__ (:issue:`13097`)

+
 Check the :ref:`API Changes ` and :ref:`deprecations ` before updating.

 .. contents:: What's new in v0.20.0
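The dsintro section above recommends a MultiIndex ``DataFrame`` in place of ``Panel``; here is a minimal sketch of that pattern (the item names and shapes are illustrative only):

.. code-block:: python

   import numpy as np
   import pandas as pd

   # one 2-D frame per "item" -- the data a Panel would have held
   data = {'Item1': pd.DataFrame(np.random.randn(4, 3), columns=list('ABC')),
           'Item2': pd.DataFrame(np.random.randn(4, 3), columns=list('ABC'))}

   # concatenating a dict stacks the frames, keying an outer index level
   df = pd.concat(data, names=['item'])

   # a cross-section recovers a per-item 2-D frame
   item1 = df.xs('Item1', level='item')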
@@ -425,6 +428,33 @@ Using ``.iloc``. Here we will get the location of the 'A' column, then use *posi

    df.iloc[[0, 2], df.columns.get_loc('A')]

+.. _whatsnew_0200.api_breaking.deprecate_panel:
+
+Deprecate Panel
+^^^^^^^^^^^^^^^
+
+``Panel`` is deprecated and will be removed in a future version. The recommended way to represent 3-D data is
+with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` or with the `xarray package `__. Pandas
+provides a :meth:`~Panel.to_xarray` method to automate this conversion. See the documentation :ref:`Deprecate Panel `. (:issue:`13563`).
+
+.. ipython:: python
+   :okwarning:
+
+   p = tm.makePanel()
+   p
+
+Convert to a MultiIndex DataFrame
+
+.. ipython:: python
+
+   p.to_frame()
+
+Convert to an xarray DataArray
+
+.. ipython:: python
+
+   p.to_xarray()
+
 .. _whatsnew.api_breaking.io_compat:

 Possible incompat for HDF5 formats for pandas < 0.13.0
@@ -836,6 +866,51 @@ New Behavior:

    df.groupby('A').agg([np.mean, np.std, np.min, np.max])

+.. _whatsnew_0200.api_breaking.rolling_pairwise:
+
+Window Binary Corr/Cov operations return a MultiIndex DataFrame
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A binary window operation, like ``.corr()`` or ``.cov()``, when operating on a ``.rolling(..)``, ``.expanding(..)``, or ``.ewm(..)`` object,
+will now return a 2-level ``MultiIndexed DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated,
+see :ref:`here <whatsnew_0200.api_breaking.deprecate_panel>`. These are equivalent in function,
+but MultiIndexed ``DataFrame`` s enjoy more support in pandas.
+See the section on :ref:`Windowed Binary Operations ` for more information. (:issue:`15677`)
+
+.. ipython:: python
+
+   np.random.seed(1234)
+   df = pd.DataFrame(np.random.rand(100, 2),
+                     columns=pd.Index(['A', 'B'], name='bar'),
+                     index=pd.date_range('20160101',
+                                         periods=100, freq='D', name='foo'))
+   df.tail()
+
+Old Behavior:
+
+.. code-block:: ipython
+
+   In [2]: df.rolling(12).corr()
+   Out[2]:
+   <class 'pandas.core.panel.Panel'>
+   Dimensions: 100 (items) x 2 (major_axis) x 2 (minor_axis)
+   Items axis: 2016-01-01 00:00:00 to 2016-04-09 00:00:00
+   Major_axis axis: A to B
+   Minor_axis axis: A to B
+
+New Behavior:
+
+.. ipython:: python
+
+   res = df.rolling(12).corr()
+   res.tail()
+
+Retrieving a correlation matrix for a cross-section
+
+.. ipython:: python
+
+   df.rolling(12).corr().loc['2016-04-07']
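A single pair's correlation through time can be pulled straight out of this MultiIndexed result as well; a minimal sketch, reusing the ``df`` built just above (``xs`` fixes the inner index level):

.. code-block:: python

   res = df.rolling(12).corr()

   # rows are (date, column) pairs: fixing the inner level at 'A' leaves a
   # date-indexed frame whose 'B' column is corr(A, B) through time
   ab = res.xs('A', level=1)['B']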
.. _whatsnew_0200.api_breaking.hdfstore_where:

HDFStore where string comparison
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index 9e95023ccb359..24f4d219fb9ca 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -4,10 +4,8 @@
 # pylint: disable=E1103,W0231,W0212,W0621

 from __future__ import division
-import warnings
-
 import numpy as np
-
+import warnings
 from pandas.types.cast import (infer_dtype_from_scalar,
                                maybe_cast_item)
 from pandas.types.common import (is_integer, is_list_like,
@@ -132,6 +130,18 @@ def _constructor(self):
     def __init__(self, data=None, items=None, major_axis=None,
                  minor_axis=None, copy=False, dtype=None):

+        # deprecation GH13563
+        warnings.warn("\nPanel is deprecated and will be removed in a "
+                      "future version.\nThe recommended way to represent "
+                      "these types of 3-dimensional data is with a "
+                      "MultiIndex on a DataFrame, via the "
+                      "Panel.to_frame() method\n"
+                      "Alternatively, you can use the xarray package "
+                      "http://xarray.pydata.org/en/stable/.\n"
+                      "Pandas provides a `.to_xarray()` method to help "
+                      "automate this conversion.\n",
+                      DeprecationWarning, stacklevel=3)
+
         self._init_data(data=data, items=items, major_axis=major_axis,
                         minor_axis=minor_axis, copy=copy, dtype=dtype)

diff --git a/pandas/core/window.py b/pandas/core/window.py
index 9c9f861451309..89d2f5b24d77e 100644
--- a/pandas/core/window.py
+++ b/pandas/core/window.py
@@ -927,8 +927,9 @@ def f(arg, *args, **kwargs):
     If False then only matching columns between self and other will be used and
     the output will be a DataFrame.
     If True then all pairwise combinations will be calculated and the
-    output will be a Panel in the case of DataFrame inputs. In the case of
-    missing elements, only complete pairwise observations will be used.
+    output will be a MultiIndexed DataFrame in the case of DataFrame
+    inputs. In the case of missing elements, only complete pairwise
+    observations will be used.
 ddof : int, default 1
     Delta Degrees of Freedom.  The divisor used in calculations
     is ``N - ddof``, where ``N`` represents the number of elements.""")
@@ -964,11 +965,12 @@ def _get_cov(X, Y):
 other : Series, DataFrame, or ndarray, optional
     if not supplied then will default to self and produce pairwise output
 pairwise : bool, default None
-    If False then only matching columns between self and other will be used
-    and the output will be a DataFrame.
+    If False then only matching columns between self and other will be
+    used and the output will be a DataFrame.
     If True then all pairwise combinations will be calculated and the
-    output will be a Panel in the case of DataFrame inputs. In the case of
-    missing elements, only complete pairwise observations will be used.""")
+    output will be a MultiIndex DataFrame in the case of DataFrame inputs.
+    In the case of missing elements, only complete pairwise observations
+    will be used.""")

     def corr(self, other=None, pairwise=None, **kwargs):
         if other is None:
@@ -1397,8 +1399,9 @@ def _constructor(self):
     If False then only matching columns between self and other will be used and
     the output will be a DataFrame.
     If True then all pairwise combinations will be calculated and the output
-    will be a Panel in the case of DataFrame inputs. In the case of missing
-    elements, only complete pairwise observations will be used.
+    will be a MultiIndex DataFrame in the case of DataFrame inputs.
+    In the case of missing elements, only complete pairwise observations will
+    be used.
bias : boolean, default False Use a standard estimation bias correction """ @@ -1652,7 +1655,8 @@ def _cov(x, y): def _flex_binary_moment(arg1, arg2, f, pairwise=False): - from pandas import Series, DataFrame, Panel + from pandas import Series, DataFrame + if not (isinstance(arg1, (np.ndarray, Series, DataFrame)) and isinstance(arg2, (np.ndarray, Series, DataFrame))): raise TypeError("arguments to moment function must be of type " @@ -1684,10 +1688,13 @@ def dataframe_from_int_dict(data, frame_template): raise ValueError("'arg1' columns are not unique") if not arg2.columns.is_unique: raise ValueError("'arg2' columns are not unique") - X, Y = arg1.align(arg2, join='outer') + with warnings.catch_warnings(record=True): + X, Y = arg1.align(arg2, join='outer') X = X + 0 * Y Y = Y + 0 * X - res_columns = arg1.columns.union(arg2.columns) + + with warnings.catch_warnings(record=True): + res_columns = arg1.columns.union(arg2.columns) for col in res_columns: if col in X and col in Y: results[col] = f(X[col], Y[col]) @@ -1703,12 +1710,39 @@ def dataframe_from_int_dict(data, frame_template): else: results[i][j] = f(*_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])) - p = Panel.from_dict(results).swapaxes('items', 'major') - if len(p.major_axis) > 0: - p.major_axis = arg1.columns[p.major_axis] - if len(p.minor_axis) > 0: - p.minor_axis = arg2.columns[p.minor_axis] - return p + + # TODO: not the most efficient (perf-wise) + # though not bad code-wise + from pandas import Panel, MultiIndex, Index + with warnings.catch_warnings(record=True): + p = Panel.from_dict(results).swapaxes('items', 'major') + if len(p.major_axis) > 0: + p.major_axis = arg1.columns[p.major_axis] + if len(p.minor_axis) > 0: + p.minor_axis = arg2.columns[p.minor_axis] + + if len(p.items): + result = pd.concat( + [p.iloc[i].T for i in range(len(p.items))], + keys=p.items) + else: + + result = DataFrame( + index=MultiIndex(levels=[arg1.index, arg1.columns], + labels=[[], []]), + columns=arg2.columns, + dtype='float64') + + # reset our index names to arg1 names + # reset our column names to arg2 names + # careful not to mutate the original names + result.columns = Index(result.columns).set_names( + arg2.columns.name) + result.index = result.index.set_names( + [arg1.index.name, arg1.columns.name]) + + return result + else: raise ValueError("'pairwise' is not True/False") else: diff --git a/pandas/indexes/frozen.py b/pandas/indexes/frozen.py index 97a1a3ea99e65..ab1228c008ca8 100644 --- a/pandas/indexes/frozen.py +++ b/pandas/indexes/frozen.py @@ -117,6 +117,30 @@ def __unicode__(self): quote_strings=True) return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype) + def searchsorted(self, v, side='left', sorter=None): + """ + Find indices where elements of v should be inserted + in a to maintain order. 
+ + For full documentation, see `numpy.searchsorted` + + See Also + -------- + numpy.searchsorted : equivalent function + """ + + # we are much more performant if the searched + # indexer is the same type as the array + # this doesn't matter for int64, but DOES + # matter for smaller int dtypes + # https://github.com/numpy/numpy/issues/5370 + try: + v = self.dtype.type(v) + except: + pass + return super(FrozenNDArray, self).searchsorted( + v, side=side, sorter=sorter) + def _ensure_frozen(array_like, categories, copy=False): array_like = coerce_indexer_dtype(array_like, categories) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 96e0effbd7608..77774f3284fef 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -2203,20 +2203,14 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): else: loc = level_index.get_loc(key) - if level > 0 or self.lexsort_depth == 0: + if isinstance(loc, slice): + return loc + elif level > 0 or self.lexsort_depth == 0: return np.array(labels == loc, dtype=bool) - else: - # sorted, so can return slice object -> view - try: - loc = labels.dtype.type(loc) - except TypeError: - # this occurs when loc is a slice (partial string indexing) - # but the TypeError raised by searchsorted in this case - # is catched in Index._has_valid_type() - pass - i = labels.searchsorted(loc, side='left') - j = labels.searchsorted(loc, side='right') - return slice(i, j) + + i = labels.searchsorted(loc, side='left') + j = labels.searchsorted(loc, side='right') + return slice(i, j) def get_locs(self, tup): """ diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 82a98f5d08488..9908a320a6646 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -1,9 +1,9 @@ import pytest import sys import os -from warnings import catch_warnings import tempfile from contextlib import contextmanager +from warnings import catch_warnings import datetime from datetime import timedelta @@ -11,7 +11,7 @@ import pandas import pandas as pd -from pandas import (Series, DataFrame, Panel, MultiIndex, Int64Index, +from pandas import (Series, DataFrame, Panel, Panel4D, MultiIndex, Int64Index, RangeIndex, Categorical, bdate_range, date_range, timedelta_range, Index, DatetimeIndex, isnull) @@ -22,8 +22,6 @@ tables = pytest.importorskip('tables') from pandas.io.pytables import TableIterator from pandas.io.pytables import (HDFStore, get_store, Term, read_hdf, - IncompatibilityWarning, PerformanceWarning, - AttributeConflictWarning, PossibleDataLossError, ClosedFileError) from pandas.io import pytables as pytables @@ -205,8 +203,10 @@ def roundtrip(key, obj, **kwargs): o = tm.makeDataFrame() assert_frame_equal(o, roundtrip('frame', o)) - o = tm.makePanel() - assert_panel_equal(o, roundtrip('panel', o)) + with catch_warnings(record=True): + + o = tm.makePanel() + assert_panel_equal(o, roundtrip('panel', o)) # table df = DataFrame(dict(A=lrange(5), B=lrange(5))) @@ -368,8 +368,9 @@ def test_keys(self): store['a'] = tm.makeTimeSeries() store['b'] = tm.makeStringSeries() store['c'] = tm.makeDataFrame() - store['d'] = tm.makePanel() - store['foo/bar'] = tm.makePanel() + with catch_warnings(record=True): + store['d'] = tm.makePanel() + store['foo/bar'] = tm.makePanel() self.assertEqual(len(store), 5) expected = set(['/a', '/b', '/c', '/d', '/foo/bar']) self.assertTrue(set(store.keys()) == expected) @@ -388,9 +389,11 @@ def test_repr(self): store['a'] = tm.makeTimeSeries() store['b'] = 
tm.makeStringSeries() store['c'] = tm.makeDataFrame() - store['d'] = tm.makePanel() - store['foo/bar'] = tm.makePanel() - store.append('e', tm.makePanel()) + + with catch_warnings(record=True): + store['d'] = tm.makePanel() + store['foo/bar'] = tm.makePanel() + store.append('e', tm.makePanel()) df = tm.makeDataFrame() df['obj1'] = 'foo' @@ -755,6 +758,7 @@ def test_put_mixed_type(self): with ensure_clean_store(self.path) as store: _maybe_remove(store, 'df') + # PerformanceWarning with catch_warnings(record=True): store.put('df', df) @@ -764,39 +768,42 @@ def test_put_mixed_type(self): def test_append(self): with ensure_clean_store(self.path) as store: - df = tm.makeTimeDataFrame() - _maybe_remove(store, 'df1') - store.append('df1', df[:10]) - store.append('df1', df[10:]) - tm.assert_frame_equal(store['df1'], df) - - _maybe_remove(store, 'df2') - store.put('df2', df[:10], format='table') - store.append('df2', df[10:]) - tm.assert_frame_equal(store['df2'], df) - - _maybe_remove(store, 'df3') - store.append('/df3', df[:10]) - store.append('/df3', df[10:]) - tm.assert_frame_equal(store['df3'], df) # this is allowed by almost always don't want to do it # tables.NaturalNameWarning): with catch_warnings(record=True): + + df = tm.makeTimeDataFrame() + _maybe_remove(store, 'df1') + store.append('df1', df[:10]) + store.append('df1', df[10:]) + tm.assert_frame_equal(store['df1'], df) + + _maybe_remove(store, 'df2') + store.put('df2', df[:10], format='table') + store.append('df2', df[10:]) + tm.assert_frame_equal(store['df2'], df) + + _maybe_remove(store, 'df3') + store.append('/df3', df[:10]) + store.append('/df3', df[10:]) + tm.assert_frame_equal(store['df3'], df) + + # this is allowed by almost always don't want to do it + # tables.NaturalNameWarning _maybe_remove(store, '/df3 foo') store.append('/df3 foo', df[:10]) store.append('/df3 foo', df[10:]) tm.assert_frame_equal(store['df3 foo'], df) - # panel - wp = tm.makePanel() - _maybe_remove(store, 'wp1') - store.append('wp1', wp.iloc[:, :10, :]) - store.append('wp1', wp.iloc[:, 10:, :]) - assert_panel_equal(store['wp1'], wp) + # panel + wp = tm.makePanel() + _maybe_remove(store, 'wp1') + store.append('wp1', wp.iloc[:, :10, :]) + store.append('wp1', wp.iloc[:, 10:, :]) + assert_panel_equal(store['wp1'], wp) - # ndim - with catch_warnings(record=True): + # ndim p4d = tm.makePanel4D() _maybe_remove(store, 'p4d') store.append('p4d', p4d.iloc[:, :, :10, :]) @@ -820,42 +827,42 @@ def test_append(self): 'p4d2', p4d2, axes=['items', 'major_axis', 'minor_axis']) assert_panel4d_equal(store['p4d2'], p4d2) - # test using differt order of items on the non-index axes - _maybe_remove(store, 'wp1') - wp_append1 = wp.iloc[:, :10, :] - store.append('wp1', wp_append1) - wp_append2 = wp.iloc[:, 10:, :].reindex(items=wp.items[::-1]) - store.append('wp1', wp_append2) - assert_panel_equal(store['wp1'], wp) - - # dtype issues - mizxed type in a single object column - df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]]) - df['mixed_column'] = 'testing' - df.loc[2, 'mixed_column'] = np.nan - _maybe_remove(store, 'df') - store.append('df', df) - tm.assert_frame_equal(store['df'], df) - - # uints - test storage of uints - uint_data = DataFrame({ - 'u08': Series(np.random.randint(0, high=255, size=5), - dtype=np.uint8), - 'u16': Series(np.random.randint(0, high=65535, size=5), - dtype=np.uint16), - 'u32': Series(np.random.randint(0, high=2**30, size=5), - dtype=np.uint32), - 'u64': Series([2**58, 2**59, 2**60, 2**61, 2**62], - dtype=np.uint64)}, index=np.arange(5)) - 
_maybe_remove(store, 'uints') - store.append('uints', uint_data) - tm.assert_frame_equal(store['uints'], uint_data) - - # uints - test storage of uints in indexable columns - _maybe_remove(store, 'uints') - # 64-bit indices not yet supported - store.append('uints', uint_data, data_columns=[ - 'u08', 'u16', 'u32']) - tm.assert_frame_equal(store['uints'], uint_data) + # test using differt order of items on the non-index axes + _maybe_remove(store, 'wp1') + wp_append1 = wp.iloc[:, :10, :] + store.append('wp1', wp_append1) + wp_append2 = wp.iloc[:, 10:, :].reindex(items=wp.items[::-1]) + store.append('wp1', wp_append2) + assert_panel_equal(store['wp1'], wp) + + # dtype issues - mizxed type in a single object column + df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]]) + df['mixed_column'] = 'testing' + df.loc[2, 'mixed_column'] = np.nan + _maybe_remove(store, 'df') + store.append('df', df) + tm.assert_frame_equal(store['df'], df) + + # uints - test storage of uints + uint_data = DataFrame({ + 'u08': Series(np.random.randint(0, high=255, size=5), + dtype=np.uint8), + 'u16': Series(np.random.randint(0, high=65535, size=5), + dtype=np.uint16), + 'u32': Series(np.random.randint(0, high=2**30, size=5), + dtype=np.uint32), + 'u64': Series([2**58, 2**59, 2**60, 2**61, 2**62], + dtype=np.uint64)}, index=np.arange(5)) + _maybe_remove(store, 'uints') + store.append('uints', uint_data) + tm.assert_frame_equal(store['uints'], uint_data) + + # uints - test storage of uints in indexable columns + _maybe_remove(store, 'uints') + # 64-bit indices not yet supported + store.append('uints', uint_data, data_columns=[ + 'u08', 'u16', 'u32']) + tm.assert_frame_equal(store['uints'], uint_data) def test_append_series(self): @@ -937,8 +944,9 @@ def check(format, index): # only support for fixed types (and they have a perf warning) self.assertRaises(TypeError, check, 'table', index) - with tm.assert_produces_warning( - expected_warning=PerformanceWarning): + + # PerformanceWarning + with catch_warnings(record=True): check('fixed', index) def test_encoding(self): @@ -1131,15 +1139,17 @@ def test_append_all_nans(self): [[np.nan, np.nan, np.nan], [np.nan, 5, 6]], [[np.nan, np.nan, np.nan], [np.nan, 3, np.nan]]] - panel_with_missing = Panel(matrix, items=['Item1', 'Item2', 'Item3'], - major_axis=[1, 2], - minor_axis=['A', 'B', 'C']) + with catch_warnings(record=True): + panel_with_missing = Panel(matrix, + items=['Item1', 'Item2', 'Item3'], + major_axis=[1, 2], + minor_axis=['A', 'B', 'C']) - with ensure_clean_path(self.path) as path: - panel_with_missing.to_hdf( - path, 'panel_with_missing', format='table') - reloaded_panel = read_hdf(path, 'panel_with_missing') - tm.assert_panel_equal(panel_with_missing, reloaded_panel) + with ensure_clean_path(self.path) as path: + panel_with_missing.to_hdf( + path, 'panel_with_missing', format='table') + reloaded_panel = read_hdf(path, 'panel_with_missing') + tm.assert_panel_equal(panel_with_missing, reloaded_panel) def test_append_frame_column_oriented(self): @@ -1158,13 +1168,14 @@ def test_append_frame_column_oriented(self): # selection on the non-indexable result = store.select( - 'df1', ('columns=A', Term('index=df.index[0:4]'))) + 'df1', ('columns=A', 'index=df.index[0:4]')) expected = df.reindex(columns=['A'], index=df.index[0:4]) tm.assert_frame_equal(expected, result) # this isn't supported - self.assertRaises(TypeError, store.select, 'df1', ( - 'columns=A', Term('index>df.index[4]'))) + with pytest.raises(TypeError): + store.select('df1', + 'columns=A and 
index>df.index[4]') def test_append_with_different_block_ordering(self): @@ -1265,15 +1276,15 @@ def check_indexers(key, indexers): assert_panel4d_equal(result, expected) # partial selection2 - result = store.select('p4d', [Term( - 'labels=l1'), Term('items=ItemA'), Term('minor_axis=B')]) + result = store.select( + 'p4d', "labels='l1' and items='ItemA' and minor_axis='B'") expected = p4d.reindex( labels=['l1'], items=['ItemA'], minor_axis=['B']) assert_panel4d_equal(result, expected) # non-existant partial selection - result = store.select('p4d', [Term( - 'labels=l1'), Term('items=Item1'), Term('minor_axis=B')]) + result = store.select( + 'p4d', "labels='l1' and items='Item1' and minor_axis='B'") expected = p4d.reindex(labels=['l1'], items=[], minor_axis=['B']) assert_panel4d_equal(result, expected) @@ -1281,100 +1292,103 @@ def check_indexers(key, indexers): def test_append_with_strings(self): with ensure_clean_store(self.path) as store: - wp = tm.makePanel() - wp2 = wp.rename_axis( - dict([(x, "%s_extra" % x) for x in wp.minor_axis]), axis=2) - - def check_col(key, name, size): - self.assertEqual(getattr(store.get_storer( - key).table.description, name).itemsize, size) - - store.append('s1', wp, min_itemsize=20) - store.append('s1', wp2) - expected = concat([wp, wp2], axis=2) - expected = expected.reindex(minor_axis=sorted(expected.minor_axis)) - assert_panel_equal(store['s1'], expected) - check_col('s1', 'minor_axis', 20) - - # test dict format - store.append('s2', wp, min_itemsize={'minor_axis': 20}) - store.append('s2', wp2) - expected = concat([wp, wp2], axis=2) - expected = expected.reindex(minor_axis=sorted(expected.minor_axis)) - assert_panel_equal(store['s2'], expected) - check_col('s2', 'minor_axis', 20) - - # apply the wrong field (similar to #1) - store.append('s3', wp, min_itemsize={'major_axis': 20}) - self.assertRaises(ValueError, store.append, 's3', wp2) - - # test truncation of bigger strings - store.append('s4', wp) - self.assertRaises(ValueError, store.append, 's4', wp2) - - # avoid truncation on elements - df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) - store.append('df_big', df) - tm.assert_frame_equal(store.select('df_big'), df) - check_col('df_big', 'values_block_1', 15) - - # appending smaller string ok - df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']]) - store.append('df_big', df2) - expected = concat([df, df2]) - tm.assert_frame_equal(store.select('df_big'), expected) - check_col('df_big', 'values_block_1', 15) - - # avoid truncation on elements - df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) - store.append('df_big2', df, min_itemsize={'values': 50}) - tm.assert_frame_equal(store.select('df_big2'), df) - check_col('df_big2', 'values_block_1', 50) - - # bigger string on next append - store.append('df_new', df) - df_new = DataFrame( - [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']]) - self.assertRaises(ValueError, store.append, 'df_new', df_new) - - # min_itemsize on Series index (GH 11412) - df = tm.makeMixedDataFrame().set_index('C') - store.append('ss', df['B'], min_itemsize={'index': 4}) - tm.assert_series_equal(store.select('ss'), df['B']) - - # same as above, with data_columns=True - store.append('ss2', df['B'], data_columns=True, - min_itemsize={'index': 4}) - tm.assert_series_equal(store.select('ss2'), df['B']) - - # min_itemsize in index without appending (GH 10381) - store.put('ss3', df, format='table', - min_itemsize={'index': 6}) - # just make sure there is a longer string: - df2 = 
df.copy().reset_index().assign(C='longer').set_index('C') - store.append('ss3', df2) - tm.assert_frame_equal(store.select('ss3'), - pd.concat([df, df2])) - - # same as above, with a Series - store.put('ss4', df['B'], format='table', - min_itemsize={'index': 6}) - store.append('ss4', df2['B']) - tm.assert_series_equal(store.select('ss4'), - pd.concat([df['B'], df2['B']])) - - # with nans - _maybe_remove(store, 'df') - df = tm.makeTimeDataFrame() - df['string'] = 'foo' - df.loc[1:4, 'string'] = np.nan - df['string2'] = 'bar' - df.loc[4:8, 'string2'] = np.nan - df['string3'] = 'bah' - df.loc[1:, 'string3'] = np.nan - store.append('df', df) - result = store.select('df') - tm.assert_frame_equal(result, df) + with catch_warnings(record=True): + wp = tm.makePanel() + wp2 = wp.rename_axis( + dict([(x, "%s_extra" % x) for x in wp.minor_axis]), axis=2) + + def check_col(key, name, size): + self.assertEqual(getattr(store.get_storer( + key).table.description, name).itemsize, size) + + store.append('s1', wp, min_itemsize=20) + store.append('s1', wp2) + expected = concat([wp, wp2], axis=2) + expected = expected.reindex( + minor_axis=sorted(expected.minor_axis)) + assert_panel_equal(store['s1'], expected) + check_col('s1', 'minor_axis', 20) + + # test dict format + store.append('s2', wp, min_itemsize={'minor_axis': 20}) + store.append('s2', wp2) + expected = concat([wp, wp2], axis=2) + expected = expected.reindex( + minor_axis=sorted(expected.minor_axis)) + assert_panel_equal(store['s2'], expected) + check_col('s2', 'minor_axis', 20) + + # apply the wrong field (similar to #1) + store.append('s3', wp, min_itemsize={'major_axis': 20}) + self.assertRaises(ValueError, store.append, 's3', wp2) + + # test truncation of bigger strings + store.append('s4', wp) + self.assertRaises(ValueError, store.append, 's4', wp2) + + # avoid truncation on elements + df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) + store.append('df_big', df) + tm.assert_frame_equal(store.select('df_big'), df) + check_col('df_big', 'values_block_1', 15) + + # appending smaller string ok + df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']]) + store.append('df_big', df2) + expected = concat([df, df2]) + tm.assert_frame_equal(store.select('df_big'), expected) + check_col('df_big', 'values_block_1', 15) + + # avoid truncation on elements + df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) + store.append('df_big2', df, min_itemsize={'values': 50}) + tm.assert_frame_equal(store.select('df_big2'), df) + check_col('df_big2', 'values_block_1', 50) + + # bigger string on next append + store.append('df_new', df) + df_new = DataFrame( + [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']]) + self.assertRaises(ValueError, store.append, 'df_new', df_new) + + # min_itemsize on Series index (GH 11412) + df = tm.makeMixedDataFrame().set_index('C') + store.append('ss', df['B'], min_itemsize={'index': 4}) + tm.assert_series_equal(store.select('ss'), df['B']) + + # same as above, with data_columns=True + store.append('ss2', df['B'], data_columns=True, + min_itemsize={'index': 4}) + tm.assert_series_equal(store.select('ss2'), df['B']) + + # min_itemsize in index without appending (GH 10381) + store.put('ss3', df, format='table', + min_itemsize={'index': 6}) + # just make sure there is a longer string: + df2 = df.copy().reset_index().assign(C='longer').set_index('C') + store.append('ss3', df2) + tm.assert_frame_equal(store.select('ss3'), + pd.concat([df, df2])) + + # same as above, with a Series + store.put('ss4', df['B'], 
format='table', + min_itemsize={'index': 6}) + store.append('ss4', df2['B']) + tm.assert_series_equal(store.select('ss4'), + pd.concat([df['B'], df2['B']])) + + # with nans + _maybe_remove(store, 'df') + df = tm.makeTimeDataFrame() + df['string'] = 'foo' + df.loc[1:4, 'string'] = np.nan + df['string2'] = 'bar' + df.loc[4:8, 'string2'] = np.nan + df['string3'] = 'bah' + df.loc[1:, 'string3'] = np.nan + store.append('df', df) + result = store.select('df') + tm.assert_frame_equal(result, df) with ensure_clean_store(self.path) as store: @@ -1452,13 +1466,13 @@ def test_append_with_data_columns(self): assert(store._handle.root.df.table.cols.B.is_indexed is True) # data column searching - result = store.select('df', [Term('B>0')]) + result = store.select('df', 'B>0') expected = df[df.B > 0] tm.assert_frame_equal(result, expected) # data column searching (with an indexable and a data_columns) result = store.select( - 'df', [Term('B>0'), Term('index>df.index[3]')]) + 'df', 'B>0 and index>df.index[3]') df_new = df.reindex(index=df.index[4:]) expected = df_new[df_new.B > 0] tm.assert_frame_equal(result, expected) @@ -1470,7 +1484,7 @@ def test_append_with_data_columns(self): df_new.loc[5:6, 'string'] = 'bar' _maybe_remove(store, 'df') store.append('df', df_new, data_columns=['string']) - result = store.select('df', [Term('string=foo')]) + result = store.select('df', "string='foo'") expected = df_new[df_new.string == 'foo'] tm.assert_frame_equal(result, expected) @@ -1523,15 +1537,15 @@ def check_col(key, name, size): _maybe_remove(store, 'df') store.append( 'df', df_new, data_columns=['A', 'B', 'string', 'string2']) - result = store.select('df', [Term('string=foo'), Term( - 'string2=foo'), Term('A>0'), Term('B<0')]) + result = store.select('df', + "string='foo' and string2='foo'" + " and A>0 and B<0") expected = df_new[(df_new.string == 'foo') & ( df_new.string2 == 'foo') & (df_new.A > 0) & (df_new.B < 0)] tm.assert_frame_equal(result, expected, check_index_type=False) # yield an empty frame - result = store.select('df', [Term('string=foo'), Term( - 'string2=cool')]) + result = store.select('df', "string='foo' and string2='cool'") expected = df_new[(df_new.string == 'foo') & ( df_new.string2 == 'cool')] tm.assert_frame_equal(result, expected, check_index_type=False) @@ -1551,7 +1565,7 @@ def check_col(key, name, size): store.append('df_dc', df_dc, data_columns=['B', 'C', 'string', 'string2', 'datetime']) - result = store.select('df_dc', [Term('B>0')]) + result = store.select('df_dc', 'B>0') expected = df_dc[df_dc.B > 0] tm.assert_frame_equal(result, expected, check_index_type=False) @@ -1578,7 +1592,7 @@ def check_col(key, name, size): store.append('df_dc', df_dc, data_columns=[ 'B', 'C', 'string', 'string2']) - result = store.select('df_dc', [Term('B>0')]) + result = store.select('df_dc', 'B>0') expected = df_dc[df_dc.B > 0] tm.assert_frame_equal(result, expected) @@ -1589,99 +1603,104 @@ def check_col(key, name, size): tm.assert_frame_equal(result, expected) with ensure_clean_store(self.path) as store: - # panel - # GH5717 not handling data_columns - np.random.seed(1234) - p = tm.makePanel() - - store.append('p1', p) - tm.assert_panel_equal(store.select('p1'), p) - - store.append('p2', p, data_columns=True) - tm.assert_panel_equal(store.select('p2'), p) - - result = store.select('p2', where='ItemA>0') - expected = p.to_frame() - expected = expected[expected['ItemA'] > 0] - tm.assert_frame_equal(result.to_frame(), expected) - - result = store.select('p2', where='ItemA>0 & minor_axis=["A","B"]') - 
expected = p.to_frame() - expected = expected[expected['ItemA'] > 0] - expected = expected[expected.reset_index( - level=['major']).index.isin(['A', 'B'])] - tm.assert_frame_equal(result.to_frame(), expected) + with catch_warnings(record=True): + # panel + # GH5717 not handling data_columns + np.random.seed(1234) + p = tm.makePanel() + + store.append('p1', p) + tm.assert_panel_equal(store.select('p1'), p) + + store.append('p2', p, data_columns=True) + tm.assert_panel_equal(store.select('p2'), p) + + result = store.select('p2', where='ItemA>0') + expected = p.to_frame() + expected = expected[expected['ItemA'] > 0] + tm.assert_frame_equal(result.to_frame(), expected) + + result = store.select( + 'p2', where='ItemA>0 & minor_axis=["A","B"]') + expected = p.to_frame() + expected = expected[expected['ItemA'] > 0] + expected = expected[expected.reset_index( + level=['major']).index.isin(['A', 'B'])] + tm.assert_frame_equal(result.to_frame(), expected) def test_create_table_index(self): with ensure_clean_store(self.path) as store: - def col(t, column): - return getattr(store.get_storer(t).table.cols, column) + with catch_warnings(record=True): + def col(t, column): + return getattr(store.get_storer(t).table.cols, column) - # index=False - wp = tm.makePanel() - store.append('p5', wp, index=False) - store.create_table_index('p5', columns=['major_axis']) - assert(col('p5', 'major_axis').is_indexed is True) - assert(col('p5', 'minor_axis').is_indexed is False) - - # index=True - store.append('p5i', wp, index=True) - assert(col('p5i', 'major_axis').is_indexed is True) - assert(col('p5i', 'minor_axis').is_indexed is True) - - # default optlevels - store.get_storer('p5').create_index() - assert(col('p5', 'major_axis').index.optlevel == 6) - assert(col('p5', 'minor_axis').index.kind == 'medium') - - # let's change the indexing scheme - store.create_table_index('p5') - assert(col('p5', 'major_axis').index.optlevel == 6) - assert(col('p5', 'minor_axis').index.kind == 'medium') - store.create_table_index('p5', optlevel=9) - assert(col('p5', 'major_axis').index.optlevel == 9) - assert(col('p5', 'minor_axis').index.kind == 'medium') - store.create_table_index('p5', kind='full') - assert(col('p5', 'major_axis').index.optlevel == 9) - assert(col('p5', 'minor_axis').index.kind == 'full') - store.create_table_index('p5', optlevel=1, kind='light') - assert(col('p5', 'major_axis').index.optlevel == 1) - assert(col('p5', 'minor_axis').index.kind == 'light') - - # data columns - df = tm.makeTimeDataFrame() - df['string'] = 'foo' - df['string2'] = 'bar' - store.append('f', df, data_columns=['string', 'string2']) - assert(col('f', 'index').is_indexed is True) - assert(col('f', 'string').is_indexed is True) - assert(col('f', 'string2').is_indexed is True) - - # specify index=columns - store.append( - 'f2', df, index=['string'], data_columns=['string', 'string2']) - assert(col('f2', 'index').is_indexed is False) - assert(col('f2', 'string').is_indexed is True) - assert(col('f2', 'string2').is_indexed is False) + # index=False + wp = tm.makePanel() + store.append('p5', wp, index=False) + store.create_table_index('p5', columns=['major_axis']) + assert(col('p5', 'major_axis').is_indexed is True) + assert(col('p5', 'minor_axis').is_indexed is False) + + # index=True + store.append('p5i', wp, index=True) + assert(col('p5i', 'major_axis').is_indexed is True) + assert(col('p5i', 'minor_axis').is_indexed is True) + + # default optlevels + store.get_storer('p5').create_index() + assert(col('p5', 'major_axis').index.optlevel == 6) 
+ assert(col('p5', 'minor_axis').index.kind == 'medium') + + # let's change the indexing scheme + store.create_table_index('p5') + assert(col('p5', 'major_axis').index.optlevel == 6) + assert(col('p5', 'minor_axis').index.kind == 'medium') + store.create_table_index('p5', optlevel=9) + assert(col('p5', 'major_axis').index.optlevel == 9) + assert(col('p5', 'minor_axis').index.kind == 'medium') + store.create_table_index('p5', kind='full') + assert(col('p5', 'major_axis').index.optlevel == 9) + assert(col('p5', 'minor_axis').index.kind == 'full') + store.create_table_index('p5', optlevel=1, kind='light') + assert(col('p5', 'major_axis').index.optlevel == 1) + assert(col('p5', 'minor_axis').index.kind == 'light') + + # data columns + df = tm.makeTimeDataFrame() + df['string'] = 'foo' + df['string2'] = 'bar' + store.append('f', df, data_columns=['string', 'string2']) + assert(col('f', 'index').is_indexed is True) + assert(col('f', 'string').is_indexed is True) + assert(col('f', 'string2').is_indexed is True) + + # specify index=columns + store.append( + 'f2', df, index=['string'], + data_columns=['string', 'string2']) + assert(col('f2', 'index').is_indexed is False) + assert(col('f2', 'string').is_indexed is True) + assert(col('f2', 'string2').is_indexed is False) - # try to index a non-table - _maybe_remove(store, 'f2') - store.put('f2', df) - self.assertRaises(TypeError, store.create_table_index, 'f2') + # try to index a non-table + _maybe_remove(store, 'f2') + store.put('f2', df) + self.assertRaises(TypeError, store.create_table_index, 'f2') def test_append_diff_item_order(self): - wp = tm.makePanel() - wp1 = wp.iloc[:, :10, :] - wp2 = wp.iloc[wp.items.get_indexer(['ItemC', 'ItemB', 'ItemA']), - 10:, :] + with catch_warnings(record=True): + wp = tm.makePanel() + wp1 = wp.iloc[:, :10, :] + wp2 = wp.iloc[wp.items.get_indexer(['ItemC', 'ItemB', 'ItemA']), + 10:, :] - with ensure_clean_store(self.path) as store: - store.put('panel', wp1, format='table') - self.assertRaises(ValueError, store.put, 'panel', wp2, - append=True) + with ensure_clean_store(self.path) as store: + store.put('panel', wp1, format='table') + self.assertRaises(ValueError, store.put, 'panel', wp2, + append=True) def test_append_hierarchical(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], @@ -1909,8 +1928,9 @@ def check(obj, comparator): df['time2'] = Timestamp('20130102') check(df, tm.assert_frame_equal) - p = tm.makePanel() - check(p, assert_panel_equal) + with catch_warnings(record=True): + p = tm.makePanel() + check(p, assert_panel_equal) with catch_warnings(record=True): p4d = tm.makePanel4D() @@ -1936,21 +1956,23 @@ def check(obj, comparator): store.put('df2', df) assert_frame_equal(store.select('df2'), df) - # 0 len - p_empty = Panel(items=list('ABC')) - store.append('p', p_empty) - self.assertRaises(KeyError, store.select, 'p') + with catch_warnings(record=True): - # repeated append of 0/non-zero frames - p = Panel(np.random.randn(3, 4, 5), items=list('ABC')) - store.append('p', p) - assert_panel_equal(store.select('p'), p) - store.append('p', p_empty) - assert_panel_equal(store.select('p'), p) + # 0 len + p_empty = Panel(items=list('ABC')) + store.append('p', p_empty) + self.assertRaises(KeyError, store.select, 'p') - # store - store.put('p2', p_empty) - assert_panel_equal(store.select('p2'), p_empty) + # repeated append of 0/non-zero frames + p = Panel(np.random.randn(3, 4, 5), items=list('ABC')) + store.append('p', p) + assert_panel_equal(store.select('p'), p) + store.append('p', p_empty) + 
assert_panel_equal(store.select('p'), p) + + # store + store.put('p2', p_empty) + assert_panel_equal(store.select('p2'), p_empty) def test_append_raise(self): @@ -2066,22 +2088,25 @@ def test_table_mixed_dtypes(self): store.append('df1_mixed', df) tm.assert_frame_equal(store.select('df1_mixed'), df) - # panel - wp = tm.makePanel() - wp['obj1'] = 'foo' - wp['obj2'] = 'bar' - wp['bool1'] = wp['ItemA'] > 0 - wp['bool2'] = wp['ItemB'] > 0 - wp['int1'] = 1 - wp['int2'] = 2 - wp = wp._consolidate() + with catch_warnings(record=True): - with ensure_clean_store(self.path) as store: - store.append('p1_mixed', wp) - assert_panel_equal(store.select('p1_mixed'), wp) + # panel + wp = tm.makePanel() + wp['obj1'] = 'foo' + wp['obj2'] = 'bar' + wp['bool1'] = wp['ItemA'] > 0 + wp['bool2'] = wp['ItemB'] > 0 + wp['int1'] = 1 + wp['int2'] = 2 + wp = wp._consolidate() with catch_warnings(record=True): + with ensure_clean_store(self.path) as store: + store.append('p1_mixed', wp) + assert_panel_equal(store.select('p1_mixed'), wp) + + with catch_warnings(record=True): # ndim wp = tm.makePanel4D() wp['obj1'] = 'foo' @@ -2166,9 +2191,12 @@ def test_append_with_timedelta(self): result = store.select('df') assert_frame_equal(result, df) - result = store.select('df', "C<100000") + result = store.select('df', where="C<100000") assert_frame_equal(result, df) + result = store.select('df', where="Cfoo') - self.assertRaises(KeyError, store.remove, 'a', [crit1]) + with catch_warnings(record=True): - # try to remove non-table (with crit) - # non-table ok (where = None) - wp = tm.makePanel(30) - store.put('wp', wp, format='table') - store.remove('wp', ["minor_axis=['A', 'D']"]) - rs = store.select('wp') - expected = wp.reindex(minor_axis=['B', 'C']) - assert_panel_equal(rs, expected) + # non-existance + crit1 = 'index>foo' + self.assertRaises(KeyError, store.remove, 'a', [crit1]) - # empty where - _maybe_remove(store, 'wp') - store.put('wp', wp, format='table') + # try to remove non-table (with crit) + # non-table ok (where = None) + wp = tm.makePanel(30) + store.put('wp', wp, format='table') + store.remove('wp', ["minor_axis=['A', 'D']"]) + rs = store.select('wp') + expected = wp.reindex(minor_axis=['B', 'C']) + assert_panel_equal(rs, expected) - # deleted number (entire table) - n = store.remove('wp', []) - self.assertTrue(n == 120) + # empty where + _maybe_remove(store, 'wp') + store.put('wp', wp, format='table') - # non - empty where - _maybe_remove(store, 'wp') - store.put('wp', wp, format='table') - self.assertRaises(ValueError, store.remove, - 'wp', ['foo']) + # deleted number (entire table) + n = store.remove('wp', []) + self.assertTrue(n == 120) - # selectin non-table with a where - # store.put('wp2', wp, format='f') - # self.assertRaises(ValueError, store.remove, - # 'wp2', [('column', ['A', 'D'])]) + # non - empty where + _maybe_remove(store, 'wp') + store.put('wp', wp, format='table') + self.assertRaises(ValueError, store.remove, + 'wp', ['foo']) def test_remove_startstop(self): # GH #4835 and #6177 with ensure_clean_store(self.path) as store: - wp = tm.makePanel(30) - - # start - _maybe_remove(store, 'wp1') - store.put('wp1', wp, format='t') - n = store.remove('wp1', start=32) - self.assertTrue(n == 120 - 32) - result = store.select('wp1') - expected = wp.reindex(major_axis=wp.major_axis[:32 // 4]) - assert_panel_equal(result, expected) - - _maybe_remove(store, 'wp2') - store.put('wp2', wp, format='t') - n = store.remove('wp2', start=-32) - self.assertTrue(n == 32) - result = store.select('wp2') - expected = 
wp.reindex(major_axis=wp.major_axis[:-32 // 4]) - assert_panel_equal(result, expected) - - # stop - _maybe_remove(store, 'wp3') - store.put('wp3', wp, format='t') - n = store.remove('wp3', stop=32) - self.assertTrue(n == 32) - result = store.select('wp3') - expected = wp.reindex(major_axis=wp.major_axis[32 // 4:]) - assert_panel_equal(result, expected) - - _maybe_remove(store, 'wp4') - store.put('wp4', wp, format='t') - n = store.remove('wp4', stop=-32) - self.assertTrue(n == 120 - 32) - result = store.select('wp4') - expected = wp.reindex(major_axis=wp.major_axis[-32 // 4:]) - assert_panel_equal(result, expected) - - # start n stop - _maybe_remove(store, 'wp5') - store.put('wp5', wp, format='t') - n = store.remove('wp5', start=16, stop=-16) - self.assertTrue(n == 120 - 32) - result = store.select('wp5') - expected = wp.reindex(major_axis=wp.major_axis[ - :16 // 4].union(wp.major_axis[-16 // 4:])) - assert_panel_equal(result, expected) - - _maybe_remove(store, 'wp6') - store.put('wp6', wp, format='t') - n = store.remove('wp6', start=16, stop=16) - self.assertTrue(n == 0) - result = store.select('wp6') - expected = wp.reindex(major_axis=wp.major_axis) - assert_panel_equal(result, expected) - - # with where - _maybe_remove(store, 'wp7') - - # TODO: unused? - date = wp.major_axis.take(np.arange(0, 30, 3)) # noqa - - crit = Term('major_axis=date') - store.put('wp7', wp, format='t') - n = store.remove('wp7', where=[crit], stop=80) - self.assertTrue(n == 28) - result = store.select('wp7') - expected = wp.reindex(major_axis=wp.major_axis.difference( - wp.major_axis[np.arange(0, 20, 3)])) - assert_panel_equal(result, expected) + with catch_warnings(record=True): + wp = tm.makePanel(30) + + # start + _maybe_remove(store, 'wp1') + store.put('wp1', wp, format='t') + n = store.remove('wp1', start=32) + self.assertTrue(n == 120 - 32) + result = store.select('wp1') + expected = wp.reindex(major_axis=wp.major_axis[:32 // 4]) + assert_panel_equal(result, expected) + + _maybe_remove(store, 'wp2') + store.put('wp2', wp, format='t') + n = store.remove('wp2', start=-32) + self.assertTrue(n == 32) + result = store.select('wp2') + expected = wp.reindex(major_axis=wp.major_axis[:-32 // 4]) + assert_panel_equal(result, expected) + + # stop + _maybe_remove(store, 'wp3') + store.put('wp3', wp, format='t') + n = store.remove('wp3', stop=32) + self.assertTrue(n == 32) + result = store.select('wp3') + expected = wp.reindex(major_axis=wp.major_axis[32 // 4:]) + assert_panel_equal(result, expected) + + _maybe_remove(store, 'wp4') + store.put('wp4', wp, format='t') + n = store.remove('wp4', stop=-32) + self.assertTrue(n == 120 - 32) + result = store.select('wp4') + expected = wp.reindex(major_axis=wp.major_axis[-32 // 4:]) + assert_panel_equal(result, expected) + + # start n stop + _maybe_remove(store, 'wp5') + store.put('wp5', wp, format='t') + n = store.remove('wp5', start=16, stop=-16) + self.assertTrue(n == 120 - 32) + result = store.select('wp5') + expected = wp.reindex( + major_axis=(wp.major_axis[:16 // 4] + .union(wp.major_axis[-16 // 4:]))) + assert_panel_equal(result, expected) + + _maybe_remove(store, 'wp6') + store.put('wp6', wp, format='t') + n = store.remove('wp6', start=16, stop=16) + self.assertTrue(n == 0) + result = store.select('wp6') + expected = wp.reindex(major_axis=wp.major_axis) + assert_panel_equal(result, expected) + + # with where + _maybe_remove(store, 'wp7') + + # TODO: unused? 
+ date = wp.major_axis.take(np.arange(0, 30, 3)) # noqa + + crit = 'major_axis=date' + store.put('wp7', wp, format='t') + n = store.remove('wp7', where=[crit], stop=80) + self.assertTrue(n == 28) + result = store.select('wp7') + expected = wp.reindex(major_axis=wp.major_axis.difference( + wp.major_axis[np.arange(0, 20, 3)])) + assert_panel_equal(result, expected) def test_remove_crit(self): with ensure_clean_store(self.path) as store: - wp = tm.makePanel(30) - - # group row removal - _maybe_remove(store, 'wp3') - date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10]) - crit4 = Term('major_axis=date4') - store.put('wp3', wp, format='t') - n = store.remove('wp3', where=[crit4]) - self.assertTrue(n == 36) - - result = store.select('wp3') - expected = wp.reindex(major_axis=wp.major_axis.difference(date4)) - assert_panel_equal(result, expected) - - # upper half - _maybe_remove(store, 'wp') - store.put('wp', wp, format='table') - date = wp.major_axis[len(wp.major_axis) // 2] - - crit1 = Term('major_axis>date') - crit2 = Term("minor_axis=['A', 'D']") - n = store.remove('wp', where=[crit1]) - self.assertTrue(n == 56) - - n = store.remove('wp', where=[crit2]) - self.assertTrue(n == 32) - - result = store['wp'] - expected = wp.truncate(after=date).reindex(minor=['B', 'C']) - assert_panel_equal(result, expected) - - # individual row elements - _maybe_remove(store, 'wp2') - store.put('wp2', wp, format='table') - - date1 = wp.major_axis[1:3] - crit1 = Term('major_axis=date1') - store.remove('wp2', where=[crit1]) - result = store.select('wp2') - expected = wp.reindex(major_axis=wp.major_axis.difference(date1)) - assert_panel_equal(result, expected) - - date2 = wp.major_axis[5] - crit2 = Term('major_axis=date2') - store.remove('wp2', where=[crit2]) - result = store['wp2'] - expected = wp.reindex(major_axis=wp.major_axis.difference(date1) - .difference(Index([date2]))) - assert_panel_equal(result, expected) - - date3 = [wp.major_axis[7], wp.major_axis[9]] - crit3 = Term('major_axis=date3') - store.remove('wp2', where=[crit3]) - result = store['wp2'] - expected = wp.reindex(major_axis=wp.major_axis - .difference(date1) - .difference(Index([date2])) - .difference(Index(date3))) - assert_panel_equal(result, expected) - - # corners - _maybe_remove(store, 'wp4') - store.put('wp4', wp, format='table') - n = store.remove( - 'wp4', where=[Term('major_axis>wp.major_axis[-1]')]) - result = store.select('wp4') - assert_panel_equal(result, wp) + with catch_warnings(record=True): + wp = tm.makePanel(30) + + # group row removal + _maybe_remove(store, 'wp3') + date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10]) + crit4 = 'major_axis=date4' + store.put('wp3', wp, format='t') + n = store.remove('wp3', where=[crit4]) + self.assertTrue(n == 36) + + result = store.select('wp3') + expected = wp.reindex( + major_axis=wp.major_axis.difference(date4)) + assert_panel_equal(result, expected) + + # upper half + _maybe_remove(store, 'wp') + store.put('wp', wp, format='table') + date = wp.major_axis[len(wp.major_axis) // 2] + + crit1 = 'major_axis>date' + crit2 = "minor_axis=['A', 'D']" + n = store.remove('wp', where=[crit1]) + self.assertTrue(n == 56) + + n = store.remove('wp', where=[crit2]) + self.assertTrue(n == 32) + + result = store['wp'] + expected = wp.truncate(after=date).reindex(minor=['B', 'C']) + assert_panel_equal(result, expected) + + # individual row elements + _maybe_remove(store, 'wp2') + store.put('wp2', wp, format='table') + + date1 = wp.major_axis[1:3] + crit1 = 'major_axis=date1' + store.remove('wp2', 
where=[crit1]) + result = store.select('wp2') + expected = wp.reindex( + major_axis=wp.major_axis.difference(date1)) + assert_panel_equal(result, expected) + + date2 = wp.major_axis[5] + crit2 = 'major_axis=date2' + store.remove('wp2', where=[crit2]) + result = store['wp2'] + expected = wp.reindex( + major_axis=(wp.major_axis + .difference(date1) + .difference(Index([date2])) + )) + assert_panel_equal(result, expected) + + date3 = [wp.major_axis[7], wp.major_axis[9]] + crit3 = 'major_axis=date3' + store.remove('wp2', where=[crit3]) + result = store['wp2'] + expected = wp.reindex(major_axis=wp.major_axis + .difference(date1) + .difference(Index([date2])) + .difference(Index(date3))) + assert_panel_equal(result, expected) + + # corners + _maybe_remove(store, 'wp4') + store.put('wp4', wp, format='table') + n = store.remove( + 'wp4', where="major_axis>wp.major_axis[-1]") + result = store.select('wp4') + assert_panel_equal(result, wp) def test_invalid_terms(self): @@ -2464,24 +2497,32 @@ def test_terms(self): with ensure_clean_store(self.path) as store: - wp = tm.makePanel() - wpneg = Panel.fromDict({-1: tm.makeDataFrame(), - 0: tm.makeDataFrame(), - 1: tm.makeDataFrame()}) - with catch_warnings(record=True): + wp = tm.makePanel() + wpneg = Panel.fromDict({-1: tm.makeDataFrame(), + 0: tm.makeDataFrame(), + 1: tm.makeDataFrame()}) p4d = tm.makePanel4D() store.put('p4d', p4d, format='table') - - store.put('wp', wp, format='table') - store.put('wpneg', wpneg, format='table') - - # panel - result = store.select( - 'wp', "major_axis<'20000108' and minor_axis=['A', 'B']") - expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) - assert_panel_equal(result, expected) + store.put('wp', wp, format='table') + store.put('wpneg', wpneg, format='table') + + # panel + result = store.select( + 'wp', + "major_axis<'20000108' and minor_axis=['A', 'B']") + expected = wp.truncate( + after='20000108').reindex(minor=['A', 'B']) + assert_panel_equal(result, expected) + + # with deprecation + result = store.select( + 'wp', where=("major_axis<'20000108' " + "and minor_axis=['A', 'B']")) + expected = wp.truncate( + after='20000108').reindex(minor=['A', 'B']) + tm.assert_panel_equal(result, expected) # p4d with catch_warnings(record=True): @@ -2516,74 +2557,79 @@ def test_terms(self): store.select('p4d', t) # valid for p4d only - terms = [(("labels=['l1', 'l2']"),), - Term("labels=['l1', 'l2']"), - ] - + terms = ["labels=['l1', 'l2']"] for t in terms: store.select('p4d', t) - with tm.assertRaisesRegexp(TypeError, - 'Only named functions are supported'): - store.select('wp', Term( - 'major_axis == (lambda x: x)("20130101")')) + with tm.assertRaisesRegexp( + TypeError, 'Only named functions are supported'): + store.select( + 'wp', + 'major_axis == (lambda x: x)("20130101")') - # check USub node parsing - res = store.select('wpneg', Term('items == -1')) - expected = Panel({-1: wpneg[-1]}) - tm.assert_panel_equal(res, expected) + with catch_warnings(record=True): + # check USub node parsing + res = store.select('wpneg', 'items == -1') + expected = Panel({-1: wpneg[-1]}) + tm.assert_panel_equal(res, expected) - with tm.assertRaisesRegexp(NotImplementedError, - 'Unary addition not supported'): - store.select('wpneg', Term('items == +1')) + with tm.assertRaisesRegexp(NotImplementedError, + 'Unary addition not supported'): + store.select('wpneg', 'items == +1') def test_term_compat(self): with ensure_clean_store(self.path) as store: - wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], - 
major_axis=date_range('1/1/2000', periods=5),
+                           minor_axis=['A', 'B', 'C', 'D'])
+                store.append('wp', wp)
 
-            result = store.select(
-                'wp', "major_axis>20000102 and minor_axis=['A', 'B']")
-            expected = wp.loc[:, wp.major_axis >
-                              Timestamp('20000102'), ['A', 'B']]
-            assert_panel_equal(result, expected)
+                result = store.select(
+                    'wp', where=("major_axis>20000102 "
+                                 "and minor_axis=['A', 'B']"))
+                expected = wp.loc[:, wp.major_axis >
+                                  Timestamp('20000102'), ['A', 'B']]
+                assert_panel_equal(result, expected)
 
-            store.remove('wp', 'major_axis>20000103')
-            result = store.select('wp')
-            expected = wp.loc[:, wp.major_axis <= Timestamp('20000103'), :]
-            assert_panel_equal(result, expected)
+                store.remove('wp', 'major_axis>20000103')
+                result = store.select('wp')
+                expected = wp.loc[:, wp.major_axis <= Timestamp('20000103'), :]
+                assert_panel_equal(result, expected)
 
         with ensure_clean_store(self.path) as store:
 
-            wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
-                       major_axis=date_range('1/1/2000', periods=5),
-                       minor_axis=['A', 'B', 'C', 'D'])
-            store.append('wp', wp)
-
-            # stringified datetimes
-            result = store.select(
-                'wp', "major_axis>datetime.datetime(2000, 1, 2)")
-            expected = wp.loc[:, wp.major_axis > Timestamp('20000102')]
-            assert_panel_equal(result, expected)
-
-            result = store.select(
-                'wp', "major_axis>datetime.datetime(2000, 1, 2, 0, 0)")
-            expected = wp.loc[:, wp.major_axis > Timestamp('20000102')]
-            assert_panel_equal(result, expected)
-
-            result = store.select(
-                'wp', ("major_axis=[datetime.datetime(2000, 1, 2, 0, 0), "
-                       "datetime.datetime(2000, 1, 3, 0, 0)]"))
-            expected = wp.loc[:, [Timestamp('20000102'),
-                                  Timestamp('20000103')]]
-            assert_panel_equal(result, expected)
-
-            result = store.select('wp', "minor_axis=['A', 'B']")
-            expected = wp.loc[:, :, ['A', 'B']]
-            assert_panel_equal(result, expected)
+            with catch_warnings(record=True):
+                wp = Panel(np.random.randn(2, 5, 4),
+                           items=['Item1', 'Item2'],
+                           major_axis=date_range('1/1/2000', periods=5),
+                           minor_axis=['A', 'B', 'C', 'D'])
+                store.append('wp', wp)
+
+                # stringified datetimes
+                result = store.select(
+                    'wp', 'major_axis>datetime.datetime(2000, 1, 2)')
+                expected = wp.loc[:, wp.major_axis > Timestamp('20000102')]
+                assert_panel_equal(result, expected)
+
+                result = store.select(
+                    'wp', 'major_axis>datetime.datetime(2000, 1, 2, 0, 0)')
+                expected = wp.loc[:, wp.major_axis > Timestamp('20000102')]
+                assert_panel_equal(result, expected)
+
+                result = store.select(
+                    'wp',
+                    "major_axis=[datetime.datetime(2000, 1, 2, 0, 0), "
+                    "datetime.datetime(2000, 1, 3, 0, 0)]")
+                expected = wp.loc[:, [Timestamp('20000102'),
+                                      Timestamp('20000103')]]
+                assert_panel_equal(result, expected)
+
+                result = store.select(
+                    'wp', "minor_axis=['A', 'B']")
+                expected = wp.loc[:, :, ['A', 'B']]
+                assert_panel_equal(result, expected)
 
     def test_same_name_scoping(self):
 
@@ -2678,12 +2724,13 @@ def test_tuple_index(self):
 
     def test_index_types(self):
 
-        values = np.random.randn(2)
+        with catch_warnings(record=True):
+            values = np.random.randn(2)
 
-        func = lambda l, r: tm.assert_series_equal(l, r,
-                                                   check_dtype=True,
-                                                   check_index_type=True,
-                                                   check_series_type=True)
+            func = lambda l, r: tm.assert_series_equal(l, r,
+                                                       check_dtype=True,
+                                                       check_index_type=True,
+                                                       check_series_type=True)
 
         with catch_warnings(record=True):
             ser = Series(values, 
[0, 'y']) @@ -2702,18 +2749,31 @@ def test_index_types(self): self._check_roundtrip(ser, func) with catch_warnings(record=True): + + ser = Series(values, [0, 'y']) + self._check_roundtrip(ser, func) + + ser = Series(values, [datetime.datetime.today(), 0]) + self._check_roundtrip(ser, func) + + ser = Series(values, ['y', 0]) + self._check_roundtrip(ser, func) + + ser = Series(values, [datetime.date.today(), 'a']) + self._check_roundtrip(ser, func) + ser = Series(values, [1.23, 'b']) self._check_roundtrip(ser, func) - ser = Series(values, [1, 1.53]) - self._check_roundtrip(ser, func) + ser = Series(values, [1, 1.53]) + self._check_roundtrip(ser, func) - ser = Series(values, [1, 5]) - self._check_roundtrip(ser, func) + ser = Series(values, [1, 5]) + self._check_roundtrip(ser, func) - ser = Series(values, [datetime.datetime( - 2012, 1, 1), datetime.datetime(2012, 1, 2)]) - self._check_roundtrip(ser, func) + ser = Series(values, [datetime.datetime( + 2012, 1, 1), datetime.datetime(2012, 1, 2)]) + self._check_roundtrip(ser, func) def test_timeseries_preepoch(self): @@ -2876,13 +2936,9 @@ def _make_one(): def test_wide(self): - wp = tm.makePanel() - self._check_roundtrip(wp, assert_panel_equal) - - def test_wide_table(self): - - wp = tm.makePanel() - self._check_roundtrip_table(wp, assert_panel_equal) + with catch_warnings(record=True): + wp = tm.makePanel() + self._check_roundtrip(wp, assert_panel_equal) def test_select_with_dups(self): @@ -2944,25 +3000,24 @@ def test_select_with_dups(self): assert_frame_equal(result, expected, by_blocks=True) def test_wide_table_dups(self): - wp = tm.makePanel() with ensure_clean_store(self.path) as store: - store.put('panel', wp, format='table') - store.put('panel', wp, format='table', append=True) - with catch_warnings(record=True): + + wp = tm.makePanel() + store.put('panel', wp, format='table') + store.put('panel', wp, format='table', append=True) + recons = store['panel'] - assert_panel_equal(recons, wp) + assert_panel_equal(recons, wp) def test_long(self): def _check(left, right): assert_panel_equal(left.to_panel(), right.to_panel()) - wp = tm.makePanel() - self._check_roundtrip(wp.to_frame(), _check) - - # empty - # self._check_roundtrip(wp.to_frame()[:0], _check) + with catch_warnings(record=True): + wp = tm.makePanel() + self._check_roundtrip(wp.to_frame(), _check) def test_longpanel(self): pass @@ -3009,70 +3064,72 @@ def test_sparse_with_compression(self): check_frame_type=True) def test_select(self): - wp = tm.makePanel() with ensure_clean_store(self.path) as store: - # put/select ok - _maybe_remove(store, 'wp') - store.put('wp', wp, format='table') - store.select('wp') - - # non-table ok (where = None) - _maybe_remove(store, 'wp') - store.put('wp2', wp) - store.select('wp2') - - # selection on the non-indexable with a large number of columns - wp = Panel(np.random.randn(100, 100, 100), - items=['Item%03d' % i for i in range(100)], - major_axis=date_range('1/1/2000', periods=100), - minor_axis=['E%03d' % i for i in range(100)]) - - _maybe_remove(store, 'wp') - store.append('wp', wp) - items = ['Item%03d' % i for i in range(80)] - result = store.select('wp', Term('items=items')) - expected = wp.reindex(items=items) - assert_panel_equal(expected, result) - - # selectin non-table with a where - # self.assertRaises(ValueError, store.select, - # 'wp2', ('column', ['A', 'D'])) + with catch_warnings(record=True): + wp = tm.makePanel() - # select with columns= - df = tm.makeTimeDataFrame() - _maybe_remove(store, 'df') - store.append('df', df) - result = 
store.select('df', columns=['A', 'B'])
-            expected = df.reindex(columns=['A', 'B'])
-            tm.assert_frame_equal(expected, result)
+                # put/select ok
+                _maybe_remove(store, 'wp')
+                store.put('wp', wp, format='table')
+                store.select('wp')
+
+                # non-table ok (where = None)
+                _maybe_remove(store, 'wp')
+                store.put('wp2', wp)
+                store.select('wp2')
+
+                # selection on the non-indexable with a large number of columns
+                wp = Panel(np.random.randn(100, 100, 100),
+                           items=['Item%03d' % i for i in range(100)],
+                           major_axis=date_range('1/1/2000', periods=100),
+                           minor_axis=['E%03d' % i for i in range(100)])
+
+                _maybe_remove(store, 'wp')
+                store.append('wp', wp)
+                items = ['Item%03d' % i for i in range(80)]
+                result = store.select('wp', 'items=items')
+                expected = wp.reindex(items=items)
+                assert_panel_equal(expected, result)
+
+                # selecting non-table with a where
+                # self.assertRaises(ValueError, store.select,
+                #                   'wp2', ('column', ['A', 'D']))
+
+                # select with columns=
+                df = tm.makeTimeDataFrame()
+                _maybe_remove(store, 'df')
+                store.append('df', df)
+                result = store.select('df', columns=['A', 'B'])
+                expected = df.reindex(columns=['A', 'B'])
+                tm.assert_frame_equal(expected, result)
 
-            # equivalentsly
-            result = store.select('df', [("columns=['A', 'B']")])
-            expected = df.reindex(columns=['A', 'B'])
-            tm.assert_frame_equal(expected, result)
+                # equivalently
+                result = store.select('df', [("columns=['A', 'B']")])
+                expected = df.reindex(columns=['A', 'B'])
+                tm.assert_frame_equal(expected, result)
 
-            # with a data column
-            _maybe_remove(store, 'df')
-            store.append('df', df, data_columns=['A'])
-            result = store.select('df', ['A > 0'], columns=['A', 'B'])
-            expected = df[df.A > 0].reindex(columns=['A', 'B'])
-            tm.assert_frame_equal(expected, result)
+                # with a data column
+                _maybe_remove(store, 'df')
+                store.append('df', df, data_columns=['A'])
+                result = store.select('df', ['A > 0'], columns=['A', 'B'])
+                expected = df[df.A > 0].reindex(columns=['A', 'B'])
+                tm.assert_frame_equal(expected, result)
 
-            # all a data columns
-            _maybe_remove(store, 'df')
-            store.append('df', df, data_columns=True)
-            result = store.select('df', ['A > 0'], columns=['A', 'B'])
-            expected = df[df.A > 0].reindex(columns=['A', 'B'])
-            tm.assert_frame_equal(expected, result)
+                # all as data columns
+                _maybe_remove(store, 'df')
+                store.append('df', df, data_columns=True)
+                result = store.select('df', ['A > 0'], columns=['A', 'B'])
+                expected = df[df.A > 0].reindex(columns=['A', 'B'])
+                tm.assert_frame_equal(expected, result)
 
-            # with a data column, but different columns
-            _maybe_remove(store, 'df')
-            store.append('df', df, data_columns=['A'])
-            result = store.select('df', ['A > 0'], columns=['C', 'D'])
-            expected = df[df.A > 0].reindex(columns=['C', 'D'])
-            tm.assert_frame_equal(expected, result)
+                # with a data column, but different columns
+                _maybe_remove(store, 'df')
+                store.append('df', df, data_columns=['A'])
+                result = store.select('df', ['A > 0'], columns=['C', 'D'])
+                expected = df[df.A > 0].reindex(columns=['C', 'D'])
+                tm.assert_frame_equal(expected, result)
 
     def test_select_dtypes(self):
 
@@ -3084,7 +3141,7 @@ def test_select_dtypes(self):
             _maybe_remove(store, 'df')
             store.append('df', df, data_columns=['ts', 'A'])
 
-            result = store.select('df', [Term("ts>=Timestamp('2012-02-01')")])
+            result = store.select('df', "ts>=Timestamp('2012-02-01')")
             expected = df[df.ts >= Timestamp('2012-02-01')]
             tm.assert_frame_equal(expected, result)
 
@@ -3099,15 +3156,15 @@ def test_select_dtypes(self):
             expected = (df[df.boolv == True]  # noqa
.reindex(columns=['A', 'boolv'])) for v in [True, 'true', 1]: - result = store.select('df', Term( - 'boolv == %s' % str(v)), columns=['A', 'boolv']) + result = store.select('df', 'boolv == %s' % str(v), + columns=['A', 'boolv']) tm.assert_frame_equal(expected, result) expected = (df[df.boolv == False] # noqa .reindex(columns=['A', 'boolv'])) for v in [False, 'false', 0]: - result = store.select('df', Term( - 'boolv == %s' % str(v)), columns=['A', 'boolv']) + result = store.select( + 'df', 'boolv == %s' % str(v), columns=['A', 'boolv']) tm.assert_frame_equal(expected, result) # integer index @@ -3115,7 +3172,7 @@ def test_select_dtypes(self): _maybe_remove(store, 'df_int') store.append('df_int', df) result = store.select( - 'df_int', [Term("index<10"), Term("columns=['A']")]) + 'df_int', "index<10 and columns=['A']") expected = df.reindex(index=list(df.index)[0:10], columns=['A']) tm.assert_frame_equal(expected, result) @@ -3125,7 +3182,7 @@ def test_select_dtypes(self): _maybe_remove(store, 'df_float') store.append('df_float', df) result = store.select( - 'df_float', [Term("index<10.0"), Term("columns=['A']")]) + 'df_float', "index<10.0 and columns=['A']") expected = df.reindex(index=list(df.index)[0:10], columns=['A']) tm.assert_frame_equal(expected, result) @@ -3196,14 +3253,14 @@ def test_select_with_many_inputs(self): store.append('df', df, data_columns=['ts', 'A', 'B', 'users']) # regular select - result = store.select('df', [Term("ts>=Timestamp('2012-02-01')")]) + result = store.select('df', "ts>=Timestamp('2012-02-01')") expected = df[df.ts >= Timestamp('2012-02-01')] tm.assert_frame_equal(expected, result) # small selector result = store.select( - 'df', [Term("ts>=Timestamp('2012-02-01') & " - "users=['a','b','c']")]) + 'df', + "ts>=Timestamp('2012-02-01') & users=['a','b','c']") expected = df[(df.ts >= Timestamp('2012-02-01')) & df.users.isin(['a', 'b', 'c'])] tm.assert_frame_equal(expected, result) @@ -3211,21 +3268,21 @@ def test_select_with_many_inputs(self): # big selector along the columns selector = ['a', 'b', 'c'] + ['a%03d' % i for i in range(60)] result = store.select( - 'df', [Term("ts>=Timestamp('2012-02-01')"), - Term('users=selector')]) + 'df', + "ts>=Timestamp('2012-02-01') and users=selector") expected = df[(df.ts >= Timestamp('2012-02-01')) & df.users.isin(selector)] tm.assert_frame_equal(expected, result) selector = range(100, 200) - result = store.select('df', [Term('B=selector')]) + result = store.select('df', 'B=selector') expected = df[df.B.isin(selector)] tm.assert_frame_equal(expected, result) self.assertEqual(len(result), 100) # big selector along the index selector = Index(df.ts[0:100].values) - result = store.select('df', [Term('ts=selector')]) + result = store.select('df', 'ts=selector') expected = df[df.ts.isin(selector.values)] tm.assert_frame_equal(expected, result) self.assertEqual(len(result), 100) @@ -3296,17 +3353,6 @@ def test_select_iterator(self): result = concat(results) tm.assert_frame_equal(expected, result) - # where selection - # expected = store.select_as_multiple( - # ['df1', 'df2'], where= Term('A>0'), selector='df1') - # results = [] - # for s in store.select_as_multiple( - # ['df1', 'df2'], where= Term('A>0'), selector='df1', - # chunksize=25): - # results.append(s) - # result = concat(results) - # tm.assert_frame_equal(expected, result) - def test_select_iterator_complete_8014(self): # GH 8014 @@ -3518,8 +3564,7 @@ def test_retain_index_attributes(self): getattr(getattr(result, idx), attr, None)) # try to append a table with a 
different frequency
-            with tm.assert_produces_warning(
-                    expected_warning=AttributeConflictWarning):
+            with catch_warnings(record=True):
 
                 df2 = DataFrame(dict(
                     A=Series(lrange(3),
                              index=date_range('2002-1-1',
@@ -3544,9 +3589,7 @@ def test_retain_index_attributes2(self):
         with ensure_clean_path(self.path) as path:
 
-            expected_warning = Warning if PY35 else AttributeConflictWarning
-            with tm.assert_produces_warning(expected_warning=expected_warning,
-                                            check_stacklevel=False):
+            with catch_warnings(record=True):
 
                 df = DataFrame(dict(
                     A=Series(lrange(3),
@@ -3566,8 +3609,7 @@ def test_retain_index_attributes2(self):
 
             self.assertEqual(read_hdf(path, 'data').index.name, 'foo')
 
-            with tm.assert_produces_warning(expected_warning=expected_warning,
-                                            check_stacklevel=False):
+            with catch_warnings(record=True):
 
                 idx2 = date_range('2001-1-1', periods=3, freq='H')
                 idx2.name = 'bar'
@@ -3578,23 +3620,28 @@ def test_retain_index_attributes2(self):
 
     def test_panel_select(self):
 
-        wp = tm.makePanel()
-
         with ensure_clean_store(self.path) as store:
-            store.put('wp', wp, format='table')
-            date = wp.major_axis[len(wp.major_axis) // 2]
 
-            crit1 = ('major_axis>=date')
-            crit2 = ("minor_axis=['A', 'D']")
+            with catch_warnings(record=True):
 
-            result = store.select('wp', [crit1, crit2])
-            expected = wp.truncate(before=date).reindex(minor=['A', 'D'])
-            assert_panel_equal(result, expected)
+                wp = tm.makePanel()
 
-            result = store.select(
-                'wp', ['major_axis>="20000124"', ("minor_axis=['A', 'B']")])
-            expected = wp.truncate(before='20000124').reindex(minor=['A', 'B'])
-            assert_panel_equal(result, expected)
+                store.put('wp', wp, format='table')
+                date = wp.major_axis[len(wp.major_axis) // 2]
+
+                crit1 = ('major_axis>=date')
+                crit2 = ("minor_axis=['A', 'D']")
+
+                result = store.select('wp', [crit1, crit2])
+                expected = wp.truncate(before=date).reindex(minor=['A', 'D'])
+                assert_panel_equal(result, expected)
+
+                result = store.select(
+                    'wp', ['major_axis>="20000124"',
+                           ("minor_axis=['A', 'B']")])
+                expected = wp.truncate(
+                    before='20000124').reindex(minor=['A', 'B'])
+                assert_panel_equal(result, expected)
 
     def test_frame_select(self):
 
@@ -3622,7 +3669,7 @@ def test_frame_select(self):
             df = tm.makeTimeDataFrame()
             store.append('df_time', df)
             self.assertRaises(
-                ValueError, store.select, 'df_time', [Term("index>0")])
+                ValueError, store.select, 'df_time', "index>0")
 
             # can't select if not written as table
             # store['frame'] = df
@@ -3701,7 +3748,7 @@ def test_frame_select_complex2(self):
 
         hist.to_hdf(hh, 'df', mode='w', format='table')
 
-        expected = read_hdf(hh, 'df', where="l1=[2, 3, 4]")
+        expected = read_hdf(hh, 'df', where='l1=[2, 3, 4]')
 
         # scope with list like
        l = selection.index.tolist()  # noqa
@@ -4005,6 +4052,7 @@ def test_append_to_multiple_dropna(self):
         df = concat([df1, df2], axis=1)
 
         with ensure_clean_store(self.path) as store:
+
             # dropna=True should guarantee rows are synchronized
             store.append_to_multiple(
                 {'df1': ['A', 'B'], 'df2': None}, df, selector='df1',
@@ -4015,14 +4063,27 @@ def test_append_to_multiple_dropna(self):
             tm.assert_index_equal(store.select('df1').index,
                                   store.select('df2').index)
 
+    @pytest.mark.xfail(run=False,
+                       reason="append_to_multiple_dropna_false "
+                              "does not raise as expected")
+    def test_append_to_multiple_dropna_false(self):
+        df1 = tm.makeTimeDataFrame()
+        df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
+        df1.iloc[1, df1.columns.get_indexer(['A', 'B'])] = np.nan
+        df = concat([df1, df2], axis=1)
+
+        with ensure_clean_store(self.path) 
as store: + # dropna=False shouldn't synchronize row indexes store.append_to_multiple( - {'df1': ['A', 'B'], 'df2': None}, df, selector='df1', + {'df1a': ['A', 'B'], 'df2a': None}, df, selector='df1a', dropna=False) - self.assertRaises( - ValueError, store.select_as_multiple, ['df1', 'df2']) - assert not store.select('df1').index.equals( - store.select('df2').index) + + with pytest.raises(ValueError): + store.select_as_multiple(['df1a', 'df2a']) + + assert not store.select('df1a').index.equals( + store.select('df2a').index) def test_select_as_multiple(self): @@ -4220,7 +4281,7 @@ def _check_roundtrip_table(self, obj, comparator, compression=False): with ensure_clean_store(self.path, 'w', **options) as store: store.put('obj', obj, format='table') retrieved = store['obj'] - # sorted_obj = _test_sort(obj) + comparator(retrieved, obj) def test_multiple_open_close(self): @@ -4351,16 +4412,16 @@ def test_legacy_table_read(self): with ensure_clean_store( tm.get_data_path('legacy_hdf/legacy_table.h5'), mode='r') as store: - store.select('df1') - store.select('df2') - store.select('wp1') - # force the frame - store.select('df2', typ='legacy_frame') + with catch_warnings(record=True): + store.select('df1') + store.select('df2') + store.select('wp1') + + # force the frame + store.select('df2', typ='legacy_frame') - # old version warning - with tm.assert_produces_warning( - expected_warning=IncompatibilityWarning): + # old version warning self.assertRaises( Exception, store.select, 'wp1', 'minor_axis=B') @@ -4466,7 +4527,8 @@ def test_legacy_table_write(self): 'legacy_hdf/legacy_table_%s.h5' % pandas.__version__), 'a') df = tm.makeDataFrame() - wp = tm.makePanel() + with catch_warnings(record=True): + wp = tm.makePanel() index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], @@ -4803,12 +4865,11 @@ def test_to_hdf_with_object_column_names(self): for index in types_should_fail: df = DataFrame(np.random.randn(10, 2), columns=index(2)) with ensure_clean_path(self.path) as path: - with self.assertRaises( + with catch_warnings(record=True): + with self.assertRaises( ValueError, msg=("cannot have non-object label " "DataIndexableCol")): - with catch_warnings(record=True): - df.to_hdf(path, 'df', - format='table', + df.to_hdf(path, 'df', format='table', data_columns=True) for index in types_should_run: @@ -4979,7 +5040,7 @@ def test_query_compare_column_type(self): with ensure_clean_store(self.path) as store: store.append('test', df, format='table', data_columns=True) - ts = pd.Timestamp('2014-01-01') # noqa + ts = pd.Timestamp('2014-01-01') # noqa result = store.select('test', where='real_date > ts') expected = df.loc[[1], :] tm.assert_frame_equal(expected, result) @@ -5092,28 +5153,30 @@ def test_complex_mixed_table(self): assert_frame_equal(df, reread) def test_complex_across_dimensions_fixed(self): - complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) - s = Series(complex128, index=list('abcd')) - df = DataFrame({'A': s, 'B': s}) - p = Panel({'One': df, 'Two': df}) + with catch_warnings(record=True): + complex128 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) + s = Series(complex128, index=list('abcd')) + df = DataFrame({'A': s, 'B': s}) + p = Panel({'One': df, 'Two': df}) - objs = [s, df, p] - comps = [tm.assert_series_equal, tm.assert_frame_equal, - tm.assert_panel_equal] - for obj, comp in zip(objs, comps): - with ensure_clean_path(self.path) as path: - obj.to_hdf(path, 'obj', format='fixed') - reread = read_hdf(path, 'obj') - comp(obj, 
reread) + objs = [s, df, p] + comps = [tm.assert_series_equal, tm.assert_frame_equal, + tm.assert_panel_equal] + for obj, comp in zip(objs, comps): + with ensure_clean_path(self.path) as path: + obj.to_hdf(path, 'obj', format='fixed') + reread = read_hdf(path, 'obj') + comp(obj, reread) def test_complex_across_dimensions(self): complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) s = Series(complex128, index=list('abcd')) df = DataFrame({'A': s, 'B': s}) - p = Panel({'One': df, 'Two': df}) with catch_warnings(record=True): - p4d = pd.Panel4D({'i': p, 'ii': p}) + p = Panel({'One': df, 'Two': df}) + p4d = Panel4D({'i': p, 'ii': p}) objs = [df, p, p4d] comps = [tm.assert_frame_equal, tm.assert_panel_equal, @@ -5430,12 +5493,3 @@ def test_dst_transitions(self): store.append('df', df) result = store.select('df') assert_frame_equal(result, df) - - -def _test_sort(obj): - if isinstance(obj, DataFrame): - return obj.reindex(sorted(obj.index)) - elif isinstance(obj, Panel): - return obj.reindex(major=sorted(obj.major_axis)) - else: - raise ValueError('type not supported here') diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index f669ebe371f9d..dc4787176a0b5 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -2,6 +2,7 @@ from __future__ import print_function # pylint: disable-msg=W0612,E1101 +from warnings import catch_warnings import re import operator import pytest @@ -32,19 +33,26 @@ 'D': _frame2['D'].astype('int32')}) _integer = DataFrame( np.random.randint(1, 100, - size=(10001, 4)), columns=list('ABCD'), dtype='int64') + size=(10001, 4)), + columns=list('ABCD'), dtype='int64') _integer2 = DataFrame(np.random.randint(1, 100, size=(101, 4)), columns=list('ABCD'), dtype='int64') -_frame_panel = Panel(dict(ItemA=_frame.copy(), ItemB=( - _frame.copy() + 3), ItemC=_frame.copy(), ItemD=_frame.copy())) -_frame2_panel = Panel(dict(ItemA=_frame2.copy(), ItemB=(_frame2.copy() + 3), - ItemC=_frame2.copy(), ItemD=_frame2.copy())) -_integer_panel = Panel(dict(ItemA=_integer, ItemB=(_integer + 34).astype( - 'int64'))) -_integer2_panel = Panel(dict(ItemA=_integer2, ItemB=(_integer2 + 34).astype( - 'int64'))) -_mixed_panel = Panel(dict(ItemA=_mixed, ItemB=(_mixed + 3))) -_mixed2_panel = Panel(dict(ItemA=_mixed2, ItemB=(_mixed2 + 3))) + +with catch_warnings(record=True): + _frame_panel = Panel(dict(ItemA=_frame.copy(), + ItemB=(_frame.copy() + 3), + ItemC=_frame.copy(), + ItemD=_frame.copy())) + _frame2_panel = Panel(dict(ItemA=_frame2.copy(), + ItemB=(_frame2.copy() + 3), + ItemC=_frame2.copy(), + ItemD=_frame2.copy())) + _integer_panel = Panel(dict(ItemA=_integer, + ItemB=(_integer + 34).astype('int64'))) + _integer2_panel = Panel(dict(ItemA=_integer2, + ItemB=(_integer2 + 34).astype('int64'))) + _mixed_panel = Panel(dict(ItemA=_mixed, ItemB=(_mixed + 3))) + _mixed2_panel = Panel(dict(ItemA=_mixed2, ItemB=(_mixed2 + 3))) @pytest.mark.skipif(not expr._USE_NUMEXPR, reason='not using numexpr') @@ -204,7 +212,7 @@ def test_float_panel(self): @slow def test_panel4d(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self.run_panel(tm.makePanel4D(), np.random.randn() + 0.5, assert_func=assert_panel4d_equal, binary_comp=3) diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 0e8e8dc43ff03..118039d1f354c 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -3,6 +3,8 @@ from operator import methodcaller from copy 
import copy, deepcopy +from warnings import catch_warnings + import pytest import numpy as np from numpy import nan @@ -1570,17 +1572,18 @@ def test_to_xarray(self): tm._skip_if_no_xarray() from xarray import DataArray - p = tm.makePanel() + with catch_warnings(record=True): + p = tm.makePanel() - result = p.to_xarray() - self.assertIsInstance(result, DataArray) - self.assertEqual(len(result.coords), 3) - assert_almost_equal(list(result.coords.keys()), - ['items', 'major_axis', 'minor_axis']) - self.assertEqual(len(result.dims), 3) + result = p.to_xarray() + self.assertIsInstance(result, DataArray) + self.assertEqual(len(result.coords), 3) + assert_almost_equal(list(result.coords.keys()), + ['items', 'major_axis', 'minor_axis']) + self.assertEqual(len(result.dims), 3) - # idempotency - assert_panel_equal(result.to_pandas(), p) + # idempotency + assert_panel_equal(result.to_pandas(), p) class TestPanel4D(tm.TestCase, Generic): @@ -1590,15 +1593,12 @@ class TestPanel4D(tm.TestCase, Generic): def test_sample(self): pytest.skip("sample on Panel4D") - def test_copy_and_deepcopy(self): - pytest.skip("copy_and_deepcopy on Panel4D") - def test_to_xarray(self): tm._skip_if_no_xarray() from xarray import DataArray - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p = tm.makePanel4D() result = p.to_xarray() @@ -1624,12 +1624,20 @@ def test_to_xarray(self): 'test_stat_unexpected_keyword', 'test_api_compat', 'test_stat_non_defaults_args', 'test_clip', 'test_truncate_out_of_bounds', 'test_numpy_clip', - 'test_metadata_propagation']: + 'test_metadata_propagation', 'test_copy_and_deepcopy', + 'test_sample']: + + def f(): + def tester(self): + with catch_warnings(record=True): + return getattr(super(TestPanel, self), t)() + return tester + + setattr(TestPanel, t, f()) def f(): def tester(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): return getattr(super(TestPanel4D, self), t)() return tester @@ -1660,10 +1668,11 @@ def test_sample(sel): with tm.assertRaises(ValueError): s.sample(n=3, weights='weight_column') - panel = pd.Panel(items=[0, 1, 2], major_axis=[2, 3, 4], - minor_axis=[3, 4, 5]) - with tm.assertRaises(ValueError): - panel.sample(n=1, weights='weight_column') + with catch_warnings(record=True): + panel = Panel(items=[0, 1, 2], major_axis=[2, 3, 4], + minor_axis=[3, 4, 5]) + with tm.assertRaises(ValueError): + panel.sample(n=1, weights='weight_column') with tm.assertRaises(ValueError): df.sample(n=1, weights='weight_column', axis=1) @@ -1726,14 +1735,15 @@ def test_sample(sel): assert_frame_equal(sample1, df[['colString']]) # Test default axes - p = pd.Panel(items=['a', 'b', 'c'], major_axis=[2, 4, 6], - minor_axis=[1, 3, 5]) - assert_panel_equal( - p.sample(n=3, random_state=42), p.sample(n=3, axis=1, - random_state=42)) - assert_frame_equal( - df.sample(n=3, random_state=42), df.sample(n=3, axis=0, - random_state=42)) + with catch_warnings(record=True): + p = Panel(items=['a', 'b', 'c'], major_axis=[2, 4, 6], + minor_axis=[1, 3, 5]) + assert_panel_equal( + p.sample(n=3, random_state=42), p.sample(n=3, axis=1, + random_state=42)) + assert_frame_equal( + df.sample(n=3, random_state=42), df.sample(n=3, axis=0, + random_state=42)) # Test that function aligns weights with frame df = DataFrame( @@ -1763,9 +1773,10 @@ def test_squeeze(self): tm.assert_series_equal(s.squeeze(), s) for df in [tm.makeTimeDataFrame()]: tm.assert_frame_equal(df.squeeze(), df) - for p in 
[tm.makePanel()]: - tm.assert_panel_equal(p.squeeze(), p) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): + for p in [tm.makePanel()]: + tm.assert_panel_equal(p.squeeze(), p) + with catch_warnings(record=True): for p4d in [tm.makePanel4D()]: tm.assert_panel4d_equal(p4d.squeeze(), p4d) @@ -1773,24 +1784,26 @@ def test_squeeze(self): df = tm.makeTimeDataFrame().reindex(columns=['A']) tm.assert_series_equal(df.squeeze(), df['A']) - p = tm.makePanel().reindex(items=['ItemA']) - tm.assert_frame_equal(p.squeeze(), p['ItemA']) + with catch_warnings(record=True): + p = tm.makePanel().reindex(items=['ItemA']) + tm.assert_frame_equal(p.squeeze(), p['ItemA']) - p = tm.makePanel().reindex(items=['ItemA'], minor_axis=['A']) - tm.assert_series_equal(p.squeeze(), p.loc['ItemA', :, 'A']) + p = tm.makePanel().reindex(items=['ItemA'], minor_axis=['A']) + tm.assert_series_equal(p.squeeze(), p.loc['ItemA', :, 'A']) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p4d = tm.makePanel4D().reindex(labels=['label1']) tm.assert_panel_equal(p4d.squeeze(), p4d['label1']) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p4d = tm.makePanel4D().reindex(labels=['label1'], items=['ItemA']) tm.assert_frame_equal(p4d.squeeze(), p4d.loc['label1', 'ItemA']) # don't fail with 0 length dimensions GH11229 & GH8999 - empty_series = pd.Series([], name='five') - empty_frame = pd.DataFrame([empty_series]) - empty_panel = pd.Panel({'six': empty_frame}) + empty_series = Series([], name='five') + empty_frame = DataFrame([empty_series]) + with catch_warnings(record=True): + empty_panel = Panel({'six': empty_frame}) [tm.assert_series_equal(empty_series, higher_dim.squeeze()) for higher_dim in [empty_series, empty_frame, empty_panel]] @@ -1825,13 +1838,15 @@ def test_transpose(self): tm.assert_series_equal(s.transpose(), s) for df in [tm.makeTimeDataFrame()]: tm.assert_frame_equal(df.transpose().transpose(), df) - for p in [tm.makePanel()]: - tm.assert_panel_equal(p.transpose(2, 0, 1) - .transpose(1, 2, 0), p) - tm.assertRaisesRegexp(TypeError, msg, p.transpose, - 2, 0, 1, axes=(2, 0, 1)) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): + for p in [tm.makePanel()]: + tm.assert_panel_equal(p.transpose(2, 0, 1) + .transpose(1, 2, 0), p) + tm.assertRaisesRegexp(TypeError, msg, p.transpose, + 2, 0, 1, axes=(2, 0, 1)) + + with catch_warnings(record=True): for p4d in [tm.makePanel4D()]: tm.assert_panel4d_equal(p4d.transpose(2, 0, 3, 1) .transpose(1, 3, 0, 2), p4d) @@ -1853,12 +1868,13 @@ def test_numpy_transpose(self): tm.assertRaisesRegexp(ValueError, msg, np.transpose, df, axes=1) - p = tm.makePanel() - tm.assert_panel_equal(np.transpose( - np.transpose(p, axes=(2, 0, 1)), - axes=(1, 2, 0)), p) + with catch_warnings(record=True): + p = tm.makePanel() + tm.assert_panel_equal(np.transpose( + np.transpose(p, axes=(2, 0, 1)), + axes=(1, 2, 0)), p) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p4d = tm.makePanel4D() tm.assert_panel4d_equal(np.transpose( np.transpose(p4d, axes=(2, 0, 3, 1)), @@ -1880,15 +1896,16 @@ def test_take(self): tm.assert_frame_equal(out, expected) indices = [-3, 2, 0, 1] - for p in [tm.makePanel()]: - out = p.take(indices) - expected = Panel(data=p.values.take(indices, axis=0), - items=p.items.take(indices), - 
major_axis=p.major_axis, - minor_axis=p.minor_axis) - tm.assert_panel_equal(out, expected) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): + for p in [tm.makePanel()]: + out = p.take(indices) + expected = Panel(data=p.values.take(indices, axis=0), + items=p.items.take(indices), + major_axis=p.major_axis, + minor_axis=p.minor_axis) + tm.assert_panel_equal(out, expected) + + with catch_warnings(record=True): for p4d in [tm.makePanel4D()]: out = p4d.take(indices) expected = Panel4D(data=p4d.values.take(indices, axis=0), @@ -1902,9 +1919,9 @@ def test_take_invalid_kwargs(self): indices = [-3, 2, 0, 1] s = tm.makeFloatSeries() df = tm.makeTimeDataFrame() - p = tm.makePanel() - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): + p = tm.makePanel() p4d = tm.makePanel4D() for obj in (s, df, p, p4d): @@ -2011,8 +2028,9 @@ def test_equals(self): self.assertTrue(e.equals(f)) def test_describe_raises(self): - with tm.assertRaises(NotImplementedError): - tm.makePanel().describe() + with catch_warnings(record=True): + with tm.assertRaises(NotImplementedError): + tm.makePanel().describe() def test_pipe(self): df = DataFrame({'A': [1, 2, 3]}) @@ -2043,15 +2061,16 @@ def test_pipe_tuple_error(self): df.A.pipe((f, 'y'), x=1, y=0) def test_pipe_panel(self): - wp = Panel({'r1': DataFrame({"A": [1, 2, 3]})}) - f = lambda x, y: x + y - result = wp.pipe(f, 2) - expected = wp + 2 - assert_panel_equal(result, expected) - - result = wp.pipe((f, 'y'), x=1) - expected = wp + 1 - assert_panel_equal(result, expected) - - with tm.assertRaises(ValueError): - result = wp.pipe((f, 'y'), x=1, y=1) + with catch_warnings(record=True): + wp = Panel({'r1': DataFrame({"A": [1, 2, 3]})}) + f = lambda x, y: x + y + result = wp.pipe(f, 2) + expected = wp + 2 + assert_panel_equal(result, expected) + + result = wp.pipe((f, 'y'), x=1) + expected = wp + 1 + assert_panel_equal(result, expected) + + with tm.assertRaises(ValueError): + result = wp.pipe((f, 'y'), x=1, y=1) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index ab0322abbcf06..bc7bb8a4dfec1 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -3,7 +3,6 @@ from warnings import catch_warnings from datetime import datetime - import operator import pytest @@ -31,25 +30,37 @@ import pandas.util.testing as tm +def make_test_panel(): + with catch_warnings(record=True): + _panel = tm.makePanel() + tm.add_nans(_panel) + _panel = _panel.copy() + return _panel + + class PanelTests(object): panel = None def test_pickle(self): - unpickled = self.round_trip_pickle(self.panel) - assert_frame_equal(unpickled['ItemA'], self.panel['ItemA']) + with catch_warnings(record=True): + unpickled = self.round_trip_pickle(self.panel) + assert_frame_equal(unpickled['ItemA'], self.panel['ItemA']) def test_rank(self): - self.assertRaises(NotImplementedError, lambda: self.panel.rank()) + with catch_warnings(record=True): + self.assertRaises(NotImplementedError, lambda: self.panel.rank()) def test_cumsum(self): - cumsum = self.panel.cumsum() - assert_frame_equal(cumsum['ItemA'], self.panel['ItemA'].cumsum()) + with catch_warnings(record=True): + cumsum = self.panel.cumsum() + assert_frame_equal(cumsum['ItemA'], self.panel['ItemA'].cumsum()) def not_hashable(self): - c_empty = Panel() - c = Panel(Panel([[[1]]])) - self.assertRaises(TypeError, hash, c_empty) - self.assertRaises(TypeError, hash, c) + with catch_warnings(record=True): + c_empty = 
Panel() + c = Panel(Panel([[[1]]])) + self.assertRaises(TypeError, hash, c_empty) + self.assertRaises(TypeError, hash, c) class SafeForLongAndSparse(object): @@ -58,11 +69,12 @@ def test_repr(self): repr(self.panel) def test_copy_names(self): - for attr in ('major_axis', 'minor_axis'): - getattr(self.panel, attr).name = None - cp = self.panel.copy() - getattr(cp, attr).name = 'foo' - self.assertIsNone(getattr(self.panel, attr).name) + with catch_warnings(record=True): + for attr in ('major_axis', 'minor_axis'): + getattr(self.panel, attr).name = None + cp = self.panel.copy() + getattr(cp, attr).name = 'foo' + self.assertIsNone(getattr(self.panel, attr).name) def test_iter(self): tm.equalContents(list(self.panel), self.panel.items) @@ -107,10 +119,6 @@ def this_skew(x): self._check_stat_op('skew', this_skew) - # def test_mad(self): - # f = lambda x: np.abs(x - x.mean()).mean() - # self._check_stat_op('mad', f) - def test_var(self): def alt(x): if len(x) < 2: @@ -239,47 +247,48 @@ def test_get_plane_axes(self): index, columns = self.panel._get_plane_axes(0) def test_truncate(self): - dates = self.panel.major_axis - start, end = dates[1], dates[5] - - trunced = self.panel.truncate(start, end, axis='major') - expected = self.panel['ItemA'].truncate(start, end) + with catch_warnings(record=True): + dates = self.panel.major_axis + start, end = dates[1], dates[5] - assert_frame_equal(trunced['ItemA'], expected) + trunced = self.panel.truncate(start, end, axis='major') + expected = self.panel['ItemA'].truncate(start, end) - trunced = self.panel.truncate(before=start, axis='major') - expected = self.panel['ItemA'].truncate(before=start) + assert_frame_equal(trunced['ItemA'], expected) - assert_frame_equal(trunced['ItemA'], expected) + trunced = self.panel.truncate(before=start, axis='major') + expected = self.panel['ItemA'].truncate(before=start) - trunced = self.panel.truncate(after=end, axis='major') - expected = self.panel['ItemA'].truncate(after=end) + assert_frame_equal(trunced['ItemA'], expected) - assert_frame_equal(trunced['ItemA'], expected) + trunced = self.panel.truncate(after=end, axis='major') + expected = self.panel['ItemA'].truncate(after=end) - # XXX test other axes + assert_frame_equal(trunced['ItemA'], expected) def test_arith(self): - self._test_op(self.panel, operator.add) - self._test_op(self.panel, operator.sub) - self._test_op(self.panel, operator.mul) - self._test_op(self.panel, operator.truediv) - self._test_op(self.panel, operator.floordiv) - self._test_op(self.panel, operator.pow) - - self._test_op(self.panel, lambda x, y: y + x) - self._test_op(self.panel, lambda x, y: y - x) - self._test_op(self.panel, lambda x, y: y * x) - self._test_op(self.panel, lambda x, y: y / x) - self._test_op(self.panel, lambda x, y: y ** x) - - self._test_op(self.panel, lambda x, y: x + y) # panel + 1 - self._test_op(self.panel, lambda x, y: x - y) # panel - 1 - self._test_op(self.panel, lambda x, y: x * y) # panel * 1 - self._test_op(self.panel, lambda x, y: x / y) # panel / 1 - self._test_op(self.panel, lambda x, y: x ** y) # panel ** 1 - - self.assertRaises(Exception, self.panel.__add__, self.panel['ItemA']) + with catch_warnings(record=True): + self._test_op(self.panel, operator.add) + self._test_op(self.panel, operator.sub) + self._test_op(self.panel, operator.mul) + self._test_op(self.panel, operator.truediv) + self._test_op(self.panel, operator.floordiv) + self._test_op(self.panel, operator.pow) + + self._test_op(self.panel, lambda x, y: y + x) + self._test_op(self.panel, lambda x, y: y 
- x) + self._test_op(self.panel, lambda x, y: y * x) + self._test_op(self.panel, lambda x, y: y / x) + self._test_op(self.panel, lambda x, y: y ** x) + + self._test_op(self.panel, lambda x, y: x + y) # panel + 1 + self._test_op(self.panel, lambda x, y: x - y) # panel - 1 + self._test_op(self.panel, lambda x, y: x * y) # panel * 1 + self._test_op(self.panel, lambda x, y: x / y) # panel / 1 + self._test_op(self.panel, lambda x, y: x ** y) # panel ** 1 + + self.assertRaises(Exception, self.panel.__add__, + self.panel['ItemA']) @staticmethod def _test_op(panel, op): @@ -299,92 +308,100 @@ def test_iteritems(self): len(self.panel.items)) def test_combineFrame(self): - def check_op(op, name): - # items - df = self.panel['ItemA'] + with catch_warnings(record=True): + def check_op(op, name): + # items + df = self.panel['ItemA'] - func = getattr(self.panel, name) + func = getattr(self.panel, name) - result = func(df, axis='items') + result = func(df, axis='items') - assert_frame_equal(result['ItemB'], op(self.panel['ItemB'], df)) + assert_frame_equal( + result['ItemB'], op(self.panel['ItemB'], df)) - # major - xs = self.panel.major_xs(self.panel.major_axis[0]) - result = func(xs, axis='major') + # major + xs = self.panel.major_xs(self.panel.major_axis[0]) + result = func(xs, axis='major') - idx = self.panel.major_axis[1] + idx = self.panel.major_axis[1] - assert_frame_equal(result.major_xs(idx), - op(self.panel.major_xs(idx), xs)) + assert_frame_equal(result.major_xs(idx), + op(self.panel.major_xs(idx), xs)) - # minor - xs = self.panel.minor_xs(self.panel.minor_axis[0]) - result = func(xs, axis='minor') + # minor + xs = self.panel.minor_xs(self.panel.minor_axis[0]) + result = func(xs, axis='minor') - idx = self.panel.minor_axis[1] + idx = self.panel.minor_axis[1] - assert_frame_equal(result.minor_xs(idx), - op(self.panel.minor_xs(idx), xs)) + assert_frame_equal(result.minor_xs(idx), + op(self.panel.minor_xs(idx), xs)) - ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow', 'mod'] - if not compat.PY3: - ops.append('div') + ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow', 'mod'] + if not compat.PY3: + ops.append('div') - for op in ops: - try: - check_op(getattr(operator, op), op) - except: - pprint_thing("Failing operation: %r" % op) - raise - if compat.PY3: - try: - check_op(operator.truediv, 'div') - except: - pprint_thing("Failing operation: %r" % 'div') - raise + for op in ops: + try: + check_op(getattr(operator, op), op) + except: + pprint_thing("Failing operation: %r" % op) + raise + if compat.PY3: + try: + check_op(operator.truediv, 'div') + except: + pprint_thing("Failing operation: %r" % 'div') + raise def test_combinePanel(self): - result = self.panel.add(self.panel) - self.assert_panel_equal(result, self.panel * 2) + with catch_warnings(record=True): + result = self.panel.add(self.panel) + assert_panel_equal(result, self.panel * 2) def test_neg(self): - self.assert_panel_equal(-self.panel, self.panel * -1) + with catch_warnings(record=True): + assert_panel_equal(-self.panel, self.panel * -1) # issue 7692 def test_raise_when_not_implemented(self): - p = Panel(np.arange(3 * 4 * 5).reshape(3, 4, 5), - items=['ItemA', 'ItemB', 'ItemC'], - major_axis=pd.date_range('20130101', periods=4), - minor_axis=list('ABCDE')) - d = p.sum(axis=1).iloc[0] - ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'div', 'mod', 'pow'] - for op in ops: - with self.assertRaises(NotImplementedError): - getattr(p, op)(d, axis=0) + with catch_warnings(record=True): + p = Panel(np.arange(3 * 4 * 
5).reshape(3, 4, 5), + items=['ItemA', 'ItemB', 'ItemC'], + major_axis=pd.date_range('20130101', periods=4), + minor_axis=list('ABCDE')) + d = p.sum(axis=1).iloc[0] + ops = ['add', 'sub', 'mul', 'truediv', + 'floordiv', 'div', 'mod', 'pow'] + for op in ops: + with self.assertRaises(NotImplementedError): + getattr(p, op)(d, axis=0) def test_select(self): - p = self.panel + with catch_warnings(record=True): + p = self.panel - # select items - result = p.select(lambda x: x in ('ItemA', 'ItemC'), axis='items') - expected = p.reindex(items=['ItemA', 'ItemC']) - self.assert_panel_equal(result, expected) + # select items + result = p.select(lambda x: x in ('ItemA', 'ItemC'), axis='items') + expected = p.reindex(items=['ItemA', 'ItemC']) + assert_panel_equal(result, expected) - # select major_axis - result = p.select(lambda x: x >= datetime(2000, 1, 15), axis='major') - new_major = p.major_axis[p.major_axis >= datetime(2000, 1, 15)] - expected = p.reindex(major=new_major) - self.assert_panel_equal(result, expected) + # select major_axis + result = p.select(lambda x: x >= datetime( + 2000, 1, 15), axis='major') + new_major = p.major_axis[p.major_axis >= datetime(2000, 1, 15)] + expected = p.reindex(major=new_major) + assert_panel_equal(result, expected) - # select minor_axis - result = p.select(lambda x: x in ('D', 'A'), axis=2) - expected = p.reindex(minor=['A', 'D']) - self.assert_panel_equal(result, expected) + # select minor_axis + result = p.select(lambda x: x in ('D', 'A'), axis=2) + expected = p.reindex(minor=['A', 'D']) + assert_panel_equal(result, expected) - # corner case, empty thing - result = p.select(lambda x: x in ('foo', ), axis='items') - self.assert_panel_equal(result, p.reindex(items=[])) + # corner case, empty thing + result = p.select(lambda x: x in ('foo', ), axis='items') + assert_panel_equal(result, p.reindex(items=[])) def test_get_value(self): for item in self.panel.items: @@ -396,27 +413,28 @@ def test_get_value(self): def test_abs(self): - result = self.panel.abs() - result2 = abs(self.panel) - expected = np.abs(self.panel) - self.assert_panel_equal(result, expected) - self.assert_panel_equal(result2, expected) - - df = self.panel['ItemA'] - result = df.abs() - result2 = abs(df) - expected = np.abs(df) - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) + with catch_warnings(record=True): + result = self.panel.abs() + result2 = abs(self.panel) + expected = np.abs(self.panel) + assert_panel_equal(result, expected) + assert_panel_equal(result2, expected) - s = df['A'] - result = s.abs() - result2 = abs(s) - expected = np.abs(s) - assert_series_equal(result, expected) - assert_series_equal(result2, expected) - self.assertEqual(result.name, 'A') - self.assertEqual(result2.name, 'A') + df = self.panel['ItemA'] + result = df.abs() + result2 = abs(df) + expected = np.abs(df) + assert_frame_equal(result, expected) + assert_frame_equal(result2, expected) + + s = df['A'] + result = s.abs() + result2 = abs(s) + expected = np.abs(s) + assert_series_equal(result, expected) + assert_series_equal(result2, expected) + self.assertEqual(result.name, 'A') + self.assertEqual(result2.name, 'A') class CheckIndexing(object): @@ -425,188 +443,200 @@ def test_getitem(self): self.assertRaises(Exception, self.panel.__getitem__, 'ItemQ') def test_delitem_and_pop(self): - expected = self.panel['ItemA'] - result = self.panel.pop('ItemA') - assert_frame_equal(expected, result) - self.assertNotIn('ItemA', self.panel.items) + with catch_warnings(record=True): + expected = 
self.panel['ItemA'] + result = self.panel.pop('ItemA') + assert_frame_equal(expected, result) + self.assertNotIn('ItemA', self.panel.items) - del self.panel['ItemB'] - self.assertNotIn('ItemB', self.panel.items) - self.assertRaises(Exception, self.panel.__delitem__, 'ItemB') + del self.panel['ItemB'] + self.assertNotIn('ItemB', self.panel.items) + self.assertRaises(Exception, self.panel.__delitem__, 'ItemB') - values = np.empty((3, 3, 3)) - values[0] = 0 - values[1] = 1 - values[2] = 2 + values = np.empty((3, 3, 3)) + values[0] = 0 + values[1] = 1 + values[2] = 2 - panel = Panel(values, lrange(3), lrange(3), lrange(3)) + panel = Panel(values, lrange(3), lrange(3), lrange(3)) - # did we delete the right row? + # did we delete the right row? - panelc = panel.copy() - del panelc[0] - assert_frame_equal(panelc[1], panel[1]) - assert_frame_equal(panelc[2], panel[2]) + panelc = panel.copy() + del panelc[0] + assert_frame_equal(panelc[1], panel[1]) + assert_frame_equal(panelc[2], panel[2]) - panelc = panel.copy() - del panelc[1] - assert_frame_equal(panelc[0], panel[0]) - assert_frame_equal(panelc[2], panel[2]) + panelc = panel.copy() + del panelc[1] + assert_frame_equal(panelc[0], panel[0]) + assert_frame_equal(panelc[2], panel[2]) - panelc = panel.copy() - del panelc[2] - assert_frame_equal(panelc[1], panel[1]) - assert_frame_equal(panelc[0], panel[0]) + panelc = panel.copy() + del panelc[2] + assert_frame_equal(panelc[1], panel[1]) + assert_frame_equal(panelc[0], panel[0]) def test_setitem(self): - # LongPanel with one item - lp = self.panel.filter(['ItemA', 'ItemB']).to_frame() - with tm.assertRaises(ValueError): - self.panel['ItemE'] = lp + with catch_warnings(record=True): - # DataFrame - df = self.panel['ItemA'][2:].filter(items=['A', 'B']) - self.panel['ItemF'] = df - self.panel['ItemE'] = df + # LongPanel with one item + lp = self.panel.filter(['ItemA', 'ItemB']).to_frame() + with tm.assertRaises(ValueError): + self.panel['ItemE'] = lp - df2 = self.panel['ItemF'] + # DataFrame + df = self.panel['ItemA'][2:].filter(items=['A', 'B']) + self.panel['ItemF'] = df + self.panel['ItemE'] = df - assert_frame_equal(df, df2.reindex(index=df.index, columns=df.columns)) + df2 = self.panel['ItemF'] - # scalar - self.panel['ItemG'] = 1 - self.panel['ItemE'] = True - self.assertEqual(self.panel['ItemG'].values.dtype, np.int64) - self.assertEqual(self.panel['ItemE'].values.dtype, np.bool_) + assert_frame_equal(df, df2.reindex( + index=df.index, columns=df.columns)) - # object dtype - self.panel['ItemQ'] = 'foo' - self.assertEqual(self.panel['ItemQ'].values.dtype, np.object_) + # scalar + self.panel['ItemG'] = 1 + self.panel['ItemE'] = True + self.assertEqual(self.panel['ItemG'].values.dtype, np.int64) + self.assertEqual(self.panel['ItemE'].values.dtype, np.bool_) - # boolean dtype - self.panel['ItemP'] = self.panel['ItemA'] > 0 - self.assertEqual(self.panel['ItemP'].values.dtype, np.bool_) + # object dtype + self.panel['ItemQ'] = 'foo' + self.assertEqual(self.panel['ItemQ'].values.dtype, np.object_) - self.assertRaises(TypeError, self.panel.__setitem__, 'foo', - self.panel.loc[['ItemP']]) + # boolean dtype + self.panel['ItemP'] = self.panel['ItemA'] > 0 + self.assertEqual(self.panel['ItemP'].values.dtype, np.bool_) - # bad shape - p = Panel(np.random.randn(4, 3, 2)) - with tm.assertRaisesRegexp(ValueError, - r"shape of value must be \(3, 2\), " - r"shape of given object was \(4, 2\)"): - p[0] = np.random.randn(4, 2) + self.assertRaises(TypeError, self.panel.__setitem__, 'foo', + 
self.panel.loc[['ItemP']]) + + # bad shape + p = Panel(np.random.randn(4, 3, 2)) + with tm.assertRaisesRegexp(ValueError, + r"shape of value must be \(3, 2\), " + r"shape of given object was \(4, 2\)"): + p[0] = np.random.randn(4, 2) def test_setitem_ndarray(self): - timeidx = date_range(start=datetime(2009, 1, 1), - end=datetime(2009, 12, 31), - freq=MonthEnd()) - lons_coarse = np.linspace(-177.5, 177.5, 72) - lats_coarse = np.linspace(-87.5, 87.5, 36) - P = Panel(items=timeidx, major_axis=lons_coarse, - minor_axis=lats_coarse) - data = np.random.randn(72 * 36).reshape((72, 36)) - key = datetime(2009, 2, 28) - P[key] = data - - assert_almost_equal(P[key].values, data) + with catch_warnings(record=True): + timeidx = date_range(start=datetime(2009, 1, 1), + end=datetime(2009, 12, 31), + freq=MonthEnd()) + lons_coarse = np.linspace(-177.5, 177.5, 72) + lats_coarse = np.linspace(-87.5, 87.5, 36) + P = Panel(items=timeidx, major_axis=lons_coarse, + minor_axis=lats_coarse) + data = np.random.randn(72 * 36).reshape((72, 36)) + key = datetime(2009, 2, 28) + P[key] = data + + assert_almost_equal(P[key].values, data) def test_set_minor_major(self): - # GH 11014 - df1 = DataFrame(['a', 'a', 'a', np.nan, 'a', np.nan]) - df2 = DataFrame([1.0, np.nan, 1.0, np.nan, 1.0, 1.0]) - panel = Panel({'Item1': df1, 'Item2': df2}) + with catch_warnings(record=True): + # GH 11014 + df1 = DataFrame(['a', 'a', 'a', np.nan, 'a', np.nan]) + df2 = DataFrame([1.0, np.nan, 1.0, np.nan, 1.0, 1.0]) + panel = Panel({'Item1': df1, 'Item2': df2}) - newminor = notnull(panel.iloc[:, :, 0]) - panel.loc[:, :, 'NewMinor'] = newminor - assert_frame_equal(panel.loc[:, :, 'NewMinor'], - newminor.astype(object)) + newminor = notnull(panel.iloc[:, :, 0]) + panel.loc[:, :, 'NewMinor'] = newminor + assert_frame_equal(panel.loc[:, :, 'NewMinor'], + newminor.astype(object)) - newmajor = notnull(panel.iloc[:, 0, :]) - panel.loc[:, 'NewMajor', :] = newmajor - assert_frame_equal(panel.loc[:, 'NewMajor', :], - newmajor.astype(object)) + newmajor = notnull(panel.iloc[:, 0, :]) + panel.loc[:, 'NewMajor', :] = newmajor + assert_frame_equal(panel.loc[:, 'NewMajor', :], + newmajor.astype(object)) def test_major_xs(self): - ref = self.panel['ItemA'] + with catch_warnings(record=True): + ref = self.panel['ItemA'] - idx = self.panel.major_axis[5] - xs = self.panel.major_xs(idx) + idx = self.panel.major_axis[5] + xs = self.panel.major_xs(idx) - result = xs['ItemA'] - assert_series_equal(result, ref.xs(idx), check_names=False) - self.assertEqual(result.name, 'ItemA') + result = xs['ItemA'] + assert_series_equal(result, ref.xs(idx), check_names=False) + self.assertEqual(result.name, 'ItemA') - # not contained - idx = self.panel.major_axis[0] - BDay() - self.assertRaises(Exception, self.panel.major_xs, idx) + # not contained + idx = self.panel.major_axis[0] - BDay() + self.assertRaises(Exception, self.panel.major_xs, idx) def test_major_xs_mixed(self): - self.panel['ItemD'] = 'foo' - xs = self.panel.major_xs(self.panel.major_axis[0]) - self.assertEqual(xs['ItemA'].dtype, np.float64) - self.assertEqual(xs['ItemD'].dtype, np.object_) + with catch_warnings(record=True): + self.panel['ItemD'] = 'foo' + xs = self.panel.major_xs(self.panel.major_axis[0]) + self.assertEqual(xs['ItemA'].dtype, np.float64) + self.assertEqual(xs['ItemD'].dtype, np.object_) def test_minor_xs(self): - ref = self.panel['ItemA'] + with catch_warnings(record=True): + ref = self.panel['ItemA'] - idx = self.panel.minor_axis[1] - xs = self.panel.minor_xs(idx) + idx = 
self.panel.minor_axis[1] + xs = self.panel.minor_xs(idx) - assert_series_equal(xs['ItemA'], ref[idx], check_names=False) + assert_series_equal(xs['ItemA'], ref[idx], check_names=False) - # not contained - self.assertRaises(Exception, self.panel.minor_xs, 'E') + # not contained + self.assertRaises(Exception, self.panel.minor_xs, 'E') def test_minor_xs_mixed(self): - self.panel['ItemD'] = 'foo' + with catch_warnings(record=True): + self.panel['ItemD'] = 'foo' - xs = self.panel.minor_xs('D') - self.assertEqual(xs['ItemA'].dtype, np.float64) - self.assertEqual(xs['ItemD'].dtype, np.object_) + xs = self.panel.minor_xs('D') + self.assertEqual(xs['ItemA'].dtype, np.float64) + self.assertEqual(xs['ItemD'].dtype, np.object_) def test_xs(self): - itemA = self.panel.xs('ItemA', axis=0) - expected = self.panel['ItemA'] - assert_frame_equal(itemA, expected) + with catch_warnings(record=True): + itemA = self.panel.xs('ItemA', axis=0) + expected = self.panel['ItemA'] + assert_frame_equal(itemA, expected) - # get a view by default - itemA_view = self.panel.xs('ItemA', axis=0) - itemA_view.values[:] = np.nan - self.assertTrue(np.isnan(self.panel['ItemA'].values).all()) + # get a view by default + itemA_view = self.panel.xs('ItemA', axis=0) + itemA_view.values[:] = np.nan + self.assertTrue(np.isnan(self.panel['ItemA'].values).all()) - # mixed-type yields a copy - self.panel['strings'] = 'foo' - result = self.panel.xs('D', axis=2) - self.assertIsNotNone(result.is_copy) + # mixed-type yields a copy + self.panel['strings'] = 'foo' + result = self.panel.xs('D', axis=2) + self.assertIsNotNone(result.is_copy) def test_getitem_fancy_labels(self): - p = self.panel + with catch_warnings(record=True): + p = self.panel - items = p.items[[1, 0]] - dates = p.major_axis[::2] - cols = ['D', 'C', 'F'] + items = p.items[[1, 0]] + dates = p.major_axis[::2] + cols = ['D', 'C', 'F'] - # all 3 specified - assert_panel_equal(p.loc[items, dates, cols], - p.reindex(items=items, major=dates, minor=cols)) + # all 3 specified + assert_panel_equal(p.loc[items, dates, cols], + p.reindex(items=items, major=dates, minor=cols)) - # 2 specified - assert_panel_equal(p.loc[:, dates, cols], - p.reindex(major=dates, minor=cols)) + # 2 specified + assert_panel_equal(p.loc[:, dates, cols], + p.reindex(major=dates, minor=cols)) - assert_panel_equal(p.loc[items, :, cols], - p.reindex(items=items, minor=cols)) + assert_panel_equal(p.loc[items, :, cols], + p.reindex(items=items, minor=cols)) - assert_panel_equal(p.loc[items, dates, :], - p.reindex(items=items, major=dates)) + assert_panel_equal(p.loc[items, dates, :], + p.reindex(items=items, major=dates)) - # only 1 - assert_panel_equal(p.loc[items, :, :], p.reindex(items=items)) + # only 1 + assert_panel_equal(p.loc[items, :, :], p.reindex(items=items)) - assert_panel_equal(p.loc[:, dates, :], p.reindex(major=dates)) + assert_panel_equal(p.loc[:, dates, :], p.reindex(major=dates)) - assert_panel_equal(p.loc[:, :, cols], p.reindex(minor=cols)) + assert_panel_equal(p.loc[:, :, cols], p.reindex(minor=cols)) def test_getitem_fancy_slice(self): pass @@ -646,127 +676,132 @@ def test_getitem_fancy_xs(self): assert_series_equal(p.loc[:, date, col], p.major_xs(date).loc[col]) def test_getitem_fancy_xs_check_view(self): - item = 'ItemB' - date = self.panel.major_axis[5] - - # make sure it's always a view - NS = slice(None, None) - - # DataFrames - comp = assert_frame_equal - self._check_view(item, comp) - self._check_view((item, NS), comp) - self._check_view((item, NS, NS), comp) - self._check_view((NS, 
date), comp) - self._check_view((NS, date, NS), comp) - self._check_view((NS, NS, 'C'), comp) - - # Series - comp = assert_series_equal - self._check_view((item, date), comp) - self._check_view((item, date, NS), comp) - self._check_view((item, NS, 'C'), comp) - self._check_view((NS, date, 'C'), comp) + with catch_warnings(record=True): + item = 'ItemB' + date = self.panel.major_axis[5] + + # make sure it's always a view + NS = slice(None, None) + + # DataFrames + comp = assert_frame_equal + self._check_view(item, comp) + self._check_view((item, NS), comp) + self._check_view((item, NS, NS), comp) + self._check_view((NS, date), comp) + self._check_view((NS, date, NS), comp) + self._check_view((NS, NS, 'C'), comp) + + # Series + comp = assert_series_equal + self._check_view((item, date), comp) + self._check_view((item, date, NS), comp) + self._check_view((item, NS, 'C'), comp) + self._check_view((NS, date, 'C'), comp) def test_getitem_callable(self): - p = self.panel - # GH 12533 + with catch_warnings(record=True): + p = self.panel + # GH 12533 - assert_frame_equal(p[lambda x: 'ItemB'], p.loc['ItemB']) - assert_panel_equal(p[lambda x: ['ItemB', 'ItemC']], - p.loc[['ItemB', 'ItemC']]) + assert_frame_equal(p[lambda x: 'ItemB'], p.loc['ItemB']) + assert_panel_equal(p[lambda x: ['ItemB', 'ItemC']], + p.loc[['ItemB', 'ItemC']]) def test_ix_setitem_slice_dataframe(self): - a = Panel(items=[1, 2, 3], major_axis=[11, 22, 33], - minor_axis=[111, 222, 333]) - b = DataFrame(np.random.randn(2, 3), index=[111, 333], - columns=[1, 2, 3]) + with catch_warnings(record=True): + a = Panel(items=[1, 2, 3], major_axis=[11, 22, 33], + minor_axis=[111, 222, 333]) + b = DataFrame(np.random.randn(2, 3), index=[111, 333], + columns=[1, 2, 3]) - a.loc[:, 22, [111, 333]] = b + a.loc[:, 22, [111, 333]] = b - assert_frame_equal(a.loc[:, 22, [111, 333]], b) + assert_frame_equal(a.loc[:, 22, [111, 333]], b) def test_ix_align(self): - from pandas import Series - b = Series(np.random.randn(10), name=0) - b.sort_values() - df_orig = Panel(np.random.randn(3, 10, 2)) - df = df_orig.copy() + with catch_warnings(record=True): + from pandas import Series + b = Series(np.random.randn(10), name=0) + b.sort_values() + df_orig = Panel(np.random.randn(3, 10, 2)) + df = df_orig.copy() - df.loc[0, :, 0] = b - assert_series_equal(df.loc[0, :, 0].reindex(b.index), b) + df.loc[0, :, 0] = b + assert_series_equal(df.loc[0, :, 0].reindex(b.index), b) - df = df_orig.swapaxes(0, 1) - df.loc[:, 0, 0] = b - assert_series_equal(df.loc[:, 0, 0].reindex(b.index), b) + df = df_orig.swapaxes(0, 1) + df.loc[:, 0, 0] = b + assert_series_equal(df.loc[:, 0, 0].reindex(b.index), b) - df = df_orig.swapaxes(1, 2) - df.loc[0, 0, :] = b - assert_series_equal(df.loc[0, 0, :].reindex(b.index), b) + df = df_orig.swapaxes(1, 2) + df.loc[0, 0, :] = b + assert_series_equal(df.loc[0, 0, :].reindex(b.index), b) def test_ix_frame_align(self): - p_orig = tm.makePanel() - df = p_orig.iloc[0].copy() - assert_frame_equal(p_orig['ItemA'], df) + with catch_warnings(record=True): + p_orig = tm.makePanel() + df = p_orig.iloc[0].copy() + assert_frame_equal(p_orig['ItemA'], df) - p = p_orig.copy() - p.iloc[0, :, :] = df - assert_panel_equal(p, p_orig) + p = p_orig.copy() + p.iloc[0, :, :] = df + assert_panel_equal(p, p_orig) - p = p_orig.copy() - p.iloc[0] = df - assert_panel_equal(p, p_orig) + p = p_orig.copy() + p.iloc[0] = df + assert_panel_equal(p, p_orig) - p = p_orig.copy() - p.iloc[0, :, :] = df - assert_panel_equal(p, p_orig) + p = p_orig.copy() + p.iloc[0, :, :] = df + 
assert_panel_equal(p, p_orig)
- p = p_orig.copy()
- p.iloc[0] = df
- assert_panel_equal(p, p_orig)
+ p = p_orig.copy()
+ p.iloc[0] = df
+ assert_panel_equal(p, p_orig)
- p = p_orig.copy()
- p.loc['ItemA'] = df
- assert_panel_equal(p, p_orig)
+ p = p_orig.copy()
+ p.loc['ItemA'] = df
+ assert_panel_equal(p, p_orig)
- p = p_orig.copy()
- p.loc['ItemA', :, :] = df
- assert_panel_equal(p, p_orig)
+ p = p_orig.copy()
+ p.loc['ItemA', :, :] = df
+ assert_panel_equal(p, p_orig)
- p = p_orig.copy()
- p['ItemA'] = df
- assert_panel_equal(p, p_orig)
+ p = p_orig.copy()
+ p['ItemA'] = df
+ assert_panel_equal(p, p_orig)
- p = p_orig.copy()
- p.iloc[0, [0, 1, 3, 5], -2:] = df
- out = p.iloc[0, [0, 1, 3, 5], -2:]
- assert_frame_equal(out, df.iloc[[0, 1, 3, 5], [2, 3]])
+ p = p_orig.copy()
+ p.iloc[0, [0, 1, 3, 5], -2:] = df
+ out = p.iloc[0, [0, 1, 3, 5], -2:]
+ assert_frame_equal(out, df.iloc[[0, 1, 3, 5], [2, 3]])
- # GH3830, panel assignent by values/frame
- for dtype in ['float64', 'int64']:
+ # GH3830, panel assignment by values/frame
+ for dtype in ['float64', 'int64']:
- panel = Panel(np.arange(40).reshape((2, 4, 5)),
- items=['a1', 'a2'], dtype=dtype)
- df1 = panel.iloc[0]
- df2 = panel.iloc[1]
+ panel = Panel(np.arange(40).reshape((2, 4, 5)),
+ items=['a1', 'a2'], dtype=dtype)
+ df1 = panel.iloc[0]
+ df2 = panel.iloc[1]
- tm.assert_frame_equal(panel.loc['a1'], df1)
- tm.assert_frame_equal(panel.loc['a2'], df2)
+ tm.assert_frame_equal(panel.loc['a1'], df1)
+ tm.assert_frame_equal(panel.loc['a2'], df2)
- # Assignment by Value Passes for 'a2'
- panel.loc['a2'] = df1.values
- tm.assert_frame_equal(panel.loc['a1'], df1)
- tm.assert_frame_equal(panel.loc['a2'], df1)
+ # Assignment by Value Passes for 'a2'
+ panel.loc['a2'] = df1.values
+ tm.assert_frame_equal(panel.loc['a1'], df1)
+ tm.assert_frame_equal(panel.loc['a2'], df1)
- # Assignment by DataFrame Ok w/o loc 'a2'
- panel['a2'] = df2
- tm.assert_frame_equal(panel.loc['a1'], df1)
- tm.assert_frame_equal(panel.loc['a2'], df2)
+ # Assignment by DataFrame Ok w/o loc 'a2'
+ panel['a2'] = df2
+ tm.assert_frame_equal(panel.loc['a1'], df1)
+ tm.assert_frame_equal(panel.loc['a2'], df2)
- # Assignment by DataFrame Fails for 'a2'
- panel.loc['a2'] = df2
- tm.assert_frame_equal(panel.loc['a1'], df1)
- tm.assert_frame_equal(panel.loc['a2'], df2)
+ # Assignment by DataFrame Fails for 'a2'
+ panel.loc['a2'] = df2
+ tm.assert_frame_equal(panel.loc['a1'], df1)
+ tm.assert_frame_equal(panel.loc['a2'], df2)
def _check_view(self, indexer, comp):
cp = self.panel.copy()
@@ -776,57 +811,60 @@ def _check_view(self, indexer, comp):
comp(cp.loc[indexer].reindex_like(obj), obj)
def test_logical_with_nas(self):
- d = Panel({'ItemA': {'a': [np.nan, False]},
- 'ItemB': {'a': [True, True]}})
+ with catch_warnings(record=True):
+ d = Panel({'ItemA': {'a': [np.nan, False]},
+ 'ItemB': {'a': [True, True]}})
- result = d['ItemA'] | d['ItemB']
- expected = DataFrame({'a': [np.nan, True]})
- assert_frame_equal(result, expected)
+ result = d['ItemA'] | d['ItemB']
+ expected = DataFrame({'a': [np.nan, True]})
+ assert_frame_equal(result, expected)
- # this is autodowncasted here
- result = d['ItemA'].fillna(False) | d['ItemB']
- expected = DataFrame({'a': [True, True]})
- assert_frame_equal(result, expected)
+ # this is autodowncasted here
+ result = d['ItemA'].fillna(False) | d['ItemB']
+ expected = DataFrame({'a': [True, True]})
+ assert_frame_equal(result, expected)
def test_neg(self):
- # what to do?
- assert_panel_equal(-self.panel, -1 * self.panel) + with catch_warnings(record=True): + assert_panel_equal(-self.panel, -1 * self.panel) def test_invert(self): - assert_panel_equal(-(self.panel < 0), ~(self.panel < 0)) + with catch_warnings(record=True): + assert_panel_equal(-(self.panel < 0), ~(self.panel < 0)) def test_comparisons(self): - p1 = tm.makePanel() - p2 = tm.makePanel() + with catch_warnings(record=True): + p1 = tm.makePanel() + p2 = tm.makePanel() - tp = p1.reindex(items=p1.items + ['foo']) - df = p1[p1.items[0]] + tp = p1.reindex(items=p1.items + ['foo']) + df = p1[p1.items[0]] - def test_comp(func): + def test_comp(func): - # versus same index - result = func(p1, p2) - self.assert_numpy_array_equal(result.values, - func(p1.values, p2.values)) + # versus same index + result = func(p1, p2) + self.assert_numpy_array_equal(result.values, + func(p1.values, p2.values)) - # versus non-indexed same objs - self.assertRaises(Exception, func, p1, tp) + # versus non-indexed same objs + self.assertRaises(Exception, func, p1, tp) - # versus different objs - self.assertRaises(Exception, func, p1, df) + # versus different objs + self.assertRaises(Exception, func, p1, df) - # versus scalar - result3 = func(self.panel, 0) - self.assert_numpy_array_equal(result3.values, - func(self.panel.values, 0)) + # versus scalar + result3 = func(self.panel, 0) + self.assert_numpy_array_equal(result3.values, + func(self.panel.values, 0)) - with np.errstate(invalid='ignore'): - test_comp(operator.eq) - test_comp(operator.ne) - test_comp(operator.lt) - test_comp(operator.gt) - test_comp(operator.ge) - test_comp(operator.le) + with np.errstate(invalid='ignore'): + test_comp(operator.eq) + test_comp(operator.ne) + test_comp(operator.lt) + test_comp(operator.gt) + test_comp(operator.ge) + test_comp(operator.le) def test_get_value(self): for item in self.panel.items: @@ -840,28 +878,26 @@ def test_get_value(self): self.panel.get_value('a') def test_set_value(self): - for item in self.panel.items: - for mjr in self.panel.major_axis[::2]: - for mnr in self.panel.minor_axis: - self.panel.set_value(item, mjr, mnr, 1.) - assert_almost_equal(self.panel[item][mnr][mjr], 1.) - - # resize - res = self.panel.set_value('ItemE', 'foo', 'bar', 1.5) - tm.assertIsInstance(res, Panel) - self.assertIsNot(res, self.panel) - self.assertEqual(res.get_value('ItemE', 'foo', 'bar'), 1.5) - - res3 = self.panel.set_value('ItemE', 'foobar', 'baz', 5) - self.assertTrue(is_float_dtype(res3['ItemE'].values)) - with tm.assertRaisesRegexp(TypeError, - "There must be an argument for each axis" - " plus the value provided"): - self.panel.set_value('a') - - -_panel = tm.makePanel() -tm.add_nans(_panel) + with catch_warnings(record=True): + for item in self.panel.items: + for mjr in self.panel.major_axis[::2]: + for mnr in self.panel.minor_axis: + self.panel.set_value(item, mjr, mnr, 1.) + assert_almost_equal(self.panel[item][mnr][mjr], 1.) 
+
+ # resize
+ res = self.panel.set_value('ItemE', 'foo', 'bar', 1.5)
+ tm.assertIsInstance(res, Panel)
+ self.assertIsNot(res, self.panel)
+ self.assertEqual(res.get_value('ItemE', 'foo', 'bar'), 1.5)
+
+ res3 = self.panel.set_value('ItemE', 'foobar', 'baz', 5)
+ self.assertTrue(is_float_dtype(res3['ItemE'].values))
+ with tm.assertRaisesRegexp(TypeError,
+ "There must be an argument "
+ "for each axis"
+ " plus the value provided"):
+ self.panel.set_value('a')
class TestPanel(tm.TestCase, PanelTests, CheckIndexing, SafeForLongAndSparse,
@@ -872,292 +908,315 @@ def assert_panel_equal(cls, x, y):
assert_panel_equal(x, y)
def setUp(self):
- self.panel = _panel.copy()
+ self.panel = make_test_panel()
self.panel.major_axis.name = None
self.panel.minor_axis.name = None
self.panel.items.name = None
def test_constructor(self):
- # with BlockManager
- wp = Panel(self.panel._data)
- self.assertIs(wp._data, self.panel._data)
-
- wp = Panel(self.panel._data, copy=True)
- self.assertIsNot(wp._data, self.panel._data)
- assert_panel_equal(wp, self.panel)
-
- # strings handled prop
- wp = Panel([[['foo', 'foo', 'foo', ], ['foo', 'foo', 'foo']]])
- self.assertEqual(wp.values.dtype, np.object_)
-
- vals = self.panel.values
-
- # no copy
- wp = Panel(vals)
- self.assertIs(wp.values, vals)
-
- # copy
- wp = Panel(vals, copy=True)
- self.assertIsNot(wp.values, vals)
-
- # GH #8285, test when scalar data is used to construct a Panel
- # if dtype is not passed, it should be inferred
- value_and_dtype = [(1, 'int64'), (3.14, 'float64'),
- ('foo', np.object_)]
- for (val, dtype) in value_and_dtype:
- wp = Panel(val, items=range(2), major_axis=range(3),
- minor_axis=range(4))
- vals = np.empty((2, 3, 4), dtype=dtype)
- vals.fill(val)
- assert_panel_equal(wp, Panel(vals, dtype=dtype))
-
- # test the case when dtype is passed
- wp = Panel(1, items=range(2), major_axis=range(3), minor_axis=range(4),
- dtype='float32')
- vals = np.empty((2, 3, 4), dtype='float32')
- vals.fill(1)
- assert_panel_equal(wp, Panel(vals, dtype='float32'))
+ with catch_warnings(record=True):
+ # with BlockManager
+ wp = Panel(self.panel._data)
+ self.assertIs(wp._data, self.panel._data)
+
+ wp = Panel(self.panel._data, copy=True)
+ self.assertIsNot(wp._data, self.panel._data)
+ assert_panel_equal(wp, self.panel)
+
+ # strings handled properly
+ wp = Panel([[['foo', 'foo', 'foo', ], ['foo', 'foo', 'foo']]])
+ self.assertEqual(wp.values.dtype, np.object_)
+
+ vals = self.panel.values
+
+ # no copy
+ wp = Panel(vals)
+ self.assertIs(wp.values, vals)
+
+ # copy
+ wp = Panel(vals, copy=True)
+ self.assertIsNot(wp.values, vals)
+
+ # GH #8285, test when scalar data is used to construct a Panel
+ # if dtype is not passed, it should be inferred
+ value_and_dtype = [(1, 'int64'), (3.14, 'float64'),
+ ('foo', np.object_)]
+ for (val, dtype) in value_and_dtype:
+ wp = Panel(val, items=range(2), major_axis=range(3),
+ minor_axis=range(4))
+ vals = np.empty((2, 3, 4), dtype=dtype)
+ vals.fill(val)
+ assert_panel_equal(wp, Panel(vals, dtype=dtype))
+
+ # test the case when dtype is passed
+ wp = Panel(1, items=range(2), major_axis=range(3),
+ minor_axis=range(4),
+ dtype='float32')
+ vals = np.empty((2, 3, 4), dtype='float32')
+ vals.fill(1)
+ assert_panel_equal(wp, Panel(vals, dtype='float32'))
def test_constructor_cast(self):
- zero_filled = self.panel.fillna(0)
+ with catch_warnings(record=True):
+ zero_filled = self.panel.fillna(0)
- casted =
Panel(zero_filled._data, dtype=int) + casted2 = Panel(zero_filled.values, dtype=int) - exp_values = zero_filled.values.astype(int) - assert_almost_equal(casted.values, exp_values) - assert_almost_equal(casted2.values, exp_values) + exp_values = zero_filled.values.astype(int) + assert_almost_equal(casted.values, exp_values) + assert_almost_equal(casted2.values, exp_values) - casted = Panel(zero_filled._data, dtype=np.int32) - casted2 = Panel(zero_filled.values, dtype=np.int32) + casted = Panel(zero_filled._data, dtype=np.int32) + casted2 = Panel(zero_filled.values, dtype=np.int32) - exp_values = zero_filled.values.astype(np.int32) - assert_almost_equal(casted.values, exp_values) - assert_almost_equal(casted2.values, exp_values) + exp_values = zero_filled.values.astype(np.int32) + assert_almost_equal(casted.values, exp_values) + assert_almost_equal(casted2.values, exp_values) - # can't cast - data = [[['foo', 'bar', 'baz']]] - self.assertRaises(ValueError, Panel, data, dtype=float) + # can't cast + data = [[['foo', 'bar', 'baz']]] + self.assertRaises(ValueError, Panel, data, dtype=float) def test_constructor_empty_panel(self): - empty = Panel() - self.assertEqual(len(empty.items), 0) - self.assertEqual(len(empty.major_axis), 0) - self.assertEqual(len(empty.minor_axis), 0) + with catch_warnings(record=True): + empty = Panel() + self.assertEqual(len(empty.items), 0) + self.assertEqual(len(empty.major_axis), 0) + self.assertEqual(len(empty.minor_axis), 0) def test_constructor_observe_dtype(self): - # GH #411 - panel = Panel(items=lrange(3), major_axis=lrange(3), - minor_axis=lrange(3), dtype='O') - self.assertEqual(panel.values.dtype, np.object_) + with catch_warnings(record=True): + # GH #411 + panel = Panel(items=lrange(3), major_axis=lrange(3), + minor_axis=lrange(3), dtype='O') + self.assertEqual(panel.values.dtype, np.object_) def test_constructor_dtypes(self): - # GH #797 - - def _check_dtype(panel, dtype): - for i in panel.items: - self.assertEqual(panel[i].values.dtype.name, dtype) - - # only nan holding types allowed here - for dtype in ['float64', 'float32', 'object']: - panel = Panel(items=lrange(2), major_axis=lrange(10), - minor_axis=lrange(5), dtype=dtype) - _check_dtype(panel, dtype) - - for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: - panel = Panel(np.array(np.random.randn(2, 10, 5), dtype=dtype), - items=lrange(2), - major_axis=lrange(10), - minor_axis=lrange(5), dtype=dtype) - _check_dtype(panel, dtype) - - for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: - panel = Panel(np.array(np.random.randn(2, 10, 5), dtype='O'), - items=lrange(2), - major_axis=lrange(10), - minor_axis=lrange(5), dtype=dtype) - _check_dtype(panel, dtype) - - for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: - panel = Panel(np.random.randn(2, 10, 5), items=lrange( - 2), major_axis=lrange(10), minor_axis=lrange(5), dtype=dtype) - _check_dtype(panel, dtype) - - for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: - df1 = DataFrame(np.random.randn(2, 5), - index=lrange(2), columns=lrange(5)) - df2 = DataFrame(np.random.randn(2, 5), - index=lrange(2), columns=lrange(5)) - panel = Panel.from_dict({'a': df1, 'b': df2}, dtype=dtype) - _check_dtype(panel, dtype) + with catch_warnings(record=True): + # GH #797 + + def _check_dtype(panel, dtype): + for i in panel.items: + self.assertEqual(panel[i].values.dtype.name, dtype) + + # only nan holding types allowed here + for dtype in ['float64', 'float32', 'object']: + panel = Panel(items=lrange(2), 
major_axis=lrange(10), + minor_axis=lrange(5), dtype=dtype) + _check_dtype(panel, dtype) + + for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: + panel = Panel(np.array(np.random.randn(2, 10, 5), dtype=dtype), + items=lrange(2), + major_axis=lrange(10), + minor_axis=lrange(5), dtype=dtype) + _check_dtype(panel, dtype) + + for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: + panel = Panel(np.array(np.random.randn(2, 10, 5), dtype='O'), + items=lrange(2), + major_axis=lrange(10), + minor_axis=lrange(5), dtype=dtype) + _check_dtype(panel, dtype) + + for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: + panel = Panel( + np.random.randn(2, 10, 5), + items=lrange(2), major_axis=lrange(10), + minor_axis=lrange(5), + dtype=dtype) + _check_dtype(panel, dtype) + + for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: + df1 = DataFrame(np.random.randn(2, 5), + index=lrange(2), columns=lrange(5)) + df2 = DataFrame(np.random.randn(2, 5), + index=lrange(2), columns=lrange(5)) + panel = Panel.from_dict({'a': df1, 'b': df2}, dtype=dtype) + _check_dtype(panel, dtype) def test_constructor_fails_with_not_3d_input(self): - with tm.assertRaisesRegexp(ValueError, - "The number of dimensions required is 3"): - Panel(np.random.randn(10, 2)) + with catch_warnings(record=True): + with tm.assertRaisesRegexp(ValueError, "The number of dimensions required is 3"): # noqa + Panel(np.random.randn(10, 2)) def test_consolidate(self): - self.assertTrue(self.panel._data.is_consolidated()) + with catch_warnings(record=True): + self.assertTrue(self.panel._data.is_consolidated()) - self.panel['foo'] = 1. - self.assertFalse(self.panel._data.is_consolidated()) + self.panel['foo'] = 1. + self.assertFalse(self.panel._data.is_consolidated()) - panel = self.panel._consolidate() - self.assertTrue(panel._data.is_consolidated()) + panel = self.panel._consolidate() + self.assertTrue(panel._data.is_consolidated()) def test_ctor_dict(self): - itema = self.panel['ItemA'] - itemb = self.panel['ItemB'] + with catch_warnings(record=True): + itema = self.panel['ItemA'] + itemb = self.panel['ItemB'] - d = {'A': itema, 'B': itemb[5:]} - d2 = {'A': itema._series, 'B': itemb[5:]._series} - d3 = {'A': None, - 'B': DataFrame(itemb[5:]._series), - 'C': DataFrame(itema._series)} + d = {'A': itema, 'B': itemb[5:]} + d2 = {'A': itema._series, 'B': itemb[5:]._series} + d3 = {'A': None, + 'B': DataFrame(itemb[5:]._series), + 'C': DataFrame(itema._series)} - wp = Panel.from_dict(d) - wp2 = Panel.from_dict(d2) # nested Dict + wp = Panel.from_dict(d) + wp2 = Panel.from_dict(d2) # nested Dict - # TODO: unused? - wp3 = Panel.from_dict(d3) # noqa + # TODO: unused? 
+ wp3 = Panel.from_dict(d3) # noqa - self.assert_index_equal(wp.major_axis, self.panel.major_axis) - assert_panel_equal(wp, wp2) + self.assert_index_equal(wp.major_axis, self.panel.major_axis) + assert_panel_equal(wp, wp2) - # intersect - wp = Panel.from_dict(d, intersect=True) - self.assert_index_equal(wp.major_axis, itemb.index[5:]) + # intersect + wp = Panel.from_dict(d, intersect=True) + self.assert_index_equal(wp.major_axis, itemb.index[5:]) - # use constructor - assert_panel_equal(Panel(d), Panel.from_dict(d)) - assert_panel_equal(Panel(d2), Panel.from_dict(d2)) - assert_panel_equal(Panel(d3), Panel.from_dict(d3)) + # use constructor + assert_panel_equal(Panel(d), Panel.from_dict(d)) + assert_panel_equal(Panel(d2), Panel.from_dict(d2)) + assert_panel_equal(Panel(d3), Panel.from_dict(d3)) - # a pathological case - d4 = {'A': None, 'B': None} + # a pathological case + d4 = {'A': None, 'B': None} - # TODO: unused? - wp4 = Panel.from_dict(d4) # noqa + # TODO: unused? + wp4 = Panel.from_dict(d4) # noqa - assert_panel_equal(Panel(d4), Panel(items=['A', 'B'])) + assert_panel_equal(Panel(d4), Panel(items=['A', 'B'])) - # cast - dcasted = dict((k, v.reindex(wp.major_axis).fillna(0)) - for k, v in compat.iteritems(d)) - result = Panel(dcasted, dtype=int) - expected = Panel(dict((k, v.astype(int)) - for k, v in compat.iteritems(dcasted))) - assert_panel_equal(result, expected) + # cast + dcasted = dict((k, v.reindex(wp.major_axis).fillna(0)) + for k, v in compat.iteritems(d)) + result = Panel(dcasted, dtype=int) + expected = Panel(dict((k, v.astype(int)) + for k, v in compat.iteritems(dcasted))) + assert_panel_equal(result, expected) - result = Panel(dcasted, dtype=np.int32) - expected = Panel(dict((k, v.astype(np.int32)) - for k, v in compat.iteritems(dcasted))) - assert_panel_equal(result, expected) + result = Panel(dcasted, dtype=np.int32) + expected = Panel(dict((k, v.astype(np.int32)) + for k, v in compat.iteritems(dcasted))) + assert_panel_equal(result, expected) def test_constructor_dict_mixed(self): - data = dict((k, v.values) for k, v in self.panel.iteritems()) - result = Panel(data) - exp_major = Index(np.arange(len(self.panel.major_axis))) - self.assert_index_equal(result.major_axis, exp_major) + with catch_warnings(record=True): + data = dict((k, v.values) for k, v in self.panel.iteritems()) + result = Panel(data) + exp_major = Index(np.arange(len(self.panel.major_axis))) + self.assert_index_equal(result.major_axis, exp_major) - result = Panel(data, items=self.panel.items, - major_axis=self.panel.major_axis, - minor_axis=self.panel.minor_axis) - assert_panel_equal(result, self.panel) + result = Panel(data, items=self.panel.items, + major_axis=self.panel.major_axis, + minor_axis=self.panel.minor_axis) + assert_panel_equal(result, self.panel) - data['ItemC'] = self.panel['ItemC'] - result = Panel(data) - assert_panel_equal(result, self.panel) + data['ItemC'] = self.panel['ItemC'] + result = Panel(data) + assert_panel_equal(result, self.panel) - # corner, blow up - data['ItemB'] = data['ItemB'][:-1] - self.assertRaises(Exception, Panel, data) + # corner, blow up + data['ItemB'] = data['ItemB'][:-1] + self.assertRaises(Exception, Panel, data) - data['ItemB'] = self.panel['ItemB'].values[:, :-1] - self.assertRaises(Exception, Panel, data) + data['ItemB'] = self.panel['ItemB'].values[:, :-1] + self.assertRaises(Exception, Panel, data) def test_ctor_orderedDict(self): - keys = list(set(np.random.randint(0, 5000, 100)))[ - :50] # unique random int keys - d = OrderedDict([(k, mkdf(10, 5)) 
for k in keys]) - p = Panel(d) - self.assertTrue(list(p.items) == keys) + with catch_warnings(record=True): + keys = list(set(np.random.randint(0, 5000, 100)))[ + :50] # unique random int keys + d = OrderedDict([(k, mkdf(10, 5)) for k in keys]) + p = Panel(d) + self.assertTrue(list(p.items) == keys) - p = Panel.from_dict(d) - self.assertTrue(list(p.items) == keys) + p = Panel.from_dict(d) + self.assertTrue(list(p.items) == keys) def test_constructor_resize(self): - data = self.panel._data - items = self.panel.items[:-1] - major = self.panel.major_axis[:-1] - minor = self.panel.minor_axis[:-1] - - result = Panel(data, items=items, major_axis=major, minor_axis=minor) - expected = self.panel.reindex(items=items, major=major, minor=minor) - assert_panel_equal(result, expected) + with catch_warnings(record=True): + data = self.panel._data + items = self.panel.items[:-1] + major = self.panel.major_axis[:-1] + minor = self.panel.minor_axis[:-1] + + result = Panel(data, items=items, + major_axis=major, minor_axis=minor) + expected = self.panel.reindex( + items=items, major=major, minor=minor) + assert_panel_equal(result, expected) - result = Panel(data, items=items, major_axis=major) - expected = self.panel.reindex(items=items, major=major) - assert_panel_equal(result, expected) + result = Panel(data, items=items, major_axis=major) + expected = self.panel.reindex(items=items, major=major) + assert_panel_equal(result, expected) - result = Panel(data, items=items) - expected = self.panel.reindex(items=items) - assert_panel_equal(result, expected) + result = Panel(data, items=items) + expected = self.panel.reindex(items=items) + assert_panel_equal(result, expected) - result = Panel(data, minor_axis=minor) - expected = self.panel.reindex(minor=minor) - assert_panel_equal(result, expected) + result = Panel(data, minor_axis=minor) + expected = self.panel.reindex(minor=minor) + assert_panel_equal(result, expected) def test_from_dict_mixed_orient(self): - df = tm.makeDataFrame() - df['foo'] = 'bar' + with catch_warnings(record=True): + df = tm.makeDataFrame() + df['foo'] = 'bar' - data = {'k1': df, 'k2': df} + data = {'k1': df, 'k2': df} - panel = Panel.from_dict(data, orient='minor') + panel = Panel.from_dict(data, orient='minor') - self.assertEqual(panel['foo'].values.dtype, np.object_) - self.assertEqual(panel['A'].values.dtype, np.float64) + self.assertEqual(panel['foo'].values.dtype, np.object_) + self.assertEqual(panel['A'].values.dtype, np.float64) def test_constructor_error_msgs(self): - def testit(): - Panel(np.random.randn(3, 4, 5), lrange(4), lrange(5), lrange(5)) + with catch_warnings(record=True): + def testit(): + Panel(np.random.randn(3, 4, 5), + lrange(4), lrange(5), lrange(5)) - assertRaisesRegexp(ValueError, - r"Shape of passed values is \(3, 4, 5\), " - r"indices imply \(4, 5, 5\)", - testit) + assertRaisesRegexp(ValueError, + r"Shape of passed values is \(3, 4, 5\), " + r"indices imply \(4, 5, 5\)", + testit) - def testit(): - Panel(np.random.randn(3, 4, 5), lrange(5), lrange(4), lrange(5)) + def testit(): + Panel(np.random.randn(3, 4, 5), + lrange(5), lrange(4), lrange(5)) - assertRaisesRegexp(ValueError, - r"Shape of passed values is \(3, 4, 5\), " - r"indices imply \(5, 4, 5\)", - testit) + assertRaisesRegexp(ValueError, + r"Shape of passed values is \(3, 4, 5\), " + r"indices imply \(5, 4, 5\)", + testit) - def testit(): - Panel(np.random.randn(3, 4, 5), lrange(5), lrange(5), lrange(4)) + def testit(): + Panel(np.random.randn(3, 4, 5), + lrange(5), lrange(5), lrange(4)) - 
assertRaisesRegexp(ValueError, - r"Shape of passed values is \(3, 4, 5\), " - r"indices imply \(5, 5, 4\)", - testit) + assertRaisesRegexp(ValueError, + r"Shape of passed values is \(3, 4, 5\), " + r"indices imply \(5, 5, 4\)", + testit) def test_conform(self): - df = self.panel['ItemA'][:-5].filter(items=['A', 'B']) - conformed = self.panel.conform(df) + with catch_warnings(record=True): + df = self.panel['ItemA'][:-5].filter(items=['A', 'B']) + conformed = self.panel.conform(df) - tm.assert_index_equal(conformed.index, self.panel.major_axis) - tm.assert_index_equal(conformed.columns, self.panel.minor_axis) + tm.assert_index_equal(conformed.index, self.panel.major_axis) + tm.assert_index_equal(conformed.columns, self.panel.minor_axis) def test_convert_objects(self): + with catch_warnings(record=True): - # GH 4937 - p = Panel(dict(A=dict(a=['1', '1.0']))) - expected = Panel(dict(A=dict(a=[1, 1.0]))) - result = p._convert(numeric=True, coerce=True) - assert_panel_equal(result, expected) + # GH 4937 + p = Panel(dict(A=dict(a=['1', '1.0']))) + expected = Panel(dict(A=dict(a=[1, 1.0]))) + result = p._convert(numeric=True, coerce=True) + assert_panel_equal(result, expected) def test_dtypes(self): @@ -1166,875 +1225,940 @@ def test_dtypes(self): assert_series_equal(result, expected) def test_astype(self): - # GH7271 - data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) - panel = Panel(data, ['a', 'b'], ['c', 'd'], ['e', 'f']) + with catch_warnings(record=True): + # GH7271 + data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) + panel = Panel(data, ['a', 'b'], ['c', 'd'], ['e', 'f']) - str_data = np.array([[['1', '2'], ['3', '4']], - [['5', '6'], ['7', '8']]]) - expected = Panel(str_data, ['a', 'b'], ['c', 'd'], ['e', 'f']) - assert_panel_equal(panel.astype(str), expected) + str_data = np.array([[['1', '2'], ['3', '4']], + [['5', '6'], ['7', '8']]]) + expected = Panel(str_data, ['a', 'b'], ['c', 'd'], ['e', 'f']) + assert_panel_equal(panel.astype(str), expected) - self.assertRaises(NotImplementedError, panel.astype, {0: str}) + self.assertRaises(NotImplementedError, panel.astype, {0: str}) def test_apply(self): - # GH1148 - - # ufunc - applied = self.panel.apply(np.sqrt) - with np.errstate(invalid='ignore'): - expected = np.sqrt(self.panel.values) - assert_almost_equal(applied.values, expected) - - # ufunc same shape - result = self.panel.apply(lambda x: x * 2, axis='items') - expected = self.panel * 2 - assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x * 2, axis='major_axis') - expected = self.panel * 2 - assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x * 2, axis='minor_axis') - expected = self.panel * 2 - assert_panel_equal(result, expected) - - # reduction to DataFrame - result = self.panel.apply(lambda x: x.dtype, axis='items') - expected = DataFrame(np.dtype('float64'), index=self.panel.major_axis, - columns=self.panel.minor_axis) - assert_frame_equal(result, expected) - result = self.panel.apply(lambda x: x.dtype, axis='major_axis') - expected = DataFrame(np.dtype('float64'), index=self.panel.minor_axis, - columns=self.panel.items) - assert_frame_equal(result, expected) - result = self.panel.apply(lambda x: x.dtype, axis='minor_axis') - expected = DataFrame(np.dtype('float64'), index=self.panel.major_axis, - columns=self.panel.items) - assert_frame_equal(result, expected) + with catch_warnings(record=True): + # GH1148 - # reductions via other dims - expected = self.panel.sum(0) - result = self.panel.apply(lambda x: x.sum(), 
axis='items') - assert_frame_equal(result, expected) - expected = self.panel.sum(1) - result = self.panel.apply(lambda x: x.sum(), axis='major_axis') - assert_frame_equal(result, expected) - expected = self.panel.sum(2) - result = self.panel.apply(lambda x: x.sum(), axis='minor_axis') - assert_frame_equal(result, expected) + # ufunc + applied = self.panel.apply(np.sqrt) + with np.errstate(invalid='ignore'): + expected = np.sqrt(self.panel.values) + assert_almost_equal(applied.values, expected) - # pass kwargs - result = self.panel.apply(lambda x, y: x.sum() + y, axis='items', y=5) - expected = self.panel.sum(0) + 5 - assert_frame_equal(result, expected) + # ufunc same shape + result = self.panel.apply(lambda x: x * 2, axis='items') + expected = self.panel * 2 + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, axis='major_axis') + expected = self.panel * 2 + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, axis='minor_axis') + expected = self.panel * 2 + assert_panel_equal(result, expected) + + # reduction to DataFrame + result = self.panel.apply(lambda x: x.dtype, axis='items') + expected = DataFrame(np.dtype('float64'), + index=self.panel.major_axis, + columns=self.panel.minor_axis) + assert_frame_equal(result, expected) + result = self.panel.apply(lambda x: x.dtype, axis='major_axis') + expected = DataFrame(np.dtype('float64'), + index=self.panel.minor_axis, + columns=self.panel.items) + assert_frame_equal(result, expected) + result = self.panel.apply(lambda x: x.dtype, axis='minor_axis') + expected = DataFrame(np.dtype('float64'), + index=self.panel.major_axis, + columns=self.panel.items) + assert_frame_equal(result, expected) + + # reductions via other dims + expected = self.panel.sum(0) + result = self.panel.apply(lambda x: x.sum(), axis='items') + assert_frame_equal(result, expected) + expected = self.panel.sum(1) + result = self.panel.apply(lambda x: x.sum(), axis='major_axis') + assert_frame_equal(result, expected) + expected = self.panel.sum(2) + result = self.panel.apply(lambda x: x.sum(), axis='minor_axis') + assert_frame_equal(result, expected) + + # pass kwargs + result = self.panel.apply( + lambda x, y: x.sum() + y, axis='items', y=5) + expected = self.panel.sum(0) + 5 + assert_frame_equal(result, expected) def test_apply_slabs(self): + with catch_warnings(record=True): - # same shape as original - result = self.panel.apply(lambda x: x * 2, - axis=['items', 'major_axis']) - expected = (self.panel * 2).transpose('minor_axis', 'major_axis', - 'items') - assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x * 2, - axis=['major_axis', 'items']) - assert_panel_equal(result, expected) - - result = self.panel.apply(lambda x: x * 2, - axis=['items', 'minor_axis']) - expected = (self.panel * 2).transpose('major_axis', 'minor_axis', - 'items') - assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x * 2, - axis=['minor_axis', 'items']) - assert_panel_equal(result, expected) - - result = self.panel.apply(lambda x: x * 2, - axis=['major_axis', 'minor_axis']) - expected = self.panel * 2 - assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x * 2, - axis=['minor_axis', 'major_axis']) - assert_panel_equal(result, expected) - - # reductions - result = self.panel.apply(lambda x: x.sum(0), axis=[ - 'items', 'major_axis' - ]) - expected = self.panel.sum(1).T - assert_frame_equal(result, expected) + # same shape as original + result = self.panel.apply(lambda x: x 
* 2, + axis=['items', 'major_axis']) + expected = (self.panel * 2).transpose('minor_axis', 'major_axis', + 'items') + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, + axis=['major_axis', 'items']) + assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x.sum(1), axis=[ - 'items', 'major_axis' - ]) - expected = self.panel.sum(0) - assert_frame_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, + axis=['items', 'minor_axis']) + expected = (self.panel * 2).transpose('major_axis', 'minor_axis', + 'items') + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, + axis=['minor_axis', 'items']) + assert_panel_equal(result, expected) + + result = self.panel.apply(lambda x: x * 2, + axis=['major_axis', 'minor_axis']) + expected = self.panel * 2 + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, + axis=['minor_axis', 'major_axis']) + assert_panel_equal(result, expected) - # transforms - f = lambda x: ((x.T - x.mean(1)) / x.std(1)).T + # reductions + result = self.panel.apply(lambda x: x.sum(0), axis=[ + 'items', 'major_axis' + ]) + expected = self.panel.sum(1).T + assert_frame_equal(result, expected) # make sure that we don't trigger any warnings with catch_warnings(record=True): + result = self.panel.apply(lambda x: x.sum(1), axis=[ + 'items', 'major_axis' + ]) + expected = self.panel.sum(0) + assert_frame_equal(result, expected) + + # transforms + f = lambda x: ((x.T - x.mean(1)) / x.std(1)).T + + # make sure that we don't trigger any warnings result = self.panel.apply(f, axis=['items', 'major_axis']) expected = Panel(dict([(ax, f(self.panel.loc[:, :, ax])) for ax in self.panel.minor_axis])) assert_panel_equal(result, expected) - result = self.panel.apply(f, axis=['major_axis', 'minor_axis']) - expected = Panel(dict([(ax, f(self.panel.loc[ax])) - for ax in self.panel.items])) - assert_panel_equal(result, expected) - - result = self.panel.apply(f, axis=['minor_axis', 'items']) - expected = Panel(dict([(ax, f(self.panel.loc[:, ax])) - for ax in self.panel.major_axis])) - assert_panel_equal(result, expected) - - # with multi-indexes - # GH7469 - index = MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), ( - 'two', 'a'), ('two', 'b')]) - dfa = DataFrame(np.array(np.arange(12, dtype='int64')).reshape( - 4, 3), columns=list("ABC"), index=index) - dfb = DataFrame(np.array(np.arange(10, 22, dtype='int64')).reshape( - 4, 3), columns=list("ABC"), index=index) - p = Panel({'f': dfa, 'g': dfb}) - result = p.apply(lambda x: x.sum(), axis=0) - - # on windows this will be in32 - result = result.astype('int64') - expected = p.sum(0) - assert_frame_equal(result, expected) + result = self.panel.apply(f, axis=['major_axis', 'minor_axis']) + expected = Panel(dict([(ax, f(self.panel.loc[ax])) + for ax in self.panel.items])) + assert_panel_equal(result, expected) + + result = self.panel.apply(f, axis=['minor_axis', 'items']) + expected = Panel(dict([(ax, f(self.panel.loc[:, ax])) + for ax in self.panel.major_axis])) + assert_panel_equal(result, expected) + + # with multi-indexes + # GH7469 + index = MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), ( + 'two', 'a'), ('two', 'b')]) + dfa = DataFrame(np.array(np.arange(12, dtype='int64')).reshape( + 4, 3), columns=list("ABC"), index=index) + dfb = DataFrame(np.array(np.arange(10, 22, dtype='int64')).reshape( + 4, 3), columns=list("ABC"), index=index) + p = Panel({'f': dfa, 'g': dfb}) + result = p.apply(lambda x: x.sum(), axis=0) + + # 
on windows this will be int32
+ result = result.astype('int64')
+ expected = p.sum(0)
+ assert_frame_equal(result, expected)
def test_apply_no_or_zero_ndim(self):
- # GH10332
- self.panel = Panel(np.random.rand(5, 5, 5))
+ with catch_warnings(record=True):
+ # GH10332
+ self.panel = Panel(np.random.rand(5, 5, 5))
- result_int = self.panel.apply(lambda df: 0, axis=[1, 2])
- result_float = self.panel.apply(lambda df: 0.0, axis=[1, 2])
- result_int64 = self.panel.apply(lambda df: np.int64(0), axis=[1, 2])
- result_float64 = self.panel.apply(lambda df: np.float64(0.0),
- axis=[1, 2])
+ result_int = self.panel.apply(lambda df: 0, axis=[1, 2])
+ result_float = self.panel.apply(lambda df: 0.0, axis=[1, 2])
+ result_int64 = self.panel.apply(
+ lambda df: np.int64(0), axis=[1, 2])
+ result_float64 = self.panel.apply(lambda df: np.float64(0.0),
+ axis=[1, 2])
- expected_int = expected_int64 = Series([0] * 5)
- expected_float = expected_float64 = Series([0.0] * 5)
+ expected_int = expected_int64 = Series([0] * 5)
+ expected_float = expected_float64 = Series([0.0] * 5)
- assert_series_equal(result_int, expected_int)
- assert_series_equal(result_int64, expected_int64)
- assert_series_equal(result_float, expected_float)
- assert_series_equal(result_float64, expected_float64)
+ assert_series_equal(result_int, expected_int)
+ assert_series_equal(result_int64, expected_int64)
+ assert_series_equal(result_float, expected_float)
+ assert_series_equal(result_float64, expected_float64)
def test_reindex(self):
- ref = self.panel['ItemB']
+ with catch_warnings(record=True):
+ ref = self.panel['ItemB']
- # items
- result = self.panel.reindex(items=['ItemA', 'ItemB'])
- assert_frame_equal(result['ItemB'], ref)
+ # items
+ result = self.panel.reindex(items=['ItemA', 'ItemB'])
+ assert_frame_equal(result['ItemB'], ref)
- # major
- new_major = list(self.panel.major_axis[:10])
- result = self.panel.reindex(major=new_major)
- assert_frame_equal(result['ItemB'], ref.reindex(index=new_major))
+ # major
+ new_major = list(self.panel.major_axis[:10])
+ result = self.panel.reindex(major=new_major)
+ assert_frame_equal(result['ItemB'], ref.reindex(index=new_major))
- # raise exception put both major and major_axis
- self.assertRaises(Exception, self.panel.reindex, major_axis=new_major,
- major=new_major)
+ # raise exception put both major and major_axis
+ self.assertRaises(Exception, self.panel.reindex,
+ major_axis=new_major,
+ major=new_major)
- # minor
- new_minor = list(self.panel.minor_axis[:2])
- result = self.panel.reindex(minor=new_minor)
- assert_frame_equal(result['ItemB'], ref.reindex(columns=new_minor))
+ # minor
+ new_minor = list(self.panel.minor_axis[:2])
+ result = self.panel.reindex(minor=new_minor)
+ assert_frame_equal(result['ItemB'], ref.reindex(columns=new_minor))
- # this ok
- result = self.panel.reindex()
- assert_panel_equal(result, self.panel)
- self.assertFalse(result is self.panel)
+ # this ok
+ result = self.panel.reindex()
+ assert_panel_equal(result, self.panel)
+ self.assertFalse(result is self.panel)
- # with filling
- smaller_major = self.panel.major_axis[::5]
- smaller = self.panel.reindex(major=smaller_major)
+ # with filling
+ smaller_major = self.panel.major_axis[::5]
+ smaller = self.panel.reindex(major=smaller_major)
- larger = smaller.reindex(major=self.panel.major_axis, method='pad')
+ larger = smaller.reindex(major=self.panel.major_axis, method='pad')
- assert_frame_equal(larger.major_xs(self.panel.major_axis[1]),
- smaller.major_xs(smaller_major[0]))
+
assert_frame_equal(larger.major_xs(self.panel.major_axis[1]), + smaller.major_xs(smaller_major[0])) - # don't necessarily copy - result = self.panel.reindex(major=self.panel.major_axis, copy=False) - assert_panel_equal(result, self.panel) - self.assertTrue(result is self.panel) + # don't necessarily copy + result = self.panel.reindex( + major=self.panel.major_axis, copy=False) + assert_panel_equal(result, self.panel) + self.assertTrue(result is self.panel) def test_reindex_multi(self): + with catch_warnings(record=True): - # with and without copy full reindexing - result = self.panel.reindex(items=self.panel.items, - major=self.panel.major_axis, - minor=self.panel.minor_axis, copy=False) - - self.assertIs(result.items, self.panel.items) - self.assertIs(result.major_axis, self.panel.major_axis) - self.assertIs(result.minor_axis, self.panel.minor_axis) - - result = self.panel.reindex(items=self.panel.items, - major=self.panel.major_axis, - minor=self.panel.minor_axis, copy=False) - assert_panel_equal(result, self.panel) - - # multi-axis indexing consistency - # GH 5900 - df = DataFrame(np.random.randn(4, 3)) - p = Panel({'Item1': df}) - expected = Panel({'Item1': df}) - expected['Item2'] = np.nan - - items = ['Item1', 'Item2'] - major_axis = np.arange(4) - minor_axis = np.arange(3) - - results = [] - results.append(p.reindex(items=items, major_axis=major_axis, - copy=True)) - results.append(p.reindex(items=items, major_axis=major_axis, - copy=False)) - results.append(p.reindex(items=items, minor_axis=minor_axis, - copy=True)) - results.append(p.reindex(items=items, minor_axis=minor_axis, - copy=False)) - results.append(p.reindex(items=items, major_axis=major_axis, - minor_axis=minor_axis, copy=True)) - results.append(p.reindex(items=items, major_axis=major_axis, - minor_axis=minor_axis, copy=False)) - - for i, r in enumerate(results): - assert_panel_equal(expected, r) + # with and without copy full reindexing + result = self.panel.reindex( + items=self.panel.items, + major=self.panel.major_axis, + minor=self.panel.minor_axis, copy=False) + + self.assertIs(result.items, self.panel.items) + self.assertIs(result.major_axis, self.panel.major_axis) + self.assertIs(result.minor_axis, self.panel.minor_axis) + + result = self.panel.reindex( + items=self.panel.items, + major=self.panel.major_axis, + minor=self.panel.minor_axis, copy=False) + assert_panel_equal(result, self.panel) + + # multi-axis indexing consistency + # GH 5900 + df = DataFrame(np.random.randn(4, 3)) + p = Panel({'Item1': df}) + expected = Panel({'Item1': df}) + expected['Item2'] = np.nan + + items = ['Item1', 'Item2'] + major_axis = np.arange(4) + minor_axis = np.arange(3) + + results = [] + results.append(p.reindex(items=items, major_axis=major_axis, + copy=True)) + results.append(p.reindex(items=items, major_axis=major_axis, + copy=False)) + results.append(p.reindex(items=items, minor_axis=minor_axis, + copy=True)) + results.append(p.reindex(items=items, minor_axis=minor_axis, + copy=False)) + results.append(p.reindex(items=items, major_axis=major_axis, + minor_axis=minor_axis, copy=True)) + results.append(p.reindex(items=items, major_axis=major_axis, + minor_axis=minor_axis, copy=False)) + + for i, r in enumerate(results): + assert_panel_equal(expected, r) def test_reindex_like(self): - # reindex_like - smaller = self.panel.reindex(items=self.panel.items[:-1], - major=self.panel.major_axis[:-1], - minor=self.panel.minor_axis[:-1]) - smaller_like = self.panel.reindex_like(smaller) - assert_panel_equal(smaller, smaller_like) + 
with catch_warnings(record=True):
+ # reindex_like
+ smaller = self.panel.reindex(items=self.panel.items[:-1],
+ major=self.panel.major_axis[:-1],
+ minor=self.panel.minor_axis[:-1])
+ smaller_like = self.panel.reindex_like(smaller)
+ assert_panel_equal(smaller, smaller_like)
def test_take(self):
- # axis == 0
- result = self.panel.take([2, 0, 1], axis=0)
- expected = self.panel.reindex(items=['ItemC', 'ItemA', 'ItemB'])
- assert_panel_equal(result, expected)
+ with catch_warnings(record=True):
+ # axis == 0
+ result = self.panel.take([2, 0, 1], axis=0)
+ expected = self.panel.reindex(items=['ItemC', 'ItemA', 'ItemB'])
+ assert_panel_equal(result, expected)
- # axis >= 1
- result = self.panel.take([3, 0, 1, 2], axis=2)
- expected = self.panel.reindex(minor=['D', 'A', 'B', 'C'])
- assert_panel_equal(result, expected)
+ # axis >= 1
+ result = self.panel.take([3, 0, 1, 2], axis=2)
+ expected = self.panel.reindex(minor=['D', 'A', 'B', 'C'])
+ assert_panel_equal(result, expected)
- # neg indicies ok
- expected = self.panel.reindex(minor=['D', 'D', 'B', 'C'])
- result = self.panel.take([3, -1, 1, 2], axis=2)
- assert_panel_equal(result, expected)
+ # neg indices ok
+ expected = self.panel.reindex(minor=['D', 'D', 'B', 'C'])
+ result = self.panel.take([3, -1, 1, 2], axis=2)
+ assert_panel_equal(result, expected)
- self.assertRaises(Exception, self.panel.take, [4, 0, 1, 2], axis=2)
+ self.assertRaises(Exception, self.panel.take, [4, 0, 1, 2], axis=2)
def test_sort_index(self):
- import random
-
- ritems = list(self.panel.items)
- rmajor = list(self.panel.major_axis)
- rminor = list(self.panel.minor_axis)
- random.shuffle(ritems)
- random.shuffle(rmajor)
- random.shuffle(rminor)
-
- random_order = self.panel.reindex(items=ritems)
- sorted_panel = random_order.sort_index(axis=0)
- assert_panel_equal(sorted_panel, self.panel)
-
- # descending
- random_order = self.panel.reindex(items=ritems)
- sorted_panel = random_order.sort_index(axis=0, ascending=False)
- assert_panel_equal(sorted_panel,
- self.panel.reindex(items=self.panel.items[::-1]))
-
- random_order = self.panel.reindex(major=rmajor)
- sorted_panel = random_order.sort_index(axis=1)
- assert_panel_equal(sorted_panel, self.panel)
-
- random_order = self.panel.reindex(minor=rminor)
- sorted_panel = random_order.sort_index(axis=2)
- assert_panel_equal(sorted_panel, self.panel)
+ with catch_warnings(record=True):
+ import random
+
+ ritems = list(self.panel.items)
+ rmajor = list(self.panel.major_axis)
+ rminor = list(self.panel.minor_axis)
+ random.shuffle(ritems)
+ random.shuffle(rmajor)
+ random.shuffle(rminor)
+
+ random_order = self.panel.reindex(items=ritems)
+ sorted_panel = random_order.sort_index(axis=0)
+ assert_panel_equal(sorted_panel, self.panel)
+
+ # descending
+ random_order = self.panel.reindex(items=ritems)
+ sorted_panel = random_order.sort_index(axis=0, ascending=False)
+ assert_panel_equal(
+ sorted_panel,
+ self.panel.reindex(items=self.panel.items[::-1]))
+
+ random_order = self.panel.reindex(major=rmajor)
+ sorted_panel = random_order.sort_index(axis=1)
+ assert_panel_equal(sorted_panel, self.panel)
+
+ random_order = self.panel.reindex(minor=rminor)
+ sorted_panel = random_order.sort_index(axis=2)
+ assert_panel_equal(sorted_panel, self.panel)
def test_fillna(self):
- filled = self.panel.fillna(0)
- self.assertTrue(np.isfinite(filled.values).all())
-
- filled = self.panel.fillna(method='backfill')
- assert_frame_equal(filled['ItemA'],
- self.panel['ItemA'].fillna(method='backfill'))
-
- panel = self.panel.copy()
-
panel['str'] = 'foo' - - filled = panel.fillna(method='backfill') - assert_frame_equal(filled['ItemA'], - panel['ItemA'].fillna(method='backfill')) - - empty = self.panel.reindex(items=[]) - filled = empty.fillna(0) - assert_panel_equal(filled, empty) - - self.assertRaises(ValueError, self.panel.fillna) - self.assertRaises(ValueError, self.panel.fillna, 5, method='ffill') - - self.assertRaises(TypeError, self.panel.fillna, [1, 2]) - self.assertRaises(TypeError, self.panel.fillna, (1, 2)) - - # limit not implemented when only value is specified - p = Panel(np.random.randn(3, 4, 5)) - p.iloc[0:2, 0:2, 0:2] = np.nan - self.assertRaises(NotImplementedError, lambda: p.fillna(999, limit=1)) - - # Test in place fillNA - # Expected result - expected = Panel([[[0, 1], [2, 1]], [[10, 11], [12, 11]]], - items=['a', 'b'], minor_axis=['x', 'y'], - dtype=np.float64) - # method='ffill' - p1 = Panel([[[0, 1], [2, np.nan]], [[10, 11], [12, np.nan]]], - items=['a', 'b'], minor_axis=['x', 'y'], - dtype=np.float64) - p1.fillna(method='ffill', inplace=True) - assert_panel_equal(p1, expected) - - # method='bfill' - p2 = Panel([[[0, np.nan], [2, 1]], [[10, np.nan], [12, 11]]], - items=['a', 'b'], minor_axis=['x', 'y'], dtype=np.float64) - p2.fillna(method='bfill', inplace=True) - assert_panel_equal(p2, expected) + with catch_warnings(record=True): + filled = self.panel.fillna(0) + self.assertTrue(np.isfinite(filled.values).all()) + + filled = self.panel.fillna(method='backfill') + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill')) + + panel = self.panel.copy() + panel['str'] = 'foo' + + filled = panel.fillna(method='backfill') + assert_frame_equal(filled['ItemA'], + panel['ItemA'].fillna(method='backfill')) + + empty = self.panel.reindex(items=[]) + filled = empty.fillna(0) + assert_panel_equal(filled, empty) + + self.assertRaises(ValueError, self.panel.fillna) + self.assertRaises(ValueError, self.panel.fillna, 5, method='ffill') + + self.assertRaises(TypeError, self.panel.fillna, [1, 2]) + self.assertRaises(TypeError, self.panel.fillna, (1, 2)) + + # limit not implemented when only value is specified + p = Panel(np.random.randn(3, 4, 5)) + p.iloc[0:2, 0:2, 0:2] = np.nan + self.assertRaises(NotImplementedError, + lambda: p.fillna(999, limit=1)) + + # Test in place fillNA + # Expected result + expected = Panel([[[0, 1], [2, 1]], [[10, 11], [12, 11]]], + items=['a', 'b'], minor_axis=['x', 'y'], + dtype=np.float64) + # method='ffill' + p1 = Panel([[[0, 1], [2, np.nan]], [[10, 11], [12, np.nan]]], + items=['a', 'b'], minor_axis=['x', 'y'], + dtype=np.float64) + p1.fillna(method='ffill', inplace=True) + assert_panel_equal(p1, expected) + + # method='bfill' + p2 = Panel([[[0, np.nan], [2, 1]], [[10, np.nan], [12, 11]]], + items=['a', 'b'], minor_axis=['x', 'y'], + dtype=np.float64) + p2.fillna(method='bfill', inplace=True) + assert_panel_equal(p2, expected) def test_ffill_bfill(self): - assert_panel_equal(self.panel.ffill(), - self.panel.fillna(method='ffill')) - assert_panel_equal(self.panel.bfill(), - self.panel.fillna(method='bfill')) + with catch_warnings(record=True): + assert_panel_equal(self.panel.ffill(), + self.panel.fillna(method='ffill')) + assert_panel_equal(self.panel.bfill(), + self.panel.fillna(method='bfill')) def test_truncate_fillna_bug(self): - # #1823 - result = self.panel.truncate(before=None, after=None, axis='items') + with catch_warnings(record=True): + # #1823 + result = self.panel.truncate(before=None, after=None, axis='items') - # it works! 
- result.fillna(value=0.0) + # it works! + result.fillna(value=0.0) def test_swapaxes(self): - result = self.panel.swapaxes('items', 'minor') - self.assertIs(result.items, self.panel.minor_axis) + with catch_warnings(record=True): + result = self.panel.swapaxes('items', 'minor') + self.assertIs(result.items, self.panel.minor_axis) - result = self.panel.swapaxes('items', 'major') - self.assertIs(result.items, self.panel.major_axis) + result = self.panel.swapaxes('items', 'major') + self.assertIs(result.items, self.panel.major_axis) - result = self.panel.swapaxes('major', 'minor') - self.assertIs(result.major_axis, self.panel.minor_axis) + result = self.panel.swapaxes('major', 'minor') + self.assertIs(result.major_axis, self.panel.minor_axis) - panel = self.panel.copy() - result = panel.swapaxes('major', 'minor') - panel.values[0, 0, 1] = np.nan - expected = panel.swapaxes('major', 'minor') - assert_panel_equal(result, expected) + panel = self.panel.copy() + result = panel.swapaxes('major', 'minor') + panel.values[0, 0, 1] = np.nan + expected = panel.swapaxes('major', 'minor') + assert_panel_equal(result, expected) - # this should also work - result = self.panel.swapaxes(0, 1) - self.assertIs(result.items, self.panel.major_axis) + # this should also work + result = self.panel.swapaxes(0, 1) + self.assertIs(result.items, self.panel.major_axis) - # this works, but return a copy - result = self.panel.swapaxes('items', 'items') - assert_panel_equal(self.panel, result) - self.assertNotEqual(id(self.panel), id(result)) + # this works, but returns a copy + result = self.panel.swapaxes('items', 'items') + assert_panel_equal(self.panel, result) + self.assertNotEqual(id(self.panel), id(result)) def test_transpose(self): - result = self.panel.transpose('minor', 'major', 'items') - expected = self.panel.swapaxes('items', 'minor') - assert_panel_equal(result, expected) - - # test kwargs - result = self.panel.transpose(items='minor', major='major', - minor='items') - expected = self.panel.swapaxes('items', 'minor') - assert_panel_equal(result, expected) - - # text mixture of args - result = self.panel.transpose('minor', major='major', minor='items') - expected = self.panel.swapaxes('items', 'minor') - assert_panel_equal(result, expected) - - result = self.panel.transpose('minor', 'major', minor='items') - expected = self.panel.swapaxes('items', 'minor') - assert_panel_equal(result, expected) - - # duplicate axes - with tm.assertRaisesRegexp(TypeError, - 'not enough/duplicate arguments'): - self.panel.transpose('minor', maj='major', minor='items') + with catch_warnings(record=True): + result = self.panel.transpose('minor', 'major', 'items') + expected = self.panel.swapaxes('items', 'minor') + assert_panel_equal(result, expected) - with tm.assertRaisesRegexp(ValueError, 'repeated axis in transpose'): - self.panel.transpose('minor', 'major', major='minor', - minor='items') + # test kwargs + result = self.panel.transpose(items='minor', major='major', + minor='items') + expected = self.panel.swapaxes('items', 'minor') + assert_panel_equal(result, expected) - result = self.panel.transpose(2, 1, 0) - assert_panel_equal(result, expected) + # test mixture of args + result = self.panel.transpose( + 'minor', major='major', minor='items') + expected = self.panel.swapaxes('items', 'minor') + assert_panel_equal(result, expected) - result = self.panel.transpose('minor', 'items', 'major') - expected = self.panel.swapaxes('items', 'minor') - expected = expected.swapaxes('major', 'minor') - assert_panel_equal(result, 
expected) + result = self.panel.transpose('minor', + 'major', + minor='items') + expected = self.panel.swapaxes('items', 'minor') + assert_panel_equal(result, expected) - result = self.panel.transpose(2, 0, 1) - assert_panel_equal(result, expected) + # duplicate axes + with tm.assertRaisesRegexp(TypeError, + 'not enough/duplicate arguments'): + self.panel.transpose('minor', maj='major', minor='items') - self.assertRaises(ValueError, self.panel.transpose, 0, 0, 1) + with tm.assertRaisesRegexp(ValueError, + 'repeated axis in transpose'): + self.panel.transpose('minor', 'major', major='minor', + minor='items') + + result = self.panel.transpose(2, 1, 0) + assert_panel_equal(result, expected) + + result = self.panel.transpose('minor', 'items', 'major') + expected = self.panel.swapaxes('items', 'minor') + expected = expected.swapaxes('major', 'minor') + assert_panel_equal(result, expected) + + result = self.panel.transpose(2, 0, 1) + assert_panel_equal(result, expected) + + self.assertRaises(ValueError, self.panel.transpose, 0, 0, 1) def test_transpose_copy(self): - panel = self.panel.copy() - result = panel.transpose(2, 0, 1, copy=True) - expected = panel.swapaxes('items', 'minor') - expected = expected.swapaxes('major', 'minor') - assert_panel_equal(result, expected) + with catch_warnings(record=True): + panel = self.panel.copy() + result = panel.transpose(2, 0, 1, copy=True) + expected = panel.swapaxes('items', 'minor') + expected = expected.swapaxes('major', 'minor') + assert_panel_equal(result, expected) - panel.values[0, 1, 1] = np.nan - self.assertTrue(notnull(result.values[1, 0, 1])) + panel.values[0, 1, 1] = np.nan + self.assertTrue(notnull(result.values[1, 0, 1])) def test_to_frame(self): - # filtered - filtered = self.panel.to_frame() - expected = self.panel.to_frame().dropna(how='any') - assert_frame_equal(filtered, expected) - - # unfiltered - unfiltered = self.panel.to_frame(filter_observations=False) - assert_panel_equal(unfiltered.to_panel(), self.panel) - - # names - self.assertEqual(unfiltered.index.names, ('major', 'minor')) - - # unsorted, round trip - df = self.panel.to_frame(filter_observations=False) - unsorted = df.take(np.random.permutation(len(df))) - pan = unsorted.to_panel() - assert_panel_equal(pan, self.panel) - - # preserve original index names - df = DataFrame(np.random.randn(6, 2), - index=[['a', 'a', 'b', 'b', 'c', 'c'], - [0, 1, 0, 1, 0, 1]], - columns=['one', 'two']) - df.index.names = ['foo', 'bar'] - df.columns.name = 'baz' - - rdf = df.to_panel().to_frame() - self.assertEqual(rdf.index.names, df.index.names) - self.assertEqual(rdf.columns.names, df.columns.names) + with catch_warnings(record=True): + # filtered + filtered = self.panel.to_frame() + expected = self.panel.to_frame().dropna(how='any') + assert_frame_equal(filtered, expected) + + # unfiltered + unfiltered = self.panel.to_frame(filter_observations=False) + assert_panel_equal(unfiltered.to_panel(), self.panel) + + # names + self.assertEqual(unfiltered.index.names, ('major', 'minor')) + + # unsorted, round trip + df = self.panel.to_frame(filter_observations=False) + unsorted = df.take(np.random.permutation(len(df))) + pan = unsorted.to_panel() + assert_panel_equal(pan, self.panel) + + # preserve original index names + df = DataFrame(np.random.randn(6, 2), + index=[['a', 'a', 'b', 'b', 'c', 'c'], + [0, 1, 0, 1, 0, 1]], + columns=['one', 'two']) + df.index.names = ['foo', 'bar'] + df.columns.name = 'baz' + + rdf = df.to_panel().to_frame() + self.assertEqual(rdf.index.names, df.index.names) + 
self.assertEqual(rdf.columns.names, df.columns.names) def test_to_frame_mixed(self): - panel = self.panel.fillna(0) - panel['str'] = 'foo' - panel['bool'] = panel['ItemA'] > 0 - - lp = panel.to_frame() - wp = lp.to_panel() - self.assertEqual(wp['bool'].values.dtype, np.bool_) - # Previously, this was mutating the underlying index and changing its - # name - assert_frame_equal(wp['bool'], panel['bool'], check_names=False) - - # GH 8704 - # with categorical - df = panel.to_frame() - df['category'] = df['str'].astype('category') - - # to_panel - # TODO: this converts back to object - p = df.to_panel() - expected = panel.copy() - expected['category'] = 'foo' - assert_panel_equal(p, expected) + with catch_warnings(record=True): + panel = self.panel.fillna(0) + panel['str'] = 'foo' + panel['bool'] = panel['ItemA'] > 0 + + lp = panel.to_frame() + wp = lp.to_panel() + self.assertEqual(wp['bool'].values.dtype, np.bool_) + # Previously, this was mutating the underlying + # index and changing its name + assert_frame_equal(wp['bool'], panel['bool'], check_names=False) + + # GH 8704 + # with categorical + df = panel.to_frame() + df['category'] = df['str'].astype('category') + + # to_panel + # TODO: this converts back to object + p = df.to_panel() + expected = panel.copy() + expected['category'] = 'foo' + assert_panel_equal(p, expected) def test_to_frame_multi_major(self): - idx = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( - 2, 'two')]) - df = DataFrame([[1, 'a', 1], [2, 'b', 1], [3, 'c', 1], [4, 'd', 1]], - columns=['A', 'B', 'C'], index=idx) - wp = Panel({'i1': df, 'i2': df}) - expected_idx = MultiIndex.from_tuples( - [ - (1, 'one', 'A'), (1, 'one', 'B'), - (1, 'one', 'C'), (1, 'two', 'A'), - (1, 'two', 'B'), (1, 'two', 'C'), - (2, 'one', 'A'), (2, 'one', 'B'), - (2, 'one', 'C'), (2, 'two', 'A'), - (2, 'two', 'B'), (2, 'two', 'C') - ], - names=[None, None, 'minor']) - expected = DataFrame({'i1': [1, 'a', 1, 2, 'b', 1, 3, - 'c', 1, 4, 'd', 1], - 'i2': [1, 'a', 1, 2, 'b', - 1, 3, 'c', 1, 4, 'd', 1]}, - index=expected_idx) - result = wp.to_frame() - assert_frame_equal(result, expected) - - wp.iloc[0, 0].iloc[0] = np.nan # BUG on setting. 
GH #5773 - result = wp.to_frame() - assert_frame_equal(result, expected[1:]) - - idx = MultiIndex.from_tuples([(1, 'two'), (1, 'one'), (2, 'one'), ( - np.nan, 'two')]) - df = DataFrame([[1, 'a', 1], [2, 'b', 1], [3, 'c', 1], [4, 'd', 1]], - columns=['A', 'B', 'C'], index=idx) - wp = Panel({'i1': df, 'i2': df}) - ex_idx = MultiIndex.from_tuples([(1, 'two', 'A'), (1, 'two', 'B'), - (1, 'two', 'C'), - (1, 'one', 'A'), - (1, 'one', 'B'), - (1, 'one', 'C'), - (2, 'one', 'A'), - (2, 'one', 'B'), - (2, 'one', 'C'), - (np.nan, 'two', 'A'), - (np.nan, 'two', 'B'), - (np.nan, 'two', 'C')], - names=[None, None, 'minor']) - expected.index = ex_idx - result = wp.to_frame() - assert_frame_equal(result, expected) + with catch_warnings(record=True): + idx = MultiIndex.from_tuples( + [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]) + df = DataFrame([[1, 'a', 1], [2, 'b', 1], + [3, 'c', 1], [4, 'd', 1]], + columns=['A', 'B', 'C'], index=idx) + wp = Panel({'i1': df, 'i2': df}) + expected_idx = MultiIndex.from_tuples( + [ + (1, 'one', 'A'), (1, 'one', 'B'), + (1, 'one', 'C'), (1, 'two', 'A'), + (1, 'two', 'B'), (1, 'two', 'C'), + (2, 'one', 'A'), (2, 'one', 'B'), + (2, 'one', 'C'), (2, 'two', 'A'), + (2, 'two', 'B'), (2, 'two', 'C') + ], + names=[None, None, 'minor']) + expected = DataFrame({'i1': [1, 'a', 1, 2, 'b', 1, 3, + 'c', 1, 4, 'd', 1], + 'i2': [1, 'a', 1, 2, 'b', + 1, 3, 'c', 1, 4, 'd', 1]}, + index=expected_idx) + result = wp.to_frame() + assert_frame_equal(result, expected) + + wp.iloc[0, 0].iloc[0] = np.nan # BUG on setting. GH #5773 + result = wp.to_frame() + assert_frame_equal(result, expected[1:]) + + idx = MultiIndex.from_tuples( + [(1, 'two'), (1, 'one'), (2, 'one'), (np.nan, 'two')]) + df = DataFrame([[1, 'a', 1], [2, 'b', 1], + [3, 'c', 1], [4, 'd', 1]], + columns=['A', 'B', 'C'], index=idx) + wp = Panel({'i1': df, 'i2': df}) + ex_idx = MultiIndex.from_tuples([(1, 'two', 'A'), (1, 'two', 'B'), + (1, 'two', 'C'), + (1, 'one', 'A'), + (1, 'one', 'B'), + (1, 'one', 'C'), + (2, 'one', 'A'), + (2, 'one', 'B'), + (2, 'one', 'C'), + (np.nan, 'two', 'A'), + (np.nan, 'two', 'B'), + (np.nan, 'two', 'C')], + names=[None, None, 'minor']) + expected.index = ex_idx + result = wp.to_frame() + assert_frame_equal(result, expected) def test_to_frame_multi_major_minor(self): - cols = MultiIndex(levels=[['C_A', 'C_B'], ['C_1', 'C_2']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) - idx = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( - 2, 'two'), (3, 'three'), (4, 'four')]) - df = DataFrame([[1, 2, 11, 12], [3, 4, 13, 14], - ['a', 'b', 'w', 'x'], - ['c', 'd', 'y', 'z'], [-1, -2, -3, -4], - [-5, -6, -7, -8]], columns=cols, index=idx) - wp = Panel({'i1': df, 'i2': df}) - - exp_idx = MultiIndex.from_tuples( - [(1, 'one', 'C_A', 'C_1'), (1, 'one', 'C_A', 'C_2'), - (1, 'one', 'C_B', 'C_1'), (1, 'one', 'C_B', 'C_2'), - (1, 'two', 'C_A', 'C_1'), (1, 'two', 'C_A', 'C_2'), - (1, 'two', 'C_B', 'C_1'), (1, 'two', 'C_B', 'C_2'), - (2, 'one', 'C_A', 'C_1'), (2, 'one', 'C_A', 'C_2'), - (2, 'one', 'C_B', 'C_1'), (2, 'one', 'C_B', 'C_2'), - (2, 'two', 'C_A', 'C_1'), (2, 'two', 'C_A', 'C_2'), - (2, 'two', 'C_B', 'C_1'), (2, 'two', 'C_B', 'C_2'), - (3, 'three', 'C_A', 'C_1'), (3, 'three', 'C_A', 'C_2'), - (3, 'three', 'C_B', 'C_1'), (3, 'three', 'C_B', 'C_2'), - (4, 'four', 'C_A', 'C_1'), (4, 'four', 'C_A', 'C_2'), - (4, 'four', 'C_B', 'C_1'), (4, 'four', 'C_B', 'C_2')], - names=[None, None, None, None]) - exp_val = [[1, 1], [2, 2], [11, 11], [12, 12], [3, 3], [4, 4], - [13, 13], [14, 14], ['a', 'a'], ['b', 'b'], 
['w', 'w'], - ['x', 'x'], ['c', 'c'], ['d', 'd'], ['y', 'y'], ['z', 'z'], - [-1, -1], [-2, -2], [-3, -3], [-4, -4], [-5, -5], [-6, -6], - [-7, -7], [-8, -8]] - result = wp.to_frame() - expected = DataFrame(exp_val, columns=['i1', 'i2'], index=exp_idx) - assert_frame_equal(result, expected) + with catch_warnings(record=True): + cols = MultiIndex(levels=[['C_A', 'C_B'], ['C_1', 'C_2']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) + idx = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( + 2, 'two'), (3, 'three'), (4, 'four')]) + df = DataFrame([[1, 2, 11, 12], [3, 4, 13, 14], + ['a', 'b', 'w', 'x'], + ['c', 'd', 'y', 'z'], [-1, -2, -3, -4], + [-5, -6, -7, -8]], columns=cols, index=idx) + wp = Panel({'i1': df, 'i2': df}) + + exp_idx = MultiIndex.from_tuples( + [(1, 'one', 'C_A', 'C_1'), (1, 'one', 'C_A', 'C_2'), + (1, 'one', 'C_B', 'C_1'), (1, 'one', 'C_B', 'C_2'), + (1, 'two', 'C_A', 'C_1'), (1, 'two', 'C_A', 'C_2'), + (1, 'two', 'C_B', 'C_1'), (1, 'two', 'C_B', 'C_2'), + (2, 'one', 'C_A', 'C_1'), (2, 'one', 'C_A', 'C_2'), + (2, 'one', 'C_B', 'C_1'), (2, 'one', 'C_B', 'C_2'), + (2, 'two', 'C_A', 'C_1'), (2, 'two', 'C_A', 'C_2'), + (2, 'two', 'C_B', 'C_1'), (2, 'two', 'C_B', 'C_2'), + (3, 'three', 'C_A', 'C_1'), (3, 'three', 'C_A', 'C_2'), + (3, 'three', 'C_B', 'C_1'), (3, 'three', 'C_B', 'C_2'), + (4, 'four', 'C_A', 'C_1'), (4, 'four', 'C_A', 'C_2'), + (4, 'four', 'C_B', 'C_1'), (4, 'four', 'C_B', 'C_2')], + names=[None, None, None, None]) + exp_val = [[1, 1], [2, 2], [11, 11], [12, 12], + [3, 3], [4, 4], + [13, 13], [14, 14], ['a', 'a'], + ['b', 'b'], ['w', 'w'], + ['x', 'x'], ['c', 'c'], ['d', 'd'], [ + 'y', 'y'], ['z', 'z'], + [-1, -1], [-2, -2], [-3, -3], [-4, -4], + [-5, -5], [-6, -6], + [-7, -7], [-8, -8]] + result = wp.to_frame() + expected = DataFrame(exp_val, columns=['i1', 'i2'], index=exp_idx) + assert_frame_equal(result, expected) def test_to_frame_multi_drop_level(self): - idx = MultiIndex.from_tuples([(1, 'one'), (2, 'one'), (2, 'two')]) - df = DataFrame({'A': [np.nan, 1, 2]}, index=idx) - wp = Panel({'i1': df, 'i2': df}) - result = wp.to_frame() - exp_idx = MultiIndex.from_tuples([(2, 'one', 'A'), (2, 'two', 'A')], - names=[None, None, 'minor']) - expected = DataFrame({'i1': [1., 2], 'i2': [1., 2]}, index=exp_idx) - assert_frame_equal(result, expected) + with catch_warnings(record=True): + idx = MultiIndex.from_tuples([(1, 'one'), (2, 'one'), (2, 'two')]) + df = DataFrame({'A': [np.nan, 1, 2]}, index=idx) + wp = Panel({'i1': df, 'i2': df}) + result = wp.to_frame() + exp_idx = MultiIndex.from_tuples( + [(2, 'one', 'A'), (2, 'two', 'A')], + names=[None, None, 'minor']) + expected = DataFrame({'i1': [1., 2], 'i2': [1., 2]}, index=exp_idx) + assert_frame_equal(result, expected) def test_to_panel_na_handling(self): - df = DataFrame(np.random.randint(0, 10, size=20).reshape((10, 2)), - index=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1], - [0, 1, 2, 3, 4, 5, 2, 3, 4, 5]]) + with catch_warnings(record=True): + df = DataFrame(np.random.randint(0, 10, size=20).reshape((10, 2)), + index=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1], + [0, 1, 2, 3, 4, 5, 2, 3, 4, 5]]) - panel = df.to_panel() - self.assertTrue(isnull(panel[0].loc[1, [0, 1]]).all()) + panel = df.to_panel() + self.assertTrue(isnull(panel[0].loc[1, [0, 1]]).all()) def test_to_panel_duplicates(self): # #2441 - df = DataFrame({'a': [0, 0, 1], 'b': [1, 1, 1], 'c': [1, 2, 3]}) - idf = df.set_index(['a', 'b']) - assertRaisesRegexp(ValueError, 'non-uniquely indexed', idf.to_panel) + with catch_warnings(record=True): + df = DataFrame({'a': [0, 0, 1], 
'b': [1, 1, 1], 'c': [1, 2, 3]}) + idf = df.set_index(['a', 'b']) + assertRaisesRegexp( + ValueError, 'non-uniquely indexed', idf.to_panel) def test_panel_dups(self): + with catch_warnings(record=True): - # GH 4960 - # duplicates in an index + # GH 4960 + # duplicates in an index - # items - data = np.random.randn(5, 100, 5) - no_dup_panel = Panel(data, items=list("ABCDE")) - panel = Panel(data, items=list("AACDE")) + # items + data = np.random.randn(5, 100, 5) + no_dup_panel = Panel(data, items=list("ABCDE")) + panel = Panel(data, items=list("AACDE")) - expected = no_dup_panel['A'] - result = panel.iloc[0] - assert_frame_equal(result, expected) + expected = no_dup_panel['A'] + result = panel.iloc[0] + assert_frame_equal(result, expected) - expected = no_dup_panel['E'] - result = panel.loc['E'] - assert_frame_equal(result, expected) + expected = no_dup_panel['E'] + result = panel.loc['E'] + assert_frame_equal(result, expected) - expected = no_dup_panel.loc[['A', 'B']] - expected.items = ['A', 'A'] - result = panel.loc['A'] - assert_panel_equal(result, expected) + expected = no_dup_panel.loc[['A', 'B']] + expected.items = ['A', 'A'] + result = panel.loc['A'] + assert_panel_equal(result, expected) - # major - data = np.random.randn(5, 5, 5) - no_dup_panel = Panel(data, major_axis=list("ABCDE")) - panel = Panel(data, major_axis=list("AACDE")) + # major + data = np.random.randn(5, 5, 5) + no_dup_panel = Panel(data, major_axis=list("ABCDE")) + panel = Panel(data, major_axis=list("AACDE")) - expected = no_dup_panel.loc[:, 'A'] - result = panel.iloc[:, 0] - assert_frame_equal(result, expected) + expected = no_dup_panel.loc[:, 'A'] + result = panel.iloc[:, 0] + assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:, 'E'] - result = panel.loc[:, 'E'] - assert_frame_equal(result, expected) + expected = no_dup_panel.loc[:, 'E'] + result = panel.loc[:, 'E'] + assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:, ['A', 'B']] - expected.major_axis = ['A', 'A'] - result = panel.loc[:, 'A'] - assert_panel_equal(result, expected) + expected = no_dup_panel.loc[:, ['A', 'B']] + expected.major_axis = ['A', 'A'] + result = panel.loc[:, 'A'] + assert_panel_equal(result, expected) - # minor - data = np.random.randn(5, 100, 5) - no_dup_panel = Panel(data, minor_axis=list("ABCDE")) - panel = Panel(data, minor_axis=list("AACDE")) + # minor + data = np.random.randn(5, 100, 5) + no_dup_panel = Panel(data, minor_axis=list("ABCDE")) + panel = Panel(data, minor_axis=list("AACDE")) - expected = no_dup_panel.loc[:, :, 'A'] - result = panel.iloc[:, :, 0] - assert_frame_equal(result, expected) + expected = no_dup_panel.loc[:, :, 'A'] + result = panel.iloc[:, :, 0] + assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:, :, 'E'] - result = panel.loc[:, :, 'E'] - assert_frame_equal(result, expected) + expected = no_dup_panel.loc[:, :, 'E'] + result = panel.loc[:, :, 'E'] + assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:, :, ['A', 'B']] - expected.minor_axis = ['A', 'A'] - result = panel.loc[:, :, 'A'] - assert_panel_equal(result, expected) + expected = no_dup_panel.loc[:, :, ['A', 'B']] + expected.minor_axis = ['A', 'A'] + result = panel.loc[:, :, 'A'] + assert_panel_equal(result, expected) def test_filter(self): pass def test_compound(self): - compounded = self.panel.compound() + with catch_warnings(record=True): + compounded = self.panel.compound() - assert_series_equal(compounded['ItemA'], - (1 + self.panel['ItemA']).product(0) - 1, - check_names=False) + 
assert_series_equal(compounded['ItemA'], + (1 + self.panel['ItemA']).product(0) - 1, + check_names=False) def test_shift(self): - # major - idx = self.panel.major_axis[0] - idx_lag = self.panel.major_axis[1] - shifted = self.panel.shift(1) - assert_frame_equal(self.panel.major_xs(idx), shifted.major_xs(idx_lag)) - - # minor - idx = self.panel.minor_axis[0] - idx_lag = self.panel.minor_axis[1] - shifted = self.panel.shift(1, axis='minor') - assert_frame_equal(self.panel.minor_xs(idx), shifted.minor_xs(idx_lag)) - - # items - idx = self.panel.items[0] - idx_lag = self.panel.items[1] - shifted = self.panel.shift(1, axis='items') - assert_frame_equal(self.panel[idx], shifted[idx_lag]) - - # negative numbers, #2164 - result = self.panel.shift(-1) - expected = Panel(dict((i, f.shift(-1)[:-1]) - for i, f in self.panel.iteritems())) - assert_panel_equal(result, expected) - - # mixed dtypes #6959 - data = [('item ' + ch, makeMixedDataFrame()) for ch in list('abcde')] - data = dict(data) - mixed_panel = Panel.from_dict(data, orient='minor') - shifted = mixed_panel.shift(1) - assert_series_equal(mixed_panel.dtypes, shifted.dtypes) + with catch_warnings(record=True): + # major + idx = self.panel.major_axis[0] + idx_lag = self.panel.major_axis[1] + shifted = self.panel.shift(1) + assert_frame_equal(self.panel.major_xs(idx), + shifted.major_xs(idx_lag)) + + # minor + idx = self.panel.minor_axis[0] + idx_lag = self.panel.minor_axis[1] + shifted = self.panel.shift(1, axis='minor') + assert_frame_equal(self.panel.minor_xs(idx), + shifted.minor_xs(idx_lag)) + + # items + idx = self.panel.items[0] + idx_lag = self.panel.items[1] + shifted = self.panel.shift(1, axis='items') + assert_frame_equal(self.panel[idx], shifted[idx_lag]) + + # negative numbers, #2164 + result = self.panel.shift(-1) + expected = Panel(dict((i, f.shift(-1)[:-1]) + for i, f in self.panel.iteritems())) + assert_panel_equal(result, expected) + + # mixed dtypes #6959 + data = [('item ' + ch, makeMixedDataFrame()) + for ch in list('abcde')] + data = dict(data) + mixed_panel = Panel.from_dict(data, orient='minor') + shifted = mixed_panel.shift(1) + assert_series_equal(mixed_panel.dtypes, shifted.dtypes) def test_tshift(self): # PeriodIndex - ps = tm.makePeriodPanel() - shifted = ps.tshift(1) - unshifted = shifted.tshift(-1) + with catch_warnings(record=True): + ps = tm.makePeriodPanel() + shifted = ps.tshift(1) + unshifted = shifted.tshift(-1) - assert_panel_equal(unshifted, ps) + assert_panel_equal(unshifted, ps) - shifted2 = ps.tshift(freq='B') - assert_panel_equal(shifted, shifted2) + shifted2 = ps.tshift(freq='B') + assert_panel_equal(shifted, shifted2) - shifted3 = ps.tshift(freq=BDay()) - assert_panel_equal(shifted, shifted3) + shifted3 = ps.tshift(freq=BDay()) + assert_panel_equal(shifted, shifted3) - assertRaisesRegexp(ValueError, 'does not match', ps.tshift, freq='M') + assertRaisesRegexp(ValueError, 'does not match', + ps.tshift, freq='M') - # DatetimeIndex - panel = _panel - shifted = panel.tshift(1) - unshifted = shifted.tshift(-1) + # DatetimeIndex + panel = make_test_panel() + shifted = panel.tshift(1) + unshifted = shifted.tshift(-1) - assert_panel_equal(panel, unshifted) + assert_panel_equal(panel, unshifted) - shifted2 = panel.tshift(freq=panel.major_axis.freq) - assert_panel_equal(shifted, shifted2) + shifted2 = panel.tshift(freq=panel.major_axis.freq) + assert_panel_equal(shifted, shifted2) - inferred_ts = Panel(panel.values, items=panel.items, - major_axis=Index(np.asarray(panel.major_axis)), - 
minor_axis=panel.minor_axis) - shifted = inferred_ts.tshift(1) - unshifted = shifted.tshift(-1) - assert_panel_equal(shifted, panel.tshift(1)) - assert_panel_equal(unshifted, inferred_ts) + inferred_ts = Panel(panel.values, items=panel.items, + major_axis=Index(np.asarray(panel.major_axis)), + minor_axis=panel.minor_axis) + shifted = inferred_ts.tshift(1) + unshifted = shifted.tshift(-1) + assert_panel_equal(shifted, panel.tshift(1)) + assert_panel_equal(unshifted, inferred_ts) - no_freq = panel.iloc[:, [0, 5, 7], :] - self.assertRaises(ValueError, no_freq.tshift) + no_freq = panel.iloc[:, [0, 5, 7], :] + self.assertRaises(ValueError, no_freq.tshift) def test_pct_change(self): - df1 = DataFrame({'c1': [1, 2, 5], 'c2': [3, 4, 6]}) - df2 = df1 + 1 - df3 = DataFrame({'c1': [3, 4, 7], 'c2': [5, 6, 8]}) - wp = Panel({'i1': df1, 'i2': df2, 'i3': df3}) - # major, 1 - result = wp.pct_change() # axis='major' - expected = Panel({'i1': df1.pct_change(), - 'i2': df2.pct_change(), - 'i3': df3.pct_change()}) - assert_panel_equal(result, expected) - result = wp.pct_change(axis=1) - assert_panel_equal(result, expected) - # major, 2 - result = wp.pct_change(periods=2) - expected = Panel({'i1': df1.pct_change(2), - 'i2': df2.pct_change(2), - 'i3': df3.pct_change(2)}) - assert_panel_equal(result, expected) - # minor, 1 - result = wp.pct_change(axis='minor') - expected = Panel({'i1': df1.pct_change(axis=1), - 'i2': df2.pct_change(axis=1), - 'i3': df3.pct_change(axis=1)}) - assert_panel_equal(result, expected) - result = wp.pct_change(axis=2) - assert_panel_equal(result, expected) - # minor, 2 - result = wp.pct_change(periods=2, axis='minor') - expected = Panel({'i1': df1.pct_change(periods=2, axis=1), - 'i2': df2.pct_change(periods=2, axis=1), - 'i3': df3.pct_change(periods=2, axis=1)}) - assert_panel_equal(result, expected) - # items, 1 - result = wp.pct_change(axis='items') - expected = Panel({'i1': DataFrame({'c1': [np.nan, np.nan, np.nan], - 'c2': [np.nan, np.nan, np.nan]}), - 'i2': DataFrame({'c1': [1, 0.5, .2], - 'c2': [1. / 3, 0.25, 1. / 6]}), - 'i3': DataFrame({'c1': [.5, 1. / 3, 1. / 6], - 'c2': [.25, .2, 1. / 7]})}) - assert_panel_equal(result, expected) - result = wp.pct_change(axis=0) - assert_panel_equal(result, expected) - # items, 2 - result = wp.pct_change(periods=2, axis='items') - expected = Panel({'i1': DataFrame({'c1': [np.nan, np.nan, np.nan], - 'c2': [np.nan, np.nan, np.nan]}), - 'i2': DataFrame({'c1': [np.nan, np.nan, np.nan], - 'c2': [np.nan, np.nan, np.nan]}), - 'i3': DataFrame({'c1': [2, 1, .4], - 'c2': [2. / 3, .5, 1. 
/ 3]})}) - assert_panel_equal(result, expected) + with catch_warnings(record=True): + df1 = DataFrame({'c1': [1, 2, 5], 'c2': [3, 4, 6]}) + df2 = df1 + 1 + df3 = DataFrame({'c1': [3, 4, 7], 'c2': [5, 6, 8]}) + wp = Panel({'i1': df1, 'i2': df2, 'i3': df3}) + # major, 1 + result = wp.pct_change() # axis='major' + expected = Panel({'i1': df1.pct_change(), + 'i2': df2.pct_change(), + 'i3': df3.pct_change()}) + assert_panel_equal(result, expected) + result = wp.pct_change(axis=1) + assert_panel_equal(result, expected) + # major, 2 + result = wp.pct_change(periods=2) + expected = Panel({'i1': df1.pct_change(2), + 'i2': df2.pct_change(2), + 'i3': df3.pct_change(2)}) + assert_panel_equal(result, expected) + # minor, 1 + result = wp.pct_change(axis='minor') + expected = Panel({'i1': df1.pct_change(axis=1), + 'i2': df2.pct_change(axis=1), + 'i3': df3.pct_change(axis=1)}) + assert_panel_equal(result, expected) + result = wp.pct_change(axis=2) + assert_panel_equal(result, expected) + # minor, 2 + result = wp.pct_change(periods=2, axis='minor') + expected = Panel({'i1': df1.pct_change(periods=2, axis=1), + 'i2': df2.pct_change(periods=2, axis=1), + 'i3': df3.pct_change(periods=2, axis=1)}) + assert_panel_equal(result, expected) + # items, 1 + result = wp.pct_change(axis='items') + expected = Panel( + {'i1': DataFrame({'c1': [np.nan, np.nan, np.nan], + 'c2': [np.nan, np.nan, np.nan]}), + 'i2': DataFrame({'c1': [1, 0.5, .2], + 'c2': [1. / 3, 0.25, 1. / 6]}), + 'i3': DataFrame({'c1': [.5, 1. / 3, 1. / 6], + 'c2': [.25, .2, 1. / 7]})}) + assert_panel_equal(result, expected) + result = wp.pct_change(axis=0) + assert_panel_equal(result, expected) + # items, 2 + result = wp.pct_change(periods=2, axis='items') + expected = Panel( + {'i1': DataFrame({'c1': [np.nan, np.nan, np.nan], + 'c2': [np.nan, np.nan, np.nan]}), + 'i2': DataFrame({'c1': [np.nan, np.nan, np.nan], + 'c2': [np.nan, np.nan, np.nan]}), + 'i3': DataFrame({'c1': [2, 1, .4], + 'c2': [2. / 3, .5, 1. 
/ 3]})}) + assert_panel_equal(result, expected) def test_round(self): - values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], - [-1566.213, 88.88], [-12, 94.5]], - [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12], - [272.212, -99.99], [23, -76.5]]] - evalues = [[[float(np.around(i)) for i in j] for j in k] - for k in values] - p = Panel(values, items=['Item1', 'Item2'], - major_axis=pd.date_range('1/1/2000', periods=5), - minor_axis=['A', 'B']) - expected = Panel(evalues, items=['Item1', 'Item2'], - major_axis=pd.date_range('1/1/2000', periods=5), - minor_axis=['A', 'B']) - result = p.round() - self.assert_panel_equal(expected, result) + with catch_warnings(record=True): + values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], + [-1566.213, 88.88], [-12, 94.5]], + [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12], + [272.212, -99.99], [23, -76.5]]] + evalues = [[[float(np.around(i)) for i in j] for j in k] + for k in values] + p = Panel(values, items=['Item1', 'Item2'], + major_axis=pd.date_range('1/1/2000', periods=5), + minor_axis=['A', 'B']) + expected = Panel(evalues, items=['Item1', 'Item2'], + major_axis=pd.date_range('1/1/2000', periods=5), + minor_axis=['A', 'B']) + result = p.round() + assert_panel_equal(expected, result) def test_numpy_round(self): - values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], - [-1566.213, 88.88], [-12, 94.5]], - [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12], - [272.212, -99.99], [23, -76.5]]] - evalues = [[[float(np.around(i)) for i in j] for j in k] - for k in values] - p = Panel(values, items=['Item1', 'Item2'], - major_axis=pd.date_range('1/1/2000', periods=5), - minor_axis=['A', 'B']) - expected = Panel(evalues, items=['Item1', 'Item2'], - major_axis=pd.date_range('1/1/2000', periods=5), - minor_axis=['A', 'B']) - result = np.round(p) - self.assert_panel_equal(expected, result) - - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.round, p, out=p) + with catch_warnings(record=True): + values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], + [-1566.213, 88.88], [-12, 94.5]], + [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12], + [272.212, -99.99], [23, -76.5]]] + evalues = [[[float(np.around(i)) for i in j] for j in k] + for k in values] + p = Panel(values, items=['Item1', 'Item2'], + major_axis=pd.date_range('1/1/2000', periods=5), + minor_axis=['A', 'B']) + expected = Panel(evalues, items=['Item1', 'Item2'], + major_axis=pd.date_range('1/1/2000', periods=5), + minor_axis=['A', 'B']) + result = np.round(p) + assert_panel_equal(expected, result) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.round, p, out=p) def test_multiindex_get(self): - ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1), ('b', 2)], - names=['first', 'second']) - wp = Panel(np.random.random((4, 5, 5)), - items=ind, - major_axis=np.arange(5), - minor_axis=np.arange(5)) - f1 = wp['a'] - f2 = wp.loc['a'] - assert_panel_equal(f1, f2) - - self.assertTrue((f1.items == [1, 2]).all()) - self.assertTrue((f2.items == [1, 2]).all()) - - ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)], - names=['first', 'second']) + with catch_warnings(record=True): + ind = MultiIndex.from_tuples( + [('a', 1), ('a', 2), ('b', 1), ('b', 2)], + names=['first', 'second']) + wp = Panel(np.random.random((4, 5, 5)), + items=ind, + major_axis=np.arange(5), + minor_axis=np.arange(5)) + f1 = wp['a'] + f2 = wp.loc['a'] + assert_panel_equal(f1, f2) + + self.assertTrue((f1.items == [1, 2]).all()) + 
self.assertTrue((f2.items == [1, 2]).all()) + + ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)], + names=['first', 'second']) def test_multiindex_blocks(self): - ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)], - names=['first', 'second']) - wp = Panel(self.panel._data) - wp.items = ind - f1 = wp['a'] - self.assertTrue((f1.items == [1, 2]).all()) + with catch_warnings(record=True): + ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)], + names=['first', 'second']) + wp = Panel(self.panel._data) + wp.items = ind + f1 = wp['a'] + self.assertTrue((f1.items == [1, 2]).all()) - f1 = wp[('b', 1)] - self.assertTrue((f1.columns == ['A', 'B', 'C', 'D']).all()) + f1 = wp[('b', 1)] + self.assertTrue((f1.columns == ['A', 'B', 'C', 'D']).all()) def test_repr_empty(self): - empty = Panel() - repr(empty) + with catch_warnings(record=True): + empty = Panel() + repr(empty) def test_rename(self): - mapper = {'ItemA': 'foo', 'ItemB': 'bar', 'ItemC': 'baz'} + with catch_warnings(record=True): + mapper = {'ItemA': 'foo', 'ItemB': 'bar', 'ItemC': 'baz'} - renamed = self.panel.rename_axis(mapper, axis=0) - exp = Index(['foo', 'bar', 'baz']) - self.assert_index_equal(renamed.items, exp) + renamed = self.panel.rename_axis(mapper, axis=0) + exp = Index(['foo', 'bar', 'baz']) + self.assert_index_equal(renamed.items, exp) - renamed = self.panel.rename_axis(str.lower, axis=2) - exp = Index(['a', 'b', 'c', 'd']) - self.assert_index_equal(renamed.minor_axis, exp) + renamed = self.panel.rename_axis(str.lower, axis=2) + exp = Index(['a', 'b', 'c', 'd']) + self.assert_index_equal(renamed.minor_axis, exp) - # don't copy - renamed_nocopy = self.panel.rename_axis(mapper, axis=0, copy=False) - renamed_nocopy['foo'] = 3. - self.assertTrue((self.panel['ItemA'].values == 3).all()) + # don't copy + renamed_nocopy = self.panel.rename_axis(mapper, axis=0, copy=False) + renamed_nocopy['foo'] = 3. 
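As a rough illustration, not code from the patch, of what the rename assertions above check: rename_axis with a dict mapper relabels a single Panel axis, and copy=False leaves the result backed by the same data, so writes through the renamed object reach the original. The panel and item names below are made up; a sketch assuming a Panel-era pandas:

    import numpy as np
    import pandas as pd

    p = pd.Panel(np.random.randn(3, 4, 2),
                 items=['ItemA', 'ItemB', 'ItemC'])

    renamed = p.rename_axis({'ItemA': 'foo'}, axis=0)  # relabel items axis
    assert list(renamed.items) == ['foo', 'ItemB', 'ItemC']

    shared = p.rename_axis({'ItemA': 'foo'}, axis=0, copy=False)
    shared['foo'] = 3.                  # shared data, so the write propagates
    assert (p['ItemA'].values == 3).all()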
+ self.assertTrue((self.panel['ItemA'].values == 3).all()) def test_get_attr(self): assert_frame_equal(self.panel['ItemA'], self.panel.ItemA) @@ -2046,12 +2170,13 @@ def test_get_attr(self): assert_frame_equal(self.panel['i'], self.panel.i) def test_from_frame_level1_unsorted(self): - tuples = [('MSFT', 3), ('MSFT', 2), ('AAPL', 2), ('AAPL', 1), - ('MSFT', 1)] - midx = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.rand(5, 4), index=midx) - p = df.to_panel() - assert_frame_equal(p.minor_xs(2), df.xs(2, level=1).sort_index()) + with catch_warnings(record=True): + tuples = [('MSFT', 3), ('MSFT', 2), ('AAPL', 2), ('AAPL', 1), + ('MSFT', 1)] + midx = MultiIndex.from_tuples(tuples) + df = DataFrame(np.random.rand(5, 4), index=midx) + p = df.to_panel() + assert_frame_equal(p.minor_xs(2), df.xs(2, level=1).sort_index()) def test_to_excel(self): try: @@ -2094,162 +2219,191 @@ def test_to_excel_xlsxwriter(self): assert_frame_equal(df, recdf) def test_dropna(self): - p = Panel(np.random.randn(4, 5, 6), major_axis=list('abcde')) - p.loc[:, ['b', 'd'], 0] = np.nan + with catch_warnings(record=True): + p = Panel(np.random.randn(4, 5, 6), major_axis=list('abcde')) + p.loc[:, ['b', 'd'], 0] = np.nan - result = p.dropna(axis=1) - exp = p.loc[:, ['a', 'c', 'e'], :] - assert_panel_equal(result, exp) - inp = p.copy() - inp.dropna(axis=1, inplace=True) - assert_panel_equal(inp, exp) + result = p.dropna(axis=1) + exp = p.loc[:, ['a', 'c', 'e'], :] + assert_panel_equal(result, exp) + inp = p.copy() + inp.dropna(axis=1, inplace=True) + assert_panel_equal(inp, exp) - result = p.dropna(axis=1, how='all') - assert_panel_equal(result, p) + result = p.dropna(axis=1, how='all') + assert_panel_equal(result, p) - p.loc[:, ['b', 'd'], :] = np.nan - result = p.dropna(axis=1, how='all') - exp = p.loc[:, ['a', 'c', 'e'], :] - assert_panel_equal(result, exp) + p.loc[:, ['b', 'd'], :] = np.nan + result = p.dropna(axis=1, how='all') + exp = p.loc[:, ['a', 'c', 'e'], :] + assert_panel_equal(result, exp) - p = Panel(np.random.randn(4, 5, 6), items=list('abcd')) - p.loc[['b'], :, 0] = np.nan + p = Panel(np.random.randn(4, 5, 6), items=list('abcd')) + p.loc[['b'], :, 0] = np.nan - result = p.dropna() - exp = p.loc[['a', 'c', 'd']] - assert_panel_equal(result, exp) + result = p.dropna() + exp = p.loc[['a', 'c', 'd']] + assert_panel_equal(result, exp) - result = p.dropna(how='all') - assert_panel_equal(result, p) + result = p.dropna(how='all') + assert_panel_equal(result, p) - p.loc['b'] = np.nan - result = p.dropna(how='all') - exp = p.loc[['a', 'c', 'd']] - assert_panel_equal(result, exp) + p.loc['b'] = np.nan + result = p.dropna(how='all') + exp = p.loc[['a', 'c', 'd']] + assert_panel_equal(result, exp) def test_drop(self): - df = DataFrame({"A": [1, 2], "B": [3, 4]}) - panel = Panel({"One": df, "Two": df}) + with catch_warnings(record=True): + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + panel = Panel({"One": df, "Two": df}) - def check_drop(drop_val, axis_number, aliases, expected): - try: - actual = panel.drop(drop_val, axis=axis_number) - assert_panel_equal(actual, expected) - for alias in aliases: - actual = panel.drop(drop_val, axis=alias) + def check_drop(drop_val, axis_number, aliases, expected): + try: + actual = panel.drop(drop_val, axis=axis_number) assert_panel_equal(actual, expected) - except AssertionError: - pprint_thing("Failed with axis_number %d and aliases: %s" % - (axis_number, aliases)) - raise - # Items - expected = Panel({"One": df}) - check_drop('Two', 0, ['items'], expected) - - 
self.assertRaises(ValueError, panel.drop, 'Three') - - # errors = 'ignore' - dropped = panel.drop('Three', errors='ignore') - assert_panel_equal(dropped, panel) - dropped = panel.drop(['Two', 'Three'], errors='ignore') - expected = Panel({"One": df}) - assert_panel_equal(dropped, expected) - - # Major - exp_df = DataFrame({"A": [2], "B": [4]}, index=[1]) - expected = Panel({"One": exp_df, "Two": exp_df}) - check_drop(0, 1, ['major_axis', 'major'], expected) - - exp_df = DataFrame({"A": [1], "B": [3]}, index=[0]) - expected = Panel({"One": exp_df, "Two": exp_df}) - check_drop([1], 1, ['major_axis', 'major'], expected) - - # Minor - exp_df = df[['B']] - expected = Panel({"One": exp_df, "Two": exp_df}) - check_drop(["A"], 2, ['minor_axis', 'minor'], expected) - - exp_df = df[['A']] - expected = Panel({"One": exp_df, "Two": exp_df}) - check_drop("B", 2, ['minor_axis', 'minor'], expected) + for alias in aliases: + actual = panel.drop(drop_val, axis=alias) + assert_panel_equal(actual, expected) + except AssertionError: + pprint_thing("Failed with axis_number %d and aliases: %s" % + (axis_number, aliases)) + raise + # Items + expected = Panel({"One": df}) + check_drop('Two', 0, ['items'], expected) + + self.assertRaises(ValueError, panel.drop, 'Three') + + # errors = 'ignore' + dropped = panel.drop('Three', errors='ignore') + assert_panel_equal(dropped, panel) + dropped = panel.drop(['Two', 'Three'], errors='ignore') + expected = Panel({"One": df}) + assert_panel_equal(dropped, expected) + + # Major + exp_df = DataFrame({"A": [2], "B": [4]}, index=[1]) + expected = Panel({"One": exp_df, "Two": exp_df}) + check_drop(0, 1, ['major_axis', 'major'], expected) + + exp_df = DataFrame({"A": [1], "B": [3]}, index=[0]) + expected = Panel({"One": exp_df, "Two": exp_df}) + check_drop([1], 1, ['major_axis', 'major'], expected) + + # Minor + exp_df = df[['B']] + expected = Panel({"One": exp_df, "Two": exp_df}) + check_drop(["A"], 2, ['minor_axis', 'minor'], expected) + + exp_df = df[['A']] + expected = Panel({"One": exp_df, "Two": exp_df}) + check_drop("B", 2, ['minor_axis', 'minor'], expected) def test_update(self): - pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + with catch_warnings(record=True): + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) - other = Panel([[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) + other = Panel( + [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) - pan.update(other) + pan.update(other) - expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]], - [[3.6, 2., 3], [1.5, np.nan, 7], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]], + [[3.6, 2., 3], [1.5, np.nan, 7], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) - assert_panel_equal(pan, expected) + assert_panel_equal(pan, expected) def test_update_from_dict(self): - pan = Panel({'one': DataFrame([[1.5, np.nan, 3], [1.5, np.nan, 3], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]]), - 'two': DataFrame([[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]])}) - - other = {'two': DataFrame([[3.6, 2., np.nan], [np.nan, np.nan, 7]])} - - pan.update(other) - - expected = Panel( - {'two': DataFrame([[3.6, 2., 3], [1.5, np.nan, 
7], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]]), - 'one': DataFrame([[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]])}) - - assert_panel_equal(pan, expected) + with catch_warnings(record=True): + pan = Panel({'one': DataFrame([[1.5, np.nan, 3], + [1.5, np.nan, 3], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]), + 'two': DataFrame([[1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]])}) + + other = {'two': DataFrame( + [[3.6, 2., np.nan], [np.nan, np.nan, 7]])} + + pan.update(other) + + expected = Panel( + {'two': DataFrame([[3.6, 2., 3], + [1.5, np.nan, 7], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]), + 'one': DataFrame([[1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]])}) + + assert_panel_equal(pan, expected) def test_update_nooverwrite(self): - pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + with catch_warnings(record=True): + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) - other = Panel([[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) + other = Panel( + [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) - pan.update(other, overwrite=False) + pan.update(other, overwrite=False) - expected = Panel([[[1.5, np.nan, 3], [1.5, np.nan, 3], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]], - [[1.5, 2., 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + expected = Panel([[[1.5, np.nan, 3], [1.5, np.nan, 3], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]], + [[1.5, 2., 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) - assert_panel_equal(pan, expected) + assert_panel_equal(pan, expected) def test_update_filtered(self): - pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + with catch_warnings(record=True): + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) - other = Panel([[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) + other = Panel( + [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) - pan.update(other, filter_func=lambda x: x > 2) + pan.update(other, filter_func=lambda x: x > 2) - expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]], - [[1.5, np.nan, 3], [1.5, np.nan, 7], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]]]) + expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]], + [[1.5, np.nan, 3], [1.5, np.nan, 7], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]]]) - assert_panel_equal(pan, expected) + assert_panel_equal(pan, expected) def test_update_raise(self): - pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + with catch_warnings(record=True): + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) - self.assertRaises(Exception, pan.update, *(pan, ), - **{'raise_conflict': True}) + self.assertRaises(Exception, pan.update, *(pan, ), + 
**{'raise_conflict': True}) def test_all_any(self): self.assertTrue((self.panel.all(axis=0).values == nanall( @@ -2276,90 +2430,95 @@ class TestLongPanel(tm.TestCase): """ def setUp(self): - import warnings - warnings.filterwarnings(action='ignore', category=FutureWarning) - - panel = tm.makePanel() - tm.add_nans(panel) - + panel = make_test_panel() self.panel = panel.to_frame() self.unfiltered_panel = panel.to_frame(filter_observations=False) def test_ops_differently_indexed(self): - # trying to set non-identically indexed panel - wp = self.panel.to_panel() - wp2 = wp.reindex(major=wp.major_axis[:-1]) - lp2 = wp2.to_frame() + with catch_warnings(record=True): + # trying to set non-identically indexed panel + wp = self.panel.to_panel() + wp2 = wp.reindex(major=wp.major_axis[:-1]) + lp2 = wp2.to_frame() - result = self.panel + lp2 - assert_frame_equal(result.reindex(lp2.index), lp2 * 2) + result = self.panel + lp2 + assert_frame_equal(result.reindex(lp2.index), lp2 * 2) - # careful, mutation - self.panel['foo'] = lp2['ItemA'] - assert_series_equal(self.panel['foo'].reindex(lp2.index), lp2['ItemA'], - check_names=False) + # careful, mutation + self.panel['foo'] = lp2['ItemA'] + assert_series_equal(self.panel['foo'].reindex(lp2.index), + lp2['ItemA'], + check_names=False) def test_ops_scalar(self): - result = self.panel.mul(2) - expected = DataFrame.__mul__(self.panel, 2) - assert_frame_equal(result, expected) + with catch_warnings(record=True): + result = self.panel.mul(2) + expected = DataFrame.__mul__(self.panel, 2) + assert_frame_equal(result, expected) def test_combineFrame(self): - wp = self.panel.to_panel() - result = self.panel.add(wp['ItemA'].stack(), axis=0) - assert_frame_equal(result.to_panel()['ItemA'], wp['ItemA'] * 2) + with catch_warnings(record=True): + wp = self.panel.to_panel() + result = self.panel.add(wp['ItemA'].stack(), axis=0) + assert_frame_equal(result.to_panel()['ItemA'], wp['ItemA'] * 2) def test_combinePanel(self): - wp = self.panel.to_panel() - result = self.panel.add(self.panel) - wide_result = result.to_panel() - assert_frame_equal(wp['ItemA'] * 2, wide_result['ItemA']) + with catch_warnings(record=True): + wp = self.panel.to_panel() + result = self.panel.add(self.panel) + wide_result = result.to_panel() + assert_frame_equal(wp['ItemA'] * 2, wide_result['ItemA']) - # one item - result = self.panel.add(self.panel.filter(['ItemA'])) + # one item + result = self.panel.add(self.panel.filter(['ItemA'])) def test_combine_scalar(self): - result = self.panel.mul(2) - expected = DataFrame(self.panel._data) * 2 - assert_frame_equal(result, expected) + with catch_warnings(record=True): + result = self.panel.mul(2) + expected = DataFrame(self.panel._data) * 2 + assert_frame_equal(result, expected) def test_combine_series(self): - s = self.panel['ItemA'][:10] - result = self.panel.add(s, axis=0) - expected = DataFrame.add(self.panel, s, axis=0) - assert_frame_equal(result, expected) + with catch_warnings(record=True): + s = self.panel['ItemA'][:10] + result = self.panel.add(s, axis=0) + expected = DataFrame.add(self.panel, s, axis=0) + assert_frame_equal(result, expected) - s = self.panel.iloc[5] - result = self.panel + s - expected = DataFrame.add(self.panel, s, axis=1) - assert_frame_equal(result, expected) + s = self.panel.iloc[5] + result = self.panel + s + expected = DataFrame.add(self.panel, s, axis=1) + assert_frame_equal(result, expected) def test_operators(self): - wp = self.panel.to_panel() - result = (self.panel + 1).to_panel() - assert_frame_equal(wp['ItemA'] + 
1, result['ItemA']) + with catch_warnings(record=True): + wp = self.panel.to_panel() + result = (self.panel + 1).to_panel() + assert_frame_equal(wp['ItemA'] + 1, result['ItemA']) def test_arith_flex_panel(self): - ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod'] - if not compat.PY3: - aliases = {} - else: - aliases = {'div': 'truediv'} - self.panel = self.panel.to_panel() - - for n in [np.random.randint(-50, -1), np.random.randint(1, 50), 0]: - for op in ops: - alias = aliases.get(op, op) - f = getattr(operator, alias) - exp = f(self.panel, n) - result = getattr(self.panel, op)(n) - assert_panel_equal(result, exp, check_panel_type=True) - - # rops - r_f = lambda x, y: f(y, x) - exp = r_f(self.panel, n) - result = getattr(self.panel, 'r' + op)(n) - assert_panel_equal(result, exp) + with catch_warnings(record=True): + ops = ['add', 'sub', 'mul', 'div', + 'truediv', 'pow', 'floordiv', 'mod'] + if not compat.PY3: + aliases = {} + else: + aliases = {'div': 'truediv'} + self.panel = self.panel.to_panel() + + for n in [np.random.randint(-50, -1), np.random.randint(1, 50), 0]: + for op in ops: + alias = aliases.get(op, op) + f = getattr(operator, alias) + exp = f(self.panel, n) + result = getattr(self.panel, op)(n) + assert_panel_equal(result, exp, check_panel_type=True) + + # rops + r_f = lambda x, y: f(y, x) + exp = r_f(self.panel, n) + result = getattr(self.panel, 'r' + op)(n) + assert_panel_equal(result, exp) def test_sort(self): def is_sorted(arr): @@ -2382,43 +2541,44 @@ def test_to_sparse(self): self.panel.to_sparse) def test_truncate(self): - dates = self.panel.index.levels[0] - start, end = dates[1], dates[5] + with catch_warnings(record=True): + dates = self.panel.index.levels[0] + start, end = dates[1], dates[5] - trunced = self.panel.truncate(start, end).to_panel() - expected = self.panel.to_panel()['ItemA'].truncate(start, end) + trunced = self.panel.truncate(start, end).to_panel() + expected = self.panel.to_panel()['ItemA'].truncate(start, end) - # TODO trucate drops index.names - assert_frame_equal(trunced['ItemA'], expected, check_names=False) + # TODO truncate drops index.names + assert_frame_equal(trunced['ItemA'], expected, check_names=False) - trunced = self.panel.truncate(before=start).to_panel() - expected = self.panel.to_panel()['ItemA'].truncate(before=start) + trunced = self.panel.truncate(before=start).to_panel() + expected = self.panel.to_panel()['ItemA'].truncate(before=start) - # TODO trucate drops index.names - assert_frame_equal(trunced['ItemA'], expected, check_names=False) + # TODO truncate drops index.names + assert_frame_equal(trunced['ItemA'], expected, check_names=False) - trunced = self.panel.truncate(after=end).to_panel() - expected = self.panel.to_panel()['ItemA'].truncate(after=end) + trunced = self.panel.truncate(after=end).to_panel() + expected = self.panel.to_panel()['ItemA'].truncate(after=end) - # TODO trucate drops index.names - assert_frame_equal(trunced['ItemA'], expected, check_names=False) + # TODO truncate drops index.names + assert_frame_equal(trunced['ItemA'], expected, check_names=False) - # truncate on dates that aren't in there - wp = self.panel.to_panel() - new_index = wp.major_axis[::5] + # truncate on dates that aren't in there + wp = self.panel.to_panel() + new_index = wp.major_axis[::5] - wp2 = wp.reindex(major=new_index) + wp2 = wp.reindex(major=new_index) - lp2 = wp2.to_frame() - lp_trunc = lp2.truncate(wp.major_axis[2], 
wp.major_axis[-2]) - wp_trunc = wp2.truncate(wp.major_axis[2], wp.major_axis[-2]) + wp_trunc = wp2.truncate(wp.major_axis[2], wp.major_axis[-2]) - assert_panel_equal(wp_trunc, lp_trunc.to_panel()) + assert_panel_equal(wp_trunc, lp_trunc.to_panel()) - # throw proper exception - self.assertRaises(Exception, lp2.truncate, wp.major_axis[-2], - wp.major_axis[2]) + # throw proper exception + self.assertRaises(Exception, lp2.truncate, wp.major_axis[-2], + wp.major_axis[2]) def test_axis_dummies(self): from pandas.core.reshape import make_axis_dummies @@ -2449,82 +2609,70 @@ def test_get_dummies(self): self.assert_numpy_array_equal(dummies.values, minor_dummies.values) def test_mean(self): - means = self.panel.mean(level='minor') + with catch_warnings(record=True): + means = self.panel.mean(level='minor') - # test versus Panel version - wide_means = self.panel.to_panel().mean('major') - assert_frame_equal(means, wide_means) + # test versus Panel version + wide_means = self.panel.to_panel().mean('major') + assert_frame_equal(means, wide_means) def test_sum(self): - sums = self.panel.sum(level='minor') + with catch_warnings(record=True): + sums = self.panel.sum(level='minor') - # test versus Panel version - wide_sums = self.panel.to_panel().sum('major') - assert_frame_equal(sums, wide_sums) + # test versus Panel version + wide_sums = self.panel.to_panel().sum('major') + assert_frame_equal(sums, wide_sums) def test_count(self): - index = self.panel.index + with catch_warnings(record=True): + index = self.panel.index - major_count = self.panel.count(level=0)['ItemA'] - labels = index.labels[0] - for i, idx in enumerate(index.levels[0]): - self.assertEqual(major_count[i], (labels == i).sum()) + major_count = self.panel.count(level=0)['ItemA'] + labels = index.labels[0] + for i, idx in enumerate(index.levels[0]): + self.assertEqual(major_count[i], (labels == i).sum()) - minor_count = self.panel.count(level=1)['ItemA'] - labels = index.labels[1] - for i, idx in enumerate(index.levels[1]): - self.assertEqual(minor_count[i], (labels == i).sum()) + minor_count = self.panel.count(level=1)['ItemA'] + labels = index.labels[1] + for i, idx in enumerate(index.levels[1]): + self.assertEqual(minor_count[i], (labels == i).sum()) def test_join(self): - lp1 = self.panel.filter(['ItemA', 'ItemB']) - lp2 = self.panel.filter(['ItemC']) + with catch_warnings(record=True): + lp1 = self.panel.filter(['ItemA', 'ItemB']) + lp2 = self.panel.filter(['ItemC']) - joined = lp1.join(lp2) + joined = lp1.join(lp2) - self.assertEqual(len(joined.columns), 3) + self.assertEqual(len(joined.columns), 3) - self.assertRaises(Exception, lp1.join, - self.panel.filter(['ItemB', 'ItemC'])) + self.assertRaises(Exception, lp1.join, + self.panel.filter(['ItemB', 'ItemC'])) def test_pivot(self): - from pandas.core.reshape import _slow_pivot - - one, two, three = (np.array([1, 2, 3, 4, 5]), - np.array(['a', 'b', 'c', 'd', 'e']), - np.array([1, 2, 3, 5, 4.])) - df = pivot(one, two, three) - self.assertEqual(df['a'][1], 1) - self.assertEqual(df['b'][2], 2) - self.assertEqual(df['c'][3], 3) - self.assertEqual(df['d'][4], 5) - self.assertEqual(df['e'][5], 4) - assert_frame_equal(df, _slow_pivot(one, two, three)) - - # weird overlap, TODO: test? 
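For orientation, a public-API sketch of the long-to-wide reshape that test_pivot verifies through the private pivot/_slow_pivot helpers; this is my construction with made-up column names, not the patch's code. Each (index, columns) pair addresses exactly one cell of the wide frame, which is why the duplicated pair in the "weird overlap" case below must raise:

    import numpy as np
    import pandas as pd

    one = np.array([1, 2, 3, 4, 5])
    two = np.array(['a', 'b', 'c', 'd', 'e'])
    three = np.array([1, 2, 3, 5, 4.])

    long_df = pd.DataFrame({'idx': one, 'col': two, 'val': three})
    wide = long_df.pivot(index='idx', columns='col', values='val')
    assert wide['a'][1] == 1 and wide['d'][4] == 5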
- a, b, c = (np.array([1, 2, 3, 4, 4]), - np.array(['a', 'a', 'a', 'a', 'a']), - np.array([1., 2., 3., 4., 5.])) - self.assertRaises(Exception, pivot, a, b, c) - - # corner case, empty - df = pivot(np.array([]), np.array([]), np.array([])) - - -def test_monotonic(): - pos = np.array([1, 2, 3, 5]) - - def _monotonic(arr): - return not (arr[1:] < arr[:-1]).any() - - assert _monotonic(pos) - - neg = np.array([1, 2, 3, 4, 3]) - - assert not _monotonic(neg) - - neg2 = np.array([5, 1, 2, 3, 4, 5]) - - assert not _monotonic(neg2) + with catch_warnings(record=True): + from pandas.core.reshape import _slow_pivot + + one, two, three = (np.array([1, 2, 3, 4, 5]), + np.array(['a', 'b', 'c', 'd', 'e']), + np.array([1, 2, 3, 5, 4.])) + df = pivot(one, two, three) + self.assertEqual(df['a'][1], 1) + self.assertEqual(df['b'][2], 2) + self.assertEqual(df['c'][3], 3) + self.assertEqual(df['d'][4], 5) + self.assertEqual(df['e'][5], 4) + assert_frame_equal(df, _slow_pivot(one, two, three)) + + # weird overlap, TODO: test? + a, b, c = (np.array([1, 2, 3, 4, 4]), + np.array(['a', 'a', 'a', 'a', 'a']), + np.array([1., 2., 3., 4., 5.])) + self.assertRaises(Exception, pivot, a, b, c) + + # corner case, empty + df = pivot(np.array([]), np.array([]), np.array([])) def test_panel_index(): diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index ceb12c6c03074..5fc31e9321f31 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -10,8 +10,8 @@ from distutils.version import LooseVersion import pandas as pd -from pandas import (Series, DataFrame, Panel, bdate_range, isnull, - notnull, concat, Timestamp) +from pandas import (Series, DataFrame, bdate_range, isnull, + notnull, concat, Timestamp, Index) import pandas.stats.moments as mom import pandas.core.window as rwindow import pandas.tseries.offsets as offsets @@ -172,7 +172,7 @@ def test_agg_consistency(self): tm.assert_index_equal(result, expected) result = r['A'].agg([np.sum, np.mean]).columns - expected = pd.Index(['sum', 'mean']) + expected = Index(['sum', 'mean']) tm.assert_index_equal(result, expected) result = r.agg({'A': [np.sum, np.mean]}).columns @@ -1688,6 +1688,160 @@ def _check_ew_structures(self, func, name): self.assertEqual(type(frame_result), DataFrame) +class TestPairwise(object): + + # GH 7738 + df1s = [DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], + columns=['C', 'C']), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1., 0]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0., 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=['C', 1]), + DataFrame([[2., 4.], [1., 2.], [5., 2.], [8., 1.]], + columns=[1, 0.]), + DataFrame([[2, 4.], [1, 2.], [5, 2.], [8, 1.]], + columns=[0, 1.]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.]], + columns=[1., 'X']), ] + df2 = DataFrame([[None, 1, 1], [None, 1, 2], + [None, 3, 2], [None, 8, 1]], columns=['Y', 'Z', 'X']) + s = Series([1, 1, 3, 8]) + + def compare(self, result, expected): + + # since we have sorted the results + # we can only compare non-nans + result = result.dropna().values + expected = expected.dropna().values + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize('f', [lambda x: x.cov(), lambda x: x.corr()]) + def test_no_flex(self, f): + + # DataFrame methods (which do not call _flex_binary_moment()) + + results = [f(df) for df in 
self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index, df.columns) + tm.assert_index_equal(result.columns, df.columns) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + 'f', [lambda x: x.expanding().cov(pairwise=True), + lambda x: x.expanding().corr(pairwise=True), + lambda x: x.rolling(window=3).cov(pairwise=True), + lambda x: x.rolling(window=3).corr(pairwise=True), + lambda x: x.ewm(com=3).cov(pairwise=True), + lambda x: x.ewm(com=3).corr(pairwise=True)]) + def test_pairwise_with_self(self, f): + + # DataFrame with itself, pairwise=True + results = [f(df) for df in self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index.levels[0], + df.index, + check_names=False) + tm.assert_index_equal(result.index.levels[1], + df.columns, + check_names=False) + tm.assert_index_equal(result.columns, df.columns) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + 'f', [lambda x: x.expanding().cov(pairwise=False), + lambda x: x.expanding().corr(pairwise=False), + lambda x: x.rolling(window=3).cov(pairwise=False), + lambda x: x.rolling(window=3).corr(pairwise=False), + lambda x: x.ewm(com=3).cov(pairwise=False), + lambda x: x.ewm(com=3).corr(pairwise=False), ]) + def test_no_pairwise_with_self(self, f): + + # DataFrame with itself, pairwise=False + results = [f(df) for df in self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index, df.index) + tm.assert_index_equal(result.columns, df.columns) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + 'f', [lambda x, y: x.expanding().cov(y, pairwise=True), + lambda x, y: x.expanding().corr(y, pairwise=True), + lambda x, y: x.rolling(window=3).cov(y, pairwise=True), + lambda x, y: x.rolling(window=3).corr(y, pairwise=True), + lambda x, y: x.ewm(com=3).cov(y, pairwise=True), + lambda x, y: x.ewm(com=3).corr(y, pairwise=True), ]) + def test_pairwise_with_other(self, f): + + # DataFrame with another DataFrame, pairwise=True + results = [f(df, self.df2) for df in self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index.levels[0], + df.index, + check_names=False) + tm.assert_index_equal(result.index.levels[1], + self.df2.columns, + check_names=False) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + 'f', [lambda x, y: x.expanding().cov(y, pairwise=False), + lambda x, y: x.expanding().corr(y, pairwise=False), + lambda x, y: x.rolling(window=3).cov(y, pairwise=False), + lambda x, y: x.rolling(window=3).corr(y, pairwise=False), + lambda x, y: x.ewm(com=3).cov(y, pairwise=False), + lambda x, y: x.ewm(com=3).corr(y, pairwise=False), ]) + def test_no_pairwise_with_other(self, f): + + # DataFrame with another DataFrame, pairwise=False + results = [f(df, self.df2) if df.columns.is_unique else None + for df in self.df1s] + for (df, result) in zip(self.df1s, results): + if result is not None: + with catch_warnings(record=True): + # we can have int and str columns + expected_index = df.index.union(self.df2.index) + expected_columns = df.columns.union(self.df2.columns) + tm.assert_index_equal(result.index, expected_index) + tm.assert_index_equal(result.columns, expected_columns) + else: + tm.assertRaisesRegexp( + ValueError, "'arg1' columns are not unique", f, df, 
+ self.df2) + tm.assertRaisesRegexp( + ValueError, "'arg2' columns are not unique", f, + self.df2, df) + + @pytest.mark.parametrize( + 'f', [lambda x, y: x.expanding().cov(y), + lambda x, y: x.expanding().corr(y), + lambda x, y: x.rolling(window=3).cov(y), + lambda x, y: x.rolling(window=3).corr(y), + lambda x, y: x.ewm(com=3).cov(y), + lambda x, y: x.ewm(com=3).corr(y), ]) + def test_pairwise_with_series(self, f): + + # DataFrame with a Series + results = ([f(df, self.s) for df in self.df1s] + + [f(self.s, df) for df in self.df1s]) + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index, df.index) + tm.assert_index_equal(result.columns, df.columns) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + # create the data only once as we are not setting it def _create_consistency_data(): def create_series(): @@ -2083,21 +2237,6 @@ def test_expanding_consistency(self): assert_equal(expanding_f_result, expanding_apply_f_result) - if (name in ['cov', 'corr']) and isinstance(x, - DataFrame): - # test pairwise=True - expanding_f_result = expanding_f(x, pairwise=True) - expected = Panel(items=x.index, - major_axis=x.columns, - minor_axis=x.columns) - for i, _ in enumerate(x.columns): - for j, _ in enumerate(x.columns): - expected.iloc[:, i, j] = getattr( - x.iloc[:, i].expanding( - min_periods=min_periods), - name)(x.iloc[:, j]) - tm.assert_panel_equal(expanding_f_result, expected) - @tm.slow def test_rolling_consistency(self): @@ -2203,25 +2342,6 @@ def cases(): assert_equal(rolling_f_result, rolling_apply_f_result) - if (name in ['cov', 'corr']) and isinstance( - x, DataFrame): - # test pairwise=True - rolling_f_result = rolling_f(x, - pairwise=True) - expected = Panel(items=x.index, - major_axis=x.columns, - minor_axis=x.columns) - for i, _ in enumerate(x.columns): - for j, _ in enumerate(x.columns): - expected.iloc[:, i, j] = ( - getattr( - x.iloc[:, i] - .rolling(window=window, - min_periods=min_periods, - center=center), - name)(x.iloc[:, j])) - tm.assert_panel_equal(rolling_f_result, expected) - # binary moments def test_rolling_cov(self): A = self.series @@ -2257,11 +2377,11 @@ def _check_pairwise_moment(self, dispatch, name, **kwargs): def get_result(obj, obj2=None): return getattr(getattr(obj, dispatch)(**kwargs), name)(obj2) - panel = get_result(self.frame) - actual = panel.loc[:, 1, 5] + result = get_result(self.frame) + result = result.loc[(slice(None), 1), 5] + result.index = result.index.droplevel(1) expected = get_result(self.frame[1], self.frame[5]) - tm.assert_series_equal(actual, expected, check_names=False) - self.assertEqual(actual.name, 5) + tm.assert_series_equal(result, expected, check_names=False) def test_flex_binary_moment(self): # GH3155 @@ -2429,17 +2549,14 @@ def test_expanding_cov_pairwise(self): rolling_result = self.frame.rolling(window=len(self.frame), min_periods=1).corr() - for i in result.items: - tm.assert_almost_equal(result[i], rolling_result[i]) + tm.assert_frame_equal(result, rolling_result) def test_expanding_corr_pairwise(self): result = self.frame.expanding().corr() rolling_result = self.frame.rolling(window=len(self.frame), min_periods=1).corr() - - for i in result.items: - tm.assert_almost_equal(result[i], rolling_result[i]) + tm.assert_frame_equal(result, rolling_result) def test_expanding_cov_diff_index(self): # GH 7512 @@ -2507,8 +2624,6 @@ def test_rolling_functions_window_non_shrinkage(self): s_expected = Series(np.nan, index=s.index) df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], 
columns=['A', 'B']) df_expected = DataFrame(np.nan, index=df.index, columns=df.columns) - df_expected_panel = Panel(items=df.index, major_axis=df.columns, - minor_axis=df.columns) functions = [lambda x: (x.rolling(window=10, min_periods=5) .cov(x, pairwise=False)), @@ -2540,13 +2655,24 @@ def test_rolling_functions_window_non_shrinkage(self): # scipy needed for rolling_window continue + def test_rolling_functions_window_non_shrinkage_binary(self): + + # corr/cov return a MI DataFrame + df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], + columns=Index(['A', 'B'], name='foo'), + index=Index(range(4), name='bar')) + df_expected = DataFrame( + columns=Index(['A', 'B'], name='foo'), + index=pd.MultiIndex.from_product([df.index, df.columns], + names=['bar', 'foo']), + dtype='float64') functions = [lambda x: (x.rolling(window=10, min_periods=5) .cov(x, pairwise=True)), lambda x: (x.rolling(window=10, min_periods=5) .corr(x, pairwise=True))] for f in functions: - df_result_panel = f(df) - tm.assert_panel_equal(df_result_panel, df_expected_panel) + df_result = f(df) + tm.assert_frame_equal(df_result, df_expected) def test_moment_functions_zero_length(self): # GH 8056 @@ -2554,13 +2680,9 @@ def test_moment_functions_zero_length(self): s_expected = s df1 = DataFrame() df1_expected = df1 - df1_expected_panel = Panel(items=df1.index, major_axis=df1.columns, - minor_axis=df1.columns) df2 = DataFrame(columns=['a']) df2['a'] = df2['a'].astype('float64') df2_expected = df2 - df2_expected_panel = Panel(items=df2.index, major_axis=df2.columns, - minor_axis=df2.columns) functions = [lambda x: x.expanding().count(), lambda x: x.expanding(min_periods=5).cov( @@ -2613,6 +2735,23 @@ def test_moment_functions_zero_length(self): # scipy needed for rolling_window continue + def test_moment_functions_zero_length_pairwise(self): + + df1 = DataFrame() + df1_expected = df1 + df2 = DataFrame(columns=Index(['a'], name='foo'), + index=Index([], name='bar')) + df2['a'] = df2['a'].astype('float64') + + df1_expected = DataFrame( + index=pd.MultiIndex.from_product([df1.index, df1.columns]), + columns=Index([])) + df2_expected = DataFrame( + index=pd.MultiIndex.from_product([df2.index, df2.columns], + names=['bar', 'foo']), + columns=Index(['a'], name='foo'), + dtype='float64') + functions = [lambda x: (x.expanding(min_periods=5) .cov(x, pairwise=True)), lambda x: (x.expanding(min_periods=5) @@ -2623,24 +2762,33 @@ def test_moment_functions_zero_length(self): .corr(x, pairwise=True)), ] for f in functions: - df1_result_panel = f(df1) - tm.assert_panel_equal(df1_result_panel, df1_expected_panel) + df1_result = f(df1) + tm.assert_frame_equal(df1_result, df1_expected) - df2_result_panel = f(df2) - tm.assert_panel_equal(df2_result_panel, df2_expected_panel) + df2_result = f(df2) + tm.assert_frame_equal(df2_result, df2_expected) def test_expanding_cov_pairwise_diff_length(self): # GH 7512 - df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=['A', 'B']) - df1a = DataFrame([[1, 5], [3, 9]], index=[0, 2], columns=['A', 'B']) - df2 = DataFrame([[5, 6], [None, None], [2, 1]], columns=['X', 'Y']) - df2a = DataFrame([[5, 6], [2, 1]], index=[0, 2], columns=['X', 'Y']) - result1 = df1.expanding().cov(df2a, pairwise=True)[2] - result2 = df1.expanding().cov(df2a, pairwise=True)[2] - result3 = df1a.expanding().cov(df2, pairwise=True)[2] - result4 = df1a.expanding().cov(df2a, pairwise=True)[2] - expected = DataFrame([[-3., -5.], [-6., -10.]], index=['A', 'B'], - columns=['X', 'Y']) + df1 = DataFrame([[1, 5], [3, 2], [3, 9]], + columns=Index(['A', 
'B'], name='foo')) + df1a = DataFrame([[1, 5], [3, 9]], + index=[0, 2], + columns=Index(['A', 'B'], name='foo')) + df2 = DataFrame([[5, 6], [None, None], [2, 1]], + columns=Index(['X', 'Y'], name='foo')) + df2a = DataFrame([[5, 6], [2, 1]], + index=[0, 2], + columns=Index(['X', 'Y'], name='foo')) + # TODO: xref gh-15826 + # .loc is not preserving the names + result1 = df1.expanding().cov(df2a, pairwise=True).loc[2] + result2 = df1.expanding().cov(df2a, pairwise=True).loc[2] + result3 = df1a.expanding().cov(df2, pairwise=True).loc[2] + result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2] + expected = DataFrame([[-3.0, -6.0], [-5.0, -10.0]], + columns=Index(['A', 'B'], name='foo'), + index=Index(['X', 'Y'], name='foo')) tm.assert_frame_equal(result1, expected) tm.assert_frame_equal(result2, expected) tm.assert_frame_equal(result3, expected) @@ -2648,149 +2796,30 @@ def test_expanding_cov_pairwise_diff_length(self): def test_expanding_corr_pairwise_diff_length(self): # GH 7512 - df1 = DataFrame([[1, 2], [3, 2], [3, 4]], columns=['A', 'B']) - df1a = DataFrame([[1, 2], [3, 4]], index=[0, 2], columns=['A', 'B']) - df2 = DataFrame([[5, 6], [None, None], [2, 1]], columns=['X', 'Y']) - df2a = DataFrame([[5, 6], [2, 1]], index=[0, 2], columns=['X', 'Y']) - result1 = df1.expanding().corr(df2, pairwise=True)[2] - result2 = df1.expanding().corr(df2a, pairwise=True)[2] - result3 = df1a.expanding().corr(df2, pairwise=True)[2] - result4 = df1a.expanding().corr(df2a, pairwise=True)[2] - expected = DataFrame([[-1.0, -1.0], [-1.0, -1.0]], index=['A', 'B'], - columns=['X', 'Y']) + df1 = DataFrame([[1, 2], [3, 2], [3, 4]], + columns=['A', 'B'], + index=Index(range(3), name='bar')) + df1a = DataFrame([[1, 2], [3, 4]], + index=Index([0, 2], name='bar'), + columns=['A', 'B']) + df2 = DataFrame([[5, 6], [None, None], [2, 1]], + columns=['X', 'Y'], + index=Index(range(3), name='bar')) + df2a = DataFrame([[5, 6], [2, 1]], + index=Index([0, 2], name='bar'), + columns=['X', 'Y']) + result1 = df1.expanding().corr(df2, pairwise=True).loc[2] + result2 = df1.expanding().corr(df2a, pairwise=True).loc[2] + result3 = df1a.expanding().corr(df2, pairwise=True).loc[2] + result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2] + expected = DataFrame([[-1.0, -1.0], [-1.0, -1.0]], + columns=['A', 'B'], + index=Index(['X', 'Y'])) tm.assert_frame_equal(result1, expected) tm.assert_frame_equal(result2, expected) tm.assert_frame_equal(result3, expected) tm.assert_frame_equal(result4, expected) - def test_pairwise_stats_column_names_order(self): - # GH 7738 - df1s = [DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], - columns=['C', 'C']), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1., 0]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0., 1]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=['C', 1]), - DataFrame([[2., 4.], [1., 2.], [5., 2.], [8., 1.]], - columns=[1, 0.]), - DataFrame([[2, 4.], [1, 2.], [5, 2.], [8, 1.]], - columns=[0, 1.]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.]], - columns=[1., 'X']), ] - df2 = DataFrame([[None, 1, 1], [None, 1, 2], - [None, 3, 2], [None, 8, 1]], columns=['Y', 'Z', 'X']) - s = Series([1, 1, 3, 8]) - - # suppress warnings about incomparable objects, as we are deliberately - # testing with such column labels - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", - 
message=".*incomparable objects.*", - category=RuntimeWarning) - - # DataFrame methods (which do not call _flex_binary_moment()) - for f in [lambda x: x.cov(), lambda x: x.corr(), ]: - results = [f(df) for df in df1s] - for (df, result) in zip(df1s, results): - tm.assert_index_equal(result.index, df.columns) - tm.assert_index_equal(result.columns, df.columns) - for i, result in enumerate(results): - if i > 0: - # compare internal values, as columns can be different - self.assert_numpy_array_equal(result.values, - results[0].values) - - # DataFrame with itself, pairwise=True - for f in [lambda x: x.expanding().cov(pairwise=True), - lambda x: x.expanding().corr(pairwise=True), - lambda x: x.rolling(window=3).cov(pairwise=True), - lambda x: x.rolling(window=3).corr(pairwise=True), - lambda x: x.ewm(com=3).cov(pairwise=True), - lambda x: x.ewm(com=3).corr(pairwise=True), ]: - results = [f(df) for df in df1s] - for (df, result) in zip(df1s, results): - tm.assert_index_equal(result.items, df.index) - tm.assert_index_equal(result.major_axis, df.columns) - tm.assert_index_equal(result.minor_axis, df.columns) - for i, result in enumerate(results): - if i > 0: - self.assert_numpy_array_equal(result.values, - results[0].values) - - # DataFrame with itself, pairwise=False - for f in [lambda x: x.expanding().cov(pairwise=False), - lambda x: x.expanding().corr(pairwise=False), - lambda x: x.rolling(window=3).cov(pairwise=False), - lambda x: x.rolling(window=3).corr(pairwise=False), - lambda x: x.ewm(com=3).cov(pairwise=False), - lambda x: x.ewm(com=3).corr(pairwise=False), ]: - results = [f(df) for df in df1s] - for (df, result) in zip(df1s, results): - tm.assert_index_equal(result.index, df.index) - tm.assert_index_equal(result.columns, df.columns) - for i, result in enumerate(results): - if i > 0: - self.assert_numpy_array_equal(result.values, - results[0].values) - - # DataFrame with another DataFrame, pairwise=True - for f in [lambda x, y: x.expanding().cov(y, pairwise=True), - lambda x, y: x.expanding().corr(y, pairwise=True), - lambda x, y: x.rolling(window=3).cov(y, pairwise=True), - lambda x, y: x.rolling(window=3).corr(y, pairwise=True), - lambda x, y: x.ewm(com=3).cov(y, pairwise=True), - lambda x, y: x.ewm(com=3).corr(y, pairwise=True), ]: - results = [f(df, df2) for df in df1s] - for (df, result) in zip(df1s, results): - tm.assert_index_equal(result.items, df.index) - tm.assert_index_equal(result.major_axis, df.columns) - tm.assert_index_equal(result.minor_axis, df2.columns) - for i, result in enumerate(results): - if i > 0: - self.assert_numpy_array_equal(result.values, - results[0].values) - - # DataFrame with another DataFrame, pairwise=False - for f in [lambda x, y: x.expanding().cov(y, pairwise=False), - lambda x, y: x.expanding().corr(y, pairwise=False), - lambda x, y: x.rolling(window=3).cov(y, pairwise=False), - lambda x, y: x.rolling(window=3).corr(y, pairwise=False), - lambda x, y: x.ewm(com=3).cov(y, pairwise=False), - lambda x, y: x.ewm(com=3).corr(y, pairwise=False), ]: - results = [f(df, df2) if df.columns.is_unique else None - for df in df1s] - for (df, result) in zip(df1s, results): - if result is not None: - expected_index = df.index.union(df2.index) - expected_columns = df.columns.union(df2.columns) - tm.assert_index_equal(result.index, expected_index) - tm.assert_index_equal(result.columns, expected_columns) - else: - tm.assertRaisesRegexp( - ValueError, "'arg1' columns are not unique", f, df, - df2) - tm.assertRaisesRegexp( - ValueError, "'arg2' columns are not unique", 
f, - df2, df) - - # DataFrame with a Series - for f in [lambda x, y: x.expanding().cov(y), - lambda x, y: x.expanding().corr(y), - lambda x, y: x.rolling(window=3).cov(y), - lambda x, y: x.rolling(window=3).corr(y), - lambda x, y: x.ewm(com=3).cov(y), - lambda x, y: x.ewm(com=3).corr(y), ]: - results = [f(df, s) for df in df1s] + [f(s, df) for df in df1s] - for (df, result) in zip(df1s, results): - tm.assert_index_equal(result.index, df.index) - tm.assert_index_equal(result.columns, df.columns) - for i, result in enumerate(results): - if i > 0: - self.assert_numpy_array_equal(result.values, - results[0].values) - def test_rolling_skew_edge_cases(self): all_nan = Series([np.NaN] * 5) diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index c41924a7987bd..623c5fa02fcb2 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -1,4 +1,5 @@ from warnings import catch_warnings + import numpy as np from numpy.random import randn @@ -1283,8 +1284,9 @@ def test_concat_mixed_objs(self): assert_frame_equal(result, expected) # invalid concatente of mixed dims - panel = tm.makePanel() - self.assertRaises(ValueError, lambda: concat([panel, s1], axis=1)) + with catch_warnings(record=True): + panel = tm.makePanel() + self.assertRaises(ValueError, lambda: concat([panel, s1], axis=1)) def test_empty_dtype_coerce(self): @@ -1322,56 +1324,59 @@ def test_dtype_coerceion(self): tm.assert_series_equal(result.dtypes, df.dtypes) def test_panel_concat_other_axes(self): - panel = tm.makePanel() + with catch_warnings(record=True): + panel = tm.makePanel() - p1 = panel.iloc[:, :5, :] - p2 = panel.iloc[:, 5:, :] + p1 = panel.iloc[:, :5, :] + p2 = panel.iloc[:, 5:, :] - result = concat([p1, p2], axis=1) - tm.assert_panel_equal(result, panel) + result = concat([p1, p2], axis=1) + tm.assert_panel_equal(result, panel) - p1 = panel.iloc[:, :, :2] - p2 = panel.iloc[:, :, 2:] + p1 = panel.iloc[:, :, :2] + p2 = panel.iloc[:, :, 2:] - result = concat([p1, p2], axis=2) - tm.assert_panel_equal(result, panel) + result = concat([p1, p2], axis=2) + tm.assert_panel_equal(result, panel) - # if things are a bit misbehaved - p1 = panel.iloc[:2, :, :2] - p2 = panel.iloc[:, :, 2:] - p1['ItemC'] = 'baz' + # if things are a bit misbehaved + p1 = panel.iloc[:2, :, :2] + p2 = panel.iloc[:, :, 2:] + p1['ItemC'] = 'baz' - result = concat([p1, p2], axis=2) + result = concat([p1, p2], axis=2) - expected = panel.copy() - expected['ItemC'] = expected['ItemC'].astype('O') - expected.loc['ItemC', :, :2] = 'baz' - tm.assert_panel_equal(result, expected) + expected = panel.copy() + expected['ItemC'] = expected['ItemC'].astype('O') + expected.loc['ItemC', :, :2] = 'baz' + tm.assert_panel_equal(result, expected) def test_panel_concat_buglet(self): - # #2257 - def make_panel(): - index = 5 - cols = 3 - - def df(): - return DataFrame(np.random.randn(index, cols), - index=["I%s" % i for i in range(index)], - columns=["C%s" % i for i in range(cols)]) - return Panel(dict([("Item%s" % x, df()) for x in ['A', 'B', 'C']])) - - panel1 = make_panel() - panel2 = make_panel() - - panel2 = panel2.rename_axis(dict([(x, "%s_1" % x) - for x in panel2.major_axis]), - axis=1) - - panel3 = panel2.rename_axis(lambda x: '%s_1' % x, axis=1) - panel3 = panel3.rename_axis(lambda x: '%s_1' % x, axis=2) - - # it works! 
- concat([panel1, panel3], axis=1, verify_integrity=True) + with catch_warnings(record=True): + # #2257 + def make_panel(): + index = 5 + cols = 3 + + def df(): + return DataFrame(np.random.randn(index, cols), + index=["I%s" % i for i in range(index)], + columns=["C%s" % i for i in range(cols)]) + return Panel(dict([("Item%s" % x, df()) + for x in ['A', 'B', 'C']])) + + panel1 = make_panel() + panel2 = make_panel() + + panel2 = panel2.rename_axis(dict([(x, "%s_1" % x) + for x in panel2.major_axis]), + axis=1) + + panel3 = panel2.rename_axis(lambda x: '%s_1' % x, axis=1) + panel3 = panel3.rename_axis(lambda x: '%s_1' % x, axis=2) + + # it works! + concat([panel1, panel3], axis=1, verify_integrity=True) def test_panel4d_concat(self): with catch_warnings(record=True): diff --git a/pandas/tests/types/test_missing.py b/pandas/tests/types/test_missing.py index 2e35f5c1badbb..efd6dda02ab4b 100644 --- a/pandas/tests/types/test_missing.py +++ b/pandas/tests/types/test_missing.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +from warnings import catch_warnings import numpy as np from datetime import datetime from pandas.util import testing as tm @@ -76,14 +77,15 @@ def test_isnull(self): tm.assert_frame_equal(result, expected) # panel - for p in [tm.makePanel(), tm.makePeriodPanel(), - tm.add_nans(tm.makePanel())]: - result = isnull(p) - expected = p.apply(isnull) - tm.assert_panel_equal(result, expected) + with catch_warnings(record=True): + for p in [tm.makePanel(), tm.makePeriodPanel(), + tm.add_nans(tm.makePanel())]: + result = isnull(p) + expected = p.apply(isnull) + tm.assert_panel_equal(result, expected) # panel 4d - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]: result = isnull(p) expected = p.apply(isnull) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 9a9f3c6c6b945..9d7b004374318 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1747,8 +1747,10 @@ def makePeriodPanel(nper=None): def makePanel4D(nper=None): - return Panel4D(dict(l1=makePanel(nper), l2=makePanel(nper), - l3=makePanel(nper))) + with warnings.catch_warnings(record=True): + d = dict(l1=makePanel(nper), l2=makePanel(nper), + l3=makePanel(nper)) + return Panel4D(d) def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, From 5d17a94506c9234abb6578e232a5bb4a921c851c Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 7 Apr 2017 15:47:29 -0400 Subject: [PATCH 353/933] ENH: Support malformed row handling in Python engine (#15925) --- doc/source/io.rst | 4 +- doc/source/whatsnew/v0.20.0.txt | 3 +- pandas/io/parsers.py | 168 ++++++++++++------- pandas/tests/io/parser/common.py | 42 ++++- pandas/tests/io/parser/python_parser_only.py | 9 +- 5 files changed, 155 insertions(+), 71 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 5cec27c329a7f..f4676f3ad964e 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -342,11 +342,11 @@ error_bad_lines : boolean, default ``True`` Lines with too many fields (e.g. a csv line with too many commas) will by default cause an exception to be raised, and no DataFrame will be returned. If ``False``, then these "bad lines" will dropped from the DataFrame that is - returned (only valid with C parser). See :ref:`bad lines ` + returned. See :ref:`bad lines ` below. 
 warn_bad_lines : boolean, default ``True``
   If error_bad_lines is ``False``, and warn_bad_lines is ``True``, a warning for
-  each "bad line" will be output (only valid with C parser).
+  each "bad line" will be output.
 
 .. _io.dtypes:
 
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 132f20cb73142..d3207ffa86c6a 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -368,9 +368,10 @@ Other Enhancements
 - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
 - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`)
 - ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts ``str`` to separate joined fields; the default is ".", which is backward compatible. (:issue:`14883`)
-- ``pd.read_csv()`` will now raise a ``csv.Error`` error whenever an end-of-file character is encountered in the middle of a data row (:issue:`15913`)
 - A new function has been added to a ``MultiIndex`` to facilitate :ref:`Removing Unused Levels `. (:issue:`15694`)
 - :func:`MultiIndex.remove_unused_levels` has been added to facilitate :ref:`removing unused levels `. (:issue:`15694`)
+- ``pd.read_csv()`` will now raise a ``ParserError`` error whenever any parsing error occurs (:issue:`15913`, :issue:`15925`)
+- ``pd.read_csv()`` now supports the ``error_bad_lines`` and ``warn_bad_lines`` arguments for the Python parser (:issue:`15925`)
 
 .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index a85f9cda50879..10f8c53987471 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -263,10 +263,10 @@
     Lines with too many fields (e.g. a csv line with too many commas) will by
     default cause an exception to be raised, and no DataFrame will be
     returned. If False, then these "bad lines" will dropped from the DataFrame
     that is
-    returned. (Only valid with C parser)
+    returned.
 warn_bad_lines : boolean, default True
     If error_bad_lines is False, and warn_bad_lines is True, a warning for each
-    "bad line" will be output. (Only valid with C parser).
+    "bad line" will be output.
 low_memory : boolean, default True
     Internally process the file in chunks, resulting in lower memory use
     while parsing, but possibly mixed type inference. To ensure no mixed
@@ -485,8 +485,6 @@ def _read(filepath_or_buffer, kwds):
 
 _python_unsupported = set([
     'low_memory',
     'buffer_lines',
-    'error_bad_lines',
-    'warn_bad_lines',
     'float_precision',
 ])
 _deprecated_args = set([
@@ -1897,6 +1895,9 @@ def __init__(self, f, **kwds):
 
         self.usecols, _ = _validate_usecols_arg(kwds['usecols'])
         self.skip_blank_lines = kwds['skip_blank_lines']
 
+        self.warn_bad_lines = kwds['warn_bad_lines']
+        self.error_bad_lines = kwds['error_bad_lines']
+
         self.names_passed = kwds['names'] or None
 
         self.na_filter = kwds['na_filter']
@@ -2469,16 +2470,19 @@ def _next_line(self):
             next(self.data)
 
         while True:
-                orig_line = self._next_iter_line()
-                line = self._check_comments([orig_line])[0]
+                orig_line = self._next_iter_line(row_num=self.pos + 1)
                 self.pos += 1
-                if (not self.skip_blank_lines and
-                        (self._empty(orig_line) or line)):
-                    break
-                elif self.skip_blank_lines:
-                    ret = self._check_empty([line])
-                    if ret:
-                        line = ret[0]
+
+                if orig_line is not None:
+                    line = self._check_comments([orig_line])[0]
+
+                    if self.skip_blank_lines:
+                        ret = self._check_empty([line])
+
+                        if ret:
+                            line = ret[0]
+                            break
+                    elif self._empty(orig_line) or line:
                         break
 
         # This was the first line of the file,
@@ -2491,7 +2495,28 @@ def _next_line(self):
         self.buf.append(line)
         return line
 
-    def _next_iter_line(self, **kwargs):
+    def _alert_malformed(self, msg, row_num):
+        """
+        Alert a user about a malformed row.
+
+        If `self.error_bad_lines` is True, the alert will be `ParserError`.
+        If `self.warn_bad_lines` is True, the alert will be printed out.
+
+        Parameters
+        ----------
+        msg : The error message to display.
+        row_num : The row number where the parsing error occurred.
+                  Because this row number is displayed, we 1-index,
+                  even though we 0-index internally.
+        """
+
+        if self.error_bad_lines:
+            raise ParserError(msg)
+        elif self.warn_bad_lines:
+            base = 'Skipping line {row_num}: '.format(row_num=row_num)
+            sys.stderr.write(base + msg + '\n')
+
+    def _next_iter_line(self, row_num):
         """
         Wrapper around iterating through `self.data` (CSV source).
 
@@ -2501,32 +2526,34 @@
 
         Parameters
         ----------
-        kwargs : Keyword arguments used to customize the error message.
+        row_num : The row number of the line being parsed.
         """
 
         try:
             return next(self.data)
         except csv.Error as e:
-            msg = str(e)
-
-            if 'NULL byte' in msg:
-                msg = ('NULL byte detected. This byte '
-                       'cannot be processed in Python\'s '
-                       'native csv library at the moment, '
-                       'so please pass in engine=\'c\' instead')
-            elif 'newline inside string' in msg:
-                msg = ('EOF inside string starting with '
-                       'line ' + str(kwargs['row_num']))
-
-            if self.skipfooter > 0:
-                reason = ('Error could possibly be due to '
-                          'parsing errors in the skipped footer rows '
-                          '(the skipfooter keyword is only applied '
-                          'after Python\'s csv library has parsed '
-                          'all rows).')
-                msg += '. ' + reason
-
-            raise csv.Error(msg)
+            if self.warn_bad_lines or self.error_bad_lines:
+                msg = str(e)
+
+                if 'NULL byte' in msg:
+                    msg = ('NULL byte detected. This byte '
+                           'cannot be processed in Python\'s '
+                           'native csv library at the moment, '
+                           'so please pass in engine=\'c\' instead')
+                elif 'newline inside string' in msg:
+                    msg = ('EOF inside string starting with '
+                           'line ' + str(row_num))
+
+                if self.skipfooter > 0:
+                    reason = ('Error could possibly be due to '
+                              'parsing errors in the skipped footer rows '
+                              '(the skipfooter keyword is only applied '
+                              'after Python\'s csv library has parsed '
+                              'all rows).')
+                    msg += '. ' + reason
+
+                self._alert_malformed(msg, row_num)
+            return None
 
     def _check_comments(self, lines):
         if self.comment is None:
@@ -2657,42 +2684,57 @@ def _get_index_name(self, columns):
         return index_name, orig_names, columns
 
     def _rows_to_cols(self, content):
+        if self.skipfooter < 0:
+            raise ValueError('skip footer cannot be negative')
+
         col_len = self.num_original_columns
 
         if self._implicit_index:
             col_len += len(self.index_col)
 
-        # see gh-13320
-        zipped_content = list(lib.to_object_array(
-            content, min_width=col_len).T)
-        zip_len = len(zipped_content)
-
-        if self.skipfooter < 0:
-            raise ValueError('skip footer cannot be negative')
+        max_len = max([len(row) for row in content])
 
-        # Loop through rows to verify lengths are correct.
-        if (col_len != zip_len and
+        # Check that there are no rows with too many
+        # elements in their row (rows with too few
+        # elements are padded with NaN).
+        if (max_len > col_len and
                 self.index_col is not False and
                 self.usecols is None):
-            i = 0
-            for (i, l) in enumerate(content):
-                if len(l) != col_len:
-                    break
 
-            footers = 0
-            if self.skipfooter:
-                footers = self.skipfooter
+            footers = self.skipfooter if self.skipfooter else 0
+            bad_lines = []
 
-            row_num = self.pos - (len(content) - i + footers)
+            iter_content = enumerate(content)
+            content_len = len(content)
+            content = []
 
-            msg = ('Expected %d fields in line %d, saw %d' %
-                   (col_len, row_num + 1, zip_len))
-            if len(self.delimiter) > 1 and self.quoting != csv.QUOTE_NONE:
-                # see gh-13374
-                reason = ('Error could possibly be due to quotes being '
-                          'ignored when a multi-char delimiter is used.')
-                msg += '. ' + reason
-            raise ValueError(msg)
+            for (i, l) in iter_content:
+                actual_len = len(l)
+
+                if actual_len > col_len:
+                    if self.error_bad_lines or self.warn_bad_lines:
+                        row_num = self.pos - (content_len - i + footers)
+                        bad_lines.append((row_num, actual_len))
+
+                        if self.error_bad_lines:
+                            break
+                else:
+                    content.append(l)
+
+            for row_num, actual_len in bad_lines:
+                msg = ('Expected %d fields in line %d, saw %d' %
+                       (col_len, row_num + 1, actual_len))
+                if len(self.delimiter) > 1 and self.quoting != csv.QUOTE_NONE:
+                    # see gh-13374
+                    reason = ('Error could possibly be due to quotes being '
+                              'ignored when a multi-char delimiter is used.')
+                    msg += '. ' + reason
+
+                self._alert_malformed(msg, row_num + 1)
+
+        # see gh-13320
+        zipped_content = list(lib.to_object_array(
+            content, min_width=col_len).T)
 
         if self.usecols:
             if self._implicit_index:
@@ -2750,10 +2792,12 @@ def _get_lines(self, rows=None):
 
                     while True:
                         new_row = self._next_iter_line(
-                            row_num=self.pos + rows)
-                        new_rows.append(new_row)
+                            row_num=self.pos + rows + 1)
                         rows += 1
 
+                        if new_row is not None:
+                            new_rows.append(new_row)
+
                 except StopIteration:
                     if self.skiprows:
                         new_rows = [row for i, row in enumerate(new_rows)
diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py
index 36d5f2dd5274b..ee0f00506cef3 100644
--- a/pandas/tests/io/parser/common.py
+++ b/pandas/tests/io/parser/common.py
@@ -19,7 +19,7 @@
 
 from pandas import compat
 from pandas.compat import (StringIO, BytesIO, PY3,
                            range, lrange, u)
-from pandas.errors import DtypeWarning, EmptyDataError
+from pandas.errors import DtypeWarning, EmptyDataError, ParserError
 from pandas.io.common import URLError
 from pandas.io.parsers import TextFileReader, TextParser
 
@@ -1569,7 +1569,7 @@ def test_null_byte_char(self):
             tm.assert_frame_equal(out, expected)
         else:
             msg = "NULL byte detected"
-            with tm.assertRaisesRegexp(csv.Error, msg):
+            with tm.assertRaisesRegexp(ParserError, msg):
                 self.read_csv(StringIO(data), names=cols)
 
     def test_utf8_bom(self):
@@ -1695,3 +1695,41 @@
 
         with tm.assertRaisesRegexp(ValueError, msg):
             self.read_csv(mock.Mock())
+
+    def test_skip_bad_lines(self):
+        # see gh-15925
+        data = 'a\n1\n1,2,3\n4\n5,6,7'
+
+        with tm.assertRaises(ParserError):
+            self.read_csv(StringIO(data))
+
+        with tm.assertRaises(ParserError):
+            self.read_csv(StringIO(data), error_bad_lines=True)
+
+        stderr = sys.stderr
+        expected = DataFrame({'a': [1, 4]})
+
+        sys.stderr = StringIO()
+        try:
+            out = self.read_csv(StringIO(data),
+                                error_bad_lines=False,
+                                warn_bad_lines=False)
+            tm.assert_frame_equal(out, expected)
+
+            val = sys.stderr.getvalue()
+            self.assertEqual(val, '')
+        finally:
+            sys.stderr = stderr
+
+        sys.stderr = StringIO()
+        try:
+            out = self.read_csv(StringIO(data),
+                                error_bad_lines=False,
+                                warn_bad_lines=True)
+            tm.assert_frame_equal(out, expected)
+
+            val = sys.stderr.getvalue()
+            self.assertTrue('Skipping line 3' in val)
+            self.assertTrue('Skipping line 5' in val)
+        finally:
+            sys.stderr = stderr
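As a quick illustration of the user-facing behavior these tests pin down — a sketch only, assuming the pandas 0.20-era API, with the sample data taken from ``test_skip_bad_lines`` above:

    from pandas.compat import StringIO
    import pandas as pd

    data = 'a\n1\n1,2,3\n4\n5,6,7'  # rows 3 and 5 have too many fields

    # Default behavior: a malformed row now raises ParserError in both engines.
    try:
        pd.read_csv(StringIO(data), engine='python')
    except pd.errors.ParserError as err:
        print('raised:', err)

    # Dropping bad rows instead: only the well-formed rows are kept, and with
    # warn_bad_lines=True a "Skipping line ..." note is written to stderr.
    out = pd.read_csv(StringIO(data), engine='python',
                      error_bad_lines=False, warn_bad_lines=True)
    print(out)  # a DataFrame with column 'a' and values [1, 4]
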
diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py
index 36356315419c4..9a1eb94270e28 100644
--- a/pandas/tests/io/parser/python_parser_only.py
+++ b/pandas/tests/io/parser/python_parser_only.py
@@ -14,6 +14,7 @@
 import pandas.util.testing as tm
 from pandas import DataFrame, Index
 from pandas import compat
+from pandas.errors import ParserError
 from pandas.compat import StringIO, BytesIO, u
 
@@ -213,13 +214,13 @@ def test_multi_char_sep_quotes(self):
         data = 'a,,b\n1,,a\n2,,"2,,b"'
 
         msg = 'ignored when a multi-char delimiter is used'
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assertRaisesRegexp(ParserError, msg):
             self.read_csv(StringIO(data), sep=',,')
 
         # We expect no match, so there should be an assertion
         # error out of the inner context manager.
         with tm.assertRaises(AssertionError):
-            with tm.assertRaisesRegexp(ValueError, msg):
+            with tm.assertRaisesRegexp(ParserError, msg):
                 self.read_csv(StringIO(data), sep=',,',
                               quoting=csv.QUOTE_NONE)
 
@@ -231,11 +232,11 @@ def test_skipfooter_bad_row(self):
 
         for data in ('a\n1\n"b"a',
                      'a,b,c\ncat,foo,bar\ndog,foo,"baz'):
-            with tm.assertRaisesRegexp(csv.Error, msg):
+            with tm.assertRaisesRegexp(ParserError, msg):
                 self.read_csv(StringIO(data), skipfooter=1)
 
             # We expect no match, so there should be an assertion
             # error out of the inner context manager.
             with tm.assertRaises(AssertionError):
-                with tm.assertRaisesRegexp(csv.Error, msg):
+                with tm.assertRaisesRegexp(ParserError, msg):
                     self.read_csv(StringIO(data))

From 6d90a436674a1e285d4577553b5cb75906a1bd27 Mon Sep 17 00:00:00 2001
From: "Christopher C. Aycock"
Date: Fri, 7 Apr 2017 16:37:41 -0400
Subject: [PATCH 354/933] BUG: use entire size of DatetimeTZBlock when
 coercing result (#15855) (#15924)

* BUG: use entire size of DatetimeTZBlock when coercing result (#15855)

* Moved test

* Removed unnecessary 'self'

* Removed unnecessary 'self', again
---
 doc/source/whatsnew/v0.20.0.txt     |  1 +
 pandas/core/internals.py            |  2 +-
 pandas/tests/frame/test_missing.py  | 14 ++++++++++++++
 pandas/tests/series/test_missing.py | 12 ++++++++++++
 4 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index d3207ffa86c6a..0b98e57c606a3 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -1144,6 +1144,7 @@ Conversion
 - Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`)
 - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`)
 - Bug in ``DataFrame`` construction with nulls and datetimes in a list-like (:issue:`15869`)
+- Bug in ``DataFrame.fillna()`` with tz-aware datetimes (:issue:`15855`)
 
 Indexing
 ^^^^^^^^
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 8db801f8e7212..57361886eab8c 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -2475,7 +2475,7 @@ def _try_coerce_result(self, result):
         if isinstance(result, np.ndarray):
             # allow passing of > 1dim if its trivial
             if result.ndim > 1:
-                result = result.reshape(len(result))
+                result = result.reshape(np.prod(result.shape))
             result = self.values._shallow_copy(result)
 
         return result
diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py
index 93c3ba78a0abf..eacf032bbcc85 100644
--- a/pandas/tests/frame/test_missing.py
+++ b/pandas/tests/frame/test_missing.py
@@ -257,6 +257,20 @@ def test_fillna(self):
         result = df.fillna(value={'Date': df['Date2']})
         assert_frame_equal(result, expected)
 
+        # with timezone
+        # GH 15855
+        df = pd.DataFrame({'A': [pd.Timestamp('2012-11-11 00:00:00+01:00'),
+                                 pd.NaT]})
+        exp = pd.DataFrame({'A': [pd.Timestamp('2012-11-11 00:00:00+01:00'),
+                                  pd.Timestamp('2012-11-11 00:00:00+01:00')]})
+        assert_frame_equal(df.fillna(method='pad'), exp)
+
+        df = pd.DataFrame({'A': [pd.NaT,
+                                 pd.Timestamp('2012-11-11 00:00:00+01:00')]})
+        exp = pd.DataFrame({'A': [pd.Timestamp('2012-11-11 00:00:00+01:00'),
+                                  pd.Timestamp('2012-11-11 00:00:00+01:00')]})
+        assert_frame_equal(df.fillna(method='bfill'), exp)
+
     def test_fillna_downcast(self):
         # GH 15277
         # infer int64 from float64
diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py
index 7174283494fe7..ea49abeee21c5 100644
--- a/pandas/tests/series/test_missing.py
+++ b/pandas/tests/series/test_missing.py
@@ -258,6 +258,18 @@ def test_datetime64_tz_fillna(self):
         self.assert_series_equal(expected, result)
         self.assert_series_equal(pd.isnull(s), null_loc)
 
+        # with timezone
+        # GH 15855
+        df = pd.Series([pd.Timestamp('2012-11-11 00:00:00+01:00'), pd.NaT])
+        exp = pd.Series([pd.Timestamp('2012-11-11 00:00:00+01:00'),
+                         pd.Timestamp('2012-11-11 00:00:00+01:00')])
+        assert_series_equal(df.fillna(method='pad'), exp)
+
+        df = pd.Series([pd.NaT, pd.Timestamp('2012-11-11 00:00:00+01:00')])
+        exp = pd.Series([pd.Timestamp('2012-11-11 00:00:00+01:00'),
+                         pd.Timestamp('2012-11-11 00:00:00+01:00')])
+        assert_series_equal(df.fillna(method='bfill'), exp)
+
     def test_datetime64tz_fillna_round_issue(self):
         # GH 14872
 

From bc2fa160b9d281889b344e7bc15352998e7b0955 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Fri, 7 Apr 2017 18:42:29 -0400
Subject: [PATCH 355/933] BUG/DOC: Add documentation in types/common.py
 (#15941)

* DOC: document internal methods in types/common.py

Partially addresses gh-15895.

* BUG: Catch TypeError when calling _get_dtype

The following functions were not catching
the TypeError raised by _get_dtype:

1) is_string_dtype
2) is_string_like_dtype
3) is_timedelta64_ns_dtype

Thus, when "None" was passed in, an Exception was
raised instead of returning False, as other functions did.

* TST: use ids to have nice parameterized function names
---
 doc/source/whatsnew/v0.20.0.txt   |   1 +
 pandas/tests/types/test_common.py |  27 ++++
 pandas/types/common.py            | 253 +++++++++++++++++++++++++++---
 3 files changed, 259 insertions(+), 22 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 0b98e57c606a3..436d51da6e873 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -1145,6 +1145,7 @@ Conversion
 - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`)
 - Bug in ``DataFrame`` construction with nulls and datetimes in a list-like (:issue:`15869`)
 - Bug in ``DataFrame.fillna()`` with tz-aware datetimes (:issue:`15855`)
+- Bug in ``is_string_dtype``, ``is_timedelta64_ns_dtype``, and ``is_string_like_dtype`` in which an error was raised when ``None`` was passed in (:issue:`15941`)
 
 Indexing
 ^^^^^^^^
diff --git a/pandas/tests/types/test_common.py b/pandas/tests/types/test_common.py
index c15f219c8fad6..21772bab44d01 100644
--- a/pandas/tests/types/test_common.py
+++ b/pandas/tests/types/test_common.py
@@ -80,3 +80,30 @@ def test_dtype_equal_strict():
     assert not is_dtype_equal(
         pandas_dtype('datetime64[ns, US/Eastern]'),
         pandas_dtype('datetime64[ns, CET]'))
+
+    # see gh-15941: no exception should be raised
+    assert not is_dtype_equal(None, None)
+
+
+def get_is_dtype_funcs():
+    """
+    Get all functions in pandas.types.common that
+    begin with 'is_' and end with 'dtype'
+
+    """
+    import pandas.types.common as com
+
+    fnames = [f for f in dir(com) if (f.startswith('is_') and
+                                      f.endswith('dtype'))]
+    return [getattr(com, fname) for fname in fnames]
+
+
+@pytest.mark.parametrize('func',
+                         get_is_dtype_funcs(),
+                         ids=lambda x: x.__name__)
+def test_get_dtype_error_catch(func):
+    # see gh-15941
+    #
+    # No exception should be raised.
+
+    assert not func(None)
diff --git a/pandas/types/common.py b/pandas/types/common.py
index 017805673defe..7ab2e068ac69f 100644
--- a/pandas/types/common.py
+++ b/pandas/types/common.py
@@ -31,6 +31,20 @@
 
 
 def _ensure_float(arr):
+    """
+    Ensure that an array object has a float dtype if possible.
+ + Parameters + ---------- + arr : ndarray, Series + The array whose data type we want to enforce as float. + + Returns + ------- + float_arr : The original array cast to the float dtype if + possible. Otherwise, the original array is returned. + """ + if issubclass(arr.dtype.type, (np.integer, np.bool_)): arr = arr.astype(float) return arr @@ -46,6 +60,20 @@ def _ensure_float(arr): def _ensure_categorical(arr): + """ + Ensure that an array-like object is a Categorical (if not already). + + Parameters + ---------- + arr : array-like + The array that we want to convert into a Categorical. + + Returns + ------- + cat_arr : The original array cast as a Categorical. If it already + is a Categorical, we return as is. + """ + if not is_categorical(arr): from pandas import Categorical arr = Categorical(arr) @@ -116,8 +144,40 @@ def is_categorical_dtype(arr_or_dtype): def is_string_dtype(arr_or_dtype): - dtype = _get_dtype(arr_or_dtype) - return dtype.kind in ('O', 'S', 'U') and not is_period_dtype(dtype) + """ + Check whether the provided array or dtype is of the string dtype. + + Parameters + ---------- + arr_or_dtype : ndarray, dtype, type + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of the string dtype. + + Examples + -------- + >>> is_string_dtype(str) + True + >>> is_string_dtype(object) + True + >>> is_string_dtype(int) + False + >>> + >>> is_string_dtype(np.array(['a', 'b'])) + True + >>> is_string_dtype(np.array([1, 2])) + False + """ + + # TODO: gh-15585: consider making the checks stricter. + + try: + dtype = _get_dtype(arr_or_dtype) + return dtype.kind in ('O', 'S', 'U') and not is_period_dtype(dtype) + except TypeError: + return False def is_period_arraylike(arr): @@ -209,8 +269,40 @@ def is_datetime64_ns_dtype(arr_or_dtype): def is_timedelta64_ns_dtype(arr_or_dtype): - tipo = _get_dtype(arr_or_dtype) - return tipo == _TD_DTYPE + """ + Check whether the provided array or dtype is of the timedelta64[ns] dtype. + + This is a very specific dtype, so generic ones like `np.timedelta64` + will return False if passed into this function. + + Parameters + ---------- + arr_or_dtype : ndarray, dtype, type + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype + is of the timedelta64[ns] dtype. + + Examples + -------- + >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]') + True + >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]') # Wrong frequency + False + >>> + >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]')) + True + >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) + False + """ + + try: + tipo = _get_dtype(arr_or_dtype) + return tipo == _TD_DTYPE + except TypeError: + return False def is_datetime_or_timedelta_dtype(arr_or_dtype): @@ -220,10 +312,21 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype): def _is_unorderable_exception(e): """ - return a boolean if we an unorderable exception error message + Check if the exception raised is an unorderable exception. - These are different error message for PY>=3<=3.5 and PY>=3.6 + The error message differs for 3 <= PY <= 3.5 and PY >= 3.6, so + we need to condition based on Python version. + + Parameters + ---------- + e : Exception or sub-class + The exception object to check. + + Returns + ------- + boolean : Whether or not the exception raised is an unorderable exception. 
""" + if PY36: return "'>' not supported between instances of" in str(e) @@ -302,9 +405,39 @@ def is_numeric_dtype(arr_or_dtype): def is_string_like_dtype(arr_or_dtype): - # exclude object as its a mixed dtype - dtype = _get_dtype(arr_or_dtype) - return dtype.kind in ('S', 'U') + """ + Check whether the provided array or dtype is of a string-like dtype. + + Unlike `is_string_dtype`, the object dtype is excluded because it + is a mixed dtype. + + Parameters + ---------- + arr_or_dtype : ndarray, dtype, type + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of the string dtype. + + Examples + -------- + >>> is_string_like_dtype(str) + True + >>> is_string_like_dtype(object) + False + >>> + >>> is_string_like_dtype(np.array(['a', 'b'])) + True + >>> is_string_like_dtype(np.array([1, 2])) + False + """ + + try: + dtype = _get_dtype(arr_or_dtype) + return dtype.kind in ('S', 'U') + except TypeError: + return False def is_float_dtype(arr_or_dtype): @@ -346,7 +479,22 @@ def is_complex_dtype(arr_or_dtype): def _coerce_to_dtype(dtype): - """ coerce a string / np.dtype to a dtype """ + """ + Coerce a string or np.dtype to a pandas or numpy + dtype if possible. + + If we cannot convert to a pandas dtype initially, + we convert to a numpy dtype. + + Parameters + ---------- + dtype : The dtype that we want to coerce. + + Returns + ------- + pd_or_np_dtype : The coerced dtype. + """ + if is_categorical_dtype(dtype): dtype = CategoricalDtype() elif is_datetime64tz_dtype(dtype): @@ -359,8 +507,27 @@ def _coerce_to_dtype(dtype): def _get_dtype(arr_or_dtype): + """ + Get the dtype instance associated with an array + or dtype object. + + Parameters + ---------- + arr_or_dtype : ndarray, Series, dtype, type + The array-like or dtype object whose dtype we want to extract. + + Returns + ------- + obj_dtype : The extract dtype instance from the + passed in array or dtype object. + + Raises + ------ + TypeError : The passed in object is None. + """ + if arr_or_dtype is None: - raise TypeError + raise TypeError("Cannot deduce dtype from null object") if isinstance(arr_or_dtype, np.dtype): return arr_or_dtype elif isinstance(arr_or_dtype, type): @@ -385,6 +552,21 @@ def _get_dtype(arr_or_dtype): def _get_dtype_type(arr_or_dtype): + """ + Get the type (NOT dtype) instance associated with + an array or dtype object. + + Parameters + ---------- + arr_or_dtype : ndarray, Series, dtype, type + The array-like or dtype object whose type we want to extract. + + Returns + ------- + obj_type : The extract type instance from the + passed in array or dtype object. + """ + if isinstance(arr_or_dtype, np.dtype): return arr_or_dtype.type elif isinstance(arr_or_dtype, type): @@ -410,16 +592,27 @@ def _get_dtype_type(arr_or_dtype): def _get_dtype_from_object(dtype): - """Get a numpy dtype.type-style object. This handles the datetime64[ns] - and datetime64[ns, TZ] compat + """ + Get a numpy dtype.type-style object for a dtype object. - Notes - ----- - If nothing can be found, returns ``object``. + This methods also includes handling of the datetime64[ns] and + datetime64[ns, TZ] objects. + + If no dtype can be found, we return ``object``. + + Parameters + ---------- + dtype : dtype, type + The dtype object whose numpy dtype.type-style + object we want to extract. + + Returns + ------- + dtype_object : The extracted numpy dtype.type-style object. 
""" - # type object from a dtype if isinstance(dtype, type) and issubclass(dtype, np.generic): + # Type object from a dtype return dtype elif is_categorical(dtype): return CategoricalDtype().type @@ -429,7 +622,7 @@ def _get_dtype_from_object(dtype): try: _validate_date_like_dtype(dtype) except TypeError: - # should still pass if we don't have a datelike + # Should still pass if we don't have a date-like pass return dtype.type elif isinstance(dtype, string_types): @@ -444,10 +637,11 @@ def _get_dtype_from_object(dtype): try: return _get_dtype_from_object(getattr(np, dtype)) except (AttributeError, TypeError): - # handles cases like _get_dtype(int) - # i.e., python objects that are valid dtypes (unlike user-defined - # types, in general) - # TypeError handles the float16 typecode of 'e' + # Handles cases like _get_dtype(int) i.e., + # Python objects that are valid dtypes + # (unlike user-defined types, in general) + # + # TypeError handles the float16 type code of 'e' # further handle internal types pass @@ -455,6 +649,21 @@ def _get_dtype_from_object(dtype): def _validate_date_like_dtype(dtype): + """ + Check whether the dtype is a date-like dtype. Raises an error if invalid. + + Parameters + ---------- + dtype : dtype, type + The dtype to check. + + Raises + ------ + TypeError : The dtype could not be casted to a date-like dtype. + ValueError : The dtype is an illegal date-like dtype (e.g. the + the frequency provided is too specific) + """ + try: typ = np.datetime_data(dtype)[0] except ValueError as e: From d60f490b4e764ab9af170adf4d88639014804fe6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 7 Apr 2017 18:59:27 -0400 Subject: [PATCH 356/933] DOC: minor doc corrections --- ci/requirements-3.5_DOC.run | 1 + doc/source/computation.rst | 4 ++-- doc/source/dsintro.rst | 4 ++-- doc/source/whatsnew/v0.20.0.txt | 6 +++--- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/ci/requirements-3.5_DOC.run b/ci/requirements-3.5_DOC.run index 644a16f51f4b6..740d4714e96b4 100644 --- a/ci/requirements-3.5_DOC.run +++ b/ci/requirements-3.5_DOC.run @@ -18,4 +18,5 @@ sqlalchemy numexpr bottleneck statsmodels +xarray pyqt=4.11.4 diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 2423f1a342994..a37cbc96b2d8c 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -557,7 +557,7 @@ can even be omitted: correls.loc['2002-09-22':] You can efficiently retrieve the time series of correlations between two -columns using ``.loc`` indexing: +columns by reshaping and indexing: .. ipython:: python :suppress: @@ -567,7 +567,7 @@ columns using ``.loc`` indexing: .. ipython:: python @savefig rolling_corr_pairwise_ex.png - correls.loc[:, ('A', 'C')].plot() + correls.unstack(1)[('A', 'C')].plot() .. _stats.aggregate: diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 2b11b23b1d1c2..0086cb0f94747 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -953,8 +953,8 @@ datatype support, and manipulation routines. As a result, supporting efficient i routines for ``Series``, ``DataFrame`` and ``Panel`` has contributed to an increasingly fragmented and difficult-to-understand codebase. -The 3-d structure of a ``Panel`` is much less common for many types of data analysis, -than the 1-d of the ``Series`` or the 2-D of the ``DataFrame``. Going forward it makes sense for +The 3-D structure of a ``Panel`` is much less common for many types of data analysis, +than the 1-D of the ``Series`` or the 2-D of the ``DataFrame``. 
Going forward it makes sense for pandas to focus on these areas exclusively. Oftentimes, one can simply use a MultiIndex ``DataFrame`` for easily working with higher dimensional data. diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 436d51da6e873..d571c0f2d9620 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -435,7 +435,7 @@ Deprecate Panel ^^^^^^^^^^^^^^^ ``Panel`` is deprecated and will be removed in a future version. The recommended way to represent 3-D data are -with a ``MultiIndex``on a ``DataFrame`` via the :meth:`~Panel.to_frame` or with the `xarray package `__. Pandas +with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` or with the `xarray package `__. Pandas provides a :meth:`~Panel.to_xarray` method to automate this conversion. See the documentation :ref:`Deprecate Panel `. (:issue:`13563`). .. ipython:: python @@ -874,8 +874,8 @@ Window Binary Corr/Cov operations return a MultiIndex DataFrame A binary window operation, like ``.corr()`` or ``.cov()``, when operating on a ``.rolling(..)``, ``.expanding(..)``, or ``.ewm(..)`` object, will now return a 2-level ``MultiIndexed DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, -see :ref:`here <_whatsnew_0200.api_breaking.deprecate_panel>`. These are equivalent in function, -but MultiIndexed ``DataFrame`` s enjoy more support in pandas. +see :ref:`here `. These are equivalent in function, +but a MultiIndexed ``DataFrame`` enjoys more support in pandas. See the section on :ref:`Windowed Binary Operations ` for more information. (:issue:`15677`) .. ipython:: python From 11c6f488c12a0bb0da2f6f9bede9da75c581c2a2 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 8 Apr 2017 09:22:08 -0400 Subject: [PATCH 357/933] DOC: whatsnew typos --- doc/source/whatsnew/v0.20.0.txt | 11 ++++++----- pandas/indexes/multi.py | 5 +++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index d571c0f2d9620..3952ac4a419c9 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -368,7 +368,6 @@ Other Enhancements - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts ``str`` to separate joined fields; the default is ".", which is backward compatible. (:issue:`14883`) -- A new function has been added to a ``MultiIndex`` to facilitate :ref:`Removing Unused Levels `. (:issue:`15694`) - :func:`MultiIndex.remove_unused_levels` has been added to facilitate :ref:`removing unused levels `. (:issue:`15694`) - ``pd.read_csv()`` will now raise a ``ParserError`` error whenever any parsing error occurs (:issue:`15913`, :issue:`15925`) - ``pd.read_csv()`` now supports the ``error_bad_lines`` and ``warn_bad_lines`` arguments for the Python parser (:issue:`15925`) @@ -678,9 +677,9 @@ New Behavior: Pandas Google BigQuery support has moved ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -pandas has split off Google BigQuery support into a separate package ``pandas-gbq``. You can ``pip install pandas-gbq`` to get it. -The functionality of :func:`read_gbq` and :meth:`DataFrame.to_gbq` remain the same with the currently released version of ``pandas-gbq=0.1.3``. 
-Documentation is now hosted `here `__ (:issue:`15347`) +pandas has split off Google BigQuery support into a separate package ``pandas-gbq``. You can ``conda install pandas-gbq -c conda-forge`` or +``pip install pandas-gbq`` to get it. The functionality of :func:`read_gbq` and :meth:`DataFrame.to_gbq` remain the same with the +currently released version of ``pandas-gbq=0.1.4``. Documentation is now hosted `here `__ (:issue:`15347`) .. _whatsnew_0200.api_breaking.memory_usage: @@ -782,14 +781,16 @@ However, this example, which has a non-monotonic 2nd level, doesn't behave as desired. .. ipython:: python + df = pd.DataFrame( {'value': [1, 2, 3, 4]}, index=pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], labels=[[0, 0, 1, 1], [0, 1, 0, 1]])) + df Previous Behavior: -.. ipython:: python +.. code-block:: python In [11]: df.sort_index() Out[11]: diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 77774f3284fef..3f84d8b292980 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -1260,8 +1260,9 @@ def remove_unused_levels(self): MultiIndex(levels=[[0, 1], ['a', 'b']], labels=[[1, 1], [0, 1]]) - # the 0 from the first level is not represented - # and can be removed + The 0 from the first level is not represented + and can be removed + >>> i[2:].remove_unused_levels() MultiIndex(levels=[[1], ['a', 'b']], labels=[[0, 0], [0, 1]]) From b28eb1016949f2ed038c713a69480ab9bcb04168 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 8 Apr 2017 09:24:43 -0400 Subject: [PATCH 358/933] BUG: Validate the skipfooter parameter in read_csv (#15945) Previously, the skipfooter parameter was assumed to be an integer, but that was not checked. --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/parsers.py | 36 +++++++++++++++++--- pandas/tests/io/parser/common.py | 2 +- pandas/tests/io/parser/python_parser_only.py | 28 ++++++++------- pandas/tests/io/parser/test_unsupported.py | 2 +- 5 files changed, 50 insertions(+), 19 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 3952ac4a419c9..4e29e01415ba6 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1185,6 +1185,7 @@ I/O - Bug in ``pd.read_csv()`` in which certain invalid file objects caused the Python interpreter to crash (:issue:`15337`) - Bug in ``pd.read_csv()`` in which invalid values for ``nrows`` and ``chunksize`` were allowed (:issue:`15767`) - Bug in ``pd.read_csv()`` for the Python engine in which unhelpful error messages were being raised when parsing errors occurred (:issue:`15910`) +- Bug in ``pd.read_csv()`` in which the ``skipfooter`` parameter was not being properly validated (:issue:`15925`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) - Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 10f8c53987471..a968a2b9623d9 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1036,6 +1036,37 @@ def _evaluate_usecols(usecols, names): return usecols +def _validate_skipfooter_arg(skipfooter): + """ + Validate the 'skipfooter' parameter. + + Checks whether 'skipfooter' is a non-negative integer. + Raises a ValueError if that is not the case. 
+ + Parameters + ---------- + skipfooter : non-negative integer + The number of rows to skip at the end of the file. + + Returns + ------- + validated_skipfooter : non-negative integer + The original input if the validation succeeds. + + Raises + ------ + ValueError : 'skipfooter' was not a non-negative integer. + """ + + if not is_integer(skipfooter): + raise ValueError("skipfooter must be an integer") + + if skipfooter < 0: + raise ValueError("skipfooter cannot be negative") + + return skipfooter + + def _validate_usecols_arg(usecols): """ Validate the 'usecols' parameter. @@ -1880,7 +1911,7 @@ def __init__(self, f, **kwds): else: self.skipfunc = lambda x: x in self.skiprows - self.skipfooter = kwds['skipfooter'] + self.skipfooter = _validate_skipfooter_arg(kwds['skipfooter']) self.delimiter = kwds['delimiter'] self.quotechar = kwds['quotechar'] @@ -2684,9 +2715,6 @@ def _get_index_name(self, columns): return index_name, orig_names, columns def _rows_to_cols(self, content): - if self.skipfooter < 0: - raise ValueError('skip footer cannot be negative') - col_len = self.num_original_columns if self._implicit_index: diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index ee0f00506cef3..ab30301e710a6 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -546,7 +546,7 @@ def test_iterator(self): if self.engine == 'python': # test bad parameter (skipfooter) reader = self.read_csv(StringIO(self.data1), index_col=0, - iterator=True, skipfooter=True) + iterator=True, skipfooter=1) self.assertRaises(ValueError, reader.read, 3) def test_pass_names_with_index(self): diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index 9a1eb94270e28..510e3c689649c 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -20,20 +20,22 @@ class PythonParserTests(object): - def test_negative_skipfooter_raises(self): - text = """#foo,a,b,c -#foo,a,b,c -#foo,a,b,c -#foo,a,b,c -#foo,a,b,c -#foo,a,b,c -1/1/2000,1.,2.,3. -1/2/2000,4,5,6 -1/3/2000,7,8,9 -""" + def test_invalid_skipfooter(self): + text = "a\n1\n2" + + # see gh-15925 (comment) + msg = "skipfooter must be an integer" + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(text), skipfooter="foo") + + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(text), skipfooter=1.5) + + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(text), skipfooter=True) - with tm.assertRaisesRegexp( - ValueError, 'skip footer cannot be negative'): + msg = "skipfooter cannot be negative" + with tm.assertRaisesRegexp(ValueError, msg): self.read_csv(StringIO(text), skipfooter=-1) def test_sniff_delimiter(self): diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 14146a3ad1e9a..9637b449de6da 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -112,8 +112,8 @@ def test_deprecated_args(self): 'as_recarray': True, 'buffer_lines': True, 'compact_ints': True, - 'skip_footer': True, 'use_unsigned': True, + 'skip_footer': 1, } engines = 'c', 'python' From 751119f3bf9da9ce4291ebe7d67d5ebacb16159d Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 8 Apr 2017 09:25:24 -0400 Subject: [PATCH 359/933] MAINT: Refactor Python engine empty line funcs (#15946) The Python engine's _empty and _check_empty methods were uninformative and undocumented. 
This commit renames them to _is_line_empty and _remove_empty_lines respectively and provides appropriate documentation. --- pandas/io/parsers.py | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a968a2b9623d9..efbf6d64404c0 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2472,7 +2472,19 @@ def _check_for_bom(self, first_row): # return an empty string. return [""] - def _empty(self, line): + def _is_line_empty(self, line): + """ + Check if a line is empty or not. + + Parameters + ---------- + line : str, array-like + The line of data to check. + + Returns + ------- + boolean : Whether or not the line is empty. + """ return not line or all(not x for x in line) def _next_line(self): @@ -2485,11 +2497,12 @@ def _next_line(self): line = self._check_comments([self.data[self.pos]])[0] self.pos += 1 # either uncommented or blank to begin with - if not self.skip_blank_lines and (self._empty(self.data[ - self.pos - 1]) or line): + if (not self.skip_blank_lines and + (self._is_line_empty( + self.data[self.pos - 1]) or line)): break elif self.skip_blank_lines: - ret = self._check_empty([line]) + ret = self._remove_empty_lines([line]) if ret: line = ret[0] break @@ -2508,12 +2521,12 @@ def _next_line(self): line = self._check_comments([orig_line])[0] if self.skip_blank_lines: - ret = self._check_empty([line]) + ret = self._remove_empty_lines([line]) if ret: line = ret[0] break - elif self._empty(orig_line) or line: + elif self._is_line_empty(orig_line) or line: break # This was the first line of the file, @@ -2604,7 +2617,22 @@ def _check_comments(self, lines): ret.append(rl) return ret - def _check_empty(self, lines): + def _remove_empty_lines(self, lines): + """ + Iterate through the lines and remove any that are + either empty or contain only one whitespace value + + Parameters + ---------- + lines : array-like + The array of lines that we are to filter. + + Returns + ------- + filtered_lines : array-like + The same array of lines with the "empty" ones removed. + """ + ret = [] for l in lines: # Remove empty lines and lines with only one whitespace value @@ -2844,7 +2872,7 @@ def _get_lines(self, rows=None): lines = self._check_comments(lines) if self.skip_blank_lines: - lines = self._check_empty(lines) + lines = self._remove_empty_lines(lines) lines = self._check_thousands(lines) return self._check_decimal(lines) From 84de51c04d5d3c5c7f1a28434c957af6fca943bc Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 8 Apr 2017 10:32:09 -0400 Subject: [PATCH 360/933] COMPAT: 32bit compat on indexing for MI.remove_unused_levels (#15948) --- pandas/indexes/multi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 3f84d8b292980..74c45aac8b620 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -1224,6 +1224,7 @@ def _sort_levels_monotonic(self): lev = lev.take(indexer) # indexer to reorder the labels + indexer = _ensure_int64(indexer) ri = lib.get_reverse_indexer(indexer, len(indexer)) lab = algos.take_1d(ri, lab) From 88f585155b6cae58b10f875c481871d510b8c591 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 8 Apr 2017 11:11:09 -0500 Subject: [PATCH 361/933] DOC: Use nbsphinx for notebook doc build (#15581) Adds a new doc-dependency nbsphinx for converting jupyter notebooks to ReST, which works better with the sphinx conversion process. Remvoes the hacky notebook -> HTML -> raw include we had before. 
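A minimal sketch of the new ``skipfooter`` error paths from #15945 above, assuming a pandas build with that patch applied; the three-line CSV string is a hypothetical sample, not taken from the test suite.

.. code-block:: python

    from io import StringIO

    import pandas as pd

    data = "a\n1\n2"  # hypothetical sample: one header row plus two data rows

    # A non-negative integer is still accepted; the last row is skipped.
    pd.read_csv(StringIO(data), skipfooter=1, engine="python")

    # Non-integers (including bool and float) now fail fast with a clear message.
    for bad in ["foo", 1.5, True]:
        try:
            pd.read_csv(StringIO(data), skipfooter=bad, engine="python")
        except ValueError as err:
            print(err)  # "skipfooter must be an integer"

    # Negative values are rejected as well.
    try:
        pd.read_csv(StringIO(data), skipfooter=-1, engine="python")
    except ValueError as err:
        print(err)  # "skipfooter cannot be negative"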
--- ci/requirements-3.5_DOC.run | 1 + ci/requirements-3.5_DOC.sh | 2 +- ci/requirements_all.txt | 2 + doc/README.rst | 4 +- doc/make.py | 122 +++++------------- doc/source/conf.py | 14 +- doc/source/contributing.rst | 5 +- .../{html-styling.ipynb => style.ipynb} | 95 ++++++-------- doc/source/style.rst | 10 -- .../nature_with_gtoc/static/nature.css_t | 37 ++++-- 10 files changed, 118 insertions(+), 174 deletions(-) rename doc/source/{html-styling.ipynb => style.ipynb} (95%) delete mode 100644 doc/source/style.rst diff --git a/ci/requirements-3.5_DOC.run b/ci/requirements-3.5_DOC.run index 740d4714e96b4..7ed60758612bb 100644 --- a/ci/requirements-3.5_DOC.run +++ b/ci/requirements-3.5_DOC.run @@ -5,6 +5,7 @@ nbconvert nbformat notebook matplotlib +seaborn scipy lxml beautifulsoup4 diff --git a/ci/requirements-3.5_DOC.sh b/ci/requirements-3.5_DOC.sh index 1a5d4643edcf2..e43e483d77a73 100644 --- a/ci/requirements-3.5_DOC.sh +++ b/ci/requirements-3.5_DOC.sh @@ -6,6 +6,6 @@ echo "[install DOC_BUILD deps]" pip install pandas-gbq -conda install -n pandas -c conda-forge feather-format +conda install -n pandas -c conda-forge feather-format nbsphinx pandoc conda install -n pandas -c r r rpy2 --yes diff --git a/ci/requirements_all.txt b/ci/requirements_all.txt index 4ff80a478f247..e9f49ed879c86 100644 --- a/ci/requirements_all.txt +++ b/ci/requirements_all.txt @@ -3,6 +3,7 @@ pytest-cov pytest-xdist flake8 sphinx +nbsphinx ipython python-dateutil pytz @@ -19,6 +20,7 @@ scipy numexpr pytables matplotlib +seaborn lxml sqlalchemy bottleneck diff --git a/doc/README.rst b/doc/README.rst index a3733846d9ed1..0ea3234dec348 100644 --- a/doc/README.rst +++ b/doc/README.rst @@ -81,7 +81,9 @@ have ``sphinx`` and ``ipython`` installed. `numpydoc `_ is used to parse the docstrings that follow the Numpy Docstring Standard (see above), but you don't need to install this because a local copy of ``numpydoc`` is included in the pandas source -code. +code. `nbsphinx `_ is used to convert +Jupyter notebooks. You will need to install it if you intend to modify any of +the notebooks included in the documentation. Furthermore, it is recommended to have all `optional dependencies `_ diff --git a/doc/make.py b/doc/make.py index 30cd2ad8b61c9..e70655c3e2f92 100755 --- a/doc/make.py +++ b/doc/make.py @@ -106,106 +106,42 @@ def clean(): @contextmanager -def cleanup_nb(nb): - try: - yield - finally: - try: - os.remove(nb + '.executed') - except OSError: - pass - - -def get_kernel(): - """Find the kernel name for your python version""" - return 'python%s' % sys.version_info.major - - -def execute_nb(src, dst, allow_errors=False, timeout=1000, kernel_name=''): - """ - Execute notebook in `src` and write the output to `dst` - - Parameters - ---------- - src, dst: str - path to notebook - allow_errors: bool - timeout: int - kernel_name: str - defualts to value set in notebook metadata - - Returns - ------- - dst: str - """ - import nbformat - from nbconvert.preprocessors import ExecutePreprocessor - - with io.open(src, encoding='utf-8') as f: - nb = nbformat.read(f, as_version=4) - - ep = ExecutePreprocessor(allow_errors=allow_errors, - timeout=timeout, - kernel_name=kernel_name) - ep.preprocess(nb, resources={}) - - with io.open(dst, 'wt', encoding='utf-8') as f: - nbformat.write(nb, f) - return dst - - -def convert_nb(src, dst, to='html', template_file='basic'): +def maybe_exclude_notebooks(): """ - Convert a notebook `src`. 
- - Parameters - ---------- - src, dst: str - filepaths - to: {'rst', 'html'} - format to export to - template_file: str - name of template file to use. Default 'basic' + Skip building the notebooks if pandoc is not installed. + This assumes that nbsphinx is installed. """ - from nbconvert import HTMLExporter, RSTExporter - - dispatch = {'rst': RSTExporter, 'html': HTMLExporter} - exporter = dispatch[to.lower()](template_file=template_file) - - (body, resources) = exporter.from_filename(src) - with io.open(dst, 'wt', encoding='utf-8') as f: - f.write(body) - return dst + base = os.path.dirname(__file__) + notebooks = [os.path.join(base, 'source', nb) + for nb in ['style.ipynb']] + contents = {} + try: + import nbconvert + nbconvert.utils.pandoc.get_pandoc_version() + except (ImportError, nbconvert.utils.pandoc.PandocMissing): + print("Warning: Pandoc is not installed. Skipping Notebooks.") + for nb in notebooks: + with open(nb, 'rt') as f: + contents[nb] = f.read() + os.remove(nb) + yield + for nb, content in contents.items(): + with open(nb, 'wt') as f: + f.write(content) def html(): check_build() - notebooks = [ - 'source/html-styling.ipynb', - ] - - for nb in notebooks: - with cleanup_nb(nb): - try: - print("Converting %s" % nb) - kernel_name = get_kernel() - executed = execute_nb(nb, nb + '.executed', allow_errors=True, - kernel_name=kernel_name) - convert_nb(executed, nb.rstrip('.ipynb') + '.html') - except (ImportError, IndexError) as e: - print(e) - print("Failed to convert %s" % nb) - - if os.system('sphinx-build -P -b html -d build/doctrees ' - 'source build/html'): - raise SystemExit("Building HTML failed.") - try: - # remove stale file - os.remove('source/html-styling.html') - os.remove('build/html/pandas.zip') - except: - pass + with maybe_exclude_notebooks(): + if os.system('sphinx-build -P -b html -d build/doctrees ' + 'source build/html'): + raise SystemExit("Building HTML failed.") + try: + # remove stale file + os.remove('build/html/pandas.zip') + except: + pass def zip_html(): diff --git a/doc/source/conf.py b/doc/source/conf.py index 0b0de16411e9b..a2a6dca57c34c 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -52,14 +52,16 @@ 'numpydoc', # used to parse numpy-style docstrings for autodoc 'ipython_sphinxext.ipython_directive', 'ipython_sphinxext.ipython_console_highlighting', + 'IPython.sphinxext.ipython_console_highlighting', # lowercase didn't work 'sphinx.ext.intersphinx', 'sphinx.ext.coverage', 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig', 'sphinx.ext.linkcode', + 'nbsphinx', ] - +exclude_patterns = ['**.ipynb_checkpoints'] with open("index.rst") as f: index_rst_lines = f.readlines() @@ -70,15 +72,16 @@ # JP: added from sphinxdocs autosummary_generate = False -if any([re.match("\s*api\s*",l) for l in index_rst_lines]): +if any([re.match("\s*api\s*", l) for l in index_rst_lines]): autosummary_generate = True files_to_delete = [] for f in os.listdir(os.path.dirname(__file__)): - if not f.endswith('.rst') or f.startswith('.') or os.path.basename(f) == 'index.rst': + if (not f.endswith(('.ipynb', '.rst')) or + f.startswith('.') or os.path.basename(f) == 'index.rst'): continue - _file_basename = f.split('.rst')[0] + _file_basename = os.path.splitext(f)[0] _regex_to_match = "\s*{}\s*$".format(_file_basename) if not any([re.match(_regex_to_match, line) for line in index_rst_lines]): files_to_delete.append(f) @@ -261,6 +264,9 @@ # Output file base name for HTML help builder. 
htmlhelp_basename = 'pandas' +# -- Options for nbsphinx ------------------------------------------------ + +nbsphinx_allow_errors = True # -- Options for LaTeX output -------------------------------------------- diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 8af7de688a2ae..aac1e4eade932 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -347,15 +347,14 @@ have ``sphinx`` and ``ipython`` installed. `numpydoc `_ is used to parse the docstrings that follow the Numpy Docstring Standard (see above), but you don't need to install this because a local copy of numpydoc is included in the *pandas* source -code. -`nbconvert `_ and -`nbformat `_ are required to build +code. `nbsphinx `_ is required to build the Jupyter notebooks included in the documentation. If you have a conda environment named ``pandas_dev``, you can install the extra requirements with:: conda install -n pandas_dev sphinx ipython nbconvert nbformat + conda install -n pandas_dev -c conda-forge nbsphinx Furthermore, it is recommended to have all :ref:`optional dependencies `. installed. This is not strictly necessary, but be aware that you will see some error diff --git a/doc/source/html-styling.ipynb b/doc/source/style.ipynb similarity index 95% rename from doc/source/html-styling.ipynb rename to doc/source/style.ipynb index 1a97378fd30b1..7e408f96f6c28 100644 --- a/doc/source/html-styling.ipynb +++ b/doc/source/style.ipynb @@ -4,9 +4,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "# HTML Styling\n", + "\n", "*New in version 0.17.1*\n", "\n", - "
*Provisional: This is a new feature and still under development. We'll be adding features and possibly making breaking changes in future releases. We'd love to hear your [feedback](https://github.com/pandas-dev/pandas/issues).*\n",
+    "*Provisional: This is a new feature and still under development. We'll be adding features and possibly making breaking changes in future releases. We'd love to hear your feedback.*
\n", "\n", "This document is written as a Jupyter Notebook, and can be viewed or downloaded [here](http://nbviewer.ipython.org/github/pandas-dev/pandas/blob/master/doc/source/html-styling.ipynb).\n", "\n", @@ -17,25 +19,14 @@ "\n", "The styling is accomplished using CSS.\n", "You write \"style functions\" that take scalars, `DataFrame`s or `Series`, and return *like-indexed* DataFrames or Series with CSS `\"attribute: value\"` pairs for the values.\n", - "These functions can be incrementally passed to the `Styler` which collects the styles before rendering.\n", - "\n", - "### Contents\n", - "\n", - "- [Building Styles](#Building-Styles)\n", - "- [Finer Control: Slicing](#Finer-Control:-Slicing)\n", - "- [Builtin Styles](#Builtin-Styles)\n", - "- [Other options](#Other-options)\n", - "- [Sharing Styles](#Sharing-Styles)\n", - "- [Limitations](#Limitations)\n", - "- [Terms](#Terms)\n", - "- [Extensibility](#Extensibility)" + "These functions can be incrementally passed to the `Styler` which collects the styles before rendering." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Building Styles\n", + "## Building Styles\n", "\n", "Pass your style functions into one of the following methods:\n", "\n", @@ -58,7 +49,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -83,7 +74,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -103,7 +94,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -156,7 +147,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -204,7 +195,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -230,7 +221,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -286,7 +277,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -336,7 +327,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -354,7 +345,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -389,7 +380,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -407,7 +398,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -425,7 +416,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -450,7 +441,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -468,7 +459,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -491,7 +482,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -503,7 +494,7 @@ "cell_type": "code", "execution_count": 
null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -525,7 +516,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -543,7 +534,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -554,7 +545,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -572,7 +563,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -599,7 +590,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -612,7 +603,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -653,7 +644,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Precision" + "### Precision" ] }, { @@ -667,7 +658,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -689,7 +680,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -724,7 +715,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -752,7 +743,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -792,7 +783,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# CSS Classes\n", + "### CSS Classes\n", "\n", "Certain CSS classes are attached to cells.\n", "\n", @@ -813,7 +804,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Limitations\n", + "### Limitations\n", "\n", "- DataFrame only `(use Series.to_frame().style)`\n", "- The index and columns must be unique\n", @@ -828,7 +819,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Terms\n", + "### Terms\n", "\n", "- Style function: a function that's passed into `Styler.apply` or `Styler.applymap` and returns values like `'css attribute: value'`\n", "- Builtin style functions: style functions that are methods on `Styler`\n", @@ -850,7 +841,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -867,7 +858,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -888,7 +879,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -907,7 +898,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Extensibility\n", + "## Extensibility\n", "\n", "The core of pandas is, and will remain, its \"high-performance, easy-to-use data structures\".\n", "With that in mind, we hope that `DataFrame.style` accomplishes two goals\n", @@ -917,7 +908,7 @@ "\n", "If you build a great library on top of this, let us know and we'll [link](http://pandas.pydata.org/pandas-docs/stable/ecosystem.html) to it.\n", "\n", - "## Subclassing\n", + "### Subclassing\n", "\n", "This section contains a bit of information about the implementation of `Styler`.\n", "Since the feature is so new all of this 
is subject to change, even more so than the end-use API.\n", @@ -933,7 +924,7 @@ "The `.translate` method takes `self.ctx` and builds another dictionary ready to be passed into `Styler.template.render`, the Jinja template.\n", "\n", "\n", - "## Alternate templates\n", + "### Alternate templates\n", "\n", "We've used [Jinja](http://jinja.pocoo.org/) templates to build up the HTML.\n", "The template is stored as a class variable ``Styler.template.``. Subclasses can override that.\n", @@ -961,9 +952,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.1" + "version": "3.6.1" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 } diff --git a/doc/source/style.rst b/doc/source/style.rst deleted file mode 100644 index 506b38bf06e65..0000000000000 --- a/doc/source/style.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. _style: - -.. currentmodule:: pandas - -***** -Style -***** - -.. raw:: html - :file: html-styling.html diff --git a/doc/source/themes/nature_with_gtoc/static/nature.css_t b/doc/source/themes/nature_with_gtoc/static/nature.css_t index 2948f0d68b402..2958678dc8221 100644 --- a/doc/source/themes/nature_with_gtoc/static/nature.css_t +++ b/doc/source/themes/nature_with_gtoc/static/nature.css_t @@ -299,18 +299,35 @@ td.field-body blockquote { padding-left: 30px; } -.rendered_html table { +// Adapted from the new Jupyter notebook style +// https://github.com/jupyter/notebook/blob/c8841b68c4c0739bbee1291e0214771f24194079/notebook/static/notebook/less/renderedhtml.less#L59 +table { margin-left: auto; margin-right: auto; - border-right: 1px solid #cbcbcb; - border-bottom: 1px solid #cbcbcb; -} - -.rendered_html td, th { - border-left: 1px solid #cbcbcb; - border-top: 1px solid #cbcbcb; - margin: 0; - padding: 0.5em .75em; + border: none; + border-collapse: collapse; + border-spacing: 0; + color: @rendered_html_border_color; + table-layout: fixed; +} +thead { + border-bottom: 1px solid @rendered_html_border_color; + vertical-align: bottom; +} +tr, th, td { + text-align: right; + vertical-align: middle; + padding: 0.5em 0.5em; + line-height: normal; + white-space: normal; + max-width: none; + border: none; +} +th { + font-weight: bold; +} +tbody tr:nth-child(odd) { + background: #f5f5f5; } /** From 0aef74fdf1e943f058aa185f47e322aafeeee002 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 8 Apr 2017 15:58:17 -0500 Subject: [PATCH 362/933] DOC: Cleanup for nbsphinx output (#15951) Followup to https://github.com/pandas-dev/pandas/pull/15581 Using the `nbsphinx: hidden` metadata to hide the ouptut, so readers don't see matplotlib's fc-list warning. Make the tables monospaced in CSS. --- doc/source/style.ipynb | 14 ++++++++++++++ .../themes/nature_with_gtoc/static/nature.css_t | 6 ++++++ 2 files changed, 20 insertions(+) diff --git a/doc/source/style.ipynb b/doc/source/style.ipynb index 7e408f96f6c28..38b39bad8b415 100644 --- a/doc/source/style.ipynb +++ b/doc/source/style.ipynb @@ -45,6 +45,20 @@ "Let's see some examples." 
] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "import matplotlib\n", + "# We have this here to trigger matplotlib's font cache stuff.\n", + "# This cell is hidden from the output" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/doc/source/themes/nature_with_gtoc/static/nature.css_t b/doc/source/themes/nature_with_gtoc/static/nature.css_t index 2958678dc8221..1adaaf58d79c5 100644 --- a/doc/source/themes/nature_with_gtoc/static/nature.css_t +++ b/doc/source/themes/nature_with_gtoc/static/nature.css_t @@ -330,6 +330,12 @@ tbody tr:nth-child(odd) { background: #f5f5f5; } +table td.data, table th.row_heading table th.col_heading { + font-family: monospace; + text-align: right; +} + + /** * See also */ From 860d555a9d27958b151412527034fddffb446b31 Mon Sep 17 00:00:00 2001 From: Mikolaj Chwalisz Date: Sat, 8 Apr 2017 17:37:59 -0400 Subject: [PATCH 363/933] DOC: timeseries.rst floating point precision (#15817) closes #15817 Author: Mikolaj Chwalisz Closes #15919 from mchwalisz/timeseries-precision and squashes the following commits: 7b82e8b [Mikolaj Chwalisz] DOC: timeseries.rst floating point precision (#15817) --- doc/source/timeseries.rst | 28 ++++++++++++++++++++++------ pandas/tseries/tools.py | 10 ++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 44c200e13b877..45fe271e9de9d 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -265,17 +265,23 @@ Typical epoch stored units pd.to_datetime([1349720105100, 1349720105200, 1349720105300, 1349720105400, 1349720105500 ], unit='ms') -These *work*, but the results may be unexpected. +.. note:: -.. ipython:: python + Epoch times will be rounded to the nearest nanosecond. - pd.to_datetime([1]) +.. warning:: - pd.to_datetime([1, 3.14], unit='s') + Conversion of float epoch times can lead to inaccurate and unexpected results. + :ref:`Python floats ` have about 15 digits precision in + decimal. Rounding during conversion from float to high precision ``Timestamp`` is + unavoidable. The only way to achieve exact precision is to use a fixed-width + types (e.g. an int64). -.. note:: + .. ipython:: python - Epoch times will be rounded to the nearest nanosecond. + 1490195805.433502912 + pd.to_datetime([1490195805.433, 1490195805.433502912], unit='s') + pd.to_datetime(1490195805433502912, unit='ns') .. _timeseries.origin: @@ -300,6 +306,16 @@ Commonly called 'unix epoch' or POSIX time. pd.to_datetime([1, 2, 3], unit='D') +.. note:: + + Without specifying origin the following examples still evaluate, but the results + may be unexpected. + + .. ipython:: python + + pd.to_datetime([1]) + pd.to_datetime([1, 3.14], unit='s') + .. _timeseries.daterange: Generating Ranges of Timestamps diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index d0f1671f9e309..9d5821d859187 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -315,6 +315,16 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, >>> %timeit pd.to_datetime(s,infer_datetime_format=False) 1 loop, best of 3: 471 ms per loop + Using a unix epoch time + + >>> pd.to_datetime(1490195805, unit='s') + Timestamp('2017-03-22 15:16:45') + >>> pd.to_datetime(1490195805433502912, unit='ns') + Timestamp('2017-03-22 15:16:45.433502912') + + .. warning:: For float arg, precision rounding might happen. 
To prevent + unexpected behavior use a fixed-width exact type. + Using a non-unix epoch origin >>> pd.to_datetime([1, 2, 3], unit='D', From fddd722a5ddd0c4568805e67dccf97ae8f8b1d2e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 8 Apr 2017 17:52:13 -0400 Subject: [PATCH 364/933] DOC: add inverse epoch conversion --- doc/source/timeseries.rst | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 45fe271e9de9d..54e574adc05d4 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -247,6 +247,8 @@ Return NaT for input when unparseable Out[6]: DatetimeIndex(['2009-07-31', 'NaT'], dtype='datetime64[ns]', freq=None) +.. _timeseries.converting.epoch: + Epoch Timestamps ~~~~~~~~~~~~~~~~ @@ -279,10 +281,27 @@ Typical epoch stored units .. ipython:: python - 1490195805.433502912 pd.to_datetime([1490195805.433, 1490195805.433502912], unit='s') pd.to_datetime(1490195805433502912, unit='ns') +.. _timeseries.converting.epoch_inverse: + +From Timestamps to Epoch +~~~~~~~~~~~~~~~~~~~~~~~~ + +To invert the operation from above, namely, to convert from a ``Timestamp`` to a 'unix' epoch: + +.. ipython:: python + + stamps = pd.date_range('2012-10-08 18:15:05', periods=4, freq='D') + stamps + +We convert the ``DatetimeIndex`` to an ``int64`` array, then divide by the conversion unit. + +.. ipython:: python + + stamps.view('int64') // pd.Timedelta(1, unit='s') + .. _timeseries.origin: Using the Origin Parameter @@ -306,16 +325,6 @@ Commonly called 'unix epoch' or POSIX time. pd.to_datetime([1, 2, 3], unit='D') -.. note:: - - Without specifying origin the following examples still evaluate, but the results - may be unexpected. - - .. ipython:: python - - pd.to_datetime([1]) - pd.to_datetime([1, 3.14], unit='s') - .. 
_timeseries.daterange: Generating Ranges of Timestamps From 4e383968f7164f7e372f76afd2193ace57cd24c3 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 8 Apr 2017 17:58:31 -0400 Subject: [PATCH 365/933] TST: Add decorators for redirecting stdout/err (#15952) --- pandas/tests/frame/test_repr_info.py | 4 +- pandas/tests/io/parser/common.py | 81 +++++++-------- pandas/tests/io/parser/python_parser_only.py | 14 +-- pandas/tests/io/parser/test_textreader.py | 23 ++--- pandas/tests/io/test_sql.py | 43 +++----- pandas/tests/plotting/test_frame.py | 21 ++-- pandas/tests/series/test_repr.py | 21 ++-- pandas/util/decorators.py | 14 +-- pandas/util/testing.py | 101 ++++++++++++++++++- 9 files changed, 184 insertions(+), 138 deletions(-) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 024e11e63a924..918938c1758ed 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -191,6 +191,7 @@ def test_latex_repr(self): # GH 12182 self.assertIsNone(df._repr_latex_()) + @tm.capture_stdout def test_info(self): io = StringIO() self.frame.info(buf=io) @@ -198,11 +199,8 @@ def test_info(self): frame = DataFrame(np.random.randn(5, 3)) - import sys - sys.stdout = StringIO() frame.info() frame.info(verbose=False) - sys.stdout = sys.__stdout__ def test_info_wide(self): from pandas import set_option, reset_option diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index ab30301e710a6..6eadf2c61c974 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -1254,6 +1254,7 @@ def test_regex_separator(self): columns=['a', 'b', 'c']) tm.assert_frame_equal(result, expected) + @tm.capture_stdout def test_verbose_import(self): text = """a,b,c,d one,1,2,3 @@ -1265,22 +1266,18 @@ def test_verbose_import(self): one,1,2,3 two,1,2,3""" - buf = StringIO() - sys.stdout = buf + # Engines are verbose in different ways. + self.read_csv(StringIO(text), verbose=True) + output = sys.stdout.getvalue() - try: # engines are verbose in different ways - self.read_csv(StringIO(text), verbose=True) - if self.engine == 'c': - self.assertIn('Tokenization took:', buf.getvalue()) - self.assertIn('Parser memory cleanup took:', buf.getvalue()) - else: # Python engine - self.assertEqual(buf.getvalue(), - 'Filled 3 NA values in column a\n') - finally: - sys.stdout = sys.__stdout__ + if self.engine == 'c': + assert 'Tokenization took:' in output + assert 'Parser memory cleanup took:' in output + else: # Python engine + assert output == 'Filled 3 NA values in column a\n' - buf = StringIO() - sys.stdout = buf + # Reset the stdout buffer. + sys.stdout = StringIO() text = """a,b,c,d one,1,2,3 @@ -1292,16 +1289,15 @@ def test_verbose_import(self): seven,1,2,3 eight,1,2,3""" - try: # engines are verbose in different ways - self.read_csv(StringIO(text), verbose=True, index_col=0) - if self.engine == 'c': - self.assertIn('Tokenization took:', buf.getvalue()) - self.assertIn('Parser memory cleanup took:', buf.getvalue()) - else: # Python engine - self.assertEqual(buf.getvalue(), - 'Filled 1 NA values in column a\n') - finally: - sys.stdout = sys.__stdout__ + self.read_csv(StringIO(text), verbose=True, index_col=0) + output = sys.stdout.getvalue() + + # Engines are verbose in different ways. 
+ if self.engine == 'c': + assert 'Tokenization took:' in output + assert 'Parser memory cleanup took:' in output + else: # Python engine + assert output == 'Filled 1 NA values in column a\n' def test_iteration_open_handle(self): if PY3: @@ -1696,6 +1692,7 @@ class InvalidBuffer(object): with tm.assertRaisesRegexp(ValueError, msg): self.read_csv(mock.Mock()) + @tm.capture_stderr def test_skip_bad_lines(self): # see gh-15925 data = 'a\n1\n1,2,3\n4\n5,6,7' @@ -1706,30 +1703,24 @@ def test_skip_bad_lines(self): with tm.assertRaises(ParserError): self.read_csv(StringIO(data), error_bad_lines=True) - stderr = sys.stderr expected = DataFrame({'a': [1, 4]}) - sys.stderr = StringIO() - try: - out = self.read_csv(StringIO(data), - error_bad_lines=False, - warn_bad_lines=False) - tm.assert_frame_equal(out, expected) + out = self.read_csv(StringIO(data), + error_bad_lines=False, + warn_bad_lines=False) + tm.assert_frame_equal(out, expected) - val = sys.stderr.getvalue() - self.assertEqual(val, '') - finally: - sys.stderr = stderr + val = sys.stderr.getvalue() + assert val == '' + # Reset the stderr buffer. sys.stderr = StringIO() - try: - out = self.read_csv(StringIO(data), - error_bad_lines=False, - warn_bad_lines=True) - tm.assert_frame_equal(out, expected) - val = sys.stderr.getvalue() - self.assertTrue('Skipping line 3' in val) - self.assertTrue('Skipping line 5' in val) - finally: - sys.stderr = stderr + out = self.read_csv(StringIO(data), + error_bad_lines=False, + warn_bad_lines=True) + tm.assert_frame_equal(out, expected) + + val = sys.stderr.getvalue() + assert 'Skipping line 3' in val + assert 'Skipping line 5' in val diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index 510e3c689649c..2949254257d15 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -8,7 +8,6 @@ """ import csv -import sys import pytest import pandas.util.testing as tm @@ -92,16 +91,9 @@ def test_BytesIO_input(self): def test_single_line(self): # see gh-6607: sniff separator - - buf = StringIO() - sys.stdout = buf - - try: - df = self.read_csv(StringIO('1,2'), names=['a', 'b'], - header=None, sep=None) - tm.assert_frame_equal(DataFrame({'a': [1], 'b': [2]}), df) - finally: - sys.stdout = sys.__stdout__ + df = self.read_csv(StringIO('1,2'), names=['a', 'b'], + header=None, sep=None) + tm.assert_frame_equal(DataFrame({'a': [1], 'b': [2]}), df) def test_skipfooter(self): # see gh-6607 diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index b6a9900b0b087..505dc16942f31 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -142,6 +142,7 @@ def test_integer_thousands_alt(self): expected = DataFrame([123456, 12500]) tm.assert_frame_equal(result, expected) + @tm.capture_stderr def test_skip_bad_lines(self): # too many lines, see #2430 for why data = ('a:b:c\n' @@ -165,19 +166,15 @@ def test_skip_bad_lines(self): 2: ['c', 'f', 'i', 'n']} assert_array_dicts_equal(result, expected) - stderr = sys.stderr - sys.stderr = StringIO() - try: - reader = TextReader(StringIO(data), delimiter=':', - header=None, - error_bad_lines=False, - warn_bad_lines=True) - reader.read() - val = sys.stderr.getvalue() - self.assertTrue('Skipping line 4' in val) - self.assertTrue('Skipping line 6' in val) - finally: - sys.stderr = stderr + reader = TextReader(StringIO(data), delimiter=':', + header=None, + error_bad_lines=False, + 
warn_bad_lines=True) + reader.read() + val = sys.stderr.getvalue() + + assert 'Skipping line 4' in val + assert 'Skipping line 6' in val def test_header_not_enough_lines(self): data = ('skip this\n' diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 890f52e8c65e9..5318e8532c58e 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -23,7 +23,6 @@ import sqlite3 import csv import os -import sys import warnings import numpy as np @@ -36,7 +35,7 @@ from pandas import DataFrame, Series, Index, MultiIndex, isnull, concat from pandas import date_range, to_datetime, to_timedelta, Timestamp import pandas.compat as compat -from pandas.compat import StringIO, range, lrange, string_types, PY36 +from pandas.compat import range, lrange, string_types, PY36 from pandas.tseries.tools import format as date_format import pandas.io.sql as sql @@ -2220,6 +2219,7 @@ def test_schema(self): cur = self.conn.cursor() cur.execute(create_sql) + @tm.capture_stdout def test_execute_fail(self): create_sql = """ CREATE TABLE test @@ -2236,14 +2236,10 @@ def test_execute_fail(self): sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn) sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.conn) - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.execute, - 'INSERT INTO test VALUES("foo", "bar", 7)', - self.conn) - finally: - sys.stdout = sys.__stdout__ + with pytest.raises(Exception): + sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) + @tm.capture_stdout def test_execute_closed_connection(self): create_sql = """ CREATE TABLE test @@ -2259,12 +2255,9 @@ def test_execute_closed_connection(self): sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn) self.conn.close() - try: - sys.stdout = StringIO() - self.assertRaises(Exception, tquery, "select * from test", - con=self.conn) - finally: - sys.stdout = sys.__stdout__ + + with pytest.raises(Exception): + tquery("select * from test", con=self.conn) # Initialize connection again (needed for tearDown) self.setUp() @@ -2534,6 +2527,7 @@ def test_schema(self): cur.execute(drop_sql) cur.execute(create_sql) + @tm.capture_stdout def test_execute_fail(self): _skip_if_no_pymysql() drop_sql = "DROP TABLE IF EXISTS test" @@ -2553,14 +2547,10 @@ def test_execute_fail(self): sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn) sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.conn) - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.execute, - 'INSERT INTO test VALUES("foo", "bar", 7)', - self.conn) - finally: - sys.stdout = sys.__stdout__ + with pytest.raises(Exception): + sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) + @tm.capture_stdout def test_execute_closed_connection(self): _skip_if_no_pymysql() drop_sql = "DROP TABLE IF EXISTS test" @@ -2579,12 +2569,9 @@ def test_execute_closed_connection(self): sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn) self.conn.close() - try: - sys.stdout = StringIO() - self.assertRaises(Exception, tquery, "select * from test", - con=self.conn) - finally: - sys.stdout = sys.__stdout__ + + with pytest.raises(Exception): + tquery("select * from test", con=self.conn) # Initialize connection again (needed for tearDown) self.setUp() diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 48af366f24ea4..1527637ea3eff 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py 
@@ -12,7 +12,7 @@ from pandas import (Series, DataFrame, MultiIndex, PeriodIndex, date_range, bdate_range) from pandas.types.api import is_list_like -from pandas.compat import (range, lrange, StringIO, lmap, lzip, u, zip, PY3) +from pandas.compat import range, lrange, lmap, lzip, u, zip, PY3 from pandas.formats.printing import pprint_thing import pandas.util.testing as tm from pandas.util.testing import slow @@ -1558,8 +1558,8 @@ def test_line_label_none(self): self.assertEqual(ax.get_legend().get_texts()[0].get_text(), 'None') @slow + @tm.capture_stdout def test_line_colors(self): - import sys from matplotlib import cm custom_colors = 'rgcby' @@ -1568,16 +1568,13 @@ def test_line_colors(self): ax = df.plot(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) - tmp = sys.stderr - sys.stderr = StringIO() - try: - tm.close() - ax2 = df.plot(colors=custom_colors) - lines2 = ax2.get_lines() - for l1, l2 in zip(ax.get_lines(), lines2): - self.assertEqual(l1.get_color(), l2.get_color()) - finally: - sys.stderr = tmp + tm.close() + + ax2 = df.plot(colors=custom_colors) + lines2 = ax2.get_lines() + + for l1, l2 in zip(ax.get_lines(), lines2): + self.assertEqual(l1.get_color(), l2.get_color()) tm.close() diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 99a406a71b12b..188b96638344c 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -3,13 +3,15 @@ from datetime import datetime, timedelta +import sys + import numpy as np import pandas as pd from pandas import (Index, Series, DataFrame, date_range) from pandas.core.index import MultiIndex -from pandas.compat import StringIO, lrange, range, u +from pandas.compat import lrange, range, u from pandas import compat import pandas.util.testing as tm @@ -112,20 +114,15 @@ def test_tidy_repr(self): a.name = 'title1' repr(a) # should not raise exception + @tm.capture_stderr def test_repr_bool_fails(self): s = Series([DataFrame(np.random.randn(2, 2)) for i in range(5)]) - import sys - - buf = StringIO() - tmp = sys.stderr - sys.stderr = buf - try: - # it works (with no Cython exception barf)! - repr(s) - finally: - sys.stderr = tmp - self.assertEqual(buf.getvalue(), '') + # It works (with no Cython exception barf)! 
+ repr(s) + + output = sys.stderr.getvalue() + assert output == '' def test_repr_name_iterable_indexable(self): s = Series([1, 2, 3], name=np.int64(3)) diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 4e1719958e8b7..ca588e2a0432e 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -1,7 +1,6 @@ -from pandas.compat import StringIO, callable, signature +from pandas.compat import callable, signature from pandas._libs.lib import cache_readonly # noqa import types -import sys import warnings from textwrap import dedent from functools import wraps, update_wrapper @@ -196,17 +195,6 @@ def indent(text, indents=1): return jointext.join(text.split('\n')) -def suppress_stdout(f): - def wrapped(*args, **kwargs): - try: - sys.stdout = StringIO() - f(*args, **kwargs) - finally: - sys.stdout = sys.__stdout__ - - return wrapped - - def make_signature(func): """ Returns a string repr of the arg list of a func call, with any defaults diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 9d7b004374318..ef0fa04548cab 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -38,7 +38,7 @@ from pandas.compat import ( filter, map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter, raise_with_traceback, httplib, is_platform_windows, is_platform_32bit, - PY3 + StringIO, PY3 ) from pandas.computation import expressions as expr @@ -629,6 +629,105 @@ def _valid_locales(locales, normalize): return list(filter(_can_set_locale, map(normalizer, locales))) +# ----------------------------------------------------------------------------- +# Stdout / stderr decorators + + +def capture_stdout(f): + """ + Decorator to capture stdout in a buffer so that it can be checked + (or suppressed) during testing. + + Parameters + ---------- + f : callable + The test that is capturing stdout. + + Returns + ------- + f : callable + The decorated test ``f``, which captures stdout. + + Examples + -------- + + >>> from pandas.util.testing import capture_stdout + >>> + >>> import sys + >>> + >>> @capture_stdout + ... def test_print_pass(): + ... print("foo") + ... out = sys.stdout.getvalue() + ... assert out == "foo\n" + >>> + >>> @capture_stdout + ... def test_print_fail(): + ... print("foo") + ... out = sys.stdout.getvalue() + ... assert out == "bar\n" + ... + AssertionError: assert 'foo\n' == 'bar\n' + """ + + @wraps(f) + def wrapper(*args, **kwargs): + try: + sys.stdout = StringIO() + f(*args, **kwargs) + finally: + sys.stdout = sys.__stdout__ + + return wrapper + + +def capture_stderr(f): + """ + Decorator to capture stderr in a buffer so that it can be checked + (or suppressed) during testing. + + Parameters + ---------- + f : callable + The test that is capturing stderr. + + Returns + ------- + f : callable + The decorated test ``f``, which captures stderr. + + Examples + -------- + + >>> from pandas.util.testing import capture_stderr + >>> + >>> import sys + >>> + >>> @capture_stderr + ... def test_stderr_pass(): + ... sys.stderr.write("foo") + ... out = sys.stderr.getvalue() + ... assert out == "foo\n" + >>> + >>> @capture_stderr + ... def test_stderr_fail(): + ... sys.stderr.write("foo") + ... out = sys.stderr.getvalue() + ... assert out == "bar\n" + ... 
+ AssertionError: assert 'foo\n' == 'bar\n' + """ + + @wraps(f) + def wrapper(*args, **kwargs): + try: + sys.stderr = StringIO() + f(*args, **kwargs) + finally: + sys.stderr = sys.__stderr__ + + return wrapper + # ----------------------------------------------------------------------------- # Console debugging tools From f35209e2279154ed7060dd0e17d41da96f9c0186 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 8 Apr 2017 14:59:06 -0700 Subject: [PATCH 366/933] BUG: Correct Timestamp localization with tz near DST (#11481) (#15934) * BUG: Timestamp doesn't respect tz DST closes #11481 closes #15777 * DOC: add doc-strings to tz_convert/tz_localize in tslib.pyx TST: more tests, xref #15823, xref #11708 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/_libs/tslib.pyx | 40 +++++++++++++++++++- pandas/tests/series/test_indexing.py | 6 +-- pandas/tests/tseries/test_timezones.py | 51 +++++++++++++++++++++++++- 4 files changed, 91 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4e29e01415ba6..7664688ffa4f4 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1124,6 +1124,7 @@ Conversion - Bug in ``Timestamp.replace`` now raises ``TypeError`` when incorrect argument names are given; previously this raised ``ValueError`` (:issue:`15240`) - Bug in ``Timestamp.replace`` with compat for passing long integers (:issue:`15030`) - Bug in ``Timestamp`` returning UTC based time/date attributes when a timezone was provided (:issue:`13303`) +- Bug in ``Timestamp`` incorrectly localizing timezones during construction (:issue:`11481`, :issue:`15777`) - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) - Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`) - Bug in catching an overflow in ``Timestamp`` + ``Timedelta/Offset`` operations (:issue:`15126`) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index cc1439711c1d4..ed0bb263ed6cf 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1569,7 +1569,9 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, ts = obj.value if tz is not None: # shift for _localize_tso - ts = tz_convert_single(ts, tz, 'UTC') + ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, + ambiguous='raise', + errors='raise')[0] except ValueError: try: ts = parse_datetime_string( @@ -4073,7 +4075,23 @@ except: have_pytz = False +@cython.boundscheck(False) +@cython.wraparound(False) def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): + """ + Convert the values (in i8) from timezone1 to timezone2 + + Parameters + ---------- + vals : int64 ndarray + tz1 : string / timezone object + tz2 : string / timezone object + + Returns + ------- + int64 ndarray of converted + """ + cdef: ndarray[int64_t] utc_dates, tt, result, trans, deltas Py_ssize_t i, j, pos, n = len(vals) @@ -4175,6 +4193,23 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): def tz_convert_single(int64_t val, object tz1, object tz2): + """ + Convert the val (in i8) from timezone1 to timezone2 + + This is a single timezone versoin of tz_convert + + Parameters + ---------- + val : int64 + tz1 : string / timezone object + tz2 : string / timezone object + + Returns + ------- + int64 converted + + """ + cdef: ndarray[int64_t] trans, deltas Py_ssize_t pos @@ -4374,7 +4409,7 @@ cpdef ndarray _unbox_utcoffsets(object transinfo): def tz_localize_to_utc(ndarray[int64_t] vals, 
object tz, object ambiguous=None, object errors='raise'): """ - Localize tzinfo-naive DateRange to given time zone (using pytz). If + Localize tzinfo-naive i8 to given time zone (using pytz). If there are ambiguities in the values, raise AmbiguousTimeError. Returns @@ -4546,6 +4581,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, return result + cdef inline bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n): cdef Py_ssize_t pivot, left = 0, right = n diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 0b6c0c601ac72..48410c1c73479 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1024,9 +1024,9 @@ def test_setitem_with_tz_dst(self): # scalar s = orig.copy() s[1] = pd.Timestamp('2011-01-01', tz=tz) - exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz), - pd.Timestamp('2011-01-01 00:00', tz=tz), - pd.Timestamp('2016-11-06 02:00', tz=tz)]) + exp = pd.Series([pd.Timestamp('2016-11-06 00:00-04:00', tz=tz), + pd.Timestamp('2011-01-01 00:00-05:00', tz=tz), + pd.Timestamp('2016-11-06 01:00-05:00', tz=tz)]) tm.assert_series_equal(s, exp) s = orig.copy() diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 1fc0e1b73df6b..3e1b29f4c282c 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -1,4 +1,5 @@ # pylint: disable-msg=E1101,W0612 +import pytest import pytz import numpy as np from distutils.version import LooseVersion @@ -159,6 +160,52 @@ def test_timestamp_constructed_by_date_and_tz_explicit(self): self.assertEqual(result.hour, expected.hour) self.assertEqual(result, expected) + def test_timestamp_constructor_near_dst_boundary(self): + # GH 11481 & 15777 + # Naive string timestamps were being localized incorrectly + # with tz_convert_single instead of tz_localize_to_utc + + for tz in ['Europe/Brussels', 'Europe/Prague']: + result = Timestamp('2015-10-25 01:00', tz=tz) + expected = Timestamp('2015-10-25 01:00').tz_localize(tz) + assert result == expected + + with pytest.raises(pytz.AmbiguousTimeError): + Timestamp('2015-10-25 02:00', tz=tz) + + result = Timestamp('2017-03-26 01:00', tz='Europe/Paris') + expected = Timestamp('2017-03-26 01:00').tz_localize('Europe/Paris') + assert result == expected + + with pytest.raises(pytz.NonExistentTimeError): + Timestamp('2017-03-26 02:00', tz='Europe/Paris') + + # GH 11708 + result = to_datetime("2015-11-18 15:30:00+05:30").tz_localize( + 'UTC').tz_convert('Asia/Kolkata') + expected = Timestamp('2015-11-18 15:30:00+0530', tz='Asia/Kolkata') + assert result == expected + + # GH 15823 + result = Timestamp('2017-03-26 00:00', tz='Europe/Paris') + expected = Timestamp('2017-03-26 00:00:00+0100', tz='Europe/Paris') + assert result == expected + + result = Timestamp('2017-03-26 01:00', tz='Europe/Paris') + expected = Timestamp('2017-03-26 01:00:00+0100', tz='Europe/Paris') + assert result == expected + + with pytest.raises(pytz.NonExistentTimeError): + Timestamp('2017-03-26 02:00', tz='Europe/Paris') + result = Timestamp('2017-03-26 02:00:00+0100', tz='Europe/Paris') + expected = Timestamp(result.value).tz_localize( + 'UTC').tz_convert('Europe/Paris') + assert result == expected + + result = Timestamp('2017-03-26 03:00', tz='Europe/Paris') + expected = Timestamp('2017-03-26 03:00:00+0200', tz='Europe/Paris') + assert result == expected + def test_timestamp_to_datetime_tzoffset(self): # tzoffset from dateutil.tz import tzoffset @@ -517,8 
+564,8 @@ def f(): freq="H")) if dateutil.__version__ != LooseVersion('2.6.0'): # GH 14621 - self.assertEqual(times[-1], Timestamp('2013-10-27 01:00', tz=tz, - freq="H")) + self.assertEqual(times[-1], Timestamp('2013-10-27 01:00:00+0000', + tz=tz, freq="H")) def test_ambiguous_nat(self): tz = self.tz('US/Eastern') From 56c20195ab2a96c752d12e7aaa75912019de369e Mon Sep 17 00:00:00 2001 From: Tong SHEN Date: Sun, 9 Apr 2017 22:40:12 +0800 Subject: [PATCH 367/933] DOC: Fix a comment typo in pandas/tools/concat.py (#15956) --- pandas/tools/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tools/concat.py b/pandas/tools/concat.py index 6405106118472..5df9a5abb78b2 100644 --- a/pandas/tools/concat.py +++ b/pandas/tools/concat.py @@ -278,7 +278,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, break else: - # filter out the empties if we have not multi-index possibiltes + # filter out the empties if we have not multi-index possibilities # note to keep empty Series as it affects the result columns / name non_empties = [obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, Series)] From c3c60f0d7a782cd429e3d7115a99cdc068a6d528 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 9 Apr 2017 11:28:51 -0400 Subject: [PATCH 368/933] DOC/API/TST: add pd.unique doc-string & consistent return value for Categorical/tz-aware datetime (#15939) closes #9346 --- doc/source/whatsnew/v0.20.0.txt | 71 +++++++++++ pandas/core/algorithms.py | 89 ++++++++++++- pandas/core/base.py | 20 ++- pandas/core/categorical.py | 27 ++++ pandas/core/series.py | 10 +- pandas/tests/test_algos.py | 220 ++++++++++++++++++++++++-------- 6 files changed, 371 insertions(+), 66 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7664688ffa4f4..4c0594c024774 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -593,6 +593,76 @@ result. On the other hand, this might have backward incompatibilities: e.g. compared to numpy arrays, ``Index`` objects are not mutable. To get the original ndarray, you can always convert explicitly using ``np.asarray(idx.hour)``. +.. _whatsnew_0200.api_breaking.unique: + +pd.unique will now be consistent with extension types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In prior versions, using ``Series.unique()`` and ``pd.unique(Series)`` on ``Categorical`` and tz-aware datatypes would yield different return types. These are now made consistent. (:issue:`15903`) + +- Datetime tz-aware + + Previous Behavior: + + .. code-block:: ipython + + # Series + In [5]: pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]).unique() + Out[5]: array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], dtype=object) + + In [6]: pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')])) + Out[6]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') + + # Index + In [7]: pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]).unique() + Out[7]: DatetimeIndex(['2016-01-01 00:00:00-05:00'], dtype='datetime64[ns, US/Eastern]', freq=None) + + In [8]: pd.unique([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]) + Out[8]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') + + New Behavior: + + ..
ipython:: python + + # Series, returns an array of Timestamp tz-aware + pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]).unique() + pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')])) + + # Index, returns a DatetimeIndex + pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]).unique() + pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')])) + +- Categoricals + + Previous Behavior: + + .. code-block:: ipython + + In [1]: pd.Series(pd.Categorical(list('baabc'))).unique() + Out[1]: + [b, a, c] + Categories (3, object): [b, a, c] + + In [2]: pd.unique(pd.Series(pd.Categorical(list('baabc')))) + Out[2]: array(['b', 'a', 'c'], dtype=object) + + New Behavior: + + .. ipython:: python + + # returns a Categorical + pd.Series(pd.Categorical(list('baabc'))).unique() + pd.unique(pd.Series(pd.Categorical(list('baabc')))) + .. _whatsnew_0200.api_breaking.s3: S3 File Handling @@ -1148,6 +1218,7 @@ Conversion - Bug in ``DataFrame`` construction with nulls and datetimes in a list-like (:issue:`15869`) - Bug in ``DataFrame.fillna()`` with tz-aware datetimes (:issue:`15855`) - Bug in ``is_string_dtype``, ``is_timedelta64_ns_dtype``, and ``is_string_like_dtype`` in which an error was raised when ``None`` was passed in (:issue:`15941`) +- Bug in the return type of ``pd.unique`` on a ``Categorical``, which was returning an ndarray and not a ``Categorical`` (:issue:`15903`) Indexing ^^^^^^^^ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 9b88ea23483bd..654e38e43b6c0 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -267,11 +267,85 @@ def match(to_match, values, na_sentinel=-1): return result -def unique1d(values): +def unique(values): """ - Hash table-based unique + Hash table-based unique. Uniques are returned in order + of appearance. This does NOT sort. + + Significantly faster than numpy.unique. Includes NA values. + + Parameters + ---------- + values : 1d array-like + + Returns + ------- + unique values. + - If the input is an Index, the return is an Index + - If the input is a Categorical dtype, the return is a Categorical + - If the input is a Series/ndarray, the return will be an ndarray + + Examples + -------- + pd.unique(pd.Series([2, 1, 3, 3])) + array([2, 1, 3]) + + >>> pd.unique(pd.Series([2] + [1] * 5)) + array([2, 1]) + + >>> pd.unique(Series([pd.Timestamp('20160101'), + ... pd.Timestamp('20160101')])) + array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]') + + >>> pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + ... pd.Timestamp('20160101', tz='US/Eastern')])) + array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], + dtype=object) + + >>> pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + ... pd.Timestamp('20160101', tz='US/Eastern')])) + DatetimeIndex(['2016-01-01 00:00:00-05:00'], + ... dtype='datetime64[ns, US/Eastern]', freq=None) + + >>> pd.unique(list('baabc')) + array(['b', 'a', 'c'], dtype=object) + + An unordered Categorical will return categories in the + order of appearance. + + >>> pd.unique(Series(pd.Categorical(list('baabc')))) + [b, a, c] + Categories (3, object): [b, a, c] + + >>> pd.unique(Series(pd.Categorical(list('baabc'), + ... categories=list('abc')))) + [b, a, c] + Categories (3, object): [b, a, c] + + An ordered Categorical preserves the category ordering.
+ + >>> pd.unique(Series(pd.Categorical(list('baabc'), + ... categories=list('abc'), + ... ordered=True))) + [b, a, c] + Categories (3, object): [a < b < c] + + See Also + -------- + pandas.Index.unique + pandas.Series.unique + """ + values = _ensure_arraylike(values) + + # categorical is a fast-path + # this will coerce Categorical, CategoricalIndex, + # and category dtypes Series to same return of Category + if is_categorical_dtype(values): + values = getattr(values, '.values', values) + return values.unique() + original = values htable, _, values, dtype, ndtype = _get_hashtable_algo(values) @@ -279,10 +353,17 @@ def unique1d(values): uniques = table.unique(values) uniques = _reconstruct_data(uniques, dtype, original) + if isinstance(original, ABCSeries) and is_datetime64tz_dtype(dtype): + # we are special casing datetime64tz_dtype + # to return an object array of tz-aware Timestamps + + # TODO: it must return DatetimeArray with tz in pandas 2.0 + uniques = uniques.asobject.values + return uniques -unique = unique1d +unique1d = unique def isin(comps, values): @@ -651,7 +732,7 @@ def mode(values): if is_categorical_dtype(values): if isinstance(values, Series): - return Series(values.values.mode()) + return Series(values.values.mode(), name=values.name) return values.mode() values, dtype, ndtype = _ensure_data(values) diff --git a/pandas/core/base.py b/pandas/core/base.py index 3401c7c59cb56..56bdeee6982d5 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -855,13 +855,24 @@ def value_counts(self, normalize=False, sort=True, ascending=False, _shared_docs['unique'] = ( """ - Return %(unique)s of unique values in the object. - Significantly faster than numpy.unique. Includes NA values. - The order of the original is preserved. + Return unique values in the object. Uniques are returned in order + of appearance; this does NOT sort. Hash table-based unique. + + Parameters + ---------- + values : 1d array-like Returns ------- - uniques : %(unique)s + unique values. + - If the input is an Index, the return is an Index + - If the input is a Categorical dtype, the return is a Categorical + - If the input is a Series/ndarray, the return will be an ndarray + + See Also + -------- + pandas.unique + pandas.Categorical.unique """) @Appender(_shared_docs['unique'] % _indexops_doc_kwargs) @@ -873,6 +884,7 @@ def unique(self): else: from pandas.core.algorithms import unique1d result = unique1d(values) + return result def nunique(self, dropna=True): diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 0fcf8664e755d..e3d6792604c4c 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1895,6 +1895,33 @@ def unique(self): Returns ------- unique values : ``Categorical`` + + Examples + -------- + An unordered Categorical will return categories in the + order of appearance. + + >>> pd.Categorical(list('baabc')).unique() + [b, a, c] + Categories (3, object): [b, a, c] + + >>> pd.Categorical(list('baabc'), categories=list('abc')).unique() + [b, a, c] + Categories (3, object): [b, a, c] + + An ordered Categorical preserves the category ordering. + + >>> pd.Categorical(list('baabc'), + ... categories=list('abc'), + ...
ordered=True).unique() + [b, a, c] + Categories (3, object): [a < b < c] + + See Also + -------- + pandas.unique + pandas.CategoricalIndex.unique + + """ # unlike np.unique, unique1d does not sort diff --git a/pandas/core/series.py b/pandas/core/series.py index 760abc20351cf..5ee3ca73742ae 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1204,10 +1204,14 @@ def mode(self): @Appender(base._shared_docs['unique'] % _shared_doc_kwargs) def unique(self): result = super(Series, self).unique() + if is_datetime64tz_dtype(self.dtype): - # to return array of Timestamp with tz - # ToDo: it must return DatetimeArray with tz in pandas 2.0 - return result.asobject.values + # we are special casing datetime64tz_dtype + # to return an object array of tz-aware Timestamps + + # TODO: it must return DatetimeArray with tz in pandas 2.0 + result = result.asobject.values + return result @Appender(base._shared_docs['drop_duplicates'] % _shared_doc_kwargs) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index d893183dae0ed..d9f81968c684d 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -6,7 +6,8 @@ from numpy import nan from datetime import datetime from itertools import permutations -from pandas import Series, Categorical, CategoricalIndex, Index +from pandas import (Series, Categorical, CategoricalIndex, Index, + Timestamp, DatetimeIndex) import pandas as pd from pandas import compat @@ -34,7 +35,7 @@ def test_ints(self): expected = Series(np.array([0, 2, 1, 1, 0, 2, np.nan, 0])) tm.assert_series_equal(result, expected) - s = pd.Series(np.arange(5), dtype=np.float32) + s = Series(np.arange(5), dtype=np.float32) result = algos.match(s, [2, 4]) expected = np.array([-1, -1, 0, -1, 1], dtype=np.int64) self.assert_numpy_array_equal(result, expected) @@ -204,20 +205,20 @@ def test_mixed(self): def test_datelike(self): # M8 - v1 = pd.Timestamp('20130101 09:00:00.00004') - v2 = pd.Timestamp('20130101') + v1 = Timestamp('20130101 09:00:00.00004') + v2 = Timestamp('20130101') x = Series([v1, v1, v1, v2, v2, v1]) labels, uniques = algos.factorize(x) exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) - exp = pd.DatetimeIndex([v1, v2]) + exp = DatetimeIndex([v1, v2]) self.assert_index_equal(uniques, exp) labels, uniques = algos.factorize(x, sort=True) exp = np.array([1, 1, 1, 0, 0, 1], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) - exp = pd.DatetimeIndex([v2, v1]) + exp = DatetimeIndex([v2, v1]) self.assert_index_equal(uniques, exp) # period @@ -350,7 +351,7 @@ def test_datetime64_dtype_array_returned(self): tm.assert_numpy_array_equal(result, expected) self.assertEqual(result.dtype, expected.dtype) - s = pd.Series(dt_index) + s = Series(dt_index) result = algos.unique(s) tm.assert_numpy_array_equal(result, expected) self.assertEqual(result.dtype, expected.dtype) @@ -369,7 +370,7 @@ def test_timedelta64_dtype_array_returned(self): tm.assert_numpy_array_equal(result, expected) self.assertEqual(result.dtype, expected.dtype) - s = pd.Series(td_index) + s = Series(td_index) result = algos.unique(s) tm.assert_numpy_array_equal(result, expected) self.assertEqual(result.dtype, expected.dtype) @@ -380,10 +381,119 @@ def test_timedelta64_dtype_array_returned(self): self.assertEqual(result.dtype, expected.dtype) def test_uint64_overflow(self): - s = pd.Series([1, 2, 2**63, 2**63], dtype=np.uint64) + s = Series([1, 2, 2**63, 2**63], dtype=np.uint64) exp = np.array([1, 2, 2**63], dtype=np.uint64)
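        # reader's note, not part of the diff: 2**63 is one past
        # np.iinfo(np.int64).max, so these values only survive a round-trip
        # through unique() when a uint64-backed hash table is used; a signed
        # int64 table would overflow exactly as this test guards against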
tm.assert_numpy_array_equal(algos.unique(s), exp) + def test_categorical(self): + + # we are expecting to return in the order + # of appearance + expected = pd.Categorical(list('bac'), + categories=list('bac')) + + # we are expecting to return in the order + # of the categories + expected_o = pd.Categorical(list('bac'), + categories=list('abc'), + ordered=True) + + # GH 15939 + c = pd.Categorical(list('baabc')) + result = c.unique() + tm.assert_categorical_equal(result, expected) + + result = algos.unique(c) + tm.assert_categorical_equal(result, expected) + + c = pd.Categorical(list('baabc'), ordered=True) + result = c.unique() + tm.assert_categorical_equal(result, expected_o) + + result = algos.unique(c) + tm.assert_categorical_equal(result, expected_o) + + # Series of categorical dtype + s = Series(pd.Categorical(list('baabc')), name='foo') + result = s.unique() + tm.assert_categorical_equal(result, expected) + + result = pd.unique(s) + tm.assert_categorical_equal(result, expected) + + # CI -> return CI + ci = pd.CategoricalIndex(pd.Categorical(list('baabc'), + categories=list('bac'))) + expected = pd.CategoricalIndex(expected) + result = ci.unique() + tm.assert_index_equal(result, expected) + + result = pd.unique(ci) + tm.assert_index_equal(result, expected) + + def test_datetime64tz_aware(self): + # GH 15939 + + result = Series( + pd.Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')])).unique() + expected = np.array([Timestamp('2016-01-01 00:00:00-0500', + tz='US/Eastern')], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = pd.Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')]).unique() + expected = DatetimeIndex(['2016-01-01 00:00:00'], + dtype='datetime64[ns, US/Eastern]', freq=None) + tm.assert_index_equal(result, expected) + + result = pd.unique( + Series(pd.Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')]))) + expected = np.array([Timestamp('2016-01-01 00:00:00-0500', + tz='US/Eastern')], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = pd.unique(pd.Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')])) + expected = DatetimeIndex(['2016-01-01 00:00:00'], + dtype='datetime64[ns, US/Eastern]', freq=None) + tm.assert_index_equal(result, expected) + + def test_order_of_appearance(self): + # 9346 + # light testing of guarantee of order of appearance + # these also are the doc-examples + result = pd.unique(Series([2, 1, 3, 3])) + tm.assert_numpy_array_equal(result, + np.array([2, 1, 3], dtype='int64')) + + result = pd.unique(Series([2] + [1] * 5)) + tm.assert_numpy_array_equal(result, + np.array([2, 1], dtype='int64')) + + result = pd.unique(Series([Timestamp('20160101'), + Timestamp('20160101')])) + expected = np.array(['2016-01-01T00:00:00.000000000'], + dtype='datetime64[ns]') + tm.assert_numpy_array_equal(result, expected) + + result = pd.unique(pd.Index( + [Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')])) + expected = DatetimeIndex(['2016-01-01 00:00:00'], + dtype='datetime64[ns, US/Eastern]', + freq=None) + tm.assert_index_equal(result, expected) + + result = pd.unique(list('aabc')) + expected = np.array(['a', 'b', 'c'], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = pd.unique(Series(pd.Categorical(list('aabc')))) + expected = pd.Categorical(list('abc')) + tm.assert_categorical_equal(result, expected) + class 
TestIsin(tm.TestCase): @@ -403,15 +513,15 @@ def test_basic(self): expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series([1, 2]), [1]) + result = algos.isin(Series([1, 2]), [1]) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series([1, 2]), pd.Series([1])) + result = algos.isin(Series([1, 2]), Series([1])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series([1, 2]), set([1])) + result = algos.isin(Series([1, 2]), set([1])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) @@ -419,11 +529,11 @@ def test_basic(self): expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series(['a', 'b']), pd.Series(['a'])) + result = algos.isin(Series(['a', 'b']), Series(['a'])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series(['a', 'b']), set(['a'])) + result = algos.isin(Series(['a', 'b']), set(['a'])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) @@ -520,33 +630,33 @@ def test_value_counts_nat(self): self.assertEqual(len(vc), 1) self.assertEqual(len(vc_with_na), 2) - exp_dt = pd.Series({pd.Timestamp('2014-01-01 00:00:00'): 1}) + exp_dt = Series({Timestamp('2014-01-01 00:00:00'): 1}) tm.assert_series_equal(algos.value_counts(dt), exp_dt) # TODO same for (timedelta) def test_value_counts_datetime_outofbounds(self): # GH 13663 - s = pd.Series([datetime(3000, 1, 1), datetime(5000, 1, 1), - datetime(5000, 1, 1), datetime(6000, 1, 1), - datetime(3000, 1, 1), datetime(3000, 1, 1)]) + s = Series([datetime(3000, 1, 1), datetime(5000, 1, 1), + datetime(5000, 1, 1), datetime(6000, 1, 1), + datetime(3000, 1, 1), datetime(3000, 1, 1)]) res = s.value_counts() exp_index = pd.Index([datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)], dtype=object) - exp = pd.Series([3, 2, 1], index=exp_index) + exp = Series([3, 2, 1], index=exp_index) tm.assert_series_equal(res, exp) # GH 12424 - res = pd.to_datetime(pd.Series(['2362-01-01', np.nan]), + res = pd.to_datetime(Series(['2362-01-01', np.nan]), errors='ignore') - exp = pd.Series(['2362-01-01', np.nan], dtype=object) + exp = Series(['2362-01-01', np.nan], dtype=object) tm.assert_series_equal(res, exp) def test_categorical(self): s = Series(pd.Categorical(list('aaabbc'))) result = s.value_counts() - expected = pd.Series([3, 2, 1], - index=pd.CategoricalIndex(['a', 'b', 'c'])) + expected = Series([3, 2, 1], + index=pd.CategoricalIndex(['a', 'b', 'c'])) tm.assert_series_equal(result, expected, check_index_type=True) # preserve order? 
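To see the return-type consistency these hunks pin down end to end, here is a minimal standalone sketch (assuming pandas >= 0.20, where this patch series lands; the snippet is illustrative and not part of the diff itself):

.. code-block:: python

   import pandas as pd

   s = pd.Series(pd.Categorical(list('baabc')))

   # pd.unique on a categorical-dtype Series now returns a Categorical,
   # with categories in order of appearance, matching Series.unique()
   pd.unique(s)      # [b, a, c], Categories (3, object): [b, a, c]
   s.unique()        # the same Categorical

   # value_counts remains a Series, indexed by a CategoricalIndex
   s.value_counts()  # counts of 2, 2 and 1 across the categories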
@@ -559,11 +669,11 @@ def test_categorical_nans(self): s = Series(pd.Categorical(list('aaaaabbbcc'))) # 4,3,2,1 (nan) s.iloc[1] = np.nan result = s.value_counts() - expected = pd.Series([4, 3, 2], index=pd.CategoricalIndex( + expected = Series([4, 3, 2], index=pd.CategoricalIndex( ['a', 'b', 'c'], categories=['a', 'b', 'c'])) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) - expected = pd.Series([ + expected = Series([ 4, 3, 2, 1 ], index=pd.CategoricalIndex(['a', 'b', 'c', np.nan])) tm.assert_series_equal(result, expected, check_index_type=True) @@ -573,12 +683,12 @@ def test_categorical_nans(self): list('aaaaabbbcc'), ordered=True, categories=['b', 'a', 'c'])) s.iloc[1] = np.nan result = s.value_counts() - expected = pd.Series([4, 3, 2], index=pd.CategoricalIndex( + expected = Series([4, 3, 2], index=pd.CategoricalIndex( ['a', 'b', 'c'], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) - expected = pd.Series([4, 3, 2, 1], index=pd.CategoricalIndex( + expected = Series([4, 3, 2, 1], index=pd.CategoricalIndex( ['a', 'b', 'c', np.nan], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) @@ -595,33 +705,33 @@ def test_dropna(self): # https://github.com/pandas-dev/pandas/issues/9443#issuecomment-73719328 tm.assert_series_equal( - pd.Series([True, True, False]).value_counts(dropna=True), - pd.Series([2, 1], index=[True, False])) + Series([True, True, False]).value_counts(dropna=True), + Series([2, 1], index=[True, False])) tm.assert_series_equal( - pd.Series([True, True, False]).value_counts(dropna=False), - pd.Series([2, 1], index=[True, False])) + Series([True, True, False]).value_counts(dropna=False), + Series([2, 1], index=[True, False])) tm.assert_series_equal( - pd.Series([True, True, False, None]).value_counts(dropna=True), - pd.Series([2, 1], index=[True, False])) + Series([True, True, False, None]).value_counts(dropna=True), + Series([2, 1], index=[True, False])) tm.assert_series_equal( - pd.Series([True, True, False, None]).value_counts(dropna=False), - pd.Series([2, 1, 1], index=[True, False, np.nan])) + Series([True, True, False, None]).value_counts(dropna=False), + Series([2, 1, 1], index=[True, False, np.nan])) tm.assert_series_equal( - pd.Series([10.3, 5., 5.]).value_counts(dropna=True), - pd.Series([2, 1], index=[5., 10.3])) + Series([10.3, 5., 5.]).value_counts(dropna=True), + Series([2, 1], index=[5., 10.3])) tm.assert_series_equal( - pd.Series([10.3, 5., 5.]).value_counts(dropna=False), - pd.Series([2, 1], index=[5., 10.3])) + Series([10.3, 5., 5.]).value_counts(dropna=False), + Series([2, 1], index=[5., 10.3])) tm.assert_series_equal( - pd.Series([10.3, 5., 5., None]).value_counts(dropna=True), - pd.Series([2, 1], index=[5., 10.3])) + Series([10.3, 5., 5., None]).value_counts(dropna=True), + Series([2, 1], index=[5., 10.3])) # 32-bit linux has a different ordering if not compat.is_platform_32bit(): - result = pd.Series([10.3, 5., 5., None]).value_counts(dropna=False) - expected = pd.Series([2, 1, 1], index=[5., 10.3, np.nan]) + result = Series([10.3, 5., 5., None]).value_counts(dropna=False) + expected = Series([2, 1, 1], index=[5., 10.3, np.nan]) tm.assert_series_equal(result, expected) def test_value_counts_normalized(self): @@ -736,15 +846,15 @@ def test_numeric_object_likes(self): tm.assert_numpy_array_equal(res_false, exp_false) # series - for s in 
[pd.Series(case), pd.Series(case, dtype='category')]: + for s in [Series(case), Series(case, dtype='category')]: res_first = s.duplicated(keep='first') - tm.assert_series_equal(res_first, pd.Series(exp_first)) + tm.assert_series_equal(res_first, Series(exp_first)) res_last = s.duplicated(keep='last') - tm.assert_series_equal(res_last, pd.Series(exp_last)) + tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) - tm.assert_series_equal(res_false, pd.Series(exp_false)) + tm.assert_series_equal(res_false, Series(exp_false)) def test_datetime_likes(self): @@ -753,8 +863,8 @@ def test_datetime_likes(self): td = ['1 days', '2 days', '1 days', 'NaT', '3 days', '2 days', '4 days', '1 days', 'NaT', '6 days'] - cases = [np.array([pd.Timestamp(d) for d in dt]), - np.array([pd.Timestamp(d, tz='US/Eastern') for d in dt]), + cases = [np.array([Timestamp(d) for d in dt]), + np.array([Timestamp(d, tz='US/Eastern') for d in dt]), np.array([pd.Period(d, freq='D') for d in dt]), np.array([np.datetime64(d) for d in dt]), np.array([pd.Timedelta(d) for d in td])] @@ -788,16 +898,16 @@ def test_datetime_likes(self): tm.assert_numpy_array_equal(res_false, exp_false) # series - for s in [pd.Series(case), pd.Series(case, dtype='category'), - pd.Series(case, dtype=object)]: + for s in [Series(case), Series(case, dtype='category'), + Series(case, dtype=object)]: res_first = s.duplicated(keep='first') - tm.assert_series_equal(res_first, pd.Series(exp_first)) + tm.assert_series_equal(res_first, Series(exp_first)) res_last = s.duplicated(keep='last') - tm.assert_series_equal(res_last, pd.Series(exp_last)) + tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) - tm.assert_series_equal(res_false, pd.Series(exp_false)) + tm.assert_series_equal(res_false, Series(exp_false)) def test_unique_index(self): cases = [pd.Index([1, 2, 3]), pd.RangeIndex(0, 3)] @@ -939,7 +1049,7 @@ def test_lookup_overflow(self): np.arange(len(xs), dtype=np.int64)) def test_get_unique(self): - s = pd.Series([1, 2, 2**63, 2**63], dtype=np.uint64) + s = Series([1, 2, 2**63, 2**63], dtype=np.uint64) exp = np.array([1, 2, 2**63], dtype=np.uint64) self.assert_numpy_array_equal(s.unique(), exp) From b7ddb0a734eeed3fc4692724a6951ae7a62b1dea Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 9 Apr 2017 12:07:35 -0400 Subject: [PATCH 369/933] DOC: add .unique to top-level in api --- doc/source/api.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/api.rst b/doc/source/api.rst index 336b0b9b14c6c..8b4f295392a68 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -165,6 +165,7 @@ Data manipulations concat get_dummies factorize + unique wide_to_long Top-level missing data From b4c6fc1dc4fac2951c7e3ee035968760b4f4adb9 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 9 Apr 2017 13:58:31 -0400 Subject: [PATCH 370/933] DOC: add some See Also to Categorical --- doc/source/api.rst | 10 +++++++++- pandas/core/algorithms.py | 2 +- pandas/core/categorical.py | 17 +++++++++++++---- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 8b4f295392a68..2e6f693677e4e 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -653,13 +653,21 @@ adding ordering information or special categories is need at creation time of th Categorical.from_codes ``np.asarray(categorical)`` works by implementing the array interface. 
Be aware that this converts -the Categorical back to a numpy array, so levels and order information is not preserved! +the Categorical back to a numpy array, so categories and order information is not preserved! .. autosummary:: :toctree: generated/ Categorical.__array__ +Categorical methods + +.. autosummary:: + :toctree: generated/ + + Categorical.unique + Categorical.value_counts + Plotting ~~~~~~~~ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 654e38e43b6c0..7fab9295bb94e 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -287,7 +287,7 @@ def unique(values): Examples -------- - pd.unique(pd.Series([2, 1, 3, 3])) + >>> pd.unique(pd.Series([2, 1, 3, 3])) array([2, 1, 3]) >>> pd.unique(pd.Series([2] + [1] * 5)) array([2, 1]) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index e3d6792604c4c..906e8efafe4af 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1137,8 +1137,9 @@ def isnull(self): See also -------- - pandas.isnull : pandas version + isnull : pandas version Categorical.notnull : boolean inverse of Categorical.isnull + """ ret = self._codes == -1 @@ -1164,8 +1165,9 @@ def notnull(self): See also -------- - pandas.notnull : pandas version + notnull : pandas version Categorical.isnull : boolean inverse of Categorical.notnull + """ return ~self.isnull() @@ -1206,6 +1208,11 @@ def value_counts(self, dropna=True): Returns ------- counts : Series + + See Also + -------- + Series.value_counts + """ from numpy import bincount from pandas.types.missing import isnull @@ -1308,6 +1315,7 @@ def sort_values(self, inplace=False, ascending=True, na_position='last'): See Also -------- Categorical.sort + Series.sort_values Examples -------- @@ -1919,8 +1927,9 @@ def unique(self): See Also -------- - pandas.unique - pandas.CategoricalIndex.unique + unique + CategoricalIndex.unique + Series.unique """ From b4701a6dcb432ba6c5c5b757f4956ae59d282781 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 9 Apr 2017 14:24:05 -0400 Subject: [PATCH 371/933] DOC: suppress some doc build warnings (#15958) --- doc/source/dsintro.rst | 1 + doc/source/io.rst | 1 + doc/source/whatsnew/v0.13.0.txt | 4 ++-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 0086cb0f94747..3c6572229802d 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -979,6 +979,7 @@ Convert to a MultiIndex DataFrame Alternatively, one can convert to an xarray ``DataArray``. .. ipython:: python + :okwarning: p.to_xarray() diff --git a/doc/source/io.rst b/doc/source/io.rst index f4676f3ad964e..2b3d2895333d3 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4487,6 +4487,7 @@ See the `Full Documentation `__ Write to a feather file. .. ipython:: python + :okwarning: df.to_feather('example.feather') diff --git a/doc/source/whatsnew/v0.13.0.txt b/doc/source/whatsnew/v0.13.0.txt index 118632cc2c0ee..3347b05a5df37 100644 --- a/doc/source/whatsnew/v0.13.0.txt +++ b/doc/source/whatsnew/v0.13.0.txt @@ -357,11 +357,11 @@ HDFStore API Changes .. ipython:: python path = 'test.h5' - df = DataFrame(randn(10,2)) + df = pd.DataFrame(np.random.randn(10,2)) df.to_hdf(path,'df_table',format='table') df.to_hdf(path,'df_table2',append=True) df.to_hdf(path,'df_fixed') - with get_store(path) as store: + with pd.HDFStore(path) as store: print(store) ..
ipython:: python From 9cb2c2db0dd763bb9e6586d3103a564875ed25d5 Mon Sep 17 00:00:00 2001 From: Tong Shen Date: Mon, 10 Apr 2017 08:08:14 -0400 Subject: [PATCH 372/933] BUG: Fix MultiIndex names handling in pd.concat closes #15787 Author: Tong Shen Closes #15955 from funnycrab/fix_bug_in_concat and squashes the following commits: 8c0e721 [Tong Shen] explicitly specify dtype when constructing DataFrame to avoid test failure db7866f [Tong Shen] construct expected results as DataFrame instead of FrozenList 7f82be9 [Tong Shen] BUG: Fix MultiIndex names handling in pd.concat --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/indexes/api.py | 2 +- pandas/tests/tools/test_concat.py | 24 ++++++++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4c0594c024774..e8170b4bf2113 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1241,6 +1241,7 @@ Indexing - Bug in creating a ``MultiIndex`` with tuples and not passing a list of names; this will now raise ``ValueError`` (:issue:`15110`) - Bug in the HTML display with a ``MultiIndex`` and truncation (:issue:`14882`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) +- Bug in ``pd.concat()`` where the names of ``MultiIndex`` of resulting ``DataFrame`` are not handled correctly when ``None`` is present in the names of ``MultiIndex`` of input ``DataFrame`` (:issue:`15787`) I/O ^^^ diff --git a/pandas/indexes/api.py b/pandas/indexes/api.py index a38453e0d2ccc..a3cb54ca97071 100644 --- a/pandas/indexes/api.py +++ b/pandas/indexes/api.py @@ -107,7 +107,7 @@ def _get_consensus_names(indexes): # find the non-none names, need to tupleify to make # the set hashable, then reverse on return consensus_names = set([tuple(i.names) for i in indexes - if all(n is not None for n in i.names)]) + if any(n is not None for n in i.names)]) if len(consensus_names) == 1: return list(list(consensus_names)[0]) return [None] * indexes[0].nlevels diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index 623c5fa02fcb2..c61f2a3dc8066 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -1048,6 +1048,30 @@ def test_concat_multiindex_with_tz(self): result = concat([df, df]) tm.assert_frame_equal(result, expected) + def test_concat_multiindex_with_none_in_index_names(self): + # GH 15787 + index = pd.MultiIndex.from_product([[1], range(5)], + names=['level1', None]) + df = pd.DataFrame({'col': range(5)}, index=index, dtype=np.int32) + + result = concat([df, df], keys=[1, 2], names=['level2']) + index = pd.MultiIndex.from_product([[1, 2], [1], range(5)], + names=['level2', 'level1', None]) + expected = pd.DataFrame({'col': list(range(5)) * 2}, + index=index, dtype=np.int32) + assert_frame_equal(result, expected) + + result = concat([df, df[:2]], keys=[1, 2], names=['level2']) + level2 = [1] * 5 + [2] * 2 + level1 = [1] * 7 + no_name = list(range(5)) + list(range(2)) + tuples = list(zip(level2, level1, no_name)) + index = pd.MultiIndex.from_tuples(tuples, + names=['level2', 'level1', None]) + expected = pd.DataFrame({'col': no_name}, index=index, + dtype=np.int32) + assert_frame_equal(result, expected) + def test_concat_keys_and_levels(self): df = DataFrame(np.random.randn(1, 3)) df2 = DataFrame(np.random.randn(1, 4)) From d984cfc391121882906564541fbce49c48cdc229 Mon Sep 17 00:00:00
2001 From: Jeff Reback Date: Mon, 10 Apr 2017 08:12:00 -0400 Subject: [PATCH 373/933] TST: clean up series/frame api tests inheritance a bit (#15949) * TST: clean up series/frame api tests inheritance a bit * BUG: Index.to_series() is not copying the index --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/indexes/base.py | 4 +- .../frame/{test_misc_api.py => test_api.py} | 0 pandas/tests/frame/test_query_eval.py | 2 +- pandas/tests/indexes/common.py | 9 ++++ .../series/{test_misc_api.py => test_api.py} | 0 pandas/tests/series/test_quantile.py | 42 ++++++++----------- pandas/tests/sparse/test_frame.py | 2 +- pandas/tests/sparse/test_series.py | 2 +- pandas/tseries/index.py | 4 +- 10 files changed, 36 insertions(+), 30 deletions(-) rename pandas/tests/frame/{test_misc_api.py => test_api.py} (100%) rename pandas/tests/series/{test_misc_api.py => test_api.py} (100%) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index e8170b4bf2113..fd1cd3d0022c9 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1219,6 +1219,7 @@ Conversion - Bug in ``DataFrame.fillna()`` with tz-aware datetimes (:issue:`15855`) - Bug in ``is_string_dtype``, ``is_timedelta64_ns_dtype``, and ``is_string_like_dtype`` in which an error was raised when ``None`` was passed in (:issue:`15941`) - Bug in the return type of ``pd.unique`` on a ``Categorical``, which was returning an ndarray and not a ``Categorical`` (:issue:`15903`) +- Bug in ``Index.to_series()`` where the index was not copied (and so mutating later would change the original) (:issue:`15949`) Indexing ^^^^^^^^ diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 91e2422873dd4..bf7975bcdb964 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -944,7 +944,9 @@ def to_series(self, **kwargs): """ from pandas import Series - return Series(self._to_embed(), index=self, name=self.name) + return Series(self._to_embed(), + index=self._shallow_copy(), + name=self.name) def _to_embed(self, keep_tz=False): """ diff --git a/pandas/tests/frame/test_misc_api.py b/pandas/tests/frame/test_api.py similarity index 100% rename from pandas/tests/frame/test_misc_api.py rename to pandas/tests/frame/test_api.py diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 647af92b42273..f90b37b66d200 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -484,7 +484,7 @@ def test_date_index_query_with_NaT_duplicates(self): df = DataFrame(d) df.loc[np.random.rand(n) > 0.5, 'dates1'] = pd.NaT df.set_index('dates1', inplace=True, drop=True) - res = df.query('index < 20130101 < dates3', engine=engine, + res = df.query('dates1 < 20130101 < dates3', engine=engine, parser=parser) expec = df[(df.index.to_series() < '20130101') & ('20130101' < df.dates3)] diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index ba76945834aff..08f8f8d48e705 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -38,6 +38,15 @@ def test_pickle_compat_construction(self): # need an object to create with self.assertRaises(TypeError, self._holder) + def test_to_series(self): + # assert that we are creating a copy of the index + + idx = self.create_index() + s = idx.to_series() + assert s.values is not idx.values + assert s.index is not idx + assert s.name == idx.name + def test_shift(self): # GH8083 test the base class for shift diff --git a/pandas/tests/series/test_misc_api.py
b/pandas/tests/series/test_api.py similarity index 100% rename from pandas/tests/series/test_misc_api.py rename to pandas/tests/series/test_api.py diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index b8d1b92081858..5aca34fb86576 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -16,17 +16,16 @@ class TestSeriesQuantile(TestData, tm.TestCase): def test_quantile(self): - from numpy import percentile q = self.ts.quantile(0.1) - self.assertEqual(q, percentile(self.ts.valid(), 10)) + self.assertEqual(q, np.percentile(self.ts.valid(), 10)) q = self.ts.quantile(0.9) - self.assertEqual(q, percentile(self.ts.valid(), 90)) + self.assertEqual(q, np.percentile(self.ts.valid(), 90)) # object dtype q = Series(self.ts, dtype=object).quantile(0.9) - self.assertEqual(q, percentile(self.ts.valid(), 90)) + self.assertEqual(q, np.percentile(self.ts.valid(), 90)) # datetime64[ns] dtype dts = self.ts.index.to_series() @@ -48,12 +47,11 @@ def test_quantile(self): self.ts.quantile(invalid) def test_quantile_multi(self): - from numpy import percentile qs = [.1, .9] result = self.ts.quantile(qs) - expected = pd.Series([percentile(self.ts.valid(), 10), - percentile(self.ts.valid(), 90)], + expected = pd.Series([np.percentile(self.ts.valid(), 10), + np.percentile(self.ts.valid(), 90)], index=qs, name=self.ts.name) tm.assert_series_equal(result, expected) @@ -70,50 +68,44 @@ def test_quantile_multi(self): [], dtype=float)) tm.assert_series_equal(result, expected) + @pytest.mark.skipif(_np_version_under1p9, + reason="Numpy version is under 1.9") def test_quantile_interpolation(self): # GH #10174 - if _np_version_under1p9: - pytest.skip("Numpy version is under 1.9") - - from numpy import percentile # interpolation = linear (default case) q = self.ts.quantile(0.1, interpolation='linear') - self.assertEqual(q, percentile(self.ts.valid(), 10)) + self.assertEqual(q, np.percentile(self.ts.valid(), 10)) q1 = self.ts.quantile(0.1) - self.assertEqual(q1, percentile(self.ts.valid(), 10)) + self.assertEqual(q1, np.percentile(self.ts.valid(), 10)) # test with and without interpolation keyword self.assertEqual(q, q1) + @pytest.mark.skipif(_np_version_under1p9, + reason="Numpy version is under 1.9") def test_quantile_interpolation_dtype(self): # GH #10174 - if _np_version_under1p9: - pytest.skip("Numpy version is under 1.9") - - from numpy import percentile # interpolation = linear (default case) q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='lower') - self.assertEqual(q, percentile(np.array([1, 3, 4]), 50)) + self.assertEqual(q, np.percentile(np.array([1, 3, 4]), 50)) self.assertTrue(is_integer(q)) q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='higher') - self.assertEqual(q, percentile(np.array([1, 3, 4]), 50)) + self.assertEqual(q, np.percentile(np.array([1, 3, 4]), 50)) self.assertTrue(is_integer(q)) + @pytest.mark.skipif(not _np_version_under1p9, + reason="Numpy version is greater than 1.9") def test_quantile_interpolation_np_lt_1p9(self): # GH #10174 - if not _np_version_under1p9: - pytest.skip("Numpy version is greater than 1.9") - - from numpy import percentile # interpolation = linear (default case) q = self.ts.quantile(0.1, interpolation='linear') - self.assertEqual(q, percentile(self.ts.valid(), 10)) + self.assertEqual(q, np.percentile(self.ts.valid(), 10)) q1 = self.ts.quantile(0.1) - self.assertEqual(q1, percentile(self.ts.valid(), 10)) + self.assertEqual(q1, np.percentile(self.ts.valid(), 10)) # interpolation other than
linear expErrMsg = "Interpolation methods other than " diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index ae1a1e35f1859..e6482d70e0ae3 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -22,7 +22,7 @@ from pandas.sparse.libsparse import BlockIndex, IntIndex from pandas.sparse.api import SparseSeries, SparseDataFrame, SparseArray -from pandas.tests.frame.test_misc_api import SharedWithSparse +from pandas.tests.frame.test_api import SharedWithSparse from pandas.tests.sparse.common import spmatrix # noqa: F401 diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index 8aa85a5b7f396..83f0237841dbd 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -18,7 +18,7 @@ from pandas.sparse.libsparse import BlockIndex, IntIndex from pandas.sparse.api import SparseSeries -from pandas.tests.series.test_misc_api import SharedWithSparse +from pandas.tests.series.test_api import SharedWithSparse def _test_data1(): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 8fa842a836051..2c14d4f8ea79e 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -895,7 +895,9 @@ def to_series(self, keep_tz=False): Series """ from pandas import Series - return Series(self._to_embed(keep_tz), index=self, name=self.name) + return Series(self._to_embed(keep_tz), + index=self._shallow_copy(), + name=self.name) def _to_embed(self, keep_tz=False): """ From 838e09ccef896a1265b5447fb14f0969f7cd86d5 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 10 Apr 2017 08:16:38 -0400 Subject: [PATCH 374/933] DOC: remove Categorical.unique refs from doc-strings (#15964) closes #15957 --- doc/source/api.rst | 8 -------- pandas/core/base.py | 5 +++-- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 2e6f693677e4e..bf9d521e2a12a 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -660,14 +660,6 @@ the Categorical back to a numpy array, so categories and order information is no Categorical.__array__ -Categorical methods - -.. 
autosummary:: - :toctree: generated/ - - Categorical.unique - Categorical.value_counts - Plotting ~~~~~~~~ diff --git a/pandas/core/base.py b/pandas/core/base.py index 56bdeee6982d5..bdbfb7b949986 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -871,8 +871,9 @@ def value_counts(self, normalize=False, sort=True, ascending=False, See Also -------- - pandas.unique - pandas.Categorical.unique + unique + Index.unique + Series.unique """) @Appender(_shared_docs['unique'] % _indexops_doc_kwargs) From fbbcc10948e5847f1aa5f20684c15cdfc516f91c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 11 Apr 2017 06:29:30 -0400 Subject: [PATCH 375/933] TST/DEPR: fix bunch of Panel deprecation warnings (#15965) * TST: remove using .to_datetime w/Timestamp as deprecated * TST: split some sorting tests * fix isnan comparison deprecation warning * TST/DEPR: catch Panel deprecation warnings * close files in tests pickles with compression --- pandas/tests/computation/test_eval.py | 12 +- pandas/tests/frame/test_reshape.py | 12 +- pandas/tests/frame/test_subclass.py | 38 ++- pandas/tests/groupby/test_groupby.py | 111 +++--- pandas/tests/indexes/datetimes/test_tools.py | 3 +- pandas/tests/indexing/common.py | 57 ++-- pandas/tests/indexing/test_multiindex.py | 171 +++++----- pandas/tests/indexing/test_panel.py | 316 +++++++++--------- pandas/tests/indexing/test_partial.py | 55 +-- .../tests/io/generate_legacy_storage_files.py | 19 +- pandas/tests/io/msgpack/common.py | 10 + pandas/tests/io/msgpack/test_buffer.py | 3 +- pandas/tests/io/msgpack/test_extension.py | 6 +- pandas/tests/io/test_excel.py | 17 +- pandas/tests/io/test_packers.py | 20 +- pandas/tests/io/test_pickle.py | 11 +- pandas/tests/sparse/test_frame.py | 28 +- pandas/tests/test_categorical.py | 9 +- pandas/tests/test_multilevel.py | 17 +- pandas/tests/test_sorting.py | 25 +- pandas/tests/tools/test_concat.py | 28 +- pandas/tests/tools/test_hashing.py | 14 +- pandas/tests/tools/test_join.py | 144 ++++---- pandas/tests/tseries/test_resample.py | 82 ++--- pandas/tests/types/test_generic.py | 4 +- pandas/tests/types/test_inference.py | 7 +- pandas/types/missing.py | 2 +- 27 files changed, 667 insertions(+), 554 deletions(-) create mode 100644 pandas/tests/io/msgpack/common.py diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 97ed88b1dc22b..78aad90cacf94 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1,4 +1,5 @@ import warnings +from warnings import catch_warnings import operator from itertools import product @@ -1130,11 +1131,12 @@ def test_bool_ops_with_constants(self): self.assertEqual(res, exp) def test_panel_fails(self): - x = Panel(randn(3, 4, 5)) - y = Series(randn(10)) - with pytest.raises(NotImplementedError): - self.eval('x + y', - local_dict={'x': x, 'y': y}) + with catch_warnings(record=True): + x = Panel(randn(3, 4, 5)) + y = Series(randn(10)) + with pytest.raises(NotImplementedError): + self.eval('x + y', + local_dict={'x': x, 'y': y}) def test_4d_ndarray_fails(self): x = randn(3, 4, 5, 6) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index c8c7313ddd071..a0ee4ca2ce287 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -2,6 +2,7 @@ from __future__ import print_function +from warnings import catch_warnings from datetime import datetime import itertools @@ -53,11 +54,12 @@ def test_pivot(self): self.assertEqual(pivoted.index.name, 'index') 
self.assertEqual(pivoted.columns.names, (None, 'columns')) - # pivot multiple columns - wp = tm.makePanel() - lp = wp.to_frame() - df = lp.reset_index() - assert_frame_equal(df.pivot('major', 'minor'), lp.unstack()) + with catch_warnings(record=True): + # pivot multiple columns + wp = tm.makePanel() + lp = wp.to_frame() + df = lp.reset_index() + assert_frame_equal(df.pivot('major', 'minor'), lp.unstack()) def test_pivot_duplicates(self): data = DataFrame({'a': ['bar', 'bar', 'foo', 'foo', 'foo'], diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 9052a16bf973c..7444490d18373 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -2,6 +2,7 @@ from __future__ import print_function +from warnings import catch_warnings import numpy as np from pandas import DataFrame, Series, MultiIndex, Panel @@ -128,24 +129,25 @@ def test_indexing_sliced(self): def test_to_panel_expanddim(self): # GH 9762 - class SubclassedFrame(DataFrame): - - @property - def _constructor_expanddim(self): - return SubclassedPanel - - class SubclassedPanel(Panel): - pass - - index = MultiIndex.from_tuples([(0, 0), (0, 1), (0, 2)]) - df = SubclassedFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]}, index=index) - result = df.to_panel() - self.assertTrue(isinstance(result, SubclassedPanel)) - expected = SubclassedPanel([[[1, 2, 3]], [[4, 5, 6]]], - items=['X', 'Y'], major_axis=[0], - minor_axis=[0, 1, 2], - dtype='int64') - tm.assert_panel_equal(result, expected) + with catch_warnings(record=True): + class SubclassedFrame(DataFrame): + + @property + def _constructor_expanddim(self): + return SubclassedPanel + + class SubclassedPanel(Panel): + pass + + index = MultiIndex.from_tuples([(0, 0), (0, 1), (0, 2)]) + df = SubclassedFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]}, index=index) + result = df.to_panel() + self.assertTrue(isinstance(result, SubclassedPanel)) + expected = SubclassedPanel([[[1, 2, 3]], [[4, 5, 6]]], + items=['X', 'Y'], major_axis=[0], + minor_axis=[0, 1, 2], + dtype='int64') + tm.assert_panel_equal(result, expected) def test_subclass_attr_err_propagation(self): # GH 11808 diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index c17c98c5448be..68955c954206e 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import print_function +from warnings import catch_warnings from string import ascii_lowercase from datetime import datetime from numpy import nan @@ -814,12 +815,14 @@ def f(grp): assert_series_equal(result, e) def test_get_group(self): - wp = tm.makePanel() - grouped = wp.groupby(lambda x: x.month, axis='major') + with catch_warnings(record=True): + wp = tm.makePanel() + grouped = wp.groupby(lambda x: x.month, axis='major') - gp = grouped.get_group(1) - expected = wp.reindex(major=[x for x in wp.major_axis if x.month == 1]) - assert_panel_equal(gp, expected) + gp = grouped.get_group(1) + expected = wp.reindex( + major=[x for x in wp.major_axis if x.month == 1]) + assert_panel_equal(gp, expected) # GH 5267 # be datelike friendly @@ -1317,16 +1320,17 @@ def test_multi_iter_frame(self): pass def test_multi_iter_panel(self): - wp = tm.makePanel() - grouped = wp.groupby([lambda x: x.month, lambda x: x.weekday()], - axis=1) - - for (month, wd), group in grouped: - exp_axis = [x - for x in wp.major_axis - if x.month == month and x.weekday() == wd] - expected = wp.reindex(major=exp_axis) - assert_panel_equal(group, 
expected) + with catch_warnings(record=True): + wp = tm.makePanel() + grouped = wp.groupby([lambda x: x.month, lambda x: x.weekday()], + axis=1) + + for (month, wd), group in grouped: + exp_axis = [x + for x in wp.major_axis + if x.month == month and x.weekday() == wd] + expected = wp.reindex(major=exp_axis) + assert_panel_equal(group, expected) def test_multi_func(self): col1 = self.df['A'] @@ -1387,25 +1391,26 @@ def test_groupby_multiple_columns(self): def _check_op(op): - result1 = op(grouped) - - expected = defaultdict(dict) - for n1, gp1 in data.groupby('A'): - for n2, gp2 in gp1.groupby('B'): - expected[n1][n2] = op(gp2.loc[:, ['C', 'D']]) - expected = dict((k, DataFrame(v)) - for k, v in compat.iteritems(expected)) - expected = Panel.fromDict(expected).swapaxes(0, 1) - expected.major_axis.name, expected.minor_axis.name = 'A', 'B' - - # a little bit crude - for col in ['C', 'D']: - result_col = op(grouped[col]) - exp = expected[col] - pivoted = result1[col].unstack() - pivoted2 = result_col.unstack() - assert_frame_equal(pivoted.reindex_like(exp), exp) - assert_frame_equal(pivoted2.reindex_like(exp), exp) + with catch_warnings(record=True): + result1 = op(grouped) + + expected = defaultdict(dict) + for n1, gp1 in data.groupby('A'): + for n2, gp2 in gp1.groupby('B'): + expected[n1][n2] = op(gp2.loc[:, ['C', 'D']]) + expected = dict((k, DataFrame(v)) + for k, v in compat.iteritems(expected)) + expected = Panel.fromDict(expected).swapaxes(0, 1) + expected.major_axis.name, expected.minor_axis.name = 'A', 'B' + + # a little bit crude + for col in ['C', 'D']: + result_col = op(grouped[col]) + exp = expected[col] + pivoted = result1[col].unstack() + pivoted2 = result_col.unstack() + assert_frame_equal(pivoted.reindex_like(exp), exp) + assert_frame_equal(pivoted2.reindex_like(exp), exp) _check_op(lambda x: x.sum()) _check_op(lambda x: x.mean()) @@ -2980,8 +2985,9 @@ def test_dictify(self): def test_sparse_friendly(self): sdf = self.df[['C', 'D']].to_sparse() - panel = tm.makePanel() - tm.add_nans(panel) + with catch_warnings(record=True): + panel = tm.makePanel() + tm.add_nans(panel) def _check_work(gp): gp.mean() @@ -2997,27 +3003,28 @@ def _check_work(gp): # _check_work(panel.groupby(lambda x: x.month, axis=1)) def test_panel_groupby(self): - self.panel = tm.makePanel() - tm.add_nans(self.panel) - grouped = self.panel.groupby({'ItemA': 0, 'ItemB': 0, 'ItemC': 1}, - axis='items') - agged = grouped.mean() - agged2 = grouped.agg(lambda x: x.mean('items')) + with catch_warnings(record=True): + self.panel = tm.makePanel() + tm.add_nans(self.panel) + grouped = self.panel.groupby({'ItemA': 0, 'ItemB': 0, 'ItemC': 1}, + axis='items') + agged = grouped.mean() + agged2 = grouped.agg(lambda x: x.mean('items')) - tm.assert_panel_equal(agged, agged2) + tm.assert_panel_equal(agged, agged2) - self.assert_index_equal(agged.items, Index([0, 1])) + self.assert_index_equal(agged.items, Index([0, 1])) - grouped = self.panel.groupby(lambda x: x.month, axis='major') - agged = grouped.mean() + grouped = self.panel.groupby(lambda x: x.month, axis='major') + agged = grouped.mean() - exp = Index(sorted(list(set(self.panel.major_axis.month)))) - self.assert_index_equal(agged.major_axis, exp) + exp = Index(sorted(list(set(self.panel.major_axis.month)))) + self.assert_index_equal(agged.major_axis, exp) - grouped = self.panel.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, - axis='minor') - agged = grouped.mean() - self.assert_index_equal(agged.minor_axis, Index([0, 1])) + grouped = self.panel.groupby({'A': 0, 'B': 0, 'C': 
1, 'D': 1}, + axis='minor') + agged = grouped.mean() + self.assert_index_equal(agged.minor_axis, Index([0, 1])) def test_groupby_2d_malformed(self): d = DataFrame(index=lrange(2)) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 02630c76abb93..1260ee4e5ab07 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1533,7 +1533,8 @@ def units_from_epochs(): return list(range(5)) -@pytest.fixture(params=[epoch_1960(), epoch_1960().to_datetime(), +@pytest.fixture(params=[epoch_1960(), + epoch_1960().to_pydatetime(), epoch_1960().to_datetime64(), str(epoch_1960())]) def epochs(request): diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index c7637a00910c6..0f8a9573a233b 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -37,41 +37,46 @@ def setUp(self): self.frame_ints = DataFrame(np.random.randn(4, 4), index=lrange(0, 8, 2), columns=lrange(0, 12, 3)) - self.panel_ints = Panel(np.random.rand(4, 4, 4), - items=lrange(0, 8, 2), - major_axis=lrange(0, 12, 3), - minor_axis=lrange(0, 16, 4)) + with catch_warnings(record=True): + self.panel_ints = Panel(np.random.rand(4, 4, 4), + items=lrange(0, 8, 2), + major_axis=lrange(0, 12, 3), + minor_axis=lrange(0, 16, 4)) self.series_uints = Series(np.random.rand(4), index=UInt64Index(lrange(0, 8, 2))) self.frame_uints = DataFrame(np.random.randn(4, 4), index=UInt64Index(lrange(0, 8, 2)), columns=UInt64Index(lrange(0, 12, 3))) - self.panel_uints = Panel(np.random.rand(4, 4, 4), - items=UInt64Index(lrange(0, 8, 2)), - major_axis=UInt64Index(lrange(0, 12, 3)), - minor_axis=UInt64Index(lrange(0, 16, 4))) + with catch_warnings(record=True): + self.panel_uints = Panel(np.random.rand(4, 4, 4), + items=UInt64Index(lrange(0, 8, 2)), + major_axis=UInt64Index(lrange(0, 12, 3)), + minor_axis=UInt64Index(lrange(0, 16, 4))) self.series_labels = Series(np.random.randn(4), index=list('abcd')) self.frame_labels = DataFrame(np.random.randn(4, 4), index=list('abcd'), columns=list('ABCD')) - self.panel_labels = Panel(np.random.randn(4, 4, 4), - items=list('abcd'), - major_axis=list('ABCD'), - minor_axis=list('ZYXW')) + with catch_warnings(record=True): + self.panel_labels = Panel(np.random.randn(4, 4, 4), + items=list('abcd'), + major_axis=list('ABCD'), + minor_axis=list('ZYXW')) self.series_mixed = Series(np.random.randn(4), index=[2, 4, 'null', 8]) self.frame_mixed = DataFrame(np.random.randn(4, 4), index=[2, 4, 'null', 8]) - self.panel_mixed = Panel(np.random.randn(4, 4, 4), - items=[2, 4, 'null', 8]) + with catch_warnings(record=True): + self.panel_mixed = Panel(np.random.randn(4, 4, 4), + items=[2, 4, 'null', 8]) self.series_ts = Series(np.random.randn(4), index=date_range('20130101', periods=4)) self.frame_ts = DataFrame(np.random.randn(4, 4), index=date_range('20130101', periods=4)) - self.panel_ts = Panel(np.random.randn(4, 4, 4), - items=date_range('20130101', periods=4)) + with catch_warnings(record=True): + self.panel_ts = Panel(np.random.randn(4, 4, 4), + items=date_range('20130101', periods=4)) dates_rev = (date_range('20130101', periods=4) .sort_values(ascending=False)) @@ -79,12 +84,14 @@ def setUp(self): index=dates_rev) self.frame_ts_rev = DataFrame(np.random.randn(4, 4), index=dates_rev) - self.panel_ts_rev = Panel(np.random.randn(4, 4, 4), - items=dates_rev) + with catch_warnings(record=True): + self.panel_ts_rev = Panel(np.random.randn(4, 4, 4), + items=dates_rev) 
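            # reader's note, not part of the diff: Panel was deprecated in
            # pandas 0.20, so these fixtures build Panels inside
            # catch_warnings(record=True) to keep the DeprecationWarning out
            # of every test run's output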
self.frame_empty = DataFrame({}) self.series_empty = Series({}) - self.panel_empty = Panel({}) + with catch_warnings(record=True): + self.panel_empty = Panel({}) # form agglomerates for o in self._objs: @@ -255,8 +262,18 @@ def _print(result, error=None): continue obj = d[t] - if obj is not None: + if obj is None: + continue + + def _call(obj=obj): obj = obj.copy() k2 = key2 _eq(t, o, a, obj, key1, k2) + + # Panel deprecations + if isinstance(obj, Panel): + with catch_warnings(record=True): + _call() + else: + _call() diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index 1fc0a87764b94..07786b9fb4b72 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -1164,87 +1164,98 @@ def f(): class TestMultiIndexPanel(tm.TestCase): def test_iloc_getitem_panel_multiindex(self): - # GH 7199 - # Panel with multi-index - multi_index = pd.MultiIndex.from_tuples([('ONE', 'one'), - ('TWO', 'two'), - ('THREE', 'three')], - names=['UPPER', 'lower']) - - simple_index = [x[0] for x in multi_index] - wd1 = Panel(items=['First', 'Second'], major_axis=['a', 'b', 'c', 'd'], - minor_axis=multi_index) - - wd2 = Panel(items=['First', 'Second'], major_axis=['a', 'b', 'c', 'd'], - minor_axis=simple_index) - - expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]] - result1 = wd1.iloc[0, [True, True, True, False], [0, 2]] # WRONG - tm.assert_frame_equal(result1, expected1) - - expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]] - result2 = wd2.iloc[0, [True, True, True, False], [0, 2]] - tm.assert_frame_equal(result2, expected2) - - expected1 = DataFrame(index=['a'], columns=multi_index, - dtype='float64') - result1 = wd1.iloc[0, [0], [0, 1, 2]] - tm.assert_frame_equal(result1, expected1) - - expected2 = DataFrame(index=['a'], columns=simple_index, - dtype='float64') - result2 = wd2.iloc[0, [0], [0, 1, 2]] - tm.assert_frame_equal(result2, expected2) - - # GH 7516 - mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')]) - p = Panel(np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3), - items=['a', 'b', 'c'], major_axis=mi, - minor_axis=['u', 'v', 'w']) - result = p.iloc[:, 1, 0] - expected = Series([3, 12, 21], index=['a', 'b', 'c'], name='u') - tm.assert_series_equal(result, expected) - result = p.loc[:, (1, 'y'), 'u'] - tm.assert_series_equal(result, expected) + with catch_warnings(record=True): + + # GH 7199 + # Panel with multi-index + multi_index = pd.MultiIndex.from_tuples([('ONE', 'one'), + ('TWO', 'two'), + ('THREE', 'three')], + names=['UPPER', 'lower']) + + simple_index = [x[0] for x in multi_index] + wd1 = Panel(items=['First', 'Second'], + major_axis=['a', 'b', 'c', 'd'], + minor_axis=multi_index) + + wd2 = Panel(items=['First', 'Second'], + major_axis=['a', 'b', 'c', 'd'], + minor_axis=simple_index) + + expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]] + result1 = wd1.iloc[0, [True, True, True, False], [0, 2]] # WRONG + tm.assert_frame_equal(result1, expected1) + + expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]] + result2 = wd2.iloc[0, [True, True, True, False], [0, 2]] + tm.assert_frame_equal(result2, expected2) + + expected1 = DataFrame(index=['a'], columns=multi_index, + dtype='float64') + result1 = wd1.iloc[0, [0], [0, 1, 2]] + tm.assert_frame_equal(result1, expected1) + + expected2 = DataFrame(index=['a'], columns=simple_index, + dtype='float64') + result2 = wd2.iloc[0, [0], [0, 1, 2]] + tm.assert_frame_equal(result2, expected2) + + # GH 7516 + mi 
= MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')]) + p = Panel(np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3), + items=['a', 'b', 'c'], major_axis=mi, + minor_axis=['u', 'v', 'w']) + result = p.iloc[:, 1, 0] + expected = Series([3, 12, 21], index=['a', 'b', 'c'], name='u') + tm.assert_series_equal(result, expected) + + result = p.loc[:, (1, 'y'), 'u'] + tm.assert_series_equal(result, expected) def test_panel_setitem_with_multiindex(self): - # 10360 - # failing with a multi-index - arr = np.array([[[1, 2, 3], [0, 0, 0]], [[0, 0, 0], [0, 0, 0]]], - dtype=np.float64) - - # reg index - axes = dict(items=['A', 'B'], major_axis=[0, 1], - minor_axis=['X', 'Y', 'Z']) - p1 = Panel(0., **axes) - p1.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p1, expected) - - # multi-indexes - axes['items'] = pd.MultiIndex.from_tuples([('A', 'a'), ('B', 'b')]) - p2 = Panel(0., **axes) - p2.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p2, expected) - - axes['major_axis'] = pd.MultiIndex.from_tuples([('A', 1), ('A', 2)]) - p3 = Panel(0., **axes) - p3.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p3, expected) - - axes['minor_axis'] = pd.MultiIndex.from_product([['X'], range(3)]) - p4 = Panel(0., **axes) - p4.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p4, expected) - - arr = np.array( - [[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]], dtype=np.float64) - p5 = Panel(0., **axes) - p5.iloc[0, :, 0] = [1, 2] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p5, expected) + with catch_warnings(record=True): + # 10360 + # failing with a multi-index + arr = np.array([[[1, 2, 3], [0, 0, 0]], + [[0, 0, 0], [0, 0, 0]]], + dtype=np.float64) + + # reg index + axes = dict(items=['A', 'B'], major_axis=[0, 1], + minor_axis=['X', 'Y', 'Z']) + p1 = Panel(0., **axes) + p1.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p1, expected) + + # multi-indexes + axes['items'] = pd.MultiIndex.from_tuples( + [('A', 'a'), ('B', 'b')]) + p2 = Panel(0., **axes) + p2.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p2, expected) + + axes['major_axis'] = pd.MultiIndex.from_tuples( + [('A', 1), ('A', 2)]) + p3 = Panel(0., **axes) + p3.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p3, expected) + + axes['minor_axis'] = pd.MultiIndex.from_product( + [['X'], range(3)]) + p4 = Panel(0., **axes) + p4.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p4, expected) + + arr = np.array( + [[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]], + dtype=np.float64) + p5 = Panel(0., **axes) + p5.iloc[0, :, 0] = [1, 2] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p5, expected) diff --git a/pandas/tests/indexing/test_panel.py b/pandas/tests/indexing/test_panel.py index 0677ea498c282..8daef6155212c 100644 --- a/pandas/tests/indexing/test_panel.py +++ b/pandas/tests/indexing/test_panel.py @@ -1,3 +1,4 @@ +import pytest from warnings import catch_warnings import numpy as np @@ -9,201 +10,210 @@ class TestPanel(tm.TestCase): def test_iloc_getitem_panel(self): - # GH 7189 - p = Panel(np.arange(4 * 3 * 2).reshape(4, 3, 2), - items=['A', 'B', 'C', 'D'], - major_axis=['a', 'b', 'c'], - minor_axis=['one', 'two']) + with catch_warnings(record=True): + # GH 7189 + p = Panel(np.arange(4 * 3 * 2).reshape(4, 3, 2), + items=['A', 'B', 'C', 'D'], + major_axis=['a', 'b', 'c'], + 
minor_axis=['one', 'two']) - result = p.iloc[1] - expected = p.loc['B'] - tm.assert_frame_equal(result, expected) + result = p.iloc[1] + expected = p.loc['B'] + tm.assert_frame_equal(result, expected) - result = p.iloc[1, 1] - expected = p.loc['B', 'b'] - tm.assert_series_equal(result, expected) + result = p.iloc[1, 1] + expected = p.loc['B', 'b'] + tm.assert_series_equal(result, expected) - result = p.iloc[1, 1, 1] - expected = p.loc['B', 'b', 'two'] - self.assertEqual(result, expected) + result = p.iloc[1, 1, 1] + expected = p.loc['B', 'b', 'two'] + self.assertEqual(result, expected) - # slice - result = p.iloc[1:3] - expected = p.loc[['B', 'C']] - tm.assert_panel_equal(result, expected) + # slice + result = p.iloc[1:3] + expected = p.loc[['B', 'C']] + tm.assert_panel_equal(result, expected) - result = p.iloc[:, 0:2] - expected = p.loc[:, ['a', 'b']] - tm.assert_panel_equal(result, expected) + result = p.iloc[:, 0:2] + expected = p.loc[:, ['a', 'b']] + tm.assert_panel_equal(result, expected) - # list of integers - result = p.iloc[[0, 2]] - expected = p.loc[['A', 'C']] - tm.assert_panel_equal(result, expected) + # list of integers + result = p.iloc[[0, 2]] + expected = p.loc[['A', 'C']] + tm.assert_panel_equal(result, expected) - # neg indicies - result = p.iloc[[-1, 1], [-1, 1]] - expected = p.loc[['D', 'B'], ['c', 'b']] - tm.assert_panel_equal(result, expected) + # neg indicies + result = p.iloc[[-1, 1], [-1, 1]] + expected = p.loc[['D', 'B'], ['c', 'b']] + tm.assert_panel_equal(result, expected) - # dups indicies - result = p.iloc[[-1, -1, 1], [-1, 1]] - expected = p.loc[['D', 'D', 'B'], ['c', 'b']] - tm.assert_panel_equal(result, expected) + # dups indicies + result = p.iloc[[-1, -1, 1], [-1, 1]] + expected = p.loc[['D', 'D', 'B'], ['c', 'b']] + tm.assert_panel_equal(result, expected) - # combined - result = p.iloc[0, [True, True], [0, 1]] - expected = p.loc['A', ['a', 'b'], ['one', 'two']] - tm.assert_frame_equal(result, expected) + # combined + result = p.iloc[0, [True, True], [0, 1]] + expected = p.loc['A', ['a', 'b'], ['one', 'two']] + tm.assert_frame_equal(result, expected) - # out-of-bounds exception - self.assertRaises(IndexError, p.iloc.__getitem__, tuple([10, 5])) + # out-of-bounds exception + with pytest.raises(IndexError): + p.iloc[tuple([10, 5])] - def f(): - p.iloc[0, [True, True], [0, 1, 2]] + def f(): + p.iloc[0, [True, True], [0, 1, 2]] - self.assertRaises(IndexError, f) + self.assertRaises(IndexError, f) - # trying to use a label - self.assertRaises(ValueError, p.iloc.__getitem__, tuple(['j', 'D'])) + # trying to use a label + with pytest.raises(ValueError): + p.iloc[tuple(['j', 'D'])] - # GH - p = Panel( - np.random.rand(4, 3, 2), items=['A', 'B', 'C', 'D'], - major_axis=['U', 'V', 'W'], minor_axis=['X', 'Y']) - expected = p['A'] + # GH + p = Panel( + np.random.rand(4, 3, 2), items=['A', 'B', 'C', 'D'], + major_axis=['U', 'V', 'W'], minor_axis=['X', 'Y']) + expected = p['A'] - result = p.iloc[0, :, :] - tm.assert_frame_equal(result, expected) + result = p.iloc[0, :, :] + tm.assert_frame_equal(result, expected) - result = p.iloc[0, [True, True, True], :] - tm.assert_frame_equal(result, expected) + result = p.iloc[0, [True, True, True], :] + tm.assert_frame_equal(result, expected) - result = p.iloc[0, [True, True, True], [0, 1]] - tm.assert_frame_equal(result, expected) + result = p.iloc[0, [True, True, True], [0, 1]] + tm.assert_frame_equal(result, expected) - def f(): - p.iloc[0, [True, True, True], [0, 1, 2]] + def f(): + p.iloc[0, [True, True, True], [0, 1, 2]] - 
self.assertRaises(IndexError, f) + self.assertRaises(IndexError, f) - def f(): - p.iloc[0, [True, True, True], [2]] + def f(): + p.iloc[0, [True, True, True], [2]] - self.assertRaises(IndexError, f) + self.assertRaises(IndexError, f) def test_iloc_panel_issue(self): - # GH 3617 - p = Panel(np.random.randn(4, 4, 4)) + with catch_warnings(record=True): + # GH 3617 + p = Panel(np.random.randn(4, 4, 4)) - self.assertEqual(p.iloc[:3, :3, :3].shape, (3, 3, 3)) - self.assertEqual(p.iloc[1, :3, :3].shape, (3, 3)) - self.assertEqual(p.iloc[:3, 1, :3].shape, (3, 3)) - self.assertEqual(p.iloc[:3, :3, 1].shape, (3, 3)) - self.assertEqual(p.iloc[1, 1, :3].shape, (3, )) - self.assertEqual(p.iloc[1, :3, 1].shape, (3, )) - self.assertEqual(p.iloc[:3, 1, 1].shape, (3, )) + self.assertEqual(p.iloc[:3, :3, :3].shape, (3, 3, 3)) + self.assertEqual(p.iloc[1, :3, :3].shape, (3, 3)) + self.assertEqual(p.iloc[:3, 1, :3].shape, (3, 3)) + self.assertEqual(p.iloc[:3, :3, 1].shape, (3, 3)) + self.assertEqual(p.iloc[1, 1, :3].shape, (3, )) + self.assertEqual(p.iloc[1, :3, 1].shape, (3, )) + self.assertEqual(p.iloc[:3, 1, 1].shape, (3, )) def test_panel_getitem(self): - # GH4016, date selection returns a frame when a partial string - # selection - ind = date_range(start="2000", freq="D", periods=1000) - df = DataFrame( - np.random.randn( - len(ind), 5), index=ind, columns=list('ABCDE')) - panel = Panel(dict([('frame_' + c, df) for c in list('ABC')])) - - test2 = panel.loc[:, "2002":"2002-12-31"] - test1 = panel.loc[:, "2002"] - tm.assert_panel_equal(test1, test2) - # GH8710 - # multi-element getting with a list - panel = tm.makePanel() - - expected = panel.iloc[[0, 1]] - - result = panel.loc[['ItemA', 'ItemB']] - tm.assert_panel_equal(result, expected) + with catch_warnings(record=True): + # GH4016, date selection returns a frame when a partial string + # selection + ind = date_range(start="2000", freq="D", periods=1000) + df = DataFrame( + np.random.randn( + len(ind), 5), index=ind, columns=list('ABCDE')) + panel = Panel(dict([('frame_' + c, df) for c in list('ABC')])) - result = panel.loc[['ItemA', 'ItemB'], :, :] - tm.assert_panel_equal(result, expected) + test2 = panel.loc[:, "2002":"2002-12-31"] + test1 = panel.loc[:, "2002"] + tm.assert_panel_equal(test1, test2) - result = panel[['ItemA', 'ItemB']] - tm.assert_panel_equal(result, expected) + # GH8710 + # multi-element getting with a list + panel = tm.makePanel() - result = panel.loc['ItemA':'ItemB'] - tm.assert_panel_equal(result, expected) + expected = panel.iloc[[0, 1]] - with catch_warnings(record=True): - result = panel.ix[['ItemA', 'ItemB']] - tm.assert_panel_equal(result, expected) + result = panel.loc[['ItemA', 'ItemB']] + tm.assert_panel_equal(result, expected) - # with an object-like - # GH 9140 - class TestObject: + result = panel.loc[['ItemA', 'ItemB'], :, :] + tm.assert_panel_equal(result, expected) - def __str__(self): - return "TestObject" + result = panel[['ItemA', 'ItemB']] + tm.assert_panel_equal(result, expected) - obj = TestObject() + result = panel.loc['ItemA':'ItemB'] + tm.assert_panel_equal(result, expected) - p = Panel(np.random.randn(1, 5, 4), items=[obj], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) + with catch_warnings(record=True): + result = panel.ix[['ItemA', 'ItemB']] + tm.assert_panel_equal(result, expected) - expected = p.iloc[0] - result = p[obj] - tm.assert_frame_equal(result, expected) + # with an object-like + # GH 9140 + class TestObject: - def test_panel_setitem(self): + def 
__str__(self): + return "TestObject" - # GH 7763 - # loc and setitem have setting differences - np.random.seed(0) - index = range(3) - columns = list('abc') + obj = TestObject() - panel = Panel({'A': DataFrame(np.random.randn(3, 3), - index=index, columns=columns), - 'B': DataFrame(np.random.randn(3, 3), - index=index, columns=columns), - 'C': DataFrame(np.random.randn(3, 3), - index=index, columns=columns)}) + p = Panel(np.random.randn(1, 5, 4), items=[obj], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) - replace = DataFrame(np.eye(3, 3), index=range(3), columns=columns) - expected = Panel({'A': replace, 'B': replace, 'C': replace}) + expected = p.iloc[0] + result = p[obj] + tm.assert_frame_equal(result, expected) - p = panel.copy() - for idx in list('ABC'): - p[idx] = replace - tm.assert_panel_equal(p, expected) + def test_panel_setitem(self): - p = panel.copy() - for idx in list('ABC'): - p.loc[idx, :, :] = replace - tm.assert_panel_equal(p, expected) + with catch_warnings(record=True): + # GH 7763 + # loc and setitem have setting differences + np.random.seed(0) + index = range(3) + columns = list('abc') + + panel = Panel({'A': DataFrame(np.random.randn(3, 3), + index=index, columns=columns), + 'B': DataFrame(np.random.randn(3, 3), + index=index, columns=columns), + 'C': DataFrame(np.random.randn(3, 3), + index=index, columns=columns)}) + + replace = DataFrame(np.eye(3, 3), index=range(3), columns=columns) + expected = Panel({'A': replace, 'B': replace, 'C': replace}) + + p = panel.copy() + for idx in list('ABC'): + p[idx] = replace + tm.assert_panel_equal(p, expected) + + p = panel.copy() + for idx in list('ABC'): + p.loc[idx, :, :] = replace + tm.assert_panel_equal(p, expected) def test_panel_assignment(self): - # GH3777 - wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - wp2 = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - - # TODO: unused? - # expected = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] - - def f(): - wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = wp2.loc[ - ['Item1', 'Item2'], :, ['A', 'B']] - - self.assertRaises(NotImplementedError, f) - - # to_assign = wp2.loc[['Item1', 'Item2'], :, ['A', 'B']] - # wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = to_assign - # result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] - # tm.assert_panel_equal(result,expected) + + with catch_warnings(record=True): + # GH3777 + wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) + wp2 = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) + + # TODO: unused? 
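
# The same file also migrates exception assertions from unittest's
# self.assertRaises(...) to pytest's context-manager form. A small sketch
# of the two spellings (hypothetical Panel; not part of the patch):
import pytest
import numpy as np
import pandas as pd

def sketch_out_of_bounds_access():
    p = pd.Panel(np.random.randn(2, 2, 2))
    # old style: self.assertRaises(IndexError, p.iloc.__getitem__, (10, 5))
    # new style: the failing access is written out directly
    with pytest.raises(IndexError):
        p.iloc[10, 5]
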
+ # expected = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] + + def f(): + wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = wp2.loc[ + ['Item1', 'Item2'], :, ['A', 'B']] + + self.assertRaises(NotImplementedError, f) + + # to_assign = wp2.loc[['Item1', 'Item2'], :, ['A', 'B']] + # wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = to_assign + # result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] + # tm.assert_panel_equal(result,expected) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 31fadcc88583c..f51f050c57624 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -119,33 +119,34 @@ def f(): df.ix[:, 'C'] = df.ix[:, 'A'] tm.assert_frame_equal(df, expected) - # ## panel ## - p_orig = Panel(np.arange(16).reshape(2, 4, 2), - items=['Item1', 'Item2'], - major_axis=pd.date_range('2001/1/12', periods=4), - minor_axis=['A', 'B'], dtype='float64') - - # panel setting via item - p_orig = Panel(np.arange(16).reshape(2, 4, 2), - items=['Item1', 'Item2'], - major_axis=pd.date_range('2001/1/12', periods=4), - minor_axis=['A', 'B'], dtype='float64') - expected = p_orig.copy() - expected['Item3'] = expected['Item1'] - p = p_orig.copy() - p.loc['Item3'] = p['Item1'] - tm.assert_panel_equal(p, expected) - - # panel with aligned series - expected = p_orig.copy() - expected = expected.transpose(2, 1, 0) - expected['C'] = DataFrame({'Item1': [30, 30, 30, 30], - 'Item2': [32, 32, 32, 32]}, - index=p_orig.major_axis) - expected = expected.transpose(2, 1, 0) - p = p_orig.copy() - p.loc[:, :, 'C'] = Series([30, 32], index=p_orig.items) - tm.assert_panel_equal(p, expected) + with catch_warnings(record=True): + # ## panel ## + p_orig = Panel(np.arange(16).reshape(2, 4, 2), + items=['Item1', 'Item2'], + major_axis=pd.date_range('2001/1/12', periods=4), + minor_axis=['A', 'B'], dtype='float64') + + # panel setting via item + p_orig = Panel(np.arange(16).reshape(2, 4, 2), + items=['Item1', 'Item2'], + major_axis=pd.date_range('2001/1/12', periods=4), + minor_axis=['A', 'B'], dtype='float64') + expected = p_orig.copy() + expected['Item3'] = expected['Item1'] + p = p_orig.copy() + p.loc['Item3'] = p['Item1'] + tm.assert_panel_equal(p, expected) + + # panel with aligned series + expected = p_orig.copy() + expected = expected.transpose(2, 1, 0) + expected['C'] = DataFrame({'Item1': [30, 30, 30, 30], + 'Item2': [32, 32, 32, 32]}, + index=p_orig.major_axis) + expected = expected.transpose(2, 1, 0) + p = p_orig.copy() + p.loc[:, :, 'C'] = Series([30, 32], index=p_orig.items) + tm.assert_panel_equal(p, expected) # GH 8473 dates = date_range('1/1/2000', periods=8) diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py index d0365cb2c30b3..22c62b738e6a2 100644 --- a/pandas/tests/io/generate_legacy_storage_files.py +++ b/pandas/tests/io/generate_legacy_storage_files.py @@ -1,5 +1,6 @@ """ self-contained to write legacy storage (pickle/msgpack) files """ from __future__ import print_function +from warnings import catch_warnings from distutils.version import LooseVersion from pandas import (Series, DataFrame, Panel, SparseSeries, SparseDataFrame, @@ -127,14 +128,16 @@ def create_data(): u'B': Timestamp('20130603', tz='CET')}, index=range(5)) ) - mixed_dup_panel = Panel({u'ItemA': frame[u'float'], - u'ItemB': frame[u'int']}) - mixed_dup_panel.items = [u'ItemA', u'ItemA'] - panel = dict(float=Panel({u'ItemA': frame[u'float'], - u'ItemB': frame[u'float'] + 1}), - dup=Panel(np.arange(30).reshape(3, 5, 
2).astype(np.float64), - items=[u'A', u'B', u'A']), - mixed_dup=mixed_dup_panel) + with catch_warnings(record=True): + mixed_dup_panel = Panel({u'ItemA': frame[u'float'], + u'ItemB': frame[u'int']}) + mixed_dup_panel.items = [u'ItemA', u'ItemA'] + panel = dict(float=Panel({u'ItemA': frame[u'float'], + u'ItemB': frame[u'float'] + 1}), + dup=Panel( + np.arange(30).reshape(3, 5, 2).astype(np.float64), + items=[u'A', u'B', u'A']), + mixed_dup=mixed_dup_panel) cat = dict(int8=Categorical(list('abcdefg')), int16=Categorical(np.arange(1000)), diff --git a/pandas/tests/io/msgpack/common.py b/pandas/tests/io/msgpack/common.py new file mode 100644 index 0000000000000..b770d12cffbfa --- /dev/null +++ b/pandas/tests/io/msgpack/common.py @@ -0,0 +1,10 @@ +from pandas.compat import PY3 + + +# array compat +if PY3: + frombytes = lambda obj, data: obj.frombytes(data) + tobytes = lambda obj: obj.tobytes() +else: + frombytes = lambda obj, data: obj.fromstring(data) + tobytes = lambda obj: obj.tostring() diff --git a/pandas/tests/io/msgpack/test_buffer.py b/pandas/tests/io/msgpack/test_buffer.py index 5a2dc3dba5dfa..8ebec734f1d3d 100644 --- a/pandas/tests/io/msgpack/test_buffer.py +++ b/pandas/tests/io/msgpack/test_buffer.py @@ -1,12 +1,13 @@ # coding: utf-8 from pandas.io.msgpack import packb, unpackb +from .common import frombytes def test_unpack_buffer(): from array import array buf = array('b') - buf.fromstring(packb((b'foo', b'bar'))) + frombytes(buf, packb((b'foo', b'bar'))) obj = unpackb(buf, use_list=1) assert [b'foo', b'bar'] == obj diff --git a/pandas/tests/io/msgpack/test_extension.py b/pandas/tests/io/msgpack/test_extension.py index a5a111efbb835..26a611bea224c 100644 --- a/pandas/tests/io/msgpack/test_extension.py +++ b/pandas/tests/io/msgpack/test_extension.py @@ -1,7 +1,9 @@ from __future__ import print_function import array + import pandas.io.msgpack as msgpack from pandas.io.msgpack import ExtType +from .common import frombytes, tobytes def test_pack_ext_type(): @@ -42,7 +44,7 @@ def default(obj): print('default called', obj) if isinstance(obj, array.array): typecode = 123 # application specific typecode - data = obj.tostring() + data = tobytes(obj) return ExtType(typecode, data) raise TypeError("Unknwon type object %r" % (obj, )) @@ -50,7 +52,7 @@ def ext_hook(code, data): print('ext_hook called', code, data) assert code == 123 obj = array.array('d') - obj.fromstring(data) + frombytes(obj, data) return obj obj = [42, b'hello', array.array('d', [1.1, 2.2, 3.3])] diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 256a37e922177..d83e26995020c 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -7,6 +7,7 @@ from distutils.version import LooseVersion import warnings +from warnings import catch_warnings import operator import functools import pytest @@ -2340,9 +2341,13 @@ def check_called(func): writer = ExcelWriter('something.test') tm.assertIsInstance(writer, DummyClass) df = tm.makeCustomDataframe(1, 1) - panel = tm.makePanel() - func = lambda: df.to_excel('something.test') - check_called(func) - check_called(lambda: panel.to_excel('something.test')) - check_called(lambda: df.to_excel('something.xlsx')) - check_called(lambda: df.to_excel('something.xls', engine='dummy')) + + with catch_warnings(record=True): + panel = tm.makePanel() + func = lambda: df.to_excel('something.test') + check_called(func) + check_called(lambda: panel.to_excel('something.test')) + check_called(lambda: df.to_excel('something.xlsx')) + check_called( + lambda: 
df.to_excel( + 'something.xls', engine='dummy')) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 1b6b0fc62f913..4856cd8c5e9a6 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -1,5 +1,6 @@ import pytest +from warnings import catch_warnings import os import datetime import numpy as np @@ -452,9 +453,10 @@ def setUp(self): 'int': DataFrame(dict(A=data['B'], B=Series(data['B']) + 1)), 'mixed': DataFrame(data)} - self.panel = { - 'float': Panel(dict(ItemA=self.frame['float'], - ItemB=self.frame['float'] + 1))} + with catch_warnings(record=True): + self.panel = { + 'float': Panel(dict(ItemA=self.frame['float'], + ItemB=self.frame['float'] + 1))} def test_basic_frame(self): @@ -464,9 +466,10 @@ def test_basic_frame(self): def test_basic_panel(self): - for s, i in self.panel.items(): - i_rec = self.encode_decode(i) - assert_panel_equal(i, i_rec) + with catch_warnings(record=True): + for s, i in self.panel.items(): + i_rec = self.encode_decode(i) + assert_panel_equal(i, i_rec) def test_multi(self): @@ -899,8 +902,9 @@ def test_msgpacks_legacy(self, current_packers_data, all_packers_data, continue vf = os.path.join(pth, f) try: - self.compare(current_packers_data, all_packers_data, - vf, version) + with catch_warnings(record=True): + self.compare(current_packers_data, all_packers_data, + vf, version) except ImportError: # blosc not installed continue diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index f46f62e781006..0746cacb01bb9 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -14,6 +14,8 @@ """ import pytest +from warnings import catch_warnings + import os from distutils.version import LooseVersion import pandas as pd @@ -202,7 +204,8 @@ def test_pickles(current_pickle_data, version): n = 0 for f in os.listdir(pth): vf = os.path.join(pth, f) - data = compare(current_pickle_data, vf, version) + with catch_warnings(record=True): + data = compare(current_pickle_data, vf, version) if data is None: continue @@ -339,7 +342,8 @@ def compress_file(self, src_path, dest_path, compression): raise ValueError(msg) if compression != "zip": - f.write(open(src_path, "rb").read()) + with open(src_path, "rb") as fh: + f.write(fh.read()) f.close() def decompress_file(self, src_path, dest_path, compression): @@ -369,7 +373,8 @@ def decompress_file(self, src_path, dest_path, compression): msg = 'Unrecognized compression type: {}'.format(compression) raise ValueError(msg) - open(dest_path, "wb").write(f.read()) + with open(dest_path, "wb") as fh: + fh.write(f.read()) f.close() @pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz']) diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index e6482d70e0ae3..075d5efcefbe0 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -3,7 +3,7 @@ import operator import pytest - +from warnings import catch_warnings from numpy import nan import numpy as np import pandas as pd @@ -953,23 +953,25 @@ def _check(frame, orig): self._check_all(_check) def test_stack_sparse_frame(self): - def _check(frame): - dense_frame = frame.to_dense() # noqa + with catch_warnings(record=True): + + def _check(frame): + dense_frame = frame.to_dense() # noqa - wp = Panel.from_dict({'foo': frame}) - from_dense_lp = wp.to_frame() + wp = Panel.from_dict({'foo': frame}) + from_dense_lp = wp.to_frame() - from_sparse_lp = spf.stack_sparse_frame(frame) + from_sparse_lp = 
spf.stack_sparse_frame(frame) - self.assert_numpy_array_equal(from_dense_lp.values, - from_sparse_lp.values) + self.assert_numpy_array_equal(from_dense_lp.values, + from_sparse_lp.values) - _check(self.frame) - _check(self.iframe) + _check(self.frame) + _check(self.iframe) - # for now - self.assertRaises(Exception, _check, self.zframe) - self.assertRaises(Exception, _check, self.fill_frame) + # for now + self.assertRaises(Exception, _check, self.zframe) + self.assertRaises(Exception, _check, self.fill_frame) def test_transpose(self): diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 63c1ae70e35a6..adacbb95f5162 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # pylint: disable=E1101,E1103,W0232 +from warnings import catch_warnings import pytest import sys from datetime import datetime @@ -1816,9 +1817,11 @@ def test_construction_frame(self): def test_reshaping(self): - p = tm.makePanel() - p['str'] = 'foo' - df = p.to_frame() + with catch_warnings(record=True): + p = tm.makePanel() + p['str'] = 'foo' + df = p.to_frame() + df['category'] = df['str'].astype('category') result = df['category'].unstack() diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 914d26fcafb4a..e3193cddbaaab 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1253,14 +1253,15 @@ def test_swaplevel(self): tm.assert_frame_equal(swapped, exp) def test_swaplevel_panel(self): - panel = Panel({'ItemA': self.frame, 'ItemB': self.frame * 2}) - expected = panel.copy() - expected.major_axis = expected.major_axis.swaplevel(0, 1) - - for result in (panel.swaplevel(axis='major'), - panel.swaplevel(0, axis='major'), - panel.swaplevel(0, 1, axis='major')): - tm.assert_panel_equal(result, expected) + with catch_warnings(record=True): + panel = Panel({'ItemA': self.frame, 'ItemB': self.frame * 2}) + expected = panel.copy() + expected.major_axis = expected.major_axis.swaplevel(0, 1) + + for result in (panel.swaplevel(axis='major'), + panel.swaplevel(0, axis='major'), + panel.swaplevel(0, 1, axis='major')): + tm.assert_panel_equal(result, expected) def test_reorder_levels(self): result = self.ymd.reorder_levels(['month', 'day', 'year']) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 99361695b2371..fad1fbc52bbe3 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -18,6 +18,7 @@ class TestSorting(tm.TestCase): + @pytest.mark.slow def test_int64_overflow(self): B = np.concatenate((np.arange(1000), np.arange(1000), np.arange(500))) @@ -51,9 +52,11 @@ def test_int64_overflow(self): expected = df.groupby(tups).sum()['values'] for k, v in compat.iteritems(expected): - self.assertEqual(left[k], right[k[::-1]]) - self.assertEqual(left[k], v) - self.assertEqual(len(left), len(right)) + assert left[k] == right[k[::-1]] + assert left[k] == v + assert len(left) == len(right) + + def test_int64_overflow_moar(self): # GH9096 values = range(55109) @@ -62,7 +65,7 @@ def test_int64_overflow(self): 'c': values, 'd': values}) grouped = data.groupby(['a', 'b', 'c', 'd']) - self.assertEqual(len(grouped), len(values)) + assert len(grouped) == len(values) arr = np.random.randint(-1 << 12, 1 << 12, (1 << 15, 5)) i = np.random.choice(len(arr), len(arr) * 4) @@ -76,7 +79,7 @@ def test_int64_overflow(self): gr = df.groupby(list('abcde')) # verify this is testing what it is supposed to test! 
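
# Background for the is_int64_overflow_possible assertions below: the
# grouper flattens the per-column group counts into a single int64 key
# space, which can overflow once the product of the shape reaches 2**63.
# A simplified sketch of that check (a hypothetical reimplementation, not
# the pandas source):
def int64_overflow_possible(shape):
    the_prod = 1
    for x in shape:
        the_prod *= int(x)      # product of the group counts per column
    return the_prod >= 2 ** 63  # would not fit in a signed 64-bit integer
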
- self.assertTrue(is_int64_overflow_possible(gr.grouper.shape)) + assert is_int64_overflow_possible(gr.grouper.shape) # mannually compute groupings jim, joe = defaultdict(list), defaultdict(list) @@ -84,7 +87,7 @@ def test_int64_overflow(self): jim[key].append(a) joe[key].append(b) - self.assertEqual(len(gr), len(jim)) + assert len(gr) == len(jim) mi = MultiIndex.from_tuples(jim.keys(), names=list('abcde')) def aggr(func): @@ -201,7 +204,7 @@ def test_int64_overflow_issues(self): # it works! result = merge(df1, df2, how='outer') - self.assertTrue(len(result) == 2000) + assert len(result) == 2000 low, high, n = -1 << 10, 1 << 10, 1 << 20 left = DataFrame(np.random.randint(low, high, (n, 7)), @@ -216,11 +219,11 @@ def test_int64_overflow_issues(self): right['right'] *= -1 out = merge(left, right, how='outer') - self.assertEqual(len(out), len(left)) + assert len(out) == len(left) assert_series_equal(out['left'], - out['right'], check_names=False) result = out.iloc[:, :-2].sum(axis=1) assert_series_equal(out['left'], result, check_names=False) - self.assertTrue(result.name is None) + assert result.name is None out.sort_values(out.columns.tolist(), inplace=True) out.index = np.arange(len(out)) @@ -241,7 +244,7 @@ def test_int64_overflow_issues(self): # confirm that this is checking what it is supposed to check shape = left.apply(Series.nunique).values - self.assertTrue(is_int64_overflow_possible(shape)) + assert is_int64_overflow_possible(shape) # add duplicates to left frame left = concat([left, left], ignore_index=True) @@ -307,7 +310,7 @@ def verify_order(df): for how in 'left', 'right', 'outer', 'inner': mask = jmask[how] frame = align(out[mask].copy()) - self.assertTrue(mask.all() ^ mask.any() or how == 'outer') + assert mask.all() ^ mask.any() or how == 'outer' for sort in [False, True]: res = merge(left, right, how=how, sort=sort) diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index c61f2a3dc8066..2ff287acc4c47 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -1936,21 +1936,23 @@ def test_concat_multiindex_dfs_with_deepcopy(self): @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) @pytest.mark.parametrize('dt', np.sctypes['float']) def test_concat_no_unnecessary_upcast(dt, pdt): - # GH 13247 - dims = pdt().ndim - dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)), - pdt(np.array([np.nan], dtype=dt, ndmin=dims)), - pdt(np.array([5], dtype=dt, ndmin=dims))] - x = pd.concat(dfs) - assert x.values.dtype == dt + with catch_warnings(record=True): + # GH 13247 + dims = pdt().ndim + dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)), + pdt(np.array([np.nan], dtype=dt, ndmin=dims)), + pdt(np.array([5], dtype=dt, ndmin=dims))] + x = pd.concat(dfs) + assert x.values.dtype == dt @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) @pytest.mark.parametrize('dt', np.sctypes['int']) def test_concat_will_upcast(dt, pdt): - dims = pdt().ndim - dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)), - pdt(np.array([np.nan], ndmin=dims)), - pdt(np.array([5], dtype=dt, ndmin=dims))] - x = pd.concat(dfs) - assert x.values.dtype == 'float64' + with catch_warnings(record=True): + dims = pdt().ndim + dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)), + pdt(np.array([np.nan], ndmin=dims)), + pdt(np.array([5], dtype=dt, ndmin=dims))] + x = pd.concat(dfs) + assert x.values.dtype == 'float64' diff --git a/pandas/tests/tools/test_hashing.py b/pandas/tests/tools/test_hashing.py index 864b5018abc75..467b058fabc67 100644 --- 
a/pandas/tests/tools/test_hashing.py +++ b/pandas/tests/tools/test_hashing.py @@ -1,3 +1,6 @@ +import pytest + +from warnings import catch_warnings import numpy as np import pandas as pd @@ -195,11 +198,14 @@ def test_categorical_with_nan_consistency(self): def test_pandas_errors(self): - for obj in [pd.Timestamp('20130101'), tm.makePanel()]: - def f(): - hash_pandas_object(f) + for obj in [pd.Timestamp('20130101')]: + with pytest.raises(TypeError): + hash_pandas_object(obj) - self.assertRaises(TypeError, f) + with catch_warnings(record=True): + obj = tm.makePanel() + with pytest.raises(TypeError): + hash_pandas_object(obj) def test_hash_keys(self): # using different hash keys, should have different hashes diff --git a/pandas/tests/tools/test_join.py b/pandas/tests/tools/test_join.py index b65f800802bca..8571a1ff16701 100644 --- a/pandas/tests/tools/test_join.py +++ b/pandas/tests/tools/test_join.py @@ -1,5 +1,6 @@ # pylint: disable=E1103 +from warnings import catch_warnings from numpy.random import randn import numpy as np @@ -629,87 +630,90 @@ def test_join_dups(self): assert_frame_equal(dta, expected) def test_panel_join(self): - panel = tm.makePanel() - tm.add_nans(panel) - - p1 = panel.iloc[:2, :10, :3] - p2 = panel.iloc[2:, 5:, 2:] - - # left join - result = p1.join(p2) - expected = p1.copy() - expected['ItemC'] = p2['ItemC'] - tm.assert_panel_equal(result, expected) - - # right join - result = p1.join(p2, how='right') - expected = p2.copy() - expected['ItemA'] = p1['ItemA'] - expected['ItemB'] = p1['ItemB'] - expected = expected.reindex(items=['ItemA', 'ItemB', 'ItemC']) - tm.assert_panel_equal(result, expected) - - # inner join - result = p1.join(p2, how='inner') - expected = panel.iloc[:, 5:10, 2:3] - tm.assert_panel_equal(result, expected) - - # outer join - result = p1.join(p2, how='outer') - expected = p1.reindex(major=panel.major_axis, - minor=panel.minor_axis) - expected = expected.join(p2.reindex(major=panel.major_axis, - minor=panel.minor_axis)) - tm.assert_panel_equal(result, expected) + with catch_warnings(record=True): + panel = tm.makePanel() + tm.add_nans(panel) + + p1 = panel.iloc[:2, :10, :3] + p2 = panel.iloc[2:, 5:, 2:] + + # left join + result = p1.join(p2) + expected = p1.copy() + expected['ItemC'] = p2['ItemC'] + tm.assert_panel_equal(result, expected) + + # right join + result = p1.join(p2, how='right') + expected = p2.copy() + expected['ItemA'] = p1['ItemA'] + expected['ItemB'] = p1['ItemB'] + expected = expected.reindex(items=['ItemA', 'ItemB', 'ItemC']) + tm.assert_panel_equal(result, expected) + + # inner join + result = p1.join(p2, how='inner') + expected = panel.iloc[:, 5:10, 2:3] + tm.assert_panel_equal(result, expected) + + # outer join + result = p1.join(p2, how='outer') + expected = p1.reindex(major=panel.major_axis, + minor=panel.minor_axis) + expected = expected.join(p2.reindex(major=panel.major_axis, + minor=panel.minor_axis)) + tm.assert_panel_equal(result, expected) def test_panel_join_overlap(self): - panel = tm.makePanel() - tm.add_nans(panel) - - p1 = panel.loc[['ItemA', 'ItemB', 'ItemC']] - p2 = panel.loc[['ItemB', 'ItemC']] - - # Expected index is - # - # ItemA, ItemB_p1, ItemC_p1, ItemB_p2, ItemC_p2 - joined = p1.join(p2, lsuffix='_p1', rsuffix='_p2') - p1_suf = p1.loc[['ItemB', 'ItemC']].add_suffix('_p1') - p2_suf = p2.loc[['ItemB', 'ItemC']].add_suffix('_p2') - no_overlap = panel.loc[['ItemA']] - expected = no_overlap.join(p1_suf.join(p2_suf)) - tm.assert_panel_equal(joined, expected) + with catch_warnings(record=True): + panel = 
tm.makePanel() + tm.add_nans(panel) + + p1 = panel.loc[['ItemA', 'ItemB', 'ItemC']] + p2 = panel.loc[['ItemB', 'ItemC']] + + # Expected index is + # + # ItemA, ItemB_p1, ItemC_p1, ItemB_p2, ItemC_p2 + joined = p1.join(p2, lsuffix='_p1', rsuffix='_p2') + p1_suf = p1.loc[['ItemB', 'ItemC']].add_suffix('_p1') + p2_suf = p2.loc[['ItemB', 'ItemC']].add_suffix('_p2') + no_overlap = panel.loc[['ItemA']] + expected = no_overlap.join(p1_suf.join(p2_suf)) + tm.assert_panel_equal(joined, expected) def test_panel_join_many(self): - tm.K = 10 - panel = tm.makePanel() - tm.K = 4 + with catch_warnings(record=True): + tm.K = 10 + panel = tm.makePanel() + tm.K = 4 - panels = [panel.iloc[:2], panel.iloc[2:6], panel.iloc[6:]] + panels = [panel.iloc[:2], panel.iloc[2:6], panel.iloc[6:]] - joined = panels[0].join(panels[1:]) - tm.assert_panel_equal(joined, panel) + joined = panels[0].join(panels[1:]) + tm.assert_panel_equal(joined, panel) - panels = [panel.iloc[:2, :-5], - panel.iloc[2:6, 2:], - panel.iloc[6:, 5:-7]] + panels = [panel.iloc[:2, :-5], + panel.iloc[2:6, 2:], + panel.iloc[6:, 5:-7]] - data_dict = {} - for p in panels: - data_dict.update(p.iteritems()) + data_dict = {} + for p in panels: + data_dict.update(p.iteritems()) - joined = panels[0].join(panels[1:], how='inner') - expected = pd.Panel.from_dict(data_dict, intersect=True) - tm.assert_panel_equal(joined, expected) + joined = panels[0].join(panels[1:], how='inner') + expected = pd.Panel.from_dict(data_dict, intersect=True) + tm.assert_panel_equal(joined, expected) - joined = panels[0].join(panels[1:], how='outer') - expected = pd.Panel.from_dict(data_dict, intersect=False) - tm.assert_panel_equal(joined, expected) + joined = panels[0].join(panels[1:], how='outer') + expected = pd.Panel.from_dict(data_dict, intersect=False) + tm.assert_panel_equal(joined, expected) - # edge cases - self.assertRaises(ValueError, panels[0].join, panels[1:], - how='outer', lsuffix='foo', rsuffix='bar') - self.assertRaises(ValueError, panels[0].join, panels[1:], - how='right') + # edge cases + self.assertRaises(ValueError, panels[0].join, panels[1:], + how='outer', lsuffix='foo', rsuffix='bar') + self.assertRaises(ValueError, panels[0].join, panels[1:], + how='right') def _check_join(left, right, result, join_col, how='left', diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py index 57e5a1631f8e8..9c66cae292c4e 100755 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/tseries/test_resample.py @@ -1,5 +1,6 @@ # pylint: disable=E1101 +from warnings import catch_warnings from datetime import datetime, timedelta from functools import partial @@ -1479,44 +1480,47 @@ def test_resample_panel(self): rng = date_range('1/1/2000', '6/30/2000') n = len(rng) - panel = Panel(np.random.randn(3, n, 5), - items=['one', 'two', 'three'], - major_axis=rng, - minor_axis=['a', 'b', 'c', 'd', 'e']) + with catch_warnings(record=True): + panel = Panel(np.random.randn(3, n, 5), + items=['one', 'two', 'three'], + major_axis=rng, + minor_axis=['a', 'b', 'c', 'd', 'e']) - result = panel.resample('M', axis=1).mean() + result = panel.resample('M', axis=1).mean() - def p_apply(panel, f): - result = {} - for item in panel.items: - result[item] = f(panel[item]) - return Panel(result, items=panel.items) + def p_apply(panel, f): + result = {} + for item in panel.items: + result[item] = f(panel[item]) + return Panel(result, items=panel.items) - expected = p_apply(panel, lambda x: x.resample('M').mean()) - tm.assert_panel_equal(result, expected) + 
expected = p_apply(panel, lambda x: x.resample('M').mean()) + tm.assert_panel_equal(result, expected) - panel2 = panel.swapaxes(1, 2) - result = panel2.resample('M', axis=2).mean() - expected = p_apply(panel2, lambda x: x.resample('M', axis=1).mean()) - tm.assert_panel_equal(result, expected) + panel2 = panel.swapaxes(1, 2) + result = panel2.resample('M', axis=2).mean() + expected = p_apply(panel2, + lambda x: x.resample('M', axis=1).mean()) + tm.assert_panel_equal(result, expected) def test_resample_panel_numpy(self): rng = date_range('1/1/2000', '6/30/2000') n = len(rng) - panel = Panel(np.random.randn(3, n, 5), - items=['one', 'two', 'three'], - major_axis=rng, - minor_axis=['a', 'b', 'c', 'd', 'e']) + with catch_warnings(record=True): + panel = Panel(np.random.randn(3, n, 5), + items=['one', 'two', 'three'], + major_axis=rng, + minor_axis=['a', 'b', 'c', 'd', 'e']) - result = panel.resample('M', axis=1).apply(lambda x: x.mean(1)) - expected = panel.resample('M', axis=1).mean() - tm.assert_panel_equal(result, expected) + result = panel.resample('M', axis=1).apply(lambda x: x.mean(1)) + expected = panel.resample('M', axis=1).mean() + tm.assert_panel_equal(result, expected) - panel = panel.swapaxes(1, 2) - result = panel.resample('M', axis=2).apply(lambda x: x.mean(2)) - expected = panel.resample('M', axis=2).mean() - tm.assert_panel_equal(result, expected) + panel = panel.swapaxes(1, 2) + result = panel.resample('M', axis=2).apply(lambda x: x.mean(2)) + expected = panel.resample('M', axis=2).mean() + tm.assert_panel_equal(result, expected) def test_resample_anchored_ticks(self): # If a fixed delta (5 minute, 4 hour) evenly divides a day, we should @@ -3037,20 +3041,22 @@ def test_apply_iteration(self): def test_panel_aggregation(self): ind = pd.date_range('1/1/2000', periods=100) data = np.random.randn(2, len(ind), 4) - wp = pd.Panel(data, items=['Item1', 'Item2'], major_axis=ind, - minor_axis=['A', 'B', 'C', 'D']) - tg = TimeGrouper('M', axis=1) - _, grouper, _ = tg._get_grouper(wp) - bingrouped = wp.groupby(grouper) - binagg = bingrouped.mean() + with catch_warnings(record=True): + wp = Panel(data, items=['Item1', 'Item2'], major_axis=ind, + minor_axis=['A', 'B', 'C', 'D']) - def f(x): - assert (isinstance(x, Panel)) - return x.mean(1) + tg = TimeGrouper('M', axis=1) + _, grouper, _ = tg._get_grouper(wp) + bingrouped = wp.groupby(grouper) + binagg = bingrouped.mean() + + def f(x): + assert (isinstance(x, Panel)) + return x.mean(1) - result = bingrouped.agg(f) - tm.assert_panel_equal(result, binagg) + result = bingrouped.agg(f) + tm.assert_panel_equal(result, binagg) def test_fails_on_no_datetime_index(self): index_names = ('Int64Index', 'Index', 'Float64Index', 'MultiIndex') diff --git a/pandas/tests/types/test_generic.py b/pandas/tests/types/test_generic.py index c7c8b0becad63..7994aa77bb220 100644 --- a/pandas/tests/types/test_generic.py +++ b/pandas/tests/types/test_generic.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +from warnings import catch_warnings import numpy as np import pandas as pd import pandas.util.testing as tm @@ -33,7 +34,8 @@ def test_abc_types(self): self.assertIsInstance(pd.Int64Index([1, 2, 3]), gt.ABCIndexClass) self.assertIsInstance(pd.Series([1, 2, 3]), gt.ABCSeries) self.assertIsInstance(self.df, gt.ABCDataFrame) - self.assertIsInstance(self.df.to_panel(), gt.ABCPanel) + with catch_warnings(record=True): + self.assertIsInstance(self.df.to_panel(), gt.ABCPanel) self.assertIsInstance(self.sparse_series, gt.ABCSparseSeries) self.assertIsInstance(self.sparse_array, 
gt.ABCSparseArray) self.assertIsInstance(self.categorical, gt.ABCCategorical) diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py index de3a2ca35a7f5..ec61903d3f20c 100644 --- a/pandas/tests/types/test_inference.py +++ b/pandas/tests/types/test_inference.py @@ -5,7 +5,7 @@ related to inference and not otherwise tested in types/test_common.py """ - +from warnings import catch_warnings import collections import re from datetime import datetime, date, timedelta, time @@ -930,8 +930,9 @@ def test_lisscalar_pandas_containers(self): self.assertFalse(is_scalar(Series([1]))) self.assertFalse(is_scalar(DataFrame())) self.assertFalse(is_scalar(DataFrame([[1]]))) - self.assertFalse(is_scalar(Panel())) - self.assertFalse(is_scalar(Panel([[[1]]]))) + with catch_warnings(record=True): + self.assertFalse(is_scalar(Panel())) + self.assertFalse(is_scalar(Panel([[[1]]]))) self.assertFalse(is_scalar(Index([]))) self.assertFalse(is_scalar(Index([1]))) diff --git a/pandas/types/missing.py b/pandas/types/missing.py index cc8b5edc27542..ea49af9884f5a 100644 --- a/pandas/types/missing.py +++ b/pandas/types/missing.py @@ -302,7 +302,7 @@ def array_equivalent(left, right, strict_nan=False): # NaNs can occur in float and complex arrays. if is_float_dtype(left) or is_complex_dtype(left): - return ((left == right) | (np.isnan(left) & np.isnan(right))).all() + return ((left == right) | (isnull(left) & isnull(right))).all() # numpy will will not allow this type of datetimelike vs integer comparison elif is_datetimelike_v_numeric(left, right): From 6c0cfff8aa643d31462de07e8a693d035fc0e77b Mon Sep 17 00:00:00 2001 From: Tong SHEN Date: Tue, 11 Apr 2017 19:37:01 +0800 Subject: [PATCH 376/933] DOC: Fix typos in doc style.ipynb (#15968) --- doc/source/style.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/style.ipynb b/doc/source/style.ipynb index 38b39bad8b415..2b8bf35a913c1 100644 --- a/doc/source/style.ipynb +++ b/doc/source/style.ipynb @@ -99,7 +99,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "*Note*: The `DataFrame.style` attribute is a propetry that returns a `Styler` object. `Styler` has a `_repr_html_` method defined on it so they are rendered automatically. If you want the actual HTML back for further processing or for writing to file call the `.render()` method which returns a string.\n", + "*Note*: The `DataFrame.style` attribute is a property that returns a `Styler` object. `Styler` has a `_repr_html_` method defined on it so they are rendered automatically. If you want the actual HTML back for further processing or for writing to file call the `.render()` method which returns a string.\n", "\n", "The above output looks very similar to the standard DataFrame HTML representation. But we've done some work behind the scenes to attach CSS classes to each cell. We can view these by calling the `.render` method." 
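
As a small illustration of the point above (hypothetical data; any DataFrame works), the rendered HTML can be captured as a plain string:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(3, 2), columns=['a', 'b'])
    styler = df.style.highlight_max()   # a Styler; nothing rendered yet
    html = styler.render()              # returns the HTML as a str
    assert '<table' in html
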
] @@ -512,7 +512,7 @@ }, "outputs": [], "source": [ - "# Compreess the color range\n", + "# Compress the color range\n", "(df.loc[:4]\n", " .style\n", " .background_gradient(cmap='viridis', low=.5, high=0)\n", @@ -637,7 +637,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Other options\n", + "## Other Options\n", "\n", "You've seen a few methods for data-driven styling.\n", "`Styler` also provides a few other options for styles that don't depend on the data.\n", From 1751628adef96b913d0083a48e51658a70dac8c4 Mon Sep 17 00:00:00 2001 From: Tong SHEN Date: Wed, 12 Apr 2017 15:21:14 +0800 Subject: [PATCH 377/933] DOC: Fix typo in timeseries.rst (#15981) --- doc/source/timeseries.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 54e574adc05d4..61812684e7648 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -113,7 +113,7 @@ For example: pd.Period('2012-05', freq='D') ``Timestamp`` and ``Period`` can be the index. Lists of ``Timestamp`` and -``Period`` are automatically coerce to ``DatetimeIndex`` and ``PeriodIndex`` +``Period`` are automatically coerced to ``DatetimeIndex`` and ``PeriodIndex`` respectively. .. ipython:: python From c4d71cea79bee1e9ed3b6ca97f3a5c1b8ad9369f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 12 Apr 2017 19:29:50 +0000 Subject: [PATCH 378/933] TST: allow the network decorator to catch ssl certificate test failures (#15985) --- pandas/util/testing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index ef0fa04548cab..d5986a7f390e5 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2213,6 +2213,7 @@ def dec(f): 'Temporary failure in name resolution', 'Name or service not known', 'Connection refused', + 'certificate verify', ) # or this e.errno/e.reason.errno From 7b8a6b1bc22f6fc0023c02ac8301e07b4ab80417 Mon Sep 17 00:00:00 2001 From: Sam Foo Date: Wed, 12 Apr 2017 13:49:49 -0700 Subject: [PATCH 379/933] VIS: Allow 'C0'-like plotting for plotting colors #15516 (#15873) * VIS: Allow 'C0'-like plotting for plotting colors * Added case color='' and support for mpl < 2.0 * Updated prop_cycle references to be compatible with matplotlib 1.5 and 2.0 * Separated test; Used more consise regex --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/plotting/test_frame.py | 16 ++++++++++++++++ pandas/tools/plotting.py | 21 ++++++++++++++++----- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fd1cd3d0022c9..defabee3cef8c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -345,6 +345,7 @@ Other Enhancements - The ``skiprows`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`10882`) - The ``nrows`` and ``chunksize`` arguments in ``pd.read_csv()`` are supported if both are passed (:issue:`6774`, :issue:`15755`) - ``pd.DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`) +- ``pd.DataFrame.plot`` can pass `matplotlib 2.0 default color cycle as a single string as color parameter `__. (:issue:`15516`) - ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) - ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. 
See the :ref:`Timedelta docs ` (:issue:`15136`) - ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 1527637ea3eff..8090b9cc44ca3 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -141,6 +141,22 @@ def test_plot(self): result = ax.get_axes() # deprecated self.assertIs(result, axes[0]) + # GH 15516 + def test_mpl2_color_cycle_str(self): + # test CN mpl 2.0 color cycle + if self.mpl_ge_2_0_0: + colors = ['C' + str(x) for x in range(10)] + df = DataFrame(randn(10, 3), columns=['a', 'b', 'c']) + for c in colors: + _check_plot_works(df.plot, color=c) + else: + pytest.skip("not supported in matplotlib < 2.0.0") + + def test_color_empty_string(self): + df = DataFrame(randn(10, 2)) + with tm.assertRaises(ValueError): + df.plot(color='') + def test_color_and_style_arguments(self): df = DataFrame({'x': [1, 2], 'y': [3, 4]}) # passing both 'color' and 'style' arguments should be allowed diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index f70a2b0b22140..99e56ca80cf97 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -225,10 +225,18 @@ def _maybe_valid_colors(colors): # check whether each character can be convertable to colors maybe_color_cycle = _maybe_valid_colors(list(colors)) if maybe_single_color and maybe_color_cycle and len(colors) > 1: - msg = ("'{0}' can be parsed as both single color and " - "color cycle. Specify each color using a list " - "like ['{0}'] or {1}") - raise ValueError(msg.format(colors, list(colors))) + # Special case for single str 'CN' match and convert to hex + # for supporting matplotlib < 2.0.0 + if re.match(r'\AC[0-9]\Z', colors) and _mpl_ge_2_0_0(): + hex_color = [c['color'] + for c in list(plt.rcParams['axes.prop_cycle'])] + colors = [hex_color[int(colors[1])]] + else: + # this may no longer be required + msg = ("'{0}' can be parsed as both single color and " + "color cycle. 
Specify each color using a list "
+                       "like ['{0}'] or {1}")
+                raise ValueError(msg.format(colors, list(colors)))
         elif maybe_single_color:
             colors = [colors]
         else:
@@ -237,7 +245,10 @@ def _maybe_valid_colors(colors):
             pass
 
     if len(colors) != num_colors:
-        multiple = num_colors // len(colors) - 1
+        try:
+            multiple = num_colors // len(colors) - 1
+        except ZeroDivisionError:
+            raise ValueError("Invalid color argument: ''")
         mod = num_colors % len(colors)
         colors += multiple * colors

From 1c4dacb4464fa0139216130b1835e5f4d4b73342 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Thu, 13 Apr 2017 10:18:04 +0000
Subject: [PATCH 380/933] DEPR: deprecate relabeling dicts in groupby.agg (#15931)

* DEPR: deprecate relabeling dictionaries in groupby.agg
---
 doc/source/computation.rst             |   8 --
 doc/source/groupby.rst                 |  32 ++++--
 doc/source/timeseries.rst              |   8 --
 doc/source/whatsnew/v0.20.0.txt        |  82 +++++++++++++
 pandas/core/base.py                    | 152 +++++++++++++++++++++----
 pandas/core/groupby.py                 |  52 +++++----
 pandas/tests/groupby/test_aggregate.py |  83 +++++++++++---
 pandas/tests/groupby/test_groupby.py   |  14 ++-
 pandas/tests/groupby/test_whitelist.py |   2 +-
 pandas/tests/test_window.py            |  22 ++--
 pandas/tests/tseries/test_resample.py  |  67 ++++++-----
 pandas/types/cast.py                   |  17 +++
 12 files changed, 418 insertions(+), 121 deletions(-)

diff --git a/doc/source/computation.rst b/doc/source/computation.rst
index a37cbc96b2d8c..f46a00826a8d9 100644
--- a/doc/source/computation.rst
+++ b/doc/source/computation.rst
@@ -610,14 +610,6 @@ aggregation with, outputting a DataFrame:
 
     r['A'].agg([np.sum, np.mean, np.std])
 
-If a dict is passed, the keys will be used to name the columns. Otherwise the
-function's name (stored in the function object) will be used.
-
-.. ipython:: python
-
-   r['A'].agg({'result1' : np.sum,
-               'result2' : np.mean})
-
 On a windowed DataFrame, you can pass a list of functions to apply to each
 column, which produces an aggregated result with a hierarchical index:
 
diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst
index cbe3588104439..03ee5e0d67913 100644
--- a/doc/source/groupby.rst
+++ b/doc/source/groupby.rst
@@ -502,7 +502,7 @@ index are the group names and whose values are the sizes of each group.
 Applying multiple functions at once
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-With grouped Series you can also pass a list or dict of functions to do
+With grouped ``Series`` you can also pass a list or dict of functions to do
 aggregation with, outputting a DataFrame:
 
 .. ipython:: python
 
    grouped = df.groupby('A')
    grouped['C'].agg([np.sum, np.mean, np.std])
 
-If a dict is passed, the keys will be used to name the columns. Otherwise the
-function's name (stored in the function object) will be used.
+On a grouped ``DataFrame``, you can pass a list of functions to apply to each
+column, which produces an aggregated result with a hierarchical index:
 
 .. ipython:: python
 
-   grouped['D'].agg({'result1' : np.sum,
-                     'result2' : np.mean})
+   grouped.agg([np.sum, np.mean, np.std])
 
-On a grouped DataFrame, you can pass a list of functions to apply to each
-column, which produces an aggregated result with a hierarchical index:
+
+The resulting aggregations are named for the functions themselves. If you
+need to rename, then you can add in a chained operation for a ``Series`` like this:
 
 .. ipython:: python
 
-   grouped.agg([np.sum, np.mean, np.std])
+   (grouped['C'].agg([np.sum, np.mean, np.std])
+                .rename(columns={'sum': 'foo',
+                                 'mean': 'bar',
+                                 'std': 'baz'})
+   )
+
+For a grouped ``DataFrame``, you can rename in a similar manner:
+
+.. ipython:: python
+
+   (grouped.agg([np.sum, np.mean, np.std])
+           .rename(columns={'sum': 'foo',
+                            'mean': 'bar',
+                            'std': 'baz'})
+   )
 
-Passing a dict of functions has different behavior by default, see the next
-section.
 
 Applying different functions to DataFrame columns
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
index 61812684e7648..0a957772d785e 100644
--- a/doc/source/timeseries.rst
+++ b/doc/source/timeseries.rst
@@ -1549,14 +1549,6 @@ You can pass a list or dict of functions to do aggregation with, outputting a Da
 
     r['A'].agg([np.sum, np.mean, np.std])
 
-If a dict is passed, the keys will be used to name the columns. Otherwise the
-function's name (stored in the function object) will be used.
-
-.. ipython:: python
-
-   r['A'].agg({'result1' : np.sum,
-               'result2' : np.mean})
-
 On a resampled DataFrame, you can pass a list of functions to apply to each
 column, which produces an aggregated result with a hierarchical index:
 
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index defabee3cef8c..c243e4ef81b38 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -456,6 +456,88 @@ Convert to an xarray DataArray
 
   p.to_xarray()
 
+.. _whatsnew_0200.api_breaking.deprecate_group_agg_dict:
+
+Deprecate groupby.agg() with a dictionary when renaming
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``.groupby(..).agg(..)``, ``.rolling(..).agg(..)``, and ``.resample(..).agg(..)`` syntax can accept a variety of inputs, including scalars,
+lists, and a dict of column names to scalars or lists. This provides a useful syntax for constructing multiple
+(potentially different) aggregations.
+
+However, ``.agg(..)`` can *also* accept a dict that allows 'renaming' of the result columns. This is a complicated and confusing syntax, and it is not consistent
+between ``Series`` and ``DataFrame``. We are deprecating this 'renaming' functionality.
+
+1) We are deprecating passing a dict to a grouped/rolled/resampled ``Series``. This allowed
+one to ``rename`` the resulting aggregation, but this had a completely different
+meaning than passing a dictionary to a grouped ``DataFrame``, which accepts column-to-aggregations.
+2) We are deprecating passing a dict-of-dicts to a grouped/rolled/resampled ``DataFrame`` in a similar manner.
+
+This is an illustrative example:
+
+.. ipython:: python
+
+   df = pd.DataFrame({'A': [1, 1, 1, 2, 2],
+                      'B': range(5),
+                      'C': range(5)})
+   df
+
+Here is a typical useful syntax for computing different aggregations for different columns. This
+is a natural (and useful) syntax. We aggregate from the dict-to-list by taking the specified
+columns and applying the list of functions. This returns a ``MultiIndex`` for the columns.
+
+.. ipython:: python
+
+   df.groupby('A').agg({'B': 'sum', 'C': 'min'})
+
+Here's an example of the first deprecation (1), passing a dict to a grouped ``Series``. This
+is a combination aggregation & renaming:
+
+..
code-block:: ipython + + In [6]: df.groupby('A').B.agg({'foo': 'count'}) + FutureWarning: using a dict on a Series for aggregation + is deprecated and will be removed in a future version + + Out[6]: + foo + A + 1 3 + 2 2 + +You can accomplish the same operation, more idiomatically by: + +.. ipython:: python + + df.groupby('A').B.agg(['count']).rename({'count': 'foo'}) + + +Here's an example of the second deprecation (2), passing a dict-of-dict to a grouped ``DataFrame``: + +.. code-block:: python + + In [23]: (df.groupby('A') + .agg({'B': {'foo': 'sum'}, 'C': {'bar': 'min'}}) + ) + FutureWarning: using a dict with renaming is deprecated and will be removed in a future version + + Out[23]: + B C + foo bar + A + 1 3 0 + 2 7 3 + + +You can accomplish nearly the same by: + +.. ipython:: python + + (df.groupby('A') + .agg({'B': 'sum', 'C': 'min'}) + .rename(columns={'B': 'foo', 'C': 'bar'}) + ) + .. _whatsnew.api_breaking.io_compat: Possible incompat for HDF5 formats for pandas < 0.13.0 diff --git a/pandas/core/base.py b/pandas/core/base.py index bdbfb7b949986..6566ee38c1ade 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1,6 +1,7 @@ """ Base and utility classes for pandas objects. """ +import warnings from pandas import compat from pandas.compat import builtins import numpy as np @@ -290,7 +291,12 @@ class SelectionMixin(object): } @property - def name(self): + def _selection_name(self): + """ + return a name for myself; this would ideally be called + the 'name' property, but we cannot conflict with the + Series.name property which can be set + """ if self._selection is None: return None # 'result' else: @@ -405,6 +411,26 @@ def aggregate(self, func, *args, **kwargs): agg = aggregate + def _try_aggregate_string_function(self, arg, *args, **kwargs): + """ + if arg is a string, then try to operate on it: + - try to find a function on ourselves + - try to find a numpy function + - raise + + """ + assert isinstance(arg, compat.string_types) + + f = getattr(self, arg, None) + if f is not None: + return f(*args, **kwargs) + + f = getattr(np, arg, None) + if f is not None: + return f(self, *args, **kwargs) + + raise ValueError("{} is an unknown string function".format(arg)) + def _aggregate(self, arg, *args, **kwargs): """ provide an implementation for the aggregators @@ -424,18 +450,22 @@ def _aggregate(self, arg, *args, **kwargs): how can be a string describe the required post-processing, or None if not required """ - is_aggregator = lambda x: isinstance(x, (list, tuple, dict)) is_nested_renamer = False + _axis = kwargs.pop('_axis', None) + if _axis is None: + _axis = getattr(self, 'axis', 0) _level = kwargs.pop('_level', None) + if isinstance(arg, compat.string_types): - return getattr(self, arg)(*args, **kwargs), None + return self._try_aggregate_string_function(arg, *args, + **kwargs), None if isinstance(arg, dict): # aggregate based on the passed dict - if self.axis != 0: # pragma: no cover + if _axis != 0: # pragma: no cover raise ValueError('Can only pass dict with axis=0') obj = self._selected_obj @@ -454,7 +484,7 @@ def _aggregate(self, arg, *args, **kwargs): # the keys must be in the columns # for ndim=2, or renamers for ndim=1 - # ok + # ok for now, but deprecated # {'A': { 'ra': 'mean' }} # {'A': { 'ra': ['mean'] }} # {'ra': ['mean']} @@ -469,8 +499,28 @@ def _aggregate(self, arg, *args, **kwargs): 'for {0} with a nested ' 'dictionary'.format(k)) + # deprecation of nested renaming + # GH 15931 + warnings.warn( + ("using a dict with renaming " + "is deprecated and will be 
removed in a future " + "version"), + FutureWarning, stacklevel=4) + arg = new_arg + else: + # deprecation of renaming keys + # GH 15931 + keys = list(compat.iterkeys(arg)) + if (isinstance(obj, ABCDataFrame) and + len(obj.columns.intersection(keys)) != len(keys)): + warnings.warn( + ("using a dict with renaming " + "is deprecated and will be removed in a future " + "version"), + FutureWarning, stacklevel=4) + from pandas.tools.concat import concat def _agg_1dim(name, how, subset=None): @@ -534,7 +584,7 @@ def _agg(arg, func): agg_how: _agg_1dim(self._selection, agg_how)) # we are selecting the same set as we are aggregating - elif not len(sl - set(compat.iterkeys(arg))): + elif not len(sl - set(keys)): result = _agg(arg, _agg_1dim) @@ -555,32 +605,74 @@ def _agg(arg, func): result = _agg(arg, _agg_2dim) # combine results + + def is_any_series(): + # return a boolean if we have *any* nested series + return any([isinstance(r, ABCSeries) + for r in compat.itervalues(result)]) + + def is_any_frame(): + # return a boolean if we have *any* nested series + return any([isinstance(r, ABCDataFrame) + for r in compat.itervalues(result)]) + if isinstance(result, list): - result = concat(result, keys=keys, axis=1) - elif isinstance(list(compat.itervalues(result))[0], - ABCDataFrame): - result = concat([result[k] for k in keys], keys=keys, axis=1) - else: - from pandas import DataFrame + return concat(result, keys=keys, axis=1), True + + elif is_any_frame(): + # we have a dict of DataFrames + # return a MI DataFrame + + return concat([result[k] for k in keys], + keys=keys, axis=1), True + + elif isinstance(self, ABCSeries) and is_any_series(): + + # we have a dict of Series + # return a MI Series + try: + result = concat(result) + except TypeError: + # we want to give a nice error here if + # we have non-same sized objects, so + # we don't automatically broadcast + + raise ValueError("cannot perform both aggregation " + "and transformation operations " + "simultaneously") + + return result, True + + # fall thru + from pandas import DataFrame, Series + try: result = DataFrame(result) + except ValueError: + + # we have a dict of scalars + result = Series(result, + name=getattr(self, 'name', None)) return result, True - elif hasattr(arg, '__iter__'): - return self._aggregate_multiple_funcs(arg, _level=_level), None + elif is_list_like(arg) and arg not in compat.string_types: + # we require a list, but not an 'str' + return self._aggregate_multiple_funcs(arg, + _level=_level, + _axis=_axis), None else: result = None - cy_func = self._is_cython_func(arg) - if cy_func and not args and not kwargs: - return getattr(self, cy_func)(), None + f = self._is_cython_func(arg) + if f and not args and not kwargs: + return getattr(self, f)(), None # caller can react return result, True - def _aggregate_multiple_funcs(self, arg, _level): + def _aggregate_multiple_funcs(self, arg, _level, _axis): from pandas.tools.concat import concat - if self.axis != 0: + if _axis != 0: raise NotImplementedError("axis other than 0 is not supported") if self._selected_obj.ndim == 1: @@ -615,10 +707,30 @@ def _aggregate_multiple_funcs(self, arg, _level): keys.append(col) except (TypeError, DataError): pass + except ValueError: + # cannot aggregate + continue except SpecificationError: raise - return concat(results, keys=keys, axis=1) + # if we are empty + if not len(results): + raise ValueError("no results") + + try: + return concat(results, keys=keys, axis=1) + except TypeError: + + # we are concatting non-NDFrame objects, + # e.g. 
a list of scalars + + from pandas.types.cast import is_nested_object + from pandas import Series + result = Series(results, index=keys, name=self.name) + if is_nested_object(result): + raise ValueError("cannot combine transform and " + "aggregation operations") + return result def _shallow_copy(self, obj=None, obj_type=None, **kwargs): """ return a new object with the replacement attributes """ diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index add2987b8f452..5e55196803c22 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -722,7 +722,7 @@ def _python_apply_general(self, f): not_indexed_same=mutated or self.mutated) def _iterate_slices(self): - yield self.name, self._selected_obj + yield self._selection_name, self._selected_obj def transform(self, func, *args, **kwargs): raise AbstractMethodError(self) @@ -921,9 +921,9 @@ def reset_identity(values): result = concat(values, axis=self.axis) if (isinstance(result, Series) and - getattr(self, 'name', None) is not None): + getattr(self, '_selection_name', None) is not None): - result.name = self.name + result.name = self._selection_name return result @@ -1123,7 +1123,7 @@ def size(self): result = self.grouper.size() if isinstance(self.obj, Series): - result.name = getattr(self, 'name', None) + result.name = getattr(self.obj, 'name', None) return result @classmethod @@ -2736,7 +2736,7 @@ class SeriesGroupBy(GroupBy): exec(_def_str) @property - def name(self): + def _selection_name(self): """ since we are a series, we by definition only have a single name, but may be the result of a selection or @@ -2834,6 +2834,17 @@ def aggregate(self, func_or_funcs, *args, **kwargs): def _aggregate_multiple_funcs(self, arg, _level): if isinstance(arg, dict): + + # show the deprecation, but only if we + # have not shown a higher level one + # GH 15931 + if isinstance(self._selected_obj, Series) and _level <= 1: + warnings.warn( + ("using a dict on a Series for aggregation\n" + "is deprecated and will be removed in a future " + "version"), + FutureWarning, stacklevel=4) + columns = list(arg.keys()) arg = list(arg.items()) elif any(isinstance(x, (tuple, list)) for x in arg): @@ -2879,12 +2890,12 @@ def _aggregate_multiple_funcs(self, arg, _level): def _wrap_output(self, output, index, names=None): """ common agg/transform wrapping logic """ - output = output[self.name] + output = output[self._selection_name] if names is not None: return DataFrame(output, index=index, columns=names) else: - name = self.name + name = self._selection_name if name is None: name = self._selected_obj.name return Series(output, index=index, name=name) @@ -2902,7 +2913,7 @@ def _wrap_transformed_output(self, output, names=None): def _wrap_applied_output(self, keys, values, not_indexed_same=False): if len(keys) == 0: # GH #6265 - return Series([], name=self.name, index=keys) + return Series([], name=self._selection_name, index=keys) def _get_index(): if self.grouper.nkeys > 1: @@ -2915,7 +2926,7 @@ def _get_index(): # GH #823 index = _get_index() result = DataFrame(values, index=index).stack() - result.name = self.name + result.name = self._selection_name return result if isinstance(values[0], (Series, dict)): @@ -2927,7 +2938,8 @@ def _get_index(): not_indexed_same=not_indexed_same) else: # GH #6265 - return Series(values, index=_get_index(), name=self.name) + return Series(values, index=_get_index(), + name=self._selection_name) def _aggregate_named(self, func, *args, **kwargs): result = {} @@ -3098,7 +3110,7 @@ def nunique(self, dropna=True): return 
Series(res, index=ri, - name=self.name) + name=self._selection_name) @Appender(Series.describe.__doc__) def describe(self, **kwargs): @@ -3156,7 +3168,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, # multi-index components labels = list(map(rep, self.grouper.recons_labels)) + [lab[inc]] levels = [ping.group_index for ping in self.grouper.groupings] + [lev] - names = self.grouper.names + [self.name] + names = self.grouper.names + [self._selection_name] if dropna: mask = labels[-1] != -1 @@ -3191,7 +3203,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, if is_integer_dtype(out): out = _ensure_int64(out) - return Series(out, index=mi, name=self.name) + return Series(out, index=mi, name=self._selection_name) # for compat. with libgroupby.value_counts need to ensure every # bin is present at every index level, null filled with zeros @@ -3222,7 +3234,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, if is_integer_dtype(out): out = _ensure_int64(out) - return Series(out, index=mi, name=self.name) + return Series(out, index=mi, name=self._selection_name) def count(self): """ Compute count of group, excluding missing values """ @@ -3235,7 +3247,7 @@ def count(self): return Series(out, index=self.grouper.result_index, - name=self.name, + name=self._selection_name, dtype='int64') def _apply_to_column_groupbys(self, func): @@ -3391,7 +3403,7 @@ def aggregate(self, arg, *args, **kwargs): try: assert not args and not kwargs result = self._aggregate_multiple_funcs( - [arg], _level=_level) + [arg], _level=_level, _axis=self.axis) result.columns = Index( result.columns.levels[0], name=self._selected_obj.columns.name) @@ -3623,7 +3635,8 @@ def first_non_None_value(values): except (ValueError, AttributeError): # GH1738: values is list of arrays of unequal lengths fall # through to the outer else caluse - return Series(values, index=key_index, name=self.name) + return Series(values, index=key_index, + name=self._selection_name) # if we have date/time like in the original, then coerce dates # as we are stacking can easily have object dtypes here @@ -3647,8 +3660,9 @@ def first_non_None_value(values): # only coerce dates if we find at least 1 datetime coerce = True if any([isinstance(x, Timestamp) for x in values]) else False - # self.name not passed through to Series as the result - # should not take the name of original selection of columns + # self._selection_name not passed through to Series as the + # result should not take the name of original selection + # of columns return (Series(values, index=key_index) ._convert(datetime=True, coerce=coerce)) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 52b35048b6762..c2d6422c50d02 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -14,7 +14,7 @@ import pandas as pd from pandas import (date_range, MultiIndex, DataFrame, - Series, Index, bdate_range) + Series, Index, bdate_range, concat) from pandas.util.testing import assert_frame_equal, assert_series_equal from pandas.core.groupby import SpecificationError, DataError from pandas.compat import OrderedDict @@ -291,8 +291,10 @@ def test_aggregate_api_consistency(self): expected.columns = MultiIndex.from_product([['C', 'D'], ['mean', 'sum']]) - result = grouped[['D', 'C']].agg({'r': np.sum, - 'r2': np.mean}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = grouped[['D', 'C']].agg({'r': np.sum, + 'r2': np.mean}) 
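# [Illustrative aside, not lines from the patch] The non-deprecated spelling
# these tests steer users toward -- aggregate with a list, then rename -- as
# a minimal, self-contained sketch (the frame contents and the result names
# 'r'/'r2' are arbitrary for the example):
#
#     import numpy as np
#     import pandas as pd
#
#     df = pd.DataFrame({'A': [1, 1, 2], 'C': [1, 2, 3], 'D': [4, 5, 6]})
#     grouped = df.groupby('A')
#
#     # deprecated: grouped['D'].agg({'r': np.sum, 'r2': np.mean})
#     result = (grouped['D'].agg([np.sum, np.mean])
#               .rename(columns={'sum': 'r', 'mean': 'r2'}))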
expected = pd.concat([d_sum, c_sum, d_mean, @@ -302,6 +304,28 @@ def test_aggregate_api_consistency(self): ['D', 'C']]) assert_frame_equal(result, expected, check_like=True) + def test_agg_dict_renaming_deprecation(self): + # 15931 + df = pd.DataFrame({'A': [1, 1, 1, 2, 2], + 'B': range(5), + 'C': range(5)}) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False) as w: + df.groupby('A').agg({'B': {'foo': ['sum', 'max']}, + 'C': {'bar': ['count', 'min']}}) + assert "using a dict with renaming" in str(w[0].message) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + df.groupby('A')[['B', 'C']].agg({'ma': 'max'}) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False) as w: + df.groupby('A').B.agg({'foo': 'count'}) + assert "using a dict on a Series for aggregation" in str( + w[0].message) + def test_agg_compat(self): # GH 12334 @@ -320,14 +344,19 @@ def test_agg_compat(self): axis=1) expected.columns = MultiIndex.from_tuples([('C', 'sum'), ('C', 'std')]) - result = g['D'].agg({'C': ['sum', 'std']}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = g['D'].agg({'C': ['sum', 'std']}) assert_frame_equal(result, expected, check_like=True) expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1) expected.columns = ['C', 'D'] - result = g['D'].agg({'C': 'sum', 'D': 'std'}) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = g['D'].agg({'C': 'sum', 'D': 'std'}) assert_frame_equal(result, expected, check_like=True) def test_agg_nested_dicts(self): @@ -348,8 +377,10 @@ def f(): self.assertRaises(SpecificationError, f) - result = g.agg({'C': {'ra': ['mean', 'std']}, - 'D': {'rb': ['mean', 'std']}}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = g.agg({'C': {'ra': ['mean', 'std']}, + 'D': {'rb': ['mean', 'std']}}) expected = pd.concat([g['C'].mean(), g['C'].std(), g['D'].mean(), g['D'].std()], axis=1) expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), ( @@ -358,9 +389,14 @@ def f(): # same name as the original column # GH9052 - expected = g['D'].agg({'result1': np.sum, 'result2': np.mean}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + expected = g['D'].agg({'result1': np.sum, 'result2': np.mean}) expected = expected.rename(columns={'result1': 'D'}) - result = g['D'].agg({'D': np.sum, 'result2': np.mean}) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = g['D'].agg({'D': np.sum, 'result2': np.mean}) assert_frame_equal(result, expected, check_like=True) def test_agg_python_multiindex(self): @@ -627,7 +663,6 @@ def test_agg_multiple_functions_too_many_lambdas(self): self.assertRaises(SpecificationError, grouped.agg, funcs) def test_more_flexible_frame_multi_function(self): - from pandas import concat grouped = self.df.groupby('A') @@ -655,9 +690,12 @@ def foo(x): def bar(x): return np.std(x, ddof=1) - d = OrderedDict([['C', np.mean], ['D', OrderedDict( - [['foo', np.mean], ['bar', np.std]])]]) - result = grouped.aggregate(d) + # this uses column selection & renaming + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + d = OrderedDict([['C', np.mean], ['D', OrderedDict( + [['foo', np.mean], ['bar', np.std]])]]) + result = grouped.aggregate(d) d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]]) expected = grouped.aggregate(d) @@ -671,16 +709,29 @@ def test_multi_function_flexible_mix(self): d = OrderedDict([['C', 
OrderedDict([['foo', 'mean'], [ 'bar', 'std' ]])], ['D', 'sum']]) - result = grouped.aggregate(d) + + # this uses column selection & renaming + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = grouped.aggregate(d) + d2 = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ 'bar', 'std' ]])], ['D', ['sum']]]) - result2 = grouped.aggregate(d2) + + # this uses column selection & renaming + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result2 = grouped.aggregate(d2) d3 = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ 'bar', 'std' ]])], ['D', {'sum': 'sum'}]]) - expected = grouped.aggregate(d3) + + # this uses column selection & renaming + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + expected = grouped.aggregate(d3) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 68955c954206e..8f3d8e2307f45 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -59,7 +59,10 @@ def checkit(dtype): # complex agg agged = grouped.aggregate([np.mean, np.std]) - agged = grouped.aggregate({'one': np.mean, 'two': np.std}) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + agged = grouped.aggregate({'one': np.mean, 'two': np.std}) group_constants = {0: 10, 1: 20, 2: 30} agged = grouped.agg(lambda x: group_constants[x.name] + x.mean()) @@ -1262,7 +1265,9 @@ def test_frame_set_name_single(self): result = grouped['C'].agg([np.mean, np.std]) self.assertEqual(result.index.name, 'A') - result = grouped['C'].agg({'foo': np.mean, 'bar': np.std}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = grouped['C'].agg({'foo': np.mean, 'bar': np.std}) self.assertEqual(result.index.name, 'A') def test_multi_iter(self): @@ -1438,7 +1443,10 @@ def test_groupby_as_index_agg(self): grouped = self.df.groupby('A', as_index=True) expected3 = grouped['C'].sum() expected3 = DataFrame(expected3).rename(columns={'C': 'Q'}) - result3 = grouped['C'].agg({'Q': np.sum}) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result3 = grouped['C'].agg({'Q': np.sum}) assert_frame_equal(result3, expected3) # multi-key diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index d566f34b7eae8..5a4f282789eeb 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -233,7 +233,7 @@ def test_tab_completion(mframe): expected = set( ['A', 'B', 'C', 'agg', 'aggregate', 'apply', 'boxplot', 'filter', 'first', 'get_group', 'groups', 'hist', 'indices', 'last', 'max', - 'mean', 'median', 'min', 'name', 'ngroups', 'nth', 'ohlc', 'plot', + 'mean', 'median', 'min', 'ngroups', 'nth', 'ohlc', 'plot', 'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count', 'nunique', 'head', 'describe', 'cummax', 'quantile', 'rank', 'cumprod', 'tail', 'resample', 'cummin', 'fillna', diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 5fc31e9321f31..9cd3b8b839a9b 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -134,16 +134,18 @@ def test_agg(self): expected.columns = ['mean', 'sum'] tm.assert_frame_equal(result, expected) - result = r.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}}) + with catch_warnings(record=True): + result = r.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}}) expected = 
pd.concat([a_mean, a_sum], axis=1) expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ('A', 'sum')]) tm.assert_frame_equal(result, expected, check_like=True) - result = r.aggregate({'A': {'mean': 'mean', - 'sum': 'sum'}, - 'B': {'mean2': 'mean', - 'sum2': 'sum'}}) + with catch_warnings(record=True): + result = r.aggregate({'A': {'mean': 'mean', + 'sum': 'sum'}, + 'B': {'mean2': 'mean', + 'sum2': 'sum'}}) expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) exp_cols = [('A', 'mean'), ('A', 'sum'), ('B', 'mean2'), ('B', 'sum2')] expected.columns = pd.MultiIndex.from_tuples(exp_cols) @@ -195,12 +197,14 @@ def f(): r['B'].std()], axis=1) expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), ( 'ra', 'std'), ('rb', 'mean'), ('rb', 'std')]) - result = r[['A', 'B']].agg({'A': {'ra': ['mean', 'std']}, - 'B': {'rb': ['mean', 'std']}}) + with catch_warnings(record=True): + result = r[['A', 'B']].agg({'A': {'ra': ['mean', 'std']}, + 'B': {'rb': ['mean', 'std']}}) tm.assert_frame_equal(result, expected, check_like=True) - result = r.agg({'A': {'ra': ['mean', 'std']}, - 'B': {'rb': ['mean', 'std']}}) + with catch_warnings(record=True): + result = r.agg({'A': {'ra': ['mean', 'std']}, + 'B': {'rb': ['mean', 'std']}}) expected.columns = pd.MultiIndex.from_tuples([('A', 'ra', 'mean'), ( 'A', 'ra', 'std'), ('B', 'rb', 'mean'), ('B', 'rb', 'std')]) tm.assert_frame_equal(result, expected, check_like=True) diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py index 9c66cae292c4e..98664c1ec118c 100755 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/tseries/test_resample.py @@ -394,8 +394,10 @@ def test_agg_consistency(self): r = df.resample('3T') - expected = r[['A', 'B', 'C']].agg({'r1': 'mean', 'r2': 'sum'}) - result = r.agg({'r1': 'mean', 'r2': 'sum'}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + expected = r[['A', 'B', 'C']].agg({'r1': 'mean', 'r2': 'sum'}) + result = r.agg({'r1': 'mean', 'r2': 'sum'}) assert_frame_equal(result, expected) # TODO: once GH 14008 is fixed, move these tests into @@ -459,7 +461,9 @@ def test_agg(self): expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ('A', 'sum')]) for t in cases: - result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}}) assert_frame_equal(result, expected, check_like=True) expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) @@ -468,8 +472,10 @@ def test_agg(self): ('B', 'mean2'), ('B', 'sum2')]) for t in cases: - result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}, - 'B': {'mean2': 'mean', 'sum2': 'sum'}}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}, + 'B': {'mean2': 'mean', 'sum2': 'sum'}}) assert_frame_equal(result, expected, check_like=True) expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) @@ -529,9 +535,12 @@ def test_agg_misc(self): ('result1', 'B'), ('result2', 'A'), ('result2', 'B')]) + for t in cases: - result = t[['A', 'B']].agg(OrderedDict([('result1', np.sum), - ('result2', np.mean)])) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t[['A', 'B']].agg(OrderedDict([('result1', np.sum), + ('result2', np.mean)])) assert_frame_equal(result, expected, check_like=True) # agg with different hows @@ -557,7 +566,9 @@ def test_agg_misc(self): # 
series like aggs
         for t in cases:
-            result = t['A'].agg({'A': ['sum', 'std']})
+            with tm.assert_produces_warning(FutureWarning,
+                                            check_stacklevel=False):
+                result = t['A'].agg({'A': ['sum', 'std']})
 
             expected = pd.concat([t['A'].sum(), t['A'].std()], axis=1)
 
@@ -572,15 +583,20 @@ def test_agg_misc(self):
                                               ('A', 'std'),
                                               ('B', 'mean'), ('B', 'std')])
-            result = t['A'].agg({'A': ['sum', 'std'],
-                                 'B': ['mean', 'std']})
+            with tm.assert_produces_warning(FutureWarning,
+                                            check_stacklevel=False):
+                result = t['A'].agg({'A': ['sum', 'std'],
+                                     'B': ['mean', 'std']})
             assert_frame_equal(result, expected, check_like=True)
 
         # errors
         # invalid names in the agg specification
         for t in cases:
             def f():
-                t[['A']].agg({'A': ['sum', 'std'],
-                              'B': ['mean', 'std']})
+                with tm.assert_produces_warning(FutureWarning,
+                                                check_stacklevel=False):
+                    t[['A']].agg({'A': ['sum', 'std'],
+                                  'B': ['mean', 'std']})
 
             self.assertRaises(SpecificationError, f)
 
@@ -617,12 +633,16 @@ def f():
         expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), (
             'ra', 'std'), ('rb', 'mean'), ('rb', 'std')])
 
-        result = t[['A', 'B']].agg({'A': {'ra': ['mean', 'std']},
-                                    'B': {'rb': ['mean', 'std']}})
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            result = t[['A', 'B']].agg({'A': {'ra': ['mean', 'std']},
+                                        'B': {'rb': ['mean', 'std']}})
         assert_frame_equal(result, expected, check_like=True)
 
-        result = t.agg({'A': {'ra': ['mean', 'std']},
-                        'B': {'rb': ['mean', 'std']}})
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            result = t.agg({'A': {'ra': ['mean', 'std']},
+                            'B': {'rb': ['mean', 'std']}})
         assert_frame_equal(result, expected, check_like=True)
 
     def test_selection_api_validation(self):
@@ -752,16 +772,7 @@ def test_resample_empty_series(self):
             expected.index = s.index._shallow_copy(freq=freq)
             assert_index_equal(result.index, expected.index)
             self.assertEqual(result.index.freq, expected.index.freq)
-
-            if (method == 'size' and
-                    isinstance(result.index, PeriodIndex) and
-                    freq in ['M', 'D']):
-                # GH12871 - TODO: name should propagate, but currently
-                # doesn't on lower / same frequency with PeriodIndex
-                assert_series_equal(result, expected, check_dtype=False)
-
-            else:
-                assert_series_equal(result, expected, check_dtype=False)
+            assert_series_equal(result, expected, check_dtype=False)
 
     def test_resample_empty_dataframe(self):
         # GH13212
@@ -1846,10 +1857,12 @@ def test_how_lambda_functions(self):
         tm.assert_series_equal(result['foo'], foo_exp)
         tm.assert_series_equal(result['bar'], bar_exp)
 
+        # this is a MI Series, so comparing the names of the results
+        # doesn't make sense
         result = ts.resample('M').aggregate({'foo': lambda x: x.mean(),
                                              'bar': lambda x: x.std(ddof=1)})
-        tm.assert_series_equal(result['foo'], foo_exp)
-        tm.assert_series_equal(result['bar'], bar_exp)
+        tm.assert_series_equal(result['foo'], foo_exp, check_names=False)
+        tm.assert_series_equal(result['bar'], bar_exp, check_names=False)
 
     def test_resample_unequal_times(self):
         # #1772
diff --git a/pandas/types/cast.py b/pandas/types/cast.py
index 580ce12de3333..85053dba0c18b 100644
--- a/pandas/types/cast.py
+++ b/pandas/types/cast.py
@@ -45,6 +45,23 @@ def maybe_convert_platform(values):
     return values
 
 
+def is_nested_object(obj):
+    """
+    return a boolean if we have a nested object, e.g. a Series with 1 or
+    more Series elements
+
+    This may not necessarily be performant.
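    A doctest-style illustration of the intent (added for clarity; not from
    the patch itself):

        >>> from pandas import Series
        >>> is_nested_object(Series([Series([1, 2]), Series([3, 4])]))
        True
        >>> is_nested_object(Series([1, 2]))
        False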
+ + """ + + if isinstance(obj, ABCSeries) and is_object_dtype(obj): + + if any(isinstance(v, ABCSeries) for v in obj.values): + return True + + return False + + def maybe_downcast_to_dtype(result, dtype): """ try to cast to the specified dtype (e.g. convert back to bool/int or could be an astype of float64->float32 From 73222392f389f918272a9d96c5f623f0b13966eb Mon Sep 17 00:00:00 2001 From: carlosdanielcsantos Date: Thu, 13 Apr 2017 07:28:30 -0400 Subject: [PATCH 381/933] ENH: Rolling window endpoints inclusion closes #13965 Author: carlosdanielcsantos Author: carlosdanielcsantos Author: carlosdanielcsantos Closes #15795 from carlosdanielcsantos/rwindow-endpoints-inclusion and squashes the following commits: aad97dc [carlosdanielcsantos] Updating docs 568c12f [carlosdanielcsantos] Innocuous change to rerun tests 037b84e [carlosdanielcsantos] Fixing style c18a31b [carlosdanielcsantos] Fixing test of assertion of closed parameter in fixed windows Style corrections 90dfb0c [carlosdanielcsantos] Correcting bug in window validation 8bd336a [carlosdanielcsantos] Almost there 306b9f7 [carlosdanielcsantos] Commiting progress on default=None. Still not tested Adding computation.rst section (still not written) ec4bbc7 [carlosdanielcsantos] Changing l_closed and r_closed variable names 0e8e65c [carlosdanielcsantos] Adding doc-strings and PEP8 corrections 5eaf3b4 [carlosdanielcsantos] str closed -> object closed Adding test of assert for closed parameter Adding assert for closed parameter in get_window_indexer 2cf6804 [carlosdanielcsantos] Time-based windows working da034bf [carlosdanielcsantos] Commiting progress 34f1309 [carlosdanielcsantos] Adding window slicing endpoint inclusion selection to VariableWindowIndexer --- doc/source/computation.rst | 42 ++++++++++ doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/generic.py | 4 +- pandas/core/window.py | 46 +++++++---- pandas/core/window.pyx | 135 +++++++++++++++++++++++--------- pandas/tests/test_window.py | 45 +++++++++++ 6 files changed, 222 insertions(+), 51 deletions(-) diff --git a/doc/source/computation.rst b/doc/source/computation.rst index f46a00826a8d9..f6c912bf59b34 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -459,6 +459,48 @@ default of the index) in a DataFrame. dft dft.rolling('2s', on='foo').sum() +.. _stats.rolling_window.endpoints: + +Rolling Window Endpoints +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.20.0 + +The inclusion of the interval endpoints in rolling window calculations can be specified with the ``closed`` +parameter: + +.. csv-table:: + :header: "``closed``", "Description", "Default for" + :widths: 20, 30, 30 + + ``right``, close right endpoint, time-based windows + ``left``, close left endpoint, + ``both``, close both endpoints, fixed windows + ``neither``, open endpoints, + +For example, having the right endpoint open is useful in many problems that require that there is no contamination +from present information back to past information. This allows the rolling window to compute statistics +"up to that point in time", but not including that point in time. + +.. 
ipython:: python + + df = pd.DataFrame({'x': 1}, + index = [pd.Timestamp('20130101 09:00:01'), + pd.Timestamp('20130101 09:00:02'), + pd.Timestamp('20130101 09:00:03'), + pd.Timestamp('20130101 09:00:04'), + pd.Timestamp('20130101 09:00:06')]) + + df["right"] = df.rolling('2s', closed='right').x.sum() # default + df["both"] = df.rolling('2s', closed='both').x.sum() + df["left"] = df.rolling('2s', closed='left').x.sum() + df["neither"] = df.rolling('2s', closed='neither').x.sum() + + df + +Currently, this feature is only implemented for time-based windows. +For fixed windows, the closed parameter cannot be set and the rolling window will always have both endpoints closed. + .. _stats.moments.ts-versus-resampling: Time-aware Rolling vs. Resampling diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c243e4ef81b38..07f393a814f8b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -319,6 +319,7 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you Other Enhancements ^^^^^^^^^^^^^^^^^^ +- ``DataFrame.rolling()`` now accepts the parameter ``closed='right'|'left'|'both'|'neither'`` to choose the rolling window endpoint closedness. See the :ref:`documentation ` (:issue:`13965`) - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. - ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) - ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ad56ea44a0dc6..86978a9739ca4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5962,12 +5962,12 @@ def _add_series_or_dataframe_operations(cls): @Appender(rwindow.rolling.__doc__) def rolling(self, window, min_periods=None, freq=None, center=False, - win_type=None, on=None, axis=0): + win_type=None, on=None, axis=0, closed=None): axis = self._get_axis_number(axis) return rwindow.rolling(self, window=window, min_periods=min_periods, freq=freq, center=center, win_type=win_type, - on=on, axis=axis) + on=on, axis=axis, closed=closed) cls.rolling = rolling diff --git a/pandas/core/window.py b/pandas/core/window.py index 89d2f5b24d77e..5b84b075ce81a 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -56,11 +56,12 @@ class _Window(PandasObject, SelectionMixin): _attributes = ['window', 'min_periods', 'freq', 'center', 'win_type', - 'axis', 'on'] + 'axis', 'on', 'closed'] exclusions = set() def __init__(self, obj, window=None, min_periods=None, freq=None, - center=False, win_type=None, axis=0, on=None, **kwargs): + center=False, win_type=None, axis=0, on=None, closed=None, + **kwargs): if freq is not None: warnings.warn("The freq kw is deprecated and will be removed in a " @@ -71,6 +72,7 @@ def __init__(self, obj, window=None, min_periods=None, freq=None, self.blocks = [] self.obj = obj self.on = on + self.closed = closed self.window = window self.min_periods = min_periods self.freq = freq @@ -101,6 +103,10 @@ def validate(self): if self.min_periods is not None and not \ is_integer(self.min_periods): raise ValueError("min_periods must be an integer") + if self.closed is not None and self.closed not in \ + ['right', 'both', 'left', 'neither']: + raise ValueError("closed must be 'right', 'left', 'both' or " + "'neither'") def _convert_freq(self, how=None): """ resample according to the 
how, return a new object """ @@ -374,8 +380,14 @@ class Window(_Window): on : string, optional For a DataFrame, column on which to calculate the rolling window, rather than the index + closed : string, default None + Make the interval closed on the 'right', 'left', 'both' or + 'neither' endpoints. + For offset-based windows, it defaults to 'right'. + For fixed windows, defaults to 'both'. Remaining cases not implemented + for fixed windows. - .. versionadded:: 0.19.0 + .. versionadded:: 0.20.0 axis : int or string, default 0 @@ -717,12 +729,12 @@ def _apply(self, func, name=None, window=None, center=None, raise ValueError("we do not support this function " "in _window.{0}".format(func)) - def func(arg, window, min_periods=None): + def func(arg, window, min_periods=None, closed=None): minp = check_minp(min_periods, window) # ensure we are only rolling on floats arg = _ensure_float64(arg) return cfunc(arg, - window, minp, indexi, **kwargs) + window, minp, indexi, closed, **kwargs) # calculation function if center: @@ -731,11 +743,13 @@ def func(arg, window, min_periods=None): def calc(x): return func(np.concatenate((x, additional_nans)), - window, min_periods=self.min_periods) + window, min_periods=self.min_periods, + closed=self.closed) else: def calc(x): - return func(x, window, min_periods=self.min_periods) + return func(x, window, min_periods=self.min_periods, + closed=self.closed) with np.errstate(all='ignore'): if values.ndim > 1: @@ -768,7 +782,8 @@ def count(self): for b in blocks: result = b.notnull().astype(int) result = self._constructor(result, window=window, min_periods=0, - center=self.center).sum() + center=self.center, + closed=self.closed).sum() results.append(result) return self._wrap_results(results, blocks, obj) @@ -789,11 +804,10 @@ def apply(self, func, args=(), kwargs={}): offset = _offset(window, self.center) index, indexi = self._get_index() - def f(arg, window, min_periods): + def f(arg, window, min_periods, closed): minp = _use_window(min_periods, window) - return _window.roll_generic(arg, window, minp, indexi, - offset, func, args, - kwargs) + return _window.roll_generic(arg, window, minp, indexi, closed, + offset, func, args, kwargs) return self._apply(f, func, args=args, kwargs=kwargs, center=False) @@ -864,7 +878,7 @@ def std(self, ddof=1, *args, **kwargs): def f(arg, *args, **kwargs): minp = _require_min_periods(1)(self.min_periods, window) return _zsqrt(_window.roll_var(arg, window, minp, indexi, - ddof)) + self.closed, ddof)) return self._apply(f, 'std', check_minp=_require_min_periods(1), ddof=ddof, **kwargs) @@ -911,7 +925,7 @@ def quantile(self, quantile, **kwargs): def f(arg, *args, **kwargs): minp = _use_window(self.min_periods, window) return _window.roll_quantile(arg, window, minp, indexi, - quantile) + self.closed, quantile) return self._apply(f, 'quantile', quantile=quantile, **kwargs) @@ -1044,6 +1058,10 @@ def validate(self): elif self.window < 0: raise ValueError("window must be non-negative") + if not self.is_datetimelike and self.closed is not None: + raise ValueError("closed only implemented for datetimelike " + "and offset based windows") + def _validate_monotonic(self): """ validate on is monotonic """ if not self._on.is_monotonic: diff --git a/pandas/core/window.pyx b/pandas/core/window.pyx index a06e616002ee2..3bb8abe26c781 100644 --- a/pandas/core/window.pyx +++ b/pandas/core/window.pyx @@ -158,9 +158,14 @@ cdef class MockFixedWindowIndexer(WindowIndexer): index of the input floor: optional unit for flooring + left_closed: bint + left 
endpoint closedness + right_closed: bint + right endpoint closedness """ def __init__(self, ndarray input, int64_t win, int64_t minp, + bint left_closed, bint right_closed, object index=None, object floor=None): assert index is None @@ -191,9 +196,14 @@ cdef class FixedWindowIndexer(WindowIndexer): index of the input floor: optional unit for flooring the unit + left_closed: bint + left endpoint closedness + right_closed: bint + right endpoint closedness """ def __init__(self, ndarray input, int64_t win, int64_t minp, + bint left_closed, bint right_closed, object index=None, object floor=None): cdef ndarray start_s, start_e, end_s, end_e @@ -229,10 +239,16 @@ cdef class VariableWindowIndexer(WindowIndexer): min number of obs in a window to consider non-NaN index: ndarray index of the input + left_closed: bint + left endpoint closedness + True if the left endpoint is closed, False if open + right_closed: bint + right endpoint closedness + True if the right endpoint is closed, False if open """ def __init__(self, ndarray input, int64_t win, int64_t minp, - ndarray index): + bint left_closed, bint right_closed, ndarray index): self.is_variable = 1 self.N = len(index) @@ -244,12 +260,13 @@ cdef class VariableWindowIndexer(WindowIndexer): self.end = np.empty(self.N, dtype='int64') self.end.fill(-1) - self.build(index, win) + self.build(index, win, left_closed, right_closed) # max window size self.win = (self.end - self.start).max() - def build(self, ndarray[int64_t] index, int64_t win): + def build(self, ndarray[int64_t] index, int64_t win, bint left_closed, + bint right_closed): cdef: ndarray[int64_t] start, end @@ -261,7 +278,13 @@ cdef class VariableWindowIndexer(WindowIndexer): N = self.N start[0] = 0 - end[0] = 1 + + # right endpoint is closed + if right_closed: + end[0] = 1 + # right endpoint is open + else: + end[0] = 0 with nogil: @@ -271,6 +294,10 @@ cdef class VariableWindowIndexer(WindowIndexer): end_bound = index[i] start_bound = index[i] - win + # left endpoint is closed + if left_closed: + start_bound -= 1 + # advance the start bound until we are # within the constraint start[i] = i @@ -286,9 +313,13 @@ cdef class VariableWindowIndexer(WindowIndexer): else: end[i] = end[i - 1] + # right endpoint is open + if not right_closed: + end[i] -= 1 + -def get_window_indexer(input, win, minp, index, floor=None, - use_mock=True): +def get_window_indexer(input, win, minp, index, closed, + floor=None, use_mock=True): """ return the correct window indexer for the computation @@ -299,6 +330,10 @@ def get_window_indexer(input, win, minp, index, floor=None, minp: integer, minimum periods index: 1d ndarray, optional index to the input array + closed: string, default None + {'right', 'left', 'both', 'neither'} + window endpoint closedness. Defaults to 'right' in + VariableWindowIndexer and to 'both' in FixedWindowIndexer floor: optional unit for flooring the unit use_mock: boolean, default True @@ -307,18 +342,38 @@ def get_window_indexer(input, win, minp, index, floor=None, compat Indexer that allows us to use a standard code path with all of the indexers. 
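    For illustration, a pure-Python paraphrase of how ``closed`` resolves to
    the two endpoint flags in the body below (the helper name is invented
    for this sketch):

        def resolve_closed(closed, index):
            # closed=None defaults by window type: 'right' when an index is
            # supplied (variable, time-based windows), 'both' otherwise (fixed)
            if closed is None:
                closed = 'right' if index is not None else 'both'
            left_closed = closed in ('left', 'both')
            right_closed = closed in ('right', 'both')
            return left_closed, right_closed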
+ Returns ------- tuple of 1d int64 ndarrays of the offsets & data about the window """ + cdef: + bint left_closed = False + bint right_closed = False + + assert closed is None or closed in ['right', 'left', 'both', 'neither'] + + # if windows is variable, default is 'right', otherwise default is 'both' + if closed is None: + closed = 'right' if index is not None else 'both' + + if closed in ['right', 'both']: + right_closed = True + + if closed in ['left', 'both']: + left_closed = True + if index is not None: - indexer = VariableWindowIndexer(input, win, minp, index) + indexer = VariableWindowIndexer(input, win, minp, left_closed, + right_closed, index) elif use_mock: - indexer = MockFixedWindowIndexer(input, win, minp, index, floor) + indexer = MockFixedWindowIndexer(input, win, minp, left_closed, + right_closed, index, floor) else: - indexer = FixedWindowIndexer(input, win, minp, index, floor) + indexer = FixedWindowIndexer(input, win, minp, left_closed, + right_closed, index, floor) return indexer.get_data() # ---------------------------------------------------------------------- @@ -327,7 +382,7 @@ def get_window_indexer(input, win, minp, index, floor=None, def roll_count(ndarray[double_t] input, int64_t win, int64_t minp, - object index): + object index, object closed): cdef: double val, count_x = 0.0 int64_t s, e, nobs, N @@ -336,7 +391,7 @@ def roll_count(ndarray[double_t] input, int64_t win, int64_t minp, ndarray[double_t] output start, end, N, win, minp, _ = get_window_indexer(input, win, - minp, index) + minp, index, closed) output = np.empty(N, dtype=float) with nogil: @@ -408,7 +463,7 @@ cdef inline void remove_sum(double val, int64_t *nobs, double *sum_x) nogil: def roll_sum(ndarray[double_t] input, int64_t win, int64_t minp, - object index): + object index, object closed): cdef: double val, prev_x, sum_x = 0 int64_t s, e @@ -418,7 +473,8 @@ def roll_sum(ndarray[double_t] input, int64_t win, int64_t minp, ndarray[double_t] output start, end, N, win, minp, is_variable = get_window_indexer(input, win, - minp, index) + minp, index, + closed) output = np.empty(N, dtype=float) # for performance we are going to iterate @@ -523,7 +579,7 @@ cdef inline void remove_mean(double val, Py_ssize_t *nobs, double *sum_x, def roll_mean(ndarray[double_t] input, int64_t win, int64_t minp, - object index): + object index, object closed): cdef: double val, prev_x, result, sum_x = 0 int64_t s, e @@ -533,7 +589,8 @@ def roll_mean(ndarray[double_t] input, int64_t win, int64_t minp, ndarray[double_t] output start, end, N, win, minp, is_variable = get_window_indexer(input, win, - minp, index) + minp, index, + closed) output = np.empty(N, dtype=float) # for performance we are going to iterate @@ -647,7 +704,7 @@ cdef inline void remove_var(double val, double *nobs, double *mean_x, def roll_var(ndarray[double_t] input, int64_t win, int64_t minp, - object index, int ddof=1): + object index, object closed, int ddof=1): """ Numerically stable implementation using Welford's method. 
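    In sketch form (the names mirror the running quantities kept by the
    add/remove kernels; this is a description of Welford's update, not code
    from the patch), adding an observation ``val`` to a window of ``nobs``
    values does, in effect:

        delta = val - mean_x
        mean_x += delta / nobs
        ssqdm_x += delta * (val - mean_x)

    and the window's variance is then ``ssqdm_x / (nobs - ddof)``; removing
    a value applies the inverse updates, so each step stays O(1).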
""" @@ -660,7 +717,8 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp, ndarray[double_t] output start, end, N, win, minp, is_variable = get_window_indexer(input, win, - minp, index) + minp, index, + closed) output = np.empty(N, dtype=float) # Check for windows larger than array, addresses #7297 @@ -789,7 +847,7 @@ cdef inline void remove_skew(double val, int64_t *nobs, double *x, double *xx, def roll_skew(ndarray[double_t] input, int64_t win, int64_t minp, - object index): + object index, object closed): cdef: double val, prev double x = 0, xx = 0, xxx = 0 @@ -800,7 +858,8 @@ def roll_skew(ndarray[double_t] input, int64_t win, int64_t minp, ndarray[double_t] output start, end, N, win, minp, is_variable = get_window_indexer(input, win, - minp, index) + minp, index, + closed) output = np.empty(N, dtype=float) if is_variable: @@ -916,7 +975,7 @@ cdef inline void remove_kurt(double val, int64_t *nobs, double *x, double *xx, def roll_kurt(ndarray[double_t] input, int64_t win, int64_t minp, - object index): + object index, object closed): cdef: double val, prev double x = 0, xx = 0, xxx = 0, xxxx = 0 @@ -927,7 +986,8 @@ def roll_kurt(ndarray[double_t] input, int64_t win, int64_t minp, ndarray[double_t] output start, end, N, win, minp, is_variable = get_window_indexer(input, win, - minp, index) + minp, index, + closed) output = np.empty(N, dtype=float) if is_variable: @@ -985,11 +1045,11 @@ def roll_kurt(ndarray[double_t] input, int64_t win, int64_t minp, def roll_median_c(ndarray[float64_t] input, int64_t win, int64_t minp, - object index): + object index, object closed): cdef: double val, res, prev - bint err=0, is_variable - int ret=0 + bint err = 0, is_variable + int ret = 0 skiplist_t *sl Py_ssize_t i, j int64_t nobs = 0, N, s, e @@ -1001,7 +1061,7 @@ def roll_median_c(ndarray[float64_t] input, int64_t win, int64_t minp, # actual skiplist ops outweigh any window computation costs start, end, N, win, minp, is_variable = get_window_indexer( input, win, - minp, index, + minp, index, closed, use_mock=False) output = np.empty(N, dtype=float) @@ -1111,7 +1171,7 @@ cdef inline numeric calc_mm(int64_t minp, Py_ssize_t nobs, def roll_max(ndarray[numeric] input, int64_t win, int64_t minp, - object index): + object index, object closed): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1123,12 +1183,15 @@ def roll_max(ndarray[numeric] input, int64_t win, int64_t minp, is below this, output a NaN index: ndarray, optional index for window computation + closed: 'right', 'left', 'both', 'neither' + make the interval closed on the right, left, + both or neither endpoints """ - return _roll_min_max(input, win, minp, index, is_max=1) + return _roll_min_max(input, win, minp, index, closed=closed, is_max=1) def roll_min(ndarray[numeric] input, int64_t win, int64_t minp, - object index): + object index, object closed): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1141,11 +1204,11 @@ def roll_min(ndarray[numeric] input, int64_t win, int64_t minp, index: ndarray, optional index for window computation """ - return _roll_min_max(input, win, minp, index, is_max=0) + return _roll_min_max(input, win, minp, index, is_max=0, closed=closed) cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, - object index, bint is_max): + object index, object closed, bint is_max): """ Moving min/max of 1d array of any numeric type along axis=0 ignoring NaNs. 
@@ -1170,7 +1233,7 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, starti, endi, N, win, minp, is_variable = get_window_indexer( input, win, - minp, index) + minp, index, closed) output = np.empty(N, dtype=input.dtype) @@ -1272,7 +1335,8 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, - int64_t minp, object index, double quantile): + int64_t minp, object index, object closed, + double quantile): """ O(N log(window)) implementation using skip list """ @@ -1292,7 +1356,7 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, # actual skiplist ops outweigh any window computation costs start, end, N, win, minp, is_variable = get_window_indexer( input, win, - minp, index, + minp, index, closed, use_mock=False) output = np.empty(N, dtype=float) skiplist = IndexableSkiplist(win) @@ -1335,7 +1399,7 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, def roll_generic(ndarray[float64_t, cast=True] input, - int64_t win, int64_t minp, object index, + int64_t win, int64_t minp, object index, object closed, int offset, object func, object args, object kwargs): cdef: @@ -1355,12 +1419,13 @@ def roll_generic(ndarray[float64_t, cast=True] input, start, end, N, win, minp, is_variable = get_window_indexer(input, win, minp, index, + closed, floor=0) output = np.empty(N, dtype=float) counts = roll_sum(np.concatenate([np.isfinite(input).astype(float), np.array([0.] * offset)]), - win, minp, index)[offset:] + win, minp, index, closed)[offset:] if is_variable: diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 9cd3b8b839a9b..29b91fb115a33 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -435,6 +435,12 @@ def test_numpy_compat(self): tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, getattr(r, func), dtype=np.float64) + def test_closed(self): + df = DataFrame({'A': [0, 1, 2, 3, 4]}) + # closed only allowed for datetimelike + with pytest.raises(ValueError): + df.rolling(window=3, closed='neither') + class TestExpanding(Base): @@ -3389,6 +3395,45 @@ def test_min_periods(self): result = df.rolling('2s', min_periods=1).sum() tm.assert_frame_equal(result, expected) + def test_closed(self): + + # xref GH13965 + + df = DataFrame({'A': [1] * 5}, + index=[pd.Timestamp('20130101 09:00:01'), + pd.Timestamp('20130101 09:00:02'), + pd.Timestamp('20130101 09:00:03'), + pd.Timestamp('20130101 09:00:04'), + pd.Timestamp('20130101 09:00:06')]) + + # closed must be 'right', 'left', 'both', 'neither' + with pytest.raises(ValueError): + self.regular.rolling(window='2s', closed="blabla") + + expected = df.copy() + expected["A"] = [1.0, 2, 2, 2, 1] + result = df.rolling('2s', closed='right').sum() + tm.assert_frame_equal(result, expected) + + # default should be 'right' + result = df.rolling('2s').sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [1.0, 2, 3, 3, 2] + result = df.rolling('2s', closed='both').sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [np.nan, 1.0, 2, 2, 1] + result = df.rolling('2s', closed='left').sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [np.nan, 1.0, 1, 1, np.nan] + result = df.rolling('2s', closed='neither').sum() + tm.assert_frame_equal(result, expected) + def test_ragged_sum(self): df = self.ragged From f2ed595d0b77c2e4c68edf1eae9ddca8fba42651 Mon Sep 17 00:00:00 2001 From: 
Jeff Reback Date: Thu, 13 Apr 2017 11:41:03 +0000 Subject: [PATCH 382/933] TST: use checkstack level as per comments in groupby.agg with dicts depr testing (#15992) --- pandas/core/groupby.py | 2 +- pandas/tests/groupby/test_aggregate.py | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 5e55196803c22..5591ce4b0d4aa 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2843,7 +2843,7 @@ def _aggregate_multiple_funcs(self, arg, _level): ("using a dict on a Series for aggregation\n" "is deprecated and will be removed in a future " "version"), - FutureWarning, stacklevel=4) + FutureWarning, stacklevel=3) columns = list(arg.keys()) arg = list(arg.items()) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index c2d6422c50d02..22d1de99c48be 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -310,18 +310,15 @@ def test_agg_dict_renaming_deprecation(self): 'B': range(5), 'C': range(5)}) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False) as w: + with tm.assert_produces_warning(FutureWarning) as w: df.groupby('A').agg({'B': {'foo': ['sum', 'max']}, 'C': {'bar': ['count', 'min']}}) assert "using a dict with renaming" in str(w[0].message) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): df.groupby('A')[['B', 'C']].agg({'ma': 'max'}) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False) as w: + with tm.assert_produces_warning(FutureWarning) as w: df.groupby('A').B.agg({'foo': 'count'}) assert "using a dict on a Series for aggregation" in str( w[0].message) From 7ee73ffcfd1cdf896a53589eebf74557210ab26c Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 13 Apr 2017 18:11:33 -0400 Subject: [PATCH 383/933] BUG: Don't overflow PeriodIndex in to_csv (#15984) * BUG: Don't overflow PeriodIndex in to_csv Closes gh-15982. * TST: Test to_native_types for Period/DatetimeIndex --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/formats/format.py | 5 +- pandas/indexes/base.py | 21 +++++++- pandas/tests/frame/test_to_csv.py | 28 +++++++++++ .../tests/indexes/datetimes/test_formats.py | 47 ++++++++++++++++++ pandas/tests/indexes/period/test_formats.py | 48 +++++++++++++++++++ 6 files changed, 145 insertions(+), 5 deletions(-) create mode 100644 pandas/tests/indexes/datetimes/test_formats.py create mode 100644 pandas/tests/indexes/period/test_formats.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 07f393a814f8b..a105a6801fb61 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1344,6 +1344,7 @@ I/O - Bug in ``pd.read_csv()`` in which invalid values for ``nrows`` and ``chunksize`` were allowed (:issue:`15767`) - Bug in ``pd.read_csv()`` for the Python engine in which unhelpful error messages were being raised when parsing errors occurred (:issue:`15910`) - Bug in ``pd.read_csv()`` in which the ``skipfooter`` parameter was not being properly validated (:issue:`15925`) +- Bug in ``pd.to_csv()`` in which there was numeric overflow when a timestamp index was being written (:issue:`15982`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. 
From 7ee73ffcfd1cdf896a53589eebf74557210ab26c Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Thu, 13 Apr 2017 18:11:33 -0400
Subject: [PATCH 383/933] BUG: Don't overflow PeriodIndex in to_csv (#15984)

* BUG: Don't overflow PeriodIndex in to_csv

Closes gh-15982.

* TST: Test to_native_types for Period/DatetimeIndex
---
 doc/source/whatsnew/v0.20.0.txt             |  1 +
 pandas/formats/format.py                    |  5 +-
 pandas/indexes/base.py                      | 21 +++++++-
 pandas/tests/frame/test_to_csv.py           | 28 +++++++++++
 .../tests/indexes/datetimes/test_formats.py | 47 ++++++++++++++++++
 pandas/tests/indexes/period/test_formats.py | 48 +++++++++++++++++++
 6 files changed, 145 insertions(+), 5 deletions(-)
 create mode 100644 pandas/tests/indexes/datetimes/test_formats.py
 create mode 100644 pandas/tests/indexes/period/test_formats.py

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 07f393a814f8b..a105a6801fb61 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -1344,6 +1344,7 @@ I/O
 - Bug in ``pd.read_csv()`` in which invalid values for ``nrows`` and ``chunksize`` were allowed (:issue:`15767`)
 - Bug in ``pd.read_csv()`` for the Python engine in which unhelpful error messages were being raised when parsing errors occurred (:issue:`15910`)
 - Bug in ``pd.read_csv()`` in which the ``skipfooter`` parameter was not being properly validated (:issue:`15925`)
+- Bug in ``pd.to_csv()`` in which there was numeric overflow when a timestamp index was being written (:issue:`15982`)
 - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`)
 - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`)
 - Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`)
diff --git a/pandas/formats/format.py b/pandas/formats/format.py
index 2665f5aea145d..66a81aadc4213 100644
--- a/pandas/formats/format.py
+++ b/pandas/formats/format.py
@@ -1564,10 +1564,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
             self.chunksize = int(chunksize)
 
         self.data_index = obj.index
-        if isinstance(obj.index, PeriodIndex):
-            self.data_index = obj.index.to_timestamp()
-
-        if (isinstance(self.data_index, DatetimeIndex) and
+        if (isinstance(self.data_index, (DatetimeIndex, PeriodIndex)) and
                 date_format is not None):
             self.data_index = Index([x.strftime(date_format) if notnull(x)
                                      else '' for x in self.data_index])
diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py
index bf7975bcdb964..ab5c01388e652 100644
--- a/pandas/indexes/base.py
+++ b/pandas/indexes/base.py
@@ -1820,7 +1820,26 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs):
         return header + result
 
     def to_native_types(self, slicer=None, **kwargs):
-        """ slice and dice then format """
+        """
+        Format specified values of `self` and return them.
+
+        Parameters
+        ----------
+        slicer : int, array-like
+            An indexer into `self` that specifies which values
+            are used in the formatting process.
+        kwargs : dict
+            Options for specifying how the values should be formatted.
+            These options include the following:
+
+            1) na_rep : str
+                The value that serves as a placeholder for NULL values
+            2) quoting : bool or None
+                Whether or not there are quoted values in `self`
+            3) date_format : str
+                The format used to represent date-like values
+        """
+
         values = self
         if slicer is not None:
             values = values[slicer]
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
index 927b9f6a48718..2d2dfa9a3d849 100644
--- a/pandas/tests/frame/test_to_csv.py
+++ b/pandas/tests/frame/test_to_csv.py
@@ -1143,3 +1143,31 @@ def test_to_csv_quoting(self):
         df = df.set_index(['a', 'b'])
         expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n'
         self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected)
+
+    def test_period_index_date_overflow(self):
+        # see gh-15982
+
+        dates = ["1990-01-01", "2000-01-01", "3005-01-01"]
+        index = pd.PeriodIndex(dates, freq="D")
+
+        df = pd.DataFrame([4, 5, 6], index=index)
+        result = df.to_csv()
+
+        expected = ',0\n1990-01-01,4\n2000-01-01,5\n3005-01-01,6\n'
+        assert result == expected
+
+        date_format = "%m-%d-%Y"
+        result = df.to_csv(date_format=date_format)
+
+        expected = ',0\n01-01-1990,4\n01-01-2000,5\n01-01-3005,6\n'
+        assert result == expected
+
+        # Overflow with pd.NaT
+        dates = ["1990-01-01", pd.NaT, "3005-01-01"]
+        index = pd.PeriodIndex(dates, freq="D")
+
+        df = pd.DataFrame([4, 5, 6], index=index)
+        result = df.to_csv()
+
+        expected = ',0\n1990-01-01,4\n,5\n3005-01-01,6\n'
+        assert result == expected
diff --git a/pandas/tests/indexes/datetimes/test_formats.py b/pandas/tests/indexes/datetimes/test_formats.py
new file mode 100644
index 0000000000000..ea2731f66f0ef
--- /dev/null
+++ b/pandas/tests/indexes/datetimes/test_formats.py
@@ -0,0 +1,47 @@
+from pandas import DatetimeIndex
+
+import numpy as np
+
+import pandas.util.testing as tm
+import pandas as pd
+
+
+def test_to_native_types():
+    index = DatetimeIndex(freq='1D', periods=3, start='2017-01-01')
+
+    # First, with no arguments.
+    expected = np.array(['2017-01-01', '2017-01-02',
+                         '2017-01-03'], dtype=object)
+
+    result = index.to_native_types()
+    tm.assert_numpy_array_equal(result, expected)
+
+    # No NaN values, so na_rep has no effect
+    result = index.to_native_types(na_rep='pandas')
+    tm.assert_numpy_array_equal(result, expected)
+
+    # Make sure slicing works
+    expected = np.array(['2017-01-01', '2017-01-03'], dtype=object)
+
+    result = index.to_native_types([0, 2])
+    tm.assert_numpy_array_equal(result, expected)
+
+    # Make sure date formatting works
+    expected = np.array(['01-2017-01', '01-2017-02',
+                         '01-2017-03'], dtype=object)
+
+    result = index.to_native_types(date_format='%m-%Y-%d')
+    tm.assert_numpy_array_equal(result, expected)
+
+    # NULL object handling should work
+    index = DatetimeIndex(['2017-01-01', pd.NaT, '2017-01-03'])
+    expected = np.array(['2017-01-01', 'NaT', '2017-01-03'], dtype=object)
+
+    result = index.to_native_types()
+    tm.assert_numpy_array_equal(result, expected)
+
+    expected = np.array(['2017-01-01', 'pandas',
+                         '2017-01-03'], dtype=object)
+
+    result = index.to_native_types(na_rep='pandas')
+    tm.assert_numpy_array_equal(result, expected)
diff --git a/pandas/tests/indexes/period/test_formats.py b/pandas/tests/indexes/period/test_formats.py
new file mode 100644
index 0000000000000..533481ce051f7
--- /dev/null
+++ b/pandas/tests/indexes/period/test_formats.py
@@ -0,0 +1,48 @@
+from pandas import PeriodIndex
+
+import numpy as np
+
+import pandas.util.testing as tm
+import pandas as pd
+
+
+def test_to_native_types():
+    index = PeriodIndex(['2017-01-01', '2017-01-02',
+                         '2017-01-03'], freq='D')
+
+    # First, with no arguments.
+    expected = np.array(['2017-01-01', '2017-01-02',
+                         '2017-01-03'], dtype='<U10')
+
+    result = index.to_native_types()
+    tm.assert_numpy_array_equal(result, expected)
+
+    # No NaN values, so na_rep has no effect
+    result = index.to_native_types(na_rep='pandas')
+    tm.assert_numpy_array_equal(result, expected)
+
+    # Make sure slicing works
+    expected = np.array(['2017-01-01', '2017-01-03'], dtype='<U10')
+
+    result = index.to_native_types([0, 2])
+    tm.assert_numpy_array_equal(result, expected)
+
+    # Make sure date formatting works
+    expected = np.array(['01-2017-01', '01-2017-02',
+                         '01-2017-03'], dtype='<U10')
+
+    result = index.to_native_types(date_format='%m-%Y-%d')
+    tm.assert_numpy_array_equal(result, expected)
+
+    # NULL object handling should work
+    index = PeriodIndex(['2017-01-01', pd.NaT, '2017-01-03'], freq='D')
+    expected = np.array(['2017-01-01', 'NaT', '2017-01-03'], dtype=object)
+
+    result = index.to_native_types()
+    tm.assert_numpy_array_equal(result, expected)
+
+    expected = np.array(['2017-01-01', 'pandas',
+                         '2017-01-03'], dtype=object)
+
+    result = index.to_native_types(na_rep='pandas')
+    tm.assert_numpy_array_equal(result, expected)
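The user-visible effect of the fix above, sketched with the same out-of-bounds dates as the new ``test_period_index_date_overflow`` test: a ``PeriodIndex`` whose dates cannot be represented as nanosecond ``Timestamp``s (e.g. the year 3005) is now formatted directly instead of overflowing in ``to_timestamp()``. This assumes a pandas build that includes the patch:

    import pandas as pd

    index = pd.PeriodIndex(["1990-01-01", "2000-01-01", "3005-01-01"], freq="D")
    df = pd.DataFrame([4, 5, 6], index=index)

    # Previously this path went through PeriodIndex.to_timestamp() and
    # overflowed; now the periods are rendered as-is.
    print(df.to_csv())                        # ,0\n1990-01-01,4\n2000-01-01,5\n3005-01-01,6\n
    print(df.to_csv(date_format="%m-%d-%Y"))  # ,0\n01-01-1990,4\n01-01-2000,5\n01-01-3005,6\n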
Date: Thu, 13 Apr 2017 18:47:39 -0400
Subject: [PATCH 384/933] DOC: Demo incorporating pytest into test classes
 (#15989)

* MAINT: Change CI picture to reflect non-owner

* DOC: Demo using pytest features in test classes
---
 doc/source/_static/ci.png   | Bin 224055 -> 374599 bytes
 doc/source/contributing.rst |  33 ++++++++++++++++++++++-----------
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/doc/source/_static/ci.png b/doc/source/_static/ci.png
index 82985ff8c204abcae6eead66d676cb880f92d3af..4570ed21555860ad5c8ab41f159e78e492334e82 100644
GIT binary patch
literal 374599
(binary image data omitted)
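The ``doc/source/contributing.rst`` change advertised above follows the (omitted) image data and is not visible in this excerpt. As a rough, hypothetical illustration of the pattern the doc change demonstrates, i.e. plain ``pytest`` idioms inside test classes instead of ``unittest``-style assertions (the class and test names here are made up, not taken from the diff):

    import numpy as np
    import pytest

    class TestReallyCoolFeature(object):

        @pytest.mark.parametrize("dtype", ["int64", "float64"])
        def test_dtype_roundtrip(self, dtype):
            arr = np.array([1, 2, 3], dtype=dtype)
            # plain asserts instead of unittest-style self.assertEqual
            assert arr.astype(dtype).dtype == dtype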
zog8WmV|feq@mgwxn>UJxtOyH{q8ml)T>CagJ`{xp)VWN|MileEoTt`#zm4qHN*+ng zToK%CBlcPj@Hh(DEA9!#AyfzV)YueQD3!Q3*Xo--+^xJ>E8@8xp`Y#tN^2=E}V%xhB^`Z2|eA~t0A>J@(dlKJ?_G9dc z9pr<@+NF3QRv-DO02XwW3a4jkZ5Bf0E< z5HyG+yE66b@?`hOtikb&OTc|pppSYud>ZTEUXjFC|H8h}{f;X$AqqWJ&xNNc_0r|68WZ2X(x_mH+;>em zVq$^0&ZAomEp^j-+=VY?mRY;Vz4;=i)TfEb@aS`#U z_c8N5^=v7hEw>?SUC4)OcI&$jdELfRrLk)gJ3IOewZqeJo_}2IVVCy}p08kH4&k7F z`aCDzXX-Mv{1ACu(;G(-?Xci*3$9>r=IDg0dbyY{#GQ`DKJZC?{U>krV((zkX~r(`A(zJyCktH(u-j*L-~{*_j~znAC}smw7x_`15+p%E}sF*Nk9iF(*zVe@-)} z%kTXq{Y`s@!#H@KXQ+%q6x2(Us?KXWoRiM|QYyk_R!eqTPa#jCXZi%OWuoO*i8l{H zH|EYEp?Kz%T}g=3783jlLz!51`7&jl<16)Y+)uVES>hHl5Ibc{`|6t3QO0oI+6k$9 znn)VS$k0$(8=_n;t;BdS#1UkNGFz*5<}zzr^;HmkWpEiI)D4?Q%dD_ zPnr`>rehpRT7gPD<;#ING7_HF7;C0OTqFUNiO9K14KUH=d;G;XgrJ}Ona}yFcLs2A%TdK*>b#s+8B}qJy&tJ%<2`P7mq;(i}x?; zFaL&7p=xfI)b-Hkr!wsIm=%MH>?o7*M z`ydTJ?~fxB^{wU!R~PEzFs=kf5 zC1FiRM^eGVE#vNCDvB1}cFOvwKZLNiT-=p@U3VL#UmBilG>8c&KqcQuastpQqDTZE zTz;VGtvyXT+)sKqCUby``-CXCdeCYZNC;2AmY<(F-uy))t4vz@WyN@lG%6Gn=%Br{ z&rr_!yJm4_3=g@e?&nqicXNYwLk`HN3NA~or*hv1%o)e6h^HWhXb-oREmL-yH1SB2 z_gZKr5+SI-D76*quQtEewg&1MZ03Bew6{8vc1EEh8`idb8dV&{TR@i-X)mB4m@ORu zCX*XB6RgDEdeBMiMdo|tEe>S6k9Iaw>Up;5IrHhEERC=BNl1~x5KM;p$qdsMfB6aJ z;(8GfFy&LGs%DY(T7LAsBxAvfd|eusqqiKHrzNMyxhEsJeCUT8Ob9Z9kjub{Ggq%+ z*|keL+lH>A3mV!VB5A0f&e`7X9;$0^H8_rPEwsQ9)f3Q2n)7?R4UkW!9TFa^URnug zTWc(}LYZ4@Z9Up$BK=MdVgv!qL|-*DBPWJ`3sQJF0r`kH2ysy;Q;bb_Jt^E=1A#nw z0$xKtP7`&~z(mPNb7W=-#5!8B6|NN>?>)74KRyw)Y7!@|e|0|lwGYvK5nX$Nrla|HEZAw%$I}L@F=DA< zaz`KP1#jm`8Y<5n&kqO}xl8AW+ig-Ab)UUr?&ep;R!nFwj7!Y<1&JNl^;s26b{IcT z;sm2zSy^)~x-T{RB@un`*4?C6x#iSGfW<1C%Yirs>N7XjKYYN6_nw?s?$l(>m`ueD zSgV50j_)?ni)VYsxGS^uLwtYs0`Hgqx>^7A{?ObmIULosNGjJVk>fjI1`qB#1P^{L zT7It8m?je!ALGen{~3xBk15b7-Cv4b`c5Ww0Oukw0Q8{M7Lw-&q#fZi9Y1%da0%`# z^^*ch^j7ZBW*9-U+tI@j0g6w5z|`K|AHR&Yu2t%OW^jULGPU@(w$I?8_vvNpHpxW7 zVjbF%LKp~rbg^SIh|z#Gvmcz*ZE9ummAep#-QFVXTBtCOPOGQQ->fZdRqmAJ zP@!E<{0VS~V#XGlA3%r4H9B}J+QXVV^r5G#-}z6iG3h(35?f-HE@YV? z!f}iRScm-X{G!2Lx8fBMeDDrL3bM)fC2q&6BxV17IL~90_Z_7`w?@o z0^8813`56s>D}ZORZ)glFNB%Q7*5$;%RJN{FfGC~c#wQ~V*St8za%I$c{-<*Wq!{r z4Zd+z;UdZV5PH{Aq*{vz}#A7X;BnGer(wZmZHqlq~yc}avaNL=xB zORqB+fjC+M^F|z~Dk~MrcFeeItfY8;?|5qM(u2Lgn2~Q15kYkTsW!q?kv%k(1`h0J zv?=C@Fg8lmEgRNs>~-@LvV-%)h+1pq%N9WGC$p$H6OC>CbgRPk4Zo5~bsKGfZW8_K zMj#s6uV3EHz!V4SoY!>#9!-7w zW<&A%DMgPHrOk3j^hOw3ZO-&|HT-=e(B0IR5pdOrsJMgeYdRVuiVypMtWhfQffW5V zBAg1mF+TM2F-b{fE`L%FmZ7!s)b-nav$42hb%|8dQVa!ku9@0(KuxAyx*s13j_C$> zQ>FDW@`XH?j52}uNb6CycD_y-I$zwE>>gcwgDY&pDNS8zinUySarwmZ0^5XeuqTQU zF=O$NjuWe&Ki(!;+noD$NjADaJOpy7pOA7ro$?)o=X8W?AUT%0icwsK6EndoNAVc! 
z$tp`5JxM_qu5)yte$~gtJ4V62a>_m^D1~bb&M+lI)95?B+VBoar&L@p6$3L*n%US4 z%xKxRySwJ9?6PIHvy(&LrAREgNbMBhk~0}^b8~9>Kd*5buNauBePAX;%0TBE_3k(B zur4e@jTKE1Ox&vb))T;a-GQt0bs~?#uT|WEC+myyij z&##~dzd@IhUQF<8k#nJpLMJFu_3SAN-LT3X=gBmnR+eKaN7-Io}^`+ zIztXV;)2%k9yQn@gC|PI#(!z2)3fn$-cUH8uhf;!U^!>LvU);NHN*8aA+^?v;WIt` zl5ptwxtdo8wJJ;meNum;+jQ@$5>oIqzc5wPA>Mj_ZJekR}yl_>X@;VDL~kcwHo!UVG~E>^UFuSL;BOX*m*s- z5DsQ)M5P8FHJp*|>r>czy!KmMKDm@48l^WoN~#{D>K@$mjPO(S(7$)FWxf5issP2D zSoxoNr7EYDGbamL6<Uxazn=RaE`BxVJP630@3Jx|T(78EuC6H*%A+ff z9Z5296i-(Ca%eM-qp}=im^mKBU6vcn09dKq;o{b6FLYSZIM$VI76?l%h8x~*!o-u; z{0vp^a9MF_rC>7H9wKwAA9X-AfAK#|=RbTq5eJVGS8GFB1S%^MOSE7lzhfY|pcc1{ zZxmw{*9?ZvCI;_gSK3c&mQ`!32Wfx5#E=iIu2_2MqtR?1|IKRlX@4&mma)wWnWcQn zp-6Q<_R3}8uhvK;Qcy;D`Q4J$<7N=vCcpDqtlin6O5{-BU*&^{pM9-#!)Uz#(R-cWqyNL^7oo3~bNRl7S$vGpnp{iCh= z_sdeUTEH#x!ep^*US!kvGpC8f{oUl#AgDCJJX(9H+&rE?mM(H*fZA$K{kik%@pL=) zPQK$9qgL|E7$!M!A_<22*ST~OF9xoa+yM>$4fshIk{xyRWBnH#HLYSl=dtsv04;5d zLztl8DM8^1xr#cGPL1Myou=!_yZ*)YKZJ9Qc6H|sjtwDasCZE10yYYJ!EqFIr5@T5(ML5Ne>tHVwC>@6X+DrnWZi^b2|&*` zoa8it*42%R6ea?((Y8M6{pq88$2(%iMrrdycGD5$8Bh<5a(?2#!p@UHVvrag)0$c4 z`H0+tz3HB(u%p&*MJwb&*!0z*XKMdCt;)?3Z%5vxKbCq-V zM~HJq%q&!_MOG>ilnasYpWPf2p%wSsr#JG>NaEbxg+a>20DevyfXdtdP_^h3-db3) z_&w8^9JdeWIU%#yAT+eUl&Jk0cfFr+3*zjqSo;$D8$pD5z+O0JzBjW|^`dQxx?KWGecd{azjAvYH5p#?1z+ z@uR$u(j%|1dZ+C9(|>49xGdkCSq=?=L9tT4o?zwItt|UeNcdr6b4Pm33*uh?ehn}f zWDSWQRX4Sm;yQB*F3Ya0dx4g)=}y!yNR4VwM7#@vuaq(TvRrmvoe47KP5JhAJI4@& zBZWl8>!}nAQbf2K_tDgTZFLZC20YiJLuIk2hnxf_kUDIohi_-S>vsRMZPhG@h0uW+TN7Or^#DGUtLf(J_Y4mh?q=RqQ75O@~Kh%Vor@(Zp-U?9Rq^| zmdP|#DXkn+;jF{wlSMdj3g#-NC=ioIQAbTj_&sF6%5wlvzaFGFh!q$Qhq& zt8DdUs%hOfI_o2ZjCizPE4cLPVK6+pV5cmUpErBwBtcgL2o%)?14`V*b z0ou${vjZY1a~i~8I)1TjnUYix%wYRhA6`;hc82$GICgUI{;~In6uoE? z3U<$=culn1DG!a}>N9KqT7-0*o;n3#*;+%+UmpbYHsc=cLUlKez2mXwU))i(B zk+Ve~lu`0oVQnp(sLf2J!P8#t58U*YL~YY zB_q)%>F~th)AHgZpd_|Sf$x0hx?cUeP%I`1SySWRTrsodZl_G4i2%nIk^AB4!>V3yh$4-bjePf zf>Q!Ux#+%0sP9G260jUdF3t2{5z1JIocm*UB)bYF($G>MCMmqvrTUyTasO!b{Bu&` zo+oTFtTy}pFShPS-x(3a8(*LQNJRcCr?Gwyoz%Ns(~;Mm)FQon%SDT2u1Z+nqT|TV zB&-u=|DXv>tpi1!z|C{4U*8?Mw})-AdygW6wYG2dV-bi!+jRIc%Rdvz+WX3K4C5sG zJOjYe*OW9ZW~v%R^I-9g~!Ao`2oiWs$-d zY<>2P5$kn*4gcGK|IePisJ^GZq^ISHVtH`7#!zT>S*He(Tubj$GstK3J|rr!=jI4e z9P>bhpb{!q#bkz;&jOxii_a!wwDuwzeR&3m$beVyW;O?^>UVSgV`S5D{fZIvnGqJss$ocj-@~J}>F^onJ$OqN# z`v!?f`fnWQf9Ba{c_Ft2=a3cI^vx5f1+RNJ8BTDD+)2kCGG*>Y_K)EDlS1`PBIB+=P!fGicngFT* zX}(=(@7dpf5VK=`Up-A{E?zfP0ihofpjSuKS&_o_APBiCLJU?!TS2@G_^B){dW7{C zuM(gnVu(~p_&r15((;s$SI8trz^JWn|6$H^la@rM-%}XS^IqE#M%X)Mu3%;5Q(?v2;s-k^@8Z za?3?U&P`+@4sS%W#-VuB^G}vRbdQ#$Lk~xJT`HU^j$76ZE$vd!);@$;!=Qabr!2un z0`fELI+aR+Hv--zlX}-?;6l>

bPdtoDn6;A}j!rXC8CVWxILC)p?jCs&{WD)h}5 zs0d&*Z$6QsXOzjU7YWevxB8!d9XjyYZAWS|To$DpF$oz?ed%X2i1vlX3gK#$ilkXi z(;|+=aDO*lz8TIN^$qbLZ>;=mU&1LL;;`B%~RB5Llj96C8_2 zaCj9fPU_gi1Il6=3P0l#SSf@09hOQpBI)v|1}#MexkjJt!LfuCT*{rHWUBv>@?ec5 z+i!1+NyH<#@#%&}CyW`+_J2RgSU?nMOE9GJ_>Gg9Q$fx}%V64=VYWh)O&V<};7YOO zBCQTZ14M~dS>FI5NriP3k64TbO!Jx8G7gtF!RBs2#vBQjFu08P1X@L{H>gsnwGH~u zC4i3GZZ<}{%5un=*rCtv$mhG-%eJ8;fGQ%bMH!Y)JoOU;&I925bApV(?JhSx%a++BO~n^n86TLu~)K3ed41oooer?*p0$LP6}3@@#dPO{g^_^J4>K^KQxy z5Bkxo6`Q|hi4_=hRLTfJ|7+Jl5BHTF2lS_X(3x=LT1Of>|JfE6=uv;s>+YnxK>F@!`pjn{^tSvXM8mxD=pglKnk?dmM78Rh2kuV%I-WVqSySzIUcLMPs7kD ziKxs2pw_ja*L4KynEvm{3(IbSK5dFpIu-`jQu%iF>Ng2%n-WVD%ho-vl5QwaVyLW$iy)RY<5h!VtE6cWia!5DG|>N=%fM3?tbWMVho+H2%lV~CFL2P|p7=zXvQc1Oh>WYj9s^aEIXT?(XjH1b0?&cXxMp zcXxLQZg+kAoYQvp`R~*Jd}o^vv<7P~M$H-}y=wF_PP798j7k%5*TJ9 znEzo-LCfrvy|msk>cQB>CH<9Wo@n~#AoNr_O?-wC+ zV-2YdRX6220;)QM5s~`@tBk+Sh$ZNO&QsUP8s|?^A^%hhK7;(uHZzcb?&l1O9*%I~ z+DK>$C%*1$-S^NmyfG`cf9ts4xS#r8N(;A3?m-St2S7E#opR6sbqPQoC$U9K?cFiP zKxrP!HkLA-b7a^f6(r75#quMY`#cT8fR1rf;U1`s5hPa)5$|vvSoPPBZmDjIrpms6 zdZauRn+K$;x?2`=Eq~eQDN2Bt5%QqjS_ncJ56t-Pwc<%gMx13E0`f^Jdj5QWA*Fyx zs5gVXp(c(rh$A5!wx7X)$>v*LN*HiJRhRfuxlvI~0TV`DAc`Rvs+(a3oN0>cX_!Y? zS)}6D&X6;t2h9S{j}xM-wu%HXh&<~Vd_2))N~)#A(H(%;4X8Ekjr25NL}2qYqwd$S zYX9T?y)s=IZtaVQmuF3GGV|`24ufAv;yEBii-z@;{c(?ELe^LTSsr|#3n;aW%vkL~ zzT7jqmcGp&Em`D)4n_5kyKm<8`X}mgYajs3u)6Pye@Un-IzqP44yNgxyt{&F?IxpX z4Wc<`WlOPz+=55}Z5bLpx^%+Hg0iL2%Sj!#T)X3MUAtYocUc<`n$qJI( zB4JEEuyb=A*w$?sYR4?WU#X~`DlM&2&z3crZvB?A%7~MW5|hTivedORnIb4sTwbS~ zwR+~Y)Ty+mP!nWv?|$4#5-DUspyO|$a;i9Afc6S~OToV^92FzSh`?Vra^%@+ZWPlz znYMW;o5$4z#MpR*{D;O}W4Ff-2hmu8T1Yya0igQ!L^2s;4N-p9kJv>Ef=@K48Fj~O z@H`dTyDGoGGo526c#_kFyQEZVd7jHRjwqHPE0s#B8^Xnd`cq=j*&#O3k6yXHAd9H} z*Is`wxhGuZ%j{Sc>I6S*9CDbM2HX~Q?hdb;yheL4W<+GN^$?w%dv;(0p<(|r?O!`&ZirwUP@sI;J;eZ?(B2h9?=gSXHWTFVo;Z3^S#vwO<;`$xzSaW`YH%T!h{Jtq-|XZ`V?P)Q zNn^D5`5FF(1#pwH4oZa*7_92%TaO{?roc_}@7fu#hsjct%!k;Ci5U(;7+Won)P6dl zn6gZ*7aipL%pfcM66d`bj20Be? zfE;m;S?2|IX%Ibahco!^`@MY5nG3Ag&6e64#J#Z90wS|0GV~ zxy=aG(k4D(<{{;I+|1ba{z;sYLp%QPn!diAJ@g*SNCEFg1TP$G)}Bw@#IWEoSDDH@ z8_MXXz@VdJJl~WN@6s*>&J8us0TWkNF*ek7wFL*YEI5QwY^@q9Dp@yvMvV`q!_ti7 zj+W(>cQ_jwi)mWg)cG@xFxVu`q>qR>1*_t*{}ZK2+;h0=@G@*Vk{8y~*DNBG%G{H{}< zC{G*e$QW4J^6@`?3oGk}QybgJ2)9HHw zn1=iwd+J@#IT>*&oRJkh)X$tY#B&iryPu_4I~L(gq(%wV88$;ypCi$|kx?wfa`>Ra z%PmIk;`CdMG^kig#4sTpzhlezW#!P?*W3kBk1ijesi|GE)S$v-gXdvPn+r3=ebS*q zg_OfcVjz`JSXfPCs-!TNu!4Y!l*QvqL+nS2oFcyeBCcK)M6?=LzGzwNUsp6t6~IT3 zCwziP2PYO6VoWRuJB687UUZt~m|CM{B^Dmm#xRFYjZDbA5)GXbBTP*4jaeK9SX^{V zqtu|6j$Sq!S5q_Riw2W7fWY4)96%w|Ep4`ZpNI|{GVq2DrXDW2Jc6dQ>CnfDx@PZhSH3&^gPP`z%P zr^cu5&59uZa`b}`jyR=S#Mf;FICrt2mL6ALAZdd-ugGUqZTwL78-IsfYKJ0gn~KP% z(Yn02xQ0e3p+?G~L5)vajS?0ra*l@&6NObjXNr6djK)Q2vY~=rNm$ELtwNz;H7%z> z5fZNTY0+CWiKH)92KhA#BdnO-EsSP?nicKVDe_9-)i|E*kC|zBnZwO^5J)%x%9;_~ zvMwUTmEK!{9yT&DB7T`&i5ReVoR0DT*kT{_Z)%X*QMZ@tFO=*$u4hkLV8;%aXV-4G zq^0J)H7h?RKnIxsqvlfTCV5T+B`Y<>jg}?AF~iLa)kRtQ&@eGaK><<{4kCF%km3>? z`FJ?1O0bGL3M!B@IYp2&`STXWQNT9zEdbP#8O3O7$5X_$});zlE0Y=XZ3Z;K^Te`p`(t9SZ`?B+1X9HYE_TH{YK03>^bmlav7VJi|*U~*`r4b!N{Cu=Q(KFMzZkh(mzRa{1df2&a7hAlmFrW7Qo~4MqTh-tqSYO zyytP96yX73O|mgUg+2Lmset3mpbg&DF?leT4@(vbKwl53Kgiy0^zkAiycF7Hmicz9 zsvNBdlg=76`F;0lFT|u5QiLWFnK>!V(Rs4BU&H#MaBYI2AT?bOQ6n|_SMw${xb>)3tgXrm>% z>BiJbeH+GtKCjGXAQ=7`n8mwS871J~@RO)YNxZ#-D821q^pp%^ltskkrH##IYKI(B zdud%cJXORP!JvZfvKhY;vmBO%RF#RtdUl1^#Kv`Kv=;p(RH*lY+04FZfm%dk)aTN(POn?P5_E)B<*-%o35?1wD?}ULvE>? 
zfXet34t#?Hj5e>=XFQFXEJ-R8mZX#nc1-Qv~y#Q+I=O^Z1}< zA#uUJztr!V5*F$av~ctO67|{&`Ll`){{-u)1u1ooWe5%JfS@QWNUf+|FL&+WpeShI z2ffYPgKjr)iISAczMxny`S;v5jDh@t+el5>u%&N^3Uy?`74pthZlWu}7rz~khV`|6 zyaL)EsUj@Vi)-52*Yk@mlA!gQ6~hy7wGE(uR2TdRy3hJ`C@^ceL`%Nb?$vuIPZSS+ z-x|ym@SxJL)IyZWQ`P1+Ev)kBx@@<|pG0{bHk`Zr#^w;N7 zFT*BbQY>v$#5xTq>JciWRIJfpiwoLHHwXG{;~FD5b@^Q1Pjr@yRA|X?p`Kz?l0OTE zJSX%$NALPMX{`%1Ct(X#!_+@{`$s9PoWgu}4NPYtl5_Qr9ADVG^}xHVbiI8JYdsD! z(~e)%E&u5b|7tdFX%WJS)NhqhjONS|U_nLyP zJcFBMiwfG7*IUC(2GJvY;lS4=e0}E!YAc~++GWX0*S6;UopWxLT~dV9u(}#m5)RAw zGPGMoWMu#29hZlga81m<)J|V>t$0*)8T12V05jj zco{;Qfh?nN%qlQ8%z29c`wx6F{{`LCw3eYIYCP4=w*8y+gvt%9SFdREnCY`X)Qtpp z_Ji$%Fqg{8<}@2DD=Kl~AN+*=>|!PoVZossrjv#Fr0N&a8xGBhf5kshxAaY2qjuiH zV15Z2fP!Yp8ope8w>L3bIKztIpYfpm|;uKX?2Qbrh8#6{P$b%3CO2 zfv*b>#M*Nh@$4;NKrQjJ#-BcGU=N|DQHv`#wwz6SdJYP%WUO-&!)@Ct0) zKvK^;OSyaXpMicU)mL+ek=7A*RL{i2zNc{zt$}wzM}fC}ZWuC8IN6iw&aw64eYoec zY3pOxn<_Ru1ZT!&;V=~u;`pniq0GeM071oYmqn+;Oaa%uNLu{Ob?}tVx;Z(aEXN_G zQh!2&N2;?xdwi%75WP%%>k_B2iP-3xfPy~cgmrDX}t zlFK?H&Yh0X>6Bh(xnF+qjGm>)I-U=)s7_b?m5=IFgsT$SqH#>5an0u~A$%~7ld%gg?fOZZe@Dr6 zKFG6u-p+091I?| zzqiN?wT-Uxcx$Q0hUPvs){Og8Sin*)F4$9$C$oW;baK0gNcC8Vx10aH0f)U%JD!7s z*WlvEO;>;wv9vV16{MqnsE$psGNZEEgrQ?MIO*ard;M3Xx9;^t2;Gq?#tUviv&QllCdQ&o*Hc$z(KU8=2`aj zQuI!t6$kELxdqPQuB~6l63&yTDlqInQhD>;0k+ zg5RPwC!0oDQ5enYjBHUzQ?tyAZl=l0=dXOiJkKfD=kT50tUK@4(rB#JA||dKgFp5> z!00{+q+2boZB{p7(bMo6%|8ZqzBzX3WQf%5BiG_w->c{-v^}NVaqYRckXllF13q=Y zzVont!7OnczrXMwJmsV<^cZXZ+{xwLZ9@036b;)p9x{!gb8~+Y)foMs>O5jHnZjjZ2t?3fXl>rP>`1%4Co`Bci@|XOutwL>cI>?`Tp2GA zvvce{?lWqq*Rx+W%*9(=bUd1DXhl7_qcSWzvYk-|g+{Zc*0i-7u*f8ZID8BK>Uqx! zKCwdT&13BCFwQIaksLk@96o~(YL!TY*TPGdOz?DyoqjXQ?3H;Bn+;$wk@Hh;Gw^EP zKRtzKRWYdPF?c_TdCME$UQQ^eNT%UVQ0w11ikN(VPE3PG{uYojE@=Nzn@=L+*Z(Ju z_+1Lv_97o7I*AAQ15HS#_D`+1pcWqob-M6gmCDDTgOA2h@ z7V?s6vgTfZ>xYig9^-yJ>&b)4I8qVP-fV|KTQT&e@Q3ZSvVFwZSW%h`N^MIlRj2 zr@Ln^zsdM>FR`H|iS;o`6F)PF4>b>2p{1S zYXh#+f4Y98e_a3MQDY}c(3jG5O1m17Zx!msL%$e_zWL6gn z7pzM%OtI*By!+mk5M-pIERqG^@mRrDPiBum5#BwfqlTkD%()ty7~#0wiLV*zkE_L_ zKe12l<85KZy*$CaJmex;knKszh_b1~s42Jl0gGr%4`3YL;@oNuE}h2O&awf-D}%vs z_Z)Dm@aiLjDKz({Ked(D*3lvg9xNB2!*itMzn`?^Q8w(YLZm8ev{pE zkNnK7=NAvZ=X(7?0L2AnG}P}>8P3aYkR@Gx9MR8V(5AHK9ltRRinu)BIHcUgLUsHp z;U;zFzMSK5TB4Aq7?aA%`$h1PGky1xzW2bns6VbJu}ele@Ss*^9~Kfn?ehTb2AAKVtL608l51en%mrm3x z9K05b&eh{@EH_m|ixnHTN>wou$8%&i6=6~gLySpH-A zj{h6B4QKG_2?|2{JAn9p5+S4&tA1*TjqimgmV7#AAP6r5rntbO&wl)r$;h!?KNj?@ zpUJjfHZ?H_ehcdUSWYWzGvZpP!~WShoHd>6G?T;4HR#U6Vu|R=?)`=WMrYBTple>^ zIO>|gIL_Uc3%+7f_~!+BeTaMpPadXa(4Wn0rfikkN>Ec8dIs=hJgV+IHSQJljf0C< z^hg+6MyzduaK`4gM`l?g(Us;GzA@LH#swKfsj;M|`^d&HVXw3x-Jh$#K9-h0GT(-* zgiIQKMp}zCejU4o?172+R6;XPg(`j-106vbv}7{$v1T@gjwrR>5T6v3?OO{H*S_E}(8C-DU814(^@h zo{-^mM2Q|!5G@yb=E-~|GWLmAXn5$V?W5u&BrSq`gg#f!Jba;va;FcqfmY(LS}f#V zoq=PFps6a*O~n|xj(av9Zs75qpkMFiDQpkaBPN(5z}Dh&8rZ1YB1q9+U_?I!t}Z83 zq$ARiSxO8qY!=oo9?sjv>-83HLwi;R;F;bBpduA2QW9)P$gTn-!7PQ+d8#%nUE|9! 
zlfmwJyFKnQ*<4w+YV`zJW$nQIu9S&@hel1#xv>Jcml9iL)cuR+LXbD#dLsP3>8^SNI;y6H0GdC zn1NYVZUJI+uBMzxJ8tTzK~!Jr2*5@96G~A5F$k&T#uOcrlMz@8WGDv+f%95Un+rPX zM^D9@k`$5g3;i+V%l%F07=~sspfsSbP>T%ccON-Hx}b!Jmhk{jpc*=mG|^B#g?Cb} z9fnn|&qJyUc?RcLP6@CI1>mc#bsJS1&YI6i>c>$Kg$f{l$yPS~&OenmZ{^@Qr@y^u ziD6)zvqzL#dqGkc7ef*Sog28cC<>6>iN~9D0BJETnpoZ5kx9|^dfqzc}+5l898A&2}u|;ACX+fPaf`5-c;M!e`Bm5tAH^0 z54`7z!_W0gQ>OFdg6@+aBJ$+oqaVL}GbuFnhm^?!gs%_2BAby*-NvX{`N<3G6M0+ZqKa%wMy)F@Y#QUUS>6I6!YhSii6 z&7`!gOfllA`?~lm&5GyE9p9@$p4Sw*tI{DoY zD}KZz%-~qToZO|cx;PxvqSe;vD*z%k`TISnVf-!-p!|J@KAZ!6-?}CHJ3gxJ1J3$& z%kx>l3`pd~xoCWHTZV};Cg@N^zm%Xu!4@zisq6jeuVmYcpt}Tf!7YiY#f(teZ1kJhwn#@7!$vUADf_LJbCdc|T_LxYzm>)L_9=tizQgep$E=7@{&J_3 zex+qx|5dr^WJ(U7E8K@zNKpSgMx2W&B4?LW zu&gH>RUR8)L{E(qG6oF+TwKb{gh?n7Lkz-c7l!Siufc}`m z<+Fq!3UbQejNlIf0=6ZS6=(}!uXGFzY6H%xHEOg=mM{xXz~xrhi#nq<1jG&^^sSEyz~469E{*q}3?b^awz?p1 zBWjpj$KE6Pc2k?2|l5Gi1EOoYO9L>O*&n*N-XUmQ`%FXlL7nvS24MbiogT&S)t&5xWp=!*NmI6wP z6Z6K%Y%drlV9b}EEjhWAl(Ggz{6Ln4xg<|Nwe=r(BASw3>MXfbG>?#DE`BP*7lu_f z;Fad1<_tU6di*O;01f(4_1wAwcDA0EeV^^8>lhu!FN4*u&a_!MlvFiy@_PwIxZA<(8NHuRd50nysR8JPfc?D1Hx zfc-Y7b!?0m8X`KH)%-~Tf;y1-CQ9Ce7^Bkw~D23 zX)IYwr@9Pv-!E`xYmuNX+B$vugnM_tHf*xhVdL$V6J5u<*L`LuycJ`P(hw0UC1+rd z-3guy2l3b7*oSTW+8=Yu9WX z0*qb8**Eu+8Rpe#wAtkp52qm(j;(&Z9zJrCIY_la61kscKP4n0vxN2Y`OSp#X#8kY zsaebLRLTT~Qt&8KNUlv9Q|pH|Tg$S34qW+JO{No}Hi^DAjvB4jbsf&_-u|gNDJDvo3mu8r zNwjeBpkI!kJStR?Z#^W`ONhWlV|%`!pke{I%6uy}FfSy0`{?_AOt|hW%8AN^P0s); zs{bqDXS)P!|3hC~r&tbGD8BeyNzNLzD29V_@gzeciIQ}E$_<;gom)JEj?x@&NzX~+ zVkTHRK`+m^EuReCa4l*43zi<@^m_Dl0Jf4zUN{o#qRX00f+VEJQeW;bg2!pGRa)I} zPI~)JzNEvdl(-7y1R=@q(ar?2^Fj8vL;7ud&cJGVp5bLcq~Hv^hHK=v*hG|l1nR>V zO&++S6+$9ZL1(FEJVx9|Hy3VWQj{EJq2BnT!t-zWGerbF;oLjQL+`fTCMKU3%aS#< z847gc&X5+c-ft4-^+#LX`ji{kc`h89Hc6mK>CuE3^N^rD#~Tkzp?TI8{<9MA?ftI+ z;*gkR2H`3WJ%g2eh-~aIEc`{`gd#g%0DEt*_PcSeLeQC4ED`tK(prRn&V*^1tO*Mjq zvQ)n_opAX;-mZl3EEYz1+}F?fNWNbg$Rdb>(tZ2&d-r=ojPP!j;>I|!Gr?>)P2Oz> z$2;4$)y*$*dCG;su`ZEB(BK_L?aP}ymPD9S>pc00M~V}O6A%_PHz!S*bd}d~k~_pLQqdhnx|{}1FYv53HjjlbzAkFfoslGyy)5q_8^Ee#UoHJO zql}ZL1R`XTlRd1Bk!i6If zcr{r`J^^TYfT*eLrROvVF3i>H)o^gy9s1Q2swtQ#Oh8Z}+%-I)uLXp{0Gu>94B3laJJudaIn7qk;EYO>geDH4d$iO1_ZN#Vs`&wNRkDdFdtJc=%mdeR$ zu=Xwcnd0|K_|p1{tObdYOi5!4exkxQgwyA?YADw8HIm42=WpbVF+j_OvmH%^d8w;! 
zefJiDA(GP*oian3gu6EGb-wFz+;fl`lea87R2Vo-tDi@`Z}oT->kf+{5D47NSvIFX zBDZ7>+IJth(RN5h1=@w`OsGYha9L7Yd`EHP5zB%q6Aa0ms-1Z(|0c$q$GJSEW~Ix$ zea%D0WFaY$J41hL*f;JvVW?1hjs54GHcws;miKtpnug7kHx3=v_`zi@ne<&aJ_*pj z7u#w!0)vkv_~<86Mg{am?$D~_e-@QNZs2qk5pa)|$Qgyc<1*&rCWsB{3Ry?bSPm=m z9ehbT9jb~XrIDXGPEe~TYLCfVEm#&00rp|M>SCV%ox!mAnE#Ija)lQ(JpRVW^AXNG z-5cgIYm*ecdHt{{$_33rM{g<#0!ANWZsWl-K!Zm+e)`yKRB=)bhif=aWGa|uUavB* zzSuOq+@T>*tU$+$Zv=I~ zqC%>frQqV{avi91DbYc<3$d6gaiF7hjbMY?sSKu(sG421?c3f9Dtp|sTLm-4e_r_@UA667 zkxZ)Cq<_9+cF_TFX&IcRP}0!nL~iiz-sWPYox5Q5;k{q^Hi<90M@w#USK&rJ9o{W^ zW`DsBD5o(hKzv8?0C50`fJZ$C2`_Cu7Uex!c2js5-i&WnyXLx;lE%R(U?>vhoTX3U z9~ZJ6G!V5$H|JzDC`o(YAk(DYDl7?Fz^rjt%jJoZH_mP_Gfv zTb(ra(>9?Z|KXMIEJObq-OvCrIxF$J-5DX8D4eQLez4#n_e3-#&V}2b#yIyU6n7<_ z7B4|$(Fs-+QTA;;ufg3#6tYUvj~%fhi4&rHAp8=({S@!kQ&q1TRZjkkDaoJ-d%>6+ zr-rT1(}U)kxOF|hoIzyw$(PF`-|cO}LJz4R;{j#9`_o^k}t=zyu=RQ@#go^8|Q)f24rx70KeY@H9; zw;^i}E?(;${7eI{@#!5{^ao;EYnXx6u(eMkgEki^4-;HmljGso1KW7(`?=iwp)-W7RpLu6Am^ty* zzYaYlOKgoZzxgBLwpW11Yr%*P+EkVhNNOA|4^c{Ile9tCw5(_u5e&B?xK3D@`3;MU z_wX^RUrzv~6weV{oC<(}a^~KX`U3wj>W@>U9T9ea1ozU$#kO7B={X>$FoOW!!Ar6b zWF^PwgL!@7J+Uvpwl)*qJ?7!be=;&OX8?paU$j+a2X zsxBLfsL20PHGXYU2satcenN^P2x)}cd~@ZQ`9twBLs%lzlS5-fVkIQN;Mr{W+Eu#2 zb?@XLO05{+l-e}Abu_j6Xqnj{eKq4d6mJTMQ6_Zsz4z>qMR7;IYXyyrU$ZI&pq_X0 z_~|GVOC;Q~mf_NW_U=anD-9R;tN%h|I>y7dZ}XX$exf(= zfF2BuuH>J>dl_U8-Kcq**BEGQj94Tc<(CDu%^c5it{pCqGDd;gQ7@kARA2}EqG9+w zH;G+&ZRE3i@hD-IzDu%Sw(T;=G$&gnHM9WfqD)y0qz|c7qn>u2Ge!r2Yy1Fa%8ThU zTsy9{-Lrmr@?hK*KeD)1afbi&k(ioW`)m^;1hRbbAkK#?w{V>xlQb*)24+-{gaYxI zP4B+`xhJ~Uj|6rOavt}9SBfoh>|FgGu)w!VQ7)5^u=<=W#|KUa@}Ev@=1)u@%#y$< zLOjY8H?XbdtD|&-r9^K7dQnj%MxWzrnxG1>WLO;T$b}f&PY@tgT6E>B!D?E(e2x2pT37D&;X91#%Mkorry3H@O-B|4R-EcSi`G!vows zAgNt-TdcWN1}pYY6x*ZXHoVGNrG&DZ6hszRo;1{VtLGQ5dla!*3{ij|RQlO8XM$(W z%2_#swlSI;e8Pjlc}!g^4*d5&{6a-v!hExwex3?mF|5SkSS{}b_4p%U2g)p+O<}_w z-4U($35x|>y$E{smv-VzvD1?Ta6^WQDENcLd}cVBUBTdbk&#j-x5zMIHlv|u=z|4U zrx9z-XZ!vLG#c8d65tILJwvp6wAb>czt0@AEZevXW(YP&o8kt)65WT$V4s~g9zViF7ltun`7h=&E57dg zSl2T1TzC6AmzkQ!5P?*6ns`fT7dYWI!chr;$1LXiDpW^Sp|c|)_mflMsd8|8`mU)a~jO{|u*9s=AFWp?QY+ori zP0`}b*jT4qxX2)HZ7!z_s0YiQzsT6Pep9Vo^}f<6uYPc#AeFyZF>U6oj3~VOwO(I- z^4z!*cDX`Xe<}e5sw=dt)FWY2?OAK^6J{(__s`u~!xL6<-Iad1Id=NfYq9s%vewR% z@9HC+M0IWd*ZX>6f9(wa82Dg*_xUmRiXZS1`O*B=i0>hapI-Veh?hKY$;u{#zm#`} zmFT)IKTTfrE*G1p9yI&#NluFQNP{5uWN>ZV<;@&oPfW>Fd{G@&c{Oojql zJlFfu87y$Gc=_SuZPwLU=QbDe%Ngw%_Iyda zLpQj08{@v-gey)sk3#NCX#TW^)A@^igJfqBq-ZEIDZCI^xf>4>l<$`ks#9vU!##(a zM3)pWI;6j@=2=wN(2##$L+J%w5`uqS7fcY^M1NiOg@7yMza9|FDdm^=@6U&XteL^I z{q=dlJ!yq4(!U-+^#A^af28sMr0{>2!mG;qr`SpZ8juAXXgewROS9V0BdRM~peu19 zv~N~w7HXml4#Ls0GdxIVz>e9F{+s3BMth`*G& z_W|hIp4BCxtrHcn%kJ6{e0eO3_V1^CzS6`Pw9|HC)TRPK$ue3rlYuuTTIwxd(04_g@Ov$QeqYP_g2`e-}1n#*wr7_c2dy(eo<=e z``6+^hFG)z-^L61|6zW{edgg2V>@*g)FbFIE!0Hl<#1w20A>k#W&P?e;w$@X-!e{- zA9?hWh%;fKJrO|f^ZnoUE4EbtWW78s%If>XddzXJ|GfH}940zGgAc@GowAXQ&s=i- zx#2?Cye&$Quw+f;Vp}^L-}M0fv}gahId$tGSuN zCdSs>wzu~h7tQk)A(NSIJALs3M@F@rc!p`emDc<*2dI})U`R%s2s`wZJ&t6*{N-WE zoaqYH{oh7?4FEF8dm1Q0^fDQl*SFEU+l!gp2gk?kyT@s4QuG6!hk6)4jp4||Fr`e& zcA;QZ4XRaB>M6(Far?FYt>rG^Ajk2v(5E(_)HrBDUYMrSh$UShXm?YRdS`D{O(JFH z;*M%h$XQNSdr8-M%iRB$;-@<3IDrf9T_%b#3daK$*YV<))0kc^7EYffR4vphh~0aU z+P6dQIQZTE?Ma5#bFs9EZpw!v^jLJJ@ugJD7BFuYA+$3Nx7}znPPbdWMeS~JOpJCm zqEqWebgK#^U`ZMel|Aj?zhZ&rwxmq3Xp3bFAw5-i(Wl<_Up9J$?{2+TV|bpj zoV!k!@tUGV-I}Zaw{Nra>;t-v zA43UCh9iz#JAd*7To93ddF++YT%4=0aZtn~v~^yXyw{gOpzyI6aA4cpEo>KjM|y&< znW448QDR@AQ-I=Tk?V39B8j0GXIT(S9yyfnz}4N z7O55;To%L)X^|->pQlPw+pdtSJ`(TH{SgppQGt9)OnH+zpr?VojYBhe*j5 zu+?z>Q*R_=WCw_ARh%WF&@*y;62{iM0-x=3+DM3r!onMEtmg 
z7&E|VY>>)s?pNN(&j6RnZu-X?zN+Z}?=ofN-P1!R#%$xe&t!OVFJeZU1KAUe`wQi+ zmnfU+>|n$9pS74IG(*Dt6It4FKbIKQ_ZjPO|baa{O%wccpD za5^_a3Zo}Vo7j7+={dOySE3<7dj&whQ5+SzZ%}V@`oQD$zSNqiTz#Dl#@}tGCC5k< zloNc$7`u1JU2+xYc!f`&OZyjphwas5Y!_gL`9qR8t>JpJw~XPyV|w$BrObWsD$WA?+1SmtGf6d_g(HuwH@ zrFSpy$7beK1A?mM8!_)&Fze9b%^nr|&DyUni~bsyfvwu;Cr8=6YxCgqU2t#IK-VX( zj9U@fZ<1kZ_o~h^>~2h3W}Lmt?J~FNI0)bD6~QvM=A|Q_N0(7|=!ffi@60k~@aO>e zo4elaQ3@2YRc-!;J-Sl-OcxC9uh1$E!GlWCJr9p^I^REfyGCo)`;MNj(Y-=g!Ys$RZkX754c`f+hEF6-n07M+I34Jw<3#CXsjlaA zc=QFlc(nb7UApnoT;#7Iz~VM?oZWXhM_+eLY}{NYFFPoH+7t%Jg)?6tq`CjGs0gN) zIprk+jaGWs3l*Mz`JS$TH6U0<#tKI}*=&i>d?I}$r6NfzVEKvbwMsI(dVFDn$ENMg zSAaL~w+pnUZ8~cFM6!0e{rjknor0U_L(x!bFpu77VVq9)Ns!q~m<(^nf%*1s`!#sO zhV@lIf&jDa#r^faT+1dQxN1ReNMYvK;}zw0B2|;#R8+L}>-L5sda?J8V4$|vJXn-9(n z?+aeNkmScKR;CCozOwikUK6U<65wd$`O*)JE|#-LUBa9-|1<6&p{*+fZuje6+_tivd+sA=G*e@_W#I=g$yF#=cL|>-zs#(cmAdDmi?7=9hSX@jW zpmyJ%LaZ}s_|9GO=o!XB-jabhO;C|c1cWZn$-gfwqmKETvS+zCP27@uJ%hOC zxpoWVlkQyRqbgb8Ed9mMsSCi}&n;FO`o_lGenhI0AKcem(^fUEvg$5~PexAje8il&yl)Dpe7_N)~?eS^!F5oz89i20z`TvmnqiSP(cs>6Kcnfib7v0RlN& zUd|3$2UY?Nxed6_>Cc?ZHsP8sq_D;psCOfVs@QGH{78Ct;Wrcm=^zG7# zrOufbx)ZJ)##s4%#f0OZQuW7-O}ia%t;I=gJ=2s!HE9Sgzca)m(iMzxSOuM8r>jer z{zfvvOM9wJ@CF-TDPikd%k^LtQ)5el$n@EDwuV|$wFCK(Q9r-M@ z>82(;rQX_1lT>J!Op3vVX;M4jI37G^2WX0Mcw|-bsQxsdKMQ;!+BQ$JW4LYVtPw=7 zv2jnfV6_NHo6qhU;n&Z+I{F|^7F`C>9Z{A3_0O~ZRX0AUhh6^ zWGfEsf^wx(`-P5dUK|wi;l@X^4)(#(?SquP(U-ag&2K8-E|h<-i${qcS&5e?V~(Zu z+Y7lQF+>TZ8K=va^08yc`uYgCQ6Q{=@nwq}Sfsx7#^{&x)hp#G|AE##+_E4QPGF7`D`{xm#K4llh`wceXPA+;Qbco7*Z4&7KH9ZZndp*zPcuMF zIj@Rh5d$GVa|cv= ziP-Lkj$Wn8_{QL`AHLI$tkFOI!2c6QO^_+G8uOdlSVfaJ-1KBYX!&{b&KCFKPK)z!lBraY+P+4>?gRK_E z;&R#7I<+Q-Ud(X%pfPXy&cW_w;Dv*hrb{h6*JhzItsE7vVdK0ZgGQBVIVkm;UDii& z^(qCI(3ult8PKhPi!U&2#DJyiY)T(rEIoB|?EppVj}-v{!MlmWXeo-N(B%dFrRZj; z;4prz#afp7L&nqz5*)b^W(mFJip6y&z#bgJtyI)_BSSUeIAhFKH{Fl@1_y4O#ok*k z&&~dRaJpa&RKTJ+lRB+;zF>1o;r}7(Eu-4(x~Sc!6pFh;fa2~B!5xabySuv-OK}?9 zDO%j!HMnbWcXvO$-#Onn|L>6>xs$c`nrp6kVLf|k4VnB%jxS;!;o#+|bZxs|C_>3} z=Mv&hx#6;nY_|iEjyhP1bgX`3iapu(5zIc>_Pw7>G56s;=?14oGsrvQa=pFTpENhE z%spJTuk65&77-K;q$Am~&D=KX-;Y+oLav+yNEUA{^O-Got2Gk2zE(+Kd{ zMZ*IR_R{$l9NyOn#K63GfX{Ekl?c$b?3wm+%pq0?v_d`U)7bTkO7b7nk`AT3IzsdKr%_p3wfZ`_>89%nr#J>cYWd&-N+W&oxt zzNz%=O6Q4O)mJBnIR8m^r_VLyf>a7p84L~QK|T-Z?No}0i0wq02`87$_4icOG7G$ zTvGF@o{)Tsd*<_WQ}H(Q?q7rIuyVi6-m!#pWKVKpH-UM&-`wEWQsv(5>@0vgdE=C| zGLZAJ>mD*-QcXn`B{>FK%oO6?+Vv5c+~`}jsqyq0%Rs%i+Se}?`Bp|+c7?oHY?(Si zi~IS~3i`=Lo$C$F+LEbin7}3K+(P9=|OY0dz0nQ5iu_3nX z)lL5IX917Loe!5lr%nfy5C4_^iEbKC>0Z+1Uw(^)!r51wx(dGPnV z3G7|fY-@bI`mYSK)@+1_V@R`EPYj%IyvCN7gGLei3W!uUN~eHUN^p#YRs52f8!z(z zJCSxB`@S$=DBFeC-`~8VI(ofAlI51&zt>S{57NoD0CinlvacsKS##tuzrka9+(3KF zjfVJaZmt{XuB>#^ao#s#^yB86#kQ4rZTe2^ch2a{jMc_6+zeI!U_2SCjYb}AFivv) z^)`Mswr_wpGX~ZkI=z53oj7A)-&kn}9oRuJbKk;`l7eERH1Dm{^hbW0kCSwDHa3M= z4vK=8$D6;tu6zAHu^VK1UAdd=YR|5$TsB1gR0RxNaYk7PruC7%xOfkiPcx@RoxA*o zl&y1iMo91U_SCs=6kjmEDtu}aFM_abE3c;zq0M5F_DO}ZC$?rE(s8z;I{f^j%qzDM zCpJn#9OMu^)44AUNC z_=i*vt}t4&Y)PMv)6)oxS1FTrrB-CRFs{QMcIh`+-8-%wUw%T)FEqv>jCOZkjo0&z zGBJ%?dt)RPT*^yR94fX2--S9?LB{7po$Y^c>al23Q4FVlruOkYW=b6(3U&6Wjk2!aO zm*uc#zc8FVx^1zF!~e0!pduHE>JY2fBIExdsLSsQTw<)%ui2IVfy0~{2YXxT*xV?A za}^!ZrZF?++(REUThP!kpnGArgKX5%ZnAL2Lm1i|26*wNDR#}9;O&TVZDj{3zY{lP z382K$0UTO(*C@-jLS>5koN9B0;{pK=geDQZpq@|LX#Cv+ToW^Y#Tf&8@#gw3@4X%&-Jh>q~~9uU)ygGHDH6UPdAJx`ZrfWM!SvHK<5W!)#E+Go>mHx~=Fxhv3iB zm?q&NF~#OkYnChjom*Znt?tq#~`Bs^*mAr4KV?Oc$MX*j+MA(xo$A=QoT~V$`oG+Crxy!WY6ziBk6a;c67`wW7x(zLkm!Beq7$T$ zs5_V4QRK)5oEXWlzB9HxlOWXQCQ9Liz36Fq(6mv%Q5u`JWj|N9v)(ll%`)#|LcF@g 
z-|Lin*bJAF|FsqbtmpUm@;FHQtWjo|2$-SJPY8uJS!-)J6&7JCn_za^ymRcEr~|h( zGO;G<%)cf?)`D-jcP`hon?WQl&%Qw9YwELejveOJQ;KU~%dFOu?oCMC(WvnAQ zfHsr?gr&cT30#_(DP?JKpJ6d=6!ON|`?PZzr$i)5Wub|)qzvJ1KA6w>Pelwe`$m~Y zb>UhSBL1FnYd+HKudPtcICkOki(@|e7JZ~|SDEh&7Fp@?hgtNPSOgr6x~J#!)@leD zU`DFm$~Fe6l=>I;>=>vY?m3_r#~Y)-4A?eKk7J3EKXMcB-*GnyLToWIMPmkTIM>Xh zUyaM%DhlP(EI$1WmgAF zFJ3HB+>0p{Bmp){`75@layP6bS7ocz|wzH~VOH?D`6jjhgvs^V_BT~5n#^XBYJ zJC}(n*sh)Yo?>W+Zrz5L*S}AKch$0tnu9H-ljvTRauW}{OY8fVLhqy>TfP@6^eIjs zMz>V*s*jnPR1o+2oe-4<8*%ma z8R+wzIc+9;z1Ps*iT&-DjHcM&LD$YQsB61PvGQq=T3@op*jiv{ad`fo*Y5$;{y`%5 zy2Pk5(t?$Vhl#5Fsd0THs&w#MeTEKfwZJhqTUh^QAdq~*+X1JM6t7WM4c{(`I)lIb z8a{kSF8DF;`1tX@*90V?bhE&ZxVj9c+xwKw03Y!eABRM%l1X~;Tb6QQtC7DOJNq{M zMw}+v%ShJ{8F8RrwD*12O@<-TOH20*r z?ES)x>LBfEnely@%K4jKfX8^fEid8e)50%+E@SS#&=^OBD;)oeT1-R-L=C>1n7UJ9fB0ypj`C;^by#TV5dXuD<8s(LW zNALSh!21i=Sp;65ks4L_zq{Ow99N-kF1?qpV8cLz1Dl5N$=;yvBr3XkHJ#6i#kiMM z+?1<#ze`w*+{?CjvnN!Ijv{0MG;A!{W^E* zwIkk1CVUKwXNiW_G#%7bjbg?L_z|6Wik9t5@`dmgI3`ZUtVWmj_1aDu@)#a;5{mNt$g%VpVcDy>*NEu^P-s$J@A(|+B1MVRS3!~RuARm&7R z{!!j|-W&WU6PCmPRIUln{|;@dLWN##|DVBcQGd|e1q`4?!f&z=&kOq8qR5Pmu-Zy! zYkN0R%F>?+L|;@x%9Vj1)-r34ykfecL?RAuH<$k7MQTJ z^EFd6+pp!5YXxn9xUCI3Kf=rg@*dRl@7Vx;Z=1f6&98y%)_fB{s3W4cq&{xhC$h7z>$|p1z_ryd?Rvi@DR**POxmxtsPC%|5glIY%?8^86<%&2nR@r$ z|Fl`OiYS;T3ZIErKo8zIfJUv}yKZ^1TLSyVnXIhwvMYqNb78>n|v?gYJ{fDDZ&)dig(C;!HRglM|2~X+4aE#;Y zwfFS^#(=1BUHX z&o<-CxB1r3>XyuT3_*Pgs2r&|sjfTTyjA4!b{pQACGIdEp>UR=-QLfzBNEtr^o0?0 zI>9=*w{>vq)Jkz);N983<&9Hp1Gc2G8(50-Jnj)8jKi)5zQ_l}PwrqNdZ9pThiZFh z$5kC7Q2_~`>d((^E@3aOB?-3VZFmgBP{8{>f{#DP#(D zd2B}E$1&Gd$-fs$N&g?gBg2{7SPiWi%atiS4(+4?!O<_}Qr>8bxx zF$I3U9U|n=CVn^W{t27>0kUPPTzs9K4K*|<&a5&rT1r%4*n2#Bh_q+b26Z z;&a}*_oQOQCT^^D8II_TS#_=-`F1~udRJY&U043}kdSoM48}8j?t^fW_~Tqf-GnBz z?2!czxmW*zT#5XAAi;Tpn1XD2{$M%uGQk-9&-6f6RWfJBFiV->H8^{OA2+DTn_MNg)W^;Yo~y@kp0HKXa&?|Eo-k10TG(cUta_#STzxKKJl;@#vYI zb^X^Nc=jH|6YH#>;!_AwFP3T(GUCW^_N$)kxyQM)F<0dqcZcf(G`K``oH53t-lt9+ zlrNl&i(YR)*)me;{Qfb4pB*?cJX`#FUOqZ(G~SKle60k}#lBek6f``5UpzpCJvlYD z)>npio!0X&kzrJt9ri)uiEXqfQL~mJ!(E#Zm%)zMh0@Ys2P9*_Dnlf3+7Qawzg%-Ws)DWfoHsiYGwC`>Imch1dM$m?qLZ`T5iv`eksa! 
z&g1b4j>Gdy5FA@OPLHL3rAqKf=O$w=e)3goJAPJayy_$T?$<#F1ONbA9hKG{-kKT@ zqG)CZl<@$DCA>^K)r84{jAZ8W2I`**m;|HRUo^5>QnW`=wZT-BavI{JiBA-yaDb6L zd)Wq{_lR243CHM{upl8V-LWs!AQG&SdHT~+jkNycBRqs8s`)H|d(=j&B){w7Ot%GN z-uA6p_8p5O)6uqiSrlD7y+u0{=pb%?RwxnIlAzQjRhz^FArZbArFYof*WL(zkc z?y-O$jFAQjJXb1a;Yd^kjY~jZrA}8v1_n~kd#TG#RhdO*zd#C7ImlRvm4Oz^Ek#0f zvj%F*?kT$|rjjA>i&-!UJc`Z!WB17+2X?*54D38(+PAQ7!2sPSd7-q7`-pETNX4{b zD`bm7zANVz_ZG>&wNuQdDG9vYAnw;mWUx>2N{y#$Q*E957I>^|*ug$w`9*z2c)2~T z@aL!D+KXG|Y1BP?xw|Su$rSl0`S+BchTBEwFU1PYL}DYNc_Mr+KE=V>X#UG?JJMsr+^T9tT2Y^P157xeQ+Lrqtn(;O=D{tx z{iA!s+z+%~;Tp*p5cpB=XnCn0u%=9i$TklMcGx$}mWckG?8|iKdFiM$(C?ZH=XDAw*XF&)OciSwjs>jRm7-+;6*qD}wsINi*+h!9M9TUmRMMr- zF3e4943kYycF!K60RUSh?%MRD4sDAXJ|u%U>OR#N0G>zryGTTk9VL&w6f z&2}sjVFER*NxW9S#6hZatA$i)E0!RA9X;$@Fgv)=QjM_9lkuO4tv`d z-lEfpQ@d8jw|tea#V<^wLAU|WE*tOe-KflmG!#3sA^LOUGq-(ktwfo|ClM;@!98Ib z01FIM)tI}wT22&30bc5v{~xpC^>_yj{9~Tp#>JJM-QhHi?^PXY9!h z>FbM!b&q45>;{^%MDq}t`F+A~vGmq?TzEW4>ykNY#D5RSsql3qLViLVHl3fF zEz1G}?8Lj>E@mX}IEhx+O*6zSQ8@yR29zAY>NN=ka zB^8+P#qj@$?rIk7&nM`XH}nDQUj0%biboZo@> zAw_caI>*hoEW1ljh(dh7OOSefo;D>G;5Tp7s@ADuZ94JV3kDc7L3$=|ic>USA3Hps zCWW5R8)Ut21cMI#XN^JeX!LrBd5JEyHHfgTr&eL$F-K|FQOm<)LNW$5SDb2nzW#qrlMX?dM# zbL^$O6?Y7G5+A5K7baZ#u5 zv^*+Ls^j$Ub9dn>HEs>+rd)c$b-5ymM#}#1E!;9La8xMpF*8A7>^md4ESZPrFIpkC)Vm;9^|O`UCcR47pQg zo+C*;`KZMEMxKC&ppG>t2}k*b4r7<+)Uq;4#X*?s7%`MM&n4^ql2+SO6XA3 zj-`7_a<<7vO=gBgNcRE(KB8VYeOQ6|e6qLm#C+a(JIV;N8~vZP6DnR_fJ^pMNUAmy z;PqwRjtrEL&OPr;av6OO8^%)`1vlr3OPe3e%T((&SMPlUuoXe=Y3MO8^*Nr+t*u79 zG?(>(Bwf>a0f%+Ae(UFoS6_K-I~AQTh)DoVUTt7-+cDcwHhZc)J7eeYdq5U9Dx%u! zukTq?yeCfA+8JJ(uJapY^z$VzU+$_9+l}gCl?jY@uyo6}h&o5_9YS6jf+F#MPdMP+ zf&3Oai?|JxCqz1%!&2mFswR%=tDe?|Q{BBmuUB7D8c$Z~y|J|7866HAVS(OUMH~84 z_~+X{JmEv&@3eCqEQfB&{`c&{!@5aZvL(Oc6|TNT$AO!qT=w0{Dn+&CL&*z@$V>X& z0Q>VSPX4y;YM16tOdrPl_g>w&BSR0Kr*?Uck>=ww(-C#Lm_!!*St0(N*In?adYFt? 
z|9S``F<^m~WgrgZHqo}Kr(e@#0nAeiY!MGSoQ`B5AH2XOV-&1tPbm*u<&Vh4S z#|#cgcgrG+{@rU623`h(HDiE5L%376bD=qqNb&bm~GQA8^EWxl?Wi{EXZLX$<(nP{xVK%qgLv<7CjL46BPMyXUj>ve zHk^jQLI`~&EMj)HQ2v=^BAwKn>AQ3VM%3_vcn6~7WWeZA!{iYI#RlJQ&zpYZ2(f$M zxXYBOQ-Fx2&+uLy)J~Y#w4+LCf9i1XyqA39E^CYZ^#DReo*eH%03{rB_gxPyriKfk z^OLAPk*hSwygLNT>Tt^mirfjt-F)N)a(8>`xm3z+_K?I%{2xp1>lyE-Fw?MIyK-$c z#(qj+-ZGL(m2924H9_8*#rffqet86U%})wJb!}I6=N%CPMx@>@yjoz~?50s_gE*o{ zQEk2m1yI8ZMwbti>%CvCK?!+xb^E%<=`Y5+kyk2^3ljEL%>tx+dEbj+zxYxP;rH19 zT>U$D%@VI=s4cSjfHeGmRp;;6J&>Z{VV?%t#m7UweBH4Gew3%&t?Qle5R!p%OZ1$2 zK8-hpn!!ywt??F=3#hxZlKAo?OWuBdBbBP2C|4|zmLaNZqSQhlk z`>ypL0>za4M8Bs(;s^q&Cn~qAYn#V~VsR>vw2F?6yi4S?q>#pzbHHKr{62P#i&|AI z-8jYJaMsl&(`3x_*fI_!Chuy)YGDxThH#(O>d=qW;y#PVfMauX9PQV3}escAB^4?#lY#ROm?{QPvF^OL-EO zdgzbo8pV+5WppVi#%Hqp^eU~@*p{|5ws8gNe_}dPsNRoWLn3O^mSVb9a5K6dH^_@k zn6E5mcLs=QxL9Q{!!Doq;@?vCdGDn83GuQ2R2cnM%loN#+qy}`zQ0i!(08v@;tdKC zlBc<$S|P>q5J+TxljEvtV%T?Wlt`iY<&^&QS;}5gS;B55^k2MUG6OkCCBj$guq|67 zqI&>Ur(TjedLkTn>gy&SVpGzB4S!IT%)o^owew}VQeI3=yj65ZX#}r|)5PgI9z^{$ zyOE+sKcNcQp~dvoWXw&q<{#Bi|0T`PuPCAw`(C@GkP8ehST~e^fF>;E{Tf6JKhJ?Y zm%oV;@KGhQk!R_4f~Fab%Oi03sD~u(_ygD?(*>jq18&1J&clG8^Vp56N_Gch=S9(u z+;%&pHg02^r>PIwD8B|UxFg`|kS$!s&A)N)!1N+|b>VKS6YkIAdvYTt*GjGP6}kKM z=0o%)MPmQ0vvKjy(1Wz8I@c<0lH^*%HECD&f{(^Sk47waJdPd|d41eQ5<=p$|0y&2 z>sb68T3A)U85@Zo7Lo4S=w-hfjCQXHg?7$)zoIQJwh!+NPczGI?=IpuVC4Q0g`H6# zWBk4PffKWfDSheRmUFBT00K(h77;b-M0xREL8NcxhT zU|fQzr%Qj8hf@)PiK6ffbdc7uiOF|N9j6}K=dVlw-^=9mER#>f5&~^>Un#!ZV$n3N z%CjXhmDK;K&+VK+x%D5fPdu|5S$o1xI898N{lm($_jrYGsh{#Ep|&I3*+@i~?{&Na#77gLW4(M6Bb2^l8656}jaQwcZm&QnL>Ew)kMasa z=wY(K4;vv%Rb*>*mXp~6zW1lgH{>eJzcG6;9)mSh(Lll_v@Y^_j4P{T`!Jpf-~)QE zEc$@yYwrt@p{f@!s;C@aNQoxvx|$!5zWEOOAVR9E#A`+qs73#*nVhB!1bF*BcJKVG z=TPZ`>JGXD_zC@QAy`vUfcO)SYrEi?j7um}OZ((#JB~EYP*bpg(RpJ0?>VKEjX@*5 zEzIHwUi@(CsLjub!8#eE?W@}W)wGsWu6Rf1d0=hBbl}Y?OqGD|`^b=c&*$X4vu(fW z)6Kbfbb&{$8qHbXPE|ZjDs=G{8VGm=gJM*59A3FKyH0MFIe&haVXuUEnpwv-B+F`2 zSPu?7R(g1Je4<+Q{s2G;)A7`(3cp_ev*&1<<*ll8kiN-0X``JqFuXh}a}8hPl7c0d z>>&OxR^?WQQ2|x*wvDL3PgDEA*MxqGFw$=L>ptp0s>yTu-gVFw7Zu^8b52%WI>WFtJj%r3$C}X9>;uVl1_+(JrMps5 zTY(zbjL9Gm+L|Sg{U(?%y`30nYk$@j4|lZ1>03XU%`!ryV8Ol0=&_m|5}ScFEk;m` ziXZby;+oRHKO{QQpYXC?j-)z+Ma2{3$=!&sf?*OQZCp0L+>93W(sRTF-({LDT4vr} zMs=}#RgqRge`Fn(1m!!78B-VR!9z)rCB5FT)!T=0@`LRlXYHE~WoUiqSqZA)Z(Jcm zg@P@BFfQjHf$Q zawq$SEVC*6TmfQ0n=y$(_6~sfl{TM4bp!>>kpTqu-U5$Ry;zHQzgzLuceb!UVIXXA zC@ZH_bM6)px-7ZD0e)70DdqisSb)?Av+N+2R`jDW*j94n6}9-=%6@H(h_K~NUpk5CEv zEhH+T)1VR@hoV@c(sqwEjC{YmJ!8bd4F|2~%DLmH{@C*2wB&YP5^&f~CE8K%Sz8bjz+>eT$ltJ?M)@kbC?Sr$ z_MS-{mu{CdYR1$>N_R<*OPGq3&t2|`-()Lt+-Q}R$pREGT8glGLdlMFP8SHbEZ)Uo z;->ZHB455sqd>2K@}pq0+3}L%Xch3Oky=S;#)v*Z_Y~beM{(3Oi6SbZo8A}XySL}} zuA(BW7;Y#O{|*I<-AbdG*Lkz_4uJlPhJ9GMl);jK&TV~W{=56LI( zlN27+q@jgh5s*ZUO~X+&=vaz(IA1dNPj2a_M^ZOu5hc?jDFgtO`8NWVTwR$L{tfqtRCOG!=h@uP1PRz8oTw9v9#(>deT?uno63F){;FcBE%wzGe$Ha=hRi z(fbHUKtlEfZC+PES z_$7`;lhe7e<$My2%71Zl@(#KUIS<_3dd;;mr7evd@aY?kqCdBAEpJFvA2#XOdVNmb z@PEcxP`mN9e+S`|3agP+&Ye;{%l`?ze3 z#Ec7fk-09N5GMNQ1w7rV*QYB#6DU#&QQE|OC8)V3rjHc3t?dD#0@pzn8D9UVNISm0 zJ*`^nXScxhox^Fd@V?YO;Z5c0N(?TUi8X>5g&!xkzj~Z=3{>aIqSSN$JwhOAgEQV& z!EM9A9LsDn$XO%T3O(f;Zh4`?&7{x*+mfIVjj>$(nRq|37jLQa6{lp%9~v+TS>3?T zV38H^KfT5MHX=5ZY>^Q!MB7(H4mElIIJNe$vz6TRNFeAud+9c3EM2Z}dLdH+l37$0 z#-Q_vkR9m)6`GP7^Q)NP6f}&Z7M{+iT3W8O>NHvh9Ry>asgtP)Q1mm%t1=|9HlQQJ zC&r18XDePZe^Qy>Y|7~}Nf`Zm7Lzf5A#OmgBvtfH^s++TQ9YV*|K?w^V~tH7o=VZ> zXgWsexbZyn@7DY8OQbyuy5B}!Kfnc=)^{6W@^0BHaM zjcR?+$g`KSbn(IT54PL!;QoYDm5}eowH2x7dhH%@_sgtV2KrOHZZMK?|JG1?tV5g; z-X#M(g=`p?0IKIpRJ?UrxrdzA*??&w{26tZ}O}ALDlnrh{< 
zr6fz%>6|u>i@Xp(OY)GFSp(%HDn3Q(P>hS8x7P7_q;$Xl_!F~XX@ z2S6dkE)F0QMCS$n%?dur^%H6Q@|Kj<;pufmRAxVT*Q*2=cw8fPW~YcJC}B+drY5{} zGGk?bRfgZzTP>%mAt9b$J{cqN(?z*9R_=+3bt+)lhxQyoy5U~4ifACpOH+86SorFk z_kX7?(rCT#m6y&&2 z4BGIbGMPnU?1{I9TSXjt7f5GWyBEDx#R-1s?My^6`6pY0DDvc9R+QPTacm46-lf`> z>8m<)+LN3%_9aM(_r5$;(BENX;m8!xXur?e9l&ri0dKmIh|DI7Ine+lK^JqwxH*gS zYjL9=K1+xOzW1-c@Q`GZlnr|xx; zLIf|GNUm?rUx0Ri9r_QUN2>sya?Wxt!t5Nw+GRWMX}ix&>0}>DNpR7armaJGll;eeBr2%=Dv zFOt(&usm$JqN=|M#VRS2F41D+cA6g%0Lq9tBXL(cU(*8yME1yEQ+b@O${cB2tdW`Vq#Vyo5|k4%-UYThzQ*r~GQiQQM&4t)|WrGgGyWNWF-w0J~5czgNV z@ml6Q8;}r2m=RSkc~oW(x(CkwtJ`-)vF50r;^?iPYEgtM z5qcCBVoEi`q8{p|fGBuYuog6bjN%X5_p4)JhO-*a<<6e4@f^;g9DB7i5mCbx;G!W- zT4{!3CV}PUN?k_wT_7jgL;7eKlE~YVAs$UqO&fn^qpe$mH`^qAf_);guhC*d65aK` zU+sese)cPG{?dVC4U^O)u;qC2n6O|oZ5ds{3fChnufbC_aqIPIHEA7{i5Av&?Ha~o zK4u3lT^=g`Y${1ra|Jv1J{_k&nTrF$`eSgE%jAmJ%$AU?cu3bTQ;SM?SP2{1Aj#~F zi~JJ@qzNo!kp$w(A(CwaMsxW)_<4Ea8PVWGiWL{%cl^Tj&yldf+M-BpQQp5*rE;9; zlTgOG6e={LQW$hs zgigg z(^ar)oi;@p+=&%cGKj*>@u!v!9N6o4HzdeJatBw;SwRB zUayu?*;~4-U&lmJ$w}DJv(|NGRJ&@%mfGft0q<%zs%{X5TUkyT#F%g`w~9eB1ZBZ`*>3_f3*>v zSSMq%Hi1C}!^sR7U#{?CG|vos{@*2ESViOAO;3~1WqX*%e1?nMsrX}T+;cdcObcy` ze6e4v&#OpoOXfy6DsmKhPoTI%*yE)#4H{HV?gLqk7Xk+PqV*Mvh(3^%!3K z6ZTAF%Q$%W67d&msGElN?`a3g%PX$%y$#>`<7Os)Cn;u*T z&-bzV;PxjVDP`xu^e?gCZm~$8ylW6M#4HHGRQ7r#UIVQ1xEan9E9GSPATAw&MCcXp z%aoxe>HG1pL50ssP1KL|H3D*Io}ayrFOSHmi@&AMVVEmuRm2+W4rNJ+QihzZ zo+5`@JhT4dnF;sRL7%l!rK(|+uqS%U$z#~|U}c z1D{Est%Wp5X%i8A@G@z&KjdBuM9(?QNqhL9OV_KysZV?q>Hgc>Zm~xpW6}7YC=Mn^ zlzHyJ!v315HjqA07RUQ>NG`ah#e^YO+6A##$A@X=P$YJ~^d!E3ArhsuZn>zQ>=pT& z+C07eSRewnY#$9Mj?pJ)A2)EQvX^zJaP$wc~PM2wHPlKe7B zgiv0i`{1TyW9Z=PtRq$jv}m(5wJ@fFBsb)kwQ^uf*5eaUJMd~ zgGV9RY)l8vFz5P=-<>93orI3|#^b|oHC^hEOHvp683_=h7-{Vv(iP6Yub7RV)=_dF zOga^cui1($=#(>6x8~KemW*3z;!C7$DJiQdZRY|cHW#2a2|MJ1oB)|qkSn@^_zRq3 zZ)Rd!`x3h|=_r8A;ZW855Jto1UTZTGkF06=ADTbq$5bka22fHmyPAATJ-Bp5kLN`& z4gGKq+3Z0Ikdw3!!PlZ0E^Jk7FO zEjSz4&5>1iE&r1X|G#qfDy=n@RLD@0fRZRI>l7xTBd06^wgGeOXdJv6HeGJ4whqo` z$ZN?Z~auS|3<9d-K_M42eh8X?1qV!y_Z@qESwIkP zgn$s7Z;$O{HVNKKp;fO$=l0!xq%&XPkz^)UEZ(EOk4VbY)d@Vkes!6O0~kI~3%K(T zrRd$!VWR5bR5;pwag}gy8Zi9=f6+wDiuBxn&L8&kO}eFpNs@j=3#vU$51YffuP(pd6g1Th1w0&fxPP9-cG%i87~VP8mUl~9zAix!!( z=%#c?o@hob&L!PnBrRi>r(`<)n&rJQ)t?^Zc^D$g50oXe#6n)q)7asO#qjcQcK6or_zz@J8DlZ>@E~u>x7=i7Uk1=X^wg_ zD)DBEdN=rx@QYTfmvJRl1%1ffxDUlXpS)a#ju|rxtcf%xUxX|()?ZRp+TiS+>IO!Lc zoo7E2ZPRFpmFJF$j{E1Qz>WbU0=(;A$O_wtrLh?9Yw=)hsu+Ry*$Ep)F&eb!{ih71M(9 zPa1t12p{2pAumwk2$WbbiK%=+vh7-ewe49T^C&?owjwdd%9M z0}kEvf}6!Ao#EAg%n`COKh>8eB;@Eh~D^RE7m%rI4SUe+_X|{uj|Q8QSFjas6by{hrSG%{9mLLqQTF&iM`CCdjpb|YG6j`LgD&to64`Iz~P*x6i*47&`T$QIEzzsSlV6P z>+y7ksA0EX^dD+=H47$XNTr-*%dr_5L`ewN0F($o$Oq9|G1|lLa?#bU&L#JfW9))n zg6NX-3rj-><_6a?S=ZXLSXtV7jDm;h5JO&k(O{*96&>1F3)p!p^l;*es@sg5wt0$$ z=g_q;uy0g-ONH~1(koxqOGU8}YyBW>?9C(FbWWXuQ`Oi@V6ruqws!Ll1==>y&?k$l zZ|ZTuS9DEE?vE1yR#6*B>74b_>Gq-4@qZ^Cbq7-U#F&Jp(kgv^=32#2eiCRr8~~X< z8x1HusZEJUEjkoJurkeMC)R_eklat~V5`kp5}NEmG=9UXoq#G0Z8>F)F-sWcN(8|S zLZ+Brj!k3T%@%Msb|~KV2#X>ku0#jc<4a3hr?7Nxp(_#-}h#}i9D^JXtyCus2Rntr626?ogbuqxl)JFAIqSJ zw?{$$j?D`0g6HI`Vs$a-yV85Uyz2L~f%V0q$D*^>)Vbi0M5dqQ@JT(KwB9x3^sXA_ z`ndhM#;#H>FZe0kh=Q%^iR0vwiaBhN^eF3eUe+`SpOFai7*r*4N6>DtU(PB)2Z*5> zJjVJPl6(lZ3s4nS4T4jk^%58J zUJfmxL)ay;&7O%SuX4iruf*CdqMAvq{&sCfc6ma}lCHy1MW z;#?I+*b(-;Iv*;>#zYixiB7$Q6d#HeDw;9MQtKfXjf;jrfbsD!zn5G6|*;d%vT6gc~ zKf4y%gd5^Q2d96^NPO!VpUgoMd;1o8L_77|u2Yp3&vtzM-x}fKh|yr0MWZPE;WPHi zLHxi>p3pv1g=_%(G5679WOs>@3FP%S^57`>q%9HSc+XcoVxIC}YobHky>ZHYn(#?v zDV+?r>O#UN&Q`O%ow%-*shNP&nGe00@~bW>0c?L5B9^(%Cg(iV#XXY)YQ=^(%rE>XBL%v>L7+)}X9&G@eUpL08-Ad7Qd7GauG{&#sJW 
z*Vfy>CwfVL(Cqc3?SQC^TCNk+m)an@KegB}$p}s-LDl%5hf zmYq^;pcc_yFg{K;qUMb}cUM?7bKZYOMNyv28g6E{zP`@wC_maC6cKkwV^z9c0YXmv z7&Tn~e~B!c1UafP{BFXDpPS(m^sSetHSIxZ&!|QFOH6_tiPz!jHOCU=gQFbN@>nh0 zr;=&(qyPb(=g}V2S$3rEbh8l0xvM?!WZ(1QrF~uEp85cljRJTd{ki*H?NQ52M`au- z5TMyU1fAii1^dVZu|_H=%*8@^`lO@p&ZgvLcE|XrTg#JI|z2#S2P18Oa zf&?c7_dw9#Zo?1)1Pc%#I1KLYK4^eogCsaikU)aFYj7Rh-3E8pGr6DN`#$$Mf52Jm ztkWO%mzmwNyQ;dos`|Pb=na%+^_$lY-)_>`$6vD+OftZsEgL!h{=<4vvUoaxIPNh0 zhhbE)Wm4M~Rhm3^K9=b>`+SzTX(+YA#t9V}iglu`E8TMSC4^4y65MymFksrrC{1lq z5P?Vm1Xm>e%O$&$$8)IwXL_fXW6^r4fgdnlaN3TNXYv5-cpRNS*O)o6&M>6%?5_y= zxUMk<|H|E#avY&+n*8M^(W1@O*szvI*8mg-_pjXVG3S z1tm!df!u9#?a)y{+gz+}6BjmYiCG13p!$UIUYI@BQgJFp ziTjB*?q9NEk#Jx+2MWfm?o5XAu9>RlWZFb=n$OJ?PT5SF`?QN$PyZPDugEmIlgiM~LP>7_RBRwH(=sC7#H2tQbR7T?`MTF~swT%|jD<*Wyo9F80-wK5r z2P>SzULp{%8>{?+-)hjSL4?w1^T-C+j1rlOY8k#9BlY=guNFq|Fgwas~7qiE?tq&YG`#A@F zrvj8ly!sKkpcOoCJGOhVjTZmO>RiMS|L{{+V&NIXe6?vpE}I7#)fCERYPIO=lNZ?Y zz~Io%S`2SNi?^>f?4(;h3K=k13ceNuvbRdCZ1IUn=Zya37us^1S(DgbK@a9**tC~! zSu7heDoLwD3JDCp@YM;--BI7ziJA268pi4Kt&O>3<*nc|tCpR$&6>DeBqqef+X+Ez zvpaYx*`5PT1I#(6d6~pqp_eo@3=JGtv8@s15nwmDJGhKc1I#Uu)DOaH1 zg!Y81(m-mmY^`{D(L}kgChblLfVa=H5f}S0dyy2U=6IVSb>s`B3p3Wb>7S8|Rz|Ia zOrseqgn#f-I=QcmV-dwvCG%v-3)a!vUQY95n^zlot?%1hF#4$0-E@tG8?MGAo zb$0c|a&wl`Cny{tzIvqDyZ9ZN{R_dzHEclyWoLIO-oB*76~VPL7-K`p&>FL6elCNL zira$>fo2F(eu>vY(orJ{J*hCRsQKJNkIukuc6{(ar)yJwRb78NrYaxJ&OiQr>*Ax! zw49H0U~M^Q49B?;}*a7Gd$?tGZJ^+{d25p#ulWvZx{b7ICHM-(mC_ zXQTIOKy9GK_fnNVSqFkbU-53N;9}s#r`Qh5fHzMc()Q-lHQtX{^LaYDw*8pTS`0q{ zkVL-Q6FKuJ*Ky%)F>BFr5=}chIn(TvkZmLzx8`Fmj7Ma@oX#s}p3L4b%tp0$Lk$>mEX;r*%P5IGbGE){1mgZ#Q%~D3IR1zroMY zNJt3mSacp3xZvOO3Wk@4&Ct=|)cjgZ*Mya%46;_NM0LycbLL8(ClE{IOWv(=53xpj zB$TB^r93>=HZ3UbaHi^&y@MI>Hn*(J?nN~gNXD7*#Svzgs9v`6y?+Mm$FM@7?m(mO zO1zoG;nn1Ez_vsMa-;5Ru-SKiEMa0(?QJFozvRZGZ2DyM{_D)fw=|-@hAt7Cv3~x* zr80fa`o?)@zoa-*a_|bFX5c%Wz$#)Zc~|XtII0)fkHu%pcssR}2qaWvZVNBB9>BNr z4q`e&K4v+R>7s5;y}=c4idQ%gOO-fIZym#E`FKzH;u z*h9bJ{)uw{6lHVhg!Ezd^W`w>jWxYa(<^UiUVl$tloK(YK%GTFEB1A7ae7(x)y~_H z&*+-X^R{)9GjApG6E7H7oV^1`JVU5RwC(z~YT1}f^1hl$ONpXyci~3A8oBcO{Tcn| zC;5e*1uNIM*uf*MAx5@%Dg`?g;^#JH-U@Mp$k=C6PIV{VL3bvETr7LeMjIe~ARD9pk>C z$@tLqF+B2+>^9a~+dR!dj?#JsN)F+yQHQlJxGfZ;c3#*Nsb?4IH#huP9&IAlt8zA=r4h)ANbxwz^1c_x{TAcgH&PCfFCL`0%nc>aHf!()Gw0!5aZ|sB`pp}T{?X)IGjYg$ymCj z&Rihn#JcHya`IA^G)CaVshB$ROAh(2Didfhu^v5Z%{sTsveD@FdLxJ8^ru5&+n&;T zN(ZlM^iheho|rWuY^z(U`>g;8n=WmWKhAzVn@pUa=4nrVDa=MAa&uaK{g9t6&KvB3 z>*+_;gF56dNUKxR&yhvSZ^zL)Ykg_@&(KBL3!dGVxrcyN931|&-8>5WiF+|Jr>c=y z#7B>c&hT!&!QR6v8OCd$k4m$saSvwF`Vzqu7QrMbEgBp0e(d0F!yab!FIfvzL6<&h zO?>9aqhL9Oc=h&tI8ke3b#l$XcHD{+0Ugd5CLwVaj<(UPdE#L8^_(i%uc)?9J?WyD z02Fx&S5SqHCZKWB*V#rZFSH-YuC=k{5Sor>(#tb6sn|VGZzlRooZ!9+Z>XeAEvPN& zTIs<;`TY($CBw^8Sk3o?8~kgE42k7RxC53Q|DN+eo;>qMIeSdT+4RiiEsl(nCnf)$ zvmxPUcBOLS+ox3*0R|WT-K~`O5B{f1Psg$>JJo5v#~ZZDH0e$v@*QA+Bn6ljR6xlR8hdlDyhQYCvKL6}oMJ=wC;)oJ36 zYQMZmi^^LwJ_>X6A`Mr%Js_Y;-`^{%?8<-k{P-@H;+-O4Lg8 zpVrmSuMQ`%nd8!k?Ne^JsmFk zxqCU3Br?L)%eod9wCcXpP>|G4xdCIMlr9$7o%p0Iw*Pq4!9|TLsgfQ(1babOjXK z5|IUH1&tMVK7)Vyi+C0u>caMp!Ut8ofE`933B6Dx!r?DGhuQy7FZ?I zb)84det~u5-~`c|6Y^i*5UnTp4MW8@SGw0HsPK48OSxuR;lyg|TlQF+G$nXx!N3@$@y;F(@SMe-0( zmqHK&s}mAiAI)LK<={0h0e_v+N7D;A~=|I{VdKS-(Cf--ZN>OEjssKH5adQ5%nv>d*Chb8FT2bi^ zjnPzzRwjc|m$hCcLmrPq5NNk%?nxRR!75BkH{pTXplaHbd?YjZD|hF_J6MVv{C=+wXh2Cwn1JzVe8flt^J5kc_Jb{p$o zhCHS9$&-W6G$m9dN7=d`orl+SB@Nru`8TUp_UYium#pBcrY#iN_|O7v*H4dQ9r=R% z6F8M}eK}0LkDGFRj|H@B?AM_$^dY^q^jWkRY+1Y^!=F6Smi^Xk05=#2SZ*&Iz^w;wv=83^DHu3bwWp z1jdmH>D5u8euuV2F8&^Ai_!dZ#ruV3-_^Si;NGy97-7-Y+!BN8%pTTaG68#hhvorG zjbrcy3qdVxyf7ko$Hg8O+b0RnvM~eU~wP7#? 
zwBRV*Y7B_^Jy(cwfOOEtcN4t1wAj0bhau3V0es_1_mm6!?Vkx`F;7H zE58-YTZ}B5A1oT2Jr2J*!V&Xs&xUQbZa7MOXaFaDH_$(8a}#p*0YHiLXu$pLUl<^a zu`pq)hU-GAL)%u^q0f<{zeNh-ZgXfM%4FC=v%^goZ;#_qI+KNqdF7j!%!QTI2KLYf!Jf7GE))a=JL%;2*PtNfy^n6$G z)!Goor`}|@bYy^Nn$l1HA(+h=F zKS-{ORAO>of%(>R z#$jHB@ArwKl$BESIzNta6b|emZY-65qpu1QxESX6DY|ui96Ejm@w#vH8>T@GsR_OL zwsVs#$KB>C(^NTTVe>8SOXBMYOB@~3ajg3@rlE#~VVrtVJd7j=r6(OL%NTv|5cihh6=i+M6+4!wmen1$mp22*AJ{kTb_fl@Mr zGewE_&!m~V2&-)jTGUv0j9`dy(&`YujfLg}6V2I4E2Yb^mDFzAceT^Bpn)_}8+%f; zPcW=45(f3Lc86kK9lJU~;f*T5`t7*?8U#j%Ya%o9k=Y_?;bYde{fGmOmhNul5?)`b z;>XYBx7ghzka}Ow?p6>(XJC7w%V{C2y}izc3Z!~8cGVjqF&~GPIGI89x=rtMdK8WG z=?VUmXZp+d_3s4T@M*nGzXA}nv5>XM{(OfCcR-sPzweJi6vglC75sHwPU^$H?&rm^O+!p2Ko>@+R})F)9)!rm8kF*b~KRNLj>9 zYM;RFP$s-Pg3HFhb)!p3;$LsV_SWf|s$%J_sDtvcZoVlA5xfgS(}&>k!4=XgcwLh< zY(ZE_wk|g$w3t;ZVr*v?d~pF(D1Hf@#iMB)34MtjF2XO`IK2`hR0tszPe0jFX){78 zp;S|1(u0#7i0%~~7L_kWTP^Fw`?C}2NG%Ox$;s@UpIyy@MS3!qKV|rJcRYc6CH7_S zBTly>`m~3>>l&9e%Gp?buLh^Zz|nDL$|QZU0M+64B`Q%I191gv~sZ1SeSEbI5eaG>8~*NR^!eP)mvVFz=395gd1G3>C)y7 z8~TMUp1EIp zoOQ;Uf6dINIciVp*?AW+5RW?nw903iR#p=z9vzKt#4p*{b;k+$6P)_Wc~SIbycb_; z@T*U9{id}6@!WF}o6uscw!6REY1=}Tfmwq+gxBxm7_6=Jw4HoGE|j-bKC})H%B#Cj zqhBL?irIf5#Ey%5oEMl*=-LdUvyhbFBzhz!d6VhEecV%|yLslIO({?(Gxh0@%7L(8 zj~mBhTz2uh7U@NuZqoGfs)}z17wS`qiJ+*Hu}{Hxwj+&ZRcpko$x4NKZ-5MR4L}s^Wa#iaO6FpkoY`0FTc8qz@nVWnz zNq3wYRWH(N%p><()F$MB)^3@pnp0_u5yk)Ol}lEQ<;~%Fhcl@7!b-;Xbg;{x)n#LfYnB=p$b`GsQFG9*6oQ9G+Sa@K*qp-nGhP|1 z_vA?8}rXO#eX(CC271;x#q6iNf!qIwmj~jPF$>> z_7vzmsiwh)yJGJCcR(8OHSU$5H=8t4pmxVKi^rkn%uSU0weXOP@~kMC%jqDG!*gxg4KIh}7C582yzoyPQ8_PDd>|j4B~r#uqMF*8*~MAIW{Nme!#m{Xl|y6Kv`iK-PxsR(Sf=2hqU?IDr3V1b59-KI z2SB&lk4#kdk6fduosmi4kRu+AgzF(rFHh&`ktgXDllj_56^&r1N?}*gJ@%a?w7FS~ zIvVSg{4G|u&1#QdVy(t(lTTPiG}3^R?3}Bm`{5_;o|6knMjwc$>x82kTelkzM<jv}WU=cwY2v7SJBurFp9)X`b%)KQ1q!55up?2so7 zuV!X$&d^2J|B6j<7-UHO(vfW2#vEN7d$~*Q4zU& zEt^)trqi;jJ=llUw#0oW-+oly(vE^a$Q=$fEP+-$ZID5jaK;(iZfUK?Mr^($Gj{7Y zO4FP#*DjW8anh5O82WZB9H4gXVH7o4OA#1w`jW6Fd4X(BuDU=Efj3J_QP-0Nu+qjDaGj-<8M4esv{v^i-dU)bAiNa_J-O@Q z&qw`8ZdueWG_Y&kmNArEJB19~_gPS+7@K>eM3pl!!prcq?lg3yB%jBz-aHhLQPUGl z)N^jt-oYgNq|qj>%azG+&bCN8B@+4rE}OTa$?t}`j(M7d)5K*+C1>naM)cB?9}Xp= zM8uUE$?lgYv#XP{RP{UO;;)Cp9faZYd~UHc(aT2m2M{BR63XIvauTbTTow8BlznY)<9nr6?8KG?0|yZ+&R ziIW3^$?psT5=ot4s-@eDya) zOJug5tSHG$#JI@72ngEcoZmQJLUh`~Tc&!>`tQt4j(Hmj=Mhp3U%JGkSPW6pS{Y}e zuywf##;DwmC^QUn7Frv&{y-cWB0<8-#dbv4#BX;T%0&NST5nKZKHznSP4Os|o#Rsm z`(g2wmVH>r@XBm>x5!)$Rn2LT$gW94S`k#Su9w!~`7zzv5@t^0QAK%W-=S*S&(1hL zJImq<@R1+*4*EtQji0d@XKDUm1k&7EA=)b#z@7ZMYtME}f5)*_POIEc!yDr(Dy&VD z>bVXcr)>Gu2-SDnkC?W55i7p^n?>1wjDwPY@aBq=R+P_KR40vm74y|K`a`T}@nzMz zO9}twmHl=MR-WR!bqmvs*!DL3Fkpte?0A*dL04{;@+Ul_x&iNmQ;8`H7hwjhrX-qy zZY@WA&`eHaQ{QDU=9BLSk2Q2nG`4*F17><$Q7PQSY4}8UkGYACpzv8kR@+K8hmnY( zmEHG?N!%Qzk89e`2_yw_n?IMA>zmwvyUgA22B{%+OJB=D>cmX_{Jvg*>t;BH4cCZI z#u$TnR=|F87UXgH$Rfc4&4Ds=I~5h9pQeGclTl|@av;g*_Q*!51ALpkHv*w#3MtAP z&7D9i>=|?t`rNo4OVsC@PGZF@=-ft;Rf482eqXD95;spUol*obc9_3GV`XS0P;dpV zy6W(-w)+x!;>b3I7HzMqIn9Fdc2UpA!(U%wcn`28p}M50+-m9#z1`yfj6y0-dx%R` z1kob9AT`IgnpyD?BZrkY#qqMT&t9Y-R?(GaapRIyV^lIECMM13S(ksi=>!K`v<=+4 zCfaCN;8aJ3bLLkuF%X4zdwT8!)bSd7nWYRFb*qxU^jb-IW+F=89I<&J92#(JtmLi6 zQ;d8pSuvQi%s&_zlI9fo^E>qe{p*c+a4<wy|YwOD_>K==xqk3+?lR*__9+*&973`8hTZzxWl{a1SUXTGJB8_;3NEm(fn+ z1QIqqb@dm?i$<=sT%9K``Xq=W zs2G96S)Z!;QC{J|7(sXJ;nmrH8!!o$7cD6(=Gfe#N}NNz(($`Dk`>o?{l23Y*-LuM zszwg@=q%w0T+jQ(@@$=~!Sw?g8EFymgnICD3tc^> z3qO0xB-?XNN)Fkt^M2k61GdE%+GM7#zUX#ecw^?I>4WhV@RqvrD5^eS?4`);Q*D%mbaw2)Qz$*>Y^X-#Q|Htsv&j4#}8| zT_l2v>dT08(H{=vViUhtFt5&*)=$ayb@thYliZ z`c%y$HKyUXtcUl6V!@nB;gu;~ww*ZacyHk2Cdol{xH$5yJ#s8<1@Hn*=>(ewg}W 
zhTUXmL!CmEL=D|Yk1q>gO6Fu}ks3|~p_f~qRv1pnh4;3z402O3K z>4Mm~v&m`cQdczA4_5}i$)uuAfD2FQG#n7;tOjIU@GsRjc1-PJ4s-ywq^E&6pCS5etGT#&#E`L!1uMh*#^ed!+g!qU~-GNgMZ&EZ^fa-#Yb z^tnVjfbqP;rwEm@A%E@Ro5=~ocn*p5u>zUgF43YXT8~tY4EMof=_E6Mun|8fDyewH zoeXb`{ey=?aw3k`?1*8zr>9X6iG`{5`&Oz~chtjnBl<@t(8nvgr3Ma`dtA^$_HyKo z6fR8c$Jg6?>rZ8mMv_ET7!8slR__F>{HJ{H1`@F_PEM&3$Ibp+V0bX`nr|Anjdlx_ z0m137YDKCllHJ$Vb;EEzxK8tc?^^Ag;I*h<6s`Vn3xa{vAzn%pDwyO8u&$^ec(2o_xe47hD}4TK$;d#$9G~E00$^ z7RN19fBJ)P+A8TE+%$qY77v0le9NgQ+DP#zLQsrk!5#sYCZhO%x zJ$5yV>5W|j9~?+s>vC@wD#ALx?{e>ZC9@J}fh|S#=ciAU&R&R(aAkka<*R!x<5=yr zALeHH(mseBv240h;=B9oO|5{lH)}$Sif@deB%3!tVw*SjWy)g|Wp{;03K`FIgS(8< zYng2l@q2)A76yZY?P$~w7a?Vp-Q7*TClnmikx^%5pa6^ZQY>u#FUxrtJl?+<(0v_Q zU4sO`4#Q&^K@~dZ)k|fbeMk2QNes^k=X3VNjZ)-7*b7lQ#GP8*iNV4x>E(U2_4_VY zBiY2RPT)1M;plg>kqhlhkt&VDGX2kk2DXx1TAY7wXpJaGtx=^82Pn*52>gkBfnD={ zY?U~t$y`I=^H4V954d*Fv)wtB#7AT5jPURy3X8q~oHB5l?e=;X@~tSUsF$>gAZN{& zj$0%N(u6#2rw#NXL`Jx$-HRl5Ilb;dg|BM??$;LVgH70_eo023j*XUi3Vr7ek`pbC z4-}|*r;uBvCosrv^yhMf8_(jKihi#a9)vjwiFe`|$J5g*0-iViiZu$tZuN(P;?I^M zl$Y3I7TH{dY9$EV>v)f~IY{PP6RuHwn|-HHJ&W<}T8Em4acw+YA{w%6iKhwIeS?@8 zm3YTS-kbtHI;0%V(>g72Vb9HQWf9erOe52**KQZ>zjV{wwu3M9vG#9C8$5Lz>AfTE zp|_H!WJmk81uJ!%>bYepUPS{TY=L^2?zaujm@k~Q(dE>`co^BSdG|N9m9FzFSX6_;2?npcF zP~g5HXx78EXT>YAP0+Ml-`DqSZT4~R?BY4(a%9B;jtg>ej0|f{ZgiK8k82o?$07emgQ%S^+*(*Iy!5MU z^)@2NCO8L)IRs?y#s73sKb^HXx0Y4C>QFU4F;*DxIJc==7tZeGj==5?q6s@s@&!h* zx_gl45e>F@X9}VO(qd9&^p4@-;Md;kkHG$+GU_E$*kux0XReHIdYk;27dNlxvmz?m ztx?#JYqZaS-CqsM2)h&-cWJcqGapjEBy!-Iak(x2Xot?WReeU~Qa1boP7r3d z#&O?e+Y*N->w2*HNg_1QopG)Oh`R$5mFu z#uhy9q4(}pF8$(YWbtVQcDL4jZY0!IE@@>`rvJ>zs-Ickyy`+8tC=WDyNbSC;yr8d zl!rf=R#klXSUB^+sMijI_Uq1uST@VJ3s7|~ZKoI=XtVZ?xBEHLFjj8^i}sah%O0wy zk^Ee6y8>rhU^GT$GS}Y8X`uyec)3K>a_D7kksfLFPSc#ylh~jptm6hkFsiNmYIDK;8OTsF1pg`ZJ(hIz-zcBvgn za&7t$RrDlJ;IBLiqEyI~nTYtRQ_AB)Gfno=9c|gWiJbE}CtWnH-=@id^i9-@sgA2P znz92>8SRvAw*u`0#Q)6a<(1Z2W9j#9FVB)_67~{1&aShQ1Ob-gB;6_FVwr`n;iAVC zmG^3a#CKnusEvs6TW+?WOf1>r*m$R$lEi#p{-pgFd z$iwlWu{GD19;&Aw5Y*PEZ;p#{2x?}n9AX6-)YZ<;w}i*7yHyw99qo-xi_Hq+yK94- zIiPpy(;L!#kE0k*2L?~?)IZ-}HrL9nldcqM=(NDOx7l=>aJ=>11*d)&V47e%5st~L zA@&belqSUw)*=Bn0Qhv&mOB|H-}70?(1)jCQ=awVDUpF-U<-VGVOrPD zFR7t~BzP9WgkWlJW5T<-&R_QL?7_Pd0*dD|V-Y(4mq_{kp85FI4ZgOC)eTEImbP5t zFRL;wx}!!J`TEHgrb2R<`9@d|p;=&gXj&f%?8?%DY)Y$$pOeayUr|zBT@SQ;9ER`+ zK0PG)ZyEq^lQ}a1o8~+}iwauXG5_00Z`y~zq+UobX|Mypw@~s(!A+kR<}zGbGCS|D zv~qWzCPx1Ke{uol?6z*wC2AE)7a0|e*SOlw003AAc_n)7ZfZe6G8}b$Hfpc$TV$b4 z=044_IPft<9JsK)vJtX(B4v~m-V5bLBdA87E5uHs5;cO_#>S%IE|Y@Ea^xcZK|u`f zg?(LWi0s|Fh!;a2wh)FvlvCiSn(N$8m$M}M_C-z}Ho-&R@pIY(G*aV#VhjtXn&W?q zw%{G+C=n6@uAV%)02h7!bTGkYe86p$9VR6u4!?G9FG_O4d+kgpK{fBK)!aoxC8$?r zxPvE{xR&RIH2*3EqHILx{3T{(HNn1Q0A&y(Vu+hWn$v=mndzrp>-m7i*|qbv_{vCy z8V${z8eLZ%c3;>iA5Owe)ZR%*M*=Uk*gSk(26FRPPA&bRC#L+$g7Vo#+*%eG z0I1MkE#Ax!ZCse2`;qaJl;K~dp*9A=Ans*HC%&?D$ryN~GL2)VezI|cWRaAFh$ow$ z-KX`Od)r}W!tv3N7B#vpt=eAT9iE6k?D{SFJ0^h}qaSMQN&dq3|EFw3|NJW?6w6mL zdSs(YY6w@^86f{VBSK1?#He{whJFV5>!w*Z-nhul@GV$BN=M_#!(j{0CYG`0tdQp5;FN zs}Y30XdsO4e}6}q!&JoE|0?y92ujZX%1?N2aUSB^{j1pXBRcx8;#`QxA^2AXak`2K za{GUk1#5&T{j2$e5a2IhZ#Rfmv< zk}NCmu5ptJp=Te(0BJDIb*GcmZrA^_GFl|}l1J>P^yi#jCS-p%i}YV*{r+z=^vsBC zr6@<`$kIRs9`mmz=>C5)bN?^MFTygulLB|AqD5Gr2-gRVv(}~j;S0+SX}CEVFnwl& z$XJN{k<|VJrx-n5|9&W$rd991^KcrB(4V;Je0$>dDNkGj%C=VVZh)9}h(Q8n|10qR z;DGQkyDM*MH4vZ`=283l`1SeZq5A*I(+2TBi-od!M;B=f=jtf!R)U|WGd7cEGL-GO zVnsC7zQNLNi?WIUy;)j;6UMF+_+RDjEk+2c6bPCf()?zS3AU2cQ|5sJpXM6KDgfV? 
zVf`q{dRCI3Y>^+?Hb9nI5p+dKkF){~3i?+Cc>mZ{Xj3Nu7`iO}$M=#l6DkRMtTM9> zDCmxYb2g|KS*4;8T=P*#lDd8O`0twwJmke65KR|^?)7CWbfnE1e=HT08)wr=%h2Zp zdYz~x$^P#UrNGbrn#SAD{CY|&+s@n|1l+;|!>vN)^I`jyd$g@CIuwHc)x|9dic5OX!lMMA4 zhEuHZ2?jT9Rb7R33hYaVm+pT29zU|}$xjh6`_Q&Z&(`D<4x3+CyA4^E&#l>z3^S-( z{#gPaiFfm|IBW{g-Y$=6N%k&hKZoAG`bXw|1?GW#K~qWW4ex2<4e=e_)V6aYS6Fel zr^Y6Rj0cOYT1c7peVfo*x+}Y$n&o>nrXyjT^vBkzm<7U${6@aK>-WyxMYxvt#730d zmN&WOSCfieoM&6vSytq`Jo!z+7QhTAeY8Qv_wik>b4V?fLg4*_O3#yp2_ggn@%e4& z-T+zBG(U)PeAzX(MaB@j^pT_CW_Q^dm_up6LjkEz7yO8UK5v(;5jp76iJSgVe@Nyp zf;V9%!4HikVMuuUqm$_?KU^?HsK`7edkw@fgXyzz7|71hr;{AebW-8U#IQlwu( z5V1V$spPII1PsDlP@k(3D3Oq8g0yWF0DT>U@KKDAy|*YMPHo8MaNafU=HpBr-0QM} zkw%?48ohsc-PzLNUk*r($V*jAa>;WUC@S*|RHnSUZn@hHT&sR+P(+@XaFgRsi&M@Y zJdN;L{IqJ)6#g3Bk^yM6UDn;CQ=gie_nY}Feb)0mXktPo0z>i@p0E7rNz{%frqOdb z`3=$c^M(WI)<7cxSjq;!G!72#p6}S;8*~KJ?+4%jOF<|mGAIN~ZEYlnNW}N*B7VwD zNll3*GCi%{MCaC2`&~Rr9Z5AwtJ7jCS}NwQuym%2SPApnNE(?CrvL}md+*_ku8_W= zbjBETQK~;PcLg6d25hVHep~emD{`Bu3Yk|!E&yXAYsqSgCCQsaN$MZ12)t8dNbd&; zO7ot-J>5y(8Dgk>WiEk7AR?00Rq1O%P}=218)iN1i)e`@d5E>;WVn+wio_`B8{es= zG%l>U4wp!H(zYJ}OW7ilo@LuIqME{T0>7iYk;;O=*<#SfCq@Vd5!r3y!d@U1zE39Y z-kzDtq+wEV5C`^QK2o*u3H~nI58N4QIuIV&V*q3Zc{!D&@GiT{KwIS_BcZh1<`+H* zsTE;knBm0vUy|4job!B(MPCix7MA)x3VZ9`RyXqCN;DX6C&Dem_SGSr%G3WqG%!#5 z^ZQa8#_@>C!c;j_Cm54G``csbNV@y7*=f#0?X!r;1xI!5_>qyX*dTqi7}mV%)o$(k zwlOT6l+wEyDDq!BWPU<<&ZBkMHq2woVWe}C&Dv4m%{@F7xtC!H(Vj(CDjWIGD+lwHy&9ne<*~ z18r;E;3NdYgn^Al4i%l%RArT!CyyD@D;JX|qZ1QdA?DuQlYJ}@YH|3=dFp`nP|+n9 z=zX#~nSBwjj<)$vg7S3l84^+DjO9;C!@^?1=0_*AJ#az2L5kD49#4ehyXSODV1QL2 ziU(q}R`yPRlTff{5P?X!znu!h&HsqmCft-O+0ZoPXw_>jo(sM4L_=88!5EejTu*1p zGJooEGWXeq!_gBeByo*!T}j^j;T}DjeV2Pnhy@7&E)$W-e3#iC8zRbmxX8g8oZQ=N z9j+@OX5{VEUkPbl+2FBg@rD?hMHee6mNu-NN9F?_HZV5lsP>1EH5zGcRKAD=^-vJ*GW5&Ac|PUNbC1C8;*?eZ$GL!aSki zH$+m4km%<6`Zp#){Uxx?SJ8QWy2hEqLdC+mSuroOCNpUa%wFbwjOOYMiOucx@HgXoFOI8`QUypAOmt zTui6bPJ7KWo{!VGC)V?)zDi}8ws-+}*vBAIV-`G2h%!#Mki9>eXQD`5OZE0{+gatN z@dI~PuCthR7=(hRdTdHp@eW-qio0G=;QzMy(Z|wlp?7fPGINRG z?}?x(ix}3R33s6e16J1);>EWdwZM0TPHUs~UbY-Jwm8V{A~6ROY;OY|s&_HXg~P&7 zpPDpJe9b+NOIDS3SIN+yBJTS03i?XT^)eK1dUzq?3)i*~z$17uP7#ZPM#(ds0vKb0 zrhxP`P@%qZ!Hd}6Y$PJi%v;ih1kY^tVWnurSJ`)e5EloocH^i9D)_{$$1HW~>)l&# zyN4or2Qy0wytc#8%t2ermuecnNNcem>6 z7lfKSX=X(C!=eUb`>@Nwu$6nFr{iVN>69p=*xS<$2YCR{(aZOE8r0Xya}|@prdQT< z|1Av$I)Npq4P%OAscp4aqM>3JwO)H8(TCf1WnTprDnF4x!M!(L$Kk^+8O;zpD+Re# z$n7(U!=Ymi;O34c6Cf9|aq(;{_~4m8+V|#=Y`!Nqlhe z_HY3eo8&db*r)lPuYmUBuO6aJV`P2fHv_bfVj{1S(w7xeLgHTweu{pj^s^6?P4Ej9 zTmR?UFXnuSiH7@?nd+uWYK8ns8;p!4fBj@+Z24eMx~!Y}%^ zKXw?WNh}tcTFvUBvsq!5Tcqhw%`<(B zGQ19a%IWp}pv9C=^r2Odisx=w5JHdn3@2Z~EHzelxkTQXw?+xp$g( zz2xGkW&bian*7&~Vj=!L#*SU{Nys3B;$C&4!$Kqf6WoUA*?w+34{o-|o<>HcObOzb z#XTP#hU@G1Pwiih=rV@9mrmu-=*St1iOY-8cjj`%kxcrf9TjC>^r2R-)dx0Y*u^%D z?)Yhtd{QH9ri`nQ2wT5Hg+VoU<2TG3VCV=Q?zAo&^Oz}*sIryVNs8IQjzdAo;TWH% z&1rZ|NR(?{W-#*U=*tw%oY`(7yKar`Y=zmt6?90fwf18j@a%e4{f|F{aF~}%eS~mx zxn!08$GdXNPlego~)~OxcZONWgBvtZNOVr0Nc|lw05(x zL*sa^V$&x`>XjM3QRWX;@{-jbtl~eQWoEYN`WbHlVgb#!-E;RPFC?%*;Z0AGGg?i# zX7+TrkpkZieSE454J|3~KdKoW+JQK03Z@esf}fGa{R9rE_uCe~EJQ7mPYK%l^hurV zksMbDLp$iB-;@n>mEN=>KxmYo-{-nTwvT5}>YHY|U`tT$bzBafw1Zi5NtQh)pkk0*~{N(crbTXf7iB3z@KUQPb z(>tFpf2m*1?tQi-R{SV^Lr{|RoYe9|oU={XVh`;s2&+0*jn~;VS~*M?DSFLZ(}wgp0B#X{6&=T zAxC!hGxs-aT4U$9IEANW5q$#&Wi9nCVs85s3>QvTtNkKn{Q^>>}NF?j&MxgvM|F#YjD#;-PQa_D(t`TH&WOIz;eTlIKf6>ev=*4kU?3b|* z0`5X3x>~99DW$Qy@odMWY{q)}IeDzTrV_i+TGHGE*16^x8Sz6(5*<^MrNXF0?__vw zBcfkT(i;-zhXi9}X01#rQc8EIK8@jS|BospvM*ywOVsjF_N=r`d4ATIbt1(6qG*&C z0D#F6$R%QFb*JlNn%Wv!3osEy*C72AU&~ge!&i;RkYg0MIPkC75mlF{bW4vIvNArV 
zdP*axD~qxIH9>lJwziwx+kGL>aR(hs;&edRj~QixV5@rafEVrhkbk&7ghc2BsS`5M zCsAF7y_h7Bxa!9Y$@f_HzWsY~_uS(5ZcLk&Ga~AQgc+p$9=$XmbK>{Zj{dFx|Na=f zO)=l&9{0G%J^sf)yVHB`scO%9<0)$JS`vhcSP5c(mzJBYi|6iq{6k36RFWw{cc3{) z1v>#me^meK%}H;JW3x6~jvG#=mXoz(TtVqT>ek*0v>-yzIR7TN^JfMLDJ4iU&2Zp& zw1%PqFD)gOS?yUhj?Kz&>1(n`anH^GSa_>1G7@Vdu6{tRfG;kKA?wz=ka zvu}90I;9(Q_(ktA?{$y)u>T&R3UGLGQ*cvE_;z)|e;a?s7xQOa=VJtdop`{5?2x}3 zeazqOe9VLFfJDcC>E``pC;^AD;De6julsEp!}l`DPAIvl9KV@aJn(T(@i39C>&X9f z#nqkfpw+JYylebvkbpu;O{^7(asV{%`!sU3$Hxq@)hU^?4 z5pN_$%T~3eYn(4w6$R5x&8D)rOwziO3#z(gxi)MHfiWS0jOd1%Zs>@V*gAg%ZTg}% z<=S5+ukhV#Z>uhoik}uW{TdeUE!I1X<+YOnYjIVDZECE0E6QIeWRRig5UGgBJ5-fp zRx_${L*X1XpzvTS%w|TF&nZkrAp?kF!Zan&nozW=1LZx^S#UMpH8{JiPb6ALlqn3R zXmIXr)^f}IdVxrGhz~yGI5W2UzkoSsj4aj{{M<aW#d)XC?eY6O z8tu~m_GEGxd3B))SzD)oEv2)wPS=jW_X|Cy8LU+~|aUvQFlsgM7b z6o}91|K}-Z$EyI!yfGFlB+L1=lO5G;N>k`e&79^qh6Kd(+$PC zqB0Q{hoq!2HS5BVmmcdBfe6U7BGZykiq}M%_nvB1vKg-_R$XcvVeklF(U^*LZpaId zH3}s{(m>M*LP8~oq5=DdeI7qa_-4IA&MvsQ5`6Ys&t&Q-%82Zs$I~Z09zV)RlK^Qo zDh|+TfYc2#2uUA*#O{+{@{9c*@AoB%FkmXm##1y3l^v3f-s9-uE)PaANt_@?M|2OL zGUzNA+7;#1CD$UtO)pv8SSHPox_eAKIAkwN*a;g|U?05etW{YN}aIy(n$l!$g#axOp+x8( zqw*EVFeTc5pJ6=Wh^Ryo@+@=;2 z!XrF5Q_?J6v0N^ARjiqhuUYOyESdpIK4!tr@>+K~WJd>d@&(z&l4dPf zuS0GWOs0nEQloYfc8)sq1`(n12&?cqM5u%?=(Bq?;_$;K?C$pID@gB#-dnJyRfqZv zf-XmocGyp1QmxR@fKHt7uw%IuIm_9Ca%nL)71SVh6SBh&X*VKJ1{X?#Od;0}SJzl$ zXq?AzH}BhaJ7Y0sLVV4i~pK#nXRCP_Uo>MHROs5;J=GV+m8_xGLe2{V2wD*wP zv_-E2#kPIoox|B0XKIXdSkZQokf0)oN+e1)&=go(zgFhA&uziiRGSsc>oG-LVf!KT z0<3dwKX^-J9L73chgtYG<*&)7EON)yDyCWGWOzi{L1le1-RD;y?egfTLl&wxiYLI< z6|OP3u!m1~7{ob)IHq34XxX9HAF}u1Qw|;<(wETPru@V^a0YKJwU4M=#_q1*U}#uY z1&MO3YKt+Jx^~#Djrn>QjKMSw)~ap%)%N{Fw1Gci)+g#eW~X<^#}8tTN8wvjzTI1z z#$YOo-Z>;34C!pM;V!Gfw!fUAwvNI&YWHL2%{}gMk9*wX-v%7k zS(axJ$WleOpYR|J`Jav?e>;+Nw0M2ac<-qzM`H!Lqsc}A z4^qd-!_6PrU2TwIRW-c2s`>4Uny)I!beQn4r};mJl2I&)BXMpvFNwwhB>YUG}U-7%yH)J|tTFtTEvX_jAb%=A0MZM;9bIDh;=X^T-is!3S ztTUuR%0aTnNB#Hso6#@%+tEj4LHd_&+RuU#a1aary07>TJBoi8DSBE`d$?J6{%K}# z4hC8<3I$rciMIT)>D0vbKDrIeZ4)!SR^WX9>(;;Ad9f=P6Hu9*2RXA4Q~HO*k?5dc$h7Vmw_j+eul+8f`Y< zXDrt%CTqi@)HEc7Va6a1=|?q@f%7@gGN#es8acw?25hG1#i4X#(oTZ6jQN z?5pfn#U*50&{O`%vAv59+A?+CTTIhXloe%JQCF6tC@GdX<+{R{HwrHyTf!@lec~`C zld!5c)T?Xq^&zv(K7(A~T!UJ#C?{hUiv_Eyp;ifDCnoJ@#8J?u*%D$~o1rcG-qaOE zQBxHal`OG&$!1-VZ`zs=ulvUPZThAfBV&BJPd`gZqArQA=ruFe<5yh06hyvaHd`?t zub7=(an(Iwx;Uneh73M9;Nb_q=9k~mP2Sd+g`+x)Ir&k2yCp)!c3#MzMnb-sMr|6c zrmm=)j7Cck0dW+N#Tu<*!eod2C%@vC|L}KwxEu2{voxzE<$B4+MAWKB+#504852D+3B1~y2I$BzvAfce$7ArdXJC0Q^d;+&%!Y;&euF&-u?KyZrG|koHLlF zrmkz6$Yb7>E>#Fr7!h}RL~(+a1->dNr!zLUQ`W11P0tZYL%mwD7;l(el@x0SLK7ts zSvMk1T)RHrx^5eBzOBc?_os*R&eG%?irIwCDrZH9Zq#R#MeHS(*sUoyH@tfGiZ6e6 z!OMD&Vzkf0$G_&+|NB4ktDT0&ddgRFgMayo#k69)ZSZ$R;Ct(-mTT7IDa(1rGS~DY zL*!Si@-_2K!?KbX9}(!7P8iS)Tf!@U-Tj_swI;uvu$pHqa!n_+=xV__-!NS_EJ|p6 zj8-vO5YP)kA{i2feRdCqJbKb$bTLCQXL9Yy7lO+6XvBd1&XA`MdmJBTBr&LRTX&}u zqk0H9G6j-`Qh0?6H?AVwoTZ2Xyw18FgQ= zBVKZPd&zv$P_A#8=Z31&$Mug%M~93$5ra?>nutIqbdrF6;)ymnd9`TIg=Nkx)9mAs z`I2HeVYOMYs4O`VF72VSF0l@Xx8!Gg2uQ>>z3uM>LAVOclJzF%c4?U#O)%=w-S3hm z0g)(w|HZ+3s1~^O75VA`v-Jap8$qZoY(g&MWhS_`QO(+=Um0iCI( z<1bikDpsomOUTvSvf6}n_{upz001BWNkljq&oK1fiyBGQh|&I2C* z>aThF^oXNSFwg=iJeV3^7u03J%7hd?qML?fi6Ybz=euNS%u&}<)+HD7IaTekw+&Gs zk)4?Cenyf-1ggY^iXaP+!a$wVR3$}YD2*df0_k1b?z*N}H5BVMy`Bt1q*8D8$v5#i zKhHSa5*5Z6Ol_&H!y?<-bKY(1Pt-J(!HN!q30emPh_=R;@CfJd#^8*>T8DKGfx{X@ zRkmYpoX7ugWXm5zh;2H$fG~haTC8chb`;b!&()sN{|NJUGy2y1Y*ZKrdt z4I;7zQx?=!Nnsqoh zA#qfZ1q=LSN~R$YhNf;Ps)pJ~j8`bv{xyDkLy{n3ji1Fd>kzdp5x&ILIi+bRtV1ISgacEzwMmlNziMsza0vI6CO__+i3HQ*m8gbMo!y@E=n) 
z`yrQUg}Hvg^yRm_y3F}z(WC44IBY--JM^ABz^{(!&BsJ%Ir+C=aDwDFSBDfw9eQGc zDld5X*_ZrooO7!CRKxceJbaI%VTZ$HMi{{IPicz;83<$~ky_wP!{+*e@u$DxMgEZA z6$75iiui}}P5wy`NEHxdG4XDOzO8V}lKJI1=fC?M&#GIF9!liRmyAEZ!UWzyK&NaCFlA?IU zSD&8q<%>Dvri0D)*c)X$*$dH?qizLKTP{jN+(9Hgw2sjH7}eIF8{1SjQX-{8$_nq6 zEXL=2`{}3X>Iv?_h=E8+4~HB)?GfjOs+w_jdP>ku@ka@6=-6CcusFM9Tm)>?4*jPe z@ZR5dIPPQ|4Ia}SjTt2c!)8Tw@dkMUOhZ#Fsg{OCal`D{70hdX+x}=t+y)`mY*tsCeEkL8 zR3eV|IcO}-d+A_6d7?4C08MfDlLRZQE??an^#Z z@ut95E7sE~H|0ytug^JM7u+_2T39@GTUqZ92rpM-rr)r70nd`hWM-cSSxL{{^8B+e z`08@aGt*(yIp$z=%uX+5H}NDJSll(!S?v5d`3udn-V-|Rkb|@$t**KJ>PtSo%{i@8 zmf0hAcAhZmb=XTG4M2$yF*>Ak^ge@r!Jb&Mww9`vRN)S?`#$OL5f8dChY7?QR1}fy zAJ9L3LjUWRWX&3LdBHUKf-^T~R3>!H9404RO>@o`0=fGhqYr=0lcy1XuVd0IfN(&V z?r}Iw`Q?sdQ_Yw!E6Qqvz!MBJ2Jh{$^Kd{n35i62R9%wpkcW?k-2Nit)vKCme#wj9 z{TAm+77uz%_5#f08JnBueEG#o&Q>+`V2}PUp0fY`F}wW^y<$fA{ZcGJbiZEj|=yEXZ(9INaq<$|Qm{S%#+@Y7R=$%dIm^syY z!OLllN-bUMfd=PtHtRX3=MzMf5+5c6sUvJ=L*!#tlXK3;OJ1!6)hK58phK@8y-E8j1gIFI5lOn_D08UIoW-@~ zcAl^(16+K_LE2$I)AS&1rR zWWrD=@=li~@F-VM<_pGGOKz_#Owc7xcgeCY-9TewMUyG81=W1QYLT%hLej7ybUDk} zl#BBfXIB+@;h^(VpG~xOkyJ!c%Kk837c7;v+EIBDO5Kg zYSx(51(U05o}W!w`H;b0KpH7_1~GB9qFOJweSXSR?=$TT=?9L`tjQ9?2p>G6_vna`Oy~{v=&ok;-IQ__a&m@wwYND{FnqRWGddclXGJGN19UO2`ZPt$1?>Xx6VhLgkjj@rkLU8Kmeuyp}(|=YHKEG!E>r4?>5=IrYeZl;!n$w(g$IDWFjhaU~u*=dM3 zF7Q;m+jQ~$V}9|=CzQW_#BOAXuP6rdEu9c0o)2W|27zpQ__9S=~nHQ_*r8s}Eb zC#O97?2}gX6r{W#r|cgM*`N1_Pi`o$rktLhP=ZaZy$ic$X zJpPDyOL%pLH|!)=44VtCFFs+mIN`JADYqvb`pp zu9EAW$2JX3v7%fqnVEB*KfB`Dvl-KAg(Yn*W^1?k!P@?iswKQ&O}=OeFBOFd9ZvR0%OU|y&Ia?Rp)(LgkQe6)J$EpBZ<0fCTtZ#T`&sZ-Wa1gH9YfgE2GUn^+6}Rpf zl|3dL9<$TU*iAR2`mH_MHQ3oRHpMm1)CHU6L-xWAgX)Z%%UeFZ*)aD>tVww9HbH0hA2`yT1=F-P5mgQTqospA;EcS!%yWBQ#j znVGS^JZDl%PW+0*_Q>)}IQ^98=Tp8Md;IagXJ72I>>TmlFyuieYzePykAp#n4|g0V z7c-tO=aklC(=LNy#^AkOb{-7rBq5Q|NYx|h3^_U;GQUar=f#5Z^qd!8)ZAQNF@Dr% ze4J67f6MCfgsbtI>pDU{?z8v7Q}*6}#LmFdYsB{xUU)DCY$j~-oN*=DL@~SlnB8GW z6w2>7Q#_bk%GHMF&nm|Gj0Z!_FZ%`S%S&E-IpKCDSgQe}XpaZIjHknZVWV*Wn(E>G zsqh|OEos)5%vU)#xyD8rhl3smqYm8!q7c+uZT8y3V27$$(~qX4&5UZj;MKII=NwNW z->PNon$5;joKEmzm;Fr8Qx>4c+>qs$k)xGousE@in( z8ts#&9S$-@FBB-D5Ofjph%A|~8&u?*8P{iH8r5Z#2K3VaO-{AGVRG^{&wlruZ}qR3 z^#Vj3(jUg8X+R`G;w)kGpi5d#vFFz;#*#v$xNet6al(E_(+wpWjY_+y-9tLzE7J0Y zd=_$gGeTtvJDEh=HEwyyVsgRD+nm!)Nceur;Gjd+Z3wLR*`ltuCIWY==X|l@_SJ;F zB&K+r(0GTiYwCQ-BWk(>xvwU{>g+FfZnRGGY$qJ#msQAnsB>*$)vBCk3xc|Ak3%C#xtJ3Sa7lQ6y7r6 zM<~3^8e3d)b9TwsUl+KY$Lz;_nlSnQ*}Jo5Ns{Zp?|*lX@YuK9x4Yk}du!=#EF>Wg zF`U7Ww2@5PgJhDKzMCHOpf^!w(#uE`HJXvhNQokb8W6pA*Iswua$jQ8L*8mM2y{0H zG`b2v{LJd9DzhRpB0N3(_&J9}YdmS7wSl$`q_NI8GNnQ*1JlqLrhzOcFdH+QG`Scr z39TGgj>(7DoIO9|{OF3w+yZ~ygI6d6(~Ysi2A+C~wY+3<9rFCNgX7x7jzBEOEH9sP zadOV1mop|t%-+64)ux?%#pgNZfwo5B9n2tw0y*Oc#8eGJh zR&1b+F~;Dwr%;uSsMjX__-%WW0E`;L(c(|L<|hk0*jKF!-Z^ z!M(Pk<_H`iX*3k|vA{S@nO&8IX^loxXepzrz9)+ zTzTVP8p>V@n(@u+;ibY6wi@r@ALp-T=RO zE9jNVZvZ6>>Q=;N(Boz`WRWd+Ha+HYamDp=$kWktrmHEQ=`vbPIA2^-Qj%CP1HZ%F zXot6I2W)uV-^iX_uL!Zu?n=yc37ea=(JInQSWldQy&@M2c*cv{f1 z*L8mP1Ih2z6sCZWC+pzV;&bs8Ex}Q+;Y!|)74O87x)v;plIOFM55@&o>AD8*7qJwC zCWNLy|0)??pEayjC?~+*yi0pF84GSyj5HvTb#W_uV#`)}u`RS)zMRlIXO`4>%bn$E5yD7DU)ff=%yhHEg z5nC5094&_|uby#2z`L1?E^@L~14|g#XHzm>EsP5g7M9nbwRgy!$%xsf$IL&y;3Azf zy?oB)2vRGjSf$+L4zg9JSKFX}aF2&OZSKStiKVcMfVkJAf3QnGdqy|CX1us$`1F0A z2R4OUU}h#k)1zagtS<7Ol) zvf3nr7PW&-?mXzwtGl>r-9zm4&Y{+g%)Mqpve_b;WCXJXSK}LQo;~3-Y7lmJNO}>S z)&cD-=k4G%C0>LvjOD3fXkY_-x92BOntdI*y13Es~2Hg6j$Ce9YwN1!qy6sJDp| zgO!#)TLn=_Fnom5qq}p!+uxgDHD6#K54l)QS)4uNdstDU_vANyl`uvLJaluJ; z!PV3E`0-zlc`m28%L%8~IZBoIR!UY($W~J(tBmOi!q3OWrNj&atSCUSAisFdWj^M5 ze!<05vCwU5ja_>8_IUVUK)2@M|HJGHAu$3UJ4`TGkX=0EvY2o=zhXEkSy)|yWS7q2 
zJ`dj7pjY$ol4HlL7PA2FNFnU)UN5w*ag{tP;5_pvk~+P?19tA+XTOusw;?ED@>}shc-s%F$XSlY zOpY&@Obcd4jOEpcJxy#&^z8%i=VN?ADlh}I6I1hSx^BVgYR1)zE3We?vsOWhq(C9^ z73T7U#l?zGbDK-6z*yd}oJ_fv3A*2--Dxw3Et<|0yyMI9@tvDpb}L=KLrQ0rR8 zNZZ9sS~x+5&p8eTg&RZE#C07aQ(;Si6ehw7QT~8Vcfp-;#$`EUdHyNKi&KhY3%i(7 z%!f>dD<*CiFN*1OYjo=tbvMVDeO(u%M3e%fnBnAS%q~}a{N6biR*uZ3EXEVAW?D{k!3sVbZQX|;hrY4|%c1?Ei37`Dq%amH#xJ!|Ykgf9 zPDs!j&?*G|qm0GPDbwdEAIy$;YUCK%jOpcwvCc@l4M4=zT~F7 zV)X2N9$i0Y<{9X6O0m4=W>k{;E#kOFs~*z!EkapCr9GOp6ZRSxi1{(6?@Jb*!3(uQ ztR~Dehl!P-j0{t)D>Mk95LS#C^yu{GEHgOICoE1r;@Rva(`P1TJ|Q1paWhqjMu%G1 zq?LrUBZW8EV4)Sgqb1|>3l>KOAEsx#FmmjC&SE%XxQbZ$H9DP`PA4EpT(lN!Z!Iv4 z8%F6Flcy^_xpBF;&F~WQ8;-6DCUy%eX;G_tbYmOaGBLsio$XChbxtvza&!I>#~iUb zvvI@%mQyB|Bd+HjxmTmti0L$ZeA~exg8nA6(qeF&bG16==E;KhZaiF-VWeYjE~m^4 zlibuuA#W=L2$bz()wifmEq2CN=$k3&#iu;^(IrDy;>eU@e#LA!<}|f2TQNzaK|8Rh zxfYga6V!5ccP3aj8N=~&u09&_!I8n4U64(#m|hDeONFH*!jNCk^RQB>R4Q+b67ogP z>}ttnTJV3(B`*VkEr5KT;k8=jOhy^wnIH`eHtQyveT%p2ifvcmDS@FCc4E<2*wxZ!=gf|5*UoKe6l0{F^ zu_gG5?WUy2GM*pL`FOtMe>#&yqM%4~M#G$!B@~`SPu5vsMGmt9yz&HEJ*a%u!+9%wW$JLmCCPyAtf~{ zMheL?U$V+m^0MG&Ibxd4IG&eWuSS@{U{z#{(+Q?DX*dZx!GL@5J`a*RH0}E9{wTi^ z3AhR(b6wu;Vp%dw3+7qLhvR}rlbo|f!K#3H0Y}Sqpz1-i&hpyvB$G@r%}Op-sk2a2D&S`_dwv92HrN&0#casM$s^9D4YH&`Gw|pQeB3bQaz17LD&8ec zq}#=|O?LKHlxc>cUhu)Q8?G*|c|3dpWr-{@N+Y0jTGYCG+<&mi{d-M%H3vtE*ZhOM z&hV0k#;qkJ8*SpV3I5TNo5_Uf$r0C`3(kj2M!g2q?$Xc(Z=EmEh>ciuub4kIz5k=+a^K%p!0N99NRNF{PDaETNdL z7@tp=Ph3(Q+$bX2=#ZRT5I&kS9?w`@oN|43%Ek4JL_4&_5EEgSQX4yVaoSC>(x;iFLW6$Y)IE3$7+PtKpP0jOiJZ$(ZZ$oTbsEw0pD$J3M%}#e>5Z?a0P01Q|aW(}a|m zae~w8VCN&u$tm;MlwcSd>|k>nyb=Z&A*S7+83pV&6_=^t zaW0UNM?9z#cN1L8M8EoOr44omw~=vhc!5>U(I20YKfB@T;+p4|iZWkk3tIIy{@?*S zdxyNe)#R{k6WeNC;sO(8Is~}$H%>`e<}Alk#;4az=OIfS;(9TGV-lGBB#88R zLW1F9SRt`z({f>$E*KqMF}70X^>x6j#6XoPc78^7ea+L;f+tx)S*`=Iy0(Ge?a*yE z*$6G_N@Aq1noyY-MTVW7k=|VK==8R{Y6;Tx&~b;jJ)qZWupLMeYu!WjC)U9c)3ynk zF8)Zv*@ASw!fAOp?HaD@;D}cnG6V<{q{L4e4E8q3^$Ck-*PNao^Xy#kXq`%nz@QW! 
z%InkXv>5Ev=+qrNQ{Gng6$tGhq9(RGAyDUdhG6bT7(oNicJPf?8(OXVSWZT8HsZyLYc4av^V_KJXXl|dpw{lut_8FLi%`@l@)h;kg8kMN$I~-TKfK~d zsCCaVSK}oIO?#mkvEF&Z#PC&nAV{0({g!aY&lw|hkjOE!m=R?5@xV=9F zrEtNl)2MaW-EFeb4hbEJO^DIkq=95({t|h5&eiiXPM#fAUNq zw2zr=5G@qj(-g%SN7qky{<7e?T=$VHijq>97-0{0b3nV>W)Q72C9P5-lANvW8^nt< zRwpkxKYhZL5R_73Y72>`NG)7dA`DqA{8cKI${VjFTjk8=3yy{>Ru_W5H`nbHL(oxZkBv;*ygpxRpV5hIiDY$%*adn#V;Su~;{-o8bG8LX{ zvRl_YY$)zTl34$0RhBAWuPWeGseH|Z5E#NB&^}se(jq4>a+dj$>(!96`4!W2%5b&r zu%v{-QYH;MVK><1Fy7;Cw8NI)NB>fFy51ayfLO1~7-j`Lot7-}f?>MO@_IQh7~N(j zEV7cLdBG<-=jB||vII3#a6eQ`(vo=&r;CEC%iL4yx{4q z;OVqr^m9syeV(HPbuFly>vZH_^#@7_fzUR(-XoHV?O6(Gj&@HG_7$U5!K_qZ#ps|- zd-ssNckXhq(`67_B({kXHU=h}dpUt)g0{eR@XjY(OmfmfgLH|y`*e37a(KAS!~Hfp zy#QC`LQ*i;9dm!ULPbM@QO*<> z^5pgs1DF!j^hDGFPv~3au8`!Qzj)lwx+B-$RyykkIvM2?G6{6!F z{k=nW@7?F#ogH=>7A<>S#;?RhHhVPFdu)&4X0GwC=lHV{;nr|!eVR?lUMD7B+^`fA z#@2wO8PWALzUQNTkKWc4OO}*UBlYaPWpEr#vo&a0EQ^_CF*CEp%oa1#h?$w0WwFH; zGc$~snOU-!nRcFgzZYWn|3+-Y=AVg~=&q@*%sf?nDl3zDilJC%5^h!)j6!Xm0$?+< zv3YRbPw3WUl#vra{~SrE_w_d*d4sIfrndHgCHkagBV4`$mOg}q-E6t%^lVm7w$+;E zWP?I?ii`uIIR3j$aMhl;k0)MD37y&m&qS)+uBGhUj|JX&*ULKQ#dTe>mL(ZrdT_16 zZhA{o96_sSz~MzN`r5ptPw=9;4kKJ~D+GbzlZ_|z<4U_<&qg)}Uag!icnF0XX})XN z#RDcDC!~r_=&#;c_?dfzz(T1#Pa$1CQ|7RG+4b1Yul)^79UMZ&VKL|FgB>w*Qhy;M zd(d;gWng35B9yc$czFSP529xU#Bvy0HKpTmfAhb&hF20hpN@x8k9mS0%LrF8!Nt(w@^_q)qsdl~kvY(P9UA;|&#Y$rVmaCYKxstd$&s?qr0OTOvfK7VJ?5&A zMqA_Wp(Xpf`I~$B;G>^+T@ZUZ3P55UFiEcn<~wUx)2_f_U02Hf*G0!$ zg4nL)VAR?E#AdhB2dZkseZgS5Dxrl#EWXCVMp>PHz%_d!R>7e9wK7@wcQ#aZrOk~q zQ1v*0TRlrox!B**ovK<4wRM0#ZKZ+?-^2Yqcwj+;11Wt&7v;e4=qo)bQGyhJYUe zc3Y=TEt&;}w@DwY!T~#mEiNaW2p8+!V{#n^?lwm2D}{|qCgpg@%W$^~*2z#swZhBG zok1nv)QT1U#CiB~A9?ojY?Bcr#)!1N+BsZKVm^ApYS#h}7ldm?X=3o6gR`;Mh)4$U zqmd69A_Ae_hkqa7Xq>nPC?`{miD-fw^_$r>3d=;8679IBWyz}P(8JZBCTo|pI`uqs zLi4Q?*)PXfc;c8@vO17hZGgSMV{n6~He#L+46^Qf)=N->=GUJBLKFGT82MU0P_;7s zT93fDU;b(Fd1&xhQ>a6o6a{s*YJ4?T=#h<7Uypt+_Ylze=i>0}wzuc_4imB2a6QEyGf@A0cbuk3-76dSfl=e9p4l$TE) zn&n}bZ0xNqj=oOCL!<`C8fA$Go@y;i8%QpGmazTMB%46gs*nsZ$Tt%MpkDS z$y_MD!l3*dToqu6O*JO*>XJWDl>15+5K@HBMJQh>w7=Y76xPF8BNsA@SZs>|C+1d{ zmoDe-C}7T`Q#fb7Qc z_Z}B8&yO1r0!f9Ezs4UQc{%>l^Q2f$I5$)0uJGBopi!Dqc+G`Qg#l=ga;85sK+CbOpE!Z?}?D~X5!`OF*GLEb>;1D!y)nL*}8E5IacEFDtu5&Pf zjltf*d7|jv?)(X&L{jWxf2>8879USSt~m*m<0cl9=WZ#`KybwhVgCZ$(_J94J>o)T zUbL~GeUT=H7t(=4#N^Hi)=Xg0Ih0LfngkQclMS-XZ)L6F2H>1QCZ;kd76lNc1DaSE zKw0jYbJE9==@a%*Z{jr@frLLy?7E`folGMsZhISa}3 zF1hx9y(U<-H6}=YLMR#FSzd61$d!lFlXncI9Vf)g~F-I1z3Us{VEvh~aVjE}L zfs8<6*V{(F6@sl;g>d%mc;Do*J` z(tpsimDeO_#-`0IBNM3{z*N(L&1gGYD#2#-%0WevU>LQ1*)r#EqriEp`Vp^mg(kEW&19gwXw_&2Y!Sk>GI9!#M}bu~%B_tecuV*fI2PTm~g( z+t{$4M{CaS99v%Lv)M3N7Zg%U1sBxb^`fO9ejv}KWQeF0?IM=Eb*Vq%6wpjSr}Di3 zA<-t&YZGkbBYCFIAf!i{=P4U8rmgrQIo&kI6Eavx{C{Yo+zmTa6jSLK^!H4AR zD+CStw~iD%zjY;fVK;*Lu9?xYDOTIosYf}v1_2kf*PFGS3FSS!a_Wa6V8Dyybo2|* zO_I2eSxRcOkzH1f+k%_K%Mr^;mRXypH&BvHC*dOCte!UHNS4Tsy@|Lx;|MW3f}$4A z&|L8|Ha&aQPLTW2<~hL$HNZWgRl3-9>xM_yq#8pW(C`I|XJkvEp#s39alWJ0kiUcH z5@EMTE2oXy?k=4G771j@;9?%u*oZ#R#(@mivNx6MMXgFDn}}3F=^jo|t@fJpFJuuS z8$Rsc;R+9oTo5fa<+|o@>$$U7WkBWVW}TVnu(T^`M!7B3STr-z4PzeUlN57|5Dc$Z zI!*H4FnvG4+U1hm0?9EUlNv0Nt(up4{&qDr}UY{$>w7`i+2A39sggpt{xAAa2S%D zclmyMeKb5mXcQKZjfeguLMRVX@>>?6(9&+U-)u>%ps_6*)f*o}`~G%i*?z2-RK|?MGt_ zY0}21b1^dZT+yg|{KJw>D*`sf&=6D2r+}D+YbhRqSnGV>!*s~e${jX@7gxMyUUR#I z+VN4BFT&~AO+i>oSKwh8#s<;>JwokrZC(_%EZ-lEOL+$-+fbg>^`YPjqYS_C@WSz) z)zQho*~DX!i)AF451Qv{b2Hl_DK<$n5e2@^NY~(Bp3v5O8yOuR!4k#6$4_&}1^R4WitQM*t~FDT;MD z-MFz~MC4Du<#w?xsFC}6YQDZLQiNg{+0h-#v8s`y&-T>NWlf8u#3Q|=7l)`QT+sy- z{4g+ZNANbR{gVmN$vtN(pOJ>%_XC@wi3oWBHo$!M^j;h%IK)saxRanT33*)YjA3yo 
zib*jw>Ot6htLX69rAEe|NoPDy#Ng>{)d!b%?EcaS?7*f*;}y0E3l$PE!RD=yt&yb{ zt`==%yNB}cS#2if_}p4ofAeFr;j5^W|?d5y=jQZ%6GP3?J z{R6eVvx#hT`ynAjo@}}?AwP@^R+)XB!m2iUF6fqXicBE)Vd9}?YifgF7-6Koq8)Ll z$iTEtA|gPuagdtS$l0}-X#R-otI>h)7d?`JR|+H&0VHvumPES$4_71lmE6#%_7gd< zGB5}qsK1<}YhW2+TnWxEl~<~LL&R5I@tJK%(-+zwTJY@KxqVS#kkMZj49b(2I1rGJ zNrRh1w)mBtJU^JLuC={7JQMOUjY&OFTtx*5-GRE+8=Q0W*cL;UCB455HxzwY3z_?L zLVAxrUV0jU+khWYf-fs`4P9$e^K)KAY%5J871&9%eU#Y6apfbmlmeq_)Rv*8a$_Sh zhnE~Zo9N)s-b4ivTM5G_M$7bf`D+x-@_jLi=`*V?IZeva30>!IH~%xybLMs2^Hd_y z6%lZL5hTH{L|1?ZhQi$<~|yuM`EJg8RqB%r)?Dny(wv|B=Y_I-U^{? zUa^Q21ht)sY#ERmi0aNm|5P3o-Q6|(v{s(=*!_fsr@>g!K_KNj&GD?Kd(HIInIJ;R zC9~qEUfcr0xSpf8<6sG6Nftb$+B~u3iL*B0{qO*Dnn^!e&S$3IVEyNw-_8~v=9~Ru z$z!MBZtiEAAF$tR?TlNIwH#P$7r3whJ)Zjp8MpP?{e1ax6a0BD65jo(U;o)DxTAoi z{k9S-v-W!Pu6{g0YNqfJ`1tv*e=Yd7miBh~sK({@q(Af7|C#=Irx)bcdOG_O`RGnR z>JmpenwUR45|!xfE9CjXsl)$j{qg$wZ2YnC=`_Li!qB@ZxTo-O%>NNZbhl#JZt--2 zYP8ib$Q`|T^eo{wl>7O-b=CcGs?cKjPQ7?RbW8Le<@0e%bhixP7DU@GeJ_H)iZ3sgX5ft*vg}VOPn@o`| zy5YUpLi|h>?0%7vPPpA-`$YTTdYL>GC|tTg&><-O>HA7$iG1vTZqUP`pOAvXNI^*# zNiNq8DqmdXhY*k$UDr8#sW%)_k$I+Q)K zBjInT|H%RLcEx?^%VJ`>G_4f$As;8dM#)i}zDJtleG{l4ug<5vpt)*OkthDwLipP_ zr>j!)4R47gF#!uGc%%d0$-#5rzW$NR=O(#?QM`~Ut4+bFM7$Dj*J4kmE|4Z=%Zywx z-W+GpN{px7gx$CI-~i)@Hmo(9$(~JDVfR;lo5Vk(^gn|&I;9HP0eX4yM@{UxV=j_X z!b~=mDg>`mC)IL|fgaJl5MR?6eW`?V$3eC7B*-bV3~*_u)r9G)ncVL1r<@DiDciPE zb!=4T3*Y`8KhGvw{8Vuli+bwISDtUuLc^7V<;^$pY@);TdE<^HU4dsuQJi-v+150m zx;wEdr~l_|awIRE6)(r(=2*h}s(A`~G@b-d=iw9fkteSmcSXVIes>p+#HbuAomQki zR=KA1KZP_H|X!A`$+UWazI<6o6#U&Sor}KbcdeQw-&7wx|^&y)82NGfFg(lYIoj5 z?`p%mls+~4TAJq_m)@-n^eo1)YIs+$dAZ&oNr!D2)J{ZK)%FnDl%;X|-t#19;JTl^ zTx-Ygx9#XVi{UfC;M3@KIzHNPOgNpzY+!H$xgEsb3~sA4nF(Q#xY z>WJ_8iJP&i1Qea->Xv)B+1yfJKgHYWe=fQ6^Ee`9OX}1yRIJIw)noiQ%x`LM4uwbu z%S1}JW}~=sf#ShN=eexV<$t~5AE$V$u#ziPeuk1Fu0{B^R@39TA#r<38T@aEYVzLq>f5s|4J+c`rG0G!V#oSF?nR7q{vi)tdo>l9~~pNc9qO`U#3wHK5FbD zeZq@bR61CGi7O`MvzN$eok!^MpET9K=JSl-v~qhd(n++LQ;28V_dPY4e{{DAhA2u{?SCNY(+UdJ9%BER$cuEm**ewhnz@l7)32Id{p#Kw z&DL|p)JZY%LBR_hC{-DC@!DV%V^i@;)7fjj#~Cya!`g$OazYD9jd5%{|_nVG7q<9|Db~$g(X4)n!801uLpw-6z0U5^s!I;tN zY*fXiKwSmLws`yra!7KC+r$-h6VG1H1fWvQWi@O2W(3As4jJwCyV7KL_oN4C2c?)? zAM;Y#Xal_B2<8GRvR{$Ko;;^4^@t{gGi<=#x+NRjQ!PmFZ}bY$#4mGlO9$<$pH#^Y zicK<|3;COOlK3I28)J6(mXv=Nn=MWRKIyWY%puwuMW(}##c&_7hAk3#ggH0!3wYk< zOn@Iu66{UaS|~x1-MyMhUz5Qfphk+vn)f-fgP7|($Wm}Ak9W!GGuYDx)GRiy3=$lB zj?wM_UH3!aT5jTW=M3W0zqb%1@e4d_o_I=;k8S2wY>k23t#uKngk4&8%g=q*qEt-A zg(Vg?u1`MJ5qDpA2L{#iw*E%Re1V#EzFy6AE(~HF`&&p6inYy2lK*3;v-Z*R-*)Mt z@^h*vMfzY#9!|&zwlH|EXm_Mx+f&uCcD$Qqn_MFew*ei{nkZO-hYt~%f$7b|;LfIL z?v~BqSiFK|9^K1hkC_n|P`-Rk=9QCgt*#C7JI12%G z72#S09|o6cUPzXZ7N_xM@9~EpOJ3gHYP%fUMfkgVX)Vx?dXLQSG|33^&{M}(c?d{) zkJoQaEIx(Db2oX7*KzLIY(K=QSo3#;3O=oQ`}D!VsFHWK*!a=(g&Q% z7*}OKX|}NGA;2ZBpxHyH7mMNu<-Rebl8@!0{Zam5&?Eny8ye)Ht z_Jiu^y1^jhI;68bS)*HFEwzJywo9Fp3x>>>{p)|A{9iMiU5Hg{v+^HrQdoq5 zRM1*Gxu7~KQ0jfIGx@l!>mT6h zQCkX3rC&xue-s%EQUpao#iCNxR&lBRKmILc3!vHq3hwqUfG~g&T1s7Z5z>{xh&K!*}&`ZL$CX29yo2tH=y2O*(+y++N0FK&T`cg-8 zjEeFvd7e?;CMRJGsa|}r3bXIDw1fyPM;Kd^$uT_s^)u?EYSstM9g?oELfqi5XbnC+ zO$P_+tBY6~d(@}4=)c9c1KmAhSf)2)kVkJ3+cgtxEk>+G)vNSvSN*aZw%q{51m{*& z<6(kFPst!0UR2F6?LUvD5CGDQ$IiG|yHLN_mFe%_rPBXySG0@tHJ|gxf4+-~?!PL6 z`uks!mI1o|zoc~@)R%uZA09qjRNN!;-z_k(L+bt4{UnZmsIUGr9w;aYGIF4S{x8w_ zN2mWK`^qZ+AM65V*IxwWXI;MStb#Hc=`yTN?*AGD#*kI+jzT?mm-w zGYe7}BfT}e1NtnWu%QnawTb^9btR+;%gV;np1sJyb?g_JGJLIOi?I|P`M+PIEVkV9 z^+L>UR7NBqtz$mA7DDI;Hq{@@|1<Lj{L^laC(f+cWGh127Z5XvZ?!zFLglt4^RxgiY=?ALKa-7kGP9! 
zj+g3!bpaI_KQ_dsAydCgh9^=?h-cb1i`1`p|9<^%aMk+Q8_i4r=XDkLlF_yQ_ z{$*K#jLbi~m5wmJ5*w};&z#MiE)-fu-Z1>zI1+L(K`wvO=W+SxG=pzyIxNPysE0>W-6imPLq>4$|gQqau)1899Y)w9(FyShc7x%*8GZ!?TLIu4)dG_ zYXH60fLh`hHu5IS|AweH3lRP!V3n28jMkY5Lu8r{WjiN0#tq# zyRVpp{$GE;8M>Kdq-X=*#IAw*mLA2JAQ=1)QvU|^k_CsAFq;{huV8%Y*o8O-@26=m zfNk$>2i6>E%D(<<9Ie6*1n3w3FViJl(2}58}a|?u!Q60o^%-a$@Fd|G1OP*a%w-WfAnsC)cN`H zsH(}5Et?+wI$lzOEm$~rGGjX!9f}d;(YtLia2;f}<>G>{!EUM+QiKcbbi z=$KSw3j<`qM#RoRvS?1VWNQmHY|xk*Tg7TQN(*~L)CnnvD>D+-Gl@QCs)Hqqhsg04 z6O&P0Ku;MmCJu)Q^yg{_7ObLtXJo&8H;Um^S6W~fYb+gR6$)=8RbnzFk52fDIHzz-Ar#iO5t>G^axub3)$bA$0v z$Q|>|#9KN}9ZoG}Jqzrr=hOu0Ok~lhfj!9BFnY5QC{eeLQo8ENDJ5mSYsh%PqF6`k zJsEbB&@NI3d}6SKBFJH)O2GkxfRQvO&d&ca_;s?#TLcf-U0q^?3VKy#IOhbFgj2Ch}C!)aDCi(&;z?why*U<7722 zZ*P6@`5tk2AkA|f{jru%^^i4y&4;m$o>UiXi86lKb$4zo0Pfa61d!Kkx$;GZ4N-&NYsCT910-dN)JdWKj9VQcwR4DeqZ=PYL zbP_0-9ZY(?MnNi!b*`K^(azkbRJoa>ZoEw=`>ty{yJ9h?bGa=nqsd6fHBf8#2W?MB zE+N;+757Ul*CrXf%jzqGcZyQXBThD9&EuW_kObP{sh#5G2ZkMk1Y3J-4WG%TUr#}& zv8~`Q7K0S~Q@N%pot@5e3iY&ItWG>Zg3X;e)-q55?_8prm44>eTaJ+jZD&qb_e<%~ z3rAU0-`w8JepRQsIsTV01&UTCoUP05Bebz}tq!uqNr!!u*>Q3OvgnDaRvSWd%q+X0%yI-7UeB4em$hd{Y4*0Mm1{z`9C#1+vp=4k z7CJs*ZdGjVrtD_a2w22Dcj0ZH{_p+7-M;(3!uVacJVMGj-8A6x2z1u@KIU5^9j}s~ zuY%v_n7xiLZg<;@g~8IHH~_WvYsRNmVP}7%N3Q)z0DX%LoqAyt^V6~8-=33~>RBUu zStlo<`FN8QCdjaq97b}9_R7m=U_A_S5Ba-#5%)AP#G9~c!{C$n|Y(9<@wvH4|N zBE-waCJfG{!%Ql;X71=YT3^wx*Ektd$+6U&;f%YpWaItue!4Wjk~}ge0sn#~Um#bv zheie=pV~Lq~s%XPJc$!UXuY-4nfqDmVG!qgwo4L63UF}R-HF! zT8Ra3HSu=^I*JN_jzB-BmrPMKG*{KswTo2$nuR6ym5OaAVwBJwAC3i?VUFXOCq8s0 zpt`xG33E;!Yp5VR{0*D#B1-%nly(qD;e|a;hU+uc&?p0n4G%1nmRQ{rWzA+x&6xT6 z+N<1}N;+1a)>g2Zdjx(-UutCqQG*#7=i{$+d^N!VnK{e&GXVR_fUvF+_hzxJC_!B| z#~eR3l+vEO0^iOXEuMs|ER8Kj|%&J9w!yKtCkn6G(l-G)13A?5N9aJFE<1_1+j#LWOvvUEf zb1f~jn&o82I8;I4=oabXs#+hOYAc}dnqy+QZmm?({B<1hBgCuwCGhh`P~OAJcE+7? zmvPg&b|nJC{hj+6H!W8=+h@3bN^s}%{Z&MOZ>#WLomhQKD?bSW;6|D#_)>NW7FK;Y zE>CEGH^}&OUqbn|+g0!?-@CQ$p4qZ@JQxbfciXIIOLz%)^Fe6-2r9@L_xM!Zot1Rf zHKK2`{(W=35NknnPeok+gVihdBeJ_&-G0rk_J&AC&+_bwC@{Tz!hc`v67I@nTQv^* zY-=a#^TY4@*fOZ#^pkf)&fTQ1YL7b@*t^>-W`7cJGoSRo*ueleuTOYNDbgjL9R?e`lt}uyW+vHJjrB*dV`zu)M z-CpCbNJm+IM~YNXYoyXkPvxJcX$Brqy-CpcTfU=gn&91ijWu$#YztD9>^uqbZKKF`$+`6d%FUAdhu|M`9NoGH1qC(^ z1tGz*8!41kk*qB6e2?)IecakVGvuY>2I11NpMf-i~O1gI^>k%%-s~qk_ zOFFZr0b*O5vBT<$2*aUM?v9uKkAhrgd*+vp)}{oB$!uR@sQe_OpmP~=S$W+DjyHI{ zHFfu*xb_*hU^I?J*;FcjlaSo()wVPf-8)_QaO~>mpug?m6Mn=R<}-)NbDG(ZA7@Y^q2*4g$`h5b>6v3_Sv~ald z`S={nTUO=1{lE$81fR@B;<}_v_mH~4Z#+T*K)S9hJn>KxAz<_K?!T>|nES|BcN*v| z{PcgsvYAMPYMP<2MQ78AGRvBV3mWveEZX@!nfEtI?iC-YIBUxIWEqtc!PXO4@d-Z)OG>1qOe&N+6TdvJ^`O zHW4t%$#m zIY#E^JH7?ja2>o}ae1pMV{nV9G7l*&5`rqqAfr4pJ2sZk5QK#B#8|k1yYhxMn#{Ov zxnrJ(W)Ce8dXL;e2+$G?(gkEPFY$tvqcofmEp@Fq&A3kU){Qbx;`fiot8Q%?UGuUU z?8O&i)HmW^n<3j*8e2dTs(6+4B^|{|9sgQVJWhNp`2D~Bz+8R^AH^(%j0Vjtxc|05 zl=zqf5ZJrhhf%+`r3#_;AkSb=BViz@vn$EmMd zFSL*C?-!V}j79i*Q`hBAE|M6W-m|x!)teVbNhYTPsn;D=E*<$nUB3q&8U*%ELgJc( zhy{*h%xsv%q+8J0FqjhaWt?WjG3j1S_9anG^34J#sFs>TuV`h($cLZ`$e%%L!?A`k zF?{omYelcc)yiY;D`3*#H~QoZq|-0n>$2>8pHtq`k>|i?_4FP6>po0(iQGC^FW&(s zEIa@GtYS`M%g|KEu}II{#v>xeOq8=>tNZdfH_|PthgOhuEHrD9VDEeJhILIHu95u9 zxI7|uw8UK$Mezfjep&pj(R;#o3XAIaD%<)FZd2N@$`lNa4CZX6Z7-Mix8J{z@g5|# z)HO660Q%HaOQ{xa#%-2AEDV)AG*re9qYJ?8)*)vg%Zk=7 zlMxu_iZ&pag{-p|t@>P1Rhrk%DmW}c4%gO>-e>R?m1L*>3bu~b4^6fB0G7wJ z0q1~PU(=MvqGVg62ydu^O^JErw zbJzT?1(;5&0Jy3RPRSak3r9^V1Aw1+iX-y!(5NUnv4o2 z_wz0n^>_wx6&@oVmm?=xmfU;xpo#V@olSQ5G_a^ehSv&t?J0yV^dro1Qc0CuEP**2 z@z>M~gjuA`ViY1h=(pIiGU}+vx>6=6VQWh=;>8~cSo3UnzcTrWGG&8Bkx$Q7WRsE^Y|Gfze zC@yN5)UZ&Tz#tq^O(W1zZf`2CYa(loQ(y6&(Ry@skJ*4uC>gbc9(N+y3^!o_EZ|zv zX? 
zKiYEV50ts=!-W$%;3^nXRS{LK+lJ%Xh*CF2CH8fH4z}RyVIz4scp_PkDu$|>;!+M= z4-mp3iA>J2S!I@tqzOvJbiSVX*}bQfY1~_K`6Rw+tsya8&7#IOmXT7n1SiO0qyvuW z7*%Pta0$+WGPP$hs;rTYG*kZ95&10$#6C!RzA^AGT zBd_$0a|i+xZva9nh|6;N7&Je|PiKhH=3Q$3J}A#J;wa9N!#-l~??VNGS>iIRdLBTk zdMN!d6#>kDm%?jUMaZZf8dvy5AaJ;U75<)tQnHumF=N2Pv$sDYk`u;05kuIOG>gbz z%lM}b)+3cUn=$rt(5M9pAc|bXmLZoh0VRc>7^5;!C}uF2 zSqM8JJtH_PpKaV)X4zs!>m*S$rx0Ix+#J3b<95}cKmI!$G2>Or&Bne z$#5P+XnKHEf8lqOx3PDEs*i#00R-@v=V0xN$+;-WMZnfL1W`#@vf;61QcD>`RP?u@ zdIR0rjHh!G+rMA|mwOU3zN&x%eZ#;Nz6goVS|v5{G#H7n!oh&or)#R4wyfZ+c=wgU zQ;EosX3TO7-MyBWgn`S4;>>yY#IB?n;Y|VGEAt+Y8<5-je6&>{z^+u>2}|m7(5~-k<2KJNp__rM!@GFQktT?JHpuO zGr-f`RZCODosl~m)G~^76ez$(OrKz(NGXCt!`)yoJf}L@azRtw>ns>&xfx0KP-B{) ziHy1@-WY6MS`IIr`7V{&kilF@Mh0`Xbe_}oUf`)juc?kc2m9gNcVU-PcgygJ-y=jvA2IqYzYlf>|0u}-+puDE4&qG8?auqOl;Q}g% zJHUYH?~8+D44a5ZVO5jtnZbXx2^L$IoAAp#I+S}vGl@qlzqzS*agt+9>PP<%15Ckd zs&u2W=wbhBME!<jDvPam|aXnO*-|AVXk=g;HxyF-k%s1khL`SjjY1$;9aHHAh|3RmBxQ7Qx1iKZ}NF7gp|oIL3KiClU6C~%QO zyAz7w+YE@;vJw{jO&G&Xk6;Q3hOxMm%+RwIUzm`{fl8u}I*%i!#5{(Tdu&(chd==X z+m}ogVNr+@vnRg08_%F1m?~0oE}ab)wz~}emHx8m(VzDEJ{;gZ`8+}`F$5ku z2+pmJPn}~0blTKf!zfpX+YtLFS-lE{$E1Vb0D#MYZ44)Ey77uqlVO_IJ6q|9 zv{+>5cqiS`!ahgrzzrL15H`k47(X-^6I9jfAUdDwS29Ygj0GoY#)N;5R)rh3dH`ek zgmiH|99D2)C6Gk+Er?K8cr7mxh^`^RP#j2aD`Xw-rkOjv~wl3p^R~3=*BLoDs z$H64INhr#w+u8k3QC{EPc2~_s^2vzh&~~{<*pRWqdiPE;Sf;4zqbqd0FYvdb!I;1} zqQhrFfH4=2AXTFgHnGJ1>eG72TLB`GUT0YzQ@tVS(o>O;FhYWJG>}b%10BaFCj8x& z=xn}mDQNQ6<(m;mG*=x@S;dNz_ut09mSE8SIk2+OjG7osH&J3wP0NwG_?4Q|Cp4J< zl@11eVt!VdKIRK5pNHDq zjx(1VsCkWU;hW=zlZdpVOowAx7k@BUwq!d!jVh()C>^F_#`7v%FIeM<;A_t=&qg9- zFC#-A@@{?`&5ioXn2?s$@05eIl8f)?_7u+;h9ni3j(1+ZexkQ`U!+}8lvYsR>BOEi z67Kt%e7p~_{>9FG{D?gi?vhW*0LJErKPUlAmtL|=OPvwNB$pr|b8ZS-U0~5YhpC$K z9(5hu5q>3G;hHpdw3&&fRQWGtEd|rV{SafGYXiRX2h`3@7O6Lp3h|wQKu1^k&tq^z zLjL^`q1>=Ak>VoCA>nxvz=Ouhrs>fFXG?7-(3a7ts1@eH`&l39d8Ck(T zzBo}dZ*DWHDR|KHnXvI~Z$G8`B+V{o)w0o#l)X|+;IoccnN12C#{fqe)T{LNVmPZDZR*DytzgW+i;)BlpKkSUPV4<822nSbHVt1Z zn_F5Nfu}Yi8QHr~`Zb>hxk$FTx9;ULM0)eRnd(vqaZf9k-FycD?I?Q3dcw>b?&& ze)qXsW=_;ar!sHb{>S}s?r%@j-R^2k7COJ*SM}@eU$i@#aj&IGjeQ@&cXo^k`8+4z zOOCQ+rHe7EssrlQF>L%`SZa7t3#vB6M`}_XpL5Yit z5Z#(gw8o>)(cYITlZ5Ms4??{|lwdNM1XX7ZrRJ=_u##vAoFrVG4$;|mAaFr<(|Zqh zY3b|PoC1!|vw+*h#?f9qcia)iTw9#q4Gz)AmWur5JNFcbQoA~gvDrM~Sv+A4ow~5^ z=$`5XWtZi~W~Cv(0TXgcakk7nZMJo|U376>eWuSKn}dh z^n<*2}G>tCTq?pt84p*O;rKa`<@K*xp1w<-``{dqD zn^;2r?c*8HW{EMBrq~4n1Hd4OVN82z#pf2pk5t#MMXkWl>ac&$C3N)l?YZrbB*C2} zz=*EP7;PO4`mh>x7TC+vcT5avDI%QElb$2mLn69<4vTxf`QWZ!glcGao=Dd709qtE za7|JNAr{Y=dU&O6aBdp8c_rb=fYI*YXA&{-c=t&V3G64Wh5hVwv2d|KH%V?4T z#8oco?M-xle5Eeqq^=~?r4W$hGC{mLS$am%;{ zERw9WPFi#7AEm@FEU9TbXCL6y8&ar*cP}LTJktu zAh*>!otCBp^2+iD>_+c(3OruKmCiC`+N~QD>CO@In|m)Fjr78aEdEBRpGW96O}!Iy zS>I1yIVK*09^&Kw<;cQ7?Gug!Khe~$=;$s$S>9!xX(Ew5g(LWp`0+vbDip?=v^^Th zl)I-Zcg(-ZadnN_dGF#{WmnsTT8l2+&#+ikaYMaVATs&z=;14HV8KONtr?9pk+00F zt7UpT>cX*<+FF}ax5}iUuRK8s|3K*E=|0|TFnZiTR?eTo860D}s}^@Zfl#H_-sf38 zkr+^#p&}QDlbwC_bc{2l0F)ig(1hFsLRe-l-r&mMX`fZNwRSBQ;y`2bb&Z!&P{PZ{ zt*i;|vy8Z@BbkZ(DJn0$m>gmQfSC%c{m zKd>Py?n(68bL!f09SR^b0}3+IWT%eS`6Hg}(XXu_W$0x_?d&rDS}U;AbK#+Tcj-G` zHXL#V&7sm5UnQ8@aPOX&^_7qq* zx&=NHhBoyD^VlERi~sKg(!w+zya zQ8Bs0Z)$!W%jd~2qq>QFrZeWtV9mz-31lTOHvy$`_V1u#Ui`m!ddKic+HhO=9mJY= zVs>Y{??zWx~iUAYu#&sgug_Q>uuq?t88VG z!SXbYbQv45h$VSu+&ZnS^FPxpZPcac>y8hJhGg)3WOx$!3{gI!JnaY6^?&m<5M{vEzH^{R30tI8TrHDa4stp1EWJHmOU0%tQAy26xKWib;FIeNJC zkVvrxJ2rv=BH9Xv=-?Rs?PnO@NeF_@hs%1Tt+_q$Yc%1VW#W?Gm~r?hc}qjp+~z$} z(^mD#Ci&kgX%y|T)zIGXA^yjy?{;qyQ|A!7dbViW`YOo&Y5 zeoPFqD%_^d{(({E_FnBu{kvJLnm(go^$yFz_! 
z>HKsWxV^wDPyE$Z^G2fyoUE|xx!WTVd*s`>jk&n)J{Nj;;oKdOb4Q>jzF5y`XEAyl z4>xN78a4y1kR+a758&O~ew9RgxjlwEd^bdv{l?YHIb3g`vj8c3o;R>FyzX4R987zB z-7)IyEi)moWMNk1F~7&a^YLFF6-BUR0p6v<`#Gp(o*XfqT$YIW`9_G47thW$#WBf~ z2k^X|(S6MqK%-axLeo9MwJI;l&A)RuY|7K-ov!=lku7_XZLhO1CM~?QoZAekMwxkvER#~NKqip7y3Ulg0@dh*k0gpx98U;(oQ?-#_2s#w<1q{3C)g{GEO|5a3&bD7NZ z>DPsqL-ObKGz%doljVd;&$I~WewEOjse65PP2xJ~_`n=e zM0#n;gHOmZBsK7BG1b~N^yTpTVF;ouf{gcHBB0s?4cIv97BXScgOn=Uy$sK9$wZE? z7myoBb7a$hRgb5gUUQF_U;q8w9CLL}=*PsGj@d}4AK{HmW20dDoE!7s?sp+4+jXTRf!9U3rGUB+JcF|nvahXI+1W_qZ6ne*Vljbre zJbXL{w}&{8(Z<}>8`JYK)2)5qwpR!{K=WQv=E8TN^KHHApQjecc7r$?X|zsg=Q%L{ z1z!0P`hJC3ezKOwBeeBXitlxg@bl^vB7iSx(ok6Z4G{@cXq{y95xKZg61D=w?I7lD z>wUZ6zd1#bdGaO1o5_aKa$dBPk4TsL0x{INW~sg9MdtgUxaPf-Ty#=_wxsNxcg-;E}II`$CtgchgLrK5K_&3bFcj_Av34 zwa+81ZHJy?zyOz}+{F@X{i`#`9zaUIqSyv*Jdg$)lN#G={4+4} z3wo{DpBMgj<)6Dfv5c_Q>|NhU^f6FHQ0+0$RwJ^Ts=C$JM4p1GiVqwQiaaAv({8&V zb^`;&PJ03oIgW`J1IWs0XwI#Dxd3*;U3f1h69=EJ?toUFCH~#xTr&a!2b$31E`6w{IRkzj^ zWdCoKL`Rn-Oja!mQu-}zU&Q~dSSS2lhMbtX11Se$AZm0Iyo3aZ9O=oCj&|Rd{z+L{ zkF2W13v7Q$dUw|gl4BF1xxjelV6ve?vo|3K%Go>VcXqx9}Y z1{6w4QiLapAJ3ky!aDtKQurCIEZv!1^-Jsev+fZr3({0gIz&O?#=rP`S;K(S3`&qx zQg*(-i(FPqALn1!HSw?-XwfJv>|=Wz8&c!-(%o9R-tf92P6UZ#s*{!#lyK-D5cTw+ zqI7-R@P1tLoXB9M`w29-F_@sCOEHEYH{bcbYDIvBk%f{JwT~C-1?2M{eA@{>#OCJF zww(JKQ9(ciGbJzAAB&W#qkVH%hX>y&yKwxMWGWvIbc~d&3emetTm2$d`Vd86iRW2~ z20)*gPkG+gq}R#1xpU}$rlnM*P{WFf4%t;Nhyazp&~NDC-KZQeosKRdc0MIwdxWr5 zAd7^>QxYSorB&l|BAiozjgYK0@^_CM_TtYa06#O@7 zl&U&0ZWqotEZ`=w-FMMcIdVmC@+F6-^T_EoXwVqYz9cH)DqzVw1^A^8~i!(KgIQ@5b>pjgo@!)DTQ^4#;Ff@{S?jWb?B3yQdeQHqNYHU-Fsy7j zd8FA4a!wJv(xYB_&+prMJNYkQs~~U1fKf=3YJw0v94HHxeRg_@teXc&BpRIJB^^{r zSmc$`lrk65EEZe%S%U+S0!kHx%lvE08SlpPR1*La3RVRLwV-QqFcqMr5;e=I1QP(Z z%(K7>SWZ)*hA&}cRqN+$NsLp5&pj0{kyF@S;Q~{K?)dLp2xl!*DZgD&mSQN^lD2>N zZ{!7>*;_e-NJ?rm_?h=)CDcoAX*vx2|D< z%w89-Q>1%_isN+q1c3Ab)Jj*5;^yJiA@|u>2yIBRbbRRY2~w|A$fRr&s?RrB$L06B zv0U~{jDC-Pr>lhY!_NJ>L({jH`Pk70;`^qnZ$}#N>Vg<_p$u7$8sOr4d=h?ArpBe3 zy~l*h=U#n`Sm1CO!bXQr?_sv??VG5f2@x88^O~i5%06xbK9j5=xJ(ss5<|D`ocQfO z%hi0slp}t%=Ba@g1WA)Jf7eB5V5iupKzhYG-1Io-Er%&6i>bBrdcTz}p>Fb9oz%a_ z*o&uxiu5naDrow8A=(gMGnm0gj^n4ZP{7L`=I?-_x0_jPuV;%9^F2@yOucthFRl@) z(KOWTA$@i-8V+w)L$o|yR0}RkyH9`FCeLA0C*S_O`a~O|J(u-~&$mgo>pM7g*4tn|(@0(~-+r>2{>#+@XvD^qc z$=cyia_u-;9cJE1_Xgb}UM`c-6ZuVVaZhlav!h78Oo}R(XobYort#(8mq!m07`~@q z(HG#O07YyCPMgoj>LmYuf0<&38~1?+o!mCEKDw0xD1+qaVOfNkd_PXS=HZyBbwQnky58Ww>*#2z|+# zMNnU}hDCYL*M04cTSfk{inHw0zq20dUexWeNnFJ|X3*!syKj|rP4A$t{+hB4cb+rs zqCHiucOAk&rz}qgV-%4*w!yQzSViy77vyZ0U41@t!{)^cG3b#{L8u+Z6JppViHO zdFS3mCD9j2>TtA$nlQZjWB#RleTl%4p`CpM`Oq;7 zh*Y~m(w;TyXT24QmTKBpmXOzG;-P;V#JbJcb?nZeD}ndHVcI7e%u*ytW3-)Pdp>Qp zcx;BuHksR5F}oLR#yfa9XQQF(f%_C=;H z#I(Y`4AU5wNs?v_kp}@WQGlHiNIH-@p|w!_OgNqtqQ9@dpp(r)Q29@O z^(S`-4bi<&k1ibUmWmjF^)jKSmQTTgZ|eF-6@iWA#h|Z+e)DIuC)w4eegMf@ zDhh1?#`ND&RX*dq>S#fyTg?JuUkoav7E;|dLnrs~veA@L@;+XUt!tr>v|L7Inbe8U zq+ig+{RzN?n{}ZoV7cxdNa5$)s<#W5^H5Dhv#$yceQ#4mO#Kd@9=y<*j8II7Blw$Q2G5nj0o{G}01p_B( zAlo(23>yx7TFfO}N8GDC7HUt?e})2S!qhf|$H1{ay$0!9A(;=^^dy%C45#4hU4VHRm}uUlIZ&PROLN;iG+hTw?YAdLT2hH$8(IfEou&VnXwQT~Vyp!gr>9CmJXI+L;rryf1A()R ze)~sfKlM&ry%u%h|No?2o>IV~L$`5#yob|?<2z9sI>j2*W%1^Ta_MQfyLr#uGwOiQ zlOvEsqar0DuapjNj5O0`keSqHE|P|Uq1;C^94DCX-g$d}Aa#rzU@j~#8LmL$6s4^7 zH%kEn4G3GHKt+AcgWk(pQX08!Cz(GsSl_vx;CB zLH%OXKW^34dP|aTJo4OQp#E~^1jN5iMEU!bxwO=Pqe9Ja_TsY6w<@^FOX|U6;&c=z z{}iB#O|pC}me4qu-%c3P9Ud^DIbfa1icMP>Mrb9^{Y3347Q(Y^X{VH~AXR^;eC9OFK>sWJh25o0mZ5&RBzb5*a`YK+3vu*|_(d@bb;qUDPU!Njhu?Itie|`W zC||YB`#EXTi)#yr3vxkjcC`)3<~5rhf>BbFZrafFlVm-DO>-jQ zUIQe7$8c10dqs&)kZm_pQddFuW0wJ^?G3Z@?JiPwLK4jtrKzs}qNCfM!Sv+@ls?TF 
ze~8m7FGCHrI_UkD7Obev(&{lj>+3VY@HxmmAr-QOv~0ea-!xCGM0U5&J+a`pu~8sg z@+W1}Ct#$!K-{pjLB!2B;_B$XCTH=w^;}im9Z_31@0Up3_lE>k!+7KOzc!td*An83 z(dr%1&CUPz&Ygyx+V}21PzS`XP*h?bHomRo2c9pon1~K&q%HZxZ%mUR%M#kpSk{`- zT}wy3#n@!~(+C&~RqFB|#(y4WKixPDjjEB^rw4j!vU*1}vWrPkv~~Q`XZ%@>ZyX5? z0Nc!*dvc6F8r38S8viO~ru@ySA{Jw2>&jVzQk^H2v4ga1HgjEbP z(_T8X2se5PHH{&eFT~43ZiZ{sM9Fj14zN5na+d(5{dg$3^zHhX{z63XgkR5MVW#UE zH^ftv5+zqZtO--RLC9;H>_cWWn7J#w*yi<^w&~^5udZaceQxL5I{Kau-Qd-AHt(O_ z9om!+CG=&Io=E?3)vDq3Y+e?&2el&&6iJ3 zgY|M6F5T)C9-^X3w1MI-#X-xzdob?kb9(}stq<~b>Ur2;C$c&{OH~w=_3hspM`9?l z{VBp()vE0e^H^iS)@Vg=sOx50LQB%_)_oWMna^Ru za%3X2k>rd2My|9X}Eut#pihx!cvY*x51-ZDYAiKP1EM#?ygY57!J4BlAp}A{i+;1Vp1?1 zE^ojXj||fx#qV@wn0fNJWElnmJbMW``3xA9+vgL`ZY=L_?@~N^d}Sc2ZRyy%_4OE8 zw(D_PBeA{s;fAc4^^#9t0bI<=8*!L94mZk*AN_HL+@YGk565$^DLokkmnV>as%Uw> z_8|j9GSv`%67!3sgOZ(4fq5d1OYNTToHB;f3h(R{s;p_U+2aLJ74l7IoHr>SwZ(W#-H9NX8s1T9&4 z0EJ5aXF&2Kbekr3hVB?Ollu}=aUSPyQ=!UW>gk7XYXKoVD=uzJ2}h@j2LXy%ohjGL z5l}3H`gUG_O_To3r|chvt1?WwHBH4nMw?#{-MBSP=xZAY9||wpKHc^En-&?;Bc;<= zkyu5^^$FK8nNDsxG_t0!67HN4LLzF{d$eGBr@oK(z~Qg%`$X~@%tVEz3rk4u|G#ZL ztW&Pp-1qnBoQ86_01yYgI##$b{d6);uYM21hO$@3)RMd2?Y4JJw(EDIIgDXOeK*wW z%?bFA!#^M+WKDwj>_)ne0|CDu;bltl!d#!vHG{s-C8N1e0ve2&#)un*JlZF3bxu!D zT?d^Xa|b>`y=63c@*}IJfX-&xXWQSLy-K_Cn$;I!mlwOv>DiXMSB%tzS{IWH41W|JF&0Z;7;$l%?#x^rqQt8zazH)K1x?b8}>SV`zXQyS5ssJZ+GfA>^SR^Tp@>xs* z*msr1XH`N^RCG|M5DALb3+?TsKkc%6zooy>#z8HJVsULwfk6%ohE~0ob4(0 zTw!6V=#`a+f!M!fkR809566;Ln@J$dF5+Q_8FD}jdMyW~Bxbbo;5lKPwaarcO3iwq zW7VkqD!CIzsag%LY}kMy*nBwe>7>Q#Z7XnpcNl7%wMcHWT;v^C?H~KnjheZu{eU9W z$V4^5o|ks*g2^=GxT7qJp*-Y5FuF+!i|eEnkgLyFf!`*Ye(~`|lbfIsm&*q?4rx$& zvWWH&Gw1;Q0@9j(^2Iz@U;IYLLgPC2H~kHl{2Y4hFxF};SRMP+k2!77#i7BR#3gbY z3lUe!hY4BRyoD3N<7{Gb7iy4hrq_Qj0_Ps*sB5a7lRaFVU+rBJHt4}@+ScN!J;{jL z7Hv(7v44z^mt4-Ae9yR}d$Z^<(jm)6pvUG^PK`CD^ip`E zNaog;we3y)E=SYU$>P^YG_H`?#VPU}z3kMrZ!li%MD0)DU*Vm_!)a1vj2@0@VGq>y z9N}g&l9L`v!bSv(WsE-*Yl4#QH_qm?So5P4H;r9wV_s2S@IC2Yq^r3ISX_gR?8jJj zVvVRJ-NNq9@>0$FHE=_?<2|athU(F?m^*IvOw%HRdG4DE2 z(M^358&b~}SU;`X^s?J`A^Tq31*i4L?rbhiUij6$ z&_kSZ^=T1iNR2c1g@bZf1)V%YT=FM;fbv6}W&LcY?#ah3qhsp1Sjob5cqZzSMruFX zDWcBl?_0#i#n}(WeC_x#@RsS+DH22kH4@0x{lpi^B_tIVicd%y&R@?sNK{VHSG`{z z8#&EVSfjoQRi%iD?Eq+I{s+xRI0}&vxiOl{^{A|ADYoHtR}{a!W@p z??9+gJ9)c6kE)gRq16eOzH;gN#U|H!V%~`1jhQ4`RTxdkObNOhGJ))%_oleiY&Rm~ zJ1^He86gkHPC>rC*6Gdw^?^f}&X`+sR;@-KWPSG@+(ia=D04RINTd#8jJ)r74=QUt0!S~ZQv64%e2n8SM% zPXp*9_Lg~KGuESqZ!1}%dYEb$7#Q_j;qB-Mwyq&IO#+?UGj@PV)?o+q*(s)yAGJB9 zG$2ViIFxndVr~jxtC6}f?Ey}ihM$vDDG!qS-k45{E)vuJB;=^xz7?SH~ z0wN-Dq6OD!&I)AI`1twJ=yyj%;80?~c|*;&h~Pr+Q!Qnyg6guiJlg$=vCN=cJYCmC z?_0K29>n>YNsjRNe`aQIiCnj-j;5UKzFgj4=TCwKa?Y%gGh#!X4?MNyF)sFt_5$ zNWwUcJjt)p(#-gayNtYqWJ&Ja!0#G}B^E;HhdL(&V%q)KYMcN|V_=jm!xb!uWy2 zq)}7lY(1t*iC^LNDsJN!W5BS3h{NgSeI{pdRY3%X9CfK+964HZr5eD$c+NZxx}od9 zY;fyP(%;rW4ZYsE`VworlWGlB2U0`m{;Z`j%#d-f(U#!C!EF zDt?ajJ^F}TU%^7Edr+;xK@}us^;3|;z%&Ozs0ysqDbj|e^<9cNsvuUn(Af;CVRbNP zZNd?nQ^^m>xUXUVl^{52Z@~$K2G=6qXvsHorUIzwGR-n~m#9w*h+%m^O7aQtB|eCm z(P*WZ>Rg75T6R8h%$jN`6Id{VG0&@???i4hjhxl+duM6$?NS4y{$*``tD6Tohv+Kk zH@8w03>!?@1AR#$uAiu{M4zS~2i;giwwkPWu;Hk-C!Jk*@Cj<)XS40@+|ps*!-=DdGw2S?BOht}1VL2}wQLC<==nO+mQZIQ^i^$$ zhSqv)noom|$LBEkcg`No5{++_8p>bS>q{=rs_*M!I6P`cdwX$h9J@76>q04>k}=Bx zA_?&o%;9Z$4xf>$8?sPQxBeWv?#ULN!6m~> zY~=!n$HvtFpE!}){+@B``+<}#tFR%Aw}U+{Ty}22c3HQmb!|^cnYbxZD*nyM;+;>E z{}m2~a&?Wreo0j>O>@Yhr$`mbpq|NUJP+QO&2FW&K8-}PqdU>sA#=VMO1n`udYS*I z2G1;|40G|&Fxb|#FD`7~Wy$v{H0C3@b3eah^qVd4ED7=6a%A!_62{Qv6s`&k*O53) zH=#aJnF_W%kRhn~BsmxJ9Tg*TeSVG;GGiD%)C^RR$6L=ay}FS72{cQJnRfqkzIW;V 
zG)wm}D%(1@hMT0ntx3l-y`NPhR`H%Wr`qM;Nmn2^b$5&plhFDmnR@+4QvzwQW_50KP!oW>Wq9ZoDXxJB`n{F|po7ls zS>L}6xK$X(yb+SRw7E$C7BzI;u_;>~EOZ3RG&1FeW7=PA@MOYs7@FgMLx3WznLijd z(8gQWXi?0W3@(-< z1v6Eq3f-@m*2SPv+MsoKi6P`kcI^|BaOLrTUatwVAJ>i(!Hx%@Rin0O9H;Ati)~Jd zi&3~6R)dZ!rbxb47?h`0-zUv1d>0Non#ldo`w!%NkpW}WoDfZXs}Lu|pjY7D@uH)o zyb0>E!4Oxt+A!`6qPZusCL4il$D@e8Uwg>xbLhJ#(N*@x!Iuz%h{~+l;bfD;)V775i8|C^AA5{42#_Ar*vuv-S&CjCOvq!;|o;-QSYfCJO&*`yrHdcMT z`P1*7p7kz`<}hr6s)N(cS6^F@Ldq*yL~EoS!dbyoEC;74xANUB^1b{V1$C=M=yR~v z=!&!QKH{dX=}P9`>q3W(#L;Y7M#;0jM8gfl;QW9`FNX%t=c2POS-RIF+9BZy!6U94 z9u`OFNY$JmowVaRYxaT#u>c})#tqjY%&`dyamoG)b3a7mt4(HaX_!+PQ^{#9aExbj z#}sCk1Kk=Zrkjd0_$qf%Q!u0d?!`onP6Y-}VJcB8vl?)+;UGd~t^0QsUR>7R7D#j| zh!;iM@p@?vzM)6vf*^~3#WX=PNUXg*7%z*CZ<~Bri~sqVaQ)oIl5rC+TQsD+GIQrj zqMz=?vhWR2XD;xmqllms9{zHx)m!Xi)nPe{Q>-MDMNSL-YauSof$OQA;6*>vXS`<` zDNqOPfg9q$Q??(u(vKT|=?G+FUGH(ybnB)UU@17qc>)Jl!qsa;tt?=D>_9)B`!iz+ z50gZc`swm_HLb3pAf`2oL5wZ;;iVJvA?E-<&GxP2_^9-cL>MHve<%Kt7EFb{hl{n# zr1}k|0hL-WKs}?w}HVFA>XI(1P=6w(I8p2L6JyTESOTW#KR2f4E8?wf1vm#w4m8ha3l3z z^LZ#p6152Aro@CRg*MxbY!$tNMFh>&S{a-lFwe;391>tTysg|_OVu1oE&w##d;|Y8 zs{4uqmT5P1)H_y7+!ALwnuF3rR--barltv^__G#WR!Uw7!tjbgX812Rml{d)FE!?H zsh&CP1YJMg;7&Ek(F^^cSoj9k5xC11mIu%4nvUSp^~trHVB0x_`zpH3K2wqd|2|Q_2F`0iw7}JX`gBud~pc_-43${l4 zkr>DtUycQ*T$85;$w1$0!=AUI%b*ai} zIjqhqGNQ*Rds!0ODQ0QG`mieY0*0w1fgI=Unc;jT-5Xu;_bk;nLX+GMLh(Q&GU>hM6;VCHJmNK2Ndvue=wo z&R>5)dzh&%mJ5a)+l>L4N!1Zkh2a#X^pkX*-OOQo`z)KxF3zVmE~c#uS$F@I!ozOP zR|5&?RF%v{QmG5Be#iv0oT7OQ45SMl9qAyo&gK%fu$}_$8w8#+M}~h(;OMJy=!-l0 zCO=GItv8052A9Xjn#}e#VpS_Csf(m;P$WK8!r?f--fq|`g^e8SJ*Svs*khX0R#0fX z6TWAn6Y~EXwBIbEK)6#EvFR{iB7)%0n9>GVyO_PFfN7zSwHJEwnToSnqA0b$7o_FT z_o4wVvxXqB^bOAS+|G++juLhx{c4uT;Ptho^GQ~et)-fWh;n(D%>)kLI~_wMI-j~Y zJQOTmMa{r+Df*>Ebi2R*r2<=rI|^qM^!5jRW;Q2{T+v zz_HM{W3eVzWo14B!stC5mvv9IXs^0r65OeJpBF~Fv2#LMt|ev*Efq`VzY*#Yc;o*_ zg_HsIEju2mwZ?pY01i!k;EL55VPmEmk(CsX8$;d?SD}goKR||i%WZX=R)Rx!{Rf@RzZBYH083x16gU8`Oer^m ztD=H}`;|n$w+n0Kx2K*tZrVPnh4q?8gbc3%hSb7!!o4t~LwA!?sSU!m2ggM4Rq3LZ z1vnV!r&6I7N$+;sZ_2}s$CU76z2o!jg3!m}2Q)Bk5%qzJQj#ukF>ih2;OQwgKA(_V zG>9MsqXPHcS*GbK)+Mw0e3nt=XtO0wfl67S&vGH2`qWsGRwG3weag|C`zz(_(+abA zik>Uu>1%yUEB^WYl5OkLAM3T=OejQXt_f#BmGV?k>iA;v2h8!R)ettu^q&phE9*kh zC@7p8H*o*mYG#Q6q>7oty%a;yDEDJd(a8{5*SqU!Pdb9EkB`zG2-+w=8>aruGT~!a z;V9U$U-1%5qWu&ss@3*dB(;MAD-5+2&@hiMlgO|PjDixCA#Mhc?T®cpL zyu7?2gHl%GOcPp1iB);=0HX8q|9()U9UDS2ZI;+<-PL!sG~r}iL6(w5=~ymIOb1T= z67q=JoDs5Gr(vVixC4L!4pm7pIn}s8lNYXYx=Rj=PIBKYKz<9P{57tCmO@IU4FC8= zEs9-dJlCWz#dppxFW3C*x_u2ENka>%8U@C1%D$X^3EGH@_-vKDBdyltfHOY+Y8P`~ z6KEKZY6?Tr^V9b_^8i#%Gp9NkwZzk>i}~ZTdCCdB*HW*k$pX52Z#yL35DBf_=KR*CxQuMiDhF0*BSiDgEglQLJw> z{YvoYX9JhY&z)yBi^ga}GgwyE(E((^j1{yHl2b2{jD2B+B8Sryu!y$OkZ_(c!a)E| zDJ|7)id8$Y24=Z}gt~+iKpwt;5!L1IJ{z1Ri$9fEzoN+^7im=S0twwFNipYAw9tA@ z4X_$<$Qql!SkbnRLqxjvpT5n$mXakXNp|Y$3Lj>$YP$uilDc>#!KB=$UasTAeLH5Q+QJEDQkMN|XGOh?Ce68K#Y+r5U0rF33}m^Fg`C zv%xHo7Wx6r)&A$l9_YSBk4wQT%wTzD85z+_bIOkGBsxdmoMVSyBW_cd?|+8QVu4Da z;!ugw?_y1GW{A_G^+-$xY4!Z8BM*y_5!*F=^SVFm{#R>r|a0m8*4YiWyyB!vs7z7!%9n9JdO zE&MJ+Z-0YUqyP(7f-EIvdY(4a5^_fnxRhdvObG|hMgLB+E`vgil<<=4>VQTSTT4s_ zkf5ew!;{ADt+u-dMl}BTx?W;v&LvRMQ!dR26n(Ls|JPyS1CGn4@ohrSC2@kq>`3*kotbdZ_^{z>(ibpr{#RyEye+aSsO~NEUaTIlNLMi^t^6QM|$ApH22 zg+!VsGEkjSi|eV?Xwp^u8zK6?S>r0;SEE+rm zS#&Nv?08-P-|ijS`(ZBy7p_wqtF+qAlgu=;g)hLW5{2;ch~gEFg-4% z!vT#XFIQ1HbRaYl!Zg|^@JkX@Ox)lYPAX;$C{2LX?k&aP@C(yNU?utQ8xqVSBP_v{ z^pEAH7|EYsdw<(&B7+TcHa#lLsA4%G18%}BCb#E=CTv3;WJ3yhN%caEV~~||uNNVU zy6OaZMzG-)tL49OTyV4d2Afl$xn2me&17#Il^(Q+3J8EBQu$6F z2cQ0IqGkpTBKb~%`?cm7ko-0xIlu=Qpng9n9*pYH+w9%&JHe 
z5AAx_b{X*R_e41gbS%}XX0QA3-`I*eF=z|wlImci;RGWlBX$NgFxtMiw_KMXY*h88 z!J&RKV;ad$Xun0zY8h%8d!7gH?h7-e1fE+aJJ5ka5`^y>fN(JNEt(Pb2ZGz?bF?;6N^nf&1KS{fleLD zE=K7}HpeB(q8(XZK+6hmm2jMkhm}Onei{JIT_zN*7HOwIP5crZbiR}_A8xDLD48%f zl$SCO6t0&`aDgeB?tRq4rBH+FBx{@Ms_t@{4hkwU*Bu3%H2TRmF?t~YCu0-y#-zgA zgjFtQgK$&W)6NZ3*c-v`qY@UWWV2tU_tI2ti`tqaLlK5!wu_sOeav&3HJv=P`P`;V zica-US7KVe%=|m2Vfr3ixjw*ui6XTcONfal#0~*|YDz)%3BtQyrm;Ks`b)-#7K*(A zBI?F+m0|gT>GlHB0nX|*BLMY6xsyTd{^{>&HYA96LW@jJ8>b(!RH<=vwrW|KOj?DA zD%7Wf@+AL-(bLqSsRGx9E{CvjX=PR46Ec_H z9K!X{B!~f99wkNFn3LLb(&XI^m(%tYGX{g`RNCU{3#er~=bGv9wc9K086HuYO= zvVAN$m0JpmH6bFpIub-aw#Ku1lf4%sAOiD7!}ChTNBv8ssluV^%1BJH$xQ6Knag6W zpozt&OEGWNyJ@Pf<|#TBq*2C~dZ%flqbH{;sPpeL=EpN4@B$FV`&_)OCMk~|jxH{0 z=N~v>rSUr@UY}Cd9Mp8h%x>Y1?*o5-i*ij0I`-i3iO`rb^epP_)7`)Z|NJap`%JPIb6ej=?qocx zfbUWh0y`c&`@7fV9S{8LA+hsHu#;0*@U=W;es7fiw}SZWb)6A&n0-C`<|wJSO`W`M z*0uZK&3DFg(A4gV6WC6)5!cBCOY#G@RI(aK8^4`&J1O=(BG8cRD$!u~mrA9<32bAJ zpggIRHuiKu!Oi#PZ%Z0csEJto^H1(U<*ccH3n|r&_|%|8Nea4frIHSQ8gg(V|3nlF zMEtQS!1v)bC#xI)W=&dymE8V&BPZ#dT1E1G2I6_ zB1|?r9-_-ej$dD>35=HdkZ_+y`dOe#X_?%kD_LI}S;X6FG-?|=Hg!LT9Mb3S*({_v zww&$p!s9(vL(dbOL5#~Ldl8t!UfmuRKXC&V=m!S+UQyi3PHOFN5pfCQiX&17-~)^6 z{@_aiwcx`T=XBpvZmHY~QA;L6`W(M2EWLbHmH~Q%sj?2o^jqtX;xbzg*9)v>@MJQ2 zH0s^lNLat$T3FGleP_N3O$eZ(qgY(3?ZLo}Y#;B5`xhtJf)>%I5!BVK96p5UcJhjy z1i4xhvH4){eR$8LZpo+|J^`i`E6VVFG39f280gHJz_F36N6VqDRMZEGD5ym~j$*+& zE1RY(83)y1?bEc0{$0G1kIu_tYio~L%+G``-g!kk7+`%C~rh*?f>Dv!kuBW-jrUooco1x!i z2g#Fe&(}mxvqi~o+xsA8^C?MUf`HRClQ*S@C`-BjkOevGT|CctSMnXRwzBBdD~mF; zk*E8ALVSn4Du+jq<=aLjI){7x#1_ls`dyXl05mHkq1rubH{0!arN9AFO*wk(f_TpK zxPhNhBJ%DbecZMzv^>&PF&M!Dy?UrWbE_rgXY1G!3bKmo!BbfJIcUg&?o|6;62kC) zPP~B@=(rJvITYW?RWw;2G%v0-XEt$ffITNBy(V-_-N-tY$)Z1J>}WP3uSyH4W!CM8 zEgdvhJXU)Wi>CyjW!iTWMO2{`Ey_Puc(**Z#5kfu7|01X`C-%*4I(&~cQQt?lfB-I zMwn;$PtjflV1r;?e$b7!kC}c}56kfPy(_OWoMIc3oE}!>?^)-Mr28s-BB`vQ>D-TY zvSS7{7OZlfo~I>ZAiBr@t!df>sAcESO~XoBPY?KYPVa7_Q)&wcyLn9%137AKkYznYgT`EUDj+mmE)5Q?7j z;Vk&vtlKJ+Wi?5OS@a3$AqF?dUH~EtC>CI_n&#u{c0)eBf$BHuc5BqHU8*jjErhqn zZS4z4h(dMCOtAr@ng;<{tQw75v+Nr?4iwrosM{2fR$h!Gh;qq+hW3%r+6#SUcJe&} z8MtDac zWDZr*p`GlV{AKb{aL3*M{TtC>$Q;DKd@CNZ=+msP(Y^8~|G1_JopAz6ezg16_+5is zV2xMclfv$Qa?4kmGW9*Ff#b-cvSckAG1j#dm?$#j9RlQhUIQHcngT9U^>U6)o!jXh z1U}s?<_QznoYRJ%f!Z6E{5!oD+!?IMSDzCJ+Esezcz&A_VRVRDazj&Ok6Hzn+k<>` zstO61BW7MW=&i^#muPPEB}}xV;nq3|7??CpaOosjm`e(~;u8E$X2JAab|v^BtiOgM z;7?n`2aQduN8e#=Yg$`FPCYuWiI9CxC$&nYSQY6(VT$)xh7auaDVTz)E8Ty<=s3g-cO z4Ecd`?Q+WPPIKcU8{nXQO$=jQGR*3WJ)MdwP*==ifneVl(F%H^ojD7a&EkD$i_8_=GqJlC#wEAzkqj0a( zI2o4G`md)uZpc1++a9`#v2!qpH&Q(}uW+zZi2#gK9CJ`L6Uji-u`vDmA2f~!Q#75g zO4@T6C!_Dx;DVIwx|XNFy1GN2W*r9+JkLv1losFv42da`0<(HQR8q#L4H+@QfC!!qO5vuDy%SBGScdF!%fdRVb}Mf6!VIqcM%#1JCWkJQ$U zY&LFk%*5NkVeD6d7G67{Fv|K-i^3B!TQ_=q^H!)U8(LRT8jKU;?mS@N^$8DCj?#oA z%}A1z?IxpSMK+yt74~U64!xi*_v$j~KlivMMCm;Dn&R!&_}d>sxQ;pE~{)U21euTS0%-+u4Awu-ag(fZOMDpd_9W zRN|G|`L+S?bpubG$ziJ!tt%>H>dxz*w7ajGYaFChc&i1vM- zgGPYsx_FMnmkF+(GaaXVu`XWQXc}uUMS;;8<96}7k^^TzV?QNHbGB(tvRad-895ua z!y#iwGVdu|C7_gO)u6S1pL^keL6VUq32C;y{eMXE8oN49W*#P zYSL@nD!guw)mnqCDy)^*Xdk!H=dih=5k<_U!Lb@?DvU9dhOelGEugiftZr?^?<@`P zaECkG;SR51FvhS+4AZ4ynbxZ9AarOril)`96oi2!4m|cehesXB{iYyz^Igzds>-6R zAn16s_Z=R$ENuto-&jZ0jm>#caXl&d?4sf{kt|vPdwt1!fh6`Nfdih__-V#)ZJ2&f zzV{EqVyq>r1Qm+zz~`~&(m6CdP7KRTvq}xi43es3Ggo{vaWSqWY73g`yWIlr@b8N| z!s`xq_!Ur@swTUZFZq}8pZRS0oQ4qAwTaybYb|M&a-H1p+2T3BAAiK> z%Tu;hf3D5?^-Gg9|9-H>P~{cnrhr@%2nBIS+76MekX9=u zbQHy6jbB!@n-PO{gKj&b8Tu@?IoHz_quGl2VuKU7go6&g=TVj#$5HGy0zBWLG=`+q zOy+Br+8}&I5H{G`JK*g{@6u|u@cmz!JiL9e7aoC9gv#LBjI7*}7dfi}G7VZ(*!2y` 
zMcF>$@V&=$j396nQ3#GxpiROiOPM7Fo7|v{;7!77wb5)-fz~dL@@RSv zO;6(~LGilSP%GW!Uo}MYR&9nvNnYn({4`jsHR#*!6m1y4Rdr^=>WuI5ah zza$PjjNk8)J~*QLL5JN3AM&WWwKIG@v|deaB+qbXA=C38qsn@9T}Y(5Rs8(L3$t2% z)aaBn^dB*(Z?|Jt8fsI(&NHVB&zp;N0h{u;2lpjwUCj-GKj%=l>Ja`5n&4}SAD$9pY$z9e!Agk3W} zKjY)+jEm(hZ~7I&tA*Pb5cwNgW`s^M##6y&Q8Lb4!Z_w$!)L$g5PJfde_JNQ?K+{T zYQ?aM@%DE)II8C|r8BBz&U$>!)#p#R8qGKxO*kui^cw*uyFLNxYu9nTesO%Cpw}Xp zEb&zW$&^i!Fis7TkTkzdl4MITYgRWedHK<&Ty7ML`yR~)P1>$c{0p$J)`BSwRjz9l z=9{dPF;LZ3q?-D<5GY0L2wWvF;uYCsFt;R=c{PrvLRT4DmsI8ltcJc5wVkz+iolV$ zrG;Emr4?0Gf8LiC!xgrAMR@TeY@}b008p#^bzV{=C1qhSw+c1^^|s5bw)@jiq3{)W zMM83MPJVsGRWjhlxz9g7I^Z|&4ru!hp{Q{4A$s|o=iZd*)so_N0yNrSsvM&$jMu~K z?{M!>5qcJ@GfbW^eeo&d^HZKrmOQ->Tv^NHVGB2iaB_uEZMsK$^xkrD(a0(zUyoUg zuDCe8;`y^{R^yOm_W^jCmQtv=iPJcs8z1u9e?H>Ty%w?hQeA6}!DK1AEGeB3?KlKR z&@5tHmEn*gOhIKVmEkMT6Kidqmsx7GHg|;A9qw?4JKW(7Sd1~Owwlq@ut*d#aEKyB z%ae2}!BPq0P|;`v9CQQz>qD3S=|I(uS=7&aYb|MN$a0JEByOyTWkolGm*4uahCHu$ zIm-F{i-PB_$80C!9}hkLmlMTNB=N1qZgTRW=8u;JA8!S7@pEfmbpudZKqVA|J&&E^ zx`rijPLME;*RjT!ySGFSZm2m$+k*a zmTSiOjCrv{Sa5|y>_jY!T^ykpXEQ#TKjq{37hEPoQk~%|kG|in39|lsJZPTK^Sgx3 z->n`ol-Cw4iUp(W}Mb0%bdGmmF|A?Jt#7?*&Qn0Oq3u>#FHyi|i)S(N3 za0J4wJNJ#L$mcVbFP<`Fmopns=@R>W(weVs@OEEr>w7Lrzc6akc@4TvEO~ z<;BH{=QG152lJ}vWGhU*BwviUJR5O#y<)Cos<1~pR=81&*KE>khcsdb*Qq<13JrNF z(5B8!@uCKeMu*n7jFZ&`)iNO8sGpF>_N4{Rk2@drITWI{Ka`@v+Jx?I_i*c z;1P(BsMnys7vm>jY{@1mSZoT?SYZQ0v0bqpPgoQ#smf@WfMCbc2xHvNK2du}Ge~Ka zTg>Pg%V#mC&0ShqNY7bP&CdArTdDZ+x?0z;Rh5q8mlYvY>ODL*se8&zeC*U(+vW;t{}Lzg%Xaa)jg^vt+K_G<=G2{ zUcn35#yBPJ*XIL4t?GPp2du$n3FUf0vRSiA4Qb@ydQJQ|rWt#Lg=Lu(Wa|Q5>f1~{ zdn>`BR;+q0oOqu>x6S>&%Q9axNvCWJgDwR@FQ#=kVCUWr?M6T->qT$%U4vl_dOIau zpR-xccyhjBZ3F!NK4Iw5R3TBTMJoK) zE!ZXrlSGi<)9gibdjWy}E%Sn@pqh}cPPsY{`Qt|}%N?IXzhw2r8N=rbo{bFKA|`0J z*=dICHC_XD=+778#QZ%Q^{J(ngy}0$&)abjx-&XS+_xWQ+|vM1fDshtOA4jzF3elg&ueF6*=) zQW|Aa^lHuadd70LAHE2Q>N(!U@Q9PBy+_c_5f|6b;AV>6BuzU~w~L zwc3(YhVmy7Z1p%)>jgPCtap57TZQjxVrkIp73<-c`D{xfVocZ}3S44ULY9ziQnE@= zHX8WN4*hWXZ3n5#oGP1;rc17uF&o=pcdvurY7#pU0TC=? z^g?mHs4zuInHA_NBTq_}!yBs1V0#C6{RVrnqSJ~neu-PISdSDw1?ZBjH0UTm`7Oc{ zLRDc*PGuCOit!sREmxp)hE6vuS6i0z6-m&=Hrg0Bq#5=If{d=aBrVr0rz@_;8-k8Y z6oDw$WSa@|)tcd|U?B}!{ff;9ceukH?r?{{9oSbrGM2nFq$LO=5SH4dNgL9nV7@4L zGAj9K1v4o*-g9^`aOnDySW3DbNp~&qG?3SJ+fJb9#)7s2TWE?*^CGPnE-WTgG+c%9 z1=b3b6-cYGT2PdhZEne>!Kn<}%rZ|b>xMx4im2&u+*O?TAWO|Uu`E){d}YZBi&j4) z!t1sPD9;U7Hw8CqOW73!UByvDaz9e^JxgC1ro)0S1dOyL(Uz^YRQ8)@+jscu;g0aS z!yWz|pp9m$6JFR0gs8o5PvS#bZp2MCwmz1xBENV zUK3aT{obxxYbKK^FV4=nna`+{!fD2MvO-Bq6nnIr5xsT;M@g3JgfuTn@`^$yAOyx( z)@jZrE2$9pzE2PZc%Dm|7u?L&SYvUdAoQKOKokcowi!uY;b?>Jf~|5^tJSSXy+Wy9 z(%i@e*}xCG?C-TXISToFv*g)q#M$#Ftc+!S;4*9$WW!4)r{`QxDz5TAyTLK{D~oD0 zXx%?VZck`mp5YHyY@a{jY_maL9WXy?)3kG(%{dn@F8OS-Wu}g)25)h6^pL}Dg9C4i zc46`t9ElJhJ%#io!V%c2VmrFT{`n7_W{>HW`;<}6PWdy{A3q8TsT|zcC+J7Gms|8W zV==r$eEbpZ?G<}ds7BAZ`tudfE>_G}243;P%7b8lALktO+dS@DlJSV^Rl@TpTZ(MN zQO{>TD6oqmqo>a~zut1SbZ~neP9F4m=iMItfhH(d#DPOAG=%ONDRRb>m;C8ZPuO-W z3pFOYe#w*PbDoV2^IRfd@m;pWY%fX6kNND6FZh2xnlj9qnC1Z|_x5=2QIDq3wQ=<; z8H`lm$B3|rny?c%j}X!|}bd3!qvf%GKOS4c--s*+@M&G?f)^SmnP=m*ftIsCCERNBJj` zh+}q7x*XnX(`@*!^ZLHa;ldhCxx8dE|5t8azU2S-XvJ00#@~CF!A_sI4}6XSD9x9* zsZijBE>X)Tj9eV~J@=__K*qR!o88?uj~+DmC{57UH(b5^j6!Obr%guP3Ol=D^7JX! 
zHzk+F4lTXIy}SbFHVOIL>^{l_o6OlJTV}Z+4FkHJkbc)A@V_OC#gq6opOFHSyo+I{(z%)%yHYJ?aMoxj$afKWPnm0twzZ4K#^t}E>6$5 zk+2)=(h4QrKtnobb@2t)S5rP2x{OVq4_t-lHj(o7^7tzpL9MhlnCdoRPC7{Cpd5u# zfT$?5HQVtu^Whbf^@ds1L|KS=RfG_rLX_K}6^9&lEa`m9i_?^q*vGg>h}I4_k2DSd z03ZNKL_t)s@6%S`e`k}1#hQ|QIc9zK1(yrKXH@i$G_Knx3_L=|LwNzpaZpO4P$*X+ zl===5t%h(yWV}z@-g4Bvp;#?B8@}Z8t$~M!=;Hx<-H1-0@Jt59hUxHvtJ5!d`f|%f z5?FP*94xTS?WJ$hRu^Nt8&QlWAL&ZBZ zqJVZ-5f&b)ClS>KlUTN!9cG&WkrX&~O*)-&b2?-?%Gl-(9^a;@`#uOjA%uq)g|zmY zMClfJHD+CUjE^Hay$~5!P_9{y#*AKGGaYAabgg9n$%NNtOqTh~1_7gkh{$vCy^?e> zWq5kQXjHKlJBa2EQ54Yf5>hNiTda^M$E&mVq(#~iTW;CRXAIA77!8-KGwS>=3p!8G z>npa?YfjHYZmJ#|eIF48>@*a<7odC>FHpF?#c|v^N7Lq%>y+usr%Y!B$$=#(0@#mf zdJ4~RaeWuhb#SG^l@iyH_)$z44p2o!Y!+-+71!fFvKiyb20;v7zNDC4arOM1XD=qK zI&UEl-{#(~Pq({66s_3_3P#0}@%4-{=+Kre+O9xsZ`jPQ7*1xqn3PN+LlJ!?-t7)| zxWgUp@ZTU_nbBx%DJsLZ6s&WwQh>Hpl_AeHqhZFyFymj&H6O19rt9$DvBP`&4&6`? zNy%WK7;FU2c|o$VoDU?=x{5$a;t+IFu%2f8`HPbO>&jw#9`}Ug{l1`MUil?igf?t* z!!j|Xz9rC>!a!jJRtTh$xJuv)fzp~PEtxG!KEAB@)0JVCS%hoRxB2cre*IEgI%kRI z>^$X@kzuxL2@VARwC^woBn?Lr27hr?k_o=@=mlLKG>`b8_crgg-{L{@ z_^*99KO6Q{AyaGevLG*Vkdmm?L`aSFH0TQD)&a?Z@8P?OM&z?eb2eE;UTW4!j*x<) z(g-OCd>7+6c)p9lQdF9#y0q{H3_HB@zU< zVTtfp-Oy~N#IG2dFovWwce$L?DXdGOXT2c3vcCD^=S-S)E%M=6}R zLD1?DMO#9#qFCIpe*OtpUO+pDXbf^1^4EF|zE$M79$u$MI?E{w1jN$o`Wcrd9*_uVHxCu&3vSD<& zC9ML8cR9HC7H{9*;hm!f-DXL!jcIk-4E8&8mnqFOp;%q<^7AKDO+o6dFzYEdg=VW9 zDgo}Ra$Ftoh1Eq}^jqFAnaoI>4Q0MYh(o&mh9KKA&pzwss%+=?g;`%Xucb}l&q_cm*UOMCG@&(f>XT7;${KE1w z3t(nhpItD0amBQ3QnZ?QS%tP1;YBF-i1uL2@xh3<24kKtZzjlZ zHenn(bi@jgt+|{OOsfvT&V3FZyv3c3yR!-X*+k$w{|?$T&%=mZ-| zy`@@QF}qyy5{o~-;^HP_xDn(fP#JVuQYK|>wFDUDBfS>gUYFyO4yTJX;dqP0!>ATs z++lye#qmKzGn7AK!TX8;%&GD~uXm=X)$9g#w{)f)<_qLyq2gi=*`gd)qmSpv|IxBTf)l(AT+v4*vv-^Pc_wzVH+e&uV&ng zDz>_d-@DJj(S7a@+8j3=8VJ(6ce!5>wH2&z6mhe~{?UNx2)yx_Y@`?kC8~^B2O7J+ zVR3fFXtZLqYO(U$WZM43T%}*(2!ud7J}PJscm++jL=-EQlOfk8WGhR=cFcA(<7SqS z79|=3T2mDoQz@)aAYG&r5;r0SeV^;iigdUoi!?#EP1x%ag&u*b3nJfp*hYiSD7OWx z%{3Kv_!e zFvP3{TUC-=PnnMsR=I=M8PM1n&~5}Y{2V`Q;m2*7!Ge}pP%KA`E_`SR)@;cq3#Kd0 zDz6J+KVCilCL{<4Cv4#E?$cR~=uQ*z?1tq{lXI`2j4L9&Vl|vGoTaRi3T@nUO70aL*|wwH)F0x35z_!?euBv?b2z6w7e48h}bkjg52O1E0Vdx=rU(5 z3uLxpJ)AR~=FBz~l>rAsscUO(V=Pja6ziPzHs>aGaD7eE@Yn>9&Jxy{q)<_<5N?u=0r)O)tIOeTgK}=T+|HO?x?DMJWFnB;YtreM zo7I+OX~=$ZX4@U^aECkG;qMX_Ol8QEl6)q49w~xM5DC6Qc#VfS!)e83CfEip`vaE` z_8i{sN?M-4N7C-P>})hgW6f$+vY8irekHJpWEDZR%2`iyKDp66TL_{M`WlP?9EIly z8V>kYGo6<_y;ekB=vlCipcM+1!jdHwiz$5JNM=$~B?Z@`lIJVKD1*F!z|^_#`E8Ho zk6OdLZdkUMN|U7(!>OXM0$*!drJ`juGS^(Ekh_W~RJ3BrK`02FJAK#R67C4EJKW({ zfF~Vx!#)y8X-W{5PZ!U5wmM^7ZaLpxF)x<*(j(Iu%Y2QmT=v2NkJ}IUP46B4q4ypK z(e7XSQ2rv+;l~=SHH|pt;NXBp++e%eu$;NfZgSFOLta*O0Yn>|z@rfbbW4rn7Gy<* zHip$U!}nZz%@|`XRi!c3uu5|BqN3N1ag?MH2S_1E^V*_jxk=fiMIBThg6BF!VTkAX zw}jX)3?WcKlj!~j>{`KZbd$n9$RSuQgA=-by@!S8v`@egS#|Isb zn*lBJh~Px>yMGFZ!zNEouXr|GuuN~5T+FfBptL|n`*cqpa{Tx`9>4#H58v5gzwP7A z#T%s0&x3RWg8nhB{Rz9>j01m#PUp<0hQ-YhSN&s-ypnqj^AmCwe#U-y5#G)NI*04bebXCXH&F(<<~3#7ccG+KK>0t399~c2%d8~ z%^9voY}STc84M1&*P(TMpWXZK@b(8E@Vf_lJZ?JlT!A07=$_o? 
z>+?^!Zni1gO*%o7g9iz5*N{EWC~q$>AzeiCK4I74-S-`=ap^o8^L#WVy;<`4_8doQ zl-^QQ9(j1g;OHKY{=-Awd;frYyA2{o;{Hlw{Nqq}YAe^FxjUf$=#c)ppf#Vf-7HyO zo--aC)0(I3^>$$AL-rh>|2TJP1Xn!0Nf<6pxk&`e3qhV}3fmz%dd%^|4|(sy_xNx( z=0VdZmiyTKhkWqcC8kV}PcC?Ry(XXEFwJw4Mu$f5fUt4JW1BHt!RTkOUQ&6uokJS; zf5*X%;oYkO#w)I_{>a(IO5x-6JM`JDrSu|B7rgXEl>7$H!a_HcUDtSzvz9HbN*Df9C<62P=-Jibs>@n8k?Ui~pN1 zrV*>BE)A~)8?kNgAl6e7uaNjDP79wkp9kF zJbw2Pzk9pQqkX?7mVN-zj|h+7p{L-(?B6J>&p4gtoG->CBRF4xHi}#{2@mdZ{N6wF z&WG>v_^8iGpa~pUetmMcmMUG6nIUPKl8P7^wApF+>_m#t5s0t1HVOxV9h^p&PAE8# z3#xU(i|Z|`RFSFt z(=DI=zt^04F1`{tqR!STw;_)7=n6qrR1}LG-BYz)ECL}FUL&Nj-z8e*co&+;_h|3O zboOEb-}y51{9E@%A(Thd+rfSC7O5I@@O;ARX3Dd*%L`ZHP^(4D+#sA?8oRsf-0N`I z4+$OlJ@csqJp4wV-u-tVXi`K|g5ip6bx5YG^K#;ZK>1Ca!CMSck9Ue|f~zHqt0#PR z?QrUVBMs6P6sAF{N&;Ep{iN&-fshgrC@`9Ie#y;~AxmWuSgOiWR88FW9w$c!>>cc~ z+X`q3gMYM-wVIoYl+o~GESbuTFCodGANrBJ>R28`iF}+9Z9UXCe z*y5-c68aW;JKTH!A>w*UHJWjAbHmex%c)c6)`=>|B$lx0l81^y{*W#59qw?4JKW*# zA9W7-Hp`eSOa9l1;NN_KBS4ti($MJI8AV7%FmU;>>+{Z`$3J!@$G#v|wL6g84cMy$ z|0T&8+*Dj{Ql5O0^Qo$v&rM~hN<-;5v`>5<9eVt(@9<7+Y1%puY&U|g(44(Yx!h_t z@A^FKDg3s>JNp%SSus1`@X=5(iUfhGbJo!ol*H`{qM60l2CXey11oai{0{8#BZqBSVM$|} zqBIi4p}1r1y}P>hyXNi(x4Hq500@8px~uv7ASybdP&eUrPFdGjYl!2Jc3EMq#hR4KB4aww2z{Skb2$s-JS!N_ z=A2IFR8>W%6%z+OaTH*zVUgwJWyN@&GF#-B(8F0v==p?UND%m6O5kOT!Nm#j=2eD4 z$d&1W^7w#qHev749tS5CPisdV4+y)fbT=-t_4U`ea_b_yqnM3`kFyO5FX2`N!x)U2 zGnwvl^6-?y(}Jvuz&7c(Rv4|{;>y(*JDhRU&dH*Wn49PeoXiXt zuJjn(T%(=%96g@0IDJ;O%mO}s+@P~_l}0b2_5+xnQq1ph{A8cgX~uqSsN+7tV2#zy zOI-Nc*LdxXTU=j@xY+PmKF{04t#7cNU1nqNi1j;rOrD&wcYMNpvc;@yVb&X5-fYq9 z<=lBvaQtkDoeglwCQ+x$?v*()Q>yfY@zG=6el+IZ-IBC)KtR&lrn9xe+SVm*-MYYQ zH&)qbdnEQ{IAMDj=aCHh^tM;&-9Mt~pJ0zCjOQ6gdr#RvNja_C&~Gs8B)oBaOj1nA z#&~UGm=Q&5lbuG&1@n+HhojkN|1`#V1f3DM8;l+uv$J}iqoaE~e6-KQndPBv5_j9I z@AiosUFu+lKgn23QYKHQ%%&X*d>Wk&ovl@R_aD;^_BlA2agr7s?d`FDGGoy6NQ?R- zw%@XZ7eRxSiJG?*!g(f7IC*h==*1FHsFQ0v;JVvdB8Au$bNpx@jT=7@Bn)- z=Wx9y?bpAtWQ27d;b4nap0fJ*gp2P#V1NHH_a5KpZW(gdHR!JOSiR82YdNep!DSWc z>4NF;oW&w1uc5->S)ZiWquAP@{oWys;DEp-q|qvIW5{YZVZGx>K20xWEkSpkR=Yzl ziuwA{F>W^Dc<%uxdt>hBHS^j7*Thc-tai6p-MGos-3z?2(`L0D@@b}|a$$s9zl7~{ zxOzCooIJ(mk2rdI$itJ2gW6DqJzS$pb7h;&*Kc#>)+H{j#cTuzq#qM-7V|M_y3c$* zXJLjUVViyGpfo~2h7V;x>2CLVQ&o{aI#m^kq13}QONh#+#$gT0ULYnCICquzjqUy)@w zEBEhl`edJbr#Xj3B7*;?V^MvqPA4|=fr0fRX=kpjlo7uf+(b))_B>3YW#>NZ{6j+$1~=&M;%18)`m3O zEo$e8Y(emuMwFT5yqLbj6ea1&V-6l4F-;4OGJ|QY(p=qTXLE)13#(jQAJC7WSr$~Q zBd8Yi_TM2oeV->!N)9r>1^8h?bI_%`(j)LKkw3wilDsHc5J78=BnjB)?~@;lIlObo zd;4=HWlairjUi!sm90x{u5GNbzBQ!V^l4Ov*7gdvny~leEgsx^#QTey;|ywiT->EO zyvSg4lkLG8msS!+&49RWU<_QmaEhzO9Pi)b@qNC>!2pT^}2)UM#<@Cp0-54d|$u%G(aXh;&rtT&*aIKs%{K4NgNh={ymYADl+ z*(~K@4i7)#PmD42I{|CMfUk8zuI|RXx#n@RZE5>wXA0-AzF}w#-oOu^m5 zly{zF{9;`5UJ3`S&?N0&RYC1!jBsSaY7h{c%Ju(b2a>So&_xfdKYeU$utRf zj@fu~!ZfeRu;4fF;x66QP1be>Z1)3Np?zM4(0Va`Z<9{txZ#Gxo3C^0WWi}#QB)q- zkfggtZ|x%6TSGQ`5wSHmV==7}t?U1Wi_sc?e5+)0(PH(DE3B*~Y{!Pk3oy|duKy<6 zw>J2T@(tcBhM3`HRyMk9^#ZIl5Uvo9YA$^vp=w>`=Ct5AH~6D#G*@=n9W2;rG>AJl z`1hR&M~v{cF0r}QW~CY8f;O#d-{zv<=Z|lfT*{loJJ(oSYqFDAJZmt)h+yyr7hfCj z=iU*wrwI&iu(~l|cNpS14^uaA$r>Bizr~yWkk!K(-<(%02(e+C-Y{WxGp3r&czyMC zCX0xA^(x)_D-j2DgQZ{6bZw;Kc(PS`n|FiA@?p!O16qsL%P@M#Dc!X>3x7y+_X;bk0qe2D zx`?QEfx)ihroTn9eZ*_y1>>})SdQZ-Xmp7?BYG>V>~3~giw%k6H$N$4EH)hA55K|o ztyTWaJLT%MMYw&1-bRbvUPu&n@piw?x=r}KD=Eo=p?&Rj*4LYCH5^UPU~C`Td5zU8 zeg5#zUuW~cqv&3zy*}W^T0|T;OlOj%4#P{VQu+82R*LhhGio zUVMX{XoOiiV*AYr)2w1q8SomoxI=$)hqdh?7lsjuZ=p;fpOVg}94$O1!8W6tZN9NO zr}P4d6N2`TI1KRLKf=A^@x&TDJe+Za&Jj1-#2dfI1-rqYb*9|Py9Apz7+j3F*!FPG z4l-lG#n@=b;NolC{BIK4myfx8nlsG}m;gVHX!jF*AF{(!(pf>#+QE$0cyp&kI|_*G 
zE&bMoA9i5EJtsXkuNcB<*wns$ZLH1sLJ#rwF+jzW9;nWrFN?ge1~u(nS-TR{+WAYE zXf{&2vi=to{QpA1pX?m7?3d_i(-4~%+*_Vu;hHRTaFCgKKk!K<3RMHOEl|tyH^-~D zLt{okLK6No(MhYv4jAhvo}0f2AYR_D+w|QXt5L6>4sCX2LE?KqU(*h;r5`eyrYOQZ zcAZGl!^l|K~&H>9^ zUHrc69|>hsSG7rn1d9aFshrn-g2{Mh=!&ocCeZT|uMxnAP5Ox5kXPBu9chZY+){`+ z*+8})!Zfwbd?rw^T8KnCTT9meDy&euuff{MfK;zOEQmPbr8haAHeysJlX%5^S|@IrpY zDwQi+-r#_6t~b87ot7eHI@cm8KjdzLPAgMw^794BS+r??$JKfShIb~f!!r4)%Tj68 z`&0V2I0L@ZA7*a7PqMNLY*y$ZX`pTS^izV4)&Z$+$&c)Y!vNZr z_?BcQ0?4ELd|QW|{#W_6yLI&DkbQo_7InboZP3&<5uaa$H@me@{8Z{;1|lHnQf+tj zou(HO$n#LFeR#ic2^5*m595xwAGI_g1XeLNC;QLRZN`{RhgC9eXRP>)LS(lfoGjY@ zeS_R%KT`$Q({hidT_U$+$p$%#;Fm5VknI3ghUJnxQ#)NQWZl+9==>DjIb;5+cSxdj<$;fgd%~5>+SLmfnsfQ0C+w_8 zHcLKjlH0wYUGKh`Ct*EgO;@Y>P+}P5Nn%PoBmEWAevb1H$GY~?J|Z>#MH`NY)@K0_&PF)P!M!*26X+1trv-@Ic;LaS1Gwjk0t6LhzBw77K+U&815^=an_8s^-q6h5|jg|m9*LVbON{k^ueuR^U2;P{L3VkkkLVLA#6pd`O0=ES?;lMXR1~h+j03D zYvW<)kUKfd<0$f8pigh~=mxHM#qlx0L2vmfpH+a#iZt6YtCvjMDEa;&Pr&;32?S$|wv^?FOKN*vFg7(V^N z+UGs^&2AE1LJJn!4Q%*$a`pa$ejpaGdLBjfO1HJ*ph=bh^CAFlY z%SVby-^ar~=f@A5cVW|&Rh2Hjx>XX6RLy^1iG8)DefEajWhz-p-_Yqho*XI9?G@V@ z(AJY2V9M{^=fyAVj<$Wy8cpH#Om=Z0*862dQqf91X~3as37$_VZA!kEEE5Eky$t(N z99;_UX+;QwcQKa0BY{}YrjHIt38){nif!p_v3tD#^^-gj-_b+F zTdo@37cEz2w-G2hKovkvBREgb^kPGGXMU0R=SfnbAz+tJBsQuk|Tl|ei-WeBgv~}_YvE_COg@STm2e(8t;M`54@}gu>dbPe{Bw-cu`63 z+nD>(2z`2G9 z)9P8tc}e`P*kLW{G#&-EO*Oz}ekQ&GFG*T4ohaA97t&N!bIFXU_yQX^o&} zYpc`MEOnPZQvchg|K6U#>kIaHj%r^qM?m*OuV8t~C){`~hOaD+qkZCz41kcr@;>}; z3@eF(pHObxQhhw^aAf-f9A+rhtC#hQc{-vAo?3{}4(1@}p9h7~PwhUgCGt3( z&*28ucNx^!?&>XDmHI@R3#NzQU$%XO2T%AGCqCY;3gvVJBfz`{m*WlC?vCnGuG_oo zm~vUO$fg+wnjgOCJd{)}b16*S)yxbR%xxUz5ECbsK2Xt2BZA?V|gBsR68H|Nx*`o7A>NA zpuR05gmHNJTTg3Vf$8eGfPn8TzNp;0b;RPiwiH+DQqt<Y%(hY~9y~-cI2C`-3dkebr zRX0tjlpJY5HVe9-RLp}r6~!BKDqBHmjNo98!LdwMz0e~r94d)n@Oz12*?hqGmR;&P zWB=k?O*E`L))kFhU~i9nfNdkMs0Hn%u38|v@PZv9v72mHS@#lb1+fI`rtHg)-RkAU zjj9MVf>QmgyMt3Ueguhw&#b#5NG}>>9|Q ze3Co*V+y`6D|Cty^98(G=}cFq_N{OXP*pA_PB_h~r%?V={*cmT(|X-<=4pQ;cwBE> zV*hdsS7eAOa#|@^3pPh=Z2(%93Pt0Xdh3@=;M0aBw7!%s?>gLE#=H8oB_-J(jML}Y zo1*E5lW*tsa^IsUzob^m2vfat$dB-TJ4+%@hpqrZYWds>?~NgV>EYdc zd7oufaT>vvjg_=iKRP{Ar4~xQS@=ik+`l9F;-NZe^r|G{xHA2{vxd0CQ;c%L_btcF!CLBi^&8(O=Na2(VqtNNBtHYYlgD8?bQY% z8Se*sPBe6n&4I-)6N58n9|r~{x25r$VZD((&x>Stft$s$q$MBW`uH3e=vh6(ewt-T zY9q74?pD33c0}h;vII?6k)-W-Jm;si!morGo4B#+SEnrVS;bvP0?xNmn7b?JpYRX+ zQv~8pzw)Y!U?_c+5|M3A`}>HA{rEh$7R-Fwfrv&(He*It z*?lQtb%1|Im#$;f&qM8_LsX(=#_vZRK7eo%W5>rP)V8bj;Hf#b-v-PD#eLtXgg&I? 
zV3{NbX&32()DxIjG|?ES`}38ph{)OfacZ;Eqq#Y_l1{^vcY^6C4#R0(hRr4>?ZTRh zb*TjzUH29bYzl6D?&!^r&bev;9bYK21M%niTB1ysj10w-#*yEDM69wb1cG3*B+V`G zCG^W4?!9?JNmgo#Erv5NF`S70Y4Mq z&hd0u()T2JPPQ;xh2+&)7oklUJnpWDiknYt^q9$m$tYl#;?UDk6B(^>^%Q^T#7m0$ zprk6vcvjp~x$4k=HiC%_$aXdjLDhHTWaPx7~mR{gF{q26570-x4}cUr@F7pk%j+zpm3Xq6)28U^6GLcP2mA z#pR5rG1YKnk!$&%x2q2s53$O5XDbnYt(>pU<0#lG>cwD#?tGmqKuc;LgH8ezd{O0~ ziCaIsuF#wRaN`^jT~8n}EnA@;mP0>*fBOA7(BXPL>B{wnJ&UFfHkjvvm3WzT0TQ*B zXnznjiMU651qTd*z(XGQ4OG|5-%d+ptdBhI9$4#QZWHNrU~L3RO8fiHuO91$Cv@yH z5-x~!?_3-YKxUXly54HNrd?LqImN}v$>ZH+2HClUr^YyLEi~lp?f@GzWz&)0ZV|xA z1HPGE26TME0}-8Pd;N6hK~PjwWEQT94LtsH7Me0nZD_PU|A&hIf!1K|BkZjHGa)>m z`Bl47_{CI--}U9u#^iVMG`_Zrhr-aKZ|F8MW0jsTbf5MM8gp-uJ5RSA(61L%DRlni zoIHFDl0W*ve&&b*^|XPCb+cDXqxpJC-e1oI&}~yUS3Iv6e0YAH+p~hd^kW_!k>k0+ z^oD&;4UTa{rXE#_j=2^dS*~@ep*W7dpO_LZv+C0zuKp+VALtsUjx&+>lU7m-jezZS z>4P0Y6W~8rPV|9Rnl{LW_AzHV6kNj16yvfI>>t?4uiz1fcg57}IG)s7rYu?tBw@i> z#%Ug+uJj@cF(Fcjj{YxQ(PTxp6E5)LRz;>^LX%pis{Yrm3lsMWc$XBs1QOl@;N%XH zxw<||eZC^%UmtH=Uh@~1f^$vTowZPpno({{fwV;o*PM6e#>eGw@7GLFTxHz;%T2XG zK$NZ|u&DfA2WgCZShFvVc*iE0R;-LpP>eAfG!5>5jJC-6b9h;Eo~bab;XRF5qjPsl zcN6?1v?uP-QJJz8Db&!_liUw!?#VY^Mm3KZP|11+rZG-g#U|t@ugm{55K}qcoO1oE zg<@>CB=PKvYb&)QGM+!*gn&=J*1X4K|5|M8y0gQvo`UzL#CpQgv2CY()O`SEL(8Ph zI4zMY9i|lBt!G~c4^y{FLwjOY)fN9_b|{$t{hc`Z%tBVzUNm+@_etj8{pPgnb7o{u zBV90g052wF%wz)BTTZUx2{Jpe4=8xZ>!_Ox8l1qT>B_prJFqe31K7BwvEdI1Mb6op zov`mJ@<1lj%f)_+w(*y?S}4!@$r>aDBr7Xp(KsP2R6!ep(y_+mB%0@2CH#O-rNb68 zoqB3MPis2n-XTlsAfca;Uu1)_#PpOst9x2o^~i0^jhG@2W)K32-yTqk4Xk2!&jDR0 z2^t}!cC0(D)v{~@yD9hZ-kjYdqb=T;EcU9W8svKKiz{P>e!h?%eJrIx$C44M-No$^ z!)WFLE6tEXD^0{&iuCWA!ErNqF{;EO%=?9p_xihC+|Z?k{a?`6r4(TsX zm6KpV+#xL=P*O(sX>C*;-JulreF8KJWRWN#X_l&O07x3{agKuf{uph$cuyp+XI=P9 z&gD&k&2$qF^(bLYr(n^*&NoY9#?uU#P!4&s20t~=b#N8uxRSrjmWfFRh*{$Ml87tK z*3~7~31sn8f4FUK4Nq0<@U~_Qt1HG}j_LS#LxI^4#|LZFbl`J8gP7!hHt`P^4tsX|(j5;n*B1WG*^M$JKE z2Ai#>_1pJ@Z7h#Ip#$yB=`hp&@%NI`_K^hzGq#PQSr0p=ZTHdyd)JM(b}{~DpM18< zz&6v5kc-SB;j#j^LZF|Hved0c6llWKcGve{)W3f@tMB4ck5bpylA-@f7gKm;`WIBA zJc~i7ekx$}^RBOJ+1!0m{Dr7q<%Q@(NVWcKRXWh0;Vb67@7&F5v(EJt_)aMAojS%7 zt?bG5FICH3qC{$;{Cc>0zlCA@J3gQb1!QUV!LZ=sWU)$besbb|U#adoc}`!(TRAEQ z9uJ2b*jMkYTm0%L1I$F5V!_=LD96==WXu`LGP+x8Tom<@Ch=76^pNCc{JnO8d#D|F zLHS3~-9`)F$A4TdQTQBb8n5Y3w3g&swHC~u-N|6Gg}h9FCUq{CHl6N9Z@)ft$<@V4 zl^?rtPb{cI4dXYxg~^Z0GG;^BK||+k-RFV{ho2YNE~2__H1Hd=*PmQtIS&ne=bjE) ztmZk;#an_N^a9uMR1`Fj!}iW)Lu?;cB5|A*CTz~@9t>CNf6X6cu zUO)J}Qkrw@xibytsYO^$P$b z^->RU(cdReQ?^G?KW}19$C76*gpGUxRFM?2!-O%@lA`(hzrVJw$yhhGd8<&1Qc+J9 zU7lp9dA#2-rWF)ysar0jQg2Y7oYv%6-J98Gl-~|2d#swUW(8kt$+HRp5GFF)cLr2< z*mL*SRJ8S6rd`~a&-p_77VvheFDb|6s(8})Q6kz5ZOz)|Z)|iON1)fl>OmjoeGDFB z?nFJgY^<@`W&vHtR-1Q0W&!F8=l@JnRD@@7)F%_~)MYf`#mIWGk-fR&ItO_jqZX20 zf#%bub_-=|)h+5h4`MnQkoAlH)}deJ+=E93a4qbe?1lc@{R zm1SILYf3K9{Opekud{U#>WVXnDA}OW#Bzmb#%VLiV^8zf?Jr`r_W{#Z8A7R#b*_9% zB@kET`DnA-^1sHr{5(r)>ZVpkLavC!In()}tm&=^)#j%*+mCU5RTo)V7B{Tuj)z0H zxEPjJYe7q+=WmDvc>F%S;Zt6!@UQ2J86xIZzGwSkiF{?VTQjb>TSFxPS(M^DD#3vP zuT}G6LxE6Mjv$1lVl&W{1f23~7Nl5AfD5q-Unqon-Q?CJ=#fSY(lz0#0h&tK0u5zF zLai)r)Gkv?;qrTrdrL;QO(A?-fVuWbre$isEX}7Z3i#P9{<8*pM?||qaKrGNkF4^# z2O>;r6RF`{sBH zuA_IBCwwJd$T)>bqEx8?tHf45AGJ-8Ms@j%nxTyDCaXsqJ5rF*+!nn2zzS<|aEk}b zZsx_|;JARVxV}fNht*k{>YDmrCU2GJ$=g<^M6gf>SH=5>3{J#gUEh8;ku9@HgDt`8 z5;~DiVMCbBf}ksjO?Fs2E2u(4Zg@$V@4a>z+0vL-4GJ$M<#6v@5o3NTG zE;zL*DSKGaJUU+{U#G^ilDx=+%bdJcdm1>9pmFuNj+9EcpTnyR zD=|t;Rz8|36(CEzJCzJuoERv1A;7dR|H@0Yb>-u3>P3`QisZVeD!YvJGU5_mYmWN% z%w5cvzM_ER(olOigS&`hG-^Xrrb}drI{?)tuvu78S``xe0dbV8n6lC=W~?J$)vp*N zv+SV~D6YOJMnzrWG16o`O}x}Xn8i3O+g!nS`&FP+EV4(Tg8FbR-BXSK3G69e6+1|^{W 
z+N62|4)>2ux`u~8bG?#oQQL+Q;_rteOfZ(Rr&bbPAQwo>u$@2d+|w%dUZD&|VQu6W z!up()tFSdpr+vT|s)jDo#2_%$b*<)eL!_R_98;P!OzC2b z^0Q3D)mw}9CY!9)_tOX+HQ853ZB?iY<%satv%h%-JpS^N1^AkWB0{#0`_5JzF zg0}bVuYgFe8FDUE=t~}Km027eiJ5V?PRcBVSo@`zi16h@vh;Q786GYg$F7)tsbeMx z_ou2ibnw9mcNFJdmO)=sw&}qxNitO#+<0#_NMgZQp-L99)M>zUi<1rw>A*cS)w%6_3O8!sGOm72rC1Zq*Cq)pPHl?on>(j=+W5q=j3LN^}XAhMZU?4Z^@7np?^ ze1ddn(L@SZFDf%1uO2g{MYK1$Xl?MZt6RoLbZbKnw!OhoC1Ib=x2!Lp>S~oS+-%09 zlMgg%6_&eHcxP?p7s64%R~ z$+|4$OS;rMF+`?%>6HpZ`New%GaXCuymS#Ts1v5!(>2*T1Xu`@Ad$kLLc7Oacix3Z zWJ7ur9)mq4iInUGA zAVSt(JYl`>D@$F$j+y+J@+R5wcMSH?mJX=B@3a6&!B`*$j4KwKOLy!i8I`*K^7vp@X4qI zbNCgPEK^@uwX0gz3C?VX)+{|a7_Jv~(8+T!+v^^{GJwqUN9j38YFC`@cZ*WqHq4Wu zGj)s9Vw;Yr_OBYj%wh#ANcQl6Wacr>O|YVjY6ybc$BLR!H7&Ek>m{BKIi6sU$xvAk zd*`HU$!YgQ*+EIAWmpbmV*vp18npwehY0xi1*K*$(@c&E7v$dAad;&w_$R zSQkPTHJbi6hcEhA)K7S$7-D+G$I!4F6<HCne2}ql*~aFm3tt+eVsdiCq4xr5 zK;%J*zSS%()iVrWKG)SXJ(mqRoi7~{IAJwgPZq)4#R(C&j#Mo=SMZ-cTnX#Ul8ld6 zjX(V!Co3T#yVT&XwE7@`7xv?9&8$i!8BIj6__0#~HMZ%Pcx!ArxW_VRLkcz03V-^?w@-#MyH zmt)`$ExNlG!eD3YW=rS1a}dh|OGEiBA#*!Uc=&nrT4&VZ?Aal<9p2EF%`}RWg5l|D ztyakNIvwOx-DgYYT-y^4pnQSIdP!(G7r}YjRu*|C$Ev0OF*|y6(~XXkOx|Dlj#S1V zcM3fDsJTXJ6ql^tJaQJYr)vrLmAu|aLMP$%I7s??NZ34Se5d1lq|0js2*uUEYrgKA zh^AX5IL*3s$A2N{Qahy555_qqGa5(lCqmA` zqNJhZWcETN2j_7NkxTH*Abpjp1@jJx4YJ>DQl{Bmrje1K37K<^?$LIpKQWj0O&Ku~fHjN{NEBMwY<2<+ss^#;3->2|Xhe zKD-FOo|jS}@6gZNkg;!Iu*4f%&|-E{lp&dZ-jjQcwTr_W@4-v!wzlgmW?VZQAClcn zIJ1SjT+YQ&y@POMFNeaeg7O!^%?4#Q8r5AaOAN1)^V`&09gx$s-S$Z_xp*;&nX~=6PR0U1K~OPa7uzP0u|N?_dT`LC9p2J})ohy}F{iA*$RPF$ zErK$`(};-QqXLuc{uiHHIS9*Eia%|5qrMk*LEiY%Bzq0mU{-i*@vfyM`uNorsrF`4LMuXFz8LL1cD_R)u zpr&I=^ovuZsGM23x>=@KP#RF9ZhX)#hLfaugOuhI+Z@%WoKf`|ac?b)hnk=)n;(&1 zAZ@(1P`*0JLIX8ov~h(%wq0zol)6tR)8rYkNNuByK&>Bp*twiDcOMpy@ zyT&OZYd+@Pec>A&UQp8F8@(}nLN;&gI7i0Zer(v%#HHivq?~1LjaqKb8eyIo$q??J~mW$Vhiw5MHiJOUDO{V{S)u zp1HF|-^ErJsw|*?Yx_)wmwkeb4DRq@?Vz(B{>H0kv%Ca*-&ndGTCPw#Totd~s z`4FW|ts#;$t5nBMj7F1QVZ!CtW?HFM)-T$aBHL@{;Q6a3dgxL@PmRs_6Ec8hZF`O9 zA#*1hqC%Q~5s-3|bb-0Qb3|lGK2WFOMqJs#5gG|a1>j21g1aVIE#Q>O8KZQ-fOmVY zYGg>$wc#AYKi5eO{f?Y3LCfsMmBUy?evauD934i?o6c=lkgLohzvvr~R_dnn6H85k zqVP|cmrMs;Lz_HdDay`K9qsjkAD`wPy$0feCN8T;YCO#IHN*qk28a00Yn_;t(2Zr=5CX0!sU9ZONeex#kR?h`#M@)GUE z;J_T;H0iu*+>wugpE28lCpN1PBlSuZ%55a^7ZDkf;j21fIS;|A6E9=SR zXOh!^*55UK#t6FxgRQ4R>O_CS`*e!?`1?`0IeE`X6H)mrjz09e_Mq=xp4GR)!p#%; z?Ww%Iy<1S2!aZ~f1@*?>ON5jFoEvn>?RU?Em8n76uT`3mi@-5qv@;{Z+1N}_(K~59 zTuF?uf`JU1A4y!czWvvP!E-oGNiKHMclj93qRP7}(H26|n-5bhystKPDlQJt+oD~m z3+tNyto)ewT9Eb@FL2We)N=8Kg0}2^>ASDMQj`r=ocTL}}UD&!)>#JYsaIU>(^|=@*iy1sYdrI*Se!p}*3RS@E{XJOuv!+@hla1mIhRD_{GgN|;xLdCo3rgp2c#`o_ zn+#RY@$5_vk;s`@#;-x^B1e72$L*1_x)i^grGtc2DDP<^dqDBz;_A2KKjewS^7Hqp zi`u<&f6|$37+(VLtE_$Z@C)*}_ZAm2d)-rT1+(w6*2fMw4!i0T_$?%2m@lOVYjbp?uZ*#G;HVqGQE|5VplBN0jyPoMO+^#!GPnIDH_F0;zYVE0*3Mt)--9G zZGf*n6#Tl6{is-45u?&J;7oEcTw1*ap}o0;9)TBK*FTKx1(-njyC zhOkr(z;(n)|LzfQA>Se4BJTc5{lR_JmUQg20v6s5*^OQ^v2)0tB`6vEVf|gu&c_u! 
z$xdi>P6O@D)~J}c1a;xI0dtjAVh4F#qtHk+_U6t!H43udG_h*0^vUi7jZQOkXLC*g zsb&{0YIjsRM|eHz=V^F%$&c&V*oNQ#bjepsP@#3?IW2jbEVMv(C9xOeN%b_J_UWoy zTPLvr6A;x*@pj9%@?gDcbxE1J=9)NW-|LJKPiBH-c=#cslJW3c#x=5w_DHfTd|lY; zb|FU-$!}5^zu&{|cbe}JxnG}@KagZ$`q~VDZ#t#FaH)x<=iZ+p_T$%8%O@_Ioufm& z8IYP#$nhDKd7*`I=BNSMV34}AHY1Tzc?o86Dat}3pnJLb#=NRV%?mpZpSmyWxp!F!)QRm( zGw^`D2J2?W1i*ryc)OS$k?F~5thK)ZIoyBKUS3p&OlmByBhIFEK>%sluI`}N{>pHl z1>MDsBZzdq(L)w&VJ&#r@@MRipPsnBBZK|BHU71yQT7|-M7%HY(}(2xVb>wDx*HY@ zWu~id5aI%|i3e0c%-nvT)VGdX^r6uEeySA>Z-~?}q*eBRi;?7BWiImIZ zW0o*j)hICL%C%;A-Rk*WSaWc2`BofB!R$k)Ir_zrUuGOX_)-4a}_F|8@OwMIzi>bzmQT|7LpdF(b#g^DJ~Ao0K0d#k9rwq;#7A&>+Q z?h@SHo!}5$ChqPM+#$HTyF+kycX#&y!6!O#?#w!8-#@Fhz4m_p+ih(=@Gux-RQ0N_ zs=nTqHjML^KCtJ?{DasP+K9lMm{-S}SXmD2a)Y1?Ch zAWeyGHgPD+AJjR{{>#4%ZKAn-6Hqb-E)Be3N-M2Hh8+fT+L5zAs-oMd%m{tG)W%}4 zp`6q-YXV%}o}E?6U7xmMGTD^(qB|tC)78B(9Ga~Dy0QOBrNfA(S#N52ZEUvjVs^!U zkXA_yg4E$!U|9Q*5folDm|kL{W$wLci#1W}?^rNP-k&EJC(7YC_R;#DOa_|;!Hk!; z)2q=^WkN*5#^Of<$qlG`!RLfNrGd}&rvv7c+Tn8nf-knIm{gTSSz~Oa1k1T^|LlXS zn;X@^VbS(_n@)MR>^ZA0CAjvVbu7pTz4AJXTl>s=Ug++&u`Jta0iqkydQEu2y-8~H zBzUfcM0hVzHGdfzdp}cCcRy1HSU!`G@GyyAb~V@F!DPD?zfXjBvCW@z#4S=W~BgrdI&H(F=XOJg07#)TKS% z6XZL$hzqkOc7(hRZV)gQbtaf+A7L}l?7Tx#t^F7meaxL0hBkGs&(}co&1z;-^!&W!HdHc6W^f>Ipi!meW&odT zPIBYg2)RFhEE>su9qGHVp~}ZyO0Bdz+giYWDv(8(ma3^?olf?)pFG@0pK(DthF}|O z5`7eu<0*cA(tLZ-!>p?Z`^-Nhj3x~x$+ImUV3!0ghphOE>2+DZFxmJ5Yv5vt1p@5 z(#n$=)?x9i`kH4pbhbl^KA(35L%L#tFQwAQh%s(>M$PhJ6=F=|{E%~Z9i7y5GispR__&($P5$Ki%XHgqTNE+m5y1 zyr%KSuHI`!L|k@&#*2)^1)xdP0*6_ygs-$!xymcA7nyWY=VPH>ySQxfs_F&VoOhWF z=`^C=1?=;!C@A$Nieu%gNn%9mY9|>E@i%AIW1ZI3kZ0U|n87$ueb=EL6~fQ&n`s_B zoCvCBy(%z?ZXJ||ZAN4cf1FC3I2R+`V%5EN_*KQN;pb+INao33-nfcFJXN32%bV2s zVVN02KcNVaXqSGgm4Bep2$3&URll^2PqmWde?l|ATx+5P=e7a*q!5b|? z$pM;20SFCpubvcrSM7P5$jv7lcP4WHAmn3`kjR?O{HZ2k$DdTjCu4II#(gYm;0`zia3&O+McaiISD08blyr#Ye`Oja4rmN} zjaY>Fu+K7DWvo=;hJ{qp_DA=*x6z&&%}m3SRPh@UHDd8!N%A)h4~XbUxtbZaaD+G!bCC zsR52#b7lE$|(9KPuHhYXo#mefg|DSyLtq*EkBg z`qH^MS@0-B{dm=@LKPJC7j;|DH|lra7|FUdZ@tFU1}b86Q$wtbwQy-OX_{Q!a~nqR zXg|?tL^Ti{yMR}hKr9V07$ho?6&t;2un}pw{_-TppXh{WNVfOcvrEoH7n)&E|9Uix zp)KeO&#OCgY;Wgb6V*QazX>qfA|FKQhso8gw=&*(_bdHgbpKjg*$A}KSWZ?pCs+UBMsNPXFo`@c3AD;Ey9*YK=rn9@k zps%=nD;QG`nQo+dT+Wic#MO>TC_&scPwc6k6V2@39mw|1y}{}IVc!d+ap{#SP_c8dQ;@%Db{i>tn7a%% z(-FJSJ9mTR*aig*dbGS>`EUj*+%N$}VREv8gk=_14ixSeb#2~MUH1PHF~O*_Pl&Ud=}(hPfWVQ^rux+w) zo=zD9+iSZNpEga)Jub-$y=bu6;pCR0@z!KTA3Vty?cnddV$N5h*PN4nYV(!sQEoC~ z_`6%*QECCpMlUnaJ6Gs?CY{A=yx$YSl@>IUO zh+lt4r|xc_Dpa8ER0GvUCYfy+jP4QZdJFUO$xhInYuLk`bp71T?V^M5K(Uq&`vW6? 
zRspWj)7H!pS4(0%sEsE{Qcs6uFW!55l%HzvTK^rNJFUp`>g|159eMqe$@xygwh;3 zTSS}XsqTB3aj*_loaB7pHgY(&{^!I8TO(+OEk~ny_R0TImfxqPYEzoR1JmA(W*yp= zqR+cg&;x@r?cnC&r98EVH9cOj31KRCmxL{6h_XWduoF)=cKU-ld0NK%06WvoDOcvj zL~Y&8H2>-2d#epREa0Vr?Y1Nu5ZYxu0_xl-`(V5PGx1Ea9`>@W9{(Hnm63Q)m_S*p zJGgP3SqnlN@f-Fpc%Dcv3*ta$Lp(@{F~dJ8^`;%LB3pJ!)5*X7rAkt0+|@R8ZW&F1 z>5D5&>Z30_MI))b8YYI6V2 zIRBq<{yW$GU*GtD#tCPqP2Ay$kS609|Ya{9+2ESz;MxBx*GqA zC+Y#heW&%g6o(dAzqL>_UB6xad8FE78JF*sm<-Pat99axUPp8-?LS~@57ktIjMSLv z1xM;a3*V1Hz5Tu45cd@Lc`|3z|su)|IZ zh+JU^%*wcH(R8&nG4a)5Yss4lVxJ3S^td1L{sQaN?li0Pz9qdZ)Z(8pO{v8P{m$*D zj^a)B;x7ABWG{}2TLdH>_yNA2R36ms&H+(vRM(eGZqDN!cDOBr-w-y3nN0ozlv6Zz zJ@}1df_g1)FR2<0@0h{i>}jBYtEc=JpF-JimwI_Oi8%oF&iN{7c=92-yn z_SP?!qbL82(%;nuj`!wITloo(N>hiLPtS8EGVcR^AEf-GA4o(E2Kk&{{81waE{@A= z;`D0(yv<7;aqFDF0r%hT?n_17<+M(vi1rq?#|I#g8M-;-18^C~0>V_%JN)xJC%JWS zb`%y0BFYoR0+#CE^no8j{rznv@PWzspS3vu zdrNnySRTyfs`4^!p>4qEby2r-`~psQ{(m3} z8XC|X6V2bke4)Q!G|Bcwfl&qxW2KKwfNJNRh2#ZOM``Bg6X%SLsJ1Kei^*M`oifE$BhJpylJ6B9U?on(a{jZE zHX;9I3hKtcO+m2pW(YD!u{E8%P%M zNB_4(`8Szb_RjB|&gH5GH|A|+O>ux-mMvskMOX_u)~V?30X&NBjWQrX_2C>AQFrbZ z4wuipuCFd85o{~sWsHeQ|Ez^QaAu|4{L+n|@wQU8fQmw^_?SM`%;O)Yfr%h9n8U;Udm|KE@% z$M`!l3wLmh0y~u}lu(;D_(`Y7o8Mngc%GI3+vcr z@iU^cH%kLVfm*06*9OU#E~_E3i09j@4EFK^!fxIFglq*)U);k^734L;?0? zb%2PGod@ID zh~*xSOi{IXf*w0@0}U$T-hx8%Bf#P)SXKY{58~*6Z$47#uPX@_hd~l8nG7rrHekN% zl6!wA(T(<+Ir*k?vHE(-{q3ajw=(`M+409iv&%5pDVy2dA?_>!+br=FFeH=AqmN4d zW163R`P*ThxIPv_!nr`@Dfyax!2-5Oj__XW=lr(pUnaKM6c|DcGPkP+Wg_b?G&0GN zWYdd>hx`v0%wLz|LTWLz&?r7_2q_zPQ6K-}R=wWkXJ9#N6mNa$24+(Hhd?0U8~uB; z0C}Lw1Z?U{AyUpha_`&fh%MS%Uuv&tgmtu}=!af^+VtjUT-mNISd^({aC~h#MEVwW znp@QBa-TbPe*@RSSFiP|sr90y(iPTSabyp5!SlBu^@rTP)c+F`?EZ=g0bEx+VT&}_&n~AqYi-GN9GT<2~UfMD-BKjXZU@K9on1P9`_L1k05SUd)tER zGcG{?_Ypw)$VV5(CFKsT-HrdSii*EiaSrii39Mcl<?E`cWnL!2o$<#x?Q73 z^2+>-;5(X=X@c*_vYTsgv5}EdtdS?vRjcj2L!Ez~9yW^+3Q-u=7Xc0~`c>4(l?aJ{ zYtZN$=p$bIEz15FzUsX>cM-SkUe>IITNIE4pC{4=D{ItFI86} zXC0re@S^raXntU=r1ain`tZ-Gf!-%@K!YpR8;X5f$&j5$K3e(-I5X0DQ?!oRxGVDX zi%ULpw#5E1kbBFK6spyJ60bMaAWP^Nsp)<$(th6Y;Qi@zyFA9Vr9H-u1v@1`d+nbk z|6j_)`0i4_i`t{XXr#FmvaPPCGXvb){M-GCEGDvrkKhh&S z#T{dNO$2aQEz~4-ianj|awYe48>kb+05JY%vLoB@w_?~zVJN{62UDLNpF;%(m>o|Y zmp3TR6a5>^uPTx_u32Ney-W&pQmEfaHgAUD+?`l+TQC2%zr5cD~NN+@u3GRe3cQFle|$$ zwLN}cAY{?8=j3)*zH^_oU^&9<-v^r!B3lX37s^DZwqL16%W02t+3LU71Zy6Q>%Y~8 z3!Evg%U%>4WZODjL%i9Wxm?F~fQ{8}%>@Tlu*V`)#KKMJwD_zCJQ_t7!{1ft+L+bS zD7(FHgK*f(m>e|NeWE>m6E?)K)A6Ppl;+1SW<_Vk{Lt&n-l+E>oY=G^8N_w zlnahBJpVbts2YQ=)8avw(WgP5(TCU%&V1G*7%8d+4{s}n=8|N#%K=wFf2om`W~BFy zofGcYiku%9oO_`NOWz0b+Y#?n+*-p-e(|aKe&F+Y=*E<8zrzmwfTT?4xdO{;gN6$- zMJ0m$3Ut5th}y)M1G)mC>2Xw=qD^cZ@wo!L@FTNs z7hrM?cBh-HhaVZCpopZp0(vi>E;+eb zN=e#O=^*40jSl#tMrBv4zK|uQrZ`@ciU8O?=Cq2IqaWLYlS^bEXdYY6TCE|tdD*cJ z8*;0|dYPkK6E~tWPjNErD*I}Q)&Z`)&}8uMd}5G+OcO+)wWzzv-ZEe_oHwYm<^;94 z6i`b?w!wqyxf`1=mb6er4U~e-jaa|P{aNsgPgqy^J66Xk7iBMIHAA4X>gIu12!9UqS%&MhU?SUYV7S2SF8qu7I1{y z8|GaHb_6zHu`Blbp{Y7~g_02fB`dmZLX)SgGBhkwq`WNedhd5)bbtN|W7Yc5!dA+s zeZ+v|%*#-Bl;tD{@Jh>GWYNZaqB$x9PZ-bwWvp3qhGG-O!;Qf7P zH~hvqiyRwmw_m&|lTyb(+!g@1DprRkX8e^v&3pNBUKqoHQOG&_1>oT8P)d8%@YGq6 z5}b70Fr$qih-9O-l5cR;iuie;wP(^#2>)bzZ?KdI@IjFtO)V2S3Ot%UMEI*5Ex6_` zMV}vv`=bW^7$o4+6>+r<0o6vgHJ1Zn`fh&S8q@#%P5LVIBpz>MxyA%h=N;8t$E|rE zYpH(&o*^V9rIcJRHF{>x!gv73tVCB?)tlB1+AiNVDFw;#60((sNI}gt)*rK+W(u%Y>g@Jy2$Bdc%|F$-@^qwp zG>ONtmnK#N@>XT_7js}a$a!HbMp-{C*6^(~&{T_zC(63;*&|c}n*zB2V8V-5ed4K9 zvP+5^-s-H}u3X`!xiOcAw{FD2jTzJt$d2g!i6IP${RJVa0kgPh+ReE@;?3EN(Ww+6 zOQkIs{g6e~r!1Z#)l@w#4NPsRIg`MdkDkioMS@D`E@z6khha?C?%-duY)pP+jSn2@ zLQr?-GhUYYfT(l3n?qBQG|S;!5-j^r=LNSyU0#%eLU%a-NH>()d=#rlbSWVq+hC%$ 
zUaE_LF(+A5&7V7DYNI|X2S=gf4m@va-|~KGH^t3V!f+tpGpMk1%fQQY(=YA{XEX@R2ZeJmt3h-0n{ z_fjX-&M z$8WQbTA+jy=$!JK$_u1&2CoMxR<1{>0(dycQ7q%07)``{3Ym`;^ZAnAndGX}5tmULm;sb3bt_G%Nn$ z$YB*RdxQF{LfvAo`#fs=|1~50dy-L3}?&<|_z$>G6`b zX&M<3=CVnqp<*|gkqvQ@WAI>1pQR+C?ZEo`wCt#N*Qj=4>Yeq(xT(fu#(YWobFgQx z(LS{lG!yrP%IHO9&j1$t_CcgC?`VbIBbYB(G8IZ?e5d_+kH*_rnRd%4UB0&%GoS4| zNbsUP2c7_s2moh5cSPP6U0UvJ&9DwD{3S)pj&A&gXTyCHvyTvGim4IcN^E8X1zNC@ z-#X>{c#%_lVC6{B=~_1L%YvC`f6~=88ISFO+{raQCpw6ve`nUQwcX+qJ2K}hZs@>1 z_GZCf_I@$W2~7PxDEufv_2PAz8-*j|=sC)}5A&FXDgS*)yqvg_b*y-z6riy>9<`#` zsKkQ0F8h0p)^aE{De0QMB8lB}E)j-N7>(Y`FmePV57~qFq-(>bTBb>6imNfF3Ex4- z8PCDfgw6fS#^zP}*g5m&xlMQ49ngCO-!Q>^dm`{=`Y5xZsmO?TH9ounb|UQ{$LYk( zQoA0`roCGD;o81fj`D3nrA;CA^aqsc;Ra;I^b0`J>;8}o1ktyaR4 zjCL)_y{^M;oBT>wHcwWYaQ8FIFoe_8T>BD%n=OgHG2T1#3m%UQh`tByxJ;d>Jefsb zQ|0x$jqCKapad!Oo$@FZy{ihsE0~_A1$5f!79&`e)&j2ZazC;Fepp(U0{pemg7eO> zekpd{K_sg+NTuS0VE30Nh6kP{2s9>~DdNe{yPMuUsq2GB)vjnr7Sjz>(Aw;mOf>4j zIjbJ8dA{vj_$}YTI3I*zExF#vMiqn2gXC&mYJK`E?4K8Xxk8m^#qw+n*&yM}_3_J- ziR3G?1uERnlj`qW7w{#|B5!Yyr6X!=T|M3zw>7+-Isp#xcc9?p$toBecKPF#B^taS zZ&%Iwk6aA)M|EXLAm*7=MmCj~F3Y7c)(+3C#C>q@RExNO^}@)aS%sM=UjhBhbk_)# zwHAx8-_A&DvGH$vm#BMqw0CIxx*{XB8_+G)o-Hcj5iYA9Mc7cwrWB z2RTzK(sTQKRo>?Pg$IzbG66#(0P+9*w$LF$RcH1WTs)R`L%$j)215BG zZ0E3)t0N&1n?v<|q$Sn6+$FzgS!0A%5lcK1-7~^~@Z_hD5r4-`_N!+u#;P+LM1#Zy z;aq+yTmFCq9p+y;@FRZ|C@^~&>o(>8ob~FL?@aP#c=6@_Lq!+R*vm}haL%4;XFc1$ zqj@NqJ#k=;ZjY2THy=KJ&dUOwpnqNG(JHcgd$B^0(3o~ivzFQ=<$y!en^~nN88P}{-l3IG1-Nb-q#gzZrw>^-!4&+P+uqH%la0`!&ii(? zJ-5da+;)$MFY;kq*9C53Gk-oa)FH116)5>k&mCT28 zofRPxW!57YEChKz9Oc*-{uNL6z7`{F5+9}0tUw|BG>Jw+t01~ave`a&zQdtDk)Fj?=(_|k53#%S($zjCguVUjNgJso2W*wyx_cr0jp?4!+uZq1rh z2wqVYehXQ>&0A9H)R82#+L><|GDB@E%{B4YS7kOMI70|F85p8l2ZnX(j~R4saXu5h zHzN{AoSO9H|M_yfE4%gPMc|1wNq3vL1Tm28j)R{z8JUW&(F;I^yjirHA_xl_18U;OQE zqJcsX$JdTyr!hJ7TnkWE5F&mV2Kf80-xI`-zT9so=%o2gbJ)#;JGNqk9#lh=T@-Gwi1>p zx*x5ocOI*Y^^k*VUVglJ@4Hxw*pBn9Z)HnBV20!bYyJ!EMMjaX(rJTecU{ID9>EXy zleA^i8Div%@kJxj?YnAQB@aF3s)C^hkHs(klC4#0_u3lR_b}$WQ`0ls(dD+?GTPiz5*;l!1hA1@D(630^VhFqYNJTfd zHzGgo*$5Ykd7mOlC(i_D01T)cj2g?!b>6LA&c0F+>*2bD-SRVQ$Mfox?# zI+0wO!I%>Tj6# z#Xr$>L=|HaR{Ej(ejLYbF+jQdxcnt)_sCJquGQP7F&XnD#U+CBG?z_YnS<63@lhv) z&2hI52kI<)OLfn4C5@!CF=B4#Xm5-&gC5Kcm4Ja1#*MYu#?gr11UcPhTDuRS^)6Tgf zK+MkbZfqx>FSK3cgooXUGjsl@HuUl{Vh`>pf(Ah!v0tQ~nW5rDI8yW|uCz_fR*p1d zsLbw_Uhk;MbTjXOY!^9RhCORuIh~Z~p3Ly@ek+8www&e-`1=GTK-U9*|0%(l+6?Zd zw^!tC7AYE8-k!d1rRO3cq=nKA>0FeHA#q_puaHFOL^7VD$_wvrmI#`Zy`enBh*O_S z@=iPJ)0ZeP;Y^L{Z#cAiH!YykP9d*hQ#R?>-ef#z*0n}Ib%0LbFZV^ZpzzlM(c8=` zWW2gNhcHeTTkKn&^*MF?LXz174a~U#Iw2T=`c@#jn3u7B0CAQ!!9StA>0`0J#8>&Y89AbF9x+Nz zgpylC#+D<%PykY>fcbR-C-w-#0OALE%NeY8@p5PDT48eV=MH5cLiOyp@7ISd^GOfE)4U^&vfki=&8u1*M+>c{;h%+x*OTncOz zt>(>*#ekv7F>F5f9}T3cN`>v=V<|YT==Oxa;U0w5jl5mWaB$uZtZb)*G?7qJlT}nw zv$Z@

n}GM~~wjFkcDQQl~}a^Kv3sInFI?RDQM>nXa{lW&uxwRA=2K3CN@D8C!DA z)RG5|Fwb#f2m1jcfg+vNvyk-M5F$VM(M3W{okIz4V=;N;?kDfWZq9s5TW1k;m+6nI zp0A@QRPu{6v8xsJErde*L|z*q=${yvr^wXF??1bkHDHiTTbv1olrBGt)J1}^y8N@aY^1S z*3KFgpLl;7Nx+7ZM9#tEU|z%I+>?o$hC)V34EJZKw-SSmrboS|(ZwOCHG@;B0jBHD zx&5g*J$Xqz#E!?7EGJy!a+P0Z#61|=@@hKg%zk)O(?4{y&&y5Hz~O1|!%M%gt;&6f zy|jGzFgra0{xHoWMY*RZrkkv?2L)}Lyj#ddgI==xEsfy1yO{bwGH97c0^XFSVMP(K z7BDuDVQ#nrg>-m^9wUR6N1|XFC7mB#?R-O3w>59I#sfEX%S%nA4ZW>1#M~yGsrRaA zE^$mQbdy4Nc#%py?RxmgK-^4BhLdZ5q!*K)WU;j>J6wCJC4tDH!itJ6D1N5iq4-J9 zvQ{|b9q1!cWMlqa`t!8v+TrRfmE)#4 z+{2ZC0c6@jhQxIkx|FnVrUwAF*I|ax6?@}BY?kmH)=O1>tfjR$DW!hJ3209i9-Lco zOC5cxn|Lh9J z5#G)reBIbYnNVdU2`d|Oq{~Y*tRmTByE&Cio$9Tun#FjVbMl+S3!a*0bE_Is7m1@z zrj9vpJuIw*58(#Hh&gy#{KPD*)QPQ9`T<}wI_Uv|CF+WPJ*^vNtc z0G^Vt=mj&UAIScK5(&1)j-N9bE#fpObG8zwRr59|p(=}s$JK(YKedfPN@^a|Z|%jKnmq)C!bG8(Kp_;ZK+@7J*n;st$T=7O>+M5(hP z(N3m}*V05Kb@1AEeBrN<2&_aJIa(<=B_1gb(N1s8Yx8T$Sj2O!T0Z1jJ>9w>SoZ_m z6`zxW+e`VZub`=Yd#cE9{Qv~Z%oaXIlQe19zGU67MT%9owOSnKfUZ#O2$j;Mz1n`N z{p9`+Wy$jHRr&WPC588Og&)g-TbB0|EVq&RaoE*+P5G8a4vK~a!$*J^oUMrWxiK~eQQwX+1nLbsbrIdFdSU^R-=g6 z#XTW)dz=#>K^A@{&*?LU(WWBJjC%te{jVRYIs{S(DHWaw$8g}*P1}hy>slLoLg!;$ zRt(Fp_)I7I{4F#^$amCQJ2rZcu9AJx6`qx`&-qWC=_@L6J&HB#OOw9s-Ph{KlnaGl zku>G^k(H>Op*tSvh^xfrK`3Cobw(lwrcps+t$b$^8c}|0dZ$A-cxA=3iA*-7g{cAI zn0-eZzm*`;(Czx=_Po?=y_1@wY42-B4Dr zAloF)#Xt{V8w}z?OPrXYegK56JyAt=7;S8F+8e#aJ|~yl0U`CLB-$2OZ_Pd&WY6n? zoY&wbG|iLx^7QeZC)75UxU0v0mj@8D>E->q<$R21MZ!^w$b9`nL=C+fxh(7D)4@p zCViED$KCx}oJCcO75gjNsU-*2_c$NU!o0d4micRu+P;>Y%yl@o(U!35 zDCO>r`6a1yt~K{}8h4M9ArDz5MTi%O`l^W_AjNkiT)P=x44WM6r(_sS*6Bhjs&+3M zTRTlQlJ4croB3@fCA6_8I>{|Un{ z$d2N%E?B~!JV$nLRbDNskg1`nHMCHcW-=F!V?PgGywv&?_H+b^cFMUrm2CE5Y1@kmrmb)@-FvU7@eJO7fe)3_j2eN-ovgEW!Tht0Cw!6uk zw;G@?pQY1N!t;YFM%yoKi5^%k$omnkN?#+XTjCX`^`k39=^RCii6%LPJg~Q7 z5ZHAVAqdhqyM-}PijMHfNcn+!ptWNmE^S8` zak**;2BV608h49gQd2_>$fHK@`8-UQP{yhFmO}c>=hhh|x)=MR{U)2(({5Z*YAxzT z6yWbio44SuYmtc_XtXU+v*e_5k2V@b=TlPO)0OX1PO4@n+Q)|I7R|+<+fS%g;L6MP z;R{9ihQZVyZR3Z$X)V|Goa@*&SE94H@{C^z0)(6s06YkO3Cb9H*WA*o=ibxa$UQzB z%L~cMmZ|_VHLxVW{A&%!N&*Q??sN-|`I!PEy%JrOa!TIPOZY&F|KDGB)u&)_8hJg6 z9*OUM7&3*~&r?b|BrQ+Lr=xv4cYQ}y;63j2Zcb@n za7ctHJ-KpK^``%bKWUAL(v19g;hMBra2)bF+Z@jNd({5Sl*JwMG=YYU^V43xEV`qs z<9k)sU)|P7_i`6$RPReVhKW`k(hP0t)jHq^(4=-j7>52Pqo2L9yin74y|eog3K*)w z07HsUm5mPok;Rq*(vef2d`BLfU)!!B-*BuWzg^u8T)m*#XIFrJ0>TeLu$i+WmAQ8> z2^Mzk&y`JuPSc%W)b}iGo+gH>T+2@N=W|d5N>XSej9PmtM>W(iU*rob^H!OCM95OM zCs|ewS4e5_PC`FTxEMT_KVrPHhH2fpDHWEL-h7qW&%Vqa$eVA;1}TF+^wC)j@&ELm zy={+J=6}}UpA5lDjlL}wj@sMMlb$47E@m`yQzTpm5txUBgtk|`^$hB;V9CqK-@rMS zZ7*7@K~kY1k|uGCr#qF2DvxO|f0$6QRttdMGhR|W)^}F)AVjjPw|c$ML(_-5;%p<-}XKs*(E|z z^`>7k+~6$lmJinH2K>mBaBH)Q#h<*rs5>0Jy?;^3JeNaKVw;qFw5zev{{a3ZW4tPU z()lPJl9!G-Ykz)GV9qn`hRzp1Mczm(P<_`=t?t+HNxw|siiLZRKEKok$bPU za?H4p&L5PYPBWXQn0N60MNs_3RM^t~2297>P+R2ckNFKTXhXxCpT=iZA$Q##{ zF_<8rU;eJf<%wVx5E9N(8-d=LQNq|v4EQrUE;&C<9W_XVVT9!BbEL-;G1aRWK#ZX+ zBOmER_baiBVZxfuNqg3@Uy@<;y~{OU9sf_Gb)+%MS?ICRNmGcnMR=Pq$nQWU~y*WxF5u4B6pA4k%x`;UzJ9e0L( zP!StEZtF}bvpUiNs1RLJ;oJ)#wdo71W*F1|NX6b8A%&{c?8d&4p>z?!Gn?QVqwOxs z&qwtrws*g-+@Ss8%Z%6-wkV7`Jp{z#oQLN7(XTiJj%A}4V| zPRb(rwv{uT{FtnHodY^|TEBgmIEqBZ*RRXe zW{c`LGZjYOF-ue*zaNiM^%Y;$exy$s>vd~IN0)GGeSfLydP^&|&X35fBtOWsHgwVx z*zAdd^azeh9inL%Q6fc1WJq%zLX=k2lOw9fwa7IcPQ4?|-CF^wbG3<9)M1F@eo5p| zajSLx;WV$b-=Ax%oU4T|!sT*ZtS?WK$mCt$;b4No((3g6_4rw&G*+#?AEso9%+e?}P6+i@pd=_fn#h!NDgMo%ed z?9s)wwFQ9X^WFBFP-z%ZRCpWp_gcqE0?_G$i=#5TIi5#s*v+`x1!|yP!RmvrQxo=# zcXd|L~Cn%qjA@ji0$_N$_maA&|~Y z?i{n40HaT;_xQCXD*X81+j|m^WR-ysUW2I;?zSMr7Zv0ZR(YcOI%9K|RLvuUXHtB? 
zsI~?F`6Il;rbKTti%u&QQ3bC7hTZ--yE8wf%O2gP>5WV`wK_@OoUHXn?G--J-pXh0 z6$z?txkrB%CBxloC9@VSybxPXc~iAV&)ic&6{grq_%Tw zXvX28z=YSDU#)U5hYsZo_+^@d0|l`u*7V$+wW!{>%oGtuy| zguvzG&YXQciqNQ(7SLSgklw*G{?{x3O?9fbM%A$E1W^JW9CT)U=W??ip>Nt;@y!W5 zFdWbEaOx(=GBWyAN_ejmmYXE{cE1uJoo?>hg*25{9J*uRPTL#f$!6M&FUTP@xT?&V z@Ep;2q8AmO^uDrr`X*%k=+PHaml%!LMMrvO<++p^h@{o1b~?3}fV$_5yti5MXdr0R z%w1^1Ef23bM+tK=c0(U8RNXY%-r(O~Xe0Z-6z%UnFi<{KlN?^ewWy+|$mM?|uW`FO z?Ncv4pJ(`q_}BxgH#)OkdvcU}!>Bm_u)Kppb7HyNc`Y`lnK-N-jlO&pDtxse=RudSbi_@EmT_E}Xe5>-_bbZsSi@O*r^ao=%)XgwK}o>D)uLmgNTPCuL&6A7p-8J2fFc!VVrH{6y|!=#Pt zC2i2l@dH#>Fh5V)AH|?rZeBu>A*Pt#@tM{4hGd!Blo0vgZM36>6(RIKy_Put)I`e7 zJ&gMwqS|lLry*#wjUHpC!M^aqGbxd`^V3mEt<9vu-|*6w_U$CSxYrZFw2X*1O*>F+ zoo>>w)ZDFGf9Xn6TJ!W(S@X24`Fg&8WidwiaoYWYbJI(^J15fyKWH0IYNvV7yBl*n z@zbfS6<;u;!-Q_E8Yq;~50IK9GTBKc`CUaNlI2IrBXgj~vpb>N$ZAV)KI)I~_d$_& zGuWxO-(#EkDA(#xLyfJwi^&z!K8|yNdiBCn9RCk{Zy8kAmPL&Q0tpZZ?v~*0?!n#N zCCEX8yC=8=cXxMpcXyYAySu!T+qZAueqXx3zpv_b6-Dhj1^ev1)?9OrIp$b%abT37 zn2_3<+ZPO;RJt9oV)S z*mpd#kmoPPlj_7W;$%+S&^tKr{2i2iP+3b~sv*nF#8rAalSmd|Co>tf8~^oejYWPeNj7E15!Yrmj{$ zI4>wdGfV{nCG;yQ)J_6wYl9bG)x(Gun5PUbMWiZ(xbL<5hAwbowCkL)W9==FJ0pegizX)cc z5F^zn;!uDNoq}3O+M`)W>PKKF-rsADv$GQ_k-ceMS@~4UC*^M3w@litrcmhoh(~xE zCCQ&6@?GGSr&zsYzP^&6d||%QE;~C}@s&Lx+$S#d8wQ2^$seV+yQabK{JbCh<3a^5 zKXIe6d3EB;n2;(p_2Ek?a$pb2V++0>&}GXO4YFUr8IckspMfDvLk_IBib>usoM z3=69*3MrWrCc?ST-?AVth>KXS_jzHW6dm*Wk%PMgn?rqu&Udz?Q(sraJ;Wxy(*_;{ z(duvvxnp4UW6c+2;YA~4Y$+}ZDHbAG)u+&+0|0C}&jhg^h-6BdJA3dsLt18)V>gb; zoX#J2nUa{>qR9iD0)>Za1b|UHk3!73s;TFJUwKfP2{NKz?Lo%X1`mrSsBw1Su7rel zd%R?S)5-p14@73On=J>&!Dtg75O-?n2+l`2)Y40CzLjeBID2k)XH7uny*)JZgMqDaAE&OUH>cKJOio40Ns90_l#=+ewfnf-NQU7 z?5p`#3}rXA7O>mZ$))z?wYZl87z#h^*#ZH&KK%uJH9!vL+ zAE9O1$L~ouO7b)yQkmD8c-+pdFz#xRYIA3lrWh8Xv=I%^!LJ53w~@s)^49D!0v?#a*iVn8%~ zNq_Yhi3)do634>;Op;~2v<6+X)QErbmVXhLKlx&zWzPl~D%}8l9h5rd{&-A@J}MIg zak?*3v{{y#Qts_4@Kn>cqA=VXbm_fadurD_ErWS|v%m%|g@_whmwJ-0Mv)Fm7ax`I zLfniTGAgBNh@HU~4Mt+>jFi2YH~{R7Km8m8Z4%_dPeiD~FD$_;em&nxEJ-?pR<%GO z(^?=o&oas1FNH+?bafgr3J>62iL~8d)4@dbl2+MYGRGo}tp@3x46P0I`MaXY96U0< zr8fn^C)~w4uwdhm9{=1P%U~}PAm!=zL%qFvf4QvU_nfn>!2_`C!oDPV(U!hdEXB-wtfDbQHk`ok+W5l)Udn7iYa-#;P zeXUUTgelf8qEKwC`*`()sb2ftm%VR8*8x%alLk$!x)0@gce030)#?Jo?f_bD_ktE~ ztiIM`tSq0W9_@pO6jcs00uowAj|EeB?ErX}caZre58WrOnG2!n7WCCcXFRQ_jJY(> z6@9Ap#?q%hc>l%4{6)%q(trR7Ek0LS~{TuFAmZ4$H*S>ij%8oodS7d;i9g5 zNf1XpbQJ*g#$e^aE>{kp9$DN8A6XK>Ykg8Ax+A!n+^g=6l%2jM3LZ1VIUskaIk@LK zQjcvtyxs^v8Yb^mR6nSKy6_bU8d)T+(IX@nt-PhQn_9;WWP5VJGJT(kUc)JOB?dLY z$>vdCbvCA1AKNVQx3bqig>xHFvS%q_6;GBl7ZI?TFoKgbn}%4wY)RNMp1 zIcm6%LSAizKEM_w6pI|=aC#!$O2BmMjJ=}OhDKSUIdl~DJfyN6utylgEkJ9l=Oykj zyT5YQlgVhRAoe_HKlACnvK_Z7zxWCI0_;b1rjcHER^|SHv*%X6m?hRnH+utIDH)!2 zO^_Y};*3dTbitoc#Bg7|5 zX&W~dA7ClKhj?AHPK*0fR~E$#&S&#$!7@n4rASC;cbmwCxl#h;Xj`#c4f~B5f~bhK zHbSy%ci}3lW3PIRkD#V`RKN%K`#Vs*uq}fSjq+B+ ztsHE{lp=U2Vp?ud#Ky&Z$tJ{IT0ZTEDq9)#K7{|xsn1fOx_~9u`?xI2L%l&dhY9?b zC8QVvz>yy%;7C~1rN+|5h1oM@J&)Aztx-rq0Hzr~1_}A7@YQK(_Rfs&glfl*ALH1$ zR{Z58lmzJfMB;t3NrmU}7c^{Z@(;tTZL2ajmkc?#O@R2jdWm*nnL*`YPXm_T7K=i3 zIufwssP`GCT^4d#n(1^ONY$D2G!~3$Fj~;(>jrYs$5M&&T~GVhajz3#qf>iTtYo^_ z5M3M~2AfGyiQsn1Kl_fiuF}I6mw51Ya(`H@HPbnwlJgUOEp+Wp@T=6sdof}c7F%qs z$*p#O393oD?e+4s>qud4aUH!MA_Qen&B|`PaHwC$Dy?NgJ2thx^uF*!y_4x0PnGHl zFq`OpvrCK+;0q?;z=C8;@UXW;v*|aCX^X;79+A9wVXL8Qs|R%_70n zMUKeT<9>-_y9$&72-Jr61!d_7j%dLv?R$M;#`7jA_2aDWFIB7bUj~)x`m4A%)K!{s zfrqfr#%QMrP)^c#eHBx9P9C>!)Qtu+D?oB=L~Flk!(3tKZ~J1rq0>68Q$8-|xNbdCjA^p8J|rv*J^o9m{7>IYgb?-yvh153%qTaEHe?@qBk=@Ub197y 
z<|TG3(W6t$#N(QiDfWZME%>N5n{oWT5^-5~yBXdu72@7@L0jGz3GaH&W3P>Uv-0ek(!q*mX`9`Sz-OZs{Yv1YO4Zz5zV@vqS+&G|JN(bzt5EJD3Xvu?9TNBUF*kH6FQPI|7|DGY|s#{FR;Mdb4n>1DSZfT)e3lb-&~fW zk0A4JwXA>QrwcF8cyW2*{P;}fBm}0<)YMPiAyGvSl0hkkNb$vWvs9|gm5_muaS$4L z&|71L^c+^rZ%&vVH#kG+-UNxTUDrY#AL3oVL6aQ26XN9JtQ=B}#i6M&pN_x3zoCLF zK8LDi3g-Id51Pgd<)bINaXh7MM;GdUivtb$>LY^N!)|E^2;D^jtAyn&=OW()aM zRaLNr(cURQiy*@}9-qWOi}m$E|7mkVrtUejG()5F54qvq0lSUOZ>{kkA36(o`KYXZ z8{H>EX|@rk8$7<0KMShd58Dg#iJOxO_vC$N7Ul_rN;914-+`_JEe1d18qCArKH8tp z==IB%ie4wv z)EdZD2jTn+`Z*tdDlz@|P3-+MO=`CQrAF*sSt!jg@o8A@v(URVPB8IGCv>58g=nRY5?z2(qGZ}MI?n?gzjYY@xc9P5dV1MmZ)6WpTbv9aVoD1(I$e+QCkgT214N^Yrec-o?W1%E%!O)rKMh=!&7F@$*-}|M> z^*1Qas|aC25C3?++@xm&l7iK$}o=%Y2vX_4G=hb%|E9DmGg$H=oxYQ ztKhO8$J$TkBgt@cFEGc{G?$9uQ=RExIz{8Hohaj=lb3xa-WuyQaY#xM4kygK&6%z= zPtLPS4A!1vIiOy;#`d>_16EE*C?_WOWWM;86G<&bN3ctd+~ZU-xYQoxT6C2d{rM2ham`Pb<70zt3RM_*tCEDR=g{AHuZeL5f6M<`6FuT73#S5hds$|9xE6`3T zJD^Mz7jCdmRGcFT==P3+sTF=jU8$~NfItMHz8;?TI?;=|u|nx=I$kf|Q|O9HC5;$H z`*S_W#KHV}ej6N|h1%f`8=ZS7c>6&5&9p0ahYZ5?Ty%rM2A%SYJ(pt63i>`d9d#y^ zs*BL^LVaX0EhlGhPZ|cnvx8ER?e6roJMudPes9)Jv6dX}us3tzfEGGl=Z-AV`Ybwde{D?n(u%VJc%^;B zZHwSP_TrPZ>nUU^j0M^ta2OWelFhcMKbWq6J|x?)wVf~5NMRaR;{7g8JG`=|^Z2{c zS!z;ri~^lg2fV(jGZ*7E09duO26rftR`C`b*duGlc z|GK%a+7DyAx=XJ&z-i~T_&DXxn$QD6o>|2Sw|#bR?lp_9jjp!O0#!5dkYb6Vnr^zKAU*7%#ifQOZ0~HWwscXyd`*d+{lASfcy1bZ5#L7EEPaz z)^$L^B9wvuEnnuBt2dRMO529GU&QsX^hS&EI)PMZ#PFR+Qs&Un2^!x^qZB&K9y7@? zo~@xiq8#s_bo`-8^b;+}o~eUy}hs4q1Gm*=A;cS4oz z8>ezaS`VJU^;o&=t#Yzlfldzzz*BHte;TEpKYQlUVj3r?Kq}JoH?t}xi2O1euAnpA zV_@678PjGTVi&y;tbHCE)9rPRJTV_9PLDJ}WR5!qAHVDg!-CO!7GCNmJ~)s-YcJ3& zZ>Kwef{1+tiJp@fKuR0nlqr}u5Kqg^(No77t3KtVohOCQHC0fW@huoCjCS8AtBvg) zJw2wdF#R>Xp)v!-L*dt$qQN2Fpp7a?IqW;|+T> zN)h;;<(33?sZ0b?F%*5iD|vJJB!KP8@c_ArW6J}P2%F_hY&_!u4{|Wnl4(R#;fNjt zE$|ZE*HR`7*|jCYLhZU?vlYb>=ow$Rv4l2IFTx z9-PjvISuq*Ng{909ihD__9c??xZuGMhmBp<3GGt8c|bGOv9nVEnA66mOkJ#Itrp-s z+uEQvh3+I4nz3wkG=h#j;%4jNKcoWo4}~gvR5FE5uMZc&1Gl@aT;~i9BrYq6$7_A7 zEpW@**-FuV%wgX$7X{1H8(ZtHG&xynpQ!mxVvp1ZNuQC(@JSy-ZHM3Bo${q<$xV1aU;lX%hj(-b-7d$Ybjt< zHjikWNhCHnBGlfPUQ@gr(><37QvMWS!$>SoqumC;sf)U+%G#0H7F}b~nKXMyixG|| zNBrqvQT4ToA+DQbAbz}wM?Q9d$K~}#1PzO@$FJD1Q+Rp>O8wQ_gXI zB^7BVITBE3J$vs&@H&=X#td?3VI+_P8?W!*FN#G+IN+=_e6%H{g%$1>b*bIeaql45 zn~!4iQ5zUD2&MVjo#B7vK64;U443XsC`gd_y)a97<*ouKYQzfEChyWS5yf(D9>DQFO zUK5NkOLc#0kx}u+KJogT7(tkWTU1wplP{#MVQ|sz1Jb6T-v`mw21JV%tH>l36Ravcj@XwJbRAY#u!Jg9@Q)R$-@ihj%yxz?Drs% zkF?!h%a{x5S6)P_zK`_FfZHR`g#z}sk@W?KT}+=oi?8MVax`D4gU0& zGR+0t*1n=$gFaUim8tEVgQ>oNr?jQCkv257B1%?2R-IBY<+u`m)@02?_4q+bu+FbH z0JjFdt05Z=hpGVbnk@Y%e|Uz_IzKKlGggL3ibUw6mDmWzlixl5<0P;kGt9fKZm7F( zF2g#7!(_airf}wGb%1xo6X^o5uui!f&Gm~c;!l1XC@s=xou}9(_d+?v8W%<{q&0a!xV}BZi+}E+R@urA+X)ui`3Pw zyZJ$H&m;XTaoX}g0XtjlW*U~&4hcuyq_Jrxnah;T|Q8t>tQ&hG# z^iHKkEB7))kPqh9Q21O{q+%{!VUm(F`i}Y3)HV=RQ=5*Yhof{z$ZOnOKfZ8|7z%2>|ygBV1saoSTDqLe( z+-Kn{zD;6rxG`vRotlr7tmf^0zUoAg_vOK@Ls%*;B8I6qo!Ug;@_QT^Px`5$E+w^r zTrT5l0wx+y%++J*;rvkA^u%(zFcKzNi;$zq?_Qq5J1R*vLp~oH#abx4>0v8@M|#dUE~M-> z7?aK33E#o91#60D6|KKAPVK9fk>>R(Y6Y=E2W56SrutWT_q>Omu%tFgDR_*vd~Bl| zz-Ak2hp~yqbZ@QC97*6=Vy$*r?EDe5q|+8hjoADp&O-6s8^Z&G&iRjb7Y)1at|0^E z@+^j?3>1Df?XHXjX(HqBMgMPV><_vIg$7)6&fc$${BIW1O33IC^zEurg^hpaqbl7X zPv!nJI^)@Vvc<#q;~$X-R9u*ix&~=9=LjUE;XUuPt|-DmJ{yeLAoP)N2Ed3hW*6BK zBPk-={zg%c*gflLh^ecNR-tEB^-TV~g2eF!gtBx(__4mmem9Azhb*$HJbl}%EY;<) z!t8MZxruRAocu%XE&Vl6fQ-gQ=Tf^x?llM_`9VLnd?7O&Sw{yViIg9T1z;ROQ==~) zBN&3T67$)?q{*$Pf0??k19gu_zd+@xa30oH``{?F_}7ZF5;Cv21*%bDvGtR-D)!B@ z`^rgG%q+i0$&hb~!|nJ$DkO=!x5Nvl^mSNomOkh7IM`?->#eaIS1l69_32UY&!N`! 
zA5n+r^X>Pm0Ro5#MnU~wDQ#T3tr~;H-`yq;1&$QLCtf3Fl+b+gIW$=iux$FMn(B9W z+SN>|@lfi9zi{19f&L#{(;EdWjx9_OYD5s3kaR=2YU}J0b(D4mI@x03c3_l38FAW{p==w{hU3`Tjd1JWt#<4m% ze2D!w)OX=d_~fnNN@txt_;ihP(O%!i?_N4E#z)EGbBdFFIzD)u)zZu%U6QQii4^5f zVFh;(814JBoG4cGec~s-5zmoE8kvy`-jSc;Hr+C`~j$TH;&AF4e@qIuSTf3&#neu?kOLUj%Ve%o6K!v!1x2G!GK#^`A|MGcaUEi zW_KpUGQ*zx? z(uq!q$Z78jE<%2vZrc5FKE1xbn0}AU7@E)R=vfjt&gTd_JoIRk&zlPU08j({cxw}? zPKE3~Zy{tsH<%#qo8!Xugn};FJjt_C=M7$`J{KdjWFFda6tAYS_k3vmdDveDowj2?`gLdFKdTv||Ez6$2J3-%&2L+-(cW#0R@Xi{mx z#IEjuM{ri6#R56Ain_N*sMgXAp~n(NdW)0PXdP4_d}-HO#<;j*DOt8>k@Wd^NB^Wx znyn`oaIGvH_Qx*Nge3{4Z zp0XH6n{Q=?7s)dv*%wsIBdv7k_eEBaX`*$G{4V2iPBniNF!DtA^D+iRn#}BcwmVVe zMNrzc$jT;W3iNs5sTt$8;tk$a21K|keJ!ikd(gH%cveV?)n7EDEMMU=m&8%8iH53^ zmXZX%(=cAxk#zqC9-QieCgE0qCMR$+>)P8?r(NrY#}s4yTOyJu9eKU6lo1aCR6#vM zF0q3KYsJvo6t%|!oin>|`!-ChT4i5i(t1Z9r?h#8Ly>A%iX~rp59#qIRn6^yCiY6{C%bX+J)zHLSZT>wov;-+6j=*v=Vg*tdOk zyA%6e{=BaJ2Uf89J5~rJWQY4J;rl->y!i!w0n%^GpnA|U*f8e~CzvH~_h~#&_W3m5HqvFf>{40&B zPxOgnd=jM8;s=q0acq%O|7v#ge`K^|Odx9P5LN}megBosB8k{w{oXkKc&Q(Lzp{=E z(|@-RY2V-b!e8%_K92Cqs5GVi%^?l|ehc0H@gWMIfwm=3A=+VySCqH`YQb2;^iCY`Xs)n*4;jOv#)Y{;$#e4?Wpm zkO2gi|L?X9f+T;7|2}aNIRCde`wL(EE13W9MgA+k`rnNF6I%ZN+O$UPx4P%nIyb|4 zdumsBH{FDVQ>C+=Bw-J>^MeQ`Mx4J8JILK;v7Fp3KBtAg0l2hOz`&O46#w6W49Z*cc*eFr6N=xgS7q zk%fJ~t90gd0>g{YgekW*!GiwD_kvs>?;R3?(cQh%Mqd5zu~$>5-4-u^`lOH?5S0N+ z{o>^RnP-9KJC-nb4sgV*I+gyU9_O$bek7k~K=$}{PE!f>IEl5VP{RsQX zq4{!~GC$3@aSG6UJMHdt14Ldu$A;};wQ}H3g)0j^SO_-*kDvW#FMx>w8Bl9YOEy=$ zk%zFpwIt#Pr>!{=u=PkCh2474EWZQC^ylenzSppRM{hCt-x zeag45Pk({_**gEHDs)aKlp5wEPe*$N{s!ykZ`&jAy31=;LyTyBt(qauX}E}-ppMjD z@4w}|TIt$!_*^0TpL_C}y@pZ+V(1|&WN?*I;m3U-tdRJ(=^lql?pq}1%^0yf0Y@Tw2jHAeGDgJ)OL!(VgJ?A4{JuP(I47F858}K@h{-j7z$JvmJ z^_W~UD(V$aO&CfG0jG(`Puc^sX4@V87+7cV+S9f}-7y>Gz#S)8)uax58Fk!{$F86@ zm_y>?IJX6;!rsDT{b2EPJV2!S>3CAe^oj6BJtXPQgyhtu>0C|tUx4XH3?GNGq-aNf zS39c1>rJ$d8D$U3ml|>U`YHgIWr1zMC11|7PngWJA1cofe#0fhu%!5S*{B7#mko%A zln6^02z1KU20Sl|@&2m%Y5W2ye-ws67u)@O-{X4BrM_SI8v^n?KW$Xn#J?;@?K$0Q zA-zFEsb`XtXqjzfhy%>&SgrV_%J7Ec-@a~zeww0q_i@Up_2Mb~jDyx^uH;?E;b~b( z7joS~pX#w25b?==(vj!z)h8w?O<+4Yo9`-MewvJo1#FxCwo1lY!tIUbNm>A0co1CL zy>KDu2AucQPN)zx3nZT4AKry@_f^Ih;>NAsuu2}Qjk)SwUgmkWeK+QfSVOBO%>CFa zW&5yc_O8|40_mlN6_q}2&3pIV3ZXSS zTaR(MIJPNUOp|zMGtJ({L1~M$cGerOMeHpOhc8OK`em$yo<_Vr+9CPTTtFMrvjS%@ z;jT9dj*e68q`EN6`azCT%k&z!~}>~UBGDU*1mqXVf@Mq%TDHW|K^-f<<~|u)Z(LP zh$GOB-V2+AnoF%`qWcG$n{*h_0=$i!KotT}%t?#~!e+PIgV|#5*Wu*vMWxOB=v7ZU zk3VRn#u@Mzxow>f5*YV5&H!!?KWf>?fupyv3&SC&7sP{n7SrORniKTYWywm4e zdYfheNIimfSRB#IwO15b4`Z@9M%fD0k`rjYVJTyS5^@t*c}ZB{Dnw#F*x3Z28{m@ZD5#A5X;C?>g-yInfY) z2>lrx|DIHL+P%V%S*g|mRqiY@!tGRZPq3-zs z%pG49kkXS0wvl={tLdV`q4R5V7PN;wazrcLy&v~?l?j*QA%PPV=YDs2>1ZN;mm0&4 zTk%inPUkHy4!9|4i(wU0Yn7+&Bl1J_ASFeD*Fz~KTeIVP7K_ozGL@pZSOS+3hXC#S z{uh(?7h9YjS)51{3=h3xL)+v|$8OdPXvW(bU9tG!f>}E9_;%*Fp`BI!<+hIwZr{d( z%%=*42#GC7HnHoIAX<&W49=r2%N1N%C1k{l0U{3HQS)*A*)eTw=yH%(T{z?ZUVYf(s&W9!7gBl82 zIVo)f55bW_qJ(WE1w zS8)#S7@AZFoa6Pt39sJA_gcJl9Fzu_Kua}J67VEMXz>OV`j4%u7l=3j@~Ae%uji|V zIcl|M9F!M&n%$OTxWHXM;xI@8T;jsXDlS9;s%I;o&7D0dd4@Owd={By-_9UTv4G*8 zHsfk7Iek!sB)#3NdS1iyP%iVA4y@}^;qUT!Yn3@AP$0~GpLDZ|E^2CozsR|UJMVqV z`-Gr_H!K9VkI$Sr(JJ6-_c*X(vNG&yQ3^!bb~xiCBx)6Ys|I`RP!}F%JA&N(nq+-; zSu%gKkC)s()H71I>_0J}^)+NL))6fYWN8{aW1T;^Fx$53P3M_Do~JaXKc-ZSHmX;p zLzeOHYaCgwc$Wzcj~~sNjN{-UH>UTkbPgLUCNGuA+E{Cpw7d5B(|JyVKgL<4yazSN zS(UQzzjPRE6dVe%54?i}+Hbm08AhOO6{@x9rP0dXc_ zk+sPNO%Z#Jfwc(NN9l<2}|uHwg}r&H<`|4UhM6;Ye9qkAjEcGW}c(P&D2(dyBOWh z%2tcVW9pAC^V!vyMS5pzAh}+)#euu}$=XqO>C{gGlf{cO#3xW4u3 zkzDyCPc{5;;2VW%hlD5mJ^!jeP_^rHeca+h%j&kso2f04H)~k#(RYMizDQ!6b3xfL 
zWMw}t@Jw5G<<1x|E@=OiOMXxFB5;~1@wUBG&&+L;4omB%-O=uTcj@-pbO4kY2z8&M z8SUDjwJpEhQo4uLfMnKEnqaD>+0=LEN?}j(ol*&^IFvD_(@irfg_-4DKnRp;H&hgg z!dN(8rrFUyZ4G84zM++PPOL;QFPe8TK{d#xxGMfw;Q7$(mKtjpD5by513~J9x_NbC zJ8%00Lo?KOmv{j=bE?|RLGub;x}o$EA~5k5vP@Ps8x9F7lFtyWd9rG){i4OQwuc5T zf#i$}mluT)=uvw5KJeU6X-Wa>;b24(aM$}`Oz-S1?~GYKqNXx((NU7s*m`?JR=rD& zDh~ov)N5iK+p3o9OX%y}y+OT=5>L4aqV`<1&MI47=aFnK6IJ((VOz&u+{?+4Aol@1 z*lF7tnSQkYkgGDuP@|^nh`M$E9n)J}cdG9;bOtz)^3+Q_v!{cc-&OLsym#vWhlzKq zjl4A&!NMyg0n$I5Y8+X)JQs`CzJl{CUF4J3J12LoV9Xo9&lcPOvtN>w-NoM4A2Sn$ zYXzLTInw4h61oFF?0EzeG(&!NLQP)w-tQ9D%i7M1dv2-_J-SgoR=4LL?P1V7*Gb7# zhBaztgS5CZlaV)l7MC|o5I95Ws@?=`Lh5`nT*4(EStbnY9S14Q4_V;B3ixJooflP; zdS{%8ltYl@`fM*XNb${5lS8B~ytxW(@wsH91DVyt7t14Oq2!NT8~`|Tk?$t~H*WzY zlGs2hOoOoSccr~4^sc>hObRDTMd}g7?$vf*-Z`XJiBGGbKkzW137mw^e7Jle?%X5@ zu2vsgb_^+kS}>c8XjFQg-BMbZ;de{iTp}$Sbn)&~%p62}+O?KUu9@g_VooCs8l&eK z!I>||v1K*d-wbyA;etUi@uj6X)A#!+sSRAol`UL@KAq)(pq=FgN~U<%?7oDq!+9XQ z!7^~Ag}~kpd)L9BXW7pDl0I#OgEvHsdsKhYP2kf-*sQh7#$mqXZDi%B!rBI*+-{eW zs_QU9$~2EAH4^<}-GIpsg4zt$>f_Rmba#-iUrG9;Ss}Un3{yr>chDM7JvH0H8dC^Pzt+4Li+tGl2B*?N&1t;Uw;%!G+F)0(* zPjipD4I-}CxA|Ib=2h7~D<*jF(4DV4iYpwV&DTI3kla}XqI1^`6}CVAZB@qekweh# z-`niyF*&u2Uu9r@qy>bM;H9P5wz36nP|^Uo@l#2d3d7(OG%^C8TVx;72+RTiv;_&phyQSj$d3OA9Ij7(Lkx6RzTDDu_BLNiWHSTRaxf?x|cK ziUOSQpLb$^<~i71A$Eg-wtjw6@}fX#rf4S`|1l_#&H$sWjMb$S*?T50m&(1MjwL{y zvOy zHjCpH`n1Kb@@5WA?Q!U?c}K5K0mh*O>&U${a2~V?uO}y9ommyV|6^&AU-p{C#mRUP z5f%XZHHb(enc~YC1zRI@N@T9H+BwZxd65zsI?XD#%2uR>ZQo#*MORn;_pemW9CBjs zNw#?=aq(1}-c+$9O+V_d)=68rGxxwr9f{RQNIDylB$jus$C0V)m7nbk* z>9&B)*|3rk(cYx)$qYeTU^l_GM`X2x*l4;By^*~b!I&RNGeaM?f#xLg@(ei7CJ}@} zNMAB7x$zSG5N}!R1>YC!9=reWoTZ$i<5fVs-_mOvQX;B7Uc7kX1-sP2BR4yNtRj~r z?i^7fLNowNw8x`E1mXqQPqUR9kr-KA(`I7F5R04C79;0r!mxwdD$d79XUNd8_xY$} z#3lNTD${9^FErwg^u4#u)2kuhtRLdOQNI#=2mzCxI84^8iV8KG1)VX`H3_4gQ6J~V zk%`7~5{PR1E*b$~%>0x2geYZ$v$0;$#x8D#;;Q}5O%KiFBRytnR58cG;Mv6I+w-DFyLc|@ZhJqpSeMbXQFZ@HRmoZ zPGIbi9GHSaO`dPk7cW|kq92hC;1QdPtNzZBwFmRb zUw2S_q58QV6XlE{Wnn#59{p*pqY_64+>pCUEpmZ$(J9R6+L2I)+-XI0(eM=1$9rL+ zf*3kFkMJv@lISH*^rtVq!T4&fM za@vG^T+TI%w?xsqlHw6kLp$0Be;@)F$(c7P z!|zWcTubI(etvu1PiFu}TT+rdE*aP&Te1P-8j%{q)b;F(+GfC&N{fMx3a+!mLY+faz?b1?6l;oe zG-YLZ(t~7Kh#k zqc_OqmhPSru@^Z#t(RYx$5aeS-IdpxvKWLoHMg`WC9**qt)ETmfdlxi86~PJd4A0cujT96c zdc&>^6QTgJ0mWMS%>gFt!+D-q>!Wo%zG8sp^NQ$;qGTWe-$_3@YRVy6Ob_;n@)wb~?@3 zg-m5Xh;69d%-FUiUmd{ANF=*RE+ukiE&IoiuBPpTc0X?TUgfA72(Jo?5v?OJB9&8_ zGmnj#tNDmosmocbUyVXtovwm`I80!~v6cQTHs^7}2^Ieq8zMpINJAK|pcgii+yBGf zTZP5BWDVPdK(GM8B|*}-LvV-S9^9MYjR%Lu0>K@EyF+ky2~Kc#Z`|GG>+G3%_n!IR znd`s4gYRh0o<8WVr@CsbTD9a}ufe;E=M~ROsvpwhbhG{HT=bL9E-|y*elhE!i)O9>&>5Ym( zK?7rt<{r|(q}K&FH#Gj(lzh5-Lqdp)5=xTgb+w1Ic(3rdhAEIV|4!Ff^KKy8r`$xB zEIW)=-V8l!Isz2v#=9Jvq#6=cb9GhAl2;v|=yWqGAB4^YVI&ydvh29!NwOi{Q)N!-PcIb&@-_g>GFAdow5 z#UoPKYHWxxpr2kzG%?4NHIjpQ`0)H!o8jssq`a4QmrVrlgLu*}_cL6XkBCjS^0TIz zMNs73B++{cg9#ywWXpxKa`!8}Cm-+1<+vK6l`Bk7o@s_YM|UTp53hRqca@NVw%;m_ zHiJMbqc(hkQnQP=3i%`(;QW^F>mpl_0}q#m9i3*+(lX0nTM++A z)E%B4@Jn;RD3mvhr6=T>Chj>-7b86{D+8OhQuJ53+ z%t^%a_od_o{lN*TBM_hs!oeHsU`|G!n)KvR*xjkY-^oz9%MNWH^Du)t$Ms}NHdd#+ z3(us!FCdwow1I&X6CqbcX*k5viN5Vl+>wMg0|_;P`7u$8E>1nh3_dgn${{co9}jei zW?T;B5N%o8h_@R9ul^o-kurMX8Ndwu~#Qoc^DUVs9Ef3M_GJXfDoo2 z(zXy7m3dtCmj#XN(gluAOeW}Sf&36lkP zI(@5Zie{?tD;mb&8BX;W7J>9s1a#4rP!0F!O}dU3i*Z-PCw~BfNTz zj-0~In$o8LVHBWwa9;*}73QDTu!<9GE?%?UhovY5h}$E~kC%jdZ>rP|B)aSBv=sYE5r3ArJ6Gh)}WEEE#JOyxo~>Lb{`xtWenH z4G7PkM)7xXdQQEp@YFE8Zx&oA46MBf+KT2GtJZW!JMT$rmtFEf*(Ns_tf);)(Q8b^Mcs0#*Oir2pPCJ!iCKbby&fR&69eQB8mE}I?7=u(}EMCo`nPKp?C$A zDPBow5vwX+N-I6l;$DQS&dL?Jsqkh%^YT(7DtcHd;8GNZRjk|`5c-%SlEP?yxY&Yl 
z3x5VT4Jy|BX%7+CKP&UU$8H{fzQ7B;KfXPSRK*$!MP~(T-(p3__ycCiou!;4nAu&W zr`rkFaNP!nuZm^5vm)#i%bXZT-I-02JLH(5!B^eYZ?p9lue6eBFi=FX_cq@6#py3` zDTAtC(ds|!dvc;`pIM8yH0vzhtzw5ZmNkg~Or&M`?5oZ;US}=r45`Yr(yWSlzl)n* zMOLzYvPVU*V_u{yKDAi3+{5a%NDFBrz#AoOOJ(oS7@+^UT4TETMRR2^SpIzC+1X3t zIt_BLQ<=lPA9+=!5krD!yrW-5r>KBsGLOOOzi=2y`_cFO#kp;e^GO5-RT__rSwIAMzUN;w5Ha8Veh7=|hmS29Hf z9JB&-&t(5{6+sh9R6#fVZ6&O=N<2-qIwZJdUcE&!I$+4rzsILsAe*>>x^y|C*|Xoo90>B z>(QOwC5)B%H>^unOXoil1WDZ_&y4{yhC&V4etPT=>M(VmA2YzN5dpG|K_$nQ!(^^h$8j0IwTBYV8V_Cn_EQ9ZPUHD>7 zA>Cn&d8>}uLGdMm$UtLAj~e+WO4v0UH+8>-(laH(4AkF1?>g>m9G*ROS23zIEu3k8s zp6d#S!(zHl9?#^s+-cxx4?}9w z*5K6#f4VVU3Jpfh6WXqx3e4{6AvW?ciKb*!iwH5i+UA9s6@$>#AB&nOy1Wn5Q?(^ybr9 zL_EFDLcP4}W0f^IM(1W+pY)OF5AuRZ_X0eYe~56oKVaJp>;BA5;oI2!W$^@Wbhx+; z0Y}~I&x;n3%{Ja;OI-eIsPDc5){ z72T(*8>IRgrT$<@duZ_z5GmOOY!==6L4*BL?BX3pTi*Q}!C2yE=c)a{8AnhZ;yUmQ zBj^XGdKN4{Jk>(;cDNl;$B2#_$EjQK?AgHz12H!|O&x84g3qr+uj*kR9Np5 zdh=d#4#y&+g*~~G^q0vAB?{o+Kri~ygF;oBoz?@gn6G;K(F}Qqd8SppVK5@_7ZeOt zO5Cv_|FUB2pgs6=C69x+@C#_oB;BnzG`&afeZ#^RXJ2h9O$5u>8nlE>`SWiVlJ-1mxn` z)59f=N1zJZ4XUt{U%9#B8tv#hEX=kb5zqhGmNx7W}YcXd^BnhFzsetJ22 zyzDfrEZz1ryK%S_N^{Pcv^$$JRMUHLiWG$Z;X%JTW+!Kr!%ytnk)(p#EZO`}W!B{P zm;zQ{N)Sh07La^DfB_ikO)t0jy4rPku6s#?g`a5Qq1s|P>*Xs{dGT#7c6P(vxx$W* zq}P-7(}WiLbHj7(Fl!C@I+I+)4R1M^(8NQh zA3I9k@G1(d3)W)|p51m`nZ^BUn2dZiSqN(O)iJ}*TDagyHg-TuKR&MTyT*Y_JvM8# z#XMscacu=!^HYcUe8$S~R{0eSiF=`2FQ`4lv>bHBopW=GHZ9oWhxGDowgqn;XnU;( zl{;tEQG{8~nF_knZ{MRv3Z1}psGi)N&%8ao_i7E6Q9nCq7npZ~p`2DjH3$^WdHl|$T#O6aahw&p1xHi`F z2Ja*<)t}s65qmKIDk|-%@!Te^*C9O>nqgZQ7iDGg@HIeX8o8|D$?Ln*_*LPrC;Y%b z(^0g#L+K!$WTGv-K)UbT-I$${vWq5mauvY zw7TP{gP=mJB|;@WMc)@6Y^Do67<&D*x$nt(sOJJ#qgQekf6Nngk^1uK)=CV3$tl-0 zDlD(hS&tTrF!%)EjX{_#<&r|@{N)VhdQp<&8q$7%m!FYK8A*NkDDd@KTcW~M7u{Q? zJo}<7NRmODO*kXsKAsUM^zhP{_>x1KU$St(IC@V> zoANZK*p_`s3nE=cJ>$&4R;c~=BPx{yLbBkrN}{BoFat`c zc)lVMKovPMHz%>+&DAeLMtQTcp8V>Sy;SQ1KeH16Q@ef@S3;*!d>XdwvWY%_lKp3M zH%d`lZ_$Sp&L`m6Nw;l*Xw0um63}9}Nh?|m3m7KqMF*gqhR#j&?W z|AS@+Ewiadicvd>zGICrtS7iN%BQ$^)9l}?a+x?jy2rV_&hF6C_TGO568@4->ZJen z=1tPmuUlP~OlzF#LW<|979`Y-`^P~@PgPkYBCxT_kL#h^`DO>gXBoM}qW5uOT#uvu zq>I`D#Q6{ly%S3S^U!EqVv*jyjS`we6pdcZ#ejB`rs93TRNeIQy1UbDCLjCg*L~#m zrYM+{t$2x)Z)#0ZrA#YLVSIptgrcT!xm`y_8ZTjRe!i8#g$5TIu*b?7EHTvKJ*8};% zBN(BU1t<-j3!Qa1pK5z}Fg8i9&_vcWA(s&vPZ<-H5H=1I!nP0C6EVN;x!246NrMi0 z?7y)eI+gj@lWfBw#*OZ&<{LNN5i(R73$oeioZtPbeQS#}YXvXxgYWYdjeOvKIGd$+ zmSBuyxypMn4;ar8P!8JR>QJYRiu9W&SM77ftXV~mD)U0an1cK%%u9zB?J+1ct3eQD zVcNP2VMXhBQhB~6A`I|yTgWX9Po9C2+H(%#9=S6pb%_6WmDvJ};HK_#@}>`kV{UoHcSmG7<>ReMdvJY1*8!l%H?>waapN_jxj7i0-O_3=in z${s^?V7c2O$R-e@b|}>c2O$gzv~?_?y6`K4aJrQ`D!`7RDXYnF=gs*%%4nOm%NdhP zNo>v7GIze?hF2=n6sqG&x`$)wot)NJVcXoB#hg-Os;F=p%w5n*oB4MC(=P@Bjr&+i zS#^;J32{LCr1eV_VDO8hzi#%JXdx&bXG!AIzVSY{sOo{%=quZKXEyd+{l}!fbbA?^ zM{1hf%o}boUk=uJ^Zpi2vfLGPijs_ZIqI1cr9-WI^)I)UjA*8M9sQ238b)3SMZd$Z z9oGOlwS_Y`D9_W=Qp5~eK|pObj*Ao{M5+VNWTl}c-I?pQEcAqf0gO4^uA|H>EkXk> zT;m1I=Bg49JtKq&qkuqLUu*1Hlj>Y=IhjL&txjJ#}$tDiX@ z2cLv8i+s=Tx>RwFS4-MK2VbzN;_rQx%~|V9+m#R>7lhB`czsJ&s&0MSHAmIr`#D&(eA* zGE$FVKEsMcFcF69R!HAOpW-RKnnFSl`UMkl4KpJAMC=F<|a)-@u$X_48;9I5fxr0Cd}pX`LIA9h@7Vj{VTr!P8%#>!2lc1a54%Y<08_4m7OFA;w6S~BrC{`rTTV)Q zDPJ+Z?Gc)Q=&vu|owuT`W(x}Up9#CEcK^5$z>50NCt-BaKwm7s5`I1vGFqtp;VyCP z>(fDzJz;Dl{k3M+e6HJbmJYhzMxibWGSd)`Ow1Ow;32yV@%|BNVk3-)_S>1J zJ7R(dc8F7i+n=_%5bP6T%*PX+X9lIb)Ly{E6=#(E-5APHd|4g)H32Dc`fA||InwkV$Su* zhCHIbTsHDK?}@DIwPh-}qBhkrh44kwKX$n-Nipvi5BH;B(eok4K z6oiq$Xc^659e!siJ`^c{R8FA_*3em3q>Yc-DMT!eKdA_}6o7Y@cYC$!-}0!HKAqVe*AuQSbo%vH(FeAI(xUC%g4vzNXhXq(krfAF`EI@efPLic4m=C zh}JH%e^=y1@>78rEV?k|$70xfqn*$q!cTwxWI7OiIxFKAd*yMu=QfgM!Frpa7#VVC 
zp~M3A&ef8t@n#~upGyNje)XA|!b(`)HPtZHI(j7@;)U=Z>`7>b0x zoqzJ<%}kyC|4BrMCJuW;w^A6R=;F;abnP}_K-!cu<4+}+b!%+Bdfgb?-Ozr zu$WWKsANa%sw}QBd|s279%H%;=7vz`qiIt8Iz)rtERBZ4we%b(#VfeT(}~}gkYpc$ zyJz+zTpB?dO*i9es@X*Om3DSW<{4>I{1vHoko>ozmr8oXVJ5^tqv3RVANAH4Frt4U z{tnrC?;{M!4xbBQ&m~8q))h|s01l_nxE$dz$+X!@8M1PTPZ5y`(>sft^lHLDy>lvh z&TZLL1$N9nzf9$Ag4e}P08`^uE>xM4n!y+gX^+|C1$D8LO0KxXlAM5K4g!n8hTvmv zOd2(~=*^-~7S*1F5|UoY{vI0fesP*~#!ZPqaLa}kvAOPmiRLsND~sU7=h81lv1;kwrDlCM5NB7gU#}TmlbkQZz`KxmME2L$an6h<&LiSx`i>n7XOj1(Wc^S zy=RN7hB8wx44}|h`z1b$a}jrn7Cwt38aTlxid|e)$a>D6-UOWmaPu&)UPi|9Vdoe3 z&Ww~U6z$MBkr2q@ahvumij4;Wm3F9yGhPFJ^=NU+t2-^ zv3W+>-ubfg(E>4v(BykNa^wxAq&Jh)?)58 z$`N_)!_0fBSYmfgb~XH{a}nj&jdlp6I&cWw_nJVyrW=qs7@CccPsdLT#0?K@-{Flh zb?yc*hv9fj)d5l6;;O~z?gP}6sJHRt_zlbI99w*5jDw6UK8sgUR2PLP^tyQ}tHLqe zo^GwJ^mar)y@ugDVuH$d?;RKnm2duw**G|9Dbhu#eAh#ZQ`V2BfKfo);4cb>pfC}) zlHV6?fEG8PS#*|=yFd`==@gur_NmAsm(<;U0^w$JgFq|lec&&d!@X$60(+{-f}@VB z=b?`0P2jkKMU?LRm7c=u&cTXo%ePvI#x~vasQ9dOa(ENPck`}8Ws#yTX0^?}1SOwp zDbGHiRNw3WP(&PzbF6)pv~LNI-7IhY@!@fz*d&2r-x*a!;Ik%`ouHQ#Tt3cp=HvV) zfp+(NMExElb_N3w9FrtlXXSDWQC9L;{MF}4w+FN}#>iEQ5<%_7w-F52kHB$W+iU6L}>%KG8F$jk}4WQSqMlkan#r_>OmE{*hvc?Q~XbI1~ zV`#$k_NAg+n&{qQ7kj*)VZdEtkiq3tjVb{ws#vttC_3T0{Oaqv{gX^#-q4*Df-v(s zA$Ka)vAlrM7wYuld}+S5aynekm)4f^_^Y2O#`G1W9TJ|3xOR0dBM72>Z`8)> z9q!)LmUz8PMl*+Yx9kos|1kDO?ZS}4D&rkz$bR^H{`t&dbc3-xNB#||>M;=AyyUR+ z5jV`YC)O92y0=^pa{(HSp7}%k0#aG}cW%flU@BrS?7XW)@D3*z5|Nh%dVKg%mt8kw z^<~o&^}chWBau%Scf79F0KdoSo2RQD@q=o7vrAQZ^U2th@g5$BpZp+--8UodEm#)0 zB||geaP+*|syd@OLFQuwhOe9@Jc5vUZbxK`m+B{4k5Xn*LF*GhMR842q+S0&PVjc^@>55I8}wzBA`IBh)kgx^sEHc~iuSK1T!&Qv7JN z;4XCw5;xn!skx49=f@y<8YrjjX;Eq(`B%xoQ1 zJM%IxM_W(jxSV$|pQ>5V&x?=w1hT5-Z3KMPjx~r>+xxv8G1S`#Jtr$L%+(mW(({RW zPR{dA8x8Ka9gPo53;Qq@>{n=hzADmlZ2`8UY`3+4HEXy^(tTW*>KgP^JR>Tml_zTc z8tc`%K^*e%fyb-xc1h!OP5e;lq){=atc(O1IMW+ZB9);62Y_Z$zdB%Q$#h~|oWM6X zEa%|wkcn8w)b9t!ey>}`{KlU=J(m@us)zL>SgebX?~1^3wo!LJLC|~5k3nQ{JeGDS zu2z?>Ct_hKIAW$5nhID8{N3{xTu+`gtE!`=O3xXC-aFk|jJdfgYur~) z!-Yj@wnrntD%6*}i(zV%R?vu>H z;K84!GQHS2tYSjKOgQk%?DH>`o1sK}PYnbG8C~=*xnFTL%X0C5gJ!K63pXY+cE1 zI|B<%7>3(6PJAcMGc{M_(iS~@7ofFp_g@Q5#M#^0iZp8c$1}nZipmWRQzNPAknb!j z!GV=Y^+qMt@c}`BkrC)7ay{moYi+9Cr-TkXA|3fdTM(yDq* z#50poH7LxwtuU{a4UFXTsO^=UY9%=6+b_cIVV(2EzPC^-y6!R2iAC!1y5=>+#T;)b z+6`?a=sECrqpUD2<{G!gi^@v7f*TkoxsS2xtmIB;E5enQ>^W_YO=J`G#yzZS%_F}~ zPDaT)y|{A@k$)mTU6txkybM{zQjjsdtpyFi}2_yUaj0tiCc9pq1*5 z(UOKUHo)M^qsFk-hbQ1U%Fo_UV1~O2XH0t6aUEb2$ zmr}Y>ICpN-qu2I0i;oxLXV+iRWM)pc_!bgH`$;s<8t}oG+Z+}}f-N4zgfY|Q#85#r zh%0%bL^d%gS(*!2M^zVX?%anaD>da{oVsj*P|{kwWA9HN1)aS8&=)HTM5t1K<@bxsBH!1C|m zR%ashao3@74*Eoc(tu$}a~}S%&mn1*PctxOmu)H2up~sYmC_MF9<#?)`EkoU`V_Me zI`Do~rNF2UCwXwMeJEp>b1Q-f0=f_@YnJF?$&Lo@bn_jh0GCM~P*tA8A~i5GfqE9>BrJ&bfBeuKcN-Q!HtLYFvGS_z|4@5DWKui|1Mrcu4mx+6N zMP+MjRR74B7_;L$RQGo}ivcxu>1`11HZ|3TqdQc|9@3!5v-y|{Zw9yi7QQas?XNlF zsk!;$VeO$%2W?m{AFq{e3r2 zD-4&}rOlXM*{CDXNzC+Q(eyNGbsn34&HWW-N#;heWI)Bgl+_E??r+qiOONs7nZ3ok zsLgNV3eVjq{~&kT`+;Y3m?z;RWGnFObZ)NDo(u?0IYjKASwVm=+c%X&A%DZ>Es5D& zEo_lBGti`mN0W!&R)1;gQrMYY0GzsClN$X>tI_>8pOEIq@3!P#MHwT|CSXlo8V8Ju za7=PAbd0V3>N8e9&&!(qzez+aTvn&{j0yfs*bq**m}RmBKlEzG4)2FV|Okx7b2$L$quWN7zqp= zPAqd2;+(np>}}bYF$t8%PUZC@3s8VW(=%!1Q+}aA z06AD#{@A*0)TLqk2poogY@8{5xO)#C89mAcG{jf$mw}f+qQA=AzpVs+*;*AyxfO@g z({#gLC&^06axK!Rz-q=ROX{{ON$q1<#w+U$kJrcjD<>-^@vFP(FPcd7t~>O+B0ymj zU+2j+(RdZst*0Zw)F z$`z}ga^>>ydOkbxPQ(UTL#GzqgLC zpc*MJ+Evt5<8SePsF;7cAH{I++`+)cHhb=q*^Yi1wWk|gUei>u4R2crKH~Q>lwf;Q zn|_q$n0YMZk{`!{HX;j_`B}HBYWtY`zX0o^WTEAME`yBlWhV(#V1oB>0L7Ew$RDTN zp?jy8YsN^r^F^({!YQqT3NAZ$UfsAaS%AiwweaeSC;j6<#$uM5Wm@(YE8t62wNDy> 
zZeF)=Pn_BVhxwOOOi6(zNY-GoK0;u+g&U6DxlWtKQhxN_C_xGE{bSzmh&7HMvd

ZQQ$QPVs~$+ZBpFMGDgZBfaZagd&!#%2;C5Qnp4 z|E3nF5_h^6GznKfda~_uIoJAP=2&U&OZsn9=c{{hDl@?`$ik_ChwO1v-*dVgK=;?1 z!{48&y`b;ZpKePHhJq51g9Xa5X8yqfs1}A5{*6x{FF;l*f#xduQ)wB+;u_~#-L*nn zKf+P-pCRvDL0ld-H{n6#7WTWUF){vN?`c_@b37y`&FOGL7DvkE`G?Fg*<gUE#)(F^r#1^ zh)JBS*E1V=r z^5;NjM9ut@@9MU<^8xd$iffKB-3I;+I5y7$pP)-5{_(e|*i~qkKR>F4JM@VvuvF-#4xDMKMT`rL5F)6~lfMvzHnh5Zp)4HIgi0LD9LRwCKH@76ssO!pQ#%o-w|4B`qjRYkiir_UPm3iMQEa|iSibya`hWfX%i z&7lJDme1YGCXc1K01xpI@Fzv!pEOUP#&11nXb%13d-=u&5Pvrh{YV+z`5$&i0o(8C z_NqW%FTyn|9s>%dQTLT45BN{8O*1HI06F4E-e1HBF;NojKOri|p^R!AVr6KLT)wR! zpg1CQ?Jzz>z57qJ&D`H_&zA?y82TR-4=@67zy7Bg|I^I8rGGakbdhEMp92fp?5j`I z4*#!FmVbGU_5ZDV5}<^xH5P@74nj{B>hGP$0wsrb7^dwQ`~IHV zd#qpj{~`x}{is6t$HEl;cVYf+N&I(V{$EH;+-gnf+A*1t=zkJ47$i{+Z|{{yEt1u6 zU-l^eQDzb%_%h~np+*wmZ{0BCKBoQeW$wjqLxy@^_ziVe6(R$#l$B3`hd()kNs z`25Aw{nr}XuW*2rKlI<{zZen7Z9&CJ5ANLd9KRIo+6j;zA3pE{I2?Uk=__v^^mNY-R*jN>B}i|;;6fla~{WRZT1@Zq}u+fJoKq$*)MoefiT69q)( z{LD;{<>kJAuis3!iu^LM4kxPAZhCf$$#Zy6U&0Jttnf7r*9j#Le~h_O3lTwjsc7%~ zCR_t-0s~JiQ3zghm?Z}5moaF@h-sFDg&?|L$cVv22fRkrwNL!^Y0yjNqti5MB-P>4 z%Y^$gwUmqM)nI55DmZE_m2CvodvgbGX>oE^9^a)gEQU>2>CXxd*HZ+1#?sPo8!5R z!QNZDFg~goNrBO7s&P|tKkq=Tt@h;`fB2>-p7%ue7L&*>L)$@-OIK-Qk_77#uNn96 z!*#alwitVB?T<2>2%UQl2s%e)TSyl3GlNPdwyk?8;+xH?B|Ch6cpO-)Z}`JY&bjNr zZE48L5$!kUW4~Se^B(Da93Vuz?6wAH(%*Yca(7ourgxpxSeT)zjCa(MFVN@|c;~s; zoCah{>-kEJ_ zrG`nW`v;6p|N6%(e+tQ^`jN5f^x?=%fCxf-93ePggJRm}b`Yb^+wUOjgH){uqx7x2(zr zw}}ig%)4?B!qdj^kFyFv;VNConfTA$^f|H`M-0~Y_%LHwKW;NM8>vz;%Ay=(_80QRTny|{+ z&b>E!SE6L^dRNBPgP3aJLlawIg5-vJ_P$xD(<_-Mh%Tg*VER=oyF@N$H1Rci&fcEf z&6I*5c>Xt9UjQj$mf8nO$L%Ye10n%bvt?-Hp7eU%L8Kj}Zld2uef` z;CB~(wFQA{m){I&Ewf@)1>C(MgAMI&vw1-{mk)bW7D(+WhPfdBHFe!1de1fp!+RzR zUCl+@*{?7ihEZil`8~eZGPHIEte7V-SkP0kbMq39@s`j*AA``5c$G}6>fo5SP%>@Q zl$;XPJYWzny>pzNjHl)g*r4x}0zU=f+2b6JH;?W!xL4WJ2e!6^W5J3VbrSI^QD2_^DS>i!ZV$un*`aU<>ArS47=NYvSkgT_UdNwXF`4r+9KmZ~5| z^8G4gjMp4orrSw!W%T*-scnZXpwx?;{|`^jnHcas-tbH1fiz_rBOqM6ViU8S94$?n5AlFKb$}*jtlG(4J2{sYLp8grOq02gp*#C>3~eTP^AoMs4JS zvJj5Z^at*>Pc^57u_sI4?+dF+$S%M+G@m;;SWWO{=)rF*e7+CRu6R6e0WW}|yubo8 zvg$SnqeC127ww^w)3REF0f}^oDDfBjZ(E>AdfcVsrnfBYXzv>)gm|NWwLz;BzomNx zGH5%m?%RJLie4}EpWRIuh37wNCE{Gc9`$f{{*18vd?MoR%B()$PbGEQ<`HnUbM?Jx ztWopwYnXk6r{8s*XXwaP%H+Z{=uOxbs&@9ol5;kMfs=RMJXCPOqk}PI_Y0L4=S6M0 z8wkOPh`r;no1n{i-^afnVeuOQfHqevYy6vw*Uam#9YgxAw80VEywRPG%5*nL_r=*808x z@bG^Qlm~m?_A-2dfr1}mcMvJ278?!$S}n%G=7$?EE;Jx7s?CKwB8!U`V(KhnZ!E|O zl>LyLz2t?B^KTkwFKQBVq+~8*Ddi69x*q?*P^1$KcgU2t!a($zg|)H?shPQ|ki*`F z3^RBlMe554e_B83K?aXNV5?#O>_+TWiqGO zVc}Ka;}O@^wwP2M^)VCGbF3ek1pB?v|*Vcgo$ z*=Z{u@ecC(!X%ct2sANw$CInfR);=R!ZNZ&G)#YkbP(1b>~1o)x9Zg^QcpOVV7d)Me9 zJ#Pxhx!J~iE}SV1h!Z>tH8H@^ zJT>|d z>l0jABl{O?pv>gO%7u%BNxBv$>G(@D>Dk!@~2<^lA+NON(I4`ER~Q3-NEi<4?J7x1HHIA z_UyteRZITrU=}4Duv);RpvO}%wmO!ZHHi;aGqNrCn)`s;Q_M}&Kk2Ny=l>_oP!lKU z0LchM5dPLXj4th? z#kDEXb%PI<%Vkjd+O6&p;Cgw<<&r7FU`Zsqe2o5r;_uwSoMAI(neF8zjp|eQi`H&! 
[GIT binary patch payload elided: base85-encoded blob data, not human-readable]
zCv7{KCP51>WXXOaVv#~oj_ZnYPa-+mS4*6Aq2?ktC9tfGVlK{msdnZmJsJ$E;OKOa zQ+0u>l|_5pHN6l&aE$B0_m{+@KmwYjv!h*@?+&BY@tLVnH7(3<;6=XKJBQL9?1(`c zRwGkCu8Lx)*Pmv2_pn{dgTtMD2Ly>%)Cp@7^XFAldo4yYeS~ME!d%Bxi-7XbFlS40 z?ChJH1>N~d7s>nwLmhHF7M9KvL-!r|BFD%)eSHBVGWn`K)=jT2)A)V|#MNM|-wF{R z{K8{8{vGKFFKbBGf#^|Ze?Udo0-_v;U%B7N$N+O!++G|XK&xR!M*P#qbEYYTjSJ^o z3@n`_-VUqjCX#U$M_1cpy7B}CnkeBe9fTac-#^0{T$IfzKbNYT-O>`-NWkOiD{79WA^3-AGcHUi zX*yetp}Dz-F}qn)xlg2F(aMjp=SaE@OD1w~N<5$5&~`(!_7$OsNx))V+{#j`hfu6h zu^Jh$`nstWqGqS+k}sI<}tDhg#;P+I0yR`=TX0Gsdy(oZ;Q(x(+Wdu#!_ZrysqHhcSQ^ zYoM9_fds)gnrQ=KpU%r3dK(!z;A-tA*D8Q6K=@R3b>3BwmPDi?(TTxjF^G32W-}mt zp6hmj;*w}{S39!0zG3i#V&Ba8aSbm8f)YVlpmhU|BIMoK67vXv2r4_`wACW20nO!P zv-y3u>R32E^K(|VeFy8NjnbQ3kgbF8_tnRd;>5?})7Jq~%>q&r5H)}9sp}4EQw+`Z z`Dz2?_RXb(KpJ`Z%&Mb-W7$=$&N?NCyJFw(Cy4w_K94A|K4)hEF{5jlbaocT&79~tcN|BTF+Dj7Pplts5%Ix0zBndW&i)sf z~<^PYB8dTY(6f3Cpx_j~dGSyBprR}^cs zE%wx@sj}HQ`c*Dg$d}uiXR(~qq>$1f( zS+r|)K~DSaO>@2%8s!x!?#^%>FGoY)l)PHn?cb&H{~H4Txb_5po;_$L>r3Ffgr}m! z-kM7!Z#?ac%WgLq9^LvpeVqoA#+RLD2w6`v{qU);%Fqdy(V3L~=RdY4{KBvX>Kz@! z&jZ&X5kI_VO?~s6oedOlz3VmX>)|AM^>f&M^R)ZiVR)BM+3R1Q<^M-0>wtBVV=$YR zH&KcUrzW??7SafxSF(XH+MWnz)a^$5R|pb>USjC`dPwdo=KtNA^4~CMRr*=#ZgBiv zt~}V)Ge42v#%B9vYb(d2?U=mnQ(pyWKl_goIN+F{NXC?-%vYU$Vm9_P>Nb)raLG zbGV5JgKsO`vY0wX24z$1STvYdBMsZ;Q=DA%An8{`wsxTb`1mRH7llw2qs*YPL>0MH zs=xm4=e{>ncl&0nV`q(t>e2rhJ6k_cd_QPjp-3cSg@X>mV`f4Ajp@QabfI7bv+Ip` z_Hto&IOsUbZ7}^kA0w@%bDOjb101>iA5UI`-@a|q!u@3E)Uh2!Q+Fu(z{!MP4nZ`@ z8*SUi0}ZyoCtmPpFGRK15Vx#)W6($Pnbk?HM8D2HoJ~_k_&yy!NuJ}p0`NUC6J{e( z{(KJw(3R2+D$qS&_0bksS2-w3k{xYBj%Fi@L?0+)4?f7Ptbl~$i@w&amFkRt@teuT zX|+b~w66qE3w6YmT=hcO&4_sZJ?TFX7$-?FdC)9_u9#V4e_M;d&%med{nX~N9Qn^m zkJ+&CBNzR5MFdyT`-F9^No+^VX~7{4voAH8pJ9j?eyspq$Qt1L)f21Hv4|y1&DF1J zztfx&a&zNPNfr691wyRa{$ad_(KZTsh}h2UYswz;^`Oa$>MS)y#dARgRG*y$HK#Lk zn4c0Gqe(rOh@7zt3f&7%BgP)%LG|xRZ0O`Aw=8mE1-26-fE5}g2|&O9{6^l=#Duvo zZ_jzbLw28`McSk!#dmy2lBgH95}9ELT!xtcZ57wfXN^AH;VG!7uiD2i)uED0 z+okZit0FMCvuY5Vp+1QS1m}CsG@1OqN{>dwQ^-E&;}bDXFb1AnP?j<11p95Qwl+Rl zf<~U)-<1zvb7|-$8n2rJ!5)z72@`!wKaTa>kw&}Q;7>wh5I>adT#uFu-n*Y_7;9SW zepJY{m^i6vFxcFU*zGvNbB}hypiba#8?>=Uw&g+ zBE#((u5TZn>^tE**Y_7p?044pyP2!!xRsih7vD`+qYLv9&aVR?bm9{F@{~Ma# zpZ#SuaklFz#YJkkvWH?lfr`YF|3|pc0UR1X^xXT-PegiYChoHQ>yZid=$DczzW?MV zb?4E^;JzQq;BgaW@CrF_eh#rpVIU8}kWSF=IWPa`Y0VFXPh`?t@Qqi(8T&6kYZB&v zEQtKDY8V!-PZS#7noV$iGz`Y6jwHmVL!8*e;xvmqu?C&cEZIyXVH(O%$jAN%(Cpt@ z%DNlydwOcThTHKS67Emayy2M+s)8R_=Pvhz=8SyaA}WzVV#l7Ma*ZAlcrPbQECSO_@uhN2=51Hp|AK?9n16hxnk=vAYi*iH-A@Mx&+2^rW z&FQ$+7K5<WAYmu%kdXFIu^NNNg2E25>GV#52w*iwVJI~I`jR) zMiaGuFiu!o6~2_xRCc=r->rWK`GUVe@uMpmy6x(h4${)|A&13mY_`@sdH}&4fgkKm z$s^?stqRcI!o`msJs|YE^A^}a|8xCdhLjt*U+AXfrSnRRO2EBXYsa0idipi84(n%k zQg+E`X12Jtp`4C=^yA)&4zey8LOz`xaEPKodt`;d3l#xHH>so|!v_)#A&>Qk?&+cs z4&g!t<@Tn?ebPtZUPMXT@p>SJDEdCFpHrtq<8>TL?P1kr&@ncNrw@D7%e!ic(x~0> zO8x~p>u)oH#2>4p{g1RRN%s0Ov@XP%>niQgkii1CqxK}8S3fQae^|mfjd2O5%UDJI z#wR@w@euckp`6+Fq(`c|NV8lqO;W|d#sen}UpqzlN^cV3`>KmVP?aM7_2Oa2Q_Q%Q z(IB`9uZ`UX19;$@a``4By$V$b)TJYQx>lApUb1riiG1ggkI2RH%255)PQPIJLY9~X z@ErhzZ-iFz$ac<4qqIGT$hGq5exWuMF#lq8U-YFx4cdSrK+Z_IsXo1(*6-R*jXWd9 z>-4cj%JJiru++@gCAqtekH8EmpI5V=nRF1UP0_3)i$tpsXi@m@j>no!U&;nh`9w`mQ#11@mUm$tp5t6@g#}JL zA?81s84p{J%WjukNGTCgC-UaC>$W4B${+iflWDdBm4%3N=~p6F>>^c?Xg3>xo+Blt ziM)Ev6r~BnK8Ds~gH~6=AG|+U^ZX{(5)Lj_g<4Itv^E`TW@UVf+hi$&|#+ zajqM>OI*qc+>;Y9jy3Te-gm^6B`1m0kNDzXG;jK@)c{Pk1A|;2`v>(B!cdwbCIXj*jZ>c5%Ss_Rt=gcz$Jw1C0wF@; ze5$j))U_F6&{*uF1G5TG$K4U;y)WeT$n{{?k1j@r zeius@u?tnm?=|ve(CrBHhk{j7KpE)y5gvV{y~rzFAmPr0-d%|F<@L=vt$-<|D2bY*D 
z*7-o|!FRTA4hf5JpiW}Xk2-2Ho#|W&44%jy>^t*vhnEcG*8Sz1j*(Cq97&tb#Bu`eXQWv|+R;66FJraoDB-GG_OBdZM=XzBu~E!-xIg zgjX6Z97TKUe+*2k`Li1tgxrL;r!@GK0mWBOPZTKLB!U|Z@5TF5V^rj5r z;5$nQ5-kFU#Gw&c#L65=csh>bxN1<9QN0#Z>s$*7{qVF~I^;O!oc>+hBh%~&zjxha z#=tlS8wl`tPUhSC(m(;dCA!L_D{v00SGhEtW47FIoIF7JrijDM(U76xH@+y14?qGe zj+o*1tm91RG5%wHHw!5Jf#D2C%=)681BKu+;-!Izp>IkP`JFR?bgw(Ll(b9N%8mcKW&OWO=w>zuaF*WEc(FW*F_)_%wGdxMBM?L<=>Y z`bwH+wh7A^+syES^&XdQyZA1~FNzgIsXKg4#0*m>F?TfZ$5N;xclzT?8#>|7vXL$$ z>MYlk*=C10oV*5=dTdwff}tY~)s>IUQgO(}j%InO`5Z#uC5i}?G| zy$IXzK-Yk`%)EQMv1Rmu#pbLsmBvv=!!uz(LoCj>kRSQBDYRXKCcf&Zm*&g3be#n| zegQ1V_WO~VI+sNY?dTZn(c)#7iS%d3Y8m}c?@8on!-0$r-S5oy zL|xksX#*Vt#;?pk;ILb2hifw_ss_`{lC5eb4S}}xK5c>94+lsq;OQHtcO2`Qt`Cs; zqPsqy>&=|}B2*!7?~QTX-rct)4Err@PY>0{So3l2>_2Ua;ZDqg#8<4z(JSjn#ge;b zz__4JYWvQ$$5ly91Jcolf;>T-9qR{+i9Vg_qGVMf$1+)(->qJ`;}wt0D*N)aasIr9 zQVs9IeKaM(xqfCOPs0R#RGYht?4Ra0`boZ7e@^nz=!-BSSjhb}%-UGLcq71QbNU=et6T}9yA`>LqYA0xq9G=+p;^MN+!sGe%c zH!!bpI1Z^TqM*;@G-zN{Ii?2jx=n z?d;~)IR*|(qDLW(*LsH%upNPuE~a<2TzN_JU&^=?de+T~w69WF66+Q@y}WfwAg^ceIOWHb_f`tHasE$?!+^nmDis!2Rn01s3zk$1@K z^InyjXb&|qsrR~wV1J~US^js$yD$KuO*jCP_?$S-#MX+UH(7MQps1na^_rMdSA1|f zM>ox2$@{SCGkichsoFOujT3a2<<9Tu4lU%k1^(Uazv{0>JP$IYFQnJ-YL1Kx^N-&$ zkr{*ltCQvg@i98*R1(_-9x^VVxnKN+wN$^d5|Kv5DBpE)RExSR^>FT0s@X+v8HSj@ zRAj!7#QuwHsN&6E4#8r@ciopxY3+ii2rpD@=xLgdkCa`F5D8m4>Lss{BJOBZnRkyCeuoWMdPwX%LS%_lhoAkQRBlm)fQ;-;FAw%?Y zv^hRSvMoGgArvEwhlDvc_(S2@8t23#lnOJzf5dO%*yGimfg&g_G{rZd2h_WsK&u$q z*Xu>(&Ubr^eh-|`*-jqbBs1R0#1U+L9>o1o#dG6XDlO)>^n^PFpi-E3Y;6`ZMC9#^ z(KM@TgBAM+LZ9e?MmuQ7HD{vCV#6zV4`6ppXbK}1=VXkRc3u7pT%i|MssSAKWle3W z9s6KqQoTqg&zrRTA2XH2kx-YK?xB(PvOc51#0?D~uqP28*dx4bt{b)D!krizbcp-F@GtH#A^U3~qZ2z9LYey7To5 zXUt8yjf9vZl$kh=?lbOTM%axqh)GWqg>_x4tqu53bC7U4>gHP9YYd5I`}MrYbb~lP zB~1F9o+RgYB4I~l{~Gr!<8YEaWVi~ZF2F&sdHBG2B-$#NrH&}W4TCDDR{bpg)j=-Z zBKP%>&jlsnS6cvsE(WaAB)9kPcJ`u9FOy0FGC3NF|%aQI*IB6 z5J&6IBuAXgOXVf^*luV1>aF*3=*XKY7To>vr!cY+Z z_H+~obzJObzs4Ae8u)Dc&>{O|v;F>>%<1Y05UUQxj&It4(%58OVUmUH-Ab5GqhDv>NuvfS zZ3wLd_@HI`%42Wt<#8m)PT^ZgU5>a2xjySATTHu>!K`}0zL$pU6c@71VRJS|V4_VXIv6w z&rXFtmHQPIa34^q)Lt!;KXdvyIQgH;za|a$SeD|+Xoj2m7d8hD^0@sKjxZL{9OoFv zdbfFrVZYlzBH zIM5P0)x(8v(LS!8qw*Ks0Q_GSSB>jynP= z(PGHJ_v)uyMUEra(z z!Wd1IZ!%nGlC}Pmk*45-M*<-C*~d3<^T1yyQ`z%x*Sh9#nsqHctL4;8`DMk2COY;6 zeSfYgNX|~nn?pf~U@8RE~&B6GYP&NY7F%AFI!G{1~+Q zA(xj0dZdyJ@r$M^d^~YyNiIET$k*p4)m&^yqSXFEp=_>bHX zq+V{(|7ZC{`j4Y`*e_Wl9YE_uUj{F~*k4~?Av1)h*RrC32=(wG_b7<#_ECznG2)w+ z03lAoxg-H3C|zg+%+RPf;~!4e-Iq;q{34+E8!Fi3C5wT`EBt*JK9*H%TMtXHx`b2> zr}|h=S$jLk+A^AeWyIrFl##Jy*v)1zSdsBoGBOl7L2Fgi`%TcKRvJ}Sb@ixZh>Kpf z9rWCBRPb`{RFR@CZ#sswg?)olx^(bmbOP-63I(RLM=qg{ zUbyJq_^io7=SWS||CI5BeYNjFBJrVbifbJx;Xc=i*Iq=zu!Y)p;tY&tzSKv!P)%vv zJ=ynJtmJTVl#=k+QZB}>#@B5+2%_U&Jm8RP=$3<6N1xZ=<{yU8+k7Ek5_&Z|m|?O1 z?!ZSctXB9(#oUo<4&7!(2$gBbL4E^AW0K)}bG(HNAHIcp0EXRS!^ek^W0_jjHQ8no zL$p1YzRPzn6-k-bn+TdMTbVakU6)%MlAof=D7bY30KU1nE2|HNpN^&ZsatXU+N#U} zr*Ampg}I~edQ8{>Wz5hHf_VFADdKoF==vZ(^|93tmNznL}o z?SU1XpbCppn=h76r+N=vHH_>Ef<1KHSK}rE=&KeoEDwXlGKDI3vV$2f>1UE+d7Ap0 zk*=2c@!DEmPRs_2Yn8_ew?1Bk`PTl)abUMO(h>bW&NK3e!qn4Ls+D=X z`aIs(6PZ^l@@VNd_(E-VOfP7>ZO&TiUlA*uTkr}7iknn~j5Vi2f9xue=L99MokMW4 zuWpROTr8b`3DTgF!D|)A@)!Qs75Rw_CGVG=E_>DDCQfMEs&wtX>}jt%fT~Vib~bet z-YQK)yppkauIq6Ndw`v7MO3jqTy%Z!zE#cE@rPmf72UDKd}NiQa-^bWan-o0xaiO) zyNr;E4G4P}ZRYO@?cn(_o4)_UCjOt$P}@A|Iz4Q>d#bsjb5zCId3usEZ8|3Y5Ok^g z@i)iwQ>3h~FDR*UKEFisRrx?|uZ%@u$ZV~RL-`Ud zJ_by3Y#DHyOi+6IQOTi;n6U}YRXvF>iK@eBUD^(giL-Wi(S?c5 zWrs%6ol2w6N$s(6v^WQ~SYH1%&nM%hKqWLRe=t37O=C;{W8hc$&j`S`t;BvWDx`C8 z?7c1ww5SbyLM^d>91W~4Q%Ngu!rbDhM)!ku$jE{LQ4{FEJvdioEARYb-3*I1Qn-^) 
z@P?3W(v+N-SInV(X{qeo@c35syYwxi`EV1qU73tObzuE! zf-he$(b|I)y~!69i#W#&V>T)HBs(PXkql5_8SR4phRnP6P^NKd1`w5!<;aO3tyvdUM`L_yz$Sc4wjZK`N>7S`?Fv4D}60FB@m^Z1J6D zx>EDA$?d$H^iiE0--2kPXL7RB5^HMJazLyWqn`c3K5_9zei|x<4;D1$OWQa>=k;x0 zoT4j{{G{SI*2Cs^$*@Q-E%T7pC3bL!%n!X~a=ykFl`XDTjU6^V&BCr0IX>K<6kT^} zAF1B{1nm^`SI6y#LnKA&_V`e_k6gpW?nL!Nr*=Mm&6mY?_%_SW>^LOmKrkFPx=+8k zP=`)gx0~kSbS3tL^d?eqykxhO-&Ecp8#Ugac6IgD*#oa=N&xM#_<+sj71EKnYfAf- zULz`fP26p3>&wbEhf%!oTa@lWwpqoVyiUbzhvUV7-kzk9(AiKYUFW^8^V1$8VdKlz zA$qoCmXGd{MjAH5Ap62)ib83N^y4{EjhCtx@jzH2u6Cm-H8Qlm>*o_O>lGn0@9KXs zw|vh>p%8O*Tvg?h5@f?Hv^QdZaHemt^b*`|3!a||HLI;1I0$_MOaZ4;eIIXOm8W`K z@?WfU^dF)z4Y)L7x$8r=1zJ%LaP`{Zu8V*x#%Btv;9qQ$@<%cpk;!Q`;7zhB>1zW@QJ7!o+pR7=3zJ zY{P|AQjIn6cp+f3cAc=no#LQ+Jo`WECx_-9hdCtu3JYa(*tZ&it zWqa|Y?ls&jv~G8y|YFszRx{;ET#V%Y1PQ4Tg)xP z+Pqdl+?-n#>fs~ehmWF}B9~7%3qbu&D`2N0Jw{B~n%tWf0Gux~@VH{F`Mi7h%TC#f zIDHz;1MN6Sk~jQ^$55a4JtX*t!0+uX&=G2#pCx;3u?uS2iqY33zGTeO0yNsUtt2X7 zlS#oOlY=^gU(!5>22f3b0WOxI&^5rxtZV$DfzwLS!zy9bDrU7>pqA( zAPv3EbhR5rxj}|&6onOY*IW}TGbw3p3+Kxhwzi7iOZ+cvaD(kdsfkI9wH530E-XY zOYgQDSjEb>!C1KkGo|w1XnY0v)-(y*NJ-|!$t>6{_2Ar65mN28(y&8q%V)0-|CL9j z26hut^Y)Kph!Ltq!W!e@VtltAcHvlyZaU%7jAW)M9_NwqOGisIv=9F@FRVxnDkN?F zSyCc|7jqz8m#Q%%28qq%Wp^w)2Q*_|`&bjxV7_IQ;}y2QR{_+vHhLk?wU%c6yw2(F zVXv{Pty0nIFnoc&8`m1K_v!<4wi?Ti(dHFBY1d_*unO)Tg!5E3usAZi?xQvz)ZhJ* zwI+UJvFiV@_mx3$ZdtntfnW&)cSwN7CAho0duW2YJBv@*#wLyDUvhLl~u;%rt!8FtHb zTgUyVQ|Pra$QNrhSkbs|7}rHxlTb!wXb{`$wCnPKC?=qp)7*&GhD5aV(_f6+FXCS= zn(-rOJS96zk$8d&;G$o^_r1SWrMNMkXED_3tTj?T(x1&s;T3Ct@666Ft#XiYM>}jM zAXLG}w^}iq{*8UVFh&u@rmKq**s};3X^TIEmD-&v#}Qw3;#&}<6QcwPM=N5B7O6+sL!eUR*1y)e00A7vvfubvzx&6}VFpIkI`jOeV{KWHXKub)TB{ z9~*@@&7@?*r9>@P-Mcy!VyPbMQ@?z-+@;RvloYZLB7MS&RU{0b^^CPd>M;$GwY(jg z+aB`TG_;-cdgA(w-)0>Vn9yScCt|BT}h&F8x*}*TBS66>aOw3CWfOFD;`LOr&x`;_~ga$NyhvT zy;DCdq^c$VT_s?+%a{PZ=44!?<|GCk%|O;=XK=!lB@@kedipgMmg`kLhvC*5!|0$B zHM1?a6qg13iP>S7BqQEycnLfZSQctr%g``{{)O>-;)?)Jo)vGioRAL_H#v1=kT{Oh zsRyjNeRW<@s)DT`);Y&lI`S35#sbvbO#&P8R7tuGayz z5`1%y*TTG4HAMO*fYyx#)rg0Ah6rSOP!VIddHy_Pz4Y7FQi+{YigPcqMSZbMUFY6_0~S8$1e|X^4np%G(x&H1k?2qf^Ze}Y-{}%Nx zT!|0mN^vrk`cWvNX+LGLy`+R9+8PSO!7n1Npu-1VOzdK<19DcIc}%H_kO*`QEkz^Q zdy470-0n1EvyH%hWv$UVinUS5DN!Ol zn~8nfsproXBgZ1*qBF1{j{1d_T~SkNU&p^g(>8qQkf0M5Z9))#a?x`mWj3ze?w?Op zSW~lQOeDUJ$QC3?5R0FqR7*wrDZp23;0Z|DW)b?vLHJ>|>~JC7RdI-q4^hIwQL%D6 z8L;a(RN%!~(ua6g$-1eZ5m789HEo<0dfZF1a}hUjJltKRB*R*?L|VsLsy29^g@5th zS{~gZzscv;V0bAQu2G>E67*wHmH6I`{Fev`nr;T=Ujj_8f7>K4RK*ye>r5Jzg%>IW zwMh(an}!`q!x5TlhhF^pD7NcN8_JI>HJ&*}L+Ov$c&ItAi9RWj zv|EXNe<@>)XM-+YBHQ>mb+n(xot<$p`I^+1SGknf8D)Yn^90x%Q5`lRx`x$|{GL%@ z4JM@O7TRn-1&e3)XUhcBt*M%&ROg&JJYO#p9kCB%B8^gwa1Mkb&iG?1FjNDViXA@8MDwb|=`DM{2bFGIFs&INhR9ne^g+EZ!E}LxjHN@{op^60R~VZ=@ML1G)eK zCor)8d>PWed|qRLNt?Y2?xu3P>sF{^ehPaoqw5_;iX<{s!SCO2kWh^3?z3R@ko}f4iZm z8Y-vf!I@^pg3#4^6GArs=3fJpf1c>@4fycGUDxM(ieHkzgDCAQVW{c~5)PXDIq<)n zdmw)U-Lm)H)0nLf>y+Af?_daVUu{UT`~Ryh{yDSv`oGnZq@rML8&zO@=*zpcAGq%% zYJP8r|LG6v*IYC|*|EVf`rL-9zYc(kJ~EhJ?|i8X`QO0nz3*oXT!&Lv@mCy9-duBUuiOuy)UxEux~c~Y z{Q%ahOH5F<&Hj5Yfd3h4zx;kY7gh1^1fc0R+fSs2BpE*UzmUL7bOd%^nZ_tq`~if@ zk|R?jpxUB99m8knZ@~>`TcP|fthR>u`&Hrp0A1)e{v!`&L22Mmic&m9`i~SP2qn!w z*&}NW>p!yxl+XU;TER}*|ID>es`)1x^wapyGzcZ;e_|-O%Kywzzd7?yK1TxnXFmUJ zVf;z8n5{7Xk!pVnB!uE26b%0>kiS`@|J9KH1?0Nk{r>=2Su}`1M(;cP zm3hdAcHRfJFnMBB1pU9+<$hA{i?yB87T2sUtsS{+_-~p&v&WYIC69{!-j_!Di=PIT za|DgP zZVwwPIIiI?-V!@nj4^PHHCsdyo&F<~B<$4iNs4Oxpm&8OOlH*b-+B(lqR`Rf`O@te zP`M?re8yIQ;r1QEZUx@1THb7FmV zrQ&uclp3tT#7$y)BUx#{BKf`=LGcvbC-VVKtR?CLRGei^$0woFFXy!XjeA!F3xzhC zmmbt5(1awC*|0E7K0tBaZ+7fSpTR331hg7IV>&DW4XxBef0X%A-~M*%ezqjh-|X$b 
z(F548FQDYenecZXG3y+zQs}i5Sr_el+278kZx88bM7~glB^^S3ymJo&=NAqCZX?k6 zzJRNg{sIi6p>|7v!LR))XpX4byjCCJr(y2Z?+F#Cn!n)qC~x;yQJiCX6wKBV@s2#K zzsk9j;c{c@=tppLHq~02bEiL7YX!52VrG@Z8z}V7dqeb;M#u}EHt{O_mKM6 zgQA&KD3Y$fm-z$Kq+wpRfv!jp2YL-j)%agWK&}`A6`C6P(ZsA+XX)kPw>Vg7*{lh; z%8q0|jjJtgKtnp-Z}&Hg=3P}l#vilET!Dt$X)a|J{_69-v-*RE^#0-J|HCd&G>4_j z4)#lG%KPwtGa>C^gvu{Y0I2GtCqgARd4(OU5X{^~tKad)it{h>6%0hnT2s?6QYa=x z0u}~DCrf1+!P4lR-Y4kWU0r{gs4~hhw15h^L>Ribxa(NZo(R`tB^&q)|1J>W3;4rU z`?s?B;+GV&?j**i>BrdkF zpOfr_&IkG5&8PSGeC*2<@+qM65sr43$_jvmirJ$Vz11S=rvt@r)%GnB$}*c}>i(K{ z9m*G_MaYh_6C0$KYdV4S7iIE&kMSG+Pn7>|Q~$$a^_3=r?t9Eu*>z~}F5g*9UG^{W zjIV-!X!rb|+RfnB_E+&laxnehiAVQTTNHZanD}p!`MvG`CK;j5(vmBs1(ClD>oW-4!~0!}%bkSARjD#>*;t;Ntd+rbf5(oX9u8&zksP|PYhsV4rb40`_9SM7y= z*ZCr+&Vy&QjJ1S`jn_*v_vm?3*1jgw7P!Rp57qC2!)vN;P7B>!K(A-ud&ahBBCs*4 zD!LgJCrj4)`P|&DmNhNBo_6$x{}iPf!qnh`-^B$_=o<@x#LQg9GZXoEL10 z9a>(H_RqS%K+_b8UxU*QNwYkhS*^4=ymg|D^taEu2g))6ykCt9cAuj9-G+yxGovm( zJHN9;RTGIW)qbC!f0s({(mo^;(?N+*Sve|Eu3pcyJA|EJDoJfIDiH7Qw<$Ox?yOX5 za-|&q?hBoSLyY5Po?uaAP-Ka>5AusMgVZWhFi7)pPpIz51{_tX;IxAS#KLG8**?=Hej=nlGV)t(~2Ij@m`#zHa1Uj@_t zQ0shs^DCrus$+k+W$|oKx+Vs<*qryN^PAQC=Z(DNFN1G<3k&(G-5pR1JUMQ>xU4kkLPz6&3bYNWEth6RIXFVUvKp;MpQt~U6pN$}h zN=a(6j6&8LxOdmXt6AyCi5@U;oMc1PeyY@huwL?aV0+wHMZ`A%I{+7_`+0o0&ONN}$?EBY()Icu-d&v78+kXA5OnZ_ zYMV}porL4#WN!WxD*F@{{g5|lFSR;jMuqmP@RDXOdi};|)0sL???znnK9w2$ApT=B zGb+t3--cQtKK?8p(OaD<@szEu)QmFw@n6tbRzZ%L21|R8Wx5@b>&8e-n~DHh9PFXbyYPM z#rN%%bj)Rq22dI*(Ok(dkA%|WAQK5vTrSnzNl6w+Hlb_NoITD^Kqt z`jVEEHplU0+;30W?k)pwJfD6hRA4@6D*Yxm%)N=}+{OaMaBcd86iu=(K1MKyZIVezLmx(zSe^fw=)O`zHmBot=+}G-j&$C1 zjL)?;w*L0%*7|gg7dLYaC*&55CH~<&EpX+$Mh`Zq$L};gL^Ui}NmHnbTgM;(T|?Fq zJ|MmhUAvOTLj*RYD_OELPL&328)}DVnO%g%@9Fgdc`P3`iITwq#aJF1paL@gzNP{aHtsf&bJw8?!a- zm<*LN8urfUH)9Ov6Dg>jD)X$-6Rf`S(KL3+LUUZ{d~D-Q{5bUVjt{gF=`UUbt`^O> zKmJkJPJ5&S&1X&g>{qKM3f-oUa8uamRl3YLVS*%h8igvddphalvC;JDjY zHR@qe7#o=DTg0@KH8@d{i{*Oti5;YlXpg$dLPMjnRRy(`8{&$!;x#CVZ=+`ZgLI*G z_Vjk|%h!WQ$y7EDI5V!tn-Q3t)$oj-To?-8jVy=b>o~H#e5Otgd9(w1{>PRD!IP)& zMY9I^x~A~f6B_m_dX~FO^G|O>XE(wIbbIBd-1?d-|QCQ0k0vk%cSH9&AuoL&SsnOTXDNCiF9x z7~)OXo@LsS$anBvxe-4Cfr_1A&SuTMbnwZkaI5Pol5hImFuA_A7la8RRm1 zuY?#3kbWaT+`H19xze0-g4^M z91LUCXvr2m`dxQOk_r5k1>Rw`S9h_EBAzFdo@4DiXeUc9QV}zCO2>^985Dio)1s%) z&q8edR%6!aMM3&9{PIDgm>2EPQ9tPxwu!#ZD5cx_MW7~$JR!HDp9W53@!`7j;!w+% z+LNxfaz@eAl*sIc8R~cklpUYId_8{6*0;y!x|zY5Ak^0!EkEcuddd}ZE>O07!ml8! 
zy>*p*Ij8$?f3OhUhxiQ7`YtsFWPUv4#Cp|ZJ0jnxiKVYGiqG4$S3Kad?~=xZKbAKC zipXM+JfaSnviC#`a@N@x_wlGIq-2>*N$tp-g15CbAIF>sRUtX7^QzoAujX_pNTRCz z=*aE+5x`7Og(6rrRe`v-;*iZv@_@E`EHglVggfxj<|EfRUV(L*mZwh74oe(-7HAo} z2yG)YPK;CYTjdQ$ z0D;R`Wo5Bh8C8~f9)yyE=y%4pk9P^vj496HDGhIH4#~Dqt`s&p9_VWy#xPkV2zlHIdjzhsu=ww+K8SF=j!PLi zShP-p*p9v|ZfTxcR^8{PX14`nrEb3_E3;t7T1_Kau;U(6N!{hzK zWlv*hw%$n4%+(u%wX6_V9MrprV|XXHkKNE$T5#K_M0CLeq9T|bW`~HFYQ>)Eb;Rgp zbPdOK?3ZS48{Fv6m&zs6ZA!H`;!9Mo@=?6F1EGAbJ*1tj+TJ_oIb<~lISvw(FISHx zZFwWkK32U4F}fVzj^gGL)JX{i*XFD)A~|dWb>4$0)G8ToXirWO62v=JgT|&~Ky|ZK z)N;do-;pz)6%5)VLH}BFzkaL z7i)FIh0-&V0~uY0fFNaC`uWNVwbW*_JuaXHT$(b;IK+c(k0f%urnrxX2YIn^n@pla zWi!h38Cv|zTyV7Fjhh^-MnJA0vFC^%;MvsZb5@Q&w>6!G&G6NjFI}cBC8@0AE_kc` zx_<7x9>AHz#?2(13Uu29@u5Brby!__Og3&g#F_g}u}}X%6`BhLw#bcEbG23LBE>%*ut$1;E4Dxf!kqe5EWyi zCnK;z(?PQ3D(QVSIeurL71g8-eH>r@!gU(;yoKhnMKrR*l!Mi0b8j#HWaIV`^Sau* zV-~E`UwGb9+v6qv`~>wk|hE!Ey*SB8l8onC`pf5`*cmogDvM$?=W9mdCfi?Vy}5)f>xsI zD7%ggOUKiJ6(fOZX-E?!L-vRR#Cjy~vxsQ#U=M%X(zN!ju<9!Ng#% zp;{6=RnY{;82SxzvrhhLnsb4fAyBgeqO{bYf+Jjao70Bzg4anXxCme=4N(H|pME9$OGc#?@J`n(qGGH$`|> z=?L(fts@R+%8D{hiofY5cCt0NAqf_vblGT7yZL+z2)9?Wri~^GlzlqtRC#Vf9@~{+ z42)fxv8S-Hq&6s{4S!;~*|k_|wkePaI10^gI}xhcMqaOnR$`af%~z@1FMKYoWW$Th zPk%k{ekna=zmCo8ScPcA(`|BaTcNqP&2wWhOAte9IVRy1y7q9Aj=GjnNw3dkF~r>R zM-*c!k3&)2562Fn7INMD@blZnZB0F{TjK3g50Llgm4>Fl)C}gu3J9Jr6^~?C01i~W z+qt?qzKs@*`hvc)yK9GXbAzRhpFHO%#D=U&%E9%!DOzGg*e3>aoj!y&$<*8R@m|QQ zP4iDtmeU@@G&&Q#i2&k;ybyXHmwD2_EgRja)#qdWvYGw`#q|*nxgR-r9J%G!z4UYU zq|Ga=GRjU}hj-moc86Z#I)v9wkMxNnME+Y;7=clB*{Fbq&A+*sF_0U^%C6n0^%%vT z{jP5<_Y*7!tv5uIoSt5X)3 z>%kFIMDJx6I$UM5f^zvZ%-i9)e;4$%h<|>G{)4n=yi^TieE*|ne&ZsRjFnRHQ4&45 z*44M!!;5iPip7l4nwte}%M&)aQ+xnFJ^(fNiv8N9_CQr4j=_@ZDg0VzELq(v%sM(@ zhf{rYF7&Boc+6Zx+h8JrVO%+JJdKwOr&h%AKwbWdzqoPE*)UHRa1h>m_a2qjpgPT6J%-xB{V{w9$6WBaB${sEubx5AbV9PQ_^@3jEsCr3fjcO(23 z+bSn}v+P3qY`r%cd~NohCLsAnNUNwSK^rkIo7_JCI=8DH@lN%fHslEu;6X*IuChht z)n8f++Bl+*(|_KtRVMDrO}{sYxyNl^#WooKN$jS&ETC+pH`ZI>#TRZa((@>`h+4KI zBJ#s7K86MH2s~_&KLq{M*E_y1+!jjRiMU)}CDXPU6JdKAi#hYr*t$hUGo6jH z4Id@;Yt|+$uk$iee{LSQTceuzWGGkSX*wyv+Bf9Bn)PfN^GIoEB-8Xfzslz2sq0}a zH|#?ukTYg7;0@|-ChTv8NBJz=<5hw`GxV6_C^IV%k9yR9$Tc5jCZDRyR+i>~%`lT} z_SnIC)03uPyyPX{-sl4OrF4-p=XPYid;h8Z>`pa_HG=SB#G-3w`>E+XmSh~{zV(A` zx2t&odc~l~d({rD_6tJ)$S1N664Kk|MRTti&V?HSyi^V_pQmqgaMA{U${k<9*l^^o zWr=kE2|>9HnDCL0SKMN6f+23(Xcot8v$cuv0#2`g!4(>I8YxjcCNXz(U|POUFz-87s7d;bCu0x1o?Mr%w!n?7x;Q1ZTk?B)lvHm+1@Cm44L?Rb3^d~s zE!iR13J;^)tSgVhi95C@c8Z zwjp=<7SRr_B{BCP3vx+lM&XXK@7@#SY~T-|+>253=nL`evQ0m&J>N55?k}_m7C9YS z_CrgGoDlt9usc!D#cr)yo0P3F3!g7HYu>PJFn9}e`<##-Wrudy=*!uMN#-_`_5p#Wko|6B#m^^(q z*!CPJd%N_(Pa}vWLG;}qqJ$Uo-Nl^y?$SW|&pY>cw9~9dFW`22i3I^QVri1 zi?@}GMKPGqIly^SFo?F*K&is)%z&24pA^Zjb!Q|Ky)5XgYHDjGyb z@hTgf?X1Ec2{?TYh|x4J>Uv^=9~m%P`sQ5{S{WCt)-=^8=v}6T66e%qbP8xE@U_q{ zMjLQv#xeaG!8g^V2aq`}X6qSd7aKizOUeanhEN4%2&>TjXVAk$SR^mq$fL3};wmbc zOvL@qkOZe4fs^hZkl|H?=+<^@kupeC!dmL~?V#WVu^F;*pyqihvD@cda)(v_w&PlL ztr^geZMDZ4?l)D@DSVoo#OB9KpEm@BH;p6@L(5-2Bt`Z%DDpQJ)gLpGeZ1F7Q0l3Q zBn=Yd&C^%I^|--*ChN18#5CuTz%+NiK}$-Ail!B->(KOOE>3z%XB+D|n_cEdgS&pz zT3l6Zr;Rh7S9r|8WdgSJ1FauiHPLuG3`?_Dw ze6v+ISVc}tXzsXqZ03j$=GubSyLHB_R9FmO2X#^MqR4j-!7pwaG#2f~D3eHBG6|$) zl09s+M@zF_+Jy#r>y}z7=1!h^G*BFCci?Ogk>%bk6m_@u*N01-HY*(0lm6I0QWbMs z7FhCD9A9WLZQu~CjD7ZYxF0%?B-IE#|2srTj+`M*bgK76l6_7GJ@0( zu<07X6kk7#b8D;|M6|}_ud|ap$ezC%jARtO@rbOC&3_BZ>{+wU-ZKJF>{(xBb39nY2{MC$M@|D@HoX>5h$QY>;H-8J@=yvr&Uu^I#M}h11)& zWSC4PX%S&kgZT|o@12C;uNN55Y33!72pLEKys%U)4NsX~0t}b}+}v-c$nx!J%7Yu% z)D+_byC(N@+Jn>8wP@rj@6Y*I5I}pQ>~YoY-GFO>(?}Sk>Wfo(!iVbDu?LjU z+lREbOI(A;^2b_fyeLxE)ZWWlsF!~5(!C#?nf^vfx?&d%06T2AIB>3be 
zw=Hmn>%aaCdO|JQn?{vC#=OQLg*9R^oqytcx?;-xfrNGpp1EYEAK6e$^h;v4)Ai7c z02(2w^GUqv^%V;j!B95bLOfjM&iDa*w9|{wF(?QW6`=ZAY<|SV7tkO9` zI@(2MIm)D%k_A&dfNm3Gd|FTog8Q35a{}GAs%~NgxaTmLYrQaa8RkdxJo;m8H*0;S zsNo2~Boh5}tYt9D=au2IQ;f4*qgam$HmReO(>=O%&&t*Dg-^s(koU|_53i09LMhHf z-zV3T^#9s%4ZO~eMNBR}8rNE0C=I30PHIUv_rfQ2R+KTk`{AioVbyg=Hh_@tWo*Fn z21!o!AkO$YAe#brfP8C$M<+|KKS=!8^UA4SbZ~^7e=yeBaa4?_5@K4!uqkJlqwsmp zxzveqs0|ot$kplALt$#tPnlY%5~8d`ju+S7Pb7UC zhuGsq+QY_6?WZ+|gU^%fwwO4BUM`=eNV_T4YLVg1?ZCen?Na$oVaFEJmjOWjbI;1E zV^?hMYFGA3N{P0S`hq#@&bXxWpUNBX-nl@!hGnSLd zJYrG84nC!c82QKpXq>-OijbD1qaIo#3;ZMLWkYegY&SUF}Cz5AACS>ytzAt-2 z{O*yl_Y)Es`gGkz$0B>^0E_ve3Q%<1e%J+;wNQOhKUopyK~ z90W;e5Rq-q_Z?b5M4_^zfcl%dcr%9SCM?>>juoG@0&W3w!YiiLqaC-C+(jHI)k%qh zj){ERypr~8pagk|eV^}XH_^2w-K;@mOiHA`tq%9>xTaZc`lXOU>*?4s%QZF!@L zH=IH9lE6fSpV*tYqUO8Y#cz)v90izvnHUQ1HmX@<{yJa#avtN6uyqr6;jO3M!Lxb5 z;?hF2YHu!7?CpwYy+b1KM~cN(>F;$7zf&wtq;QO9^CT^dU8k?*&&azH1j0gMI!G34 zp8er!P-pK$U|&@|lvG!Q^kV~NsP^6tdjp6vK;hTY>^1P%@ zS#vtvlKWADCjgk2(s|lX}|k^++jN(D)wW3?VTOc*^G~E&z4s3Vc|+!6D(WN2JsrK-~`iO$>`)vhJ7QYInNqq3*7ejDVEt=TM*&@;K7 z`$dwFHJj3uCEe#E!lnB_>W?1t#Tmq)1LQr>YRbl^fR6k(=@Rzfkl!(^CuH`Xi|avYshA8}gL{$K08Yt;1+7 zH%o}B>QCq3f(_fl+^!k?48{R`!NO zY3uRdehB*H-bfuJ7HXoM&JVLGe6YX#j5_F3vBikmDU&)S0%p>D9JNhfeO2u=Tl~caj)P^Fmi~0*>(pXd0&1sjg*l zTHxfoJ7Ag?a=xQ_^z&%3){~|p<1%H+i7V0iF8mOtsLjDue_r~k$5YO!X+>J?7#FR$`6cnv;!zi(Vj^I|y<*n3Le>ZJWGvhmOI6ho0sWG9IFUItxJ4dXWr zFx;7XsFo^YcJUE$@J#Lxs4+03*(Xf5ou3@>0Lfp6Vi8BQqtfn;pdH2@>UfpPDh@FC zj|(?-XVc@jqPocVMN=bc(HCZ?mAxa4sXKnn05+F_*8^NkyL@-5Ay|_xhNr}XGzxAc zaa7EY2_2iD2g}mKHgZUeu480~#l2XiVPxovNt=zUYBY_w&%!*&U|21g_msu!;iaSS zum|&U0=aV1$E=(4QA+9MzVSgAqMTftYvM>a+^@Kljlz-Vzr@ObkE zt;Hz0ccX_v%`)EGqT!HpW@Hfn)=Rfny_jA1i_PxvMN!@bv27ZnjNf)LGp8U3Rl^J;NQDRL0P8 z6R!uH^@Zw=Or=}5#)}ze6dJS2?E>X*xT@<5_hE&9MHAl5yd>1xfq|Jsi3}X zC%dz!UE#oGca=7NEu#oo9C!my7-P0C<-%Hj!)cED>lWiTb^{ZMrUMTZi#_;xN;u&! zwm87B;U}`|rSyO@QCzRR*=q+m9q;+RN< za4(O*bGy;78)!dJfwz434KeUu!Z^8e??y{>IY`Tm^kio{%7|iuSNLoDZQIhBApTD} z3=diU=E=uokDcgXo09_T|o_UbRIE|#HF-WcfMBW?BGfcDCH$g{2&-GU~AXtZB{eJm#Q^Ot7FCb&nb3VhL) zac_906#X&<7od=Qq#KedFbfNy{vftr1zdC#5f0>DxA9L~Xa~UrJt@eL?F6}^bxP`? zoZh-D-9P13Y37B6J;efYraEZ1eK%g8HvlVF2jb~qhUxMGGO4h9cUa_3!>Y!e`Bcv) zp&cjn%*O2+Dr3*AY&9^`=LA#_H74usX&;$2Fk?rx6>zsX<$9=em%w}v_zXIYiZaVm z3-=*2>Yq=Z?shx0ZjwT>H1?E09w4!@qulWNd$w}?B`V=*N$Wd7shi@De0dWgYk=(o zG++~#!H&nWd~Mk=b?xb4vdgNH>8uYILgO)WW$-hN(19@hf)c~rL;kaq3sO|j=Mo=y zVYp=%rPqBN^%((2H4K_#1wS!6>;J(2V1H_3U_8X%n|vFJ^StFvKI>#qFnHp@uHs^ z1Y%RMCV^TemH~7!O;Tm%>;~<;QmZ4~6>T+R;L0jV-5AQJ*})8k-gqhn!jpcxx?0LiRsz65 z9(Z5B>x)SYLD#`Hj%yi9XiZ*r(#Op@uBdGc;|{4CH&C z~`6E^8je~G);?R!+WFUx;ZUFB?>crXlLu+B@(v0snzBUn^}aVlhMXxqVpU?i7df* zQFcolZE%cjZ#$CUKZ_rr{s?$eo!i8%KfdtBDdq*<0N)wv`MVKjE<(=7Ot)4a0H;c z_xVdS#?DFN)pRA%p*M!}^$_wL=EWx1w5N1u(LG#uRvl;ineP-nb=9gQIOX6KXm{v6 z{a}8Iy&K||H_OH>ck(b zdL)lH!rNC9jT=3pDQNc13Hz1$D3`|oP0F%P%r3dy>Ae=CT*x_t=@<0Yg@r3DLft?c57%BG)z*q{&bYLg|0S}If27Q)I0nPq}l9)H0*hZEM!A;IX?~Lr%nP6 z2nw^j()(&!_4v$l=>TpX7*6J~5%+~!ccN+Y4*o>z+YRI6-n7wl)AoC-LQ|Uvw*VO3%KmY_ zr4v?SY0dq-E3Uy>m4{MN@h(OHVTM`6vD_hl;goo)IU^l5al;y5mLH7(B~S)|p&t{nYhSMV{WZtEsB?c%!Bdrwrdm(~hm|J(IUQADWuIO1__jrSDHg zNBN}NVk$o#hT-)ezw4BFBpCz0YY{yy2}y#2+HMU_t13xdCQ=8X%*+!U(Ns^u_0aM; zDFZQ=%qS+yhx~AXnZLmrMgPVphcSyV?XbP{#+B~ok<_#43jet{XTj@J2k%ZNp+W4{ z!$Gc6x=p^QYp#i4{`eIX4f1){8cLZC$R{;cv9S-c`kznqvLqVi%?cpO8F?fhAVpN2 zv{&=yNymwDieqD5ab}Q1iy1Imc=<+djUof3}EvH3KXA zR9J)d;7Do~1hf!%(=U@Tv*Ovqohz4Ph9Ru3rA~99TdRWcD$Sf!AaD`d>GVN?%5vMx zWiL*0oFbF?8KB_Pk!=!Hmvp$vMxFoK!?iM;ST_Yv2Gv3%m!z*u2Lt|6IcIA!jx#!! 
zJ|9OKu>BUh(=w1n<#zH@av333SX#&^PvUdkK@H>FLTKbT%_si@!F$!287r-*T1f`Z zNBj{+7ep7nW`Qe2iIYL&+JF&srU{Wy!I_!}m(C0cXo#S8zF}|U;@3|8=#_GzhtwiZ zszg*rDi+e!M9*66jaa#NZQak+ee}>|LNxn#^2$~|wMjK^7+rBk%}C}T9Oh3`Lvkc5 z%8A^nGI-5;DI@j~$ocJh()IeCv>Da2)&=)v%fVO1ZyV`jAGsp8ZMTU#gpSYFL$Iw| zXA@cL7PFA7Ee>6aqq-Q~pZ3;V4pyjRrYr%m3*To4PC}*7jSIX>2Ka%QV16J%xb@@N zbr7g3Pe*S!30%xL$XBG|WtAcTLzc0^Ai%h>h|FRdi3PX!iIl@Cbw}to!i|&OyXC#9 zhD7mW&#h`zO1bhxKM}p+(?$m(CR0LL$uopl#b$lp5$;lWZH%WSsRrD-XvX=Tw0s}kTb9%lEEmKWv{S-Lx}crh-^VLVH1-wUQwCfZ}v2kH|)&R zDNNyqW9Y|5XZP2ey5K9Zk6(?6Z-~*>Ek{%fC9w*>1r%(G;T{mEEQJ?wIFW9bok2!~&{yj49Do(x)t7YH|_a+Y}#ehEE zd09{V^JT3Ci~}>fbbyAN&6HN%nUoJY_X2sCG%oO3D%A)FRsKC4GrOjHWIQV zgB0yq6edhwRWO}Z{8r>LrSmHoX4WG6!)kMnk;>3;mZl1Yt(bLkv z&tf~pv)LyK#%g)Gj0Iy_#Yej@DE&7c41U!^9X0U_$M5YweTnea%VyI@?+F))Y4cmb z6S3wsbz4r(G<2WL4e}CM_p={>!}pFYcI4`V_c6I zFiGpTsB>Dg_>Q6Bn?GjgskV{0n^Xzkb7H;=Wl_FMWn7ye32fq0a488k93A-=t?Ohk zELen91hkIWwA@1gu#SEP8Yp41{Pw?=IdY;UhOk>sb`52W{g94dhvMibh8l5_t&Nbt7C$O9VMrYTx!mAoitu+>Xa zKMkuCaH|(e4~J@9+pP0)@Ob|bZU>LdPg6P_X_v}chqHfYRy_KgtERWo&@hu0ET&wG zd7L|cm(I~ii!;qed_M7gNu-OWA$=N#ZqHVGL$I5RWhA@da~)+KgihjJA+|{MLHgQ# z9H&qPk!)WD4M@U|XCu7;VBEadgv17_fPE$SHW@we8`);bJTBiXREyg2Jr7#DO{COD z)V>Yc#gx#gPw5JN#r>p3x|9aq2xRmio@-}j=r=b^5k=brBQCU~`xaKx=13s|CvZrC z)+9ZNQ38z$psEH&abmH?PWc1bO$$pZttnO;Y?o@L=#>Qr>!Ojfi;&$Gi_p$!XjEsT z9jd84tkd#e8#1?!=M=Mn(oMpnI%4uD|&uBt?1a zq(iKCM)5iWEm)IL1`egkD!vImS#|MZ2A=7@WVEK_o9Mxe?U$gy)HASm@6md`KxYM8 zR-WH1^&%Rzo7N2G7ID{#^(c|aoRH1j{kj{)g}%Iwg;zl@Wh6JTD}!$mVHM@=Kmcyc zQ6Sx~2uidTQ6gZX^lUwfpqg;*vJ@6qKtE{aN6T06n2$3ar+6b5vm0J6QT?md)98@1 zm~W6|$Ed2Wr{!2#c4{vYmE=HXhzqXF4t>^_c(72xLTm>Q>{DIA^S0`a2XAgC_pTS3 zmfb(SFdk3zo#G6Hqb}4vR;WIzcHD)e{Gax|GAycf4Ot>fkw$t*=^S#%VZNpN>~r?^eCJ%>_y6$Yy;xk&de;;8dq2-w zym8p@nIR#<%F3WJ@lydyv_s#Jx@?!1Xv%>iHY|at#|D_BXeU@OIC2oX+YyoV#&pgg z$$)lQppH5xBRQ};z{avp5aP?5*S!%tBkw@urG;5hq$0PuATD#L=R(PiF&=zdXrxi` zXpU=B21_0mVS2^~kIwQtbGj0}L;Xg#73YJ|!pv=u5a=;MKqqALDsID_^1Rgw%$HGX zXw^Gw!_hJ6gGb3znCe*ZUhuc{3|wYD+gS*vWg!ScaUOan=}1G&Nt-GlyHfv>v=B3N z$GxXfPmV^C5#o!l3GwapSYN~pvu_W=?6$MiaF)y4ZqU$>V$EE z&1#6zZaLG$bBeSSRt9vkh~?;^3x5KdnrcimfQQ_9}4a2(STju{rFrAzU#etX7ms@tiaLvR~g){APRi|h%(}km$xqEj8ETJ!_yx!^ZrDgZmcu1-! zbwt=?CYL*xdLuM#FX2~IBstnPrySCtBW~rr&Yv+|4^$dq<^bM*zMUwxF)=g20?jf8b{PTD*t*uXO}X^P>awo@aXZDo9>T+pGBCVT3WOC`su3 z#cBtOS8HS1j8f=(-v>s%WSU@F-xt!NIL{V(5^H7SnqLhjxc647vHp;pN%bH9RD*;P z!nqU9(nmL*X>v$nUIpsO@0*-#dD-0%ch!+Ip_zI2t}F7ZJQUE5)mQg&AL-|yX+rOz?d7Sqp(4F~=cJIEkw12C zSQ)*(V=!7DyTU=_CrHqGUnMHEVrLDYmeq}rHNq9mU{_I}NPTyaWA;a?&X1*NXOOm|kUBO)mKWF8S24FMPV2Gg|`%pB~Fz1Dtmp8(eZG4d6uyg+)}8`-oQ z(3C*(-M8#Zne9S9atj$+A^royglR1=%C;n*tx@CYmgpC*zUq6H;jE;1Zaqn(C#@lO z3sisutk-c}tz)7NKiy0W`-lw-_0glQhU#f@Yj-7^erIjo@)l0bCE1Ems(!L^Uw@z* zm|qnjdoOL`J#IaIE3wiBmxpl2J)81Az4C)tdzFEpPw5QihcW2|^Yh%c=jwunVvXz~ zE^(UbVYZZv=b>W8TO3)-UuWN@i+53~L?u`RB+|at77@+51$t=%ZBOgT=xk9Mpy+z7 zo;+_RZinp8;|aOP^EJ8G>IjW6JR*@^5$yQCZZ@frWFuJiS912x}I0lvR=de8edb9=?z<7qr zO&zht8ZK-s({GqAbL-h#2i8r;t$xXTScPKxj?RA+)ZrgfF1pAuq@lIOvt=kB6D^ExpwWN98kp7Ls+Q{W>e&Vnq^Hz%W>DTyY z1MM9Y#wa_#RDSU*oBL+4pMk3#OK~KyOS70OjLKQW>h*i z1RQcwbw+b&dp~5q`YR|#d1)hHPIBaqhFHh8L@SmN<6)#Ta4|TB_nF? 
ziojr91u<_=Zls6zTy~_w&ET22qq3IEesDOdqD~E!ocpz|lXs105F-|lzwQlC^ zAT@*Ut$H)jY}5n}L1aaeMMI*JZ~_wRVk2kGL?7@;$o?dmxV`C>qREdHBFz$Gxz!Q; zpFUaeXRLW`pEX`^#>;k-HJ&X_G;Qg-7=ldiQ!s0mITwGPcwuHWo_&@bcPyxXB^>xp zYtiMiC9%(BEm-m{4t3P71_f#ozkj_6m{2Sp(H?SB%=1 z@5`0OS|j5QfyF`jjdIZ0eV+ilPpIaz@!~fc&*prFDk7M1U0c^t_B^k=o^2})_;b}B zq0dYE3kE&tf-r~YIb3*hcj45YZ}H3NUStUBE}rR>H!Hu`ik6MRFc7U=W)3&t#95kk zrS#jq!vlx8KGBf-P+WKK5p_{4GnS2@NI(ZcA|8pwjJ;+DDqep+dhSx#;m4}YN?j3- z{l*#R<;1WT@nkJ|xPq+ufvgMF*6n`&lIp-1IkiP|KTggUjj?^!JW!D(Gr|f5b`_y4fhUqnhYFbdMoT& zqG(O-5KWU(bseF>3767euGp~CofxJP7>=rLBOAkUVW#Bz}G$6B@+;8e8sv@{86ZMfsTCc$p^7qUz2w6M~PV z6`J~_;j>XR+EHo4%(zdv?Vg)jah&9|U1butg6^t*=8ahj!9+w8Of4Z#@b8+_Jqgqf zPl(vuY7khUPd>hJSFB1RVD8EWYT?}wQ@}@ZUk{>^1H)vFRDLS z!_NPEEB^?sc=PyhFurE7ogvYROeDn0#Q%YEmCcA8%Cuq1yabZ2|PyRN9vMMbf- z3^{8$`pVasKql5eWhBtPg9JBMHCKQZ3)AxW{S#^Hto(W}#}&Ooq{ZzyE;U^sW13f-4HbpTIi+i8wx!xiuyJ{2-*jb6qxs*cM6s=+PV^ z+z~?z!pVERe{Ea#t^0POS&A*K)nf3k`bcNMOJVP93%OUm)jC$IZP-h*TCYHRN(-#Q zq-+j5tRL|Z^U|s=>n&S^n-%KlKKE5j)|x@MeU+iFkby)9#4v}^-9oRFZzKd=zpn9( z*SDuzMx%>Xw5Q&G80`~-{F;Bc!>N?PKH8`*z)ObEv1`%o+RW}@o{1CeO4RtZ>DS`V zZA*S_e))5Ag_0kP@Y_Tmn@}d2UxFpkz{cQyN_B268Wt)fy5%coNh5P4p_JAvu?OWz zWfHp2qc}8m=>oG#r0Xs{);VhPGx2PvY*s;{);Pt?ZG~Z^3(V_oJ829=uQK%{;$f|r z1+)rob`nc6h5xv9MajB&nkH|Zu!`e)7bZi8>0#6Pty)8}jyOD~Z5x5p61VT6)s zxG&TA!>pP>pZzQ*$^Hbk@(fE&Q-BP+9q>SXVp${7W}-JkgwmK|Pcma=zgG=qeOI^b z8m#d}1`}?)Ne?z&wMKPILff0PmOJ!n8hqE9&KAv7-aNZjjAUC_aVD~F8oO;Th>yfv zB;p&fI}REb=Rm=5>+cmHl=j zo;)Ai<+(B(=!{uS=+!8;WA3(d{LJ5~0(IS4AG#6Qd`JjR8@(;8hhQc);gkkPV?U4x z40k>Y>e_Rs-3c3E4uY--ZVFsI$TJA4KDxCx8U$O|QPM~;POLA`sHVuz#@vv^ak#J4 z$}}JmSST9Im4w(^*^)Fy6+HE>>F?TTXb6e1;VQdN@e&l#m5~|%+NSzQZ$ox@df4b; zT$cp8rDjjOv4&kYYla(tdogE9)D3*y z$6E=KG%gZc#Ulolvwo!cdkIif1}WcUGV%J8rMFx$)^Z^(!#m(+P1uTuU{ zjKpy%xG2g6bzkT6SFjSWKAC7rpg0gIInYTmnQbk%HvCy)|32oGKKCe7bJX)YJqki{ zk5LPhq;WPCO zLppZ@ty_0e#uJ(IM*_^cFKiCnc?XYMW3)zzy{yJV2qNg(lg%c{FY?)U9&ot$Zusbo zk8o5$3{}Y|6tO3zi!Klhw=a9Yaa=-FS@vm+o07WFDD|ck@jRr$r{mqBoM6lAmv4Hj=?PO3f6b6|~m(mQ6Z1CdsRMjCIw%961~Qs0Z|| zsXXr4Z7jwA%mx+U0aysq@yD+x?2g^NIZK->I#!3|avxqV z9+*OXo#2Viw91sNSfcfct;eO4j0oqz&-7jMyX&e~BSROvRWALW+@Tt+uJ6Q)be}Wv zKdOpx^x7^~d-LSLdZO`?P@aqZc9>xSIkNl++`DMWX_C$p;=!OTjQz%Jt}6WDNjdnl zXXOdXh{}z$4IfcaUDs#k z+D7I*`{V2P7AM2gNopjrBrQz-^((ODZ5qa`OBvmQEw9r*wulP<=N3`G6cP&;FG*_dczT;CelJT6?Xv=;x z$E4TdunBzj?F0w&WIyHBPBa7RLdGIQb~Zjo-{||TY;m)%H(a$4%vRUaSiwZTeNNf3 z-nZjf4}CbVg=4T0wEFHPaXD%!IW=QuvPbm!$Kx3=;-D0hnz4bW?UD3{L!FBT~cFaW2w{c$jMWS>T%B-y$}tDq$e5- z*LX4nbJRLM;5!%`siu6}d-=E_%(ibzbnQYGNq=14#1L6ruVxK=EDQB|H6!`OJL#kW zTxNH~lqf7?Fo09({{49)PcW}??5YgJw|IEgXsEt&?PAsRgQQNBICUtwc)L7>czb^w zj&C|ohSH^Lu3d1;(k<}LsI`swmS19e#jNMcO`$$-?Ujahfw!r4I8mJl8#2{c6^oqq zO=d~3WwZe}xsjm{YXnq#N^zeJOO_@yI!8ssdse1x{`1E%xTZj2!WEPtspYw-?Y|I32GK&aap#3R-`^ zU?vJjyh)fPfHCABr;jTafx&&R8;uY!<@DOi?^@faBLT~# z3Su*B_Uf<}^CXb(s>b6dkw%lWcMn93%-y+B1(|5?RRTshvTKc6XTlSIEFA&v$Lt-T zoMJOJ>m1|LsbcjGryuX7mhDS>q4bX~4NSI=SI_2!PYKXXRFH{4QD}4QX(S3bI;_yZ z{`#kqTWhO=?EL-ibi6!n4QWEdG?80f)aPX_XT78@%}}<0df* zdZZS~1DmAVe%^VL*AOq;yabCfF5j4`Y&FH$+pl<`nnA0|Jd0n>X;*D}COz>5#_G)d zP+~^Q-nRu;_TYIRd511bX%H0-7F;dPbzD=6g0m*ynrP3Rx4A1-f}#W=5*A|e=C+$J z7Ch4sP`TsvGDrb!`?jm9Pf7cseMZ{IImhBLE4t~fUEaumauzyv+yt_H+W4r;o8{Qq zFiflvgS33uY^`gQ*J#jXcIr!wxV~G-hK&ag&7$Db@o(W^B6>tWpCQGJ13E)E-TA*3o$zt>vCVZ4PRcQG}Fid1ciF*}wz;Eh!NbS_SbJj-dv8+v?fajb8_tMlNpf)K6 z)0GBazNjo{K`bTB2I6TSQzUDT6MZ5P!dk(;yOX|*4t!gj96u_Dw zI(eG}E|>(jWU8PH$h|TIrB3S;esQwGk;4*}&a7iqW)TjvjR|~@OCzK$lzaR8ETg;n z8tc2*KpbU*qQQmHY78*<9@9U=C(bUETp$$RY_)ZxI!pvr<-=J>W}8^Ao!=_cKB>^- zntiTxR91azcl^+)xjBAzbydA_AkvX&bkoPQ`p1?AmC+>Qce`d|!9dID92tH}vTRD% 
z;D)P13_C>&Ra-sQHI-i)<0&gy5x}gi6uS=}gQ%{rp%aRbcLI+Ny9mBr+=ork(zs4k zfsAk4#E-T+&!o5nGcMLwj8BkOjMgeGj$fRUR!l*}Uttb2^2ttd$ycCTFB)9&bO2q>47& z|AH2+>iX;&4Q`?ZsC~*Oq|_{>w+SXZM8ySypfi)}twnDas!jRm&kOht-_UAkQfgoF z(8YFMFcJOsEu^c5uu(|MKJsvtt>$1kuq$`H?!|zn#ro-POA`$@c6)i?1_6)COpA6bW zoO3ZtRM>ZsB0Slw)`r$d*UFBq8_DLpyrT^=T*0dKdXY-z18Zv+VAa!uaBtJ`_HaJ2 z(+{Si5v+;#40RBa+ro~kK~4h$?s?R}I?qxu^e{lrw8)(y|E=R&(D=_;MbcSj~vwV@-dZuD9HE*xuIOho@M)GwN5e5?CV^MdyFQEk~Kf0FoNiUVh zE~4ML*3;I_#)7NOMmWu7x6zIdS#!R`qJT6=dqo|_v&M3FvoOu|At*;werjxac1*`n zI0Mlu=Zd~{L@qw-28~b~79l{gELQdFNLg@yGQu79^4+(FIIkNtyH}fKbOYpbjk2kL z>4AX&>&sCU3(4GLg8mf@-b%t=&4-^B4hKIKWZ7zLXgN4z}7+! zf1LBt7#BNe!3tbT`ydHDbv%7)Ny}3M;%s-fj%}2aWSYOPA0r+h>a)o9jD#Q_>saRG zgZ*X;C;HLBp!LFXaI5S!*s5>xi&v<`Y4G5Emaeu!x6V?Ry`UleRjZ1c2-KReOpt?^ zRg|#6_MBqHXalFnV#rkcCZ`1f4`ec=4LWG0NP56bTAB_O-jt~8m<{VoCh;YubK_Yk zVi=sW6OSl2INsj)Fx=iVq_gNUN0H_;XJr+a8!ItD>h!8r<ct%tkc>b6}h zdsfWR%#9%5NUv!|!v<7L!56>5hRT&0QRb{kS?#X=;W`D~&589!MV)7ieSO2kLyPe` zpKUa_#+hAINKD6O$aN0i=&sYyxpDJ~7qR+`%hWXM1rb6kH%ha$4Xf0O-rS#Ik=uqQ z1oMYc7xzp;X!7(2onIDGPum@qTjn)Z9JTH78h&cHEEwO8#)EG)SI*QJp~yu;$}$eu zN@RxWxRMTMqe+Epb!%o$j0elIUB(x%fn%VU`$)g_1n(8v-mxl}u9-X*nP?n2V(E@?M}kn5KPU7gW8KeH#$eX z%Gx=t$TYC3GWD~VhPhLt^LGs?N6xuMaK`#b>hGgLi!~V6LvG$C%vzcBOIOT#=IRjr z0LAP*$BeA5AL3xn1cQi5_{Guf0)8ymM^kpPt7LxCxFCy5uw|uACJtz*>P~#aK!ZWkMElKX1It5< zN{U&>IYG~eZ;49^wYS5Ub@#8utfR`~OPI<^!EeJr6dJoU5_zj{#Zy&JGa1zcvB|T~ zltd{EP_PT6J>wA**fwN;HogE z13ntI^~Iuc{c_6+0|3IPEpn+ve;$82$VR8O4s)N@Lo9OQ*R% z*NENg>}X%hmP@$w{-ioX>6^4%?{Lw^RLxfzG@jeAwCPX;B{ROvk2Okz9pjebJX(p% zU1|P0vP#yo^x9pq*we1PV=8rS8wus%ayvZHZ`k8L1^zijQz7zmNS~xm_A(jU_Gs60 ziPJ2n@Q5J{5+N=VB>B8Gnuk6w3wdIbM0=SVFY#lnGM+W0qjU5x4v4Kz6)(#U%#sc} zk)~6pVP??nA2pj6a$&TZj|_8DFFZM(yA-JhDNm!5dworE#VW^n83xJB2W1FgF9={l ztP|2`sbzL*zj+*kYp{9U9!f%~D}Eo|GVN{@`X!!vlhI=Xf4B-XQUNW6{3t9e*svw5 zwH^yrJcUP6xR9cJnii`J_;7ron13zAD8NCYR8I%_f}6muugU#u@YQUys3F(+H?nTj zFY*Aj&D;uWzs$iV; zQQ%h6t4OKie00|Bv@D6fF+nxUle^`@g*oVuH=lG_DS%-tabK6o)^^9G#T^L%(uT`rU8I1s@*~ zd)$l5h}TV)YP|;Yl*wMA`6jSC4B}u14*c%ym{&ee)!s+-UxuQlmFtbdcny98dLzsn zY~l|$C7zzK5Ul1OOQh79W{Ev#d{dla@$U zLs6g7W)s)Y8DBeKFw;4ikLB2BYB8(>)DNz?DweuLD)yM`?MG&OwL>PVPng*bEO-#W zobK#=uS)VxUf*4j#o}qWHSiCEehs6a4W9Jd77jdZOwaw@{kb?|qFoZ8SaCH19-O|b zD)kWyC-630x|7<7<6Zq>Tz|RI%O&#l>4hkMU$~NvZ>j9gRB7&g?cR?;EkDC=jRlLD zj?U-*t7-IarIY~7#mhSr0gb3;4<8X*dP`w}s_f)#Y{Jj{e(fntLuxyN&E=6MzoHY5 zB6W*Jf!TN%52F0JQ8#o_6s#pu~cOo)ea~ z{uZUoTZA6()zims-hl5Ln=oMS-(_=|cdegn4{PCQzHZ8nO~_XJ_SZ-lYv1I;g83t3 zpa0X(|G?1&khGMu!*2jh+Gn#QJQA_p`eXmkrnp!zzRB!~+t?2@=64fL0J|<8n~T6* z`$E4VZcG^eNqm2HCk>2}n-pRLqeLFdH@b}RmYAgG<0C11e!?mN(BV1wje`iGhu-=1 zfqziupLY`CDFXp#)AQ|N-rXOM9VS%dc)HuNnRvLXjF3{5z2cWe_VDL>^hs2p(I@PiTBB@Rdr((yyh25Lcbd-(6GKcR)x`xKEFy8 z{x`G(koW*Z;(_=0G73?4ThrHO$+Eo;a8xoNP}HFoR6~_sSZVlnTkc=>|N4#I z0U8gP$(QmNP)g2QcWgu8ahL{;QiGo)Cz`e~82J;Jt?Ln*YrJ7Qg`VVi?X{ zkJE2Tukk*S0XRBJGQy9S3DVDL2MYBQweA?e_w-k>c%qp^@w-{M6qW)Hnt0~WTWeTV*oFVrvC}VQi~p)DA9)`L zB)>LVqK|8(nFe{S%zqm&SN!&Y;rbvP#SAoVSqWy0PgVVNlcLHgezV}uOMrHvw#2<5CR!|s0d>zi0j0FnQqj(_&} zH_iU%{{OlEFHrg)`u~5)XSIn}9p8KS0Z#@BcdjQ_>hEm$mudoo18pgM))2A3pVWNZ z{6_f@D1&>1ly6GYSG3&Q%nsH zFAs-8EhprVCA-szauzID^;#JxmM5Mq@HtIiust{HfDPKzngA+0{Z>YeWk>QbP{_;eIAb~OUCPoRYq1??c*k#5^S{#C1>xFANp`B! 
zh?3LmNN4iSL4v`kZ$JroBYE`qNB&0TKvKO2<;eL@RQv~V1^@}-9x;F6liwxc*Y-{#`98`ul)X)JMHLf1!rIrHx;jNIe7s)LN_c;J-^L0#Xc5Dk%Tk1F!hK z0!k!*$DHT?T`CHYlJ++j_;+Rg>YG+PAZIQr=l^#p1|So^$L_QKFMa^r;!gxXr!!vb rKf>+T?b5{m1K1yM`hNwO9|m_fkugx@?K`EC2rkE|$V` diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index aac1e4eade932..08e28582e7469 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -614,23 +614,34 @@ the expected correct result:: assert_frame_equal(pivoted, expected) -How to use ``parametrize`` -~~~~~~~~~~~~~~~~~~~~~~~~~~ +Transitioning to ``pytest`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~ -`pytest `__ has a nice feature `parametrize `__ to allow -testing of many cases in a concise way that enables an easy-to-read syntax. +*pandas* existing test structure is *mostly* classed based, meaning that you will typically find tests wrapped in a class, inheriting from ``tm.TestCase``. -.. note:: +.. code-block:: python + + class TestReallyCoolFeature(tm.TestCase): + .... - *pandas* existing test structure is *mostly* classed based, meaning that you will typically find tests wrapped in a class, inheriting from ``tm.TestCase``. +Going forward, we are moving to a more *functional* style using the `pytest `__ framework, which offers a richer testing +framework that will facilitate testing and developing. Thus, instead of writing test classes, we will write test functions like this: - .. code-block:: python +.. code-block:: python + + def test_really_cool_feature(): + .... - class TestReallyCoolFeature(tm.TestCase): - .... +Sometimes, it does make sense to bundle test functions together into a single class, either because the test file is testing multiple functions from a single module, and +using test classes allows for better organization. However, instead of inheriting from ``tm.TestCase``, we should just inherit from ``object``: + +.. code-block:: python - Going forward we are moving to a more *functional* style, please see below. + class TestReallyCoolFeature(object): + .... +Using ``pytest`` +~~~~~~~~~~~~~~~~ Here is an example of a self-contained set of tests that illustrate multiple features that we like to use. @@ -641,7 +652,7 @@ Here is an example of a self-contained set of tests that illustrate multiple fea - ``tm.assert_series_equal`` (and its counter part ``tm.assert_frame_equal``), for pandas object comparisons. - the typical pattern of constructing an ``expected`` and comparing versus the ``result`` -We would name this file ``test_cool_feature.py`` and put in an appropriate place in the ``pandas/tests/`` sturcture. +We would name this file ``test_cool_feature.py`` and put in an appropriate place in the ``pandas/tests/`` structure. .. code-block:: python From ebc0c0986fc01dfc937d493786d6fdbcd1c7eddd Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 14 Apr 2017 09:06:45 -0400 Subject: [PATCH 385/933] DEPR: Deprecate generic timestamp dtypes (#15987) * DEPR: Deprecate generic timestamp dtypes We only use the nanosecond frequency, and numpy doesn't even handle generic timestamp dtypes well. xref gh-15524 (comment). 
* TST: Use pytest idioms in series/test_dtypes.py --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/series/test_constructors.py | 27 +++ pandas/tests/series/test_dtypes.py | 238 ++++++++++++++--------- pandas/types/cast.py | 25 ++- 4 files changed, 195 insertions(+), 96 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a105a6801fb61..cb3e20e50380b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1204,6 +1204,7 @@ Deprecations - ``SparseArray.to_dense()`` has deprecated the ``fill`` parameter, as that parameter was not being respected (:issue:`14647`) - ``SparseSeries.to_dense()`` has deprecated the ``sparse_only`` parameter (:issue:`14647`) - ``Series.repeat()`` has deprecated the ``reps`` parameter in favor of ``repeats`` (:issue:`12662`) +- The ``Series`` constructor and ``.astype`` method have deprecated accepting timestamp dtypes without a frequency (e.g. ``np.datetime64``) for the ``dtype`` parameter (:issue:`15524`) - ``Index.repeat()`` and ``MultiIndex.repeat()`` have deprecated the ``n`` parameter in favor of ``repeats`` (:issue:`12662`) - ``Categorical.searchsorted()`` and ``Series.searchsorted()`` have deprecated the ``v`` parameter in favor of ``value`` (:issue:`12662`) - ``TimedeltaIndex.searchsorted()``, ``DatetimeIndex.searchsorted()``, and ``PeriodIndex.searchsorted()`` have deprecated the ``key`` parameter in favor of ``value`` (:issue:`12662`) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index dbe2db67359f3..8ad07afcacfcc 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -839,3 +839,30 @@ def test_constructor_cast_object(self): s = Series(date_range('1/1/2000', periods=10), dtype=object) exp = Series(date_range('1/1/2000', periods=10)) tm.assert_series_equal(s, exp) + + def test_constructor_generic_timestamp_deprecated(self): + # see gh-15524 + + with tm.assert_produces_warning(FutureWarning): + dtype = np.timedelta64 + s = Series([], dtype=dtype) + + assert s.empty + assert s.dtype == 'm8[ns]' + + with tm.assert_produces_warning(FutureWarning): + dtype = np.datetime64 + s = Series([], dtype=dtype) + + assert s.empty + assert s.dtype == 'M8[ns]' + + # These timestamps have the wrong frequencies, + # so an Exception should be raised now. 
+ msg = "cannot convert timedeltalike" + with tm.assertRaisesRegexp(TypeError, msg): + Series([], dtype='m8[ps]') + + msg = "cannot convert datetimelike" + with tm.assertRaisesRegexp(TypeError, msg): + Series([], dtype='M8[ps]') diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index a2aaff25516ae..6bbf00d6cab22 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -1,9 +1,13 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 -import sys +import pytest + from datetime import datetime + +import sys import string +import warnings from numpy import nan import numpy as np @@ -12,152 +16,199 @@ from pandas.compat import lrange, range, u from pandas import compat -from pandas.util.testing import assert_series_equal import pandas.util.testing as tm from .common import TestData -class TestSeriesDtypes(TestData, tm.TestCase): +class TestSeriesDtypes(TestData): - def test_astype(self): + @pytest.mark.parametrize("dtype", ["float32", "float64", + "int64", "int32"]) + def test_astype(self, dtype): s = Series(np.random.randn(5), name='foo') + as_typed = s.astype(dtype) - for dtype in ['float32', 'float64', 'int64', 'int32']: - astyped = s.astype(dtype) - self.assertEqual(astyped.dtype, dtype) - self.assertEqual(astyped.name, s.name) + assert as_typed.dtype == dtype + assert as_typed.name == s.name def test_dtype(self): - self.assertEqual(self.ts.dtype, np.dtype('float64')) - self.assertEqual(self.ts.dtypes, np.dtype('float64')) - self.assertEqual(self.ts.ftype, 'float64:dense') - self.assertEqual(self.ts.ftypes, 'float64:dense') - assert_series_equal(self.ts.get_dtype_counts(), Series(1, ['float64'])) - assert_series_equal(self.ts.get_ftype_counts(), Series( - 1, ['float64:dense'])) - - def test_astype_cast_nan_inf_int(self): - # GH14265, check nan and inf raise error when converting to int - types = [np.int32, np.int64] - values = [np.nan, np.inf] + assert self.ts.dtype == np.dtype('float64') + assert self.ts.dtypes == np.dtype('float64') + assert self.ts.ftype == 'float64:dense' + assert self.ts.ftypes == 'float64:dense' + tm.assert_series_equal(self.ts.get_dtype_counts(), + Series(1, ['float64'])) + tm.assert_series_equal(self.ts.get_ftype_counts(), + Series(1, ['float64:dense'])) + + @pytest.mark.parametrize("value", [np.nan, np.inf]) + @pytest.mark.parametrize("dtype", [np.int32, np.int64]) + def test_astype_cast_nan_inf_int(self, dtype, value): + # gh-14265: check NaN and inf raise error when converting to int msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer' + s = Series([value]) - for this_type in types: - for this_val in values: - s = Series([this_val]) - with self.assertRaisesRegexp(ValueError, msg): - s.astype(this_type) + with tm.assertRaisesRegexp(ValueError, msg): + s.astype(dtype) - def test_astype_cast_object_int(self): + @pytest.mark.parametrize("dtype", [int, np.int8, np.int64]) + def test_astype_cast_object_int_fail(self, dtype): arr = Series(["car", "house", "tree", "1"]) + with pytest.raises(ValueError): + arr.astype(dtype) - self.assertRaises(ValueError, arr.astype, int) - self.assertRaises(ValueError, arr.astype, np.int64) - self.assertRaises(ValueError, arr.astype, np.int8) - + def test_astype_cast_object_int(self): arr = Series(['1', '2', '3', '4'], dtype=object) result = arr.astype(int) - self.assert_series_equal(result, Series(np.arange(1, 5))) + + tm.assert_series_equal(result, Series(np.arange(1, 5))) def test_astype_datetimes(self): import pandas._libs.tslib as tslib - s = 
Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5)) + s = s.astype('O') - self.assertEqual(s.dtype, np.object_) + assert s.dtype == np.object_ s = Series([datetime(2001, 1, 2, 0, 0)]) + s = s.astype('O') - self.assertEqual(s.dtype, np.object_) + assert s.dtype == np.object_ s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)]) + s[1] = np.nan - self.assertEqual(s.dtype, 'M8[ns]') - s = s.astype('O') - self.assertEqual(s.dtype, np.object_) + assert s.dtype == 'M8[ns]' - def test_astype_str(self): - # GH4405 - digits = string.digits - s1 = Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]) - s2 = Series([digits * 10, tm.rands(63), tm.rands(64), nan, 1.0]) - types = (compat.text_type, np.str_) - for typ in types: - for s in (s1, s2): - res = s.astype(typ) - expec = s.map(compat.text_type) - assert_series_equal(res, expec) - - # GH9757 - # Test str and unicode on python 2.x and just str on python 3.x - for tt in set([str, compat.text_type]): - ts = Series([Timestamp('2010-01-04 00:00:00')]) - s = ts.astype(tt) - expected = Series([tt('2010-01-04')]) - assert_series_equal(s, expected) - - ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')]) - s = ts.astype(tt) - expected = Series([tt('2010-01-04 00:00:00-05:00')]) - assert_series_equal(s, expected) - - td = Series([Timedelta(1, unit='d')]) - s = td.astype(tt) - expected = Series([tt('1 days 00:00:00.000000000')]) - assert_series_equal(s, expected) + s = s.astype('O') + assert s.dtype == np.object_ + + @pytest.mark.parametrize("dtype", [compat.text_type, np.str_]) + @pytest.mark.parametrize("series", [Series([string.digits * 10, + tm.rands(63), + tm.rands(64), + tm.rands(1000)]), + Series([string.digits * 10, + tm.rands(63), + tm.rands(64), nan, 1.0])]) + def test_astype_str_map(self, dtype, series): + # see gh-4405 + result = series.astype(dtype) + expected = series.map(compat.text_type) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [str, compat.text_type]) + def test_astype_str_cast(self, dtype): + # see gh-9757: test str and unicode on python 2.x + # and just str on python 3.x + ts = Series([Timestamp('2010-01-04 00:00:00')]) + s = ts.astype(dtype) + + expected = Series([dtype('2010-01-04')]) + tm.assert_series_equal(s, expected) + + ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')]) + s = ts.astype(dtype) + + expected = Series([dtype('2010-01-04 00:00:00-05:00')]) + tm.assert_series_equal(s, expected) + + td = Series([Timedelta(1, unit='d')]) + s = td.astype(dtype) + + expected = Series([dtype('1 days 00:00:00.000000000')]) + tm.assert_series_equal(s, expected) def test_astype_unicode(self): - - # GH7758 - # a bit of magic is required to set default encoding encoding to utf-8 + # see gh-7758: A bit of magic is required to set + # default encoding to utf-8 digits = string.digits test_series = [ Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]), Series([u('データーサイエンス、お前はもう死んでいる')]), - ] former_encoding = None + if not compat.PY3: - # in python we can force the default encoding for this test + # In Python, we can force the default encoding for this test former_encoding = sys.getdefaultencoding() reload(sys) # noqa + sys.setdefaultencoding("utf-8") if sys.getdefaultencoding() == "utf-8": test_series.append(Series([u('野菜食べないとやばい') .encode("utf-8")])) + for s in test_series: res = s.astype("unicode") expec = s.map(compat.text_type) - assert_series_equal(res, expec) - # restore the former encoding + tm.assert_series_equal(res, expec) + + # Restore the 
former encoding if former_encoding is not None and former_encoding != "utf-8": reload(sys) # noqa sys.setdefaultencoding(former_encoding) def test_astype_dict(self): - # GH7271 + # see gh-7271 s = Series(range(0, 10, 2), name='abc') result = s.astype({'abc': str}) expected = Series(['0', '2', '4', '6', '8'], name='abc') - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = s.astype({'abc': 'float64'}) expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype='float64', name='abc') - assert_series_equal(result, expected) - - self.assertRaises(KeyError, s.astype, {'abc': str, 'def': str}) - self.assertRaises(KeyError, s.astype, {0: str}) - - def test_complexx(self): - # GH4819 - # complex access for ndarray compat + tm.assert_series_equal(result, expected) + + with pytest.raises(KeyError): + s.astype({'abc': str, 'def': str}) + + with pytest.raises(KeyError): + s.astype({0: str}) + + def test_astype_generic_timestamp_deprecated(self): + # see gh-15524 + data = [1] + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + s = Series(data) + dtype = np.datetime64 + result = s.astype(dtype) + expected = Series(data, dtype=dtype) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + s = Series(data) + dtype = np.timedelta64 + result = s.astype(dtype) + expected = Series(data, dtype=dtype) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", np.typecodes['All']) + def test_astype_empty_constructor_equality(self, dtype): + # see gh-15524 + + if dtype not in ('S', 'V'): # poor support (if any) currently + with warnings.catch_warnings(record=True): + # Generic timestamp dtypes ('M' and 'm') are deprecated, + # but we test that already in series/test_constructors.py + + init_empty = Series([], dtype=dtype) + as_type_empty = Series([]).astype(dtype) + tm.assert_series_equal(init_empty, as_type_empty) + + def test_complex(self): + # see gh-4819: complex access for ndarray compat a = np.arange(5, dtype=np.float64) b = Series(a + 4j * a) + tm.assert_numpy_array_equal(a, b.real) tm.assert_numpy_array_equal(4 * a, b.imag) @@ -166,23 +217,22 @@ def test_complexx(self): tm.assert_numpy_array_equal(4 * a, b.imag) def test_arg_for_errors_in_astype(self): - # issue #14878 - - sr = Series([1, 2, 3]) + # see gh-14878 + s = Series([1, 2, 3]) - with self.assertRaises(ValueError): - sr.astype(np.float64, errors=False) + with pytest.raises(ValueError): + s.astype(np.float64, errors=False) with tm.assert_produces_warning(FutureWarning): - sr.astype(np.int8, raise_on_error=True) + s.astype(np.int8, raise_on_error=True) - sr.astype(np.int8, errors='raise') + s.astype(np.int8, errors='raise') def test_intercept_astype_object(self): series = Series(date_range('1/1/2000', periods=10)) - # this test no longer makes sense as series is by default already - # M8[ns] + # This test no longer makes sense, as + # Series is by default already M8[ns]. 
expected = series.astype('object') df = DataFrame({'a': series, @@ -192,9 +242,9 @@ def test_intercept_astype_object(self): tm.assert_series_equal(df.dtypes, exp_dtypes) result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) + assert (result[:, 0] == expected.values).all() df = DataFrame({'a': series, 'b': ['foo'] * len(series)}) result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) + assert (result[:, 0] == expected.values).all() diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 85053dba0c18b..3954fb5c93da8 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -1,7 +1,10 @@ """ routings for casting """ from datetime import datetime, timedelta + import numpy as np +import warnings + from pandas._libs import tslib, lib from pandas._libs.tslib import iNaT from pandas.compat import string_types, text_type, PY3 @@ -620,6 +623,14 @@ def astype_nansafe(arr, dtype, copy=True): # work around NumPy brokenness, #1987 return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) + if dtype.name in ("datetime64", "timedelta64"): + msg = ("Passing in '{dtype}' dtype with no frequency is " + "deprecated and will raise in a future version. " + "Please pass in '{dtype}[ns]' instead.") + warnings.warn(msg.format(dtype=dtype.name), + FutureWarning, stacklevel=5) + dtype = np.dtype(dtype.name + "[ns]") + if copy: return arr.astype(dtype) return arr.view(dtype) @@ -871,8 +882,15 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): if is_datetime64 or is_datetime64tz or is_timedelta64: # force the dtype if needed + msg = ("Passing in '{dtype}' dtype with no frequency is " + "deprecated and will raise in a future version. " + "Please pass in '{dtype}[ns]' instead.") + if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE): - if dtype.name == 'datetime64[ns]': + if dtype.name in ('datetime64', 'datetime64[ns]'): + if dtype.name == 'datetime64': + warnings.warn(msg.format(dtype=dtype.name), + FutureWarning, stacklevel=5) dtype = _NS_DTYPE else: raise TypeError("cannot convert datetimelike to " @@ -886,7 +904,10 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): value = [value] elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): - if dtype.name == 'timedelta64[ns]': + if dtype.name in ('timedelta64', 'timedelta64[ns]'): + if dtype.name == 'timedelta64': + warnings.warn(msg.format(dtype=dtype.name), + FutureWarning, stacklevel=5) dtype = _TD_DTYPE else: raise TypeError("cannot convert timedeltalike to " From 3fde134617822773b23cf484310820298d9f88ac Mon Sep 17 00:00:00 2001 From: Sarma Tangirala Date: Fri, 14 Apr 2017 09:28:03 -0400 Subject: [PATCH 386/933] ENH: add option to sort class labels in parallel_coordinates (#15908) closes #15908 Author: Sarma Tangirala Closes #15935 from stangirala/master and squashes the following commits: 1467f9f [Sarma Tangirala] Add minor code change, what's new doc fix 3ede37a [Sarma Tangirala] Move feature test to new method, add to whatsnew 756e8d8 [Sarma Tangirala] ENH: Minor change to parallel_coordinates (#15908) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/plotting/test_misc.py | 20 ++++++++++++++++++++ pandas/tools/plotting.py | 11 ++++++++++- 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index cb3e20e50380b..a18ddd9da8816 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -373,6 +373,7 @@ Other Enhancements - 
:func:`MultiIndex.remove_unused_levels` has been added to facilitate :ref:`removing unused levels `. (:issue:`15694`)
 - ``pd.read_csv()`` will now raise a ``ParserError`` error whenever any parsing error occurs (:issue:`15913`, :issue:`15925`)
 - ``pd.read_csv()`` now supports the ``error_bad_lines`` and ``warn_bad_lines`` arguments for the Python parser (:issue:`15925`)
+- ``parallel_coordinates()`` has gained a ``sort_labels`` keyword arg that sorts class labels and the colours assigned to them (:issue:`15908`)
 
 .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
 
diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py
index 812f039f1a2c7..504c55bcfcfd0 100644
--- a/pandas/tests/plotting/test_misc.py
+++ b/pandas/tests/plotting/test_misc.py
@@ -241,6 +241,26 @@ def test_parallel_coordinates(self):
         with tm.assert_produces_warning(FutureWarning):
             parallel_coordinates(df, 'Name', colors=colors)
 
+    def test_parallel_coordinates_with_sorted_labels(self):
+        """ For #15908 """
+        from pandas.tools.plotting import parallel_coordinates
+
+        df = DataFrame({"feat": [i for i in range(30)],
+                        "class": [2 for _ in range(10)] +
+                        [3 for _ in range(10)] +
+                        [1 for _ in range(10)]})
+        ax = parallel_coordinates(df, 'class', sort_labels=True)
+        polylines, labels = ax.get_legend_handles_labels()
+        color_label_tuples = \
+            zip([polyline.get_color() for polyline in polylines], labels)
+        ordered_color_label_tuples = sorted(color_label_tuples,
+                                            key=lambda x: x[1])
+        prev_next_tuples = zip([i for i in ordered_color_label_tuples[0:-1]],
+                               [i for i in ordered_color_label_tuples[1:]])
+        for prev, nxt in prev_next_tuples:
+            # labels and colors are ordered strictly increasing
+            assert prev[1] < nxt[1] and prev[0] < nxt[0]
+
     @slow
     def test_radviz(self):
         from pandas.tools.plotting import radviz
diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py
index 99e56ca80cf97..141e3c74b91c4 100644
--- a/pandas/tools/plotting.py
+++ b/pandas/tools/plotting.py
@@ -705,7 +705,8 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
 @deprecate_kwarg(old_arg_name='data', new_arg_name='frame', stacklevel=3)
 def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None,
                          use_columns=False, xticks=None, colormap=None,
-                         axvlines=True, axvlines_kwds=None, **kwds):
+                         axvlines=True, axvlines_kwds=None, sort_labels=False,
+                         **kwds):
     """Parallel coordinates plotting.
 
     Parameters
@@ -729,6 +730,11 @@ def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None,
         If true, vertical lines will be added at each xtick
     axvlines_kwds: keywords, optional
         Options to be passed to axvline method for vertical lines
+    sort_labels: bool, default False
+        Sort class_column labels, useful when assigning colours
+
+        .. versionadded:: 0.20.0
versionadded:: 0.20.0 + kwds: keywords Options to pass to matplotlib plotting method @@ -785,6 +791,9 @@ def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, colormap=colormap, color_type='random', color=color) + if sort_labels: + classes = sorted(classes) + color_values = sorted(color_values) colors = dict(zip(classes, color_values)) for i in range(n): From 9991579c812e5a7c977e69f03b390adf7974445f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 14 Apr 2017 09:31:29 -0400 Subject: [PATCH 387/933] ENH: Intervalindex closes #7640 closes #8625 reprise of #8707 Author: Jeff Reback Author: Stephan Hoyer Closes #15309 from jreback/intervalindex and squashes the following commits: 11ab1e1 [Jeff Reback] merge conflicts 834df76 [Jeff Reback] more docs fbc1cf8 [Jeff Reback] doc example and bug 7577335 [Jeff Reback] fixup on merge of changes in algorithms.py 3a3e02e [Jeff Reback] sorting example 4333937 [Jeff Reback] api-types test fixing f0e3ad2 [Jeff Reback] pep b2d26eb [Jeff Reback] more docs e5f8082 [Jeff Reback] allow pd.cut to take an IntervalIndex for bins 4a5ebea [Jeff Reback] more tests & fixes for non-unique / overlaps rename _is_contained_in -> contains add sorting test 340c98b [Jeff Reback] CLN/COMPAT: IntervalIndex 74162aa [Stephan Hoyer] API/ENH: IntervalIndex --- asv_bench/benchmarks/indexing.py | 20 + doc/source/advanced.rst | 33 + doc/source/api.rst | 21 + doc/source/reshaping.rst | 10 +- doc/source/whatsnew/v0.20.0.txt | 58 ++ pandas/_libs/hashtable.pyx | 1 - pandas/_libs/interval.pyx | 215 +++++ pandas/_libs/intervaltree.pxi.in | 396 ++++++++ pandas/_libs/lib.pyx | 6 +- pandas/_libs/src/inference.pyx | 25 + pandas/_libs/tslib.pyx | 12 + pandas/core/algorithms.py | 46 +- pandas/core/api.py | 3 +- pandas/core/groupby.py | 30 +- pandas/core/indexing.py | 8 +- pandas/formats/format.py | 15 + pandas/indexes/api.py | 3 +- pandas/indexes/base.py | 73 +- pandas/indexes/category.py | 70 +- pandas/indexes/interval.py | 1062 ++++++++++++++++++++++ pandas/indexes/multi.py | 4 +- pandas/tests/api/test_api.py | 4 +- pandas/tests/api/test_types.py | 3 +- pandas/tests/frame/test_alter_axes.py | 66 +- pandas/tests/frame/test_sorting.py | 258 +++--- pandas/tests/groupby/test_categorical.py | 5 +- pandas/tests/groupby/test_groupby.py | 6 +- pandas/tests/indexes/common.py | 25 +- pandas/tests/indexes/test_base.py | 4 +- pandas/tests/indexes/test_category.py | 19 +- pandas/tests/indexes/test_interval.py | 798 ++++++++++++++++ pandas/tests/indexing/test_interval.py | 245 +++++ pandas/tests/scalar/test_interval.py | 129 +++ pandas/tests/series/test_constructors.py | 14 +- pandas/tests/series/test_missing.py | 11 +- pandas/tests/series/test_sorting.py | 19 +- pandas/tests/test_algos.py | 35 +- pandas/tests/test_base.py | 28 +- pandas/tests/test_categorical.py | 31 +- pandas/tests/tools/test_tile.py | 300 +++--- pandas/tests/types/test_dtypes.py | 118 ++- pandas/tests/types/test_missing.py | 8 + pandas/tools/tile.py | 240 ++--- pandas/tseries/base.py | 10 +- pandas/tseries/interval.py | 35 - pandas/tseries/period.py | 3 + pandas/types/api.py | 4 + pandas/types/common.py | 23 + pandas/types/dtypes.py | 109 +++ pandas/types/generic.py | 4 +- pandas/types/inference.py | 2 + pandas/types/missing.py | 5 +- pandas/util/testing.py | 22 +- setup.py | 5 + 54 files changed, 4195 insertions(+), 504 deletions(-) create mode 100644 pandas/_libs/interval.pyx create mode 100644 pandas/_libs/intervaltree.pxi.in create mode 100644 pandas/indexes/interval.py create mode 100644 
pandas/tests/indexes/test_interval.py create mode 100644 pandas/tests/indexing/test_interval.py create mode 100644 pandas/tests/scalar/test_interval.py delete mode 100644 pandas/tseries/interval.py diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index d938cc6a6dc4d..a32c9f25a0f09 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -226,6 +226,26 @@ def time_is_monotonic(self): self.miint.is_monotonic +class IntervalIndexing(object): + goal_time = 0.2 + + def setup(self): + self.monotonic = Series(np.arange(1000000), + index=IntervalIndex.from_breaks(np.arange(1000001))) + + def time_getitem_scalar(self): + self.monotonic[80000] + + def time_loc_scalar(self): + self.monotonic.loc[80000] + + def time_getitem_list(self): + self.monotonic[80000:] + + def time_loc_list(self): + self.monotonic.loc[80000:] + + class PanelIndexing(object): goal_time = 0.2 diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 43373fc86c4d1..ea00588ba156f 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -850,6 +850,39 @@ Of course if you need integer based selection, then use ``iloc`` dfir.iloc[0:5] +.. _indexing.intervalindex: + +IntervalIndex +~~~~~~~~~~~~~ + +.. versionadded:: 0.20.0 + +.. warning:: + + These indexing behaviors are provisional and may change in a future version of pandas. + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3, 4]}, + index=pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4])) + df + +Label-based indexing via ``.loc`` along the edges of an interval works as you would expect, +selecting that particular interval. + +.. ipython:: python + + df.loc[2] + df.loc[[2, 3]] + +If you select a label *contained* within an interval, this will also select the interval. + +.. ipython:: python + + df.loc[2.5] + df.loc[[2.5, 3.5]] + + Miscellaneous indexing FAQ -------------------------- diff --git a/doc/source/api.rst b/doc/source/api.rst index bf9d521e2a12a..6ba8c2b8ead67 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1405,6 +1405,27 @@ Categorical Components CategoricalIndex.as_ordered CategoricalIndex.as_unordered +.. _api.intervalindex: + +IntervalIndex +------------- + +.. autosummary:: + :toctree: generated/ + + IntervalIndex + +IntervalIndex Components +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + IntervalIndex.from_arrays + IntervalIndex.from_tuples + IntervalIndex.from_breaks + IntervalIndex.from_intervals + .. _api.multiindex: MultiIndex diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 2c5aae133d4d9..b93749922c8ea 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -517,7 +517,15 @@ Alternatively we can specify custom bin-edges: .. ipython:: python - pd.cut(ages, bins=[0, 18, 35, 70]) + c = pd.cut(ages, bins=[0, 18, 35, 70]) + c + +.. versionadded:: 0.20.0 + +If the ``bins`` keyword is an ``IntervalIndex``, then these will be +used to bin the passed data. + +.. ipython:: python + + pd.cut([25, 20, 50], bins=c.categories) .. 
_reshaping.dummies: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a18ddd9da8816..04aed6c2c5466 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -13,6 +13,7 @@ Highlights include: - ``Panel`` has been deprecated, see :ref:`here ` - Improved user API when accessing levels in ``.groupby()``, see :ref:`here ` - Improved support for UInt64 dtypes, see :ref:`here ` +- Addition of an ``IntervalIndex`` and ``Interval`` scalar type, see :ref:`here ` - A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec, see :ref:`here ` - Window Binary Corr/Cov operations return a MultiIndexed ``DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, see :ref:`here ` - Support for S3 handling now uses ``s3fs``, see :ref:`here ` @@ -314,6 +315,63 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you sdf.to_coo() +.. _whatsnew_0200.enhancements.intervalindex: + +IntervalIndex +^^^^^^^^^^^^^ + +pandas has gained an ``IntervalIndex`` with its own dtype, ``interval``, as well as the ``Interval`` scalar type. These allow first-class support for interval notation, specifically as a return type for the categories in ``pd.cut`` and ``pd.qcut``. The ``IntervalIndex`` allows some unique indexing, see the :ref:`docs `. (:issue:`7640`, :issue:`8625`) + +Previous behavior: + +.. code-block:: ipython + + In [2]: pd.cut(range(3), 2) + Out[2]: + [(-0.002, 1], (-0.002, 1], (1, 2]] + Categories (2, object): [(-0.002, 1] < (1, 2]] + + # the returned categories are strings, representing Intervals + In [3]: pd.cut(range(3), 2).categories + Out[3]: Index(['(-0.002, 1]', '(1, 2]'], dtype='object') + +New behavior: + +.. ipython:: python + + c = pd.cut(range(4), bins=2) + c + c.categories + +Furthermore, this allows one to bin *other* data with these same bins. ``NaN`` represents a missing value similar to other dtypes. + +.. ipython:: python + + pd.cut([0, 3, 1, 1], bins=c.categories) + +These can also be used in ``Series`` and ``DataFrame``, and indexed. + +.. ipython:: python + + df = pd.DataFrame({'A': range(4), + 'B': pd.cut([0, 3, 1, 1], bins=c.categories)} + ).set_index('B') + +Selecting a specific interval: + +.. ipython:: python + + df.loc[pd.Interval(1.5, 3.0)] + +Selecting via a scalar value that is contained in the intervals: + +.. ipython:: python + + df.loc[0] + .. 
_whatsnew_0200.enhancements.other: Other Enhancements diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index eee287b2c157b..c8aedcef77502 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -41,7 +41,6 @@ cdef extern from "Python.h": cdef size_t _INIT_VEC_CAP = 128 - include "hashtable_class_helper.pxi" include "hashtable_func_helper.pxi" diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx new file mode 100644 index 0000000000000..60a34aff16e9d --- /dev/null +++ b/pandas/_libs/interval.pyx @@ -0,0 +1,215 @@ +cimport numpy as np +import numpy as np +import pandas as pd + +cimport util +cimport cython +import cython +from numpy cimport * +from tslib import Timestamp + +from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE, + PyObject_RichCompare) + +import numbers +_VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither']) + +cdef class IntervalMixin: + property closed_left: + def __get__(self): + return self.closed == 'left' or self.closed == 'both' + + property closed_right: + def __get__(self): + return self.closed == 'right' or self.closed == 'both' + + property open_left: + def __get__(self): + return not self.closed_left + + property open_right: + def __get__(self): + return not self.closed_right + + property mid: + def __get__(self): + try: + return 0.5 * (self.left + self.right) + except TypeError: + # datetime safe version + return self.left + 0.5 * (self.right - self.left) + + +cdef _interval_like(other): + return (hasattr(other, 'left') + and hasattr(other, 'right') + and hasattr(other, 'closed')) + + +cdef class Interval(IntervalMixin): + """ + Immutable object implementing an Interval, a bounded slice-like interval. + + .. versionadded:: 0.20.0 + + Properties + ---------- + left, right : values + Left and right bounds for each interval. + closed : {'left', 'right', 'both', 'neither'} + Whether the interval is closed on the left-side, right-side, both or + neither. Defaults to 'right'. + """ + + cdef readonly object left, right + cdef readonly str closed + + def __init__(self, left, right, str closed='right'): + # note: it is faster to just do these checks than to use a special + # constructor (__cinit__/__new__) to avoid them + if closed not in _VALID_CLOSED: + raise ValueError("invalid option for 'closed': %s" % closed) + if not left <= right: + raise ValueError('left side of interval must be <= right side') + self.left = left + self.right = right + self.closed = closed + + def __hash__(self): + return hash((self.left, self.right, self.closed)) + + def __contains__(self, key): + if _interval_like(key): + raise TypeError('__contains__ not defined for two intervals') + return ((self.left < key if self.open_left else self.left <= key) and + (key < self.right if self.open_right else key <= self.right)) + + def __richcmp__(self, other, int op): + if hasattr(other, 'ndim'): + # let numpy (or IntervalIndex) handle vectorization + return NotImplemented + + if _interval_like(other): + self_tuple = (self.left, self.right, self.closed) + other_tuple = (other.left, other.right, other.closed) + return PyObject_RichCompare(self_tuple, other_tuple, op) + + # nb. could just return NotImplemented now, but handling this + # explicitly allows us to opt into the Python 3 behavior, even on + # Python 2. 
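+        # (e.g. `Interval(0, 1) == 0` simply evaluates False via Python's
+        # default fallback, while `Interval(0, 1) < 0` raises the
+        # TypeError built below)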
+ if op == Py_EQ or op == Py_NE: + return NotImplemented + else: + op_str = {Py_LT: '<', Py_LE: '<=', Py_GT: '>', Py_GE: '>='}[op] + raise TypeError( + 'unorderable types: %s() %s %s()' % + (type(self).__name__, op_str, type(other).__name__)) + + def __reduce__(self): + args = (self.left, self.right, self.closed) + return (type(self), args) + + def _repr_base(self): + left = self.left + right = self.right + + # TODO: need more general formatting methodology here + if isinstance(left, Timestamp) and isinstance(right, Timestamp): + left = left._short_repr + right = right._short_repr + + return left, right + + def __repr__(self): + + left, right = self._repr_base() + return ('%s(%r, %r, closed=%r)' % + (type(self).__name__, left, right, self.closed)) + + def __str__(self): + + left, right = self._repr_base() + start_symbol = '[' if self.closed_left else '(' + end_symbol = ']' if self.closed_right else ')' + return '%s%s, %s%s' % (start_symbol, left, right, end_symbol) + + def __add__(self, y): + if isinstance(y, numbers.Number): + return Interval(self.left + y, self.right + y) + elif isinstance(y, Interval) and isinstance(self, numbers.Number): + return Interval(y.left + self, y.right + self) + return NotImplemented + + def __sub__(self, y): + if isinstance(y, numbers.Number): + return Interval(self.left - y, self.right - y) + return NotImplemented + + def __mul__(self, y): + if isinstance(y, numbers.Number): + return Interval(self.left * y, self.right * y) + elif isinstance(y, Interval) and isinstance(self, numbers.Number): + return Interval(y.left * self, y.right * self) + return NotImplemented + + def __div__(self, y): + if isinstance(y, numbers.Number): + return Interval(self.left / y, self.right / y) + return NotImplemented + + def __truediv__(self, y): + if isinstance(y, numbers.Number): + return Interval(self.left / y, self.right / y) + return NotImplemented + + def __floordiv__(self, y): + if isinstance(y, numbers.Number): + return Interval(self.left // y, self.right // y) + return NotImplemented + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef intervals_to_interval_bounds(ndarray intervals): + """ + Parameters + ---------- + intervals: ndarray object array of Intervals / nulls + + Returns + ------- + tuples (left: ndarray object array, + right: ndarray object array, + closed: str) + + """ + + cdef: + object closed = None, interval + int64_t n = len(intervals) + ndarray left, right + + left = np.empty(n, dtype=object) + right = np.empty(n, dtype=object) + + for i in range(len(intervals)): + interval = intervals[i] + if util._checknull(interval): + left[i] = np.nan + right[i] = np.nan + continue + + if not isinstance(interval, Interval): + raise TypeError("type {} with value {} is not an interval".format( + type(interval), interval)) + + left[i] = interval.left + right[i] = interval.right + if closed is None: + closed = interval.closed + elif closed != interval.closed: + raise ValueError('intervals must all be closed on the same side') + + return left, right, closed + +include "intervaltree.pxi" diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in new file mode 100644 index 0000000000000..4fa0d6d156fa2 --- /dev/null +++ b/pandas/_libs/intervaltree.pxi.in @@ -0,0 +1,396 @@ +""" +Template for intervaltree + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +from numpy cimport int64_t, float64_t +from numpy cimport ndarray, PyArray_ArgSort, NPY_QUICKSORT, PyArray_Take +import numpy as np + +cimport cython +cimport 
numpy as cnp +cnp.import_array() + +from hashtable cimport Int64Vector, Int64VectorData + + +ctypedef fused scalar_t: + float64_t + float32_t + int64_t + int32_t + + +#---------------------------------------------------------------------- +# IntervalTree +#---------------------------------------------------------------------- + +cdef class IntervalTree(IntervalMixin): + """A centered interval tree + + Based off the algorithm described on Wikipedia: + http://en.wikipedia.org/wiki/Interval_tree + + we are emulating the IndexEngine interface + """ + cdef: + readonly object left, right, root, dtype + readonly str closed + object _left_sorter, _right_sorter + + def __init__(self, left, right, closed='right', leaf_size=100): + """ + Parameters + ---------- + left, right : np.ndarray[ndim=1] + Left and right bounds for each interval. Assumed to contain no + NaNs. + closed : {'left', 'right', 'both', 'neither'}, optional + Whether the intervals are closed on the left-side, right-side, both + or neither. Defaults to 'right'. + leaf_size : int, optional + Parameter that controls when the tree switches from creating nodes + to brute-force search. Tune this parameter to optimize query + performance. + """ + if closed not in ['left', 'right', 'both', 'neither']: + raise ValueError("invalid option for 'closed': %s" % closed) + + left = np.asarray(left) + right = np.asarray(right) + self.dtype = np.result_type(left, right) + self.left = np.asarray(left, dtype=self.dtype) + self.right = np.asarray(right, dtype=self.dtype) + + indices = np.arange(len(left), dtype='int64') + + self.closed = closed + + node_cls = NODE_CLASSES[str(self.dtype), closed] + self.root = node_cls(self.left, self.right, indices, leaf_size) + + @property + def left_sorter(self): + """How to sort the left labels; this is used for binary search + """ + if self._left_sorter is None: + self._left_sorter = np.argsort(self.left) + return self._left_sorter + + @property + def right_sorter(self): + """How to sort the right labels + """ + if self._right_sorter is None: + self._right_sorter = np.argsort(self.right) + return self._right_sorter + + def get_loc(self, scalar_t key): + """Return all positions corresponding to intervals that overlap with + the given scalar key + """ + result = Int64Vector() + self.root.query(result, key) + if not result.data.n: + raise KeyError(key) + return result.to_array() + + def _get_partial_overlap(self, key_left, key_right, side): + """Return all positions corresponding to intervals with the given side + falling between the left and right bounds of an interval query + """ + if side == 'left': + values = self.left + sorter = self.left_sorter + else: + values = self.right + sorter = self.right_sorter + key = [key_left, key_right] + i, j = values.searchsorted(key, sorter=sorter) + return sorter[i:j] + + def get_loc_interval(self, key_left, key_right): + """Lookup the intervals enclosed in the given interval bounds + + The given interval is presumed to have closed bounds. + """ + import pandas as pd + left_overlap = self._get_partial_overlap(key_left, key_right, 'left') + right_overlap = self._get_partial_overlap(key_left, key_right, 'right') + enclosing = self.get_loc(0.5 * (key_left + key_right)) + combined = np.concatenate([left_overlap, right_overlap, enclosing]) + uniques = pd.unique(combined) + return uniques + + def get_indexer(self, scalar_t[:] target): + """Return the positions corresponding to unique intervals that overlap + with the given array of scalar targets. 
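+        Targets that overlap no interval are assigned -1; a target that
+        overlaps more than one interval raises KeyError, so this is only
+        valid when each queried point hits at most one interval.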
+ """ + + # TODO: write get_indexer_intervals + cdef: + size_t old_len + Py_ssize_t i + Int64Vector result + + result = Int64Vector() + old_len = 0 + for i in range(len(target)): + self.root.query(result, target[i]) + if result.data.n == old_len: + result.append(-1) + elif result.data.n > old_len + 1: + raise KeyError( + 'indexer does not intersect a unique set of intervals') + old_len = result.data.n + return result.to_array() + + def get_indexer_non_unique(self, scalar_t[:] target): + """Return the positions corresponding to intervals that overlap with + the given array of scalar targets. Non-unique positions are repeated. + """ + cdef: + size_t old_len + Py_ssize_t i + Int64Vector result, missing + + result = Int64Vector() + missing = Int64Vector() + old_len = 0 + for i in range(len(target)): + self.root.query(result, target[i]) + if result.data.n == old_len: + result.append(-1) + missing.append(i) + old_len = result.data.n + return result.to_array(), missing.to_array() + + def __repr__(self): + return ('<IntervalTree[{dtype},{closed}]: ' + '{n_elements} elements>'.format( + dtype=self.dtype, closed=self.closed, + n_elements=self.root.n_elements)) + + # compat with IndexEngine interface + def clear_mapping(self): + pass + + +cdef take(ndarray source, ndarray indices): + """Take the given positions from a 1D ndarray + """ + return PyArray_Take(source, indices, 0) + + +cdef sort_values_and_indices(all_values, all_indices, subset): + indices = take(all_indices, subset) + values = take(all_values, subset) + sorter = PyArray_ArgSort(values, 0, NPY_QUICKSORT) + sorted_values = take(values, sorter) + sorted_indices = take(indices, sorter) + return sorted_values, sorted_indices + +#---------------------------------------------------------------------- +# Nodes +#---------------------------------------------------------------------- + +# we need specialized nodes and leaves to optimize for different dtype and +# closed values + +{{py: + +nodes = [] +for dtype in ['float32', 'float64', 'int32', 'int64']: + for closed, cmp_left, cmp_right in [ + ('left', '<=', '<'), + ('right', '<', '<='), + ('both', '<=', '<='), + ('neither', '<', '<')]: + cmp_left_converse = '<' if cmp_left == '<=' else '<=' + cmp_right_converse = '<' if cmp_right == '<=' else '<=' + nodes.append((dtype, dtype.title(), + closed, closed.title(), + cmp_left, + cmp_right, + cmp_left_converse, + cmp_right_converse)) + +}} + +NODE_CLASSES = {} + +{{for dtype, dtype_title, closed, closed_title, cmp_left, cmp_right, + cmp_left_converse, cmp_right_converse in nodes}} + +cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode: + """Non-terminal node for an IntervalTree + + Categorizes intervals by those that fall to the left, those that fall to + the right, and those that overlap with the pivot. + """ + cdef: + {{dtype_title}}Closed{{closed_title}}IntervalNode left_node, right_node + {{dtype}}_t[:] center_left_values, center_right_values, left, right + int64_t[:] center_left_indices, center_right_indices, indices + {{dtype}}_t min_left, max_right + readonly {{dtype}}_t pivot + readonly int64_t n_elements, n_center, leaf_size + readonly bint is_leaf_node + + def __init__(self, + ndarray[{{dtype}}_t, ndim=1] left, + ndarray[{{dtype}}_t, ndim=1] right, + ndarray[int64_t, ndim=1] indices, + int64_t leaf_size): + + self.n_elements = len(left) + self.leaf_size = leaf_size + + # min_left and max_right are used to speed-up query by skipping + # query on sub-nodes. If this node has size 0, query is cheap, + # so these values don't matter. 
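+        # (query() checks a child's min_left/max_right before recursing,
+        # so subtrees that cannot contain the query point are skipped)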
+ if left.size > 0: + self.min_left = left.min() + self.max_right = right.max() + else: + self.min_left = 0 + self.max_right = 0 + + if self.n_elements <= leaf_size: + # make this a terminal (leaf) node + self.is_leaf_node = True + self.left = left + self.right = right + self.indices = indices + self.n_center = 0 + else: + # calculate a pivot so we can create child nodes + self.is_leaf_node = False + self.pivot = np.median(left + right) / 2 + left_set, right_set, center_set = self.classify_intervals( + left, right) + + self.left_node = self.new_child_node(left, right, + indices, left_set) + self.right_node = self.new_child_node(left, right, + indices, right_set) + + self.center_left_values, self.center_left_indices = \ + sort_values_and_indices(left, indices, center_set) + self.center_right_values, self.center_right_indices = \ + sort_values_and_indices(right, indices, center_set) + self.n_center = len(self.center_left_indices) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef classify_intervals(self, {{dtype}}_t[:] left, {{dtype}}_t[:] right): + """Classify the given intervals based upon whether they fall to the + left, right, or overlap with this node's pivot. + """ + cdef: + Int64Vector left_ind, right_ind, overlapping_ind + Py_ssize_t i + + left_ind = Int64Vector() + right_ind = Int64Vector() + overlapping_ind = Int64Vector() + + for i in range(self.n_elements): + if right[i] {{cmp_right_converse}} self.pivot: + left_ind.append(i) + elif self.pivot {{cmp_left_converse}} left[i]: + right_ind.append(i) + else: + overlapping_ind.append(i) + + return (left_ind.to_array(), + right_ind.to_array(), + overlapping_ind.to_array()) + + cdef new_child_node(self, + ndarray[{{dtype}}_t, ndim=1] left, + ndarray[{{dtype}}_t, ndim=1] right, + ndarray[int64_t, ndim=1] indices, + ndarray[int64_t, ndim=1] subset): + """Create a new child node. + """ + left = take(left, subset) + right = take(right, subset) + indices = take(indices, subset) + return {{dtype_title}}Closed{{closed_title}}IntervalNode( + left, right, indices, self.leaf_size) + + @cython.wraparound(False) + @cython.boundscheck(False) + @cython.initializedcheck(False) + cpdef query(self, Int64Vector result, scalar_t point): + """Recursively query this node and its sub-nodes for intervals that + overlap with the query point. + """ + cdef: + int64_t[:] indices + {{dtype}}_t[:] values + Py_ssize_t i + + if self.is_leaf_node: + # Once we get down to a certain size, it doesn't make sense to + # continue the binary tree structure. Instead, we use linear + # search. + for i in range(self.n_elements): + if self.left[i] {{cmp_left}} point {{cmp_right}} self.right[i]: + result.append(self.indices[i]) + else: + # There are child nodes. Based on comparing our query to the pivot, + # look at the center values, then go to the relevant child. 
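+            # The center intervals are stored twice, sorted by left and
+            # by right endpoint, so each scan below can stop at the first
+            # value that no longer overlaps the query point.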
+ if point < self.pivot: + values = self.center_left_values + indices = self.center_left_indices + for i in range(self.n_center): + if not values[i] {{cmp_left}} point: + break + result.append(indices[i]) + if point {{cmp_right}} self.left_node.max_right: + self.left_node.query(result, point) + elif point > self.pivot: + values = self.center_right_values + indices = self.center_right_indices + for i in range(self.n_center - 1, -1, -1): + if not point {{cmp_right}} values[i]: + break + result.append(indices[i]) + if self.right_node.min_left {{cmp_left}} point: + self.right_node.query(result, point) + else: + result.extend(self.center_left_indices) + + def __repr__(self): + if self.is_leaf_node: + return ('<{{dtype_title}}Closed{{closed_title}}IntervalNode: ' + '%s elements (terminal)>' % self.n_elements) + else: + n_left = self.left_node.n_elements + n_right = self.right_node.n_elements + n_center = self.n_elements - n_left - n_right + return ('<{{dtype_title}}Closed{{closed_title}}IntervalNode: ' + 'pivot %s, %s elements (%s left, %s right, %s ' + 'overlapping)>' % (self.pivot, self.n_elements, + n_left, n_right, n_center)) + + def counts(self): + """ + Inspect counts on this node + useful for debugging purposes + """ + if self.is_leaf_node: + return self.n_elements + else: + m = len(self.center_left_values) + l = self.left_node.counts() + r = self.right_node.counts() + return (m, (l, r)) + +NODE_CLASSES['{{dtype}}', + '{{closed}}'] = {{dtype_title}}Closed{{closed_title}}IntervalNode + +{{endfor}} diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f902422b0916d..31402c38c770d 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -61,6 +61,8 @@ from tslib cimport (convert_to_tsobject, convert_to_timedelta64, _check_all_nulls) import tslib from tslib import NaT, Timestamp, Timedelta +import interval +from interval import Interval cdef int64_t NPY_NAT = util.get_nat() @@ -245,6 +247,7 @@ cpdef bint isscalar(object val): - instances of datetime.timedelta - Period - instances of decimal.Decimal + - Interval """ @@ -258,7 +261,8 @@ cpdef bint isscalar(object val): or PyDelta_Check(val) or PyTime_Check(val) or util.is_period_object(val) - or is_decimal(val)) + or is_decimal(val) + or is_interval(val)) def item_from_zerodim(object val): diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 33c05f302dd94..f7dbae4ab736e 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -33,6 +33,10 @@ cpdef bint is_decimal(object obj): return isinstance(obj, Decimal) +cpdef bint is_interval(object obj): + return isinstance(obj, Interval) + + cpdef bint is_period(object val): """ Return a boolean if this is a Period object """ return util.is_period_object(val) @@ -429,6 +433,10 @@ def infer_dtype(object value): if is_period_array(values): return 'period' + elif is_interval(val): + if is_interval_array(values): + return 'interval' + for i in range(n): val = util.get_value_1d(values, i) if (util.is_integer_object(val) and @@ -880,6 +888,23 @@ cpdef bint is_period_array(ndarray[object] values): return null_count != n +cpdef bint is_interval_array(ndarray[object] values): + cdef: + Py_ssize_t i, n = len(values), null_count = 0 + object v + + if n == 0: + return False + for i in range(n): + v = values[i] + if util._checknull(v): + null_count += 1 + continue + if not is_interval(v): + return False + return null_count != n + + cdef extern from "parse_helper.h": inline int floatify(object, double *result, int *maybe_int) except -1 diff --git 
a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index ed0bb263ed6cf..47679966e3d5c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1296,6 +1296,18 @@ cdef class _Timestamp(datetime): return result + property _short_repr: + def __get__(self): + # format a Timestamp with only _date_repr if possible + # otherwise _repr_base + if (self.hour == 0 and + self.minute == 0 and + self.second == 0 and + self.microsecond == 0 and + self.nanosecond == 0): + return self._date_repr + return self._repr_base + property asm8: + def __get__(self): + return np.datetime64(self.value, 'ns') diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 7fab9295bb94e..5d2db864dd48e 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -19,7 +19,7 @@ is_bool_dtype, needs_i8_conversion, is_categorical, is_datetimetz, is_datetime64_any_dtype, is_datetime64tz_dtype, - is_timedelta64_dtype, + is_timedelta64_dtype, is_interval_dtype, is_scalar, is_list_like, _ensure_platform_int, _ensure_object, _ensure_float64, _ensure_uint64, @@ -605,31 +605,39 @@ def value_counts(values, sort=True, ascending=False, normalize=False, if bins is not None: try: from pandas.tools.tile import cut - values = Series(values).values - cat, bins = cut(values, bins, retbins=True) + values = Series(values) + ii = cut(values, bins, include_lowest=True) except TypeError: raise TypeError("bins argument only works with numeric data.") - values = cat.codes - if is_categorical_dtype(values) or is_sparse(values): + # count, remove nulls (from the index), and use the bins as the index + result = ii.value_counts(dropna=dropna) + result = result[result.index.notnull()] + result.index = result.index.astype('interval') + result = result.sort_index() - # handle Categorical and sparse, - result = Series(values).values.value_counts(dropna=dropna) - result.name = name - counts = result.values + # if we are dropna and we have NO values + if dropna and (result.values == 0).all(): + result = result.iloc[0:0] + + # normalizing is by len of all (regardless of dropna) + counts = np.array([len(ii)]) else: - keys, counts = _value_counts_arraylike(values, dropna) - if not isinstance(keys, Index): - keys = Index(keys) - result = Series(counts, index=keys, name=name) + if is_categorical_dtype(values) or is_sparse(values): - if bins is not None: - # TODO: This next line should be more efficient - result = result.reindex(np.arange(len(cat.categories)), - fill_value=0) - result.index = bins[:-1] + # handle Categorical and sparse, + result = Series(values).values.value_counts(dropna=dropna) + result.name = name + counts = result.values + + else: + keys, counts = _value_counts_arraylike(values, dropna) + + if not isinstance(keys, Index): + keys = Index(keys) + result = Series(counts, index=keys, name=name) if sort: result = result.sort_values(ascending=ascending) @@ -1396,6 +1404,8 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, allow_fill=allow_fill) elif is_datetimetz(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) + elif is_interval_dtype(arr): + return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) if indexer is None: indexer = np.arange(arr.shape[axis], dtype=np.int64) diff --git a/pandas/core/api.py b/pandas/core/api.py index 65253dedb8b53..ea5be17ef3aaf 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -11,7 +11,8 @@ from pandas.formats.format import set_eng_float_format from pandas.core.index import (Index, CategoricalIndex, Int64Index, 
UInt64Index, RangeIndex, Float64Index, - MultiIndex) + MultiIndex, IntervalIndex) +from pandas.indexes.interval import Interval, interval_range from pandas.core.series import Series from pandas.core.frame import DataFrame diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 5591ce4b0d4aa..45a9577c8d8b2 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -18,6 +18,7 @@ from pandas.types.common import (is_numeric_dtype, is_timedelta64_dtype, is_datetime64_dtype, is_categorical_dtype, + is_interval_dtype, is_datetimelike, is_datetime64_any_dtype, is_bool, is_integer_dtype, @@ -40,11 +41,11 @@ from pandas.core.base import (PandasObject, SelectionMixin, GroupByError, DataError, SpecificationError) +from pandas.core.index import (Index, MultiIndex, + CategoricalIndex, _ensure_index) from pandas.core.categorical import Categorical from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame -from pandas.core.index import (Index, MultiIndex, CategoricalIndex, - _ensure_index) from pandas.core.internals import BlockManager, make_block from pandas.core.series import Series from pandas.core.panel import Panel @@ -2660,7 +2661,7 @@ def _convert_grouper(axis, grouper): return grouper.reindex(axis)._values elif isinstance(grouper, (list, Series, Index, np.ndarray)): if len(grouper) != len(axis): - raise AssertionError('Grouper and axis must be same length') + raise ValueError('Grouper and axis must be same length') return grouper else: return grouper @@ -3145,20 +3146,29 @@ def value_counts(self, normalize=False, sort=True, ascending=False, if bins is None: lab, lev = algorithms.factorize(val, sort=True) + llab = lambda lab, inc: lab[inc] else: - cat, bins = cut(val, bins, retbins=True) - # bins[:-1] for backward compat; - # o.w. cat.categories could be better - lab, lev, dropna = cat.codes, bins[:-1], False - sorter = np.lexsort((lab, ids)) + # lab is a Categorical with categories an IntervalIndex + lab = cut(Series(val), bins, include_lowest=True) + lev = lab.cat.categories + lab = lev.take(lab.cat.codes) + llab = lambda lab, inc: lab[inc]._multiindex.labels[-1] + + if is_interval_dtype(lab): + # TODO: should we do this inside II? 
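+            # (sorting on the interval endpoints rather than on the
+            # Interval objects themselves)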
+ sorter = np.lexsort((lab.left, lab.right, ids)) + else: + sorter = np.lexsort((lab, ids)) + ids, lab = ids[sorter], lab[sorter] # group boundaries are where group ids change idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]] # new values are where sorted labels change - inc = np.r_[True, lab[1:] != lab[:-1]] + lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1)) + inc = np.r_[True, lchanges] inc[idx] = True # group boundaries are also new values out = np.diff(np.nonzero(np.r_[inc, True])[0]) # value counts @@ -3166,7 +3176,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx)) # multi-index components - labels = list(map(rep, self.grouper.recons_labels)) + [lab[inc]] + labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)] levels = [ping.group_index for ping in self.grouper.groupings] + [lev] names = self.grouper.names + [self._selection_name] diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 9e22bdd5facc4..dd8fa2d3ddc81 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1087,10 +1087,10 @@ def _getitem_iterable(self, key, axis=0): return self.obj.take(inds, axis=axis, convert=False) else: # Have the index compute an indexer or return None - # if it cannot handle + # if it cannot handle; we only act on all found values indexer, keyarr = labels._convert_listlike_indexer( key, kind=self.name) - if indexer is not None: + if indexer is not None and (indexer != -1).all(): return self.obj.take(indexer, axis=axis) # existing labels are unique and indexer are unique @@ -1429,7 +1429,7 @@ def error(): try: key = self._convert_scalar_indexer(key, axis) - if key not in ax: + if not ax.contains(key): error() except TypeError as e: @@ -1897,7 +1897,7 @@ def convert_to_index_sliceable(obj, key): elif isinstance(key, compat.string_types): # we are an actual column - if key in obj._data.items: + if obj._data.items.contains(key): return None # We might have a datetimelike string that we can translate to a diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 66a81aadc4213..907198d98cf5b 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -15,6 +15,7 @@ is_float_dtype, is_period_arraylike, is_integer_dtype, + is_interval_dtype, is_datetimetz, is_integer, is_float, @@ -575,6 +576,7 @@ def to_string(self): pprint_thing(frame.index))) text = info_line else: + strcols = self._to_str_columns() if self.line_width is None: # no need to wrap around just print # the whole frame @@ -2027,6 +2029,8 @@ def format_array(values, formatter, float_format=None, na_rep='NaN', if is_categorical_dtype(values): fmt_klass = CategoricalArrayFormatter + elif is_interval_dtype(values): + fmt_klass = IntervalArrayFormatter elif is_float_dtype(values.dtype): fmt_klass = FloatArrayFormatter elif is_period_arraylike(values): @@ -2294,6 +2298,17 @@ def _format_strings(self): return fmt_values.tolist() +class IntervalArrayFormatter(GenericArrayFormatter): + + def __init__(self, values, *args, **kwargs): + GenericArrayFormatter.__init__(self, values, *args, **kwargs) + + def _format_strings(self): + formatter = self.formatter or str + fmt_values = np.array([formatter(x) for x in self.values]) + return fmt_values + + class PeriodArrayFormatter(IntArrayFormatter): def _format_strings(self): diff --git a/pandas/indexes/api.py b/pandas/indexes/api.py index a3cb54ca97071..db076b60ab34e 100644 --- a/pandas/indexes/api.py +++ b/pandas/indexes/api.py @@ -3,6 
+3,7 @@ InvalidIndexError) from pandas.indexes.category import CategoricalIndex # noqa from pandas.indexes.multi import MultiIndex # noqa +from pandas.indexes.interval import IntervalIndex # noqa from pandas.indexes.numeric import (NumericIndex, Float64Index, # noqa Int64Index, UInt64Index) from pandas.indexes.range import RangeIndex # noqa @@ -13,7 +14,7 @@ # TODO: there are many places that rely on these private methods existing in # pandas.core.index __all__ = ['Index', 'MultiIndex', 'NumericIndex', 'Float64Index', 'Int64Index', - 'CategoricalIndex', 'RangeIndex', 'UInt64Index', + 'CategoricalIndex', 'IntervalIndex', 'RangeIndex', 'UInt64Index', 'InvalidIndexError', '_new_Index', '_ensure_index', '_get_na_value', '_get_combined_index', diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index ab5c01388e652..00ad4ca71cb9d 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -24,6 +24,7 @@ is_dtype_equal, is_object_dtype, is_categorical_dtype, + is_interval_dtype, is_bool_dtype, is_signed_integer_dtype, is_unsigned_integer_dtype, @@ -49,9 +50,9 @@ from pandas.formats.printing import pprint_thing from pandas.core.ops import _comp_method_OBJECT_ARRAY from pandas.core.strings import StringAccessorMixin - from pandas.core.config import get_option + # simplify default_pprint = lambda x, max_seq_items=None: \ pprint_thing(x, escape_chars=('\t', '\r', '\n'), quote_strings=True, @@ -138,6 +139,9 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): _is_numeric_dtype = False _can_hold_na = True + # would we like our indexing holder to defer to us + _defer_to_indexing = False + # prioritize current class for _shallow_copy_with_infer, # used to infer integers as datetime-likes _infer_as_myclass = False @@ -167,6 +171,12 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, from .category import CategoricalIndex return CategoricalIndex(data, copy=copy, name=name, **kwargs) + # interval + if is_interval_dtype(data): + from .interval import IntervalIndex + return IntervalIndex.from_intervals(data, name=name, + copy=copy) + # index-like elif isinstance(data, (np.ndarray, Index, ABCSeries)): @@ -276,6 +286,10 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, elif inferred in ['floating', 'mixed-integer-float']: from .numeric import Float64Index return Float64Index(subarr, copy=copy, name=name) + elif inferred == 'interval': + from .interval import IntervalIndex + return IntervalIndex.from_intervals(subarr, name=name, + copy=copy) elif inferred == 'boolean': # don't support boolean explicitly ATM pass @@ -1210,6 +1224,9 @@ def is_object(self): def is_categorical(self): return self.inferred_type in ['categorical'] + def is_interval(self): + return self.inferred_type in ['interval'] + def is_mixed(self): return self.inferred_type in ['mixed'] @@ -1413,11 +1430,6 @@ def _convert_index_indexer(self, keyarr): @Appender(_index_shared_docs['_convert_list_indexer']) def _convert_list_indexer(self, keyarr, kind=None): - """ - passed a key that is tuplesafe that is integer based - and we have a mixed index (e.g. number/labels). figure out - the indexer. 
return None if we can't help - """ if (kind in [None, 'iloc', 'ix'] and is_integer_dtype(keyarr) and not self.is_floating() and not isinstance(keyarr, ABCPeriodIndex)): @@ -1553,9 +1565,41 @@ def __nonzero__(self): __bool__ = __nonzero__ + _index_shared_docs['__contains__'] = """ + return a boolean if this key is IN the index + + Parameters + ---------- + key : object + + Returns + ------- + boolean + """ + + @Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs) def __contains__(self, key): hash(key) - # work around some kind of odd cython bug + try: + return key in self._engine + except TypeError: + return False + + _index_shared_docs['contains'] = """ + return a boolean if this key is IN the index + + Parameters + ---------- + key : object + + Returns + ------- + boolean + """ + + @Appender(_index_shared_docs['contains'] % _index_doc_kwargs) + def contains(self, key): + hash(key) try: return key in self._engine except TypeError: return False @@ -3341,6 +3385,13 @@ def _searchsorted_monotonic(self, label, side='left'): raise ValueError('index must be monotonic increasing or decreasing') + def _get_loc_only_exact_matches(self, key): + """ + This is overridden on subclasses (namely, IntervalIndex) to control + get_slice_bound. + """ + return self.get_loc(key) + def get_slice_bound(self, label, side, kind): """ Calculate slice bound that corresponds to given label. @@ -3370,7 +3421,7 @@ def get_slice_bound(self, label, side, kind): # we need to look up the label try: - slc = self.get_loc(label) + slc = self._get_loc_only_exact_matches(label) except KeyError as err: try: return self._searchsorted_monotonic(label, side) @@ -3606,7 +3657,9 @@ def _evaluate_compare(self, other): if needs_i8_conversion(self) and needs_i8_conversion(other): return self._evaluate_compare(other, op) - if is_object_dtype(self) and self.nlevels == 1: + if (is_object_dtype(self) and + self.nlevels == 1): + # don't pass MultiIndex with np.errstate(all='ignore'): result = _comp_method_OBJECT_ARRAY( @@ -3918,6 +3971,8 @@ def _ensure_index(index_like, copy=False): def _get_na_value(dtype): + if is_datetime64_any_dtype(dtype) or is_timedelta64_dtype(dtype): + return libts.NaT return {np.datetime64: libts.NaT, np.timedelta64: libts.NaT}.get(dtype, np.nan) diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 7cfc95de5f538..6c57b2ed83705 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -7,7 +7,9 @@ from pandas.types.common import (is_categorical_dtype, _ensure_platform_int, is_list_like, + is_interval_dtype, is_scalar) +from pandas.core.common import _asarray_tuplesafe from pandas.types.missing import array_equivalent @@ -17,7 +19,6 @@ import pandas.core.base as base import pandas.core.missing as missing import pandas.indexes.base as ibase -from pandas.core.common import _asarray_tuplesafe _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update(dict(target_klass='CategoricalIndex')) @@ -261,14 +262,35 @@ def ordered(self): def _reverse_indexer(self): return self._data._reverse_indexer() + @Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs) def __contains__(self, key): hash(key) + + if self.categories._defer_to_indexing: + return key in self.categories + + return key in self.values + + @Appender(_index_shared_docs['contains'] % _index_doc_kwargs) + def contains(self, key): + hash(key) + + if self.categories._defer_to_indexing: + return self.categories.contains(key) + return key in self.values def __array__(self, dtype=None): """ the array interface, 
return my values """ return np.array(self._data, dtype=dtype) + @Appender(_index_shared_docs['astype']) + def astype(self, dtype, copy=True): + if is_interval_dtype(dtype): + from pandas import IntervalIndex + return IntervalIndex.from_intervals(np.array(self)) + return super(CategoricalIndex, self).astype(dtype=dtype, copy=copy) + @cache_readonly def _isnan(self): """ return if each value is nan""" @@ -431,8 +453,8 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) target = ibase._ensure_index(target) - if isinstance(target, CategoricalIndex): - target = target.categories + if self.equals(target): + return np.arange(len(self), dtype='intp') if method == 'pad' or method == 'backfill': raise NotImplementedError("method='pad' and method='backfill' not " @@ -440,10 +462,17 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): elif method == 'nearest': raise NotImplementedError("method='nearest' not implemented yet " 'for CategoricalIndex') - else: + if (isinstance(target, CategoricalIndex) and + self.values.is_dtype_equal(target)): + # we have the same codes + codes = target.codes + else: + if isinstance(target, CategoricalIndex): + target = target.categories codes = self.categories.get_indexer(target) - indexer, _ = self._engine.get_indexer_non_unique(codes) + + indexer, _ = self._engine.get_indexer_non_unique(codes) return _ensure_platform_int(indexer) @@ -457,20 +486,39 @@ def get_indexer_non_unique(self, target): codes = self.categories.get_indexer(target) return self._engine.get_indexer_non_unique(codes) + @Appender(_index_shared_docs['_convert_scalar_indexer']) + def _convert_scalar_indexer(self, key, kind=None): + if self.categories._defer_to_indexing: + return self.categories._convert_scalar_indexer(key, kind=kind) + + return super(CategoricalIndex, self)._convert_scalar_indexer( + key, kind=kind) + @Appender(_index_shared_docs['_convert_list_indexer']) def _convert_list_indexer(self, keyarr, kind=None): # Return our indexer or raise if all of the values are not included in # the categories - codes = self.categories.get_indexer(keyarr) - if (codes == -1).any(): - raise KeyError("a list-indexer must only include values that are " - "in the categories") - return None + if self.categories._defer_to_indexing: + indexer = self.categories._convert_list_indexer(keyarr, kind=kind) + return Index(self.codes).get_indexer_for(indexer) + + indexer = self.categories.get_indexer(keyarr) + if (indexer == -1).any(): + raise KeyError( + "a list-indexer must only " + "include values that are " + "in the categories") + + return self.get_indexer(keyarr) @Appender(_index_shared_docs['_convert_arr_indexer']) def _convert_arr_indexer(self, keyarr): keyarr = _asarray_tuplesafe(keyarr) + + if self.categories._defer_to_indexing: + return keyarr + return self._shallow_copy(keyarr) @Appender(_index_shared_docs['_convert_index_indexer']) @@ -488,6 +536,8 @@ def take(self, indices, axis=0, allow_fill=True, na_value=-1) return self._create_from_codes(taken) + take_nd = take + def map(self, mapper): """Apply mapper function to its categories (not codes). 
diff --git a/pandas/indexes/interval.py b/pandas/indexes/interval.py new file mode 100644 index 0000000000000..63315ef861d12 --- /dev/null +++ b/pandas/indexes/interval.py @@ -0,0 +1,1062 @@ +""" define the IntervalIndex """ + +import numpy as np + +from pandas.types.missing import notnull, isnull +from pandas.types.generic import ABCPeriodIndex +from pandas.types.dtypes import IntervalDtype +from pandas.types.common import (_ensure_platform_int, + is_list_like, + is_datetime_or_timedelta_dtype, + is_integer_dtype, + is_object_dtype, + is_categorical_dtype, + is_float_dtype, + is_interval_dtype, + is_scalar, + is_integer) +from pandas.indexes.base import (Index, _ensure_index, + default_pprint, _index_shared_docs) + +from pandas._libs import Timestamp, Timedelta +from pandas._libs.interval import (Interval, IntervalMixin, IntervalTree, + intervals_to_interval_bounds) + +from pandas.indexes.multi import MultiIndex +from pandas.compat.numpy import function as nv +from pandas.core import common as com +from pandas.util.decorators import cache_readonly, Appender +from pandas.core.config import get_option + +import pandas.indexes.base as ibase +_index_doc_kwargs = dict(ibase._index_doc_kwargs) +_index_doc_kwargs.update( + dict(klass='IntervalIndex', + target_klass='IntervalIndex or list of Intervals')) + + +_VALID_CLOSED = set(['left', 'right', 'both', 'neither']) + + +def _get_next_label(label): + dtype = getattr(label, 'dtype', type(label)) + if isinstance(label, (Timestamp, Timedelta)): + dtype = 'datetime64' + if is_datetime_or_timedelta_dtype(dtype): + return label + np.timedelta64(1, 'ns') + elif is_integer_dtype(dtype): + return label + 1 + elif is_float_dtype(dtype): + return np.nextafter(label, np.infty) + else: + raise TypeError('cannot determine next label for type %r' + % type(label)) + + +def _get_prev_label(label): + dtype = getattr(label, 'dtype', type(label)) + if isinstance(label, (Timestamp, Timedelta)): + dtype = 'datetime64' + if is_datetime_or_timedelta_dtype(dtype): + return label - np.timedelta64(1, 'ns') + elif is_integer_dtype(dtype): + return label - 1 + elif is_float_dtype(dtype): + return np.nextafter(label, -np.infty) + else: + raise TypeError('cannot determine previous label for type %r' + % type(label)) + + +def _get_interval_closed_bounds(interval): + """ + Given an Interval or IntervalIndex, return the corresponding interval with + closed bounds. + """ + left, right = interval.left, interval.right + if interval.open_left: + left = _get_next_label(left) + if interval.open_right: + right = _get_prev_label(right) + return left, right + + +def _new_IntervalIndex(cls, d): + """ This is called upon unpickling, + rather than the default which doesn't + have arguments and breaks __new__ """ + + return cls.from_arrays(**d) + + +class IntervalIndex(IntervalMixin, Index): + """ + Immutable Index implementing an ordered, sliceable set. IntervalIndex + represents an Index of intervals that are all closed on the same side. + + .. versionadded:: 0.20.0 + + Properties + ---------- + left, right : array-like (1-dimensional) + Left and right bounds for each interval. + closed : {'left', 'right', 'both', 'neither'}, optional + Whether the intervals are closed on the left-side, right-side, both or + neither. Defaults to 'right'. + name : object, optional + Name to be stored in the index. 
+ copy : boolean, default False + Copy the meta-data + """ + _typ = 'intervalindex' + _comparables = ['name'] + _attributes = ['name', 'closed'] + _allow_index_ops = True + + # we would like our indexing holder to defer to us + _defer_to_indexing = True + + _mask = None + + def __new__(cls, data, closed='right', + name=None, copy=False, dtype=None, + fastpath=False, verify_integrity=True): + + if fastpath: + return cls._simple_new(data.left, data.right, closed, name, + copy=copy, verify_integrity=False) + + if name is None and hasattr(data, 'name'): + name = data.name + + if isinstance(data, IntervalIndex): + left = data.left + right = data.right + + else: + + # don't allow scalars + if is_scalar(data): + cls._scalar_data_error(data) + + data = IntervalIndex.from_intervals(data, name=name) + left, right = data.left, data.right + + return cls._simple_new(left, right, closed, name, + copy=copy, verify_integrity=verify_integrity) + + @classmethod + def _simple_new(cls, left, right, closed=None, name=None, + copy=False, verify_integrity=True): + result = IntervalMixin.__new__(cls) + + if closed is None: + closed = 'right' + left = _ensure_index(left, copy=copy) + right = _ensure_index(right, copy=copy) + + # coerce dtypes to match if needed + if is_float_dtype(left) and is_integer_dtype(right): + right = right.astype(left.dtype) + if is_float_dtype(right) and is_integer_dtype(left): + left = left.astype(right.dtype) + + if type(left) != type(right): + raise ValueError("must not have differing left [{}] " + "and right [{}] types".format( + type(left), type(right))) + + if isinstance(left, ABCPeriodIndex): + raise ValueError("Period dtypes are not supported, " + "use a PeriodIndex instead") + + result._left = left + result._right = right + result._closed = closed + result.name = name + if verify_integrity: + result._validate() + result._reset_identity() + return result + + @Appender(_index_shared_docs['_shallow_copy']) + def _shallow_copy(self, left=None, right=None, **kwargs): + if left is None: + + # no values passed + left, right = self.left, self.right + + elif right is None: + + # only single value passed, could be an IntervalIndex + # or array of Intervals + if not isinstance(left, IntervalIndex): + left = type(self).from_intervals(left) + + left, right = left.left, left.right + else: + + # both left and right are values + pass + + attributes = self._get_attributes_dict() + attributes.update(kwargs) + attributes['verify_integrity'] = False + return self._simple_new(left, right, **attributes) + + def _validate(self): + """ + Verify that the IntervalIndex is valid. 
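+        Checks that left and right have equal lengths, that missing
+        values are missing on both sides, and that left <= right
+        elementwise.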
+ """ + if self.closed not in _VALID_CLOSED: + raise ValueError("invalid options for 'closed': %s" % self.closed) + if len(self.left) != len(self.right): + raise ValueError('left and right must have the same length') + left_mask = notnull(self.left) + right_mask = notnull(self.right) + if not (left_mask == right_mask).all(): + raise ValueError('missing values must be missing in the same ' + 'location both left and right sides') + if not (self.left[left_mask] <= self.right[left_mask]).all(): + raise ValueError('left side of interval must be <= right side') + self._mask = ~left_mask + + @cache_readonly + def hasnans(self): + """ return if I have any nans; enables various perf speedups """ + return self._isnan.any() + + @cache_readonly + def _isnan(self): + """ return if each value is nan""" + if self._mask is None: + self._mask = isnull(self.left) + return self._mask + + @cache_readonly + def _engine(self): + return IntervalTree(self.left, self.right, closed=self.closed) + + @property + def _constructor(self): + return type(self).from_intervals + + def __contains__(self, key): + """ + return a boolean if this key is IN the index + We *only* accept an Interval + + Parameters + ---------- + key : Interval + + Returns + ------- + boolean + """ + if not isinstance(key, Interval): + return False + + try: + self.get_loc(key) + return True + except KeyError: + return False + + def contains(self, key): + """ + return a boolean if this key is IN the index + + Unlike ``__contains__``, keys need not be Interval objects; + a scalar key matches if it is contained in an interval. + + Parameters + ---------- + key : int, float, Interval + + Returns + ------- + boolean + """ + try: + self.get_loc(key) + return True + except KeyError: + return False + + @classmethod + def from_breaks(cls, breaks, closed='right', name=None, copy=False): + """ + Construct an IntervalIndex from an array of splits + + Parameters + ---------- + breaks : array-like (1-dimensional) + The splits defining the left and right bounds of each interval. + closed : {'left', 'right', 'both', 'neither'}, optional + Whether the intervals are closed on the left-side, right-side, both + or neither. Defaults to 'right'. + name : object, optional + Name to be stored in the index. + copy : boolean, default False + copy the data + + Examples + -------- + + >>> IntervalIndex.from_breaks([0, 1, 2, 3]) + IntervalIndex(left=[0, 1, 2], + right=[1, 2, 3], + closed='right') + """ + breaks = np.asarray(breaks) + return cls.from_arrays(breaks[:-1], breaks[1:], closed, + name=name, copy=copy) + + @classmethod + def from_arrays(cls, left, right, closed='right', name=None, copy=False): + """ + Construct an IntervalIndex from a left and right array + + Parameters + ---------- + left : array-like (1-dimensional) + Left bounds for each interval. + right : array-like (1-dimensional) + Right bounds for each interval. + closed : {'left', 'right', 'both', 'neither'}, optional + Whether the intervals are closed on the left-side, right-side, both + or neither. Defaults to 'right'. + name : object, optional + Name to be stored in the index. 
+        copy : boolean, default False
+            copy the data
+
+        Examples
+        --------
+
+        >>> IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
+        IntervalIndex(left=[0, 1, 2],
+                      right=[1, 2, 3],
+                      closed='right')
+        """
+        left = np.asarray(left)
+        right = np.asarray(right)
+        return cls._simple_new(left, right, closed, name=name,
+                               copy=copy, verify_integrity=True)
+
+    @classmethod
+    def from_intervals(cls, data, name=None, copy=False):
+        """
+        Construct an IntervalIndex from a 1d array of Interval objects
+
+        Parameters
+        ----------
+        data : array-like (1-dimensional)
+            Array of Interval objects. All intervals must be closed on the same
+            sides.
+        name : object, optional
+            Name to be stored in the index.
+        copy : boolean, default False
+            by default copy the data; this is for compat only and is ignored
+
+        Examples
+        --------
+
+        >>> IntervalIndex.from_intervals([Interval(0, 1), Interval(1, 2)])
+        IntervalIndex(left=[0, 1],
+                      right=[1, 2],
+                      closed='right')
+
+        The generic Index constructor works identically when it infers an array
+        of all intervals:
+
+        >>> Index([Interval(0, 1), Interval(1, 2)])
+        IntervalIndex(left=[0, 1],
+                      right=[1, 2],
+                      closed='right')
+        """
+        data = np.asarray(data)
+        left, right, closed = intervals_to_interval_bounds(data)
+        return cls.from_arrays(left, right, closed, name=name, copy=False)
+
+    @classmethod
+    def from_tuples(cls, data, closed='right', name=None, copy=False):
+        """
+        Construct an IntervalIndex from a list/array of tuples
+
+        Parameters
+        ----------
+        data : array-like (1-dimensional)
+            Array of tuples
+        closed : {'left', 'right', 'both', 'neither'}, optional
+            Whether the intervals are closed on the left-side, right-side, both
+            or neither. Defaults to 'right'.
+        name : object, optional
+            Name to be stored in the index.
+        copy : boolean, default False
+            by default copy the data; this is for compat only and is ignored
+
+        Examples
+        --------
+
+        >>> IntervalIndex.from_tuples([(0, 1), (1, 2)])
+        IntervalIndex(left=[0, 1],
+                      right=[1, 2],
+                      closed='right')
+        """
+        left = []
+        right = []
+        for d in data:
+
+            if isnull(d):
+                left.append(np.nan)
+                right.append(np.nan)
+                continue
+
+            l, r = d
+            left.append(l)
+            right.append(r)
+
+        # TODO
+        # if we have nulls and we previously had *only*
+        # integer data, then we have changed the dtype
+
+        return cls.from_arrays(left, right, closed, name=name, copy=False)
+
+    def to_tuples(self):
+        return Index(com._asarray_tuplesafe(zip(self.left, self.right)))
+
+    @cache_readonly
+    def _multiindex(self):
+        return MultiIndex.from_arrays([self.left, self.right],
+                                      names=['left', 'right'])
+
+    @property
+    def left(self):
+        return self._left
+
+    @property
+    def right(self):
+        return self._right
+
+    @property
+    def closed(self):
+        return self._closed
+
+    def __len__(self):
+        return len(self.left)
+
+    @cache_readonly
+    def values(self):
+        """
+        Returns the IntervalIndex's data as a numpy array of Interval
+        objects (with dtype='object')
+        """
+        left = self.left
+        right = self.right
+        mask = self._isnan
+        closed = self._closed
+
+        result = np.empty(len(left), dtype=object)
+        for i in range(len(left)):
+            if mask[i]:
+                result[i] = np.nan
+            else:
+                result[i] = Interval(left[i], right[i], closed)
+        return result
+
+    def __array__(self, result=None):
+        """ the array interface, return my values """
+        return self.values
+
+    def __array_wrap__(self, result, context=None):
+        # we don't want the superclass implementation
+        return result
+
+    def _array_values(self):
+        return self.values
+
+    def __reduce__(self):
+        d = dict(left=self.left,
+                 right=self.right)
+        d.update(self._get_attributes_dict())
+        return _new_IntervalIndex, (self.__class__, d), None
+
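The alternate constructors above are interchangeable ways of building the same
index. A minimal usage sketch, assuming this patch is applied; the values
mirror the docstring examples above:

    from pandas import Index, Interval, IntervalIndex

    # four equivalent spellings of the index [(0, 1], (1, 2]]
    idx = IntervalIndex.from_breaks([0, 1, 2])
    assert idx.equals(IntervalIndex.from_arrays([0, 1], [1, 2]))
    assert idx.equals(IntervalIndex.from_tuples([(0, 1), (1, 2)]))
    assert idx.equals(
        IntervalIndex.from_intervals([Interval(0, 1), Interval(1, 2)]))

    # the generic Index constructor infers an IntervalIndex as well
    assert idx.equals(Index([Interval(0, 1), Interval(1, 2)]))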
+    @Appender(_index_shared_docs['copy'])
+    def copy(self, deep=False, name=None):
+        left = self.left.copy(deep=True) if deep else self.left
+        right = self.right.copy(deep=True) if deep else self.right
+        name = name if name is not None else self.name
+        return type(self).from_arrays(left, right, closed=self.closed,
+                                      name=name)
+
+    @Appender(_index_shared_docs['astype'])
+    def astype(self, dtype, copy=True):
+        if is_interval_dtype(dtype):
+            if copy:
+                self = self.copy()
+            return self
+        elif is_object_dtype(dtype):
+            return Index(self.values, dtype=object)
+        elif is_categorical_dtype(dtype):
+            from pandas import Categorical
+            return Categorical(self, ordered=True)
+        raise ValueError('Cannot cast IntervalIndex to dtype %s' % dtype)
+
+    @cache_readonly
+    def dtype(self):
+        return IntervalDtype.construct_from_string(str(self.left.dtype))
+
+    @property
+    def inferred_type(self):
+        return 'interval'
+
+    @Appender(Index.memory_usage.__doc__)
+    def memory_usage(self, deep=False):
+        # we don't use an explicit engine
+        # so return the bytes here
+        return (self.left.memory_usage(deep=deep) +
+                self.right.memory_usage(deep=deep))
+
+    @cache_readonly
+    def mid(self):
+        """Returns the mid-point of each interval in the index as an array
+        """
+        try:
+            return Index(0.5 * (self.left.values + self.right.values))
+        except TypeError:
+            # datetime safe version
+            delta = self.right.values - self.left.values
+            return Index(self.left.values + 0.5 * delta)
+
+    @cache_readonly
+    def is_monotonic(self):
+        return self._multiindex.is_monotonic
+
+    @cache_readonly
+    def is_monotonic_increasing(self):
+        return self._multiindex.is_monotonic_increasing
+
+    @cache_readonly
+    def is_monotonic_decreasing(self):
+        return self._multiindex.is_monotonic_decreasing
+
+    @cache_readonly
+    def is_unique(self):
+        return self._multiindex.is_unique
+
+    @cache_readonly
+    def is_non_overlapping_monotonic(self):
+        # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... )
+        # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...)
+        # we already require left <= right
+        return ((self.right[:-1] <= self.left[1:]).all() or
+                (self.left[:-1] >= self.right[1:]).all())
+
+    @Appender(_index_shared_docs['_convert_scalar_indexer'])
+    def _convert_scalar_indexer(self, key, kind=None):
+        if kind == 'iloc':
+            return super(IntervalIndex, self)._convert_scalar_indexer(
+                key, kind=kind)
+        return key
+
+    def _maybe_cast_slice_bound(self, label, side, kind):
+        return getattr(self, side)._maybe_cast_slice_bound(label, side, kind)
+
+    @Appender(_index_shared_docs['_convert_list_indexer'])
+    def _convert_list_indexer(self, keyarr, kind=None):
+        """
+        we are passed a list-like indexer. Return the
+        indexer for matching intervals.
+ """ + locs = self.get_indexer_for(keyarr) + + # we have missing values + if (locs == -1).any(): + raise KeyError + + return locs + + def _maybe_cast_indexed(self, key): + """ + we need to cast the key, which could be a scalar + or an array-like to the type of our subtype + """ + if isinstance(key, IntervalIndex): + return key + + subtype = self.dtype.subtype + if is_float_dtype(subtype): + if is_integer(key): + key = float(key) + elif isinstance(key, (np.ndarray, Index)): + key = key.astype('float64') + elif is_integer_dtype(subtype): + if is_integer(key): + key = int(key) + + return key + + def _check_method(self, method): + if method is None: + return + + if method in ['bfill', 'backfill', 'pad', 'ffill', 'nearest']: + raise NotImplementedError( + 'method {} not yet implemented for ' + 'IntervalIndex'.format(method)) + + raise ValueError("Invalid fill method") + + def _searchsorted_monotonic(self, label, side, exclude_label=False): + if not self.is_non_overlapping_monotonic: + raise KeyError('can only get slices from an IntervalIndex if ' + 'bounds are non-overlapping and all monotonic ' + 'increasing or decreasing') + + if isinstance(label, IntervalMixin): + raise NotImplementedError + + if ((side == 'left' and self.left.is_monotonic_increasing) or + (side == 'right' and self.left.is_monotonic_decreasing)): + sub_idx = self.right + if self.open_right or exclude_label: + label = _get_next_label(label) + else: + sub_idx = self.left + if self.open_left or exclude_label: + label = _get_prev_label(label) + + return sub_idx._searchsorted_monotonic(label, side) + + def _get_loc_only_exact_matches(self, key): + if isinstance(key, Interval): + + if not self.is_unique: + raise ValueError("cannot index with a slice Interval" + " and a non-unique index") + + # TODO: this expands to a tuple index, see if we can + # do better + return Index(self._multiindex.values).get_loc(key) + raise KeyError + + def _find_non_overlapping_monotonic_bounds(self, key): + if isinstance(key, IntervalMixin): + start = self._searchsorted_monotonic( + key.left, 'left', exclude_label=key.open_left) + stop = self._searchsorted_monotonic( + key.right, 'right', exclude_label=key.open_right) + elif isinstance(key, slice): + # slice + start, stop = key.start, key.stop + if (key.step or 1) != 1: + raise NotImplementedError("cannot slice with a slice step") + if start is None: + start = 0 + else: + start = self._searchsorted_monotonic(start, 'left') + if stop is None: + stop = len(self) + else: + stop = self._searchsorted_monotonic(stop, 'right') + else: + # scalar or index-like + + start = self._searchsorted_monotonic(key, 'left') + stop = self._searchsorted_monotonic(key, 'right') + return start, stop + + def get_loc(self, key, method=None): + self._check_method(method) + + original_key = key + key = self._maybe_cast_indexed(key) + + if self.is_non_overlapping_monotonic: + if isinstance(key, Interval): + left = self._maybe_cast_slice_bound(key.left, 'left', None) + right = self._maybe_cast_slice_bound(key.right, 'right', None) + key = Interval(left, right, key.closed) + else: + key = self._maybe_cast_slice_bound(key, 'left', None) + + start, stop = self._find_non_overlapping_monotonic_bounds(key) + + if start is None or stop is None: + return slice(start, stop) + elif start + 1 == stop: + return start + elif start < stop: + return slice(start, stop) + else: + raise KeyError(original_key) + + else: + # use the interval tree + if isinstance(key, Interval): + left, right = _get_interval_closed_bounds(key) + return 
self._engine.get_loc_interval(left, right)
+            else:
+                return self._engine.get_loc(key)
+
+    def get_value(self, series, key):
+        if com.is_bool_indexer(key):
+            loc = key
+        elif is_list_like(key):
+            loc = self.get_indexer(key)
+        elif isinstance(key, slice):
+
+            if not (key.step is None or key.step == 1):
+                raise ValueError("cannot support a non-default "
+                                 "step in a slice")
+
+            try:
+                loc = self.get_loc(key)
+            except TypeError:
+
+                # we didn't find exact intervals
+                # or are non-unique
+                raise ValueError("unable to slice with "
+                                 "this key: {}".format(key))
+
+        else:
+            loc = self.get_loc(key)
+        return series.iloc[loc]
+
+    @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs)
+    def get_indexer(self, target, method=None, limit=None, tolerance=None):
+
+        self._check_method(method)
+        target = _ensure_index(target)
+        target = self._maybe_cast_indexed(target)
+
+        if self.equals(target):
+            return np.arange(len(self), dtype='intp')
+
+        if self.is_non_overlapping_monotonic:
+            start, stop = self._find_non_overlapping_monotonic_bounds(target)
+
+            start_plus_one = start + 1
+            if not ((start_plus_one < stop).any()):
+                return np.where(start_plus_one == stop, start, -1)
+
+        if not self.is_unique:
+            raise ValueError("cannot handle non-unique indices")
+
+        # IntervalIndex
+        if isinstance(target, IntervalIndex):
+            indexer = self._get_reindexer(target)
+
+        # non IntervalIndex
+        else:
+            indexer = np.concatenate([self.get_loc(i) for i in target])
+
+        return _ensure_platform_int(indexer)
+
+    def _get_reindexer(self, target):
+        """
+        Return an indexer for a target IntervalIndex with self
+        """
+
+        # find the left and right indexers
+        lindexer = self._engine.get_indexer(target.left.values)
+        rindexer = self._engine.get_indexer(target.right.values)
+
+        # we want to return an indexer on the intervals
+        # however, our keys could provide overlapping of multiple
+        # intervals, so we iterate through the indexers and construct
+        # a set of indexers
+
+        indexer = []
+        n = len(self)
+
+        for i, (l, r) in enumerate(zip(lindexer, rindexer)):
+
+            target_value = target[i]
+
+            # matching on the lhs bound
+            if (l != -1 and
+                    self.closed == 'right' and
+                    target_value.left == self[l].right):
+                l += 1
+
+            # matching on the rhs bound
+            if (r != -1 and
+                    self.closed == 'left' and
+                    target_value.right == self[r].left):
+                r -= 1
+
+            # not found
+            if l == -1 and r == -1:
+                indexer.append(np.array([-1]))
+
+            elif r == -1:
+
+                indexer.append(np.arange(l, n))
+
+            elif l == -1:
+
+                # care about left/right closed here
+                value = self[i]
+
+                # target.closed same as self.closed
+                if self.closed == target.closed:
+                    if target_value.left < value.left:
+                        indexer.append(np.array([-1]))
+                        continue
+
+                # target.closed == 'left'
+                elif self.closed == 'right':
+                    if target_value.left <= value.left:
+                        indexer.append(np.array([-1]))
+                        continue
+
+                # target.closed == 'right'
+                elif self.closed == 'left':
+                    if target_value.left <= value.left:
+                        indexer.append(np.array([-1]))
+                        continue
+
+                indexer.append(np.arange(0, r + 1))
+
+            else:
+                indexer.append(np.arange(l, r + 1))
+
+        return np.concatenate(indexer)
+
+    @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
+    def get_indexer_non_unique(self, target):
+        target = self._maybe_cast_indexed(_ensure_index(target))
+        return super(IntervalIndex, self).get_indexer_non_unique(target)
+
+    @Appender(_index_shared_docs['where'])
+    def where(self, cond, other=None):
+        if other is None:
+            other = self._na_value
+        values = np.where(cond, self.values, other)
+        return self._shallow_copy(values)
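Taken together, get_loc / get_value / get_indexer above give positional
lookup by scalar or by Interval. A small sketch of the intended semantics,
mirroring the tests added later in this patch:

    from pandas import Interval, IntervalIndex

    idx = IntervalIndex.from_arrays([0, 1], [1, 2])   # (0, 1], (1, 2]

    # scalars locate the interval that covers them
    assert idx.get_loc(0.5) == 0
    assert idx.get_loc(2) == 1

    # an overlapping Interval matches as well
    assert idx.get_loc(Interval(0, 0.5)) == 0

    # get_indexer vectorizes the lookup; -1 marks values not covered
    # idx.get_indexer([-1, 0.5, 1.5, 3]) -> array([-1, 0, 1, -1])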
+
+    def delete(self, loc):
+        new_left = self.left.delete(loc)
+        new_right = self.right.delete(loc)
+        return self._shallow_copy(new_left, new_right)
+
+    def insert(self, loc, item):
+        if not isinstance(item, Interval):
+            raise ValueError('can only insert Interval objects into an '
+                             'IntervalIndex')
+        if not item.closed == self.closed:
+            raise ValueError('inserted item must be closed on the same side '
+                             'as the index')
+        new_left = self.left.insert(loc, item.left)
+        new_right = self.right.insert(loc, item.right)
+        return self._shallow_copy(new_left, new_right)
+
+    def _as_like_interval_index(self, other, error_msg):
+        self._assert_can_do_setop(other)
+        other = _ensure_index(other)
+        if (not isinstance(other, IntervalIndex) or
+                self.closed != other.closed):
+            raise ValueError(error_msg)
+        return other
+
+    def _append_same_dtype(self, to_concat, name):
+        """
+        assert that we all have the same .closed
+        we allow a 0-len index here as well
+        """
+        if not len(set([i.closed for i in to_concat if len(i)])) == 1:
+            msg = ('can only append two IntervalIndex objects '
+                   'that are closed on the same side')
+            raise ValueError(msg)
+        return super(IntervalIndex, self)._append_same_dtype(to_concat, name)
+
+    @Appender(_index_shared_docs['take'] % _index_doc_kwargs)
+    def take(self, indices, axis=0, allow_fill=True,
+             fill_value=None, **kwargs):
+        nv.validate_take(tuple(), kwargs)
+        indices = _ensure_platform_int(indices)
+        left, right = self.left, self.right
+
+        if fill_value is None:
+            fill_value = self._na_value
+        mask = indices == -1
+
+        if not mask.any():
+            # we won't change dtype here in this case
+            # if we don't need
+            allow_fill = False
+
+        taker = lambda x: x.take(indices, allow_fill=allow_fill,
+                                 fill_value=fill_value)
+
+        try:
+            new_left = taker(left)
+            new_right = taker(right)
+        except ValueError:
+
+            # we need to coerce; might have NA's in an
+            # integer dtype
+            new_left = taker(left.astype(float))
+            new_right = taker(right.astype(float))
+
+        return self._shallow_copy(new_left, new_right)
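The coercion fallback in take above exists because filling missing positions
(-1) in integer-backed bounds requires upcasting to float so NaN can be
inserted. A hedged sketch of the expected behaviour:

    import pandas as pd
    from pandas import IntervalIndex

    idx = IntervalIndex.from_breaks([0, 1, 2])    # int64 bounds

    # plain positional take; duplicates are fine
    assert idx.take([0, 0, 1]).equals(
        IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2]))

    # -1 requests a fill: the bounds are coerced and NaN inserted
    result = idx.take([0, -1])
    assert pd.isnull(result[1])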
+
+    def __getitem__(self, value):
+        mask = self._isnan[value]
+        if is_scalar(mask) and mask:
+            return self._na_value
+
+        left = self.left[value]
+        right = self.right[value]
+
+        # scalar
+        if not isinstance(left, Index):
+            return Interval(left, right, self.closed)
+
+        return self._shallow_copy(left, right)
+
+    # __repr__ associated methods are based on MultiIndex
+
+    def _format_with_header(self, header, **kwargs):
+        return header + list(self._format_native_types(**kwargs))
+
+    def _format_native_types(self, na_rep='', quoting=None, **kwargs):
+        """ actually format my specific types """
+        from pandas.formats.format import IntervalArrayFormatter
+        return IntervalArrayFormatter(values=self,
+                                      na_rep=na_rep,
+                                      justify='all').get_result()
+
+    def _format_data(self):
+
+        # TODO: integrate with categorical and make generic
+        n = len(self)
+        max_seq_items = min((get_option(
+            'display.max_seq_items') or n) // 10, 10)
+
+        formatter = str
+
+        if n == 0:
+            summary = '[]'
+        elif n == 1:
+            first = formatter(self[0])
+            summary = '[{}]'.format(first)
+        elif n == 2:
+            first = formatter(self[0])
+            last = formatter(self[-1])
+            summary = '[{}, {}]'.format(first, last)
+        else:
+
+            if n > max_seq_items:
+                n = min(max_seq_items // 2, 10)
+                head = [formatter(x) for x in self[:n]]
+                tail = [formatter(x) for x in self[-n:]]
+                summary = '[{} ... {}]'.format(', '.join(head),
+                                               ', '.join(tail))
+            else:
+                head = []
+                tail = [formatter(x) for x in self]
+                summary = '[{}]'.format(', '.join(tail))
+
+        return summary + self._format_space()
+
+    def _format_attrs(self):
+        attrs = [('closed', repr(self.closed))]
+        if self.name is not None:
+            attrs.append(('name', default_pprint(self.name)))
+        attrs.append(('dtype', "'%s'" % self.dtype))
+        return attrs
+
+    def _format_space(self):
+        return "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
+
+    def argsort(self, *args, **kwargs):
+        return np.lexsort((self.right, self.left))
+
+    def equals(self, other):
+
+        if self.is_(other):
+            return True
+
+        # if we can coerce to an II
+        # then we can compare
+        if not isinstance(other, IntervalIndex):
+            if not is_interval_dtype(other):
+                return False
+            other = Index(getattr(other, 'values', other))
+
+        return (self.left.equals(other.left) and
+                self.right.equals(other.right) and
+                self.closed == other.closed)
+
+    def _setop(op_name):
+        def func(self, other):
+            msg = ('can only do set operations between two IntervalIndex '
+                   'objects that are closed on the same side')
+            other = self._as_like_interval_index(other, msg)
+            result = getattr(self._multiindex, op_name)(other._multiindex)
+            result_name = self.name if self.name == other.name else None
+            return type(self).from_tuples(result.values, closed=self.closed,
+                                          name=result_name)
+        return func
+
+    union = _setop('union')
+    intersection = _setop('intersection')
+    difference = _setop('difference')
+    symmetric_difference = _setop('symmetric_difference')
+
+    # TODO: arithmetic operations
+
+
+IntervalIndex._add_logical_methods_disabled()
+
+
+def interval_range(start=None, end=None, freq=None, periods=None,
+                   name=None, closed='right', **kwargs):
+    """
+    Return a fixed frequency IntervalIndex
+
+    Parameters
+    ----------
+    start : string or datetime-like, default None
+        Left bound for generating data
+    end : string or datetime-like, default None
+        Right bound for generating data
+    freq : integer, string or DateOffset, default 1
+    periods : integer, default None
+    name : str, default None
+        Name of the resulting index
+    closed : string, default 'right'
+        options are: 'left', 'right', 'both', 'neither'
+
+    Notes
+    -----
+    2 of start, end, or periods must be specified
+
+    Returns
+    -------
+    rng : IntervalIndex
+    """
+
+    if freq is None:
+        freq = 1
+
+    if start is None:
+        if periods is None or end is None:
+            raise ValueError("must specify 2 of start, end, periods")
+        start = end - periods * freq
+    elif end is None:
+        if periods is None or start is None:
+            raise ValueError("must specify 2 of start, end, periods")
+        end = start + periods * freq
+    elif periods is None:
+        if start is None or end is None:
+            raise ValueError("must specify 2 of start, end, periods")
+        pass
+
+    # must all be same units or None
+    arr = np.array([start, end, freq])
+    if is_object_dtype(arr):
+        raise ValueError("start, end, freq need to be the same type")
+
+    return IntervalIndex.from_breaks(np.arange(start, end, freq),
+                                     name=name,
+                                     closed=closed)
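interval_range above is a thin convenience over from_breaks: it materializes
evenly spaced breaks with np.arange. A sketch matching the test added later in
this patch (note that end itself is not included as a break, since
np.arange(start, end, freq) stops before end):

    import numpy as np
    from pandas import interval_range, IntervalIndex

    result = interval_range(0, 5, name='foo', closed='both')
    expected = IntervalIndex.from_breaks(np.arange(0, 5),
                                         name='foo', closed='both')
    assert result.equals(expected)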
diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index 74c45aac8b620..d1c8e0ba1cc4e 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -1318,15 +1318,17 @@ def nlevels(self):
     def levshape(self):
         return tuple(len(x) for x in self.levels)
 
+    @Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs)
     def __contains__(self, key):
         hash(key)
-
         # work around some kind of odd cython bug
         try:
             self.get_loc(key)
             return True
         except LookupError:
             return False
 
+    contains = __contains__
+
     def __reduce__(self):
         """Necessary for making this object picklable"""
         d = dict(levels=[lev for lev in self.levels],
diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py
index 7301c87026114..a15d7cf26cbea 100644
--- a/pandas/tests/api/test_api.py
+++ b/pandas/tests/api/test_api.py
@@ -49,7 +49,7 @@ class TestPDApi(Base, tm.TestCase):
                'Period', 'PeriodIndex', 'RangeIndex', 'UInt64Index',
                'Series', 'SparseArray', 'SparseDataFrame',
                'SparseSeries', 'TimeGrouper', 'Timedelta',
-               'TimedeltaIndex', 'Timestamp']
+               'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex']
 
     # these are already deprecated; awaiting removal
     deprecated_classes = ['WidePanel', 'Panel4D',
@@ -63,7 +63,7 @@ class TestPDApi(Base, tm.TestCase):
 
     # top-level functions
     funcs = ['bdate_range', 'concat', 'crosstab', 'cut',
-             'date_range', 'eval',
+             'date_range', 'interval_range', 'eval',
              'factorize', 'get_dummies',
              'infer_freq', 'isnull', 'lreshape', 'melt', 'notnull',
              'offsets',
diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py
index f3fd6332417a1..1d05eda88e265 100644
--- a/pandas/tests/api/test_types.py
+++ b/pandas/tests/api/test_types.py
@@ -23,7 +23,8 @@ class TestTypes(Base, tm.TestCase):
                 'is_string_dtype', 'is_signed_integer_dtype',
                 'is_timedelta64_dtype', 'is_timedelta64_ns_dtype',
                 'is_unsigned_integer_dtype', 'is_period',
-                'is_period_dtype', 'is_re', 'is_re_compilable',
+                'is_period_dtype', 'is_interval', 'is_interval_dtype',
+                'is_re', 'is_re_compilable',
                 'is_dict_like', 'is_iterator', 'is_file_like',
                 'is_list_like', 'is_hashable',
                 'is_named_tuple', 'is_sequence',
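The new is_interval_dtype predicate registered above can be exercised
directly; a short sketch using the same import path as the test_alter_axes
changes below (in later pandas these helpers live under pandas.api.types):

    import pandas as pd
    from pandas.types.common import is_interval_dtype

    idx = pd.IntervalIndex.from_breaks([0, 1, 2])
    assert is_interval_dtype(idx)

    # pd.cut produces a Categorical whose categories are an IntervalIndex
    s = pd.cut(pd.Series(range(10)), 5)
    assert is_interval_dtype(s.cat.categories)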
diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index e52bfdbd4f837..f05b6fdd6bc23 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -8,7 +8,10 @@
 from pandas.compat import lrange
 
 from pandas import (DataFrame, Series, Index, MultiIndex,
-                    RangeIndex, date_range)
+                    RangeIndex, date_range, IntervalIndex)
+from pandas.types.common import (is_object_dtype,
+                                 is_categorical_dtype,
+                                 is_interval_dtype)
 import pandas as pd
 
 from pandas.util.testing import (assert_series_equal,
@@ -295,6 +298,17 @@ def test_set_index_dst(self):
         exp = pd.DataFrame({'b': [3, 4, 5]}, index=exp_index)
         tm.assert_frame_equal(res, exp)
 
+    def test_reset_index_with_intervals(self):
+        idx = pd.IntervalIndex.from_breaks(np.arange(11), name='x')
+        original = pd.DataFrame({'x': idx, 'y': np.arange(10)})[['x', 'y']]
+
+        result = original.set_index('x')
+        expected = pd.DataFrame({'y': np.arange(10)}, index=idx)
+        assert_frame_equal(result, expected)
+
+        result2 = result.reset_index()
+        assert_frame_equal(result2, original)
+
     def test_set_index_multiindexcolumns(self):
         columns = MultiIndex.from_tuples([('foo', 1), ('foo', 2), ('bar', 1)])
         df = DataFrame(np.random.randn(3, 3), columns=columns)
@@ -730,3 +744,52 @@ def test_set_index_preserve_categorical_dtype(self):
             result = df.set_index(cols).reset_index()
             result = result.reindex(columns=df.columns)
             tm.assert_frame_equal(result, df)
+
+
+class TestIntervalIndex(tm.TestCase):
+
+    def test_setitem(self):
+
+        df = DataFrame({'A': range(10)})
+        s = pd.cut(df.A, 5)
+        self.assertIsInstance(s.cat.categories, IntervalIndex)
+
+        # B & D end up as Categoricals
+        # the remainder are converted to in-line objects
+        # containing an IntervalIndex.values
+        df['B'] = s
+        df['C'] = np.array(s)
+        df['D'] = s.values
+        df['E'] = np.array(s.values)
+
+        assert is_categorical_dtype(df['B'])
+        assert is_interval_dtype(df['B'].cat.categories)
+        assert is_categorical_dtype(df['D'])
+        assert is_interval_dtype(df['D'].cat.categories)
+
+        assert is_object_dtype(df['C'])
+        assert is_object_dtype(df['E'])
+
+        # they compare equal as Index
+        # when converted to numpy objects
+        c = lambda x: Index(np.array(x))
+        tm.assert_index_equal(c(df.B), c(df.B), check_names=False)
+        tm.assert_index_equal(c(df.B), c(df.C), check_names=False)
+        tm.assert_index_equal(c(df.B), c(df.D), check_names=False)
+
+        # B & D are the same Series
+        tm.assert_series_equal(df['B'], df['B'], check_names=False)
+        tm.assert_series_equal(df['B'], df['D'], check_names=False)
+
+        # C & E are the same Series
+        tm.assert_series_equal(df['C'], df['C'], check_names=False)
+        tm.assert_series_equal(df['C'], df['E'], check_names=False)
+
+    def test_set_reset_index(self):
+
+        df = DataFrame({'A': range(10)})
+        s = pd.cut(df.A, 5)
+        df['B'] = s
+        df = df.set_index('B')
+
+        df = df.reset_index()
diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py
index 5108fc6080866..97171123c4a36 100644
--- a/pandas/tests/frame/test_sorting.py
+++ b/pandas/tests/frame/test_sorting.py
@@ -1,12 +1,13 @@
 # -*- coding: utf-8 -*-
 
 from __future__ import print_function
-
+import random
 import numpy as np
+import pandas as pd
 
 from pandas.compat import lrange
 from pandas import (DataFrame, Series, MultiIndex, Timestamp,
-                    date_range, NaT)
+                    date_range, NaT, IntervalIndex)
 
 from pandas.util.testing import (assert_series_equal,
                                  assert_frame_equal,
@@ -19,45 +20,6 @@
 
 class TestDataFrameSorting(tm.TestCase, TestData):
 
-    def test_sort_index(self):
-        # GH13496
-
-        frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4],
-                          columns=['A', 'B', 'C', 'D'])
-
-        # axis=0 : sort rows by index labels
-        unordered = frame.loc[[3, 2, 4, 1]]
-        result = unordered.sort_index(axis=0)
-        expected = frame
-        assert_frame_equal(result, expected)
-
-        result = unordered.sort_index(ascending=False)
-        expected = frame[::-1]
-        assert_frame_equal(result, expected)
-
-        # axis=1 : sort columns by column names
-        unordered = frame.iloc[:, [2, 1, 3, 0]]
-        result = unordered.sort_index(axis=1)
-        assert_frame_equal(result, frame)
-
-        result = unordered.sort_index(axis=1, ascending=False)
-        expected = frame.iloc[:, ::-1]
-        assert_frame_equal(result, expected)
-
-    def test_sort_index_multiindex(self):
-        # GH13496
-
-        # sort rows by specified level of multi-index
-        mi = MultiIndex.from_tuples([[2, 1, 3], [1, 1, 1]], names=list('ABC'))
-        df = DataFrame([[1, 2], [3, 4]], mi)
-
-        # MI sort, but no level: sort_level has no effect
-        mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
-        df = DataFrame([[1, 2], [3, 4]], mi)
-        result = df.sort_index(sort_remaining=False)
-        expected = df.sort_index()
-        assert_frame_equal(result, expected)
-
     def test_sort(self):
         frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4],
                           columns=['A', 'B', 'C', 'D'])
@@ -151,21 +113,6 @@ def test_sort_values_inplace(self):
         expected = frame.sort_values(by=['A', 'B'], ascending=False)
         assert_frame_equal(sorted_df, expected)
 
-    def test_sort_index_categorical_index(self):
-
-        df = (DataFrame({'A': np.arange(6, dtype='int64'),
-                         'B': Series(list('aabbca'))
-                         .astype('category', categories=list('cab'))})
-              .set_index('B'))
-
-        result = df.sort_index()
-        expected = df.iloc[[4, 0, 1, 5, 2, 3]]
-        assert_frame_equal(result, expected)
-
-        result = df.sort_index(ascending=False)
-        expected = df.iloc[[3, 2, 5, 1, 0, 
4]] - assert_frame_equal(result, expected) - def test_sort_nan(self): # GH3917 nan = np.nan @@ -291,8 +238,86 @@ def test_stable_descending_multicolumn_sort(self): kind='mergesort') assert_frame_equal(sorted_df, expected) + def test_sort_datetimes(self): + + # GH 3461, argsort / lexsort differences for a datetime column + df = DataFrame(['a', 'a', 'a', 'b', 'c', 'd', 'e', 'f', 'g'], + columns=['A'], + index=date_range('20130101', periods=9)) + dts = [Timestamp(x) + for x in ['2004-02-11', '2004-01-21', '2004-01-26', + '2005-09-20', '2010-10-04', '2009-05-12', + '2008-11-12', '2010-09-28', '2010-09-28']] + df['B'] = dts[::2] + dts[1::2] + df['C'] = 2. + df['A1'] = 3. + + df1 = df.sort_values(by='A') + df2 = df.sort_values(by=['A']) + assert_frame_equal(df1, df2) + + df1 = df.sort_values(by='B') + df2 = df.sort_values(by=['B']) + assert_frame_equal(df1, df2) + + def test_frame_column_inplace_sort_exception(self): + s = self.frame['A'] + with assertRaisesRegexp(ValueError, "This Series is a view"): + s.sort_values(inplace=True) + + cp = s.copy() + cp.sort_values() # it works! + + def test_sort_nat_values_in_int_column(self): + + # GH 14922: "sorting with large float and multiple columns incorrect" + + # cause was that the int64 value NaT was considered as "na". Which is + # only correct for datetime64 columns. + + int_values = (2, int(NaT)) + float_values = (2.0, -1.797693e308) + + df = DataFrame(dict(int=int_values, float=float_values), + columns=["int", "float"]) + + df_reversed = DataFrame(dict(int=int_values[::-1], + float=float_values[::-1]), + columns=["int", "float"], + index=[1, 0]) + + # NaT is not a "na" for int64 columns, so na_position must not + # influence the result: + df_sorted = df.sort_values(["int", "float"], na_position="last") + assert_frame_equal(df_sorted, df_reversed) + + df_sorted = df.sort_values(["int", "float"], na_position="first") + assert_frame_equal(df_sorted, df_reversed) + + # reverse sorting order + df_sorted = df.sort_values(["int", "float"], ascending=False) + assert_frame_equal(df_sorted, df) + + # and now check if NaT is still considered as "na" for datetime64 + # columns: + df = DataFrame(dict(datetime=[Timestamp("2016-01-01"), NaT], + float=float_values), columns=["datetime", "float"]) + + df_reversed = DataFrame(dict(datetime=[NaT, Timestamp("2016-01-01")], + float=float_values[::-1]), + columns=["datetime", "float"], + index=[1, 0]) + + df_sorted = df.sort_values(["datetime", "float"], na_position="first") + assert_frame_equal(df_sorted, df_reversed) + + df_sorted = df.sort_values(["datetime", "float"], na_position="last") + assert_frame_equal(df_sorted, df_reversed) + + +class TestDataFrameSortIndexKinds(tm.TestCase, TestData): + def test_sort_index_multicolumn(self): - import random A = np.arange(5).repeat(20) B = np.tile(np.arange(5), 20) random.shuffle(A) @@ -448,78 +473,73 @@ def test_sort_index_level(self): res = df.sort_index(level=['A', 'B'], sort_remaining=False) assert_frame_equal(df, res) - def test_sort_datetimes(self): - - # GH 3461, argsort / lexsort differences for a datetime column - df = DataFrame(['a', 'a', 'a', 'b', 'c', 'd', 'e', 'f', 'g'], - columns=['A'], - index=date_range('20130101', periods=9)) - dts = [Timestamp(x) - for x in ['2004-02-11', '2004-01-21', '2004-01-26', - '2005-09-20', '2010-10-04', '2009-05-12', - '2008-11-12', '2010-09-28', '2010-09-28']] - df['B'] = dts[::2] + dts[1::2] - df['C'] = 2. - df['A1'] = 3. 
- - df1 = df.sort_values(by='A') - df2 = df.sort_values(by=['A']) - assert_frame_equal(df1, df2) - - df1 = df.sort_values(by='B') - df2 = df.sort_values(by=['B']) - assert_frame_equal(df1, df2) - - def test_frame_column_inplace_sort_exception(self): - s = self.frame['A'] - with assertRaisesRegexp(ValueError, "This Series is a view"): - s.sort_values(inplace=True) - - cp = s.copy() - cp.sort_values() # it works! + def test_sort_index_categorical_index(self): - def test_sort_nat_values_in_int_column(self): + df = (DataFrame({'A': np.arange(6, dtype='int64'), + 'B': Series(list('aabbca')) + .astype('category', categories=list('cab'))}) + .set_index('B')) - # GH 14922: "sorting with large float and multiple columns incorrect" + result = df.sort_index() + expected = df.iloc[[4, 0, 1, 5, 2, 3]] + assert_frame_equal(result, expected) - # cause was that the int64 value NaT was considered as "na". Which is - # only correct for datetime64 columns. + result = df.sort_index(ascending=False) + expected = df.iloc[[3, 2, 5, 1, 0, 4]] + assert_frame_equal(result, expected) - int_values = (2, int(NaT)) - float_values = (2.0, -1.797693e308) + def test_sort_index(self): + # GH13496 - df = DataFrame(dict(int=int_values, float=float_values), - columns=["int", "float"]) + frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], + columns=['A', 'B', 'C', 'D']) - df_reversed = DataFrame(dict(int=int_values[::-1], - float=float_values[::-1]), - columns=["int", "float"], - index=[1, 0]) + # axis=0 : sort rows by index labels + unordered = frame.loc[[3, 2, 4, 1]] + result = unordered.sort_index(axis=0) + expected = frame + assert_frame_equal(result, expected) - # NaT is not a "na" for int64 columns, so na_position must not - # influence the result: - df_sorted = df.sort_values(["int", "float"], na_position="last") - assert_frame_equal(df_sorted, df_reversed) + result = unordered.sort_index(ascending=False) + expected = frame[::-1] + assert_frame_equal(result, expected) - df_sorted = df.sort_values(["int", "float"], na_position="first") - assert_frame_equal(df_sorted, df_reversed) + # axis=1 : sort columns by column names + unordered = frame.iloc[:, [2, 1, 3, 0]] + result = unordered.sort_index(axis=1) + assert_frame_equal(result, frame) - # reverse sorting order - df_sorted = df.sort_values(["int", "float"], ascending=False) - assert_frame_equal(df_sorted, df) + result = unordered.sort_index(axis=1, ascending=False) + expected = frame.iloc[:, ::-1] + assert_frame_equal(result, expected) - # and now check if NaT is still considered as "na" for datetime64 - # columns: - df = DataFrame(dict(datetime=[Timestamp("2016-01-01"), NaT], - float=float_values), columns=["datetime", "float"]) + def test_sort_index_multiindex(self): + # GH13496 - df_reversed = DataFrame(dict(datetime=[NaT, Timestamp("2016-01-01")], - float=float_values[::-1]), - columns=["datetime", "float"], - index=[1, 0]) + # sort rows by specified level of multi-index + mi = MultiIndex.from_tuples([[2, 1, 3], [1, 1, 1]], names=list('ABC')) + df = DataFrame([[1, 2], [3, 4]], mi) - df_sorted = df.sort_values(["datetime", "float"], na_position="first") - assert_frame_equal(df_sorted, df_reversed) + # MI sort, but no level: sort_level has no effect + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) + df = DataFrame([[1, 2], [3, 4]], mi) + result = df.sort_index(sort_remaining=False) + expected = df.sort_index() + assert_frame_equal(result, expected) - df_sorted = df.sort_values(["datetime", "float"], na_position="last") - 
assert_frame_equal(df_sorted, df_reversed) + def test_sort_index_intervalindex(self): + # this is a de-facto sort via unstack + # confirming that we sort in the order of the bins + y = Series(np.random.randn(100)) + x1 = Series(np.sign(np.random.randn(100))) + x2 = pd.cut(Series(np.random.randn(100)), + bins=[-3, -0.5, 0, 0.5, 3]) + model = pd.concat([y, x1, x2], axis=1, keys=['Y', 'X1', 'X2']) + + result = model.groupby(['X1', 'X2']).mean().unstack() + expected = IntervalIndex.from_tuples( + [(-3.0, -0.5), (-0.5, 0.0), + (0.0, 0.5), (0.5, 3.0)], + closed='right') + result = result.columns.levels[1].categories + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index cfcb531bedab8..68bdc0c6d5112 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -7,7 +7,7 @@ import pandas as pd from pandas import (Index, MultiIndex, CategoricalIndex, - DataFrame, Categorical, Series) + DataFrame, Categorical, Series, Interval) from pandas.util.testing import assert_frame_equal, assert_series_equal import pandas.util.testing as tm from .common import MixIn @@ -519,7 +519,8 @@ def test_groupby_categorical_two_columns(self): res = groups_double_key.agg('mean') nan = np.nan idx = MultiIndex.from_product( - [Categorical(["(1, 2]", "(2, 3]", "(3, 6]"], ordered=True), + [Categorical([Interval(1, 2), Interval(2, 3), + Interval(3, 6)], ordered=True), [1, 2, 3, 4]], names=["cat", "C2"]) exp = DataFrame({"C1": [nan, nan, nan, nan, 3, 3, diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 8f3d8e2307f45..25f89b29021ce 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -864,11 +864,13 @@ def test_get_group_empty_bins(self): bins = [0, 5, 10, 15] g = d.groupby(pd.cut(d[0], bins)) - result = g.get_group('(0, 5]') + # TODO: should prob allow a str of Interval work as well + # IOW '(0, 5]' + result = g.get_group(pd.Interval(0, 5)) expected = DataFrame([3, 1], index=[0, 1]) assert_frame_equal(result, expected) - self.assertRaises(KeyError, lambda: g.get_group('(10, 15]')) + self.assertRaises(KeyError, lambda: g.get_group(pd.Interval(10, 15))) def test_get_group_grouped_by_tuple(self): # GH 8121 diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 08f8f8d48e705..54d47d02c5e8e 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -7,7 +7,8 @@ from pandas import (Series, Index, Float64Index, Int64Index, UInt64Index, RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, - TimedeltaIndex, PeriodIndex, notnull, isnull) + TimedeltaIndex, PeriodIndex, IntervalIndex, + notnull, isnull) from pandas.types.common import needs_i8_conversion from pandas.util.testing import assertRaisesRegexp from pandas._libs.tslib import iNaT @@ -255,18 +256,21 @@ def test_ensure_copied_data(self): tm.assert_numpy_array_equal(index.values, result.values, check_same='copy') - if not isinstance(index, PeriodIndex): - result = index_type(index.values, copy=False, **init_kwargs) - tm.assert_numpy_array_equal(index.values, result.values, - check_same='same') - tm.assert_numpy_array_equal(index._values, result._values, - check_same='same') - else: + if isinstance(index, PeriodIndex): # .values an object array of Period, thus copied result = index_type(ordinal=index.asi8, copy=False, **init_kwargs) tm.assert_numpy_array_equal(index._values, result._values, check_same='same') 
+ elif isinstance(index, IntervalIndex): + # checked in test_interval.py + pass + else: + result = index_type(index.values, copy=False, **init_kwargs) + tm.assert_numpy_array_equal(index.values, result.values, + check_same='same') + tm.assert_numpy_array_equal(index._values, result._values, + check_same='same') def test_copy_and_deepcopy(self): from copy import copy, deepcopy @@ -377,8 +381,9 @@ def test_memory_usage(self): result2 = index.memory_usage() result3 = index.memory_usage(deep=True) - # RangeIndex doesn't use a hashtable engine - if not isinstance(index, RangeIndex): + # RangeIndex, IntervalIndex + # don't have engines + if not isinstance(index, (RangeIndex, IntervalIndex)): self.assertTrue(result2 > result) if index.inferred_type == 'object': diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index a8197b070b032..cc819ff83b1dd 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -14,7 +14,7 @@ from pandas import (period_range, date_range, Series, DataFrame, Float64Index, Int64Index, CategoricalIndex, DatetimeIndex, TimedeltaIndex, - PeriodIndex) + PeriodIndex, isnull) from pandas.core.index import _get_combined_index from pandas.util.testing import assert_almost_equal from pandas.compat.numpy import np_datetime64_compat @@ -504,7 +504,7 @@ def test_is_(self): def test_asof(self): d = self.dateIndex[0] self.assertEqual(self.dateIndex.asof(d), d) - self.assertTrue(np.isnan(self.dateIndex.asof(d - timedelta(1)))) + self.assertTrue(isnull(self.dateIndex.asof(d - timedelta(1)))) d = self.dateIndex[-1] self.assertEqual(self.dateIndex.asof(d + timedelta(1)), d) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 0d75ba5f2bd46..f2e409deb2ce4 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -8,7 +8,7 @@ import numpy as np -from pandas import Categorical, compat, notnull +from pandas import Categorical, IntervalIndex, compat, notnull from pandas.util.testing import assert_almost_equal import pandas.core.config as cf import pandas as pd @@ -343,11 +343,26 @@ def test_astype(self): self.assertIsInstance(result, Index) self.assertNotIsInstance(result, CategoricalIndex) + # interval + ii = IntervalIndex.from_arrays(left=[-0.001, 2.0], + right=[2, 4], + closed='right') + + ci = CategoricalIndex(Categorical.from_codes( + [0, 1, -1], categories=ii, ordered=True)) + + result = ci.astype('interval') + expected = ii.take([0, 1, -1]) + tm.assert_index_equal(result, expected) + + result = IntervalIndex.from_intervals(result.values) + tm.assert_index_equal(result, expected) + def test_reindex_base(self): # determined by cat ordering idx = self.create_index() - expected = np.array([4, 0, 1, 5, 2, 3], dtype=np.intp) + expected = np.arange(len(idx), dtype=np.intp) actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py new file mode 100644 index 0000000000000..25ca961895ca3 --- /dev/null +++ b/pandas/tests/indexes/test_interval.py @@ -0,0 +1,798 @@ +from __future__ import division + +import pytest +import numpy as np + +from pandas import (Interval, IntervalIndex, Index, isnull, + interval_range, Timestamp, Timedelta) +from pandas._libs.interval import IntervalTree +from pandas.tests.indexes.common import Base +import pandas.util.testing as tm +import pandas as pd + + +class TestIntervalIndex(Base, tm.TestCase): + _holder = 
IntervalIndex + + def setUp(self): + self.index = IntervalIndex.from_arrays([0, 1], [1, 2]) + self.index_with_nan = IntervalIndex.from_tuples( + [(0, 1), np.nan, (1, 2)]) + self.indices = dict(intervalIndex=tm.makeIntervalIndex(10)) + + def create_index(self): + return IntervalIndex.from_breaks(np.arange(10)) + + def test_constructors(self): + expected = self.index + actual = IntervalIndex.from_breaks(np.arange(3), closed='right') + self.assertTrue(expected.equals(actual)) + + alternate = IntervalIndex.from_breaks(np.arange(3), closed='left') + self.assertFalse(expected.equals(alternate)) + + actual = IntervalIndex.from_intervals([Interval(0, 1), Interval(1, 2)]) + self.assertTrue(expected.equals(actual)) + + actual = IntervalIndex([Interval(0, 1), Interval(1, 2)]) + self.assertTrue(expected.equals(actual)) + + actual = IntervalIndex.from_arrays(np.arange(2), np.arange(2) + 1, + closed='right') + self.assertTrue(expected.equals(actual)) + + actual = Index([Interval(0, 1), Interval(1, 2)]) + self.assertIsInstance(actual, IntervalIndex) + self.assertTrue(expected.equals(actual)) + + actual = Index(expected) + self.assertIsInstance(actual, IntervalIndex) + self.assertTrue(expected.equals(actual)) + + def test_constructors_other(self): + + # all-nan + result = IntervalIndex.from_intervals([np.nan]) + expected = np.array([np.nan], dtype=object) + tm.assert_numpy_array_equal(result.values, expected) + + # empty + result = IntervalIndex.from_intervals([]) + expected = np.array([], dtype=object) + tm.assert_numpy_array_equal(result.values, expected) + + def test_constructors_errors(self): + + # scalar + with pytest.raises(TypeError): + IntervalIndex(5) + + # not an interval + with pytest.raises(TypeError): + IntervalIndex([0, 1]) + + with pytest.raises(TypeError): + IntervalIndex.from_intervals([0, 1]) + + # invalid closed + with pytest.raises(ValueError): + IntervalIndex.from_arrays([0, 1], [1, 2], closed='invalid') + + # mismatched closed + with pytest.raises(ValueError): + IntervalIndex.from_intervals([Interval(0, 1), + Interval(1, 2, closed='left')]) + + with pytest.raises(ValueError): + IntervalIndex.from_arrays([0, 10], [3, 5]) + + with pytest.raises(ValueError): + Index([Interval(0, 1), Interval(2, 3, closed='left')]) + + # no point in nesting periods in an IntervalIndex + with pytest.raises(ValueError): + IntervalIndex.from_breaks( + pd.period_range('2000-01-01', periods=3)) + + def test_constructors_datetimelike(self): + + # DTI / TDI + for idx in [pd.date_range('20130101', periods=5), + pd.timedelta_range('1 day', periods=5)]: + result = IntervalIndex.from_breaks(idx) + expected = IntervalIndex.from_breaks(idx.values) + tm.assert_index_equal(result, expected) + + expected_scalar_type = type(idx[0]) + i = result[0] + self.assertTrue(isinstance(i.left, expected_scalar_type)) + self.assertTrue(isinstance(i.right, expected_scalar_type)) + + def test_constructors_error(self): + + # non-intervals + def f(): + IntervalIndex.from_intervals([0.997, 4.0]) + self.assertRaises(TypeError, f) + + def test_properties(self): + index = self.index + self.assertEqual(len(index), 2) + self.assertEqual(index.size, 2) + self.assertEqual(index.shape, (2, )) + + self.assert_index_equal(index.left, Index([0, 1])) + self.assert_index_equal(index.right, Index([1, 2])) + self.assert_index_equal(index.mid, Index([0.5, 1.5])) + + self.assertEqual(index.closed, 'right') + + expected = np.array([Interval(0, 1), Interval(1, 2)], dtype=object) + self.assert_numpy_array_equal(np.asarray(index), expected) + 
self.assert_numpy_array_equal(index.values, expected) + + # with nans + index = self.index_with_nan + self.assertEqual(len(index), 3) + self.assertEqual(index.size, 3) + self.assertEqual(index.shape, (3, )) + + self.assert_index_equal(index.left, Index([0, np.nan, 1])) + self.assert_index_equal(index.right, Index([1, np.nan, 2])) + self.assert_index_equal(index.mid, Index([0.5, np.nan, 1.5])) + + self.assertEqual(index.closed, 'right') + + expected = np.array([Interval(0, 1), np.nan, + Interval(1, 2)], dtype=object) + self.assert_numpy_array_equal(np.asarray(index), expected) + self.assert_numpy_array_equal(index.values, expected) + + def test_with_nans(self): + index = self.index + self.assertFalse(index.hasnans) + self.assert_numpy_array_equal(index.isnull(), + np.array([False, False])) + self.assert_numpy_array_equal(index.notnull(), + np.array([True, True])) + + index = self.index_with_nan + self.assertTrue(index.hasnans) + self.assert_numpy_array_equal(index.notnull(), + np.array([True, False, True])) + self.assert_numpy_array_equal(index.isnull(), + np.array([False, True, False])) + + def test_copy(self): + actual = self.index.copy() + self.assertTrue(actual.equals(self.index)) + + actual = self.index.copy(deep=True) + self.assertTrue(actual.equals(self.index)) + self.assertIsNot(actual.left, self.index.left) + + def test_ensure_copied_data(self): + # exercise the copy flag in the constructor + + # not copying + index = self.index + result = IntervalIndex(index, copy=False) + tm.assert_numpy_array_equal(index.left.values, result.left.values, + check_same='same') + tm.assert_numpy_array_equal(index.right.values, result.right.values, + check_same='same') + + # by-definition make a copy + result = IntervalIndex.from_intervals(index.values, copy=False) + tm.assert_numpy_array_equal(index.left.values, result.left.values, + check_same='copy') + tm.assert_numpy_array_equal(index.right.values, result.right.values, + check_same='copy') + + def test_equals(self): + + idx = self.index + self.assertTrue(idx.equals(idx)) + self.assertTrue(idx.equals(idx.copy())) + + self.assertFalse(idx.equals(idx.astype(object))) + self.assertFalse(idx.equals(np.array(idx))) + self.assertFalse(idx.equals(list(idx))) + + self.assertFalse(idx.equals([1, 2])) + self.assertFalse(idx.equals(np.array([1, 2]))) + self.assertFalse(idx.equals( + pd.date_range('20130101', periods=2))) + + def test_astype(self): + + idx = self.index + + for dtype in [np.int64, np.float64, 'datetime64[ns]', + 'datetime64[ns, US/Eastern]', 'timedelta64', + 'period[M]']: + self.assertRaises(ValueError, idx.astype, dtype) + + result = idx.astype(object) + tm.assert_index_equal(result, Index(idx.values, dtype='object')) + self.assertFalse(idx.equals(result)) + self.assertTrue(idx.equals(IntervalIndex.from_intervals(result))) + + result = idx.astype('interval') + tm.assert_index_equal(result, idx) + self.assertTrue(result.equals(idx)) + + result = idx.astype('category') + expected = pd.Categorical(idx, ordered=True) + tm.assert_categorical_equal(result, expected) + + def test_where(self): + expected = self.index + result = self.index.where(self.index.notnull()) + tm.assert_index_equal(result, expected) + + idx = IntervalIndex.from_breaks([1, 2]) + result = idx.where([True, False]) + expected = IntervalIndex.from_intervals( + [Interval(1.0, 2.0, closed='right'), np.nan]) + tm.assert_index_equal(result, expected) + + def test_where_array_like(self): + pass + + def test_delete(self): + expected = IntervalIndex.from_breaks([1, 2]) + actual = 
self.index.delete(0)
+        self.assertTrue(expected.equals(actual))
+
+    def test_insert(self):
+        expected = IntervalIndex.from_breaks(range(4))
+        actual = self.index.insert(2, Interval(2, 3))
+        self.assertTrue(expected.equals(actual))
+
+        self.assertRaises(ValueError, self.index.insert, 0, 1)
+        self.assertRaises(ValueError, self.index.insert, 0,
+                          Interval(2, 3, closed='left'))
+
+    def test_take(self):
+        actual = self.index.take([0, 1])
+        self.assertTrue(self.index.equals(actual))
+
+        expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2])
+        actual = self.index.take([0, 0, 1])
+        self.assertTrue(expected.equals(actual))
+
+    def test_monotonic_and_unique(self):
+        self.assertTrue(self.index.is_monotonic)
+        self.assertTrue(self.index.is_unique)
+
+        idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)])
+        self.assertTrue(idx.is_monotonic)
+        self.assertTrue(idx.is_unique)
+
+        idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (1, 2)])
+        self.assertFalse(idx.is_monotonic)
+        self.assertTrue(idx.is_unique)
+
+        idx = IntervalIndex.from_tuples([(0, 2), (0, 2)])
+        self.assertFalse(idx.is_unique)
+        self.assertTrue(idx.is_monotonic)
+
+    @pytest.mark.xfail(reason='not a valid repr as we use interval notation')
+    def test_repr(self):
+        i = IntervalIndex.from_tuples([(0, 1), (1, 2)], closed='right')
+        expected = ("IntervalIndex(left=[0, 1],"
+                    "\n              right=[1, 2],"
+                    "\n              closed='right',"
+                    "\n              dtype='interval[int64]')")
+        self.assertEqual(repr(i), expected)
+
+        i = IntervalIndex.from_tuples([(Timestamp('20130101'),
+                                        Timestamp('20130102')),
+                                       (Timestamp('20130102'),
+                                        Timestamp('20130103'))],
+                                      closed='right')
+        expected = ("IntervalIndex(left=['2013-01-01', '2013-01-02'],"
+                    "\n              right=['2013-01-02', '2013-01-03'],"
+                    "\n              closed='right',"
+                    "\n              dtype='interval[datetime64[ns]]')")
+        self.assertEqual(repr(i), expected)
+
+    @pytest.mark.xfail(reason='not a valid repr as we use interval notation')
+    def test_repr_max_seq_item_setting(self):
+        super(TestIntervalIndex, self).test_repr_max_seq_item_setting()
+
+    @pytest.mark.xfail(reason='not a valid repr as we use interval notation')
+    def test_repr_roundtrip(self):
+        super(TestIntervalIndex, self).test_repr_roundtrip()
+
+    def test_get_item(self):
+        i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan),
+                                      closed='right')
+        assert i[0] == Interval(0.0, 1.0)
+        assert i[1] == Interval(1.0, 2.0)
+        assert isnull(i[2])
+
+        result = i[0:1]
+        expected = IntervalIndex.from_arrays((0.,), (1.,), closed='right')
+        tm.assert_index_equal(result, expected)
+
+        result = i[0:2]
+        expected = IntervalIndex.from_arrays((0., 1), (1., 2.), closed='right')
+        tm.assert_index_equal(result, expected)
+
+        result = i[1:3]
+        expected = IntervalIndex.from_arrays((1., np.nan), (2., np.nan),
+                                             closed='right')
+        tm.assert_index_equal(result, expected)
+    def test_get_loc_value(self):
+        self.assertRaises(KeyError, self.index.get_loc, 0)
+        self.assertEqual(self.index.get_loc(0.5), 0)
+        self.assertEqual(self.index.get_loc(1), 0)
+        self.assertEqual(self.index.get_loc(1.5), 1)
+        self.assertEqual(self.index.get_loc(2), 1)
+        self.assertRaises(KeyError, self.index.get_loc, -1)
+        self.assertRaises(KeyError, self.index.get_loc, 3)
+
+        idx = IntervalIndex.from_tuples([(0, 2), (1, 3)])
+        self.assertEqual(idx.get_loc(0.5), 0)
+        self.assertEqual(idx.get_loc(1), 0)
+        self.assert_numpy_array_equal(idx.get_loc(1.5),
+                                      np.array([0, 1], dtype='int64'))
+        self.assert_numpy_array_equal(np.sort(idx.get_loc(2)),
+                                      np.array([0, 1], dtype='int64'))
+        self.assertEqual(idx.get_loc(3), 1)
+        self.assertRaises(KeyError, idx.get_loc, 3.5)
+
+        idx = IntervalIndex.from_arrays([0, 2], [1, 3])
+        self.assertRaises(KeyError, idx.get_loc, 1.5)
+
+    def slice_locs_cases(self, breaks):
+        # TODO: same tests for more index types
+        index = IntervalIndex.from_breaks(breaks, closed='right')
+        self.assertEqual(index.slice_locs(), (0, 2))
+        self.assertEqual(index.slice_locs(0, 1), (0, 1))
+        self.assertEqual(index.slice_locs(1, 1), (0, 1))
+        self.assertEqual(index.slice_locs(0, 2), (0, 2))
+        self.assertEqual(index.slice_locs(0.5, 1.5), (0, 2))
+        self.assertEqual(index.slice_locs(0, 0.5), (0, 1))
+        self.assertEqual(index.slice_locs(start=1), (0, 2))
+        self.assertEqual(index.slice_locs(start=1.2), (1, 2))
+        self.assertEqual(index.slice_locs(end=1), (0, 1))
+        self.assertEqual(index.slice_locs(end=1.1), (0, 2))
+        self.assertEqual(index.slice_locs(end=1.0), (0, 1))
+        self.assertEqual(*index.slice_locs(-1, -1))
+
+        index = IntervalIndex.from_breaks(breaks, closed='neither')
+        self.assertEqual(index.slice_locs(0, 1), (0, 1))
+        self.assertEqual(index.slice_locs(0, 2), (0, 2))
+        self.assertEqual(index.slice_locs(0.5, 1.5), (0, 2))
+        self.assertEqual(index.slice_locs(1, 1), (1, 1))
+        self.assertEqual(index.slice_locs(1, 2), (1, 2))
+
+        index = IntervalIndex.from_breaks(breaks, closed='both')
+        self.assertEqual(index.slice_locs(1, 1), (0, 2))
+        self.assertEqual(index.slice_locs(1, 2), (0, 2))
+
+    def test_slice_locs_int64(self):
+        self.slice_locs_cases([0, 1, 2])
+
+    def test_slice_locs_float64(self):
+        self.slice_locs_cases([0.0, 1.0, 2.0])
+
+    def slice_locs_decreasing_cases(self, tuples):
+        index = IntervalIndex.from_tuples(tuples)
+        self.assertEqual(index.slice_locs(1.5, 0.5), (1, 3))
+        self.assertEqual(index.slice_locs(2, 0), (1, 3))
+        self.assertEqual(index.slice_locs(2, 1), (1, 3))
+        self.assertEqual(index.slice_locs(3, 1.1), (0, 3))
+        self.assertEqual(index.slice_locs(3, 3), (0, 2))
+        self.assertEqual(index.slice_locs(3.5, 3.3), (0, 1))
+        self.assertEqual(index.slice_locs(1, -3), (2, 3))
+        self.assertEqual(*index.slice_locs(-1, -1))
+
+    def test_slice_locs_decreasing_int64(self):
+        self.slice_locs_decreasing_cases([(2, 4), (1, 3), (0, 2)])
+
+    def test_slice_locs_decreasing_float64(self):
+        self.slice_locs_decreasing_cases([(2., 4.), (1., 3.), (0., 2.)])
+
+    def test_slice_locs_fails(self):
+        index = IntervalIndex.from_tuples([(1, 2), (0, 1), (2, 3)])
+        with self.assertRaises(KeyError):
+            index.slice_locs(1, 2)
+
+    def test_get_loc_interval(self):
+        self.assertEqual(self.index.get_loc(Interval(0, 1)), 0)
+        self.assertEqual(self.index.get_loc(Interval(0, 0.5)), 0)
+        self.assertEqual(self.index.get_loc(Interval(0, 1, 'left')), 0)
+        self.assertRaises(KeyError, self.index.get_loc, Interval(2, 3))
+        self.assertRaises(KeyError, self.index.get_loc,
+                          Interval(-1, 0, 'left'))
+    def test_get_indexer(self):
+        actual = self.index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3])
+        expected = np.array([-1, -1, 0, 0, 1, 1, -1], dtype='int64')
+        self.assert_numpy_array_equal(actual, expected)
+
+        actual = self.index.get_indexer(self.index)
+        expected = np.array([0, 1], dtype='int64')
+        self.assert_numpy_array_equal(actual, expected)
+
+        index = IntervalIndex.from_breaks([0, 1, 2], closed='left')
+        actual = index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3])
+        expected = np.array([-1, 0, 0, 1, 1, -1, -1], dtype='int64')
+        self.assert_numpy_array_equal(actual, expected)
+
+        actual = self.index.get_indexer(index[:1])
+        expected = np.array([0], dtype='int64')
+        self.assert_numpy_array_equal(actual, expected)
+
+        actual = self.index.get_indexer(index)
+        expected = np.array([-1, 1], dtype='int64')
+        self.assert_numpy_array_equal(actual, expected)
+
+    def test_get_indexer_subintervals(self):
+
+        # TODO: is this right?
+        # return indexers for wholly contained subintervals
+        target = IntervalIndex.from_breaks(np.linspace(0, 2, 5))
+        actual = self.index.get_indexer(target)
+        expected = np.array([0, 0, 1, 1], dtype='int64')
+        self.assert_numpy_array_equal(actual, expected)
+
+        target = IntervalIndex.from_breaks([0, 0.67, 1.33, 2])
+        actual = self.index.get_indexer(target)
+        expected = np.array([0, 0, 1, 1], dtype='int64')
+        self.assert_numpy_array_equal(actual, expected)
+
+        actual = self.index.get_indexer(target[[0, -1]])
+        expected = np.array([0, 1], dtype='int64')
+        self.assert_numpy_array_equal(actual, expected)
+
+        target = IntervalIndex.from_breaks([0, 0.33, 0.67, 1], closed='left')
+        actual = self.index.get_indexer(target)
+        expected = np.array([0, 0, 0], dtype='int64')
+        self.assert_numpy_array_equal(actual, expected)
+
+    def test_contains(self):
+        # only endpoints are valid
+        i = IntervalIndex.from_arrays([0, 1], [1, 2])
+
+        # invalid
+        self.assertNotIn(0, i)
+        self.assertNotIn(1, i)
+        self.assertNotIn(2, i)
+
+        # valid
+        self.assertIn(Interval(0, 1), i)
+        self.assertIn(Interval(0, 2), i)
+        self.assertIn(Interval(0, 0.5), i)
+        self.assertNotIn(Interval(3, 5), i)
+        self.assertNotIn(Interval(-1, 0, closed='left'), i)
+
+    def testcontains(self):
+        # can select values that are IN the range of a value
+        i = IntervalIndex.from_arrays([0, 1], [1, 2])
+
+        assert i.contains(0.1)
+        assert i.contains(0.5)
+        assert i.contains(1)
+        assert i.contains(Interval(0, 1))
+        assert i.contains(Interval(0, 2))
+
+        # these overlap completely
+        assert i.contains(Interval(0, 3))
+        assert i.contains(Interval(1, 3))
+
+        assert not i.contains(20)
+        assert not i.contains(-20)
+
+    def test_dropna(self):
+
+        expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)])
+
+        ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan])
+        result = ii.dropna()
+        tm.assert_index_equal(result, expected)
+
+        ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan])
+        result = ii.dropna()
+        tm.assert_index_equal(result, expected)
+
+    def test_non_contiguous(self):
+        index = IntervalIndex.from_tuples([(0, 1), (2, 3)])
+        target = [0.5, 1.5, 2.5]
+        actual = index.get_indexer(target)
+        expected = np.array([0, -1, 1], dtype='int64')
+        self.assert_numpy_array_equal(actual, expected)
+
+        self.assertNotIn(1.5, index)
+
+    def test_union(self):
+        other = IntervalIndex.from_arrays([2], [3])
+        expected = IntervalIndex.from_arrays(range(3), range(1, 4))
+        actual = self.index.union(other)
+        self.assertTrue(expected.equals(actual))
+
+        actual = other.union(self.index)
+        self.assertTrue(expected.equals(actual))
+
+        tm.assert_index_equal(self.index.union(self.index), self.index)
+        tm.assert_index_equal(self.index.union(self.index[:1]),
+                              self.index)
+
+    def test_intersection(self):
+        other = IntervalIndex.from_breaks([1, 2, 3])
+        expected = IntervalIndex.from_breaks([1, 2])
+        actual = self.index.intersection(other)
+        self.assertTrue(expected.equals(actual))
+
+        tm.assert_index_equal(self.index.intersection(self.index),
+                              self.index)
+
+    def test_difference(self):
+        tm.assert_index_equal(self.index.difference(self.index[:1]),
+                              self.index[1:])
+
+    def test_symmetric_difference(self):
+        result = self.index[:1].symmetric_difference(self.index[1:])
+        expected = self.index
+        tm.assert_index_equal(result, expected)
+
+    def test_set_operation_errors(self):
+        self.assertRaises(ValueError, self.index.union, 
self.index.left) + + other = IntervalIndex.from_breaks([0, 1, 2], closed='neither') + self.assertRaises(ValueError, self.index.union, other) + + def test_isin(self): + actual = self.index.isin(self.index) + self.assert_numpy_array_equal(np.array([True, True]), actual) + + actual = self.index.isin(self.index[:1]) + self.assert_numpy_array_equal(np.array([True, False]), actual) + + def test_comparison(self): + actual = Interval(0, 1) < self.index + expected = np.array([False, True]) + self.assert_numpy_array_equal(actual, expected) + + actual = Interval(0.5, 1.5) < self.index + expected = np.array([False, True]) + self.assert_numpy_array_equal(actual, expected) + actual = self.index > Interval(0.5, 1.5) + self.assert_numpy_array_equal(actual, expected) + + actual = self.index == self.index + expected = np.array([True, True]) + self.assert_numpy_array_equal(actual, expected) + actual = self.index <= self.index + self.assert_numpy_array_equal(actual, expected) + actual = self.index >= self.index + self.assert_numpy_array_equal(actual, expected) + + actual = self.index < self.index + expected = np.array([False, False]) + self.assert_numpy_array_equal(actual, expected) + actual = self.index > self.index + self.assert_numpy_array_equal(actual, expected) + + actual = self.index == IntervalIndex.from_breaks([0, 1, 2], 'left') + self.assert_numpy_array_equal(actual, expected) + + actual = self.index == self.index.values + self.assert_numpy_array_equal(actual, np.array([True, True])) + actual = self.index.values == self.index + self.assert_numpy_array_equal(actual, np.array([True, True])) + actual = self.index <= self.index.values + self.assert_numpy_array_equal(actual, np.array([True, True])) + actual = self.index != self.index.values + self.assert_numpy_array_equal(actual, np.array([False, False])) + actual = self.index > self.index.values + self.assert_numpy_array_equal(actual, np.array([False, False])) + actual = self.index.values > self.index + self.assert_numpy_array_equal(actual, np.array([False, False])) + + # invalid comparisons + actual = self.index == 0 + self.assert_numpy_array_equal(actual, np.array([False, False])) + actual = self.index == self.index.left + self.assert_numpy_array_equal(actual, np.array([False, False])) + + with self.assertRaisesRegexp(TypeError, 'unorderable types'): + self.index > 0 + with self.assertRaisesRegexp(TypeError, 'unorderable types'): + self.index <= 0 + with self.assertRaises(TypeError): + self.index > np.arange(2) + with self.assertRaises(ValueError): + self.index > np.arange(3) + + def test_missing_values(self): + idx = pd.Index([np.nan, pd.Interval(0, 1), pd.Interval(1, 2)]) + idx2 = pd.IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2]) + assert idx.equals(idx2) + + with pytest.raises(ValueError): + IntervalIndex.from_arrays([np.nan, 0, 1], np.array([0, 1, 2])) + + self.assert_numpy_array_equal(isnull(idx), + np.array([True, False, False])) + + def test_sort_values(self): + expected = IntervalIndex.from_breaks([1, 2, 3, 4]) + actual = IntervalIndex.from_tuples([(3, 4), (1, 2), + (2, 3)]).sort_values() + tm.assert_index_equal(expected, actual) + + # nan + idx = self.index_with_nan + mask = idx.isnull() + self.assert_numpy_array_equal(mask, np.array([False, True, False])) + + result = idx.sort_values() + mask = result.isnull() + self.assert_numpy_array_equal(mask, np.array([False, False, True])) + + result = idx.sort_values(ascending=False) + mask = result.isnull() + self.assert_numpy_array_equal(mask, np.array([True, False, False])) + + def 
test_datetime(self): + dates = pd.date_range('2000', periods=3) + idx = IntervalIndex.from_breaks(dates) + + tm.assert_index_equal(idx.left, dates[:2]) + tm.assert_index_equal(idx.right, dates[-2:]) + + expected = pd.date_range('2000-01-01T12:00', periods=2) + tm.assert_index_equal(idx.mid, expected) + + self.assertNotIn(pd.Timestamp('2000-01-01T12'), idx) + + target = pd.date_range('1999-12-31T12:00', periods=7, freq='12H') + actual = idx.get_indexer(target) + expected = np.array([-1, -1, 0, 0, 1, 1, -1], dtype='int64') + self.assert_numpy_array_equal(actual, expected) + + def test_append(self): + + index1 = IntervalIndex.from_arrays([0, 1], [1, 2]) + index2 = IntervalIndex.from_arrays([1, 2], [2, 3]) + + result = index1.append(index2) + expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3]) + tm.assert_index_equal(result, expected) + + result = index1.append([index1, index2]) + expected = IntervalIndex.from_arrays([0, 1, 0, 1, 1, 2], + [1, 2, 1, 2, 2, 3]) + tm.assert_index_equal(result, expected) + + def f(): + index1.append(IntervalIndex.from_arrays([0, 1], [1, 2], + closed='both')) + + self.assertRaises(ValueError, f) + + +class TestIntervalRange(tm.TestCase): + + def test_construction(self): + result = interval_range(0, 5, name='foo', closed='both') + expected = IntervalIndex.from_breaks( + np.arange(0, 5), name='foo', closed='both') + tm.assert_index_equal(result, expected) + + def test_errors(self): + + # not enough params + def f(): + interval_range(0) + + self.assertRaises(ValueError, f) + + def f(): + interval_range(periods=2) + + self.assertRaises(ValueError, f) + + def f(): + interval_range() + + self.assertRaises(ValueError, f) + + # mixed units + def f(): + interval_range(0, Timestamp('20130101'), freq=2) + + self.assertRaises(ValueError, f) + + def f(): + interval_range(0, 10, freq=Timedelta('1day')) + + self.assertRaises(ValueError, f) + +
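interval_range is the IntervalIndex counterpart of date_range/period_range, and its signature was still settling when this patch landed (test_construction above builds the expected result from np.arange(0, 5)). In the form that stabilised in later pandas releases, a hedged sketch looks like:

    import pandas as pd

    # five right-closed integer intervals: (0, 1], (1, 2], ..., (4, 5]
    pd.interval_range(start=0, end=5)

    # datetime-backed intervals work the same way
    pd.interval_range(start=pd.Timestamp('2017-01-01'), periods=3, freq='D')

+class TestIntervalTree(tm.TestCase): + def setUp(self): + gentree = lambda dtype: IntervalTree(np.arange(5, dtype=dtype), + np.arange(5, dtype=dtype) + 2) + self.tree = gentree('int64') + self.trees = {dtype: gentree(dtype) + for dtype in ['int32', 'int64', 'float32', 'float64']} + + def test_get_loc(self): + for dtype, tree in self.trees.items(): + self.assert_numpy_array_equal(tree.get_loc(1), + np.array([0], dtype='int64')) + self.assert_numpy_array_equal(np.sort(tree.get_loc(2)), + np.array([0, 1], dtype='int64')) + with self.assertRaises(KeyError): + tree.get_loc(-1) + + def test_get_indexer(self): + for dtype, tree in self.trees.items(): + self.assert_numpy_array_equal( + tree.get_indexer(np.array([1.0, 5.5, 6.5])), + np.array([0, 4, -1], dtype='int64')) + with self.assertRaises(KeyError): + tree.get_indexer(np.array([3.0])) + + def test_get_indexer_non_unique(self): + indexer, missing = self.tree.get_indexer_non_unique( + np.array([1.0, 2.0, 6.5])) + self.assert_numpy_array_equal(indexer[:1], + np.array([0], dtype='int64')) + self.assert_numpy_array_equal(np.sort(indexer[1:3]), + np.array([0, 1], dtype='int64')) + self.assert_numpy_array_equal(np.sort(indexer[3:]), + np.array([-1], dtype='int64')) + self.assert_numpy_array_equal(missing, np.array([2], dtype='int64')) + + def test_duplicates(self): + tree = IntervalTree([0, 0, 0], [1, 1, 1]) + self.assert_numpy_array_equal(np.sort(tree.get_loc(0.5)), + np.array([0, 1, 2], dtype='int64')) + + with self.assertRaises(KeyError): + tree.get_indexer(np.array([0.5])) + + indexer, missing = 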
tree.get_indexer_non_unique(np.array([0.5])) + self.assert_numpy_array_equal(np.sort(indexer), + np.array([0, 1, 2], dtype='int64')) + self.assert_numpy_array_equal(missing, np.array([], dtype='int64')) + + def test_get_loc_closed(self): + for closed in ['left', 'right', 'both', 'neither']: + tree = IntervalTree([0], [1], closed=closed) + for p, errors in [(0, tree.open_left), + (1, tree.open_right)]: + if errors: + with self.assertRaises(KeyError): + tree.get_loc(p) + else: + self.assert_numpy_array_equal(tree.get_loc(p), + np.array([0], dtype='int64')) + + def test_get_indexer_closed(self): + x = np.arange(1000, dtype='int64') + found = x + not_found = (-1 * np.ones(1000)).astype('int64') + for leaf_size in [1, 10, 100, 10000]: + for closed in ['left', 'right', 'both', 'neither']: + tree = IntervalTree(x, x + 0.5, closed=closed, + leaf_size=leaf_size) + self.assert_numpy_array_equal(found, + tree.get_indexer(x + 0.25)) + + expected = found if tree.closed_left else not_found + self.assert_numpy_array_equal(expected, + tree.get_indexer(x + 0.0)) + + expected = found if tree.closed_right else not_found + self.assert_numpy_array_equal(expected, + tree.get_indexer(x + 0.5)) diff --git a/pandas/tests/indexing/test_interval.py b/pandas/tests/indexing/test_interval.py new file mode 100644 index 0000000000000..bccc21ed6c086 --- /dev/null +++ b/pandas/tests/indexing/test_interval.py @@ -0,0 +1,245 @@ +import pytest +import numpy as np +import pandas as pd + +from pandas import Series, DataFrame, IntervalIndex, Interval +import pandas.util.testing as tm + + +class TestIntervalIndex(tm.TestCase): + + def setUp(self): + self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) + + def test_loc_with_scalar(self): + + s = self.s + expected = 0 + + result = s.loc[0.5] + assert result == expected + + result = s.loc[1] + assert result == expected + + with pytest.raises(KeyError): + s.loc[0] + + expected = s.iloc[:3] + tm.assert_series_equal(expected, s.loc[:3]) + tm.assert_series_equal(expected, s.loc[:2.5]) + tm.assert_series_equal(expected, s.loc[0.1:2.5]) + tm.assert_series_equal(expected, s.loc[-1:3]) + + expected = s.iloc[1:4] + tm.assert_series_equal(expected, s.loc[[1.5, 2.5, 3.5]]) + tm.assert_series_equal(expected, s.loc[[2, 3, 4]]) + tm.assert_series_equal(expected, s.loc[[1.5, 3, 4]]) + + expected = s.iloc[2:5] + tm.assert_series_equal(expected, s.loc[s >= 2]) + + def test_getitem_with_scalar(self): + + s = self.s + expected = 0 + + result = s[0.5] + assert result == expected + + result = s[1] + assert result == expected + + with pytest.raises(KeyError): + s[0] + + expected = s.iloc[:3] + tm.assert_series_equal(expected, s[:3]) + tm.assert_series_equal(expected, s[:2.5]) + tm.assert_series_equal(expected, s[0.1:2.5]) + tm.assert_series_equal(expected, s[-1:3]) + + expected = s.iloc[1:4] + tm.assert_series_equal(expected, s[[1.5, 2.5, 3.5]]) + tm.assert_series_equal(expected, s[[2, 3, 4]]) + tm.assert_series_equal(expected, s[[1.5, 3, 4]]) + + expected = s.iloc[2:5] + tm.assert_series_equal(expected, s[s >= 2]) + + def test_with_interval(self): + + s = self.s + expected = 0 + + result = s.loc[Interval(0, 1)] + assert result == expected + + result = s[Interval(0, 1)] + assert result == expected + + expected = s.iloc[3:5] + result = s.loc[Interval(3, 6)] + tm.assert_series_equal(expected, result) + + expected = s.iloc[3:5] + result = s.loc[[Interval(3, 6)]] + tm.assert_series_equal(expected, result) + + expected = s.iloc[3:5] + result = s.loc[[Interval(3, 5)]] + 
tm.assert_series_equal(expected, result) + + # missing + with pytest.raises(KeyError): + s.loc[Interval(-2, 0)] + + with pytest.raises(KeyError): + s[Interval(-2, 0)] + + with pytest.raises(KeyError): + s.loc[Interval(5, 6)] + + with pytest.raises(KeyError): + s[Interval(5, 6)] + + def test_with_slices(self): + + s = self.s + + # slice of interval + with pytest.raises(NotImplementedError): + result = s.loc[Interval(3, 6):] + + with pytest.raises(NotImplementedError): + result = s[Interval(3, 6):] + + expected = s.iloc[3:5] + result = s[[Interval(3, 6)]] + tm.assert_series_equal(expected, result) + + # slice of scalar with step != 1 + with pytest.raises(ValueError): + s[0:4:2] + + def test_with_overlaps(self): + + s = self.s + expected = s.iloc[[3, 4, 3, 4]] + result = s.loc[[Interval(3, 6), Interval(3, 6)]] + tm.assert_series_equal(expected, result) + + idx = IntervalIndex.from_tuples([(1, 5), (3, 7)]) + s = Series(range(len(idx)), index=idx) + + result = s[4] + expected = s + tm.assert_series_equal(expected, result) + + result = s[[4]] + expected = s + tm.assert_series_equal(expected, result) + + result = s.loc[[4]] + expected = s + tm.assert_series_equal(expected, result) + + result = s[Interval(3, 5)] + expected = s + tm.assert_series_equal(expected, result) + + result = s.loc[Interval(3, 5)] + expected = s + tm.assert_series_equal(expected, result) + + # doesn't intersect unique set of intervals + with pytest.raises(KeyError): + s[[Interval(3, 5)]] + + with pytest.raises(KeyError): + s.loc[[Interval(3, 5)]] + + def test_non_unique(self): + + idx = IntervalIndex.from_tuples([(1, 3), (3, 7)]) + + s = pd.Series(range(len(idx)), index=idx) + + result = s.loc[Interval(1, 3)] + assert result == 0 + + result = s.loc[[Interval(1, 3)]] + expected = s.iloc[0:1] + tm.assert_series_equal(expected, result) + + def test_non_unique_moar(self): + + idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)]) + s = Series(range(len(idx)), index=idx) + + result = s.loc[Interval(1, 3)] + expected = s.iloc[[0, 1]] + tm.assert_series_equal(expected, result) + + # non-unique index and slices not allowed + with pytest.raises(ValueError): + s.loc[Interval(1, 3):] + + with pytest.raises(ValueError): + s[Interval(1, 3):] + + # non-unique + with pytest.raises(ValueError): + s[[Interval(1, 3)]] + + def test_non_matching(self): + s = self.s + + # this is a departure from our current + # indexing scheme, but simpler + with pytest.raises(KeyError): + s.loc[[-1, 3, 4, 5]] + + with pytest.raises(KeyError): + s.loc[[-1, 3]] + + def test_large_series(self): + s = Series(np.arange(1000000), + index=IntervalIndex.from_breaks(np.arange(1000001))) + + result1 = s.loc[:80000] + result2 = s.loc[0:80000] + result3 = s.loc[0:80000:1] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + + def test_loc_getitem_frame(self): + + df = DataFrame({'A': range(10)}) + s = pd.cut(df.A, 5) + df['B'] = s + df = df.set_index('B') + + result = df.loc[4] + expected = df.iloc[4:6] + tm.assert_frame_equal(result, expected) + + with pytest.raises(KeyError): + df.loc[10] + + # single list-like + result = df.loc[[4]] + expected = df.iloc[4:6] + tm.assert_frame_equal(result, expected) + + # non-unique + result = df.loc[[4, 5]] + expected = df.take([4, 5, 4, 5]) + tm.assert_frame_equal(result, expected) + + with pytest.raises(KeyError): + df.loc[[10]] + + # partial missing + with pytest.raises(KeyError): + df.loc[[10, 4]]
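Taken together, the file above pins down label-based indexing on an interval index; a compact hedged illustration (pandas >= 0.20; overlap-based matches such as s.loc[Interval(3, 6)] returning partial hits were later changed to raise KeyError):

    import numpy as np
    import pandas as pd

    s = pd.Series(np.arange(5), pd.IntervalIndex.from_breaks(np.arange(6)))

    s.loc[1.5]                # -> 1: a scalar selects the interval containing it
    s.loc[pd.Interval(1, 2)]  # -> 1: an Interval matches an index element exactly
    s.loc[s >= 2]             # boolean masks behave as with any other index

diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/test_interval.py new file 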
mode 100644 index 0000000000000..63e57fb472861 --- /dev/null +++ b/pandas/tests/scalar/test_interval.py @@ -0,0 +1,129 @@ +from __future__ import division + +import pytest +from pandas import Interval +import pandas.util.testing as tm + + +class TestInterval(tm.TestCase): + def setUp(self): + self.interval = Interval(0, 1) + + def test_properties(self): + self.assertEqual(self.interval.closed, 'right') + self.assertEqual(self.interval.left, 0) + self.assertEqual(self.interval.right, 1) + self.assertEqual(self.interval.mid, 0.5) + + def test_repr(self): + self.assertEqual(repr(self.interval), + "Interval(0, 1, closed='right')") + self.assertEqual(str(self.interval), "(0, 1]") + + interval_left = Interval(0, 1, closed='left') + self.assertEqual(repr(interval_left), + "Interval(0, 1, closed='left')") + self.assertEqual(str(interval_left), "[0, 1)") + + def test_contains(self): + self.assertIn(0.5, self.interval) + self.assertIn(1, self.interval) + self.assertNotIn(0, self.interval) + self.assertRaises(TypeError, lambda: self.interval in self.interval) + + interval = Interval(0, 1, closed='both') + self.assertIn(0, interval) + self.assertIn(1, interval) + + interval = Interval(0, 1, closed='neither') + self.assertNotIn(0, interval) + self.assertIn(0.5, interval) + self.assertNotIn(1, interval) + + def test_equal(self): + self.assertEqual(Interval(0, 1), Interval(0, 1, closed='right')) + self.assertNotEqual(Interval(0, 1), Interval(0, 1, closed='left')) + self.assertNotEqual(Interval(0, 1), 0) + + def test_comparison(self): + with self.assertRaisesRegexp(TypeError, 'unorderable types'): + Interval(0, 1) < 2 + + self.assertTrue(Interval(0, 1) < Interval(1, 2)) + self.assertTrue(Interval(0, 1) < Interval(0, 2)) + self.assertTrue(Interval(0, 1) < Interval(0.5, 1.5)) + self.assertTrue(Interval(0, 1) <= Interval(0, 1)) + self.assertTrue(Interval(0, 1) > Interval(-1, 2)) + self.assertTrue(Interval(0, 1) >= Interval(0, 1)) + + def test_hash(self): + # should not raise + hash(self.interval) + + def test_math_add(self): + expected = Interval(1, 2) + actual = self.interval + 1 + self.assertEqual(expected, actual) + + expected = Interval(1, 2) + actual = 1 + self.interval + self.assertEqual(expected, actual) + + actual = self.interval + actual += 1 + self.assertEqual(expected, actual) + + with pytest.raises(TypeError): + self.interval + Interval(1, 2) + + with pytest.raises(TypeError): + self.interval + 'foo' + + def test_math_sub(self): + expected = Interval(-1, 0) + actual = self.interval - 1 + self.assertEqual(expected, actual) + + actual = self.interval + actual -= 1 + self.assertEqual(expected, actual) + + with pytest.raises(TypeError): + self.interval - Interval(1, 2) + + with pytest.raises(TypeError): + self.interval - 'foo' + + def test_math_mult(self): + expected = Interval(0, 2) + actual = self.interval * 2 + self.assertEqual(expected, actual) + + expected = Interval(0, 2) + actual = 2 * self.interval + self.assertEqual(expected, actual) + + actual = self.interval + actual *= 2 + self.assertEqual(expected, actual) + + with pytest.raises(TypeError): + self.interval * Interval(1, 2) + + with pytest.raises(TypeError): + self.interval * 'foo' + + def test_math_div(self): + expected = Interval(0, 0.5) + actual = self.interval / 2.0 + self.assertEqual(expected, actual) + + actual = self.interval + actual /= 2.0 + self.assertEqual(expected, actual) + + with pytest.raises(TypeError): + self.interval / Interval(1, 2) + + with pytest.raises(TypeError): + self.interval / 'foo' diff --git 
a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 8ad07afcacfcc..f4297208b2e26 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -10,8 +10,7 @@ from pandas.types.common import is_categorical_dtype, is_datetime64tz_dtype from pandas import (Index, Series, isnull, date_range, - period_range, NaT) -from pandas.core.index import MultiIndex + NaT, period_range, MultiIndex, IntervalIndex) from pandas.tseries.index import Timestamp, DatetimeIndex from pandas._libs import lib @@ -543,6 +542,17 @@ def test_constructor_with_datetime_tz(self): expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern')) assert_series_equal(s, expected) + def test_construction_interval(self): + # construction from interval & array of intervals + index = IntervalIndex.from_breaks(np.arange(3), closed='right') + result = Series(index) + repr(result) + str(result) + tm.assert_index_equal(Index(result.values), index) + + result = Series(index.values) + tm.assert_index_equal(Index(result.values), index) + def test_construction_consistency(self): # make sure that we are not re-localizing upon construction diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index ea49abeee21c5..4a3332c2de6d8 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -10,7 +10,7 @@ import pandas as pd from pandas import (Series, DataFrame, isnull, date_range, - MultiIndex, Index, Timestamp, NaT) + MultiIndex, Index, Timestamp, NaT, IntervalIndex) from pandas.compat import range from pandas._libs.tslib import iNaT from pandas.util.testing import assert_series_equal, assert_frame_equal @@ -556,6 +556,15 @@ def test_dropna_no_nan(self): s2.dropna(inplace=True) self.assert_series_equal(s2, s) + def test_dropna_intervals(self): + s = Series([np.nan, 1, 2, 3], IntervalIndex.from_arrays( + [np.nan, 0, 1, 2], + [np.nan, 1, 2, 3])) + + result = s.dropna() + expected = s.iloc[1:] + assert_series_equal(result, expected) + def test_valid(self): ts = self.ts.copy() ts[::2] = np.NaN diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index 66ecba960ae0b..26c51ec976f74 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -3,9 +3,9 @@ import numpy as np import random -from pandas import (DataFrame, Series, MultiIndex) +from pandas import DataFrame, Series, MultiIndex, IntervalIndex -from pandas.util.testing import (assert_series_equal, assert_almost_equal) +from pandas.util.testing import assert_series_equal, assert_almost_equal import pandas.util.testing as tm from .common import TestData @@ -177,3 +177,18 @@ def test_sort_index_na_position(self): expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan]) index_sorted_series = series.sort_index(na_position='last') assert_series_equal(expected_series_last, index_sorted_series) + + def test_sort_index_intervals(self): + s = Series([np.nan, 1, 2, 3], IntervalIndex.from_arrays( + [0, 1, 2, 3], + [1, 2, 3, 4])) + + result = s.sort_index() + expected = s + assert_series_equal(result, expected) + + result = s.sort_index(ascending=False) + expected = Series([3, 2, 1, np.nan], IntervalIndex.from_arrays( + [3, 2, 1, 0], + [4, 3, 2, 1])) + assert_series_equal(result, expected) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index d9f81968c684d..cd1ec915d3aeb 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1,20 +1,20 @@ # 
-*- coding: utf-8 -*- -from pandas.compat import range import numpy as np from numpy.random import RandomState from numpy import nan from datetime import datetime from itertools import permutations -from pandas import (Series, Categorical, CategoricalIndex, Index, - Timestamp, DatetimeIndex) +from pandas import (Series, Categorical, CategoricalIndex, + Timestamp, DatetimeIndex, + Index, IntervalIndex) import pandas as pd from pandas import compat from pandas._libs import (groupby as libgroupby, algos as libalgos, hashtable) from pandas._libs.hashtable import unique_label_indices -from pandas.compat import lrange +from pandas.compat import lrange, range import pandas.core.algorithms as algos import pandas.util.testing as tm from pandas.compat.numpy import np_array_datetime64_compat @@ -588,24 +588,27 @@ def test_value_counts(self): arr = np.random.randn(4) factor = cut(arr, 4) - tm.assertIsInstance(factor, Categorical) + # tm.assertIsInstance(factor, Categorical) result = algos.value_counts(factor) - cats = ['(-1.194, -0.535]', '(-0.535, 0.121]', '(0.121, 0.777]', - '(0.777, 1.433]'] - expected_index = CategoricalIndex(cats, cats, ordered=True) - expected = Series([1, 1, 1, 1], index=expected_index) + breaks = [-1.194, -0.535, 0.121, 0.777, 1.433] + expected_index = pd.IntervalIndex.from_breaks( + breaks).astype('category') + expected = Series([1, 1, 1, 1], + index=expected_index) tm.assert_series_equal(result.sort_index(), expected.sort_index()) def test_value_counts_bins(self): s = [1, 2, 3, 4] result = algos.value_counts(s, bins=1) - self.assertEqual(result.tolist(), [4]) - self.assertEqual(result.index[0], 0.997) + expected = Series([4], + index=IntervalIndex.from_tuples([(0.996, 4.0)])) + tm.assert_series_equal(result, expected) result = algos.value_counts(s, bins=2, sort=False) - self.assertEqual(result.tolist(), [2, 2]) - self.assertEqual(result.index[0], 0.997) - self.assertEqual(result.index[1], 2.5) + expected = Series([2, 2], + index=IntervalIndex.from_tuples([(0.996, 2.5), + (2.5, 4.0)])) + tm.assert_series_equal(result, expected) def test_value_counts_dtypes(self): result = algos.value_counts([1, 1.]) @@ -657,6 +660,7 @@ def test_categorical(self): result = s.value_counts() expected = Series([3, 2, 1], index=pd.CategoricalIndex(['a', 'b', 'c'])) + tm.assert_series_equal(result, expected, check_index_type=True) # preserve order? 
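The two hunks above are the user-visible core of this change: value_counts(bins=...) now keys its result by intervals instead of by bare float bin edges. A hedged sketch of the new behaviour (exact edges and reprs vary by version):

    import pandas as pd

    pd.Series([1, 2, 3, 4]).value_counts(bins=1)
    # (0.996, 4.0]    4
    # the index is interval-backed rather than the old float labels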
@@ -670,12 +674,13 @@ def test_categorical_nans(self): s.iloc[1] = np.nan result = s.value_counts() expected = Series([4, 3, 2], index=pd.CategoricalIndex( + ['a', 'b', 'c'], categories=['a', 'b', 'c'])) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) expected = Series([ 4, 3, 2, 1 - ], index=pd.CategoricalIndex(['a', 'b', 'c', np.nan])) + ], index=CategoricalIndex(['a', 'b', 'c', np.nan])) tm.assert_series_equal(result, expected, check_index_type=True) # out of order diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 032e3a186b84a..4a1cf6314aaed 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -13,7 +13,7 @@ needs_i8_conversion) import pandas.util.testing as tm from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, - Timedelta) + Timedelta, IntervalIndex, Interval) from pandas.compat import StringIO from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.base import PandasDelegate, NoNewAttributesMixin @@ -575,10 +575,10 @@ def test_value_counts_bins(self): s1 = Series([1, 1, 2, 3]) res1 = s1.value_counts(bins=1) - exp1 = Series({0.998: 4}) + exp1 = Series({Interval(0.997, 3.0): 4}) tm.assert_series_equal(res1, exp1) res1n = s1.value_counts(bins=1, normalize=True) - exp1n = Series({0.998: 1.0}) + exp1n = Series({Interval(0.997, 3.0): 1.0}) tm.assert_series_equal(res1n, exp1n) if isinstance(s1, Index): @@ -589,18 +589,20 @@ def test_value_counts_bins(self): self.assertEqual(s1.nunique(), 3) - res4 = s1.value_counts(bins=4) - exp4 = Series({0.998: 2, - 1.5: 1, - 2.0: 0, - 2.5: 1}, index=[0.998, 2.5, 1.5, 2.0]) + # these return the same + res4 = s1.value_counts(bins=4, dropna=True) + intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) + exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2])) tm.assert_series_equal(res4, exp4) + + res4 = s1.value_counts(bins=4, dropna=False) + intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) + exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2])) + tm.assert_series_equal(res4, exp4) + res4n = s1.value_counts(bins=4, normalize=True) - exp4n = Series( - {0.998: 0.5, - 1.5: 0.25, - 2.0: 0.0, - 2.5: 0.25}, index=[0.998, 2.5, 1.5, 2.0]) + exp4n = Series([0.5, 0.25, 0.25, 0], + index=intervals.take([0, 3, 1, 2])) tm.assert_series_equal(res4n, exp4n) # handle NA's properly diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index adacbb95f5162..dd370f0a20c2e 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -21,7 +21,8 @@ Timestamp, CategoricalIndex, isnull, date_range, DatetimeIndex, period_range, PeriodIndex, - timedelta_range, TimedeltaIndex, NaT) + timedelta_range, TimedeltaIndex, NaT, + Interval, IntervalIndex) from pandas.compat import range, lrange, u, PY3 from pandas.core.config import option_context @@ -121,6 +122,16 @@ def test_constructor_unsortable(self): self.assertRaises( TypeError, lambda: Categorical(arr, ordered=True)) + def test_constructor_interval(self): + result = Categorical([Interval(1, 2), Interval(2, 3), Interval(3, 6)], + ordered=True) + ii = IntervalIndex.from_intervals([Interval(1, 2), + Interval(2, 3), + Interval(3, 6)]) + exp = Categorical(ii, ordered=True) + self.assert_categorical_equal(result, exp) + tm.assert_index_equal(result.categories, ii) + def test_is_equal_dtype(self): # test dtype comparisons between cats @@ -1598,10 +1609,11 @@ def setUp(self): df = 
DataFrame({'value': np.random.randint(0, 10000, 100)}) labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] + cat_labels = Categorical(labels, labels) df = df.sort_values(by=['value'], ascending=True) - df['value_group'] = pd.cut(df.value, range(0, 10500, 500), right=False, - labels=labels) + df['value_group'] = pd.cut(df.value, range(0, 10500, 500), + right=False, labels=cat_labels) self.cat = df def test_dtypes(self): @@ -2008,9 +2020,10 @@ def test_series_functions_no_warnings(self): def test_assignment_to_dataframe(self): # assignment - df = DataFrame({'value': np.array(np.random.randint(0, 10000, 100), - dtype='int32')}) - labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] + df = DataFrame({'value': np.array( + np.random.randint(0, 10000, 100), dtype='int32')}) + labels = Categorical(["{0} - {1}".format(i, i + 499) + for i in range(0, 10000, 500)]) df = df.sort_values(by=['value'], ascending=True) s = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels) @@ -3007,7 +3020,7 @@ def f(x): # GH 9603 df = pd.DataFrame({'a': [1, 0, 0, 0]}) - c = pd.cut(df.a, [0, 1, 2, 3, 4]) + c = pd.cut(df.a, [0, 1, 2, 3, 4], labels=pd.Categorical(list('abcd'))) result = df.groupby(c).apply(len) exp_index = pd.CategoricalIndex(c.values.categories, @@ -3124,7 +3137,7 @@ def test_slicing(self): df = DataFrame({'value': (np.arange(100) + 1).astype('int64')}) df['D'] = pd.cut(df.value, bins=[0, 25, 50, 75, 100]) - expected = Series([11, '(0, 25]'], index=['value', 'D'], name=10) + expected = Series([11, Interval(0, 25)], index=['value', 'D'], name=10) result = df.iloc[10] tm.assert_series_equal(result, expected) @@ -3134,7 +3147,7 @@ def test_slicing(self): result = df.iloc[10:20] tm.assert_frame_equal(result, expected) - expected = Series([9, '(0, 25]'], index=['value', 'D'], name=8) + expected = Series([9, Interval(0, 25)], index=['value', 'D'], name=8) result = df.loc[8] tm.assert_series_equal(result, expected) diff --git a/pandas/tests/tools/test_tile.py b/pandas/tests/tools/test_tile.py index cc80c1ff5db29..742568870c3c3 100644 --- a/pandas/tests/tools/test_tile.py +++ b/pandas/tests/tools/test_tile.py @@ -3,21 +3,20 @@ import numpy as np from pandas.compat import zip -from pandas import Series, Index, Categorical +from pandas import (Series, Index, isnull, + to_datetime, DatetimeIndex, Timestamp, + Interval, IntervalIndex, Categorical, + cut, qcut, date_range) import pandas.util.testing as tm -from pandas.util.testing import assertRaisesRegexp -import pandas.core.common as com from pandas.core.algorithms import quantile -from pandas.tools.tile import cut, qcut import pandas.tools.tile as tmod -from pandas import to_datetime, DatetimeIndex, Timestamp class TestCut(tm.TestCase): def test_simple(self): - data = np.ones(5) + data = np.ones(5, dtype='int64') result = cut(data, 4, labels=False) expected = np.array([1, 1, 1, 1, 1]) tm.assert_numpy_array_equal(result, expected, @@ -27,34 +26,62 @@ def test_bins(self): data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]) result, bins = cut(data, 3, retbins=True) - exp_codes = np.array([0, 0, 0, 1, 2, 0], dtype=np.int8) - tm.assert_numpy_array_equal(result.codes, exp_codes) - exp = np.array([0.1905, 3.36666667, 6.53333333, 9.7]) - tm.assert_almost_equal(bins, exp) + intervals = IntervalIndex.from_breaks(bins.round(3)) + expected = intervals.take([0, 0, 0, 1, 2, 0]).astype('category') + tm.assert_categorical_equal(result, expected) + tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667, + 6.53333333, 9.7])) def 
test_right(self): data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) result, bins = cut(data, 4, right=True, retbins=True) - exp_codes = np.array([0, 0, 0, 2, 3, 0, 0], dtype=np.int8) - tm.assert_numpy_array_equal(result.codes, exp_codes) - exp = np.array([0.1905, 2.575, 4.95, 7.325, 9.7]) - tm.assert_numpy_array_equal(bins, exp) + intervals = IntervalIndex.from_breaks(bins.round(3)) + expected = intervals.astype('category').take([0, 0, 0, 2, 3, 0, 0]) + tm.assert_categorical_equal(result, expected) + tm.assert_almost_equal(bins, np.array([0.1905, 2.575, 4.95, + 7.325, 9.7])) def test_noright(self): data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) result, bins = cut(data, 4, right=False, retbins=True) - exp_codes = np.array([0, 0, 0, 2, 3, 0, 1], dtype=np.int8) - tm.assert_numpy_array_equal(result.codes, exp_codes) - exp = np.array([0.2, 2.575, 4.95, 7.325, 9.7095]) - tm.assert_almost_equal(bins, exp) + intervals = IntervalIndex.from_breaks(bins.round(3), closed='left') + expected = intervals.take([0, 0, 0, 2, 3, 0, 1]).astype('category') + tm.assert_categorical_equal(result, expected) + tm.assert_almost_equal(bins, np.array([0.2, 2.575, 4.95, + 7.325, 9.7095])) def test_arraylike(self): data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] result, bins = cut(data, 3, retbins=True) - exp_codes = np.array([0, 0, 0, 1, 2, 0], dtype=np.int8) - tm.assert_numpy_array_equal(result.codes, exp_codes) - exp = np.array([0.1905, 3.36666667, 6.53333333, 9.7]) - tm.assert_almost_equal(bins, exp) + intervals = IntervalIndex.from_breaks(bins.round(3)) + expected = intervals.take([0, 0, 0, 1, 2, 0]).astype('category') + tm.assert_categorical_equal(result, expected) + tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667, + 6.53333333, 9.7])) + + def test_bins_from_intervalindex(self): + c = cut(range(5), 3) + expected = c + result = cut(range(5), bins=expected.categories) + tm.assert_categorical_equal(result, expected) + + expected = Categorical.from_codes(np.append(c.codes, -1), + categories=c.categories, + ordered=True) + result = cut(range(6), bins=expected.categories) + tm.assert_categorical_equal(result, expected) + + # doc example + # make sure we preserve the bins + ages = np.array([10, 15, 13, 12, 23, 25, 28, 59, 60]) + c = cut(ages, bins=[0, 18, 35, 70]) + expected = IntervalIndex.from_tuples([(0, 18), (18, 35), (35, 70)]) + tm.assert_index_equal(c.categories, expected) + + result = cut([25, 20, 50], bins=c.categories) + tm.assert_index_equal(result.categories, expected) + tm.assert_numpy_array_equal(result.codes, + np.array([1, 1, 2], dtype='int8')) def test_bins_not_monotonic(self): data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] @@ -82,14 +109,13 @@ def test_labels(self): arr = np.tile(np.arange(0, 1.01, 0.1), 4) result, bins = cut(arr, 4, retbins=True) - ex_levels = Index(['(-0.001, 0.25]', '(0.25, 0.5]', '(0.5, 0.75]', - '(0.75, 1]']) - self.assert_index_equal(result.categories, ex_levels) + ex_levels = IntervalIndex.from_breaks([-1e-3, 0.25, 0.5, 0.75, 1]) + tm.assert_index_equal(result.categories, ex_levels) result, bins = cut(arr, 4, retbins=True, right=False) - ex_levels = Index(['[0, 0.25)', '[0.25, 0.5)', '[0.5, 0.75)', - '[0.75, 1.001)']) - self.assert_index_equal(result.categories, ex_levels) + ex_levels = IntervalIndex.from_breaks([0, 0.25, 0.5, 0.75, 1 + 1e-3], + closed='left') + tm.assert_index_equal(result.categories, ex_levels) def test_cut_pass_series_name_to_factor(self): s = Series(np.random.randn(100), name='foo') @@ -101,9 +127,9 @@ def test_label_precision(self): arr = np.arange(0, 0.73, 
0.01) result = cut(arr, 4, precision=2) - ex_levels = Index(['(-0.00072, 0.18]', '(0.18, 0.36]', - '(0.36, 0.54]', '(0.54, 0.72]']) - self.assert_index_equal(result.categories, ex_levels) + ex_levels = IntervalIndex.from_breaks([-0.00072, 0.18, 0.36, + 0.54, 0.72]) + tm.assert_index_equal(result.categories, ex_levels) def test_na_handling(self): arr = np.arange(0, 0.75, 0.01) @@ -113,39 +139,43 @@ def test_na_handling(self): result_arr = np.asarray(result) - ex_arr = np.where(com.isnull(arr), np.nan, result_arr) + ex_arr = np.where(isnull(arr), np.nan, result_arr) tm.assert_almost_equal(result_arr, ex_arr) result = cut(arr, 4, labels=False) - ex_result = np.where(com.isnull(arr), np.nan, result) + ex_result = np.where(isnull(arr), np.nan, result) tm.assert_almost_equal(result, ex_result) def test_inf_handling(self): data = np.arange(6) data_ser = Series(data, dtype='int64') - result = cut(data, [-np.inf, 2, 4, np.inf]) - result_ser = cut(data_ser, [-np.inf, 2, 4, np.inf]) + bins = [-np.inf, 2, 4, np.inf] + result = cut(data, bins) + result_ser = cut(data_ser, bins) - ex_categories = Index(['(-inf, 2]', '(2, 4]', '(4, inf]']) - - tm.assert_index_equal(result.categories, ex_categories) - tm.assert_index_equal(result_ser.cat.categories, ex_categories) - self.assertEqual(result[5], '(4, inf]') - self.assertEqual(result[0], '(-inf, 2]') - self.assertEqual(result_ser[5], '(4, inf]') - self.assertEqual(result_ser[0], '(-inf, 2]') + ex_uniques = IntervalIndex.from_breaks(bins) + tm.assert_index_equal(result.categories, ex_uniques) + self.assertEqual(result[5], Interval(4, np.inf)) + self.assertEqual(result[0], Interval(-np.inf, 2)) + self.assertEqual(result_ser[5], Interval(4, np.inf)) + self.assertEqual(result_ser[0], Interval(-np.inf, 2)) def test_qcut(self): arr = np.random.randn(1000) + # we store the bins as Index that have been rounded + # so comparisons are a bit tricky labels, bins = qcut(arr, 4, retbins=True) ex_bins = quantile(arr, [0, .25, .5, .75, 1.]) - tm.assert_almost_equal(bins, ex_bins) + result = labels.categories.left.values + self.assertTrue(np.allclose(result, ex_bins[:-1], atol=1e-2)) + result = labels.categories.right.values + self.assertTrue(np.allclose(result, ex_bins[1:], atol=1e-2)) ex_levels = cut(arr, ex_bins, include_lowest=True) - self.assert_categorical_equal(labels, ex_levels) + tm.assert_categorical_equal(labels, ex_levels) def test_qcut_bounds(self): arr = np.random.randn(1000) @@ -161,15 +191,15 @@ def test_qcut_specify_quantiles(self): tm.assert_categorical_equal(factor, expected) def test_qcut_all_bins_same(self): - assertRaisesRegexp(ValueError, "edges.*unique", qcut, - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3) + tm.assertRaisesRegexp(ValueError, "edges.*unique", qcut, + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3) def test_cut_out_of_bounds(self): arr = np.random.randn(100) result = cut(arr, [-1, 0, 1]) - mask = result.codes == -1 + mask = isnull(result) ex_mask = (arr < -1) | (arr > 1) self.assert_numpy_array_equal(mask, ex_mask) @@ -179,30 +209,41 @@ def test_cut_pass_labels(self): labels = ['Small', 'Medium', 'Large'] result = cut(arr, bins, labels=labels) + exp = Categorical(['Medium'] + 4 * ['Small'] + ['Medium', 'Large'], + ordered=True) + self.assert_categorical_equal(result, exp) - exp = cut(arr, bins) - exp.categories = labels - - tm.assert_categorical_equal(result, exp) + result = cut(arr, bins, labels=Categorical.from_codes([0, 1, 2], + labels)) + exp = Categorical.from_codes([1] + 4 * [0] + [1, 2], labels) + self.assert_categorical_equal(result, exp) +
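test_cut_pass_labels above fixes the contract for user-supplied labels; standalone, the asserted behaviour looks like this (a sketch, not part of the patch itself):

    import pandas as pd

    result = pd.cut([50, 5, 10, 15, 20, 30, 70], bins=[0, 25, 50, 75],
                    labels=['Small', 'Medium', 'Large'])
    # [Medium, Small, Small, Small, Small, Medium, Large]
    # Categories (3, object): [Small < Medium < Large]

+ def 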
test_qcut_include_lowest(self): values = np.arange(10) - cats = qcut(values, 4) + ii = qcut(values, 4) - ex_levels = ['[0, 2.25]', '(2.25, 4.5]', '(4.5, 6.75]', '(6.75, 9]'] - self.assertTrue((cats.categories == ex_levels).all()) + ex_levels = IntervalIndex.from_intervals( + [Interval(-0.001, 2.25), + Interval(2.25, 4.5), + Interval(4.5, 6.75), + Interval(6.75, 9)]) + tm.assert_index_equal(ii.categories, ex_levels) def test_qcut_nas(self): arr = np.random.randn(100) arr[:20] = np.nan result = qcut(arr, 4) - self.assertTrue(com.isnull(result[:20]).all()) + self.assertTrue(isnull(result[:20]).all()) - def test_label_formatting(self): - self.assertEqual(tmod._trim_zeros('1.000'), '1') + def test_qcut_index(self): + result = qcut([0, 2], 2) + expected = Index([Interval(-0.001, 1), Interval(1, 2)]).astype( + 'category') + self.assert_categorical_equal(result, expected) + def test_round_frac(self): # it works result = cut(np.arange(11.), 2) @@ -210,10 +251,15 @@ def test_label_formatting(self): # #1979, negative numbers - result = tmod._format_label(-117.9998, precision=3) - self.assertEqual(result, '-118') - result = tmod._format_label(117.9998, precision=3) - self.assertEqual(result, '118') + result = tmod._round_frac(-117.9998, precision=3) + self.assertEqual(result, -118) + result = tmod._round_frac(117.9998, precision=3) + self.assertEqual(result, 118) + + result = tmod._round_frac(117.9998, precision=2) + self.assertEqual(result, 118) + result = tmod._round_frac(0.000123456, precision=2) + self.assertEqual(result, 0.00012) def test_qcut_binning_issues(self): # #1978, 1979 @@ -224,9 +270,9 @@ def test_qcut_binning_issues(self): starts = [] ends = [] - for lev in result.categories: - s, e = lev[1:-1].split(',') - + for lev in np.unique(result): + s = lev.left + e = lev.right self.assertTrue(s != e) starts.append(float(s)) @@ -238,44 +284,47 @@ def test_qcut_binning_issues(self): self.assertTrue(ep < en) self.assertTrue(ep <= sn) - def test_cut_return_categorical(self): + def test_cut_return_intervals(self): s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) res = cut(s, 3) - exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2], - ["(-0.008, 2.667]", - "(2.667, 5.333]", "(5.333, 8]"], - ordered=True)) + exp_bins = np.linspace(0, 8, num=4).round(3) + exp_bins[0] -= 0.008 + exp = Series(IntervalIndex.from_breaks(exp_bins, closed='right').take( + [0, 0, 0, 1, 1, 1, 2, 2, 2])).astype('category', ordered=True) tm.assert_series_equal(res, exp) - def test_qcut_return_categorical(self): + def test_qcut_return_intervals(self): s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) res = qcut(s, [0, 0.333, 0.666, 1]) - exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2], - ["[0, 2.664]", - "(2.664, 5.328]", "(5.328, 8]"], - ordered=True)) + exp_levels = np.array([Interval(-0.001, 2.664), + Interval(2.664, 5.328), Interval(5.328, 8)]) + exp = Series(exp_levels.take([0, 0, 0, 1, 1, 1, 2, 2, 2])).astype( + 'category', ordered=True) tm.assert_series_equal(res, exp) def test_series_retbins(self): # GH 8589 s = Series(np.arange(4)) result, bins = cut(s, 2, retbins=True) - tm.assert_numpy_array_equal(result.cat.codes.values, - np.array([0, 0, 1, 1], dtype=np.int8)) - tm.assert_numpy_array_equal(bins, np.array([-0.003, 1.5, 3])) + expected = Series(IntervalIndex.from_breaks( + [-0.003, 1.5, 3], closed='right').repeat(2)).astype('category', + ordered=True) + tm.assert_series_equal(result, expected) result, bins = qcut(s, 2, retbins=True) - tm.assert_numpy_array_equal(result.cat.codes.values, - np.array([0, 0, 1, 
1], dtype=np.int8)) - tm.assert_numpy_array_equal(bins, np.array([0, 1.5, 3])) + expected = Series(IntervalIndex.from_breaks( + [-0.001, 1.5, 3], closed='right').repeat(2)).astype('category', + ordered=True) + tm.assert_series_equal(result, expected) def test_qcut_duplicates_bin(self): # GH 7751 values = [0, 0, 0, 0, 1, 2, 3] - result_levels = ['[0, 1]', '(1, 3]'] + expected = IntervalIndex.from_intervals([Interval(-0.001, 1), + Interval(1, 3)]) - cats = qcut(values, 3, duplicates='drop') - self.assertTrue((cats.categories == result_levels).all()) + result = qcut(values, 3, duplicates='drop') + tm.assert_index_equal(result.categories, expected) self.assertRaises(ValueError, qcut, values, 3) self.assertRaises(ValueError, qcut, values, 3, duplicates='raise') @@ -291,51 +340,57 @@ def test_single_quantile(self): result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) - exp_lab = Series(Categorical.from_codes([0, 0], ["[9, 9]"], - ordered=True)) - tm.assert_series_equal(result, exp_lab) + intervals = IntervalIndex([Interval(8.999, 9.0), + Interval(8.999, 9.0)], closed='right') + expected = Series(intervals).astype('category', ordered=True) + tm.assert_series_equal(result, expected) s = Series([-9., -9.]) + expected = Series([0, 0]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) - exp_lab = Series(Categorical.from_codes([0, 0], ["[-9, -9]"], - ordered=True)) - tm.assert_series_equal(result, exp_lab) + intervals = IntervalIndex([Interval(-9.001, -9.0), + Interval(-9.001, -9.0)], closed='right') + expected = Series(intervals).astype('category', ordered=True) + tm.assert_series_equal(result, expected) s = Series([0., 0.]) + expected = Series([0, 0]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) - exp_lab = Series(Categorical.from_codes([0, 0], ["[0, 0]"], - ordered=True)) - tm.assert_series_equal(result, exp_lab) - - expected = Series([0]) + intervals = IntervalIndex([Interval(-0.001, 0.0), + Interval(-0.001, 0.0)], closed='right') + expected = Series(intervals).astype('category', ordered=True) + tm.assert_series_equal(result, expected) s = Series([9]) + expected = Series([0]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) - exp_lab = Series(Categorical.from_codes([0], ["[9, 9]"], - ordered=True)) - tm.assert_series_equal(result, exp_lab) + intervals = IntervalIndex([Interval(8.999, 9.0)], closed='right') + expected = Series(intervals).astype('category', ordered=True) + tm.assert_series_equal(result, expected) s = Series([-9]) + expected = Series([0]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) - exp_lab = Series(Categorical.from_codes([0], ["[-9, -9]"], - ordered=True)) - tm.assert_series_equal(result, exp_lab) + intervals = IntervalIndex([Interval(-9.001, -9.0)], closed='right') + expected = Series(intervals).astype('category', ordered=True) + tm.assert_series_equal(result, expected) s = Series([0]) + expected = Series([0]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) - exp_lab = Series(Categorical.from_codes([0], ["[0, 0]"], - ordered=True)) - tm.assert_series_equal(result, exp_lab) + intervals = IntervalIndex([Interval(-0.001, 0.0)], closed='right') + expected = Series(intervals).astype('category', ordered=True) + tm.assert_series_equal(result, expected) def test_single_bin(self): # issue 14652 @@ -376,11 +431,18 
@@ def test_datetime_cut(self): # GH 14714 # testing for time data to be present as series data = to_datetime(Series(['2013-01-01', '2013-01-02', '2013-01-03'])) + result, bins = cut(data, 3, retbins=True) - expected = Series(['(2012-12-31 23:57:07.200000, 2013-01-01 16:00:00]', - '(2013-01-01 16:00:00, 2013-01-02 08:00:00]', - '(2013-01-02 08:00:00, 2013-01-03 00:00:00]'], - ).astype("category", ordered=True) + expected = ( + Series(IntervalIndex.from_intervals([ + Interval(Timestamp('2012-12-31 23:57:07.200000'), + Timestamp('2013-01-01 16:00:00')), + Interval(Timestamp('2013-01-01 16:00:00'), + Timestamp('2013-01-02 08:00:00')), + Interval(Timestamp('2013-01-02 08:00:00'), + Timestamp('2013-01-03 00:00:00'))])) + .astype('category', ordered=True)) + tm.assert_series_equal(result, expected) # testing for time data to be present as list @@ -404,9 +466,11 @@ def test_datetime_cut(self): def test_datetime_bin(self): data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')] bin_data = ['2012-12-12', '2012-12-14', '2012-12-16'] - expected = Series(['(2012-12-12 00:00:00, 2012-12-14 00:00:00]', - '(2012-12-14 00:00:00, 2012-12-16 00:00:00]'], - ).astype("category", ordered=True) + expected = ( + Series(IntervalIndex.from_intervals([ + Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])), + Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2]))])) + .astype('category', ordered=True)) for conv in [Timestamp, Timestamp, np.datetime64]: bins = [conv(v) for v in bin_data] @@ -421,6 +485,20 @@ def test_datetime_bin(self): result = cut(data, bins=bin_pydatetime) tm.assert_series_equal(Series(result), expected) + def test_datetime_nan(self): + + def f(): + cut(date_range('20130101', periods=3), bins=[0, 2, 4]) + self.assertRaises(ValueError, f) + + result = cut(date_range('20130102', periods=5), + bins=date_range('20130101', periods=2)) + mask = result.categories.isnull() + self.assert_numpy_array_equal(mask, np.array([False])) + mask = result.isnull() + self.assert_numpy_array_equal( + mask, np.array([False, True, True, True, True])) + def curpath(): pth, _ = os.path.split(os.path.abspath(__file__)) diff --git a/pandas/tests/types/test_dtypes.py b/pandas/tests/types/test_dtypes.py index e7b2edeb57714..79d9fd84396e7 100644 --- a/pandas/tests/types/test_dtypes.py +++ b/pandas/tests/types/test_dtypes.py @@ -3,14 +3,15 @@ import numpy as np import pandas as pd -from pandas import Series, Categorical, date_range +from pandas import Series, Categorical, IntervalIndex, date_range -from pandas.types.dtypes import DatetimeTZDtype, PeriodDtype, CategoricalDtype +from pandas.types.dtypes import (DatetimeTZDtype, PeriodDtype, + IntervalDtype, CategoricalDtype) from pandas.types.common import (is_categorical_dtype, is_categorical, is_datetime64tz_dtype, is_datetimetz, is_period_dtype, is_period, is_dtype_equal, is_datetime64_ns_dtype, - is_datetime64_dtype, + is_datetime64_dtype, is_interval_dtype, is_datetime64_any_dtype, is_string_dtype, _coerce_to_dtype) import pandas.util.testing as tm @@ -351,3 +352,114 @@ def test_empty(self): def test_not_string(self): # though PeriodDtype has object kind, it cannot be string self.assertFalse(is_string_dtype(PeriodDtype('D'))) + + +class TestIntervalDtype(Base, tm.TestCase): + + # TODO: placeholder + def setUp(self): + self.dtype = IntervalDtype('int64') + + def test_construction(self): + with tm.assertRaises(ValueError): + IntervalDtype('xx') + + for s in ['interval[int64]', 'Interval[int64]', 'int64']: + i = IntervalDtype(s) + self.assertEqual(i.subtype, 
np.dtype('int64')) + self.assertTrue(is_interval_dtype(i)) + + def test_construction_generic(self): + # generic + i = IntervalDtype('interval') + self.assertIs(i.subtype, None) + self.assertTrue(is_interval_dtype(i)) + self.assertTrue(str(i) == 'interval') + + i = IntervalDtype() + self.assertIs(i.subtype, None) + self.assertTrue(is_interval_dtype(i)) + self.assertTrue(str(i) == 'interval') + + def test_subclass(self): + a = IntervalDtype('interval[int64]') + b = IntervalDtype('interval[int64]') + + self.assertTrue(issubclass(type(a), type(a))) + self.assertTrue(issubclass(type(a), type(b))) + + def test_is_dtype(self): + self.assertTrue(IntervalDtype.is_dtype(self.dtype)) + self.assertTrue(IntervalDtype.is_dtype('interval')) + self.assertTrue(IntervalDtype.is_dtype(IntervalDtype('float64'))) + self.assertTrue(IntervalDtype.is_dtype(IntervalDtype('int64'))) + self.assertTrue(IntervalDtype.is_dtype(IntervalDtype(np.int64))) + + self.assertFalse(IntervalDtype.is_dtype('D')) + self.assertFalse(IntervalDtype.is_dtype('3D')) + self.assertFalse(IntervalDtype.is_dtype('U')) + self.assertFalse(IntervalDtype.is_dtype('S')) + self.assertFalse(IntervalDtype.is_dtype('foo')) + self.assertFalse(IntervalDtype.is_dtype(np.object_)) + self.assertFalse(IntervalDtype.is_dtype(np.int64)) + self.assertFalse(IntervalDtype.is_dtype(np.float64)) + + def test_identity(self): + self.assertEqual(IntervalDtype('interval[int64]'), + IntervalDtype('interval[int64]')) + + def test_coerce_to_dtype(self): + self.assertEqual(_coerce_to_dtype('interval[int64]'), + IntervalDtype('interval[int64]')) + + def test_construction_from_string(self): + result = IntervalDtype('interval[int64]') + self.assertTrue(is_dtype_equal(self.dtype, result)) + result = IntervalDtype.construct_from_string('interval[int64]') + self.assertTrue(is_dtype_equal(self.dtype, result)) + with tm.assertRaises(TypeError): + IntervalDtype.construct_from_string('foo') + with tm.assertRaises(TypeError): + IntervalDtype.construct_from_string('interval[foo]') + with tm.assertRaises(TypeError): + IntervalDtype.construct_from_string('foo[int64]') + + def test_equality(self): + self.assertTrue(is_dtype_equal(self.dtype, 'interval[int64]')) + self.assertTrue(is_dtype_equal(self.dtype, IntervalDtype('int64'))) + self.assertTrue(is_dtype_equal(IntervalDtype('int64'), + IntervalDtype('int64'))) + + self.assertFalse(is_dtype_equal(self.dtype, 'int64')) + self.assertFalse(is_dtype_equal(IntervalDtype('int64'), + IntervalDtype('float64'))) + + def test_basic(self): + self.assertTrue(is_interval_dtype(self.dtype)) + + ii = IntervalIndex.from_breaks(range(3)) + + self.assertTrue(is_interval_dtype(ii.dtype)) + self.assertTrue(is_interval_dtype(ii)) + + s = Series(ii, name='A') + + # dtypes + # series results in object dtype currently, + self.assertFalse(is_interval_dtype(s.dtype)) + self.assertFalse(is_interval_dtype(s)) +
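For orientation, the new dtype is parametrised by its subtype; a hedged sketch using the import path this patch itself uses (the class moved to pandas.core.dtypes.dtypes, and eventually to the top-level namespace, in later releases; string reprs also vary by version):

    import numpy as np
    from pandas.types.dtypes import IntervalDtype  # path at the time of this patch

    dtype = IntervalDtype('int64')          # same as IntervalDtype('interval[int64]')
    assert dtype.subtype == np.dtype('int64')
    assert IntervalDtype().subtype is None  # the generic 'interval' dtype

+ def test_basic_dtype(self): + self.assertTrue(is_interval_dtype('interval[int64]')) + self.assertTrue(is_interval_dtype(IntervalIndex.from_tuples([(0, 1)]))) + self.assertTrue(is_interval_dtype + (IntervalIndex.from_breaks(np.arange(4)))) + self.assertTrue(is_interval_dtype( + IntervalIndex.from_breaks(date_range('20130101', periods=3)))) + self.assertFalse(is_interval_dtype('U')) + self.assertFalse(is_interval_dtype('S')) + self.assertFalse(is_interval_dtype('foo')) + self.assertFalse(is_interval_dtype(np.object_)) + self.assertFalse(is_interval_dtype(np.int64)) + 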
self.assertFalse(is_interval_dtype(np.float64)) diff --git a/pandas/tests/types/test_missing.py b/pandas/tests/types/test_missing.py index efd6dda02ab4b..31bf2817c8bab 100644 --- a/pandas/tests/types/test_missing.py +++ b/pandas/tests/types/test_missing.py @@ -55,6 +55,14 @@ def test_0d_array(self): self.assertFalse(isnull(np.array(0.0, dtype=object))) self.assertFalse(isnull(np.array(0, dtype=object))) + def test_empty_object(self): + + for shape in [(4, 0), (4,)]: + arr = np.empty(shape=shape, dtype=object) + result = isnull(arr) + expected = np.ones(shape=shape, dtype=bool) + tm.assert_numpy_array_equal(result, expected) + def test_isnull(self): self.assertFalse(isnull(1.)) self.assertTrue(isnull(None)) diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index 4a3d452228e01..2a258d4a7b7e5 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -3,17 +3,19 @@ """ from pandas.types.missing import isnull -from pandas.types.common import (is_float, is_integer, - is_scalar, _ensure_int64) +from pandas.types.common import (is_integer, + is_scalar, + is_categorical_dtype, + is_datetime64_dtype, + is_timedelta64_dtype, + _ensure_int64) -from pandas.core.api import Series -from pandas.core.categorical import Categorical import pandas.core.algorithms as algos import pandas.core.nanops as nanops -from pandas.compat import zip -from pandas import to_timedelta, to_datetime -from pandas.types.common import is_datetime64_dtype, is_timedelta64_dtype from pandas._libs.lib import infer_dtype +from pandas import (to_timedelta, to_datetime, + Categorical, Timestamp, Timedelta, + Series, Interval, IntervalIndex) import numpy as np @@ -27,7 +29,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, ---------- x : array-like Input array to be binned. It has to be 1-dimensional. - bins : int or sequence of scalars + bins : int, sequence of scalars, or IntervalIndex If `bins` is an int, it defines the number of equal-width bins in the range of `x`. However, in this case, the range of `x` is extended by .1% on each side to include the min or max values of `x`. If @@ -45,9 +47,9 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, retbins : bool, optional Whether to return the bins or not. Can be useful if bins is given as a scalar. - precision : int + precision : int, optional The precision at which to store and display the bins labels - include_lowest : bool + include_lowest : bool, optional Whether the first interval should be left-inclusive or not. Returns @@ -76,10 +78,12 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, (6.533, 9.7], (0.191, 3.367]] Categories (3, object): [(0.191, 3.367] < (3.367, 6.533] < (6.533, 9.7]], array([ 0.1905 , 3.36666667, 6.53333333, 9.7 ])) + >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3, labels=["good","medium","bad"]) [good, good, good, medium, bad, good] Categories (3, object): [good < medium < bad] + >>> pd.cut(np.ones(5), 4, labels=False) array([1, 1, 1, 1, 1], dtype=int64) """ @@ -93,14 +97,16 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, if is_scalar(bins) and bins < 1: raise ValueError("`bins` should be a positive integer.") - sz = x.size + try: # for array-like + sz = x.size + except AttributeError: + x = np.asarray(x) + sz = x.size if sz == 0: raise ValueError('Cannot cut empty array') - # handle empty arrays. Can't determine range, so use 0-1. 
- # rng = (0, 1) - else: - rng = (nanops.nanmin(x), nanops.nanmax(x)) + + rng = (nanops.nanmin(x), nanops.nanmax(x)) mn, mx = [mi + 0.0 for mi in rng] if mn == mx: # adjust end points before binning @@ -115,15 +121,18 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, else: bins[-1] += adj + elif isinstance(bins, IntervalIndex): + pass else: bins = np.asarray(bins) - bins = _convert_bin_to_numeric_type(bins) + bins = _convert_bin_to_numeric_type(bins, dtype) if (np.diff(bins) < 0).any(): raise ValueError('bins must increase monotonically.') fac, bins = _bins_to_cuts(x, bins, right=right, labels=labels, precision=precision, - include_lowest=include_lowest, dtype=dtype) + include_lowest=include_lowest, + dtype=dtype) return _postprocess_for_cut(fac, bins, retbins, x_is_series, series_index, name) @@ -147,9 +156,9 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'): the resulting bins. If False, return only integer indicators of the bins. retbins : bool, optional - Whether to return the bins or not. Can be useful if bins is given - as a scalar. - precision : int + Whether to return the (bins, labels) or not. Can be useful if bins + is given as a scalar. + precision : int, optional The precision at which to store and display the bins labels duplicates : {default 'raise', 'drop'}, optional If bin edges are not unique, raise ValueError or drop non-uniques. @@ -174,9 +183,11 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'): >>> pd.qcut(range(5), 4) [[0, 1], [0, 1], (1, 2], (2, 3], (3, 4]] Categories (4, object): [[0, 1] < (1, 2] < (2, 3] < (3, 4]] + >>> pd.qcut(range(5), 3, labels=["good","medium","bad"]) [good, good, medium, bad, bad] Categories (3, object): [good < medium < bad] + >>> pd.qcut(range(5), 4, labels=False) array([0, 0, 1, 2, 3], dtype=int64) """ @@ -205,6 +216,13 @@ def _bins_to_cuts(x, bins, right=True, labels=None, raise ValueError("invalid value for 'duplicates' parameter, " "valid options are: raise, drop") + if isinstance(bins, IntervalIndex): + # we have a fast-path here + ids = bins.get_indexer(x) + result = algos.take_nd(bins, ids) + result = Categorical(result, categories=bins, ordered=True) + return result, bins + unique_bins = algos.unique(bins) if len(unique_bins) < len(bins) and len(bins) != 2: if duplicates == 'raise': @@ -225,96 +243,26 @@ def _bins_to_cuts(x, bins, right=True, labels=None, if labels is not False: if labels is None: - increases = 0 - while True: - try: - levels = _format_levels(bins, precision, right=right, - include_lowest=include_lowest, - dtype=dtype) - except ValueError: - increases += 1 - precision += 1 - if increases >= 20: - raise - else: - break - + labels = _format_labels(bins, precision, right=right, + include_lowest=include_lowest, + dtype=dtype) else: if len(labels) != len(bins) - 1: raise ValueError('Bin labels must be one fewer than ' 'the number of bin edges') - levels = labels + if not is_categorical_dtype(labels): + labels = Categorical(labels, ordered=True) - levels = np.asarray(levels, dtype=object) np.putmask(ids, na_mask, 0) - fac = Categorical(ids - 1, levels, ordered=True, fastpath=True) - else: - fac = ids - 1 - if has_nas: - fac = fac.astype(np.float64) - np.putmask(fac, na_mask, np.nan) + result = algos.take_nd(labels, ids - 1) - return fac, bins - - -def _format_levels(bins, prec, right=True, - include_lowest=False, dtype=None): - fmt = lambda v: _format_label(v, precision=prec, dtype=dtype) - if right: - levels = [] - for a, b in zip(bins, 
bins[1:]): - fa, fb = fmt(a), fmt(b) - - if a != b and fa == fb: - raise ValueError('precision too low') - - formatted = '(%s, %s]' % (fa, fb) - - levels.append(formatted) - - if include_lowest: - levels[0] = '[' + levels[0][1:] else: - levels = ['[%s, %s)' % (fmt(a), fmt(b)) - for a, b in zip(bins, bins[1:])] - return levels + result = ids - 1 + if has_nas: + result = result.astype(np.float64) + np.putmask(result, na_mask, np.nan) - -def _format_label(x, precision=3, dtype=None): - fmt_str = '%%.%dg' % precision - - if is_datetime64_dtype(dtype): - return to_datetime(x, unit='ns') - if is_timedelta64_dtype(dtype): - return to_timedelta(x, unit='ns') - if np.isinf(x): - return str(x) - elif is_float(x): - frac, whole = np.modf(x) - sgn = '-' if x < 0 else '' - whole = abs(whole) - if frac != 0.0: - val = fmt_str % frac - - # rounded up or down - if '.' not in val: - if x < 0: - return '%d' % (-whole - 1) - else: - return '%d' % (whole + 1) - - if 'e' in val: - return _trim_zeros(fmt_str % x) - else: - val = _trim_zeros(val) - if '.' in val: - return sgn + '.'.join(('%d' % whole, val.split('.')[1])) - else: # pragma: no cover - return sgn + '.'.join(('%d' % whole, val)) - else: - return sgn + '%0.f' % whole - else: - return str(x) + return result, bins def _trim_zeros(x): @@ -343,17 +291,65 @@ def _coerce_to_type(x): return x, dtype -def _convert_bin_to_numeric_type(x): +def _convert_bin_to_numeric_type(bins, dtype): """ if the passed bin is of datetime/timedelta type, this method converts it to integer + + Parameters + ---------- + bins : list-like of bins + dtype : dtype of data + + Raises + ------ + ValueError if bins are not of a dtype compatible with dtype """ - dtype = infer_dtype(x) - if dtype == 'timedelta' or dtype == 'timedelta64': - x = to_timedelta(x).view(np.int64) - elif dtype == 'datetime' or dtype == 'datetime64': - x = to_datetime(x).view(np.int64) - return x + bins_dtype = infer_dtype(bins) + if is_timedelta64_dtype(dtype): + if bins_dtype in ['timedelta', 'timedelta64']: + bins = to_timedelta(bins).view(np.int64) + else: + raise ValueError("bins must be of timedelta64 dtype") + elif is_datetime64_dtype(dtype): + if bins_dtype in ['datetime', 'datetime64']: + bins = to_datetime(bins).view(np.int64) + else: + raise ValueError("bins must be of datetime64 dtype") + + return bins + + +def _format_labels(bins, precision, right=True, + include_lowest=False, dtype=None): + """ based on the dtype, return our labels """ + + closed = 'right' if right else 'left' + + if is_datetime64_dtype(dtype): + formatter = Timestamp + adjust = lambda x: x - Timedelta('1ns') + elif is_timedelta64_dtype(dtype): + formatter = Timedelta + adjust = lambda x: x - Timedelta('1ns') + else: + precision = _infer_precision(precision, bins) + formatter = lambda x: _round_frac(x, precision) + adjust = lambda x: x - 10 ** (-precision) + + breaks = [formatter(b) for b in bins] + labels = IntervalIndex.from_breaks(breaks, closed=closed) + + if right and include_lowest: + # we will adjust the left hand side by precision to + # account that we are all right closed + v = adjust(labels[0].left) + + i = IntervalIndex.from_intervals( + [Interval(v, labels[0].right, closed='right')]) + labels = i.append(labels[1:]) + + return labels def _preprocess_for_cut(x): @@ -375,7 +371,8 @@ return x_is_series, series_index, name, x -def _postprocess_for_cut(fac, bins, retbins, x_is_series, series_index, name): +def _postprocess_for_cut(fac, bins, retbins, x_is_series, + series_index, name): """ handles post processing for the cut method where we combine the index information if the originally passed @@ -388,3 +385,28 @@ return fac return fac, bins + + +def _round_frac(x, precision): + """ + Round the fractional part of the given number + """ + if not np.isfinite(x) or x == 0: + return x + else: + frac, whole = np.modf(x) + if whole == 0: + digits = -int(np.floor(np.log10(abs(frac)))) - 1 + precision + else: + digits = precision + return np.around(x, digits) + + +def _infer_precision(base_precision, bins): + """Infer an appropriate precision for _round_frac + """ + for precision in range(base_precision, 20): + levels = [_round_frac(b, precision) for b in bins] + if algos.unique(levels).size == bins.size: + return precision + return base_precision # default
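A quick illustration of what the tile.py refactor above enables: ``cut`` now labels its bins with real ``Interval`` objects instead of formatted strings, and a prebuilt ``IntervalIndex`` is accepted for ``bins``, hitting the new ``get_indexer`` fast-path in ``_bins_to_cuts``. This is only a sketch, assuming a pandas build that includes this patch; the breaks below are illustrative::

    import numpy as np
    import pandas as pd

    ages = np.array([3, 17, 40, 65])

    # bin labels are now Interval objects rather than formatted strings
    cats = pd.cut(ages, bins=[0, 18, 60, 100])
    print(cats.categories)   # an IntervalIndex: [(0, 18], (18, 60], (60, 100]]

    # a prebuilt IntervalIndex is accepted directly via the new fast-path
    breaks = pd.IntervalIndex.from_breaks([0, 18, 60, 100])
    print(pd.cut(ages, breaks))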
diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index ae40c2f66a590..48d236177b474 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -31,6 +31,9 @@ import pandas.types.concat as _concat import pandas.tseries.frequencies as frequencies +import pandas.indexes.base as ibase +_index_doc_kwargs = dict(ibase._index_doc_kwargs) + class DatelikeOps(object): """ common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex """ @@ -242,6 +245,7 @@ def _box_values(self, values): def _format_with_header(self, header, **kwargs): return header + list(self._format_native_types(**kwargs)) + @Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs) def __contains__(self, key): try: res = self.get_loc(key) @@ -249,6 +253,8 @@ except (KeyError, TypeError, ValueError): return False + contains = __contains__ + def __getitem__(self, key): """ This getitem defers to the underlying array, which by-definition can @@ -381,7 +387,7 @@ def sort_values(self, return_indexer=False, ascending=True): return self._simple_new(sorted_values, **attribs) - @Appender(_index_shared_docs['take']) + @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) @@ -798,7 +804,7 @@ def repeat(self, repeats, *args, **kwargs): return self._shallow_copy(self.asi8.repeat(repeats), freq=freq) - @Appender(_index_shared_docs['where']) + @Appender(_index_shared_docs['where'] % _index_doc_kwargs) def where(self, cond, other=None): other = _ensure_datetimelike_to_i8(other) values = _ensure_datetimelike_to_i8(self) diff --git a/pandas/tseries/interval.py b/pandas/tseries/interval.py deleted file mode 100644 index 22801318a1853..0000000000000 --- a/pandas/tseries/interval.py +++ /dev/null @@ -1,35 +0,0 @@ - -from pandas.core.index import Index - - -class Interval(object): - """ - Represents an interval of time defined by two timestamps - """ - - def __init__(self, start, end): - self.start = start - self.end = end - - -class PeriodInterval(object): - """ - Represents an interval of time defined by two Period objects (time - ordinals) - """ - - def __init__(self, start, end): - self.start = start - self.end = end - - -class IntervalIndex(Index): - """ - - """ - - def __new__(self, starts, ends): - pass - - def dtype(self): - return self.values.dtype diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 1e1496bbe9c27..7f7b3286fd4f8 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -347,6 +347,7 @@ def _coerce_scalar_to_index(self, item): """ return PeriodIndex([item], **self._get_attributes_dict()) + 
@Appender(_index_shared_docs['__contains__']) def __contains__(self, key): if isinstance(key, Period): if key.freq != self.freq: @@ -361,6 +362,8 @@ def __contains__(self, key): return False return False + contains = __contains__ + @property def asi8(self): return self._values.view('i8') diff --git a/pandas/types/api.py b/pandas/types/api.py index e78514ce77822..6dbd3dc6b640c 100644 --- a/pandas/types/api.py +++ b/pandas/types/api.py @@ -10,6 +10,10 @@ is_categorical, is_categorical_dtype, + # interval + is_interval, + is_interval_dtype, + # datetimelike is_datetimetz, is_datetime64_dtype, diff --git a/pandas/types/common.py b/pandas/types/common.py index 7ab2e068ac69f..0b14e484d40a7 100644 --- a/pandas/types/common.py +++ b/pandas/types/common.py @@ -7,6 +7,7 @@ from .dtypes import (CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, DatetimeTZDtypeType, PeriodDtype, PeriodDtypeType, + IntervalDtype, IntervalDtypeType, ExtensionDtype) from .generic import (ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, @@ -139,6 +140,10 @@ def is_period_dtype(arr_or_dtype): return PeriodDtype.is_dtype(arr_or_dtype) +def is_interval_dtype(arr_or_dtype): + return IntervalDtype.is_dtype(arr_or_dtype) + + def is_categorical_dtype(arr_or_dtype): return CategoricalDtype.is_dtype(arr_or_dtype) @@ -501,6 +506,8 @@ def _coerce_to_dtype(dtype): dtype = DatetimeTZDtype(dtype) elif is_period_dtype(dtype): dtype = PeriodDtype(dtype) + elif is_interval_dtype(dtype): + dtype = IntervalDtype(dtype) else: dtype = np.dtype(dtype) return dtype @@ -538,6 +545,8 @@ def _get_dtype(arr_or_dtype): return arr_or_dtype elif isinstance(arr_or_dtype, PeriodDtype): return arr_or_dtype + elif isinstance(arr_or_dtype, IntervalDtype): + return arr_or_dtype elif isinstance(arr_or_dtype, string_types): if is_categorical_dtype(arr_or_dtype): return CategoricalDtype.construct_from_string(arr_or_dtype) @@ -545,6 +554,8 @@ def _get_dtype(arr_or_dtype): return DatetimeTZDtype.construct_from_string(arr_or_dtype) elif is_period_dtype(arr_or_dtype): return PeriodDtype.construct_from_string(arr_or_dtype) + elif is_interval_dtype(arr_or_dtype): + return IntervalDtype.construct_from_string(arr_or_dtype) if hasattr(arr_or_dtype, 'dtype'): arr_or_dtype = arr_or_dtype.dtype @@ -575,6 +586,8 @@ def _get_dtype_type(arr_or_dtype): return CategoricalDtypeType elif isinstance(arr_or_dtype, DatetimeTZDtype): return DatetimeTZDtypeType + elif isinstance(arr_or_dtype, IntervalDtype): + return IntervalDtypeType elif isinstance(arr_or_dtype, PeriodDtype): return PeriodDtypeType elif isinstance(arr_or_dtype, string_types): @@ -584,6 +597,8 @@ def _get_dtype_type(arr_or_dtype): return DatetimeTZDtypeType elif is_period_dtype(arr_or_dtype): return PeriodDtypeType + elif is_interval_dtype(arr_or_dtype): + return IntervalDtypeType return _get_dtype_type(np.dtype(arr_or_dtype)) try: return arr_or_dtype.dtype.type @@ -695,6 +710,8 @@ def pandas_dtype(dtype): return dtype elif isinstance(dtype, CategoricalDtype): return dtype + elif isinstance(dtype, IntervalDtype): + return dtype elif isinstance(dtype, string_types): try: return DatetimeTZDtype.construct_from_string(dtype) @@ -708,6 +725,12 @@ def pandas_dtype(dtype): except TypeError: pass + elif dtype.startswith('interval[') or dtype.startswith('Interval['): + try: + return IntervalDtype.construct_from_string(dtype) + except TypeError: + pass + try: return CategoricalDtype.construct_from_string(dtype) except TypeError: diff --git a/pandas/types/dtypes.py b/pandas/types/dtypes.py index 
c3494df93476b..7913950a597c9 100644 --- a/pandas/types/dtypes.py +++ b/pandas/types/dtypes.py @@ -367,3 +367,112 @@ def is_dtype(cls, dtype): else: return False return super(PeriodDtype, cls).is_dtype(dtype) + + +class IntervalDtypeType(type): + """ + the type of IntervalDtype; this metaclass determines subclass ability + """ + pass + + +class IntervalDtype(ExtensionDtype): + __metaclass__ = IntervalDtypeType + """ + An Interval duck-typed class, suitable for holding an interval + + THIS IS NOT A REAL NUMPY DTYPE + """ + type = IntervalDtypeType + kind = None + str = '|O08' + base = np.dtype('O') + num = 103 + _metadata = ['subtype'] + _match = re.compile("(I|i)nterval\[(?P<subtype>.+)\]") + _cache = {} + + def __new__(cls, subtype=None): + """ + Parameters + ---------- + subtype : the dtype of the Interval + """ + + if isinstance(subtype, IntervalDtype): + return subtype + elif subtype is None or (isinstance(subtype, compat.string_types) and + subtype == 'interval'): + subtype = None + else: + if isinstance(subtype, compat.string_types): + m = cls._match.search(subtype) + if m is not None: + subtype = m.group('subtype') + + from pandas.types.common import pandas_dtype + try: + subtype = pandas_dtype(subtype) + except TypeError: + raise ValueError("could not construct IntervalDtype") + + try: + return cls._cache[str(subtype)] + except KeyError: + u = object.__new__(cls) + u.subtype = subtype + cls._cache[str(subtype)] = u + return u + + @classmethod + def construct_from_string(cls, string): + """ + attempt to construct this type from a string, raise a TypeError + if it's not possible + """ + if isinstance(string, compat.string_types): + try: + return cls(string) + except ValueError: + pass + raise TypeError("could not construct IntervalDtype") + + def __unicode__(self): + if self.subtype is None: + return "interval" + return "interval[{subtype}]".format(subtype=self.subtype) + + @property + def name(self): + return str(self) + + def __hash__(self): + # make myself hashable + return hash(str(self)) + + def __eq__(self, other): + if isinstance(other, compat.string_types): + return other == self.name or other == self.name.title() + + return (isinstance(other, IntervalDtype) and + self.subtype == other.subtype) + + @classmethod + def is_dtype(cls, dtype): + """ + Return a boolean indicating whether the passed type is an actual + dtype that we can match (via string or type) + """ + + if isinstance(dtype, compat.string_types): + if dtype.lower().startswith('interval'): + try: + if cls.construct_from_string(dtype) is not None: + return True + else: + return False + except ValueError: + return False + else: + return False + return super(IntervalDtype, cls).is_dtype(dtype) diff --git a/pandas/types/generic.py b/pandas/types/generic.py index e7b54ccc6f25e..90608c18ae503 100644 --- a/pandas/types/generic.py +++ b/pandas/types/generic.py @@ -32,12 +32,14 @@ def _check(cls, inst): ("periodindex", )) ABCCategoricalIndex = create_pandas_abc_type("ABCCategoricalIndex", "_typ", ("categoricalindex", )) +ABCIntervalIndex = create_pandas_abc_type("ABCIntervalIndex", "_typ", + ("intervalindex", )) ABCIndexClass = create_pandas_abc_type("ABCIndexClass", "_typ", ("index", "int64index", "rangeindex", "float64index", "uint64index", "multiindex", "datetimeindex", "timedeltaindex", "periodindex", - "categoricalindex")) + "categoricalindex", "intervalindex")) ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series", )) ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe", )) diff --git 
a/pandas/types/inference.py b/pandas/types/inference.py index 91418677c6b19..b0a93d24228af 100644 --- a/pandas/types/inference.py +++ b/pandas/types/inference.py @@ -20,6 +20,8 @@ is_decimal = lib.is_decimal +is_interval = lib.is_interval + def is_number(obj): """ diff --git a/pandas/types/missing.py b/pandas/types/missing.py index ea49af9884f5a..af3a873bc2866 100644 --- a/pandas/types/missing.py +++ b/pandas/types/missing.py @@ -9,7 +9,7 @@ from .common import (is_string_dtype, is_datetimelike, is_datetimelike_v_numeric, is_float_dtype, is_datetime64_dtype, is_datetime64tz_dtype, - is_timedelta64_dtype, + is_timedelta64_dtype, is_interval_dtype, is_complex_dtype, is_categorical_dtype, is_string_like_dtype, is_bool_dtype, is_integer_dtype, is_dtype_equal, @@ -127,6 +127,9 @@ def _isnull_ndarraylike(obj): if not isinstance(values, Categorical): values = values.values result = values.isnull() + elif is_interval_dtype(values): + from pandas import IntervalIndex + result = IntervalIndex(obj).isnull() else: # Working around NumPy ticket 1542 diff --git a/pandas/util/testing.py b/pandas/util/testing.py index d5986a7f390e5..c73cca56f975a 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -29,6 +29,7 @@ is_number, is_bool, needs_i8_conversion, is_categorical_dtype, + is_interval_dtype, is_sequence, is_list_like) from pandas.formats.printing import pprint_thing @@ -43,9 +44,11 @@ from pandas.computation import expressions as expr -from pandas import (bdate_range, CategoricalIndex, Categorical, DatetimeIndex, - TimedeltaIndex, PeriodIndex, RangeIndex, Index, MultiIndex, +from pandas import (bdate_range, CategoricalIndex, Categorical, IntervalIndex, + DatetimeIndex, TimedeltaIndex, PeriodIndex, RangeIndex, + Index, MultiIndex, Series, DataFrame, Panel, Panel4D) + from pandas.util.decorators import deprecate from pandas.util import libtesting from pandas.io.common import urlopen @@ -943,6 +946,9 @@ def _get_ilevel_values(index, level): assert_attr_equal('names', left, right, obj=obj) if isinstance(left, pd.PeriodIndex) or isinstance(right, pd.PeriodIndex): assert_attr_equal('freq', left, right, obj=obj) + if (isinstance(left, pd.IntervalIndex) or + isinstance(right, pd.IntervalIndex)): + assert_attr_equal('closed', left, right, obj=obj) if check_categorical: if is_categorical_dtype(left) or is_categorical_dtype(right): @@ -1307,6 +1313,12 @@ def assert_series_equal(left, right, check_dtype=True, else: assert_numpy_array_equal(left.get_values(), right.get_values(), check_dtype=check_dtype) + elif is_interval_dtype(left) or is_interval_dtype(right): + # TODO: big hack here + l = pd.IntervalIndex(left) + r = pd.IntervalIndex(right) + assert_index_equal(l, r, obj='{0}.index'.format(obj)) + else: libtesting.assert_almost_equal(left.get_values(), right.get_values(), check_less_precise=check_less_precise, @@ -1687,6 +1699,12 @@ def makeCategoricalIndex(k=10, n=3, name=None): return CategoricalIndex(np.random.choice(x, k), name=name) +def makeIntervalIndex(k=10, name=None): + """ make a length k IntervalIndex """ + x = np.linspace(0, 100, num=(k + 1)) + return IntervalIndex.from_breaks(x, name=name) + + def makeBoolIndex(k=10, name=None): if k == 1: return Index([True], name=name) diff --git a/setup.py b/setup.py index 96b25f7427370..6707af7eb0908 100755 --- a/setup.py +++ b/setup.py @@ -119,6 +119,7 @@ def is_platform_mac(): '_libs/hashtable_func_helper.pxi.in'], 'index': ['_libs/index_class_helper.pxi.in'], 'sparse': ['sparse/sparse_op_helper.pxi.in'], + 'interval': ['_libs/intervaltree.pxi.in'] } _pxifiles = [] @@ -335,6 +336,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/index.pyx', 'pandas/_libs/algos.pyx', 'pandas/_libs/join.pyx', + 'pandas/_libs/interval.pyx', 'pandas/core/window.pyx', 'pandas/sparse/sparse.pyx', 'pandas/util/testing.pyx', @@ -508,6 +510,9 @@ def pxd(name): 'depends': _pxi_dep['join']}, '_libs.reshape': {'pyxfile': '_libs/reshape', 'depends': _pxi_dep['reshape']}, + '_libs.interval': {'pyxfile': '_libs/interval', + 'pxdfiles': ['_libs/hashtable'], + 'depends': _pxi_dep['interval']}, 'core.libwindow': {'pyxfile': 'core/window', 'pxdfiles': ['_libs/src/skiplist', '_libs/src/util'], 'depends': ['pandas/_libs/src/skiplist.pyx',
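That concludes the interval patch. Before the next commit, a short sketch of the user-facing machinery it wires up; this is a hedged illustration assuming a build containing this patch, and exact reprs may differ::

    import pandas as pd

    # Interval is a scalar with membership semantics
    iv = pd.Interval(0, 5, closed='right')
    assert 2.5 in iv
    assert 0 not in iv            # (0, 5] excludes the left endpoint

    # IntervalIndex carries the new duck-typed interval dtype
    idx = pd.IntervalIndex.from_breaks([0, 1, 2, 3])
    print(idx.dtype)              # interval[int64]
    print(idx.closed)             # 'right'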
From 8b404539b8b8f2ce2eaf38c7cd2f7f3925c6e171 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 14 Apr 2017 15:37:03 +0000 Subject: [PATCH 388/933] ENH: add Series & DataFrame .agg/.aggregate (#14668) * ENH: add Series & DataFrame .agg/.aggregate to provide convenient function application that mimics the groupby(..).agg/.aggregate interface .apply is now a synonym for .agg, and will accept dict/list-likes for aggregations CLN: rename .name attr -> ._selection_name from SeriesGroupby for compat (didn't exist on DataFrameGroupBy) resolves conflicts w.r.t. setting .name on a groupby object closes #1623 closes #14464 custom .describe closes #14483 closes #15015 closes #7014 * DOC/TST: test for deprecation in .agg additional doc updates * whatsnew fixes --- doc/source/api.rst | 4 + doc/source/basics.rst | 231 +++++++++++++++++++++- doc/source/computation.rst | 12 +- doc/source/groupby.rst | 4 +- doc/source/timeseries.rst | 6 +- doc/source/whatsnew/v0.20.0.txt | 64 ++++++ pandas/core/base.py | 25 +-- pandas/core/frame.py | 70 ++++++- pandas/core/generic.py | 85 +++++++- pandas/core/series.py | 55 ++++++ pandas/tests/frame/test_apply.py | 178 +++++++++++++++++ pandas/tests/groupby/test_aggregate.py | 6 +- pandas/tests/groupby/test_value_counts.py | 1 + pandas/tests/series/test_apply.py | 181 ++++++++++++++++- 14 files changed, 877 insertions(+), 45 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 6ba8c2b8ead67..6d1765ce65714 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -314,6 +314,8 @@ Function application, GroupBy & Window :toctree: generated/ Series.apply + Series.aggregate + Series.transform Series.map Series.groupby Series.rolling @@ -831,6 +833,8 @@ Function application, GroupBy & Window DataFrame.apply DataFrame.applymap + DataFrame.aggregate + DataFrame.transform DataFrame.groupby DataFrame.rolling DataFrame.expanding diff --git a/doc/source/basics.rst b/doc/source/basics.rst index f649b3fd8a9a3..ab5d7e69c923b 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -702,7 +702,8 @@ on an entire ``DataFrame`` or ``Series``, row- or column-wise, or elementwise. 1. `Tablewise Function Application`_: :meth:`~DataFrame.pipe` 2. `Row or Column-wise Function Application`_: :meth:`~DataFrame.apply` -3. Elementwise_ function application: :meth:`~DataFrame.applymap` +3. `Aggregation API`_: :meth:`~DataFrame.agg` and :meth:`~DataFrame.transform` +4. `Applying Elementwise Functions`_: :meth:`~DataFrame.applymap` .. _basics.pipe: @@ -778,6 +779,13 @@ statistics methods, take an optional ``axis`` argument: df.apply(np.cumsum) df.apply(np.exp) +``.apply()`` will also dispatch on a string method name. + +.. 
ipython:: python + + df.apply('mean') + df.apply('mean', axis=1) + Depending on the return type of the function passed to :meth:`~DataFrame.apply`, the result will either be of lower dimension or the same dimension. @@ -827,16 +835,223 @@ set to True, the passed function will instead receive an ndarray object, which has positive performance implications if you do not need the indexing functionality. -.. seealso:: +.. _basics.aggregate: + +Aggregation API +~~~~~~~~~~~~~~~ + +.. versionadded:: 0.20.0 + +The aggregation API allows one to express possibly multiple aggregation operations in a single concise way. +This API is similar across pandas objects, see :ref:`groupby API `, the +:ref:`window functions API `, and the :ref:`resample API `. +The entry point for aggregation is the method :meth:`~DataFrame.aggregate`, or the alias :meth:`~DataFrame.agg`. + +We will use a similar starting frame from above: + +.. ipython:: python + + tsdf = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], + index=pd.date_range('1/1/2000', periods=10)) + tsdf.iloc[3:7] = np.nan + tsdf + +Using a single function is equivalent to :meth:`~DataFrame.apply`; you can also pass named methods as strings. +These will return a ``Series`` of the aggregated output: + +.. ipython:: python + + tsdf.agg(np.sum) + + tsdf.agg('sum') + + # these are equivalent to a ``.sum()`` because we are aggregating on a single function + tsdf.sum() + +Single aggregations on a ``Series`` will result in a scalar value: + +.. ipython:: python + + tsdf.A.agg('sum') + + +Aggregating with multiple functions ++++++++++++++++++++++++++++++++++++ + +You can pass multiple aggregation arguments as a list. +The results of each of the passed functions will be a row in the resultant ``DataFrame``. +These are naturally named from the aggregation function. + +.. ipython:: python + + tsdf.agg(['sum']) + +Multiple functions yield multiple rows: + +.. ipython:: python + + tsdf.agg(['sum', 'mean']) + +On a ``Series``, multiple functions return a ``Series``, indexed by the function names: + +.. ipython:: python + + tsdf.A.agg(['sum', 'mean']) + +Passing a ``lambda`` function will yield a ``<lambda>`` named row: + +.. ipython:: python + + tsdf.A.agg(['sum', lambda x: x.mean()]) + +Passing a named function will yield that name for the row: + +.. ipython:: python + + def mymean(x): + return x.mean() + + tsdf.A.agg(['sum', mymean]) + +Aggregating with a dict ++++++++++++++++++++++++ + +Passing a dictionary of column names to a scalar or a list of scalars, to ``DataFrame.agg`` +allows you to customize which functions are applied to which columns. + +.. ipython:: python + + tsdf.agg({'A': 'mean', 'B': 'sum'}) + +Passing a list-like will generate a ``DataFrame`` output. You will get a matrix-like output +of all of the aggregators. The output will consist of all unique functions. Those that are +not noted for a particular column will be ``NaN``: + +.. ipython:: python + + tsdf.agg({'A': ['mean', 'min'], 'B': 'sum'}) + +.. _basics.aggregation.mixed_dtypes: + +Mixed Dtypes +++++++++++++ - When presented with mixed dtypes that cannot aggregate, ``.agg`` will only take the valid +aggregations. This is similar to how groupby ``.agg`` works. -.. _Elementwise: +.. 
ipython:: python + + mdf = pd.DataFrame({'A': [1, 2, 3], + 'B': [1., 2., 3.], + 'C': ['foo', 'bar', 'baz'], + 'D': pd.date_range('20130101', periods=3)}) + mdf.dtypes + +.. ipython:: python + + mdf.agg(['min', 'sum']) + +.. _basics.aggregation.custom_describe: + +Custom describe ++++++++++++++++ + +With ``.agg()`` it is possible to easily create a custom describe function, similar +to the built in :ref:`describe function `. + +.. ipython:: python + + from functools import partial + + q_25 = partial(pd.Series.quantile, q=0.25) + q_25.__name__ = '25%' + q_75 = partial(pd.Series.quantile, q=0.75) + q_75.__name__ = '75%' + + tsdf.agg(['count', 'mean', 'std', 'min', q_25, 'median', q_75, 'max']) + +.. _basics.transform: + +Transform API +~~~~~~~~~~~ -Applying elementwise Python functions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. versionadded:: 0.20.0 + +The :method:`~DataFrame.transform` method returns an object that is indexed the same (same size) +as the original. This API allows you to provide *multiple* operations at the same +time rather than one-by-one. Its api is quite similar to the ``.agg`` API. + +Use a similar frame to the above sections. + +.. ipython:: python + + tsdf = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], + index=pd.date_range('1/1/2000', periods=10)) + tsdf.iloc[3:7] = np.nan + tsdf + +Transform the entire frame. Transform allows functions to input as a numpy function, string +function name and user defined function. + +.. ipython:: python + + tsdf.transform(np.abs) + tsdf.transform('abs') + tsdf.transform(lambda x: x.abs()) + +Since this is a single function, this is equivalent to a ufunc application + +.. ipython:: python + + np.abs(tsdf) + +Passing a single function to ``.transform()`` with a Series will yield a single Series in return. + +.. ipython:: python + + tsdf.A.transform(np.abs) + + +Transform with multiple functions ++++++++++++++++++++++++++++++++++ + +Passing multiple functions will yield a column multi-indexed DataFrame. +The first level will be the original frame column names; the second level +will be the names of the transforming functions. + +.. ipython:: python + + tsdf.transform([np.abs, lambda x: x+1]) + +Passing multiple functions to a Series will yield a DataFrame. The +resulting column names will be the transforming functions. + +.. ipython:: python + + tsdf.A.transform([np.abs, lambda x: x+1]) + + +Transforming with a dict +++++++++++++++++++++++++ + + +Passing a dict of functions will allow selective transforming per column. + +.. ipython:: python + + tsdf.transform({'A': np.abs, 'B': lambda x: x+1}) + +Passing a dict of lists will generate a multi-indexed DataFrame with these +selective transforms. + +.. ipython:: python + + tsdf.transform({'A': np.abs, 'B': [lambda x: x+1, 'sqrt']}) + +.. _basics.elementwise: + +Applying Elementwise Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Since not all functions can be vectorized (accept NumPy arrays and return another array or value), the methods :meth:`~DataFrame.applymap` on DataFrame diff --git a/doc/source/computation.rst b/doc/source/computation.rst index f6c912bf59b34..8c75d4355c99a 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -617,7 +617,9 @@ Aggregation ----------- Once the ``Rolling``, ``Expanding`` or ``EWM`` objects have been created, several methods are available to -perform multiple computations on the data. This is very similar to a ``.groupby(...).agg`` seen :ref:`here `. +perform multiple computations on the data. 
These operations are similar to the :ref:`aggregating API `, +:ref:`groupby aggregates `, and :ref:`resample API `. + .. ipython:: python @@ -642,10 +644,10 @@ columns if none are selected. .. _stats.aggregate.multifunc: -Applying multiple functions at once -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Applying multiple functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~ -With windowed Series you can also pass a list or dict of functions to do +With windowed ``Series`` you can also pass a list of functions to do aggregation with, outputting a DataFrame: .. ipython:: python @@ -666,7 +668,7 @@ Applying different functions to DataFrame columns ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ By passing a dict to ``aggregate`` you can apply a different aggregation to the -columns of a DataFrame: +columns of a ``DataFrame``: .. ipython:: python :okexcept: diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 03ee5e0d67913..cf4f1059ae17a 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -439,7 +439,9 @@ Aggregation ----------- Once the GroupBy object has been created, several methods are available to -perform a computation on the grouped data. +perform a computation on the grouped data. These operations are similar to the +:ref:`aggregating API `, :ref:`window functions API `, +and :ref:`resample API `. An obvious one is aggregation via the ``aggregate`` or equivalently ``agg`` method: diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 0a957772d785e..6a4ea2d5319ab 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1519,11 +1519,13 @@ We can instead only resample those groups where we have points as follows: ts.groupby(partial(round, freq='3T')).sum() +.. _timeseries.aggregate: + Aggregation ~~~~~~~~~~~ -Similar to :ref:`groupby aggregates ` and the :ref:`window functions `, a ``Resampler`` can be selectively -resampled. +Similar to the :ref:`aggregating API `, :ref:`groupby aggregates API `, and the :ref:`window functions API `, +a ``Resampler`` can be selectively resampled. Resampling a ``DataFrame``, the default will be to act on all columns with the same function. diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 04aed6c2c5466..da32de750e7de 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -9,6 +9,8 @@ users upgrade to this version. Highlights include: +- new ``.agg()`` API for Series/DataFrame similar to the groupby-rolling-resample API's, see :ref:`here ` +- Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. - The ``.ix`` indexer has been deprecated, see :ref:`here ` - ``Panel`` has been deprecated, see :ref:`here ` - Improved user API when accessing levels in ``.groupby()``, see :ref:`here ` @@ -32,6 +34,68 @@ Check the :ref:`API Changes ` and :ref:`deprecations New features ~~~~~~~~~~~~ +.. _whatsnew_0200.enhancements.agg: + +``agg`` API +^^^^^^^^^^^ + +Series & DataFrame have been enhanced to support the aggregation API. This is an already familiar API that +is supported for groupby, window operations, and resampling. This allows one to express, possibly multiple +aggregation operations, in a single concise way by using :meth:`~DataFrame.agg`, +and :meth:`~DataFrame.transform`. The full documentation is :ref:`here `` (:issue:`1623`) + +Here is a sample + +.. 
ipython:: python + + df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], + index=pd.date_range('1/1/2000', periods=10)) + df.iloc[3:7] = np.nan + df + +One can operate using string function names, callables, lists, or dictionaries of these. + +Using a single function is equivalent to ``.apply``. + +.. ipython:: python + + df.agg('sum') + +Multiple functions in lists. + +.. ipython:: python + + df.agg(['sum', 'min']) + +Dictionaries to provide the ability to provide selective aggregation per column. +You will get a matrix-like output of all of the aggregators. The output will consist +of all unique functions. Those that are not noted for a particular column will be ``NaN``: + +.. ipython:: python + + df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']}) + +The API also supports a ``.transform()`` function to provide for broadcasting results. + +.. ipython:: python + + df.transform(['abs', lambda x: x - x.min()]) + +When presented with mixed dtypes that cannot aggregate, ``.agg()`` will only take the valid +aggregations. This is similar to how groupby ``.agg()`` works. (:issue:`15015`) + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3], + 'B': [1., 2., 3.], + 'C': ['foo', 'bar', 'baz'], + 'D': pd.date_range('20130101', periods=3)}) + df.dtypes + +.. ipython:: python + + df.agg(['min', 'sum']) + .. _whatsnew_0200.enhancements.dataio_dtype: ``dtype`` keyword for data IO diff --git a/pandas/core/base.py b/pandas/core/base.py index 6566ee38c1ade..33c95197debdc 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -470,6 +470,15 @@ def _aggregate(self, arg, *args, **kwargs): obj = self._selected_obj + def nested_renaming_depr(level=4): + # deprecation of nested renaming + # GH 15931 + warnings.warn( + ("using a dict with renaming " + "is deprecated and will be removed in a future " + "version"), + FutureWarning, stacklevel=level) + # if we have a dict of any non-scalars # eg. {'A' : ['mean']}, normalize all to # be list-likes @@ -498,14 +507,10 @@ raise SpecificationError('cannot perform renaming ' 'for {0} with a nested ' 'dictionary'.format(k)) + nested_renaming_depr(4 + (_level or 0)) - # deprecation of nested renaming - # GH 15931 - warnings.warn( - ("using a dict with renaming " - "is deprecated and will be removed in a future " - "version"), - FutureWarning, stacklevel=4) + elif isinstance(obj, ABCSeries): + nested_renaming_depr() arg = new_arg @@ -515,11 +520,7 @@ keys = list(compat.iterkeys(arg)) if (isinstance(obj, ABCDataFrame) and len(obj.columns.intersection(keys)) != len(keys)): - warnings.warn( - ("using a dict with renaming " - "is deprecated and will be removed in a future " - "version"), - FutureWarning, stacklevel=4) + nested_renaming_depr() from pandas.tools.concat import concat diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c8c21b0c5fd7d..4565250c78387 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4189,6 +4189,42 @@ def diff(self, periods=1, axis=0): # ---------------------------------------------------------------------- # Function application + def _gotitem(self, key, ndim, subset=None): + """ + sub-classes to define + return a sliced object + + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + """ + if subset is None: + subset = self + + # TODO: _shallow_copy(subset)? 
+ return self[key] + + @Appender(_shared_docs['aggregate'] % _shared_doc_kwargs) + def aggregate(self, func, axis=0, *args, **kwargs): + axis = self._get_axis_number(axis) + + # TODO: flipped axis + result = None + if axis == 0: + try: + result, how = self._aggregate(func, axis=0, *args, **kwargs) + except TypeError: + pass + if result is None: + return self.apply(func, axis=axis, args=args, **kwargs) + return result + + agg = aggregate + def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, args=(), **kwds): """ @@ -4244,22 +4280,35 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, See also -------- DataFrame.applymap: For elementwise operations + DataFrame.agg: only perform aggregating type operations + DataFrame.transform: only perform transforming type operations Returns ------- applied : Series or DataFrame """ axis = self._get_axis_number(axis) - if kwds or args and not isinstance(func, np.ufunc): + ignore_failures = kwds.pop('ignore_failures', False) + + # dispatch to agg + if axis == 0 and isinstance(func, (list, dict)): + return self.aggregate(func, axis=axis, *args, **kwds) + + if len(self.columns) == 0 and len(self.index) == 0: + return self._apply_empty_result(func, axis, reduce, *args, **kwds) + # if we are a string, try to dispatch + if isinstance(func, compat.string_types): + if axis: + kwds['axis'] = axis + return getattr(self, func)(*args, **kwds) + + if kwds or args and not isinstance(func, np.ufunc): def f(x): return func(x, *args, **kwds) else: f = func - if len(self.columns) == 0 and len(self.index) == 0: - return self._apply_empty_result(func, axis, reduce, *args, **kwds) - if isinstance(f, np.ufunc): with np.errstate(all='ignore'): results = f(self.values) @@ -4276,7 +4325,10 @@ def f(x): else: if reduce is None: reduce = True - return self._apply_standard(f, axis, reduce=reduce) + return self._apply_standard( + f, axis, + reduce=reduce, + ignore_failures=ignore_failures) else: return self._apply_broadcast(f, axis) @@ -5085,7 +5137,13 @@ def f(x): # this can end up with a non-reduction # but not always. if the types are mixed # with datelike then need to make sure a series - result = self.apply(f, reduce=False) + + # we only end up here if we have not specified + # numeric_only and yet we have tried a + # column-by-column reduction, where we have mixed type. + # So let's just do what we can + result = self.apply(f, reduce=False, + ignore_failures=True) if result.ndim == self.ndim: result = result.iloc[0] return result diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 86978a9739ca4..316c9f5e2ccd8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -32,7 +32,7 @@ SettingWithCopyError, SettingWithCopyWarning, AbstractMethodError) -from pandas.core.base import PandasObject +from pandas.core.base import PandasObject, SelectionMixin from pandas.core.index import (Index, MultiIndex, _ensure_index, InvalidIndexError) import pandas.core.indexing as indexing @@ -91,7 +91,7 @@ def _single_replace(self, to_replace, method, inplace, limit): return result -class NDFrame(PandasObject): +class NDFrame(PandasObject, SelectionMixin): """ N-dimensional analogue of DataFrame. 
Store multi-dimensional in a size-mutable, labeled data structure @@ -459,6 +459,16 @@ def size(self): """number of elements in the NDFrame""" return np.prod(self.shape) + @property + def _selected_obj(self): + """ internal compat with SelectionMixin """ + return self + + @property + def _obj_with_exclusions(self): + """ internal compat with SelectionMixin """ + return self + def _expand_axes(self, key): new_axes = [] for k, ax in zip(key, self.axes): @@ -2853,6 +2863,66 @@ def pipe(self, func, *args, **kwargs): else: return func(self, *args, **kwargs) + _shared_docs['aggregate'] = (""" + Aggregate using input function or dict of {column -> + function} + + .. versionadded:: 0.20.0 + + Parameters + ---------- + func : callable, string, dictionary, or list of string/callables + Function to use for aggregating the data. If a function, must either + work when passed a DataFrame or when passed to DataFrame.apply. If + passed a dict, the keys must be DataFrame column names. + + Accepted Combinations are: + - string function name + - function + - list of functions + - dict of column names -> functions (or list of functions) + + Notes + ----- + Numpy functions mean/median/prod/sum/std/var are special cased so the + default behavior is applying the function along axis=0 + (e.g., np.mean(arr_2d, axis=0)) as opposed to + mimicking the default Numpy behavior (e.g., np.mean(arr_2d)). + + Returns + ------- + aggregated : %(klass)s + + See also + -------- + """) + + _shared_docs['transform'] = (""" + Call function producing a like-indexed %(klass)s + and return a %(klass)s with the transformed values + + .. versionadded:: 0.20.0 + + Parameters + ---------- + func : callable, string, dictionary, or list of string/callables + To apply to column + + Accepted Combinations are: + - string function name + - function + - list of functions + - dict of column names -> functions (or list of functions) + + Examples + -------- + >>> df.transform(lambda x: (x - x.mean()) / x.std()) + + Returns + ------- + transformed : %(klass)s + """) + # ---------------------------------------------------------------------- # Attribute access @@ -5990,6 +6060,17 @@ def ewm(self, com=None, span=None, halflife=None, alpha=None, cls.ewm = ewm + @Appender(_shared_docs['transform'] % _shared_doc_kwargs) + def transform(self, func, *args, **kwargs): + result = self.agg(func, *args, **kwargs) + if is_scalar(result) or len(result) != len(self): + raise ValueError("transforms cannot produce " + "aggregated results") + + return result + + cls.transform = transform + def _doc_parms(cls): """Return a tuple of the doc parms."""
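Note that the ``transform`` added to ``NDFrame`` above simply delegates to ``.agg`` and then rejects any result that is not like-indexed. A small sketch of that contract, assuming this patch is applied; the error text is taken directly from the code above::

    import pandas as pd

    df = pd.DataFrame({'A': [1.0, 2.0, 3.0]})

    # a like-indexed result is allowed
    print(df.transform(lambda x: x - x.mean()))

    # a reducer produces an aggregated (shorter) result, so transform raises
    try:
        df.transform('sum')
    except ValueError as err:
        print(err)            # transforms cannot produce aggregated results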
diff --git a/pandas/core/series.py b/pandas/core/series.py index 5ee3ca73742ae..3305f0b6c439e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2144,6 +2144,49 @@ def map_f(values, f): return self._constructor(new_values, index=self.index).__finalize__(self) + def _gotitem(self, key, ndim, subset=None): + """ + sub-classes to define + return a sliced object + + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + """ + return self + + @Appender(generic._shared_docs['aggregate'] % _shared_doc_kwargs) + def aggregate(self, func, axis=0, *args, **kwargs): + axis = self._get_axis_number(axis) + result, how = self._aggregate(func, *args, **kwargs) + if result is None: + + # we can be called from an inner function which + # passes this meta-data + kwargs.pop('_axis', None) + kwargs.pop('_level', None) + + # try a regular apply, this evaluates lambdas + # row-by-row; however if the lambda is expecting a Series + # expression, e.g.: lambda x: x-x.quantile(0.25) + # this will fail, so we can try a vectorized evaluation + + # we cannot FIRST try the vectorized evaluation, because + # then .agg and .apply would have different semantics if the + # operation is actually defined on the Series, e.g. str + try: + result = self.apply(func, *args, **kwargs) + except (ValueError, AttributeError, TypeError): + result = func(self, *args, **kwargs) + + return result + + agg = aggregate + def apply(self, func, convert_dtype=True, args=(), **kwds): """ Invoke function on values of Series. Can be ufunc (a NumPy function @@ -2167,6 +2210,8 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): See also -------- Series.map: For element-wise operations + Series.agg: only perform aggregating type operations + Series.transform: only perform transforming type operations Examples -------- @@ -2244,6 +2289,15 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): return self._constructor(dtype=self.dtype, index=self.index).__finalize__(self) + # dispatch to agg + if isinstance(func, (list, dict)): + return self.aggregate(func, *args, **kwds) + + # if we are a string, try to dispatch + if isinstance(func, compat.string_types): + return self._try_aggregate_string_function(func, *args, **kwds) + + # handle ufuncs and lambdas if kwds or args and not isinstance(func, np.ufunc): f = lambda x: func(x, *args, **kwds) else: @@ -2253,6 +2307,7 @@ if isinstance(f, np.ufunc): return f(self) + # row-wise access if is_extension_type(self.dtype): mapped = self._values.map(f) else: diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 30fde4b5b78d8..157cd1cdf1b22 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -106,6 +106,17 @@ def test_apply_standard_nonunique(self): rs = df.T.apply(lambda s: s[0], axis=0) assert_series_equal(rs, xp) + def test_with_string_args(self): + + for arg in ['sum', 'mean', 'min', 'max', 'std']: + result = self.frame.apply(arg) + expected = getattr(self.frame, arg)() + tm.assert_series_equal(result, expected) + + result = self.frame.apply(arg, axis=1) + expected = getattr(self.frame, arg)(axis=1) + tm.assert_series_equal(result, expected) + def test_apply_broadcast(self): broadcasted = self.frame.apply(np.mean, broadcast=True) agged = self.frame.apply(np.mean) @@ -455,3 +466,170 @@ def test_apply_non_numpy_dtype(self): df = DataFrame({'dt': ['a', 'b', 'c', 'a']}, dtype='category') result = df.apply(lambda x: x) assert_frame_equal(result, df) + + +def zip_frames(*frames): + """ + take a list of frames, zip the columns together for each + assume that these all have the first frame columns + + return a new frame + """ + columns = frames[0].columns + zipped = [f[c] for c in columns for f in frames] + return pd.concat(zipped, axis=1) + + +class TestDataFrameAggregate(tm.TestCase, TestData): + + _multiprocess_can_split_ = True + + def test_agg_transform(self): + + with np.errstate(all='ignore'): + + f_sqrt = np.sqrt(self.frame) + f_abs = np.abs(self.frame) + + # ufunc + result = self.frame.transform(np.sqrt) + expected = f_sqrt.copy() + assert_frame_equal(result, expected) + + result = self.frame.apply(np.sqrt) + assert_frame_equal(result, expected) + + result = self.frame.transform(np.sqrt) + assert_frame_equal(result, expected) + + # list-like + result = 
self.frame.apply([np.sqrt]) + expected = f_sqrt.copy() + expected.columns = pd.MultiIndex.from_product( + [self.frame.columns, ['sqrt']]) + assert_frame_equal(result, expected) + + result = self.frame.transform([np.sqrt]) + assert_frame_equal(result, expected) + + # multiple items in list + # these are in the order as if we are applying both + # functions per series and then concatting + expected = zip_frames(f_sqrt, f_abs) + expected.columns = pd.MultiIndex.from_product( + [self.frame.columns, ['sqrt', 'absolute']]) + result = self.frame.apply([np.sqrt, np.abs]) + assert_frame_equal(result, expected) + + result = self.frame.transform(['sqrt', np.abs]) + assert_frame_equal(result, expected) + + def test_transform_and_agg_err(self): + # cannot both transform and agg + def f(): + self.frame.transform(['max', 'min']) + self.assertRaises(ValueError, f) + + def f(): + with np.errstate(all='ignore'): + self.frame.agg(['max', 'sqrt']) + self.assertRaises(ValueError, f) + + def f(): + with np.errstate(all='ignore'): + self.frame.transform(['max', 'sqrt']) + self.assertRaises(ValueError, f) + + df = pd.DataFrame({'A': range(5), 'B': 5}) + + def f(): + with np.errstate(all='ignore'): + df.agg({'A': ['abs', 'sum'], 'B': ['mean', 'max']}) + + def test_demo(self): + # demonstration tests + df = pd.DataFrame({'A': range(5), 'B': 5}) + + result = df.agg(['min', 'max']) + expected = DataFrame({'A': [0, 4], 'B': [5, 5]}, + columns=['A', 'B'], + index=['min', 'max']) + tm.assert_frame_equal(result, expected) + + result = df.agg({'A': ['min', 'max'], 'B': ['sum', 'max']}) + expected = DataFrame({'A': [4.0, 0.0, np.nan], + 'B': [5.0, np.nan, 25.0]}, + columns=['A', 'B'], + index=['max', 'min', 'sum']) + tm.assert_frame_equal(result.reindex_like(expected), expected) + + def test_agg_dict_nested_renaming_depr(self): + + df = pd.DataFrame({'A': range(5), 'B': 5}) + + # nested renaming + with tm.assert_produces_warning(FutureWarning): + df.agg({'A': {'foo': 'min'}, + 'B': {'bar': 'max'}}) + + def test_agg_reduce(self): + # all reducers + expected = zip_frames(self.frame.mean().to_frame(), + self.frame.max().to_frame(), + self.frame.sum().to_frame()).T + expected.index = ['mean', 'max', 'sum'] + result = self.frame.agg(['mean', 'max', 'sum']) + assert_frame_equal(result, expected) + + # dict input with scalars + result = self.frame.agg({'A': 'mean', 'B': 'sum'}) + expected = Series([self.frame.A.mean(), self.frame.B.sum()], + index=['A', 'B']) + assert_series_equal(result.reindex_like(expected), expected) + + # dict input with lists + result = self.frame.agg({'A': ['mean'], 'B': ['sum']}) + expected = DataFrame({'A': Series([self.frame.A.mean()], + index=['mean']), + 'B': Series([self.frame.B.sum()], + index=['sum'])}) + assert_frame_equal(result.reindex_like(expected), expected) + + # dict input with lists with multiple + result = self.frame.agg({'A': ['mean', 'sum'], + 'B': ['sum', 'max']}) + expected = DataFrame({'A': Series([self.frame.A.mean(), + self.frame.A.sum()], + index=['mean', 'sum']), + 'B': Series([self.frame.B.sum(), + self.frame.B.max()], + index=['sum', 'max'])}) + assert_frame_equal(result.reindex_like(expected), expected) + + def test_nuiscance_columns(self): + + # GH 15015 + df = DataFrame({'A': [1, 2, 3], + 'B': [1., 2., 3.], + 'C': ['foo', 'bar', 'baz'], + 'D': pd.date_range('20130101', periods=3)}) + + result = df.agg('min') + expected = Series([1, 1., 'bar', pd.Timestamp('20130101')], + index=df.columns) + assert_series_equal(result, expected) + + result = df.agg(['min']) + expected = 
DataFrame([[1, 1., 'bar', pd.Timestamp('20130101')]], + index=['min'], columns=df.columns) + assert_frame_equal(result, expected) + + result = df.agg('sum') + expected = Series([6, 6., 'foobarbaz'], + index=['A', 'B', 'C']) + assert_series_equal(result, expected) + + result = df.agg(['sum']) + expected = DataFrame([[6, 6., 'foobarbaz']], + index=['sum'], columns=['A', 'B', 'C']) + assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 22d1de99c48be..2abae97b3151b 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -310,12 +310,14 @@ def test_agg_dict_renaming_deprecation(self): 'B': range(5), 'C': range(5)}) - with tm.assert_produces_warning(FutureWarning) as w: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False) as w: df.groupby('A').agg({'B': {'foo': ['sum', 'max']}, 'C': {'bar': ['count', 'min']}}) assert "using a dict with renaming" in str(w[0].message) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): df.groupby('A')[['B', 'C']].agg({'ma': 'max'}) with tm.assert_produces_warning(FutureWarning) as w: diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 801d0da070112..b70a03ec3a1d3 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -7,6 +7,7 @@ from pandas import MultiIndex, DataFrame, Series, date_range +@pytest.mark.slow @pytest.mark.parametrize("n,m", product((100, 1000), (5, 20))) def test_series_groupby_value_counts(n, m): np.random.seed(1234) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 16d1466bb90fe..524167602c249 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -1,13 +1,14 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +from collections import OrderedDict import numpy as np import pandas as pd from pandas import (Index, Series, DataFrame, isnull) from pandas.compat import lrange from pandas import compat -from pandas.util.testing import assert_series_equal +from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm from .common import TestData @@ -23,16 +24,11 @@ def test_apply(self): import math assert_series_equal(self.ts.apply(math.exp), np.exp(self.ts)) - # how to handle Series result, #2316 - result = self.ts.apply(lambda x: Series( - [x, x ** 2], index=['x', 'x^2'])) - expected = DataFrame({'x': self.ts, 'x^2': self.ts ** 2}) - tm.assert_frame_equal(result, expected) - # empty series s = Series(dtype=object, name='foo', index=pd.Index([], name='bar')) rs = s.apply(lambda x: x) tm.assert_series_equal(s, rs) + # check all metadata (GH 9322) self.assertIsNot(s, rs) self.assertIs(s.index, rs.index) @@ -64,6 +60,13 @@ def test_apply_dont_convert_dtype(self): result = s.apply(f, convert_dtype=False) self.assertEqual(result.dtype, object) + def test_with_string_args(self): + + for arg in ['sum', 'mean', 'min', 'max', 'std']: + result = self.ts.apply(arg) + expected = getattr(self.ts, arg)() + self.assertEqual(result, expected) + def test_apply_args(self): s = Series(['foo,bar']) @@ -136,6 +139,170 @@ def f(x): exp = pd.Series(['Asia/Tokyo'] * 25, name='XX') tm.assert_series_equal(result, exp) + def test_apply_dict_depr(self): + + tsdf = pd.DataFrame(np.random.randn(10, 3), + columns=['A', 'B', 'C'], + 
index=pd.date_range('1/1/2000', periods=10)) + with tm.assert_produces_warning(FutureWarning): + tsdf.A.agg({'foo': ['sum', 'mean']}) + + +class TestSeriesAggregate(TestData, tm.TestCase): + + _multiprocess_can_split_ = True + + def test_transform(self): + # transforming functions + + with np.errstate(all='ignore'): + + f_sqrt = np.sqrt(self.series) + f_abs = np.abs(self.series) + + # ufunc + result = self.series.transform(np.sqrt) + expected = f_sqrt.copy() + assert_series_equal(result, expected) + + result = self.series.apply(np.sqrt) + assert_series_equal(result, expected) + + # list-like + result = self.series.transform([np.sqrt]) + expected = f_sqrt.to_frame().copy() + expected.columns = ['sqrt'] + assert_frame_equal(result, expected) + + result = self.series.transform([np.sqrt]) + assert_frame_equal(result, expected) + + result = self.series.transform(['sqrt']) + assert_frame_equal(result, expected) + + # multiple items in list + # these are in the order as if we are applying both functions per + # series and then concatting + expected = pd.concat([f_sqrt, f_abs], axis=1) + expected.columns = ['sqrt', 'absolute'] + result = self.series.apply([np.sqrt, np.abs]) + assert_frame_equal(result, expected) + + result = self.series.transform(['sqrt', 'abs']) + expected.columns = ['sqrt', 'abs'] + assert_frame_equal(result, expected) + + # dict, provide renaming + expected = pd.concat([f_sqrt, f_abs], axis=1) + expected.columns = ['foo', 'bar'] + expected = expected.unstack().rename('series') + + result = self.series.apply({'foo': np.sqrt, 'bar': np.abs}) + assert_series_equal(result.reindex_like(expected), expected) + + def test_transform_and_agg_error(self): + # we are trying to transform with an aggregator + def f(): + self.series.transform(['min', 'max']) + self.assertRaises(ValueError, f) + + def f(): + with np.errstate(all='ignore'): + self.series.agg(['sqrt', 'max']) + self.assertRaises(ValueError, f) + + def f(): + with np.errstate(all='ignore'): + self.series.transform(['sqrt', 'max']) + self.assertRaises(ValueError, f) + + def f(): + with np.errstate(all='ignore'): + self.series.agg({'foo': np.sqrt, 'bar': 'sum'}) + self.assertRaises(ValueError, f) + + def test_demo(self): + # demonstration tests + s = Series(range(6), dtype='int64', name='series') + + result = s.agg(['min', 'max']) + expected = Series([0, 5], index=['min', 'max'], name='series') + tm.assert_series_equal(result, expected) + + result = s.agg({'foo': 'min'}) + expected = Series([0], index=['foo'], name='series') + tm.assert_series_equal(result, expected) + + # nested renaming + with tm.assert_produces_warning(FutureWarning): + result = s.agg({'foo': ['min', 'max']}) + + expected = DataFrame( + {'foo': [0, 5]}, + index=['min', 'max']).unstack().rename('series') + tm.assert_series_equal(result, expected) + + def test_multiple_aggregators_with_dict_api(self): + + s = Series(range(6), dtype='int64', name='series') + # nested renaming + with tm.assert_produces_warning(FutureWarning): + result = s.agg({'foo': ['min', 'max'], 'bar': ['sum', 'mean']}) + + expected = DataFrame( + {'foo': [5.0, np.nan, 0.0, np.nan], + 'bar': [np.nan, 2.5, np.nan, 15.0]}, + columns=['foo', 'bar'], + index=['max', 'mean', + 'min', 'sum']).unstack().rename('series') + tm.assert_series_equal(result.reindex_like(expected), expected) + + def test_agg_apply_evaluate_lambdas_the_same(self): + # test that we are evaluating row-by-row first + # before vectorized evaluation + result = self.series.apply(lambda x: str(x)) + expected = self.series.agg(lambda 
x: str(x)) + tm.assert_series_equal(result, expected) + + result = self.series.apply(str) + expected = self.series.agg(str) + tm.assert_series_equal(result, expected) + + def test_with_nested_series(self): + # GH 2316 + # .agg with a reducer and a transform, what to do + result = self.ts.apply(lambda x: Series( + [x, x ** 2], index=['x', 'x^2'])) + expected = DataFrame({'x': self.ts, 'x^2': self.ts ** 2}) + tm.assert_frame_equal(result, expected) + + result = self.ts.agg(lambda x: Series( + [x, x ** 2], index=['x', 'x^2'])) + tm.assert_frame_equal(result, expected) + + def test_replicate_describe(self): + # this also tests a result set that is all scalars + expected = self.series.describe() + result = self.series.apply(OrderedDict( + [('count', 'count'), + ('mean', 'mean'), + ('std', 'std'), + ('min', 'min'), + ('25%', lambda x: x.quantile(0.25)), + ('50%', 'median'), + ('75%', lambda x: x.quantile(0.75)), + ('max', 'max')])) + assert_series_equal(result, expected) + + def test_reduce(self): + # reductions with named functions + result = self.series.agg(['sum', 'mean']) + expected = Series([self.series.sum(), + self.series.mean()], + ['sum', 'mean'], + name=self.series.name) + assert_series_equal(result, expected) + class TestSeriesMap(TestData, tm.TestCase): From 614a48e3e6640b5694fd1ed6cbe56a760e89dd50 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 14 Apr 2017 11:37:19 -0400 Subject: [PATCH 389/933] DOC: whatsnew updates --- doc/source/basics.rst | 15 ++-- doc/source/computation.rst | 2 +- doc/source/timeseries.rst | 2 +- doc/source/whatsnew/v0.20.0.txt | 122 ++++++++++++++++++-------------- 4 files changed, 80 insertions(+), 61 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index ab5d7e69c923b..5789f39266927 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -917,7 +917,8 @@ Aggregating with a dict +++++++++++++++++++++++ Passing a dictionary of column names to a scalar or a list of scalars, to ``DataFrame.agg`` -allows you to customize which functions are applied to which columns. +allows you to customize which functions are applied to which columns. Note that the results +are not in any particular order; you can use an ``OrderedDict`` instead to guarantee ordering. .. ipython:: python @@ -977,9 +978,9 @@ Transform API .. versionadded:: 0.20.0 -The :method:`~DataFrame.transform` method returns an object that is indexed the same (same size) +The :meth:`~DataFrame.transform` method returns an object that is indexed the same (same size) as the original. This API allows you to provide *multiple* operations at the same -time rather than one-by-one. Its api is quite similar to the ``.agg`` API. +time rather than one-by-one. Its API is quite similar to the ``.agg`` API. Use a similar frame to the above sections. .. ipython:: python @@ -990,8 +991,8 @@ Use a similar frame to the above sections. tsdf.iloc[3:7] = np.nan tsdf -Transform the entire frame. Transform allows functions to input as a numpy function, string -function name and user defined function. +Transform the entire frame. ``.transform()`` allows input functions as: a numpy function, a string +function name or a user defined function. .. ipython:: python @@ -999,13 +1000,13 @@ function name and user defined function. tsdf.transform('abs') tsdf.transform(lambda x: x.abs()) -Since this is a single function, this is equivalent to a ufunc application +Here ``.transform()`` received a single function; this is equivalent to a ufunc application .. 
ipython:: python np.abs(tsdf) -Passing a single function to ``.transform()`` with a Series will yield a single Series in return. +Passing a single function to ``.transform()`` with a ``Series`` will yield a single ``Series`` in return. .. ipython:: python diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 8c75d4355c99a..76a030d355e33 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -618,7 +618,7 @@ Aggregation Once the ``Rolling``, ``Expanding`` or ``EWM`` objects have been created, several methods are available to perform multiple computations on the data. These operations are similar to the :ref:`aggregating API `, -:ref:`groupby aggregates `, and :ref:`resample API `. +:ref:`groupby API `, and :ref:`resample API `. .. ipython:: python diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 6a4ea2d5319ab..71d85f9b3995b 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1524,7 +1524,7 @@ We can instead only resample those groups where we have points as follows: Aggregation ~~~~~~~~~~~ -Similar to the :ref:`aggregating API `, :ref:`groupby aggregates API `, and the :ref:`window functions API `, +Similar to the :ref:`aggregating API `, :ref:`groupby API `, and the :ref:`window functions API `, a ``Resampler`` can be selectively resampled. Resampling a ``DataFrame``, the default will be to act on all columns with the same function. diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index da32de750e7de..133757b131312 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1,9 +1,9 @@ .. _whatsnew_0200: -v0.20.0 (April ??, 2017) +v0.20.0 (May 12, 2017) ------------------------ -This is a major release from 0.19 and includes a small number of API changes, several new features, +This is a major release from 0.19.2 and includes a small number of API changes, deprecations, new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. @@ -13,11 +13,11 @@ Highlights include: - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. 
- The ``.ix`` indexer has been deprecated, see :ref:`here ` - ``Panel`` has been deprecated, see :ref:`here ` -- Improved user API when accessing levels in ``.groupby()``, see :ref:`here ` -- Improved support for UInt64 dtypes, see :ref:`here ` - Addition of an ``IntervalIndex`` and ``Interval`` scalar type, see :ref:`here ` -- A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec, see :ref:`here ` -- Window Binary Corr/Cov operations return a MultiIndexed ``DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, see :ref:`here ` +- Improved user API when accessing levels in ``.groupby()``, see :ref:`here ` +- Improved support for ``UInt64`` dtypes, see :ref:`here ` +- A new orient for JSON serialization, ``orient='table'``, that uses the :ref:`Table Schema spec ` +- Window Binary Corr/Cov operations now return a MultiIndexed ``DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, see :ref:`here ` - Support for S3 handling now uses ``s3fs``, see :ref:`here ` - Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here ` - Switched the test framework to use `pytest `__ (:issue:`13097`) @@ -42,7 +42,7 @@ New features Series & DataFrame have been enhanced to support the aggregation API. This is an already familiar API that is supported for groupby, window operations, and resampling. This allows one to express, possibly multiple aggregation operations, in a single concise way by using :meth:`~DataFrame.agg`, -and :meth:`~DataFrame.transform`. The full documentation is :ref:`here `` (:issue:`1623`) +and :meth:`~DataFrame.transform`. The full documentation is :ref:`here ` (:issue:`1623`) Here is a sample @@ -67,7 +67,7 @@ Multiple functions in lists. df.agg(['sum', 'min']) -Dictionaries to provide the ability to provide selective aggregation per column. +Using a dict provides the ability to have selective aggregation per column. You will get a matrix-like output of all of the aggregators. The output will consist of all unique functions. Those that are not noted for a particular column will be ``NaN``: @@ -129,7 +129,7 @@ fixed-width text files, and :func:`read_excel` for parsing Excel files. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :func:`to_datetime` has gained a new parameter, ``origin``, to define a reference date -from where to compute the resulting ``DatetimeIndex``. (:issue:`11276`, :issue:`11745`) +from where to compute the resulting ``DatetimeIndex`` when ``unit`` is specified. (:issue:`11276`, :issue:`11745`) Start with 1960-01-01 as the starting date @@ -138,7 +138,7 @@ Start with 1960-01-01 as the starting date pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1960-01-01')) The default is set at ``origin='unix'``, which defaults to ``1970-01-01 00:00:00``. -Commonly called 'unix epoch' or POSIX time. +Commonly called 'unix epoch' or POSIX time. This was the previous default, so this is a backward compatible change. .. ipython:: python @@ -149,7 +149,7 @@ Commonly called 'unix epoch' or POSIX time. pandas errors ^^^^^^^^^^^^^ -We are adding a standard public location for all pandas exceptions & warnings ``pandas.errors``. (:issue:`14800`). Previously +We are adding a standard public module for all pandas exceptions & warnings ``pandas.errors``. (:issue:`14800`). Previously these exceptions & warnings could be imported from ``pandas.core.common`` or ``pandas.io.common``. These exceptions and warnings will be removed from the ``*.common`` locations in a future release. 
(:issue:`15541`) @@ -243,7 +243,7 @@ Inferring compression type from the extension rt = pd.read_pickle("data.pkl.xz", compression="infer") rt -The default is to 'infer +The default is to ``infer``: .. ipython:: python @@ -348,7 +348,7 @@ protocol). This gives frontends like the Jupyter notebook and `nteract`_ more flexiblity in how they display pandas objects, since they have more information about the data. -You must enable this by setting the ``display.html.table_schema`` option to True. +You must enable this by setting the ``display.html.table_schema`` option to ``True``. .. _Table Schema: http://specs.frictionlessdata.io/json-table-schema/ .. _nteract: http://nteract.io/ @@ -385,21 +385,24 @@ IntervalIndex ^^^^^^^^^^^^^ pandas has gained an ``IntervalIndex`` with its own dtype, ``interval`` as well as the ``Interval`` scalar type. These allow first-class support for interval -notation, specifically as a return type for the categories in ``pd.cut`` and ``pd.qcut``. The ``IntervalIndex`` allows some unique indexing, see the +notation, specifically as a return type for the categories in :func:`cut` and :func:`qcut`. The ``IntervalIndex`` allows some unique indexing, see the :ref:`docs `. (:issue:`7640`, :issue:`8625`) Previous behavior: +The returned categories were strings, representing Intervals + .. code-block:: ipython - In [2]: pd.cut(range(3), 2) + In [1]: c = pd.cut(range(4), bins=2) + + In [2]: c Out[2]: - [(-0.002, 1], (-0.002, 1], (1, 2]] - Categories (2, object): [(-0.002, 1] < (1, 2]] + [(-0.003, 1.5], (-0.003, 1.5], (1.5, 3], (1.5, 3]] + Categories (2, object): [(-0.003, 1.5] < (1.5, 3]] - # the returned categories are strings, representing Intervals - In [3]: pd.cut(range(3), 2).categories - Out[3]: Index(['(-0.002, 1]', '(1, 2]'], dtype='object') + In [3]: c.categories + Out[3]: Index(['(-0.003, 1.5]', '(1.5, 3]'], dtype='object') New behavior: @@ -409,28 +412,29 @@ New behavior: c c.categories -Furthermore, this allows one to bin *other* data with these same bins. ``NaN`` represents a missing +Furthermore, this allows one to bin *other* data with these same bins, with ``NaN`` represents a missing value similar to other dtypes. .. ipython:: python - pd.cut([0, 3, 1, 1], bins=c.categories) + pd.cut([0, 3, 5, 1], bins=c.categories) -These can also used in ``Series`` and ``DataFrame``, and indexed. +An ``IntervalIndex`` can also be used in ``Series`` and ``DataFrame`` as the index. .. ipython:: python df = pd.DataFrame({'A': range(4), 'B': pd.cut([0, 3, 1, 1], bins=c.categories)} ).set_index('B') + df -Selecting a specific interval +Selecting via a specific interval: .. ipython:: python df.loc[pd.Interval(1.5, 3.0)] -Selecting via a scalar value that is contained in the intervals. +Selecting via a scalar value that is contained *in* the intervals. .. ipython:: python @@ -454,7 +458,7 @@ Other Enhancements - ``DataFrame.groupby()`` has gained a ``.nunique()`` method to count the distinct values for all columns within each group (:issue:`14336`, :issue:`15197`). - ``pd.read_excel()`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) -- Multiple offset aliases with decimal points are now supported (e.g. '0.5min' is parsed as '30s') (:issue:`8419`) +- Multiple offset aliases with decimal points are now supported (e.g. 
``0.5min`` is parsed as ``30s``) (:issue:`8419`) - ``.isnull()`` and ``.notnull()`` have been added to ``Index`` object to make them more consistent with the ``Series`` API (:issue:`15300`) - New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an @@ -467,11 +471,11 @@ Other Enhancements - The ``usecols`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`14154`) - The ``skiprows`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`10882`) - The ``nrows`` and ``chunksize`` arguments in ``pd.read_csv()`` are supported if both are passed (:issue:`6774`, :issue:`15755`) -- ``pd.DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`) -- ``pd.DataFrame.plot`` can pass `matplotlib 2.0 default color cycle as a single string as color parameter `__. (:issue:`15516`) -- ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) +- ``DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`) +- ``DataFrame.plot`` can pass the matplotlib 2.0 default color cycle as a single string as color parameter, see `here `__. (:issue:`15516`) +- ``Series.interpolate()`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) - ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs ` (:issue:`15136`) -- ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`) +- ``.select_dtypes()`` now allows the string ``datetimetz`` to generically select datetimes with tz (:issue:`14910`) - The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements - ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`) @@ -483,16 +487,16 @@ Other Enhancements - ``pd.read_html()`` will parse multiple header rows, creating a multiindex header. (:issue:`13434`). - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) -- ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) +- ``TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. -- ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now allow optional header aliases. (:issue:`15536`) -- Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`) +- ``DataFrame.to_latex()`` and ``DataFrame.to_string()`` now allow optional header aliases. (:issue:`15536`) +- Re-enable the ``parse_dates`` keyword of ``pd.read_excel()`` to parse string columns as dates (:issue:`14326`) - Added ``.empty`` property to subclasses of ``Index``. (:issue:`15270`) - Enabled floor division for ``Timedelta`` and ``TimedeltaIndex`` (:issue:`15828`) - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. 
(:issue:`14583`) - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts ``str`` to separate joined fields; the default is ".", which is backward compatible. (:issue:`14883`) -- :func:`MultiIndex.remove_unused_levels` has been added to facilitate :ref:`removing unused levels `. (:issue:`15694`) +- :method:`~MultiIndex.remove_unused_levels` has been added to facilitate :ref:`removing unused levels `. (:issue:`15694`) - ``pd.read_csv()`` will now raise a ``ParserError`` error whenever any parsing error occurs (:issue:`15913`, :issue:`15925`) - ``pd.read_csv()`` now supports the ``error_bad_lines`` and ``warn_bad_lines`` arguments for the Python parser (:issue:`15925`) - ``parallel_coordinates()`` has gained a ``sort_labels`` keyword arg that sorts class labels and the colours assigned to them (:issue:`15908`) @@ -592,10 +596,10 @@ list, and a dict of column names to scalars or lists. This provides a useful syn However, ``.agg(..)`` can *also* accept a dict that allows 'renaming' of the result columns. This is a complicated and confusing syntax, as well as not consistent between ``Series`` and ``DataFrame``. We are deprecating this 'renaming' functionaility. -1) We are deprecating passing a dict to a grouped/rolled/resampled ``Series``. This allowed +- We are deprecating passing a dict to a grouped/rolled/resampled ``Series``. This allowed one to ``rename`` the resulting aggregation, but this had a completely different meaning than passing a dictionary to a grouped ``DataFrame``, which accepts column-to-aggregations. -2) We are deprecating passing a dict-of-dicts to a grouped/rolled/resampled ``DataFrame`` in a similar manner. +- We are deprecating passing a dict-of-dicts to a grouped/rolled/resampled ``DataFrame`` in a similar manner. This is an illustrative example: @@ -607,14 +611,14 @@ This is an illustrative example: df Here is a typical useful syntax for computing different aggregations for different columns. This -is a natural (and useful) syntax. We aggregate from the dict-to-list by taking the specified +is a natural, and useful syntax. We aggregate from the dict-to-list by taking the specified columns and applying the list of functions. This returns a ``MultiIndex`` for the columns. .. ipython:: python df.groupby('A').agg({'B': 'sum', 'C': 'min'}) -Here's an example of the first deprecation (1), passing a dict to a grouped ``Series``. This +Here's an example of the first deprecation, passing a dict to a grouped ``Series``. This is a combination aggregation & renaming: .. code-block:: ipython @@ -633,17 +637,18 @@ You can accomplish the same operation, more idiomatically by: .. ipython:: python - df.groupby('A').B.agg(['count']).rename({'count': 'foo'}) + df.groupby('A').B.agg(['count']).rename(columns={'count': 'foo'}) -Here's an example of the second deprecation (2), passing a dict-of-dict to a grouped ``DataFrame``: +Here's an example of the second deprecation, passing a dict-of-dict to a grouped ``DataFrame``: .. code-block:: python In [23]: (df.groupby('A') .agg({'B': {'foo': 'sum'}, 'C': {'bar': 'min'}}) ) - FutureWarning: using a dict with renaming is deprecated and will be removed in a future version + FutureWarning: using a dict with renaming is deprecated and + will be removed in a future version Out[23]: B C @@ -805,7 +810,7 @@ ndarray, you can always convert explicitly using ``np.asarray(idx.hour)``. 
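A minimal sketch of that conversion (the dates and frequency here are only illustrative):

.. code-block:: python

   idx = pd.date_range('2017-01-01', periods=3, freq='H')
   hours = idx.hour        # now an ``Index``, not an ndarray
   np.asarray(hours)       # explicit conversion when an ndarray is required
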
pd.unique will now be consistent with extension types ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In prior versions, using ``Series.unique()`` and ``pd.unique(Series)`` on ``Categorical`` and tz-aware +In prior versions, using ``Series.unique()`` and :func:`unique` on ``Categorical`` and tz-aware datatypes would yield different return types. These are now made consistent. (:issue:`15903`) - Datetime tz-aware @@ -884,7 +889,7 @@ in prior versions of pandas. (:issue:`11915`). Partial String Indexing Changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:ref:`DatetimeIndex Partial String Indexing ` now works as exact match, provided that string resolution coincides with index resolution, including a case when both are seconds (:issue:`14826`). See :ref:`Slice vs. Exact Match ` for details. +:ref:`DatetimeIndex Partial String Indexing ` now works as an exact match, provided that string resolution coincides with index resolution, including a case when both are seconds (:issue:`14826`). See :ref:`Slice vs. Exact Match ` for details. .. ipython:: python @@ -1031,7 +1036,7 @@ DataFrame.sort_index changes In certain cases, calling ``.sort_index()`` on a MultiIndexed DataFrame would return the *same* DataFrame without seeming to sort. This would happen with a ``lexsorted``, but non-monotonic levels. (:issue:`15622`, :issue:`15687`, :issue:`14015`, :issue:`13431`) -This is UNCHANGED between versions, but showing for illustration purposes: +This is *unchanged* from prior versions, but shown for illustration purposes: .. ipython:: python @@ -1196,21 +1201,28 @@ HDFStore where string comparison ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In previous versions most types could be compared to string column in a ``HDFStore`` -usually resulting in an invalid comparsion. These comparisions will now raise a +usually resulting in an invalid comparsion, returning an empty result frame. These comparisions will now raise a ``TypeError`` (:issue:`15492`) -New Behavior: +.. ipython:: python + + df = pd.DataFrame({'unparsed_date': ['2014-01-01', '2014-01-01']}) + df.to_hdf('store.h5', 'key', format='table', data_columns=True) + df.dtypes + +Previous Behavior: .. code-block:: ipython - In [15]: df = pd.DataFrame({'unparsed_date': ['2014-01-01', '2014-01-01']}) + In [4]: pd.read_hdf('store.h5', 'key', where='unparsed_date > ts') + File "", line 1 + (unparsed_date > 1970-01-01 00:00:01.388552400) + ^ + SyntaxError: invalid token - In [16]: df.dtypes - Out[16]: - unparsed_date object - dtype: object +New Behavior: - In [17]: df.to_hdf('store.h5', 'key', format='table', data_columns=True) +.. code-block:: ipython In [18]: ts = pd.Timestamp('2014-01-01') @@ -1218,6 +1230,12 @@ New Behavior: TypeError: Cannot compare 2014-01-01 00:00:00 of type to string column +.. ipython:: python + :suppress: + + import os + os.remove('store.h5') + .. 
_whatsnew_0200.api_breaking.index_order: Index.intersection and inner join now preserve the order of the left Index From 2d9451ddc36c194f1d65dab7568901c9ea998002 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 14 Apr 2017 19:55:19 +0000 Subject: [PATCH 390/933] DEPR: more deprecation warnings (#16001) --- pandas/tests/io/test_pytables.py | 19 +++++++++++-------- pandas/tests/io/test_sql.py | 8 +++++--- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 9908a320a6646..2df8872e23616 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -150,20 +150,23 @@ class TestHDFStore(Base, tm.TestCase): def test_factory_fun(self): path = create_tempfile(self.path) try: - with get_store(path) as tbl: - raise ValueError('blah') + with catch_warnings(record=True): + with get_store(path) as tbl: + raise ValueError('blah') except ValueError: pass finally: safe_remove(path) try: - with get_store(path) as tbl: - tbl['a'] = tm.makeDataFrame() + with catch_warnings(record=True): + with get_store(path) as tbl: + tbl['a'] = tm.makeDataFrame() - with get_store(path) as tbl: - self.assertEqual(len(tbl), 1) - self.assertEqual(type(tbl['a']), DataFrame) + with catch_warnings(record=True): + with get_store(path) as tbl: + self.assertEqual(len(tbl), 1) + self.assertEqual(type(tbl['a']), DataFrame) finally: safe_remove(self.path) @@ -348,7 +351,7 @@ def test_api_default_format(self): pandas.set_option('io.hdf.default_format', 'fixed') df.to_hdf(path, 'df') - with get_store(path) as store: + with HDFStore(path) as store: self.assertFalse(store.get_storer('df').is_table) self.assertRaises(ValueError, df.to_hdf, path, 'df2', append=True) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 5318e8532c58e..b4e8d6a3b972c 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -18,6 +18,7 @@ """ from __future__ import print_function +from warnings import catch_warnings import pytest import unittest import sqlite3 @@ -586,9 +587,10 @@ def test_to_sql_series(self): tm.assert_frame_equal(s.to_frame(), s2) def test_to_sql_panel(self): - panel = tm.makePanel() - self.assertRaises(NotImplementedError, sql.to_sql, panel, - 'test_panel', self.conn) + with catch_warnings(record=True): + panel = tm.makePanel() + self.assertRaises(NotImplementedError, sql.to_sql, panel, + 'test_panel', self.conn) def test_roundtrip(self): sql.to_sql(self.test_frame1, 'test_frame_roundtrip', From 85a9f8c2d5218c92a042bc7fdafe619758bfe31c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 14 Apr 2017 21:31:31 +0000 Subject: [PATCH 391/933] TST: test addl feather dtypes (#16004) --- pandas/tests/io/test_feather.py | 38 +++++++++++++++++---------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 6e2c28a0f68de..3fad2637ef057 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -8,23 +8,18 @@ from pandas.io.feather_format import to_feather, read_feather from feather import FeatherError -import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, ensure_clean -class TestFeather(tm.TestCase): - - def setUp(self): - pass +class TestFeather(object): def check_error_on_write(self, df, exc): # check that we are raising the exception # on writing - def f(): + with pytest.raises(exc): with ensure_clean() as path: to_feather(df, path) - 
self.assertRaises(exc, f) def check_round_trip(self, df): @@ -41,17 +36,21 @@ def test_error(self): def test_basic(self): - df = pd.DataFrame({'a': list('abc'), - 'b': list(range(1, 4)), - 'c': np.arange(3, 6).astype('u1'), - 'd': np.arange(4.0, 7.0, dtype='float64'), - 'e': [True, False, True], - 'f': pd.Categorical(list('abc')), - 'g': pd.date_range('20130101', periods=3), - 'h': pd.date_range('20130101', periods=3, - tz='US/Eastern'), - 'i': pd.date_range('20130101', periods=3, - freq='ns')}) + df = pd.DataFrame({'string': list('abc'), + 'int': list(range(1, 4)), + 'uint': np.arange(3, 6).astype('u1'), + 'float': np.arange(4.0, 7.0, dtype='float64'), + 'float_with_null': [1., np.nan, 3], + 'bool': [True, False, True], + 'bool_with_null': [True, np.nan, False], + 'cat': pd.Categorical(list('abc')), + 'dt': pd.date_range('20130101', periods=3), + 'dttz': pd.date_range('20130101', periods=3, + tz='US/Eastern'), + 'dt_with_null': [pd.Timestamp('20130101'), pd.NaT, + pd.Timestamp('20130103')], + 'dtns': pd.date_range('20130101', periods=3, + freq='ns')}) self.check_round_trip(df) @@ -80,6 +79,9 @@ def test_unsupported(self): df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) self.check_error_on_write(df, ValueError) + df = pd.DataFrame({'a': pd.timedelta_range('1 day', periods=3)}) + self.check_error_on_write(df, FeatherError) + # non-strings df = pd.DataFrame({'a': ['a', 1, 2.0]}) self.check_error_on_write(df, ValueError) From 9c56098a5fdf2ce1bec989671a077722e64d647f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Apr 2017 02:02:51 +0000 Subject: [PATCH 392/933] TST: 32bit compat for interval get_indexer (#16006) --- pandas/tests/indexes/test_interval.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 25ca961895ca3..79b6ff2e7a2a7 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -413,24 +413,24 @@ def test_get_loc_interval(self): def test_get_indexer(self): actual = self.index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3]) - expected = np.array([-1, -1, 0, 0, 1, 1, -1], dtype='int64') + expected = np.array([-1, -1, 0, 0, 1, 1, -1], dtype='intp') self.assert_numpy_array_equal(actual, expected) actual = self.index.get_indexer(self.index) - expected = np.array([0, 1], dtype='int64') + expected = np.array([0, 1], dtype='intp') self.assert_numpy_array_equal(actual, expected) index = IntervalIndex.from_breaks([0, 1, 2], closed='left') actual = index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3]) - expected = np.array([-1, 0, 0, 1, 1, -1, -1], dtype='int64') + expected = np.array([-1, 0, 0, 1, 1, -1, -1], dtype='intp') self.assert_numpy_array_equal(actual, expected) actual = self.index.get_indexer(index[:1]) - expected = np.array([0], dtype='int64') + expected = np.array([0], dtype='intp') self.assert_numpy_array_equal(actual, expected) actual = self.index.get_indexer(index) - expected = np.array([-1, 1], dtype='int64') + expected = np.array([-1, 1], dtype='intp') self.assert_numpy_array_equal(actual, expected) def test_get_indexer_subintervals(self): @@ -439,21 +439,21 @@ def test_get_indexer_subintervals(self): # return indexers for wholly contained subintervals target = IntervalIndex.from_breaks(np.linspace(0, 2, 5)) actual = self.index.get_indexer(target) - expected = np.array([0, 0, 1, 1], dtype='int64') + expected = np.array([0, 0, 1, 1], dtype='p') self.assert_numpy_array_equal(actual, expected) 
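+        # note: numpy accepts the single-character type code 'p' as an
+        # alias for the platform-dependent 'intp' used elsewhere in this test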
target = IntervalIndex.from_breaks([0, 0.67, 1.33, 2]) actual = self.index.get_indexer(target) - expected = np.array([0, 0, 1, 1], dtype='int64') + expected = np.array([0, 0, 1, 1], dtype='intp') self.assert_numpy_array_equal(actual, expected) actual = self.index.get_indexer(target[[0, -1]]) - expected = np.array([0, 1], dtype='int64') + expected = np.array([0, 1], dtype='intp') self.assert_numpy_array_equal(actual, expected) target = IntervalIndex.from_breaks([0, 0.33, 0.67, 1], closed='left') actual = self.index.get_indexer(target) - expected = np.array([0, 0, 0], dtype='int64') + expected = np.array([0, 0, 0], dtype='intp') self.assert_numpy_array_equal(actual, expected) def test_contains(self): @@ -505,7 +505,7 @@ def test_non_contiguous(self): index = IntervalIndex.from_tuples([(0, 1), (2, 3)]) target = [0.5, 1.5, 2.5] actual = index.get_indexer(target) - expected = np.array([0, -1, 1], dtype='int64') + expected = np.array([0, -1, 1], dtype='intp') self.assert_numpy_array_equal(actual, expected) self.assertNotIn(1.5, index) @@ -655,7 +655,7 @@ def test_datetime(self): target = pd.date_range('1999-12-31T12:00', periods=7, freq='12H') actual = idx.get_indexer(target) - expected = np.array([-1, -1, 0, 0, 1, 1, -1], dtype='int64') + expected = np.array([-1, -1, 0, 0, 1, 1, -1], dtype='intp') self.assert_numpy_array_equal(actual, expected) def test_append(self): @@ -779,9 +779,9 @@ def test_get_loc_closed(self): np.array([0], dtype='int64')) def test_get_indexer_closed(self): - x = np.arange(1000, dtype='int64') + x = np.arange(1000, dtype='intp') found = x - not_found = (-1 * np.ones(1000)).astype('int64') + not_found = (-1 * np.ones(1000)).astype('intp') for leaf_size in [1, 10, 100, 10000]: for closed in ['left', 'right', 'both', 'neither']: tree = IntervalTree(x, x + 0.5, closed=closed, From 61d84dbf161f72081165d170a36978d2d942e19d Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Sat, 15 Apr 2017 09:46:43 -0400 Subject: [PATCH 393/933] Support dicts with default values in series.map (#16002) * series.map: support dicts with defaults closes #15999 --- doc/source/whatsnew/v0.20.0.txt | 4 +-- pandas/core/series.py | 41 ++++++++++++++++++++++++------ pandas/tests/series/test_apply.py | 42 ++++++++++++++++++++++++++++++- 3 files changed, 75 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 133757b131312..089c4f59445e3 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -449,10 +449,7 @@ Other Enhancements - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. - ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) - ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) - - - ``Series.sort_index`` accepts parameters ``kind`` and ``na_position`` (:issue:`13589`, :issue:`14444`) - - ``DataFrame`` has gained a ``nunique()`` method to count the distinct values over an axis (:issue:`14336`). - ``DataFrame`` has gained a ``melt()`` method, equivalent to ``pd.melt()``, for unpivoting from a wide to long format (:issue:`12640`). - ``DataFrame.groupby()`` has gained a ``.nunique()`` method to count the distinct values for all columns within each group (:issue:`14336`, :issue:`15197`). 
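As a short sketch of the behavior the note below documents (dict subclasses
defining ``__missing__``, such as ``collections.Counter``, now supply their
default for unmatched keys):

.. code-block:: python

   from collections import Counter

   counter = Counter({'b': 5})
   pd.Series(['a', 'b']).map(counter)  # 'a' -> 0 (Counter's default), 'b' -> 5
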
@@ -1302,6 +1299,7 @@ Other API Changes - ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv()`` and will be removed in the future (:issue:`12665`) - ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`) - ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`) +- ``Series.map()`` now respects default values of dictionary subclasses with a ``__missing__`` method, such as ``collections.Counter`` (:issue:`15999`) - ``.loc`` has compat with ``.ix`` for accepting iterators, and NamedTuples (:issue:`15120`) - ``interpolate()`` and ``fillna()`` will raise a ``ValueError`` if the ``limit`` keyword argument is not greater than 0. (:issue:`9217`) - ``pd.read_csv()`` will now issue a ``ParserWarning`` whenever there are conflicting values provided by the ``dialect`` parameter and the user (:issue:`14898`) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3305f0b6c439e..7f8a97af99490 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2079,8 +2079,8 @@ def map(self, arg, na_action=None): two bar three baz - Mapping a dictionary keys on the index labels works similar as - with a `Series`: + If `arg` is a dictionary, return a new Series with values converted + according to the dictionary's mapping: >>> z = {1: 'A', 2: 'B', 3: 'C'} @@ -2094,16 +2094,14 @@ def map(self, arg, na_action=None): >>> s = pd.Series([1, 2, 3, np.nan]) - >>> s2 = s.map(lambda x: 'this is a string {}'.format(x), - na_action=None) + >>> s2 = s.map('this is a string {}'.format, na_action=None) 0 this is a string 1.0 1 this is a string 2.0 2 this is a string 3.0 3 this is a string nan dtype: object - >>> s3 = s.map(lambda x: 'this is a string {}'.format(x), - na_action='ignore') + >>> s3 = s.map('this is a string {}'.format, na_action='ignore') 0 this is a string 1.0 1 this is a string 2.0 2 this is a string 3.0 @@ -2115,6 +2113,23 @@ def map(self, arg, na_action=None): Series.apply: For applying more complex functions on a Series DataFrame.apply: Apply a function row-/column-wise DataFrame.applymap: Apply a function elementwise on a whole DataFrame + + Notes + ----- + When `arg` is a dictionary, values in Series that are not in the + dictionary (as keys) are converted to ``NaN``. However, if the + dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e. + provides a method for default values), then this default is used + rather than ``NaN``: + + >>> from collections import Counter + >>> counter = Counter() + >>> counter['bar'] += 1 + >>> y.map(counter) + 1 0 + 2 1 + 3 0 + dtype: int64 """ if is_extension_type(self.dtype): @@ -2132,13 +2147,23 @@ def map_f(values, f): else: map_f = lib.map_infer - if isinstance(arg, (dict, Series)): - if isinstance(arg, dict): + if isinstance(arg, dict): + if hasattr(arg, '__missing__'): + # If a dictionary subclass defines a default value method, + # convert arg to a lookup function (GH #15999). + dict_with_default = arg + arg = lambda x: dict_with_default[x] + else: + # Dictionary does not have a default. Thus it's safe to + # convert to an indexed series for efficiency. 
arg = self._constructor(arg, index=arg.keys()) + if isinstance(arg, Series): + # arg is a Series indexer = arg.index.get_indexer(values) new_values = algorithms.take_1d(arg._values, indexer) else: + # arg is a function new_values = map_f(values, arg) return self._constructor(new_values, diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 524167602c249..a4a49e3aeb826 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -1,7 +1,7 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 -from collections import OrderedDict +from collections import Counter, defaultdict, OrderedDict import numpy as np import pandas as pd @@ -411,6 +411,46 @@ def test_map_dict_with_tuple_keys(self): tm.assert_series_equal(df['labels'], df['expected_labels'], check_names=False) + def test_map_counter(self): + s = Series(['a', 'b', 'c'], index=[1, 2, 3]) + counter = Counter() + counter['b'] = 5 + counter['c'] += 1 + result = s.map(counter) + expected = Series([0, 5, 1], index=[1, 2, 3]) + assert_series_equal(result, expected) + + def test_map_defaultdict(self): + s = Series([1, 2, 3], index=['a', 'b', 'c']) + default_dict = defaultdict(lambda: 'blank') + default_dict[1] = 'stuff' + result = s.map(default_dict) + expected = Series(['stuff', 'blank', 'blank'], index=['a', 'b', 'c']) + assert_series_equal(result, expected) + + def test_map_dict_subclass_with_missing(self): + """ + Test Series.map with a dictionary subclass that defines __missing__, + i.e. sets a default value (GH #15999). + """ + class DictWithMissing(dict): + def __missing__(self, key): + return 'missing' + s = Series([1, 2, 3]) + dictionary = DictWithMissing({3: 'three'}) + result = s.map(dictionary) + expected = Series(['missing', 'missing', 'three']) + assert_series_equal(result, expected) + + def test_map_dict_subclass_without_missing(self): + class DictWithoutMissing(dict): + pass + s = Series([1, 2, 3]) + dictionary = DictWithoutMissing({3: 'three'}) + result = s.map(dictionary) + expected = Series([np.nan, np.nan, 'three']) + assert_series_equal(result, expected) + def test_map_box(self): vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')] s = pd.Series(vals) From 413e2c64d4cfa17c331052e8d0a2b78551cdb53e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 15 Apr 2017 08:59:00 -0500 Subject: [PATCH 394/933] ENH: Style blocks (#15954) --- .gitignore | 1 + MANIFEST.in | 1 + ci/requirements-3.5_DOC.run | 1 + doc/source/style.ipynb | 286 ++++++++++++++++++----------- doc/source/template_structure.html | 60 ++++++ doc/source/whatsnew/v0.20.0.txt | 3 + pandas/formats/style.py | 122 ++++++------ pandas/formats/templates/html.tpl | 70 +++++++ pandas/io/api.py | 17 ++ pandas/tests/api/test_api.py | 3 +- pandas/tests/formats/test_style.py | 34 +++- pandas/util/importing.py | 10 + setup.py | 3 +- 13 files changed, 447 insertions(+), 164 deletions(-) create mode 100644 doc/source/template_structure.html create mode 100644 pandas/formats/templates/html.tpl create mode 100644 pandas/util/importing.py diff --git a/.gitignore b/.gitignore index a509fcf736ea8..c953020f59342 100644 --- a/.gitignore +++ b/.gitignore @@ -103,3 +103,4 @@ doc/source/index.rst doc/build/html/index.html # Windows specific leftover: doc/tmp.sv +doc/source/templates/ diff --git a/MANIFEST.in b/MANIFEST.in index b7a7e6039ac9a..31de3466cb357 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -25,3 +25,4 @@ global-exclude *.png # recursive-include LICENSES * include versioneer.py include pandas/_version.py 
+include pandas/formats/templates/*.tpl diff --git a/ci/requirements-3.5_DOC.run b/ci/requirements-3.5_DOC.run index 7ed60758612bb..9647ab53ab835 100644 --- a/ci/requirements-3.5_DOC.run +++ b/ci/requirements-3.5_DOC.run @@ -1,5 +1,6 @@ ipython ipykernel +ipywidgets sphinx nbconvert nbformat diff --git a/doc/source/style.ipynb b/doc/source/style.ipynb index 2b8bf35a913c1..06763b2a5e741 100644 --- a/doc/source/style.ipynb +++ b/doc/source/style.ipynb @@ -54,7 +54,7 @@ }, "outputs": [], "source": [ - "import matplotlib\n", + "import matplotlib.pyplot\n", "# We have this here to trigger matplotlib's font cache stuff.\n", "# This cell is hidden from the output" ] @@ -87,9 +87,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style" @@ -107,9 +105,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.highlight_null().render().split('\\n')[:10]" @@ -160,9 +156,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "s = df.style.applymap(color_negative_red)\n", @@ -208,9 +202,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.apply(highlight_max)" @@ -234,9 +226,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.\\\n", @@ -290,9 +280,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.apply(highlight_max, color='darkorange', axis=None)" @@ -340,9 +328,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.apply(highlight_max, subset=['B', 'C', 'D'])" @@ -358,9 +344,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.applymap(color_negative_red,\n", @@ -393,9 +377,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.format(\"{:.2%}\")" @@ -411,9 +393,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.format({'B': \"{:0<4.0f}\", 'D': '{:+.2f}'})" @@ -429,9 +409,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.format({\"B\": lambda x: \"±{:.2f}\".format(abs(x))})" @@ -454,9 +432,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.highlight_null(null_color='red')" @@ -472,9 +448,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import seaborn as sns\n", @@ -495,9 +469,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# Uses the full color range\n", @@ -507,9 +479,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ 
"# Compress the color range\n", @@ -529,9 +499,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.bar(subset=['A', 'B'], color='#d65f5f')" @@ -547,9 +515,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.highlight_max(axis=0)" @@ -558,9 +524,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.highlight_min(axis=0)" @@ -576,9 +540,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.set_properties(**{'background-color': 'black',\n", @@ -603,9 +565,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df2 = -df\n", @@ -616,9 +576,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "style2 = df2.style\n", @@ -671,9 +629,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "with pd.option_context('display.precision', 2):\n", @@ -693,9 +649,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style\\\n", @@ -728,9 +682,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.set_caption('Colormaps, with a caption.')\\\n", @@ -756,9 +708,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from IPython.display import HTML\n", @@ -854,9 +804,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from IPython.html import widgets\n", @@ -892,16 +840,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "np.random.seed(25)\n", "cmap = cmap=sns.diverging_palette(5, 250, as_cmap=True)\n", - "df = pd.DataFrame(np.random.randn(20, 25)).cumsum()\n", + "bigdf = pd.DataFrame(np.random.randn(20, 25)).cumsum()\n", "\n", - "df.style.background_gradient(cmap, axis=1)\\\n", + "bigdf.style.background_gradient(cmap, axis=1)\\\n", " .set_properties(**{'max-width': '80px', 'font-size': '1pt'})\\\n", " .set_caption(\"Hover to magify\")\\\n", " .set_precision(2)\\\n", @@ -924,29 +870,157 @@ "\n", "### Subclassing\n", "\n", - "This section contains a bit of information about the implementation of `Styler`.\n", - "Since the feature is so new all of this is subject to change, even more so than the end-use API.\n", - "\n", - "As users apply styles (via `.apply`, `.applymap` or one of the builtins), we don't actually calculate anything.\n", - "Instead, we append functions and arguments to a list `self._todo`.\n", - "When asked (typically in `.render` we'll walk through the list and execute each function (this is in `self._compute()`.\n", - "These functions update an internal `defaultdict(list)`, `self.ctx` which maps DataFrame row / column positions to CSS attribute, value pairs.\n", - "\n", - "We take the extra step through `self._todo` so that we can 
export styles and set them on other `Styler`s.\n", - "\n", - "Rendering uses [Jinja](http://jinja.pocoo.org/) templates.\n", - "The `.translate` method takes `self.ctx` and builds another dictionary ready to be passed into `Styler.template.render`, the Jinja template.\n", - "\n", - "\n", - "### Alternate templates\n", - "\n", - "We've used [Jinja](http://jinja.pocoo.org/) templates to build up the HTML.\n", - "The template is stored as a class variable ``Styler.template.``. Subclasses can override that.\n", + "If the default template doesn't quite suit your needs, you can subclass Styler and extend or override the template.\n", + "We'll show an example of extending the default template to insert a custom header before each table." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from jinja2 import Environment, ChoiceLoader, FileSystemLoader\n", + "from IPython.display import HTML\n", + "from pandas.io.api import Styler" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%mkdir templates" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This next cell writes the custom template.\n", + "We extend the template `html.tpl`, which comes with pandas." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%file templates/myhtml.tpl\n", + "{% extends \"html.tpl\" %}\n", + "{% block table %}\n", + "

<h1>{{ table_title|default(\"My Table\") }}</h1>

\n", + "{{ super() }}\n", + "{% endblock table %}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we've created a template, we need to set up a subclass of ``pd.Styler`` that\n", + "knows about it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "class MyStyler(pd.Styler):\n", + " env = Environment(\n", + " loader=ChoiceLoader([\n", + " FileSystemLoader(\"templates\"), # contains ours\n", + " pd.Styler.loader, # the default\n", + " ])\n", + " )\n", + " template = env.get_template(\"myhtml.tpl\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice that we include the original loader in our environment's loader.\n", + "That's because we extend the original template, so the Jinja environment needs\n", + "to be able to find it.\n", "\n", - "```python\n", - "class CustomStyle(Styler):\n", - " template = Template(\"\"\"...\"\"\")\n", - "```" + "Now we can use that custom styler. It's `__init__` takes a DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "MyStyler(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Our custom template accepts a `table_title` keyword. We can provide the value in the `.render` method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "HTML(MyStyler(df).render(table_title=\"Extending Example\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For convenience, we provide the `Styler.from_custom_template` method that does the same as the custom subclass." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "EasyStyler = pd.Styler.from_custom_template(\"templates\", \"myhtml.tpl\")\n", + "EasyStyler(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here's the template structure:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"template_structure.html\") as f:\n", + " structure = f.read()\n", + " \n", + "HTML(structure)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "See the template in the [GitHub repo](https://github.com/pandas-dev/pandas) for more details." ] } ], diff --git a/doc/source/template_structure.html b/doc/source/template_structure.html new file mode 100644 index 0000000000000..81dbe2b7d0217 --- /dev/null +++ b/doc/source/template_structure.html @@ -0,0 +1,60 @@ + + + +
+before_style
+style
+    <style type="text/css">
+    table_styles
+    before_cellstyle
+    cellstyle
+    </style>
+before_table
+table
+    <table ...>
+    caption
+    thead
+        before_head_rows
+        head_tr (loop over headers)
+        after_head_rows
+    tbody
+        before_rows
+        tr (loop over data rows)
+        after_rows
+    </table>
+after_table
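A minimal sketch of how a template built from these blocks is used (the paths and
template name follow the notebook example this patch adds; jinja2 must be installed
for ``Styler`` to import):

.. code-block:: python

   import pandas as pd
   from pandas.io.api import Styler

   MyStyler = Styler.from_custom_template("templates", "myhtml.tpl")
   html = MyStyler(pd.DataFrame({"A": [1, 2]})).render(table_title="Demo")
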
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 089c4f59445e3..821b178c1cd17 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -483,6 +483,9 @@ Other Enhancements - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) - ``pd.read_html()`` will parse multiple header rows, creating a multiindex header. (:issue:`13434`). - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) +- ``pd.io.api.Styler`` template now has blocks for easier extension, :ref:`see the example notebook ` (:issue:`15649`) +- ``pd.io.api.Styler.render`` now accepts ``**kwargs`` to allow user-defined variables in the template (:issue:`15649`) + - ``TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. diff --git a/pandas/formats/style.py b/pandas/formats/style.py index e712010a8b4f2..af02077bd5b41 100644 --- a/pandas/formats/style.py +++ b/pandas/formats/style.py @@ -10,7 +10,9 @@ from collections import defaultdict, MutableMapping try: - from jinja2 import Template + from jinja2 import ( + PackageLoader, Environment, ChoiceLoader, FileSystemLoader + ) except ImportError: msg = "pandas.Styler requires jinja2. "\ "Please install with `conda install Jinja2`\n"\ @@ -68,7 +70,9 @@ class Styler(object): Attributes ---------- - template: Jinja Template + env : Jinja2 Environment + template: Jinja2 Template + loader : Jinja2 Loader Notes ----- @@ -103,56 +107,12 @@ class Styler(object): -------- pandas.DataFrame.style """ - template = Template(""" - - - - {% if caption %} - - {% endif %} - - - {% for r in head %} - - {% for c in r %} - {% if c.is_visible != False %} - <{{c.type}} class="{{c.class}}" {{ c.attributes|join(" ") }}> - {{c.value}} - {% endif %} - {% endfor %} - - {% endfor %} - - - {% for r in body %} - - {% for c in r %} - {% if c.is_visible != False %} - <{{c.type}} id="T_{{uuid}}{{c.id}}" - class="{{c.class}}" {{ c.attributes|join(" ") }}> - {{ c.display_value }} - {% endif %} - {% endfor %} - - {% endfor %} - -
-        <caption>{{caption}}</caption>
- """) + loader = PackageLoader("pandas", "formats/templates") + env = Environment( + loader=loader, + trim_blocks=True, + ) + template = env.get_template("html.tpl") def __init__(self, data, precision=None, table_styles=None, uuid=None, caption=None, table_attributes=None): @@ -400,12 +360,22 @@ def format(self, formatter, subset=None): self._display_funcs[(i, j)] = formatter return self - def render(self): - """ + def render(self, **kwargs): + r""" Render the built up styles to HTML .. versionadded:: 0.17.1 + Parameters + ---------- + **kwargs: + Any additional keyword arguments are passed through + to ``self.template.render``. This is useful when you + need to provide additional variables for a custom + template. + + .. versionadded:: 0.20 + Returns ------- rendered: str @@ -418,8 +388,22 @@ def render(self): last item in a Notebook cell. When calling ``Styler.render()`` directly, wrap the result in ``IPython.display.HTML`` to view the rendered HTML in the notebook. + + Pandas uses the following keys in render. Arguments passed + in ``**kwargs`` take precedence, so think carefuly if you want + to override them: + + * head + * cellstyle + * body + * uuid + * precision + * table_styles + * caption + * table_attributes """ self._compute() + # TODO: namespace all the pandas keys d = self._translate() # filter out empty styles, every cell will have a class # but the list of props may just be [['', '']]. @@ -427,6 +411,7 @@ def render(self): trimmed = [x for x in d['cellstyle'] if any(any(y) for y in x['props'])] d['cellstyle'] = trimmed + d.update(kwargs) return self.template.render(**d) def _update_ctx(self, attrs): @@ -961,6 +946,35 @@ def _highlight_extrema(data, color='yellow', max_=True): return pd.DataFrame(np.where(extrema, attr, ''), index=data.index, columns=data.columns) + @classmethod + def from_custom_template(cls, searchpath, name): + """ + Factory function for creating a subclass of ``Styler`` + with a custom template and Jinja environment. + + Parameters + ---------- + searchpath : str or list + Path or paths of directories containing the templates + name : str + Name of your custom template to use for rendering + + Returns + ------- + MyStyler : subclass of Styler + has the correct ``env`` and ``template`` class attributes set. 
+ """ + loader = ChoiceLoader([ + FileSystemLoader(searchpath), + cls.loader, + ]) + + class MyStyler(cls): + env = Environment(loader=loader) + template = env.get_template(name) + + return MyStyler + def _is_visible(idx_row, idx_col, lengths): """ diff --git a/pandas/formats/templates/html.tpl b/pandas/formats/templates/html.tpl new file mode 100644 index 0000000000000..706db1ecdd961 --- /dev/null +++ b/pandas/formats/templates/html.tpl @@ -0,0 +1,70 @@ +{# Update the template_structure.html document too #} +{%- block before_style -%}{%- endblock before_style -%} +{% block style %} + +{%- endblock style %} +{%- block before_table %}{% endblock before_table %} +{%- block table %} + +{%- block caption %} +{%- if caption -%} + +{%- endif -%} +{%- endblock caption %} +{%- block thead %} + + {%- block before_head_rows %}{% endblock %} + {%- for r in head %} + {%- block head_tr scoped %} + + {%- for c in r %} + {%- if c.is_visible != False %} + <{{ c.type }} class="{{c.class}}" {{ c.attributes|join(" ") }}>{{c.value}} + {%- endif %} + {%- endfor %} + + {%- endblock head_tr %} + {%- endfor %} + {%- block after_head_rows %}{% endblock %} + +{%- endblock thead %} +{%- block tbody %} + + {%- block before_rows %}{%- endblock before_rows %} + {%- for r in body %} + {%- block tr scoped %} + + {%- for c in r %} + {%- if c.is_visible != False %} + <{{ c.type }} id="T_{{ uuid }}{{ c.id }}" class="{{ c.class }}" {{ c.attributes|join(" ") }}>{{ c.display_value }} + {%- endif %} + {%- endfor %} + + {%- endblock tr %} + {%- endfor %} + {%- block after_rows %}{%- endblock after_rows %} + +{%- endblock tbody %} +
+<caption>{{caption}}</caption>
+{%- endblock table %} +{%- block after_table %}{% endblock after_table %} diff --git a/pandas/io/api.py b/pandas/io/api.py index e312e7bc2f300..4744d41472ff1 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -17,6 +17,23 @@ from pandas.io.pickle import read_pickle, to_pickle from pandas.io.packers import read_msgpack, to_msgpack from pandas.io.gbq import read_gbq +try: + from pandas.formats.style import Styler +except ImportError: + from pandas.compat import add_metaclass as _add_metaclass + from pandas.util.importing import _UnSubclassable + + # We want to *not* raise an ImportError upon importing this module + # We *do* want to raise an ImportError with a custom message + # when the class is instantiated or subclassed. + @_add_metaclass(_UnSubclassable) + class Styler(object): + msg = ("pandas.io.api.Styler requires jinja2. " + "Please install with `conda install jinja2` " + "or `pip install jinja2`") + def __init__(self, *args, **kargs): + raise ImportError(self.msg) + # deprecation, xref #13790 def Term(*args, **kwargs): diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index a15d7cf26cbea..6d92898042b23 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -49,7 +49,8 @@ class TestPDApi(Base, tm.TestCase): 'Period', 'PeriodIndex', 'RangeIndex', 'UInt64Index', 'Series', 'SparseArray', 'SparseDataFrame', 'SparseSeries', 'TimeGrouper', 'Timedelta', - 'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex'] + 'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex', + 'Styler'] # these are already deprecated; awaiting removal deprecated_classes = ['WidePanel', 'Panel4D', diff --git a/pandas/tests/formats/test_style.py b/pandas/tests/formats/test_style.py index 44af0b8ebb085..08f8f2f32763d 100644 --- a/pandas/tests/formats/test_style.py +++ b/pandas/tests/formats/test_style.py @@ -1,6 +1,7 @@ -import pytest - import copy +import textwrap + +import pytest import numpy as np import pandas as pd from pandas import DataFrame @@ -717,3 +718,32 @@ def test_background_gradient(self): result = (df.style.background_gradient(subset=pd.IndexSlice[1, 'A']) ._compute().ctx) self.assertEqual(result[(1, 0)], ['background-color: #fff7fb']) + + +def test_block_names(): + # catch accidental removal of a block + expected = { + 'before_style', 'style', 'table_styles', 'before_cellstyle', + 'cellstyle', 'before_table', 'table', 'caption', 'thead', 'tbody', + 'after_table', 'before_head_rows', 'head_tr', 'after_head_rows', + 'before_rows', 'tr', 'after_rows', + } + result = set(Styler.template.blocks) + assert result == expected + + +def test_from_custom_template(tmpdir): + p = tmpdir.mkdir("templates").join("myhtml.tpl") + p.write(textwrap.dedent("""\ + {% extends "html.tpl" %} + {% block table %} +
+        <h1>{{ table_title|default("My Table") }}</h1>
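+        {# illustrative note: {{ super() }} on the next line re-renders the
+           parent "table" block from html.tpl, so the custom heading is
+           prepended to the normal table output rather than replacing it #}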
+ {{ super() }} + {% endblock table %}""")) + result = Styler.from_custom_template(str(tmpdir.join('templates')), + 'myhtml.tpl') + assert issubclass(result, Styler) + assert result.env is not Styler.env + assert result.template is not Styler.template + styler = result(pd.DataFrame({"A": [1, 2]})) + assert styler.render() diff --git a/pandas/util/importing.py b/pandas/util/importing.py new file mode 100644 index 0000000000000..9323fb97baac0 --- /dev/null +++ b/pandas/util/importing.py @@ -0,0 +1,10 @@ +class _UnSubclassable(type): + """ + Metaclass to raise an ImportError when subclassed + """ + msg = "" + + def __init__(cls, name, bases, clsdict): + if len(cls.mro()) > 2: + raise ImportError(cls.msg) + super(_UnSubclassable, cls).__init__(name, bases, clsdict) diff --git a/setup.py b/setup.py index 6707af7eb0908..d8ee52f9b4f43 100755 --- a/setup.py +++ b/setup.py @@ -704,7 +704,8 @@ def pxd(name): 'data/html_encoding/*.html', 'json/data/*.json'], 'pandas.tests.tools': ['data/*.csv'], - 'pandas.tests.tseries': ['data/*.pickle'] + 'pandas.tests.tseries': ['data/*.pickle'], + 'pandas.formats': ['templates/*.tpl'] }, ext_modules=extensions, maintainer_email=EMAIL, From 7993fc81098936a893ec0dc0d84d41cfe4eb4218 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 15 Apr 2017 18:02:25 +0200 Subject: [PATCH 395/933] CLN/API: move plotting funcs to pandas.plotting (#16005) closes #12548 --- doc/source/visualization.rst | 28 +- doc/source/whatsnew/v0.20.0.txt | 26 + pandas/__init__.py | 10 +- pandas/core/config_init.py | 2 +- pandas/core/frame.py | 10 +- pandas/core/groupby.py | 2 +- pandas/core/series.py | 2 +- pandas/plotting/__init__.py | 19 + pandas/plotting/_compat.py | 67 + pandas/plotting/_converter.py | 1026 +++++ pandas/plotting/_core.py | 2828 ++++++++++++ pandas/plotting/_misc.py | 573 +++ pandas/plotting/_style.py | 246 + pandas/plotting/_timeseries.py | 339 ++ pandas/plotting/_tools.py | 383 ++ pandas/tests/api/test_api.py | 8 +- pandas/tests/plotting/common.py | 22 +- pandas/tests/plotting/test_boxplot_method.py | 5 +- .../{tseries => plotting}/test_converter.py | 2 +- pandas/tests/plotting/test_datetimelike.py | 6 +- pandas/tests/plotting/test_deprecated.py | 58 + pandas/tests/plotting/test_frame.py | 18 +- pandas/tests/plotting/test_hist_method.py | 15 +- pandas/tests/plotting/test_misc.py | 16 +- pandas/tests/plotting/test_series.py | 43 +- pandas/tools/plotting.py | 4040 +---------------- pandas/tseries/converter.py | 1043 +---- pandas/tseries/plotting.py | 345 +- pandas/util/doctools.py | 2 +- setup.py | 1 + 30 files changed, 5696 insertions(+), 5489 deletions(-) create mode 100644 pandas/plotting/__init__.py create mode 100644 pandas/plotting/_compat.py create mode 100644 pandas/plotting/_converter.py create mode 100644 pandas/plotting/_core.py create mode 100644 pandas/plotting/_misc.py create mode 100644 pandas/plotting/_style.py create mode 100644 pandas/plotting/_timeseries.py create mode 100644 pandas/plotting/_tools.py rename pandas/tests/{tseries => plotting}/test_converter.py (99%) create mode 100644 pandas/tests/plotting/test_deprecated.py diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index e8998bf6f6f5c..fb799c642131d 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -152,7 +152,7 @@ You can also create these other plots using the methods ``DataFrame.plot.` In addition to these ``kind`` s, there are the :ref:`DataFrame.hist() `, and :ref:`DataFrame.boxplot() ` methods, which use a separate interface. 
-Finally, there are several :ref:`plotting functions ` in ``pandas.tools.plotting`` +Finally, there are several :ref:`plotting functions ` in ``pandas.plotting`` that take a :class:`Series` or :class:`DataFrame` as an argument. These include @@ -823,7 +823,7 @@ before plotting. Plotting Tools -------------- -These functions can be imported from ``pandas.tools.plotting`` +These functions can be imported from ``pandas.plotting`` and take a :class:`Series` or :class:`DataFrame` as an argument. .. _visualization.scatter_matrix: @@ -834,7 +834,7 @@ Scatter Matrix Plot .. versionadded:: 0.7.3 You can create a scatter plot matrix using the -``scatter_matrix`` method in ``pandas.tools.plotting``: +``scatter_matrix`` method in ``pandas.plotting``: .. ipython:: python :suppress: @@ -843,7 +843,7 @@ You can create a scatter plot matrix using the .. ipython:: python - from pandas.tools.plotting import scatter_matrix + from pandas.plotting import scatter_matrix df = pd.DataFrame(np.random.randn(1000, 4), columns=['a', 'b', 'c', 'd']) @savefig scatter_matrix_kde.png @@ -896,7 +896,7 @@ of the same class will usually be closer together and form larger structures. .. ipython:: python - from pandas.tools.plotting import andrews_curves + from pandas.plotting import andrews_curves data = pd.read_csv('data/iris.data') @@ -918,7 +918,7 @@ represents one data point. Points that tend to cluster will appear closer togeth .. ipython:: python - from pandas.tools.plotting import parallel_coordinates + from pandas.plotting import parallel_coordinates data = pd.read_csv('data/iris.data') @@ -948,7 +948,7 @@ implies that the underlying data are not random. .. ipython:: python - from pandas.tools.plotting import lag_plot + from pandas.plotting import lag_plot plt.figure() @@ -983,7 +983,7 @@ confidence band. .. ipython:: python - from pandas.tools.plotting import autocorrelation_plot + from pandas.plotting import autocorrelation_plot plt.figure() @@ -1016,7 +1016,7 @@ are what constitutes the bootstrap plot. .. ipython:: python - from pandas.tools.plotting import bootstrap_plot + from pandas.plotting import bootstrap_plot data = pd.Series(np.random.rand(1000)) @@ -1048,7 +1048,7 @@ be colored differently. .. ipython:: python - from pandas.tools.plotting import radviz + from pandas.plotting import radviz data = pd.read_csv('data/iris.data') @@ -1228,14 +1228,14 @@ Using the ``x_compat`` parameter, you can suppress this behavior: plt.close('all') If you have more than one plot that needs to be suppressed, the ``use`` method -in ``pandas.plot_params`` can be used in a `with statement`: +in ``pandas.plotting.plot_params`` can be used in a `with statement`: .. ipython:: python plt.figure() @savefig ser_plot_suppress_context.png - with pd.plot_params.use('x_compat', True): + with pd.plotting.plot_params.use('x_compat', True): df.A.plot(color='r') df.B.plot(color='g') df.C.plot(color='b') @@ -1450,11 +1450,11 @@ Also, you can pass different :class:`DataFrame` or :class:`Series` for ``table`` plt.close('all') -Finally, there is a helper function ``pandas.tools.plotting.table`` to create a table from :class:`DataFrame` and :class:`Series`, and add it to an ``matplotlib.Axes``. This function can accept keywords which matplotlib table has. +Finally, there is a helper function ``pandas.plotting.table`` to create a table from :class:`DataFrame` and :class:`Series`, and add it to an ``matplotlib.Axes``. This function can accept keywords which matplotlib table has. .. 
ipython:: python - from pandas.tools.plotting import table + from pandas.plotting import table fig, ax = plt.subplots(1, 1) table(ax, np.round(df.describe(), 2), diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 821b178c1cd17..914995244fe5f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -21,6 +21,7 @@ Highlights include: - Support for S3 handling now uses ``s3fs``, see :ref:`here ` - Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here ` - Switched the test framework to use `pytest `__ (:issue:`13097`) +- The ``pandas.tools.plotting`` module has been deprecated, moved to ``pandas.plotting``. See :ref:`here ` Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. @@ -557,6 +558,31 @@ Using ``.iloc``. Here we will get the location of the 'A' column, then use *posi df.iloc[[0, 2], df.columns.get_loc('A')] +.. _whatsnew_0200.api_breaking.deprecate_plotting + +Deprecate .plotting +^^^^^^^^^^^^^^^^^^^ + +The ``pandas.tools.plotting`` module has been deprecated, in favor of the top level ``pandas.plotting`` module. All the public plotting functions are now available +from ``pandas.plotting`` (:issue:`12548`). + +Furthermore, the top-level ``pandas.scatter_matrix`` and ``pandas.plot_params`` are deprecated. +Users can import these from ``pandas.plotting`` as well. + +Previous script: + +.. code-block:: python + + pd.tools.plotting.scatter_matrix(df) + pd.scatter_matrix(df) + +Should be changed to: + +.. code-block:: python + + pd.plotting.scatter_matrix(df) + + .. _whatsnew_0200.api_breaking.deprecate_panel: Deprecate Panel diff --git a/pandas/__init__.py b/pandas/__init__.py index 529750cd97076..bc38919f2c78c 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -49,7 +49,15 @@ from pandas.tools.merge import (merge, ordered_merge, merge_ordered, merge_asof) from pandas.tools.pivot import pivot_table, crosstab -from pandas.tools.plotting import scatter_matrix, plot_params + +# deprecate tools.plotting, plot_params and scatter_matrix on the top namespace +import pandas.tools.plotting +plot_params = pandas.plotting._style._Options(deprecated=True) +# do not import deprecate to top namespace +scatter_matrix = pandas.util.decorators.deprecate( + 'pandas.scatter_matrix', pandas.plotting.scatter_matrix, + 'pandas.plotting.scatter_matrix') + from pandas.tools.tile import cut, qcut from pandas.tools.util import to_numeric from pandas.core.reshape import melt diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 931fe0661818d..cf2a653638e90 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -285,7 +285,7 @@ def mpl_style_cb(key): stacklevel=5) import sys - from pandas.tools.plotting import mpl_stylesheet + from pandas.plotting._style import mpl_stylesheet global style_backup val = cf.get_option(key) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4565250c78387..a5256868ce419 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -90,7 +90,7 @@ import pandas.core.ops as ops import pandas.formats.format as fmt from pandas.formats.printing import pprint_thing -import pandas.tools.plotting as gfx +import pandas.plotting._core as gfx from pandas._libs import lib, algos as libalgos @@ -5909,11 +5909,11 @@ def _put_str(s, space): @Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) def boxplot(self, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwds): - import 
pandas.tools.plotting as plots + from pandas.plotting._core import boxplot import matplotlib.pyplot as plt - ax = plots.boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize, - grid=grid, rot=rot, figsize=figsize, layout=layout, - return_type=return_type, **kwds) + ax = boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize, + grid=grid, rot=rot, figsize=figsize, layout=layout, + return_type=return_type, **kwds) plt.draw_if_interactive() return ax diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 45a9577c8d8b2..27e256a8eb572 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -4159,7 +4159,7 @@ def groupby_series(obj, col=None): return results -from pandas.tools.plotting import boxplot_frame_groupby # noqa +from pandas.plotting._core import boxplot_frame_groupby # noqa DataFrameGroupBy.boxplot = boxplot_frame_groupby diff --git a/pandas/core/series.py b/pandas/core/series.py index 7f8a97af99490..1cf537cf3c315 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3026,7 +3026,7 @@ def create_from_value(value, index, dtype): # ---------------------------------------------------------------------- # Add plotting methods to Series -import pandas.tools.plotting as _gfx # noqa +import pandas.plotting._core as _gfx # noqa Series.plot = base.AccessorProperty(_gfx.SeriesPlotMethods, _gfx.SeriesPlotMethods) diff --git a/pandas/plotting/__init__.py b/pandas/plotting/__init__.py new file mode 100644 index 0000000000000..c3cbedb0fc28c --- /dev/null +++ b/pandas/plotting/__init__.py @@ -0,0 +1,19 @@ +""" +Plotting api +""" + +# flake8: noqa + +try: # mpl optional + from pandas.plotting import _converter + _converter.register() # needs to override so set_xlim works with str/number +except ImportError: + pass + +from pandas.plotting._misc import (scatter_matrix, radviz, + andrews_curves, bootstrap_plot, + parallel_coordinates, lag_plot, + autocorrelation_plot) +from pandas.plotting._core import boxplot +from pandas.plotting._style import plot_params +from pandas.plotting._tools import table diff --git a/pandas/plotting/_compat.py b/pandas/plotting/_compat.py new file mode 100644 index 0000000000000..7b04b9e1171ec --- /dev/null +++ b/pandas/plotting/_compat.py @@ -0,0 +1,67 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +from distutils.version import LooseVersion + + +def _mpl_le_1_2_1(): + try: + import matplotlib as mpl + return (str(mpl.__version__) <= LooseVersion('1.2.1') and + str(mpl.__version__)[0] != '0') + except ImportError: + return False + + +def _mpl_ge_1_3_1(): + try: + import matplotlib + # The or v[0] == '0' is because their versioneer is + # messed up on dev + return (matplotlib.__version__ >= LooseVersion('1.3.1') or + matplotlib.__version__[0] == '0') + except ImportError: + return False + + +def _mpl_ge_1_4_0(): + try: + import matplotlib + return (matplotlib.__version__ >= LooseVersion('1.4') or + matplotlib.__version__[0] == '0') + except ImportError: + return False + + +def _mpl_ge_1_5_0(): + try: + import matplotlib + return (matplotlib.__version__ >= LooseVersion('1.5') or + matplotlib.__version__[0] == '0') + except ImportError: + return False + + +def _mpl_ge_2_0_0(): + try: + import matplotlib + return matplotlib.__version__ >= LooseVersion('2.0') + except ImportError: + return False + + +def _mpl_le_2_0_0(): + try: + import matplotlib + return matplotlib.compare_versions('2.0.0', matplotlib.__version__) + except ImportError: + return False + + +def _mpl_ge_2_0_1(): + try: + 
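+        # Like the helpers above, swallow ImportError and return False so
+        # call sites can feature-test cheaply (e.g. ``if _mpl_ge_2_0_1():``)
+        # without requiring matplotlib to be installed.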
import matplotlib + return matplotlib.__version__ >= LooseVersion('2.0.1') + except ImportError: + return False diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py new file mode 100644 index 0000000000000..0aa8cc31646c5 --- /dev/null +++ b/pandas/plotting/_converter.py @@ -0,0 +1,1026 @@ +from datetime import datetime, timedelta +import datetime as pydt +import numpy as np + +from dateutil.relativedelta import relativedelta + +import matplotlib.units as units +import matplotlib.dates as dates + +from matplotlib.ticker import Formatter, AutoLocator, Locator +from matplotlib.transforms import nonsingular + + +from pandas.types.common import (is_float, is_integer, + is_integer_dtype, + is_float_dtype, + is_datetime64_ns_dtype, + is_period_arraylike, + ) + +from pandas.compat import lrange +import pandas.compat as compat +import pandas._libs.lib as lib +import pandas.core.common as com +from pandas.core.index import Index + +from pandas.core.series import Series +from pandas.tseries.index import date_range +import pandas.tseries.tools as tools +import pandas.tseries.frequencies as frequencies +from pandas.tseries.frequencies import FreqGroup +from pandas.tseries.period import Period, PeriodIndex + +from pandas.plotting._compat import _mpl_le_2_0_0 + +# constants +HOURS_PER_DAY = 24. +MIN_PER_HOUR = 60. +SEC_PER_MIN = 60. + +SEC_PER_HOUR = SEC_PER_MIN * MIN_PER_HOUR +SEC_PER_DAY = SEC_PER_HOUR * HOURS_PER_DAY + +MUSEC_PER_DAY = 1e6 * SEC_PER_DAY + + +def register(): + units.registry[lib.Timestamp] = DatetimeConverter() + units.registry[Period] = PeriodConverter() + units.registry[pydt.datetime] = DatetimeConverter() + units.registry[pydt.date] = DatetimeConverter() + units.registry[pydt.time] = TimeConverter() + units.registry[np.datetime64] = DatetimeConverter() + + +def _to_ordinalf(tm): + tot_sec = (tm.hour * 3600 + tm.minute * 60 + tm.second + + float(tm.microsecond / 1e6)) + return tot_sec + + +def time2num(d): + if isinstance(d, compat.string_types): + parsed = tools.to_datetime(d) + if not isinstance(parsed, datetime): + raise ValueError('Could not parse time %s' % d) + return _to_ordinalf(parsed.time()) + if isinstance(d, pydt.time): + return _to_ordinalf(d) + return d + + +class TimeConverter(units.ConversionInterface): + + @staticmethod + def convert(value, unit, axis): + valid_types = (str, pydt.time) + if (isinstance(value, valid_types) or is_integer(value) or + is_float(value)): + return time2num(value) + if isinstance(value, Index): + return value.map(time2num) + if isinstance(value, (list, tuple, np.ndarray, Index)): + return [time2num(x) for x in value] + return value + + @staticmethod + def axisinfo(unit, axis): + if unit != 'time': + return None + + majloc = AutoLocator() + majfmt = TimeFormatter(majloc) + return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='time') + + @staticmethod + def default_units(x, axis): + return 'time' + + +# time formatter +class TimeFormatter(Formatter): + + def __init__(self, locs): + self.locs = locs + + def __call__(self, x, pos=0): + fmt = '%H:%M:%S' + s = int(x) + ms = int((x - s) * 1e3) + us = int((x - s) * 1e6 - ms) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + _, h = divmod(h, 24) + if us != 0: + fmt += '.%6f' + elif ms != 0: + fmt += '.%3f' + + return pydt.time(h, m, s, us).strftime(fmt) + + +# Period Conversion + + +class PeriodConverter(dates.DateConverter): + + @staticmethod + def convert(values, units, axis): + if not hasattr(axis, 'freq'): + raise TypeError('Axis must have `freq` set to convert to 
Periods') + valid_types = (compat.string_types, datetime, + Period, pydt.date, pydt.time) + if (isinstance(values, valid_types) or is_integer(values) or + is_float(values)): + return get_datevalue(values, axis.freq) + if isinstance(values, PeriodIndex): + return values.asfreq(axis.freq)._values + if isinstance(values, Index): + return values.map(lambda x: get_datevalue(x, axis.freq)) + if is_period_arraylike(values): + return PeriodIndex(values, freq=axis.freq)._values + if isinstance(values, (list, tuple, np.ndarray, Index)): + return [get_datevalue(x, axis.freq) for x in values] + return values + + +def get_datevalue(date, freq): + if isinstance(date, Period): + return date.asfreq(freq).ordinal + elif isinstance(date, (compat.string_types, datetime, + pydt.date, pydt.time)): + return Period(date, freq).ordinal + elif (is_integer(date) or is_float(date) or + (isinstance(date, (np.ndarray, Index)) and (date.size == 1))): + return date + elif date is None: + return None + raise ValueError("Unrecognizable date '%s'" % date) + + +def _dt_to_float_ordinal(dt): + """ + Convert :mod:`datetime` to the Gregorian date as UTC float days, + preserving hours, minutes, seconds and microseconds. Return value + is a :func:`float`. + """ + if (isinstance(dt, (np.ndarray, Index, Series) + ) and is_datetime64_ns_dtype(dt)): + base = dates.epoch2num(dt.asi8 / 1.0E9) + else: + base = dates.date2num(dt) + return base + + +# Datetime Conversion +class DatetimeConverter(dates.DateConverter): + + @staticmethod + def convert(values, unit, axis): + def try_parse(values): + try: + return _dt_to_float_ordinal(tools.to_datetime(values)) + except Exception: + return values + + if isinstance(values, (datetime, pydt.date)): + return _dt_to_float_ordinal(values) + elif isinstance(values, np.datetime64): + return _dt_to_float_ordinal(lib.Timestamp(values)) + elif isinstance(values, pydt.time): + return dates.date2num(values) + elif (is_integer(values) or is_float(values)): + return values + elif isinstance(values, compat.string_types): + return try_parse(values) + elif isinstance(values, (list, tuple, np.ndarray, Index)): + if isinstance(values, Index): + values = values.values + if not isinstance(values, np.ndarray): + values = com._asarray_tuplesafe(values) + + if is_integer_dtype(values) or is_float_dtype(values): + return values + + try: + values = tools.to_datetime(values) + if isinstance(values, Index): + values = _dt_to_float_ordinal(values) + else: + values = [_dt_to_float_ordinal(x) for x in values] + except Exception: + values = _dt_to_float_ordinal(values) + + return values + + @staticmethod + def axisinfo(unit, axis): + """ + Return the :class:`~matplotlib.units.AxisInfo` for *unit*. + + *unit* is a tzinfo instance or None. + The *axis* argument is required but not used. + """ + tz = unit + + majloc = PandasAutoDateLocator(tz=tz) + majfmt = PandasAutoDateFormatter(majloc, tz=tz) + datemin = pydt.date(2000, 1, 1) + datemax = pydt.date(2010, 1, 1) + + return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='', + default_limits=(datemin, datemax)) + + +class PandasAutoDateFormatter(dates.AutoDateFormatter): + + def __init__(self, locator, tz=None, defaultfmt='%Y-%m-%d'): + dates.AutoDateFormatter.__init__(self, locator, tz, defaultfmt) + # matplotlib.dates._UTC has no _utcoffset called by pandas + if self._tz is dates.UTC: + self._tz._utcoffset = self._tz.utcoffset(None) + + # For mpl > 2.0 the format strings are controlled via rcparams + # so do not mess with them. 
For mpl < 2.0 change the second + # break point and add a musec break point + if _mpl_le_2_0_0(): + self.scaled[1. / SEC_PER_DAY] = '%H:%M:%S' + self.scaled[1. / MUSEC_PER_DAY] = '%H:%M:%S.%f' + + +class PandasAutoDateLocator(dates.AutoDateLocator): + + def get_locator(self, dmin, dmax): + 'Pick the best locator based on a distance.' + delta = relativedelta(dmax, dmin) + + num_days = (delta.years * 12.0 + delta.months) * 31.0 + delta.days + num_sec = (delta.hours * 60.0 + delta.minutes) * 60.0 + delta.seconds + tot_sec = num_days * 86400. + num_sec + + if abs(tot_sec) < self.minticks: + self._freq = -1 + locator = MilliSecondLocator(self.tz) + locator.set_axis(self.axis) + + locator.set_view_interval(*self.axis.get_view_interval()) + locator.set_data_interval(*self.axis.get_data_interval()) + return locator + + return dates.AutoDateLocator.get_locator(self, dmin, dmax) + + def _get_unit(self): + return MilliSecondLocator.get_unit_generic(self._freq) + + +class MilliSecondLocator(dates.DateLocator): + + UNIT = 1. / (24 * 3600 * 1000) + + def __init__(self, tz): + dates.DateLocator.__init__(self, tz) + self._interval = 1. + + def _get_unit(self): + return self.get_unit_generic(-1) + + @staticmethod + def get_unit_generic(freq): + unit = dates.RRuleLocator.get_unit_generic(freq) + if unit < 0: + return MilliSecondLocator.UNIT + return unit + + def __call__(self): + # if no data have been set, this will tank with a ValueError + try: + dmin, dmax = self.viewlim_to_dt() + except ValueError: + return [] + + if dmin > dmax: + dmax, dmin = dmin, dmax + # We need to cap at the endpoints of valid datetime + + # TODO(wesm) unused? + # delta = relativedelta(dmax, dmin) + # try: + # start = dmin - delta + # except ValueError: + # start = _from_ordinal(1.0) + + # try: + # stop = dmax + delta + # except ValueError: + # # The magic number! + # stop = _from_ordinal(3652059.9999999) + + nmax, nmin = dates.date2num((dmax, dmin)) + + num = (nmax - nmin) * 86400 * 1000 + max_millis_ticks = 6 + for interval in [1, 10, 50, 100, 200, 500]: + if num <= interval * (max_millis_ticks - 1): + self._interval = interval + break + else: + # We went through the whole loop without breaking, default to 1 + self._interval = 1000. + + estimate = (nmax - nmin) / (self._get_unit() * self._get_interval()) + + if estimate > self.MAXTICKS * 2: + raise RuntimeError(('MillisecondLocator estimated to generate %d ' + 'ticks from %s to %s: exceeds Locator.MAXTICKS' + '* 2 (%d) ') % + (estimate, dmin, dmax, self.MAXTICKS * 2)) + + freq = '%dL' % self._get_interval() + tz = self.tz.tzname(None) + st = _from_ordinal(dates.date2num(dmin)) # strip tz + ed = _from_ordinal(dates.date2num(dmax)) + all_dates = date_range(start=st, end=ed, freq=freq, tz=tz).asobject + + try: + if len(all_dates) > 0: + locs = self.raise_if_exceeds(dates.date2num(all_dates)) + return locs + except Exception: # pragma: no cover + pass + + lims = dates.date2num([dmin, dmax]) + return lims + + def _get_interval(self): + return self._interval + + def autoscale(self): + """ + Set the view limits to include the data range. + """ + dmin, dmax = self.datalim_to_dt() + if dmin > dmax: + dmax, dmin = dmin, dmax + + # We need to cap at the endpoints of valid datetime + + # TODO(wesm): unused? + + # delta = relativedelta(dmax, dmin) + # try: + # start = dmin - delta + # except ValueError: + # start = _from_ordinal(1.0) + + # try: + # stop = dmax + delta + # except ValueError: + # # The magic number! 
+ # stop = _from_ordinal(3652059.9999999) + + dmin, dmax = self.datalim_to_dt() + + vmin = dates.date2num(dmin) + vmax = dates.date2num(dmax) + + return self.nonsingular(vmin, vmax) + + +def _from_ordinal(x, tz=None): + ix = int(x) + dt = datetime.fromordinal(ix) + remainder = float(x) - ix + hour, remainder = divmod(24 * remainder, 1) + minute, remainder = divmod(60 * remainder, 1) + second, remainder = divmod(60 * remainder, 1) + microsecond = int(1e6 * remainder) + if microsecond < 10: + microsecond = 0 # compensate for rounding errors + dt = datetime(dt.year, dt.month, dt.day, int(hour), int(minute), + int(second), microsecond) + if tz is not None: + dt = dt.astimezone(tz) + + if microsecond > 999990: # compensate for rounding errors + dt += timedelta(microseconds=1e6 - microsecond) + + return dt + +# Fixed frequency dynamic tick locators and formatters + +# ------------------------------------------------------------------------- +# --- Locators --- +# ------------------------------------------------------------------------- + + +def _get_default_annual_spacing(nyears): + """ + Returns a default spacing between consecutive ticks for annual data. + """ + if nyears < 11: + (min_spacing, maj_spacing) = (1, 1) + elif nyears < 20: + (min_spacing, maj_spacing) = (1, 2) + elif nyears < 50: + (min_spacing, maj_spacing) = (1, 5) + elif nyears < 100: + (min_spacing, maj_spacing) = (5, 10) + elif nyears < 200: + (min_spacing, maj_spacing) = (5, 25) + elif nyears < 600: + (min_spacing, maj_spacing) = (10, 50) + else: + factor = nyears // 1000 + 1 + (min_spacing, maj_spacing) = (factor * 20, factor * 100) + return (min_spacing, maj_spacing) + + +def period_break(dates, period): + """ + Returns the indices where the given period changes. + + Parameters + ---------- + dates : PeriodIndex + Array of intervals to monitor. + period : string + Name of the period to monitor. + """ + current = getattr(dates, period) + previous = getattr(dates - 1, period) + return np.nonzero(current - previous)[0] + + +def has_level_label(label_flags, vmin): + """ + Returns true if the ``label_flags`` indicate there is at least one label + for this level. + + if the minimum view limit is not an exact integer, then the first tick + label won't be shown, so we must adjust for that. 
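+
+    Illustrative example: ``has_level_label(np.array([0]), vmin=0.5)`` is
+    False, since the only flagged label sits at position 0 and is dropped
+    when the view starts at a fractional limit.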
+ """ + if label_flags.size == 0 or (label_flags.size == 1 and + label_flags[0] == 0 and + vmin % 1 > 0.0): + return False + else: + return True + + +def _daily_finder(vmin, vmax, freq): + periodsperday = -1 + + if freq >= FreqGroup.FR_HR: + if freq == FreqGroup.FR_NS: + periodsperday = 24 * 60 * 60 * 1000000000 + elif freq == FreqGroup.FR_US: + periodsperday = 24 * 60 * 60 * 1000000 + elif freq == FreqGroup.FR_MS: + periodsperday = 24 * 60 * 60 * 1000 + elif freq == FreqGroup.FR_SEC: + periodsperday = 24 * 60 * 60 + elif freq == FreqGroup.FR_MIN: + periodsperday = 24 * 60 + elif freq == FreqGroup.FR_HR: + periodsperday = 24 + else: # pragma: no cover + raise ValueError("unexpected frequency: %s" % freq) + periodsperyear = 365 * periodsperday + periodspermonth = 28 * periodsperday + + elif freq == FreqGroup.FR_BUS: + periodsperyear = 261 + periodspermonth = 19 + elif freq == FreqGroup.FR_DAY: + periodsperyear = 365 + periodspermonth = 28 + elif frequencies.get_freq_group(freq) == FreqGroup.FR_WK: + periodsperyear = 52 + periodspermonth = 3 + else: # pragma: no cover + raise ValueError("unexpected frequency") + + # save this for later usage + vmin_orig = vmin + + (vmin, vmax) = (Period(ordinal=int(vmin), freq=freq), + Period(ordinal=int(vmax), freq=freq)) + span = vmax.ordinal - vmin.ordinal + 1 + dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq) + # Initialize the output + info = np.zeros(span, + dtype=[('val', np.int64), ('maj', bool), + ('min', bool), ('fmt', '|S20')]) + info['val'][:] = dates_._values + info['fmt'][:] = '' + info['maj'][[0, -1]] = True + # .. and set some shortcuts + info_maj = info['maj'] + info_min = info['min'] + info_fmt = info['fmt'] + + def first_label(label_flags): + if (label_flags[0] == 0) and (label_flags.size > 1) and \ + ((vmin_orig % 1) > 0.0): + return label_flags[1] + else: + return label_flags[0] + + # Case 1. 
Less than a month + if span <= periodspermonth: + day_start = period_break(dates_, 'day') + month_start = period_break(dates_, 'month') + + def _hour_finder(label_interval, force_year_start): + _hour = dates_.hour + _prev_hour = (dates_ - 1).hour + hour_start = (_hour - _prev_hour) != 0 + info_maj[day_start] = True + info_min[hour_start & (_hour % label_interval == 0)] = True + year_start = period_break(dates_, 'year') + info_fmt[hour_start & (_hour % label_interval == 0)] = '%H:%M' + info_fmt[day_start] = '%H:%M\n%d-%b' + info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' + if force_year_start and not has_level_label(year_start, vmin_orig): + info_fmt[first_label(day_start)] = '%H:%M\n%d-%b\n%Y' + + def _minute_finder(label_interval): + hour_start = period_break(dates_, 'hour') + _minute = dates_.minute + _prev_minute = (dates_ - 1).minute + minute_start = (_minute - _prev_minute) != 0 + info_maj[hour_start] = True + info_min[minute_start & (_minute % label_interval == 0)] = True + year_start = period_break(dates_, 'year') + info_fmt = info['fmt'] + info_fmt[minute_start & (_minute % label_interval == 0)] = '%H:%M' + info_fmt[day_start] = '%H:%M\n%d-%b' + info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' + + def _second_finder(label_interval): + minute_start = period_break(dates_, 'minute') + _second = dates_.second + _prev_second = (dates_ - 1).second + second_start = (_second - _prev_second) != 0 + info['maj'][minute_start] = True + info['min'][second_start & (_second % label_interval == 0)] = True + year_start = period_break(dates_, 'year') + info_fmt = info['fmt'] + info_fmt[second_start & (_second % + label_interval == 0)] = '%H:%M:%S' + info_fmt[day_start] = '%H:%M:%S\n%d-%b' + info_fmt[year_start] = '%H:%M:%S\n%d-%b\n%Y' + + if span < periodsperday / 12000.0: + _second_finder(1) + elif span < periodsperday / 6000.0: + _second_finder(2) + elif span < periodsperday / 2400.0: + _second_finder(5) + elif span < periodsperday / 1200.0: + _second_finder(10) + elif span < periodsperday / 800.0: + _second_finder(15) + elif span < periodsperday / 400.0: + _second_finder(30) + elif span < periodsperday / 150.0: + _minute_finder(1) + elif span < periodsperday / 70.0: + _minute_finder(2) + elif span < periodsperday / 24.0: + _minute_finder(5) + elif span < periodsperday / 12.0: + _minute_finder(15) + elif span < periodsperday / 6.0: + _minute_finder(30) + elif span < periodsperday / 2.5: + _hour_finder(1, False) + elif span < periodsperday / 1.5: + _hour_finder(2, False) + elif span < periodsperday * 1.25: + _hour_finder(3, False) + elif span < periodsperday * 2.5: + _hour_finder(6, True) + elif span < periodsperday * 4: + _hour_finder(12, True) + else: + info_maj[month_start] = True + info_min[day_start] = True + year_start = period_break(dates_, 'year') + info_fmt = info['fmt'] + info_fmt[day_start] = '%d' + info_fmt[month_start] = '%d\n%b' + info_fmt[year_start] = '%d\n%b\n%Y' + if not has_level_label(year_start, vmin_orig): + if not has_level_label(month_start, vmin_orig): + info_fmt[first_label(day_start)] = '%d\n%b\n%Y' + else: + info_fmt[first_label(month_start)] = '%d\n%b\n%Y' + + # Case 2. 
Less than three months + elif span <= periodsperyear // 4: + month_start = period_break(dates_, 'month') + info_maj[month_start] = True + if freq < FreqGroup.FR_HR: + info['min'] = True + else: + day_start = period_break(dates_, 'day') + info['min'][day_start] = True + week_start = period_break(dates_, 'week') + year_start = period_break(dates_, 'year') + info_fmt[week_start] = '%d' + info_fmt[month_start] = '\n\n%b' + info_fmt[year_start] = '\n\n%b\n%Y' + if not has_level_label(year_start, vmin_orig): + if not has_level_label(month_start, vmin_orig): + info_fmt[first_label(week_start)] = '\n\n%b\n%Y' + else: + info_fmt[first_label(month_start)] = '\n\n%b\n%Y' + # Case 3. Less than 14 months ............... + elif span <= 1.15 * periodsperyear: + year_start = period_break(dates_, 'year') + month_start = period_break(dates_, 'month') + week_start = period_break(dates_, 'week') + info_maj[month_start] = True + info_min[week_start] = True + info_min[year_start] = False + info_min[month_start] = False + info_fmt[month_start] = '%b' + info_fmt[year_start] = '%b\n%Y' + if not has_level_label(year_start, vmin_orig): + info_fmt[first_label(month_start)] = '%b\n%Y' + # Case 4. Less than 2.5 years ............... + elif span <= 2.5 * periodsperyear: + year_start = period_break(dates_, 'year') + quarter_start = period_break(dates_, 'quarter') + month_start = period_break(dates_, 'month') + info_maj[quarter_start] = True + info_min[month_start] = True + info_fmt[quarter_start] = '%b' + info_fmt[year_start] = '%b\n%Y' + # Case 4. Less than 4 years ................. + elif span <= 4 * periodsperyear: + year_start = period_break(dates_, 'year') + month_start = period_break(dates_, 'month') + info_maj[year_start] = True + info_min[month_start] = True + info_min[year_start] = False + + month_break = dates_[month_start].month + jan_or_jul = month_start[(month_break == 1) | (month_break == 7)] + info_fmt[jan_or_jul] = '%b' + info_fmt[year_start] = '%b\n%Y' + # Case 5. Less than 11 years ................ + elif span <= 11 * periodsperyear: + year_start = period_break(dates_, 'year') + quarter_start = period_break(dates_, 'quarter') + info_maj[year_start] = True + info_min[quarter_start] = True + info_min[year_start] = False + info_fmt[year_start] = '%Y' + # Case 6. More than 12 years ................ 
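+    # (for such multi-decade spans only year breaks get ticks; the spacing
+    #  comes from _get_default_annual_spacing, e.g. a ~60-year span gets a
+    #  major tick every 10 years and a minor tick every 5)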
+ else: + year_start = period_break(dates_, 'year') + year_break = dates_[year_start].year + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + major_idx = year_start[(year_break % maj_anndef == 0)] + info_maj[major_idx] = True + minor_idx = year_start[(year_break % min_anndef == 0)] + info_min[minor_idx] = True + info_fmt[major_idx] = '%Y' + + return info + + +def _monthly_finder(vmin, vmax, freq): + periodsperyear = 12 + + vmin_orig = vmin + (vmin, vmax) = (int(vmin), int(vmax)) + span = vmax - vmin + 1 + + # Initialize the output + info = np.zeros(span, + dtype=[('val', int), ('maj', bool), ('min', bool), + ('fmt', '|S8')]) + info['val'] = np.arange(vmin, vmax + 1) + dates_ = info['val'] + info['fmt'] = '' + year_start = (dates_ % 12 == 0).nonzero()[0] + info_maj = info['maj'] + info_fmt = info['fmt'] + + if span <= 1.15 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + + info_fmt[:] = '%b' + info_fmt[year_start] = '%b\n%Y' + + if not has_level_label(year_start, vmin_orig): + if dates_.size > 1: + idx = 1 + else: + idx = 0 + info_fmt[idx] = '%b\n%Y' + + elif span <= 2.5 * periodsperyear: + quarter_start = (dates_ % 3 == 0).nonzero() + info_maj[year_start] = True + # TODO: Check the following : is it really info['fmt'] ? + info['fmt'][quarter_start] = True + info['min'] = True + + info_fmt[quarter_start] = '%b' + info_fmt[year_start] = '%b\n%Y' + + elif span <= 4 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + + jan_or_jul = (dates_ % 12 == 0) | (dates_ % 12 == 6) + info_fmt[jan_or_jul] = '%b' + info_fmt[year_start] = '%b\n%Y' + + elif span <= 11 * periodsperyear: + quarter_start = (dates_ % 3 == 0).nonzero() + info_maj[year_start] = True + info['min'][quarter_start] = True + + info_fmt[year_start] = '%Y' + + else: + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + years = dates_[year_start] // 12 + 1 + major_idx = year_start[(years % maj_anndef == 0)] + info_maj[major_idx] = True + info['min'][year_start[(years % min_anndef == 0)]] = True + + info_fmt[major_idx] = '%Y' + + return info + + +def _quarterly_finder(vmin, vmax, freq): + periodsperyear = 4 + vmin_orig = vmin + (vmin, vmax) = (int(vmin), int(vmax)) + span = vmax - vmin + 1 + + info = np.zeros(span, + dtype=[('val', int), ('maj', bool), ('min', bool), + ('fmt', '|S8')]) + info['val'] = np.arange(vmin, vmax + 1) + info['fmt'] = '' + dates_ = info['val'] + info_maj = info['maj'] + info_fmt = info['fmt'] + year_start = (dates_ % 4 == 0).nonzero()[0] + + if span <= 3.5 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + + info_fmt[:] = 'Q%q' + info_fmt[year_start] = 'Q%q\n%F' + if not has_level_label(year_start, vmin_orig): + if dates_.size > 1: + idx = 1 + else: + idx = 0 + info_fmt[idx] = 'Q%q\n%F' + + elif span <= 11 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + info_fmt[year_start] = '%F' + + else: + years = dates_[year_start] // 4 + 1 + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + major_idx = year_start[(years % maj_anndef == 0)] + info_maj[major_idx] = True + info['min'][year_start[(years % min_anndef == 0)]] = True + info_fmt[major_idx] = '%F' + + return info + + +def _annual_finder(vmin, vmax, freq): + (vmin, vmax) = (int(vmin), int(vmax + 1)) + span = vmax - vmin + 1 + + info = np.zeros(span, + dtype=[('val', int), ('maj', bool), ('min', bool), + ('fmt', '|S8')]) + info['val'] = 
np.arange(vmin, vmax + 1) + info['fmt'] = '' + dates_ = info['val'] + + (min_anndef, maj_anndef) = _get_default_annual_spacing(span) + major_idx = dates_ % maj_anndef == 0 + info['maj'][major_idx] = True + info['min'][(dates_ % min_anndef == 0)] = True + info['fmt'][major_idx] = '%Y' + + return info + + +def get_finder(freq): + if isinstance(freq, compat.string_types): + freq = frequencies.get_freq(freq) + fgroup = frequencies.get_freq_group(freq) + + if fgroup == FreqGroup.FR_ANN: + return _annual_finder + elif fgroup == FreqGroup.FR_QTR: + return _quarterly_finder + elif freq == FreqGroup.FR_MTH: + return _monthly_finder + elif ((freq >= FreqGroup.FR_BUS) or fgroup == FreqGroup.FR_WK): + return _daily_finder + else: # pragma: no cover + errmsg = "Unsupported frequency: %s" % (freq) + raise NotImplementedError(errmsg) + + +class TimeSeries_DateLocator(Locator): + """ + Locates the ticks along an axis controlled by a :class:`Series`. + + Parameters + ---------- + freq : {var} + Valid frequency specifier. + minor_locator : {False, True}, optional + Whether the locator is for minor ticks (True) or not. + dynamic_mode : {True, False}, optional + Whether the locator should work in dynamic mode. + base : {int}, optional + quarter : {int}, optional + month : {int}, optional + day : {int}, optional + """ + + def __init__(self, freq, minor_locator=False, dynamic_mode=True, + base=1, quarter=1, month=1, day=1, plot_obj=None): + if isinstance(freq, compat.string_types): + freq = frequencies.get_freq(freq) + self.freq = freq + self.base = base + (self.quarter, self.month, self.day) = (quarter, month, day) + self.isminor = minor_locator + self.isdynamic = dynamic_mode + self.offset = 0 + self.plot_obj = plot_obj + self.finder = get_finder(freq) + + def _get_default_locs(self, vmin, vmax): + "Returns the default locations of ticks." + + if self.plot_obj.date_axis_info is None: + self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) + + locator = self.plot_obj.date_axis_info + + if self.isminor: + return np.compress(locator['min'], locator['val']) + return np.compress(locator['maj'], locator['val']) + + def __call__(self): + 'Return the locations of the ticks.' + # axis calls Locator.set_axis inside set_m_formatter + vi = tuple(self.axis.get_view_interval()) + if vi != self.plot_obj.view_interval: + self.plot_obj.date_axis_info = None + self.plot_obj.view_interval = vi + vmin, vmax = vi + if vmax < vmin: + vmin, vmax = vmax, vmin + if self.isdynamic: + locs = self._get_default_locs(vmin, vmax) + else: # pragma: no cover + base = self.base + (d, m) = divmod(vmin, base) + vmin = (d + 1) * base + locs = lrange(vmin, vmax + 1, base) + return locs + + def autoscale(self): + """ + Sets the view limits to the nearest multiples of base that contain the + data. + """ + # requires matplotlib >= 0.98.0 + (vmin, vmax) = self.axis.get_data_interval() + + locs = self._get_default_locs(vmin, vmax) + (vmin, vmax) = locs[[0, -1]] + if vmin == vmax: + vmin -= 1 + vmax += 1 + return nonsingular(vmin, vmax) + +# ------------------------------------------------------------------------- +# --- Formatter --- +# ------------------------------------------------------------------------- + + +class TimeSeries_DateFormatter(Formatter): + """ + Formats the ticks along an axis controlled by a :class:`PeriodIndex`. + + Parameters + ---------- + freq : {int, string} + Valid frequency specifier. + minor_locator : {False, True} + Whether the current formatter should apply to minor ticks (True) or + major ticks (False). 
+ dynamic_mode : {True, False} + Whether the formatter works in dynamic mode or not. + """ + + def __init__(self, freq, minor_locator=False, dynamic_mode=True, + plot_obj=None): + if isinstance(freq, compat.string_types): + freq = frequencies.get_freq(freq) + self.format = None + self.freq = freq + self.locs = [] + self.formatdict = None + self.isminor = minor_locator + self.isdynamic = dynamic_mode + self.offset = 0 + self.plot_obj = plot_obj + self.finder = get_finder(freq) + + def _set_default_format(self, vmin, vmax): + "Returns the default ticks spacing." + + if self.plot_obj.date_axis_info is None: + self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) + info = self.plot_obj.date_axis_info + + if self.isminor: + format = np.compress(info['min'] & np.logical_not(info['maj']), + info) + else: + format = np.compress(info['maj'], info) + self.formatdict = dict([(x, f) for (x, _, _, f) in format]) + return self.formatdict + + def set_locs(self, locs): + 'Sets the locations of the ticks' + # don't actually use the locs. This is just needed to work with + # matplotlib. Force to use vmin, vmax + self.locs = locs + + (vmin, vmax) = vi = tuple(self.axis.get_view_interval()) + if vi != self.plot_obj.view_interval: + self.plot_obj.date_axis_info = None + self.plot_obj.view_interval = vi + if vmax < vmin: + (vmin, vmax) = (vmax, vmin) + self._set_default_format(vmin, vmax) + + def __call__(self, x, pos=0): + if self.formatdict is None: + return '' + else: + fmt = self.formatdict.pop(x, '') + return Period(ordinal=int(x), freq=self.freq).strftime(fmt) + + +class TimeSeries_TimedeltaFormatter(Formatter): + """ + Formats the ticks along an axis controlled by a :class:`TimedeltaIndex`. + """ + + @staticmethod + def format_timedelta_ticks(x, pos, n_decimals): + """ + Convert seconds to 'D days HH:MM:SS.F' + """ + s, ns = divmod(x, 1e9) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + d, h = divmod(h, 24) + decimals = int(ns * 10**(n_decimals - 9)) + s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) + if n_decimals > 0: + s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) + if d != 0: + s = '{:d} days '.format(int(d)) + s + return s + + def __call__(self, x, pos=0): + (vmin, vmax) = tuple(self.axis.get_view_interval()) + n_decimals = int(np.ceil(np.log10(100 * 1e9 / (vmax - vmin)))) + if n_decimals > 9: + n_decimals = 9 + return self.format_timedelta_ticks(x, pos, n_decimals) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py new file mode 100644 index 0000000000000..3980f5e7f2f61 --- /dev/null +++ b/pandas/plotting/_core.py @@ -0,0 +1,2828 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +import warnings +import re +from collections import namedtuple +from distutils.version import LooseVersion + +import numpy as np + +from pandas.util.decorators import cache_readonly +from pandas.core.base import PandasObject +from pandas.types.common import (is_list_like, + is_integer, + is_number, + is_hashable, + is_iterator) +from pandas.core.common import AbstractMethodError, isnull, _try_sort +from pandas.core.generic import _shared_docs, _shared_doc_kwargs +from pandas.core.index import Index, MultiIndex +from pandas.core.series import Series, remove_na +from pandas.tseries.period import PeriodIndex +from pandas.compat import range, lrange, map, zip, string_types +import pandas.compat as compat +from pandas.formats.printing import pprint_thing +from pandas.util.decorators import Appender + +from pandas.plotting._compat import 
(_mpl_ge_1_3_1, + _mpl_ge_1_5_0) +from pandas.plotting._style import (mpl_stylesheet, plot_params, + _get_standard_colors) +from pandas.plotting._tools import (_subplots, _flatten, table, + _handle_shared_axes, _get_all_lines, + _get_xlim, _set_ticks_props, + format_date_labels) + + +if _mpl_ge_1_5_0(): + # Compat with mp 1.5, which uses cycler. + import cycler + colors = mpl_stylesheet.pop('axes.color_cycle') + mpl_stylesheet['axes.prop_cycle'] = cycler.cycler('color', colors) + + +def _get_standard_kind(kind): + return {'density': 'kde'}.get(kind, kind) + + +def _gca(): + import matplotlib.pyplot as plt + return plt.gca() + + +def _gcf(): + import matplotlib.pyplot as plt + return plt.gcf() + + +class MPLPlot(object): + """ + Base class for assembling a pandas plot using matplotlib + + Parameters + ---------- + data : + + """ + + @property + def _kind(self): + """Specify kind str. Must be overridden in child class""" + raise NotImplementedError + + _layout_type = 'vertical' + _default_rot = 0 + orientation = None + _pop_attributes = ['label', 'style', 'logy', 'logx', 'loglog', + 'mark_right', 'stacked'] + _attr_defaults = {'logy': False, 'logx': False, 'loglog': False, + 'mark_right': True, 'stacked': False} + + def __init__(self, data, kind=None, by=None, subplots=False, sharex=None, + sharey=False, use_index=True, + figsize=None, grid=None, legend=True, rot=None, + ax=None, fig=None, title=None, xlim=None, ylim=None, + xticks=None, yticks=None, + sort_columns=False, fontsize=None, + secondary_y=False, colormap=None, + table=False, layout=None, **kwds): + + self.data = data + self.by = by + + self.kind = kind + + self.sort_columns = sort_columns + + self.subplots = subplots + + if sharex is None: + if ax is None: + self.sharex = True + else: + # if we get an axis, the users should do the visibility + # setting... + self.sharex = False + else: + self.sharex = sharex + + self.sharey = sharey + self.figsize = figsize + self.layout = layout + + self.xticks = xticks + self.yticks = yticks + self.xlim = xlim + self.ylim = ylim + self.title = title + self.use_index = use_index + + self.fontsize = fontsize + + if rot is not None: + self.rot = rot + # need to know for format_date_labels since it's rotated to 30 by + # default + self._rot_set = True + else: + self._rot_set = False + self.rot = self._default_rot + + if grid is None: + grid = False if secondary_y else self.plt.rcParams['axes.grid'] + + self.grid = grid + self.legend = legend + self.legend_handles = [] + self.legend_labels = [] + + for attr in self._pop_attributes: + value = kwds.pop(attr, self._attr_defaults.get(attr, None)) + setattr(self, attr, value) + + self.ax = ax + self.fig = fig + self.axes = None + + # parse errorbar input if given + xerr = kwds.pop('xerr', None) + yerr = kwds.pop('yerr', None) + self.errors = {} + for kw, err in zip(['xerr', 'yerr'], [xerr, yerr]): + self.errors[kw] = self._parse_errorbars(kw, err) + + if not isinstance(secondary_y, (bool, tuple, list, np.ndarray, Index)): + secondary_y = [secondary_y] + self.secondary_y = secondary_y + + # ugly TypeError if user passes matplotlib's `cmap` name. + # Probably better to accept either. 
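+        # (both df.plot(colormap='viridis') and df.plot(cmap='viridis') end
+        #  up selecting the same colormap below, while passing both raises)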
+ if 'cmap' in kwds and colormap: + raise TypeError("Only specify one of `cmap` and `colormap`.") + elif 'cmap' in kwds: + self.colormap = kwds.pop('cmap') + else: + self.colormap = colormap + + self.table = table + + self.kwds = kwds + + self._validate_color_args() + + def _validate_color_args(self): + if 'color' not in self.kwds and 'colors' in self.kwds: + warnings.warn(("'colors' is being deprecated. Please use 'color'" + "instead of 'colors'")) + colors = self.kwds.pop('colors') + self.kwds['color'] = colors + + if ('color' in self.kwds and self.nseries == 1): + # support series.plot(color='green') + self.kwds['color'] = [self.kwds['color']] + + if ('color' in self.kwds or 'colors' in self.kwds) and \ + self.colormap is not None: + warnings.warn("'color' and 'colormap' cannot be used " + "simultaneously. Using 'color'") + + if 'color' in self.kwds and self.style is not None: + if is_list_like(self.style): + styles = self.style + else: + styles = [self.style] + # need only a single match + for s in styles: + if re.match('^[a-z]+?', s) is not None: + raise ValueError( + "Cannot pass 'style' string with a color " + "symbol and 'color' keyword argument. Please" + " use one or the other or pass 'style' " + "without a color symbol") + + def _iter_data(self, data=None, keep_index=False, fillna=None): + if data is None: + data = self.data + if fillna is not None: + data = data.fillna(fillna) + + # TODO: unused? + # if self.sort_columns: + # columns = _try_sort(data.columns) + # else: + # columns = data.columns + + for col, values in data.iteritems(): + if keep_index is True: + yield col, values + else: + yield col, values.values + + @property + def nseries(self): + if self.data.ndim == 1: + return 1 + else: + return self.data.shape[1] + + def draw(self): + self.plt.draw_if_interactive() + + def generate(self): + self._args_adjust() + self._compute_plot_data() + self._setup_subplots() + self._make_plot() + self._add_table() + self._make_legend() + self._adorn_subplots() + + for ax in self.axes: + self._post_plot_logic_common(ax, self.data) + self._post_plot_logic(ax, self.data) + + def _args_adjust(self): + pass + + def _has_plotted_object(self, ax): + """check whether ax has data""" + return (len(ax.lines) != 0 or + len(ax.artists) != 0 or + len(ax.containers) != 0) + + def _maybe_right_yaxis(self, ax, axes_num): + if not self.on_right(axes_num): + # secondary axes may be passed via ax kw + return self._get_ax_layer(ax) + + if hasattr(ax, 'right_ax'): + # if it has right_ax proparty, ``ax`` must be left axes + return ax.right_ax + elif hasattr(ax, 'left_ax'): + # if it has left_ax proparty, ``ax`` must be right axes + return ax + else: + # otherwise, create twin axes + orig_ax, new_ax = ax, ax.twinx() + # TODO: use Matplotlib public API when available + new_ax._get_lines = orig_ax._get_lines + new_ax._get_patches_for_fill = orig_ax._get_patches_for_fill + orig_ax.right_ax, new_ax.left_ax = new_ax, orig_ax + + if not self._has_plotted_object(orig_ax): # no data on left y + orig_ax.get_yaxis().set_visible(False) + return new_ax + + def _setup_subplots(self): + if self.subplots: + fig, axes = _subplots(naxes=self.nseries, + sharex=self.sharex, sharey=self.sharey, + figsize=self.figsize, ax=self.ax, + layout=self.layout, + layout_type=self._layout_type) + else: + if self.ax is None: + fig = self.plt.figure(figsize=self.figsize) + axes = fig.add_subplot(111) + else: + fig = self.ax.get_figure() + if self.figsize is not None: + fig.set_size_inches(self.figsize) + axes = self.ax + + axes = 
_flatten(axes) + + if self.logx or self.loglog: + [a.set_xscale('log') for a in axes] + if self.logy or self.loglog: + [a.set_yscale('log') for a in axes] + + self.fig = fig + self.axes = axes + + @property + def result(self): + """ + Return result axes + """ + if self.subplots: + if self.layout is not None and not is_list_like(self.ax): + return self.axes.reshape(*self.layout) + else: + return self.axes + else: + sec_true = isinstance(self.secondary_y, bool) and self.secondary_y + all_sec = (is_list_like(self.secondary_y) and + len(self.secondary_y) == self.nseries) + if (sec_true or all_sec): + # if all data is plotted on secondary, return right axes + return self._get_ax_layer(self.axes[0], primary=False) + else: + return self.axes[0] + + def _compute_plot_data(self): + data = self.data + + if isinstance(data, Series): + label = self.label + if label is None and data.name is None: + label = 'None' + data = data.to_frame(name=label) + + numeric_data = data._convert(datetime=True)._get_numeric_data() + + try: + is_empty = numeric_data.empty + except AttributeError: + is_empty = not len(numeric_data) + + # no empty frames or series allowed + if is_empty: + raise TypeError('Empty {0!r}: no numeric data to ' + 'plot'.format(numeric_data.__class__.__name__)) + + self.data = numeric_data + + def _make_plot(self): + raise AbstractMethodError(self) + + def _add_table(self): + if self.table is False: + return + elif self.table is True: + data = self.data.transpose() + else: + data = self.table + ax = self._get_ax(0) + table(ax, data) + + def _post_plot_logic_common(self, ax, data): + """Common post process for each axes""" + labels = [pprint_thing(key) for key in data.index] + labels = dict(zip(range(len(data.index)), labels)) + + if self.orientation == 'vertical' or self.orientation is None: + if self._need_to_set_index: + xticklabels = [labels.get(x, '') for x in ax.get_xticks()] + ax.set_xticklabels(xticklabels) + self._apply_axis_properties(ax.xaxis, rot=self.rot, + fontsize=self.fontsize) + self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize) + elif self.orientation == 'horizontal': + if self._need_to_set_index: + yticklabels = [labels.get(y, '') for y in ax.get_yticks()] + ax.set_yticklabels(yticklabels) + self._apply_axis_properties(ax.yaxis, rot=self.rot, + fontsize=self.fontsize) + self._apply_axis_properties(ax.xaxis, fontsize=self.fontsize) + else: # pragma no cover + raise ValueError + + def _post_plot_logic(self, ax, data): + """Post process for each axes. 
Overridden in child classes""" + pass + + def _adorn_subplots(self): + """Common post process unrelated to data""" + if len(self.axes) > 0: + all_axes = self._get_subplots() + nrows, ncols = self._get_axes_layout() + _handle_shared_axes(axarr=all_axes, nplots=len(all_axes), + naxes=nrows * ncols, nrows=nrows, + ncols=ncols, sharex=self.sharex, + sharey=self.sharey) + + for ax in self.axes: + if self.yticks is not None: + ax.set_yticks(self.yticks) + + if self.xticks is not None: + ax.set_xticks(self.xticks) + + if self.ylim is not None: + ax.set_ylim(self.ylim) + + if self.xlim is not None: + ax.set_xlim(self.xlim) + + ax.grid(self.grid) + + if self.title: + if self.subplots: + if is_list_like(self.title): + if len(self.title) != self.nseries: + msg = ('The length of `title` must equal the number ' + 'of columns if using `title` of type `list` ' + 'and `subplots=True`.\n' + 'length of title = {}\n' + 'number of columns = {}').format( + len(self.title), self.nseries) + raise ValueError(msg) + + for (ax, title) in zip(self.axes, self.title): + ax.set_title(title) + else: + self.fig.suptitle(self.title) + else: + if is_list_like(self.title): + msg = ('Using `title` of type `list` is not supported ' + 'unless `subplots=True` is passed') + raise ValueError(msg) + self.axes[0].set_title(self.title) + + def _apply_axis_properties(self, axis, rot=None, fontsize=None): + labels = axis.get_majorticklabels() + axis.get_minorticklabels() + for label in labels: + if rot is not None: + label.set_rotation(rot) + if fontsize is not None: + label.set_fontsize(fontsize) + + @property + def legend_title(self): + if not isinstance(self.data.columns, MultiIndex): + name = self.data.columns.name + if name is not None: + name = pprint_thing(name) + return name + else: + stringified = map(pprint_thing, + self.data.columns.names) + return ','.join(stringified) + + def _add_legend_handle(self, handle, label, index=None): + if label is not None: + if self.mark_right and index is not None: + if self.on_right(index): + label = label + ' (right)' + self.legend_handles.append(handle) + self.legend_labels.append(label) + + def _make_legend(self): + ax, leg = self._get_ax_legend(self.axes[0]) + + handles = [] + labels = [] + title = '' + + if not self.subplots: + if leg is not None: + title = leg.get_title().get_text() + handles = leg.legendHandles + labels = [x.get_text() for x in leg.get_texts()] + + if self.legend: + if self.legend == 'reverse': + self.legend_handles = reversed(self.legend_handles) + self.legend_labels = reversed(self.legend_labels) + + handles += self.legend_handles + labels += self.legend_labels + if self.legend_title is not None: + title = self.legend_title + + if len(handles) > 0: + ax.legend(handles, labels, loc='best', title=title) + + elif self.subplots and self.legend: + for ax in self.axes: + if ax.get_visible(): + ax.legend(loc='best') + + def _get_ax_legend(self, ax): + leg = ax.get_legend() + other_ax = (getattr(ax, 'left_ax', None) or + getattr(ax, 'right_ax', None)) + other_leg = None + if other_ax is not None: + other_leg = other_ax.get_legend() + if leg is None and other_leg is not None: + leg = other_leg + ax = other_ax + return ax, leg + + @cache_readonly + def plt(self): + import matplotlib.pyplot as plt + return plt + + @staticmethod + def mpl_ge_1_3_1(): + return _mpl_ge_1_3_1() + + @staticmethod + def mpl_ge_1_5_0(): + return _mpl_ge_1_5_0() + + _need_to_set_index = False + + def _get_xticks(self, convert_period=False): + index = self.data.index + is_datetype = index.inferred_type 
in ('datetime', 'date',
+                                                'datetime64', 'time')
+
+        if self.use_index:
+            if convert_period and isinstance(index, PeriodIndex):
+                self.data = self.data.reindex(index=index.sort_values())
+                x = self.data.index.to_timestamp()._mpl_repr()
+            elif index.is_numeric():
+                # Matplotlib supports numeric values or datetime objects as
+                # xaxis values. Take the LBYL approach here: by the time
+                # matplotlib would raise for non-numeric/datetime xaxis
+                # values, plt has already taken several actions, so check
+                # up front instead.
+                x = index._mpl_repr()
+            elif is_datetype:
+                self.data = self.data.sort_index()
+                x = self.data.index._mpl_repr()
+            else:
+                self._need_to_set_index = True
+                x = lrange(len(index))
+        else:
+            x = lrange(len(index))
+
+        return x
+
+    @classmethod
+    def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds):
+        mask = isnull(y)
+        if mask.any():
+            y = np.ma.array(y)
+            y = np.ma.masked_where(mask, y)
+
+        if isinstance(x, Index):
+            x = x._mpl_repr()
+
+        if is_errorbar:
+            if 'xerr' in kwds:
+                kwds['xerr'] = np.array(kwds.get('xerr'))
+            if 'yerr' in kwds:
+                kwds['yerr'] = np.array(kwds.get('yerr'))
+            return ax.errorbar(x, y, **kwds)
+        else:
+            # prevent style kwarg from going to errorbar, where it is
+            # unsupported
+            if style is not None:
+                args = (x, y, style)
+            else:
+                args = (x, y)
+            return ax.plot(*args, **kwds)
+
+    def _get_index_name(self):
+        if isinstance(self.data.index, MultiIndex):
+            name = self.data.index.names
+            if any(x is not None for x in name):
+                name = ','.join([pprint_thing(x) for x in name])
+            else:
+                name = None
+        else:
+            name = self.data.index.name
+            if name is not None:
+                name = pprint_thing(name)
+
+        return name
+
+    @classmethod
+    def _get_ax_layer(cls, ax, primary=True):
+        """get left (primary) or right (secondary) axes"""
+        if primary:
+            return getattr(ax, 'left_ax', ax)
+        else:
+            return getattr(ax, 'right_ax', ax)
+
+    def _get_ax(self, i):
+        # get the twinx ax if appropriate
+        if self.subplots:
+            ax = self.axes[i]
+            ax = self._maybe_right_yaxis(ax, i)
+            self.axes[i] = ax
+        else:
+            ax = self.axes[0]
+            ax = self._maybe_right_yaxis(ax, i)
+
+        ax.get_yaxis().set_visible(True)
+        return ax
+
+    def on_right(self, i):
+        if isinstance(self.secondary_y, bool):
+            return self.secondary_y
+
+        if isinstance(self.secondary_y, (tuple, list, np.ndarray, Index)):
+            return self.data.columns[i] in self.secondary_y
+
+    def _apply_style_colors(self, colors, kwds, col_num, label):
+        """
+        Manage style and color based on column number and its label.
+        Returns a tuple of the appropriate style and of kwds, to which
+        "color" may have been added.
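+
+        Illustrative example (editor's note, not part of the original
+        patch): with ``style=['r--']`` the style string already encodes a
+        color, so no "color" kwd is injected; with ``style=['--']`` plus a
+        colormap, an explicit color, or ``subplots=True``, the entry
+        ``colors[col_num % len(colors)]`` is added under "color".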
+ """ + style = None + if self.style is not None: + if isinstance(self.style, list): + try: + style = self.style[col_num] + except IndexError: + pass + elif isinstance(self.style, dict): + style = self.style.get(label, style) + else: + style = self.style + + has_color = 'color' in kwds or self.colormap is not None + nocolor_style = style is None or re.match('[a-z]+', style) is None + if (has_color or self.subplots) and nocolor_style: + kwds['color'] = colors[col_num % len(colors)] + return style, kwds + + def _get_colors(self, num_colors=None, color_kwds='color'): + if num_colors is None: + num_colors = self.nseries + + return _get_standard_colors(num_colors=num_colors, + colormap=self.colormap, + color=self.kwds.get(color_kwds)) + + def _parse_errorbars(self, label, err): + """ + Look for error keyword arguments and return the actual errorbar data + or return the error DataFrame/dict + + Error bars can be specified in several ways: + Series: the user provides a pandas.Series object of the same + length as the data + ndarray: provides a np.ndarray of the same length as the data + DataFrame/dict: error values are paired with keys matching the + key in the plotted DataFrame + str: the name of the column within the plotted DataFrame + """ + + if err is None: + return None + + from pandas import DataFrame, Series + + def match_labels(data, e): + e = e.reindex_axis(data.index) + return e + + # key-matched DataFrame + if isinstance(err, DataFrame): + + err = match_labels(self.data, err) + # key-matched dict + elif isinstance(err, dict): + pass + + # Series of error values + elif isinstance(err, Series): + # broadcast error series across data + err = match_labels(self.data, err) + err = np.atleast_2d(err) + err = np.tile(err, (self.nseries, 1)) + + # errors are a column in the dataframe + elif isinstance(err, string_types): + evalues = self.data[err].values + self.data = self.data[self.data.columns.drop(err)] + err = np.atleast_2d(evalues) + err = np.tile(err, (self.nseries, 1)) + + elif is_list_like(err): + if is_iterator(err): + err = np.atleast_2d(list(err)) + else: + # raw error values + err = np.atleast_2d(err) + + err_shape = err.shape + + # asymmetrical error bars + if err.ndim == 3: + if (err_shape[0] != self.nseries) or \ + (err_shape[1] != 2) or \ + (err_shape[2] != len(self.data)): + msg = "Asymmetrical error bars should be provided " + \ + "with the shape (%u, 2, %u)" % \ + (self.nseries, len(self.data)) + raise ValueError(msg) + + # broadcast errors to each data series + if len(err) == 1: + err = np.tile(err, (self.nseries, 1)) + + elif is_number(err): + err = np.tile([err], (self.nseries, len(self.data))) + + else: + msg = "No valid %s detected" % label + raise ValueError(msg) + + return err + + def _get_errorbars(self, label=None, index=None, xerr=True, yerr=True): + from pandas import DataFrame + errors = {} + + for kw, flag in zip(['xerr', 'yerr'], [xerr, yerr]): + if flag: + err = self.errors[kw] + # user provided label-matched dataframe of errors + if isinstance(err, (DataFrame, dict)): + if label is not None and label in err.keys(): + err = err[label] + else: + err = None + elif index is not None and err is not None: + err = err[index] + + if err is not None: + errors[kw] = err + return errors + + def _get_subplots(self): + from matplotlib.axes import Subplot + return [ax for ax in self.axes[0].get_figure().get_axes() + if isinstance(ax, Subplot)] + + def _get_axes_layout(self): + axes = self._get_subplots() + x_set = set() + y_set = set() + for ax in axes: + # check axes 
coordinates to estimate layout
+            points = ax.get_position().get_points()
+            x_set.add(points[0][0])
+            y_set.add(points[0][1])
+        return (len(y_set), len(x_set))
+
+
+class PlanePlot(MPLPlot):
+    """
+    Abstract class for plotting on plane, currently scatter and hexbin.
+    """
+
+    _layout_type = 'single'
+
+    def __init__(self, data, x, y, **kwargs):
+        MPLPlot.__init__(self, data, **kwargs)
+        if x is None or y is None:
+            raise ValueError(self._kind + ' requires an x and y column')
+        if is_integer(x) and not self.data.columns.holds_integer():
+            x = self.data.columns[x]
+        if is_integer(y) and not self.data.columns.holds_integer():
+            y = self.data.columns[y]
+        self.x = x
+        self.y = y
+
+    @property
+    def nseries(self):
+        return 1
+
+    def _post_plot_logic(self, ax, data):
+        x, y = self.x, self.y
+        ax.set_ylabel(pprint_thing(y))
+        ax.set_xlabel(pprint_thing(x))
+
+
+class ScatterPlot(PlanePlot):
+    _kind = 'scatter'
+
+    def __init__(self, data, x, y, s=None, c=None, **kwargs):
+        if s is None:
+            # hide the matplotlib default for size, in case we want to change
+            # the handling of this argument later
+            s = 20
+        super(ScatterPlot, self).__init__(data, x, y, s=s, **kwargs)
+        if is_integer(c) and not self.data.columns.holds_integer():
+            c = self.data.columns[c]
+        self.c = c
+
+    def _make_plot(self):
+        x, y, c, data = self.x, self.y, self.c, self.data
+        ax = self.axes[0]
+
+        c_is_column = is_hashable(c) and c in self.data.columns
+
+        # plot a colorbar only if a colormap is provided or necessary
+        cb = self.kwds.pop('colorbar', self.colormap or c_is_column)
+
+        # pandas uses colormap, matplotlib uses cmap.
+        cmap = self.colormap or 'Greys'
+        cmap = self.plt.cm.get_cmap(cmap)
+        color = self.kwds.pop("color", None)
+        if c is not None and color is not None:
+            raise TypeError('Specify exactly one of `c` and `color`')
+        elif c is None and color is None:
+            c_values = self.plt.rcParams['patch.facecolor']
+        elif color is not None:
+            c_values = color
+        elif c_is_column:
+            c_values = self.data[c].values
+        else:
+            c_values = c
+
+        if self.legend and hasattr(self, 'label'):
+            label = self.label
+        else:
+            label = None
+        scatter = ax.scatter(data[x].values, data[y].values, c=c_values,
+                             label=label, cmap=cmap, **self.kwds)
+        if cb:
+            img = ax.collections[0]
+            kws = dict(ax=ax)
+            if self.mpl_ge_1_3_1():
+                kws['label'] = c if c_is_column else ''
+            self.fig.colorbar(img, **kws)
+
+        if label is not None:
+            self._add_legend_handle(scatter, label)
+        else:
+            self.legend = False
+
+        errors_x = self._get_errorbars(label=x, index=0, yerr=False)
+        errors_y = self._get_errorbars(label=y, index=0, xerr=False)
+        if len(errors_x) > 0 or len(errors_y) > 0:
+            err_kwds = dict(errors_x, **errors_y)
+            err_kwds['ecolor'] = scatter.get_facecolor()[0]
+            ax.errorbar(data[x].values, data[y].values,
+                        linestyle='none', **err_kwds)
+
+
+class HexBinPlot(PlanePlot):
+    _kind = 'hexbin'
+
+    def __init__(self, data, x, y, C=None, **kwargs):
+        super(HexBinPlot, self).__init__(data, x, y, **kwargs)
+        if is_integer(C) and not self.data.columns.holds_integer():
+            C = self.data.columns[C]
+        self.C = C
+
+    def _make_plot(self):
+        x, y, data, C = self.x, self.y, self.data, self.C
+        ax = self.axes[0]
+        # pandas uses colormap, matplotlib uses cmap.
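+        # Editor's note (illustrative, not part of the original patch):
+        # with C=None below each hexagon encodes a raw count of points,
+        # while a column passed as C is aggregated per bin by matplotlib's
+        # reduce_C_function, which defaults to np.mean, e.g. (hypothetical
+        # column names):
+        #     df.plot.hexbin(x='a', y='b', C='z', reduce_C_function=np.max)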
+        cmap = self.colormap or 'BuGn'
+        cmap = self.plt.cm.get_cmap(cmap)
+        cb = self.kwds.pop('colorbar', True)
+
+        if C is None:
+            c_values = None
+        else:
+            c_values = data[C].values
+
+        ax.hexbin(data[x].values, data[y].values, C=c_values, cmap=cmap,
+                  **self.kwds)
+        if cb:
+            img = ax.collections[0]
+            self.fig.colorbar(img, ax=ax)
+
+    def _make_legend(self):
+        pass
+
+
+class LinePlot(MPLPlot):
+    _kind = 'line'
+    _default_rot = 0
+    orientation = 'vertical'
+
+    def __init__(self, data, **kwargs):
+        MPLPlot.__init__(self, data, **kwargs)
+        if self.stacked:
+            self.data = self.data.fillna(value=0)
+        self.x_compat = plot_params['x_compat']
+        if 'x_compat' in self.kwds:
+            self.x_compat = bool(self.kwds.pop('x_compat'))
+
+    def _is_ts_plot(self):
+        # this is slightly deceptive
+        return not self.x_compat and self.use_index and self._use_dynamic_x()
+
+    def _use_dynamic_x(self):
+        from pandas.plotting._timeseries import _use_dynamic_x
+        return _use_dynamic_x(self._get_ax(0), self.data)
+
+    def _make_plot(self):
+        if self._is_ts_plot():
+            from pandas.plotting._timeseries import _maybe_convert_index
+            data = _maybe_convert_index(self._get_ax(0), self.data)
+
+            x = data.index  # dummy, not used
+            plotf = self._ts_plot
+            it = self._iter_data(data=data, keep_index=True)
+        else:
+            x = self._get_xticks(convert_period=True)
+            plotf = self._plot
+            it = self._iter_data()
+
+        stacking_id = self._get_stacking_id()
+        is_errorbar = any(e is not None for e in self.errors.values())
+
+        colors = self._get_colors()
+        for i, (label, y) in enumerate(it):
+            ax = self._get_ax(i)
+            kwds = self.kwds.copy()
+            style, kwds = self._apply_style_colors(colors, kwds, i, label)
+
+            errors = self._get_errorbars(label=label, index=i)
+            kwds = dict(kwds, **errors)
+
+            label = pprint_thing(label)  # .encode('utf-8')
+            kwds['label'] = label
+
+            newlines = plotf(ax, x, y, style=style, column_num=i,
+                             stacking_id=stacking_id,
+                             is_errorbar=is_errorbar,
+                             **kwds)
+            self._add_legend_handle(newlines[0], label, index=i)
+
+            lines = _get_all_lines(ax)
+            left, right = _get_xlim(lines)
+            ax.set_xlim(left, right)
+
+    @classmethod
+    def _plot(cls, ax, x, y, style=None, column_num=None,
+              stacking_id=None, **kwds):
+        # column_num is used to get the target column from plotf in line and
+        # area plots
+        if column_num == 0:
+            cls._initialize_stacker(ax, stacking_id, len(y))
+        y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label'])
+        lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds)
+        cls._update_stacker(ax, stacking_id, y)
+        return lines
+
+    @classmethod
+    def _ts_plot(cls, ax, x, data, style=None, **kwds):
+        from pandas.plotting._timeseries import (_maybe_resample,
+                                                 _decorate_axes,
+                                                 format_dateaxis)
+        # accept x to be consistent with normal plot func,
+        # x is not passed to tsplot as it uses data.index as x coordinate
+        # column_num must be in kwds for stacking purpose
+        freq, data = _maybe_resample(data, ax, kwds)
+
+        # Set ax with freq info
+        _decorate_axes(ax, freq, kwds)
+        # digging deeper
+        if hasattr(ax, 'left_ax'):
+            _decorate_axes(ax.left_ax, freq, kwds)
+        if hasattr(ax, 'right_ax'):
+            _decorate_axes(ax.right_ax, freq, kwds)
+        ax._plot_data.append((data, cls._kind, kwds))
+
+        lines = cls._plot(ax, data.index, data.values, style=style, **kwds)
+        # set date formatter, locators and rescale limits
+        format_dateaxis(ax, ax.freq, data.index)
+        return lines
+
+    def _get_stacking_id(self):
+        if self.stacked:
+            return id(self.data)
+        else:
+            return None
+
+    @classmethod
+    def _initialize_stacker(cls, ax, stacking_id, n):
+        if stacking_id is None:
+            return
+        if not hasattr(ax, '_stacker_pos_prior'):
+            ax._stacker_pos_prior = {}
+        if not hasattr(ax, '_stacker_neg_prior'):
+            ax._stacker_neg_prior = {}
+        ax._stacker_pos_prior[stacking_id] = np.zeros(n)
+        ax._stacker_neg_prior[stacking_id] = np.zeros(n)
+
+    @classmethod
+    def _get_stacked_values(cls, ax, stacking_id, values, label):
+        if stacking_id is None:
+            return values
+        if not hasattr(ax, '_stacker_pos_prior'):
+            # stacker may not be initialized for subplots
+            cls._initialize_stacker(ax, stacking_id, len(values))
+
+        if (values >= 0).all():
+            return ax._stacker_pos_prior[stacking_id] + values
+        elif (values <= 0).all():
+            return ax._stacker_neg_prior[stacking_id] + values
+
+        raise ValueError('When stacked is True, each column must be either '
+                         'all positive or all negative. '
+                         '{0} contains both positive and negative values'
+                         .format(label))
+
+    @classmethod
+    def _update_stacker(cls, ax, stacking_id, values):
+        if stacking_id is None:
+            return
+        if (values >= 0).all():
+            ax._stacker_pos_prior[stacking_id] += values
+        elif (values <= 0).all():
+            ax._stacker_neg_prior[stacking_id] += values
+
+    def _post_plot_logic(self, ax, data):
+        condition = (not self._use_dynamic_x() and
+                     data.index.is_all_dates and
+                     not self.subplots or
+                     (self.subplots and self.sharex))
+
+        index_name = self._get_index_name()
+
+        if condition:
+            # irregular TS rotated 30 deg. by default
+            # probably a better place to check / set this.
+            if not self._rot_set:
+                self.rot = 30
+            format_date_labels(ax, rot=self.rot)
+
+        if index_name is not None and self.use_index:
+            ax.set_xlabel(index_name)
+
+
+class AreaPlot(LinePlot):
+    _kind = 'area'
+
+    def __init__(self, data, **kwargs):
+        kwargs.setdefault('stacked', True)
+        data = data.fillna(value=0)
+        LinePlot.__init__(self, data, **kwargs)
+
+        if not self.stacked:
+            # use smaller alpha to distinguish overlap
+            self.kwds.setdefault('alpha', 0.5)
+
+        if self.logy or self.loglog:
+            raise ValueError("Log-y scales are not supported in area plot")
+
+    @classmethod
+    def _plot(cls, ax, x, y, style=None, column_num=None,
+              stacking_id=None, is_errorbar=False, **kwds):
+
+        if column_num == 0:
+            cls._initialize_stacker(ax, stacking_id, len(y))
+        y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label'])
+
+        # need to remove label, because subplots uses mpl legend as it is
+        line_kwds = kwds.copy()
+        if cls.mpl_ge_1_5_0():
+            line_kwds.pop('label')
+        lines = MPLPlot._plot(ax, x, y_values, style=style, **line_kwds)
+
+        # get data from the line to get coordinates for fill_between
+        xdata, y_values = lines[0].get_data(orig=False)
+
+        # unable to use ``_get_stacked_values`` here to get starting point
+        if stacking_id is None:
+            start = np.zeros(len(y))
+        elif (y >= 0).all():
+            start = ax._stacker_pos_prior[stacking_id]
+        elif (y <= 0).all():
+            start = ax._stacker_neg_prior[stacking_id]
+        else:
+            start = np.zeros(len(y))
+
+        if 'color' not in kwds:
+            kwds['color'] = lines[0].get_color()
+
+        rect = ax.fill_between(xdata, start, y_values, **kwds)
+        cls._update_stacker(ax, stacking_id, y)
+
+        # LinePlot expects list of artists
+        res = [rect] if cls.mpl_ge_1_5_0() else lines
+        return res
+
+    def _add_legend_handle(self, handle, label, index=None):
+        if not self.mpl_ge_1_5_0():
+            from matplotlib.patches import Rectangle
+            # Because fill_between isn't supported in legend,
+            # specifically add Rectangle handle here
+            alpha = self.kwds.get('alpha', None)
+            handle = Rectangle((0, 0), 1, 1, fc=handle.get_color(),
+                               alpha=alpha)
+
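+        # Editor's note (illustrative): the Rectangle above is a proxy
+        # artist; on matplotlib < 1.5 the PolyCollection returned by
+        # fill_between cannot serve as a legend handle, so a patch with the
+        # same face color stands in for it.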
LinePlot._add_legend_handle(self, handle, label, index=index) + + def _post_plot_logic(self, ax, data): + LinePlot._post_plot_logic(self, ax, data) + + if self.ylim is None: + if (data >= 0).all().all(): + ax.set_ylim(0, None) + elif (data <= 0).all().all(): + ax.set_ylim(None, 0) + + +class BarPlot(MPLPlot): + _kind = 'bar' + _default_rot = 90 + orientation = 'vertical' + + def __init__(self, data, **kwargs): + self.bar_width = kwargs.pop('width', 0.5) + pos = kwargs.pop('position', 0.5) + kwargs.setdefault('align', 'center') + self.tick_pos = np.arange(len(data)) + + self.bottom = kwargs.pop('bottom', 0) + self.left = kwargs.pop('left', 0) + + self.log = kwargs.pop('log', False) + MPLPlot.__init__(self, data, **kwargs) + + if self.stacked or self.subplots: + self.tickoffset = self.bar_width * pos + if kwargs['align'] == 'edge': + self.lim_offset = self.bar_width / 2 + else: + self.lim_offset = 0 + else: + if kwargs['align'] == 'edge': + w = self.bar_width / self.nseries + self.tickoffset = self.bar_width * (pos - 0.5) + w * 0.5 + self.lim_offset = w * 0.5 + else: + self.tickoffset = self.bar_width * pos + self.lim_offset = 0 + + self.ax_pos = self.tick_pos - self.tickoffset + + def _args_adjust(self): + if is_list_like(self.bottom): + self.bottom = np.array(self.bottom) + if is_list_like(self.left): + self.left = np.array(self.left) + + @classmethod + def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): + return ax.bar(x, y, w, bottom=start, log=log, **kwds) + + @property + def _start_base(self): + return self.bottom + + def _make_plot(self): + import matplotlib as mpl + + colors = self._get_colors() + ncolors = len(colors) + + pos_prior = neg_prior = np.zeros(len(self.data)) + K = self.nseries + + for i, (label, y) in enumerate(self._iter_data(fillna=0)): + ax = self._get_ax(i) + kwds = self.kwds.copy() + kwds['color'] = colors[i % ncolors] + + errors = self._get_errorbars(label=label, index=i) + kwds = dict(kwds, **errors) + + label = pprint_thing(label) + + if (('yerr' in kwds) or ('xerr' in kwds)) \ + and (kwds.get('ecolor') is None): + kwds['ecolor'] = mpl.rcParams['xtick.color'] + + start = 0 + if self.log and (y >= 1).all(): + start = 1 + start = start + self._start_base + + if self.subplots: + w = self.bar_width / 2 + rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, + start=start, label=label, + log=self.log, **kwds) + ax.set_title(label) + elif self.stacked: + mask = y > 0 + start = np.where(mask, pos_prior, neg_prior) + self._start_base + w = self.bar_width / 2 + rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, + start=start, label=label, + log=self.log, **kwds) + pos_prior = pos_prior + np.where(mask, y, 0) + neg_prior = neg_prior + np.where(mask, 0, y) + else: + w = self.bar_width / K + rect = self._plot(ax, self.ax_pos + (i + 0.5) * w, y, w, + start=start, label=label, + log=self.log, **kwds) + self._add_legend_handle(rect, label, index=i) + + def _post_plot_logic(self, ax, data): + if self.use_index: + str_index = [pprint_thing(key) for key in data.index] + else: + str_index = [pprint_thing(key) for key in range(data.shape[0])] + name = self._get_index_name() + + s_edge = self.ax_pos[0] - 0.25 + self.lim_offset + e_edge = self.ax_pos[-1] + 0.25 + self.bar_width + self.lim_offset + + self._decorate_ticks(ax, name, str_index, s_edge, e_edge) + + def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): + ax.set_xlim((start_edge, end_edge)) + ax.set_xticks(self.tick_pos) + ax.set_xticklabels(ticklabels) + if name is not None and 
self.use_index: + ax.set_xlabel(name) + + +class BarhPlot(BarPlot): + _kind = 'barh' + _default_rot = 0 + orientation = 'horizontal' + + @property + def _start_base(self): + return self.left + + @classmethod + def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): + return ax.barh(x, y, w, left=start, log=log, **kwds) + + def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): + # horizontal bars + ax.set_ylim((start_edge, end_edge)) + ax.set_yticks(self.tick_pos) + ax.set_yticklabels(ticklabels) + if name is not None and self.use_index: + ax.set_ylabel(name) + + +class HistPlot(LinePlot): + _kind = 'hist' + + def __init__(self, data, bins=10, bottom=0, **kwargs): + self.bins = bins # use mpl default + self.bottom = bottom + # Do not call LinePlot.__init__ which may fill nan + MPLPlot.__init__(self, data, **kwargs) + + def _args_adjust(self): + if is_integer(self.bins): + # create common bin edge + values = (self.data._convert(datetime=True)._get_numeric_data()) + values = np.ravel(values) + values = values[~isnull(values)] + + hist, self.bins = np.histogram( + values, bins=self.bins, + range=self.kwds.get('range', None), + weights=self.kwds.get('weights', None)) + + if is_list_like(self.bottom): + self.bottom = np.array(self.bottom) + + @classmethod + def _plot(cls, ax, y, style=None, bins=None, bottom=0, column_num=0, + stacking_id=None, **kwds): + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(bins) - 1) + y = y[~isnull(y)] + + base = np.zeros(len(bins) - 1) + bottom = bottom + \ + cls._get_stacked_values(ax, stacking_id, base, kwds['label']) + # ignore style + n, bins, patches = ax.hist(y, bins=bins, bottom=bottom, **kwds) + cls._update_stacker(ax, stacking_id, n) + return patches + + def _make_plot(self): + colors = self._get_colors() + stacking_id = self._get_stacking_id() + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + + kwds = self.kwds.copy() + + label = pprint_thing(label) + kwds['label'] = label + + style, kwds = self._apply_style_colors(colors, kwds, i, label) + if style is not None: + kwds['style'] = style + + kwds = self._make_plot_keywords(kwds, y) + artists = self._plot(ax, y, column_num=i, + stacking_id=stacking_id, **kwds) + self._add_legend_handle(artists[0], label, index=i) + + def _make_plot_keywords(self, kwds, y): + """merge BoxPlot/KdePlot properties to passed kwds""" + # y is required for KdePlot + kwds['bottom'] = self.bottom + kwds['bins'] = self.bins + return kwds + + def _post_plot_logic(self, ax, data): + if self.orientation == 'horizontal': + ax.set_xlabel('Frequency') + else: + ax.set_ylabel('Frequency') + + @property + def orientation(self): + if self.kwds.get('orientation', None) == 'horizontal': + return 'horizontal' + else: + return 'vertical' + + +class KdePlot(HistPlot): + _kind = 'kde' + orientation = 'vertical' + + def __init__(self, data, bw_method=None, ind=None, **kwargs): + MPLPlot.__init__(self, data, **kwargs) + self.bw_method = bw_method + self.ind = ind + + def _args_adjust(self): + pass + + def _get_ind(self, y): + if self.ind is None: + # np.nanmax() and np.nanmin() ignores the missing values + sample_range = np.nanmax(y) - np.nanmin(y) + ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, + np.nanmax(y) + 0.5 * sample_range, 1000) + else: + ind = self.ind + return ind + + @classmethod + def _plot(cls, ax, y, style=None, bw_method=None, ind=None, + column_num=None, stacking_id=None, **kwds): + from scipy.stats import gaussian_kde + from scipy import __version__ as spv + + 
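+        # Editor's note (illustrative): gaussian_kde fits a kernel density
+        # estimate to the sample, which is evaluated below on the grid
+        # ``ind`` built by _get_ind, i.e. roughly 1000 points spanning
+        # [min(y) - 0.5 * range(y), max(y) + 0.5 * range(y)].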
y = remove_na(y) + + if LooseVersion(spv) >= '0.11.0': + gkde = gaussian_kde(y, bw_method=bw_method) + else: + gkde = gaussian_kde(y) + if bw_method is not None: + msg = ('bw_method was added in Scipy 0.11.0.' + + ' Scipy version in use is %s.' % spv) + warnings.warn(msg) + + y = gkde.evaluate(ind) + lines = MPLPlot._plot(ax, ind, y, style=style, **kwds) + return lines + + def _make_plot_keywords(self, kwds, y): + kwds['bw_method'] = self.bw_method + kwds['ind'] = self._get_ind(y) + return kwds + + def _post_plot_logic(self, ax, data): + ax.set_ylabel('Density') + + +class PiePlot(MPLPlot): + _kind = 'pie' + _layout_type = 'horizontal' + + def __init__(self, data, kind=None, **kwargs): + data = data.fillna(value=0) + if (data < 0).any().any(): + raise ValueError("{0} doesn't allow negative values".format(kind)) + MPLPlot.__init__(self, data, kind=kind, **kwargs) + + def _args_adjust(self): + self.grid = False + self.logy = False + self.logx = False + self.loglog = False + + def _validate_color_args(self): + pass + + def _make_plot(self): + colors = self._get_colors( + num_colors=len(self.data), color_kwds='colors') + self.kwds.setdefault('colors', colors) + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + if label is not None: + label = pprint_thing(label) + ax.set_ylabel(label) + + kwds = self.kwds.copy() + + def blank_labeler(label, value): + if value == 0: + return '' + else: + return label + + idx = [pprint_thing(v) for v in self.data.index] + labels = kwds.pop('labels', idx) + # labels is used for each wedge's labels + # Blank out labels for values of 0 so they don't overlap + # with nonzero wedges + if labels is not None: + blabels = [blank_labeler(l, value) for + l, value in zip(labels, y)] + else: + blabels = None + results = ax.pie(y, labels=blabels, **kwds) + + if kwds.get('autopct', None) is not None: + patches, texts, autotexts = results + else: + patches, texts = results + autotexts = [] + + if self.fontsize is not None: + for t in texts + autotexts: + t.set_fontsize(self.fontsize) + + # leglabels is used for legend labels + leglabels = labels if labels is not None else idx + for p, l in zip(patches, leglabels): + self._add_legend_handle(p, l) + + +class BoxPlot(LinePlot): + _kind = 'box' + _layout_type = 'horizontal' + + _valid_return_types = (None, 'axes', 'dict', 'both') + # namedtuple to hold results + BP = namedtuple("Boxplot", ['ax', 'lines']) + + def __init__(self, data, return_type='axes', **kwargs): + # Do not call LinePlot.__init__ which may fill nan + if return_type not in self._valid_return_types: + raise ValueError( + "return_type must be {None, 'axes', 'dict', 'both'}") + + self.return_type = return_type + MPLPlot.__init__(self, data, **kwargs) + + def _args_adjust(self): + if self.subplots: + # Disable label ax sharing. 
Otherwise, all subplots show the last
+            # column label
+            if self.orientation == 'vertical':
+                self.sharex = False
+            else:
+                self.sharey = False
+
+    @classmethod
+    def _plot(cls, ax, y, column_num=None, return_type='axes', **kwds):
+        if y.ndim == 2:
+            y = [remove_na(v) for v in y]
+            # Boxplot fails with empty arrays, so need to add a NaN
+            # if any cols are empty
+            # GH 8181
+            y = [v if v.size > 0 else np.array([np.nan]) for v in y]
+        else:
+            y = remove_na(y)
+        bp = ax.boxplot(y, **kwds)
+
+        if return_type == 'dict':
+            return bp, bp
+        elif return_type == 'both':
+            return cls.BP(ax=ax, lines=bp), bp
+        else:
+            return ax, bp
+
+    def _validate_color_args(self):
+        if 'color' in self.kwds:
+            if self.colormap is not None:
+                warnings.warn("'color' and 'colormap' cannot be used "
+                              "simultaneously. Using 'color'")
+            self.color = self.kwds.pop('color')
+
+            if isinstance(self.color, dict):
+                valid_keys = ['boxes', 'whiskers', 'medians', 'caps']
+                for key, values in compat.iteritems(self.color):
+                    if key not in valid_keys:
+                        raise ValueError("color dict contains invalid "
+                                         "key '{0}'. "
+                                         "The key must be either {1}"
+                                         .format(key, valid_keys))
+        else:
+            self.color = None
+
+        # get standard colors for default
+        colors = _get_standard_colors(num_colors=3,
+                                      colormap=self.colormap,
+                                      color=None)
+        # use 2 colors by default, for box/whisker and median
+        # flier colors aren't needed here
+        # because they can be specified by the ``sym`` kw
+        self._boxes_c = colors[0]
+        self._whiskers_c = colors[0]
+        self._medians_c = colors[2]
+        self._caps_c = 'k'  # mpl default
+
+    def _get_colors(self, num_colors=None, color_kwds='color'):
+        pass
+
+    def maybe_color_bp(self, bp):
+        if isinstance(self.color, dict):
+            boxes = self.color.get('boxes', self._boxes_c)
+            whiskers = self.color.get('whiskers', self._whiskers_c)
+            medians = self.color.get('medians', self._medians_c)
+            caps = self.color.get('caps', self._caps_c)
+        else:
+            # Other types are forwarded to matplotlib
+            # If None, use default colors
+            boxes = self.color or self._boxes_c
+            whiskers = self.color or self._whiskers_c
+            medians = self.color or self._medians_c
+            caps = self.color or self._caps_c
+
+        from matplotlib.artist import setp
+        setp(bp['boxes'], color=boxes, alpha=1)
+        setp(bp['whiskers'], color=whiskers, alpha=1)
+        setp(bp['medians'], color=medians, alpha=1)
+        setp(bp['caps'], color=caps, alpha=1)
+
+    def _make_plot(self):
+        if self.subplots:
+            self._return_obj = Series()
+
+            for i, (label, y) in enumerate(self._iter_data()):
+                ax = self._get_ax(i)
+                kwds = self.kwds.copy()
+
+                ret, bp = self._plot(ax, y, column_num=i,
+                                     return_type=self.return_type, **kwds)
+                self.maybe_color_bp(bp)
+                self._return_obj[label] = ret
+
+                label = [pprint_thing(label)]
+                self._set_ticklabels(ax, label)
+        else:
+            y = self.data.values.T
+            ax = self._get_ax(0)
+            kwds = self.kwds.copy()
+
+            ret, bp = self._plot(ax, y, column_num=0,
+                                 return_type=self.return_type, **kwds)
+            self.maybe_color_bp(bp)
+            self._return_obj = ret
+
+            labels = [l for l, _ in self._iter_data()]
+            labels = [pprint_thing(l) for l in labels]
+            if not self.use_index:
+                labels = [pprint_thing(key) for key in range(len(labels))]
+            self._set_ticklabels(ax, labels)
+
+    def _set_ticklabels(self, ax, labels):
+        if self.orientation == 'vertical':
+            ax.set_xticklabels(labels)
+        else:
+            ax.set_yticklabels(labels)
+
+    def _make_legend(self):
+        pass
+
+    def _post_plot_logic(self, ax, data):
+        pass
+
+    @property
+    def orientation(self):
+        if self.kwds.get('vert', True):
+            return 'vertical'
+        else:
+            return 'horizontal'
+
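+    # Editor's sketch (illustrative, not part of the original patch):
+    # ``return_type`` shapes what the ``result`` property below hands back:
+    #
+    #     df = pd.DataFrame(np.random.randn(10, 2), columns=['a', 'b'])
+    #     res = df.plot.box(return_type='both')
+    #     res.ax     # the matplotlib Axes the boxes were drawn on
+    #     res.lines  # dict of matplotlib Lines (boxes, whiskers, caps, ...)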
@property + def result(self): + if self.return_type is None: + return super(BoxPlot, self).result + else: + return self._return_obj + + +# kinds supported by both dataframe and series +_common_kinds = ['line', 'bar', 'barh', + 'kde', 'density', 'area', 'hist', 'box'] +# kinds supported by dataframe +_dataframe_kinds = ['scatter', 'hexbin'] +# kinds supported only by series or dataframe single column +_series_kinds = ['pie'] +_all_kinds = _common_kinds + _dataframe_kinds + _series_kinds + +_klasses = [LinePlot, BarPlot, BarhPlot, KdePlot, HistPlot, BoxPlot, + ScatterPlot, HexBinPlot, AreaPlot, PiePlot] + +_plot_klass = {} +for klass in _klasses: + _plot_klass[klass._kind] = klass + + +def _plot(data, x=None, y=None, subplots=False, + ax=None, kind='line', **kwds): + kind = _get_standard_kind(kind.lower().strip()) + if kind in _all_kinds: + klass = _plot_klass[kind] + else: + raise ValueError("%r is not a valid plot kind" % kind) + + from pandas import DataFrame + if kind in _dataframe_kinds: + if isinstance(data, DataFrame): + plot_obj = klass(data, x=x, y=y, subplots=subplots, ax=ax, + kind=kind, **kwds) + else: + raise ValueError("plot kind %r can only be used for data frames" + % kind) + + elif kind in _series_kinds: + if isinstance(data, DataFrame): + if y is None and subplots is False: + msg = "{0} requires either y column or 'subplots=True'" + raise ValueError(msg.format(kind)) + elif y is not None: + if is_integer(y) and not data.columns.holds_integer(): + y = data.columns[y] + # converted to series actually. copy to not modify + data = data[y].copy() + data.index.name = y + plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) + else: + if isinstance(data, DataFrame): + if x is not None: + if is_integer(x) and not data.columns.holds_integer(): + x = data.columns[x] + data = data.set_index(x) + + if y is not None: + if is_integer(y) and not data.columns.holds_integer(): + y = data.columns[y] + label = kwds['label'] if 'label' in kwds else y + series = data[y].copy() # Don't modify + series.name = label + + for kw in ['xerr', 'yerr']: + if (kw in kwds) and \ + (isinstance(kwds[kw], string_types) or + is_integer(kwds[kw])): + try: + kwds[kw] = data[kwds[kw]] + except (IndexError, KeyError, TypeError): + pass + data = series + plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) + + plot_obj.generate() + plot_obj.draw() + return plot_obj.result + + +df_kind = """- 'scatter' : scatter plot + - 'hexbin' : hexbin plot""" +series_kind = "" + +df_coord = """x : label or position, default None + y : label or position, default None + Allows plotting of one column versus another""" +series_coord = "" + +df_unique = """stacked : boolean, default False in line and + bar plots, and True in area plot. If True, create stacked plot. 
+ sort_columns : boolean, default False + Sort column names to determine plot ordering + secondary_y : boolean or sequence, default False + Whether to plot on the secondary y-axis + If a list/tuple, which columns to plot on secondary y-axis""" +series_unique = """label : label argument to provide to plot + secondary_y : boolean or sequence of ints, default False + If True then y-axis will be on the right""" + +df_ax = """ax : matplotlib axes object, default None + subplots : boolean, default False + Make separate subplots for each column + sharex : boolean, default True if ax is None else False + In case subplots=True, share x axis and set some x axis labels to + invisible; defaults to True if ax is None otherwise False if an ax + is passed in; Be aware, that passing in both an ax and sharex=True + will alter all x axis labels for all axis in a figure! + sharey : boolean, default False + In case subplots=True, share y axis and set some y axis labels to + invisible + layout : tuple (optional) + (rows, columns) for the layout of subplots""" +series_ax = """ax : matplotlib axes object + If not passed, uses gca()""" + +df_note = """- If `kind` = 'scatter' and the argument `c` is the name of a dataframe + column, the values of that column are used to color each point. + - If `kind` = 'hexbin', you can control the size of the bins with the + `gridsize` argument. By default, a histogram of the counts around each + `(x, y)` point is computed. You can specify alternative aggregations + by passing values to the `C` and `reduce_C_function` arguments. + `C` specifies the value at each `(x, y)` point and `reduce_C_function` + is a function of one argument that reduces all the values in a bin to + a single number (e.g. `mean`, `max`, `sum`, `std`).""" +series_note = "" + +_shared_doc_df_kwargs = dict(klass='DataFrame', klass_obj='df', + klass_kind=df_kind, klass_coord=df_coord, + klass_ax=df_ax, klass_unique=df_unique, + klass_note=df_note) +_shared_doc_series_kwargs = dict(klass='Series', klass_obj='s', + klass_kind=series_kind, + klass_coord=series_coord, klass_ax=series_ax, + klass_unique=series_unique, + klass_note=series_note) + +_shared_docs['plot'] = """ + Make plots of %(klass)s using matplotlib / pylab. + + *New in version 0.17.0:* Each plot kind has a corresponding method on the + ``%(klass)s.plot`` accessor: + ``%(klass_obj)s.plot(kind='line')`` is equivalent to + ``%(klass_obj)s.plot.line()``. + + Parameters + ---------- + data : %(klass)s + %(klass_coord)s + kind : str + - 'line' : line plot (default) + - 'bar' : vertical bar plot + - 'barh' : horizontal bar plot + - 'hist' : histogram + - 'box' : boxplot + - 'kde' : Kernel Density Estimation plot + - 'density' : same as 'kde' + - 'area' : area plot + - 'pie' : pie plot + %(klass_kind)s + %(klass_ax)s + figsize : a tuple (width, height) in inches + use_index : boolean, default True + Use index as ticks for x axis + title : string or list + Title to use for the plot. If a string is passed, print the string at + the top of the figure. If a list is passed and `subplots` is True, + print each item in the list above the corresponding subplot. 
+ grid : boolean, default None (matlab style default) + Axis grid lines + legend : False/True/'reverse' + Place legend on axis subplots + style : list or dict + matplotlib line style per column + logx : boolean, default False + Use log scaling on x axis + logy : boolean, default False + Use log scaling on y axis + loglog : boolean, default False + Use log scaling on both x and y axes + xticks : sequence + Values to use for the xticks + yticks : sequence + Values to use for the yticks + xlim : 2-tuple/list + ylim : 2-tuple/list + rot : int, default None + Rotation for ticks (xticks for vertical, yticks for horizontal plots) + fontsize : int, default None + Font size for xticks and yticks + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that name + from matplotlib. + colorbar : boolean, optional + If True, plot colorbar (only relevant for 'scatter' and 'hexbin' plots) + position : float + Specify relative alignments for bar plot layout. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center) + layout : tuple (optional) + (rows, columns) for the layout of the plot + table : boolean, Series or DataFrame, default False + If True, draw a table using the data in the DataFrame and the data will + be transposed to meet matplotlib's default layout. + If a Series or DataFrame is passed, use passed data to draw a table. + yerr : DataFrame, Series, array-like, dict and str + See :ref:`Plotting with Error Bars ` for + detail. + xerr : same types as yerr. + %(klass_unique)s + mark_right : boolean, default True + When using a secondary_y axis, automatically mark the column + labels with "(right)" in the legend + kwds : keywords + Options to pass to matplotlib plotting method + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + + Notes + ----- + + - See matplotlib documentation online for more on this subject + - If `kind` = 'bar' or 'barh', you can specify relative alignments + for bar plot layout by `position` keyword. + From 0 (left/bottom-end) to 1 (right/top-end). 
Default is 0.5 (center).
+    %(klass_note)s
+
+    """
+
+
+@Appender(_shared_docs['plot'] % _shared_doc_df_kwargs)
+def plot_frame(data, x=None, y=None, kind='line', ax=None,
+               subplots=False, sharex=None, sharey=False, layout=None,
+               figsize=None, use_index=True, title=None, grid=None,
+               legend=True, style=None, logx=False, logy=False, loglog=False,
+               xticks=None, yticks=None, xlim=None, ylim=None,
+               rot=None, fontsize=None, colormap=None, table=False,
+               yerr=None, xerr=None,
+               secondary_y=False, sort_columns=False,
+               **kwds):
+    return _plot(data, kind=kind, x=x, y=y, ax=ax,
+                 subplots=subplots, sharex=sharex, sharey=sharey,
+                 layout=layout, figsize=figsize, use_index=use_index,
+                 title=title, grid=grid, legend=legend,
+                 style=style, logx=logx, logy=logy, loglog=loglog,
+                 xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim,
+                 rot=rot, fontsize=fontsize, colormap=colormap, table=table,
+                 yerr=yerr, xerr=xerr,
+                 secondary_y=secondary_y, sort_columns=sort_columns,
+                 **kwds)
+
+
+@Appender(_shared_docs['plot'] % _shared_doc_series_kwargs)
+def plot_series(data, kind='line', ax=None,                    # Series unique
+                figsize=None, use_index=True, title=None, grid=None,
+                legend=False, style=None, logx=False, logy=False, loglog=False,
+                xticks=None, yticks=None, xlim=None, ylim=None,
+                rot=None, fontsize=None, colormap=None, table=False,
+                yerr=None, xerr=None,
+                label=None, secondary_y=False,                 # Series unique
+                **kwds):
+
+    import matplotlib.pyplot as plt
+    # If no axes object is specified, check whether there are existing
+    # figures.  If there are no existing figures, _gca() would create a
+    # figure with the default figsize, causing the figsize parameter to
+    # be ignored.
+    if ax is None and len(plt.get_fignums()) > 0:
+        ax = _gca()
+        ax = MPLPlot._get_ax_layer(ax)
+    return _plot(data, kind=kind, ax=ax,
+                 figsize=figsize, use_index=use_index, title=title,
+                 grid=grid, legend=legend,
+                 style=style, logx=logx, logy=logy, loglog=loglog,
+                 xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim,
+                 rot=rot, fontsize=fontsize, colormap=colormap, table=table,
+                 yerr=yerr, xerr=xerr,
+                 label=label, secondary_y=secondary_y,
+                 **kwds)
+
+
+_shared_docs['boxplot'] = """
+    Make a box plot from DataFrame columns, optionally grouped by some other
+    columns or inputs.
+
+    Parameters
+    ----------
+    data : the pandas object holding the data
+    column : column name or list of names, or vector
+        Can be any valid input to groupby
+    by : string or sequence
+        Column in the DataFrame to group by
+    ax : Matplotlib axes object, optional
+    fontsize : int or string
+    rot : label rotation angle
+    figsize : A tuple (width, height) in inches
+    grid : Setting this to True will show the grid
+    layout : tuple (optional)
+        (rows, columns) for the layout of the plot
+    return_type : {None, 'axes', 'dict', 'both'}, default None
+        The kind of object to return. The default is ``axes``.
+        'axes' returns the matplotlib axes the boxplot is drawn on;
+        'dict' returns a dictionary whose values are the matplotlib
+        Lines of the boxplot;
+        'both' returns a namedtuple with the axes and dict.
+
+        When grouping with ``by``, a Series mapping columns to ``return_type``
+        is returned, unless ``return_type`` is None, in which case a NumPy
+        array of axes is returned with the same shape as ``layout``.
+        See the prose documentation for more.
+ + kwds : other plotting keyword arguments to be passed to matplotlib boxplot + function + + Returns + ------- + lines : dict + ax : matplotlib Axes + (ax, lines): namedtuple + + Notes + ----- + Use ``return_type='dict'`` when you want to tweak the appearance + of the lines after plotting. In this case a dict containing the Lines + making up the boxes, caps, fliers, medians, and whiskers is returned. + """ + + +@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) +def boxplot(data, column=None, by=None, ax=None, fontsize=None, + rot=0, grid=True, figsize=None, layout=None, return_type=None, + **kwds): + + # validate return_type: + if return_type not in BoxPlot._valid_return_types: + raise ValueError("return_type must be {'axes', 'dict', 'both'}") + + from pandas import Series, DataFrame + if isinstance(data, Series): + data = DataFrame({'x': data}) + column = 'x' + + def _get_colors(): + return _get_standard_colors(color=kwds.get('color'), num_colors=1) + + def maybe_color_bp(bp): + if 'color' not in kwds: + from matplotlib.artist import setp + setp(bp['boxes'], color=colors[0], alpha=1) + setp(bp['whiskers'], color=colors[0], alpha=1) + setp(bp['medians'], color=colors[2], alpha=1) + + def plot_group(keys, values, ax): + keys = [pprint_thing(x) for x in keys] + values = [remove_na(v) for v in values] + bp = ax.boxplot(values, **kwds) + if fontsize is not None: + ax.tick_params(axis='both', labelsize=fontsize) + if kwds.get('vert', 1): + ax.set_xticklabels(keys, rotation=rot) + else: + ax.set_yticklabels(keys, rotation=rot) + maybe_color_bp(bp) + + # Return axes in multiplot case, maybe revisit later # 985 + if return_type == 'dict': + return bp + elif return_type == 'both': + return BoxPlot.BP(ax=ax, lines=bp) + else: + return ax + + colors = _get_colors() + if column is None: + columns = None + else: + if isinstance(column, (list, tuple)): + columns = column + else: + columns = [column] + + if by is not None: + # Prefer array return type for 2-D plots to match the subplot layout + # https://github.com/pandas-dev/pandas/pull/12216#issuecomment-241175580 + result = _grouped_plot_by_column(plot_group, data, columns=columns, + by=by, grid=grid, figsize=figsize, + ax=ax, layout=layout, + return_type=return_type) + else: + if return_type is None: + return_type = 'axes' + if layout is not None: + raise ValueError("The 'layout' keyword is not supported when " + "'by' is None") + + if ax is None: + ax = _gca() + data = data._get_numeric_data() + if columns is None: + columns = data.columns + else: + data = data[columns] + + result = plot_group(columns, data.values.T, ax) + ax.grid(grid) + + return result + + +def scatter_plot(data, x, y, by=None, ax=None, figsize=None, grid=False, + **kwargs): + """ + Make a scatter plot from two DataFrame columns + + Parameters + ---------- + data : DataFrame + x : Column name for the x-axis values + y : Column name for the y-axis values + ax : Matplotlib axis object + figsize : A tuple (width, height) in inches + grid : Setting this to True will show the grid + kwargs : other plotting keyword arguments + To be passed to scatter function + + Returns + ------- + fig : matplotlib.Figure + """ + import matplotlib.pyplot as plt + + kwargs.setdefault('edgecolors', 'none') + + def plot_group(group, ax): + xvals = group[x].values + yvals = group[y].values + ax.scatter(xvals, yvals, **kwargs) + ax.grid(grid) + + if by is not None: + fig = _grouped_plot(plot_group, data, by=by, figsize=figsize, ax=ax) + else: + if ax is None: + fig = plt.figure() + ax = 
fig.add_subplot(111) + else: + fig = ax.get_figure() + plot_group(data, ax) + ax.set_ylabel(pprint_thing(y)) + ax.set_xlabel(pprint_thing(x)) + + ax.grid(grid) + + return fig + + +def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, + xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, + sharey=False, figsize=None, layout=None, bins=10, **kwds): + """ + Draw histogram of the DataFrame's series using matplotlib / pylab. + + Parameters + ---------- + data : DataFrame + column : string or sequence + If passed, will be used to limit data to a subset of columns + by : object, optional + If passed, then used to form histograms for separate groups + grid : boolean, default True + Whether to show axis grid lines + xlabelsize : int, default None + If specified changes the x-axis label size + xrot : float, default None + rotation of x axis labels + ylabelsize : int, default None + If specified changes the y-axis label size + yrot : float, default None + rotation of y axis labels + ax : matplotlib axes object, default None + sharex : boolean, default True if ax is None else False + In case subplots=True, share x axis and set some x axis labels to + invisible; defaults to True if ax is None otherwise False if an ax + is passed in; Be aware, that passing in both an ax and sharex=True + will alter all x axis labels for all subplots in a figure! + sharey : boolean, default False + In case subplots=True, share y axis and set some y axis labels to + invisible + figsize : tuple + The size of the figure to create in inches by default + layout : tuple, optional + Tuple of (rows, columns) for the layout of the histograms + bins : integer, default 10 + Number of histogram bins to be used + kwds : other plotting keyword arguments + To be passed to hist function + """ + + if by is not None: + axes = grouped_hist(data, column=column, by=by, ax=ax, grid=grid, + figsize=figsize, sharex=sharex, sharey=sharey, + layout=layout, bins=bins, xlabelsize=xlabelsize, + xrot=xrot, ylabelsize=ylabelsize, + yrot=yrot, **kwds) + return axes + + if column is not None: + if not isinstance(column, (list, np.ndarray, Index)): + column = [column] + data = data[column] + data = data._get_numeric_data() + naxes = len(data.columns) + + fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False, + sharex=sharex, sharey=sharey, figsize=figsize, + layout=layout) + _axes = _flatten(axes) + + for i, col in enumerate(_try_sort(data.columns)): + ax = _axes[i] + ax.hist(data[col].dropna().values, bins=bins, **kwds) + ax.set_title(col) + ax.grid(grid) + + _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot) + fig.subplots_adjust(wspace=0.3, hspace=0.3) + + return axes + + +def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, + xrot=None, ylabelsize=None, yrot=None, figsize=None, + bins=10, **kwds): + """ + Draw histogram of the input series using matplotlib + + Parameters + ---------- + by : object, optional + If passed, then used to form histograms for separate groups + ax : matplotlib axis object + If not passed, uses gca() + grid : boolean, default True + Whether to show axis grid lines + xlabelsize : int, default None + If specified changes the x-axis label size + xrot : float, default None + rotation of x axis labels + ylabelsize : int, default None + If specified changes the y-axis label size + yrot : float, default None + rotation of y axis labels + figsize : tuple, default None + figure size in inches by default + bins: integer, default 10 + Number of 
histogram bins to be used + kwds : keywords + To be passed to the actual plotting function + + Notes + ----- + See matplotlib documentation online for more on this + + """ + import matplotlib.pyplot as plt + + if by is None: + if kwds.get('layout', None) is not None: + raise ValueError("The 'layout' keyword is not supported when " + "'by' is None") + # hack until the plotting interface is a bit more unified + fig = kwds.pop('figure', plt.gcf() if plt.get_fignums() else + plt.figure(figsize=figsize)) + if (figsize is not None and tuple(figsize) != + tuple(fig.get_size_inches())): + fig.set_size_inches(*figsize, forward=True) + if ax is None: + ax = fig.gca() + elif ax.get_figure() != fig: + raise AssertionError('passed axis not bound to passed figure') + values = self.dropna().values + + ax.hist(values, bins=bins, **kwds) + ax.grid(grid) + axes = np.array([ax]) + + _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot) + + else: + if 'figure' in kwds: + raise ValueError("Cannot pass 'figure' when using the " + "'by' argument, since a new 'Figure' instance " + "will be created") + axes = grouped_hist(self, by=by, ax=ax, grid=grid, figsize=figsize, + bins=bins, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot, **kwds) + + if hasattr(axes, 'ndim'): + if axes.ndim == 1 and len(axes) == 1: + return axes[0] + return axes + + +def grouped_hist(data, column=None, by=None, ax=None, bins=50, figsize=None, + layout=None, sharex=False, sharey=False, rot=90, grid=True, + xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, + **kwargs): + """ + Grouped histogram + + Parameters + ---------- + data: Series/DataFrame + column: object, optional + by: object, optional + ax: axes, optional + bins: int, default 50 + figsize: tuple, optional + layout: optional + sharex: boolean, default False + sharey: boolean, default False + rot: int, default 90 + grid: bool, default True + kwargs: dict, keyword arguments passed to matplotlib.Axes.hist + + Returns + ------- + axes: collection of Matplotlib Axes + """ + def plot_group(group, ax): + ax.hist(group.dropna().values, bins=bins, **kwargs) + + xrot = xrot or rot + + fig, axes = _grouped_plot(plot_group, data, column=column, + by=by, sharex=sharex, sharey=sharey, ax=ax, + figsize=figsize, layout=layout, rot=rot) + + _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot) + + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, + hspace=0.5, wspace=0.3) + return axes + + +def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, + rot=0, grid=True, ax=None, figsize=None, + layout=None, **kwds): + """ + Make box plots from DataFrameGroupBy data. 
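+
+    With ``subplots=True`` one subplot is drawn per group key; with
+    ``subplots=False`` the groups are first combined and a single
+    ``DataFrame.boxplot`` call is made.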
+ + Parameters + ---------- + grouped : Grouped DataFrame + subplots : + * ``False`` - no subplots will be used + * ``True`` - create a subplot for each group + column : column name or list of names, or vector + Can be any valid input to groupby + fontsize : int or string + rot : label rotation angle + grid : Setting this to True will show the grid + ax : Matplotlib axis object, default None + figsize : A tuple (width, height) in inches + layout : tuple (optional) + (rows, columns) for the layout of the plot + kwds : other plotting keyword arguments to be passed to matplotlib boxplot + function + + Returns + ------- + dict of key/value = group key/DataFrame.boxplot return value + or DataFrame.boxplot return value in case subplots=figures=False + + Examples + -------- + >>> import pandas + >>> import numpy as np + >>> import itertools + >>> + >>> tuples = [t for t in itertools.product(range(1000), range(4))] + >>> index = pandas.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1']) + >>> data = np.random.randn(len(index),4) + >>> df = pandas.DataFrame(data, columns=list('ABCD'), index=index) + >>> + >>> grouped = df.groupby(level='lvl1') + >>> boxplot_frame_groupby(grouped) + >>> + >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1) + >>> boxplot_frame_groupby(grouped, subplots=False) + """ + if subplots is True: + naxes = len(grouped) + fig, axes = _subplots(naxes=naxes, squeeze=False, + ax=ax, sharex=False, sharey=True, + figsize=figsize, layout=layout) + axes = _flatten(axes) + + ret = Series() + for (key, group), ax in zip(grouped, axes): + d = group.boxplot(ax=ax, column=column, fontsize=fontsize, + rot=rot, grid=grid, **kwds) + ax.set_title(pprint_thing(key)) + ret.loc[key] = d + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, + right=0.9, wspace=0.2) + else: + from pandas.tools.concat import concat + keys, frames = zip(*grouped) + if grouped.axis == 0: + df = concat(frames, keys=keys, axis=1) + else: + if len(frames) > 1: + df = frames[0].join(frames[1::]) + else: + df = frames[0] + ret = df.boxplot(column=column, fontsize=fontsize, rot=rot, + grid=grid, ax=ax, figsize=figsize, + layout=layout, **kwds) + return ret + + +def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True, + figsize=None, sharex=True, sharey=True, layout=None, + rot=0, ax=None, **kwargs): + from pandas import DataFrame + + if figsize == 'default': + # allowed to specify mpl default with 'default' + warnings.warn("figsize='default' is deprecated. 
Specify figure" + "size by tuple instead", FutureWarning, stacklevel=4) + figsize = None + + grouped = data.groupby(by) + if column is not None: + grouped = grouped[column] + + naxes = len(grouped) + fig, axes = _subplots(naxes=naxes, figsize=figsize, + sharex=sharex, sharey=sharey, ax=ax, + layout=layout) + + _axes = _flatten(axes) + + for i, (key, group) in enumerate(grouped): + ax = _axes[i] + if numeric_only and isinstance(group, DataFrame): + group = group._get_numeric_data() + plotf(group, ax, **kwargs) + ax.set_title(pprint_thing(key)) + + return fig, axes + + +def _grouped_plot_by_column(plotf, data, columns=None, by=None, + numeric_only=True, grid=False, + figsize=None, ax=None, layout=None, + return_type=None, **kwargs): + grouped = data.groupby(by) + if columns is None: + if not isinstance(by, (list, tuple)): + by = [by] + columns = data._get_numeric_data().columns.difference(by) + naxes = len(columns) + fig, axes = _subplots(naxes=naxes, sharex=True, sharey=True, + figsize=figsize, ax=ax, layout=layout) + + _axes = _flatten(axes) + + result = Series() + ax_values = [] + + for i, col in enumerate(columns): + ax = _axes[i] + gp_col = grouped[col] + keys, values = zip(*gp_col) + re_plotf = plotf(keys, values, ax, **kwargs) + ax.set_title(col) + ax.set_xlabel(pprint_thing(by)) + ax_values.append(re_plotf) + ax.grid(grid) + + result = Series(ax_values, index=columns) + + # Return axes in multiplot case, maybe revisit later # 985 + if return_type is None: + result = axes + + byline = by[0] if len(by) == 1 else by + fig.suptitle('Boxplot grouped by %s' % byline) + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) + + return result + + +class BasePlotMethods(PandasObject): + + def __init__(self, data): + self._data = data + + def __call__(self, *args, **kwargs): + raise NotImplementedError + + +class SeriesPlotMethods(BasePlotMethods): + """Series plotting accessor and method + + Examples + -------- + >>> s.plot.line() + >>> s.plot.bar() + >>> s.plot.hist() + + Plotting methods can also be accessed by calling the accessor as a method + with the ``kind`` argument: + ``s.plot(kind='line')`` is equivalent to ``s.plot.line()`` + """ + + def __call__(self, kind='line', ax=None, + figsize=None, use_index=True, title=None, grid=None, + legend=False, style=None, logx=False, logy=False, + loglog=False, xticks=None, yticks=None, + xlim=None, ylim=None, + rot=None, fontsize=None, colormap=None, table=False, + yerr=None, xerr=None, + label=None, secondary_y=False, **kwds): + return plot_series(self._data, kind=kind, ax=ax, figsize=figsize, + use_index=use_index, title=title, grid=grid, + legend=legend, style=style, logx=logx, logy=logy, + loglog=loglog, xticks=xticks, yticks=yticks, + xlim=xlim, ylim=ylim, rot=rot, fontsize=fontsize, + colormap=colormap, table=table, yerr=yerr, + xerr=xerr, label=label, secondary_y=secondary_y, + **kwds) + __call__.__doc__ = plot_series.__doc__ + + def line(self, **kwds): + """ + Line plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='line', **kwds) + + def bar(self, **kwds): + """ + Vertical bar plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. 
+ + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='bar', **kwds) + + def barh(self, **kwds): + """ + Horizontal bar plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='barh', **kwds) + + def box(self, **kwds): + """ + Boxplot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='box', **kwds) + + def hist(self, bins=10, **kwds): + """ + Histogram + + .. versionadded:: 0.17.0 + + Parameters + ---------- + bins: integer, default 10 + Number of histogram bins to be used + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='hist', bins=bins, **kwds) + + def kde(self, **kwds): + """ + Kernel Density Estimate plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='kde', **kwds) + + density = kde + + def area(self, **kwds): + """ + Area plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='area', **kwds) + + def pie(self, **kwds): + """ + Pie chart + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='pie', **kwds) + + +class FramePlotMethods(BasePlotMethods): + """DataFrame plotting accessor and method + + Examples + -------- + >>> df.plot.line() + >>> df.plot.scatter('x', 'y') + >>> df.plot.hexbin() + + These plotting methods can also be accessed by calling the accessor as a + method with the ``kind`` argument: + ``df.plot(kind='line')`` is equivalent to ``df.plot.line()`` + """ + + def __call__(self, x=None, y=None, kind='line', ax=None, + subplots=False, sharex=None, sharey=False, layout=None, + figsize=None, use_index=True, title=None, grid=None, + legend=True, style=None, logx=False, logy=False, loglog=False, + xticks=None, yticks=None, xlim=None, ylim=None, + rot=None, fontsize=None, colormap=None, table=False, + yerr=None, xerr=None, + secondary_y=False, sort_columns=False, **kwds): + return plot_frame(self._data, kind=kind, x=x, y=y, ax=ax, + subplots=subplots, sharex=sharex, sharey=sharey, + layout=layout, figsize=figsize, use_index=use_index, + title=title, grid=grid, legend=legend, style=style, + logx=logx, logy=logy, loglog=loglog, xticks=xticks, + yticks=yticks, xlim=xlim, ylim=ylim, rot=rot, + fontsize=fontsize, colormap=colormap, table=table, + yerr=yerr, xerr=xerr, secondary_y=secondary_y, + sort_columns=sort_columns, **kwds) + __call__.__doc__ = plot_frame.__doc__ + + def line(self, x=None, y=None, **kwds): + """ + Line plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. 
+ **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='line', x=x, y=y, **kwds) + + def bar(self, x=None, y=None, **kwds): + """ + Vertical bar plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='bar', x=x, y=y, **kwds) + + def barh(self, x=None, y=None, **kwds): + """ + Horizontal bar plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='barh', x=x, y=y, **kwds) + + def box(self, by=None, **kwds): + """ + Boxplot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + by : string or sequence + Column in the DataFrame to group by. + \*\*kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='box', by=by, **kwds) + + def hist(self, by=None, bins=10, **kwds): + """ + Histogram + + .. versionadded:: 0.17.0 + + Parameters + ---------- + by : string or sequence + Column in the DataFrame to group by. + bins: integer, default 10 + Number of histogram bins to be used + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='hist', by=by, bins=bins, **kwds) + + def kde(self, **kwds): + """ + Kernel Density Estimate plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='kde', **kwds) + + density = kde + + def area(self, x=None, y=None, **kwds): + """ + Area plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='area', x=x, y=y, **kwds) + + def pie(self, y=None, **kwds): + """ + Pie chart + + .. versionadded:: 0.17.0 + + Parameters + ---------- + y : label or position, optional + Column to plot. + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='pie', y=y, **kwds) + + def scatter(self, x, y, s=None, c=None, **kwds): + """ + Scatter plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + s : scalar or array_like, optional + Size of each point. + c : label or position, optional + Color of each point. + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. 
+ + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='scatter', x=x, y=y, c=c, s=s, **kwds) + + def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, + **kwds): + """ + Hexbin plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + C : label or position, optional + The value at each `(x, y)` point. + reduce_C_function : callable, optional + Function of one argument that reduces all the values in a bin to + a single number (e.g. `mean`, `max`, `sum`, `std`). + gridsize : int, optional + Number of bins. + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + if reduce_C_function is not None: + kwds['reduce_C_function'] = reduce_C_function + if gridsize is not None: + kwds['gridsize'] = gridsize + return self(kind='hexbin', x=x, y=y, C=C, **kwds) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py new file mode 100644 index 0000000000000..2c32a532dd2e2 --- /dev/null +++ b/pandas/plotting/_misc.py @@ -0,0 +1,573 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +import numpy as np + +from pandas.util.decorators import deprecate_kwarg +from pandas.types.missing import notnull +from pandas.compat import range, lrange, lmap, zip +from pandas.formats.printing import pprint_thing + + +from pandas.plotting._style import _get_standard_colors +from pandas.plotting._tools import _subplots, _set_ticks_props + + +def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, + diagonal='hist', marker='.', density_kwds=None, + hist_kwds=None, range_padding=0.05, **kwds): + """ + Draw a matrix of scatter plots. + + Parameters + ---------- + frame : DataFrame + alpha : float, optional + amount of transparency applied + figsize : (float,float), optional + a tuple (width, height) in inches + ax : Matplotlib axis object, optional + grid : bool, optional + setting this to True will show the grid + diagonal : {'hist', 'kde'} + pick between 'kde' and 'hist' for + either Kernel Density Estimation or Histogram + plot in the diagonal + marker : str, optional + Matplotlib marker type, default '.' + hist_kwds : other plotting keyword arguments + To be passed to hist function + density_kwds : other plotting keyword arguments + To be passed to kernel density estimate plot + range_padding : float, optional + relative extension of axis range in x and y + with respect to (x_max - x_min) or (y_max - y_min), + default 0.05 + kwds : other plotting keyword arguments + To be passed to scatter function + + Examples + -------- + >>> df = DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) + >>> scatter_matrix(df, alpha=0.2) + """ + + df = frame._get_numeric_data() + n = df.columns.size + naxes = n * n + fig, axes = _subplots(naxes=naxes, figsize=figsize, ax=ax, + squeeze=False) + + # no gaps between subplots + fig.subplots_adjust(wspace=0, hspace=0) + + mask = notnull(df) + + marker = _get_marker_compat(marker) + + hist_kwds = hist_kwds or {} + density_kwds = density_kwds or {} + + # GH 14855 + kwds.setdefault('edgecolors', 'none') + + boundaries_list = [] + for a in df.columns: + values = df[a].values[mask[a].values] + rmin_, rmax_ = np.min(values), np.max(values) + rdelta_ext = (rmax_ - rmin_) * range_padding / 2. 
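+        # For example, with the default range_padding of 0.05, rdelta_ext
+        # extends each axis by 2.5% of the data range on either side, so
+        # values spanning [0, 10] give plot limits of roughly [-0.25, 10.25]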
+ boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext)) + + for i, a in zip(lrange(n), df.columns): + for j, b in zip(lrange(n), df.columns): + ax = axes[i, j] + + if i == j: + values = df[a].values[mask[a].values] + + # Deal with the diagonal by drawing a histogram there. + if diagonal == 'hist': + ax.hist(values, **hist_kwds) + + elif diagonal in ('kde', 'density'): + from scipy.stats import gaussian_kde + y = values + gkde = gaussian_kde(y) + ind = np.linspace(y.min(), y.max(), 1000) + ax.plot(ind, gkde.evaluate(ind), **density_kwds) + + ax.set_xlim(boundaries_list[i]) + + else: + common = (mask[a] & mask[b]).values + + ax.scatter(df[b][common], df[a][common], + marker=marker, alpha=alpha, **kwds) + + ax.set_xlim(boundaries_list[j]) + ax.set_ylim(boundaries_list[i]) + + ax.set_xlabel(b) + ax.set_ylabel(a) + + if j != 0: + ax.yaxis.set_visible(False) + if i != n - 1: + ax.xaxis.set_visible(False) + + if len(df.columns) > 1: + lim1 = boundaries_list[0] + locs = axes[0][1].yaxis.get_majorticklocs() + locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] + adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) + + lim0 = axes[0][0].get_ylim() + adj = adj * (lim0[1] - lim0[0]) + lim0[0] + axes[0][0].yaxis.set_ticks(adj) + + if np.all(locs == locs.astype(int)): + # if all ticks are int + locs = locs.astype(int) + axes[0][0].yaxis.set_ticklabels(locs) + + _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) + + return axes + + +def _get_marker_compat(marker): + import matplotlib.lines as mlines + import matplotlib as mpl + if mpl.__version__ < '1.1.0' and marker == '.': + return 'o' + if marker not in mlines.lineMarkers: + return 'o' + return marker + + +def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): + """RadViz - a multivariate data visualization algorithm + + Parameters: + ----------- + frame: DataFrame + class_column: str + Column name containing class names + ax: Matplotlib axis object, optional + color: list or tuple, optional + Colors to use for the different classes + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that name + from matplotlib. 
+ kwds: keywords + Options to pass to matplotlib scatter plotting method + + Returns: + -------- + ax: Matplotlib axis object + """ + import matplotlib.pyplot as plt + import matplotlib.patches as patches + + def normalize(series): + a = min(series) + b = max(series) + return (series - a) / (b - a) + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = frame[class_column] + df = frame.drop(class_column, axis=1).apply(normalize) + + if ax is None: + ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1]) + + to_plot = {} + colors = _get_standard_colors(num_colors=len(classes), colormap=colormap, + color_type='random', color=color) + + for kls in classes: + to_plot[kls] = [[], []] + + m = len(frame.columns) - 1 + s = np.array([(np.cos(t), np.sin(t)) + for t in [2.0 * np.pi * (i / float(m)) + for i in range(m)]]) + + for i in range(n): + row = df.iloc[i].values + row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1) + y = (s * row_).sum(axis=0) / row.sum() + kls = class_col.iat[i] + to_plot[kls][0].append(y[0]) + to_plot[kls][1].append(y[1]) + + for i, kls in enumerate(classes): + ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i], + label=pprint_thing(kls), **kwds) + ax.legend() + + ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none')) + + for xy, name in zip(s, df.columns): + + ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray')) + + if xy[0] < 0.0 and xy[1] < 0.0: + ax.text(xy[0] - 0.025, xy[1] - 0.025, name, + ha='right', va='top', size='small') + elif xy[0] < 0.0 and xy[1] >= 0.0: + ax.text(xy[0] - 0.025, xy[1] + 0.025, name, + ha='right', va='bottom', size='small') + elif xy[0] >= 0.0 and xy[1] < 0.0: + ax.text(xy[0] + 0.025, xy[1] - 0.025, name, + ha='left', va='top', size='small') + elif xy[0] >= 0.0 and xy[1] >= 0.0: + ax.text(xy[0] + 0.025, xy[1] + 0.025, name, + ha='left', va='bottom', size='small') + + ax.axis('equal') + return ax + + +@deprecate_kwarg(old_arg_name='data', new_arg_name='frame') +def andrews_curves(frame, class_column, ax=None, samples=200, color=None, + colormap=None, **kwds): + """ + Generates a matplotlib plot of Andrews curves, for visualising clusters of + multivariate data. + + Andrews curves have the functional form: + + f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + + x_4 sin(2t) + x_5 cos(2t) + ... + + Where x coefficients correspond to the values of each dimension and t is + linearly spaced between -pi and +pi. Each row of frame then corresponds to + a single curve. + + Parameters: + ----------- + frame : DataFrame + Data to be plotted, preferably normalized to (0.0, 1.0) + class_column : Name of the column containing class names + ax : matplotlib axes object, default None + samples : Number of points to plot in each curve + color: list or tuple, optional + Colors to use for the different classes + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that name + from matplotlib. + kwds: keywords + Options to pass to matplotlib plotting method + + Returns: + -------- + ax: Matplotlib axis object + + """ + from math import sqrt, pi + import matplotlib.pyplot as plt + + def function(amplitudes): + def f(t): + x1 = amplitudes[0] + result = x1 / sqrt(2.0) + + # Take the rest of the coefficients and resize them + # appropriately. Take a copy of amplitudes as otherwise numpy + # deletes the element from amplitudes itself. 
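+            # For example, amplitudes [x1, x2, x3, x4, x5] becomes coeffs
+            # [[x2, x3], [x4, x5]]: column 0 weights sin(k*t) and column 1
+            # weights cos(k*t) for harmonics k = 1, 2, ...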
+ coeffs = np.delete(np.copy(amplitudes), 0) + coeffs.resize(int((coeffs.size + 1) / 2), 2) + + # Generate the harmonics and arguments for the sin and cos + # functions. + harmonics = np.arange(0, coeffs.shape[0]) + 1 + trig_args = np.outer(harmonics, t) + + result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) + + coeffs[:, 1, np.newaxis] * np.cos(trig_args), + axis=0) + return result + return f + + n = len(frame) + class_col = frame[class_column] + classes = frame[class_column].drop_duplicates() + df = frame.drop(class_column, axis=1) + t = np.linspace(-pi, pi, samples) + used_legends = set([]) + + color_values = _get_standard_colors(num_colors=len(classes), + colormap=colormap, color_type='random', + color=color) + colors = dict(zip(classes, color_values)) + if ax is None: + ax = plt.gca(xlim=(-pi, pi)) + for i in range(n): + row = df.iloc[i].values + f = function(row) + y = f(t) + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not in used_legends: + used_legends.add(label) + ax.plot(t, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(t, y, color=colors[kls], **kwds) + + ax.legend(loc='upper right') + ax.grid() + return ax + + +def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): + """Bootstrap plot. + + Parameters: + ----------- + series: Time series + fig: matplotlib figure object, optional + size: number of data points to consider during each sampling + samples: number of times the bootstrap procedure is performed + kwds: optional keyword arguments for plotting commands, must be accepted + by both hist and plot + + Returns: + -------- + fig: matplotlib figure + """ + import random + import matplotlib.pyplot as plt + + # random.sample(ndarray, int) fails on python 3.3, sigh + data = list(series.values) + samplings = [random.sample(data, size) for _ in range(samples)] + + means = np.array([np.mean(sampling) for sampling in samplings]) + medians = np.array([np.median(sampling) for sampling in samplings]) + midranges = np.array([(min(sampling) + max(sampling)) * 0.5 + for sampling in samplings]) + if fig is None: + fig = plt.figure() + x = lrange(samples) + axes = [] + ax1 = fig.add_subplot(2, 3, 1) + ax1.set_xlabel("Sample") + axes.append(ax1) + ax1.plot(x, means, **kwds) + ax2 = fig.add_subplot(2, 3, 2) + ax2.set_xlabel("Sample") + axes.append(ax2) + ax2.plot(x, medians, **kwds) + ax3 = fig.add_subplot(2, 3, 3) + ax3.set_xlabel("Sample") + axes.append(ax3) + ax3.plot(x, midranges, **kwds) + ax4 = fig.add_subplot(2, 3, 4) + ax4.set_xlabel("Mean") + axes.append(ax4) + ax4.hist(means, **kwds) + ax5 = fig.add_subplot(2, 3, 5) + ax5.set_xlabel("Median") + axes.append(ax5) + ax5.hist(medians, **kwds) + ax6 = fig.add_subplot(2, 3, 6) + ax6.set_xlabel("Midrange") + axes.append(ax6) + ax6.hist(midranges, **kwds) + for axis in axes: + plt.setp(axis.get_xticklabels(), fontsize=8) + plt.setp(axis.get_yticklabels(), fontsize=8) + return fig + + +@deprecate_kwarg(old_arg_name='colors', new_arg_name='color') +@deprecate_kwarg(old_arg_name='data', new_arg_name='frame', stacklevel=3) +def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, + use_columns=False, xticks=None, colormap=None, + axvlines=True, axvlines_kwds=None, sort_labels=False, + **kwds): + """Parallel coordinates plotting. 
+ + Parameters + ---------- + frame: DataFrame + class_column: str + Column name containing class names + cols: list, optional + A list of column names to use + ax: matplotlib.axis, optional + matplotlib axis object + color: list or tuple, optional + Colors to use for the different classes + use_columns: bool, optional + If true, columns will be used as xticks + xticks: list or tuple, optional + A list of values to use for xticks + colormap: str or matplotlib colormap, default None + Colormap to use for line colors. + axvlines: bool, optional + If true, vertical lines will be added at each xtick + axvlines_kwds: keywords, optional + Options to be passed to axvline method for vertical lines + sort_labels: bool, False + Sort class_column labels, useful when assigning colours + + .. versionadded:: 0.20.0 + + kwds: keywords + Options to pass to matplotlib plotting method + + Returns + ------- + ax: matplotlib axis object + + Examples + -------- + >>> from pandas import read_csv + >>> from pandas.tools.plotting import parallel_coordinates + >>> from matplotlib import pyplot as plt + >>> df = read_csv('https://raw.github.com/pandas-dev/pandas/master' + '/pandas/tests/data/iris.csv') + >>> parallel_coordinates(df, 'Name', color=('#556270', + '#4ECDC4', '#C7F464')) + >>> plt.show() + """ + if axvlines_kwds is None: + axvlines_kwds = {'linewidth': 1, 'color': 'black'} + import matplotlib.pyplot as plt + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = frame[class_column] + + if cols is None: + df = frame.drop(class_column, axis=1) + else: + df = frame[cols] + + used_legends = set([]) + + ncols = len(df.columns) + + # determine values to use for xticks + if use_columns is True: + if not np.all(np.isreal(list(df.columns))): + raise ValueError('Columns must be numeric to be used as xticks') + x = df.columns + elif xticks is not None: + if not np.all(np.isreal(xticks)): + raise ValueError('xticks specified must be numeric') + elif len(xticks) != ncols: + raise ValueError('Length of xticks must match number of columns') + x = xticks + else: + x = lrange(ncols) + + if ax is None: + ax = plt.gca() + + color_values = _get_standard_colors(num_colors=len(classes), + colormap=colormap, color_type='random', + color=color) + + if sort_labels: + classes = sorted(classes) + color_values = sorted(color_values) + colors = dict(zip(classes, color_values)) + + for i in range(n): + y = df.iloc[i].values + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not in used_legends: + used_legends.add(label) + ax.plot(x, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(x, y, color=colors[kls], **kwds) + + if axvlines: + for i in x: + ax.axvline(i, **axvlines_kwds) + + ax.set_xticks(x) + ax.set_xticklabels(df.columns) + ax.set_xlim(x[0], x[-1]) + ax.legend(loc='upper right') + ax.grid() + return ax + + +def lag_plot(series, lag=1, ax=None, **kwds): + """Lag plot for time series. 
+ + Parameters: + ----------- + series: Time series + lag: lag of the scatter plot, default 1 + ax: Matplotlib axis object, optional + kwds: Matplotlib scatter method keyword arguments, optional + + Returns: + -------- + ax: Matplotlib axis object + """ + import matplotlib.pyplot as plt + + # workaround because `c='b'` is hardcoded in matplotlibs scatter method + kwds.setdefault('c', plt.rcParams['patch.facecolor']) + + data = series.values + y1 = data[:-lag] + y2 = data[lag:] + if ax is None: + ax = plt.gca() + ax.set_xlabel("y(t)") + ax.set_ylabel("y(t + %s)" % lag) + ax.scatter(y1, y2, **kwds) + return ax + + +def autocorrelation_plot(series, ax=None, **kwds): + """Autocorrelation plot for time series. + + Parameters: + ----------- + series: Time series + ax: Matplotlib axis object, optional + kwds : keywords + Options to pass to matplotlib plotting method + + Returns: + ----------- + ax: Matplotlib axis object + """ + import matplotlib.pyplot as plt + n = len(series) + data = np.asarray(series) + if ax is None: + ax = plt.gca(xlim=(1, n), ylim=(-1.0, 1.0)) + mean = np.mean(data) + c0 = np.sum((data - mean) ** 2) / float(n) + + def r(h): + return ((data[:n - h] - mean) * + (data[h:] - mean)).sum() / float(n) / c0 + x = np.arange(n) + 1 + y = lmap(r, x) + z95 = 1.959963984540054 + z99 = 2.5758293035489004 + ax.axhline(y=z99 / np.sqrt(n), linestyle='--', color='grey') + ax.axhline(y=z95 / np.sqrt(n), color='grey') + ax.axhline(y=0.0, color='black') + ax.axhline(y=-z95 / np.sqrt(n), color='grey') + ax.axhline(y=-z99 / np.sqrt(n), linestyle='--', color='grey') + ax.set_xlabel("Lag") + ax.set_ylabel("Autocorrelation") + ax.plot(x, y, **kwds) + if 'label' in kwds: + ax.legend() + ax.grid() + return ax diff --git a/pandas/plotting/_style.py b/pandas/plotting/_style.py new file mode 100644 index 0000000000000..5d6dc7cbcdfc6 --- /dev/null +++ b/pandas/plotting/_style.py @@ -0,0 +1,246 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +import warnings +from contextlib import contextmanager +import re + +import numpy as np + +from pandas.types.common import is_list_like +from pandas.compat import range, lrange, lmap +import pandas.compat as compat +from pandas.plotting._compat import _mpl_ge_2_0_0 + + +# Extracted from https://gist.github.com/huyng/816622 +# this is the rcParams set when setting display.with_mpl_style +# to True. 
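+# A minimal usage sketch (an assumption for illustration, not part of this
+# module's API; it also presumes a matplotlib version old enough to accept
+# legacy keys such as 'axes.color_cycle'):
+#
+#     import matplotlib as mpl
+#     mpl.rcParams.update(mpl_stylesheet)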
+mpl_stylesheet = { + 'axes.axisbelow': True, + 'axes.color_cycle': ['#348ABD', + '#7A68A6', + '#A60628', + '#467821', + '#CF4457', + '#188487', + '#E24A33'], + 'axes.edgecolor': '#bcbcbc', + 'axes.facecolor': '#eeeeee', + 'axes.grid': True, + 'axes.labelcolor': '#555555', + 'axes.labelsize': 'large', + 'axes.linewidth': 1.0, + 'axes.titlesize': 'x-large', + 'figure.edgecolor': 'white', + 'figure.facecolor': 'white', + 'figure.figsize': (6.0, 4.0), + 'figure.subplot.hspace': 0.5, + 'font.family': 'monospace', + 'font.monospace': ['Andale Mono', + 'Nimbus Mono L', + 'Courier New', + 'Courier', + 'Fixed', + 'Terminal', + 'monospace'], + 'font.size': 10, + 'interactive': True, + 'keymap.all_axes': ['a'], + 'keymap.back': ['left', 'c', 'backspace'], + 'keymap.forward': ['right', 'v'], + 'keymap.fullscreen': ['f'], + 'keymap.grid': ['g'], + 'keymap.home': ['h', 'r', 'home'], + 'keymap.pan': ['p'], + 'keymap.save': ['s'], + 'keymap.xscale': ['L', 'k'], + 'keymap.yscale': ['l'], + 'keymap.zoom': ['o'], + 'legend.fancybox': True, + 'lines.antialiased': True, + 'lines.linewidth': 1.0, + 'patch.antialiased': True, + 'patch.edgecolor': '#EEEEEE', + 'patch.facecolor': '#348ABD', + 'patch.linewidth': 0.5, + 'toolbar': 'toolbar2', + 'xtick.color': '#555555', + 'xtick.direction': 'in', + 'xtick.major.pad': 6.0, + 'xtick.major.size': 0.0, + 'xtick.minor.pad': 6.0, + 'xtick.minor.size': 0.0, + 'ytick.color': '#555555', + 'ytick.direction': 'in', + 'ytick.major.pad': 6.0, + 'ytick.major.size': 0.0, + 'ytick.minor.pad': 6.0, + 'ytick.minor.size': 0.0 +} + + +def _get_standard_colors(num_colors=None, colormap=None, color_type='default', + color=None): + import matplotlib.pyplot as plt + + if color is None and colormap is not None: + if isinstance(colormap, compat.string_types): + import matplotlib.cm as cm + cmap = colormap + colormap = cm.get_cmap(colormap) + if colormap is None: + raise ValueError("Colormap {0} is not recognized".format(cmap)) + colors = lmap(colormap, np.linspace(0, 1, num=num_colors)) + elif color is not None: + if colormap is not None: + warnings.warn("'color' and 'colormap' cannot be used " + "simultaneously. 
Using 'color'") + colors = list(color) if is_list_like(color) else color + else: + if color_type == 'default': + # need to call list() on the result to copy so we don't + # modify the global rcParams below + try: + colors = [c['color'] + for c in list(plt.rcParams['axes.prop_cycle'])] + except KeyError: + colors = list(plt.rcParams.get('axes.color_cycle', + list('bgrcmyk'))) + if isinstance(colors, compat.string_types): + colors = list(colors) + elif color_type == 'random': + import random + + def random_color(column): + random.seed(column) + return [random.random() for _ in range(3)] + + colors = lmap(random_color, lrange(num_colors)) + else: + raise ValueError("color_type must be either 'default' or 'random'") + + if isinstance(colors, compat.string_types): + import matplotlib.colors + conv = matplotlib.colors.ColorConverter() + + def _maybe_valid_colors(colors): + try: + [conv.to_rgba(c) for c in colors] + return True + except ValueError: + return False + + # check whether the string can be convertable to single color + maybe_single_color = _maybe_valid_colors([colors]) + # check whether each character can be convertable to colors + maybe_color_cycle = _maybe_valid_colors(list(colors)) + if maybe_single_color and maybe_color_cycle and len(colors) > 1: + # Special case for single str 'CN' match and convert to hex + # for supporting matplotlib < 2.0.0 + if re.match(r'\AC[0-9]\Z', colors) and _mpl_ge_2_0_0(): + hex_color = [c['color'] + for c in list(plt.rcParams['axes.prop_cycle'])] + colors = [hex_color[int(colors[1])]] + else: + # this may no longer be required + msg = ("'{0}' can be parsed as both single color and " + "color cycle. Specify each color using a list " + "like ['{0}'] or {1}") + raise ValueError(msg.format(colors, list(colors))) + elif maybe_single_color: + colors = [colors] + else: + # ``colors`` is regarded as color cycle. + # mpl will raise error any of them is invalid + pass + + if len(colors) != num_colors: + try: + multiple = num_colors // len(colors) - 1 + except ZeroDivisionError: + raise ValueError("Invalid color argument: ''") + mod = num_colors % len(colors) + + colors += multiple * colors + colors += colors[:mod] + + return colors + + +class _Options(dict): + """ + Stores pandas plotting options. + Allows for parameter aliasing so you can just use parameter names that are + the same as the plot function parameters, but is stored in a canonical + format that makes it easy to breakdown into groups later + """ + + # alias so the names are same as plotting method parameter names + _ALIASES = {'x_compat': 'xaxis.compat'} + _DEFAULT_KEYS = ['xaxis.compat'] + + def __init__(self, deprecated=False): + self._deprecated = deprecated + # self['xaxis.compat'] = False + super(_Options, self).__setitem__('xaxis.compat', False) + + def _warn_if_deprecated(self): + if self._deprecated: + warnings.warn("'pandas.plot_params' is deprecated. 
Use " + "'pandas.plotting.plot_params' instead", + FutureWarning, stacklevel=3) + + def __getitem__(self, key): + self._warn_if_deprecated() + key = self._get_canonical_key(key) + if key not in self: + raise ValueError('%s is not a valid pandas plotting option' % key) + return super(_Options, self).__getitem__(key) + + def __setitem__(self, key, value): + self._warn_if_deprecated() + key = self._get_canonical_key(key) + return super(_Options, self).__setitem__(key, value) + + def __delitem__(self, key): + key = self._get_canonical_key(key) + if key in self._DEFAULT_KEYS: + raise ValueError('Cannot remove default parameter %s' % key) + return super(_Options, self).__delitem__(key) + + def __contains__(self, key): + key = self._get_canonical_key(key) + return super(_Options, self).__contains__(key) + + def reset(self): + """ + Reset the option store to its initial state + + Returns + ------- + None + """ + self._warn_if_deprecated() + self.__init__() + + def _get_canonical_key(self, key): + return self._ALIASES.get(key, key) + + @contextmanager + def use(self, key, value): + """ + Temporarily set a parameter value using the with statement. + Aliasing allowed. + """ + self._warn_if_deprecated() + old_value = self[key] + try: + self[key] = value + yield self + finally: + self[key] = old_value + + +plot_params = _Options() diff --git a/pandas/plotting/_timeseries.py b/pandas/plotting/_timeseries.py new file mode 100644 index 0000000000000..7533e417b0026 --- /dev/null +++ b/pandas/plotting/_timeseries.py @@ -0,0 +1,339 @@ +# TODO: Use the fact that axis can have units to simplify the process + +import numpy as np + +from matplotlib import pylab +from pandas.tseries.period import Period +from pandas.tseries.offsets import DateOffset +import pandas.tseries.frequencies as frequencies +from pandas.tseries.index import DatetimeIndex +from pandas.tseries.period import PeriodIndex +from pandas.tseries.tdi import TimedeltaIndex +from pandas.formats.printing import pprint_thing +import pandas.compat as compat + +from pandas.plotting._converter import (TimeSeries_DateLocator, + TimeSeries_DateFormatter, + TimeSeries_TimedeltaFormatter) + +# --------------------------------------------------------------------- +# Plotting functions and monkey patches + + +def tsplot(series, plotf, ax=None, **kwargs): + """ + Plots a Series on the given Matplotlib axes or the current axes + + Parameters + ---------- + axes : Axes + series : Series + + Notes + _____ + Supports same kwargs as Axes.plot + + """ + # Used inferred freq is possible, need a test case for inferred + if ax is None: + import matplotlib.pyplot as plt + ax = plt.gca() + + freq, series = _maybe_resample(series, ax, kwargs) + + # Set ax with freq info + _decorate_axes(ax, freq, kwargs) + ax._plot_data.append((series, plotf, kwargs)) + lines = plotf(ax, series.index._mpl_repr(), series.values, **kwargs) + + # set date formatter, locators and rescale limits + format_dateaxis(ax, ax.freq, series.index) + return lines + + +def _maybe_resample(series, ax, kwargs): + # resample against axes freq if necessary + freq, ax_freq = _get_freq(ax, series) + + if freq is None: # pragma: no cover + raise ValueError('Cannot use dynamic axis without frequency info') + + # Convert DatetimeIndex to PeriodIndex + if isinstance(series.index, DatetimeIndex): + series = series.to_period(freq=freq) + + if ax_freq is not None and freq != ax_freq: + if frequencies.is_superperiod(freq, ax_freq): # upsample input + series = series.copy() + series.index = series.index.asfreq(ax_freq, 
how='s') + freq = ax_freq + elif _is_sup(freq, ax_freq): # one is weekly + how = kwargs.pop('how', 'last') + series = getattr(series.resample('D'), how)().dropna() + series = getattr(series.resample(ax_freq), how)().dropna() + freq = ax_freq + elif frequencies.is_subperiod(freq, ax_freq) or _is_sub(freq, ax_freq): + _upsample_others(ax, freq, kwargs) + ax_freq = freq + else: # pragma: no cover + raise ValueError('Incompatible frequency conversion') + return freq, series + + +def _is_sub(f1, f2): + return ((f1.startswith('W') and frequencies.is_subperiod('D', f2)) or + (f2.startswith('W') and frequencies.is_subperiod(f1, 'D'))) + + +def _is_sup(f1, f2): + return ((f1.startswith('W') and frequencies.is_superperiod('D', f2)) or + (f2.startswith('W') and frequencies.is_superperiod(f1, 'D'))) + + +def _upsample_others(ax, freq, kwargs): + legend = ax.get_legend() + lines, labels = _replot_ax(ax, freq, kwargs) + _replot_ax(ax, freq, kwargs) + + other_ax = None + if hasattr(ax, 'left_ax'): + other_ax = ax.left_ax + if hasattr(ax, 'right_ax'): + other_ax = ax.right_ax + + if other_ax is not None: + rlines, rlabels = _replot_ax(other_ax, freq, kwargs) + lines.extend(rlines) + labels.extend(rlabels) + + if (legend is not None and kwargs.get('legend', True) and + len(lines) > 0): + title = legend.get_title().get_text() + if title == 'None': + title = None + ax.legend(lines, labels, loc='best', title=title) + + +def _replot_ax(ax, freq, kwargs): + data = getattr(ax, '_plot_data', None) + + # clear current axes and data + ax._plot_data = [] + ax.clear() + + _decorate_axes(ax, freq, kwargs) + + lines = [] + labels = [] + if data is not None: + for series, plotf, kwds in data: + series = series.copy() + idx = series.index.asfreq(freq, how='S') + series.index = idx + ax._plot_data.append((series, plotf, kwds)) + + # for tsplot + if isinstance(plotf, compat.string_types): + from pandas.plotting._core import _plot_klass + plotf = _plot_klass[plotf]._plot + + lines.append(plotf(ax, series.index._mpl_repr(), + series.values, **kwds)[0]) + labels.append(pprint_thing(series.name)) + + return lines, labels + + +def _decorate_axes(ax, freq, kwargs): + """Initialize axes for time-series plotting""" + if not hasattr(ax, '_plot_data'): + ax._plot_data = [] + + ax.freq = freq + xaxis = ax.get_xaxis() + xaxis.freq = freq + if not hasattr(ax, 'legendlabels'): + ax.legendlabels = [kwargs.get('label', None)] + else: + ax.legendlabels.append(kwargs.get('label', None)) + ax.view_interval = None + ax.date_axis_info = None + + +def _get_ax_freq(ax): + """ + Get the freq attribute of the ax object if set. 
+ Also checks shared axes (eg when using secondary yaxis, sharex=True + or twinx) + """ + ax_freq = getattr(ax, 'freq', None) + if ax_freq is None: + # check for left/right ax in case of secondary yaxis + if hasattr(ax, 'left_ax'): + ax_freq = getattr(ax.left_ax, 'freq', None) + elif hasattr(ax, 'right_ax'): + ax_freq = getattr(ax.right_ax, 'freq', None) + if ax_freq is None: + # check if a shared ax (sharex/twinx) has already freq set + shared_axes = ax.get_shared_x_axes().get_siblings(ax) + if len(shared_axes) > 1: + for shared_ax in shared_axes: + ax_freq = getattr(shared_ax, 'freq', None) + if ax_freq is not None: + break + return ax_freq + + +def _get_freq(ax, series): + # get frequency from data + freq = getattr(series.index, 'freq', None) + if freq is None: + freq = getattr(series.index, 'inferred_freq', None) + + ax_freq = _get_ax_freq(ax) + + # use axes freq if no data freq + if freq is None: + freq = ax_freq + + # get the period frequency + if isinstance(freq, DateOffset): + freq = freq.rule_code + else: + freq = frequencies.get_base_alias(freq) + + freq = frequencies.get_period_alias(freq) + return freq, ax_freq + + +def _use_dynamic_x(ax, data): + freq = _get_index_freq(data) + ax_freq = _get_ax_freq(ax) + + if freq is None: # convert irregular if axes has freq info + freq = ax_freq + else: # do not use tsplot if irregular was plotted first + if (ax_freq is None) and (len(ax.get_lines()) > 0): + return False + + if freq is None: + return False + + if isinstance(freq, DateOffset): + freq = freq.rule_code + else: + freq = frequencies.get_base_alias(freq) + freq = frequencies.get_period_alias(freq) + + if freq is None: + return False + + # hack this for 0.10.1, creating more technical debt...sigh + if isinstance(data.index, DatetimeIndex): + base = frequencies.get_freq(freq) + x = data.index + if (base <= frequencies.FreqGroup.FR_DAY): + return x[:1].is_normalized + return Period(x[0], freq).to_timestamp(tz=x.tz) == x[0] + return True + + +def _get_index_freq(data): + freq = getattr(data.index, 'freq', None) + if freq is None: + freq = getattr(data.index, 'inferred_freq', None) + if freq == 'B': + weekdays = np.unique(data.index.dayofweek) + if (5 in weekdays) or (6 in weekdays): + freq = None + return freq + + +def _maybe_convert_index(ax, data): + # tsplot converts automatically, but don't want to convert index + # over and over for DataFrames + if isinstance(data.index, DatetimeIndex): + freq = getattr(data.index, 'freq', None) + + if freq is None: + freq = getattr(data.index, 'inferred_freq', None) + if isinstance(freq, DateOffset): + freq = freq.rule_code + + if freq is None: + freq = _get_ax_freq(ax) + + if freq is None: + raise ValueError('Could not get frequency alias for plotting') + + freq = frequencies.get_base_alias(freq) + freq = frequencies.get_period_alias(freq) + + data = data.to_period(freq=freq) + return data + + +# Patch methods for subplot. Only format_dateaxis is currently used. +# Do we need the rest for convenience? + +def format_timedelta_ticks(x, pos, n_decimals): + """ + Convert seconds to 'D days HH:MM:SS.F' + """ + s, ns = divmod(x, 1e9) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + d, h = divmod(h, 24) + decimals = int(ns * 10**(n_decimals - 9)) + s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) + if n_decimals > 0: + s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) + if d != 0: + s = '{:d} days '.format(int(d)) + s + return s + + +def format_dateaxis(subplot, freq, index): + """ + Pretty-formats the date axis (x-axis). 
+ + Major and minor ticks are automatically set for the frequency of the + current underlying series. As the dynamic mode is activated by + default, changing the limits of the x axis will intelligently change + the positions of the ticks. + """ + + # handle index specific formatting + # Note: DatetimeIndex does not use this + # interface. DatetimeIndex uses matplotlib.date directly + if isinstance(index, PeriodIndex): + + majlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, + minor_locator=False, + plot_obj=subplot) + minlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, + minor_locator=True, + plot_obj=subplot) + subplot.xaxis.set_major_locator(majlocator) + subplot.xaxis.set_minor_locator(minlocator) + + majformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + minor_locator=False, + plot_obj=subplot) + minformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + minor_locator=True, + plot_obj=subplot) + subplot.xaxis.set_major_formatter(majformatter) + subplot.xaxis.set_minor_formatter(minformatter) + + # x and y coord info + subplot.format_coord = lambda t, y: ( + "t = {0} y = {1:8f}".format(Period(ordinal=int(t), freq=freq), y)) + + elif isinstance(index, TimedeltaIndex): + subplot.xaxis.set_major_formatter( + TimeSeries_TimedeltaFormatter()) + else: + raise TypeError('index type not supported') + + pylab.draw_if_interactive() diff --git a/pandas/plotting/_tools.py b/pandas/plotting/_tools.py new file mode 100644 index 0000000000000..720f776279869 --- /dev/null +++ b/pandas/plotting/_tools.py @@ -0,0 +1,383 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +import warnings +from math import ceil + +import numpy as np + +from pandas.types.common import is_list_like +from pandas.core.index import Index +from pandas.core.series import Series +from pandas.compat import range + + +def format_date_labels(ax, rot): + # mini version of autofmt_xdate + try: + for label in ax.get_xticklabels(): + label.set_ha('right') + label.set_rotation(rot) + fig = ax.get_figure() + fig.subplots_adjust(bottom=0.2) + except Exception: # pragma: no cover + pass + + +def table(ax, data, rowLabels=None, colLabels=None, + **kwargs): + """ + Helper function to convert DataFrame and Series to matplotlib.table + + Parameters + ---------- + `ax`: Matplotlib axes object + `data`: DataFrame or Series + data for table contents + `kwargs`: keywords, optional + keyword arguments which passed to matplotlib.table.table. + If `rowLabels` or `colLabels` is not specified, data index or column + name will be used. 
+
+    Returns
+    -------
+    matplotlib table object
+    """
+    from pandas import DataFrame
+    if isinstance(data, Series):
+        data = DataFrame(data, columns=[data.name])
+    elif isinstance(data, DataFrame):
+        pass
+    else:
+        raise ValueError('Input data must be DataFrame or Series')
+
+    if rowLabels is None:
+        rowLabels = data.index
+
+    if colLabels is None:
+        colLabels = data.columns
+
+    cellText = data.values
+
+    import matplotlib.table
+    table = matplotlib.table.table(ax, cellText=cellText,
+                                   rowLabels=rowLabels,
+                                   colLabels=colLabels, **kwargs)
+    return table
+
+
+def _get_layout(nplots, layout=None, layout_type='box'):
+    if layout is not None:
+        if not isinstance(layout, (tuple, list)) or len(layout) != 2:
+            raise ValueError('Layout must be a tuple of (rows, columns)')
+
+        nrows, ncols = layout
+
+        # Python 2 compat
+        ceil_ = lambda x: int(ceil(x))
+        if nrows == -1 and ncols > 0:
+            layout = nrows, ncols = (ceil_(float(nplots) / ncols), ncols)
+        elif ncols == -1 and nrows > 0:
+            layout = nrows, ncols = (nrows, ceil_(float(nplots) / nrows))
+        elif ncols <= 0 and nrows <= 0:
+            msg = "At least one dimension of layout must be positive"
+            raise ValueError(msg)
+
+        if nrows * ncols < nplots:
+            raise ValueError('Layout of %sx%s must be larger than '
+                             'required size %s' % (nrows, ncols, nplots))
+
+        return layout
+
+    if layout_type == 'single':
+        return (1, 1)
+    elif layout_type == 'horizontal':
+        return (1, nplots)
+    elif layout_type == 'vertical':
+        return (nplots, 1)
+
+    layouts = {1: (1, 1), 2: (1, 2), 3: (2, 2), 4: (2, 2)}
+    try:
+        return layouts[nplots]
+    except KeyError:
+        k = 1
+        while k ** 2 < nplots:
+            k += 1
+
+        if (k - 1) * k >= nplots:
+            return k, (k - 1)
+        else:
+            return k, k
+
+# copied from matplotlib/pyplot.py and modified for pandas.plotting
+
+
+def _subplots(naxes=None, sharex=False, sharey=False, squeeze=True,
+              subplot_kw=None, ax=None, layout=None, layout_type='box',
+              **fig_kw):
+    """Create a figure with a set of subplots already made.
+
+    This utility wrapper makes it convenient to create common layouts of
+    subplots, including the enclosing figure object, in a single call.
+
+    Keyword arguments:
+
+    naxes : int
+      Number of required axes. Exceeded axes are set invisible. Default is
+      nrows * ncols.
+
+    sharex : bool
+      If True, the X axis will be shared amongst all subplots.
+
+    sharey : bool
+      If True, the Y axis will be shared amongst all subplots.
+
+    squeeze : bool
+      If True, extra dimensions are squeezed out from the returned axis
+      object:
+        - if only one subplot is constructed (nrows=ncols=1), the resulting
+          single Axes object is returned as a scalar.
+        - for Nx1 or 1xN subplots, the returned object is a 1-d numpy object
+          array of Axes objects.
+        - NxM subplots with N>1 and M>1 are returned as a 2-d array.
+
+      If False, no squeezing at all is done: the returned axis object is
+      always a 2-d array containing Axes instances, even if it ends up
+      being 1x1.
+
+    subplot_kw : dict
+      Dict with keywords passed to the add_subplot() call used to create
+      each subplot.
+
+    ax : Matplotlib axis object, optional
+
+    layout : tuple
+      Number of rows and columns of the subplot grid.
+      If not specified, calculated from naxes and layout_type
+
+    layout_type : {'box', 'horizontal', 'vertical'}, default 'box'
+      Specify how to layout the subplot grid.
+
+    fig_kw : Other keyword arguments to be passed to the figure() call.
+        Note that all keywords not recognized above will be
+        automatically included here.
+ + Returns: + + fig, ax : tuple + - fig is the Matplotlib Figure object + - ax can be either a single axis object or an array of axis objects if + more than one subplot was created. The dimensions of the resulting array + can be controlled with the squeeze keyword, see above. + + **Examples:** + + x = np.linspace(0, 2*np.pi, 400) + y = np.sin(x**2) + + # Just a figure and one subplot + f, ax = plt.subplots() + ax.plot(x, y) + ax.set_title('Simple plot') + + # Two subplots, unpack the output array immediately + f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) + ax1.plot(x, y) + ax1.set_title('Sharing Y axis') + ax2.scatter(x, y) + + # Four polar axes + plt.subplots(2, 2, subplot_kw=dict(polar=True)) + """ + import matplotlib.pyplot as plt + + if subplot_kw is None: + subplot_kw = {} + + if ax is None: + fig = plt.figure(**fig_kw) + else: + if is_list_like(ax): + ax = _flatten(ax) + if layout is not None: + warnings.warn("When passing multiple axes, layout keyword is " + "ignored", UserWarning) + if sharex or sharey: + warnings.warn("When passing multiple axes, sharex and sharey " + "are ignored. These settings must be specified " + "when creating axes", UserWarning, + stacklevel=4) + if len(ax) == naxes: + fig = ax[0].get_figure() + return fig, ax + else: + raise ValueError("The number of passed axes must be {0}, the " + "same as the output plot".format(naxes)) + + fig = ax.get_figure() + # if ax is passed and a number of subplots is 1, return ax as it is + if naxes == 1: + if squeeze: + return fig, ax + else: + return fig, _flatten(ax) + else: + warnings.warn("To output multiple subplots, the figure containing " + "the passed axes is being cleared", UserWarning, + stacklevel=4) + fig.clear() + + nrows, ncols = _get_layout(naxes, layout=layout, layout_type=layout_type) + nplots = nrows * ncols + + # Create empty object array to hold all axes. It's easiest to make it 1-d + # so we can just append subplots upon creation, and then + axarr = np.empty(nplots, dtype=object) + + # Create first subplot separately, so we can share it if requested + ax0 = fig.add_subplot(nrows, ncols, 1, **subplot_kw) + + if sharex: + subplot_kw['sharex'] = ax0 + if sharey: + subplot_kw['sharey'] = ax0 + axarr[0] = ax0 + + # Note off-by-one counting because add_subplot uses the MATLAB 1-based + # convention. + for i in range(1, nplots): + kwds = subplot_kw.copy() + # Set sharex and sharey to None for blank/dummy axes, these can + # interfere with proper axis limits on the visible axes if + # they share axes e.g. issue #7528 + if i >= naxes: + kwds['sharex'] = None + kwds['sharey'] = None + ax = fig.add_subplot(nrows, ncols, i + 1, **kwds) + axarr[i] = ax + + if naxes != nplots: + for ax in axarr[naxes:]: + ax.set_visible(False) + + _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey) + + if squeeze: + # Reshape the array to have the final desired dimension (nrow,ncol), + # though discarding unneeded dimensions that equal 1. If we only have + # one subplot, just return it instead of a 1-element array. 
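+        # e.g. naxes=1 gives a single Axes object, naxes=3 with layout
+        # (1, 3) gives a 1-d array of 3 Axes, and naxes=4 with layout
+        # (2, 2) gives a 2x2 array of Axes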
+        if nplots == 1:
+            axes = axarr[0]
+        else:
+            axes = axarr.reshape(nrows, ncols).squeeze()
+    else:
+        # returned axis array will always be 2-d, even if nrows=ncols=1
+        axes = axarr.reshape(nrows, ncols)
+
+    return fig, axes
+
+
+def _remove_labels_from_axis(axis):
+    for t in axis.get_majorticklabels():
+        t.set_visible(False)
+
+    try:
+        # set_visible will not be effective if
+        # minor axis has NullLocator and NullFormatter (default)
+        import matplotlib.ticker as ticker
+        if isinstance(axis.get_minor_locator(), ticker.NullLocator):
+            axis.set_minor_locator(ticker.AutoLocator())
+        if isinstance(axis.get_minor_formatter(), ticker.NullFormatter):
+            axis.set_minor_formatter(ticker.FormatStrFormatter(''))
+        for t in axis.get_minorticklabels():
+            t.set_visible(False)
+    except Exception:  # pragma: no cover
+        raise
+    axis.get_label().set_visible(False)
+
+
+def _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey):
+    if nplots > 1:
+
+        if nrows > 1:
+            try:
+                # first find out the ax layout,
+                # so that we can correctly handle 'gaps'
+                layout = np.zeros((nrows + 1, ncols + 1), dtype=np.bool)
+                for ax in axarr:
+                    layout[ax.rowNum, ax.colNum] = ax.get_visible()
+
+                for ax in axarr:
+                    # only the last row of subplots should get x labels ->
+                    # all others off; the layout check handles the case that
+                    # the subplot is the last in its column, because there is
+                    # no subplot/gap below it.
+                    if not layout[ax.rowNum + 1, ax.colNum]:
+                        continue
+                    if sharex or len(ax.get_shared_x_axes()
+                                       .get_siblings(ax)) > 1:
+                        _remove_labels_from_axis(ax.xaxis)
+
+            except IndexError:
+                # if gridspec is used, ax.rowNum and ax.colNum may differ
+                # from the layout shape; in this case, use last_row logic
+                for ax in axarr:
+                    if ax.is_last_row():
+                        continue
+                    if sharex or len(ax.get_shared_x_axes()
+                                       .get_siblings(ax)) > 1:
+                        _remove_labels_from_axis(ax.xaxis)
+
+        if ncols > 1:
+            for ax in axarr:
+                # only the first column should get y labels -> set all others
+                # to off; as we only have labels in the first column and we
+                # always have a subplot there, we can skip the layout test
+                if ax.is_first_col():
+                    continue
+                if sharey or len(ax.get_shared_y_axes().get_siblings(ax)) > 1:
+                    _remove_labels_from_axis(ax.yaxis)
+
+
+def _flatten(axes):
+    if not is_list_like(axes):
+        return np.array([axes])
+    elif isinstance(axes, (np.ndarray, Index)):
+        return axes.ravel()
+    return np.array(axes)
+
+
+def _get_all_lines(ax):
+    lines = ax.get_lines()
+
+    if hasattr(ax, 'right_ax'):
+        lines += ax.right_ax.get_lines()
+
+    if hasattr(ax, 'left_ax'):
+        lines += ax.left_ax.get_lines()
+
+    return lines
+
+
+def _get_xlim(lines):
+    left, right = np.inf, -np.inf
+    for l in lines:
+        x = l.get_xdata(orig=False)
+        left = min(x[0], left)
+        right = max(x[-1], right)
+    return left, right
+
+
+def _set_ticks_props(axes, xlabelsize=None, xrot=None,
+                     ylabelsize=None, yrot=None):
+    import matplotlib.pyplot as plt
+
+    for ax in _flatten(axes):
+        if xlabelsize is not None:
+            plt.setp(ax.get_xticklabels(), fontsize=xlabelsize)
+        if xrot is not None:
+            plt.setp(ax.get_xticklabels(), rotation=xrot)
+        if ylabelsize is not None:
+            plt.setp(ax.get_yticklabels(), fontsize=ylabelsize)
+        if yrot is not None:
+            plt.setp(ax.get_yticklabels(), rotation=yrot)
+    return axes
diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py
index 6d92898042b23..02734189ca340 100644
--- a/pandas/tests/api/test_api.py
+++ b/pandas/tests/api/test_api.py
@@ -31,7 +31,7 @@ class TestPDApi(Base, tm.TestCase):
 
     # top-level sub-packages
     lib = ['api', 'compat', 'computation', 'core',
-
'indexes', 'formats', 'errors', 'pandas', + 'indexes', 'formats', 'errors', 'pandas', 'plotting', 'test', 'tools', 'tseries', 'sparse', 'types', 'util', 'options', 'io'] @@ -70,8 +70,7 @@ class TestPDApi(Base, tm.TestCase): 'melt', 'notnull', 'offsets', 'merge', 'merge_ordered', 'merge_asof', 'period_range', - 'pivot', 'pivot_table', 'plot_params', 'qcut', - 'scatter_matrix', + 'pivot', 'pivot_table', 'qcut', 'show_versions', 'timedelta_range', 'unique', 'value_counts', 'wide_to_long'] @@ -104,7 +103,8 @@ class TestPDApi(Base, tm.TestCase): 'rolling_median', 'rolling_min', 'rolling_quantile', 'rolling_skew', 'rolling_std', 'rolling_sum', 'rolling_var', 'rolling_window', 'ordered_merge', - 'pnow', 'match', 'groupby', 'get_store'] + 'pnow', 'match', 'groupby', 'get_store', + 'plot_params', 'scatter_matrix'] def test_api(self): diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index c31d8b539ae6f..d81f73e73ae69 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -16,7 +16,8 @@ import numpy as np from numpy import random -import pandas.tools.plotting as plotting +import pandas.plotting as plotting +from pandas.plotting._tools import _flatten """ @@ -48,12 +49,12 @@ def setUp(self): import matplotlib as mpl mpl.rcdefaults() - self.mpl_le_1_2_1 = plotting._mpl_le_1_2_1() - self.mpl_ge_1_3_1 = plotting._mpl_ge_1_3_1() - self.mpl_ge_1_4_0 = plotting._mpl_ge_1_4_0() - self.mpl_ge_1_5_0 = plotting._mpl_ge_1_5_0() - self.mpl_ge_2_0_0 = plotting._mpl_ge_2_0_0() - self.mpl_ge_2_0_1 = plotting._mpl_ge_2_0_1() + self.mpl_le_1_2_1 = plotting._compat._mpl_le_1_2_1() + self.mpl_ge_1_3_1 = plotting._compat._mpl_ge_1_3_1() + self.mpl_ge_1_4_0 = plotting._compat._mpl_ge_1_4_0() + self.mpl_ge_1_5_0 = plotting._compat._mpl_ge_1_5_0() + self.mpl_ge_2_0_0 = plotting._compat._mpl_ge_2_0_0() + self.mpl_ge_2_0_1 = plotting._compat._mpl_ge_2_0_1() if self.mpl_ge_1_4_0: self.bp_n_objects = 7 @@ -73,7 +74,8 @@ def setUp(self): self.default_tick_position = 'left' if self.mpl_ge_2_0_0 else 'default' # common test data from pandas import read_csv - path = os.path.join(os.path.dirname(curpath()), 'data', 'iris.csv') + base = os.path.join(os.path.dirname(curpath()), os.pardir) + path = os.path.join(base, 'tests', 'data', 'iris.csv') self.iris = read_csv(path) n = 100 @@ -353,7 +355,7 @@ def _check_axes_shape(self, axes, axes_num=None, layout=None, self.assertTrue(len(ax.get_children()) > 0) if layout is not None: - result = self._get_axes_layout(plotting._flatten(axes)) + result = self._get_axes_layout(_flatten(axes)) self.assertEqual(result, layout) self.assert_numpy_array_equal( @@ -379,7 +381,7 @@ def _flatten_visible(self, axes): axes : matplotlib Axes object, or its list-like """ - axes = plotting._flatten(axes) + axes = _flatten(axes) axes = [ax for ax in axes if ax.get_visible()] return axes diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 31c150bc1e64f..5b9c13bd26708 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -14,7 +14,7 @@ from numpy import random from numpy.random import randn -import pandas.tools.plotting as plotting +import pandas.plotting as plotting from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) @@ -54,7 +54,8 @@ def test_boxplot_legacy(self): _check_plot_works(df.boxplot, by='indic') with tm.assert_produces_warning(UserWarning): _check_plot_works(df.boxplot, by=['indic', 'indic2']) - 
_check_plot_works(plotting.boxplot, data=df['one'], return_type='dict') + _check_plot_works(plotting._core.boxplot, data=df['one'], + return_type='dict') _check_plot_works(df.boxplot, notch=1, return_type='dict') with tm.assert_produces_warning(UserWarning): _check_plot_works(df.boxplot, by='indic', notch=1) diff --git a/pandas/tests/tseries/test_converter.py b/pandas/tests/plotting/test_converter.py similarity index 99% rename from pandas/tests/tseries/test_converter.py rename to pandas/tests/plotting/test_converter.py index 5351e26f0e62b..4629103d033f5 100644 --- a/pandas/tests/tseries/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -8,7 +8,7 @@ from pandas.tseries.offsets import Second, Milli, Micro, Day from pandas.compat.numpy import np_datetime64_compat -converter = pytest.importorskip('pandas.tseries.converter') +converter = pytest.importorskip('pandas.plotting._converter') def test_timtetonum_accepts_unicode(): diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 673c34903b259..b3692c5a8d2d2 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -144,7 +144,7 @@ def test_high_freq(self): _check_plot_works(ser.plot) def test_get_datevalue(self): - from pandas.tseries.converter import get_datevalue + from pandas.plotting._converter import get_datevalue self.assertIsNone(get_datevalue(None, 'D')) self.assertEqual(get_datevalue(1987, 'A'), 1987) self.assertEqual(get_datevalue(Period(1987, 'A'), 'M'), @@ -243,7 +243,7 @@ def test_plot_multiple_inferred_freq(self): @slow def test_uhf(self): - import pandas.tseries.converter as conv + import pandas.plotting._converter as conv import matplotlib.pyplot as plt fig = plt.gcf() plt.clf() @@ -387,7 +387,7 @@ def _test(ax): _test(ax) def test_get_finder(self): - import pandas.tseries.converter as conv + import pandas.plotting._converter as conv self.assertEqual(conv.get_finder('B'), conv._daily_finder) self.assertEqual(conv.get_finder('D'), conv._daily_finder) diff --git a/pandas/tests/plotting/test_deprecated.py b/pandas/tests/plotting/test_deprecated.py new file mode 100644 index 0000000000000..d7eaa69460a3a --- /dev/null +++ b/pandas/tests/plotting/test_deprecated.py @@ -0,0 +1,58 @@ +# coding: utf-8 + +import string + +import pandas as pd +import pandas.util.testing as tm +from pandas.util.testing import slow + +from numpy.random import randn + +import pandas.tools.plotting as plotting + +from pandas.tests.plotting.common import TestPlotBase + + +""" +Test cases for plot functions imported from deprecated +pandas.tools.plotting +""" + + +@tm.mplskip +class TestDeprecatedNameSpace(TestPlotBase): + + @slow + def test_scatter_plot_legacy(self): + tm._skip_if_no_scipy() + + df = pd.DataFrame(randn(100, 2)) + + with tm.assert_produces_warning(FutureWarning): + plotting.scatter_matrix(df) + + with tm.assert_produces_warning(FutureWarning): + pd.scatter_matrix(df) + + @slow + def test_boxplot_deprecated(self): + df = pd.DataFrame(randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=['one', 'two', 'three', 'four']) + df['indic'] = ['foo', 'bar'] * 3 + + with tm.assert_produces_warning(FutureWarning): + plotting.boxplot(df, column=['one', 'two'], + by='indic') + + @slow + def test_radviz_deprecated(self): + df = self.iris + with tm.assert_produces_warning(FutureWarning): + plotting.radviz(frame=df, class_column='Name') + + @slow + def test_plot_params(self): + + with tm.assert_produces_warning(FutureWarning): + 
pd.plot_params['xaxis.compat'] = True diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 8090b9cc44ca3..404752b567f63 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -22,7 +22,7 @@ import numpy as np from numpy.random import rand, randn -import pandas.tools.plotting as plotting +import pandas.plotting as plotting from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) @@ -240,13 +240,13 @@ def test_xcompat(self): self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) tm.close() - pd.plot_params['xaxis.compat'] = True + pd.plotting.plot_params['xaxis.compat'] = True ax = df.plot() lines = ax.get_lines() self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) tm.close() - pd.plot_params['x_compat'] = False + pd.plotting.plot_params['x_compat'] = False ax = df.plot() lines = ax.get_lines() self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) @@ -254,7 +254,7 @@ def test_xcompat(self): tm.close() # useful if you're plotting a bunch together - with pd.plot_params.use('x_compat', True): + with pd.plotting.plot_params.use('x_compat', True): ax = df.plot() lines = ax.get_lines() self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) @@ -1979,7 +1979,7 @@ def test_unordered_ts(self): def test_kind_both_ways(self): df = DataFrame({'x': [1, 2, 3]}) - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue df.plot(kind=kind) @@ -1990,7 +1990,7 @@ def test_kind_both_ways(self): def test_all_invalid_plot_data(self): df = DataFrame(list('abcd')) - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -2001,7 +2001,7 @@ def test_partially_invalid_plot_data(self): with tm.RNGContext(42): df = DataFrame(randn(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = 'a' - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -2454,7 +2454,7 @@ def test_memory_leak(self): import gc results = {} - for kind in plotting._plot_klass.keys(): + for kind in plotting._core._plot_klass.keys(): if not _ok_for_gaussian_kde(kind): continue args = {} @@ -2653,7 +2653,7 @@ def test_df_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings( DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4]}), - plotting._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) + plotting._core._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) def test_option_mpl_style(self): with tm.assert_produces_warning(FutureWarning, diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 380bdc12abce4..0a13a6e9893a8 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -9,7 +9,7 @@ import numpy as np from numpy.random import randn -import pandas.tools.plotting as plotting +from pandas.plotting._core import grouped_hist from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) @@ -260,7 +260,7 @@ def test_grouped_hist_legacy(self): df['C'] = np.random.randint(0, 4, 500) df['D'] = ['X'] * 500 - axes = plotting.grouped_hist(df.A, by=df.C) + axes = grouped_hist(df.A, by=df.C) self._check_axes_shape(axes, axes_num=4, layout=(2, 2)) tm.close() @@ 
-277,10 +277,9 @@ def test_grouped_hist_legacy(self): # make sure kwargs to hist are handled xf, yf = 20, 18 xrot, yrot = 30, 40 - axes = plotting.grouped_hist(df.A, by=df.C, normed=True, - cumulative=True, bins=4, - xlabelsize=xf, xrot=xrot, - ylabelsize=yf, yrot=yrot) + axes = grouped_hist(df.A, by=df.C, normed=True, cumulative=True, + bins=4, xlabelsize=xf, xrot=xrot, + ylabelsize=yf, yrot=yrot) # height of last bin (index 5) must be 1.0 for ax in axes.ravel(): rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] @@ -290,14 +289,14 @@ def test_grouped_hist_legacy(self): ylabelsize=yf, yrot=yrot) tm.close() - axes = plotting.grouped_hist(df.A, by=df.C, log=True) + axes = grouped_hist(df.A, by=df.C, log=True) # scale of y must be 'log' self._check_ax_scales(axes, yaxis='log') tm.close() # propagate attr exception from matplotlib.Axes.hist with tm.assertRaises(AttributeError): - plotting.grouped_hist(df.A, by=df.C, foo='bar') + grouped_hist(df.A, by=df.C, foo='bar') with tm.assert_produces_warning(FutureWarning): df.hist(by='C', figsize='default') diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 504c55bcfcfd0..fe0b6c103a0e1 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -11,7 +11,7 @@ from numpy import random from numpy.random import randn -import pandas.tools.plotting as plotting +import pandas.plotting as plotting from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _ok_for_gaussian_kde) @@ -29,7 +29,7 @@ def setUp(self): @slow def test_autocorrelation_plot(self): - from pandas.tools.plotting import autocorrelation_plot + from pandas.plotting import autocorrelation_plot _check_plot_works(autocorrelation_plot, series=self.ts) _check_plot_works(autocorrelation_plot, series=self.ts.values) @@ -38,13 +38,13 @@ def test_autocorrelation_plot(self): @slow def test_lag_plot(self): - from pandas.tools.plotting import lag_plot + from pandas.plotting import lag_plot _check_plot_works(lag_plot, series=self.ts) _check_plot_works(lag_plot, series=self.ts, lag=5) @slow def test_bootstrap_plot(self): - from pandas.tools.plotting import bootstrap_plot + from pandas.plotting import bootstrap_plot _check_plot_works(bootstrap_plot, series=self.ts, size=10) @@ -84,7 +84,7 @@ def scat(**kwds): _check_plot_works(scat, facecolor='rgb') def scat2(x, y, by=None, ax=None, figsize=None): - return plotting.scatter_plot(df, x, y, by, ax, figsize=None) + return plotting._core.scatter_plot(df, x, y, by, ax, figsize=None) _check_plot_works(scat2, x=0, y=1) grouper = Series(np.repeat([1, 2, 3, 4, 5], 20), df.index) @@ -130,7 +130,7 @@ def test_scatter_matrix_axis(self): @slow def test_andrews_curves(self): - from pandas.tools.plotting import andrews_curves + from pandas.plotting import andrews_curves from matplotlib import cm df = self.iris @@ -195,7 +195,7 @@ def test_andrews_curves(self): @slow def test_parallel_coordinates(self): - from pandas.tools.plotting import parallel_coordinates + from pandas.plotting import parallel_coordinates from matplotlib import cm df = self.iris @@ -263,7 +263,7 @@ def test_parallel_coordinates_with_sorted_labels(self): @slow def test_radviz(self): - from pandas.tools.plotting import radviz + from pandas.plotting import radviz from matplotlib import cm df = self.iris diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 8c00d606059a4..c3bc3ca6bf414 100644 --- a/pandas/tests/plotting/test_series.py +++ 
b/pandas/tests/plotting/test_series.py @@ -16,7 +16,7 @@ import numpy as np from numpy.random import randn -import pandas.tools.plotting as plotting +import pandas.plotting as plotting from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) @@ -622,7 +622,9 @@ def test_boxplot_series(self): @slow def test_kind_both_ways(self): s = Series(range(3)) - for kind in plotting._common_kinds + plotting._series_kinds: + kinds = (plotting._core._common_kinds + + plotting._core._series_kinds) + for kind in kinds: if not _ok_for_gaussian_kde(kind): continue s.plot(kind=kind) @@ -631,7 +633,7 @@ def test_kind_both_ways(self): @slow def test_invalid_plot_data(self): s = Series(list('abcd')) - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -640,14 +642,14 @@ def test_invalid_plot_data(self): @slow def test_valid_object_plot(self): s = Series(lrange(10), dtype=object) - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue _check_plot_works(s.plot, kind=kind) def test_partially_invalid_plot_data(self): s = Series(['a', 'b', 1.0, 2]) - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -718,54 +720,57 @@ def test_table(self): def test_series_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings(Series([1, 2, 3]), - plotting._series_kinds + - plotting._common_kinds) + plotting._core._series_kinds + + plotting._core._common_kinds) @slow def test_standard_colors(self): + from pandas.plotting._style import _get_standard_colors + for c in ['r', 'red', 'green', '#FF0000']: - result = plotting._get_standard_colors(1, color=c) + result = _get_standard_colors(1, color=c) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(1, color=[c]) + result = _get_standard_colors(1, color=[c]) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(3, color=c) + result = _get_standard_colors(3, color=c) self.assertEqual(result, [c] * 3) - result = plotting._get_standard_colors(3, color=[c]) + result = _get_standard_colors(3, color=[c]) self.assertEqual(result, [c] * 3) @slow def test_standard_colors_all(self): import matplotlib.colors as colors + from pandas.plotting._style import _get_standard_colors # multiple colors like mediumaquamarine for c in colors.cnames: - result = plotting._get_standard_colors(num_colors=1, color=c) + result = _get_standard_colors(num_colors=1, color=c) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(num_colors=1, color=[c]) + result = _get_standard_colors(num_colors=1, color=[c]) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(num_colors=3, color=c) + result = _get_standard_colors(num_colors=3, color=c) self.assertEqual(result, [c] * 3) - result = plotting._get_standard_colors(num_colors=3, color=[c]) + result = _get_standard_colors(num_colors=3, color=[c]) self.assertEqual(result, [c] * 3) # single letter colors like k for c in colors.ColorConverter.colors: - result = plotting._get_standard_colors(num_colors=1, color=c) + result = _get_standard_colors(num_colors=1, color=c) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(num_colors=1, color=[c]) + result = 
_get_standard_colors(num_colors=1, color=[c]) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(num_colors=3, color=c) + result = _get_standard_colors(num_colors=3, color=c) self.assertEqual(result, [c] * 3) - result = plotting._get_standard_colors(num_colors=3, color=[c]) + result = _get_standard_colors(num_colors=3, color=[c]) self.assertEqual(result, [c] * 3) def test_series_plot_color_kwargs(self): diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 141e3c74b91c4..a68da67a219e2 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -1,4032 +1,20 @@ -# being a bit too dynamic -# pylint: disable=E1101 -from __future__ import division - +import sys import warnings -import re -from math import ceil -from collections import namedtuple -from contextlib import contextmanager -from distutils.version import LooseVersion - -import numpy as np - -from pandas.types.common import (is_list_like, - is_integer, - is_number, - is_hashable, - is_iterator) -from pandas.types.missing import isnull, notnull - -from pandas.util.decorators import cache_readonly, deprecate_kwarg -from pandas.core.base import PandasObject - -from pandas.core.common import AbstractMethodError, _try_sort -from pandas.core.generic import _shared_docs, _shared_doc_kwargs -from pandas.core.index import Index, MultiIndex -from pandas.core.series import Series, remove_na -from pandas.tseries.period import PeriodIndex -from pandas.compat import range, lrange, lmap, map, zip, string_types -import pandas.compat as compat -from pandas.formats.printing import pprint_thing -from pandas.util.decorators import Appender -try: # mpl optional - import pandas.tseries.converter as conv - conv.register() # needs to override so set_xlim works with str/number -except ImportError: - pass - - -# Extracted from https://gist.github.com/huyng/816622 -# this is the rcParams set when setting display.with_mpl_style -# to True. 
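# A minimal sketch (not from the patch) of how a style dict like the
# mpl_stylesheet below is consumed: it is an ordinary matplotlib rcParams
# mapping, applied through the public rcParams API. The keys used here are
# an assumed, illustrative subset.
import matplotlib as mpl

mpl.rcParams.update({'axes.grid': True, 'font.size': 10,
                     'lines.linewidth': 1.0})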
-mpl_stylesheet = { - 'axes.axisbelow': True, - 'axes.color_cycle': ['#348ABD', - '#7A68A6', - '#A60628', - '#467821', - '#CF4457', - '#188487', - '#E24A33'], - 'axes.edgecolor': '#bcbcbc', - 'axes.facecolor': '#eeeeee', - 'axes.grid': True, - 'axes.labelcolor': '#555555', - 'axes.labelsize': 'large', - 'axes.linewidth': 1.0, - 'axes.titlesize': 'x-large', - 'figure.edgecolor': 'white', - 'figure.facecolor': 'white', - 'figure.figsize': (6.0, 4.0), - 'figure.subplot.hspace': 0.5, - 'font.family': 'monospace', - 'font.monospace': ['Andale Mono', - 'Nimbus Mono L', - 'Courier New', - 'Courier', - 'Fixed', - 'Terminal', - 'monospace'], - 'font.size': 10, - 'interactive': True, - 'keymap.all_axes': ['a'], - 'keymap.back': ['left', 'c', 'backspace'], - 'keymap.forward': ['right', 'v'], - 'keymap.fullscreen': ['f'], - 'keymap.grid': ['g'], - 'keymap.home': ['h', 'r', 'home'], - 'keymap.pan': ['p'], - 'keymap.save': ['s'], - 'keymap.xscale': ['L', 'k'], - 'keymap.yscale': ['l'], - 'keymap.zoom': ['o'], - 'legend.fancybox': True, - 'lines.antialiased': True, - 'lines.linewidth': 1.0, - 'patch.antialiased': True, - 'patch.edgecolor': '#EEEEEE', - 'patch.facecolor': '#348ABD', - 'patch.linewidth': 0.5, - 'toolbar': 'toolbar2', - 'xtick.color': '#555555', - 'xtick.direction': 'in', - 'xtick.major.pad': 6.0, - 'xtick.major.size': 0.0, - 'xtick.minor.pad': 6.0, - 'xtick.minor.size': 0.0, - 'ytick.color': '#555555', - 'ytick.direction': 'in', - 'ytick.major.pad': 6.0, - 'ytick.major.size': 0.0, - 'ytick.minor.pad': 6.0, - 'ytick.minor.size': 0.0 -} - - -def _mpl_le_1_2_1(): - try: - import matplotlib as mpl - return (str(mpl.__version__) <= LooseVersion('1.2.1') and - str(mpl.__version__)[0] != '0') - except ImportError: - return False - - -def _mpl_ge_1_3_1(): - try: - import matplotlib - # The or v[0] == '0' is because their versioneer is - # messed up on dev - return (matplotlib.__version__ >= LooseVersion('1.3.1') or - matplotlib.__version__[0] == '0') - except ImportError: - return False - - -def _mpl_ge_1_4_0(): - try: - import matplotlib - return (matplotlib.__version__ >= LooseVersion('1.4') or - matplotlib.__version__[0] == '0') - except ImportError: - return False - - -def _mpl_ge_1_5_0(): - try: - import matplotlib - return (matplotlib.__version__ >= LooseVersion('1.5') or - matplotlib.__version__[0] == '0') - except ImportError: - return False - - -def _mpl_ge_2_0_0(): - try: - import matplotlib - return matplotlib.__version__ >= LooseVersion('2.0') - except ImportError: - return False - - -def _mpl_ge_2_0_1(): - try: - import matplotlib - return matplotlib.__version__ >= LooseVersion('2.0.1') - except ImportError: - return False - - -if _mpl_ge_1_5_0(): - # Compat with mp 1.5, which uses cycler. - import cycler - colors = mpl_stylesheet.pop('axes.color_cycle') - mpl_stylesheet['axes.prop_cycle'] = cycler.cycler('color', colors) - - -def _get_standard_kind(kind): - return {'density': 'kde'}.get(kind, kind) - - -def _get_standard_colors(num_colors=None, colormap=None, color_type='default', - color=None): - import matplotlib.pyplot as plt - - if color is None and colormap is not None: - if isinstance(colormap, compat.string_types): - import matplotlib.cm as cm - cmap = colormap - colormap = cm.get_cmap(colormap) - if colormap is None: - raise ValueError("Colormap {0} is not recognized".format(cmap)) - colors = lmap(colormap, np.linspace(0, 1, num=num_colors)) - elif color is not None: - if colormap is not None: - warnings.warn("'color' and 'colormap' cannot be used " - "simultaneously. 
Using 'color'") - colors = list(color) if is_list_like(color) else color - else: - if color_type == 'default': - # need to call list() on the result to copy so we don't - # modify the global rcParams below - try: - colors = [c['color'] - for c in list(plt.rcParams['axes.prop_cycle'])] - except KeyError: - colors = list(plt.rcParams.get('axes.color_cycle', - list('bgrcmyk'))) - if isinstance(colors, compat.string_types): - colors = list(colors) - elif color_type == 'random': - import random - - def random_color(column): - random.seed(column) - return [random.random() for _ in range(3)] - - colors = lmap(random_color, lrange(num_colors)) - else: - raise ValueError("color_type must be either 'default' or 'random'") - - if isinstance(colors, compat.string_types): - import matplotlib.colors - conv = matplotlib.colors.ColorConverter() - - def _maybe_valid_colors(colors): - try: - [conv.to_rgba(c) for c in colors] - return True - except ValueError: - return False - - # check whether the string can be convertable to single color - maybe_single_color = _maybe_valid_colors([colors]) - # check whether each character can be convertable to colors - maybe_color_cycle = _maybe_valid_colors(list(colors)) - if maybe_single_color and maybe_color_cycle and len(colors) > 1: - # Special case for single str 'CN' match and convert to hex - # for supporting matplotlib < 2.0.0 - if re.match(r'\AC[0-9]\Z', colors) and _mpl_ge_2_0_0(): - hex_color = [c['color'] - for c in list(plt.rcParams['axes.prop_cycle'])] - colors = [hex_color[int(colors[1])]] - else: - # this may no longer be required - msg = ("'{0}' can be parsed as both single color and " - "color cycle. Specify each color using a list " - "like ['{0}'] or {1}") - raise ValueError(msg.format(colors, list(colors))) - elif maybe_single_color: - colors = [colors] - else: - # ``colors`` is regarded as color cycle. - # mpl will raise error any of them is invalid - pass - - if len(colors) != num_colors: - try: - multiple = num_colors // len(colors) - 1 - except ZeroDivisionError: - raise ValueError("Invalid color argument: ''") - mod = num_colors % len(colors) - - colors += multiple * colors - colors += colors[:mod] - - return colors - - -class _Options(dict): - """ - Stores pandas plotting options. 
- Allows for parameter aliasing so you can just use parameter names that are - the same as the plot function parameters, but is stored in a canonical - format that makes it easy to breakdown into groups later - """ - - # alias so the names are same as plotting method parameter names - _ALIASES = {'x_compat': 'xaxis.compat'} - _DEFAULT_KEYS = ['xaxis.compat'] - - def __init__(self): - self['xaxis.compat'] = False - - def __getitem__(self, key): - key = self._get_canonical_key(key) - if key not in self: - raise ValueError('%s is not a valid pandas plotting option' % key) - return super(_Options, self).__getitem__(key) - - def __setitem__(self, key, value): - key = self._get_canonical_key(key) - return super(_Options, self).__setitem__(key, value) - - def __delitem__(self, key): - key = self._get_canonical_key(key) - if key in self._DEFAULT_KEYS: - raise ValueError('Cannot remove default parameter %s' % key) - return super(_Options, self).__delitem__(key) - - def __contains__(self, key): - key = self._get_canonical_key(key) - return super(_Options, self).__contains__(key) - - def reset(self): - """ - Reset the option store to its initial state - - Returns - ------- - None - """ - self.__init__() - - def _get_canonical_key(self, key): - return self._ALIASES.get(key, key) - - @contextmanager - def use(self, key, value): - """ - Temporarily set a parameter value using the with statement. - Aliasing allowed. - """ - old_value = self[key] - try: - self[key] = value - yield self - finally: - self[key] = old_value - - -plot_params = _Options() - - -def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, - diagonal='hist', marker='.', density_kwds=None, - hist_kwds=None, range_padding=0.05, **kwds): - """ - Draw a matrix of scatter plots. - - Parameters - ---------- - frame : DataFrame - alpha : float, optional - amount of transparency applied - figsize : (float,float), optional - a tuple (width, height) in inches - ax : Matplotlib axis object, optional - grid : bool, optional - setting this to True will show the grid - diagonal : {'hist', 'kde'} - pick between 'kde' and 'hist' for - either Kernel Density Estimation or Histogram - plot in the diagonal - marker : str, optional - Matplotlib marker type, default '.' - hist_kwds : other plotting keyword arguments - To be passed to hist function - density_kwds : other plotting keyword arguments - To be passed to kernel density estimate plot - range_padding : float, optional - relative extension of axis range in x and y - with respect to (x_max - x_min) or (y_max - y_min), - default 0.05 - kwds : other plotting keyword arguments - To be passed to scatter function - - Examples - -------- - >>> df = DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) - >>> scatter_matrix(df, alpha=0.2) - """ - - df = frame._get_numeric_data() - n = df.columns.size - naxes = n * n - fig, axes = _subplots(naxes=naxes, figsize=figsize, ax=ax, - squeeze=False) - - # no gaps between subplots - fig.subplots_adjust(wspace=0, hspace=0) - - mask = notnull(df) - - marker = _get_marker_compat(marker) - - hist_kwds = hist_kwds or {} - density_kwds = density_kwds or {} - - # GH 14855 - kwds.setdefault('edgecolors', 'none') - - boundaries_list = [] - for a in df.columns: - values = df[a].values[mask[a].values] - rmin_, rmax_ = np.min(values), np.max(values) - rdelta_ext = (rmax_ - rmin_) * range_padding / 2. 
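# Worked example of the padding step above, assuming a column spanning
# [0, 10] with the default range_padding=0.05:
#     rdelta_ext = (10 - 0) * 0.05 / 2 = 0.25
# so the stored axis boundaries become (-0.25, 10.25); the full range is
# widened by range_padding, half on each side.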
- boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext)) - - for i, a in zip(lrange(n), df.columns): - for j, b in zip(lrange(n), df.columns): - ax = axes[i, j] - - if i == j: - values = df[a].values[mask[a].values] - - # Deal with the diagonal by drawing a histogram there. - if diagonal == 'hist': - ax.hist(values, **hist_kwds) - - elif diagonal in ('kde', 'density'): - from scipy.stats import gaussian_kde - y = values - gkde = gaussian_kde(y) - ind = np.linspace(y.min(), y.max(), 1000) - ax.plot(ind, gkde.evaluate(ind), **density_kwds) - - ax.set_xlim(boundaries_list[i]) - - else: - common = (mask[a] & mask[b]).values - - ax.scatter(df[b][common], df[a][common], - marker=marker, alpha=alpha, **kwds) - - ax.set_xlim(boundaries_list[j]) - ax.set_ylim(boundaries_list[i]) - - ax.set_xlabel(b) - ax.set_ylabel(a) - - if j != 0: - ax.yaxis.set_visible(False) - if i != n - 1: - ax.xaxis.set_visible(False) - - if len(df.columns) > 1: - lim1 = boundaries_list[0] - locs = axes[0][1].yaxis.get_majorticklocs() - locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] - adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) - - lim0 = axes[0][0].get_ylim() - adj = adj * (lim0[1] - lim0[0]) + lim0[0] - axes[0][0].yaxis.set_ticks(adj) - - if np.all(locs == locs.astype(int)): - # if all ticks are int - locs = locs.astype(int) - axes[0][0].yaxis.set_ticklabels(locs) - - _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) - - return axes - - -def _gca(): - import matplotlib.pyplot as plt - return plt.gca() - - -def _gcf(): - import matplotlib.pyplot as plt - return plt.gcf() - - -def _get_marker_compat(marker): - import matplotlib.lines as mlines - import matplotlib as mpl - if mpl.__version__ < '1.1.0' and marker == '.': - return 'o' - if marker not in mlines.lineMarkers: - return 'o' - return marker - - -def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): - """RadViz - a multivariate data visualization algorithm - - Parameters: - ----------- - frame: DataFrame - class_column: str - Column name containing class names - ax: Matplotlib axis object, optional - color: list or tuple, optional - Colors to use for the different classes - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that name - from matplotlib. 
- kwds: keywords - Options to pass to matplotlib scatter plotting method - - Returns: - -------- - ax: Matplotlib axis object - """ - import matplotlib.pyplot as plt - import matplotlib.patches as patches - - def normalize(series): - a = min(series) - b = max(series) - return (series - a) / (b - a) - - n = len(frame) - classes = frame[class_column].drop_duplicates() - class_col = frame[class_column] - df = frame.drop(class_column, axis=1).apply(normalize) - - if ax is None: - ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1]) - - to_plot = {} - colors = _get_standard_colors(num_colors=len(classes), colormap=colormap, - color_type='random', color=color) - - for kls in classes: - to_plot[kls] = [[], []] - - m = len(frame.columns) - 1 - s = np.array([(np.cos(t), np.sin(t)) - for t in [2.0 * np.pi * (i / float(m)) - for i in range(m)]]) - - for i in range(n): - row = df.iloc[i].values - row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1) - y = (s * row_).sum(axis=0) / row.sum() - kls = class_col.iat[i] - to_plot[kls][0].append(y[0]) - to_plot[kls][1].append(y[1]) - - for i, kls in enumerate(classes): - ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i], - label=pprint_thing(kls), **kwds) - ax.legend() - - ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none')) - - for xy, name in zip(s, df.columns): - - ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray')) - - if xy[0] < 0.0 and xy[1] < 0.0: - ax.text(xy[0] - 0.025, xy[1] - 0.025, name, - ha='right', va='top', size='small') - elif xy[0] < 0.0 and xy[1] >= 0.0: - ax.text(xy[0] - 0.025, xy[1] + 0.025, name, - ha='right', va='bottom', size='small') - elif xy[0] >= 0.0 and xy[1] < 0.0: - ax.text(xy[0] + 0.025, xy[1] - 0.025, name, - ha='left', va='top', size='small') - elif xy[0] >= 0.0 and xy[1] >= 0.0: - ax.text(xy[0] + 0.025, xy[1] + 0.025, name, - ha='left', va='bottom', size='small') - - ax.axis('equal') - return ax - - -@deprecate_kwarg(old_arg_name='data', new_arg_name='frame') -def andrews_curves(frame, class_column, ax=None, samples=200, color=None, - colormap=None, **kwds): - """ - Generates a matplotlib plot of Andrews curves, for visualising clusters of - multivariate data. - - Andrews curves have the functional form: - - f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + - x_4 sin(2t) + x_5 cos(2t) + ... - - Where x coefficients correspond to the values of each dimension and t is - linearly spaced between -pi and +pi. Each row of frame then corresponds to - a single curve. - - Parameters: - ----------- - frame : DataFrame - Data to be plotted, preferably normalized to (0.0, 1.0) - class_column : Name of the column containing class names - ax : matplotlib axes object, default None - samples : Number of points to plot in each curve - color: list or tuple, optional - Colors to use for the different classes - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that name - from matplotlib. - kwds: keywords - Options to pass to matplotlib plotting method - - Returns: - -------- - ax: Matplotlib axis object - - """ - from math import sqrt, pi - import matplotlib.pyplot as plt - - def function(amplitudes): - def f(t): - x1 = amplitudes[0] - result = x1 / sqrt(2.0) - - # Take the rest of the coefficients and resize them - # appropriately. Take a copy of amplitudes as otherwise numpy - # deletes the element from amplitudes itself. 
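# Worked example of the reshape below, assuming a 5-column row
# amplitudes = [x1, x2, x3, x4, x5]: dropping x1 and resizing to (2, 2)
# gives coeffs = [[x2, x3], [x4, x5]] with harmonics = [1, 2], which
# reproduces the docstring's
#     f(t) = x1/sqrt(2) + x2 sin(t) + x3 cos(t) + x4 sin(2t) + x5 cos(2t)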
- coeffs = np.delete(np.copy(amplitudes), 0) - coeffs.resize(int((coeffs.size + 1) / 2), 2) - - # Generate the harmonics and arguments for the sin and cos - # functions. - harmonics = np.arange(0, coeffs.shape[0]) + 1 - trig_args = np.outer(harmonics, t) - - result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) + - coeffs[:, 1, np.newaxis] * np.cos(trig_args), - axis=0) - return result - return f - - n = len(frame) - class_col = frame[class_column] - classes = frame[class_column].drop_duplicates() - df = frame.drop(class_column, axis=1) - t = np.linspace(-pi, pi, samples) - used_legends = set([]) - - color_values = _get_standard_colors(num_colors=len(classes), - colormap=colormap, color_type='random', - color=color) - colors = dict(zip(classes, color_values)) - if ax is None: - ax = plt.gca(xlim=(-pi, pi)) - for i in range(n): - row = df.iloc[i].values - f = function(row) - y = f(t) - kls = class_col.iat[i] - label = pprint_thing(kls) - if label not in used_legends: - used_legends.add(label) - ax.plot(t, y, color=colors[kls], label=label, **kwds) - else: - ax.plot(t, y, color=colors[kls], **kwds) - - ax.legend(loc='upper right') - ax.grid() - return ax - - -def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): - """Bootstrap plot. - - Parameters: - ----------- - series: Time series - fig: matplotlib figure object, optional - size: number of data points to consider during each sampling - samples: number of times the bootstrap procedure is performed - kwds: optional keyword arguments for plotting commands, must be accepted - by both hist and plot - - Returns: - -------- - fig: matplotlib figure - """ - import random - import matplotlib.pyplot as plt - - # random.sample(ndarray, int) fails on python 3.3, sigh - data = list(series.values) - samplings = [random.sample(data, size) for _ in range(samples)] - - means = np.array([np.mean(sampling) for sampling in samplings]) - medians = np.array([np.median(sampling) for sampling in samplings]) - midranges = np.array([(min(sampling) + max(sampling)) * 0.5 - for sampling in samplings]) - if fig is None: - fig = plt.figure() - x = lrange(samples) - axes = [] - ax1 = fig.add_subplot(2, 3, 1) - ax1.set_xlabel("Sample") - axes.append(ax1) - ax1.plot(x, means, **kwds) - ax2 = fig.add_subplot(2, 3, 2) - ax2.set_xlabel("Sample") - axes.append(ax2) - ax2.plot(x, medians, **kwds) - ax3 = fig.add_subplot(2, 3, 3) - ax3.set_xlabel("Sample") - axes.append(ax3) - ax3.plot(x, midranges, **kwds) - ax4 = fig.add_subplot(2, 3, 4) - ax4.set_xlabel("Mean") - axes.append(ax4) - ax4.hist(means, **kwds) - ax5 = fig.add_subplot(2, 3, 5) - ax5.set_xlabel("Median") - axes.append(ax5) - ax5.hist(medians, **kwds) - ax6 = fig.add_subplot(2, 3, 6) - ax6.set_xlabel("Midrange") - axes.append(ax6) - ax6.hist(midranges, **kwds) - for axis in axes: - plt.setp(axis.get_xticklabels(), fontsize=8) - plt.setp(axis.get_yticklabels(), fontsize=8) - return fig - - -@deprecate_kwarg(old_arg_name='colors', new_arg_name='color') -@deprecate_kwarg(old_arg_name='data', new_arg_name='frame', stacklevel=3) -def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, - use_columns=False, xticks=None, colormap=None, - axvlines=True, axvlines_kwds=None, sort_labels=False, - **kwds): - """Parallel coordinates plotting. 
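# A minimal runnable sketch (with assumed stand-in data) of the resampling
# statistics that bootstrap_plot above collects: `samples` draws of `size`
# points, each reduced to a mean, median and midrange.
import random
import numpy as np

data = list(range(100))                     # assumed stand-in series values
samplings = [random.sample(data, 50) for _ in range(500)]
means = np.array([np.mean(s) for s in samplings])
medians = np.array([np.median(s) for s in samplings])
midranges = np.array([(min(s) + max(s)) * 0.5 for s in samplings])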
- - Parameters - ---------- - frame: DataFrame - class_column: str - Column name containing class names - cols: list, optional - A list of column names to use - ax: matplotlib.axis, optional - matplotlib axis object - color: list or tuple, optional - Colors to use for the different classes - use_columns: bool, optional - If true, columns will be used as xticks - xticks: list or tuple, optional - A list of values to use for xticks - colormap: str or matplotlib colormap, default None - Colormap to use for line colors. - axvlines: bool, optional - If true, vertical lines will be added at each xtick - axvlines_kwds: keywords, optional - Options to be passed to axvline method for vertical lines - sort_labels: bool, False - Sort class_column labels, useful when assigning colours - - .. versionadded:: 0.20.0 - - kwds: keywords - Options to pass to matplotlib plotting method - - Returns - ------- - ax: matplotlib axis object - - Examples - -------- - >>> from pandas import read_csv - >>> from pandas.tools.plotting import parallel_coordinates - >>> from matplotlib import pyplot as plt - >>> df = read_csv('https://raw.github.com/pandas-dev/pandas/master' - '/pandas/tests/data/iris.csv') - >>> parallel_coordinates(df, 'Name', color=('#556270', - '#4ECDC4', '#C7F464')) - >>> plt.show() - """ - if axvlines_kwds is None: - axvlines_kwds = {'linewidth': 1, 'color': 'black'} - import matplotlib.pyplot as plt - - n = len(frame) - classes = frame[class_column].drop_duplicates() - class_col = frame[class_column] - - if cols is None: - df = frame.drop(class_column, axis=1) - else: - df = frame[cols] - - used_legends = set([]) - - ncols = len(df.columns) - - # determine values to use for xticks - if use_columns is True: - if not np.all(np.isreal(list(df.columns))): - raise ValueError('Columns must be numeric to be used as xticks') - x = df.columns - elif xticks is not None: - if not np.all(np.isreal(xticks)): - raise ValueError('xticks specified must be numeric') - elif len(xticks) != ncols: - raise ValueError('Length of xticks must match number of columns') - x = xticks - else: - x = lrange(ncols) - - if ax is None: - ax = plt.gca() - - color_values = _get_standard_colors(num_colors=len(classes), - colormap=colormap, color_type='random', - color=color) - - if sort_labels: - classes = sorted(classes) - color_values = sorted(color_values) - colors = dict(zip(classes, color_values)) - - for i in range(n): - y = df.iloc[i].values - kls = class_col.iat[i] - label = pprint_thing(kls) - if label not in used_legends: - used_legends.add(label) - ax.plot(x, y, color=colors[kls], label=label, **kwds) - else: - ax.plot(x, y, color=colors[kls], **kwds) - - if axvlines: - for i in x: - ax.axvline(i, **axvlines_kwds) - - ax.set_xticks(x) - ax.set_xticklabels(df.columns) - ax.set_xlim(x[0], x[-1]) - ax.legend(loc='upper right') - ax.grid() - return ax - - -def lag_plot(series, lag=1, ax=None, **kwds): - """Lag plot for time series. 
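# A minimal sketch (assumed values) of the pairing lag_plot performs: it
# scatters y(t) against y(t + lag) by slicing the series values once from
# each end.
import numpy as np

data = np.arange(10)               # assumed stand-in for series.values
lag = 1
y1, y2 = data[:-lag], data[lag:]   # x axis: y(t); y axis: y(t + lag)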
- - Parameters: - ----------- - series: Time series - lag: lag of the scatter plot, default 1 - ax: Matplotlib axis object, optional - kwds: Matplotlib scatter method keyword arguments, optional - - Returns: - -------- - ax: Matplotlib axis object - """ - import matplotlib.pyplot as plt - - # workaround because `c='b'` is hardcoded in matplotlibs scatter method - kwds.setdefault('c', plt.rcParams['patch.facecolor']) - - data = series.values - y1 = data[:-lag] - y2 = data[lag:] - if ax is None: - ax = plt.gca() - ax.set_xlabel("y(t)") - ax.set_ylabel("y(t + %s)" % lag) - ax.scatter(y1, y2, **kwds) - return ax - - -def autocorrelation_plot(series, ax=None, **kwds): - """Autocorrelation plot for time series. - - Parameters: - ----------- - series: Time series - ax: Matplotlib axis object, optional - kwds : keywords - Options to pass to matplotlib plotting method - - Returns: - ----------- - ax: Matplotlib axis object - """ - import matplotlib.pyplot as plt - n = len(series) - data = np.asarray(series) - if ax is None: - ax = plt.gca(xlim=(1, n), ylim=(-1.0, 1.0)) - mean = np.mean(data) - c0 = np.sum((data - mean) ** 2) / float(n) - - def r(h): - return ((data[:n - h] - mean) * - (data[h:] - mean)).sum() / float(n) / c0 - x = np.arange(n) + 1 - y = lmap(r, x) - z95 = 1.959963984540054 - z99 = 2.5758293035489004 - ax.axhline(y=z99 / np.sqrt(n), linestyle='--', color='grey') - ax.axhline(y=z95 / np.sqrt(n), color='grey') - ax.axhline(y=0.0, color='black') - ax.axhline(y=-z95 / np.sqrt(n), color='grey') - ax.axhline(y=-z99 / np.sqrt(n), linestyle='--', color='grey') - ax.set_xlabel("Lag") - ax.set_ylabel("Autocorrelation") - ax.plot(x, y, **kwds) - if 'label' in kwds: - ax.legend() - ax.grid() - return ax - - -class MPLPlot(object): - """ - Base class for assembling a pandas plot using matplotlib - - Parameters - ---------- - data : - - """ - - @property - def _kind(self): - """Specify kind str. Must be overridden in child class""" - raise NotImplementedError - - _layout_type = 'vertical' - _default_rot = 0 - orientation = None - _pop_attributes = ['label', 'style', 'logy', 'logx', 'loglog', - 'mark_right', 'stacked'] - _attr_defaults = {'logy': False, 'logx': False, 'loglog': False, - 'mark_right': True, 'stacked': False} - - def __init__(self, data, kind=None, by=None, subplots=False, sharex=None, - sharey=False, use_index=True, - figsize=None, grid=None, legend=True, rot=None, - ax=None, fig=None, title=None, xlim=None, ylim=None, - xticks=None, yticks=None, - sort_columns=False, fontsize=None, - secondary_y=False, colormap=None, - table=False, layout=None, **kwds): - - self.data = data - self.by = by - - self.kind = kind - - self.sort_columns = sort_columns - - self.subplots = subplots - - if sharex is None: - if ax is None: - self.sharex = True - else: - # if we get an axis, the users should do the visibility - # setting... 
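# Compact restatement (with assumed sample data) of the estimator used by
# autocorrelation_plot above:
#     r(h) = sum((x_t - mean) * (x_{t+h} - mean)) / (n * c0)
# where c0 is the biased variance, with null-hypothesis bands at
# +/- z / sqrt(n).
import numpy as np

x = np.sin(np.linspace(0, 20, 100))            # assumed sample series
n, mean = len(x), x.mean()
c0 = ((x - mean) ** 2).sum() / n

def r(h):
    return ((x[:n - h] - mean) * (x[h:] - mean)).sum() / n / c0

band95 = 1.959963984540054 / np.sqrt(n)        # ~95% confidence band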
- self.sharex = False - else: - self.sharex = sharex - - self.sharey = sharey - self.figsize = figsize - self.layout = layout - - self.xticks = xticks - self.yticks = yticks - self.xlim = xlim - self.ylim = ylim - self.title = title - self.use_index = use_index - - self.fontsize = fontsize - - if rot is not None: - self.rot = rot - # need to know for format_date_labels since it's rotated to 30 by - # default - self._rot_set = True - else: - self._rot_set = False - self.rot = self._default_rot - - if grid is None: - grid = False if secondary_y else self.plt.rcParams['axes.grid'] - - self.grid = grid - self.legend = legend - self.legend_handles = [] - self.legend_labels = [] - - for attr in self._pop_attributes: - value = kwds.pop(attr, self._attr_defaults.get(attr, None)) - setattr(self, attr, value) - - self.ax = ax - self.fig = fig - self.axes = None - - # parse errorbar input if given - xerr = kwds.pop('xerr', None) - yerr = kwds.pop('yerr', None) - self.errors = {} - for kw, err in zip(['xerr', 'yerr'], [xerr, yerr]): - self.errors[kw] = self._parse_errorbars(kw, err) - - if not isinstance(secondary_y, (bool, tuple, list, np.ndarray, Index)): - secondary_y = [secondary_y] - self.secondary_y = secondary_y - - # ugly TypeError if user passes matplotlib's `cmap` name. - # Probably better to accept either. - if 'cmap' in kwds and colormap: - raise TypeError("Only specify one of `cmap` and `colormap`.") - elif 'cmap' in kwds: - self.colormap = kwds.pop('cmap') - else: - self.colormap = colormap - - self.table = table - - self.kwds = kwds - - self._validate_color_args() - - def _validate_color_args(self): - if 'color' not in self.kwds and 'colors' in self.kwds: - warnings.warn(("'colors' is being deprecated. Please use 'color'" - "instead of 'colors'")) - colors = self.kwds.pop('colors') - self.kwds['color'] = colors - - if ('color' in self.kwds and self.nseries == 1): - # support series.plot(color='green') - self.kwds['color'] = [self.kwds['color']] - - if ('color' in self.kwds or 'colors' in self.kwds) and \ - self.colormap is not None: - warnings.warn("'color' and 'colormap' cannot be used " - "simultaneously. Using 'color'") - - if 'color' in self.kwds and self.style is not None: - if is_list_like(self.style): - styles = self.style - else: - styles = [self.style] - # need only a single match - for s in styles: - if re.match('^[a-z]+?', s) is not None: - raise ValueError( - "Cannot pass 'style' string with a color " - "symbol and 'color' keyword argument. Please" - " use one or the other or pass 'style' " - "without a color symbol") - - def _iter_data(self, data=None, keep_index=False, fillna=None): - if data is None: - data = self.data - if fillna is not None: - data = data.fillna(fillna) - - # TODO: unused? 
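# Illustration (assumed frame) of the _validate_color_args rule above: a
# style string that begins with a color letter cannot be combined with an
# explicit color keyword.
import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3]})
df.plot(style='g--')                  # fine: color comes from the style
# df.plot(style='g--', color='red')  # raises ValueError per the check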
- # if self.sort_columns: - # columns = _try_sort(data.columns) - # else: - # columns = data.columns - - for col, values in data.iteritems(): - if keep_index is True: - yield col, values - else: - yield col, values.values - - @property - def nseries(self): - if self.data.ndim == 1: - return 1 - else: - return self.data.shape[1] - - def draw(self): - self.plt.draw_if_interactive() - - def generate(self): - self._args_adjust() - self._compute_plot_data() - self._setup_subplots() - self._make_plot() - self._add_table() - self._make_legend() - self._adorn_subplots() - - for ax in self.axes: - self._post_plot_logic_common(ax, self.data) - self._post_plot_logic(ax, self.data) - - def _args_adjust(self): - pass - - def _has_plotted_object(self, ax): - """check whether ax has data""" - return (len(ax.lines) != 0 or - len(ax.artists) != 0 or - len(ax.containers) != 0) - - def _maybe_right_yaxis(self, ax, axes_num): - if not self.on_right(axes_num): - # secondary axes may be passed via ax kw - return self._get_ax_layer(ax) - - if hasattr(ax, 'right_ax'): - # if it has right_ax proparty, ``ax`` must be left axes - return ax.right_ax - elif hasattr(ax, 'left_ax'): - # if it has left_ax proparty, ``ax`` must be right axes - return ax - else: - # otherwise, create twin axes - orig_ax, new_ax = ax, ax.twinx() - # TODO: use Matplotlib public API when available - new_ax._get_lines = orig_ax._get_lines - new_ax._get_patches_for_fill = orig_ax._get_patches_for_fill - orig_ax.right_ax, new_ax.left_ax = new_ax, orig_ax - - if not self._has_plotted_object(orig_ax): # no data on left y - orig_ax.get_yaxis().set_visible(False) - return new_ax - - def _setup_subplots(self): - if self.subplots: - fig, axes = _subplots(naxes=self.nseries, - sharex=self.sharex, sharey=self.sharey, - figsize=self.figsize, ax=self.ax, - layout=self.layout, - layout_type=self._layout_type) - else: - if self.ax is None: - fig = self.plt.figure(figsize=self.figsize) - axes = fig.add_subplot(111) - else: - fig = self.ax.get_figure() - if self.figsize is not None: - fig.set_size_inches(self.figsize) - axes = self.ax - - axes = _flatten(axes) - - if self.logx or self.loglog: - [a.set_xscale('log') for a in axes] - if self.logy or self.loglog: - [a.set_yscale('log') for a in axes] - - self.fig = fig - self.axes = axes - - @property - def result(self): - """ - Return result axes - """ - if self.subplots: - if self.layout is not None and not is_list_like(self.ax): - return self.axes.reshape(*self.layout) - else: - return self.axes - else: - sec_true = isinstance(self.secondary_y, bool) and self.secondary_y - all_sec = (is_list_like(self.secondary_y) and - len(self.secondary_y) == self.nseries) - if (sec_true or all_sec): - # if all data is plotted on secondary, return right axes - return self._get_ax_layer(self.axes[0], primary=False) - else: - return self.axes[0] - - def _compute_plot_data(self): - data = self.data - - if isinstance(data, Series): - label = self.label - if label is None and data.name is None: - label = 'None' - data = data.to_frame(name=label) - - numeric_data = data._convert(datetime=True)._get_numeric_data() - - try: - is_empty = numeric_data.empty - except AttributeError: - is_empty = not len(numeric_data) - - # no empty frames or series allowed - if is_empty: - raise TypeError('Empty {0!r}: no numeric data to ' - 'plot'.format(numeric_data.__class__.__name__)) - - self.data = numeric_data - - def _make_plot(self): - raise AbstractMethodError(self) - - def _add_table(self): - if self.table is False: - return - elif 
self.table is True: - data = self.data.transpose() - else: - data = self.table - ax = self._get_ax(0) - table(ax, data) - - def _post_plot_logic_common(self, ax, data): - """Common post process for each axes""" - labels = [pprint_thing(key) for key in data.index] - labels = dict(zip(range(len(data.index)), labels)) - - if self.orientation == 'vertical' or self.orientation is None: - if self._need_to_set_index: - xticklabels = [labels.get(x, '') for x in ax.get_xticks()] - ax.set_xticklabels(xticklabels) - self._apply_axis_properties(ax.xaxis, rot=self.rot, - fontsize=self.fontsize) - self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize) - elif self.orientation == 'horizontal': - if self._need_to_set_index: - yticklabels = [labels.get(y, '') for y in ax.get_yticks()] - ax.set_yticklabels(yticklabels) - self._apply_axis_properties(ax.yaxis, rot=self.rot, - fontsize=self.fontsize) - self._apply_axis_properties(ax.xaxis, fontsize=self.fontsize) - else: # pragma no cover - raise ValueError - - def _post_plot_logic(self, ax, data): - """Post process for each axes. Overridden in child classes""" - pass - - def _adorn_subplots(self): - """Common post process unrelated to data""" - if len(self.axes) > 0: - all_axes = self._get_subplots() - nrows, ncols = self._get_axes_layout() - _handle_shared_axes(axarr=all_axes, nplots=len(all_axes), - naxes=nrows * ncols, nrows=nrows, - ncols=ncols, sharex=self.sharex, - sharey=self.sharey) - - for ax in self.axes: - if self.yticks is not None: - ax.set_yticks(self.yticks) - - if self.xticks is not None: - ax.set_xticks(self.xticks) - - if self.ylim is not None: - ax.set_ylim(self.ylim) - - if self.xlim is not None: - ax.set_xlim(self.xlim) - - ax.grid(self.grid) - - if self.title: - if self.subplots: - if is_list_like(self.title): - if len(self.title) != self.nseries: - msg = ('The length of `title` must equal the number ' - 'of columns if using `title` of type `list` ' - 'and `subplots=True`.\n' - 'length of title = {}\n' - 'number of columns = {}').format( - len(self.title), self.nseries) - raise ValueError(msg) - - for (ax, title) in zip(self.axes, self.title): - ax.set_title(title) - else: - self.fig.suptitle(self.title) - else: - if is_list_like(self.title): - msg = ('Using `title` of type `list` is not supported ' - 'unless `subplots=True` is passed') - raise ValueError(msg) - self.axes[0].set_title(self.title) - - def _apply_axis_properties(self, axis, rot=None, fontsize=None): - labels = axis.get_majorticklabels() + axis.get_minorticklabels() - for label in labels: - if rot is not None: - label.set_rotation(rot) - if fontsize is not None: - label.set_fontsize(fontsize) - - @property - def legend_title(self): - if not isinstance(self.data.columns, MultiIndex): - name = self.data.columns.name - if name is not None: - name = pprint_thing(name) - return name - else: - stringified = map(pprint_thing, - self.data.columns.names) - return ','.join(stringified) - - def _add_legend_handle(self, handle, label, index=None): - if label is not None: - if self.mark_right and index is not None: - if self.on_right(index): - label = label + ' (right)' - self.legend_handles.append(handle) - self.legend_labels.append(label) - - def _make_legend(self): - ax, leg = self._get_ax_legend(self.axes[0]) - - handles = [] - labels = [] - title = '' - - if not self.subplots: - if leg is not None: - title = leg.get_title().get_text() - handles = leg.legendHandles - labels = [x.get_text() for x in leg.get_texts()] - - if self.legend: - if self.legend == 'reverse': - 
self.legend_handles = reversed(self.legend_handles) - self.legend_labels = reversed(self.legend_labels) - - handles += self.legend_handles - labels += self.legend_labels - if self.legend_title is not None: - title = self.legend_title - - if len(handles) > 0: - ax.legend(handles, labels, loc='best', title=title) - - elif self.subplots and self.legend: - for ax in self.axes: - if ax.get_visible(): - ax.legend(loc='best') - - def _get_ax_legend(self, ax): - leg = ax.get_legend() - other_ax = (getattr(ax, 'left_ax', None) or - getattr(ax, 'right_ax', None)) - other_leg = None - if other_ax is not None: - other_leg = other_ax.get_legend() - if leg is None and other_leg is not None: - leg = other_leg - ax = other_ax - return ax, leg - - @cache_readonly - def plt(self): - import matplotlib.pyplot as plt - return plt - - @staticmethod - def mpl_ge_1_3_1(): - return _mpl_ge_1_3_1() - - @staticmethod - def mpl_ge_1_5_0(): - return _mpl_ge_1_5_0() - - _need_to_set_index = False - - def _get_xticks(self, convert_period=False): - index = self.data.index - is_datetype = index.inferred_type in ('datetime', 'date', - 'datetime64', 'time') - - if self.use_index: - if convert_period and isinstance(index, PeriodIndex): - self.data = self.data.reindex(index=index.sort_values()) - x = self.data.index.to_timestamp()._mpl_repr() - elif index.is_numeric(): - """ - Matplotlib supports numeric values or datetime objects as - xaxis values. Taking LBYL approach here, by the time - matplotlib raises exception when using non numeric/datetime - values for xaxis, several actions are already taken by plt. - """ - x = index._mpl_repr() - elif is_datetype: - self.data = self.data.sort_index() - x = self.data.index._mpl_repr() - else: - self._need_to_set_index = True - x = lrange(len(index)) - else: - x = lrange(len(index)) - - return x - - @classmethod - def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds): - mask = isnull(y) - if mask.any(): - y = np.ma.array(y) - y = np.ma.masked_where(mask, y) - - if isinstance(x, Index): - x = x._mpl_repr() - - if is_errorbar: - if 'xerr' in kwds: - kwds['xerr'] = np.array(kwds.get('xerr')) - if 'yerr' in kwds: - kwds['yerr'] = np.array(kwds.get('yerr')) - return ax.errorbar(x, y, **kwds) - else: - # prevent style kwarg from going to errorbar, where it is - # unsupported - if style is not None: - args = (x, y, style) - else: - args = (x, y) - return ax.plot(*args, **kwds) - - def _get_index_name(self): - if isinstance(self.data.index, MultiIndex): - name = self.data.index.names - if any(x is not None for x in name): - name = ','.join([pprint_thing(x) for x in name]) - else: - name = None - else: - name = self.data.index.name - if name is not None: - name = pprint_thing(name) - - return name - - @classmethod - def _get_ax_layer(cls, ax, primary=True): - """get left (primary) or right (secondary) axes""" - if primary: - return getattr(ax, 'left_ax', ax) - else: - return getattr(ax, 'right_ax', ax) - - def _get_ax(self, i): - # get the twinx ax if appropriate - if self.subplots: - ax = self.axes[i] - ax = self._maybe_right_yaxis(ax, i) - self.axes[i] = ax - else: - ax = self.axes[0] - ax = self._maybe_right_yaxis(ax, i) - - ax.get_yaxis().set_visible(True) - return ax - - def on_right(self, i): - if isinstance(self.secondary_y, bool): - return self.secondary_y - - if isinstance(self.secondary_y, (tuple, list, np.ndarray, Index)): - return self.data.columns[i] in self.secondary_y - - def _apply_style_colors(self, colors, kwds, col_num, label): - """ - Manage style and color based 
on column number and its label. - Returns tuple of appropriate style and kwds which "color" may be added. - """ - style = None - if self.style is not None: - if isinstance(self.style, list): - try: - style = self.style[col_num] - except IndexError: - pass - elif isinstance(self.style, dict): - style = self.style.get(label, style) - else: - style = self.style - - has_color = 'color' in kwds or self.colormap is not None - nocolor_style = style is None or re.match('[a-z]+', style) is None - if (has_color or self.subplots) and nocolor_style: - kwds['color'] = colors[col_num % len(colors)] - return style, kwds - - def _get_colors(self, num_colors=None, color_kwds='color'): - if num_colors is None: - num_colors = self.nseries - - return _get_standard_colors(num_colors=num_colors, - colormap=self.colormap, - color=self.kwds.get(color_kwds)) - - def _parse_errorbars(self, label, err): - """ - Look for error keyword arguments and return the actual errorbar data - or return the error DataFrame/dict - - Error bars can be specified in several ways: - Series: the user provides a pandas.Series object of the same - length as the data - ndarray: provides a np.ndarray of the same length as the data - DataFrame/dict: error values are paired with keys matching the - key in the plotted DataFrame - str: the name of the column within the plotted DataFrame - """ - - if err is None: - return None - - from pandas import DataFrame, Series - - def match_labels(data, e): - e = e.reindex_axis(data.index) - return e - - # key-matched DataFrame - if isinstance(err, DataFrame): - - err = match_labels(self.data, err) - # key-matched dict - elif isinstance(err, dict): - pass - - # Series of error values - elif isinstance(err, Series): - # broadcast error series across data - err = match_labels(self.data, err) - err = np.atleast_2d(err) - err = np.tile(err, (self.nseries, 1)) - - # errors are a column in the dataframe - elif isinstance(err, string_types): - evalues = self.data[err].values - self.data = self.data[self.data.columns.drop(err)] - err = np.atleast_2d(evalues) - err = np.tile(err, (self.nseries, 1)) - - elif is_list_like(err): - if is_iterator(err): - err = np.atleast_2d(list(err)) - else: - # raw error values - err = np.atleast_2d(err) - - err_shape = err.shape - - # asymmetrical error bars - if err.ndim == 3: - if (err_shape[0] != self.nseries) or \ - (err_shape[1] != 2) or \ - (err_shape[2] != len(self.data)): - msg = "Asymmetrical error bars should be provided " + \ - "with the shape (%u, 2, %u)" % \ - (self.nseries, len(self.data)) - raise ValueError(msg) - - # broadcast errors to each data series - if len(err) == 1: - err = np.tile(err, (self.nseries, 1)) - - elif is_number(err): - err = np.tile([err], (self.nseries, len(self.data))) - - else: - msg = "No valid %s detected" % label - raise ValueError(msg) - - return err - - def _get_errorbars(self, label=None, index=None, xerr=True, yerr=True): - from pandas import DataFrame - errors = {} - - for kw, flag in zip(['xerr', 'yerr'], [xerr, yerr]): - if flag: - err = self.errors[kw] - # user provided label-matched dataframe of errors - if isinstance(err, (DataFrame, dict)): - if label is not None and label in err.keys(): - err = err[label] - else: - err = None - elif index is not None and err is not None: - err = err[index] - - if err is not None: - errors[kw] = err - return errors - - def _get_subplots(self): - from matplotlib.axes import Subplot - return [ax for ax in self.axes[0].get_figure().get_axes() - if isinstance(ax, Subplot)] - - def 
_get_axes_layout(self): - axes = self._get_subplots() - x_set = set() - y_set = set() - for ax in axes: - # check axes coordinates to estimate layout - points = ax.get_position().get_points() - x_set.add(points[0][0]) - y_set.add(points[0][1]) - return (len(y_set), len(x_set)) - - -class PlanePlot(MPLPlot): - """ - Abstract class for plotting on plane, currently scatter and hexbin. - """ - - _layout_type = 'single' - - def __init__(self, data, x, y, **kwargs): - MPLPlot.__init__(self, data, **kwargs) - if x is None or y is None: - raise ValueError(self._kind + ' requires and x and y column') - if is_integer(x) and not self.data.columns.holds_integer(): - x = self.data.columns[x] - if is_integer(y) and not self.data.columns.holds_integer(): - y = self.data.columns[y] - self.x = x - self.y = y - - @property - def nseries(self): - return 1 - - def _post_plot_logic(self, ax, data): - x, y = self.x, self.y - ax.set_ylabel(pprint_thing(y)) - ax.set_xlabel(pprint_thing(x)) - - -class ScatterPlot(PlanePlot): - _kind = 'scatter' - - def __init__(self, data, x, y, s=None, c=None, **kwargs): - if s is None: - # hide the matplotlib default for size, in case we want to change - # the handling of this argument later - s = 20 - super(ScatterPlot, self).__init__(data, x, y, s=s, **kwargs) - if is_integer(c) and not self.data.columns.holds_integer(): - c = self.data.columns[c] - self.c = c - - def _make_plot(self): - x, y, c, data = self.x, self.y, self.c, self.data - ax = self.axes[0] - - c_is_column = is_hashable(c) and c in self.data.columns - - # plot a colorbar only if a colormap is provided or necessary - cb = self.kwds.pop('colorbar', self.colormap or c_is_column) - - # pandas uses colormap, matplotlib uses cmap. - cmap = self.colormap or 'Greys' - cmap = self.plt.cm.get_cmap(cmap) - color = self.kwds.pop("color", None) - if c is not None and color is not None: - raise TypeError('Specify exactly one of `c` and `color`') - elif c is None and color is None: - c_values = self.plt.rcParams['patch.facecolor'] - elif color is not None: - c_values = color - elif c_is_column: - c_values = self.data[c].values - else: - c_values = c - - if self.legend and hasattr(self, 'label'): - label = self.label - else: - label = None - scatter = ax.scatter(data[x].values, data[y].values, c=c_values, - label=label, cmap=cmap, **self.kwds) - if cb: - img = ax.collections[0] - kws = dict(ax=ax) - if self.mpl_ge_1_3_1(): - kws['label'] = c if c_is_column else '' - self.fig.colorbar(img, **kws) - - if label is not None: - self._add_legend_handle(scatter, label) - else: - self.legend = False - - errors_x = self._get_errorbars(label=x, index=0, yerr=False) - errors_y = self._get_errorbars(label=y, index=0, xerr=False) - if len(errors_x) > 0 or len(errors_y) > 0: - err_kwds = dict(errors_x, **errors_y) - err_kwds['ecolor'] = scatter.get_facecolor()[0] - ax.errorbar(data[x].values, data[y].values, - linestyle='none', **err_kwds) - - -class HexBinPlot(PlanePlot): - _kind = 'hexbin' - - def __init__(self, data, x, y, C=None, **kwargs): - super(HexBinPlot, self).__init__(data, x, y, **kwargs) - if is_integer(C) and not self.data.columns.holds_integer(): - C = self.data.columns[C] - self.C = C - - def _make_plot(self): - x, y, data, C = self.x, self.y, self.data, self.C - ax = self.axes[0] - # pandas uses colormap, matplotlib uses cmap. 
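# Usage sketch (assumed frame) of the colormap handling in these plot
# classes: pandas accepts `colormap` and hands it to matplotlib as `cmap`,
# with scatter falling back to 'Greys' and hexbin to 'BuGn', drawing a
# colorbar by default for hexbin.
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(200, 2), columns=['x', 'y'])
df.plot.hexbin(x='x', y='y', colormap='viridis', gridsize=15)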
-class HexBinPlot(PlanePlot):
-    _kind = 'hexbin'
-
-    def __init__(self, data, x, y, C=None, **kwargs):
-        super(HexBinPlot, self).__init__(data, x, y, **kwargs)
-        if is_integer(C) and not self.data.columns.holds_integer():
-            C = self.data.columns[C]
-        self.C = C
-
-    def _make_plot(self):
-        x, y, data, C = self.x, self.y, self.data, self.C
-        ax = self.axes[0]
-        # pandas uses colormap, matplotlib uses cmap.
-        cmap = self.colormap or 'BuGn'
-        cmap = self.plt.cm.get_cmap(cmap)
-        cb = self.kwds.pop('colorbar', True)
-
-        if C is None:
-            c_values = None
-        else:
-            c_values = data[C].values
-
-        ax.hexbin(data[x].values, data[y].values, C=c_values, cmap=cmap,
-                  **self.kwds)
-        if cb:
-            img = ax.collections[0]
-            self.fig.colorbar(img, ax=ax)
-
-    def _make_legend(self):
-        pass
-
-
-class LinePlot(MPLPlot):
-    _kind = 'line'
-    _default_rot = 0
-    orientation = 'vertical'
-
-    def __init__(self, data, **kwargs):
-        MPLPlot.__init__(self, data, **kwargs)
-        if self.stacked:
-            self.data = self.data.fillna(value=0)
-        self.x_compat = plot_params['x_compat']
-        if 'x_compat' in self.kwds:
-            self.x_compat = bool(self.kwds.pop('x_compat'))
-
-    def _is_ts_plot(self):
-        # this is slightly deceptive
-        return not self.x_compat and self.use_index and self._use_dynamic_x()
-
-    def _use_dynamic_x(self):
-        from pandas.tseries.plotting import _use_dynamic_x
-        return _use_dynamic_x(self._get_ax(0), self.data)
-
-    def _make_plot(self):
-        if self._is_ts_plot():
-            from pandas.tseries.plotting import _maybe_convert_index
-            data = _maybe_convert_index(self._get_ax(0), self.data)
-
-            x = data.index  # dummy, not used
-            plotf = self._ts_plot
-            it = self._iter_data(data=data, keep_index=True)
-        else:
-            x = self._get_xticks(convert_period=True)
-            plotf = self._plot
-            it = self._iter_data()
-
-        stacking_id = self._get_stacking_id()
-        is_errorbar = any(e is not None for e in self.errors.values())
-
-        colors = self._get_colors()
-        for i, (label, y) in enumerate(it):
-            ax = self._get_ax(i)
-            kwds = self.kwds.copy()
-            style, kwds = self._apply_style_colors(colors, kwds, i, label)
-
-            errors = self._get_errorbars(label=label, index=i)
-            kwds = dict(kwds, **errors)
-
-            label = pprint_thing(label)  # .encode('utf-8')
-            kwds['label'] = label
-
-            newlines = plotf(ax, x, y, style=style, column_num=i,
-                             stacking_id=stacking_id,
-                             is_errorbar=is_errorbar,
-                             **kwds)
-            self._add_legend_handle(newlines[0], label, index=i)
-
-            lines = _get_all_lines(ax)
-            left, right = _get_xlim(lines)
-            ax.set_xlim(left, right)
-
-    @classmethod
-    def _plot(cls, ax, x, y, style=None, column_num=None,
-              stacking_id=None, **kwds):
-        # column_num is used to get the target column from plotf in line and
-        # area plots
-        if column_num == 0:
-            cls._initialize_stacker(ax, stacking_id, len(y))
-        y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label'])
-        lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds)
-        cls._update_stacker(ax, stacking_id, y)
-        return lines
-
-    @classmethod
-    def _ts_plot(cls, ax, x, data, style=None, **kwds):
-        from pandas.tseries.plotting import (_maybe_resample,
-                                             _decorate_axes,
-                                             format_dateaxis)
-        # accept x to be consistent with normal plot func,
-        # x is not passed to tsplot as it uses data.index as x coordinate
-        # column_num must be in kwds for stacking purpose
-        freq, data = _maybe_resample(data, ax, kwds)
-
-        # Set ax with freq info
-        _decorate_axes(ax, freq, kwds)
-        # digging deeper
-        if hasattr(ax, 'left_ax'):
-            _decorate_axes(ax.left_ax, freq, kwds)
-        if hasattr(ax, 'right_ax'):
-            _decorate_axes(ax.right_ax, freq, kwds)
-        ax._plot_data.append((data, cls._kind, kwds))
-
-        lines = cls._plot(ax, data.index, data.values, style=style, **kwds)
-        # set date formatter, locators and rescale limits
-        format_dateaxis(ax, ax.freq, data.index)
-        return lines
-
-    def _get_stacking_id(self):
-        if self.stacked:
-            return id(self.data)
-        else:
-            return None
-
-    @classmethod
-    def _initialize_stacker(cls, ax, stacking_id, n):
-        if stacking_id
is None: - return - if not hasattr(ax, '_stacker_pos_prior'): - ax._stacker_pos_prior = {} - if not hasattr(ax, '_stacker_neg_prior'): - ax._stacker_neg_prior = {} - ax._stacker_pos_prior[stacking_id] = np.zeros(n) - ax._stacker_neg_prior[stacking_id] = np.zeros(n) - - @classmethod - def _get_stacked_values(cls, ax, stacking_id, values, label): - if stacking_id is None: - return values - if not hasattr(ax, '_stacker_pos_prior'): - # stacker may not be initialized for subplots - cls._initialize_stacker(ax, stacking_id, len(values)) - - if (values >= 0).all(): - return ax._stacker_pos_prior[stacking_id] + values - elif (values <= 0).all(): - return ax._stacker_neg_prior[stacking_id] + values - - raise ValueError('When stacked is True, each column must be either ' - 'all positive or negative.' - '{0} contains both positive and negative values' - .format(label)) - - @classmethod - def _update_stacker(cls, ax, stacking_id, values): - if stacking_id is None: - return - if (values >= 0).all(): - ax._stacker_pos_prior[stacking_id] += values - elif (values <= 0).all(): - ax._stacker_neg_prior[stacking_id] += values - - def _post_plot_logic(self, ax, data): - condition = (not self._use_dynamic_x() and - data.index.is_all_dates and - not self.subplots or - (self.subplots and self.sharex)) - - index_name = self._get_index_name() - - if condition: - # irregular TS rotated 30 deg. by default - # probably a better place to check / set this. - if not self._rot_set: - self.rot = 30 - format_date_labels(ax, rot=self.rot) - - if index_name is not None and self.use_index: - ax.set_xlabel(index_name) - - -class AreaPlot(LinePlot): - _kind = 'area' - - def __init__(self, data, **kwargs): - kwargs.setdefault('stacked', True) - data = data.fillna(value=0) - LinePlot.__init__(self, data, **kwargs) - - if not self.stacked: - # use smaller alpha to distinguish overlap - self.kwds.setdefault('alpha', 0.5) - - if self.logy or self.loglog: - raise ValueError("Log-y scales are not supported in area plot") - - @classmethod - def _plot(cls, ax, x, y, style=None, column_num=None, - stacking_id=None, is_errorbar=False, **kwds): - - if column_num == 0: - cls._initialize_stacker(ax, stacking_id, len(y)) - y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label']) - - # need to remove label, because subplots uses mpl legend as it is - line_kwds = kwds.copy() - if cls.mpl_ge_1_5_0(): - line_kwds.pop('label') - lines = MPLPlot._plot(ax, x, y_values, style=style, **line_kwds) - - # get data from the line to get coordinates for fill_between - xdata, y_values = lines[0].get_data(orig=False) - - # unable to use ``_get_stacked_values`` here to get starting point - if stacking_id is None: - start = np.zeros(len(y)) - elif (y >= 0).all(): - start = ax._stacker_pos_prior[stacking_id] - elif (y <= 0).all(): - start = ax._stacker_neg_prior[stacking_id] - else: - start = np.zeros(len(y)) - - if 'color' not in kwds: - kwds['color'] = lines[0].get_color() - - rect = ax.fill_between(xdata, start, y_values, **kwds) - cls._update_stacker(ax, stacking_id, y) - - # LinePlot expects list of artists - res = [rect] if cls.mpl_ge_1_5_0() else lines - return res - - def _add_legend_handle(self, handle, label, index=None): - if not self.mpl_ge_1_5_0(): - from matplotlib.patches import Rectangle - # Because fill_between isn't supported in legend, - # specifically add Rectangle handle here - alpha = self.kwds.get('alpha', None) - handle = Rectangle((0, 0), 1, 1, fc=handle.get_color(), - alpha=alpha) - LinePlot._add_legend_handle(self, handle, 
label, index=index) - - def _post_plot_logic(self, ax, data): - LinePlot._post_plot_logic(self, ax, data) - - if self.ylim is None: - if (data >= 0).all().all(): - ax.set_ylim(0, None) - elif (data <= 0).all().all(): - ax.set_ylim(None, 0) - - -class BarPlot(MPLPlot): - _kind = 'bar' - _default_rot = 90 - orientation = 'vertical' - - def __init__(self, data, **kwargs): - self.bar_width = kwargs.pop('width', 0.5) - pos = kwargs.pop('position', 0.5) - kwargs.setdefault('align', 'center') - self.tick_pos = np.arange(len(data)) - - self.bottom = kwargs.pop('bottom', 0) - self.left = kwargs.pop('left', 0) - - self.log = kwargs.pop('log', False) - MPLPlot.__init__(self, data, **kwargs) - - if self.stacked or self.subplots: - self.tickoffset = self.bar_width * pos - if kwargs['align'] == 'edge': - self.lim_offset = self.bar_width / 2 - else: - self.lim_offset = 0 - else: - if kwargs['align'] == 'edge': - w = self.bar_width / self.nseries - self.tickoffset = self.bar_width * (pos - 0.5) + w * 0.5 - self.lim_offset = w * 0.5 - else: - self.tickoffset = self.bar_width * pos - self.lim_offset = 0 - - self.ax_pos = self.tick_pos - self.tickoffset - - def _args_adjust(self): - if is_list_like(self.bottom): - self.bottom = np.array(self.bottom) - if is_list_like(self.left): - self.left = np.array(self.left) - - @classmethod - def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): - return ax.bar(x, y, w, bottom=start, log=log, **kwds) - - @property - def _start_base(self): - return self.bottom - - def _make_plot(self): - import matplotlib as mpl - - colors = self._get_colors() - ncolors = len(colors) - - pos_prior = neg_prior = np.zeros(len(self.data)) - K = self.nseries - - for i, (label, y) in enumerate(self._iter_data(fillna=0)): - ax = self._get_ax(i) - kwds = self.kwds.copy() - kwds['color'] = colors[i % ncolors] - - errors = self._get_errorbars(label=label, index=i) - kwds = dict(kwds, **errors) - - label = pprint_thing(label) - - if (('yerr' in kwds) or ('xerr' in kwds)) \ - and (kwds.get('ecolor') is None): - kwds['ecolor'] = mpl.rcParams['xtick.color'] - - start = 0 - if self.log and (y >= 1).all(): - start = 1 - start = start + self._start_base - - if self.subplots: - w = self.bar_width / 2 - rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, - start=start, label=label, - log=self.log, **kwds) - ax.set_title(label) - elif self.stacked: - mask = y > 0 - start = np.where(mask, pos_prior, neg_prior) + self._start_base - w = self.bar_width / 2 - rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, - start=start, label=label, - log=self.log, **kwds) - pos_prior = pos_prior + np.where(mask, y, 0) - neg_prior = neg_prior + np.where(mask, 0, y) - else: - w = self.bar_width / K - rect = self._plot(ax, self.ax_pos + (i + 0.5) * w, y, w, - start=start, label=label, - log=self.log, **kwds) - self._add_legend_handle(rect, label, index=i) - - def _post_plot_logic(self, ax, data): - if self.use_index: - str_index = [pprint_thing(key) for key in data.index] - else: - str_index = [pprint_thing(key) for key in range(data.shape[0])] - name = self._get_index_name() - - s_edge = self.ax_pos[0] - 0.25 + self.lim_offset - e_edge = self.ax_pos[-1] + 0.25 + self.bar_width + self.lim_offset - - self._decorate_ticks(ax, name, str_index, s_edge, e_edge) - - def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): - ax.set_xlim((start_edge, end_edge)) - ax.set_xticks(self.tick_pos) - ax.set_xticklabels(ticklabels) - if name is not None and self.use_index: - ax.set_xlabel(name) - - -class 
BarhPlot(BarPlot): - _kind = 'barh' - _default_rot = 0 - orientation = 'horizontal' - - @property - def _start_base(self): - return self.left - - @classmethod - def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): - return ax.barh(x, y, w, left=start, log=log, **kwds) - - def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): - # horizontal bars - ax.set_ylim((start_edge, end_edge)) - ax.set_yticks(self.tick_pos) - ax.set_yticklabels(ticklabels) - if name is not None and self.use_index: - ax.set_ylabel(name) - - -class HistPlot(LinePlot): - _kind = 'hist' - - def __init__(self, data, bins=10, bottom=0, **kwargs): - self.bins = bins # use mpl default - self.bottom = bottom - # Do not call LinePlot.__init__ which may fill nan - MPLPlot.__init__(self, data, **kwargs) - - def _args_adjust(self): - if is_integer(self.bins): - # create common bin edge - values = (self.data._convert(datetime=True)._get_numeric_data()) - values = np.ravel(values) - values = values[~isnull(values)] - - hist, self.bins = np.histogram( - values, bins=self.bins, - range=self.kwds.get('range', None), - weights=self.kwds.get('weights', None)) - - if is_list_like(self.bottom): - self.bottom = np.array(self.bottom) - - @classmethod - def _plot(cls, ax, y, style=None, bins=None, bottom=0, column_num=0, - stacking_id=None, **kwds): - if column_num == 0: - cls._initialize_stacker(ax, stacking_id, len(bins) - 1) - y = y[~isnull(y)] - - base = np.zeros(len(bins) - 1) - bottom = bottom + \ - cls._get_stacked_values(ax, stacking_id, base, kwds['label']) - # ignore style - n, bins, patches = ax.hist(y, bins=bins, bottom=bottom, **kwds) - cls._update_stacker(ax, stacking_id, n) - return patches - - def _make_plot(self): - colors = self._get_colors() - stacking_id = self._get_stacking_id() - - for i, (label, y) in enumerate(self._iter_data()): - ax = self._get_ax(i) - - kwds = self.kwds.copy() - - label = pprint_thing(label) - kwds['label'] = label - - style, kwds = self._apply_style_colors(colors, kwds, i, label) - if style is not None: - kwds['style'] = style - - kwds = self._make_plot_keywords(kwds, y) - artists = self._plot(ax, y, column_num=i, - stacking_id=stacking_id, **kwds) - self._add_legend_handle(artists[0], label, index=i) - - def _make_plot_keywords(self, kwds, y): - """merge BoxPlot/KdePlot properties to passed kwds""" - # y is required for KdePlot - kwds['bottom'] = self.bottom - kwds['bins'] = self.bins - return kwds - - def _post_plot_logic(self, ax, data): - if self.orientation == 'horizontal': - ax.set_xlabel('Frequency') - else: - ax.set_ylabel('Frequency') - - @property - def orientation(self): - if self.kwds.get('orientation', None) == 'horizontal': - return 'horizontal' - else: - return 'vertical' - - -class KdePlot(HistPlot): - _kind = 'kde' - orientation = 'vertical' - - def __init__(self, data, bw_method=None, ind=None, **kwargs): - MPLPlot.__init__(self, data, **kwargs) - self.bw_method = bw_method - self.ind = ind - - def _args_adjust(self): - pass - - def _get_ind(self, y): - if self.ind is None: - # np.nanmax() and np.nanmin() ignores the missing values - sample_range = np.nanmax(y) - np.nanmin(y) - ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, - np.nanmax(y) + 0.5 * sample_range, 1000) - else: - ind = self.ind - return ind - - @classmethod - def _plot(cls, ax, y, style=None, bw_method=None, ind=None, - column_num=None, stacking_id=None, **kwds): - from scipy.stats import gaussian_kde - from scipy import __version__ as spv - - y = remove_na(y) - - if LooseVersion(spv) >= 
'0.11.0': - gkde = gaussian_kde(y, bw_method=bw_method) - else: - gkde = gaussian_kde(y) - if bw_method is not None: - msg = ('bw_method was added in Scipy 0.11.0.' + - ' Scipy version in use is %s.' % spv) - warnings.warn(msg) - - y = gkde.evaluate(ind) - lines = MPLPlot._plot(ax, ind, y, style=style, **kwds) - return lines - - def _make_plot_keywords(self, kwds, y): - kwds['bw_method'] = self.bw_method - kwds['ind'] = self._get_ind(y) - return kwds - - def _post_plot_logic(self, ax, data): - ax.set_ylabel('Density') - - -class PiePlot(MPLPlot): - _kind = 'pie' - _layout_type = 'horizontal' - - def __init__(self, data, kind=None, **kwargs): - data = data.fillna(value=0) - if (data < 0).any().any(): - raise ValueError("{0} doesn't allow negative values".format(kind)) - MPLPlot.__init__(self, data, kind=kind, **kwargs) - - def _args_adjust(self): - self.grid = False - self.logy = False - self.logx = False - self.loglog = False - - def _validate_color_args(self): - pass - - def _make_plot(self): - colors = self._get_colors( - num_colors=len(self.data), color_kwds='colors') - self.kwds.setdefault('colors', colors) - - for i, (label, y) in enumerate(self._iter_data()): - ax = self._get_ax(i) - if label is not None: - label = pprint_thing(label) - ax.set_ylabel(label) - - kwds = self.kwds.copy() - - def blank_labeler(label, value): - if value == 0: - return '' - else: - return label - - idx = [pprint_thing(v) for v in self.data.index] - labels = kwds.pop('labels', idx) - # labels is used for each wedge's labels - # Blank out labels for values of 0 so they don't overlap - # with nonzero wedges - if labels is not None: - blabels = [blank_labeler(l, value) for - l, value in zip(labels, y)] - else: - blabels = None - results = ax.pie(y, labels=blabels, **kwds) - - if kwds.get('autopct', None) is not None: - patches, texts, autotexts = results - else: - patches, texts = results - autotexts = [] - - if self.fontsize is not None: - for t in texts + autotexts: - t.set_fontsize(self.fontsize) - - # leglabels is used for legend labels - leglabels = labels if labels is not None else idx - for p, l in zip(patches, leglabels): - self._add_legend_handle(p, l) - - -class BoxPlot(LinePlot): - _kind = 'box' - _layout_type = 'horizontal' - - _valid_return_types = (None, 'axes', 'dict', 'both') - # namedtuple to hold results - BP = namedtuple("Boxplot", ['ax', 'lines']) - - def __init__(self, data, return_type='axes', **kwargs): - # Do not call LinePlot.__init__ which may fill nan - if return_type not in self._valid_return_types: - raise ValueError( - "return_type must be {None, 'axes', 'dict', 'both'}") - - self.return_type = return_type - MPLPlot.__init__(self, data, **kwargs) - - def _args_adjust(self): - if self.subplots: - # Disable label ax sharing. 
Otherwise, all subplots show the last
-            # column label
-            if self.orientation == 'vertical':
-                self.sharex = False
-            else:
-                self.sharey = False
-
-    @classmethod
-    def _plot(cls, ax, y, column_num=None, return_type='axes', **kwds):
-        if y.ndim == 2:
-            y = [remove_na(v) for v in y]
-            # Boxplot fails with empty arrays, so need to add a NaN
-            # if any cols are empty
-            # GH 8181
-            y = [v if v.size > 0 else np.array([np.nan]) for v in y]
-        else:
-            y = remove_na(y)
-        bp = ax.boxplot(y, **kwds)
-
-        if return_type == 'dict':
-            return bp, bp
-        elif return_type == 'both':
-            return cls.BP(ax=ax, lines=bp), bp
-        else:
-            return ax, bp
-
-    def _validate_color_args(self):
-        if 'color' in self.kwds:
-            if self.colormap is not None:
-                warnings.warn("'color' and 'colormap' cannot be used "
-                              "simultaneously. Using 'color'")
-            self.color = self.kwds.pop('color')
-
-            if isinstance(self.color, dict):
-                valid_keys = ['boxes', 'whiskers', 'medians', 'caps']
-                for key, values in compat.iteritems(self.color):
-                    if key not in valid_keys:
-                        raise ValueError("color dict contains invalid "
-                                         "key '{0}'. "
-                                         "The key must be one of {1}"
-                                         .format(key, valid_keys))
-        else:
-            self.color = None
-
-        # get standard colors for default
-        colors = _get_standard_colors(num_colors=3,
-                                      colormap=self.colormap,
-                                      color=None)
-        # use 2 colors by default, for box/whisker and median
-        # flier colors aren't needed here
-        # because they can be specified by the ``sym`` kw
-        self._boxes_c = colors[0]
-        self._whiskers_c = colors[0]
-        self._medians_c = colors[2]
-        self._caps_c = 'k'  # mpl default
-
-    def _get_colors(self, num_colors=None, color_kwds='color'):
-        pass
-
-    def maybe_color_bp(self, bp):
-        if isinstance(self.color, dict):
-            boxes = self.color.get('boxes', self._boxes_c)
-            whiskers = self.color.get('whiskers', self._whiskers_c)
-            medians = self.color.get('medians', self._medians_c)
-            caps = self.color.get('caps', self._caps_c)
-        else:
-            # Other types are forwarded to matplotlib
-            # If None, use default colors
-            boxes = self.color or self._boxes_c
-            whiskers = self.color or self._whiskers_c
-            medians = self.color or self._medians_c
-            caps = self.color or self._caps_c
-
-        from matplotlib.artist import setp
-        setp(bp['boxes'], color=boxes, alpha=1)
-        setp(bp['whiskers'], color=whiskers, alpha=1)
-        setp(bp['medians'], color=medians, alpha=1)
-        setp(bp['caps'], color=caps, alpha=1)
-
-    def _make_plot(self):
-        if self.subplots:
-            self._return_obj = Series()
-
-            for i, (label, y) in enumerate(self._iter_data()):
-                ax = self._get_ax(i)
-                kwds = self.kwds.copy()
-
-                ret, bp = self._plot(ax, y, column_num=i,
-                                     return_type=self.return_type, **kwds)
-                self.maybe_color_bp(bp)
-                self._return_obj[label] = ret
-
-                label = [pprint_thing(label)]
-                self._set_ticklabels(ax, label)
-        else:
-            y = self.data.values.T
-            ax = self._get_ax(0)
-            kwds = self.kwds.copy()
-
-            ret, bp = self._plot(ax, y, column_num=0,
-                                 return_type=self.return_type, **kwds)
-            self.maybe_color_bp(bp)
-            self._return_obj = ret
-
-            labels = [l for l, _ in self._iter_data()]
-            labels = [pprint_thing(l) for l in labels]
-            if not self.use_index:
-                labels = [pprint_thing(key) for key in range(len(labels))]
-            self._set_ticklabels(ax, labels)
-
-    def _set_ticklabels(self, ax, labels):
-        if self.orientation == 'vertical':
-            ax.set_xticklabels(labels)
-        else:
-            ax.set_yticklabels(labels)
-
-    def _make_legend(self):
-        pass
-
-    def _post_plot_logic(self, ax, data):
-        pass
-
-    @property
-    def orientation(self):
-        if self.kwds.get('vert', True):
-            return 'vertical'
-        else:
-            return 'horizontal'
-
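A minimal sketch of how the two knobs handled above, the ``color`` dict
(valid keys: 'boxes', 'whiskers', 'medians', 'caps') and ``return_type``,
combine at the user level; the frame and its columns are invented here for
illustration:

    >>> import numpy as np
    >>> import pandas as pd
    >>> df = pd.DataFrame(np.random.randn(10, 2), columns=['a', 'b'])
    >>> ax = df.plot.box(color={'boxes': 'green', 'medians': 'red'})
    >>> lines = df.plot.box(return_type='dict')     # dict of matplotlib Lines
    >>> both = df.plot.box(return_type='both')      # namedtuple of (ax, lines)
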
@property - def result(self): - if self.return_type is None: - return super(BoxPlot, self).result - else: - return self._return_obj - - -# kinds supported by both dataframe and series -_common_kinds = ['line', 'bar', 'barh', - 'kde', 'density', 'area', 'hist', 'box'] -# kinds supported by dataframe -_dataframe_kinds = ['scatter', 'hexbin'] -# kinds supported only by series or dataframe single column -_series_kinds = ['pie'] -_all_kinds = _common_kinds + _dataframe_kinds + _series_kinds - -_klasses = [LinePlot, BarPlot, BarhPlot, KdePlot, HistPlot, BoxPlot, - ScatterPlot, HexBinPlot, AreaPlot, PiePlot] - -_plot_klass = {} -for klass in _klasses: - _plot_klass[klass._kind] = klass - - -def _plot(data, x=None, y=None, subplots=False, - ax=None, kind='line', **kwds): - kind = _get_standard_kind(kind.lower().strip()) - if kind in _all_kinds: - klass = _plot_klass[kind] - else: - raise ValueError("%r is not a valid plot kind" % kind) - - from pandas import DataFrame - if kind in _dataframe_kinds: - if isinstance(data, DataFrame): - plot_obj = klass(data, x=x, y=y, subplots=subplots, ax=ax, - kind=kind, **kwds) - else: - raise ValueError("plot kind %r can only be used for data frames" - % kind) - - elif kind in _series_kinds: - if isinstance(data, DataFrame): - if y is None and subplots is False: - msg = "{0} requires either y column or 'subplots=True'" - raise ValueError(msg.format(kind)) - elif y is not None: - if is_integer(y) and not data.columns.holds_integer(): - y = data.columns[y] - # converted to series actually. copy to not modify - data = data[y].copy() - data.index.name = y - plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) - else: - if isinstance(data, DataFrame): - if x is not None: - if is_integer(x) and not data.columns.holds_integer(): - x = data.columns[x] - data = data.set_index(x) - - if y is not None: - if is_integer(y) and not data.columns.holds_integer(): - y = data.columns[y] - label = kwds['label'] if 'label' in kwds else y - series = data[y].copy() # Don't modify - series.name = label - - for kw in ['xerr', 'yerr']: - if (kw in kwds) and \ - (isinstance(kwds[kw], string_types) or - is_integer(kwds[kw])): - try: - kwds[kw] = data[kwds[kw]] - except (IndexError, KeyError, TypeError): - pass - data = series - plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) - - plot_obj.generate() - plot_obj.draw() - return plot_obj.result - - -df_kind = """- 'scatter' : scatter plot - - 'hexbin' : hexbin plot""" -series_kind = "" - -df_coord = """x : label or position, default None - y : label or position, default None - Allows plotting of one column versus another""" -series_coord = "" - -df_unique = """stacked : boolean, default False in line and - bar plots, and True in area plot. If True, create stacked plot. 
- sort_columns : boolean, default False - Sort column names to determine plot ordering - secondary_y : boolean or sequence, default False - Whether to plot on the secondary y-axis - If a list/tuple, which columns to plot on secondary y-axis""" -series_unique = """label : label argument to provide to plot - secondary_y : boolean or sequence of ints, default False - If True then y-axis will be on the right""" - -df_ax = """ax : matplotlib axes object, default None - subplots : boolean, default False - Make separate subplots for each column - sharex : boolean, default True if ax is None else False - In case subplots=True, share x axis and set some x axis labels to - invisible; defaults to True if ax is None otherwise False if an ax - is passed in; Be aware, that passing in both an ax and sharex=True - will alter all x axis labels for all axis in a figure! - sharey : boolean, default False - In case subplots=True, share y axis and set some y axis labels to - invisible - layout : tuple (optional) - (rows, columns) for the layout of subplots""" -series_ax = """ax : matplotlib axes object - If not passed, uses gca()""" - -df_note = """- If `kind` = 'scatter' and the argument `c` is the name of a dataframe - column, the values of that column are used to color each point. - - If `kind` = 'hexbin', you can control the size of the bins with the - `gridsize` argument. By default, a histogram of the counts around each - `(x, y)` point is computed. You can specify alternative aggregations - by passing values to the `C` and `reduce_C_function` arguments. - `C` specifies the value at each `(x, y)` point and `reduce_C_function` - is a function of one argument that reduces all the values in a bin to - a single number (e.g. `mean`, `max`, `sum`, `std`).""" -series_note = "" - -_shared_doc_df_kwargs = dict(klass='DataFrame', klass_obj='df', - klass_kind=df_kind, klass_coord=df_coord, - klass_ax=df_ax, klass_unique=df_unique, - klass_note=df_note) -_shared_doc_series_kwargs = dict(klass='Series', klass_obj='s', - klass_kind=series_kind, - klass_coord=series_coord, klass_ax=series_ax, - klass_unique=series_unique, - klass_note=series_note) - -_shared_docs['plot'] = """ - Make plots of %(klass)s using matplotlib / pylab. - - *New in version 0.17.0:* Each plot kind has a corresponding method on the - ``%(klass)s.plot`` accessor: - ``%(klass_obj)s.plot(kind='line')`` is equivalent to - ``%(klass_obj)s.plot.line()``. - - Parameters - ---------- - data : %(klass)s - %(klass_coord)s - kind : str - - 'line' : line plot (default) - - 'bar' : vertical bar plot - - 'barh' : horizontal bar plot - - 'hist' : histogram - - 'box' : boxplot - - 'kde' : Kernel Density Estimation plot - - 'density' : same as 'kde' - - 'area' : area plot - - 'pie' : pie plot - %(klass_kind)s - %(klass_ax)s - figsize : a tuple (width, height) in inches - use_index : boolean, default True - Use index as ticks for x axis - title : string or list - Title to use for the plot. If a string is passed, print the string at - the top of the figure. If a list is passed and `subplots` is True, - print each item in the list above the corresponding subplot. 
- grid : boolean, default None (matlab style default) - Axis grid lines - legend : False/True/'reverse' - Place legend on axis subplots - style : list or dict - matplotlib line style per column - logx : boolean, default False - Use log scaling on x axis - logy : boolean, default False - Use log scaling on y axis - loglog : boolean, default False - Use log scaling on both x and y axes - xticks : sequence - Values to use for the xticks - yticks : sequence - Values to use for the yticks - xlim : 2-tuple/list - ylim : 2-tuple/list - rot : int, default None - Rotation for ticks (xticks for vertical, yticks for horizontal plots) - fontsize : int, default None - Font size for xticks and yticks - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that name - from matplotlib. - colorbar : boolean, optional - If True, plot colorbar (only relevant for 'scatter' and 'hexbin' plots) - position : float - Specify relative alignments for bar plot layout. - From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center) - layout : tuple (optional) - (rows, columns) for the layout of the plot - table : boolean, Series or DataFrame, default False - If True, draw a table using the data in the DataFrame and the data will - be transposed to meet matplotlib's default layout. - If a Series or DataFrame is passed, use passed data to draw a table. - yerr : DataFrame, Series, array-like, dict and str - See :ref:`Plotting with Error Bars ` for - detail. - xerr : same types as yerr. - %(klass_unique)s - mark_right : boolean, default True - When using a secondary_y axis, automatically mark the column - labels with "(right)" in the legend - kwds : keywords - Options to pass to matplotlib plotting method - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - - Notes - ----- - - - See matplotlib documentation online for more on this subject - - If `kind` = 'bar' or 'barh', you can specify relative alignments - for bar plot layout by `position` keyword. - From 0 (left/bottom-end) to 1 (right/top-end). 
Default is 0.5 (center) - %(klass_note)s - - """ - - -@Appender(_shared_docs['plot'] % _shared_doc_df_kwargs) -def plot_frame(data, x=None, y=None, kind='line', ax=None, - subplots=False, sharex=None, sharey=False, layout=None, - figsize=None, use_index=True, title=None, grid=None, - legend=True, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - secondary_y=False, sort_columns=False, - **kwds): - return _plot(data, kind=kind, x=x, y=y, ax=ax, - subplots=subplots, sharex=sharex, sharey=sharey, - layout=layout, figsize=figsize, use_index=use_index, - title=title, grid=grid, legend=legend, - style=style, logx=logx, logy=logy, loglog=loglog, - xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, - rot=rot, fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, - secondary_y=secondary_y, sort_columns=sort_columns, - **kwds) - - -@Appender(_shared_docs['plot'] % _shared_doc_series_kwargs) -def plot_series(data, kind='line', ax=None, # Series unique - figsize=None, use_index=True, title=None, grid=None, - legend=False, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - label=None, secondary_y=False, # Series unique - **kwds): - - import matplotlib.pyplot as plt - """ - If no axes is specified, check whether there are existing figures - If there is no existing figures, _gca() will - create a figure with the default figsize, causing the figsize=parameter to - be ignored. - """ - if ax is None and len(plt.get_fignums()) > 0: - ax = _gca() - ax = MPLPlot._get_ax_layer(ax) - return _plot(data, kind=kind, ax=ax, - figsize=figsize, use_index=use_index, title=title, - grid=grid, legend=legend, - style=style, logx=logx, logy=logy, loglog=loglog, - xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, - rot=rot, fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, - label=label, secondary_y=secondary_y, - **kwds) - - -_shared_docs['boxplot'] = """ - Make a box plot from DataFrame column optionally grouped by some columns or - other inputs - - Parameters - ---------- - data : the pandas object holding the data - column : column name or list of names, or vector - Can be any valid input to groupby - by : string or sequence - Column in the DataFrame to group by - ax : Matplotlib axes object, optional - fontsize : int or string - rot : label rotation angle - figsize : A tuple (width, height) in inches - grid : Setting this to True will show the grid - layout : tuple (optional) - (rows, columns) for the layout of the plot - return_type : {None, 'axes', 'dict', 'both'}, default None - The kind of object to return. The default is ``axes`` - 'axes' returns the matplotlib axes the boxplot is drawn on; - 'dict' returns a dictionary whose values are the matplotlib - Lines of the boxplot; - 'both' returns a namedtuple with the axes and dict. - - When grouping with ``by``, a Series mapping columns to ``return_type`` - is returned, unless ``return_type`` is None, in which case a NumPy - array of axes is returned with the same shape as ``layout``. - See the prose documentation for more. 
- - kwds : other plotting keyword arguments to be passed to matplotlib boxplot - function - - Returns - ------- - lines : dict - ax : matplotlib Axes - (ax, lines): namedtuple - - Notes - ----- - Use ``return_type='dict'`` when you want to tweak the appearance - of the lines after plotting. In this case a dict containing the Lines - making up the boxes, caps, fliers, medians, and whiskers is returned. - """ - - -@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) -def boxplot(data, column=None, by=None, ax=None, fontsize=None, - rot=0, grid=True, figsize=None, layout=None, return_type=None, - **kwds): - - # validate return_type: - if return_type not in BoxPlot._valid_return_types: - raise ValueError("return_type must be {'axes', 'dict', 'both'}") - - from pandas import Series, DataFrame - if isinstance(data, Series): - data = DataFrame({'x': data}) - column = 'x' - - def _get_colors(): - return _get_standard_colors(color=kwds.get('color'), num_colors=1) - - def maybe_color_bp(bp): - if 'color' not in kwds: - from matplotlib.artist import setp - setp(bp['boxes'], color=colors[0], alpha=1) - setp(bp['whiskers'], color=colors[0], alpha=1) - setp(bp['medians'], color=colors[2], alpha=1) - - def plot_group(keys, values, ax): - keys = [pprint_thing(x) for x in keys] - values = [remove_na(v) for v in values] - bp = ax.boxplot(values, **kwds) - if fontsize is not None: - ax.tick_params(axis='both', labelsize=fontsize) - if kwds.get('vert', 1): - ax.set_xticklabels(keys, rotation=rot) - else: - ax.set_yticklabels(keys, rotation=rot) - maybe_color_bp(bp) - - # Return axes in multiplot case, maybe revisit later # 985 - if return_type == 'dict': - return bp - elif return_type == 'both': - return BoxPlot.BP(ax=ax, lines=bp) - else: - return ax - - colors = _get_colors() - if column is None: - columns = None - else: - if isinstance(column, (list, tuple)): - columns = column - else: - columns = [column] - - if by is not None: - # Prefer array return type for 2-D plots to match the subplot layout - # https://github.com/pandas-dev/pandas/pull/12216#issuecomment-241175580 - result = _grouped_plot_by_column(plot_group, data, columns=columns, - by=by, grid=grid, figsize=figsize, - ax=ax, layout=layout, - return_type=return_type) - else: - if return_type is None: - return_type = 'axes' - if layout is not None: - raise ValueError("The 'layout' keyword is not supported when " - "'by' is None") - - if ax is None: - ax = _gca() - data = data._get_numeric_data() - if columns is None: - columns = data.columns - else: - data = data[columns] - - result = plot_group(columns, data.values.T, ax) - ax.grid(grid) - - return result - - -def format_date_labels(ax, rot): - # mini version of autofmt_xdate - try: - for label in ax.get_xticklabels(): - label.set_ha('right') - label.set_rotation(rot) - fig = ax.get_figure() - fig.subplots_adjust(bottom=0.2) - except Exception: # pragma: no cover - pass - - -def scatter_plot(data, x, y, by=None, ax=None, figsize=None, grid=False, - **kwargs): - """ - Make a scatter plot from two DataFrame columns - - Parameters - ---------- - data : DataFrame - x : Column name for the x-axis values - y : Column name for the y-axis values - ax : Matplotlib axis object - figsize : A tuple (width, height) in inches - grid : Setting this to True will show the grid - kwargs : other plotting keyword arguments - To be passed to scatter function - - Returns - ------- - fig : matplotlib.Figure - """ - import matplotlib.pyplot as plt - - kwargs.setdefault('edgecolors', 'none') - - def 
plot_group(group, ax): - xvals = group[x].values - yvals = group[y].values - ax.scatter(xvals, yvals, **kwargs) - ax.grid(grid) - - if by is not None: - fig = _grouped_plot(plot_group, data, by=by, figsize=figsize, ax=ax) - else: - if ax is None: - fig = plt.figure() - ax = fig.add_subplot(111) - else: - fig = ax.get_figure() - plot_group(data, ax) - ax.set_ylabel(pprint_thing(y)) - ax.set_xlabel(pprint_thing(x)) - - ax.grid(grid) - - return fig - - -def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, - xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, - sharey=False, figsize=None, layout=None, bins=10, **kwds): - """ - Draw histogram of the DataFrame's series using matplotlib / pylab. - - Parameters - ---------- - data : DataFrame - column : string or sequence - If passed, will be used to limit data to a subset of columns - by : object, optional - If passed, then used to form histograms for separate groups - grid : boolean, default True - Whether to show axis grid lines - xlabelsize : int, default None - If specified changes the x-axis label size - xrot : float, default None - rotation of x axis labels - ylabelsize : int, default None - If specified changes the y-axis label size - yrot : float, default None - rotation of y axis labels - ax : matplotlib axes object, default None - sharex : boolean, default True if ax is None else False - In case subplots=True, share x axis and set some x axis labels to - invisible; defaults to True if ax is None otherwise False if an ax - is passed in; Be aware, that passing in both an ax and sharex=True - will alter all x axis labels for all subplots in a figure! - sharey : boolean, default False - In case subplots=True, share y axis and set some y axis labels to - invisible - figsize : tuple - The size of the figure to create in inches by default - layout : tuple, optional - Tuple of (rows, columns) for the layout of the histograms - bins : integer, default 10 - Number of histogram bins to be used - kwds : other plotting keyword arguments - To be passed to hist function - """ - - if by is not None: - axes = grouped_hist(data, column=column, by=by, ax=ax, grid=grid, - figsize=figsize, sharex=sharex, sharey=sharey, - layout=layout, bins=bins, xlabelsize=xlabelsize, - xrot=xrot, ylabelsize=ylabelsize, - yrot=yrot, **kwds) - return axes - - if column is not None: - if not isinstance(column, (list, np.ndarray, Index)): - column = [column] - data = data[column] - data = data._get_numeric_data() - naxes = len(data.columns) - - fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False, - sharex=sharex, sharey=sharey, figsize=figsize, - layout=layout) - _axes = _flatten(axes) - - for i, col in enumerate(_try_sort(data.columns)): - ax = _axes[i] - ax.hist(data[col].dropna().values, bins=bins, **kwds) - ax.set_title(col) - ax.grid(grid) - - _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot) - fig.subplots_adjust(wspace=0.3, hspace=0.3) - - return axes - - -def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, - xrot=None, ylabelsize=None, yrot=None, figsize=None, - bins=10, **kwds): - """ - Draw histogram of the input series using matplotlib - - Parameters - ---------- - by : object, optional - If passed, then used to form histograms for separate groups - ax : matplotlib axis object - If not passed, uses gca() - grid : boolean, default True - Whether to show axis grid lines - xlabelsize : int, default None - If specified changes the x-axis label size - xrot : float, default None - 
rotation of x axis labels - ylabelsize : int, default None - If specified changes the y-axis label size - yrot : float, default None - rotation of y axis labels - figsize : tuple, default None - figure size in inches by default - bins: integer, default 10 - Number of histogram bins to be used - kwds : keywords - To be passed to the actual plotting function - - Notes - ----- - See matplotlib documentation online for more on this - - """ - import matplotlib.pyplot as plt - - if by is None: - if kwds.get('layout', None) is not None: - raise ValueError("The 'layout' keyword is not supported when " - "'by' is None") - # hack until the plotting interface is a bit more unified - fig = kwds.pop('figure', plt.gcf() if plt.get_fignums() else - plt.figure(figsize=figsize)) - if (figsize is not None and tuple(figsize) != - tuple(fig.get_size_inches())): - fig.set_size_inches(*figsize, forward=True) - if ax is None: - ax = fig.gca() - elif ax.get_figure() != fig: - raise AssertionError('passed axis not bound to passed figure') - values = self.dropna().values - - ax.hist(values, bins=bins, **kwds) - ax.grid(grid) - axes = np.array([ax]) - - _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot) - - else: - if 'figure' in kwds: - raise ValueError("Cannot pass 'figure' when using the " - "'by' argument, since a new 'Figure' instance " - "will be created") - axes = grouped_hist(self, by=by, ax=ax, grid=grid, figsize=figsize, - bins=bins, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot, **kwds) - - if hasattr(axes, 'ndim'): - if axes.ndim == 1 and len(axes) == 1: - return axes[0] - return axes - - -def grouped_hist(data, column=None, by=None, ax=None, bins=50, figsize=None, - layout=None, sharex=False, sharey=False, rot=90, grid=True, - xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, - **kwargs): - """ - Grouped histogram - - Parameters - ---------- - data: Series/DataFrame - column: object, optional - by: object, optional - ax: axes, optional - bins: int, default 50 - figsize: tuple, optional - layout: optional - sharex: boolean, default False - sharey: boolean, default False - rot: int, default 90 - grid: bool, default True - kwargs: dict, keyword arguments passed to matplotlib.Axes.hist - - Returns - ------- - axes: collection of Matplotlib Axes - """ - def plot_group(group, ax): - ax.hist(group.dropna().values, bins=bins, **kwargs) - - xrot = xrot or rot - - fig, axes = _grouped_plot(plot_group, data, column=column, - by=by, sharex=sharex, sharey=sharey, ax=ax, - figsize=figsize, layout=layout, rot=rot) - - _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot) - - fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, - hspace=0.5, wspace=0.3) - return axes - - -def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, - rot=0, grid=True, ax=None, figsize=None, - layout=None, **kwds): - """ - Make box plots from DataFrameGroupBy data. 
- - Parameters - ---------- - grouped : Grouped DataFrame - subplots : - * ``False`` - no subplots will be used - * ``True`` - create a subplot for each group - column : column name or list of names, or vector - Can be any valid input to groupby - fontsize : int or string - rot : label rotation angle - grid : Setting this to True will show the grid - ax : Matplotlib axis object, default None - figsize : A tuple (width, height) in inches - layout : tuple (optional) - (rows, columns) for the layout of the plot - kwds : other plotting keyword arguments to be passed to matplotlib boxplot - function - - Returns - ------- - dict of key/value = group key/DataFrame.boxplot return value - or DataFrame.boxplot return value in case subplots=figures=False - - Examples - -------- - >>> import pandas - >>> import numpy as np - >>> import itertools - >>> - >>> tuples = [t for t in itertools.product(range(1000), range(4))] - >>> index = pandas.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1']) - >>> data = np.random.randn(len(index),4) - >>> df = pandas.DataFrame(data, columns=list('ABCD'), index=index) - >>> - >>> grouped = df.groupby(level='lvl1') - >>> boxplot_frame_groupby(grouped) - >>> - >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1) - >>> boxplot_frame_groupby(grouped, subplots=False) - """ - if subplots is True: - naxes = len(grouped) - fig, axes = _subplots(naxes=naxes, squeeze=False, - ax=ax, sharex=False, sharey=True, - figsize=figsize, layout=layout) - axes = _flatten(axes) - - ret = Series() - for (key, group), ax in zip(grouped, axes): - d = group.boxplot(ax=ax, column=column, fontsize=fontsize, - rot=rot, grid=grid, **kwds) - ax.set_title(pprint_thing(key)) - ret.loc[key] = d - fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, - right=0.9, wspace=0.2) - else: - from pandas.tools.concat import concat - keys, frames = zip(*grouped) - if grouped.axis == 0: - df = concat(frames, keys=keys, axis=1) - else: - if len(frames) > 1: - df = frames[0].join(frames[1::]) - else: - df = frames[0] - ret = df.boxplot(column=column, fontsize=fontsize, rot=rot, - grid=grid, ax=ax, figsize=figsize, - layout=layout, **kwds) - return ret - - -def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True, - figsize=None, sharex=True, sharey=True, layout=None, - rot=0, ax=None, **kwargs): - from pandas import DataFrame - - if figsize == 'default': - # allowed to specify mpl default with 'default' - warnings.warn("figsize='default' is deprecated. 
Specify figure" - "size by tuple instead", FutureWarning, stacklevel=4) - figsize = None - - grouped = data.groupby(by) - if column is not None: - grouped = grouped[column] - - naxes = len(grouped) - fig, axes = _subplots(naxes=naxes, figsize=figsize, - sharex=sharex, sharey=sharey, ax=ax, - layout=layout) - - _axes = _flatten(axes) - - for i, (key, group) in enumerate(grouped): - ax = _axes[i] - if numeric_only and isinstance(group, DataFrame): - group = group._get_numeric_data() - plotf(group, ax, **kwargs) - ax.set_title(pprint_thing(key)) - - return fig, axes - - -def _grouped_plot_by_column(plotf, data, columns=None, by=None, - numeric_only=True, grid=False, - figsize=None, ax=None, layout=None, - return_type=None, **kwargs): - grouped = data.groupby(by) - if columns is None: - if not isinstance(by, (list, tuple)): - by = [by] - columns = data._get_numeric_data().columns.difference(by) - naxes = len(columns) - fig, axes = _subplots(naxes=naxes, sharex=True, sharey=True, - figsize=figsize, ax=ax, layout=layout) - - _axes = _flatten(axes) - - result = Series() - ax_values = [] - - for i, col in enumerate(columns): - ax = _axes[i] - gp_col = grouped[col] - keys, values = zip(*gp_col) - re_plotf = plotf(keys, values, ax, **kwargs) - ax.set_title(col) - ax.set_xlabel(pprint_thing(by)) - ax_values.append(re_plotf) - ax.grid(grid) - - result = Series(ax_values, index=columns) - - # Return axes in multiplot case, maybe revisit later # 985 - if return_type is None: - result = axes - - byline = by[0] if len(by) == 1 else by - fig.suptitle('Boxplot grouped by %s' % byline) - fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) - - return result - - -def table(ax, data, rowLabels=None, colLabels=None, - **kwargs): - """ - Helper function to convert DataFrame and Series to matplotlib.table - - Parameters - ---------- - `ax`: Matplotlib axes object - `data`: DataFrame or Series - data for table contents - `kwargs`: keywords, optional - keyword arguments which passed to matplotlib.table.table. - If `rowLabels` or `colLabels` is not specified, data index or column - name will be used. 
-
-    Returns
-    -------
-    matplotlib table object
-    """
-    from pandas import DataFrame
-    if isinstance(data, Series):
-        data = DataFrame(data, columns=[data.name])
-    elif isinstance(data, DataFrame):
-        pass
-    else:
-        raise ValueError('Input data must be DataFrame or Series')
-
-    if rowLabels is None:
-        rowLabels = data.index
-
-    if colLabels is None:
-        colLabels = data.columns
-
-    cellText = data.values
-
-    import matplotlib.table
-    table = matplotlib.table.table(ax, cellText=cellText,
-                                   rowLabels=rowLabels,
-                                   colLabels=colLabels, **kwargs)
-    return table
-
-
-def _get_layout(nplots, layout=None, layout_type='box'):
-    if layout is not None:
-        if not isinstance(layout, (tuple, list)) or len(layout) != 2:
-            raise ValueError('Layout must be a tuple of (rows, columns)')
-
-        nrows, ncols = layout
-
-        # Python 2 compat
-        ceil_ = lambda x: int(ceil(x))
-        if nrows == -1 and ncols > 0:
-            layout = nrows, ncols = (ceil_(float(nplots) / ncols), ncols)
-        elif ncols == -1 and nrows > 0:
-            layout = nrows, ncols = (nrows, ceil_(float(nplots) / nrows))
-        elif ncols <= 0 and nrows <= 0:
-            msg = "At least one dimension of layout must be positive"
-            raise ValueError(msg)
-
-        if nrows * ncols < nplots:
-            raise ValueError('Layout of %sx%s must be larger than '
-                             'required size %s' % (nrows, ncols, nplots))
-
-        return layout
-
-    if layout_type == 'single':
-        return (1, 1)
-    elif layout_type == 'horizontal':
-        return (1, nplots)
-    elif layout_type == 'vertical':
-        return (nplots, 1)
-
-    layouts = {1: (1, 1), 2: (1, 2), 3: (2, 2), 4: (2, 2)}
-    try:
-        return layouts[nplots]
-    except KeyError:
-        k = 1
-        while k ** 2 < nplots:
-            k += 1
-
-        if (k - 1) * k >= nplots:
-            return k, (k - 1)
-        else:
-            return k, k
-
-# copied from matplotlib/pyplot.py and modified for pandas.plotting
-
-
-def _subplots(naxes=None, sharex=False, sharey=False, squeeze=True,
-              subplot_kw=None, ax=None, layout=None, layout_type='box',
-              **fig_kw):
-    """Create a figure with a set of subplots already made.
-
-    This utility wrapper makes it convenient to create common layouts of
-    subplots, including the enclosing figure object, in a single call.
-
-    Keyword arguments:
-
-    naxes : int
-      Number of required axes. Exceeded axes are set invisible. Default is
-      nrows * ncols.
-
-    sharex : bool
-      If True, the X axis will be shared amongst all subplots.
-
-    sharey : bool
-      If True, the Y axis will be shared amongst all subplots.
-
-    squeeze : bool
-      If True, extra dimensions are squeezed out from the returned axis object:
-        - if only one subplot is constructed (nrows=ncols=1), the resulting
-          single Axis object is returned as a scalar.
-        - for Nx1 or 1xN subplots, the returned object is a 1-d numpy object
-          array of Axis objects.
-        - NxM subplots with N>1 and M>1 are returned as a 2-d array.
-      If False, no squeezing at all is done: the returned axis object is always
-      a 2-d array containing Axis instances, even if it ends up being 1x1.
-
-    subplot_kw : dict
-      Dict with keywords passed to the add_subplot() call used to create each
-      subplot.
-
-    ax : Matplotlib axis object, optional
-
-    layout : tuple
-      Number of rows and columns of the subplot grid.
-      If not specified, calculated from naxes and layout_type
-
-    layout_type : {'box', 'horizontal', 'vertical'}, default 'box'
-      Specify how to layout the subplot grid.
-
-    fig_kw : Other keyword arguments to be passed to the figure() call.
-        Note that all keywords not recognized above will be
-        automatically included here.
- - Returns: - - fig, ax : tuple - - fig is the Matplotlib Figure object - - ax can be either a single axis object or an array of axis objects if - more than one subplot was created. The dimensions of the resulting array - can be controlled with the squeeze keyword, see above. - - **Examples:** - - x = np.linspace(0, 2*np.pi, 400) - y = np.sin(x**2) - - # Just a figure and one subplot - f, ax = plt.subplots() - ax.plot(x, y) - ax.set_title('Simple plot') - - # Two subplots, unpack the output array immediately - f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) - ax1.plot(x, y) - ax1.set_title('Sharing Y axis') - ax2.scatter(x, y) - - # Four polar axes - plt.subplots(2, 2, subplot_kw=dict(polar=True)) - """ - import matplotlib.pyplot as plt - - if subplot_kw is None: - subplot_kw = {} - - if ax is None: - fig = plt.figure(**fig_kw) - else: - if is_list_like(ax): - ax = _flatten(ax) - if layout is not None: - warnings.warn("When passing multiple axes, layout keyword is " - "ignored", UserWarning) - if sharex or sharey: - warnings.warn("When passing multiple axes, sharex and sharey " - "are ignored. These settings must be specified " - "when creating axes", UserWarning, - stacklevel=4) - if len(ax) == naxes: - fig = ax[0].get_figure() - return fig, ax - else: - raise ValueError("The number of passed axes must be {0}, the " - "same as the output plot".format(naxes)) - - fig = ax.get_figure() - # if ax is passed and a number of subplots is 1, return ax as it is - if naxes == 1: - if squeeze: - return fig, ax - else: - return fig, _flatten(ax) - else: - warnings.warn("To output multiple subplots, the figure containing " - "the passed axes is being cleared", UserWarning, - stacklevel=4) - fig.clear() - - nrows, ncols = _get_layout(naxes, layout=layout, layout_type=layout_type) - nplots = nrows * ncols - - # Create empty object array to hold all axes. It's easiest to make it 1-d - # so we can just append subplots upon creation, and then - axarr = np.empty(nplots, dtype=object) - - # Create first subplot separately, so we can share it if requested - ax0 = fig.add_subplot(nrows, ncols, 1, **subplot_kw) - - if sharex: - subplot_kw['sharex'] = ax0 - if sharey: - subplot_kw['sharey'] = ax0 - axarr[0] = ax0 - - # Note off-by-one counting because add_subplot uses the MATLAB 1-based - # convention. - for i in range(1, nplots): - kwds = subplot_kw.copy() - # Set sharex and sharey to None for blank/dummy axes, these can - # interfere with proper axis limits on the visible axes if - # they share axes e.g. issue #7528 - if i >= naxes: - kwds['sharex'] = None - kwds['sharey'] = None - ax = fig.add_subplot(nrows, ncols, i + 1, **kwds) - axarr[i] = ax - - if naxes != nplots: - for ax in axarr[naxes:]: - ax.set_visible(False) - - _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey) - - if squeeze: - # Reshape the array to have the final desired dimension (nrow,ncol), - # though discarding unneeded dimensions that equal 1. If we only have - # one subplot, just return it instead of a 1-element array. 
-        if nplots == 1:
-            axes = axarr[0]
-        else:
-            axes = axarr.reshape(nrows, ncols).squeeze()
-    else:
-        # returned axis array will be always 2-d, even if nrows=ncols=1
-        axes = axarr.reshape(nrows, ncols)
-
-    return fig, axes
-
-
-def _remove_labels_from_axis(axis):
-    for t in axis.get_majorticklabels():
-        t.set_visible(False)
-
-    try:
-        # set_visible will not be effective if
-        # minor axis has NullLocator and NullFormatter (default)
-        import matplotlib.ticker as ticker
-        if isinstance(axis.get_minor_locator(), ticker.NullLocator):
-            axis.set_minor_locator(ticker.AutoLocator())
-        if isinstance(axis.get_minor_formatter(), ticker.NullFormatter):
-            axis.set_minor_formatter(ticker.FormatStrFormatter(''))
-        for t in axis.get_minorticklabels():
-            t.set_visible(False)
-    except Exception:  # pragma no cover
-        raise
-    axis.get_label().set_visible(False)
-
-
-def _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey):
-    if nplots > 1:
-
-        if nrows > 1:
-            try:
-                # first find out the ax layout,
-                # so that we can correctly handle 'gaps'
-                layout = np.zeros((nrows + 1, ncols + 1), dtype=np.bool)
-                for ax in axarr:
-                    layout[ax.rowNum, ax.colNum] = ax.get_visible()
-
-                for ax in axarr:
-                    # only the last row of subplots should get x labels -> all
-                    # others off. The layout check handles the case that the
-                    # subplot is the last in its column, because there is no
-                    # subplot/gap below it.
-                    if not layout[ax.rowNum + 1, ax.colNum]:
-                        continue
-                    if sharex or len(ax.get_shared_x_axes()
-                                     .get_siblings(ax)) > 1:
-                        _remove_labels_from_axis(ax.xaxis)
-
-            except IndexError:
-                # if gridspec is used, ax.rowNum and ax.colNum may differ
-                # from the layout shape. In this case, use last_row logic
-                for ax in axarr:
-                    if ax.is_last_row():
-                        continue
-                    if sharex or len(ax.get_shared_x_axes()
-                                     .get_siblings(ax)) > 1:
-                        _remove_labels_from_axis(ax.xaxis)
-
-        if ncols > 1:
-            for ax in axarr:
-                # only the first column should get y labels -> set all other to
-                # off. As we only have labels in the first column and we always
-                # have a subplot there, we can skip the layout test
-                if ax.is_first_col():
-                    continue
-                if sharey or len(ax.get_shared_y_axes().get_siblings(ax)) > 1:
-                    _remove_labels_from_axis(ax.yaxis)
-
-
-def _flatten(axes):
-    if not is_list_like(axes):
-        return np.array([axes])
-    elif isinstance(axes, (np.ndarray, Index)):
-        return axes.ravel()
-    return np.array(axes)
-
-
-def _get_all_lines(ax):
-    lines = ax.get_lines()
-
-    if hasattr(ax, 'right_ax'):
-        lines += ax.right_ax.get_lines()
-
-    if hasattr(ax, 'left_ax'):
-        lines += ax.left_ax.get_lines()
-
-    return lines
-
-
-def _get_xlim(lines):
-    left, right = np.inf, -np.inf
-    for l in lines:
-        x = l.get_xdata(orig=False)
-        left = min(x[0], left)
-        right = max(x[-1], right)
-    return left, right
-
-
-def _set_ticks_props(axes, xlabelsize=None, xrot=None,
-                     ylabelsize=None, yrot=None):
-    import matplotlib.pyplot as plt
-
-    for ax in _flatten(axes):
-        if xlabelsize is not None:
-            plt.setp(ax.get_xticklabels(), fontsize=xlabelsize)
-        if xrot is not None:
-            plt.setp(ax.get_xticklabels(), rotation=xrot)
-        if ylabelsize is not None:
-            plt.setp(ax.get_yticklabels(), fontsize=ylabelsize)
-        if yrot is not None:
-            plt.setp(ax.get_yticklabels(), rotation=yrot)
-    return axes
-
-
-class BasePlotMethods(PandasObject):
-
-    def __init__(self, data):
-        self._data = data
-
-    def __call__(self, *args, **kwargs):
-        raise NotImplementedError
-
-
-class SeriesPlotMethods(BasePlotMethods):
-    """Series plotting accessor and method
-
-    Examples
-    --------
-    >>> s.plot.line()
-    >>> s.plot.bar()
-    >>>
s.plot.hist() - - Plotting methods can also be accessed by calling the accessor as a method - with the ``kind`` argument: - ``s.plot(kind='line')`` is equivalent to ``s.plot.line()`` - """ - - def __call__(self, kind='line', ax=None, - figsize=None, use_index=True, title=None, grid=None, - legend=False, style=None, logx=False, logy=False, - loglog=False, xticks=None, yticks=None, - xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - label=None, secondary_y=False, **kwds): - return plot_series(self._data, kind=kind, ax=ax, figsize=figsize, - use_index=use_index, title=title, grid=grid, - legend=legend, style=style, logx=logx, logy=logy, - loglog=loglog, xticks=xticks, yticks=yticks, - xlim=xlim, ylim=ylim, rot=rot, fontsize=fontsize, - colormap=colormap, table=table, yerr=yerr, - xerr=xerr, label=label, secondary_y=secondary_y, - **kwds) - __call__.__doc__ = plot_series.__doc__ - - def line(self, **kwds): - """ - Line plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='line', **kwds) - - def bar(self, **kwds): - """ - Vertical bar plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='bar', **kwds) - - def barh(self, **kwds): - """ - Horizontal bar plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='barh', **kwds) - - def box(self, **kwds): - """ - Boxplot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='box', **kwds) - - def hist(self, bins=10, **kwds): - """ - Histogram - - .. versionadded:: 0.17.0 - - Parameters - ---------- - bins: integer, default 10 - Number of histogram bins to be used - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='hist', bins=bins, **kwds) - - def kde(self, **kwds): - """ - Kernel Density Estimate plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='kde', **kwds) - - density = kde - - def area(self, **kwds): - """ - Area plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='area', **kwds) - - def pie(self, **kwds): - """ - Pie chart - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. 
- - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='pie', **kwds) - - -class FramePlotMethods(BasePlotMethods): - """DataFrame plotting accessor and method - - Examples - -------- - >>> df.plot.line() - >>> df.plot.scatter('x', 'y') - >>> df.plot.hexbin() - - These plotting methods can also be accessed by calling the accessor as a - method with the ``kind`` argument: - ``df.plot(kind='line')`` is equivalent to ``df.plot.line()`` - """ - - def __call__(self, x=None, y=None, kind='line', ax=None, - subplots=False, sharex=None, sharey=False, layout=None, - figsize=None, use_index=True, title=None, grid=None, - legend=True, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - secondary_y=False, sort_columns=False, **kwds): - return plot_frame(self._data, kind=kind, x=x, y=y, ax=ax, - subplots=subplots, sharex=sharex, sharey=sharey, - layout=layout, figsize=figsize, use_index=use_index, - title=title, grid=grid, legend=legend, style=style, - logx=logx, logy=logy, loglog=loglog, xticks=xticks, - yticks=yticks, xlim=xlim, ylim=ylim, rot=rot, - fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, secondary_y=secondary_y, - sort_columns=sort_columns, **kwds) - __call__.__doc__ = plot_frame.__doc__ - - def line(self, x=None, y=None, **kwds): - """ - Line plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='line', x=x, y=y, **kwds) - - def bar(self, x=None, y=None, **kwds): - """ - Vertical bar plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='bar', x=x, y=y, **kwds) - - def barh(self, x=None, y=None, **kwds): - """ - Horizontal bar plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='barh', x=x, y=y, **kwds) - - def box(self, by=None, **kwds): - """ - Boxplot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - by : string or sequence - Column in the DataFrame to group by. - \*\*kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='box', by=by, **kwds) - - def hist(self, by=None, bins=10, **kwds): - """ - Histogram - - .. versionadded:: 0.17.0 - - Parameters - ---------- - by : string or sequence - Column in the DataFrame to group by. - bins: integer, default 10 - Number of histogram bins to be used - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='hist', by=by, bins=bins, **kwds) - - def kde(self, **kwds): - """ - Kernel Density Estimate plot - - .. 
versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='kde', **kwds) - - density = kde - - def area(self, x=None, y=None, **kwds): - """ - Area plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='area', x=x, y=y, **kwds) - - def pie(self, y=None, **kwds): - """ - Pie chart - - .. versionadded:: 0.17.0 - - Parameters - ---------- - y : label or position, optional - Column to plot. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='pie', y=y, **kwds) - - def scatter(self, x, y, s=None, c=None, **kwds): - """ - Scatter plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - s : scalar or array_like, optional - Size of each point. - c : label or position, optional - Color of each point. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='scatter', x=x, y=y, c=c, s=s, **kwds) +import pandas.plotting as _plotting - def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, - **kwds): - """ - Hexbin plot +# back-compat of public API +# deprecate these functions +m = sys.modules['pandas.tools.plotting'] +for t in [t for t in dir(_plotting) if not t.startswith('_')]: - .. versionadded:: 0.17.0 + def outer(t=t): - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - C : label or position, optional - The value at each `(x, y)` point. - reduce_C_function : callable, optional - Function of one argument that reduces all the values in a bin to - a single number (e.g. `mean`, `max`, `sum`, `std`). - gridsize : int, optional - Number of bins. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. 
+ def wrapper(*args, **kwargs): + warnings.warn("'pandas.tools.plotting.{t}' is deprecated, " + "import 'pandas.plotting.{t}' instead.".format(t=t), + FutureWarning, stacklevel=2) + return getattr(_plotting, t)(*args, **kwargs) + return wrapper - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - if reduce_C_function is not None: - kwds['reduce_C_function'] = reduce_C_function - if gridsize is not None: - kwds['gridsize'] = gridsize - return self(kind='hexbin', x=x, y=y, C=C, **kwds) + setattr(m, t, outer(t)) diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index bc768a8bc5b58..df603c4d880d8 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -1,1032 +1,11 @@ -from datetime import datetime, timedelta -import datetime as pydt -import numpy as np - -from dateutil.relativedelta import relativedelta - -import matplotlib.units as units -import matplotlib.dates as dates - -from matplotlib.ticker import Formatter, AutoLocator, Locator -from matplotlib.transforms import nonsingular - - -from pandas.types.common import (is_float, is_integer, - is_integer_dtype, - is_float_dtype, - is_datetime64_ns_dtype, - is_period_arraylike, - ) - -from pandas.compat import lrange -import pandas.compat as compat -import pandas._libs.lib as lib -import pandas.core.common as com -from pandas.core.index import Index - -from pandas.core.series import Series -from pandas.tseries.index import date_range -import pandas.tseries.tools as tools -import pandas.tseries.frequencies as frequencies -from pandas.tseries.frequencies import FreqGroup -from pandas.tseries.period import Period, PeriodIndex - -# constants -HOURS_PER_DAY = 24. -MIN_PER_HOUR = 60. -SEC_PER_MIN = 60. - -SEC_PER_HOUR = SEC_PER_MIN * MIN_PER_HOUR -SEC_PER_DAY = SEC_PER_HOUR * HOURS_PER_DAY - -MUSEC_PER_DAY = 1e6 * SEC_PER_DAY - - -def _mpl_le_2_0_0(): - try: - import matplotlib - return matplotlib.compare_versions('2.0.0', matplotlib.__version__) - except ImportError: - return False - - -def register(): - units.registry[lib.Timestamp] = DatetimeConverter() - units.registry[Period] = PeriodConverter() - units.registry[pydt.datetime] = DatetimeConverter() - units.registry[pydt.date] = DatetimeConverter() - units.registry[pydt.time] = TimeConverter() - units.registry[np.datetime64] = DatetimeConverter() - - -def _to_ordinalf(tm): - tot_sec = (tm.hour * 3600 + tm.minute * 60 + tm.second + - float(tm.microsecond / 1e6)) - return tot_sec - - -def time2num(d): - if isinstance(d, compat.string_types): - parsed = tools.to_datetime(d) - if not isinstance(parsed, datetime): - raise ValueError('Could not parse time %s' % d) - return _to_ordinalf(parsed.time()) - if isinstance(d, pydt.time): - return _to_ordinalf(d) - return d - - -class TimeConverter(units.ConversionInterface): - - @staticmethod - def convert(value, unit, axis): - valid_types = (str, pydt.time) - if (isinstance(value, valid_types) or is_integer(value) or - is_float(value)): - return time2num(value) - if isinstance(value, Index): - return value.map(time2num) - if isinstance(value, (list, tuple, np.ndarray, Index)): - return [time2num(x) for x in value] - return value - - @staticmethod - def axisinfo(unit, axis): - if unit != 'time': - return None - - majloc = AutoLocator() - majfmt = TimeFormatter(majloc) - return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='time') - - @staticmethod - def default_units(x, axis): - return 'time' - - -# time formatter -class TimeFormatter(Formatter): - - def __init__(self, locs): - 
self.locs = locs - - def __call__(self, x, pos=0): - fmt = '%H:%M:%S' - s = int(x) - ms = int((x - s) * 1e3) - us = int((x - s) * 1e6 - ms) - m, s = divmod(s, 60) - h, m = divmod(m, 60) - _, h = divmod(h, 24) - if us != 0: - fmt += '.%6f' - elif ms != 0: - fmt += '.%3f' - - return pydt.time(h, m, s, us).strftime(fmt) - - -# Period Conversion - - -class PeriodConverter(dates.DateConverter): - - @staticmethod - def convert(values, units, axis): - if not hasattr(axis, 'freq'): - raise TypeError('Axis must have `freq` set to convert to Periods') - valid_types = (compat.string_types, datetime, - Period, pydt.date, pydt.time) - if (isinstance(values, valid_types) or is_integer(values) or - is_float(values)): - return get_datevalue(values, axis.freq) - if isinstance(values, PeriodIndex): - return values.asfreq(axis.freq)._values - if isinstance(values, Index): - return values.map(lambda x: get_datevalue(x, axis.freq)) - if is_period_arraylike(values): - return PeriodIndex(values, freq=axis.freq)._values - if isinstance(values, (list, tuple, np.ndarray, Index)): - return [get_datevalue(x, axis.freq) for x in values] - return values - - -def get_datevalue(date, freq): - if isinstance(date, Period): - return date.asfreq(freq).ordinal - elif isinstance(date, (compat.string_types, datetime, - pydt.date, pydt.time)): - return Period(date, freq).ordinal - elif (is_integer(date) or is_float(date) or - (isinstance(date, (np.ndarray, Index)) and (date.size == 1))): - return date - elif date is None: - return None - raise ValueError("Unrecognizable date '%s'" % date) - - -def _dt_to_float_ordinal(dt): - """ - Convert :mod:`datetime` to the Gregorian date as UTC float days, - preserving hours, minutes, seconds and microseconds. Return value - is a :func:`float`. - """ - if (isinstance(dt, (np.ndarray, Index, Series) - ) and is_datetime64_ns_dtype(dt)): - base = dates.epoch2num(dt.asi8 / 1.0E9) - else: - base = dates.date2num(dt) - return base - - -# Datetime Conversion -class DatetimeConverter(dates.DateConverter): - - @staticmethod - def convert(values, unit, axis): - def try_parse(values): - try: - return _dt_to_float_ordinal(tools.to_datetime(values)) - except Exception: - return values - - if isinstance(values, (datetime, pydt.date)): - return _dt_to_float_ordinal(values) - elif isinstance(values, np.datetime64): - return _dt_to_float_ordinal(lib.Timestamp(values)) - elif isinstance(values, pydt.time): - return dates.date2num(values) - elif (is_integer(values) or is_float(values)): - return values - elif isinstance(values, compat.string_types): - return try_parse(values) - elif isinstance(values, (list, tuple, np.ndarray, Index)): - if isinstance(values, Index): - values = values.values - if not isinstance(values, np.ndarray): - values = com._asarray_tuplesafe(values) - - if is_integer_dtype(values) or is_float_dtype(values): - return values - - try: - values = tools.to_datetime(values) - if isinstance(values, Index): - values = _dt_to_float_ordinal(values) - else: - values = [_dt_to_float_ordinal(x) for x in values] - except Exception: - values = _dt_to_float_ordinal(values) - - return values - - @staticmethod - def axisinfo(unit, axis): - """ - Return the :class:`~matplotlib.units.AxisInfo` for *unit*. - - *unit* is a tzinfo instance or None. - The *axis* argument is required but not used. 
- """ - tz = unit - - majloc = PandasAutoDateLocator(tz=tz) - majfmt = PandasAutoDateFormatter(majloc, tz=tz) - datemin = pydt.date(2000, 1, 1) - datemax = pydt.date(2010, 1, 1) - - return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='', - default_limits=(datemin, datemax)) - - -class PandasAutoDateFormatter(dates.AutoDateFormatter): - - def __init__(self, locator, tz=None, defaultfmt='%Y-%m-%d'): - dates.AutoDateFormatter.__init__(self, locator, tz, defaultfmt) - # matplotlib.dates._UTC has no _utcoffset called by pandas - if self._tz is dates.UTC: - self._tz._utcoffset = self._tz.utcoffset(None) - - # For mpl > 2.0 the format strings are controlled via rcparams - # so do not mess with them. For mpl < 2.0 change the second - # break point and add a musec break point - if _mpl_le_2_0_0(): - self.scaled[1. / SEC_PER_DAY] = '%H:%M:%S' - self.scaled[1. / MUSEC_PER_DAY] = '%H:%M:%S.%f' - - -class PandasAutoDateLocator(dates.AutoDateLocator): - - def get_locator(self, dmin, dmax): - 'Pick the best locator based on a distance.' - delta = relativedelta(dmax, dmin) - - num_days = (delta.years * 12.0 + delta.months) * 31.0 + delta.days - num_sec = (delta.hours * 60.0 + delta.minutes) * 60.0 + delta.seconds - tot_sec = num_days * 86400. + num_sec - - if abs(tot_sec) < self.minticks: - self._freq = -1 - locator = MilliSecondLocator(self.tz) - locator.set_axis(self.axis) - - locator.set_view_interval(*self.axis.get_view_interval()) - locator.set_data_interval(*self.axis.get_data_interval()) - return locator - - return dates.AutoDateLocator.get_locator(self, dmin, dmax) - - def _get_unit(self): - return MilliSecondLocator.get_unit_generic(self._freq) - - -class MilliSecondLocator(dates.DateLocator): - - UNIT = 1. / (24 * 3600 * 1000) - - def __init__(self, tz): - dates.DateLocator.__init__(self, tz) - self._interval = 1. - - def _get_unit(self): - return self.get_unit_generic(-1) - - @staticmethod - def get_unit_generic(freq): - unit = dates.RRuleLocator.get_unit_generic(freq) - if unit < 0: - return MilliSecondLocator.UNIT - return unit - - def __call__(self): - # if no data have been set, this will tank with a ValueError - try: - dmin, dmax = self.viewlim_to_dt() - except ValueError: - return [] - - if dmin > dmax: - dmax, dmin = dmin, dmax - # We need to cap at the endpoints of valid datetime - - # TODO(wesm) unused? - # delta = relativedelta(dmax, dmin) - # try: - # start = dmin - delta - # except ValueError: - # start = _from_ordinal(1.0) - - # try: - # stop = dmax + delta - # except ValueError: - # # The magic number! - # stop = _from_ordinal(3652059.9999999) - - nmax, nmin = dates.date2num((dmax, dmin)) - - num = (nmax - nmin) * 86400 * 1000 - max_millis_ticks = 6 - for interval in [1, 10, 50, 100, 200, 500]: - if num <= interval * (max_millis_ticks - 1): - self._interval = interval - break - else: - # We went through the whole loop without breaking, default to 1 - self._interval = 1000. 
- - estimate = (nmax - nmin) / (self._get_unit() * self._get_interval()) - - if estimate > self.MAXTICKS * 2: - raise RuntimeError(('MillisecondLocator estimated to generate %d ' - 'ticks from %s to %s: exceeds Locator.MAXTICKS' - '* 2 (%d) ') % - (estimate, dmin, dmax, self.MAXTICKS * 2)) - - freq = '%dL' % self._get_interval() - tz = self.tz.tzname(None) - st = _from_ordinal(dates.date2num(dmin)) # strip tz - ed = _from_ordinal(dates.date2num(dmax)) - all_dates = date_range(start=st, end=ed, freq=freq, tz=tz).asobject - - try: - if len(all_dates) > 0: - locs = self.raise_if_exceeds(dates.date2num(all_dates)) - return locs - except Exception: # pragma: no cover - pass - - lims = dates.date2num([dmin, dmax]) - return lims - - def _get_interval(self): - return self._interval - - def autoscale(self): - """ - Set the view limits to include the data range. - """ - dmin, dmax = self.datalim_to_dt() - if dmin > dmax: - dmax, dmin = dmin, dmax - - # We need to cap at the endpoints of valid datetime - - # TODO(wesm): unused? - - # delta = relativedelta(dmax, dmin) - # try: - # start = dmin - delta - # except ValueError: - # start = _from_ordinal(1.0) - - # try: - # stop = dmax + delta - # except ValueError: - # # The magic number! - # stop = _from_ordinal(3652059.9999999) - - dmin, dmax = self.datalim_to_dt() - - vmin = dates.date2num(dmin) - vmax = dates.date2num(dmax) - - return self.nonsingular(vmin, vmax) - - -def _from_ordinal(x, tz=None): - ix = int(x) - dt = datetime.fromordinal(ix) - remainder = float(x) - ix - hour, remainder = divmod(24 * remainder, 1) - minute, remainder = divmod(60 * remainder, 1) - second, remainder = divmod(60 * remainder, 1) - microsecond = int(1e6 * remainder) - if microsecond < 10: - microsecond = 0 # compensate for rounding errors - dt = datetime(dt.year, dt.month, dt.day, int(hour), int(minute), - int(second), microsecond) - if tz is not None: - dt = dt.astimezone(tz) - - if microsecond > 999990: # compensate for rounding errors - dt += timedelta(microseconds=1e6 - microsecond) - - return dt - -# Fixed frequency dynamic tick locators and formatters - -# ------------------------------------------------------------------------- -# --- Locators --- -# ------------------------------------------------------------------------- - - -def _get_default_annual_spacing(nyears): - """ - Returns a default spacing between consecutive ticks for annual data. - """ - if nyears < 11: - (min_spacing, maj_spacing) = (1, 1) - elif nyears < 20: - (min_spacing, maj_spacing) = (1, 2) - elif nyears < 50: - (min_spacing, maj_spacing) = (1, 5) - elif nyears < 100: - (min_spacing, maj_spacing) = (5, 10) - elif nyears < 200: - (min_spacing, maj_spacing) = (5, 25) - elif nyears < 600: - (min_spacing, maj_spacing) = (10, 50) - else: - factor = nyears // 1000 + 1 - (min_spacing, maj_spacing) = (factor * 20, factor * 100) - return (min_spacing, maj_spacing) - - -def period_break(dates, period): - """ - Returns the indices where the given period changes. - - Parameters - ---------- - dates : PeriodIndex - Array of intervals to monitor. - period : string - Name of the period to monitor. - """ - current = getattr(dates, period) - previous = getattr(dates - 1, period) - return np.nonzero(current - previous)[0] - - -def has_level_label(label_flags, vmin): - """ - Returns true if the ``label_flags`` indicate there is at least one label - for this level. - - if the minimum view limit is not an exact integer, then the first tick - label won't be shown, so we must adjust for that. 
- """ - if label_flags.size == 0 or (label_flags.size == 1 and - label_flags[0] == 0 and - vmin % 1 > 0.0): - return False - else: - return True - - -def _daily_finder(vmin, vmax, freq): - periodsperday = -1 - - if freq >= FreqGroup.FR_HR: - if freq == FreqGroup.FR_NS: - periodsperday = 24 * 60 * 60 * 1000000000 - elif freq == FreqGroup.FR_US: - periodsperday = 24 * 60 * 60 * 1000000 - elif freq == FreqGroup.FR_MS: - periodsperday = 24 * 60 * 60 * 1000 - elif freq == FreqGroup.FR_SEC: - periodsperday = 24 * 60 * 60 - elif freq == FreqGroup.FR_MIN: - periodsperday = 24 * 60 - elif freq == FreqGroup.FR_HR: - periodsperday = 24 - else: # pragma: no cover - raise ValueError("unexpected frequency: %s" % freq) - periodsperyear = 365 * periodsperday - periodspermonth = 28 * periodsperday - - elif freq == FreqGroup.FR_BUS: - periodsperyear = 261 - periodspermonth = 19 - elif freq == FreqGroup.FR_DAY: - periodsperyear = 365 - periodspermonth = 28 - elif frequencies.get_freq_group(freq) == FreqGroup.FR_WK: - periodsperyear = 52 - periodspermonth = 3 - else: # pragma: no cover - raise ValueError("unexpected frequency") - - # save this for later usage - vmin_orig = vmin - - (vmin, vmax) = (Period(ordinal=int(vmin), freq=freq), - Period(ordinal=int(vmax), freq=freq)) - span = vmax.ordinal - vmin.ordinal + 1 - dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq) - # Initialize the output - info = np.zeros(span, - dtype=[('val', np.int64), ('maj', bool), - ('min', bool), ('fmt', '|S20')]) - info['val'][:] = dates_._values - info['fmt'][:] = '' - info['maj'][[0, -1]] = True - # .. and set some shortcuts - info_maj = info['maj'] - info_min = info['min'] - info_fmt = info['fmt'] - - def first_label(label_flags): - if (label_flags[0] == 0) and (label_flags.size > 1) and \ - ((vmin_orig % 1) > 0.0): - return label_flags[1] - else: - return label_flags[0] - - # Case 1. 
Less than a month - if span <= periodspermonth: - day_start = period_break(dates_, 'day') - month_start = period_break(dates_, 'month') - - def _hour_finder(label_interval, force_year_start): - _hour = dates_.hour - _prev_hour = (dates_ - 1).hour - hour_start = (_hour - _prev_hour) != 0 - info_maj[day_start] = True - info_min[hour_start & (_hour % label_interval == 0)] = True - year_start = period_break(dates_, 'year') - info_fmt[hour_start & (_hour % label_interval == 0)] = '%H:%M' - info_fmt[day_start] = '%H:%M\n%d-%b' - info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' - if force_year_start and not has_level_label(year_start, vmin_orig): - info_fmt[first_label(day_start)] = '%H:%M\n%d-%b\n%Y' - - def _minute_finder(label_interval): - hour_start = period_break(dates_, 'hour') - _minute = dates_.minute - _prev_minute = (dates_ - 1).minute - minute_start = (_minute - _prev_minute) != 0 - info_maj[hour_start] = True - info_min[minute_start & (_minute % label_interval == 0)] = True - year_start = period_break(dates_, 'year') - info_fmt = info['fmt'] - info_fmt[minute_start & (_minute % label_interval == 0)] = '%H:%M' - info_fmt[day_start] = '%H:%M\n%d-%b' - info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' - - def _second_finder(label_interval): - minute_start = period_break(dates_, 'minute') - _second = dates_.second - _prev_second = (dates_ - 1).second - second_start = (_second - _prev_second) != 0 - info['maj'][minute_start] = True - info['min'][second_start & (_second % label_interval == 0)] = True - year_start = period_break(dates_, 'year') - info_fmt = info['fmt'] - info_fmt[second_start & (_second % - label_interval == 0)] = '%H:%M:%S' - info_fmt[day_start] = '%H:%M:%S\n%d-%b' - info_fmt[year_start] = '%H:%M:%S\n%d-%b\n%Y' - - if span < periodsperday / 12000.0: - _second_finder(1) - elif span < periodsperday / 6000.0: - _second_finder(2) - elif span < periodsperday / 2400.0: - _second_finder(5) - elif span < periodsperday / 1200.0: - _second_finder(10) - elif span < periodsperday / 800.0: - _second_finder(15) - elif span < periodsperday / 400.0: - _second_finder(30) - elif span < periodsperday / 150.0: - _minute_finder(1) - elif span < periodsperday / 70.0: - _minute_finder(2) - elif span < periodsperday / 24.0: - _minute_finder(5) - elif span < periodsperday / 12.0: - _minute_finder(15) - elif span < periodsperday / 6.0: - _minute_finder(30) - elif span < periodsperday / 2.5: - _hour_finder(1, False) - elif span < periodsperday / 1.5: - _hour_finder(2, False) - elif span < periodsperday * 1.25: - _hour_finder(3, False) - elif span < periodsperday * 2.5: - _hour_finder(6, True) - elif span < periodsperday * 4: - _hour_finder(12, True) - else: - info_maj[month_start] = True - info_min[day_start] = True - year_start = period_break(dates_, 'year') - info_fmt = info['fmt'] - info_fmt[day_start] = '%d' - info_fmt[month_start] = '%d\n%b' - info_fmt[year_start] = '%d\n%b\n%Y' - if not has_level_label(year_start, vmin_orig): - if not has_level_label(month_start, vmin_orig): - info_fmt[first_label(day_start)] = '%d\n%b\n%Y' - else: - info_fmt[first_label(month_start)] = '%d\n%b\n%Y' - - # Case 2. 
Less than three months - elif span <= periodsperyear // 4: - month_start = period_break(dates_, 'month') - info_maj[month_start] = True - if freq < FreqGroup.FR_HR: - info['min'] = True - else: - day_start = period_break(dates_, 'day') - info['min'][day_start] = True - week_start = period_break(dates_, 'week') - year_start = period_break(dates_, 'year') - info_fmt[week_start] = '%d' - info_fmt[month_start] = '\n\n%b' - info_fmt[year_start] = '\n\n%b\n%Y' - if not has_level_label(year_start, vmin_orig): - if not has_level_label(month_start, vmin_orig): - info_fmt[first_label(week_start)] = '\n\n%b\n%Y' - else: - info_fmt[first_label(month_start)] = '\n\n%b\n%Y' - # Case 3. Less than 14 months ............... - elif span <= 1.15 * periodsperyear: - year_start = period_break(dates_, 'year') - month_start = period_break(dates_, 'month') - week_start = period_break(dates_, 'week') - info_maj[month_start] = True - info_min[week_start] = True - info_min[year_start] = False - info_min[month_start] = False - info_fmt[month_start] = '%b' - info_fmt[year_start] = '%b\n%Y' - if not has_level_label(year_start, vmin_orig): - info_fmt[first_label(month_start)] = '%b\n%Y' - # Case 4. Less than 2.5 years ............... - elif span <= 2.5 * periodsperyear: - year_start = period_break(dates_, 'year') - quarter_start = period_break(dates_, 'quarter') - month_start = period_break(dates_, 'month') - info_maj[quarter_start] = True - info_min[month_start] = True - info_fmt[quarter_start] = '%b' - info_fmt[year_start] = '%b\n%Y' - # Case 4. Less than 4 years ................. - elif span <= 4 * periodsperyear: - year_start = period_break(dates_, 'year') - month_start = period_break(dates_, 'month') - info_maj[year_start] = True - info_min[month_start] = True - info_min[year_start] = False - - month_break = dates_[month_start].month - jan_or_jul = month_start[(month_break == 1) | (month_break == 7)] - info_fmt[jan_or_jul] = '%b' - info_fmt[year_start] = '%b\n%Y' - # Case 5. Less than 11 years ................ - elif span <= 11 * periodsperyear: - year_start = period_break(dates_, 'year') - quarter_start = period_break(dates_, 'quarter') - info_maj[year_start] = True - info_min[quarter_start] = True - info_min[year_start] = False - info_fmt[year_start] = '%Y' - # Case 6. More than 12 years ................ 
- else: - year_start = period_break(dates_, 'year') - year_break = dates_[year_start].year - nyears = span / periodsperyear - (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) - major_idx = year_start[(year_break % maj_anndef == 0)] - info_maj[major_idx] = True - minor_idx = year_start[(year_break % min_anndef == 0)] - info_min[minor_idx] = True - info_fmt[major_idx] = '%Y' - - return info - - -def _monthly_finder(vmin, vmax, freq): - periodsperyear = 12 - - vmin_orig = vmin - (vmin, vmax) = (int(vmin), int(vmax)) - span = vmax - vmin + 1 - - # Initialize the output - info = np.zeros(span, - dtype=[('val', int), ('maj', bool), ('min', bool), - ('fmt', '|S8')]) - info['val'] = np.arange(vmin, vmax + 1) - dates_ = info['val'] - info['fmt'] = '' - year_start = (dates_ % 12 == 0).nonzero()[0] - info_maj = info['maj'] - info_fmt = info['fmt'] - - if span <= 1.15 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - - info_fmt[:] = '%b' - info_fmt[year_start] = '%b\n%Y' - - if not has_level_label(year_start, vmin_orig): - if dates_.size > 1: - idx = 1 - else: - idx = 0 - info_fmt[idx] = '%b\n%Y' - - elif span <= 2.5 * periodsperyear: - quarter_start = (dates_ % 3 == 0).nonzero() - info_maj[year_start] = True - # TODO: Check the following : is it really info['fmt'] ? - info['fmt'][quarter_start] = True - info['min'] = True - - info_fmt[quarter_start] = '%b' - info_fmt[year_start] = '%b\n%Y' - - elif span <= 4 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - - jan_or_jul = (dates_ % 12 == 0) | (dates_ % 12 == 6) - info_fmt[jan_or_jul] = '%b' - info_fmt[year_start] = '%b\n%Y' - - elif span <= 11 * periodsperyear: - quarter_start = (dates_ % 3 == 0).nonzero() - info_maj[year_start] = True - info['min'][quarter_start] = True - - info_fmt[year_start] = '%Y' - - else: - nyears = span / periodsperyear - (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) - years = dates_[year_start] // 12 + 1 - major_idx = year_start[(years % maj_anndef == 0)] - info_maj[major_idx] = True - info['min'][year_start[(years % min_anndef == 0)]] = True - - info_fmt[major_idx] = '%Y' - - return info - - -def _quarterly_finder(vmin, vmax, freq): - periodsperyear = 4 - vmin_orig = vmin - (vmin, vmax) = (int(vmin), int(vmax)) - span = vmax - vmin + 1 - - info = np.zeros(span, - dtype=[('val', int), ('maj', bool), ('min', bool), - ('fmt', '|S8')]) - info['val'] = np.arange(vmin, vmax + 1) - info['fmt'] = '' - dates_ = info['val'] - info_maj = info['maj'] - info_fmt = info['fmt'] - year_start = (dates_ % 4 == 0).nonzero()[0] - - if span <= 3.5 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - - info_fmt[:] = 'Q%q' - info_fmt[year_start] = 'Q%q\n%F' - if not has_level_label(year_start, vmin_orig): - if dates_.size > 1: - idx = 1 - else: - idx = 0 - info_fmt[idx] = 'Q%q\n%F' - - elif span <= 11 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - info_fmt[year_start] = '%F' - - else: - years = dates_[year_start] // 4 + 1 - nyears = span / periodsperyear - (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) - major_idx = year_start[(years % maj_anndef == 0)] - info_maj[major_idx] = True - info['min'][year_start[(years % min_anndef == 0)]] = True - info_fmt[major_idx] = '%F' - - return info - - -def _annual_finder(vmin, vmax, freq): - (vmin, vmax) = (int(vmin), int(vmax + 1)) - span = vmax - vmin + 1 - - info = np.zeros(span, - dtype=[('val', int), ('maj', bool), ('min', bool), - ('fmt', '|S8')]) - info['val'] = 
np.arange(vmin, vmax + 1) - info['fmt'] = '' - dates_ = info['val'] - - (min_anndef, maj_anndef) = _get_default_annual_spacing(span) - major_idx = dates_ % maj_anndef == 0 - info['maj'][major_idx] = True - info['min'][(dates_ % min_anndef == 0)] = True - info['fmt'][major_idx] = '%Y' - - return info - - -def get_finder(freq): - if isinstance(freq, compat.string_types): - freq = frequencies.get_freq(freq) - fgroup = frequencies.get_freq_group(freq) - - if fgroup == FreqGroup.FR_ANN: - return _annual_finder - elif fgroup == FreqGroup.FR_QTR: - return _quarterly_finder - elif freq == FreqGroup.FR_MTH: - return _monthly_finder - elif ((freq >= FreqGroup.FR_BUS) or fgroup == FreqGroup.FR_WK): - return _daily_finder - else: # pragma: no cover - errmsg = "Unsupported frequency: %s" % (freq) - raise NotImplementedError(errmsg) - - -class TimeSeries_DateLocator(Locator): - """ - Locates the ticks along an axis controlled by a :class:`Series`. - - Parameters - ---------- - freq : {var} - Valid frequency specifier. - minor_locator : {False, True}, optional - Whether the locator is for minor ticks (True) or not. - dynamic_mode : {True, False}, optional - Whether the locator should work in dynamic mode. - base : {int}, optional - quarter : {int}, optional - month : {int}, optional - day : {int}, optional - """ - - def __init__(self, freq, minor_locator=False, dynamic_mode=True, - base=1, quarter=1, month=1, day=1, plot_obj=None): - if isinstance(freq, compat.string_types): - freq = frequencies.get_freq(freq) - self.freq = freq - self.base = base - (self.quarter, self.month, self.day) = (quarter, month, day) - self.isminor = minor_locator - self.isdynamic = dynamic_mode - self.offset = 0 - self.plot_obj = plot_obj - self.finder = get_finder(freq) - - def _get_default_locs(self, vmin, vmax): - "Returns the default locations of ticks." - - if self.plot_obj.date_axis_info is None: - self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) - - locator = self.plot_obj.date_axis_info - - if self.isminor: - return np.compress(locator['min'], locator['val']) - return np.compress(locator['maj'], locator['val']) - - def __call__(self): - 'Return the locations of the ticks.' - # axis calls Locator.set_axis inside set_m_formatter - vi = tuple(self.axis.get_view_interval()) - if vi != self.plot_obj.view_interval: - self.plot_obj.date_axis_info = None - self.plot_obj.view_interval = vi - vmin, vmax = vi - if vmax < vmin: - vmin, vmax = vmax, vmin - if self.isdynamic: - locs = self._get_default_locs(vmin, vmax) - else: # pragma: no cover - base = self.base - (d, m) = divmod(vmin, base) - vmin = (d + 1) * base - locs = lrange(vmin, vmax + 1, base) - return locs - - def autoscale(self): - """ - Sets the view limits to the nearest multiples of base that contain the - data. - """ - # requires matplotlib >= 0.98.0 - (vmin, vmax) = self.axis.get_data_interval() - - locs = self._get_default_locs(vmin, vmax) - (vmin, vmax) = locs[[0, -1]] - if vmin == vmax: - vmin -= 1 - vmax += 1 - return nonsingular(vmin, vmax) - -# ------------------------------------------------------------------------- -# --- Formatter --- -# ------------------------------------------------------------------------- - - -class TimeSeries_DateFormatter(Formatter): - """ - Formats the ticks along an axis controlled by a :class:`PeriodIndex`. - - Parameters - ---------- - freq : {int, string} - Valid frequency specifier. - minor_locator : {False, True} - Whether the current formatter should apply to minor ticks (True) or - major ticks (False). 
- dynamic_mode : {True, False} - Whether the formatter works in dynamic mode or not. - """ - - def __init__(self, freq, minor_locator=False, dynamic_mode=True, - plot_obj=None): - if isinstance(freq, compat.string_types): - freq = frequencies.get_freq(freq) - self.format = None - self.freq = freq - self.locs = [] - self.formatdict = None - self.isminor = minor_locator - self.isdynamic = dynamic_mode - self.offset = 0 - self.plot_obj = plot_obj - self.finder = get_finder(freq) - - def _set_default_format(self, vmin, vmax): - "Returns the default ticks spacing." - - if self.plot_obj.date_axis_info is None: - self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) - info = self.plot_obj.date_axis_info - - if self.isminor: - format = np.compress(info['min'] & np.logical_not(info['maj']), - info) - else: - format = np.compress(info['maj'], info) - self.formatdict = dict([(x, f) for (x, _, _, f) in format]) - return self.formatdict - - def set_locs(self, locs): - 'Sets the locations of the ticks' - # don't actually use the locs. This is just needed to work with - # matplotlib. Force to use vmin, vmax - self.locs = locs - - (vmin, vmax) = vi = tuple(self.axis.get_view_interval()) - if vi != self.plot_obj.view_interval: - self.plot_obj.date_axis_info = None - self.plot_obj.view_interval = vi - if vmax < vmin: - (vmin, vmax) = (vmax, vmin) - self._set_default_format(vmin, vmax) - - def __call__(self, x, pos=0): - if self.formatdict is None: - return '' - else: - fmt = self.formatdict.pop(x, '') - return Period(ordinal=int(x), freq=self.freq).strftime(fmt) - - -class TimeSeries_TimedeltaFormatter(Formatter): - """ - Formats the ticks along an axis controlled by a :class:`TimedeltaIndex`. - """ - - @staticmethod - def format_timedelta_ticks(x, pos, n_decimals): - """ - Convert seconds to 'D days HH:MM:SS.F' - """ - s, ns = divmod(x, 1e9) - m, s = divmod(s, 60) - h, m = divmod(m, 60) - d, h = divmod(h, 24) - decimals = int(ns * 10**(n_decimals - 9)) - s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) - if n_decimals > 0: - s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) - if d != 0: - s = '{:d} days '.format(int(d)) + s - return s - - def __call__(self, x, pos=0): - (vmin, vmax) = tuple(self.axis.get_view_interval()) - n_decimals = int(np.ceil(np.log10(100 * 1e9 / (vmax - vmin)))) - if n_decimals > 9: - n_decimals = 9 - return self.format_timedelta_ticks(x, pos, n_decimals) +# flake8: noqa + +from pandas.plotting._converter import (register, time2num, + TimeConverter, TimeFormatter, + PeriodConverter, get_datevalue, + DatetimeConverter, + PandasAutoDateFormatter, + PandasAutoDateLocator, + MilliSecondLocator, get_finder, + TimeSeries_DateLocator, + TimeSeries_DateFormatter) diff --git a/pandas/tseries/plotting.py b/pandas/tseries/plotting.py index 4eddf54701889..302016907635d 100644 --- a/pandas/tseries/plotting.py +++ b/pandas/tseries/plotting.py @@ -1,344 +1,3 @@ -""" -Period formatters and locators adapted from scikits.timeseries by -Pierre GF Gerard-Marchant & Matt Knox -""" +# flake8: noqa -# TODO: Use the fact that axis can have units to simplify the process - -import numpy as np - -from matplotlib import pylab -from pandas.tseries.period import Period -from pandas.tseries.offsets import DateOffset -import pandas.tseries.frequencies as frequencies -from pandas.tseries.index import DatetimeIndex -from pandas.tseries.period import PeriodIndex -from pandas.tseries.tdi import TimedeltaIndex -from pandas.formats.printing import pprint_thing -import pandas.compat as compat 
-
-from pandas.tseries.converter import (TimeSeries_DateLocator,
- TimeSeries_DateFormatter,
- TimeSeries_TimedeltaFormatter)
-
-# ---------------------------------------------------------------------
-# Plotting functions and monkey patches
-
-
-def tsplot(series, plotf, ax=None, **kwargs):
- """
- Plots a Series on the given Matplotlib axes or the current axes
-
- Parameters
- ----------
- axes : Axes
- series : Series
-
- Notes
- -----
- Supports same kwargs as Axes.plot
-
- """
- # Use inferred freq if possible; need a test case for inferred
- if ax is None:
- import matplotlib.pyplot as plt
- ax = plt.gca()
-
- freq, series = _maybe_resample(series, ax, kwargs)
-
- # Set ax with freq info
- _decorate_axes(ax, freq, kwargs)
- ax._plot_data.append((series, plotf, kwargs))
- lines = plotf(ax, series.index._mpl_repr(), series.values, **kwargs)
-
- # set date formatter, locators and rescale limits
- format_dateaxis(ax, ax.freq, series.index)
- return lines
-
-
-def _maybe_resample(series, ax, kwargs):
- # resample against axes freq if necessary
- freq, ax_freq = _get_freq(ax, series)
-
- if freq is None: # pragma: no cover
- raise ValueError('Cannot use dynamic axis without frequency info')
-
- # Convert DatetimeIndex to PeriodIndex
- if isinstance(series.index, DatetimeIndex):
- series = series.to_period(freq=freq)
-
- if ax_freq is not None and freq != ax_freq:
- if frequencies.is_superperiod(freq, ax_freq): # upsample input
- series = series.copy()
- series.index = series.index.asfreq(ax_freq, how='s')
- freq = ax_freq
- elif _is_sup(freq, ax_freq): # one is weekly
- how = kwargs.pop('how', 'last')
- series = getattr(series.resample('D'), how)().dropna()
- series = getattr(series.resample(ax_freq), how)().dropna()
- freq = ax_freq
- elif frequencies.is_subperiod(freq, ax_freq) or _is_sub(freq, ax_freq):
- _upsample_others(ax, freq, kwargs)
- ax_freq = freq
- else: # pragma: no cover
- raise ValueError('Incompatible frequency conversion')
- return freq, series
-
-
-def _is_sub(f1, f2):
- return ((f1.startswith('W') and frequencies.is_subperiod('D', f2)) or
- (f2.startswith('W') and frequencies.is_subperiod(f1, 'D')))
-
-
-def _is_sup(f1, f2):
- return ((f1.startswith('W') and frequencies.is_superperiod('D', f2)) or
- (f2.startswith('W') and frequencies.is_superperiod(f1, 'D')))
-
-
-def _upsample_others(ax, freq, kwargs):
- legend = ax.get_legend()
- lines, labels = _replot_ax(ax, freq, kwargs)
- _replot_ax(ax, freq, kwargs)
-
- other_ax = None
- if hasattr(ax, 'left_ax'):
- other_ax = ax.left_ax
- if hasattr(ax, 'right_ax'):
- other_ax = ax.right_ax
-
- if other_ax is not None:
- rlines, rlabels = _replot_ax(other_ax, freq, kwargs)
- lines.extend(rlines)
- labels.extend(rlabels)
-
- if (legend is not None and kwargs.get('legend', True) and
- len(lines) > 0):
- title = legend.get_title().get_text()
- if title == 'None':
- title = None
- ax.legend(lines, labels, loc='best', title=title)
-
-
-def _replot_ax(ax, freq, kwargs):
- data = getattr(ax, '_plot_data', None)
-
- # clear current axes and data
- ax._plot_data = []
- ax.clear()
-
- _decorate_axes(ax, freq, kwargs)
-
- lines = []
- labels = []
- if data is not None:
- for series, plotf, kwds in data:
- series = series.copy()
- idx = series.index.asfreq(freq, how='S')
- series.index = idx
- ax._plot_data.append((series, plotf, kwds))
-
- # for tsplot
- if isinstance(plotf, compat.string_types):
- from pandas.tools.plotting import _plot_klass
- plotf = _plot_klass[plotf]._plot
-
- lines.append(plotf(ax, series.index._mpl_repr(),
-
series.values, **kwds)[0]) - labels.append(pprint_thing(series.name)) - - return lines, labels - - -def _decorate_axes(ax, freq, kwargs): - """Initialize axes for time-series plotting""" - if not hasattr(ax, '_plot_data'): - ax._plot_data = [] - - ax.freq = freq - xaxis = ax.get_xaxis() - xaxis.freq = freq - if not hasattr(ax, 'legendlabels'): - ax.legendlabels = [kwargs.get('label', None)] - else: - ax.legendlabels.append(kwargs.get('label', None)) - ax.view_interval = None - ax.date_axis_info = None - - -def _get_ax_freq(ax): - """ - Get the freq attribute of the ax object if set. - Also checks shared axes (eg when using secondary yaxis, sharex=True - or twinx) - """ - ax_freq = getattr(ax, 'freq', None) - if ax_freq is None: - # check for left/right ax in case of secondary yaxis - if hasattr(ax, 'left_ax'): - ax_freq = getattr(ax.left_ax, 'freq', None) - elif hasattr(ax, 'right_ax'): - ax_freq = getattr(ax.right_ax, 'freq', None) - if ax_freq is None: - # check if a shared ax (sharex/twinx) has already freq set - shared_axes = ax.get_shared_x_axes().get_siblings(ax) - if len(shared_axes) > 1: - for shared_ax in shared_axes: - ax_freq = getattr(shared_ax, 'freq', None) - if ax_freq is not None: - break - return ax_freq - - -def _get_freq(ax, series): - # get frequency from data - freq = getattr(series.index, 'freq', None) - if freq is None: - freq = getattr(series.index, 'inferred_freq', None) - - ax_freq = _get_ax_freq(ax) - - # use axes freq if no data freq - if freq is None: - freq = ax_freq - - # get the period frequency - if isinstance(freq, DateOffset): - freq = freq.rule_code - else: - freq = frequencies.get_base_alias(freq) - - freq = frequencies.get_period_alias(freq) - return freq, ax_freq - - -def _use_dynamic_x(ax, data): - freq = _get_index_freq(data) - ax_freq = _get_ax_freq(ax) - - if freq is None: # convert irregular if axes has freq info - freq = ax_freq - else: # do not use tsplot if irregular was plotted first - if (ax_freq is None) and (len(ax.get_lines()) > 0): - return False - - if freq is None: - return False - - if isinstance(freq, DateOffset): - freq = freq.rule_code - else: - freq = frequencies.get_base_alias(freq) - freq = frequencies.get_period_alias(freq) - - if freq is None: - return False - - # hack this for 0.10.1, creating more technical debt...sigh - if isinstance(data.index, DatetimeIndex): - base = frequencies.get_freq(freq) - x = data.index - if (base <= frequencies.FreqGroup.FR_DAY): - return x[:1].is_normalized - return Period(x[0], freq).to_timestamp(tz=x.tz) == x[0] - return True - - -def _get_index_freq(data): - freq = getattr(data.index, 'freq', None) - if freq is None: - freq = getattr(data.index, 'inferred_freq', None) - if freq == 'B': - weekdays = np.unique(data.index.dayofweek) - if (5 in weekdays) or (6 in weekdays): - freq = None - return freq - - -def _maybe_convert_index(ax, data): - # tsplot converts automatically, but don't want to convert index - # over and over for DataFrames - if isinstance(data.index, DatetimeIndex): - freq = getattr(data.index, 'freq', None) - - if freq is None: - freq = getattr(data.index, 'inferred_freq', None) - if isinstance(freq, DateOffset): - freq = freq.rule_code - - if freq is None: - freq = _get_ax_freq(ax) - - if freq is None: - raise ValueError('Could not get frequency alias for plotting') - - freq = frequencies.get_base_alias(freq) - freq = frequencies.get_period_alias(freq) - - data = data.to_period(freq=freq) - return data - - -# Patch methods for subplot. Only format_dateaxis is currently used. 
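The deleted helpers above resolve a plottable frequency in a fixed order -- the index's ``freq``, then its ``inferred_freq``, then any frequency already attached to the axes -- before converting the index for period-aware ticking. A minimal standalone sketch of the conversion that ``_maybe_convert_index`` ultimately performs (not part of the patch; the daily frequency is illustrative):

.. code-block:: python

   import pandas as pd

   s = pd.Series(range(4),
                 index=pd.date_range('2017-01-01', periods=4, freq='D'))

   # once a frequency alias has been resolved from the index (or the
   # axes), the helper boils down to this conversion:
   converted = s.to_period(freq='D')
   type(converted.index)  # PeriodIndex, which the date locators expect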
-# Do we need the rest for convenience? - -def format_timedelta_ticks(x, pos, n_decimals): - """ - Convert seconds to 'D days HH:MM:SS.F' - """ - s, ns = divmod(x, 1e9) - m, s = divmod(s, 60) - h, m = divmod(m, 60) - d, h = divmod(h, 24) - decimals = int(ns * 10**(n_decimals - 9)) - s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) - if n_decimals > 0: - s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) - if d != 0: - s = '{:d} days '.format(int(d)) + s - return s - - -def format_dateaxis(subplot, freq, index): - """ - Pretty-formats the date axis (x-axis). - - Major and minor ticks are automatically set for the frequency of the - current underlying series. As the dynamic mode is activated by - default, changing the limits of the x axis will intelligently change - the positions of the ticks. - """ - - # handle index specific formatting - # Note: DatetimeIndex does not use this - # interface. DatetimeIndex uses matplotlib.date directly - if isinstance(index, PeriodIndex): - - majlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, - minor_locator=False, - plot_obj=subplot) - minlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, - minor_locator=True, - plot_obj=subplot) - subplot.xaxis.set_major_locator(majlocator) - subplot.xaxis.set_minor_locator(minlocator) - - majformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, - minor_locator=False, - plot_obj=subplot) - minformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, - minor_locator=True, - plot_obj=subplot) - subplot.xaxis.set_major_formatter(majformatter) - subplot.xaxis.set_minor_formatter(minformatter) - - # x and y coord info - subplot.format_coord = lambda t, y: ( - "t = {0} y = {1:8f}".format(Period(ordinal=int(t), freq=freq), y)) - - elif isinstance(index, TimedeltaIndex): - subplot.xaxis.set_major_formatter( - TimeSeries_TimedeltaFormatter()) - else: - raise TypeError('index type not supported') - - pylab.draw_if_interactive() +from pandas.plotting._timeseries import tsplot diff --git a/pandas/util/doctools.py b/pandas/util/doctools.py index 6df6444aeafab..cbc9518b96416 100644 --- a/pandas/util/doctools.py +++ b/pandas/util/doctools.py @@ -131,7 +131,7 @@ def _make_table(self, ax, df, title, height=None): ax.set_visible(False) return - import pandas.tools.plotting as plotting + import pandas.plotting as plotting idx_nlevels = df.index.nlevels col_nlevels = df.columns.nlevels diff --git a/setup.py b/setup.py index d8ee52f9b4f43..d76c6fa508008 100755 --- a/setup.py +++ b/setup.py @@ -649,6 +649,7 @@ def pxd(name): 'pandas.io.msgpack', 'pandas._libs', 'pandas.formats', + 'pandas.plotting', 'pandas.sparse', 'pandas.stats', 'pandas.util', From 005a09e971a8afb26acdd408a025f71a9951f52b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Apr 2017 19:53:31 -0400 Subject: [PATCH 396/933] CLN: move top-level dirs xref #13634 pandas.types -> pandas.core.dtypes pandas.computation -> pandas.core.computation pandas.sparse -> pandas.core.sparse Author: Jeff Reback Closes #15998 from jreback/move_types and squashes the following commits: 5fe80ae [Jeff Reback] move fixtures to top-level conftest 9d36104 [Jeff Reback] moar ci d165a3f [Jeff Reback] more verbose built test 3ca5ba3 [Jeff Reback] pep f1505d7 [Jeff Reback] try with pyargs f63b76c [Jeff Reback] CLN: pandas.sparse -> pandas.core.sparse 751fb33 [Jeff Reback] move privacy changes to development section 221a7b5 [Jeff Reback] rename .typed -> .dtypes 290315e [Jeff Reback] move pandas.api.lib.infer_dtype -> pandas.api.types ba32641 [Jeff Reback] 
CLN: move pandas.computation -> pandas.core.computation bbdb1ee [Jeff Reback] CLN: move pandas.types -> pandas.core.typed --- asv_bench/benchmarks/binary_ops.py | 2 +- asv_bench/benchmarks/categoricals.py | 2 +- asv_bench/benchmarks/eval.py | 2 +- asv_bench/benchmarks/indexing.py | 2 +- asv_bench/benchmarks/sparse.py | 4 +- ci/install_travis.sh | 1 + ci/script_multi.sh | 4 + doc/source/categorical.rst | 2 +- doc/source/whatsnew/v0.20.0.txt | 130 ++++++++++-------- pandas/__init__.py | 4 +- pandas/api/types/__init__.py | 3 +- pandas/compat/numpy/function.py | 2 +- pandas/compat/pickle_compat.py | 14 +- pandas/conftest.py | 8 ++ pandas/core/algorithms.py | 13 +- pandas/core/api.py | 2 +- pandas/core/base.py | 8 +- pandas/core/categorical.py | 35 ++--- pandas/core/common.py | 14 +- pandas/{ => core}/computation/__init__.py | 0 pandas/{ => core}/computation/align.py | 2 +- pandas/{ => core}/computation/api.py | 4 +- pandas/{ => core}/computation/common.py | 0 pandas/{ => core}/computation/engines.py | 7 +- pandas/{ => core}/computation/eval.py | 8 +- pandas/{ => core}/computation/expr.py | 13 +- pandas/{ => core}/computation/expressions.py | 2 +- pandas/{ => core}/computation/ops.py | 6 +- pandas/{ => core}/computation/pytables.py | 10 +- pandas/{ => core}/computation/scope.py | 2 +- pandas/core/config_init.py | 2 +- pandas/{sparse => core/dtypes}/__init__.py | 0 pandas/{types => core/dtypes}/api.py | 0 pandas/{types => core/dtypes}/cast.py | 0 pandas/{types => core/dtypes}/common.py | 0 pandas/{types => core/dtypes}/concat.py | 41 +++--- pandas/{types => core/dtypes}/dtypes.py | 11 +- pandas/{types => core/dtypes}/generic.py | 0 pandas/{types => core/dtypes}/inference.py | 0 pandas/{types => core/dtypes}/missing.py | 0 pandas/core/frame.py | 72 +++++----- pandas/core/generic.py | 33 ++--- pandas/core/groupby.py | 43 +++--- pandas/core/indexing.py | 23 ++-- pandas/core/internals.py | 78 ++++++----- pandas/core/missing.py | 25 ++-- pandas/core/nanops.py | 23 ++-- pandas/core/ops.py | 27 ++-- pandas/core/panel.py | 16 ++- pandas/core/reshape.py | 19 +-- pandas/core/series.py | 48 +++---- pandas/core/sorting.py | 9 +- pandas/core/sparse.py | 10 -- .../computation => core/sparse}/__init__.py | 0 pandas/core/sparse/api.py | 6 + pandas/{ => core}/sparse/array.py | 31 +++-- pandas/{ => core}/sparse/frame.py | 10 +- pandas/{ => core}/sparse/list.py | 6 +- pandas/{ => core}/sparse/scipy_sparse.py | 0 pandas/{ => core}/sparse/series.py | 20 +-- pandas/{ => core}/sparse/sparse.pyx | 0 .../{ => core}/sparse/sparse_op_helper.pxi.in | 0 pandas/core/strings.py | 21 +-- pandas/core/window.py | 30 ++-- pandas/formats/format.py | 29 ++-- pandas/formats/printing.py | 2 +- pandas/formats/style.py | 2 +- pandas/indexes/base.py | 45 +++--- pandas/indexes/category.py | 15 +- pandas/indexes/frozen.py | 2 +- pandas/indexes/interval.py | 27 ++-- pandas/indexes/multi.py | 15 +- pandas/indexes/numeric.py | 7 +- pandas/indexes/range.py | 7 +- pandas/io/common.py | 2 +- pandas/io/excel.py | 5 +- pandas/io/html.py | 2 +- pandas/io/json/json.py | 2 +- pandas/io/json/table_schema.py | 2 +- pandas/io/packers.py | 9 +- pandas/io/parsers.py | 15 +- pandas/io/parsers.pyx | 13 +- pandas/io/pickle.py | 2 +- pandas/io/pytables.py | 23 ++-- pandas/io/sql.py | 9 +- pandas/io/stata.py | 5 +- pandas/plotting/_converter.py | 12 +- pandas/plotting/_core.py | 11 +- pandas/plotting/_misc.py | 2 +- pandas/plotting/_style.py | 2 +- pandas/plotting/_tools.py | 2 +- pandas/sparse/api.py | 6 - pandas/stats/moments.py | 2 +- 
pandas/tests/api/test_api.py | 8 +- pandas/tests/api/test_lib.py | 10 -- pandas/tests/api/test_types.py | 15 +- pandas/tests/{sparse => core}/__init__.py | 0 .../{types => core/computation}/__init__.py | 0 .../{ => core}/computation/test_compat.py | 8 +- .../tests/{ => core}/computation/test_eval.py | 24 ++-- .../{types => tests/core/dtypes}/__init__.py | 0 .../tests/{types => core/dtypes}/test_cast.py | 21 +-- .../{types => core/dtypes}/test_common.py | 10 +- .../{types => core/dtypes}/test_concat.py | 2 +- .../{types => core/dtypes}/test_dtypes.py | 20 +-- .../{types => core/dtypes}/test_generic.py | 2 +- .../{types => core/dtypes}/test_inference.py | 35 +++-- .../tests/{types => core/dtypes}/test_io.py | 0 .../{types => core/dtypes}/test_missing.py | 7 +- pandas/tests/core/sparse/__init__.py | 0 pandas/tests/core/sparse/common.py | 0 .../{ => core}/sparse/test_arithmetics.py | 0 pandas/tests/{ => core}/sparse/test_array.py | 4 +- .../{ => core}/sparse/test_combine_concat.py | 0 pandas/tests/{ => core}/sparse/test_format.py | 0 pandas/tests/{ => core}/sparse/test_frame.py | 19 ++- .../tests/{ => core}/sparse/test_groupby.py | 0 .../tests/{ => core}/sparse/test_indexing.py | 0 .../tests/{ => core}/sparse/test_libsparse.py | 4 +- pandas/tests/{ => core}/sparse/test_list.py | 2 +- pandas/tests/{ => core}/sparse/test_pivot.py | 0 pandas/tests/{ => core}/sparse/test_series.py | 6 +- pandas/tests/formats/test_format.py | 29 ++-- pandas/tests/frame/test_alter_axes.py | 7 +- pandas/tests/frame/test_apply.py | 2 +- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/frame/test_dtypes.py | 2 +- pandas/tests/frame/test_indexing.py | 9 +- pandas/tests/frame/test_query_eval.py | 14 +- pandas/tests/groupby/test_bin_groupby.py | 2 +- pandas/tests/groupby/test_transform.py | 3 +- pandas/tests/indexes/common.py | 2 +- .../indexes/datetimes/test_construction.py | 15 ++ pandas/tests/indexes/datetimes/test_tools.py | 2 +- pandas/tests/indexing/common.py | 2 +- pandas/tests/indexing/test_indexing.py | 5 +- pandas/tests/indexing/test_ix.py | 2 +- .../tests/io/json/test_json_table_schema.py | 9 +- pandas/tests/io/parser/dtypes.py | 2 +- pandas/tests/io/test_feather.py | 2 + pandas/tests/io/test_sql.py | 5 +- pandas/tests/io/test_stata.py | 2 +- pandas/tests/plotting/common.py | 2 +- pandas/tests/plotting/test_frame.py | 2 +- pandas/tests/series/test_constructors.py | 4 +- pandas/tests/series/test_datetime_values.py | 2 +- pandas/tests/series/test_indexing.py | 2 +- pandas/tests/series/test_quantile.py | 2 +- pandas/tests/sparse/common.py | 10 -- pandas/tests/test_base.py | 5 +- pandas/tests/test_categorical.py | 9 +- pandas/tests/test_expressions.py | 2 +- pandas/tests/test_generic.py | 2 +- pandas/tests/test_internals.py | 2 +- pandas/tests/test_multilevel.py | 2 +- pandas/tests/test_nanops.py | 2 +- pandas/tests/test_panel.py | 2 +- pandas/tests/test_panel4d.py | 2 +- pandas/tests/tools/test_merge.py | 4 +- pandas/tests/tools/test_union_categoricals.py | 2 +- pandas/tests/tseries/test_resample.py | 2 +- pandas/tests/tseries/test_timezones.py | 2 +- pandas/tools/concat.py | 2 +- pandas/tools/hashing.py | 12 +- pandas/tools/merge.py | 37 ++--- pandas/tools/pivot.py | 2 +- pandas/tools/tile.py | 15 +- pandas/tools/util.py | 19 +-- pandas/tseries/base.py | 18 +-- pandas/tseries/common.py | 11 +- pandas/tseries/frequencies.py | 11 +- pandas/tseries/index.py | 37 ++--- pandas/tseries/offsets.py | 2 +- pandas/tseries/period.py | 31 +++-- pandas/tseries/tdi.py | 27 ++-- pandas/tseries/timedeltas.py | 11 +- 
pandas/tseries/tools.py | 28 ++-- pandas/tseries/util.py | 2 +- pandas/util/testing.py | 34 ++--- pandas/util/testing.pyx | 4 +- pandas/util/validators.py | 2 +- setup.py | 21 ++- vb_suite/binary_ops.py | 12 +- vb_suite/eval.py | 2 +- vb_suite/indexing.py | 4 +- vb_suite/sparse.py | 4 +- 186 files changed, 1021 insertions(+), 891 deletions(-) rename pandas/{ => core}/computation/__init__.py (100%) rename pandas/{ => core}/computation/align.py (98%) rename pandas/{ => core}/computation/api.py (74%) rename pandas/{ => core}/computation/common.py (100%) rename pandas/{ => core}/computation/engines.py (95%) rename pandas/{ => core}/computation/eval.py (97%) rename pandas/{ => core}/computation/expr.py (98%) rename pandas/{ => core}/computation/expressions.py (99%) rename pandas/{ => core}/computation/ops.py (98%) rename pandas/{ => core}/computation/pytables.py (98%) rename pandas/{ => core}/computation/scope.py (99%) rename pandas/{sparse => core/dtypes}/__init__.py (100%) rename pandas/{types => core/dtypes}/api.py (100%) rename pandas/{types => core/dtypes}/cast.py (100%) rename pandas/{types => core/dtypes}/common.py (100%) rename pandas/{types => core/dtypes}/concat.py (95%) rename pandas/{types => core/dtypes}/dtypes.py (97%) rename pandas/{types => core/dtypes}/generic.py (100%) rename pandas/{types => core/dtypes}/inference.py (100%) rename pandas/{types => core/dtypes}/missing.py (100%) delete mode 100644 pandas/core/sparse.py rename pandas/{tests/computation => core/sparse}/__init__.py (100%) create mode 100644 pandas/core/sparse/api.py rename pandas/{ => core}/sparse/array.py (97%) rename pandas/{ => core}/sparse/frame.py (99%) rename pandas/{ => core}/sparse/list.py (96%) rename pandas/{ => core}/sparse/scipy_sparse.py (100%) rename pandas/{ => core}/sparse/series.py (98%) rename pandas/{ => core}/sparse/sparse.pyx (100%) rename pandas/{ => core}/sparse/sparse_op_helper.pxi.in (100%) delete mode 100644 pandas/sparse/api.py delete mode 100644 pandas/tests/api/test_lib.py rename pandas/tests/{sparse => core}/__init__.py (100%) rename pandas/tests/{types => core/computation}/__init__.py (100%) rename pandas/tests/{ => core}/computation/test_compat.py (84%) rename pandas/tests/{ => core}/computation/test_eval.py (99%) rename pandas/{types => tests/core/dtypes}/__init__.py (100%) rename pandas/tests/{types => core/dtypes}/test_cast.py (95%) rename pandas/tests/{types => core/dtypes}/test_common.py (92%) rename pandas/tests/{types => core/dtypes}/test_concat.py (98%) rename pandas/tests/{types => core/dtypes}/test_dtypes.py (96%) rename pandas/tests/{types => core/dtypes}/test_generic.py (97%) rename pandas/tests/{types => core/dtypes}/test_inference.py (97%) rename pandas/tests/{types => core/dtypes}/test_io.py (100%) rename pandas/tests/{types => core/dtypes}/test_missing.py (98%) create mode 100644 pandas/tests/core/sparse/__init__.py create mode 100644 pandas/tests/core/sparse/common.py rename pandas/tests/{ => core}/sparse/test_arithmetics.py (100%) rename pandas/tests/{ => core}/sparse/test_array.py (99%) rename pandas/tests/{ => core}/sparse/test_combine_concat.py (100%) rename pandas/tests/{ => core}/sparse/test_format.py (100%) rename pandas/tests/{ => core}/sparse/test_frame.py (99%) rename pandas/tests/{ => core}/sparse/test_groupby.py (100%) rename pandas/tests/{ => core}/sparse/test_indexing.py (100%) rename pandas/tests/{ => core}/sparse/test_libsparse.py (99%) rename pandas/tests/{ => core}/sparse/test_list.py (98%) rename pandas/tests/{ => core}/sparse/test_pivot.py 
(100%) rename pandas/tests/{ => core}/sparse/test_series.py (99%) delete mode 100644 pandas/tests/sparse/common.py diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py index 72700c3de282e..cc869996b49cd 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/binary_ops.py @@ -1,5 +1,5 @@ from .pandas_vb_common import * -import pandas.computation.expressions as expr +import pandas.core.computation.expressions as expr class Ops(object): diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 153107911ca2c..5b0dd126acdea 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -1,6 +1,6 @@ from .pandas_vb_common import * try: - from pandas.types.concat import union_categoricals + from pandas.core.dtypes.concat import union_categoricals except ImportError: pass diff --git a/asv_bench/benchmarks/eval.py b/asv_bench/benchmarks/eval.py index a0819e33dc254..ee091e57c6403 100644 --- a/asv_bench/benchmarks/eval.py +++ b/asv_bench/benchmarks/eval.py @@ -1,6 +1,6 @@ from .pandas_vb_common import * import pandas as pd -import pandas.computation.expressions as expr +import pandas.core.computation.expressions as expr class Eval(object): diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index a32c9f25a0f09..79844414f2746 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -1,6 +1,6 @@ from .pandas_vb_common import * try: - import pandas.computation.expressions as expr + import pandas.core.computation.expressions as expr except: expr = None diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py index 717fe7218ceda..7d424592ed877 100644 --- a/asv_bench/benchmarks/sparse.py +++ b/asv_bench/benchmarks/sparse.py @@ -1,5 +1,5 @@ from .pandas_vb_common import * -import pandas.sparse.series +import pandas.core.sparse.series import scipy.sparse from pandas.core.sparse import SparseSeries, SparseDataFrame from pandas.core.sparse import SparseDataFrame @@ -37,7 +37,7 @@ def setup(self): self.A = scipy.sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(100, 100)) def time_sparse_series_from_coo(self): - self.ss = pandas.sparse.series.SparseSeries.from_coo(self.A) + self.ss = pandas.core.sparse.series.SparseSeries.from_coo(self.A) class sparse_series_to_coo(object): diff --git a/ci/install_travis.sh b/ci/install_travis.sh index f71df979c9df0..09668cbccc9d2 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -123,6 +123,7 @@ if [ "$BUILD_TEST" ]; then # build & install testing echo ["Starting installation test."] + rm -rf dist python setup.py clean python setup.py build_ext --inplace python setup.py sdist --formats=gztar diff --git a/ci/script_multi.sh b/ci/script_multi.sh index 88ecaf344a410..663d2feb5be23 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -19,7 +19,11 @@ export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 429496 echo PYTHONHASHSEED=$PYTHONHASHSEED if [ "$BUILD_TEST" ]; then + echo "build-test" cd /tmp + pwd + conda list pandas + echo "running" python -c "import pandas; pandas.test(['-n 2'])" elif [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 411f973e9a71f..a508e84465107 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -673,7 +673,7 @@ will be the union of the categories being combined. .. 
ipython:: python - from pandas.types.concat import union_categoricals + from pandas.api.types import union_categoricals a = pd.Categorical(["b", "c"]) b = pd.Categorical(["a", "b"]) union_categoricals([a, b]) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 914995244fe5f..33d80f8347b0a 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -21,8 +21,11 @@ Highlights include: - Support for S3 handling now uses ``s3fs``, see :ref:`here ` - Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here ` - Switched the test framework to use `pytest `__ (:issue:`13097`) -- The ``pandas.tools.plotting`` module has been deprecated, moved to ``pandas.plotting``. See :ref:`here ` +.. warning:: + + Pandas has changed the internal structure and layout of the codebase. + This can affect imports that are not from the top-level ``pandas.*`` namespace, please see the changes :ref:`here `. Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. @@ -489,7 +492,7 @@ Other Enhancements - ``TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) -- ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. +- ``pd.api.types.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. - ``DataFrame.to_latex()`` and ``DataFrame.to_string()`` now allow optional header aliases. (:issue:`15536`) - Re-enable the ``parse_dates`` keyword of ``pd.read_excel()`` to parse string columns as dates (:issue:`14326`) - Added ``.empty`` property to subclasses of ``Index``. (:issue:`15270`) @@ -558,31 +561,6 @@ Using ``.iloc``. Here we will get the location of the 'A' column, then use *posi df.iloc[[0, 2], df.columns.get_loc('A')] -.. _whatsnew_0200.api_breaking.deprecate_plotting - -Deprecate .plotting -^^^^^^^^^^^^^^^^^^^ - -The ``pandas.tools.plotting`` module has been deprecated, in favor of the top level ``pandas.plotting`` module. All the public plotting functions are now available -from ``pandas.plotting`` (:issue:`12548`). - -Furthermore, the top-level ``pandas.scatter_matrix`` and ``pandas.plot_params`` are deprecated. -Users can import these from ``pandas.plotting`` as well. - -Previous script: - -.. code-block:: python - - pd.tools.plotting.scatter_matrix(df) - pd.scatter_matrix(df) - -Should be changed to: - -.. code-block:: python - - pd.plotting.scatter_matrix(df) - - .. _whatsnew_0200.api_breaking.deprecate_panel: Deprecate Panel @@ -1026,34 +1004,6 @@ New Behavior: In [11]: index.memory_usage(deep=True) Out[11]: 260 -.. _whatsnew_0200.api_breaking.extensions: - -Extension Modules Moved -^^^^^^^^^^^^^^^^^^^^^^^ - -Some formerly public c/c++/cython extension modules have been moved and/or renamed. These are all removed from the public API. -If indicated, a deprecation warning will be issued if you reference that module. (:issue:`12588`) - -.. 
csv-table:: - :header: "Previous Location", "New Location", "Deprecated" - :widths: 30, 30, 4 - - "pandas.lib", "pandas._libs.lib", "X" - "pandas.tslib", "pandas._libs.tslib", "X" - "pandas._join", "pandas._libs.join", "" - "pandas._period", "pandas._libs.period", "" - "pandas.msgpack", "pandas.io.msgpack", "" - "pandas.index", "pandas._libs.index", "" - "pandas.algos", "pandas._libs.algos", "" - "pandas.hashtable", "pandas._libs.hashtable", "" - "pandas.json", "pandas.io.json.libjson", "X" - "pandas.parser", "pandas.io.libparsers", "X" - "pandas.io.sas.saslib", "pandas.io.sas.libsas", "" - "pandas._testing", "pandas.util.libtesting", "" - "pandas._sparse", "pandas.sparse.libsparse", "" - "pandas._hash", "pandas.tools.libhash", "" - "pandas._window", "pandas.core.libwindow", "" - .. _whatsnew_0200.api_breaking.sort_index: DataFrame.sort_index changes @@ -1354,10 +1304,74 @@ Other API Changes - ``DataFrame`` and ``Panel`` constructors with invalid input will now raise ``ValueError`` rather than ``pandas.core.common.PandasError``, if called with scalar inputs and not axes; The exception ``PandasError`` is removed as well. (:issue:`15541`) - The exception ``pandas.core.common.AmbiguousIndexError`` is removed as it is not referenced (:issue:`15541`) -.. _whatsnew_0200.develop: +.. _whatsnew_0200.privacy: + +Privacy Changes +~~~~~~~~~~~~~~~ + +.. _whatsnew_0200.privacy.extensions: + +Modules Privacy Has Changed +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Some formerly public python/c/c++/cython extension modules have been moved and/or renamed. These are all removed from the public API. +Furthermore, the ``pandas.core``, ``pandas.io``, and ``pandas.util`` top-level modules are now considered to be PRIVATE. +If indicated, a deprecation warning will be issued if you reference theses modules. (:issue:`12588`) + +.. csv-table:: + :header: "Previous Location", "New Location", "Deprecated" + :widths: 30, 30, 4 + + "pandas.lib", "pandas._libs.lib", "X" + "pandas.tslib", "pandas._libs.tslib", "X" + "pandas.computation", "pandas.core.computation", "" + "pandas.msgpack", "pandas.io.msgpack", "" + "pandas.index", "pandas._libs.index", "" + "pandas.algos", "pandas._libs.algos", "" + "pandas.hashtable", "pandas._libs.hashtable", "" + "pandas.json", "pandas.io.json.libjson", "X" + "pandas.parser", "pandas.io.libparsers", "X" + "pandas.sparse", "pandas.core.sparse", "" + "pandas.types", "pandas.core.dtypes", "" + "pandas.io.sas.saslib", "pandas.io.sas.libsas", "" + "pandas._join", "pandas._libs.join", "" + "pandas._hash", "pandas.tools.libhash", "" + "pandas._period", "pandas._libs.period", "" + "pandas._sparse", "pandas.core.sparse.libsparse", "" + "pandas._testing", "pandas.util.libtesting", "" + "pandas._window", "pandas.core.libwindow", "" + +- The function :func:`~pandas.api.type.union_categoricals` is now importable from ``pandas.api.types``, formerly from ``pandas.types.concat`` (:issue:`15998`) + +.. _whatsnew_0200.privacy.deprecate_plotting + +Deprecate .plotting +^^^^^^^^^^^^^^^^^^^ + +The ``pandas.tools.plotting`` module has been deprecated, in favor of the top level ``pandas.plotting`` module. All the public plotting functions are now available +from ``pandas.plotting`` (:issue:`12548`). + +Furthermore, the top-level ``pandas.scatter_matrix`` and ``pandas.plot_params`` are deprecated. +Users can import these from ``pandas.plotting`` as well. + +Previous script: + +.. code-block:: python + + pd.tools.plotting.scatter_matrix(df) + pd.scatter_matrix(df) + +Should be changed to: + +.. 
+
+.. _whatsnew_0200.privacy.deprecate_plotting:
+
+Deprecate .plotting
+^^^^^^^^^^^^^^^^^^^
+
+The ``pandas.tools.plotting`` module has been deprecated, in favor of the top level ``pandas.plotting`` module. All the public plotting functions are now available
+from ``pandas.plotting`` (:issue:`12548`).
+
+Furthermore, the top-level ``pandas.scatter_matrix`` and ``pandas.plot_params`` are deprecated.
+Users can import these from ``pandas.plotting`` as well.
+
+Previous script:
+
+.. code-block:: python
+
+    pd.tools.plotting.scatter_matrix(df)
+    pd.scatter_matrix(df)
+
+Should be changed to:
+
+.. code-block:: python
+
+    pd.plotting.scatter_matrix(df)
+
+
+.. _whatsnew_0200.privacy.development:

-Development Changes
-~~~~~~~~~~~~~~~~~~~
+Other Development Changes
+^^^^^^^^^^^^^^^^^^^^^^^^^

 - Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`)
 - Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`)
diff --git a/pandas/__init__.py b/pandas/__init__.py
index bc38919f2c78c..4e1bcbd613965 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -40,10 +40,10 @@
 import pandas.core.config_init
 from pandas.core.api import *
-from pandas.sparse.api import *
+from pandas.core.sparse.api import *
 from pandas.stats.api import *
 from pandas.tseries.api import *
-from pandas.computation.api import *
+from pandas.core.computation.api import *

 from pandas.tools.concat import concat
 from pandas.tools.merge import (merge, ordered_merge,
diff --git a/pandas/api/types/__init__.py b/pandas/api/types/__init__.py
index ee217543f0420..06fb5742ba067 100644
--- a/pandas/api/types/__init__.py
+++ b/pandas/api/types/__init__.py
@@ -1,4 +1,5 @@
 """ public toolkit API """
-from pandas.types.api import * # noqa
+from pandas.core.dtypes.api import * # noqa
+from pandas.core.dtypes.concat import union_categoricals # noqa
 del np # noqa
diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py
index 1dd22795533fc..d707ac66c4eab 100644
--- a/pandas/compat/numpy/function.py
+++ b/pandas/compat/numpy/function.py
@@ -22,7 +22,7 @@
 from pandas.util.validators import (validate_args, validate_kwargs,
                                     validate_args_and_kwargs)
 from pandas.errors import UnsupportedFunctionCall
-from pandas.types.common import is_integer, is_bool
+from pandas.core.dtypes.common import is_integer, is_bool
 from pandas.compat import OrderedDict
diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py
index 5b4fcad252192..e977fdc3a267d 100644
--- a/pandas/compat/pickle_compat.py
+++ b/pandas/compat/pickle_compat.py
@@ -67,16 +67,24 @@ def load_reduce(self):
     ('pandas.core.series', 'TimeSeries'):
         ('pandas.core.series', 'Series'),
     ('pandas.sparse.series', 'SparseTimeSeries'):
-        ('pandas.sparse.series', 'SparseSeries'),
+        ('pandas.core.sparse.series', 'SparseSeries'),

     # 12588, extensions moving
     ('pandas._sparse', 'BlockIndex'):
-        ('pandas.sparse.libsparse', 'BlockIndex'),
+        ('pandas.core.sparse.libsparse', 'BlockIndex'),
     ('pandas.tslib', 'Timestamp'):
         ('pandas._libs.tslib', 'Timestamp'),
     ('pandas.tslib', '__nat_unpickle'):
         ('pandas._libs.tslib', '__nat_unpickle'),
-    ('pandas._period', 'Period'): ('pandas._libs.period', 'Period')
+    ('pandas._period', 'Period'): ('pandas._libs.period', 'Period'),
+
+    # 15998 top-level dirs moving
+    ('pandas.sparse.array', 'SparseArray'):
+        ('pandas.core.sparse.array', 'SparseArray'),
+    ('pandas.sparse.series', 'SparseSeries'):
+        ('pandas.core.sparse.series', 'SparseSeries'),
+    ('pandas.sparse.frame', 'SparseDataFrame'):
+        ('pandas.core.sparse.frame', 'SparseDataFrame')
 }
diff --git a/pandas/conftest.py b/pandas/conftest.py
index e0a15f740688b..caced6a0c568e 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -2,6 +2,7 @@
 import numpy
 import pandas
+import pandas.util.testing as tm

 def pytest_addoption(parser):
@@ -30,3 +31,10 @@ def add_imports(doctest_namespace):
     doctest_namespace['np'] = numpy
     doctest_namespace['pd'] = pandas
+
+
+@pytest.fixture(params=['bsr', 'coo', 'csc', 'csr', 'dia', 'dok', 'lil'])
+def spmatrix(request):
+    tm._skip_if_no_scipy()
+    from scipy import sparse
+    return getattr(sparse, request.param + '_matrix')
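A hypothetical test consuming the ``spmatrix`` fixture above; pytest instantiates it once per scipy sparse format. The test name and assertions are illustrative, not part of the patch:

.. code-block:: python

    import numpy as np

    def test_spmatrix_roundtrip(spmatrix):
        # spmatrix is a constructor such as scipy.sparse.coo_matrix
        mat = spmatrix(np.eye(3))
        assert mat.shape == (3, 3)
        assert (mat.toarray() == np.eye(3)).all()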
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 5d2db864dd48e..6df7fce631a3c 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -7,10 +7,11 @@
 import numpy as np

 from pandas import compat, _np_version_under1p8
-from pandas.types.cast import maybe_promote
-from pandas.types.generic import (ABCSeries, ABCIndex,
-                                  ABCIndexClass, ABCCategorical)
-from pandas.types.common import (
+from pandas.core.dtypes.cast import maybe_promote
+from pandas.core.dtypes.generic import (
+    ABCSeries, ABCIndex,
+    ABCIndexClass, ABCCategorical)
+from pandas.core.dtypes.common import (
     is_unsigned_integer_dtype, is_signed_integer_dtype,
     is_integer_dtype, is_complex_dtype,
     is_categorical_dtype, is_sparse,
@@ -25,9 +26,9 @@
     _ensure_float64, _ensure_uint64,
     _ensure_int64)
 from pandas.compat.numpy import _np_version_under1p10
-from pandas.types.missing import isnull
+from pandas.core.dtypes.missing import isnull

-import pandas.core.common as com
+from pandas.core import common as com
 from pandas.compat import string_types
 from pandas._libs import algos, lib, hashtable as htable
 from pandas._libs.tslib import iNaT
diff --git a/pandas/core/api.py b/pandas/core/api.py
index ea5be17ef3aaf..8e8969e1f6b26 100644
--- a/pandas/core/api.py
+++ b/pandas/core/api.py
@@ -5,7 +5,7 @@
 import numpy as np

 from pandas.core.algorithms import factorize, unique, value_counts
-from pandas.types.missing import isnull, notnull
+from pandas.core.dtypes.missing import isnull, notnull
 from pandas.core.categorical import Categorical
 from pandas.core.groupby import Grouper
 from pandas.formats.format import set_eng_float_format
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 33c95197debdc..e30751a6582f9 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -6,9 +6,9 @@
 from pandas.compat import builtins
 import numpy as np

-from pandas.types.missing import isnull
-from pandas.types.generic import ABCDataFrame, ABCSeries, ABCIndexClass
-from pandas.types.common import is_object_dtype, is_list_like, is_scalar
+from pandas.core.dtypes.missing import isnull
+from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass
+from pandas.core.dtypes.common import is_object_dtype, is_list_like, is_scalar
 from pandas.util.validators import validate_bool_kwarg

 from pandas.core import common as com
@@ -725,7 +725,7 @@ def _aggregate_multiple_funcs(self, arg, _level, _axis):

             # we are concatting non-NDFrame objects,
             # e.g.
a list of scalars - from pandas.types.cast import is_nested_object + from pandas.core.dtypes.cast import is_nested_object from pandas import Series result = Series(results, index=keys, name=self.name) if is_nested_object(result): diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 906e8efafe4af..d1f060113cf1d 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -8,21 +8,24 @@ from pandas.compat import u, lzip from pandas._libs import lib, algos as libalgos -from pandas.types.generic import ABCSeries, ABCIndexClass, ABCCategoricalIndex -from pandas.types.missing import isnull, notnull -from pandas.types.cast import (maybe_infer_to_datetimelike, - coerce_indexer_dtype) -from pandas.types.dtypes import CategoricalDtype -from pandas.types.common import (_ensure_int64, - _ensure_object, - _ensure_platform_int, - is_dtype_equal, - is_datetimelike, - is_categorical, - is_categorical_dtype, - is_integer_dtype, is_bool, - is_list_like, is_sequence, - is_scalar) +from pandas.core.dtypes.generic import ( + ABCSeries, ABCIndexClass, ABCCategoricalIndex) +from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.cast import ( + maybe_infer_to_datetimelike, + coerce_indexer_dtype) +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.common import ( + _ensure_int64, + _ensure_object, + _ensure_platform_int, + is_dtype_equal, + is_datetimelike, + is_categorical, + is_categorical_dtype, + is_integer_dtype, is_bool, + is_list_like, is_sequence, + is_scalar) from pandas.core.common import is_null_slice from pandas.core.algorithms import factorize, take_1d, unique1d @@ -1215,7 +1218,7 @@ def value_counts(self, dropna=True): """ from numpy import bincount - from pandas.types.missing import isnull + from pandas.core.dtypes.missing import isnull from pandas.core.series import Series from pandas.core.index import CategoricalIndex diff --git a/pandas/core/common.py b/pandas/core/common.py index bf4acf1fbf257..39a5da0aa6912 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -13,12 +13,12 @@ from pandas import compat from pandas.compat import long, zip, iteritems from pandas.core.config import get_option -from pandas.types.generic import ABCSeries -from pandas.types.common import _NS_DTYPE -from pandas.types.inference import _iterable_not_string -from pandas.types.missing import isnull +from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.common import _NS_DTYPE +from pandas.core.dtypes.inference import _iterable_not_string +from pandas.core.dtypes.missing import isnull from pandas.api import types -from pandas.types import common +from pandas.core.dtypes import common # compat from pandas.errors import ( # noqa @@ -60,7 +60,7 @@ def wrapper(*args, **kwargs): warnings.warn("pandas.core.common.{t} is deprecated. 
" "These are not longer public API functions, " "but can be imported from " - "pandas.types.common.{t} instead".format(t=t), + "pandas.api.types.{t} instead".format(t=t), DeprecationWarning, stacklevel=3) return getattr(common, t)(*args, **kwargs) return wrapper @@ -73,7 +73,7 @@ def wrapper(*args, **kwargs): def array_equivalent(*args, **kwargs): warnings.warn("'pandas.core.common.array_equivalent' is deprecated and " "is no longer public API", DeprecationWarning, stacklevel=2) - from pandas.types import missing + from pandas.core.dtypes import missing return missing.array_equivalent(*args, **kwargs) diff --git a/pandas/computation/__init__.py b/pandas/core/computation/__init__.py similarity index 100% rename from pandas/computation/__init__.py rename to pandas/core/computation/__init__.py diff --git a/pandas/computation/align.py b/pandas/core/computation/align.py similarity index 98% rename from pandas/computation/align.py rename to pandas/core/computation/align.py index b4c80f4d493af..1c75301082297 100644 --- a/pandas/computation/align.py +++ b/pandas/core/computation/align.py @@ -11,7 +11,7 @@ from pandas import compat from pandas.errors import PerformanceWarning from pandas.core.common import flatten -from pandas.computation.common import _result_type_many +from pandas.core.computation.common import _result_type_many def _align_core_single_unary_op(term): diff --git a/pandas/computation/api.py b/pandas/core/computation/api.py similarity index 74% rename from pandas/computation/api.py rename to pandas/core/computation/api.py index fe3dad015048e..a6fe5aae822df 100644 --- a/pandas/computation/api.py +++ b/pandas/core/computation/api.py @@ -1,6 +1,6 @@ # flake8: noqa -from pandas.computation.eval import eval +from pandas.core.computation.eval import eval # deprecation, xref #13790 @@ -10,5 +10,5 @@ def Expr(*args, **kwargs): warnings.warn("pd.Expr is deprecated as it is not " "applicable to user code", FutureWarning, stacklevel=2) - from pandas.computation.expr import Expr + from pandas.core.computation.expr import Expr return Expr(*args, **kwargs) diff --git a/pandas/computation/common.py b/pandas/core/computation/common.py similarity index 100% rename from pandas/computation/common.py rename to pandas/core/computation/common.py diff --git a/pandas/computation/engines.py b/pandas/core/computation/engines.py similarity index 95% rename from pandas/computation/engines.py rename to pandas/core/computation/engines.py index aebc5bb02d59d..675a3d5eca792 100644 --- a/pandas/computation/engines.py +++ b/pandas/core/computation/engines.py @@ -7,9 +7,10 @@ from pandas import compat from pandas.compat import map import pandas.formats.printing as printing -from pandas.computation.align import _align, _reconstruct_object -from pandas.computation.ops import (UndefinedVariableError, - _mathops, _reductions) +from pandas.core.computation.align import _align, _reconstruct_object +from pandas.core.computation.ops import ( + UndefinedVariableError, + _mathops, _reductions) _ne_builtins = frozenset(_mathops + _reductions) diff --git a/pandas/computation/eval.py b/pandas/core/computation/eval.py similarity index 97% rename from pandas/computation/eval.py rename to pandas/core/computation/eval.py index 5b21c753a71da..fc3986e317d13 100644 --- a/pandas/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -6,11 +6,11 @@ import warnings import tokenize from pandas.formats.printing import pprint_thing -from pandas.computation import _NUMEXPR_INSTALLED -from pandas.computation.expr import Expr, _parsers, 
tokenize_string -from pandas.computation.scope import _ensure_scope +from pandas.core.computation import _NUMEXPR_INSTALLED +from pandas.core.computation.expr import Expr, _parsers, tokenize_string +from pandas.core.computation.scope import _ensure_scope from pandas.compat import string_types -from pandas.computation.engines import _engines +from pandas.core.computation.engines import _engines from pandas.util.validators import validate_bool_kwarg diff --git a/pandas/computation/expr.py b/pandas/core/computation/expr.py similarity index 98% rename from pandas/computation/expr.py rename to pandas/core/computation/expr.py index e78806b38c667..01c5d1f6f100c 100644 --- a/pandas/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -14,12 +14,13 @@ from pandas.core import common as com import pandas.formats.printing as printing from pandas.tools.util import compose -from pandas.computation.ops import (_cmp_ops_syms, _bool_ops_syms, - _arith_ops_syms, _unary_ops_syms, is_term) -from pandas.computation.ops import _reductions, _mathops, _LOCAL_TAG -from pandas.computation.ops import Op, BinOp, UnaryOp, Term, Constant, Div -from pandas.computation.ops import UndefinedVariableError, FuncNode -from pandas.computation.scope import Scope +from pandas.core.computation.ops import ( + _cmp_ops_syms, _bool_ops_syms, + _arith_ops_syms, _unary_ops_syms, is_term) +from pandas.core.computation.ops import _reductions, _mathops, _LOCAL_TAG +from pandas.core.computation.ops import Op, BinOp, UnaryOp, Term, Constant, Div +from pandas.core.computation.ops import UndefinedVariableError, FuncNode +from pandas.core.computation.scope import Scope def tokenize_string(source): diff --git a/pandas/computation/expressions.py b/pandas/core/computation/expressions.py similarity index 99% rename from pandas/computation/expressions.py rename to pandas/core/computation/expressions.py index 8fd9ab3477b74..4eeefb183001e 100644 --- a/pandas/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -9,7 +9,7 @@ import warnings import numpy as np from pandas.core.common import _values_from_object -from pandas.computation import _NUMEXPR_INSTALLED +from pandas.core.computation import _NUMEXPR_INSTALLED if _NUMEXPR_INSTALLED: import numexpr as ne diff --git a/pandas/computation/ops.py b/pandas/core/computation/ops.py similarity index 98% rename from pandas/computation/ops.py rename to pandas/core/computation/ops.py index 6ba2a21940d55..91c414bbc0ec1 100644 --- a/pandas/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -7,14 +7,14 @@ import numpy as np -from pandas.types.common import is_list_like, is_scalar +from pandas.core.dtypes.common import is_list_like, is_scalar import pandas as pd from pandas.compat import PY3, string_types, text_type import pandas.core.common as com from pandas.formats.printing import pprint_thing, pprint_thing_encoded from pandas.core.base import StringMixin -from pandas.computation.common import _ensure_decoded, _result_type_many -from pandas.computation.scope import _DEFAULT_GLOBALS +from pandas.core.computation.common import _ensure_decoded, _result_type_many +from pandas.core.computation.scope import _DEFAULT_GLOBALS _reductions = 'sum', 'prod' diff --git a/pandas/computation/pytables.py b/pandas/core/computation/pytables.py similarity index 98% rename from pandas/computation/pytables.py rename to pandas/core/computation/pytables.py index 2a5056963fe8d..8d0f23e28c0a2 100644 --- a/pandas/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -5,15 +5,15 @@ 
import numpy as np import pandas as pd -from pandas.types.common import is_list_like +from pandas.core.dtypes.common import is_list_like import pandas.core.common as com from pandas.compat import u, string_types, DeepChainMap from pandas.core.base import StringMixin from pandas.formats.printing import pprint_thing, pprint_thing_encoded -from pandas.computation import expr, ops -from pandas.computation.ops import is_term, UndefinedVariableError -from pandas.computation.expr import BaseExprVisitor -from pandas.computation.common import _ensure_decoded +from pandas.core.computation import expr, ops +from pandas.core.computation.ops import is_term, UndefinedVariableError +from pandas.core.computation.expr import BaseExprVisitor +from pandas.core.computation.common import _ensure_decoded from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type diff --git a/pandas/computation/scope.py b/pandas/core/computation/scope.py similarity index 99% rename from pandas/computation/scope.py rename to pandas/core/computation/scope.py index 9ade755e0ff12..5a589473f64b7 100644 --- a/pandas/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -15,7 +15,7 @@ import pandas as pd # noqa from pandas.compat import DeepChainMap, map, StringIO from pandas.core.base import StringMixin -import pandas.computation as compu +import pandas.core.computation as compu def _ensure_scope(level, global_dict=None, local_dict=None, resolvers=(), diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index cf2a653638e90..7307980c8312e 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -415,7 +415,7 @@ def mpl_style_cb(key): def use_inf_as_null_cb(key): - from pandas.types.missing import _use_inf_as_null + from pandas.core.dtypes.missing import _use_inf_as_null _use_inf_as_null(key) diff --git a/pandas/sparse/__init__.py b/pandas/core/dtypes/__init__.py similarity index 100% rename from pandas/sparse/__init__.py rename to pandas/core/dtypes/__init__.py diff --git a/pandas/types/api.py b/pandas/core/dtypes/api.py similarity index 100% rename from pandas/types/api.py rename to pandas/core/dtypes/api.py diff --git a/pandas/types/cast.py b/pandas/core/dtypes/cast.py similarity index 100% rename from pandas/types/cast.py rename to pandas/core/dtypes/cast.py diff --git a/pandas/types/common.py b/pandas/core/dtypes/common.py similarity index 100% rename from pandas/types/common.py rename to pandas/core/dtypes/common.py diff --git a/pandas/types/concat.py b/pandas/core/dtypes/concat.py similarity index 95% rename from pandas/types/concat.py rename to pandas/core/dtypes/concat.py index b098bbb75d984..ddff78c9d511f 100644 --- a/pandas/types/concat.py +++ b/pandas/core/dtypes/concat.py @@ -5,20 +5,21 @@ import numpy as np import pandas._libs.tslib as tslib from pandas import compat -from pandas.core.algorithms import take_1d -from .common import (is_categorical_dtype, - is_sparse, - is_datetimetz, - is_datetime64_dtype, - is_timedelta64_dtype, - is_period_dtype, - is_object_dtype, - is_bool_dtype, - is_dtype_equal, - _NS_DTYPE, - _TD_DTYPE) -from pandas.types.generic import (ABCDatetimeIndex, ABCTimedeltaIndex, - ABCPeriodIndex) +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_sparse, + is_datetimetz, + is_datetime64_dtype, + is_timedelta64_dtype, + is_period_dtype, + is_object_dtype, + is_bool_dtype, + is_dtype_equal, + _NS_DTYPE, + _TD_DTYPE) +from pandas.core.dtypes.generic import ( + ABCDatetimeIndex, ABCTimedeltaIndex, + ABCPeriodIndex) def 
get_dtype_kinds(l):
@@ -68,7 +69,7 @@ def _get_series_result_type(result):
     if isinstance(result, dict):
         # concat Series with axis 1
         if all(is_sparse(c) for c in compat.itervalues(result)):
-            from pandas.sparse.api import SparseDataFrame
+            from pandas.core.sparse.api import SparseDataFrame
             return SparseDataFrame
         else:
             from pandas.core.frame import DataFrame
@@ -76,7 +77,7 @@
     elif is_sparse(result):
         # concat Series with axis 1
-        from pandas.sparse.api import SparseSeries
+        from pandas.core.sparse.api import SparseSeries
         return SparseSeries
     else:
         from pandas.core.series import Series
@@ -90,7 +91,7 @@ def _get_frame_result_type(result, objs):
     otherwise, return 1st obj
     """
     if any(b.is_sparse for b in result.blocks):
-        from pandas.sparse.api import SparseDataFrame
+        from pandas.core.sparse.api import SparseDataFrame
         return SparseDataFrame
     else:
         return objs[0]
@@ -276,6 +277,8 @@ def _maybe_unwrap(x):
         if sort_categories and not categories.is_monotonic_increasing:
             categories = categories.sort_values()
             indexer = categories.get_indexer(first.categories)
+
+            from pandas.core.algorithms import take_1d
             new_codes = take_1d(indexer, new_codes, fill_value=-1)
     elif ignore_order or all(not c.ordered for c in to_union):
         # different categories - union and recode
@@ -288,6 +291,8 @@ def _maybe_unwrap(x):
         for c in to_union:
             if len(c.categories) > 0:
                 indexer = categories.get_indexer(c.categories)
+
+                from pandas.core.algorithms import take_1d
                 new_codes.append(take_1d(indexer, c.codes, fill_value=-1))
             else:
                 # must be all NaN
@@ -433,7 +438,7 @@ def _concat_sparse(to_concat, axis=0, typs=None):
     a single array, preserving the combined dtypes
     """
-    from pandas.sparse.array import SparseArray, _make_index
+    from pandas.core.sparse.array import SparseArray, _make_index

     def convert_sparse(x, axis):
         # coerce to native type
diff --git a/pandas/types/dtypes.py b/pandas/core/dtypes/dtypes.py
similarity index 97%
rename from pandas/types/dtypes.py
rename to pandas/core/dtypes/dtypes.py
index 7913950a597c9..59c23addd418e 100644
--- a/pandas/types/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -209,8 +209,15 @@ def __new__(cls, unit=None, tz=None):
             raise ValueError("DatetimeTZDtype constructor must have a tz "
                              "supplied")

+        # hash with the actual tz if we can
+        # some cannot be hashed, so stringify
+        try:
+            key = (unit, tz)
+            hash(key)
+        except TypeError:
+            key = (unit, str(tz))
+
         # set/retrieve from cache
-        key = (unit, str(tz))
         try:
             return cls._cache[key]
         except KeyError:
@@ -410,7 +417,7 @@ def __new__(cls, subtype=None):
         if m is not None:
             subtype = m.group('subtype')

-        from pandas.types.common import pandas_dtype
+        from pandas.core.dtypes.common import pandas_dtype
         try:
             subtype = pandas_dtype(subtype)
         except TypeError:
diff --git a/pandas/types/generic.py b/pandas/core/dtypes/generic.py
similarity index 100%
rename from pandas/types/generic.py
rename to pandas/core/dtypes/generic.py
diff --git a/pandas/types/inference.py b/pandas/core/dtypes/inference.py
similarity index 100%
rename from pandas/types/inference.py
rename to pandas/core/dtypes/inference.py
diff --git a/pandas/types/missing.py b/pandas/core/dtypes/missing.py
similarity index 100%
rename from pandas/types/missing.py
rename to pandas/core/dtypes/missing.py
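In isolation, the cache-key fallback added to ``DatetimeTZDtype.__new__`` above can be sketched as follows (``_cache_key`` is a hypothetical helper name; only the tuple-hashing logic is taken from the patch):

.. code-block:: python

    def _cache_key(unit, tz):
        # Prefer the real tz object in the key; some tzinfo
        # implementations are unhashable, so fall back to str(tz).
        key = (unit, tz)
        try:
            hash(key)
        except TypeError:
            key = (unit, str(tz))
        return key

    _cache_key('ns', 'UTC')  # ('ns', 'UTC')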
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a5256868ce419..3a5a0e7044e79 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -23,38 +23,40 @@
 import numpy as np
 import numpy.ma as ma

-from pandas.types.cast import (maybe_upcast, infer_dtype_from_scalar,
-                               maybe_cast_to_datetime,
-                               maybe_infer_to_datetimelike,
-                               maybe_convert_platform,
-                               maybe_downcast_to_dtype,
-                               invalidate_string_dtypes,
-                               coerce_to_dtypes,
-                               maybe_upcast_putmask,
-                               find_common_type)
-from pandas.types.common import (is_categorical_dtype,
-                                 is_object_dtype,
-                                 is_extension_type,
-                                 is_datetimetz,
-                                 is_datetime64_any_dtype,
-                                 is_datetime64tz_dtype,
-                                 is_bool_dtype,
-                                 is_integer_dtype,
-                                 is_float_dtype,
-                                 is_integer,
-                                 is_scalar,
-                                 is_dtype_equal,
-                                 needs_i8_conversion,
-                                 _get_dtype_from_object,
-                                 _ensure_float,
-                                 _ensure_float64,
-                                 _ensure_int64,
-                                 _ensure_platform_int,
-                                 is_list_like,
-                                 is_iterator,
-                                 is_sequence,
-                                 is_named_tuple)
-from pandas.types.missing import isnull, notnull
+from pandas.core.dtypes.cast import (
+    maybe_upcast, infer_dtype_from_scalar,
+    maybe_cast_to_datetime,
+    maybe_infer_to_datetimelike,
+    maybe_convert_platform,
+    maybe_downcast_to_dtype,
+    invalidate_string_dtypes,
+    coerce_to_dtypes,
+    maybe_upcast_putmask,
+    find_common_type)
+from pandas.core.dtypes.common import (
+    is_categorical_dtype,
+    is_object_dtype,
+    is_extension_type,
+    is_datetimetz,
+    is_datetime64_any_dtype,
+    is_datetime64tz_dtype,
+    is_bool_dtype,
+    is_integer_dtype,
+    is_float_dtype,
+    is_integer,
+    is_scalar,
+    is_dtype_equal,
+    needs_i8_conversion,
+    _get_dtype_from_object,
+    _ensure_float,
+    _ensure_float64,
+    _ensure_int64,
+    _ensure_platform_int,
+    is_list_like,
+    is_iterator,
+    is_sequence,
+    is_named_tuple)
+from pandas.core.dtypes.missing import isnull, notnull

 from pandas.core.common import (_try_sort,
                                 _default_index,
@@ -70,9 +72,9 @@
                                 create_block_manager_from_blocks)
 from pandas.core.series import Series
 from pandas.core.categorical import Categorical
-import pandas.computation.expressions as expressions
+import pandas.core.computation.expressions as expressions
 import pandas.core.algorithms as algorithms
-from pandas.computation.eval import eval as _eval
+from pandas.core.computation.eval import eval as _eval
 from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u,
                            OrderedDict, raise_with_traceback)
 from pandas import compat
@@ -1269,7 +1271,7 @@ def to_sparse(self, fill_value=None, kind='block'):
         -------
         y : SparseDataFrame
         """
-        from pandas.core.sparse import SparseDataFrame
+        from pandas.core.sparse.frame import SparseDataFrame
         return SparseDataFrame(self._series, index=self.index,
                                columns=self.columns, default_kind=kind,
                                default_fill_value=fill_value)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 316c9f5e2ccd8..167af8dfc0d8e 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -10,22 +10,23 @@
 import pandas as pd

 from pandas._libs import tslib, lib
-from pandas.types.common import (_coerce_to_dtype,
-                                 _ensure_int64,
-                                 needs_i8_conversion,
-                                 is_scalar,
-                                 is_integer, is_bool,
-                                 is_bool_dtype,
-                                 is_numeric_dtype,
-                                 is_datetime64_dtype,
-                                 is_timedelta64_dtype,
-                                 is_datetime64tz_dtype,
-                                 is_list_like,
-                                 is_dict_like,
-                                 is_re_compilable)
+from pandas.core.dtypes.common import (
+    _coerce_to_dtype,
+    _ensure_int64,
+    needs_i8_conversion,
+    is_scalar,
+    is_integer, is_bool,
+    is_bool_dtype,
+    is_numeric_dtype,
+    is_datetime64_dtype,
+    is_timedelta64_dtype,
+    is_datetime64tz_dtype,
+    is_list_like,
+    is_dict_like,
+    is_re_compilable)
+from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
+from
pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.generic import ABCSeries, ABCPanel from pandas.core.common import (_values_from_object, _maybe_box_datetimelike, diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 27e256a8eb572..2cbcb9ef6efec 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -15,26 +15,27 @@ from pandas.compat.numpy import function as nv, _np_version_under1p8 from pandas.compat import set_function_name -from pandas.types.common import (is_numeric_dtype, - is_timedelta64_dtype, is_datetime64_dtype, - is_categorical_dtype, - is_interval_dtype, - is_datetimelike, - is_datetime64_any_dtype, - is_bool, is_integer_dtype, - is_complex_dtype, - is_bool_dtype, - is_scalar, - is_list_like, - needs_i8_conversion, - _ensure_float64, - _ensure_platform_int, - _ensure_int64, - _ensure_object, - _ensure_categorical, - _ensure_float) -from pandas.types.cast import maybe_downcast_to_dtype -from pandas.types.missing import isnull, notnull, _maybe_fill +from pandas.core.dtypes.common import ( + is_numeric_dtype, + is_timedelta64_dtype, is_datetime64_dtype, + is_categorical_dtype, + is_interval_dtype, + is_datetimelike, + is_datetime64_any_dtype, + is_bool, is_integer_dtype, + is_complex_dtype, + is_bool_dtype, + is_scalar, + is_list_like, + needs_i8_conversion, + _ensure_float64, + _ensure_platform_int, + _ensure_int64, + _ensure_object, + _ensure_categorical, + _ensure_float) +from pandas.core.dtypes.cast import maybe_downcast_to_dtype +from pandas.core.dtypes.missing import isnull, notnull, _maybe_fill from pandas.core.common import (_values_from_object, AbstractMethodError, _default_index) @@ -4079,7 +4080,7 @@ def _apply_to_column_groupbys(self, func): def count(self): """ Compute count of group, excluding missing values """ from functools import partial - from pandas.types.missing import _isnull_ndarraylike as isnull + from pandas.core.dtypes.missing import _isnull_ndarraylike as isnull data, _ = self._get_data_to_aggregate() ids, _, ngroups = self.grouper.group_info diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index dd8fa2d3ddc81..a01e3dc46dfe9 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -4,17 +4,18 @@ import numpy as np from pandas.compat import range, zip import pandas.compat as compat -from pandas.types.generic import ABCDataFrame, ABCPanel, ABCSeries -from pandas.types.common import (is_integer_dtype, - is_integer, is_float, - is_list_like, - is_sequence, - is_iterator, - is_scalar, - is_sparse, - _is_unorderable_exception, - _ensure_platform_int) -from pandas.types.missing import isnull, _infer_fill_value +from pandas.core.dtypes.generic import ABCDataFrame, ABCPanel, ABCSeries +from pandas.core.dtypes.common import ( + is_integer_dtype, + is_integer, is_float, + is_list_like, + is_sequence, + is_iterator, + is_scalar, + is_sparse, + _is_unorderable_exception, + _ensure_platform_int) +from pandas.core.dtypes.missing import isnull, _infer_fill_value from pandas.core.index import Index, MultiIndex diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 57361886eab8c..f7d7efd66f8db 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -9,41 +9,45 @@ from pandas.core.base import PandasObject -from pandas.types.dtypes import (ExtensionDtype, DatetimeTZDtype, - CategoricalDtype) -from pandas.types.common import (_TD_DTYPE, _NS_DTYPE, - _ensure_int64, _ensure_platform_int, - is_integer, - is_dtype_equal, - is_timedelta64_dtype, - is_datetime64_dtype, 
is_datetimetz, is_sparse, - is_categorical, is_categorical_dtype, - is_integer_dtype, - is_datetime64tz_dtype, - is_object_dtype, - is_datetimelike_v_numeric, - is_float_dtype, is_numeric_dtype, - is_numeric_v_string_like, is_extension_type, - is_list_like, - is_re, - is_re_compilable, - is_scalar, - _get_dtype) -from pandas.types.cast import (maybe_downcast_to_dtype, - maybe_convert_string_to_object, - maybe_upcast, - maybe_convert_scalar, maybe_promote, - infer_dtype_from_scalar, - soft_convert_objects, - maybe_convert_objects, - astype_nansafe, - find_common_type) -from pandas.types.missing import (isnull, array_equivalent, - _is_na_compat, - is_null_datelike_scalar) -import pandas.types.concat as _concat - -from pandas.types.generic import ABCSeries +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, DatetimeTZDtype, + CategoricalDtype) +from pandas.core.dtypes.common import ( + _TD_DTYPE, _NS_DTYPE, + _ensure_int64, _ensure_platform_int, + is_integer, + is_dtype_equal, + is_timedelta64_dtype, + is_datetime64_dtype, is_datetimetz, is_sparse, + is_categorical, is_categorical_dtype, + is_integer_dtype, + is_datetime64tz_dtype, + is_object_dtype, + is_datetimelike_v_numeric, + is_float_dtype, is_numeric_dtype, + is_numeric_v_string_like, is_extension_type, + is_list_like, + is_re, + is_re_compilable, + is_scalar, + _get_dtype) +from pandas.core.dtypes.cast import ( + maybe_downcast_to_dtype, + maybe_convert_string_to_object, + maybe_upcast, + maybe_convert_scalar, maybe_promote, + infer_dtype_from_scalar, + soft_convert_objects, + maybe_convert_objects, + astype_nansafe, + find_common_type) +from pandas.core.dtypes.missing import ( + isnull, array_equivalent, + _is_na_compat, + is_null_datelike_scalar) +import pandas.core.dtypes.concat as _concat + +from pandas.core.dtypes.generic import ABCSeries from pandas.core.common import is_null_slice import pandas.core.algorithms as algos @@ -54,12 +58,12 @@ from pandas.formats.printing import pprint_thing import pandas.core.missing as missing -from pandas.sparse.array import _maybe_to_sparse, SparseArray +from pandas.core.sparse.array import _maybe_to_sparse, SparseArray from pandas._libs import lib, tslib from pandas._libs.tslib import Timedelta from pandas._libs.lib import BlockPlacement -import pandas.computation.expressions as expressions +import pandas.core.computation.expressions as expressions from pandas.util.decorators import cache_readonly from pandas.util.validators import validate_bool_kwarg diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 91039f3270af2..3010348423340 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -8,18 +8,19 @@ from pandas._libs import algos, lib from pandas.compat import range, string_types -from pandas.types.common import (is_numeric_v_string_like, - is_float_dtype, - is_datetime64_dtype, - is_datetime64tz_dtype, - is_integer_dtype, - is_scalar, - is_integer, - needs_i8_conversion, - _ensure_float64) - -from pandas.types.cast import infer_dtype_from_array -from pandas.types.missing import isnull +from pandas.core.dtypes.common import ( + is_numeric_v_string_like, + is_float_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_integer_dtype, + is_scalar, + is_integer, + needs_i8_conversion, + _ensure_float64) + +from pandas.core.dtypes.cast import infer_dtype_from_array +from pandas.core.dtypes.missing import isnull def mask_missing(arr, values_to_mask): diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 6ec94e69740a2..5ce302967de24 100644 --- 
a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -11,17 +11,18 @@ from pandas import compat from pandas._libs import tslib, algos, lib -from pandas.types.common import (_get_dtype, - is_float, is_scalar, - is_integer, is_complex, is_float_dtype, - is_complex_dtype, is_integer_dtype, - is_bool_dtype, is_object_dtype, - is_numeric_dtype, - is_datetime64_dtype, is_timedelta64_dtype, - is_datetime_or_timedelta_dtype, - is_int_or_datetime_dtype, is_any_int_dtype) -from pandas.types.cast import _int64_max, maybe_upcast_putmask -from pandas.types.missing import isnull, notnull +from pandas.core.dtypes.common import ( + _get_dtype, + is_float, is_scalar, + is_integer, is_complex, is_float_dtype, + is_complex_dtype, is_integer_dtype, + is_bool_dtype, is_object_dtype, + is_numeric_dtype, + is_datetime64_dtype, is_timedelta64_dtype, + is_datetime_or_timedelta_dtype, + is_int_or_datetime_dtype, is_any_int_dtype) +from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask +from pandas.core.dtypes.missing import isnull, notnull from pandas.core.common import _values_from_object diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 9e777fd94de66..50815498f40df 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -16,25 +16,26 @@ from pandas import compat from pandas.util.decorators import Appender -import pandas.computation.expressions as expressions +import pandas.core.computation.expressions as expressions from pandas.compat import bind_method import pandas.core.missing as missing from pandas.errors import PerformanceWarning from pandas.core.common import _values_from_object, _maybe_match_name -from pandas.types.missing import notnull, isnull -from pandas.types.common import (needs_i8_conversion, - is_datetimelike_v_numeric, - is_integer_dtype, is_categorical_dtype, - is_object_dtype, is_timedelta64_dtype, - is_datetime64_dtype, is_datetime64tz_dtype, - is_bool_dtype, is_datetimetz, - is_list_like, - is_scalar, - _ensure_object) -from pandas.types.cast import maybe_upcast_putmask, find_common_type -from pandas.types.generic import ABCSeries, ABCIndex, ABCPeriodIndex +from pandas.core.dtypes.missing import notnull, isnull +from pandas.core.dtypes.common import ( + needs_i8_conversion, + is_datetimelike_v_numeric, + is_integer_dtype, is_categorical_dtype, + is_object_dtype, is_timedelta64_dtype, + is_datetime64_dtype, is_datetime64tz_dtype, + is_bool_dtype, is_datetimetz, + is_list_like, + is_scalar, + _ensure_object) +from pandas.core.dtypes.cast import maybe_upcast_putmask, find_common_type +from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCPeriodIndex # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 24f4d219fb9ca..76053b3bdb83d 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -6,13 +6,15 @@ import numpy as np import warnings -from pandas.types.cast import (infer_dtype_from_scalar, - maybe_cast_item) -from pandas.types.common import (is_integer, is_list_like, - is_string_like, is_scalar) -from pandas.types.missing import notnull - -import pandas.computation.expressions as expressions +from pandas.core.dtypes.cast import ( + infer_dtype_from_scalar, + maybe_cast_item) +from pandas.core.dtypes.common import ( + is_integer, is_list_like, + is_string_like, is_scalar) +from pandas.core.dtypes.missing import notnull + +import pandas.core.computation.expressions as expressions import 
pandas.core.common as com import pandas.core.ops as ops import pandas.core.missing as missing diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index b03c3d77928c7..b3a06d85967f2 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -7,19 +7,20 @@ import numpy as np -from pandas.types.common import (_ensure_platform_int, - is_list_like, is_bool_dtype, - needs_i8_conversion) -from pandas.types.cast import maybe_promote -from pandas.types.missing import notnull -import pandas.types.concat as _concat +from pandas.core.dtypes.common import ( + _ensure_platform_int, + is_list_like, is_bool_dtype, + needs_i8_conversion) +from pandas.core.dtypes.cast import maybe_promote +from pandas.core.dtypes.missing import notnull +import pandas.core.dtypes.concat as _concat from pandas.core.series import Series from pandas.core.frame import DataFrame -from pandas.core.sparse import SparseDataFrame, SparseSeries -from pandas.sparse.array import SparseArray -from pandas.sparse.libsparse import IntIndex +from pandas.core.sparse.api import SparseDataFrame, SparseSeries +from pandas.core.sparse.array import SparseArray +from pandas.core.sparse.libsparse import IntIndex from pandas.core.categorical import Categorical, _factorize_from_iterable from pandas.core.sorting import (get_group_index, get_compressed_ids, diff --git a/pandas/core/series.py b/pandas/core/series.py index 1cf537cf3c315..596dae4345cb3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -13,26 +13,28 @@ import numpy as np import numpy.ma as ma -from pandas.types.common import (_coerce_to_dtype, is_categorical_dtype, - is_bool, - is_integer, is_integer_dtype, - is_float_dtype, - is_extension_type, is_datetimetz, - is_datetimelike, - is_datetime64tz_dtype, - is_timedelta64_dtype, - is_list_like, - is_hashable, - is_iterator, - is_dict_like, - is_scalar, - _is_unorderable_exception, - _ensure_platform_int) -from pandas.types.generic import ABCSparseArray, ABCDataFrame -from pandas.types.cast import (maybe_upcast, infer_dtype_from_scalar, - maybe_convert_platform, - maybe_cast_to_datetime, maybe_castable) -from pandas.types.missing import isnull, notnull +from pandas.core.dtypes.common import ( + _coerce_to_dtype, is_categorical_dtype, + is_bool, + is_integer, is_integer_dtype, + is_float_dtype, + is_extension_type, is_datetimetz, + is_datetimelike, + is_datetime64tz_dtype, + is_timedelta64_dtype, + is_list_like, + is_hashable, + is_iterator, + is_dict_like, + is_scalar, + _is_unorderable_exception, + _ensure_platform_int) +from pandas.core.dtypes.generic import ABCSparseArray, ABCDataFrame +from pandas.core.dtypes.cast import ( + maybe_upcast, infer_dtype_from_scalar, + maybe_convert_platform, + maybe_cast_to_datetime, maybe_castable) +from pandas.core.dtypes.missing import isnull, notnull from pandas.core.common import (is_bool_indexer, _default_index, @@ -255,7 +257,7 @@ def from_array(cls, arr, index=None, name=None, dtype=None, copy=False, fastpath=False): # return a sparse series here if isinstance(arr, ABCSparseArray): - from pandas.sparse.series import SparseSeries + from pandas.core.sparse.series import SparseSeries cls = SparseSeries return cls(arr, index=index, name=name, dtype=dtype, copy=copy, @@ -1130,7 +1132,7 @@ def to_sparse(self, kind='block', fill_value=None): ------- sp : SparseSeries """ - from pandas.core.sparse import SparseSeries + from pandas.core.sparse.series import SparseSeries return SparseSeries(self, kind=kind, fill_value=fill_value).__finalize__(self) @@ -2867,8 +2869,6 @@ def 
_sanitize_index(data, index, copy=False): data = data.asobject elif isinstance(data, DatetimeIndex): data = data._to_embed(keep_tz=True) - if copy: - data = data.copy() elif isinstance(data, np.ndarray): # coerce datetimelike types diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index e56a4f50de134..69b427df981b7 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -3,10 +3,11 @@ import numpy as np from pandas.compat import long from pandas.core.categorical import Categorical -from pandas.types.common import (_ensure_platform_int, - _ensure_int64, - is_categorical_dtype) -from pandas.types.missing import isnull +from pandas.core.dtypes.common import ( + _ensure_platform_int, + _ensure_int64, + is_categorical_dtype) +from pandas.core.dtypes.missing import isnull import pandas.core.algorithms as algorithms from pandas._libs import lib, algos, hashtable from pandas._libs.hashtable import unique_label_indices diff --git a/pandas/core/sparse.py b/pandas/core/sparse.py deleted file mode 100644 index 4fc329844d616..0000000000000 --- a/pandas/core/sparse.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -Data structures for sparse float data. Life is made simpler by dealing only -with float64 data -""" - -# pylint: disable=W0611 -# flake8: noqa - -from pandas.sparse.series import SparseSeries -from pandas.sparse.frame import SparseDataFrame diff --git a/pandas/tests/computation/__init__.py b/pandas/core/sparse/__init__.py similarity index 100% rename from pandas/tests/computation/__init__.py rename to pandas/core/sparse/__init__.py diff --git a/pandas/core/sparse/api.py b/pandas/core/sparse/api.py new file mode 100644 index 0000000000000..f79bb4886da4b --- /dev/null +++ b/pandas/core/sparse/api.py @@ -0,0 +1,6 @@ +# pylint: disable=W0611 +# flake8: noqa +from pandas.core.sparse.array import SparseArray +from pandas.core.sparse.list import SparseList +from pandas.core.sparse.series import SparseSeries +from pandas.core.sparse.frame import SparseDataFrame diff --git a/pandas/sparse/array.py b/pandas/core/sparse/array.py similarity index 97% rename from pandas/sparse/array.py rename to pandas/core/sparse/array.py index f149e724c19c3..74e9be54ae6df 100644 --- a/pandas/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -14,20 +14,23 @@ from pandas.compat import range from pandas.compat.numpy import function as nv -from pandas.types.generic import ABCSparseArray, ABCSparseSeries -from pandas.types.common import (_ensure_platform_int, - is_float, is_integer, - is_integer_dtype, - is_bool_dtype, - is_list_like, - is_string_dtype, - is_scalar, is_dtype_equal) -from pandas.types.cast import (maybe_convert_platform, maybe_promote, - astype_nansafe, find_common_type) -from pandas.types.missing import isnull, notnull, na_value_for_dtype - -from pandas.sparse import libsparse as splib -from pandas.sparse.libsparse import SparseIndex, BlockIndex, IntIndex +from pandas.core.dtypes.generic import ( + ABCSparseArray, ABCSparseSeries) +from pandas.core.dtypes.common import ( + _ensure_platform_int, + is_float, is_integer, + is_integer_dtype, + is_bool_dtype, + is_list_like, + is_string_dtype, + is_scalar, is_dtype_equal) +from pandas.core.dtypes.cast import ( + maybe_convert_platform, maybe_promote, + astype_nansafe, find_common_type) +from pandas.core.dtypes.missing import isnull, notnull, na_value_for_dtype + +from pandas.core.sparse import libsparse as splib +from pandas.core.sparse.libsparse import SparseIndex, BlockIndex, IntIndex from pandas._libs import index as libindex import 
pandas.core.algorithms as algos import pandas.core.ops as ops diff --git a/pandas/sparse/frame.py b/pandas/core/sparse/frame.py similarity index 99% rename from pandas/sparse/frame.py rename to pandas/core/sparse/frame.py index 455d120cca640..05c97fac4b53a 100644 --- a/pandas/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -10,9 +10,9 @@ from pandas import compat import numpy as np -from pandas.types.missing import isnull, notnull -from pandas.types.cast import maybe_upcast, find_common_type -from pandas.types.common import _ensure_platform_int, is_scipy_sparse +from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.cast import maybe_upcast, find_common_type +from pandas.core.dtypes.common import _ensure_platform_int, is_scipy_sparse from pandas.core.common import _try_sort from pandas.compat.numpy import function as nv @@ -24,8 +24,8 @@ from pandas.core.internals import (BlockManager, create_block_manager_from_arrays) import pandas.core.generic as generic -from pandas.sparse.series import SparseSeries, SparseArray -from pandas.sparse.libsparse import BlockIndex, get_blocks +from pandas.core.sparse.series import SparseSeries, SparseArray +from pandas.core.sparse.libsparse import BlockIndex, get_blocks from pandas.util.decorators import Appender import pandas.core.ops as ops diff --git a/pandas/sparse/list.py b/pandas/core/sparse/list.py similarity index 96% rename from pandas/sparse/list.py rename to pandas/core/sparse/list.py index 54ebf5e51045d..381a811ac828b 100644 --- a/pandas/sparse/list.py +++ b/pandas/core/sparse/list.py @@ -3,10 +3,10 @@ from pandas.core.base import PandasObject from pandas.formats.printing import pprint_thing -from pandas.types.common import is_scalar -from pandas.sparse.array import SparseArray +from pandas.core.dtypes.common import is_scalar +from pandas.core.sparse.array import SparseArray from pandas.util.validators import validate_bool_kwarg -import pandas.sparse.libsparse as splib +from pandas.core.sparse import libsparse as splib class SparseList(PandasObject): diff --git a/pandas/sparse/scipy_sparse.py b/pandas/core/sparse/scipy_sparse.py similarity index 100% rename from pandas/sparse/scipy_sparse.py rename to pandas/core/sparse/scipy_sparse.py diff --git a/pandas/sparse/series.py b/pandas/core/sparse/series.py similarity index 98% rename from pandas/sparse/series.py rename to pandas/core/sparse/series.py index 7ec42f02c3998..a77bce8f06783 100644 --- a/pandas/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -8,8 +8,8 @@ import numpy as np import warnings -from pandas.types.missing import isnull, notnull -from pandas.types.common import is_scalar +from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.common import is_scalar from pandas.core.common import _values_from_object, _maybe_match_name from pandas.compat.numpy import function as nv @@ -23,13 +23,15 @@ import pandas._libs.index as _index from pandas.util.decorators import Appender -from pandas.sparse.array import (make_sparse, _sparse_array_op, SparseArray, - _make_index) -from pandas.sparse.libsparse import BlockIndex, IntIndex -import pandas.sparse.libsparse as splib +from pandas.core.sparse.array import ( + make_sparse, _sparse_array_op, SparseArray, + _make_index) +from pandas.core.sparse.libsparse import BlockIndex, IntIndex +import pandas.core.sparse.libsparse as splib -from pandas.sparse.scipy_sparse import (_sparse_series_to_coo, - _coo_to_sparse_series) +from pandas.core.sparse.scipy_sparse import ( + _sparse_series_to_coo, + 
_coo_to_sparse_series) _shared_doc_kwargs = dict(axes='index', klass='SparseSeries', @@ -264,7 +266,7 @@ def _constructor(self): @property def _constructor_expanddim(self): - from pandas.sparse.api import SparseDataFrame + from pandas.core.sparse.api import SparseDataFrame return SparseDataFrame @property diff --git a/pandas/sparse/sparse.pyx b/pandas/core/sparse/sparse.pyx similarity index 100% rename from pandas/sparse/sparse.pyx rename to pandas/core/sparse/sparse.pyx diff --git a/pandas/sparse/sparse_op_helper.pxi.in b/pandas/core/sparse/sparse_op_helper.pxi.in similarity index 100% rename from pandas/sparse/sparse_op_helper.pxi.in rename to pandas/core/sparse/sparse_op_helper.pxi.in diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 504d3dd47cc21..5082ac7f80fbf 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1,16 +1,17 @@ import numpy as np from pandas.compat import zip -from pandas.types.generic import ABCSeries, ABCIndex -from pandas.types.missing import isnull, notnull -from pandas.types.common import (is_bool_dtype, - is_categorical_dtype, - is_object_dtype, - is_string_like, - is_list_like, - is_scalar, - is_integer, - is_re) +from pandas.core.dtypes.generic import ABCSeries, ABCIndex +from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_categorical_dtype, + is_object_dtype, + is_string_like, + is_list_like, + is_scalar, + is_integer, + is_re) from pandas.core.common import _values_from_object from pandas.core.algorithms import take_1d diff --git a/pandas/core/window.py b/pandas/core/window.py index 5b84b075ce81a..6fdc05a13b773 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -12,20 +12,22 @@ from collections import defaultdict from datetime import timedelta -from pandas.types.generic import (ABCSeries, - ABCDataFrame, - ABCDatetimeIndex, - ABCTimedeltaIndex, - ABCPeriodIndex) -from pandas.types.common import (is_integer, - is_bool, - is_float_dtype, - is_integer_dtype, - needs_i8_conversion, - is_timedelta64_dtype, - is_list_like, - _ensure_float64, - is_scalar) +from pandas.core.dtypes.generic import ( + ABCSeries, + ABCDataFrame, + ABCDatetimeIndex, + ABCTimedeltaIndex, + ABCPeriodIndex) +from pandas.core.dtypes.common import ( + is_integer, + is_bool, + is_float_dtype, + is_integer_dtype, + needs_i8_conversion, + is_timedelta64_dtype, + is_list_like, + _ensure_float64, + is_scalar) import pandas as pd from pandas.core.base import (PandasObject, SelectionMixin, diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 907198d98cf5b..aad6c182416f6 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -10,20 +10,21 @@ import sys -from pandas.types.missing import isnull, notnull -from pandas.types.common import (is_categorical_dtype, - is_float_dtype, - is_period_arraylike, - is_integer_dtype, - is_interval_dtype, - is_datetimetz, - is_integer, - is_float, - is_numeric_dtype, - is_datetime64_dtype, - is_timedelta64_dtype, - is_list_like) -from pandas.types.generic import ABCSparseArray +from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_float_dtype, + is_period_arraylike, + is_integer_dtype, + is_interval_dtype, + is_datetimetz, + is_integer, + is_float, + is_numeric_dtype, + is_datetime64_dtype, + is_timedelta64_dtype, + is_list_like) +from pandas.core.dtypes.generic import ABCSparseArray from pandas.core.base import PandasObject from pandas.core.index import 
Index, MultiIndex, _ensure_index from pandas import compat diff --git a/pandas/formats/printing.py b/pandas/formats/printing.py index 37bd4b63d6f7a..5ea47df2c817f 100644 --- a/pandas/formats/printing.py +++ b/pandas/formats/printing.py @@ -2,7 +2,7 @@ printing tools """ -from pandas.types.inference import is_sequence +from pandas.core.dtypes.inference import is_sequence from pandas import compat from pandas.compat import u from pandas.core.config import get_option diff --git a/pandas/formats/style.py b/pandas/formats/style.py index af02077bd5b41..3ca1d8259729d 100644 --- a/pandas/formats/style.py +++ b/pandas/formats/style.py @@ -19,7 +19,7 @@ "or `pip install Jinja2`" raise ImportError(msg) -from pandas.types.common import is_float, is_string_like +from pandas.core.dtypes.common import is_float, is_string_like import numpy as np import pandas as pd diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 00ad4ca71cb9d..b0439e122ea9e 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -13,27 +13,28 @@ from pandas import compat -from pandas.types.generic import ABCSeries, ABCMultiIndex, ABCPeriodIndex -from pandas.types.missing import isnull, array_equivalent -from pandas.types.common import (_ensure_int64, - _ensure_object, - _ensure_categorical, - _ensure_platform_int, - is_integer, - is_float, - is_dtype_equal, - is_object_dtype, - is_categorical_dtype, - is_interval_dtype, - is_bool_dtype, - is_signed_integer_dtype, - is_unsigned_integer_dtype, - is_integer_dtype, is_float_dtype, - is_datetime64_any_dtype, - is_timedelta64_dtype, - needs_i8_conversion, - is_iterator, is_list_like, - is_scalar) +from pandas.core.dtypes.generic import ABCSeries, ABCMultiIndex, ABCPeriodIndex +from pandas.core.dtypes.missing import isnull, array_equivalent +from pandas.core.dtypes.common import ( + _ensure_int64, + _ensure_object, + _ensure_categorical, + _ensure_platform_int, + is_integer, + is_float, + is_dtype_equal, + is_object_dtype, + is_categorical_dtype, + is_interval_dtype, + is_bool_dtype, + is_signed_integer_dtype, + is_unsigned_integer_dtype, + is_integer_dtype, is_float_dtype, + is_datetime64_any_dtype, + is_timedelta64_dtype, + needs_i8_conversion, + is_iterator, is_list_like, + is_scalar) from pandas.core.common import (is_bool_indexer, _values_from_object, _asarray_tuplesafe) @@ -44,7 +45,7 @@ deprecate, deprecate_kwarg) from pandas.indexes.frozen import FrozenList import pandas.core.common as com -import pandas.types.concat as _concat +import pandas.core.dtypes.concat as _concat import pandas.core.missing as missing import pandas.core.algorithms as algos from pandas.formats.printing import pprint_thing diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 6c57b2ed83705..5f9d106189767 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -3,14 +3,15 @@ from pandas import compat from pandas.compat.numpy import function as nv -from pandas.types.generic import ABCCategorical, ABCSeries -from pandas.types.common import (is_categorical_dtype, - _ensure_platform_int, - is_list_like, - is_interval_dtype, - is_scalar) +from pandas.core.dtypes.generic import ABCCategorical, ABCSeries +from pandas.core.dtypes.common import ( + is_categorical_dtype, + _ensure_platform_int, + is_list_like, + is_interval_dtype, + is_scalar) from pandas.core.common import _asarray_tuplesafe -from pandas.types.missing import array_equivalent +from pandas.core.dtypes.missing import array_equivalent from pandas.util.decorators import Appender, 
cache_readonly diff --git a/pandas/indexes/frozen.py b/pandas/indexes/frozen.py index ab1228c008ca8..19b04319b37f9 100644 --- a/pandas/indexes/frozen.py +++ b/pandas/indexes/frozen.py @@ -10,7 +10,7 @@ import numpy as np from pandas.core.base import PandasObject -from pandas.types.cast import coerce_indexer_dtype +from pandas.core.dtypes.cast import coerce_indexer_dtype from pandas.formats.printing import pprint_thing diff --git a/pandas/indexes/interval.py b/pandas/indexes/interval.py index 63315ef861d12..88a2b0ff9595b 100644 --- a/pandas/indexes/interval.py +++ b/pandas/indexes/interval.py @@ -2,19 +2,20 @@ import numpy as np -from pandas.types.missing import notnull, isnull -from pandas.types.generic import ABCPeriodIndex -from pandas.types.dtypes import IntervalDtype -from pandas.types.common import (_ensure_platform_int, - is_list_like, - is_datetime_or_timedelta_dtype, - is_integer_dtype, - is_object_dtype, - is_categorical_dtype, - is_float_dtype, - is_interval_dtype, - is_scalar, - is_integer) +from pandas.core.dtypes.missing import notnull, isnull +from pandas.core.dtypes.generic import ABCPeriodIndex +from pandas.core.dtypes.dtypes import IntervalDtype +from pandas.core.dtypes.common import ( + _ensure_platform_int, + is_list_like, + is_datetime_or_timedelta_dtype, + is_integer_dtype, + is_object_dtype, + is_categorical_dtype, + is_float_dtype, + is_interval_dtype, + is_scalar, + is_integer) from pandas.indexes.base import (Index, _ensure_index, default_pprint, _index_shared_docs) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index d1c8e0ba1cc4e..f410dbddb4428 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -12,13 +12,14 @@ from pandas.compat.numpy import function as nv from pandas import compat -from pandas.types.common import (_ensure_int64, - _ensure_platform_int, - is_object_dtype, - is_iterator, - is_list_like, - is_scalar) -from pandas.types.missing import isnull, array_equivalent +from pandas.core.dtypes.common import ( + _ensure_int64, + _ensure_platform_int, + is_object_dtype, + is_iterator, + is_list_like, + is_scalar) +from pandas.core.dtypes.missing import isnull, array_equivalent from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.core.common import (_values_from_object, is_bool_indexer, diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 31258c785d9e8..2f68101520229 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -1,9 +1,10 @@ import numpy as np from pandas._libs import (index as libindex, algos as libalgos, join as libjoin) -from pandas.types.common import (is_dtype_equal, pandas_dtype, - is_float_dtype, is_object_dtype, - is_integer_dtype, is_scalar) +from pandas.core.dtypes.common import ( + is_dtype_equal, pandas_dtype, + is_float_dtype, is_object_dtype, + is_integer_dtype, is_scalar) from pandas.core.common import _asarray_tuplesafe, _values_from_object from pandas import compat diff --git a/pandas/indexes/range.py b/pandas/indexes/range.py index be68c97fb7890..1eedfcc619aec 100644 --- a/pandas/indexes/range.py +++ b/pandas/indexes/range.py @@ -4,9 +4,10 @@ import numpy as np from pandas._libs import index as libindex -from pandas.types.common import (is_integer, - is_scalar, - is_int64_dtype) +from pandas.core.dtypes.common import ( + is_integer, + is_scalar, + is_int64_dtype) from pandas import compat from pandas.compat import lrange, range diff --git a/pandas/io/common.py b/pandas/io/common.py index 8ee6ded67f790..5cd5a9cd3e8dc 100644 --- 
a/pandas/io/common.py +++ b/pandas/io/common.py @@ -10,7 +10,7 @@ from pandas import compat from pandas.formats.printing import pprint_thing from pandas.core.common import AbstractMethodError -from pandas.types.common import is_number, is_file_like +from pandas.core.dtypes.common import is_number, is_file_like # compat from pandas.errors import (ParserError, DtypeWarning, # noqa diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 7f2f0cf4943b8..b19837973a94a 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -10,8 +10,9 @@ import abc import numpy as np -from pandas.types.common import (is_integer, is_float, - is_bool, is_list_like) +from pandas.core.dtypes.common import ( + is_integer, is_float, + is_bool, is_list_like) from pandas.core.frame import DataFrame from pandas.io.parsers import TextParser diff --git a/pandas/io/html.py b/pandas/io/html.py index 7b58e612de2df..8e5b8def1ea91 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -12,7 +12,7 @@ import numpy as np -from pandas.types.common import is_list_like +from pandas.core.dtypes.common import is_list_like from pandas.errors import EmptyDataError from pandas.io.common import (_is_url, urlopen, parse_url, _validate_header_arg) diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 114ec4bb2723e..19e84c04b7ddb 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -12,7 +12,7 @@ from pandas.formats.printing import pprint_thing from .normalize import _convert_to_line_delimits from .table_schema import build_table_schema -from pandas.types.common import is_period_dtype +from pandas.core.dtypes.common import is_period_dtype loads = libjson.loads dumps = libjson.dumps diff --git a/pandas/io/json/table_schema.py b/pandas/io/json/table_schema.py index 48f92d28baf61..d8ef3afc9591f 100644 --- a/pandas/io/json/table_schema.py +++ b/pandas/io/json/table_schema.py @@ -3,7 +3,7 @@ http://specs.frictionlessdata.io/json-table-schema/ """ -from pandas.types.common import ( +from pandas.core.dtypes.common import ( is_integer_dtype, is_timedelta64_dtype, is_numeric_dtype, is_bool_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_categorical_dtype, is_period_dtype, is_string_dtype diff --git a/pandas/io/packers.py b/pandas/io/packers.py index ca5a27ee5b68e..a4b454eda7472 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -48,16 +48,17 @@ from pandas import compat from pandas.compat import u, u_safe -from pandas.types.common import (is_categorical_dtype, is_object_dtype, - needs_i8_conversion, pandas_dtype) +from pandas.core.dtypes.common import ( + is_categorical_dtype, is_object_dtype, + needs_i8_conversion, pandas_dtype) from pandas import (Timestamp, Period, Series, DataFrame, # noqa Index, MultiIndex, Float64Index, Int64Index, Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT, Categorical, CategoricalIndex) from pandas._libs.tslib import NaTType -from pandas.sparse.api import SparseSeries, SparseDataFrame -from pandas.sparse.array import BlockIndex, IntIndex +from pandas.core.sparse.api import SparseSeries, SparseDataFrame +from pandas.core.sparse.array import BlockIndex, IntIndex from pandas.core.generic import NDFrame from pandas.errors import PerformanceWarning from pandas.io.common import get_filepath_or_buffer diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index efbf6d64404c0..f2449e3064867 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -15,13 +15,14 @@ from pandas import compat from pandas.compat import (range, lrange, StringIO, lzip, zip, 
string_types, map, u) -from pandas.types.common import (is_integer, _ensure_object, - is_list_like, is_integer_dtype, - is_float, is_dtype_equal, - is_object_dtype, is_string_dtype, - is_scalar, is_categorical_dtype) -from pandas.types.missing import isnull -from pandas.types.cast import astype_nansafe +from pandas.core.dtypes.common import ( + is_integer, _ensure_object, + is_list_like, is_integer_dtype, + is_float, is_dtype_equal, + is_object_dtype, is_string_dtype, + is_scalar, is_categorical_dtype) +from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.cast import astype_nansafe from pandas.core.index import Index, MultiIndex, RangeIndex from pandas.core.series import Series from pandas.core.frame import DataFrame diff --git a/pandas/io/parsers.pyx b/pandas/io/parsers.pyx index 4053e726d0a04..2def4dc9dcf24 100644 --- a/pandas/io/parsers.pyx +++ b/pandas/io/parsers.pyx @@ -39,14 +39,15 @@ cimport util import pandas._libs.lib as lib import pandas.compat as compat -from pandas.types.common import (is_categorical_dtype, CategoricalDtype, - is_integer_dtype, is_float_dtype, - is_bool_dtype, is_object_dtype, - is_string_dtype, is_datetime64_dtype, - pandas_dtype) +from pandas.core.dtypes.common import ( + is_categorical_dtype, CategoricalDtype, + is_integer_dtype, is_float_dtype, + is_bool_dtype, is_object_dtype, + is_string_dtype, is_datetime64_dtype, + pandas_dtype) from pandas.core.categorical import Categorical from pandas.core.algorithms import take_1d -from pandas.types.concat import union_categoricals +from pandas.core.dtypes.concat import union_categoricals from pandas import Index import time diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 969a2a51cb15d..0f91c407766fb 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -3,7 +3,7 @@ import numpy as np from numpy.lib.format import read_array, write_array from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc, PY3 -from pandas.types.common import is_datetime64_dtype, _NS_DTYPE +from pandas.core.dtypes.common import is_datetime64_dtype, _NS_DTYPE from pandas.io.common import _get_handle, _infer_compression diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 802f460ecba07..4771134f3fe5c 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -12,15 +12,16 @@ import warnings import os -from pandas.types.common import (is_list_like, - is_categorical_dtype, - is_timedelta64_dtype, - is_datetime64tz_dtype, - is_datetime64_dtype, - _ensure_object, - _ensure_int64, - _ensure_platform_int) -from pandas.types.missing import array_equivalent +from pandas.core.dtypes.common import ( + is_list_like, + is_categorical_dtype, + is_timedelta64_dtype, + is_datetime64tz_dtype, + is_datetime64_dtype, + _ensure_object, + _ensure_int64, + _ensure_platform_int) +from pandas.core.dtypes.missing import array_equivalent import numpy as np from pandas import (Series, DataFrame, Panel, Panel4D, Index, @@ -29,7 +30,7 @@ DatetimeIndex, TimedeltaIndex) from pandas.core import config from pandas.io.common import _stringify_path -from pandas.sparse.array import BlockIndex, IntIndex +from pandas.core.sparse.array import BlockIndex, IntIndex from pandas.core.base import StringMixin from pandas.formats.printing import adjoin, pprint_thing from pandas.errors import PerformanceWarning @@ -43,7 +44,7 @@ from pandas import compat from pandas.compat import u_safe as u, PY3, range, lrange, string_types, filter from pandas.core.config import get_option -from pandas.computation.pytables import Expr, 
maybe_expression +from pandas.core.computation.pytables import Expr, maybe_expression from pandas._libs import tslib, algos, lib diff --git a/pandas/io/sql.py b/pandas/io/sql.py index b210baedaaf6d..de47a8ad5401f 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -12,10 +12,11 @@ import numpy as np import pandas._libs.lib as lib -from pandas.types.missing import isnull -from pandas.types.dtypes import DatetimeTZDtype -from pandas.types.common import (is_list_like, is_dict_like, - is_datetime64tz_dtype) +from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.common import ( + is_list_like, is_dict_like, + is_datetime64tz_dtype) from pandas.compat import (map, zip, raise_with_traceback, string_types, text_type) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 1d2951da68086..691582629251a 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -15,8 +15,9 @@ import struct from dateutil.relativedelta import relativedelta -from pandas.types.common import (is_categorical_dtype, is_datetime64_dtype, - _ensure_object) +from pandas.core.dtypes.common import ( + is_categorical_dtype, is_datetime64_dtype, + _ensure_object) from pandas.core.base import StringMixin from pandas.core.categorical import Categorical diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index 0aa8cc31646c5..0e51e95057be2 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -11,12 +11,12 @@ from matplotlib.transforms import nonsingular -from pandas.types.common import (is_float, is_integer, - is_integer_dtype, - is_float_dtype, - is_datetime64_ns_dtype, - is_period_arraylike, - ) +from pandas.core.dtypes.common import ( + is_float, is_integer, + is_integer_dtype, + is_float_dtype, + is_datetime64_ns_dtype, + is_period_arraylike) from pandas.compat import lrange import pandas.compat as compat diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 3980f5e7f2f61..02f2df4949189 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -11,11 +11,12 @@ from pandas.util.decorators import cache_readonly from pandas.core.base import PandasObject -from pandas.types.common import (is_list_like, - is_integer, - is_number, - is_hashable, - is_iterator) +from pandas.core.dtypes.common import ( + is_list_like, + is_integer, + is_number, + is_hashable, + is_iterator) from pandas.core.common import AbstractMethodError, isnull, _try_sort from pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import Index, MultiIndex diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 2c32a532dd2e2..f09bcef82b45d 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -5,7 +5,7 @@ import numpy as np from pandas.util.decorators import deprecate_kwarg -from pandas.types.missing import notnull +from pandas.core.dtypes.missing import notnull from pandas.compat import range, lrange, lmap, zip from pandas.formats.printing import pprint_thing diff --git a/pandas/plotting/_style.py b/pandas/plotting/_style.py index 5d6dc7cbcdfc6..8cb4e30e0d91c 100644 --- a/pandas/plotting/_style.py +++ b/pandas/plotting/_style.py @@ -8,7 +8,7 @@ import numpy as np -from pandas.types.common import is_list_like +from pandas.core.dtypes.common import is_list_like from pandas.compat import range, lrange, lmap import pandas.compat as compat from pandas.plotting._compat import _mpl_ge_2_0_0 diff --git a/pandas/plotting/_tools.py b/pandas/plotting/_tools.py 
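
The plotting diffs here are representative of the whole series: behavior is unchanged and only the import path moves, from the flat ``pandas.types`` namespace to ``pandas.core.dtypes``. For downstream code that must straddle the rename, a minimal version-tolerant shim looks like this (the try/except fallback is an illustrative assumption, not part of this patch):

    # Prefer the post-rename location, fall back to the old flat namespace.
    try:
        from pandas.core.dtypes.common import is_list_like
    except ImportError:
        from pandas.types.common import is_list_like

    assert is_list_like([1, 2, 3])
    assert not is_list_like('strings are deliberately excluded')
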
index 720f776279869..0c2314087525c 100644 --- a/pandas/plotting/_tools.py +++ b/pandas/plotting/_tools.py @@ -7,7 +7,7 @@ import numpy as np -from pandas.types.common import is_list_like +from pandas.core.dtypes.common import is_list_like from pandas.core.index import Index from pandas.core.series import Series from pandas.compat import range diff --git a/pandas/sparse/api.py b/pandas/sparse/api.py deleted file mode 100644 index 90be0a216535f..0000000000000 --- a/pandas/sparse/api.py +++ /dev/null @@ -1,6 +0,0 @@ -# pylint: disable=W0611 -# flake8: noqa -from pandas.sparse.array import SparseArray -from pandas.sparse.list import SparseList -from pandas.sparse.series import SparseSeries -from pandas.sparse.frame import SparseDataFrame diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 914c4c08863a2..f98ffa26e0c2b 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -6,7 +6,7 @@ import warnings import numpy as np -from pandas.types.common import is_scalar +from pandas.core.dtypes.common import is_scalar from pandas.core.api import DataFrame, Series from pandas.util.decorators import Substitution, Appender diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 02734189ca340..221458e629055 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -30,10 +30,10 @@ class TestPDApi(Base, tm.TestCase): ignored = ['tests', 'locale', 'conftest'] # top-level sub-packages - lib = ['api', 'compat', 'computation', 'core', - 'indexes', 'formats', 'errors', 'pandas', 'plotting', - 'test', 'tools', 'tseries', 'sparse', - 'types', 'util', 'options', 'io'] + lib = ['api', 'compat', 'core', + 'indexes', 'formats', 'errors', 'pandas', + 'plotting', 'test', 'tools', 'tseries', + 'util', 'options', 'io'] # these are already deprecated; awaiting removal deprecated_modules = ['stats', 'datetools', 'parser', diff --git a/pandas/tests/api/test_lib.py b/pandas/tests/api/test_lib.py deleted file mode 100644 index db2c68c6197d7..0000000000000 --- a/pandas/tests/api/test_lib.py +++ /dev/null @@ -1,10 +0,0 @@ -# -*- coding: utf-8 -*- - -from warnings import catch_warnings -import pandas # noqa - - -def test_moved_infer_dtype(): - with catch_warnings(record=True): - e = pandas.lib.infer_dtype('foo') - assert e is not None diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 1d05eda88e265..e0267d2990085 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -1,7 +1,9 @@ # -*- coding: utf-8 -*- +from warnings import catch_warnings import numpy as np +import pandas from pandas.core import common as com from pandas.api import types from pandas.util import testing as tm @@ -28,7 +30,7 @@ class TestTypes(Base, tm.TestCase): 'is_dict_like', 'is_iterator', 'is_file_like', 'is_list_like', 'is_hashable', 'is_named_tuple', 'is_sequence', - 'pandas_dtype'] + 'pandas_dtype', 'union_categoricals'] def test_types(self): @@ -61,7 +63,7 @@ def test_deprecation_core_common_array_equivalent(self): def test_deprecation_core_common_moved(self): - # these are in pandas.types.common + # these are in pandas.core.dtypes.common l = ['is_datetime_arraylike', 'is_datetime_or_timedelta_dtype', 'is_datetimelike', @@ -73,7 +75,7 @@ def test_deprecation_core_common_moved(self): 'is_string_like', 'is_string_like_dtype'] - from pandas.types import common as c + from pandas.core.dtypes import common as c for t in l: self.check_deprecation(getattr(com, t), getattr(c, t)) @@ -82,3 +84,10 @@ def 
test_removed_from_core_common(self): for t in ['is_null_datelike_scalar', 'ensure_float']: self.assertRaises(AttributeError, lambda: getattr(com, t)) + + +def test_moved_infer_dtype(): + + with catch_warnings(record=True): + e = pandas.lib.infer_dtype('foo') + assert e is not None diff --git a/pandas/tests/sparse/__init__.py b/pandas/tests/core/__init__.py similarity index 100% rename from pandas/tests/sparse/__init__.py rename to pandas/tests/core/__init__.py diff --git a/pandas/tests/types/__init__.py b/pandas/tests/core/computation/__init__.py similarity index 100% rename from pandas/tests/types/__init__.py rename to pandas/tests/core/computation/__init__.py diff --git a/pandas/tests/computation/test_compat.py b/pandas/tests/core/computation/test_compat.py similarity index 84% rename from pandas/tests/computation/test_compat.py rename to pandas/tests/core/computation/test_compat.py index 56a7cab730f1f..7b6c0f9c4c9aa 100644 --- a/pandas/tests/computation/test_compat.py +++ b/pandas/tests/core/computation/test_compat.py @@ -4,15 +4,15 @@ import pandas as pd from pandas.util import testing as tm -from pandas.computation.engines import _engines -import pandas.computation.expr as expr -from pandas.computation import _MIN_NUMEXPR_VERSION +from pandas.core.computation.engines import _engines +import pandas.core.computation.expr as expr +from pandas.core.computation import _MIN_NUMEXPR_VERSION def test_compat(): # test we have compat with our version of nu - from pandas.computation import _NUMEXPR_INSTALLED + from pandas.core.computation import _NUMEXPR_INSTALLED try: import numexpr as ne ver = ne.__version__ diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/core/computation/test_eval.py similarity index 99% rename from pandas/tests/computation/test_eval.py rename to pandas/tests/core/computation/test_eval.py index 78aad90cacf94..1f519174ce210 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/core/computation/test_eval.py @@ -8,23 +8,25 @@ from numpy.random import randn, rand, randint import numpy as np -from pandas.types.common import is_list_like, is_scalar +from pandas.core.dtypes.common import is_list_like, is_scalar import pandas as pd from pandas.core import common as com from pandas.errors import PerformanceWarning from pandas import DataFrame, Series, Panel, date_range from pandas.util.testing import makeCustomDataframe as mkdf -from pandas.computation import pytables -from pandas.computation.engines import _engines, NumExprClobberingError -from pandas.computation.expr import PythonExprVisitor, PandasExprVisitor -from pandas.computation.expressions import _USE_NUMEXPR, _NUMEXPR_INSTALLED -from pandas.computation.ops import (_binary_ops_dict, - _special_case_arith_ops_syms, - _arith_ops_syms, _bool_ops_syms, - _unary_math_ops, _binary_math_ops) - -import pandas.computation.expr as expr +from pandas.core.computation import pytables +from pandas.core.computation.engines import _engines, NumExprClobberingError +from pandas.core.computation.expr import PythonExprVisitor, PandasExprVisitor +from pandas.core.computation.expressions import ( + _USE_NUMEXPR, _NUMEXPR_INSTALLED) +from pandas.core.computation.ops import ( + _binary_ops_dict, + _special_case_arith_ops_syms, + _arith_ops_syms, _bool_ops_syms, + _unary_math_ops, _binary_math_ops) + +import pandas.core.computation.expr as expr import pandas.util.testing as tm from pandas.util.testing import (assert_frame_equal, randbool, assertRaisesRegexp, assert_numpy_array_equal, diff --git 
a/pandas/types/__init__.py b/pandas/tests/core/dtypes/__init__.py similarity index 100% rename from pandas/types/__init__.py rename to pandas/tests/core/dtypes/__init__.py diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/core/dtypes/test_cast.py similarity index 95% rename from pandas/tests/types/test_cast.py rename to pandas/tests/core/dtypes/test_cast.py index de6ef7af9d7f9..a1490426ebf9d 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/core/dtypes/test_cast.py @@ -10,15 +10,18 @@ import numpy as np from pandas import Timedelta, Timestamp, DatetimeIndex -from pandas.types.cast import (maybe_downcast_to_dtype, - maybe_convert_objects, - infer_dtype_from_scalar, - infer_dtype_from_array, - maybe_convert_string_to_object, - maybe_convert_scalar, - find_common_type) -from pandas.types.dtypes import (CategoricalDtype, - DatetimeTZDtype, PeriodDtype) +from pandas.core.dtypes.cast import ( + maybe_downcast_to_dtype, + maybe_convert_objects, + infer_dtype_from_scalar, + infer_dtype_from_array, + maybe_convert_string_to_object, + maybe_convert_scalar, + find_common_type) +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + PeriodDtype) from pandas.util import testing as tm diff --git a/pandas/tests/types/test_common.py b/pandas/tests/core/dtypes/test_common.py similarity index 92% rename from pandas/tests/types/test_common.py rename to pandas/tests/core/dtypes/test_common.py index 21772bab44d01..1017f93b8241c 100644 --- a/pandas/tests/types/test_common.py +++ b/pandas/tests/core/dtypes/test_common.py @@ -3,8 +3,10 @@ import pytest import numpy as np -from pandas.types.dtypes import DatetimeTZDtype, PeriodDtype, CategoricalDtype -from pandas.types.common import pandas_dtype, is_dtype_equal +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, PeriodDtype, CategoricalDtype) +from pandas.core.dtypes.common import ( + pandas_dtype, is_dtype_equal) import pandas.util.testing as tm @@ -87,11 +89,11 @@ def test_dtype_equal_strict(): def get_is_dtype_funcs(): """ - Get all functions in pandas.types.common that + Get all functions in pandas.core.dtypes.common that begin with 'is_' and end with 'dtype' """ - import pandas.types.common as com + import pandas.core.dtypes.common as com fnames = [f for f in dir(com) if (f.startswith('is_') and f.endswith('dtype'))] diff --git a/pandas/tests/types/test_concat.py b/pandas/tests/core/dtypes/test_concat.py similarity index 98% rename from pandas/tests/types/test_concat.py rename to pandas/tests/core/dtypes/test_concat.py index f4faab45f4ba2..e8eb042d78f30 100644 --- a/pandas/tests/types/test_concat.py +++ b/pandas/tests/core/dtypes/test_concat.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import pandas as pd -import pandas.types.concat as _concat +import pandas.core.dtypes.concat as _concat import pandas.util.testing as tm diff --git a/pandas/tests/types/test_dtypes.py b/pandas/tests/core/dtypes/test_dtypes.py similarity index 96% rename from pandas/tests/types/test_dtypes.py rename to pandas/tests/core/dtypes/test_dtypes.py index 79d9fd84396e7..ec9876df14e3b 100644 --- a/pandas/tests/types/test_dtypes.py +++ b/pandas/tests/core/dtypes/test_dtypes.py @@ -5,15 +5,17 @@ import pandas as pd from pandas import Series, Categorical, IntervalIndex, date_range -from pandas.types.dtypes import (DatetimeTZDtype, PeriodDtype, - IntervalDtype, CategoricalDtype) -from pandas.types.common import (is_categorical_dtype, is_categorical, - is_datetime64tz_dtype, is_datetimetz, - is_period_dtype, is_period, - is_dtype_equal, 
is_datetime64_ns_dtype, - is_datetime64_dtype, is_interval_dtype, - is_datetime64_any_dtype, is_string_dtype, - _coerce_to_dtype) +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, PeriodDtype, + IntervalDtype, CategoricalDtype) +from pandas.core.dtypes.common import ( + is_categorical_dtype, is_categorical, + is_datetime64tz_dtype, is_datetimetz, + is_period_dtype, is_period, + is_dtype_equal, is_datetime64_ns_dtype, + is_datetime64_dtype, is_interval_dtype, + is_datetime64_any_dtype, is_string_dtype, + _coerce_to_dtype) import pandas.util.testing as tm diff --git a/pandas/tests/types/test_generic.py b/pandas/tests/core/dtypes/test_generic.py similarity index 97% rename from pandas/tests/types/test_generic.py rename to pandas/tests/core/dtypes/test_generic.py index 7994aa77bb220..d550b5535cea3 100644 --- a/pandas/tests/types/test_generic.py +++ b/pandas/tests/core/dtypes/test_generic.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd import pandas.util.testing as tm -from pandas.types import generic as gt +from pandas.core.dtypes import generic as gt class TestABCClasses(tm.TestCase): diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/core/dtypes/test_inference.py similarity index 97% rename from pandas/tests/types/test_inference.py rename to pandas/tests/core/dtypes/test_inference.py index ec61903d3f20c..94d1d21d59d88 100644 --- a/pandas/tests/types/test_inference.py +++ b/pandas/tests/core/dtypes/test_inference.py @@ -18,26 +18,25 @@ DatetimeIndex, TimedeltaIndex, Timestamp, Panel, Period, Categorical) from pandas.compat import u, PY2, PY3, StringIO, lrange -from pandas.types import inference -from pandas.types.common import (is_timedelta64_dtype, - is_timedelta64_ns_dtype, - is_datetime64_dtype, - is_datetime64_ns_dtype, - is_datetime64_any_dtype, - is_datetime64tz_dtype, - is_number, - is_integer, - is_float, - is_bool, - is_scalar, - is_scipy_sparse, - _ensure_int32, - _ensure_categorical) -from pandas.types.missing import isnull +from pandas.core.dtypes import inference +from pandas.core.dtypes.common import ( + is_timedelta64_dtype, + is_timedelta64_ns_dtype, + is_datetime64_dtype, + is_datetime64_ns_dtype, + is_datetime64_any_dtype, + is_datetime64tz_dtype, + is_number, + is_integer, + is_float, + is_bool, + is_scalar, + is_scipy_sparse, + _ensure_int32, + _ensure_categorical) +from pandas.core.dtypes.missing import isnull from pandas.util import testing as tm -from pandas.tests.sparse.test_frame import spmatrix # noqa: F401 - def test_is_sequence(): is_seq = inference.is_sequence diff --git a/pandas/tests/types/test_io.py b/pandas/tests/core/dtypes/test_io.py similarity index 100% rename from pandas/tests/types/test_io.py rename to pandas/tests/core/dtypes/test_io.py diff --git a/pandas/tests/types/test_missing.py b/pandas/tests/core/dtypes/test_missing.py similarity index 98% rename from pandas/tests/types/test_missing.py rename to pandas/tests/core/dtypes/test_missing.py index 31bf2817c8bab..52dec66fe73eb 100644 --- a/pandas/tests/types/test_missing.py +++ b/pandas/tests/core/dtypes/test_missing.py @@ -11,9 +11,10 @@ from pandas._libs.tslib import iNaT from pandas import (NaT, Float64Index, Series, DatetimeIndex, TimedeltaIndex, date_range) -from pandas.types.dtypes import DatetimeTZDtype -from pandas.types.missing import (array_equivalent, isnull, notnull, - na_value_for_dtype) +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.missing import ( + array_equivalent, isnull, notnull, + na_value_for_dtype) def 
test_notnull(): diff --git a/pandas/tests/core/sparse/__init__.py b/pandas/tests/core/sparse/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/core/sparse/common.py b/pandas/tests/core/sparse/common.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/sparse/test_arithmetics.py b/pandas/tests/core/sparse/test_arithmetics.py similarity index 100% rename from pandas/tests/sparse/test_arithmetics.py rename to pandas/tests/core/sparse/test_arithmetics.py diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/core/sparse/test_array.py similarity index 99% rename from pandas/tests/sparse/test_array.py rename to pandas/tests/core/sparse/test_array.py index 15531cecfe79b..b7b664e7bfb8a 100644 --- a/pandas/tests/sparse/test_array.py +++ b/pandas/tests/core/sparse/test_array.py @@ -7,8 +7,8 @@ import numpy as np from pandas import _np_version_under1p8 -from pandas.sparse.api import SparseArray, SparseSeries -from pandas.sparse.libsparse import IntIndex +from pandas.core.sparse.api import SparseArray, SparseSeries +from pandas.core.sparse.libsparse import IntIndex from pandas.util.testing import assert_almost_equal, assertRaisesRegexp import pandas.util.testing as tm diff --git a/pandas/tests/sparse/test_combine_concat.py b/pandas/tests/core/sparse/test_combine_concat.py similarity index 100% rename from pandas/tests/sparse/test_combine_concat.py rename to pandas/tests/core/sparse/test_combine_concat.py diff --git a/pandas/tests/sparse/test_format.py b/pandas/tests/core/sparse/test_format.py similarity index 100% rename from pandas/tests/sparse/test_format.py rename to pandas/tests/core/sparse/test_format.py diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/core/sparse/test_frame.py similarity index 99% rename from pandas/tests/sparse/test_frame.py rename to pandas/tests/core/sparse/test_frame.py index 075d5efcefbe0..adb813a27e7e9 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/core/sparse/test_frame.py @@ -9,23 +9,22 @@ import pandas as pd from pandas import Series, DataFrame, bdate_range, Panel -from pandas.types.common import (is_bool_dtype, - is_float_dtype, - is_object_dtype, - is_float) +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_float_dtype, + is_object_dtype, + is_float) from pandas.tseries.index import DatetimeIndex from pandas.tseries.offsets import BDay -import pandas.util.testing as tm +from pandas.util import testing as tm from pandas.compat import lrange from pandas import compat -import pandas.sparse.frame as spf +from pandas.core.sparse import frame as spf -from pandas.sparse.libsparse import BlockIndex, IntIndex -from pandas.sparse.api import SparseSeries, SparseDataFrame, SparseArray +from pandas.core.sparse.libsparse import BlockIndex, IntIndex +from pandas.core.sparse.api import SparseSeries, SparseDataFrame, SparseArray from pandas.tests.frame.test_api import SharedWithSparse -from pandas.tests.sparse.common import spmatrix # noqa: F401 - class TestSparseDataFrame(tm.TestCase, SharedWithSparse): klass = SparseDataFrame diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/core/sparse/test_groupby.py similarity index 100% rename from pandas/tests/sparse/test_groupby.py rename to pandas/tests/core/sparse/test_groupby.py diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/core/sparse/test_indexing.py similarity index 100% rename from pandas/tests/sparse/test_indexing.py rename to pandas/tests/core/sparse/test_indexing.py diff 
--git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/core/sparse/test_libsparse.py similarity index 99% rename from pandas/tests/sparse/test_libsparse.py rename to pandas/tests/core/sparse/test_libsparse.py index 696d2cf47f4c0..e4c3d6d3050cb 100644 --- a/pandas/tests/sparse/test_libsparse.py +++ b/pandas/tests/core/sparse/test_libsparse.py @@ -7,8 +7,8 @@ from pandas import compat -from pandas.sparse.array import IntIndex, BlockIndex, _make_index -import pandas.sparse.libsparse as splib +from pandas.core.sparse.array import IntIndex, BlockIndex, _make_index +import pandas.core.sparse.libsparse as splib TEST_LENGTH = 20 diff --git a/pandas/tests/sparse/test_list.py b/pandas/tests/core/sparse/test_list.py similarity index 98% rename from pandas/tests/sparse/test_list.py rename to pandas/tests/core/sparse/test_list.py index 8511cd5997368..9f91d73a8228a 100644 --- a/pandas/tests/sparse/test_list.py +++ b/pandas/tests/core/sparse/test_list.py @@ -4,7 +4,7 @@ from numpy import nan import numpy as np -from pandas.sparse.api import SparseList, SparseArray +from pandas.core.sparse.api import SparseList, SparseArray import pandas.util.testing as tm diff --git a/pandas/tests/sparse/test_pivot.py b/pandas/tests/core/sparse/test_pivot.py similarity index 100% rename from pandas/tests/sparse/test_pivot.py rename to pandas/tests/core/sparse/test_pivot.py diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/core/sparse/test_series.py similarity index 99% rename from pandas/tests/sparse/test_series.py rename to pandas/tests/core/sparse/test_series.py index 83f0237841dbd..0b71dffe1782b 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/core/sparse/test_series.py @@ -14,10 +14,10 @@ from pandas import compat from pandas.tools.util import cartesian_product -import pandas.sparse.frame as spf +import pandas.core.sparse.frame as spf -from pandas.sparse.libsparse import BlockIndex, IntIndex -from pandas.sparse.api import SparseSeries +from pandas.core.sparse.libsparse import BlockIndex, IntIndex +from pandas.core.sparse.api import SparseSeries from pandas.tests.series.test_api import SharedWithSparse diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 83458c82a3d7c..92f6a600a9e2a 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -1420,20 +1420,23 @@ def test_repr_html_wide_multiindex_cols(self): assert '...' in wide_repr def test_repr_html_long(self): - max_rows = get_option('display.max_rows') - h = max_rows - 1 - df = DataFrame({'A': np.arange(1, 1 + h), 'B': np.arange(41, 41 + h)}) - reg_repr = df._repr_html_() - assert '..' not in reg_repr - assert str(41 + max_rows // 2) in reg_repr + with option_context('display.max_rows', 60): + max_rows = get_option('display.max_rows') + h = max_rows - 1 + df = DataFrame({'A': np.arange(1, 1 + h), + 'B': np.arange(41, 41 + h)}) + reg_repr = df._repr_html_() + assert '..' not in reg_repr + assert str(41 + max_rows // 2) in reg_repr - h = max_rows + 1 - df = DataFrame({'A': np.arange(1, 1 + h), 'B': np.arange(41, 41 + h)}) - long_repr = df._repr_html_() - assert '..' in long_repr - assert str(41 + max_rows // 2) not in long_repr - assert u('%d rows ') % h in long_repr - assert u('2 columns') in long_repr + h = max_rows + 1 + df = DataFrame({'A': np.arange(1, 1 + h), + 'B': np.arange(41, 41 + h)}) + long_repr = df._repr_html_() + assert '..' 
in long_repr + assert str(41 + max_rows // 2) not in long_repr + assert u('%d rows ') % h in long_repr + assert u('2 columns') in long_repr def test_repr_html_float(self): with option_context('display.max_rows', 60): diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index f05b6fdd6bc23..9add944d2293e 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -9,9 +9,10 @@ from pandas.compat import lrange from pandas import (DataFrame, Series, Index, MultiIndex, RangeIndex, date_range, IntervalIndex) -from pandas.types.common import (is_object_dtype, - is_categorical_dtype, - is_interval_dtype) +from pandas.core.dtypes.common import ( + is_object_dtype, + is_categorical_dtype, + is_interval_dtype) import pandas as pd from pandas.util.testing import (assert_series_equal, diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 157cd1cdf1b22..1afb048ad825a 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -10,7 +10,7 @@ from pandas import (notnull, DataFrame, Series, MultiIndex, date_range, Timestamp, compat) import pandas as pd -from pandas.types.dtypes import CategoricalDtype +from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.util.testing import (assert_series_equal, assert_frame_equal) import pandas.util.testing as tm diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 6d28d3b4dfcd5..508053a6367fa 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -13,7 +13,7 @@ import numpy.ma as ma import numpy.ma.mrecords as mrecords -from pandas.types.common import is_integer_dtype +from pandas.core.dtypes.common import is_integer_dtype from pandas.compat import (lmap, long, zip, range, lrange, lzip, OrderedDict, is_platform_little_endian) from pandas import compat diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index f7d2c1a654cd5..14334dfbeddb3 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -7,7 +7,7 @@ from pandas import (DataFrame, Series, date_range, Timedelta, Timestamp, compat, concat, option_context) from pandas.compat import u -from pandas.types.dtypes import DatetimeTZDtype +from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.tests.frame.common import TestData from pandas.util.testing import (assert_series_equal, assert_frame_equal, diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index f0dfc4553886b..b624657ca4b4b 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -20,9 +20,10 @@ from pandas._libs.tslib import iNaT from pandas.tseries.offsets import BDay -from pandas.types.common import (is_float_dtype, - is_integer, - is_scalar) +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer, + is_scalar) from pandas.util.testing import (assert_almost_equal, assert_numpy_array_equal, assert_series_equal, @@ -1866,7 +1867,7 @@ def test_iloc_duplicates(self): assert_frame_equal(result, expected) def test_iloc_sparse_propegate_fill_value(self): - from pandas.sparse.api import SparseDataFrame + from pandas.core.sparse.api import SparseDataFrame df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999) self.assertTrue(len(df['A'].sp_values) == len(df.iloc[:, 0].sp_values)) diff --git a/pandas/tests/frame/test_query_eval.py 
b/pandas/tests/frame/test_query_eval.py index f90b37b66d200..a531b86699e90 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -19,7 +19,7 @@ makeCustomDataframe as mkdf) import pandas.util.testing as tm -from pandas.computation import _NUMEXPR_INSTALLED +from pandas.core.computation import _NUMEXPR_INSTALLED from pandas.tests.frame.common import TestData @@ -511,7 +511,7 @@ def test_query_syntax_error(self): df.query('i - +', engine=engine, parser=parser) def test_query_scope(self): - from pandas.computation.ops import UndefinedVariableError + from pandas.core.computation.ops import UndefinedVariableError engine, parser = self.engine, self.parser skip_if_no_pandas_parser(parser) @@ -535,7 +535,7 @@ def test_query_scope(self): df.query('@a > b > c', engine=engine, parser=parser) def test_query_doesnt_pickup_local(self): - from pandas.computation.ops import UndefinedVariableError + from pandas.core.computation.ops import UndefinedVariableError engine, parser = self.engine, self.parser n = m = 10 @@ -546,7 +546,7 @@ def test_query_doesnt_pickup_local(self): df.query('sin > 5', engine=engine, parser=parser) def test_query_builtin(self): - from pandas.computation.engines import NumExprClobberingError + from pandas.core.computation.engines import NumExprClobberingError engine, parser = self.engine, self.parser n = m = 10 @@ -624,7 +624,7 @@ def test_nested_scope(self): assert_frame_equal(result, expected) def test_nested_raises_on_local_self_reference(self): - from pandas.computation.ops import UndefinedVariableError + from pandas.core.computation.ops import UndefinedVariableError df = DataFrame(np.random.randn(5, 3)) @@ -683,7 +683,7 @@ def test_at_inside_string(self): assert_frame_equal(result, expected) def test_query_undefined_local(self): - from pandas.computation.ops import UndefinedVariableError + from pandas.core.computation.ops import UndefinedVariableError engine, parser = self.engine, self.parser skip_if_no_pandas_parser(parser) df = DataFrame(np.random.rand(10, 2), columns=list('ab')) @@ -803,7 +803,7 @@ def test_date_index_query_with_NaT_duplicates(self): df.query('index < 20130101 < dates3', engine=engine, parser=parser) def test_nested_scope(self): - from pandas.computation.ops import UndefinedVariableError + from pandas.core.computation.ops import UndefinedVariableError engine = self.engine parser = self.parser # smoke test diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 02c7933e020ea..289723ed5667a 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -3,7 +3,7 @@ from numpy import nan import numpy as np -from pandas.types.common import _ensure_int64 +from pandas.core.dtypes.common import _ensure_int64 from pandas import Index, isnull from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 3b85fadda6cfe..541f5d28be421 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -4,7 +4,8 @@ import pandas as pd from pandas.util import testing as tm from pandas import Series, DataFrame, Timestamp, MultiIndex, concat, date_range -from pandas.types.common import _ensure_platform_int, is_timedelta64_dtype +from pandas.core.dtypes.common import ( + _ensure_platform_int, is_timedelta64_dtype) from pandas.compat import StringIO from pandas._libs import groupby from .common 
import MixIn, assert_fp_equal diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 54d47d02c5e8e..25214e6b170b5 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -9,7 +9,7 @@ RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, IntervalIndex, notnull, isnull) -from pandas.types.common import needs_i8_conversion +from pandas.core.dtypes.common import needs_i8_conversion from pandas.util.testing import assertRaisesRegexp from pandas._libs.tslib import iNaT diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 16881de6e8c39..d4e672d0584cf 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -12,21 +12,36 @@ class TestDatetimeIndex(tm.TestCase): + def test_construction_caching(self): + + df = pd.DataFrame({'dt': pd.date_range('20130101', periods=3), + 'dttz': pd.date_range('20130101', periods=3, + tz='US/Eastern'), + 'dt_with_null': [pd.Timestamp('20130101'), pd.NaT, + pd.Timestamp('20130103')], + 'dtns': pd.date_range('20130101', periods=3, + freq='ns')}) + assert df.dttz.dtype.tz.zone == 'US/Eastern' + def test_construction_with_alt(self): i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') i2 = DatetimeIndex(i, dtype=i.dtype) self.assert_index_equal(i, i2) + assert i.tz.zone == 'US/Eastern' i2 = DatetimeIndex(i.tz_localize(None).asi8, tz=i.dtype.tz) self.assert_index_equal(i, i2) + assert i.tz.zone == 'US/Eastern' i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype) self.assert_index_equal(i, i2) + assert i.tz.zone == 'US/Eastern' i2 = DatetimeIndex( i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz) self.assert_index_equal(i, i2) + assert i.tz.zone == 'US/Eastern' # localize into the provided tz i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC') diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 1260ee4e5ab07..28fbce43bf983 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -14,7 +14,7 @@ from pandas.tseries.tools import normalize_date from pandas.compat import lmap from pandas.compat.numpy import np_array_datetime64_compat -from pandas.types.common import is_datetime64_ns_dtype +from pandas.core.dtypes.common import is_datetime64_ns_dtype from pandas.util import testing as tm from pandas.util.testing import assert_series_equal, _skip_if_has_locale from pandas import (isnull, to_datetime, Timestamp, Series, DataFrame, diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index 0f8a9573a233b..51c0889a6f091 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -5,7 +5,7 @@ import numpy as np from pandas.compat import lrange -from pandas.types.common import is_scalar +from pandas.core.dtypes.common import is_scalar from pandas import Series, DataFrame, Panel, date_range, UInt64Index from pandas.util import testing as tm from pandas.formats.printing import pprint_thing diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 0d6ca383a1be1..53812feaa8da7 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -6,8 +6,9 @@ from warnings import catch_warnings from datetime import datetime -from pandas.types.common import (is_integer_dtype, - is_float_dtype) +from 
pandas.core.dtypes.common import ( + is_integer_dtype, + is_float_dtype) from pandas.compat import range, lrange, lzip, StringIO import numpy as np diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py index b12d1eb97f88b..6eda8b2b6f631 100644 --- a/pandas/tests/indexing/test_ix.py +++ b/pandas/tests/indexing/test_ix.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd -from pandas.types.common import is_scalar +from pandas.core.dtypes.common import is_scalar from pandas.compat import lrange from pandas import Series, DataFrame, option_context, MultiIndex from pandas.util import testing as tm diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index d1795f2816817..2a785375acaea 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -7,11 +7,14 @@ import pytest from pandas import DataFrame -from pandas.types.dtypes import PeriodDtype, CategoricalDtype, DatetimeTZDtype +from pandas.core.dtypes.dtypes import ( + PeriodDtype, CategoricalDtype, DatetimeTZDtype) import pandas.util.testing as tm from pandas.io.json.table_schema import ( - as_json_table_type, build_table_schema, make_field, set_default_names -) + as_json_table_type, + build_table_schema, + make_field, + set_default_names) class TestBuildSchema(tm.TestCase): diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py index 8066718363803..50c9a1bc724fc 100644 --- a/pandas/tests/io/parser/dtypes.py +++ b/pandas/tests/io/parser/dtypes.py @@ -11,7 +11,7 @@ from pandas import DataFrame, Series, Index, MultiIndex, Categorical from pandas.compat import StringIO -from pandas.types.dtypes import CategoricalDtype +from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.errors import ParserWarning diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 3fad2637ef057..232bb126d9d67 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -11,6 +11,7 @@ from pandas.util.testing import assert_frame_equal, ensure_clean +@pytest.mark.single class TestFeather(object): def check_error_on_write(self, df, exc): @@ -52,6 +53,7 @@ def test_basic(self): 'dtns': pd.date_range('20130101', periods=3, freq='ns')}) + assert df.dttz.dtype.tz.zone == 'US/Eastern' self.check_round_trip(df) def test_strided_data_issues(self): diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index b4e8d6a3b972c..ce411bb4d5c4e 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -31,8 +31,9 @@ from datetime import datetime, date, time -from pandas.types.common import (is_object_dtype, is_datetime64_dtype, - is_datetime64tz_dtype) +from pandas.core.dtypes.common import ( + is_object_dtype, is_datetime64_dtype, + is_datetime64tz_dtype) from pandas import DataFrame, Series, Index, MultiIndex, isnull, concat from pandas import date_range, to_datetime, to_timedelta, Timestamp import pandas.compat as compat diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index db594889c91ee..50d3342c56522 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -20,7 +20,7 @@ from pandas.io.stata import (read_stata, StataReader, InvalidColumnName, PossiblePrecisionLoss, StataMissingValue) from pandas._libs.tslib import NaT -from pandas.types.common import is_categorical_dtype +from pandas.core.dtypes.common import is_categorical_dtype class TestStata(tm.TestCase): diff 
--git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index d81f73e73ae69..0ffd53b149d7a 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -8,7 +8,7 @@ from pandas import DataFrame, Series from pandas.compat import zip, iteritems from pandas.util.decorators import cache_readonly -from pandas.types.api import is_list_like +from pandas.core.dtypes.api import is_list_like import pandas.util.testing as tm from pandas.util.testing import (ensure_clean, assert_is_valid_plot_return_object) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 404752b567f63..fe07f5b9f193e 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -11,7 +11,7 @@ import pandas as pd from pandas import (Series, DataFrame, MultiIndex, PeriodIndex, date_range, bdate_range) -from pandas.types.api import is_list_like +from pandas.core.dtypes.api import is_list_like from pandas.compat import range, lrange, lmap, lzip, u, zip, PY3 from pandas.formats.printing import pprint_thing import pandas.util.testing as tm diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index f4297208b2e26..24b2a12d70709 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -8,7 +8,9 @@ import numpy.ma as ma import pandas as pd -from pandas.types.common import is_categorical_dtype, is_datetime64tz_dtype +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_datetime64tz_dtype) from pandas import (Index, Series, isnull, date_range, NaT, period_range, MultiIndex, IntervalIndex) from pandas.tseries.index import Timestamp, DatetimeIndex diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 89f972a33a630..8825ba5607a20 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd -from pandas.types.common import is_integer_dtype, is_list_like +from pandas.core.dtypes.common import is_integer_dtype, is_list_like from pandas import (Index, Series, DataFrame, bdate_range, date_range, period_range, timedelta_range, PeriodIndex, Timestamp, DatetimeIndex, TimedeltaIndex) diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 48410c1c73479..6c1d77acd70d5 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -8,7 +8,7 @@ import pandas as pd import pandas._libs.index as _index -from pandas.types.common import is_integer, is_scalar +from pandas.core.dtypes.common import is_integer, is_scalar from pandas import (Index, Series, DataFrame, isnull, date_range, NaT, MultiIndex, Timestamp, DatetimeIndex, Timedelta) diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index 5aca34fb86576..339d871b63049 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -7,7 +7,7 @@ from pandas import (Index, Series, _np_version_under1p9) from pandas.tseries.index import Timestamp -from pandas.types.common import is_integer +from pandas.core.dtypes.common import is_integer import pandas.util.testing as tm from .common import TestData diff --git a/pandas/tests/sparse/common.py b/pandas/tests/sparse/common.py deleted file mode 100644 index 3aeef8d436e1a..0000000000000 --- a/pandas/tests/sparse/common.py +++ /dev/null @@ -1,10 +0,0 @@ -import 
pytest - -import pandas.util.testing as tm - - -@pytest.fixture(params=['bsr', 'coo', 'csc', 'csr', 'dia', 'dok', 'lil']) -def spmatrix(request): - tm._skip_if_no_scipy() - from scipy import sparse - return getattr(sparse, request.param + '_matrix') diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 4a1cf6314aaed..91c06a2c30e50 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -9,8 +9,9 @@ import pandas as pd import pandas.compat as compat -from pandas.types.common import (is_object_dtype, is_datetimetz, - needs_i8_conversion) +from pandas.core.dtypes.common import ( + is_object_dtype, is_datetimetz, + needs_i8_conversion) import pandas.util.testing as tm from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, Timedelta, IntervalIndex, Interval) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index dd370f0a20c2e..3296673e96316 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -9,10 +9,11 @@ import numpy as np -from pandas.types.dtypes import CategoricalDtype -from pandas.types.common import (is_categorical_dtype, - is_float_dtype, - is_integer_dtype) +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_float_dtype, + is_integer_dtype) import pandas as pd import pandas.compat as compat diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index dc4787176a0b5..b353f73f4004d 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -12,7 +12,7 @@ import numpy as np from pandas.core.api import DataFrame, Panel -from pandas.computation import expressions as expr +from pandas.core.computation import expressions as expr from pandas import compat, _np_version_under1p11 from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 118039d1f354c..80059277407c3 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -11,7 +11,7 @@ import pandas as pd from distutils.version import LooseVersion -from pandas.types.common import is_scalar +from pandas.core.dtypes.common import is_scalar from pandas import (Index, Series, DataFrame, Panel, isnull, date_range, period_range, Panel4D) from pandas.core.index import MultiIndex diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index af7c584249416..b18214bbef926 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -12,7 +12,7 @@ from pandas import (Index, MultiIndex, DataFrame, DatetimeIndex, Series, Categorical) from pandas.compat import OrderedDict, lrange -from pandas.sparse.array import SparseArray +from pandas.core.sparse.array import SparseArray from pandas.core.internals import (BlockPlacement, SingleBlockManager, make_block, BlockManager) import pandas.core.algorithms as algos diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index e3193cddbaaab..648a3b98b245a 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -12,7 +12,7 @@ from pandas import Panel, DataFrame, Series, notnull, isnull, Timestamp from pandas.core.common import UnsortedIndexError -from pandas.types.common import is_float_dtype, is_integer_dtype +from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype import pandas.core.common as com 
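
The hunks above and below are the mechanical half of the reorg: every import from pandas.types.* moves to pandas.core.dtypes.*. Code that must run against both layouts can paper over the move with a guarded import. The following is a sketch of that pattern, the same try/except idiom the asv benchmarks adopt later in this series; it is an illustration rather than part of the patches, and is_list_like is just a representative name:

    try:
        # post-reorg layout (pandas >= 0.20)
        from pandas.core.dtypes.common import is_list_like
    except ImportError:
        # pre-reorg layout (pandas < 0.20)
        from pandas.types.common import is_list_like
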
import pandas.util.testing as tm from pandas.compat import (range, lrange, StringIO, lzip, u, product as diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 54de8c1e34031..20a9238310ccf 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -6,7 +6,7 @@ import warnings import numpy as np from pandas import Series, isnull, _np_version_under1p9 -from pandas.types.common import is_integer_dtype +from pandas.core.dtypes.common import is_integer_dtype import pandas.core.nanops as nanops import pandas.util.testing as tm diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index bc7bb8a4dfec1..f0e53046e3552 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from pandas.types.common import is_float_dtype +from pandas.core.dtypes.common import is_float_dtype from pandas import (Series, DataFrame, Index, date_range, isnull, notnull, pivot, MultiIndex) from pandas.core.nanops import nanall, nanany diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index c0511581cd299..3af47a2b408bc 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -6,7 +6,7 @@ from warnings import catch_warnings import numpy as np -from pandas.types.common import is_float_dtype +from pandas.core.dtypes.common import is_float_dtype from pandas import Series, Index, isnull, notnull from pandas.core.panel import Panel from pandas.core.panel4d import Panel4D diff --git a/pandas/tests/tools/test_merge.py b/pandas/tests/tools/test_merge.py index 8011bc4a1cfc2..cc4a97df33801 100644 --- a/pandas/tests/tools/test_merge.py +++ b/pandas/tests/tools/test_merge.py @@ -12,8 +12,8 @@ from pandas.tools.concat import concat from pandas.tools.merge import merge, MergeError from pandas.util.testing import assert_frame_equal, assert_series_equal -from pandas.types.dtypes import CategoricalDtype -from pandas.types.common import is_categorical_dtype, is_object_dtype +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype from pandas import DataFrame, Index, MultiIndex, Series, Categorical import pandas.util.testing as tm diff --git a/pandas/tests/tools/test_union_categoricals.py b/pandas/tests/tools/test_union_categoricals.py index 299b60f2a00b0..f9224d0126f6c 100644 --- a/pandas/tests/tools/test_union_categoricals.py +++ b/pandas/tests/tools/test_union_categoricals.py @@ -1,7 +1,7 @@ import numpy as np import pandas as pd from pandas import Categorical, Series, CategoricalIndex -from pandas.types.concat import union_categoricals +from pandas.core.dtypes.concat import union_categoricals from pandas.util import testing as tm diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py index 98664c1ec118c..e81dfd8649e8e 100755 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/tseries/test_resample.py @@ -12,7 +12,7 @@ from pandas import (Series, DataFrame, Panel, Index, isnull, notnull, Timestamp) -from pandas.types.generic import ABCSeries, ABCDataFrame +from pandas.core.dtypes.generic import ABCSeries, ABCDataFrame from pandas.compat import range, lrange, zip, product, OrderedDict from pandas.core.base import SpecificationError from pandas.errors import UnsupportedFunctionCall diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 3e1b29f4c282c..125e031b5e3a2 100644 --- 
a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -11,7 +11,7 @@ import pandas.tseries.offsets as offsets from pandas.compat import lrange, zip from pandas.tseries.index import bdate_range, date_range -from pandas.types.dtypes import DatetimeTZDtype +from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas._libs import tslib from pandas import (Index, Series, DataFrame, isnull, Timestamp, NaT, DatetimeIndex, to_datetime) diff --git a/pandas/tools/concat.py b/pandas/tools/concat.py index 5df9a5abb78b2..af2eb734a02f6 100644 --- a/pandas/tools/concat.py +++ b/pandas/tools/concat.py @@ -12,7 +12,7 @@ from pandas.core.internals import concatenate_block_managers from pandas.core import common as com from pandas.core.generic import NDFrame -import pandas.types.concat as _concat +import pandas.core.dtypes.concat as _concat # --------------------------------------------------------------------- # Concatenate DataFrame objects diff --git a/pandas/tools/hashing.py b/pandas/tools/hashing.py index 85ceb439435ee..275c1c87ea57a 100644 --- a/pandas/tools/hashing.py +++ b/pandas/tools/hashing.py @@ -7,10 +7,14 @@ from pandas import Series, factorize, Categorical, Index, MultiIndex from pandas.tools import libhashing as _hash from pandas._libs.lib import is_bool_array -from pandas.types.generic import ABCIndexClass, ABCSeries, ABCDataFrame -from pandas.types.common import (is_categorical_dtype, is_numeric_dtype, - is_datetime64_dtype, is_timedelta64_dtype, - is_list_like) +from pandas.core.dtypes.generic import ( + ABCIndexClass, + ABCSeries, + ABCDataFrame) +from pandas.core.dtypes.common import ( + is_categorical_dtype, is_numeric_dtype, + is_datetime64_dtype, is_timedelta64_dtype, + is_list_like) # 16 byte long hashing key _default_hash_key = '0123456789123456' diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 7de2549cadfc7..53208fbdd5529 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -14,24 +14,25 @@ from pandas import (Categorical, Series, DataFrame, Index, MultiIndex, Timedelta) from pandas.core.frame import _merge_doc -from pandas.types.common import (is_datetime64tz_dtype, - is_datetime64_dtype, - needs_i8_conversion, - is_int64_dtype, - is_categorical_dtype, - is_integer_dtype, - is_float_dtype, - is_numeric_dtype, - is_integer, - is_int_or_datetime_dtype, - is_dtype_equal, - is_bool, - is_list_like, - _ensure_int64, - _ensure_float64, - _ensure_object, - _get_dtype) -from pandas.types.missing import na_value_for_dtype +from pandas.core.dtypes.common import ( + is_datetime64tz_dtype, + is_datetime64_dtype, + needs_i8_conversion, + is_int64_dtype, + is_categorical_dtype, + is_integer_dtype, + is_float_dtype, + is_numeric_dtype, + is_integer, + is_int_or_datetime_dtype, + is_dtype_equal, + is_bool, + is_list_like, + _ensure_int64, + _ensure_float64, + _ensure_object, + _get_dtype) +from pandas.core.dtypes.missing import na_value_for_dtype from pandas.core.internals import (items_overlap_with_suffix, concatenate_block_managers) from pandas.util.decorators import Appender, Substitution diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index e23beb8332fd4..11ca2e548f171 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -1,7 +1,7 @@ # pylint: disable=E1103 -from pandas.types.common import is_list_like, is_scalar +from pandas.core.dtypes.common import is_list_like, is_scalar from pandas import Series, DataFrame, MultiIndex, Index, concat from pandas.core.groupby import Grouper from pandas.tools.util 
import cartesian_product diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index 2a258d4a7b7e5..746742f47f2aa 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -2,13 +2,14 @@ Quantilization functions and related stuff """ -from pandas.types.missing import isnull -from pandas.types.common import (is_integer, - is_scalar, - is_categorical_dtype, - is_datetime64_dtype, - is_timedelta64_dtype, - _ensure_int64) +from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.common import ( + is_integer, + is_scalar, + is_categorical_dtype, + is_datetime64_dtype, + is_timedelta64_dtype, + _ensure_int64) import pandas.core.algorithms as algos import pandas.core.nanops as nanops diff --git a/pandas/tools/util.py b/pandas/tools/util.py index 263d2f16a4216..baf968440858d 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -1,15 +1,16 @@ import numpy as np import pandas._libs.lib as lib -from pandas.types.common import (is_number, - is_numeric_dtype, - is_datetime_or_timedelta_dtype, - is_list_like, - _ensure_object, - is_decimal, - is_scalar as isscalar) - -from pandas.types.cast import maybe_downcast_to_dtype +from pandas.core.dtypes.common import ( + is_number, + is_numeric_dtype, + is_datetime_or_timedelta_dtype, + is_list_like, + _ensure_object, + is_decimal, + is_scalar as isscalar) + +from pandas.core.dtypes.cast import maybe_downcast_to_dtype import pandas as pd from pandas.compat import reduce diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 48d236177b474..cf79cadef78dd 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -9,13 +9,15 @@ from pandas.compat.numpy import function as nv import numpy as np -from pandas.types.common import (is_integer, is_float, - is_bool_dtype, _ensure_int64, - is_scalar, is_dtype_equal, - is_list_like) -from pandas.types.generic import (ABCIndex, ABCSeries, - ABCPeriodIndex, ABCIndexClass) -from pandas.types.missing import isnull +from pandas.core.dtypes.common import ( + is_integer, is_float, + is_bool_dtype, _ensure_int64, + is_scalar, is_dtype_equal, + is_list_like) +from pandas.core.dtypes.generic import ( + ABCIndex, ABCSeries, + ABCPeriodIndex, ABCIndexClass) +from pandas.core.dtypes.missing import isnull from pandas.core import common as com, algorithms from pandas.core.algorithms import checked_add_with_arr from pandas.core.common import AbstractMethodError @@ -28,7 +30,7 @@ from pandas.core.index import Index from pandas.indexes.base import _index_shared_docs from pandas.util.decorators import Appender, cache_readonly -import pandas.types.concat as _concat +import pandas.core.dtypes.concat as _concat import pandas.tseries.frequencies as frequencies import pandas.indexes.base as ibase diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index 955edce2591e6..2154cfd4b2857 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -4,11 +4,12 @@ import numpy as np -from pandas.types.common import (is_period_arraylike, - is_datetime_arraylike, is_integer_dtype, - is_datetime64_dtype, is_datetime64tz_dtype, - is_timedelta64_dtype, is_categorical_dtype, - is_list_like) +from pandas.core.dtypes.common import ( + is_period_arraylike, + is_datetime_arraylike, is_integer_dtype, + is_datetime64_dtype, is_datetime64tz_dtype, + is_timedelta64_dtype, is_categorical_dtype, + is_list_like) from pandas.core.base import PandasDelegate, NoNewAttributesMixin from pandas.tseries.index import DatetimeIndex diff --git a/pandas/tseries/frequencies.py 
b/pandas/tseries/frequencies.py index 8013947babc5a..06d70f1456518 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -6,11 +6,12 @@ import numpy as np -from pandas.types.generic import ABCSeries -from pandas.types.common import (is_integer, - is_period_arraylike, - is_timedelta64_dtype, - is_datetime64_dtype) +from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.common import ( + is_integer, + is_period_arraylike, + is_timedelta64_dtype, + is_datetime64_dtype) import pandas.core.algorithms as algos from pandas.core.algorithms import unique diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 2c14d4f8ea79e..95594652e3943 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -7,24 +7,25 @@ import numpy as np from pandas.core.base import _shared_docs -from pandas.types.common import (_NS_DTYPE, _INT64_DTYPE, - is_object_dtype, is_datetime64_dtype, - is_datetimetz, is_dtype_equal, - is_integer, is_float, - is_integer_dtype, - is_datetime64_ns_dtype, - is_period_dtype, - is_bool_dtype, - is_string_dtype, - is_list_like, - is_scalar, - pandas_dtype, - _ensure_int64) -from pandas.types.generic import ABCSeries -from pandas.types.dtypes import DatetimeTZDtype -from pandas.types.missing import isnull - -import pandas.types.concat as _concat +from pandas.core.dtypes.common import ( + _NS_DTYPE, _INT64_DTYPE, + is_object_dtype, is_datetime64_dtype, + is_datetimetz, is_dtype_equal, + is_integer, is_float, + is_integer_dtype, + is_datetime64_ns_dtype, + is_period_dtype, + is_bool_dtype, + is_string_dtype, + is_list_like, + is_scalar, + pandas_dtype, + _ensure_int64) +from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.missing import isnull + +import pandas.core.dtypes.concat as _concat from pandas.errors import PerformanceWarning from pandas.core.common import _values_from_object, _maybe_box diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 2b6a684fc39dd..a097c56a0ffd3 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -3,7 +3,7 @@ from pandas import compat import numpy as np -from pandas.types.generic import ABCSeries, ABCDatetimeIndex, ABCPeriod +from pandas.core.dtypes.generic import ABCSeries, ABCDatetimeIndex, ABCPeriod from pandas.tseries.tools import to_datetime, normalize_date from pandas.core.common import AbstractMethodError diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 7f7b3286fd4f8..66275925ff355 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -5,21 +5,22 @@ from pandas.core import common as com -from pandas.types.common import (is_integer, - is_float, - is_object_dtype, - is_integer_dtype, - is_float_dtype, - is_scalar, - is_datetime64_dtype, - is_datetime64tz_dtype, - is_timedelta64_dtype, - is_period_dtype, - is_bool_dtype, - pandas_dtype, - _ensure_object) -from pandas.types.dtypes import PeriodDtype -from pandas.types.generic import ABCSeries +from pandas.core.dtypes.common import ( + is_integer, + is_float, + is_object_dtype, + is_integer_dtype, + is_float_dtype, + is_scalar, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_timedelta64_dtype, + is_period_dtype, + is_bool_dtype, + pandas_dtype, + _ensure_object) +from pandas.core.dtypes.dtypes import PeriodDtype +from pandas.core.dtypes.generic import ABCSeries import pandas.tseries.frequencies as frequencies from pandas.tseries.frequencies import get_freq_code as _gfc diff --git 
a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index d0f373fcc5a45..c26f023ea942a 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -2,18 +2,19 @@ from datetime import timedelta import numpy as np -from pandas.types.common import (_TD_DTYPE, - is_integer, is_float, - is_bool_dtype, - is_list_like, - is_scalar, - is_integer_dtype, - is_object_dtype, - is_timedelta64_dtype, - is_timedelta64_ns_dtype, - _ensure_int64) -from pandas.types.missing import isnull -from pandas.types.generic import ABCSeries +from pandas.core.dtypes.common import ( + _TD_DTYPE, + is_integer, is_float, + is_bool_dtype, + is_list_like, + is_scalar, + is_integer_dtype, + is_object_dtype, + is_timedelta64_dtype, + is_timedelta64_ns_dtype, + _ensure_int64) +from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.generic import ABCSeries from pandas.core.common import _maybe_box, _values_from_object, is_bool_indexer from pandas.core.index import Index, Int64Index @@ -24,7 +25,7 @@ from pandas.core.base import _shared_docs from pandas.indexes.base import _index_shared_docs import pandas.core.common as com -import pandas.types.concat as _concat +import pandas.core.dtypes.concat as _concat from pandas.util.decorators import Appender, Substitution, deprecate_kwarg from pandas.tseries.base import TimelikeOps, DatetimeIndexOpsMixin from pandas.tseries.timedeltas import (to_timedelta, diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py index ead602ee80e32..fe03f89fdb2c5 100644 --- a/pandas/tseries/timedeltas.py +++ b/pandas/tseries/timedeltas.py @@ -6,11 +6,12 @@ import pandas as pd import pandas._libs.tslib as tslib -from pandas.types.common import (_ensure_object, - is_integer_dtype, - is_timedelta64_dtype, - is_list_like) -from pandas.types.generic import ABCSeries, ABCIndexClass +from pandas.core.dtypes.common import ( + _ensure_object, + is_integer_dtype, + is_timedelta64_dtype, + is_list_like) +from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass def to_timedelta(arg, unit='ns', box=True, errors='raise'): diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index 9d5821d859187..db7aa5974e562 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -4,19 +4,21 @@ from pandas._libs import lib, tslib -from pandas.types.common import (_ensure_object, - is_datetime64_ns_dtype, - is_datetime64_dtype, - is_datetime64tz_dtype, - is_integer_dtype, - is_integer, - is_float, - is_list_like, - is_scalar, - is_numeric_dtype) -from pandas.types.generic import (ABCIndexClass, ABCSeries, - ABCDataFrame) -from pandas.types.missing import notnull +from pandas.core.dtypes.common import ( + _ensure_object, + is_datetime64_ns_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_integer_dtype, + is_integer, + is_float, + is_list_like, + is_scalar, + is_numeric_dtype) +from pandas.core.dtypes.generic import ( + ABCIndexClass, ABCSeries, + ABCDataFrame) +from pandas.core.dtypes.missing import notnull from pandas.core import algorithms import pandas.compat as compat diff --git a/pandas/tseries/util.py b/pandas/tseries/util.py index da3bb075dd02c..5934f5843736c 100644 --- a/pandas/tseries/util.py +++ b/pandas/tseries/util.py @@ -2,7 +2,7 @@ from pandas.compat import lrange import numpy as np -from pandas.types.common import _ensure_platform_int +from pandas.core.dtypes.common import _ensure_platform_int from pandas.core.frame import DataFrame import pandas.core.algorithms as algorithms diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 
c73cca56f975a..638a190d810a5 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -23,15 +23,16 @@ import numpy as np import pandas as pd -from pandas.types.missing import array_equivalent -from pandas.types.common import (is_datetimelike_v_numeric, - is_datetimelike_v_object, - is_number, is_bool, - needs_i8_conversion, - is_categorical_dtype, - is_interval_dtype, - is_sequence, - is_list_like) +from pandas.core.dtypes.missing import array_equivalent +from pandas.core.dtypes.common import ( + is_datetimelike_v_numeric, + is_datetimelike_v_object, + is_number, is_bool, + needs_i8_conversion, + is_categorical_dtype, + is_interval_dtype, + is_sequence, + is_list_like) from pandas.formats.printing import pprint_thing from pandas.core.algorithms import take_1d @@ -42,7 +43,7 @@ StringIO, PY3 ) -from pandas.computation import expressions as expr +from pandas.core.computation import expressions as expr from pandas import (bdate_range, CategoricalIndex, Categorical, IntervalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, RangeIndex, @@ -401,8 +402,9 @@ def _incompat_bottleneck_version(method): def skip_if_no_ne(engine='numexpr'): - from pandas.computation.expressions import (_USE_NUMEXPR, - _NUMEXPR_INSTALLED) + from pandas.core.computation.expressions import ( + _USE_NUMEXPR, + _NUMEXPR_INSTALLED) if engine == 'numexpr': if not _USE_NUMEXPR: @@ -1539,10 +1541,10 @@ def assert_sp_array_equal(left, right, check_dtype=True): check_dtype=check_dtype) # SparseIndex comparison - assertIsInstance(left.sp_index, - pd.sparse.libsparse.SparseIndex, '[SparseIndex]') - assertIsInstance(right.sp_index, - pd.sparse.libsparse.SparseIndex, '[SparseIndex]') + assertIsInstance( + left.sp_index, pd.core.sparse.libsparse.SparseIndex, '[SparseIndex]') + assertIsInstance( + right.sp_index, pd.core.sparse.libsparse.SparseIndex, '[SparseIndex]') if not left.sp_index.equals(right.sp_index): raise_assert_detail('SparseArray.index', 'index are not equal', diff --git a/pandas/util/testing.pyx b/pandas/util/testing.pyx index cda21ba9c4ce1..9495af87f5c31 100644 --- a/pandas/util/testing.pyx +++ b/pandas/util/testing.pyx @@ -1,8 +1,8 @@ import numpy as np from pandas import compat -from pandas.types.missing import isnull, array_equivalent -from pandas.types.common import is_dtype_equal +from pandas.core.dtypes.missing import isnull, array_equivalent +from pandas.core.dtypes.common import is_dtype_equal cdef NUMERIC_TYPES = ( bool, diff --git a/pandas/util/validators.py b/pandas/util/validators.py index f22412a2bcd17..6b19904f4a665 100644 --- a/pandas/util/validators.py +++ b/pandas/util/validators.py @@ -3,7 +3,7 @@ for validating data or function arguments """ -from pandas.types.common import is_bool +from pandas.core.dtypes.common import is_bool def _check_arg_length(fname, args, max_fname_arg_count, compat_args): diff --git a/setup.py b/setup.py index d76c6fa508008..b7c4581c4ecfe 100755 --- a/setup.py +++ b/setup.py @@ -118,7 +118,7 @@ def is_platform_mac(): 'hashtable': ['_libs/hashtable_class_helper.pxi.in', '_libs/hashtable_func_helper.pxi.in'], 'index': ['_libs/index_class_helper.pxi.in'], - 'sparse': ['sparse/sparse_op_helper.pxi.in'], + 'sparse': ['core/sparse/sparse_op_helper.pxi.in'], 'interval': ['_libs/intervaltree.pxi.in'] } @@ -338,7 +338,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/join.pyx', 'pandas/_libs/interval.pyx', 'pandas/core/window.pyx', - 'pandas/sparse/sparse.pyx', + 'pandas/core/sparse/sparse.pyx', 'pandas/util/testing.pyx', 'pandas/tools/hash.pyx', 'pandas/io/parsers.pyx', 
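
The surrounding setup.py hunks move the sparse Cython sources under pandas/core/sparse and rename the built extension from sparse.libsparse to core.sparse.libsparse. A quick smoke test after rebuilding is to import the renamed extension modules directly; this is a minimal sketch (not from the patch itself), using module names taken from these hunks and assuming a built post-reorg checkout:

    import importlib

    for name in ("pandas.core.sparse.libsparse", "pandas.util.libtesting"):
        # an ImportError here would mean a pyxfile path or package entry was missed
        importlib.import_module(name)
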
@@ -523,8 +523,8 @@ def pxd(name): 'pandas/_libs/src/numpy_helper.h'], 'sources': ['pandas/_libs/src/parser/tokenizer.c', 'pandas/_libs/src/parser/io.c']}, - 'sparse.libsparse': {'pyxfile': 'sparse/sparse', - 'depends': (['pandas/sparse/sparse.pyx'] + + 'core.sparse.libsparse': {'pyxfile': 'core/sparse/sparse', + 'depends': (['pandas/core/sparse/sparse.pyx'] + _pxi_dep['sparse'])}, 'util.libtesting': {'pyxfile': 'util/testing', 'depends': ['pandas/util/testing.pyx']}, @@ -636,11 +636,12 @@ def pxd(name): packages=['pandas', 'pandas.api', 'pandas.api.types', - 'pandas.api.lib', 'pandas.compat', 'pandas.compat.numpy', - 'pandas.computation', 'pandas.core', + 'pandas.core.dtypes', + 'pandas.core.computation', + 'pandas.core.sparse', 'pandas.indexes', 'pandas.errors', 'pandas.io', @@ -650,12 +651,13 @@ def pxd(name): 'pandas._libs', 'pandas.formats', 'pandas.plotting', - 'pandas.sparse', 'pandas.stats', 'pandas.util', 'pandas.tests', 'pandas.tests.api', - 'pandas.tests.computation', + 'pandas.tests.core.dtypes', + 'pandas.tests.core.computation', + 'pandas.tests.core.sparse', 'pandas.tests.frame', 'pandas.tests.indexes', 'pandas.tests.indexes.datetimes', @@ -670,14 +672,11 @@ def pxd(name): 'pandas.tests.series', 'pandas.tests.formats', 'pandas.tests.scalar', - 'pandas.tests.sparse', 'pandas.tests.tseries', 'pandas.tests.tools', - 'pandas.tests.types', 'pandas.tests.plotting', 'pandas.tools', 'pandas.tseries', - 'pandas.types', 'pandas.util.clipboard' ], package_data={'pandas.tests': ['data/*.csv'], diff --git a/vb_suite/binary_ops.py b/vb_suite/binary_ops.py index 7c821374a83ab..edc29bf3eec37 100644 --- a/vb_suite/binary_ops.py +++ b/vb_suite/binary_ops.py @@ -21,7 +21,7 @@ start_date=datetime(2012, 1, 1)) setup = common_setup + """ -import pandas.computation.expressions as expr +import pandas.core.computation.expressions as expr df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) expr.set_numexpr_threads(1) @@ -32,7 +32,7 @@ start_date=datetime(2013, 2, 26)) setup = common_setup + """ -import pandas.computation.expressions as expr +import pandas.core.computation.expressions as expr df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) expr.set_use_numexpr(False) @@ -53,7 +53,7 @@ start_date=datetime(2012, 1, 1)) setup = common_setup + """ -import pandas.computation.expressions as expr +import pandas.core.computation.expressions as expr df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) expr.set_numexpr_threads(1) @@ -63,7 +63,7 @@ start_date=datetime(2013, 2, 26)) setup = common_setup + """ -import pandas.computation.expressions as expr +import pandas.core.computation.expressions as expr df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) expr.set_use_numexpr(False) @@ -129,7 +129,7 @@ start_date=datetime(2012, 1, 1)) setup = common_setup + """ -import pandas.computation.expressions as expr +import pandas.core.computation.expressions as expr df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) expr.set_numexpr_threads(1) @@ -139,7 +139,7 @@ start_date=datetime(2013, 2, 26)) setup = common_setup + """ -import pandas.computation.expressions as expr +import pandas.core.computation.expressions as expr df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) expr.set_use_numexpr(False) diff --git a/vb_suite/eval.py b/vb_suite/eval.py index bf80aad956184..011669256a9bc 100644 --- 
a/vb_suite/eval.py +++ b/vb_suite/eval.py @@ -10,7 +10,7 @@ """ setup = common_setup + """ -import pandas.computation.expressions as expr +import pandas.core.computation.expressions as expr expr.set_numexpr_threads(1) """ diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py index 3d95d52dccd71..ff634bf2a8fc7 100644 --- a/vb_suite/indexing.py +++ b/vb_suite/indexing.py @@ -141,7 +141,7 @@ setup = common_setup + """ try: - import pandas.computation.expressions as expr + import pandas.core.computation.expressions as expr except: expr = None @@ -159,7 +159,7 @@ setup = common_setup + """ try: - import pandas.computation.expressions as expr + import pandas.core.computation.expressions as expr except: expr = None diff --git a/vb_suite/sparse.py b/vb_suite/sparse.py index 53e2778ee0865..b1c1a2f24e41d 100644 --- a/vb_suite/sparse.py +++ b/vb_suite/sparse.py @@ -55,11 +55,11 @@ setup = common_setup + """ import scipy.sparse -import pandas.sparse.series +import pandas.core.sparse.series A = scipy.sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(100, 100)) """ -stmt = "ss = pandas.sparse.series.SparseSeries.from_coo(A)" +stmt = "ss = pandas.core.sparse.series.SparseSeries.from_coo(A)" sparse_series_from_coo = Benchmark(stmt, setup, name="sparse_series_from_coo", start_date=datetime(2015, 1, 3)) From 3119e909f7902a8d5f6d588e645e8744578afda9 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 16 Apr 2017 00:51:29 +0000 Subject: [PATCH 397/933] COMPAT: use the correct dtype for interval comparisons on 32-bit (#16011) --- pandas/tests/indexes/test_interval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 79b6ff2e7a2a7..2d0015a5258ed 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -779,8 +779,8 @@ def test_get_loc_closed(self): np.array([0], dtype='int64')) def test_get_indexer_closed(self): - x = np.arange(1000, dtype='intp') - found = x + x = np.arange(1000, dtype='float64') + found = x.astype('intp') not_found = (-1 * np.ones(1000)).astype('intp') for leaf_size in [1, 10, 100, 10000]: for closed in ['left', 'right', 'both', 'neither']: From 39d7b113bc6e44a8883f3bf6a7cb6927cee15a07 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 16 Apr 2017 13:18:45 +0200 Subject: [PATCH 398/933] CLN: updates to benchmarks after repo reorg (#16020) --- asv_bench/benchmarks/binary_ops.py | 5 ++++- asv_bench/benchmarks/categoricals.py | 7 +++++-- asv_bench/benchmarks/eval.py | 5 ++++- asv_bench/benchmarks/indexing.py | 4 ---- asv_bench/benchmarks/plotting.py | 5 ++++- asv_bench/benchmarks/reshape.py | 2 +- asv_bench/benchmarks/sparse.py | 6 ++---- asv_bench/benchmarks/timeseries.py | 5 ++++- 8 files changed, 24 insertions(+), 15 deletions(-) diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py index cc869996b49cd..0ca21b929ea17 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/binary_ops.py @@ -1,5 +1,8 @@ from .pandas_vb_common import * -import pandas.core.computation.expressions as expr +try: + import pandas.core.computation.expressions as expr +except ImportError: + import pandas.computation.expressions as expr class Ops(object): diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 5b0dd126acdea..6432ccfb19efe 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -1,8 +1,11 @@ from 
.pandas_vb_common import * try: - from pandas.core.dtypes.concat import union_categoricals + from pandas.api.types import union_categoricals except ImportError: - pass + try: + from pandas.types.concat import union_categoricals + except ImportError: + pass class Categoricals(object): diff --git a/asv_bench/benchmarks/eval.py b/asv_bench/benchmarks/eval.py index ee091e57c6403..6f33590ee9e33 100644 --- a/asv_bench/benchmarks/eval.py +++ b/asv_bench/benchmarks/eval.py @@ -1,6 +1,9 @@ from .pandas_vb_common import * import pandas as pd -import pandas.core.computation.expressions as expr +try: + import pandas.core.computation.expressions as expr +except ImportError: + import pandas.computation.expressions as expr class Eval(object): diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 79844414f2746..8947a0fdd796c 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -1,8 +1,4 @@ from .pandas_vb_common import * -try: - import pandas.core.computation.expressions as expr -except: - expr = None class Int64Indexing(object): diff --git a/asv_bench/benchmarks/plotting.py b/asv_bench/benchmarks/plotting.py index 757c3e27dd333..dda684b35e301 100644 --- a/asv_bench/benchmarks/plotting.py +++ b/asv_bench/benchmarks/plotting.py @@ -4,7 +4,10 @@ except ImportError: def date_range(start=None, end=None, periods=None, freq=None): return DatetimeIndex(start, end, periods=periods, offset=freq) -from pandas.tools.plotting import andrews_curves +try: + from pandas.plotting import andrews_curves +except ImportError: + from pandas.tools.plotting import andrews_curves class TimeseriesPlotting(object): diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index b9346c497b9ef..177e3e7cb87fa 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -1,5 +1,5 @@ from .pandas_vb_common import * -from pandas.core.reshape import melt, wide_to_long +from pandas import melt, wide_to_long class melt_dataframe(object): diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py index 7d424592ed877..500149b89b08b 100644 --- a/asv_bench/benchmarks/sparse.py +++ b/asv_bench/benchmarks/sparse.py @@ -1,8 +1,6 @@ from .pandas_vb_common import * -import pandas.core.sparse.series import scipy.sparse -from pandas.core.sparse import SparseSeries, SparseDataFrame -from pandas.core.sparse import SparseDataFrame +from pandas import SparseSeries, SparseDataFrame class sparse_series_to_frame(object): @@ -37,7 +35,7 @@ def setup(self): self.A = scipy.sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(100, 100)) def time_sparse_series_from_coo(self): - self.ss = pandas.core.sparse.series.SparseSeries.from_coo(self.A) + self.ss = SparseSeries.from_coo(self.A) class sparse_series_to_coo(object): diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index dfe3f0ef87c11..b63b3386a7563 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -1,4 +1,7 @@ -from pandas.tseries.converter import DatetimeConverter +try: + from pandas.plotting._converter import DatetimeConverter +except ImportError: + from pandas.tseries.converter import DatetimeConverter from .pandas_vb_common import * import pandas as pd from datetime import timedelta From 1f812e37f33bf79beacbbfd2b1e0fa38958006e2 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 16 Apr 2017 13:01:17 +0000 Subject: [PATCH 399/933] CLN: move pandas.formats -> pandas.io.formats 
(#16013) --- MANIFEST.in | 2 +- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/api.py | 2 +- pandas/core/categorical.py | 4 ++-- pandas/core/computation/engines.py | 2 +- pandas/core/computation/eval.py | 2 +- pandas/core/computation/expr.py | 2 +- pandas/core/computation/ops.py | 2 +- pandas/core/computation/pytables.py | 2 +- pandas/core/config.py | 4 ++-- pandas/core/config_init.py | 2 +- pandas/core/frame.py | 10 +++++----- pandas/core/generic.py | 4 ++-- pandas/core/groupby.py | 2 +- pandas/core/internals.py | 6 +++--- pandas/core/panel.py | 2 +- pandas/core/series.py | 2 +- pandas/core/sparse/array.py | 2 +- pandas/core/sparse/list.py | 2 +- pandas/indexes/base.py | 6 +++--- pandas/indexes/frozen.py | 2 +- pandas/indexes/interval.py | 2 +- pandas/indexes/multi.py | 4 ++-- pandas/indexes/numeric.py | 2 +- pandas/io/api.py | 2 +- pandas/io/common.py | 2 +- pandas/io/excel.py | 2 +- pandas/{ => io}/formats/__init__.py | 0 pandas/{ => io}/formats/format.py | 2 +- pandas/{ => io}/formats/printing.py | 0 pandas/{ => io}/formats/style.py | 2 +- pandas/{ => io}/formats/templates/html.tpl | 0 pandas/io/html.py | 2 +- pandas/io/json/json.py | 2 +- pandas/io/pytables.py | 2 +- pandas/plotting/_core.py | 2 +- pandas/plotting/_misc.py | 2 +- pandas/plotting/_timeseries.py | 2 +- pandas/tests/api/test_api.py | 2 +- pandas/tests/frame/test_analytics.py | 2 +- pandas/tests/frame/test_operators.py | 2 +- pandas/tests/frame/test_repr_info.py | 2 +- pandas/tests/groupby/test_aggregate.py | 2 +- pandas/tests/indexing/common.py | 2 +- pandas/tests/{ => io}/formats/__init__.py | 0 pandas/tests/{ => io}/formats/data/unicode_series.csv | 0 pandas/tests/{ => io}/formats/test_eng_formatting.py | 2 +- pandas/tests/{ => io}/formats/test_format.py | 6 +++--- pandas/tests/{ => io}/formats/test_printing.py | 4 ++-- pandas/tests/{ => io}/formats/test_style.py | 2 +- pandas/tests/{ => io}/formats/test_to_csv.py | 0 pandas/tests/{ => io}/formats/test_to_html.py | 2 +- pandas/tests/{ => io}/formats/test_to_latex.py | 0 pandas/tests/io/test_excel.py | 4 ++-- pandas/tests/io/test_pytables.py | 2 +- pandas/tests/plotting/test_frame.py | 2 +- pandas/tests/series/test_api.py | 2 +- pandas/tests/test_expressions.py | 2 +- pandas/tests/test_generic.py | 2 +- pandas/tests/test_panel.py | 2 +- pandas/tseries/base.py | 2 +- pandas/tseries/index.py | 6 +++--- pandas/tseries/tdi.py | 4 ++-- pandas/util/testing.py | 2 +- setup.py | 4 ++-- 65 files changed, 78 insertions(+), 77 deletions(-) rename pandas/{ => io}/formats/__init__.py (100%) rename pandas/{ => io}/formats/format.py (99%) rename pandas/{ => io}/formats/printing.py (100%) rename pandas/{ => io}/formats/style.py (99%) rename pandas/{ => io}/formats/templates/html.tpl (100%) rename pandas/tests/{ => io}/formats/__init__.py (100%) rename pandas/tests/{ => io}/formats/data/unicode_series.csv (100%) rename pandas/tests/{ => io}/formats/test_eng_formatting.py (99%) rename pandas/tests/{ => io}/formats/test_format.py (99%) rename pandas/tests/{ => io}/formats/test_printing.py (98%) rename pandas/tests/{ => io}/formats/test_style.py (99%) rename pandas/tests/{ => io}/formats/test_to_csv.py (100%) rename pandas/tests/{ => io}/formats/test_to_html.py (99%) rename pandas/tests/{ => io}/formats/test_to_latex.py (100%) diff --git a/MANIFEST.in b/MANIFEST.in index 31de3466cb357..8bd83a7d56948 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -25,4 +25,4 @@ global-exclude *.png # recursive-include LICENSES * include versioneer.py include pandas/_version.py -include 
pandas/formats/templates/*.tpl +include pandas/io/formats/templates/*.tpl diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 33d80f8347b0a..08208973b70d2 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1331,6 +1331,7 @@ If indicated, a deprecation warning will be issued if you reference theses modul "pandas.hashtable", "pandas._libs.hashtable", "" "pandas.json", "pandas.io.json.libjson", "X" "pandas.parser", "pandas.io.libparsers", "X" + "pandas.formats", "pandas.io.formats", "" "pandas.sparse", "pandas.core.sparse", "" "pandas.types", "pandas.core.dtypes", "" "pandas.io.sas.saslib", "pandas.io.sas.libsas", "" diff --git a/pandas/core/api.py b/pandas/core/api.py index 8e8969e1f6b26..3c739d85d0074 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -8,7 +8,7 @@ from pandas.core.dtypes.missing import isnull, notnull from pandas.core.categorical import Categorical from pandas.core.groupby import Grouper -from pandas.formats.format import set_eng_float_format +from pandas.io.formats.format import set_eng_float_format from pandas.core.index import (Index, CategoricalIndex, Int64Index, UInt64Index, RangeIndex, Float64Index, MultiIndex, IntervalIndex) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index d1f060113cf1d..50181486d8cf7 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1615,7 +1615,7 @@ def _repr_categories(self): """ return the base repr for the categories """ max_categories = (10 if get_option("display.max_categories") == 0 else get_option("display.max_categories")) - from pandas.formats import format as fmt + from pandas.io.formats import format as fmt if len(self.categories) > max_categories: num = max_categories // 2 head = fmt.format_array(self.categories[:num], None) @@ -1663,7 +1663,7 @@ def _repr_footer(self): return u('Length: %d\n%s') % (len(self), self._repr_categories_info()) def _get_repr(self, length=True, na_rep='NaN', footer=True): - from pandas.formats import format as fmt + from pandas.io.formats import format as fmt formatter = fmt.CategoricalFormatter(self, length=length, na_rep=na_rep, footer=footer) result = formatter.to_string() diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py index 675a3d5eca792..f45d0355e7442 100644 --- a/pandas/core/computation/engines.py +++ b/pandas/core/computation/engines.py @@ -6,7 +6,7 @@ from pandas import compat from pandas.compat import map -import pandas.formats.printing as printing +import pandas.io.formats.printing as printing from pandas.core.computation.align import _align, _reconstruct_object from pandas.core.computation.ops import ( UndefinedVariableError, diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index fc3986e317d13..15e13025a7c53 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -5,7 +5,7 @@ import warnings import tokenize -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing from pandas.core.computation import _NUMEXPR_INSTALLED from pandas.core.computation.expr import Expr, _parsers, tokenize_string from pandas.core.computation.scope import _ensure_scope diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index 01c5d1f6f100c..51785ebcd9ec8 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -12,7 +12,7 @@ from pandas.compat import StringIO, lmap, zip, reduce, string_types from 
pandas.core.base import StringMixin from pandas.core import common as com -import pandas.formats.printing as printing +import pandas.io.formats.printing as printing from pandas.tools.util import compose from pandas.core.computation.ops import ( _cmp_ops_syms, _bool_ops_syms, diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 91c414bbc0ec1..7ba2c16530cad 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -11,7 +11,7 @@ import pandas as pd from pandas.compat import PY3, string_types, text_type import pandas.core.common as com -from pandas.formats.printing import pprint_thing, pprint_thing_encoded +from pandas.io.formats.printing import pprint_thing, pprint_thing_encoded from pandas.core.base import StringMixin from pandas.core.computation.common import _ensure_decoded, _result_type_many from pandas.core.computation.scope import _DEFAULT_GLOBALS diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 8d0f23e28c0a2..285ff346158a0 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -9,7 +9,7 @@ import pandas.core.common as com from pandas.compat import u, string_types, DeepChainMap from pandas.core.base import StringMixin -from pandas.formats.printing import pprint_thing, pprint_thing_encoded +from pandas.io.formats.printing import pprint_thing, pprint_thing_encoded from pandas.core.computation import expr, ops from pandas.core.computation.ops import is_term, UndefinedVariableError from pandas.core.computation.expr import BaseExprVisitor diff --git a/pandas/core/config.py b/pandas/core/config.py index 39ed2f9545266..b406f6724aa6d 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -774,7 +774,7 @@ def is_instance_factory(_type): """ if isinstance(_type, (tuple, list)): _type = tuple(_type) - from pandas.formats.printing import pprint_thing + from pandas.io.formats.printing import pprint_thing type_repr = "|".join(map(pprint_thing, _type)) else: type_repr = "'%s'" % _type @@ -792,7 +792,7 @@ def is_one_of_factory(legal_values): legal_values = [c for c in legal_values if not callable(c)] def inner(x): - from pandas.formats.printing import pprint_thing as pp + from pandas.io.formats.printing import pprint_thing as pp if x not in legal_values: if not any([c(x) for c in callables]): diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 7307980c8312e..f8cbdffa27bb4 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -15,7 +15,7 @@ from pandas.core.config import (is_int, is_bool, is_text, is_instance_factory, is_one_of_factory, get_default_val, is_callable) -from pandas.formats.format import detect_console_encoding +from pandas.io.formats.format import detect_console_encoding # # options from the "display" namespace diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3a5a0e7044e79..732ce7ce695b0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -90,8 +90,8 @@ import pandas.core.common as com import pandas.core.nanops as nanops import pandas.core.ops as ops -import pandas.formats.format as fmt -from pandas.formats.printing import pprint_thing +import pandas.io.formats.format as fmt +from pandas.io.formats.printing import pprint_thing import pandas.plotting._core as gfx from pandas._libs import lib, algos as libalgos @@ -636,9 +636,9 @@ def style(self): See Also -------- - pandas.formats.style.Styler + pandas.io.formats.style.Styler """ - from pandas.formats.style import Styler + 
from pandas.io.formats.style import Styler return Styler(self) def iteritems(self): @@ -1724,7 +1724,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, - If False, never show counts. """ - from pandas.formats.format import _put_lines + from pandas.io.formats.format import _put_lines if buf is None: # pragma: no cover buf = sys.stdout diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 167af8dfc0d8e..5f0c65ddfb9c3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -43,8 +43,8 @@ import pandas.core.algorithms as algos import pandas.core.common as com import pandas.core.missing as missing -from pandas.formats.printing import pprint_thing -from pandas.formats.format import format_percentiles +from pandas.io.formats.printing import pprint_thing +from pandas.io.formats.format import format_percentiles from pandas.tseries.frequencies import to_offset from pandas import compat from pandas.compat.numpy import function as nv diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 2cbcb9ef6efec..3fd41f3456732 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -55,7 +55,7 @@ decons_obs_group_ids, get_indexer_dict) from pandas.util.decorators import (cache_readonly, Substitution, Appender, make_signature) -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing from pandas.util.validators import validate_kwargs import pandas.core.algorithms as algorithms diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f7d7efd66f8db..c698bcb9fa5ee 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -55,7 +55,7 @@ from pandas.core.indexing import maybe_convert_indices, length_of_indexer from pandas.core.categorical import Categorical, maybe_to_categorical from pandas.tseries.index import DatetimeIndex -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing import pandas.core.missing as missing from pandas.core.sparse.array import _maybe_to_sparse, SparseArray @@ -1614,7 +1614,7 @@ def to_native_types(self, slicer=None, na_rep='', float_format=None, values[mask] = na_rep return values - from pandas.formats.format import FloatArrayFormatter + from pandas.io.formats.format import FloatArrayFormatter formatter = FloatArrayFormatter(values, na_rep=na_rep, float_format=float_format, decimal=decimal, quoting=quoting, @@ -2328,7 +2328,7 @@ def to_native_types(self, slicer=None, na_rep=None, date_format=None, if slicer is not None: values = values[..., slicer] - from pandas.formats.format import _get_format_datetime64_from_values + from pandas.io.formats.format import _get_format_datetime64_from_values format = _get_format_datetime64_from_values(values, date_format) result = tslib.format_array_from_datetime( diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 76053b3bdb83d..fefe75163d033 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -26,7 +26,7 @@ from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, _ensure_index, _get_combined_index) -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing from pandas.core.indexing import maybe_droplevels from pandas.core.internals import (BlockManager, create_block_manager_from_arrays, diff --git a/pandas/core/series.py b/pandas/core/series.py index 596dae4345cb3..9022bff092ac3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -67,7 
+67,7 @@ import pandas.core.common as com import pandas.core.nanops as nanops -import pandas.formats.format as fmt +import pandas.io.formats.format as fmt from pandas.util.decorators import Appender, deprecate_kwarg, Substitution from pandas.util.validators import validate_bool_kwarg diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 74e9be54ae6df..d3fdfe5533a03 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -34,7 +34,7 @@ from pandas._libs import index as libindex import pandas.core.algorithms as algos import pandas.core.ops as ops -import pandas.formats.printing as printing +import pandas.io.formats.printing as printing from pandas.util.decorators import Appender from pandas.indexes.base import _index_shared_docs diff --git a/pandas/core/sparse/list.py b/pandas/core/sparse/list.py index 381a811ac828b..e69ad6d0ab7ad 100644 --- a/pandas/core/sparse/list.py +++ b/pandas/core/sparse/list.py @@ -1,7 +1,7 @@ import warnings import numpy as np from pandas.core.base import PandasObject -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing from pandas.core.dtypes.common import is_scalar from pandas.core.sparse.array import SparseArray diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index b0439e122ea9e..d88e54dcc9521 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -48,7 +48,7 @@ import pandas.core.dtypes.concat as _concat import pandas.core.missing as missing import pandas.core.algorithms as algos -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing from pandas.core.ops import _comp_method_OBJECT_ARRAY from pandas.core.strings import StringAccessorMixin from pandas.core.config import get_option @@ -831,7 +831,7 @@ def _format_data(self): """ Return the formatted data as a unicode string """ - from pandas.formats.format import get_console_size, _get_adjustment + from pandas.io.formats.format import get_console_size, _get_adjustment display_width, _ = get_console_size() if display_width is None: display_width = get_option('display.width') or 80 @@ -1842,7 +1842,7 @@ def format(self, name=False, formatter=None, **kwargs): def _format_with_header(self, header, na_rep='NaN', **kwargs): values = self.values - from pandas.formats.format import format_array + from pandas.io.formats.format import format_array if is_categorical_dtype(values.dtype): values = np.array(values) diff --git a/pandas/indexes/frozen.py b/pandas/indexes/frozen.py index 19b04319b37f9..3c6b922178abf 100644 --- a/pandas/indexes/frozen.py +++ b/pandas/indexes/frozen.py @@ -11,7 +11,7 @@ import numpy as np from pandas.core.base import PandasObject from pandas.core.dtypes.cast import coerce_indexer_dtype -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing class FrozenList(PandasObject, list): diff --git a/pandas/indexes/interval.py b/pandas/indexes/interval.py index 88a2b0ff9595b..6f68e67d702fe 100644 --- a/pandas/indexes/interval.py +++ b/pandas/indexes/interval.py @@ -921,7 +921,7 @@ def _format_with_header(self, header, **kwargs): def _format_native_types(self, na_rep='', quoting=None, **kwargs): """ actually format my specific types """ - from pandas.formats.format import IntervalArrayFormatter + from pandas.io.formats.format import IntervalArrayFormatter return IntervalArrayFormatter(values=self, na_rep=na_rep, justify='all').get_result() diff --git a/pandas/indexes/multi.py 
b/pandas/indexes/multi.py index f410dbddb4428..b341bfe7b5215 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -31,7 +31,7 @@ import pandas.core.common as com import pandas.core.missing as missing import pandas.core.algorithms as algos -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing from pandas.core.config import get_option @@ -935,7 +935,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, sentinel=sentinel) if adjoin: - from pandas.formats.format import _get_adjustment + from pandas.io.formats.format import _get_adjustment adj = _get_adjustment() return adj.adjoin(space, *result_levels).split('\n') else: diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 2f68101520229..6b9999239cd88 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -302,7 +302,7 @@ def _convert_slice_indexer(self, key, kind=None): def _format_native_types(self, na_rep='', float_format=None, decimal='.', quoting=None, **kwargs): - from pandas.formats.format import FloatArrayFormatter + from pandas.io.formats.format import FloatArrayFormatter formatter = FloatArrayFormatter(self.values, na_rep=na_rep, float_format=float_format, decimal=decimal, quoting=quoting, diff --git a/pandas/io/api.py b/pandas/io/api.py index 4744d41472ff1..58c388d306721 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -18,7 +18,7 @@ from pandas.io.packers import read_msgpack, to_msgpack from pandas.io.gbq import read_gbq try: - from pandas.formats.style import Styler + from pandas.io.formats.style import Styler except ImportError: from pandas.compat import add_metaclass as _add_metaclass from pandas.util.importing import _UnSubclassable diff --git a/pandas/io/common.py b/pandas/io/common.py index 5cd5a9cd3e8dc..28f90972f95de 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -8,7 +8,7 @@ from pandas.compat import StringIO, BytesIO, string_types, text_type from pandas import compat -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing from pandas.core.common import AbstractMethodError from pandas.core.dtypes.common import is_number, is_file_like diff --git a/pandas/io/excel.py b/pandas/io/excel.py index b19837973a94a..637635a64d4d0 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -24,7 +24,7 @@ from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, string_types, OrderedDict) from pandas.core import config -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing import pandas.compat as compat import pandas.compat.openpyxl_compat as openpyxl_compat from warnings import warn diff --git a/pandas/formats/__init__.py b/pandas/io/formats/__init__.py similarity index 100% rename from pandas/formats/__init__.py rename to pandas/io/formats/__init__.py diff --git a/pandas/formats/format.py b/pandas/io/formats/format.py similarity index 99% rename from pandas/formats/format.py rename to pandas/io/formats/format.py index aad6c182416f6..20df60eb96299 100644 --- a/pandas/formats/format.py +++ b/pandas/io/formats/format.py @@ -33,7 +33,7 @@ from pandas.util.terminal import get_terminal_size from pandas.core.config import get_option, set_option from pandas.io.common import _get_handle, UnicodeWriter, _expand_user -from pandas.formats.printing import adjoin, justify, pprint_thing +from pandas.io.formats.printing import adjoin, justify, pprint_thing import pandas.core.common as com 
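
With pandas.formats relocated to pandas.io.formats, the user-facing entry point is unchanged; DataFrame.style still returns a Styler, just imported from the new location (see the pandas/core/frame.py hunk earlier in this patch). A minimal check, assuming pandas >= 0.20 with jinja2 installed, since Styler depends on it:

    import pandas as pd
    from pandas.io.formats.style import Styler

    df = pd.DataFrame({"a": [1, 2, 3]})
    assert isinstance(df.style, Styler)  # same API, new module path
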
import pandas._libs.lib as lib from pandas._libs.tslib import (iNaT, Timestamp, Timedelta, diff --git a/pandas/formats/printing.py b/pandas/io/formats/printing.py similarity index 100% rename from pandas/formats/printing.py rename to pandas/io/formats/printing.py diff --git a/pandas/formats/style.py b/pandas/io/formats/style.py similarity index 99% rename from pandas/formats/style.py rename to pandas/io/formats/style.py index 3ca1d8259729d..9321c29c99790 100644 --- a/pandas/formats/style.py +++ b/pandas/io/formats/style.py @@ -107,7 +107,7 @@ class Styler(object): -------- pandas.DataFrame.style """ - loader = PackageLoader("pandas", "formats/templates") + loader = PackageLoader("pandas", "io/formats/templates") env = Environment( loader=loader, trim_blocks=True, diff --git a/pandas/formats/templates/html.tpl b/pandas/io/formats/templates/html.tpl similarity index 100% rename from pandas/formats/templates/html.tpl rename to pandas/io/formats/templates/html.tpl diff --git a/pandas/io/html.py b/pandas/io/html.py index 8e5b8def1ea91..2613f26ae5f52 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -21,7 +21,7 @@ raise_with_traceback, binary_type) from pandas import Series from pandas.core.common import AbstractMethodError -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing _IMPORTS = False _HAS_BS4 = False diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 19e84c04b7ddb..7149ab497a00d 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -9,7 +9,7 @@ from pandas import Series, DataFrame, to_datetime from pandas.io.common import get_filepath_or_buffer, _get_handle from pandas.core.common import AbstractMethodError -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing from .normalize import _convert_to_line_delimits from .table_schema import build_table_schema from pandas.core.dtypes.common import is_period_dtype diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 4771134f3fe5c..17bedd016f617 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -32,7 +32,7 @@ from pandas.io.common import _stringify_path from pandas.core.sparse.array import BlockIndex, IntIndex from pandas.core.base import StringMixin -from pandas.formats.printing import adjoin, pprint_thing +from pandas.io.formats.printing import adjoin, pprint_thing from pandas.errors import PerformanceWarning from pandas.core.common import _asarray_tuplesafe from pandas.core.algorithms import match, unique diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 02f2df4949189..374244acfe173 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -24,7 +24,7 @@ from pandas.tseries.period import PeriodIndex from pandas.compat import range, lrange, map, zip, string_types import pandas.compat as compat -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing from pandas.util.decorators import Appender from pandas.plotting._compat import (_mpl_ge_1_3_1, diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index f09bcef82b45d..93eceba9a3f02 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -7,7 +7,7 @@ from pandas.util.decorators import deprecate_kwarg from pandas.core.dtypes.missing import notnull from pandas.compat import range, lrange, lmap, zip -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing from pandas.plotting._style 
import _get_standard_colors diff --git a/pandas/plotting/_timeseries.py b/pandas/plotting/_timeseries.py index 7533e417b0026..f8c7c1ee9ee10 100644 --- a/pandas/plotting/_timeseries.py +++ b/pandas/plotting/_timeseries.py @@ -9,7 +9,7 @@ from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex from pandas.tseries.tdi import TimedeltaIndex -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing import pandas.compat as compat from pandas.plotting._converter import (TimeSeries_DateLocator, diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 221458e629055..32ed77d94f637 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -31,7 +31,7 @@ class TestPDApi(Base, tm.TestCase): # top-level sub-packages lib = ['api', 'compat', 'core', - 'indexes', 'formats', 'errors', 'pandas', + 'indexes', 'errors', 'pandas', 'plotting', 'test', 'tools', 'tseries', 'util', 'options', 'io'] diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index dda52bbc536c9..e165e30c59f0f 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -18,7 +18,7 @@ import pandas as pd import pandas.core.nanops as nanops import pandas.core.algorithms as algorithms -import pandas.formats.printing as printing +import pandas.io.formats.printing as printing import pandas.util.testing as tm from pandas.tests.frame.common import TestData diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 268854fe6b62d..afb26143f4540 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -15,7 +15,7 @@ from pandas import (DataFrame, Series, MultiIndex, Timestamp, date_range) import pandas.core.common as com -import pandas.formats.printing as printing +import pandas.io.formats.printing as printing import pandas as pd from pandas.util.testing import (assert_numpy_array_equal, diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 918938c1758ed..740a24f38c316 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -11,7 +11,7 @@ from pandas import (DataFrame, compat, option_context) from pandas.compat import StringIO, lrange, u -import pandas.formats.format as fmt +import pandas.io.formats.format as fmt import pandas as pd import pandas.util.testing as tm diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 2abae97b3151b..53a92ece5d6cc 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -18,7 +18,7 @@ from pandas.util.testing import assert_frame_equal, assert_series_equal from pandas.core.groupby import SpecificationError, DataError from pandas.compat import OrderedDict -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing import pandas.util.testing as tm diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index 51c0889a6f091..b555a9c1fd0df 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -8,7 +8,7 @@ from pandas.core.dtypes.common import is_scalar from pandas import Series, DataFrame, Panel, date_range, UInt64Index from pandas.util import testing as tm -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing _verbose = False diff --git 
a/pandas/tests/formats/__init__.py b/pandas/tests/io/formats/__init__.py similarity index 100% rename from pandas/tests/formats/__init__.py rename to pandas/tests/io/formats/__init__.py diff --git a/pandas/tests/formats/data/unicode_series.csv b/pandas/tests/io/formats/data/unicode_series.csv similarity index 100% rename from pandas/tests/formats/data/unicode_series.csv rename to pandas/tests/io/formats/data/unicode_series.csv diff --git a/pandas/tests/formats/test_eng_formatting.py b/pandas/tests/io/formats/test_eng_formatting.py similarity index 99% rename from pandas/tests/formats/test_eng_formatting.py rename to pandas/tests/io/formats/test_eng_formatting.py index d2badd4fc160a..225a4921961cf 100644 --- a/pandas/tests/formats/test_eng_formatting.py +++ b/pandas/tests/io/formats/test_eng_formatting.py @@ -2,7 +2,7 @@ import pandas as pd from pandas import DataFrame from pandas.compat import u -import pandas.formats.format as fmt +import pandas.io.formats.format as fmt from pandas.util import testing as tm diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/io/formats/test_format.py similarity index 99% rename from pandas/tests/formats/test_format.py rename to pandas/tests/io/formats/test_format.py index 92f6a600a9e2a..7c74f82741e8c 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -25,8 +25,8 @@ is_platform_32bit) import pandas.compat as compat -import pandas.formats.format as fmt -import pandas.formats.printing as printing +import pandas.io.formats.format as fmt +import pandas.io.formats.printing as printing import pandas.util.testing as tm from pandas.util.terminal import get_terminal_size @@ -889,7 +889,7 @@ def test_frame_info_encoding(self): fmt.set_option('display.max_rows', 200) def test_pprint_thing(self): - from pandas.formats.printing import pprint_thing as pp_t + from pandas.io.formats.printing import pprint_thing as pp_t if PY3: pytest.skip("doesn't work on Python 3") diff --git a/pandas/tests/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py similarity index 98% rename from pandas/tests/formats/test_printing.py rename to pandas/tests/io/formats/test_printing.py index cacba2ad3f3ba..23aaf472316ec 100644 --- a/pandas/tests/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -2,8 +2,8 @@ import pytest from pandas import compat import pandas as pd -import pandas.formats.printing as printing -import pandas.formats.format as fmt +import pandas.io.formats.printing as printing +import pandas.io.formats.format as fmt import pandas.util.testing as tm import pandas.core.config as cf diff --git a/pandas/tests/formats/test_style.py b/pandas/tests/io/formats/test_style.py similarity index 99% rename from pandas/tests/formats/test_style.py rename to pandas/tests/io/formats/test_style.py index 08f8f2f32763d..c02d94d8918b3 100644 --- a/pandas/tests/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -9,7 +9,7 @@ import pandas.util.testing as tm jinja2 = pytest.importorskip('jinja2') -from pandas.formats.style import Styler, _get_level_lengths # noqa +from pandas.io.formats.style import Styler, _get_level_lengths # noqa class TestStyler(TestCase): diff --git a/pandas/tests/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py similarity index 100% rename from pandas/tests/formats/test_to_csv.py rename to pandas/tests/io/formats/test_to_csv.py diff --git a/pandas/tests/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py similarity index 99% rename from 
pandas/tests/formats/test_to_html.py rename to pandas/tests/io/formats/test_to_html.py index 771c66e84037c..e90224dcb705a 100644 --- a/pandas/tests/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -11,7 +11,7 @@ from pandas import compat, DataFrame, MultiIndex, option_context, Index from pandas.compat import u, lrange, StringIO from pandas.util import testing as tm -import pandas.formats.format as fmt +import pandas.io.formats.format as fmt div_style = '' try: diff --git a/pandas/tests/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py similarity index 100% rename from pandas/tests/formats/test_to_latex.py rename to pandas/tests/io/formats/test_to_latex.py diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index d83e26995020c..2fada4e1dc2cc 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -2006,7 +2006,7 @@ def test_to_excel_styleconverter(self): self.assertEqual(kw['protection'], protection) def test_write_cells_merge_styled(self): - from pandas.formats.format import ExcelCell + from pandas.io.formats.format import ExcelCell from openpyxl import styles sheet_name = 'merge_styled' @@ -2119,7 +2119,7 @@ def test_write_cells_merge_styled(self): if not openpyxl_compat.is_compat(major_ver=2): pytest.skip('incompatible openpyxl version') - from pandas.formats.format import ExcelCell + from pandas.io.formats.format import ExcelCell sheet_name = 'merge_styled' diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 2df8872e23616..c6a54211e73ad 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -17,7 +17,7 @@ isnull) from pandas.compat import is_platform_windows, PY3, PY35 -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing tables = pytest.importorskip('tables') from pandas.io.pytables import TableIterator diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index fe07f5b9f193e..e8ff77f9cc0eb 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -13,7 +13,7 @@ bdate_range) from pandas.core.dtypes.api import is_list_like from pandas.compat import range, lrange, lmap, lzip, u, zip, PY3 -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing import pandas.util.testing as tm from pandas.util.testing import slow diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 2facbaf1fe31e..3cb1e29bde7d9 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -9,7 +9,7 @@ from pandas.compat import range from pandas import compat -import pandas.formats.printing as printing +import pandas.io.formats.printing as printing from pandas.util.testing import (assert_series_equal, ensure_clean) import pandas.util.testing as tm diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index b353f73f4004d..ddbaedc3ef919 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -17,7 +17,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_panel4d_equal, slow) -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing import pandas.util.testing as tm diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 80059277407c3..a2ded195d9343 100644 --- 
a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -16,7 +16,7 @@ date_range, period_range, Panel4D) from pandas.core.index import MultiIndex -import pandas.formats.printing as printing +import pandas.io.formats.printing as printing from pandas.compat import range, zip, PY3 from pandas import compat diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index f0e53046e3552..60173dda57e04 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -16,7 +16,7 @@ from pandas.core.panel import Panel from pandas.core.series import remove_na -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing from pandas import compat from pandas.compat import range, lrange, StringIO, OrderedDict, signature diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index cf79cadef78dd..b419aae709683 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -22,7 +22,7 @@ from pandas.core.algorithms import checked_add_with_arr from pandas.core.common import AbstractMethodError -import pandas.formats.printing as printing +import pandas.io.formats.printing as printing from pandas._libs import (tslib as libts, lib, Timedelta, Timestamp, iNaT, NaT) from pandas._libs.period import Period diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 95594652e3943..a964b6d9e09d3 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -686,12 +686,12 @@ def _mpl_repr(self): @cache_readonly def _is_dates_only(self): - from pandas.formats.format import _is_dates_only + from pandas.io.formats.format import _is_dates_only return _is_dates_only(self.values) @property def _formatter_func(self): - from pandas.formats.format import _get_format_datetime64 + from pandas.io.formats.format import _get_format_datetime64 formatter = _get_format_datetime64(is_dates_only=self._is_dates_only) return lambda x: "'%s'" % formatter(x, tz=self.tz) @@ -830,7 +830,7 @@ def _add_offset(self, offset): return self.astype('O') + offset def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs): - from pandas.formats.format import _get_format_datetime64_from_values + from pandas.io.formats.format import _get_format_datetime64_from_values format = _get_format_datetime64_from_values(self, date_format) return libts.format_array_from_datetime(self.asi8, diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index c26f023ea942a..020b7328238b7 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -290,7 +290,7 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): @property def _formatter_func(self): - from pandas.formats.format import _get_format_timedelta64 + from pandas.io.formats.format import _get_format_timedelta64 return _get_format_timedelta64(self, box=True) def __setstate__(self, state): @@ -366,7 +366,7 @@ def _sub_datelike(self, other): def _format_native_types(self, na_rep=u('NaT'), date_format=None, **kwargs): - from pandas.formats.format import Timedelta64Formatter + from pandas.io.formats.format import Timedelta64Formatter return Timedelta64Formatter(values=self, nat_rep=na_rep, justify='all').get_result() diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 638a190d810a5..08323fc0c9050 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -33,7 +33,7 @@ is_interval_dtype, is_sequence, is_list_like) -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing from pandas.core.algorithms import take_1d 
import pandas.compat as compat diff --git a/setup.py b/setup.py index b7c4581c4ecfe..5a7efa15452ac 100755 --- a/setup.py +++ b/setup.py @@ -648,8 +648,8 @@ def pxd(name): 'pandas.io.json', 'pandas.io.sas', 'pandas.io.msgpack', + 'pandas.io.formats', 'pandas._libs', - 'pandas.formats', 'pandas.plotting', 'pandas.stats', 'pandas.util', @@ -668,9 +668,9 @@ def pxd(name): 'pandas.tests.io.parser', 'pandas.tests.io.sas', 'pandas.tests.io.msgpack', + 'pandas.tests.io.formats', 'pandas.tests.groupby', 'pandas.tests.series', - 'pandas.tests.formats', 'pandas.tests.scalar', 'pandas.tests.tseries', 'pandas.tests.tools', From 8daf9a7e45344dc5a247410e037dccb41b97a3db Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 16 Apr 2017 16:05:17 +0200 Subject: [PATCH 400/933] API: expose public testing functions in pandas.testing (GH9895) (#16003) --- doc/source/api.rst | 10 ++++++++++ doc/source/whatsnew/v0.20.0.txt | 19 +++++++++++++++++-- pandas/__init__.py | 1 + pandas/testing.py | 8 ++++++++ pandas/tests/api/test_api.py | 13 ++++++++++++- pandas/util/testing.py | 2 +- 6 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 pandas/testing.py diff --git a/doc/source/api.rst b/doc/source/api.rst index 6d1765ce65714..d0f548cc3d0b1 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1886,3 +1886,13 @@ Working with options get_option set_option option_context + +Testing functions +~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + testing.assert_frame_equal + testing.assert_series_equal + testing.assert_index_equal diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 08208973b70d2..de33b7d4e3371 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -150,8 +150,8 @@ Commonly called 'unix epoch' or POSIX time. This was the previous default, so th .. _whatsnew_0200.enhancements.errors: -pandas errors -^^^^^^^^^^^^^ +``pandas.errors`` +^^^^^^^^^^^^^^^^^ We are adding a standard public module for all pandas exceptions & warnings ``pandas.errors``. (:issue:`14800`). Previously these exceptions & warnings could be imported from ``pandas.core.common`` or ``pandas.io.common``. These exceptions and warnings @@ -170,6 +170,21 @@ The following are now part of this API: 'UnsortedIndexError', 'UnsupportedFunctionCall'] + +.. _whatsnew_0200.enhancements.testing: + +``pandas.testing`` +^^^^^^^^^^^^^^^^^^ + +We are adding a standard module that exposes the public testing functions in ``pandas.testing``(:issue:`9895`). Those functions can be used when writing tests for functionality using pandas objects. + +The following testing functions are now part of this API: + +- :func:`testing.assert_frame_equal` +- :func:`testing.assert_series_equal` +- :func:`testing.assert_index_equal` + + .. _whatsnew_0200.enhancements.groupby_access: Groupby Enhancements diff --git a/pandas/__init__.py b/pandas/__init__.py index 4e1bcbd613965..b221f9e43876b 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -64,6 +64,7 @@ from pandas.util.print_versions import show_versions from pandas.io.api import * from pandas.util._tester import test +import pandas.testing # extension module deprecations from pandas.util.depr_module import _DeprecatedModule diff --git a/pandas/testing.py b/pandas/testing.py new file mode 100644 index 0000000000000..3baf99957cb33 --- /dev/null +++ b/pandas/testing.py @@ -0,0 +1,8 @@ +# flake8: noqa + +""" +Public testing utility functions. 
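+
+These functions can be used when writing tests for code that works with
+pandas objects. A minimal, illustrative usage (the frame below is only an
+example, not part of the module)::
+
+    import pandas as pd
+    from pandas.testing import assert_frame_equal
+
+    df = pd.DataFrame({'a': [1, 2]})
+    assert_frame_equal(df.copy(), df)  # passes; the copy matches exactly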
+""" + +from pandas.util.testing import ( + assert_frame_equal, assert_series_equal, assert_index_equal) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 32ed77d94f637..ec9e6039c6ee4 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -32,7 +32,7 @@ class TestPDApi(Base, tm.TestCase): # top-level sub-packages lib = ['api', 'compat', 'core', 'indexes', 'errors', 'pandas', - 'plotting', 'test', 'tools', 'tseries', + 'plotting', 'test', 'testing', 'tools', 'tseries', 'util', 'options', 'io'] # these are already deprecated; awaiting removal @@ -128,6 +128,17 @@ def test_api(self): self.check(api, self.allowed) +class TestTesting(Base): + + funcs = ['assert_frame_equal', 'assert_series_equal', + 'assert_index_equal'] + + def test_testing(self): + + from pandas import testing + self.check(testing, self.funcs) + + class TestDatetoolsDeprecation(tm.TestCase): def test_deprecation_access_func(self): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 08323fc0c9050..d1f88c7041e05 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -861,7 +861,7 @@ def assert_index_equal(left, right, exact='equiv', check_names=True, right : Index exact : bool / string {'equiv'}, default False Whether to check the Index class, dtype and inferred_type - are identical. If 'equiv', then RangeIndex can be substitued for + are identical. If 'equiv', then RangeIndex can be substituted for Int64Index as well check_names : bool, default True Whether to check the names attribute. From c4ede001c9c2853967d6c541ecbcda9e745f7686 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 16 Apr 2017 14:22:54 -0400 Subject: [PATCH 401/933] TST: fix location of formats data & templates --- doc/source/whatsnew/v0.19.0.txt | 2 +- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 9b003034aa94a..0c9bb029b9b68 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -268,7 +268,7 @@ Categorical Concatenation .. 
ipython:: python - from pandas.types.concat import union_categoricals + from pandas.api.types import union_categoricals a = pd.Categorical(["b", "c"]) b = pd.Categorical(["a", "b"]) union_categoricals([a, b]) diff --git a/setup.py b/setup.py index 5a7efa15452ac..5e474153d0ee1 100755 --- a/setup.py +++ b/setup.py @@ -680,7 +680,6 @@ def pxd(name): 'pandas.util.clipboard' ], package_data={'pandas.tests': ['data/*.csv'], - 'pandas.tests.formats': ['data/*.csv'], 'pandas.tests.indexes': ['data/*.pickle'], 'pandas.tests.io': ['data/legacy_hdf/*.h5', 'data/legacy_pickle/*/*.pickle', @@ -703,9 +702,10 @@ def pxd(name): 'data/*.html', 'data/html_encoding/*.html', 'json/data/*.json'], + 'pandas.tests.io.formats': ['data/*.csv'], 'pandas.tests.tools': ['data/*.csv'], 'pandas.tests.tseries': ['data/*.pickle'], - 'pandas.formats': ['templates/*.tpl'] + 'pandas.io.formats': ['templates/*.tpl'] }, ext_modules=extensions, maintainer_email=EMAIL, From c364e1d10839bc3514a98fff26a658e5a6b9b336 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 16 Apr 2017 14:27:07 -0400 Subject: [PATCH 402/933] MAINT: Remove assert_equal from testing.py (#16017) --- pandas/tests/core/computation/test_compat.py | 2 +- pandas/tests/core/computation/test_eval.py | 24 ++++++++++---------- pandas/tests/core/sparse/test_frame.py | 10 ++++---- pandas/tests/frame/test_constructors.py | 4 ++-- pandas/tests/frame/test_replace.py | 4 ++-- pandas/tests/frame/test_subclass.py | 6 ++--- pandas/tests/groupby/test_groupby.py | 4 ++-- pandas/tests/indexes/datetimes/test_tools.py | 2 +- pandas/tests/io/formats/test_format.py | 4 ++-- pandas/tests/io/formats/test_printing.py | 4 ++-- pandas/tests/io/json/test_pandas.py | 2 +- pandas/tests/io/parser/common.py | 6 ++--- pandas/tests/io/test_excel.py | 2 +- pandas/tests/io/test_pickle.py | 22 +++++++++--------- pandas/tests/io/test_pytables.py | 6 ++--- pandas/tests/io/test_stata.py | 4 ++-- pandas/tests/plotting/test_converter.py | 3 +-- pandas/tests/series/test_api.py | 4 ++-- pandas/tests/test_common.py | 6 ++--- pandas/tests/test_generic.py | 4 ++-- pandas/tests/test_util.py | 4 ++-- pandas/tests/tools/test_util.py | 4 ++-- pandas/util/testing.py | 19 +--------------- 23 files changed, 66 insertions(+), 84 deletions(-) diff --git a/pandas/tests/core/computation/test_compat.py b/pandas/tests/core/computation/test_compat.py index 7b6c0f9c4c9aa..9ee9f674a1ddd 100644 --- a/pandas/tests/core/computation/test_compat.py +++ b/pandas/tests/core/computation/test_compat.py @@ -30,7 +30,7 @@ def test_invalid_numexpr_version(engine, parser): def testit(): a, b = 1, 2 # noqa res = pd.eval('a + b', engine=engine, parser=parser) - tm.assert_equal(res, 3) + assert res == 3 if engine == 'numexpr': try: diff --git a/pandas/tests/core/computation/test_eval.py b/pandas/tests/core/computation/test_eval.py index 1f519174ce210..38a310a17a9ab 100644 --- a/pandas/tests/core/computation/test_eval.py +++ b/pandas/tests/core/computation/test_eval.py @@ -1020,7 +1020,7 @@ def test_complex_series_frame_alignment(self, engine, parser): parser=parser) else: res = pd.eval('df2 + s + df', engine=engine, parser=parser) - tm.assert_equal(res.shape, expected.shape) + assert res.shape == expected.shape assert_frame_equal(res, expected) def test_performance_warning_for_poor_alignment(self, engine, parser): @@ -1057,13 +1057,13 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): pd.eval('df + s', engine=engine, parser=parser) if not is_python_engine: - tm.assert_equal(len(w), 1) + assert len(w) == 1 msg = 
str(w[0].message) expected = ("Alignment difference on axis {0} is larger" " than an order of magnitude on term {1!r}, " "by more than {2:.4g}; performance may suffer" "".format(1, 'df', np.log10(s.size - df.shape[1]))) - tm.assert_equal(msg, expected) + assert msg == expected # ------------------------------------ @@ -1104,17 +1104,17 @@ def test_simple_arith_ops(self): else: expec = _eval_single_bin(1, op, 1, self.engine) x = self.eval(ex, engine=self.engine, parser=self.parser) - tm.assert_equal(x, expec) + assert x == expec expec = _eval_single_bin(x, op, 1, self.engine) y = self.eval(ex2, local_dict={'x': x}, engine=self.engine, parser=self.parser) - tm.assert_equal(y, expec) + assert y == expec expec = _eval_single_bin(1, op, x + 1, self.engine) y = self.eval(ex3, local_dict={'x': x}, engine=self.engine, parser=self.parser) - tm.assert_equal(y, expec) + assert y == expec def test_simple_bool_ops(self): for op, lhs, rhs in product(expr._bool_ops_syms, (True, False), @@ -1149,7 +1149,7 @@ def test_4d_ndarray_fails(self): def test_constant(self): x = self.eval('1') - tm.assert_equal(x, 1) + assert x == 1 def test_single_variable(self): df = DataFrame(randn(10, 2)) @@ -1508,7 +1508,7 @@ def test_check_many_exprs(self): expr = ' * '.join('a' * 33) expected = 1 res = pd.eval(expr, engine=self.engine, parser=self.parser) - tm.assert_equal(res, expected) + assert res == expected def test_fails_and(self): df = DataFrame(np.random.randn(5, 3)) @@ -1736,14 +1736,14 @@ def test_no_new_locals(self, engine, parser): pd.eval('x + 1', local_dict=lcls, engine=engine, parser=parser) lcls2 = locals().copy() lcls2.pop('lcls') - tm.assert_equal(lcls, lcls2) + assert lcls == lcls2 def test_no_new_globals(self, engine, parser): x = 1 # noqa gbls = globals().copy() pd.eval('x + 1', engine=engine, parser=parser) gbls2 = globals().copy() - tm.assert_equal(gbls, gbls2) + assert gbls == gbls2 def test_invalid_engine(): @@ -1810,7 +1810,7 @@ def test_numexpr_builtin_raises(engine, parser): pd.eval('sin + dotted_line', engine=engine, parser=parser) else: res = pd.eval('sin + dotted_line', engine=engine, parser=parser) - tm.assert_equal(res, sin + dotted_line) + assert res == sin + dotted_line def test_bad_resolver_raises(engine, parser): @@ -1854,7 +1854,7 @@ def test_inf(engine, parser): s = 'inf + 1' expected = np.inf result = pd.eval(s, engine=engine, parser=parser) - tm.assert_equal(result, expected) + assert result == expected def test_negate_lt_eq_le(engine, parser): diff --git a/pandas/tests/core/sparse/test_frame.py b/pandas/tests/core/sparse/test_frame.py index adb813a27e7e9..5774a74c6290e 100644 --- a/pandas/tests/core/sparse/test_frame.py +++ b/pandas/tests/core/sparse/test_frame.py @@ -1183,7 +1183,7 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) # Assert spmatrices equal - tm.assert_equal(dict(sdf.to_coo().todok()), dict(spm.todok())) + assert dict(sdf.to_coo().todok()) == dict(spm.todok()) # Ensure dtype is preserved if possible was_upcast = ((fill_value is None or is_float(fill_value)) and @@ -1193,11 +1193,11 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): float if was_upcast else dtype) tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) - tm.assert_equal(sdf.to_coo().dtype, res_dtype) + assert sdf.to_coo().dtype == res_dtype # However, adding a str column results in an upcast to object sdf['strings'] = np.arange(len(sdf)).astype(str) - tm.assert_equal(sdf.to_coo().dtype, 
np.object_) + assert sdf.to_coo().dtype == np.object_ @pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811 @@ -1237,12 +1237,12 @@ def test_from_to_scipy_object(spmatrix, fill_value): tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) # Assert spmatrices equal - tm.assert_equal(dict(sdf.to_coo().todok()), dict(spm.todok())) + assert dict(sdf.to_coo().todok()) == dict(spm.todok()) # Ensure dtype is preserved if possible res_dtype = object tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) - tm.assert_equal(sdf.to_coo().dtype, res_dtype) + assert sdf.to_coo().dtype == res_dtype class TestSparseDataFrameArithmetic(tm.TestCase): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 508053a6367fa..b1d31aee53b6a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -259,8 +259,8 @@ def test_constructor_dict(self): # Dict with None value frame_none = DataFrame(dict(a=None), index=[0]) frame_none_list = DataFrame(dict(a=[None]), index=[0]) - tm.assert_equal(frame_none.get_value(0, 'a'), None) - tm.assert_equal(frame_none_list.get_value(0, 'a'), None) + assert frame_none.get_value(0, 'a') is None + assert frame_none_list.get_value(0, 'a') is None tm.assert_frame_equal(frame_none, frame_none_list) # GH10856 diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index fce59e10bf4bd..f8e411c30fe38 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -974,7 +974,7 @@ def test_replace_period(self): 'out_augmented_MAY_2011.json', 'out_augmented_AUG_2011.json', 'out_augmented_JAN_2011.json'], columns=['fname']) - tm.assert_equal(set(df.fname.values), set(d['fname'].keys())) + assert set(df.fname.values) == set(d['fname'].keys()) expected = DataFrame({'fname': [d['fname'][k] for k in df.fname.values]}) result = df.replace(d) @@ -997,7 +997,7 @@ def test_replace_datetime(self): 'out_augmented_MAY_2011.json', 'out_augmented_AUG_2011.json', 'out_augmented_JAN_2011.json'], columns=['fname']) - tm.assert_equal(set(df.fname.values), set(d['fname'].keys())) + assert set(df.fname.values) == set(d['fname'].keys()) expected = DataFrame({'fname': [d['fname'][k] for k in df.fname.values]}) result = df.replace(d) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 7444490d18373..dbb2e04173faf 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -229,9 +229,9 @@ def test_subclass_sparse_slice(self): tm.SubclassedSparseDataFrame(rows[:2])) tm.assert_sp_frame_equal(ssdf[:2], tm.SubclassedSparseDataFrame(rows[:2])) - tm.assert_equal(ssdf.loc[:2].testattr, "testattr") - tm.assert_equal(ssdf.iloc[:2].testattr, "testattr") - tm.assert_equal(ssdf[:2].testattr, "testattr") + assert ssdf.loc[:2].testattr == "testattr" + assert ssdf.iloc[:2].testattr == "testattr" + assert ssdf[:2].testattr == "testattr" tm.assert_sp_series_equal(ssdf.loc[1], tm.SubclassedSparseSeries(rows[1]), diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 25f89b29021ce..f486c70d86f9d 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3275,7 +3275,7 @@ def f(group): # we expect 2 zeros because we call ``f`` once to see if a faster route # can be used. 
expected_names = [0, 0, 1, 2] - tm.assert_equal(names, expected_names) + assert names == expected_names def test_no_dummy_key_names(self): # GH #1291 @@ -3987,7 +3987,7 @@ def test_grouping_string_repr(self): result = gr.grouper.groupings[0].__repr__() expected = "Grouping(('A', 'a'))" - tm.assert_equal(result, expected) + assert result == expected def test_group_shift_with_null_key(self): # This test is designed to replicate the segfault in issue #13813. diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 28fbce43bf983..f8eb923d51f75 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1295,7 +1295,7 @@ def test_parsers_time(self): res = tools.to_time(np.array(arg)) self.assertIsInstance(res, list) - self.assert_equal(res, expected_arr) + assert res == expected_arr def test_parsers_monthfreq(self): cases = {'201101': datetime(2011, 1, 1, 0, 0), diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 7c74f82741e8c..b880ba8b182e9 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2536,11 +2536,11 @@ def test_nat_representations(self): def test_format_percentiles(): result = fmt.format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999]) expected = ['1.999%', '2.001%', '50%', '66.667%', '99.99%'] - tm.assert_equal(result, expected) + assert result == expected result = fmt.format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999]) expected = ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%'] - tm.assert_equal(result, expected) + assert result == expected tm.assertRaises(ValueError, fmt.format_percentiles, [0.1, np.nan, 0.5]) tm.assertRaises(ValueError, fmt.format_percentiles, [-0.001, 0.1, 0.5]) diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 23aaf472316ec..0df35da05578a 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -27,9 +27,9 @@ def test_repr_binary_type(): raw = btype(letters) b = compat.text_type(compat.bytes_to_str(raw)) res = printing.pprint_thing(b, quote_strings=True) - tm.assert_equal(res, repr(b)) + assert res == repr(b) res = printing.pprint_thing(b, quote_strings=False) - tm.assert_equal(res, b) + assert res == b class TestFormattBase(tm.TestCase): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index a24e8cdaf0273..45ce87bf069aa 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -420,7 +420,7 @@ def test_frame_empty(self): # GH 7445 result = pd.DataFrame({'test': []}, index=[]).to_json(orient='columns') expected = '{"test":{}}' - tm.assert_equal(result, expected) + assert result == expected def test_frame_empty_mixedtype(self): # mixed type diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 6eadf2c61c974..120bb005fb3ff 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -1332,9 +1332,9 @@ def test_1000_sep_with_decimal(self): 'C': [5, 10.] 
}) - tm.assert_equal(expected.A.dtype, 'int64') - tm.assert_equal(expected.B.dtype, 'float') - tm.assert_equal(expected.C.dtype, 'float') + assert expected.A.dtype == 'int64' + assert expected.B.dtype == 'float' + assert expected.C.dtype == 'float' df = self.read_csv(StringIO(data), sep='|', thousands=',', decimal='.') tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 2fada4e1dc2cc..d9e036481d0c2 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -412,7 +412,7 @@ def test_reading_all_sheets(self): tm.assert_contains_all(expected_keys, dfs.keys()) # Issue 9930 # Ensure sheet order is preserved - tm.assert_equal(expected_keys, list(dfs.keys())) + assert expected_keys == list(dfs.keys()) def test_reading_multiple_specific_sheets(self): # Test reading specific sheetnames by specifying a mixed list diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 0746cacb01bb9..e14c39d1de228 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -50,8 +50,8 @@ def compare_element(result, expected, typ, version=None): if expected is pd.NaT: assert result is pd.NaT else: - tm.assert_equal(result, expected) - tm.assert_equal(result.freq, expected.freq) + assert result == expected + assert result.freq == expected.freq else: comparator = getattr(tm, "assert_%s_equal" % typ, tm.assert_almost_equal) @@ -102,21 +102,21 @@ def compare_sp_series_ts(res, exp, typ, version): def compare_series_ts(result, expected, typ, version): # GH 7748 tm.assert_series_equal(result, expected) - tm.assert_equal(result.index.freq, expected.index.freq) - tm.assert_equal(result.index.freq.normalize, False) + assert result.index.freq == expected.index.freq + assert not result.index.freq.normalize tm.assert_series_equal(result > 0, expected > 0) # GH 9291 freq = result.index.freq - tm.assert_equal(freq + Day(1), Day(2)) + assert freq + Day(1) == Day(2) res = freq + pandas.Timedelta(hours=1) - tm.assert_equal(isinstance(res, pandas.Timedelta), True) - tm.assert_equal(res, pandas.Timedelta(days=1, hours=1)) + assert isinstance(res, pandas.Timedelta) + assert res == pandas.Timedelta(days=1, hours=1) res = freq + pandas.Timedelta(nanoseconds=1) - tm.assert_equal(isinstance(res, pandas.Timedelta), True) - tm.assert_equal(res, pandas.Timedelta(days=1, nanoseconds=1)) + assert isinstance(res, pandas.Timedelta) + assert res == pandas.Timedelta(days=1, nanoseconds=1) def compare_series_dt_tz(result, expected, typ, version): @@ -170,8 +170,8 @@ def compare_frame_cat_and_float(result, expected, typ, version): def compare_index_period(result, expected, typ, version): tm.assert_index_equal(result, expected) tm.assertIsInstance(result.freq, MonthEnd) - tm.assert_equal(result.freq, MonthEnd()) - tm.assert_equal(result.freqstr, 'M') + assert result.freq == MonthEnd() + assert result.freqstr == 'M' tm.assert_index_equal(result.shift(2), expected.shift(2)) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index c6a54211e73ad..f28b2a0231433 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -3516,7 +3516,7 @@ def test_select_iterator_many_empty_frames(self): results = [s for s in store.select( 'df', where=where, chunksize=chunksize)] - tm.assert_equal(1, len(results)) + assert len(results) == 1 result = concat(results) rexpected = expected[expected.index <= end_dt] tm.assert_frame_equal(rexpected, result) @@ -3527,7 +3527,7 @@ def 
test_select_iterator_many_empty_frames(self): 'df', where=where, chunksize=chunksize)] # should be 1, is 10 - tm.assert_equal(1, len(results)) + assert len(results) == 1 result = concat(results) rexpected = expected[(expected.index >= beg_dt) & (expected.index <= end_dt)] @@ -3545,7 +3545,7 @@ def test_select_iterator_many_empty_frames(self): 'df', where=where, chunksize=chunksize)] # should be [] - tm.assert_equal(0, len(results)) + assert len(results) == 0 def test_retain_index_attributes(self): diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 50d3342c56522..9ddd81ae53062 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1129,14 +1129,14 @@ def test_write_variable_labels(self): 'a': 'City Rank', 'b': 'City Exponent', 'c': 'City'} - tm.assert_equal(read_labels, expected_labels) + assert read_labels == expected_labels variable_labels['index'] = 'The Index' with tm.ensure_clean() as path: original.to_stata(path, variable_labels=variable_labels) with StataReader(path) as sr: read_labels = sr.variable_labels() - tm.assert_equal(read_labels, variable_labels) + assert read_labels == variable_labels def test_write_variable_label_errors(self): original = pd.DataFrame({'a': [1, 2, 3, 4], diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 4629103d033f5..683f4ee89687f 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -153,8 +153,7 @@ class Axis(object): def test_convert_accepts_unicode(self): r1 = self.pc.convert("2012-1-1", None, self.axis) r2 = self.pc.convert(u("2012-1-1"), None, self.axis) - self.assert_equal(r1, r2, - "PeriodConverter.convert should accept unicode") + assert r1 == r2 def test_conversion(self): rs = self.pc.convert(['2012-1-1'], None, self.axis)[0] diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 3cb1e29bde7d9..faf987c9b3820 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -344,7 +344,7 @@ def test_str_attribute(self): def test_empty_method(self): s_empty = pd.Series() - tm.assert_equal(s_empty.empty, True) + assert s_empty.empty for full_series in [pd.Series([1]), pd.Series(index=[1])]: - tm.assert_equal(full_series.empty, False) + assert not full_series.empty diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 90b1157572be1..5222f8fc18520 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -142,12 +142,12 @@ def test_random_state(): import numpy.random as npr # Check with seed state = com._random_state(5) - tm.assert_equal(state.uniform(), npr.RandomState(5).uniform()) + assert state.uniform() == npr.RandomState(5).uniform() # Check with random state object state2 = npr.RandomState(10) - tm.assert_equal( - com._random_state(state2).uniform(), npr.RandomState(10).uniform()) + assert (com._random_state(state2).uniform() == + npr.RandomState(10).uniform()) # check with no arg random state assert com._random_state() is np.random diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index a2ded195d9343..8706a05cfe8a2 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1810,12 +1810,12 @@ def test_squeeze(self): # axis argument df = tm.makeTimeDataFrame(nper=1).iloc[:, :1] - tm.assert_equal(df.shape, (1, 1)) + assert df.shape == (1, 1) tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0]) tm.assert_series_equal(df.squeeze(axis='index'), df.iloc[0]) 
tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0]) tm.assert_series_equal(df.squeeze(axis='columns'), df.iloc[:, 0]) - tm.assert_equal(df.squeeze(), df.iloc[0, 0]) + assert df.squeeze() == df.iloc[0, 0] tm.assertRaises(ValueError, df.squeeze, axis=2) tm.assertRaises(ValueError, df.squeeze, axis='x') diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py index 1bf9f4da45bff..2793cc14df19a 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/test_util.py @@ -213,7 +213,7 @@ def test_validate_bool_kwarg(self): validate_bool_kwarg(value, name) for value in valid_values: - tm.assert_equal(validate_bool_kwarg(value, name), value) + assert validate_bool_kwarg(value, name) == value class TestValidateKwargsAndArgs(tm.TestCase): @@ -400,4 +400,4 @@ def test_numpy_errstate_is_default(): import numpy as np from pandas.compat import numpy # noqa # The errstate should be unchanged after that import. - tm.assert_equal(np.geterr(), expected) + assert np.geterr() == expected diff --git a/pandas/tests/tools/test_util.py b/pandas/tests/tools/test_util.py index ed64e8f42d84b..3ac7d8b32516e 100644 --- a/pandas/tests/tools/test_util.py +++ b/pandas/tests/tools/test_util.py @@ -50,7 +50,7 @@ def test_empty(self): # empty product (empty input): result = cartesian_product([]) expected = [] - tm.assert_equal(result, expected) + assert result == expected def test_invalid_input(self): invalid_inputs = [1, [1], [1, 2], [[1], 2], @@ -482,4 +482,4 @@ def test_downcast_limits(self): for dtype, downcast, min_max in dtype_downcast_min_max: series = pd.to_numeric(pd.Series(min_max), downcast=downcast) - tm.assert_equal(series.dtype, dtype) + assert series.dtype == dtype diff --git a/pandas/util/testing.py b/pandas/util/testing.py index d1f88c7041e05..47ed762b3e561 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -833,23 +833,6 @@ def equalContents(arr1, arr2): return frozenset(arr1) == frozenset(arr2) -def assert_equal(a, b, msg=""): - """asserts that a equals b, like nose's assert_equal, - but allows custom message to start. Passes a and b to - format string as well. So you can use '{0}' and '{1}' - to display a and b. - - Examples - -------- - >>> assert_equal(2, 2, "apples") - >>> assert_equal(5.2, 1.2, "{0} was really a dead parrot") - Traceback (most recent call last): - ... - AssertionError: 5.2 was really a dead parrot: 5.2 != 1.2 - """ - assert a == b, "%s: %r != %r" % (msg.format(a, b), a, b) - - def assert_index_equal(left, right, exact='equiv', check_names=True, check_less_precise=False, check_exact=True, check_categorical=True, obj='Index'): @@ -862,7 +845,7 @@ def assert_index_equal(left, right, exact='equiv', check_names=True, exact : bool / string {'equiv'}, default False Whether to check the Index class, dtype and inferred_type are identical. If 'equiv', then RangeIndex can be substituted for - Int64Index as well + Int64Index as well. check_names : bool, default True Whether to check the names attribute. 
check_less_precise : bool or int, default False From d7913621195424f538748915e82c5c2fcc4164bd Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 16 Apr 2017 14:27:46 -0400 Subject: [PATCH 403/933] MAINT: Strip internals from TestCase class (#16016) * MAINT: Move reset_display_options outside of TestCase * MAINT: Move round_trip_pickle outside of TestCase * MAINT: Remove all deprecated aliases from TestCase * DOC: Add doc explaining TestCase usage under pytest --- pandas/tests/core/dtypes/test_dtypes.py | 2 +- pandas/tests/core/sparse/test_array.py | 2 +- pandas/tests/core/sparse/test_frame.py | 2 +- pandas/tests/core/sparse/test_series.py | 2 +- pandas/tests/frame/test_block_internals.py | 6 +-- pandas/tests/frame/test_repr_info.py | 2 +- pandas/tests/frame/test_subclass.py | 2 +- pandas/tests/indexes/common.py | 2 +- .../tests/indexes/datetimes/test_datetime.py | 2 +- pandas/tests/indexes/datetimes/test_ops.py | 4 +- pandas/tests/indexes/period/test_period.py | 4 +- pandas/tests/indexes/test_multi.py | 4 +- .../indexes/timedeltas/test_timedelta.py | 2 +- .../tests/io/formats/test_eng_formatting.py | 4 +- pandas/tests/io/formats/test_format.py | 16 +++--- pandas/tests/scalar/test_period.py | 2 +- pandas/tests/scalar/test_timedelta.py | 2 +- pandas/tests/series/test_io.py | 2 +- pandas/tests/series/test_timeseries.py | 6 +-- pandas/tests/test_multilevel.py | 2 +- pandas/tests/test_panel.py | 2 +- pandas/tests/test_testing.py | 20 -------- pandas/tests/tseries/test_offsets.py | 4 +- pandas/util/decorators.py | 2 +- pandas/util/testing.py | 51 ++++++++++--------- 25 files changed, 66 insertions(+), 83 deletions(-) diff --git a/pandas/tests/core/dtypes/test_dtypes.py b/pandas/tests/core/dtypes/test_dtypes.py index ec9876df14e3b..46569fecf553f 100644 --- a/pandas/tests/core/dtypes/test_dtypes.py +++ b/pandas/tests/core/dtypes/test_dtypes.py @@ -40,7 +40,7 @@ def f(): self.assertNotEqual(np.str_, self.dtype) def test_pickle(self): - result = self.round_trip_pickle(self.dtype) + result = tm.round_trip_pickle(self.dtype) self.assertEqual(result, self.dtype) diff --git a/pandas/tests/core/sparse/test_array.py b/pandas/tests/core/sparse/test_array.py index b7b664e7bfb8a..9a1346430175d 100644 --- a/pandas/tests/core/sparse/test_array.py +++ b/pandas/tests/core/sparse/test_array.py @@ -562,7 +562,7 @@ def _check_inplace_op(op): def test_pickle(self): def _check_roundtrip(obj): - unpickled = self.round_trip_pickle(obj) + unpickled = tm.round_trip_pickle(obj) tm.assert_sp_array_equal(unpickled, obj) _check_roundtrip(self.arr) diff --git a/pandas/tests/core/sparse/test_frame.py b/pandas/tests/core/sparse/test_frame.py index 5774a74c6290e..279fe9ea75e53 100644 --- a/pandas/tests/core/sparse/test_frame.py +++ b/pandas/tests/core/sparse/test_frame.py @@ -278,7 +278,7 @@ def test_array_interface(self): def test_pickle(self): def _test_roundtrip(frame, orig): - result = self.round_trip_pickle(frame) + result = tm.round_trip_pickle(frame) tm.assert_sp_frame_equal(frame, result) tm.assert_frame_equal(result.to_dense(), orig, check_dtype=False) diff --git a/pandas/tests/core/sparse/test_series.py b/pandas/tests/core/sparse/test_series.py index 0b71dffe1782b..52032b618cd1d 100644 --- a/pandas/tests/core/sparse/test_series.py +++ b/pandas/tests/core/sparse/test_series.py @@ -390,7 +390,7 @@ def test_to_frame(self): def test_pickle(self): def _test_roundtrip(series): - unpickled = self.round_trip_pickle(series) + unpickled = tm.round_trip_pickle(series) tm.assert_sp_series_equal(series, unpickled) 
tm.assert_series_equal(series.to_dense(), unpickled.to_dense()) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index accd3ddeb03d7..bfe1b0aae90b1 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -350,18 +350,18 @@ def test_copy(self): self.assertIsNot(copy._data, self.mixed_frame._data) def test_pickle(self): - unpickled = self.round_trip_pickle(self.mixed_frame) + unpickled = tm.round_trip_pickle(self.mixed_frame) assert_frame_equal(self.mixed_frame, unpickled) # buglet self.mixed_frame._data.ndim # empty - unpickled = self.round_trip_pickle(self.empty) + unpickled = tm.round_trip_pickle(self.empty) repr(unpickled) # tz frame - unpickled = self.round_trip_pickle(self.tzframe) + unpickled = tm.round_trip_pickle(self.tzframe) assert_frame_equal(self.tzframe, unpickled) def test_consolidate_datetime64(self): diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 740a24f38c316..be55efac2992b 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -118,7 +118,7 @@ def test_repr_unsortable(self): fmt.set_option('display.max_rows', 1000, 'display.max_columns', 1000) repr(self.frame) - self.reset_display_options() + tm.reset_display_options() warnings.filters = warn_filters diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index dbb2e04173faf..1899df74c60ab 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -85,7 +85,7 @@ def test_dataframe_metadata(self): self.assertEqual(df.iloc[0:1, :].testattr, 'XXX') # GH10553 - unpickled = self.round_trip_pickle(df) + unpickled = tm.round_trip_pickle(df) tm.assert_frame_equal(df, unpickled) self.assertEqual(df._metadata, unpickled._metadata) self.assertEqual(df.testattr, unpickled.testattr) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 25214e6b170b5..15eceac6b00c9 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -28,7 +28,7 @@ def setup_indices(self): setattr(self, name, idx) def verify_pickle(self, index): - unpickled = self.round_trip_pickle(index) + unpickled = tm.round_trip_pickle(index) self.assertTrue(index.equals(unpickled)) def test_pickle_compat_construction(self): diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 78c37f773547a..feedde77ebdd2 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -106,7 +106,7 @@ def test_roundtrip_pickle_with_tz(self): # GH 8367 # round-trip of timezone index = date_range('20130101', periods=3, tz='US/Eastern', name='foo') - unpickled = self.round_trip_pickle(index) + unpickled = tm.round_trip_pickle(index) self.assert_index_equal(index, unpickled) def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 4be9999982f12..6e6d6bf190291 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -1121,7 +1121,7 @@ def test_comparison(self): self.assertFalse(comp[9]) def test_pickle_unpickle(self): - unpickled = self.round_trip_pickle(self.rng) + unpickled = tm.round_trip_pickle(self.rng) self.assertIsNotNone(unpickled.offset) def test_copy(self): @@ -1272,7 +1272,7 @@ def 
test_shift(self):
         self.assertEqual(shifted[0], rng[0] + CDay())

     def test_pickle_unpickle(self):
-        unpickled = self.round_trip_pickle(self.rng)
+        unpickled = tm.round_trip_pickle(self.rng)
         self.assertIsNotNone(unpickled.offset)

     def test_summary(self):
diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py
index 6639fcd985ac4..fcbb1c10426bc 100644
--- a/pandas/tests/indexes/period/test_period.py
+++ b/pandas/tests/indexes/period/test_period.py
@@ -58,7 +58,7 @@ def test_pickle_compat_construction(self):

     def test_pickle_round_trip(self):
         for freq in ['D', 'M', 'Y']:
             idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D')
-            result = self.round_trip_pickle(idx)
+            result = tm.round_trip_pickle(idx)
             tm.assert_index_equal(result, idx)

     def test_get_loc(self):
@@ -761,7 +761,7 @@ def test_append_concat(self):

     def test_pickle_freq(self):
         # GH2891
         prng = period_range('1/1/2011', '1/1/2012', freq='M')
-        new_prng = self.round_trip_pickle(prng)
+        new_prng = tm.round_trip_pickle(prng)
         self.assertEqual(new_prng.freq, offsets.MonthEnd())
         self.assertEqual(new_prng.freqstr, 'M')

diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index e93319a30d5d8..b33a317eefd44 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -1044,7 +1044,7 @@ def test_roundtrip_pickle_with_tz(self):
             [[1, 2], ['a', 'b'],
              date_range('20130101', periods=3, tz='US/Eastern')
              ], names=['one', 'two', 'three'])
-        unpickled = self.round_trip_pickle(index)
+        unpickled = tm.round_trip_pickle(index)
         self.assertTrue(index.equal_levels(unpickled))

     def test_from_tuples_index_values(self):
@@ -1392,7 +1392,7 @@ def test_format_sparse_config(self):
         result = self.index.format()
         self.assertEqual(result[1], 'foo two')

-        self.reset_display_options()
+        tm.reset_display_options()

         warnings.filters = warn_filters

diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py
index 3abc2d8422fd3..f434938a6e803 100644
--- a/pandas/tests/indexes/timedeltas/test_timedelta.py
+++ b/pandas/tests/indexes/timedeltas/test_timedelta.py
@@ -454,7 +454,7 @@ def test_pass_TimedeltaIndex_to_index(self):

     def test_pickle(self):
         rng = timedelta_range('1 days', periods=10)
-        rng_p = self.round_trip_pickle(rng)
+        rng_p = tm.round_trip_pickle(rng)
         tm.assert_index_equal(rng, rng_p)

     def test_hash_error(self):
diff --git a/pandas/tests/io/formats/test_eng_formatting.py b/pandas/tests/io/formats/test_eng_formatting.py
index 225a4921961cf..8eb4ed576fff1 100644
--- a/pandas/tests/io/formats/test_eng_formatting.py
+++ b/pandas/tests/io/formats/test_eng_formatting.py
@@ -38,7 +38,7 @@ def test_eng_float_formatter(self):
                     '3 1E+06')
         self.assertEqual(result, expected)
-        self.reset_display_options()
+        tm.reset_display_options()

     def compare(self, formatter, input, output):
         formatted_input = formatter(input)
@@ -185,7 +185,7 @@ def test_nan(self):
         fmt.set_eng_float_format(accuracy=1)
         result = pt.to_string()
         self.assertTrue('NaN' in result)
-        self.reset_display_options()
+        tm.reset_display_options()

     def test_inf(self):
         # Issue #11981
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index b880ba8b182e9..bb766ae389a10 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -138,7 +138,7 @@ def test_eng_float_formatter(self):
         fmt.set_eng_float_format(accuracy=0)
         repr(self.frame)
-        self.reset_display_options()
+        tm.reset_display_options()

     def test_show_null_counts(self):
@@ -1197,7 +1197,7 @@ def test_to_string_line_width_no_index(self):
         self.assertEqual(df_s, expected)

     def test_to_string_float_formatting(self):
-        self.reset_display_options()
+        tm.reset_display_options()
         fmt.set_option('display.precision', 5, 'display.column_space', 12,
                        'display.notebook_repr_html', False)
@@ -1226,7 +1226,7 @@ def test_to_string_float_formatting(self):
         expected = (' x\n' '0 3234.000\n' '1 0.253')
         self.assertEqual(df_s, expected)

-        self.reset_display_options()
+        tm.reset_display_options()
         self.assertEqual(get_option("display.precision"), 6)

         df = DataFrame({'x': [1e9, 0.2512]})
@@ -1310,14 +1310,14 @@ def test_to_string_index_formatter(self):
         self.assertEqual(rs, xp)

     def test_to_string_left_justify_cols(self):
-        self.reset_display_options()
+        tm.reset_display_options()
         df = DataFrame({'x': [3234, 0.253]})
         df_s = df.to_string(justify='left')
         expected = (' x \n' '0 3234.000\n' '1 0.253')
         self.assertEqual(df_s, expected)

     def test_to_string_format_na(self):
-        self.reset_display_options()
+        tm.reset_display_options()
         df = DataFrame({'A': [np.nan, -1, -2.1234, 3, 4],
                         'B': [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
         result = df.to_string()
@@ -1380,7 +1380,7 @@ def test_repr_html(self):
         fmt.set_option('display.notebook_repr_html', False)
         self.frame._repr_html_()

-        self.reset_display_options()
+        tm.reset_display_options()

         df = DataFrame([[1, 2], [3, 4]])
         fmt.set_option('display.show_dimensions', True)
@@ -1388,7 +1388,7 @@ def test_repr_html(self):
         fmt.set_option('display.show_dimensions', False)
         self.assertFalse('2 rows' in df._repr_html_())

-        self.reset_display_options()
+        tm.reset_display_options()

     def test_repr_html_wide(self):
         max_cols = get_option('display.max_columns')
@@ -1552,7 +1552,7 @@ def get_ipython():
         repstr = self.frame._repr_html_()
         self.assertIn('class', repstr)  # info fallback
-        self.reset_display_options()
+        tm.reset_display_options()

     def test_pprint_pathological_object(self):
         """
diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py
index 7a15600d6041e..98af0028469bf 100644
--- a/pandas/tests/scalar/test_period.py
+++ b/pandas/tests/scalar/test_period.py
@@ -909,7 +909,7 @@ def test_multiples(self):

     def test_round_trip(self):
         p = Period('2000Q1')
-        new_p = self.round_trip_pickle(p)
+        new_p = tm.round_trip_pickle(p)
         self.assertEqual(new_p, p)

diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py
index c22d1d2329fba..abdbf29008b7e 100644
--- a/pandas/tests/scalar/test_timedelta.py
+++ b/pandas/tests/scalar/test_timedelta.py
@@ -559,7 +559,7 @@ def test_overflow(self):

     def test_pickle(self):
         v = Timedelta('1 days 10:11:12.0123456')
-        v_p = self.round_trip_pickle(v)
+        v_p = tm.round_trip_pickle(v)
         self.assertEqual(v, v_p)

     def test_timedelta_hash_equality(self):
diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py
index d514fbfc142f0..a86ca880e75e4 100644
--- a/pandas/tests/series/test_io.py
+++ b/pandas/tests/series/test_io.py
@@ -134,7 +134,7 @@ def test_timeseries_periodindex(self):
         from pandas import period_range
         prng = period_range('1/1/2011', '1/1/2012', freq='M')
         ts = Series(np.random.randn(len(prng)), prng)
-        new_ts = self.round_trip_pickle(ts)
+        new_ts = tm.round_trip_pickle(ts)
         self.assertEqual(new_ts.index.freq, 'M')

     def test_pickle_preserve_name(self):
diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py
index ce7d5a573bfab..5a88b5bf98699 100644
--- a/pandas/tests/series/test_timeseries.py
+++ b/pandas/tests/series/test_timeseries.py
@@ -827,11 +827,11 @@ def test_asfreq_resample_set_correct_freq(self):

     def test_pickle(self):
         # GH4606
-        p = self.round_trip_pickle(NaT)
+        p = tm.round_trip_pickle(NaT)
         self.assertTrue(p is NaT)

         idx = pd.to_datetime(['2013-01-01', NaT, '2014-01-06'])
-        idx_p = self.round_trip_pickle(idx)
+        idx_p = tm.round_trip_pickle(idx)
         self.assertTrue(idx_p[0] == idx[0])
         self.assertTrue(idx_p[1] is NaT)
         self.assertTrue(idx_p[2] == idx[2])
@@ -839,7 +839,7 @@ def test_pickle(self):
         # GH11002
         # don't infer freq
         idx = date_range('1750-1-1', '2050-1-1', freq='7D')
-        idx_p = self.round_trip_pickle(idx)
+        idx_p = tm.round_trip_pickle(idx)
         tm.assert_index_equal(idx, idx_p)

     def test_setops_preserve_freq(self):
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 648a3b98b245a..24bbf895508d7 100755
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -179,7 +179,7 @@ def _check_op(opname):

     def test_pickle(self):
         def _test_roundtrip(frame):
-            unpickled = self.round_trip_pickle(frame)
+            unpickled = tm.round_trip_pickle(frame)
             tm.assert_frame_equal(frame, unpickled)

         _test_roundtrip(self.frame)
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index 60173dda57e04..4e9805ca9d5a6 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -43,7 +43,7 @@ class PanelTests(object):

     def test_pickle(self):
         with catch_warnings(record=True):
-            unpickled = self.round_trip_pickle(self.panel)
+            unpickled = tm.round_trip_pickle(self.panel)
             assert_frame_equal(unpickled['ItemA'], self.panel['ItemA'])

     def test_rank(self):
diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py
index e5cb953cb35a5..fe4149583182d 100644
--- a/pandas/tests/test_testing.py
+++ b/pandas/tests/test_testing.py
@@ -746,26 +746,6 @@ def test_RNGContext(self):
             self.assertEqual(np.random.randn(), expected0)


-class TestDeprecatedTests(tm.TestCase):
-
-    def test_warning(self):
-
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            self.assertEquals(1, 1)
-
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            self.assertNotEquals(1, 2)
-
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            self.assert_(True)
-
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            self.assertAlmostEquals(1.0, 1.0000000001)
-
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            self.assertNotAlmostEquals(1, 2)
-
-
 class TestLocale(tm.TestCase):

     def test_locale(self):
diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py
index f644c353982f6..2dc2485550bc5 100644
--- a/pandas/tests/tseries/test_offsets.py
+++ b/pandas/tests/tseries/test_offsets.py
@@ -1906,7 +1906,7 @@ def test_calendar(self):

     def test_roundtrip_pickle(self):
         def _check_roundtrip(obj):
-            unpickled = self.round_trip_pickle(obj)
+            unpickled = tm.round_trip_pickle(obj)
             self.assertEqual(unpickled, obj)

         _check_roundtrip(self.offset)
@@ -1967,7 +1967,7 @@ def test_offsets_compare_equal(self):

     def test_roundtrip_pickle(self):
         def _check_roundtrip(obj):
-            unpickled = self.round_trip_pickle(obj)
+            unpickled = tm.round_trip_pickle(obj)
             self.assertEqual(unpickled, obj)

         _check_roundtrip(self._object())
diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py
index ca588e2a0432e..772b206f82e69 100644
--- a/pandas/util/decorators.py
+++ b/pandas/util/decorators.py
@@ -24,7 +24,7 @@ def deprecate_kwarg(old_arg_name, new_arg_name, mapping=None, stacklevel=2):
     old_arg_name : str
         Name of argument in function to deprecate
     new_arg_name : str
-        Name of prefered argument in function
+        Name of preferred argument in function
     mapping : dict or callable
         If mapping is present, use it to translate old arguments to
         new arguments. A callable must do its own value checking;
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 47ed762b3e561..e9ec9d553d3e4 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -50,7 +50,6 @@
     Index, MultiIndex, Series, DataFrame, Panel, Panel4D)

-from pandas.util.decorators import deprecate
 from pandas.util import libtesting
 from pandas.io.common import urlopen
 slow = pytest.mark.slow
@@ -83,6 +82,14 @@ def reset_testing_mode():


 class TestCase(unittest.TestCase):
+    """
+    The test case class that we originally used when using the
+    nosetests framework. Under the new pytest framework, we are
+    moving away from this class.
+
+    Do not create new test classes derived from this one. Rather,
+    they should inherit from object directly.
+    """

     @classmethod
     def setUpClass(cls):
@@ -92,36 +99,32 @@ def setUpClass(cls):
     def tearDownClass(cls):
         pass

-    def reset_display_options(self):
-        # reset the display options
-        pd.reset_option('^display.', silent=True)
-
-    def round_trip_pickle(self, obj, path=None):
-        return round_trip_pickle(obj, path=path)

-    # https://docs.python.org/3/library/unittest.html#deprecated-aliases
-    def assertEquals(self, *args, **kwargs):
-        return deprecate('assertEquals',
-                         self.assertEqual)(*args, **kwargs)
+def reset_display_options():
+    """
+    Reset the display options for printing and representing objects.
+    """

-    def assertNotEquals(self, *args, **kwargs):
-        return deprecate('assertNotEquals',
-                         self.assertNotEqual)(*args, **kwargs)
+    pd.reset_option('^display.', silent=True)

-    def assert_(self, *args, **kwargs):
-        return deprecate('assert_',
-                         self.assertTrue)(*args, **kwargs)

-    def assertAlmostEquals(self, *args, **kwargs):
-        return deprecate('assertAlmostEquals',
-                         self.assertAlmostEqual)(*args, **kwargs)
+def round_trip_pickle(obj, path=None):
+    """
+    Pickle an object and then read it again.

-    def assertNotAlmostEquals(self, *args, **kwargs):
-        return deprecate('assertNotAlmostEquals',
-                         self.assertNotAlmostEqual)(*args, **kwargs)
+    Parameters
+    ----------
+    obj : pandas object
+        The object to pickle and then re-read.
+    path : str, default None
+        The path where the pickled object is written and then read.
+
+    Returns
+    -------
+    round_trip_pickled_object : pandas object
+        The original object that was pickled and then re-read.
+    """

-def round_trip_pickle(obj, path=None):
     if path is None:
         path = u('__%s__.pickle' % rands(10))
     with ensure_clean(path) as path:

From a25272ba72741c3e2862f5b0e06279242ffef2b2 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sun, 16 Apr 2017 20:28:33 +0200
Subject: [PATCH 404/933] CLN: update pandas.lib deprecation messages (GH15936) (#16021)

---
 pandas/__init__.py         |  7 +++++--
 pandas/util/depr_module.py | 25 ++++++++++++++++---------
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/pandas/__init__.py b/pandas/__init__.py
index b221f9e43876b..01bf22bcc5e73 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -75,8 +75,11 @@
 parser = _DeprecatedModule(deprmod='pandas.parser',
                            removals=['na_values'],
                            moved={'CParserError': 'pandas.errors.ParserError'})
-lib = _DeprecatedModule(deprmod='pandas.lib', deprmodto='pandas._libs.lib',
-                        moved={'infer_dtype': 'pandas.api.lib.infer_dtype'})
+lib = _DeprecatedModule(deprmod='pandas.lib', deprmodto=False,
+                        moved={'Timestamp': 'pandas.Timestamp',
+                               'Timedelta': 'pandas.Timedelta',
+                               'NaT': 'pandas.NaT',
+                               'infer_dtype': 'pandas.api.lib.infer_dtype'})
 tslib = _DeprecatedModule(deprmod='pandas.tslib',
                           moved={'Timestamp': 'pandas.Timestamp',
                                  'Timedelta': 'pandas.Timedelta',
diff --git a/pandas/util/depr_module.py b/pandas/util/depr_module.py
index 1f428198c19f3..b438c91d980af 100644
--- a/pandas/util/depr_module.py
+++ b/pandas/util/depr_module.py
@@ -75,15 +75,22 @@ def __getattr__(self, name):
                 FutureWarning, stacklevel=2)
         else:
             deprmodto = self.deprmodto
-            if deprmodto is None:
-                deprmodto = "{modname}.{name}".format(
-                    modname=obj.__module__, name=name)
-            # The object is actually located in another module.
-            warnings.warn(
-                "{deprmod}.{name} is deprecated. Please use "
-                "{deprmodto}.{name} instead.".format(
-                    deprmod=self.deprmod, name=name, deprmodto=deprmodto),
-                FutureWarning, stacklevel=2)
+            if deprmodto is False:
+                warnings.warn(
+                    "{deprmod}.{name} is deprecated and will be removed in "
+                    "a future version.".format(
+                        deprmod=self.deprmod, name=name),
+                    FutureWarning, stacklevel=2)
+            else:
+                if deprmodto is None:
+                    deprmodto = "{modname}.{name}".format(
+                        modname=obj.__module__, name=name)
+                # The object is actually located in another module.
+                warnings.warn(
+                    "{deprmod}.{name} is deprecated. Please use "
Please use " + "{deprmodto}.{name} instead.".format( + deprmod=self.deprmod, name=name, deprmodto=deprmodto), + FutureWarning, stacklevel=2) return obj From f60b914e2100d44740df377d55f4d43b3709478c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 16 Apr 2017 21:31:38 +0000 Subject: [PATCH 405/933] CLN: move infer_dtype to pandas.api.types (#16023) --- pandas/__init__.py | 2 +- pandas/api/lib/__init__.py | 5 ----- pandas/api/types/__init__.py | 1 + pandas/tests/api/test_types.py | 2 +- 4 files changed, 3 insertions(+), 7 deletions(-) delete mode 100644 pandas/api/lib/__init__.py diff --git a/pandas/__init__.py b/pandas/__init__.py index 01bf22bcc5e73..5f6d54fd904b1 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -79,7 +79,7 @@ moved={'Timestamp': 'pandas.Timestamp', 'Timedelta': 'pandas.Timedelta', 'NaT': 'pandas.NaT', - 'infer_dtype': 'pandas.api.lib.infer_dtype'}) + 'infer_dtype': 'pandas.api.types.infer_dtype'}) tslib = _DeprecatedModule(deprmod='pandas.tslib', moved={'Timestamp': 'pandas.Timestamp', 'Timedelta': 'pandas.Timedelta', diff --git a/pandas/api/lib/__init__.py b/pandas/api/lib/__init__.py deleted file mode 100644 index c86bfc6148655..0000000000000 --- a/pandas/api/lib/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# flake8: noqa - -""" public toolkit API """ - -from pandas._libs.lib import infer_dtype diff --git a/pandas/api/types/__init__.py b/pandas/api/types/__init__.py index 06fb5742ba067..dcf010dcf4bc2 100644 --- a/pandas/api/types/__init__.py +++ b/pandas/api/types/__init__.py @@ -2,4 +2,5 @@ from pandas.core.dtypes.api import * # noqa from pandas.core.dtypes.concat import union_categoricals # noqa +from pandas._libs.lib import infer_dtype # noqa del np # noqa diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index e0267d2990085..057f7d8f3e286 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -30,7 +30,7 @@ class TestTypes(Base, tm.TestCase): 'is_dict_like', 'is_iterator', 'is_file_like', 'is_list_like', 'is_hashable', 'is_named_tuple', 'is_sequence', - 'pandas_dtype', 'union_categoricals'] + 'pandas_dtype', 'union_categoricals', 'infer_dtype'] def test_types(self): From 5146b5971324009873cf1a54c8f0828de63874ae Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 16 Apr 2017 18:07:25 -0400 Subject: [PATCH 406/933] MAINT: Remove tm.assertIsNot from testing (#16024) --- pandas/tests/core/sparse/test_frame.py | 18 +++--- pandas/tests/core/sparse/test_indexing.py | 6 +- pandas/tests/core/sparse/test_series.py | 18 +++--- .../tests/frame/test_axis_select_reindex.py | 56 ++++++++++--------- pandas/tests/frame/test_block_internals.py | 15 ++--- pandas/tests/frame/test_indexing.py | 8 +-- pandas/tests/frame/test_missing.py | 8 +-- pandas/tests/frame/test_timeseries.py | 7 +-- pandas/tests/indexes/common.py | 28 ++++++---- pandas/tests/indexes/test_base.py | 20 +++---- pandas/tests/indexes/test_category.py | 12 ++-- pandas/tests/indexes/test_frozen.py | 27 +++++---- pandas/tests/indexes/test_interval.py | 6 +- pandas/tests/indexes/test_multi.py | 23 ++++---- pandas/tests/series/test_apply.py | 14 ++--- pandas/tests/series/test_timeseries.py | 12 ++-- pandas/tests/test_generic.py | 2 +- pandas/tests/test_internals.py | 2 +- pandas/tests/test_panel.py | 18 +++--- pandas/tests/test_panel4d.py | 26 ++++----- pandas/tests/tools/test_concat.py | 32 +++++------ pandas/util/testing.py | 19 ++++--- 22 files changed, 194 insertions(+), 183 deletions(-) diff --git a/pandas/tests/core/sparse/test_frame.py 
b/pandas/tests/core/sparse/test_frame.py index 279fe9ea75e53..0a58713125a30 100644 --- a/pandas/tests/core/sparse/test_frame.py +++ b/pandas/tests/core/sparse/test_frame.py @@ -422,24 +422,24 @@ def test_iloc(self): def test_set_value(self): - # ok as the index gets conver to object + # ok, as the index gets converted to object frame = self.frame.copy() res = frame.set_value('foobar', 'B', 1.5) - self.assertEqual(res.index.dtype, 'object') + assert res.index.dtype == 'object' res = self.frame res.index = res.index.astype(object) res = self.frame.set_value('foobar', 'B', 1.5) - self.assertIsNot(res, self.frame) - self.assertEqual(res.index[-1], 'foobar') - self.assertEqual(res.get_value('foobar', 'B'), 1.5) + assert res is not self.frame + assert res.index[-1] == 'foobar' + assert res.get_value('foobar', 'B') == 1.5 res2 = res.set_value('foobar', 'qux', 1.5) - self.assertIsNot(res2, res) - self.assert_index_equal(res2.columns, - pd.Index(list(self.frame.columns) + ['qux'])) - self.assertEqual(res2.get_value('foobar', 'qux'), 1.5) + assert res2 is not res + tm.assert_index_equal(res2.columns, + pd.Index(list(self.frame.columns) + ['qux'])) + assert res2.get_value('foobar', 'qux') == 1.5 def test_fancy_index_misc(self): # axis = 0 diff --git a/pandas/tests/core/sparse/test_indexing.py b/pandas/tests/core/sparse/test_indexing.py index 1a0782c0a3db9..4a9bea798be36 100644 --- a/pandas/tests/core/sparse/test_indexing.py +++ b/pandas/tests/core/sparse/test_indexing.py @@ -1,6 +1,6 @@ # pylint: disable-msg=E1101,W0612 -import pytest # noqa +import pytest import numpy as np import pandas as pd import pandas.util.testing as tm @@ -578,7 +578,7 @@ def test_reindex(self): exp = orig.reindex(['A'], level=0).to_sparse() tm.assert_sp_series_equal(res, exp) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): # Incomplete keys are not accepted for reindexing: sparse.reindex(['A', 'C']) @@ -586,7 +586,7 @@ def test_reindex(self): res = sparse.reindex(sparse.index, copy=True) exp = orig.reindex(orig.index, copy=True).to_sparse() tm.assert_sp_series_equal(res, exp) - self.assertIsNot(sparse, res) + assert sparse is not res class TestSparseDataFrameIndexing(tm.TestCase): diff --git a/pandas/tests/core/sparse/test_series.py b/pandas/tests/core/sparse/test_series.py index 52032b618cd1d..f5a27a8161909 100644 --- a/pandas/tests/core/sparse/test_series.py +++ b/pandas/tests/core/sparse/test_series.py @@ -314,9 +314,9 @@ def test_constructor_empty(self): def test_copy_astype(self): cop = self.bseries.astype(np.float64) - self.assertIsNot(cop, self.bseries) - self.assertIs(cop.sp_index, self.bseries.sp_index) - self.assertEqual(cop.dtype, np.float64) + assert cop is not self.bseries + assert cop.sp_index is self.bseries.sp_index + assert cop.dtype == np.float64 cop2 = self.iseries.copy() @@ -325,8 +325,8 @@ def test_copy_astype(self): # test that data is copied cop[:5] = 97 - self.assertEqual(cop.sp_values[0], 97) - self.assertNotEqual(self.bseries.sp_values[0], 97) + assert cop.sp_values[0] == 97 + assert self.bseries.sp_values[0] != 97 # correct fill value zbcop = self.zbseries.copy() @@ -338,7 +338,7 @@ def test_copy_astype(self): # no deep copy view = self.bseries.copy(deep=False) view.sp_values[:5] = 5 - self.assertTrue((self.bseries.sp_values[:5] == 5).all()) + assert (self.bseries.sp_values[:5] == 5).all() def test_shape(self): # GH 10452 @@ -639,7 +639,7 @@ def _compare_with_series(sps, new_index): # special cases same_index = self.bseries.reindex(self.bseries.index) 
tm.assert_sp_series_equal(self.bseries, same_index) - self.assertIsNot(same_index, self.bseries) + assert same_index is not self.bseries # corner cases sp = SparseSeries([], index=[]) @@ -650,7 +650,7 @@ def _compare_with_series(sps, new_index): # with copy=False reindexed = self.bseries.reindex(self.bseries.index, copy=True) reindexed.sp_values[:] = 1. - self.assertTrue((self.bseries.sp_values != 1.).all()) + assert (self.bseries.sp_values != 1.).all() reindexed = self.bseries.reindex(self.bseries.index, copy=False) reindexed.sp_values[:] = 1. @@ -824,7 +824,7 @@ def test_shift(self): series = SparseSeries([nan, 1., 2., 3., nan, nan], index=np.arange(6)) shifted = series.shift(0) - self.assertIsNot(shifted, series) + assert shifted is not series tm.assert_sp_series_equal(shifted, series) f = lambda s: s.shift(1) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 7ed2bfb601eb8..c814b6ad0e30a 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -2,6 +2,8 @@ from __future__ import print_function +import pytest + from datetime import datetime from numpy import random @@ -409,33 +411,35 @@ def test_reindex_dups(self): def test_align(self): af, bf = self.frame.align(self.frame) - self.assertIsNot(af._data, self.frame._data) + assert af._data is not self.frame._data af, bf = self.frame.align(self.frame, copy=False) - self.assertIs(af._data, self.frame._data) + assert af._data is self.frame._data # axis = 0 other = self.frame.iloc[:-5, :3] af, bf = self.frame.align(other, axis=0, fill_value=-1) - self.assert_index_equal(bf.columns, other.columns) + + tm.assert_index_equal(bf.columns, other.columns) + # test fill value join_idx = self.frame.index.join(other.index) diff_a = self.frame.index.difference(join_idx) diff_b = other.index.difference(join_idx) diff_a_vals = af.reindex(diff_a).values diff_b_vals = bf.reindex(diff_b).values - self.assertTrue((diff_a_vals == -1).all()) + assert (diff_a_vals == -1).all() af, bf = self.frame.align(other, join='right', axis=0) - self.assert_index_equal(bf.columns, other.columns) - self.assert_index_equal(bf.index, other.index) - self.assert_index_equal(af.index, other.index) + tm.assert_index_equal(bf.columns, other.columns) + tm.assert_index_equal(bf.index, other.index) + tm.assert_index_equal(af.index, other.index) # axis = 1 other = self.frame.iloc[:-5, :3].copy() af, bf = self.frame.align(other, axis=1) - self.assert_index_equal(bf.columns, self.frame.columns) - self.assert_index_equal(bf.index, other.index) + tm.assert_index_equal(bf.columns, self.frame.columns) + tm.assert_index_equal(bf.index, other.index) # test fill value join_idx = self.frame.index.join(other.index) @@ -446,42 +450,42 @@ def test_align(self): # TODO(wesm): unused? 
diff_b_vals = bf.reindex(diff_b).values # noqa - self.assertTrue((diff_a_vals == -1).all()) + assert (diff_a_vals == -1).all() af, bf = self.frame.align(other, join='inner', axis=1) - self.assert_index_equal(bf.columns, other.columns) + tm.assert_index_equal(bf.columns, other.columns) af, bf = self.frame.align(other, join='inner', axis=1, method='pad') - self.assert_index_equal(bf.columns, other.columns) + tm.assert_index_equal(bf.columns, other.columns) # test other non-float types af, bf = self.intframe.align(other, join='inner', axis=1, method='pad') - self.assert_index_equal(bf.columns, other.columns) + tm.assert_index_equal(bf.columns, other.columns) af, bf = self.mixed_frame.align(self.mixed_frame, join='inner', axis=1, method='pad') - self.assert_index_equal(bf.columns, self.mixed_frame.columns) + tm.assert_index_equal(bf.columns, self.mixed_frame.columns) af, bf = self.frame.align(other.iloc[:, 0], join='inner', axis=1, method=None, fill_value=None) - self.assert_index_equal(bf.index, Index([])) + tm.assert_index_equal(bf.index, Index([])) af, bf = self.frame.align(other.iloc[:, 0], join='inner', axis=1, method=None, fill_value=0) - self.assert_index_equal(bf.index, Index([])) + tm.assert_index_equal(bf.index, Index([])) # mixed floats/ints af, bf = self.mixed_float.align(other.iloc[:, 0], join='inner', axis=1, method=None, fill_value=0) - self.assert_index_equal(bf.index, Index([])) + tm.assert_index_equal(bf.index, Index([])) af, bf = self.mixed_int.align(other.iloc[:, 0], join='inner', axis=1, method=None, fill_value=0) - self.assert_index_equal(bf.index, Index([])) + tm.assert_index_equal(bf.index, Index([])) - # try to align dataframe to series along bad axis - self.assertRaises(ValueError, self.frame.align, af.iloc[0, :3], - join='inner', axis=2) + # Try to align DataFrame to Series along bad axis + with pytest.raises(ValueError): + self.frame.align(af.iloc[0, :3], join='inner', axis=2) # align dataframe to series with broadcast or not idx = self.frame.index @@ -490,7 +494,7 @@ def test_align(self): left, right = self.frame.align(s, axis=0) tm.assert_index_equal(left.index, self.frame.index) tm.assert_index_equal(right.index, self.frame.index) - self.assertTrue(isinstance(right, Series)) + assert isinstance(right, Series) left, right = self.frame.align(s, broadcast_axis=1) tm.assert_index_equal(left.index, self.frame.index) @@ -499,17 +503,17 @@ def test_align(self): expected[c] = s expected = DataFrame(expected, index=self.frame.index, columns=self.frame.columns) - assert_frame_equal(right, expected) + tm.assert_frame_equal(right, expected) - # GH 9558 + # see gh-9558 df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) result = df[df['a'] == 2] expected = DataFrame([[2, 5]], index=[1], columns=['a', 'b']) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.where(df['a'] == 2, 0) expected = DataFrame({'a': [0, 2, 0], 'b': [0, 5, 0]}) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def _check_align(self, a, b, axis, fill_axis, how, method, limit=None): aa, ab = a.align(b, axis=axis, join=how, method=method, limit=limit, diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index bfe1b0aae90b1..74ae89a876294 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -41,17 +41,18 @@ def test_cast_internals(self): def test_consolidate(self): self.frame['E'] = 7. 
consolidated = self.frame._consolidate() - self.assertEqual(len(consolidated._data.blocks), 1) + assert len(consolidated._data.blocks) == 1 # Ensure copy, do I want this? recons = consolidated._consolidate() - self.assertIsNot(recons, consolidated) - assert_frame_equal(recons, consolidated) + assert recons is not consolidated + tm.assert_frame_equal(recons, consolidated) self.frame['F'] = 8. - self.assertEqual(len(self.frame._data.blocks), 3) + assert len(self.frame._data.blocks) == 3 + self.frame._consolidate(inplace=True) - self.assertEqual(len(self.frame._data.blocks), 1) + assert len(self.frame._data.blocks) == 1 def test_consolidate_deprecation(self): self.frame['E'] = 7 @@ -343,11 +344,11 @@ def test_no_copy_blocks(self): def test_copy(self): cop = self.frame.copy() cop['E'] = cop['A'] - self.assertNotIn('E', self.frame) + assert 'E' not in self.frame # copy objects copy = self.mixed_frame.copy() - self.assertIsNot(copy._data, self.mixed_frame._data) + assert copy._data is not self.mixed_frame._data def test_pickle(self): unpickled = tm.round_trip_pickle(self.mixed_frame) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index b624657ca4b4b..a1705084c0edf 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -672,19 +672,19 @@ def test_setitem_ambig(self): self.assertEqual(dm[2].dtype, np.object_) def test_setitem_clear_caches(self): - # GH #304 + # see gh-304 df = DataFrame({'x': [1.1, 2.1, 3.1, 4.1], 'y': [5.1, 6.1, 7.1, 8.1]}, index=[0, 1, 2, 3]) df.insert(2, 'z', np.nan) # cache it foo = df['z'] - df.loc[df.index[2:], 'z'] = 42 expected = Series([np.nan, np.nan, 42, 42], index=df.index, name='z') - self.assertIsNot(df['z'], foo) - assert_series_equal(df['z'], expected) + + assert df['z'] is not foo + tm.assert_series_equal(df['z'], expected) def test_setitem_None(self): # GH #766 diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index eacf032bbcc85..9bb77a57f0f37 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -403,18 +403,18 @@ def test_fillna_inplace(self): df[3][-4:] = np.nan expected = df.fillna(value=0) - self.assertIsNot(expected, df) + assert expected is not df df.fillna(value=0, inplace=True) - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) df[1][:4] = np.nan df[3][-4:] = np.nan expected = df.fillna(method='ffill') - self.assertIsNot(expected, df) + assert expected is not df df.fillna(method='ffill', inplace=True) - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) def test_fillna_dict_series(self): df = DataFrame({'a': [nan, 1, 2, nan, nan], diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 862f76b4ecc05..37b6f0c261789 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -14,8 +14,7 @@ import pandas as pd import pandas.tseries.offsets as offsets -from pandas.util.testing import (assert_almost_equal, - assert_series_equal, +from pandas.util.testing import (assert_series_equal, assert_frame_equal, assertRaisesRegexp) @@ -355,7 +354,7 @@ def test_asfreq(self): offset_monthly = self.tsframe.asfreq(offsets.BMonthEnd()) rule_monthly = self.tsframe.asfreq('BM') - assert_almost_equal(offset_monthly['A'], rule_monthly['A']) + tm.assert_almost_equal(offset_monthly['A'], rule_monthly['A']) filled = rule_monthly.asfreq('B', method='pad') # noqa # TODO: actually check that this worked. 
@@ -366,7 +365,7 @@ def test_asfreq(self): # test does not blow up on length-0 DataFrame zero_length = self.tsframe.reindex([]) result = zero_length.asfreq('BM') - self.assertIsNot(result, zero_length) + assert result is not zero_length def test_asfreq_datetimeindex(self): df = DataFrame({'A': [1, 2, 3]}, diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 15eceac6b00c9..bec55083829b6 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -214,8 +214,9 @@ def test_hash_error(self): hash(ind) def test_copy_name(self): - # Check that "name" argument passed at initialization is honoured - # GH12309 + # gh-12309: Check that the "name" argument + # passed at initialization is honored. + for name, index in compat.iteritems(self.indices): if isinstance(index, MultiIndex): continue @@ -224,18 +225,21 @@ def test_copy_name(self): second = first.__class__(first, copy=False) # Even though "copy=False", we want a new object. - self.assertIsNot(first, second) - # Not using tm.assert_index_equal() since names differ: - self.assertTrue(index.equals(first)) + assert first is not second - self.assertEqual(first.name, 'mario') - self.assertEqual(second.name, 'mario') + # Not using tm.assert_index_equal() since names differ. + assert index.equals(first) + + assert first.name == 'mario' + assert second.name == 'mario' s1 = Series(2, index=first) s2 = Series(3, index=second[:-1]) - if not isinstance(index, CategoricalIndex): # See GH13365 + + if not isinstance(index, CategoricalIndex): + # See gh-13365 s3 = s1 * s2 - self.assertEqual(s3.index.name, 'mario') + assert s3.index.name == 'mario' def test_ensure_copied_data(self): # Check the "copy" argument of each Index.__new__ is honoured @@ -283,11 +287,11 @@ def test_copy_and_deepcopy(self): for func in (copy, deepcopy): idx_copy = func(ind) - self.assertIsNot(idx_copy, ind) - self.assertTrue(idx_copy.equals(ind)) + assert idx_copy is not ind + assert idx_copy.equals(ind) new_copy = ind.copy(deep=True, name="banana") - self.assertEqual(new_copy.name, "banana") + assert new_copy.name == "banana" def test_duplicates(self): for ind in self.indices.values(): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index cc819ff83b1dd..165ad91086d0a 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1851,22 +1851,22 @@ def test_copy_name(self): second = first.__class__(first, copy=False) # Even though "copy=False", we want a new object. 
- self.assertIsNot(first, second) + assert first is not second # Not using tm.assert_index_equal() since names differ: - self.assertTrue(idx.equals(first)) + assert idx.equals(first) - self.assertEqual(first.name, 'mario') - self.assertEqual(second.name, 'mario') + assert first.name == 'mario' + assert second.name == 'mario' s1 = Series(2, index=first) s2 = Series(3, index=second[:-1]) - if PY3: - with tm.assert_produces_warning(RuntimeWarning): - # unorderable types - s3 = s1 * s2 - else: + + warning_type = RuntimeWarning if PY3 else None + with tm.assert_produces_warning(warning_type): + # Python 3: Unorderable types s3 = s1 * s2 - self.assertEqual(s3.index.name, 'mario') + + assert s3.index.name == 'mario' def test_copy_name2(self): # Check that adding a "name" parameter to the copy is honored diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index f2e409deb2ce4..e714bbd4f9d44 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -536,18 +536,20 @@ def test_identical(self): self.assertFalse(ci1.identical(ci2)) def test_ensure_copied_data(self): - # Check the "copy" argument of each Index.__new__ is honoured - # GH12309 + # gh-12309: Check the "copy" argument of each + # Index.__new__ is honored. + # # Must be tested separately from other indexes because - # self.value is not an ndarray + # self.value is not an ndarray. _base = lambda ar: ar if ar.base is None else ar.base + for index in self.indices.values(): result = CategoricalIndex(index.values, copy=True) tm.assert_index_equal(index, result) - self.assertIsNot(_base(index.values), _base(result.values)) + assert _base(index.values) is not _base(result.values) result = CategoricalIndex(index.values, copy=False) - self.assertIs(_base(index.values), _base(result.values)) + assert _base(index.values) is _base(result.values) def test_equals_categorical(self): ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py index a82409fbf9513..cb90beb6a5bfb 100644 --- a/pandas/tests/indexes/test_frozen.py +++ b/pandas/tests/indexes/test_frozen.py @@ -42,13 +42,13 @@ def setUp(self): def test_shallow_copying(self): original = self.container.copy() - self.assertIsInstance(self.container.view(), FrozenNDArray) - self.assertFalse(isinstance( - self.container.view(np.ndarray), FrozenNDArray)) - self.assertIsNot(self.container.view(), self.container) - self.assert_numpy_array_equal(self.container, original) - # shallow copy should be the same too - self.assertIsInstance(self.container._shallow_copy(), FrozenNDArray) + assert isinstance(self.container.view(), FrozenNDArray) + assert not isinstance(self.container.view(np.ndarray), FrozenNDArray) + assert self.container.view() is not self.container + tm.assert_numpy_array_equal(self.container, original) + + # Shallow copy should be the same too + assert isinstance(self.container._shallow_copy(), FrozenNDArray) # setting should not be allowed def testit(container): @@ -59,10 +59,13 @@ def testit(container): def test_values(self): original = self.container.view(np.ndarray).copy() n = original[0] + 15 + vals = self.container.values() - self.assert_numpy_array_equal(original, vals) - self.assertIsNot(original, vals) + tm.assert_numpy_array_equal(original, vals) + + assert original is not vals vals[0] = n - self.assertIsInstance(self.container, FrozenNDArray) - self.assert_numpy_array_equal(self.container.values(), original) - 
self.assertEqual(vals[0], n) + + assert isinstance(self.container, FrozenNDArray) + tm.assert_numpy_array_equal(self.container.values(), original) + assert vals[0] == n diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 2d0015a5258ed..d99ef9538c5b1 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -165,11 +165,11 @@ def test_with_nans(self): def test_copy(self): actual = self.index.copy() - self.assertTrue(actual.equals(self.index)) + assert actual.equals(self.index) actual = self.index.copy(deep=True) - self.assertTrue(actual.equals(self.index)) - self.assertIsNot(actual.left, self.index.left) + assert actual.equals(self.index) + assert actual.left is not self.index.left def test_ensure_copied_data(self): # exercise the copy flag in the constructor diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index b33a317eefd44..75ced9439c398 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -584,21 +584,20 @@ def test_constructor_mismatched_label_levels(self): self.index.copy().labels = [[0, 0, 0, 0], [0, 0]] def assert_multiindex_copied(self, copy, original): - # levels should be (at least, shallow copied) - assert_copy(copy.levels, original.levels) + # Levels should be (at least, shallow copied) + tm.assert_copy(copy.levels, original.levels) + tm.assert_almost_equal(copy.labels, original.labels) - assert_almost_equal(copy.labels, original.labels) + # Labels doesn't matter which way copied + tm.assert_almost_equal(copy.labels, original.labels) + assert copy.labels is not original.labels - # labels doesn't matter which way copied - assert_almost_equal(copy.labels, original.labels) - self.assertIsNot(copy.labels, original.labels) + # Names doesn't matter which way copied + assert copy.names == original.names + assert copy.names is not original.names - # names doesn't matter which way copied - self.assertEqual(copy.names, original.names) - self.assertIsNot(copy.names, original.names) - - # sort order should be copied - self.assertEqual(copy.sortorder, original.sortorder) + # Sort order should be copied + assert copy.sortorder == original.sortorder def test_copy(self): i_copy = self.index.copy() diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index a4a49e3aeb826..d2116c71048ef 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -18,11 +18,11 @@ class TestSeriesApply(TestData, tm.TestCase): def test_apply(self): with np.errstate(all='ignore'): - assert_series_equal(self.ts.apply(np.sqrt), np.sqrt(self.ts)) + tm.assert_series_equal(self.ts.apply(np.sqrt), np.sqrt(self.ts)) - # elementwise-apply + # element-wise apply import math - assert_series_equal(self.ts.apply(math.exp), np.exp(self.ts)) + tm.assert_series_equal(self.ts.apply(math.exp), np.exp(self.ts)) # empty series s = Series(dtype=object, name='foo', index=pd.Index([], name='bar')) @@ -30,10 +30,10 @@ def test_apply(self): tm.assert_series_equal(s, rs) # check all metadata (GH 9322) - self.assertIsNot(s, rs) - self.assertIs(s.index, rs.index) - self.assertEqual(s.dtype, rs.dtype) - self.assertEqual(s.name, rs.name) + assert s is not rs + assert s.index is rs.index + assert s.dtype == rs.dtype + assert s.name == rs.name # index but no data s = Series(index=[1, 2, 3]) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 5a88b5bf98699..431e26ae4fdf9 100644 --- 
a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -240,25 +240,25 @@ def test_asfreq(self): daily_ts = ts.asfreq('B') monthly_ts = daily_ts.asfreq('BM') - assert_series_equal(monthly_ts, ts) + tm.assert_series_equal(monthly_ts, ts) daily_ts = ts.asfreq('B', method='pad') monthly_ts = daily_ts.asfreq('BM') - assert_series_equal(monthly_ts, ts) + tm.assert_series_equal(monthly_ts, ts) daily_ts = ts.asfreq(BDay()) monthly_ts = daily_ts.asfreq(BMonthEnd()) - assert_series_equal(monthly_ts, ts) + tm.assert_series_equal(monthly_ts, ts) result = ts[:0].asfreq('M') - self.assertEqual(len(result), 0) - self.assertIsNot(result, ts) + assert len(result) == 0 + assert result is not ts daily_ts = ts.asfreq('D', fill_value=-1) result = daily_ts.value_counts().sort_index() expected = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_asfreq_datetimeindex_empty_series(self): # GH 14320 diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 8706a05cfe8a2..d740d8bd26581 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -687,7 +687,7 @@ def test_copy_and_deepcopy(self): lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)]: obj_copy = func(obj) - self.assertIsNot(obj_copy, obj) + assert obj_copy is not obj self._compare(obj_copy, obj) diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index b18214bbef926..adca47488413d 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -248,7 +248,7 @@ def test_merge(self): def test_copy(self): cop = self.fblock.copy() - self.assertIsNot(cop, self.fblock) + assert cop is not self.fblock assert_block_equal(self.fblock, cop) def test_reindex_index(self): diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 4e9805ca9d5a6..184052741aa11 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -883,20 +883,20 @@ def test_set_value(self): for mjr in self.panel.major_axis[::2]: for mnr in self.panel.minor_axis: self.panel.set_value(item, mjr, mnr, 1.) - assert_almost_equal(self.panel[item][mnr][mjr], 1.) + tm.assert_almost_equal(self.panel[item][mnr][mjr], 1.) # resize res = self.panel.set_value('ItemE', 'foo', 'bar', 1.5) - tm.assertIsInstance(res, Panel) - self.assertIsNot(res, self.panel) - self.assertEqual(res.get_value('ItemE', 'foo', 'bar'), 1.5) + assert isinstance(res, Panel) + assert res is not self.panel + assert res.get_value('ItemE', 'foo', 'bar') == 1.5 res3 = self.panel.set_value('ItemE', 'foobar', 'baz', 5) - self.assertTrue(is_float_dtype(res3['ItemE'].values)) - with tm.assertRaisesRegexp(TypeError, - "There must be an argument " - "for each axis" - " plus the value provided"): + assert is_float_dtype(res3['ItemE'].values) + + msg = ("There must be an argument for each " + "axis plus the value provided") + with tm.assertRaisesRegexp(TypeError, msg): self.panel.set_value('a') diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 3af47a2b408bc..f704c94cff9f0 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -587,20 +587,20 @@ def test_set_value(self): for mjr in self.panel4d.major_axis[::2]: for mnr in self.panel4d.minor_axis: self.panel4d.set_value(label, item, mjr, mnr, 1.) - assert_almost_equal( + tm.assert_almost_equal( self.panel4d[label][item][mnr][mjr], 1.) 
res3 = self.panel4d.set_value('l4', 'ItemE', 'foobar', 'baz', 5) - self.assertTrue(is_float_dtype(res3['l4'].values)) + assert is_float_dtype(res3['l4'].values) # resize res = self.panel4d.set_value('l4', 'ItemE', 'foo', 'bar', 1.5) - tm.assertIsInstance(res, Panel4D) - self.assertIsNot(res, self.panel4d) - self.assertEqual(res.get_value('l4', 'ItemE', 'foo', 'bar'), 1.5) + assert isinstance(res, Panel4D) + assert res is not self.panel4d + assert res.get_value('l4', 'ItemE', 'foo', 'bar') == 1.5 res3 = self.panel4d.set_value('l4', 'ItemE', 'foobar', 'baz', 5) - self.assertTrue(is_float_dtype(res3['l4'].values)) + assert is_float_dtype(res3['l4'].values) class TestPanel4d(tm.TestCase, CheckIndexing, SafeForSparse, @@ -619,21 +619,21 @@ def test_constructor(self): with catch_warnings(record=True): panel4d = Panel4D(self.panel4d._data) - self.assertIs(panel4d._data, self.panel4d._data) + assert panel4d._data is self.panel4d._data panel4d = Panel4D(self.panel4d._data, copy=True) - self.assertIsNot(panel4d._data, self.panel4d._data) - assert_panel4d_equal(panel4d, self.panel4d) + assert panel4d._data is not self.panel4d._data + tm.assert_panel4d_equal(panel4d, self.panel4d) vals = self.panel4d.values # no copy panel4d = Panel4D(vals) - self.assertIs(panel4d.values, vals) + assert panel4d.values is vals # copy panel4d = Panel4D(vals, copy=True) - self.assertIsNot(panel4d.values, vals) + assert panel4d.values is not vals # GH #8285, test when scalar data is used to construct a Panel4D # if dtype is not passed, it should be inferred @@ -645,7 +645,7 @@ def test_constructor(self): vals = np.empty((2, 3, 4, 5), dtype=dtype) vals.fill(val) expected = Panel4D(vals, dtype=dtype) - assert_panel4d_equal(panel4d, expected) + tm.assert_panel4d_equal(panel4d, expected) # test the case when dtype is passed panel4d = Panel4D(1, labels=range(2), items=range( @@ -654,7 +654,7 @@ def test_constructor(self): vals.fill(1) expected = Panel4D(vals, dtype='float32') - assert_panel4d_equal(panel4d, expected) + tm.assert_panel4d_equal(panel4d, expected) def test_constructor_cast(self): with catch_warnings(record=True): diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index 2ff287acc4c47..bcfa3351ce181 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -12,8 +12,7 @@ DatetimeIndex) from pandas.util import testing as tm from pandas.util.testing import (assert_frame_equal, - makeCustomDataframe as mkdf, - assert_almost_equal) + makeCustomDataframe as mkdf) import pytest @@ -708,25 +707,25 @@ def test_append(self): end_frame = self.frame.reindex(end_index) appended = begin_frame.append(end_frame) - assert_almost_equal(appended['A'], self.frame['A']) + tm.assert_almost_equal(appended['A'], self.frame['A']) del end_frame['A'] partial_appended = begin_frame.append(end_frame) - self.assertIn('A', partial_appended) + assert 'A' in partial_appended partial_appended = end_frame.append(begin_frame) - self.assertIn('A', partial_appended) + assert 'A' in partial_appended # mixed type handling appended = self.mixed_frame[:5].append(self.mixed_frame[5:]) - assert_frame_equal(appended, self.mixed_frame) + tm.assert_frame_equal(appended, self.mixed_frame) # what to test here mixed_appended = self.mixed_frame[:5].append(self.frame[5:]) mixed_appended2 = self.frame[:5].append(self.mixed_frame[5:]) # all equal except 'foo' column - assert_frame_equal( + tm.assert_frame_equal( mixed_appended.reindex(columns=['A', 'B', 'C', 'D']), mixed_appended2.reindex(columns=['A', 
'B', 'C', 'D'])) @@ -734,25 +733,24 @@ def test_append(self): empty = DataFrame({}) appended = self.frame.append(empty) - assert_frame_equal(self.frame, appended) - self.assertIsNot(appended, self.frame) + tm.assert_frame_equal(self.frame, appended) + assert appended is not self.frame appended = empty.append(self.frame) - assert_frame_equal(self.frame, appended) - self.assertIsNot(appended, self.frame) + tm.assert_frame_equal(self.frame, appended) + assert appended is not self.frame - # overlap - self.assertRaises(ValueError, self.frame.append, self.frame, - verify_integrity=True) + # Overlap + with pytest.raises(ValueError): + self.frame.append(self.frame, verify_integrity=True) - # new columns - # GH 6129 + # see gh-6129: new columns df = DataFrame({'a': {'x': 1, 'y': 2}, 'b': {'x': 3, 'y': 4}}) row = Series([5, 6, 7], index=['a', 'b', 'c'], name='z') expected = DataFrame({'a': {'x': 1, 'y': 2, 'z': 5}, 'b': { 'x': 3, 'y': 4, 'z': 6}, 'c': {'z': 7}}) result = df.append(row) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_append_length0_frame(self): df = DataFrame(columns=['A', 'B', 'C']) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index e9ec9d553d3e4..45c66627ad4d6 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1043,12 +1043,6 @@ def assertIs(first, second, msg=''): assert a is b, "%s: %r is not %r" % (msg.format(a, b), a, b) -def assertIsNot(first, second, msg=''): - """Checks that 'first' is not 'second'""" - a, b = first, second - assert a is not b, "%s: %r is %r" % (msg.format(a, b), a, b) - - def assertIn(first, second, msg=''): """Checks that 'first' is in 'second'""" a, b = first, second @@ -1068,7 +1062,7 @@ def assertIsNone(expr, msg=''): def assertIsNotNone(expr, msg=''): """Checks that 'expr' is not None""" - return assertIsNot(expr, None, msg) + assert expr is not None, msg def assertIsInstance(obj, cls, msg=''): @@ -1178,10 +1172,17 @@ def assert_numpy_array_equal(left, right, strict_nan=False, def _get_base(obj): return obj.base if getattr(obj, 'base', None) is not None else obj + left_base = _get_base(left) + right_base = _get_base(right) + if check_same == 'same': - assertIs(_get_base(left), _get_base(right)) + if left_base is not right_base: + msg = "%r is not %r" % (left_base, right_base) + raise AssertionError(msg) elif check_same == 'copy': - assertIsNot(_get_base(left), _get_base(right)) + if left_base is right_base: + msg = "%r is %r" % (left_base, right_base) + raise AssertionError(msg) def _raise(left, right, err_msg): if err_msg is None: From 0e2bbcf95624e5312c9ba0f9de48e9b2a1f2ede0 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 17 Apr 2017 03:52:52 -0400 Subject: [PATCH 407/933] MAINT: Remove assertIsNotNone from testing (#16027) Follow-up to gh-16024. Also removes some vestigial assertIsNot calls missed in #16024. Partially addresses #15990. 
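
For reference, the mechanical rewrite applied across these test modules is
sketched below. The snippet is illustrative only and is not part of the diff
itself; the test function name and the Series fixture are hypothetical:

    import pandas as pd
    import pandas.util.testing as tm

    def test_round_trip_is_distinct_object():
        s = pd.Series(range(3))
        result = tm.round_trip_pickle(s)

        # old idiom, only usable inside a tm.TestCase subclass:
        #     self.assertIsNotNone(result)
        #     self.assertIsNot(result, s)
        #
        # new idiom, plain asserts that run under pytest:
        assert result is not None
        assert result is not s
        tm.assert_series_equal(result, s)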
--- pandas/tests/core/computation/test_eval.py | 9 +- pandas/tests/frame/test_indexing.py | 41 ++++--- .../indexes/datetimes/test_construction.py | 98 +++++++-------- pandas/tests/indexes/datetimes/test_ops.py | 4 +- pandas/tests/indexes/test_multi.py | 43 ++++--- .../indexing/test_chaining_and_caching.py | 114 ++++++++---------- pandas/tests/io/formats/test_format.py | 4 +- pandas/tests/io/formats/test_printing.py | 6 +- pandas/tests/io/test_pytables.py | 36 +++--- pandas/tests/plotting/test_datetimelike.py | 6 +- pandas/tests/series/test_indexing.py | 12 +- pandas/tests/series/test_timeseries.py | 4 +- pandas/tests/test_base.py | 26 ++-- pandas/tests/test_panel.py | 29 +++-- pandas/tests/test_panel4d.py | 11 +- pandas/tests/tools/test_concat.py | 26 ++-- pandas/util/testing.py | 5 - 17 files changed, 237 insertions(+), 237 deletions(-) diff --git a/pandas/tests/core/computation/test_eval.py b/pandas/tests/core/computation/test_eval.py index 38a310a17a9ab..0ba4fe61ae78f 100644 --- a/pandas/tests/core/computation/test_eval.py +++ b/pandas/tests/core/computation/test_eval.py @@ -1308,16 +1308,17 @@ def test_column_in(self): assert_series_equal(result, expected) def assignment_not_inplace(self): - # GH 9297 + # see gh-9297 df = DataFrame(np.random.randn(5, 2), columns=list('ab')) actual = df.eval('c = a + b', inplace=False) - self.assertIsNotNone(actual) + assert actual is not None + expected = df.copy() expected['c'] = expected['a'] + expected['b'] - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) - # default for inplace will change + # Default for inplace will change with tm.assert_produces_warnings(FutureWarning): df.eval('c = a + b') diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index a1705084c0edf..fcd972cb7e09b 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -10,6 +10,8 @@ from numpy import nan from numpy.random import randn + +import pytest import numpy as np import pandas.core.common as com @@ -25,7 +27,6 @@ is_integer, is_scalar) from pandas.util.testing import (assert_almost_equal, - assert_numpy_array_equal, assert_series_equal, assert_frame_equal, assertRaisesRegexp, @@ -40,30 +41,33 @@ class TestDataFrameIndexing(tm.TestCase, TestData): def test_getitem(self): - # slicing + # Slicing sl = self.frame[:20] - self.assertEqual(20, len(sl.index)) - - # column access + assert len(sl.index) == 20 + # Column access for _, series in compat.iteritems(sl): - self.assertEqual(20, len(series.index)) - self.assertTrue(tm.equalContents(series.index, sl.index)) + assert len(series.index) == 20 + assert tm.equalContents(series.index, sl.index) for key, _ in compat.iteritems(self.frame._series): - self.assertIsNotNone(self.frame[key]) + assert self.frame[key] is not None - self.assertNotIn('random', self.frame) + assert 'random' not in self.frame with assertRaisesRegexp(KeyError, 'random'): self.frame['random'] df = self.frame.copy() df['$10'] = randn(len(df)) + ad = randn(len(df)) df['@awesome_domain'] = ad - self.assertRaises(KeyError, df.__getitem__, 'df["$10"]') + + with pytest.raises(KeyError): + df.__getitem__('df["$10"]') + res = df['@awesome_domain'] - assert_numpy_array_equal(ad, res.values) + tm.assert_numpy_array_equal(ad, res.values) def test_getitem_dupe_cols(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) @@ -648,10 +652,10 @@ def test_setitem_corner2(self): self.assertEqual(df.loc[1, 'cruft'], 0) def test_setitem_ambig(self): - # difficulties with 
mixed-type data + # Difficulties with mixed-type data from decimal import Decimal - # created as float type + # Created as float type dm = DataFrame(index=lrange(3), columns=lrange(3)) coercable_series = Series([Decimal(1) for _ in range(3)], @@ -659,17 +663,14 @@ def test_setitem_ambig(self): uncoercable_series = Series(['foo', 'bzr', 'baz'], index=lrange(3)) dm[0] = np.ones(3) - self.assertEqual(len(dm.columns), 3) - # self.assertIsNone(dm.objects) + assert len(dm.columns) == 3 dm[1] = coercable_series - self.assertEqual(len(dm.columns), 3) - # self.assertIsNone(dm.objects) + assert len(dm.columns) == 3 dm[2] = uncoercable_series - self.assertEqual(len(dm.columns), 3) - # self.assertIsNotNone(dm.objects) - self.assertEqual(dm[2].dtype, np.object_) + assert len(dm.columns) == 3 + assert dm[2].dtype == np.object_ def test_setitem_clear_caches(self): # see gh-304 diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index d4e672d0584cf..a7c33dd2e00e9 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -53,15 +53,14 @@ def test_construction_with_alt(self): i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific')) def test_construction_index_with_mixed_timezones(self): - # GH 11488 - # no tz results in DatetimeIndex + # gh-11488: no tz results in DatetimeIndex result = Index([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') exp = DatetimeIndex([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None # same tz results in DatetimeIndex result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), @@ -70,10 +69,10 @@ def test_construction_index_with_mixed_timezones(self): exp = DatetimeIndex( [Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00') ], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz # same tz results in DatetimeIndex (DST) result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), @@ -82,20 +81,20 @@ def test_construction_index_with_mixed_timezones(self): exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-08-01 10:00')], tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz - # different tz results in Index(dtype=object) + # Different tz results in Index(dtype=object) result = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') exp = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) + tm.assert_index_equal(result, exp, exact=True) 
+ assert not isinstance(result, DatetimeIndex) result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], @@ -103,37 +102,37 @@ def test_construction_index_with_mixed_timezones(self): exp = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) # length = 1 result = Index([Timestamp('2011-01-01')], name='idx') exp = DatetimeIndex([Timestamp('2011-01-01')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None # length = 1 with tz result = Index( [Timestamp('2011-01-01 10:00', tz='Asia/Tokyo')], name='idx') exp = DatetimeIndex([Timestamp('2011-01-01 10:00')], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz def test_construction_index_with_mixed_timezones_with_NaT(self): - # GH 11488 + # see gh-11488 result = Index([pd.NaT, Timestamp('2011-01-01'), pd.NaT, Timestamp('2011-01-02')], name='idx') exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01'), pd.NaT, Timestamp('2011-01-02')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None - # same tz results in DatetimeIndex + # Same tz results in DatetimeIndex result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), pd.NaT, Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], @@ -141,10 +140,10 @@ def test_construction_index_with_mixed_timezones_with_NaT(self): exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz # same tz results in DatetimeIndex (DST) result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), @@ -154,10 +153,10 @@ def test_construction_index_with_mixed_timezones_with_NaT(self): exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp('2011-08-01 10:00')], tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz # different tz results in Index(dtype=object) result = Index([pd.NaT, Timestamp('2011-01-01 10:00'), @@ -167,8 +166,8 @@ def 
test_construction_index_with_mixed_timezones_with_NaT(self): exp = Index([pd.NaT, Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), pd.NaT, Timestamp('2011-01-02 10:00', @@ -176,23 +175,24 @@ def test_construction_index_with_mixed_timezones_with_NaT(self): exp = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) # all NaT result = Index([pd.NaT, pd.NaT], name='idx') exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None # all NaT with tz result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) + + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz def test_construction_dti_with_mixed_timezones(self): # GH 11488 (not changed, added explicit tests) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 6e6d6bf190291..8ab29c0c0b6f2 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -1122,7 +1122,7 @@ def test_comparison(self): def test_pickle_unpickle(self): unpickled = tm.round_trip_pickle(self.rng) - self.assertIsNotNone(unpickled.offset) + assert unpickled.offset is not None def test_copy(self): cp = self.rng.copy() @@ -1273,7 +1273,7 @@ def test_shift(self): def test_pickle_unpickle(self): unpickled = tm.round_trip_pickle(self.rng) - self.assertIsNotNone(unpickled.offset) + assert unpickled.offset is not None def test_summary(self): self.rng.summary() diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 75ced9439c398..f907741950b01 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -393,39 +393,46 @@ def test_inplace_mutation_resets_values(self): levels = [['a', 'b', 'c'], [4]] levels2 = [[1, 2, 3], ['a']] labels = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]] + mi1 = MultiIndex(levels=levels, labels=labels) mi2 = MultiIndex(levels=levels2, labels=labels) vals = mi1.values.copy() vals2 = mi2.values.copy() - self.assertIsNotNone(mi1._tuples) - # make sure level setting works + assert mi1._tuples is not None + + # Make sure level setting works new_vals = mi1.set_levels(levels2).values - assert_almost_equal(vals2, new_vals) - # non-inplace doesn't kill _tuples [implementation detail] - assert_almost_equal(mi1._tuples, vals) - # and values is still same too - assert_almost_equal(mi1.values, vals) + 
tm.assert_almost_equal(vals2, new_vals) + + # Non-inplace doesn't kill _tuples [implementation detail] + tm.assert_almost_equal(mi1._tuples, vals) + + # ...and values is still same too + tm.assert_almost_equal(mi1.values, vals) - # inplace should kill _tuples + # Inplace should kill _tuples mi1.set_levels(levels2, inplace=True) - assert_almost_equal(mi1.values, vals2) + tm.assert_almost_equal(mi1.values, vals2) - # make sure label setting works too + # Make sure label setting works too labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] exp_values = np.empty((6, ), dtype=object) exp_values[:] = [(long(1), 'a')] * 6 - # must be 1d array of tuples - self.assertEqual(exp_values.shape, (6, )) + + # Must be 1d array of tuples + assert exp_values.shape == (6, ) new_values = mi2.set_labels(labels2).values - # not inplace shouldn't change - assert_almost_equal(mi2._tuples, vals2) - # should have correct values - assert_almost_equal(exp_values, new_values) - # and again setting inplace should kill _tuples, etc + # Not inplace shouldn't change + tm.assert_almost_equal(mi2._tuples, vals2) + + # Should have correct values + tm.assert_almost_equal(exp_values, new_values) + + # ...and again setting inplace should kill _tuples, etc mi2.set_labels(labels2, inplace=True) - assert_almost_equal(mi2.values, new_values) + tm.assert_almost_equal(mi2.values, new_values) def test_copy_in_constructor(self): levels = np.array(["a", "b", "c"]) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 72e704537ba3f..725de7ce20f5c 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -1,5 +1,7 @@ from warnings import catch_warnings +import pytest + import numpy as np import pandas as pd from pandas.core import common as com @@ -134,7 +136,8 @@ def test_detect_chained_assignment(self): expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB')) df = DataFrame(np.arange(4).reshape(2, 2), columns=list('AB'), dtype='int64') - self.assertIsNone(df.is_copy) + assert df.is_copy is None + df['A'][0] = -5 df['A'][1] = -6 tm.assert_frame_equal(df, expected) @@ -142,71 +145,56 @@ def test_detect_chained_assignment(self): # test with the chaining df = DataFrame({'A': Series(range(2), dtype='int64'), 'B': np.array(np.arange(2, 4), dtype=np.float64)}) - self.assertIsNone(df.is_copy) + assert df.is_copy is None - def f(): + with pytest.raises(com.SettingWithCopyError): df['A'][0] = -5 - self.assertRaises(com.SettingWithCopyError, f) - - def f(): + with pytest.raises(com.SettingWithCopyError): df['A'][1] = np.nan - self.assertRaises(com.SettingWithCopyError, f) - self.assertIsNone(df['A'].is_copy) + assert df['A'].is_copy is None - # using a copy (the chain), fails + # Using a copy (the chain), fails df = DataFrame({'A': Series(range(2), dtype='int64'), 'B': np.array(np.arange(2, 4), dtype=np.float64)}) - def f(): + with pytest.raises(com.SettingWithCopyError): df.loc[0]['A'] = -5 - self.assertRaises(com.SettingWithCopyError, f) - - # doc example + # Doc example df = DataFrame({'a': ['one', 'one', 'two', 'three', 'two', 'one', 'six'], 'c': Series(range(7), dtype='int64')}) - self.assertIsNone(df.is_copy) - expected = DataFrame({'a': ['one', 'one', 'two', 'three', - 'two', 'one', 'six'], - 'c': [42, 42, 2, 3, 4, 42, 6]}) + assert df.is_copy is None - def f(): + with pytest.raises(com.SettingWithCopyError): indexer = df.a.str.startswith('o') df[indexer]['c'] = 42 - 
self.assertRaises(com.SettingWithCopyError, f) - expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]}) df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) - def f(): + with pytest.raises(com.SettingWithCopyError): df['A'][0] = 111 - self.assertRaises(com.SettingWithCopyError, f) - - def f(): + with pytest.raises(com.SettingWithCopyError): df.loc[0]['A'] = 111 - self.assertRaises(com.SettingWithCopyError, f) - df.loc[0, 'A'] = 111 tm.assert_frame_equal(df, expected) - # make sure that is_copy is picked up reconstruction - # GH5475 + # gh-5475: Make sure that is_copy is picked up reconstruction df = DataFrame({"A": [1, 2]}) - self.assertIsNone(df.is_copy) + assert df.is_copy is None + with tm.ensure_clean('__tmp__pickle') as path: df.to_pickle(path) df2 = pd.read_pickle(path) df2["B"] = df2["A"] df2["B"] = df2["A"] - # a suprious raise as we are setting the entire column here - # GH5597 + # gh-5597: a spurious raise as we are setting the entire column here from string import ascii_letters as letters def random_text(nobs=100): @@ -214,42 +202,48 @@ def random_text(nobs=100): for i in range(nobs): idx = np.random.randint(len(letters), size=2) idx.sort() + df.append([letters[idx[0]:idx[1]]]) return DataFrame(df, columns=['letters']) df = random_text(100000) - # always a copy + # Always a copy x = df.iloc[[0, 1, 2]] - self.assertIsNotNone(x.is_copy) + assert x.is_copy is not None + x = df.iloc[[0, 1, 2, 4]] - self.assertIsNotNone(x.is_copy) + assert x.is_copy is not None - # explicity copy + # Explicitly copy indexer = df.letters.apply(lambda x: len(x) > 10) df = df.loc[indexer].copy() - self.assertIsNone(df.is_copy) + + assert df.is_copy is None df['letters'] = df['letters'].apply(str.lower) - # implicity take + # Implicitly take df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) df = df.loc[indexer] - self.assertIsNotNone(df.is_copy) + + assert df.is_copy is not None df['letters'] = df['letters'].apply(str.lower) - # implicity take 2 + # Implicitly take 2 df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) + df = df.loc[indexer] - self.assertIsNotNone(df.is_copy) + assert df.is_copy is not None df.loc[:, 'letters'] = df['letters'].apply(str.lower) - # should be ok even though it's a copy! - self.assertIsNone(df.is_copy) + # Should be ok even though it's a copy! 
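[Note on the hunks above] The mechanical rewrite running through this file — a throwaway `def f(): ...` plus `self.assertRaises(exc, f)` becoming a `pytest.raises` context manager — looks like this in isolation (a minimal sketch; the frame is illustrative, `SettingWithCopyError` lives in `pandas.core.common` in this era, and the suite runs with chained assignment set to raise):

import pandas as pd
import pytest
from pandas.core import common as com

pd.set_option('mode.chained_assignment', 'raise')  # the test suite's setting

df = pd.DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]})

# Old unittest style: wrap the offending statement in a named function,
# then assert that calling it raises.
def f():
    df.loc[0]['A'] = 111
# self.assertRaises(com.SettingWithCopyError, f)

# New pytest style: the offending statement sits directly in the checked block.
with pytest.raises(com.SettingWithCopyError):
    df.loc[0]['A'] = 111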
+ assert df.is_copy is None + df['letters'] = df['letters'].apply(str.lower) - self.assertIsNone(df.is_copy) + assert df.is_copy is None df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) @@ -258,11 +252,10 @@ def random_text(nobs=100): # an identical take, so no copy df = DataFrame({'a': [1]}).dropna() - self.assertIsNone(df.is_copy) + assert df.is_copy is None df['a'] += 1 - # inplace ops - # original from: + # Inplace ops, originally from: # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug a = [12, 23] b = [123, None] @@ -277,23 +270,25 @@ def random_text(nobs=100): multiind = MultiIndex.from_tuples(tuples, names=['part', 'side']) zed = DataFrame(events, index=['a', 'b'], columns=multiind) - def f(): + with pytest.raises(com.SettingWithCopyError): zed['eyes']['right'].fillna(value=555, inplace=True) - self.assertRaises(com.SettingWithCopyError, f) - df = DataFrame(np.random.randn(10, 4)) s = df.iloc[:, 0].sort_values() + tm.assert_series_equal(s, df.iloc[:, 0].sort_values()) tm.assert_series_equal(s, df[0].sort_values()) - # false positives GH6025 + # see gh-6025: false positives df = DataFrame({'column1': ['a', 'a', 'a'], 'column2': [4, 8, 9]}) str(df) + df['column1'] = df['column1'] + 'b' str(df) + df = df[df['column2'] != 8] str(df) + df['column1'] = df['column1'] + 'c' str(df) @@ -302,33 +297,24 @@ def f(): df = DataFrame(np.arange(0, 9), columns=['count']) df['group'] = 'b' - def f(): + with pytest.raises(com.SettingWithCopyError): df.iloc[0:5]['group'] = 'a' - self.assertRaises(com.SettingWithCopyError, f) - - # mixed type setting - # same dtype & changing dtype + # Mixed type setting but same dtype & changing dtype df = DataFrame(dict(A=date_range('20130101', periods=5), B=np.random.randn(5), C=np.arange(5, dtype='int64'), D=list('abcde'))) - def f(): + with pytest.raises(com.SettingWithCopyError): df.loc[2]['D'] = 'foo' - self.assertRaises(com.SettingWithCopyError, f) - - def f(): + with pytest.raises(com.SettingWithCopyError): df.loc[2]['C'] = 'foo' - self.assertRaises(com.SettingWithCopyError, f) - - def f(): + with pytest.raises(com.SettingWithCopyError): df['C'][2] = 'foo' - self.assertRaises(com.SettingWithCopyError, f) - def test_setting_with_copy_bug(self): # operating on a copy diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index bb766ae389a10..354ce99f567ea 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1546,12 +1546,12 @@ def get_ipython(): {'parent_appname': 'ipython-qtconsole'}}} repstr = self.frame._repr_html_() - self.assertIsNotNone(repstr) + assert repstr is not None fmt.set_option('display.max_rows', 5, 'display.max_columns', 2) repstr = self.frame._repr_html_() - self.assertIn('class', repstr) # info fallback + assert 'class' in repstr # info fallback tm.reset_display_options() def test_pprint_pathological_object(self): diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 0df35da05578a..d2c3b47aba042 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -170,13 +170,15 @@ def test_config_on(self): df = pd.DataFrame({"A": [1, 2]}) with pd.option_context("display.html.table_schema", True): result = df._repr_table_schema_() - self.assertIsNotNone(result) + + assert result is not None def test_config_default_off(self): df = pd.DataFrame({"A": [1, 2]}) with 
pd.option_context("display.html.table_schema", False): result = df._repr_table_schema_() - self.assertIsNone(result) + + assert result is None # TODO: fix this broken test diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index f28b2a0231433..57effd8163be5 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4667,7 +4667,7 @@ def test_categorical(self): with ensure_clean_store(self.path) as store: - # basic + # Basic _maybe_remove(store, 's') s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[ 'a', 'b', 'c', 'd'], ordered=False)) @@ -4683,12 +4683,13 @@ def test_categorical(self): tm.assert_series_equal(s, result) _maybe_remove(store, 'df') + df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]}) store.append('df', df, format='table') result = store.select('df') tm.assert_frame_equal(result, df) - # dtypes + # Dtypes s = Series([1, 1, 2, 2, 3, 4, 5]).astype('category') store.append('si', s) result = store.select('si') @@ -4699,17 +4700,17 @@ def test_categorical(self): result = store.select('si2') tm.assert_series_equal(result, s) - # multiple + # Multiple df2 = df.copy() df2['s2'] = Series(list('abcdefg')).astype('category') store.append('df2', df2) result = store.select('df2') tm.assert_frame_equal(result, df2) - # make sure the metadata is ok - self.assertTrue('/df2 ' in str(store)) - self.assertTrue('/df2/meta/values_block_0/meta' in str(store)) - self.assertTrue('/df2/meta/values_block_1/meta' in str(store)) + # Make sure the metadata is OK + assert '/df2 ' in str(store) + assert '/df2/meta/values_block_0/meta' in str(store) + assert '/df2/meta/values_block_1/meta' in str(store) # unordered s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[ @@ -4718,7 +4719,7 @@ def test_categorical(self): result = store.select('s2') tm.assert_series_equal(result, s) - # query + # Query store.append('df3', df, data_columns=['s']) expected = df[df.s.isin(['b', 'c'])] result = store.select('df3', where=['s in ["b","c"]']) @@ -4736,7 +4737,7 @@ def test_categorical(self): result = store.select('df3', where=['s in ["f"]']) tm.assert_frame_equal(result, expected) - # appending with same categories is ok + # Appending with same categories is ok store.append('df3', df) df = concat([df, df]) @@ -4744,20 +4745,21 @@ def test_categorical(self): result = store.select('df3', where=['s in ["b","c"]']) tm.assert_frame_equal(result, expected) - # appending must have the same categories + # Appending must have the same categories df3 = df.copy() df3['s'].cat.remove_unused_categories(inplace=True) - self.assertRaises(ValueError, lambda: store.append('df3', df3)) + with pytest.raises(ValueError): + store.append('df3', df3) - # remove - # make sure meta data is removed (its a recursive removal so should - # be) + # Remove, and make sure meta data is removed (its a recursive + # removal so should be). 
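[Note on the hunk above] For context, the categorical round-trip these assertions exercise, reduced to its core (a sketch, not the test itself; requires PyTables, and the file name is illustrative):

import pandas as pd
from pandas import Series, Categorical

s = Series(Categorical(['a', 'b', 'b', 'a'], categories=list('abcd')))

with pd.HDFStore('tmp.h5') as store:        # illustrative path
    store.append('s', s, format='table')    # categoricals need the table format
    roundtripped = store.select('s')        # categories restored from /meta nodes

pd.util.testing.assert_series_equal(roundtripped, s)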
result = store.select('df3/meta/s/meta') - self.assertIsNotNone(result) + assert result is not None store.remove('df3') - self.assertRaises( - KeyError, lambda: store.select('df3/meta/s/meta')) + + with pytest.raises(KeyError): + store.select('df3/meta/s/meta') def test_categorical_conversion(self): diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index b3692c5a8d2d2..547770ebcf6e5 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -296,12 +296,14 @@ def test_irregular_datetime64_repr_bug(self): fig = plt.gcf() plt.clf() + ax = fig.add_subplot(211) + ret = ser.plot() - self.assertIsNotNone(ret) + assert ret is not None for rs, xp in zip(ax.get_lines()[0].get_xdata(), ser.index): - self.assertEqual(rs, xp) + assert rs == xp def test_business_freq(self): import matplotlib.pyplot as plt # noqa diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 6c1d77acd70d5..38251ab0b228b 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1873,14 +1873,14 @@ def test_align_nocopy(self): rb[:2] = 5 self.assertTrue((b[:2] == 5).all()) - def test_align_sameindex(self): + def test_align_same_index(self): a, b = self.ts.align(self.ts, copy=False) - self.assertIs(a.index, self.ts.index) - self.assertIs(b.index, self.ts.index) + assert a.index is self.ts.index + assert b.index is self.ts.index - # a, b = self.ts.align(self.ts, copy=True) - # self.assertIsNot(a.index, self.ts.index) - # self.assertIsNot(b.index, self.ts.index) + a, b = self.ts.align(self.ts, copy=True) + assert a.index is not self.ts.index + assert b.index is not self.ts.index def test_align_multiindex(self): # GH 10665 diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 431e26ae4fdf9..0f960a890e72b 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -411,12 +411,12 @@ def test_contiguous_boolean_preserve_freq(self): masked = rng[mask] expected = rng[10:20] - self.assertIsNotNone(expected.freq) + assert expected.freq is not None assert_range_equal(masked, expected) mask[22] = True masked = rng[mask] - self.assertIsNone(masked.freq) + assert masked.freq is None def test_to_datetime_unit(self): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 91c06a2c30e50..148f2ae425629 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -304,26 +304,28 @@ def test_none_comparison(self): def test_ndarray_compat_properties(self): for o in self.objs: + # Check that we work. 
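[Note on the test_contiguous_boolean_preserve_freq hunk above] The behaviour under test, restated standalone (a sketch; range and mask are illustrative):

import numpy as np
import pandas as pd

rng = pd.date_range('2000-01-01', periods=30, freq='D')
mask = np.zeros(len(rng), dtype=bool)
mask[10:20] = True

expected = rng[10:20]
assert expected.freq is not None    # a contiguous slice keeps its freq

mask[22] = True
assert rng[mask].freq is None       # a gapped selection cannot carry one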
+ for p in ['shape', 'dtype', 'flags', 'T', + 'strides', 'itemsize', 'nbytes']: + assert getattr(o, p, None) is not None - # check that we work - for p in ['shape', 'dtype', 'flags', 'T', 'strides', 'itemsize', - 'nbytes']: - self.assertIsNotNone(getattr(o, p, None)) - self.assertTrue(hasattr(o, 'base')) + assert hasattr(o, 'base') - # if we have a datetimelike dtype then needs a view to work + # If we have a datetime-like dtype then needs a view to work # but the user is responsible for that try: - self.assertIsNotNone(o.data) + assert o.data is not None except ValueError: pass - self.assertRaises(ValueError, o.item) # len > 1 - self.assertEqual(o.ndim, 1) - self.assertEqual(o.size, len(o)) + with pytest.raises(ValueError): + o.item() # len > 1 - self.assertEqual(Index([1]).item(), 1) - self.assertEqual(Series([1]).item(), 1) + assert o.ndim == 1 + assert o.size == len(o) + + assert Index([1]).item() == 1 + assert Series([1]).item() == 1 def test_ops(self): for op in ['max', 'min']: diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 184052741aa11..55e0e512169fb 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -597,17 +597,18 @@ def test_xs(self): with catch_warnings(record=True): itemA = self.panel.xs('ItemA', axis=0) expected = self.panel['ItemA'] - assert_frame_equal(itemA, expected) + tm.assert_frame_equal(itemA, expected) - # get a view by default + # Get a view by default. itemA_view = self.panel.xs('ItemA', axis=0) itemA_view.values[:] = np.nan - self.assertTrue(np.isnan(self.panel['ItemA'].values).all()) - # mixed-type yields a copy + assert np.isnan(self.panel['ItemA'].values).all() + + # Mixed-type yields a copy. self.panel['strings'] = 'foo' result = self.panel.xs('D', axis=2) - self.assertIsNotNone(result.is_copy) + assert result.is_copy is not None def test_getitem_fancy_labels(self): with catch_warnings(record=True): @@ -917,25 +918,25 @@ def test_constructor(self): with catch_warnings(record=True): # with BlockManager wp = Panel(self.panel._data) - self.assertIs(wp._data, self.panel._data) + assert wp._data is self.panel._data wp = Panel(self.panel._data, copy=True) - self.assertIsNot(wp._data, self.panel._data) - assert_panel_equal(wp, self.panel) + assert wp._data is not self.panel._data + tm.assert_panel_equal(wp, self.panel) # strings handled prop wp = Panel([[['foo', 'foo', 'foo', ], ['foo', 'foo', 'foo']]]) - self.assertEqual(wp.values.dtype, np.object_) + assert wp.values.dtype == np.object_ vals = self.panel.values # no copy wp = Panel(vals) - self.assertIs(wp.values, vals) + assert wp.values is vals # copy wp = Panel(vals, copy=True) - self.assertIsNot(wp.values, vals) + assert wp.values is not vals # GH #8285, test when scalar data is used to construct a Panel # if dtype is not passed, it should be inferred @@ -946,7 +947,8 @@ def test_constructor(self): minor_axis=range(4)) vals = np.empty((2, 3, 4), dtype=dtype) vals.fill(val) - assert_panel_equal(wp, Panel(vals, dtype=dtype)) + + tm.assert_panel_equal(wp, Panel(vals, dtype=dtype)) # test the case when dtype is passed wp = Panel(1, items=range(2), major_axis=range(3), @@ -954,7 +956,8 @@ def test_constructor(self): dtype='float32') vals = np.empty((2, 3, 4), dtype='float32') vals.fill(1) - assert_panel_equal(wp, Panel(vals, dtype='float32')) + + tm.assert_panel_equal(wp, Panel(vals, dtype='float32')) def test_constructor_cast(self): with catch_warnings(record=True): diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 
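[Note on the ndarray-compat loop above] The same checks, exercised standalone (a sketch against the 0.20-era API; several of these array properties on Index were deprecated in later versions):

import pandas as pd

idx = pd.Index([1, 2, 3])
for p in ['shape', 'dtype', 'T', 'strides', 'itemsize', 'nbytes']:
    assert getattr(idx, p, None) is not None

assert idx.ndim == 1 and idx.size == 3
assert pd.Index([1]).item() == 1    # .item() works only on length-1 objects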
f704c94cff9f0..fa3bb2d66b573 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -510,18 +510,19 @@ def test_minor_xs_mixed(self): def test_xs(self): l1 = self.panel4d.xs('l1', axis=0) expected = self.panel4d['l1'] - assert_panel_equal(l1, expected) + tm.assert_panel_equal(l1, expected) - # view if possible + # View if possible l1_view = self.panel4d.xs('l1', axis=0) l1_view.values[:] = np.nan - self.assertTrue(np.isnan(self.panel4d['l1'].values).all()) + assert np.isnan(self.panel4d['l1'].values).all() - # mixed-type + # Mixed-type self.panel4d['strings'] = 'foo' with catch_warnings(record=True): result = self.panel4d.xs('D', axis=3) - self.assertIsNotNone(result.is_copy) + + assert result.is_copy is not None def test_getitem_fancy_labels(self): with catch_warnings(record=True): diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index bcfa3351ce181..e6514a1e2e81e 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -854,39 +854,37 @@ def test_append_missing_column_proper_upcast(self): class TestConcatenate(ConcatenateBase): def test_concat_copy(self): - df = DataFrame(np.random.randn(4, 3)) df2 = DataFrame(np.random.randint(0, 10, size=4).reshape(4, 1)) df3 = DataFrame({5: 'foo'}, index=range(4)) - # these are actual copies + # These are actual copies. result = concat([df, df2, df3], axis=1, copy=True) + for b in result._data.blocks: - self.assertIsNone(b.values.base) + assert b.values.base is None - # these are the same + # These are the same. result = concat([df, df2, df3], axis=1, copy=False) + for b in result._data.blocks: if b.is_float: - self.assertTrue( - b.values.base is df._data.blocks[0].values.base) + assert b.values.base is df._data.blocks[0].values.base elif b.is_integer: - self.assertTrue( - b.values.base is df2._data.blocks[0].values.base) + assert b.values.base is df2._data.blocks[0].values.base elif b.is_object: - self.assertIsNotNone(b.values.base) + assert b.values.base is not None - # float block was consolidated + # Float block was consolidated. 
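[Note on the test_concat_copy hunk above] The block-level contract these assertions encode, sketched on its own (uses the internal `_data` BlockManager of this era; the frames are illustrative):

import numpy as np
import pandas as pd
from pandas import DataFrame, concat

df = DataFrame(np.random.randn(4, 3))
df2 = DataFrame(np.random.randint(0, 10, size=(4, 1)))

copied = concat([df, df2], axis=1, copy=True)
for b in copied._data.blocks:
    assert b.values.base is None        # every block owns fresh memory

shared = concat([df, df2], axis=1, copy=False)
for b in shared._data.blocks:
    if b.is_float:                      # the float block is a view on df's data
        assert b.values.base is df._data.blocks[0].values.base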
df4 = DataFrame(np.random.randn(4, 1)) result = concat([df, df2, df3, df4], axis=1, copy=False) for b in result._data.blocks: if b.is_float: - self.assertIsNone(b.values.base) + assert b.values.base is None elif b.is_integer: - self.assertTrue( - b.values.base is df2._data.blocks[0].values.base) + assert b.values.base is df2._data.blocks[0].values.base elif b.is_object: - self.assertIsNotNone(b.values.base) + assert b.values.base is not None def test_concat_with_group_keys(self): df = DataFrame(np.random.randn(4, 3)) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 45c66627ad4d6..b5797674641c8 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1060,11 +1060,6 @@ def assertIsNone(expr, msg=''): return assertIs(expr, None, msg) -def assertIsNotNone(expr, msg=''): - """Checks that 'expr' is not None""" - assert expr is not None, msg - - def assertIsInstance(obj, cls, msg=''): """Test that obj is an instance of cls (which can be a class or a tuple of classes, From c6060a80933f9b2495f9f8c1e7a215bac7f85f19 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 17 Apr 2017 13:14:38 +0200 Subject: [PATCH 408/933] CLN: clean benchmarks to get them running (#16025) * fix lib and algos import * fix take_1d import * string uppercase -> ascii_uppercase (py3 compat) * sas test file path * fix datetools usage * fix hashing benchmarks * dict values py3 compat * avoid overflow by using higher freq * xrange -> range * fix xport path * revised hdfstore_bench to use new query syntax rename table variables * change default python version to 3.6 --- asv_bench/asv.conf.json | 2 +- asv_bench/benchmarks/algorithms.py | 13 +++++-- asv_bench/benchmarks/frame_ctor.py | 4 +- asv_bench/benchmarks/frame_methods.py | 4 +- asv_bench/benchmarks/gil.py | 18 ++++++--- asv_bench/benchmarks/groupby.py | 2 +- asv_bench/benchmarks/hdfstore_bench.py | 48 ++++++++++++------------ asv_bench/benchmarks/inference.py | 4 +- asv_bench/benchmarks/join_merge.py | 4 +- asv_bench/benchmarks/packers.py | 16 ++++---- asv_bench/benchmarks/pandas_vb_common.py | 4 +- asv_bench/benchmarks/panel_ctor.py | 5 ++- asv_bench/benchmarks/replace.py | 2 - asv_bench/benchmarks/timeseries.py | 3 +- 14 files changed, 68 insertions(+), 61 deletions(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 4fc6f9f634426..62f1c090a7462 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -26,7 +26,7 @@ // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. // "pythons": ["2.7", "3.4"], - "pythons": ["2.7"], + "pythons": ["3.6"], // The matrix of dependencies to test. Each key is the name of a // package (in PyPI) and the values are version numbers. 
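[Note on the commit message bullets above] The Python-3 pitfalls fixed in these benchmarks, side by side (illustrative snippets, not part of the diff):

import string

some_dict = {'a': 1}
first = list(some_dict.values())[0]    # dict views aren't indexable on Python 3
letters = string.ascii_uppercase       # string.uppercase exists only on Python 2
rng = range(2000)                      # xrange is gone on Python 3

# "avoid overflow by using higher freq": 1e8 daily periods overrun the
# datetime64[ns] range (roughly years 1677-2262); minutely periods do not.
# pd.date_range('1900-01-01', periods=10 ** 8, freq='D')  # would overflow
# pd.date_range('1900-01-01', periods=10 ** 8, freq='T')  # fits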
An empty diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index fe657936c403e..0e2182c58d44c 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -2,6 +2,11 @@ import pandas as pd from pandas.util import testing as tm +try: + from pandas.tools.hashing import hash_pandas_object +except ImportError: + pass + class Algorithms(object): goal_time = 0.2 @@ -103,13 +108,13 @@ def setup(self): self.df.iloc[10:20] = np.nan def time_frame(self): - self.df.hash() + hash_pandas_object(self.df) def time_series_int(self): - self.df.E.hash() + hash_pandas_object(self.df.E) def time_series_string(self): - self.df.B.hash() + hash_pandas_object(self.df.B) def time_series_categorical(self): - self.df.C.hash() + hash_pandas_object(self.df.C) diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index 05c1a27fdf8ca..dec4fcba0eb5e 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -20,12 +20,12 @@ def setup(self): self.data = self.frame.to_dict() except: self.data = self.frame.toDict() - self.some_dict = self.data.values()[0] + self.some_dict = list(self.data.values())[0] self.dict_list = [dict(zip(self.columns, row)) for row in self.frame.values] self.data2 = dict( ((i, dict(((j, float(j)) for j in range(100)))) for i in - xrange(2000))) + range(2000))) def time_frame_ctor_list_of_dict(self): DataFrame(self.dict_list) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 9f491302a4d6f..af72ca1e9a6ab 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -56,7 +56,7 @@ def time_reindex_both_axes_ix(self): self.df.ix[(self.idx, self.idx)] def time_reindex_upcast(self): - self.df2.reindex(permutation(range(1200))) + self.df2.reindex(np.random.permutation(range(1200))) #---------------------------------------------------------------------- @@ -583,7 +583,7 @@ class frame_assign_timeseries_index(object): goal_time = 0.2 def setup(self): - self.idx = date_range('1/1/2000', periods=100000, freq='D') + self.idx = date_range('1/1/2000', periods=100000, freq='H') self.df = DataFrame(randn(100000, 1), columns=['A'], index=self.idx) def time_frame_assign_timeseries_index(self): diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index 1c5e59672cb57..78a94976e732d 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -1,11 +1,17 @@ from .pandas_vb_common import * -from pandas.core import common as com + +from pandas.core.algorithms import take_1d try: from cStringIO import StringIO except ImportError: from io import StringIO +try: + from pandas._libs import algos +except ImportError: + from pandas import algos + try: from pandas.util.testing import test_parallel @@ -167,11 +173,11 @@ def time_nogil_take1d_float64(self): @test_parallel(num_threads=2) def take_1d_pg2_int64(self): - com.take_1d(self.df.int64.values, self.indexer) + take_1d(self.df.int64.values, self.indexer) @test_parallel(num_threads=2) def take_1d_pg2_float64(self): - com.take_1d(self.df.float64.values, self.indexer) + take_1d(self.df.float64.values, self.indexer) class nogil_take1d_int64(object): @@ -193,11 +199,11 @@ def time_nogil_take1d_int64(self): @test_parallel(num_threads=2) def take_1d_pg2_int64(self): - com.take_1d(self.df.int64.values, self.indexer) + take_1d(self.df.int64.values, self.indexer) @test_parallel(num_threads=2) def take_1d_pg2_float64(self): - 
com.take_1d(self.df.float64.values, self.indexer) + take_1d(self.df.float64.values, self.indexer) class nogil_kth_smallest(object): @@ -226,7 +232,7 @@ class nogil_datetime_fields(object): def setup(self): self.N = 100000000 - self.dti = pd.date_range('1900-01-01', periods=self.N, freq='D') + self.dti = pd.date_range('1900-01-01', periods=self.N, freq='T') self.period = self.dti.to_period('D') if (not have_real_test_parallel): raise NotImplementedError diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index b8d8e8b7912d7..c0c3a42cc4464 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -331,7 +331,7 @@ def setup(self): def get_test_data(self, ngroups=100, n=100000): self.unique_groups = range(self.ngroups) - self.arr = np.asarray(np.tile(self.unique_groups, (n / self.ngroups)), dtype=object) + self.arr = np.asarray(np.tile(self.unique_groups, int(n / self.ngroups)), dtype=object) if (len(self.arr) < n): self.arr = np.asarray((list(self.arr) + self.unique_groups[:(n - len(self.arr))]), dtype=object) random.shuffle(self.arr) diff --git a/asv_bench/benchmarks/hdfstore_bench.py b/asv_bench/benchmarks/hdfstore_bench.py index 78de5267a2969..dc72f3d548aaf 100644 --- a/asv_bench/benchmarks/hdfstore_bench.py +++ b/asv_bench/benchmarks/hdfstore_bench.py @@ -31,16 +31,12 @@ def setup(self): self.remove(self.f) self.store = HDFStore(self.f) - self.store.put('df1', self.df) - self.store.put('df_mixed', self.df_mixed) - - self.store.append('df5', self.df_mixed) - self.store.append('df7', self.df) - - self.store.append('df9', self.df_wide) - - self.store.append('df11', self.df_wide2) - self.store.append('df12', self.df2) + self.store.put('fixed', self.df) + self.store.put('fixed_mixed', self.df_mixed) + self.store.append('table', self.df2) + self.store.append('table_mixed', self.df_mixed) + self.store.append('table_wide', self.df_wide) + self.store.append('table_wide2', self.df_wide2) def teardown(self): self.store.close() @@ -52,45 +48,47 @@ def remove(self, f): pass def time_read_store(self): - self.store.get('df1') + self.store.get('fixed') def time_read_store_mixed(self): - self.store.get('df_mixed') + self.store.get('fixed_mixed') def time_write_store(self): - self.store.put('df2', self.df) + self.store.put('fixed_write', self.df) def time_write_store_mixed(self): - self.store.put('df_mixed2', self.df_mixed) + self.store.put('fixed_mixed_write', self.df_mixed) def time_read_store_table_mixed(self): - self.store.select('df5') + self.store.select('table_mixed') def time_write_store_table_mixed(self): - self.store.append('df6', self.df_mixed) + self.store.append('table_mixed_write', self.df_mixed) def time_read_store_table(self): - self.store.select('df7') + self.store.select('table') def time_write_store_table(self): - self.store.append('df8', self.df) + self.store.append('table_write', self.df) def time_read_store_table_wide(self): - self.store.select('df9') + self.store.select('table_wide') def time_write_store_table_wide(self): - self.store.append('df10', self.df_wide) + self.store.append('table_wide_write', self.df_wide) def time_write_store_table_dc(self): - self.store.append('df15', self.df, data_columns=True) + self.store.append('table_dc_write', self.df_dc, data_columns=True) def time_query_store_table_wide(self): - self.store.select('df11', [('index', '>', self.df_wide2.index[10000]), - ('index', '<', self.df_wide2.index[15000])]) + start = self.df_wide2.index[10000] + stop = self.df_wide2.index[15000] + 
self.store.select('table_wide', where="index > start and index < stop") def time_query_store_table(self): - self.store.select('df12', [('index', '>', self.df2.index[10000]), - ('index', '<', self.df2.index[15000])]) + start = self.df2.index[10000] + stop = self.df2.index[15000] + self.store.select('table', where="index > start and index < stop") class HDF5Panel(object): diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index 3635438a7f76b..dc1d6de73f8ae 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -113,5 +113,5 @@ def setup(self): self.na_values = set() def time_convert(self): - pd.lib.maybe_convert_numeric(self.data, self.na_values, - coerce_numeric=False) + lib.maybe_convert_numeric(self.data, self.na_values, + coerce_numeric=False) diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 776316343e009..3b0e33b72ddc1 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -314,12 +314,12 @@ def setup(self): self.df1 = pd.DataFrame( {'time': np.random.randint(0, one_count / 20, one_count), - 'key': np.random.choice(list(string.uppercase), one_count), + 'key': np.random.choice(list(string.ascii_uppercase), one_count), 'key2': np.random.randint(0, 25, one_count), 'value1': np.random.randn(one_count)}) self.df2 = pd.DataFrame( {'time': np.random.randint(0, two_count / 20, two_count), - 'key': np.random.choice(list(string.uppercase), two_count), + 'key': np.random.choice(list(string.ascii_uppercase), two_count), 'key2': np.random.randint(0, 25, two_count), 'value2': np.random.randn(two_count)}) diff --git a/asv_bench/benchmarks/packers.py b/asv_bench/benchmarks/packers.py index cd43e305ead8f..24f80cc836dd4 100644 --- a/asv_bench/benchmarks/packers.py +++ b/asv_bench/benchmarks/packers.py @@ -153,18 +153,20 @@ def time_packers_read_stata_with_validation(self): class packers_read_sas(_Packers): def setup(self): - self.f = os.path.join(os.path.dirname(__file__), '..', '..', - 'pandas', 'io', 'tests', 'sas', 'data', - 'test1.sas7bdat') - self.f2 = os.path.join(os.path.dirname(__file__), '..', '..', - 'pandas', 'io', 'tests', 'sas', 'data', - 'paxraw_d_short.xpt') + + testdir = os.path.join(os.path.dirname(__file__), '..', '..', + 'pandas', 'tests', 'io', 'sas') + if not os.path.exists(testdir): + testdir = os.path.join(os.path.dirname(__file__), '..', '..', + 'pandas', 'io', 'tests', 'sas') + self.f = os.path.join(testdir, 'data', 'test1.sas7bdat') + self.f2 = os.path.join(testdir, 'data', 'paxraw_d_short.xpt') def time_read_sas7bdat(self): pd.read_sas(self.f, format='sas7bdat') def time_read_xport(self): - pd.read_sas(self.f, format='xport') + pd.read_sas(self.f2, format='xport') class CSV(_Packers): diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py index 56ccc94c414fb..b1a58e49fe86c 100644 --- a/asv_bench/benchmarks/pandas_vb_common.py +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -1,9 +1,7 @@ from pandas import * import pandas as pd -from datetime import timedelta from numpy.random import randn from numpy.random import randint -from numpy.random import permutation import pandas.util.testing as tm import random import numpy as np @@ -18,7 +16,7 @@ np.random.seed(1234) # try em until it works! 
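[Note on the hdfstore_bench hunks above] That rewrite swaps the old tuple query syntax for where-strings; the two forms compared (a sketch; requires PyTables, and the path and table name are illustrative):

import numpy as np
import pandas as pd

df = pd.DataFrame({'x': np.random.randn(25000)},
                  index=pd.date_range('2000-01-01', periods=25000, freq='T'))

with pd.HDFStore('bench.h5') as store:  # illustrative path
    store.append('table', df)
    start = df.index[10000]
    stop = df.index[15000]
    # Old tuple syntax, as removed above:
    #   store.select('table', [('index', '>', start), ('index', '<', stop)])
    # Where-strings; bare names resolve against the caller's namespace:
    result = store.select('table', where="index > start and index < stop")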
-for imp in ['pandas_tseries', 'pandas.lib', 'pandas._libs.lib']: +for imp in ['pandas._libs.lib', 'pandas.lib', 'pandas_tseries']: try: lib = import_module(imp) break diff --git a/asv_bench/benchmarks/panel_ctor.py b/asv_bench/benchmarks/panel_ctor.py index faedce6c574ec..cc6071b054662 100644 --- a/asv_bench/benchmarks/panel_ctor.py +++ b/asv_bench/benchmarks/panel_ctor.py @@ -1,4 +1,5 @@ from .pandas_vb_common import * +from datetime import timedelta class Constructors1(object): @@ -24,7 +25,7 @@ class Constructors2(object): def setup(self): self.data_frames = {} for x in range(100): - self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq=datetools.Day(1))) + self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq='D')) self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr) self.data_frames[x] = self.df @@ -36,7 +37,7 @@ class Constructors3(object): goal_time = 0.2 def setup(self): - self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq=datetools.Day(1))) + self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq='D')) self.data_frames = {} for x in range(100): self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr) diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index 66b8af53801ac..63562f90eab2b 100644 --- a/asv_bench/benchmarks/replace.py +++ b/asv_bench/benchmarks/replace.py @@ -1,6 +1,4 @@ from .pandas_vb_common import * -from pandas.compat import range -from datetime import timedelta class replace_fillna(object): diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index b63b3386a7563..f5ea4d7875931 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -4,7 +4,6 @@ from pandas.tseries.converter import DatetimeConverter from .pandas_vb_common import * import pandas as pd -from datetime import timedelta import datetime as dt try: import pandas.tseries.holiday @@ -57,7 +56,7 @@ def setup(self): self.a = self.rng7[:50000].append(self.rng7[50002:]) def time_add_timedelta(self): - (self.rng + timedelta(minutes=2)) + (self.rng + dt.timedelta(minutes=2)) def time_add_offset_delta(self): (self.rng + self.delta_offset) From 90dd3f94cf120f628c086fdd965fc82878951bd9 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 17 Apr 2017 11:20:53 +0000 Subject: [PATCH 409/933] TST: reduce amount of nesting in tests, specifically move core routines up higher (#16030) --- pandas/tests/{core => computation}/__init__.py | 0 pandas/tests/{core => }/computation/test_compat.py | 0 pandas/tests/{core => }/computation/test_eval.py | 0 pandas/tests/core/sparse/__init__.py | 0 pandas/tests/{core/computation => dtypes}/__init__.py | 0 pandas/tests/{core => }/dtypes/test_cast.py | 0 pandas/tests/{core => }/dtypes/test_common.py | 0 pandas/tests/{core => }/dtypes/test_concat.py | 0 pandas/tests/{core => }/dtypes/test_dtypes.py | 0 pandas/tests/{core => }/dtypes/test_generic.py | 0 pandas/tests/{core => }/dtypes/test_inference.py | 0 pandas/tests/{core => }/dtypes/test_io.py | 0 pandas/tests/{core => }/dtypes/test_missing.py | 0 pandas/tests/{core/dtypes => sparse}/__init__.py | 0 pandas/tests/{core => }/sparse/common.py | 0 pandas/tests/{core => }/sparse/test_arithmetics.py | 0 pandas/tests/{core => }/sparse/test_array.py | 0 
pandas/tests/{core => }/sparse/test_combine_concat.py | 0 pandas/tests/{core => }/sparse/test_format.py | 0 pandas/tests/{core => }/sparse/test_frame.py | 0 pandas/tests/{core => }/sparse/test_groupby.py | 0 pandas/tests/{core => }/sparse/test_indexing.py | 0 pandas/tests/{core => }/sparse/test_libsparse.py | 0 pandas/tests/{core => }/sparse/test_list.py | 0 pandas/tests/{core => }/sparse/test_pivot.py | 0 pandas/tests/{core => }/sparse/test_series.py | 0 setup.py | 6 +++--- 27 files changed, 3 insertions(+), 3 deletions(-) rename pandas/tests/{core => computation}/__init__.py (100%) rename pandas/tests/{core => }/computation/test_compat.py (100%) rename pandas/tests/{core => }/computation/test_eval.py (100%) delete mode 100644 pandas/tests/core/sparse/__init__.py rename pandas/tests/{core/computation => dtypes}/__init__.py (100%) rename pandas/tests/{core => }/dtypes/test_cast.py (100%) rename pandas/tests/{core => }/dtypes/test_common.py (100%) rename pandas/tests/{core => }/dtypes/test_concat.py (100%) rename pandas/tests/{core => }/dtypes/test_dtypes.py (100%) rename pandas/tests/{core => }/dtypes/test_generic.py (100%) rename pandas/tests/{core => }/dtypes/test_inference.py (100%) rename pandas/tests/{core => }/dtypes/test_io.py (100%) rename pandas/tests/{core => }/dtypes/test_missing.py (100%) rename pandas/tests/{core/dtypes => sparse}/__init__.py (100%) rename pandas/tests/{core => }/sparse/common.py (100%) rename pandas/tests/{core => }/sparse/test_arithmetics.py (100%) rename pandas/tests/{core => }/sparse/test_array.py (100%) rename pandas/tests/{core => }/sparse/test_combine_concat.py (100%) rename pandas/tests/{core => }/sparse/test_format.py (100%) rename pandas/tests/{core => }/sparse/test_frame.py (100%) rename pandas/tests/{core => }/sparse/test_groupby.py (100%) rename pandas/tests/{core => }/sparse/test_indexing.py (100%) rename pandas/tests/{core => }/sparse/test_libsparse.py (100%) rename pandas/tests/{core => }/sparse/test_list.py (100%) rename pandas/tests/{core => }/sparse/test_pivot.py (100%) rename pandas/tests/{core => }/sparse/test_series.py (100%) diff --git a/pandas/tests/core/__init__.py b/pandas/tests/computation/__init__.py similarity index 100% rename from pandas/tests/core/__init__.py rename to pandas/tests/computation/__init__.py diff --git a/pandas/tests/core/computation/test_compat.py b/pandas/tests/computation/test_compat.py similarity index 100% rename from pandas/tests/core/computation/test_compat.py rename to pandas/tests/computation/test_compat.py diff --git a/pandas/tests/core/computation/test_eval.py b/pandas/tests/computation/test_eval.py similarity index 100% rename from pandas/tests/core/computation/test_eval.py rename to pandas/tests/computation/test_eval.py diff --git a/pandas/tests/core/sparse/__init__.py b/pandas/tests/core/sparse/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/tests/core/computation/__init__.py b/pandas/tests/dtypes/__init__.py similarity index 100% rename from pandas/tests/core/computation/__init__.py rename to pandas/tests/dtypes/__init__.py diff --git a/pandas/tests/core/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py similarity index 100% rename from pandas/tests/core/dtypes/test_cast.py rename to pandas/tests/dtypes/test_cast.py diff --git a/pandas/tests/core/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py similarity index 100% rename from pandas/tests/core/dtypes/test_common.py rename to pandas/tests/dtypes/test_common.py diff --git 
a/pandas/tests/core/dtypes/test_concat.py b/pandas/tests/dtypes/test_concat.py similarity index 100% rename from pandas/tests/core/dtypes/test_concat.py rename to pandas/tests/dtypes/test_concat.py diff --git a/pandas/tests/core/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py similarity index 100% rename from pandas/tests/core/dtypes/test_dtypes.py rename to pandas/tests/dtypes/test_dtypes.py diff --git a/pandas/tests/core/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py similarity index 100% rename from pandas/tests/core/dtypes/test_generic.py rename to pandas/tests/dtypes/test_generic.py diff --git a/pandas/tests/core/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py similarity index 100% rename from pandas/tests/core/dtypes/test_inference.py rename to pandas/tests/dtypes/test_inference.py diff --git a/pandas/tests/core/dtypes/test_io.py b/pandas/tests/dtypes/test_io.py similarity index 100% rename from pandas/tests/core/dtypes/test_io.py rename to pandas/tests/dtypes/test_io.py diff --git a/pandas/tests/core/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py similarity index 100% rename from pandas/tests/core/dtypes/test_missing.py rename to pandas/tests/dtypes/test_missing.py diff --git a/pandas/tests/core/dtypes/__init__.py b/pandas/tests/sparse/__init__.py similarity index 100% rename from pandas/tests/core/dtypes/__init__.py rename to pandas/tests/sparse/__init__.py diff --git a/pandas/tests/core/sparse/common.py b/pandas/tests/sparse/common.py similarity index 100% rename from pandas/tests/core/sparse/common.py rename to pandas/tests/sparse/common.py diff --git a/pandas/tests/core/sparse/test_arithmetics.py b/pandas/tests/sparse/test_arithmetics.py similarity index 100% rename from pandas/tests/core/sparse/test_arithmetics.py rename to pandas/tests/sparse/test_arithmetics.py diff --git a/pandas/tests/core/sparse/test_array.py b/pandas/tests/sparse/test_array.py similarity index 100% rename from pandas/tests/core/sparse/test_array.py rename to pandas/tests/sparse/test_array.py diff --git a/pandas/tests/core/sparse/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py similarity index 100% rename from pandas/tests/core/sparse/test_combine_concat.py rename to pandas/tests/sparse/test_combine_concat.py diff --git a/pandas/tests/core/sparse/test_format.py b/pandas/tests/sparse/test_format.py similarity index 100% rename from pandas/tests/core/sparse/test_format.py rename to pandas/tests/sparse/test_format.py diff --git a/pandas/tests/core/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py similarity index 100% rename from pandas/tests/core/sparse/test_frame.py rename to pandas/tests/sparse/test_frame.py diff --git a/pandas/tests/core/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py similarity index 100% rename from pandas/tests/core/sparse/test_groupby.py rename to pandas/tests/sparse/test_groupby.py diff --git a/pandas/tests/core/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py similarity index 100% rename from pandas/tests/core/sparse/test_indexing.py rename to pandas/tests/sparse/test_indexing.py diff --git a/pandas/tests/core/sparse/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py similarity index 100% rename from pandas/tests/core/sparse/test_libsparse.py rename to pandas/tests/sparse/test_libsparse.py diff --git a/pandas/tests/core/sparse/test_list.py b/pandas/tests/sparse/test_list.py similarity index 100% rename from pandas/tests/core/sparse/test_list.py rename to 
pandas/tests/sparse/test_list.py diff --git a/pandas/tests/core/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py similarity index 100% rename from pandas/tests/core/sparse/test_pivot.py rename to pandas/tests/sparse/test_pivot.py diff --git a/pandas/tests/core/sparse/test_series.py b/pandas/tests/sparse/test_series.py similarity index 100% rename from pandas/tests/core/sparse/test_series.py rename to pandas/tests/sparse/test_series.py diff --git a/setup.py b/setup.py index 5e474153d0ee1..a1ec567a20ee2 100755 --- a/setup.py +++ b/setup.py @@ -655,9 +655,9 @@ def pxd(name): 'pandas.util', 'pandas.tests', 'pandas.tests.api', - 'pandas.tests.core.dtypes', - 'pandas.tests.core.computation', - 'pandas.tests.core.sparse', + 'pandas.tests.dtypes', + 'pandas.tests.computation', + 'pandas.tests.sparse', 'pandas.tests.frame', 'pandas.tests.indexes', 'pandas.tests.indexes.datetimes', From 89bd26871727615aeef63be6243ced4ed5501f26 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 17 Apr 2017 08:37:23 -0400 Subject: [PATCH 410/933] DOC: fix Styler import in api docs --- doc/source/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index d0f548cc3d0b1..868f0d7f9c962 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1821,7 +1821,7 @@ Computations / Descriptive Stats Style ----- -.. currentmodule:: pandas.formats.style +.. currentmodule:: pandas.io.formats.style ``Styler`` objects are returned by :attr:`pandas.DataFrame.style`. From 1a117fc52bd07bba3e42121cf900d16c7184e622 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 17 Apr 2017 14:10:27 +0000 Subject: [PATCH 411/933] CLN: move pandas.indexes -> pandas.core.indexes (#16031) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/compat/pickle_compat.py | 16 +++++++++++++--- pandas/core/api.py | 2 +- pandas/core/categorical.py | 2 +- pandas/core/index.py | 4 ++-- pandas/{ => core}/indexes/__init__.py | 0 pandas/{ => core}/indexes/api.py | 12 ++++++------ pandas/{ => core}/indexes/base.py | 4 ++-- pandas/{ => core}/indexes/category.py | 4 ++-- pandas/{ => core}/indexes/frozen.py | 0 pandas/{ => core}/indexes/interval.py | 14 ++++++++------ pandas/{ => core}/indexes/multi.py | 12 +++++++----- pandas/{ => core}/indexes/numeric.py | 5 +++-- pandas/{ => core}/indexes/range.py | 6 +++--- pandas/core/sparse/array.py | 2 +- pandas/tests/api/test_api.py | 3 +-- pandas/tests/indexes/common.py | 2 +- pandas/tests/indexes/test_base.py | 2 +- pandas/tests/indexes/test_category.py | 2 +- pandas/tests/indexes/test_frozen.py | 2 +- pandas/tests/indexes/test_multi.py | 2 +- pandas/tseries/base.py | 4 ++-- pandas/tseries/index.py | 2 +- pandas/tseries/period.py | 4 ++-- pandas/tseries/tdi.py | 2 +- setup.py | 2 +- 26 files changed, 63 insertions(+), 48 deletions(-) rename pandas/{ => core}/indexes/__init__.py (100%) rename pandas/{ => core}/indexes/api.py (90%) rename pandas/{ => core}/indexes/base.py (99%) rename pandas/{ => core}/indexes/category.py (99%) rename pandas/{ => core}/indexes/frozen.py (100%) rename pandas/{ => core}/indexes/interval.py (99%) rename pandas/{ => core}/indexes/multi.py (99%) rename pandas/{ => core}/indexes/numeric.py (99%) rename pandas/{ => core}/indexes/range.py (99%) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index de33b7d4e3371..9df82b8ac7338 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1344,6 +1344,7 @@ If indicated, a deprecation warning will be issued if you reference theses 
modul "pandas.index", "pandas._libs.index", "" "pandas.algos", "pandas._libs.algos", "" "pandas.hashtable", "pandas._libs.hashtable", "" + "pandas.indexes", "pandas.core.indexes", "" "pandas.json", "pandas.io.json.libjson", "X" "pandas.parser", "pandas.io.libparsers", "X" "pandas.formats", "pandas.io.formats", "" diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index e977fdc3a267d..f7d451ce7c92f 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -59,9 +59,9 @@ def load_reduce(self): # 15477 ('pandas.core.base', 'FrozenNDArray'): - ('pandas.indexes.frozen', 'FrozenNDArray'), + ('pandas.core.indexes.frozen', 'FrozenNDArray'), ('pandas.core.base', 'FrozenList'): - ('pandas.indexes.frozen', 'FrozenList'), + ('pandas.core.indexes.frozen', 'FrozenList'), # 10890 ('pandas.core.series', 'TimeSeries'): @@ -84,7 +84,17 @@ def load_reduce(self): ('pandas.sparse.series', 'SparseSeries'): ('pandas.core.sparse.series', 'SparseSeries'), ('pandas.sparse.frame', 'SparseDataFrame'): - ('pandas.core.sparse.frame', 'SparseDataFrame') + ('pandas.core.sparse.frame', 'SparseDataFrame'), + ('pandas.indexes.base', '_new_Index'): + ('pandas.core.indexes.base', '_new_Index'), + ('pandas.indexes.base', 'Index'): + ('pandas.core.indexes.base', 'Index'), + ('pandas.indexes.numeric', 'Int64Index'): + ('pandas.core.indexes.numeric', 'Int64Index'), + ('pandas.indexes.range', 'RangeIndex'): + ('pandas.core.indexes.range', 'RangeIndex'), + ('pandas.indexes.multi', 'MultiIndex'): + ('pandas.core.indexes.multi', 'MultiIndex') } diff --git a/pandas/core/api.py b/pandas/core/api.py index 3c739d85d0074..865fe367873d8 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -12,7 +12,7 @@ from pandas.core.index import (Index, CategoricalIndex, Int64Index, UInt64Index, RangeIndex, Float64Index, MultiIndex, IntervalIndex) -from pandas.indexes.interval import Interval, interval_range +from pandas.core.indexes.interval import Interval, interval_range from pandas.core.series import Series from pandas.core.frame import DataFrame diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 50181486d8cf7..a12cec33fb350 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -2128,7 +2128,7 @@ def _factorize_from_iterable(values): If `values` has a categorical dtype, then `categories` is a CategoricalIndex keeping the categories and order of `values`. 
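[Note on the pickle_compat hunk above] The entries added there keep pickles written by older pandas loadable; in effect, the unpickler consults the mapping before resolving a class (simplified sketch of the mechanism, not the verbatim implementation — the dict name `_class_locations_map` comes from the pandas source and is not shown in this hunk):

from pandas.compat.pickle_compat import _class_locations_map

key = ('pandas.indexes.base', 'Index')     # (module, name) recorded in an old pickle
new = _class_locations_map.get(key, key)   # remapped before the class is imported
assert new == ('pandas.core.indexes.base', 'Index')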
""" - from pandas.indexes.category import CategoricalIndex + from pandas.core.indexes.category import CategoricalIndex if not is_list_like(values): raise TypeError("Input must be list-like") diff --git a/pandas/core/index.py b/pandas/core/index.py index 05f98d59a1f56..2d1c22f5623a1 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1,3 +1,3 @@ # flake8: noqa -from pandas.indexes.api import * -from pandas.indexes.multi import _sparsify +from pandas.core.indexes.api import * +from pandas.core.indexes.multi import _sparsify diff --git a/pandas/indexes/__init__.py b/pandas/core/indexes/__init__.py similarity index 100% rename from pandas/indexes/__init__.py rename to pandas/core/indexes/__init__.py diff --git a/pandas/indexes/api.py b/pandas/core/indexes/api.py similarity index 90% rename from pandas/indexes/api.py rename to pandas/core/indexes/api.py index db076b60ab34e..d40f6da4c4ee5 100644 --- a/pandas/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -1,12 +1,12 @@ -from pandas.indexes.base import (Index, _new_Index, # noqa +from pandas.core.indexes.base import (Index, _new_Index, # noqa _ensure_index, _get_na_value, InvalidIndexError) -from pandas.indexes.category import CategoricalIndex # noqa -from pandas.indexes.multi import MultiIndex # noqa -from pandas.indexes.interval import IntervalIndex # noqa -from pandas.indexes.numeric import (NumericIndex, Float64Index, # noqa +from pandas.core.indexes.category import CategoricalIndex # noqa +from pandas.core.indexes.multi import MultiIndex # noqa +from pandas.core.indexes.interval import IntervalIndex # noqa +from pandas.core.indexes.numeric import (NumericIndex, Float64Index, # noqa Int64Index, UInt64Index) -from pandas.indexes.range import RangeIndex # noqa +from pandas.core.indexes.range import RangeIndex # noqa import pandas.core.common as com import pandas._libs.lib as lib diff --git a/pandas/indexes/base.py b/pandas/core/indexes/base.py similarity index 99% rename from pandas/indexes/base.py rename to pandas/core/indexes/base.py index d88e54dcc9521..5149d45514e2e 100644 --- a/pandas/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -43,7 +43,7 @@ import pandas.core.base as base from pandas.util.decorators import (Appender, Substitution, cache_readonly, deprecate, deprecate_kwarg) -from pandas.indexes.frozen import FrozenList +from pandas.core.indexes.frozen import FrozenList import pandas.core.common as com import pandas.core.dtypes.concat as _concat import pandas.core.missing as missing @@ -1675,7 +1675,7 @@ def append(self, other): if self.is_categorical(): # if calling index is category, don't check dtype of others - from pandas.indexes.category import CategoricalIndex + from pandas.core.indexes.category import CategoricalIndex return CategoricalIndex._append_same_dtype(self, to_concat, name) typs = _concat.get_dtype_kinds(to_concat) diff --git a/pandas/indexes/category.py b/pandas/core/indexes/category.py similarity index 99% rename from pandas/indexes/category.py rename to pandas/core/indexes/category.py index 5f9d106189767..257ca86947f2b 100644 --- a/pandas/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -16,10 +16,10 @@ from pandas.util.decorators import Appender, cache_readonly from pandas.core.config import get_option -from pandas.indexes.base import Index, _index_shared_docs +from pandas.core.indexes.base import Index, _index_shared_docs import pandas.core.base as base import pandas.core.missing as missing -import pandas.indexes.base as ibase +import pandas.core.indexes.base as ibase 
_index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update(dict(target_klass='CategoricalIndex')) diff --git a/pandas/indexes/frozen.py b/pandas/core/indexes/frozen.py similarity index 100% rename from pandas/indexes/frozen.py rename to pandas/core/indexes/frozen.py diff --git a/pandas/indexes/interval.py b/pandas/core/indexes/interval.py similarity index 99% rename from pandas/indexes/interval.py rename to pandas/core/indexes/interval.py index 6f68e67d702fe..f14e7bf6bd183 100644 --- a/pandas/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -16,20 +16,22 @@ is_interval_dtype, is_scalar, is_integer) -from pandas.indexes.base import (Index, _ensure_index, - default_pprint, _index_shared_docs) +from pandas.core.indexes.base import ( + Index, _ensure_index, + default_pprint, _index_shared_docs) from pandas._libs import Timestamp, Timedelta -from pandas._libs.interval import (Interval, IntervalMixin, IntervalTree, - intervals_to_interval_bounds) +from pandas._libs.interval import ( + Interval, IntervalMixin, IntervalTree, + intervals_to_interval_bounds) -from pandas.indexes.multi import MultiIndex +from pandas.core.indexes.multi import MultiIndex from pandas.compat.numpy import function as nv from pandas.core import common as com from pandas.util.decorators import cache_readonly, Appender from pandas.core.config import get_option -import pandas.indexes.base as ibase +import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( dict(klass='IntervalIndex', diff --git a/pandas/indexes/multi.py b/pandas/core/indexes/multi.py similarity index 99% rename from pandas/indexes/multi.py rename to pandas/core/indexes/multi.py index b341bfe7b5215..40e7118ca0f6a 100644 --- a/pandas/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -35,11 +35,13 @@ from pandas.core.config import get_option -from pandas.indexes.base import (Index, _ensure_index, - _get_na_value, InvalidIndexError, - _index_shared_docs) -from pandas.indexes.frozen import FrozenNDArray, FrozenList, _ensure_frozen -import pandas.indexes.base as ibase +from pandas.core.indexes.base import ( + Index, _ensure_index, + _get_na_value, InvalidIndexError, + _index_shared_docs) +from pandas.core.indexes.frozen import ( + FrozenNDArray, FrozenList, _ensure_frozen) +import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( dict(klass='MultiIndex', diff --git a/pandas/indexes/numeric.py b/pandas/core/indexes/numeric.py similarity index 99% rename from pandas/indexes/numeric.py rename to pandas/core/indexes/numeric.py index 6b9999239cd88..21ba2a386d96a 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -9,9 +9,10 @@ from pandas import compat from pandas.core import algorithms -from pandas.indexes.base import Index, InvalidIndexError, _index_shared_docs +from pandas.core.indexes.base import ( + Index, InvalidIndexError, _index_shared_docs) from pandas.util.decorators import Appender, cache_readonly -import pandas.indexes.base as ibase +import pandas.core.indexes.base as ibase _num_index_shared_docs = dict() diff --git a/pandas/indexes/range.py b/pandas/core/indexes/range.py similarity index 99% rename from pandas/indexes/range.py rename to pandas/core/indexes/range.py index 1eedfcc619aec..acd040693af2e 100644 --- a/pandas/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -12,11 +12,11 @@ from pandas import compat from pandas.compat import lrange, range from pandas.compat.numpy 
import function as nv -from pandas.indexes.base import Index, _index_shared_docs +from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util.decorators import Appender, cache_readonly -import pandas.indexes.base as ibase +import pandas.core.indexes.base as ibase -from pandas.indexes.numeric import Int64Index +from pandas.core.indexes.numeric import Int64Index class RangeIndex(Int64Index): diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index d3fdfe5533a03..ef3600266c037 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -36,7 +36,7 @@ import pandas.core.ops as ops import pandas.io.formats.printing as printing from pandas.util.decorators import Appender -from pandas.indexes.base import _index_shared_docs +from pandas.core.indexes.base import _index_shared_docs _sparray_doc_kwargs = dict(klass='SparseArray') diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index ec9e6039c6ee4..13e6d065382a6 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -30,8 +30,7 @@ class TestPDApi(Base, tm.TestCase): ignored = ['tests', 'locale', 'conftest'] # top-level sub-packages - lib = ['api', 'compat', 'core', - 'indexes', 'errors', 'pandas', + lib = ['api', 'compat', 'core', 'errors', 'pandas', 'plotting', 'test', 'testing', 'tools', 'tseries', 'util', 'options', 'io'] diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index bec55083829b6..9003a3707e417 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -419,7 +419,7 @@ def test_numpy_argsort(self): # pandas compatibility input validation - the # rest already perform separate (or no) such # validation via their 'values' attribute as - # defined in pandas.indexes/base.py - they + # defined in pandas.core.indexes/base.py - they # cannot be changed at the moment due to # backwards compatibility concerns if isinstance(type(ind), (CategoricalIndex, RangeIndex)): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 165ad91086d0a..de15abe89712a 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -3,7 +3,7 @@ from datetime import datetime, timedelta import pandas.util.testing as tm -from pandas.indexes.api import Index, MultiIndex +from pandas.core.indexes.api import Index, MultiIndex from pandas.tests.indexes.common import Base from pandas.compat import (range, lrange, lzip, u, diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index e714bbd4f9d44..6e869890bfcd6 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import pandas.util.testing as tm -from pandas.indexes.api import Index, CategoricalIndex +from pandas.core.indexes.api import Index, CategoricalIndex from .common import Base from pandas.compat import range, PY3 diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py index cb90beb6a5bfb..ed2e3d94aa4a4 100644 --- a/pandas/tests/indexes/test_frozen.py +++ b/pandas/tests/indexes/test_frozen.py @@ -1,7 +1,7 @@ import numpy as np from pandas.util import testing as tm from pandas.tests.test_base import CheckImmutable, CheckStringMixin -from pandas.indexes.frozen import FrozenList, FrozenNDArray +from pandas.core.indexes.frozen import FrozenList, FrozenNDArray from pandas.compat import u diff --git a/pandas/tests/indexes/test_multi.py 
b/pandas/tests/indexes/test_multi.py index f907741950b01..d45182d8d82c5 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -16,7 +16,7 @@ compat, date_range, period_range) from pandas.compat import PY3, long, lrange, lzip, range, u from pandas.errors import PerformanceWarning, UnsortedIndexError -from pandas.indexes.base import InvalidIndexError +from pandas.core.indexes.base import InvalidIndexError from pandas._libs import lib from pandas._libs.lib import Timestamp diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index b419aae709683..3daa88fe396f6 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -28,12 +28,12 @@ from pandas._libs.period import Period from pandas.core.index import Index -from pandas.indexes.base import _index_shared_docs +from pandas.core.indexes.base import _index_shared_docs from pandas.util.decorators import Appender, cache_readonly import pandas.core.dtypes.concat as _concat import pandas.tseries.frequencies as frequencies -import pandas.indexes.base as ibase +import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index a964b6d9e09d3..d9aa72fe065ab 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -30,7 +30,7 @@ from pandas.core.common import _values_from_object, _maybe_box from pandas.core.index import Index, Int64Index, Float64Index -from pandas.indexes.base import _index_shared_docs +from pandas.core.indexes.base import _index_shared_docs import pandas.compat as compat from pandas.tseries.frequencies import ( to_offset, get_period_alias, diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 66275925ff355..b19e086b818f0 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -37,14 +37,14 @@ _quarter_to_myear) from pandas.core.base import _shared_docs -from pandas.indexes.base import _index_shared_docs, _ensure_index +from pandas.core.indexes.base import _index_shared_docs, _ensure_index from pandas import compat from pandas.util.decorators import (Appender, Substitution, cache_readonly, deprecate_kwarg) from pandas.compat import zip, u -import pandas.indexes.base as ibase +import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( dict(target_klass='PeriodIndex or list of Periods')) diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 020b7328238b7..7768b4a340775 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -23,7 +23,7 @@ from pandas.tseries.frequencies import to_offset from pandas.core.algorithms import checked_add_with_arr from pandas.core.base import _shared_docs -from pandas.indexes.base import _index_shared_docs +from pandas.core.indexes.base import _index_shared_docs import pandas.core.common as com import pandas.core.dtypes.concat as _concat from pandas.util.decorators import Appender, Substitution, deprecate_kwarg diff --git a/setup.py b/setup.py index a1ec567a20ee2..6fc66e2355c0f 100755 --- a/setup.py +++ b/setup.py @@ -640,9 +640,9 @@ def pxd(name): 'pandas.compat.numpy', 'pandas.core', 'pandas.core.dtypes', + 'pandas.core.indexes', 'pandas.core.computation', 'pandas.core.sparse', - 'pandas.indexes', 'pandas.errors', 'pandas.io', 'pandas.io.json', From 1a094376680368e9d4e5dfa6c900e99f6291ca41 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 17 Apr 2017 22:27:06 +0200 Subject: [PATCH 412/933] CLN: remove old vb_suite files (now 
asv_bench) (#16034) --- test_perf.sh | 5 - vb_suite/.gitignore | 4 - vb_suite/attrs_caching.py | 20 - vb_suite/binary_ops.py | 199 ------ vb_suite/categoricals.py | 16 - vb_suite/ctors.py | 39 -- vb_suite/eval.py | 150 ----- vb_suite/frame_ctor.py | 123 ---- vb_suite/frame_methods.py | 525 --------------- vb_suite/generate_rst_files.py | 2 - vb_suite/gil.py | 110 ---- vb_suite/groupby.py | 620 ------------------ vb_suite/hdfstore_bench.py | 278 -------- vb_suite/index_object.py | 173 ----- vb_suite/indexing.py | 292 --------- vb_suite/inference.py | 36 - vb_suite/io_bench.py | 150 ----- vb_suite/io_sql.py | 126 ---- vb_suite/join_merge.py | 270 -------- vb_suite/make.py | 167 ----- vb_suite/measure_memory_consumption.py | 55 -- vb_suite/miscellaneous.py | 32 - vb_suite/packers.py | 252 ------- vb_suite/pandas_vb_common.py | 30 - vb_suite/panel_ctor.py | 76 --- vb_suite/panel_methods.py | 28 - vb_suite/parser_vb.py | 112 ---- vb_suite/perf_HEAD.py | 243 ------- vb_suite/plotting.py | 25 - vb_suite/reindex.py | 225 ------- vb_suite/replace.py | 36 - vb_suite/reshape.py | 65 -- vb_suite/run_suite.py | 15 - vb_suite/series_methods.py | 39 -- vb_suite/source/_static/stub | 0 vb_suite/source/conf.py | 225 ------- vb_suite/source/themes/agogo/layout.html | 95 --- .../source/themes/agogo/static/agogo.css_t | 476 -------------- .../source/themes/agogo/static/bgfooter.png | Bin 434 -> 0 bytes vb_suite/source/themes/agogo/static/bgtop.png | Bin 430 -> 0 bytes vb_suite/source/themes/agogo/theme.conf | 19 - vb_suite/sparse.py | 65 -- vb_suite/stat_ops.py | 126 ---- vb_suite/strings.py | 59 -- vb_suite/suite.py | 164 ----- vb_suite/test.py | 67 -- vb_suite/test_perf.py | 616 ----------------- vb_suite/timedelta.py | 32 - vb_suite/timeseries.py | 445 ------------- 49 files changed, 6927 deletions(-) delete mode 100755 test_perf.sh delete mode 100644 vb_suite/.gitignore delete mode 100644 vb_suite/attrs_caching.py delete mode 100644 vb_suite/binary_ops.py delete mode 100644 vb_suite/categoricals.py delete mode 100644 vb_suite/ctors.py delete mode 100644 vb_suite/eval.py delete mode 100644 vb_suite/frame_ctor.py delete mode 100644 vb_suite/frame_methods.py delete mode 100644 vb_suite/generate_rst_files.py delete mode 100644 vb_suite/gil.py delete mode 100644 vb_suite/groupby.py delete mode 100644 vb_suite/hdfstore_bench.py delete mode 100644 vb_suite/index_object.py delete mode 100644 vb_suite/indexing.py delete mode 100644 vb_suite/inference.py delete mode 100644 vb_suite/io_bench.py delete mode 100644 vb_suite/io_sql.py delete mode 100644 vb_suite/join_merge.py delete mode 100755 vb_suite/make.py delete mode 100755 vb_suite/measure_memory_consumption.py delete mode 100644 vb_suite/miscellaneous.py delete mode 100644 vb_suite/packers.py delete mode 100644 vb_suite/pandas_vb_common.py delete mode 100644 vb_suite/panel_ctor.py delete mode 100644 vb_suite/panel_methods.py delete mode 100644 vb_suite/parser_vb.py delete mode 100755 vb_suite/perf_HEAD.py delete mode 100644 vb_suite/plotting.py delete mode 100644 vb_suite/reindex.py delete mode 100644 vb_suite/replace.py delete mode 100644 vb_suite/reshape.py delete mode 100755 vb_suite/run_suite.py delete mode 100644 vb_suite/series_methods.py delete mode 100644 vb_suite/source/_static/stub delete mode 100644 vb_suite/source/conf.py delete mode 100644 vb_suite/source/themes/agogo/layout.html delete mode 100644 vb_suite/source/themes/agogo/static/agogo.css_t delete mode 100644 vb_suite/source/themes/agogo/static/bgfooter.png delete mode 100644 
vb_suite/source/themes/agogo/static/bgtop.png delete mode 100644 vb_suite/source/themes/agogo/theme.conf delete mode 100644 vb_suite/sparse.py delete mode 100644 vb_suite/stat_ops.py delete mode 100644 vb_suite/strings.py delete mode 100644 vb_suite/suite.py delete mode 100644 vb_suite/test.py delete mode 100755 vb_suite/test_perf.py delete mode 100644 vb_suite/timedelta.py delete mode 100644 vb_suite/timeseries.py diff --git a/test_perf.sh b/test_perf.sh deleted file mode 100755 index 022de25bca8fc..0000000000000 --- a/test_perf.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -CURDIR=$(pwd) -BASEDIR=$(cd "$(dirname "$0")"; pwd) -python "$BASEDIR"/vb_suite/test_perf.py $@ diff --git a/vb_suite/.gitignore b/vb_suite/.gitignore deleted file mode 100644 index cc110f04e1225..0000000000000 --- a/vb_suite/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -benchmarks.db -build/* -source/vbench/* -source/*.rst \ No newline at end of file diff --git a/vb_suite/attrs_caching.py b/vb_suite/attrs_caching.py deleted file mode 100644 index a7e3ed7094ed6..0000000000000 --- a/vb_suite/attrs_caching.py +++ /dev/null @@ -1,20 +0,0 @@ -from vbench.benchmark import Benchmark - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# DataFrame.index / columns property lookup time - -setup = common_setup + """ -df = DataFrame(np.random.randn(10, 6)) -cur_index = df.index -""" -stmt = "foo = df.index" - -getattr_dataframe_index = Benchmark(stmt, setup, - name="getattr_dataframe_index") - -stmt = "df.index = cur_index" -setattr_dataframe_index = Benchmark(stmt, setup, - name="setattr_dataframe_index") diff --git a/vb_suite/binary_ops.py b/vb_suite/binary_ops.py deleted file mode 100644 index edc29bf3eec37..0000000000000 --- a/vb_suite/binary_ops.py +++ /dev/null @@ -1,199 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -SECTION = 'Binary ops' - -#---------------------------------------------------------------------- -# binary ops - -#---------------------------------------------------------------------- -# add - -setup = common_setup + """ -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -""" -frame_add = \ - Benchmark("df + df2", setup, name='frame_add', - start_date=datetime(2012, 1, 1)) - -setup = common_setup + """ -import pandas.core.computation.expressions as expr -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -expr.set_numexpr_threads(1) -""" - -frame_add_st = \ - Benchmark("df + df2", setup, name='frame_add_st',cleanup="expr.set_numexpr_threads()", - start_date=datetime(2013, 2, 26)) - -setup = common_setup + """ -import pandas.core.computation.expressions as expr -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -expr.set_use_numexpr(False) -""" -frame_add_no_ne = \ - Benchmark("df + df2", setup, name='frame_add_no_ne',cleanup="expr.set_use_numexpr(True)", - start_date=datetime(2013, 2, 26)) - -#---------------------------------------------------------------------- -# mult - -setup = common_setup + """ -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -""" -frame_mult = \ - Benchmark("df * df2", setup, name='frame_mult', - start_date=datetime(2012, 1, 1)) - -setup = common_setup + """ -import pandas.core.computation.expressions as expr -df = DataFrame(np.random.randn(20000, 100)) -df2 
= DataFrame(np.random.randn(20000, 100)) -expr.set_numexpr_threads(1) -""" -frame_mult_st = \ - Benchmark("df * df2", setup, name='frame_mult_st',cleanup="expr.set_numexpr_threads()", - start_date=datetime(2013, 2, 26)) - -setup = common_setup + """ -import pandas.core.computation.expressions as expr -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -expr.set_use_numexpr(False) -""" -frame_mult_no_ne = \ - Benchmark("df * df2", setup, name='frame_mult_no_ne',cleanup="expr.set_use_numexpr(True)", - start_date=datetime(2013, 2, 26)) - -#---------------------------------------------------------------------- -# division - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000, 1000)) -""" -frame_float_div_by_zero = \ - Benchmark("df / 0", setup, name='frame_float_div_by_zero') - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000, 1000)) -""" -frame_float_floor_by_zero = \ - Benchmark("df // 0", setup, name='frame_float_floor_by_zero') - -setup = common_setup + """ -df = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000))) -""" -frame_int_div_by_zero = \ - Benchmark("df / 0", setup, name='frame_int_div_by_zero') - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000, 1000)) -df2 = DataFrame(np.random.randn(1000, 1000)) -""" -frame_float_div = \ - Benchmark("df // df2", setup, name='frame_float_div') - -#---------------------------------------------------------------------- -# modulo - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000, 1000)) -df2 = DataFrame(np.random.randn(1000, 1000)) -""" -frame_float_mod = \ - Benchmark("df / df2", setup, name='frame_float_mod') - -setup = common_setup + """ -df = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000))) -df2 = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000))) -""" -frame_int_mod = \ - Benchmark("df / df2", setup, name='frame_int_mod') - -#---------------------------------------------------------------------- -# multi and - -setup = common_setup + """ -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -""" -frame_multi_and = \ - Benchmark("df[(df>0) & (df2>0)]", setup, name='frame_multi_and', - start_date=datetime(2012, 1, 1)) - -setup = common_setup + """ -import pandas.core.computation.expressions as expr -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -expr.set_numexpr_threads(1) -""" -frame_multi_and_st = \ - Benchmark("df[(df>0) & (df2>0)]", setup, name='frame_multi_and_st',cleanup="expr.set_numexpr_threads()", - start_date=datetime(2013, 2, 26)) - -setup = common_setup + """ -import pandas.core.computation.expressions as expr -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -expr.set_use_numexpr(False) -""" -frame_multi_and_no_ne = \ - Benchmark("df[(df>0) & (df2>0)]", setup, name='frame_multi_and_no_ne',cleanup="expr.set_use_numexpr(True)", - start_date=datetime(2013, 2, 26)) - -#---------------------------------------------------------------------- -# timeseries - -setup = common_setup + """ -N = 1000000 -halfway = N // 2 - 1 -s = Series(date_range('20010101', periods=N, freq='T')) -ts = s[halfway] -""" - -timestamp_series_compare = Benchmark("ts >= s", setup, - start_date=datetime(2013, 9, 27)) -series_timestamp_compare = Benchmark("s <= ts", setup, - 
start_date=datetime(2012, 2, 21)) - -setup = common_setup + """ -N = 1000000 -s = Series(date_range('20010101', periods=N, freq='s')) -""" - -timestamp_ops_diff1 = Benchmark("s.diff()", setup, - start_date=datetime(2013, 1, 1)) -timestamp_ops_diff2 = Benchmark("s-s.shift()", setup, - start_date=datetime(2013, 1, 1)) - -#---------------------------------------------------------------------- -# timeseries with tz - -setup = common_setup + """ -N = 10000 -halfway = N // 2 - 1 -s = Series(date_range('20010101', periods=N, freq='T', tz='US/Eastern')) -ts = s[halfway] -""" - -timestamp_tz_series_compare = Benchmark("ts >= s", setup, - start_date=datetime(2013, 9, 27)) -series_timestamp_tz_compare = Benchmark("s <= ts", setup, - start_date=datetime(2012, 2, 21)) - -setup = common_setup + """ -N = 10000 -s = Series(date_range('20010101', periods=N, freq='s', tz='US/Eastern')) -""" - -timestamp_tz_ops_diff1 = Benchmark("s.diff()", setup, - start_date=datetime(2013, 1, 1)) -timestamp_tz_ops_diff2 = Benchmark("s-s.shift()", setup, - start_date=datetime(2013, 1, 1)) diff --git a/vb_suite/categoricals.py b/vb_suite/categoricals.py deleted file mode 100644 index a08d479df20cb..0000000000000 --- a/vb_suite/categoricals.py +++ /dev/null @@ -1,16 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# Series constructors - -setup = common_setup + """ -s = pd.Series(list('aabbcd') * 1000000).astype('category') -""" - -concat_categorical = \ - Benchmark("concat([s, s])", setup=setup, name='concat_categorical', - start_date=datetime(year=2015, month=7, day=15)) diff --git a/vb_suite/ctors.py b/vb_suite/ctors.py deleted file mode 100644 index 8123322383f0a..0000000000000 --- a/vb_suite/ctors.py +++ /dev/null @@ -1,39 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# Series constructors - -setup = common_setup + """ -data = np.random.randn(100) -index = Index(np.arange(100)) -""" - -ctor_series_ndarray = \ - Benchmark("Series(data, index=index)", setup=setup, - name='series_constructor_ndarray') - -setup = common_setup + """ -arr = np.random.randn(100, 100) -""" - -ctor_frame_ndarray = \ - Benchmark("DataFrame(arr)", setup=setup, - name='frame_constructor_ndarray') - -setup = common_setup + """ -data = np.array(['foo', 'bar', 'baz'], dtype=object) -""" - -ctor_index_array_string = Benchmark('Index(data)', setup=setup) - -# index constructors -setup = common_setup + """ -s = Series([Timestamp('20110101'),Timestamp('20120101'),Timestamp('20130101')]*1000) -""" -index_from_series_ctor = Benchmark('Index(s)', setup=setup) - -dtindex_from_series_ctor = Benchmark('DatetimeIndex(s)', setup=setup) diff --git a/vb_suite/eval.py b/vb_suite/eval.py deleted file mode 100644 index 011669256a9bc..0000000000000 --- a/vb_suite/eval.py +++ /dev/null @@ -1,150 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -import pandas as pd -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -df3 = DataFrame(np.random.randn(20000, 100)) -df4 = DataFrame(np.random.randn(20000, 100)) -""" - -setup = common_setup + """ -import pandas.core.computation.expressions as expr -expr.set_numexpr_threads(1) -""" - 
-SECTION = 'Eval' - -#---------------------------------------------------------------------- -# binary ops - -#---------------------------------------------------------------------- -# add -eval_frame_add_all_threads = \ - Benchmark("pd.eval('df + df2 + df3 + df4')", common_setup, - name='eval_frame_add_all_threads', - start_date=datetime(2013, 7, 21)) - - - -eval_frame_add_one_thread = \ - Benchmark("pd.eval('df + df2 + df3 + df4')", setup, - name='eval_frame_add_one_thread', - start_date=datetime(2013, 7, 26)) - -eval_frame_add_python = \ - Benchmark("pd.eval('df + df2 + df3 + df4', engine='python')", common_setup, - name='eval_frame_add_python', start_date=datetime(2013, 7, 21)) - -eval_frame_add_python_one_thread = \ - Benchmark("pd.eval('df + df2 + df3 + df4', engine='python')", setup, - name='eval_frame_add_python_one_thread', - start_date=datetime(2013, 7, 26)) -#---------------------------------------------------------------------- -# mult - -eval_frame_mult_all_threads = \ - Benchmark("pd.eval('df * df2 * df3 * df4')", common_setup, - name='eval_frame_mult_all_threads', - start_date=datetime(2013, 7, 21)) - -eval_frame_mult_one_thread = \ - Benchmark("pd.eval('df * df2 * df3 * df4')", setup, - name='eval_frame_mult_one_thread', - start_date=datetime(2013, 7, 26)) - -eval_frame_mult_python = \ - Benchmark("pd.eval('df * df2 * df3 * df4', engine='python')", - common_setup, - name='eval_frame_mult_python', start_date=datetime(2013, 7, 21)) - -eval_frame_mult_python_one_thread = \ - Benchmark("pd.eval('df * df2 * df3 * df4', engine='python')", setup, - name='eval_frame_mult_python_one_thread', - start_date=datetime(2013, 7, 26)) - -#---------------------------------------------------------------------- -# multi and - -eval_frame_and_all_threads = \ - Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)')", - common_setup, - name='eval_frame_and_all_threads', - start_date=datetime(2013, 7, 21)) - -eval_frame_and_one_thread = \ - Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)')", setup, - name='eval_frame_and_one_thread', - start_date=datetime(2013, 7, 26)) - -eval_frame_and_python = \ - Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)', engine='python')", - common_setup, name='eval_frame_and_python', - start_date=datetime(2013, 7, 21)) - -eval_frame_and_one_thread = \ - Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)', engine='python')", - setup, - name='eval_frame_and_python_one_thread', - start_date=datetime(2013, 7, 26)) - -#-------------------------------------------------------------------- -# chained comp -eval_frame_chained_cmp_all_threads = \ - Benchmark("pd.eval('df < df2 < df3 < df4')", common_setup, - name='eval_frame_chained_cmp_all_threads', - start_date=datetime(2013, 7, 21)) - -eval_frame_chained_cmp_one_thread = \ - Benchmark("pd.eval('df < df2 < df3 < df4')", setup, - name='eval_frame_chained_cmp_one_thread', - start_date=datetime(2013, 7, 26)) - -eval_frame_chained_cmp_python = \ - Benchmark("pd.eval('df < df2 < df3 < df4', engine='python')", - common_setup, name='eval_frame_chained_cmp_python', - start_date=datetime(2013, 7, 26)) - -eval_frame_chained_cmp_one_thread = \ - Benchmark("pd.eval('df < df2 < df3 < df4', engine='python')", setup, - name='eval_frame_chained_cmp_python_one_thread', - start_date=datetime(2013, 7, 26)) - - -common_setup = """from .pandas_vb_common import * -""" - -setup = common_setup + """ -N = 1000000 -halfway = N // 2 - 1 -index = date_range('20010101', periods=N, freq='T') -s = 
Series(index) -ts = s.iloc[halfway] -""" - -series_setup = setup + """ -df = DataFrame({'dates': s.values}) -""" - -query_datetime_series = Benchmark("df.query('dates < @ts')", - series_setup, - start_date=datetime(2013, 9, 27)) - -index_setup = setup + """ -df = DataFrame({'a': np.random.randn(N)}, index=index) -""" - -query_datetime_index = Benchmark("df.query('index < @ts')", - index_setup, start_date=datetime(2013, 9, 27)) - -setup = setup + """ -N = 1000000 -df = DataFrame({'a': np.random.randn(N)}) -min_val = df['a'].min() -max_val = df['a'].max() -""" - -query_with_boolean_selection = Benchmark("df.query('(a >= @min_val) & (a <= @max_val)')", - setup, start_date=datetime(2013, 9, 27)) - diff --git a/vb_suite/frame_ctor.py b/vb_suite/frame_ctor.py deleted file mode 100644 index 0d57da7b88d3b..0000000000000 --- a/vb_suite/frame_ctor.py +++ /dev/null @@ -1,123 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime -try: - import pandas.tseries.offsets as offsets -except: - import pandas.core.datetools as offsets - -common_setup = """from .pandas_vb_common import * -try: - from pandas.tseries.offsets import * -except: - from pandas.core.datetools import * -""" - -#---------------------------------------------------------------------- -# Creation from nested dict - -setup = common_setup + """ -N, K = 5000, 50 -index = tm.makeStringIndex(N) -columns = tm.makeStringIndex(K) -frame = DataFrame(np.random.randn(N, K), index=index, columns=columns) - -try: - data = frame.to_dict() -except: - data = frame.toDict() - -some_dict = data.values()[0] -dict_list = [dict(zip(columns, row)) for row in frame.values] -""" - -frame_ctor_nested_dict = Benchmark("DataFrame(data)", setup) - -# From JSON-like stuff -frame_ctor_list_of_dict = Benchmark("DataFrame(dict_list)", setup, - start_date=datetime(2011, 12, 20)) - -series_ctor_from_dict = Benchmark("Series(some_dict)", setup) - -# nested dict, integer indexes, regression described in #621 -setup = common_setup + """ -data = dict((i,dict((j,float(j)) for j in range(100))) for i in xrange(2000)) -""" -frame_ctor_nested_dict_int64 = Benchmark("DataFrame(data)", setup) - -# dynamically generate benchmarks for every offset -# -# get_period_count & get_index_for_offset are there because blindly taking each -# offset times 1000 can easily go out of Timestamp bounds and raise errors. 
-dynamic_benchmarks = {} -n_steps = [1, 2] -offset_kwargs = {'WeekOfMonth': {'weekday': 1, 'week': 1}, - 'LastWeekOfMonth': {'weekday': 1, 'week': 1}, - 'FY5253': {'startingMonth': 1, 'weekday': 1}, - 'FY5253Quarter': {'qtr_with_extra_week': 1, 'startingMonth': 1, 'weekday': 1}} - -offset_extra_cases = {'FY5253': {'variation': ['nearest', 'last']}, - 'FY5253Quarter': {'variation': ['nearest', 'last']}} - -for offset in offsets.__all__: - for n in n_steps: - kwargs = {} - if offset in offset_kwargs: - kwargs = offset_kwargs[offset] - - if offset in offset_extra_cases: - extras = offset_extra_cases[offset] - else: - extras = {'': ['']} - - for extra_arg in extras: - for extra in extras[extra_arg]: - if extra: - kwargs[extra_arg] = extra - setup = common_setup + """ - -def get_period_count(start_date, off): - ten_offsets_in_days = ((start_date + off * 10) - start_date).days - if ten_offsets_in_days == 0: - return 1000 - else: - return min(9 * ((Timestamp.max - start_date).days // - ten_offsets_in_days), - 1000) - -def get_index_for_offset(off): - start_date = Timestamp('1/1/1900') - return date_range(start_date, - periods=min(1000, get_period_count(start_date, off)), - freq=off) - -idx = get_index_for_offset({}({}, **{})) -df = DataFrame(np.random.randn(len(idx),10), index=idx) -d = dict([ (col,df[col]) for col in df.columns ]) -""".format(offset, n, kwargs) - key = 'frame_ctor_dtindex_{}x{}'.format(offset, n) - if extra: - key += '__{}_{}'.format(extra_arg, extra) - dynamic_benchmarks[key] = Benchmark("DataFrame(d)", setup, name=key) - -# Have to stuff them in globals() so vbench detects them -globals().update(dynamic_benchmarks) - -# from a mi-series -setup = common_setup + """ -mi = MultiIndex.from_tuples([(x,y) for x in range(100) for y in range(100)]) -s = Series(randn(10000), index=mi) -""" -frame_from_series = Benchmark("DataFrame(s)", setup) - -#---------------------------------------------------------------------- -# get_numeric_data - -setup = common_setup + """ -df = DataFrame(randn(10000, 25)) -df['foo'] = 'bar' -df['bar'] = 'baz' -df = df.consolidate() -""" - -frame_get_numeric_data = Benchmark('df._get_numeric_data()', setup, - start_date=datetime(2011, 11, 1)) diff --git a/vb_suite/frame_methods.py b/vb_suite/frame_methods.py deleted file mode 100644 index 46343e9c607fd..0000000000000 --- a/vb_suite/frame_methods.py +++ /dev/null @@ -1,525 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# lookup - -setup = common_setup + """ -df = DataFrame(np.random.randn(10000, 8), columns=list('abcdefgh')) -df['foo'] = 'bar' - -row_labels = list(df.index[::10])[:900] -col_labels = list(df.columns) * 100 -row_labels_all = np.array(list(df.index) * len(df.columns), dtype='object') -col_labels_all = np.array(list(df.columns) * len(df.index), dtype='object') -""" - -frame_fancy_lookup = Benchmark('df.lookup(row_labels, col_labels)', setup, - start_date=datetime(2012, 1, 12)) - -frame_fancy_lookup_all = Benchmark('df.lookup(row_labels_all, col_labels_all)', - setup, - start_date=datetime(2012, 1, 12)) - -#---------------------------------------------------------------------- -# fillna in place - -setup = common_setup + """ -df = DataFrame(randn(10000, 100)) -df.values[::2] = np.nan -""" - -frame_fillna_inplace = Benchmark('df.fillna(0, inplace=True)', setup, - start_date=datetime(2012, 4, 4)) - - 
-#---------------------------------------------------------------------- -# reindex both axes - -setup = common_setup + """ -df = DataFrame(randn(10000, 10000)) -idx = np.arange(4000, 7000) -""" - -frame_reindex_axis0 = Benchmark('df.reindex(idx)', setup) - -frame_reindex_axis1 = Benchmark('df.reindex(columns=idx)', setup) - -frame_reindex_both_axes = Benchmark('df.reindex(index=idx, columns=idx)', - setup, start_date=datetime(2011, 1, 1)) - -frame_reindex_both_axes_ix = Benchmark('df.ix[idx, idx]', setup, - start_date=datetime(2011, 1, 1)) - -#---------------------------------------------------------------------- -# reindex with upcasts -setup = common_setup + """ -df=DataFrame(dict([(c, { - 0: randint(0, 2, 1000).astype(np.bool_), - 1: randint(0, 1000, 1000).astype(np.int16), - 2: randint(0, 1000, 1000).astype(np.int32), - 3: randint(0, 1000, 1000).astype(np.int64) - }[randint(0, 4)]) for c in range(1000)])) -""" - -frame_reindex_upcast = Benchmark('df.reindex(permutation(range(1200)))', setup) - -#---------------------------------------------------------------------- -# boolean indexing - -setup = common_setup + """ -df = DataFrame(randn(10000, 100)) -bool_arr = np.zeros(10000, dtype=bool) -bool_arr[:1000] = True -""" - -frame_boolean_row_select = Benchmark('df[bool_arr]', setup, - start_date=datetime(2011, 1, 1)) - -#---------------------------------------------------------------------- -# iteritems (monitor no-copying behaviour) - -setup = common_setup + """ -df = DataFrame(randn(10000, 1000)) -df2 = DataFrame(randn(3000,1),columns=['A']) -df3 = DataFrame(randn(3000,1)) - -def f(): - if hasattr(df, '_item_cache'): - df._item_cache.clear() - for name, col in df.iteritems(): - pass - -def g(): - for name, col in df.iteritems(): - pass - -def h(): - for i in range(10000): - df2['A'] - -def j(): - for i in range(10000): - df3[0] - -""" - -# as far back as the earliest test currently in the suite -frame_iteritems = Benchmark('f()', setup, - start_date=datetime(2010, 6, 1)) - -frame_iteritems_cached = Benchmark('g()', setup, - start_date=datetime(2010, 6, 1)) - -frame_getitem_single_column = Benchmark('h()', setup, - start_date=datetime(2010, 6, 1)) - -frame_getitem_single_column2 = Benchmark('j()', setup, - start_date=datetime(2010, 6, 1)) - -#---------------------------------------------------------------------- -# assignment - -setup = common_setup + """ -idx = date_range('1/1/2000', periods=100000, freq='D') -df = DataFrame(randn(100000, 1),columns=['A'],index=idx) -def f(df): - x = df.copy() - x['date'] = x.index -""" - -frame_assign_timeseries_index = Benchmark('f(df)', setup, - start_date=datetime(2013, 10, 1)) - - -#---------------------------------------------------------------------- -# to_string - -setup = common_setup + """ -df = DataFrame(randn(100, 10)) -""" - -frame_to_string_floats = Benchmark('df.to_string()', setup, - start_date=datetime(2010, 6, 1)) - -#---------------------------------------------------------------------- -# to_html - -setup = common_setup + """ -nrows=500 -df = DataFrame(randn(nrows, 10)) -df[0]=period_range("2000","2010",nrows) -df[1]=range(nrows) - -""" - -frame_to_html_mixed = Benchmark('df.to_html()', setup, - start_date=datetime(2011, 11, 18)) - - -# truncated repr_html, single index - -setup = common_setup + """ -nrows=10000 -data=randn(nrows,10) -idx=MultiIndex.from_arrays(np.tile(randn(3,nrows/100),100)) -df=DataFrame(data,index=idx) - -""" - -frame_html_repr_trunc_mi = Benchmark('df._repr_html_()', setup, - start_date=datetime(2013, 11, 25)) - 
-# truncated repr_html, MultiIndex - -setup = common_setup + """ -nrows=10000 -data=randn(nrows,10) -idx=randn(nrows) -df=DataFrame(data,index=idx) - -""" - -frame_html_repr_trunc_si = Benchmark('df._repr_html_()', setup, - start_date=datetime(2013, 11, 25)) - - -# insert many columns - -setup = common_setup + """ -N = 1000 - -def f(K=500): - df = DataFrame(index=range(N)) - new_col = np.random.randn(N) - for i in range(K): - df[i] = new_col -""" - -frame_insert_500_columns_end = Benchmark('f()', setup, start_date=datetime(2011, 1, 1)) - -setup = common_setup + """ -N = 1000 - -def f(K=100): - df = DataFrame(index=range(N)) - new_col = np.random.randn(N) - for i in range(K): - df.insert(0,i,new_col) -""" - -frame_insert_100_columns_begin = Benchmark('f()', setup, start_date=datetime(2011, 1, 1)) - -#---------------------------------------------------------------------- -# strings methods, #2602 - -setup = common_setup + """ -s = Series(['abcdefg', np.nan]*500000) -""" - -series_string_vector_slice = Benchmark('s.str[:5]', setup, - start_date=datetime(2012, 8, 1)) - -#---------------------------------------------------------------------- -# df.info() and get_dtype_counts() # 2807 - -setup = common_setup + """ -df = pandas.DataFrame(np.random.randn(10,10000)) -""" - -frame_get_dtype_counts = Benchmark('df.get_dtype_counts()', setup, - start_date=datetime(2012, 8, 1)) - -## -setup = common_setup + """ -df = pandas.DataFrame(np.random.randn(10,10000)) -""" - -frame_repr_wide = Benchmark('repr(df)', setup, - start_date=datetime(2012, 8, 1)) - -## -setup = common_setup + """ -df = pandas.DataFrame(np.random.randn(10000, 10)) -""" - -frame_repr_tall = Benchmark('repr(df)', setup, - start_date=datetime(2012, 8, 1)) - -## -setup = common_setup + """ -df = DataFrame(randn(100000, 1)) -""" - -frame_xs_row = Benchmark('df.xs(50000)', setup) - -## -setup = common_setup + """ -df = DataFrame(randn(1,100000)) -""" - -frame_xs_col = Benchmark('df.xs(50000,axis = 1)', setup) - -#---------------------------------------------------------------------- -# nulls/masking - -## masking -setup = common_setup + """ -data = np.random.randn(1000, 500) -df = DataFrame(data) -df = df.where(df > 0) # create nans -bools = df > 0 -mask = isnull(df) -""" - -frame_mask_bools = Benchmark('bools.mask(mask)', setup, - start_date=datetime(2013,1,1)) - -frame_mask_floats = Benchmark('bools.astype(float).mask(mask)', setup, - start_date=datetime(2013,1,1)) - -## isnull -setup = common_setup + """ -data = np.random.randn(1000, 1000) -df = DataFrame(data) -""" -frame_isnull = Benchmark('isnull(df)', setup, - start_date=datetime(2012,1,1)) - -## dropna -dropna_setup = common_setup + """ -data = np.random.randn(10000, 1000) -df = DataFrame(data) -df.ix[50:1000,20:50] = np.nan -df.ix[2000:3000] = np.nan -df.ix[:,60:70] = np.nan -""" -frame_dropna_axis0_any = Benchmark('df.dropna(how="any",axis=0)', dropna_setup, - start_date=datetime(2012,1,1)) -frame_dropna_axis0_all = Benchmark('df.dropna(how="all",axis=0)', dropna_setup, - start_date=datetime(2012,1,1)) - -frame_dropna_axis1_any = Benchmark('df.dropna(how="any",axis=1)', dropna_setup, - start_date=datetime(2012,1,1)) - -frame_dropna_axis1_all = Benchmark('df.dropna(how="all",axis=1)', dropna_setup, - start_date=datetime(2012,1,1)) - -# dropna on mixed dtypes -dropna_mixed_setup = common_setup + """ -data = np.random.randn(10000, 1000) -df = DataFrame(data) -df.ix[50:1000,20:50] = np.nan -df.ix[2000:3000] = np.nan -df.ix[:,60:70] = np.nan -df['foo'] = 'bar' -""" 
-frame_dropna_axis0_any_mixed_dtypes = Benchmark('df.dropna(how="any",axis=0)', dropna_mixed_setup, - start_date=datetime(2012,1,1)) -frame_dropna_axis0_all_mixed_dtypes = Benchmark('df.dropna(how="all",axis=0)', dropna_mixed_setup, - start_date=datetime(2012,1,1)) - -frame_dropna_axis1_any_mixed_dtypes = Benchmark('df.dropna(how="any",axis=1)', dropna_mixed_setup, - start_date=datetime(2012,1,1)) - -frame_dropna_axis1_all_mixed_dtypes = Benchmark('df.dropna(how="all",axis=1)', dropna_mixed_setup, - start_date=datetime(2012,1,1)) - -## dropna multi -dropna_setup = common_setup + """ -data = np.random.randn(10000, 1000) -df = DataFrame(data) -df.ix[50:1000,20:50] = np.nan -df.ix[2000:3000] = np.nan -df.ix[:,60:70] = np.nan -df.index = MultiIndex.from_tuples(df.index.map(lambda x: (x, x))) -df.columns = MultiIndex.from_tuples(df.columns.map(lambda x: (x, x))) -""" -frame_count_level_axis0_multi = Benchmark('df.count(axis=0, level=1)', dropna_setup, - start_date=datetime(2012,1,1)) - -frame_count_level_axis1_multi = Benchmark('df.count(axis=1, level=1)', dropna_setup, - start_date=datetime(2012,1,1)) - -# dropna on mixed dtypes -dropna_mixed_setup = common_setup + """ -data = np.random.randn(10000, 1000) -df = DataFrame(data) -df.ix[50:1000,20:50] = np.nan -df.ix[2000:3000] = np.nan -df.ix[:,60:70] = np.nan -df['foo'] = 'bar' -df.index = MultiIndex.from_tuples(df.index.map(lambda x: (x, x))) -df.columns = MultiIndex.from_tuples(df.columns.map(lambda x: (x, x))) -""" -frame_count_level_axis0_mixed_dtypes_multi = Benchmark('df.count(axis=0, level=1)', dropna_mixed_setup, - start_date=datetime(2012,1,1)) - -frame_count_level_axis1_mixed_dtypes_multi = Benchmark('df.count(axis=1, level=1)', dropna_mixed_setup, - start_date=datetime(2012,1,1)) - -#---------------------------------------------------------------------- -# apply - -setup = common_setup + """ -s = Series(np.arange(1028.)) -df = DataFrame({ i:s for i in range(1028) }) -""" -frame_apply_user_func = Benchmark('df.apply(lambda x: np.corrcoef(x,s)[0,1])', setup, - name = 'frame_apply_user_func', - start_date=datetime(2012,1,1)) - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000,100)) -""" -frame_apply_lambda_mean = Benchmark('df.apply(lambda x: x.sum())', setup, - name = 'frame_apply_lambda_mean', - start_date=datetime(2012,1,1)) -setup = common_setup + """ -df = DataFrame(np.random.randn(1000,100)) -""" -frame_apply_np_mean = Benchmark('df.apply(np.mean)', setup, - name = 'frame_apply_np_mean', - start_date=datetime(2012,1,1)) - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000,100)) -""" -frame_apply_pass_thru = Benchmark('df.apply(lambda x: x)', setup, - name = 'frame_apply_pass_thru', - start_date=datetime(2012,1,1)) - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000,100)) -""" -frame_apply_axis_1 = Benchmark('df.apply(lambda x: x+1,axis=1)', setup, - name = 'frame_apply_axis_1', - start_date=datetime(2012,1,1)) - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000,3),columns=list('ABC')) -""" -frame_apply_ref_by_name = Benchmark('df.apply(lambda x: x["A"] + x["B"],axis=1)', setup, - name = 'frame_apply_ref_by_name', - start_date=datetime(2012,1,1)) - -#---------------------------------------------------------------------- -# dtypes - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000,1000)) -""" -frame_dtypes = Benchmark('df.dtypes', setup, - start_date=datetime(2012,1,1)) - -#---------------------------------------------------------------------- -# equals 
-setup = common_setup + """ -def make_pair(frame): - df = frame - df2 = df.copy() - df2.ix[-1,-1] = np.nan - return df, df2 - -def test_equal(name): - df, df2 = pairs[name] - return df.equals(df) - -def test_unequal(name): - df, df2 = pairs[name] - return df.equals(df2) - -float_df = DataFrame(np.random.randn(1000, 1000)) -object_df = DataFrame([['foo']*1000]*1000) -nonunique_cols = object_df.copy() -nonunique_cols.columns = ['A']*len(nonunique_cols.columns) - -pairs = dict([(name, make_pair(frame)) - for name, frame in (('float_df', float_df), ('object_df', object_df), ('nonunique_cols', nonunique_cols))]) -""" -frame_float_equal = Benchmark('test_equal("float_df")', setup) -frame_object_equal = Benchmark('test_equal("object_df")', setup) -frame_nonunique_equal = Benchmark('test_equal("nonunique_cols")', setup) - -frame_float_unequal = Benchmark('test_unequal("float_df")', setup) -frame_object_unequal = Benchmark('test_unequal("object_df")', setup) -frame_nonunique_unequal = Benchmark('test_unequal("nonunique_cols")', setup) - -#----------------------------------------------------------------------------- -# interpolate -# this is the worst case, where every column has NaNs. -setup = common_setup + """ -df = DataFrame(randn(10000, 100)) -df.values[::2] = np.nan -""" - -frame_interpolate = Benchmark('df.interpolate()', setup, - start_date=datetime(2014, 2, 7)) - -setup = common_setup + """ -df = DataFrame({'A': np.arange(0, 10000), - 'B': np.random.randint(0, 100, 10000), - 'C': randn(10000), - 'D': randn(10000)}) -df.loc[1::5, 'A'] = np.nan -df.loc[1::5, 'C'] = np.nan -""" - -frame_interpolate_some_good = Benchmark('df.interpolate()', setup, - start_date=datetime(2014, 2, 7)) -frame_interpolate_some_good_infer = Benchmark('df.interpolate(downcast="infer")', - setup, - start_date=datetime(2014, 2, 7)) - - -#------------------------------------------------------------------------- -# frame shift speedup issue-5609 - -setup = common_setup + """ -df = DataFrame(np.random.rand(10000,500)) -# note: df._data.blocks are f_contigous -""" -frame_shift_axis0 = Benchmark('df.shift(1,axis=0)', setup, - start_date=datetime(2014,1,1)) -frame_shift_axis1 = Benchmark('df.shift(1,axis=1)', setup, - name = 'frame_shift_axis_1', - start_date=datetime(2014,1,1)) - - -#----------------------------------------------------------------------------- -# from_records issue-6700 - -setup = common_setup + """ -def get_data(n=100000): - return ((x, x*20, x*100) for x in range(n)) -""" - -frame_from_records_generator = Benchmark('df = DataFrame.from_records(get_data())', - setup, - name='frame_from_records_generator', - start_date=datetime(2013,10,4)) # issue-4911 - -frame_from_records_generator_nrows = Benchmark('df = DataFrame.from_records(get_data(), nrows=1000)', - setup, - name='frame_from_records_generator_nrows', - start_date=datetime(2013,10,04)) # issue-4911 - -#----------------------------------------------------------------------------- -# duplicated - -setup = common_setup + ''' -n = 1 << 20 - -t = date_range('2015-01-01', freq='S', periods=n // 64) -xs = np.random.randn(n // 64).round(2) - -df = DataFrame({'a':np.random.randint(- 1 << 8, 1 << 8, n), - 'b':np.random.choice(t, n), - 'c':np.random.choice(xs, n)}) -''' - -frame_duplicated = Benchmark('df.duplicated()', setup, - name='frame_duplicated') diff --git a/vb_suite/generate_rst_files.py b/vb_suite/generate_rst_files.py deleted file mode 100644 index 92e7cd4d59b71..0000000000000 --- a/vb_suite/generate_rst_files.py +++ /dev/null @@ -1,2 +0,0 @@ -from 
suite import benchmarks, generate_rst_files
-generate_rst_files(benchmarks)
diff --git a/vb_suite/gil.py b/vb_suite/gil.py
deleted file mode 100644
index df2bd2dcd8db4..0000000000000
--- a/vb_suite/gil.py
+++ /dev/null
@@ -1,110 +0,0 @@
-from vbench.api import Benchmark
-from datetime import datetime
-
-common_setup = """from .pandas_vb_common import *
-"""
-
-basic = common_setup + """
-try:
-    from pandas.util.testing import test_parallel
-    have_real_test_parallel = True
-except ImportError:
-    have_real_test_parallel = False
-    def test_parallel(num_threads=1):
-        def wrapper(fname):
-            return fname
-
-        return wrapper
-
-N = 1000000
-ngroups = 1000
-np.random.seed(1234)
-
-df = DataFrame({'key' : np.random.randint(0,ngroups,size=N),
-                'data' : np.random.randn(N) })
-
-if not have_real_test_parallel:
-    raise NotImplementedError
-"""
-
-setup = basic + """
-
-def f():
-    df.groupby('key')['data'].sum()
-
-# run consecutively
-def g2():
-    for i in range(2):
-        f()
-def g4():
-    for i in range(4):
-        f()
-def g8():
-    for i in range(8):
-        f()
-
-# run in parallel
-@test_parallel(num_threads=2)
-def pg2():
-    f()
-
-@test_parallel(num_threads=4)
-def pg4():
-    f()
-
-@test_parallel(num_threads=8)
-def pg8():
-    f()
-
-"""
-
-nogil_groupby_sum_4 = Benchmark(
-    'pg4()', setup,
-    start_date=datetime(2015, 1, 1))
-
-nogil_groupby_sum_8 = Benchmark(
-    'pg8()', setup,
-    start_date=datetime(2015, 1, 1))
-
-
-#### test all groupby funcs ####
-
-setup = basic + """
-
-@test_parallel(num_threads=2)
-def pg2():
-    df.groupby('key')['data'].func()
-
-"""
-
-for f in ['sum','prod','var','count','min','max','mean','last']:
-
-    name = "nogil_groupby_{f}_2".format(f=f)
-    bmark = Benchmark('pg2()', setup.replace('func',f), start_date=datetime(2015, 1, 1))
-    bmark.name = name
-    globals()[name] = bmark
-
-del bmark
-
-
-#### test take_1d ####
-setup = basic + """
-from pandas.core import common as com
-
-N = 1e7
-df = DataFrame({'int64' : np.arange(N,dtype='int64'),
-                'float64' : np.arange(N,dtype='float64')})
-indexer = np.arange(100,len(df)-100)
-
-@test_parallel(num_threads=2)
-def take_1d_pg2_int64():
-    com.take_1d(df.int64.values,indexer)
-
-@test_parallel(num_threads=2)
-def take_1d_pg2_float64():
-    com.take_1d(df.float64.values,indexer)
-
-"""
-
-nogil_take1d_float64 = Benchmark('take_1d_pg2_float64()', setup, start_date=datetime(2015, 1, 1))
-nogil_take1d_int64 = Benchmark('take_1d_pg2_int64()', setup, start_date=datetime(2015, 1, 1))
diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py
deleted file mode 100644
index 268d71f864823..0000000000000
--- a/vb_suite/groupby.py
+++ /dev/null
@@ -1,620 +0,0 @@
-from vbench.api import Benchmark
-from datetime import datetime
-
-common_setup = """from .pandas_vb_common import *
-"""
-
-setup = common_setup + """
-N = 100000
-ngroups = 100
-
-def get_test_data(ngroups=100, n=100000):
-    unique_groups = range(ngroups)
-    arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object)
-
-    if len(arr) < n:
-        arr = np.asarray(list(arr) + unique_groups[:n - len(arr)],
-                         dtype=object)
-
-    random.shuffle(arr)
-    return arr
-
-# aggregate multiple columns
-df = DataFrame({'key1' : get_test_data(ngroups=ngroups),
-                'key2' : get_test_data(ngroups=ngroups),
-                'data1' : np.random.randn(N),
-                'data2' : np.random.randn(N)})
-def f():
-    df.groupby(['key1', 'key2']).agg(lambda x: x.values.sum())
-
-simple_series = Series(np.random.randn(N))
-key1 = df['key1']
-"""
-
-stmt1 = "df.groupby(['key1', 'key2'])['data1'].agg(lambda x: x.values.sum())"
-groupby_multi_python = Benchmark(stmt1,
setup, - start_date=datetime(2011, 7, 1)) - -stmt3 = "df.groupby(['key1', 'key2']).sum()" -groupby_multi_cython = Benchmark(stmt3, setup, - start_date=datetime(2011, 7, 1)) - -stmt = "df.groupby(['key1', 'key2'])['data1'].agg(np.std)" -groupby_multi_series_op = Benchmark(stmt, setup, - start_date=datetime(2011, 8, 1)) - -groupby_series_simple_cython = \ - Benchmark('simple_series.groupby(key1).sum()', setup, - start_date=datetime(2011, 3, 1)) - - -stmt4 = "df.groupby('key1').rank(pct=True)" -groupby_series_simple_cython = Benchmark(stmt4, setup, - start_date=datetime(2014, 1, 16)) - -#---------------------------------------------------------------------- -# 2d grouping, aggregate many columns - -setup = common_setup + """ -labels = np.random.randint(0, 100, size=1000) -df = DataFrame(randn(1000, 1000)) -""" - -groupby_frame_cython_many_columns = Benchmark( - 'df.groupby(labels).sum()', setup, - start_date=datetime(2011, 8, 1), - logy=True) - -#---------------------------------------------------------------------- -# single key, long, integer key - -setup = common_setup + """ -data = np.random.randn(100000, 1) -labels = np.random.randint(0, 1000, size=100000) -df = DataFrame(data) -""" - -groupby_frame_singlekey_integer = \ - Benchmark('df.groupby(labels).sum()', setup, - start_date=datetime(2011, 8, 1), logy=True) - -#---------------------------------------------------------------------- -# group with different functions per column - -setup = common_setup + """ -fac1 = np.array(['A', 'B', 'C'], dtype='O') -fac2 = np.array(['one', 'two'], dtype='O') - -df = DataFrame({'key1': fac1.take(np.random.randint(0, 3, size=100000)), - 'key2': fac2.take(np.random.randint(0, 2, size=100000)), - 'value1' : np.random.randn(100000), - 'value2' : np.random.randn(100000), - 'value3' : np.random.randn(100000)}) -""" - -groupby_multi_different_functions = \ - Benchmark("""df.groupby(['key1', 'key2']).agg({'value1' : 'mean', - 'value2' : 'var', - 'value3' : 'sum'})""", - setup, start_date=datetime(2011, 9, 1)) - -groupby_multi_different_numpy_functions = \ - Benchmark("""df.groupby(['key1', 'key2']).agg({'value1' : np.mean, - 'value2' : np.var, - 'value3' : np.sum})""", - setup, start_date=datetime(2011, 9, 1)) - -#---------------------------------------------------------------------- -# size() speed - -setup = common_setup + """ -n = 100000 -offsets = np.random.randint(n, size=n).astype('timedelta64[ns]') -dates = np.datetime64('now') + offsets -df = DataFrame({'key1': np.random.randint(0, 500, size=n), - 'key2': np.random.randint(0, 100, size=n), - 'value1' : np.random.randn(n), - 'value2' : np.random.randn(n), - 'value3' : np.random.randn(n), - 'dates' : dates}) -""" - -groupby_multi_size = Benchmark("df.groupby(['key1', 'key2']).size()", - setup, start_date=datetime(2011, 10, 1)) - -groupby_dt_size = Benchmark("df.groupby(['dates']).size()", - setup, start_date=datetime(2011, 10, 1)) - -groupby_dt_timegrouper_size = Benchmark("df.groupby(TimeGrouper(key='dates', freq='M')).size()", - setup, start_date=datetime(2011, 10, 1)) - -#---------------------------------------------------------------------- -# count() speed - -setup = common_setup + """ -n = 10000 -offsets = np.random.randint(n, size=n).astype('timedelta64[ns]') - -dates = np.datetime64('now') + offsets -dates[np.random.rand(n) > 0.5] = np.datetime64('nat') - -offsets[np.random.rand(n) > 0.5] = np.timedelta64('nat') - -value2 = np.random.randn(n) -value2[np.random.rand(n) > 0.5] = np.nan - -obj = np.random.choice(list('ab'), size=n).astype(object) 
-obj[np.random.randn(n) > 0.5] = np.nan - -df = DataFrame({'key1': np.random.randint(0, 500, size=n), - 'key2': np.random.randint(0, 100, size=n), - 'dates': dates, - 'value2' : value2, - 'value3' : np.random.randn(n), - 'ints': np.random.randint(0, 1000, size=n), - 'obj': obj, - 'offsets': offsets}) -""" - -groupby_multi_count = Benchmark("df.groupby(['key1', 'key2']).count()", - setup, name='groupby_multi_count', - start_date=datetime(2014, 5, 5)) - -setup = common_setup + """ -n = 10000 - -df = DataFrame({'key1': randint(0, 500, size=n), - 'key2': randint(0, 100, size=n), - 'ints': randint(0, 1000, size=n), - 'ints2': randint(0, 1000, size=n)}) -""" - -groupby_int_count = Benchmark("df.groupby(['key1', 'key2']).count()", - setup, name='groupby_int_count', - start_date=datetime(2014, 5, 6)) -#---------------------------------------------------------------------- -# Series.value_counts - -setup = common_setup + """ -s = Series(np.random.randint(0, 1000, size=100000)) -""" - -series_value_counts_int64 = Benchmark('s.value_counts()', setup, - start_date=datetime(2011, 10, 21)) - -# value_counts on lots of strings - -setup = common_setup + """ -K = 1000 -N = 100000 -uniques = tm.makeStringIndex(K).values -s = Series(np.tile(uniques, N // K)) -""" - -series_value_counts_strings = Benchmark('s.value_counts()', setup, - start_date=datetime(2011, 10, 21)) - -#value_counts on float dtype - -setup = common_setup + """ -s = Series(np.random.randint(0, 1000, size=100000)).astype(float) -""" - -series_value_counts_float64 = Benchmark('s.value_counts()', setup, - start_date=datetime(2015, 8, 17)) - -#---------------------------------------------------------------------- -# pivot_table - -setup = common_setup + """ -fac1 = np.array(['A', 'B', 'C'], dtype='O') -fac2 = np.array(['one', 'two'], dtype='O') - -ind1 = np.random.randint(0, 3, size=100000) -ind2 = np.random.randint(0, 2, size=100000) - -df = DataFrame({'key1': fac1.take(ind1), -'key2': fac2.take(ind2), -'key3': fac2.take(ind2), -'value1' : np.random.randn(100000), -'value2' : np.random.randn(100000), -'value3' : np.random.randn(100000)}) -""" - -stmt = "df.pivot_table(index='key1', columns=['key2', 'key3'])" -groupby_pivot_table = Benchmark(stmt, setup, start_date=datetime(2011, 12, 15)) - - -#---------------------------------------------------------------------- -# dict return values - -setup = common_setup + """ -labels = np.arange(1000).repeat(10) -data = Series(randn(len(labels))) -f = lambda x: {'first': x.values[0], 'last': x.values[-1]} -""" - -groupby_apply_dict_return = Benchmark('data.groupby(labels).apply(f)', - setup, start_date=datetime(2011, 12, 15)) - -#---------------------------------------------------------------------- -# First / last functions - -setup = common_setup + """ -labels = np.arange(10000).repeat(10) -data = Series(randn(len(labels))) -data[::3] = np.nan -data[1::3] = np.nan -data2 = Series(randn(len(labels)),dtype='float32') -data2[::3] = np.nan -data2[1::3] = np.nan -labels = labels.take(np.random.permutation(len(labels))) -""" - -groupby_first_float64 = Benchmark('data.groupby(labels).first()', setup, - start_date=datetime(2012, 5, 1)) - -groupby_first_float32 = Benchmark('data2.groupby(labels).first()', setup, - start_date=datetime(2013, 1, 1)) - -groupby_last_float64 = Benchmark('data.groupby(labels).last()', setup, - start_date=datetime(2012, 5, 1)) - -groupby_last_float32 = Benchmark('data2.groupby(labels).last()', setup, - start_date=datetime(2013, 1, 1)) - -groupby_nth_float64_none = 
Benchmark('data.groupby(labels).nth(0)', setup, - start_date=datetime(2012, 5, 1)) -groupby_nth_float32_none = Benchmark('data2.groupby(labels).nth(0)', setup, - start_date=datetime(2013, 1, 1)) -groupby_nth_float64_any = Benchmark('data.groupby(labels).nth(0,dropna="all")', setup, - start_date=datetime(2012, 5, 1)) -groupby_nth_float32_any = Benchmark('data2.groupby(labels).nth(0,dropna="all")', setup, - start_date=datetime(2013, 1, 1)) - -# with datetimes (GH7555) -setup = common_setup + """ -df = DataFrame({'a' : date_range('1/1/2011',periods=100000,freq='s'),'b' : range(100000)}) -""" - -groupby_first_datetimes = Benchmark('df.groupby("b").first()', setup, - start_date=datetime(2013, 5, 1)) -groupby_last_datetimes = Benchmark('df.groupby("b").last()', setup, - start_date=datetime(2013, 5, 1)) -groupby_nth_datetimes_none = Benchmark('df.groupby("b").nth(0)', setup, - start_date=datetime(2013, 5, 1)) -groupby_nth_datetimes_any = Benchmark('df.groupby("b").nth(0,dropna="all")', setup, - start_date=datetime(2013, 5, 1)) - -# with object -setup = common_setup + """ -df = DataFrame({'a' : ['foo']*100000,'b' : range(100000)}) -""" - -groupby_first_object = Benchmark('df.groupby("b").first()', setup, - start_date=datetime(2013, 5, 1)) -groupby_last_object = Benchmark('df.groupby("b").last()', setup, - start_date=datetime(2013, 5, 1)) -groupby_nth_object_none = Benchmark('df.groupby("b").nth(0)', setup, - start_date=datetime(2013, 5, 1)) -groupby_nth_object_any = Benchmark('df.groupby("b").nth(0,dropna="any")', setup, - start_date=datetime(2013, 5, 1)) - -#---------------------------------------------------------------------- -# groupby_indices replacement, chop up Series - -setup = common_setup + """ -try: - rng = date_range('1/1/2000', '12/31/2005', freq='H') - year, month, day = rng.year, rng.month, rng.day -except: - rng = date_range('1/1/2000', '12/31/2000', offset=datetools.Hour()) - year = rng.map(lambda x: x.year) - month = rng.map(lambda x: x.month) - day = rng.map(lambda x: x.day) - -ts = Series(np.random.randn(len(rng)), index=rng) -""" - -groupby_indices = Benchmark('len(ts.groupby([year, month, day]))', - setup, start_date=datetime(2012, 1, 1)) - -#---------------------------------------------------------------------- -# median - -#---------------------------------------------------------------------- -# single key, long, integer key - -setup = common_setup + """ -data = np.random.randn(100000, 2) -labels = np.random.randint(0, 1000, size=100000) -df = DataFrame(data) -""" - -groupby_frame_median = \ - Benchmark('df.groupby(labels).median()', setup, - start_date=datetime(2011, 8, 1), logy=True) - - -setup = common_setup + """ -data = np.random.randn(1000000, 2) -labels = np.random.randint(0, 1000, size=1000000) -df = DataFrame(data) -""" - -groupby_simple_compress_timing = \ - Benchmark('df.groupby(labels).mean()', setup, - start_date=datetime(2011, 8, 1)) - - -#---------------------------------------------------------------------- -# DataFrame Apply overhead - -setup = common_setup + """ -N = 10000 -labels = np.random.randint(0, 2000, size=N) -labels2 = np.random.randint(0, 3, size=N) -df = DataFrame({'key': labels, -'key2': labels2, -'value1': randn(N), -'value2': ['foo', 'bar', 'baz', 'qux'] * (N / 4)}) -def f(g): - return 1 -""" - -groupby_frame_apply_overhead = Benchmark("df.groupby('key').apply(f)", setup, - start_date=datetime(2011, 10, 1)) - -groupby_frame_apply = Benchmark("df.groupby(['key', 'key2']).apply(f)", setup, - start_date=datetime(2011, 10, 1)) - - 
-#---------------------------------------------------------------------- -# DataFrame nth - -setup = common_setup + """ -df = DataFrame(np.random.randint(1, 100, (10000, 2))) -""" - -# Not really a fair test as behaviour has changed! -groupby_frame_nth_none = Benchmark("df.groupby(0).nth(0)", setup, - start_date=datetime(2014, 3, 1)) - -groupby_series_nth_none = Benchmark("df[1].groupby(df[0]).nth(0)", setup, - start_date=datetime(2014, 3, 1)) -groupby_frame_nth_any= Benchmark("df.groupby(0).nth(0,dropna='any')", setup, - start_date=datetime(2014, 3, 1)) - -groupby_series_nth_any = Benchmark("df[1].groupby(df[0]).nth(0,dropna='any')", setup, - start_date=datetime(2014, 3, 1)) - - -#---------------------------------------------------------------------- -# Sum booleans #2692 - -setup = common_setup + """ -N = 500 -df = DataFrame({'ii':range(N),'bb':[True for x in range(N)]}) -""" - -groupby_sum_booleans = Benchmark("df.groupby('ii').sum()", setup) - - -#---------------------------------------------------------------------- -# multi-indexed group sum #9049 - -setup = common_setup + """ -N = 50 -df = DataFrame({'A': range(N) * 2, 'B': range(N*2), 'C': 1}).set_index(["A", "B"]) -""" - -groupby_sum_multiindex = Benchmark("df.groupby(level=[0, 1]).sum()", setup) - - -#---------------------------------------------------------------------- -# Transform testing - -setup = common_setup + """ -n_dates = 400 -n_securities = 250 -n_columns = 3 -share_na = 0.1 - -dates = date_range('1997-12-31', periods=n_dates, freq='B') -dates = Index(map(lambda x: x.year * 10000 + x.month * 100 + x.day, dates)) - -secid_min = int('10000000', 16) -secid_max = int('F0000000', 16) -step = (secid_max - secid_min) // (n_securities - 1) -security_ids = map(lambda x: hex(x)[2:10].upper(), range(secid_min, secid_max + 1, step)) - -data_index = MultiIndex(levels=[dates.values, security_ids], - labels=[[i for i in range(n_dates) for _ in xrange(n_securities)], range(n_securities) * n_dates], - names=['date', 'security_id']) -n_data = len(data_index) - -columns = Index(['factor{}'.format(i) for i in range(1, n_columns + 1)]) - -data = DataFrame(np.random.randn(n_data, n_columns), index=data_index, columns=columns) - -step = int(n_data * share_na) -for column_index in range(n_columns): - index = column_index - while index < n_data: - data.set_value(data_index[index], columns[column_index], np.nan) - index += step - -f_fillna = lambda x: x.fillna(method='pad') -""" - -groupby_transform = Benchmark("data.groupby(level='security_id').transform(f_fillna)", setup) -groupby_transform_ufunc = Benchmark("data.groupby(level='date').transform(np.max)", setup) - -setup = common_setup + """ -np.random.seed(0) - -N = 120000 -N_TRANSITIONS = 1400 - -# generate groups -transition_points = np.random.permutation(np.arange(N))[:N_TRANSITIONS] -transition_points.sort() -transitions = np.zeros((N,), dtype=np.bool) -transitions[transition_points] = True -g = transitions.cumsum() - -df = DataFrame({ 'signal' : np.random.rand(N)}) -""" -groupby_transform_series = Benchmark("df['signal'].groupby(g).transform(np.mean)", setup) - -setup = common_setup + """ -np.random.seed(0) - -df=DataFrame( { 'id' : np.arange( 100000 ) / 3, - 'val': np.random.randn( 100000) } ) -""" - -groupby_transform_series2 = Benchmark("df.groupby('id')['val'].transform(np.mean)", setup) - -setup = common_setup + ''' -np.random.seed(2718281) -n = 20000 -df = DataFrame(np.random.randint(1, n, (n, 3)), - columns=['jim', 'joe', 'jolie']) -''' - -stmt = "df.groupby(['jim', 
'joe'])['jolie'].transform('max')"; -groupby_transform_multi_key1 = Benchmark(stmt, setup) -groupby_transform_multi_key2 = Benchmark(stmt, setup + "df['jim'] = df['joe']") - -setup = common_setup + ''' -np.random.seed(2718281) -n = 200000 -df = DataFrame(np.random.randint(1, n / 10, (n, 3)), - columns=['jim', 'joe', 'jolie']) -''' -groupby_transform_multi_key3 = Benchmark(stmt, setup) -groupby_transform_multi_key4 = Benchmark(stmt, setup + "df['jim'] = df['joe']") - -setup = common_setup + ''' -np.random.seed(27182) -n = 100000 -df = DataFrame(np.random.randint(1, n / 100, (n, 3)), - columns=['jim', 'joe', 'jolie']) -''' - -groupby_agg_builtins1 = Benchmark("df.groupby('jim').agg([sum, min, max])", setup) -groupby_agg_builtins2 = Benchmark("df.groupby(['jim', 'joe']).agg([sum, min, max])", setup) - - -setup = common_setup + ''' -arr = np.random.randint(- 1 << 12, 1 << 12, (1 << 17, 5)) -i = np.random.choice(len(arr), len(arr) * 5) -arr = np.vstack((arr, arr[i])) # add sume duplicate rows - -i = np.random.permutation(len(arr)) -arr = arr[i] # shuffle rows - -df = DataFrame(arr, columns=list('abcde')) -df['jim'], df['joe'] = np.random.randn(2, len(df)) * 10 -''' - -groupby_int64_overflow = Benchmark("df.groupby(list('abcde')).max()", setup, - name='groupby_int64_overflow') - - -setup = common_setup + ''' -from itertools import product -from string import ascii_letters, digits - -n = 5 * 7 * 11 * (1 << 9) -alpha = list(map(''.join, product(ascii_letters + digits, repeat=4))) -f = lambda k: np.repeat(np.random.choice(alpha, n // k), k) - -df = DataFrame({'a': f(11), 'b': f(7), 'c': f(5), 'd': f(1)}) -df['joe'] = (np.random.randn(len(df)) * 10).round(3) - -i = np.random.permutation(len(df)) -df = df.iloc[i].reset_index(drop=True).copy() -''' - -groupby_multi_index = Benchmark("df.groupby(list('abcd')).max()", setup, - name='groupby_multi_index') - -#---------------------------------------------------------------------- -# groupby with a variable value for ngroups - - -ngroups_list = [100, 10000] -no_arg_func_list = [ - 'all', - 'any', - 'count', - 'cumcount', - 'cummax', - 'cummin', - 'cumprod', - 'cumsum', - 'describe', - 'diff', - 'first', - 'head', - 'last', - 'mad', - 'max', - 'mean', - 'median', - 'min', - 'nunique', - 'pct_change', - 'prod', - 'rank', - 'sem', - 'size', - 'skew', - 'std', - 'sum', - 'tail', - 'unique', - 'var', - 'value_counts', -] - - -_stmt_template = "df.groupby('value')['timestamp'].%s" -_setup_template = common_setup + """ -np.random.seed(1234) -ngroups = %s -size = ngroups * 2 -rng = np.arange(ngroups) -df = DataFrame(dict( - timestamp=rng.take(np.random.randint(0, ngroups, size=size)), - value=np.random.randint(0, size, size=size) -)) -""" -START_DATE = datetime(2011, 7, 1) - - -def make_large_ngroups_bmark(ngroups, func_name, func_args=''): - bmark_name = 'groupby_ngroups_%s_%s' % (ngroups, func_name) - stmt = _stmt_template % ('%s(%s)' % (func_name, func_args)) - setup = _setup_template % ngroups - bmark = Benchmark(stmt, setup, start_date=START_DATE) - # MUST set name - bmark.name = bmark_name - return bmark - - -def inject_bmark_into_globals(bmark): - if not bmark.name: - raise AssertionError('benchmark must have a name') - globals()[bmark.name] = bmark - - -for ngroups in ngroups_list: - for func_name in no_arg_func_list: - bmark = make_large_ngroups_bmark(ngroups, func_name) - inject_bmark_into_globals(bmark) - -# avoid bmark to be collected as Benchmark object -del bmark diff --git a/vb_suite/hdfstore_bench.py b/vb_suite/hdfstore_bench.py deleted file mode 
100644 index 393fd4cc77e66..0000000000000 --- a/vb_suite/hdfstore_bench.py +++ /dev/null @@ -1,278 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -start_date = datetime(2012, 7, 1) - -common_setup = """from .pandas_vb_common import * -import os - -f = '__test__.h5' -def remove(f): - try: - os.remove(f) - except: - pass - -""" - -#---------------------------------------------------------------------- -# get from a store - -setup1 = common_setup + """ -index = tm.makeStringIndex(25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000)}, - index=index) -remove(f) -store = HDFStore(f) -store.put('df1',df) -""" - -read_store = Benchmark("store.get('df1')", setup1, cleanup="store.close()", - start_date=start_date) - - -#---------------------------------------------------------------------- -# write to a store - -setup2 = common_setup + """ -index = tm.makeStringIndex(25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000)}, - index=index) -remove(f) -store = HDFStore(f) -""" - -write_store = Benchmark( - "store.put('df2',df)", setup2, cleanup="store.close()", - start_date=start_date) - -#---------------------------------------------------------------------- -# get from a store (mixed) - -setup3 = common_setup + """ -index = tm.makeStringIndex(25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000), - 'string1' : ['foo'] * 25000, - 'bool1' : [True] * 25000, - 'int1' : np.random.randint(0, 250000, size=25000)}, - index=index) -remove(f) -store = HDFStore(f) -store.put('df3',df) -""" - -read_store_mixed = Benchmark( - "store.get('df3')", setup3, cleanup="store.close()", - start_date=start_date) - - -#---------------------------------------------------------------------- -# write to a store (mixed) - -setup4 = common_setup + """ -index = tm.makeStringIndex(25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000), - 'string1' : ['foo'] * 25000, - 'bool1' : [True] * 25000, - 'int1' : np.random.randint(0, 250000, size=25000)}, - index=index) -remove(f) -store = HDFStore(f) -""" - -write_store_mixed = Benchmark( - "store.put('df4',df)", setup4, cleanup="store.close()", - start_date=start_date) - -#---------------------------------------------------------------------- -# get from a table (mixed) - -setup5 = common_setup + """ -N=10000 -index = tm.makeStringIndex(N) -df = DataFrame({'float1' : randn(N), - 'float2' : randn(N), - 'string1' : ['foo'] * N, - 'bool1' : [True] * N, - 'int1' : np.random.randint(0, N, size=N)}, - index=index) - -remove(f) -store = HDFStore(f) -store.append('df5',df) -""" - -read_store_table_mixed = Benchmark( - "store.select('df5')", setup5, cleanup="store.close()", - start_date=start_date) - - -#---------------------------------------------------------------------- -# write to a table (mixed) - -setup6 = common_setup + """ -index = tm.makeStringIndex(25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000), - 'string1' : ['foo'] * 25000, - 'bool1' : [True] * 25000, - 'int1' : np.random.randint(0, 25000, size=25000)}, - index=index) -remove(f) -store = HDFStore(f) -""" - -write_store_table_mixed = Benchmark( - "store.append('df6',df)", setup6, cleanup="store.close()", - start_date=start_date) - -#---------------------------------------------------------------------- -# select from a table - -setup7 = common_setup + """ -index = tm.makeStringIndex(25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000) }, - index=index) - -remove(f) -store = 
HDFStore(f) -store.append('df7',df) -""" - -read_store_table = Benchmark( - "store.select('df7')", setup7, cleanup="store.close()", - start_date=start_date) - - -#---------------------------------------------------------------------- -# write to a table - -setup8 = common_setup + """ -index = tm.makeStringIndex(25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000) }, - index=index) -remove(f) -store = HDFStore(f) -""" - -write_store_table = Benchmark( - "store.append('df8',df)", setup8, cleanup="store.close()", - start_date=start_date) - -#---------------------------------------------------------------------- -# get from a table (wide) - -setup9 = common_setup + """ -df = DataFrame(np.random.randn(25000,100)) - -remove(f) -store = HDFStore(f) -store.append('df9',df) -""" - -read_store_table_wide = Benchmark( - "store.select('df9')", setup9, cleanup="store.close()", - start_date=start_date) - - -#---------------------------------------------------------------------- -# write to a table (wide) - -setup10 = common_setup + """ -df = DataFrame(np.random.randn(25000,100)) - -remove(f) -store = HDFStore(f) -""" - -write_store_table_wide = Benchmark( - "store.append('df10',df)", setup10, cleanup="store.close()", - start_date=start_date) - -#---------------------------------------------------------------------- -# get from a table (wide) - -setup11 = common_setup + """ -index = date_range('1/1/2000', periods = 25000) -df = DataFrame(np.random.randn(25000,100), index = index) - -remove(f) -store = HDFStore(f) -store.append('df11',df) -""" - -query_store_table_wide = Benchmark( - "store.select('df11', [ ('index', '>', df.index[10000]), ('index', '<', df.index[15000]) ])", setup11, cleanup="store.close()", - start_date=start_date) - - -#---------------------------------------------------------------------- -# query from a table - -setup12 = common_setup + """ -index = date_range('1/1/2000', periods = 25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000) }, - index=index) - -remove(f) -store = HDFStore(f) -store.append('df12',df) -""" - -query_store_table = Benchmark( - "store.select('df12', [ ('index', '>', df.index[10000]), ('index', '<', df.index[15000]) ])", setup12, cleanup="store.close()", - start_date=start_date) - -#---------------------------------------------------------------------- -# select from a panel table - -setup13 = common_setup + """ -p = Panel(randn(20, 1000, 25), items= [ 'Item%03d' % i for i in range(20) ], - major_axis=date_range('1/1/2000', periods=1000), minor_axis = [ 'E%03d' % i for i in range(25) ]) - -remove(f) -store = HDFStore(f) -store.append('p1',p) -""" - -read_store_table_panel = Benchmark( - "store.select('p1')", setup13, cleanup="store.close()", - start_date=start_date) - - -#---------------------------------------------------------------------- -# write to a panel table - -setup14 = common_setup + """ -p = Panel(randn(20, 1000, 25), items= [ 'Item%03d' % i for i in range(20) ], - major_axis=date_range('1/1/2000', periods=1000), minor_axis = [ 'E%03d' % i for i in range(25) ]) - -remove(f) -store = HDFStore(f) -""" - -write_store_table_panel = Benchmark( - "store.append('p2',p)", setup14, cleanup="store.close()", - start_date=start_date) - -#---------------------------------------------------------------------- -# write to a table (data_columns) - -setup15 = common_setup + """ -df = DataFrame(np.random.randn(10000,10),columns = [ 'C%03d' % i for i in range(10) ]) - -remove(f) -store = HDFStore(f) -""" - 
-write_store_table_dc = Benchmark( - "store.append('df15',df,data_columns=True)", setup15, cleanup="store.close()", - start_date=start_date) - diff --git a/vb_suite/index_object.py b/vb_suite/index_object.py deleted file mode 100644 index 2ab2bc15f3853..0000000000000 --- a/vb_suite/index_object.py +++ /dev/null @@ -1,173 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -SECTION = "Index / MultiIndex objects" - - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# intersection, union - -setup = common_setup + """ -rng = DatetimeIndex(start='1/1/2000', periods=10000, freq=datetools.Minute()) -if rng.dtype == object: - rng = rng.view(Index) -else: - rng = rng.asobject -rng2 = rng[:-1] -""" - -index_datetime_intersection = Benchmark("rng.intersection(rng2)", setup) -index_datetime_union = Benchmark("rng.union(rng2)", setup) - -setup = common_setup + """ -rng = date_range('1/1/2000', periods=10000, freq='T') -rng2 = rng[:-1] -""" - -datetime_index_intersection = Benchmark("rng.intersection(rng2)", setup, - start_date=datetime(2013, 9, 27)) -datetime_index_union = Benchmark("rng.union(rng2)", setup, - start_date=datetime(2013, 9, 27)) - -# integers -setup = common_setup + """ -N = 1000000 -options = np.arange(N) - -left = Index(options.take(np.random.permutation(N)[:N // 2])) -right = Index(options.take(np.random.permutation(N)[:N // 2])) -""" - -index_int64_union = Benchmark('left.union(right)', setup, - start_date=datetime(2011, 1, 1)) - -index_int64_intersection = Benchmark('left.intersection(right)', setup, - start_date=datetime(2011, 1, 1)) - -#---------------------------------------------------------------------- -# string index slicing -setup = common_setup + """ -idx = tm.makeStringIndex(1000000) - -mask = np.arange(1000000) % 3 == 0 -series_mask = Series(mask) -""" -index_str_slice_indexer_basic = Benchmark('idx[:-1]', setup) -index_str_slice_indexer_even = Benchmark('idx[::2]', setup) -index_str_boolean_indexer = Benchmark('idx[mask]', setup) -index_str_boolean_series_indexer = Benchmark('idx[series_mask]', setup) - -#---------------------------------------------------------------------- -# float64 index -#---------------------------------------------------------------------- -# construction -setup = common_setup + """ -baseidx = np.arange(1e6) -""" - -index_float64_construct = Benchmark('Index(baseidx)', setup, - name='index_float64_construct', - start_date=datetime(2014, 4, 13)) - -setup = common_setup + """ -idx = tm.makeFloatIndex(1000000) - -mask = np.arange(idx.size) % 3 == 0 -series_mask = Series(mask) -""" -#---------------------------------------------------------------------- -# getting -index_float64_get = Benchmark('idx[1]', setup, name='index_float64_get', - start_date=datetime(2014, 4, 13)) - - -#---------------------------------------------------------------------- -# slicing -index_float64_slice_indexer_basic = Benchmark('idx[:-1]', setup, - name='index_float64_slice_indexer_basic', - start_date=datetime(2014, 4, 13)) -index_float64_slice_indexer_even = Benchmark('idx[::2]', setup, - name='index_float64_slice_indexer_even', - start_date=datetime(2014, 4, 13)) -index_float64_boolean_indexer = Benchmark('idx[mask]', setup, - name='index_float64_boolean_indexer', - start_date=datetime(2014, 4, 13)) -index_float64_boolean_series_indexer = Benchmark('idx[series_mask]', setup, - name='index_float64_boolean_series_indexer', - start_date=datetime(2014, 4, 13)) - 
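
The set-operation benchmarks earlier in this file (index_int64_union / index_int64_intersection) can be reproduced outside the vbench harness; a minimal sketch (not part of this patch) using timeit directly:

    import timeit
    import numpy as np
    from pandas import Index

    N = 1000000
    options = np.arange(N)
    # two half-size, shuffled integer indexes, as in the benchmark setup
    left = Index(options.take(np.random.permutation(N)[:N // 2]))
    right = Index(options.take(np.random.permutation(N)[:N // 2]))

    print(timeit.timeit(lambda: left.union(right), number=10))
    print(timeit.timeit(lambda: left.intersection(right), number=10))
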
-#---------------------------------------------------------------------- -# arith ops -index_float64_mul = Benchmark('idx * 2', setup, name='index_float64_mul', - start_date=datetime(2014, 4, 13)) -index_float64_div = Benchmark('idx / 2', setup, name='index_float64_div', - start_date=datetime(2014, 4, 13)) - - -# Constructing MultiIndex from cartesian product of iterables -# - -setup = common_setup + """ -iterables = [tm.makeStringIndex(10000), range(20)] -""" - -multiindex_from_product = Benchmark('MultiIndex.from_product(iterables)', - setup, name='multiindex_from_product', - start_date=datetime(2014, 6, 30)) - -#---------------------------------------------------------------------- -# MultiIndex with DatetimeIndex level - -setup = common_setup + """ -level1 = range(1000) -level2 = date_range(start='1/1/2012', periods=100) -mi = MultiIndex.from_product([level1, level2]) -""" - -multiindex_with_datetime_level_full = \ - Benchmark("mi.copy().values", setup, - name='multiindex_with_datetime_level_full', - start_date=datetime(2014, 10, 11)) - - -multiindex_with_datetime_level_sliced = \ - Benchmark("mi[:10].values", setup, - name='multiindex_with_datetime_level_sliced', - start_date=datetime(2014, 10, 11)) - -# multi-index duplicated -setup = common_setup + """ -n, k = 200, 5000 -levels = [np.arange(n), tm.makeStringIndex(n).values, 1000 + np.arange(n)] -labels = [np.random.choice(n, k * n) for lev in levels] -mi = MultiIndex(levels=levels, labels=labels) -""" - -multiindex_duplicated = Benchmark('mi.duplicated()', setup, - name='multiindex_duplicated') - -#---------------------------------------------------------------------- -# repr - -setup = common_setup + """ -dr = pd.date_range('20000101', freq='D', periods=100000) -""" - -datetime_index_repr = \ - Benchmark("dr._is_dates_only", setup, - start_date=datetime(2012, 1, 11)) - -setup = common_setup + """ -n = 3 * 5 * 7 * 11 * (1 << 10) -low, high = - 1 << 12, 1 << 12 -f = lambda k: np.repeat(np.random.randint(low, high, n // k), k) - -i = np.random.permutation(n) -mi = MultiIndex.from_arrays([f(11), f(7), f(5), f(3), f(1)])[i] -""" - -multiindex_sortlevel_int64 = Benchmark('mi.sortlevel()', setup, - name='multiindex_sortlevel_int64') diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py deleted file mode 100644 index ff634bf2a8fc7..0000000000000 --- a/vb_suite/indexing.py +++ /dev/null @@ -1,292 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -SECTION = 'Indexing and scalar value access' - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# Series.__getitem__, get_value, __getitem__(slice) - -setup = common_setup + """ -tm.N = 1000 -ts = tm.makeTimeSeries() -dt = ts.index[500] -""" -statement = "ts[dt]" -bm_getitem = Benchmark(statement, setup, ncalls=100000, - name='time_series_getitem_scalar') - -setup = common_setup + """ -index = tm.makeStringIndex(1000) -s = Series(np.random.rand(1000), index=index) -idx = index[100] -""" -statement = "s.get_value(idx)" -bm_get_value = Benchmark(statement, setup, - name='series_get_value', - start_date=datetime(2011, 11, 12)) - - -setup = common_setup + """ -index = tm.makeStringIndex(1000000) -s = Series(np.random.rand(1000000), index=index) -""" -series_getitem_pos_slice = Benchmark("s[:800000]", setup, - name="series_getitem_pos_slice") - - -setup = common_setup + """ -index = tm.makeStringIndex(1000000) -s = Series(np.random.rand(1000000), index=index) -lbl = s.index[800000] -""" 
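
The label-slice benchmark just below reuses `lbl` from the setup above. As a standalone sketch (not part of this patch) of the two access patterns this file times, with the string index built inline rather than via `tm.makeStringIndex`:

    import numpy as np
    from pandas import Series

    n = 1000000
    index = np.array(['k%07d' % i for i in range(n)])
    s = Series(np.random.rand(n), index=index)

    pos_slice = s[:800000]   # positional slice: a cheap view
    lbl = s.index[800000]
    lbl_slice = s[:lbl]      # label slice: resolved through the index, inclusive of lbl
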
-series_getitem_label_slice = Benchmark("s[:lbl]", setup, - name="series_getitem_label_slice") - - -#---------------------------------------------------------------------- -# DataFrame __getitem__ - -setup = common_setup + """ -index = tm.makeStringIndex(1000) -columns = tm.makeStringIndex(30) -df = DataFrame(np.random.rand(1000, 30), index=index, - columns=columns) -idx = index[100] -col = columns[10] -""" -statement = "df[col][idx]" -bm_df_getitem = Benchmark(statement, setup, - name='dataframe_getitem_scalar') - -setup = common_setup + """ -try: - klass = DataMatrix -except: - klass = DataFrame - -index = tm.makeStringIndex(1000) -columns = tm.makeStringIndex(30) -df = klass(np.random.rand(1000, 30), index=index, columns=columns) -idx = index[100] -col = columns[10] -""" -statement = "df[col][idx]" -bm_df_getitem2 = Benchmark(statement, setup, - name='datamatrix_getitem_scalar') - - -#---------------------------------------------------------------------- -# ix get scalar - -setup = common_setup + """ -index = tm.makeStringIndex(1000) -columns = tm.makeStringIndex(30) -df = DataFrame(np.random.randn(1000, 30), index=index, columns=columns) -idx = index[100] -col = columns[10] -""" - -indexing_frame_get_value_ix = Benchmark("df.ix[idx,col]", setup, - name='indexing_frame_get_value_ix', - start_date=datetime(2011, 11, 12)) - -indexing_frame_get_value = Benchmark("df.get_value(idx,col)", setup, - name='indexing_frame_get_value', - start_date=datetime(2011, 11, 12)) - -setup = common_setup + """ -mi = MultiIndex.from_tuples([(x,y) for x in range(1000) for y in range(1000)]) -s = Series(np.random.randn(1000000), index=mi) -""" - -series_xs_mi_ix = Benchmark("s.ix[999]", setup, - name='series_xs_mi_ix', - start_date=datetime(2013, 1, 1)) - -setup = common_setup + """ -mi = MultiIndex.from_tuples([(x,y) for x in range(1000) for y in range(1000)]) -s = Series(np.random.randn(1000000), index=mi) -df = DataFrame(s) -""" - -frame_xs_mi_ix = Benchmark("df.ix[999]", setup, - name='frame_xs_mi_ix', - start_date=datetime(2013, 1, 1)) - -#---------------------------------------------------------------------- -# Boolean DataFrame row selection - -setup = common_setup + """ -df = DataFrame(np.random.randn(10000, 4), columns=['A', 'B', 'C', 'D']) -indexer = df['B'] > 0 -obj_indexer = indexer.astype('O') -""" -indexing_dataframe_boolean_rows = \ - Benchmark("df[indexer]", setup, name='indexing_dataframe_boolean_rows') - -indexing_dataframe_boolean_rows_object = \ - Benchmark("df[obj_indexer]", setup, - name='indexing_dataframe_boolean_rows_object') - -setup = common_setup + """ -df = DataFrame(np.random.randn(50000, 100)) -df2 = DataFrame(np.random.randn(50000, 100)) -""" -indexing_dataframe_boolean = \ - Benchmark("df > df2", setup, name='indexing_dataframe_boolean', - start_date=datetime(2012, 1, 1)) - -setup = common_setup + """ -try: - import pandas.core.computation.expressions as expr -except: - expr = None - -if expr is None: - raise NotImplementedError -df = DataFrame(np.random.randn(50000, 100)) -df2 = DataFrame(np.random.randn(50000, 100)) -expr.set_numexpr_threads(1) -""" - -indexing_dataframe_boolean_st = \ - Benchmark("df > df2", setup, name='indexing_dataframe_boolean_st',cleanup="expr.set_numexpr_threads()", - start_date=datetime(2013, 2, 26)) - - -setup = common_setup + """ -try: - import pandas.core.computation.expressions as expr -except: - expr = None - -if expr is None: - raise NotImplementedError -df = DataFrame(np.random.randn(50000, 100)) -df2 = DataFrame(np.random.randn(50000, 100)) 
-expr.set_use_numexpr(False) -""" - -indexing_dataframe_boolean_no_ne = \ - Benchmark("df > df2", setup, name='indexing_dataframe_boolean_no_ne',cleanup="expr.set_use_numexpr(True)", - start_date=datetime(2013, 2, 26)) -#---------------------------------------------------------------------- -# MultiIndex sortlevel - -setup = common_setup + """ -a = np.repeat(np.arange(100), 1000) -b = np.tile(np.arange(1000), 100) -midx = MultiIndex.from_arrays([a, b]) -midx = midx.take(np.random.permutation(np.arange(100000))) -""" -sort_level_zero = Benchmark("midx.sortlevel(0)", setup, - start_date=datetime(2012, 1, 1)) -sort_level_one = Benchmark("midx.sortlevel(1)", setup, - start_date=datetime(2012, 1, 1)) - -#---------------------------------------------------------------------- -# Panel subset selection - -setup = common_setup + """ -p = Panel(np.random.randn(100, 100, 100)) -inds = range(0, 100, 10) -""" - -indexing_panel_subset = Benchmark('p.ix[inds, inds, inds]', setup, - start_date=datetime(2012, 1, 1)) - -#---------------------------------------------------------------------- -# Iloc - -setup = common_setup + """ -df = DataFrame({'A' : [0.1] * 3000, 'B' : [1] * 3000}) -idx = np.array(range(30)) * 99 -df2 = DataFrame({'A' : [0.1] * 1000, 'B' : [1] * 1000}) -df2 = concat([df2, 2*df2, 3*df2]) -""" - -frame_iloc_dups = Benchmark('df2.iloc[idx]', setup, - start_date=datetime(2013, 1, 1)) - -frame_loc_dups = Benchmark('df2.loc[idx]', setup, - start_date=datetime(2013, 1, 1)) - -setup = common_setup + """ -df = DataFrame(dict( A = [ 'foo'] * 1000000)) -""" - -frame_iloc_big = Benchmark('df.iloc[:100,0]', setup, - start_date=datetime(2013, 1, 1)) - -#---------------------------------------------------------------------- -# basic tests for [], .loc[], .iloc[] and .ix[] - -setup = common_setup + """ -s = Series(np.random.rand(1000000)) -""" - -series_getitem_scalar = Benchmark("s[800000]", setup) -series_getitem_slice = Benchmark("s[:800000]", setup) -series_getitem_list_like = Benchmark("s[[800000]]", setup) -series_getitem_array = Benchmark("s[np.arange(10000)]", setup) - -series_loc_scalar = Benchmark("s.loc[800000]", setup) -series_loc_slice = Benchmark("s.loc[:800000]", setup) -series_loc_list_like = Benchmark("s.loc[[800000]]", setup) -series_loc_array = Benchmark("s.loc[np.arange(10000)]", setup) - -series_iloc_scalar = Benchmark("s.iloc[800000]", setup) -series_iloc_slice = Benchmark("s.iloc[:800000]", setup) -series_iloc_list_like = Benchmark("s.iloc[[800000]]", setup) -series_iloc_array = Benchmark("s.iloc[np.arange(10000)]", setup) - -series_ix_scalar = Benchmark("s.ix[800000]", setup) -series_ix_slice = Benchmark("s.ix[:800000]", setup) -series_ix_list_like = Benchmark("s.ix[[800000]]", setup) -series_ix_array = Benchmark("s.ix[np.arange(10000)]", setup) - - -# multi-index slicing -setup = common_setup + """ -np.random.seed(1234) -idx=pd.IndexSlice -n=100000 -mdt = pandas.DataFrame() -mdt['A'] = np.random.choice(range(10000,45000,1000), n) -mdt['B'] = np.random.choice(range(10,400), n) -mdt['C'] = np.random.choice(range(1,150), n) -mdt['D'] = np.random.choice(range(10000,45000), n) -mdt['x'] = np.random.choice(range(400), n) -mdt['y'] = np.random.choice(range(25), n) - - -test_A = 25000 -test_B = 25 -test_C = 40 -test_D = 35000 - -eps_A = 5000 -eps_B = 5 -eps_C = 5 -eps_D = 5000 -mdt2 = mdt.set_index(['A','B','C','D']).sortlevel() -""" - -multiindex_slicers = Benchmark('mdt2.loc[idx[test_A-eps_A:test_A+eps_A,test_B-eps_B:test_B+eps_B,test_C-eps_C:test_C+eps_C,test_D-eps_D:test_D+eps_D],:]', 
setup, - start_date=datetime(2015, 1, 1)) - -#---------------------------------------------------------------------- -# take - -setup = common_setup + """ -s = Series(np.random.rand(100000)) -ts = Series(np.random.rand(100000), - index=date_range('2011-01-01', freq='S', periods=100000)) -indexer = [True, False, True, True, False] * 20000 -""" - -series_take_intindex = Benchmark("s.take(indexer)", setup) -series_take_dtindex = Benchmark("ts.take(indexer)", setup) diff --git a/vb_suite/inference.py b/vb_suite/inference.py deleted file mode 100644 index aaa51aa5163ce..0000000000000 --- a/vb_suite/inference.py +++ /dev/null @@ -1,36 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime -import sys - -# from GH 7332 - -setup = """from .pandas_vb_common import * -import pandas as pd -N = 500000 -df_int64 = DataFrame(dict(A = np.arange(N,dtype='int64'), B = np.arange(N,dtype='int64'))) -df_int32 = DataFrame(dict(A = np.arange(N,dtype='int32'), B = np.arange(N,dtype='int32'))) -df_uint32 = DataFrame(dict(A = np.arange(N,dtype='uint32'), B = np.arange(N,dtype='uint32'))) -df_float64 = DataFrame(dict(A = np.arange(N,dtype='float64'), B = np.arange(N,dtype='float64'))) -df_float32 = DataFrame(dict(A = np.arange(N,dtype='float32'), B = np.arange(N,dtype='float32'))) -df_datetime64 = DataFrame(dict(A = pd.to_datetime(np.arange(N,dtype='int64'),unit='ms'), - B = pd.to_datetime(np.arange(N,dtype='int64'),unit='ms'))) -df_timedelta64 = DataFrame(dict(A = df_datetime64['A']-df_datetime64['B'], - B = df_datetime64['B'])) -""" - -dtype_infer_int64 = Benchmark('df_int64["A"] + df_int64["B"]', setup, - start_date=datetime(2014, 1, 1)) -dtype_infer_int32 = Benchmark('df_int32["A"] + df_int32["B"]', setup, - start_date=datetime(2014, 1, 1)) -dtype_infer_uint32 = Benchmark('df_uint32["A"] + df_uint32["B"]', setup, - start_date=datetime(2014, 1, 1)) -dtype_infer_float64 = Benchmark('df_float64["A"] + df_float64["B"]', setup, - start_date=datetime(2014, 1, 1)) -dtype_infer_float32 = Benchmark('df_float32["A"] + df_float32["B"]', setup, - start_date=datetime(2014, 1, 1)) -dtype_infer_datetime64 = Benchmark('df_datetime64["A"] - df_datetime64["B"]', setup, - start_date=datetime(2014, 1, 1)) -dtype_infer_timedelta64_1 = Benchmark('df_timedelta64["A"] + df_timedelta64["B"]', setup, - start_date=datetime(2014, 1, 1)) -dtype_infer_timedelta64_2 = Benchmark('df_timedelta64["A"] + df_timedelta64["A"]', setup, - start_date=datetime(2014, 1, 1)) diff --git a/vb_suite/io_bench.py b/vb_suite/io_bench.py deleted file mode 100644 index af5f6076515cc..0000000000000 --- a/vb_suite/io_bench.py +++ /dev/null @@ -1,150 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -from io import StringIO -""" - -#---------------------------------------------------------------------- -# read_csv - -setup1 = common_setup + """ -index = tm.makeStringIndex(10000) -df = DataFrame({'float1' : randn(10000), - 'float2' : randn(10000), - 'string1' : ['foo'] * 10000, - 'bool1' : [True] * 10000, - 'int1' : np.random.randint(0, 100000, size=10000)}, - index=index) -df.to_csv('__test__.csv') -""" - -read_csv_standard = Benchmark("read_csv('__test__.csv')", setup1, - start_date=datetime(2011, 9, 15)) - -#---------------------------------- -# skiprows - -setup1 = common_setup + """ -index = tm.makeStringIndex(20000) -df = DataFrame({'float1' : randn(20000), - 'float2' : randn(20000), - 'string1' : ['foo'] * 20000, - 'bool1' : [True] * 20000, - 'int1' : 
np.random.randint(0, 200000, size=20000)}, - index=index) -df.to_csv('__test__.csv') -""" - -read_csv_skiprows = Benchmark("read_csv('__test__.csv', skiprows=10000)", setup1, - start_date=datetime(2011, 9, 15)) - -#---------------------------------------------------------------------- -# write_csv - -setup2 = common_setup + """ -index = tm.makeStringIndex(10000) -df = DataFrame({'float1' : randn(10000), - 'float2' : randn(10000), - 'string1' : ['foo'] * 10000, - 'bool1' : [True] * 10000, - 'int1' : np.random.randint(0, 100000, size=10000)}, - index=index) -""" - -write_csv_standard = Benchmark("df.to_csv('__test__.csv')", setup2, - start_date=datetime(2011, 9, 15)) - -#---------------------------------- -setup = common_setup + """ -df = DataFrame(np.random.randn(3000, 30)) -""" -frame_to_csv = Benchmark("df.to_csv('__test__.csv')", setup, - start_date=datetime(2011, 1, 1)) -#---------------------------------- - -setup = common_setup + """ -df=DataFrame({'A':range(50000)}) -df['B'] = df.A + 1.0 -df['C'] = df.A + 2.0 -df['D'] = df.A + 3.0 -""" -frame_to_csv2 = Benchmark("df.to_csv('__test__.csv')", setup, - start_date=datetime(2011, 1, 1)) - -#---------------------------------- -setup = common_setup + """ -from pandas import concat, Timestamp - -def create_cols(name): - return [ "%s%03d" % (name,i) for i in range(5) ] -df_float = DataFrame(np.random.randn(5000, 5),dtype='float64',columns=create_cols('float')) -df_int = DataFrame(np.random.randn(5000, 5),dtype='int64',columns=create_cols('int')) -df_bool = DataFrame(True,index=df_float.index,columns=create_cols('bool')) -df_object = DataFrame('foo',index=df_float.index,columns=create_cols('object')) -df_dt = DataFrame(Timestamp('20010101'),index=df_float.index,columns=create_cols('date')) - -# add in some nans -df_float.ix[30:500,1:3] = np.nan - -df = concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1) - -""" -frame_to_csv_mixed = Benchmark("df.to_csv('__test__.csv')", setup, - start_date=datetime(2012, 6, 1)) - -#---------------------------------------------------------------------- -# parse dates, ISO8601 format - -setup = common_setup + """ -rng = date_range('1/1/2000', periods=1000) -data = '\\n'.join(rng.map(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"))) -""" - -stmt = ("read_csv(StringIO(data), header=None, names=['foo'], " - " parse_dates=['foo'])") -read_parse_dates_iso8601 = Benchmark(stmt, setup, - start_date=datetime(2012, 3, 1)) - -setup = common_setup + """ -rng = date_range('1/1/2000', periods=1000) -data = DataFrame(rng, index=rng) -""" - -stmt = ("data.to_csv('__test__.csv', date_format='%Y%m%d')") - -frame_to_csv_date_formatting = Benchmark(stmt, setup, - start_date=datetime(2013, 9, 1)) - -#---------------------------------------------------------------------- -# infer datetime format - -setup = common_setup + """ -rng = date_range('1/1/2000', periods=1000) -data = '\\n'.join(rng.map(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"))) -""" - -stmt = ("read_csv(StringIO(data), header=None, names=['foo'], " - " parse_dates=['foo'], infer_datetime_format=True)") - -read_csv_infer_datetime_format_iso8601 = Benchmark(stmt, setup) - -setup = common_setup + """ -rng = date_range('1/1/2000', periods=1000) -data = '\\n'.join(rng.map(lambda x: x.strftime("%Y%m%d"))) -""" - -stmt = ("read_csv(StringIO(data), header=None, names=['foo'], " - " parse_dates=['foo'], infer_datetime_format=True)") - -read_csv_infer_datetime_format_ymd = Benchmark(stmt, setup) - -setup = common_setup + """ -rng = date_range('1/1/2000', periods=1000) -data = 
'\\n'.join(rng.map(lambda x: x.strftime("%m/%d/%Y %H:%M:%S.%f"))) -""" - -stmt = ("read_csv(StringIO(data), header=None, names=['foo'], " - " parse_dates=['foo'], infer_datetime_format=True)") - -read_csv_infer_datetime_format_custom = Benchmark(stmt, setup) diff --git a/vb_suite/io_sql.py b/vb_suite/io_sql.py deleted file mode 100644 index ba8367e7e356b..0000000000000 --- a/vb_suite/io_sql.py +++ /dev/null @@ -1,126 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -import sqlite3 -import sqlalchemy -from sqlalchemy import create_engine - -engine = create_engine('sqlite:///:memory:') -con = sqlite3.connect(':memory:') -""" - -sdate = datetime(2014, 6, 1) - - -#------------------------------------------------------------------------------- -# to_sql - -setup = common_setup + """ -index = tm.makeStringIndex(10000) -df = DataFrame({'float1' : randn(10000), - 'float2' : randn(10000), - 'string1' : ['foo'] * 10000, - 'bool1' : [True] * 10000, - 'int1' : np.random.randint(0, 100000, size=10000)}, - index=index) -""" - -sql_write_sqlalchemy = Benchmark("df.to_sql('test1', engine, if_exists='replace')", - setup, start_date=sdate) - -sql_write_fallback = Benchmark("df.to_sql('test1', con, if_exists='replace')", - setup, start_date=sdate) - - -#------------------------------------------------------------------------------- -# read_sql - -setup = common_setup + """ -index = tm.makeStringIndex(10000) -df = DataFrame({'float1' : randn(10000), - 'float2' : randn(10000), - 'string1' : ['foo'] * 10000, - 'bool1' : [True] * 10000, - 'int1' : np.random.randint(0, 100000, size=10000)}, - index=index) -df.to_sql('test2', engine, if_exists='replace') -df.to_sql('test2', con, if_exists='replace') -""" - -sql_read_query_sqlalchemy = Benchmark("read_sql_query('SELECT * FROM test2', engine)", - setup, start_date=sdate) - -sql_read_query_fallback = Benchmark("read_sql_query('SELECT * FROM test2', con)", - setup, start_date=sdate) - -sql_read_table_sqlalchemy = Benchmark("read_sql_table('test2', engine)", - setup, start_date=sdate) - - -#------------------------------------------------------------------------------- -# type specific write - -setup = common_setup + """ -df = DataFrame({'float' : randn(10000), - 'string' : ['foo'] * 10000, - 'bool' : [True] * 10000, - 'datetime' : date_range('2000-01-01', periods=10000, freq='s')}) -df.loc[1000:3000, 'float'] = np.nan -""" - -sql_float_write_sqlalchemy = \ - Benchmark("df[['float']].to_sql('test_float', engine, if_exists='replace')", - setup, start_date=sdate) - -sql_float_write_fallback = \ - Benchmark("df[['float']].to_sql('test_float', con, if_exists='replace')", - setup, start_date=sdate) - -sql_string_write_sqlalchemy = \ - Benchmark("df[['string']].to_sql('test_string', engine, if_exists='replace')", - setup, start_date=sdate) - -sql_string_write_fallback = \ - Benchmark("df[['string']].to_sql('test_string', con, if_exists='replace')", - setup, start_date=sdate) - -sql_datetime_write_sqlalchemy = \ - Benchmark("df[['datetime']].to_sql('test_datetime', engine, if_exists='replace')", - setup, start_date=sdate) - -#sql_datetime_write_fallback = \ -# Benchmark("df[['datetime']].to_sql('test_datetime', con, if_exists='replace')", -# setup3, start_date=sdate) - -#------------------------------------------------------------------------------- -# type specific read - -setup = common_setup + """ -df = DataFrame({'float' : randn(10000), - 'datetime' : date_range('2000-01-01', periods=10000, 
freq='s')}) -df['datetime_string'] = df['datetime'].map(str) - -df.to_sql('test_type', engine, if_exists='replace') -df[['float', 'datetime_string']].to_sql('test_type', con, if_exists='replace') -""" - -sql_float_read_query_sqlalchemy = \ - Benchmark("read_sql_query('SELECT float FROM test_type', engine)", - setup, start_date=sdate) - -sql_float_read_table_sqlalchemy = \ - Benchmark("read_sql_table('test_type', engine, columns=['float'])", - setup, start_date=sdate) - -sql_float_read_query_fallback = \ - Benchmark("read_sql_query('SELECT float FROM test_type', con)", - setup, start_date=sdate) - -sql_datetime_read_as_native_sqlalchemy = \ - Benchmark("read_sql_table('test_type', engine, columns=['datetime'])", - setup, start_date=sdate) - -sql_datetime_read_and_parse_sqlalchemy = \ - Benchmark("read_sql_table('test_type', engine, columns=['datetime_string'], parse_dates=['datetime_string'])", - setup, start_date=sdate) diff --git a/vb_suite/join_merge.py b/vb_suite/join_merge.py deleted file mode 100644 index 238a129552e90..0000000000000 --- a/vb_suite/join_merge.py +++ /dev/null @@ -1,270 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -setup = common_setup + """ -level1 = tm.makeStringIndex(10).values -level2 = tm.makeStringIndex(1000).values -label1 = np.arange(10).repeat(1000) -label2 = np.tile(np.arange(1000), 10) - -key1 = np.tile(level1.take(label1), 10) -key2 = np.tile(level2.take(label2), 10) - -shuf = np.arange(100000) -random.shuffle(shuf) -try: - index2 = MultiIndex(levels=[level1, level2], labels=[label1, label2]) - index3 = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], - labels=[np.arange(10).repeat(10000), - np.tile(np.arange(100).repeat(100), 10), - np.tile(np.tile(np.arange(100), 100), 10)]) - df_multi = DataFrame(np.random.randn(len(index2), 4), index=index2, - columns=['A', 'B', 'C', 'D']) -except: # pre-MultiIndex - pass - -try: - DataFrame = DataMatrix -except: - pass - -df = pd.DataFrame({'data1' : np.random.randn(100000), - 'data2' : np.random.randn(100000), - 'key1' : key1, - 'key2' : key2}) - - -df_key1 = pd.DataFrame(np.random.randn(len(level1), 4), index=level1, - columns=['A', 'B', 'C', 'D']) -df_key2 = pd.DataFrame(np.random.randn(len(level2), 4), index=level2, - columns=['A', 'B', 'C', 'D']) - -df_shuf = df.reindex(df.index[shuf]) -""" - -#---------------------------------------------------------------------- -# DataFrame joins on key - -join_dataframe_index_single_key_small = \ - Benchmark("df.join(df_key1, on='key1')", setup, - name='join_dataframe_index_single_key_small') - -join_dataframe_index_single_key_bigger = \ - Benchmark("df.join(df_key2, on='key2')", setup, - name='join_dataframe_index_single_key_bigger') - -join_dataframe_index_single_key_bigger_sort = \ - Benchmark("df_shuf.join(df_key2, on='key2', sort=True)", setup, - name='join_dataframe_index_single_key_bigger_sort', - start_date=datetime(2012, 2, 5)) - -join_dataframe_index_multi = \ - Benchmark("df.join(df_multi, on=['key1', 'key2'])", setup, - name='join_dataframe_index_multi', - start_date=datetime(2011, 10, 20)) - -#---------------------------------------------------------------------- -# Joins on integer keys -setup = common_setup + """ -df = pd.DataFrame({'key1': np.tile(np.arange(500).repeat(10), 2), - 'key2': np.tile(np.arange(250).repeat(10), 4), - 'value': np.random.randn(10000)}) -df2 = pd.DataFrame({'key1': np.arange(500), 'value2': randn(500)}) -df3 = df[:5000] -""" - 
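
The integer-key join benchmarks below consume `df`, `df2` and `df3` from this setup; a self-contained sketch (not part of this patch) of the same merges, with `pd.merge` named explicitly instead of relying on the star import from pandas_vb_common:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'key1': np.tile(np.arange(500).repeat(10), 2),
                       'key2': np.tile(np.arange(250).repeat(10), 4),
                       'value': np.random.randn(10000)})
    df2 = pd.DataFrame({'key1': np.arange(500),
                        'value2': np.random.randn(500)})
    df3 = df[:5000]

    one_key = pd.merge(df, df2, on='key1')   # many-to-one join on key1
    two_key = pd.merge(df, df3)              # joins on all shared columns
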
- -join_dataframe_integer_key = Benchmark("merge(df, df2, on='key1')", setup, - start_date=datetime(2011, 10, 20)) -join_dataframe_integer_2key = Benchmark("merge(df, df3)", setup, - start_date=datetime(2011, 10, 20)) - -#---------------------------------------------------------------------- -# DataFrame joins on index - - -#---------------------------------------------------------------------- -# Merges -setup = common_setup + """ -N = 10000 - -indices = tm.makeStringIndex(N).values -indices2 = tm.makeStringIndex(N).values -key = np.tile(indices[:8000], 10) -key2 = np.tile(indices2[:8000], 10) - -left = pd.DataFrame({'key' : key, 'key2':key2, - 'value' : np.random.randn(80000)}) -right = pd.DataFrame({'key': indices[2000:], 'key2':indices2[2000:], - 'value2' : np.random.randn(8000)}) -""" - -merge_2intkey_nosort = Benchmark('merge(left, right, sort=False)', setup, - start_date=datetime(2011, 10, 20)) - -merge_2intkey_sort = Benchmark('merge(left, right, sort=True)', setup, - start_date=datetime(2011, 10, 20)) - -#---------------------------------------------------------------------- -# Appending DataFrames - -setup = common_setup + """ -df1 = pd.DataFrame(np.random.randn(10000, 4), columns=['A', 'B', 'C', 'D']) -df2 = df1.copy() -df2.index = np.arange(10000, 20000) -mdf1 = df1.copy() -mdf1['obj1'] = 'bar' -mdf1['obj2'] = 'bar' -mdf1['int1'] = 5 -try: - mdf1.consolidate(inplace=True) -except: - pass -mdf2 = mdf1.copy() -mdf2.index = df2.index -""" - -stmt = "df1.append(df2)" -append_frame_single_homogenous = \ - Benchmark(stmt, setup, name='append_frame_single_homogenous', - ncalls=500, repeat=1) - -stmt = "mdf1.append(mdf2)" -append_frame_single_mixed = Benchmark(stmt, setup, - name='append_frame_single_mixed', - ncalls=500, repeat=1) - -#---------------------------------------------------------------------- -# data alignment - -setup = common_setup + """n = 1000000 -# indices = tm.makeStringIndex(n) -def sample(values, k): - sampler = np.random.permutation(len(values)) - return values.take(sampler[:k]) -sz = 500000 -rng = np.arange(0, 10000000000000, 10000000) -stamps = np.datetime64(datetime.now()).view('i8') + rng -idx1 = np.sort(sample(stamps, sz)) -idx2 = np.sort(sample(stamps, sz)) -ts1 = Series(np.random.randn(sz), idx1) -ts2 = Series(np.random.randn(sz), idx2) -""" -stmt = "ts1 + ts2" -series_align_int64_index = \ - Benchmark(stmt, setup, - name="series_align_int64_index", - start_date=datetime(2010, 6, 1), logy=True) - -stmt = "ts1.align(ts2, join='left')" -series_align_left_monotonic = \ - Benchmark(stmt, setup, - name="series_align_left_monotonic", - start_date=datetime(2011, 12, 1), logy=True) - -#---------------------------------------------------------------------- -# Concat Series axis=1 - -setup = common_setup + """ -n = 1000 -indices = tm.makeStringIndex(1000) -s = Series(n, index=indices) -pieces = [s[i:-i] for i in range(1, 10)] -pieces = pieces * 50 -""" - -concat_series_axis1 = Benchmark('concat(pieces, axis=1)', setup, - start_date=datetime(2012, 2, 27)) - -setup = common_setup + """ -df = pd.DataFrame(randn(5, 4)) -""" - -concat_small_frames = Benchmark('concat([df] * 1000)', setup, - start_date=datetime(2012, 1, 1)) - - -#---------------------------------------------------------------------- -# Concat empty - -setup = common_setup + """ -df = pd.DataFrame(dict(A = range(10000)),index=date_range('20130101',periods=10000,freq='s')) -empty = pd.DataFrame() -""" - -concat_empty_frames1 = Benchmark('concat([df,empty])', setup, - start_date=datetime(2012, 1, 1)) 
-concat_empty_frames2 = Benchmark('concat([empty,df])', setup, - start_date=datetime(2012, 1, 1)) - - -#---------------------------------------------------------------------- -# Ordered merge - -setup = common_setup + """ -groups = tm.makeStringIndex(10).values - -left = pd.DataFrame({'group': groups.repeat(5000), - 'key' : np.tile(np.arange(0, 10000, 2), 10), - 'lvalue': np.random.randn(50000)}) - -right = pd.DataFrame({'key' : np.arange(10000), - 'rvalue' : np.random.randn(10000)}) - -""" - -stmt = "ordered_merge(left, right, on='key', left_by='group')" - -#---------------------------------------------------------------------- -# outer join of non-unique -# GH 6329 - -setup = common_setup + """ -date_index = date_range('01-Jan-2013', '23-Jan-2013', freq='T') -daily_dates = date_index.to_period('D').to_timestamp('S','S') -fracofday = date_index.view(np.ndarray) - daily_dates.view(np.ndarray) -fracofday = fracofday.astype('timedelta64[ns]').astype(np.float64)/864e11 -fracofday = TimeSeries(fracofday, daily_dates) -index = date_range(date_index.min().to_period('A').to_timestamp('D','S'), - date_index.max().to_period('A').to_timestamp('D','E'), - freq='D') -temp = TimeSeries(1.0, index) -""" - -join_non_unique_equal = Benchmark('fracofday * temp[fracofday.index]', setup, - start_date=datetime(2013, 1, 1)) - - -setup = common_setup + ''' -np.random.seed(2718281) -n = 50000 - -left = pd.DataFrame(np.random.randint(1, n/500, (n, 2)), - columns=['jim', 'joe']) - -right = pd.DataFrame(np.random.randint(1, n/500, (n, 2)), - columns=['jolie', 'jolia']).set_index('jolie') -''' - -left_outer_join_index = Benchmark("left.join(right, on='jim')", setup, - name='left_outer_join_index') - - -setup = common_setup + """ -low, high, n = -1 << 10, 1 << 10, 1 << 20 -left = pd.DataFrame(np.random.randint(low, high, (n, 7)), - columns=list('ABCDEFG')) -left['left'] = left.sum(axis=1) - -i = np.random.permutation(len(left)) -right = left.iloc[i].copy() -right.columns = right.columns[:-1].tolist() + ['right'] -right.index = np.arange(len(right)) -right['right'] *= -1 -""" - -i8merge = Benchmark("merge(left, right, how='outer')", setup, - name='i8merge') diff --git a/vb_suite/make.py b/vb_suite/make.py deleted file mode 100755 index 5a8a8215db9a4..0000000000000 --- a/vb_suite/make.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env python - -""" -Python script for building documentation. - -To build the docs you must have all optional dependencies for statsmodels -installed. See the installation instructions for a list of these. - -Note: currently latex builds do not work because of table formats that are not -supported in the latex generation. - -Usage ------ -python make.py clean -python make.py html -""" - -import glob -import os -import shutil -import sys -import sphinx - -os.environ['PYTHONPATH'] = '..' - -SPHINX_BUILD = 'sphinxbuild' - - -def upload(): - 'push a copy to the site' - os.system('cd build/html; rsync -avz . 
pandas@pandas.pydata.org' - ':/usr/share/nginx/pandas/pandas-docs/vbench/ -essh') - - -def clean(): - if os.path.exists('build'): - shutil.rmtree('build') - - if os.path.exists('source/generated'): - shutil.rmtree('source/generated') - - -def html(): - check_build() - if os.system('sphinx-build -P -b html -d build/doctrees ' - 'source build/html'): - raise SystemExit("Building HTML failed.") - - -def check_build(): - build_dirs = [ - 'build', 'build/doctrees', 'build/html', - 'build/plots', 'build/_static', - 'build/_templates'] - for d in build_dirs: - try: - os.mkdir(d) - except OSError: - pass - - -def all(): - clean() - html() - - -def auto_update(): - msg = '' - try: - clean() - html() - upload() - sendmail() - except (Exception, SystemExit), inst: - msg += str(inst) + '\n' - sendmail(msg) - - -def sendmail(err_msg=None): - from_name, to_name = _get_config() - - if err_msg is None: - msgstr = 'Daily vbench uploaded successfully' - subject = "VB: daily update successful" - else: - msgstr = err_msg - subject = "VB: daily update failed" - - import smtplib - from email.MIMEText import MIMEText - msg = MIMEText(msgstr) - msg['Subject'] = subject - msg['From'] = from_name - msg['To'] = to_name - - server_str, port, login, pwd = _get_credentials() - server = smtplib.SMTP(server_str, port) - server.ehlo() - server.starttls() - server.ehlo() - - server.login(login, pwd) - try: - server.sendmail(from_name, to_name, msg.as_string()) - finally: - server.close() - - -def _get_dir(subdir=None): - import getpass - USERNAME = getpass.getuser() - if sys.platform == 'darwin': - HOME = '/Users/%s' % USERNAME - else: - HOME = '/home/%s' % USERNAME - - if subdir is None: - subdir = '/code/scripts' - conf_dir = '%s%s' % (HOME, subdir) - return conf_dir - - -def _get_credentials(): - tmp_dir = _get_dir() - cred = '%s/credentials' % tmp_dir - with open(cred, 'r') as fh: - server, port, un, domain = fh.read().split(',') - port = int(port) - login = un + '@' + domain + '.com' - - import base64 - with open('%s/cron_email_pwd' % tmp_dir, 'r') as fh: - pwd = base64.b64decode(fh.read()) - - return server, port, login, pwd - - -def _get_config(): - tmp_dir = _get_dir() - with open('%s/addresses' % tmp_dir, 'r') as fh: - from_name, to_name = fh.read().split(',') - return from_name, to_name - -funcd = { - 'html': html, - 'clean': clean, - 'upload': upload, - 'auto_update': auto_update, - 'all': all, -} - -small_docs = False - -# current_dir = os.getcwd() -# os.chdir(os.path.dirname(os.path.join(current_dir, __file__))) - -if len(sys.argv) > 1: - for arg in sys.argv[1:]: - func = funcd.get(arg) - if func is None: - raise SystemExit('Do not know how to handle %s; valid args are %s' % ( - arg, funcd.keys())) - func() -else: - small_docs = False - all() -# os.chdir(current_dir) diff --git a/vb_suite/measure_memory_consumption.py b/vb_suite/measure_memory_consumption.py deleted file mode 100755 index bb73cf5da4302..0000000000000 --- a/vb_suite/measure_memory_consumption.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from __future__ import print_function - -"""Short one-line summary - -long summary -""" - - -def main(): - import shutil - import tempfile - import warnings - - from pandas import Series - - from vbench.api import BenchmarkRunner - from suite import (REPO_PATH, BUILD, DB_PATH, PREPARE, - dependencies, benchmarks) - - from memory_profiler import memory_usage - - warnings.filterwarnings('ignore', category=FutureWarning) - - try: - TMP_DIR = tempfile.mkdtemp() - runner = 
BenchmarkRunner( - benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH, - TMP_DIR, PREPARE, always_clean=True, - # run_option='eod', start_date=START_DATE, - module_dependencies=dependencies) - results = {} - for b in runner.benchmarks: - k = b.name - try: - vs = memory_usage((b.run,)) - v = max(vs) - # print(k, v) - results[k] = v - except Exception as e: - print("Exception caught in %s\n" % k) - print(str(e)) - - s = Series(results) - s.sort() - print((s)) - - finally: - shutil.rmtree(TMP_DIR) - - -if __name__ == "__main__": - main() diff --git a/vb_suite/miscellaneous.py b/vb_suite/miscellaneous.py deleted file mode 100644 index da2c736e79ea7..0000000000000 --- a/vb_suite/miscellaneous.py +++ /dev/null @@ -1,32 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# cache_readonly - -setup = common_setup + """ -from pandas.util.decorators import cache_readonly - -class Foo: - - @cache_readonly - def prop(self): - return 5 -obj = Foo() -""" -misc_cache_readonly = Benchmark("obj.prop", setup, name="misc_cache_readonly", - ncalls=2000000) - -#---------------------------------------------------------------------- -# match - -setup = common_setup + """ -uniques = tm.makeStringIndex(1000).values -all = uniques.repeat(10) -""" - -match_strings = Benchmark("match(all, uniques)", setup, - start_date=datetime(2012, 5, 12)) diff --git a/vb_suite/packers.py b/vb_suite/packers.py deleted file mode 100644 index 69ec10822b392..0000000000000 --- a/vb_suite/packers.py +++ /dev/null @@ -1,252 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -start_date = datetime(2013, 5, 1) - -common_setup = """from .pandas_vb_common import * -import os -import pandas as pd -from pandas.core import common as com -from pandas.compat import BytesIO -from random import randrange - -f = '__test__.msg' -def remove(f): - try: - os.remove(f) - except: - pass - -N=100000 -C=5 -index = date_range('20000101',periods=N,freq='H') -df = DataFrame(dict([ ("float{0}".format(i),randn(N)) for i in range(C) ]), - index=index) - -N=100000 -C=5 -index = date_range('20000101',periods=N,freq='H') -df2 = DataFrame(dict([ ("float{0}".format(i),randn(N)) for i in range(C) ]), - index=index) -df2['object'] = ['%08x'%randrange(16**8) for _ in range(N)] -remove(f) -""" - -#---------------------------------------------------------------------- -# msgpack - -setup = common_setup + """ -df2.to_msgpack(f) -""" - -packers_read_pack = Benchmark("pd.read_msgpack(f)", setup, start_date=start_date) - -setup = common_setup + """ -""" - -packers_write_pack = Benchmark("df2.to_msgpack(f)", setup, cleanup="remove(f)", start_date=start_date) - -#---------------------------------------------------------------------- -# pickle - -setup = common_setup + """ -df2.to_pickle(f) -""" - -packers_read_pickle = Benchmark("pd.read_pickle(f)", setup, start_date=start_date) - -setup = common_setup + """ -""" - -packers_write_pickle = Benchmark("df2.to_pickle(f)", setup, cleanup="remove(f)", start_date=start_date) - -#---------------------------------------------------------------------- -# csv - -setup = common_setup + """ -df.to_csv(f) -""" - -packers_read_csv = Benchmark("pd.read_csv(f)", setup, start_date=start_date) - -setup = common_setup + """ -""" - -packers_write_csv = Benchmark("df.to_csv(f)", setup, cleanup="remove(f)", start_date=start_date) - 
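
All of the packers benchmarks share one round-trip pattern: the setup writes a frame to the scratch file `f`, the statement times the read (or write), and `cleanup="remove(f)"` deletes the file afterwards. A sketch (not part of this patch) of that pattern, using the pickle pair above:

    import os
    import timeit
    import numpy as np
    import pandas as pd

    f = '__test__.msg'   # reusing the scratch-file name from common_setup
    df2 = pd.DataFrame({'float%d' % i: np.random.randn(100000) for i in range(5)})

    df2.to_pickle(f)     # setup: write once
    print(timeit.timeit(lambda: pd.read_pickle(f), number=10))
    os.remove(f)         # cleanup: mirrors remove(f)
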
-#---------------------------------------------------------------------- -# hdf store - -setup = common_setup + """ -df2.to_hdf(f,'df') -""" - -packers_read_hdf_store = Benchmark("pd.read_hdf(f,'df')", setup, start_date=start_date) - -setup = common_setup + """ -""" - -packers_write_hdf_store = Benchmark("df2.to_hdf(f,'df')", setup, cleanup="remove(f)", start_date=start_date) - -#---------------------------------------------------------------------- -# hdf table - -setup = common_setup + """ -df2.to_hdf(f,'df',format='table') -""" - -packers_read_hdf_table = Benchmark("pd.read_hdf(f,'df')", setup, start_date=start_date) - -setup = common_setup + """ -""" - -packers_write_hdf_table = Benchmark("df2.to_hdf(f,'df',table=True)", setup, cleanup="remove(f)", start_date=start_date) - -#---------------------------------------------------------------------- -# sql - -setup = common_setup + """ -import sqlite3 -from sqlalchemy import create_engine -engine = create_engine('sqlite:///:memory:') - -df2.to_sql('table', engine, if_exists='replace') -""" - -packers_read_sql= Benchmark("pd.read_sql_table('table', engine)", setup, start_date=start_date) - -setup = common_setup + """ -import sqlite3 -from sqlalchemy import create_engine -engine = create_engine('sqlite:///:memory:') -""" - -packers_write_sql = Benchmark("df2.to_sql('table', engine, if_exists='replace')", setup, start_date=start_date) - -#---------------------------------------------------------------------- -# json - -setup_int_index = """ -import numpy as np -df.index = np.arange(N) -""" - -setup = common_setup + """ -df.to_json(f,orient='split') -""" -packers_read_json_date_index = Benchmark("pd.read_json(f, orient='split')", setup, start_date=start_date) -setup = setup + setup_int_index -packers_read_json = Benchmark("pd.read_json(f, orient='split')", setup, start_date=start_date) - -setup = common_setup + """ -""" -packers_write_json_date_index = Benchmark("df.to_json(f,orient='split')", setup, cleanup="remove(f)", start_date=start_date) - -setup = setup + setup_int_index -packers_write_json = Benchmark("df.to_json(f,orient='split')", setup, cleanup="remove(f)", start_date=start_date) -packers_write_json_T = Benchmark("df.to_json(f,orient='columns')", setup, cleanup="remove(f)", start_date=start_date) - -setup = common_setup + """ -from numpy.random import randint -from collections import OrderedDict - -cols = [ - lambda i: ("{0}_timedelta".format(i), [pd.Timedelta('%d seconds' % randrange(1e6)) for _ in range(N)]), - lambda i: ("{0}_int".format(i), randint(1e8, size=N)), - lambda i: ("{0}_timestamp".format(i), [pd.Timestamp( 1418842918083256000 + randrange(1e9, 1e18, 200)) for _ in range(N)]) - ] -df_mixed = DataFrame(OrderedDict([cols[i % len(cols)](i) for i in range(C)]), - index=index) -""" -packers_write_json_mixed_delta_int_tstamp = Benchmark("df_mixed.to_json(f,orient='split')", setup, cleanup="remove(f)", start_date=start_date) - -setup = common_setup + """ -from numpy.random import randint -from collections import OrderedDict -cols = [ - lambda i: ("{0}_float".format(i), randn(N)), - lambda i: ("{0}_int".format(i), randint(1e8, size=N)) - ] -df_mixed = DataFrame(OrderedDict([cols[i % len(cols)](i) for i in range(C)]), - index=index) -""" -packers_write_json_mixed_float_int = Benchmark("df_mixed.to_json(f,orient='index')", setup, cleanup="remove(f)", start_date=start_date) -packers_write_json_mixed_float_int_T = Benchmark("df_mixed.to_json(f,orient='columns')", setup, cleanup="remove(f)", start_date=start_date) - -setup = 
common_setup + """ -from numpy.random import randint -from collections import OrderedDict -cols = [ - lambda i: ("{0}_float".format(i), randn(N)), - lambda i: ("{0}_int".format(i), randint(1e8, size=N)), - lambda i: ("{0}_str".format(i), ['%08x'%randrange(16**8) for _ in range(N)]) - ] -df_mixed = DataFrame(OrderedDict([cols[i % len(cols)](i) for i in range(C)]), - index=index) -""" -packers_write_json_mixed_float_int_str = Benchmark("df_mixed.to_json(f,orient='split')", setup, cleanup="remove(f)", start_date=start_date) - -#---------------------------------------------------------------------- -# stata - -setup = common_setup + """ -df.to_stata(f, {'index': 'tc'}) -""" -packers_read_stata = Benchmark("pd.read_stata(f)", setup, start_date=start_date) - -packers_write_stata = Benchmark("df.to_stata(f, {'index': 'tc'})", setup, cleanup="remove(f)", start_date=start_date) - -setup = common_setup + """ -df['int8_'] = [randint(np.iinfo(np.int8).min, np.iinfo(np.int8).max - 27) for _ in range(N)] -df['int16_'] = [randint(np.iinfo(np.int16).min, np.iinfo(np.int16).max - 27) for _ in range(N)] -df['int32_'] = [randint(np.iinfo(np.int32).min, np.iinfo(np.int32).max - 27) for _ in range(N)] -df['float32_'] = np.array(randn(N), dtype=np.float32) -df.to_stata(f, {'index': 'tc'}) -""" - -packers_read_stata_with_validation = Benchmark("pd.read_stata(f)", setup, start_date=start_date) - -packers_write_stata_with_validation = Benchmark("df.to_stata(f, {'index': 'tc'})", setup, cleanup="remove(f)", start_date=start_date) - -#---------------------------------------------------------------------- -# Excel - alternative writers -setup = common_setup + """ -bio = BytesIO() -""" - -excel_writer_bench = """ -bio.seek(0) -writer = pd.io.excel.ExcelWriter(bio, engine='{engine}') -df[:2000].to_excel(writer) -writer.save() -""" - -benchmark_xlsxwriter = excel_writer_bench.format(engine='xlsxwriter') - -packers_write_excel_xlsxwriter = Benchmark(benchmark_xlsxwriter, setup) - -benchmark_openpyxl = excel_writer_bench.format(engine='openpyxl') - -packers_write_excel_openpyxl = Benchmark(benchmark_openpyxl, setup) - -benchmark_xlwt = excel_writer_bench.format(engine='xlwt') - -packers_write_excel_xlwt = Benchmark(benchmark_xlwt, setup) - - -#---------------------------------------------------------------------- -# Excel - reader - -setup = common_setup + """ -bio = BytesIO() -writer = pd.io.excel.ExcelWriter(bio, engine='xlsxwriter') -df[:2000].to_excel(writer) -writer.save() -""" - -benchmark_read_excel=""" -bio.seek(0) -pd.read_excel(bio) -""" - -packers_read_excel = Benchmark(benchmark_read_excel, setup) diff --git a/vb_suite/pandas_vb_common.py b/vb_suite/pandas_vb_common.py deleted file mode 100644 index bd2e8a1c1d504..0000000000000 --- a/vb_suite/pandas_vb_common.py +++ /dev/null @@ -1,30 +0,0 @@ -from pandas import * -import pandas as pd -from datetime import timedelta -from numpy.random import randn -from numpy.random import randint -from numpy.random import permutation -import pandas.util.testing as tm -import random -import numpy as np -try: - from pandas.compat import range -except ImportError: - pass - -np.random.seed(1234) -try: - import pandas._tseries as lib -except: - import pandas._libs.lib as lib - -try: - Panel = WidePanel -except Exception: - pass - -# didn't add to namespace until later -try: - from pandas.core.index import MultiIndex -except ImportError: - pass diff --git a/vb_suite/panel_ctor.py b/vb_suite/panel_ctor.py deleted file mode 100644 index 9f497e7357a61..0000000000000 --- 
a/vb_suite/panel_ctor.py +++ /dev/null @@ -1,76 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# Panel.from_dict homogenization time - -START_DATE = datetime(2011, 6, 1) - -setup_same_index = common_setup + """ -# create 100 dataframes with the same index -dr = np.asarray(DatetimeIndex(start=datetime(1990,1,1), end=datetime(2012,1,1), - freq=datetools.Day(1))) -data_frames = {} -for x in range(100): - df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr), - "c": [2]*len(dr)}, index=dr) - data_frames[x] = df -""" - -panel_from_dict_same_index = \ - Benchmark("Panel.from_dict(data_frames)", - setup_same_index, name='panel_from_dict_same_index', - start_date=START_DATE, repeat=1, logy=True) - -setup_equiv_indexes = common_setup + """ -data_frames = {} -for x in range(100): - dr = np.asarray(DatetimeIndex(start=datetime(1990,1,1), end=datetime(2012,1,1), - freq=datetools.Day(1))) - df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr), - "c": [2]*len(dr)}, index=dr) - data_frames[x] = df -""" - -panel_from_dict_equiv_indexes = \ - Benchmark("Panel.from_dict(data_frames)", - setup_equiv_indexes, name='panel_from_dict_equiv_indexes', - start_date=START_DATE, repeat=1, logy=True) - -setup_all_different_indexes = common_setup + """ -data_frames = {} -start = datetime(1990,1,1) -end = datetime(2012,1,1) -for x in range(100): - end += timedelta(days=1) - dr = np.asarray(date_range(start, end)) - df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr), - "c": [2]*len(dr)}, index=dr) - data_frames[x] = df -""" -panel_from_dict_all_different_indexes = \ - Benchmark("Panel.from_dict(data_frames)", - setup_all_different_indexes, - name='panel_from_dict_all_different_indexes', - start_date=START_DATE, repeat=1, logy=True) - -setup_two_different_indexes = common_setup + """ -data_frames = {} -start = datetime(1990,1,1) -end = datetime(2012,1,1) -for x in range(100): - if x == 50: - end += timedelta(days=1) - dr = np.asarray(date_range(start, end)) - df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr), - "c": [2]*len(dr)}, index=dr) - data_frames[x] = df -""" -panel_from_dict_two_different_indexes = \ - Benchmark("Panel.from_dict(data_frames)", - setup_two_different_indexes, - name='panel_from_dict_two_different_indexes', - start_date=START_DATE, repeat=1, logy=True) diff --git a/vb_suite/panel_methods.py b/vb_suite/panel_methods.py deleted file mode 100644 index 28586422a66e3..0000000000000 --- a/vb_suite/panel_methods.py +++ /dev/null @@ -1,28 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# shift - -setup = common_setup + """ -index = date_range(start="2000", freq="D", periods=1000) -panel = Panel(np.random.randn(100, len(index), 1000)) -""" - -panel_shift = Benchmark('panel.shift(1)', setup, - start_date=datetime(2012, 1, 12)) - -panel_shift_minor = Benchmark('panel.shift(1, axis="minor")', setup, - start_date=datetime(2012, 1, 12)) - -panel_pct_change_major = Benchmark('panel.pct_change(1, axis="major")', setup, - start_date=datetime(2014, 4, 19)) - -panel_pct_change_minor = Benchmark('panel.pct_change(1, axis="minor")', setup, - start_date=datetime(2014, 4, 19)) - -panel_pct_change_items = Benchmark('panel.pct_change(1, axis="items")', setup, - start_date=datetime(2014, 4, 19)) diff --git 
a/vb_suite/parser_vb.py b/vb_suite/parser_vb.py deleted file mode 100644 index bb9ccbdb5e854..0000000000000 --- a/vb_suite/parser_vb.py +++ /dev/null @@ -1,112 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -from pandas import read_csv, read_table -""" - -setup = common_setup + """ -import os -N = 10000 -K = 8 -df = DataFrame(np.random.randn(N, K) * np.random.randint(100, 10000, (N, K))) -df.to_csv('test.csv', sep='|') -""" - -read_csv_vb = Benchmark("read_csv('test.csv', sep='|')", setup, - cleanup="os.remove('test.csv')", - start_date=datetime(2012, 5, 7)) - - -setup = common_setup + """ -import os -N = 10000 -K = 8 -format = lambda x: '{:,}'.format(x) -df = DataFrame(np.random.randn(N, K) * np.random.randint(100, 10000, (N, K))) -df = df.applymap(format) -df.to_csv('test.csv', sep='|') -""" - -read_csv_thou_vb = Benchmark("read_csv('test.csv', sep='|', thousands=',')", - setup, - cleanup="os.remove('test.csv')", - start_date=datetime(2012, 5, 7)) - -setup = common_setup + """ -data = ['A,B,C'] -data = data + ['1,2,3 # comment'] * 100000 -data = '\\n'.join(data) -""" - -stmt = "read_csv(StringIO(data), comment='#')" -read_csv_comment2 = Benchmark(stmt, setup, - start_date=datetime(2011, 11, 1)) - -setup = common_setup + """ -try: - from cStringIO import StringIO -except ImportError: - from io import StringIO - -import os -N = 10000 -K = 8 -data = '''\ -KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 -KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 -KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 -KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 -KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 -''' -data = data * 200 -""" -cmd = ("read_table(StringIO(data), sep=',', header=None, " - "parse_dates=[[1,2], [1,3]])") -sdate = datetime(2012, 5, 7) -read_table_multiple_date = Benchmark(cmd, setup, start_date=sdate) - -setup = common_setup + """ -try: - from cStringIO import StringIO -except ImportError: - from io import StringIO - -import os -N = 10000 -K = 8 -data = '''\ -KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 -KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 -KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 -KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 -KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 -''' -data = data * 200 -""" -cmd = "read_table(StringIO(data), sep=',', header=None, parse_dates=[1])" -sdate = datetime(2012, 5, 7) -read_table_multiple_date_baseline = Benchmark(cmd, setup, start_date=sdate) - -setup = common_setup + """ -try: - from cStringIO import StringIO -except ImportError: - from io import StringIO - -data = '''\ -0.1213700904466425978256438611,0.0525708283766902484401839501,0.4174092731488769913994474336 -0.4096341697147408700274695547,0.1587830198973579909349496119,0.1292545832485494372576795285 -0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126 -0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394 -0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020 -''' -data = data * 200 -""" -cmd = "read_csv(StringIO(data), sep=',', header=None, float_precision=None)" -sdate = datetime(2014, 8, 
20)
-read_csv_default_converter = Benchmark(cmd, setup, start_date=sdate)
-cmd = "read_csv(StringIO(data), sep=',', header=None, float_precision='high')"
-read_csv_precise_converter = Benchmark(cmd, setup, start_date=sdate)
-cmd = "read_csv(StringIO(data), sep=',', header=None, float_precision='round_trip')"
-read_csv_roundtrip_converter = Benchmark(cmd, setup, start_date=sdate)
diff --git a/vb_suite/perf_HEAD.py b/vb_suite/perf_HEAD.py
deleted file mode 100755
index 143d943b9eadf..0000000000000
--- a/vb_suite/perf_HEAD.py
+++ /dev/null
@@ -1,243 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-from __future__ import print_function
-
-"""Run all the vbenches in `suite`, and post the results as a json blob to gist
-
-"""
-
-import urllib2
-from contextlib import closing
-from urllib2 import urlopen
-import json
-
-import pandas as pd
-
-WEB_TIMEOUT = 10
-
-
-def get_travis_data():
-    """figure out what worker we're running on, and the number of jobs it's running
-    """
-    import os
-    jobid = os.environ.get("TRAVIS_JOB_ID")
-    if not jobid:
-        return None, None
-
-    with closing(urlopen("https://api.travis-ci.org/workers/")) as resp:
-        workers = json.loads(resp.read())
-
-    host = njobs = None
-    for item in workers:
-        host = item.get("host")
-        id = ((item.get("payload") or {}).get("job") or {}).get("id")
-        if id and str(id) == str(jobid):
-            break
-
-    if host:
-        njobs = len(
-            [x for x in workers if host in x['host'] and x['payload']])
-
-    return host, njobs
-
-
-def get_utcdatetime():
-    try:
-        from datetime import datetime
-        return datetime.utcnow().isoformat(" ")
-    except:
-        pass
-
-
-def dump_as_gist(data, desc="The Commit", njobs=None):
-    host, njobs2 = get_travis_data()[:2]
-
-    if njobs:  # be slightly more reliable
-        njobs = max(njobs, njobs2)
-
-    content = dict(version="0.1.1",
-                   timings=data,
-                   datetime=get_utcdatetime(),  # added in 0.1.1
-                   hostname=host,  # added in 0.1.1
-                   njobs=njobs  # added in 0.1.1, a measure of load on the travis box
-                   )
-
-    payload = dict(description=desc,
-                   public=True,
-                   files={'results.json': dict(content=json.dumps(content))})
-    try:
-        with closing(urlopen("https://api.github.com/gists",
                             json.dumps(payload), timeout=WEB_TIMEOUT)) as r:
-            if 200 <= r.getcode() < 300:
-                print("\n\n" + "-" * 80)
-
-                gist = json.loads(r.read())
-                file_raw_url = gist['files'].items()[0][1]['raw_url']
-                print("[vbench-gist-raw_url] %s" % file_raw_url)
-                print("[vbench-html-url] %s" % gist['html_url'])
-                print("[vbench-api-url] %s" % gist['url'])
-
-                print("-" * 80 + "\n\n")
-            else:
-                print("api.github.com returned status %d" % r.getcode())
-    except:
-        print("Error occurred while dumping to gist")
-
-
-def main():
-    import warnings
-    from suite import benchmarks
-
-    exit_code = 0
-    warnings.filterwarnings('ignore', category=FutureWarning)
-
-    host, njobs = get_travis_data()[:2]
-    results = []
-    for b in benchmarks:
-        try:
-            d = b.run()
-            d.update(dict(name=b.name))
-            results.append(d)
-            msg = "{name:<40}: {timing:> 10.4f} [ms]"
-            print(msg.format(name=results[-1]['name'],
-                             timing=results[-1]['timing']))
-
-        except Exception as e:
-            exit_code = 1
-            if (type(e) == KeyboardInterrupt or
-                    'KeyboardInterrupt' in str(d)):
-                raise KeyboardInterrupt()
-
-            msg = "{name:<40}: ERROR:\n<-------"
-            print(msg.format(name=b.name))
-            if isinstance(d, dict):
-                if d['succeeded']:
-                    print("\nException:\n%s\n" % str(e))
-                else:
-                    for k, v in sorted(d.iteritems()):
-                        print("{k}: {v}".format(k=k, v=v))
-
-            print("------->\n")
-
-    dump_as_gist(results, "testing", njobs=njobs)
-
-    return exit_code
-
-
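For reference, the JSON body that dump_as_gist above submits to the GitHub gist API has roughly the following shape; every value below is invented for illustration.

    import json

    content = {
        "version": "0.1.1",
        "timings": [{"name": "packers_read_csv", "timing": 12.3456,
                     "succeeded": True}],    # one dict per benchmark run
        "datetime": "2017-02-01 12:00:00",   # get_utcdatetime()
        "hostname": "worker-1.example.org",  # hypothetical travis worker
        "njobs": 2,
    }
    payload = {
        "description": "The Commit",
        "public": True,
        "files": {"results.json": {"content": json.dumps(content)}},
    }
    print(json.dumps(payload, indent=2))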
-if __name__ == "__main__":
-    import sys
-    sys.exit(main())
-
-#####################################################
-# functions for retrieving and processing the results
-
-
-def get_vbench_log(build_url):
-    with closing(urllib2.urlopen(build_url)) as r:
-        if not (200 <= r.getcode() < 300):
-            return
-
-        s = json.loads(r.read())
-        s = [x for x in s['matrix'] if "VBENCH" in ((x.get('config', {})
-                                                     or {}).get('env', {}) or {})]
-        # s=[x for x in s['matrix']]
-        if not s:
-            return
-        id = s[0]['id']  # should be just one for now
-        with closing(urllib2.urlopen("https://api.travis-ci.org/jobs/%s" % id)) as r2:
-            if not 200 <= r2.getcode() < 300:
-                return
-            s2 = json.loads(r2.read())
-            return s2.get('log')
-
-
-def get_results_raw_url(build):
-    "Takes a Travis build number, retrieves the build log and extracts the gist url"
-    import re
-    log = get_vbench_log("https://api.travis-ci.org/builds/%s" % build)
-    if not log:
-        return
-    l = [x.strip(
-    ) for x in log.split("\n") if re.match(".vbench-gist-raw_url", x)]
-    if l:
-        s = l[0]
-        m = re.search("(https://[^\s]+)", s)
-        if m:
-            return m.group(0)
-
-
-def convert_json_to_df(results_url):
-    """retrieve json results file from url and return df
-
-    df contains timings for all successful vbenchmarks
-    """
-
-    with closing(urlopen(results_url)) as resp:
-        res = json.loads(resp.read())
-    timings = res.get("timings")
-    if not timings:
-        return
-    res = [x for x in timings if x.get('succeeded')]
-    df = pd.DataFrame(res)
-    df = df.set_index("name")
-    return df
-
-
-def get_build_results(build):
-    "Returns a df with the results of the VBENCH job associated with the travis build"
-    r_url = get_results_raw_url(build)
-    if not r_url:
-        return
-
-    return convert_json_to_df(r_url)
-
-
-def get_all_results(repo_id=53976):  # travis pandas-dev/pandas id
-    """Fetches the VBENCH results for all travis builds, and returns a list of result df
-
-    unsuccessful individual vbenches are dropped.
- """ - from collections import OrderedDict - - def get_results_from_builds(builds): - dfs = OrderedDict() - for build in builds: - build_id = build['id'] - build_number = build['number'] - print(build_number) - res = get_build_results(build_id) - if res is not None: - dfs[build_number] = res - return dfs - - base_url = 'https://api.travis-ci.org/builds?url=%2Fbuilds&repository_id={repo_id}' - url = base_url.format(repo_id=repo_id) - url_after = url + '&after_number={after}' - dfs = OrderedDict() - - while True: - with closing(urlopen(url)) as r: - if not (200 <= r.getcode() < 300): - break - builds = json.loads(r.read()) - res = get_results_from_builds(builds) - if not res: - break - last_build_number = min(res.keys()) - dfs.update(res) - url = url_after.format(after=last_build_number) - - return dfs - - -def get_all_results_joined(repo_id=53976): - def mk_unique(df): - for dupe in df.index.get_duplicates(): - df = df.ix[df.index != dupe] - return df - dfs = get_all_results(repo_id) - for k in dfs: - dfs[k] = mk_unique(dfs[k]) - ss = [pd.Series(v.timing, name=k) for k, v in dfs.iteritems()] - results = pd.concat(reversed(ss), 1) - return results diff --git a/vb_suite/plotting.py b/vb_suite/plotting.py deleted file mode 100644 index 79e81e9eea8f4..0000000000000 --- a/vb_suite/plotting.py +++ /dev/null @@ -1,25 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * - -try: - from pandas import date_range -except ImportError: - def date_range(start=None, end=None, periods=None, freq=None): - return DatetimeIndex(start, end, periods=periods, offset=freq) - -""" - -#----------------------------------------------------------------------------- -# Timeseries plotting - -setup = common_setup + """ -N = 2000 -M = 5 -df = DataFrame(np.random.randn(N,M), index=date_range('1/1/1975', periods=N)) -""" - -plot_timeseries_period = Benchmark("df.plot()", setup=setup, - name='plot_timeseries_period') - diff --git a/vb_suite/reindex.py b/vb_suite/reindex.py deleted file mode 100644 index 443eb43835745..0000000000000 --- a/vb_suite/reindex.py +++ /dev/null @@ -1,225 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# DataFrame reindex columns - -setup = common_setup + """ -df = DataFrame(index=range(10000), data=np.random.rand(10000,30), - columns=range(30)) -""" -statement = "df.reindex(columns=df.columns[1:5])" - -frame_reindex_columns = Benchmark(statement, setup) - -#---------------------------------------------------------------------- - -setup = common_setup + """ -rng = DatetimeIndex(start='1/1/1970', periods=10000, freq=datetools.Minute()) -df = DataFrame(np.random.rand(10000, 10), index=rng, - columns=range(10)) -df['foo'] = 'bar' -rng2 = Index(rng[::2]) -""" -statement = "df.reindex(rng2)" -dataframe_reindex = Benchmark(statement, setup) - -#---------------------------------------------------------------------- -# multiindex reindexing - -setup = common_setup + """ -N = 1000 -K = 20 - -level1 = tm.makeStringIndex(N).values.repeat(K) -level2 = np.tile(tm.makeStringIndex(K).values, N) -index = MultiIndex.from_arrays([level1, level2]) - -s1 = Series(np.random.randn(N * K), index=index) -s2 = s1[::2] -""" -statement = "s1.reindex(s2.index)" -reindex_multi = Benchmark(statement, setup, - name='reindex_multiindex', - start_date=datetime(2011, 9, 1)) - 
-#---------------------------------------------------------------------- -# Pad / backfill - -def pad(source_series, target_index): - try: - source_series.reindex(target_index, method='pad') - except: - source_series.reindex(target_index, fillMethod='pad') - -def backfill(source_series, target_index): - try: - source_series.reindex(target_index, method='backfill') - except: - source_series.reindex(target_index, fillMethod='backfill') - -setup = common_setup + """ -rng = date_range('1/1/2000', periods=100000, freq=datetools.Minute()) - -ts = Series(np.random.randn(len(rng)), index=rng) -ts2 = ts[::2] -ts3 = ts2.reindex(ts.index) -ts4 = ts3.astype('float32') - -def pad(source_series, target_index): - try: - source_series.reindex(target_index, method='pad') - except: - source_series.reindex(target_index, fillMethod='pad') -def backfill(source_series, target_index): - try: - source_series.reindex(target_index, method='backfill') - except: - source_series.reindex(target_index, fillMethod='backfill') -""" - -statement = "pad(ts2, ts.index)" -reindex_daterange_pad = Benchmark(statement, setup, - name="reindex_daterange_pad") - -statement = "backfill(ts2, ts.index)" -reindex_daterange_backfill = Benchmark(statement, setup, - name="reindex_daterange_backfill") - -reindex_fillna_pad = Benchmark("ts3.fillna(method='pad')", setup, - name="reindex_fillna_pad", - start_date=datetime(2011, 3, 1)) - -reindex_fillna_pad_float32 = Benchmark("ts4.fillna(method='pad')", setup, - name="reindex_fillna_pad_float32", - start_date=datetime(2013, 1, 1)) - -reindex_fillna_backfill = Benchmark("ts3.fillna(method='backfill')", setup, - name="reindex_fillna_backfill", - start_date=datetime(2011, 3, 1)) -reindex_fillna_backfill_float32 = Benchmark("ts4.fillna(method='backfill')", setup, - name="reindex_fillna_backfill_float32", - start_date=datetime(2013, 1, 1)) - -#---------------------------------------------------------------------- -# align on level - -setup = common_setup + """ -index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], - labels=[np.arange(10).repeat(10000), - np.tile(np.arange(100).repeat(100), 10), - np.tile(np.tile(np.arange(100), 100), 10)]) -random.shuffle(index.values) -df = DataFrame(np.random.randn(len(index), 4), index=index) -df_level = DataFrame(np.random.randn(100, 4), index=index.levels[1]) -""" - -reindex_frame_level_align = \ - Benchmark("df.align(df_level, level=1, copy=False)", setup, - name='reindex_frame_level_align', - start_date=datetime(2011, 12, 27)) - -reindex_frame_level_reindex = \ - Benchmark("df_level.reindex(df.index, level=1)", setup, - name='reindex_frame_level_reindex', - start_date=datetime(2011, 12, 27)) - - -#---------------------------------------------------------------------- -# sort_index, drop_duplicates - -# pathological, but realistic -setup = common_setup + """ -N = 10000 -K = 10 - -key1 = tm.makeStringIndex(N).values.repeat(K) -key2 = tm.makeStringIndex(N).values.repeat(K) - -df = DataFrame({'key1' : key1, 'key2' : key2, - 'value' : np.random.randn(N * K)}) -col_array_list = list(df.values.T) -""" -statement = "df.sort_index(by=['key1', 'key2'])" -frame_sort_index_by_columns = Benchmark(statement, setup, - start_date=datetime(2011, 11, 1)) - -# drop_duplicates - -statement = "df.drop_duplicates(['key1', 'key2'])" -frame_drop_duplicates = Benchmark(statement, setup, - start_date=datetime(2011, 11, 15)) - -statement = "df.drop_duplicates(['key1', 'key2'], inplace=True)" -frame_drop_dup_inplace = Benchmark(statement, setup, - 
start_date=datetime(2012, 5, 16)) - -lib_fast_zip = Benchmark('lib.fast_zip(col_array_list)', setup, - name='lib_fast_zip', - start_date=datetime(2012, 1, 1)) - -setup = setup + """ -df.ix[:10000, :] = np.nan -""" -statement2 = "df.drop_duplicates(['key1', 'key2'])" -frame_drop_duplicates_na = Benchmark(statement2, setup, - start_date=datetime(2012, 5, 15)) - -lib_fast_zip_fillna = Benchmark('lib.fast_zip_fillna(col_array_list)', setup, - start_date=datetime(2012, 5, 15)) - -statement2 = "df.drop_duplicates(['key1', 'key2'], inplace=True)" -frame_drop_dup_na_inplace = Benchmark(statement2, setup, - start_date=datetime(2012, 5, 16)) - -setup = common_setup + """ -s = Series(np.random.randint(0, 1000, size=10000)) -s2 = Series(np.tile(tm.makeStringIndex(1000).values, 10)) -""" - -series_drop_duplicates_int = Benchmark('s.drop_duplicates()', setup, - start_date=datetime(2012, 11, 27)) - -series_drop_duplicates_string = \ - Benchmark('s2.drop_duplicates()', setup, - start_date=datetime(2012, 11, 27)) - -#---------------------------------------------------------------------- -# fillna, many columns - - -setup = common_setup + """ -values = np.random.randn(1000, 1000) -values[::2] = np.nan -df = DataFrame(values) -""" - -frame_fillna_many_columns_pad = Benchmark("df.fillna(method='pad')", - setup, - start_date=datetime(2011, 3, 1)) - -#---------------------------------------------------------------------- -# blog "pandas escaped the zoo" - -setup = common_setup + """ -n = 50000 -indices = tm.makeStringIndex(n) - -def sample(values, k): - from random import shuffle - sampler = np.arange(len(values)) - shuffle(sampler) - return values.take(sampler[:k]) - -subsample_size = 40000 - -x = Series(np.random.randn(50000), indices) -y = Series(np.random.randn(subsample_size), - index=sample(indices, subsample_size)) -""" - -series_align_irregular_string = Benchmark("x + y", setup, - start_date=datetime(2010, 6, 1)) diff --git a/vb_suite/replace.py b/vb_suite/replace.py deleted file mode 100644 index 9326aa5becca9..0000000000000 --- a/vb_suite/replace.py +++ /dev/null @@ -1,36 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -from datetime import timedelta - -N = 1000000 - -try: - rng = date_range('1/1/2000', periods=N, freq='min') -except NameError: - rng = DatetimeIndex('1/1/2000', periods=N, offset=datetools.Minute()) - date_range = DateRange - -ts = Series(np.random.randn(N), index=rng) -""" - -large_dict_setup = """from .pandas_vb_common import * -from pandas.compat import range -n = 10 ** 6 -start_value = 10 ** 5 -to_rep = dict((i, start_value + i) for i in range(n)) -s = Series(np.random.randint(n, size=10 ** 3)) -""" - -replace_fillna = Benchmark('ts.fillna(0., inplace=True)', common_setup, - name='replace_fillna', - start_date=datetime(2012, 4, 4)) -replace_replacena = Benchmark('ts.replace(np.nan, 0., inplace=True)', - common_setup, - name='replace_replacena', - start_date=datetime(2012, 5, 15)) -replace_large_dict = Benchmark('s.replace(to_rep, inplace=True)', - large_dict_setup, - name='replace_large_dict', - start_date=datetime(2014, 4, 6)) diff --git a/vb_suite/reshape.py b/vb_suite/reshape.py deleted file mode 100644 index daab96103f2c5..0000000000000 --- a/vb_suite/reshape.py +++ /dev/null @@ -1,65 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -index = MultiIndex.from_arrays([np.arange(100).repeat(100), - np.roll(np.tile(np.arange(100), 
100), 25)]) -df = DataFrame(np.random.randn(10000, 4), index=index) -""" - -reshape_unstack_simple = Benchmark('df.unstack(1)', common_setup, - start_date=datetime(2011, 10, 1)) - -setup = common_setup + """ -udf = df.unstack(1) -""" - -reshape_stack_simple = Benchmark('udf.stack()', setup, - start_date=datetime(2011, 10, 1)) - -setup = common_setup + """ -def unpivot(frame): - N, K = frame.shape - data = {'value' : frame.values.ravel('F'), - 'variable' : np.asarray(frame.columns).repeat(N), - 'date' : np.tile(np.asarray(frame.index), K)} - return DataFrame(data, columns=['date', 'variable', 'value']) -index = date_range('1/1/2000', periods=10000, freq='h') -df = DataFrame(randn(10000, 50), index=index, columns=range(50)) -pdf = unpivot(df) -f = lambda: pdf.pivot('date', 'variable', 'value') -""" - -reshape_pivot_time_series = Benchmark('f()', setup, - start_date=datetime(2012, 5, 1)) - -# Sparse key space, re: #2278 - -setup = common_setup + """ -NUM_ROWS = 1000 -for iter in range(10): - df = DataFrame({'A' : np.random.randint(50, size=NUM_ROWS), - 'B' : np.random.randint(50, size=NUM_ROWS), - 'C' : np.random.randint(-10,10, size=NUM_ROWS), - 'D' : np.random.randint(-10,10, size=NUM_ROWS), - 'E' : np.random.randint(10, size=NUM_ROWS), - 'F' : np.random.randn(NUM_ROWS)}) - idf = df.set_index(['A', 'B', 'C', 'D', 'E']) - if len(idf.index.unique()) == NUM_ROWS: - break -""" - -unstack_sparse_keyspace = Benchmark('idf.unstack()', setup, - start_date=datetime(2011, 10, 1)) - -# Melt - -setup = common_setup + """ -from pandas.core.reshape import melt -df = DataFrame(np.random.randn(10000, 3), columns=['A', 'B', 'C']) -df['id1'] = np.random.randint(0, 10, 10000) -df['id2'] = np.random.randint(100, 1000, 10000) -""" - -melt_dataframe = Benchmark("melt(df, id_vars=['id1', 'id2'])", setup, - start_date=datetime(2012, 8, 1)) diff --git a/vb_suite/run_suite.py b/vb_suite/run_suite.py deleted file mode 100755 index 43bf24faae43a..0000000000000 --- a/vb_suite/run_suite.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env python -from vbench.api import BenchmarkRunner -from suite import * - - -def run_process(): - runner = BenchmarkRunner(benchmarks, REPO_PATH, REPO_URL, - BUILD, DB_PATH, TMP_DIR, PREPARE, - always_clean=True, - run_option='eod', start_date=START_DATE, - module_dependencies=dependencies) - runner.run() - -if __name__ == '__main__': - run_process() diff --git a/vb_suite/series_methods.py b/vb_suite/series_methods.py deleted file mode 100644 index c545f419c2dec..0000000000000 --- a/vb_suite/series_methods.py +++ /dev/null @@ -1,39 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -setup = common_setup + """ -s1 = Series(np.random.randn(10000)) -s2 = Series(np.random.randint(1, 10, 10000)) -s3 = Series(np.random.randint(1, 10, 100000)).astype('int64') -values = [1,2] -s4 = s3.astype('object') -""" - -series_nlargest1 = Benchmark("s1.nlargest(3, keep='last');" - "s1.nlargest(3, keep='first')", - setup, - start_date=datetime(2014, 1, 25)) -series_nlargest2 = Benchmark("s2.nlargest(3, keep='last');" - "s2.nlargest(3, keep='first')", - setup, - start_date=datetime(2014, 1, 25)) - -series_nsmallest2 = Benchmark("s1.nsmallest(3, keep='last');" - "s1.nsmallest(3, keep='first')", - setup, - start_date=datetime(2014, 1, 25)) - -series_nsmallest2 = Benchmark("s2.nsmallest(3, keep='last');" - "s2.nsmallest(3, keep='first')", - setup, - start_date=datetime(2014, 1, 25)) - -series_isin_int64 = Benchmark('s3.isin(values)', - 
setup, - start_date=datetime(2014, 1, 25)) -series_isin_object = Benchmark('s4.isin(values)', - setup, - start_date=datetime(2014, 1, 25)) diff --git a/vb_suite/source/_static/stub b/vb_suite/source/_static/stub deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vb_suite/source/conf.py b/vb_suite/source/conf.py deleted file mode 100644 index d83448fd97d09..0000000000000 --- a/vb_suite/source/conf.py +++ /dev/null @@ -1,225 +0,0 @@ -# -*- coding: utf-8 -*- -# -# pandas documentation build configuration file, created by -# -# This file is execfile()d with the current directory set to its containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -import sys -import os - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.append(os.path.abspath('.')) -sys.path.insert(0, os.path.abspath('../sphinxext')) - -sys.path.extend([ - - # numpy standard doc extensions - os.path.join(os.path.dirname(__file__), - '..', '../..', - 'sphinxext') - -]) - -# -- General configuration ----------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. sphinxext. - -extensions = ['sphinx.ext.autodoc', - 'sphinx.ext.doctest'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates', '_templates/autosummary'] - -# The suffix of source filenames. -source_suffix = '.rst' - -# The encoding of source files. -# source_encoding = 'utf-8' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'pandas' -copyright = u'2008-2011, the pandas development team' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -import pandas - -# version = '%s r%s' % (pandas.__version__, svn_version()) -version = '%s' % (pandas.__version__) - -# The full version, including alpha/beta/rc tags. -release = version - -# JP: added from sphinxdocs -autosummary_generate = True - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# today = '' -# Else, today_fmt is used as the format for a strftime call. -# today_fmt = '%B %d, %Y' - -# List of documents that shouldn't be included in the build. -# unused_docs = [] - -# List of directories, relative to source directory, that shouldn't be searched -# for source files. -exclude_trees = [] - -# The reST default role (used for this markup: `text`) to use for all documents. -# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. 
They are ignored by default. -# show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] - - -# -- Options for HTML output --------------------------------------------- - -# The theme to use for HTML and HTML Help pages. Major themes that come with -# Sphinx are currently 'default' and 'sphinxdoc'. -html_theme = 'agogo' - -# The style sheet to use for HTML and HTML Help pages. A file of that name -# must exist either in Sphinx' static/ path, or in one of the custom paths -# given in html_static_path. -# html_style = 'statsmodels.css' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -html_theme_path = ['themes'] - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -html_title = 'Vbench performance benchmarks for pandas' - -# A shorter title for the navigation bar. Default is the same as html_title. -# html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -# html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -# html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -# html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -# html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -# html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -# html_additional_pages = {} - -# If false, no module index is generated. -html_use_modindex = True - -# If false, no index is generated. -# html_use_index = True - -# If true, the index is split into individual pages for each letter. -# html_split_index = False - -# If true, links to the reST sources are added to the pages. -# html_show_sourcelink = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -# html_use_opensearch = '' - -# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). -# html_file_suffix = '' - -# Output file base name for HTML help builder. -htmlhelp_basename = 'performance' - - -# -- Options for LaTeX output -------------------------------------------- - -# The paper size ('letter' or 'a4'). -# latex_paper_size = 'letter' - -# The font size ('10pt', '11pt' or '12pt'). -# latex_font_size = '10pt' - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). 
-latex_documents = [ - ('index', 'performance.tex', - u'pandas vbench Performance Benchmarks', - u'Wes McKinney', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -# latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -# latex_use_parts = False - -# Additional stuff for the LaTeX preamble. -# latex_preamble = '' - -# Documents to append as an appendix to all manuals. -# latex_appendices = [] - -# If false, no module index is generated. -# latex_use_modindex = True - - -# Example configuration for intersphinx: refer to the Python standard library. -# intersphinx_mapping = {'http://docs.scipy.org/': None} -import glob -autosummary_generate = glob.glob("*.rst") diff --git a/vb_suite/source/themes/agogo/layout.html b/vb_suite/source/themes/agogo/layout.html deleted file mode 100644 index cd0f3d7ffc9c7..0000000000000 --- a/vb_suite/source/themes/agogo/layout.html +++ /dev/null @@ -1,95 +0,0 @@ -{# - agogo/layout.html - ~~~~~~~~~~~~~~~~~ - - Sphinx layout template for the agogo theme, originally written - by Andi Albrecht. - - :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. - :license: BSD, see LICENSE for details. -#} -{% extends "basic/layout.html" %} - -{% block header %} -
-{% endblock %}
-
-{% block content %}
-  [content-wrapper <div> markup not recoverable from the extraction]
-  {%- block document %}
-  {{ super() }}
-  {%- endblock %}
-  [sidebar <div> markup not recoverable from the extraction]
-{% endblock %} - -{% block footer %} - -{% endblock %} - -{% block relbar1 %}{% endblock %} -{% block relbar2 %}{% endblock %} diff --git a/vb_suite/source/themes/agogo/static/agogo.css_t b/vb_suite/source/themes/agogo/static/agogo.css_t deleted file mode 100644 index ef909b72e20f6..0000000000000 --- a/vb_suite/source/themes/agogo/static/agogo.css_t +++ /dev/null @@ -1,476 +0,0 @@ -/* - * agogo.css_t - * ~~~~~~~~~~~ - * - * Sphinx stylesheet -- agogo theme. - * - * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. - * :license: BSD, see LICENSE for details. - * - */ - -* { - margin: 0px; - padding: 0px; -} - -body { - font-family: {{ theme_bodyfont }}; - line-height: 1.4em; - color: black; - background-color: {{ theme_bgcolor }}; -} - - -/* Page layout */ - -div.header, div.content, div.footer { - max-width: {{ theme_pagewidth }}; - margin-left: auto; - margin-right: auto; -} - -div.header-wrapper { - background: {{ theme_headerbg }}; - padding: 1em 1em 0; - border-bottom: 3px solid #2e3436; - min-height: 0px; -} - - -/* Default body styles */ -a { - color: {{ theme_linkcolor }}; -} - -div.bodywrapper a, div.footer a { - text-decoration: underline; -} - -.clearer { - clear: both; -} - -.left { - float: left; -} - -.right { - float: right; -} - -.line-block { - display: block; - margin-top: 1em; - margin-bottom: 1em; -} - -.line-block .line-block { - margin-top: 0; - margin-bottom: 0; - margin-left: 1.5em; -} - -h1, h2, h3, h4 { - font-family: {{ theme_headerfont }}; - font-weight: normal; - color: {{ theme_headercolor2 }}; - margin-bottom: .8em; -} - -h1 { - color: {{ theme_headercolor1 }}; -} - -h2 { - padding-bottom: .5em; - border-bottom: 1px solid {{ theme_headercolor2 }}; -} - -a.headerlink { - visibility: hidden; - color: #dddddd; - padding-left: .3em; -} - -h1:hover > a.headerlink, -h2:hover > a.headerlink, -h3:hover > a.headerlink, -h4:hover > a.headerlink, -h5:hover > a.headerlink, -h6:hover > a.headerlink, -dt:hover > a.headerlink { - visibility: visible; -} - -img { - border: 0; -} - -pre { - background-color: #EEE; - padding: 0.5em; -} - -div.admonition { - margin-top: 10px; - margin-bottom: 10px; - padding: 2px 7px 1px 7px; - border-left: 0.2em solid black; -} - -p.admonition-title { - margin: 0px 10px 5px 0px; - font-weight: bold; -} - -dt:target, .highlighted { - background-color: #fbe54e; -} - -/* Header */ - -/* -div.header { - padding-top: 10px; - padding-bottom: 10px; -} -*/ - -div.header {} - -div.header h1 { - font-family: {{ theme_headerfont }}; - font-weight: normal; - font-size: 180%; - letter-spacing: .08em; -} - -div.header h1 a { - color: white; -} - -div.header div.rel { - text-decoration: none; -} -/* margin-top: 1em; */ - -div.header div.rel a { - margin-top: 1em; - color: {{ theme_headerlinkcolor }}; - letter-spacing: .1em; - text-transform: uppercase; - padding: 3px 1em; -} - -p.logo { - float: right; -} - -img.logo { - border: 0; -} - - -/* Content */ -div.content-wrapper { - background-color: white; - padding: 1em; -} -/* - padding-top: 20px; - padding-bottom: 20px; -*/ - -/* float: left; */ - -div.document { - max-width: {{ theme_documentwidth }}; -} - -div.body { - padding-right: 2em; - text-align: {{ theme_textalign }}; -} - -div.document ul { - margin: 1.5em; - list-style-type: square; -} - -div.document dd { - margin-left: 1.2em; - margin-top: .4em; - margin-bottom: 1em; -} - -div.document .section { - margin-top: 1.7em; -} -div.document .section:first-child { - margin-top: 0px; -} - -div.document div.highlight { - padding: 3px; - 
background-color: #eeeeec; - border-top: 2px solid #dddddd; - border-bottom: 2px solid #dddddd; - margin-top: .8em; - margin-bottom: .8em; -} - -div.document h2 { - margin-top: .7em; -} - -div.document p { - margin-bottom: .5em; -} - -div.document li.toctree-l1 { - margin-bottom: 1em; -} - -div.document .descname { - font-weight: bold; -} - -div.document .docutils.literal { - background-color: #eeeeec; - padding: 1px; -} - -div.document .docutils.xref.literal { - background-color: transparent; - padding: 0px; -} - -div.document blockquote { - margin: 1em; -} - -div.document ol { - margin: 1.5em; -} - - -/* Sidebar */ - - -div.sidebar { - width: {{ theme_sidebarwidth }}; - padding: 0 1em; - float: right; - font-size: .93em; -} - -div.sidebar a, div.header a { - text-decoration: none; -} - -div.sidebar a:hover, div.header a:hover { - text-decoration: underline; -} - -div.sidebar h3 { - color: #2e3436; - text-transform: uppercase; - font-size: 130%; - letter-spacing: .1em; -} - -div.sidebar ul { - list-style-type: none; -} - -div.sidebar li.toctree-l1 a { - display: block; - padding: 1px; - border: 1px solid #dddddd; - background-color: #eeeeec; - margin-bottom: .4em; - padding-left: 3px; - color: #2e3436; -} - -div.sidebar li.toctree-l2 a { - background-color: transparent; - border: none; - margin-left: 1em; - border-bottom: 1px solid #dddddd; -} - -div.sidebar li.toctree-l3 a { - background-color: transparent; - border: none; - margin-left: 2em; - border-bottom: 1px solid #dddddd; -} - -div.sidebar li.toctree-l2:last-child a { - border-bottom: none; -} - -div.sidebar li.toctree-l1.current a { - border-right: 5px solid {{ theme_headerlinkcolor }}; -} - -div.sidebar li.toctree-l1.current li.toctree-l2 a { - border-right: none; -} - - -/* Footer */ - -div.footer-wrapper { - background: {{ theme_footerbg }}; - border-top: 4px solid #babdb6; - padding-top: 10px; - padding-bottom: 10px; - min-height: 80px; -} - -div.footer, div.footer a { - color: #888a85; -} - -div.footer .right { - text-align: right; -} - -div.footer .left { - text-transform: uppercase; -} - - -/* Styles copied from basic theme */ - -img.align-left, .figure.align-left, object.align-left { - clear: left; - float: left; - margin-right: 1em; -} - -img.align-right, .figure.align-right, object.align-right { - clear: right; - float: right; - margin-left: 1em; -} - -img.align-center, .figure.align-center, object.align-center { - display: block; - margin-left: auto; - margin-right: auto; -} - -.align-left { - text-align: left; -} - -.align-center { - clear: both; - text-align: center; -} - -.align-right { - text-align: right; -} - -/* -- search page ----------------------------------------------------------- */ - -ul.search { - margin: 10px 0 0 20px; - padding: 0; -} - -ul.search li { - padding: 5px 0 5px 20px; - background-image: url(file.png); - background-repeat: no-repeat; - background-position: 0 7px; -} - -ul.search li a { - font-weight: bold; -} - -ul.search li div.context { - color: #888; - margin: 2px 0 0 30px; - text-align: left; -} - -ul.keywordmatches li.goodmatch a { - font-weight: bold; -} - -/* -- index page ------------------------------------------------------------ */ - -table.contentstable { - width: 90%; -} - -table.contentstable p.biglink { - line-height: 150%; -} - -a.biglink { - font-size: 1.3em; -} - -span.linkdescr { - font-style: italic; - padding-top: 5px; - font-size: 90%; -} - -/* -- general index --------------------------------------------------------- */ - -table.indextable td { - text-align: left; - 
vertical-align: top; -} - -table.indextable dl, table.indextable dd { - margin-top: 0; - margin-bottom: 0; -} - -table.indextable tr.pcap { - height: 10px; -} - -table.indextable tr.cap { - margin-top: 10px; - background-color: #f2f2f2; -} - -img.toggler { - margin-right: 3px; - margin-top: 3px; - cursor: pointer; -} - -/* -- viewcode extension ---------------------------------------------------- */ - -.viewcode-link { - float: right; -} - -.viewcode-back { - float: right; - font-family:: {{ theme_bodyfont }}; -} - -div.viewcode-block:target { - margin: -1px -3px; - padding: 0 3px; - background-color: #f4debf; - border-top: 1px solid #ac9; - border-bottom: 1px solid #ac9; -} - -th.field-name { - white-space: nowrap; -} diff --git a/vb_suite/source/themes/agogo/static/bgfooter.png b/vb_suite/source/themes/agogo/static/bgfooter.png deleted file mode 100644 index 9ce5bdd902943fdf8b0c0ca6a545297e1e2cc665..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 434 zcmV;j0ZsmiP)Px#24YJ`L;%wO*8tD73qoQ5000SaNLh0L01FcU01FcV0GgZ_00007bV*G`2iXD> z2Q(2CT#42I000?uMObu0Z*6U5Zgc=ca%Ew3Wn>_CX>@2HM@dakSAh-}0003ENklR?sq9~H`=l5UI-{JW_f9!)=Hwush3JC}Y z1gFM&r>$lJNPt^*1k!w;l|obx>lr$2IOaI$n=(gBBaj^I0=y%@K5N&GIU&-%OE_~V zX=m=_j7d`hvubQRuF+xT63vIfWnC3%kKN*T3l7ob3nEC2R->wU1Y)4)(7_t^thiqb zj$CO7xBn9gg`*!MY$}SI|_*)!a*&V0w7h>cUb&$Grh37iJ=C%Yn c>}w1E0Z4f>1OEiDlmGw#07*qoM6N<$g4BwtIsgCw diff --git a/vb_suite/source/themes/agogo/static/bgtop.png b/vb_suite/source/themes/agogo/static/bgtop.png deleted file mode 100644 index a0d4709bac8f79943a817195c086461c8c4d5419..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 430 zcmV;f0a5;mP)Px#24YJ`L;zI)R{&FzA;Z4_000SaNLh0L01FcU01FcV0GgZ_00007bV*G`2iXD> z2Q3AZhV-)l000?uMObu0Z*6U5Zgc=ca%Ew3Wn>_CX>@2HM@dakSAh-}0003ANklMo8vqN`cM=KwSQV|n zk}naE+VzlN;kK@Ej${PSkI$-R6-Yfp`zA;^O$`)7`gRi{-0i?owGIbX{p>Nc##93U z;sA|ayOYkG%F9M0iEMUM*s3NDYSS=KN2ht8Rv|7nv77i{NTO47R)}V_+2H~mL-nTR z_8j}*%6Qm8?#7NU2kM$#gcP&kO?iw|n}ynz+r-~FA9nKcZnfixWvZ&d28Cc_6&_Pe zMpbjI>9r+<=}NIDz4mCd3U++H?rrHcYxH&eeB|)>mnv*N#44ILM2zL6yU!VVWSrgp Y0Yu&#qm)=by8r+H07*qoM6N<$f@HC)j{pDw diff --git a/vb_suite/source/themes/agogo/theme.conf b/vb_suite/source/themes/agogo/theme.conf deleted file mode 100644 index 3fc88580f1ab4..0000000000000 --- a/vb_suite/source/themes/agogo/theme.conf +++ /dev/null @@ -1,19 +0,0 @@ -[theme] -inherit = basic -stylesheet = agogo.css -pygments_style = tango - -[options] -bodyfont = "Verdana", Arial, sans-serif -headerfont = "Georgia", "Times New Roman", serif -pagewidth = 70em -documentwidth = 50em -sidebarwidth = 20em -bgcolor = #eeeeec -headerbg = url(bgtop.png) top left repeat-x -footerbg = url(bgfooter.png) top left repeat-x -linkcolor = #ce5c00 -headercolor1 = #204a87 -headercolor2 = #3465a4 -headerlinkcolor = #fcaf3e -textalign = justify \ No newline at end of file diff --git a/vb_suite/sparse.py b/vb_suite/sparse.py deleted file mode 100644 index b1c1a2f24e41d..0000000000000 --- a/vb_suite/sparse.py +++ /dev/null @@ -1,65 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- - -setup = common_setup + """ -from pandas.core.sparse import SparseSeries, SparseDataFrame - -K = 50 -N = 50000 -rng = np.asarray(date_range('1/1/2000', periods=N, - freq='T')) - -# rng2 = np.asarray(rng).astype('M8[ns]').astype('i8') - -series = {} -for i in range(1, K + 1): - data = 
np.random.randn(N)[:-i] - this_rng = rng[:-i] - data[100:] = np.nan - series[i] = SparseSeries(data, index=this_rng) -""" -stmt = "SparseDataFrame(series)" - -bm_sparse1 = Benchmark(stmt, setup, name="sparse_series_to_frame", - start_date=datetime(2011, 6, 1)) - - -setup = common_setup + """ -from pandas.core.sparse import SparseDataFrame -""" - -stmt = "SparseDataFrame(columns=np.arange(100), index=np.arange(1000))" - -sparse_constructor = Benchmark(stmt, setup, name="sparse_frame_constructor", - start_date=datetime(2012, 6, 1)) - - -setup = common_setup + """ -s = pd.Series([np.nan] * 10000) -s[0] = 3.0 -s[100] = -1.0 -s[999] = 12.1 -s.index = pd.MultiIndex.from_product((range(10), range(10), range(10), range(10))) -ss = s.to_sparse() -""" - -stmt = "ss.to_coo(row_levels=[0, 1], column_levels=[2, 3], sort_labels=True)" - -sparse_series_to_coo = Benchmark(stmt, setup, name="sparse_series_to_coo", - start_date=datetime(2015, 1, 3)) - -setup = common_setup + """ -import scipy.sparse -import pandas.core.sparse.series -A = scipy.sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(100, 100)) -""" - -stmt = "ss = pandas.core.sparse.series.SparseSeries.from_coo(A)" - -sparse_series_from_coo = Benchmark(stmt, setup, name="sparse_series_from_coo", - start_date=datetime(2015, 1, 3)) diff --git a/vb_suite/stat_ops.py b/vb_suite/stat_ops.py deleted file mode 100644 index 8d7c30dc9fdcf..0000000000000 --- a/vb_suite/stat_ops.py +++ /dev/null @@ -1,126 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# nanops - -setup = common_setup + """ -s = Series(np.random.randn(100000), index=np.arange(100000)) -s[::2] = np.nan -""" - -stat_ops_series_std = Benchmark("s.std()", setup) - -#---------------------------------------------------------------------- -# ops by level - -setup = common_setup + """ -index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], - labels=[np.arange(10).repeat(10000), - np.tile(np.arange(100).repeat(100), 10), - np.tile(np.tile(np.arange(100), 100), 10)]) -random.shuffle(index.values) -df = DataFrame(np.random.randn(len(index), 4), index=index) -df_level = DataFrame(np.random.randn(100, 4), index=index.levels[1]) -""" - -stat_ops_level_frame_sum = \ - Benchmark("df.sum(level=1)", setup, - start_date=datetime(2011, 11, 15)) - -stat_ops_level_frame_sum_multiple = \ - Benchmark("df.sum(level=[0, 1])", setup, repeat=1, - start_date=datetime(2011, 11, 15)) - -stat_ops_level_series_sum = \ - Benchmark("df[1].sum(level=1)", setup, - start_date=datetime(2011, 11, 15)) - -stat_ops_level_series_sum_multiple = \ - Benchmark("df[1].sum(level=[0, 1])", setup, repeat=1, - start_date=datetime(2011, 11, 15)) - -sum_setup = common_setup + """ -df = DataFrame(np.random.randn(100000, 4)) -dfi = DataFrame(np.random.randint(1000, size=df.shape)) -""" - -stat_ops_frame_sum_int_axis_0 = \ - Benchmark("dfi.sum()", sum_setup, start_date=datetime(2013, 7, 25)) - -stat_ops_frame_sum_float_axis_0 = \ - Benchmark("df.sum()", sum_setup, start_date=datetime(2013, 7, 25)) - -stat_ops_frame_mean_int_axis_0 = \ - Benchmark("dfi.mean()", sum_setup, start_date=datetime(2013, 7, 25)) - -stat_ops_frame_mean_float_axis_0 = \ - Benchmark("df.mean()", sum_setup, start_date=datetime(2013, 7, 25)) - -stat_ops_frame_sum_int_axis_1 = \ - Benchmark("dfi.sum(1)", sum_setup, start_date=datetime(2013, 7, 25)) - 
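A compact sketch (not from the patch) of the two reduction directions these sum/mean benchmarks compare:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(4, 3), columns=list('abc'))

    print(df.sum())        # axis 0: one value per column, what "df.sum()" times
    print(df.sum(axis=1))  # axis 1: one value per row, what "df.sum(1)" times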
-stat_ops_frame_sum_float_axis_1 = \ - Benchmark("df.sum(1)", sum_setup, start_date=datetime(2013, 7, 25)) - -stat_ops_frame_mean_int_axis_1 = \ - Benchmark("dfi.mean(1)", sum_setup, start_date=datetime(2013, 7, 25)) - -stat_ops_frame_mean_float_axis_1 = \ - Benchmark("df.mean(1)", sum_setup, start_date=datetime(2013, 7, 25)) - -#---------------------------------------------------------------------- -# rank - -setup = common_setup + """ -values = np.concatenate([np.arange(100000), - np.random.randn(100000), - np.arange(100000)]) -s = Series(values) -""" - -stats_rank_average = Benchmark('s.rank()', setup, - start_date=datetime(2011, 12, 12)) - -stats_rank_pct_average = Benchmark('s.rank(pct=True)', setup, - start_date=datetime(2014, 1, 16)) -stats_rank_pct_average_old = Benchmark('s.rank() / len(s)', setup, - start_date=datetime(2014, 1, 16)) -setup = common_setup + """ -values = np.random.randint(0, 100000, size=200000) -s = Series(values) -""" - -stats_rank_average_int = Benchmark('s.rank()', setup, - start_date=datetime(2011, 12, 12)) - -setup = common_setup + """ -df = DataFrame(np.random.randn(5000, 50)) -""" - -stats_rank2d_axis1_average = Benchmark('df.rank(1)', setup, - start_date=datetime(2011, 12, 12)) - -stats_rank2d_axis0_average = Benchmark('df.rank()', setup, - start_date=datetime(2011, 12, 12)) - -# rolling functions - -setup = common_setup + """ -arr = np.random.randn(100000) -""" - -stats_rolling_mean = Benchmark('rolling_mean(arr, 100)', setup, - start_date=datetime(2011, 6, 1)) - -# spearman correlation - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000, 30)) -""" - -stats_corr_spearman = Benchmark("df.corr(method='spearman')", setup, - start_date=datetime(2011, 12, 4)) diff --git a/vb_suite/strings.py b/vb_suite/strings.py deleted file mode 100644 index 0948df5673a0d..0000000000000 --- a/vb_suite/strings.py +++ /dev/null @@ -1,59 +0,0 @@ -from vbench.api import Benchmark - -common_setup = """from .pandas_vb_common import * -""" - -setup = common_setup + """ -import string -import itertools as IT - -def make_series(letters, strlen, size): - return Series( - [str(x) for x in np.fromiter(IT.cycle(letters), count=size*strlen, dtype='|S1') - .view('|S{}'.format(strlen))]) - -many = make_series('matchthis'+string.ascii_uppercase, strlen=19, size=10000) # 31% matches -few = make_series('matchthis'+string.ascii_uppercase*42, strlen=19, size=10000) # 1% matches -""" - -strings_cat = Benchmark("many.str.cat(sep=',')", setup) -strings_title = Benchmark("many.str.title()", setup) -strings_count = Benchmark("many.str.count('matchthis')", setup) -strings_contains_many = Benchmark("many.str.contains('matchthis')", setup) -strings_contains_few = Benchmark("few.str.contains('matchthis')", setup) -strings_contains_many_noregex = Benchmark( - "many.str.contains('matchthis', regex=False)", setup) -strings_contains_few_noregex = Benchmark( - "few.str.contains('matchthis', regex=False)", setup) -strings_startswith = Benchmark("many.str.startswith('matchthis')", setup) -strings_endswith = Benchmark("many.str.endswith('matchthis')", setup) -strings_lower = Benchmark("many.str.lower()", setup) -strings_upper = Benchmark("many.str.upper()", setup) -strings_replace = Benchmark("many.str.replace(r'(matchthis)', r'\1\1')", setup) -strings_repeat = Benchmark( - "many.str.repeat(list(IT.islice(IT.cycle(range(1,4)),len(many))))", setup) -strings_match = Benchmark("many.str.match(r'mat..this')", setup) -strings_extract = Benchmark("many.str.extract(r'(\w*)matchthis(\w*)')", setup) 
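A scaled-down illustration (not part of the patch) of what these vectorized .str benchmarks exercise; the 'matchthis' prefix mirrors make_series above, and the size here is arbitrary:

    import pandas as pd

    many = pd.Series(['matchthisABCDEFGHIJ'] * 1000)

    print(many.str.contains('matchthis').sum())               # regex engine path
    print(many.str.contains('matchthis', regex=False).sum())  # plain substring path
    print(many.str.count('matchthis').head())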
-strings_join_split = Benchmark("many.str.join(r'--').str.split('--')", setup) -strings_join_split_expand = Benchmark("many.str.join(r'--').str.split('--',expand=True)", setup) -strings_len = Benchmark("many.str.len()", setup) -strings_findall = Benchmark("many.str.findall(r'[A-Z]+')", setup) -strings_pad = Benchmark("many.str.pad(100, side='both')", setup) -strings_center = Benchmark("many.str.center(100)", setup) -strings_slice = Benchmark("many.str.slice(5,15,2)", setup) -strings_strip = Benchmark("many.str.strip('matchthis')", setup) -strings_lstrip = Benchmark("many.str.lstrip('matchthis')", setup) -strings_rstrip = Benchmark("many.str.rstrip('matchthis')", setup) -strings_get = Benchmark("many.str.get(0)", setup) - -setup = setup + """ -s = make_series(string.ascii_uppercase, strlen=10, size=10000).str.join('|') -""" -strings_get_dummies = Benchmark("s.str.get_dummies('|')", setup) - -setup = common_setup + """ -import pandas.util.testing as testing -ser = Series(testing.makeUnicodeIndex()) -""" - -strings_encode_decode = Benchmark("ser.str.encode('utf-8').str.decode('utf-8')", setup) diff --git a/vb_suite/suite.py b/vb_suite/suite.py deleted file mode 100644 index 45053b6610896..0000000000000 --- a/vb_suite/suite.py +++ /dev/null @@ -1,164 +0,0 @@ -from vbench.api import Benchmark, GitRepo -from datetime import datetime - -import os - -modules = ['attrs_caching', - 'binary_ops', - 'ctors', - 'frame_ctor', - 'frame_methods', - 'groupby', - 'index_object', - 'indexing', - 'io_bench', - 'io_sql', - 'inference', - 'hdfstore_bench', - 'join_merge', - 'gil', - 'miscellaneous', - 'panel_ctor', - 'packers', - 'parser_vb', - 'panel_methods', - 'plotting', - 'reindex', - 'replace', - 'sparse', - 'strings', - 'reshape', - 'stat_ops', - 'timeseries', - 'timedelta', - 'eval'] - -by_module = {} -benchmarks = [] - -for modname in modules: - ref = __import__(modname) - by_module[modname] = [v for v in ref.__dict__.values() - if isinstance(v, Benchmark)] - benchmarks.extend(by_module[modname]) - -for bm in benchmarks: - assert(bm.name is not None) - -import getpass -import sys - -USERNAME = getpass.getuser() - -if sys.platform == 'darwin': - HOME = '/Users/%s' % USERNAME -else: - HOME = '/home/%s' % USERNAME - -try: - import ConfigParser - - config = ConfigParser.ConfigParser() - config.readfp(open(os.path.expanduser('~/.vbenchcfg'))) - - REPO_PATH = config.get('setup', 'repo_path') - REPO_URL = config.get('setup', 'repo_url') - DB_PATH = config.get('setup', 'db_path') - TMP_DIR = config.get('setup', 'tmp_dir') -except: - REPO_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "../")) - REPO_URL = 'git@github.com:pandas-dev/pandas.git' - DB_PATH = os.path.join(REPO_PATH, 'vb_suite/benchmarks.db') - TMP_DIR = os.path.join(HOME, 'tmp/vb_pandas') - -PREPARE = """ -python setup.py clean -""" -BUILD = """ -python setup.py build_ext --inplace -""" -dependencies = ['pandas_vb_common.py'] - -START_DATE = datetime(2010, 6, 1) - -# repo = GitRepo(REPO_PATH) - -RST_BASE = 'source' - -# HACK! 
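As a side note on the suite.py configuration block above: a hypothetical ~/.vbenchcfg with the four keys it reads could be written like this (all paths are examples, not taken from the patch):

    try:
        import ConfigParser as configparser  # Python 2 spelling, as in suite.py
    except ImportError:
        import configparser

    config = configparser.ConfigParser()
    config.add_section('setup')
    config.set('setup', 'repo_path', '/home/user/code/pandas')
    config.set('setup', 'repo_url', 'git@github.com:pandas-dev/pandas.git')
    config.set('setup', 'db_path', '/home/user/code/pandas/vb_suite/benchmarks.db')
    config.set('setup', 'tmp_dir', '/home/user/tmp/vb_pandas')

    with open('/home/user/.vbenchcfg', 'w') as fh:
        config.write(fh)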
- -# timespan = [datetime(2011, 1, 1), datetime(2012, 1, 1)] - - -def generate_rst_files(benchmarks): - import matplotlib as mpl - mpl.use('Agg') - import matplotlib.pyplot as plt - - vb_path = os.path.join(RST_BASE, 'vbench') - fig_base_path = os.path.join(vb_path, 'figures') - - if not os.path.exists(vb_path): - print('creating %s' % vb_path) - os.makedirs(vb_path) - - if not os.path.exists(fig_base_path): - print('creating %s' % fig_base_path) - os.makedirs(fig_base_path) - - for bmk in benchmarks: - print('Generating rst file for %s' % bmk.name) - rst_path = os.path.join(RST_BASE, 'vbench/%s.txt' % bmk.name) - - fig_full_path = os.path.join(fig_base_path, '%s.png' % bmk.name) - - # make the figure - plt.figure(figsize=(10, 6)) - ax = plt.gca() - bmk.plot(DB_PATH, ax=ax) - - start, end = ax.get_xlim() - - plt.xlim([start - 30, end + 30]) - plt.savefig(fig_full_path, bbox_inches='tight') - plt.close('all') - - fig_rel_path = 'vbench/figures/%s.png' % bmk.name - rst_text = bmk.to_rst(image_path=fig_rel_path) - with open(rst_path, 'w') as f: - f.write(rst_text) - - with open(os.path.join(RST_BASE, 'index.rst'), 'w') as f: - print >> f, """ -Performance Benchmarks -====================== - -These historical benchmark graphs were produced with `vbench -`__. - -The ``.pandas_vb_common`` setup script can be found here_ - -.. _here: https://github.com/pandas-dev/pandas/tree/master/vb_suite - -Produced on a machine with - - - Intel Core i7 950 processor - - (K)ubuntu Linux 12.10 - - Python 2.7.2 64-bit (Enthought Python Distribution 7.1-2) - - NumPy 1.6.1 - -.. toctree:: - :hidden: - :maxdepth: 3 -""" - for modname, mod_bmks in sorted(by_module.items()): - print >> f, ' vb_%s' % modname - modpath = os.path.join(RST_BASE, 'vb_%s.rst' % modname) - with open(modpath, 'w') as mh: - header = '%s\n%s\n\n' % (modname, '=' * len(modname)) - print >> mh, header - - for bmk in mod_bmks: - print >> mh, bmk.name - print >> mh, '-' * len(bmk.name) - print >> mh, '.. 
include:: vbench/%s.txt\n' % bmk.name
diff --git a/vb_suite/test.py b/vb_suite/test.py
deleted file mode 100644
index da30c3e1a5f76..0000000000000
--- a/vb_suite/test.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from pandas import *
-import matplotlib.pyplot as plt
-
-import sqlite3
-
-from vbench.git import GitRepo
-
-
-REPO_PATH = '/home/adam/code/pandas'
-repo = GitRepo(REPO_PATH)
-
-con = sqlite3.connect('vb_suite/benchmarks.db')
-
-bmk = '36900a889961162138c140ce4ae3c205'
-# bmk = '9d7b8c04b532df6c2d55ef497039b0ce'
-bmk = '4481aa4efa9926683002a673d2ed3dac'
-bmk = '00593cd8c03d769669d7b46585161726'
-bmk = '3725ab7cd0a0657d7ae70f171c877cea'
-bmk = '3cd376d6d6ef802cdea49ac47a67be21'
-bmk2 = '459225186023853494bc345fd180f395'
-bmk = 'c22ca82e0cfba8dc42595103113c7da3'
-bmk = 'e0e651a8e9fbf0270ab68137f8b9df5f'
-bmk = '96bda4b9a60e17acf92a243580f2a0c3'
-
-
-def get_results(bmk):
-    results = con.execute(
-        "select * from results where checksum='%s'" % bmk).fetchall()
-    x = Series(dict((t[1], t[3]) for t in results))
-    x.index = x.index.map(repo.timestamps.get)
-    x = x.sort_index()
-    return x
-
-x = get_results(bmk)
-
-
-def graph1():
-    dm_getitem = get_results('459225186023853494bc345fd180f395')
-    dm_getvalue = get_results('c22ca82e0cfba8dc42595103113c7da3')
-
-    plt.figure()
-    ax = plt.gca()
-
-    dm_getitem.plot(label='df[col][idx]', ax=ax)
-    dm_getvalue.plot(label='df.get_value(idx, col)', ax=ax)
-
-    plt.ylabel('ms')
-    plt.legend(loc='best')
-
-
-def graph2():
-    bm = get_results('96bda4b9a60e17acf92a243580f2a0c3')
-    plt.figure()
-    ax = plt.gca()
-
-    bm.plot(ax=ax)
-    plt.ylabel('ms')
-
-bm = get_results('36900a889961162138c140ce4ae3c205')
-fig = plt.figure()
-ax = plt.gca()
-bm.plot(ax=ax)
-fig.autofmt_xdate()
-
-plt.xlim([bm.dropna().index[0] - datetools.MonthEnd(),
-          bm.dropna().index[-1] + datetools.MonthEnd()])
-plt.ylabel('ms')
diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py
deleted file mode 100755
index be546b72f9465..0000000000000
--- a/vb_suite/test_perf.py
+++ /dev/null
@@ -1,616 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-"""
-What
-----
-vbench is a library which can be used to benchmark the performance
-of a codebase over time.
-Although vbench can collect data over many commits, generate plots
-and other niceties, for Pull-Requests the important thing is the
-performance of the HEAD commit against a known-good baseline.
-
-This script tries to automate the process of comparing these
-two commits, and is meant to run out of the box on a fresh
-clone.
-
-How
----
-These are the steps taken:
-1) create a temp directory into which vbench will clone the temporary repo.
-2) instantiate a vbench runner, using the local repo as the source repo.
-3) perform a vbench run for the baseline commit, then the target commit.
-4) pull the results for both commits from the db. use pandas to align
-everything and calculate a ratio for the timing information.
-5) print the results to the log file and to stdout.
-
-"""
-
-# IMPORTANT NOTE
-#
-# This script should run on pandas versions at least as far back as 0.9.1.
-# devs should be able to use the latest version of this script with
-# any dusty old commit and expect it to "just work".
-# One way in which this is useful is when collecting historical data,
-# where writing some logic around this script may prove easier
-# in some cases than running vbench directly (think perf bisection).
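Step 4 of the "How" list above (align the two result sets and take a ratio) is what prep_totals further down in this file does with align() and a column-wise division. A minimal sketch of that computation, using made-up benchmark names and timings rather than real vbench output:

    import pandas as pd

    # timings in ms for the same benchmarks on two commits (values are made up)
    head = pd.Series({'groupby_sum': 1.2, 'frame_ctor': 0.8})
    base = pd.Series({'groupby_sum': 1.0, 'frame_ctor': 0.9, 'io_bench': 2.0})

    head, base = head.align(base)  # align on benchmark name; NaN where a run is missing
    totals = pd.DataFrame({'head[ms]': head, 'base[ms]': base, 'ratio': head / base})
    totals = totals.dropna().sort_values('ratio')  # ratio < 1.0: target commit is faster
    print(totals)
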
-#
-# *please*, when you modify this script for whatever reason,
-# make sure you do not break its functionality when running under older
-# pandas versions.
-# Note that deprecation warnings are turned off in main(), so there's
-# no need to change the actual code to suppress such warnings.
-
-import shutil
-import os
-import sys
-import argparse
-import tempfile
-import time
-import re
-
-import random
-import numpy as np
-
-import pandas as pd
-from pandas import DataFrame, Series
-
-from suite import REPO_PATH
-VB_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-DEFAULT_MIN_DURATION = 0.01
-HEAD_COL="head[ms]"
-BASE_COL="base[ms]"
-
-try:
-    import git # gitpython
-except Exception:
-    print("Error: Please install the `gitpython` package\n")
-    sys.exit(1)
-
-class RevParseAction(argparse.Action):
-    def __call__(self, parser, namespace, values, option_string=None):
-        import subprocess
-        cmd = 'git rev-parse --short --verify {0}^{{commit}}'.format(values)
-        rev_parse = subprocess.check_output(cmd, shell=True)
-        setattr(namespace, self.dest, rev_parse.strip())
-
-
-parser = argparse.ArgumentParser(description='Use vbench to measure and compare the performance of commits.')
-parser.add_argument('-H', '--head',
-                    help='Execute vbenches using the currently checked out copy.',
-                    dest='head',
-                    action='store_true',
-                    default=False)
-parser.add_argument('-b', '--base-commit',
-                    help='The commit serving as performance baseline ',
-                    type=str, action=RevParseAction)
-parser.add_argument('-t', '--target-commit',
-                    help='The commit to compare against the baseline (default: HEAD).',
-                    type=str, action=RevParseAction)
-parser.add_argument('--base-pickle',
-                    help='name of pickle file with timings data generated by a former `-H -d FILE` run. '\
-                    'filename must be of the form -*.* or specify --base-commit separately',
-                    type=str)
-parser.add_argument('--target-pickle',
-                    help='name of pickle file with timings data generated by a former `-H -d FILE` run '\
-                    'filename must be of the form -*.* or specify --target-commit separately',
-                    type=str)
-parser.add_argument('-m', '--min-duration',
-                    help='Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' % DEFAULT_MIN_DURATION,
-                    type=float,
-                    default=0.01)
-parser.add_argument('-o', '--output',
-                    metavar="",
-                    dest='log_file',
-                    help='Path of file in which to save the textual report (default: vb_suite.log).')
-parser.add_argument('-d', '--outdf',
-                    metavar="FNAME",
-                    dest='outdf',
-                    default=None,
-                    help='Name of file to df.save() the result table into.
Will overwrite')
-parser.add_argument('-r', '--regex',
-                    metavar="REGEX",
-                    dest='regex',
-                    default="",
-                    help='Regex pattern; only tests whose name matches the regex will be run.')
-parser.add_argument('-s', '--seed',
-                    metavar="SEED",
-                    dest='seed',
-                    default=1234,
-                    type=int,
-                    help='Integer value to seed PRNG with')
-parser.add_argument('-n', '--repeats',
-                    metavar="N",
-                    dest='repeats',
-                    default=3,
-                    type=int,
-                    help='Number of times to run each vbench, result value is the best of')
-parser.add_argument('-c', '--ncalls',
-                    metavar="N",
-                    dest='ncalls',
-                    default=3,
-                    type=int,
-                    help='Number of calls in each repetition of a vbench')
-parser.add_argument('-N', '--hrepeats',
-                    metavar="N",
-                    dest='hrepeats',
-                    default=1,
-                    type=int,
-                    help='implies -H, number of times to run the vbench suite on the head commit.\n'
-                    'Each iteration will yield another column in the output' )
-parser.add_argument('-a', '--affinity',
-                    metavar="a",
-                    dest='affinity',
-                    default=1,
-                    type=int,
-                    help='set processor affinity of the process; by default bind to cpu/core #1 only. '
-                    'Requires the "affinity" or "psutil" python module, will raise Warning otherwise')
-parser.add_argument('-u', '--burnin',
-                    metavar="u",
-                    dest='burnin',
-                    default=1,
-                    type=int,
-                    help='Number of extra iterations per benchmark to perform first, then throw away. ' )
-
-parser.add_argument('-S', '--stats',
-                    default=False,
-                    action='store_true',
-                    help='when specified with -N, prints the output of describe() per vbench results. ' )
-
-parser.add_argument('--temp-dir',
-                    metavar="PATH",
-                    default=None,
-                    help='Specify temp work dir to use. ccache depends on builds being invoked from consistent directory.' )
-
-parser.add_argument('-q', '--quiet',
-                    default=False,
-                    action='store_true',
-                    help='Suppress report output to stdout. ' )
-
-def get_results_df(db, rev):
-    """Takes a git commit hash and returns a DataFrame of benchmark results
-    """
-    bench = DataFrame(db.get_benchmarks())
-    results = DataFrame(map(list,db.get_rev_results(rev).values()))
-
-    # Since vbench.db._reg_rev_results returns an unlabeled dict,
-    # we have to break encapsulation a bit.
-    results.columns = db._results.c.keys()
-    results = results.join(bench['name'], on='checksum').set_index("checksum")
-    return results
-
-
-def prprint(s):
-    print("*** %s" % s)
-
-def pre_hook():
-    import gc
-    gc.disable()
-
-def post_hook():
-    import gc
-    gc.enable()
-
-def profile_comparative(benchmarks):
-
-    from vbench.api import BenchmarkRunner
-    from vbench.db import BenchmarkDB
-    from vbench.git import GitRepo
-    from suite import BUILD, DB_PATH, PREPARE, dependencies
-
-    TMP_DIR = args.temp_dir or tempfile.mkdtemp()
-
-    try:
-
-        prprint("Opening DB at '%s'...\n" % DB_PATH)
-        db = BenchmarkDB(DB_PATH)
-
-        prprint("Initializing Runner...")
-
-        # all in a good cause...
-        GitRepo._parse_commit_log = _parse_wrapper(args.base_commit)
-
-        runner = BenchmarkRunner(
-            benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH,
-            TMP_DIR, PREPARE, always_clean=True,
-            # run_option='eod', start_date=START_DATE,
-            module_dependencies=dependencies)
-
-        repo = runner.repo  # (steal the parsed git repo used by runner)
-        h_head = args.target_commit or repo.shas[-1]
-        h_baseline = args.base_commit
-
-        # ARGH.
reparse the repo, without discarding any commits,
-        # then overwrite the previous parse results
-        # prprint("Slaughtering kittens...")
-        (repo.shas, repo.messages,
-         repo.timestamps, repo.authors) = _parse_commit_log(None,REPO_PATH,
-                                                            args.base_commit)
-
-        prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, "")))
-        prprint('Baseline [%s] : %s\n' % (h_baseline,
-                repo.messages.get(h_baseline, "")))
-
-        prprint("Removing any previous measurements for the commits.")
-        db.delete_rev_results(h_baseline)
-        db.delete_rev_results(h_head)
-
-        # TODO: we could skip this, but we need to make sure all
-        # results are in the DB, which is a little tricky with
-        # start dates and so on.
-        prprint("Running benchmarks for baseline [%s]" % h_baseline)
-        runner._run_and_write_results(h_baseline)
-
-        prprint("Running benchmarks for target [%s]" % h_head)
-        runner._run_and_write_results(h_head)
-
-        prprint('Processing results...')
-
-        head_res = get_results_df(db, h_head)
-        baseline_res = get_results_df(db, h_baseline)
-
-        report_comparative(head_res,baseline_res)
-
-    finally:
-        #        print("Disposing of TMP_DIR: %s" % TMP_DIR)
-        shutil.rmtree(TMP_DIR)
-
-def prep_pickle_for_total(df, agg_name='median'):
-    """
-    accepts a dataframe resulting from invocation with -H -d o.pickle
-    If multiple data columns are present (-N was used), the
-    `agg_name` attr of the dataframe will be used to reduce
-    them to a single value per vbench, df.median is used by default.
-
-    Returns a dataframe of the form expected by prep_totals
-    """
-    def prep(df):
-        agg = getattr(df,agg_name)
-        df = DataFrame(agg(1))
-        cols = list(df.columns)
-        cols[0]='timing'
-        df.columns=cols
-        df['name'] = list(df.index)
-        return df
-
-    return prep(df)
-
-def prep_totals(head_res, baseline_res):
-    """
-    Each argument should be a dataframe with 'timing' and 'name' columns
-    where name is the name of the vbench.
-
-    returns a 'totals' dataframe, suitable as input for print_report.
- """ - head_res, baseline_res = head_res.align(baseline_res) - ratio = head_res['timing'] / baseline_res['timing'] - totals = DataFrame({HEAD_COL:head_res['timing'], - BASE_COL:baseline_res['timing'], - 'ratio':ratio, - 'name':baseline_res.name}, - columns=[HEAD_COL, BASE_COL, "ratio", "name"]) - totals = totals.ix[totals[HEAD_COL] > args.min_duration] - # ignore below threshold - totals = totals.dropna( - ).sort("ratio").set_index('name') # sort in ascending order - return totals - -def report_comparative(head_res,baseline_res): - try: - r=git.Repo(VB_DIR) - except: - import pdb - pdb.set_trace() - - totals = prep_totals(head_res,baseline_res) - - h_head = args.target_commit - h_baseline = args.base_commit - h_msg = b_msg = "Unknown" - try: - h_msg = r.commit(h_head).message.strip() - except git.exc.BadObject: - pass - try: - b_msg = r.commit(h_baseline).message.strip() - except git.exc.BadObject: - pass - - - print_report(totals,h_head=h_head,h_msg=h_msg, - h_baseline=h_baseline,b_msg=b_msg) - - if args.outdf: - prprint("The results DataFrame was written to '%s'\n" % args.outdf) - totals.save(args.outdf) - -def profile_head_single(benchmark): - import gc - results = [] - - # just in case - gc.collect() - - try: - from ctypes import cdll, CDLL - cdll.LoadLibrary("libc.so.6") - libc = CDLL("libc.so.6") - libc.malloc_trim(0) - except: - pass - - - N = args.hrepeats + args.burnin - - results = [] - try: - for i in range(N): - gc.disable() - d=dict() - - try: - d = benchmark.run() - - except KeyboardInterrupt: - raise - except Exception as e: # if a single vbench bursts into flames, don't die. - err="" - try: - err = d.get("traceback") - if err is None: - err = str(e) - except: - pass - print("%s died with:\n%s\nSkipping...\n" % (benchmark.name, err)) - - results.append(d.get('timing',np.nan)) - gc.enable() - gc.collect() - - finally: - gc.enable() - - if results: - # throw away the burn_in - results = results[args.burnin:] - sys.stdout.write('.') - sys.stdout.flush() - return Series(results, name=benchmark.name) - - # df = DataFrame(results) - # df.columns = ["name",HEAD_COL] - # return df.set_index("name")[HEAD_COL] - -def profile_head(benchmarks): - print( "Performing %d benchmarks (%d runs each)" % ( len(benchmarks), args.hrepeats)) - - ss= [profile_head_single(b) for b in benchmarks] - print("\n") - - results = DataFrame(ss) - results.columns=[ "#%d" %i for i in range(args.hrepeats)] - # results.index = ["#%d" % i for i in range(len(ss))] - # results = results.T - - shas, messages, _,_ = _parse_commit_log(None,REPO_PATH,base_commit="HEAD^") - print_report(results,h_head=shas[-1],h_msg=messages[-1]) - - - if args.outdf: - prprint("The results DataFrame was written to '%s'\n" % args.outdf) - DataFrame(results).save(args.outdf) - -def print_report(df,h_head=None,h_msg="",h_baseline=None,b_msg=""): - - name_width=45 - col_width = 10 - - hdr = ("{:%s}" % name_width).format("Test name") - hdr += ("|{:^%d}" % col_width)* len(df.columns) - hdr += "|" - hdr = hdr.format(*df.columns) - hdr = "-"*len(hdr) + "\n" + hdr + "\n" + "-"*len(hdr) + "\n" - ftr=hdr - s = "\n" - s+= "Invoked with :\n" - s+= "--ncalls: %s\n" % (args.ncalls or 'Auto') - s+= "--repeats: %s\n" % (args.repeats) - s+= "\n\n" - - s += hdr - # import ipdb - # ipdb.set_trace() - for i in range(len(df)): - lfmt = ("{:%s}" % name_width) - lfmt += ("| {:%d.4f} " % (col_width-2))* len(df.columns) - lfmt += "|\n" - s += lfmt.format(df.index[i],*list(df.iloc[i].values)) - - s+= ftr + "\n" - - s += "Ratio < 1.0 means the target commit is 
faster than the baseline.\n"
-    s += "Seed used: %d\n\n" % args.seed
-
-    if h_head:
-        s += 'Target [%s] : %s\n' % (h_head, h_msg)
-    if h_baseline:
-        s += 'Base   [%s] : %s\n\n' % (
-            h_baseline, b_msg)
-
-    stats_footer = "\n"
-    if args.stats :
-        try:
-            pd.options.display.expand_frame_repr=False
-        except:
-            pass
-        stats_footer += str(df.T.describe().T) + "\n\n"
-
-    s+= stats_footer
-    logfile = open(args.log_file, 'w')
-    logfile.write(s)
-    logfile.close()
-
-    if not args.quiet:
-        prprint(s)
-
-    if args.stats and args.quiet:
-        prprint(stats_footer)
-
-    prprint("Results were also written to the logfile at '%s'" %
-            args.log_file)
-
-
-
-def main():
-    from suite import benchmarks
-
-    if not args.log_file:
-        args.log_file = os.path.abspath(
-            os.path.join(REPO_PATH, 'vb_suite.log'))
-
-    saved_dir = os.path.curdir
-    if args.outdf:
-        # not bullet-proof but enough for us
-        args.outdf = os.path.realpath(args.outdf)
-
-    if args.log_file:
-        # not bullet-proof but enough for us
-        args.log_file = os.path.realpath(args.log_file)
-
-    random.seed(args.seed)
-    np.random.seed(args.seed)
-
-    if args.base_pickle and args.target_pickle:
-        baseline_res = prep_pickle_for_total(pd.load(args.base_pickle))
-        target_res = prep_pickle_for_total(pd.load(args.target_pickle))
-
-        report_comparative(target_res, baseline_res)
-        sys.exit(0)
-
-    if args.affinity is not None:
-        try: # use psutil rather than stale affinity module. Thanks @yarikoptic
-            import psutil
-            if hasattr(psutil.Process, 'set_cpu_affinity'):
-                psutil.Process(os.getpid()).set_cpu_affinity([args.affinity])
-                print("CPU affinity set to %d" % args.affinity)
-        except ImportError:
-            print("-a/--affinity specified, but the 'psutil' module is not available, aborting.\n")
-            sys.exit(1)
-
-    print("\n")
-    prprint("LOG_FILE = %s" % args.log_file)
-    if args.outdf:
-        prprint("PICKLE_FILE = %s" % args.outdf)
-
-    print("\n")
-
-    # move away from the pandas root dir, to avoid possible import
-    # surprises
-    os.chdir(os.path.dirname(os.path.abspath(__file__)))
-
-    benchmarks = [x for x in benchmarks if re.search(args.regex,x.name)]
-
-    for b in benchmarks:
-        b.repeat = args.repeats
-        if args.ncalls:
-            b.ncalls = args.ncalls
-
-    if benchmarks:
-        if args.head:
-            profile_head(benchmarks)
-        else:
-            profile_comparative(benchmarks)
-    else:
-        print( "No matching benchmarks")
-
-    os.chdir(saved_dir)
-
-# hack: vbench.git ignores some commits, but we
-# need to be able to reference any commit.
-# modified from vbench.git
-def _parse_commit_log(this,repo_path,base_commit=None):
-    from vbench.git import _convert_timezones
-    from pandas import Series
-    from dateutil import parser as dparser
-
-    git_cmd = 'git --git-dir=%s/.git --work-tree=%s ' % (repo_path, repo_path)
-    githist = git_cmd + ('log --graph --pretty=format:'+
-                         '\"::%h::%cd::%s::%an\"'+
-                         ('%s..'
% base_commit)+ - '> githist.txt') - os.system(githist) - githist = open('githist.txt').read() - os.remove('githist.txt') - - shas = [] - timestamps = [] - messages = [] - authors = [] - for line in githist.split('\n'): - if '*' not in line.split("::")[0]: # skip non-commit lines - continue - - _, sha, stamp, message, author = line.split('::', 4) - - # parse timestamp into datetime object - stamp = dparser.parse(stamp) - - shas.append(sha) - timestamps.append(stamp) - messages.append(message) - authors.append(author) - - # to UTC for now - timestamps = _convert_timezones(timestamps) - - shas = Series(shas, timestamps) - messages = Series(messages, shas) - timestamps = Series(timestamps, shas) - authors = Series(authors, shas) - return shas[::-1], messages[::-1], timestamps[::-1], authors[::-1] - -# even worse, monkey patch vbench -def _parse_wrapper(base_commit): - def inner(repo_path): - return _parse_commit_log(repo_path,base_commit) - return inner - -if __name__ == '__main__': - args = parser.parse_args() - if (not args.head - and not (args.base_commit and args.target_commit) - and not (args.base_pickle and args.target_pickle)): - parser.print_help() - sys.exit(1) - elif ((args.base_pickle or args.target_pickle) and not - (args.base_pickle and args.target_pickle)): - print("Must specify Both --base-pickle and --target-pickle.") - sys.exit(1) - - if ((args.base_pickle or args.target_pickle) and not - (args.base_commit and args.target_commit)): - if not args.base_commit: - print("base_commit not specified, Assuming base_pickle is named -foo.*") - args.base_commit = args.base_pickle.split('-')[0] - if not args.target_commit: - print("target_commit not specified, Assuming target_pickle is named -foo.*") - args.target_commit = args.target_pickle.split('-')[0] - - import warnings - warnings.filterwarnings('ignore',category=FutureWarning) - warnings.filterwarnings('ignore',category=DeprecationWarning) - - if args.base_commit and args.target_commit: - print("Verifying specified commits exist in repo...") - r=git.Repo(VB_DIR) - for c in [ args.base_commit, args.target_commit ]: - try: - msg = r.commit(c).message.strip() - except git.BadObject: - print("The commit '%s' was not found, aborting..." 
% c) - sys.exit(1) - else: - print("%s: %s" % (c,msg)) - - main() diff --git a/vb_suite/timedelta.py b/vb_suite/timedelta.py deleted file mode 100644 index 378968ea1379a..0000000000000 --- a/vb_suite/timedelta.py +++ /dev/null @@ -1,32 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -from pandas import to_timedelta -""" - -#---------------------------------------------------------------------- -# conversion - -setup = common_setup + """ -arr = np.random.randint(0,1000,size=10000) -""" - -stmt = "to_timedelta(arr,unit='s')" -timedelta_convert_int = Benchmark(stmt, setup, start_date=datetime(2014, 1, 1)) - -setup = common_setup + """ -arr = np.random.randint(0,1000,size=10000) -arr = [ '{0} days'.format(i) for i in arr ] -""" - -stmt = "to_timedelta(arr)" -timedelta_convert_string = Benchmark(stmt, setup, start_date=datetime(2014, 1, 1)) - -setup = common_setup + """ -arr = np.random.randint(0,60,size=10000) -arr = [ '00:00:{0:02d}'.format(i) for i in arr ] -""" - -stmt = "to_timedelta(arr)" -timedelta_convert_string_seconds = Benchmark(stmt, setup, start_date=datetime(2014, 1, 1)) diff --git a/vb_suite/timeseries.py b/vb_suite/timeseries.py deleted file mode 100644 index 15bc89d62305f..0000000000000 --- a/vb_suite/timeseries.py +++ /dev/null @@ -1,445 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime -from pandas import * - -N = 100000 -try: - rng = date_range(start='1/1/2000', periods=N, freq='min') -except NameError: - rng = DatetimeIndex(start='1/1/2000', periods=N, freq='T') - def date_range(start=None, end=None, periods=None, freq=None): - return DatetimeIndex(start=start, end=end, periods=periods, offset=freq) - - -common_setup = """from .pandas_vb_common import * -from datetime import timedelta -N = 100000 - -rng = date_range(start='1/1/2000', periods=N, freq='T') - -if hasattr(Series, 'convert'): - Series.resample = Series.convert - -ts = Series(np.random.randn(N), index=rng) -""" - -#---------------------------------------------------------------------- -# Lookup value in large time series, hash map population - -setup = common_setup + """ -rng = date_range(start='1/1/2000', periods=1500000, freq='S') -ts = Series(1, index=rng) -""" - -stmt = "ts[ts.index[len(ts) // 2]]; ts.index._cleanup()" -timeseries_large_lookup_value = Benchmark(stmt, setup, - start_date=datetime(2012, 1, 1)) - -#---------------------------------------------------------------------- -# Test slice minutely series - -timeseries_slice_minutely = Benchmark('ts[:10000]', common_setup) - -#---------------------------------------------------------------------- -# Test conversion - -setup = common_setup + """ - -""" - -timeseries_1min_5min_ohlc = Benchmark( - "ts[:10000].resample('5min', how='ohlc')", - common_setup, - start_date=datetime(2012, 5, 1)) - -timeseries_1min_5min_mean = Benchmark( - "ts[:10000].resample('5min', how='mean')", - common_setup, - start_date=datetime(2012, 5, 1)) - -#---------------------------------------------------------------------- -# Irregular alignment - -setup = common_setup + """ -lindex = np.random.permutation(N)[:N // 2] -rindex = np.random.permutation(N)[:N // 2] -left = Series(ts.values.take(lindex), index=ts.index.take(lindex)) -right = Series(ts.values.take(rindex), index=ts.index.take(rindex)) -""" - -timeseries_add_irregular = Benchmark('left + right', setup) - -#---------------------------------------------------------------------- -# Sort large irregular time series - -setup = 
common_setup + """ -N = 100000 -rng = date_range(start='1/1/2000', periods=N, freq='s') -rng = rng.take(np.random.permutation(N)) -ts = Series(np.random.randn(N), index=rng) -""" - -timeseries_sort_index = Benchmark('ts.sort_index()', setup, - start_date=datetime(2012, 4, 1)) - -#---------------------------------------------------------------------- -# Shifting, add offset - -setup = common_setup + """ -rng = date_range(start='1/1/2000', periods=10000, freq='T') -""" - -datetimeindex_add_offset = Benchmark('rng + timedelta(minutes=2)', setup, - start_date=datetime(2012, 4, 1)) - -setup = common_setup + """ -N = 10000 -rng = date_range(start='1/1/1990', periods=N, freq='53s') -ts = Series(np.random.randn(N), index=rng) -dates = date_range(start='1/1/1990', periods=N * 10, freq='5s') -""" -timeseries_asof_single = Benchmark('ts.asof(dates[0])', setup, - start_date=datetime(2012, 4, 27)) - -timeseries_asof = Benchmark('ts.asof(dates)', setup, - start_date=datetime(2012, 4, 27)) - -setup = setup + 'ts[250:5000] = np.nan' - -timeseries_asof_nan = Benchmark('ts.asof(dates)', setup, - start_date=datetime(2012, 4, 27)) - -#---------------------------------------------------------------------- -# Time zone - -setup = common_setup + """ -rng = date_range(start='1/1/2000', end='3/1/2000', tz='US/Eastern') -""" - -timeseries_timestamp_tzinfo_cons = \ - Benchmark('rng[0]', setup, start_date=datetime(2012, 5, 5)) - -#---------------------------------------------------------------------- -# Resampling period - -setup = common_setup + """ -rng = period_range(start='1/1/2000', end='1/1/2001', freq='T') -ts = Series(np.random.randn(len(rng)), index=rng) -""" - -timeseries_period_downsample_mean = \ - Benchmark("ts.resample('D', how='mean')", setup, - start_date=datetime(2012, 4, 25)) - -setup = common_setup + """ -rng = date_range(start='1/1/2000', end='1/1/2001', freq='T') -ts = Series(np.random.randn(len(rng)), index=rng) -""" - -timeseries_timestamp_downsample_mean = \ - Benchmark("ts.resample('D', how='mean')", setup, - start_date=datetime(2012, 4, 25)) - -# GH 7754 -setup = common_setup + """ -rng = date_range(start='2000-01-01 00:00:00', - end='2000-01-01 10:00:00', freq='555000U') -int_ts = Series(5, rng, dtype='int64') -ts = int_ts.astype('datetime64[ns]') -""" - -timeseries_resample_datetime64 = Benchmark("ts.resample('1S', how='last')", setup) - -#---------------------------------------------------------------------- -# to_datetime - -setup = common_setup + """ -rng = date_range(start='1/1/2000', periods=20000, freq='H') -strings = [x.strftime('%Y-%m-%d %H:%M:%S') for x in rng] -""" - -timeseries_to_datetime_iso8601 = \ - Benchmark('to_datetime(strings)', setup, - start_date=datetime(2012, 7, 11)) - -timeseries_to_datetime_iso8601_format = \ - Benchmark("to_datetime(strings, format='%Y-%m-%d %H:%M:%S')", setup, - start_date=datetime(2012, 7, 11)) - -setup = common_setup + """ -rng = date_range(start='1/1/2000', periods=10000, freq='D') -strings = Series(rng.year*10000+rng.month*100+rng.day,dtype=np.int64).apply(str) -""" - -timeseries_to_datetime_YYYYMMDD = \ - Benchmark('to_datetime(strings,format="%Y%m%d")', setup, - start_date=datetime(2012, 7, 1)) - -setup = common_setup + """ -s = Series(['19MAY11','19MAY11:00:00:00']*100000) -""" -timeseries_with_format_no_exact = Benchmark("to_datetime(s,format='%d%b%y',exact=False)", \ - setup, start_date=datetime(2014, 11, 26)) -timeseries_with_format_replace = Benchmark("to_datetime(s.str.replace(':\S+$',''),format='%d%b%y')", \ - setup, 
start_date=datetime(2014, 11, 26)) - -# ---- infer_freq -# infer_freq - -setup = common_setup + """ -from pandas.tseries.frequencies import infer_freq -rng = date_range(start='1/1/1700', freq='D', periods=100000) -a = rng[:50000].append(rng[50002:]) -""" - -timeseries_infer_freq = \ - Benchmark('infer_freq(a)', setup, start_date=datetime(2012, 7, 1)) - -# setitem PeriodIndex - -setup = common_setup + """ -rng = period_range(start='1/1/1990', freq='S', periods=20000) -df = DataFrame(index=range(len(rng))) -""" - -period_setitem = \ - Benchmark("df['col'] = rng", setup, - start_date=datetime(2012, 8, 1)) - -setup = common_setup + """ -rng = date_range(start='1/1/2000 9:30', periods=10000, freq='S', tz='US/Eastern') -""" - -datetimeindex_normalize = \ - Benchmark('rng.normalize()', setup, - start_date=datetime(2012, 9, 1)) - -setup = common_setup + """ -from pandas.tseries.offsets import Second -s1 = date_range(start='1/1/2000', periods=100, freq='S') -curr = s1[-1] -slst = [] -for i in range(100): - slst.append(curr + Second()), periods=100, freq='S') - curr = slst[-1][-1] -""" - -# dti_append_tz = \ -# Benchmark('s1.append(slst)', setup, start_date=datetime(2012, 9, 1)) - - -setup = common_setup + """ -rng = date_range(start='1/1/2000', periods=1000, freq='H') -df = DataFrame(np.random.randn(len(rng), 2), rng) -""" - -dti_reset_index = \ - Benchmark('df.reset_index()', setup, start_date=datetime(2012, 9, 1)) - -setup = common_setup + """ -rng = date_range(start='1/1/2000', periods=1000, freq='H', - tz='US/Eastern') -df = DataFrame(np.random.randn(len(rng), 2), index=rng) -""" - -dti_reset_index_tz = \ - Benchmark('df.reset_index()', setup, start_date=datetime(2012, 9, 1)) - -setup = common_setup + """ -rng = date_range(start='1/1/2000', periods=1000, freq='T') -index = rng.repeat(10) -""" - -datetimeindex_unique = Benchmark('index.unique()', setup, - start_date=datetime(2012, 7, 1)) - -# tz_localize with infer argument. This is an attempt to emulate the results -# of read_csv with duplicated data. 
Not passing infer_dst will fail -setup = common_setup + """ -dst_rng = date_range(start='10/29/2000 1:00:00', - end='10/29/2000 1:59:59', freq='S') -index = date_range(start='10/29/2000', end='10/29/2000 00:59:59', freq='S') -index = index.append(dst_rng) -index = index.append(dst_rng) -index = index.append(date_range(start='10/29/2000 2:00:00', - end='10/29/2000 3:00:00', freq='S')) -""" - -datetimeindex_infer_dst = \ -Benchmark('index.tz_localize("US/Eastern", infer_dst=True)', - setup, start_date=datetime(2013, 9, 30)) - - -#---------------------------------------------------------------------- -# Resampling: fast-path various functions - -setup = common_setup + """ -rng = date_range(start='20130101',periods=100000,freq='50L') -df = DataFrame(np.random.randn(100000,2),index=rng) -""" - -dataframe_resample_mean_string = \ - Benchmark("df.resample('1s', how='mean')", setup) - -dataframe_resample_mean_numpy = \ - Benchmark("df.resample('1s', how=np.mean)", setup) - -dataframe_resample_min_string = \ - Benchmark("df.resample('1s', how='min')", setup) - -dataframe_resample_min_numpy = \ - Benchmark("df.resample('1s', how=np.min)", setup) - -dataframe_resample_max_string = \ - Benchmark("df.resample('1s', how='max')", setup) - -dataframe_resample_max_numpy = \ - Benchmark("df.resample('1s', how=np.max)", setup) - - -#---------------------------------------------------------------------- -# DatetimeConverter - -setup = common_setup + """ -from pandas.tseries.converter import DatetimeConverter -""" - -datetimeindex_converter = \ - Benchmark('DatetimeConverter.convert(rng, None, None)', - setup, start_date=datetime(2013, 1, 1)) - -# Adding custom business day -setup = common_setup + """ -import datetime as dt -import pandas as pd -try: - import pandas.tseries.holiday -except ImportError: - pass -import numpy as np - -date = dt.datetime(2011,1,1) -dt64 = np.datetime64('2011-01-01 09:00Z') -hcal = pd.tseries.holiday.USFederalHolidayCalendar() - -day = pd.offsets.Day() -year = pd.offsets.YearBegin() -cday = pd.offsets.CustomBusinessDay() -cmb = pd.offsets.CustomBusinessMonthBegin(calendar=hcal) -cme = pd.offsets.CustomBusinessMonthEnd(calendar=hcal) - -cdayh = pd.offsets.CustomBusinessDay(calendar=hcal) -""" -timeseries_day_incr = Benchmark("date + day",setup) - -timeseries_day_apply = Benchmark("day.apply(date)",setup) - -timeseries_year_incr = Benchmark("date + year",setup) - -timeseries_year_apply = Benchmark("year.apply(date)",setup) - -timeseries_custom_bday_incr = \ - Benchmark("date + cday",setup) - -timeseries_custom_bday_decr = \ - Benchmark("date - cday",setup) - -timeseries_custom_bday_apply = \ - Benchmark("cday.apply(date)",setup) - -timeseries_custom_bday_apply_dt64 = \ - Benchmark("cday.apply(dt64)",setup) - -timeseries_custom_bday_cal_incr = \ - Benchmark("date + 1 * cdayh",setup) - -timeseries_custom_bday_cal_decr = \ - Benchmark("date - 1 * cdayh",setup) - -timeseries_custom_bday_cal_incr_n = \ - Benchmark("date + 10 * cdayh",setup) - -timeseries_custom_bday_cal_incr_neg_n = \ - Benchmark("date - 10 * cdayh",setup) - -# Increment custom business month -timeseries_custom_bmonthend_incr = \ - Benchmark("date + cme",setup) - -timeseries_custom_bmonthend_incr_n = \ - Benchmark("date + 10 * cme",setup) - -timeseries_custom_bmonthend_decr_n = \ - Benchmark("date - 10 * cme",setup) - -timeseries_custom_bmonthbegin_incr_n = \ - Benchmark("date + 10 * cmb",setup) - -timeseries_custom_bmonthbegin_decr_n = \ - Benchmark("date - 10 * cmb",setup) - - 
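The custom-business-day and custom-business-month benchmarks above all time DateOffset arithmetic against a holiday calendar. A minimal sketch of the operation being measured, mirroring the setup block (the anchor date is arbitrary):

    import pandas as pd
    from pandas.tseries.holiday import USFederalHolidayCalendar

    cdayh = pd.offsets.CustomBusinessDay(calendar=USFederalHolidayCalendar())
    # stepping ten business days forward skips weekends and federal holidays
    print(pd.Timestamp('2011-01-01') + 10 * cdayh)
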
-#---------------------------------------------------------------------- -# month/quarter/year start/end accessors - -setup = common_setup + """ -N = 10000 -rng = date_range(start='1/1/1', periods=N, freq='B') -""" - -timeseries_is_month_start = Benchmark('rng.is_month_start', setup, - start_date=datetime(2014, 4, 1)) - -#---------------------------------------------------------------------- -# iterate over DatetimeIndex/PeriodIndex -setup = common_setup + """ -N = 1000000 -M = 10000 -idx1 = date_range(start='20140101', freq='T', periods=N) -idx2 = period_range(start='20140101', freq='T', periods=N) - -def iter_n(iterable, n=None): - i = 0 - for _ in iterable: - i += 1 - if n is not None and i > n: - break -""" - -timeseries_iter_datetimeindex = Benchmark('iter_n(idx1)', setup) - -timeseries_iter_periodindex = Benchmark('iter_n(idx2)', setup) - -timeseries_iter_datetimeindex_preexit = Benchmark('iter_n(idx1, M)', setup) - -timeseries_iter_periodindex_preexit = Benchmark('iter_n(idx2, M)', setup) - - -#---------------------------------------------------------------------- -# apply an Offset to a DatetimeIndex -setup = common_setup + """ -N = 100000 -idx1 = date_range(start='20140101', freq='T', periods=N) -delta_offset = pd.offsets.Day() -fast_offset = pd.offsets.DateOffset(months=2, days=2) -slow_offset = pd.offsets.BusinessDay() - -""" - -timeseries_datetimeindex_offset_delta = Benchmark('idx1 + delta_offset', setup) -timeseries_datetimeindex_offset_fast = Benchmark('idx1 + fast_offset', setup) -timeseries_datetimeindex_offset_slow = Benchmark('idx1 + slow_offset', setup) - -# apply an Offset to a Series containing datetime64 values -setup = common_setup + """ -N = 100000 -s = Series(date_range(start='20140101', freq='T', periods=N)) -delta_offset = pd.offsets.Day() -fast_offset = pd.offsets.DateOffset(months=2, days=2) -slow_offset = pd.offsets.BusinessDay() - -""" - -timeseries_series_offset_delta = Benchmark('s + delta_offset', setup) -timeseries_series_offset_fast = Benchmark('s + fast_offset', setup) -timeseries_series_offset_slow = Benchmark('s + slow_offset', setup) From e0cbc37ccc02c3ee539a19d9043b541ace0d2733 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Mon, 17 Apr 2017 22:44:24 +0200 Subject: [PATCH 413/933] TST: partial indexing with __getitem__ and integer labels (#16029) closes #12416 --- pandas/tests/indexing/test_multiindex.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index 07786b9fb4b72..c39e25a1f1d74 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -275,6 +275,30 @@ def test_loc_multiindex(self): xp = mi_int.ix[4] tm.assert_frame_equal(rs, xp) + def test_getitem_partial_int(self): + # GH 12416 + # with single item + l1 = [10, 20] + l2 = ['a', 'b'] + df = DataFrame(index=range(2), + columns=pd.MultiIndex.from_product([l1, l2])) + expected = DataFrame(index=range(2), + columns=l2) + result = df[20] + tm.assert_frame_equal(result, expected) + + # with list + expected = DataFrame(index=range(2), + columns=pd.MultiIndex.from_product([l1[1:], l2])) + result = df[[20]] + tm.assert_frame_equal(result, expected) + + # missing item: + with tm.assertRaisesRegexp(KeyError, '1'): + df[1] + with tm.assertRaisesRegexp(KeyError, "'\[1\] not in index'"): + df[[1]] + def test_loc_multiindex_indexer_none(self): # GH6788 From a65492f884653c81d684786ce6098f88def3b0fe Mon Sep 17 00:00:00 2001 From: Jeff Reback 
Date: Mon, 17 Apr 2017 19:58:04 -0400
Subject: [PATCH 414/933] TST: skip 32bit platform on test_get_indexer_closed
 for interval index

---
 pandas/tests/indexes/test_interval.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py
index d99ef9538c5b1..825e508174374 100644
--- a/pandas/tests/indexes/test_interval.py
+++ b/pandas/tests/indexes/test_interval.py
@@ -4,7 +4,8 @@
 import numpy as np

 from pandas import (Interval, IntervalIndex, Index, isnull,
-                    interval_range, Timestamp, Timedelta)
+                    interval_range, Timestamp, Timedelta,
+                    compat)
 from pandas._libs.interval import IntervalTree
 from pandas.tests.indexes.common import Base
 import pandas.util.testing as tm
@@ -778,6 +779,8 @@ def test_get_loc_closed(self):
             self.assert_numpy_array_equal(tree.get_loc(p),
                                           np.array([0], dtype='int64'))

+    @pytest.mark.skipif(compat.is_platform_32bit(),
+                        reason="int type mismatch on 32bit")
     def test_get_indexer_closed(self):
         x = np.arange(1000, dtype='float64')
         found = x.astype('intp')

From cd1031f28664928c7e258dbb04d1f5861bad6b37 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Mon, 17 Apr 2017 19:24:28 -0500
Subject: [PATCH 415/933] BUG: Handle iterable of arrays in convert (#16026)

* BUG: Handle iterable of arrays in convert

DatetimeConverter.convert can take an array or iterable of arrays.
Fixed the converter to detect which case we're in and then re-use the
existing logic.
---
 doc/source/whatsnew/v0.20.0.txt            |  1 +
 pandas/core/dtypes/inference.py            | 44 ++++++++++++++++++++
 pandas/plotting/_converter.py              | 24 +++++++++++-
 pandas/tests/dtypes/test_inference.py      | 22 +++++++++++
 pandas/tests/plotting/test_converter.py    | 13 +++++++
 pandas/tests/plotting/test_datetimelike.py |  8 ++++
 6 files changed, 110 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 9df82b8ac7338..0b95bf98b401d 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -1566,6 +1566,7 @@ Plotting

 - Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`)
 - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`)
+- Bug in the date and time converters pandas registers with matplotlib not handling multiple dimensions (:issue:`16026`)
 - Bug in ``pd.scatter_matrix()`` could accept either ``color`` or ``c``, but not both (:issue:`14855`)

 Groupby/Resample/Rolling
diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py
index b0a93d24228af..66f4d87aa8e33 100644
--- a/pandas/core/dtypes/inference.py
+++ b/pandas/core/dtypes/inference.py
@@ -273,6 +273,50 @@ def is_list_like(obj):
             not isinstance(obj, string_and_binary_types))


+def is_nested_list_like(obj):
+    """
+    Check if the object is list-like, and that all of its elements
+    are also list-like.
+
+    .. versionadded:: 0.20.0
+
+    Parameters
+    ----------
+    obj : The object to check.
+
+    Returns
+    -------
+    is_list_like : bool
+        Whether `obj` has list-like properties.
+
+    Examples
+    --------
+    >>> is_nested_list_like([[1, 2, 3]])
+    True
+    >>> is_nested_list_like([{1, 2, 3}, {1, 2, 3}])
+    True
+    >>> is_nested_list_like(["foo"])
+    False
+    >>> is_nested_list_like([])
+    False
+    >>> is_nested_list_like([[1, 2, 3], 1])
+    False
+
+    Notes
+    -----
+    This won't reliably detect whether a consumable iterator (e. g.
+ a generator) is a nested-list-like without consuming the iterator. + To avoid consuming it, we always return False if the outer container + doesn't define `__len__`. + + See Also + -------- + is_list_like + """ + return (is_list_like(obj) and hasattr(obj, '__len__') and + len(obj) > 0 and all(is_list_like(item) for item in obj)) + + def is_dict_like(obj): """ Check if the object is dict-like. diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index 0e51e95057be2..9621ee3d0cad4 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -10,13 +10,14 @@ from matplotlib.ticker import Formatter, AutoLocator, Locator from matplotlib.transforms import nonsingular - from pandas.core.dtypes.common import ( is_float, is_integer, is_integer_dtype, is_float_dtype, is_datetime64_ns_dtype, - is_period_arraylike) + is_period_arraylike, + is_nested_list_like +) from pandas.compat import lrange import pandas.compat as compat @@ -127,6 +128,15 @@ class PeriodConverter(dates.DateConverter): @staticmethod def convert(values, units, axis): + if is_nested_list_like(values): + values = [PeriodConverter._convert_1d(v, units, axis) + for v in values] + else: + values = PeriodConverter._convert_1d(values, units, axis) + return values + + @staticmethod + def _convert_1d(values, units, axis): if not hasattr(axis, 'freq'): raise TypeError('Axis must have `freq` set to convert to Periods') valid_types = (compat.string_types, datetime, @@ -178,6 +188,16 @@ class DatetimeConverter(dates.DateConverter): @staticmethod def convert(values, unit, axis): + # values might be a 1-d array, or a list-like of arrays. + if is_nested_list_like(values): + values = [DatetimeConverter._convert_1d(v, unit, axis) + for v in values] + else: + values = DatetimeConverter._convert_1d(values, unit, axis) + return values + + @staticmethod + def _convert_1d(values, unit, axis): def try_parse(values): try: return _dt_to_float_ordinal(tools.to_datetime(values)) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 94d1d21d59d88..dd8f65a8e48ff 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -11,6 +11,7 @@ from datetime import datetime, date, timedelta, time import numpy as np import pytz +import pytest import pandas as pd from pandas._libs import tslib, lib @@ -66,6 +67,27 @@ def test_is_list_like(): assert not inference.is_list_like(f) +@pytest.mark.parametrize('inner', [ + [], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]), + Series([]), Series(['a']).str, (x for x in range(5)) +]) +@pytest.mark.parametrize('outer', [ + list, Series, np.array, tuple +]) +def test_is_nested_list_like_passes(inner, outer): + result = outer([inner for _ in range(5)]) + assert inference.is_list_like(result) + + +@pytest.mark.parametrize('obj', [ + 'abc', [], [1], (1,), ['a'], 'a', {'a'}, + [1, 2, 3], Series([1]), DataFrame({"A": [1]}), + ([1, 2] for _ in range(5)), +]) +def test_is_nested_list_like_fails(obj): + assert not inference.is_nested_list_like(obj) + + def test_is_dict_like(): passes = [{}, {'A': 1}, Series([1])] fails = ['1', 1, [1, 2], (1, 2), range(2), Index([1])] diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 683f4ee89687f..30eb3ef24fe30 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -138,6 +138,13 @@ def _assert_less(ts1, ts2): _assert_less(ts, ts + Milli()) _assert_less(ts, ts + Micro(50)) + def 
test_convert_nested(self):
+        inner = [Timestamp('2017-01-01'), Timestamp('2017-01-02')]
+        data = [inner, inner]
+        result = self.dtc.convert(data, None, None)
+        expected = [self.dtc.convert(x, None, None) for x in data]
+        assert result == expected
+

 class TestPeriodConverter(tm.TestCase):

@@ -196,3 +203,9 @@ def test_integer_passthrough(self):
         rs = self.pc.convert([0, 1], None, self.axis)
         xp = [0, 1]
         self.assertEqual(rs, xp)
+
+    def test_convert_nested(self):
+        data = ['2012-1-1', '2012-1-2']
+        r1 = self.pc.convert([data, data], None, self.axis)
+        r2 = [self.pc.convert(data, None, self.axis) for _ in range(2)]
+        assert r1 == r2
diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py
index 547770ebcf6e5..4beb804acacc5 100644
--- a/pandas/tests/plotting/test_datetimelike.py
+++ b/pandas/tests/plotting/test_datetimelike.py
@@ -1334,6 +1334,14 @@ def test_timedelta_plot(self):
         s = Series(np.random.randn(len(index)), index)
         _check_plot_works(s.plot)

+    def test_hist(self):
+        # https://github.com/matplotlib/matplotlib/issues/8459
+        rng = date_range('1/1/2011', periods=10, freq='H')
+        x = rng
+        w1 = np.arange(0, 1, .1)
+        w2 = np.arange(0, 1, .1)[::-1]
+        self.plt.hist([x, x], weights=[w1, w2])
+

 def _check_plot_works(f, freq=None, series=None, *args, **kwargs):
     import matplotlib.pyplot as plt

From f53d38b94d963cff081b4fe0a1e7242e8d5eb221 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Tue, 18 Apr 2017 10:34:36 +0200
Subject: [PATCH 416/933] BUG: show series length in repr when truncated
 (GH15962) (#15974)

---
 doc/source/whatsnew/v0.20.0.txt        |  3 ++
 pandas/core/series.py                  | 40 ++++++++-------------
 pandas/io/formats/format.py            |  3 +-
 pandas/tests/io/formats/test_format.py | 33 +++++++++++++++----
 pandas/tests/sparse/test_format.py     | 10 ++++---
 pandas/tests/test_categorical.py       |  2 +-
 6 files changed, 51 insertions(+), 40 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 0b95bf98b401d..4583e0d6eb836 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -518,6 +518,8 @@ Other Enhancements
 - :method:`~MultiIndex.remove_unused_levels` has been added to facilitate :ref:`removing unused levels `. (:issue:`15694`)
 - ``pd.read_csv()`` will now raise a ``ParserError`` error whenever any parsing error occurs (:issue:`15913`, :issue:`15925`)
 - ``pd.read_csv()`` now supports the ``error_bad_lines`` and ``warn_bad_lines`` arguments for the Python parser (:issue:`15925`)
+- The ``display.show_dimensions`` option can now also be used to specify
+  whether the length of a ``Series`` should be shown in its repr (:issue:`7117`).
 - ``parallel_coordinates()`` has gained a ``sort_labels`` keyword arg that sorts class labels and the colours assigned to them (:issue:`15908`)

@@ -1560,6 +1562,7 @@ I/O
 - Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`)
 - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produced incorrectly formatted files for some locales (:issue:`13856`)
 - Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`)
+- Bug in the ``Series`` repr not showing the length when the output was truncated (:issue:`15962`).
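A hedged sketch of the repr change recorded in the entry above (the output shape is indicative of post-patch behaviour; compare the test_show_dimensions test later in this patch):

    import pandas as pd

    s = pd.Series(range(100))
    with pd.option_context('display.max_rows', 4):
        print(repr(s))  # truncated output now ends with 'Length: 100, dtype: int64'
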
Plotting ^^^^^^^^ diff --git a/pandas/core/series.py b/pandas/core/series.py index 9022bff092ac3..2a99481274e9e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -980,9 +980,10 @@ def __unicode__(self): width, height = get_terminal_size() max_rows = (height if get_option("display.max_rows") == 0 else get_option("display.max_rows")) + show_dimensions = get_option("display.show_dimensions") self.to_string(buf=buf, name=self.name, dtype=self.dtype, - max_rows=max_rows) + max_rows=max_rows, length=show_dimensions) result = buf.getvalue() return result @@ -1021,31 +1022,6 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, formatted : string (if not buffer passed) """ - the_repr = self._get_repr(float_format=float_format, na_rep=na_rep, - header=header, index=index, length=length, - dtype=dtype, name=name, max_rows=max_rows) - - # catch contract violations - if not isinstance(the_repr, compat.text_type): - raise AssertionError("result must be of type unicode, type" - " of result is {0!r}" - "".format(the_repr.__class__.__name__)) - - if buf is None: - return the_repr - else: - try: - buf.write(the_repr) - except AttributeError: - with open(buf, 'w') as f: - f.write(the_repr) - - def _get_repr(self, name=False, header=True, index=True, length=True, - dtype=True, na_rep='NaN', float_format=None, max_rows=None): - """ - - Internal function, should always return unicode string - """ formatter = fmt.SeriesFormatter(self, name=name, length=length, header=header, index=index, dtype=dtype, na_rep=na_rep, @@ -1053,12 +1029,20 @@ def _get_repr(self, name=False, header=True, index=True, length=True, max_rows=max_rows) result = formatter.to_string() - # TODO: following check prob. not neces. + # catch contract violations if not isinstance(result, compat.text_type): raise AssertionError("result must be of type unicode, type" " of result is {0!r}" "".format(result.__class__.__name__)) - return result + + if buf is None: + return result + else: + try: + buf.write(result) + except AttributeError: + with open(buf, 'w') as f: + f.write(result) def __iter__(self): """ provide iteration over the values of the Series diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 20df60eb96299..ae0814d5566a8 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -199,7 +199,8 @@ def _get_footer(self): escape_chars=('\t', '\r', '\n')) footer += ("Name: %s" % series_name) if name is not None else "" - if self.length: + if (self.length is True or + (self.length == 'truncate' and self.truncate_v)): if footer: footer += ', ' footer += 'Length: %d' % len(self.series) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 354ce99f567ea..20fbaf781d72f 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1770,12 +1770,14 @@ def test_east_asian_unicode_series(self): name=u'おおおおおおお') expected = (u"0 あ\n ... \n" - u"3 ええええ\nName: おおおおおおお, dtype: object") + u"3 ええええ\n" + u"Name: おおおおおおお, Length: 4, dtype: object") self.assertEqual(_rep(s), expected) s.index = [u'ああ', u'いいいい', u'う', u'えええ'] expected = (u"ああ あ\n ... 
\n" - u"えええ ええええ\nName: おおおおおおお, dtype: object") + u"えええ ええええ\n" + u"Name: おおおおおおお, Length: 4, dtype: object") self.assertEqual(_rep(s), expected) # Emable Unicode option ----------------------------------------- @@ -1846,14 +1848,15 @@ def test_east_asian_unicode_series(self): s = Series([u'あ', u'いい', u'ううう', u'ええええ'], name=u'おおおおおおお') expected = (u"0 あ\n ... \n" - u"3 ええええ\nName: おおおおおおお, dtype: object") + u"3 ええええ\n" + u"Name: おおおおおおお, Length: 4, dtype: object") self.assertEqual(_rep(s), expected) s.index = [u'ああ', u'いいいい', u'う', u'えええ'] expected = (u"ああ あ\n" u" ... \n" u"えええ ええええ\n" - u"Name: おおおおおおお, dtype: object") + u"Name: おおおおおおお, Length: 4, dtype: object") self.assertEqual(_rep(s), expected) # ambiguous unicode @@ -2021,7 +2024,8 @@ def test_max_multi_index_display(self): # Make sure #8532 is fixed def test_consistent_format(self): s = pd.Series([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.9999, 1, 1] * 10) - with option_context("display.max_rows", 10): + with option_context("display.max_rows", 10, + "display.show_dimensions", False): res = repr(s) exp = ('0 1.0000\n1 1.0000\n2 1.0000\n3 ' '1.0000\n4 1.0000\n ... \n125 ' @@ -2040,7 +2044,8 @@ def chck_ncols(self, s): def test_format_explicit(self): test_sers = gen_series_formatting() - with option_context("display.max_rows", 4): + with option_context("display.max_rows", 4, + "display.show_dimensions", False): res = repr(test_sers['onel']) exp = '0 a\n1 a\n ..\n98 a\n99 a\ndtype: object' self.assertEqual(exp, res) @@ -2087,6 +2092,22 @@ def getndots(s): strrepr = repr(s).replace('\n', '') self.assertEqual(getndots(strrepr), 3) + def test_show_dimensions(self): + # gh-7117 + s = Series(range(5)) + + assert 'Length' not in repr(s) + + with option_context("display.max_rows", 4): + assert 'Length' in repr(s) + + with option_context("display.show_dimensions", True): + assert 'Length' in repr(s) + + with option_context("display.max_rows", 4, + "display.show_dimensions", False): + assert 'Length' not in repr(s) + def test_to_string_name(self): s = Series(range(100), dtype='int64') s.name = 'myser' diff --git a/pandas/tests/sparse/test_format.py b/pandas/tests/sparse/test_format.py index ba870a2c33801..eafb493319e40 100644 --- a/pandas/tests/sparse/test_format.py +++ b/pandas/tests/sparse/test_format.py @@ -33,7 +33,7 @@ def test_sparse_max_row(self): # GH 10560 result = repr(s) exp = ("0 1.0\n ... \n4 NaN\n" - "dtype: float64\nBlockIndex\n" + "Length: 5, dtype: float64\nBlockIndex\n" "Block locations: array([0, 3]{0})\n" "Block lengths: array([1, 1]{0})".format(dfm)) self.assertEqual(result, exp) @@ -52,7 +52,8 @@ def test_sparse_mi_max_row(self): "Block lengths: array([1, 1]{0})".format(dfm)) self.assertEqual(result, exp) - with option_context("display.max_rows", 3): + with option_context("display.max_rows", 3, + "display.show_dimensions", False): # GH 13144 result = repr(s) exp = ("A 0 1.0\n ... \nC 2 NaN\n" @@ -77,7 +78,7 @@ def test_sparse_bool(self): with option_context("display.max_rows", 3): result = repr(s) exp = ("0 True\n ... 
\n5 False\n" - "dtype: bool\nBlockIndex\n" + "Length: 6, dtype: bool\nBlockIndex\n" "Block locations: array([0, 3]{0})\n" "Block lengths: array([1, 1]{0})".format(dtype)) self.assertEqual(result, exp) @@ -94,7 +95,8 @@ def test_sparse_int(self): "Block lengths: array([1, 1]{0})".format(dtype)) self.assertEqual(result, exp) - with option_context("display.max_rows", 3): + with option_context("display.max_rows", 3, + "display.show_dimensions", False): result = repr(s) exp = ("0 0\n ..\n5 0\n" "dtype: int64\nBlockIndex\n" diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 3296673e96316..17f55b41970b1 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -2088,7 +2088,7 @@ def test_repr(self): a = pd.Series(pd.Categorical(["a", "b"] * 25)) exp = u("0 a\n1 b\n" + " ..\n" + "48 a\n49 b\n" + - "dtype: category\nCategories (2, object): [a, b]") + "Length: 50, dtype: category\nCategories (2, object): [a, b]") with option_context("display.max_rows", 5): self.assertEqual(exp, repr(a)) From 0ba305b15713aac0ada62e13768732485e374902 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 18 Apr 2017 12:01:04 +0200 Subject: [PATCH 417/933] ENH: level keyword in rename (GH4160) (#13766) --- doc/source/whatsnew/v0.20.0.txt | 2 + pandas/core/generic.py | 9 +++- pandas/core/internals.py | 14 ++++-- pandas/tests/frame/test_alter_axes.py | 67 ++++++++++++++++++++++++--- 4 files changed, 81 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4583e0d6eb836..44c79fd131705 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -490,6 +490,8 @@ Other Enhancements - ``DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`) - ``DataFrame.plot`` can pass the matplotlib 2.0 default color cycle as a single string as color parameter, see `here `__. (:issue:`15516`) - ``Series.interpolate()`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) +- Addition of a ``level`` keyword to ``DataFrame/Series.rename`` to rename + labels in the specified level of a MultiIndex (:issue:`4160`). - ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs ` (:issue:`15136`) - ``.select_dtypes()`` now allows the string ``datetimetz`` to generically select datetimes with tz (:issue:`14910`) - The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5f0c65ddfb9c3..841df3727e5a6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -645,6 +645,9 @@ def swaplevel(self, i=-2, j=-1, axis=0): inplace : boolean, default False Whether to return a new %(klass)s. If True then value of copy is ignored. + level : int or level name, default None + In case of a MultiIndex, only rename labels in the specified + level. 
Returns ------- @@ -701,6 +704,7 @@ def rename(self, *args, **kwargs): axes, kwargs = self._construct_axes_from_arguments(args, kwargs) copy = kwargs.pop('copy', True) inplace = kwargs.pop('inplace', False) + level = kwargs.pop('level', None) if kwargs: raise TypeError('rename() got an unexpected keyword ' @@ -734,7 +738,10 @@ def f(x): f = _get_rename_function(v) baxis = self._get_block_manager_axis(axis) - result._data = result._data.rename_axis(f, axis=baxis, copy=copy) + if level is not None: + level = self.axes[axis]._get_level_number(level) + result._data = result._data.rename_axis(f, axis=baxis, copy=copy, + level=level) result._clear_item_cache() if inplace: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index c698bcb9fa5ee..5a87574455a63 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2837,7 +2837,7 @@ def set_axis(self, axis, new_labels): self.axes[axis] = new_labels - def rename_axis(self, mapper, axis, copy=True): + def rename_axis(self, mapper, axis, copy=True, level=None): """ Rename one of axes. @@ -2846,10 +2846,11 @@ def rename_axis(self, mapper, axis, copy=True): mapper : unary callable axis : int copy : boolean, default True + level : int, default None """ obj = self.copy(deep=copy) - obj.set_axis(axis, _transform_index(self.axes[axis], mapper)) + obj.set_axis(axis, _transform_index(self.axes[axis], mapper, level)) return obj def add_prefix(self, prefix): @@ -4735,15 +4736,20 @@ def _safe_reshape(arr, new_shape): return arr -def _transform_index(index, func): +def _transform_index(index, func, level=None): """ Apply function to all values found in index. This includes transforming multiindex entries separately. + Only apply function to one level of the MultiIndex if level is specified. """ if isinstance(index, MultiIndex): - items = [tuple(func(y) for y in x) for x in index] + if level is not None: + items = [tuple(func(y) if i == level else y + for i, y in enumerate(x)) for x in index] + else: + items = [tuple(func(y) for y in x) for x in index] return MultiIndex.from_tuples(items, names=index.names) else: items = [func(x) for x in index] diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 9add944d2293e..ce4dd6d38eeeb 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -415,15 +415,20 @@ def test_rename(self): pd.Index(['bar', 'foo'], name='name')) self.assertEqual(renamed.index.name, renamer.index.name) - # MultiIndex + def test_rename_multiindex(self): + tuples_index = [('foo1', 'bar1'), ('foo2', 'bar2')] tuples_columns = [('fizz1', 'buzz1'), ('fizz2', 'buzz2')] index = MultiIndex.from_tuples(tuples_index, names=['foo', 'bar']) columns = MultiIndex.from_tuples( tuples_columns, names=['fizz', 'buzz']) - renamer = DataFrame([(0, 0), (1, 1)], index=index, columns=columns) - renamed = renamer.rename(index={'foo1': 'foo3', 'bar2': 'bar3'}, - columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'}) + df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns) + + # + # without specifying level -> accross all levels + + renamed = df.rename(index={'foo1': 'foo3', 'bar2': 'bar3'}, + columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'}) new_index = MultiIndex.from_tuples([('foo3', 'bar1'), ('foo2', 'bar3')], names=['foo', 'bar']) @@ -432,8 +437,58 @@ def test_rename(self): names=['fizz', 'buzz']) self.assert_index_equal(renamed.index, new_index) self.assert_index_equal(renamed.columns, new_columns) - self.assertEqual(renamed.index.names, renamer.index.names) - 
self.assertEqual(renamed.columns.names, renamer.columns.names) + self.assertEqual(renamed.index.names, df.index.names) + self.assertEqual(renamed.columns.names, df.columns.names) + + # + # with specifying a level (GH13766) + + # dict + new_columns = MultiIndex.from_tuples([('fizz3', 'buzz1'), + ('fizz2', 'buzz2')], + names=['fizz', 'buzz']) + renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'}, + level=0) + self.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'}, + level='fizz') + self.assert_index_equal(renamed.columns, new_columns) + + new_columns = MultiIndex.from_tuples([('fizz1', 'buzz1'), + ('fizz2', 'buzz3')], + names=['fizz', 'buzz']) + renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'}, + level=1) + self.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'}, + level='buzz') + self.assert_index_equal(renamed.columns, new_columns) + + # function + func = str.upper + new_columns = MultiIndex.from_tuples([('FIZZ1', 'buzz1'), + ('FIZZ2', 'buzz2')], + names=['fizz', 'buzz']) + renamed = df.rename(columns=func, level=0) + self.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns=func, level='fizz') + self.assert_index_equal(renamed.columns, new_columns) + + new_columns = MultiIndex.from_tuples([('fizz1', 'BUZZ1'), + ('fizz2', 'BUZZ2')], + names=['fizz', 'buzz']) + renamed = df.rename(columns=func, level=1) + self.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns=func, level='buzz') + self.assert_index_equal(renamed.columns, new_columns) + + # index + new_index = MultiIndex.from_tuples([('foo3', 'bar1'), + ('foo2', 'bar2')], + names=['foo', 'bar']) + renamed = df.rename(index={'foo1': 'foo3', 'bar2': 'bar3'}, + level=0) + self.assert_index_equal(renamed.index, new_index) def test_rename_nocopy(self): renamed = self.frame.rename(columns={'C': 'foo'}, copy=False) From d16cce89521c84fcb9c7b7bb2e95629a6fe7acb7 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 18 Apr 2017 06:10:54 -0400 Subject: [PATCH 418/933] CLN: move/reorg pandas.tools -> pandas.core.reshape xref #13634 Author: Jeff Reback Closes #16032 from jreback/move_tools and squashes the following commits: 376cef5 [Jeff Reback] move to_numeric cc6e059 [Jeff Reback] CLN: move/reorg pandas.tools -> pandas.core.reshape --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/__init__.py | 9 +- pandas/core/algorithms.py | 2 +- pandas/core/api.py | 6 +- pandas/core/base.py | 4 +- pandas/core/categorical.py | 2 +- pandas/core/computation/expr.py | 2 +- pandas/core/dtypes/cast.py | 164 +- pandas/core/frame.py | 20 +- pandas/core/groupby.py | 20 +- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/multi.py | 2 +- pandas/core/panel.py | 4 +- .../{tests/tools => core/reshape}/__init__.py | 0 pandas/core/reshape/api.py | 8 + pandas/{tools => core/reshape}/concat.py | 0 pandas/core/reshape/merge.py | 1481 ++++++++++++ pandas/{tools => core/reshape}/pivot.py | 5 +- pandas/core/{ => reshape}/reshape.py | 2 +- pandas/{tools => core/reshape}/tile.py | 0 pandas/core/reshape/util.py | 76 + pandas/core/series.py | 4 +- pandas/io/formats/format.py | 4 +- pandas/plotting/_core.py | 2 +- pandas/tests/dtypes/test_cast.py | 370 ++- pandas/tests/dtypes/test_convert.py | 0 pandas/tests/reshape/__init__.py | 0 .../data/allow_exact_matches.csv | 0 .../allow_exact_matches_and_tolerance.csv | 0 pandas/tests/{tools => reshape}/data/asof.csv | 0 
.../tests/{tools => reshape}/data/asof2.csv | 0 .../{tools => reshape}/data/cut_data.csv | 0 .../tests/{tools => reshape}/data/quotes.csv | 0 .../tests/{tools => reshape}/data/quotes2.csv | 0 .../{tools => reshape}/data/tolerance.csv | 0 .../tests/{tools => reshape}/data/trades.csv | 0 .../tests/{tools => reshape}/data/trades2.csv | 0 .../tests/{tools => reshape}/test_concat.py | 0 .../tests/{tools => reshape}/test_hashing.py | 0 pandas/tests/{tools => reshape}/test_join.py | 2 +- pandas/tests/{tools => reshape}/test_merge.py | 4 +- .../{tools => reshape}/test_merge_asof.py | 2 +- .../{tools => reshape}/test_merge_ordered.py | 0 pandas/tests/{tools => reshape}/test_pivot.py | 2 +- pandas/tests/{ => reshape}/test_reshape.py | 5 +- pandas/tests/{tools => reshape}/test_tile.py | 2 +- .../test_union_categoricals.py | 0 pandas/tests/reshape/test_util.py | 49 + pandas/tests/sparse/test_series.py | 2 +- pandas/tests/test_algos.py | 2 +- pandas/tests/test_generic.py | 2076 ----------------- pandas/tests/test_panel.py | 6 +- pandas/tests/test_util.py | 78 +- pandas/tests/tools/test_util.py | 485 ---- pandas/tools/merge.py | 1482 +----------- pandas/tools/util.py | 245 -- setup.py | 4 +- 57 files changed, 2281 insertions(+), 4355 deletions(-) rename pandas/{tests/tools => core/reshape}/__init__.py (100%) create mode 100644 pandas/core/reshape/api.py rename pandas/{tools => core/reshape}/concat.py (100%) create mode 100644 pandas/core/reshape/merge.py rename pandas/{tools => core/reshape}/pivot.py (99%) rename pandas/core/{ => reshape}/reshape.py (99%) rename pandas/{tools => core/reshape}/tile.py (100%) create mode 100644 pandas/core/reshape/util.py create mode 100644 pandas/tests/dtypes/test_convert.py create mode 100644 pandas/tests/reshape/__init__.py rename pandas/tests/{tools => reshape}/data/allow_exact_matches.csv (100%) rename pandas/tests/{tools => reshape}/data/allow_exact_matches_and_tolerance.csv (100%) rename pandas/tests/{tools => reshape}/data/asof.csv (100%) rename pandas/tests/{tools => reshape}/data/asof2.csv (100%) rename pandas/tests/{tools => reshape}/data/cut_data.csv (100%) rename pandas/tests/{tools => reshape}/data/quotes.csv (100%) rename pandas/tests/{tools => reshape}/data/quotes2.csv (100%) rename pandas/tests/{tools => reshape}/data/tolerance.csv (100%) rename pandas/tests/{tools => reshape}/data/trades.csv (100%) rename pandas/tests/{tools => reshape}/data/trades2.csv (100%) rename pandas/tests/{tools => reshape}/test_concat.py (100%) rename pandas/tests/{tools => reshape}/test_hashing.py (100%) rename pandas/tests/{tools => reshape}/test_join.py (99%) rename pandas/tests/{tools => reshape}/test_merge.py (99%) rename pandas/tests/{tools => reshape}/test_merge_asof.py (99%) rename pandas/tests/{tools => reshape}/test_merge_ordered.py (100%) rename pandas/tests/{tools => reshape}/test_pivot.py (99%) rename pandas/tests/{ => reshape}/test_reshape.py (99%) rename pandas/tests/{tools => reshape}/test_tile.py (99%) rename pandas/tests/{tools => reshape}/test_union_categoricals.py (100%) create mode 100644 pandas/tests/reshape/test_util.py delete mode 100644 pandas/tests/test_generic.py delete mode 100644 pandas/tests/tools/test_util.py delete mode 100644 pandas/tools/util.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 44c79fd131705..9fe0b66028ac5 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1353,6 +1353,7 @@ If indicated, a deprecation warning will be issued if you reference theses modul 
"pandas.parser", "pandas.io.libparsers", "X" "pandas.formats", "pandas.io.formats", "" "pandas.sparse", "pandas.core.sparse", "" + "pandas.tools", "pandas.core.reshape", "" "pandas.types", "pandas.core.dtypes", "" "pandas.io.sas.saslib", "pandas.io.sas.libsas", "" "pandas._join", "pandas._libs.join", "" diff --git a/pandas/__init__.py b/pandas/__init__.py index 5f6d54fd904b1..43fa362b66ed5 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -44,11 +44,7 @@ from pandas.stats.api import * from pandas.tseries.api import * from pandas.core.computation.api import * - -from pandas.tools.concat import concat -from pandas.tools.merge import (merge, ordered_merge, - merge_ordered, merge_asof) -from pandas.tools.pivot import pivot_table, crosstab +from pandas.core.reshape.api import * # deprecate tools.plotting, plot_params and scatter_matrix on the top namespace import pandas.tools.plotting @@ -58,9 +54,6 @@ 'pandas.scatter_matrix', pandas.plotting.scatter_matrix, 'pandas.plotting.scatter_matrix') -from pandas.tools.tile import cut, qcut -from pandas.tools.util import to_numeric -from pandas.core.reshape import melt from pandas.util.print_versions import show_versions from pandas.io.api import * from pandas.util._tester import test diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 6df7fce631a3c..63df4b3d94bc8 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -605,7 +605,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, if bins is not None: try: - from pandas.tools.tile import cut + from pandas.core.reshape.tile import cut values = Series(values) ii = cut(values, bins, include_lowest=True) except TypeError: diff --git a/pandas/core/api.py b/pandas/core/api.py index 865fe367873d8..f3191283b85eb 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -18,10 +18,12 @@ from pandas.core.frame import DataFrame from pandas.core.panel import Panel, WidePanel from pandas.core.panel4d import Panel4D -from pandas.core.reshape import (pivot_simple as pivot, get_dummies, - lreshape, wide_to_long) +from pandas.core.reshape.reshape import ( + pivot_simple as pivot, get_dummies, + lreshape, wide_to_long) from pandas.core.indexing import IndexSlice +from pandas.core.dtypes.cast import to_numeric from pandas.tseries.offsets import DateOffset from pandas.tseries.tools import to_datetime from pandas.tseries.index import (DatetimeIndex, Timestamp, diff --git a/pandas/core/base.py b/pandas/core/base.py index e30751a6582f9..87c649c5fbd79 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -522,7 +522,7 @@ def nested_renaming_depr(level=4): len(obj.columns.intersection(keys)) != len(keys)): nested_renaming_depr() - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat def _agg_1dim(name, how, subset=None): """ @@ -671,7 +671,7 @@ def is_any_frame(): return result, True def _aggregate_multiple_funcs(self, arg, _level, _axis): - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat if _axis != 0: raise NotImplementedError("axis other than 0 is not supported") diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index a12cec33fb350..a3667e9322959 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1995,7 +1995,7 @@ def describe(self): counts = self.value_counts(dropna=False) freqs = counts / float(counts.sum()) - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat result = concat([counts, 
freqs], axis=1) result.columns = ['counts', 'freqs'] result.index.name = 'categories' diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index 51785ebcd9ec8..73c27f4d772ca 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -13,7 +13,7 @@ from pandas.core.base import StringMixin from pandas.core import common as com import pandas.io.formats.printing as printing -from pandas.tools.util import compose +from pandas.core.reshape.util import compose from pandas.core.computation.ops import ( _cmp_ops_syms, _bool_ops_syms, _arith_ops_syms, _unary_ops_syms, is_term) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3954fb5c93da8..3c1f480787d3a 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -5,6 +5,7 @@ import numpy as np import warnings +import pandas as pd from pandas._libs import tslib, lib from pandas._libs.tslib import iNaT from pandas.compat import string_types, text_type, PY3 @@ -18,6 +19,8 @@ is_integer_dtype, is_datetime_or_timedelta_dtype, is_bool_dtype, is_scalar, + is_numeric_dtype, is_decimal, + is_number, _string_dtypes, _coerce_to_dtype, _ensure_int8, _ensure_int16, @@ -25,7 +28,8 @@ _NS_DTYPE, _TD_DTYPE, _INT64_DTYPE, _POSSIBLY_CAST_DTYPES) from .dtypes import ExtensionDtype, DatetimeTZDtype, PeriodDtype -from .generic import ABCDatetimeIndex, ABCPeriodIndex, ABCSeries +from .generic import (ABCDatetimeIndex, ABCPeriodIndex, + ABCSeries, ABCIndexClass) from .missing import isnull, notnull from .inference import is_list_like @@ -1025,3 +1029,161 @@ def find_common_type(types): return np.object return np.find_common_type(types, []) + + +def to_numeric(arg, errors='raise', downcast=None): + """ + Convert argument to a numeric type. + + Parameters + ---------- + arg : list, tuple, 1-d array, or Series + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception + - If 'coerce', then invalid parsing will be set as NaN + - If 'ignore', then invalid parsing will return the input + downcast : {'integer', 'signed', 'unsigned', 'float'} , default None + If not None, and if the data has been successfully cast to a + numerical dtype (or if the data was numeric to begin with), + downcast that resulting data to the smallest numerical dtype + possible according to the following rules: + + - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) + - 'unsigned': smallest unsigned int dtype (min.: np.uint8) + - 'float': smallest float dtype (min.: np.float32) + + As this behaviour is separate from the core conversion to + numeric values, any errors raised during the downcasting + will be surfaced regardless of the value of the 'errors' input. + + In addition, downcasting will only occur if the size + of the resulting data's dtype is strictly larger than + the dtype it is to be cast to, so if none of the dtypes + checked satisfy that specification, no downcasting will be + performed on the data. + + .. versionadded:: 0.19.0 + + Returns + ------- + ret : numeric if parsing succeeded. + Return type depends on input. 
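The return-type contract is easiest to see with two quick doctests (a sketch only; exact dtypes and reprs can vary with the NumPy build):

    >>> pd.to_numeric('1.0')        # scalar in, scalar out
    1.0
    >>> pd.to_numeric(['1', '2'])   # list in, ndarray out
    array([1, 2])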
Series if Series, otherwise ndarray + + Examples + -------- + Take separate series and convert to numeric, coercing when told to + + >>> import pandas as pd + >>> s = pd.Series(['1.0', '2', -3]) + >>> pd.to_numeric(s) + 0 1.0 + 1 2.0 + 2 -3.0 + dtype: float64 + >>> pd.to_numeric(s, downcast='float') + 0 1.0 + 1 2.0 + 2 -3.0 + dtype: float32 + >>> pd.to_numeric(s, downcast='signed') + 0 1 + 1 2 + 2 -3 + dtype: int8 + >>> s = pd.Series(['apple', '1.0', '2', -3]) + >>> pd.to_numeric(s, errors='ignore') + 0 apple + 1 1.0 + 2 2 + 3 -3 + dtype: object + >>> pd.to_numeric(s, errors='coerce') + 0 NaN + 1 1.0 + 2 2.0 + 3 -3.0 + dtype: float64 + """ + if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'): + raise ValueError('invalid downcasting method provided') + + is_series = False + is_index = False + is_scalars = False + + if isinstance(arg, ABCSeries): + is_series = True + values = arg.values + elif isinstance(arg, ABCIndexClass): + is_index = True + values = arg.asi8 + if values is None: + values = arg.values + elif isinstance(arg, (list, tuple)): + values = np.array(arg, dtype='O') + elif is_scalar(arg): + if is_decimal(arg): + return float(arg) + if is_number(arg): + return arg + is_scalars = True + values = np.array([arg], dtype='O') + elif getattr(arg, 'ndim', 1) > 1: + raise TypeError('arg must be a list, tuple, 1-d array, or Series') + else: + values = arg + + try: + if is_numeric_dtype(values): + pass + elif is_datetime_or_timedelta_dtype(values): + values = values.astype(np.int64) + else: + values = _ensure_object(values) + coerce_numeric = False if errors in ('ignore', 'raise') else True + values = lib.maybe_convert_numeric(values, set(), + coerce_numeric=coerce_numeric) + + except Exception: + if errors == 'raise': + raise + + # attempt downcast only if the data has been successfully converted + # to a numerical dtype and if a downcast method has been specified + if downcast is not None and is_numeric_dtype(values): + typecodes = None + + if downcast in ('integer', 'signed'): + typecodes = np.typecodes['Integer'] + elif downcast == 'unsigned' and np.min(values) >= 0: + typecodes = np.typecodes['UnsignedInteger'] + elif downcast == 'float': + typecodes = np.typecodes['Float'] + + # pandas support goes only to np.float32, + # as float dtypes smaller than that are + # extremely rare and not well supported + float_32_char = np.dtype(np.float32).char + float_32_ind = typecodes.index(float_32_char) + typecodes = typecodes[float_32_ind:] + + if typecodes is not None: + # from smallest to largest + for dtype in typecodes: + if np.dtype(dtype).itemsize <= values.dtype.itemsize: + values = maybe_downcast_to_dtype(values, dtype) + + # successful conversion + if values.dtype == dtype: + break + + if is_series: + return pd.Series(values, index=arg.index, name=arg.name) + elif is_index: + # because we want to coerce to numeric if possible, + # do not use _shallow_copy_with_infer + return pd.Index(values, name=arg.name) + elif is_scalars: + return values[0] + else: + return values diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 732ce7ce695b0..9b9039455b948 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3956,7 +3956,7 @@ def pivot(self, index=None, columns=None, values=None): """ - from pandas.core.reshape import pivot + from pandas.core.reshape.reshape import pivot return pivot(self, index=index, columns=columns, values=values) def stack(self, level=-1, dropna=True): @@ -3992,7 +3992,7 @@ def stack(self, level=-1, dropna=True): ------- stacked : 
DataFrame or Series """ - from pandas.core.reshape import stack, stack_multiple + from pandas.core.reshape.reshape import stack, stack_multiple if isinstance(level, (tuple, list)): return stack_multiple(self, level, dropna=dropna) @@ -4057,7 +4057,7 @@ def unstack(self, level=-1, fill_value=None): ------- unstacked : DataFrame or Series """ - from pandas.core.reshape import unstack + from pandas.core.reshape.reshape import unstack return unstack(self, level, fill_value) _shared_docs['melt'] = (""" @@ -4159,7 +4159,7 @@ def unstack(self, level=-1, fill_value=None): other='melt')) def melt(self, id_vars=None, value_vars=None, var_name=None, value_name='value', col_level=None): - from pandas.core.reshape import melt + from pandas.core.reshape.reshape import melt return melt(self, id_vars=id_vars, value_vars=value_vars, var_name=var_name, value_name=value_name, col_level=col_level) @@ -4609,7 +4609,7 @@ def append(self, other, ignore_index=False, verify_integrity=False): if (self.columns.get_indexer(other.columns) >= 0).all(): other = other.loc[:, self.columns] - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat if isinstance(other, (list, tuple)): to_concat = [self] + other else: @@ -4741,8 +4741,8 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False): - from pandas.tools.merge import merge - from pandas.tools.concat import concat + from pandas.core.reshape.merge import merge + from pandas.core.reshape.concat import concat if isinstance(other, Series): if other.name is None: @@ -4786,7 +4786,7 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', def merge(self, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True, indicator=False): - from pandas.tools.merge import merge + from pandas.core.reshape.merge import merge return merge(self, right, how=how, on=on, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index, sort=sort, suffixes=suffixes, @@ -4846,7 +4846,7 @@ def round(self, decimals=0, *args, **kwargs): Series.round """ - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat def _dict_round(df, decimals): for col, vals in df.iteritems(): @@ -5523,7 +5523,7 @@ def isin(self, values): """ if isinstance(values, dict): from collections import defaultdict - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat values = defaultdict(list, values) return concat((self.iloc[:, [i]].isin(values[col]) for i, col in enumerate(self.columns)), axis=1) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 3fd41f3456732..8f788aed3950d 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -870,7 +870,7 @@ def _wrap_applied_output(self, *args, **kwargs): raise AbstractMethodError(self) def _concat_objects(self, keys, values, not_indexed_same=False): - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat def reset_identity(values): # reset the identities of the components @@ -2985,7 +2985,7 @@ def transform(self, func, *args, **kwargs): s = klass(res, indexer) results.append(s) - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat result = concat(results).sort_index() # we will only try to coerce the result type if @@ -3126,8 +3126,8 @@ def value_counts(self, 
normalize=False, sort=True, ascending=False, bins=None, dropna=True): from functools import partial - from pandas.tools.tile import cut - from pandas.tools.merge import _get_join_indexers + from pandas.core.reshape.tile import cut + from pandas.core.reshape.merge import _get_join_indexers if bins is not None and not np.iterable(bins): # scalar bins cannot be done at top level @@ -3509,7 +3509,7 @@ def _decide_output_index(self, output, labels): def _wrap_applied_output(self, keys, values, not_indexed_same=False): from pandas.core.index import _all_indexes_same - from pandas.tools.util import to_numeric + from pandas.core.dtypes.cast import to_numeric if len(keys) == 0: return DataFrame(index=keys) @@ -3600,7 +3600,7 @@ def first_non_None_value(values): # still a series # path added as of GH 5545 elif all_indexed_same: - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat return concat(values) if not all_indexed_same: @@ -3633,7 +3633,7 @@ def first_non_None_value(values): else: # GH5788 instead of stacking; concat gets the # dtypes correct - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat result = concat(values, keys=key_index, names=key_index.names, axis=self.axis).unstack() @@ -3684,7 +3684,7 @@ def first_non_None_value(values): not_indexed_same=not_indexed_same) def _transform_general(self, func, *args, **kwargs): - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat applied = [] obj = self._obj_with_exclusions @@ -4071,7 +4071,7 @@ def _iterate_column_groupbys(self): exclusions=self.exclusions) def _apply_to_column_groupbys(self, func): - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat return concat( (func(col_groupby) for _, col_groupby in self._iterate_column_groupbys()), @@ -4151,7 +4151,7 @@ def groupby_series(obj, col=None): if isinstance(obj, Series): results = groupby_series(obj) else: - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat results = [groupby_series(obj[col], col) for col in obj.columns] results = concat(results, axis=1) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5149d45514e2e..705b7a186dced 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3064,7 +3064,7 @@ def _join_multi(self, other, how, return_indexers=True): "implemented") def _join_non_unique(self, other, how='left', return_indexers=False): - from pandas.tools.merge import _get_join_indexers + from pandas.core.reshape.merge import _get_join_indexers left_idx, right_idx = _get_join_indexers([self.values], [other._values], how=how, diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 40e7118ca0f6a..6d9a9aa691f66 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1170,7 +1170,7 @@ def from_product(cls, iterables, sortorder=None, names=None): MultiIndex.from_tuples : Convert list of tuples to MultiIndex """ from pandas.core.categorical import _factorize_from_iterables - from pandas.tools.util import cartesian_product + from pandas.core.reshape.util import cartesian_product labels, levels = _factorize_from_iterables(iterables) labels = cartesian_product(labels) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index fefe75163d033..39d2ebdeec3ac 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -33,7 +33,7 @@ create_block_manager_from_blocks) from pandas.core.ops import _op_descriptions from 
pandas.core.series import Series -from pandas.tools.util import cartesian_product +from pandas.core.reshape.util import cartesian_product from pandas.util.decorators import (deprecate, Appender) _shared_doc_kwargs = dict( @@ -1294,7 +1294,7 @@ def join(self, other, how='left', lsuffix='', rsuffix=''): ------- joined : Panel """ - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat if isinstance(other, Panel): join_major, join_minor = self._get_join_index(other, how) diff --git a/pandas/tests/tools/__init__.py b/pandas/core/reshape/__init__.py similarity index 100% rename from pandas/tests/tools/__init__.py rename to pandas/core/reshape/__init__.py diff --git a/pandas/core/reshape/api.py b/pandas/core/reshape/api.py new file mode 100644 index 0000000000000..c75e0341918bb --- /dev/null +++ b/pandas/core/reshape/api.py @@ -0,0 +1,8 @@ +# flake8: noqa + +from pandas.core.reshape.concat import concat +from pandas.core.reshape.reshape import melt +from pandas.core.reshape.merge import ( + merge, ordered_merge, merge_ordered, merge_asof) +from pandas.core.reshape.pivot import pivot_table, crosstab +from pandas.core.reshape.tile import cut, qcut diff --git a/pandas/tools/concat.py b/pandas/core/reshape/concat.py similarity index 100% rename from pandas/tools/concat.py rename to pandas/core/reshape/concat.py diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py new file mode 100644 index 0000000000000..1ca3786ecc174 --- /dev/null +++ b/pandas/core/reshape/merge.py @@ -0,0 +1,1481 @@ +""" +SQL-style merge routines +""" + +import copy +import warnings +import string + +import numpy as np +from pandas.compat import range, lzip, zip, map, filter +import pandas.compat as compat + +from pandas import (Categorical, Series, DataFrame, + Index, MultiIndex, Timedelta) +from pandas.core.frame import _merge_doc +from pandas.core.dtypes.common import ( + is_datetime64tz_dtype, + is_datetime64_dtype, + needs_i8_conversion, + is_int64_dtype, + is_categorical_dtype, + is_integer_dtype, + is_float_dtype, + is_numeric_dtype, + is_integer, + is_int_or_datetime_dtype, + is_dtype_equal, + is_bool, + is_list_like, + _ensure_int64, + _ensure_float64, + _ensure_object, + _get_dtype) +from pandas.core.dtypes.missing import na_value_for_dtype +from pandas.core.internals import (items_overlap_with_suffix, + concatenate_block_managers) +from pandas.util.decorators import Appender, Substitution + +from pandas.core.sorting import is_int64_overflow_possible +import pandas.core.algorithms as algos +import pandas.core.common as com +from pandas._libs import hashtable as libhashtable, join as libjoin, lib + + +@Substitution('\nleft : DataFrame') +@Appender(_merge_doc, indents=0) +def merge(left, right, how='inner', on=None, left_on=None, right_on=None, + left_index=False, right_index=False, sort=False, + suffixes=('_x', '_y'), copy=True, indicator=False): + op = _MergeOperation(left, right, how=how, on=on, left_on=left_on, + right_on=right_on, left_index=left_index, + right_index=right_index, sort=sort, suffixes=suffixes, + copy=copy, indicator=indicator) + return op.get_result() + + +if __debug__: + merge.__doc__ = _merge_doc % '\nleft : DataFrame' + + +class MergeError(ValueError): + pass + + +def _groupby_and_merge(by, on, left, right, _merge_pieces, + check_duplicates=True): + """ + groupby & merge; we are always performing a left-by type operation + + Parameters + ---------- + by: field to group + on: duplicates field + left: left frame + right: right frame + _merge_pieces: 
function for merging + check_duplicates: boolean, default True + should we check & clean duplicates + """ + + pieces = [] + if not isinstance(by, (list, tuple)): + by = [by] + + lby = left.groupby(by, sort=False) + + # if we can groupby the rhs + # then we can get vastly better perf + try: + + # we will check & remove duplicates if indicated + if check_duplicates: + if on is None: + on = [] + elif not isinstance(on, (list, tuple)): + on = [on] + + if right.duplicated(by + on).any(): + right = right.drop_duplicates(by + on, keep='last') + rby = right.groupby(by, sort=False) + except KeyError: + rby = None + + for key, lhs in lby: + + if rby is None: + rhs = right + else: + try: + rhs = right.take(rby.indices[key]) + except KeyError: + # key doesn't exist in left + lcols = lhs.columns.tolist() + cols = lcols + [r for r in right.columns + if r not in set(lcols)] + merged = lhs.reindex(columns=cols) + merged.index = range(len(merged)) + pieces.append(merged) + continue + + merged = _merge_pieces(lhs, rhs) + + # make sure join keys are in the merged + # TODO, should _merge_pieces do this? + for k in by: + try: + if k in merged: + merged[k] = key + except: + pass + + pieces.append(merged) + + # preserve the original order + # if we have a missing piece this can be reset + from pandas.core.reshape.concat import concat + result = concat(pieces, ignore_index=True) + result = result.reindex(columns=pieces[0].columns, copy=False) + return result, lby + + +def ordered_merge(left, right, on=None, + left_on=None, right_on=None, + left_by=None, right_by=None, + fill_method=None, suffixes=('_x', '_y')): + + warnings.warn("ordered_merge is deprecated and replaced by merge_ordered", + FutureWarning, stacklevel=2) + return merge_ordered(left, right, on=on, + left_on=left_on, right_on=right_on, + left_by=left_by, right_by=right_by, + fill_method=fill_method, suffixes=suffixes) + + +def merge_ordered(left, right, on=None, + left_on=None, right_on=None, + left_by=None, right_by=None, + fill_method=None, suffixes=('_x', '_y'), + how='outer'): + """Perform merge with optional filling/interpolation designed for ordered + data like time series data. Optionally perform group-wise merge (see + examples) + + Parameters + ---------- + left : DataFrame + right : DataFrame + on : label or list + Field names to join on. Must be found in both DataFrames. + left_on : label or list, or array-like + Field names to join on in left DataFrame. Can be a vector or list of + vectors of the length of the DataFrame to use a particular vector as + the join key instead of columns + right_on : label or list, or array-like + Field names to join on in right DataFrame or vector/list of vectors per + left_on docs + left_by : column name or list of column names + Group left DataFrame by group columns and merge piece by piece with + right DataFrame + right_by : column name or list of column names + Group right DataFrame by group columns and merge piece by piece with + left DataFrame + fill_method : {'ffill', None}, default None + Interpolation method for data + suffixes : 2-length sequence (tuple, list, ...) + Suffix to apply to overlapping column names in the left and right + side, respectively + how : {'left', 'right', 'outer', 'inner'}, default 'outer' + * left: use only keys from left frame (SQL: left outer join) + * right: use only keys from right frame (SQL: right outer join) + * outer: use union of keys from both frames (SQL: full outer join) + * inner: use intersection of keys from both frames (SQL: inner join) + + .. 
versionadded:: 0.19.0 + + Examples + -------- + >>> A >>> B + key lvalue group key rvalue + 0 a 1 a 0 b 1 + 1 c 2 a 1 c 2 + 2 e 3 a 2 d 3 + 3 a 1 b + 4 c 2 b + 5 e 3 b + + >>> ordered_merge(A, B, fill_method='ffill', left_by='group') + key lvalue group rvalue + 0 a 1 a NaN + 1 b 1 a 1 + 2 c 2 a 2 + 3 d 2 a 3 + 4 e 3 a 3 + 5 f 3 a 4 + 6 a 1 b NaN + 7 b 1 b 1 + 8 c 2 b 2 + 9 d 2 b 3 + 10 e 3 b 3 + 11 f 3 b 4 + + Returns + ------- + merged : DataFrame + The output type will the be same as 'left', if it is a subclass + of DataFrame. + + See also + -------- + merge + merge_asof + + """ + def _merger(x, y): + # perform the ordered merge operation + op = _OrderedMerge(x, y, on=on, left_on=left_on, right_on=right_on, + suffixes=suffixes, fill_method=fill_method, + how=how) + return op.get_result() + + if left_by is not None and right_by is not None: + raise ValueError('Can only group either left or right frames') + elif left_by is not None: + result, _ = _groupby_and_merge(left_by, on, left, right, + lambda x, y: _merger(x, y), + check_duplicates=False) + elif right_by is not None: + result, _ = _groupby_and_merge(right_by, on, right, left, + lambda x, y: _merger(y, x), + check_duplicates=False) + else: + result = _merger(left, right) + return result + + +ordered_merge.__doc__ = merge_ordered.__doc__ + + +def merge_asof(left, right, on=None, + left_on=None, right_on=None, + left_index=False, right_index=False, + by=None, left_by=None, right_by=None, + suffixes=('_x', '_y'), + tolerance=None, + allow_exact_matches=True, + direction='backward'): + """Perform an asof merge. This is similar to a left-join except that we + match on nearest key rather than equal keys. + + Both DataFrames must be sorted by the key. + + For each row in the left DataFrame: + + - A "backward" search selects the last row in the right DataFrame whose + 'on' key is less than or equal to the left's key. + + - A "forward" search selects the first row in the right DataFrame whose + 'on' key is greater than or equal to the left's key. + + - A "nearest" search selects the row in the right DataFrame whose 'on' + key is closest in absolute distance to the left's key. + + The default is "backward" and is compatible in versions below 0.20.0. + The direction parameter was added in version 0.20.0 and introduces + "forward" and "nearest". + + Optionally match on equivalent keys with 'by' before searching with 'on'. + + .. versionadded:: 0.19.0 + + Parameters + ---------- + left : DataFrame + right : DataFrame + on : label + Field name to join on. Must be found in both DataFrames. + The data MUST be ordered. Furthermore this must be a numeric column, + such as datetimelike, integer, or float. On or left_on/right_on + must be given. + left_on : label + Field name to join on in left DataFrame. + right_on : label + Field name to join on in right DataFrame. + left_index : boolean + Use the index of the left DataFrame as the join key. + + .. versionadded:: 0.19.2 + + right_index : boolean + Use the index of the right DataFrame as the join key. + + .. versionadded:: 0.19.2 + + by : column name or list of column names + Match on these columns before performing merge operation. + left_by : column name + Field names to match on in the left DataFrame. + + .. versionadded:: 0.19.2 + + right_by : column name + Field names to match on in the right DataFrame. + + .. versionadded:: 0.19.2 + + suffixes : 2-length sequence (tuple, list, ...) + Suffix to apply to overlapping column names in the left and right + side, respectively. 
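A one-row sketch of the three search modes described above, using hypothetical frames ``left`` and ``right`` (the ``direction`` parameter is documented further down):

    >>> left = pd.DataFrame({'t': [5], 'v': ['x']})
    >>> right = pd.DataFrame({'t': [3, 6], 'w': ['a', 'b']})
    >>> pd.merge_asof(left, right, on='t')['w'].item()   # backward: last t <= 5
    'a'
    >>> pd.merge_asof(left, right, on='t', direction='nearest')['w'].item()
    'b'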
+ tolerance : integer or Timedelta, optional, default None + Select asof tolerance within this range; must be compatible + with the merge index. + allow_exact_matches : boolean, default True + + - If True, allow matching with the same 'on' value + (i.e. less-than-or-equal-to / greater-than-or-equal-to) + - If False, don't match the same 'on' value + (i.e., stricly less-than / strictly greater-than) + + direction : 'backward' (default), 'forward', or 'nearest' + Whether to search for prior, subsequent, or closest matches. + + .. versionadded:: 0.20.0 + + Returns + ------- + merged : DataFrame + + Examples + -------- + >>> left + a left_val + 0 1 a + 1 5 b + 2 10 c + + >>> right + a right_val + 0 1 1 + 1 2 2 + 2 3 3 + 3 6 6 + 4 7 7 + + >>> pd.merge_asof(left, right, on='a') + a left_val right_val + 0 1 a 1 + 1 5 b 3 + 2 10 c 7 + + >>> pd.merge_asof(left, right, on='a', allow_exact_matches=False) + a left_val right_val + 0 1 a NaN + 1 5 b 3.0 + 2 10 c 7.0 + + >>> pd.merge_asof(left, right, on='a', direction='forward') + a left_val right_val + 0 1 a 1.0 + 1 5 b 6.0 + 2 10 c NaN + + >>> pd.merge_asof(left, right, on='a', direction='nearest') + a left_val right_val + 0 1 a 1 + 1 5 b 6 + 2 10 c 7 + + We can use indexed DataFrames as well. + + >>> left + left_val + 1 a + 5 b + 10 c + + >>> right + right_val + 1 1 + 2 2 + 3 3 + 6 6 + 7 7 + + >>> pd.merge_asof(left, right, left_index=True, right_index=True) + left_val right_val + 1 a 1 + 5 b 3 + 10 c 7 + + Here is a real-world times-series example + + >>> quotes + time ticker bid ask + 0 2016-05-25 13:30:00.023 GOOG 720.50 720.93 + 1 2016-05-25 13:30:00.023 MSFT 51.95 51.96 + 2 2016-05-25 13:30:00.030 MSFT 51.97 51.98 + 3 2016-05-25 13:30:00.041 MSFT 51.99 52.00 + 4 2016-05-25 13:30:00.048 GOOG 720.50 720.93 + 5 2016-05-25 13:30:00.049 AAPL 97.99 98.01 + 6 2016-05-25 13:30:00.072 GOOG 720.50 720.88 + 7 2016-05-25 13:30:00.075 MSFT 52.01 52.03 + + >>> trades + time ticker price quantity + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 + + By default we are taking the asof of the quotes + + >>> pd.merge_asof(trades, quotes, + ... on='time', + ... by='ticker') + time ticker price quantity bid ask + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN + + We only asof within 2ms betwen the quote time and the trade time + + >>> pd.merge_asof(trades, quotes, + ... on='time', + ... by='ticker', + ... tolerance=pd.Timedelta('2ms')) + time ticker price quantity bid ask + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 NaN NaN + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN + + We only asof within 10ms betwen the quote time and the trade time + and we exclude exact matches on time. However *prior* data will + propogate forward + + >>> pd.merge_asof(trades, quotes, + ... on='time', + ... by='ticker', + ... tolerance=pd.Timedelta('10ms'), + ... 
allow_exact_matches=False) + time ticker price quantity bid ask + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 NaN NaN + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN + + See also + -------- + merge + merge_ordered + + """ + op = _AsOfMerge(left, right, + on=on, left_on=left_on, right_on=right_on, + left_index=left_index, right_index=right_index, + by=by, left_by=left_by, right_by=right_by, + suffixes=suffixes, + how='asof', tolerance=tolerance, + allow_exact_matches=allow_exact_matches, + direction=direction) + return op.get_result() + + +# TODO: transformations?? +# TODO: only copy DataFrames when modification necessary +class _MergeOperation(object): + """ + Perform a database (SQL) merge operation between two DataFrame objects + using either columns as keys or their row indexes + """ + _merge_type = 'merge' + + def __init__(self, left, right, how='inner', on=None, + left_on=None, right_on=None, axis=1, + left_index=False, right_index=False, sort=True, + suffixes=('_x', '_y'), copy=True, indicator=False): + self.left = self.orig_left = left + self.right = self.orig_right = right + self.how = how + self.axis = axis + + self.on = com._maybe_make_list(on) + self.left_on = com._maybe_make_list(left_on) + self.right_on = com._maybe_make_list(right_on) + + self.copy = copy + self.suffixes = suffixes + self.sort = sort + + self.left_index = left_index + self.right_index = right_index + + self.indicator = indicator + + if isinstance(self.indicator, compat.string_types): + self.indicator_name = self.indicator + elif isinstance(self.indicator, bool): + self.indicator_name = '_merge' if self.indicator else None + else: + raise ValueError( + 'indicator option can only accept boolean or string arguments') + + if not isinstance(left, DataFrame): + raise ValueError( + 'can not merge DataFrame with instance of ' + 'type {0}'.format(type(left))) + if not isinstance(right, DataFrame): + raise ValueError( + 'can not merge DataFrame with instance of ' + 'type {0}'.format(type(right))) + + if not is_bool(left_index): + raise ValueError( + 'left_index parameter must be of type bool, not ' + '{0}'.format(type(left_index))) + if not is_bool(right_index): + raise ValueError( + 'right_index parameter must be of type bool, not ' + '{0}'.format(type(right_index))) + + # warn user when merging between different levels + if left.columns.nlevels != right.columns.nlevels: + msg = ('merging between different levels can give an unintended ' + 'result ({0} levels on the left, {1} on the right)') + msg = msg.format(left.columns.nlevels, right.columns.nlevels) + warnings.warn(msg, UserWarning) + + self._validate_specification() + + # note this function has side effects + (self.left_join_keys, + self.right_join_keys, + self.join_names) = self._get_merge_keys() + + # validate the merge keys dtypes. 
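        # A hedged, standalone illustration of this coercion (not part of
        # the class logic itself): an int64 key merged against an object
        # key that does not infer to the same type is compared as object,
        # so values that merely "look" equal do not match:
        #
        #   >>> left = pd.DataFrame({'k': [1, 2]})        # int64 key
        #   >>> right = pd.DataFrame({'k': ['1', '2']})   # object key
        #   >>> len(pd.merge(left, right, on='k'))        # 1 != '1'
        #   0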
We may need to coerce + # to avoid incompat dtypes + self._maybe_coerce_merge_keys() + + def get_result(self): + if self.indicator: + self.left, self.right = self._indicator_pre_merge( + self.left, self.right) + + join_index, left_indexer, right_indexer = self._get_join_info() + + ldata, rdata = self.left._data, self.right._data + lsuf, rsuf = self.suffixes + + llabels, rlabels = items_overlap_with_suffix(ldata.items, lsuf, + rdata.items, rsuf) + + lindexers = {1: left_indexer} if left_indexer is not None else {} + rindexers = {1: right_indexer} if right_indexer is not None else {} + + result_data = concatenate_block_managers( + [(ldata, lindexers), (rdata, rindexers)], + axes=[llabels.append(rlabels), join_index], + concat_axis=0, copy=self.copy) + + typ = self.left._constructor + result = typ(result_data).__finalize__(self, method=self._merge_type) + + if self.indicator: + result = self._indicator_post_merge(result) + + self._maybe_add_join_keys(result, left_indexer, right_indexer) + + return result + + def _indicator_pre_merge(self, left, right): + + columns = left.columns.union(right.columns) + + for i in ['_left_indicator', '_right_indicator']: + if i in columns: + raise ValueError("Cannot use `indicator=True` option when " + "data contains a column named {}".format(i)) + if self.indicator_name in columns: + raise ValueError( + "Cannot use name of an existing column for indicator column") + + left = left.copy() + right = right.copy() + + left['_left_indicator'] = 1 + left['_left_indicator'] = left['_left_indicator'].astype('int8') + + right['_right_indicator'] = 2 + right['_right_indicator'] = right['_right_indicator'].astype('int8') + + return left, right + + def _indicator_post_merge(self, result): + + result['_left_indicator'] = result['_left_indicator'].fillna(0) + result['_right_indicator'] = result['_right_indicator'].fillna(0) + + result[self.indicator_name] = Categorical((result['_left_indicator'] + + result['_right_indicator']), + categories=[1, 2, 3]) + result[self.indicator_name] = ( + result[self.indicator_name] + .cat.rename_categories(['left_only', 'right_only', 'both'])) + + result = result.drop(labels=['_left_indicator', '_right_indicator'], + axis=1) + return result + + def _maybe_add_join_keys(self, result, left_indexer, right_indexer): + + left_has_missing = None + right_has_missing = None + + keys = zip(self.join_names, self.left_on, self.right_on) + for i, (name, lname, rname) in enumerate(keys): + if not _should_fill(lname, rname): + continue + + take_left, take_right = None, None + + if name in result: + + if left_indexer is not None and right_indexer is not None: + if name in self.left: + + if left_has_missing is None: + left_has_missing = (left_indexer == -1).any() + + if left_has_missing: + take_right = self.right_join_keys[i] + + if not is_dtype_equal(result[name].dtype, + self.left[name].dtype): + take_left = self.left[name]._values + + elif name in self.right: + + if right_has_missing is None: + right_has_missing = (right_indexer == -1).any() + + if right_has_missing: + take_left = self.left_join_keys[i] + + if not is_dtype_equal(result[name].dtype, + self.right[name].dtype): + take_right = self.right[name]._values + + elif left_indexer is not None \ + and isinstance(self.left_join_keys[i], np.ndarray): + + take_left = self.left_join_keys[i] + take_right = self.right_join_keys[i] + + if take_left is not None or take_right is not None: + + if take_left is None: + lvals = result[name]._values + else: + lfill = na_value_for_dtype(take_left.dtype) + lvals = 
algos.take_1d(take_left, left_indexer, + fill_value=lfill) + + if take_right is None: + rvals = result[name]._values + else: + rfill = na_value_for_dtype(take_right.dtype) + rvals = algos.take_1d(take_right, right_indexer, + fill_value=rfill) + + # if we have an all missing left_indexer + # make sure to just use the right values + mask = left_indexer == -1 + if mask.all(): + key_col = rvals + else: + key_col = Index(lvals).where(~mask, rvals) + + if name in result: + result[name] = key_col + else: + result.insert(i, name or 'key_%d' % i, key_col) + + def _get_join_indexers(self): + """ return the join indexers """ + return _get_join_indexers(self.left_join_keys, + self.right_join_keys, + sort=self.sort, + how=self.how) + + def _get_join_info(self): + left_ax = self.left._data.axes[self.axis] + right_ax = self.right._data.axes[self.axis] + + if self.left_index and self.right_index and self.how != 'asof': + join_index, left_indexer, right_indexer = \ + left_ax.join(right_ax, how=self.how, return_indexers=True, + sort=self.sort) + elif self.right_index and self.how == 'left': + join_index, left_indexer, right_indexer = \ + _left_join_on_index(left_ax, right_ax, self.left_join_keys, + sort=self.sort) + + elif self.left_index and self.how == 'right': + join_index, right_indexer, left_indexer = \ + _left_join_on_index(right_ax, left_ax, self.right_join_keys, + sort=self.sort) + else: + (left_indexer, + right_indexer) = self._get_join_indexers() + + if self.right_index: + if len(self.left) > 0: + join_index = self.left.index.take(left_indexer) + else: + join_index = self.right.index.take(right_indexer) + left_indexer = np.array([-1] * len(join_index)) + elif self.left_index: + if len(self.right) > 0: + join_index = self.right.index.take(right_indexer) + else: + join_index = self.left.index.take(left_indexer) + right_indexer = np.array([-1] * len(join_index)) + else: + join_index = Index(np.arange(len(left_indexer))) + + if len(join_index) == 0: + join_index = join_index.astype(object) + return join_index, left_indexer, right_indexer + + def _get_merge_keys(self): + """ + Note: has side effects (copy/delete key columns) + + Parameters + ---------- + left + right + on + + Returns + ------- + left_keys, right_keys + """ + left_keys = [] + right_keys = [] + join_names = [] + right_drop = [] + left_drop = [] + left, right = self.left, self.right + + is_lkey = lambda x: isinstance( + x, (np.ndarray, Series)) and len(x) == len(left) + is_rkey = lambda x: isinstance( + x, (np.ndarray, Series)) and len(x) == len(right) + + # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A + # user could, for example, request 'left_index' and 'left_by'. In a + # regular pd.merge(), users cannot specify both 'left_index' and + # 'left_on'. (Instead, users have a MultiIndex). That means the + # self.left_on in this function is always empty in a pd.merge(), but + # a pd.merge_asof(left_index=True, left_by=...) will result in a + # self.left_on array with a None in the middle of it. This requires + # a work-around as designated in the code below. + # See _validate_specification() for where this happens. + + # ugh, spaghetti re #733 + if _any(self.left_on) and _any(self.right_on): + for lk, rk in zip(self.left_on, self.right_on): + if is_lkey(lk): + left_keys.append(lk) + if is_rkey(rk): + right_keys.append(rk) + join_names.append(None) # what to do? 
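                # A hedged sketch of the key reconstruction performed in
                # _maybe_add_join_keys above, seen from the outside: in an
                # outer merge, rows missing on one side still get their key
                # filled from the other side's join key.
                #
                #   >>> left = pd.DataFrame({'k': [1, 2], 'a': ['x', 'y']})
                #   >>> right = pd.DataFrame({'k': [2, 3], 'b': ['p', 'q']})
                #   >>> pd.merge(left, right, on='k', how='outer')['k'].tolist()
                #   [1, 2, 3]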
+ else: + if rk is not None: + right_keys.append(right[rk]._values) + join_names.append(rk) + else: + # work-around for merge_asof(right_index=True) + right_keys.append(right.index) + join_names.append(right.index.name) + else: + if not is_rkey(rk): + if rk is not None: + right_keys.append(right[rk]._values) + else: + # work-around for merge_asof(right_index=True) + right_keys.append(right.index) + if lk is not None and lk == rk: + # avoid key upcast in corner case (length-0) + if len(left) > 0: + right_drop.append(rk) + else: + left_drop.append(lk) + else: + right_keys.append(rk) + if lk is not None: + left_keys.append(left[lk]._values) + join_names.append(lk) + else: + # work-around for merge_asof(left_index=True) + left_keys.append(left.index) + join_names.append(left.index.name) + elif _any(self.left_on): + for k in self.left_on: + if is_lkey(k): + left_keys.append(k) + join_names.append(None) + else: + left_keys.append(left[k]._values) + join_names.append(k) + if isinstance(self.right.index, MultiIndex): + right_keys = [lev._values.take(lab) + for lev, lab in zip(self.right.index.levels, + self.right.index.labels)] + else: + right_keys = [self.right.index.values] + elif _any(self.right_on): + for k in self.right_on: + if is_rkey(k): + right_keys.append(k) + join_names.append(None) + else: + right_keys.append(right[k]._values) + join_names.append(k) + if isinstance(self.left.index, MultiIndex): + left_keys = [lev._values.take(lab) + for lev, lab in zip(self.left.index.levels, + self.left.index.labels)] + else: + left_keys = [self.left.index.values] + + if left_drop: + self.left = self.left.drop(left_drop, axis=1) + + if right_drop: + self.right = self.right.drop(right_drop, axis=1) + + return left_keys, right_keys, join_names + + def _maybe_coerce_merge_keys(self): + # we have valid mergee's but we may have to further + # coerce these if they are originally incompatible types + # + # for example if these are categorical, but are not dtype_equal + # or if we have object and integer dtypes + + for lk, rk, name in zip(self.left_join_keys, + self.right_join_keys, + self.join_names): + if (len(lk) and not len(rk)) or (not len(lk) and len(rk)): + continue + + # if either left or right is a categorical + # then the must match exactly in categories & ordered + if is_categorical_dtype(lk) and is_categorical_dtype(rk): + if lk.is_dtype_equal(rk): + continue + elif is_categorical_dtype(lk) or is_categorical_dtype(rk): + pass + + elif is_dtype_equal(lk.dtype, rk.dtype): + continue + + # if we are numeric, then allow differing + # kinds to proceed, eg. int64 and int8 + # further if we are object, but we infer to + # the same, then proceed + if (is_numeric_dtype(lk) and is_numeric_dtype(rk)): + if lk.dtype.kind == rk.dtype.kind: + continue + + # let's infer and see if we are ok + if lib.infer_dtype(lk) == lib.infer_dtype(rk): + continue + + # Houston, we have a problem! + # let's coerce to object + if name in self.left.columns: + self.left = self.left.assign( + **{name: self.left[name].astype(object)}) + if name in self.right.columns: + self.right = self.right.assign( + **{name: self.right[name].astype(object)}) + + def _validate_specification(self): + # Hm, any way to make this logic less complicated?? 
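        # Restating the cascade below as observable behavior (a hedged
        # sketch with hypothetical frames): with no keys given, the merge
        # joins on the columns the two frames share; mixing `on` with
        # `left_on`/`right_on` raises the MergeError used further down.
        #
        #   >>> left = pd.DataFrame({'k': [1], 'v': [10]})
        #   >>> right = pd.DataFrame({'k': [1], 'w': [20]})
        #   >>> pd.merge(left, right).columns.tolist()   # 'k' inferred
        #   ['k', 'v', 'w']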
+ if self.on is None and self.left_on is None and self.right_on is None: + + if self.left_index and self.right_index: + self.left_on, self.right_on = (), () + elif self.left_index: + if self.right_on is None: + raise MergeError('Must pass right_on or right_index=True') + elif self.right_index: + if self.left_on is None: + raise MergeError('Must pass left_on or left_index=True') + else: + # use the common columns + common_cols = self.left.columns.intersection( + self.right.columns) + if len(common_cols) == 0: + raise MergeError('No common columns to perform merge on') + if not common_cols.is_unique: + raise MergeError("Data columns not unique: %s" + % repr(common_cols)) + self.left_on = self.right_on = common_cols + elif self.on is not None: + if self.left_on is not None or self.right_on is not None: + raise MergeError('Can only pass argument "on" OR "left_on" ' + 'and "right_on", not a combination of both.') + self.left_on = self.right_on = self.on + elif self.left_on is not None: + n = len(self.left_on) + if self.right_index: + if len(self.left_on) != self.right.index.nlevels: + raise ValueError('len(left_on) must equal the number ' + 'of levels in the index of "right"') + self.right_on = [None] * n + elif self.right_on is not None: + n = len(self.right_on) + if self.left_index: + if len(self.right_on) != self.left.index.nlevels: + raise ValueError('len(right_on) must equal the number ' + 'of levels in the index of "left"') + self.left_on = [None] * n + if len(self.right_on) != len(self.left_on): + raise ValueError("len(right_on) must equal len(left_on)") + + +def _get_join_indexers(left_keys, right_keys, sort=False, how='inner', + **kwargs): + """ + + Parameters + ---------- + left_keys: ndarray, Index, Series + right_keys: ndarray, Index, Series + sort: boolean, default False + how: string {'inner', 'outer', 'left', 'right'}, default 'inner' + + Returns + ------- + tuple of (left_indexer, right_indexer) + indexers into the left_keys, right_keys + + """ + from functools import partial + + assert len(left_keys) == len(right_keys), \ + 'left_key and right_keys must be the same length' + + # bind `sort` arg. of _factorize_keys + fkeys = partial(_factorize_keys, sort=sort) + + # get left & right join labels and num. of levels at each location + llab, rlab, shape = map(list, zip(* map(fkeys, left_keys, right_keys))) + + # get flat i8 keys from label lists + lkey, rkey = _get_join_keys(llab, rlab, shape, sort) + + # factorize keys to a dense i8 space + # `count` is the num. 
+
+
+class _OrderedMerge(_MergeOperation):
+    _merge_type = 'ordered_merge'
+
+    def __init__(self, left, right, on=None, left_on=None, right_on=None,
+                 left_index=False, right_index=False, axis=1,
+                 suffixes=('_x', '_y'), copy=True,
+                 fill_method=None, how='outer'):
+
+        self.fill_method = fill_method
+        _MergeOperation.__init__(self, left, right, on=on, left_on=left_on,
+                                 left_index=left_index,
+                                 right_index=right_index,
+                                 right_on=right_on, axis=axis,
+                                 how=how, suffixes=suffixes,
+                                 sort=True  # factorize sorts
+                                 )
+
+    def get_result(self):
+        join_index, left_indexer, right_indexer = self._get_join_info()
+
+        # this is a bit kludgy
+        ldata, rdata = self.left._data, self.right._data
+        lsuf, rsuf = self.suffixes
+
+        llabels, rlabels = items_overlap_with_suffix(ldata.items, lsuf,
+                                                     rdata.items, rsuf)
+
+        if self.fill_method == 'ffill':
+            left_join_indexer = libjoin.ffill_indexer(left_indexer)
+            right_join_indexer = libjoin.ffill_indexer(right_indexer)
+        else:
+            left_join_indexer = left_indexer
+            right_join_indexer = right_indexer
+
+        lindexers = {
+            1: left_join_indexer} if left_join_indexer is not None else {}
+        rindexers = {
+            1: right_join_indexer} if right_join_indexer is not None else {}
+
+        result_data = concatenate_block_managers(
+            [(ldata, lindexers), (rdata, rindexers)],
+            axes=[llabels.append(rlabels), join_index],
+            concat_axis=0, copy=self.copy)
+
+        typ = self.left._constructor
+        result = typ(result_data).__finalize__(self, method=self._merge_type)
+
+        self._maybe_add_join_keys(result, left_indexer, right_indexer)
+
+        return result
+
+
+def _asof_function(direction, on_type):
+    return getattr(libjoin, 'asof_join_%s_%s' % (direction, on_type), None)
+
+
+def _asof_by_function(direction, on_type, by_type):
+    return getattr(libjoin, 'asof_join_%s_%s_by_%s' %
+                   (direction, on_type, by_type), None)
+
+
+_type_casters = {
+    'int64_t': _ensure_int64,
+    'double': _ensure_float64,
+    'object': _ensure_object,
+}
+
+_cython_types = {
+    'uint8': 'uint8_t',
+    'uint32': 'uint32_t',
+    'uint16': 'uint16_t',
+    'uint64': 'uint64_t',
+    'int8': 'int8_t',
+    'int32': 'int32_t',
+    'int16': 'int16_t',
+    'int64': 'int64_t',
+    'float16': 'error',
+    'float32': 'float',
+    'float64': 'double',
+}
+
+
+def _get_cython_type(dtype):
+    """ Given a dtype, return a C name like 'int64_t' or 'double' """
+    type_name = _get_dtype(dtype).name
+    ctype = _cython_types.get(type_name, 'object')
+    if ctype == 'error':
+        raise MergeError('unsupported type: ' + type_name)
+    return ctype
+
+
+def _get_cython_type_upcast(dtype):
+    """ Upcast a dtype to 'int64_t', 'double', or 'object' """
+    if is_integer_dtype(dtype):
+        return 'int64_t'
+    elif is_float_dtype(dtype):
+        return 'double'
+    else:
+        return 'object'
+
+
+class _AsOfMerge(_OrderedMerge):
+    _merge_type = 'asof_merge'
+
+    def __init__(self, left, right, on=None, left_on=None, right_on=None,
+                 left_index=False, right_index=False,
+                 by=None, left_by=None, right_by=None,
+                 axis=1, suffixes=('_x', '_y'), copy=True,
+                 fill_method=None,
+                 how='asof', tolerance=None,
+                 allow_exact_matches=True,
+                 direction='backward'):
+
+        self.by = by
+        self.left_by = left_by
+        self.right_by = right_by
+        self.tolerance = tolerance
+        self.allow_exact_matches = allow_exact_matches
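+        # 'backward' matches the last right row whose on-key is <= the
+        # left key, 'forward' the first with on-key >=, and 'nearest'
+        # the closer of the two; allow_exact_matches=False makes the
+        # comparisons strict (< and >)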
+        self.direction = direction
+
+        _OrderedMerge.__init__(self, left, right, on=on, left_on=left_on,
+                               right_on=right_on, left_index=left_index,
+                               right_index=right_index, axis=axis,
+                               how=how, suffixes=suffixes,
+                               fill_method=fill_method)
+
+    def _validate_specification(self):
+        super(_AsOfMerge, self)._validate_specification()
+
+        # we only allow 'on' to be a single key
+        if len(self.left_on) != 1 and not self.left_index:
+            raise MergeError("can only asof on a key for left")
+
+        if len(self.right_on) != 1 and not self.right_index:
+            raise MergeError("can only asof on a key for right")
+
+        if self.left_index and isinstance(self.left.index, MultiIndex):
+            raise MergeError("left can only have one index")
+
+        if self.right_index and isinstance(self.right.index, MultiIndex):
+            raise MergeError("right can only have one index")
+
+        # set 'by' columns
+        if self.by is not None:
+            if self.left_by is not None or self.right_by is not None:
+                raise MergeError('Can only pass by OR left_by '
+                                 'and right_by')
+            self.left_by = self.right_by = self.by
+        if self.left_by is None and self.right_by is not None:
+            raise MergeError('missing left_by')
+        if self.left_by is not None and self.right_by is None:
+            raise MergeError('missing right_by')
+
+        # add 'by' to our key-list so we can have it in the
+        # output as a key
+        if self.left_by is not None:
+            if not is_list_like(self.left_by):
+                self.left_by = [self.left_by]
+            if not is_list_like(self.right_by):
+                self.right_by = [self.right_by]
+
+            if len(self.left_by) != len(self.right_by):
+                raise MergeError('left_by and right_by must be same length')
+
+            self.left_on = self.left_by + list(self.left_on)
+            self.right_on = self.right_by + list(self.right_on)
+
+        # check 'direction' is valid
+        if self.direction not in ['backward', 'forward', 'nearest']:
+            raise MergeError('direction invalid: ' + self.direction)
+
+    @property
+    def _asof_key(self):
+        """ This is our asof key, the 'on' """
+        return self.left_on[-1]
+
+    def _get_merge_keys(self):
+
+        # note this function has side effects
+        (left_join_keys,
+         right_join_keys,
+         join_names) = super(_AsOfMerge, self)._get_merge_keys()
+
+        # validate index types are the same
+        for lk, rk in zip(left_join_keys, right_join_keys):
+            if not is_dtype_equal(lk.dtype, rk.dtype):
+                raise MergeError("incompatible merge keys, "
+                                 "must be the same type")
+
+        # validate tolerance; must be a Timedelta if we have a DTI
+        if self.tolerance is not None:
+
+            if self.left_index:
+                lt = self.left.index
+            else:
+                lt = left_join_keys[-1]
+
+            msg = "incompatible tolerance, must be compat " \
+                  "with type {0}".format(type(lt))
+
+            if is_datetime64_dtype(lt) or is_datetime64tz_dtype(lt):
+                if not isinstance(self.tolerance, Timedelta):
+                    raise MergeError(msg)
+                if self.tolerance < Timedelta(0):
+                    raise MergeError("tolerance must be positive")
+
+            elif is_int64_dtype(lt):
+                if not is_integer(self.tolerance):
+                    raise MergeError(msg)
+                if self.tolerance < 0:
+                    raise MergeError("tolerance must be positive")
+
+            else:
+                raise MergeError("key must be integer or timestamp")
+
+        # validate allow_exact_matches
+        if not is_bool(self.allow_exact_matches):
+            raise MergeError("allow_exact_matches must be boolean, "
+                             "passed {0}".format(self.allow_exact_matches))
+
+        return left_join_keys, right_join_keys, join_names
+
+    def _get_join_indexers(self):
+        """ return the join indexers """
+
+        def flip(xs):
+            """ unlike np.transpose, this returns an array of tuples """
+            labels = list(string.ascii_lowercase[:len(xs)])
+            dtypes = [x.dtype for x in xs]
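+            # the zipped (label, dtype) pairs form a structured dtype,
+            # so each row of the 'by' columns then hashes as a single
+            # record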
+            labeled_dtypes = list(zip(labels, dtypes))
+            return np.array(lzip(*xs), labeled_dtypes)
+
+        # values to compare
+        left_values = (self.left.index.values if self.left_index else
+                       self.left_join_keys[-1])
+        right_values = (self.right.index.values if self.right_index else
+                        self.right_join_keys[-1])
+        tolerance = self.tolerance
+
+        # we require sortedness in the join keys
+        msg = " keys must be sorted"
+        if not Index(left_values).is_monotonic:
+            raise ValueError('left' + msg)
+        if not Index(right_values).is_monotonic:
+            raise ValueError('right' + msg)
+
+        # initial type conversion as needed
+        if needs_i8_conversion(left_values):
+            left_values = left_values.view('i8')
+            right_values = right_values.view('i8')
+            if tolerance is not None:
+                tolerance = tolerance.value
+
+        # a "by" parameter requires special handling
+        if self.left_by is not None:
+            # remove 'on' parameter from values if one existed
+            if self.left_index and self.right_index:
+                left_by_values = self.left_join_keys
+                right_by_values = self.right_join_keys
+            else:
+                left_by_values = self.left_join_keys[0:-1]
+                right_by_values = self.right_join_keys[0:-1]
+
+            # get tuple representation of values if more than one
+            if len(left_by_values) == 1:
+                left_by_values = left_by_values[0]
+                right_by_values = right_by_values[0]
+            else:
+                left_by_values = flip(left_by_values)
+                right_by_values = flip(right_by_values)
+
+            # upcast 'by' parameter because HashTable is limited
+            by_type = _get_cython_type_upcast(left_by_values.dtype)
+            by_type_caster = _type_casters[by_type]
+            left_by_values = by_type_caster(left_by_values)
+            right_by_values = by_type_caster(right_by_values)
+
+            # choose appropriate function by type
+            on_type = _get_cython_type(left_values.dtype)
+            func = _asof_by_function(self.direction, on_type, by_type)
+            return func(left_values,
+                        right_values,
+                        left_by_values,
+                        right_by_values,
+                        self.allow_exact_matches,
+                        tolerance)
+        else:
+            # choose appropriate function by type
+            on_type = _get_cython_type(left_values.dtype)
+            func = _asof_function(self.direction, on_type)
+            return func(left_values,
+                        right_values,
+                        self.allow_exact_matches,
+                        tolerance)
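+
+
+# Illustration (assumed frames, in the spirit of the asof test data):
+# match each trade to the most recent quote at or before its
+# timestamp, per ticker:
+#
+#   pd.merge_asof(trades, quotes, on='time', by='ticker',
+#                 tolerance=pd.Timedelta('2ms'))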
+
+
+def _get_multiindex_indexer(join_keys, index, sort):
+    from functools import partial
+
+    # bind `sort` argument
+    fkeys = partial(_factorize_keys, sort=sort)
+
+    # left & right join labels and num. of levels at each location
+    rlab, llab, shape = map(list, zip(* map(fkeys, index.levels, join_keys)))
+    if sort:
+        rlab = list(map(np.take, rlab, index.labels))
+    else:
+        i8copy = lambda a: a.astype('i8', subok=False, copy=True)
+        rlab = list(map(i8copy, index.labels))
+
+    # fix right labels if there were any nulls
+    for i in range(len(join_keys)):
+        mask = index.labels[i] == -1
+        if mask.any():
+            # check if there were already any nulls at this location;
+            # if there were, they are factorized to `shape[i] - 1`
+            a = join_keys[i][llab[i] == shape[i] - 1]
+            if a.size == 0 or not a[0] != a[0]:
+                shape[i] += 1
+
+            rlab[i][mask] = shape[i] - 1
+
+    # get flat i8 join keys
+    lkey, rkey = _get_join_keys(llab, rlab, shape, sort)
+
+    # factorize keys to a dense i8 space
+    lkey, rkey, count = fkeys(lkey, rkey)
+
+    return libjoin.left_outer_join(lkey, rkey, count, sort=sort)
+
+
+def _get_single_indexer(join_key, index, sort=False):
+    left_key, right_key, count = _factorize_keys(join_key, index, sort=sort)
+
+    left_indexer, right_indexer = libjoin.left_outer_join(
+        _ensure_int64(left_key),
+        _ensure_int64(right_key),
+        count, sort=sort)
+
+    return left_indexer, right_indexer
+
+
+def _left_join_on_index(left_ax, right_ax, join_keys, sort=False):
+    if len(join_keys) > 1:
+        if not ((isinstance(right_ax, MultiIndex) and
+                 len(join_keys) == right_ax.nlevels)):
+            raise AssertionError("If more than one join key is given then "
+                                 "'right_ax' must be a MultiIndex and the "
+                                 "number of join keys must be the number of "
+                                 "levels in right_ax")
+
+        left_indexer, right_indexer = \
+            _get_multiindex_indexer(join_keys, right_ax, sort=sort)
+    else:
+        jkey = join_keys[0]
+
+        left_indexer, right_indexer = \
+            _get_single_indexer(jkey, right_ax, sort=sort)
+
+    if sort or len(left_ax) != len(left_indexer):
+        # if asked to sort or there are 1-to-many matches
+        join_index = left_ax.take(left_indexer)
+        return join_index, left_indexer, right_indexer
+
+    # left frame preserves order & length of its index
+    return left_ax, None, right_indexer
+
+
+def _right_outer_join(x, y, max_groups):
+    right_indexer, left_indexer = libjoin.left_outer_join(y, x, max_groups)
+    return left_indexer, right_indexer
+
+
+_join_functions = {
+    'inner': libjoin.inner_join,
+    'left': libjoin.left_outer_join,
+    'right': _right_outer_join,
+    'outer': libjoin.full_outer_join,
+}
+
+
+def _factorize_keys(lk, rk, sort=True):
+    if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk):
+        lk = lk.values
+        rk = rk.values
+
+    # if we exactly match in categories, allow us to use codes
+    if (is_categorical_dtype(lk) and
+            is_categorical_dtype(rk) and
+            lk.is_dtype_equal(rk)):
+        return lk.codes, rk.codes, len(lk.categories)
+
+    if is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):
+        klass = libhashtable.Int64Factorizer
+        lk = _ensure_int64(com._values_from_object(lk))
+        rk = _ensure_int64(com._values_from_object(rk))
+    else:
+        klass = libhashtable.Factorizer
+        lk = _ensure_object(lk)
+        rk = _ensure_object(rk)
+
+    rizer = klass(max(len(lk), len(rk)))
+
+    llab = rizer.factorize(lk)
+    rlab = rizer.factorize(rk)
+
+    count = rizer.get_count()
+
+    if sort:
+        uniques = rizer.uniques.to_array()
+        llab, rlab = _sort_labels(uniques, llab, rlab)
+
+    # NA group
+    lmask = llab == -1
+    lany = lmask.any()
+    rmask = rlab == -1
+    rany = rmask.any()
+
+    if lany or rany:
+        if lany:
+            np.putmask(llab, lmask, count)
+        if rany:
+            np.putmask(rlab, rmask, count)
+        count += 1
+
+    return llab, rlab, count
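+
+
+# Illustration (assumed values): both key arrays are mapped into one
+# shared label space, with `count` distinct keys overall:
+#
+#   _factorize_keys(np.array(['a', 'b']), np.array(['b', 'c']))
+#   # -> (array([0, 1]), array([1, 2]), 3)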
+
+
+def _sort_labels(uniques, left, right):
+    if not isinstance(uniques, np.ndarray):
+        # tuplesafe
+        uniques = Index(uniques).values
+
+    l = len(left)
+    labels = np.concatenate([left, right])
+
+    _, new_labels = algos.safe_sort(uniques, labels, na_sentinel=-1)
+    new_labels = _ensure_int64(new_labels)
+    new_left, new_right = new_labels[:l], new_labels[l:]
+
+    return new_left, new_right
+
+
+def _get_join_keys(llab, rlab, shape, sort):
+
+    # how many levels can be done without overflow
+    pred = lambda i: not is_int64_overflow_possible(shape[:i])
+    nlev = next(filter(pred, range(len(shape), 0, -1)))
+
+    # get keys for the first `nlev` levels
+    stride = np.prod(shape[1:nlev], dtype='i8')
+    lkey = stride * llab[0].astype('i8', subok=False, copy=False)
+    rkey = stride * rlab[0].astype('i8', subok=False, copy=False)
+
+    for i in range(1, nlev):
+        stride //= shape[i]
+        lkey += llab[i] * stride
+        rkey += rlab[i] * stride
+
+    if nlev == len(shape):  # all done!
+        return lkey, rkey
+
+    # densify current keys to avoid overflow
+    lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)
+
+    llab = [lkey] + llab[nlev:]
+    rlab = [rkey] + rlab[nlev:]
+    shape = [count] + shape[nlev:]
+
+    return _get_join_keys(llab, rlab, shape, sort)
+
+
+def _should_fill(lname, rname):
+    if (not isinstance(lname, compat.string_types) or
+            not isinstance(rname, compat.string_types)):
+        return True
+    return lname == rname
+
+
+def _any(x):
+    return x is not None and len(x) > 0 and any([y is not None for y in x])
diff --git a/pandas/tools/pivot.py b/pandas/core/reshape/pivot.py
similarity index 99%
rename from pandas/tools/pivot.py
rename to pandas/core/reshape/pivot.py
index 11ca2e548f171..1c5250615d410 100644
--- a/pandas/tools/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -2,9 +2,10 @@
 
 from pandas.core.dtypes.common import is_list_like, is_scalar
 
-from pandas import Series, DataFrame, MultiIndex, Index, concat
+from pandas.core.reshape.concat import concat
+from pandas import Series, DataFrame, MultiIndex, Index
 from pandas.core.groupby import Grouper
-from pandas.tools.util import cartesian_product
+from pandas.core.reshape.util import cartesian_product
 from pandas.compat import range, lrange, zip
 from pandas import compat
 import pandas.core.common as com
diff --git a/pandas/core/reshape.py b/pandas/core/reshape/reshape.py
similarity index 99%
rename from pandas/core/reshape.py
rename to pandas/core/reshape/reshape.py
index b3a06d85967f2..bfd5320af13fb 100644
--- a/pandas/core/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -1151,7 +1151,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
     --------
     Series.str.get_dummies
     """
-    from pandas.tools.concat import concat
+    from pandas.core.reshape.concat import concat
     from itertools import cycle
 
     if isinstance(data, DataFrame):
diff --git a/pandas/tools/tile.py b/pandas/core/reshape/tile.py
similarity index 100%
rename from pandas/tools/tile.py
rename to pandas/core/reshape/tile.py
diff --git a/pandas/core/reshape/util.py b/pandas/core/reshape/util.py
new file mode 100644
index 0000000000000..2fe82e5d6bc57
--- /dev/null
+++ b/pandas/core/reshape/util.py
@@ -0,0 +1,76 @@
+import numpy as np
+
+from pandas.core.dtypes.common import is_list_like
+
+from pandas.compat import reduce
+from pandas.core.index import Index
+from pandas.core import common as com
+
+
+def match(needles, haystack):
+    haystack = Index(haystack)
+    needles = Index(needles)
+    return haystack.get_indexer(needles)
+
+
+def cartesian_product(X):
+    """
+    Numpy version of itertools.product or pandas.compat.product.
+ Sometimes faster (for large inputs)... + + Parameters + ---------- + X : list-like of list-likes + + Returns + ------- + product : list of ndarrays + + Examples + -------- + >>> cartesian_product([list('ABC'), [1, 2]]) + [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'), + array([1, 2, 1, 2, 1, 2])] + + See also + -------- + itertools.product : Cartesian product of input iterables. Equivalent to + nested for-loops. + pandas.compat.product : An alias for itertools.product. + """ + msg = "Input must be a list-like of list-likes" + if not is_list_like(X): + raise TypeError(msg) + for x in X: + if not is_list_like(x): + raise TypeError(msg) + + if len(X) == 0: + return [] + + lenX = np.fromiter((len(x) for x in X), dtype=np.intp) + cumprodX = np.cumproduct(lenX) + + a = np.roll(cumprodX, 1) + a[0] = 1 + + if cumprodX[-1] != 0: + b = cumprodX[-1] / cumprodX + else: + # if any factor is empty, the cartesian product is empty + b = np.zeros_like(cumprodX) + + return [np.tile(np.repeat(np.asarray(com._values_from_object(x)), b[i]), + np.product(a[i])) + for i, x in enumerate(X)] + + +def _compose2(f, g): + """Compose 2 callables""" + return lambda *args, **kwargs: f(g(*args, **kwargs)) + + +def compose(*funcs): + """Compose 2 or more callables""" + assert len(funcs) > 1, 'At least 2 callables must be passed to compose' + return reduce(_compose2, funcs) diff --git a/pandas/core/series.py b/pandas/core/series.py index 2a99481274e9e..69a2b35d88460 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1541,7 +1541,7 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): """ - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat if isinstance(to_append, (list, tuple)): to_concat = [self] + to_append @@ -2019,7 +2019,7 @@ def unstack(self, level=-1, fill_value=None): ------- unstacked : DataFrame """ - from pandas.core.reshape import unstack + from pandas.core.reshape.reshape import unstack return unstack(self, level, fill_value) # ---------------------------------------------------------------------- diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index ae0814d5566a8..6fbcbe7d645e1 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -168,7 +168,7 @@ def __init__(self, series, buf=None, length=True, header=True, index=True, self._chk_truncate() def _chk_truncate(self): - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat max_rows = self.max_rows truncate_v = max_rows and (len(self.series) > max_rows) series = self.series @@ -410,7 +410,7 @@ def _chk_truncate(self): Checks whether the frame should be truncated. If so, slices the frame up. 
""" - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat # Column of which first element is used to determine width of a dot col self.tr_size_col = -1 diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 374244acfe173..934c05ba5f130 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2317,7 +2317,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) else: - from pandas.tools.concat import concat + from pandas.core.reshape.concat import concat keys, frames = zip(*grouped) if grouped.axis == 0: df = concat(frames, keys=keys, axis=1) diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index a1490426ebf9d..e59784d233367 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -6,10 +6,14 @@ """ import pytest +import decimal from datetime import datetime, timedelta, date import numpy as np -from pandas import Timedelta, Timestamp, DatetimeIndex +import pandas as pd +from pandas import (Timedelta, Timestamp, DatetimeIndex, + to_numeric, _np_version_under1p9) + from pandas.core.dtypes.cast import ( maybe_downcast_to_dtype, maybe_convert_objects, @@ -24,6 +28,8 @@ PeriodDtype) from pandas.util import testing as tm +from numpy import iinfo + class TestMaybeDowncast(tm.TestCase): @@ -321,3 +327,365 @@ def test_period_dtype(self): np.dtype('datetime64[ns]'), np.object, np.int64]: self.assertEqual(find_common_type([dtype, dtype2]), np.object) self.assertEqual(find_common_type([dtype2, dtype]), np.object) + + +class TestToNumeric(tm.TestCase): + + def test_series(self): + s = pd.Series(['1', '-3.14', '7']) + res = to_numeric(s) + expected = pd.Series([1, -3.14, 7]) + tm.assert_series_equal(res, expected) + + s = pd.Series(['1', '-3.14', 7]) + res = to_numeric(s) + tm.assert_series_equal(res, expected) + + def test_series_numeric(self): + s = pd.Series([1, 3, 4, 5], index=list('ABCD'), name='XXX') + res = to_numeric(s) + tm.assert_series_equal(res, s) + + s = pd.Series([1., 3., 4., 5.], index=list('ABCD'), name='XXX') + res = to_numeric(s) + tm.assert_series_equal(res, s) + + # bool is regarded as numeric + s = pd.Series([True, False, True, True], + index=list('ABCD'), name='XXX') + res = to_numeric(s) + tm.assert_series_equal(res, s) + + def test_error(self): + s = pd.Series([1, -3.14, 'apple']) + msg = 'Unable to parse string "apple" at position 2' + with tm.assertRaisesRegexp(ValueError, msg): + to_numeric(s, errors='raise') + + res = to_numeric(s, errors='ignore') + expected = pd.Series([1, -3.14, 'apple']) + tm.assert_series_equal(res, expected) + + res = to_numeric(s, errors='coerce') + expected = pd.Series([1, -3.14, np.nan]) + tm.assert_series_equal(res, expected) + + s = pd.Series(['orange', 1, -3.14, 'apple']) + msg = 'Unable to parse string "orange" at position 0' + with tm.assertRaisesRegexp(ValueError, msg): + to_numeric(s, errors='raise') + + def test_error_seen_bool(self): + s = pd.Series([True, False, 'apple']) + msg = 'Unable to parse string "apple" at position 2' + with tm.assertRaisesRegexp(ValueError, msg): + to_numeric(s, errors='raise') + + res = to_numeric(s, errors='ignore') + expected = pd.Series([True, False, 'apple']) + tm.assert_series_equal(res, expected) + + # coerces to float + res = to_numeric(s, errors='coerce') + expected = pd.Series([1., 0., np.nan]) + tm.assert_series_equal(res, expected) + + def test_list(self): + s = ['1', '-3.14', 
'7'] + res = to_numeric(s) + expected = np.array([1, -3.14, 7]) + tm.assert_numpy_array_equal(res, expected) + + def test_list_numeric(self): + s = [1, 3, 4, 5] + res = to_numeric(s) + tm.assert_numpy_array_equal(res, np.array(s, dtype=np.int64)) + + s = [1., 3., 4., 5.] + res = to_numeric(s) + tm.assert_numpy_array_equal(res, np.array(s)) + + # bool is regarded as numeric + s = [True, False, True, True] + res = to_numeric(s) + tm.assert_numpy_array_equal(res, np.array(s)) + + def test_numeric(self): + s = pd.Series([1, -3.14, 7], dtype='O') + res = to_numeric(s) + expected = pd.Series([1, -3.14, 7]) + tm.assert_series_equal(res, expected) + + s = pd.Series([1, -3.14, 7]) + res = to_numeric(s) + tm.assert_series_equal(res, expected) + + # GH 14827 + df = pd.DataFrame(dict( + a=[1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), '0.1'], + b=[1.0, 2.0, 3.0, 4.0], + )) + expected = pd.DataFrame(dict( + a=[1.2, 3.14, np.inf, 0.1], + b=[1.0, 2.0, 3.0, 4.0], + )) + + # Test to_numeric over one column + df_copy = df.copy() + df_copy['a'] = df_copy['a'].apply(to_numeric) + tm.assert_frame_equal(df_copy, expected) + + # Test to_numeric over multiple columns + df_copy = df.copy() + df_copy[['a', 'b']] = df_copy[['a', 'b']].apply(to_numeric) + tm.assert_frame_equal(df_copy, expected) + + def test_numeric_lists_and_arrays(self): + # Test to_numeric with embedded lists and arrays + df = pd.DataFrame(dict( + a=[[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1] + )) + df['a'] = df['a'].apply(to_numeric) + expected = pd.DataFrame(dict( + a=[[3.14, 1.0], 1.6, 0.1], + )) + tm.assert_frame_equal(df, expected) + + df = pd.DataFrame(dict( + a=[np.array([decimal.Decimal(3.14), 1.0]), 0.1] + )) + df['a'] = df['a'].apply(to_numeric) + expected = pd.DataFrame(dict( + a=[[3.14, 1.0], 0.1], + )) + tm.assert_frame_equal(df, expected) + + def test_all_nan(self): + s = pd.Series(['a', 'b', 'c']) + res = to_numeric(s, errors='coerce') + expected = pd.Series([np.nan, np.nan, np.nan]) + tm.assert_series_equal(res, expected) + + def test_type_check(self): + # GH 11776 + df = pd.DataFrame({'a': [1, -3.14, 7], 'b': ['4', '5', '6']}) + with tm.assertRaisesRegexp(TypeError, "1-d array"): + to_numeric(df) + for errors in ['ignore', 'raise', 'coerce']: + with tm.assertRaisesRegexp(TypeError, "1-d array"): + to_numeric(df, errors=errors) + + def test_scalar(self): + self.assertEqual(pd.to_numeric(1), 1) + self.assertEqual(pd.to_numeric(1.1), 1.1) + + self.assertEqual(pd.to_numeric('1'), 1) + self.assertEqual(pd.to_numeric('1.1'), 1.1) + + with tm.assertRaises(ValueError): + to_numeric('XX', errors='raise') + + self.assertEqual(to_numeric('XX', errors='ignore'), 'XX') + self.assertTrue(np.isnan(to_numeric('XX', errors='coerce'))) + + def test_numeric_dtypes(self): + idx = pd.Index([1, 2, 3], name='xxx') + res = pd.to_numeric(idx) + tm.assert_index_equal(res, idx) + + res = pd.to_numeric(pd.Series(idx, name='xxx')) + tm.assert_series_equal(res, pd.Series(idx, name='xxx')) + + res = pd.to_numeric(idx.values) + tm.assert_numpy_array_equal(res, idx.values) + + idx = pd.Index([1., np.nan, 3., np.nan], name='xxx') + res = pd.to_numeric(idx) + tm.assert_index_equal(res, idx) + + res = pd.to_numeric(pd.Series(idx, name='xxx')) + tm.assert_series_equal(res, pd.Series(idx, name='xxx')) + + res = pd.to_numeric(idx.values) + tm.assert_numpy_array_equal(res, idx.values) + + def test_str(self): + idx = pd.Index(['1', '2', '3'], name='xxx') + exp = np.array([1, 2, 3], dtype='int64') + res = pd.to_numeric(idx) + 
tm.assert_index_equal(res, pd.Index(exp, name='xxx')) + + res = pd.to_numeric(pd.Series(idx, name='xxx')) + tm.assert_series_equal(res, pd.Series(exp, name='xxx')) + + res = pd.to_numeric(idx.values) + tm.assert_numpy_array_equal(res, exp) + + idx = pd.Index(['1.5', '2.7', '3.4'], name='xxx') + exp = np.array([1.5, 2.7, 3.4]) + res = pd.to_numeric(idx) + tm.assert_index_equal(res, pd.Index(exp, name='xxx')) + + res = pd.to_numeric(pd.Series(idx, name='xxx')) + tm.assert_series_equal(res, pd.Series(exp, name='xxx')) + + res = pd.to_numeric(idx.values) + tm.assert_numpy_array_equal(res, exp) + + def test_datetimelike(self): + for tz in [None, 'US/Eastern', 'Asia/Tokyo']: + idx = pd.date_range('20130101', periods=3, tz=tz, name='xxx') + res = pd.to_numeric(idx) + tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx')) + + res = pd.to_numeric(pd.Series(idx, name='xxx')) + tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx')) + + res = pd.to_numeric(idx.values) + tm.assert_numpy_array_equal(res, idx.asi8) + + def test_timedelta(self): + idx = pd.timedelta_range('1 days', periods=3, freq='D', name='xxx') + res = pd.to_numeric(idx) + tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx')) + + res = pd.to_numeric(pd.Series(idx, name='xxx')) + tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx')) + + res = pd.to_numeric(idx.values) + tm.assert_numpy_array_equal(res, idx.asi8) + + def test_period(self): + idx = pd.period_range('2011-01', periods=3, freq='M', name='xxx') + res = pd.to_numeric(idx) + tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx')) + + # ToDo: enable when we can support native PeriodDtype + # res = pd.to_numeric(pd.Series(idx, name='xxx')) + # tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx')) + + def test_non_hashable(self): + # Test for Bug #13324 + s = pd.Series([[10.0, 2], 1.0, 'apple']) + res = pd.to_numeric(s, errors='coerce') + tm.assert_series_equal(res, pd.Series([np.nan, 1.0, np.nan])) + + res = pd.to_numeric(s, errors='ignore') + tm.assert_series_equal(res, pd.Series([[10.0, 2], 1.0, 'apple'])) + + with self.assertRaisesRegexp(TypeError, "Invalid object type"): + pd.to_numeric(s) + + def test_downcast(self): + # see gh-13352 + mixed_data = ['1', 2, 3] + int_data = [1, 2, 3] + date_data = np.array(['1970-01-02', '1970-01-03', + '1970-01-04'], dtype='datetime64[D]') + + invalid_downcast = 'unsigned-integer' + msg = 'invalid downcasting method provided' + + smallest_int_dtype = np.dtype(np.typecodes['Integer'][0]) + smallest_uint_dtype = np.dtype(np.typecodes['UnsignedInteger'][0]) + + # support below np.float32 is rare and far between + float_32_char = np.dtype(np.float32).char + smallest_float_dtype = float_32_char + + for data in (mixed_data, int_data, date_data): + with self.assertRaisesRegexp(ValueError, msg): + pd.to_numeric(data, downcast=invalid_downcast) + + expected = np.array([1, 2, 3], dtype=np.int64) + + res = pd.to_numeric(data) + tm.assert_numpy_array_equal(res, expected) + + res = pd.to_numeric(data, downcast=None) + tm.assert_numpy_array_equal(res, expected) + + expected = np.array([1, 2, 3], dtype=smallest_int_dtype) + + for signed_downcast in ('integer', 'signed'): + res = pd.to_numeric(data, downcast=signed_downcast) + tm.assert_numpy_array_equal(res, expected) + + expected = np.array([1, 2, 3], dtype=smallest_uint_dtype) + res = pd.to_numeric(data, downcast='unsigned') + tm.assert_numpy_array_equal(res, expected) + + expected = np.array([1, 2, 3], dtype=smallest_float_dtype) + res = pd.to_numeric(data, 
downcast='float') + tm.assert_numpy_array_equal(res, expected) + + # if we can't successfully cast the given + # data to a numeric dtype, do not bother + # with the downcast parameter + data = ['foo', 2, 3] + expected = np.array(data, dtype=object) + res = pd.to_numeric(data, errors='ignore', + downcast='unsigned') + tm.assert_numpy_array_equal(res, expected) + + # cannot cast to an unsigned integer because + # we have a negative number + data = ['-1', 2, 3] + expected = np.array([-1, 2, 3], dtype=np.int64) + res = pd.to_numeric(data, downcast='unsigned') + tm.assert_numpy_array_equal(res, expected) + + # cannot cast to an integer (signed or unsigned) + # because we have a float number + data = (['1.1', 2, 3], + [10000.0, 20000, 3000, 40000.36, 50000, 50000.00]) + expected = (np.array([1.1, 2, 3], dtype=np.float64), + np.array([10000.0, 20000, 3000, + 40000.36, 50000, 50000.00], dtype=np.float64)) + + for _data, _expected in zip(data, expected): + for downcast in ('integer', 'signed', 'unsigned'): + res = pd.to_numeric(_data, downcast=downcast) + tm.assert_numpy_array_equal(res, _expected) + + # the smallest integer dtype need not be np.(u)int8 + data = ['256', 257, 258] + + for downcast, expected_dtype in zip( + ['integer', 'signed', 'unsigned'], + [np.int16, np.int16, np.uint16]): + expected = np.array([256, 257, 258], dtype=expected_dtype) + res = pd.to_numeric(data, downcast=downcast) + tm.assert_numpy_array_equal(res, expected) + + def test_downcast_limits(self): + # Test the limits of each downcast. Bug: #14401. + # Check to make sure numpy is new enough to run this test. + if _np_version_under1p9: + pytest.skip("Numpy version is under 1.9") + + i = 'integer' + u = 'unsigned' + dtype_downcast_min_max = [ + ('int8', i, [iinfo(np.int8).min, iinfo(np.int8).max]), + ('int16', i, [iinfo(np.int16).min, iinfo(np.int16).max]), + ('int32', i, [iinfo(np.int32).min, iinfo(np.int32).max]), + ('int64', i, [iinfo(np.int64).min, iinfo(np.int64).max]), + ('uint8', u, [iinfo(np.uint8).min, iinfo(np.uint8).max]), + ('uint16', u, [iinfo(np.uint16).min, iinfo(np.uint16).max]), + ('uint32', u, [iinfo(np.uint32).min, iinfo(np.uint32).max]), + ('uint64', u, [iinfo(np.uint64).min, iinfo(np.uint64).max]), + ('int16', i, [iinfo(np.int8).min, iinfo(np.int8).max + 1]), + ('int32', i, [iinfo(np.int16).min, iinfo(np.int16).max + 1]), + ('int64', i, [iinfo(np.int32).min, iinfo(np.int32).max + 1]), + ('int16', i, [iinfo(np.int8).min - 1, iinfo(np.int16).max]), + ('int32', i, [iinfo(np.int16).min - 1, iinfo(np.int32).max]), + ('int64', i, [iinfo(np.int32).min - 1, iinfo(np.int64).max]), + ('uint16', u, [iinfo(np.uint8).min, iinfo(np.uint8).max + 1]), + ('uint32', u, [iinfo(np.uint16).min, iinfo(np.uint16).max + 1]), + ('uint64', u, [iinfo(np.uint32).min, iinfo(np.uint32).max + 1]) + ] + + for dtype, downcast, min_max in dtype_downcast_min_max: + series = pd.to_numeric(pd.Series(min_max), downcast=downcast) + assert series.dtype == dtype diff --git a/pandas/tests/dtypes/test_convert.py b/pandas/tests/dtypes/test_convert.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/reshape/__init__.py b/pandas/tests/reshape/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/tools/data/allow_exact_matches.csv b/pandas/tests/reshape/data/allow_exact_matches.csv similarity index 100% rename from pandas/tests/tools/data/allow_exact_matches.csv rename to pandas/tests/reshape/data/allow_exact_matches.csv diff --git 
a/pandas/tests/tools/data/allow_exact_matches_and_tolerance.csv b/pandas/tests/reshape/data/allow_exact_matches_and_tolerance.csv similarity index 100% rename from pandas/tests/tools/data/allow_exact_matches_and_tolerance.csv rename to pandas/tests/reshape/data/allow_exact_matches_and_tolerance.csv diff --git a/pandas/tests/tools/data/asof.csv b/pandas/tests/reshape/data/asof.csv similarity index 100% rename from pandas/tests/tools/data/asof.csv rename to pandas/tests/reshape/data/asof.csv diff --git a/pandas/tests/tools/data/asof2.csv b/pandas/tests/reshape/data/asof2.csv similarity index 100% rename from pandas/tests/tools/data/asof2.csv rename to pandas/tests/reshape/data/asof2.csv diff --git a/pandas/tests/tools/data/cut_data.csv b/pandas/tests/reshape/data/cut_data.csv similarity index 100% rename from pandas/tests/tools/data/cut_data.csv rename to pandas/tests/reshape/data/cut_data.csv diff --git a/pandas/tests/tools/data/quotes.csv b/pandas/tests/reshape/data/quotes.csv similarity index 100% rename from pandas/tests/tools/data/quotes.csv rename to pandas/tests/reshape/data/quotes.csv diff --git a/pandas/tests/tools/data/quotes2.csv b/pandas/tests/reshape/data/quotes2.csv similarity index 100% rename from pandas/tests/tools/data/quotes2.csv rename to pandas/tests/reshape/data/quotes2.csv diff --git a/pandas/tests/tools/data/tolerance.csv b/pandas/tests/reshape/data/tolerance.csv similarity index 100% rename from pandas/tests/tools/data/tolerance.csv rename to pandas/tests/reshape/data/tolerance.csv diff --git a/pandas/tests/tools/data/trades.csv b/pandas/tests/reshape/data/trades.csv similarity index 100% rename from pandas/tests/tools/data/trades.csv rename to pandas/tests/reshape/data/trades.csv diff --git a/pandas/tests/tools/data/trades2.csv b/pandas/tests/reshape/data/trades2.csv similarity index 100% rename from pandas/tests/tools/data/trades2.csv rename to pandas/tests/reshape/data/trades2.csv diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/reshape/test_concat.py similarity index 100% rename from pandas/tests/tools/test_concat.py rename to pandas/tests/reshape/test_concat.py diff --git a/pandas/tests/tools/test_hashing.py b/pandas/tests/reshape/test_hashing.py similarity index 100% rename from pandas/tests/tools/test_hashing.py rename to pandas/tests/reshape/test_hashing.py diff --git a/pandas/tests/tools/test_join.py b/pandas/tests/reshape/test_join.py similarity index 99% rename from pandas/tests/tools/test_join.py rename to pandas/tests/reshape/test_join.py index 8571a1ff16701..51e5beadee8a7 100644 --- a/pandas/tests/tools/test_join.py +++ b/pandas/tests/reshape/test_join.py @@ -12,7 +12,7 @@ from pandas._libs import join as libjoin import pandas.util.testing as tm -from pandas.tests.tools.test_merge import get_test_data, N, NGROUPS +from pandas.tests.reshape.test_merge import get_test_data, N, NGROUPS a_ = np.array diff --git a/pandas/tests/tools/test_merge.py b/pandas/tests/reshape/test_merge.py similarity index 99% rename from pandas/tests/tools/test_merge.py rename to pandas/tests/reshape/test_merge.py index cc4a97df33801..67a8c5084eef6 100644 --- a/pandas/tests/tools/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -9,8 +9,8 @@ import pandas as pd from pandas.compat import lrange, lzip -from pandas.tools.concat import concat -from pandas.tools.merge import merge, MergeError +from pandas.core.reshape.concat import concat +from pandas.core.reshape.merge import merge, MergeError from pandas.util.testing import assert_frame_equal, assert_series_equal 
from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype diff --git a/pandas/tests/tools/test_merge_asof.py b/pandas/tests/reshape/test_merge_asof.py similarity index 99% rename from pandas/tests/tools/test_merge_asof.py rename to pandas/tests/reshape/test_merge_asof.py index c9460cc74c94a..865c413bad11e 100644 --- a/pandas/tests/tools/test_merge_asof.py +++ b/pandas/tests/reshape/test_merge_asof.py @@ -5,7 +5,7 @@ import pandas as pd from pandas import (merge_asof, read_csv, to_datetime, Timedelta) -from pandas.tools.merge import MergeError +from pandas.core.reshape.merge import MergeError from pandas.util import testing as tm from pandas.util.testing import assert_frame_equal diff --git a/pandas/tests/tools/test_merge_ordered.py b/pandas/tests/reshape/test_merge_ordered.py similarity index 100% rename from pandas/tests/tools/test_merge_ordered.py rename to pandas/tests/reshape/test_merge_ordered.py diff --git a/pandas/tests/tools/test_pivot.py b/pandas/tests/reshape/test_pivot.py similarity index 99% rename from pandas/tests/tools/test_pivot.py rename to pandas/tests/reshape/test_pivot.py index c8dfaf5e29bc6..88d25b9d053c3 100644 --- a/pandas/tests/tools/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -6,7 +6,7 @@ import pandas as pd from pandas import (DataFrame, Series, Index, MultiIndex, Grouper, date_range, concat) -from pandas.tools.pivot import pivot_table, crosstab +from pandas.core.reshape.pivot import pivot_table, crosstab from pandas.compat import range, product import pandas.util.testing as tm from pandas.tseries.util import pivot_annual, isleapyear diff --git a/pandas/tests/test_reshape.py b/pandas/tests/reshape/test_reshape.py similarity index 99% rename from pandas/tests/test_reshape.py rename to pandas/tests/reshape/test_reshape.py index ee255c1863b41..0eb1e5ff3cf11 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -9,7 +9,8 @@ from pandas.util.testing import assert_frame_equal -from pandas.core.reshape import (melt, lreshape, get_dummies, wide_to_long) +from pandas.core.reshape.reshape import ( + melt, lreshape, get_dummies, wide_to_long) import pandas.util.testing as tm from pandas.compat import range, u @@ -662,7 +663,7 @@ def test_preserve_categorical_dtype(self): expected = DataFrame([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], index=midx, columns=cidx) - from pandas.core.reshape import make_axis_dummies + from pandas.core.reshape.reshape import make_axis_dummies result = make_axis_dummies(df) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/tools/test_tile.py b/pandas/tests/reshape/test_tile.py similarity index 99% rename from pandas/tests/tools/test_tile.py rename to pandas/tests/reshape/test_tile.py index 742568870c3c3..1cdd87dc67bd8 100644 --- a/pandas/tests/tools/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -10,7 +10,7 @@ import pandas.util.testing as tm from pandas.core.algorithms import quantile -import pandas.tools.tile as tmod +import pandas.core.reshape.tile as tmod class TestCut(tm.TestCase): diff --git a/pandas/tests/tools/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py similarity index 100% rename from pandas/tests/tools/test_union_categoricals.py rename to pandas/tests/reshape/test_union_categoricals.py diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py new file mode 100644 index 0000000000000..fd3a683e80397 --- /dev/null +++ 
b/pandas/tests/reshape/test_util.py @@ -0,0 +1,49 @@ + +import numpy as np +from pandas import date_range, Index +import pandas.util.testing as tm +from pandas.core.reshape.util import cartesian_product + + +class TestCartesianProduct(tm.TestCase): + + def test_simple(self): + x, y = list('ABC'), [1, 22] + result1, result2 = cartesian_product([x, y]) + expected1 = np.array(['A', 'A', 'B', 'B', 'C', 'C']) + expected2 = np.array([1, 22, 1, 22, 1, 22]) + tm.assert_numpy_array_equal(result1, expected1) + tm.assert_numpy_array_equal(result2, expected2) + + def test_datetimeindex(self): + # regression test for GitHub issue #6439 + # make sure that the ordering on datetimeindex is consistent + x = date_range('2000-01-01', periods=2) + result1, result2 = [Index(y).day for y in cartesian_product([x, x])] + expected1 = Index([1, 1, 2, 2]) + expected2 = Index([1, 2, 1, 2]) + tm.assert_index_equal(result1, expected1) + tm.assert_index_equal(result2, expected2) + + def test_empty(self): + # product of empty factors + X = [[], [0, 1], []] + Y = [[], [], ['a', 'b', 'c']] + for x, y in zip(X, Y): + expected1 = np.array([], dtype=np.asarray(x).dtype) + expected2 = np.array([], dtype=np.asarray(y).dtype) + result1, result2 = cartesian_product([x, y]) + tm.assert_numpy_array_equal(result1, expected1) + tm.assert_numpy_array_equal(result2, expected2) + + # empty product (empty input): + result = cartesian_product([]) + expected = [] + assert result == expected + + def test_invalid_input(self): + invalid_inputs = [1, [1], [1, 2], [[1], 2], + 'a', ['a'], ['a', 'b'], [['a'], 'b']] + msg = "Input must be a list-like of list-likes" + for X in invalid_inputs: + tm.assertRaisesRegexp(TypeError, msg, cartesian_product, X=X) diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index f5a27a8161909..b8e74073e9eb9 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -12,7 +12,7 @@ import pandas.util.testing as tm from pandas.compat import range from pandas import compat -from pandas.tools.util import cartesian_product +from pandas.core.reshape.util import cartesian_product import pandas.core.sparse.frame as spf diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index cd1ec915d3aeb..6f4c145d74cd1 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -583,7 +583,7 @@ class TestValueCounts(tm.TestCase): def test_value_counts(self): np.random.seed(1234) - from pandas.tools.tile import cut + from pandas.core.reshape.tile import cut arr = np.random.randn(4) factor = cut(arr, 4) diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py deleted file mode 100644 index d740d8bd26581..0000000000000 --- a/pandas/tests/test_generic.py +++ /dev/null @@ -1,2076 +0,0 @@ -# -*- coding: utf-8 -*- -# pylint: disable-msg=E1101,W0612 - -from operator import methodcaller -from copy import copy, deepcopy -from warnings import catch_warnings - -import pytest -import numpy as np -from numpy import nan -import pandas as pd - -from distutils.version import LooseVersion -from pandas.core.dtypes.common import is_scalar -from pandas import (Index, Series, DataFrame, Panel, isnull, - date_range, period_range, Panel4D) -from pandas.core.index import MultiIndex - -import pandas.io.formats.printing as printing - -from pandas.compat import range, zip, PY3 -from pandas import compat -from pandas.util.testing import (assertRaisesRegexp, - assert_series_equal, - assert_frame_equal, - assert_panel_equal, - assert_panel4d_equal, - 
assert_almost_equal) - -import pandas.util.testing as tm - - -# ---------------------------------------------------------------------- -# Generic types test cases - - -class Generic(object): - - def setUp(self): - pass - - @property - def _ndim(self): - return self._typ._AXIS_LEN - - def _axes(self): - """ return the axes for my object typ """ - return self._typ._AXIS_ORDERS - - def _construct(self, shape, value=None, dtype=None, **kwargs): - """ construct an object for the given shape - if value is specified use that if its a scalar - if value is an array, repeat it as needed """ - - if isinstance(shape, int): - shape = tuple([shape] * self._ndim) - if value is not None: - if is_scalar(value): - if value == 'empty': - arr = None - - # remove the info axis - kwargs.pop(self._typ._info_axis_name, None) - else: - arr = np.empty(shape, dtype=dtype) - arr.fill(value) - else: - fshape = np.prod(shape) - arr = value.ravel() - new_shape = fshape / arr.shape[0] - if fshape % arr.shape[0] != 0: - raise Exception("invalid value passed in _construct") - - arr = np.repeat(arr, new_shape).reshape(shape) - else: - arr = np.random.randn(*shape) - return self._typ(arr, dtype=dtype, **kwargs) - - def _compare(self, result, expected): - self._comparator(result, expected) - - def test_rename(self): - - # single axis - idx = list('ABCD') - # relabeling values passed into self.rename - args = [ - str.lower, - {x: x.lower() for x in idx}, - Series({x: x.lower() for x in idx}), - ] - - for axis in self._axes(): - kwargs = {axis: idx} - obj = self._construct(4, **kwargs) - - for arg in args: - # rename a single axis - result = obj.rename(**{axis: arg}) - expected = obj.copy() - setattr(expected, axis, list('abcd')) - self._compare(result, expected) - - # multiple axes at once - - def test_rename_axis(self): - idx = list('ABCD') - # relabeling values passed into self.rename - args = [ - str.lower, - {x: x.lower() for x in idx}, - Series({x: x.lower() for x in idx}), - ] - - for axis in self._axes(): - kwargs = {axis: idx} - obj = self._construct(4, **kwargs) - - for arg in args: - # rename a single axis - result = obj.rename_axis(arg, axis=axis) - expected = obj.copy() - setattr(expected, axis, list('abcd')) - self._compare(result, expected) - # scalar values - for arg in ['foo', None]: - result = obj.rename_axis(arg, axis=axis) - expected = obj.copy() - getattr(expected, axis).name = arg - self._compare(result, expected) - - def test_get_numeric_data(self): - - n = 4 - kwargs = {} - for i in range(self._ndim): - kwargs[self._typ._AXIS_NAMES[i]] = list(range(n)) - - # get the numeric data - o = self._construct(n, **kwargs) - result = o._get_numeric_data() - self._compare(result, o) - - # non-inclusion - result = o._get_bool_data() - expected = self._construct(n, value='empty', **kwargs) - self._compare(result, expected) - - # get the bool data - arr = np.array([True, True, False, True]) - o = self._construct(n, value=arr, **kwargs) - result = o._get_numeric_data() - self._compare(result, o) - - # _get_numeric_data is includes _get_bool_data, so can't test for - # non-inclusion - - def test_get_default(self): - - # GH 7725 - d0 = "a", "b", "c", "d" - d1 = np.arange(4, dtype='int64') - others = "e", 10 - - for data, index in ((d0, d1), (d1, d0)): - s = Series(data, index=index) - for i, d in zip(index, data): - self.assertEqual(s.get(i), d) - self.assertEqual(s.get(i, d), d) - self.assertEqual(s.get(i, "z"), d) - for other in others: - self.assertEqual(s.get(other, "z"), "z") - self.assertEqual(s.get(other, other), 
other) - - def test_nonzero(self): - - # GH 4633 - # look at the boolean/nonzero behavior for objects - obj = self._construct(shape=4) - self.assertRaises(ValueError, lambda: bool(obj == 0)) - self.assertRaises(ValueError, lambda: bool(obj == 1)) - self.assertRaises(ValueError, lambda: bool(obj)) - - obj = self._construct(shape=4, value=1) - self.assertRaises(ValueError, lambda: bool(obj == 0)) - self.assertRaises(ValueError, lambda: bool(obj == 1)) - self.assertRaises(ValueError, lambda: bool(obj)) - - obj = self._construct(shape=4, value=np.nan) - self.assertRaises(ValueError, lambda: bool(obj == 0)) - self.assertRaises(ValueError, lambda: bool(obj == 1)) - self.assertRaises(ValueError, lambda: bool(obj)) - - # empty - obj = self._construct(shape=0) - self.assertRaises(ValueError, lambda: bool(obj)) - - # invalid behaviors - - obj1 = self._construct(shape=4, value=1) - obj2 = self._construct(shape=4, value=1) - - def f(): - if obj1: - printing.pprint_thing("this works and shouldn't") - - self.assertRaises(ValueError, f) - self.assertRaises(ValueError, lambda: obj1 and obj2) - self.assertRaises(ValueError, lambda: obj1 or obj2) - self.assertRaises(ValueError, lambda: not obj1) - - def test_numpy_1_7_compat_numeric_methods(self): - # GH 4435 - # numpy in 1.7 tries to pass addtional arguments to pandas functions - - o = self._construct(shape=4) - for op in ['min', 'max', 'max', 'var', 'std', 'prod', 'sum', 'cumsum', - 'cumprod', 'median', 'skew', 'kurt', 'compound', 'cummax', - 'cummin', 'all', 'any']: - f = getattr(np, op, None) - if f is not None: - f(o) - - def test_downcast(self): - # test close downcasting - - o = self._construct(shape=4, value=9, dtype=np.int64) - result = o.copy() - result._data = o._data.downcast(dtypes='infer') - self._compare(result, o) - - o = self._construct(shape=4, value=9.) 
- expected = o.astype(np.int64) - result = o.copy() - result._data = o._data.downcast(dtypes='infer') - self._compare(result, expected) - - o = self._construct(shape=4, value=9.5) - result = o.copy() - result._data = o._data.downcast(dtypes='infer') - self._compare(result, o) - - # are close - o = self._construct(shape=4, value=9.000000000005) - result = o.copy() - result._data = o._data.downcast(dtypes='infer') - expected = o.astype(np.int64) - self._compare(result, expected) - - def test_constructor_compound_dtypes(self): - # GH 5191 - # compound dtypes should raise not-implementederror - - def f(dtype): - return self._construct(shape=3, dtype=dtype) - - self.assertRaises(NotImplementedError, f, [("A", "datetime64[h]"), - ("B", "str"), - ("C", "int32")]) - - # these work (though results may be unexpected) - f('int64') - f('float64') - f('M8[ns]') - - def check_metadata(self, x, y=None): - for m in x._metadata: - v = getattr(x, m, None) - if y is None: - self.assertIsNone(v) - else: - self.assertEqual(v, getattr(y, m, None)) - - def test_metadata_propagation(self): - # check that the metadata matches up on the resulting ops - - o = self._construct(shape=3) - o.name = 'foo' - o2 = self._construct(shape=3) - o2.name = 'bar' - - # TODO - # Once panel can do non-trivial combine operations - # (currently there is an a raise in the Panel arith_ops to prevent - # this, though it actually does work) - # can remove all of these try: except: blocks on the actual operations - - # ---------- - # preserving - # ---------- - - # simple ops with scalars - for op in ['__add__', '__sub__', '__truediv__', '__mul__']: - result = getattr(o, op)(1) - self.check_metadata(o, result) - - # ops with like - for op in ['__add__', '__sub__', '__truediv__', '__mul__']: - try: - result = getattr(o, op)(o) - self.check_metadata(o, result) - except (ValueError, AttributeError): - pass - - # simple boolean - for op in ['__eq__', '__le__', '__ge__']: - v1 = getattr(o, op)(o) - self.check_metadata(o, v1) - - try: - self.check_metadata(o, v1 & v1) - except (ValueError): - pass - - try: - self.check_metadata(o, v1 | v1) - except (ValueError): - pass - - # combine_first - try: - result = o.combine_first(o2) - self.check_metadata(o, result) - except (AttributeError): - pass - - # --------------------------- - # non-preserving (by default) - # --------------------------- - - # add non-like - try: - result = o + o2 - self.check_metadata(result) - except (ValueError, AttributeError): - pass - - # simple boolean - for op in ['__eq__', '__le__', '__ge__']: - - # this is a name matching op - v1 = getattr(o, op)(o) - - v2 = getattr(o, op)(o2) - self.check_metadata(v2) - - try: - self.check_metadata(v1 & v2) - except (ValueError): - pass - - try: - self.check_metadata(v1 | v2) - except (ValueError): - pass - - def test_head_tail(self): - # GH5370 - - o = self._construct(shape=10) - - # check all index types - for index in [tm.makeFloatIndex, tm.makeIntIndex, tm.makeStringIndex, - tm.makeUnicodeIndex, tm.makeDateIndex, - tm.makePeriodIndex]: - axis = o._get_axis_name(0) - setattr(o, axis, index(len(getattr(o, axis)))) - - # Panel + dims - try: - o.head() - except (NotImplementedError): - pytest.skip('not implemented on {0}'.format( - o.__class__.__name__)) - - self._compare(o.head(), o.iloc[:5]) - self._compare(o.tail(), o.iloc[-5:]) - - # 0-len - self._compare(o.head(0), o.iloc[0:0]) - self._compare(o.tail(0), o.iloc[0:0]) - - # bounded - self._compare(o.head(len(o) + 1), o) - self._compare(o.tail(len(o) + 1), o) - - # neg index - 
self._compare(o.head(-3), o.head(7)) - self._compare(o.tail(-3), o.tail(7)) - - def test_sample(self): - # Fixes issue: 2419 - - o = self._construct(shape=10) - - ### - # Check behavior of random_state argument - ### - - # Check for stability when receives seed or random state -- run 10 - # times. - for test in range(10): - seed = np.random.randint(0, 100) - self._compare( - o.sample(n=4, random_state=seed), o.sample(n=4, - random_state=seed)) - self._compare( - o.sample(frac=0.7, random_state=seed), o.sample( - frac=0.7, random_state=seed)) - - self._compare( - o.sample(n=4, random_state=np.random.RandomState(test)), - o.sample(n=4, random_state=np.random.RandomState(test))) - - self._compare( - o.sample(frac=0.7, random_state=np.random.RandomState(test)), - o.sample(frac=0.7, random_state=np.random.RandomState(test))) - - os1, os2 = [], [] - for _ in range(2): - np.random.seed(test) - os1.append(o.sample(n=4)) - os2.append(o.sample(frac=0.7)) - self._compare(*os1) - self._compare(*os2) - - # Check for error when random_state argument invalid. - with tm.assertRaises(ValueError): - o.sample(random_state='astring!') - - ### - # Check behavior of `frac` and `N` - ### - - # Giving both frac and N throws error - with tm.assertRaises(ValueError): - o.sample(n=3, frac=0.3) - - # Check that raises right error for negative lengths - with tm.assertRaises(ValueError): - o.sample(n=-3) - with tm.assertRaises(ValueError): - o.sample(frac=-0.3) - - # Make sure float values of `n` give error - with tm.assertRaises(ValueError): - o.sample(n=3.2) - - # Check lengths are right - self.assertTrue(len(o.sample(n=4) == 4)) - self.assertTrue(len(o.sample(frac=0.34) == 3)) - self.assertTrue(len(o.sample(frac=0.36) == 4)) - - ### - # Check weights - ### - - # Weight length must be right - with tm.assertRaises(ValueError): - o.sample(n=3, weights=[0, 1]) - - with tm.assertRaises(ValueError): - bad_weights = [0.5] * 11 - o.sample(n=3, weights=bad_weights) - - with tm.assertRaises(ValueError): - bad_weight_series = Series([0, 0, 0.2]) - o.sample(n=4, weights=bad_weight_series) - - # Check won't accept negative weights - with tm.assertRaises(ValueError): - bad_weights = [-0.1] * 10 - o.sample(n=3, weights=bad_weights) - - # Check inf and -inf throw errors: - with tm.assertRaises(ValueError): - weights_with_inf = [0.1] * 10 - weights_with_inf[0] = np.inf - o.sample(n=3, weights=weights_with_inf) - - with tm.assertRaises(ValueError): - weights_with_ninf = [0.1] * 10 - weights_with_ninf[0] = -np.inf - o.sample(n=3, weights=weights_with_ninf) - - # All zeros raises errors - zero_weights = [0] * 10 - with tm.assertRaises(ValueError): - o.sample(n=3, weights=zero_weights) - - # All missing weights - nan_weights = [np.nan] * 10 - with tm.assertRaises(ValueError): - o.sample(n=3, weights=nan_weights) - - # Check np.nan are replaced by zeros. - weights_with_nan = [np.nan] * 10 - weights_with_nan[5] = 0.5 - self._compare( - o.sample(n=1, axis=0, weights=weights_with_nan), o.iloc[5:6]) - - # Check None are also replaced by zeros. 
- weights_with_None = [None] * 10 - weights_with_None[5] = 0.5 - self._compare( - o.sample(n=1, axis=0, weights=weights_with_None), o.iloc[5:6]) - - def test_size_compat(self): - # GH8846 - # size property should be defined - - o = self._construct(shape=10) - self.assertTrue(o.size == np.prod(o.shape)) - self.assertTrue(o.size == 10 ** len(o.axes)) - - def test_split_compat(self): - # xref GH8846 - o = self._construct(shape=10) - self.assertTrue(len(np.array_split(o, 5)) == 5) - self.assertTrue(len(np.array_split(o, 2)) == 2) - - def test_unexpected_keyword(self): # GH8597 - df = DataFrame(np.random.randn(5, 2), columns=['jim', 'joe']) - ca = pd.Categorical([0, 0, 2, 2, 3, np.nan]) - ts = df['joe'].copy() - ts[2] = np.nan - - with assertRaisesRegexp(TypeError, 'unexpected keyword'): - df.drop('joe', axis=1, in_place=True) - - with assertRaisesRegexp(TypeError, 'unexpected keyword'): - df.reindex([1, 0], inplace=True) - - with assertRaisesRegexp(TypeError, 'unexpected keyword'): - ca.fillna(0, inplace=True) - - with assertRaisesRegexp(TypeError, 'unexpected keyword'): - ts.fillna(0, in_place=True) - - # See gh-12301 - def test_stat_unexpected_keyword(self): - obj = self._construct(5) - starwars = 'Star Wars' - errmsg = 'unexpected keyword' - - with assertRaisesRegexp(TypeError, errmsg): - obj.max(epic=starwars) # stat_function - with assertRaisesRegexp(TypeError, errmsg): - obj.var(epic=starwars) # stat_function_ddof - with assertRaisesRegexp(TypeError, errmsg): - obj.sum(epic=starwars) # cum_function - with assertRaisesRegexp(TypeError, errmsg): - obj.any(epic=starwars) # logical_function - - def test_api_compat(self): - - # GH 12021 - # compat for __name__, __qualname__ - - obj = self._construct(5) - for func in ['sum', 'cumsum', 'any', 'var']: - f = getattr(obj, func) - self.assertEqual(f.__name__, func) - if PY3: - self.assertTrue(f.__qualname__.endswith(func)) - - def test_stat_non_defaults_args(self): - obj = self._construct(5) - out = np.array([0]) - errmsg = "the 'out' parameter is not supported" - - with assertRaisesRegexp(ValueError, errmsg): - obj.max(out=out) # stat_function - with assertRaisesRegexp(ValueError, errmsg): - obj.var(out=out) # stat_function_ddof - with assertRaisesRegexp(ValueError, errmsg): - obj.sum(out=out) # cum_function - with assertRaisesRegexp(ValueError, errmsg): - obj.any(out=out) # logical_function - - def test_clip(self): - lower = 1 - upper = 3 - col = np.arange(5) - - obj = self._construct(len(col), value=col) - - if isinstance(obj, Panel): - msg = "clip is not supported yet for panels" - tm.assertRaisesRegexp(NotImplementedError, msg, - obj.clip, lower=lower, - upper=upper) - - else: - out = obj.clip(lower=lower, upper=upper) - expected = self._construct(len(col), value=col - .clip(lower, upper)) - self._compare(out, expected) - - bad_axis = 'foo' - msg = ('No axis named {axis} ' - 'for object').format(axis=bad_axis) - assertRaisesRegexp(ValueError, msg, obj.clip, - lower=lower, upper=upper, - axis=bad_axis) - - def test_truncate_out_of_bounds(self): - # GH11382 - - # small - shape = [int(2e3)] + ([1] * (self._ndim - 1)) - small = self._construct(shape, dtype='int8') - self._compare(small.truncate(), small) - self._compare(small.truncate(before=0, after=3e3), small) - self._compare(small.truncate(before=-1, after=2e3), small) - - # big - shape = [int(2e6)] + ([1] * (self._ndim - 1)) - big = self._construct(shape, dtype='int8') - self._compare(big.truncate(), big) - self._compare(big.truncate(before=0, after=3e6), big) - 
self._compare(big.truncate(before=-1, after=2e6), big) - - def test_numpy_clip(self): - lower = 1 - upper = 3 - col = np.arange(5) - - obj = self._construct(len(col), value=col) - - if isinstance(obj, Panel): - msg = "clip is not supported yet for panels" - tm.assertRaisesRegexp(NotImplementedError, msg, - np.clip, obj, - lower, upper) - else: - out = np.clip(obj, lower, upper) - expected = self._construct(len(col), value=col - .clip(lower, upper)) - self._compare(out, expected) - - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, - np.clip, obj, - lower, upper, out=col) - - def test_validate_bool_args(self): - df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) - invalid_values = [1, "True", [1, 2, 3], 5.0] - - for value in invalid_values: - with self.assertRaises(ValueError): - super(DataFrame, df).rename_axis(mapper={'a': 'x', 'b': 'y'}, - axis=1, inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df).drop('a', axis=1, inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df).sort_index(inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df)._consolidate(inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df).fillna(value=0, inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df).replace(to_replace=1, value=7, - inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df).interpolate(inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df)._where(cond=df.a > 2, inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df).mask(cond=df.a > 2, inplace=value) - - def test_copy_and_deepcopy(self): - # GH 15444 - for shape in [0, 1, 2]: - obj = self._construct(shape) - for func in [copy, - deepcopy, - lambda x: x.copy(deep=False), - lambda x: x.copy(deep=True)]: - obj_copy = func(obj) - assert obj_copy is not obj - self._compare(obj_copy, obj) - - -class TestSeries(tm.TestCase, Generic): - _typ = Series - _comparator = lambda self, x, y: assert_series_equal(x, y) - - def setUp(self): - self.ts = tm.makeTimeSeries() # Was at top level in test_series - self.ts.name = 'ts' - - self.series = tm.makeStringSeries() - self.series.name = 'series' - - def test_rename_mi(self): - s = Series([11, 21, 31], - index=MultiIndex.from_tuples( - [("A", x) for x in ["a", "B", "c"]])) - s.rename(str.lower) - - def test_set_axis_name(self): - s = Series([1, 2, 3], index=['a', 'b', 'c']) - funcs = ['rename_axis', '_set_axis_name'] - name = 'foo' - for func in funcs: - result = methodcaller(func, name)(s) - self.assertTrue(s.index.name is None) - self.assertEqual(result.index.name, name) - - def test_set_axis_name_mi(self): - s = Series([11, 21, 31], index=MultiIndex.from_tuples( - [("A", x) for x in ["a", "B", "c"]], - names=['l1', 'l2']) - ) - funcs = ['rename_axis', '_set_axis_name'] - for func in funcs: - result = methodcaller(func, ['L1', 'L2'])(s) - self.assertTrue(s.index.name is None) - self.assertEqual(s.index.names, ['l1', 'l2']) - self.assertTrue(result.index.name is None) - self.assertEqual(result.index.names, ['L1', 'L2']) - - def test_set_axis_name_raises(self): - s = pd.Series([1]) - with tm.assertRaises(ValueError): - s._set_axis_name(name='a', axis=1) - - def test_get_numeric_data_preserve_dtype(self): - - # get the numeric data - o = Series([1, 2, 3]) - result = o._get_numeric_data() - self._compare(result, o) - - o = Series([1, '2', 3.]) - result = o._get_numeric_data() - expected = Series([], 
dtype=object, index=pd.Index([], dtype=object)) - self._compare(result, expected) - - o = Series([True, False, True]) - result = o._get_numeric_data() - self._compare(result, o) - - o = Series([True, False, True]) - result = o._get_bool_data() - self._compare(result, o) - - o = Series(date_range('20130101', periods=3)) - result = o._get_numeric_data() - expected = Series([], dtype='M8[ns]', index=pd.Index([], dtype=object)) - self._compare(result, expected) - - def test_nonzero_single_element(self): - - # allow single item via bool method - s = Series([True]) - self.assertTrue(s.bool()) - - s = Series([False]) - self.assertFalse(s.bool()) - - # single item nan to raise - for s in [Series([np.nan]), Series([pd.NaT]), Series([True]), - Series([False])]: - self.assertRaises(ValueError, lambda: bool(s)) - - for s in [Series([np.nan]), Series([pd.NaT])]: - self.assertRaises(ValueError, lambda: s.bool()) - - # multiple bool are still an error - for s in [Series([True, True]), Series([False, False])]: - self.assertRaises(ValueError, lambda: bool(s)) - self.assertRaises(ValueError, lambda: s.bool()) - - # single non-bool are an error - for s in [Series([1]), Series([0]), Series(['a']), Series([0.0])]: - self.assertRaises(ValueError, lambda: bool(s)) - self.assertRaises(ValueError, lambda: s.bool()) - - def test_metadata_propagation_indiv(self): - # check that the metadata matches up on the resulting ops - - o = Series(range(3), range(3)) - o.name = 'foo' - o2 = Series(range(3), range(3)) - o2.name = 'bar' - - result = o.T - self.check_metadata(o, result) - - # resample - ts = Series(np.random.rand(1000), - index=date_range('20130101', periods=1000, freq='s'), - name='foo') - result = ts.resample('1T').mean() - self.check_metadata(ts, result) - - result = ts.resample('1T').min() - self.check_metadata(ts, result) - - result = ts.resample('1T').apply(lambda x: x.sum()) - self.check_metadata(ts, result) - - _metadata = Series._metadata - _finalize = Series.__finalize__ - Series._metadata = ['name', 'filename'] - o.filename = 'foo' - o2.filename = 'bar' - - def finalize(self, other, method=None, **kwargs): - for name in self._metadata: - if method == 'concat' and name == 'filename': - value = '+'.join([getattr( - o, name) for o in other.objs if getattr(o, name, None) - ]) - object.__setattr__(self, name, value) - else: - object.__setattr__(self, name, getattr(other, name, None)) - - return self - - Series.__finalize__ = finalize - - result = pd.concat([o, o2]) - self.assertEqual(result.filename, 'foo+bar') - self.assertIsNone(result.name) - - # reset - Series._metadata = _metadata - Series.__finalize__ = _finalize - - def test_describe(self): - self.series.describe() - self.ts.describe() - - def test_describe_objects(self): - s = Series(['a', 'b', 'b', np.nan, np.nan, np.nan, 'c', 'd', 'a', 'a']) - result = s.describe() - expected = Series({'count': 7, 'unique': 4, - 'top': 'a', 'freq': 3, 'second': 'b', - 'second_freq': 2}, index=result.index) - assert_series_equal(result, expected) - - dt = list(self.ts.index) - dt.append(dt[0]) - ser = Series(dt) - rs = ser.describe() - min_date = min(dt) - max_date = max(dt) - xp = Series({'count': len(dt), - 'unique': len(self.ts.index), - 'first': min_date, 'last': max_date, 'freq': 2, - 'top': min_date}, index=rs.index) - assert_series_equal(rs, xp) - - def test_describe_empty(self): - result = pd.Series().describe() - - self.assertEqual(result['count'], 0) - self.assertTrue(result.drop('count').isnull().all()) - - nanSeries = Series([np.nan]) - nanSeries.name = 
'NaN' - result = nanSeries.describe() - self.assertEqual(result['count'], 0) - self.assertTrue(result.drop('count').isnull().all()) - - def test_describe_none(self): - noneSeries = Series([None]) - noneSeries.name = 'None' - expected = Series([0, 0], index=['count', 'unique'], name='None') - assert_series_equal(noneSeries.describe(), expected) - - def test_to_xarray(self): - - tm._skip_if_no_xarray() - import xarray - from xarray import DataArray - - s = Series([]) - s.index.name = 'foo' - result = s.to_xarray() - self.assertEqual(len(result), 0) - self.assertEqual(len(result.coords), 1) - assert_almost_equal(list(result.coords.keys()), ['foo']) - self.assertIsInstance(result, DataArray) - - def testit(index, check_index_type=True, check_categorical=True): - s = Series(range(6), index=index(6)) - s.index.name = 'foo' - result = s.to_xarray() - repr(result) - self.assertEqual(len(result), 6) - self.assertEqual(len(result.coords), 1) - assert_almost_equal(list(result.coords.keys()), ['foo']) - self.assertIsInstance(result, DataArray) - - # idempotency - assert_series_equal(result.to_series(), s, - check_index_type=check_index_type, - check_categorical=check_categorical) - - l = [tm.makeFloatIndex, tm.makeIntIndex, - tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex, - tm.makeTimedeltaIndex] - - if LooseVersion(xarray.__version__) >= '0.8.0': - l.append(tm.makeCategoricalIndex) - - for index in l: - testit(index) - - s = Series(range(6)) - s.index.name = 'foo' - s.index = pd.MultiIndex.from_product([['a', 'b'], range(3)], - names=['one', 'two']) - result = s.to_xarray() - self.assertEqual(len(result), 2) - assert_almost_equal(list(result.coords.keys()), ['one', 'two']) - self.assertIsInstance(result, DataArray) - assert_series_equal(result.to_series(), s) - - -class TestDataFrame(tm.TestCase, Generic): - _typ = DataFrame - _comparator = lambda self, x, y: assert_frame_equal(x, y) - - def test_rename_mi(self): - df = DataFrame([ - 11, 21, 31 - ], index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]])) - df.rename(str.lower) - - def test_set_axis_name(self): - df = pd.DataFrame([[1, 2], [3, 4]]) - funcs = ['_set_axis_name', 'rename_axis'] - for func in funcs: - result = methodcaller(func, 'foo')(df) - self.assertTrue(df.index.name is None) - self.assertEqual(result.index.name, 'foo') - - result = methodcaller(func, 'cols', axis=1)(df) - self.assertTrue(df.columns.name is None) - self.assertEqual(result.columns.name, 'cols') - - def test_set_axis_name_mi(self): - df = DataFrame( - np.empty((3, 3)), - index=MultiIndex.from_tuples([("A", x) for x in list('aBc')]), - columns=MultiIndex.from_tuples([('C', x) for x in list('xyz')]) - ) - - level_names = ['L1', 'L2'] - funcs = ['_set_axis_name', 'rename_axis'] - for func in funcs: - result = methodcaller(func, level_names)(df) - self.assertEqual(result.index.names, level_names) - self.assertEqual(result.columns.names, [None, None]) - - result = methodcaller(func, level_names, axis=1)(df) - self.assertEqual(result.columns.names, ["L1", "L2"]) - self.assertEqual(result.index.names, [None, None]) - - def test_nonzero_single_element(self): - - # allow single item via bool method - df = DataFrame([[True]]) - self.assertTrue(df.bool()) - - df = DataFrame([[False]]) - self.assertFalse(df.bool()) - - df = DataFrame([[False, False]]) - self.assertRaises(ValueError, lambda: df.bool()) - self.assertRaises(ValueError, lambda: bool(df)) - - def test_get_numeric_data_preserve_dtype(self): - - # get the numeric data - o = 
DataFrame({'A': [1, '2', 3.]}) - result = o._get_numeric_data() - expected = DataFrame(index=[0, 1, 2], dtype=object) - self._compare(result, expected) - - def test_describe(self): - tm.makeDataFrame().describe() - tm.makeMixedDataFrame().describe() - tm.makeTimeDataFrame().describe() - - def test_describe_percentiles_percent_or_raw(self): - msg = 'percentiles should all be in the interval \\[0, 1\\]' - - df = tm.makeDataFrame() - with tm.assertRaisesRegexp(ValueError, msg): - df.describe(percentiles=[10, 50, 100]) - - with tm.assertRaisesRegexp(ValueError, msg): - df.describe(percentiles=[2]) - - with tm.assertRaisesRegexp(ValueError, msg): - df.describe(percentiles=[-2]) - - def test_describe_percentiles_equivalence(self): - df = tm.makeDataFrame() - d1 = df.describe() - d2 = df.describe(percentiles=[.25, .75]) - assert_frame_equal(d1, d2) - - def test_describe_percentiles_insert_median(self): - df = tm.makeDataFrame() - d1 = df.describe(percentiles=[.25, .75]) - d2 = df.describe(percentiles=[.25, .5, .75]) - assert_frame_equal(d1, d2) - self.assertTrue('25%' in d1.index) - self.assertTrue('75%' in d2.index) - - # none above - d1 = df.describe(percentiles=[.25, .45]) - d2 = df.describe(percentiles=[.25, .45, .5]) - assert_frame_equal(d1, d2) - self.assertTrue('25%' in d1.index) - self.assertTrue('45%' in d2.index) - - # none below - d1 = df.describe(percentiles=[.75, 1]) - d2 = df.describe(percentiles=[.5, .75, 1]) - assert_frame_equal(d1, d2) - self.assertTrue('75%' in d1.index) - self.assertTrue('100%' in d2.index) - - # edge - d1 = df.describe(percentiles=[0, 1]) - d2 = df.describe(percentiles=[0, .5, 1]) - assert_frame_equal(d1, d2) - self.assertTrue('0%' in d1.index) - self.assertTrue('100%' in d2.index) - - def test_describe_percentiles_insert_median_ndarray(self): - # GH14908 - df = tm.makeDataFrame() - result = df.describe(percentiles=np.array([.25, .75])) - expected = df.describe(percentiles=[.25, .75]) - assert_frame_equal(result, expected) - - def test_describe_percentiles_unique(self): - # GH13104 - df = tm.makeDataFrame() - with self.assertRaises(ValueError): - df.describe(percentiles=[0.1, 0.2, 0.4, 0.5, 0.2, 0.6]) - with self.assertRaises(ValueError): - df.describe(percentiles=[0.1, 0.2, 0.4, 0.2, 0.6]) - - def test_describe_percentiles_formatting(self): - # GH13104 - df = tm.makeDataFrame() - - # default - result = df.describe().index - expected = Index(['count', 'mean', 'std', 'min', '25%', '50%', '75%', - 'max'], - dtype='object') - tm.assert_index_equal(result, expected) - - result = df.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, - 0.9995, 0.9999]).index - expected = Index(['count', 'mean', 'std', 'min', '0.01%', '0.05%', - '0.1%', '50%', '99.9%', '99.95%', '99.99%', 'max'], - dtype='object') - tm.assert_index_equal(result, expected) - - result = df.describe(percentiles=[0.00499, 0.005, 0.25, 0.50, - 0.75]).index - expected = Index(['count', 'mean', 'std', 'min', '0.499%', '0.5%', - '25%', '50%', '75%', 'max'], - dtype='object') - tm.assert_index_equal(result, expected) - - result = df.describe(percentiles=[0.00499, 0.01001, 0.25, 0.50, - 0.75]).index - expected = Index(['count', 'mean', 'std', 'min', '0.5%', '1.0%', - '25%', '50%', '75%', 'max'], - dtype='object') - tm.assert_index_equal(result, expected) - - def test_describe_column_index_type(self): - # GH13288 - df = pd.DataFrame([1, 2, 3, 4]) - df.columns = pd.Index([0], dtype=object) - result = df.describe().columns - expected = Index([0], dtype=object) - tm.assert_index_equal(result, expected) - - df = 
pd.DataFrame({'A': list("BCDE"), 0: [1, 2, 3, 4]}) - result = df.describe().columns - expected = Index([0], dtype=object) - tm.assert_index_equal(result, expected) - - def test_describe_no_numeric(self): - df = DataFrame({'A': ['foo', 'foo', 'bar'] * 8, - 'B': ['a', 'b', 'c', 'd'] * 6}) - desc = df.describe() - expected = DataFrame(dict((k, v.describe()) - for k, v in compat.iteritems(df)), - columns=df.columns) - assert_frame_equal(desc, expected) - - ts = tm.makeTimeSeries() - df = DataFrame({'time': ts.index}) - desc = df.describe() - self.assertEqual(desc.time['first'], min(ts.index)) - - def test_describe_empty(self): - df = DataFrame() - tm.assertRaisesRegexp(ValueError, 'DataFrame without columns', - df.describe) - - df = DataFrame(columns=['A', 'B']) - result = df.describe() - expected = DataFrame(0, columns=['A', 'B'], index=['count', 'unique']) - tm.assert_frame_equal(result, expected) - - def test_describe_empty_int_columns(self): - df = DataFrame([[0, 1], [1, 2]]) - desc = df[df[0] < 0].describe() # works - assert_series_equal(desc.xs('count'), - Series([0, 0], dtype=float, name='count')) - self.assertTrue(isnull(desc.iloc[1:]).all().all()) - - def test_describe_objects(self): - df = DataFrame({"C1": ['a', 'a', 'c'], "C2": ['d', 'd', 'f']}) - result = df.describe() - expected = DataFrame({"C1": [3, 2, 'a', 2], "C2": [3, 2, 'd', 2]}, - index=['count', 'unique', 'top', 'freq']) - assert_frame_equal(result, expected) - - df = DataFrame({"C1": pd.date_range('2010-01-01', periods=4, freq='D') - }) - df.loc[4] = pd.Timestamp('2010-01-04') - result = df.describe() - expected = DataFrame({"C1": [5, 4, pd.Timestamp('2010-01-04'), 2, - pd.Timestamp('2010-01-01'), - pd.Timestamp('2010-01-04')]}, - index=['count', 'unique', 'top', 'freq', - 'first', 'last']) - assert_frame_equal(result, expected) - - # mix time and str - df['C2'] = ['a', 'a', 'b', 'c', 'a'] - result = df.describe() - expected['C2'] = [5, 3, 'a', 3, np.nan, np.nan] - assert_frame_equal(result, expected) - - # just str ('a' appears 3 times, so freq is 3) - expected = DataFrame({'C2': [5, 3, 'a', 3]}, - index=['count', 'unique', 'top', 'freq']) - result = df[['C2']].describe() - assert_frame_equal(result, expected) - - # mix of time, str, numeric - df['C3'] = [2, 4, 6, 8, 2] - result = df.describe() - expected = DataFrame({"C3": [5., 4.4, 2.607681, 2., 2., 4., 6., 8.]}, - index=['count', 'mean', 'std', 'min', '25%', - '50%', '75%', 'max']) - assert_frame_equal(result, expected) - assert_frame_equal(df.describe(), df[['C3']].describe()) - - assert_frame_equal(df[['C1', 'C3']].describe(), df[['C3']].describe()) - assert_frame_equal(df[['C2', 'C3']].describe(), df[['C3']].describe()) - - def test_describe_typefiltering(self): - df = DataFrame({'catA': ['foo', 'foo', 'bar'] * 8, - 'catB': ['a', 'b', 'c', 'd'] * 6, - 'numC': np.arange(24, dtype='int64'), - 'numD': np.arange(24.) 
+ .5, - 'ts': tm.makeTimeSeries()[:24].index}) - - descN = df.describe() - expected_cols = ['numC', 'numD', ] - expected = DataFrame(dict((k, df[k].describe()) - for k in expected_cols), - columns=expected_cols) - assert_frame_equal(descN, expected) - - desc = df.describe(include=['number']) - assert_frame_equal(desc, descN) - desc = df.describe(exclude=['object', 'datetime']) - assert_frame_equal(desc, descN) - desc = df.describe(include=['float']) - assert_frame_equal(desc, descN.drop('numC', 1)) - - descC = df.describe(include=['O']) - expected_cols = ['catA', 'catB'] - expected = DataFrame(dict((k, df[k].describe()) - for k in expected_cols), - columns=expected_cols) - assert_frame_equal(descC, expected) - - descD = df.describe(include=['datetime']) - assert_series_equal(descD.ts, df.ts.describe()) - - desc = df.describe(include=['object', 'number', 'datetime']) - assert_frame_equal(desc.loc[:, ["numC", "numD"]].dropna(), descN) - assert_frame_equal(desc.loc[:, ["catA", "catB"]].dropna(), descC) - descDs = descD.sort_index() # the index order change for mixed-types - assert_frame_equal(desc.loc[:, "ts":].dropna().sort_index(), descDs) - - desc = df.loc[:, 'catA':'catB'].describe(include='all') - assert_frame_equal(desc, descC) - desc = df.loc[:, 'numC':'numD'].describe(include='all') - assert_frame_equal(desc, descN) - - desc = df.describe(percentiles=[], include='all') - cnt = Series(data=[4, 4, 6, 6, 6], - index=['catA', 'catB', 'numC', 'numD', 'ts']) - assert_series_equal(desc.count(), cnt) - self.assertTrue('count' in desc.index) - self.assertTrue('unique' in desc.index) - self.assertTrue('50%' in desc.index) - self.assertTrue('first' in desc.index) - - desc = df.drop("ts", 1).describe(percentiles=[], include='all') - assert_series_equal(desc.count(), cnt.drop("ts")) - self.assertTrue('first' not in desc.index) - desc = df.drop(["numC", "numD"], 1).describe(percentiles=[], - include='all') - assert_series_equal(desc.count(), cnt.drop(["numC", "numD"])) - self.assertTrue('50%' not in desc.index) - - def test_describe_typefiltering_category_bool(self): - df = DataFrame({'A_cat': pd.Categorical(['foo', 'foo', 'bar'] * 8), - 'B_str': ['a', 'b', 'c', 'd'] * 6, - 'C_bool': [True] * 12 + [False] * 12, - 'D_num': np.arange(24.) + .5, - 'E_ts': tm.makeTimeSeries()[:24].index}) - - desc = df.describe() - expected_cols = ['D_num'] - expected = DataFrame(dict((k, df[k].describe()) - for k in expected_cols), - columns=expected_cols) - assert_frame_equal(desc, expected) - - desc = df.describe(include=["category"]) - self.assertTrue(desc.columns.tolist() == ["A_cat"]) - - # 'all' includes numpy-dtypes + category - desc1 = df.describe(include="all") - desc2 = df.describe(include=[np.generic, "category"]) - assert_frame_equal(desc1, desc2) - - def test_describe_timedelta(self): - df = DataFrame({"td": pd.to_timedelta(np.arange(24) % 20, "D")}) - self.assertTrue(df.describe().loc["mean"][0] == pd.to_timedelta( - "8d4h")) - - def test_describe_typefiltering_dupcol(self): - df = DataFrame({'catA': ['foo', 'foo', 'bar'] * 8, - 'catB': ['a', 'b', 'c', 'd'] * 6, - 'numC': np.arange(24), - 'numD': np.arange(24.) + .5, - 'ts': tm.makeTimeSeries()[:24].index}) - s = df.describe(include='all').shape[1] - df = pd.concat([df, df], axis=1) - s2 = df.describe(include='all').shape[1] - self.assertTrue(s2 == 2 * s) - - def test_describe_typefiltering_groupby(self): - df = DataFrame({'catA': ['foo', 'foo', 'bar'] * 8, - 'catB': ['a', 'b', 'c', 'd'] * 6, - 'numC': np.arange(24), - 'numD': np.arange(24.) 
+ .5, - 'ts': tm.makeTimeSeries()[:24].index}) - G = df.groupby('catA') - self.assertTrue(G.describe(include=['number']).shape == (2, 16)) - self.assertTrue(G.describe(include=['number', 'object']).shape == (2, - 33)) - self.assertTrue(G.describe(include='all').shape == (2, 52)) - - def test_describe_multi_index_df_column_names(self): - """ Test that column names persist after the describe operation.""" - - df = pd.DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - - # GH 11517 - # test for hierarchical index - hierarchical_index_df = df.groupby(['A', 'B']).mean().T - self.assertTrue(hierarchical_index_df.columns.names == ['A', 'B']) - self.assertTrue(hierarchical_index_df.describe().columns.names == - ['A', 'B']) - - # test for non-hierarchical index - non_hierarchical_index_df = df.groupby(['A']).mean().T - self.assertTrue(non_hierarchical_index_df.columns.names == ['A']) - self.assertTrue(non_hierarchical_index_df.describe().columns.names == - ['A']) - - def test_metadata_propagation_indiv(self): - - # groupby - df = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - result = df.groupby('A').sum() - self.check_metadata(df, result) - - # resample - df = DataFrame(np.random.randn(1000, 2), - index=date_range('20130101', periods=1000, freq='s')) - result = df.resample('1T') - self.check_metadata(df, result) - - # merging with override - # GH 6923 - _metadata = DataFrame._metadata - _finalize = DataFrame.__finalize__ - - np.random.seed(10) - df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=['a', 'b']) - df2 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=['c', 'd']) - DataFrame._metadata = ['filename'] - df1.filename = 'fname1.csv' - df2.filename = 'fname2.csv' - - def finalize(self, other, method=None, **kwargs): - - for name in self._metadata: - if method == 'merge': - left, right = other.left, other.right - value = getattr(left, name, '') + '|' + getattr(right, - name, '') - object.__setattr__(self, name, value) - else: - object.__setattr__(self, name, getattr(other, name, '')) - - return self - - DataFrame.__finalize__ = finalize - result = df1.merge(df2, left_on=['a'], right_on=['c'], how='inner') - self.assertEqual(result.filename, 'fname1.csv|fname2.csv') - - # concat - # GH 6927 - DataFrame._metadata = ['filename'] - df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=list('ab')) - df1.filename = 'foo' - - def finalize(self, other, method=None, **kwargs): - for name in self._metadata: - if method == 'concat': - value = '+'.join([getattr( - o, name) for o in other.objs if getattr(o, name, None) - ]) - object.__setattr__(self, name, value) - else: - object.__setattr__(self, name, getattr(other, name, None)) - - return self - - DataFrame.__finalize__ = finalize - - result = pd.concat([df1, df1]) - self.assertEqual(result.filename, 'foo+foo') - - # reset - DataFrame._metadata = _metadata - DataFrame.__finalize__ = _finalize - - def test_tz_convert_and_localize(self): - l0 = date_range('20140701', periods=5, freq='D') - - # TODO: l1 should be a PeriodIndex for testing - # after GH2106 is addressed - with tm.assertRaises(NotImplementedError): - period_range('20140701', periods=1).tz_convert('UTC') - with tm.assertRaises(NotImplementedError): - period_range('20140701', 
periods=1).tz_localize('UTC') - # l1 = period_range('20140701', periods=5, freq='D') - l1 = date_range('20140701', periods=5, freq='D') - - int_idx = Index(range(5)) - - for fn in ['tz_localize', 'tz_convert']: - - if fn == 'tz_convert': - l0 = l0.tz_localize('UTC') - l1 = l1.tz_localize('UTC') - - for idx in [l0, l1]: - - l0_expected = getattr(idx, fn)('US/Pacific') - l1_expected = getattr(idx, fn)('US/Pacific') - - df1 = DataFrame(np.ones(5), index=l0) - df1 = getattr(df1, fn)('US/Pacific') - self.assert_index_equal(df1.index, l0_expected) - - # MultiIndex - # GH7846 - df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1])) - - df3 = getattr(df2, fn)('US/Pacific', level=0) - self.assertFalse(df3.index.levels[0].equals(l0)) - self.assert_index_equal(df3.index.levels[0], l0_expected) - self.assert_index_equal(df3.index.levels[1], l1) - self.assertFalse(df3.index.levels[1].equals(l1_expected)) - - df3 = getattr(df2, fn)('US/Pacific', level=1) - self.assert_index_equal(df3.index.levels[0], l0) - self.assertFalse(df3.index.levels[0].equals(l0_expected)) - self.assert_index_equal(df3.index.levels[1], l1_expected) - self.assertFalse(df3.index.levels[1].equals(l1)) - - df4 = DataFrame(np.ones(5), - MultiIndex.from_arrays([int_idx, l0])) - - # TODO: untested - df5 = getattr(df4, fn)('US/Pacific', level=1) # noqa - - self.assert_index_equal(df3.index.levels[0], l0) - self.assertFalse(df3.index.levels[0].equals(l0_expected)) - self.assert_index_equal(df3.index.levels[1], l1_expected) - self.assertFalse(df3.index.levels[1].equals(l1)) - - # Bad Inputs - for fn in ['tz_localize', 'tz_convert']: - # Not DatetimeIndex / PeriodIndex - with tm.assertRaisesRegexp(TypeError, 'DatetimeIndex'): - df = DataFrame(index=int_idx) - df = getattr(df, fn)('US/Pacific') - - # Not DatetimeIndex / PeriodIndex - with tm.assertRaisesRegexp(TypeError, 'DatetimeIndex'): - df = DataFrame(np.ones(5), - MultiIndex.from_arrays([int_idx, l0])) - df = getattr(df, fn)('US/Pacific', level=0) - - # Invalid level - with tm.assertRaisesRegexp(ValueError, 'not valid'): - df = DataFrame(index=l0) - df = getattr(df, fn)('US/Pacific', level=1) - - def test_set_attribute(self): - # Test for consistent setattr behavior when an attribute and a column - # have the same name (Issue #8994) - df = DataFrame({'x': [1, 2, 3]}) - - df.y = 2 - df['y'] = [2, 4, 6] - df.y = 5 - - self.assertEqual(df.y, 5) - assert_series_equal(df['y'], Series([2, 4, 6], name='y')) - - def test_pct_change(self): - # GH 11150 - pnl = DataFrame([np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange( - 0, 40, 10)]).astype(np.float64) - pnl.iat[1, 0] = np.nan - pnl.iat[1, 1] = np.nan - pnl.iat[2, 3] = 60 - - mask = pnl.isnull() - - for axis in range(2): - expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift( - axis=axis) - 1 - expected[mask] = np.nan - result = pnl.pct_change(axis=axis, fill_method='pad') - - self.assert_frame_equal(result, expected) - - def test_to_xarray(self): - - tm._skip_if_no_xarray() - from xarray import Dataset - - df = DataFrame({'a': list('abc'), - 'b': list(range(1, 4)), - 'c': np.arange(3, 6).astype('u1'), - 'd': np.arange(4.0, 7.0, dtype='float64'), - 'e': [True, False, True], - 'f': pd.Categorical(list('abc')), - 'g': pd.date_range('20130101', periods=3), - 'h': pd.date_range('20130101', - periods=3, - tz='US/Eastern')} - ) - - df.index.name = 'foo' - result = df[0:0].to_xarray() - self.assertEqual(result.dims['foo'], 0) - self.assertIsInstance(result, Dataset) - - for index in [tm.makeFloatIndex, tm.makeIntIndex, - 
tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex, - tm.makeCategoricalIndex, tm.makeTimedeltaIndex]: - df.index = index(3) - df.index.name = 'foo' - df.columns.name = 'bar' - result = df.to_xarray() - self.assertEqual(result.dims['foo'], 3) - self.assertEqual(len(result.coords), 1) - self.assertEqual(len(result.data_vars), 8) - assert_almost_equal(list(result.coords.keys()), ['foo']) - self.assertIsInstance(result, Dataset) - - # idempotency - # categoricals are not preserved - # datetimes w/tz are not preserved - # column names are lost - expected = df.copy() - expected['f'] = expected['f'].astype(object) - expected['h'] = expected['h'].astype('datetime64[ns]') - expected.columns.name = None - assert_frame_equal(result.to_dataframe(), expected, - check_index_type=False, check_categorical=False) - - # available in 0.7.1 - # MultiIndex - df.index = pd.MultiIndex.from_product([['a'], range(3)], - names=['one', 'two']) - result = df.to_xarray() - self.assertEqual(result.dims['one'], 1) - self.assertEqual(result.dims['two'], 3) - self.assertEqual(len(result.coords), 2) - self.assertEqual(len(result.data_vars), 8) - assert_almost_equal(list(result.coords.keys()), ['one', 'two']) - self.assertIsInstance(result, Dataset) - - result = result.to_dataframe() - expected = df.copy() - expected['f'] = expected['f'].astype(object) - expected['h'] = expected['h'].astype('datetime64[ns]') - expected.columns.name = None - assert_frame_equal(result, - expected, - check_index_type=False) - - def test_deepcopy_empty(self): - # This test covers empty frame copying with non-empty column sets - # as reported in issue GH15370 - empty_frame = DataFrame(data=[], index=[], columns=['A']) - empty_frame_copy = deepcopy(empty_frame) - - self._compare(empty_frame_copy, empty_frame) - - -class TestPanel(tm.TestCase, Generic): - _typ = Panel - _comparator = lambda self, x, y: assert_panel_equal(x, y, by_blocks=True) - - def test_to_xarray(self): - - tm._skip_if_no_xarray() - from xarray import DataArray - - with catch_warnings(record=True): - p = tm.makePanel() - - result = p.to_xarray() - self.assertIsInstance(result, DataArray) - self.assertEqual(len(result.coords), 3) - assert_almost_equal(list(result.coords.keys()), - ['items', 'major_axis', 'minor_axis']) - self.assertEqual(len(result.dims), 3) - - # idempotency - assert_panel_equal(result.to_pandas(), p) - - -class TestPanel4D(tm.TestCase, Generic): - _typ = Panel4D - _comparator = lambda self, x, y: assert_panel4d_equal(x, y, by_blocks=True) - - def test_sample(self): - pytest.skip("sample on Panel4D") - - def test_to_xarray(self): - - tm._skip_if_no_xarray() - from xarray import DataArray - - with catch_warnings(record=True): - p = tm.makePanel4D() - - result = p.to_xarray() - self.assertIsInstance(result, DataArray) - self.assertEqual(len(result.coords), 4) - assert_almost_equal(list(result.coords.keys()), - ['labels', 'items', 'major_axis', - 'minor_axis']) - self.assertEqual(len(result.dims), 4) - - # non-convertible - self.assertRaises(ValueError, lambda: result.to_pandas()) - - -# run all the tests, but wrap each in a warning catcher -for t in ['test_rename', 'test_rename_axis', 'test_get_numeric_data', - 'test_get_default', 'test_nonzero', - 'test_numpy_1_7_compat_numeric_methods', - 'test_downcast', 'test_constructor_compound_dtypes', - 'test_head_tail', - 'test_size_compat', 'test_split_compat', - 'test_unexpected_keyword', - 'test_stat_unexpected_keyword', 'test_api_compat', - 'test_stat_non_defaults_args', - 'test_clip', 
'test_truncate_out_of_bounds', 'test_numpy_clip', - 'test_metadata_propagation', 'test_copy_and_deepcopy', - 'test_sample']: - - def f(t=t): - def tester(self): - with catch_warnings(record=True): - return getattr(super(TestPanel, self), t)() - return tester - - setattr(TestPanel, t, f()) - - def f(t=t): - def tester(self): - with catch_warnings(record=True): - return getattr(super(TestPanel4D, self), t)() - return tester - - setattr(TestPanel4D, t, f()) - - -class TestNDFrame(tm.TestCase): - # tests that don't fit elsewhere - - def test_sample(self): - # Fixes issue: 2419 - # additional specific object based tests - - # A few dataframe tests with degenerate weights. - easy_weight_list = [0] * 10 - easy_weight_list[5] = 1 - - df = pd.DataFrame({'col1': range(10, 20), - 'col2': range(20, 30), - 'colString': ['a'] * 10, - 'easyweights': easy_weight_list}) - sample1 = df.sample(n=1, weights='easyweights') - assert_frame_equal(sample1, df.iloc[5:6]) - - # Ensure proper error if string given as weight for Series, panel, or - # DataFrame with axis = 1. - s = Series(range(10)) - with tm.assertRaises(ValueError): - s.sample(n=3, weights='weight_column') - - with catch_warnings(record=True): - panel = Panel(items=[0, 1, 2], major_axis=[2, 3, 4], - minor_axis=[3, 4, 5]) - with tm.assertRaises(ValueError): - panel.sample(n=1, weights='weight_column') - - with tm.assertRaises(ValueError): - df.sample(n=1, weights='weight_column', axis=1) - - # Check weighting key error - with tm.assertRaises(KeyError): - df.sample(n=3, weights='not_a_real_column_name') - - # Check that it re-normalizes weights that don't sum to one. - weights_less_than_1 = [0] * 10 - weights_less_than_1[0] = 0.5 - tm.assert_frame_equal( - df.sample(n=1, weights=weights_less_than_1), df.iloc[:1]) - - ### - # Test axis argument - ### - - # Test axis argument - df = pd.DataFrame({'col1': range(10), 'col2': ['a'] * 10}) - second_column_weight = [0, 1] - assert_frame_equal( - df.sample(n=1, axis=1, weights=second_column_weight), df[['col2']]) - - # Different axis arg types - assert_frame_equal(df.sample(n=1, axis='columns', - weights=second_column_weight), - df[['col2']]) - - weight = [0] * 10 - weight[5] = 0.5 - assert_frame_equal(df.sample(n=1, axis='rows', weights=weight), - df.iloc[5:6]) - assert_frame_equal(df.sample(n=1, axis='index', weights=weight), - df.iloc[5:6]) - - # Check out of range axis values - with tm.assertRaises(ValueError): - df.sample(n=1, axis=2) - - with tm.assertRaises(ValueError): - df.sample(n=1, axis='not_a_name') - - with tm.assertRaises(ValueError): - s = pd.Series(range(10)) - s.sample(n=1, axis=1) - - # Test weight length compared to correct axis - with tm.assertRaises(ValueError): - df.sample(n=1, axis=1, weights=[0.5] * 10) - - # Check weights with axis = 1 - easy_weight_list = [0] * 3 - easy_weight_list[2] = 1 - - df = pd.DataFrame({'col1': range(10, 20), - 'col2': range(20, 30), - 'colString': ['a'] * 10}) - sample1 = df.sample(n=1, axis=1, weights=easy_weight_list) - assert_frame_equal(sample1, df[['colString']]) - - # Test default axes - with catch_warnings(record=True): - p = Panel(items=['a', 'b', 'c'], major_axis=[2, 4, 6], - minor_axis=[1, 3, 5]) - assert_panel_equal( - p.sample(n=3, random_state=42), p.sample(n=3, axis=1, - random_state=42)) - assert_frame_equal( - df.sample(n=3, random_state=42), df.sample(n=3, axis=0, - random_state=42)) - - # Test that function aligns weights with frame - df = DataFrame( - {'col1': [5, 6, 7], - 'col2': ['a', 'b', 'c'], }, index=[9, 5, 3]) - s = Series([1, 0, 0], index=[3, 
5, 9]) - assert_frame_equal(df.loc[[3]], df.sample(1, weights=s)) - - # Weights have index values to be dropped because not in - # sampled DataFrame - s2 = Series([0.001, 0, 10000], index=[3, 5, 10]) - assert_frame_equal(df.loc[[3]], df.sample(1, weights=s2)) - - # Weights have empty values to be filled with zeros - s3 = Series([0.01, 0], index=[3, 5]) - assert_frame_equal(df.loc[[3]], df.sample(1, weights=s3)) - - # No overlap in weight and sampled DataFrame indices - s4 = Series([1, 0], index=[1, 2]) - with tm.assertRaises(ValueError): - df.sample(1, weights=s4) - - def test_squeeze(self): - # noop - for s in [tm.makeFloatSeries(), tm.makeStringSeries(), - tm.makeObjectSeries()]: - tm.assert_series_equal(s.squeeze(), s) - for df in [tm.makeTimeDataFrame()]: - tm.assert_frame_equal(df.squeeze(), df) - with catch_warnings(record=True): - for p in [tm.makePanel()]: - tm.assert_panel_equal(p.squeeze(), p) - with catch_warnings(record=True): - for p4d in [tm.makePanel4D()]: - tm.assert_panel4d_equal(p4d.squeeze(), p4d) - - # squeezing - df = tm.makeTimeDataFrame().reindex(columns=['A']) - tm.assert_series_equal(df.squeeze(), df['A']) - - with catch_warnings(record=True): - p = tm.makePanel().reindex(items=['ItemA']) - tm.assert_frame_equal(p.squeeze(), p['ItemA']) - - p = tm.makePanel().reindex(items=['ItemA'], minor_axis=['A']) - tm.assert_series_equal(p.squeeze(), p.loc['ItemA', :, 'A']) - - with catch_warnings(record=True): - p4d = tm.makePanel4D().reindex(labels=['label1']) - tm.assert_panel_equal(p4d.squeeze(), p4d['label1']) - - with catch_warnings(record=True): - p4d = tm.makePanel4D().reindex(labels=['label1'], items=['ItemA']) - tm.assert_frame_equal(p4d.squeeze(), p4d.loc['label1', 'ItemA']) - - # don't fail with 0 length dimensions GH11229 & GH8999 - empty_series = Series([], name='five') - empty_frame = DataFrame([empty_series]) - with catch_warnings(record=True): - empty_panel = Panel({'six': empty_frame}) - - [tm.assert_series_equal(empty_series, higher_dim.squeeze()) - for higher_dim in [empty_series, empty_frame, empty_panel]] - - # axis argument - df = tm.makeTimeDataFrame(nper=1).iloc[:, :1] - assert df.shape == (1, 1) - tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0]) - tm.assert_series_equal(df.squeeze(axis='index'), df.iloc[0]) - tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0]) - tm.assert_series_equal(df.squeeze(axis='columns'), df.iloc[:, 0]) - assert df.squeeze() == df.iloc[0, 0] - tm.assertRaises(ValueError, df.squeeze, axis=2) - tm.assertRaises(ValueError, df.squeeze, axis='x') - - df = tm.makeTimeDataFrame(3) - tm.assert_frame_equal(df.squeeze(axis=0), df) - - def test_numpy_squeeze(self): - s = tm.makeFloatSeries() - tm.assert_series_equal(np.squeeze(s), s) - - df = tm.makeTimeDataFrame().reindex(columns=['A']) - tm.assert_series_equal(np.squeeze(df), df['A']) - - def test_transpose(self): - msg = (r"transpose\(\) got multiple values for " - r"keyword argument 'axes'") - for s in [tm.makeFloatSeries(), tm.makeStringSeries(), - tm.makeObjectSeries()]: - # calls implementation in pandas/core/base.py - tm.assert_series_equal(s.transpose(), s) - for df in [tm.makeTimeDataFrame()]: - tm.assert_frame_equal(df.transpose().transpose(), df) - - with catch_warnings(record=True): - for p in [tm.makePanel()]: - tm.assert_panel_equal(p.transpose(2, 0, 1) - .transpose(1, 2, 0), p) - tm.assertRaisesRegexp(TypeError, msg, p.transpose, - 2, 0, 1, axes=(2, 0, 1)) - - with catch_warnings(record=True): - for p4d in [tm.makePanel4D()]: - 
tm.assert_panel4d_equal(p4d.transpose(2, 0, 3, 1) - .transpose(1, 3, 0, 2), p4d) - tm.assertRaisesRegexp(TypeError, msg, p4d.transpose, - 2, 0, 3, 1, axes=(2, 0, 3, 1)) - - def test_numpy_transpose(self): - msg = "the 'axes' parameter is not supported" - - s = tm.makeFloatSeries() - tm.assert_series_equal( - np.transpose(s), s) - tm.assertRaisesRegexp(ValueError, msg, - np.transpose, s, axes=1) - - df = tm.makeTimeDataFrame() - tm.assert_frame_equal(np.transpose( - np.transpose(df)), df) - tm.assertRaisesRegexp(ValueError, msg, - np.transpose, df, axes=1) - - with catch_warnings(record=True): - p = tm.makePanel() - tm.assert_panel_equal(np.transpose( - np.transpose(p, axes=(2, 0, 1)), - axes=(1, 2, 0)), p) - - with catch_warnings(record=True): - p4d = tm.makePanel4D() - tm.assert_panel4d_equal(np.transpose( - np.transpose(p4d, axes=(2, 0, 3, 1)), - axes=(1, 3, 0, 2)), p4d) - - def test_take(self): - indices = [1, 5, -2, 6, 3, -1] - for s in [tm.makeFloatSeries(), tm.makeStringSeries(), - tm.makeObjectSeries()]: - out = s.take(indices) - expected = Series(data=s.values.take(indices), - index=s.index.take(indices), dtype=s.dtype) - tm.assert_series_equal(out, expected) - for df in [tm.makeTimeDataFrame()]: - out = df.take(indices) - expected = DataFrame(data=df.values.take(indices, axis=0), - index=df.index.take(indices), - columns=df.columns) - tm.assert_frame_equal(out, expected) - - indices = [-3, 2, 0, 1] - with catch_warnings(record=True): - for p in [tm.makePanel()]: - out = p.take(indices) - expected = Panel(data=p.values.take(indices, axis=0), - items=p.items.take(indices), - major_axis=p.major_axis, - minor_axis=p.minor_axis) - tm.assert_panel_equal(out, expected) - - with catch_warnings(record=True): - for p4d in [tm.makePanel4D()]: - out = p4d.take(indices) - expected = Panel4D(data=p4d.values.take(indices, axis=0), - labels=p4d.labels.take(indices), - major_axis=p4d.major_axis, - minor_axis=p4d.minor_axis, - items=p4d.items) - tm.assert_panel4d_equal(out, expected) - - def test_take_invalid_kwargs(self): - indices = [-3, 2, 0, 1] - s = tm.makeFloatSeries() - df = tm.makeTimeDataFrame() - - with catch_warnings(record=True): - p = tm.makePanel() - p4d = tm.makePanel4D() - - for obj in (s, df, p, p4d): - msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, obj.take, - indices, foo=2) - - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, obj.take, - indices, out=indices) - - msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, obj.take, - indices, mode='clip') - - def test_equals(self): - s1 = pd.Series([1, 2, 3], index=[0, 2, 1]) - s2 = s1.copy() - self.assertTrue(s1.equals(s2)) - - s1[1] = 99 - self.assertFalse(s1.equals(s2)) - - # NaNs compare as equal - s1 = pd.Series([1, np.nan, 3, np.nan], index=[0, 2, 1, 3]) - s2 = s1.copy() - self.assertTrue(s1.equals(s2)) - - s2[0] = 9.9 - self.assertFalse(s1.equals(s2)) - - idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')]) - s1 = Series([1, 2, np.nan], index=idx) - s2 = s1.copy() - self.assertTrue(s1.equals(s2)) - - # Add object dtype column with nans - index = np.random.random(10) - df1 = DataFrame( - np.random.random(10, ), index=index, columns=['floats']) - df1['text'] = 'the sky is so blue. 
we could use more chocolate.'.split( - ) - df1['start'] = date_range('2000-1-1', periods=10, freq='T') - df1['end'] = date_range('2000-1-1', periods=10, freq='D') - df1['diff'] = df1['end'] - df1['start'] - df1['bool'] = (np.arange(10) % 3 == 0) - df1.loc[::2] = nan - df2 = df1.copy() - self.assertTrue(df1['text'].equals(df2['text'])) - self.assertTrue(df1['start'].equals(df2['start'])) - self.assertTrue(df1['end'].equals(df2['end'])) - self.assertTrue(df1['diff'].equals(df2['diff'])) - self.assertTrue(df1['bool'].equals(df2['bool'])) - self.assertTrue(df1.equals(df2)) - self.assertFalse(df1.equals(object)) - - # different dtype - different = df1.copy() - different['floats'] = different['floats'].astype('float32') - self.assertFalse(df1.equals(different)) - - # different index - different_index = -index - different = df2.set_index(different_index) - self.assertFalse(df1.equals(different)) - - # different columns - different = df2.copy() - different.columns = df2.columns[::-1] - self.assertFalse(df1.equals(different)) - - # DatetimeIndex - index = pd.date_range('2000-1-1', periods=10, freq='T') - df1 = df1.set_index(index) - df2 = df1.copy() - self.assertTrue(df1.equals(df2)) - - # MultiIndex - df3 = df1.set_index(['text'], append=True) - df2 = df1.set_index(['text'], append=True) - self.assertTrue(df3.equals(df2)) - - df2 = df1.set_index(['floats'], append=True) - self.assertFalse(df3.equals(df2)) - - # NaN in index - df3 = df1.set_index(['floats'], append=True) - df2 = df1.set_index(['floats'], append=True) - self.assertTrue(df3.equals(df2)) - - # GH 8437 - a = pd.Series([False, np.nan]) - b = pd.Series([False, np.nan]) - c = pd.Series(index=range(2)) - d = pd.Series(index=range(2)) - e = pd.Series(index=range(2)) - f = pd.Series(index=range(2)) - c[:-1] = d[:-1] = e[0] = f[0] = False - self.assertTrue(a.equals(a)) - self.assertTrue(a.equals(b)) - self.assertTrue(a.equals(c)) - self.assertTrue(a.equals(d)) - self.assertFalse(a.equals(e)) - self.assertTrue(e.equals(f)) - - def test_describe_raises(self): - with catch_warnings(record=True): - with tm.assertRaises(NotImplementedError): - tm.makePanel().describe() - - def test_pipe(self): - df = DataFrame({'A': [1, 2, 3]}) - f = lambda x, y: x ** y - result = df.pipe(f, 2) - expected = DataFrame({'A': [1, 4, 9]}) - self.assert_frame_equal(result, expected) - - result = df.A.pipe(f, 2) - self.assert_series_equal(result, expected.A) - - def test_pipe_tuple(self): - df = DataFrame({'A': [1, 2, 3]}) - f = lambda x, y: y - result = df.pipe((f, 'y'), 0) - self.assert_frame_equal(result, df) - - result = df.A.pipe((f, 'y'), 0) - self.assert_series_equal(result, df.A) - - def test_pipe_tuple_error(self): - df = DataFrame({"A": [1, 2, 3]}) - f = lambda x, y: y - with tm.assertRaises(ValueError): - df.pipe((f, 'y'), x=1, y=0) - - with tm.assertRaises(ValueError): - df.A.pipe((f, 'y'), x=1, y=0) - - def test_pipe_panel(self): - with catch_warnings(record=True): - wp = Panel({'r1': DataFrame({"A": [1, 2, 3]})}) - f = lambda x, y: x + y - result = wp.pipe(f, 2) - expected = wp + 2 - assert_panel_equal(result, expected) - - result = wp.pipe((f, 'y'), x=1) - expected = wp + 1 - assert_panel_equal(result, expected) - - with tm.assertRaises(ValueError): - result = wp.pipe((f, 'y'), x=1, y=1) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 55e0e512169fb..69a844e2e64e4 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -2584,7 +2584,7 @@ def test_truncate(self): wp.major_axis[2]) def test_axis_dummies(self): - 
from pandas.core.reshape import make_axis_dummies + from pandas.core.reshape.reshape import make_axis_dummies minor_dummies = make_axis_dummies(self.panel, 'minor').astype(np.uint8) self.assertEqual(len(minor_dummies.columns), @@ -2604,7 +2604,7 @@ def test_axis_dummies(self): # TODO: test correctness def test_get_dummies(self): - from pandas.core.reshape import get_dummies, make_axis_dummies + from pandas.core.reshape.reshape import get_dummies, make_axis_dummies self.panel['Label'] = self.panel.index.labels[1] minor_dummies = make_axis_dummies(self.panel, 'minor').astype(np.uint8) @@ -2655,7 +2655,7 @@ def test_join(self): def test_pivot(self): with catch_warnings(record=True): - from pandas.core.reshape import _slow_pivot + from pandas.core.reshape.reshape import _slow_pivot one, two, three = (np.array([1, 2, 3, 4, 5]), np.array(['a', 'b', 'c', 'd', 'e']), diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py index 2793cc14df19a..1fa436df0910d 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/test_util.py @@ -1,8 +1,12 @@ # -*- coding: utf-8 -*- -from collections import OrderedDict +import os +import locale +import codecs import sys -import unittest from uuid import uuid4 +from collections import OrderedDict + +import pytest from pandas.util._move import move_into_mutable_buffer, BadMove, stolenbuf from pandas.util.decorators import deprecate_kwarg from pandas.util.validators import (validate_args, validate_kwargs, @@ -11,6 +15,9 @@ import pandas.util.testing as tm +CURRENT_LOCALE = locale.getlocale() +LOCALE_OVERRIDE = os.environ.get('LOCALE_OVERRIDE', None) + class TestDecorators(tm.TestCase): @@ -352,9 +359,9 @@ def test_exactly_one_ref(self): # materialize as bytearray to show that it is mutable self.assertEqual(bytearray(as_stolen_buf), b'test') - @unittest.skipIf( + @pytest.mark.skipif( sys.version_info[0] > 2, - 'bytes objects cannot be interned in py3', + reason='bytes objects cannot be interned in py3', ) def test_interned(self): salt = uuid4().hex @@ -401,3 +408,66 @@ def test_numpy_errstate_is_default(): from pandas.compat import numpy # noqa # The errstate should be unchanged after that import. assert np.geterr() == expected + + +class TestLocaleUtils(tm.TestCase): + + @classmethod + def setUpClass(cls): + super(TestLocaleUtils, cls).setUpClass() + cls.locales = tm.get_locales() + + if not cls.locales: + pytest.skip("No locales found") + + tm._skip_if_windows() + + @classmethod + def tearDownClass(cls): + super(TestLocaleUtils, cls).tearDownClass() + del cls.locales + + def test_get_locales(self): + # all systems should have at least a single locale + assert len(tm.get_locales()) > 0 + + def test_get_locales_prefix(self): + if len(self.locales) == 1: + pytest.skip("Only a single locale found, no point in " + "trying to test filtering locale prefixes") + first_locale = self.locales[0] + assert len(tm.get_locales(prefix=first_locale[:2])) > 0 + + def test_set_locale(self): + if len(self.locales) == 1: + pytest.skip("Only a single locale found, no point in " + "trying to test setting another locale") + + if all(x is None for x in CURRENT_LOCALE): + # Not sure why, but on some travis runs with pytest, + # getlocale() returned (None, None). 
+ pytest.skip("CURRENT_LOCALE is not set.") + + if LOCALE_OVERRIDE is None: + lang, enc = 'it_CH', 'UTF-8' + elif LOCALE_OVERRIDE == 'C': + lang, enc = 'en_US', 'ascii' + else: + lang, enc = LOCALE_OVERRIDE.split('.') + + enc = codecs.lookup(enc).name + new_locale = lang, enc + + if not tm._can_set_locale(new_locale): + with tm.assertRaises(locale.Error): + with tm.set_locale(new_locale): + pass + else: + with tm.set_locale(new_locale) as normalized_locale: + new_lang, new_enc = normalized_locale.split('.') + new_enc = codecs.lookup(enc).name + normalized_locale = new_lang, new_enc + self.assertEqual(normalized_locale, new_locale) + + current_locale = locale.getlocale() + self.assertEqual(current_locale, CURRENT_LOCALE) diff --git a/pandas/tests/tools/test_util.py b/pandas/tests/tools/test_util.py deleted file mode 100644 index 3ac7d8b32516e..0000000000000 --- a/pandas/tests/tools/test_util.py +++ /dev/null @@ -1,485 +0,0 @@ -import os -import locale -import codecs -import pytest -import decimal - -import numpy as np -from numpy import iinfo - -import pandas as pd -from pandas import (date_range, Index, _np_version_under1p9) -import pandas.util.testing as tm -from pandas.tools.util import cartesian_product, to_numeric - -CURRENT_LOCALE = locale.getlocale() -LOCALE_OVERRIDE = os.environ.get('LOCALE_OVERRIDE', None) - - -class TestCartesianProduct(tm.TestCase): - - def test_simple(self): - x, y = list('ABC'), [1, 22] - result1, result2 = cartesian_product([x, y]) - expected1 = np.array(['A', 'A', 'B', 'B', 'C', 'C']) - expected2 = np.array([1, 22, 1, 22, 1, 22]) - tm.assert_numpy_array_equal(result1, expected1) - tm.assert_numpy_array_equal(result2, expected2) - - def test_datetimeindex(self): - # regression test for GitHub issue #6439 - # make sure that the ordering on datetimeindex is consistent - x = date_range('2000-01-01', periods=2) - result1, result2 = [Index(y).day for y in cartesian_product([x, x])] - expected1 = Index([1, 1, 2, 2]) - expected2 = Index([1, 2, 1, 2]) - tm.assert_index_equal(result1, expected1) - tm.assert_index_equal(result2, expected2) - - def test_empty(self): - # product of empty factors - X = [[], [0, 1], []] - Y = [[], [], ['a', 'b', 'c']] - for x, y in zip(X, Y): - expected1 = np.array([], dtype=np.asarray(x).dtype) - expected2 = np.array([], dtype=np.asarray(y).dtype) - result1, result2 = cartesian_product([x, y]) - tm.assert_numpy_array_equal(result1, expected1) - tm.assert_numpy_array_equal(result2, expected2) - - # empty product (empty input): - result = cartesian_product([]) - expected = [] - assert result == expected - - def test_invalid_input(self): - invalid_inputs = [1, [1], [1, 2], [[1], 2], - 'a', ['a'], ['a', 'b'], [['a'], 'b']] - msg = "Input must be a list-like of list-likes" - for X in invalid_inputs: - tm.assertRaisesRegexp(TypeError, msg, cartesian_product, X=X) - - -class TestLocaleUtils(tm.TestCase): - - @classmethod - def setUpClass(cls): - super(TestLocaleUtils, cls).setUpClass() - cls.locales = tm.get_locales() - - if not cls.locales: - pytest.skip("No locales found") - - tm._skip_if_windows() - - @classmethod - def tearDownClass(cls): - super(TestLocaleUtils, cls).tearDownClass() - del cls.locales - - def test_get_locales(self): - # all systems should have at least a single locale - assert len(tm.get_locales()) > 0 - - def test_get_locales_prefix(self): - if len(self.locales) == 1: - pytest.skip("Only a single locale found, no point in " - "trying to test filtering locale prefixes") - first_locale = self.locales[0] - assert 
len(tm.get_locales(prefix=first_locale[:2])) > 0 - - def test_set_locale(self): - if len(self.locales) == 1: - pytest.skip("Only a single locale found, no point in " - "trying to test setting another locale") - - if all(x is None for x in CURRENT_LOCALE): - # Not sure why, but on some travis runs with pytest, - # getlocale() returned (None, None). - pytest.skip("CURRENT_LOCALE is not set.") - - if LOCALE_OVERRIDE is None: - lang, enc = 'it_CH', 'UTF-8' - elif LOCALE_OVERRIDE == 'C': - lang, enc = 'en_US', 'ascii' - else: - lang, enc = LOCALE_OVERRIDE.split('.') - - enc = codecs.lookup(enc).name - new_locale = lang, enc - - if not tm._can_set_locale(new_locale): - with tm.assertRaises(locale.Error): - with tm.set_locale(new_locale): - pass - else: - with tm.set_locale(new_locale) as normalized_locale: - new_lang, new_enc = normalized_locale.split('.') - new_enc = codecs.lookup(enc).name - normalized_locale = new_lang, new_enc - self.assertEqual(normalized_locale, new_locale) - - current_locale = locale.getlocale() - self.assertEqual(current_locale, CURRENT_LOCALE) - - -class TestToNumeric(tm.TestCase): - - def test_series(self): - s = pd.Series(['1', '-3.14', '7']) - res = to_numeric(s) - expected = pd.Series([1, -3.14, 7]) - tm.assert_series_equal(res, expected) - - s = pd.Series(['1', '-3.14', 7]) - res = to_numeric(s) - tm.assert_series_equal(res, expected) - - def test_series_numeric(self): - s = pd.Series([1, 3, 4, 5], index=list('ABCD'), name='XXX') - res = to_numeric(s) - tm.assert_series_equal(res, s) - - s = pd.Series([1., 3., 4., 5.], index=list('ABCD'), name='XXX') - res = to_numeric(s) - tm.assert_series_equal(res, s) - - # bool is regarded as numeric - s = pd.Series([True, False, True, True], - index=list('ABCD'), name='XXX') - res = to_numeric(s) - tm.assert_series_equal(res, s) - - def test_error(self): - s = pd.Series([1, -3.14, 'apple']) - msg = 'Unable to parse string "apple" at position 2' - with tm.assertRaisesRegexp(ValueError, msg): - to_numeric(s, errors='raise') - - res = to_numeric(s, errors='ignore') - expected = pd.Series([1, -3.14, 'apple']) - tm.assert_series_equal(res, expected) - - res = to_numeric(s, errors='coerce') - expected = pd.Series([1, -3.14, np.nan]) - tm.assert_series_equal(res, expected) - - s = pd.Series(['orange', 1, -3.14, 'apple']) - msg = 'Unable to parse string "orange" at position 0' - with tm.assertRaisesRegexp(ValueError, msg): - to_numeric(s, errors='raise') - - def test_error_seen_bool(self): - s = pd.Series([True, False, 'apple']) - msg = 'Unable to parse string "apple" at position 2' - with tm.assertRaisesRegexp(ValueError, msg): - to_numeric(s, errors='raise') - - res = to_numeric(s, errors='ignore') - expected = pd.Series([True, False, 'apple']) - tm.assert_series_equal(res, expected) - - # coerces to float - res = to_numeric(s, errors='coerce') - expected = pd.Series([1., 0., np.nan]) - tm.assert_series_equal(res, expected) - - def test_list(self): - s = ['1', '-3.14', '7'] - res = to_numeric(s) - expected = np.array([1, -3.14, 7]) - tm.assert_numpy_array_equal(res, expected) - - def test_list_numeric(self): - s = [1, 3, 4, 5] - res = to_numeric(s) - tm.assert_numpy_array_equal(res, np.array(s, dtype=np.int64)) - - s = [1., 3., 4., 5.] 
- res = to_numeric(s) - tm.assert_numpy_array_equal(res, np.array(s)) - - # bool is regarded as numeric - s = [True, False, True, True] - res = to_numeric(s) - tm.assert_numpy_array_equal(res, np.array(s)) - - def test_numeric(self): - s = pd.Series([1, -3.14, 7], dtype='O') - res = to_numeric(s) - expected = pd.Series([1, -3.14, 7]) - tm.assert_series_equal(res, expected) - - s = pd.Series([1, -3.14, 7]) - res = to_numeric(s) - tm.assert_series_equal(res, expected) - - # GH 14827 - df = pd.DataFrame(dict( - a=[1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), '0.1'], - b=[1.0, 2.0, 3.0, 4.0], - )) - expected = pd.DataFrame(dict( - a=[1.2, 3.14, np.inf, 0.1], - b=[1.0, 2.0, 3.0, 4.0], - )) - - # Test to_numeric over one column - df_copy = df.copy() - df_copy['a'] = df_copy['a'].apply(to_numeric) - tm.assert_frame_equal(df_copy, expected) - - # Test to_numeric over multiple columns - df_copy = df.copy() - df_copy[['a', 'b']] = df_copy[['a', 'b']].apply(to_numeric) - tm.assert_frame_equal(df_copy, expected) - - def test_numeric_lists_and_arrays(self): - # Test to_numeric with embedded lists and arrays - df = pd.DataFrame(dict( - a=[[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1] - )) - df['a'] = df['a'].apply(to_numeric) - expected = pd.DataFrame(dict( - a=[[3.14, 1.0], 1.6, 0.1], - )) - tm.assert_frame_equal(df, expected) - - df = pd.DataFrame(dict( - a=[np.array([decimal.Decimal(3.14), 1.0]), 0.1] - )) - df['a'] = df['a'].apply(to_numeric) - expected = pd.DataFrame(dict( - a=[[3.14, 1.0], 0.1], - )) - tm.assert_frame_equal(df, expected) - - def test_all_nan(self): - s = pd.Series(['a', 'b', 'c']) - res = to_numeric(s, errors='coerce') - expected = pd.Series([np.nan, np.nan, np.nan]) - tm.assert_series_equal(res, expected) - - def test_type_check(self): - # GH 11776 - df = pd.DataFrame({'a': [1, -3.14, 7], 'b': ['4', '5', '6']}) - with tm.assertRaisesRegexp(TypeError, "1-d array"): - to_numeric(df) - for errors in ['ignore', 'raise', 'coerce']: - with tm.assertRaisesRegexp(TypeError, "1-d array"): - to_numeric(df, errors=errors) - - def test_scalar(self): - self.assertEqual(pd.to_numeric(1), 1) - self.assertEqual(pd.to_numeric(1.1), 1.1) - - self.assertEqual(pd.to_numeric('1'), 1) - self.assertEqual(pd.to_numeric('1.1'), 1.1) - - with tm.assertRaises(ValueError): - to_numeric('XX', errors='raise') - - self.assertEqual(to_numeric('XX', errors='ignore'), 'XX') - self.assertTrue(np.isnan(to_numeric('XX', errors='coerce'))) - - def test_numeric_dtypes(self): - idx = pd.Index([1, 2, 3], name='xxx') - res = pd.to_numeric(idx) - tm.assert_index_equal(res, idx) - - res = pd.to_numeric(pd.Series(idx, name='xxx')) - tm.assert_series_equal(res, pd.Series(idx, name='xxx')) - - res = pd.to_numeric(idx.values) - tm.assert_numpy_array_equal(res, idx.values) - - idx = pd.Index([1., np.nan, 3., np.nan], name='xxx') - res = pd.to_numeric(idx) - tm.assert_index_equal(res, idx) - - res = pd.to_numeric(pd.Series(idx, name='xxx')) - tm.assert_series_equal(res, pd.Series(idx, name='xxx')) - - res = pd.to_numeric(idx.values) - tm.assert_numpy_array_equal(res, idx.values) - - def test_str(self): - idx = pd.Index(['1', '2', '3'], name='xxx') - exp = np.array([1, 2, 3], dtype='int64') - res = pd.to_numeric(idx) - tm.assert_index_equal(res, pd.Index(exp, name='xxx')) - - res = pd.to_numeric(pd.Series(idx, name='xxx')) - tm.assert_series_equal(res, pd.Series(exp, name='xxx')) - - res = pd.to_numeric(idx.values) - tm.assert_numpy_array_equal(res, exp) - - idx = pd.Index(['1.5', '2.7', '3.4'], name='xxx') 
- exp = np.array([1.5, 2.7, 3.4]) - res = pd.to_numeric(idx) - tm.assert_index_equal(res, pd.Index(exp, name='xxx')) - - res = pd.to_numeric(pd.Series(idx, name='xxx')) - tm.assert_series_equal(res, pd.Series(exp, name='xxx')) - - res = pd.to_numeric(idx.values) - tm.assert_numpy_array_equal(res, exp) - - def test_datetimelike(self): - for tz in [None, 'US/Eastern', 'Asia/Tokyo']: - idx = pd.date_range('20130101', periods=3, tz=tz, name='xxx') - res = pd.to_numeric(idx) - tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx')) - - res = pd.to_numeric(pd.Series(idx, name='xxx')) - tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx')) - - res = pd.to_numeric(idx.values) - tm.assert_numpy_array_equal(res, idx.asi8) - - def test_timedelta(self): - idx = pd.timedelta_range('1 days', periods=3, freq='D', name='xxx') - res = pd.to_numeric(idx) - tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx')) - - res = pd.to_numeric(pd.Series(idx, name='xxx')) - tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx')) - - res = pd.to_numeric(idx.values) - tm.assert_numpy_array_equal(res, idx.asi8) - - def test_period(self): - idx = pd.period_range('2011-01', periods=3, freq='M', name='xxx') - res = pd.to_numeric(idx) - tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx')) - - # ToDo: enable when we can support native PeriodDtype - # res = pd.to_numeric(pd.Series(idx, name='xxx')) - # tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx')) - - def test_non_hashable(self): - # Test for Bug #13324 - s = pd.Series([[10.0, 2], 1.0, 'apple']) - res = pd.to_numeric(s, errors='coerce') - tm.assert_series_equal(res, pd.Series([np.nan, 1.0, np.nan])) - - res = pd.to_numeric(s, errors='ignore') - tm.assert_series_equal(res, pd.Series([[10.0, 2], 1.0, 'apple'])) - - with self.assertRaisesRegexp(TypeError, "Invalid object type"): - pd.to_numeric(s) - - def test_downcast(self): - # see gh-13352 - mixed_data = ['1', 2, 3] - int_data = [1, 2, 3] - date_data = np.array(['1970-01-02', '1970-01-03', - '1970-01-04'], dtype='datetime64[D]') - - invalid_downcast = 'unsigned-integer' - msg = 'invalid downcasting method provided' - - smallest_int_dtype = np.dtype(np.typecodes['Integer'][0]) - smallest_uint_dtype = np.dtype(np.typecodes['UnsignedInteger'][0]) - - # support below np.float32 is rare and far between - float_32_char = np.dtype(np.float32).char - smallest_float_dtype = float_32_char - - for data in (mixed_data, int_data, date_data): - with self.assertRaisesRegexp(ValueError, msg): - pd.to_numeric(data, downcast=invalid_downcast) - - expected = np.array([1, 2, 3], dtype=np.int64) - - res = pd.to_numeric(data) - tm.assert_numpy_array_equal(res, expected) - - res = pd.to_numeric(data, downcast=None) - tm.assert_numpy_array_equal(res, expected) - - expected = np.array([1, 2, 3], dtype=smallest_int_dtype) - - for signed_downcast in ('integer', 'signed'): - res = pd.to_numeric(data, downcast=signed_downcast) - tm.assert_numpy_array_equal(res, expected) - - expected = np.array([1, 2, 3], dtype=smallest_uint_dtype) - res = pd.to_numeric(data, downcast='unsigned') - tm.assert_numpy_array_equal(res, expected) - - expected = np.array([1, 2, 3], dtype=smallest_float_dtype) - res = pd.to_numeric(data, downcast='float') - tm.assert_numpy_array_equal(res, expected) - - # if we can't successfully cast the given - # data to a numeric dtype, do not bother - # with the downcast parameter - data = ['foo', 2, 3] - expected = np.array(data, dtype=object) - res = pd.to_numeric(data, errors='ignore', - 
downcast='unsigned') - tm.assert_numpy_array_equal(res, expected) - - # cannot cast to an unsigned integer because - # we have a negative number - data = ['-1', 2, 3] - expected = np.array([-1, 2, 3], dtype=np.int64) - res = pd.to_numeric(data, downcast='unsigned') - tm.assert_numpy_array_equal(res, expected) - - # cannot cast to an integer (signed or unsigned) - # because we have a float number - data = (['1.1', 2, 3], - [10000.0, 20000, 3000, 40000.36, 50000, 50000.00]) - expected = (np.array([1.1, 2, 3], dtype=np.float64), - np.array([10000.0, 20000, 3000, - 40000.36, 50000, 50000.00], dtype=np.float64)) - - for _data, _expected in zip(data, expected): - for downcast in ('integer', 'signed', 'unsigned'): - res = pd.to_numeric(_data, downcast=downcast) - tm.assert_numpy_array_equal(res, _expected) - - # the smallest integer dtype need not be np.(u)int8 - data = ['256', 257, 258] - - for downcast, expected_dtype in zip( - ['integer', 'signed', 'unsigned'], - [np.int16, np.int16, np.uint16]): - expected = np.array([256, 257, 258], dtype=expected_dtype) - res = pd.to_numeric(data, downcast=downcast) - tm.assert_numpy_array_equal(res, expected) - - def test_downcast_limits(self): - # Test the limits of each downcast. Bug: #14401. - # Check to make sure numpy is new enough to run this test. - if _np_version_under1p9: - pytest.skip("Numpy version is under 1.9") - - i = 'integer' - u = 'unsigned' - dtype_downcast_min_max = [ - ('int8', i, [iinfo(np.int8).min, iinfo(np.int8).max]), - ('int16', i, [iinfo(np.int16).min, iinfo(np.int16).max]), - ('int32', i, [iinfo(np.int32).min, iinfo(np.int32).max]), - ('int64', i, [iinfo(np.int64).min, iinfo(np.int64).max]), - ('uint8', u, [iinfo(np.uint8).min, iinfo(np.uint8).max]), - ('uint16', u, [iinfo(np.uint16).min, iinfo(np.uint16).max]), - ('uint32', u, [iinfo(np.uint32).min, iinfo(np.uint32).max]), - ('uint64', u, [iinfo(np.uint64).min, iinfo(np.uint64).max]), - ('int16', i, [iinfo(np.int8).min, iinfo(np.int8).max + 1]), - ('int32', i, [iinfo(np.int16).min, iinfo(np.int16).max + 1]), - ('int64', i, [iinfo(np.int32).min, iinfo(np.int32).max + 1]), - ('int16', i, [iinfo(np.int8).min - 1, iinfo(np.int16).max]), - ('int32', i, [iinfo(np.int16).min - 1, iinfo(np.int32).max]), - ('int64', i, [iinfo(np.int32).min - 1, iinfo(np.int64).max]), - ('uint16', u, [iinfo(np.uint8).min, iinfo(np.uint8).max + 1]), - ('uint32', u, [iinfo(np.uint16).min, iinfo(np.uint16).max + 1]), - ('uint64', u, [iinfo(np.uint32).min, iinfo(np.uint32).max + 1]) - ] - - for dtype, downcast, min_max in dtype_downcast_min_max: - series = pd.to_numeric(pd.Series(min_max), downcast=downcast) - assert series.dtype == dtype diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 53208fbdd5529..cd58aa2c7f923 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1,46 +1,4 @@ -""" -SQL-style merge routines -""" - -import copy import warnings -import string - -import numpy as np -from pandas.compat import range, lzip, zip, map, filter -import pandas.compat as compat - -import pandas as pd -from pandas import (Categorical, Series, DataFrame, - Index, MultiIndex, Timedelta) -from pandas.core.frame import _merge_doc -from pandas.core.dtypes.common import ( - is_datetime64tz_dtype, - is_datetime64_dtype, - needs_i8_conversion, - is_int64_dtype, - is_categorical_dtype, - is_integer_dtype, - is_float_dtype, - is_numeric_dtype, - is_integer, - is_int_or_datetime_dtype, - is_dtype_equal, - is_bool, - is_list_like, - _ensure_int64, - _ensure_float64, - _ensure_object, - _get_dtype) 
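The downcast tests above encode two rules worth spelling out: the smallest dtype wide enough for the values wins, and a downcast the values cannot satisfy is silently skipped rather than raised. A short sketch of both rules, using only the public pd.to_numeric API:

    import pandas as pd

    data = ['256', 257, 258]

    # 256 does not fit in 8 bits, so the smallest viable width is 16 bits.
    assert pd.to_numeric(data, downcast='integer').dtype == 'int16'
    assert pd.to_numeric(data, downcast='unsigned').dtype == 'uint16'

    # A negative value makes the unsigned downcast impossible; the result
    # stays at the default int64 instead of raising.
    assert pd.to_numeric(['-1', 2, 3], downcast='unsigned').dtype == 'int64'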
-from pandas.core.dtypes.missing import na_value_for_dtype -from pandas.core.internals import (items_overlap_with_suffix, - concatenate_block_managers) -from pandas.util.decorators import Appender, Substitution - -from pandas.core.sorting import is_int64_overflow_possible -import pandas.core.algorithms as algos -import pandas.core.common as com -from pandas._libs import hashtable as libhashtable, join as libjoin, lib # back-compat of pseudo-public API @@ -51,1447 +9,9 @@ def wrapper(*args, **kwargs): "import from the public API: " "pandas.concat instead", FutureWarning, stacklevel=3) + import pandas as pd return pd.concat(*args, **kwargs) return wrapper concat = concat_wrap() - - -@Substitution('\nleft : DataFrame') -@Appender(_merge_doc, indents=0) -def merge(left, right, how='inner', on=None, left_on=None, right_on=None, - left_index=False, right_index=False, sort=False, - suffixes=('_x', '_y'), copy=True, indicator=False): - op = _MergeOperation(left, right, how=how, on=on, left_on=left_on, - right_on=right_on, left_index=left_index, - right_index=right_index, sort=sort, suffixes=suffixes, - copy=copy, indicator=indicator) - return op.get_result() - - -if __debug__: - merge.__doc__ = _merge_doc % '\nleft : DataFrame' - - -class MergeError(ValueError): - pass - - -def _groupby_and_merge(by, on, left, right, _merge_pieces, - check_duplicates=True): - """ - groupby & merge; we are always performing a left-by type operation - - Parameters - ---------- - by: field to group - on: duplicates field - left: left frame - right: right frame - _merge_pieces: function for merging - check_duplicates: boolean, default True - should we check & clean duplicates - """ - - pieces = [] - if not isinstance(by, (list, tuple)): - by = [by] - - lby = left.groupby(by, sort=False) - - # if we can groupby the rhs - # then we can get vastly better perf - try: - - # we will check & remove duplicates if indicated - if check_duplicates: - if on is None: - on = [] - elif not isinstance(on, (list, tuple)): - on = [on] - - if right.duplicated(by + on).any(): - right = right.drop_duplicates(by + on, keep='last') - rby = right.groupby(by, sort=False) - except KeyError: - rby = None - - for key, lhs in lby: - - if rby is None: - rhs = right - else: - try: - rhs = right.take(rby.indices[key]) - except KeyError: - # key doesn't exist in left - lcols = lhs.columns.tolist() - cols = lcols + [r for r in right.columns - if r not in set(lcols)] - merged = lhs.reindex(columns=cols) - merged.index = range(len(merged)) - pieces.append(merged) - continue - - merged = _merge_pieces(lhs, rhs) - - # make sure join keys are in the merged - # TODO, should _merge_pieces do this? 
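The _groupby_and_merge helper above is internal, but its left-by strategy can be sketched with public calls; the frames and keys below are made up for illustration:

    import pandas as pd

    left = pd.DataFrame({'g': ['a', 'a', 'b'], 'k': [1, 2, 1], 'x': [10, 20, 30]})
    right = pd.DataFrame({'g': ['a', 'b'], 'k': [1, 1], 'y': [100, 200]})

    # Split the left frame by the 'by' key, merge each piece against the
    # matching right rows, then reassemble in the original group order.
    pieces = []
    for key, lhs in left.groupby('g', sort=False):
        rhs = right[right['g'] == key]
        pieces.append(lhs.merge(rhs, on=['g', 'k'], how='left'))
    result = pd.concat(pieces, ignore_index=True)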
- for k in by: - try: - if k in merged: - merged[k] = key - except: - pass - - pieces.append(merged) - - # preserve the original order - # if we have a missing piece this can be reset - from pandas.tools.concat import concat - result = concat(pieces, ignore_index=True) - result = result.reindex(columns=pieces[0].columns, copy=False) - return result, lby - - -def ordered_merge(left, right, on=None, - left_on=None, right_on=None, - left_by=None, right_by=None, - fill_method=None, suffixes=('_x', '_y')): - - warnings.warn("ordered_merge is deprecated and replaced by merge_ordered", - FutureWarning, stacklevel=2) - return merge_ordered(left, right, on=on, - left_on=left_on, right_on=right_on, - left_by=left_by, right_by=right_by, - fill_method=fill_method, suffixes=suffixes) - - -def merge_ordered(left, right, on=None, - left_on=None, right_on=None, - left_by=None, right_by=None, - fill_method=None, suffixes=('_x', '_y'), - how='outer'): - """Perform merge with optional filling/interpolation designed for ordered - data like time series data. Optionally perform group-wise merge (see - examples) - - Parameters - ---------- - left : DataFrame - right : DataFrame - on : label or list - Field names to join on. Must be found in both DataFrames. - left_on : label or list, or array-like - Field names to join on in left DataFrame. Can be a vector or list of - vectors of the length of the DataFrame to use a particular vector as - the join key instead of columns - right_on : label or list, or array-like - Field names to join on in right DataFrame or vector/list of vectors per - left_on docs - left_by : column name or list of column names - Group left DataFrame by group columns and merge piece by piece with - right DataFrame - right_by : column name or list of column names - Group right DataFrame by group columns and merge piece by piece with - left DataFrame - fill_method : {'ffill', None}, default None - Interpolation method for data - suffixes : 2-length sequence (tuple, list, ...) - Suffix to apply to overlapping column names in the left and right - side, respectively - how : {'left', 'right', 'outer', 'inner'}, default 'outer' - * left: use only keys from left frame (SQL: left outer join) - * right: use only keys from right frame (SQL: right outer join) - * outer: use union of keys from both frames (SQL: full outer join) - * inner: use intersection of keys from both frames (SQL: inner join) - - .. versionadded:: 0.19.0 - - Examples - -------- - >>> A >>> B - key lvalue group key rvalue - 0 a 1 a 0 b 1 - 1 c 2 a 1 c 2 - 2 e 3 a 2 d 3 - 3 a 1 b - 4 c 2 b - 5 e 3 b - - >>> ordered_merge(A, B, fill_method='ffill', left_by='group') - key lvalue group rvalue - 0 a 1 a NaN - 1 b 1 a 1 - 2 c 2 a 2 - 3 d 2 a 3 - 4 e 3 a 3 - 5 f 3 a 4 - 6 a 1 b NaN - 7 b 1 b 1 - 8 c 2 b 2 - 9 d 2 b 3 - 10 e 3 b 3 - 11 f 3 b 4 - - Returns - ------- - merged : DataFrame - The output type will be the same as 'left', if it is a subclass - of DataFrame.
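For reference, a runnable version of the docstring's group-wise example via the public pd.merge_ordered entry point (the frames are reconstructed from the A/B tables above):

    import pandas as pd

    A = pd.DataFrame({'key': ['a', 'c', 'e'] * 2,
                      'lvalue': [1, 2, 3] * 2,
                      'group': ['a'] * 3 + ['b'] * 3})
    B = pd.DataFrame({'key': ['b', 'c', 'd'], 'rvalue': [1, 2, 3]})

    # Forward-fill within each group while merging on the ordered key.
    out = pd.merge_ordered(A, B, on='key', left_by='group',
                           fill_method='ffill')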
- - See also - -------- - merge - merge_asof - - """ - def _merger(x, y): - # perform the ordered merge operation - op = _OrderedMerge(x, y, on=on, left_on=left_on, right_on=right_on, - suffixes=suffixes, fill_method=fill_method, - how=how) - return op.get_result() - - if left_by is not None and right_by is not None: - raise ValueError('Can only group either left or right frames') - elif left_by is not None: - result, _ = _groupby_and_merge(left_by, on, left, right, - lambda x, y: _merger(x, y), - check_duplicates=False) - elif right_by is not None: - result, _ = _groupby_and_merge(right_by, on, right, left, - lambda x, y: _merger(y, x), - check_duplicates=False) - else: - result = _merger(left, right) - return result - - -ordered_merge.__doc__ = merge_ordered.__doc__ - - -def merge_asof(left, right, on=None, - left_on=None, right_on=None, - left_index=False, right_index=False, - by=None, left_by=None, right_by=None, - suffixes=('_x', '_y'), - tolerance=None, - allow_exact_matches=True, - direction='backward'): - """Perform an asof merge. This is similar to a left-join except that we - match on nearest key rather than equal keys. - - Both DataFrames must be sorted by the key. - - For each row in the left DataFrame: - - - A "backward" search selects the last row in the right DataFrame whose - 'on' key is less than or equal to the left's key. - - - A "forward" search selects the first row in the right DataFrame whose - 'on' key is greater than or equal to the left's key. - - - A "nearest" search selects the row in the right DataFrame whose 'on' - key is closest in absolute distance to the left's key. - - The default is "backward" and is compatible with versions below 0.20.0. - The direction parameter was added in version 0.20.0 and introduces - "forward" and "nearest". - - Optionally match on equivalent keys with 'by' before searching with 'on'. - - .. versionadded:: 0.19.0 - - Parameters - ---------- - left : DataFrame - right : DataFrame - on : label - Field name to join on. Must be found in both DataFrames. - The data MUST be ordered. Furthermore, this must be a numeric column, - such as datetimelike, integer, or float. Either 'on' or left_on/right_on - must be given. - left_on : label - Field name to join on in left DataFrame. - right_on : label - Field name to join on in right DataFrame. - left_index : boolean - Use the index of the left DataFrame as the join key. - - .. versionadded:: 0.19.2 - - right_index : boolean - Use the index of the right DataFrame as the join key. - - .. versionadded:: 0.19.2 - - by : column name or list of column names - Match on these columns before performing merge operation. - left_by : column name - Field names to match on in the left DataFrame. - - .. versionadded:: 0.19.2 - - right_by : column name - Field names to match on in the right DataFrame. - - .. versionadded:: 0.19.2 - - suffixes : 2-length sequence (tuple, list, ...) - Suffix to apply to overlapping column names in the left and right - side, respectively. - tolerance : integer or Timedelta, optional, default None - Select asof tolerance within this range; must be compatible - with the merge index. - allow_exact_matches : boolean, default True - - - If True, allow matching with the same 'on' value - (i.e. less-than-or-equal-to / greater-than-or-equal-to) - - If False, don't match the same 'on' value - (i.e., strictly less-than / strictly greater-than) - - direction : 'backward' (default), 'forward', or 'nearest' - Whether to search for prior, subsequent, or closest matches. - - ..
versionadded:: 0.20.0 - - Returns - ------- - merged : DataFrame - - Examples - -------- - >>> left - a left_val - 0 1 a - 1 5 b - 2 10 c - - >>> right - a right_val - 0 1 1 - 1 2 2 - 2 3 3 - 3 6 6 - 4 7 7 - - >>> pd.merge_asof(left, right, on='a') - a left_val right_val - 0 1 a 1 - 1 5 b 3 - 2 10 c 7 - - >>> pd.merge_asof(left, right, on='a', allow_exact_matches=False) - a left_val right_val - 0 1 a NaN - 1 5 b 3.0 - 2 10 c 7.0 - - >>> pd.merge_asof(left, right, on='a', direction='forward') - a left_val right_val - 0 1 a 1.0 - 1 5 b 6.0 - 2 10 c NaN - - >>> pd.merge_asof(left, right, on='a', direction='nearest') - a left_val right_val - 0 1 a 1 - 1 5 b 6 - 2 10 c 7 - - We can use indexed DataFrames as well. - - >>> left - left_val - 1 a - 5 b - 10 c - - >>> right - right_val - 1 1 - 2 2 - 3 3 - 6 6 - 7 7 - - >>> pd.merge_asof(left, right, left_index=True, right_index=True) - left_val right_val - 1 a 1 - 5 b 3 - 10 c 7 - - Here is a real-world time-series example - - >>> quotes - time ticker bid ask - 0 2016-05-25 13:30:00.023 GOOG 720.50 720.93 - 1 2016-05-25 13:30:00.023 MSFT 51.95 51.96 - 2 2016-05-25 13:30:00.030 MSFT 51.97 51.98 - 3 2016-05-25 13:30:00.041 MSFT 51.99 52.00 - 4 2016-05-25 13:30:00.048 GOOG 720.50 720.93 - 5 2016-05-25 13:30:00.049 AAPL 97.99 98.01 - 6 2016-05-25 13:30:00.072 GOOG 720.50 720.88 - 7 2016-05-25 13:30:00.075 MSFT 52.01 52.03 - - >>> trades - time ticker price quantity - 0 2016-05-25 13:30:00.023 MSFT 51.95 75 - 1 2016-05-25 13:30:00.038 MSFT 51.95 155 - 2 2016-05-25 13:30:00.048 GOOG 720.77 100 - 3 2016-05-25 13:30:00.048 GOOG 720.92 100 - 4 2016-05-25 13:30:00.048 AAPL 98.00 100 - - By default we are taking the asof of the quotes - - >>> pd.merge_asof(trades, quotes, - ... on='time', - ... by='ticker') - time ticker price quantity bid ask - 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 - 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 - 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 - 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 - 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN - - We only asof within 2ms between the quote time and the trade time - - >>> pd.merge_asof(trades, quotes, - ... on='time', - ... by='ticker', - ... tolerance=pd.Timedelta('2ms')) - time ticker price quantity bid ask - 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 - 1 2016-05-25 13:30:00.038 MSFT 51.95 155 NaN NaN - 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 - 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 - 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN - - We only asof within 10ms between the quote time and the trade time - and we exclude exact matches on time. However *prior* data will - propagate forward - - >>> pd.merge_asof(trades, quotes, - ... on='time', - ... by='ticker', - ... tolerance=pd.Timedelta('10ms'), - ...
allow_exact_matches=False) - time ticker price quantity bid ask - 0 2016-05-25 13:30:00.023 MSFT 51.95 75 NaN NaN - 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 - 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 - 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 - 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN - - See also - -------- - merge - merge_ordered - - """ - op = _AsOfMerge(left, right, - on=on, left_on=left_on, right_on=right_on, - left_index=left_index, right_index=right_index, - by=by, left_by=left_by, right_by=right_by, - suffixes=suffixes, - how='asof', tolerance=tolerance, - allow_exact_matches=allow_exact_matches, - direction=direction) - return op.get_result() - - -# TODO: transformations?? -# TODO: only copy DataFrames when modification necessary -class _MergeOperation(object): - """ - Perform a database (SQL) merge operation between two DataFrame objects - using either columns as keys or their row indexes - """ - _merge_type = 'merge' - - def __init__(self, left, right, how='inner', on=None, - left_on=None, right_on=None, axis=1, - left_index=False, right_index=False, sort=True, - suffixes=('_x', '_y'), copy=True, indicator=False): - self.left = self.orig_left = left - self.right = self.orig_right = right - self.how = how - self.axis = axis - - self.on = com._maybe_make_list(on) - self.left_on = com._maybe_make_list(left_on) - self.right_on = com._maybe_make_list(right_on) - - self.copy = copy - self.suffixes = suffixes - self.sort = sort - - self.left_index = left_index - self.right_index = right_index - - self.indicator = indicator - - if isinstance(self.indicator, compat.string_types): - self.indicator_name = self.indicator - elif isinstance(self.indicator, bool): - self.indicator_name = '_merge' if self.indicator else None - else: - raise ValueError( - 'indicator option can only accept boolean or string arguments') - - if not isinstance(left, DataFrame): - raise ValueError( - 'can not merge DataFrame with instance of ' - 'type {0}'.format(type(left))) - if not isinstance(right, DataFrame): - raise ValueError( - 'can not merge DataFrame with instance of ' - 'type {0}'.format(type(right))) - - if not is_bool(left_index): - raise ValueError( - 'left_index parameter must be of type bool, not ' - '{0}'.format(type(left_index))) - if not is_bool(right_index): - raise ValueError( - 'right_index parameter must be of type bool, not ' - '{0}'.format(type(right_index))) - - # warn user when merging between different levels - if left.columns.nlevels != right.columns.nlevels: - msg = ('merging between different levels can give an unintended ' - 'result ({0} levels on the left, {1} on the right)') - msg = msg.format(left.columns.nlevels, right.columns.nlevels) - warnings.warn(msg, UserWarning) - - self._validate_specification() - - # note this function has side effects - (self.left_join_keys, - self.right_join_keys, - self.join_names) = self._get_merge_keys() - - # validate the merge keys dtypes. 
We may need to coerce - # to avoid incompat dtypes - self._maybe_coerce_merge_keys() - - def get_result(self): - if self.indicator: - self.left, self.right = self._indicator_pre_merge( - self.left, self.right) - - join_index, left_indexer, right_indexer = self._get_join_info() - - ldata, rdata = self.left._data, self.right._data - lsuf, rsuf = self.suffixes - - llabels, rlabels = items_overlap_with_suffix(ldata.items, lsuf, - rdata.items, rsuf) - - lindexers = {1: left_indexer} if left_indexer is not None else {} - rindexers = {1: right_indexer} if right_indexer is not None else {} - - result_data = concatenate_block_managers( - [(ldata, lindexers), (rdata, rindexers)], - axes=[llabels.append(rlabels), join_index], - concat_axis=0, copy=self.copy) - - typ = self.left._constructor - result = typ(result_data).__finalize__(self, method=self._merge_type) - - if self.indicator: - result = self._indicator_post_merge(result) - - self._maybe_add_join_keys(result, left_indexer, right_indexer) - - return result - - def _indicator_pre_merge(self, left, right): - - columns = left.columns.union(right.columns) - - for i in ['_left_indicator', '_right_indicator']: - if i in columns: - raise ValueError("Cannot use `indicator=True` option when " - "data contains a column named {}".format(i)) - if self.indicator_name in columns: - raise ValueError( - "Cannot use name of an existing column for indicator column") - - left = left.copy() - right = right.copy() - - left['_left_indicator'] = 1 - left['_left_indicator'] = left['_left_indicator'].astype('int8') - - right['_right_indicator'] = 2 - right['_right_indicator'] = right['_right_indicator'].astype('int8') - - return left, right - - def _indicator_post_merge(self, result): - - result['_left_indicator'] = result['_left_indicator'].fillna(0) - result['_right_indicator'] = result['_right_indicator'].fillna(0) - - result[self.indicator_name] = Categorical((result['_left_indicator'] + - result['_right_indicator']), - categories=[1, 2, 3]) - result[self.indicator_name] = ( - result[self.indicator_name] - .cat.rename_categories(['left_only', 'right_only', 'both'])) - - result = result.drop(labels=['_left_indicator', '_right_indicator'], - axis=1) - return result - - def _maybe_add_join_keys(self, result, left_indexer, right_indexer): - - left_has_missing = None - right_has_missing = None - - keys = zip(self.join_names, self.left_on, self.right_on) - for i, (name, lname, rname) in enumerate(keys): - if not _should_fill(lname, rname): - continue - - take_left, take_right = None, None - - if name in result: - - if left_indexer is not None and right_indexer is not None: - if name in self.left: - - if left_has_missing is None: - left_has_missing = (left_indexer == -1).any() - - if left_has_missing: - take_right = self.right_join_keys[i] - - if not is_dtype_equal(result[name].dtype, - self.left[name].dtype): - take_left = self.left[name]._values - - elif name in self.right: - - if right_has_missing is None: - right_has_missing = (right_indexer == -1).any() - - if right_has_missing: - take_left = self.left_join_keys[i] - - if not is_dtype_equal(result[name].dtype, - self.right[name].dtype): - take_right = self.right[name]._values - - elif left_indexer is not None \ - and isinstance(self.left_join_keys[i], np.ndarray): - - take_left = self.left_join_keys[i] - take_right = self.right_join_keys[i] - - if take_left is not None or take_right is not None: - - if take_left is None: - lvals = result[name]._values - else: - lfill = na_value_for_dtype(take_left.dtype) - lvals = 
algos.take_1d(take_left, left_indexer, - fill_value=lfill) - - if take_right is None: - rvals = result[name]._values - else: - rfill = na_value_for_dtype(take_right.dtype) - rvals = algos.take_1d(take_right, right_indexer, - fill_value=rfill) - - # if we have an all missing left_indexer - # make sure to just use the right values - mask = left_indexer == -1 - if mask.all(): - key_col = rvals - else: - key_col = Index(lvals).where(~mask, rvals) - - if name in result: - result[name] = key_col - else: - result.insert(i, name or 'key_%d' % i, key_col) - - def _get_join_indexers(self): - """ return the join indexers """ - return _get_join_indexers(self.left_join_keys, - self.right_join_keys, - sort=self.sort, - how=self.how) - - def _get_join_info(self): - left_ax = self.left._data.axes[self.axis] - right_ax = self.right._data.axes[self.axis] - - if self.left_index and self.right_index and self.how != 'asof': - join_index, left_indexer, right_indexer = \ - left_ax.join(right_ax, how=self.how, return_indexers=True, - sort=self.sort) - elif self.right_index and self.how == 'left': - join_index, left_indexer, right_indexer = \ - _left_join_on_index(left_ax, right_ax, self.left_join_keys, - sort=self.sort) - - elif self.left_index and self.how == 'right': - join_index, right_indexer, left_indexer = \ - _left_join_on_index(right_ax, left_ax, self.right_join_keys, - sort=self.sort) - else: - (left_indexer, - right_indexer) = self._get_join_indexers() - - if self.right_index: - if len(self.left) > 0: - join_index = self.left.index.take(left_indexer) - else: - join_index = self.right.index.take(right_indexer) - left_indexer = np.array([-1] * len(join_index)) - elif self.left_index: - if len(self.right) > 0: - join_index = self.right.index.take(right_indexer) - else: - join_index = self.left.index.take(left_indexer) - right_indexer = np.array([-1] * len(join_index)) - else: - join_index = Index(np.arange(len(left_indexer))) - - if len(join_index) == 0: - join_index = join_index.astype(object) - return join_index, left_indexer, right_indexer - - def _get_merge_keys(self): - """ - Note: has side effects (copy/delete key columns) - - Parameters - ---------- - left - right - on - - Returns - ------- - left_keys, right_keys - """ - left_keys = [] - right_keys = [] - join_names = [] - right_drop = [] - left_drop = [] - left, right = self.left, self.right - - is_lkey = lambda x: isinstance( - x, (np.ndarray, Series)) and len(x) == len(left) - is_rkey = lambda x: isinstance( - x, (np.ndarray, Series)) and len(x) == len(right) - - # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A - # user could, for example, request 'left_index' and 'left_by'. In a - # regular pd.merge(), users cannot specify both 'left_index' and - # 'left_on'. (Instead, users have a MultiIndex). That means the - # self.left_on in this function is always empty in a pd.merge(), but - # a pd.merge_asof(left_index=True, left_by=...) will result in a - # self.left_on array with a None in the middle of it. This requires - # a work-around as designated in the code below. - # See _validate_specification() for where this happens. - - # ugh, spaghetti re #733 - if _any(self.left_on) and _any(self.right_on): - for lk, rk in zip(self.left_on, self.right_on): - if is_lkey(lk): - left_keys.append(lk) - if is_rkey(rk): - right_keys.append(rk) - join_names.append(None) # what to do? 
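As an aside, the _indicator_pre_merge/_indicator_post_merge pair above is the machinery behind the public indicator flag; its observable behavior, sketched with invented frames:

    import pandas as pd

    left = pd.DataFrame({'k': [1, 2], 'x': ['a', 'b']})
    right = pd.DataFrame({'k': [2, 3], 'y': ['c', 'd']})

    out = pd.merge(left, right, on='k', how='outer', indicator=True)
    # The temporary _left_indicator/_right_indicator columns are dropped;
    # only the categorical '_merge' column survives.
    assert list(out['_merge']) == ['left_only', 'both', 'right_only']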
- else: - if rk is not None: - right_keys.append(right[rk]._values) - join_names.append(rk) - else: - # work-around for merge_asof(right_index=True) - right_keys.append(right.index) - join_names.append(right.index.name) - else: - if not is_rkey(rk): - if rk is not None: - right_keys.append(right[rk]._values) - else: - # work-around for merge_asof(right_index=True) - right_keys.append(right.index) - if lk is not None and lk == rk: - # avoid key upcast in corner case (length-0) - if len(left) > 0: - right_drop.append(rk) - else: - left_drop.append(lk) - else: - right_keys.append(rk) - if lk is not None: - left_keys.append(left[lk]._values) - join_names.append(lk) - else: - # work-around for merge_asof(left_index=True) - left_keys.append(left.index) - join_names.append(left.index.name) - elif _any(self.left_on): - for k in self.left_on: - if is_lkey(k): - left_keys.append(k) - join_names.append(None) - else: - left_keys.append(left[k]._values) - join_names.append(k) - if isinstance(self.right.index, MultiIndex): - right_keys = [lev._values.take(lab) - for lev, lab in zip(self.right.index.levels, - self.right.index.labels)] - else: - right_keys = [self.right.index.values] - elif _any(self.right_on): - for k in self.right_on: - if is_rkey(k): - right_keys.append(k) - join_names.append(None) - else: - right_keys.append(right[k]._values) - join_names.append(k) - if isinstance(self.left.index, MultiIndex): - left_keys = [lev._values.take(lab) - for lev, lab in zip(self.left.index.levels, - self.left.index.labels)] - else: - left_keys = [self.left.index.values] - - if left_drop: - self.left = self.left.drop(left_drop, axis=1) - - if right_drop: - self.right = self.right.drop(right_drop, axis=1) - - return left_keys, right_keys, join_names - - def _maybe_coerce_merge_keys(self): - # we have valid merge keys but we may have to further - # coerce these if they are originally incompatible types - # - # for example if these are categorical, but are not dtype_equal - # or if we have object and integer dtypes - - for lk, rk, name in zip(self.left_join_keys, - self.right_join_keys, - self.join_names): - if (len(lk) and not len(rk)) or (not len(lk) and len(rk)): - continue - - # if either left or right is a categorical - # then they must match exactly in categories & ordered - if is_categorical_dtype(lk) and is_categorical_dtype(rk): - if lk.is_dtype_equal(rk): - continue - elif is_categorical_dtype(lk) or is_categorical_dtype(rk): - pass - - elif is_dtype_equal(lk.dtype, rk.dtype): - continue - - # if we are numeric, then allow differing - # kinds to proceed, eg. int64 and int8 - # further if we are object, but we infer to - # the same, then proceed - if (is_numeric_dtype(lk) and is_numeric_dtype(rk)): - if lk.dtype.kind == rk.dtype.kind: - continue - - # let's infer and see if we are ok - if lib.infer_dtype(lk) == lib.infer_dtype(rk): - continue - - # Houston, we have a problem! - # let's coerce to object - if name in self.left.columns: - self.left = self.left.assign( - **{name: self.left[name].astype(object)}) - if name in self.right.columns: - self.right = self.right.assign( - **{name: self.right[name].astype(object)}) - - def _validate_specification(self): - # Hm, any way to make this logic less complicated??
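One consequence of the kind check in _maybe_coerce_merge_keys above: numeric keys of differing widths merge directly, with no coercion to object. A small sketch with invented frames:

    import numpy as np
    import pandas as pd

    left = pd.DataFrame({'k': np.array([1, 2, 3], dtype='int64'), 'x': [1, 2, 3]})
    right = pd.DataFrame({'k': np.array([1, 2], dtype='int8'), 'y': [10, 20]})

    # int64 vs int8 share kind 'i', so the keys are left alone.
    out = pd.merge(left, right, on='k', how='inner')
    assert len(out) == 2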
- if self.on is None and self.left_on is None and self.right_on is None: - - if self.left_index and self.right_index: - self.left_on, self.right_on = (), () - elif self.left_index: - if self.right_on is None: - raise MergeError('Must pass right_on or right_index=True') - elif self.right_index: - if self.left_on is None: - raise MergeError('Must pass left_on or left_index=True') - else: - # use the common columns - common_cols = self.left.columns.intersection( - self.right.columns) - if len(common_cols) == 0: - raise MergeError('No common columns to perform merge on') - if not common_cols.is_unique: - raise MergeError("Data columns not unique: %s" - % repr(common_cols)) - self.left_on = self.right_on = common_cols - elif self.on is not None: - if self.left_on is not None or self.right_on is not None: - raise MergeError('Can only pass argument "on" OR "left_on" ' - 'and "right_on", not a combination of both.') - self.left_on = self.right_on = self.on - elif self.left_on is not None: - n = len(self.left_on) - if self.right_index: - if len(self.left_on) != self.right.index.nlevels: - raise ValueError('len(left_on) must equal the number ' - 'of levels in the index of "right"') - self.right_on = [None] * n - elif self.right_on is not None: - n = len(self.right_on) - if self.left_index: - if len(self.right_on) != self.left.index.nlevels: - raise ValueError('len(right_on) must equal the number ' - 'of levels in the index of "left"') - self.left_on = [None] * n - if len(self.right_on) != len(self.left_on): - raise ValueError("len(right_on) must equal len(left_on)") - - -def _get_join_indexers(left_keys, right_keys, sort=False, how='inner', - **kwargs): - """ - - Parameters - ---------- - left_keys: ndarray, Index, Series - right_keys: ndarray, Index, Series - sort: boolean, default False - how: string {'inner', 'outer', 'left', 'right'}, default 'inner' - - Returns - ------- - tuple of (left_indexer, right_indexer) - indexers into the left_keys, right_keys - - """ - from functools import partial - - assert len(left_keys) == len(right_keys), \ - 'left_key and right_keys must be the same length' - - # bind `sort` arg. of _factorize_keys - fkeys = partial(_factorize_keys, sort=sort) - - # get left & right join labels and num. of levels at each location - llab, rlab, shape = map(list, zip(* map(fkeys, left_keys, right_keys))) - - # get flat i8 keys from label lists - lkey, rkey = _get_join_keys(llab, rlab, shape, sort) - - # factorize keys to a dense i8 space - # `count` is the num. 
of unique keys - # set(lkey) | set(rkey) == range(count) - lkey, rkey, count = fkeys(lkey, rkey) - - # preserve left frame order if how == 'left' and sort == False - kwargs = copy.copy(kwargs) - if how == 'left': - kwargs['sort'] = sort - join_func = _join_functions[how] - - return join_func(lkey, rkey, count, **kwargs) - - -class _OrderedMerge(_MergeOperation): - _merge_type = 'ordered_merge' - - def __init__(self, left, right, on=None, left_on=None, right_on=None, - left_index=False, right_index=False, axis=1, - suffixes=('_x', '_y'), copy=True, - fill_method=None, how='outer'): - - self.fill_method = fill_method - _MergeOperation.__init__(self, left, right, on=on, left_on=left_on, - left_index=left_index, - right_index=right_index, - right_on=right_on, axis=axis, - how=how, suffixes=suffixes, - sort=True # factorize sorts - ) - - def get_result(self): - join_index, left_indexer, right_indexer = self._get_join_info() - - # this is a bit kludgy - ldata, rdata = self.left._data, self.right._data - lsuf, rsuf = self.suffixes - - llabels, rlabels = items_overlap_with_suffix(ldata.items, lsuf, - rdata.items, rsuf) - - if self.fill_method == 'ffill': - left_join_indexer = libjoin.ffill_indexer(left_indexer) - right_join_indexer = libjoin.ffill_indexer(right_indexer) - else: - left_join_indexer = left_indexer - right_join_indexer = right_indexer - - lindexers = { - 1: left_join_indexer} if left_join_indexer is not None else {} - rindexers = { - 1: right_join_indexer} if right_join_indexer is not None else {} - - result_data = concatenate_block_managers( - [(ldata, lindexers), (rdata, rindexers)], - axes=[llabels.append(rlabels), join_index], - concat_axis=0, copy=self.copy) - - typ = self.left._constructor - result = typ(result_data).__finalize__(self, method=self._merge_type) - - self._maybe_add_join_keys(result, left_indexer, right_indexer) - - return result - - -def _asof_function(direction, on_type): - return getattr(libjoin, 'asof_join_%s_%s' % (direction, on_type), None) - - -def _asof_by_function(direction, on_type, by_type): - return getattr(libjoin, 'asof_join_%s_%s_by_%s' % - (direction, on_type, by_type), None) - - -_type_casters = { - 'int64_t': _ensure_int64, - 'double': _ensure_float64, - 'object': _ensure_object, -} - -_cython_types = { - 'uint8': 'uint8_t', - 'uint32': 'uint32_t', - 'uint16': 'uint16_t', - 'uint64': 'uint64_t', - 'int8': 'int8_t', - 'int32': 'int32_t', - 'int16': 'int16_t', - 'int64': 'int64_t', - 'float16': 'error', - 'float32': 'float', - 'float64': 'double', -} - - -def _get_cython_type(dtype): - """ Given a dtype, return a C name like 'int64_t' or 'double' """ - type_name = _get_dtype(dtype).name - ctype = _cython_types.get(type_name, 'object') - if ctype == 'error': - raise MergeError('unsupported type: ' + type_name) - return ctype - - -def _get_cython_type_upcast(dtype): - """ Upcast a dtype to 'int64_t', 'double', or 'object' """ - if is_integer_dtype(dtype): - return 'int64_t' - elif is_float_dtype(dtype): - return 'double' - else: - return 'object' - - -class _AsOfMerge(_OrderedMerge): - _merge_type = 'asof_merge' - - def __init__(self, left, right, on=None, left_on=None, right_on=None, - left_index=False, right_index=False, - by=None, left_by=None, right_by=None, - axis=1, suffixes=('_x', '_y'), copy=True, - fill_method=None, - how='asof', tolerance=None, - allow_exact_matches=True, - direction='backward'): - - self.by = by - self.left_by = left_by - self.right_by = right_by - self.tolerance = tolerance - self.allow_exact_matches = allow_exact_matches 
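The factorize-to-dense-integers step described in _get_join_indexers above can be mimicked with the public pd.factorize; this is a rough sketch of the idea, not the internal code path:

    import numpy as np
    import pandas as pd

    lk = np.array(['a', 'b', 'a'], dtype=object)
    rk = np.array(['b', 'c'], dtype=object)

    # Factorize the concatenation so both sides share one code space.
    codes, uniques = pd.factorize(np.concatenate([lk, rk]))
    llab, rlab = codes[:len(lk)], codes[len(lk):]
    # llab == [0, 1, 0], rlab == [1, 2], count == len(uniques) == 3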
- self.direction = direction - - _OrderedMerge.__init__(self, left, right, on=on, left_on=left_on, - right_on=right_on, left_index=left_index, - right_index=right_index, axis=axis, - how=how, suffixes=suffixes, - fill_method=fill_method) - - def _validate_specification(self): - super(_AsOfMerge, self)._validate_specification() - - # we only allow on to be a single item for on - if len(self.left_on) != 1 and not self.left_index: - raise MergeError("can only asof on a key for left") - - if len(self.right_on) != 1 and not self.right_index: - raise MergeError("can only asof on a key for right") - - if self.left_index and isinstance(self.left.index, MultiIndex): - raise MergeError("left can only have one index") - - if self.right_index and isinstance(self.right.index, MultiIndex): - raise MergeError("right can only have one index") - - # set 'by' columns - if self.by is not None: - if self.left_by is not None or self.right_by is not None: - raise MergeError('Can only pass by OR left_by ' - 'and right_by') - self.left_by = self.right_by = self.by - if self.left_by is None and self.right_by is not None: - raise MergeError('missing left_by') - if self.left_by is not None and self.right_by is None: - raise MergeError('missing right_by') - - # add 'by' to our key-list so we can have it in the - # output as a key - if self.left_by is not None: - if not is_list_like(self.left_by): - self.left_by = [self.left_by] - if not is_list_like(self.right_by): - self.right_by = [self.right_by] - - if len(self.left_by) != len(self.right_by): - raise MergeError('left_by and right_by must be same length') - - self.left_on = self.left_by + list(self.left_on) - self.right_on = self.right_by + list(self.right_on) - - # check 'direction' is valid - if self.direction not in ['backward', 'forward', 'nearest']: - raise MergeError('direction invalid: ' + self.direction) - - @property - def _asof_key(self): - """ This is our asof key, the 'on' """ - return self.left_on[-1] - - def _get_merge_keys(self): - - # note this function has side effects - (left_join_keys, - right_join_keys, - join_names) = super(_AsOfMerge, self)._get_merge_keys() - - # validate index types are the same - for lk, rk in zip(left_join_keys, right_join_keys): - if not is_dtype_equal(lk.dtype, rk.dtype): - raise MergeError("incompatible merge keys, " - "must be the same type") - - # validate tolerance; must be a Timedelta if we have a DTI - if self.tolerance is not None: - - if self.left_index: - lt = self.left.index - else: - lt = left_join_keys[-1] - - msg = "incompatible tolerance, must be compat " \ - "with type {0}".format(type(lt)) - - if is_datetime64_dtype(lt) or is_datetime64tz_dtype(lt): - if not isinstance(self.tolerance, Timedelta): - raise MergeError(msg) - if self.tolerance < Timedelta(0): - raise MergeError("tolerance must be positive") - - elif is_int64_dtype(lt): - if not is_integer(self.tolerance): - raise MergeError(msg) - if self.tolerance < 0: - raise MergeError("tolerance must be positive") - - else: - raise MergeError("key must be integer or timestamp") - - # validate allow_exact_matches - if not is_bool(self.allow_exact_matches): - raise MergeError("allow_exact_matches must be boolean, " - "passed {0}".format(self.allow_exact_matches)) - - return left_join_keys, right_join_keys, join_names - - def _get_join_indexers(self): - """ return the join indexers """ - - def flip(xs): - """ unlike np.transpose, this returns an array of tuples """ - labels = list(string.ascii_lowercase[:len(xs)]) - dtypes = [x.dtype for x in xs] - 
labeled_dtypes = list(zip(labels, dtypes)) - return np.array(lzip(*xs), labeled_dtypes) - - # values to compare - left_values = (self.left.index.values if self.left_index else - self.left_join_keys[-1]) - right_values = (self.right.index.values if self.right_index else - self.right_join_keys[-1]) - tolerance = self.tolerance - - # we required sortedness in the join keys - msg = " keys must be sorted" - if not Index(left_values).is_monotonic: - raise ValueError('left' + msg) - if not Index(right_values).is_monotonic: - raise ValueError('right' + msg) - - # initial type conversion as needed - if needs_i8_conversion(left_values): - left_values = left_values.view('i8') - right_values = right_values.view('i8') - if tolerance is not None: - tolerance = tolerance.value - - # a "by" parameter requires special handling - if self.left_by is not None: - # remove 'on' parameter from values if one existed - if self.left_index and self.right_index: - left_by_values = self.left_join_keys - right_by_values = self.right_join_keys - else: - left_by_values = self.left_join_keys[0:-1] - right_by_values = self.right_join_keys[0:-1] - - # get tuple representation of values if more than one - if len(left_by_values) == 1: - left_by_values = left_by_values[0] - right_by_values = right_by_values[0] - else: - left_by_values = flip(left_by_values) - right_by_values = flip(right_by_values) - - # upcast 'by' parameter because HashTable is limited - by_type = _get_cython_type_upcast(left_by_values.dtype) - by_type_caster = _type_casters[by_type] - left_by_values = by_type_caster(left_by_values) - right_by_values = by_type_caster(right_by_values) - - # choose appropriate function by type - on_type = _get_cython_type(left_values.dtype) - func = _asof_by_function(self.direction, on_type, by_type) - return func(left_values, - right_values, - left_by_values, - right_by_values, - self.allow_exact_matches, - tolerance) - else: - # choose appropriate function by type - on_type = _get_cython_type(left_values.dtype) - func = _asof_function(self.direction, on_type) - return func(left_values, - right_values, - self.allow_exact_matches, - tolerance) - - -def _get_multiindex_indexer(join_keys, index, sort): - from functools import partial - - # bind `sort` argument - fkeys = partial(_factorize_keys, sort=sort) - - # left & right join labels and num. 
of levels at each location - rlab, llab, shape = map(list, zip(* map(fkeys, index.levels, join_keys))) - if sort: - rlab = list(map(np.take, rlab, index.labels)) - else: - i8copy = lambda a: a.astype('i8', subok=False, copy=True) - rlab = list(map(i8copy, index.labels)) - - # fix right labels if there were any nulls - for i in range(len(join_keys)): - mask = index.labels[i] == -1 - if mask.any(): - # check if there already was any nulls at this location - # if there was, it is factorized to `shape[i] - 1` - a = join_keys[i][llab[i] == shape[i] - 1] - if a.size == 0 or not a[0] != a[0]: - shape[i] += 1 - - rlab[i][mask] = shape[i] - 1 - - # get flat i8 join keys - lkey, rkey = _get_join_keys(llab, rlab, shape, sort) - - # factorize keys to a dense i8 space - lkey, rkey, count = fkeys(lkey, rkey) - - return libjoin.left_outer_join(lkey, rkey, count, sort=sort) - - -def _get_single_indexer(join_key, index, sort=False): - left_key, right_key, count = _factorize_keys(join_key, index, sort=sort) - - left_indexer, right_indexer = libjoin.left_outer_join( - _ensure_int64(left_key), - _ensure_int64(right_key), - count, sort=sort) - - return left_indexer, right_indexer - - -def _left_join_on_index(left_ax, right_ax, join_keys, sort=False): - if len(join_keys) > 1: - if not ((isinstance(right_ax, MultiIndex) and - len(join_keys) == right_ax.nlevels)): - raise AssertionError("If more than one join key is given then " - "'right_ax' must be a MultiIndex and the " - "number of join keys must be the number of " - "levels in right_ax") - - left_indexer, right_indexer = \ - _get_multiindex_indexer(join_keys, right_ax, sort=sort) - else: - jkey = join_keys[0] - - left_indexer, right_indexer = \ - _get_single_indexer(jkey, right_ax, sort=sort) - - if sort or len(left_ax) != len(left_indexer): - # if asked to sort or there are 1-to-many matches - join_index = left_ax.take(left_indexer) - return join_index, left_indexer, right_indexer - - # left frame preserves order & length of its index - return left_ax, None, right_indexer - - -def _right_outer_join(x, y, max_groups): - right_indexer, left_indexer = libjoin.left_outer_join(y, x, max_groups) - return left_indexer, right_indexer - - -_join_functions = { - 'inner': libjoin.inner_join, - 'left': libjoin.left_outer_join, - 'right': _right_outer_join, - 'outer': libjoin.full_outer_join, -} - - -def _factorize_keys(lk, rk, sort=True): - if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk): - lk = lk.values - rk = rk.values - - # if we exactly match in categories, allow us to use codes - if (is_categorical_dtype(lk) and - is_categorical_dtype(rk) and - lk.is_dtype_equal(rk)): - return lk.codes, rk.codes, len(lk.categories) - - if is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk): - klass = libhashtable.Int64Factorizer - lk = _ensure_int64(com._values_from_object(lk)) - rk = _ensure_int64(com._values_from_object(rk)) - else: - klass = libhashtable.Factorizer - lk = _ensure_object(lk) - rk = _ensure_object(rk) - - rizer = klass(max(len(lk), len(rk))) - - llab = rizer.factorize(lk) - rlab = rizer.factorize(rk) - - count = rizer.get_count() - - if sort: - uniques = rizer.uniques.to_array() - llab, rlab = _sort_labels(uniques, llab, rlab) - - # NA group - lmask = llab == -1 - lany = lmask.any() - rmask = rlab == -1 - rany = rmask.any() - - if lany or rany: - if lany: - np.putmask(llab, lmask, count) - if rany: - np.putmask(rlab, rmask, count) - count += 1 - - return llab, rlab, count - - -def _sort_labels(uniques, left, right): - if not 
isinstance(uniques, np.ndarray): - # tuplesafe - uniques = Index(uniques).values - - l = len(left) - labels = np.concatenate([left, right]) - - _, new_labels = algos.safe_sort(uniques, labels, na_sentinel=-1) - new_labels = _ensure_int64(new_labels) - new_left, new_right = new_labels[:l], new_labels[l:] - - return new_left, new_right - - -def _get_join_keys(llab, rlab, shape, sort): - - # how many levels can be done without overflow - pred = lambda i: not is_int64_overflow_possible(shape[:i]) - nlev = next(filter(pred, range(len(shape), 0, -1))) - - # get keys for the first `nlev` levels - stride = np.prod(shape[1:nlev], dtype='i8') - lkey = stride * llab[0].astype('i8', subok=False, copy=False) - rkey = stride * rlab[0].astype('i8', subok=False, copy=False) - - for i in range(1, nlev): - stride //= shape[i] - lkey += llab[i] * stride - rkey += rlab[i] * stride - - if nlev == len(shape): # all done! - return lkey, rkey - - # densify current keys to avoid overflow - lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort) - - llab = [lkey] + llab[nlev:] - rlab = [rkey] + rlab[nlev:] - shape = [count] + shape[nlev:] - - return _get_join_keys(llab, rlab, shape, sort) - - -def _should_fill(lname, rname): - if (not isinstance(lname, compat.string_types) or - not isinstance(rname, compat.string_types)): - return True - return lname == rname - - -def _any(x): - return x is not None and len(x) > 0 and any([y is not None for y in x]) diff --git a/pandas/tools/util.py b/pandas/tools/util.py deleted file mode 100644 index baf968440858d..0000000000000 --- a/pandas/tools/util.py +++ /dev/null @@ -1,245 +0,0 @@ -import numpy as np -import pandas._libs.lib as lib - -from pandas.core.dtypes.common import ( - is_number, - is_numeric_dtype, - is_datetime_or_timedelta_dtype, - is_list_like, - _ensure_object, - is_decimal, - is_scalar as isscalar) - -from pandas.core.dtypes.cast import maybe_downcast_to_dtype - -import pandas as pd -from pandas.compat import reduce -from pandas.core.index import Index -from pandas.core import common as com - - -def match(needles, haystack): - haystack = Index(haystack) - needles = Index(needles) - return haystack.get_indexer(needles) - - -def cartesian_product(X): - """ - Numpy version of itertools.product or pandas.compat.product. - Sometimes faster (for large inputs)... - - Parameters - ---------- - X : list-like of list-likes - - Returns - ------- - product : list of ndarrays - - Examples - -------- - >>> cartesian_product([list('ABC'), [1, 2]]) - [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'), - array([1, 2, 1, 2, 1, 2])] - - See also - -------- - itertools.product : Cartesian product of input iterables. Equivalent to - nested for-loops. - pandas.compat.product : An alias for itertools.product. 
- """ - msg = "Input must be a list-like of list-likes" - if not is_list_like(X): - raise TypeError(msg) - for x in X: - if not is_list_like(x): - raise TypeError(msg) - - if len(X) == 0: - return [] - - lenX = np.fromiter((len(x) for x in X), dtype=np.intp) - cumprodX = np.cumproduct(lenX) - - a = np.roll(cumprodX, 1) - a[0] = 1 - - if cumprodX[-1] != 0: - b = cumprodX[-1] / cumprodX - else: - # if any factor is empty, the cartesian product is empty - b = np.zeros_like(cumprodX) - - return [np.tile(np.repeat(np.asarray(com._values_from_object(x)), b[i]), - np.product(a[i])) - for i, x in enumerate(X)] - - -def _compose2(f, g): - """Compose 2 callables""" - return lambda *args, **kwargs: f(g(*args, **kwargs)) - - -def compose(*funcs): - """Compose 2 or more callables""" - assert len(funcs) > 1, 'At least 2 callables must be passed to compose' - return reduce(_compose2, funcs) - - -def to_numeric(arg, errors='raise', downcast=None): - """ - Convert argument to a numeric type. - - Parameters - ---------- - arg : list, tuple, 1-d array, or Series - errors : {'ignore', 'raise', 'coerce'}, default 'raise' - - If 'raise', then invalid parsing will raise an exception - - If 'coerce', then invalid parsing will be set as NaN - - If 'ignore', then invalid parsing will return the input - downcast : {'integer', 'signed', 'unsigned', 'float'} , default None - If not None, and if the data has been successfully cast to a - numerical dtype (or if the data was numeric to begin with), - downcast that resulting data to the smallest numerical dtype - possible according to the following rules: - - - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) - - 'unsigned': smallest unsigned int dtype (min.: np.uint8) - - 'float': smallest float dtype (min.: np.float32) - - As this behaviour is separate from the core conversion to - numeric values, any errors raised during the downcasting - will be surfaced regardless of the value of the 'errors' input. - - In addition, downcasting will only occur if the size - of the resulting data's dtype is strictly larger than - the dtype it is to be cast to, so if none of the dtypes - checked satisfy that specification, no downcasting will be - performed on the data. - - .. versionadded:: 0.19.0 - - Returns - ------- - ret : numeric if parsing succeeded. - Return type depends on input. 
Series if Series, otherwise ndarray - - Examples - -------- - Take separate series and convert to numeric, coercing when told to - - >>> import pandas as pd - >>> s = pd.Series(['1.0', '2', -3]) - >>> pd.to_numeric(s) - 0 1.0 - 1 2.0 - 2 -3.0 - dtype: float64 - >>> pd.to_numeric(s, downcast='float') - 0 1.0 - 1 2.0 - 2 -3.0 - dtype: float32 - >>> pd.to_numeric(s, downcast='signed') - 0 1 - 1 2 - 2 -3 - dtype: int8 - >>> s = pd.Series(['apple', '1.0', '2', -3]) - >>> pd.to_numeric(s, errors='ignore') - 0 apple - 1 1.0 - 2 2 - 3 -3 - dtype: object - >>> pd.to_numeric(s, errors='coerce') - 0 NaN - 1 1.0 - 2 2.0 - 3 -3.0 - dtype: float64 - """ - if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'): - raise ValueError('invalid downcasting method provided') - - is_series = False - is_index = False - is_scalar = False - - if isinstance(arg, pd.Series): - is_series = True - values = arg.values - elif isinstance(arg, pd.Index): - is_index = True - values = arg.asi8 - if values is None: - values = arg.values - elif isinstance(arg, (list, tuple)): - values = np.array(arg, dtype='O') - elif isscalar(arg): - if is_decimal(arg): - return float(arg) - if is_number(arg): - return arg - is_scalar = True - values = np.array([arg], dtype='O') - elif getattr(arg, 'ndim', 1) > 1: - raise TypeError('arg must be a list, tuple, 1-d array, or Series') - else: - values = arg - - try: - if is_numeric_dtype(values): - pass - elif is_datetime_or_timedelta_dtype(values): - values = values.astype(np.int64) - else: - values = _ensure_object(values) - coerce_numeric = False if errors in ('ignore', 'raise') else True - values = lib.maybe_convert_numeric(values, set(), - coerce_numeric=coerce_numeric) - - except Exception: - if errors == 'raise': - raise - - # attempt downcast only if the data has been successfully converted - # to a numerical dtype and if a downcast method has been specified - if downcast is not None and is_numeric_dtype(values): - typecodes = None - - if downcast in ('integer', 'signed'): - typecodes = np.typecodes['Integer'] - elif downcast == 'unsigned' and np.min(values) >= 0: - typecodes = np.typecodes['UnsignedInteger'] - elif downcast == 'float': - typecodes = np.typecodes['Float'] - - # pandas support goes only to np.float32, - # as float dtypes smaller than that are - # extremely rare and not well supported - float_32_char = np.dtype(np.float32).char - float_32_ind = typecodes.index(float_32_char) - typecodes = typecodes[float_32_ind:] - - if typecodes is not None: - # from smallest to largest - for dtype in typecodes: - if np.dtype(dtype).itemsize <= values.dtype.itemsize: - values = maybe_downcast_to_dtype(values, dtype) - - # successful conversion - if values.dtype == dtype: - break - - if is_series: - return pd.Series(values, index=arg.index, name=arg.name) - elif is_index: - # because we want to coerce to numeric if possible, - # do not use _shallow_copy_with_infer - return Index(values, name=arg.name) - elif is_scalar: - return values[0] - else: - return values diff --git a/setup.py b/setup.py index 6fc66e2355c0f..69b9a974b9935 100755 --- a/setup.py +++ b/setup.py @@ -642,6 +642,7 @@ def pxd(name): 'pandas.core.dtypes', 'pandas.core.indexes', 'pandas.core.computation', + 'pandas.core.reshape', 'pandas.core.sparse', 'pandas.errors', 'pandas.io', @@ -673,7 +674,6 @@ def pxd(name): 'pandas.tests.series', 'pandas.tests.scalar', 'pandas.tests.tseries', - 'pandas.tests.tools', 'pandas.tests.plotting', 'pandas.tools', 'pandas.tseries', @@ -703,7 +703,7 @@ def pxd(name): 
'data/html_encoding/*.html', 'json/data/*.json'], 'pandas.tests.io.formats': ['data/*.csv'], - 'pandas.tests.tools': ['data/*.csv'], + 'pandas.tests.reshape': ['data/*.csv'], 'pandas.tests.tseries': ['data/*.pickle'], 'pandas.io.formats': ['templates/*.tpl'] }, From c8dafb5a7ae9fe42b9d15c47082a6fb139e78b5d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 18 Apr 2017 12:17:07 +0000 Subject: [PATCH 419/933] CLN: reorg pandas.tseries (#16040) * CLN: move pandas/tseries/resample.py -> pandas/core/resample.py closes #13634 * CLN: move pandas.tseries.period -> pandas.core.indexes.period * CLN: move pandas.tseries.tdi -> pandas.core.indexes.timedeltas * CLN: move pandas.tseries.base -> pandas.core.indexes.datetimelike * CLN: pandas.tseries.common -> pandas.core.indexes.accessors * CLN: move pandas.tseries.index -> pandas.core.indexes.datetimes * CLN: move pandas.tseries.timedeltas, pandas.tseries.tools -> pandas.core.tools * move to_numeric to pandas.core.tools.numeric --- doc/source/api.rst | 2 +- pandas/_libs/period.pyx | 2 +- pandas/_libs/tslib.pyx | 2 +- pandas/compat/pickle_compat.py | 8 +- pandas/core/api.py | 17 +- pandas/core/computation/pytables.py | 2 +- pandas/core/datetools.py | 2 +- pandas/core/dtypes/cast.py | 171 +------- pandas/core/frame.py | 6 +- pandas/core/generic.py | 12 +- pandas/core/groupby.py | 6 +- .../common.py => core/indexes/accessors.py} | 6 +- pandas/core/indexes/api.py | 11 +- pandas/core/indexes/base.py | 20 +- .../base.py => core/indexes/datetimelike.py} | 9 +- .../index.py => core/indexes/datetimes.py} | 16 +- pandas/{tseries => core/indexes}/period.py | 16 +- .../tdi.py => core/indexes/timedeltas.py} | 9 +- pandas/core/internals.py | 2 +- pandas/core/ops.py | 4 +- pandas/{tseries => core}/resample.py | 6 +- pandas/core/series.py | 10 +- pandas/core/tools/__init__.py | 0 .../tools.py => core/tools/datetimes.py} | 2 +- pandas/core/tools/numeric.py | 170 ++++++++ pandas/{tseries => core/tools}/timedeltas.py | 0 pandas/io/excel.py | 2 +- pandas/io/formats/format.py | 6 +- pandas/io/parsers.py | 2 +- pandas/io/sql.py | 2 +- pandas/plotting/_converter.py | 6 +- pandas/plotting/_core.py | 2 +- pandas/plotting/_timeseries.py | 8 +- pandas/tests/dtypes/test_cast.py | 369 +---------------- pandas/tests/frame/test_alter_axes.py | 7 +- pandas/tests/frame/test_analytics.py | 2 +- pandas/tests/frame/test_timeseries.py | 2 +- pandas/tests/groupby/test_groupby.py | 5 +- pandas/tests/groupby/test_timegrouper.py | 4 +- pandas/tests/indexes/common.py | 9 +- .../indexes/datetimes/test_date_range.py | 2 +- pandas/tests/indexes/datetimes/test_ops.py | 2 +- pandas/tests/indexes/datetimes/test_setops.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 4 +- .../tests/indexes/period/test_construction.py | 2 +- pandas/tests/indexes/period/test_ops.py | 2 +- pandas/tests/indexes/period/test_setops.py | 2 +- pandas/tests/indexes/period/test_tools.py | 2 +- pandas/tests/indexes/test_base.py | 2 +- pandas/tests/io/json/test_ujson.py | 2 +- pandas/tests/io/parser/parse_dates.py | 4 +- pandas/tests/io/test_sql.py | 2 +- pandas/tests/plotting/test_datetimelike.py | 8 +- pandas/tests/reshape/test_concat.py | 2 +- pandas/tests/scalar/test_period.py | 2 +- pandas/tests/scalar/test_timedelta.py | 2 +- pandas/tests/series/test_analytics.py | 4 +- pandas/tests/series/test_api.py | 2 +- pandas/tests/series/test_combine_concat.py | 2 +- pandas/tests/series/test_constructors.py | 2 +- pandas/tests/series/test_datetime_values.py | 6 +- pandas/tests/series/test_internals.py | 2 +- 
pandas/tests/series/test_operators.py | 4 +- pandas/tests/series/test_period.py | 2 +- pandas/tests/series/test_quantile.py | 2 +- pandas/tests/series/test_timeseries.py | 6 +- pandas/tests/sparse/test_frame.py | 2 +- pandas/tests/test_base.py | 2 +- pandas/tests/test_categorical.py | 2 +- pandas/tests/{tseries => }/test_resample.py | 10 +- pandas/tests/tools/__init__.py | 0 pandas/tests/tools/test_numeric.py | 371 ++++++++++++++++++ pandas/tests/tseries/test_frequencies.py | 4 +- pandas/tests/tseries/test_offsets.py | 8 +- pandas/tests/tseries/test_timezones.py | 4 +- pandas/tseries/api.py | 6 - pandas/tseries/offsets.py | 2 +- setup.py | 2 + 78 files changed, 727 insertions(+), 697 deletions(-) rename pandas/{tseries/common.py => core/indexes/accessors.py} (97%) rename pandas/{tseries/base.py => core/indexes/datetimelike.py} (99%) rename pandas/{tseries/index.py => core/indexes/datetimes.py} (99%) rename pandas/{tseries => core/indexes}/period.py (98%) rename pandas/{tseries/tdi.py => core/indexes/timedeltas.py} (99%) rename pandas/{tseries => core}/resample.py (99%) mode change 100755 => 100644 create mode 100644 pandas/core/tools/__init__.py rename pandas/{tseries/tools.py => core/tools/datetimes.py} (99%) create mode 100644 pandas/core/tools/numeric.py rename pandas/{tseries => core/tools}/timedeltas.py (100%) rename pandas/tests/{tseries => }/test_resample.py (99%) mode change 100755 => 100644 create mode 100644 pandas/tests/tools/__init__.py create mode 100644 pandas/tests/tools/test_numeric.py diff --git a/doc/source/api.rst b/doc/source/api.rst index 868f0d7f9c962..caa5498db1ebf 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1761,7 +1761,7 @@ The following methods are available only for ``DataFrameGroupBy`` objects. Resampling ---------- -.. currentmodule:: pandas.tseries.resample +.. currentmodule:: pandas.core.resample Resampler objects are returned by resample calls: :func:`pandas.DataFrame.resample`, :func:`pandas.Series.resample`. diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index f30035910a62f..1db31387de5a7 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -34,7 +34,7 @@ from tslib cimport ( ) from pandas.tseries import offsets -from pandas.tseries.tools import parse_time_string +from pandas.core.tools.datetimes import parse_time_string from pandas.tseries import frequencies cdef int64_t NPY_NAT = util.get_nat() diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 47679966e3d5c..c471d46262484 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -502,7 +502,7 @@ class Timestamp(_Timestamp): """ Return an period of which this timestamp is an observation. 
""" - from pandas.tseries.period import Period + from pandas import Period if freq is None: freq = self.freq diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index f7d451ce7c92f..6df365a1cd898 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -94,7 +94,13 @@ def load_reduce(self): ('pandas.indexes.range', 'RangeIndex'): ('pandas.core.indexes.range', 'RangeIndex'), ('pandas.indexes.multi', 'MultiIndex'): - ('pandas.core.indexes.multi', 'MultiIndex') + ('pandas.core.indexes.multi', 'MultiIndex'), + ('pandas.tseries.index', '_new_DatetimeIndex'): + ('pandas.core.indexes.datetimes', '_new_DatetimeIndex'), + ('pandas.tseries.index', 'DatetimeIndex'): + ('pandas.core.indexes.datetimes', 'DatetimeIndex'), + ('pandas.tseries.period', 'PeriodIndex'): + ('pandas.core.indexes.period', 'PeriodIndex') } diff --git a/pandas/core/api.py b/pandas/core/api.py index f3191283b85eb..3e84720c32a1c 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -11,7 +11,12 @@ from pandas.io.formats.format import set_eng_float_format from pandas.core.index import (Index, CategoricalIndex, Int64Index, UInt64Index, RangeIndex, Float64Index, - MultiIndex, IntervalIndex) + MultiIndex, IntervalIndex, + TimedeltaIndex, DatetimeIndex, + PeriodIndex, NaT) +from pandas.core.indexes.period import Period, period_range, pnow +from pandas.core.indexes.timedeltas import Timedelta, timedelta_range +from pandas.core.indexes.datetimes import Timestamp, date_range, bdate_range from pandas.core.indexes.interval import Interval, interval_range from pandas.core.series import Series @@ -23,13 +28,11 @@ lreshape, wide_to_long) from pandas.core.indexing import IndexSlice -from pandas.core.dtypes.cast import to_numeric +from pandas.core.tools.numeric import to_numeric from pandas.tseries.offsets import DateOffset -from pandas.tseries.tools import to_datetime -from pandas.tseries.index import (DatetimeIndex, Timestamp, - date_range, bdate_range) -from pandas.tseries.tdi import TimedeltaIndex, Timedelta -from pandas.tseries.period import Period, PeriodIndex +from pandas.core.tools.datetimes import to_datetime +from pandas.core.tools.timedeltas import to_timedelta +from pandas.core.resample import TimeGrouper # see gh-14094. 
from pandas.util.depr_module import _DeprecatedModule diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 285ff346158a0..5870090856ff9 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -14,7 +14,7 @@ from pandas.core.computation.ops import is_term, UndefinedVariableError from pandas.core.computation.expr import BaseExprVisitor from pandas.core.computation.common import _ensure_decoded -from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type +from pandas.core.tools.timedeltas import _coerce_scalar_to_timedelta_type class Scope(expr.Scope): diff --git a/pandas/core/datetools.py b/pandas/core/datetools.py index bfc3f3d4e4743..3444d09c6ed1b 100644 --- a/pandas/core/datetools.py +++ b/pandas/core/datetools.py @@ -4,7 +4,7 @@ import warnings -from pandas.tseries.tools import * +from pandas.core.tools.datetimes import * from pandas.tseries.offsets import * from pandas.tseries.frequencies import * diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3c1f480787d3a..a5e12e8262579 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -5,7 +5,6 @@ import numpy as np import warnings -import pandas as pd from pandas._libs import tslib, lib from pandas._libs.tslib import iNaT from pandas.compat import string_types, text_type, PY3 @@ -19,8 +18,6 @@ is_integer_dtype, is_datetime_or_timedelta_dtype, is_bool_dtype, is_scalar, - is_numeric_dtype, is_decimal, - is_number, _string_dtypes, _coerce_to_dtype, _ensure_int8, _ensure_int16, @@ -29,7 +26,7 @@ _POSSIBLY_CAST_DTYPES) from .dtypes import ExtensionDtype, DatetimeTZDtype, PeriodDtype from .generic import (ABCDatetimeIndex, ABCPeriodIndex, - ABCSeries, ABCIndexClass) + ABCSeries) from .missing import isnull, notnull from .inference import is_list_like @@ -548,7 +545,7 @@ def coerce_to_dtypes(result, dtypes): if len(result) != len(dtypes): raise AssertionError("_coerce_to_dtypes requires equal len arrays") - from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type + from pandas.core.tools.timedeltas import _coerce_scalar_to_timedelta_type def conv(r, dtype): try: @@ -670,7 +667,7 @@ def maybe_convert_objects(values, convert_dates=True, convert_numeric=True, if convert_timedeltas and values.dtype == np.object_: if convert_timedeltas == 'coerce': - from pandas.tseries.timedeltas import to_timedelta + from pandas.core.tools.timedeltas import to_timedelta new_values = to_timedelta(values, coerce=True) # if we are all nans then leave me alone @@ -872,8 +869,8 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """ - from pandas.tseries.timedeltas import to_timedelta - from pandas.tseries.tools import to_datetime + from pandas.core.tools.timedeltas import to_timedelta + from pandas.core.tools.datetimes import to_datetime if dtype is not None: if isinstance(dtype, string_types): @@ -1029,161 +1026,3 @@ def find_common_type(types): return np.object return np.find_common_type(types, []) - - -def to_numeric(arg, errors='raise', downcast=None): - """ - Convert argument to a numeric type. 
- - Parameters - ---------- - arg : list, tuple, 1-d array, or Series - errors : {'ignore', 'raise', 'coerce'}, default 'raise' - - If 'raise', then invalid parsing will raise an exception - - If 'coerce', then invalid parsing will be set as NaN - - If 'ignore', then invalid parsing will return the input - downcast : {'integer', 'signed', 'unsigned', 'float'} , default None - If not None, and if the data has been successfully cast to a - numerical dtype (or if the data was numeric to begin with), - downcast that resulting data to the smallest numerical dtype - possible according to the following rules: - - - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) - - 'unsigned': smallest unsigned int dtype (min.: np.uint8) - - 'float': smallest float dtype (min.: np.float32) - - As this behaviour is separate from the core conversion to - numeric values, any errors raised during the downcasting - will be surfaced regardless of the value of the 'errors' input. - - In addition, downcasting will only occur if the size - of the resulting data's dtype is strictly larger than - the dtype it is to be cast to, so if none of the dtypes - checked satisfy that specification, no downcasting will be - performed on the data. - - .. versionadded:: 0.19.0 - - Returns - ------- - ret : numeric if parsing succeeded. - Return type depends on input. Series if Series, otherwise ndarray - - Examples - -------- - Take separate series and convert to numeric, coercing when told to - - >>> import pandas as pd - >>> s = pd.Series(['1.0', '2', -3]) - >>> pd.to_numeric(s) - 0 1.0 - 1 2.0 - 2 -3.0 - dtype: float64 - >>> pd.to_numeric(s, downcast='float') - 0 1.0 - 1 2.0 - 2 -3.0 - dtype: float32 - >>> pd.to_numeric(s, downcast='signed') - 0 1 - 1 2 - 2 -3 - dtype: int8 - >>> s = pd.Series(['apple', '1.0', '2', -3]) - >>> pd.to_numeric(s, errors='ignore') - 0 apple - 1 1.0 - 2 2 - 3 -3 - dtype: object - >>> pd.to_numeric(s, errors='coerce') - 0 NaN - 1 1.0 - 2 2.0 - 3 -3.0 - dtype: float64 - """ - if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'): - raise ValueError('invalid downcasting method provided') - - is_series = False - is_index = False - is_scalars = False - - if isinstance(arg, ABCSeries): - is_series = True - values = arg.values - elif isinstance(arg, ABCIndexClass): - is_index = True - values = arg.asi8 - if values is None: - values = arg.values - elif isinstance(arg, (list, tuple)): - values = np.array(arg, dtype='O') - elif is_scalar(arg): - if is_decimal(arg): - return float(arg) - if is_number(arg): - return arg - is_scalars = True - values = np.array([arg], dtype='O') - elif getattr(arg, 'ndim', 1) > 1: - raise TypeError('arg must be a list, tuple, 1-d array, or Series') - else: - values = arg - - try: - if is_numeric_dtype(values): - pass - elif is_datetime_or_timedelta_dtype(values): - values = values.astype(np.int64) - else: - values = _ensure_object(values) - coerce_numeric = False if errors in ('ignore', 'raise') else True - values = lib.maybe_convert_numeric(values, set(), - coerce_numeric=coerce_numeric) - - except Exception: - if errors == 'raise': - raise - - # attempt downcast only if the data has been successfully converted - # to a numerical dtype and if a downcast method has been specified - if downcast is not None and is_numeric_dtype(values): - typecodes = None - - if downcast in ('integer', 'signed'): - typecodes = np.typecodes['Integer'] - elif downcast == 'unsigned' and np.min(values) >= 0: - typecodes = np.typecodes['UnsignedInteger'] - elif downcast == 
'float': - typecodes = np.typecodes['Float'] - - # pandas support goes only to np.float32, - # as float dtypes smaller than that are - # extremely rare and not well supported - float_32_char = np.dtype(np.float32).char - float_32_ind = typecodes.index(float_32_char) - typecodes = typecodes[float_32_ind:] - - if typecodes is not None: - # from smallest to largest - for dtype in typecodes: - if np.dtype(dtype).itemsize <= values.dtype.itemsize: - values = maybe_downcast_to_dtype(values, dtype) - - # successful conversion - if values.dtype == dtype: - break - - if is_series: - return pd.Series(values, index=arg.index, name=arg.name) - elif is_index: - # because we want to coerce to numeric if possible, - # do not use _shallow_copy_with_infer - return pd.Index(values, name=arg.name) - elif is_scalars: - return values[0] - else: - return values diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9b9039455b948..153042d4a09c9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -82,9 +82,9 @@ from pandas.util.decorators import Appender, Substitution from pandas.util.validators import validate_bool_kwarg -from pandas.tseries.period import PeriodIndex -from pandas.tseries.index import DatetimeIndex -from pandas.tseries.tdi import TimedeltaIndex +from pandas.core.indexes.period import PeriodIndex +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex import pandas.core.base as base import pandas.core.common as com diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 841df3727e5a6..1555157610609 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -37,8 +37,8 @@ from pandas.core.index import (Index, MultiIndex, _ensure_index, InvalidIndexError) import pandas.core.indexing as indexing -from pandas.tseries.index import DatetimeIndex -from pandas.tseries.period import PeriodIndex, Period +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.period import PeriodIndex, Period from pandas.core.internals import BlockManager import pandas.core.algorithms as algos import pandas.core.common as com @@ -4363,7 +4363,7 @@ def asfreq(self, freq, method=None, how=None, normalize=False, To learn more about the frequency strings, please see `this link <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
""" - from pandas.tseries.resample import asfreq + from pandas.core.resample import asfreq return asfreq(self, freq, method=method, how=how, normalize=normalize, fill_value=fill_value) @@ -4573,8 +4573,8 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, 2000-01-01 00:00:00 0 6 12 18 2000-01-01 00:03:00 0 4 8 12 """ - from pandas.tseries.resample import (resample, - _maybe_process_deprecations) + from pandas.core.resample import (resample, + _maybe_process_deprecations) axis = self._get_axis_number(axis) r = resample(self, freq=rule, label=label, closed=closed, axis=axis, kind=kind, loffset=loffset, @@ -5361,7 +5361,7 @@ def truncate(self, before=None, after=None, axis=None, copy=True): # if we have a date index, convert to dates, otherwise # treat like a slice if ax.is_all_dates: - from pandas.tseries.tools import to_datetime + from pandas.core.tools.datetimes import to_datetime before = to_datetime(before) after = to_datetime(after) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 8f788aed3950d..1f715c685c27e 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -232,7 +232,7 @@ class Grouper(object): def __new__(cls, *args, **kwargs): if kwargs.get('freq') is not None: - from pandas.tseries.resample import TimeGrouper + from pandas.core.resample import TimeGrouper cls = TimeGrouper return super(Grouper, cls).__new__(cls) @@ -1227,7 +1227,7 @@ def resample(self, rule, *args, **kwargs): Provide resampling when using a TimeGrouper Return a new grouper with our resampler appended """ - from pandas.tseries.resample import get_resampler_for_grouping + from pandas.core.resample import get_resampler_for_grouping return get_resampler_for_grouping(self, rule, *args, **kwargs) @Substitution(name='groupby') @@ -3509,7 +3509,7 @@ def _decide_output_index(self, output, labels): def _wrap_applied_output(self, keys, values, not_indexed_same=False): from pandas.core.index import _all_indexes_same - from pandas.core.dtypes.cast import to_numeric + from pandas.core.tools.numeric import to_numeric if len(keys) == 0: return DataFrame(index=keys) diff --git a/pandas/tseries/common.py b/pandas/core/indexes/accessors.py similarity index 97% rename from pandas/tseries/common.py rename to pandas/core/indexes/accessors.py index 2154cfd4b2857..f1fb9a8ad93a7 100644 --- a/pandas/tseries/common.py +++ b/pandas/core/indexes/accessors.py @@ -12,10 +12,10 @@ is_list_like) from pandas.core.base import PandasDelegate, NoNewAttributesMixin -from pandas.tseries.index import DatetimeIndex +from pandas.core.indexes.datetimes import DatetimeIndex from pandas._libs.period import IncompatibleFrequency # noqa -from pandas.tseries.period import PeriodIndex -from pandas.tseries.tdi import TimedeltaIndex +from pandas.core.indexes.period import PeriodIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.algorithms import take_1d diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index d40f6da4c4ee5..d90c681abc03f 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -7,16 +7,21 @@ from pandas.core.indexes.numeric import (NumericIndex, Float64Index, # noqa Int64Index, UInt64Index) from pandas.core.indexes.range import RangeIndex # noqa +from pandas.core.indexes.timedeltas import TimedeltaIndex +from pandas.core.indexes.period import PeriodIndex +from pandas.core.indexes.datetimes import DatetimeIndex import pandas.core.common as com -import pandas._libs.lib as lib +from pandas._libs import lib +from 
pandas._libs.tslib import NaT # TODO: there are many places that rely on these private methods existing in # pandas.core.index __all__ = ['Index', 'MultiIndex', 'NumericIndex', 'Float64Index', 'Int64Index', 'CategoricalIndex', 'IntervalIndex', 'RangeIndex', 'UInt64Index', - 'InvalidIndexError', - '_new_Index', + 'InvalidIndexError', 'TimedeltaIndex', + 'PeriodIndex', 'DatetimeIndex', + '_new_Index', 'NaT', '_ensure_index', '_get_na_value', '_get_combined_index', '_get_distinct_indexes', '_union_indexes', '_get_consensus_names', diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 705b7a186dced..dcb9f9a144f39 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -91,7 +91,7 @@ def _new_Index(cls, d): # required for backward compat, because PI can't be instantiated with # ordinals through __new__ GH #13277 if issubclass(cls, ABCPeriodIndex): - from pandas.tseries.period import _new_PeriodIndex + from pandas.core.indexes.period import _new_PeriodIndex return _new_PeriodIndex(cls, **d) return cls.__new__(cls, **d) @@ -184,7 +184,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if (is_datetime64_any_dtype(data) or (dtype is not None and is_datetime64_any_dtype(dtype)) or 'tz' in kwargs): - from pandas.tseries.index import DatetimeIndex + from pandas.core.indexes.datetimes import DatetimeIndex result = DatetimeIndex(data, copy=copy, name=name, dtype=dtype, **kwargs) if dtype is not None and is_dtype_equal(_o_dtype, dtype): @@ -194,7 +194,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, elif (is_timedelta64_dtype(data) or (dtype is not None and is_timedelta64_dtype(dtype))): - from pandas.tseries.tdi import TimedeltaIndex + from pandas.core.indexes.timedeltas import TimedeltaIndex result = TimedeltaIndex(data, copy=copy, name=name, **kwargs) if dtype is not None and _o_dtype == dtype: return Index(result.to_pytimedelta(), dtype=_o_dtype) @@ -250,8 +250,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, raise # maybe coerce to a sub-class - from pandas.tseries.period import (PeriodIndex, - IncompatibleFrequency) + from pandas.core.indexes.period import ( + PeriodIndex, IncompatibleFrequency) if isinstance(data, PeriodIndex): return PeriodIndex(data, copy=copy, name=name, **kwargs) if is_signed_integer_dtype(data.dtype): @@ -299,7 +299,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if (lib.is_datetime_with_singletz_array(subarr) or 'tz' in kwargs): # only when subarr has the same tz - from pandas.tseries.index import DatetimeIndex + from pandas.core.indexes.datetimes import ( + DatetimeIndex) try: return DatetimeIndex(subarr, copy=copy, name=name, **kwargs) @@ -307,7 +308,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, pass elif inferred.startswith('timedelta'): - from pandas.tseries.tdi import TimedeltaIndex + from pandas.core.indexes.timedeltas import ( + TimedeltaIndex) return TimedeltaIndex(subarr, copy=copy, name=name, **kwargs) elif inferred == 'period': @@ -1009,7 +1011,7 @@ def to_datetime(self, dayfirst=False): warnings.warn("to_datetime is deprecated. 
Use pd.to_datetime(...)", FutureWarning, stacklevel=2) - from pandas.tseries.index import DatetimeIndex + from pandas.core.indexes.datetimes import DatetimeIndex if self.inferred_type == 'string': from dateutil.parser import parse parser = lambda x: parse(x, dayfirst=dayfirst) @@ -2664,7 +2666,7 @@ def get_indexer_for(self, target, **kwargs): def _maybe_promote(self, other): # A hack, but it works - from pandas.tseries.index import DatetimeIndex + from pandas.core.indexes.datetimes import DatetimeIndex if self.inferred_type == 'date' and isinstance(other, DatetimeIndex): return DatetimeIndex(self), other elif self.inferred_type == 'boolean': diff --git a/pandas/tseries/base.py b/pandas/core/indexes/datetimelike.py similarity index 99% rename from pandas/tseries/base.py rename to pandas/core/indexes/datetimelike.py index 3daa88fe396f6..387209ceb038f 100644 --- a/pandas/tseries/base.py +++ b/pandas/core/indexes/datetimelike.py @@ -27,8 +27,7 @@ Timedelta, Timestamp, iNaT, NaT) from pandas._libs.period import Period -from pandas.core.index import Index -from pandas.core.indexes.base import _index_shared_docs +from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util.decorators import Appender, cache_readonly import pandas.core.dtypes.concat as _concat import pandas.tseries.frequencies as frequencies @@ -639,7 +638,7 @@ def _add_datetimelike_methods(cls): def __add__(self, other): from pandas.core.index import Index - from pandas.tseries.tdi import TimedeltaIndex + from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.tseries.offsets import DateOffset if isinstance(other, TimedeltaIndex): return self._add_delta(other) @@ -666,8 +665,8 @@ def __add__(self, other): def __sub__(self, other): from pandas.core.index import Index - from pandas.tseries.index import DatetimeIndex - from pandas.tseries.tdi import TimedeltaIndex + from pandas.core.indexes.datetimes import DatetimeIndex + from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.tseries.offsets import DateOffset if isinstance(other, TimedeltaIndex): return self._add_delta(-other) diff --git a/pandas/tseries/index.py b/pandas/core/indexes/datetimes.py similarity index 99% rename from pandas/tseries/index.py rename to pandas/core/indexes/datetimes.py index d9aa72fe065ab..b92368ec1be7b 100644 --- a/pandas/tseries/index.py +++ b/pandas/core/indexes/datetimes.py @@ -29,21 +29,23 @@ from pandas.errors import PerformanceWarning from pandas.core.common import _values_from_object, _maybe_box -from pandas.core.index import Index, Int64Index, Float64Index -from pandas.core.indexes.base import _index_shared_docs +from pandas.core.indexes.base import Index, _index_shared_docs +from pandas.core.indexes.numeric import Int64Index, Float64Index import pandas.compat as compat from pandas.tseries.frequencies import ( to_offset, get_period_alias, Resolution) -from pandas.tseries.base import DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin +from pandas.core.indexes.datetimelike import ( + DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin) from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay -from pandas.tseries.tools import parse_time_string, normalize_date, to_time -from pandas.tseries.timedeltas import to_timedelta +from pandas.core.tools.datetimes import ( + parse_time_string, normalize_date, to_time) +from pandas.core.tools.timedeltas import to_timedelta from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) import pandas.core.common as com import 
pandas.tseries.offsets as offsets -import pandas.tseries.tools as tools +import pandas.core.tools.datetimes as tools from pandas._libs import (lib, index as libindex, tslib as libts, algos as libalgos, join as libjoin, @@ -927,7 +929,7 @@ def to_period(self, freq=None): """ Cast to PeriodIndex at a particular frequency """ - from pandas.tseries.period import PeriodIndex + from pandas.core.indexes.period import PeriodIndex if freq is None: freq = self.freqstr or self.inferred_freq diff --git a/pandas/tseries/period.py b/pandas/core/indexes/period.py similarity index 98% rename from pandas/tseries/period.py rename to pandas/core/indexes/period.py index b19e086b818f0..378661a49e20d 100644 --- a/pandas/tseries/period.py +++ b/pandas/core/indexes/period.py @@ -24,10 +24,10 @@ import pandas.tseries.frequencies as frequencies from pandas.tseries.frequencies import get_freq_code as _gfc -from pandas.tseries.index import DatetimeIndex, Int64Index, Index -from pandas.tseries.tdi import TimedeltaIndex -from pandas.tseries.base import DatelikeOps, DatetimeIndexOpsMixin -from pandas.tseries.tools import parse_time_string +from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index +from pandas.core.indexes.timedeltas import TimedeltaIndex +from pandas.core.indexes.datetimelike import DatelikeOps, DatetimeIndexOpsMixin +from pandas.core.tools.datetimes import parse_time_string import pandas.tseries.offsets as offsets from pandas._libs.lib import infer_dtype @@ -528,17 +528,17 @@ def asfreq(self, freq=None, how='E'): -------- >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='A') >>> pidx - <class 'pandas.tseries.period.PeriodIndex'> + <class 'pandas.core.indexes.period.PeriodIndex'> [2010, ..., 2015] Length: 6, Freq: A-DEC >>> pidx.asfreq('M') - <class 'pandas.tseries.period.PeriodIndex'> + <class 'pandas.core.indexes.period.PeriodIndex'> [2010-12, ..., 2015-12] Length: 6, Freq: M >>> pidx.asfreq('M', how='S') - <class 'pandas.tseries.period.PeriodIndex'> + <class 'pandas.core.indexes.period.PeriodIndex'> [2010-01, ..., 2015-01] Length: 6, Freq: M """ @@ -1154,7 +1154,7 @@ def pnow(freq=None): # deprecation, xref #13790 import warnings - warnings.warn("pd.pnow() and pandas.tseries.period.pnow() " + warnings.warn("pd.pnow() and pandas.core.indexes.period.pnow() " "are deprecated.
Please use Period.now()", FutureWarning, stacklevel=2) return Period.now(freq=freq) diff --git a/pandas/tseries/tdi.py b/pandas/core/indexes/timedeltas.py similarity index 99% rename from pandas/tseries/tdi.py rename to pandas/core/indexes/timedeltas.py index 7768b4a340775..1081787b2c0b0 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/core/indexes/timedeltas.py @@ -17,7 +17,8 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.common import _maybe_box, _values_from_object, is_bool_indexer -from pandas.core.index import Index, Int64Index +from pandas.core.indexes.base import Index +from pandas.core.indexes.numeric import Int64Index import pandas.compat as compat from pandas.compat import u from pandas.tseries.frequencies import to_offset @@ -27,9 +28,9 @@ import pandas.core.common as com import pandas.core.dtypes.concat as _concat from pandas.util.decorators import Appender, Substitution, deprecate_kwarg -from pandas.tseries.base import TimelikeOps, DatetimeIndexOpsMixin -from pandas.tseries.timedeltas import (to_timedelta, - _coerce_scalar_to_timedelta_type) +from pandas.core.indexes.datetimelike import TimelikeOps, DatetimeIndexOpsMixin +from pandas.core.tools.timedeltas import ( + to_timedelta, _coerce_scalar_to_timedelta_type) from pandas.tseries.offsets import Tick, DateOffset from pandas._libs import (lib, index as libindex, tslib as libts, join as libjoin, Timedelta, NaT, iNaT) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 5a87574455a63..f265f5f438280 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -54,7 +54,7 @@ from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import maybe_convert_indices, length_of_indexer from pandas.core.categorical import Categorical, maybe_to_categorical -from pandas.tseries.index import DatetimeIndex +from pandas.core.indexes.datetimes import DatetimeIndex from pandas.io.formats.printing import pprint_thing import pandas.core.missing as missing diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 50815498f40df..41a17a0957cbf 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -442,7 +442,7 @@ def _validate(self, lvalues, rvalues, name): def _convert_to_array(self, values, name=None, other=None): """converts values to ndarray""" - from pandas.tseries.timedeltas import to_timedelta + from pandas.core.tools.timedeltas import to_timedelta ovalues = values supplied_dtype = None @@ -508,7 +508,7 @@ def _convert_to_array(self, values, name=None, other=None): return values def _convert_for_datetime(self, lvalues, rvalues): - from pandas.tseries.timedeltas import to_timedelta + from pandas.core.tools.timedeltas import to_timedelta mask = isnull(lvalues) | isnull(rvalues) diff --git a/pandas/tseries/resample.py b/pandas/core/resample.py old mode 100755 new mode 100644 similarity index 99% rename from pandas/tseries/resample.py rename to pandas/core/resample.py index 2856b54ad9a8c..203ae0cb17e02 --- a/pandas/tseries/resample.py +++ b/pandas/core/resample.py @@ -10,10 +10,10 @@ SeriesGroupBy, groupby, PanelGroupBy) from pandas.tseries.frequencies import to_offset, is_subperiod, is_superperiod -from pandas.tseries.index import DatetimeIndex, date_range -from pandas.tseries.tdi import TimedeltaIndex +from pandas.core.indexes.datetimes import DatetimeIndex, date_range +from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.tseries.offsets import DateOffset, Tick, Day, _delta_to_nanoseconds -from pandas.tseries.period import PeriodIndex, 
period_range +from pandas.core.indexes.period import PeriodIndex, period_range import pandas.core.common as com import pandas.core.algorithms as algos diff --git a/pandas/core/series.py b/pandas/core/series.py index 69a2b35d88460..8a2351527856d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -52,11 +52,11 @@ from pandas.core.internals import SingleBlockManager from pandas.core.categorical import Categorical, CategoricalAccessor import pandas.core.strings as strings -from pandas.tseries.common import (maybe_to_datetimelike, - CombinedDatetimelikeProperties) -from pandas.tseries.index import DatetimeIndex -from pandas.tseries.tdi import TimedeltaIndex -from pandas.tseries.period import PeriodIndex +from pandas.core.indexes.accessors import ( + maybe_to_datetimelike, CombinedDatetimelikeProperties) +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex +from pandas.core.indexes.period import PeriodIndex from pandas import compat from pandas.util.terminal import get_terminal_size from pandas.compat import zip, u, OrderedDict, StringIO diff --git a/pandas/core/tools/__init__.py b/pandas/core/tools/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tseries/tools.py b/pandas/core/tools/datetimes.py similarity index 99% rename from pandas/tseries/tools.py rename to pandas/core/tools/datetimes.py index db7aa5974e562..9c02a6212c412 100644 --- a/pandas/tseries/tools.py +++ b/pandas/core/tools/datetimes.py @@ -336,7 +336,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, 2 1960-01-04 """ - from pandas.tseries.index import DatetimeIndex + from pandas.core.indexes.datetimes import DatetimeIndex tz = 'utc' if utc else None diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py new file mode 100644 index 0000000000000..eda88a2f7e474 --- /dev/null +++ b/pandas/core/tools/numeric.py @@ -0,0 +1,170 @@ +import numpy as np +import pandas as pd +from pandas.core.dtypes.common import ( + is_scalar, + is_numeric_dtype, + is_decimal, + is_datetime_or_timedelta_dtype, + is_number, + _ensure_object) +from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass +from pandas.core.dtypes.cast import maybe_downcast_to_dtype +from pandas._libs import lib + + +def to_numeric(arg, errors='raise', downcast=None): + """ + Convert argument to a numeric type. + + Parameters + ---------- + arg : list, tuple, 1-d array, or Series + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception + - If 'coerce', then invalid parsing will be set as NaN + - If 'ignore', then invalid parsing will return the input + downcast : {'integer', 'signed', 'unsigned', 'float'} , default None + If not None, and if the data has been successfully cast to a + numerical dtype (or if the data was numeric to begin with), + downcast that resulting data to the smallest numerical dtype + possible according to the following rules: + + - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) + - 'unsigned': smallest unsigned int dtype (min.: np.uint8) + - 'float': smallest float dtype (min.: np.float32) + + As this behaviour is separate from the core conversion to + numeric values, any errors raised during the downcasting + will be surfaced regardless of the value of the 'errors' input. 
+ + In addition, downcasting will only occur if the size + of the resulting data's dtype is strictly larger than + the dtype it is to be cast to, so if none of the dtypes + checked satisfy that specification, no downcasting will be + performed on the data. + + .. versionadded:: 0.19.0 + + Returns + ------- + ret : numeric if parsing succeeded. + Return type depends on input. Series if Series, otherwise ndarray + + Examples + -------- + Take separate series and convert to numeric, coercing when told to + + >>> import pandas as pd + >>> s = pd.Series(['1.0', '2', -3]) + >>> pd.to_numeric(s) + 0 1.0 + 1 2.0 + 2 -3.0 + dtype: float64 + >>> pd.to_numeric(s, downcast='float') + 0 1.0 + 1 2.0 + 2 -3.0 + dtype: float32 + >>> pd.to_numeric(s, downcast='signed') + 0 1 + 1 2 + 2 -3 + dtype: int8 + >>> s = pd.Series(['apple', '1.0', '2', -3]) + >>> pd.to_numeric(s, errors='ignore') + 0 apple + 1 1.0 + 2 2 + 3 -3 + dtype: object + >>> pd.to_numeric(s, errors='coerce') + 0 NaN + 1 1.0 + 2 2.0 + 3 -3.0 + dtype: float64 + """ + if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'): + raise ValueError('invalid downcasting method provided') + + is_series = False + is_index = False + is_scalars = False + + if isinstance(arg, ABCSeries): + is_series = True + values = arg.values + elif isinstance(arg, ABCIndexClass): + is_index = True + values = arg.asi8 + if values is None: + values = arg.values + elif isinstance(arg, (list, tuple)): + values = np.array(arg, dtype='O') + elif is_scalar(arg): + if is_decimal(arg): + return float(arg) + if is_number(arg): + return arg + is_scalars = True + values = np.array([arg], dtype='O') + elif getattr(arg, 'ndim', 1) > 1: + raise TypeError('arg must be a list, tuple, 1-d array, or Series') + else: + values = arg + + try: + if is_numeric_dtype(values): + pass + elif is_datetime_or_timedelta_dtype(values): + values = values.astype(np.int64) + else: + values = _ensure_object(values) + coerce_numeric = False if errors in ('ignore', 'raise') else True + values = lib.maybe_convert_numeric(values, set(), + coerce_numeric=coerce_numeric) + + except Exception: + if errors == 'raise': + raise + + # attempt downcast only if the data has been successfully converted + # to a numerical dtype and if a downcast method has been specified + if downcast is not None and is_numeric_dtype(values): + typecodes = None + + if downcast in ('integer', 'signed'): + typecodes = np.typecodes['Integer'] + elif downcast == 'unsigned' and np.min(values) >= 0: + typecodes = np.typecodes['UnsignedInteger'] + elif downcast == 'float': + typecodes = np.typecodes['Float'] + + # pandas support goes only to np.float32, + # as float dtypes smaller than that are + # extremely rare and not well supported + float_32_char = np.dtype(np.float32).char + float_32_ind = typecodes.index(float_32_char) + typecodes = typecodes[float_32_ind:] + + if typecodes is not None: + # from smallest to largest + for dtype in typecodes: + if np.dtype(dtype).itemsize <= values.dtype.itemsize: + values = maybe_downcast_to_dtype(values, dtype) + + # successful conversion + if values.dtype == dtype: + break + + if is_series: + return pd.Series(values, index=arg.index, name=arg.name) + elif is_index: + # because we want to coerce to numeric if possible, + # do not use _shallow_copy_with_infer + return pd.Index(values, name=arg.name) + elif is_scalars: + return values[0] + else: + return values diff --git a/pandas/tseries/timedeltas.py b/pandas/core/tools/timedeltas.py similarity index 100% rename from 
pandas/tseries/timedeltas.py rename to pandas/core/tools/timedeltas.py diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 637635a64d4d0..fbb10ebdfc56d 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -19,7 +19,7 @@ from pandas.errors import EmptyDataError from pandas.io.common import (_is_url, _urlopen, _validate_header_arg, get_filepath_or_buffer, _NA_VALUES) -from pandas.tseries.period import Period +from pandas.core.indexes.period import Period from pandas.io.json import libjson from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, string_types, OrderedDict) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6fbcbe7d645e1..d618fab08309f 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -38,8 +38,8 @@ import pandas._libs.lib as lib from pandas._libs.tslib import (iNaT, Timestamp, Timedelta, format_array_from_datetime) -from pandas.tseries.index import DatetimeIndex -from pandas.tseries.period import PeriodIndex +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.period import PeriodIndex import pandas as pd import numpy as np @@ -2314,7 +2314,7 @@ def _format_strings(self): class PeriodArrayFormatter(IntArrayFormatter): def _format_strings(self): - from pandas.tseries.period import IncompatibleFrequency + from pandas.core.indexes.period import IncompatibleFrequency try: values = PeriodIndex(self.values).to_native_types() except IncompatibleFrequency: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index f2449e3064867..79595818b7387 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -35,7 +35,7 @@ _get_handle, UnicodeReader, UTF8Recoder, BaseIterator, _NA_VALUES, _infer_compression) -from pandas.tseries import tools +from pandas.core.tools import datetimes as tools from pandas.util.decorators import Appender diff --git a/pandas/io/sql.py b/pandas/io/sql.py index de47a8ad5401f..ee992c6dd3439 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -22,7 +22,7 @@ string_types, text_type) from pandas.core.api import DataFrame, Series from pandas.core.base import PandasObject -from pandas.tseries.tools import to_datetime +from pandas.core.tools.datetimes import to_datetime from contextlib import contextmanager diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index 9621ee3d0cad4..97295dfa7baf1 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -26,11 +26,11 @@ from pandas.core.index import Index from pandas.core.series import Series -from pandas.tseries.index import date_range -import pandas.tseries.tools as tools +from pandas.core.indexes.datetimes import date_range +import pandas.core.tools.datetimes as tools import pandas.tseries.frequencies as frequencies from pandas.tseries.frequencies import FreqGroup -from pandas.tseries.period import Period, PeriodIndex +from pandas.core.indexes.period import Period, PeriodIndex from pandas.plotting._compat import _mpl_le_2_0_0 diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 934c05ba5f130..c3476d1443fc3 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -21,7 +21,7 @@ from pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import Index, MultiIndex from pandas.core.series import Series, remove_na -from pandas.tseries.period import PeriodIndex +from pandas.core.indexes.period import PeriodIndex from pandas.compat import range, lrange, map, zip, string_types import pandas.compat 
as compat from pandas.io.formats.printing import pprint_thing diff --git a/pandas/plotting/_timeseries.py b/pandas/plotting/_timeseries.py index f8c7c1ee9ee10..3d04973ed0009 100644 --- a/pandas/plotting/_timeseries.py +++ b/pandas/plotting/_timeseries.py @@ -3,12 +3,12 @@ import numpy as np from matplotlib import pylab -from pandas.tseries.period import Period +from pandas.core.indexes.period import Period from pandas.tseries.offsets import DateOffset import pandas.tseries.frequencies as frequencies -from pandas.tseries.index import DatetimeIndex -from pandas.tseries.period import PeriodIndex -from pandas.tseries.tdi import TimedeltaIndex +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.period import PeriodIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.io.formats.printing import pprint_thing import pandas.compat as compat diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index e59784d233367..f3fdc54d4a3cc 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -6,13 +6,10 @@ """ import pytest -import decimal from datetime import datetime, timedelta, date import numpy as np -import pandas as pd -from pandas import (Timedelta, Timestamp, DatetimeIndex, - to_numeric, _np_version_under1p9) +from pandas import Timedelta, Timestamp, DatetimeIndex from pandas.core.dtypes.cast import ( maybe_downcast_to_dtype, @@ -28,8 +25,6 @@ PeriodDtype) from pandas.util import testing as tm -from numpy import iinfo - class TestMaybeDowncast(tm.TestCase): @@ -327,365 +322,3 @@ def test_period_dtype(self): np.dtype('datetime64[ns]'), np.object, np.int64]: self.assertEqual(find_common_type([dtype, dtype2]), np.object) self.assertEqual(find_common_type([dtype2, dtype]), np.object) - - -class TestToNumeric(tm.TestCase): - - def test_series(self): - s = pd.Series(['1', '-3.14', '7']) - res = to_numeric(s) - expected = pd.Series([1, -3.14, 7]) - tm.assert_series_equal(res, expected) - - s = pd.Series(['1', '-3.14', 7]) - res = to_numeric(s) - tm.assert_series_equal(res, expected) - - def test_series_numeric(self): - s = pd.Series([1, 3, 4, 5], index=list('ABCD'), name='XXX') - res = to_numeric(s) - tm.assert_series_equal(res, s) - - s = pd.Series([1., 3., 4., 5.], index=list('ABCD'), name='XXX') - res = to_numeric(s) - tm.assert_series_equal(res, s) - - # bool is regarded as numeric - s = pd.Series([True, False, True, True], - index=list('ABCD'), name='XXX') - res = to_numeric(s) - tm.assert_series_equal(res, s) - - def test_error(self): - s = pd.Series([1, -3.14, 'apple']) - msg = 'Unable to parse string "apple" at position 2' - with tm.assertRaisesRegexp(ValueError, msg): - to_numeric(s, errors='raise') - - res = to_numeric(s, errors='ignore') - expected = pd.Series([1, -3.14, 'apple']) - tm.assert_series_equal(res, expected) - - res = to_numeric(s, errors='coerce') - expected = pd.Series([1, -3.14, np.nan]) - tm.assert_series_equal(res, expected) - - s = pd.Series(['orange', 1, -3.14, 'apple']) - msg = 'Unable to parse string "orange" at position 0' - with tm.assertRaisesRegexp(ValueError, msg): - to_numeric(s, errors='raise') - - def test_error_seen_bool(self): - s = pd.Series([True, False, 'apple']) - msg = 'Unable to parse string "apple" at position 2' - with tm.assertRaisesRegexp(ValueError, msg): - to_numeric(s, errors='raise') - - res = to_numeric(s, errors='ignore') - expected = pd.Series([True, False, 'apple']) - tm.assert_series_equal(res, expected) - - # coerces to float - res = 
to_numeric(s, errors='coerce') - expected = pd.Series([1., 0., np.nan]) - tm.assert_series_equal(res, expected) - - def test_list(self): - s = ['1', '-3.14', '7'] - res = to_numeric(s) - expected = np.array([1, -3.14, 7]) - tm.assert_numpy_array_equal(res, expected) - - def test_list_numeric(self): - s = [1, 3, 4, 5] - res = to_numeric(s) - tm.assert_numpy_array_equal(res, np.array(s, dtype=np.int64)) - - s = [1., 3., 4., 5.] - res = to_numeric(s) - tm.assert_numpy_array_equal(res, np.array(s)) - - # bool is regarded as numeric - s = [True, False, True, True] - res = to_numeric(s) - tm.assert_numpy_array_equal(res, np.array(s)) - - def test_numeric(self): - s = pd.Series([1, -3.14, 7], dtype='O') - res = to_numeric(s) - expected = pd.Series([1, -3.14, 7]) - tm.assert_series_equal(res, expected) - - s = pd.Series([1, -3.14, 7]) - res = to_numeric(s) - tm.assert_series_equal(res, expected) - - # GH 14827 - df = pd.DataFrame(dict( - a=[1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), '0.1'], - b=[1.0, 2.0, 3.0, 4.0], - )) - expected = pd.DataFrame(dict( - a=[1.2, 3.14, np.inf, 0.1], - b=[1.0, 2.0, 3.0, 4.0], - )) - - # Test to_numeric over one column - df_copy = df.copy() - df_copy['a'] = df_copy['a'].apply(to_numeric) - tm.assert_frame_equal(df_copy, expected) - - # Test to_numeric over multiple columns - df_copy = df.copy() - df_copy[['a', 'b']] = df_copy[['a', 'b']].apply(to_numeric) - tm.assert_frame_equal(df_copy, expected) - - def test_numeric_lists_and_arrays(self): - # Test to_numeric with embedded lists and arrays - df = pd.DataFrame(dict( - a=[[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1] - )) - df['a'] = df['a'].apply(to_numeric) - expected = pd.DataFrame(dict( - a=[[3.14, 1.0], 1.6, 0.1], - )) - tm.assert_frame_equal(df, expected) - - df = pd.DataFrame(dict( - a=[np.array([decimal.Decimal(3.14), 1.0]), 0.1] - )) - df['a'] = df['a'].apply(to_numeric) - expected = pd.DataFrame(dict( - a=[[3.14, 1.0], 0.1], - )) - tm.assert_frame_equal(df, expected) - - def test_all_nan(self): - s = pd.Series(['a', 'b', 'c']) - res = to_numeric(s, errors='coerce') - expected = pd.Series([np.nan, np.nan, np.nan]) - tm.assert_series_equal(res, expected) - - def test_type_check(self): - # GH 11776 - df = pd.DataFrame({'a': [1, -3.14, 7], 'b': ['4', '5', '6']}) - with tm.assertRaisesRegexp(TypeError, "1-d array"): - to_numeric(df) - for errors in ['ignore', 'raise', 'coerce']: - with tm.assertRaisesRegexp(TypeError, "1-d array"): - to_numeric(df, errors=errors) - - def test_scalar(self): - self.assertEqual(pd.to_numeric(1), 1) - self.assertEqual(pd.to_numeric(1.1), 1.1) - - self.assertEqual(pd.to_numeric('1'), 1) - self.assertEqual(pd.to_numeric('1.1'), 1.1) - - with tm.assertRaises(ValueError): - to_numeric('XX', errors='raise') - - self.assertEqual(to_numeric('XX', errors='ignore'), 'XX') - self.assertTrue(np.isnan(to_numeric('XX', errors='coerce'))) - - def test_numeric_dtypes(self): - idx = pd.Index([1, 2, 3], name='xxx') - res = pd.to_numeric(idx) - tm.assert_index_equal(res, idx) - - res = pd.to_numeric(pd.Series(idx, name='xxx')) - tm.assert_series_equal(res, pd.Series(idx, name='xxx')) - - res = pd.to_numeric(idx.values) - tm.assert_numpy_array_equal(res, idx.values) - - idx = pd.Index([1., np.nan, 3., np.nan], name='xxx') - res = pd.to_numeric(idx) - tm.assert_index_equal(res, idx) - - res = pd.to_numeric(pd.Series(idx, name='xxx')) - tm.assert_series_equal(res, pd.Series(idx, name='xxx')) - - res = pd.to_numeric(idx.values) - tm.assert_numpy_array_equal(res, idx.values) - - 
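The to_numeric tests removed here move unchanged to pandas/tests/tools/test_numeric.py; the downcast cases further below pin down the edge rules, which read naturally as doctests (a sketch, assuming a build of pandas at this commit):

>>> import pandas as pd
>>> pd.to_numeric(['-1', 2, 3], downcast='unsigned').dtype   # negatives stay signed
dtype('int64')
>>> pd.to_numeric(['1.1', 2, 3], downcast='integer').dtype   # fractions stay float
dtype('float64')
>>> pd.to_numeric(['256', 257, 258], downcast='unsigned').dtype
dtype('uint16')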
def test_str(self): - idx = pd.Index(['1', '2', '3'], name='xxx') - exp = np.array([1, 2, 3], dtype='int64') - res = pd.to_numeric(idx) - tm.assert_index_equal(res, pd.Index(exp, name='xxx')) - - res = pd.to_numeric(pd.Series(idx, name='xxx')) - tm.assert_series_equal(res, pd.Series(exp, name='xxx')) - - res = pd.to_numeric(idx.values) - tm.assert_numpy_array_equal(res, exp) - - idx = pd.Index(['1.5', '2.7', '3.4'], name='xxx') - exp = np.array([1.5, 2.7, 3.4]) - res = pd.to_numeric(idx) - tm.assert_index_equal(res, pd.Index(exp, name='xxx')) - - res = pd.to_numeric(pd.Series(idx, name='xxx')) - tm.assert_series_equal(res, pd.Series(exp, name='xxx')) - - res = pd.to_numeric(idx.values) - tm.assert_numpy_array_equal(res, exp) - - def test_datetimelike(self): - for tz in [None, 'US/Eastern', 'Asia/Tokyo']: - idx = pd.date_range('20130101', periods=3, tz=tz, name='xxx') - res = pd.to_numeric(idx) - tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx')) - - res = pd.to_numeric(pd.Series(idx, name='xxx')) - tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx')) - - res = pd.to_numeric(idx.values) - tm.assert_numpy_array_equal(res, idx.asi8) - - def test_timedelta(self): - idx = pd.timedelta_range('1 days', periods=3, freq='D', name='xxx') - res = pd.to_numeric(idx) - tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx')) - - res = pd.to_numeric(pd.Series(idx, name='xxx')) - tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx')) - - res = pd.to_numeric(idx.values) - tm.assert_numpy_array_equal(res, idx.asi8) - - def test_period(self): - idx = pd.period_range('2011-01', periods=3, freq='M', name='xxx') - res = pd.to_numeric(idx) - tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx')) - - # ToDo: enable when we can support native PeriodDtype - # res = pd.to_numeric(pd.Series(idx, name='xxx')) - # tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx')) - - def test_non_hashable(self): - # Test for Bug #13324 - s = pd.Series([[10.0, 2], 1.0, 'apple']) - res = pd.to_numeric(s, errors='coerce') - tm.assert_series_equal(res, pd.Series([np.nan, 1.0, np.nan])) - - res = pd.to_numeric(s, errors='ignore') - tm.assert_series_equal(res, pd.Series([[10.0, 2], 1.0, 'apple'])) - - with self.assertRaisesRegexp(TypeError, "Invalid object type"): - pd.to_numeric(s) - - def test_downcast(self): - # see gh-13352 - mixed_data = ['1', 2, 3] - int_data = [1, 2, 3] - date_data = np.array(['1970-01-02', '1970-01-03', - '1970-01-04'], dtype='datetime64[D]') - - invalid_downcast = 'unsigned-integer' - msg = 'invalid downcasting method provided' - - smallest_int_dtype = np.dtype(np.typecodes['Integer'][0]) - smallest_uint_dtype = np.dtype(np.typecodes['UnsignedInteger'][0]) - - # support below np.float32 is rare and far between - float_32_char = np.dtype(np.float32).char - smallest_float_dtype = float_32_char - - for data in (mixed_data, int_data, date_data): - with self.assertRaisesRegexp(ValueError, msg): - pd.to_numeric(data, downcast=invalid_downcast) - - expected = np.array([1, 2, 3], dtype=np.int64) - - res = pd.to_numeric(data) - tm.assert_numpy_array_equal(res, expected) - - res = pd.to_numeric(data, downcast=None) - tm.assert_numpy_array_equal(res, expected) - - expected = np.array([1, 2, 3], dtype=smallest_int_dtype) - - for signed_downcast in ('integer', 'signed'): - res = pd.to_numeric(data, downcast=signed_downcast) - tm.assert_numpy_array_equal(res, expected) - - expected = np.array([1, 2, 3], dtype=smallest_uint_dtype) - res = pd.to_numeric(data, downcast='unsigned') - 
tm.assert_numpy_array_equal(res, expected) - - expected = np.array([1, 2, 3], dtype=smallest_float_dtype) - res = pd.to_numeric(data, downcast='float') - tm.assert_numpy_array_equal(res, expected) - - # if we can't successfully cast the given - # data to a numeric dtype, do not bother - # with the downcast parameter - data = ['foo', 2, 3] - expected = np.array(data, dtype=object) - res = pd.to_numeric(data, errors='ignore', - downcast='unsigned') - tm.assert_numpy_array_equal(res, expected) - - # cannot cast to an unsigned integer because - # we have a negative number - data = ['-1', 2, 3] - expected = np.array([-1, 2, 3], dtype=np.int64) - res = pd.to_numeric(data, downcast='unsigned') - tm.assert_numpy_array_equal(res, expected) - - # cannot cast to an integer (signed or unsigned) - # because we have a float number - data = (['1.1', 2, 3], - [10000.0, 20000, 3000, 40000.36, 50000, 50000.00]) - expected = (np.array([1.1, 2, 3], dtype=np.float64), - np.array([10000.0, 20000, 3000, - 40000.36, 50000, 50000.00], dtype=np.float64)) - - for _data, _expected in zip(data, expected): - for downcast in ('integer', 'signed', 'unsigned'): - res = pd.to_numeric(_data, downcast=downcast) - tm.assert_numpy_array_equal(res, _expected) - - # the smallest integer dtype need not be np.(u)int8 - data = ['256', 257, 258] - - for downcast, expected_dtype in zip( - ['integer', 'signed', 'unsigned'], - [np.int16, np.int16, np.uint16]): - expected = np.array([256, 257, 258], dtype=expected_dtype) - res = pd.to_numeric(data, downcast=downcast) - tm.assert_numpy_array_equal(res, expected) - - def test_downcast_limits(self): - # Test the limits of each downcast. Bug: #14401. - # Check to make sure numpy is new enough to run this test. - if _np_version_under1p9: - pytest.skip("Numpy version is under 1.9") - - i = 'integer' - u = 'unsigned' - dtype_downcast_min_max = [ - ('int8', i, [iinfo(np.int8).min, iinfo(np.int8).max]), - ('int16', i, [iinfo(np.int16).min, iinfo(np.int16).max]), - ('int32', i, [iinfo(np.int32).min, iinfo(np.int32).max]), - ('int64', i, [iinfo(np.int64).min, iinfo(np.int64).max]), - ('uint8', u, [iinfo(np.uint8).min, iinfo(np.uint8).max]), - ('uint16', u, [iinfo(np.uint16).min, iinfo(np.uint16).max]), - ('uint32', u, [iinfo(np.uint32).min, iinfo(np.uint32).max]), - ('uint64', u, [iinfo(np.uint64).min, iinfo(np.uint64).max]), - ('int16', i, [iinfo(np.int8).min, iinfo(np.int8).max + 1]), - ('int32', i, [iinfo(np.int16).min, iinfo(np.int16).max + 1]), - ('int64', i, [iinfo(np.int32).min, iinfo(np.int32).max + 1]), - ('int16', i, [iinfo(np.int8).min - 1, iinfo(np.int16).max]), - ('int32', i, [iinfo(np.int16).min - 1, iinfo(np.int32).max]), - ('int64', i, [iinfo(np.int32).min - 1, iinfo(np.int64).max]), - ('uint16', u, [iinfo(np.uint8).min, iinfo(np.uint8).max + 1]), - ('uint32', u, [iinfo(np.uint16).min, iinfo(np.uint16).max + 1]), - ('uint64', u, [iinfo(np.uint32).min, iinfo(np.uint32).max + 1]) - ] - - for dtype, downcast, min_max in dtype_downcast_min_max: - series = pd.to_numeric(pd.Series(min_max), downcast=downcast) - assert series.dtype == dtype diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index ce4dd6d38eeeb..3133a6883eb6f 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -8,7 +8,8 @@ from pandas.compat import lrange from pandas import (DataFrame, Series, Index, MultiIndex, - RangeIndex, date_range, IntervalIndex) + RangeIndex, date_range, IntervalIndex, + to_datetime) from pandas.core.dtypes.common 
import ( is_object_dtype, is_categorical_dtype, @@ -202,8 +203,8 @@ def test_set_index_cast_datetimeindex(self): # don't cast a DatetimeIndex WITH a tz, leave as object # GH 6032 i = (pd.DatetimeIndex( - pd.tseries.tools.to_datetime(['2013-1-1 13:00', - '2013-1-2 14:00'], errors="raise")) + to_datetime(['2013-1-1 13:00', + '2013-1-2 14:00'], errors="raise")) .tz_localize('US/Pacific')) df = DataFrame(np.random.randn(2, 1), columns=['A']) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index e165e30c59f0f..979493b95a253 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -873,7 +873,7 @@ def test_operators_timedelta64(self): mixed['F'] = Timestamp('20130101') # results in an object array - from pandas.tseries.timedeltas import ( + from pandas.core.tools.timedeltas import ( _coerce_scalar_to_timedelta_type as _coerce) result = mixed.min() diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 37b6f0c261789..7765bac55fb1f 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -547,7 +547,7 @@ def test_datetime_assignment_with_NaT_and_diff_time_units(self): def test_frame_to_period(self): K = 5 - from pandas.tseries.period import period_range + from pandas.core.indexes.period import period_range dr = date_range('1/1/2000', '1/1/2001') pr = period_range('1/1/2000', '1/1/2001') diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index f486c70d86f9d..880737392d037 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -8,7 +8,7 @@ from pandas import (date_range, bdate_range, Timestamp, isnull, Index, MultiIndex, DataFrame, Series, - concat, Panel) + concat, Panel, DatetimeIndex) from pandas.errors import UnsupportedFunctionCall, PerformanceWarning from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal, @@ -3305,7 +3305,6 @@ def test_groupby_sort_multiindex_series(self): assert_series_equal(result, mseries_result.sort_index()) def test_groupby_reindex_inside_function(self): - from pandas.tseries.api import DatetimeIndex periods = 1000 ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods) @@ -3559,7 +3558,7 @@ def test_groupby_with_empty(self): index = pd.DatetimeIndex(()) data = () series = pd.Series(data, index) - grouper = pd.tseries.resample.TimeGrouper('D') + grouper = pd.core.resample.TimeGrouper('D') grouped = series.groupby(grouper) assert next(iter(grouped), None) is None diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 3142b74b56778..f97f59cd92262 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -5,7 +5,8 @@ from numpy import nan import pandas as pd -from pandas import DataFrame, date_range, Index, Series, MultiIndex, Timestamp +from pandas import (DataFrame, date_range, Index, + Series, MultiIndex, Timestamp, DatetimeIndex) from pandas.compat import StringIO from pandas.util import testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal @@ -361,7 +362,6 @@ def sumfunc_value(x): def test_groupby_groups_datetimeindex(self): # #1430 - from pandas.tseries.api import DatetimeIndex periods = 1000 ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods) df = DataFrame({'high': np.arange(periods), diff --git 
a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 9003a3707e417..d53f131820dea 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -9,6 +9,7 @@ RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, IntervalIndex, notnull, isnull) +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas.core.dtypes.common import needs_i8_conversion from pandas.util.testing import assertRaisesRegexp from pandas._libs.tslib import iNaT @@ -789,7 +790,7 @@ def test_numpy_ufuncs(self): np.arccos, np.arctan, np.sinh, np.cosh, np.tanh, np.arcsinh, np.arccosh, np.arctanh, np.deg2rad, np.rad2deg]: - if isinstance(idx, pd.tseries.base.DatetimeIndexOpsMixin): + if isinstance(idx, DatetimeIndexOpsMixin): # raise TypeError or ValueError (PeriodIndex) # PeriodIndex behavior should be changed in future version with tm.assertRaises(Exception): @@ -812,7 +813,7 @@ def test_numpy_ufuncs(self): func(idx) for func in [np.isfinite, np.isinf, np.isnan, np.signbit]: - if isinstance(idx, pd.tseries.base.DatetimeIndexOpsMixin): + if isinstance(idx, DatetimeIndexOpsMixin): # raise TypeError or ValueError (PeriodIndex) with tm.assertRaises(Exception): func(idx) @@ -847,7 +848,7 @@ def test_hasnans_isnans(self): if len(index) == 0: continue - elif isinstance(index, pd.tseries.base.DatetimeIndexOpsMixin): + elif isinstance(index, DatetimeIndexOpsMixin): values[1] = iNaT elif isinstance(index, (Int64Index, UInt64Index)): continue @@ -887,7 +888,7 @@ def test_fillna(self): idx = index.copy() values = idx.values - if isinstance(index, pd.tseries.base.DatetimeIndexOpsMixin): + if isinstance(index, DatetimeIndexOpsMixin): values[1] = iNaT elif isinstance(index, (Int64Index, UInt64Index)): continue diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 67e82e5c71d75..3eaeda965b217 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -9,7 +9,7 @@ import pandas as pd import pandas.util.testing as tm from pandas import compat -from pandas.tseries.index import bdate_range, cdate_range +from pandas.core.indexes.datetimes import bdate_range, cdate_range from pandas import date_range, offsets, DatetimeIndex, Timestamp from pandas.tseries.offsets import (generate_range, CDay, BDay, DateOffset, MonthEnd) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 8ab29c0c0b6f2..5dcc49cf776db 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -8,7 +8,7 @@ import pandas._libs.tslib as tslib import pandas.util.testing as tm from pandas.errors import PerformanceWarning -from pandas.tseries.index import cdate_range +from pandas.core.indexes.datetimes import cdate_range from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, date_range, TimedeltaIndex, _np_version_under1p10, Index, datetime, Float64Index, offsets, bdate_range) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index a1ad147f84aff..2da37f9394407 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -4,7 +4,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas.tseries.index import cdate_range +from pandas.core.indexes.datetimes import cdate_range from pandas import (DatetimeIndex, date_range, 
Series, bdate_range, DataFrame, Int64Index, Index, to_datetime) from pandas.tseries.offsets import Minute, BMonthEnd, MonthEnd diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index f8eb923d51f75..a250a936b7ca8 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -10,8 +10,8 @@ import pandas as pd from pandas._libs import tslib, lib -from pandas.tseries import tools -from pandas.tseries.tools import normalize_date +from pandas.core.tools import datetimes as tools +from pandas.core.tools.datetimes import normalize_date from pandas.compat import lmap from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.dtypes.common import is_datetime64_ns_dtype diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index ab70ad59846e8..1340c9cad211b 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -2,7 +2,7 @@ import pandas as pd import pandas.util.testing as tm -import pandas.tseries.period as period +import pandas.core.indexes.period as period from pandas.compat import lrange, PY3, text_type, lmap from pandas import (Period, PeriodIndex, period_range, offsets, date_range, Series, Index) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 3b94992f2fe9f..50b2da380fd30 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -4,7 +4,7 @@ import pandas as pd import pandas._libs.tslib as tslib import pandas.util.testing as tm -import pandas.tseries.period as period +import pandas.core.indexes.period as period from pandas import (DatetimeIndex, PeriodIndex, period_range, Series, Period, _np_version_under1p10, Index, Timedelta, offsets) diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index d4f06bae8bc32..357eccccf9fe8 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -2,7 +2,7 @@ import pandas as pd import pandas.util.testing as tm -import pandas.tseries.period as period +import pandas.core.indexes.period as period from pandas import period_range, PeriodIndex, Index, date_range diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index f9a1df3d824f1..32fbf44bd572c 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -3,7 +3,7 @@ import pandas as pd import pandas.util.testing as tm -import pandas.tseries.period as period +import pandas.core.indexes.period as period from pandas.compat import lrange from pandas.tseries.frequencies import get_freq, MONTHS from pandas._libs.period import period_ordinal, period_asfreq diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index de15abe89712a..c1b61bcd2971c 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -21,7 +21,7 @@ import pandas.core.config as cf -from pandas.tseries.index import _to_m8 +from pandas.core.indexes.datetimes import _to_m8 import pandas as pd from pandas._libs.lib import Timestamp diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index dcfa939f84d7e..545165be37178 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -1452,7 +1452,7 @@ def 
testIndex(self): tm.assert_index_equal(i, outp) def test_datetimeindex(self): - from pandas.tseries.index import date_range + from pandas.core.indexes.datetimes import date_range rng = date_range('1/1/2000', periods=20) diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index de4e3fbc0d943..8bb1d5ee3972a 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -15,7 +15,7 @@ import pandas as pd import pandas.io.parsers as parsers -import pandas.tseries.tools as tools +import pandas.core.tools.datetimes as tools import pandas.util.testing as tm import pandas.io.date_converters as conv @@ -23,7 +23,7 @@ from pandas import compat from pandas.compat import parse_date, StringIO, lrange from pandas.compat.numpy import np_array_datetime64_compat -from pandas.tseries.index import date_range +from pandas.core.indexes.datetimes import date_range class ParseDatesTests(object): diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index ce411bb4d5c4e..b4c7f2ba8719e 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -38,7 +38,7 @@ from pandas import date_range, to_datetime, to_timedelta, Timestamp import pandas.compat as compat from pandas.compat import range, lrange, string_types, PY36 -from pandas.tseries.tools import format as date_format +from pandas.core.tools.datetimes import format as date_format import pandas.io.sql as sql from pandas.io.sql import read_sql_table, read_sql_query diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 4beb804acacc5..9946c3475b7a1 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -8,11 +8,11 @@ import numpy as np from pandas import Index, Series, DataFrame from pandas.compat import is_platform_mac -from pandas.tseries.index import date_range, bdate_range -from pandas.tseries.tdi import timedelta_range +from pandas.core.indexes.datetimes import date_range, bdate_range +from pandas.core.indexes.timedeltas import timedelta_range from pandas.tseries.offsets import DateOffset -from pandas.tseries.period import period_range, Period, PeriodIndex -from pandas.tseries.resample import DatetimeIndex +from pandas.core.indexes.period import period_range, Period, PeriodIndex +from pandas.core.resample import DatetimeIndex from pandas.util.testing import assert_series_equal, ensure_clean, slow import pandas.util.testing as tm diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index e6514a1e2e81e..ed194cacb1628 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1503,7 +1503,7 @@ def test_concat_exclude_none(self): self.assertRaises(ValueError, concat, [None, None]) def test_concat_datetime64_block(self): - from pandas.tseries.index import date_range + from pandas.core.indexes.datetimes import date_range rng = date_range('1/1/2000', periods=10) diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index 98af0028469bf..4c6784fb1732b 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -3,7 +3,7 @@ import pandas as pd import pandas.util.testing as tm -import pandas.tseries.period as period +import pandas.core.indexes.period as period from pandas.compat import text_type, iteritems from pandas.compat.numpy import np_datetime64_compat diff --git a/pandas/tests/scalar/test_timedelta.py 
b/pandas/tests/scalar/test_timedelta.py index abdbf29008b7e..227297709098f 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -4,7 +4,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type as ct +from pandas.core.tools.timedeltas import _coerce_scalar_to_timedelta_type as ct from pandas import (Timedelta, TimedeltaIndex, timedelta_range, Series, to_timedelta, compat) from pandas._libs.tslib import iNaT, NaTType diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index a682e8643d251..671c04880bf5c 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -13,8 +13,8 @@ from pandas import (Series, Categorical, DataFrame, isnull, notnull, bdate_range, date_range, _np_version_under1p10) from pandas.core.index import MultiIndex -from pandas.tseries.index import Timestamp -from pandas.tseries.tdi import Timedelta +from pandas.core.indexes.datetimes import Timestamp +from pandas.core.indexes.timedeltas import Timedelta import pandas.core.config as cf import pandas.core.nanops as nanops diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index faf987c9b3820..25acd304e0a23 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -5,7 +5,7 @@ import pandas as pd from pandas import Index, Series, DataFrame, date_range -from pandas.tseries.index import Timestamp +from pandas.core.indexes.datetimes import Timestamp from pandas.compat import range from pandas import compat diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index d4e5d36c15c68..15e7d97c7ce32 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -204,7 +204,7 @@ def test_concat_empty_series_dtypes(self): self.assertEqual(result.ftype, 'object:dense') def test_combine_first_dt64(self): - from pandas.tseries.tools import to_datetime + from pandas.core.tools.datetimes import to_datetime s0 = to_datetime(Series(["2010", np.NaN])) s1 = to_datetime(Series([np.NaN, "2011"])) rs = s0.combine_first(s1) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 24b2a12d70709..6b16c607e5ee1 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -13,7 +13,7 @@ is_datetime64tz_dtype) from pandas import (Index, Series, isnull, date_range, NaT, period_range, MultiIndex, IntervalIndex) -from pandas.tseries.index import Timestamp, DatetimeIndex +from pandas.core.indexes.datetimes import Timestamp, DatetimeIndex from pandas._libs import lib from pandas._libs.tslib import iNaT diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 8825ba5607a20..a984f578b0520 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -364,8 +364,8 @@ def test_valid_dt_with_missing_values(self): def test_dt_accessor_api(self): # GH 9322 - from pandas.tseries.common import (CombinedDatetimelikeProperties, - DatetimeProperties) + from pandas.core.indexes.accessors import ( + CombinedDatetimelikeProperties, DatetimeProperties) self.assertIs(Series.dt, CombinedDatetimelikeProperties) s = Series(date_range('2000-01-01', periods=3)) @@ -379,7 +379,7 @@ def test_dt_accessor_api(self): self.assertFalse(hasattr(s, 'dt')) def 
test_sub_of_datetime_from_TimeSeries(self): - from pandas.tseries.timedeltas import to_timedelta + from pandas.core.tools.timedeltas import to_timedelta from datetime import datetime a = Timestamp(datetime(1993, 0o1, 0o7, 13, 30, 00)) b = datetime(1993, 6, 22, 13, 30) diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index 4b1c303200739..9ca7645e6f974 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -7,7 +7,7 @@ import numpy as np from pandas import Series -from pandas.tseries.index import Timestamp +from pandas.core.indexes.datetimes import Timestamp import pandas._libs.lib as lib from pandas.util.testing import assert_series_equal diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 3d609dec7958a..2e1ae7b81ea20 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -13,8 +13,8 @@ from pandas import (Index, Series, DataFrame, isnull, bdate_range, NaT, date_range, timedelta_range, _np_version_under1p8) -from pandas.tseries.index import Timestamp -from pandas.tseries.tdi import Timedelta +from pandas.core.indexes.datetimes import Timestamp +from pandas.core.indexes.timedeltas import Timedelta import pandas.core.nanops as nanops from pandas.compat import range, zip diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index f1ae7765648ca..354010a5d89ea 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -2,7 +2,7 @@ import pandas as pd import pandas.util.testing as tm -import pandas.tseries.period as period +import pandas.core.indexes.period as period from pandas import Series, period_range, DataFrame, Period diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index 339d871b63049..e61297bdcce3e 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -6,7 +6,7 @@ import pandas as pd from pandas import (Index, Series, _np_version_under1p9) -from pandas.tseries.index import Timestamp +from pandas.core.indexes.datetimes import Timestamp from pandas.core.dtypes.common import is_integer import pandas.util.testing as tm diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 0f960a890e72b..0322933e96631 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -8,8 +8,8 @@ import pandas.util.testing as tm from pandas._libs.tslib import iNaT from pandas.compat import lrange, StringIO, product -from pandas.tseries.tdi import TimedeltaIndex -from pandas.tseries.index import DatetimeIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex +from pandas.core.indexes.datetimes import DatetimeIndex from pandas.tseries.offsets import BDay, BMonthEnd from pandas import (Index, Series, date_range, NaT, concat, DataFrame, Timestamp, to_datetime, offsets, @@ -739,7 +739,7 @@ def test_between_time_formats(self): "%s - %s" % time_string) def test_to_period(self): - from pandas.tseries.period import period_range + from pandas.core.indexes.period import period_range ts = _simple_ts('1/1/2000', '1/1/2001') diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 0a58713125a30..6ee8dacf17c62 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -14,7 +14,7 @@ is_float_dtype, is_object_dtype, is_float) -from pandas.tseries.index import 
DatetimeIndex +from pandas.core.indexes.datetimes import DatetimeIndex from pandas.tseries.offsets import BDay from pandas.util import testing as tm from pandas.compat import lrange diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 148f2ae425629..6321dcce7153b 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -18,7 +18,7 @@ from pandas.compat import StringIO from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.base import PandasDelegate, NoNewAttributesMixin -from pandas.tseries.base import DatetimeIndexOpsMixin +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas._libs.tslib import iNaT diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 17f55b41970b1..0594cc9878056 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -4312,7 +4312,7 @@ def test_str_accessor_api_for_categorical(self): def test_dt_accessor_api_for_categorical(self): # https://github.com/pandas-dev/pandas/issues/10661 - from pandas.tseries.common import Properties + from pandas.core.indexes.accessors import Properties s_dr = Series(date_range('1/1/2015', periods=5, tz="MET")) c_dr = s_dr.astype("category") diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/test_resample.py old mode 100755 new mode 100644 similarity index 99% rename from pandas/tests/tseries/test_resample.py rename to pandas/tests/test_resample.py index e81dfd8649e8e..a39242c9dd882 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/test_resample.py @@ -19,12 +19,12 @@ from pandas.core.groupby import DataError from pandas.tseries.frequencies import MONTHS, DAYS from pandas.tseries.frequencies import to_offset -from pandas.tseries.index import date_range +from pandas.core.indexes.datetimes import date_range from pandas.tseries.offsets import Minute, BDay -from pandas.tseries.period import period_range, PeriodIndex, Period -from pandas.tseries.resample import (DatetimeIndex, TimeGrouper, - DatetimeIndexResampler) -from pandas.tseries.tdi import timedelta_range, TimedeltaIndex +from pandas.core.indexes.period import period_range, PeriodIndex, Period +from pandas.core.resample import (DatetimeIndex, TimeGrouper, + DatetimeIndexResampler) +from pandas.core.indexes.timedeltas import timedelta_range, TimedeltaIndex from pandas.util.testing import (assert_series_equal, assert_almost_equal, assert_frame_equal, assert_index_equal) from pandas._libs.period import IncompatibleFrequency diff --git a/pandas/tests/tools/__init__.py b/pandas/tests/tools/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py new file mode 100644 index 0000000000000..96b49c5fb97a6 --- /dev/null +++ b/pandas/tests/tools/test_numeric.py @@ -0,0 +1,371 @@ +import pytest +import decimal + +import numpy as np +import pandas as pd +from pandas import to_numeric, _np_version_under1p9 + +from pandas.util import testing as tm +from numpy import iinfo + + +class TestToNumeric(tm.TestCase): + + def test_series(self): + s = pd.Series(['1', '-3.14', '7']) + res = to_numeric(s) + expected = pd.Series([1, -3.14, 7]) + tm.assert_series_equal(res, expected) + + s = pd.Series(['1', '-3.14', 7]) + res = to_numeric(s) + tm.assert_series_equal(res, expected) + + def test_series_numeric(self): + s = pd.Series([1, 3, 4, 5], index=list('ABCD'), name='XXX') + res = to_numeric(s) + tm.assert_series_equal(res, s) + + s = 
pd.Series([1., 3., 4., 5.], index=list('ABCD'), name='XXX') + res = to_numeric(s) + tm.assert_series_equal(res, s) + + # bool is regarded as numeric + s = pd.Series([True, False, True, True], + index=list('ABCD'), name='XXX') + res = to_numeric(s) + tm.assert_series_equal(res, s) + + def test_error(self): + s = pd.Series([1, -3.14, 'apple']) + msg = 'Unable to parse string "apple" at position 2' + with tm.assertRaisesRegexp(ValueError, msg): + to_numeric(s, errors='raise') + + res = to_numeric(s, errors='ignore') + expected = pd.Series([1, -3.14, 'apple']) + tm.assert_series_equal(res, expected) + + res = to_numeric(s, errors='coerce') + expected = pd.Series([1, -3.14, np.nan]) + tm.assert_series_equal(res, expected) + + s = pd.Series(['orange', 1, -3.14, 'apple']) + msg = 'Unable to parse string "orange" at position 0' + with tm.assertRaisesRegexp(ValueError, msg): + to_numeric(s, errors='raise') + + def test_error_seen_bool(self): + s = pd.Series([True, False, 'apple']) + msg = 'Unable to parse string "apple" at position 2' + with tm.assertRaisesRegexp(ValueError, msg): + to_numeric(s, errors='raise') + + res = to_numeric(s, errors='ignore') + expected = pd.Series([True, False, 'apple']) + tm.assert_series_equal(res, expected) + + # coerces to float + res = to_numeric(s, errors='coerce') + expected = pd.Series([1., 0., np.nan]) + tm.assert_series_equal(res, expected) + + def test_list(self): + s = ['1', '-3.14', '7'] + res = to_numeric(s) + expected = np.array([1, -3.14, 7]) + tm.assert_numpy_array_equal(res, expected) + + def test_list_numeric(self): + s = [1, 3, 4, 5] + res = to_numeric(s) + tm.assert_numpy_array_equal(res, np.array(s, dtype=np.int64)) + + s = [1., 3., 4., 5.] + res = to_numeric(s) + tm.assert_numpy_array_equal(res, np.array(s)) + + # bool is regarded as numeric + s = [True, False, True, True] + res = to_numeric(s) + tm.assert_numpy_array_equal(res, np.array(s)) + + def test_numeric(self): + s = pd.Series([1, -3.14, 7], dtype='O') + res = to_numeric(s) + expected = pd.Series([1, -3.14, 7]) + tm.assert_series_equal(res, expected) + + s = pd.Series([1, -3.14, 7]) + res = to_numeric(s) + tm.assert_series_equal(res, expected) + + # GH 14827 + df = pd.DataFrame(dict( + a=[1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), '0.1'], + b=[1.0, 2.0, 3.0, 4.0], + )) + expected = pd.DataFrame(dict( + a=[1.2, 3.14, np.inf, 0.1], + b=[1.0, 2.0, 3.0, 4.0], + )) + + # Test to_numeric over one column + df_copy = df.copy() + df_copy['a'] = df_copy['a'].apply(to_numeric) + tm.assert_frame_equal(df_copy, expected) + + # Test to_numeric over multiple columns + df_copy = df.copy() + df_copy[['a', 'b']] = df_copy[['a', 'b']].apply(to_numeric) + tm.assert_frame_equal(df_copy, expected) + + def test_numeric_lists_and_arrays(self): + # Test to_numeric with embedded lists and arrays + df = pd.DataFrame(dict( + a=[[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1] + )) + df['a'] = df['a'].apply(to_numeric) + expected = pd.DataFrame(dict( + a=[[3.14, 1.0], 1.6, 0.1], + )) + tm.assert_frame_equal(df, expected) + + df = pd.DataFrame(dict( + a=[np.array([decimal.Decimal(3.14), 1.0]), 0.1] + )) + df['a'] = df['a'].apply(to_numeric) + expected = pd.DataFrame(dict( + a=[[3.14, 1.0], 0.1], + )) + tm.assert_frame_equal(df, expected) + + def test_all_nan(self): + s = pd.Series(['a', 'b', 'c']) + res = to_numeric(s, errors='coerce') + expected = pd.Series([np.nan, np.nan, np.nan]) + tm.assert_series_equal(res, expected) + + def test_type_check(self): + # GH 11776 + df = pd.DataFrame({'a': [1, 
-3.14, 7], 'b': ['4', '5', '6']}) + with tm.assertRaisesRegexp(TypeError, "1-d array"): + to_numeric(df) + for errors in ['ignore', 'raise', 'coerce']: + with tm.assertRaisesRegexp(TypeError, "1-d array"): + to_numeric(df, errors=errors) + + def test_scalar(self): + self.assertEqual(pd.to_numeric(1), 1) + self.assertEqual(pd.to_numeric(1.1), 1.1) + + self.assertEqual(pd.to_numeric('1'), 1) + self.assertEqual(pd.to_numeric('1.1'), 1.1) + + with tm.assertRaises(ValueError): + to_numeric('XX', errors='raise') + + self.assertEqual(to_numeric('XX', errors='ignore'), 'XX') + self.assertTrue(np.isnan(to_numeric('XX', errors='coerce'))) + + def test_numeric_dtypes(self): + idx = pd.Index([1, 2, 3], name='xxx') + res = pd.to_numeric(idx) + tm.assert_index_equal(res, idx) + + res = pd.to_numeric(pd.Series(idx, name='xxx')) + tm.assert_series_equal(res, pd.Series(idx, name='xxx')) + + res = pd.to_numeric(idx.values) + tm.assert_numpy_array_equal(res, idx.values) + + idx = pd.Index([1., np.nan, 3., np.nan], name='xxx') + res = pd.to_numeric(idx) + tm.assert_index_equal(res, idx) + + res = pd.to_numeric(pd.Series(idx, name='xxx')) + tm.assert_series_equal(res, pd.Series(idx, name='xxx')) + + res = pd.to_numeric(idx.values) + tm.assert_numpy_array_equal(res, idx.values) + + def test_str(self): + idx = pd.Index(['1', '2', '3'], name='xxx') + exp = np.array([1, 2, 3], dtype='int64') + res = pd.to_numeric(idx) + tm.assert_index_equal(res, pd.Index(exp, name='xxx')) + + res = pd.to_numeric(pd.Series(idx, name='xxx')) + tm.assert_series_equal(res, pd.Series(exp, name='xxx')) + + res = pd.to_numeric(idx.values) + tm.assert_numpy_array_equal(res, exp) + + idx = pd.Index(['1.5', '2.7', '3.4'], name='xxx') + exp = np.array([1.5, 2.7, 3.4]) + res = pd.to_numeric(idx) + tm.assert_index_equal(res, pd.Index(exp, name='xxx')) + + res = pd.to_numeric(pd.Series(idx, name='xxx')) + tm.assert_series_equal(res, pd.Series(exp, name='xxx')) + + res = pd.to_numeric(idx.values) + tm.assert_numpy_array_equal(res, exp) + + def test_datetimelike(self): + for tz in [None, 'US/Eastern', 'Asia/Tokyo']: + idx = pd.date_range('20130101', periods=3, tz=tz, name='xxx') + res = pd.to_numeric(idx) + tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx')) + + res = pd.to_numeric(pd.Series(idx, name='xxx')) + tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx')) + + res = pd.to_numeric(idx.values) + tm.assert_numpy_array_equal(res, idx.asi8) + + def test_timedelta(self): + idx = pd.timedelta_range('1 days', periods=3, freq='D', name='xxx') + res = pd.to_numeric(idx) + tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx')) + + res = pd.to_numeric(pd.Series(idx, name='xxx')) + tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx')) + + res = pd.to_numeric(idx.values) + tm.assert_numpy_array_equal(res, idx.asi8) + + def test_period(self): + idx = pd.period_range('2011-01', periods=3, freq='M', name='xxx') + res = pd.to_numeric(idx) + tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx')) + + # ToDo: enable when we can support native PeriodDtype + # res = pd.to_numeric(pd.Series(idx, name='xxx')) + # tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx')) + + def test_non_hashable(self): + # Test for Bug #13324 + s = pd.Series([[10.0, 2], 1.0, 'apple']) + res = pd.to_numeric(s, errors='coerce') + tm.assert_series_equal(res, pd.Series([np.nan, 1.0, np.nan])) + + res = pd.to_numeric(s, errors='ignore') + tm.assert_series_equal(res, pd.Series([[10.0, 2], 1.0, 'apple'])) + + with self.assertRaisesRegexp(TypeError, 
"Invalid object type"): + pd.to_numeric(s) + + def test_downcast(self): + # see gh-13352 + mixed_data = ['1', 2, 3] + int_data = [1, 2, 3] + date_data = np.array(['1970-01-02', '1970-01-03', + '1970-01-04'], dtype='datetime64[D]') + + invalid_downcast = 'unsigned-integer' + msg = 'invalid downcasting method provided' + + smallest_int_dtype = np.dtype(np.typecodes['Integer'][0]) + smallest_uint_dtype = np.dtype(np.typecodes['UnsignedInteger'][0]) + + # support below np.float32 is rare and far between + float_32_char = np.dtype(np.float32).char + smallest_float_dtype = float_32_char + + for data in (mixed_data, int_data, date_data): + with self.assertRaisesRegexp(ValueError, msg): + pd.to_numeric(data, downcast=invalid_downcast) + + expected = np.array([1, 2, 3], dtype=np.int64) + + res = pd.to_numeric(data) + tm.assert_numpy_array_equal(res, expected) + + res = pd.to_numeric(data, downcast=None) + tm.assert_numpy_array_equal(res, expected) + + expected = np.array([1, 2, 3], dtype=smallest_int_dtype) + + for signed_downcast in ('integer', 'signed'): + res = pd.to_numeric(data, downcast=signed_downcast) + tm.assert_numpy_array_equal(res, expected) + + expected = np.array([1, 2, 3], dtype=smallest_uint_dtype) + res = pd.to_numeric(data, downcast='unsigned') + tm.assert_numpy_array_equal(res, expected) + + expected = np.array([1, 2, 3], dtype=smallest_float_dtype) + res = pd.to_numeric(data, downcast='float') + tm.assert_numpy_array_equal(res, expected) + + # if we can't successfully cast the given + # data to a numeric dtype, do not bother + # with the downcast parameter + data = ['foo', 2, 3] + expected = np.array(data, dtype=object) + res = pd.to_numeric(data, errors='ignore', + downcast='unsigned') + tm.assert_numpy_array_equal(res, expected) + + # cannot cast to an unsigned integer because + # we have a negative number + data = ['-1', 2, 3] + expected = np.array([-1, 2, 3], dtype=np.int64) + res = pd.to_numeric(data, downcast='unsigned') + tm.assert_numpy_array_equal(res, expected) + + # cannot cast to an integer (signed or unsigned) + # because we have a float number + data = (['1.1', 2, 3], + [10000.0, 20000, 3000, 40000.36, 50000, 50000.00]) + expected = (np.array([1.1, 2, 3], dtype=np.float64), + np.array([10000.0, 20000, 3000, + 40000.36, 50000, 50000.00], dtype=np.float64)) + + for _data, _expected in zip(data, expected): + for downcast in ('integer', 'signed', 'unsigned'): + res = pd.to_numeric(_data, downcast=downcast) + tm.assert_numpy_array_equal(res, _expected) + + # the smallest integer dtype need not be np.(u)int8 + data = ['256', 257, 258] + + for downcast, expected_dtype in zip( + ['integer', 'signed', 'unsigned'], + [np.int16, np.int16, np.uint16]): + expected = np.array([256, 257, 258], dtype=expected_dtype) + res = pd.to_numeric(data, downcast=downcast) + tm.assert_numpy_array_equal(res, expected) + + def test_downcast_limits(self): + # Test the limits of each downcast. Bug: #14401. + # Check to make sure numpy is new enough to run this test. 
+ if _np_version_under1p9: + pytest.skip("Numpy version is under 1.9") + + i = 'integer' + u = 'unsigned' + dtype_downcast_min_max = [ + ('int8', i, [iinfo(np.int8).min, iinfo(np.int8).max]), + ('int16', i, [iinfo(np.int16).min, iinfo(np.int16).max]), + ('int32', i, [iinfo(np.int32).min, iinfo(np.int32).max]), + ('int64', i, [iinfo(np.int64).min, iinfo(np.int64).max]), + ('uint8', u, [iinfo(np.uint8).min, iinfo(np.uint8).max]), + ('uint16', u, [iinfo(np.uint16).min, iinfo(np.uint16).max]), + ('uint32', u, [iinfo(np.uint32).min, iinfo(np.uint32).max]), + ('uint64', u, [iinfo(np.uint64).min, iinfo(np.uint64).max]), + ('int16', i, [iinfo(np.int8).min, iinfo(np.int8).max + 1]), + ('int32', i, [iinfo(np.int16).min, iinfo(np.int16).max + 1]), + ('int64', i, [iinfo(np.int32).min, iinfo(np.int32).max + 1]), + ('int16', i, [iinfo(np.int8).min - 1, iinfo(np.int16).max]), + ('int32', i, [iinfo(np.int16).min - 1, iinfo(np.int32).max]), + ('int64', i, [iinfo(np.int32).min - 1, iinfo(np.int64).max]), + ('uint16', u, [iinfo(np.uint8).min, iinfo(np.uint8).max + 1]), + ('uint32', u, [iinfo(np.uint16).min, iinfo(np.uint16).max + 1]), + ('uint64', u, [iinfo(np.uint32).min, iinfo(np.uint32).max + 1]) + ] + + for dtype, downcast, min_max in dtype_downcast_min_max: + series = pd.to_numeric(pd.Series(min_max), downcast=downcast) + assert series.dtype == dtype diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index 5fbef465ca8fc..327dad6d47634 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -7,10 +7,10 @@ date_range, period_range) import pandas.tseries.frequencies as frequencies -from pandas.tseries.tools import to_datetime +from pandas.core.tools.datetimes import to_datetime import pandas.tseries.offsets as offsets -from pandas.tseries.period import PeriodIndex +from pandas.core.indexes.period import PeriodIndex import pandas.compat as compat from pandas.compat import is_platform_windows diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index 2dc2485550bc5..b0c84cf555ede 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -15,7 +15,8 @@ from pandas.tseries.frequencies import (_offset_map, get_freq_code, _get_freq_str, _INVALID_FREQ_ERROR, get_offset, get_standard_freq) -from pandas.tseries.index import _to_m8, DatetimeIndex, _daterange_cache +from pandas.core.indexes.datetimes import ( + _to_m8, DatetimeIndex, _daterange_cache) from pandas.tseries.offsets import (BDay, CDay, BQuarterEnd, BMonthEnd, BusinessHour, WeekOfMonth, CBMonthEnd, CustomBusinessHour, WeekDay, @@ -27,8 +28,9 @@ QuarterEnd, BusinessMonthEnd, FY5253, Milli, Nano, Easter, FY5253Quarter, LastWeekOfMonth, CacheableOffset) -from pandas.tseries.tools import (format, ole2datetime, parse_time_string, - to_datetime, DateParseError) +from pandas.core.tools.datetimes import ( + format, ole2datetime, parse_time_string, + to_datetime, DateParseError) import pandas.tseries.offsets as offsets from pandas.io.pickle import read_pickle from pandas._libs.tslib import normalize_date, NaT, Timestamp, Timedelta diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 125e031b5e3a2..06b6bbbcbc559 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -7,10 +7,10 @@ from pytz import NonExistentTimeError import pandas.util.testing as tm -import pandas.tseries.tools as tools +import 
pandas.core.tools.datetimes as tools import pandas.tseries.offsets as offsets from pandas.compat import lrange, zip -from pandas.tseries.index import bdate_range, date_range +from pandas.core.indexes.datetimes import bdate_range, date_range from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas._libs import tslib from pandas import (Index, Series, DataFrame, isnull, Timestamp, NaT, diff --git a/pandas/tseries/api.py b/pandas/tseries/api.py index a00ccf99e1b96..71386c02547ba 100644 --- a/pandas/tseries/api.py +++ b/pandas/tseries/api.py @@ -4,11 +4,5 @@ # flake8: noqa -from pandas.tseries.index import DatetimeIndex, date_range, bdate_range from pandas.tseries.frequencies import infer_freq -from pandas.tseries.tdi import Timedelta, TimedeltaIndex, timedelta_range -from pandas.tseries.period import Period, PeriodIndex, period_range, pnow -from pandas.tseries.resample import TimeGrouper -from pandas.tseries.timedeltas import to_timedelta -from pandas._libs.lib import NaT import pandas.tseries.offsets as offsets diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index a097c56a0ffd3..f9f4adc1b2c81 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -4,7 +4,7 @@ import numpy as np from pandas.core.dtypes.generic import ABCSeries, ABCDatetimeIndex, ABCPeriod -from pandas.tseries.tools import to_datetime, normalize_date +from pandas.core.tools.datetimes import to_datetime, normalize_date from pandas.core.common import AbstractMethodError # import after tools, dateutil check diff --git a/setup.py b/setup.py index 69b9a974b9935..830968768ceb2 100755 --- a/setup.py +++ b/setup.py @@ -644,6 +644,7 @@ def pxd(name): 'pandas.core.computation', 'pandas.core.reshape', 'pandas.core.sparse', + 'pandas.core.tools', 'pandas.errors', 'pandas.io', 'pandas.io.json', @@ -675,6 +676,7 @@ def pxd(name): 'pandas.tests.scalar', 'pandas.tests.tseries', 'pandas.tests.plotting', + 'pandas.tests.tools', 'pandas.tools', 'pandas.tseries', 'pandas.util.clipboard' From 816f94575c9ec1af2169a28536217c4d16dd6b4b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 18 Apr 2017 12:45:36 +0000 Subject: [PATCH 420/933] PERF: better perf on _ensure_data in core/algorithms, helping perf of unique, duplicated, factorize (#16046) --- pandas/core/algorithms.py | 71 +++++++++++++++++------------------- pandas/core/dtypes/common.py | 50 +++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 37 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 63df4b3d94bc8..8437861bea19e 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -14,6 +14,7 @@ from pandas.core.dtypes.common import ( is_unsigned_integer_dtype, is_signed_integer_dtype, is_integer_dtype, is_complex_dtype, + is_object_dtype, is_categorical_dtype, is_sparse, is_period_dtype, is_numeric_dtype, is_float_dtype, @@ -63,6 +64,35 @@ def _ensure_data(values, dtype=None): """ + # we check some simple dtypes first + try: + if is_bool_dtype(values) or is_bool_dtype(dtype): + # we are actually coercing to uint64 + # until our algos support uint8 directly (see TODO) + return np.asarray(values).astype('uint64'), 'bool', 'uint64' + elif is_signed_integer_dtype(values) or is_signed_integer_dtype(dtype): + return _ensure_int64(values), 'int64', 'int64' + elif (is_unsigned_integer_dtype(values) or + is_unsigned_integer_dtype(dtype)): + return _ensure_uint64(values), 'uint64', 'uint64' + elif is_float_dtype(values) or is_float_dtype(dtype): + return _ensure_float64(values), 'float64',
'float64' + elif is_object_dtype(values) and dtype is None: + return _ensure_object(np.asarray(values)), 'object', 'object' + elif is_complex_dtype(values) or is_complex_dtype(dtype): + + # ignore the fact that we are casting to float + # which discards complex parts + with catch_warnings(record=True): + values = _ensure_float64(values) + return values, 'float64', 'float64' + + except (TypeError, ValueError): + # if we are trying to coerce to a dtype + # and it is incompat this will fall thru to here + return _ensure_object(values), 'object', 'object' + + # datetimelike if (needs_i8_conversion(values) or is_period_dtype(dtype) or is_datetime64_any_dtype(dtype) or @@ -94,43 +124,9 @@ def _ensure_data(values, dtype=None): return values, dtype, 'int64' + # we have failed, return object values = np.asarray(values) - - try: - if is_bool_dtype(values) or is_bool_dtype(dtype): - # we are actually coercing to uint64 - # until our algos suppport uint8 directly (see TODO) - values = values.astype('uint64') - dtype = 'bool' - ndtype = 'uint64' - elif is_signed_integer_dtype(values) or is_signed_integer_dtype(dtype): - values = _ensure_int64(values) - ndtype = dtype = 'int64' - elif (is_unsigned_integer_dtype(values) or - is_unsigned_integer_dtype(dtype)): - values = _ensure_uint64(values) - ndtype = dtype = 'uint64' - elif is_complex_dtype(values) or is_complex_dtype(dtype): - - # ignore the fact that we are casting to float - # which discards complex parts - with catch_warnings(record=True): - values = _ensure_float64(values) - ndtype = dtype = 'float64' - elif is_float_dtype(values) or is_float_dtype(dtype): - values = _ensure_float64(values) - ndtype = dtype = 'float64' - else: - values = _ensure_object(values) - ndtype = dtype = 'object' - - except (TypeError, ValueError): - # if we are trying to coerce to a dtype - # and it is incompat this will fall thru to here - values = _ensure_object(values) - ndtype = dtype = 'object' - - return values, dtype, ndtype + return _ensure_object(values), 'object', 'object' def _reconstruct_data(values, dtype, original): @@ -465,7 +461,7 @@ def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False): if not is_list_like(values): raise TypeError("Only list-like objects are allowed to be passed to" "safe_sort as values") - values = np.array(values, copy=False) + values = np.asarray(values) def sort_mixed(values): # order ints before strings, safe in py3 @@ -547,6 +543,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): PeriodIndex """ + values = _ensure_arraylike(values) original = values values, dtype, _ = _ensure_data(values) (hash_klass, vec_klass), values = _get_data_algo(values, _hashtables) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 0b14e484d40a7..156e43fc4e5fb 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -82,6 +82,8 @@ def _ensure_categorical(arr): def is_object_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False tipo = _get_dtype_type(arr_or_dtype) return issubclass(tipo, np.object_) @@ -120,6 +122,8 @@ def is_period(array): def is_datetime64_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False try: tipo = _get_dtype_type(arr_or_dtype) except TypeError: @@ -128,23 +132,33 @@ def is_datetime64_dtype(arr_or_dtype): def is_datetime64tz_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False return DatetimeTZDtype.is_dtype(arr_or_dtype) def is_timedelta64_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False 
tipo = _get_dtype_type(arr_or_dtype) return issubclass(tipo, np.timedelta64) def is_period_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False return PeriodDtype.is_dtype(arr_or_dtype) def is_interval_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False return IntervalDtype.is_dtype(arr_or_dtype) def is_categorical_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False return CategoricalDtype.is_dtype(arr_or_dtype) @@ -178,6 +192,8 @@ def is_string_dtype(arr_or_dtype): # TODO: gh-15585: consider making the checks stricter. + if arr_or_dtype is None: + return False try: dtype = _get_dtype(arr_or_dtype) return dtype.kind in ('O', 'S', 'U') and not is_period_dtype(dtype) @@ -224,45 +240,61 @@ def is_dtype_equal(source, target): def is_any_int_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False tipo = _get_dtype_type(arr_or_dtype) return issubclass(tipo, np.integer) def is_integer_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False tipo = _get_dtype_type(arr_or_dtype) return (issubclass(tipo, np.integer) and not issubclass(tipo, (np.datetime64, np.timedelta64))) def is_signed_integer_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False tipo = _get_dtype_type(arr_or_dtype) return (issubclass(tipo, np.signedinteger) and not issubclass(tipo, (np.datetime64, np.timedelta64))) def is_unsigned_integer_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False tipo = _get_dtype_type(arr_or_dtype) return (issubclass(tipo, np.unsignedinteger) and not issubclass(tipo, (np.datetime64, np.timedelta64))) def is_int64_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False tipo = _get_dtype_type(arr_or_dtype) return issubclass(tipo, np.int64) def is_int_or_datetime_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False tipo = _get_dtype_type(arr_or_dtype) return (issubclass(tipo, np.integer) or issubclass(tipo, (np.datetime64, np.timedelta64))) def is_datetime64_any_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False return (is_datetime64_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype)) def is_datetime64_ns_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False try: tipo = _get_dtype(arr_or_dtype) except TypeError: @@ -303,6 +335,8 @@ def is_timedelta64_ns_dtype(arr_or_dtype): False """ + if arr_or_dtype is None: + return False try: tipo = _get_dtype(arr_or_dtype) return tipo == _TD_DTYPE @@ -311,6 +345,8 @@ def is_timedelta64_ns_dtype(arr_or_dtype): def is_datetime_or_timedelta_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False tipo = _get_dtype_type(arr_or_dtype) return issubclass(tipo, (np.datetime64, np.timedelta64)) @@ -398,12 +434,16 @@ def is_object(x): def needs_i8_conversion(arr_or_dtype): + if arr_or_dtype is None: + return False return (is_datetime_or_timedelta_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype) or is_period_dtype(arr_or_dtype)) def is_numeric_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False tipo = _get_dtype_type(arr_or_dtype) return (issubclass(tipo, (np.number, np.bool_)) and not issubclass(tipo, (np.datetime64, np.timedelta64))) @@ -438,6 +478,8 @@ def is_string_like_dtype(arr_or_dtype): False """ + if arr_or_dtype is None: + return False try: dtype = _get_dtype(arr_or_dtype) return dtype.kind in ('S', 'U') @@ -446,16 +488,22 @@ def is_string_like_dtype(arr_or_dtype): def is_float_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False tipo = _get_dtype_type(arr_or_dtype) return issubclass(tipo, np.floating) def 
is_floating_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False tipo = _get_dtype_type(arr_or_dtype) return isinstance(tipo, np.floating) def is_bool_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False try: tipo = _get_dtype_type(arr_or_dtype) except ValueError: @@ -479,6 +527,8 @@ def is_extension_type(value): def is_complex_dtype(arr_or_dtype): + if arr_or_dtype is None: + return False tipo = _get_dtype_type(arr_or_dtype) return issubclass(tipo, np.complexfloating) From 2522efa9e687e777d966f49af70b325922699bea Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 18 Apr 2017 09:55:30 -0500 Subject: [PATCH 421/933] DOC: Various doc fixes (#16035) - Fixed spacing - Fixed method reference - Fixed list line wrapping - Fixed unbalanced ticks - Fixed section-heading without colon - Changed Interval Properties -> Attributes - Changed Styler properties --- doc/source/whatsnew/v0.20.0.txt | 12 ++++++------ pandas/_libs/interval.pyx | 2 +- pandas/core/generic.py | 16 ++++++++-------- pandas/io/formats/style.py | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9fe0b66028ac5..7951a4dd43534 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -176,7 +176,7 @@ The following are now part of this API: ``pandas.testing`` ^^^^^^^^^^^^^^^^^^ -We are adding a standard module that exposes the public testing functions in ``pandas.testing``(:issue:`9895`). Those functions can be used when writing tests for functionality using pandas objects. +We are adding a standard module that exposes the public testing functions in ``pandas.testing`` (:issue:`9895`). Those functions can be used when writing tests for functionality using pandas objects. The following testing functions are now part of this API: @@ -517,7 +517,7 @@ Other Enhancements - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts ``str`` to separate joined fields; the default is ".", which is backward compatible. (:issue:`14883`) -- :method:`~MultiIndex.remove_unused_levels` has been added to facilitate :ref:`removing unused levels `. (:issue:`15694`) +- :meth:`~MultiIndex.remove_unused_levels` has been added to facilitate :ref:`removing unused levels `. (:issue:`15694`) - ``pd.read_csv()`` will now raise a ``ParserError`` error whenever any parsing error occurs (:issue:`15913`, :issue:`15925`) - ``pd.read_csv()`` now supports the ``error_bad_lines`` and ``warn_bad_lines`` arguments for the Python parser (:issue:`15925`) - The ``display.show_dimensions`` option can now also be used to specify @@ -620,8 +620,8 @@ However, ``.agg(..)`` can *also* accept a dict that allows 'renaming' of the res between ``Series`` and ``DataFrame``. We are deprecating this 'renaming' functionality. - We are deprecating passing a dict to a grouped/rolled/resampled ``Series``. This allowed -one to ``rename`` the resulting aggregation, but this had a completely different -meaning than passing a dictionary to a grouped ``DataFrame``, which accepts column-to-aggregations.
one to ``rename`` the resulting aggregation, but this had a completely different + meaning than passing a dictionary to a grouped ``DataFrame``, which accepts column-to-aggregations. - We are deprecating passing a dict-of-dicts to a grouped/rolled/resampled ``DataFrame`` in a similar manner. This is an illustrative example: @@ -1363,9 +1363,9 @@ If indicated, a deprecation warning will be issued if you reference these modul "pandas._testing", "pandas.util.libtesting", "" "pandas._window", "pandas.core.libwindow", "" -- The function :func:`~pandas.api.type.union_categoricals` is now importable from ``pandas.api.types``, formerly from ``pandas.types.concat`` (:issue:`15998`) +- The function :func:`~pandas.api.types.union_categoricals` is now importable from ``pandas.api.types``, formerly from ``pandas.types.concat`` (:issue:`15998`) -.. _whatsnew_0200.privacy.deprecate_plotting +.. _whatsnew_0200.privacy.deprecate_plotting: Deprecate .plotting ^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 60a34aff16e9d..e287e1fc8bdaf 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -52,7 +52,7 @@ cdef class Interval(IntervalMixin): .. versionadded:: 0.20.0 - Properties + Attributes ---------- left, right : values Left and right bounds for each interval. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1555157610609..74d3053821e39 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4217,14 +4217,14 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, Parameters ---------- - by : mapping function / list of functions, dict, Series, ndarray, - or tuple / list of column names or index level names or - Series or ndarrays - Called on each element of the object index to determine the groups. - If a dict or Series is passed, the Series or dict VALUES will be - used to determine the groups (the Series' values are first - aligned; see ``.align()`` method). If ndarray is passed, the - values as-is determine the groups. + by : mapping, function, str, or iterable + Used to determine the groups for the groupby. + If ``by`` is a function, it's called on each value of the object's + index. If a dict or Series is passed, the Series or dict VALUES + will be used to determine the groups (the Series' values are first + aligned; see ``.align()`` method). If an ndarray is passed, the + values are used as-is to determine the groups. A str or list of strs + may be passed to group by the columns in ``self`` axis : int, default 0 level : int, level name, or sequence of such, default None If the axis is a MultiIndex (hierarchical), group by a particular diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 9321c29c99790..10f18fc35e43f 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -71,7 +71,7 @@ class Styler(object): Attributes ---------- env : Jinja2 Environment - template: Jinja2 Template + template : Jinja2 Template loader : Jinja2 Loader Notes From e082eb2c95a22a16d67e533cbf581a304cf5e70e Mon Sep 17 00:00:00 2001 From: yui-knk Date: Mon, 4 Jul 2016 00:06:27 +0900 Subject: [PATCH 422/933] BUG: `pivot_table` always returns a `DataFrame` Before this commit, if * `values` is not list-like * `columns` is `None` * `aggfunc` is not an instance of `list` then `pivot_table` returned a `Series`. This commit adds a check that `columns.nlevels` is greater than 1 to prevent `table` from being cast to a `Series`. This will fix #4386.
DOC: add docs for #13554 --- doc/source/whatsnew/v0.20.0.txt | 32 +++++++++++++++++++++++++ pandas/core/reshape/pivot.py | 3 ++- pandas/tests/reshape/test_pivot.py | 38 ++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7951a4dd43534..6b6f532ed2323 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1287,6 +1287,38 @@ joins, :meth:`DataFrame.join` and :func:`merge`, and the ``.align`` methods. left.join(right, how='inner') +.. _whatsnew_0200.api_breaking.pivot_table: + +Pivot Table always returns a DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The documentation for :meth:`pivot_table` states that a ``DataFrame`` is *always* returned. Here a bug +is fixed that allowed this to return a ``Series`` under a narrow circumstance. (:issue:`4386`) + +.. ipython:: python + + df = DataFrame({'col1': [3, 4, 5], + 'col2': ['C', 'D', 'E'], + 'col3': [1, 3, 9]}) + df + +Previous Behavior: + +.. code-block:: ipython + + In [2]: df.pivot_table('col1', index=['col3', 'col2'], aggfunc=np.sum) + Out[2]: + col3 col2 + 1 C 3 + 3 D 4 + 9 E 5 + Name: col1, dtype: int64 + +New Behavior: + +.. ipython:: python + + df.pivot_table('col1', index=['col3', 'col2'], aggfunc=np.sum) .. _whatsnew_0200.api: diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 1c5250615d410..74dbbfc00cb11 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -170,7 +170,8 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', margins_name=margins_name) # discard the top level - if values_passed and not values_multi and not table.empty: + if values_passed and not values_multi and not table.empty and \ + (table.columns.nlevels > 1): table = table[values[0]] if len(index) == 0 and len(columns) > 0: diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 88d25b9d053c3..7d122baa8ae64 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -940,6 +940,44 @@ def test_categorical_pivot_index_ordering(self): columns=expected_columns) tm.assert_frame_equal(result, expected) + def test_pivot_table_not_series(self): + # GH 4386 + # pivot_table always returns a DataFrame + # when values is not list like and columns is None + # and aggfunc is not instance of list + df = DataFrame({'col1': [3, 4, 5], + 'col2': ['C', 'D', 'E'], + 'col3': [1, 3, 9]}) + + result = df.pivot_table('col1', index=['col3', 'col2'], aggfunc=np.sum) + m = MultiIndex.from_arrays([[1, 3, 9], + ['C', 'D', 'E']], + names=['col3', 'col2']) + expected = DataFrame([3, 4, 5], + index=m, columns=['col1']) + + tm.assert_frame_equal(result, expected) + + result = df.pivot_table( + 'col1', index='col3', columns='col2', aggfunc=np.sum + ) + expected = DataFrame([[3, np.NaN, np.NaN], + [np.NaN, 4, np.NaN], + [np.NaN, np.NaN, 5]], + index=Index([1, 3, 9], name='col3'), + columns=Index(['C', 'D', 'E'], name='col2')) + + tm.assert_frame_equal(result, expected) + + result = df.pivot_table('col1', index='col3', aggfunc=[np.sum]) + m = MultiIndex.from_arrays([['sum'], + ['col1']]) + expected = DataFrame([3, 4, 5], + index=Index([1, 3, 9], name='col3'), + columns=m) + + tm.assert_frame_equal(result, expected) + class TestCrosstab(tm.TestCase): From 0b22b8df9cb2acac2cf3a51265421b2aecc264ce Mon Sep 17 00:00:00 2001 From: James McBride Date: Wed, 19 Apr 2017 00:33:39 -0700 Subject: [PATCH 423/933] DOC: fix 
typo magify -> magnify (#16052) --- doc/source/style.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/style.ipynb b/doc/source/style.ipynb index 06763b2a5e741..93323d046d495 100644 --- a/doc/source/style.ipynb +++ b/doc/source/style.ipynb @@ -849,7 +849,7 @@ "\n", "bigdf.style.background_gradient(cmap, axis=1)\\\n", " .set_properties(**{'max-width': '80px', 'font-size': '1pt'})\\\n", - " .set_caption(\"Hover to magify\")\\\n", + " .set_caption(\"Hover to magnify\")\\\n", " .set_precision(2)\\\n", " .set_table_styles(magnify())" ] From 00063d23dd3d36b7bd00f949117022677ba9179a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 19 Apr 2017 10:08:44 +0000 Subject: [PATCH 424/933] TST: fix tests xref #13554 (#16054) --- pandas/tests/test_categorical.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 0594cc9878056..cf0ccb95eabf1 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -3043,9 +3043,10 @@ def test_pivot_table(self): [Categorical(["a", "b", "z"], ordered=True), Categorical(["c", "d", "y"], ordered=True)], names=['A', 'B']) - expected = Series([1, 2, np.nan, 3, 4, np.nan, np.nan, np.nan, np.nan], - index=exp_index, name='values') - tm.assert_series_equal(result, expected) + expected = DataFrame( + {'values': [1, 2, np.nan, 3, 4, np.nan, np.nan, np.nan, np.nan]}, + index=exp_index) + tm.assert_frame_equal(result, expected) def test_count(self): From f114af045f68ed960cba02d234a959301ad97a79 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 19 Apr 2017 10:20:26 +0000 Subject: [PATCH 425/933] TST: fix tests xref #13554 (#16054) From dd5cef560b2fc30aaad04e74134b3f52b64425ce Mon Sep 17 00:00:00 2001 From: Line Pedersen Date: Wed, 19 Apr 2017 18:22:54 -0400 Subject: [PATCH 426/933] BUG: na_position doesn't work for sort_index() with MultiIndex closes #14784 Author: Line Pedersen Closes #15845 from linebp/json_normalize_seperator and squashes the following commits: 66f809e [Line Pedersen] BUG GH14784 na_position doesn't work for sort_index() with MultiIndex --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 3 +- pandas/core/indexes/multi.py | 16 +++++++++ pandas/core/series.py | 4 ++- pandas/tests/test_multilevel.py | 57 +++++++++++++++++++++++++++++++++ 5 files changed, 79 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 6b6f532ed2323..f64f592e109a1 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1564,6 +1564,7 @@ Indexing - Bug in the HTML display with a ``MultiIndex`` and truncation (:issue:`14882`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``pd.concat()`` where the names of ``MultiIndex`` of resulting ``DataFrame`` are not handled correctly when ``None`` is presented in the names of ``MultiIndex`` of input ``DataFrame`` (:issue:`15787`) +- Bug in ``DataFrame.sort_index()`` and ``Series.sort_index()`` where ``na_position`` doesn't work with a ``MultiIndex`` (:issue:`14784`) I/O ^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 153042d4a09c9..7fbfa7962c2c6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3352,7 +3352,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, # make sure that the axis is lexsorted to start # if
not we need to reconstruct to get the correct indexer labels = labels._sort_levels_monotonic() - indexer = lexsort_indexer(labels.labels, orders=ascending, + indexer = lexsort_indexer(labels._get_labels_for_sorting(), + orders=ascending, na_position=na_position) else: from pandas.core.sorting import nargsort diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 6d9a9aa691f66..92baf9d289cd2 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1635,6 +1635,22 @@ def reorder_levels(self, order): def __getslice__(self, i, j): return self.__getitem__(slice(i, j)) + def _get_labels_for_sorting(self): + """ + we categorize our labels by using the + available categories (all, not just observed) + excluding any missing ones (-1); this is in preparation + for sorting, where we need to disambiguate that -1 is not + a valid value + """ + from pandas.core.categorical import Categorical + + return [Categorical.from_codes(label, + np.arange(np.array(label).max() + 1, + dtype=label.dtype), + ordered=True) + for label in self.labels] + def sortlevel(self, level=0, ascending=True, sort_remaining=True): """ Sort MultiIndex at the requested level. The result will respect the diff --git a/pandas/core/series.py b/pandas/core/series.py index 8a2351527856d..e0364ad629c5d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1753,7 +1753,9 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, elif isinstance(index, MultiIndex): from pandas.core.sorting import lexsort_indexer labels = index._sort_levels_monotonic() - indexer = lexsort_indexer(labels.labels, orders=ascending) + indexer = lexsort_indexer(labels._get_labels_for_sorting(), + orders=ascending, + na_position=na_position) else: from pandas.core.sorting import nargsort diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 24bbf895508d7..99c468b256167 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2634,3 +2634,60 @@ def test_sort_non_lexsorted(self): with pytest.raises(UnsortedIndexError): result.loc[pd.IndexSlice['B':'C', 'a':'c'], :] + + def test_sort_index_nan(self): + # GH 14784 + # incorrect sorting w.r.t.
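A minimal sketch of what the fix above enables (an illustrative aside, not part of the patch; it mirrors the new test_sort_index_nan):

import numpy as np
import pandas as pd

mi = pd.MultiIndex.from_tuples([(12, 13), (np.nan, np.nan), (np.nan, 3), (1, 2)])
s = pd.Series(range(4), index=mi)
s.sort_index(na_position='first')  # rows with NaN keys sort to the front
s.sort_index(na_position='last')   # rows with NaN keys sort to the end (default)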
nans + tuples = [[12, 13], [np.nan, np.nan], [np.nan, 3], [1, 2]] + mi = MultiIndex.from_tuples(tuples) + + df = DataFrame(np.arange(16).reshape(4, 4), + index=mi, columns=list('ABCD')) + s = Series(np.arange(4), index=mi) + + df2 = DataFrame({ + 'date': pd.to_datetime([ + '20121002', '20121007', '20130130', '20130202', '20130305', + '20121002', '20121207', '20130130', '20130202', '20130305', + '20130202', '20130305' + ]), + 'user_id': [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5], + 'whole_cost': [1790, np.nan, 280, 259, np.nan, 623, 90, 312, + np.nan, 301, 359, 801], + 'cost': [12, 15, 10, 24, 39, 1, 0, np.nan, 45, 34, 1, 12] + }).set_index(['date', 'user_id']) + + # sorting frame, default nan position is last + result = df.sort_index() + expected = df.iloc[[3, 0, 2, 1], :] + tm.assert_frame_equal(result, expected) + + # sorting frame, nan position last + result = df.sort_index(na_position='last') + expected = df.iloc[[3, 0, 2, 1], :] + tm.assert_frame_equal(result, expected) + + # sorting frame, nan position first + result = df.sort_index(na_position='first') + expected = df.iloc[[1, 2, 3, 0], :] + tm.assert_frame_equal(result, expected) + + # sorting frame with removed rows + result = df2.dropna().sort_index() + expected = df2.sort_index().dropna() + tm.assert_frame_equal(result, expected) + + # sorting series, default nan position is last + result = s.sort_index() + expected = s.iloc[[3, 0, 2, 1]] + tm.assert_series_equal(result, expected) + + # sorting series, nan position last + result = s.sort_index(na_position='last') + expected = s.iloc[[3, 0, 2, 1]] + tm.assert_series_equal(result, expected) + + # sorting series, nan position first + result = s.sort_index(na_position='first') + expected = s.iloc[[1, 2, 3, 0]] + tm.assert_series_equal(result, expected) From 1b52b1218e973382ea958084c47c3f33eb0ed40c Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Wed, 19 Apr 2017 21:00:21 -0400 Subject: [PATCH 427/933] ENH: support Styler in ExcelFormatter closes #1663 Author: Joel Nothman Closes #15530 from jnothman/excel_style and squashes the following commits: c7a51ca [Joel Nothman] Test currently fails on openpyxl1 due to version incompatibilities 836f39e [Joel Nothman] Revert changes to xlwt de53808 [Joel Nothman] Remove debug code a5d51f9 [Joel Nothman] Merge branch 'master' into excel_style 934df06 [Joel Nothman] Display df, not styled 6465913 [Joel Nothman] More pytest-like test_styler_to_excel; enhancements to xlwt 6168765 [Joel Nothman] Recommended changes to what's new 9669d7d [Joel Nothman] Require jinja in test with df.style 14035c5 [Joel Nothman] Merge branch 'master' into excel_style 3071bac [Joel Nothman] Complete tests ceb9171 [Joel Nothman] reasons for xfails e2cfa77 [Joel Nothman] Test Styler.to_excel d5db0ac [Joel Nothman] Remove obsolete TODO 0256fc6 [Joel Nothman] Return after unhandled font size warning 60d6a3b [Joel Nothman] add doc/source/styled.xlsx to the gitignore 4e72993 [Joel Nothman] Fix what's new heading d144fdf [Joel Nothman] Font name strings 61fdc69 [Joel Nothman] Complete testing basic CSS -> Excel conversions 6ff8a46 [Joel Nothman] Fix loose character; sorry 6d3ffc6 [Joel Nothman] Lint 79eae41 [Joel Nothman] Documentation tweaks c4f59c6 [Joel Nothman] Doc tweaks 2c3d015 [Joel Nothman] Fix JSON syntax in IPynb b1d774b [Joel Nothman] What's new heading 096f26c [Joel Nothman] Merge remote-tracking branch 'upstream/master' into excel_style 433be03 [Joel Nothman] Documentation 9a62699 [Joel Nothman] Fix tests and add TODOs to tests 7c54a69 [Joel Nothman] Fix test 
failures; avoid hair border which renders strangely 8e9a567 [Joel Nothman] Fixes from integration testing c1fc232 [Joel Nothman] Remove debugging print statements a43d6b7 [Joel Nothman] Cleaner imports a1127f6 [Joel Nothman] Merge branch 'master' into excel_style 306eebe [Joel Nothman] Module-level docstring 350eab5 [Joel Nothman] remove spurious blank line efce9b6 [Joel Nothman] More CSS to Excel testing; define ExcelFormatter.write f17a0f4 [Joel Nothman] Some border style tests 1a8818f [Joel Nothman] Lint 9a5b791 [Joel Nothman] Fix testing ImportError 1984cab [Joel Nothman] Fix making get_level_lengths non-private eb02cc1 [Joel Nothman] Fix testing ImportError 3b26087 [Joel Nothman] Make get_level_lengths non-private f62f02d [Joel Nothman] File restructure dc953d4 [Joel Nothman] Font size and border width 7db59c0 [Joel Nothman] Test inherited styles in converter d103f61 [Joel Nothman] Refactoring and initial tests for CSS to Excel 176e51c [Joel Nothman] Fix NameError c589c35 [Joel Nothman] Fix some lint errors (yes, the code needs testing) cb5cf02 [Joel Nothman] Fix bug where inherited not being passed; avoid classmethods 0ce72f9 [Joel Nothman] Use inherited font size for em_pt 8780076 [Joel Nothman] Merge branch 'master' into excel_style 96680f9 [Joel Nothman] Largely complete CSSToExcelConverter and Styler.to_excel() f1cde08 [Joel Nothman] FIX column offset incorrect in refactor ada5101 [Joel Nothman] ENH: support Styler in ExcelFormatter --- .gitignore | 1 + doc/source/_static/style-excel.png | Bin 0 -> 58167 bytes doc/source/style.ipynb | 77 ++- doc/source/whatsnew/v0.20.0.txt | 34 ++ pandas/core/frame.py | 33 +- pandas/io/formats/common.py | 44 ++ pandas/io/formats/css.py | 248 +++++++++ pandas/io/formats/excel.py | 630 +++++++++++++++++++++++ pandas/io/formats/format.py | 339 +----------- pandas/io/formats/style.py | 26 + pandas/tests/io/formats/test_css.py | 256 +++++++++ pandas/tests/io/formats/test_to_excel.py | 219 ++++++++ pandas/tests/io/test_excel.py | 143 ++++- 13 files changed, 1670 insertions(+), 380 deletions(-) create mode 100644 doc/source/_static/style-excel.png create mode 100644 pandas/io/formats/common.py create mode 100644 pandas/io/formats/css.py create mode 100644 pandas/io/formats/excel.py create mode 100644 pandas/tests/io/formats/test_css.py create mode 100644 pandas/tests/io/formats/test_to_excel.py diff --git a/.gitignore b/.gitignore index c953020f59342..495429fcde429 100644 --- a/.gitignore +++ b/.gitignore @@ -103,4 +103,5 @@ doc/source/index.rst doc/build/html/index.html # Windows specific leftover: doc/tmp.sv +doc/source/styled.xlsx doc/source/templates/ diff --git a/doc/source/_static/style-excel.png b/doc/source/_static/style-excel.png new file mode 100644 index 0000000000000000000000000000000000000000..f946949e8bcf9c88251a1a592543110eda2323b9
[GIT binary patch literal 58167: base85-encoded PNG data for doc/source/_static/style-excel.png elided]
diff --git a/doc/source/style.ipynb b/doc/source/style.ipynb --- a/doc/source/style.ipynb +++ b/doc/source/style.ipynb - "*Provisional: This is a new feature and still under development. We'll be adding features and possibly making breaking changes in future releases. We'd love to hear your feedback.*

\n", + "*Provisional: This is a new feature and still under development. We'll be adding features and possibly making breaking changes in future releases. We'd love to hear your feedback.*\n", "\n", "This document is written as a Jupyter Notebook, and can be viewed or downloaded [here](http://nbviewer.ipython.org/github/pandas-dev/pandas/blob/master/doc/source/html-styling.ipynb).\n", "\n", @@ -49,7 +49,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true, "nbsphinx": "hidden" }, "outputs": [], @@ -62,9 +61,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", @@ -130,9 +127,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "def color_negative_red(val):\n", @@ -186,9 +181,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "def highlight_max(s):\n", @@ -240,7 +233,7 @@ "source": [ "Above we used `Styler.apply` to pass in each column one at a time.\n", "\n", - "
*Debugging Tip*: If you're having trouble writing your style function, try just passing it into DataFrame.apply. Internally, Styler.apply uses DataFrame.apply so the result should be the same.
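A minimal sketch of the debugging tip above (an illustrative aside, not part of the patch): the same style function can be exercised through plain DataFrame.apply first, since Styler.apply delegates to it.

import pandas as pd

def highlight_max(s):
    # return one CSS string per cell in the column
    return ['background-color: yellow' if v == s.max() else '' for v in s]

df = pd.DataFrame({'A': [1, 3, 2], 'B': [4, 0, 5]})
df.apply(highlight_max)        # inspect the raw CSS strings directly
df.style.apply(highlight_max)  # the same function applied through Styler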
\n", + "*Debugging Tip*: If you're having trouble writing your style function, try just passing it into DataFrame.apply. Internally, Styler.apply uses DataFrame.apply so the result should be the same.\n", "\n", "What if you wanted to highlight just the maximum value in the entire table?\n", "Use `.apply(function, axis=None)` to indicate that your function wants the entire table, not one column or row at a time. Let's try that next.\n", @@ -251,9 +244,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "def highlight_max(data, color='yellow'):\n", @@ -819,9 +810,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "def magnify():\n", @@ -854,6 +843,53 @@ " .set_table_styles(magnify())" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export to Excel\n", + "\n", + "*New in version 0.20.0*\n", + "\n", + "*Experimental: This is a new feature and still under development. We'll be adding features and possibly making breaking changes in future releases. We'd love to hear your feedback.*\n", + "\n", + "Some support is available for exporting styled `DataFrames` to Excel worksheets using the `OpenPyXL` engine. CSS2.2 properties handled include:\n", + "\n", + "- `background-color`\n", + "- `border-style`, `border-width`, `border-color` and their {`top`, `right`, `bottom`, `left` variants}\n", + "- `color`\n", + "- `font-family`\n", + "- `font-style`\n", + "- `font-weight`\n", + "- `text-align`\n", + "- `text-decoration`\n", + "- `vertical-align`\n", + "- `white-space: nowrap`\n", + "\n", + "Only CSS2 named colors and hex colors of the form `#rgb` or `#rrggbb` are currently supported." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.style.\\\n", + " applymap(color_negative_red).\\\n", + " apply(highlight_max).\\\n", + " to_excel('styled.xlsx', engine='openpyxl')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A screenshot of the output:\n", + "\n", + "![Excel spreadsheet with styled DataFrame](_static/style-excel.png)\n" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1039,8 +1075,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" + "pygments_lexer": "ipython3" } }, "nbformat": 4, diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f64f592e109a1..6802fceb99123 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -17,6 +17,7 @@ Highlights include: - Improved user API when accessing levels in ``.groupby()``, see :ref:`here ` - Improved support for ``UInt64`` dtypes, see :ref:`here ` - A new orient for JSON serialization, ``orient='table'``, that uses the :ref:`Table Schema spec ` +- Experimental support for exporting ``DataFrame.style`` formats to Excel , see :ref:`here ` - Window Binary Corr/Cov operations now return a MultiIndexed ``DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, see :ref:`here ` - Support for S3 handling now uses ``s3fs``, see :ref:`here ` - Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here ` @@ -398,6 +399,39 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you sdf.to_coo() +.. 
_whatsnew_0200.enhancements.style_excel: + +Excel output for styled DataFrames +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Experimental support has been added to export ``DataFrame.style`` formats to Excel using the ``openpyxl`` engine. (:issue:`15530`) + +For example, after running the following, ``styled.xlsx`` renders as below: + +.. ipython:: python + + np.random.seed(24) + df = pd.DataFrame({'A': np.linspace(1, 10, 10)}) + df = pd.concat([df, pd.DataFrame(np.random.RandomState(24).randn(10, 4), + columns=list('BCDE'))], + axis=1) + df.iloc[0, 2] = np.nan + df + styled = df.style.\ + applymap(lambda val: 'color: %s' % 'red' if val < 0 else 'black').\ + apply(lambda s: ['background-color: yellow' if v else '' + for v in s == s.max()]) + styled.to_excel('styled.xlsx', engine='openpyxl') + +.. image:: _static/style-excel.png + +.. ipython:: python + :suppress: + import os + os.remove('styled.xlsx') + +See the :ref:`Style documentation '.format(css))" + ] } ], "metadata": { @@ -1075,7 +1105,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" + "pygments_lexer": "ipython3", + "version": "3.6.1" } }, "nbformat": 4, diff --git a/doc/source/template_structure.html b/doc/source/template_structure.html index 81dbe2b7d0217..0778d8e2e6f18 100644 --- a/doc/source/template_structure.html +++ b/doc/source/template_structure.html @@ -9,9 +9,6 @@ --> """) + if self.notebook: + self.write(template) + def write_result(self, buf): indent = 0 frame = self.frame @@ -1131,6 +1150,7 @@ def write_result(self, buf): self.write(''.format(div_style)) + self.write_style() self.write('' % (self.border, ' '.join(_classes)), indent) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index b2fcfa3fbbaef..8b71fb047456e 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1859,3 +1859,13 @@ def test_to_html_no_index_max_rows(self):
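
Seen from the caller's side, the `write_style` hunk above gives `to_html(notebook=True)` an embedded, table-scoped `<style>` block; the two `test_to_html_notebook_*` tests in the hunk just below assert exactly this. A sketch of that behavior (the selector string is simply the marker those tests grep for):

    import pandas as pd

    df = pd.DataFrame({"A": [1, 2, 3]})

    # Default output embeds no CSS.
    assert "thead tr:only-child" not in df.to_html()

    # notebook=True now writes a <style> block before the table, so
    # the markup picks up the notebook look on its own.
    assert "thead tr:only-child" in df.to_html(notebook=True)
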
""") self.assertEqual(result, expected) + + def test_to_html_notebook_has_style(self): + df = pd.DataFrame({"A": [1, 2, 3]}) + result = df.to_html(notebook=True) + assert "thead tr:only-child" in result + + def test_to_html_notebook_has_no_style(self): + df = pd.DataFrame({"A": [1, 2, 3]}) + result = df.to_html() + assert "thead tr:only-child" not in result From 2f9c854c30385c29a905c0a9bf0b5009e99347e0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 21 Apr 2017 11:20:41 -0500 Subject: [PATCH 441/933] DOC/API: Move Styler import (#16059) * DOC/API: Move Styler import From the top-level to pandas.io.formats.style.Styler, which is a bit wordy but I was having circular-import issues at pandas.io.formats. I think that's ok since people won't really be using this interactively. Closes https://github.com/pandas-dev/pandas/issues/16009 * Added deprecation warning and shim --- doc/source/style.ipynb | 150 ++++++++++++++++++-------- pandas/formats/__init__.py | 0 pandas/formats/style.py | 7 ++ pandas/io/api.py | 17 --- pandas/tests/api/test_api.py | 3 +- pandas/tests/io/formats/test_style.py | 7 ++ pandas/util/importing.py | 10 -- 7 files changed, 123 insertions(+), 71 deletions(-) create mode 100644 pandas/formats/__init__.py create mode 100644 pandas/formats/style.py diff --git a/doc/source/style.ipynb b/doc/source/style.ipynb index a9d9c8de40b70..2cacbb19d81bb 100644 --- a/doc/source/style.ipynb +++ b/doc/source/style.ipynb @@ -14,7 +14,7 @@ "\n", "You can apply **conditional formatting**, the visual styling of a DataFrame\n", "depending on the data within, by using the ``DataFrame.style`` property.\n", - "This is a property that returns a ``pandas.Styler`` object, which has\n", + "This is a property that returns a ``Styler`` object, which has\n", "useful methods for formatting and displaying DataFrames.\n", "\n", "The styling is accomplished using CSS.\n", @@ -30,8 +30,8 @@ "\n", "Pass your style functions into one of the following methods:\n", "\n", - "- `Styler.applymap`: elementwise\n", - "- `Styler.apply`: column-/row-/table-wise\n", + "- ``Styler.applymap``: elementwise\n", + "- ``Styler.apply``: column-/row-/table-wise\n", "\n", "Both of those methods take a function (and some other keyword arguments) and applies your function to the DataFrame in a certain way.\n", "`Styler.applymap` works through the DataFrame elementwise.\n", @@ -87,7 +87,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style" @@ -105,7 +107,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style.highlight_null().render().split('\\n')[:10]" @@ -156,7 +160,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "s = df.style.applymap(color_negative_red)\n", @@ -202,7 +208,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style.apply(highlight_max)" @@ -226,7 +234,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style.\\\n", @@ -280,7 +290,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style.apply(highlight_max, color='darkorange', axis=None)" @@ -328,7 +340,9 @@ { 
"cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style.apply(highlight_max, subset=['B', 'C', 'D'])" @@ -344,7 +358,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style.applymap(color_negative_red,\n", @@ -377,7 +393,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style.format(\"{:.2%}\")" @@ -393,7 +411,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style.format({'B': \"{:0<4.0f}\", 'D': '{:+.2f}'})" @@ -409,7 +429,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style.format({\"B\": lambda x: \"±{:.2f}\".format(abs(x))})" @@ -432,7 +454,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style.highlight_null(null_color='red')" @@ -448,7 +472,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import seaborn as sns\n", @@ -469,7 +495,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Uses the full color range\n", @@ -479,7 +507,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Compress the color range\n", @@ -499,7 +529,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style.bar(subset=['A', 'B'], color='#d65f5f')" @@ -515,7 +547,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style.highlight_max(axis=0)" @@ -524,7 +558,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style.highlight_min(axis=0)" @@ -540,7 +576,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style.set_properties(**{'background-color': 'black',\n", @@ -565,7 +603,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df2 = -df\n", @@ -576,7 +616,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "style2 = df2.style\n", @@ -606,7 +648,7 @@ "\n", "Each of these can be specified in two ways:\n", "\n", - "- A keyword argument to `pandas.core.Styler`\n", + "- A keyword argument to `Styler.__init__`\n", "- A call to one of the `.set_` methods, e.g. `.set_caption`\n", "\n", "The best method to use depends on the context. Use the `Styler` constructor when building many styled DataFrames that should all share the same properties. For interactive use, the`.set_` methods are more convenient." 
@@ -629,7 +671,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "with pd.option_context('display.precision', 2):\n", @@ -649,7 +693,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style\\\n", @@ -682,7 +728,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "df.style.set_caption('Colormaps, with a caption.')\\\n", @@ -708,7 +756,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from IPython.display import HTML\n", @@ -804,7 +854,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from IPython.html import widgets\n", @@ -840,7 +892,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "np.random.seed(25)\n", @@ -933,13 +987,15 @@ "source": [ "from jinja2 import Environment, ChoiceLoader, FileSystemLoader\n", "from IPython.display import HTML\n", - "from pandas.io.api import Styler" + "from pandas.io.formats.style import Styler" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%mkdir templates" @@ -956,7 +1012,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%%file templates/myhtml.tpl\n", @@ -971,7 +1029,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now that we've created a template, we need to set up a subclass of ``pd.Styler`` that\n", + "Now that we've created a template, we need to set up a subclass of ``Styler`` that\n", "knows about it." 
] }, @@ -983,11 +1041,11 @@ }, "outputs": [], "source": [ - "class MyStyler(pd.Styler):\n", + "class MyStyler(Styler):\n", " env = Environment(\n", " loader=ChoiceLoader([\n", " FileSystemLoader(\"templates\"), # contains ours\n", - " pd.Styler.loader, # the default\n", + " Styler.loader, # the default\n", " ])\n", " )\n", " template = env.get_template(\"myhtml.tpl\")" @@ -1007,7 +1065,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "MyStyler(df)" @@ -1023,7 +1083,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "HTML(MyStyler(df).render(table_title=\"Extending Example\"))" @@ -1039,10 +1101,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ - "EasyStyler = pd.Styler.from_custom_template(\"templates\", \"myhtml.tpl\")\n", + "EasyStyler = Styler.from_custom_template(\"templates\", \"myhtml.tpl\")\n", "EasyStyler(df)" ] }, @@ -1056,7 +1120,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "with open(\"template_structure.html\") as f:\n", diff --git a/pandas/formats/__init__.py b/pandas/formats/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/formats/style.py b/pandas/formats/style.py new file mode 100644 index 0000000000000..ec4b4a2cde0c5 --- /dev/null +++ b/pandas/formats/style.py @@ -0,0 +1,7 @@ +import warnings + +warnings.warn("Styler has been moved from pandas.formats.style.Styler" + " to pandas.io.formats.style.Styler. This shim will be" + " removed in pandas 0.21", + FutureWarning) +from pandas.io.formats.style import Styler # noqa diff --git a/pandas/io/api.py b/pandas/io/api.py index 58c388d306721..e312e7bc2f300 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -17,23 +17,6 @@ from pandas.io.pickle import read_pickle, to_pickle from pandas.io.packers import read_msgpack, to_msgpack from pandas.io.gbq import read_gbq -try: - from pandas.io.formats.style import Styler -except ImportError: - from pandas.compat import add_metaclass as _add_metaclass - from pandas.util.importing import _UnSubclassable - - # We want to *not* raise an ImportError upon importing this module - # We *do* want to raise an ImportError with a custom message - # when the class is instantiated or subclassed. - @_add_metaclass(_UnSubclassable) - class Styler(object): - msg = ("pandas.io.api.Styler requires jinja2. 
" - "Please install with `conda install jinja2` " - "or `pip install jinja2`") - def __init__(self, *args, **kargs): - raise ImportError(self.msg) - # deprecation, xref #13790 def Term(*args, **kwargs): diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 13e6d065382a6..026a36fd9f4f9 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -48,8 +48,7 @@ class TestPDApi(Base, tm.TestCase): 'Period', 'PeriodIndex', 'RangeIndex', 'UInt64Index', 'Series', 'SparseArray', 'SparseDataFrame', 'SparseSeries', 'TimeGrouper', 'Timedelta', - 'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex', - 'Styler'] + 'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex'] # these are already deprecated; awaiting removal deprecated_classes = ['WidePanel', 'Panel4D', diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index c02d94d8918b3..4fb91c40aba3a 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -747,3 +747,10 @@ def test_from_custom_template(tmpdir): assert result.template is not Styler.template styler = result(pd.DataFrame({"A": [1, 2]})) assert styler.render() + + +def test_shim(): + # https://github.com/pandas-dev/pandas/pull/16059 + # Remove in 0.21 + with pytest.warns(FutureWarning): + from pandas.formats.style import Styler as _styler # noqa diff --git a/pandas/util/importing.py b/pandas/util/importing.py index 9323fb97baac0..e69de29bb2d1d 100644 --- a/pandas/util/importing.py +++ b/pandas/util/importing.py @@ -1,10 +0,0 @@ -class _UnSubclassable(type): - """ - Metaclass to raise an ImportError when subclassed - """ - msg = "" - - def __init__(cls, name, bases, clsdict): - if len(cls.mro()) > 2: - raise ImportError(cls.msg) - super(_UnSubclassable, cls).__init__(name, bases, clsdict) From 70ae817a5d0bd5e11e30a2e0781afa98103d656f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 21 Apr 2017 16:59:22 -0500 Subject: [PATCH 442/933] REF: Refactor pandas.util.testing to not use pytest at top-level (#16088) Closes https://github.com/pandas-dev/pandas/pull/16088 --- pandas/tests/io/parser/test_network.py | 3 ++- pandas/util/testing.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 4d6b6c7daa3c6..3845ca2d81edd 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -22,7 +22,8 @@ def salaries_table(): @pytest.mark.parametrize( "compression,extension", [('gzip', '.gz'), ('bz2', '.bz2'), ('zip', '.zip'), - tm._mark_skipif_no_lzma(('xz', '.xz'))]) + pytest.mark.skipif(not tm._check_if_lzma(), + reason='need backports.lzma to run')(('xz', '.xz'))]) @pytest.mark.parametrize('mode', ['explicit', 'infer']) @pytest.mark.parametrize('engine', ['python', 'c']) def test_compressed_urls(salaries_table, compression, extension, mode, engine): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index c54def2b4ef5e..d2fb18be1c72e 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -19,7 +19,6 @@ from distutils.version import LooseVersion from numpy.random import randn, rand -import pytest import numpy as np import pandas as pd @@ -52,7 +51,13 @@ from pandas.util import libtesting from pandas.io.common import urlopen -slow = pytest.mark.slow +try: + import pytest + slow = pytest.mark.slow +except ImportError: + # Should be ok to just ignore. 
If you actually need + # slow then you'll hit an import error long before getting here. + pass N = 30 @@ -347,15 +352,10 @@ def _check_if_lzma(): def _skip_if_no_lzma(): + import pytest return _check_if_lzma() or pytest.skip('need backports.lzma to run') -_mark_skipif_no_lzma = pytest.mark.skipif( - not _check_if_lzma(), - reason='need backports.lzma to run' -) - - def _skip_if_no_xarray(): try: import xarray From d528a10c241215ea73234df247ee54a6c277fc95 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 21 Apr 2017 18:01:42 -0400 Subject: [PATCH 443/933] MAINT: Remove assertRaises from testing (#16089) --- pandas/tests/api/test_types.py | 12 +- pandas/tests/computation/test_compat.py | 3 +- pandas/tests/computation/test_eval.py | 147 ++++---- pandas/tests/dtypes/test_cast.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 53 ++- pandas/tests/frame/test_alter_axes.py | 4 +- pandas/tests/frame/test_analytics.py | 46 +-- pandas/tests/frame/test_api.py | 11 +- pandas/tests/frame/test_apply.py | 10 +- .../tests/frame/test_axis_select_reindex.py | 24 +- pandas/tests/frame/test_block_internals.py | 10 +- pandas/tests/frame/test_constructors.py | 30 +- pandas/tests/frame/test_convert_to.py | 2 +- pandas/tests/frame/test_dtypes.py | 13 +- pandas/tests/frame/test_indexing.py | 91 +++-- pandas/tests/frame/test_missing.py | 27 +- pandas/tests/frame/test_mutate_columns.py | 10 +- pandas/tests/frame/test_nonunique_indexes.py | 11 +- pandas/tests/frame/test_operators.py | 40 +-- pandas/tests/frame/test_quantile.py | 6 +- pandas/tests/frame/test_query_eval.py | 42 +-- pandas/tests/frame/test_replace.py | 12 +- pandas/tests/frame/test_reshape.py | 8 +- pandas/tests/frame/test_sorting.py | 4 +- pandas/tests/frame/test_timeseries.py | 12 +- pandas/tests/frame/test_to_csv.py | 9 +- pandas/tests/frame/test_validate.py | 16 +- pandas/tests/groupby/test_aggregate.py | 17 +- pandas/tests/groupby/test_bin_groupby.py | 18 +- pandas/tests/groupby/test_categorical.py | 4 +- pandas/tests/groupby/test_filters.py | 17 +- pandas/tests/groupby/test_groupby.py | 90 ++--- pandas/tests/groupby/test_timegrouper.py | 8 +- pandas/tests/groupby/test_transform.py | 6 +- pandas/tests/indexes/common.py | 14 +- pandas/tests/indexes/datetimes/test_astype.py | 12 +- .../indexes/datetimes/test_construction.py | 56 +-- .../indexes/datetimes/test_date_range.py | 80 +++-- .../tests/indexes/datetimes/test_datetime.py | 30 +- .../tests/indexes/datetimes/test_indexing.py | 2 +- pandas/tests/indexes/datetimes/test_misc.py | 5 +- pandas/tests/indexes/datetimes/test_ops.py | 24 +- .../indexes/datetimes/test_partial_slicing.py | 21 +- pandas/tests/indexes/datetimes/test_tools.py | 101 +++--- pandas/tests/indexes/period/test_asfreq.py | 5 +- .../tests/indexes/period/test_construction.py | 59 +-- pandas/tests/indexes/period/test_indexing.py | 18 +- pandas/tests/indexes/period/test_ops.py | 22 +- .../indexes/period/test_partial_slicing.py | 8 +- pandas/tests/indexes/period/test_period.py | 22 +- pandas/tests/indexes/period/test_setops.py | 10 +- pandas/tests/indexes/test_base.py | 74 ++-- pandas/tests/indexes/test_category.py | 54 +-- pandas/tests/indexes/test_interval.py | 50 +-- pandas/tests/indexes/test_multi.py | 86 ++--- pandas/tests/indexes/test_numeric.py | 42 +-- pandas/tests/indexes/test_range.py | 30 +- .../tests/indexes/timedeltas/test_astype.py | 16 +- .../indexes/timedeltas/test_construction.py | 18 +- .../tests/indexes/timedeltas/test_indexing.py | 4 +- pandas/tests/indexes/timedeltas/test_ops.py | 78 ++-- 
.../timedeltas/test_partial_slicing.py | 4 +- .../indexes/timedeltas/test_timedelta.py | 18 +- pandas/tests/indexes/timedeltas/test_tools.py | 12 +- pandas/tests/indexing/test_categorical.py | 34 +- .../indexing/test_chaining_and_caching.py | 2 +- pandas/tests/indexing/test_coercion.py | 6 +- pandas/tests/indexing/test_datetime.py | 4 +- pandas/tests/indexing/test_floats.py | 62 ++-- pandas/tests/indexing/test_iloc.py | 30 +- pandas/tests/indexing/test_indexing.py | 16 +- pandas/tests/indexing/test_ix.py | 10 +- pandas/tests/indexing/test_loc.py | 30 +- pandas/tests/indexing/test_multiindex.py | 30 +- pandas/tests/indexing/test_panel.py | 8 +- pandas/tests/indexing/test_partial.py | 31 +- pandas/tests/indexing/test_scalar.py | 18 +- pandas/tests/io/formats/test_format.py | 10 +- pandas/tests/io/formats/test_style.py | 24 +- pandas/tests/io/formats/test_to_latex.py | 2 +- .../tests/io/json/test_json_table_schema.py | 2 +- pandas/tests/io/json/test_normalize.py | 14 +- pandas/tests/io/json/test_pandas.py | 50 +-- pandas/tests/io/json/test_ujson.py | 16 +- pandas/tests/io/msgpack/test_except.py | 29 +- pandas/tests/io/msgpack/test_limits.py | 25 +- pandas/tests/io/msgpack/test_obj.py | 15 +- pandas/tests/io/msgpack/test_pack.py | 15 +- pandas/tests/io/msgpack/test_sequnpack.py | 22 +- pandas/tests/io/msgpack/test_unpack.py | 2 +- pandas/tests/io/parser/c_parser_only.py | 24 +- pandas/tests/io/parser/common.py | 44 +-- pandas/tests/io/parser/compression.py | 8 +- pandas/tests/io/parser/dtypes.py | 14 +- pandas/tests/io/parser/header.py | 36 +- pandas/tests/io/parser/index_col.py | 10 +- pandas/tests/io/parser/parse_dates.py | 10 +- pandas/tests/io/parser/python_parser_only.py | 8 +- pandas/tests/io/parser/test_network.py | 4 +- pandas/tests/io/parser/test_read_fwf.py | 4 +- pandas/tests/io/parser/test_textreader.py | 12 +- pandas/tests/io/parser/usecols.py | 4 +- pandas/tests/io/sas/test_sas.py | 4 +- pandas/tests/io/test_clipboard.py | 4 +- pandas/tests/io/test_common.py | 3 +- pandas/tests/io/test_excel.py | 18 +- pandas/tests/io/test_html.py | 12 +- pandas/tests/io/test_packers.py | 8 +- pandas/tests/io/test_pytables.py | 336 +++++++++--------- pandas/tests/io/test_sql.py | 90 ++--- pandas/tests/io/test_stata.py | 30 +- pandas/tests/plotting/test_boxplot_method.py | 18 +- pandas/tests/plotting/test_datetimelike.py | 6 +- pandas/tests/plotting/test_frame.py | 56 +-- pandas/tests/plotting/test_hist_method.py | 34 +- pandas/tests/plotting/test_misc.py | 10 +- pandas/tests/plotting/test_series.py | 21 +- pandas/tests/reshape/test_concat.py | 16 +- pandas/tests/reshape/test_hashing.py | 6 +- pandas/tests/reshape/test_join.py | 21 +- pandas/tests/reshape/test_merge.py | 54 +-- pandas/tests/reshape/test_merge_asof.py | 35 +- pandas/tests/reshape/test_pivot.py | 17 +- pandas/tests/reshape/test_reshape.py | 8 +- pandas/tests/reshape/test_tile.py | 19 +- .../tests/reshape/test_union_categoricals.py | 6 +- pandas/tests/scalar/test_interval.py | 2 +- pandas/tests/scalar/test_period.py | 108 +++--- pandas/tests/scalar/test_timedelta.py | 44 +-- pandas/tests/scalar/test_timestamp.py | 125 +++---- pandas/tests/series/test_alter_axes.py | 8 +- pandas/tests/series/test_analytics.py | 28 +- pandas/tests/series/test_api.py | 8 +- pandas/tests/series/test_apply.py | 15 +- pandas/tests/series/test_asof.py | 6 +- pandas/tests/series/test_combine_concat.py | 6 +- pandas/tests/series/test_constructors.py | 32 +- pandas/tests/series/test_datetime_values.py | 4 +- pandas/tests/series/test_indexing.py | 142 ++++---- 
pandas/tests/series/test_internals.py | 4 +- pandas/tests/series/test_missing.py | 42 +-- pandas/tests/series/test_operators.py | 88 ++--- pandas/tests/series/test_rank.py | 2 +- pandas/tests/series/test_replace.py | 6 +- pandas/tests/series/test_sorting.py | 30 +- pandas/tests/series/test_timeseries.py | 36 +- pandas/tests/series/test_validate.py | 18 +- pandas/tests/sparse/test_array.py | 28 +- pandas/tests/sparse/test_frame.py | 32 +- pandas/tests/sparse/test_indexing.py | 4 +- pandas/tests/sparse/test_libsparse.py | 8 +- pandas/tests/sparse/test_series.py | 25 +- pandas/tests/test_algos.py | 24 +- pandas/tests/test_base.py | 16 +- pandas/tests/test_categorical.py | 222 ++++++------ pandas/tests/test_common.py | 6 +- pandas/tests/test_config.py | 54 +-- pandas/tests/test_lib.py | 4 +- pandas/tests/test_multilevel.py | 32 +- pandas/tests/test_nanops.py | 10 +- pandas/tests/test_panel.py | 80 ++--- pandas/tests/test_panel4d.py | 34 +- pandas/tests/test_panelnd.py | 26 +- pandas/tests/test_resample.py | 63 ++-- pandas/tests/test_strings.py | 21 +- pandas/tests/test_testing.py | 24 +- pandas/tests/test_util.py | 10 +- pandas/tests/test_window.py | 144 ++++---- pandas/tests/tools/test_numeric.py | 2 +- pandas/tests/tseries/test_frequencies.py | 33 +- pandas/tests/tseries/test_holiday.py | 4 +- pandas/tests/tseries/test_offsets.py | 46 +-- pandas/tests/tseries/test_timezones.py | 58 +-- pandas/util/testing.py | 60 +--- 174 files changed, 2633 insertions(+), 2498 deletions(-) diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 057f7d8f3e286..3b9148a1c91c6 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +import pytest + from warnings import catch_warnings import numpy as np @@ -41,13 +43,11 @@ def check_deprecation(self, fold, fnew): try: result = fold('foo') expected = fnew('foo') - self.assertEqual(result, expected) + assert result == expected except TypeError: - self.assertRaises(TypeError, - lambda: fnew('foo')) + pytest.raises(TypeError, lambda: fnew('foo')) except AttributeError: - self.assertRaises(AttributeError, - lambda: fnew('foo')) + pytest.raises(AttributeError, lambda: fnew('foo')) def test_deprecation_core_common(self): @@ -83,7 +83,7 @@ def test_removed_from_core_common(self): for t in ['is_null_datelike_scalar', 'ensure_float']: - self.assertRaises(AttributeError, lambda: getattr(com, t)) + pytest.raises(AttributeError, lambda: getattr(com, t)) def test_moved_infer_dtype(): diff --git a/pandas/tests/computation/test_compat.py b/pandas/tests/computation/test_compat.py index 9ee9f674a1ddd..ed569625177d3 100644 --- a/pandas/tests/computation/test_compat.py +++ b/pandas/tests/computation/test_compat.py @@ -2,7 +2,6 @@ from distutils.version import LooseVersion import pandas as pd -from pandas.util import testing as tm from pandas.core.computation.engines import _engines import pandas.core.computation.expr as expr @@ -39,7 +38,7 @@ def testit(): pytest.skip("no numexpr") else: if ne.__version__ < LooseVersion(_MIN_NUMEXPR_VERSION): - with tm.assertRaises(ImportError): + with pytest.raises(ImportError): testit() else: testit() diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 0ba4fe61ae78f..eacbd2b390154 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -220,11 +220,11 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): scalar_with_in_notin = (is_scalar(rhs) and (cmp1 in 
skip_these or cmp2 in skip_these)) if scalar_with_in_notin: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): pd.eval(ex, engine=self.engine, parser=self.parser) - self.assertRaises(TypeError, pd.eval, ex, engine=self.engine, - parser=self.parser, local_dict={'lhs': lhs, - 'rhs': rhs}) + with pytest.raises(TypeError): + pd.eval(ex, engine=self.engine, parser=self.parser, + local_dict={'lhs': lhs, 'rhs': rhs}) else: lhs_new = _eval_single_bin(lhs, cmp1, rhs, self.engine) rhs_new = _eval_single_bin(lhs, cmp2, rhs, self.engine) @@ -236,9 +236,9 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): # hand side bool ops are fixed. # # try: - # self.assertRaises(Exception, pd.eval, ex, - # local_dict={'lhs': lhs, 'rhs': rhs}, - # engine=self.engine, parser=self.parser) + # pytest.raises(Exception, pd.eval, ex, + # local_dict={'lhs': lhs, 'rhs': rhs}, + # engine=self.engine, parser=self.parser) # except AssertionError: # import ipdb # @@ -273,9 +273,9 @@ def check_operands(left, right, cmp_op): def check_simple_cmp_op(self, lhs, cmp1, rhs): ex = 'lhs {0} rhs'.format(cmp1) if cmp1 in ('in', 'not in') and not is_list_like(rhs): - self.assertRaises(TypeError, pd.eval, ex, engine=self.engine, - parser=self.parser, local_dict={'lhs': lhs, - 'rhs': rhs}) + pytest.raises(TypeError, pd.eval, ex, engine=self.engine, + parser=self.parser, local_dict={'lhs': lhs, + 'rhs': rhs}) else: expected = _eval_single_bin(lhs, cmp1, rhs, self.engine) result = pd.eval(ex, engine=self.engine, parser=self.parser) @@ -328,9 +328,9 @@ def check_floor_division(self, lhs, arith1, rhs): expected = lhs // rhs self.check_equal(res, expected) else: - self.assertRaises(TypeError, pd.eval, ex, local_dict={'lhs': lhs, - 'rhs': rhs}, - engine=self.engine, parser=self.parser) + pytest.raises(TypeError, pd.eval, ex, + local_dict={'lhs': lhs, 'rhs': rhs}, + engine=self.engine, parser=self.parser) def get_expected_pow_result(self, lhs, rhs): try: @@ -353,8 +353,8 @@ def check_pow(self, lhs, arith1, rhs): if (is_scalar(lhs) and is_scalar(rhs) and _is_py3_complex_incompat(result, expected)): - self.assertRaises(AssertionError, tm.assert_numpy_array_equal, - result, expected) + pytest.raises(AssertionError, tm.assert_numpy_array_equal, + result, expected) else: tm.assert_almost_equal(result, expected) @@ -385,9 +385,9 @@ def check_compound_invert_op(self, lhs, cmp1, rhs): ex = '~(lhs {0} rhs)'.format(cmp1) if is_scalar(rhs) and cmp1 in skip_these: - self.assertRaises(TypeError, pd.eval, ex, engine=self.engine, - parser=self.parser, local_dict={'lhs': lhs, - 'rhs': rhs}) + pytest.raises(TypeError, pd.eval, ex, engine=self.engine, + parser=self.parser, local_dict={'lhs': lhs, + 'rhs': rhs}) else: # compound if is_scalar(lhs) and is_scalar(rhs): @@ -417,16 +417,16 @@ def test_frame_invert(self): # float always raises lhs = DataFrame(randn(5, 2)) if self.engine == 'numexpr': - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): result = pd.eval(expr, engine=self.engine, parser=self.parser) # int raises on numexpr lhs = DataFrame(randint(5, size=(5, 2))) if self.engine == 'numexpr': - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = ~lhs @@ -442,10 +442,10 @@ def test_frame_invert(self): # object raises lhs = DataFrame({'b': ['a', 1, 
2.0], 'c': rand(3) > 0.5}) if self.engine == 'numexpr': - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): result = pd.eval(expr, engine=self.engine, parser=self.parser) def test_series_invert(self): @@ -456,16 +456,16 @@ def test_series_invert(self): # float raises lhs = Series(randn(5)) if self.engine == 'numexpr': - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): result = pd.eval(expr, engine=self.engine, parser=self.parser) # int raises on numexpr lhs = Series(randint(5, size=5)) if self.engine == 'numexpr': - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = ~lhs @@ -485,10 +485,10 @@ def test_series_invert(self): # object lhs = Series(['a', 1, 2.0]) if self.engine == 'numexpr': - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): result = pd.eval(expr, engine=self.engine, parser=self.parser) def test_frame_negate(self): @@ -509,7 +509,7 @@ def test_frame_negate(self): # bool doesn't work with numexpr but works elsewhere lhs = DataFrame(rand(5, 2) > 0.5) if self.engine == 'numexpr': - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = -lhs @@ -534,7 +534,7 @@ def test_series_negate(self): # bool doesn't work with numexpr but works elsewhere lhs = Series(rand(5) > 0.5) if self.engine == 'numexpr': - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = -lhs @@ -547,7 +547,7 @@ def test_frame_pos(self): # float lhs = DataFrame(randn(5, 2)) if self.engine == 'python': - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = lhs @@ -557,7 +557,7 @@ def test_frame_pos(self): # int lhs = DataFrame(randint(5, size=(5, 2))) if self.engine == 'python': - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = lhs @@ -567,7 +567,7 @@ def test_frame_pos(self): # bool doesn't work with numexpr but works elsewhere lhs = DataFrame(rand(5, 2) > 0.5) if self.engine == 'python': - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = lhs @@ -580,7 +580,7 @@ def test_series_pos(self): # float lhs = Series(randn(5)) if self.engine == 'python': - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = lhs @@ -590,7 +590,7 @@ def test_series_pos(self): # int lhs = Series(randint(5, size=5)) if self.engine == 'python': - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = lhs @@ -600,7 +600,7 @@ def test_series_pos(self): # bool doesn't work with numexpr but 
works elsewhere lhs = Series(rand(5) > 0.5) if self.engine == 'python': - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = lhs @@ -608,7 +608,7 @@ def test_series_pos(self): assert_series_equal(expect, result) def test_scalar_unary(self): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): pd.eval('~1.0', engine=self.engine, parser=self.parser) self.assertEqual( @@ -655,7 +655,7 @@ def test_disallow_scalar_bool_ops(self): x, a, b, df = np.random.randn(3), 1, 2, DataFrame(randn(3, 2)) # noqa for ex in exprs: - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval(ex, engine=self.engine, parser=self.parser) def test_identical(self): @@ -745,7 +745,7 @@ def setup_ops(self): def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): ex1 = 'lhs {0} mid {1} rhs'.format(cmp1, cmp2) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval(ex1, engine=self.engine, parser=self.parser) @@ -1099,8 +1099,8 @@ def test_simple_arith_ops(self): ex3 = '1 {0} (x + 1)'.format(op) if op in ('in', 'not in'): - self.assertRaises(TypeError, pd.eval, ex, - engine=self.engine, parser=self.parser) + pytest.raises(TypeError, pd.eval, ex, + engine=self.engine, parser=self.parser) else: expec = _eval_single_bin(1, op, 1, self.engine) x = self.eval(ex, engine=self.engine, parser=self.parser) @@ -1208,7 +1208,7 @@ def test_truediv(self): def test_failing_subscript_with_name_error(self): df = DataFrame(np.random.randn(5, 3)) # noqa - with tm.assertRaises(NameError): + with pytest.raises(NameError): self.eval('df[x > 2] > 2') def test_lhs_expression_subscript(self): @@ -1234,20 +1234,19 @@ def test_assignment_fails(self): df = DataFrame(np.random.randn(5, 3), columns=list('abc')) df2 = DataFrame(np.random.randn(5, 3)) expr1 = 'df = df2' - self.assertRaises(ValueError, self.eval, expr1, - local_dict={'df': df, 'df2': df2}) + pytest.raises(ValueError, self.eval, expr1, + local_dict={'df': df, 'df2': df2}) def test_assignment_column(self): df = DataFrame(np.random.randn(5, 2), columns=list('ab')) orig_df = df.copy() # multiple assignees - self.assertRaises(SyntaxError, df.eval, 'd c = a + b') + pytest.raises(SyntaxError, df.eval, 'd c = a + b') # invalid assignees - self.assertRaises(SyntaxError, df.eval, 'd,c = a + b') - self.assertRaises( - SyntaxError, df.eval, 'Timestamp("20131001") = a + b') + pytest.raises(SyntaxError, df.eval, 'd,c = a + b') + pytest.raises(SyntaxError, df.eval, 'Timestamp("20131001") = a + b') # single assignment - existing variable expected = orig_df.copy() @@ -1290,7 +1289,7 @@ def f(): # multiple assignment df = orig_df.copy() df.eval('c = a + b', inplace=True) - self.assertRaises(SyntaxError, df.eval, 'c = a = b') + pytest.raises(SyntaxError, df.eval, 'c = a = b') # explicit targets df = orig_df.copy() @@ -1348,7 +1347,7 @@ def test_multi_line_expression(self): self.assertIsNone(ans) # multi-line not valid if not all assignments - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.eval(""" a = b + 2 b - 2""", inplace=False) @@ -1391,7 +1390,7 @@ def test_assignment_in_query(self): # GH 8664 df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) df_orig = df.copy() - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.query('a = 1') assert_frame_equal(df, df_orig) @@ -1475,19 +1474,19 @@ def test_simple_in_ops(self): parser=self.parser) self.assertTrue(res) else: - with 
tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval('1 in [1, 2]', engine=self.engine, parser=self.parser) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval('2 in (1, 2)', engine=self.engine, parser=self.parser) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval('3 in (1, 2)', engine=self.engine, parser=self.parser) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval('3 not in (1, 2)', engine=self.engine, parser=self.parser) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval('[(3,)] in (1, 2, [(3,)])', engine=self.engine, parser=self.parser) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval('[3] not in (1, 2, [[3]])', engine=self.engine, parser=self.parser) @@ -1513,32 +1512,32 @@ def test_check_many_exprs(self): def test_fails_and(self): df = DataFrame(np.random.randn(5, 3)) - self.assertRaises(NotImplementedError, pd.eval, 'df > 2 and df > 3', - local_dict={'df': df}, parser=self.parser, - engine=self.engine) + pytest.raises(NotImplementedError, pd.eval, 'df > 2 and df > 3', + local_dict={'df': df}, parser=self.parser, + engine=self.engine) def test_fails_or(self): df = DataFrame(np.random.randn(5, 3)) - self.assertRaises(NotImplementedError, pd.eval, 'df > 2 or df > 3', - local_dict={'df': df}, parser=self.parser, - engine=self.engine) + pytest.raises(NotImplementedError, pd.eval, 'df > 2 or df > 3', + local_dict={'df': df}, parser=self.parser, + engine=self.engine) def test_fails_not(self): df = DataFrame(np.random.randn(5, 3)) - self.assertRaises(NotImplementedError, pd.eval, 'not df > 2', - local_dict={'df': df}, parser=self.parser, - engine=self.engine) + pytest.raises(NotImplementedError, pd.eval, 'not df > 2', + local_dict={'df': df}, parser=self.parser, + engine=self.engine) def test_fails_ampersand(self): df = DataFrame(np.random.randn(5, 3)) # noqa ex = '(df + 2)[df > 1] > 0 & (df > 0)' - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval(ex, parser=self.parser, engine=self.engine) def test_fails_pipe(self): df = DataFrame(np.random.randn(5, 3)) # noqa ex = '(df + 2)[df > 1] > 0 | (df > 0)' - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval(ex, parser=self.parser, engine=self.engine) def test_bool_ops_with_constants(self): @@ -1546,7 +1545,7 @@ def test_bool_ops_with_constants(self): ('True', 'False')): ex = '{0} {1} {2}'.format(lhs, op, rhs) if op in ('and', 'or'): - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): self.eval(ex) else: res = self.eval(ex) @@ -1558,7 +1557,7 @@ def test_simple_bool_ops(self): (True, False)): ex = 'lhs {0} rhs'.format(op) if op in ('and', 'or'): - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval(ex, engine=self.engine, parser=self.parser) else: res = pd.eval(ex, engine=self.engine, parser=self.parser) @@ -1786,7 +1785,7 @@ def test_syntax_error_exprs(engine, parser): def test_name_error_exprs(engine, parser): e = 's + t' - with tm.assertRaises(NameError): + with pytest.raises(NameError): pd.eval(e, engine=engine, parser=parser) @@ -1847,7 +1846,7 @@ def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser): ex2 = 'lhs {0} mid and mid {1} rhs'.format(cmp, cmp) ex3 = '(lhs {0} mid) & (mid {1} rhs)'.format(cmp, cmp) 
for ex in (ex1, ex2, ex3): - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval(ex, engine=engine, parser=parser) @@ -1866,7 +1865,7 @@ def test_negate_lt_eq_le(engine, parser): tm.assert_frame_equal(result, expected) if parser == 'python': - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): df.query('not (cat > 0)', engine=engine, parser=parser) else: result = df.query('not (cat > 0)', engine=engine, parser=parser) @@ -1879,5 +1878,5 @@ def test_validate_bool_args(self): invalid_values = [1, "True", [1, 2, 3], 5.0] for value in invalid_values: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): pd.eval("2+2", inplace=value) diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index f3fdc54d4a3cc..bf3668111b9f9 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -293,7 +293,7 @@ def test_numpy_dtypes(self): for src, common in testcases: self.assertEqual(find_common_type(src), common) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): # empty find_common_type([]) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 46569fecf553f..32ec1194639ae 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +import pytest + from itertools import product import numpy as np @@ -25,23 +27,18 @@ def test_hash(self): hash(self.dtype) def test_equality_invalid(self): - self.assertRaises(self.dtype == 'foo') - self.assertFalse(is_dtype_equal(self.dtype, np.int64)) + assert not self.dtype == 'foo' + assert not is_dtype_equal(self.dtype, np.int64) def test_numpy_informed(self): + pytest.raises(TypeError, np.dtype, self.dtype) - # np.dtype doesn't know about our new dtype - def f(): - np.dtype(self.dtype) - - self.assertRaises(TypeError, f) - - self.assertNotEqual(self.dtype, np.str_) - self.assertNotEqual(np.str_, self.dtype) + assert not self.dtype == np.str_ + assert not np.str_ == self.dtype def test_pickle(self): result = tm.round_trip_pickle(self.dtype) - self.assertEqual(result, self.dtype) + assert result == self.dtype class TestCategoricalDtype(Base, tm.TestCase): @@ -67,7 +64,7 @@ def test_equality(self): def test_construction_from_string(self): result = CategoricalDtype.construct_from_string('category') self.assertTrue(is_dtype_equal(self.dtype, result)) - self.assertRaises( + pytest.raises( TypeError, lambda: CategoricalDtype.construct_from_string('foo')) def test_is_dtype(self): @@ -116,8 +113,8 @@ def test_hash_vs_equality(self): self.assertTrue(hash(dtype) == hash(dtype3)) def test_construction(self): - self.assertRaises(ValueError, - lambda: DatetimeTZDtype('ms', 'US/Eastern')) + pytest.raises(ValueError, + lambda: DatetimeTZDtype('ms', 'US/Eastern')) def test_subclass(self): a = DatetimeTZDtype('datetime64[ns, US/Eastern]') @@ -148,8 +145,8 @@ def test_construction_from_string(self): result = DatetimeTZDtype.construct_from_string( 'datetime64[ns, US/Eastern]') self.assertTrue(is_dtype_equal(self.dtype, result)) - self.assertRaises(TypeError, - lambda: DatetimeTZDtype.construct_from_string('foo')) + pytest.raises(TypeError, + lambda: DatetimeTZDtype.construct_from_string('foo')) def test_is_dtype(self): self.assertFalse(DatetimeTZDtype.is_dtype(None)) @@ -215,7 +212,7 @@ def test_parser(self): def test_empty(self): dt = DatetimeTZDtype() - with tm.assertRaises(AttributeError): + with 
pytest.raises(AttributeError): str(dt) @@ -225,7 +222,7 @@ def setUp(self): self.dtype = PeriodDtype('D') def test_construction(self): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): PeriodDtype('xx') for s in ['period[D]', 'Period[D]', 'D']: @@ -284,16 +281,16 @@ def test_construction_from_string(self): self.assertTrue(is_dtype_equal(self.dtype, result)) result = PeriodDtype.construct_from_string('period[D]') self.assertTrue(is_dtype_equal(self.dtype, result)) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): PeriodDtype.construct_from_string('foo') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): PeriodDtype.construct_from_string('period[foo]') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): PeriodDtype.construct_from_string('foo[D]') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): PeriodDtype.construct_from_string('datetime64[ns]') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): PeriodDtype.construct_from_string('datetime64[ns, US/Eastern]') def test_is_dtype(self): @@ -348,7 +345,7 @@ def test_basic(self): def test_empty(self): dt = PeriodDtype() - with tm.assertRaises(AttributeError): + with pytest.raises(AttributeError): str(dt) def test_not_string(self): @@ -363,7 +360,7 @@ def setUp(self): self.dtype = IntervalDtype('int64') def test_construction(self): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): IntervalDtype('xx') for s in ['interval[int64]', 'Interval[int64]', 'int64']: @@ -419,11 +416,11 @@ def test_construction_from_string(self): self.assertTrue(is_dtype_equal(self.dtype, result)) result = IntervalDtype.construct_from_string('interval[int64]') self.assertTrue(is_dtype_equal(self.dtype, result)) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): IntervalDtype.construct_from_string('foo') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): IntervalDtype.construct_from_string('interval[foo]') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): IntervalDtype.construct_from_string('foo[int64]') def test_equality(self): diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 5f1eb8ff46259..f925022b6bd7f 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -2,6 +2,8 @@ from __future__ import print_function +import pytest + from datetime import datetime, timedelta import numpy as np @@ -396,7 +398,7 @@ def test_rename(self): tm.assert_index_equal(renamed.index, pd.Index(['BAR', 'FOO'])) # have to pass something - self.assertRaises(TypeError, self.frame.rename) + pytest.raises(TypeError, self.frame.rename) # partial columns renamed = self.frame.rename(columns={'C': 'foo', 'D': 'bar'}) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 75a12ebaebf9d..703b93b9ec950 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -586,10 +586,10 @@ def test_numeric_only_flag(self): tm.assert_series_equal(expected, result) # df1 has all numbers, df2 has a letter inside - self.assertRaises(TypeError, lambda: getattr(df1, meth) - (axis=1, numeric_only=False)) - self.assertRaises(TypeError, lambda: getattr(df2, meth) - (axis=1, numeric_only=False)) + pytest.raises(TypeError, lambda: getattr(df1, meth)( + axis=1, numeric_only=False)) + pytest.raises(TypeError, lambda: getattr(df2, meth)( + axis=1, numeric_only=False)) def test_cumsum(self): 
self.tsframe.loc[5:10, 0] = nan @@ -998,7 +998,7 @@ def test_idxmin(self): skipna=skipna) tm.assert_series_equal(result, expected) - self.assertRaises(ValueError, frame.idxmin, axis=2) + pytest.raises(ValueError, frame.idxmin, axis=2) def test_idxmax(self): frame = self.frame @@ -1012,7 +1012,7 @@ def test_idxmax(self): skipna=skipna) tm.assert_series_equal(result, expected) - self.assertRaises(ValueError, frame.idxmax, axis=2) + pytest.raises(ValueError, frame.idxmax, axis=2) # ---------------------------------------------------------------------- # Logical reductions @@ -1087,7 +1087,7 @@ def wrapper(x): # assert_series_equal(result, comp) # bad axis - self.assertRaises(ValueError, f, axis=2) + pytest.raises(ValueError, f, axis=2) # make sure works on mixed-type frame mixed = self.mixed_frame @@ -1163,10 +1163,10 @@ def test_isin_with_string_scalar(self): df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'], 'ids2': ['a', 'n', 'c', 'n']}, index=['foo', 'bar', 'baz', 'qux']) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.isin('a') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.isin('aaa') def test_isin_df(self): @@ -1189,18 +1189,18 @@ def test_isin_df_dupe_values(self): # just cols duped df2 = DataFrame([[0, 2], [12, 4], [2, np.nan], [4, 5]], columns=['B', 'B']) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df1.isin(df2) # just index duped df2 = DataFrame([[0, 2], [12, 4], [2, np.nan], [4, 5]], columns=['A', 'B'], index=[0, 0, 1, 1]) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df1.isin(df2) # cols and index: df2.columns = ['B', 'B'] - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df1.isin(df2) def test_isin_dupe_self(self): @@ -1629,7 +1629,7 @@ def test_round(self): # Round with a list round_list = [1, 2] - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(round_list) # Round with a dictionary @@ -1652,34 +1652,34 @@ def test_round(self): # float input to `decimals` non_int_round_dict = {'col1': 1, 'col2': 0.5} - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(non_int_round_dict) # String input non_int_round_dict = {'col1': 1, 'col2': 'foo'} - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(non_int_round_dict) non_int_round_Series = Series(non_int_round_dict) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(non_int_round_Series) # List input non_int_round_dict = {'col1': 1, 'col2': [1, 2]} - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(non_int_round_dict) non_int_round_Series = Series(non_int_round_dict) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(non_int_round_Series) # Non integer Series inputs non_int_round_Series = Series(non_int_round_dict) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(non_int_round_Series) non_int_round_Series = Series(non_int_round_dict) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(non_int_round_Series) # Negative numbers @@ -1700,10 +1700,10 @@ def test_round(self): if sys.version < LooseVersion('2.7'): # Rounding with decimal is a ValueError in Python < 2.7 - with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.round(nan_round_Series) else: - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(nan_round_Series) # Make sure this doesn't break 
existing Series.round @@ -1761,7 +1761,7 @@ def test_round_issue(self): tm.assert_index_equal(rounded.index, dfs.index) decimals = pd.Series([1, 0, 2], index=['A', 'B', 'A']) - self.assertRaises(ValueError, df.round, decimals) + pytest.raises(ValueError, df.round, decimals) def test_built_in_round(self): if not compat.PY3: diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index c8d36e01d5205..879458a38770d 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -1,6 +1,9 @@ # -*- coding: utf-8 -*- from __future__ import print_function + +import pytest + # pylint: disable-msg=W0612,E1101 from copy import deepcopy import sys @@ -106,8 +109,8 @@ def test_column_contains_typeerror(self): def test_not_hashable(self): df = pd.DataFrame([1]) - self.assertRaises(TypeError, hash, df) - self.assertRaises(TypeError, hash, self.empty) + pytest.raises(TypeError, hash, df) + pytest.raises(TypeError, hash, self.empty) def test_new_empty_index(self): df1 = DataFrame(randn(0, 3)) @@ -131,7 +134,7 @@ def test_get_agg_axis(self): idx = self.frame._get_agg_axis(1) self.assertIs(idx, self.frame.index) - self.assertRaises(ValueError, self.frame._get_agg_axis, 2) + pytest.raises(ValueError, self.frame._get_agg_axis, 2) def test_nonzero(self): self.assertTrue(self.empty.empty) @@ -278,7 +281,7 @@ def test_swapaxes(self): assert_frame_equal(df.T, df.swapaxes(0, 1)) assert_frame_equal(df.T, df.swapaxes(1, 0)) assert_frame_equal(df, df.swapaxes(0, 0)) - self.assertRaises(ValueError, df.swapaxes, 2, 5) + pytest.raises(ValueError, df.swapaxes, 2, 5) def test_axis_aliases(self): f = self.frame diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 89bb0e1fdf5b2..d31fb4218adeb 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -2,6 +2,8 @@ from __future__ import print_function +import pytest + from datetime import datetime import warnings @@ -37,7 +39,7 @@ def test_apply(self): # invalid axis df = DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c']) - self.assertRaises(ValueError, df.apply, lambda x: x, 2) + pytest.raises(ValueError, df.apply, lambda x: x, 2) # GH9573 df = DataFrame({'c0': ['A', 'A', 'B', 'B'], @@ -528,17 +530,17 @@ def test_transform_and_agg_err(self): # cannot both transform and agg def f(): self.frame.transform(['max', 'min']) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): with np.errstate(all='ignore'): self.frame.agg(['max', 'sqrt']) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): with np.errstate(all='ignore'): self.frame.transform(['max', 'sqrt']) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) df = pd.DataFrame({'A': range(5), 'B': 5}) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 9b3dc11ea3be2..2f914472a1152 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -43,8 +43,8 @@ def test_drop_names(self): self.assertEqual(obj.columns.name, 'second') self.assertEqual(list(df.columns), ['d', 'e', 'f']) - self.assertRaises(ValueError, df.drop, ['g']) - self.assertRaises(ValueError, df.drop, ['g'], 1) + pytest.raises(ValueError, df.drop, ['g']) + pytest.raises(ValueError, df.drop, ['g'], 1) # errors = 'ignore' dropped = df.drop(['g'], errors='ignore') @@ -84,10 +84,10 @@ def test_drop(self): assert_frame_equal(simple.drop( [0, 3], axis='index'), simple.loc[[1, 2], 
:]) - self.assertRaises(ValueError, simple.drop, 5) - self.assertRaises(ValueError, simple.drop, 'C', 1) - self.assertRaises(ValueError, simple.drop, [1, 5]) - self.assertRaises(ValueError, simple.drop, ['A', 'C'], 1) + pytest.raises(ValueError, simple.drop, 5) + pytest.raises(ValueError, simple.drop, 'C', 1) + pytest.raises(ValueError, simple.drop, [1, 5]) + pytest.raises(ValueError, simple.drop, ['A', 'C'], 1) # errors = 'ignore' assert_frame_equal(simple.drop(5, errors='ignore'), simple) @@ -407,7 +407,7 @@ def test_reindex_dups(self): assert_frame_equal(result, expected) # reindex fails - self.assertRaises(ValueError, df.reindex, index=list(range(len(df)))) + pytest.raises(ValueError, df.reindex, index=list(range(len(df)))) def test_align(self): af, bf = self.frame.align(self.frame) @@ -798,10 +798,10 @@ def test_take(self): assert_frame_equal(result, expected, check_names=False) # illegal indices - self.assertRaises(IndexError, df.take, [3, 1, 2, 30], axis=0) - self.assertRaises(IndexError, df.take, [3, 1, 2, -31], axis=0) - self.assertRaises(IndexError, df.take, [3, 1, 2, 5], axis=1) - self.assertRaises(IndexError, df.take, [3, 1, 2, -5], axis=1) + pytest.raises(IndexError, df.take, [3, 1, 2, 30], axis=0) + pytest.raises(IndexError, df.take, [3, 1, 2, -31], axis=0) + pytest.raises(IndexError, df.take, [3, 1, 2, 5], axis=1) + pytest.raises(IndexError, df.take, [3, 1, 2, -5], axis=1) # mixed-dtype order = [4, 1, 2, 0, 3] @@ -883,7 +883,7 @@ def test_reindex_axis(self): reindexed2 = self.intframe.reindex(index=rows) assert_frame_equal(reindexed1, reindexed2) - self.assertRaises(ValueError, self.intframe.reindex_axis, rows, axis=2) + pytest.raises(ValueError, self.intframe.reindex_axis, rows, axis=2) # no-op case cols = self.frame.columns.copy() diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 0b707a2896e95..63c1f0a50fbed 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -2,6 +2,8 @@ from __future__ import print_function +import pytest + from datetime import datetime, timedelta import itertools @@ -285,10 +287,10 @@ def f(dtype): columns=["A", "B", "C"], dtype=dtype) - self.assertRaises(NotImplementedError, f, - [("A", "datetime64[h]"), - ("B", "str"), - ("C", "int32")]) + pytest.raises(NotImplementedError, f, + [("A", "datetime64[h]"), + ("B", "str"), + ("C", "int32")]) # these work (though results may be unexpected) f('int64') diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 29d742b2b79e2..d253fc2049462 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -61,8 +61,8 @@ def test_constructor_cast_failure(self): df['foo'] = np.ones((4, 2)).tolist() # this is not ok - self.assertRaises(ValueError, df.__setitem__, tuple(['test']), - np.ones((4, 2))) + pytest.raises(ValueError, df.__setitem__, tuple(['test']), + np.ones((4, 2))) # this is ok df['foo2'] = np.ones((4, 2)).tolist() @@ -232,7 +232,7 @@ def test_constructor_dict(self): # mix dict and array, wrong size - no spec for which error should raise # first - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): DataFrame({'A': {'a': 'a', 'b': 'b'}, 'B': ['a', 'b', 'c']}) # Length-one dict micro-optimization @@ -265,13 +265,13 @@ def test_constructor_dict(self): # GH10856 # dict with scalar values should raise error, even if columns passed - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): 
DataFrame({'a': 0.7}) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): DataFrame({'a': 0.7}, columns=['a']) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): DataFrame({'a': 0.7}, columns=['b']) def test_constructor_multi_index(self): @@ -1087,7 +1087,7 @@ def test_constructor_Series_named(self): expected = DataFrame({0: s}) tm.assert_frame_equal(df, expected) - self.assertRaises(ValueError, DataFrame, s, columns=[1, 2]) + pytest.raises(ValueError, DataFrame, s, columns=[1, 2]) # #2234 a = Series([], name='x') @@ -1207,9 +1207,9 @@ def test_constructor_column_duplicates(self): [('a', [8]), ('a', [5])], columns=['a', 'a']) tm.assert_frame_equal(idf, edf) - self.assertRaises(ValueError, DataFrame.from_items, - [('a', [8]), ('a', [5]), ('b', [6])], - columns=['b', 'a', 'a']) + pytest.raises(ValueError, DataFrame.from_items, + [('a', [8]), ('a', [5]), ('b', [6])], + columns=['b', 'a', 'a']) def test_constructor_empty_with_string_dtype(self): # GH 9428 @@ -1240,8 +1240,8 @@ def test_constructor_single_value(self): dtype=object), index=[1, 2], columns=['a', 'c'])) - self.assertRaises(ValueError, DataFrame, 'a', [1, 2]) - self.assertRaises(ValueError, DataFrame, 'a', columns=['a', 'c']) + pytest.raises(ValueError, DataFrame, 'a', [1, 2]) + pytest.raises(ValueError, DataFrame, 'a', columns=['a', 'c']) with tm.assertRaisesRegexp(TypeError, 'incompatible data and dtype'): DataFrame('a', [1, 2], ['a', 'c'], float) @@ -1486,7 +1486,7 @@ def check(df): def f(): df.loc[:, np.nan] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[1, np.nan]) check(df) @@ -1857,8 +1857,8 @@ def test_from_records_bad_index_column(self): tm.assert_index_equal(df1.index, Index(df.C)) # should fail - self.assertRaises(ValueError, DataFrame.from_records, df, index=[2]) - self.assertRaises(KeyError, DataFrame.from_records, df, index=2) + pytest.raises(ValueError, DataFrame.from_records, df, index=[2]) + pytest.raises(KeyError, DataFrame.from_records, df, index=2) def test_from_records_non_tuple(self): class Record(object): diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 24a0e9c866eaf..64edc52508216 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -111,7 +111,7 @@ def test_to_dict_timestamp(self): def test_to_dict_invalid_orient(self): df = DataFrame({'A': [0, 1]}) - self.assertRaises(ValueError, df.to_dict, orient='xinvalid') + pytest.raises(ValueError, df.to_dict, orient='xinvalid') def test_to_records_dt64(self): df = DataFrame([["one", "two", "three"], diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index c38b411097420..99141e3a8e1c4 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -1,6 +1,9 @@ # -*- coding: utf-8 -*- from __future__ import print_function + +import pytest + from datetime import timedelta import numpy as np @@ -146,8 +149,8 @@ def test_select_dtypes_include(self): ei = df[['k']] assert_frame_equal(ri, ei) - self.assertRaises(NotImplementedError, - lambda: df.select_dtypes(include=['period'])) + pytest.raises(NotImplementedError, + lambda: df.select_dtypes(include=['period'])) def test_select_dtypes_exclude(self): df = DataFrame({'a': list('abc'), @@ -470,8 +473,8 @@ def test_astype_dict(self): # error should be raised when using something other than column labels # in the keys of the dtype dict - self.assertRaises(KeyError, df.astype, 
{'b': str, 2: str}) - self.assertRaises(KeyError, df.astype, {'e': str}) + pytest.raises(KeyError, df.astype, {'b': str, 2: str}) + pytest.raises(KeyError, df.astype, {'e': str}) assert_frame_equal(df, original) # if the dtypes provided are the same as the original dtypes, the @@ -526,7 +529,7 @@ def test_arg_for_errors_in_astype(self): df = DataFrame([1, 2, 3]) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.astype(np.float64, errors=True) with tm.assert_produces_warning(FutureWarning): diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index d25a62c5e5932..780cb3d0457bd 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -29,8 +29,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, - assertRaisesRegexp, - assertRaises) + assertRaisesRegexp) from pandas.core.indexing import IndexingError import pandas.util.testing as tm @@ -410,8 +409,8 @@ def test_getitem_setitem_ix_negative_integers(self): def test_getattr(self): assert_series_equal(self.frame.A, self.frame['A']) - self.assertRaises(AttributeError, getattr, self.frame, - 'NONEXISTENT_NAME') + pytest.raises(AttributeError, getattr, self.frame, + 'NONEXISTENT_NAME') def test_setattr_column(self): df = DataFrame({'foobar': 1}, index=lrange(10)) @@ -436,7 +435,7 @@ def test_setitem(self): self.frame['col6'] = series tm.assert_series_equal(series, self.frame['col6'], check_names=False) - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): self.frame[randn(len(self.frame) + 1)] = 1 # set ndarray @@ -459,7 +458,7 @@ def test_setitem(self): def f(): smaller['col10'] = ['1', '2'] - self.assertRaises(com.SettingWithCopyError, f) + pytest.raises(com.SettingWithCopyError, f) self.assertEqual(smaller['col10'].dtype, np.object_) self.assertTrue((smaller['col10'] == ['1', '2']).all()) @@ -732,7 +731,7 @@ def test_delitem_corner(self): f = self.frame.copy() del f['D'] self.assertEqual(len(f.columns), 3) - self.assertRaises(KeyError, f.__delitem__, 'D') + pytest.raises(KeyError, f.__delitem__, 'D') del f['B'] self.assertEqual(len(f.columns), 2) @@ -774,7 +773,7 @@ def test_getitem_fancy_2d(self): assert_frame_equal(f, exp) with catch_warnings(record=True): - self.assertRaises(ValueError, f.ix.__getitem__, f > 0.5) + pytest.raises(ValueError, f.ix.__getitem__, f > 0.5) def test_slice_floats(self): index = [52195.504153, 52196.303147, 52198.369883] @@ -819,8 +818,8 @@ def test_getitem_setitem_integer_slice_keyerrors(self): # non-monotonic, raise KeyError df2 = df.iloc[lrange(5) + lrange(5, 10)[::-1]] - self.assertRaises(KeyError, df2.loc.__getitem__, slice(3, 11)) - self.assertRaises(KeyError, df2.loc.__setitem__, slice(3, 11), 0) + pytest.raises(KeyError, df2.loc.__getitem__, slice(3, 11)) + pytest.raises(KeyError, df2.loc.__setitem__, slice(3, 11), 0) def test_setitem_fancy_2d(self): @@ -938,7 +937,7 @@ def test_fancy_getitem_slice_mixed(self): def f(): sliced['C'] = 4. 
- self.assertRaises(com.SettingWithCopyError, f) + pytest.raises(com.SettingWithCopyError, f) self.assertTrue((self.frame['C'] == 4).all()) def test_fancy_setitem_int_labels(self): @@ -999,21 +998,18 @@ def test_fancy_index_int_labels_exceptions(self): with catch_warnings(record=True): # labels that aren't contained - self.assertRaises(KeyError, df.ix.__setitem__, - ([0, 1, 2], [2, 3, 4]), 5) + pytest.raises(KeyError, df.ix.__setitem__, + ([0, 1, 2], [2, 3, 4]), 5) # try to set indices not contained in frame - self.assertRaises(KeyError, - self.frame.ix.__setitem__, - ['foo', 'bar', 'baz'], 1) - self.assertRaises(KeyError, - self.frame.ix.__setitem__, - (slice(None, None), ['E']), 1) + pytest.raises(KeyError, self.frame.ix.__setitem__, + ['foo', 'bar', 'baz'], 1) + pytest.raises(KeyError, self.frame.ix.__setitem__, + (slice(None, None), ['E']), 1) # partial setting now allows this GH2578 - # self.assertRaises(KeyError, - # self.frame.ix.__setitem__, - # (slice(None, None), 'E'), 1) + # pytest.raises(KeyError, self.frame.ix.__setitem__, + # (slice(None, None), 'E'), 1) def test_setitem_fancy_mixed_2d(self): @@ -1357,7 +1353,7 @@ def test_getitem_setitem_fancy_exceptions(self): with assertRaisesRegexp(IndexingError, 'Too many indexers'): ix[:, :, :] - with assertRaises(IndexingError): + with pytest.raises(IndexingError): ix[:, :, :] = 1 def test_getitem_setitem_boolean_misaligned(self): @@ -1423,7 +1419,7 @@ def test_getitem_setitem_float_labels(self): df = DataFrame(np.random.randn(5, 5), index=index) # positional slicing only via iloc! - self.assertRaises(TypeError, lambda: df.iloc[1.0:5]) + pytest.raises(TypeError, lambda: df.iloc[1.0:5]) result = df.iloc[4:5] expected = df.reindex([5.0]) @@ -1434,12 +1430,12 @@ def test_getitem_setitem_float_labels(self): def f(): cp.iloc[1.0:5] = 0 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def f(): result = cp.iloc[1.0:5] == 0 # noqa - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) self.assertTrue(result.values.all()) self.assertTrue((cp.iloc[0:1] == df.iloc[0:1]).values.all()) @@ -1507,7 +1503,7 @@ def test_setitem_single_column_mixed_datetime(self): # as of GH 3216 this will now work! 
# try to set with a list like item - # self.assertRaises( + # pytest.raises( # Exception, df.loc.__setitem__, ('d', 'timestamp'), [nan]) def test_setitem_frame(self): @@ -1612,11 +1608,11 @@ def test_getitem_setitem_ix_bool_keyerror(self): # #2199 df = DataFrame({'a': [1, 2, 3]}) - self.assertRaises(KeyError, df.loc.__getitem__, False) - self.assertRaises(KeyError, df.loc.__getitem__, True) + pytest.raises(KeyError, df.loc.__getitem__, False) + pytest.raises(KeyError, df.loc.__getitem__, True) - self.assertRaises(KeyError, df.loc.__setitem__, False, 0) - self.assertRaises(KeyError, df.loc.__setitem__, True, 0) + pytest.raises(KeyError, df.loc.__setitem__, False, 0) + pytest.raises(KeyError, df.loc.__setitem__, True, 0) def test_getitem_list_duplicates(self): # #1943 @@ -1662,10 +1658,10 @@ def testit(df): tm.assert_series_equal(df['mask'], pd.Series(exp_mask, name='mask')) self.assertEqual(df['mask'].dtype, np.bool_) - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): self.frame.lookup(['xyz'], ['A']) - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): self.frame.lookup([self.frame.index[0]], ['xyz']) with tm.assertRaisesRegexp(ValueError, 'same size'): @@ -1699,7 +1695,7 @@ def test_set_value_resize(self): res3 = res.set_value('foobar', 'baz', 5) self.assertTrue(is_float_dtype(res3['baz'])) self.assertTrue(isnull(res3['baz'].drop(['foobar'])).all()) - self.assertRaises(ValueError, res3.set_value, 'foobar', 'baz', 'sam') + pytest.raises(ValueError, res3.set_value, 'foobar', 'baz', 'sam') def test_set_value_with_index_dtype_change(self): df_orig = DataFrame(randn(3, 3), index=lrange(3), columns=list('ABC')) @@ -1731,8 +1727,8 @@ def test_get_set_value_no_partial_indexing(self): # partial w/ MultiIndex raise exception index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)]) df = DataFrame(index=index, columns=lrange(4)) - self.assertRaises(KeyError, df.get_value, 0, 1) - # self.assertRaises(KeyError, df.set_value, 0, 1, 0) + pytest.raises(KeyError, df.get_value, 0, 1) + # pytest.raises(KeyError, df.set_value, 0, 1, 0) def test_single_element_ix_dont_upcast(self): self.frame['E'] = 1 @@ -1783,7 +1779,7 @@ def test_iloc_row(self): # setting it makes it raise/warn def f(): result[2] = 0. - self.assertRaises(com.SettingWithCopyError, f) + pytest.raises(com.SettingWithCopyError, f) exp_col = df[2].copy() exp_col[4:8] = 0. assert_series_equal(df[2], exp_col) @@ -1814,7 +1810,7 @@ def test_iloc_col(self): # and that we are setting a copy def f(): result[8] = 0. 
- self.assertRaises(com.SettingWithCopyError, f) + pytest.raises(com.SettingWithCopyError, f) self.assertTrue((df[8] == 0).all()) # list of integers @@ -1954,11 +1950,10 @@ def test_non_monotonic_reindex_methods(self): df_rev = pd.DataFrame(data, index=dr[[3, 4, 5] + [0, 1, 2]], columns=list('A')) # index is not monotonic increasing or decreasing - self.assertRaises(ValueError, df_rev.reindex, df.index, method='pad') - self.assertRaises(ValueError, df_rev.reindex, df.index, method='ffill') - self.assertRaises(ValueError, df_rev.reindex, df.index, method='bfill') - self.assertRaises(ValueError, df_rev.reindex, - df.index, method='nearest') + pytest.raises(ValueError, df_rev.reindex, df.index, method='pad') + pytest.raises(ValueError, df_rev.reindex, df.index, method='ffill') + pytest.raises(ValueError, df_rev.reindex, df.index, method='bfill') + pytest.raises(ValueError, df_rev.reindex, df.index, method='nearest') def test_reindex_level(self): from itertools import permutations @@ -2197,7 +2192,7 @@ def test_xs(self): self.assertEqual(xs['A'], 1) self.assertEqual(xs['B'], '1') - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): self.tsframe.xs(self.tsframe.index[0] - BDay()) # xs get column @@ -2423,14 +2418,14 @@ def _check_align(df, cond, other, check_dtypes=True): # invalid conditions df = default_frame err1 = (df + 1).values[0:2, :] - self.assertRaises(ValueError, df.where, cond, err1) + pytest.raises(ValueError, df.where, cond, err1) err2 = cond.iloc[:2, :].values other1 = _safe_add(df) - self.assertRaises(ValueError, df.where, err2, other1) + pytest.raises(ValueError, df.where, err2, other1) - self.assertRaises(ValueError, df.mask, True) - self.assertRaises(ValueError, df.mask, 0) + pytest.raises(ValueError, df.mask, True) + pytest.raises(ValueError, df.mask, 0) # where inplace def _check_set(df, cond, check_dtypes=True): diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 9aca810a4d298..74f1d3292fa4e 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -2,6 +2,8 @@ from __future__ import print_function +import pytest + from distutils.version import LooseVersion from numpy import nan, random import numpy as np @@ -139,7 +141,7 @@ def test_dropna(self): assert_frame_equal(dropped, expected) # bad input - self.assertRaises(ValueError, df.dropna, axis=3) + pytest.raises(ValueError, df.dropna, axis=3) def test_drop_and_dropna_caching(self): # tst that cacher updates @@ -158,10 +160,10 @@ def test_drop_and_dropna_caching(self): def test_dropna_corner(self): # bad input - self.assertRaises(ValueError, self.frame.dropna, how='foo') - self.assertRaises(TypeError, self.frame.dropna, how=None) + pytest.raises(ValueError, self.frame.dropna, how='foo') + pytest.raises(TypeError, self.frame.dropna, how=None) # non-existent column - 8303 - self.assertRaises(KeyError, self.frame.dropna, subset=['A', 'X']) + pytest.raises(KeyError, self.frame.dropna, subset=['A', 'X']) def test_dropna_multiple_axes(self): df = DataFrame([[1, np.nan, 2, 3], @@ -202,8 +204,8 @@ def test_fillna(self): result = self.mixed_frame.fillna(value=0) result = self.mixed_frame.fillna(method='pad') - self.assertRaises(ValueError, self.tsframe.fillna) - self.assertRaises(ValueError, self.tsframe.fillna, 5, method='ffill') + pytest.raises(ValueError, self.tsframe.fillna) + pytest.raises(ValueError, self.tsframe.fillna, 5, method='ffill') # mixed numeric (but no float16) mf = self.mixed_float.reindex(columns=['A', 'B', 'D']) @@ -482,12 
+484,11 @@ def test_fillna_invalid_method(self): def test_fillna_invalid_value(self): # list - self.assertRaises(TypeError, self.frame.fillna, [1, 2]) + pytest.raises(TypeError, self.frame.fillna, [1, 2]) # tuple - self.assertRaises(TypeError, self.frame.fillna, (1, 2)) + pytest.raises(TypeError, self.frame.fillna, (1, 2)) # frame with series - self.assertRaises(ValueError, self.frame.iloc[:, 0].fillna, - self.frame) + pytest.raises(ValueError, self.frame.iloc[:, 0].fillna, self.frame) def test_fillna_col_reordering(self): cols = ["COL." + str(i) for i in range(5, 0, -1)] @@ -545,7 +546,7 @@ def test_interp_bad_method(self): 'B': [1, 4, 9, np.nan], 'C': [1, 2, 3, 5], 'D': list('abcd')}) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.interpolate(method='not_a_method') def test_interp_combo(self): @@ -565,7 +566,7 @@ def test_interp_combo(self): def test_interp_nan_idx(self): df = DataFrame({'A': [1, 2, np.nan, 4], 'B': [np.nan, 2, 3, 4]}) df = df.set_index('A') - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): df.interpolate(method='values') def test_interp_various(self): @@ -693,7 +694,7 @@ def test_interp_raise_on_only_mixed(self): 'C': [np.nan, 2, 5, 7], 'D': [np.nan, np.nan, 9, 9], 'E': [1, 2, 3, 4]}) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.interpolate(axis=1) def test_interp_inplace(self): diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index f8de29cb63ebb..a7da704e73764 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -76,13 +76,13 @@ def test_assign_alphabetical(self): def test_assign_bad(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) # non-keyword argument - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.assign(lambda x: x.A) - with tm.assertRaises(AttributeError): + with pytest.raises(AttributeError): df.assign(C=df.A, D=df.A + df.C) - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): df.assign(C=lambda df: df.A, D=lambda df: df['A'] + df['C']) - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): df.assign(C=df.A, D=lambda x: x['A'] + x['C']) def test_insert_error_msmgs(self): @@ -147,7 +147,7 @@ def test_insert(self): with assertRaisesRegexp(ValueError, 'already exists'): df.insert(1, 'a', df['b']) - self.assertRaises(ValueError, df.insert, 1, 'c', df['b']) + pytest.raises(ValueError, df.insert, 1, 'c', df['b']) df.columns.name = 'some_name' # preserve columns name field diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index bb7c7c2bd012d..9d5a99eaf2e9d 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -2,6 +2,7 @@ from __future__ import print_function +import pytest import numpy as np from pandas.compat import lrange, u @@ -189,8 +190,8 @@ def check(result, expected=None): # reindex is invalid! 
df = DataFrame([[1, 5, 7.], [1, 5, 7.], [1, 5, 7.]], columns=['bar', 'a', 'a']) - self.assertRaises(ValueError, df.reindex, columns=['bar']) - self.assertRaises(ValueError, df.reindex, columns=['bar', 'foo']) + pytest.raises(ValueError, df.reindex, columns=['bar']) + pytest.raises(ValueError, df.reindex, columns=['bar', 'foo']) # drop df = DataFrame([[1, 5, 7.], [1, 5, 7.], [1, 5, 7.]], @@ -307,7 +308,7 @@ def check(result, expected=None): # boolean with the duplicate raises df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype='float64') - self.assertRaises(ValueError, lambda: df[df.A > 6]) + pytest.raises(ValueError, lambda: df[df.A > 6]) # dup aligining operations should work # GH 5185 @@ -324,7 +325,7 @@ def check(result, expected=None): columns=['A', 'A']) # not-comparing like-labelled - self.assertRaises(ValueError, lambda: df1 == df2) + pytest.raises(ValueError, lambda: df1 == df2) df1r = df1.reindex_like(df2) result = df1r == df2 @@ -411,7 +412,7 @@ def test_columns_with_dups(self): assert_frame_equal(df, expected) # this is an error because we cannot disambiguate the dup columns - self.assertRaises(Exception, lambda x: DataFrame( + pytest.raises(Exception, lambda x: DataFrame( [[1, 2, 'foo', 'bar']], columns=['a', 'a', 'a', 'a'])) # dups across blocks diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 01e930fee959d..ce756ca188bf0 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -119,12 +119,12 @@ def test_operators_boolean(self): def f(): DataFrame(1.0, index=[1], columns=['A']) | DataFrame( True, index=[1], columns=['A']) - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def f(): DataFrame('foo', index=[1], columns=['A']) | DataFrame( True, index=[1], columns=['A']) - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_operators_none_as_na(self): df = DataFrame({"col1": [2, 5.0, 123, None], @@ -157,12 +157,12 @@ def test_comparison_invalid(self): def check(df, df2): for (x, y) in [(df, df2), (df2, df)]: - self.assertRaises(TypeError, lambda: x == y) - self.assertRaises(TypeError, lambda: x != y) - self.assertRaises(TypeError, lambda: x >= y) - self.assertRaises(TypeError, lambda: x > y) - self.assertRaises(TypeError, lambda: x < y) - self.assertRaises(TypeError, lambda: x <= y) + pytest.raises(TypeError, lambda: x == y) + pytest.raises(TypeError, lambda: x != y) + pytest.raises(TypeError, lambda: x >= y) + pytest.raises(TypeError, lambda: x > y) + pytest.raises(TypeError, lambda: x < y) + pytest.raises(TypeError, lambda: x <= y) # GH4968 # invalid date/int comparisons @@ -318,10 +318,10 @@ def _check_unary_op(op): def test_logical_typeerror(self): if not compat.PY3: - self.assertRaises(TypeError, self.frame.__eq__, 'foo') - self.assertRaises(TypeError, self.frame.__lt__, 'foo') - self.assertRaises(TypeError, self.frame.__gt__, 'foo') - self.assertRaises(TypeError, self.frame.__ne__, 'foo') + pytest.raises(TypeError, self.frame.__eq__, 'foo') + pytest.raises(TypeError, self.frame.__lt__, 'foo') + pytest.raises(TypeError, self.frame.__gt__, 'foo') + pytest.raises(TypeError, self.frame.__ne__, 'foo') else: pytest.skip('test_logical_typeerror not tested on PY3') @@ -968,7 +968,7 @@ def test_float_none_comparison(self): df = DataFrame(np.random.randn(8, 3), index=lrange(8), columns=['A', 'B', 'C']) - self.assertRaises(TypeError, df.__eq__, None) + pytest.raises(TypeError, df.__eq__, None) def test_boolean_comparison(self): @@ -1001,8 +1001,8 @@ def 
test_boolean_comparison(self): result = df.values > b_r assert_numpy_array_equal(result, expected.values) - self.assertRaises(ValueError, df.__gt__, b_c) - self.assertRaises(ValueError, df.values.__gt__, b_c) + pytest.raises(ValueError, df.__gt__, b_c) + pytest.raises(ValueError, df.values.__gt__, b_c) # == expected = DataFrame([[False, False], [True, False], [False, False]]) @@ -1021,7 +1021,7 @@ def test_boolean_comparison(self): result = df.values == b_r assert_numpy_array_equal(result, expected.values) - self.assertRaises(ValueError, lambda: df == b_c) + pytest.raises(ValueError, lambda: df == b_c) self.assertFalse(np.array_equal(df.values, b_c)) # with alignment @@ -1037,8 +1037,8 @@ def test_boolean_comparison(self): assert_frame_equal(result, expected) # not shape compatible - self.assertRaises(ValueError, lambda: df == (2, 2)) - self.assertRaises(ValueError, lambda: df == [2, 2]) + pytest.raises(ValueError, lambda: df == (2, 2)) + pytest.raises(ValueError, lambda: df == [2, 2]) def test_combine_generic(self): df1 = self.frame @@ -1203,7 +1203,7 @@ def test_alignment_non_pandas(self): align(df, val, 'columns') val = np.zeros((3, 3, 3)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): align(df, val, 'index') - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): align(df, val, 'columns') diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 909a1a6a4c917..738ddb89db652 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -77,7 +77,7 @@ def test_quantile_axis_mixed(self): # must raise def f(): df.quantile(.5, axis=1, numeric_only=False) - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_quantile_axis_parameter(self): # GH 9543/9544 @@ -100,8 +100,8 @@ def test_quantile_axis_parameter(self): result = df.quantile(.5, axis="columns") assert_series_equal(result, expected) - self.assertRaises(ValueError, df.quantile, 0.1, axis=-1) - self.assertRaises(ValueError, df.quantile, 0.1, axis="column") + pytest.raises(ValueError, df.quantile, 0.1, axis=-1) + pytest.raises(ValueError, df.quantile, 0.1, axis="column") def test_quantile_interpolation(self): # GH #10174 diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index a531b86699e90..b787d63d3c754 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -15,7 +15,6 @@ from pandas.util.testing import (assert_series_equal, assert_frame_equal, - assertRaises, makeCustomDataframe as mkdf) import pandas.util.testing as tm @@ -82,10 +81,10 @@ def test_query_numexpr(self): result = df.eval('A+1', engine='numexpr') assert_series_equal(result, self.expected2, check_names=False) else: - self.assertRaises(ImportError, - lambda: df.query('A>0', engine='numexpr')) - self.assertRaises(ImportError, - lambda: df.eval('A+1', engine='numexpr')) + pytest.raises(ImportError, + lambda: df.query('A>0', engine='numexpr')) + pytest.raises(ImportError, + lambda: df.eval('A+1', engine='numexpr')) class TestDataFrameEval(tm.TestCase, TestData): @@ -384,7 +383,7 @@ def check_raise_on_panel_with_multiindex(self, parser, engine): tm.skip_if_no_ne() p = tm.makePanel(7) p.items = tm.makeCustomIndex(len(p.items), nlevels=2) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval('p + 1', parser=parser, engine=engine) def test_raise_on_panel4d_with_multiindex(self): @@ -395,7 +394,7 @@ def 
check_raise_on_panel4d_with_multiindex(self, parser, engine): tm.skip_if_no_ne() p4d = tm.makePanel4D(7) p4d.items = tm.makeCustomIndex(len(p4d.items), nlevels=2) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval('p4d + 1', parser=parser, engine=engine) @@ -500,14 +499,14 @@ def test_date_query_with_non_date(self): ops = '==', '!=', '<', '>', '<=', '>=' for op in ops: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.query('dates %s nondate' % op, parser=parser, engine=engine) def test_query_syntax_error(self): engine, parser = self.engine, self.parser df = DataFrame({"i": lrange(10), "+": lrange(3, 13), "r": lrange(4, 14)}) - with tm.assertRaises(SyntaxError): + with pytest.raises(SyntaxError): df.query('i - +', engine=engine, parser=parser) def test_query_scope(self): @@ -527,11 +526,11 @@ def test_query_scope(self): assert_frame_equal(res, expected) # no local variable c - with tm.assertRaises(UndefinedVariableError): + with pytest.raises(UndefinedVariableError): df.query('@a > b > @c', engine=engine, parser=parser) # no column named 'c' - with tm.assertRaises(UndefinedVariableError): + with pytest.raises(UndefinedVariableError): df.query('@a > b > c', engine=engine, parser=parser) def test_query_doesnt_pickup_local(self): @@ -542,7 +541,7 @@ def test_query_doesnt_pickup_local(self): df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list('abc')) # we don't pick up the local 'sin' - with tm.assertRaises(UndefinedVariableError): + with pytest.raises(UndefinedVariableError): df.query('sin > 5', engine=engine, parser=parser) def test_query_builtin(self): @@ -629,7 +628,7 @@ def test_nested_raises_on_local_self_reference(self): df = DataFrame(np.random.randn(5, 3)) # can't reference ourself b/c we're a local so @ is necessary - with tm.assertRaises(UndefinedVariableError): + with pytest.raises(UndefinedVariableError): df.query('df > 0', engine=self.engine, parser=self.parser) def test_local_syntax(self): @@ -799,7 +798,7 @@ def test_date_index_query_with_NaT_duplicates(self): df['dates3'] = date_range('1/1/2014', periods=n) df.loc[np.random.rand(n) > 0.5, 'dates1'] = pd.NaT df.set_index('dates1', inplace=True, drop=True) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): df.query('index < 20130101 < dates3', engine=engine, parser=parser) def test_nested_scope(self): @@ -815,10 +814,10 @@ def test_nested_scope(self): df2 = DataFrame(np.random.randn(5, 3)) # don't have the pandas parser - with tm.assertRaises(SyntaxError): + with pytest.raises(SyntaxError): df.query('(@df>0) & (@df2>0)', engine=engine, parser=parser) - with tm.assertRaises(UndefinedVariableError): + with pytest.raises(UndefinedVariableError): df.query('(df>0) & (df2>0)', engine=engine, parser=parser) expected = df[(df > 0) & (df2 > 0)] @@ -893,8 +892,9 @@ def check_str_query_method(self, parser, engine): for lhs, op, rhs in zip(lhs, ops, rhs): ex = '{lhs} {op} {rhs}'.format(lhs=lhs, op=op, rhs=rhs) - assertRaises(NotImplementedError, df.query, ex, engine=engine, - parser=parser, local_dict={'strings': df.strings}) + pytest.raises(NotImplementedError, df.query, ex, + engine=engine, parser=parser, + local_dict={'strings': df.strings}) else: res = df.query('"a" == strings', engine=engine, parser=parser) assert_frame_equal(res, expect) @@ -937,7 +937,7 @@ def check_str_list_query_method(self, parser, engine): for lhs, op, rhs in zip(lhs, ops, rhs): ex = '{lhs} {op} {rhs}'.format(lhs=lhs, op=op, rhs=rhs) - with 
tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): df.query(ex, engine=engine, parser=parser) else: res = df.query('strings == ["a", "b"]', engine=engine, @@ -973,10 +973,10 @@ def check_query_with_string_columns(self, parser, engine): expec = df[df.a.isin(df.b) & (df.c < df.d)] assert_frame_equal(res, expec) else: - with assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): df.query('a in b', parser=parser, engine=engine) - with assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): df.query('a in b and c < d', parser=parser, engine=engine) def test_query_with_string_columns(self): diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index f8e411c30fe38..bb2baaf0e02e2 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -2,6 +2,8 @@ from __future__ import print_function +import pytest + from datetime import datetime import re @@ -31,8 +33,8 @@ def test_replace_inplace(self): tsframe.replace(nan, 0, inplace=True) assert_frame_equal(tsframe, self.tsframe.fillna(0)) - self.assertRaises(TypeError, self.tsframe.replace, nan, inplace=True) - self.assertRaises(TypeError, self.tsframe.replace, nan) + pytest.raises(TypeError, self.tsframe.replace, nan, inplace=True) + pytest.raises(TypeError, self.tsframe.replace, nan) # mixed type mf = self.mixed_frame @@ -718,7 +720,7 @@ def test_replace_simple_nested_dict_with_nonexistent_value(self): assert_frame_equal(expected, result) def test_replace_value_is_none(self): - self.assertRaises(TypeError, self.tsframe.replace, nan) + pytest.raises(TypeError, self.tsframe.replace, nan) orig_value = self.tsframe.iloc[0, 0] orig2 = self.tsframe.iloc[1, 0] @@ -831,7 +833,7 @@ def test_replace_input_formats_listlike(self): expected.replace(to_rep[i], values[i], inplace=True) assert_frame_equal(result, expected) - self.assertRaises(ValueError, df.replace, to_rep, values[1:]) + pytest.raises(ValueError, df.replace, to_rep, values[1:]) def test_replace_input_formats_scalar(self): df = DataFrame({'A': [np.nan, 0, np.inf], 'B': [0, 2, 5], @@ -845,7 +847,7 @@ def test_replace_input_formats_scalar(self): expected[k] = v.replace(to_rep[k], 0) assert_frame_equal(filled, DataFrame(expected)) - self.assertRaises(TypeError, df.replace, to_rep, [np.nan, 0, '']) + pytest.raises(TypeError, df.replace, to_rep, [np.nan, 0, '']) # list to scalar to_rep = [np.nan, 0, ''] diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 067c6539121c0..60f19b7e6c87b 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -4,7 +4,9 @@ from warnings import catch_warnings from datetime import datetime + import itertools +import pytest from numpy.random import randn from numpy import nan @@ -364,7 +366,7 @@ def test_stack_mixed_levels(self): # When mixed types are passed and the ints are not level # names, raise - self.assertRaises(ValueError, df2.stack, level=['animal', 0]) + pytest.raises(ValueError, df2.stack, level=['animal', 0]) # GH #8584: Having 0 in the level names could raise a # strange error about lexsort depth @@ -523,10 +525,10 @@ def test_unstack_non_unique_index_names(self): idx = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')], names=['c1', 'c1']) df = DataFrame([1, 2], index=idx) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.unstack('c1') - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.T.stack('c1') def 
test_unstack_nan_index(self): # GH7466 diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 97171123c4a36..f1bca0d6a99b8 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- from __future__ import print_function + +import pytest import random import numpy as np @@ -61,7 +63,7 @@ def test_sort_values(self): sorted_df = frame.sort_values(by=['B', 'A'], ascending=[True, False]) assert_frame_equal(sorted_df, expected) - self.assertRaises(ValueError, lambda: frame.sort_values( + pytest.raises(ValueError, lambda: frame.sort_values( by=['A', 'B'], axis=2, inplace=True)) # by row (axis=1): GH 10806 diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index c521ba9ec215a..cf42f751f390a 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -4,6 +4,8 @@ from datetime import datetime, time +import pytest + from numpy import nan from numpy.random import randn import numpy as np @@ -299,7 +301,7 @@ def test_tshift(self): assert_frame_equal(unshifted, inferred_ts) no_freq = self.tsframe.iloc[[0, 5, 7], :] - self.assertRaises(ValueError, no_freq.tshift) + pytest.raises(ValueError, no_freq.tshift) def test_truncate(self): ts = self.tsframe[::3] @@ -340,9 +342,9 @@ def test_truncate(self): truncated = ts.truncate(after=end_missing) assert_frame_equal(truncated, expected) - self.assertRaises(ValueError, ts.truncate, - before=ts.index[-1] - 1, - after=ts.index[0] + 1) + pytest.raises(ValueError, ts.truncate, + before=ts.index[-1] - 1, + after=ts.index[0] + 1) def test_truncate_copy(self): index = self.tsframe.index @@ -571,4 +573,4 @@ def test_frame_to_period(self): pts = df.to_period('M', axis=1) tm.assert_index_equal(pts.columns, exp.columns.asfreq('M')) - self.assertRaises(ValueError, df.to_period, axis=2) + pytest.raises(ValueError, df.to_period, axis=2) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index f7da45e14cbde..2df2e23c3f877 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -3,6 +3,7 @@ from __future__ import print_function import csv +import pytest from numpy import nan import numpy as np @@ -94,8 +95,8 @@ def test_to_csv_from_csv2(self): assert_frame_equal(xp, rs) - self.assertRaises(ValueError, self.frame2.to_csv, path, - header=['AA', 'X']) + pytest.raises(ValueError, self.frame2.to_csv, path, + header=['AA', 'X']) def test_to_csv_from_csv3(self): @@ -965,8 +966,8 @@ def test_to_csv_compression_value_error(self): with ensure_clean() as filename: # zip compression is not supported and should raise ValueError import zipfile - self.assertRaises(zipfile.BadZipfile, df.to_csv, - filename, compression="zip") + pytest.raises(zipfile.BadZipfile, df.to_csv, + filename, compression="zip") def test_to_csv_date_format(self): with ensure_clean('__tmp_to_csv_date_format__') as path: diff --git a/pandas/tests/frame/test_validate.py b/pandas/tests/frame/test_validate.py index e1ef87bb3271a..4c4abb7e58e75 100644 --- a/pandas/tests/frame/test_validate.py +++ b/pandas/tests/frame/test_validate.py @@ -1,6 +1,8 @@ from unittest import TestCase from pandas.core.frame import DataFrame +import pytest + class TestDataFrameValidate(TestCase): """Tests for error handling related to data types of method arguments.""" @@ -11,23 +13,23 @@ def test_validate_bool_args(self): invalid_values = [1, "True", [1, 2, 3], 5.0] for value in invalid_values: - with 
self.assertRaises(ValueError): + with pytest.raises(ValueError): self.df.query('a > b', inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.df.eval('a + b', inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.df.set_index(keys=['a'], inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.df.reset_index(inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.df.dropna(inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.df.drop_duplicates(inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.df.sort_values(by=['a'], inplace=value) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index d91340312d7c2..e32dbb7846061 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -6,6 +6,9 @@ """ from __future__ import print_function + +import pytest + from datetime import datetime, timedelta from functools import partial @@ -179,8 +182,8 @@ def test_agg_cast_results_dtypes(self): def test_agg_must_agg(self): grouped = self.df.groupby('A')['C'] - self.assertRaises(Exception, grouped.agg, lambda x: x.describe()) - self.assertRaises(Exception, grouped.agg, lambda x: x.index[:2]) + pytest.raises(Exception, grouped.agg, lambda x: x.describe()) + pytest.raises(Exception, grouped.agg, lambda x: x.index[:2]) def test_agg_ser_multi_key(self): # TODO(wesm): unused @@ -374,7 +377,7 @@ def f(): g.aggregate({'r1': {'C': ['mean', 'sum']}, 'r2': {'D': ['mean', 'sum']}}) - self.assertRaises(SpecificationError, f) + pytest.raises(SpecificationError, f) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -477,7 +480,7 @@ def raiseException(df): pprint_thing(df.to_string()) raise TypeError - self.assertRaises(TypeError, df.groupby(0).agg, raiseException) + pytest.raises(TypeError, df.groupby(0).agg, raiseException) def test_series_agg_multikey(self): ts = tm.makeTimeSeries() @@ -563,11 +566,11 @@ def test_cython_agg_boolean(self): def test_cython_agg_nothing_to_agg(self): frame = DataFrame({'a': np.random.randint(0, 5, 50), 'b': ['foo', 'bar'] * 25}) - self.assertRaises(DataError, frame.groupby('a')['b'].mean) + pytest.raises(DataError, frame.groupby('a')['b'].mean) frame = DataFrame({'a': np.random.randint(0, 5, 50), 'b': ['foo', 'bar'] * 25}) - self.assertRaises(DataError, frame[['b']].groupby(frame['a']).mean) + pytest.raises(DataError, frame[['b']].groupby(frame['a']).mean) def test_cython_agg_nothing_to_agg_with_dates(self): frame = DataFrame({'a': np.random.randint(0, 5, 50), @@ -659,7 +662,7 @@ def test_agg_multiple_functions_too_many_lambdas(self): grouped = self.df.groupby('A') funcs = ['mean', lambda x: x.mean(), lambda x: x.std()] - self.assertRaises(SpecificationError, grouped.agg, funcs) + pytest.raises(SpecificationError, grouped.agg, funcs) def test_more_flexible_frame_multi_function(self): diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 289723ed5667a..320acacff483c 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +import pytest + from numpy import nan import numpy as np @@ -70,15 +72,15 @@ def test_generate_bins(self): bins = func(values, binner, closed='right') assert ((bins == np.array([3, 6])).all()) - 
self.assertRaises(ValueError, generate_bins_generic, values, [], - 'right') - self.assertRaises(ValueError, generate_bins_generic, values[:0], - binner, 'right') + pytest.raises(ValueError, generate_bins_generic, values, [], + 'right') + pytest.raises(ValueError, generate_bins_generic, values[:0], + binner, 'right') - self.assertRaises(ValueError, generate_bins_generic, values, [4], - 'right') - self.assertRaises(ValueError, generate_bins_generic, values, [-3, -1], - 'right') + pytest.raises(ValueError, generate_bins_generic, values, [4], + 'right') + pytest.raises(ValueError, generate_bins_generic, values, [-3, -1], + 'right') def test_group_ohlc(): diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 83471c7e98299..b9a731f2204da 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -2,6 +2,8 @@ from __future__ import print_function from datetime import datetime +import pytest + import numpy as np from numpy import nan @@ -227,7 +229,7 @@ def test_groupby_bins_unequal_len(self): # len(bins) != len(series) here def f(): series.groupby(bins).mean() - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_groupby_multi_categorical_as_index(self): # GH13204 diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index de6757786a363..5f39f320b1ea1 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -2,6 +2,7 @@ from __future__ import print_function from numpy import nan +import pytest from pandas import Timestamp from pandas.core.index import MultiIndex @@ -164,8 +165,8 @@ def raise_if_sum_is_zero(x): s = pd.Series([-1, 0, 1, 2]) grouper = s.apply(lambda x: x % 2) grouped = s.groupby(grouper) - self.assertRaises(TypeError, - lambda: grouped.filter(raise_if_sum_is_zero)) + pytest.raises(TypeError, + lambda: grouped.filter(raise_if_sum_is_zero)) def test_filter_with_axis_in_groupby(self): # issue 11041 @@ -186,16 +187,16 @@ def test_filter_bad_shapes(self): g_s = s.groupby(s) f = lambda x: x - self.assertRaises(TypeError, lambda: g_df.filter(f)) - self.assertRaises(TypeError, lambda: g_s.filter(f)) + pytest.raises(TypeError, lambda: g_df.filter(f)) + pytest.raises(TypeError, lambda: g_s.filter(f)) f = lambda x: x == 1 - self.assertRaises(TypeError, lambda: g_df.filter(f)) - self.assertRaises(TypeError, lambda: g_s.filter(f)) + pytest.raises(TypeError, lambda: g_df.filter(f)) + pytest.raises(TypeError, lambda: g_s.filter(f)) f = lambda x: np.outer(x, x) - self.assertRaises(TypeError, lambda: g_df.filter(f)) - self.assertRaises(TypeError, lambda: g_s.filter(f)) + pytest.raises(TypeError, lambda: g_df.filter(f)) + pytest.raises(TypeError, lambda: g_s.filter(f)) def test_filter_nan_is_false(self): df = DataFrame({'A': np.arange(8), diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index ea76fa24cc37c..25ebfef327476 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- from __future__ import print_function +import pytest + from warnings import catch_warnings from string import ascii_lowercase from datetime import datetime @@ -69,7 +71,7 @@ def checkit(dtype): self.assertEqual(agged[1], 21) # corner cases - self.assertRaises(Exception, grouped.aggregate, lambda x: x * 2) + pytest.raises(Exception, grouped.aggregate, lambda x: x * 2) for dtype in ['int64', 'int32', 'float64', 'float32']: 
checkit(dtype) @@ -77,9 +79,9 @@ def checkit(dtype): def test_select_bad_cols(self): df = DataFrame([[1, 2]], columns=['A', 'B']) g = df.groupby('A') - self.assertRaises(KeyError, g.__getitem__, ['C']) # g[['C']] + pytest.raises(KeyError, g.__getitem__, ['C']) # g[['C']] - self.assertRaises(KeyError, g.__getitem__, ['A', 'C']) # g[['A', 'C']] + pytest.raises(KeyError, g.__getitem__, ['A', 'C']) # g[['A', 'C']] with assertRaisesRegexp(KeyError, '^[^A]+$'): # A should not be referenced as a bad column... # will have to rethink regex if you change message! @@ -648,7 +650,7 @@ def test_grouper_iter(self): def test_empty_groups(self): # GH # 1048 - self.assertRaises(ValueError, self.df.groupby, []) + pytest.raises(ValueError, self.df.groupby, []) def test_groupby_grouper(self): grouped = self.df.groupby('A') @@ -662,8 +664,8 @@ def test_groupby_duplicated_column_errormsg(self): df = DataFrame(columns=['A', 'B', 'A', 'C'], data=[range(4), range(2, 6), range(0, 8, 2)]) - self.assertRaises(ValueError, df.groupby, 'A') - self.assertRaises(ValueError, df.groupby, ['A', 'B']) + pytest.raises(ValueError, df.groupby, 'A') + pytest.raises(ValueError, df.groupby, ['A', 'B']) grouped = df.groupby('B') c = grouped.count() @@ -702,7 +704,7 @@ def test_groupby_grouper_f_sanity_checked(self): # when the elements are Timestamp. # the result is Index[0:6], very confusing. - self.assertRaises(AssertionError, ts.groupby, lambda key: key[0:6]) + pytest.raises(AssertionError, ts.groupby, lambda key: key[0:6]) def test_groupby_nonobject_dtype(self): key = self.mframe.index.labels[0] @@ -853,10 +855,10 @@ def test_get_group(self): assert_frame_equal(result1, result3) # must pass a same-length tuple with multiple keys - self.assertRaises(ValueError, lambda: g.get_group('foo')) - self.assertRaises(ValueError, lambda: g.get_group(('foo'))) - self.assertRaises(ValueError, - lambda: g.get_group(('foo', 'bar', 'baz'))) + pytest.raises(ValueError, lambda: g.get_group('foo')) + pytest.raises(ValueError, lambda: g.get_group(('foo'))) + pytest.raises(ValueError, + lambda: g.get_group(('foo', 'bar', 'baz'))) def test_get_group_empty_bins(self): @@ -870,7 +872,7 @@ def test_get_group_empty_bins(self): expected = DataFrame([3, 1], index=[0, 1]) assert_frame_equal(result, expected) - self.assertRaises(KeyError, lambda: g.get_group(pd.Interval(10, 15))) + pytest.raises(KeyError, lambda: g.get_group(pd.Interval(10, 15))) def test_get_group_grouped_by_tuple(self): # GH 8121 @@ -890,8 +892,8 @@ def test_get_group_grouped_by_tuple(self): def test_grouping_error_on_multidim_input(self): from pandas.core.groupby import Grouping - self.assertRaises(ValueError, - Grouping, self.df.index, self.df[['A', 'A']]) + pytest.raises(ValueError, + Grouping, self.df.index, self.df[['A', 'A']]) def test_apply_describe_bug(self): grouped = self.mframe.groupby(level='first') @@ -1073,12 +1075,12 @@ def f3(x): assert_frame_equal(result1, result2) # should fail (not the same number of levels) - self.assertRaises(AssertionError, df.groupby('a').apply, f2) - self.assertRaises(AssertionError, df2.groupby('a').apply, f2) + pytest.raises(AssertionError, df.groupby('a').apply, f2) + pytest.raises(AssertionError, df2.groupby('a').apply, f2) # should fail (incorrect shape) - self.assertRaises(AssertionError, df.groupby('a').apply, f3) - self.assertRaises(AssertionError, df2.groupby('a').apply, f3) + pytest.raises(AssertionError, df.groupby('a').apply, f3) + pytest.raises(AssertionError, df2.groupby('a').apply, f3) def test_attr_wrapper(self): grouped = 
self.ts.groupby(lambda x: x.weekday()) @@ -1100,7 +1102,7 @@ def test_attr_wrapper(self): expected = grouped.agg(lambda x: x.dtype) # make sure raises error - self.assertRaises(AttributeError, getattr, grouped, 'foo') + pytest.raises(AttributeError, getattr, grouped, 'foo') def test_series_describe_multikey(self): ts = tm.makeTimeSeries() @@ -1155,8 +1157,8 @@ def test_frame_describe_tupleindex(self): 'z': [100, 200, 300, 400, 500] * 3}) df1['k'] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 df2 = df1.rename(columns={'k': 'key'}) - tm.assertRaises(ValueError, lambda: df1.groupby('k').describe()) - tm.assertRaises(ValueError, lambda: df2.groupby('key').describe()) + pytest.raises(ValueError, lambda: df1.groupby('k').describe()) + pytest.raises(ValueError, lambda: df2.groupby('key').describe()) def test_frame_describe_unstacked_format(self): # GH 4792 @@ -1585,7 +1587,7 @@ def test_as_index_series_return_frame(self): assert_frame_equal(result2, expected2) # corner case - self.assertRaises(Exception, grouped['C'].__getitem__, 'D') + pytest.raises(Exception, grouped['C'].__getitem__, 'D') def test_groupby_as_index_cython(self): data = self.df @@ -1619,11 +1621,11 @@ def test_groupby_as_index_series_scalar(self): assert_frame_equal(result, expected) def test_groupby_as_index_corner(self): - self.assertRaises(TypeError, self.ts.groupby, lambda x: x.weekday(), - as_index=False) + pytest.raises(TypeError, self.ts.groupby, lambda x: x.weekday(), + as_index=False) - self.assertRaises(ValueError, self.df.groupby, lambda x: x.lower(), - as_index=False, axis=1) + pytest.raises(ValueError, self.df.groupby, lambda x: x.lower(), + as_index=False, axis=1) def test_groupby_as_index_apply(self): # GH #4648 and #3417 @@ -1756,8 +1758,8 @@ def test_omit_nuisance(self): # won't work with axis = 1 grouped = df.groupby({'A': 0, 'C': 0, 'D': 1, 'E': 1}, axis=1) - result = self.assertRaises(TypeError, grouped.agg, - lambda x: x.sum(0, numeric_only=False)) + result = pytest.raises(TypeError, grouped.agg, + lambda x: x.sum(0, numeric_only=False)) def test_omit_nuisance_python_multiple(self): grouped = self.three_group.groupby(['A', 'B']) @@ -1821,7 +1823,7 @@ def desc3(group): def test_nonsense_func(self): df = DataFrame([0]) - self.assertRaises(Exception, df.groupby, lambda x: x + 'foo') + pytest.raises(Exception, df.groupby, lambda x: x + 'foo') def test_builtins_apply(self): # GH8155 df = pd.DataFrame(np.random.randint(1, 50, (1000, 2)), @@ -2046,14 +2048,14 @@ def test_groupby_level(self): assert_frame_equal(result1, expected1.T) # raise exception for non-MultiIndex - self.assertRaises(ValueError, self.df.groupby, level=1) + pytest.raises(ValueError, self.df.groupby, level=1) def test_groupby_level_index_names(self): # GH4014 this used to raise ValueError since 'exp'>1 (in py2) df = DataFrame({'exp': ['A'] * 3 + ['B'] * 3, 'var1': lrange(6), }).set_index('exp') df.groupby(level='exp') - self.assertRaises(ValueError, df.groupby, level='foo') + pytest.raises(ValueError, df.groupby, level='foo') def test_groupby_level_with_nas(self): index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]], @@ -2140,12 +2142,12 @@ def test_groupby_level_nonmulti(self): result = s.groupby(level=[-1]).sum() tm.assert_series_equal(result, expected) - tm.assertRaises(ValueError, s.groupby, level=1) - tm.assertRaises(ValueError, s.groupby, level=-2) - tm.assertRaises(ValueError, s.groupby, level=[]) - tm.assertRaises(ValueError, s.groupby, level=[0, 0]) - tm.assertRaises(ValueError, s.groupby, level=[0, 1]) - tm.assertRaises(ValueError, s.groupby, 
level=[1]) + pytest.raises(ValueError, s.groupby, level=1) + pytest.raises(ValueError, s.groupby, level=-2) + pytest.raises(ValueError, s.groupby, level=[]) + pytest.raises(ValueError, s.groupby, level=[0, 0]) + pytest.raises(ValueError, s.groupby, level=[0, 1]) + pytest.raises(ValueError, s.groupby, level=[1]) def test_groupby_complex(self): # GH 12902 @@ -2905,7 +2907,7 @@ def test_groupby_list_infer_array_like(self): expected = self.df.groupby(self.df['A']).mean() assert_frame_equal(result, expected, check_names=False) - self.assertRaises(Exception, self.df.groupby, list(self.df['A'][:-1])) + pytest.raises(Exception, self.df.groupby, list(self.df['A'][:-1])) # pathological case of ambiguity df = DataFrame({'foo': [0, 1], @@ -2931,9 +2933,9 @@ def test_groupby_keys_same_size_as_index(self): def test_groupby_one_row(self): # GH 11741 df1 = pd.DataFrame(np.random.randn(1, 4), columns=list('ABCD')) - self.assertRaises(KeyError, df1.groupby, 'Z') + pytest.raises(KeyError, df1.groupby, 'Z') df2 = pd.DataFrame(np.random.randn(2, 4), columns=list('ABCD')) - self.assertRaises(KeyError, df2.groupby, 'Z') + pytest.raises(KeyError, df2.groupby, 'Z') def test_groupby_nat_exclude(self): # GH 6992 @@ -2970,7 +2972,7 @@ def test_groupby_nat_exclude(self): tm.assert_frame_equal( grouped.get_group(Timestamp('2013-02-01')), df.iloc[[3, 5]]) - self.assertRaises(KeyError, grouped.get_group, pd.NaT) + pytest.raises(KeyError, grouped.get_group, pd.NaT) nan_df = DataFrame({'nan': [np.nan, np.nan, np.nan], 'nat': [pd.NaT, pd.NaT, pd.NaT]}) @@ -2982,8 +2984,8 @@ def test_groupby_nat_exclude(self): self.assertEqual(grouped.groups, {}) self.assertEqual(grouped.ngroups, 0) self.assertEqual(grouped.indices, {}) - self.assertRaises(KeyError, grouped.get_group, np.nan) - self.assertRaises(KeyError, grouped.get_group, pd.NaT) + pytest.raises(KeyError, grouped.get_group, np.nan) + pytest.raises(KeyError, grouped.get_group, pd.NaT) def test_dictify(self): dict(iter(self.df.groupby('A'))) @@ -4019,7 +4021,7 @@ def test_pivot_table_values_key_error(self): df['year'] = df.set_index('eventDate').index.year df['month'] = df.set_index('eventDate').index.month - with self.assertRaises(KeyError): + with pytest.raises(KeyError): df.reset_index().pivot_table(index='year', columns='month', values='badname', aggfunc='count') diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index dfc23d3db21bc..ae0413615f738 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -1,5 +1,7 @@ """ test with the TimeGrouper / grouping with datetimes """ +import pytest + from datetime import datetime import numpy as np from numpy import nan @@ -186,7 +188,7 @@ def test_timegrouper_with_reg_groups(self): ]).sum() assert_frame_equal(result, expected) - with self.assertRaises(KeyError): + with pytest.raises(KeyError): df.groupby([pd.Grouper(freq='1M', key='foo'), 'Buyer']).sum() # passing the level @@ -198,7 +200,7 @@ def test_timegrouper_with_reg_groups(self): ) assert_frame_equal(result, expected) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.groupby([pd.Grouper(freq='1M', level='foo'), 'Buyer']).sum() @@ -219,7 +221,7 @@ def test_timegrouper_with_reg_groups(self): assert_frame_equal(result, expected) # error as we have both a level and a name! 
- with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.groupby([pd.Grouper(freq='1M', key='Date', level='Date'), 'Buyer']).sum() diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 3ed2023136b3a..57ea8eb067a2b 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1,5 +1,7 @@ """ test with the .transform """ +import pytest + import numpy as np import pandas as pd from pandas.util import testing as tm @@ -534,8 +536,8 @@ def test_cython_transform(self): for c in df: if c not in ['float', 'int', 'float_missing' ] and op != 'shift': - self.assertRaises(DataError, gb[c].transform, op) - self.assertRaises(DataError, getattr(gb[c], op)) + pytest.raises(DataError, gb[c].transform, op) + pytest.raises(DataError, getattr(gb[c], op)) else: expected = gb[c].apply(targop) expected.name = c diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 91c08764985a9..ff90c924933f3 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -40,7 +40,7 @@ def test_pickle_compat_construction(self): return # need an object to create with - self.assertRaises(TypeError, self._holder) + pytest.raises(TypeError, self._holder) def test_to_series(self): # assert that we are creating a copy of the index @@ -55,8 +55,8 @@ def test_shift(self): # GH8083 test the base class for shift idx = self.create_index() - self.assertRaises(NotImplementedError, idx.shift, 1) - self.assertRaises(NotImplementedError, idx.shift, 1, 2) + pytest.raises(NotImplementedError, idx.shift, 1) + pytest.raises(NotImplementedError, idx.shift, 1, 2) def test_create_index_existing_name(self): @@ -363,13 +363,13 @@ def test_get_unique_index(self): def test_sort(self): for ind in self.indices.values(): - self.assertRaises(TypeError, ind.sort) + pytest.raises(TypeError, ind.sort) def test_mutability(self): for ind in self.indices.values(): if not len(ind): continue - self.assertRaises(TypeError, ind.__setitem__, 0, ind[0]) + pytest.raises(TypeError, ind.__setitem__, 0, ind[0]) def test_view(self): for ind in self.indices.values(): @@ -459,7 +459,7 @@ def test_take(self): if not isinstance(ind, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): # GH 10791 - with tm.assertRaises(AttributeError): + with pytest.raises(AttributeError): ind.freq def test_take_invalid_kwargs(self): @@ -694,7 +694,7 @@ def test_delete_base(self): self.assertTrue(result.equals(expected)) self.assertEqual(result.name, expected.name) - with tm.assertRaises((IndexError, ValueError)): + with pytest.raises((IndexError, ValueError)): # either depending on numpy version result = idx.delete(len(idx)) diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 3d7814b7340bc..755944d342ed4 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np from datetime import datetime @@ -115,11 +117,11 @@ def test_astype_raises(self): # GH 13149, GH 13209 idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) - self.assertRaises(ValueError, idx.astype, float) - self.assertRaises(ValueError, idx.astype, 'timedelta64') - self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') - self.assertRaises(ValueError, idx.astype, 'datetime64') - self.assertRaises(ValueError, idx.astype, 'datetime64[D]') + pytest.raises(ValueError, idx.astype, float) + pytest.raises(ValueError, idx.astype, 
'timedelta64') + pytest.raises(ValueError, idx.astype, 'timedelta64[ns]') + pytest.raises(ValueError, idx.astype, 'datetime64') + pytest.raises(ValueError, idx.astype, 'datetime64[D]') def test_index_convert_to_datetime_array(self): tm._skip_if_no_pytz() diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 50d389e263305..ca673e3059ea2 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np from datetime import timedelta @@ -49,7 +51,7 @@ def test_construction_with_alt(self): tm.assert_index_equal(i2, expected) # incompat tz/dtype - self.assertRaises(ValueError, lambda: DatetimeIndex( + pytest.raises(ValueError, lambda: DatetimeIndex( i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific')) def test_construction_index_with_mixed_timezones(self): @@ -239,7 +241,7 @@ def test_construction_dti_with_mixed_timezones(self): # tz mismatch affecting to tz-aware raises TypeError/ValueError - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') @@ -249,7 +251,7 @@ def test_construction_dti_with_mixed_timezones(self): Timestamp('2011-01-02 10:00', tz='US/Eastern')], tz='Asia/Tokyo', name='idx') - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], tz='US/Eastern', name='idx') @@ -280,7 +282,7 @@ def test_construction_outofbounds(self): # coerces to object tm.assert_index_equal(Index(dates), exp) - with tm.assertRaises(OutOfBoundsDatetime): + with pytest.raises(OutOfBoundsDatetime): # can't create DatetimeIndex DatetimeIndex(dates) @@ -302,13 +304,13 @@ def test_constructor_coverage(self): exp = date_range('1/1/2000', periods=10) tm.assert_index_equal(rng, exp) - self.assertRaises(ValueError, DatetimeIndex, start='1/1/2000', - periods='foo', freq='D') + pytest.raises(ValueError, DatetimeIndex, start='1/1/2000', + periods='foo', freq='D') - self.assertRaises(ValueError, DatetimeIndex, start='1/1/2000', - end='1/10/2000') + pytest.raises(ValueError, DatetimeIndex, start='1/1/2000', + end='1/10/2000') - self.assertRaises(ValueError, DatetimeIndex, '1/1/2000') + pytest.raises(ValueError, DatetimeIndex, '1/1/2000') # generator expression gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10)) @@ -336,14 +338,14 @@ def test_constructor_coverage(self): tm.assert_index_equal(from_ints, expected) # non-conforming - self.assertRaises(ValueError, DatetimeIndex, - ['2000-01-01', '2000-01-02', '2000-01-04'], freq='D') + pytest.raises(ValueError, DatetimeIndex, + ['2000-01-01', '2000-01-02', '2000-01-04'], freq='D') - self.assertRaises(ValueError, DatetimeIndex, start='2011-01-01', - freq='b') - self.assertRaises(ValueError, DatetimeIndex, end='2011-01-01', - freq='B') - self.assertRaises(ValueError, DatetimeIndex, periods=10, freq='D') + pytest.raises(ValueError, DatetimeIndex, start='2011-01-01', + freq='b') + pytest.raises(ValueError, DatetimeIndex, end='2011-01-01', + freq='B') + pytest.raises(ValueError, DatetimeIndex, periods=10, freq='D') def test_constructor_datetime64_tzformat(self): # GH 6572 @@ -414,18 +416,18 @@ def test_constructor_dtype(self): idx = DatetimeIndex(['2013-01-01', '2013-01-02'], dtype='datetime64[ns, US/Eastern]') - 
self.assertRaises(ValueError, - lambda: DatetimeIndex(idx, - dtype='datetime64[ns]')) + pytest.raises(ValueError, + lambda: DatetimeIndex(idx, + dtype='datetime64[ns]')) # this is effectively trying to convert tz's - self.assertRaises(TypeError, - lambda: DatetimeIndex(idx, - dtype='datetime64[ns, CET]')) - self.assertRaises(ValueError, - lambda: DatetimeIndex( - idx, tz='CET', - dtype='datetime64[ns, US/Eastern]')) + pytest.raises(TypeError, + lambda: DatetimeIndex(idx, + dtype='datetime64[ns, CET]')) + pytest.raises(ValueError, + lambda: DatetimeIndex( + idx, tz='CET', + dtype='datetime64[ns, US/Eastern]')) result = DatetimeIndex(idx, dtype='datetime64[ns, US/Eastern]') tm.assert_index_equal(idx, result) @@ -527,7 +529,7 @@ def test_from_freq_recreate_from_data(self): def test_datetimeindex_constructor_misc(self): arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04'] - self.assertRaises(Exception, DatetimeIndex, arr) + pytest.raises(Exception, DatetimeIndex, arr) arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04'] idx1 = DatetimeIndex(arr) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 3c00fb32649bb..665dcc45050f3 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -3,6 +3,8 @@ construction from the convenience range functions """ +import pytest + import numpy as np from datetime import datetime, timedelta, time @@ -79,8 +81,8 @@ def test_date_range_ambiguous_arguments(self): start = datetime(2011, 1, 1, 5, 3, 40) end = datetime(2011, 1, 1, 8, 9, 40) - self.assertRaises(ValueError, date_range, start, end, freq='s', - periods=10) + pytest.raises(ValueError, date_range, start, end, freq='s', + periods=10) def test_date_range_businesshour(self): idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00', @@ -119,13 +121,13 @@ def test_date_range_businesshour(self): def test_range_misspecified(self): # GH #1095 - self.assertRaises(ValueError, date_range, '1/1/2000') - self.assertRaises(ValueError, date_range, end='1/1/2000') - self.assertRaises(ValueError, date_range, periods=10) + pytest.raises(ValueError, date_range, '1/1/2000') + pytest.raises(ValueError, date_range, end='1/1/2000') + pytest.raises(ValueError, date_range, periods=10) - self.assertRaises(ValueError, date_range, '1/1/2000', freq='H') - self.assertRaises(ValueError, date_range, end='1/1/2000', freq='H') - self.assertRaises(ValueError, date_range, periods=10, freq='H') + pytest.raises(ValueError, date_range, '1/1/2000', freq='H') + pytest.raises(ValueError, date_range, end='1/1/2000', freq='H') + pytest.raises(ValueError, date_range, periods=10, freq='H') def test_compat_replace(self): # https://github.com/statsmodels/statsmodels/issues/3349 @@ -141,8 +143,8 @@ def test_compat_replace(self): def test_catch_infinite_loop(self): offset = offsets.DateOffset(minute=5) # blow up, don't loop forever - self.assertRaises(Exception, date_range, datetime(2011, 11, 11), - datetime(2011, 11, 12), freq=offset) + pytest.raises(Exception, date_range, datetime(2011, 11, 11), + datetime(2011, 11, 12), freq=offset) class TestGenRangeGeneration(tm.TestCase): @@ -203,8 +205,8 @@ def test_constructor(self): bdate_range(START, END, freq=BDay()) bdate_range(START, periods=20, freq=BDay()) bdate_range(end=START, periods=20, freq=BDay()) - self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'B') - self.assertRaises(ValueError, bdate_range, '2011-1-1', '2012-1-1', 'B') + 
pytest.raises(ValueError, date_range, '2011-1-1', '2012-1-1', 'B') + pytest.raises(ValueError, bdate_range, '2011-1-1', '2012-1-1', 'B') def test_naive_aware_conflicts(self): naive = bdate_range(START, END, freq=BDay(), tz=None) @@ -250,8 +252,8 @@ def test_timezone_comparaison_bug(self): def test_timezone_comparaison_assert(self): start = Timestamp('20130220 10:00', tz='US/Eastern') - self.assertRaises(AssertionError, date_range, start, periods=2, - tz='Europe/Berlin') + pytest.raises(AssertionError, date_range, start, periods=2, + tz='Europe/Berlin') def test_misc(self): end = datetime(2009, 5, 13) @@ -265,14 +267,14 @@ def test_misc(self): def test_date_parse_failure(self): badly_formed_date = '2007/100/1' - self.assertRaises(ValueError, Timestamp, badly_formed_date) + pytest.raises(ValueError, Timestamp, badly_formed_date) - self.assertRaises(ValueError, bdate_range, start=badly_formed_date, - periods=10) - self.assertRaises(ValueError, bdate_range, end=badly_formed_date, - periods=10) - self.assertRaises(ValueError, bdate_range, badly_formed_date, - badly_formed_date) + pytest.raises(ValueError, bdate_range, start=badly_formed_date, + periods=10) + pytest.raises(ValueError, bdate_range, end=badly_formed_date, + periods=10) + pytest.raises(ValueError, bdate_range, badly_formed_date, + badly_formed_date) def test_daterange_bug_456(self): # GH #456 @@ -284,8 +286,8 @@ def test_daterange_bug_456(self): assert isinstance(result, DatetimeIndex) def test_error_with_zero_monthends(self): - self.assertRaises(ValueError, date_range, '1/1/2000', '1/1/2001', - freq=MonthEnd(0)) + pytest.raises(ValueError, date_range, '1/1/2000', '1/1/2001', + freq=MonthEnd(0)) def test_range_bug(self): # GH #770 @@ -488,8 +490,8 @@ def test_constructor(self): cdate_range(START, END, freq=CDay()) cdate_range(START, periods=20, freq=CDay()) cdate_range(end=START, periods=20, freq=CDay()) - self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'C') - self.assertRaises(ValueError, cdate_range, '2011-1-1', '2012-1-1', 'C') + pytest.raises(ValueError, date_range, '2011-1-1', '2012-1-1', 'C') + pytest.raises(ValueError, cdate_range, '2011-1-1', '2012-1-1', 'C') def test_cached_range(self): DatetimeIndex._cached_range(START, END, offset=CDay()) @@ -498,16 +500,16 @@ def test_cached_range(self): DatetimeIndex._cached_range(end=START, periods=20, offset=CDay()) - self.assertRaises(Exception, DatetimeIndex._cached_range, START, END) + pytest.raises(Exception, DatetimeIndex._cached_range, START, END) - self.assertRaises(Exception, DatetimeIndex._cached_range, START, - freq=CDay()) + pytest.raises(Exception, DatetimeIndex._cached_range, START, + freq=CDay()) - self.assertRaises(Exception, DatetimeIndex._cached_range, end=END, - freq=CDay()) + pytest.raises(Exception, DatetimeIndex._cached_range, end=END, + freq=CDay()) - self.assertRaises(Exception, DatetimeIndex._cached_range, periods=20, - freq=CDay()) + pytest.raises(Exception, DatetimeIndex._cached_range, periods=20, + freq=CDay()) def test_misc(self): end = datetime(2009, 5, 13) @@ -521,14 +523,14 @@ def test_misc(self): def test_date_parse_failure(self): badly_formed_date = '2007/100/1' - self.assertRaises(ValueError, Timestamp, badly_formed_date) + pytest.raises(ValueError, Timestamp, badly_formed_date) - self.assertRaises(ValueError, cdate_range, start=badly_formed_date, - periods=10) - self.assertRaises(ValueError, cdate_range, end=badly_formed_date, - periods=10) - self.assertRaises(ValueError, cdate_range, badly_formed_date, - badly_formed_date) + 
pytest.raises(ValueError, cdate_range, start=badly_formed_date, + periods=10) + pytest.raises(ValueError, cdate_range, end=badly_formed_date, + periods=10) + pytest.raises(ValueError, cdate_range, badly_formed_date, + badly_formed_date) def test_daterange_bug_456(self): # GH #456 diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index cd7e8cb782f12..7cef5eeb94915 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np from datetime import date, timedelta, time @@ -40,7 +42,7 @@ def test_get_loc(self): tolerance=timedelta(1)), 1) with tm.assertRaisesRegexp(ValueError, 'must be convertible'): idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') self.assertEqual(idx.get_loc('2000', method='nearest'), slice(0, 3)) @@ -49,14 +51,14 @@ def test_get_loc(self): self.assertEqual(idx.get_loc('1999', method='nearest'), 0) self.assertEqual(idx.get_loc('2001', method='nearest'), 2) - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): idx.get_loc('1999', method='pad') - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): idx.get_loc('2001', method='backfill') - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): idx.get_loc('foobar') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): idx.get_loc(slice(2)) idx = pd.to_datetime(['2000-01-01', '2000-01-04']) @@ -70,7 +72,7 @@ def test_get_loc(self): np.array([12]), check_dtype=False) tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)), np.array([]), check_dtype=False) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): idx.get_loc(time(12, 30), method='pad') def test_get_indexer(self): @@ -90,7 +92,7 @@ def test_get_indexer(self): idx.get_indexer(target, 'nearest', tolerance=pd.Timedelta('1 hour')), np.array([0, -1, 1], dtype=np.intp)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') def test_reasonable_keyerror(self): @@ -234,7 +236,7 @@ def test_append_join_nondatetimeindex(self): def test_to_period_nofreq(self): idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) - self.assertRaises(ValueError, idx.to_period) + pytest.raises(ValueError, idx.to_period) idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'], freq='infer') @@ -252,7 +254,7 @@ def test_comparisons_coverage(self): rng = date_range('1/1/2000', periods=10) # raise TypeError for now - self.assertRaises(TypeError, rng.__lt__, rng[3].value) + pytest.raises(TypeError, rng.__lt__, rng[3].value) result = rng == list(rng) exp = rng == rng @@ -422,15 +424,15 @@ def test_overflow_offset(self): def f(): t + offset - self.assertRaises(OverflowError, f) + pytest.raises(OverflowError, f) def f(): offset + t - self.assertRaises(OverflowError, f) + pytest.raises(OverflowError, f) def f(): t - offset - self.assertRaises(OverflowError, f) + pytest.raises(OverflowError, f) def test_get_duplicates(self): idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-02', @@ -511,7 +513,7 @@ def test_take_fill_value(self): with tm.assertRaisesRegexp(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) def 
test_take_fill_value_with_timezone(self): @@ -542,7 +544,7 @@ def test_take_fill_value_with_timezone(self): with tm.assertRaisesRegexp(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) def test_map_bug_1677(self): diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 3cb96eb4e4051..568e045d9f5e7 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -167,7 +167,7 @@ def test_delete(self): self.assertEqual(result.name, expected.name) self.assertEqual(result.freq, expected.freq) - with tm.assertRaises((IndexError, ValueError)): + with pytest.raises((IndexError, ValueError)): # either depending on numpy version result = idx.delete(5) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index d7507360506ca..4c7235fea63e8 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -1,5 +1,6 @@ -import numpy as np +import pytest +import numpy as np import pandas as pd import pandas.util.testing as tm from pandas import (Index, DatetimeIndex, datetime, offsets, @@ -291,7 +292,7 @@ def test_datetimeindex_accessors(self): # CBD requires np >= 1.7 bday_egypt = offsets.CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu') dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt) - self.assertRaises(ValueError, lambda: dti.is_month_start) + pytest.raises(ValueError, lambda: dti.is_month_start) dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03']) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index f04207caef6ab..2eff8a12dee77 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -41,14 +41,14 @@ def test_ops_properties_basic(self): # sanity check that the behavior didn't change # GH7206 for op in ['year', 'day', 'second', 'weekday']: - self.assertRaises(TypeError, lambda x: getattr(self.dt_series, op)) + pytest.raises(TypeError, lambda x: getattr(self.dt_series, op)) # attribute access should still work!
s = Series(dict(year=2000, month=1, day=10)) self.assertEqual(s.year, 2000) self.assertEqual(s.month, 1) self.assertEqual(s.day, 10) - self.assertRaises(AttributeError, lambda: s.weekday) + pytest.raises(AttributeError, lambda: s.weekday) def test_asobject_tolist(self): idx = pd.date_range(start='2013-01-01', periods=4, freq='M', @@ -446,16 +446,16 @@ def test_add_dti_dti(self): dti = date_range('20130101', periods=3) dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): dti + dti - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): dti_tz + dti_tz - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): dti_tz + dti - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): dti + dti_tz def test_difference(self): @@ -521,13 +521,13 @@ def test_sub_dti_dti(self): result = dti_tz - dti_tz tm.assert_index_equal(result, expected) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): dti_tz - dti - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): dti - dti_tz - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): dti_tz - dti_tz2 # isub @@ -537,7 +537,7 @@ def test_sub_dti_dti(self): # different length raises ValueError dti1 = date_range('20130101', periods=3) dti2 = date_range('20130101', periods=4) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): dti1 - dti2 # NaN propagation @@ -555,10 +555,10 @@ def test_sub_period(self): for freq in [None, 'D']: idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=freq) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): idx - p - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): p - idx def test_comp_nat(self): diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index ee704d0b29ddd..9c41e2d823238 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -1,4 +1,7 @@ """ test partial slicing on Series/Frame """ + +import pytest + from datetime import datetime import numpy as np import pandas as pd @@ -67,7 +70,7 @@ def test_partial_slice(self): result = s['2005-1-1'] self.assertEqual(result, s.iloc[0]) - self.assertRaises(Exception, s.__getitem__, '2004-12-31') + pytest.raises(Exception, s.__getitem__, '2004-12-31') def test_partial_slice_daily(self): rng = DatetimeIndex(freq='H', start=datetime(2005, 1, 31), periods=500) @@ -76,7 +79,7 @@ def test_partial_slice_daily(self): result = s['2005-1-31'] tm.assert_series_equal(result, s.iloc[:24]) - self.assertRaises(Exception, s.__getitem__, '2004-12-31 00') + pytest.raises(Exception, s.__getitem__, '2004-12-31 00') def test_partial_slice_hourly(self): rng = DatetimeIndex(freq='T', start=datetime(2005, 1, 1, 20, 0, 0), @@ -90,7 +93,7 @@ def test_partial_slice_hourly(self): tm.assert_series_equal(result, s.iloc[:60]) self.assertEqual(s['2005-1-1 20:00'], s.iloc[0]) - self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:15') + pytest.raises(Exception, s.__getitem__, '2004-12-31 00:15') def test_partial_slice_minutely(self): rng = DatetimeIndex(freq='S', start=datetime(2005, 1, 1, 23, 59, 0), @@ -104,7 +107,7 @@ def test_partial_slice_minutely(self): tm.assert_series_equal(result, s.iloc[:60]) self.assertEqual(s[Timestamp('2005-1-1 23:59:00')], s.iloc[0]) - self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:00:00') + 
pytest.raises(Exception, s.__getitem__, '2004-12-31 00:00:00') def test_partial_slice_second_precision(self): rng = DatetimeIndex(start=datetime(2005, 1, 1, 0, 0, 59, @@ -152,7 +155,7 @@ def test_partial_slicing_dataframe(self): result = df['a'][ts_string] assert isinstance(result, np.int64) self.assertEqual(result, expected) - self.assertRaises(KeyError, df.__getitem__, ts_string) + pytest.raises(KeyError, df.__getitem__, ts_string) # Timestamp with resolution less precise than index for fmt in formats[:rnum]: @@ -179,15 +182,15 @@ def test_partial_slicing_dataframe(self): result = df['a'][ts_string] assert isinstance(result, np.int64) self.assertEqual(result, 2) - self.assertRaises(KeyError, df.__getitem__, ts_string) + pytest.raises(KeyError, df.__getitem__, ts_string) # Not compatible with existing key # Should raise KeyError for fmt, res in list(zip(formats, resolutions))[rnum + 1:]: ts = index[1] + Timedelta("1 " + res) ts_string = ts.strftime(fmt) - self.assertRaises(KeyError, df['a'].__getitem__, ts_string) - self.assertRaises(KeyError, df.__getitem__, ts_string) + pytest.raises(KeyError, df['a'].__getitem__, ts_string) + pytest.raises(KeyError, df.__getitem__, ts_string) def test_partial_slicing_with_multiindex(self): @@ -216,7 +219,7 @@ def test_partial_slicing_with_multiindex(self): def f(): df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')] - self.assertRaises(KeyError, f) + pytest.raises(KeyError, f) # GH 4294 # partial slice on a series mi diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index faebd4b2761f1..b7427f1935a8c 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -184,8 +184,8 @@ def test_to_datetime_dt64s(self): oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ] for dt in oob_dts: - self.assertRaises(ValueError, pd.to_datetime, dt, errors='raise') - self.assertRaises(ValueError, Timestamp, dt) + pytest.raises(ValueError, pd.to_datetime, dt, errors='raise') + pytest.raises(ValueError, Timestamp, dt) self.assertIs(pd.to_datetime(dt, errors='coerce'), NaT) def test_to_datetime_array_of_dt64s(self): @@ -201,8 +201,8 @@ def test_to_datetime_array_of_dt64s(self): # A list of datetimes where the last one is out of bounds dts_with_oob = dts + [np.datetime64('9999-01-01')] - self.assertRaises(ValueError, pd.to_datetime, dts_with_oob, - errors='raise') + pytest.raises(ValueError, pd.to_datetime, dts_with_oob, + errors='raise') tm.assert_numpy_array_equal( pd.to_datetime(dts_with_oob, box=False, errors='coerce'), @@ -241,7 +241,7 @@ def test_to_datetime_tz(self): # mixed tzs will raise arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] - self.assertRaises(ValueError, lambda: pd.to_datetime(arr)) + pytest.raises(ValueError, lambda: pd.to_datetime(arr)) def test_to_datetime_tz_pytz(self): @@ -309,17 +309,17 @@ def test_to_datetime_tz_psycopg2(self): def test_datetime_bool(self): # GH13176 - with self.assertRaises(TypeError): + with pytest.raises(TypeError): to_datetime(False) self.assertTrue(to_datetime(False, errors="coerce") is NaT) self.assertEqual(to_datetime(False, errors="ignore"), False) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): to_datetime(True) self.assertTrue(to_datetime(True, errors="coerce") is NaT) self.assertEqual(to_datetime(True, errors="ignore"), True) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): to_datetime([False, 
datetime.today()]) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): to_datetime(['20130101', True]) tm.assert_index_equal(to_datetime([0, False, NaT, 0.0], errors="coerce"), @@ -329,9 +329,9 @@ def test_datetime_bool(self): def test_datetime_invalid_datatype(self): # GH13176 - with self.assertRaises(TypeError): + with pytest.raises(TypeError): pd.to_datetime(bool) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): pd.to_datetime(pd.to_datetime) @@ -341,7 +341,7 @@ def test_unit(self): # GH 11758 # test proper behavior with erros - with self.assertRaises(ValueError): + with pytest.raises(ValueError): to_datetime([1], unit='D', format='%Y%m%d') values = [11111111, 1, 1.0, tslib.iNaT, NaT, np.nan, @@ -358,7 +358,7 @@ def test_unit(self): 'NaT', 'NaT', 'NaT', 'NaT', 'NaT']) tm.assert_index_equal(result, expected) - with self.assertRaises(tslib.OutOfBoundsDatetime): + with pytest.raises(tslib.OutOfBoundsDatetime): to_datetime(values, unit='D', errors='raise') values = [1420043460000, tslib.iNaT, NaT, np.nan, 'NaT'] @@ -372,7 +372,7 @@ def test_unit(self): expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT']) tm.assert_index_equal(result, expected) - with self.assertRaises(tslib.OutOfBoundsDatetime): + with pytest.raises(tslib.OutOfBoundsDatetime): to_datetime(values, errors='raise', unit='s') # if we have a string, then we raise a ValueError @@ -441,7 +441,7 @@ def test_unit_mixed(self): result = pd.to_datetime(arr, errors='coerce') tm.assert_index_equal(result, expected) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): pd.to_datetime(arr, errors='raise') expected = DatetimeIndex(['NaT', @@ -451,7 +451,7 @@ def test_unit_mixed(self): result = pd.to_datetime(arr, errors='coerce') tm.assert_index_equal(result, expected) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): pd.to_datetime(arr, errors='raise') def test_dataframe(self): @@ -529,7 +529,7 @@ def test_dataframe(self): df2 = DataFrame({'year': [2015, 2016], 'month': [2, 20], 'day': [4, 5]}) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): to_datetime(df2) result = to_datetime(df2, errors='coerce') expected = Series([Timestamp('20150204 00:00:00'), @@ -537,7 +537,7 @@ def test_dataframe(self): assert_series_equal(result, expected) # extra columns - with self.assertRaises(ValueError): + with pytest.raises(ValueError): df2 = df.copy() df2['foo'] = 1 to_datetime(df2) @@ -548,7 +548,7 @@ def test_dataframe(self): ['year', 'month', 'second'], ['month', 'day'], ['year', 'day', 'second']]: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): to_datetime(df[c]) # duplicates @@ -556,7 +556,7 @@ def test_dataframe(self): 'month': [2, 20], 'day': [4, 5]}) df2.columns = ['year', 'year', 'day'] - with self.assertRaises(ValueError): + with pytest.raises(ValueError): to_datetime(df2) df2 = DataFrame({'year': [2015, 2016], @@ -564,7 +564,7 @@ def test_dataframe(self): 'day': [4, 5], 'hour': [4, 5]}) df2.columns = ['year', 'month', 'day', 'day'] - with self.assertRaises(ValueError): + with pytest.raises(ValueError): to_datetime(df2) def test_dataframe_dtypes(self): @@ -591,7 +591,7 @@ def test_dataframe_dtypes(self): df = DataFrame({'year': [2000, 2001], 'month': [1.5, 1], 'day': [1, 1]}) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): to_datetime(df) @@ -631,7 +631,7 @@ def test_to_datetime_default(self): # dayfirst is essentially broken # to_datetime('01-13-2012', dayfirst=True) - # 
self.assertRaises(ValueError, to_datetime('01-13-2012', + # pytest.raises(ValueError, to_datetime('01-13-2012', # dayfirst=True)) def test_to_datetime_on_datetime64_series(self): @@ -644,7 +644,7 @@ def test_to_datetime_on_datetime64_series(self): def test_to_datetime_with_space_in_series(self): # GH 6428 s = Series(['10/18/2006', '10/18/2008', ' ']) - tm.assertRaises(ValueError, lambda: to_datetime(s, errors='raise')) + pytest.raises(ValueError, lambda: to_datetime(s, errors='raise')) result_coerce = to_datetime(s, errors='coerce') expected_coerce = Series([datetime(2006, 10, 18), datetime(2008, 10, 18), @@ -665,12 +665,12 @@ def test_to_datetime_with_apply(self): assert_series_equal(result, expected) td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3]) - self.assertRaises(ValueError, - lambda: pd.to_datetime(td, format='%b %y', - errors='raise')) - self.assertRaises(ValueError, - lambda: td.apply(pd.to_datetime, format='%b %y', - errors='raise')) + pytest.raises(ValueError, + lambda: pd.to_datetime(td, format='%b %y', + errors='raise')) + pytest.raises(ValueError, + lambda: td.apply(pd.to_datetime, format='%b %y', + errors='raise')) expected = pd.to_datetime(td, format='%b %y', errors='coerce') result = td.apply( @@ -713,7 +713,7 @@ def test_to_datetime_unprocessable_input(self): to_datetime([1, '1'], errors='ignore'), np.array([1, '1'], dtype='O') ) - self.assertRaises(TypeError, to_datetime, [1, '1'], errors='raise') + pytest.raises(TypeError, to_datetime, [1, '1'], errors='raise') def test_to_datetime_other_datetime64_units(self): # 5/25/2012 @@ -767,13 +767,13 @@ def test_string_na_nat_conversion(self): malformed = np.array(['1/100/2000', np.nan], dtype=object) # GH 10636, default is now 'raise' - self.assertRaises(ValueError, - lambda: to_datetime(malformed, errors='raise')) + pytest.raises(ValueError, + lambda: to_datetime(malformed, errors='raise')) result = to_datetime(malformed, errors='ignore') tm.assert_numpy_array_equal(result, malformed) - self.assertRaises(ValueError, to_datetime, malformed, errors='raise') + pytest.raises(ValueError, to_datetime, malformed, errors='raise') idx = ['a', 'b', 'c', 'd', 'e'] series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, @@ -1002,14 +1002,14 @@ def test_day_not_in_month_coerce(self): errors='coerce'))) def test_day_not_in_month_raise(self): - self.assertRaises(ValueError, to_datetime, '2015-02-29', - errors='raise') - self.assertRaises(ValueError, to_datetime, '2015-02-29', - errors='raise', format="%Y-%m-%d") - self.assertRaises(ValueError, to_datetime, '2015-02-32', - errors='raise', format="%Y-%m-%d") - self.assertRaises(ValueError, to_datetime, '2015-04-31', - errors='raise', format="%Y-%m-%d") + pytest.raises(ValueError, to_datetime, '2015-02-29', + errors='raise') + pytest.raises(ValueError, to_datetime, '2015-02-29', + errors='raise', format="%Y-%m-%d") + pytest.raises(ValueError, to_datetime, '2015-02-32', + errors='raise', format="%Y-%m-%d") + pytest.raises(ValueError, to_datetime, '2015-04-31', + errors='raise', format="%Y-%m-%d") def test_day_not_in_month_ignore(self): self.assertEqual(to_datetime( @@ -1139,7 +1139,7 @@ def test_parsers_quarter_invalid(self): cases = ['2Q 2005', '2Q-200A', '2Q-200', '22Q2005', '6Q-20', '2Q200.'] for case in cases: - self.assertRaises(ValueError, tools.parse_time_string, case) + pytest.raises(ValueError, tools.parse_time_string, case) def test_parsers_dayfirst_yearfirst(self): tm._skip_if_no_dateutil() @@ -1272,7 +1272,7 @@ def test_parsers_time(self): 
self.assertEqual(tools.to_time(time_string), expected) new_string = "14.15" - self.assertRaises(ValueError, tools.to_time, new_string) + pytest.raises(ValueError, tools.to_time, new_string) self.assertEqual(tools.to_time(new_string, format="%H.%M"), expected) arg = ["14:15", "20:20"] @@ -1287,7 +1287,7 @@ def test_parsers_time(self): res = tools.to_time(arg, format="%I:%M%p", errors="ignore") tm.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): tools.to_time(arg, format="%I:%M%p", errors="raise") tm.assert_series_equal(tools.to_time(Series(arg, name="test")), @@ -1375,7 +1375,7 @@ def test_parsers_iso8601(self): # wrong separator for HHMMSS '2001-01-01 12-34-56'] for date_str in invalid_cases: - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): tslib._test_parse_iso8601(date_str) # If no ValueError raised, let me know which case failed. raise Exception(date_str) @@ -1459,11 +1459,10 @@ def test_coercing_dates_outside_of_datetime64_ns_bounds(self): ] for invalid_date in invalid_dates: - self.assertRaises(ValueError, - tslib.array_to_datetime, - np.array( - [invalid_date], dtype='object'), - errors='raise', ) + pytest.raises(ValueError, + tslib.array_to_datetime, + np.array([invalid_date], dtype='object'), + errors='raise', ) tm.assert_numpy_array_equal( tslib.array_to_datetime( np.array([invalid_date], dtype='object'), diff --git a/pandas/tests/indexes/period/test_asfreq.py b/pandas/tests/indexes/period/test_asfreq.py index adf68605b65db..4d1fe9c46f126 100644 --- a/pandas/tests/indexes/period/test_asfreq.py +++ b/pandas/tests/indexes/period/test_asfreq.py @@ -1,5 +1,6 @@ -import numpy as np +import pytest +import numpy as np import pandas as pd from pandas.util import testing as tm from pandas import PeriodIndex, Series, DataFrame @@ -69,7 +70,7 @@ def test_asfreq(self): self.assertEqual(pi7.asfreq('H', 'S'), pi5) self.assertEqual(pi7.asfreq('Min', 'S'), pi6) - self.assertRaises(ValueError, pi7.asfreq, 'T', 'foo') + pytest.raises(ValueError, pi7.asfreq, 'T', 'foo') result1 = pi1.asfreq('3M') result2 = pi1.asfreq('M') expected = PeriodIndex(freq='M', start='2001-12', end='2001-12') diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 4e53ac56ec187..8f2b03829b128 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -1,5 +1,6 @@ -import numpy as np +import pytest +import numpy as np import pandas as pd import pandas.util.testing as tm import pandas.core.indexes.period as period @@ -58,12 +59,12 @@ def test_constructor_field_arrays(self): years = [2007, 2007, 2007] months = [1, 2] - self.assertRaises(ValueError, PeriodIndex, year=years, month=months, - freq='M') - self.assertRaises(ValueError, PeriodIndex, year=years, month=months, - freq='2M') - self.assertRaises(ValueError, PeriodIndex, year=years, month=months, - freq='M', start=Period('2007-01', freq='M')) + pytest.raises(ValueError, PeriodIndex, year=years, month=months, + freq='M') + pytest.raises(ValueError, PeriodIndex, year=years, month=months, + freq='2M') + pytest.raises(ValueError, PeriodIndex, year=years, month=months, + freq='M', start=Period('2007-01', freq='M')) years = [2007, 2007, 2007] months = [1, 2, 3] @@ -73,8 +74,8 @@ def test_constructor_field_arrays(self): def test_constructor_U(self): # U was used as undefined period - self.assertRaises(ValueError, period_range, '2007-1-1', periods=500, - 
freq='X') + pytest.raises(ValueError, period_range, '2007-1-1', periods=500, + freq='X') def test_constructor_nano(self): idx = period_range(start=Period(ordinal=1, freq='N'), @@ -95,17 +96,17 @@ def test_constructor_arrays_negative_year(self): tm.assert_index_equal(pindex.quarter, pd.Index(quarters)) def test_constructor_invalid_quarters(self): - self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004), - quarter=lrange(4), freq='Q-DEC') + pytest.raises(ValueError, PeriodIndex, year=lrange(2000, 2004), + quarter=lrange(4), freq='Q-DEC') def test_constructor_corner(self): - self.assertRaises(ValueError, PeriodIndex, periods=10, freq='A') + pytest.raises(ValueError, PeriodIndex, periods=10, freq='A') start = Period('2007', freq='A-JUN') end = Period('2010', freq='A-DEC') - self.assertRaises(ValueError, PeriodIndex, start=start, end=end) - self.assertRaises(ValueError, PeriodIndex, start=start) - self.assertRaises(ValueError, PeriodIndex, end=end) + pytest.raises(ValueError, PeriodIndex, start=start, end=end) + pytest.raises(ValueError, PeriodIndex, start=start) + pytest.raises(ValueError, PeriodIndex, end=end) result = period_range('2007-01', periods=10.5, freq='M') exp = period_range('2007-01', periods=10, freq='M') @@ -118,10 +119,10 @@ def test_constructor_fromarraylike(self): tm.assert_index_equal(PeriodIndex(idx.values), idx) tm.assert_index_equal(PeriodIndex(list(idx.values)), idx) - self.assertRaises(ValueError, PeriodIndex, idx._values) - self.assertRaises(ValueError, PeriodIndex, list(idx._values)) - self.assertRaises(TypeError, PeriodIndex, - data=Period('2007', freq='A')) + pytest.raises(ValueError, PeriodIndex, idx._values) + pytest.raises(ValueError, PeriodIndex, list(idx._values)) + pytest.raises(TypeError, PeriodIndex, + data=Period('2007', freq='A')) result = PeriodIndex(iter(idx)) tm.assert_index_equal(result, idx) @@ -152,7 +153,7 @@ def test_constructor_datetime64arr(self): vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64) vals = vals.view(np.dtype('M8[us]')) - self.assertRaises(ValueError, PeriodIndex, vals, freq='D') + pytest.raises(ValueError, PeriodIndex, vals, freq='D') def test_constructor_dtype(self): # passing a dtype with a tz should localize @@ -288,17 +289,17 @@ def test_constructor_simple_new_empty(self): def test_constructor_floats(self): # GH13079 for floats in [[1.1, 2.1], np.array([1.1, 2.1])]: - with self.assertRaises(TypeError): + with pytest.raises(TypeError): pd.PeriodIndex._simple_new(floats, freq='M') - with self.assertRaises(TypeError): + with pytest.raises(TypeError): pd.PeriodIndex(floats, freq='M') def test_constructor_nat(self): - self.assertRaises(ValueError, period_range, start='NaT', - end='2011-01-01', freq='M') - self.assertRaises(ValueError, period_range, start='2011-01-01', - end='NaT', freq='M') + pytest.raises(ValueError, period_range, start='NaT', + end='2011-01-01', freq='M') + pytest.raises(ValueError, period_range, start='2011-01-01', + end='NaT', freq='M') def test_constructor_year_and_quarter(self): year = pd.Series([2001, 2002, 2003]) @@ -427,9 +428,9 @@ def test_constructor(self): # Mixed freq should fail vals = [end_intv, Period('2006-12-31', 'w')] - self.assertRaises(ValueError, PeriodIndex, vals) + pytest.raises(ValueError, PeriodIndex, vals) vals = np.array(vals) - self.assertRaises(ValueError, PeriodIndex, vals) + pytest.raises(ValueError, PeriodIndex, vals) def test_constructor_error(self): start = Period('02-Apr-2005', 'B') @@ -478,7 +479,7 @@ def setUp(self): self.series = 
Series(period_range('2000-01-01', periods=10, freq='D')) def test_constructor_cant_cast_period(self): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): Series(period_range('2000-01-01', periods=10, freq='D'), dtype=float) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 419a917415f98..982ac7d96a9cc 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -1,5 +1,7 @@ from datetime import datetime +import pytest + import numpy as np import pandas as pd from pandas.util import testing as tm @@ -76,7 +78,7 @@ def test_getitem_partial(self): rng = period_range('2007-01', periods=50, freq='M') ts = Series(np.random.randn(len(rng)), rng) - self.assertRaises(KeyError, ts.__getitem__, '2006') + pytest.raises(KeyError, ts.__getitem__, '2006') result = ts['2008'] self.assertTrue((result.index.year == 2008).all()) @@ -148,13 +150,13 @@ def test_getitem_seconds(self): '2013/02/01 09:00'] for v in values: if _np_version_under1p9: - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): idx[v] else: # GH7116 # these show deprecations as we are trying # to slice with non-integer indexers - # with tm.assertRaises(IndexError): + # with pytest.raises(IndexError): # idx[v] continue @@ -177,13 +179,13 @@ def test_getitem_day(self): for v in values: if _np_version_under1p9: - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): idx[v] else: # GH7116 # these show deprecations as we are trying # to slice with non-integer indexers - # with tm.assertRaises(IndexError): + # with pytest.raises(IndexError): # idx[v] continue @@ -194,7 +196,7 @@ def test_getitem_day(self): invalid = ['2013/02/01 9H', '2013/02/01 09:00'] for v in invalid: - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): s[v] @@ -203,7 +205,7 @@ class TestIndexing(tm.TestCase): def test_get_loc_msg(self): idx = period_range('2000-1-1', freq='A', periods=10) bad_period = Period('2012', 'A') - self.assertRaises(KeyError, idx.get_loc, bad_period) + pytest.raises(KeyError, idx.get_loc, bad_period) try: idx.get_loc(bad_period) @@ -314,5 +316,5 @@ def test_take_fill_value(self): with tm.assertRaisesRegexp(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index dd91e19c5a9c5..70a27eada7774 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np from datetime import timedelta @@ -287,10 +289,10 @@ def test_add_iadd(self): other = pd.period_range('1/6/2000', freq='D', periods=5) # previously performed setop union, now raises TypeError (GH14164) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): rng + other - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): rng += other # offset @@ -392,10 +394,10 @@ def test_sub_isub(self): rng = pd.period_range('1/1/2000', freq='D', periods=5) other = pd.period_range('1/6/2000', freq='D', periods=5) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): rng - other - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): rng -= other # offset @@ -747,7 +749,7 @@ def test_shift(self): # GH 9903 idx = pd.PeriodIndex([], name='xxx', freq='H') - with tm.assertRaises(TypeError): + with 
pytest.raises(TypeError): # period shift doesn't accept freq idx.shift(1, freq='H') @@ -885,29 +887,29 @@ def test_pi_ops_errors(self): with tm.assertRaisesRegexp(TypeError, msg): obj + ng - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): # error message differs between PY2 and 3 ng + obj with tm.assertRaisesRegexp(TypeError, msg): obj - ng - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): np.add(obj, ng) if _np_version_under1p10: self.assertIs(np.add(ng, obj), NotImplemented) else: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): np.add(ng, obj) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): np.subtract(obj, ng) if _np_version_under1p10: self.assertIs(np.subtract(ng, obj), NotImplemented) else: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): np.subtract(ng, obj) def test_pi_ops_nat(self): diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index b051c4a0dcab1..a8ac6a3e55fe5 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np import pandas as pd @@ -75,7 +77,7 @@ def test_range_slice_day(self): values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', '2013/02/01 09:00'] for v in values: - with tm.assertRaises(exc): + with pytest.raises(exc): idx[v:] s = Series(np.random.rand(len(idx)), index=idx) @@ -87,7 +89,7 @@ def test_range_slice_day(self): invalid = ['2013/02/01 9H', '2013/02/01 09:00'] for v in invalid: - with tm.assertRaises(exc): + with pytest.raises(exc): idx[v:] def test_range_slice_seconds(self): @@ -105,7 +107,7 @@ def test_range_slice_seconds(self): values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', '2013/02/01 09:00'] for v in values: - with tm.assertRaises(exc): + with pytest.raises(exc): idx[v:] s = Series(np.random.rand(len(idx)), index=idx) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index bb87e780012ce..9f72c10316c90 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -47,10 +47,10 @@ def test_astype_raises(self): # GH 13149, GH 13209 idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') - self.assertRaises(ValueError, idx.astype, str) - self.assertRaises(ValueError, idx.astype, float) - self.assertRaises(ValueError, idx.astype, 'timedelta64') - self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') + pytest.raises(ValueError, idx.astype, str) + pytest.raises(ValueError, idx.astype, float) + pytest.raises(ValueError, idx.astype, 'timedelta64') + pytest.raises(ValueError, idx.astype, 'timedelta64[ns]') def test_pickle_compat_construction(self): pass @@ -88,7 +88,7 @@ def test_get_loc(self): msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' with tm.assertRaisesRegexp(ValueError, msg): idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') def test_where(self): @@ -197,10 +197,10 @@ def test_fillna_period(self): pd.Period('2011-01-01', freq='D')), exp) def test_no_millisecond_field(self): - with self.assertRaises(AttributeError): + with pytest.raises(AttributeError): DatetimeIndex.millisecond - with self.assertRaises(AttributeError): + with pytest.raises(AttributeError): DatetimeIndex([]).millisecond def 
test_difference_freq(self): @@ -355,9 +355,9 @@ def test_period_index_length(self): # Mixed freq should fail vals = [end_intv, Period('2006-12-31', 'w')] - self.assertRaises(ValueError, PeriodIndex, vals) + pytest.raises(ValueError, PeriodIndex, vals) vals = np.array(vals) - self.assertRaises(ValueError, PeriodIndex, vals) + pytest.raises(ValueError, PeriodIndex, vals) def test_fields(self): # year, month, day, hour, minute @@ -530,7 +530,7 @@ def test_contains_nat(self): self.assertTrue(np.nan in idx) def test_periods_number_check(self): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): period_range('2011-1-1', '2012-1-1', 'B') def test_start_time(self): @@ -720,7 +720,7 @@ def test_is_full(self): self.assertTrue(index.is_full) index = PeriodIndex([2006, 2005, 2005], freq='A') - self.assertRaises(ValueError, getattr, index, 'is_full') + pytest.raises(ValueError, getattr, index, 'is_full') self.assertTrue(index[:0].is_full) diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index e142b5179ad55..54dafd832fd23 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np import pandas as pd @@ -106,7 +108,7 @@ def test_union_misc(self): # raise if different frequencies index = period_range('1/1/2000', '1/20/2000', freq='D') index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): index.union(index2) msg = 'can only call with other PeriodIndex-ed objects' @@ -114,7 +116,7 @@ def test_union_misc(self): index.join(index.to_timestamp()) index3 = period_range('1/1/2000', '1/20/2000', freq='2D') - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): index.join(index3) def test_union_dataframe_index(self): @@ -143,11 +145,11 @@ def test_intersection(self): # raise if different frequencies index = period_range('1/1/2000', '1/20/2000', freq='D') index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): index.intersection(index2) index3 = period_range('1/1/2000', '1/20/2000', freq='2D') - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): index.intersection(index3) def test_intersection_cases(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 221bd8d50cee6..695d3aa7ebe3a 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +import pytest + from datetime import datetime, timedelta import pandas.util.testing as tm @@ -83,11 +85,11 @@ def test_constructor(self): # what to do here? # arr = np.array(5.) 
- # self.assertRaises(Exception, arr.view, Index) + # pytest.raises(Exception, arr.view, Index) def test_constructor_corner(self): # corner case - self.assertRaises(TypeError, Index, 0) + pytest.raises(TypeError, Index, 0) def test_construction_list_mixed_tuples(self): # see gh-10697: if we are constructing from a mixed list of tuples, @@ -387,7 +389,7 @@ def test_view_with_args(self): ind = self.indices[i] # with arguments - self.assertRaises(TypeError, lambda: ind.view('i8')) + pytest.raises(TypeError, lambda: ind.view('i8')) # these are ok for i in list(set(self.indices.keys()) - set(restricted)): @@ -454,7 +456,7 @@ def test_delete(self): tm.assert_index_equal(result, expected) self.assertEqual(result.name, expected.name) - with tm.assertRaises((IndexError, ValueError)): + with pytest.raises((IndexError, ValueError)): # either depending on numpy version result = idx.delete(5) @@ -589,7 +591,7 @@ def test_empty_fancy(self): # np.ndarray only accepts ndarray of int & bool dtypes, so should # Index. - self.assertRaises(IndexError, idx.__getitem__, empty_farr) + pytest.raises(IndexError, idx.__getitem__, empty_farr) def test_getitem(self): arr = np.array(self.dateIndex) @@ -773,10 +775,10 @@ def test_add(self): def test_sub(self): idx = self.strIndex - self.assertRaises(TypeError, lambda: idx - 'a') - self.assertRaises(TypeError, lambda: idx - idx) - self.assertRaises(TypeError, lambda: idx - idx.tolist()) - self.assertRaises(TypeError, lambda: idx.tolist() - idx) + pytest.raises(TypeError, lambda: idx - 'a') + pytest.raises(TypeError, lambda: idx - idx) + pytest.raises(TypeError, lambda: idx - idx.tolist()) + pytest.raises(TypeError, lambda: idx.tolist() - idx) def test_map_identity_mapping(self): # GH 12766 @@ -1126,10 +1128,10 @@ def test_get_indexer_strings(self): expected = np.array([0, 0, 1, -1], dtype=np.intp) tm.assert_numpy_array_equal(actual, expected) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): idx.get_indexer(['a', 'b', 'c', 'd'], method='nearest') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2) def test_get_loc(self): @@ -1139,7 +1141,7 @@ def test_get_loc(self): self.assertEqual(idx.get_loc(1, method=method), 1) if method is not None: self.assertEqual(idx.get_loc(1, method=method, tolerance=0), 1) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): idx.get_loc([1, 2], method=method) for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: @@ -1149,7 +1151,7 @@ def test_get_loc(self): self.assertEqual(idx.get_loc(1.1, method, tolerance=1), loc) for method in ['pad', 'backfill', 'nearest']: - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): idx.get_loc(1.1, method, tolerance=0.05) with tm.assertRaisesRegexp(ValueError, 'must be numeric'): @@ -1158,9 +1160,9 @@ def test_get_loc(self): idx.get_loc(1.1, tolerance=1) idx = pd.Index(['a', 'c']) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): idx.get_loc('a', method='nearest') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): idx.get_loc('a', method='pad', tolerance='invalid') def test_slice_locs(self): @@ -1192,15 +1194,15 @@ def test_slice_locs(self): # int slicing with floats # GH 4892, these are all TypeErrors idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=int)) - self.assertRaises(TypeError, - lambda: idx.slice_locs(5.0, 10.0), (3, n)) - self.assertRaises(TypeError, - lambda: idx.slice_locs(4.5, 10.5), (3, 8)) + pytest.raises(TypeError, + 
lambda: idx.slice_locs(5.0, 10.0), (3, n)) + pytest.raises(TypeError, + lambda: idx.slice_locs(4.5, 10.5), (3, 8)) idx2 = idx[::-1] - self.assertRaises(TypeError, - lambda: idx2.slice_locs(8.5, 1.5), (2, 6)) - self.assertRaises(TypeError, - lambda: idx2.slice_locs(10.5, -1), (0, n)) + pytest.raises(TypeError, + lambda: idx2.slice_locs(8.5, 1.5), (2, 6)) + pytest.raises(TypeError, + lambda: idx2.slice_locs(10.5, -1), (0, n)) def test_slice_locs_dup(self): idx = Index(['a', 'a', 'b', 'c', 'd', 'd']) @@ -1226,8 +1228,8 @@ def test_slice_locs_dup(self): def test_slice_locs_na(self): idx = Index([np.nan, 1, 2]) - self.assertRaises(KeyError, idx.slice_locs, start=1.5) - self.assertRaises(KeyError, idx.slice_locs, end=1.5) + pytest.raises(KeyError, idx.slice_locs, start=1.5) + pytest.raises(KeyError, idx.slice_locs, end=1.5) self.assertEqual(idx.slice_locs(1), (1, 3)) self.assertEqual(idx.slice_locs(np.nan), (0, 3)) @@ -1268,8 +1270,8 @@ def test_drop(self): expected = self.strIndex[lrange(5) + lrange(10, n)] tm.assert_index_equal(dropped, expected) - self.assertRaises(ValueError, self.strIndex.drop, ['foo', 'bar']) - self.assertRaises(ValueError, self.strIndex.drop, ['1', 'bar']) + pytest.raises(ValueError, self.strIndex.drop, ['foo', 'bar']) + pytest.raises(ValueError, self.strIndex.drop, ['1', 'bar']) # errors='ignore' mixed = drop.tolist() + ['foo'] @@ -1291,7 +1293,7 @@ def test_drop(self): tm.assert_index_equal(dropped, expected) # errors='ignore' - self.assertRaises(ValueError, ser.drop, [3, 4]) + pytest.raises(ValueError, ser.drop, [3, 4]) dropped = ser.drop(4, errors='ignore') expected = Index([1, 2, 3]) @@ -1388,19 +1390,19 @@ def check_idx(idx): tm.assert_numpy_array_equal(expected, idx.isin(values, level=0)) tm.assert_numpy_array_equal(expected, idx.isin(values, level=-1)) - self.assertRaises(IndexError, idx.isin, values, level=1) - self.assertRaises(IndexError, idx.isin, values, level=10) - self.assertRaises(IndexError, idx.isin, values, level=-2) + pytest.raises(IndexError, idx.isin, values, level=1) + pytest.raises(IndexError, idx.isin, values, level=10) + pytest.raises(IndexError, idx.isin, values, level=-2) - self.assertRaises(KeyError, idx.isin, values, level=1.0) - self.assertRaises(KeyError, idx.isin, values, level='foobar') + pytest.raises(KeyError, idx.isin, values, level=1.0) + pytest.raises(KeyError, idx.isin, values, level='foobar') idx.name = 'foobar' tm.assert_numpy_array_equal(expected, idx.isin(values, level='foobar')) - self.assertRaises(KeyError, idx.isin, values, level='xyzzy') - self.assertRaises(KeyError, idx.isin, values, level=np.nan) + pytest.raises(KeyError, idx.isin, values, level='xyzzy') + pytest.raises(KeyError, idx.isin, values, level=np.nan) check_idx(Index(['qux', 'baz', 'foo', 'bar'])) # Float64Index overrides isin, so must be checked separately @@ -1529,7 +1531,7 @@ def test_take_fill_value(self): with tm.assertRaisesRegexp(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) def test_reshape_raise(self): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 18842f7886cad..057ea1a416275 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -131,12 +131,12 @@ def test_disallow_set_ops(self): # set ops (+/-) raise TypeError idx = pd.Index(pd.Categorical(['a', 'b'])) - self.assertRaises(TypeError, lambda: idx - idx) - self.assertRaises(TypeError, lambda: idx 
+ idx) - self.assertRaises(TypeError, lambda: idx - ['a', 'b']) - self.assertRaises(TypeError, lambda: idx + ['a', 'b']) - self.assertRaises(TypeError, lambda: ['a', 'b'] - idx) - self.assertRaises(TypeError, lambda: ['a', 'b'] + idx) + pytest.raises(TypeError, lambda: idx - idx) + pytest.raises(TypeError, lambda: idx + idx) + pytest.raises(TypeError, lambda: idx - ['a', 'b']) + pytest.raises(TypeError, lambda: idx + ['a', 'b']) + pytest.raises(TypeError, lambda: ['a', 'b'] - idx) + pytest.raises(TypeError, lambda: ['a', 'b'] + idx) def test_method_delegation(self): @@ -170,7 +170,7 @@ def test_method_delegation(self): list('aabbca'), categories=list('cabdef'), ordered=True)) # invalid - self.assertRaises(ValueError, lambda: ci.set_categories( + pytest.raises(ValueError, lambda: ci.set_categories( list('cab'), inplace=True)) def test_contains(self): @@ -193,8 +193,8 @@ def test_contains(self): def test_min_max(self): ci = self.create_index(ordered=False) - self.assertRaises(TypeError, lambda: ci.min()) - self.assertRaises(TypeError, lambda: ci.max()) + pytest.raises(TypeError, lambda: ci.min()) + pytest.raises(TypeError, lambda: ci.max()) ci = self.create_index(ordered=True) @@ -275,10 +275,10 @@ def test_append(self): tm.assert_index_equal(result, ci, exact=True) # appending with different or reordered categories is not ok - self.assertRaises( + pytest.raises( TypeError, lambda: ci.append(ci.values.set_categories(list('abcd')))) - self.assertRaises( + pytest.raises( TypeError, lambda: ci.append(ci.values.reorder_categories(list('abc')))) @@ -288,7 +288,7 @@ def test_append(self): tm.assert_index_equal(result, expected, exact=True) # invalid objects - self.assertRaises(TypeError, lambda: ci.append(Index(['a', 'd']))) + pytest.raises(TypeError, lambda: ci.append(Index(['a', 'd']))) # GH14298 - if base object is not categorical -> coerce to object result = Index(['c', 'a']).append(ci) @@ -316,7 +316,7 @@ def test_insert(self): tm.assert_index_equal(result, expected, exact=True) # invalid - self.assertRaises(TypeError, lambda: ci.insert(0, 'd')) + pytest.raises(TypeError, lambda: ci.insert(0, 'd')) def test_delete(self): @@ -439,12 +439,12 @@ def test_get_indexer(self): r1 = idx1.get_indexer(idx2) assert_almost_equal(r1, np.array([0, 1, 2, -1], dtype=np.intp)) - self.assertRaises(NotImplementedError, - lambda: idx2.get_indexer(idx1, method='pad')) - self.assertRaises(NotImplementedError, - lambda: idx2.get_indexer(idx1, method='backfill')) - self.assertRaises(NotImplementedError, - lambda: idx2.get_indexer(idx1, method='nearest')) + pytest.raises(NotImplementedError, + lambda: idx2.get_indexer(idx1, method='pad')) + pytest.raises(NotImplementedError, + lambda: idx2.get_indexer(idx1, method='backfill')) + pytest.raises(NotImplementedError, + lambda: idx2.get_indexer(idx1, method='nearest')) def test_get_loc(self): # GH 12531 @@ -454,7 +454,7 @@ def test_get_loc(self): self.assertEqual(cidx1.get_loc('e'), idx1.get_loc('e')) for i in [cidx1, idx1]: - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): i.get_loc('NOT-EXIST') # non-unique @@ -472,7 +472,7 @@ def test_get_loc(self): self.assertEqual(res, 4) for i in [cidx2, idx2]: - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): i.get_loc('NOT-EXIST') # non-unique, sliceable @@ -489,7 +489,7 @@ def test_get_loc(self): self.assertEqual(res, slice(2, 5, None)) for i in [cidx3, idx3]: - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): i.get_loc('c') def test_repr_roundtrip(self): @@ -581,10 +581,10 @@ def
test_equals_categorical(self): # invalid comparisons with tm.assertRaisesRegexp(ValueError, "Lengths must match"): ci1 == Index(['a', 'b', 'c']) - self.assertRaises(TypeError, lambda: ci1 == ci2) - self.assertRaises( + pytest.raises(TypeError, lambda: ci1 == ci2) + pytest.raises( TypeError, lambda: ci1 == Categorical(ci1.values, ordered=False)) - self.assertRaises( + pytest.raises( TypeError, lambda: ci1 == Categorical(ci1.values, categories=list('abc'))) @@ -866,7 +866,7 @@ def test_take_fill_value(self): with tm.assertRaisesRegexp(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) def test_take_fill_value_datetime(self): @@ -904,7 +904,7 @@ def test_take_fill_value_datetime(self): with tm.assertRaisesRegexp(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) def test_take_invalid_kwargs(self): diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index f9ddf936007b2..53b5c01c40899 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -114,7 +114,7 @@ def test_constructors_error(self): # non-intervals def f(): IntervalIndex.from_intervals([0.997, 4.0]) - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_properties(self): index = self.index @@ -212,7 +212,7 @@ def test_astype(self): for dtype in [np.int64, np.float64, 'datetime64[ns]', 'datetime64[ns, US/Eastern]', 'timedelta64', 'period[M]']: - self.assertRaises(ValueError, idx.astype, dtype) + pytest.raises(ValueError, idx.astype, dtype) result = idx.astype(object) tm.assert_index_equal(result, Index(idx.values, dtype='object')) @@ -251,9 +251,9 @@ def test_insert(self): actual = self.index.insert(2, Interval(2, 3)) self.assertTrue(expected.equals(actual)) - self.assertRaises(ValueError, self.index.insert, 0, 1) - self.assertRaises(ValueError, self.index.insert, 0, - Interval(2, 3, closed='left')) + pytest.raises(ValueError, self.index.insert, 0, 1) + pytest.raises(ValueError, self.index.insert, 0, + Interval(2, 3, closed='left')) def test_take(self): actual = self.index.take([0, 1]) @@ -328,13 +328,13 @@ def test_get_item(self): tm.assert_index_equal(result, expected) def test_get_loc_value(self): - self.assertRaises(KeyError, self.index.get_loc, 0) + pytest.raises(KeyError, self.index.get_loc, 0) self.assertEqual(self.index.get_loc(0.5), 0) self.assertEqual(self.index.get_loc(1), 0) self.assertEqual(self.index.get_loc(1.5), 1) self.assertEqual(self.index.get_loc(2), 1) - self.assertRaises(KeyError, self.index.get_loc, -1) - self.assertRaises(KeyError, self.index.get_loc, 3) + pytest.raises(KeyError, self.index.get_loc, -1) + pytest.raises(KeyError, self.index.get_loc, 3) idx = IntervalIndex.from_tuples([(0, 2), (1, 3)]) self.assertEqual(idx.get_loc(0.5), 0) @@ -344,10 +344,10 @@ def test_get_loc_value(self): tm.assert_numpy_array_equal(np.sort(idx.get_loc(2)), np.array([0, 1], dtype='int64')) self.assertEqual(idx.get_loc(3), 1) - self.assertRaises(KeyError, idx.get_loc, 3.5) + pytest.raises(KeyError, idx.get_loc, 3.5) idx = IntervalIndex.from_arrays([0, 2], [1, 3]) - self.assertRaises(KeyError, idx.get_loc, 1.5) + pytest.raises(KeyError, idx.get_loc, 1.5) def slice_locs_cases(self, breaks): # TODO: same tests for more index types @@ -401,16 +401,16 @@ def test_slice_locs_decreasing_float64(self): def 
test_slice_locs_fails(self): index = IntervalIndex.from_tuples([(1, 2), (0, 1), (2, 3)]) - with self.assertRaises(KeyError): + with pytest.raises(KeyError): index.slice_locs(1, 2) def test_get_loc_interval(self): self.assertEqual(self.index.get_loc(Interval(0, 1)), 0) self.assertEqual(self.index.get_loc(Interval(0, 0.5)), 0) self.assertEqual(self.index.get_loc(Interval(0, 1, 'left')), 0) - self.assertRaises(KeyError, self.index.get_loc, Interval(2, 3)) - self.assertRaises(KeyError, self.index.get_loc, - Interval(-1, 0, 'left')) + pytest.raises(KeyError, self.index.get_loc, Interval(2, 3)) + pytest.raises(KeyError, self.index.get_loc, + Interval(-1, 0, 'left')) def test_get_indexer(self): actual = self.index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3]) @@ -543,10 +543,10 @@ def test_symmetric_difference(self): tm.assert_index_equal(result, expected) def test_set_operation_errors(self): - self.assertRaises(ValueError, self.index.union, self.index.left) + pytest.raises(ValueError, self.index.union, self.index.left) other = IntervalIndex.from_breaks([0, 1, 2], closed='neither') - self.assertRaises(ValueError, self.index.union, other) + pytest.raises(ValueError, self.index.union, other) def test_isin(self): actual = self.index.isin(self.index) @@ -606,9 +606,9 @@ def test_comparison(self): self.index > 0 with self.assertRaisesRegexp(TypeError, 'unorderable types'): self.index <= 0 - with self.assertRaises(TypeError): + with pytest.raises(TypeError): self.index > np.arange(2) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.index > np.arange(3) def test_missing_values(self): @@ -677,7 +677,7 @@ def f(): index1.append(IntervalIndex.from_arrays([0, 1], [1, 2], closed='both')) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) class TestIntervalRange(tm.TestCase): @@ -694,28 +694,28 @@ def test_errors(self): def f(): interval_range(0) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): interval_range(periods=2) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): interval_range() - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # mixed units def f(): interval_range(0, Timestamp('20130101'), freq=2) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): interval_range(0, 10, freq=Timedelta('1day')) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) class TestIntervalTree(tm.TestCase): @@ -740,7 +740,7 @@ def test_get_indexer(self): tm.assert_numpy_array_equal( tree.get_indexer(np.array([1.0, 5.5, 6.5])), np.array([0, 4, -1], dtype='int64')) - with self.assertRaises(KeyError): + with pytest.raises(KeyError): tree.get_indexer(np.array([3.0])) def test_get_indexer_non_unique(self): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index d10cf4e71c916..c671a33e2f171 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -22,7 +22,7 @@ import pandas.util.testing as tm -from pandas.util.testing import (assertRaises, assertRaisesRegexp, +from pandas.util.testing import (assertRaisesRegexp, assert_almost_equal, assert_copy) @@ -87,7 +87,7 @@ def test_where(self): def f(): i.where(True) - self.assertRaises(NotImplementedError, f) + pytest.raises(NotImplementedError, f) def test_where_array_like(self): i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) @@ -96,7 +96,7 @@ def test_where_array_like(self): for klass in klasses: f = lambda: i.where(klass(cond)) - self.assertRaises(NotImplementedError, f) + 
pytest.raises(NotImplementedError, f) def test_repeat(self): reps = 2 @@ -657,7 +657,7 @@ def test_get_level_number_integer(self): self.index.names = [1, 0] self.assertEqual(self.index._get_level_number(1), 0) self.assertEqual(self.index._get_level_number(0), 1) - self.assertRaises(IndexError, self.index._get_level_number, 2) + pytest.raises(IndexError, self.index._get_level_number, 2) assertRaisesRegexp(KeyError, 'Level fourth not found', self.index._get_level_number, 'fourth') @@ -784,7 +784,7 @@ def test_from_arrays_invalid_input(self): invalid_inputs = [1, [1], [1, 2], [[1], 2], 'a', ['a'], ['a', 'b'], [['a'], 'b']] for i in invalid_inputs: - tm.assertRaises(TypeError, MultiIndex.from_arrays, arrays=i) + pytest.raises(TypeError, MultiIndex.from_arrays, arrays=i) def test_from_arrays_different_lengths(self): # GH13599 @@ -853,7 +853,7 @@ def test_from_product_invalid_input(self): invalid_inputs = [1, [1], [1, 2], [[1], 2], 'a', ['a'], ['a', 'b'], [['a'], 'b']] for i in invalid_inputs: - tm.assertRaises(TypeError, MultiIndex.from_product, iterables=i) + pytest.raises(TypeError, MultiIndex.from_product, iterables=i) def test_from_product_datetimeindex(self): dt_index = date_range('2000-01-01', periods=2) @@ -1108,17 +1108,17 @@ def test_getitem_group_select(self): def test_get_loc(self): self.assertEqual(self.index.get_loc(('foo', 'two')), 1) self.assertEqual(self.index.get_loc(('baz', 'two')), 3) - self.assertRaises(KeyError, self.index.get_loc, ('bar', 'two')) - self.assertRaises(KeyError, self.index.get_loc, 'quux') + pytest.raises(KeyError, self.index.get_loc, ('bar', 'two')) + pytest.raises(KeyError, self.index.get_loc, 'quux') - self.assertRaises(NotImplementedError, self.index.get_loc, 'foo', - method='nearest') + pytest.raises(NotImplementedError, self.index.get_loc, 'foo', + method='nearest') # 3 levels index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - self.assertRaises(KeyError, index.get_loc, (1, 1)) + pytest.raises(KeyError, index.get_loc, (1, 1)) self.assertEqual(index.get_loc((2, 0)), slice(3, 5)) def test_get_loc_duplicates(self): @@ -1126,7 +1126,7 @@ def test_get_loc_duplicates(self): result = index.get_loc(2) expected = slice(0, 4) self.assertEqual(result, expected) - # self.assertRaises(Exception, index.get_loc, 2) + # pytest.raises(Exception, index.get_loc, 2) index = Index(['c', 'a', 'a', 'b', 'b']) rs = index.get_loc('c') @@ -1160,7 +1160,7 @@ def test_get_loc_level(self): self.assertEqual(loc, expected) self.assertIsNone(new_index) - self.assertRaises(KeyError, index.get_loc_level, (2, 2)) + pytest.raises(KeyError, index.get_loc_level, (2, 2)) index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array( [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) @@ -1298,7 +1298,7 @@ def test_truncate(self): self.assertEqual(len(result.levels[0]), 2) # after < before - self.assertRaises(ValueError, index.truncate, 3, 1) + pytest.raises(ValueError, index.truncate, 3, 1) def test_get_indexer(self): major_axis = Index(lrange(4)) @@ -1353,9 +1353,9 @@ def test_get_indexer(self): def test_get_indexer_nearest(self): midx = MultiIndex.from_tuples([('a', 1), ('b', 2)]) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): midx.get_indexer(['a'], method='nearest') - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): midx.get_indexer(['a'], method='pad', tolerance=2) def 
test_hash_collisions(self): @@ -1631,13 +1631,13 @@ def test_sub(self): first = self.index # - now raises (previously was set op difference) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): first - self.index[-3:] - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): self.index[-3:] - first - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): self.index[-3:] - first.tolist() - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): first.tolist() - self.index[-3:] def test_difference(self): @@ -1781,13 +1781,13 @@ def test_drop(self): tm.assert_index_equal(dropped, expected) index = MultiIndex.from_tuples([('bar', 'two')]) - self.assertRaises(KeyError, self.index.drop, [('bar', 'two')]) - self.assertRaises(KeyError, self.index.drop, index) - self.assertRaises(KeyError, self.index.drop, ['foo', 'two']) + pytest.raises(KeyError, self.index.drop, [('bar', 'two')]) + pytest.raises(KeyError, self.index.drop, index) + pytest.raises(KeyError, self.index.drop, ['foo', 'two']) # partially correct argument mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')]) - self.assertRaises(KeyError, self.index.drop, mixed_index) + pytest.raises(KeyError, self.index.drop, mixed_index) # error='ignore' dropped = self.index.drop(index, errors='ignore') @@ -1809,7 +1809,7 @@ def test_drop(self): # mixed partial / full drop / error='ignore' mixed_index = ['foo', ('qux', 'one'), 'two'] - self.assertRaises(KeyError, self.index.drop, mixed_index) + pytest.raises(KeyError, self.index.drop, mixed_index) dropped = self.index.drop(mixed_index, errors='ignore') expected = self.index[[2, 3, 5]] tm.assert_index_equal(dropped, expected) @@ -1969,7 +1969,7 @@ def test_take_fill_value(self): with tm.assertRaisesRegexp(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) def take_invalid_kwargs(self): @@ -2355,7 +2355,7 @@ def test_isnull_behavior(self): # should not segfault GH5123 # NOTE: if MI representation changes, may make sense to allow # isnull(MI) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.isnull(self.index) def test_level_setting_resets_attributes(self): @@ -2527,18 +2527,18 @@ def test_isin_level_kwarg(self): tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1)) tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1)) - self.assertRaises(IndexError, idx.isin, vals_0, level=5) - self.assertRaises(IndexError, idx.isin, vals_0, level=-5) + pytest.raises(IndexError, idx.isin, vals_0, level=5) + pytest.raises(IndexError, idx.isin, vals_0, level=-5) - self.assertRaises(KeyError, idx.isin, vals_0, level=1.0) - self.assertRaises(KeyError, idx.isin, vals_1, level=-1.0) - self.assertRaises(KeyError, idx.isin, vals_1, level='A') + pytest.raises(KeyError, idx.isin, vals_0, level=1.0) + pytest.raises(KeyError, idx.isin, vals_1, level=-1.0) + pytest.raises(KeyError, idx.isin, vals_1, level='A') idx.names = ['A', 'B'] tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A')) tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B')) - self.assertRaises(KeyError, idx.isin, vals_1, level='C') + pytest.raises(KeyError, idx.isin, vals_1, level='C') def test_reindex_preserves_names_when_target_is_list_or_ndarray(self): # GH6552 @@ -2616,16 +2616,16 @@ def test_large_multiindex_error(self): df_below_1000000 = pd.DataFrame( 1, index=pd.MultiIndex.from_product([[1, 2], 
range(499999)]), columns=['dest']) - with assertRaises(KeyError): + with pytest.raises(KeyError): df_below_1000000.loc[(-1, 0), 'dest'] - with assertRaises(KeyError): + with pytest.raises(KeyError): df_below_1000000.loc[(3, 0), 'dest'] df_above_1000000 = pd.DataFrame( 1, index=pd.MultiIndex.from_product([[1, 2], range(500001)]), columns=['dest']) - with assertRaises(KeyError): + with pytest.raises(KeyError): df_above_1000000.loc[(-1, 0), 'dest'] - with assertRaises(KeyError): + with pytest.raises(KeyError): df_above_1000000.loc[(3, 0), 'dest'] def test_partial_string_timestamp_multiindex(self): @@ -2678,7 +2678,7 @@ def test_partial_string_timestamp_multiindex(self): # ambiguous and we don't want to extend this behavior forward to work # in multi-indexes. This would amount to selecting a scalar from a # column. - with assertRaises(KeyError): + with pytest.raises(KeyError): df['2016-01-01'] # partial string match on year only @@ -2707,7 +2707,7 @@ def test_partial_string_timestamp_multiindex(self): tm.assert_frame_equal(result, expected) # Slicing date on first level should break (of course) - with assertRaises(KeyError): + with pytest.raises(KeyError): df_swap.loc['2016-01-01'] # GH12685 (partial string with daily resolution or below) @@ -2771,12 +2771,12 @@ def test_unsortedindex(self): df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi, columns=['one', 'two']) - with assertRaises(UnsortedIndexError): + with pytest.raises(UnsortedIndexError): df.loc(axis=0)['z', :] df.sort_index(inplace=True) self.assertEqual(len(df.loc(axis=0)['z', :]), 2) - with assertRaises(KeyError): + with pytest.raises(KeyError): df.loc(axis=0)['q', :] def test_unsortedindex_doc_examples(self): @@ -2807,7 +2807,7 @@ def test_tuples_with_name_string(self): # GH 15110 and GH 14848 li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] - with assertRaises(ValueError): + with pytest.raises(ValueError): pd.Index(li, name='abc') - with assertRaises(ValueError): + with pytest.raises(ValueError): pd.Index(li, name='a') diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index b14175843ae20..62aabaaef100c 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +import pytest + from datetime import datetime from pandas.compat import range, PY3 @@ -74,10 +76,10 @@ def test_numeric_compat(self): tm.assert_index_equal(result, expected) # invalid - self.assertRaises(TypeError, - lambda: idx * date_range('20130101', periods=5)) - self.assertRaises(ValueError, lambda: idx * idx[0:3]) - self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) + pytest.raises(TypeError, + lambda: idx * date_range('20130101', periods=5)) + pytest.raises(ValueError, lambda: idx * idx[0:3]) + pytest.raises(ValueError, lambda: idx * np.array([1, 2])) result = divmod(idx, 2) with np.errstate(all='ignore'): @@ -235,9 +237,9 @@ def test_constructor(self): def test_constructor_invalid(self): # invalid - self.assertRaises(TypeError, Float64Index, 0.) - self.assertRaises(TypeError, Float64Index, ['a', 'b', 0.]) - self.assertRaises(TypeError, Float64Index, [Timestamp('20130101')]) + pytest.raises(TypeError, Float64Index, 0.) 
+ pytest.raises(TypeError, Float64Index, ['a', 'b', 0.]) + pytest.raises(TypeError, Float64Index, [Timestamp('20130101')]) def test_constructor_coerce(self): @@ -295,12 +297,12 @@ def test_astype(self): # invalid for dtype in ['M8[ns]', 'm8[ns]']: - self.assertRaises(TypeError, lambda: i.astype(dtype)) + pytest.raises(TypeError, lambda: i.astype(dtype)) # GH 13149 for dtype in ['int16', 'int32', 'int64']: i = Float64Index([0, 1.1, np.NAN]) - self.assertRaises(ValueError, lambda: i.astype(dtype)) + pytest.raises(ValueError, lambda: i.astype(dtype)) def test_equals_numeric(self): @@ -342,10 +344,10 @@ def test_get_loc(self): self.assertEqual(idx.get_loc(1.1, method), loc) self.assertEqual(idx.get_loc(1.1, method, tolerance=0.9), loc) - self.assertRaises(KeyError, idx.get_loc, 'foo') - self.assertRaises(KeyError, idx.get_loc, 1.5) - self.assertRaises(KeyError, idx.get_loc, 1.5, method='pad', - tolerance=0.1) + pytest.raises(KeyError, idx.get_loc, 'foo') + pytest.raises(KeyError, idx.get_loc, 1.5) + pytest.raises(KeyError, idx.get_loc, 1.5, method='pad', + tolerance=0.1) with tm.assertRaisesRegexp(ValueError, 'must be numeric'): idx.get_loc(1.4, method='nearest', tolerance='foo') @@ -359,7 +361,7 @@ def test_get_loc_na(self): self.assertEqual(idx.get_loc(1), 1) # representable by slice [0:2:2] - # self.assertRaises(KeyError, idx.slice_locs, np.nan) + # pytest.raises(KeyError, idx.slice_locs, np.nan) sliced = idx.slice_locs(np.nan) self.assertTrue(isinstance(sliced, tuple)) self.assertEqual(sliced, (0, 3)) @@ -367,7 +369,7 @@ def test_get_loc_na(self): # not representable by slice idx = Float64Index([np.nan, 1, np.nan, np.nan]) self.assertEqual(idx.get_loc(1), 1) - self.assertRaises(KeyError, idx.slice_locs, np.nan) + pytest.raises(KeyError, idx.slice_locs, np.nan) def test_contains_nans(self): i = Float64Index([1.0, 2.0, np.nan]) @@ -441,7 +443,7 @@ def test_take_fill_value(self): with tm.assertRaisesRegexp(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) @@ -534,11 +536,11 @@ def test_union_noncomparable(self): def test_cant_or_shouldnt_cast(self): # can't data = ['foo', 'bar', 'baz'] - self.assertRaises(TypeError, self._holder, data) + pytest.raises(TypeError, self._holder, data) # shouldn't data = ['0', '1', '2'] - self.assertRaises(TypeError, self._holder, data) + pytest.raises(TypeError, self._holder, data) def test_view_index(self): self.index.view(Index) @@ -578,7 +580,7 @@ def test_take_fill_value(self): with tm.assertRaisesRegexp(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) def test_slice_keep_name(self): @@ -642,7 +644,7 @@ def test_constructor(self): tm.assert_index_equal(index, expected) # scalar raise Exception - self.assertRaises(TypeError, Int64Index, 5) + pytest.raises(TypeError, Int64Index, 5) # copy arr = self.index.values diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 6168deba7e72c..0b27eb0c34111 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +import pytest + from datetime import datetime from itertools import combinations import operator @@ -111,13 +113,13 @@ def test_constructor(self): self.assertEqual(index.name, 'Foo') # we don't allow on a bare Index - self.assertRaises(TypeError, lambda: Index(0, 1000)) + 
pytest.raises(TypeError, lambda: Index(0, 1000)) # invalid args for i in [Index(['a', 'b']), Series(['a', 'b']), np.array(['a', 'b']), [], 'foo', datetime(2000, 1, 1, 0, 0), np.arange(0, 10), np.array([1]), [1]]: - self.assertRaises(TypeError, lambda: RangeIndex(i)) + pytest.raises(TypeError, lambda: RangeIndex(i)) def test_constructor_same(self): @@ -132,12 +134,12 @@ def test_constructor_same(self): result = RangeIndex(index) tm.assert_index_equal(result, index, exact=True) - self.assertRaises(TypeError, - lambda: RangeIndex(index, dtype='float64')) + pytest.raises(TypeError, + lambda: RangeIndex(index, dtype='float64')) def test_constructor_range(self): - self.assertRaises(TypeError, lambda: RangeIndex(range(1, 5, 2))) + pytest.raises(TypeError, lambda: RangeIndex(range(1, 5, 2))) result = RangeIndex.from_range(range(1, 5, 2)) expected = RangeIndex(1, 5, 2) @@ -160,8 +162,8 @@ def test_constructor_range(self): expected = RangeIndex(1, 5, 2) tm.assert_index_equal(result, expected, exact=True) - self.assertRaises(TypeError, - lambda: Index(range(1, 5, 2), dtype='float64')) + pytest.raises(TypeError, + lambda: Index(range(1, 5, 2), dtype='float64')) def test_constructor_name(self): # GH12288 @@ -249,11 +251,11 @@ def test_constructor_corner(self): tm.assert_index_equal(index, Index(arr)) # non-int raise Exception - self.assertRaises(TypeError, RangeIndex, '1', '10', '1') - self.assertRaises(TypeError, RangeIndex, 1.1, 10.2, 1.3) + pytest.raises(TypeError, RangeIndex, '1', '10', '1') + pytest.raises(TypeError, RangeIndex, 1.1, 10.2, 1.3) # invalid passed type - self.assertRaises(TypeError, lambda: RangeIndex(1, 5, dtype='float64')) + pytest.raises(TypeError, lambda: RangeIndex(1, 5, dtype='float64')) def test_copy(self): i = RangeIndex(5, name='Foo') @@ -306,7 +308,7 @@ def test_delete(self): tm.assert_index_equal(result, expected) self.assertEqual(result.name, expected.name) - with tm.assertRaises((IndexError, ValueError)): + with pytest.raises((IndexError, ValueError)): # either depending on numpy version result = idx.delete(len(idx)) @@ -696,10 +698,10 @@ def test_nbytes(self): def test_cant_or_shouldnt_cast(self): # can't - self.assertRaises(TypeError, RangeIndex, 'foo', 'bar', 'baz') + pytest.raises(TypeError, RangeIndex, 'foo', 'bar', 'baz') # shouldn't - self.assertRaises(TypeError, RangeIndex, '0', '1', '2') + pytest.raises(TypeError, RangeIndex, '0', '1', '2') def test_view_Index(self): self.index.view(Index) @@ -737,7 +739,7 @@ def test_take_fill_value(self): with tm.assertRaisesRegexp(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) def test_print_unicode_columns(self): diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index c2afaf209dbc0..d269cddcbb5c8 100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np import pandas as pd @@ -59,10 +61,10 @@ def test_astype_raises(self): # GH 13149, GH 13209 idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) - self.assertRaises(ValueError, idx.astype, float) - self.assertRaises(ValueError, idx.astype, str) - self.assertRaises(ValueError, idx.astype, 'datetime64') - self.assertRaises(ValueError, idx.astype, 'datetime64[ns]') + pytest.raises(ValueError, idx.astype, float) + pytest.raises(ValueError, idx.astype, str) + pytest.raises(ValueError, idx.astype, 
'datetime64') + pytest.raises(ValueError, idx.astype, 'datetime64[ns]') def test_pickle_compat_construction(self): pass @@ -116,6 +118,6 @@ def test_numeric_compat(self): 5, dtype='float64') * (np.arange(5, dtype='float64') + 0.1))) # invalid - self.assertRaises(TypeError, lambda: idx * idx) - self.assertRaises(ValueError, lambda: idx * self._holder(np.arange(3))) - self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) + pytest.raises(TypeError, lambda: idx * idx) + pytest.raises(ValueError, lambda: idx * self._holder(np.arange(3))) + pytest.raises(ValueError, lambda: idx * np.array([1, 2])) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index e903615a09f5c..6681a03a3b271 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np from datetime import timedelta @@ -48,13 +50,13 @@ def test_constructor_coverage(self): exp = timedelta_range('1 days', periods=10) tm.assert_index_equal(rng, exp) - self.assertRaises(ValueError, TimedeltaIndex, start='1 days', - periods='foo', freq='D') + pytest.raises(ValueError, TimedeltaIndex, start='1 days', + periods='foo', freq='D') - self.assertRaises(ValueError, TimedeltaIndex, start='1 days', - end='10 days') + pytest.raises(ValueError, TimedeltaIndex, start='1 days', + end='10 days') - self.assertRaises(ValueError, TimedeltaIndex, '1 days') + pytest.raises(ValueError, TimedeltaIndex, '1 days') # generator expression gen = (timedelta(i) for i in range(10)) @@ -72,10 +74,10 @@ def test_constructor_coverage(self): tm.assert_index_equal(from_ints, expected) # non-conforming freq - self.assertRaises(ValueError, TimedeltaIndex, - ['1 days', '2 days', '4 days'], freq='D') + pytest.raises(ValueError, TimedeltaIndex, + ['1 days', '2 days', '4 days'], freq='D') - self.assertRaises(ValueError, TimedeltaIndex, periods=10, freq='D') + pytest.raises(ValueError, TimedeltaIndex, periods=10, freq='D') def test_constructor_name(self): idx = TimedeltaIndex(start='1 days', periods=1, freq='D', name='TEST') diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index b98b5f097ce24..58b83dde5f402 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -1,3 +1,5 @@ +import pytest + from datetime import timedelta import pandas.util.testing as tm @@ -77,7 +79,7 @@ def test_delete(self): self.assertEqual(result.name, expected.name) self.assertEqual(result.freq, expected.freq) - with tm.assertRaises((IndexError, ValueError)): + with pytest.raises((IndexError, ValueError)): # either depending on numpy version result = idx.delete(5) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 6d8d5f19b98e2..97c4a1df95963 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np from datetime import timedelta from distutils.version import LooseVersion @@ -276,7 +278,7 @@ def test_ops_compat(self): # multiply for offset in offsets: - self.assertRaises(TypeError, lambda: rng * offset) + pytest.raises(TypeError, lambda: rng * offset) # divide expected = Int64Index((np.arange(10) + 1) * 12, name='foo') @@ -298,7 +300,7 @@ def test_ops_compat(self): tm.assert_index_equal(result, expected) # don't allow division by NaT (maybe
could in the future) - self.assertRaises(TypeError, lambda: rng / pd.NaT) + pytest.raises(TypeError, lambda: rng / pd.NaT) def test_subtraction_ops(self): @@ -308,10 +310,10 @@ def test_subtraction_ops(self): td = Timedelta('1 days') dt = Timestamp('20130101') - self.assertRaises(TypeError, lambda: tdi - dt) - self.assertRaises(TypeError, lambda: tdi - dti) - self.assertRaises(TypeError, lambda: td - dt) - self.assertRaises(TypeError, lambda: td - dti) + pytest.raises(TypeError, lambda: tdi - dt) + pytest.raises(TypeError, lambda: tdi - dti) + pytest.raises(TypeError, lambda: td - dt) + pytest.raises(TypeError, lambda: td - dti) result = dt - dti expected = TimedeltaIndex(['0 days', '-1 days', '-2 days'], name='bar') @@ -368,19 +370,19 @@ def _check(result, expected): _check(result, expected) # tz mismatches - self.assertRaises(TypeError, lambda: dt_tz - ts) - self.assertRaises(TypeError, lambda: dt_tz - dt) - self.assertRaises(TypeError, lambda: dt_tz - ts_tz2) - self.assertRaises(TypeError, lambda: dt - dt_tz) - self.assertRaises(TypeError, lambda: ts - dt_tz) - self.assertRaises(TypeError, lambda: ts_tz2 - ts) - self.assertRaises(TypeError, lambda: ts_tz2 - dt) - self.assertRaises(TypeError, lambda: ts_tz - ts_tz2) + pytest.raises(TypeError, lambda: dt_tz - ts) + pytest.raises(TypeError, lambda: dt_tz - dt) + pytest.raises(TypeError, lambda: dt_tz - ts_tz2) + pytest.raises(TypeError, lambda: dt - dt_tz) + pytest.raises(TypeError, lambda: ts - dt_tz) + pytest.raises(TypeError, lambda: ts_tz2 - ts) + pytest.raises(TypeError, lambda: ts_tz2 - dt) + pytest.raises(TypeError, lambda: ts_tz - ts_tz2) # with dti - self.assertRaises(TypeError, lambda: dti - ts_tz) - self.assertRaises(TypeError, lambda: dti_tz - ts) - self.assertRaises(TypeError, lambda: dti_tz - ts_tz2) + pytest.raises(TypeError, lambda: dti - ts_tz) + pytest.raises(TypeError, lambda: dti_tz - ts) + pytest.raises(TypeError, lambda: dti_tz - ts_tz2) result = dti_tz - dt_tz expected = TimedeltaIndex(['0 days', '1 days', '2 days']) @@ -437,10 +439,10 @@ def test_sub_period(self): for freq in [None, 'H']: idx = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=freq) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): idx - p - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): p - idx def test_addition_ops(self): @@ -468,14 +470,14 @@ def test_addition_ops(self): tm.assert_index_equal(result, expected) # unequal length - self.assertRaises(ValueError, lambda: tdi + dti[0:1]) - self.assertRaises(ValueError, lambda: tdi[0:1] + dti) + pytest.raises(ValueError, lambda: tdi + dti[0:1]) + pytest.raises(ValueError, lambda: tdi[0:1] + dti) # random indexes - self.assertRaises(TypeError, lambda: tdi + Int64Index([1, 2, 3])) + pytest.raises(TypeError, lambda: tdi + Int64Index([1, 2, 3])) # this is a union! 
- # self.assertRaises(TypeError, lambda : Int64Index([1,2,3]) + tdi) + # pytest.raises(TypeError, lambda : Int64Index([1,2,3]) + tdi) result = tdi + dti # name will be reset expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) @@ -566,7 +568,7 @@ def test_unknown_attribute(self): tdi = pd.timedelta_range(start=0, periods=10, freq='1s') ts = pd.Series(np.random.normal(size=10), index=tdi) self.assertNotIn('foo', ts.__dict__.keys()) - self.assertRaises(AttributeError, lambda: ts.foo) + pytest.raises(AttributeError, lambda: ts.foo) def test_order(self): # GH 10295 @@ -886,11 +888,11 @@ def test_ops(self): self.assertEqual(abs(-td), Timedelta('10d')) # invalid multiply with another timedelta - self.assertRaises(TypeError, lambda: td * td) + pytest.raises(TypeError, lambda: td * td) # can't operate with integers - self.assertRaises(TypeError, lambda: td + 2) - self.assertRaises(TypeError, lambda: td - 2) + pytest.raises(TypeError, lambda: td + 2) + pytest.raises(TypeError, lambda: td - 2) def test_ops_offsets(self): td = Timedelta(10, unit='d') @@ -910,21 +912,21 @@ def test_ops_ndarray(self): tm.assert_numpy_array_equal(td + other, expected) if LooseVersion(np.__version__) >= '1.8': tm.assert_numpy_array_equal(other + td, expected) - self.assertRaises(TypeError, lambda: td + np.array([1])) - self.assertRaises(TypeError, lambda: np.array([1]) + td) + pytest.raises(TypeError, lambda: td + np.array([1])) + pytest.raises(TypeError, lambda: np.array([1]) + td) expected = pd.to_timedelta(['0 days']).values tm.assert_numpy_array_equal(td - other, expected) if LooseVersion(np.__version__) >= '1.8': tm.assert_numpy_array_equal(-other + td, expected) - self.assertRaises(TypeError, lambda: td - np.array([1])) - self.assertRaises(TypeError, lambda: np.array([1]) - td) + pytest.raises(TypeError, lambda: td - np.array([1])) + pytest.raises(TypeError, lambda: np.array([1]) - td) expected = pd.to_timedelta(['2 days']).values tm.assert_numpy_array_equal(td * np.array([2]), expected) tm.assert_numpy_array_equal(np.array([2]) * td, expected) - self.assertRaises(TypeError, lambda: td * other) - self.assertRaises(TypeError, lambda: other * td) + pytest.raises(TypeError, lambda: td * other) + pytest.raises(TypeError, lambda: other * td) tm.assert_numpy_array_equal(td / other, np.array([1], dtype=np.float64)) @@ -1002,16 +1004,16 @@ def test_ops_error_str(self): tdi = TimedeltaIndex(['1 day', '2 days']) for l, r in [(tdi, 'a'), ('a', tdi)]: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): l + r - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): l > r - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): l == r - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): l != r def test_timedelta_ops(self): @@ -1058,7 +1060,7 @@ def test_timedelta_ops(self): # invalid ops for op in ['skew', 'kurt', 'sem', 'prod']: - self.assertRaises(TypeError, getattr(td, op)) + pytest.raises(TypeError, getattr(td, op)) # GH 10040 # make sure NaT is properly handled by median() diff --git a/pandas/tests/indexes/timedeltas/test_partial_slicing.py b/pandas/tests/indexes/timedeltas/test_partial_slicing.py index 0d46ee4172211..f7560c37e814b 100644 --- a/pandas/tests/indexes/timedeltas/test_partial_slicing.py +++ b/pandas/tests/indexes/timedeltas/test_partial_slicing.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np import pandas.util.testing as tm @@ -27,7 +29,7 @@ def test_partial_slice(self): result = s['6 days, 23:11:12'] self.assertEqual(result, s.iloc[133]) - 
self.assertRaises(KeyError, s.__getitem__, '50 days') + pytest.raises(KeyError, s.__getitem__, '50 days') def test_partial_slice_high_reso(self): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 12c9e626ca9e1..9ca207876520f 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np from datetime import timedelta @@ -119,9 +121,9 @@ def test_numeric_compat(self): 5, dtype='float64') * (np.arange(5, dtype='float64') + 0.1))) # invalid - self.assertRaises(TypeError, lambda: idx * idx) - self.assertRaises(ValueError, lambda: idx * self._holder(np.arange(3))) - self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) + pytest.raises(TypeError, lambda: idx * idx) + pytest.raises(ValueError, lambda: idx * self._holder(np.arange(3))) + pytest.raises(ValueError, lambda: idx * np.array([1, 2])) def test_pickle_compat_construction(self): pass @@ -238,7 +240,7 @@ def test_take_fill_value(self): with tm.assertRaisesRegexp(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) def test_isin(self): @@ -411,7 +413,7 @@ def test_comparisons_coverage(self): tm.assert_numpy_array_equal(result, exp) # raise TypeError for now - self.assertRaises(TypeError, rng.__lt__, rng[3].value) + pytest.raises(TypeError, rng.__lt__, rng[3].value) result = rng == list(rng) exp = rng == rng @@ -497,9 +499,9 @@ def test_fields(self): tm.assert_index_equal(rng.nanoseconds, Index([456, 456], dtype='int64')) - self.assertRaises(AttributeError, lambda: rng.hours) - self.assertRaises(AttributeError, lambda: rng.minutes) - self.assertRaises(AttributeError, lambda: rng.milliseconds) + pytest.raises(AttributeError, lambda: rng.hours) + pytest.raises(AttributeError, lambda: rng.minutes) + pytest.raises(AttributeError, lambda: rng.milliseconds) # with nat s = Series(rng) diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index ade9366c7e994..b4f6f33a6e06a 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -1,3 +1,5 @@ +import pytest + from datetime import time, timedelta import numpy as np @@ -115,15 +117,15 @@ def test_to_timedelta_invalid(self): ['foo'], errors='never') # these will error - self.assertRaises(ValueError, lambda: to_timedelta([1, 2], unit='foo')) - self.assertRaises(ValueError, lambda: to_timedelta(1, unit='foo')) + pytest.raises(ValueError, lambda: to_timedelta([1, 2], unit='foo')) + pytest.raises(ValueError, lambda: to_timedelta(1, unit='foo')) # time not supported ATM - self.assertRaises(ValueError, lambda: to_timedelta(time(second=1))) + pytest.raises(ValueError, lambda: to_timedelta(time(second=1))) self.assertTrue(to_timedelta( time(second=1), errors='coerce') is pd.NaT) - self.assertRaises(ValueError, lambda: to_timedelta(['foo', 'bar'])) + pytest.raises(ValueError, lambda: to_timedelta(['foo', 'bar'])) tm.assert_index_equal(TimedeltaIndex([pd.NaT, pd.NaT]), to_timedelta(['foo', 'bar'], errors='coerce')) @@ -199,4 +201,4 @@ def test_to_timedelta_on_nanoseconds(self): expected = Timedelta('990ns') self.assertEqual(result, expected) - self.assertRaises(TypeError, lambda: Timedelta(nanoseconds='abc')) + pytest.raises(TypeError, lambda: Timedelta(nanoseconds='abc')) diff --git 
a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index b8a24cb2dcb03..fd5557dfcb99c 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +import pytest + import pandas as pd import numpy as np from pandas import Series, DataFrame @@ -47,22 +49,22 @@ def test_loc_scalar(self): assert_frame_equal(df, expected) # value not in the categories - self.assertRaises(KeyError, lambda: df.loc['d']) + pytest.raises(KeyError, lambda: df.loc['d']) def f(): df.loc['d'] = 10 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def f(): df.loc['d', 'A'] = 10 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def f(): df.loc['d', 'C'] = 10 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_loc_listlike(self): @@ -78,7 +80,7 @@ def test_loc_listlike(self): assert_frame_equal(result, expected, check_index_type=True) # element in the categories but not in the values - self.assertRaises(KeyError, lambda: self.df2.loc['e']) + pytest.raises(KeyError, lambda: self.df2.loc['e']) # assign is ok df = self.df2.copy() @@ -97,7 +99,7 @@ def test_loc_listlike(self): assert_frame_equal(result, expected, check_index_type=True) # not all labels in the categories - self.assertRaises(KeyError, lambda: self.df2.loc[['a', 'd']]) + pytest.raises(KeyError, lambda: self.df2.loc[['a', 'd']]) def test_loc_listlike_dtypes(self): # GH 11586 @@ -324,22 +326,22 @@ def test_reindexing(self): assert_frame_equal(result, expected, check_index_type=True) # passed duplicate indexers are not allowed - self.assertRaises(ValueError, lambda: self.df2.reindex(['a', 'a'])) + pytest.raises(ValueError, lambda: self.df2.reindex(['a', 'a'])) # args NotImplemented ATM - self.assertRaises(NotImplementedError, - lambda: self.df2.reindex(['a'], method='ffill')) - self.assertRaises(NotImplementedError, - lambda: self.df2.reindex(['a'], level=1)) - self.assertRaises(NotImplementedError, - lambda: self.df2.reindex(['a'], limit=2)) + pytest.raises(NotImplementedError, + lambda: self.df2.reindex(['a'], method='ffill')) + pytest.raises(NotImplementedError, + lambda: self.df2.reindex(['a'], level=1)) + pytest.raises(NotImplementedError, + lambda: self.df2.reindex(['a'], limit=2)) def test_loc_slice(self): # slicing # not implemented ATM # GH9748 - self.assertRaises(TypeError, lambda: self.df.loc[1:5]) + pytest.raises(TypeError, lambda: self.df.loc[1:5]) # result = df.loc[1:5] # expected = df.iloc[[1,2,3,4]] @@ -387,8 +389,8 @@ def test_boolean_selection(self): # categories=[3, 2, 1], # ordered=False, # name=u'B') - self.assertRaises(TypeError, lambda: df4[df4.index < 2]) - self.assertRaises(TypeError, lambda: df4[df4.index > 1]) + pytest.raises(TypeError, lambda: df4[df4.index < 2]) + pytest.raises(TypeError, lambda: df4[df4.index > 1]) def test_indexing_with_category(self): diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index f2c3a49bc377c..c0d83c580d1d1 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -326,7 +326,7 @@ def test_setting_with_copy_bug(self): def f(): df[['c']][mask] = df[['b']][mask] - self.assertRaises(com.SettingWithCopyError, f) + pytest.raises(com.SettingWithCopyError, f) # invalid warning as we are returning a new object # GH 8730 diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 
7216c05657102..15a56d97eeaec 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -268,7 +268,7 @@ def test_setitem_index_object(self): # object + int -> IndexError, regarded as location temp = obj.copy() - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): temp[5] = 5 # object + float -> object @@ -300,7 +300,7 @@ def test_setitem_index_float64(self): # float + int -> int temp = obj.copy() # TODO_GH12747 The result must be float - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): temp[5] = 5 # float + float -> float @@ -803,7 +803,7 @@ def test_where_index_datetime64(self): # datetime64 + datetime64 -> datetime64 # must support scalar msg = "cannot coerce a Timestamp with a tz on a naive Block" - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): obj.where(cond, pd.Timestamp('2012-01-01')) values = pd.Index([pd.Timestamp('2012-01-01'), diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index eeef41ad6dbb2..9b224ba796268 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np import pandas as pd from pandas import date_range, Index, DataFrame, Series, Timestamp @@ -57,7 +59,7 @@ def test_indexing_with_datetime_tz(self): def f(): df.loc[df.new_col == 'new', 'time'] = v - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) v = df.loc[df.new_col == 'new', 'time'] + pd.Timedelta('1s') df.loc[df.new_col == 'new', 'time'] = v diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 4c177cade88bf..ef64c6e0475e9 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +import pytest + from warnings import catch_warnings import numpy as np from pandas import Series, DataFrame, Index, Float64Index @@ -52,7 +54,7 @@ def f(): def f(): s.iloc[3.0] = 0 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_scalar_non_numeric(self): @@ -87,7 +89,7 @@ def f(): error = KeyError else: error = TypeError - self.assertRaises(error, f) + pytest.raises(error, f) # label based can be a TypeError or KeyError def f(): @@ -97,7 +99,7 @@ def f(): error = KeyError else: error = TypeError - self.assertRaises(error, f) + pytest.raises(error, f) # contains self.assertFalse(3.0 in s) @@ -105,7 +107,7 @@ def f(): # setting with a float fails with iloc def f(): s.iloc[3.0] = 0 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # setting with an indexer if s.index.inferred_type in ['categorical']: @@ -121,7 +123,7 @@ def f(): # s2 = s.copy() # def f(): # idxr(s2)[3.0] = 0 - # self.assertRaises(TypeError, f) + # pytest.raises(TypeError, f) pass else: @@ -140,7 +142,7 @@ def f(): # falls back to position selection, series only s = Series(np.arange(len(i)), index=i) s[3] - self.assertRaises(TypeError, lambda: s[3.0]) + pytest.raises(TypeError, lambda: s[3.0]) def test_scalar_with_mixed(self): @@ -157,9 +159,9 @@ def f(): with catch_warnings(record=True): idxr(s2)[1.0] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) - self.assertRaises(KeyError, lambda: s2.loc[1.0]) + pytest.raises(KeyError, lambda: s2.loc[1.0]) result = s2.loc['b'] expected = 2 @@ -174,14 +176,14 @@ def f(): with catch_warnings(record=True): idxr(s3)[1.0] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) result = idxr(s3)[1] expected = 2
self.assertEqual(result, expected) - self.assertRaises(TypeError, lambda: s3.iloc[1.0]) - self.assertRaises(KeyError, lambda: s3.loc[1.0]) + pytest.raises(TypeError, lambda: s3.iloc[1.0]) + pytest.raises(KeyError, lambda: s3.loc[1.0]) result = s3.loc[1.5] expected = 3 @@ -270,7 +272,7 @@ def f(): # random integer is a KeyError with catch_warnings(record=True): - self.assertRaises(KeyError, lambda: idxr(s)[3.5]) + pytest.raises(KeyError, lambda: idxr(s)[3.5]) # contains self.assertTrue(3.0 in s) @@ -284,11 +286,11 @@ def f(): self.check(result, s, 3, False) # iloc raises with a float - self.assertRaises(TypeError, lambda: s.iloc[3.0]) + pytest.raises(TypeError, lambda: s.iloc[3.0]) def g(): s2.iloc[3.0] = 0 - self.assertRaises(TypeError, g) + pytest.raises(TypeError, g) def test_slice_non_numeric(self): @@ -311,7 +313,7 @@ def test_slice_non_numeric(self): def f(): s.iloc[l] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) for idxr in [lambda x: x.ix, lambda x: x.loc, @@ -321,7 +323,7 @@ def f(): def f(): with catch_warnings(record=True): idxr(s)[l] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # setitem for l in [slice(3.0, 4), @@ -330,7 +332,7 @@ def f(): def f(): s.iloc[l] = 0 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) for idxr in [lambda x: x.ix, lambda x: x.loc, @@ -339,7 +341,7 @@ def f(): def f(): with catch_warnings(record=True): idxr(s)[l] = 0 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_slice_integer(self): @@ -378,7 +380,7 @@ def test_slice_integer(self): def f(): s[l] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # getitem out-of-bounds for l in [slice(-6, 6), @@ -402,7 +404,7 @@ def f(): def f(): s[slice(-6.0, 6.0)] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # getitem odd floats for l, res1 in [(slice(2.5, 4), slice(3, 5)), @@ -425,7 +427,7 @@ def f(): def f(): s[l] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # setitem for l in [slice(3.0, 4), @@ -444,7 +446,7 @@ def f(): def f(): s[l] = 0 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_integer_positional_indexing(self): """ make sure that we are raising on positional indexing @@ -466,7 +468,7 @@ def test_integer_positional_indexing(self): def f(): idxr(s)[l] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_slice_integer_frame_getitem(self): @@ -493,7 +495,7 @@ def test_slice_integer_frame_getitem(self): def f(): s[l] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # getitem out-of-bounds for l in [slice(-10, 10), @@ -506,7 +508,7 @@ def f(): def f(): s[slice(-10.0, 10.0)] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # getitem odd floats for l, res in [(slice(0.5, 1), slice(1, 2)), @@ -521,7 +523,7 @@ def f(): def f(): s[l] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # setitem for l in [slice(3.0, 4), @@ -538,7 +540,7 @@ def f(): def f(): s[l] = 0 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_slice_float(self): @@ -610,9 +612,9 @@ def test_floating_misc(self): # value not found (and no fallbacking at all) # scalar integers - self.assertRaises(KeyError, lambda: s.loc[4]) - self.assertRaises(KeyError, lambda: s.loc[4]) - self.assertRaises(KeyError, lambda: s[4]) + pytest.raises(KeyError, lambda: s.loc[4]) + pytest.raises(KeyError, lambda: s.loc[4]) + pytest.raises(KeyError, lambda: s[4]) # fancy floats/integers create the correct entry (as nan) # fancy tests diff 
--git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 517194835ca73..174026a00fcdd 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1,5 +1,7 @@ """ test positional based indexing with iloc """ +import pytest + from warnings import catch_warnings import numpy as np @@ -23,26 +25,26 @@ def test_iloc_exceeds_bounds(self): with tm.assertRaisesRegexp(IndexError, 'positional indexers are out-of-bounds'): df.iloc[:, [0, 1, 2, 3, 4, 5]] - self.assertRaises(IndexError, lambda: df.iloc[[1, 30]]) - self.assertRaises(IndexError, lambda: df.iloc[[1, -30]]) - self.assertRaises(IndexError, lambda: df.iloc[[100]]) + pytest.raises(IndexError, lambda: df.iloc[[1, 30]]) + pytest.raises(IndexError, lambda: df.iloc[[1, -30]]) + pytest.raises(IndexError, lambda: df.iloc[[100]]) s = df['A'] - self.assertRaises(IndexError, lambda: s.iloc[[100]]) - self.assertRaises(IndexError, lambda: s.iloc[[-100]]) + pytest.raises(IndexError, lambda: s.iloc[[100]]) + pytest.raises(IndexError, lambda: s.iloc[[-100]]) # still raise on a single indexer msg = 'single positional indexer is out-of-bounds' with tm.assertRaisesRegexp(IndexError, msg): df.iloc[30] - self.assertRaises(IndexError, lambda: df.iloc[-30]) + pytest.raises(IndexError, lambda: df.iloc[-30]) # GH10779 # single positive/negative indexer exceeding Series bounds should raise # an IndexError with tm.assertRaisesRegexp(IndexError, msg): s.iloc[30] - self.assertRaises(IndexError, lambda: s.iloc[-30]) + pytest.raises(IndexError, lambda: s.iloc[-30]) # slices are ok result = df.iloc[:, 4:10] # 0 < start < len < stop @@ -101,8 +103,8 @@ def check(result, expected): check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) check(dfl.iloc[4:6], dfl.iloc[[4]]) - self.assertRaises(IndexError, lambda: dfl.iloc[[4, 5, 6]]) - self.assertRaises(IndexError, lambda: dfl.iloc[:, 4]) + pytest.raises(IndexError, lambda: dfl.iloc[[4, 5, 6]]) + pytest.raises(IndexError, lambda: dfl.iloc[:, 4]) def test_iloc_getitem_int(self): @@ -385,10 +387,10 @@ def test_iloc_getitem_labelled_frame(self): self.assertEqual(result, exp) # out-of-bounds exception - self.assertRaises(IndexError, df.iloc.__getitem__, tuple([10, 5])) + pytest.raises(IndexError, df.iloc.__getitem__, tuple([10, 5])) # trying to use a label - self.assertRaises(ValueError, df.iloc.__getitem__, tuple(['j', 'D'])) + pytest.raises(ValueError, df.iloc.__getitem__, tuple(['j', 'D'])) def test_iloc_getitem_doc_issue(self): @@ -488,10 +490,10 @@ def test_iloc_mask(self): # GH 3631, iloc with a mask (of a series) should raise df = DataFrame(lrange(5), list('ABCDE'), columns=['a']) mask = (df.a % 2 == 0) - self.assertRaises(ValueError, df.iloc.__getitem__, tuple([mask])) + pytest.raises(ValueError, df.iloc.__getitem__, tuple([mask])) mask.index = lrange(len(mask)) - self.assertRaises(NotImplementedError, df.iloc.__getitem__, - tuple([mask])) + pytest.raises(NotImplementedError, df.iloc.__getitem__, + tuple([mask])) # ndarray ok result = df.iloc[np.array([True] * len(mask), dtype=bool)] diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index ff3b1cc3dbc89..fe934e7b2a7e0 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -3,6 +3,8 @@ """ test fancy indexing & misc """ +import pytest + from warnings import catch_warnings from datetime import datetime @@ -40,7 +42,7 @@ def f(): df.loc[df.index[2:5], 'bar'] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0]) - self.assertRaises(ValueError, f) + 
pytest.raises(ValueError, f) # valid df.loc[df.index[2:6], 'bar'] = np.array([2.33j, 1.23 + 0.1j, @@ -59,7 +61,7 @@ def f(): def f(): df[2:5] = np.arange(1, 4) * 1j - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_setitem_dtype_upcast(self): @@ -240,7 +242,7 @@ def test_multitype_list_index_access(self): df = pd.DataFrame(np.random.random((10, 5)), columns=["a"] + [20, 21, 22, 23]) - with self.assertRaises(KeyError): + with pytest.raises(KeyError): df[[22, 26, -8]] self.assertEqual(df[21].shape[0], df.shape[0]) @@ -429,18 +431,18 @@ def test_string_slice(self): df = pd.DataFrame([1], pd.Index([pd.Timestamp('2011-01-01')], dtype=object)) self.assertTrue(df.index.is_all_dates) - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): df['2011'] - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): df.loc['2011', 0] df = pd.DataFrame() self.assertFalse(df.index.is_all_dates) - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): df['2011'] - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): df.loc['2011', 0] def test_mi_access(self): diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py index d8b43abd1b537..c3ce21343b8d1 100644 --- a/pandas/tests/indexing/test_ix.py +++ b/pandas/tests/indexing/test_ix.py @@ -1,5 +1,7 @@ """ test indexing with ix """ +import pytest + from warnings import catch_warnings import numpy as np @@ -96,7 +98,7 @@ def compare(result, expected): with catch_warnings(record=True): df.ix[key] - self.assertRaises(TypeError, lambda: df.loc[key]) + pytest.raises(TypeError, lambda: df.loc[key]) df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD'), index=pd.date_range('2012-01-01', periods=5)) @@ -116,7 +118,7 @@ def compare(result, expected): with catch_warnings(record=True): expected = df.ix[key] except KeyError: - self.assertRaises(KeyError, lambda: df.loc[key]) + pytest.raises(KeyError, lambda: df.loc[key]) continue result = df.loc[key] @@ -298,14 +300,14 @@ def test_ix_setitem_out_of_bounds_axis_0(self): np.random.randn(2, 5), index=["row%s" % i for i in range(2)], columns=["col%s" % i for i in range(5)]) with catch_warnings(record=True): - self.assertRaises(ValueError, df.ix.__setitem__, (2, 0), 100) + pytest.raises(ValueError, df.ix.__setitem__, (2, 0), 100) def test_ix_setitem_out_of_bounds_axis_1(self): df = pd.DataFrame( np.random.randn(5, 2), index=["row%s" % i for i in range(5)], columns=["col%s" % i for i in range(2)]) with catch_warnings(record=True): - self.assertRaises(ValueError, df.ix.__setitem__, (0, 2), 100) + pytest.raises(ValueError, df.ix.__setitem__, (0, 2), 100) def test_ix_empty_list_indexer_is_ok(self): with catch_warnings(record=True): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index af9d3ffdf6671..b2a5e6147cd28 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1,6 +1,8 @@ """ test label based indexing with loc """ import itertools +import pytest + from warnings import catch_warnings import numpy as np @@ -231,8 +233,8 @@ def test_loc_to_fail(self): columns=['e', 'f', 'g']) # raise a KeyError? 
- self.assertRaises(KeyError, df.loc.__getitem__, - tuple([[1, 2], [1, 2]])) + pytest.raises(KeyError, df.loc.__getitem__, + tuple([[1, 2], [1, 2]])) # GH 7496 # loc should not fall back @@ -241,10 +243,10 @@ def test_loc_to_fail(self): s.loc[1] = 1 s.loc['a'] = 2 - self.assertRaises(KeyError, lambda: s.loc[-1]) - self.assertRaises(KeyError, lambda: s.loc[[-1, -2]]) + pytest.raises(KeyError, lambda: s.loc[-1]) + pytest.raises(KeyError, lambda: s.loc[[-1, -2]]) - self.assertRaises(KeyError, lambda: s.loc[['4']]) + pytest.raises(KeyError, lambda: s.loc[['4']]) s.loc[-1] = 3 result = s.loc[[-1, -2]] @@ -252,14 +254,14 @@ def test_loc_to_fail(self): tm.assert_series_equal(result, expected) s['a'] = 2 - self.assertRaises(KeyError, lambda: s.loc[[-2]]) + pytest.raises(KeyError, lambda: s.loc[[-2]]) del s['a'] def f(): s.loc[[-2]] = 0 - self.assertRaises(KeyError, f) + pytest.raises(KeyError, f) # inconsistency between .loc[values] and .loc[values,:] # GH 7999 @@ -268,12 +270,12 @@ def f(): def f(): df.loc[[3], :] - self.assertRaises(KeyError, f) + pytest.raises(KeyError, f) def f(): df.loc[[3]] - self.assertRaises(KeyError, f) + pytest.raises(KeyError, f) def test_loc_getitem_label_slice(self): @@ -540,11 +542,11 @@ def test_loc_non_unique(self): # these are going to raise because we are non-monotonic df = DataFrame({'A': [1, 2, 3, 4, 5, 6], 'B': [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3]) - self.assertRaises(KeyError, df.loc.__getitem__, - tuple([slice(1, None)])) - self.assertRaises(KeyError, df.loc.__getitem__, - tuple([slice(0, None)])) - self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1, 2)])) + pytest.raises(KeyError, df.loc.__getitem__, + tuple([slice(1, None)])) + pytest.raises(KeyError, df.loc.__getitem__, + tuple([slice(0, None)])) + pytest.raises(KeyError, df.loc.__getitem__, tuple([slice(1, 2)])) # monotonic are ok df = DataFrame({'A': [1, 2, 3, 4, 5, 6], diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index c39e25a1f1d74..18cb9a3a063b9 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -446,7 +446,7 @@ def test_multiindex_setitem(self): def f(): df.loc['bar'] *= 2 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # from SO # http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation @@ -492,12 +492,12 @@ def test_getitem_duplicates_multiindex(self): def f(): df.val['A'] - self.assertRaises(KeyError, f) + pytest.raises(KeyError, f) def f(): df.val['X'] - self.assertRaises(KeyError, f) + pytest.raises(KeyError, f) # A is treated as a special Timestamp index = MultiIndex(levels=[['A', 'B', 'C'], @@ -514,7 +514,7 @@ def f(): def f(): df.val['X'] - self.assertRaises(KeyError, f) + pytest.raises(KeyError, f) # GH 7866 # multi-index slicing with missing indexers @@ -534,7 +534,7 @@ def f(): tm.assert_series_equal(result, expected) # not any values found - self.assertRaises(KeyError, lambda: s.loc[['D']]) + pytest.raises(KeyError, lambda: s.loc[['D']]) # empty ok result = s.loc[[]] @@ -611,13 +611,13 @@ def f(): with catch_warnings(record=True): df.ix[4, 'c'] = [0, 1, 2, 3] - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): with catch_warnings(record=True): df.ix[4, 'c'] = [0] - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # groupby example NUM_ROWS = 100 @@ -786,12 +786,12 @@ def test_per_axis_per_level_getitem(self): def f(): df.loc[(slice(None), np.array([True,
False])), :] - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # ambiguous cases # these can be multiply interpreted (e.g. in this case # as df.loc[slice(None),[1]] as well - self.assertRaises(KeyError, lambda: df.loc[slice(None), [1]]) + pytest.raises(KeyError, lambda: df.loc[slice(None), [1]]) result = df.loc[(slice(None), [1]), :] expected = df.iloc[[0, 3]] @@ -989,7 +989,7 @@ def test_per_axis_per_level_doc_examples(self): def f(): df.loc['A1', (slice(None), 'foo')] - self.assertRaises(UnsortedIndexError, f) + pytest.raises(UnsortedIndexError, f) df = df.sort_index(axis=1) # slicing @@ -1038,17 +1038,17 @@ def test_loc_axis_arguments(self): def f(): df.loc(axis=-1)[:, :, ['C1', 'C3']] - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): df.loc(axis=2)[:, :, ['C1', 'C3']] - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): df.loc(axis='foo')[:, :, ['C1', 'C3']] - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_per_axis_per_level_setitem(self): @@ -1153,13 +1153,13 @@ def f(): df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array( [[100], [100, 100]], dtype='int64') - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array( [100, 100, 100, 100], dtype='int64') - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # with an alignable rhs df = df_orig.copy() diff --git a/pandas/tests/indexing/test_panel.py b/pandas/tests/indexing/test_panel.py index 8daef6155212c..8aa35a163babc 100644 --- a/pandas/tests/indexing/test_panel.py +++ b/pandas/tests/indexing/test_panel.py @@ -65,7 +65,7 @@ def test_iloc_getitem_panel(self): def f(): p.iloc[0, [True, True], [0, 1, 2]] - self.assertRaises(IndexError, f) + pytest.raises(IndexError, f) # trying to use a label with pytest.raises(ValueError): @@ -89,12 +89,12 @@ def f(): def f(): p.iloc[0, [True, True, True], [0, 1, 2]] - self.assertRaises(IndexError, f) + pytest.raises(IndexError, f) def f(): p.iloc[0, [True, True, True], [2]] - self.assertRaises(IndexError, f) + pytest.raises(IndexError, f) def test_iloc_panel_issue(self): @@ -211,7 +211,7 @@ def f(): wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = wp2.loc[ ['Item1', 'Item2'], :, ['A', 'B']] - self.assertRaises(NotImplementedError, f) + pytest.raises(NotImplementedError, f) # to_assign = wp2.loc[['Item1', 'Item2'], :, ['A', 'B']] # wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = to_assign diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index f51f050c57624..80d2d5729c610 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -3,6 +3,9 @@ TODO: these should be split among the indexer tests """ + +import pytest + from warnings import catch_warnings import numpy as np @@ -46,12 +49,12 @@ def test_partial_setting(self): def f(): s.iloc[3] = 5. - self.assertRaises(IndexError, f) + pytest.raises(IndexError, f) def f(): s.iat[3] = 5. - self.assertRaises(IndexError, f) + pytest.raises(IndexError, f) # ## frame ## @@ -64,12 +67,12 @@ def f(): def f(): df.iloc[4, 2] = 5. - self.assertRaises(IndexError, f) + pytest.raises(IndexError, f) def f(): df.iat[4, 2] = 5.
-        self.assertRaises(IndexError, f)
+        pytest.raises(IndexError, f)

         # row setting where it exists
         expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]}))
@@ -204,7 +207,7 @@ def test_partial_setting_mixed_dtype(self):
         def f():
             df.loc[0] = [1, 2, 3]

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # TODO: #15657, these are left as object and not coerced
         df = DataFrame(columns=['A', 'B'])
@@ -237,7 +240,7 @@ def test_series_partial_set(self):
         tm.assert_series_equal(result, expected, check_index_type=True)

         # raises as nothing is in the index
-        self.assertRaises(KeyError, lambda: ser.loc[[3, 3, 3]])
+        pytest.raises(KeyError, lambda: ser.loc[[3, 3, 3]])

         expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3])
         result = ser.loc[[2, 2, 3]]
@@ -301,7 +304,7 @@ def test_series_partial_set_with_name(self):
         tm.assert_series_equal(result, expected, check_index_type=True)

         # raises as nothing is in the index
-        self.assertRaises(KeyError, lambda: ser.loc[[3, 3, 3]])
+        pytest.raises(KeyError, lambda: ser.loc[[3, 3, 3]])

         exp_idx = Index([2, 2, 3], dtype='int64', name='idx')
         expected = Series([0.2, 0.2, np.nan], index=exp_idx, name='s')
@@ -361,25 +364,25 @@ def f():
             with catch_warnings(record=True):
                 df.loc[100.0, :] = df.ix[0]

-        self.assertRaises(TypeError, f)
+        pytest.raises(TypeError, f)

         def f():
             with catch_warnings(record=True):
                 df.loc[100, :] = df.ix[0]

-        self.assertRaises(TypeError, f)
+        pytest.raises(TypeError, f)

         def f():
             with catch_warnings(record=True):
                 df.ix[100.0, :] = df.ix[0]

-        self.assertRaises(TypeError, f)
+        pytest.raises(TypeError, f)

         def f():
             with catch_warnings(record=True):
                 df.ix[100, :] = df.ix[0]

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # allow object conversion here
         df = orig.copy()
@@ -425,17 +428,17 @@ def test_partial_set_empty_frame(self):
         def f():
             df.loc[1] = 1

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         def f():
             df.loc[1] = Series([1], index=['foo'])

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         def f():
             df.loc[:, 1] = 1

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # these work as they don't really change
         # anything but the index
diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py
index 0eeaec3e00fa6..3522974c18061 100644
--- a/pandas/tests/indexing/test_scalar.py
+++ b/pandas/tests/indexing/test_scalar.py
@@ -1,5 +1,7 @@
 """ test scalar indexing, including at and iat """

+import pytest
+
 import numpy as np

 from pandas import (Series, DataFrame, Timestamp,
@@ -30,7 +32,7 @@ def _check(f, func, values=False):

         for f in [d['labels'], d['ts'], d['floats']]:
             if f is not None:
-                self.assertRaises(ValueError, self.check_values, f, 'iat')
+                pytest.raises(ValueError, self.check_values, f, 'iat')

         # at
         for f in [d['ints'], d['uints'], d['labels'],
@@ -57,7 +59,7 @@ def _check(f, func, values=False):

         for f in [d['labels'], d['ts'], d['floats']]:
             if f is not None:
-                self.assertRaises(ValueError, _check, f, 'iat')
+                pytest.raises(ValueError, _check, f, 'iat')

         # at
         for f in [d['ints'], d['uints'], d['labels'],
@@ -107,8 +109,8 @@ def test_imethods_with_dups(self):
         result = s.iat[2]
         self.assertEqual(result, 2)

-        self.assertRaises(IndexError, lambda: s.iat[10])
-        self.assertRaises(IndexError, lambda: s.iat[-10])
+        pytest.raises(IndexError, lambda: s.iat[10])
+        pytest.raises(IndexError, lambda: s.iat[-10])

         result = s.iloc[[2, 3]]
         expected = Series([2, 3], [2, 2], dtype='int64')
@@ -129,22 +131,22 @@ def test_at_to_fail(self):
         s = Series([1, 2, 3], index=list('abc'))
         result = s.at['a']
         self.assertEqual(result, 1)
-        self.assertRaises(ValueError, lambda: s.at[0])
+        pytest.raises(ValueError, lambda: s.at[0])

         df = DataFrame({'A': [1, 2, 3]}, index=list('abc'))
         result = df.at['a', 'A']
         self.assertEqual(result, 1)
-        self.assertRaises(ValueError, lambda: df.at['a', 0])
+        pytest.raises(ValueError, lambda: df.at['a', 0])

         s = Series([1, 2, 3], index=[3, 2, 1])
         result = s.at[1]
         self.assertEqual(result, 3)
-        self.assertRaises(ValueError, lambda: s.at['a'])
+        pytest.raises(ValueError, lambda: s.at['a'])

         df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1])
         result = df.at[1, 0]
         self.assertEqual(result, 3)
-        self.assertRaises(ValueError, lambda: df.at['a', 0])
+        pytest.raises(ValueError, lambda: df.at['a', 0])

         # GH 13822, incorrect error string with non-unique columns when missing
         # column is accessed
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index 1d471163790d5..3784840fbfd28 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -1177,7 +1177,7 @@ def test_to_string_specified_header(self):

         self.assertEqual(df_s, expected)

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.to_string(header=['X'])

     def test_to_string_no_index(self):
@@ -2563,7 +2563,7 @@ def test_format_percentiles():
     expected = ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']
     assert result == expected

-    tm.assertRaises(ValueError, fmt.format_percentiles, [0.1, np.nan, 0.5])
-    tm.assertRaises(ValueError, fmt.format_percentiles, [-0.001, 0.1, 0.5])
-    tm.assertRaises(ValueError, fmt.format_percentiles, [2, 0.1, 0.5])
-    tm.assertRaises(ValueError, fmt.format_percentiles, [0.1, 0.5, 'a'])
+    pytest.raises(ValueError, fmt.format_percentiles, [0.1, np.nan, 0.5])
+    pytest.raises(ValueError, fmt.format_percentiles, [-0.001, 0.1, 0.5])
+    pytest.raises(ValueError, fmt.format_percentiles, [2, 0.1, 0.5])
+    pytest.raises(ValueError, fmt.format_percentiles, [0.1, 0.5, 'a'])
diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py
index 4fb91c40aba3a..3b8bbf239d941 100644
--- a/pandas/tests/io/formats/test_style.py
+++ b/pandas/tests/io/formats/test_style.py
@@ -34,7 +34,7 @@ def h(x, foo='bar'):
         ]

     def test_init_non_pandas(self):
-        with tm.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             Styler([1, 2, 3])

     def test_init_series(self):
@@ -358,10 +358,10 @@ def test_highlight_null(self, null_color='red'):

     def test_nonunique_raises(self):
         df = pd.DataFrame([[1, 2]], columns=['A', 'A'])
-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.style

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             Styler(df)

     def test_caption(self):
@@ -494,9 +494,9 @@ def test_display_format(self):

     def test_display_format_raises(self):
         df = pd.DataFrame(np.random.randn(2, 2))
-        with tm.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             df.style.format(5)
-        with tm.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             df.style.format(True)

     def test_display_subset(self):
@@ -550,33 +550,33 @@ def test_display_dict(self):

     def test_bad_apply_shape(self):
         df = pd.DataFrame([[1, 2], [3, 4]])
-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.style._apply(lambda x: 'x', subset=pd.IndexSlice[[0, 1], :])

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.style._apply(lambda x: [''], subset=pd.IndexSlice[[0, 1], :])

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.style._apply(lambda x: ['', '', '', ''])

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.style._apply(lambda x: ['', '', ''], subset=1)

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.style._apply(lambda x: ['', '', ''], axis=1)

     def test_apply_bad_return(self):
         def f(x):
             return ''
         df = pd.DataFrame([[1, 2], [3, 4]])
-        with tm.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             df.style._apply(f, axis=None)

     def test_apply_bad_labels(self):
         def f(x):
             return pd.DataFrame(index=[1, 2], columns=['a', 'b'])
         df = pd.DataFrame([[1, 2], [3, 4]])
-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.style._apply(f, axis=None)

     def test_get_level_lengths(self):
diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
index 29ead83f3bcd9..2542deb0cedf1 100644
--- a/pandas/tests/io/formats/test_to_latex.py
+++ b/pandas/tests/io/formats/test_to_latex.py
@@ -470,7 +470,7 @@ def test_to_latex_specified_header(self):

         assert withoutescape_result == withoutescape_expected

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.to_latex(header=['A'])

     def test_to_latex_decimal(self, frame):
diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py
index 2a785375acaea..cbb302ad39dd6 100644
--- a/pandas/tests/io/json/test_json_table_schema.py
+++ b/pandas/tests/io/json/test_json_table_schema.py
@@ -327,7 +327,7 @@ def test_to_json_categorical_index(self):
         self.assertEqual(result, expected)

     def test_date_format_raises(self):
-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             self.df.to_json(orient='table', date_format='epoch')

         # others work
diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
index ee79859e9b71a..42456d2630886 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -300,10 +300,10 @@ def test_json_normalize_errors(self):

         self.assertEqual(j.fillna('').to_dict(), expected)

-        self.assertRaises(KeyError,
-                          json_normalize, data=i['Trades'],
-                          record_path=[['general', 'stocks']],
-                          meta=[['general', 'tradeid'],
-                                ['general', 'trade_version']],
-                          errors='raise'
-                          )
+        pytest.raises(KeyError,
+                      json_normalize, data=i['Trades'],
+                      record_path=[['general', 'stocks']],
+                      meta=[['general', 'tradeid'],
+                            ['general', 'trade_version']],
+                      errors='raise'
+                      )
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 99b0f0493117c..b152f7bb089de 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -96,8 +96,8 @@ def test_frame_non_unique_index(self):
         df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
                        columns=['x', 'y'])

-        self.assertRaises(ValueError, df.to_json, orient='index')
-        self.assertRaises(ValueError, df.to_json, orient='columns')
+        pytest.raises(ValueError, df.to_json, orient='index')
+        pytest.raises(ValueError, df.to_json, orient='columns')

         assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                          orient='split'))
@@ -111,9 +111,9 @@ def test_frame_non_unique_columns(self):
         df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                        columns=['x', 'x'])

-        self.assertRaises(ValueError, df.to_json, orient='index')
-        self.assertRaises(ValueError, df.to_json, orient='columns')
-        self.assertRaises(ValueError, df.to_json, orient='records')
+        pytest.raises(ValueError, df.to_json, orient='index')
+        pytest.raises(ValueError, df.to_json, orient='columns')
+        pytest.raises(ValueError, df.to_json, orient='records')

         assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                          orient='split', dtype=False))
@@ -151,12 +151,12 @@ def _check_orient(df, orient, dtype=None, numpy=False,
             # if we are not unique, then check that we are raising ValueError
             # for the appropriate orients
             if not df.index.is_unique and orient in ['index', 'columns']:
-                self.assertRaises(
+                pytest.raises(
                     ValueError, lambda: df.to_json(orient=orient))
                 return
             if (not df.columns.is_unique and
                     orient in ['index', 'columns', 'records']):
-                self.assertRaises(
+                pytest.raises(
                     ValueError, lambda: df.to_json(orient=orient))
                 return
@@ -322,21 +322,21 @@ def _check_all_orients(df, dtype=None, convert_axes=True,
         _check_orient(df.transpose().transpose(), "index", dtype=False)

     def test_frame_from_json_bad_data(self):
-        self.assertRaises(ValueError, read_json, StringIO('{"key":b:a:d}'))
+        pytest.raises(ValueError, read_json, StringIO('{"key":b:a:d}'))

         # too few indices
         json = StringIO('{"columns":["A","B"],'
                         '"index":["2","3"],'
                         '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
-        self.assertRaises(ValueError, read_json, json,
-                          orient="split")
+        pytest.raises(ValueError, read_json, json,
+                      orient="split")

         # too many columns
         json = StringIO('{"columns":["A","B","C"],'
                         '"index":["1","2","3"],'
                         '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
-        self.assertRaises(AssertionError, read_json, json,
-                          orient="split")
+        pytest.raises(AssertionError, read_json, json,
+                      orient="split")

         # bad key
         json = StringIO('{"badkey":["A","B"],'
@@ -410,7 +410,7 @@ def test_frame_to_json_float_precision(self):

     def test_frame_to_json_except(self):
         df = DataFrame([1, 2, 3])
-        self.assertRaises(ValueError, df.to_json, orient="garbage")
+        pytest.raises(ValueError, df.to_json, orient="garbage")

     def test_frame_empty(self):
         df = DataFrame(columns=['jim', 'joe'])
@@ -514,7 +514,7 @@ def test_blocks_compat_GH9037(self):

     def test_series_non_unique_index(self):
         s = Series(['a', 'b'], index=[1, 1])
-        self.assertRaises(ValueError, s.to_json, orient='index')
+        pytest.raises(ValueError, s.to_json, orient='index')

         assert_series_equal(s, read_json(s.to_json(orient='split'),
                                          orient='split', typ='series'))
@@ -587,7 +587,7 @@ def _check_all_orients(series, dtype=None, check_index_type=True):

     def test_series_to_json_except(self):
         s = Series([1, 2, 3])
-        self.assertRaises(ValueError, s.to_json, orient="garbage")
+        pytest.raises(ValueError, s.to_json, orient="garbage")

     def test_series_from_json_precise_float(self):
         s = Series([4.56, 4.56, 4.56])
@@ -695,8 +695,8 @@ def test_w_date(date, date_unit=None):
         test_w_date('20130101 20:43:42.123456', date_unit='us')
         test_w_date('20130101 20:43:42.123456789', date_unit='ns')

-        self.assertRaises(ValueError, df.to_json, date_format='iso',
-                          date_unit='foo')
+        pytest.raises(ValueError, df.to_json, date_format='iso',
+                      date_unit='foo')

     def test_date_format_series(self):
         def test_w_date(date, date_unit=None):
@@ -717,8 +717,8 @@ def test_w_date(date, date_unit=None):
         test_w_date('20130101 20:43:42.123456789', date_unit='ns')

         ts = Series(Timestamp('20130101 20:43:42.123'), index=self.ts.index)
-        self.assertRaises(ValueError, ts.to_json, date_format='iso',
-                          date_unit='foo')
+        pytest.raises(ValueError, ts.to_json, date_format='iso',
+                      date_unit='foo')

     def test_date_unit(self):
         df = self.tsframe.copy()
@@ -884,12 +884,12 @@ def test_default_handler_numpy_unsupported_dtype(self):

     def test_default_handler_raises(self):
         def my_handler_raises(obj):
             raise TypeError("raisin")
-        self.assertRaises(TypeError,
-                          DataFrame({'a': [1, 2, object()]}).to_json,
-                          default_handler=my_handler_raises)
-        self.assertRaises(TypeError,
-                          DataFrame({'a': [1, 2, complex(4, -5)]}).to_json,
-                          default_handler=my_handler_raises)
+        pytest.raises(TypeError,
+                      DataFrame({'a': [1, 2, object()]}).to_json,
+                      default_handler=my_handler_raises)
+        pytest.raises(TypeError,
+                      DataFrame({'a': [1, 2, complex(4, -5)]}).to_json,
+                      default_handler=my_handler_raises)

     def test_categorical(self):
         # GH4377 df.to_json segfaults with non-ndarray blocks
diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py
index 545165be37178..f0ccce5830a65 100644
--- a/pandas/tests/io/json/test_ujson.py
+++ b/pandas/tests/io/json/test_ujson.py
@@ -226,14 +226,14 @@ def test_doublePrecisionTest(self):

     def test_invalidDoublePrecision(self):
         input = 30.12345678901234567890
-        self.assertRaises(ValueError, ujson.encode, input, double_precision=20)
-        self.assertRaises(ValueError, ujson.encode, input, double_precision=-1)
+        pytest.raises(ValueError, ujson.encode, input, double_precision=20)
+        pytest.raises(ValueError, ujson.encode, input, double_precision=-1)

         # will throw TypeError
-        self.assertRaises(TypeError, ujson.encode, input, double_precision='9')
+        pytest.raises(TypeError, ujson.encode, input, double_precision='9')

         # will throw TypeError
-        self.assertRaises(TypeError, ujson.encode,
-                          input, double_precision=None)
+        pytest.raises(TypeError, ujson.encode,
+                      input, double_precision=None)

     def test_encodeStringConversion2(self):
         input = "A string \\ / \b \f \n \r \t"
@@ -446,7 +446,7 @@ def test_datetime_units(self):
         roundtrip = ujson.decode(ujson.encode(val, date_unit='ns'))
         self.assertEqual(roundtrip, stamp.value)

-        self.assertRaises(ValueError, ujson.encode, val, date_unit='foo')
+        pytest.raises(ValueError, ujson.encode, val, date_unit='foo')

     def test_encodeToUTF8(self):
         input = "\xe6\x97\xa5\xd1\x88"
@@ -912,7 +912,7 @@ def recursive_attr(self):
             def __str__(self):
                 return str(self.val)

-        self.assertRaises(OverflowError, ujson.encode, _TestObject("foo"))
+        pytest.raises(OverflowError, ujson.encode, _TestObject("foo"))
         self.assertEqual('"foo"', ujson.encode(_TestObject("foo"),
                                                default_handler=str))
@@ -1101,7 +1101,7 @@ def testOdArray(self):
         def will_raise():
             ujson.encode(np.array(1))

-        self.assertRaises(TypeError, will_raise)
+        pytest.raises(TypeError, will_raise)

     def testArrayNumpyExcept(self):

diff --git a/pandas/tests/io/msgpack/test_except.py b/pandas/tests/io/msgpack/test_except.py
index 4bcef3607bfa4..6246e0777daee 100644
--- a/pandas/tests/io/msgpack/test_except.py
+++ b/pandas/tests/io/msgpack/test_except.py
@@ -1,6 +1,7 @@
 # coding: utf-8

-import unittest
+import pytest
+
 from pandas.io.msgpack import packb, unpackb

@@ -8,26 +9,26 @@ class DummyException(Exception):
     pass

-class TestExceptions(unittest.TestCase):
+class TestExceptions(object):

     def test_raise_on_find_unsupported_value(self):
         import datetime
-        self.assertRaises(TypeError, packb, datetime.datetime.now())
+        pytest.raises(TypeError, packb, datetime.datetime.now())

     def test_raise_from_object_hook(self):
         def hook(obj):
             raise DummyException

-        self.assertRaises(DummyException, unpackb, packb({}), object_hook=hook)
-        self.assertRaises(DummyException, unpackb, packb({'fizz': 'buzz'}),
-                          object_hook=hook)
-        self.assertRaises(DummyException, unpackb, packb({'fizz': 'buzz'}),
-                          object_pairs_hook=hook)
-        self.assertRaises(DummyException, unpackb,
-                          packb({'fizz': {'buzz': 'spam'}}), object_hook=hook)
-        self.assertRaises(DummyException, unpackb,
-                          packb({'fizz': {'buzz': 'spam'}}),
-                          object_pairs_hook=hook)
+        pytest.raises(DummyException, unpackb, packb({}), object_hook=hook)
+        pytest.raises(DummyException, unpackb, packb({'fizz': 'buzz'}),
+                      object_hook=hook)
+        pytest.raises(DummyException, unpackb, packb({'fizz': 'buzz'}),
+                      object_pairs_hook=hook)
+        pytest.raises(DummyException, unpackb,
+                      packb({'fizz': {'buzz': 'spam'}}), object_hook=hook)
+        pytest.raises(DummyException, unpackb,
+                      packb({'fizz': {'buzz': 'spam'}}),
+                      object_pairs_hook=hook)

     def test_invalidvalue(self):
-        self.assertRaises(ValueError, unpackb, b'\xd9\x97#DL_')
+        pytest.raises(ValueError, unpackb, b'\xd9\x97#DL_')
diff --git a/pandas/tests/io/msgpack/test_limits.py b/pandas/tests/io/msgpack/test_limits.py
index a908ee3547634..e906d14a2b5a8 100644
--- a/pandas/tests/io/msgpack/test_limits.py
+++ b/pandas/tests/io/msgpack/test_limits.py
@@ -1,6 +1,9 @@
 # coding: utf-8
 from __future__ import (absolute_import, division, print_function,
                         unicode_literals)
+
+import pytest
+
 import pandas.util.testing as tm

 from pandas.io.msgpack import packb, unpackb, Packer, Unpacker, ExtType
@@ -11,22 +14,22 @@ class TestLimits(tm.TestCase):
     def test_integer(self):
         x = -(2 ** 63)
         assert unpackb(packb(x)) == x
-        self.assertRaises((OverflowError, ValueError), packb, x - 1)
+        pytest.raises((OverflowError, ValueError), packb, x - 1)

         x = 2 ** 64 - 1
         assert unpackb(packb(x)) == x
-        self.assertRaises((OverflowError, ValueError), packb, x + 1)
+        pytest.raises((OverflowError, ValueError), packb, x + 1)

     def test_array_header(self):
         packer = Packer()
         packer.pack_array_header(2 ** 32 - 1)
-        self.assertRaises((OverflowError, ValueError),
-                          packer.pack_array_header, 2 ** 32)
+        pytest.raises((OverflowError, ValueError),
+                      packer.pack_array_header, 2 ** 32)

     def test_map_header(self):
         packer = Packer()
         packer.pack_map_header(2 ** 32 - 1)
-        self.assertRaises((OverflowError, ValueError),
-                          packer.pack_array_header, 2 ** 32)
+        pytest.raises((OverflowError, ValueError),
+                      packer.pack_array_header, 2 ** 32)

     def test_max_str_len(self):
         d = 'x' * 3
@@ -38,7 +41,7 @@ def test_max_str_len(self):

         unpacker = Unpacker(max_str_len=2, encoding='utf-8')
         unpacker.feed(packed)
-        self.assertRaises(ValueError, unpacker.unpack)
+        pytest.raises(ValueError, unpacker.unpack)

     def test_max_bin_len(self):
         d = b'x' * 3
@@ -50,7 +53,7 @@ def test_max_bin_len(self):

         unpacker = Unpacker(max_bin_len=2)
         unpacker.feed(packed)
-        self.assertRaises(ValueError, unpacker.unpack)
+        pytest.raises(ValueError, unpacker.unpack)

     def test_max_array_len(self):
         d = [1, 2, 3]
@@ -62,7 +65,7 @@ def test_max_array_len(self):

         unpacker = Unpacker(max_array_len=2)
         unpacker.feed(packed)
-        self.assertRaises(ValueError, unpacker.unpack)
+        pytest.raises(ValueError, unpacker.unpack)

     def test_max_map_len(self):
         d = {1: 2, 3: 4, 5: 6}
@@ -74,7 +77,7 @@ def test_max_map_len(self):

         unpacker = Unpacker(max_map_len=2)
         unpacker.feed(packed)
-        self.assertRaises(ValueError, unpacker.unpack)
+        pytest.raises(ValueError, unpacker.unpack)

     def test_max_ext_len(self):
         d = ExtType(42, b"abc")
@@ -86,4 +89,4 @@ def test_max_ext_len(self):

         unpacker = Unpacker(max_ext_len=2)
         unpacker.feed(packed)
-        self.assertRaises(ValueError, unpacker.unpack)
+        pytest.raises(ValueError, unpacker.unpack)
diff --git a/pandas/tests/io/msgpack/test_obj.py b/pandas/tests/io/msgpack/test_obj.py
index b067dacb84494..4a6b89907954e 100644
--- a/pandas/tests/io/msgpack/test_obj.py
+++ b/pandas/tests/io/msgpack/test_obj.py
@@ -1,6 +1,7 @@
 # coding: utf-8

-import unittest
+import pytest
+
 from pandas.io.msgpack import packb, unpackb
@@ -8,7 +9,7 @@ class DecodeError(Exception):
     pass

-class TestObj(unittest.TestCase):
+class TestObj(object):

     def _arr_to_str(self, arr):
         return ''.join(str(c) for c in arr)
@@ -46,15 +47,15 @@ def test_decode_pairs_hook(self):
         assert unpacked[1] == prod_sum

     def test_only_one_obj_hook(self):
-        self.assertRaises(TypeError, unpackb, b'', object_hook=lambda x: x,
-                          object_pairs_hook=lambda x: x)
+        pytest.raises(TypeError, unpackb, b'', object_hook=lambda x: x,
+                      object_pairs_hook=lambda x: x)

     def test_bad_hook(self):
         def f():
             packed = packb([3, 1 + 2j], default=lambda o: o)
             unpacked = unpackb(packed, use_list=1)  # noqa

-        self.assertRaises(TypeError, f)
+        pytest.raises(TypeError, f)

     def test_array_hook(self):
         packed = packb([1, 2, 3])
@@ -66,11 +67,11 @@ def f():
             packed = packb({1: {'__complex__': True, 'real': 1, 'imag': 2}})
             unpackb(packed, object_hook=self.bad_complex_decoder)

-        self.assertRaises(DecodeError, f)
+        pytest.raises(DecodeError, f)

     def test_an_exception_in_objecthook2(self):
         def f():
             packed = packb({1: [{'__complex__': True, 'real': 1, 'imag': 2}]})
             unpackb(packed, list_hook=self.bad_complex_decoder, use_list=1)

-        self.assertRaises(DecodeError, f)
+        pytest.raises(DecodeError, f)
diff --git a/pandas/tests/io/msgpack/test_pack.py b/pandas/tests/io/msgpack/test_pack.py
index 6f9a271cbd326..c0b3e1b24674f 100644
--- a/pandas/tests/io/msgpack/test_pack.py
+++ b/pandas/tests/io/msgpack/test_pack.py
@@ -1,14 +1,15 @@
 # coding: utf-8

-import unittest
+import pytest
 import struct
+
 from pandas import compat
 from pandas.compat import u, OrderedDict
 from pandas.io.msgpack import packb, unpackb, Unpacker, Packer

-class TestPack(unittest.TestCase):
+class TestPack(object):

     def check(self, data, use_list=False):
         re = unpackb(packb(data), use_list=use_list)
@@ -64,12 +65,12 @@ def testIgnoreUnicodeErrors(self):
         assert re == "abcdef"

     def testStrictUnicodeUnpack(self):
-        self.assertRaises(UnicodeDecodeError, unpackb, packb(b'abc\xeddef'),
-                          encoding='utf-8', use_list=1)
+        pytest.raises(UnicodeDecodeError, unpackb, packb(b'abc\xeddef'),
+                      encoding='utf-8', use_list=1)

     def testStrictUnicodePack(self):
-        self.assertRaises(UnicodeEncodeError, packb, compat.u("abc\xeddef"),
-                          encoding='ascii', unicode_errors='strict')
+        pytest.raises(UnicodeEncodeError, packb, compat.u("abc\xeddef"),
+                      encoding='ascii', unicode_errors='strict')

     def testIgnoreErrorsPack(self):
         re = unpackb(
@@ -79,7 +80,7 @@ def testIgnoreErrorsPack(self):
         assert re == compat.u("abcdef")

     def testNoEncoding(self):
-        self.assertRaises(TypeError, packb, compat.u("abc"), encoding=None)
+        pytest.raises(TypeError, packb, compat.u("abc"), encoding=None)

     def testDecodeBinary(self):
         re = unpackb(packb("abc"), encoding=None, use_list=1)
diff --git a/pandas/tests/io/msgpack/test_sequnpack.py b/pandas/tests/io/msgpack/test_sequnpack.py
index c9c979c4e0e44..1178176c2c557 100644
--- a/pandas/tests/io/msgpack/test_sequnpack.py
+++ b/pandas/tests/io/msgpack/test_sequnpack.py
@@ -1,26 +1,26 @@
 # coding: utf-8

-import unittest
+import pytest

 from pandas import compat
 from pandas.io.msgpack import Unpacker, BufferFull
 from pandas.io.msgpack import OutOfData

-class TestPack(unittest.TestCase):
+class TestPack(object):

     def test_partialdata(self):
         unpacker = Unpacker()
         unpacker.feed(b'\xa5')
-        self.assertRaises(StopIteration, next, iter(unpacker))
+        pytest.raises(StopIteration, next, iter(unpacker))
         unpacker.feed(b'h')
-        self.assertRaises(StopIteration, next, iter(unpacker))
+        pytest.raises(StopIteration, next, iter(unpacker))
         unpacker.feed(b'a')
-        self.assertRaises(StopIteration, next, iter(unpacker))
+        pytest.raises(StopIteration, next, iter(unpacker))
         unpacker.feed(b'l')
-        self.assertRaises(StopIteration, next, iter(unpacker))
+        pytest.raises(StopIteration, next, iter(unpacker))
         unpacker.feed(b'l')
-        self.assertRaises(StopIteration, next, iter(unpacker))
+        pytest.raises(StopIteration, next, iter(unpacker))
         unpacker.feed(b'o')
         assert next(iter(unpacker)) == b'hallo'
@@ -33,7 +33,7 @@ def test_foobar(self):
         assert unpacker.unpack() == ord(b'b')
         assert unpacker.unpack() == ord(b'a')
         assert unpacker.unpack() == ord(b'r')
-        self.assertRaises(OutOfData, unpacker.unpack)
+        pytest.raises(OutOfData, unpacker.unpack)

         unpacker.feed(b'foo')
         unpacker.feed(b'bar')
@@ -53,13 +53,13 @@ def test_foobar_skip(self):
         unpacker.skip()
         assert unpacker.unpack() == ord(b'a')
         unpacker.skip()
-        self.assertRaises(OutOfData, unpacker.unpack)
+        pytest.raises(OutOfData, unpacker.unpack)

     def test_maxbuffersize(self):
-        self.assertRaises(ValueError, Unpacker, read_size=5, max_buffer_size=3)
+        pytest.raises(ValueError, Unpacker, read_size=5, max_buffer_size=3)
         unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1)
         unpacker.feed(b'fo')
-        self.assertRaises(BufferFull, unpacker.feed, b'ob')
+        pytest.raises(BufferFull, unpacker.feed, b'ob')
         unpacker.feed(b'o')
         assert ord('f') == next(unpacker)
         unpacker.feed(b'b')
diff --git a/pandas/tests/io/msgpack/test_unpack.py b/pandas/tests/io/msgpack/test_unpack.py
index 24a8e885d19d6..158094d111b54 100644
@@ -15,7 +15,7 @@ def test_unpack_array_header_from_file(self):
         assert unpacker.unpack() == 2
         assert unpacker.unpack() == 3
         assert unpacker.unpack() == 4
-        self.assertRaises(OutOfData, unpacker.unpack)
+        pytest.raises(OutOfData, unpacker.unpack)

     def test_unpacker_hook_refcnt(self):
         if not hasattr(sys, 'getrefcount'):
diff --git a/pandas/tests/io/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py
index 837b7a7922d75..6d3dc8f637012 100644
@@ -108,22 +108,22 @@ def test_unsupported_dtype(self):
         df.to_csv(path)

         # valid but we don't support it (date)
-        self.assertRaises(TypeError, self.read_csv, path,
-                          dtype={'A': 'datetime64', 'B': 'float64'},
-                          index_col=0)
-        self.assertRaises(TypeError, self.read_csv, path,
-                          dtype={'A': 'datetime64', 'B': 'float64'},
-                          index_col=0, parse_dates=['B'])
+        pytest.raises(TypeError, self.read_csv, path,
+                      dtype={'A': 'datetime64', 'B': 'float64'},
+                      index_col=0)
+        pytest.raises(TypeError, self.read_csv, path,
+                      dtype={'A': 'datetime64', 'B': 'float64'},
+                      index_col=0, parse_dates=['B'])

         # valid but we don't support it
-        self.assertRaises(TypeError, self.read_csv, path,
-                          dtype={'A': 'timedelta64', 'B': 'float64'},
-                          index_col=0)
+        pytest.raises(TypeError, self.read_csv, path,
+                      dtype={'A': 'timedelta64', 'B': 'float64'},
+                      index_col=0)

         # valid but unsupported - fixed width unicode string
-        self.assertRaises(TypeError, self.read_csv, path,
-                          dtype={'A': 'U8'},
-                          index_col=0)
+        pytest.raises(TypeError, self.read_csv, path,
+                      dtype={'A': 'U8'},
+                      index_col=0)

     def test_precise_conversion(self):
         # see gh-8002
diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py
index ca60327d7916c..a0c50bb3a573d 100644
--- a/pandas/tests/io/parser/common.py
+++ b/pandas/tests/io/parser/common.py
@@ -202,8 +202,8 @@ def test_quoting(self):
 Klosterdruckerei\tKlosterdruckerei (1609-1805)\t"Furststiftische Hofdruckerei,  (1609-1805)\tGaller, Alois
 Klosterdruckerei\tKlosterdruckerei (1609-1805)\tHochfurstliche Buchhandlung
 """  # noqa
-        self.assertRaises(Exception, self.read_table, StringIO(bad_line_small),
-                          sep='\t')
+        pytest.raises(Exception, self.read_table, StringIO(bad_line_small),
+                      sep='\t')

         good_line_small = bad_line_small + '"'
         df = self.read_table(StringIO(good_line_small), sep='\t')
@@ -290,7 +290,7 @@ def test_read_table_wrong_num_columns(self):
 6,7,8,9,10,11,12
 11,12,13,14,15,16
 """
-        self.assertRaises(ValueError, self.read_csv, StringIO(data))
+        pytest.raises(ValueError, self.read_csv, StringIO(data))

     def test_read_duplicate_index_explicit(self):
         data = """index,A,B,C,D
@@ -440,7 +440,7 @@ def test_read_chunksize_and_nrows(self):
         tm.assert_frame_equal(reader.get_chunk(size=2), df.iloc[:2])
         tm.assert_frame_equal(reader.get_chunk(size=4), df.iloc[2:5])

-        with tm.assertRaises(StopIteration):
+        with pytest.raises(StopIteration):
             reader.get_chunk(size=3)

     def test_read_chunksize_named(self):
@@ -545,7 +545,7 @@ def test_iterator(self):
         # test bad parameter (skipfooter)
         reader = self.read_csv(StringIO(self.data1), index_col=0,
                                iterator=True, skipfooter=1)
-        self.assertRaises(ValueError, reader.read, 3)
+        pytest.raises(ValueError, reader.read, 3)

     def test_pass_names_with_index(self):
         lines = self.data1.split('\n')
@@ -685,7 +685,7 @@ def test_nonexistent_path(self):
         # gh-2428: pls no segfault
         # gh-14086: raise more helpful FileNotFoundError
         path = '%s.csv' % tm.rands(10)
-        self.assertRaises(compat.FileNotFoundError, self.read_csv, path)
+        pytest.raises(compat.FileNotFoundError, self.read_csv, path)

     def test_missing_trailing_delimiters(self):
         data = """A,B,C,D
@@ -874,8 +874,8 @@ def test_catch_too_many_names(self):
 4,,6
 7,8,9
 10,11,12\n"""
-        tm.assertRaises(ValueError, self.read_csv, StringIO(data),
-                        header=0, names=['a', 'b', 'c', 'd'])
+        pytest.raises(ValueError, self.read_csv, StringIO(data),
+                      header=0, names=['a', 'b', 'c', 'd'])

     def test_ignore_leading_whitespace(self):
         # see gh-3374, gh-6607
@@ -959,8 +959,8 @@ def test_int64_overflow(self):
         # to cast to either int64 or uint64 will result in
         # an OverflowError being raised.
         for conv in (np.int64, np.uint64):
-            self.assertRaises(OverflowError, self.read_csv,
-                              StringIO(data), converters={'ID': conv})
+            pytest.raises(OverflowError, self.read_csv,
+                          StringIO(data), converters={'ID': conv})

         # These numbers fall right inside the int64-uint64 range,
         # so they should be parsed as string.
@@ -1080,18 +1080,18 @@ def test_eof_states(self):

         # ESCAPED_CHAR
         data = "a,b,c\n4,5,6\n\\"
-        self.assertRaises(Exception, self.read_csv,
-                          StringIO(data), escapechar='\\')
+        pytest.raises(Exception, self.read_csv,
+                      StringIO(data), escapechar='\\')

         # ESCAPE_IN_QUOTED_FIELD
         data = 'a,b,c\n4,5,6\n"\\'
-        self.assertRaises(Exception, self.read_csv,
-                          StringIO(data), escapechar='\\')
+        pytest.raises(Exception, self.read_csv,
+                      StringIO(data), escapechar='\\')

         # IN_QUOTED_FIELD
         data = 'a,b,c\n4,5,6\n"'
-        self.assertRaises(Exception, self.read_csv,
-                          StringIO(data), escapechar='\\')
+        pytest.raises(Exception, self.read_csv,
+                      StringIO(data), escapechar='\\')

     def test_uneven_lines_with_usecols(self):
         # See gh-12203
@@ -1312,8 +1312,8 @@ def test_iteration_open_handle(self):
                     break

         if self.engine == 'c':
-            tm.assertRaises(Exception, self.read_table,
-                            f, squeeze=True, header=None)
+            pytest.raises(Exception, self.read_table,
+                          f, squeeze=True, header=None)
         else:
             result = self.read_table(f, squeeze=True, header=None)
             expected = Series(['DDD', 'EEE', 'FFF', 'GGG'], name=0)
@@ -1403,11 +1403,11 @@ def test_inf_parsing(self):

     def test_raise_on_no_columns(self):
         # single newline
         data = "\n"
-        self.assertRaises(EmptyDataError, self.read_csv, StringIO(data))
+        pytest.raises(EmptyDataError, self.read_csv, StringIO(data))

         # test with more than a single newline
         data = "\n\n\n"
-        self.assertRaises(EmptyDataError, self.read_csv, StringIO(data))
+        pytest.raises(EmptyDataError, self.read_csv, StringIO(data))

     def test_compact_ints_use_unsigned(self):
         # see gh-13323
@@ -1695,10 +1695,10 @@ def test_skip_bad_lines(self):
         # see gh-15925
         data = 'a\n1\n1,2,3\n4\n5,6,7'

-        with tm.assertRaises(ParserError):
+        with pytest.raises(ParserError):
             self.read_csv(StringIO(data))

-        with tm.assertRaises(ParserError):
+        with pytest.raises(ParserError):
             self.read_csv(StringIO(data), error_bad_lines=True)

         expected = DataFrame({'a': [1, 4]})
diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py
index bdcd10fc64aa5..c7e9401665cdf 100644
--- a/pandas/tests/io/parser/compression.py
+++ b/pandas/tests/io/parser/compression.py
@@ -60,8 +60,8 @@ def test_zip(self):

         with tm.ensure_clean() as path:
             with open(path, 'wb') as f:
-                self.assertRaises(zipfile.BadZipfile, self.read_csv,
-                                  f, compression='zip')
+                pytest.raises(zipfile.BadZipfile, self.read_csv,
+                              f, compression='zip')

     def test_gzip(self):
         try:
@@ -110,8 +110,8 @@ def test_bz2(self):
             result = self.read_csv(path, compression='bz2')
             tm.assert_frame_equal(result, expected)

-            self.assertRaises(ValueError, self.read_csv,
-                              path, compression='bz3')
+            pytest.raises(ValueError, self.read_csv,
+                          path, compression='bz3')

             with open(path, 'rb') as fin:
                 result = self.read_csv(fin, compression='bz2')
diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py
index 50c9a1bc724fc..6ef2bd8f869dd 100644
--- a/pandas/tests/io/parser/dtypes.py
+++ b/pandas/tests/io/parser/dtypes.py
@@ -5,6 +5,8 @@
 for all of the parsers defined in parsers.py
 """

+import pytest
+
 import numpy as np
 import pandas as pd
 import pandas.util.testing as tm
@@ -40,9 +42,9 @@ def test_passing_dtype(self):
             tm.assert_frame_equal(result, df)

             # invalid dtype
-            self.assertRaises(TypeError, self.read_csv, path,
-                              dtype={'A': 'foo', 'B': 'float64'},
-                              index_col=0)
+            pytest.raises(TypeError, self.read_csv, path,
+                          dtype={'A': 'foo', 'B': 'float64'},
+                          index_col=0)

         # see gh-12048: empty frame
         actual = self.read_csv(StringIO('A,B'), dtype=str)
@@ -213,9 +215,9 @@ def test_raise_on_passed_int_dtype_with_nas(self):
 2001,106380451,10
 2001,,11
 2001,106380451,67"""
-        self.assertRaises(ValueError, self.read_csv, StringIO(data),
-                          sep=",", skipinitialspace=True,
-                          dtype={'DOY': np.int64})
+        pytest.raises(ValueError, self.read_csv, StringIO(data),
+                      sep=",", skipinitialspace=True,
+                      dtype={'DOY': np.int64})

     def test_dtype_with_converter(self):
         data = """a,b
diff --git a/pandas/tests/io/parser/header.py b/pandas/tests/io/parser/header.py
index a40f14f2a24f8..2f0ca5b311aef 100644
--- a/pandas/tests/io/parser/header.py
+++ b/pandas/tests/io/parser/header.py
@@ -5,6 +5,8 @@
 during parsing for all of the parsers defined in parsers.py
 """

+import pytest
+
 import numpy as np
 import pandas.util.testing as tm
@@ -30,9 +32,9 @@ def test_bool_header_arg(self):
 a b"""

         for arg in [True, False]:
-            with tm.assertRaises(TypeError):
+            with pytest.raises(TypeError):
                 self.read_csv(StringIO(data), header=arg)
-            with tm.assertRaises(TypeError):
+            with pytest.raises(TypeError):
                 self.read_table(StringIO(data), header=arg)

     def test_no_header_prefix(self):
@@ -117,27 +119,27 @@ def test_header_multi_index(self):
         # no as_recarray
         with tm.assert_produces_warning(
                 FutureWarning, check_stacklevel=False):
-            self.assertRaises(ValueError, self.read_csv,
-                              StringIO(data), header=[0, 1, 2, 3],
-                              index_col=[0, 1], as_recarray=True,
-                              tupleize_cols=False)
-
-        # names
-        self.assertRaises(ValueError, self.read_csv,
+            pytest.raises(ValueError, self.read_csv,
                           StringIO(data), header=[0, 1, 2, 3],
-                          index_col=[0, 1], names=['foo', 'bar'],
+                          index_col=[0, 1], as_recarray=True,
                           tupleize_cols=False)

+        # names
+        pytest.raises(ValueError, self.read_csv,
+                      StringIO(data), header=[0, 1, 2, 3],
+                      index_col=[0, 1], names=['foo', 'bar'],
+                      tupleize_cols=False)
+
         # usecols
-        self.assertRaises(ValueError, self.read_csv,
-                          StringIO(data), header=[0, 1, 2, 3],
-                          index_col=[0, 1], usecols=['foo', 'bar'],
-                          tupleize_cols=False)
+        pytest.raises(ValueError, self.read_csv,
+                      StringIO(data), header=[0, 1, 2, 3],
+                      index_col=[0, 1], usecols=['foo', 'bar'],
+                      tupleize_cols=False)

         # non-numeric index_col
-        self.assertRaises(ValueError, self.read_csv,
-                          StringIO(data), header=[0, 1, 2, 3],
-                          index_col=['foo', 'bar'], tupleize_cols=False)
+        pytest.raises(ValueError, self.read_csv,
+                      StringIO(data), header=[0, 1, 2, 3],
+                      index_col=['foo', 'bar'], tupleize_cols=False)

     def test_header_multiindex_common_format(self):
diff --git a/pandas/tests/io/parser/index_col.py b/pandas/tests/io/parser/index_col.py
index 6eb15eb3e043c..168f6eda46ed1 100644
--- a/pandas/tests/io/parser/index_col.py
+++ b/pandas/tests/io/parser/index_col.py
@@ -6,6 +6,8 @@
 the parsers defined in parsers.py
 """

+import pytest
+
 import pandas.util.testing as tm

 from pandas import DataFrame, Index, MultiIndex
@@ -29,8 +31,8 @@ def test_index_col_named(self):
         xp = self.read_csv(StringIO(data), header=0).set_index('ID')
         tm.assert_frame_equal(rs, xp)

-        self.assertRaises(ValueError, self.read_csv, StringIO(no_header),
-                          index_col='ID')
+        pytest.raises(ValueError, self.read_csv, StringIO(no_header),
+                      index_col='ID')

         data = """\
 1,2,3,4,hello
@@ -51,8 +53,8 @@ def test_index_col_named(self):

     def test_index_col_is_true(self):
         # see gh-9798
-        self.assertRaises(ValueError, self.read_csv,
-                          StringIO(self.ts_data), index_col=True)
+        pytest.raises(ValueError, self.read_csv,
+                      StringIO(self.ts_data), index_col=True)

     def test_infer_index_col(self):
         data = """A,B,C
diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py
index a61a1fb39985f..6dd92783eac60 100644
--- a/pandas/tests/io/parser/parse_dates.py
+++ b/pandas/tests/io/parser/parse_dates.py
@@ -347,11 +347,11 @@ def test_parse_dates_custom_euroformat(self):
         tm.assert_frame_equal(df, expected)

         parser = lambda d: parse_date(d, day_first=True)
-        self.assertRaises(TypeError, self.read_csv,
-                          StringIO(text), skiprows=[0],
-                          names=['time', 'Q', 'NTU'], index_col=0,
-                          parse_dates=True, date_parser=parser,
-                          na_values=['NA'])
+        pytest.raises(TypeError, self.read_csv,
+                      StringIO(text), skiprows=[0],
+                      names=['time', 'Q', 'NTU'], index_col=0,
+                      parse_dates=True, date_parser=parser,
+                      na_values=['NA'])

     def test_parse_tz_aware(self):
         # See gh-1693
diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py
index 55e72ca51790e..c5fa64d067ee6 100644
--- a/pandas/tests/io/parser/python_parser_only.py
+++ b/pandas/tests/io/parser/python_parser_only.py
@@ -148,8 +148,8 @@ def test_decompression_regex_sep(self):
             result = self.read_csv(path, sep='::', compression='bz2')
             tm.assert_frame_equal(result, expected)

-            self.assertRaises(ValueError, self.read_csv,
-                              path, compression='bz3')
+            pytest.raises(ValueError, self.read_csv,
+                          path, compression='bz3')

     def test_read_table_buglet_4x_multiindex(self):
         # see gh-6607
@@ -213,7 +213,7 @@ def test_multi_char_sep_quotes(self):

         # We expect no match, so there should be an assertion
         # error out of the inner context manager.
-        with tm.assertRaises(AssertionError):
+        with pytest.raises(AssertionError):
             with tm.assertRaisesRegexp(ParserError, msg):
                 self.read_csv(StringIO(data), sep=',,',
                               quoting=csv.QUOTE_NONE)
@@ -231,6 +231,6 @@ def test_skipfooter_bad_row(self):

         # We expect no match, so there should be an assertion
         # error out of the inner context manager.
-        with tm.assertRaises(AssertionError):
+        with pytest.raises(AssertionError):
             with tm.assertRaisesRegexp(ParserError, msg):
                 self.read_csv(StringIO(data))
diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
index 3845ca2d81edd..4a8d2e997ee06 100644
--- a/pandas/tests/io/parser/test_network.py
+++ b/pandas/tests/io/parser/test_network.py
@@ -169,10 +169,10 @@ def test_parse_public_s3_bucket_nrows_python(self):

     @tm.network
     def test_s3_fails(self):
-        with tm.assertRaises(IOError):
+        with pytest.raises(IOError):
             read_csv('s3://nyqpug/asdf.csv')

         # Receive a permission error when trying to read a private bucket.
         # It's irrelevant here that this isn't actually a table.
-        with tm.assertRaises(IOError):
+        with pytest.raises(IOError):
             read_csv('s3://cant_get_it/')
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
index dccae06afe4d1..9498a7d83e0de 100644
--- a/pandas/tests/io/parser/test_read_fwf.py
+++ b/pandas/tests/io/parser/test_read_fwf.py
@@ -243,7 +243,7 @@ def test_bool_header_arg(self):
 a b"""

         for arg in [True, False]:
-            with tm.assertRaises(TypeError):
+            with pytest.raises(TypeError):
                 read_fwf(StringIO(data), header=arg)

     def test_full_file(self):
@@ -401,5 +401,5 @@ def test_skiprows_inference_empty(self):
 78    901 2
 """.strip()

-        with tm.assertRaises(EmptyDataError):
+        with pytest.raises(EmptyDataError):
             read_fwf(StringIO(test), skiprows=3)
diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py
index b6b6508bab13c..bf1d8d4f3e27c 100644
--- a/pandas/tests/io/parser/test_textreader.py
+++ b/pandas/tests/io/parser/test_textreader.py
@@ -5,6 +5,8 @@
 is integral to the C engine in parsers.py
 """

+import pytest
+
 from pandas.compat import StringIO, BytesIO, map

 from pandas import compat
@@ -152,7 +154,7 @@ def test_skip_bad_lines(self):
         reader = TextReader(StringIO(data), delimiter=':',
                             header=None)
-        self.assertRaises(parser.ParserError, reader.read)
+        pytest.raises(parser.ParserError, reader.read)

         reader = TextReader(StringIO(data), delimiter=':',
                             header=None,
@@ -191,8 +193,8 @@ def test_header_not_enough_lines(self):
         assert_array_dicts_equal(expected, recs)

         # not enough rows
-        self.assertRaises(parser.ParserError, TextReader, StringIO(data),
-                          delimiter=',', header=5, as_recarray=True)
+        pytest.raises(parser.ParserError, TextReader, StringIO(data),
+                      delimiter=',', header=5, as_recarray=True)

     def test_header_not_enough_lines_as_recarray(self):
         data = ('skip this\n'
@@ -212,8 +214,8 @@ def test_header_not_enough_lines_as_recarray(self):
         assert_array_dicts_equal(expected, recs)

         # not enough rows
-        self.assertRaises(parser.ParserError, TextReader, StringIO(data),
-                          delimiter=',', header=5, as_recarray=True)
+        pytest.raises(parser.ParserError, TextReader, StringIO(data),
+                      delimiter=',', header=5, as_recarray=True)

     def test_escapechar(self):
         data = ('\\"hello world\"\n'
diff --git a/pandas/tests/io/parser/usecols.py b/pandas/tests/io/parser/usecols.py
index 0cf642983e8d3..1ea7353427c30 100644
--- a/pandas/tests/io/parser/usecols.py
+++ b/pandas/tests/io/parser/usecols.py
@@ -82,8 +82,8 @@ def test_usecols(self):
         tm.assert_frame_equal(result, expected)

         # length conflict, passed names and usecols disagree
-        self.assertRaises(ValueError, self.read_csv, StringIO(data),
-                          names=['a', 'b'], usecols=[1], header=None)
+        pytest.raises(ValueError, self.read_csv, StringIO(data),
+                      names=['a', 'b'], usecols=[1], header=None)

     def test_usecols_index_col_False(self):
         # see gh-9082
diff --git a/pandas/tests/io/sas/test_sas.py b/pandas/tests/io/sas/test_sas.py
index 237e3676c3b3d..461c0fe1fd848 100644
--- a/pandas/tests/io/sas/test_sas.py
+++ b/pandas/tests/io/sas/test_sas.py
@@ -1,3 +1,5 @@
+import pytest
+
 import pandas.util.testing as tm
 from pandas.compat import StringIO
 from pandas import read_sas
@@ -9,5 +11,5 @@ def test_sas_buffer_format(self):

         # GH14947
         b = StringIO("")
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             read_sas(b)
diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py
index 2e701143357e3..f373c13c3bc58 100644
--- a/pandas/tests/io/test_clipboard.py
+++ b/pandas/tests/io/test_clipboard.py
@@ -127,9 +127,9 @@ def test_read_clipboard_infer_excel(self):
     def test_invalid_encoding(self):
         # test case for testing invalid encoding
         data = self.data['string']
-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             data.to_clipboard(encoding='ascii')
-        with tm.assertRaises(NotImplementedError):
+        with pytest.raises(NotImplementedError):
             pd.read_clipboard(encoding='ascii')

     def test_round_trip_valid_encodings(self):
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index c08d235b07c9e..fd29caefb8cb5 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -2,6 +2,7 @@
 Tests for the pandas.io.common functionalities
 """
 import mmap
+import pytest
 import os
 from os.path import isabs
@@ -138,4 +139,4 @@ def test_next(self):
             next_line = next(wrapper)
             self.assertEqual(next_line.strip(), line.strip())

-        self.assertRaises(StopIteration, next, wrapper)
+        pytest.raises(StopIteration, next, wrapper)
diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
index cbfffd785ca0d..02652be2153f1 100644
--- a/pandas/tests/io/test_excel.py
+++ b/pandas/tests/io/test_excel.py
@@ -287,7 +287,7 @@ def test_excel_table_sheet_by_index(self):
         tm.assert_frame_equal(df3, df4)

         import xlrd
-        with tm.assertRaises(xlrd.XLRDError):
+        with pytest.raises(xlrd.XLRDError):
             read_excel(excel, 'asdf')

     def test_excel_table(self):
@@ -399,7 +399,7 @@ def test_reader_dtype(self):
         expected['c'] = ['001', '002', '003', '004']
         tm.assert_frame_equal(actual, expected)

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             actual = self.get_exceldf(basename, dtype={'d': 'int64'})

     def test_reading_all_sheets(self):
@@ -915,13 +915,13 @@ def test_excel_oldindex_format(self):

     def test_read_excel_bool_header_arg(self):
         # GH 6114
         for arg in [True, False]:
-            with tm.assertRaises(TypeError):
+            with pytest.raises(TypeError):
                 pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext),
                               header=arg)

     def test_read_excel_chunksize(self):
         # GH 8011
-        with tm.assertRaises(NotImplementedError):
+        with pytest.raises(NotImplementedError):
             pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext),
                           chunksize=100)

@@ -1040,7 +1040,7 @@ def test_excel_sheet_by_name_raise(self):
         df = read_excel(xl, 0)
         tm.assert_frame_equal(gt, df)

-        with tm.assertRaises(xlrd.XLRDError):
+        with pytest.raises(xlrd.XLRDError):
             read_excel(xl, '0')

     def test_excelwriter_contextmanager(self):
@@ -1675,7 +1675,7 @@ def roundtrip(df, header=True, parser_hdr=0, index=True):
                 # this if will be removed once multi column excel writing
                 # is implemented for now fixing #9794
                 if j > 1:
-                    with tm.assertRaises(NotImplementedError):
+                    with pytest.raises(NotImplementedError):
                         res = roundtrip(df, use_headers, index=False)
                 else:
                     res = roundtrip(df, use_headers)
@@ -1725,7 +1725,7 @@ def roundtrip2(df, header=True, parser_hdr=0, index=True):
         j = 2
         i = 1
         df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j)
-        with tm.assertRaises(NotImplementedError):
+        with pytest.raises(NotImplementedError):
             roundtrip2(df, header=False, index=False)

     def test_duplicated_columns(self):
@@ -1784,7 +1784,7 @@ def test_invalid_columns(self):
             read_frame = read_excel(path, 'test1')
             tm.assert_frame_equal(expected, read_frame)

-            with tm.assertRaises(KeyError):
+            with pytest.raises(KeyError):
                 write_frame.to_excel(path, 'test1', columns=['C', 'D'])

     def test_datetimes(self):
@@ -2164,7 +2164,7 @@ def test_excel_raise_error_on_multiindex_columns_and_no_index(self):
                                           ('2014', 'height'),
                                           ('2014', 'weight')])
         df = DataFrame(np.random.randn(10, 3), columns=cols)
-        with tm.assertRaises(NotImplementedError):
+        with pytest.raises(NotImplementedError):
             with ensure_clean(self.ext) as path:
                 df.to_excel(path, index=False)
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index 0d061d516af28..866ed2cf2f359 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -278,13 +278,13 @@ def test_file_like(self):

     @network
     def test_bad_url_protocol(self):
-        with tm.assertRaises(URLError):
+        with pytest.raises(URLError):
             self.read_html('git://github.com', match='.*Water.*')

     @network
     def test_invalid_url(self):
         try:
-            with tm.assertRaises(URLError):
+            with pytest.raises(URLError):
                 self.read_html('http://www.a23950sdfa908sd.com',
                                match='.*Water.*')
         except ValueError as e:
@@ -691,7 +691,7 @@ def test_decimal_rows(self):

     def test_bool_header_arg(self):
         # GH 6114
         for arg in [True, False]:
-            with tm.assertRaises(TypeError):
+            with pytest.raises(TypeError):
                 read_html(self.spam_data, header=arg)

     def test_converters(self):
@@ -842,10 +842,10 @@ def test_data_fail(self):
         spam_data = os.path.join(DATA_PATH, 'spam.html')
         banklist_data = os.path.join(DATA_PATH, 'banklist.html')

-        with tm.assertRaises(XMLSyntaxError):
+        with pytest.raises(XMLSyntaxError):
             self.read_html(spam_data)

-        with tm.assertRaises(XMLSyntaxError):
+        with pytest.raises(XMLSyntaxError):
             self.read_html(banklist_data)

     def test_works_on_valid_markup(self):
@@ -884,7 +884,7 @@ def test_computer_sales_page(self):

 def test_invalid_flavor():
     url = 'google.com'
-    with tm.assertRaises(ValueError):
+    with pytest.raises(ValueError):
         read_html(url, 'google', flavor='not a* valid**++ flaver')

diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index 329748eb5be33..ca6d0605c193b 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -148,9 +148,9 @@ class A(object):
             def __init__(self):
                 self.read = 0

-        tm.assertRaises(ValueError, read_msgpack, path_or_buf=None)
-        tm.assertRaises(ValueError, read_msgpack, path_or_buf={})
-        tm.assertRaises(ValueError, read_msgpack, path_or_buf=A())
+        pytest.raises(ValueError, read_msgpack, path_or_buf=None)
+        pytest.raises(ValueError, read_msgpack, path_or_buf={})
+        pytest.raises(ValueError, read_msgpack, path_or_buf=A())

 class TestNumpy(TestPackers):
@@ -536,7 +536,7 @@ def _check_roundtrip(self, obj, comparator, **kwargs):
         # currently these are not implemented
         # i_rec = self.encode_decode(obj)
         # comparator(obj, i_rec, **kwargs)
-        self.assertRaises(NotImplementedError, self.encode_decode, obj)
+        pytest.raises(NotImplementedError, self.encode_decode, obj)

     def test_sparse_series(self):

diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py
index 1287686874a58..153ac749b4b17 100644
--- a/pandas/tests/io/test_pytables.py
+++ b/pandas/tests/io/test_pytables.py
@@ -308,20 +308,20 @@ def test_api(self):

             # invalid
             df = tm.makeDataFrame()
-            self.assertRaises(ValueError, df.to_hdf, path,
-                              'df', append=True, format='f')
-            self.assertRaises(ValueError, df.to_hdf, path,
-                              'df', append=True, format='fixed')
+            pytest.raises(ValueError, df.to_hdf, path,
+                          'df', append=True, format='f')
+            pytest.raises(ValueError, df.to_hdf, path,
+                          'df', append=True, format='fixed')

-            self.assertRaises(TypeError, df.to_hdf, path,
-                              'df', append=True, format='foo')
-            self.assertRaises(TypeError, df.to_hdf, path,
-                              'df', append=False, format='bar')
+            pytest.raises(TypeError, df.to_hdf, path,
+                          'df', append=True, format='foo')
+            pytest.raises(TypeError, df.to_hdf, path,
+                          'df', append=False, format='bar')

         # File path doesn't exist
         path = ""
-        self.assertRaises(compat.FileNotFoundError,
-                          read_hdf, path, 'df')
+        pytest.raises(compat.FileNotFoundError,
+                      read_hdf, path, 'df')

     def test_api_default_format(self):

@@ -333,7 +333,7 @@ def test_api_default_format(self):
             _maybe_remove(store, 'df')
             store.put('df', df)
             self.assertFalse(store.get_storer('df').is_table)
-            self.assertRaises(ValueError, store.append, 'df2', df)
+            pytest.raises(ValueError, store.append, 'df2', df)

             pandas.set_option('io.hdf.default_format', 'table')
             _maybe_remove(store, 'df')
@@ -353,7 +353,7 @@ def test_api_default_format(self):
             df.to_hdf(path, 'df')
             with HDFStore(path) as store:
                 self.assertFalse(store.get_storer('df').is_table)
-            self.assertRaises(ValueError, df.to_hdf, path, 'df2', append=True)
+            pytest.raises(ValueError, df.to_hdf, path, 'df2', append=True)

             pandas.set_option('io.hdf.default_format', 'table')
             df.to_hdf(path, 'df3')
@@ -473,7 +473,7 @@ def test_versioning(self):
             # this is an error because its table_type is appendable, but no
             # version info
             store.get_node('df2')._v_attrs.pandas_version = None
-            self.assertRaises(Exception, store.select, 'df2')
+            pytest.raises(Exception, store.select, 'df2')

     def test_mode(self):

@@ -485,7 +485,7 @@ def check(mode):

             # constructor
             if mode in ['r', 'r+']:
-                self.assertRaises(IOError, HDFStore, path, mode=mode)
+                pytest.raises(IOError, HDFStore, path, mode=mode)

             else:
                 store = HDFStore(path, mode=mode)
@@ -499,7 +499,7 @@ def check(mode):
                 def f():
                     with HDFStore(path, mode=mode) as store:  # noqa
                         pass
-                self.assertRaises(IOError, f)
+                pytest.raises(IOError, f)
             else:
                 with HDFStore(path, mode=mode) as store:
                     self.assertEqual(store._handle.mode, mode)
@@ -508,16 +508,16 @@ def f():

             # conv write
             if mode in ['r', 'r+']:
-                self.assertRaises(IOError, df.to_hdf,
-                                  path, 'df', mode=mode)
+                pytest.raises(IOError, df.to_hdf,
+                              path, 'df', mode=mode)
                 df.to_hdf(path, 'df', mode='w')
             else:
                 df.to_hdf(path, 'df', mode=mode)

             # conv read
             if mode in ['w']:
-                self.assertRaises(ValueError, read_hdf,
-                                  path, 'df', mode=mode)
+                pytest.raises(ValueError, read_hdf,
+                              path, 'df', mode=mode)
             else:
                 result = read_hdf(path, 'df', mode=mode)
                 assert_frame_equal(result, df)
@@ -544,7 +544,7 @@ def test_reopen_handle(self):
             store['a'] = tm.makeTimeSeries()

             # invalid mode change
-            self.assertRaises(PossibleDataLossError, store.open, 'w')
+            pytest.raises(PossibleDataLossError, store.open, 'w')

             store.close()
             self.assertFalse(store.is_open)
@@ -621,7 +621,7 @@ def test_get(self):
             right = store['/a']
             tm.assert_series_equal(left, right)

-            self.assertRaises(KeyError, store.get, 'b')
+            pytest.raises(KeyError, store.get, 'b')

     def test_getattr(self):

@@ -642,10 +642,10 @@ def test_getattr(self):
             tm.assert_frame_equal(result, df)

             # errors
-            self.assertRaises(AttributeError, getattr, store, 'd')
+            pytest.raises(AttributeError, getattr, store, 'd')

             for x in ['mode', 'path', 'handle', 'complib']:
-                self.assertRaises(AttributeError, getattr, store, x)
+                pytest.raises(AttributeError, getattr, store, x)

             # not stores
             for x in ['mode', 'path', 'handle', 'complib']:
@@ -665,17 +665,17 @@ def test_put(self):
             store.put('c', df[:10], format='table')

             # not OK, not a table
-            self.assertRaises(
+            pytest.raises(
                 ValueError, store.put, 'b', df[10:], append=True)

             # node does not currently exist, test _is_table_type returns False
             # in this case
             # _maybe_remove(store, 'f')
-            # self.assertRaises(ValueError, store.put, 'f', df[10:],
+            # pytest.raises(ValueError, store.put, 'f', df[10:],
             #                   append=True)

             # can't put to a table (use append instead)
-            self.assertRaises(ValueError, store.put, 'c', df[10:], append=True)
+            pytest.raises(ValueError, store.put, 'c', df[10:], append=True)

             # overwrite table
             store.put('c', df[:10], format='table', append=False)
@@ -717,8 +717,8 @@ def test_put_compression(self):
             tm.assert_frame_equal(store['c'], df)

             # can't compress if format='fixed'
-            self.assertRaises(ValueError, store.put, 'b', df,
-                              format='fixed', complib='zlib')
+            pytest.raises(ValueError, store.put, 'b', df,
+                          format='fixed', complib='zlib')

     def test_put_compression_blosc(self):
         tm.skip_if_no_package('tables', min_version='2.2',
@@ -731,8 +731,8 @@ def test_put_compression_blosc(self):
         with ensure_clean_store(self.path) as store:

             # can't compress if format='fixed'
-            self.assertRaises(ValueError, store.put, 'b', df,
-                              format='fixed', complib='blosc')
+            pytest.raises(ValueError, store.put, 'b', df,
+                          format='fixed', complib='blosc')

             store.put('c', df, format='table', complib='blosc')
             tm.assert_frame_equal(store['c'], df)
@@ -946,7 +946,7 @@ def check(format, index):

             else:
                 # only support for fixed types (and they have a perf warning)
-                self.assertRaises(TypeError, check, 'table', index)
+                pytest.raises(TypeError, check, 'table', index)

                 # PerformanceWarning
                 with catch_warnings(record=True):
@@ -1216,11 +1216,11 @@ def test_append_with_different_block_ordering(self):

             # store additional fields in different blocks
             df['int16_2'] = Series([1] * len(df), dtype='int16')
-            self.assertRaises(ValueError, store.append, 'df', df)
+            pytest.raises(ValueError, store.append, 'df', df)

             # store multiple additional fields in different blocks
             df['float_3'] = Series([1.] * len(df), dtype='float64')
-            self.assertRaises(ValueError, store.append, 'df', df)
+            pytest.raises(ValueError, store.append, 'df', df)

     def test_ndim_indexables(self):
         # test using ndim tables in new ways
@@ -1254,7 +1254,7 @@ def check_indexers(key, indexers):

             # pass incorrect number of axes
             _maybe_remove(store, 'p4d')
-            self.assertRaises(ValueError, store.append, 'p4d', p4d.iloc[
+            pytest.raises(ValueError, store.append, 'p4d', p4d.iloc[
                 :, :, :10, :], axes=['major_axis', 'minor_axis'])

             # different than default indexables #1
@@ -1323,11 +1323,11 @@ def check_col(key, name, size):

             # apply the wrong field (similar to #1)
             store.append('s3', wp, min_itemsize={'major_axis': 20})
-            self.assertRaises(ValueError, store.append, 's3', wp2)
+            pytest.raises(ValueError, store.append, 's3', wp2)

             # test truncation of bigger strings
             store.append('s4', wp)
-            self.assertRaises(ValueError, store.append, 's4', wp2)
+            pytest.raises(ValueError, store.append, 's4', wp2)

             # avoid truncation on elements
             df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
@@ -1352,7 +1352,7 @@ def check_col(key, name, size):
             store.append('df_new', df)
             df_new = DataFrame(
                 [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']])
-            self.assertRaises(ValueError, store.append, 'df_new', df_new)
+            pytest.raises(ValueError, store.append, 'df_new', df_new)

             # min_itemsize on Series index (GH 11412)
             df = tm.makeMixedDataFrame().set_index('C')
@@ -1431,8 +1431,8 @@ def check_col(key, name, size):
             df = DataFrame(['foo', 'foo', 'foo', 'barh',
                             'barh', 'barh'], columns=['A'])
             _maybe_remove(store, 'df')
-            self.assertRaises(ValueError, store.append, 'df',
-                              df, min_itemsize={'foo': 20, 'foobar': 20})
+            pytest.raises(ValueError, store.append, 'df',
+                          df, min_itemsize={'foo': 20, 'foobar': 20})

     def test_to_hdf_with_min_itemsize(self):

@@ -1690,7 +1690,7 @@ def col(t, column):
col(t, column): # try to index a non-table _maybe_remove(store, 'f2') store.put('f2', df) - self.assertRaises(TypeError, store.create_table_index, 'f2') + pytest.raises(TypeError, store.create_table_index, 'f2') def test_append_diff_item_order(self): @@ -1702,8 +1702,8 @@ def test_append_diff_item_order(self): with ensure_clean_store(self.path) as store: store.put('panel', wp1, format='table') - self.assertRaises(ValueError, store.put, 'panel', wp2, - append=True) + pytest.raises(ValueError, store.put, 'panel', wp2, + append=True) def test_append_hierarchical(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], @@ -1754,10 +1754,10 @@ def test_column_multiindex(self): check_index_type=True, check_column_type=True) - self.assertRaises(ValueError, store.put, 'df2', df, - format='table', data_columns=['A']) - self.assertRaises(ValueError, store.put, 'df3', df, - format='table', data_columns=True) + pytest.raises(ValueError, store.put, 'df2', df, + format='table', data_columns=['A']) + pytest.raises(ValueError, store.put, 'df3', df, + format='table', data_columns=True) # appending multi-column on existing table (see GH 6167) with ensure_clean_store(self.path) as store: @@ -1820,13 +1820,13 @@ def make_index(names=None): _maybe_remove(store, 'df') df = DataFrame(np.zeros((12, 2)), columns=[ 'a', 'b'], index=make_index(['date', 'a', 't'])) - self.assertRaises(ValueError, store.append, 'df', df) + pytest.raises(ValueError, store.append, 'df', df) # dup within level _maybe_remove(store, 'df') df = DataFrame(np.zeros((12, 2)), columns=['a', 'b'], index=make_index(['date', 'date', 'date'])) - self.assertRaises(ValueError, store.append, 'df', df) + pytest.raises(ValueError, store.append, 'df', df) # fully named _maybe_remove(store, 'df') @@ -1885,9 +1885,9 @@ def test_pass_spec_to_storer(self): with ensure_clean_store(self.path) as store: store.put('df', df) - self.assertRaises(TypeError, store.select, 'df', columns=['A']) - self.assertRaises(TypeError, store.select, - 'df', where=[('columns=A')]) + pytest.raises(TypeError, store.select, 'df', columns=['A']) + pytest.raises(TypeError, store.select, + 'df', where=[('columns=A')]) def test_append_misc(self): @@ -1897,13 +1897,13 @@ def test_append_misc(self): # unsupported data types for non-tables p4d = tm.makePanel4D() - self.assertRaises(TypeError, store.put, 'p4d', p4d) + pytest.raises(TypeError, store.put, 'p4d', p4d) # unsupported data types - self.assertRaises(TypeError, store.put, 'abc', None) - self.assertRaises(TypeError, store.put, 'abc', '123') - self.assertRaises(TypeError, store.put, 'abc', 123) - self.assertRaises(TypeError, store.put, 'abc', np.arange(5)) + pytest.raises(TypeError, store.put, 'abc', None) + pytest.raises(TypeError, store.put, 'abc', '123') + pytest.raises(TypeError, store.put, 'abc', 123) + pytest.raises(TypeError, store.put, 'abc', np.arange(5)) df = tm.makeDataFrame() store.append('df', df, chunksize=1) @@ -1945,7 +1945,7 @@ def check(obj, comparator): # 0 len df_empty = DataFrame(columns=list('ABC')) store.append('df', df_empty) - self.assertRaises(KeyError, store.select, 'df') + pytest.raises(KeyError, store.select, 'df') # repeated append of 0/non-zero frames df = DataFrame(np.random.rand(10, 3), columns=list('ABC')) @@ -1964,7 +1964,7 @@ def check(obj, comparator): # 0 len p_empty = Panel(items=list('ABC')) store.append('p', p_empty) - self.assertRaises(KeyError, store.select, 'p') + pytest.raises(KeyError, store.select, 'p') # repeated append of 0/non-zero frames p = Panel(np.random.randn(3, 4, 5), 
items=list('ABC')) @@ -1987,12 +1987,12 @@ def test_append_raise(self): df = tm.makeDataFrame() df['invalid'] = [['a']] * len(df) self.assertEqual(df.dtypes['invalid'], np.object_) - self.assertRaises(TypeError, store.append, 'df', df) + pytest.raises(TypeError, store.append, 'df', df) # multiple invalid columns df['invalid2'] = [['a']] * len(df) df['invalid3'] = [['a']] * len(df) - self.assertRaises(TypeError, store.append, 'df', df) + pytest.raises(TypeError, store.append, 'df', df) # datetime with embedded nans as object df = tm.makeDataFrame() @@ -2001,21 +2001,21 @@ def test_append_raise(self): s[0:5] = np.nan df['invalid'] = s self.assertEqual(df.dtypes['invalid'], np.object_) - self.assertRaises(TypeError, store.append, 'df', df) + pytest.raises(TypeError, store.append, 'df', df) # directly ndarray - self.assertRaises(TypeError, store.append, 'df', np.arange(10)) + pytest.raises(TypeError, store.append, 'df', np.arange(10)) # series directly - self.assertRaises(TypeError, store.append, - 'df', Series(np.arange(10))) + pytest.raises(TypeError, store.append, + 'df', Series(np.arange(10))) # appending an incompatible table df = tm.makeDataFrame() store.append('df', df) df['foo'] = 'foo' - self.assertRaises(ValueError, store.append, 'df', df) + pytest.raises(ValueError, store.append, 'df', df) def test_table_index_incompatible_dtypes(self): df1 = DataFrame({'a': [1, 2, 3]}) @@ -2024,8 +2024,8 @@ def test_table_index_incompatible_dtypes(self): with ensure_clean_store(self.path) as store: store.put('frame', df1, format='table') - self.assertRaises(TypeError, store.put, 'frame', df2, - format='table', append=True) + pytest.raises(TypeError, store.put, 'frame', df2, + format='table', append=True) def test_table_values_dtypes_roundtrip(self): @@ -2039,7 +2039,7 @@ def test_table_values_dtypes_roundtrip(self): assert_series_equal(df2.dtypes, store['df_i8'].dtypes) # incompatible dtype - self.assertRaises(ValueError, store.append, 'df_i8', df1) + pytest.raises(ValueError, store.append, 'df_i8', df1) # check creation/storage/retrieval of float32 (a bit hacky to # actually create them though) @@ -2138,7 +2138,7 @@ def test_unimplemented_dtypes_table_columns(self): for n, f in l: df = tm.makeDataFrame() df[n] = f - self.assertRaises( + pytest.raises( TypeError, store.append, 'df1_%s' % n, df) # frame @@ -2150,7 +2150,7 @@ def test_unimplemented_dtypes_table_columns(self): with ensure_clean_store(self.path) as store: # this fails because we have a date in the object block...... 
- self.assertRaises(TypeError, store.append, 'df_unimplemented', df) + pytest.raises(TypeError, store.append, 'df_unimplemented', df) def test_calendar_roundtrip_issue(self): @@ -2235,7 +2235,7 @@ def test_remove(self): self.assertEqual(len(store), 0) # nonexistence - self.assertRaises(KeyError, store.remove, 'a_nonexistent_store') + pytest.raises(KeyError, store.remove, 'a_nonexistent_store') # pathing store['a'] = ts @@ -2264,7 +2264,7 @@ def test_remove_where(self): # non-existence crit1 = 'index>foo' - self.assertRaises(KeyError, store.remove, 'a', [crit1]) + pytest.raises(KeyError, store.remove, 'a', [crit1]) # try to remove non-table (with crit) # non-table ok (where = None) @@ -2286,8 +2286,8 @@ def test_remove_where(self): # non - empty where _maybe_remove(store, 'wp') store.put('wp', wp, format='table') - self.assertRaises(ValueError, store.remove, - 'wp', ['foo']) + pytest.raises(ValueError, store.remove, + 'wp', ['foo']) def test_remove_startstop(self): # GH #4835 and #6177 @@ -2460,19 +2460,19 @@ def test_invalid_terms(self): store.put('p4d', p4d, format='table') # some invalid terms - self.assertRaises(ValueError, store.select, - 'wp', "minor=['A', 'B']") - self.assertRaises(ValueError, store.select, - 'wp', ["index=['20121114']"]) - self.assertRaises(ValueError, store.select, 'wp', [ + pytest.raises(ValueError, store.select, + 'wp', "minor=['A', 'B']") + pytest.raises(ValueError, store.select, + 'wp', ["index=['20121114']"]) + pytest.raises(ValueError, store.select, 'wp', [ "index=['20121114', '20121114']"]) - self.assertRaises(TypeError, Term) + pytest.raises(TypeError, Term) # more invalid - self.assertRaises( + pytest.raises( ValueError, store.select, 'df', 'df.index[3]') - self.assertRaises(SyntaxError, store.select, 'df', 'index>') - self.assertRaises( + pytest.raises(SyntaxError, store.select, 'df', 'index>') + pytest.raises( ValueError, store.select, 'wp', "major_axis<'20000108' & minor_axis['A', 'B']") @@ -2493,8 +2493,8 @@ def test_invalid_terms(self): 'ABCD'), index=date_range('20130101', periods=10)) dfq.to_hdf(path, 'dfq', format='table') - self.assertRaises(ValueError, read_hdf, path, - 'dfq', where="A>0 or C>0") + pytest.raises(ValueError, read_hdf, path, + 'dfq', where="A>0 or C>0") def test_terms(self): @@ -3097,7 +3097,7 @@ def test_select(self): assert_panel_equal(expected, result) # selecting non-table with a where - # self.assertRaises(ValueError, store.select, + # pytest.raises(ValueError, store.select, # 'wp2', ('column', ['A', 'D'])) # select with columns= @@ -3318,10 +3318,10 @@ def test_select_iterator(self): df = tm.makeTimeDataFrame(500) df.to_hdf(path, 'df_non_table') - self.assertRaises(TypeError, read_hdf, path, - 'df_non_table', chunksize=100) - self.assertRaises(TypeError, read_hdf, path, - 'df_non_table', iterator=True) + pytest.raises(TypeError, read_hdf, path, + 'df_non_table', chunksize=100) + pytest.raises(TypeError, read_hdf, path, + 'df_non_table', iterator=True) with ensure_clean_path(self.path) as path: @@ -3671,12 +3671,12 @@ def test_frame_select(self): # invalid terms df = tm.makeTimeDataFrame() store.append('df_time', df) - self.assertRaises( + pytest.raises( ValueError, store.select, 'df_time', "index>0") # can't select if not written as table # store['frame'] = df # self.assertRaises(ValueError, store.select, + # pytest.raises(ValueError, store.select, # 'frame', [crit1, crit2]) def test_frame_select_complex(self): @@ -3715,8 +3715,8 @@ def test_frame_select_complex(self): tm.assert_frame_equal(result, expected) # invert not 
implemented in numexpr :( - self.assertRaises(NotImplementedError, - store.select, 'df', '~(string="bar")') + pytest.raises(NotImplementedError, + store.select, 'df', '~(string="bar")') # invert ok for filters result = store.select('df', "~(columns=['A','B'])") @@ -3804,12 +3804,12 @@ def test_invalid_filtering(self): store.put('df', df, format='table') # not implemented - self.assertRaises(NotImplementedError, store.select, - 'df', "columns=['A'] | columns=['B']") + pytest.raises(NotImplementedError, store.select, + 'df', "columns=['A'] | columns=['B']") # in theory we could deal with this - self.assertRaises(NotImplementedError, store.select, - 'df', "columns=['A','B'] & columns=['C']") + pytest.raises(NotImplementedError, store.select, + 'df', "columns=['A','B'] & columns=['C']") def test_string_select(self): # GH 2973 @@ -3867,11 +3867,11 @@ def test_read_column(self): store.append('df', df) # error - self.assertRaises(KeyError, store.select_column, 'df', 'foo') + pytest.raises(KeyError, store.select_column, 'df', 'foo') def f(): store.select_column('df', 'index', where=['index>5']) - self.assertRaises(Exception, f) + pytest.raises(Exception, f) # valid result = store.select_column('df', 'index') @@ -3879,7 +3879,7 @@ def f(): assert isinstance(result, Series) # not a data indexable column - self.assertRaises( + pytest.raises( ValueError, store.select_column, 'df', 'values_block_0') # a data column @@ -3989,14 +3989,14 @@ def test_coordinates(self): tm.assert_frame_equal(result, expected) # invalid - self.assertRaises(ValueError, store.select, 'df', - where=np.arange(len(df), dtype='float64')) - self.assertRaises(ValueError, store.select, 'df', - where=np.arange(len(df) + 1)) - self.assertRaises(ValueError, store.select, 'df', - where=np.arange(len(df)), start=5) - self.assertRaises(ValueError, store.select, 'df', - where=np.arange(len(df)), start=5, stop=10) + pytest.raises(ValueError, store.select, 'df', + where=np.arange(len(df), dtype='float64')) + pytest.raises(ValueError, store.select, 'df', + where=np.arange(len(df) + 1)) + pytest.raises(ValueError, store.select, 'df', + where=np.arange(len(df)), start=5) + pytest.raises(ValueError, store.select, 'df', + where=np.arange(len(df)), start=5, stop=10) # selection with filter selection = date_range('20000101', periods=500) @@ -4032,12 +4032,12 @@ def test_append_to_multiple(self): with ensure_clean_store(self.path) as store: # exceptions - self.assertRaises(ValueError, store.append_to_multiple, - {'df1': ['A', 'B'], 'df2': None}, df, - selector='df3') - self.assertRaises(ValueError, store.append_to_multiple, - {'df1': None, 'df2': None}, df, selector='df3') - self.assertRaises( + pytest.raises(ValueError, store.append_to_multiple, + {'df1': ['A', 'B'], 'df2': None}, df, + selector='df3') + pytest.raises(ValueError, store.append_to_multiple, + {'df1': None, 'df2': None}, df, selector='df3') + pytest.raises( ValueError, store.append_to_multiple, 'df1', df, 'df1') # regular operation @@ -4097,25 +4097,25 @@ def test_select_as_multiple(self): with ensure_clean_store(self.path) as store: # no tables stored - self.assertRaises(Exception, store.select_as_multiple, - None, where=['A>0', 'B>0'], selector='df1') + pytest.raises(Exception, store.select_as_multiple, + None, where=['A>0', 'B>0'], selector='df1') store.append('df1', df1, data_columns=['A', 'B']) store.append('df2', df2) # exceptions - self.assertRaises(Exception, store.select_as_multiple, - None, where=['A>0', 'B>0'], selector='df1') - self.assertRaises(Exception, 
store.select_as_multiple, - [None], where=['A>0', 'B>0'], selector='df1') - self.assertRaises(KeyError, store.select_as_multiple, - ['df1', 'df3'], where=['A>0', 'B>0'], - selector='df1') - self.assertRaises(KeyError, store.select_as_multiple, - ['df3'], where=['A>0', 'B>0'], selector='df1') - self.assertRaises(KeyError, store.select_as_multiple, - ['df1', 'df2'], where=['A>0', 'B>0'], - selector='df4') + pytest.raises(Exception, store.select_as_multiple, + None, where=['A>0', 'B>0'], selector='df1') + pytest.raises(Exception, store.select_as_multiple, + [None], where=['A>0', 'B>0'], selector='df1') + pytest.raises(KeyError, store.select_as_multiple, + ['df1', 'df3'], where=['A>0', 'B>0'], + selector='df1') + pytest.raises(KeyError, store.select_as_multiple, + ['df3'], where=['A>0', 'B>0'], selector='df1') + pytest.raises(KeyError, store.select_as_multiple, + ['df1', 'df2'], where=['A>0', 'B>0'], + selector='df4') # default select result = store.select('df1', ['A>0', 'B>0']) @@ -4142,9 +4142,9 @@ def test_select_as_multiple(self): # test exception for diff rows store.append('df3', tm.makeTimeDataFrame(nper=50)) - self.assertRaises(ValueError, store.select_as_multiple, - ['df1', 'df3'], where=['A>0', 'B>0'], - selector='df1') + pytest.raises(ValueError, store.select_as_multiple, + ['df1', 'df3'], where=['A>0', 'B>0'], + selector='df1') def test_nan_selection_bug_4858(self): @@ -4231,7 +4231,7 @@ def test_start_stop_fixed(self): df.iloc[8:10, -2] = np.nan dfs = df.to_sparse() store.put('dfs', dfs) - with self.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): store.select('dfs', start=0, stop=5) def test_select_filter_corner(self): @@ -4312,7 +4312,7 @@ def test_multiple_open_close(self): def f(): HDFStore(path) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) store1.close() else: @@ -4374,17 +4374,17 @@ def f(): store = HDFStore(path) store.close() - self.assertRaises(ClosedFileError, store.keys) - self.assertRaises(ClosedFileError, lambda: 'df' in store) - self.assertRaises(ClosedFileError, lambda: len(store)) - self.assertRaises(ClosedFileError, lambda: store['df']) - self.assertRaises(ClosedFileError, lambda: store.df) - self.assertRaises(ClosedFileError, store.select, 'df') - self.assertRaises(ClosedFileError, store.get, 'df') - self.assertRaises(ClosedFileError, store.append, 'df2', df) - self.assertRaises(ClosedFileError, store.put, 'df3', df) - self.assertRaises(ClosedFileError, store.get_storer, 'df2') - self.assertRaises(ClosedFileError, store.remove, 'df2') + pytest.raises(ClosedFileError, store.keys) + pytest.raises(ClosedFileError, lambda: 'df' in store) + pytest.raises(ClosedFileError, lambda: len(store)) + pytest.raises(ClosedFileError, lambda: store['df']) + pytest.raises(ClosedFileError, lambda: store.df) + pytest.raises(ClosedFileError, store.select, 'df') + pytest.raises(ClosedFileError, store.get, 'df') + pytest.raises(ClosedFileError, store.append, 'df2', df) + pytest.raises(ClosedFileError, store.put, 'df3', df) + pytest.raises(ClosedFileError, store.get_storer, 'df2') + pytest.raises(ClosedFileError, store.remove, 'df2') def f(): store.select('df') @@ -4425,7 +4425,7 @@ def test_legacy_table_read(self): store.select('df2', typ='legacy_frame') # old version warning - self.assertRaises( + pytest.raises( Exception, store.select, 'wp1', 'minor_axis=B') df2 = store.select('df2') @@ -4636,7 +4636,7 @@ def test_store_datetime_mixed(self): # index=[np.arange(5).repeat(2), # np.tile(np.arange(2), 5)]) - # self.assertRaises(Exception, 
store.put, 'foo', df, format='table') + # pytest.raises(Exception, store.put, 'foo', df, format='table') def test_append_with_diff_col_name_types_raises_value_error(self): df = DataFrame(np.random.randn(10, 1)) @@ -4650,7 +4650,7 @@ def test_append_with_diff_col_name_types_raises_value_error(self): store.append(name, df) for d in (df2, df3, df4, df5): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): store.append(name, d) def test_query_with_nested_special_character(self): @@ -4795,8 +4795,8 @@ def test_duplicate_column_name(self): df = DataFrame(columns=["a", "a"], data=[[0, 0]]) with ensure_clean_path(self.path) as path: - self.assertRaises(ValueError, df.to_hdf, - path, 'df', format='fixed') + pytest.raises(ValueError, df.to_hdf, + path, 'df', format='fixed') df.to_hdf(path, 'df', format='table') other = read_hdf(path, 'df') @@ -4871,7 +4871,7 @@ def test_to_hdf_with_object_column_names(self): df = DataFrame(np.random.randn(10, 2), columns=index(2)) with ensure_clean_path(self.path) as path: with catch_warnings(record=True): - with self.assertRaises( + with pytest.raises( ValueError, msg=("cannot have non-object label " "DataIndexableCol")): df.to_hdf(path, 'df', format='table', @@ -4926,21 +4926,21 @@ def test_read_hdf_errors(self): columns=list('ABCDE')) with ensure_clean_path(self.path) as path: - self.assertRaises(IOError, read_hdf, path, 'key') + pytest.raises(IOError, read_hdf, path, 'key') df.to_hdf(path, 'df') store = HDFStore(path, mode='r') store.close() - self.assertRaises(IOError, read_hdf, store, 'df') + pytest.raises(IOError, read_hdf, store, 'df') with open(path, mode='r') as store: - self.assertRaises(NotImplementedError, read_hdf, store, 'df') + pytest.raises(NotImplementedError, read_hdf, store, 'df') def test_invalid_complib(self): df = DataFrame(np.random.rand(4, 5), index=list('abcd'), columns=list('ABCDE')) with ensure_clean_path(self.path) as path: - self.assertRaises(ValueError, df.to_hdf, path, - 'df', complib='blosc:zlib') + pytest.raises(ValueError, df.to_hdf, path, + 'df', complib='blosc:zlib') # GH10443 def test_read_nokey(self): @@ -4955,7 +4955,7 @@ def test_read_nokey(self): reread = read_hdf(path) assert_frame_equal(df, reread) df.to_hdf(path, 'df2', mode='a') - self.assertRaises(ValueError, read_hdf, path) + pytest.raises(ValueError, read_hdf, path) def test_read_nokey_table(self): # GH13231 @@ -4967,13 +4967,13 @@ def test_read_nokey_table(self): reread = read_hdf(path) assert_frame_equal(df, reread) df.to_hdf(path, 'df2', mode='a', format='table') - self.assertRaises(ValueError, read_hdf, path) + pytest.raises(ValueError, read_hdf, path) def test_read_nokey_empty(self): with ensure_clean_path(self.path) as path: store = HDFStore(path) store.close() - self.assertRaises(ValueError, read_hdf, path) + pytest.raises(ValueError, read_hdf, path) def test_read_from_pathlib_path(self): @@ -5055,14 +5055,14 @@ def test_query_compare_column_type(self): for v in [2.1, True, pd.Timestamp('2014-01-01'), pd.Timedelta(1, 's')]: query = 'date {op} v'.format(op=op) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): result = store.select('test', where=query) # strings to other columns must be convertible to type v = 'a' for col in ['int', 'float', 'real_date']: query = '{col} {op} v'.format(op=op, col=col) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): result = store.select('test', where=query) for v, col in zip(['1', '1.1', '2014-01-01'], @@ -5200,15 +5200,15 @@ def test_complex_indexing_error(self): 'C': 
complex128}, index=list('abcd')) with ensure_clean_store(self.path) as store: - self.assertRaises(TypeError, store.append, - 'df', df, data_columns=['C']) + pytest.raises(TypeError, store.append, + 'df', df, data_columns=['C']) def test_complex_series_error(self): complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) s = Series(complex128, index=list('abcd')) with ensure_clean_path(self.path) as path: - self.assertRaises(TypeError, s.to_hdf, path, 'obj', format='t') + pytest.raises(TypeError, s.to_hdf, path, 'obj', format='t') with ensure_clean_path(self.path) as path: s.to_hdf(path, 'obj', format='t', index=False) @@ -5283,7 +5283,7 @@ def test_append_with_timezones_dateutil(self): tz=gettz('US/Eastern')), B=Timestamp('20130102', tz=gettz('EET'))), index=range(5)) - self.assertRaises(ValueError, store.append, 'df_tz', df) + pytest.raises(ValueError, store.append, 'df_tz', df) # this is ok _maybe_remove(store, 'df_tz') @@ -5297,7 +5297,7 @@ def test_append_with_timezones_dateutil(self): tz=gettz('US/Eastern')), B=Timestamp('20130102', tz=gettz('CET'))), index=range(5)) - self.assertRaises(ValueError, store.append, 'df_tz', df) + pytest.raises(ValueError, store.append, 'df_tz', df) # as index with ensure_clean_store(self.path) as store: @@ -5350,7 +5350,7 @@ def test_append_with_timezones_pytz(self): df = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'), B=Timestamp('20130102', tz='EET')), index=range(5)) - self.assertRaises(ValueError, store.append, 'df_tz', df) + pytest.raises(ValueError, store.append, 'df_tz', df) # this is ok _maybe_remove(store, 'df_tz') @@ -5363,7 +5363,7 @@ def test_append_with_timezones_pytz(self): df = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'), B=Timestamp('20130102', tz='CET')), index=range(5)) - self.assertRaises(ValueError, store.append, 'df_tz', df) + pytest.raises(ValueError, store.append, 'df_tz', df) # as index with ensure_clean_store(self.path) as store: diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index b4c7f2ba8719e..63fcfb0d2f5ac 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -390,8 +390,8 @@ def _to_sql_fail(self): self.assertTrue(self.pandasSQL.has_table( 'test_frame1'), 'Table not written to DB') - self.assertRaises(ValueError, self.pandasSQL.to_sql, - self.test_frame1, 'test_frame1', if_exists='fail') + pytest.raises(ValueError, self.pandasSQL.to_sql, + self.test_frame1, 'test_frame1', if_exists='fail') self.drop_table('test_frame1') @@ -539,8 +539,8 @@ def test_to_sql_fail(self): sql.has_table('test_frame2', self.conn), 'Table not written to DB') - self.assertRaises(ValueError, sql.to_sql, self.test_frame1, - 'test_frame2', self.conn, if_exists='fail') + pytest.raises(ValueError, sql.to_sql, self.test_frame1, + 'test_frame2', self.conn, if_exists='fail') def test_to_sql_replace(self): sql.to_sql(self.test_frame1, 'test_frame3', @@ -590,8 +590,8 @@ def test_to_sql_series(self): def test_to_sql_panel(self): with catch_warnings(record=True): panel = tm.makePanel() - self.assertRaises(NotImplementedError, sql.to_sql, panel, - 'test_panel', self.conn) + pytest.raises(NotImplementedError, sql.to_sql, panel, + 'test_panel', self.conn) def test_roundtrip(self): sql.to_sql(self.test_frame1, 'test_frame_roundtrip', @@ -679,7 +679,7 @@ def test_timedelta(self): def test_complex(self): df = DataFrame({'a': [1 + 1j, 2j]}) # Complex data type should raise error - self.assertRaises(ValueError, df.to_sql, 'test_complex', self.conn) + pytest.raises(ValueError, df.to_sql, 
'test_complex', self.conn) def test_to_sql_index_label(self): temp_frame = DataFrame({'col1': range(4)}) @@ -760,9 +760,9 @@ def test_to_sql_index_label_multiindex(self): "Specified index_labels not written to database") # wrong length of index_label - self.assertRaises(ValueError, sql.to_sql, temp_frame, - 'test_index_label', self.conn, if_exists='replace', - index_label='C') + pytest.raises(ValueError, sql.to_sql, temp_frame, + 'test_index_label', self.conn, if_exists='replace', + index_label='C') def test_multiindex_roundtrip(self): df = DataFrame.from_records([(1, 2.1, 'line1'), (2, 1.5, 'line2')], @@ -1080,8 +1080,8 @@ def test_sql_open_close(self): def test_con_string_import_error(self): if not SQLALCHEMY_INSTALLED: conn = 'mysql://root@localhost/pandas_nosetest' - self.assertRaises(ImportError, sql.read_sql, "SELECT * FROM iris", - conn) + pytest.raises(ImportError, sql.read_sql, "SELECT * FROM iris", + conn) else: pytest.skip('SQLAlchemy is installed') @@ -1090,7 +1090,7 @@ def test_read_sql_delegate(self): iris_frame2 = sql.read_sql("SELECT * FROM iris", self.conn) tm.assert_frame_equal(iris_frame1, iris_frame2) - self.assertRaises(sql.DatabaseError, sql.read_sql, 'iris', self.conn) + pytest.raises(sql.DatabaseError, sql.read_sql, 'iris', self.conn) def test_safe_names_warning(self): # GH 6798 @@ -1250,7 +1250,7 @@ def test_read_table_columns(self): iris_frame.columns.values, ['SepalLength', 'SepalLength']) def test_read_table_absent(self): - self.assertRaises( + pytest.raises( ValueError, sql.read_sql_table, "this_doesnt_exist", con=self.conn) def test_default_type_conversion(self): @@ -1561,8 +1561,8 @@ def test_dtype(self): meta.reflect() sqltype = meta.tables['dtype_test2'].columns['B'].type self.assertTrue(isinstance(sqltype, sqlalchemy.TEXT)) - self.assertRaises(ValueError, df.to_sql, - 'error', self.conn, dtype={'B': str}) + pytest.raises(ValueError, df.to_sql, + 'error', self.conn, dtype={'B': str}) # GH9083 df.to_sql('dtype_test3', self.conn, dtype={'B': sqlalchemy.String(10)}) @@ -1857,8 +1857,8 @@ def test_schema_support(self): res4 = sql.read_sql_table('test_schema_other', self.conn, schema='other') tm.assert_frame_equal(df, res4) - self.assertRaises(ValueError, sql.read_sql_table, 'test_schema_other', - self.conn, schema='public') + pytest.raises(ValueError, sql.read_sql_table, 'test_schema_other', + self.conn, schema='public') # different if_exists options @@ -2060,8 +2060,8 @@ def test_dtype(self): self.assertEqual(self._get_sqlite_column_type( 'dtype_test2', 'B'), 'STRING') - self.assertRaises(ValueError, df.to_sql, - 'error', self.conn, dtype={'B': bool}) + pytest.raises(ValueError, df.to_sql, + 'error', self.conn, dtype={'B': bool}) # single dtype df.to_sql('single_dtype_test', self.conn, dtype='STRING') @@ -2095,7 +2095,7 @@ def test_illegal_names(self): df = DataFrame([[1, 2], [3, 4]], columns=['a', 'b']) # Raise error on blank - self.assertRaises(ValueError, df.to_sql, "", self.conn) + pytest.raises(ValueError, df.to_sql, "", self.conn) for ndx, weird_name in enumerate( ['test_weird_name]', 'test_weird_name[', @@ -2324,23 +2324,23 @@ def clean_up(test_table_to_drop): self.drop_table(test_table_to_drop) # test if invalid value for if_exists raises appropriate error - self.assertRaises(ValueError, - sql.to_sql, - frame=df_if_exists_1, - con=self.conn, - name=table_name, - if_exists='notvalidvalue') + pytest.raises(ValueError, + sql.to_sql, + frame=df_if_exists_1, + con=self.conn, + name=table_name, + if_exists='notvalidvalue') clean_up(table_name) # test 
if_exists='fail' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, if_exists='fail') - self.assertRaises(ValueError, - sql.to_sql, - frame=df_if_exists_1, - con=self.conn, - name=table_name, - if_exists='fail') + pytest.raises(ValueError, + sql.to_sql, + frame=df_if_exists_1, + con=self.conn, + name=table_name, + if_exists='fail') # test if_exists='replace' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, @@ -2642,23 +2642,23 @@ def clean_up(test_table_to_drop): self.drop_table(test_table_to_drop) # test if invalid value for if_exists raises appropriate error - self.assertRaises(ValueError, - sql.to_sql, - frame=df_if_exists_1, - con=self.conn, - name=table_name, - if_exists='notvalidvalue') + pytest.raises(ValueError, + sql.to_sql, + frame=df_if_exists_1, + con=self.conn, + name=table_name, + if_exists='notvalidvalue') clean_up(table_name) # test if_exists='fail' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, if_exists='fail', index=False) - self.assertRaises(ValueError, - sql.to_sql, - frame=df_if_exists_1, - con=self.conn, - name=table_name, - if_exists='fail') + pytest.raises(ValueError, + sql.to_sql, + frame=df_if_exists_1, + con=self.conn, + name=table_name, + if_exists='fail') # test if_exists='replace' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 7ffe054978b9b..9dc2bd589bf9b 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -523,7 +523,7 @@ def test_no_index(self): with tm.ensure_clean() as path: original.to_stata(path, write_index=False) written_and_read_again = self.read_dta(path) - tm.assertRaises( + pytest.raises( KeyError, lambda: written_and_read_again['index_not_written']) def test_string_no_dates(self): @@ -677,7 +677,7 @@ def test_excessively_long_string(self): s['s' + str(str_len)] = Series(['a' * str_len, 'b' * str_len, 'c' * str_len]) original = DataFrame(s) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): with tm.ensure_clean() as path: original.to_stata(path) @@ -831,11 +831,11 @@ def test_drop_column(self): columns=columns) tm.assert_frame_equal(expected, reordered) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): columns = ['byte_', 'byte_'] read_stata(self.dta15_117, convert_dates=True, columns=columns) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): columns = ['byte_', 'int_', 'long_', 'not_found'] read_stata(self.dta15_117, convert_dates=True, columns=columns) @@ -889,7 +889,7 @@ def test_categorical_warnings_and_errors(self): original = pd.concat([original[col].astype('category') for col in original], axis=1) with tm.ensure_clean() as path: - tm.assertRaises(ValueError, original.to_stata, path) + pytest.raises(ValueError, original.to_stata, path) original = pd.DataFrame.from_records( [['a'], @@ -1151,7 +1151,7 @@ def test_write_variable_label_errors(self): 'b': 'City Exponent', 'c': u''.join(values)} - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): with tm.ensure_clean() as path: original.to_stata(path, variable_labels=variable_labels_utf8) @@ -1161,7 +1161,7 @@ def test_write_variable_label_errors(self): 'that is too long for Stata which means ' 'that it has more than 80 characters'} - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): with tm.ensure_clean() as path: original.to_stata(path, variable_labels=variable_labels_long) @@ -1188,7 +1188,7 @@ def 
test_default_date_conversion(self): def test_unsupported_type(self): original = pd.DataFrame({'a': [1 + 2j, 2 + 4j]}) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): with tm.ensure_clean() as path: original.to_stata(path) @@ -1200,7 +1200,7 @@ def test_unsupported_datetype(self): 'strs': ['apple', 'banana', 'cherry'], 'dates': dates}) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): with tm.ensure_clean() as path: original.to_stata(path, convert_dates={'dates': 'tC'}) @@ -1208,13 +1208,13 @@ def test_unsupported_datetype(self): original = pd.DataFrame({'nums': [1.0, 2.0, 3.0], 'strs': ['apple', 'banana', 'cherry'], 'dates': dates}) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): with tm.ensure_clean() as path: original.to_stata(path) def test_repeated_column_labels(self): # GH 13923 - with tm.assertRaises(ValueError) as cm: + with pytest.raises(ValueError) as cm: read_stata(self.dta23, convert_categoricals=True) assert 'wolof' in str(cm.value) @@ -1239,13 +1239,13 @@ def test_out_of_range_double(self): 'ColumnTooBig': [0.0, np.finfo(np.double).eps, np.finfo(np.double).max]}) - with tm.assertRaises(ValueError) as cm: + with pytest.raises(ValueError) as cm: with tm.ensure_clean() as path: df.to_stata(path) assert 'ColumnTooBig' in str(cm.value) df.loc[2, 'ColumnTooBig'] = np.inf - with tm.assertRaises(ValueError) as cm: + with pytest.raises(ValueError) as cm: with tm.ensure_clean() as path: df.to_stata(path) assert 'ColumnTooBig' in str(cm.value) @@ -1271,7 +1271,7 @@ def test_out_of_range_float(self): reread.set_index('index')) original.loc[2, 'ColumnTooBig'] = np.inf - with tm.assertRaises(ValueError) as cm: + with pytest.raises(ValueError) as cm: with tm.ensure_clean() as path: original.to_stata(path) assert 'ColumnTooBig' in str(cm.value) @@ -1280,6 +1280,6 @@ def test_invalid_encoding(self): # GH15723, validate encoding original = self.read_csv(self.csv3) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): with tm.ensure_clean() as path: original.to_stata(path, encoding='utf-8') diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 8755ce419ae20..64d0fec2b5646 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -106,7 +106,7 @@ def test_boxplot_return_type_legacy(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), columns=['one', 'two', 'three', 'four']) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.boxplot(return_type='NOTATYPE') result = df.boxplot() @@ -263,13 +263,13 @@ def test_grouped_box_return_type(self): def test_grouped_box_layout(self): df = self.hist_df - self.assertRaises(ValueError, df.boxplot, column=['weight', 'height'], - by=df.gender, layout=(1, 1)) - self.assertRaises(ValueError, df.boxplot, - column=['height', 'weight', 'category'], - layout=(2, 1), return_type='dict') - self.assertRaises(ValueError, df.boxplot, column=['weight', 'height'], - by=df.gender, layout=(-1, -1)) + pytest.raises(ValueError, df.boxplot, column=['weight', 'height'], + by=df.gender, layout=(1, 1)) + pytest.raises(ValueError, df.boxplot, + column=['height', 'weight', 'category'], + layout=(2, 1), return_type='dict') + pytest.raises(ValueError, df.boxplot, column=['weight', 'height'], + by=df.gender, layout=(-1, -1)) # _check_plot_works 
adds an ax so catch warning. see GH #13188 with tm.assert_produces_warning(UserWarning): @@ -369,7 +369,7 @@ def test_grouped_box_multiple_axes(self): tm.assert_numpy_array_equal(returned, axes[1]) self.assertIs(returned[0].figure, fig) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): fig, axes = self.plt.subplots(2, 3) # pass different number of axes from required with tm.assert_produces_warning(UserWarning): diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 803907c60d0d0..364a561669ed2 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -98,7 +98,7 @@ def test_nonnumeric_exclude(self): self.assertEqual(len(ax.get_lines()), 1) # B was plotted plt.close(plt.gcf()) - self.assertRaises(TypeError, df['A'].plot) + pytest.raises(TypeError, df['A'].plot) @slow def test_tsplot(self): @@ -130,10 +130,10 @@ def test_both_style_and_color(self): import matplotlib.pyplot as plt # noqa ts = tm.makeTimeSeries() - self.assertRaises(ValueError, ts.plot, style='b-', color='#000099') + pytest.raises(ValueError, ts.plot, style='b-', color='#000099') s = ts.reset_index(drop=True) - self.assertRaises(ValueError, s.plot, style='b-', color='#000099') + pytest.raises(ValueError, s.plot, style='b-', color='#000099') @slow def test_high_freq(self): diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 448201960b3d1..0e9aa3355a658 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -64,7 +64,7 @@ def test_plot(self): df = DataFrame({'x': [1, 2], 'y': [3, 4]}) # mpl >= 1.5.2 (or slightly below) throws AttributeError - with tm.assertRaises((TypeError, AttributeError)): + with pytest.raises((TypeError, AttributeError)): df.plot.line(blarg=True) df = DataFrame(np.random.rand(10, 3), @@ -154,7 +154,7 @@ def test_mpl2_color_cycle_str(self): def test_color_empty_string(self): df = DataFrame(randn(10, 2)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot(color='') def test_color_and_style_arguments(self): @@ -170,7 +170,7 @@ def test_color_and_style_arguments(self): self.assertEqual(color, ['red', 'black']) # passing both 'color' and 'style' arguments should not be allowed # if there is a color symbol in the style strings: - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot(color=['red', 'black'], style=['k-', 'r--']) def test_nonnumeric_exclude(self): @@ -401,9 +401,9 @@ def test_subplots_layout(self): self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) self.assertEqual(axes.shape, (4, 1)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): axes = df.plot(subplots=True, layout=(1, 1)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): axes = df.plot(subplots=True, layout=(-1, -1)) # single column @@ -453,7 +453,7 @@ def test_subplots_multiple_axes(self): self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) tm.close() - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): fig, axes = self.plt.subplots(2, 3) # pass different number of axes from required df.plot(subplots=True, ax=axes) @@ -560,9 +560,9 @@ def test_negative_log(self): index=list(string.ascii_letters[:6]), columns=['x', 'y', 'z', 'four']) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot.area(logy=True) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot.area(loglog=True) def 
_compare_stacked_y_cood(self, normal_lines, stacked_lines): @@ -601,7 +601,7 @@ def test_line_area_stacked(self): self._compare_stacked_y_cood(ax1.lines[2:], ax2.lines[2:]) _check_plot_works(mixed_df.plot, stacked=False) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): mixed_df.plot(stacked=True) _check_plot_works(df.plot, kind=kind, logx=True, stacked=True) @@ -900,9 +900,9 @@ def test_plot_scatter(self): _check_plot_works(df.plot.scatter, x='x', y='y') _check_plot_works(df.plot.scatter, x=1, y=2) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.plot.scatter(x='x') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.plot.scatter(y='y') # GH 6951 @@ -965,7 +965,7 @@ def test_plot_scatter_with_c(self): def test_scatter_colors(self): df = DataFrame({'a': [1, 2, 3], 'b': [1, 2, 3], 'c': [1, 2, 3]}) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.plot.scatter(x='a', y='b', c='c', color='green') default_colors = self._maybe_unpack_cycler(self.plt.rcParams) @@ -1227,7 +1227,7 @@ def test_boxplot_return_type(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), columns=['one', 'two', 'three', 'four']) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot.box(return_type='NOTATYPE') result = df.plot.box(return_type='dict') @@ -1620,7 +1620,7 @@ def test_line_colors(self): self._check_colors(ax.get_lines(), linecolors=custom_colors) tm.close() - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): # Color contains shorthand hex value results in ValueError custom_colors = ['#F00', '#00F', '#FF0', '#000', '#FFF'] # Forced show plot @@ -1677,7 +1677,7 @@ def test_line_colors_and_styles_subplots(self): self._check_colors(ax.get_lines(), linecolors=[c]) tm.close() - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): # Color contains shorthand hex value results in ValueError custom_colors = ['#F00', '#00F', '#FF0', '#000', '#FFF'] # Forced show plot @@ -1947,7 +1947,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c='k', _check_colors(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), '#123456') - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): # Color contains invalid key results in ValueError df.plot.box(color=dict(boxes='red', xxxx='blue')) @@ -1994,7 +1994,7 @@ def test_all_invalid_plot_data(self): for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.plot(kind=kind) @slow @@ -2005,7 +2005,7 @@ def test_partially_invalid_plot_data(self): for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.plot(kind=kind) with tm.RNGContext(42): @@ -2014,12 +2014,12 @@ def test_partially_invalid_plot_data(self): df = DataFrame(rand(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = 'a' for kind in kinds: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.plot(kind=kind) def test_invalid_kind(self): df = DataFrame(randn(10, 2)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot(kind='aasdf') @slow @@ -2074,14 +2074,14 @@ def test_allow_cmap(self): ax = df.plot.hexbin(x='A', y='B', cmap='YlGn') self.assertEqual(ax.collections[0].cmap.name, 'YlGn') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.plot.hexbin(x='A', y='B', cmap='YlGn', colormap='BuGn') 
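The hunks above and below all apply the same mechanical conversion: unittest-style `self.assertRaises`/`tm.assertRaises` becomes `pytest.raises`, in either its call form or its context-manager form. As a minimal illustrative sketch (not part of any patch in this series; `divide` is a hypothetical function invented only for the example), the two forms and the `ExceptionInfo` object they return look like this:

import pytest

def divide(a, b):
    return a / b

def test_divide_call_form():
    # call form, mirroring self.assertRaises(Error, func, *args, **kwargs)
    pytest.raises(ZeroDivisionError, divide, 1, 0)

def test_divide_context_form():
    # context-manager form, mirroring `with tm.assertRaises(Error):`;
    # a tuple of exception types is also accepted, e.g.
    # pytest.raises((ValueError, TypeError), ...)
    with pytest.raises(ZeroDivisionError) as excinfo:
        divide(1, 0)
    # pytest exposes the raised exception as `excinfo.value`
    # (unittest's context manager calls it `cm.exception`)
    assert 'division' in str(excinfo.value)

The context-manager form is the one to reach for when the test also needs to inspect the raised exception's message, which is why the hunks that previously used `as cm:` keep that binding after conversion.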
@slow def test_pie_df(self): df = DataFrame(np.random.rand(5, 3), columns=['X', 'Y', 'Z'], index=['a', 'b', 'c', 'd', 'e']) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot.pie() ax = _check_plot_works(df.plot.pie, y='Y') @@ -2193,11 +2193,11 @@ def test_errorbar_plot(self): ax = _check_plot_works(s_df.plot, y='y', x='x', yerr=yerr) self._check_has_errorbars(ax, xerr=0, yerr=1) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot(yerr=np.random.randn(11)) df_err = DataFrame({'x': ['zzz'] * 12, 'y': ['zzz'] * 12}) - with tm.assertRaises((ValueError, TypeError)): + with pytest.raises((ValueError, TypeError)): df.plot(yerr=df_err) @slow @@ -2288,7 +2288,7 @@ def test_errorbar_asymmetrical(self): self.assertEqual(ax.lines[5].get_xdata()[0], -err[1, 0, 0] / 2) self.assertEqual(ax.lines[6].get_xdata()[0], err[1, 1, 0] / 2) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot(yerr=err.T) tm.close() @@ -2477,7 +2477,7 @@ def test_memory_leak(self): gc.collect() for key in results: # check that every plot was collected - with tm.assertRaises(ReferenceError): + with pytest.raises(ReferenceError): # need to actually access something to get an error results[key].lines @@ -2667,13 +2667,13 @@ def test_option_mpl_style(self): check_stacklevel=False): set_option('display.mpl_style', False) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): set_option('display.mpl_style', 'default2') def test_invalid_colormap(self): df = DataFrame(randn(3, 2), columns=['A', 'B']) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot(colormap='invalid_colormap') def test_plain_axes(self): diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 4552464ce1e1f..6dd97a1181f22 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -2,6 +2,8 @@ """ Test cases for .hist method """ +import pytest + from pandas import Series, DataFrame import pandas.util.testing as tm from pandas.util.testing import slow @@ -45,7 +47,7 @@ def test_hist_legacy(self): _check_plot_works(self.ts.hist, figure=fig, ax=ax1) _check_plot_works(self.ts.hist, figure=fig, ax=ax2) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): self.ts.hist(by=self.ts.index, figure=fig) @slow @@ -57,10 +59,10 @@ def test_hist_bins_legacy(self): @slow def test_hist_layout(self): df = self.hist_df - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.height.hist(layout=(1, 1)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.height.hist(layout=[1, 1]) @slow @@ -134,7 +136,7 @@ def test_plot_fails_when_ax_differs_from_figure(self): fig1 = figure() fig2 = figure() ax1 = fig1.add_subplot(111) - with tm.assertRaises(AssertionError): + with pytest.raises(AssertionError): self.ts.hist(ax=ax1, figure=fig2) @@ -204,7 +206,7 @@ def test_hist_df_legacy(self): tm.close() # propagate attr exception from matplotlib.Axes.hist - with tm.assertRaises(AttributeError): + with pytest.raises(AttributeError): ser.hist(foo='bar') @slow @@ -229,13 +231,13 @@ def test_hist_layout(self): self._check_axes_shape(axes, axes_num=3, layout=expected) # layout too small for all 4 plots - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.hist(layout=(1, 1)) # invalid format for layout - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.hist(layout=(1,)) - with tm.assertRaises(ValueError): 
+ with pytest.raises(ValueError): df.hist(layout=(-1, -1)) @slow @@ -295,7 +297,7 @@ def test_grouped_hist_legacy(self): tm.close() # propagate attr exception from matplotlib.Axes.hist - with tm.assertRaises(AttributeError): + with pytest.raises(AttributeError): grouped_hist(df.A, by=df.C, foo='bar') with tm.assert_produces_warning(FutureWarning): @@ -319,12 +321,12 @@ def test_grouped_hist_legacy2(self): @slow def test_grouped_hist_layout(self): df = self.hist_df - self.assertRaises(ValueError, df.hist, column='weight', by=df.gender, - layout=(1, 1)) - self.assertRaises(ValueError, df.hist, column='height', by=df.category, - layout=(1, 3)) - self.assertRaises(ValueError, df.hist, column='height', by=df.category, - layout=(-1, -1)) + pytest.raises(ValueError, df.hist, column='weight', by=df.gender, + layout=(1, 1)) + pytest.raises(ValueError, df.hist, column='height', by=df.category, + layout=(1, 3)) + pytest.raises(ValueError, df.hist, column='height', by=df.category, + layout=(-1, -1)) with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(df.hist, column='height', by=df.gender, @@ -380,7 +382,7 @@ def test_grouped_hist_multiple_axes(self): tm.assert_numpy_array_equal(returned, axes[1]) self.assertIs(returned[0].figure, fig) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): fig, axes = self.plt.subplots(2, 3) # pass different number of axes from required axes = df.hist(column='height', ax=axes) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index fe0b6c103a0e1..07abd0190a417 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -2,6 +2,8 @@ """ Test cases for misc plot functions """ +import pytest + from pandas import Series, DataFrame from pandas.compat import lmap import pandas.util.testing as tm @@ -310,14 +312,14 @@ def test_subplot_titles(self): self.assertEqual([p.get_title() for p in plot], title) # Case len(title) > len(df) - self.assertRaises(ValueError, df.plot, subplots=True, - title=title + ["kittens > puppies"]) + pytest.raises(ValueError, df.plot, subplots=True, + title=title + ["kittens > puppies"]) # Case len(title) < len(df) - self.assertRaises(ValueError, df.plot, subplots=True, title=title[:2]) + pytest.raises(ValueError, df.plot, subplots=True, title=title[:2]) # Case subplots=False and title is of type list - self.assertRaises(ValueError, df.plot, subplots=False, title=title) + pytest.raises(ValueError, df.plot, subplots=False, title=title) # Case df with 3 numeric columns but layout of (2,2) plot = df.drop('SepalWidth', axis=1).plot(subplots=True, layout=(2, 2), diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 25c2aee4cbe7a..38ce5f44b812f 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -4,6 +4,7 @@ import itertools +import pytest from datetime import datetime @@ -297,7 +298,7 @@ def test_pie_series(self): self.assertEqual(t.get_fontsize(), 7) # includes negative value - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): series = Series([1, 2, 0, 4, -1], index=['a', 'b', 'c', 'd', 'e']) series.plot.pie() @@ -356,7 +357,7 @@ def test_hist_legacy(self): _check_plot_works(self.ts.hist, figure=fig, ax=ax1) _check_plot_works(self.ts.hist, figure=fig, ax=ax2) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): self.ts.hist(by=self.ts.index, figure=fig) @slow @@ -368,10 +369,10 @@ def test_hist_bins_legacy(self): @slow def 
test_hist_layout(self): df = self.hist_df - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.height.hist(layout=(1, 1)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.height.hist(layout=[1, 1]) @slow @@ -530,7 +531,7 @@ def test_df_series_secondary_legend(self): @slow def test_plot_fails_with_dupe_color_and_style(self): x = Series(randn(2)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): x.plot(style='k--', color='k') @slow @@ -636,7 +637,7 @@ def test_invalid_plot_data(self): for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): s.plot(kind=kind) @slow @@ -652,12 +653,12 @@ def test_partially_invalid_plot_data(self): for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): s.plot(kind=kind) def test_invalid_kind(self): s = Series([1, 2]) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): s.plot(kind='aasdf') @slow @@ -704,12 +705,12 @@ def test_errorbar_plot(self): self._check_has_errorbars(ax, xerr=0, yerr=1) # check incorrect lengths and types - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): s.plot(yerr=np.arange(11)) s_err = ['zzz'] * 10 # in mpl 1.5+ this is a TypeError - with tm.assertRaises((ValueError, TypeError)): + with pytest.raises((ValueError, TypeError)): s.plot(yerr=s_err) def test_table(self): diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 75c0efaee85c4..b877a9d181848 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1110,11 +1110,11 @@ def test_concat_keys_levels_no_overlap(self): df = DataFrame(np.random.randn(1, 3), index=['a']) df2 = DataFrame(np.random.randn(1, 4), index=['b']) - self.assertRaises(ValueError, concat, [df, df], - keys=['one', 'two'], levels=[['foo', 'bar', 'baz']]) + pytest.raises(ValueError, concat, [df, df], + keys=['one', 'two'], levels=[['foo', 'bar', 'baz']]) - self.assertRaises(ValueError, concat, [df, df2], - keys=['one', 'two'], levels=[['foo', 'bar', 'baz']]) + pytest.raises(ValueError, concat, [df, df2], + keys=['one', 'two'], levels=[['foo', 'bar', 'baz']]) def test_concat_rename_index(self): a = DataFrame(np.random.rand(3, 3), @@ -1306,7 +1306,7 @@ def test_concat_mixed_objs(self): # invalid concatenation of mixed dims with catch_warnings(record=True): panel = tm.makePanel() - self.assertRaises(ValueError, lambda: concat([panel, s1], axis=1)) + pytest.raises(ValueError, lambda: concat([panel, s1], axis=1)) def test_empty_dtype_coerce(self): @@ -1500,7 +1500,7 @@ def test_concat_exclude_none(self): pieces = [df[:5], None, None, df[5:]] result = concat(pieces) tm.assert_frame_equal(result, df) - self.assertRaises(ValueError, concat, [None, None]) + pytest.raises(ValueError, concat, [None, None]) def test_concat_datetime64_block(self): from pandas.core.indexes.datetimes import date_range @@ -1634,12 +1634,12 @@ def test_concat_invalid(self): # trying to concat a ndframe with a non-ndframe df1 = mkdf(10, 2) for obj in [1, dict(), [1, 2], (1, 2)]: - self.assertRaises(TypeError, lambda x: concat([df1, obj])) + pytest.raises(TypeError, lambda x: concat([df1, obj])) def test_concat_invalid_first_argument(self): df1 = mkdf(10, 2) df2 = mkdf(10, 2) - self.assertRaises(TypeError, concat, df1, df2) + pytest.raises(TypeError, concat, df1, df2) # generator ok though 
concat(DataFrame(np.random.rand(5, 5)) for _ in range(3)) diff --git a/pandas/tests/reshape/test_hashing.py b/pandas/tests/reshape/test_hashing.py index 7ab9558d961aa..fba3a15182238 100644 --- a/pandas/tests/reshape/test_hashing.py +++ b/pandas/tests/reshape/test_hashing.py @@ -47,7 +47,7 @@ def test_hash_array_mixed(self): def test_hash_array_errors(self): for val in [5, 'foo', pd.Timestamp('20130101')]: - self.assertRaises(TypeError, hash_array, val) + pytest.raises(TypeError, hash_array, val) def check_equal(self, obj, **kwargs): a = hash_pandas_object(obj, **kwargs) @@ -81,7 +81,7 @@ def test_hash_tuples(self): def test_hash_tuples_err(self): for val in [5, 'foo', pd.Timestamp('20130101')]: - self.assertRaises(TypeError, hash_tuples, val) + pytest.raises(TypeError, hash_tuples, val) def test_multiindex_unique(self): mi = MultiIndex.from_tuples([(118, 472), (236, 118), @@ -221,7 +221,7 @@ def test_invalid_key(self): # this only matters for object dtypes def f(): hash_pandas_object(Series(list('abc')), hash_key='foo') - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_alread_encoded(self): # if already encoded then ok diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py index 3075d24d95375..aa42f190926ae 100644 --- a/pandas/tests/reshape/test_join.py +++ b/pandas/tests/reshape/test_join.py @@ -3,6 +3,7 @@ from warnings import catch_warnings from numpy.random import randn import numpy as np +import pytest import pandas as pd from pandas.compat import lrange @@ -193,15 +194,15 @@ def test_join_on(self): self.assertTrue(np.isnan(joined['three']['c'])) # merge column not present - self.assertRaises(KeyError, target.join, source, on='E') + pytest.raises(KeyError, target.join, source, on='E') # overlap source_copy = source.copy() source_copy['A'] = 0 - self.assertRaises(ValueError, target.join, source_copy, on='A') + pytest.raises(ValueError, target.join, source_copy, on='A') def test_join_on_fails_with_different_right_index(self): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df = DataFrame({'a': np.random.choice(['m', 'f'], size=3), 'b': np.random.randn(3)}) df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10), @@ -210,7 +211,7 @@ def test_join_on_fails_with_different_left_index(self): merge(df, df2, left_on='a', right_index=True) def test_join_on_fails_with_different_left_index(self): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df = DataFrame({'a': np.random.choice(['m', 'f'], size=3), 'b': np.random.randn(3)}, index=tm.makeCustomIndex(10, 2)) df2 = @@ -219,7 +220,7 @@ def test_join_on_fails_with_different_column_counts(self): merge(df, df2, right_on='b', left_index=True) def test_join_on_fails_with_different_column_counts(self): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df = DataFrame({'a': np.random.choice(['m', 'f'], size=3), 'b': np.random.randn(3)}) df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10), @@ -588,7 +589,7 @@ def _check_diff_index(df_list, result, exp_index): joined = df_list[0].join(df_list[1:], how='inner') _check_diff_index(df_list, joined, df.index[2:8]) - self.assertRaises(ValueError, df_list[0].join, df_list[1:], on='a') + pytest.raises(ValueError, df_list[0].join, df_list[1:], on='a') def test_join_many_mixed(self): df = DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D']) @@ -710,10 +711,10 @@ def test_panel_join_many(self): tm.assert_panel_equal(joined, expected) # edge cases - self.assertRaises(ValueError, 
panels[0].join, panels[1:], - how='outer', lsuffix='foo', rsuffix='bar') - self.assertRaises(ValueError, panels[0].join, panels[1:], - how='right') + pytest.raises(ValueError, panels[0].join, panels[1:], + how='outer', lsuffix='foo', rsuffix='bar') + pytest.raises(ValueError, panels[0].join, panels[1:], + how='right') def _check_join(left, right, result, join_col, how='left', diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 8aff478bee5fd..8ddeb4bdda14a 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -104,25 +104,25 @@ def test_merge_index_singlekey_inner(self): assert_frame_equal(result, expected.loc[:, result.columns]) def test_merge_misspecified(self): - self.assertRaises(ValueError, merge, self.left, self.right, - left_index=True) - self.assertRaises(ValueError, merge, self.left, self.right, - right_index=True) + pytest.raises(ValueError, merge, self.left, self.right, + left_index=True) + pytest.raises(ValueError, merge, self.left, self.right, + right_index=True) - self.assertRaises(ValueError, merge, self.left, self.left, - left_on='key', on='key') + pytest.raises(ValueError, merge, self.left, self.left, + left_on='key', on='key') - self.assertRaises(ValueError, merge, self.df, self.df2, - left_on=['key1'], right_on=['key1', 'key2']) + pytest.raises(ValueError, merge, self.df, self.df2, + left_on=['key1'], right_on=['key1', 'key2']) def test_index_and_on_parameters_confusion(self): - self.assertRaises(ValueError, merge, self.df, self.df2, how='left', - left_index=False, right_index=['key1', 'key2']) - self.assertRaises(ValueError, merge, self.df, self.df2, how='left', - left_index=['key1', 'key2'], right_index=False) - self.assertRaises(ValueError, merge, self.df, self.df2, how='left', - left_index=['key1', 'key2'], - right_index=['key1', 'key2']) + pytest.raises(ValueError, merge, self.df, self.df2, how='left', + left_index=False, right_index=['key1', 'key2']) + pytest.raises(ValueError, merge, self.df, self.df2, how='left', + left_index=['key1', 'key2'], right_index=False) + pytest.raises(ValueError, merge, self.df, self.df2, how='left', + left_index=['key1', 'key2'], + right_index=['key1', 'key2']) def test_merge_overlap(self): merged = merge(self.left, self.left, on='key') @@ -254,7 +254,7 @@ def test_no_overlap_more_informative_error(self): df1 = DataFrame({'x': ['a']}, index=[dt]) df2 = DataFrame({'y': ['b', 'c']}, index=[dt, dt]) - self.assertRaises(MergeError, merge, df1, df2) + pytest.raises(MergeError, merge, df1, df2) def test_merge_non_unique_indexes(self): @@ -549,7 +549,7 @@ def test_overlapping_columns_error_message(self): # #2649, #10639 df2.columns = ['key1', 'foo', 'foo'] - self.assertRaises(ValueError, merge, df, df2) + pytest.raises(ValueError, merge, df, df2) def test_merge_on_datetime64tz(self): @@ -663,9 +663,9 @@ def test_indicator(self): assert_frame_equal(test_custom_name, df_result_custom_name) # Check only accepts strings and booleans - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): merge(df1, df2, on='col1', how='outer', indicator=5) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df1.merge(df2, on='col1', how='outer', indicator=5) # Check result integrity @@ -689,20 +689,20 @@ def test_indicator(self): for i in ['_right_indicator', '_left_indicator', '_merge']: df_badcolumn = DataFrame({'col1': [1, 2], i: [2, 2]}) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): merge(df1, df_badcolumn, on='col1', 
how='outer', indicator=True) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df1.merge(df_badcolumn, on='col1', how='outer', indicator=True) # Check for name conflict with custom name df_badcolumn = DataFrame( {'col1': [1, 2], 'custom_column_name': [2, 2]}) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): merge(df1, df_badcolumn, on='col1', how='outer', indicator='custom_column_name') - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df1.merge(df_badcolumn, on='col1', how='outer', indicator='custom_column_name') @@ -1136,14 +1136,14 @@ def test_join_multi_levels(self): def f(): household.join(portfolio, how='inner') - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) portfolio2 = portfolio.copy() portfolio2.index.set_names(['household_id', 'foo']) def f(): portfolio2.join(portfolio, how='inner') - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_join_multi_levels2(self): @@ -1184,7 +1184,7 @@ def test_join_multi_levels2(self): def f(): household.join(log_return, how='inner') - self.assertRaises(NotImplementedError, f) + pytest.raises(NotImplementedError, f) # this is the equivalency result = (merge(household.reset_index(), log_return.reset_index(), @@ -1212,7 +1212,7 @@ def f(): def f(): household.join(log_return, how='outer') - self.assertRaises(NotImplementedError, f) + pytest.raises(NotImplementedError, f) @pytest.fixture diff --git a/pandas/tests/reshape/test_merge_asof.py b/pandas/tests/reshape/test_merge_asof.py index 865c413bad11e..0b5b580563741 100644 --- a/pandas/tests/reshape/test_merge_asof.py +++ b/pandas/tests/reshape/test_merge_asof.py @@ -1,4 +1,5 @@ import os +import pytest import pytz import numpy as np @@ -200,14 +201,14 @@ def test_multi_index(self): # MultiIndex is prohibited trades = self.trades.set_index(['time', 'price']) quotes = self.quotes.set_index('time') - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, left_index=True, right_index=True) trades = self.trades.set_index('time') quotes = self.quotes.set_index(['time', 'bid']) - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, left_index=True, right_index=True) @@ -217,7 +218,7 @@ def test_on_and_index(self): # 'on' parameter and index together is prohibited trades = self.trades.set_index('time') quotes = self.quotes.set_index('time') - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, left_on='price', left_index=True, @@ -225,7 +226,7 @@ def test_on_and_index(self): trades = self.trades.set_index('time') quotes = self.quotes.set_index('time') - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, right_on='bid', left_index=True, @@ -399,7 +400,7 @@ def test_multiby_indexed(self): assert_frame_equal(expected, result) - with self.assertRaises(MergeError): + with pytest.raises(MergeError): pd.merge_asof(left, right, left_index=True, right_index=True, left_by=['k1', 'k2'], right_by=['k1']) @@ -432,18 +433,18 @@ def test_valid_join_keys(self): trades = self.trades quotes = self.quotes - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, left_on='time', right_on='bid', by='ticker') - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, on=['time', 'ticker'], by='ticker') - with self.assertRaises(MergeError): + with pytest.raises(MergeError): 
merge_asof(trades, quotes, by='ticker') @@ -474,7 +475,7 @@ def test_valid_allow_exact_matches(self): trades = self.trades quotes = self.quotes - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, on='time', by='ticker', @@ -498,27 +499,27 @@ def test_valid_tolerance(self): tolerance=1) # incompat - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, on='time', by='ticker', tolerance=1) # invalid - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades.reset_index(), quotes.reset_index(), on='index', by='ticker', tolerance=1.0) # invalid negative - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, on='time', by='ticker', tolerance=-Timedelta('1s')) - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades.reset_index(), quotes.reset_index(), on='index', by='ticker', @@ -532,7 +533,7 @@ def test_non_sorted(self): # we require that we are already sorted on time & quotes self.assertFalse(trades.time.is_monotonic) self.assertFalse(quotes.time.is_monotonic) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): merge_asof(trades, quotes, on='time', by='ticker') @@ -540,7 +541,7 @@ def test_non_sorted(self): trades = self.trades.sort_values('time') self.assertTrue(trades.time.is_monotonic) self.assertFalse(quotes.time.is_monotonic) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): merge_asof(trades, quotes, on='time', by='ticker') @@ -891,7 +892,7 @@ def test_on_specialized_type(self): df1 = df1.sort_values('value').reset_index(drop=True) if dtype == np.float16: - with self.assertRaises(MergeError): + with pytest.raises(MergeError): pd.merge_asof(df1, df2, on='value') continue @@ -928,7 +929,7 @@ def test_on_specialized_type_by_int(self): df1 = df1.sort_values('value').reset_index(drop=True) if dtype == np.float16: - with self.assertRaises(MergeError): + with pytest.raises(MergeError): pd.merge_asof(df1, df2, on='value', by='key') else: result = pd.merge_asof(df1, df2, on='value', by='key') diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 0624505ae619b..242011d6f23da 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1,5 +1,8 @@ from datetime import datetime, date, timedelta +import pytest + + import numpy as np from collections import OrderedDict @@ -553,17 +556,17 @@ def test_margins_no_values_two_row_two_cols(self): def test_pivot_table_with_margins_set_margin_name(self): # GH 3335 for margin_name in ['foo', 'one', 666, None, ['a', 'b']]: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): # multi-index index pivot_table(self.data, values='D', index=['A', 'B'], columns=['C'], margins=True, margins_name=margin_name) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): # multi-index column pivot_table(self.data, values='D', index=['C'], columns=['A', 'B'], margins=True, margins_name=margin_name) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): # non-multi-index index/column pivot_table(self.data, values='D', index=['A'], columns=['B'], margins=True, @@ -626,10 +629,10 @@ def test_pivot_timegrouper(self): values='Quantity', aggfunc=np.sum) tm.assert_frame_equal(result, expected.T) - self.assertRaises(KeyError, lambda: pivot_table( + pytest.raises(KeyError, lambda: pivot_table( df, index=Grouper(freq='6MS', 
key='foo'), columns='Buyer', values='Quantity', aggfunc=np.sum)) - self.assertRaises(KeyError, lambda: pivot_table( + pytest.raises(KeyError, lambda: pivot_table( df, index='Buyer', columns=Grouper(freq='6MS', key='foo'), values='Quantity', aggfunc=np.sum)) @@ -646,10 +649,10 @@ def test_pivot_timegrouper(self): values='Quantity', aggfunc=np.sum) tm.assert_frame_equal(result, expected.T) - self.assertRaises(ValueError, lambda: pivot_table( + pytest.raises(ValueError, lambda: pivot_table( df, index=Grouper(freq='6MS', level='foo'), columns='Buyer', values='Quantity', aggfunc=np.sum)) - self.assertRaises(ValueError, lambda: pivot_table( + pytest.raises(ValueError, lambda: pivot_table( df, index='Buyer', columns=Grouper(freq='6MS', level='foo'), values='Quantity', aggfunc=np.sum)) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 0eb1e5ff3cf11..8960ae610f8d7 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=W0612,E1101 +import pytest + from pandas import DataFrame, Series import pandas as pd @@ -428,11 +430,11 @@ def test_dataframe_dummies_prefix_sep(self): assert_frame_equal(result, expected) def test_dataframe_dummies_prefix_bad_length(self): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): get_dummies(self.df, prefix=['too few'], sparse=self.sparse) def test_dataframe_dummies_prefix_sep_bad_length(self): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): get_dummies(self.df, prefix_sep=['bad'], sparse=self.sparse) def test_dataframe_dummies_prefix_dict(self): @@ -740,7 +742,7 @@ def test_pairs(self): spec = {'visitdt': ['visitdt%d' % i for i in range(1, 3)], 'wt': ['wt%d' % i for i in range(1, 4)]} - self.assertRaises(ValueError, lreshape, df, spec) + pytest.raises(ValueError, lreshape, df, spec) class TestWideToLong(tm.TestCase): diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index adda25a210ba6..8aaa8a51d7020 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -1,4 +1,5 @@ import os +import pytest import numpy as np from pandas.compat import zip @@ -85,18 +86,18 @@ def test_bins_from_intervalindex(self): def test_bins_not_monotonic(self): data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] - self.assertRaises(ValueError, cut, data, [0.1, 1.5, 1, 10]) + pytest.raises(ValueError, cut, data, [0.1, 1.5, 1, 10]) def test_wrong_num_labels(self): data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] - self.assertRaises(ValueError, cut, data, [0, 1, 10], - labels=['foo', 'bar', 'baz']) + pytest.raises(ValueError, cut, data, [0, 1, 10], + labels=['foo', 'bar', 'baz']) def test_cut_corner(self): # h3h - self.assertRaises(ValueError, cut, [], 2) + pytest.raises(ValueError, cut, [], 2) - self.assertRaises(ValueError, cut, [1, 2, 3], 0.5) + pytest.raises(ValueError, cut, [1, 2, 3], 0.5) def test_cut_out_of_range_more(self): # #1511 @@ -326,11 +327,11 @@ def test_qcut_duplicates_bin(self): result = qcut(values, 3, duplicates='drop') tm.assert_index_equal(result.categories, expected) - self.assertRaises(ValueError, qcut, values, 3) - self.assertRaises(ValueError, qcut, values, 3, duplicates='raise') + pytest.raises(ValueError, qcut, values, 3) + pytest.raises(ValueError, qcut, values, 3, duplicates='raise') # invalid - self.assertRaises(ValueError, qcut, values, 3, duplicates='foo') + pytest.raises(ValueError, qcut, values, 3, duplicates='foo') def 
test_single_quantile(self): # issue 15431 @@ -489,7 +490,7 @@ def test_datetime_nan(self): def f(): cut(date_range('20130101', periods=3), bins=[0, 2, 4]) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) result = cut(date_range('20130102', periods=5), bins=date_range('20130101', periods=2)) diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py index f9224d0126f6c..1c67b13a9c1c9 100644 --- a/pandas/tests/reshape/test_union_categoricals.py +++ b/pandas/tests/reshape/test_union_categoricals.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np import pandas as pd from pandas import Categorical, Series, CategoricalIndex @@ -265,7 +267,7 @@ def test_union_categoricals_sort(self): c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True) c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): union_categoricals([c1, c2], sort_categories=True) def test_union_categoricals_sort_false(self): @@ -335,5 +337,5 @@ def test_union_categorical_unwrap(self): result = union_categoricals([c1, c2]) tm.assert_categorical_equal(result, expected) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): union_categoricals([c1, ['a', 'b', 'c']]) diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/test_interval.py index 63e57fb472861..717ef38a5b447 100644 --- a/pandas/tests/scalar/test_interval.py +++ b/pandas/tests/scalar/test_interval.py @@ -29,7 +29,7 @@ def test_contains(self): self.assertIn(0.5, self.interval) self.assertIn(1, self.interval) self.assertNotIn(0, self.interval) - self.assertRaises(TypeError, lambda: self.interval in self.interval) + pytest.raises(TypeError, lambda: self.interval in self.interval) interval = Interval(0, 1, closed='both') self.assertIn(0, interval) diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index 4978b801c741b..50c8bcbb68fcb 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np from datetime import datetime, date, timedelta @@ -293,7 +295,7 @@ def test_construction(self): i4 = Period('2005', freq='M') i5 = Period('2005', freq='m') - self.assertRaises(ValueError, i1.__ne__, i4) + pytest.raises(ValueError, i1.__ne__, i4) self.assertEqual(i4, i5) i1 = Period.now('Q') @@ -332,9 +334,9 @@ def test_construction(self): freq='U') self.assertEqual(i1, expected) - self.assertRaises(ValueError, Period, ordinal=200701) + pytest.raises(ValueError, Period, ordinal=200701) - self.assertRaises(ValueError, Period, '2007-1-1', freq='X') + pytest.raises(ValueError, Period, '2007-1-1', freq='X') def test_construction_bday(self): @@ -493,9 +495,9 @@ def test_period_constructor_offsets(self): freq='U') self.assertEqual(i1, expected) - self.assertRaises(ValueError, Period, ordinal=200701) + pytest.raises(ValueError, Period, ordinal=200701) - self.assertRaises(ValueError, Period, '2007-1-1', freq='X') + pytest.raises(ValueError, Period, '2007-1-1', freq='X') def test_freq_str(self): i1 = Period('1982', freq='Min') @@ -572,7 +574,7 @@ def test_sub_delta(self): result = left - right self.assertEqual(result, 4) - with self.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): left - Period('2007-01', freq='M') def test_to_timestamp(self): @@ -851,13 +853,13 @@ def test_constructor_corner(self): expected = Period('2007-01', freq='2M') 
self.assertEqual(Period(year=2007, month=1, freq='2M'), expected) - self.assertRaises(ValueError, Period, datetime.now()) - self.assertRaises(ValueError, Period, datetime.now().date()) - self.assertRaises(ValueError, Period, 1.6, freq='D') - self.assertRaises(ValueError, Period, ordinal=1.6, freq='D') - self.assertRaises(ValueError, Period, ordinal=2, value=1, freq='D') + pytest.raises(ValueError, Period, datetime.now()) + pytest.raises(ValueError, Period, datetime.now().date()) + pytest.raises(ValueError, Period, 1.6, freq='D') + pytest.raises(ValueError, Period, ordinal=1.6, freq='D') + pytest.raises(ValueError, Period, ordinal=2, value=1, freq='D') self.assertIs(Period(None), pd.NaT) - self.assertRaises(ValueError, Period, month=1) + pytest.raises(ValueError, Period, month=1) p = Period('2007-01-01', freq='D') @@ -888,9 +890,9 @@ def test_constructor_infer_freq(self): self.assertEqual(p.freq, 'U') def test_badinput(self): - self.assertRaises(ValueError, Period, '-2000', 'A') - self.assertRaises(tslib.DateParseError, Period, '0', 'A') - self.assertRaises(tslib.DateParseError, Period, '1/1/-2000', 'A') + pytest.raises(ValueError, Period, '-2000', 'A') + pytest.raises(tslib.DateParseError, Period, '0', 'A') + pytest.raises(tslib.DateParseError, Period, '1/1/-2000', 'A') def test_multiples(self): result1 = Period('1989', freq='2A') @@ -916,11 +918,11 @@ def test_round_trip(self): class TestPeriodField(tm.TestCase): def test_get_period_field_raises_on_out_of_range(self): - self.assertRaises(ValueError, libperiod.get_period_field, -1, 0, 0) + pytest.raises(ValueError, libperiod.get_period_field, -1, 0, 0) def test_get_period_field_array_raises_on_out_of_range(self): - self.assertRaises(ValueError, libperiod.get_period_field_arr, -1, - np.empty(1), 0) + pytest.raises(ValueError, libperiod.get_period_field_arr, -1, + np.empty(1), 0) class TestComparisons(tm.TestCase): @@ -936,7 +938,7 @@ def test_equal(self): self.assertEqual(self.january1, self.january2) def test_equal_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): self.january1 == self.day def test_notEqual(self): @@ -947,43 +949,43 @@ def test_greater(self): self.assertTrue(self.february > self.january1) def test_greater_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): self.january1 > self.day def test_greater_Raises_Type(self): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): self.january1 > 1 def test_greaterEqual(self): self.assertTrue(self.january1 >= self.january2) def test_greaterEqual_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): self.january1 >= self.day - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): print(self.january1 >= 1) def test_smallerEqual(self): self.assertTrue(self.january1 <= self.january2) def test_smallerEqual_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): self.january1 <= self.day def test_smallerEqual_Raises_Type(self): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): self.january1 <= 1 def test_smaller(self): self.assertTrue(self.january1 < self.february) def test_smaller_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): self.january1 < self.day def 
test_smaller_Raises_Type(self): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): self.january1 < 1 def test_sort(self): @@ -1062,14 +1064,14 @@ def test_add_offset(self): for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): p + o if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): o + p else: - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): o + p for freq in ['M', '2M', '3M']: @@ -1085,14 +1087,14 @@ def test_add_offset(self): for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): p + o if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): o + p else: - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): o + p # freq is Tick @@ -1109,12 +1111,12 @@ def test_add_offset(self): exp = Period('2011-04-03', freq=freq) self.assertEqual(p + np.timedelta64(2, 'D'), exp) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): np.timedelta64(2, 'D') + p exp = Period('2011-04-02', freq=freq) self.assertEqual(p + np.timedelta64(3600 * 24, 's'), exp) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): np.timedelta64(3600 * 24, 's') + p exp = Period('2011-03-30', freq=freq) @@ -1128,14 +1130,14 @@ def test_add_offset(self): for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(4, 'h'), timedelta(hours=23)]: - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): p + o if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): o + p else: - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): o + p for freq in ['H', '2H', '3H']: @@ -1151,12 +1153,12 @@ def test_add_offset(self): exp = Period('2011-04-01 12:00', freq=freq) self.assertEqual(p + np.timedelta64(3, 'h'), exp) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): np.timedelta64(3, 'h') + p exp = Period('2011-04-01 10:00', freq=freq) self.assertEqual(p + np.timedelta64(3600, 's'), exp) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): np.timedelta64(3600, 's') + p exp = Period('2011-04-01 11:00', freq=freq) @@ -1170,14 +1172,14 @@ def test_add_offset(self): for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(3200, 's'), timedelta(hours=23, minutes=30)]: - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): p + o if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): o + p else: - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): o + p def test_add_offset_nat(self): @@ -1194,7 +1196,7 @@ def test_add_offset_nat(self): self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): o + p else: self.assertIs(o + p, tslib.NaT) @@ -1205,7 +1207,7 @@ def test_add_offset_nat(self): self.assertIs(p + o, tslib.NaT) 
if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): o + p else: self.assertIs(o + p, tslib.NaT) @@ -1216,7 +1218,7 @@ def test_add_offset_nat(self): self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): o + p else: self.assertIs(o + p, tslib.NaT) @@ -1230,7 +1232,7 @@ def test_add_offset_nat(self): self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): o + p else: self.assertIs(o + p, tslib.NaT) @@ -1241,7 +1243,7 @@ def test_add_offset_nat(self): self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): o + p else: self.assertIs(o + p, tslib.NaT) @@ -1262,7 +1264,7 @@ def test_add_offset_nat(self): self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): o + p else: self.assertIs(o + p, tslib.NaT) @@ -1286,7 +1288,7 @@ def test_sub_offset(self): for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): p - o for freq in ['M', '2M', '3M']: @@ -1299,7 +1301,7 @@ def test_sub_offset(self): for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): p - o # freq is Tick @@ -1321,7 +1323,7 @@ def test_sub_offset(self): for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(4, 'h'), timedelta(hours=23)]: - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): p - o for freq in ['H', '2H', '3H']: @@ -1342,7 +1344,7 @@ def test_sub_offset(self): for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(3200, 's'), timedelta(hours=23, minutes=30)]: - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): p - o def test_sub_offset_nat(self): diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index 5b81237c7c000..4cced3e480c78 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -114,15 +114,15 @@ def test_construction(self): # currently invalid as it has a - on the hhmmdd part (only allowed on # the days) - self.assertRaises(ValueError, - lambda: Timedelta('-10 days -1 h 1.5m 1s 3us')) + pytest.raises(ValueError, + lambda: Timedelta('-10 days -1 h 1.5m 1s 3us')) # only leading neg signs are allowed - self.assertRaises(ValueError, - lambda: Timedelta('10 days -1 h 1.5m 1s 3us')) + pytest.raises(ValueError, + lambda: Timedelta('10 days -1 h 1.5m 1s 3us')) # no units specified - self.assertRaises(ValueError, lambda: Timedelta('3.1415')) + pytest.raises(ValueError, lambda: Timedelta('3.1415')) # invalid construction tm.assertRaisesRegexp(ValueError, "cannot construct a Timedelta", @@ -169,12 +169,12 @@ def test_construction(self): self.assertEqual(to_timedelta(pd.offsets.Hour(2)), Timedelta(u'0 days, 02:00:00')) - self.assertRaises(ValueError, lambda: Timedelta(u'foo bar')) + pytest.raises(ValueError, lambda: Timedelta(u'foo bar')) def test_overflow_on_construction(self): # xref 
https://github.com/statsmodels/statsmodels/issues/3374 value = pd.Timedelta('1day').value * 20169940 - self.assertRaises(OverflowError, pd.Timedelta, value) + pytest.raises(OverflowError, pd.Timedelta, value) def test_total_seconds_scalar(self): # GH 10939 @@ -248,9 +248,9 @@ def check(value): self.assertEqual(rng.microseconds, 0) self.assertEqual(rng.nanoseconds, 0) - self.assertRaises(AttributeError, lambda: rng.hours) - self.assertRaises(AttributeError, lambda: rng.minutes) - self.assertRaises(AttributeError, lambda: rng.milliseconds) + pytest.raises(AttributeError, lambda: rng.hours) + pytest.raises(AttributeError, lambda: rng.minutes) + pytest.raises(AttributeError, lambda: rng.milliseconds) # GH 10050 check(rng.days) @@ -270,9 +270,9 @@ def check(value): self.assertEqual(rng.seconds, 10 * 3600 + 11 * 60 + 12) self.assertEqual(rng.microseconds, 100 * 1000 + 123) self.assertEqual(rng.nanoseconds, 456) - self.assertRaises(AttributeError, lambda: rng.hours) - self.assertRaises(AttributeError, lambda: rng.minutes) - self.assertRaises(AttributeError, lambda: rng.milliseconds) + pytest.raises(AttributeError, lambda: rng.hours) + pytest.raises(AttributeError, lambda: rng.minutes) + pytest.raises(AttributeError, lambda: rng.milliseconds) # components tup = pd.to_timedelta(-1, 'us').components @@ -394,7 +394,7 @@ def test_round(self): # invalid for freq in ['Y', 'M', 'foobar']: - self.assertRaises(ValueError, lambda: t1.round(freq)) + pytest.raises(ValueError, lambda: t1.round(freq)) t1 = timedelta_range('1 days', periods=3, freq='1 min 2 s 3 us') t2 = -1 * t1 @@ -443,7 +443,7 @@ def test_round(self): # invalid for freq in ['Y', 'M', 'foobar']: - self.assertRaises(ValueError, lambda: t1.round(freq)) + pytest.raises(ValueError, lambda: t1.round(freq)) def test_contains(self): # Checking for any NaT-like objects @@ -505,8 +505,8 @@ def conv(v): self.assertEqual(ct(' - 10000D '), -conv(np.timedelta64(10000, 'D'))) # invalid - self.assertRaises(ValueError, ct, '1foo') - self.assertRaises(ValueError, ct, 'foo') + pytest.raises(ValueError, ct, '1foo') + pytest.raises(ValueError, ct, 'foo') def test_full_format_converters(self): def conv(v): @@ -534,7 +534,7 @@ def conv(v): d1 + np.timedelta64(1000 * (6 * 3600 + 1) + 10, 'ms'))) # invalid - self.assertRaises(ValueError, ct, '- 1days, 00') + pytest.raises(ValueError, ct, '- 1days, 00') def test_overflow(self): # GH 9442 @@ -552,9 +552,9 @@ def test_overflow(self): 1000)) # sum - self.assertRaises(ValueError, lambda: (s - s.min()).sum()) + pytest.raises(ValueError, lambda: (s - s.min()).sum()) s1 = s[0:10000] - self.assertRaises(ValueError, lambda: (s1 - s1.min()).sum()) + pytest.raises(ValueError, lambda: (s1 - s1.min()).sum()) s2 = s[0:1000] result = (s2 - s2.min()).sum() @@ -700,10 +700,10 @@ def test_ops_error_str(self): for l, r in [(td, 'a'), ('a', td)]: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): l + r - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): l > r self.assertFalse(l == r) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index d891032fb1f8e..ae351b9d1cab9 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1,6 +1,7 @@ """ test the scalar Timestamp """ import sys +import pytest import operator import calendar import numpy as np @@ -178,15 +179,15 @@ def test_constructor_invalid(self): def test_constructor_positional(self): # GH 10758 - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): 
Timestamp(2000, 1) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): Timestamp(2000, 0, 1) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): Timestamp(2000, 13, 1) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): Timestamp(2000, 1, 0) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): Timestamp(2000, 1, 32) # GH 11630 @@ -200,15 +201,15 @@ def test_constructor_positional(self): def test_constructor_keyword(self): # GH 10758 - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): Timestamp(year=2000, month=1) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): Timestamp(year=2000, month=0, day=1) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): Timestamp(year=2000, month=13, day=1) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): Timestamp(year=2000, month=1, day=0) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): Timestamp(year=2000, month=1, day=32) self.assertEqual( @@ -351,7 +352,7 @@ def test_bounds_with_different_units(self): for date_string in out_of_bounds_dates: for unit in time_units: - self.assertRaises(ValueError, Timestamp, np.datetime64( + pytest.raises(ValueError, Timestamp, np.datetime64( date_string, dtype='M8[%s]' % unit)) in_bounds_dates = ('1677-09-23', '2262-04-11', ) @@ -388,8 +389,8 @@ def test_tz_localize_ambiguous(self): rng = date_range('2014-11-02', periods=3, freq='H', tz='US/Eastern') self.assertEqual(rng[1], ts_dst) self.assertEqual(rng[2], ts_no_dst) - self.assertRaises(ValueError, ts.tz_localize, 'US/Eastern', - ambiguous='infer') + pytest.raises(ValueError, ts.tz_localize, 'US/Eastern', + ambiguous='infer') # GH 8025 with tm.assertRaisesRegexp(TypeError, @@ -411,10 +412,10 @@ def test_tz_localize_nonexistent(self): 'Europe/Paris', 'Europe/Belgrade'] for t, tz in zip(times, timezones): ts = Timestamp(t) - self.assertRaises(NonExistentTimeError, ts.tz_localize, - tz) - self.assertRaises(NonExistentTimeError, ts.tz_localize, - tz, errors='raise') + pytest.raises(NonExistentTimeError, ts.tz_localize, + tz) + pytest.raises(NonExistentTimeError, ts.tz_localize, + tz, errors='raise') self.assertIs(ts.tz_localize(tz, errors='coerce'), NaT) @@ -422,8 +423,8 @@ def test_tz_localize_errors_ambiguous(self): # See issue 13057 from pytz.exceptions import AmbiguousTimeError ts = Timestamp('2015-11-1 01:00') - self.assertRaises(AmbiguousTimeError, - ts.tz_localize, 'US/Pacific', errors='coerce') + pytest.raises(AmbiguousTimeError, + ts.tz_localize, 'US/Pacific', errors='coerce') def test_tz_localize_roundtrip(self): for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: @@ -433,7 +434,7 @@ def test_tz_localize_roundtrip(self): localized = ts.tz_localize(tz) self.assertEqual(localized, Timestamp(t, tz=tz)) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): localized.tz_localize(tz) reset = localized.tz_localize(None) @@ -466,10 +467,10 @@ def test_barely_oob_dts(self): Timestamp(max_ts_us) # One us less than the minimum is an error - self.assertRaises(ValueError, Timestamp, min_ts_us - one_us) + pytest.raises(ValueError, Timestamp, min_ts_us - one_us) # One us more than the maximum is an error - self.assertRaises(ValueError, Timestamp, max_ts_us + one_us) + pytest.raises(ValueError, Timestamp, max_ts_us + one_us) def test_utc_z_designator(self): self.assertEqual(get_timezone( @@ -534,7 +535,7 @@ def check(value, equal): check(ts.hour, 9) check(ts.minute, 6) check(ts.second, 
3) - self.assertRaises(AttributeError, lambda: ts.millisecond) + pytest.raises(AttributeError, lambda: ts.millisecond) check(ts.microsecond, 100) check(ts.nanosecond, 1) check(ts.dayofweek, 6) @@ -552,7 +553,7 @@ def check(value, equal): check(ts.hour, 23) check(ts.minute, 59) check(ts.second, 0) - self.assertRaises(AttributeError, lambda: ts.millisecond) + pytest.raises(AttributeError, lambda: ts.millisecond) check(ts.microsecond, 0) check(ts.nanosecond, 0) check(ts.dayofweek, 2) @@ -675,7 +676,7 @@ def test_round(self): # invalid for freq in ['Y', 'M', 'foobar']: - self.assertRaises(ValueError, lambda: dti.round(freq)) + pytest.raises(ValueError, lambda: dti.round(freq)) # GH 14440 & 15578 result = Timestamp('2016-10-17 12:00:00.0015').round('ms') @@ -920,18 +921,18 @@ def test_cant_compare_tz_naive_w_aware(self): a = Timestamp('3/12/2012') b = Timestamp('3/12/2012', tz='utc') - self.assertRaises(Exception, a.__eq__, b) - self.assertRaises(Exception, a.__ne__, b) - self.assertRaises(Exception, a.__lt__, b) - self.assertRaises(Exception, a.__gt__, b) - self.assertRaises(Exception, b.__eq__, a) - self.assertRaises(Exception, b.__ne__, a) - self.assertRaises(Exception, b.__lt__, a) - self.assertRaises(Exception, b.__gt__, a) + pytest.raises(Exception, a.__eq__, b) + pytest.raises(Exception, a.__ne__, b) + pytest.raises(Exception, a.__lt__, b) + pytest.raises(Exception, a.__gt__, b) + pytest.raises(Exception, b.__eq__, a) + pytest.raises(Exception, b.__ne__, a) + pytest.raises(Exception, b.__lt__, a) + pytest.raises(Exception, b.__gt__, a) if sys.version_info < (3, 3): - self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) - self.assertRaises(Exception, a.to_pydatetime().__eq__, b) + pytest.raises(Exception, a.__eq__, b.to_pydatetime()) + pytest.raises(Exception, a.to_pydatetime().__eq__, b) else: self.assertFalse(a == b.to_pydatetime()) self.assertFalse(a.to_pydatetime() == b) @@ -943,18 +944,18 @@ def test_cant_compare_tz_naive_w_aware_explicit_pytz(self): a = Timestamp('3/12/2012') b = Timestamp('3/12/2012', tz=utc) - self.assertRaises(Exception, a.__eq__, b) - self.assertRaises(Exception, a.__ne__, b) - self.assertRaises(Exception, a.__lt__, b) - self.assertRaises(Exception, a.__gt__, b) - self.assertRaises(Exception, b.__eq__, a) - self.assertRaises(Exception, b.__ne__, a) - self.assertRaises(Exception, b.__lt__, a) - self.assertRaises(Exception, b.__gt__, a) + pytest.raises(Exception, a.__eq__, b) + pytest.raises(Exception, a.__ne__, b) + pytest.raises(Exception, a.__lt__, b) + pytest.raises(Exception, a.__gt__, b) + pytest.raises(Exception, b.__eq__, a) + pytest.raises(Exception, b.__ne__, a) + pytest.raises(Exception, b.__lt__, a) + pytest.raises(Exception, b.__gt__, a) if sys.version_info < (3, 3): - self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) - self.assertRaises(Exception, a.to_pydatetime().__eq__, b) + pytest.raises(Exception, a.__eq__, b.to_pydatetime()) + pytest.raises(Exception, a.to_pydatetime().__eq__, b) else: self.assertFalse(a == b.to_pydatetime()) self.assertFalse(a.to_pydatetime() == b) @@ -967,18 +968,18 @@ def test_cant_compare_tz_naive_w_aware_dateutil(self): a = Timestamp('3/12/2012') b = Timestamp('3/12/2012', tz=utc) - self.assertRaises(Exception, a.__eq__, b) - self.assertRaises(Exception, a.__ne__, b) - self.assertRaises(Exception, a.__lt__, b) - self.assertRaises(Exception, a.__gt__, b) - self.assertRaises(Exception, b.__eq__, a) - self.assertRaises(Exception, b.__ne__, a) - self.assertRaises(Exception, b.__lt__, a) - 
self.assertRaises(Exception, b.__gt__, a) + pytest.raises(Exception, a.__eq__, b) + pytest.raises(Exception, a.__ne__, b) + pytest.raises(Exception, a.__lt__, b) + pytest.raises(Exception, a.__gt__, b) + pytest.raises(Exception, b.__eq__, a) + pytest.raises(Exception, b.__ne__, a) + pytest.raises(Exception, b.__lt__, a) + pytest.raises(Exception, b.__gt__, a) if sys.version_info < (3, 3): - self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) - self.assertRaises(Exception, a.to_pydatetime().__eq__, b) + pytest.raises(Exception, a.__eq__, b.to_pydatetime()) + pytest.raises(Exception, a.to_pydatetime().__eq__, b) else: self.assertFalse(a == b.to_pydatetime()) self.assertFalse(a.to_pydatetime() == b) @@ -1007,11 +1008,11 @@ def test_frequency_misc(self): expected = offsets.Minute(5) self.assertEqual(result, expected) - self.assertRaises(ValueError, frequencies.get_freq_code, (5, 'baz')) + pytest.raises(ValueError, frequencies.get_freq_code, (5, 'baz')) - self.assertRaises(ValueError, frequencies.to_offset, '100foo') + pytest.raises(ValueError, frequencies.to_offset, '100foo') - self.assertRaises(ValueError, frequencies.to_offset, ('', '')) + pytest.raises(ValueError, frequencies.to_offset, ('', '')) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = frequencies.get_standard_freq(offsets.Hour()) @@ -1349,12 +1350,12 @@ def test_timestamp_fields(self): self.assertEqual(idx.freqstr, Timestamp(idx[-1], idx.freq).freqstr) def test_timestamp_date_out_of_range(self): - self.assertRaises(ValueError, Timestamp, '1676-01-01') - self.assertRaises(ValueError, Timestamp, '2263-01-01') + pytest.raises(ValueError, Timestamp, '1676-01-01') + pytest.raises(ValueError, Timestamp, '2263-01-01') # 1475 - self.assertRaises(ValueError, DatetimeIndex, ['1400-01-01']) - self.assertRaises(ValueError, DatetimeIndex, [datetime(1400, 1, 1)]) + pytest.raises(ValueError, DatetimeIndex, ['1400-01-01']) + pytest.raises(ValueError, DatetimeIndex, [datetime(1400, 1, 1)]) def test_timestamp_repr(self): # pre-1900 diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 5cac86560dba2..a7b8d285bbe54 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -1,6 +1,8 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + from datetime import datetime import numpy as np @@ -21,12 +23,12 @@ class TestSeriesAlterAxes(TestData, tm.TestCase): def test_setindex(self): # wrong type series = self.series.copy() - self.assertRaises(TypeError, setattr, series, 'index', None) + pytest.raises(TypeError, setattr, series, 'index', None) # wrong length series = self.series.copy() - self.assertRaises(Exception, setattr, series, 'index', - np.arange(len(series) - 1)) + pytest.raises(Exception, setattr, series, 'index', + np.arange(len(series) - 1)) # works series = self.series.copy() diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 666fca2d791fe..eaa88da94ccd2 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -500,7 +500,7 @@ def testit(): # idxmax, idxmin, min, and max are valid for dates if name not in ['max', 'min']: ds = Series(date_range('1/1/2001', periods=10)) - self.assertRaises(TypeError, f, ds) + pytest.raises(TypeError, f, ds) # skipna or no self.assertTrue(notnull(f(self.series))) @@ -543,10 +543,10 @@ def testit(): # check on string data if name not in ['sum', 'min', 'max']: - self.assertRaises(TypeError, f, 
Series(list('abc'))) + pytest.raises(TypeError, f, Series(list('abc'))) # Invalid axis. - self.assertRaises(ValueError, f, self.series, axis=1) + pytest.raises(ValueError, f, self.series, axis=1) # Unimplemented numeric_only parameter. if 'numeric_only' in compat.signature(f).args: @@ -669,12 +669,12 @@ def test_all_any_params(self): assert_series_equal(s.any(level=0), Series([False, True, True])) # bool_only is not implemented with level option. - self.assertRaises(NotImplementedError, s.any, bool_only=True, level=0) - self.assertRaises(NotImplementedError, s.all, bool_only=True, level=0) + pytest.raises(NotImplementedError, s.any, bool_only=True, level=0) + pytest.raises(NotImplementedError, s.all, bool_only=True, level=0) # bool_only is not implemented alone. - self.assertRaises(NotImplementedError, s.any, bool_only=True) - self.assertRaises(NotImplementedError, s.all, bool_only=True) + pytest.raises(NotImplementedError, s.any, bool_only=True) + pytest.raises(NotImplementedError, s.all, bool_only=True) def test_modulo(self): with np.errstate(all='ignore'): @@ -866,8 +866,8 @@ def test_dot(self): assert_almost_equal(a.dot(b['1']), expected['1']) assert_almost_equal(a.dot(b2['1']), expected['1']) - self.assertRaises(Exception, a.dot, a.values[:3]) - self.assertRaises(ValueError, a.dot, b.T) + pytest.raises(Exception, a.dot, a.values[:3]) + pytest.raises(ValueError, a.dot, b.T) def test_value_counts_nunique(self): @@ -1061,10 +1061,10 @@ def test_isin(self): def test_isin_with_string_scalar(self): # GH4763 s = Series(['A', 'B', 'C', 'a', 'B', 'B', 'A', 'C']) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): s.isin('a') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): s = Series(['aaa', 'b', 'c']) s.isin('aaa') @@ -1263,14 +1263,14 @@ def test_ptp(self): expected = pd.Series([np.nan, np.nan], index=['a', 'b']) tm.assert_series_equal(s.ptp(level=0, skipna=False), expected) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): s.ptp(axis=1) s = pd.Series(['a', 'b', 'c', 'd', 'e']) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): s.ptp() - with self.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): s.ptp(numeric_only=True) def test_empty_timeseries_redections_return_nat(self): diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index bc867cc8e0436..68d8e382ef046 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -1,6 +1,8 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + import numpy as np import pandas as pd @@ -148,8 +150,8 @@ def test_tab_completion(self): def test_not_hashable(self): s_empty = Series() s = Series([1]) - self.assertRaises(TypeError, hash, s_empty) - self.assertRaises(TypeError, hash, s) + pytest.raises(TypeError, hash, s_empty) + pytest.raises(TypeError, hash, s) def test_contains(self): tm.assert_contains_all(self.ts.index, self.ts) @@ -218,7 +220,7 @@ def test_iteritems(self): def test_raise_on_info(self): s = Series(np.random.randn(10)) - with tm.assertRaises(AttributeError): + with pytest.raises(AttributeError): s.info() def test_copy(self): diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index ac165bd31ef23..afe46e5dcf480 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -1,7 +1,10 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + from collections import Counter, defaultdict, OrderedDict + 
import numpy as np import pandas as pd @@ -204,22 +207,22 @@ def test_transform_and_agg_error(self): # we are trying to transform with an aggregator def f(): self.series.transform(['min', 'max']) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): with np.errstate(all='ignore'): self.series.agg(['sqrt', 'max']) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): with np.errstate(all='ignore'): self.series.transform(['sqrt', 'max']) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): with np.errstate(all='ignore'): self.series.agg({'foo': np.sqrt, 'bar': 'sum'}) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_demo(self): # demonstration tests @@ -505,7 +508,7 @@ def test_map_categorical(self): tm.assert_series_equal(result, exp) self.assertEqual(result.dtype, np.object) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): s.map(lambda x: x, na_action='ignore') def test_map_datetimetz(self): @@ -526,7 +529,7 @@ def test_map_datetimetz(self): exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64) tm.assert_series_equal(result, exp) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): s.map(lambda x: x, na_action='ignore') # not vectorized diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/test_asof.py index 82914a99e2f6c..9c1e4626e1736 100644 --- a/pandas/tests/series/test_asof.py +++ b/pandas/tests/series/test_asof.py @@ -1,5 +1,7 @@ # coding=utf-8 +import pytest + import numpy as np from pandas import (offsets, Series, notnull, isnull, date_range, Timestamp) @@ -139,14 +141,14 @@ def test_errors(self): # non-monotonic self.assertFalse(s.index.is_monotonic) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): s.asof(s.index[0]) # subset with Series N = 10 rng = date_range('1/1/1990', periods=N, freq='53s') s = Series(np.random.randn(N), index=rng) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): s.asof(s.index[0], subset='foo') def test_all_nans(self): diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 57a5fdfa7731d..cc29c45d74c9d 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -1,6 +1,8 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + from datetime import datetime from numpy import nan @@ -28,8 +30,8 @@ def test_append(self): else: self.fail("orphaned index!") - self.assertRaises(ValueError, self.ts.append, self.ts, - verify_integrity=True) + pytest.raises(ValueError, self.ts.append, self.ts, + verify_integrity=True) def test_append_many(self): pieces = [self.ts[:5], self.ts[5:10], self.ts[10:]] diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 151ad1863851b..e02698f96ca49 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1,6 +1,8 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + from datetime import datetime, timedelta from numpy import nan @@ -57,8 +59,8 @@ def test_constructor(self): self.assertFalse(self.empty.index.is_all_dates) self.assertFalse(Series({}).index.is_all_dates) - self.assertRaises(Exception, Series, np.random.randn(3, 3), - index=np.arange(3)) + pytest.raises(Exception, Series, np.random.randn(3, 3), + index=np.arange(3)) mixed.name = 'Series' rs = Series(mixed).name @@ -67,7 +69,7 
@@ def test_constructor(self): # raise on MultiIndex GH4187 m = MultiIndex.from_arrays([[1, 2], [3, 4]]) - self.assertRaises(NotImplementedError, Series, m) + pytest.raises(NotImplementedError, Series, m) def test_constructor_empty(self): empty = Series() @@ -138,7 +140,7 @@ def test_constructor_categorical(self): tm.assert_categorical_equal(res.values, cat) # GH12574 - self.assertRaises( + pytest.raises( ValueError, lambda: Series(pd.Categorical([1, 2, 3]), dtype='int64')) cat = Series(pd.Categorical([1, 2, 3]), dtype='category') @@ -291,7 +293,7 @@ def test_constructor_pass_nan_nat(self): tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp) def test_constructor_cast(self): - self.assertRaises(ValueError, Series, ['a', 'b', 'c'], dtype=float) + pytest.raises(ValueError, Series, ['a', 'b', 'c'], dtype=float) def test_constructor_dtype_nocast(self): # 1572 @@ -372,13 +374,13 @@ def test_constructor_dtype_datetime64(self): # invalid astypes for t in ['s', 'D', 'us', 'ms']: - self.assertRaises(TypeError, s.astype, 'M8[%s]' % t) + pytest.raises(TypeError, s.astype, 'M8[%s]' % t) # GH3414 related - self.assertRaises(TypeError, lambda x: Series( + pytest.raises(TypeError, lambda x: Series( Series(dates).astype('int') / 1000000, dtype='M8[ms]')) - self.assertRaises(TypeError, - lambda x: Series(dates, dtype='datetime64')) + pytest.raises(TypeError, + lambda x: Series(dates, dtype='datetime64')) # invalid dates can be help as object result = Series([datetime(2, 1, 1)]) @@ -692,9 +694,9 @@ def test_constructor_tuple_of_tuples(self): def test_constructor_set(self): values = set([1, 2, 3, 4, 5]) - self.assertRaises(TypeError, Series, values) + pytest.raises(TypeError, Series, values) values = frozenset(values) - self.assertRaises(TypeError, Series, values) + pytest.raises(TypeError, Series, values) def test_fromDict(self): data = {'a': 0, 'b': 1, 'c': 2, 'd': 3} @@ -781,19 +783,19 @@ def test_constructor_dtype_timedelta64(self): # these are frequency conversion astypes # for t in ['s', 'D', 'us', 'ms']: - # self.assertRaises(TypeError, td.astype, 'm8[%s]' % t) + # pytest.raises(TypeError, td.astype, 'm8[%s]' % t) # valid astype td.astype('int64') # invalid casting - self.assertRaises(TypeError, td.astype, 'int32') + pytest.raises(TypeError, td.astype, 'int32') # this is an invalid casting def f(): Series([timedelta(days=1), 'foo'], dtype='m8[ns]') - self.assertRaises(Exception, f) + pytest.raises(Exception, f) # leave as object here td = Series([timedelta(days=i) for i in range(3)] + ['foo']) @@ -833,7 +835,7 @@ def test_constructor_name_hashable(self): def test_constructor_name_unhashable(self): for n in [['name_list'], np.ones(2), {1: 2}]: for data in [['name_list'], np.ones(2), {1: 2}]: - self.assertRaises(TypeError, Series, data, name=n) + pytest.raises(TypeError, Series, data, name=n) def test_auto_conversion(self): series = Series(list(date_range('1/1/2000', periods=10))) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index ce82507ce8330..ac3e3a2abbd69 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -1,6 +1,8 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + from datetime import datetime, date import numpy as np @@ -258,7 +260,7 @@ def get_dir(s): def f(): s.dt.hour[0] = 5 - self.assertRaises(com.SettingWithCopyError, f) + pytest.raises(com.SettingWithCopyError, f) def test_dt_accessor_no_new_attributes(self): # 
https://github.com/pandas-dev/pandas/issues/10673 diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index f1b9aaf1b9e46..1003e84a1a942 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1,6 +1,8 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + from datetime import datetime, timedelta from numpy import nan @@ -88,7 +90,7 @@ def test_delitem(self): def f(): del s[0] - self.assertRaises(KeyError, f) + pytest.raises(KeyError, f) # only 1 left, del, add, del s = Series(1) @@ -126,8 +128,8 @@ def test_getitem_setitem_ellipsis(self): def test_getitem_negative_out_of_bounds(self): s = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10)) - self.assertRaises(IndexError, s.__getitem__, -11) - self.assertRaises(IndexError, s.__setitem__, -11, 'foo') + pytest.raises(IndexError, s.__getitem__, -11) + pytest.raises(IndexError, s.__setitem__, -11, 'foo') def test_pop(self): # GH 6600 @@ -156,7 +158,7 @@ def test_getitem_get(self): # missing d = self.ts.index[0] - BDay() - self.assertRaises(KeyError, self.ts.__getitem__, d) + pytest.raises(KeyError, self.ts.__getitem__, d) # None # GH 5652 @@ -260,12 +262,12 @@ def test_getitem_boolean_empty(self): def f(): s[Series([], dtype=bool)] - self.assertRaises(IndexingError, f) + pytest.raises(IndexingError, f) def f(): s[Series([True], dtype=bool)] - self.assertRaises(IndexingError, f) + pytest.raises(IndexingError, f) def test_getitem_generator(self): gen = (x > 0 for x in self.series) @@ -306,8 +308,8 @@ def test_getitem_boolean_object(self): # nans raise exception omask[5:10] = np.nan - self.assertRaises(Exception, s.__getitem__, omask) - self.assertRaises(Exception, s.__setitem__, omask, 5) + pytest.raises(Exception, s.__getitem__, omask) + pytest.raises(Exception, s.__setitem__, omask, 5) def test_getitem_setitem_boolean_corner(self): ts = self.ts @@ -315,13 +317,13 @@ def test_getitem_setitem_boolean_corner(self): # these used to raise...?? 
- self.assertRaises(Exception, ts.__getitem__, mask_shifted) - self.assertRaises(Exception, ts.__setitem__, mask_shifted, 1) + pytest.raises(Exception, ts.__getitem__, mask_shifted) + pytest.raises(Exception, ts.__setitem__, mask_shifted, 1) # ts[mask_shifted] # ts[mask_shifted] = 1 - self.assertRaises(Exception, ts.loc.__getitem__, mask_shifted) - self.assertRaises(Exception, ts.loc.__setitem__, mask_shifted, 1) + pytest.raises(Exception, ts.loc.__getitem__, mask_shifted) + pytest.raises(Exception, ts.loc.__setitem__, mask_shifted, 1) # ts.loc[mask_shifted] # ts.loc[mask_shifted] = 2 @@ -545,11 +547,11 @@ def test_getitem_median_slice_bug(self): def test_getitem_out_of_bounds(self): # don't segfault, GH #495 - self.assertRaises(IndexError, self.ts.__getitem__, len(self.ts)) + pytest.raises(IndexError, self.ts.__getitem__, len(self.ts)) # GH #917 s = Series([]) - self.assertRaises(IndexError, s.__getitem__, -1) + pytest.raises(IndexError, s.__getitem__, -1) def test_getitem_setitem_integers(self): # caused bug without test @@ -565,8 +567,8 @@ def test_getitem_box_float64(self): def test_getitem_ambiguous_keyerror(self): s = Series(lrange(10), index=lrange(0, 20, 2)) - self.assertRaises(KeyError, s.__getitem__, 1) - self.assertRaises(KeyError, s.loc.__getitem__, 1) + pytest.raises(KeyError, s.__getitem__, 1) + pytest.raises(KeyError, s.loc.__getitem__, 1) def test_getitem_unordered_dup(self): obj = Series(lrange(5), index=['c', 'a', 'a', 'b', 'b']) @@ -592,7 +594,7 @@ def test_getitem_dataframe(self): rng = list(range(10)) s = pd.Series(10, index=rng) df = pd.DataFrame(rng, index=rng) - self.assertRaises(TypeError, s.__getitem__, df > 5) + pytest.raises(TypeError, s.__getitem__, df > 5) def test_getitem_callable(self): # GH 12533 @@ -674,15 +676,15 @@ def test_slice_can_reorder_not_uniquely_indexed(self): def test_slice_float_get_set(self): - self.assertRaises(TypeError, lambda: self.ts[4.0:10.0]) + pytest.raises(TypeError, lambda: self.ts[4.0:10.0]) def f(): self.ts[4.0:10.0] = 0 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) - self.assertRaises(TypeError, self.ts.__getitem__, slice(4.5, 10.0)) - self.assertRaises(TypeError, self.ts.__setitem__, slice(4.5, 10.0), 0) + pytest.raises(TypeError, self.ts.__getitem__, slice(4.5, 10.0)) + pytest.raises(TypeError, self.ts.__setitem__, slice(4.5, 10.0), 0) def test_slice_floats2(self): s = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float)) @@ -819,10 +821,10 @@ def test_basic_getitem_setitem_corner(self): assert_series_equal(result, expected) # OK - self.assertRaises(Exception, self.ts.__getitem__, - [5, slice(None, None)]) - self.assertRaises(Exception, self.ts.__setitem__, - [5, slice(None, None)], 2) + pytest.raises(Exception, self.ts.__getitem__, + [5, slice(None, None)]) + pytest.raises(Exception, self.ts.__setitem__, + [5, slice(None, None)], 2) def test_basic_getitem_with_labels(self): indices = self.ts.index[[5, 10, 15]] @@ -893,8 +895,8 @@ def test_basic_setitem_with_labels(self): inds_notfound = [0, 4, 5, 6] arr_inds_notfound = np.array([0, 4, 5, 6]) - self.assertRaises(Exception, s.__setitem__, inds_notfound, 0) - self.assertRaises(Exception, s.__setitem__, arr_inds_notfound, 0) + pytest.raises(Exception, s.__setitem__, inds_notfound, 0) + pytest.raises(Exception, s.__setitem__, arr_inds_notfound, 0) # GH12089 # with tz for values @@ -940,8 +942,8 @@ def test_loc_getitem_not_monotonic(self): ts2 = self.ts[::2][[1, 2, 0]] - self.assertRaises(KeyError, ts2.loc.__getitem__, slice(d1, d2)) - 
self.assertRaises(KeyError, ts2.loc.__setitem__, slice(d1, d2), 0) + pytest.raises(KeyError, ts2.loc.__getitem__, slice(d1, d2)) + pytest.raises(KeyError, ts2.loc.__setitem__, slice(d1, d2), 0) def test_loc_getitem_setitem_integer_slice_keyerrors(self): s = Series(np.random.randn(10), index=lrange(0, 20, 2)) @@ -965,8 +967,8 @@ def test_loc_getitem_setitem_integer_slice_keyerrors(self): # non-monotonic, raise KeyError s2 = s.iloc[lrange(5) + lrange(5, 10)[::-1]] - self.assertRaises(KeyError, s2.loc.__getitem__, slice(3, 11)) - self.assertRaises(KeyError, s2.loc.__setitem__, slice(3, 11), 0) + pytest.raises(KeyError, s2.loc.__getitem__, slice(3, 11)) + pytest.raises(KeyError, s2.loc.__setitem__, slice(3, 11), 0) def test_loc_getitem_iterator(self): idx = iter(self.series.index[:10]) @@ -1084,8 +1086,8 @@ def test_where(self): rs = s2.where(cond[:3], -s2) assert_series_equal(rs, expected) - self.assertRaises(ValueError, s.where, 1) - self.assertRaises(ValueError, s.where, cond[:3].values, -s) + pytest.raises(ValueError, s.where, 1) + pytest.raises(ValueError, s.where, cond[:3].values, -s) # GH 2745 s = Series([1, 2]) @@ -1094,10 +1096,10 @@ def test_where(self): assert_series_equal(s, expected) # failures - self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), - [0, 2, 3]) - self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), - []) + pytest.raises(ValueError, s.__setitem__, tuple([[[True, False]]]), + [0, 2, 3]) + pytest.raises(ValueError, s.__setitem__, tuple([[[True, False]]]), + []) # unsafe dtype changes for dtype in [np.int8, np.int16, np.int32, np.int64, np.float16, @@ -1133,7 +1135,7 @@ def test_where(self): s = Series(np.arange(10), dtype=dtype) mask = s < 5 values = [2.5, 3.5, 4.5, 5.5, 6.5] - self.assertRaises(Exception, s.__setitem__, tuple(mask), values) + pytest.raises(Exception, s.__setitem__, tuple(mask), values) # GH3235 s = Series(np.arange(10), dtype='int64') @@ -1155,12 +1157,12 @@ def test_where(self): def f(): s[mask] = [5, 4, 3, 2, 1] - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): s[mask] = [0] * 5 - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # dtype changes s = Series([1, 2, 3, 4]) @@ -1246,7 +1248,7 @@ def test_where_setitem_invalid(self): def f(): s[0:3] = list(range(27)) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) s[0:3] = list(range(3)) expected = Series([0, 1, 2]) @@ -1258,7 +1260,7 @@ def f(): def f(): s[0:4:2] = list(range(27)) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) s = Series(list('abcdef')) s[0:4:2] = list(range(2)) @@ -1271,7 +1273,7 @@ def f(): def f(): s[:-1] = list(range(27)) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) s[-3:-1] = list(range(2)) expected = Series(['a', 'b', 'c', 0, 1, 'f']) @@ -1283,14 +1285,14 @@ def f(): def f(): s[[0, 1, 2]] = list(range(27)) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) s = Series(list('abc')) def f(): s[[0, 1, 2]] = list(range(2)) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # scalar s = Series(list('abc')) @@ -1440,8 +1442,8 @@ def test_mask(self): rs2 = s2.mask(cond[:3], -s2) assert_series_equal(rs, rs2) - self.assertRaises(ValueError, s.mask, 1) - self.assertRaises(ValueError, s.mask, cond[:3].values, -s) + pytest.raises(ValueError, s.mask, 1) + pytest.raises(ValueError, s.mask, cond[:3].values, -s) # dtype changes s = Series([1, 2, 3, 4]) @@ -1562,8 +1564,8 @@ def test_ix_setitem_boolean(self): def 
test_ix_setitem_corner(self): inds = list(self.series.index[[5, 8, 12]]) self.series.loc[inds] = 5 - self.assertRaises(Exception, self.series.loc.__setitem__, - inds + ['foo'], 5) + pytest.raises(Exception, self.series.loc.__setitem__, + inds + ['foo'], 5) def test_get_set_boolean_different_order(self): ordered = self.series.sort_values() @@ -1604,29 +1606,29 @@ def test_setitem_na(self): def test_basic_indexing(self): s = Series(np.random.randn(5), index=['a', 'b', 'a', 'a', 'b']) - self.assertRaises(IndexError, s.__getitem__, 5) - self.assertRaises(IndexError, s.__setitem__, 5, 0) + pytest.raises(IndexError, s.__getitem__, 5) + pytest.raises(IndexError, s.__setitem__, 5, 0) - self.assertRaises(KeyError, s.__getitem__, 'c') + pytest.raises(KeyError, s.__getitem__, 'c') s = s.sort_index() - self.assertRaises(IndexError, s.__getitem__, 5) - self.assertRaises(IndexError, s.__setitem__, 5, 0) + pytest.raises(IndexError, s.__getitem__, 5) + pytest.raises(IndexError, s.__setitem__, 5, 0) def test_int_indexing(self): s = Series(np.random.randn(6), index=[0, 0, 1, 1, 2, 2]) - self.assertRaises(KeyError, s.__getitem__, 5) + pytest.raises(KeyError, s.__getitem__, 5) - self.assertRaises(KeyError, s.__getitem__, 'c') + pytest.raises(KeyError, s.__getitem__, 'c') # not monotonic s = Series(np.random.randn(6), index=[2, 2, 0, 0, 1, 1]) - self.assertRaises(KeyError, s.__getitem__, 5) + pytest.raises(KeyError, s.__getitem__, 5) - self.assertRaises(KeyError, s.__getitem__, 'c') + pytest.raises(KeyError, s.__getitem__, 'c') def test_datetime_indexing(self): from pandas import date_range @@ -1637,7 +1639,7 @@ def test_datetime_indexing(self): s = Series(len(index), index=index) stamp = Timestamp('1/8/2000') - self.assertRaises(KeyError, s.__getitem__, stamp) + pytest.raises(KeyError, s.__getitem__, stamp) s[stamp] = 0 self.assertEqual(s[stamp], 0) @@ -1645,7 +1647,7 @@ def test_datetime_indexing(self): s = Series(len(index), index=index) s = s[::-1] - self.assertRaises(KeyError, s.__getitem__, stamp) + pytest.raises(KeyError, s.__getitem__, stamp) s[stamp] = 0 self.assertEqual(s[stamp], 0) @@ -1745,8 +1747,8 @@ def test_drop(self): # single string/tuple-like s = Series(range(3), index=list('abc')) - self.assertRaises(ValueError, s.drop, 'bc') - self.assertRaises(ValueError, s.drop, ('a', )) + pytest.raises(ValueError, s.drop, 'bc') + pytest.raises(ValueError, s.drop, ('a', )) # errors='ignore' s = Series(range(3), index=list('abc')) @@ -1757,7 +1759,7 @@ def test_drop(self): assert_series_equal(result, expected) # bad axis - self.assertRaises(ValueError, s.drop, 'one', axis='columns') + pytest.raises(ValueError, s.drop, 'one', axis='columns') # GH 8522 s = Series([2, 3], index=[True, False]) @@ -2000,7 +2002,7 @@ def test_reindex_corner(self): # bad fill method ts = self.ts[::2] - self.assertRaises(Exception, ts.reindex, self.ts.index, method='foo') + pytest.raises(Exception, ts.reindex, self.ts.index, method='foo') def test_reindex_pad(self): @@ -2236,7 +2238,7 @@ def test_setitem_scalar_into_readonly_backing_data(self): series = Series(array) for n in range(len(series)): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): series[n] = 1 self.assertEqual( @@ -2253,7 +2255,7 @@ def test_setitem_slice_into_readonly_backing_data(self): array.flags.writeable = False # make the array immutable series = Series(array) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): series[1:3] = 1 self.assertTrue( @@ -2340,7 +2342,7 @@ def test_duplicate_dates_indexing(self): expected = 
Series(np.where(mask, 0, ts), index=ts.index) assert_series_equal(cp, expected) - self.assertRaises(KeyError, ts.__getitem__, datetime(2000, 1, 6)) + pytest.raises(KeyError, ts.__getitem__, datetime(2000, 1, 6)) # new index ts[datetime(2000, 1, 6)] = 0 @@ -2501,8 +2503,8 @@ def test_indexing(self): expected = df.loc[[df.index[2]]] # this is a single date, so will raise - self.assertRaises(KeyError, df.__getitem__, '2012-01-02 18:01:02', ) - self.assertRaises(KeyError, df.__getitem__, df.index[2], ) + pytest.raises(KeyError, df.__getitem__, '2012-01-02 18:01:02', ) + pytest.raises(KeyError, df.__getitem__, df.index[2], ) class TestDatetimeIndexing(tm.TestCase): @@ -2526,7 +2528,7 @@ def test_fancy_getitem(self): self.assertEqual(s['2009-1-2'], 48) self.assertEqual(s[datetime(2009, 1, 2)], 48) self.assertEqual(s[lib.Timestamp(datetime(2009, 1, 2))], 48) - self.assertRaises(KeyError, s.__getitem__, '2009-1-3') + pytest.raises(KeyError, s.__getitem__, '2009-1-3') assert_series_equal(s['3/6/2009':'2009-06-05'], s[datetime(2009, 3, 6):datetime(2009, 6, 5)]) diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index 9ca7645e6f974..19170c82953ad 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -1,6 +1,8 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + from datetime import datetime from numpy import nan @@ -294,7 +296,7 @@ def test_convert(self): def test_convert_no_arg_error(self): s = Series(['1.0', '2']) - self.assertRaises(ValueError, s._convert) + pytest.raises(ValueError, s._convert) def test_convert_preserve_bool(self): s = Series([1, True, 3, 5], dtype=object) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index dee466bc11a6f..2f7bf6902f5af 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -2,6 +2,8 @@ # pylint: disable-msg=E1101,W0612 import pytz +import pytest + from datetime import timedelta, datetime from distutils.version import LooseVersion @@ -309,14 +311,14 @@ def test_fillna_int(self): def test_fillna_raise(self): s = Series(np.random.randint(-100, 100, 50)) - self.assertRaises(TypeError, s.fillna, [1, 2]) - self.assertRaises(TypeError, s.fillna, (1, 2)) + pytest.raises(TypeError, s.fillna, [1, 2]) + pytest.raises(TypeError, s.fillna, (1, 2)) # related GH 9217, make sure limit is an int and greater than 0 s = Series([1, 2, 3, None]) for limit in [-1, 0, 1., 2.]: for method in ['backfill', 'bfill', 'pad', 'ffill', None]: - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): s.fillna(1, limit=limit, method=method) def test_fillna_nat(self): @@ -382,8 +384,8 @@ def test_fillna(self): exp = Series([0., 1., 5., 3., 4.], index=ts.index) tm.assert_series_equal(ts.fillna(value=5), exp) - self.assertRaises(ValueError, ts.fillna) - self.assertRaises(ValueError, self.ts.fillna, value=0, method='ffill') + pytest.raises(ValueError, ts.fillna) + pytest.raises(ValueError, self.ts.fillna, value=0, method='ffill') # GH 5703 s1 = Series([np.nan]) @@ -520,7 +522,7 @@ def test_dropna_empty(self): self.assertEqual(len(s), 0) # invalid axis - self.assertRaises(ValueError, s.dropna, axis=1) + pytest.raises(ValueError, s.dropna, axis=1) def test_datetime64_tz_dropna(self): # DatetimeBlock @@ -605,7 +607,7 @@ def test_pad_require_monotonicity(self): # neither monotonic increasing or decreasing rng2 = rng[[1, 0, 2]] - self.assertRaises(ValueError, rng2.get_indexer, rng, method='pad') + 
pytest.raises(ValueError, rng2.get_indexer, rng, method='pad') def test_dropna_preserve_name(self): self.ts[:5] = np.nan @@ -722,7 +724,7 @@ def test_interpolate(self): # Only raises ValueError if there are NaNs. non_ts = self.series.copy() non_ts[0] = np.NaN - self.assertRaises(ValueError, non_ts.interpolate, method='time') + pytest.raises(ValueError, non_ts.interpolate, method='time') def test_interpolate_pchip(self): tm._skip_if_no_scipy() @@ -821,7 +823,7 @@ def test_interpolate_index_values(self): def test_interpolate_non_ts(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): s.interpolate(method='time') # New interpolation tests @@ -912,7 +914,7 @@ def test_interp_limit(self): s = pd.Series([1, 2, np.nan, np.nan, 5]) for limit in [-1, 0, 1., 2.]: for method in methods: - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): s.interpolate(limit=limit, method=method) def test_interp_limit_forward(self): @@ -932,12 +934,12 @@ def test_interp_limit_forward(self): def test_interp_limit_bad_direction(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) - self.assertRaises(ValueError, s.interpolate, method='linear', limit=2, - limit_direction='abc') + pytest.raises(ValueError, s.interpolate, method='linear', limit=2, + limit_direction='abc') # raises an error even if no limit is specified. - self.assertRaises(ValueError, s.interpolate, method='linear', - limit_direction='abc') + pytest.raises(ValueError, s.interpolate, method='linear', + limit_direction='abc') def test_interp_limit_direction(self): # These tests are for issue #9218 -- fill NaNs in both directions. @@ -1021,13 +1023,13 @@ def test_interp_multiIndex(self): assert_series_equal(result, expected) tm._skip_if_no_scipy() - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): s.interpolate(method='polynomial', order=1) def test_interp_nonmono_raise(self): tm._skip_if_no_scipy() s = Series([1, np.nan, 3], index=[0, 2, 1]) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): s.interpolate(method='krogh') def test_interp_datetime64(self): @@ -1048,9 +1050,9 @@ def test_interp_limit_no_nans(self): def test_no_order(self): tm._skip_if_no_scipy() s = Series([0, 1, np.nan, 3]) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): s.interpolate(method='polynomial') - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): s.interpolate(method='spline') def test_spline(self): @@ -1094,10 +1096,10 @@ def test_spline_error(self): s = pd.Series(np.arange(10) ** 2) s[np.random.randint(0, 9, 3)] = np.nan - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): s.interpolate(method='spline') - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): s.interpolate(method='spline', order=0) def test_interp_timedelta64(self): diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index ce22b29ee5299..159b29aca1e7c 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -1,6 +1,8 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + from collections import Iterable from datetime import datetime, timedelta import operator @@ -246,12 +248,12 @@ def test_operators_empty_int_corner(self): def test_operators_timedelta64(self): # invalid ops - self.assertRaises(Exception, self.objSeries.__add__, 1) - self.assertRaises(Exception, self.objSeries.__add__, - np.array(1, dtype=np.int64)) - 
self.assertRaises(Exception, self.objSeries.__sub__, 1) - self.assertRaises(Exception, self.objSeries.__sub__, - np.array(1, dtype=np.int64)) + pytest.raises(Exception, self.objSeries.__add__, 1) + pytest.raises(Exception, self.objSeries.__add__, + np.array(1, dtype=np.int64)) + pytest.raises(Exception, self.objSeries.__sub__, 1) + pytest.raises(Exception, self.objSeries.__sub__, + np.array(1, dtype=np.int64)) # seriese ops v1 = date_range('2012-1-1', periods=3, freq='D') @@ -530,8 +532,8 @@ def test_timedelta64_operations_with_integers(self): for op in ['__add__', '__sub__']: sop = getattr(s1, op, None) if sop is not None: - self.assertRaises(TypeError, sop, 1) - self.assertRaises(TypeError, sop, s2.values) + pytest.raises(TypeError, sop, 1) + pytest.raises(TypeError, sop, s2.values) def test_timedelta64_conversions(self): startdate = Series(date_range('2013-01-01', '2013-01-03')) @@ -696,12 +698,12 @@ def run_ops(ops, get_ser, test_ser): result = dt1 - td1[0] exp = (dt1.dt.tz_localize(None) - td1[0]).dt.tz_localize(tz) assert_series_equal(result, exp) - self.assertRaises(TypeError, lambda: td1[0] - dt1) + pytest.raises(TypeError, lambda: td1[0] - dt1) result = dt2 - td2[0] exp = (dt2.dt.tz_localize(None) - td2[0]).dt.tz_localize(tz) assert_series_equal(result, exp) - self.assertRaises(TypeError, lambda: td2[0] - dt2) + pytest.raises(TypeError, lambda: td2[0] - dt2) result = dt1 + td1 exp = (dt1.dt.tz_localize(None) + td1).dt.tz_localize(tz) @@ -719,8 +721,8 @@ def run_ops(ops, get_ser, test_ser): exp = (dt2.dt.tz_localize(None) - td2).dt.tz_localize(tz) assert_series_equal(result, exp) - self.assertRaises(TypeError, lambda: td1 - dt1) - self.assertRaises(TypeError, lambda: td2 - dt2) + pytest.raises(TypeError, lambda: td1 - dt1) + pytest.raises(TypeError, lambda: td2 - dt2) def test_sub_datetime_compat(self): # GH 14088 @@ -768,7 +770,7 @@ def test_ops_nat(self): assert_series_equal(datetime_series - single_nat_dtype_datetime, nat_series_dtype_timedelta) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): -single_nat_dtype_datetime + datetime_series assert_series_equal(datetime_series - single_nat_dtype_timedelta, @@ -787,7 +789,7 @@ def test_ops_nat(self): assert_series_equal(nat_series_dtype_timestamp - single_nat_dtype_datetime, nat_series_dtype_timedelta) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): -single_nat_dtype_datetime + nat_series_dtype_timestamp assert_series_equal(nat_series_dtype_timestamp - @@ -797,7 +799,7 @@ def test_ops_nat(self): nat_series_dtype_timestamp, nat_series_dtype_timestamp) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): timedelta_series - single_nat_dtype_datetime # addition @@ -881,13 +883,13 @@ def test_ops_nat(self): assert_series_equal(timedelta_series * nan, nat_series_dtype_timedelta) assert_series_equal(nan * timedelta_series, nat_series_dtype_timedelta) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): datetime_series * 1 - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): nat_series_dtype_timestamp * 1 - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): datetime_series * 1.0 - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): nat_series_dtype_timestamp * 1.0 # division @@ -896,9 +898,9 @@ def test_ops_nat(self): assert_series_equal(timedelta_series / 2.0, Series([NaT, Timedelta('0.5s')])) assert_series_equal(timedelta_series / nan, nat_series_dtype_timedelta) - with tm.assertRaises(TypeError): + with 
pytest.raises(TypeError): nat_series_dtype_timestamp / 1.0 - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): nat_series_dtype_timestamp / 1 def test_ops_datetimelike_align(self): @@ -1030,12 +1032,12 @@ def test_comparison_invalid(self): s2 = Series(date_range('20010101', periods=5)) for (x, y) in [(s, s2), (s2, s)]: - self.assertRaises(TypeError, lambda: x == y) - self.assertRaises(TypeError, lambda: x != y) - self.assertRaises(TypeError, lambda: x >= y) - self.assertRaises(TypeError, lambda: x > y) - self.assertRaises(TypeError, lambda: x < y) - self.assertRaises(TypeError, lambda: x <= y) + pytest.raises(TypeError, lambda: x == y) + pytest.raises(TypeError, lambda: x != y) + pytest.raises(TypeError, lambda: x >= y) + pytest.raises(TypeError, lambda: x > y) + pytest.raises(TypeError, lambda: x < y) + pytest.raises(TypeError, lambda: x <= y) def test_more_na_comparisons(self): for dtype in [None, object]: @@ -1133,11 +1135,11 @@ def test_nat_comparisons_scalar(self): def test_comparison_different_length(self): a = Series(['a', 'b', 'c']) b = Series(['b', 'a']) - self.assertRaises(ValueError, a.__lt__, b) + pytest.raises(ValueError, a.__lt__, b) a = Series([1, 2]) b = Series([2, 3, 4]) - self.assertRaises(ValueError, a.__eq__, b) + pytest.raises(ValueError, a.__eq__, b) def test_comparison_label_based(self): @@ -1216,7 +1218,7 @@ def test_comparison_label_based(self): assert_series_equal(result, expected) for v in [np.nan, 'foo']: - self.assertRaises(TypeError, lambda: t | v) + pytest.raises(TypeError, lambda: t | v) for v in [False, 0]: result = Series([True, False, True], index=index) | v @@ -1233,7 +1235,7 @@ def test_comparison_label_based(self): expected = Series([False, False, False], index=index) assert_series_equal(result, expected) for v in [np.nan]: - self.assertRaises(TypeError, lambda: t & v) + pytest.raises(TypeError, lambda: t & v) def test_comparison_flex_basic(self): left = pd.Series(np.random.randn(10)) @@ -1388,11 +1390,11 @@ def test_operators_bitwise(self): expected = Series([1, 1, 3, 3], dtype='int32') assert_series_equal(res, expected) - self.assertRaises(TypeError, lambda: s_1111 & 'a') - self.assertRaises(TypeError, lambda: s_1111 & ['a', 'b', 'c', 'd']) - self.assertRaises(TypeError, lambda: s_0123 & np.NaN) - self.assertRaises(TypeError, lambda: s_0123 & 3.14) - self.assertRaises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2]) + pytest.raises(TypeError, lambda: s_1111 & 'a') + pytest.raises(TypeError, lambda: s_1111 & ['a', 'b', 'c', 'd']) + pytest.raises(TypeError, lambda: s_0123 & np.NaN) + pytest.raises(TypeError, lambda: s_0123 & 3.14) + pytest.raises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2]) # s_0123 will be all false now because of reindexing like s_tft if compat.PY3: @@ -1435,7 +1437,7 @@ def test_scalar_na_cmp_corners(self): def tester(a, b): return a & b - self.assertRaises(TypeError, tester, s, datetime(2005, 1, 1)) + pytest.raises(TypeError, tester, s, datetime(2005, 1, 1)) s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)]) s[::2] = np.nan @@ -1452,8 +1454,8 @@ def tester(a, b): # this is an alignment issue; these are equivalent # https://github.com/pandas-dev/pandas/issues/5284 - self.assertRaises(ValueError, lambda: d.__and__(s, axis='columns')) - self.assertRaises(ValueError, tester, s, d) + pytest.raises(ValueError, lambda: d.__and__(s, axis='columns')) + pytest.raises(ValueError, tester, s, d) # this is wrong as its not a boolean result # result = d.__and__(s,axis='index') @@ -1627,10 +1629,10 @@ def 
test_series_frame_radd_bug(self): assert_frame_equal(result, expected) # really raise this time - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): datetime.now() + self.ts - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): self.ts + datetime.now() def test_series_radd_more(self): @@ -1643,7 +1645,7 @@ def test_series_radd_more(self): for d in data: for dtype in [None, object]: s = Series(d, dtype=dtype) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): 'foo_' + s for dtype in [None, object]: @@ -1680,7 +1682,7 @@ def test_frame_radd_more(self): for d in data: for dtype in [None, object]: s = DataFrame(d, dtype=dtype) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): 'foo_' + s for dtype in [None, object]: diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py index f47eae3adc3ae..1a1829eb5829f 100644 --- a/pandas/tests/series/test_rank.py +++ b/pandas/tests/series/test_rank.py @@ -213,7 +213,7 @@ def test_rank_categorical(self): def test_rank_signature(self): s = Series([0, 1]) s.rank(method='average') - self.assertRaises(ValueError, s.rank, 'average') + pytest.raises(ValueError, s.rank, 'average') def test_rank_inf(self): pytest.skip('DataFrame.rank does not currently rank ' diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 5190eb110f4cf..a9a9204cf7f67 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -1,6 +1,8 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + import numpy as np import pandas as pd import pandas._libs.lib as lib @@ -72,7 +74,7 @@ def test_replace(self): tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) # malformed - self.assertRaises(ValueError, ser.replace, [1, 2, 3], [np.nan, 0]) + pytest.raises(ValueError, ser.replace, [1, 2, 3], [np.nan, 0]) # make sure that we aren't just masking a TypeError because bools don't # implement indexing @@ -117,7 +119,7 @@ def test_replace_with_single_list(self): # make sure things don't get corrupted when fillna call fails s = ser.copy() - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): s.replace([1, 2, 3], inplace=True, method='crash_cymbal') tm.assert_series_equal(s, ser) diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index ead3c7dbc3bfd..7ab2ec245f611 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -1,5 +1,7 @@ # coding=utf-8 +import pytest + import numpy as np import random @@ -61,16 +63,16 @@ def test_sort_values(self): expected = ts.sort_values(ascending=False, na_position='first') assert_series_equal(expected, ordered) - self.assertRaises(ValueError, - lambda: ts.sort_values(ascending=None)) - self.assertRaises(ValueError, - lambda: ts.sort_values(ascending=[])) - self.assertRaises(ValueError, - lambda: ts.sort_values(ascending=[1, 2, 3])) - self.assertRaises(ValueError, - lambda: ts.sort_values(ascending=[False, False])) - self.assertRaises(ValueError, - lambda: ts.sort_values(ascending='foobar')) + pytest.raises(ValueError, + lambda: ts.sort_values(ascending=None)) + pytest.raises(ValueError, + lambda: ts.sort_values(ascending=[])) + pytest.raises(ValueError, + lambda: ts.sort_values(ascending=[1, 2, 3])) + pytest.raises(ValueError, + lambda: ts.sort_values(ascending=[False, False])) + pytest.raises(ValueError, + lambda: ts.sort_values(ascending='foobar')) # inplace=True ts = self.ts.copy() @@ -87,7 +89,7 @@ def 
test_sort_values(self): def f(): s.sort_values(inplace=True) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_sort_index(self): rindex = list(self.ts.index) @@ -110,13 +112,13 @@ def test_sort_index(self): sorted_series = random_order.sort_index(axis=0) assert_series_equal(sorted_series, self.ts) - self.assertRaises(ValueError, lambda: random_order.sort_values(axis=1)) + pytest.raises(ValueError, lambda: random_order.sort_values(axis=1)) sorted_series = random_order.sort_index(level=0, axis=0) assert_series_equal(sorted_series, self.ts) - self.assertRaises(ValueError, - lambda: random_order.sort_index(level=0, axis=1)) + pytest.raises(ValueError, + lambda: random_order.sort_index(level=0, axis=1)) def test_sort_index_inplace(self): diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 78e55420bb3c8..bafb8ebfd7145 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -1,6 +1,8 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + import numpy as np from datetime import datetime, timedelta, time @@ -73,7 +75,7 @@ def test_shift(self): assert_series_equal(shifted2, shifted3) assert_series_equal(ps, shifted2.shift(-1, 'B')) - self.assertRaises(ValueError, ps.shift, freq='D') + pytest.raises(ValueError, ps.shift, freq='D') # legacy support shifted4 = ps.shift(1, freq='B') @@ -104,7 +106,7 @@ def test_shift(self): # incompat tz s2 = Series(date_range('2000-01-01 09:00:00', periods=5, tz='CET'), name='foo') - self.assertRaises(ValueError, lambda: s - s2) + pytest.raises(ValueError, lambda: s - s2) def test_shift2(self): ts = Series(np.random.randn(5), @@ -120,7 +122,7 @@ def test_shift2(self): tm.assert_index_equal(result.index, exp_index) idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) - self.assertRaises(ValueError, idx.shift, 1) + pytest.raises(ValueError, idx.shift, 1) def test_shift_dst(self): # GH 13926 @@ -163,7 +165,7 @@ def test_tshift(self): shifted3 = ps.tshift(freq=BDay()) assert_series_equal(shifted, shifted3) - self.assertRaises(ValueError, ps.tshift, freq='M') + pytest.raises(ValueError, ps.tshift, freq='M') # DatetimeIndex shifted = self.ts.tshift(1) @@ -182,7 +184,7 @@ def test_tshift(self): assert_series_equal(unshifted, inferred_ts) no_freq = self.ts[[0, 5, 7]] - self.assertRaises(ValueError, no_freq.tshift) + pytest.raises(ValueError, no_freq.tshift) def test_truncate(self): offset = BDay() @@ -230,9 +232,9 @@ def test_truncate(self): truncated = ts.truncate(before=self.ts.index[-1] + offset) assert (len(truncated) == 0) - self.assertRaises(ValueError, ts.truncate, - before=self.ts.index[-1] + offset, - after=self.ts.index[0] - offset) + pytest.raises(ValueError, ts.truncate, + before=self.ts.index[-1] + offset, + after=self.ts.index[0] - offset) def test_asfreq(self): ts = Series([0., 1., 2.], index=[datetime(2009, 10, 30), datetime( @@ -401,7 +403,7 @@ def test_empty_series_ops(self): assert_series_equal(a, a + b) assert_series_equal(a, a - b) assert_series_equal(a, b + a) - self.assertRaises(TypeError, lambda x, y: x - y, b, a) + pytest.raises(TypeError, lambda x, y: x - y, b, a) def test_contiguous_boolean_preserve_freq(self): rng = date_range('1/1/2000', '3/1/2000', freq='B') @@ -466,9 +468,9 @@ def test_to_datetime_unit(self): Timestamp('1970-01-03')] + ['NaT'] * 3) tm.assert_index_equal(result, expected) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): to_datetime([1, 2, 'foo'], unit='D') - with 
self.assertRaises(ValueError): + with pytest.raises(ValueError): to_datetime([1, 2, 111111111], unit='D') # coerce we can process @@ -709,16 +711,16 @@ def test_between_time(self): def test_between_time_types(self): # GH11818 rng = date_range('1/1/2000', '1/5/2000', freq='5min') - self.assertRaises(ValueError, rng.indexer_between_time, - datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + pytest.raises(ValueError, rng.indexer_between_time, + datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) frame = DataFrame({'A': 0}, index=rng) - self.assertRaises(ValueError, frame.between_time, - datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + pytest.raises(ValueError, frame.between_time, + datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) series = Series(0, index=rng) - self.assertRaises(ValueError, series.between_time, - datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + pytest.raises(ValueError, series.between_time, + datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) def test_between_time_formats(self): # GH11818 diff --git a/pandas/tests/series/test_validate.py b/pandas/tests/series/test_validate.py index cf0482b41c80a..6327e265d8c1e 100644 --- a/pandas/tests/series/test_validate.py +++ b/pandas/tests/series/test_validate.py @@ -1,8 +1,8 @@ -from unittest import TestCase +import pytest from pandas.core.series import Series -class TestSeriesValidate(TestCase): +class TestSeriesValidate(object): """Tests for error handling related to data types of method arguments.""" s = Series([1, 2, 3, 4, 5]) @@ -11,23 +11,23 @@ def test_validate_bool_args(self): invalid_values = [1, "True", [1, 2, 3], 5.0] for value in invalid_values: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.s.reset_index(inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.s._set_name(name='hello', inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.s.sort_values(inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.s.sort_index(inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.s.sort_index(inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.s.rename(inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.s.dropna(inplace=value) diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py index 653656fe1e539..df14a3139edab 100644 --- a/pandas/tests/sparse/test_array.py +++ b/pandas/tests/sparse/test_array.py @@ -1,6 +1,8 @@ from pandas.compat import range + import re import operator +import pytest import warnings from numpy import nan @@ -178,7 +180,7 @@ def test_take_negative(self): def test_bad_take(self): assertRaisesRegexp(IndexError, "bounds", lambda: self.arr.take(11)) - self.assertRaises(IndexError, lambda: self.arr.take(-11)) + pytest.raises(IndexError, lambda: self.arr.take(-11)) def test_take_invalid_kwargs(self): msg = r"take\(\) got an unexpected keyword argument 'foo'" @@ -218,11 +220,11 @@ def test_take_filling(self): with tm.assertRaisesRegexp(ValueError, msg): sparse.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, -6])) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, 5])) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, 5]), fill_value=True) def 
test_take_filling_fill_value(self): @@ -250,11 +252,11 @@ def test_take_filling_fill_value(self): with tm.assertRaisesRegexp(ValueError, msg): sparse.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, -6])) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, 5])) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, 5]), fill_value=True) def test_take_filling_all_nan(self): @@ -267,11 +269,11 @@ def test_take_filling_all_nan(self): expected = SparseArray([np.nan, np.nan, np.nan]) tm.assert_sp_array_equal(result, expected) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, -6])) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, 5])) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, 5]), fill_value=True) def test_set_item(self): @@ -496,10 +498,10 @@ def test_getslice_tuple(self): exp = SparseArray(dense[4:, ], fill_value=0) tm.assert_sp_array_equal(res, exp) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse[4:, :] - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): # check numpy compat dense[4:, :] @@ -546,7 +548,7 @@ def _check_op(op, first, second): def _check_inplace_op(op): tmp = arr1.copy() - self.assertRaises(NotImplementedError, op, tmp, arr2) + pytest.raises(NotImplementedError, op, tmp, arr2) with np.errstate(all='ignore'): bin_ops = [operator.add, operator.sub, operator.mul, diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index d5df25e82de13..d5df744648ff3 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -133,7 +133,7 @@ def test_constructor(self): tm.assert_sp_frame_equal(cons, reindexed, exact_indices=False) # assert level parameter breaks reindex - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): self.frame.reindex(idx, level=0) repr(self.frame) @@ -147,8 +147,8 @@ def test_constructor_ndarray(self): tm.assert_sp_frame_equal(sp, self.frame.reindex(columns=['A'])) # raise on level argument - self.assertRaises(TypeError, self.frame.reindex, columns=['A'], - level=1) + pytest.raises(TypeError, self.frame.reindex, columns=['A'], + level=1) # wrong length index / columns with tm.assertRaisesRegexp(ValueError, "^Index length"): @@ -405,7 +405,7 @@ def test_getitem(self): exp = sdf.reindex(columns=['a', 'b']) tm.assert_sp_frame_equal(result, exp) - self.assertRaises(Exception, sdf.__getitem__, ['a', 'd']) + pytest.raises(Exception, sdf.__getitem__, ['a', 'd']) def test_iloc(self): @@ -465,7 +465,7 @@ def test_getitem_overload(self): subframe = self.frame[indexer] tm.assert_index_equal(subindex, subframe.index) - self.assertRaises(Exception, self.frame.__getitem__, indexer[:-1]) + pytest.raises(Exception, self.frame.__getitem__, indexer[:-1]) def test_setitem(self): @@ -509,8 +509,8 @@ def _check_frame(frame, orig): self.assertEqual(len(frame['I'].sp_values), N // 2) # insert ndarray wrong size - self.assertRaises(Exception, frame.__setitem__, 'foo', - np.random.randn(N - 1)) + pytest.raises(Exception, frame.__setitem__, 'foo', + np.random.randn(N - 1)) # scalar value frame['J'] = 5 @@ -557,13 +557,13 @@ def test_delitem(self): def test_set_columns(self): self.frame.columns = self.frame.columns - self.assertRaises(Exception, setattr, self.frame, 
'columns', - self.frame.columns[:-1]) + pytest.raises(Exception, setattr, self.frame, 'columns', + self.frame.columns[:-1]) def test_set_index(self): self.frame.index = self.frame.index - self.assertRaises(Exception, setattr, self.frame, 'index', - self.frame.index[:-1]) + pytest.raises(Exception, setattr, self.frame, 'index', + self.frame.index[:-1]) def test_append(self): a = self.frame[:5] @@ -796,7 +796,7 @@ def test_join(self): tm.assert_sp_frame_equal(joined, self.frame, exact_indices=False) right = self.frame.loc[:, ['B', 'D']] - self.assertRaises(Exception, left.join, right) + pytest.raises(Exception, left.join, right) with tm.assertRaisesRegexp(ValueError, 'Other Series must have a name'): @@ -931,11 +931,11 @@ def test_reindex_method(self): tm.assert_sp_frame_equal(result, expected) # method='bfill' - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): sparse.reindex(columns=range(6), method='bfill') # method='ffill' - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): sparse.reindex(columns=range(6), method='ffill') def test_take(self): @@ -969,8 +969,8 @@ def _check(frame): _check(self.iframe) # for now - self.assertRaises(Exception, _check, self.zframe) - self.assertRaises(Exception, _check, self.fill_frame) + pytest.raises(Exception, _check, self.zframe) + pytest.raises(Exception, _check, self.fill_frame) def test_transpose(self): diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py index 4a9bea798be36..d0e5196570adc 100644 --- a/pandas/tests/sparse/test_indexing.py +++ b/pandas/tests/sparse/test_indexing.py @@ -220,7 +220,7 @@ def test_iloc(self): exp = orig.iloc[[1, -2, -4]].to_sparse() tm.assert_sp_series_equal(result, exp) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.iloc[[1, 3, 5]] def test_iloc_fill_value(self): @@ -792,7 +792,7 @@ def test_iloc(self): exp = orig.iloc[[2], [1, 0]].to_sparse() tm.assert_sp_frame_equal(result, exp) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.iloc[[1, 3, 5]] def test_iloc_slice(self): diff --git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py index 1990e29a4b901..00e01b8ba14bc 100644 --- a/pandas/tests/sparse/test_libsparse.py +++ b/pandas/tests/sparse/test_libsparse.py @@ -184,7 +184,7 @@ def test_intindex_make_union(self): a = IntIndex(5, np.array([0, 1], dtype=np.int32)) b = IntIndex(4, np.array([0, 1], dtype=np.int32)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): a.make_union(b) @@ -196,7 +196,7 @@ def _check_correct(a, b, expected): assert (result.equals(expected)) def _check_length_exc(a, longer): - self.assertRaises(Exception, a.intersect, longer) + pytest.raises(Exception, a.intersect, longer) def _check_case(xloc, xlen, yloc, ylen, eloc, elen): xindex = BlockIndex(TEST_LENGTH, xloc, xlen) @@ -451,10 +451,10 @@ def test_check_integrity(self): index = BlockIndex(1, locs, lengths) # noqa # block extend beyond end - self.assertRaises(Exception, BlockIndex, 10, [5], [10]) + pytest.raises(Exception, BlockIndex, 10, [5], [10]) # block overlap - self.assertRaises(Exception, BlockIndex, 10, [2, 5], [5, 3]) + pytest.raises(Exception, BlockIndex, 10, [2, 5], [5, 3]) def test_to_int_index(self): locs = [0, 10] diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index 8ccc9e1131f60..dcac0f599fdeb 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ 
-1,6 +1,7 @@ # pylint: disable-msg=E1101,W0612 import operator +import pytest from numpy import nan import numpy as np @@ -353,7 +354,7 @@ def test_shape(self): self.assertEqual(self.ziseries2.shape, (15, )) def test_astype(self): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): self.bseries.astype(np.int64) def test_astype_all(self): @@ -425,12 +426,12 @@ def _check_getitem(sp, dense): _check_getitem(self.ziseries, self.ziseries.to_dense()) # exception handling - self.assertRaises(Exception, self.bseries.__getitem__, - len(self.bseries) + 1) + pytest.raises(Exception, self.bseries.__getitem__, + len(self.bseries) + 1) # index not contained - self.assertRaises(Exception, self.btseries.__getitem__, - self.btseries.index[-1] + BDay()) + pytest.raises(Exception, self.btseries.__getitem__, + self.btseries.index[-1] + BDay()) def test_get_get_value(self): tm.assert_almost_equal(self.bseries.get(10), self.bseries[10]) @@ -489,8 +490,8 @@ def _compare(idx): self._check_all(_compare_with_dense) - self.assertRaises(Exception, self.bseries.take, - [0, len(self.bseries) + 1]) + pytest.raises(Exception, self.bseries.take, + [0, len(self.bseries) + 1]) # Corner case sp = SparseSeries(np.ones(10) * nan) @@ -1036,25 +1037,25 @@ def test_to_coo_text_names_text_row_levels_nosort(self): def test_to_coo_bad_partition_nonnull_intersection(self): ss = self.sparse_series[0] - self.assertRaises(ValueError, ss.to_coo, ['A', 'B', 'C'], ['C', 'D']) + pytest.raises(ValueError, ss.to_coo, ['A', 'B', 'C'], ['C', 'D']) def test_to_coo_bad_partition_small_union(self): ss = self.sparse_series[0] - self.assertRaises(ValueError, ss.to_coo, ['A'], ['C', 'D']) + pytest.raises(ValueError, ss.to_coo, ['A'], ['C', 'D']) def test_to_coo_nlevels_less_than_two(self): ss = self.sparse_series[0] ss.index = np.arange(len(ss.index)) - self.assertRaises(ValueError, ss.to_coo) + pytest.raises(ValueError, ss.to_coo) def test_to_coo_bad_ilevel(self): ss = self.sparse_series[0] - self.assertRaises(KeyError, ss.to_coo, ['A', 'B'], ['C', 'D', 'E']) + pytest.raises(KeyError, ss.to_coo, ['A', 'B'], ['C', 'D', 'E']) def test_to_coo_duplicate_index_entries(self): ss = pd.concat([self.sparse_series[0], self.sparse_series[0]]).to_sparse() - self.assertRaises(ValueError, ss.to_coo, ['A', 'B'], ['C', 'D']) + pytest.raises(ValueError, ss.to_coo, ['A', 'B'], ['C', 'D']) def test_from_coo_dense_index(self): ss = SparseSeries.from_coo(self.coo_matrices[0], dense_index=True) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 3b916f0b6792f..df267f2374051 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- import numpy as np +import pytest + from numpy.random import RandomState from numpy import nan from datetime import datetime @@ -126,9 +128,9 @@ def test_unsortable(self): if compat.PY2 and not pd._np_version_under1p10: # RuntimeWarning: tp_compare didn't return -1 or -2 for exception with tm.assert_produces_warning(RuntimeWarning): - tm.assertRaises(TypeError, algos.safe_sort, arr) + pytest.raises(TypeError, algos.safe_sort, arr) else: - tm.assertRaises(TypeError, algos.safe_sort, arr) + pytest.raises(TypeError, algos.safe_sort, arr) def test_exceptions(self): with tm.assertRaisesRegexp(TypeError, @@ -284,7 +286,7 @@ def test_complex_sorting(self): x17 = np.array([complex(i) for i in range(17)], dtype=object) - self.assertRaises(TypeError, algos.factorize, x17[::-1], sort=True) + pytest.raises(TypeError, algos.factorize, x17[::-1], sort=True) def 
test_uint64_factorize(self): data = np.array([2**63, 1, 2**63], dtype=np.uint64) @@ -499,9 +501,9 @@ class TestIsin(tm.TestCase): def test_invalid(self): - self.assertRaises(TypeError, lambda: algos.isin(1, 1)) - self.assertRaises(TypeError, lambda: algos.isin(1, [1])) - self.assertRaises(TypeError, lambda: algos.isin([1], 1)) + pytest.raises(TypeError, lambda: algos.isin(1, 1)) + pytest.raises(TypeError, lambda: algos.isin(1, [1])) + pytest.raises(TypeError, lambda: algos.isin([1], 1)) def test_basic(self): @@ -620,8 +622,8 @@ def test_value_counts_dtypes(self): result = algos.value_counts(Series([1, 1., '1'])) # object self.assertEqual(len(result), 2) - self.assertRaises(TypeError, lambda s: algos.value_counts(s, bins=1), - ['1', 1]) + pytest.raises(TypeError, lambda s: algos.value_counts(s, bins=1), + ['1', 1]) def test_value_counts_nat(self): td = Series([np.timedelta64(10000), pd.NaT], dtype='timedelta64[ns]') @@ -1355,15 +1357,15 @@ def test_int64_add_overflow(): # Check that the nan boolean arrays override whether or not # the addition overflows. We don't check the result but just # the fact that an OverflowError is not raised. - with tm.assertRaises(AssertionError): + with pytest.raises(AssertionError): with tm.assertRaisesRegexp(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), arr_mask=np.array([True, True])) - with tm.assertRaises(AssertionError): + with pytest.raises(AssertionError): with tm.assertRaisesRegexp(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), b_mask=np.array([True, True])) - with tm.assertRaises(AssertionError): + with pytest.raises(AssertionError): with tm.assertRaisesRegexp(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), arr_mask=np.array([True, False]), diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 4b9c27d23273d..d91aab6bc3ceb 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -130,17 +130,17 @@ def test_invalida_delgation(self): def f(): delegate.foo - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def f(): delegate.foo = 5 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def f(): delegate.foo() - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_memory_usage(self): # Delegate does not implement memory_usage. 
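A note on the conversion pattern that runs through all of these hunks: unittest's `self.assertRaises` is a `TestCase` method, whereas `pytest.raises` is a module-level function that works both as a callable (a drop-in for the old call sites, as above) and as a context manager. A minimal standalone sketch of the two forms; the `divide` helper and the test name are illustrative, not taken from this patch:

    import pytest

    def divide(a, b):
        # plain helper; raises ZeroDivisionError when b == 0
        return a / b

    def test_divide_by_zero():
        # callable form: exception type, callable, then its arguments --
        # this mirrors the mechanical self.assertRaises -> pytest.raises swap
        pytest.raises(ZeroDivisionError, divide, 1, 0)

        # context-manager form: the idiomatic spelling, which also exposes
        # the raised exception through an ExceptionInfo object
        with pytest.raises(ZeroDivisionError) as excinfo:
            divide(1, 0)
        assert "division" in str(excinfo.value)

The context-manager form is generally the preferred style for new code, since it avoids `lambda:` wrappers and keeps the failing expression readable inline.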
@@ -227,10 +227,10 @@ def check_ops_properties(self, props, filter=None, ignore_failures=False): # an object that is datetimelike will raise a TypeError, # otherwise an AttributeError if issubclass(type(o), DatetimeIndexOpsMixin): - self.assertRaises(TypeError, lambda: getattr(o, op)) + pytest.raises(TypeError, lambda: getattr(o, op)) else: - self.assertRaises(AttributeError, - lambda: getattr(o, op)) + pytest.raises(AttributeError, + lambda: getattr(o, op)) def test_binary_ops_docs(self): from pandas import DataFrame, Panel @@ -978,7 +978,7 @@ def test_validate_bool_args(self): invalid_values = [1, "True", [1, 2, 3], 5.0] for value in invalid_values: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.int_series.drop_duplicates(inplace=value) @@ -1027,5 +1027,5 @@ class T(NoNewAttributesMixin): def f(): t.b = "test" - self.assertRaises(AttributeError, f) + pytest.raises(AttributeError, f) self.assertFalse(hasattr(t, "b")) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 65ef841e624e2..b26c015133697 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -120,7 +120,7 @@ def test_constructor_unsortable(self): self.assertFalse(factor.ordered) # this however will raise as cannot be sorted - self.assertRaises( + pytest.raises( TypeError, lambda: Categorical(arr, ordered=True)) def test_constructor_interval(self): @@ -166,12 +166,12 @@ def test_constructor(self): def f(): Categorical([1, 2], [1, 2, 2]) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): Categorical(["a", "b"], ["a", "b", "b"]) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # The default should be unordered c1 = Categorical(["a", "b", "c", "a"]) @@ -409,31 +409,31 @@ def test_from_codes(self): def f(): Categorical.from_codes([1, 2], [1, 2]) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # no int codes def f(): Categorical.from_codes(["a"], [1, 2]) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # no unique categories def f(): Categorical.from_codes([0, 1, 2], ["a", "a", "b"]) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # NaN categories included def f(): Categorical.from_codes([0, 1, 2], ["a", "b", np.nan]) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # too negative def f(): Categorical.from_codes([-2, 1, 2], ["a", "b", "c"]) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) exp = Categorical(["a", "b", "c"], ordered=False) res = Categorical.from_codes([0, 1, 2], ["a", "b", "c"]) @@ -522,7 +522,7 @@ def test_comparisons(self): def f(): cat > cat_rev - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) cat_rev_base2 = pd.Categorical( ["b", "b", "b"], categories=["c", "b", "a", "d"]) @@ -530,7 +530,7 @@ def f(): def f(): cat_rev > cat_rev_base2 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # Only categories with same ordering information can be compared cat_unorderd = cat.set_ordered(False) @@ -539,26 +539,26 @@ def f(): def f(): cat > cat_unorderd - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # comparison (in both directions) with Series will raise s = Series(["b", "b", "b"]) - self.assertRaises(TypeError, lambda: cat > s) - self.assertRaises(TypeError, lambda: cat_rev > s) - self.assertRaises(TypeError, lambda: s < cat) - self.assertRaises(TypeError, lambda: s < cat_rev) + pytest.raises(TypeError, lambda: cat > s) + pytest.raises(TypeError, lambda: 
cat_rev > s) + pytest.raises(TypeError, lambda: s < cat) + pytest.raises(TypeError, lambda: s < cat_rev) # comparison with numpy.array will raise in both direction, but only on # newer numpy versions a = np.array(["b", "b", "b"]) - self.assertRaises(TypeError, lambda: cat > a) - self.assertRaises(TypeError, lambda: cat_rev > a) + pytest.raises(TypeError, lambda: cat > a) + pytest.raises(TypeError, lambda: cat_rev > a) # The following work via '__array_priority__ = 1000' # works only on numpy >= 1.7.1 if LooseVersion(np.__version__) > "1.7.1": - self.assertRaises(TypeError, lambda: a < cat) - self.assertRaises(TypeError, lambda: a < cat_rev) + pytest.raises(TypeError, lambda: a < cat) + pytest.raises(TypeError, lambda: a < cat_rev) # Make sure that unequal comparison take the categories order in # account @@ -777,13 +777,13 @@ def test_categories_assigments(self): def f(): s.categories = [1, 2, 3, 4] - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # shorten def f(): s.categories = [1, 2] - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_construction_with_ordered(self): # GH 9347, 9190 @@ -968,19 +968,19 @@ def test_reorder_categories(self): def f(): cat.reorder_categories(["a"]) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # still not all "old" in "new" def f(): cat.reorder_categories(["a", "b", "d"]) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # all "old" included in "new", but too long def f(): cat.reorder_categories(["a", "b", "c", "d"]) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_add_categories(self): cat = Categorical(["a", "b", "c", "a"], ordered=True) @@ -1006,7 +1006,7 @@ def test_add_categories(self): def f(): cat.add_categories(["d"]) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # GH 9927 cat = Categorical(list("abc"), ordered=True) @@ -1046,7 +1046,7 @@ def test_remove_categories(self): def f(): cat.remove_categories(["c"]) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_remove_unused_categories(self): c = Categorical(["a", "b", "c", "d", "a"], @@ -1127,7 +1127,7 @@ def test_codes_immutable(self): def f(): c.codes = np.array([0, 1, 2, 0, 1], dtype='int8') - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # changes in the codes array should raise # np 1.6.1 raises RuntimeError rather than ValueError @@ -1136,7 +1136,7 @@ def f(): def f(): codes[4] = 1 - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # But even after getting the codes, the original array should still be # writeable! 
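The hunk above pins down an invariant worth stating plainly: `Categorical.codes` hands out a read-only view, so callers cannot mutate the category codes behind the Categorical's back, while the Categorical's own backing array stays writeable. A minimal sketch of that behavior on a modern NumPy, which raises ValueError for writes to a read-only array (very old NumPy raised RuntimeError instead, as the comment above notes):

    import numpy as np
    import pandas as pd
    import pytest

    cat = pd.Categorical(["a", "b", "c", "a", "b"])

    # the codes exposed on the Categorical are deliberately read-only
    codes = cat.codes
    with pytest.raises(ValueError):
        codes[4] = 1

    # a copy taken by the caller is independent and writeable
    mine = np.array(codes)
    mine[4] = 1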
@@ -1151,8 +1151,8 @@ def test_min_max(self): # unordered cats have no min/max cat = Categorical(["a", "b", "c", "d"], ordered=False) - self.assertRaises(TypeError, lambda: cat.min()) - self.assertRaises(TypeError, lambda: cat.max()) + pytest.raises(TypeError, lambda: cat.min()) + pytest.raises(TypeError, lambda: cat.max()) cat = Categorical(["a", "b", "c", "d"], ordered=True) _min = cat.min() _max = cat.max() @@ -1480,18 +1480,18 @@ def test_searchsorted(self): tm.assert_numpy_array_equal(res_ser, exp) # Searching for a single value that is not from the Categorical - self.assertRaises(ValueError, lambda: c1.searchsorted('cucumber')) - self.assertRaises(ValueError, lambda: s1.searchsorted('cucumber')) + pytest.raises(ValueError, lambda: c1.searchsorted('cucumber')) + pytest.raises(ValueError, lambda: s1.searchsorted('cucumber')) # Searching for multiple values one of each is not from the Categorical - self.assertRaises(ValueError, - lambda: c1.searchsorted(['bread', 'cucumber'])) - self.assertRaises(ValueError, - lambda: s1.searchsorted(['bread', 'cucumber'])) + pytest.raises(ValueError, + lambda: c1.searchsorted(['bread', 'cucumber'])) + pytest.raises(ValueError, + lambda: s1.searchsorted(['bread', 'cucumber'])) # searchsorted call for unordered Categorical - self.assertRaises(ValueError, lambda: c2.searchsorted('apple')) - self.assertRaises(ValueError, lambda: s2.searchsorted('apple')) + pytest.raises(ValueError, lambda: c2.searchsorted('apple')) + pytest.raises(ValueError, lambda: s2.searchsorted('apple')) with tm.assert_produces_warning(FutureWarning): res = c1.searchsorted(v=['bread']) @@ -1568,36 +1568,36 @@ def test_validate_inplace(self): invalid_values = [1, "True", [1, 2, 3], 5.0] for value in invalid_values: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): cat.set_ordered(value=True, inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): cat.as_ordered(inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): cat.as_unordered(inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): cat.set_categories(['X', 'Y', 'Z'], rename=True, inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): cat.rename_categories(['X', 'Y', 'Z'], inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): cat.reorder_categories( ['X', 'Y', 'Z'], ordered=True, inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): cat.add_categories( new_categories=['D', 'E', 'F'], inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): cat.remove_categories(removals=['D', 'E', 'F'], inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): cat.remove_unused_categories(inplace=value) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): cat.sort_values(inplace=value) @@ -1817,14 +1817,14 @@ def test_construction_frame(self): tm.assert_frame_equal(df, expected) # invalid (shape) - self.assertRaises( + pytest.raises( ValueError, lambda: DataFrame([pd.Categorical(list('abc')), pd.Categorical(list('abdefg'))])) # ndim > 1 - self.assertRaises(NotImplementedError, - lambda: pd.Categorical(np.array([list('abcd')]))) + pytest.raises(NotImplementedError, + lambda: pd.Categorical(np.array([list('abcd')]))) def test_reshaping(self): @@ -1954,15 +1954,15 @@ def test_sequence_like(self): def test_series_delegations(self): # invalid accessor - 
self.assertRaises(AttributeError, lambda: Series([1, 2, 3]).cat)
+        pytest.raises(AttributeError, lambda: Series([1, 2, 3]).cat)
         tm.assertRaisesRegexp(
             AttributeError,
             r"Can only use .cat accessor with a 'category' dtype",
             lambda: Series([1, 2, 3]).cat)
-        self.assertRaises(AttributeError, lambda: Series(['a', 'b', 'c']).cat)
-        self.assertRaises(AttributeError, lambda: Series(np.arange(5.)).cat)
-        self.assertRaises(AttributeError,
-                          lambda: Series([Timestamp('20130101')]).cat)
+        pytest.raises(AttributeError, lambda: Series(['a', 'b', 'c']).cat)
+        pytest.raises(AttributeError, lambda: Series(np.arange(5.)).cat)
+        pytest.raises(AttributeError,
+                      lambda: Series([Timestamp('20130101')]).cat)

         # Series should delegate calls to '.categories', '.codes', '.ordered'
         # and the methods '.set_categories()' 'drop_unused_categories()' to the
@@ -2007,7 +2007,7 @@ def test_series_delegations(self):
         def f():
             s.set_categories([4, 3, 2, 1])

-        self.assertRaises(Exception, f)
+        pytest.raises(Exception, f)
         # right: s.cat.set_categories([4,3,2,1])

     def test_series_functions_no_warnings(self):
@@ -2795,8 +2795,8 @@ def test_groupby_sort(self):
     def test_min_max(self):
         # unordered cats have no min/max
         cat = Series(Categorical(["a", "b", "c", "d"], ordered=False))
-        self.assertRaises(TypeError, lambda: cat.min())
-        self.assertRaises(TypeError, lambda: cat.max())
+        pytest.raises(TypeError, lambda: cat.min())
+        pytest.raises(TypeError, lambda: cat.max())

         cat = Series(Categorical(["a", "b", "c", "d"], ordered=True))
         _min = cat.min()
@@ -3395,7 +3395,7 @@ def f():
             df = orig.copy()
             df.iloc[2, 0] = "c"

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # - assign a complete row (mixed values) -> exp_single_row
         df = orig.copy()
@@ -3407,7 +3407,7 @@ def f():
             df = orig.copy()
             df.iloc[2, :] = ["c", 2]

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # - assign multiple rows (mixed values) -> exp_multi_row
         df = orig.copy()
@@ -3418,7 +3418,7 @@ def f():
             df = orig.copy()
             df.iloc[2:4, :] = [["c", 2], ["c", 2]]

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # assign a part of a column with dtype == categorical ->
         # exp_parts_cats_col
         df = orig.copy()
         df.iloc[2:4, 0] = pd.Categorical(["b", "b"], categories=["a", "b"])
         tm.assert_frame_equal(df, exp_parts_cats_col)

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             # different categories -> not sure if this should fail or pass
             df = orig.copy()
             df.iloc[2:4, 0] = pd.Categorical(
                 ["b", "b"], categories=["a", "b", "c"])

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             # different values
             df = orig.copy()
             df.iloc[2:4, 0] = pd.Categorical(
                 ["c", "c"], categories=["a", "b", "c"])
@@ -3444,7 +3444,7 @@ def f():
         df.iloc[2:4, 0] = ["b", "b"]
         tm.assert_frame_equal(df, exp_parts_cats_col)

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.iloc[2:4, 0] = ["c", "c"]

         # loc
@@ -3463,7 +3463,7 @@ def f():
             df = orig.copy()
             df.loc["j", "cats"] = "c"

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # - assign a complete row (mixed values) -> exp_single_row
         df = orig.copy()
@@ -3475,7 +3475,7 @@ def f():
             df = orig.copy()
             df.loc["j", :] = ["c", 2]

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # - assign multiple rows (mixed values) -> exp_multi_row
         df = orig.copy()
@@ -3486,7 +3486,7 @@ def f():
             df = orig.copy()
             df.loc["j":"k", :] = [["c", 2], ["c", 2]]

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # assign a part of a column with dtype == categorical ->
         # exp_parts_cats_col
@@ -3495,13 +3495,13 @@ def f():
             ["b", "b"], categories=["a", "b"])
         tm.assert_frame_equal(df, exp_parts_cats_col)

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             # different categories -> not sure if this should fail or pass
             df = orig.copy()
             df.loc["j":"k", "cats"] = pd.Categorical(
                 ["b", "b"], categories=["a", "b", "c"])

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             # different values
             df = orig.copy()
             df.loc["j":"k", "cats"] = pd.Categorical(
                 ["c", "c"], categories=["a", "b", "c"])
@@ -3513,7 +3513,7 @@ def f():
         df.loc["j":"k", "cats"] = ["b", "b"]
         tm.assert_frame_equal(df, exp_parts_cats_col)

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.loc["j":"k", "cats"] = ["c", "c"]

         # loc
@@ -3532,7 +3532,7 @@ def f():
             df = orig.copy()
             df.loc["j", df.columns[0]] = "c"

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # - assign a complete row (mixed values) -> exp_single_row
         df = orig.copy()
@@ -3544,7 +3544,7 @@ def f():
             df = orig.copy()
             df.loc["j", :] = ["c", 2]

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # - assign multiple rows (mixed values) -> exp_multi_row
         df = orig.copy()
@@ -3555,7 +3555,7 @@ def f():
             df = orig.copy()
             df.loc["j":"k", :] = [["c", 2], ["c", 2]]

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # assign a part of a column with dtype == categorical ->
         # exp_parts_cats_col
@@ -3564,13 +3564,13 @@ def f():
             ["b", "b"], categories=["a", "b"])
         tm.assert_frame_equal(df, exp_parts_cats_col)

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             # different categories -> not sure if this should fail or pass
             df = orig.copy()
             df.loc["j":"k", df.columns[0]] = pd.Categorical(
                 ["b", "b"], categories=["a", "b", "c"])

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             # different values
             df = orig.copy()
             df.loc["j":"k", df.columns[0]] = pd.Categorical(
                 ["c", "c"], categories=["a", "b", "c"])
@@ -3582,7 +3582,7 @@ def f():
         df.loc["j":"k", df.columns[0]] = ["b", "b"]
         tm.assert_frame_equal(df, exp_parts_cats_col)

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.loc["j":"k", df.columns[0]] = ["c", "c"]

         # iat
@@ -3595,7 +3595,7 @@ def f():
             df = orig.copy()
             df.iat[2, 0] = "c"

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # at
         # - assign a single value -> exp_single_cats_value
@@ -3608,7 +3608,7 @@ def f():
             df = orig.copy()
             df.at["j", "cats"] = "c"

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # fancy indexing
         catsf = pd.Categorical(["a", "a", "c", "c", "a", "a", "a"],
@@ -3633,7 +3633,7 @@ def f():
             df = orig.copy()
             df.set_value("j", "cats", "c")

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # Assigning a Category to parts of a int/... column uses the values of
         # the Catgorical
@@ -3723,20 +3723,20 @@ def test_comparisons(self):
         def f():
             cat > cat_rev

-        self.assertRaises(TypeError, f)
+        pytest.raises(TypeError, f)

         # categorical cannot be compared to Series or numpy array, and also
         # not the other way around
-        self.assertRaises(TypeError, lambda: cat > s)
-        self.assertRaises(TypeError, lambda: cat_rev > s)
-        self.assertRaises(TypeError, lambda: cat > a)
-        self.assertRaises(TypeError, lambda: cat_rev > a)
+        pytest.raises(TypeError, lambda: cat > s)
+        pytest.raises(TypeError, lambda: cat_rev > s)
+        pytest.raises(TypeError, lambda: cat > a)
+        pytest.raises(TypeError, lambda: cat_rev > a)

-        self.assertRaises(TypeError, lambda: s < cat)
-        self.assertRaises(TypeError, lambda: s < cat_rev)
+        pytest.raises(TypeError, lambda: s < cat)
+        pytest.raises(TypeError, lambda: s < cat_rev)

-        self.assertRaises(TypeError, lambda: a < cat)
-        self.assertRaises(TypeError, lambda: a < cat_rev)
+        pytest.raises(TypeError, lambda: a < cat)
+        pytest.raises(TypeError, lambda: a < cat_rev)

         # unequal comparison should raise for unordered cats
         cat = Series(Categorical(list("abc")))
@@ -3744,23 +3744,23 @@ def f():
         def f():
             cat > "b"

-        self.assertRaises(TypeError, f)
+        pytest.raises(TypeError, f)

         cat = Series(Categorical(list("abc"), ordered=False))

         def f():
             cat > "b"

-        self.assertRaises(TypeError, f)
+        pytest.raises(TypeError, f)

         # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057
         # and following comparisons with scalars not in categories should raise
         # for unequal comps, but not for equal/not equal
         cat = Series(Categorical(list("abc"), ordered=True))

-        self.assertRaises(TypeError, lambda: cat < "d")
-        self.assertRaises(TypeError, lambda: cat > "d")
-        self.assertRaises(TypeError, lambda: "d" < cat)
-        self.assertRaises(TypeError, lambda: "d" > cat)
+        pytest.raises(TypeError, lambda: cat < "d")
+        pytest.raises(TypeError, lambda: cat > "d")
+        pytest.raises(TypeError, lambda: "d" < cat)
+        pytest.raises(TypeError, lambda: "d" > cat)

         tm.assert_series_equal(cat == "d", Series([False, False, False]))
         tm.assert_series_equal(cat != "d", Series([True, True, True]))
@@ -3818,10 +3818,10 @@ def test_cat_equality(self):
         self.assertTrue(((~(f == a) == (f != a)).all()))

         # non-equality is not comparable
-        self.assertRaises(TypeError, lambda: a < b)
-        self.assertRaises(TypeError, lambda: b < a)
-        self.assertRaises(TypeError, lambda: a > b)
-        self.assertRaises(TypeError, lambda: b > a)
+        pytest.raises(TypeError, lambda: a < b)
+        pytest.raises(TypeError, lambda: b < a)
+        pytest.raises(TypeError, lambda: a > b)
+        pytest.raises(TypeError, lambda: b > a)

     def test_concat_append(self):
         cat = pd.Categorical(["a", "b"], categories=["a", "b"])
@@ -3921,7 +3921,7 @@ def test_categorical_index_preserver(self):
         df3 = DataFrame({'A': a,
                         'B': pd.Categorical(b, categories=list('abc'))
                         }).set_index('B')
-        self.assertRaises(TypeError, lambda: pd.concat([df2, df3]))
+        pytest.raises(TypeError, lambda: pd.concat([df2, df3]))

     def test_merge(self):
         # GH 9426
@@ -4046,7 +4046,7 @@ def test_na_actions(self):
         def f():
             df.fillna(value={"cats": 4, "vals": "c"})

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         res = df.fillna(method='pad')
         tm.assert_frame_equal(res, df_exp_fill)
@@ -4104,7 +4104,7 @@ def test_astype_to_other(self):
         expected = s
         tm.assert_series_equal(s.astype('category'), expected)
         tm.assert_series_equal(s.astype(CategoricalDtype()), expected)
-        self.assertRaises(ValueError, lambda: s.astype('float64'))
+        pytest.raises(ValueError, lambda: s.astype('float64'))
         cat = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']))
         exp = Series(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
@@ -4142,7 +4142,7 @@ def cmp(a, b):
         # invalid conversion (these are NOT a dtype)
         for invalid in [lambda x: x.astype(pd.Categorical),
                         lambda x: x.astype('object').astype(pd.Categorical)]:
-            self.assertRaises(TypeError, lambda: invalid(s))
+            pytest.raises(TypeError, lambda: invalid(s))

     def test_astype_categorical(self):
@@ -4150,7 +4150,7 @@ def test_astype_categorical(self):
         tm.assert_categorical_equal(cat, cat.astype('category'))
         tm.assert_almost_equal(np.array(cat), cat.astype('object'))

-        self.assertRaises(ValueError, lambda: cat.astype(float))
+        pytest.raises(ValueError, lambda: cat.astype(float))

     def test_to_records(self):
@@ -4177,28 +4177,28 @@ def test_numeric_like_ops(self):

         # numeric ops should not succeed
         for op in ['__add__', '__sub__', '__mul__', '__truediv__']:
-            self.assertRaises(TypeError,
-                              lambda: getattr(self.cat, op)(self.cat))
+            pytest.raises(TypeError,
+                          lambda: getattr(self.cat, op)(self.cat))

         # reduction ops should not succeed (unless specifically defined, e.g.
         # min/max)
         s = self.cat['value_group']
         for op in ['kurt', 'skew', 'var', 'std', 'mean', 'sum', 'median']:
-            self.assertRaises(TypeError,
-                              lambda: getattr(s, op)(numeric_only=False))
+            pytest.raises(TypeError,
+                          lambda: getattr(s, op)(numeric_only=False))

         # mad technically works because it takes always the numeric data

         # numpy ops
         s = pd.Series(pd.Categorical([1, 2, 3, 4]))
-        self.assertRaises(TypeError, lambda: np.sum(s))
+        pytest.raises(TypeError, lambda: np.sum(s))

         # numeric ops on a Series
         for op in ['__add__', '__sub__', '__mul__', '__truediv__']:
-            self.assertRaises(TypeError, lambda: getattr(s, op)(2))
+            pytest.raises(TypeError, lambda: getattr(s, op)(2))

         # invalid ufunc
-        self.assertRaises(TypeError, lambda: np.log(s))
+        pytest.raises(TypeError, lambda: np.log(s))

     def test_cat_tab_completition(self):
         # test the tab completion display
diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py
index 5222f8fc18520..435fca14d164f 100644
--- a/pandas/tests/test_common.py
+++ b/pandas/tests/test_common.py
@@ -1,5 +1,7 @@
 # -*- coding: utf-8 -*-

+import pytest
+
 import numpy as np

 from pandas import Series, Timestamp
@@ -153,10 +155,10 @@ def test_random_state():
     assert com._random_state() is np.random

     # Error for floats or strings
-    with tm.assertRaises(ValueError):
+    with pytest.raises(ValueError):
         com._random_state('test')

-    with tm.assertRaises(ValueError):
+    with pytest.raises(ValueError):
         com._random_state(5.5)
diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py
index c58aada193b15..f260895e74dda 100644
--- a/pandas/tests/test_config.py
+++ b/pandas/tests/test_config.py
@@ -1,4 +1,6 @@
 # -*- coding: utf-8 -*-
+import pytest
+
 import pandas as pd
 import unittest
 import warnings
@@ -40,26 +42,26 @@ def test_is_one_of_factory(self):
         v(12)
         v(None)
-        self.assertRaises(ValueError, v, 1.1)
+        pytest.raises(ValueError, v, 1.1)

     def test_register_option(self):
         self.cf.register_option('a', 1, 'doc')

         # can't register an already registered option
-        self.assertRaises(KeyError, self.cf.register_option, 'a', 1, 'doc')
+        pytest.raises(KeyError, self.cf.register_option, 'a', 1, 'doc')

         # can't register an already registered option
-        self.assertRaises(KeyError, self.cf.register_option, 'a.b.c.d1', 1,
-                          'doc')
-        self.assertRaises(KeyError, self.cf.register_option, 'a.b.c.d2', 1,
-                          'doc')
+        pytest.raises(KeyError, self.cf.register_option, 'a.b.c.d1', 1,
+                      'doc')
+        pytest.raises(KeyError, self.cf.register_option, 'a.b.c.d2', 1,
+                      'doc')

         # no python keywords
-        self.assertRaises(ValueError, self.cf.register_option, 'for', 0)
-        self.assertRaises(ValueError, self.cf.register_option, 'a.for.b', 0)
+        pytest.raises(ValueError, self.cf.register_option, 'for', 0)
+        pytest.raises(ValueError, self.cf.register_option, 'a.for.b', 0)
         # must be valid identifier (ensure attribute access works)
-        self.assertRaises(ValueError, self.cf.register_option,
-                          'Oh my Goddess!', 0)
+        pytest.raises(ValueError, self.cf.register_option,
+                      'Oh my Goddess!', 0)

         # we can register options several levels deep
         # without predefining the intermediate steps
@@ -82,7 +84,7 @@ def test_describe_option(self):
         self.cf.register_option('l', "foo")

         # non-existent keys raise KeyError
-        self.assertRaises(KeyError, self.cf.describe_option, 'no.such.key')
+        pytest.raises(KeyError, self.cf.describe_option, 'no.such.key')

         # we can get the description for any key we registered
         self.assertTrue(
@@ -128,7 +130,7 @@ def test_case_insensitive(self):
         self.assertEqual(self.cf.get_option('kAnBaN'), 2)

         # gets of non-existent keys fail
-        self.assertRaises(KeyError, self.cf.get_option, 'no_such_option')
+        pytest.raises(KeyError, self.cf.get_option, 'no_such_option')
         self.cf.deprecate_option('KanBan')

         self.assertTrue(self.cf._is_deprecated('kAnBaN'))
@@ -144,7 +146,7 @@ def test_get_option(self):
         self.assertTrue(self.cf.get_option('b.b') is None)

         # gets of non-existent keys fail
-        self.assertRaises(KeyError, self.cf.get_option, 'no_such_option')
+        pytest.raises(KeyError, self.cf.get_option, 'no_such_option')

     def test_set_option(self):
         self.cf.register_option('a', 1, 'doc')
@@ -163,16 +165,16 @@ def test_set_option(self):
         self.assertEqual(self.cf.get_option('b.c'), 'wurld')
         self.assertEqual(self.cf.get_option('b.b'), 1.1)

-        self.assertRaises(KeyError, self.cf.set_option, 'no.such.key', None)
+        pytest.raises(KeyError, self.cf.set_option, 'no.such.key', None)

     def test_set_option_empty_args(self):
-        self.assertRaises(ValueError, self.cf.set_option)
+        pytest.raises(ValueError, self.cf.set_option)

     def test_set_option_uneven_args(self):
-        self.assertRaises(ValueError, self.cf.set_option, 'a.b', 2, 'b.c')
+        pytest.raises(ValueError, self.cf.set_option, 'a.b', 2, 'b.c')

     def test_set_option_invalid_single_argument_type(self):
-        self.assertRaises(ValueError, self.cf.set_option, 2)
+        pytest.raises(ValueError, self.cf.set_option, 2)

     def test_set_option_multiple(self):
         self.cf.register_option('a', 1, 'doc')
@@ -193,23 +195,23 @@ def test_validation(self):
         self.cf.register_option('a', 1, 'doc', validator=self.cf.is_int)
         self.cf.register_option('b.c', 'hullo', 'doc2',
                                 validator=self.cf.is_text)
-        self.assertRaises(ValueError, self.cf.register_option, 'a.b.c.d2',
-                          'NO', 'doc', validator=self.cf.is_int)
+        pytest.raises(ValueError, self.cf.register_option, 'a.b.c.d2',
+                      'NO', 'doc', validator=self.cf.is_int)

         self.cf.set_option('a', 2)  # int is_int
         self.cf.set_option('b.c', 'wurld')  # str is_str

-        self.assertRaises(
+        pytest.raises(
             ValueError, self.cf.set_option, 'a', None)  # None not is_int
-        self.assertRaises(ValueError, self.cf.set_option, 'a', 'ab')
-        self.assertRaises(ValueError, self.cf.set_option, 'b.c', 1)
+        pytest.raises(ValueError, self.cf.set_option, 'a', 'ab')
+        pytest.raises(ValueError, self.cf.set_option, 'b.c', 1)

         validator = self.cf.is_one_of_factory([None, self.cf.is_callable])
         self.cf.register_option('b', lambda: None, 'doc',
                                 validator=validator)
         self.cf.set_option('b', '%.1f'.format)  # Formatter is callable
         self.cf.set_option('b', None)  # Formatter is none (default)
-        self.assertRaises(ValueError, self.cf.set_option, 'b', '%.1f')
+        pytest.raises(ValueError, self.cf.set_option, 'b', '%.1f')

     def test_reset_option(self):
         self.cf.register_option('a', 1, 'doc', validator=self.cf.is_int)
@@ -279,7 +281,7 @@ def test_deprecate_option(self):
             self.assertTrue(
                 'nifty_ver' in str(w[-1]))  # with the removal_ver quoted

-            self.assertRaises(
+            pytest.raises(
                 KeyError, self.cf.deprecate_option, 'a')  # can't depr. twice

         self.cf.deprecate_option('b.c', 'zounds!')
@@ -415,8 +417,8 @@ def f3(key):
         self.cf.reset_option("a")

         self.assertEqual(options.a, self.cf.get_option("a", 0))

-        self.assertRaises(KeyError, f)
-        self.assertRaises(KeyError, f2)
+        pytest.raises(KeyError, f)
+        pytest.raises(KeyError, f2)

         # make sure callback kicks when using this form of setting
         options.c = 1
diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py
index 481b1ca3cb72a..5c3e6adb48808 100644
--- a/pandas/tests/test_lib.py
+++ b/pandas/tests/test_lib.py
@@ -24,8 +24,8 @@ def test_max_len_string_array(self):
         self.assertTrue(lib.max_len_string_array(arr), 3)

         # raises
-        tm.assertRaises(TypeError,
-                        lambda: lib.max_len_string_array(arr.astype('U')))
+        pytest.raises(TypeError,
+                      lambda: lib.max_len_string_array(arr.astype('U')))

     def test_fast_unique_multiple_list_gen_sort(self):
         keys = [['p', 'a'], ['n', 'd'], ['a', 's']]
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index d56366ca8a545..d7ba7f1c6fac6 100755
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -272,14 +272,14 @@ def test_series_getitem(self):
         tm.assert_series_equal(result, expected)

         # key error
-        self.assertRaises(KeyError, s.__getitem__, (2000, 3, 4))
+        pytest.raises(KeyError, s.__getitem__, (2000, 3, 4))

     def test_series_getitem_corner(self):
         s = self.ymd['A']

         # don't segfault, GH #495
         # out of bounds access
-        self.assertRaises(IndexError, s.__getitem__, len(self.ymd))
+        pytest.raises(IndexError, s.__getitem__, len(self.ymd))

         # generator
         result = s[(x > 0 for x in s)]
@@ -509,7 +509,7 @@ def test_xs_level(self):
         def f(x):
             x[:] = 10
-        self.assertRaises(com.SettingWithCopyError, f, result)
+        pytest.raises(com.SettingWithCopyError, f, result)

     def test_xs_level_multiple(self):
         from pandas import read_table
@@ -533,7 +533,7 @@ def test_xs_level_multiple(self):
         def f(x):
             x[:] = 10
-        self.assertRaises(com.SettingWithCopyError, f, result)
+        pytest.raises(com.SettingWithCopyError, f, result)

         # GH2107
         dates = lrange(20111201, 20111205)
@@ -574,7 +574,7 @@ def test_xs_level_series(self):

         # not implementing this for now
-        self.assertRaises(TypeError, s.__getitem__, (2000, slice(3, 4)))
+        pytest.raises(TypeError, s.__getitem__, (2000, slice(3, 4)))

         # result = s[2000, 3:4]
         # lv =s.index.get_level_values(1)
@@ -641,7 +641,7 @@ def test_getitem_int(self):
         tm.assert_frame_equal(result, expected)

         # raises exception
-        self.assertRaises(KeyError, frame.loc.__getitem__, 3)
+        pytest.raises(KeyError, frame.loc.__getitem__, 3)

         # however this will work
         result = self.frame.iloc[2]
@@ -709,8 +709,8 @@ def test_getitem_partial_column_select(self):
         result = df.ix[('a', 'y'), [1, 0]]
         tm.assert_frame_equal(result, expected)

-        self.assertRaises(KeyError, df.loc.__getitem__,
-                          (('a', 'foo'), slice(None, None)))
+        pytest.raises(KeyError, df.loc.__getitem__,
+                      (('a', 'foo'), slice(None, None)))

     def test_delevel_infer_dtype(self):
         tuples = [tuple
@@ -1323,7 +1323,7 @@ def f():
             df['foo']['one'] = 2
             return df

-        self.assertRaises(com.SettingWithCopyError, f)
+        pytest.raises(com.SettingWithCopyError, f)

         try:
             df = f()
@@ -1356,8 +1356,8 @@ def test_count(self):
         tm.assert_series_equal(result, expect, check_names=False)
         self.assertEqual(result.index.name, 'a')

-        self.assertRaises(KeyError, series.count, 'x')
-        self.assertRaises(KeyError, frame.count, level='x')
+        pytest.raises(KeyError, series.count, 'x')
+        pytest.raises(KeyError, frame.count, level='x')

     AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew',
                      'mad', 'std', 'var', 'sem']
@@ -1552,8 +1552,8 @@ def test_unstack_group_index_overflow(self):
         self.assertEqual(result.shape, (500, 2))

     def test_getitem_lowerdim_corner(self):
-        self.assertRaises(KeyError, self.frame.loc.__getitem__,
-                          (('bar', 'three'), 'B'))
+        pytest.raises(KeyError, self.frame.loc.__getitem__,
+                      (('bar', 'three'), 'B'))

         # in theory should be inserting in a sorted space????
         self.frame.loc[('bar', 'three'), 'B'] = 0
@@ -1575,8 +1575,8 @@ def test_partial_ix_missing(self):
         # self.assertTrue((self.ymd.loc[2000]['A'] == 0).all())

         # Pretty sure the second (and maybe even the first) is already wrong.
-        self.assertRaises(Exception, self.ymd.loc.__getitem__, (2000, 6))
-        self.assertRaises(Exception, self.ymd.loc.__getitem__, (2000, 6), 0)
+        pytest.raises(Exception, self.ymd.loc.__getitem__, (2000, 6))
+        pytest.raises(Exception, self.ymd.loc.__getitem__, (2000, 6), 0)

     # ---------------------------------------------------------------------
@@ -1600,7 +1600,7 @@ def test_level_with_tuples(self):
         tm.assert_series_equal(result, expected)
         tm.assert_series_equal(result2, expected)

-        self.assertRaises(KeyError, series.__getitem__, (('foo', 'bar', 0), 2))
+        pytest.raises(KeyError, series.__getitem__, (('foo', 'bar', 0), 2))

         result = frame.loc[('foo', 'bar', 0)]
         result2 = frame.xs(('foo', 'bar', 0))
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
index 20a9238310ccf..1aad2f5224c0d 100644
--- a/pandas/tests/test_nanops.py
+++ b/pandas/tests/test_nanops.py
@@ -3,6 +3,8 @@

 from functools import partial

+import pytest
+
 import warnings
 import numpy as np
 from pandas import Series, isnull, _np_version_under1p9
@@ -774,7 +776,7 @@ def test_ndarray(self):

         # Test non-convertible string ndarray
         s_values = np.array(['foo', 'bar', 'baz'], dtype=object)
-        self.assertRaises(ValueError, lambda: nanops._ensure_numeric(s_values))
+        pytest.raises(ValueError, lambda: nanops._ensure_numeric(s_values))

     def test_convertable_values(self):
         self.assertTrue(np.allclose(nanops._ensure_numeric('1'), 1.0),
@@ -785,9 +787,9 @@ def test_convertable_values(self):
                         'Failed for convertible complex string')

     def test_non_convertable_values(self):
-        self.assertRaises(TypeError, lambda: nanops._ensure_numeric('foo'))
-        self.assertRaises(TypeError, lambda: nanops._ensure_numeric({}))
-        self.assertRaises(TypeError, lambda: nanops._ensure_numeric([]))
+        pytest.raises(TypeError, lambda: nanops._ensure_numeric('foo'))
+        pytest.raises(TypeError, lambda: nanops._ensure_numeric({}))
+        pytest.raises(TypeError, lambda: nanops._ensure_numeric([]))

 class TestNanvarFixedValues(tm.TestCase):
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index fbd8aa72c941d..703c8cc80e8f9 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -48,7 +48,7 @@ def test_pickle(self):

     def test_rank(self):
         with catch_warnings(record=True):
-            self.assertRaises(NotImplementedError, lambda: self.panel.rank())
+            pytest.raises(NotImplementedError, lambda: self.panel.rank())

     def test_cumsum(self):
         with catch_warnings(record=True):
@@ -59,8 +59,8 @@ def not_hashable(self):
         with catch_warnings(record=True):
             c_empty = Panel()
             c = Panel(Panel([[[1]]]))
-            self.assertRaises(TypeError, hash, c_empty)
-            self.assertRaises(TypeError, hash, c)
+            pytest.raises(TypeError, hash, c_empty)
+            pytest.raises(TypeError, hash, c)

 class SafeForLongAndSparse(object):
@@ -176,7 +176,7 @@ def wrapper(x):
                 if not tm._incompat_bottleneck_version(name):
                     assert_frame_equal(result,
                                        obj.apply(skipna_wrapper, axis=i))

-        self.assertRaises(Exception, f, axis=obj.ndim)
+        pytest.raises(Exception, f, axis=obj.ndim)

         # Unimplemented numeric_only parameter.
         if 'numeric_only' in signature(f).args:
@@ -287,8 +287,8 @@ def test_arith(self):
             self._test_op(self.panel, lambda x, y: x / y)  # panel / 1
             self._test_op(self.panel, lambda x, y: x ** y)  # panel ** 1

-            self.assertRaises(Exception, self.panel.__add__,
-                              self.panel['ItemA'])
+            pytest.raises(Exception, self.panel.__add__,
+                          self.panel['ItemA'])

     @staticmethod
     def _test_op(panel, op):
@@ -375,7 +375,7 @@ def test_raise_when_not_implemented(self):
             ops = ['add', 'sub', 'mul', 'truediv',
                    'floordiv', 'div', 'mod', 'pow']
             for op in ops:
-                with self.assertRaises(NotImplementedError):
+                with pytest.raises(NotImplementedError):
                     getattr(p, op)(d, axis=0)

     def test_select(self):
@@ -440,7 +440,7 @@ def test_abs(self):
 class CheckIndexing(object):

     def test_getitem(self):
-        self.assertRaises(Exception, self.panel.__getitem__, 'ItemQ')
+        pytest.raises(Exception, self.panel.__getitem__, 'ItemQ')

     def test_delitem_and_pop(self):
         with catch_warnings(record=True):
@@ -451,7 +451,7 @@ def test_delitem_and_pop(self):
             del self.panel['ItemB']
             self.assertNotIn('ItemB', self.panel.items)
-            self.assertRaises(Exception, self.panel.__delitem__, 'ItemB')
+            pytest.raises(Exception, self.panel.__delitem__, 'ItemB')

             values = np.empty((3, 3, 3))
             values[0] = 0
@@ -482,7 +482,7 @@ def test_setitem(self):
             # LongPanel with one item
             lp = self.panel.filter(['ItemA', 'ItemB']).to_frame()
-            with tm.assertRaises(ValueError):
+            with pytest.raises(ValueError):
                 self.panel['ItemE'] = lp

             # DataFrame
@@ -509,8 +509,8 @@ def test_setitem(self):
             self.panel['ItemP'] = self.panel['ItemA'] > 0
             self.assertEqual(self.panel['ItemP'].values.dtype, np.bool_)

-            self.assertRaises(TypeError, self.panel.__setitem__, 'foo',
-                              self.panel.loc[['ItemP']])
+            pytest.raises(TypeError, self.panel.__setitem__, 'foo',
+                          self.panel.loc[['ItemP']])

             # bad shape
             p = Panel(np.random.randn(4, 3, 2))
@@ -564,7 +564,7 @@ def test_major_xs(self):

             # not contained
             idx = self.panel.major_axis[0] - BDay()
-            self.assertRaises(Exception, self.panel.major_xs, idx)
+            pytest.raises(Exception, self.panel.major_xs, idx)

     def test_major_xs_mixed(self):
         with catch_warnings(record=True):
@@ -583,7 +583,7 @@ def test_minor_xs(self):
                 assert_series_equal(xs['ItemA'], ref[idx], check_names=False)

             # not contained
-            self.assertRaises(Exception, self.panel.minor_xs, 'E')
+            pytest.raises(Exception, self.panel.minor_xs, 'E')

     def test_minor_xs_mixed(self):
         with catch_warnings(record=True):
@@ -979,7 +979,7 @@ def test_constructor_cast(self):

             # can't cast
             data = [[['foo', 'bar', 'baz']]]
-            self.assertRaises(ValueError, Panel, data, dtype=float)
+            pytest.raises(ValueError, Panel, data, dtype=float)

     def test_constructor_empty_panel(self):
         with catch_warnings(record=True):
@@ -1122,10 +1122,10 @@ def test_constructor_dict_mixed(self):

             # corner, blow up
             data['ItemB'] = data['ItemB'][:-1]
-            self.assertRaises(Exception, Panel, data)
+            pytest.raises(Exception, Panel, data)

             data['ItemB'] = self.panel['ItemB'].values[:, :-1]
-            self.assertRaises(Exception, Panel, data)
+            pytest.raises(Exception, Panel, data)

     def test_ctor_orderedDict(self):
         with catch_warnings(record=True):
@@ -1238,7 +1238,7 @@ def test_astype(self):
             expected = Panel(str_data, ['a', 'b'], ['c', 'd'], ['e', 'f'])
             assert_panel_equal(panel.astype(str), expected)

-            self.assertRaises(NotImplementedError, panel.astype, {0: str})
+            pytest.raises(NotImplementedError, panel.astype, {0: str})

     def test_apply(self):
         with catch_warnings(record=True):
@@ -1409,9 +1409,9 @@ def test_reindex(self):
                 result['ItemB'], ref.reindex(index=new_major))

             # raise exception put both major and major_axis
-            self.assertRaises(Exception, self.panel.reindex,
-                              major_axis=new_major,
-                              major=new_major)
+            pytest.raises(Exception, self.panel.reindex,
+                          major_axis=new_major,
+                          major=new_major)

             # minor
             new_minor = list(self.panel.minor_axis[:2])
@@ -1511,7 +1511,7 @@ def test_take(self):
             result = self.panel.take([3, -1, 1, 2], axis=2)
             assert_panel_equal(result, expected)

-            self.assertRaises(Exception, self.panel.take, [4, 0, 1, 2], axis=2)
+            pytest.raises(Exception, self.panel.take, [4, 0, 1, 2], axis=2)

     def test_sort_index(self):
         with catch_warnings(record=True):
@@ -1563,17 +1563,17 @@ def test_fillna(self):
             filled = empty.fillna(0)
             assert_panel_equal(filled, empty)

-            self.assertRaises(ValueError, self.panel.fillna)
-            self.assertRaises(ValueError, self.panel.fillna, 5, method='ffill')
+            pytest.raises(ValueError, self.panel.fillna)
+            pytest.raises(ValueError, self.panel.fillna, 5, method='ffill')

-            self.assertRaises(TypeError, self.panel.fillna, [1, 2])
-            self.assertRaises(TypeError, self.panel.fillna, (1, 2))
+            pytest.raises(TypeError, self.panel.fillna, [1, 2])
+            pytest.raises(TypeError, self.panel.fillna, (1, 2))

             # limit not implemented when only value is specified
             p = Panel(np.random.randn(3, 4, 5))
             p.iloc[0:2, 0:2, 0:2] = np.nan
-            self.assertRaises(NotImplementedError,
-                              lambda: p.fillna(999, limit=1))
+            pytest.raises(NotImplementedError,
+                          lambda: p.fillna(999, limit=1))

             # Test in place fillNA
             # Expected result
@@ -1680,7 +1680,7 @@ def test_transpose(self):
             result = self.panel.transpose(2, 0, 1)
             assert_panel_equal(result, expected)

-            self.assertRaises(ValueError, self.panel.transpose, 0, 0, 1)
+            pytest.raises(ValueError, self.panel.transpose, 0, 0, 1)

     def test_transpose_copy(self):
         with catch_warnings(record=True):
@@ -2014,7 +2014,7 @@ def test_tshift(self):
             assert_panel_equal(unshifted, inferred_ts)

             no_freq = panel.iloc[:, [0, 5, 7], :]
-            self.assertRaises(ValueError, no_freq.tshift)
+            pytest.raises(ValueError, no_freq.tshift)

     def test_pct_change(self):
         with catch_warnings(record=True):
@@ -2276,7 +2276,7 @@ def check_drop(drop_val, axis_number, aliases, expected):
             expected = Panel({"One": df})
             check_drop('Two', 0, ['items'], expected)

-            self.assertRaises(ValueError, panel.drop, 'Three')
+            pytest.raises(ValueError, panel.drop, 'Three')

             # errors = 'ignore'
             dropped = panel.drop('Three', errors='ignore')
@@ -2405,8 +2405,8 @@ def test_update_raise(self):
                            [1.5, np.nan, 3.],
                            [1.5, np.nan, 3.]]])

-        self.assertRaises(Exception, pan.update, *(pan, ),
-                          **{'raise_conflict': True})
+        pytest.raises(Exception, pan.update, *(pan, ),
+                      **{'raise_conflict': True})

     def test_all_any(self):
         self.assertTrue((self.panel.all(axis=0).values == nanall(
@@ -2423,8 +2423,8 @@ def test_all_any(self):
             self.panel, axis=2).T).all())

     def test_all_any_unhandled(self):
-        self.assertRaises(NotImplementedError, self.panel.all, bool_only=True)
-        self.assertRaises(NotImplementedError, self.panel.any, bool_only=True)
+        pytest.raises(NotImplementedError, self.panel.all, bool_only=True)
+        pytest.raises(NotImplementedError, self.panel.any, bool_only=True)

 class TestLongPanel(tm.TestCase):
@@ -2580,8 +2580,8 @@ def test_truncate(self):
         assert_panel_equal(wp_trunc, lp_trunc.to_panel())

         # throw proper exception
-        self.assertRaises(Exception, lp2.truncate, wp.major_axis[-2],
-                          wp.major_axis[2])
+        pytest.raises(Exception, lp2.truncate, wp.major_axis[-2],
+                      wp.major_axis[2])

     def test_axis_dummies(self):
         from pandas.core.reshape.reshape import make_axis_dummies
@@ -2650,8 +2650,8 @@ def test_join(self):

         self.assertEqual(len(joined.columns), 3)

-        self.assertRaises(Exception, lp1.join,
-                          self.panel.filter(['ItemB', 'ItemC']))
+        pytest.raises(Exception, lp1.join,
+                      self.panel.filter(['ItemB', 'ItemC']))

     def test_pivot(self):
         with catch_warnings(record=True):
@@ -2672,7 +2672,7 @@ def test_pivot(self):
             a, b, c = (np.array([1, 2, 3, 4, 4]),
                        np.array(['a', 'a', 'a', 'a', 'a']),
                        np.array([1., 2., 3., 4., 5.]))
-            self.assertRaises(Exception, pivot, a, b, c)
+            pytest.raises(Exception, pivot, a, b, c)

             # corner case, empty
             df = pivot(np.array([]), np.array([]), np.array([]))
diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py
index 8dfbe547e15fb..f55452b44731c 100644
--- a/pandas/tests/test_panel4d.py
+++ b/pandas/tests/test_panel4d.py
@@ -145,7 +145,7 @@ def wrapper(x):
                 expected = obj.apply(skipna_wrapper, axis=i)
                 assert_panel_equal(result, expected)

-        self.assertRaises(Exception, f, axis=obj.ndim)
+        pytest.raises(Exception, f, axis=obj.ndim)

 class SafeForSparse(object):
@@ -220,8 +220,8 @@ def test_arith(self):
             self._test_op(self.panel4d, lambda x, y: y / x)
             self._test_op(self.panel4d, lambda x, y: y ** x)

-            self.assertRaises(Exception, self.panel4d.__add__,
-                              self.panel4d['l1'])
+            pytest.raises(Exception, self.panel4d.__add__,
+                          self.panel4d['l1'])

     @staticmethod
     def _test_op(panel4d, op):
@@ -307,7 +307,7 @@ def test_abs(self):
 class CheckIndexing(object):

     def test_getitem(self):
-        self.assertRaises(Exception, self.panel4d.__getitem__, 'ItemQ')
+        pytest.raises(Exception, self.panel4d.__getitem__, 'ItemQ')

     def test_delitem_and_pop(self):

@@ -319,7 +319,7 @@ def test_delitem_and_pop(self):
             del self.panel4d['l3']
             self.assertNotIn('l3', self.panel4d.labels)
-            self.assertRaises(Exception, self.panel4d.__delitem__, 'l3')
+            pytest.raises(Exception, self.panel4d.__delitem__, 'l3')

             values = np.empty((4, 4, 4, 4))
             values[0] = 0
@@ -395,7 +395,7 @@ def test_setitem_by_indexer(self):
             def func():
                 self.panel4d.iloc[0] = p
-            self.assertRaises(NotImplementedError, func)
+            pytest.raises(NotImplementedError, func)

             # DataFrame
             panel4dc = self.panel4d.copy()
@@ -478,7 +478,7 @@ def test_major_xs(self):

             # not contained
             idx = self.panel4d.major_axis[0] - BDay()
-            self.assertRaises(Exception, self.panel4d.major_xs, idx)
+            pytest.raises(Exception, self.panel4d.major_xs, idx)

     def test_major_xs_mixed(self):
         self.panel4d['l4'] = 'foo'
@@ -497,7 +497,7 @@ def test_minor_xs(self):
                 assert_series_equal(xs['l1'].T['ItemA'], ref[idx],
                                     check_names=False)

             # not contained
-            self.assertRaises(Exception, self.panel4d.minor_xs, 'E')
+            pytest.raises(Exception, self.panel4d.minor_xs, 'E')

     def test_minor_xs_mixed(self):
         self.panel4d['l4'] = 'foo'
@@ -677,7 +677,7 @@ def test_constructor_cast(self):

             # can't cast
             data = [[['foo', 'bar', 'baz']]]
-            self.assertRaises(ValueError, Panel, data, dtype=float)
+            pytest.raises(ValueError, Panel, data, dtype=float)

     def test_consolidate(self):
         with catch_warnings(record=True):
@@ -724,10 +724,10 @@ def test_constructor_dict_mixed(self):

             # corner, blow up
             data['l2'] = data['l2']['ItemB']
-            self.assertRaises(Exception, Panel4D, data)
+            pytest.raises(Exception, Panel4D, data)

             data['l2'] = self.panel4d['l2'].values[:, :, :-1]
-            self.assertRaises(Exception, Panel4D, data)
+            pytest.raises(Exception, Panel4D, data)

     def test_constructor_resize(self):
         with catch_warnings(record=True):
@@ -786,8 +786,8 @@ def test_reindex(self):
                 result['l2']['ItemB'], ref['ItemB'].reindex(index=new_major))

             # raise exception put both major and major_axis
-            self.assertRaises(Exception, self.panel4d.reindex,
-                              major_axis=new_major, major=new_major)
+            pytest.raises(Exception, self.panel4d.reindex,
+                          major_axis=new_major, major=new_major)

             # minor
             new_minor = list(self.panel4d.minor_axis[:2])
@@ -824,8 +824,8 @@ def test_reindex(self):
     def test_not_hashable(self):
         with catch_warnings(record=True):
             p4D_empty = Panel4D()
-            self.assertRaises(TypeError, hash, p4D_empty)
-            self.assertRaises(TypeError, hash, self.panel4d)
+            pytest.raises(TypeError, hash, p4D_empty)
+            pytest.raises(TypeError, hash, self.panel4d)

     def test_reindex_like(self):
         # reindex_like
@@ -861,8 +861,8 @@ def test_fillna(self):
             filled = self.panel4d.fillna(0)
             self.assertTrue(np.isfinite(filled.values).all())

-            self.assertRaises(NotImplementedError,
-                              self.panel4d.fillna, method='pad')
+            pytest.raises(NotImplementedError,
+                          self.panel4d.fillna, method='pad')

     def test_swapaxes(self):
         with catch_warnings(record=True):
diff --git a/pandas/tests/test_panelnd.py b/pandas/tests/test_panelnd.py
index 7ecc773cd7bea..33c37e9c8feb2 100644
--- a/pandas/tests/test_panelnd.py
+++ b/pandas/tests/test_panelnd.py
@@ -1,4 +1,6 @@
 # -*- coding: utf-8 -*-
+import pytest
+
 from warnings import catch_warnings
 from pandas.core import panelnd
 from pandas.core.panel import Panel
@@ -47,18 +49,18 @@ def test_4d_construction_alt(self):
     def test_4d_construction_error(self):

         # create a 4D
-        self.assertRaises(Exception,
-                          panelnd.create_nd_panel_factory,
-                          klass_name='Panel4D',
-                          orders=['labels', 'items', 'major_axis',
-                                  'minor_axis'],
-                          slices={'items': 'items',
-                                  'major_axis': 'major_axis',
-                                  'minor_axis': 'minor_axis'},
-                          slicer='foo',
-                          aliases={'major': 'major_axis',
-                                   'minor': 'minor_axis'},
-                          stat_axis=2)
+        pytest.raises(Exception,
+                      panelnd.create_nd_panel_factory,
+                      klass_name='Panel4D',
+                      orders=['labels', 'items', 'major_axis',
+                              'minor_axis'],
+                      slices={'items': 'items',
+                              'major_axis': 'major_axis',
+                              'minor_axis': 'minor_axis'},
+                      slicer='foo',
+                      aliases={'major': 'major_axis',
+                               'minor': 'minor_axis'},
+                      stat_axis=2)

     def test_5d_construction(self):
diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py
index 2a8696af36268..e5795eea12135 100644
--- a/pandas/tests/test_resample.py
+++ b/pandas/tests/test_resample.py
@@ -4,6 +4,7 @@
 from datetime import datetime, timedelta
 from functools import partial

+import pytest
 import numpy as np

 import pandas as pd
@@ -107,18 +108,18 @@ def test_api_changes_v018(self):

         # invalids as these can be setting operations
         r = self.series.resample('H')
-        self.assertRaises(ValueError, lambda: r.iloc[0])
-        self.assertRaises(ValueError, lambda: r.iat[0])
-        self.assertRaises(ValueError, lambda: r.loc[0])
-        self.assertRaises(ValueError, lambda: r.loc[
+        pytest.raises(ValueError, lambda: r.iloc[0])
+        pytest.raises(ValueError, lambda: r.iat[0])
+        pytest.raises(ValueError, lambda: r.loc[0])
+        pytest.raises(ValueError, lambda: r.loc[
            Timestamp('2013-01-01 00:00:00', offset='H')])
-        self.assertRaises(ValueError, lambda: r.at[
+        pytest.raises(ValueError, lambda: r.at[
            Timestamp('2013-01-01 00:00:00', offset='H')])

         def f():
             r[0] = 5

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # str/repr
         r = self.series.resample('H')
@@ -178,7 +179,7 @@ def f():
         df = self.series.to_frame('foo')

         # same as prior versions for DataFrame
-        self.assertRaises(KeyError, lambda: df.resample('H')[0])
+        pytest.raises(KeyError, lambda: df.resample('H')[0])

         # compat for Series
         # but we cannot be sure that we need a warning here
@@ -268,9 +269,9 @@ def test_getitem(self):

     def test_select_bad_cols(self):
         g = self.frame.resample('H')
-        self.assertRaises(KeyError, g.__getitem__, ['D'])
+        pytest.raises(KeyError, g.__getitem__, ['D'])

-        self.assertRaises(KeyError, g.__getitem__, ['A', 'D'])
+        pytest.raises(KeyError, g.__getitem__, ['A', 'D'])
         with tm.assertRaisesRegexp(KeyError, '^[^A]+$'):
             # A should not be referenced as a bad column...
             # will have to rethink regex if you change message!
@@ -283,13 +284,13 @@ def test_attribute_access(self):

         # getting
         with tm.assert_produces_warning(FutureWarning,
                                         check_stacklevel=False):
-            self.assertRaises(AttributeError, lambda: r.F)
+            pytest.raises(AttributeError, lambda: r.F)

         # setting
         def f():
             r.F = 'bah'

-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

     def test_api_compat_before_use(self):
@@ -371,7 +372,7 @@ def test_fillna(self):
         result = r.fillna(method='bfill')
         assert_series_equal(result, expected)

-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             r.fillna(0)

     def test_apply_without_aggregation(self):
@@ -598,7 +599,7 @@ def f():
             t[['A']].agg({'A': ['sum', 'std'],
                           'B': ['mean', 'std']})

-        self.assertRaises(SpecificationError, f)
+        pytest.raises(SpecificationError, f)

     def test_agg_nested_dicts(self):
@@ -625,7 +626,7 @@ def test_agg_nested_dicts(self):
             def f():
                 t.aggregate({'r1': {'A': ['mean', 'sum']},
                              'r2': {'B': ['mean', 'sum']}})

-            self.assertRaises(ValueError, f)
+            pytest.raises(ValueError, f)

         for t in cases:
             expected = pd.concat([t['A'].mean(), t['A'].std(), t['B'].mean(),
@@ -658,23 +659,23 @@ def test_selection_api_validation(self):
                        index=index)

         # non DatetimeIndex
-        with tm.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             df.resample('2D', level='v')

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.resample('2D', on='date', level='d')

-        with tm.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             df.resample('2D', on=['a', 'date'])

-        with tm.assertRaises(KeyError):
+        with pytest.raises(KeyError):
             df.resample('2D', level=['a', 'date'])

         # upsampling not allowed
-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.resample('2D', level='d').asfreq()

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.resample('2D', on='date').asfreq()

         exp = df_exp.resample('2D').sum()
@@ -754,7 +755,7 @@ def test_resample_interpolate(self):
     def test_raises_on_non_datetimelike_index(self):
         # this is a non datetimelike index
         xp = DataFrame()
-        self.assertRaises(TypeError, lambda: xp.resample('A').mean())
+        pytest.raises(TypeError, lambda: xp.resample('A').mean())

     def test_resample_empty_series(self):
         # GH12771 & GH12868
@@ -839,7 +840,7 @@ def test_resample_loffset_arg_type(self):

             # GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex
             if isinstance(expected.index, TimedeltaIndex):
-                with tm.assertRaises(AssertionError):
+                with pytest.raises(AssertionError):
                     assert_frame_equal(result_agg, expected)
                    assert_frame_equal(result_how, expected)
             else:
@@ -1476,7 +1477,7 @@ def test_asfreq_non_unique(self):
         rng2 = rng.repeat(2).values
         ts = Series(np.random.randn(len(rng2)), index=rng2)

-        self.assertRaises(Exception, ts.asfreq, 'B')
+        pytest.raises(Exception, ts.asfreq, 'B')

     def test_resample_axis1(self):
         rng = date_range('1/1/2000', '2/29/2000')
@@ -2252,10 +2253,10 @@ def test_selection(self):
            np.arange(len(index), dtype=np.int64),
            index], names=['v', 'd']))

-        with tm.assertRaises(NotImplementedError):
+        with pytest.raises(NotImplementedError):
             df.resample('2D', on='date')

-        with tm.assertRaises(NotImplementedError):
+        with pytest.raises(NotImplementedError):
             df.resample('2D', level='d')

     def test_annual_upsample_D_s_f(self):
@@ -2318,10 +2319,10 @@ def test_basic_downsample(self):
     def test_not_subperiod(self):
         # These are incompatible period rules for resampling
         ts = _simple_pts('1/1/1990', '6/30/1995', freq='w-wed')
-        self.assertRaises(ValueError, lambda: ts.resample('a-dec').mean())
-        self.assertRaises(ValueError, lambda: ts.resample('q-mar').mean())
-        self.assertRaises(ValueError, lambda: ts.resample('M').mean())
-        self.assertRaises(ValueError, lambda: ts.resample('w-thu').mean())
+        pytest.raises(ValueError, lambda: ts.resample('a-dec').mean())
+        pytest.raises(ValueError, lambda: ts.resample('q-mar').mean())
+        pytest.raises(ValueError, lambda: ts.resample('M').mean())
+        pytest.raises(ValueError, lambda: ts.resample('w-thu').mean())

     def test_basic_upsample(self):
         ts = _simple_pts('1/1/1990', '6/30/1995', freq='M')
@@ -2422,7 +2423,7 @@ def test_resample_same_freq(self):

     def test_resample_incompat_freq(self):

-        with self.assertRaises(IncompatibleFrequency):
+        with pytest.raises(IncompatibleFrequency):
             pd.Series(range(3), index=pd.period_range(
                 start='2000', periods=3, freq='M')).resample('W').mean()

@@ -2548,7 +2549,7 @@ def test_resample_fill_missing(self):
     def test_cant_fill_missing_dups(self):
         rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq='A')
         s = Series(np.random.randn(5), index=rng)
-        self.assertRaises(Exception, lambda: s.resample('A').ffill())
+        pytest.raises(Exception, lambda: s.resample('A').ffill())

     def test_resample_5minute(self):
         rng = period_range('1/1/2000', '1/5/2000', freq='T')
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index 1aa69ce6b231e..107720d90e489 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -2,6 +2,7 @@
 # pylint: disable-msg=E1101,W0612

 from datetime import datetime, timedelta
+import pytest
 import re

 from numpy import nan as NA
@@ -430,7 +431,7 @@ def test_replace(self):
         for repl in (None, 3, {'a': 'b'}):
             for data in (['a', 'b', None], ['a', 'b', 'c', 'ad']):
                 values = klass(data)
-                self.assertRaises(TypeError, values.str.replace, 'a', repl)
+                pytest.raises(TypeError, values.str.replace, 'a', repl)

     def test_replace_callable(self):
         # GH 15055
@@ -583,8 +584,8 @@ def test_match(self):
         with tm.assert_produces_warning(FutureWarning):
             result = values.str.match('.*(BAD[_]+).*(BAD)', as_indexer=True)
         tm.assert_series_equal(result, exp)

-        self.assertRaises(ValueError, values.str.match, '.*(BAD[_]+).*(BAD)',
-                          as_indexer=False)
+        pytest.raises(ValueError, values.str.match, '.*(BAD[_]+).*(BAD)',
+                      as_indexer=False)

         # mixed
         mixed = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(),
@@ -655,11 +656,11 @@ def test_extract_expand_False(self):
         # no groups
         s_or_idx = klass(['A1', 'B2', 'C3'])
         f = lambda: s_or_idx.str.extract('[ABC][123]', expand=False)
-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # only non-capturing groups
         f = lambda: s_or_idx.str.extract('(?:[AB]).*', expand=False)
-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # single group renames series/index properly
         s_or_idx = klass(['A1', 'A2'])
@@ -800,11 +801,11 @@ def test_extract_expand_True(self):
         # no groups
         s_or_idx = klass(['A1', 'B2', 'C3'])
         f = lambda: s_or_idx.str.extract('[ABC][123]', expand=True)
-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # only non-capturing groups
         f = lambda: s_or_idx.str.extract('(?:[AB]).*', expand=True)
-        self.assertRaises(ValueError, f)
+        pytest.raises(ValueError, f)

         # single group renames series/index properly
         s_or_idx = klass(['A1', 'A2'])
@@ -2619,7 +2620,7 @@ def test_encode_decode(self):

     def test_encode_decode_errors(self):
         encodeBase = Series([u('a'), u('b'), u('a\x9d')])

-        self.assertRaises(UnicodeEncodeError, encodeBase.str.encode, 'cp1252')
+        pytest.raises(UnicodeEncodeError, encodeBase.str.encode, 'cp1252')

         f = lambda x: x.encode('cp1252', 'ignore')
         result = encodeBase.str.encode('cp1252', 'ignore')
@@ -2628,7 +2629,7 @@ def test_encode_decode_errors(self):

         decodeBase = Series([b'a', b'b', b'a\x9d'])

-        self.assertRaises(UnicodeDecodeError, decodeBase.str.decode, 'cp1252')
+        pytest.raises(UnicodeDecodeError, decodeBase.str.decode, 'cp1252')

         f = lambda x: x.decode('cp1252', 'ignore')
         result = decodeBase.str.decode('cp1252', 'ignore')
@@ -2745,7 +2746,7 @@ def test_method_on_bytes(self):
         lhs = Series(np.array(list('abc'), 'S1').astype(object))
         rhs = Series(np.array(list('def'), 'S1').astype(object))
         if compat.PY3:
-            self.assertRaises(TypeError, lhs.str.cat, rhs)
+            pytest.raises(TypeError, lhs.str.cat, rhs)
         else:
             result = lhs.str.cat(rhs)
             expected = Series(np.array(
diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py
index fc9a828c18f0e..4a9e4f4fbd37a 100644
--- a/pandas/tests/test_testing.py
+++ b/pandas/tests/test_testing.py
@@ -21,8 +21,8 @@ def _assert_almost_equal_both(self, a, b, **kwargs):
         assert_almost_equal(b, a, **kwargs)

     def _assert_not_almost_equal_both(self, a, b, **kwargs):
-        self.assertRaises(AssertionError, assert_almost_equal, a, b, **kwargs)
-        self.assertRaises(AssertionError, assert_almost_equal, b, a, **kwargs)
+        pytest.raises(AssertionError, assert_almost_equal, a, b, **kwargs)
+        pytest.raises(AssertionError, assert_almost_equal, b, a, **kwargs)

     def test_assert_almost_equal_numbers(self):
         self._assert_almost_equal_both(1.1, 1.1)
@@ -495,8 +495,8 @@ def _assert_equal(self, x, y, **kwargs):
         assert_series_equal(y, x, **kwargs)

     def _assert_not_equal(self, a, b, **kwargs):
-        self.assertRaises(AssertionError, assert_series_equal, a, b, **kwargs)
-        self.assertRaises(AssertionError, assert_series_equal, b, a, **kwargs)
+        pytest.raises(AssertionError, assert_series_equal, a, b, **kwargs)
+        pytest.raises(AssertionError, assert_series_equal, b, a, **kwargs)

     def test_equal(self):
         self._assert_equal(Series(range(3)), Series(range(3)))
@@ -520,27 +520,27 @@ def test_less_precise(self):
         s1 = Series([0.12345], dtype='float64')
         s2 = Series([0.12346], dtype='float64')

-        self.assertRaises(AssertionError, assert_series_equal, s1, s2)
+        pytest.raises(AssertionError, assert_series_equal, s1, s2)
         self._assert_equal(s1, s2, check_less_precise=True)
         for i in range(4):
             self._assert_equal(s1, s2, check_less_precise=i)
-        self.assertRaises(AssertionError, assert_series_equal, s1, s2, 10)
+        pytest.raises(AssertionError, assert_series_equal, s1, s2, 10)

         s1 = Series([0.12345], dtype='float32')
         s2 = Series([0.12346], dtype='float32')

-        self.assertRaises(AssertionError, assert_series_equal, s1, s2)
+        pytest.raises(AssertionError, assert_series_equal, s1, s2)
         self._assert_equal(s1, s2, check_less_precise=True)
         for i in range(4):
             self._assert_equal(s1, s2, check_less_precise=i)
-        self.assertRaises(AssertionError, assert_series_equal, s1, s2, 10)
+        pytest.raises(AssertionError, assert_series_equal, s1, s2, 10)

         # even less than less precise
         s1 = Series([0.1235], dtype='float32')
         s2 = Series([0.1236], dtype='float32')

-        self.assertRaises(AssertionError, assert_series_equal, s1, s2)
-        self.assertRaises(AssertionError, assert_series_equal, s1, s2, True)
+        pytest.raises(AssertionError, assert_series_equal, s1, s2)
+        pytest.raises(AssertionError, assert_series_equal, s1, s2, True)

     def test_index_dtype(self):
         df1 = DataFrame.from_records(
@@ -589,8 +589,8 @@ def _assert_equal(self, x, y, **kwargs):
         assert_frame_equal(y, x, **kwargs)

     def _assert_not_equal(self, a, b, **kwargs):
-        self.assertRaises(AssertionError, assert_frame_equal, a, b, **kwargs)
-        self.assertRaises(AssertionError, assert_frame_equal, b, a, **kwargs)
+        pytest.raises(AssertionError, assert_frame_equal, a, b, **kwargs)
+        pytest.raises(AssertionError, assert_frame_equal, b, a, **kwargs)

     def test_equal_with_different_row_order(self):
         # check_like=True ignores row-column orderings
diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py
index 1fa436df0910d..c9b2e1c9c1c75 100644
--- a/pandas/tests/test_util.py
+++ b/pandas/tests/test_util.py
@@ -63,11 +63,11 @@ def test_callable_deprecate_kwarg(self):
         with tm.assert_produces_warning(FutureWarning):
             result = self.f3(old=x)
         self.assertEqual(result, x + 1)
-        with tm.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             self.f3(old='hello')

     def test_bad_deprecate_kwarg(self):
-        with tm.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             @deprecate_kwarg('old', 'new', 0)
             def f4(new=None):
                 pass
@@ -336,7 +336,7 @@ def test_more_than_one_ref(self):
         """
         b = b'testing'

-        with tm.assertRaises(BadMove) as e:
+        with pytest.raises(BadMove) as e:
             def handle_success(type_, value, tb):
                 self.assertIs(value.args[0], b)
                 return type(e).handle_success(e, type_, value, tb)  # super
@@ -385,7 +385,7 @@ def ref_capture(ob):
             refcount[0] = sys.getrefcount(ob) - 2
             return ob

-        with tm.assertRaises(BadMove):
+        with pytest.raises(BadMove):
             # If we intern the string it will still have one reference but now
             # it is in the intern table so if other people intern the same
             # string while the mutable buffer holds the first string they will
@@ -459,7 +459,7 @@ def test_set_locale(self):
         new_locale = lang, enc

         if not tm._can_set_locale(new_locale):
-            with tm.assertRaises(locale.Error):
+            with pytest.raises(locale.Error):
                 with tm.set_locale(new_locale):
                     pass
         else:
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index 98724fe665e58..101ecec783533 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -71,9 +71,9 @@ def test_getitem(self):

     def test_select_bad_cols(self):
         df = DataFrame([[1, 2]], columns=['A', 'B'])
         g = df.rolling(window=5)
-        self.assertRaises(KeyError, g.__getitem__, ['C'])  # g[['C']]
+        pytest.raises(KeyError, g.__getitem__, ['C'])  # g[['C']]

-        self.assertRaises(KeyError, g.__getitem__, ['A', 'C'])  # g[['A', 'C']]
+        pytest.raises(KeyError, g.__getitem__, ['A', 'C'])  # g[['A', 'C']]
         with tm.assertRaisesRegexp(KeyError, '^[^A]+$'):
             # A should not be referenced as a bad column...
             # will have to rethink regex if you change message!
@@ -84,7 +84,7 @@ def test_attribute_access(self):
         df = DataFrame([[1, 2]], columns=['A', 'B'])
         r = df.rolling(window=5)
         tm.assert_series_equal(r.A.sum(), r['A'].sum())
-        self.assertRaises(AttributeError, lambda: r.F)
+        pytest.raises(AttributeError, lambda: r.F)

     def tests_skip_nuisance(self):
@@ -191,7 +191,7 @@ def f():
             r.aggregate({'r1': {'A': ['mean', 'sum']},
                          'r2': {'B': ['mean', 'sum']}})

-        self.assertRaises(SpecificationError, f)
+        pytest.raises(SpecificationError, f)

         expected = pd.concat([r['A'].mean(), r['A'].std(),
                               r['B'].mean(), r['B'].std()], axis=1)
@@ -336,13 +336,13 @@ def test_constructor(self):

         # not valid
         for w in [2., 'foo', np.array([2])]:
-            with self.assertRaises(ValueError):
+            with pytest.raises(ValueError):
                 c(win_type='boxcar', window=2, min_periods=w)
-            with self.assertRaises(ValueError):
+            with pytest.raises(ValueError):
                 c(win_type='boxcar', window=2, min_periods=1, center=w)

         for wt in ['foobar', 1]:
-            with self.assertRaises(ValueError):
+            with pytest.raises(ValueError):
                 c(win_type=wt, window=2)

     def test_numpy_compat(self):
@@ -384,16 +384,16 @@ def test_constructor(self):
         # GH 13383
         c(0)
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             c(-1)

         # not valid
         for w in [2., 'foo', np.array([2])]:
-            with self.assertRaises(ValueError):
+            with pytest.raises(ValueError):
                 c(window=w)
-            with self.assertRaises(ValueError):
+            with pytest.raises(ValueError):
                 c(window=2, min_periods=w)
-            with self.assertRaises(ValueError):
+            with pytest.raises(ValueError):
                 c(window=2, min_periods=1, center=w)

     def test_constructor_with_win_type(self):
@@ -402,7 +402,7 @@ def test_constructor_with_win_type(self):
         for o in [self.series, self.frame]:
             c = o.rolling
             c(0, win_type='boxcar')
-            with self.assertRaises(ValueError):
+            with pytest.raises(ValueError):
                 c(-1, win_type='boxcar')

     def test_constructor_with_timedelta_window(self):
@@ -466,9 +466,9 @@ def test_constructor(self):

         # not valid
         for w in [2., 'foo', np.array([2])]:
-            with self.assertRaises(ValueError):
+            with pytest.raises(ValueError):
                 c(min_periods=w)
-            with self.assertRaises(ValueError):
+            with pytest.raises(ValueError):
                 c(min_periods=1, center=w)

     def test_numpy_compat(self):
@@ -509,28 +509,28 @@ def test_constructor(self):
             c(halflife=0.75, alpha=None)

         # not valid: mutually exclusive
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             c(com=0.5, alpha=0.5)
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             c(span=1.5, halflife=0.75)
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             c(alpha=0.5, span=1.5)

         # not valid: com < 0
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             c(com=-0.5)

         # not valid: span < 1
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             c(span=0.5)

         # not valid: halflife <= 0
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             c(halflife=0)

         # not valid: alpha <= 0 or alpha > 1
         for alpha in (-0.5, 1.5):
-            with self.assertRaises(ValueError):
+            with pytest.raises(ValueError):
                 c(alpha=alpha)

     def test_numpy_compat(self):
@@ -784,7 +784,7 @@ def test_centered_axis_validation(self):
         Series(np.ones(10)).rolling(window=3, center=True, axis=0).mean()

         # bad axis
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             Series(np.ones(10)).rolling(window=3, center=True, axis=1).mean()

         # ok ok
@@ -794,7 +794,7 @@ def test_centered_axis_validation(self):
                                             axis=1).mean()

         # bad axis
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
            (DataFrame(np.ones((10, 10)))
             .rolling(window=3, center=True, axis=2).mean())
@@ -887,7 +887,7 @@ def test_cmov_window_frame(self):
         tm.assert_frame_equal(DataFrame(xp), rs)

         # invalid method
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             (DataFrame(vals).rolling(5, win_type='boxcar', center=True)
              .std())
@@ -1056,8 +1056,8 @@ def test_rolling_min(self):
            b = mom.rolling_min(a, window=100, min_periods=1)
            tm.assert_almost_equal(b, np.ones(len(a)))

-            self.assertRaises(ValueError, mom.rolling_min, np.array([1, 2, 3]),
-                              window=3, min_periods=5)
+            pytest.raises(ValueError, mom.rolling_min, np.array([1, 2, 3]),
+                          window=3, min_periods=5)

     def test_rolling_max(self):
@@ -1069,8 +1069,8 @@ def test_rolling_max(self):
            b = mom.rolling_max(a, window=100, min_periods=1)
            tm.assert_almost_equal(a, b)

-            self.assertRaises(ValueError, mom.rolling_max, np.array([1, 2, 3]),
-                              window=3, min_periods=5)
+            pytest.raises(ValueError, mom.rolling_max, np.array([1, 2, 3]),
+                          window=3, min_periods=5)

     def test_rolling_quantile(self):
         qs = [0.0, .1, .5, .9, 1.0]
@@ -1097,13 +1097,13 @@ def alt(x):
     def test_rolling_quantile_param(self):
         ser = Series([0.0, .1, .5, .9, 1.0])

-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             ser.rolling(3).quantile(-0.1)

-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             ser.rolling(3).quantile(10.0)

-        with self.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             ser.rolling(3).quantile('foo')

     def test_rolling_apply(self):
@@ -1549,8 +1549,8 @@ def test_ewma_span_com_args(self):
            B = mom.ewma(self.arr, span=20)
            tm.assert_almost_equal(A, B)

-            self.assertRaises(ValueError, mom.ewma, self.arr, com=9.5, span=20)
-            self.assertRaises(ValueError, mom.ewma, self.arr)
+            pytest.raises(ValueError, mom.ewma, self.arr, com=9.5, span=20)
+            pytest.raises(ValueError, mom.ewma, self.arr)

     def test_ewma_halflife_arg(self):
         with catch_warnings(record=True):
@@ -1558,13 +1558,13 @@ def test_ewma_halflife_arg(self):
            B = mom.ewma(self.arr, halflife=10.0)
            tm.assert_almost_equal(A, B)

-            self.assertRaises(ValueError, mom.ewma, self.arr, span=20,
-                              halflife=50)
-            self.assertRaises(ValueError, mom.ewma, self.arr, com=9.5,
-                              halflife=50)
-            self.assertRaises(ValueError, mom.ewma, self.arr, com=9.5, span=20,
-                              halflife=50)
-            self.assertRaises(ValueError, mom.ewma, self.arr)
+            pytest.raises(ValueError, mom.ewma, self.arr, span=20,
+                          halflife=50)
+            pytest.raises(ValueError, mom.ewma, self.arr, com=9.5,
+                          halflife=50)
+            pytest.raises(ValueError, mom.ewma, self.arr, com=9.5, span=20,
+                          halflife=50)
+            pytest.raises(ValueError, mom.ewma, self.arr)

     def test_ewma_alpha_old_api(self):
         # GH 10789
@@ -1580,13 +1580,13 @@ def test_ewma_alpha_old_api(self):
     def test_ewma_alpha_arg_old_api(self):
         # GH 10789
         with catch_warnings(record=True):
-            self.assertRaises(ValueError, mom.ewma, self.arr)
-            self.assertRaises(ValueError, mom.ewma, self.arr,
-                              com=10.0, alpha=0.5)
-            self.assertRaises(ValueError, mom.ewma, self.arr,
-                              span=10.0, alpha=0.5)
-            self.assertRaises(ValueError, mom.ewma, self.arr,
-                              halflife=10.0, alpha=0.5)
+            pytest.raises(ValueError, mom.ewma, self.arr)
+            pytest.raises(ValueError, mom.ewma, self.arr,
+                          com=10.0, alpha=0.5)
+            pytest.raises(ValueError, mom.ewma, self.arr,
+                          span=10.0, alpha=0.5)
+            pytest.raises(ValueError, mom.ewma, self.arr,
+                          halflife=10.0, alpha=0.5)

     def test_ewm_alpha(self):
         # GH 10789
@@ -1602,34 +1602,34 @@ def test_ewm_alpha(self):
     def test_ewm_alpha_arg(self):
         # GH 10789
         s = Series(self.arr)
-        self.assertRaises(ValueError, s.ewm)
-        self.assertRaises(ValueError, s.ewm, com=10.0, alpha=0.5)
-        self.assertRaises(ValueError, s.ewm, span=10.0, alpha=0.5)
-        self.assertRaises(ValueError, s.ewm, halflife=10.0, alpha=0.5)
+        pytest.raises(ValueError, s.ewm)
+        pytest.raises(ValueError, s.ewm, com=10.0, alpha=0.5)
+        pytest.raises(ValueError, s.ewm, span=10.0, alpha=0.5)
+        pytest.raises(ValueError, s.ewm, halflife=10.0, alpha=0.5)

     def test_ewm_domain_checks(self):
         # GH 12492
         s = Series(self.arr)
         # com must satisfy: com >= 0
-        self.assertRaises(ValueError, s.ewm, com=-0.1)
+        pytest.raises(ValueError, s.ewm, com=-0.1)
         s.ewm(com=0.0)
         s.ewm(com=0.1)
         # span must satisfy: span >= 1
-        self.assertRaises(ValueError, s.ewm, span=-0.1)
-        self.assertRaises(ValueError, s.ewm, span=0.0)
-        self.assertRaises(ValueError, s.ewm, span=0.9)
+        pytest.raises(ValueError, s.ewm, span=-0.1)
+        pytest.raises(ValueError, s.ewm, span=0.0)
+        pytest.raises(ValueError, s.ewm, span=0.9)
         s.ewm(span=1.0)
         s.ewm(span=1.1)
         # halflife must satisfy: halflife > 0
-        self.assertRaises(ValueError, s.ewm, halflife=-0.1)
-        self.assertRaises(ValueError, s.ewm, halflife=0.0)
+        pytest.raises(ValueError, s.ewm, halflife=-0.1)
+        pytest.raises(ValueError, s.ewm, halflife=0.0)
         s.ewm(halflife=0.1)
         # alpha must satisfy: 0 < alpha <= 1
-        self.assertRaises(ValueError, s.ewm, alpha=-0.1)
-        self.assertRaises(ValueError, s.ewm, alpha=0.0)
+        pytest.raises(ValueError, s.ewm, alpha=-0.1)
+        pytest.raises(ValueError, s.ewm, alpha=0.0)
         s.ewm(alpha=0.1)
         s.ewm(alpha=1.0)
-        self.assertRaises(ValueError, s.ewm, alpha=1.1)
+        pytest.raises(ValueError, s.ewm, alpha=1.1)

     def test_ew_empty_arrays(self):
         arr = np.array([], dtype=np.float64)
@@ -2396,7 +2396,7 @@ def get_result(obj, obj2=None):
     def test_flex_binary_moment(self):
         # GH3155
         # don't blow the stack
-        self.assertRaises(TypeError, rwindow._flex_binary_moment, 5, 6, None)
+        pytest.raises(TypeError, rwindow._flex_binary_moment, 5, 6, None)

     def test_corr_sanity(self):
         # GH 3155
@@ -2485,7 +2485,7 @@ def func(A, B, com, **kwargs):
                 Series([1.]), Series([1.]), 50, min_periods=min_periods)
             tm.assert_series_equal(result, Series([np.NaN]))

-        self.assertRaises(Exception, func, A, randn(50), 20, min_periods=5)
+        pytest.raises(Exception, func, A, randn(50), 20, min_periods=5)

     def test_expanding_apply(self):
         ser = Series([])
@@ -3047,7 +3047,7 @@ def test_mutated(self):

         def f():
             self.frame.groupby('A', foo=1)
-        self.assertRaises(TypeError, f)
+        pytest.raises(TypeError, f)

         g = self.frame.groupby('A')
         self.assertFalse(g.mutated)
@@ -3216,16 +3216,16 @@ def test_valid(self):
         df = self.regular

         # not a valid freq
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.rolling(window='foobar')

         # not a datetimelike index
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.reset_index().rolling(window='foobar')

         # non-fixed freqs
         for freq in ['2MS', pd.offsets.MonthBegin(2)]:
-            with self.assertRaises(ValueError):
+            with pytest.raises(ValueError):
                 df.rolling(window=freq)

         for freq in ['1D', pd.offsets.Day(2), '2ms']:
@@ -3233,11 +3233,11 @@ def test_valid(self):

         # non-integer min_periods
         for minp in [1.0, 'foo', np.array([1, 2, 3])]:
-            with self.assertRaises(ValueError):
+            with pytest.raises(ValueError):
                 df.rolling(window='1D', min_periods=minp)

         # center is not implemented
-        with self.assertRaises(NotImplementedError):
+        with pytest.raises(NotImplementedError):
             df.rolling(window='1D', center=True)

     def test_on(self):

         df = self.regular

         # not a valid column
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.rolling(window='2s', on='foobar')

         # column is valid
@@ -3254,7 +3254,7 @@ def test_on(self):
         df.rolling(window='2d', on='C').sum()

         # invalid columns
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.rolling(window='2d', on='B')

         # ok even though on non-selected
@@ -3279,11 +3279,11 @@ def test_monotonic_on(self):
         df.index = reversed(df.index.tolist())
         self.assertFalse(df.index.is_monotonic)

-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.rolling('2s').sum()

         df = df.reset_index()
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             df.rolling('2s', on='A').sum()

     def test_frame_on(self):
diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py
index 96b49c5fb97a6..5fa3f38f31ae1 100644
--- a/pandas/tests/tools/test_numeric.py
+++ b/pandas/tests/tools/test_numeric.py
@@ -162,7 +162,7 @@ def test_scalar(self):
         self.assertEqual(pd.to_numeric('1'), 1)
         self.assertEqual(pd.to_numeric('1.1'), 1.1)

-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             to_numeric('XX', errors='raise')

         self.assertEqual(to_numeric('XX', errors='ignore'), 'XX')
diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py
index 327dad6d47634..e3ab01499e676 100644
--- a/pandas/tests/tseries/test_frequencies.py
+++ b/pandas/tests/tseries/test_frequencies.py
@@ -1,6 +1,7 @@
 from datetime import datetime, timedelta

 from pandas.compat import range
+import pytest
 import numpy as np

 from pandas import (Index, DatetimeIndex, Timestamp, Series,
@@ -198,7 +199,7 @@ def test_to_offset_pd_timedelta(self):
         assert (expected == result)

         td = Timedelta(microseconds=0)
-        tm.assertRaises(ValueError, lambda: frequencies.to_offset(td))
+        pytest.raises(ValueError, lambda: frequencies.to_offset(td))

     def test_anchored_shortcuts(self):
         result = frequencies.to_offset('W')
@@ -427,11 +428,11 @@ def test_resolution_bumping(self):
         self.assertEqual(Reso.get_stride_from_decimal(1.2345, 'D'),
                          (106660800, 'L'))

-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             Reso.get_stride_from_decimal(0.5, 'N')

         # too much precision in the input can prevent
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             Reso.get_stride_from_decimal(0.3429324798798269273987982, 'H')

     def test_get_freq_code(self):
@@ -492,11 +493,11 @@ class TestFrequencyInference(tm.TestCase):

     def test_raise_if_period_index(self):
         index = PeriodIndex(start="1/1/1990", periods=20, freq="M")
-        self.assertRaises(TypeError, frequencies.infer_freq, index)
+        pytest.raises(TypeError, frequencies.infer_freq, index)

     def test_raise_if_too_few(self):
         index = _dti(['12/31/1998', '1/3/1999'])
-        self.assertRaises(ValueError, frequencies.infer_freq, index)
+        pytest.raises(ValueError, frequencies.infer_freq, index)

     def test_business_daily(self):
         index = _dti(['12/31/1998', '1/3/1999', '1/4/1999'])
@@ -567,7 +568,7 @@ def test_week_of_month(self):
     def test_fifth_week_of_month(self):
         # Only supports freq up to WOM-4. See #9425
         func = lambda: date_range('2014-01-01', freq='WOM-5MON')
-        self.assertRaises(ValueError, func)
+        pytest.raises(ValueError, func)

     def test_fifth_week_of_month_infer(self):
         # Only attempts to infer up to WOM-4. See #9425
@@ -742,14 +743,13 @@ def test_invalid_index_types(self):

         # test all index types
         for i in [tm.makeIntIndex(10), tm.makeFloatIndex(10),
                   tm.makePeriodIndex(10)]:
-            self.assertRaises(TypeError, lambda: frequencies.infer_freq(i))
+            pytest.raises(TypeError, lambda: frequencies.infer_freq(i))

         # GH 10822
         # odd error message on conversions to datetime for unicode
         if not is_platform_windows():
             for i in [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)]:
-                self.assertRaises(ValueError,
-                                  lambda: frequencies.infer_freq(i))
+                pytest.raises(ValueError, lambda: frequencies.infer_freq(i))

     def test_string_datetimelike_compat(self):
@@ -767,33 +767,32 @@ def test_series(self):

         # invalid type of Series
         for s in [Series(np.arange(10)),
                   Series(np.arange(10.))]:
-            self.assertRaises(TypeError, lambda: frequencies.infer_freq(s))
+            pytest.raises(TypeError, lambda: frequencies.infer_freq(s))

         # a non-convertible string
-        self.assertRaises(ValueError,
-                          lambda: frequencies.infer_freq(
-                              Series(['foo', 'bar'])))
+        pytest.raises(ValueError, lambda: frequencies.infer_freq(
+            Series(['foo', 'bar'])))

         # cannot infer on PeriodIndex
         for freq in [None, 'L']:
             s = Series(period_range('2013', periods=10, freq=freq))
-            self.assertRaises(TypeError, lambda: frequencies.infer_freq(s))
+            pytest.raises(TypeError, lambda: frequencies.infer_freq(s))
         for freq in ['Y']:
             msg = frequencies._INVALID_FREQ_ERROR
             with tm.assertRaisesRegexp(ValueError, msg):
                 s = Series(period_range('2013', periods=10, freq=freq))
-            self.assertRaises(TypeError, lambda: frequencies.infer_freq(s))
+            pytest.raises(TypeError, lambda: frequencies.infer_freq(s))

         # DateTimeIndex
         for freq in ['M', 'L', 'S']:
             s = Series(date_range('20130101', periods=10, freq=freq))
             inferred = frequencies.infer_freq(s)
-            self.assertEqual(inferred, freq)
+            assert inferred == freq

         s = Series(date_range('20130101', '20130110'))
         inferred = frequencies.infer_freq(s)
-        self.assertEqual(inferred, 'D')
+        assert inferred == 'D'

     def test_legacy_offset_warnings(self):
         freqs = ['WEEKDAY', 'EOM', 'W@MON', 'W@TUE', 'W@WED', 'W@THU',
diff --git a/pandas/tests/tseries/test_holiday.py b/pandas/tests/tseries/test_holiday.py
index 25ae367617874..c87f580582335 100644
--- a/pandas/tests/tseries/test_holiday.py
+++ b/pandas/tests/tseries/test_holiday.py
@@ -1,3 +1,5 @@
+import pytest
+
 from datetime import datetime
 import pandas.util.testing as tm
 from pandas import compat
@@ -384,7 +386,7 @@ class TestHolidayConflictingArguments(tm.TestCase):
     # GH 10217

     def test_both_offset_observance_raises(self):
-        with self.assertRaises(NotImplementedError):
+        with pytest.raises(NotImplementedError):
             Holiday("Cyber Monday", month=11, day=1,
                     offset=[DateOffset(weekday=SA(4))],
                     observance=next_monday)
diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py
index 64f978124f8e7..335a7f3513da9 100644
--- a/pandas/tests/tseries/test_offsets.py
+++ b/pandas/tests/tseries/test_offsets.py
@@ -590,7 +590,7 @@ def testRAdd(self):

     def testSub(self):
         off = self.offset2
-        self.assertRaises(Exception, off.__sub__, self.d)
+        pytest.raises(Exception, off.__sub__, self.d)
         self.assertEqual(2 * off - off, off)

         self.assertEqual(self.d - self.offset2, self.d + BDay(-2))
@@ -713,7 +713,7 @@ def test_apply_large_n(self):
         self.assertEqual(rs, xp)

     def test_apply_corner(self):
-        self.assertRaises(TypeError, BDay().apply, BMonthEnd())
+        pytest.raises(TypeError, BDay().apply, BMonthEnd())

     def test_offsets_compare_equal(self):
         # root cause of #456
@@ -742,11 +742,11 @@ def setUp(self):

     def test_constructor_errors(self):
         from datetime import time as dt_time
-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             BusinessHour(start=dt_time(11, 0, 5))
-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             BusinessHour(start='AAA')
-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             BusinessHour(start='14:00:05')

     def test_different_normalize_equals(self):
@@ -802,7 +802,7 @@ def testRAdd(self):

     def testSub(self):
         off = self.offset2
-        self.assertRaises(Exception, off.__sub__, self.d)
+        pytest.raises(Exception, off.__sub__, self.d)
         self.assertEqual(2 * off - off, off)

         self.assertEqual(self.d - self.offset2, self.d + self._offset(-3))
@@ -1446,11 +1446,11 @@ def setUp(self):

     def test_constructor_errors(self):
         from datetime import time as dt_time
-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             CustomBusinessHour(start=dt_time(11, 0, 5))
-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             CustomBusinessHour(start='AAA')
-        with tm.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             CustomBusinessHour(start='14:00:05')

     def test_different_normalize_equals(self):
@@ -1502,7 +1502,7 @@ def testRAdd(self):

     def testSub(self):
         off = self.offset2
-        self.assertRaises(Exception, off.__sub__, self.d)
+        pytest.raises(Exception, off.__sub__, self.d)
         self.assertEqual(2 * off - off, off)

         self.assertEqual(self.d - self.offset2, self.d - (2 * off - off))
@@ -1735,7 +1735,7 @@ def testRAdd(self):

     def testSub(self):
         off = self.offset2
-        self.assertRaises(Exception, off.__sub__, self.d)
+        pytest.raises(Exception, off.__sub__, self.d)
         self.assertEqual(2 * off - off, off)

         self.assertEqual(self.d - self.offset2, self.d + CDay(-2))
@@ -1854,7 +1854,7 @@ def test_apply_large_n(self):
         self.assertEqual(rs, xp)

     def test_apply_corner(self):
-        self.assertRaises(Exception, CDay().apply, BMonthEnd())
+        pytest.raises(Exception, CDay().apply, BMonthEnd())

     def test_offsets_compare_equal(self):
         # root cause of #456
@@ -1947,7 +1947,7 @@ def testRAdd(self):

     def testSub(self):
         off = self.offset2
-        self.assertRaises(Exception, off.__sub__, self.d)
+        pytest.raises(Exception, off.__sub__, self.d)
         self.assertEqual(2 * off - off, off)

         self.assertEqual(self.d - self.offset2, self.d + self._object(-2))
@@ -2225,7 +2225,7 @@ def test_repr(self):
                          "<-2 * Weeks: weekday=0>")

     def test_corner(self):
-        self.assertRaises(ValueError, Week, weekday=7)
+        pytest.raises(ValueError, Week, weekday=7)
         assertRaisesRegexp(ValueError, "Day must be", Week, weekday=-1)

     def test_isAnchored(self):
@@ -4031,8 +4031,8 @@ class TestBYearBegin(Base):
     _offset = BYearBegin

     def test_misspecified(self):
-        self.assertRaises(ValueError, BYearBegin, month=13)
-        self.assertRaises(ValueError, BYearEnd, month=13)
+        pytest.raises(ValueError, BYearBegin, month=13)
+        pytest.raises(ValueError, BYearEnd, month=13)

     def test_offset(self):
         tests = []
@@ -4077,7 +4077,7 @@ class TestYearBegin(Base):
     _offset = YearBegin

     def test_misspecified(self):
-        self.assertRaises(ValueError, YearBegin, month=13)
+        pytest.raises(ValueError, YearBegin, month=13)

     def test_offset(self):
         tests = []
@@ -4169,8 +4169,8 @@ def test_onOffset(self):
 class TestBYearEndLagged(Base):

     def test_bad_month_fail(self):
-        self.assertRaises(Exception, BYearEnd, month=13)
-        self.assertRaises(Exception, BYearEnd, month=0)
+        pytest.raises(Exception, BYearEnd, month=13)
+        pytest.raises(Exception, BYearEnd, month=0)

     def test_offset(self):
         tests = []
@@ -4258,7 +4258,7 @@ class TestYearEnd(Base):
     _offset = YearEnd

     def
test_misspecified(self): - self.assertRaises(ValueError, YearEnd, month=13) + pytest.raises(ValueError, YearEnd, month=13) def test_offset(self): tests = [] @@ -4622,9 +4622,9 @@ def test_parse_time_quarter_w_dash(self): self.assertEqual(parsed_dash, parsed) self.assertEqual(reso_dash, reso) - self.assertRaises(DateParseError, parse_time_string, "-2Q1992") - self.assertRaises(DateParseError, parse_time_string, "2-Q1992") - self.assertRaises(DateParseError, parse_time_string, "4-4Q1992") + pytest.raises(DateParseError, parse_time_string, "-2Q1992") + pytest.raises(DateParseError, parse_time_string, "2-Q1992") + pytest.raises(DateParseError, parse_time_string, "4-4Q1992") def test_get_standard_freq(): diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index f99ef2898d0f9..c9ac4f36e7769 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -105,8 +105,8 @@ def test_localize_utc_conversion(self): # DST ambiguity, this should fail rng = date_range('3/11/2012', '3/12/2012', freq='30T') # Is this really how it should fail?? - self.assertRaises(NonExistentTimeError, rng.tz_localize, - self.tzstr('US/Eastern')) + pytest.raises(NonExistentTimeError, rng.tz_localize, + self.tzstr('US/Eastern')) def test_localize_utc_conversion_explicit(self): # Localizing to time zone should: @@ -121,8 +121,8 @@ def test_localize_utc_conversion_explicit(self): # DST ambiguity, this should fail rng = date_range('3/11/2012', '3/12/2012', freq='30T') # Is this really how it should fail?? - self.assertRaises(NonExistentTimeError, rng.tz_localize, - self.tz('US/Eastern')) + pytest.raises(NonExistentTimeError, rng.tz_localize, + self.tz('US/Eastern')) def test_timestamp_tz_localize(self): stamp = Timestamp('3/11/2012 04:00') @@ -255,13 +255,13 @@ def test_tz_localize_dti(self): dti = DatetimeIndex(start='11/6/2011 1:59', end='11/6/2011 2:00', freq='L') - self.assertRaises(pytz.AmbiguousTimeError, dti.tz_localize, - self.tzstr('US/Eastern')) + pytest.raises(pytz.AmbiguousTimeError, dti.tz_localize, + self.tzstr('US/Eastern')) dti = DatetimeIndex(start='3/13/2011 1:59', end='3/13/2011 2:00', freq='L') - self.assertRaises(pytz.NonExistentTimeError, dti.tz_localize, - self.tzstr('US/Eastern')) + pytest.raises(pytz.NonExistentTimeError, dti.tz_localize, + self.tzstr('US/Eastern')) def test_tz_localize_empty_series(self): # #2248 @@ -436,9 +436,9 @@ def test_with_tz(self): dr = bdate_range(datetime(2005, 1, 1, tzinfo=pytz.utc), '1/1/2009', tz=pytz.utc) - self.assertRaises(Exception, bdate_range, - datetime(2005, 1, 1, tzinfo=pytz.utc), '1/1/2009', - tz=tz) + pytest.raises(Exception, bdate_range, + datetime(2005, 1, 1, tzinfo=pytz.utc), '1/1/2009', + tz=tz) def test_tz_localize(self): dr = bdate_range('1/1/2009', '1/1/2010') @@ -452,7 +452,7 @@ def test_with_tz_ambiguous_times(self): # March 13, 2011, spring forward, skip from 2 AM to 3 AM dr = date_range(datetime(2011, 3, 13, 1, 30), periods=3, freq=offsets.Hour()) - self.assertRaises(pytz.NonExistentTimeError, dr.tz_localize, tz) + pytest.raises(pytz.NonExistentTimeError, dr.tz_localize, tz) # after dst transition, it works dr = date_range(datetime(2011, 3, 13, 3, 30), periods=3, @@ -461,7 +461,7 @@ def test_with_tz_ambiguous_times(self): # November 6, 2011, fall back, repeat 2 AM hour dr = date_range(datetime(2011, 11, 6, 1, 30), periods=3, freq=offsets.Hour()) - self.assertRaises(pytz.AmbiguousTimeError, dr.tz_localize, tz) + pytest.raises(pytz.AmbiguousTimeError, dr.tz_localize, tz) # UTC 
is OK dr = date_range(datetime(2011, 3, 13), periods=48, @@ -473,7 +473,7 @@ def test_ambiguous_infer(self): tz = self.tz('US/Eastern') dr = date_range(datetime(2011, 11, 6, 0), periods=5, freq=offsets.Hour()) - self.assertRaises(pytz.AmbiguousTimeError, dr.tz_localize, tz) + pytest.raises(pytz.AmbiguousTimeError, dr.tz_localize, tz) # With repeated hours, we can infer the transition dr = date_range(datetime(2011, 11, 6, 0), periods=5, @@ -533,7 +533,7 @@ def test_ambiguous_flags(self): di = DatetimeIndex(times) # When the sizes are incompatible, make sure error is raised - self.assertRaises(Exception, di.tz_localize, tz, ambiguous=is_dst) + pytest.raises(Exception, di.tz_localize, tz, ambiguous=is_dst) # When sizes are compatible and there are repeats ('infer' won't work) is_dst = np.hstack((is_dst, is_dst)) @@ -556,7 +556,7 @@ def test_ambiguous_flags(self): def f(): date_range("2013-10-26 23:00", "2013-10-27 01:00", tz="Europe/London", freq="H") - self.assertRaises(pytz.AmbiguousTimeError, f) + pytest.raises(pytz.AmbiguousTimeError, f) times = date_range("2013-10-26 23:00", "2013-10-27 01:00", freq="H", tz=tz, ambiguous='infer') @@ -592,7 +592,7 @@ def test_ambiguous_bool(self): def f(): t.tz_localize('US/Central') - self.assertRaises(pytz.AmbiguousTimeError, f) + pytest.raises(pytz.AmbiguousTimeError, f) result = t.tz_localize('US/Central', ambiguous=True) self.assertEqual(result, expected0) @@ -606,7 +606,7 @@ def f(): def f(): s.dt.tz_localize('US/Central') - self.assertRaises(pytz.AmbiguousTimeError, f) + pytest.raises(pytz.AmbiguousTimeError, f) result = s.dt.tz_localize('US/Central', ambiguous=True) assert_series_equal(result, expected0) @@ -626,10 +626,10 @@ def test_nonexistent_raise_coerce(self): times = ['2015-03-08 01:00', '2015-03-08 02:00', '2015-03-08 03:00'] index = DatetimeIndex(times) tz = 'US/Eastern' - self.assertRaises(NonExistentTimeError, - index.tz_localize, tz=tz) - self.assertRaises(NonExistentTimeError, - index.tz_localize, tz=tz, errors='raise') + pytest.raises(NonExistentTimeError, + index.tz_localize, tz=tz) + pytest.raises(NonExistentTimeError, + index.tz_localize, tz=tz, errors='raise') result = index.tz_localize(tz=tz, errors='coerce') test_times = ['2015-03-08 01:00-05:00', 'NaT', '2015-03-08 03:00-04:00'] @@ -659,8 +659,8 @@ def test_infer_tz(self): assert (tools._infer_tzinfo(start, end) is utc) end = self.localize(eastern, _end) - self.assertRaises(Exception, tools._infer_tzinfo, start, end) - self.assertRaises(Exception, tools._infer_tzinfo, end, start) + pytest.raises(Exception, tools._infer_tzinfo, start, end) + pytest.raises(Exception, tools._infer_tzinfo, end, start) def test_tz_string(self): result = date_range('1/1/2000', periods=10, @@ -1232,11 +1232,11 @@ def test_replace(self): # error def f(): dt.replace(foo=5) - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def f(): dt.replace(hour=0.1) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # assert conversion to naive is the same as replacing tzinfo with None dt = Timestamp('2013-11-03 01:59:59.999999-0400', tz='US/Eastern') @@ -1306,7 +1306,7 @@ def test_tz_localize_roundtrip(self): tz=tz) tm.assert_index_equal(localized, expected) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): localized.tz_localize(tz) reset = localized.tz_localize(None) @@ -1410,8 +1410,8 @@ def test_join_aware(self): ts_utc = ts.tz_localize('utc') - self.assertRaises(Exception, ts.__add__, ts_utc) - self.assertRaises(Exception, ts_utc.__add__, ts) + 
pytest.raises(Exception, ts.__add__, ts_utc)
+        pytest.raises(Exception, ts_utc.__add__, ts)
 
         test1 = DataFrame(np.zeros((6, 3)),
                           index=date_range("2012-11-15 00:00:00", periods=6,
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index d2fb18be1c72e..7f62d319aa096 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -2446,57 +2446,23 @@ def stdin_encoding(encoding=None):
     sys.stdin = _stdin
 
 
-def assertRaises(_exception, _callable=None, *args, **kwargs):
-    """assertRaises that is usable as context manager or in a with statement
-
-    Exceptions that don't match the given Exception type fall through::
-
-    >>> with assertRaises(ValueError):
-    ...     raise TypeError("banana")
-    ...
-    Traceback (most recent call last):
-        ...
-    TypeError: banana
-
-    If it raises the given Exception type, the test passes
-    >>> with assertRaises(KeyError):
-    ...     dct = dict()
-    ...     dct["apple"]
-
-    If the expected error doesn't occur, it raises an error.
-    >>> with assertRaises(KeyError):
-    ...     dct = {'apple':True}
-    ...     dct["apple"]
-    Traceback (most recent call last):
-        ...
-    AssertionError: KeyError not raised.
-
-    In addition to using it as a contextmanager, you can also use it as a
-    function, just like the normal assertRaises
-
-    >>> assertRaises(TypeError, ",".join, [1, 3, 5])
+def assertRaisesRegexp(_exception, _regexp, _callable=None, *args, **kwargs):
     """
-    manager = _AssertRaisesContextmanager(exception=_exception)
-    # don't return anything if used in function form
-    if _callable is not None:
-        with manager:
-            _callable(*args, **kwargs)
-    else:
-        return manager
-
+    Check that the specified Exception is raised and that the error message
+    matches a given regular expression pattern. This may be a regular
+    expression object or a string containing a regular expression suitable
+    for use by `re.search()`.
 
-def assertRaisesRegexp(_exception, _regexp, _callable=None, *args, **kwargs):
-    """ Port of assertRaisesRegexp from unittest in
-    Python 2.7 - used in with statement.
+    This is a port of the `assertRaisesRegexp` function from unittest in
+    Python 2.7. However, with our migration to `pytest`, please refrain
+    from using this. Instead, use the following paradigm:
 
-    Explanation from standard library:
-    Like assertRaises() but also tests that regexp matches on the
-    string representation of the raised exception. regexp may be a
-    regular expression object or a string containing a regular
-    expression suitable for use by re.search().
+        with pytest.raises(_exception) as exc_info:
+            func(*args, **kwargs)
+        exc_info.match(reg_exp)
 
-    You can pass either a regular expression
-    or a compiled regular expression object.
+    Examples
+    --------
     >>> assertRaisesRegexp(ValueError, 'invalid literal for.*XYZ',
    ...                     int, 'XYZ')
     >>> import re

From fcd2f70d369ad0abdefd9026b71e416dfc3e82d9 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sat, 22 Apr 2017 00:02:39 +0200
Subject: [PATCH 444/933] CLN: move tools.hashing to util.hashing (#16086)

---
 asv_bench/benchmarks/algorithms.py   | 22 +++++++++++++---------
 doc/source/whatsnew/v0.20.0.txt      |  6 +++---
 pandas/core/indexes/multi.py         |  4 ++--
 pandas/tests/reshape/test_hashing.py |  2 +-
 pandas/{tools => util}/hashing.py    |  2 +-
 pandas/{tools => util}/hashing.pyx   |  0
 setup.py                             |  4 ++--
 7 files changed, 22 insertions(+), 18 deletions(-)
 rename pandas/{tools => util}/hashing.py (99%)
 rename pandas/{tools => util}/hashing.pyx (100%)

diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py
index 0e2182c58d44c..d79051ed2d66c 100644
--- a/asv_bench/benchmarks/algorithms.py
+++ b/asv_bench/benchmarks/algorithms.py
@@ -1,12 +1,16 @@
+from importlib import import_module
+
 import numpy as np
+
 import pandas as pd
 from pandas.util import testing as tm
 
-try:
-    from pandas.tools.hashing import hash_pandas_object
-except ImportError:
-    pass
-
+# try the new location first, then fall back to the pre-0.20 location
+for imp in ['pandas.util.hashing', 'pandas.tools.hashing']:
+    try:
+        hashing = import_module(imp)
+        break
+    except ImportError:
+        pass
 
 class Algorithms(object):
     goal_time = 0.2
@@ -108,13 +112,13 @@ def setup(self):
         self.df.iloc[10:20] = np.nan
 
     def time_frame(self):
-        hash_pandas_object(self.df)
+        hashing.hash_pandas_object(self.df)
 
     def time_series_int(self):
-        hash_pandas_object(self.df.E)
+        hashing.hash_pandas_object(self.df.E)
 
     def time_series_string(self):
-        hash_pandas_object(self.df.B)
+        hashing.hash_pandas_object(self.df.B)
 
     def time_series_categorical(self):
-        hash_pandas_object(self.df.C)
+        hashing.hash_pandas_object(self.df.C)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 02c54f28a1695..86d9bef636e17 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -533,7 +533,7 @@ Other Enhancements
 - ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`)
 - ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`).
 - ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`).
-- ``pandas.tools.hashing`` has gained a ``hash_tuples`` routine, and ``hash_pandas_object`` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`)
+- ``pandas.util.hashing`` has gained a ``hash_tuples`` routine, and ``hash_pandas_object`` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`)
 - ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`)
 - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`)
 - ``pd.read_html()`` will parse multiple header rows, creating a multiindex header. (:issue:`13434`).
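(For orientation: the module moved by this patch keeps the same functions; only the import path changes. Below is a minimal, illustrative sketch of the relocated API. The version-guarded import mirrors the benchmark change earlier in this patch, and the example frame and tuples are invented for illustration, not taken from the patch.)

.. code-block:: python

   import pandas as pd

   try:
       # location introduced by this patch (pandas >= 0.20)
       from pandas.util.hashing import hash_pandas_object, hash_tuples
   except ImportError:
       # earlier location during the 0.20 development cycle
       from pandas.tools.hashing import hash_pandas_object, hash_tuples

   df = pd.DataFrame({'A': [1, 2, 3], 'B': ['x', 'y', 'z']})

   # one deterministic uint64 per row; the index is hashed in by default
   row_hashes = hash_pandas_object(df)

   # hash_tuples accepts a MultiIndex (or a list of tuples)
   mi = pd.MultiIndex.from_tuples([(1, 'x'), (2, 'y')])
   mi_hashes = hash_tuples(mi)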
@@ -1423,7 +1423,7 @@ If indicated, a deprecation warning will be issued if you reference theses modul "pandas.types", "pandas.core.dtypes", "" "pandas.io.sas.saslib", "pandas.io.sas.libsas", "" "pandas._join", "pandas._libs.join", "" - "pandas._hash", "pandas.tools.libhash", "" + "pandas._hash", "pandas.util.libhashing", "" "pandas._period", "pandas._libs.period", "" "pandas._sparse", "pandas.core.sparse.libsparse", "" "pandas._testing", "pandas.util.libtesting", "" @@ -1619,7 +1619,7 @@ I/O - Bug in ``pd.read_csv()`` for the Python engine in which unhelpful error messages were being raised when parsing errors occurred (:issue:`15910`) - Bug in ``pd.read_csv()`` in which the ``skipfooter`` parameter was not being properly validated (:issue:`15925`) - Bug in ``pd.to_csv()`` in which there was numeric overflow when a timestamp index was being written (:issue:`15982`) -- Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) +- Bug in ``pd.util.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) - Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) - Bug in ``.to_json()`` for the C engine where rollover was not correctly handled for case where frac is odd and diff is exactly 0.5 (:issue:`15716`, :issue:`15864`) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 92baf9d289cd2..d46d2c78fbdb0 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -718,7 +718,7 @@ def _inferred_type_levels(self): @cache_readonly def _hashed_values(self): """ return a uint64 ndarray of my hashed values """ - from pandas.tools.hashing import hash_tuples + from pandas.util.hashing import hash_tuples return hash_tuples(self) def _hashed_indexing_key(self, key): @@ -740,7 +740,7 @@ def _hashed_indexing_key(self, key): we need to stringify if we have mixed levels """ - from pandas.tools.hashing import hash_tuples + from pandas.util.hashing import hash_tuples if not isinstance(key, tuple): return hash_tuples(key) diff --git a/pandas/tests/reshape/test_hashing.py b/pandas/tests/reshape/test_hashing.py index fba3a15182238..cba70bba6823f 100644 --- a/pandas/tests/reshape/test_hashing.py +++ b/pandas/tests/reshape/test_hashing.py @@ -5,7 +5,7 @@ import pandas as pd from pandas import DataFrame, Series, Index, MultiIndex -from pandas.tools.hashing import hash_array, hash_tuples, hash_pandas_object +from pandas.util.hashing import hash_array, hash_tuples, hash_pandas_object import pandas.util.testing as tm diff --git a/pandas/tools/hashing.py b/pandas/util/hashing.py similarity index 99% rename from pandas/tools/hashing.py rename to pandas/util/hashing.py index 275c1c87ea57a..3046c62a03f48 100644 --- a/pandas/tools/hashing.py +++ b/pandas/util/hashing.py @@ -5,7 +5,7 @@ import numpy as np from pandas import Series, factorize, Categorical, Index, MultiIndex -from pandas.tools import libhashing as _hash +from pandas.util import libhashing as _hash from pandas._libs.lib import is_bool_array from pandas.core.dtypes.generic import ( ABCIndexClass, diff --git a/pandas/tools/hashing.pyx b/pandas/util/hashing.pyx similarity index 100% rename from pandas/tools/hashing.pyx rename to 
pandas/util/hashing.pyx
diff --git a/setup.py b/setup.py
index 830968768ceb2..5647e18aa227c 100755
--- a/setup.py
+++ b/setup.py
@@ -528,8 +528,8 @@ def pxd(name):
                                   _pxi_dep['sparse'])},
     'util.libtesting': {'pyxfile': 'util/testing',
                         'depends': ['pandas/util/testing.pyx']},
-    'tools.libhashing': {'pyxfile': 'tools/hashing',
-                         'depends': ['pandas/tools/hashing.pyx']},
+    'util.libhashing': {'pyxfile': 'util/hashing',
+                        'depends': ['pandas/util/hashing.pyx']},
     'io.sas.libsas': {'pyxfile': 'io/sas/sas'},
 }

From 1c497062925da7168cc12abc79b8464d68fd20bd Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sat, 22 Apr 2017 00:37:45 +0200
Subject: [PATCH 445/933] DOC: document public/private subpackages in api.rst
 (#16087)

---
 doc/source/api.rst | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/doc/source/api.rst b/doc/source/api.rst
index caa5498db1ebf..ab14c2758ae49 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -5,6 +5,22 @@
 API Reference
 *************
 
+This page gives an overview of all public pandas objects, functions and
+methods. In general, all classes and functions exposed in the top-level
+``pandas.*`` namespace are regarded as public.
+
+Furthermore, some of the subpackages are public, including ``pandas.errors``,
+``pandas.plotting``, and ``pandas.testing``. Certain functions in the
+``pandas.io`` and ``pandas.tseries`` submodules are public as well (those
+mentioned in the documentation). Further, the ``pandas.api.types`` subpackage
+holds some public functions related to data types in pandas.
+
+
+.. warning::
+
+   The ``pandas.core``, ``pandas.compat``, and ``pandas.util`` top-level modules are considered to be PRIVATE. Stability of functionality in those modules is not guaranteed.
+
+
 .. _api.functions:
 
 Input/Output
@@ -1896,3 +1912,30 @@ Testing functions
    testing.assert_frame_equal
    testing.assert_series_equal
    testing.assert_index_equal
+
+
+Exceptions and warnings
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autosummary::
+   :toctree: generated/
+
+   errors.DtypeWarning
+   errors.EmptyDataError
+   errors.OutOfBoundsDatetime
+   errors.ParserError
+   errors.ParserWarning
+   errors.PerformanceWarning
+   errors.UnsortedIndexError
+   errors.UnsupportedFunctionCall
+
+
+Data types related functionality
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autosummary::
+   :toctree: generated/
+
+   api.types.union_categoricals
+   api.types.infer_dtype
+   api.types.pandas_dtype

From d313e4dd7605a658869f5d026d6705afb169ab40 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sat, 22 Apr 2017 00:55:38 +0200
Subject: [PATCH 446/933] DOC: some editing of 0.20 whatsnew file (#16085)

* DOC: some editing of 0.20 whatsnew file

* feedback
---
 doc/source/whatsnew/v0.20.0.txt | 455 +++++++++++++++++---------------
 1 file changed, 241 insertions(+), 214 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 86d9bef636e17..b6e538f3164d8 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -3,7 +3,7 @@
 v0.20.0 (May 12, 2017)
 ------------------------
 
-This is a major release from 0.19.2 and includes a small number of API changes, deprecations, new features,
+This is a major release from 0.19.2 and includes a number of API changes, deprecations, new features,
 enhancements, and performance improvements along with a large number of bug fixes. We
 recommend that all users upgrade to this version.
 
@@ -45,9 +45,9 @@ New features
 ^^^^^^^^^^^
 
 Series & DataFrame have been enhanced to support the aggregation API.
This is an already familiar API that -is supported for groupby, window operations, and resampling. This allows one to express, possibly multiple -aggregation operations, in a single concise way by using :meth:`~DataFrame.agg`, -and :meth:`~DataFrame.transform`. The full documentation is :ref:`here ` (:issue:`1623`) +is supported for groupby, window operations, and resampling. This allows one to express, possibly multiple, +aggregation operations in a single concise way by using :meth:`~DataFrame.agg`, +and :meth:`~DataFrame.transform`. The full documentation is :ref:`here ` (:issue:`1623`). Here is a sample @@ -149,42 +149,6 @@ Commonly called 'unix epoch' or POSIX time. This was the previous default, so th pd.to_datetime([1, 2, 3], unit='D') -.. _whatsnew_0200.enhancements.errors: - -``pandas.errors`` -^^^^^^^^^^^^^^^^^ - -We are adding a standard public module for all pandas exceptions & warnings ``pandas.errors``. (:issue:`14800`). Previously -these exceptions & warnings could be imported from ``pandas.core.common`` or ``pandas.io.common``. These exceptions and warnings -will be removed from the ``*.common`` locations in a future release. (:issue:`15541`) - -The following are now part of this API: - -.. code-block:: python - - ['DtypeWarning', - 'EmptyDataError', - 'OutOfBoundsDatetime', - 'ParserError', - 'ParserWarning', - 'PerformanceWarning', - 'UnsortedIndexError', - 'UnsupportedFunctionCall'] - - -.. _whatsnew_0200.enhancements.testing: - -``pandas.testing`` -^^^^^^^^^^^^^^^^^^ - -We are adding a standard module that exposes the public testing functions in ``pandas.testing`` (:issue:`9895`). Those functions can be used when writing tests for functionality using pandas objects. - -The following testing functions are now part of this API: - -- :func:`testing.assert_frame_equal` -- :func:`testing.assert_series_equal` -- :func:`testing.assert_index_equal` - .. _whatsnew_0200.enhancements.groupby_access: @@ -567,167 +531,10 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. _whatsnew_0200.api_breaking.deprecate_ix: - -Deprecate .ix -^^^^^^^^^^^^^ - -The ``.ix`` indexer is deprecated, in favor of the more strict ``.iloc`` and ``.loc`` indexers. ``.ix`` offers a lot of magic on the inference of what the user wants to do. To wit, ``.ix`` can decide to index *positionally* OR via *labels*, depending on the data type of the index. This has caused quite a bit of user confusion over the years. The full indexing documentation are :ref:`here `. (:issue:`14218`) - - -The recommended methods of indexing are: - -- ``.loc`` if you want to *label* index -- ``.iloc`` if you want to *positionally* index. - -Using ``.ix`` will now show a ``DeprecationWarning`` with a link to some examples of how to convert code :ref:`here `. - - -.. ipython:: python - - df = pd.DataFrame({'A': [1, 2, 3], - 'B': [4, 5, 6]}, - index=list('abc')) - - df - -Previous Behavior, where you wish to get the 0th and the 2nd elements from the index in the 'A' column. - -.. code-block:: ipython - - In [3]: df.ix[[0, 2], 'A'] - Out[3]: - a 1 - c 3 - Name: A, dtype: int64 - -Using ``.loc``. Here we will select the appropriate indexes from the index, then use *label* indexing. - -.. ipython:: python - - df.loc[df.index[[0, 2]], 'A'] - -Using ``.iloc``. Here we will get the location of the 'A' column, then use *positional* indexing to select things. - -.. ipython:: python - - df.iloc[[0, 2], df.columns.get_loc('A')] - - -.. 
_whatsnew_0200.api_breaking.deprecate_panel: - -Deprecate Panel -^^^^^^^^^^^^^^^ - -``Panel`` is deprecated and will be removed in a future version. The recommended way to represent 3-D data are -with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` or with the `xarray package `__. Pandas -provides a :meth:`~Panel.to_xarray` method to automate this conversion. See the documentation :ref:`Deprecate Panel `. (:issue:`13563`). - -.. ipython:: python - :okwarning: - - p = tm.makePanel() - p - -Convert to a MultiIndex DataFrame - -.. ipython:: python - - p.to_frame() - -Convert to an xarray DataArray - -.. ipython:: python - - p.to_xarray() - -.. _whatsnew_0200.api_breaking.deprecate_group_agg_dict: - -Deprecate groupby.agg() with a dictionary when renaming -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The ``.groupby(..).agg(..)``, ``.rolling(..).agg(..)``, and ``.resample(..).agg(..)`` syntax can accept a variable of inputs, including scalars, -list, and a dict of column names to scalars or lists. This provides a useful syntax for constructing multiple -(potentially different) aggregations. - -However, ``.agg(..)`` can *also* accept a dict that allows 'renaming' of the result columns. This is a complicated and confusing syntax, as well as not consistent -between ``Series`` and ``DataFrame``. We are deprecating this 'renaming' functionaility. - -- We are deprecating passing a dict to a grouped/rolled/resampled ``Series``. This allowed - one to ``rename`` the resulting aggregation, but this had a completely different - meaning than passing a dictionary to a grouped ``DataFrame``, which accepts column-to-aggregations. -- We are deprecating passing a dict-of-dicts to a grouped/rolled/resampled ``DataFrame`` in a similar manner. - -This is an illustrative example: - -.. ipython:: python - - df = pd.DataFrame({'A': [1, 1, 1, 2, 2], - 'B': range(5), - 'C': range(5)}) - df - -Here is a typical useful syntax for computing different aggregations for different columns. This -is a natural, and useful syntax. We aggregate from the dict-to-list by taking the specified -columns and applying the list of functions. This returns a ``MultiIndex`` for the columns. - -.. ipython:: python - - df.groupby('A').agg({'B': 'sum', 'C': 'min'}) - -Here's an example of the first deprecation, passing a dict to a grouped ``Series``. This -is a combination aggregation & renaming: - -.. code-block:: ipython - - In [6]: df.groupby('A').B.agg({'foo': 'count'}) - FutureWarning: using a dict on a Series for aggregation - is deprecated and will be removed in a future version - - Out[6]: - foo - A - 1 3 - 2 2 - -You can accomplish the same operation, more idiomatically by: - -.. ipython:: python - - df.groupby('A').B.agg(['count']).rename(columns={'count': 'foo'}) - - -Here's an example of the second deprecation, passing a dict-of-dict to a grouped ``DataFrame``: - -.. code-block:: python - - In [23]: (df.groupby('A') - .agg({'B': {'foo': 'sum'}, 'C': {'bar': 'min'}}) - ) - FutureWarning: using a dict with renaming is deprecated and - will be removed in a future version - - Out[23]: - B C - foo bar - A - 1 3 0 - 2 7 3 - - -You can accomplish nearly the same by: - -.. ipython:: python - - (df.groupby('A') - .agg({'B': 'sum', 'C': 'min'}) - .rename(columns={'B': 'foo', 'C': 'bar'}) - ) - .. 
_whatsnew.api_breaking.io_compat:
 
-Possible incompat for HDF5 formats for pandas < 0.13.0
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Possible incompatibility for HDF5 formats created with pandas < 0.13.0
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 ``pd.TimeSeries`` was deprecated officially in 0.17.0, though has only been an alias since 0.13.0. It has
 been dropped in favor of ``pd.Series``. (:issue:`15098`).
@@ -1389,10 +1196,11 @@ Other API Changes
 - ``DataFrame`` and ``Panel`` constructors with invalid input will now raise ``ValueError`` rather than ``pandas.core.common.PandasError``, if called with scalar inputs and not axes; The exception ``PandasError`` is removed as well. (:issue:`15541`)
 - The exception ``pandas.core.common.AmbiguousIndexError`` is removed as it is not referenced (:issue:`15541`)
 
+
 .. _whatsnew_0200.privacy:
 
-Privacy Changes
-~~~~~~~~~~~~~~~
+Reorganization of the library: Privacy Changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. _whatsnew_0200.privacy.extensions:
 
@@ -1400,7 +1208,7 @@ Modules Privacy Has Changed
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Some formerly public python/c/c++/cython extension modules have been moved and/or renamed. These are all removed from the public API.
-Furthermore, the ``pandas.core``, ``pandas.io``, and ``pandas.util`` top-level modules are now considered to be PRIVATE.
+Furthermore, the ``pandas.core``, ``pandas.compat``, and ``pandas.util`` top-level modules are now considered to be PRIVATE.
 If indicated, a deprecation warning will be issued if you reference these modules. (:issue:`12588`)
 
 .. csv-table::
@@ -1429,8 +1237,236 @@ If indicated, a deprecation warning will be issued if you reference theses modul
    "pandas._testing", "pandas.util.libtesting", ""
    "pandas._window", "pandas.core.libwindow", ""
 
+
+Some new subpackages are created with public functionality that is not directly
+exposed in the top-level namespace: ``pandas.errors``, ``pandas.plotting`` and
+``pandas.testing`` (more details below). Together with ``pandas.api.types`` and
+certain functions in the ``pandas.io`` and ``pandas.tseries`` submodules,
+these are now the public subpackages.
+
+ - The function :func:`~pandas.api.types.union_categoricals` is now importable from ``pandas.api.types``, formerly from ``pandas.types.concat`` (:issue:`15998`)
+
+.. _whatsnew_0200.privacy.errors:
+
+``pandas.errors``
+^^^^^^^^^^^^^^^^^
+
+We are adding a standard public module for all pandas exceptions & warnings ``pandas.errors``. (:issue:`14800`). Previously
+these exceptions & warnings could be imported from ``pandas.core.common`` or ``pandas.io.common``. These exceptions and warnings
+will be removed from the ``*.common`` locations in a future release. (:issue:`15541`)
+
+The following are now part of this API:
+
+.. code-block:: python
+
+   ['DtypeWarning',
+    'EmptyDataError',
+    'OutOfBoundsDatetime',
+    'ParserError',
+    'ParserWarning',
+    'PerformanceWarning',
+    'UnsortedIndexError',
+    'UnsupportedFunctionCall']
+
+
+.. _whatsnew_0200.privacy.testing:
+
+``pandas.testing``
+^^^^^^^^^^^^^^^^^^
+
+We are adding a standard module that exposes the public testing functions in ``pandas.testing`` (:issue:`9895`). Those functions can be used when writing tests for functionality using pandas objects.
+
+The following testing functions are now part of this API:
+
+- :func:`testing.assert_frame_equal`
+- :func:`testing.assert_series_equal`
+- :func:`testing.assert_index_equal`
+
+
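(For orientation: a minimal, illustrative use of the public testing functions listed above; the example frames are invented for illustration, not taken from the patch.)

.. code-block:: python

   import pandas as pd
   from pandas.testing import assert_frame_equal  # public as of 0.20

   left = pd.DataFrame({'A': [1, 2, 3]})
   right = pd.DataFrame({'A': [1, 2, 3]})

   # silent when the frames match; raises AssertionError with a
   # detailed message describing the difference when they do not
   assert_frame_equal(left, right)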
+.. _whatsnew_0200.privacy.plotting:
+
+``pandas.plotting``
+^^^^^^^^^^^^^^^^^^^
+
+A new public ``pandas.plotting`` module has been added that holds plotting functionality that was previously in either ``pandas.tools.plotting`` or in the top-level namespace. See the :ref:`deprecations section ` for more details.
+
+
+.. _whatsnew_0200.privacy.development:
+
+Other Development Changes
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`)
+- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`)
+- Reorganization of timeseries tests (:issue:`14854`)
+- Reorganization of date converter tests (:issue:`15707`)
+
+.. _whatsnew_0200.deprecations:
+
+Deprecations
+~~~~~~~~~~~~
+
+.. _whatsnew_0200.api_breaking.deprecate_ix:
+
+Deprecate ``.ix``
+^^^^^^^^^^^^^^^^^
+
+The ``.ix`` indexer is deprecated, in favor of the more strict ``.iloc`` and ``.loc`` indexers. ``.ix`` offers a lot of magic on the inference of what the user wants to do. To wit, ``.ix`` can decide to index *positionally* OR via *labels*, depending on the data type of the index. This has caused quite a bit of user confusion over the years. The full indexing documentation is :ref:`here `. (:issue:`14218`)
+
+
+The recommended methods of indexing are:
+
+- ``.loc`` if you want to *label* index
+- ``.iloc`` if you want to *positionally* index.
+
+Using ``.ix`` will now show a ``DeprecationWarning`` with a link to some examples of how to convert code :ref:`here `.
+
+
+.. ipython:: python
+
+   df = pd.DataFrame({'A': [1, 2, 3],
+                      'B': [4, 5, 6]},
+                     index=list('abc'))
+
+   df
+
+Previous Behavior, where you wish to get the 0th and the 2nd elements from the index in the 'A' column.
+
+.. code-block:: ipython
+
+   In [3]: df.ix[[0, 2], 'A']
+   Out[3]:
+   a    1
+   c    3
+   Name: A, dtype: int64
+
+Using ``.loc``. Here we will select the appropriate indexes from the index, then use *label* indexing.
+
+.. ipython:: python
+
+   df.loc[df.index[[0, 2]], 'A']
+
+Using ``.iloc``. Here we will get the location of the 'A' column, then use *positional* indexing to select things.
+
+.. ipython:: python
+
+   df.iloc[[0, 2], df.columns.get_loc('A')]
+
+
+.. _whatsnew_0200.api_breaking.deprecate_panel:
+
+Deprecate Panel
+^^^^^^^^^^^^^^^
+
+``Panel`` is deprecated and will be removed in a future version. The recommended way to represent 3-D data is
+with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` or with the `xarray package `__. Pandas
+provides a :meth:`~Panel.to_xarray` method to automate this conversion. See the documentation :ref:`Deprecate Panel `. (:issue:`13563`).
+
+.. ipython:: python
+   :okwarning:
+
+   p = tm.makePanel()
+   p
+
+Convert to a MultiIndex DataFrame
+
+.. ipython:: python
+
+   p.to_frame()
+
+Convert to an xarray DataArray
+
+.. ipython:: python
+
+   p.to_xarray()
+
+.. _whatsnew_0200.api_breaking.deprecate_group_agg_dict:
+
+Deprecate groupby.agg() with a dictionary when renaming
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``.groupby(..).agg(..)``, ``.rolling(..).agg(..)``, and ``.resample(..).agg(..)`` syntax can accept a variety of inputs, including scalars,
+lists, and a dict of column names to scalars or lists. This provides a useful syntax for constructing multiple
+(potentially different) aggregations.
+
+However, ``.agg(..)`` can *also* accept a dict that allows 'renaming' of the result columns. This is a complicated and confusing syntax, as well as not consistent
+between ``Series`` and ``DataFrame``. We are deprecating this 'renaming' functionality.
+
+- We are deprecating passing a dict to a grouped/rolled/resampled ``Series``. This allowed
+  one to ``rename`` the resulting aggregation, but this had a completely different
+  meaning than passing a dictionary to a grouped ``DataFrame``, which accepts column-to-aggregations.
+- We are deprecating passing a dict-of-dicts to a grouped/rolled/resampled ``DataFrame`` in a similar manner.
+
+This is an illustrative example:
+
+.. ipython:: python
+
+   df = pd.DataFrame({'A': [1, 1, 1, 2, 2],
+                      'B': range(5),
+                      'C': range(5)})
+   df
+
+Here is a typical useful syntax for computing different aggregations for different columns. This
+is a natural, and useful syntax. We aggregate from the dict-to-list by taking the specified
+columns and applying the list of functions. This returns a ``MultiIndex`` for the columns.
+
+.. ipython:: python
+
+   df.groupby('A').agg({'B': 'sum', 'C': 'min'})
+
+Here's an example of the first deprecation, passing a dict to a grouped ``Series``. This
+is a combination aggregation & renaming:
+
+.. code-block:: ipython
+
+   In [6]: df.groupby('A').B.agg({'foo': 'count'})
+   FutureWarning: using a dict on a Series for aggregation
+   is deprecated and will be removed in a future version
+
+   Out[6]:
+      foo
+   A
+   1    3
+   2    2
+
+You can accomplish the same operation, more idiomatically by:
+
+.. ipython:: python
+
+   df.groupby('A').B.agg(['count']).rename(columns={'count': 'foo'})
+
+
+Here's an example of the second deprecation, passing a dict-of-dict to a grouped ``DataFrame``:
+
+.. code-block:: python
+
+   In [23]: (df.groupby('A')
+                .agg({'B': {'foo': 'sum'}, 'C': {'bar': 'min'}})
+            )
+   FutureWarning: using a dict with renaming is deprecated and
+   will be removed in a future version
+
+   Out[23]:
+         B   C
+       foo bar
+   A
+   1     3   0
+   2     7   3
+
+
+You can accomplish nearly the same by:
+
+.. ipython:: python
+
+   (df.groupby('A')
+      .agg({'B': 'sum', 'C': 'min'})
+      .rename(columns={'B': 'foo', 'C': 'bar'})
+   )
+
+
 
 .. _whatsnew_0200.privacy.deprecate_plotting:
 
 Deprecate .plotting
@@ -1456,20 +1492,11 @@ Should be changed to:
 
     pd.plotting.scatter_matrix(df)
 
-.. _whatsnew_0200.privacy.development:
-
-Other Developement Changes
-^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-- Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`)
-- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`)
-- Reorganization of timeseries tests (:issue:`14854`)
-- Reorganization of date converter tests (:issue:`15707`)
-
-.. 
_whatsnew_0200.deprecations: - -Deprecations -~~~~~~~~~~~~ +Other Deprecations +^^^^^^^^^^^^^^^^^^ - ``SparseArray.to_dense()`` has deprecated the ``fill`` parameter, as that parameter was not being respected (:issue:`14647`) - ``SparseSeries.to_dense()`` has deprecated the ``sparse_only`` parameter (:issue:`14647`) From f5623087f010c21d1ccd618881e308ccf3b6d1b0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 21 Apr 2017 20:05:27 -0400 Subject: [PATCH 447/933] BUG: bug in groupby on empty frame with multi groupers (#16090) * TST: separate out groupby/test_nth * BUG: bug in groupby on empty frame with multi groupers xref #14784 closes #16064 --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/indexes/multi.py | 9 +- pandas/tests/groupby/test_groupby.py | 225 +----------------------- pandas/tests/groupby/test_nth.py | 248 +++++++++++++++++++++++++++ 4 files changed, 255 insertions(+), 229 deletions(-) create mode 100644 pandas/tests/groupby/test_nth.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index b6e538f3164d8..99df7426aca6d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1627,7 +1627,7 @@ Indexing - Bug in the HTML display with with a ``MultiIndex`` and truncation (:issue:`14882`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``pd.concat()`` where the names of ``MultiIndex`` of resulting ``DataFrame`` are not handled correctly when ``None`` is presented in the names of ``MultiIndex`` of input ``DataFrame`` (:issue:`15787`) -- Bug in ``DataFrame.sort_index()`` and ``Series.sort_index()`` where ``na_position`` doesn't work with a ``MultiIndex`` (:issue:`14784`) +- Bug in ``DataFrame.sort_index()`` and ``Series.sort_index()`` where ``na_position`` doesn't work with a ``MultiIndex`` (:issue:`14784`, :issue:`16604`) I/O ^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d46d2c78fbdb0..c760d2943b823 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1645,10 +1645,11 @@ def _get_labels_for_sorting(self): """ from pandas.core.categorical import Categorical - return [Categorical.from_codes(label, - np.arange(np.array(label).max() + 1, - dtype=label.dtype), - ordered=True) + def cats(label): + return np.arange(np.array(label).max() + 1 if len(label) else 0, + dtype=label.dtype) + + return [Categorical.from_codes(label, cats(label), ordered=True) for label in self.labels] def sortlevel(self, level=0, ascending=True, sort_remaining=True): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 25ebfef327476..752c0689b0660 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -9,7 +9,7 @@ from numpy import nan from pandas import (date_range, bdate_range, Timestamp, - isnull, Index, MultiIndex, DataFrame, Series, + Index, MultiIndex, DataFrame, Series, concat, Panel, DatetimeIndex) from pandas.errors import UnsupportedFunctionCall, PerformanceWarning from pandas.util.testing import (assert_panel_equal, assert_frame_equal, @@ -87,229 +87,6 @@ def test_select_bad_cols(self): # will have to rethink regex if you change message! 
g[['A', 'C']] - def test_first_last_nth(self): - # tests for first / last / nth - grouped = self.df.groupby('A') - first = grouped.first() - expected = self.df.loc[[1, 0], ['B', 'C', 'D']] - expected.index = Index(['bar', 'foo'], name='A') - expected = expected.sort_index() - assert_frame_equal(first, expected) - - nth = grouped.nth(0) - assert_frame_equal(nth, expected) - - last = grouped.last() - expected = self.df.loc[[5, 7], ['B', 'C', 'D']] - expected.index = Index(['bar', 'foo'], name='A') - assert_frame_equal(last, expected) - - nth = grouped.nth(-1) - assert_frame_equal(nth, expected) - - nth = grouped.nth(1) - expected = self.df.loc[[2, 3], ['B', 'C', 'D']].copy() - expected.index = Index(['foo', 'bar'], name='A') - expected = expected.sort_index() - assert_frame_equal(nth, expected) - - # it works! - grouped['B'].first() - grouped['B'].last() - grouped['B'].nth(0) - - self.df.loc[self.df['A'] == 'foo', 'B'] = np.nan - self.assertTrue(isnull(grouped['B'].first()['foo'])) - self.assertTrue(isnull(grouped['B'].last()['foo'])) - self.assertTrue(isnull(grouped['B'].nth(0)['foo'])) - - # v0.14.0 whatsnew - df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) - g = df.groupby('A') - result = g.first() - expected = df.iloc[[1, 2]].set_index('A') - assert_frame_equal(result, expected) - - expected = df.iloc[[1, 2]].set_index('A') - result = g.nth(0, dropna='any') - assert_frame_equal(result, expected) - - def test_first_last_nth_dtypes(self): - - df = self.df_mixed_floats.copy() - df['E'] = True - df['F'] = 1 - - # tests for first / last / nth - grouped = df.groupby('A') - first = grouped.first() - expected = df.loc[[1, 0], ['B', 'C', 'D', 'E', 'F']] - expected.index = Index(['bar', 'foo'], name='A') - expected = expected.sort_index() - assert_frame_equal(first, expected) - - last = grouped.last() - expected = df.loc[[5, 7], ['B', 'C', 'D', 'E', 'F']] - expected.index = Index(['bar', 'foo'], name='A') - expected = expected.sort_index() - assert_frame_equal(last, expected) - - nth = grouped.nth(1) - expected = df.loc[[3, 2], ['B', 'C', 'D', 'E', 'F']] - expected.index = Index(['bar', 'foo'], name='A') - expected = expected.sort_index() - assert_frame_equal(nth, expected) - - # GH 2763, first/last shifting dtypes - idx = lrange(10) - idx.append(9) - s = Series(data=lrange(11), index=idx, name='IntCol') - self.assertEqual(s.dtype, 'int64') - f = s.groupby(level=0).first() - self.assertEqual(f.dtype, 'int64') - - def test_nth(self): - df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) - g = df.groupby('A') - - assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index('A')) - assert_frame_equal(g.nth(1), df.iloc[[1]].set_index('A')) - assert_frame_equal(g.nth(2), df.loc[[]].set_index('A')) - assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index('A')) - assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index('A')) - assert_frame_equal(g.nth(-3), df.loc[[]].set_index('A')) - assert_series_equal(g.B.nth(0), df.set_index('A').B.iloc[[0, 2]]) - assert_series_equal(g.B.nth(1), df.set_index('A').B.iloc[[1]]) - assert_frame_equal(g[['B']].nth(0), - df.loc[[0, 2], ['A', 'B']].set_index('A')) - - exp = df.set_index('A') - assert_frame_equal(g.nth(0, dropna='any'), exp.iloc[[1, 2]]) - assert_frame_equal(g.nth(-1, dropna='any'), exp.iloc[[1, 2]]) - - exp['B'] = np.nan - assert_frame_equal(g.nth(7, dropna='any'), exp.iloc[[1, 2]]) - assert_frame_equal(g.nth(2, dropna='any'), exp.iloc[[1, 2]]) - - # out of bounds, regression from 0.13.1 - # GH 6621 - df = DataFrame({'color': {0: 
'green', - 1: 'green', - 2: 'red', - 3: 'red', - 4: 'red'}, - 'food': {0: 'ham', - 1: 'eggs', - 2: 'eggs', - 3: 'ham', - 4: 'pork'}, - 'two': {0: 1.5456590000000001, - 1: -0.070345000000000005, - 2: -2.4004539999999999, - 3: 0.46206000000000003, - 4: 0.52350799999999997}, - 'one': {0: 0.56573799999999996, - 1: -0.9742360000000001, - 2: 1.033801, - 3: -0.78543499999999999, - 4: 0.70422799999999997}}).set_index(['color', - 'food']) - - result = df.groupby(level=0, as_index=False).nth(2) - expected = df.iloc[[-1]] - assert_frame_equal(result, expected) - - result = df.groupby(level=0, as_index=False).nth(3) - expected = df.loc[[]] - assert_frame_equal(result, expected) - - # GH 7559 - # from the vbench - df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype='int64') - s = df[1] - g = df[0] - expected = s.groupby(g).first() - expected2 = s.groupby(g).apply(lambda x: x.iloc[0]) - assert_series_equal(expected2, expected, check_names=False) - self.assertTrue(expected.name, 0) - self.assertEqual(expected.name, 1) - - # validate first - v = s[g == 1].iloc[0] - self.assertEqual(expected.iloc[0], v) - self.assertEqual(expected2.iloc[0], v) - - # this is NOT the same as .first (as sorted is default!) - # as it keeps the order in the series (and not the group order) - # related GH 7287 - expected = s.groupby(g, sort=False).first() - result = s.groupby(g, sort=False).nth(0, dropna='all') - assert_series_equal(result, expected) - - # doc example - df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) - g = df.groupby('A') - result = g.B.nth(0, dropna=True) - expected = g.B.first() - assert_series_equal(result, expected) - - # test multiple nth values - df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]], - columns=['A', 'B']) - g = df.groupby('A') - - assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index('A')) - assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index('A')) - assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index('A')) - assert_frame_equal( - g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index('A')) - assert_frame_equal( - g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) - assert_frame_equal( - g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) - assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index('A')) - assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index('A')) - - business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', - freq='B') - df = DataFrame(1, index=business_dates, columns=['a', 'b']) - # get the first, fourth and last two business days for each month - key = (df.index.year, df.index.month) - result = df.groupby(key, as_index=False).nth([0, 3, -2, -1]) - expected_dates = pd.to_datetime( - ['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1', - '2014/5/6', '2014/5/29', '2014/5/30', '2014/6/2', '2014/6/5', - '2014/6/27', '2014/6/30']) - expected = DataFrame(1, columns=['a', 'b'], index=expected_dates) - assert_frame_equal(result, expected) - - def test_nth_multi_index(self): - # PR 9090, related to issue 8979 - # test nth on MultiIndex, should match .first() - grouped = self.three_group.groupby(['A', 'B']) - result = grouped.nth(0) - expected = grouped.first() - assert_frame_equal(result, expected) - - def test_nth_multi_index_as_expected(self): - # PR 9090, related to issue 8979 - # test nth on MultiIndex - three_group = DataFrame( - {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], 
- 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', - 'dull', 'shiny', 'shiny', 'shiny']}) - grouped = three_group.groupby(['A', 'B']) - result = grouped.nth(0) - expected = DataFrame( - {'C': ['dull', 'dull', 'dull', 'dull']}, - index=MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo'], - ['one', 'two', 'one', 'two']], - names=['A', 'B'])) - assert_frame_equal(result, expected) - def test_group_selection_cache(self): # GH 12839 nth, head, and tail should return same result consistently df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B']) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py new file mode 100644 index 0000000000000..bf2f1f1f9cbc5 --- /dev/null +++ b/pandas/tests/groupby/test_nth.py @@ -0,0 +1,248 @@ +import numpy as np +import pandas as pd +from pandas import DataFrame, MultiIndex, Index, Series, isnull +from pandas.compat import lrange +from pandas.util import testing as tm +from pandas.util.testing import assert_frame_equal, assert_series_equal + +from .common import MixIn + + +class TestNth(MixIn, tm.TestCase): + + def test_first_last_nth(self): + # tests for first / last / nth + grouped = self.df.groupby('A') + first = grouped.first() + expected = self.df.loc[[1, 0], ['B', 'C', 'D']] + expected.index = Index(['bar', 'foo'], name='A') + expected = expected.sort_index() + assert_frame_equal(first, expected) + + nth = grouped.nth(0) + assert_frame_equal(nth, expected) + + last = grouped.last() + expected = self.df.loc[[5, 7], ['B', 'C', 'D']] + expected.index = Index(['bar', 'foo'], name='A') + assert_frame_equal(last, expected) + + nth = grouped.nth(-1) + assert_frame_equal(nth, expected) + + nth = grouped.nth(1) + expected = self.df.loc[[2, 3], ['B', 'C', 'D']].copy() + expected.index = Index(['foo', 'bar'], name='A') + expected = expected.sort_index() + assert_frame_equal(nth, expected) + + # it works! 
+ grouped['B'].first() + grouped['B'].last() + grouped['B'].nth(0) + + self.df.loc[self.df['A'] == 'foo', 'B'] = np.nan + self.assertTrue(isnull(grouped['B'].first()['foo'])) + self.assertTrue(isnull(grouped['B'].last()['foo'])) + self.assertTrue(isnull(grouped['B'].nth(0)['foo'])) + + # v0.14.0 whatsnew + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + g = df.groupby('A') + result = g.first() + expected = df.iloc[[1, 2]].set_index('A') + assert_frame_equal(result, expected) + + expected = df.iloc[[1, 2]].set_index('A') + result = g.nth(0, dropna='any') + assert_frame_equal(result, expected) + + def test_first_last_nth_dtypes(self): + + df = self.df_mixed_floats.copy() + df['E'] = True + df['F'] = 1 + + # tests for first / last / nth + grouped = df.groupby('A') + first = grouped.first() + expected = df.loc[[1, 0], ['B', 'C', 'D', 'E', 'F']] + expected.index = Index(['bar', 'foo'], name='A') + expected = expected.sort_index() + assert_frame_equal(first, expected) + + last = grouped.last() + expected = df.loc[[5, 7], ['B', 'C', 'D', 'E', 'F']] + expected.index = Index(['bar', 'foo'], name='A') + expected = expected.sort_index() + assert_frame_equal(last, expected) + + nth = grouped.nth(1) + expected = df.loc[[3, 2], ['B', 'C', 'D', 'E', 'F']] + expected.index = Index(['bar', 'foo'], name='A') + expected = expected.sort_index() + assert_frame_equal(nth, expected) + + # GH 2763, first/last shifting dtypes + idx = lrange(10) + idx.append(9) + s = Series(data=lrange(11), index=idx, name='IntCol') + self.assertEqual(s.dtype, 'int64') + f = s.groupby(level=0).first() + self.assertEqual(f.dtype, 'int64') + + def test_nth(self): + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + g = df.groupby('A') + + assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index('A')) + assert_frame_equal(g.nth(1), df.iloc[[1]].set_index('A')) + assert_frame_equal(g.nth(2), df.loc[[]].set_index('A')) + assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index('A')) + assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index('A')) + assert_frame_equal(g.nth(-3), df.loc[[]].set_index('A')) + assert_series_equal(g.B.nth(0), df.set_index('A').B.iloc[[0, 2]]) + assert_series_equal(g.B.nth(1), df.set_index('A').B.iloc[[1]]) + assert_frame_equal(g[['B']].nth(0), + df.loc[[0, 2], ['A', 'B']].set_index('A')) + + exp = df.set_index('A') + assert_frame_equal(g.nth(0, dropna='any'), exp.iloc[[1, 2]]) + assert_frame_equal(g.nth(-1, dropna='any'), exp.iloc[[1, 2]]) + + exp['B'] = np.nan + assert_frame_equal(g.nth(7, dropna='any'), exp.iloc[[1, 2]]) + assert_frame_equal(g.nth(2, dropna='any'), exp.iloc[[1, 2]]) + + # out of bounds, regression from 0.13.1 + # GH 6621 + df = DataFrame({'color': {0: 'green', + 1: 'green', + 2: 'red', + 3: 'red', + 4: 'red'}, + 'food': {0: 'ham', + 1: 'eggs', + 2: 'eggs', + 3: 'ham', + 4: 'pork'}, + 'two': {0: 1.5456590000000001, + 1: -0.070345000000000005, + 2: -2.4004539999999999, + 3: 0.46206000000000003, + 4: 0.52350799999999997}, + 'one': {0: 0.56573799999999996, + 1: -0.9742360000000001, + 2: 1.033801, + 3: -0.78543499999999999, + 4: 0.70422799999999997}}).set_index(['color', + 'food']) + + result = df.groupby(level=0, as_index=False).nth(2) + expected = df.iloc[[-1]] + assert_frame_equal(result, expected) + + result = df.groupby(level=0, as_index=False).nth(3) + expected = df.loc[[]] + assert_frame_equal(result, expected) + + # GH 7559 + # from the vbench + df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype='int64') + s = df[1] + g = df[0] + expected = 
s.groupby(g).first() + expected2 = s.groupby(g).apply(lambda x: x.iloc[0]) + assert_series_equal(expected2, expected, check_names=False) + self.assertTrue(expected.name, 0) + self.assertEqual(expected.name, 1) + + # validate first + v = s[g == 1].iloc[0] + self.assertEqual(expected.iloc[0], v) + self.assertEqual(expected2.iloc[0], v) + + # this is NOT the same as .first (as sorted is default!) + # as it keeps the order in the series (and not the group order) + # related GH 7287 + expected = s.groupby(g, sort=False).first() + result = s.groupby(g, sort=False).nth(0, dropna='all') + assert_series_equal(result, expected) + + # doc example + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + g = df.groupby('A') + result = g.B.nth(0, dropna=True) + expected = g.B.first() + assert_series_equal(result, expected) + + # test multiple nth values + df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]], + columns=['A', 'B']) + g = df.groupby('A') + + assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index('A')) + assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index('A')) + assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index('A')) + assert_frame_equal( + g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index('A')) + assert_frame_equal( + g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) + assert_frame_equal( + g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) + assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index('A')) + assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index('A')) + + business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', + freq='B') + df = DataFrame(1, index=business_dates, columns=['a', 'b']) + # get the first, fourth and last two business days for each month + key = (df.index.year, df.index.month) + result = df.groupby(key, as_index=False).nth([0, 3, -2, -1]) + expected_dates = pd.to_datetime( + ['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1', + '2014/5/6', '2014/5/29', '2014/5/30', '2014/6/2', '2014/6/5', + '2014/6/27', '2014/6/30']) + expected = DataFrame(1, columns=['a', 'b'], index=expected_dates) + assert_frame_equal(result, expected) + + def test_nth_multi_index(self): + # PR 9090, related to issue 8979 + # test nth on MultiIndex, should match .first() + grouped = self.three_group.groupby(['A', 'B']) + result = grouped.nth(0) + expected = grouped.first() + assert_frame_equal(result, expected) + + def test_nth_multi_index_as_expected(self): + # PR 9090, related to issue 8979 + # test nth on MultiIndex + three_group = DataFrame( + {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', + 'dull', 'shiny', 'shiny', 'shiny']}) + grouped = three_group.groupby(['A', 'B']) + result = grouped.nth(0) + expected = DataFrame( + {'C': ['dull', 'dull', 'dull', 'dull']}, + index=MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo'], + ['one', 'two', 'one', 'two']], + names=['A', 'B'])) + assert_frame_equal(result, expected) + + +def test_nth_empty(): + # GH 16064 + df = DataFrame(index=[0], columns=['a', 'b', 'c']) + result = df.groupby('a').nth(10) + expected = DataFrame(index=Index([], name='a'), columns=['b', 'c']) + assert_frame_equal(result, expected) + + result = df.groupby(['a', 'b']).nth(10) + expected = DataFrame(index=MultiIndex([[], []], [[], []], + names=['a', 'b']), + columns=['c']) + assert_frame_equal(result, 
expected) From f0bd908336a260cafa9d83c8244dd1a0a056f72d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 21 Apr 2017 21:28:27 -0400 Subject: [PATCH 448/933] BUG: groupby-rolling with a timedelta (#16091) closes #13966 xref to #15130, closed by #15175 --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/tests/test_window.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 99df7426aca6d..781e90a39e48f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1677,7 +1677,7 @@ Groupby/Resample/Rolling - Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. (:issue:`15021`) - Properly set ``__name__`` and ``__qualname__`` for ``Groupby.*`` functions (:issue:`14620`) - Bug in ``GroupBy.get_group()`` failing with a categorical grouper (:issue:`15155`) -- Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`) +- Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`, :issue:`13966`) - Bug in groupby operations with ``timedelta64`` when passing ``numeric_only=False`` (:issue:`5724`) - Bug in ``groupby.apply()`` coercing ``object`` dtypes to numeric types, when not all values were numeric (:issue:`14423`, :issue:`15421`, :issue:`15670`) - Bug in ``resample``, where a non-string ``loffset`` argument would not be applied when resampling a timeseries (:issue:`13218`) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 101ecec783533..aea2525a3a1f9 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3782,3 +3782,21 @@ def test_groupby_monotonic(self): lambda x: x.rolling('180D')['amount'].sum()) result = df.groupby('name').rolling('180D', on='date')['amount'].sum() tm.assert_series_equal(result, expected) + + def test_non_monotonic(self): + # GH 13966 (similar to #15130, closed by #15175) + + dates = pd.date_range(start='2016-01-01 09:30:00', + periods=20, freq='s') + df = pd.DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8, + 'B': np.concatenate((dates, dates)), + 'C': np.arange(40)}) + + result = df.groupby('A').rolling('4s', on='B').C.mean() + expected = df.set_index('B').groupby('A').apply( + lambda x: x.rolling('4s')['C'].mean()) + tm.assert_series_equal(result, expected) + + df2 = df.sort_values('B') + result = df2.groupby('A').rolling('4s', on='B').C.mean() + tm.assert_series_equal(result, expected) From c728a227840a550f4dfe1b9a43a901e384fc23fb Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 21 Apr 2017 22:15:46 -0400 Subject: [PATCH 449/933] TST: catch warnings in test_css (#16094) closes #16033 --- pandas/tests/io/formats/test_css.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/formats/test_css.py b/pandas/tests/io/formats/test_css.py index 904a333926c33..44f95266b6c78 100644 --- a/pandas/tests/io/formats/test_css.py +++ b/pandas/tests/io/formats/test_css.py @@ -1,5 +1,6 @@ import pytest +from pandas.util import testing as tm from pandas.io.formats.css import CSSResolver, CSSWarning @@ -35,20 +36,23 @@ def test_css_parse_comments(): @pytest.mark.xfail(reason='''we don't need to handle specificity - markers like !important, but we should - ignore them in the future''') + markers like !important, but we should + ignore them in the future''') def test_css_parse_specificity(): assert_same_resolution('font-weight: bold', 
                           'font-weight: bold !important')


 @pytest.mark.xfail(reason='Splitting CSS declarations not yet sensitive to '
-                   '; in CSS strings')
 def test_css_parse_strings():
     # semicolons in strings
-    assert_resolves('background-image: url(\'http://blah.com/foo?a;b=c\')',
-                    {'background-image': 'url(\'http://blah.com/foo?a;b=c\')'})
-    assert_resolves('background-image: url("http://blah.com/foo?a;b=c")',
-                    {'background-image': 'url("http://blah.com/foo?a;b=c")'})
+    with tm.assert_produces_warning(CSSWarning):
+        assert_resolves(
+            'background-image: url(\'http://blah.com/foo?a;b=c\')',
+            {'background-image': 'url(\'http://blah.com/foo?a;b=c\')'})
+        assert_resolves(
+            'background-image: url("http://blah.com/foo?a;b=c")',
+            {'background-image': 'url("http://blah.com/foo?a;b=c")'})


 @pytest.mark.parametrize(
@@ -77,7 +81,7 @@ def test_css_parse_strings():
     ('font-size: 10 pt', 'font-size: 1em'),
 ])
 def test_css_parse_invalid(invalid_css, remainder):
-    with pytest.warns(CSSWarning):
+    with tm.assert_produces_warning(CSSWarning):
         assert_same_resolution(invalid_css, remainder)

     # TODO: we should be checking that in other cases no warnings are raised

@@ -115,7 +119,7 @@ def test_css_side_shorthands(shorthand, expansions):
                     {top: '1pt', right: '4pt',
                      bottom: '2pt', left: '0pt'})

-    with pytest.warns(CSSWarning):
+    with tm.assert_produces_warning(CSSWarning):
         assert_resolves('%s: 1pt 1pt 1pt 1pt 1pt' % shorthand, {})

From 5b07d02bbad8fb13388e12e7797e4b022c02b2b5 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Fri, 21 Apr 2017 22:18:11 -0400
Subject: [PATCH 450/933] API: raise NotImplementedError on
 to_json(orient='table') for a DataFrame with a column MultiIndex (#16095)

closes #15996
---
 pandas/io/json/json.py                   |  7 ++++++-
 pandas/tests/io/formats/test_printing.py | 17 ++++++++++++++++-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
index 7149ab497a00d..28ea8298cee9e 100644
--- a/pandas/io/json/json.py
+++ b/pandas/io/json/json.py
@@ -6,7 +6,7 @@
 from pandas._libs.tslib import iNaT
 from pandas.compat import StringIO, long, u
 from pandas import compat, isnull
-from pandas import Series, DataFrame, to_datetime
+from pandas import Series, DataFrame, to_datetime, MultiIndex
 from pandas.io.common import get_filepath_or_buffer, _get_handle
 from pandas.core.common import AbstractMethodError
 from pandas.io.formats.printing import pprint_thing
@@ -138,6 +138,11 @@ def __init__(self, obj, orient, date_format, double_precision,

         self.schema = build_table_schema(obj)

+        # NotImplemented on a column MultiIndex
+        if obj.ndim == 2 and isinstance(obj.columns, MultiIndex):
+            raise NotImplementedError(
+                "orient='table' is not supported for MultiIndex")
+
         # TODO: Do this timedelta properly in objToJSON.c See GH #15137
         if ((obj.ndim == 1) and (obj.name in set(obj.index.names)) or
                 len(obj.columns & obj.index.names)):
diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py
index d2c3b47aba042..882c9b9bd42f6 100644
--- a/pandas/tests/io/formats/test_printing.py
+++ b/pandas/tests/io/formats/test_printing.py
@@ -1,7 +1,10 @@
 # -*- coding: utf-8 -*-
 import pytest
-from pandas import compat
+
+import numpy as np
 import pandas as pd
+
+from pandas import compat
 import pandas.io.formats.printing as printing
 import pandas.io.formats.format as fmt
 import pandas.util.testing as tm
@@ -166,6 +169,18 @@ def test_publishes(self):
                     'application/vnd.dataresource+json'}
         self.assertEqual(set(arg.keys()), expected)

+    def 
test_publishes_not_implemented(self): + # column MultiIndex + # GH 15996 + midx = pd.MultiIndex.from_product([['A', 'B'], ['a', 'b', 'c']]) + df = pd.DataFrame(np.random.randn(5, len(midx)), columns=midx) + + make_patch = self.mock.patch('IPython.display.display') + opt = pd.option_context('display.html.table_schema', True) + with opt, make_patch as mock_display: # noqa + with pytest.raises(NotImplementedError): + df._ipython_display_() + def test_config_on(self): df = pd.DataFrame({"A": [1, 2]}) with pd.option_context("display.html.table_schema", True): From c847884c9726eb917a540822430aaf2036174b72 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 21 Apr 2017 22:58:17 -0400 Subject: [PATCH 451/933] BUG: fix degenerate MultiIndex sorting (#16092) xref #15694 closes #15797 --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/frame.py | 3 +++ pandas/core/indexes/base.py | 4 ++++ pandas/core/reshape/reshape.py | 9 ++++++++- pandas/core/series.py | 1 + pandas/tests/test_multilevel.py | 33 +++++++++++++++++++-------------- 6 files changed, 36 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 781e90a39e48f..945922b5f9ba8 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -870,7 +870,7 @@ DataFrame.sort_index changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In certain cases, calling ``.sort_index()`` on a MultiIndexed DataFrame would return the *same* DataFrame without seeming to sort. -This would happen with a ``lexsorted``, but non-monotonic levels. (:issue:`15622`, :issue:`15687`, :issue:`14015`, :issue:`13431`) +This would happen with a ``lexsorted``, but non-monotonic levels. (:issue:`15622`, :issue:`15687`, :issue:`14015`, :issue:`13431`, :issue:`15797`) This is *unchanged* from prior versions, but shown for illustration purposes: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dd2b975560186..b3da897b97e5c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3364,6 +3364,9 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, axis=baxis, convert=False, verify=False) + # reconstruct axis if needed + new_data.axes[baxis] = new_data.axes[baxis]._sort_levels_monotonic() + if inplace: return self._update_inplace(new_data) else: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index dcb9f9a144f39..04458d684d795 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -465,6 +465,10 @@ def _update_inplace(self, result, **kwargs): # guard when called from IndexOpsMixin raise TypeError("Index can't be updated inplace") + def _sort_levels_monotonic(self): + """ compat with MultiIndex """ + return self + _index_shared_docs['_get_grouper_for_level'] = """ Get index grouper corresponding to an index level diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index bfd5320af13fb..a3cf80d758b7b 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -650,8 +650,15 @@ def _convert_level_number(level_num, columns): drop_cols = [] for key in unique_groups: loc = this.columns.get_loc(key) - slice_len = loc.stop - loc.start + # can make more efficient? 
+ # we almost always return a slice + # but if unsorted can get a boolean + # indexer + if not isinstance(loc, slice): + slice_len = len(loc) + else: + slice_len = loc.stop - loc.start if slice_len == 0: drop_cols.append(key) diff --git a/pandas/core/series.py b/pandas/core/series.py index e0364ad629c5d..d4511fb58b2f3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1773,6 +1773,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, indexer = _ensure_platform_int(indexer) new_index = index.take(indexer) + new_index = new_index._sort_levels_monotonic() new_values = self._values.take(indexer) result = self._constructor(new_values, index=new_index) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index d7ba7f1c6fac6..a7a80c635a364 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -11,7 +11,6 @@ from pandas.core.index import Index, MultiIndex from pandas import Panel, DataFrame, Series, notnull, isnull, Timestamp -from pandas.core.common import UnsortedIndexError from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype import pandas.core.common as com import pandas.util.testing as tm @@ -938,7 +937,7 @@ def test_stack_mixed_dtype(self): df = df.sort_index(level=1, axis=1) stacked = df.stack() - result = df['foo'].stack() + result = df['foo'].stack().sort_index() tm.assert_series_equal(stacked['foo'], result, check_names=False) self.assertIs(result.name, None) self.assertEqual(stacked['bar'].dtype, np.float_) @@ -2456,11 +2455,11 @@ def test_frame_getitem_not_sorted2(self): assert df2_original.index.equals(df2.index) expected = df2.sort_index() - assert not expected.index.is_lexsorted() + assert expected.index.is_lexsorted() assert expected.index.is_monotonic result = df2.sort_index(level=0) - assert not result.index.is_lexsorted() + assert result.index.is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) @@ -2536,8 +2535,7 @@ def test_sort_index_and_reconstruction(self): concatted = pd.concat([df, df], keys=[0.8, 0.5]) result = concatted.sort_index() - # this will be monotonic, but not lexsorted! 
- assert not result.index.is_lexsorted() + assert result.index.is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) @@ -2576,7 +2574,7 @@ def test_sort_index_and_reconstruction_doc_example(self): levels=[['a', 'b'], ['aa', 'bb']], labels=[[0, 0, 1, 1], [0, 1, 0, 1]])) result = df.sort_index() - assert not result.index.is_lexsorted() + assert result.index.is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) @@ -2618,22 +2616,29 @@ def my_func(group): def test_sort_non_lexsorted(self): # degenerate case where we sort but don't # have a satisfying result :< - + # GH 15797 idx = MultiIndex([['A', 'B', 'C'], ['c', 'b', 'a']], [[0, 1, 2, 0, 1, 2], [0, 2, 1, 1, 0, 2]]) - df = DataFrame({'col': range(len(idx))}, index=idx) + df = DataFrame({'col': range(len(idx))}, + index=idx, + dtype='int64') assert df.index.is_lexsorted() is False assert df.index.is_monotonic is False - result = df.sort_index() - assert result.index.is_lexsorted() is False - assert result.index.is_monotonic is True + sorted = df.sort_index() + assert sorted.index.is_lexsorted() is True + assert sorted.index.is_monotonic is True - with pytest.raises(UnsortedIndexError): - result.loc[pd.IndexSlice['B':'C', 'a':'c'], :] + expected = DataFrame( + {'col': [1, 4, 5, 2]}, + index=MultiIndex.from_tuples([('B', 'a'), ('B', 'c'), + ('C', 'a'), ('C', 'b')]), + dtype='int64') + result = sorted.loc[pd.IndexSlice['B':'C', 'a':'c'], :] + tm.assert_frame_equal(result, expected) def test_sort_index_nan(self): # GH 14784 From 19fc8dac68e088126ffd132dc322dbf8a163ec69 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 21 Apr 2017 22:33:24 -0500 Subject: [PATCH 452/933] RLS: v0.20.0rc1 From 1c806113b17f71e92a6eeb6de68446e478a1f909 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 23 Apr 2017 09:57:45 -0400 Subject: [PATCH 453/933] MAINT: Remove assertNotIn from testing (#16096) * MAINT: Refactor _AssertRaisesContextManager Rewrite _AssertRaisesContextManager with more documentation and remove vestigial assertRaises. Follow-up to gh-16089. 
* MAINT: Remove assertNotIn from testing --- pandas/tests/frame/test_alter_axes.py | 15 ++-- pandas/tests/frame/test_analytics.py | 4 +- .../tests/frame/test_axis_select_reindex.py | 66 +++++++------- pandas/tests/frame/test_constructors.py | 26 +++--- pandas/tests/frame/test_mutate_columns.py | 17 ++-- pandas/tests/indexes/test_base.py | 4 +- pandas/tests/indexes/test_interval.py | 29 +++--- pandas/tests/indexes/test_multi.py | 16 ++-- pandas/tests/indexes/timedeltas/test_ops.py | 4 +- pandas/tests/indexing/test_indexing_slow.py | 4 +- pandas/tests/io/formats/test_format.py | 8 +- pandas/tests/io/formats/test_to_html.py | 22 ++--- pandas/tests/io/parser/parse_dates.py | 48 +++++----- pandas/tests/io/test_pytables.py | 72 +++++++-------- pandas/tests/scalar/test_interval.py | 16 ++-- pandas/tests/scalar/test_timestamp.py | 44 ++++----- pandas/tests/series/test_indexing.py | 20 ++--- pandas/tests/series/test_repr.py | 19 ++-- pandas/tests/sparse/test_frame.py | 26 +++--- pandas/tests/test_base.py | 5 +- pandas/tests/test_panel.py | 28 +++--- pandas/tests/test_panel4d.py | 40 ++++----- pandas/util/testing.py | 89 +++++++++++++------ 23 files changed, 328 insertions(+), 294 deletions(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index f925022b6bd7f..f32e001ea984a 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -500,16 +500,16 @@ def test_rename_nocopy(self): def test_rename_inplace(self): self.frame.rename(columns={'C': 'foo'}) - self.assertIn('C', self.frame) - self.assertNotIn('foo', self.frame) + assert 'C' in self.frame + assert 'foo' not in self.frame c_id = id(self.frame['C']) frame = self.frame.copy() frame.rename(columns={'C': 'foo'}, inplace=True) - self.assertNotIn('C', frame) - self.assertIn('foo', frame) - self.assertNotEqual(id(frame['foo']), c_id) + assert 'C' not in frame + assert 'foo' in frame + assert id(frame['foo']) != c_id def test_rename_bug(self): # GH 5344 @@ -778,8 +778,9 @@ def test_set_index_names(self): def test_rename_objects(self): renamed = self.mixed_frame.rename(columns=str.upper) - self.assertIn('FOO', renamed) - self.assertNotIn('foo', renamed) + + assert 'FOO' in renamed + assert 'foo' not in renamed def test_assign_columns(self): self.frame['hi'] = 'there' diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 703b93b9ec950..cd98460d8609c 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -190,10 +190,10 @@ def test_corrwith(self): dropped = a.corrwith(b, axis=0, drop=True) tm.assert_almost_equal(dropped['A'], a['A'].corr(b['A'])) - self.assertNotIn('B', dropped) + assert 'B' not in dropped dropped = a.corrwith(b, axis=1, drop=True) - self.assertNotIn(a.index[-1], dropped.index) + assert a.index[-1] not in dropped.index # non time-series data index = ['a', 'b', 'c', 'd', 'e'] diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 2f914472a1152..5b3a0a9e01f35 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -14,9 +14,7 @@ date_range, isnull) import pandas as pd -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_frame_equal from pandas.errors import PerformanceWarning import pandas.util.testing as tm @@ -287,15 +285,15 @@ def test_reindex_like(self): 
assert_frame_equal(other, self.frame.reindex_like(other)) def test_reindex_columns(self): - newFrame = self.frame.reindex(columns=['A', 'B', 'E']) + new_frame = self.frame.reindex(columns=['A', 'B', 'E']) - assert_series_equal(newFrame['B'], self.frame['B']) - self.assertTrue(np.isnan(newFrame['E']).all()) - self.assertNotIn('C', newFrame) + tm.assert_series_equal(new_frame['B'], self.frame['B']) + assert np.isnan(new_frame['E']).all() + assert 'C' not in new_frame - # length zero - newFrame = self.frame.reindex(columns=[]) - self.assertTrue(newFrame.empty) + # Length zero + new_frame = self.frame.reindex(columns=[]) + assert new_frame.empty def test_reindex_columns_method(self): @@ -659,33 +657,33 @@ def test_align_series_combinations(self): tm.assert_frame_equal(res2, exp1) def test_filter(self): - # items + # Items filtered = self.frame.filter(['A', 'B', 'E']) - self.assertEqual(len(filtered.columns), 2) - self.assertNotIn('E', filtered) + assert len(filtered.columns) == 2 + assert 'E' not in filtered filtered = self.frame.filter(['A', 'B', 'E'], axis='columns') - self.assertEqual(len(filtered.columns), 2) - self.assertNotIn('E', filtered) + assert len(filtered.columns) == 2 + assert 'E' not in filtered - # other axis + # Other axis idx = self.frame.index[0:4] filtered = self.frame.filter(idx, axis='index') expected = self.frame.reindex(index=idx) - assert_frame_equal(filtered, expected) + tm.assert_frame_equal(filtered, expected) # like fcopy = self.frame.copy() fcopy['AA'] = 1 filtered = fcopy.filter(like='A') - self.assertEqual(len(filtered.columns), 2) - self.assertIn('AA', filtered) + assert len(filtered.columns) == 2 + assert 'AA' in filtered # like with ints in column names df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '_A', '_B']) filtered = df.filter(like='_') - self.assertEqual(len(filtered.columns), 2) + assert len(filtered.columns) == 2 # regex with ints in column names # from PR #10384 @@ -693,41 +691,41 @@ def test_filter(self): expected = DataFrame( 0., index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object)) filtered = df.filter(regex='^[0-9]+$') - assert_frame_equal(filtered, expected) + tm.assert_frame_equal(filtered, expected) expected = DataFrame(0., index=[0, 1, 2], columns=[0, '0', 1, '1']) # shouldn't remove anything filtered = expected.filter(regex='^[0-9]+$') - assert_frame_equal(filtered, expected) + tm.assert_frame_equal(filtered, expected) # pass in None - with assertRaisesRegexp(TypeError, 'Must pass'): + with tm.assertRaisesRegexp(TypeError, 'Must pass'): self.frame.filter() - with assertRaisesRegexp(TypeError, 'Must pass'): + with tm.assertRaisesRegexp(TypeError, 'Must pass'): self.frame.filter(items=None) - with assertRaisesRegexp(TypeError, 'Must pass'): + with tm.assertRaisesRegexp(TypeError, 'Must pass'): self.frame.filter(axis=1) # test mutually exclusive arguments - with assertRaisesRegexp(TypeError, 'mutually exclusive'): + with tm.assertRaisesRegexp(TypeError, 'mutually exclusive'): self.frame.filter(items=['one', 'three'], regex='e$', like='bbi') - with assertRaisesRegexp(TypeError, 'mutually exclusive'): + with tm.assertRaisesRegexp(TypeError, 'mutually exclusive'): self.frame.filter(items=['one', 'three'], regex='e$', axis=1) - with assertRaisesRegexp(TypeError, 'mutually exclusive'): + with tm.assertRaisesRegexp(TypeError, 'mutually exclusive'): self.frame.filter(items=['one', 'three'], regex='e$') - with assertRaisesRegexp(TypeError, 'mutually exclusive'): + with tm.assertRaisesRegexp(TypeError, 'mutually exclusive'): 
self.frame.filter(items=['one', 'three'], like='bbi', axis=0) - with assertRaisesRegexp(TypeError, 'mutually exclusive'): + with tm.assertRaisesRegexp(TypeError, 'mutually exclusive'): self.frame.filter(items=['one', 'three'], like='bbi') # objects filtered = self.mixed_frame.filter(like='foo') - self.assertIn('foo', filtered) + assert 'foo' in filtered # unicode columns, won't ascii-encode df = self.frame.rename(columns={'B': u('\u2202')}) filtered = df.filter(like='C') - self.assertTrue('C' in filtered) + assert 'C' in filtered def test_filter_regex_search(self): fcopy = self.frame.copy() @@ -857,10 +855,10 @@ def test_reindex_boolean(self): def test_reindex_objects(self): reindexed = self.mixed_frame.reindex(columns=['foo', 'A', 'B']) - self.assertIn('foo', reindexed) + assert 'foo' in reindexed reindexed = self.mixed_frame.reindex(columns=['A', 'B']) - self.assertNotIn('foo', reindexed) + assert 'foo' not in reindexed def test_reindex_corner(self): index = Index(['a', 'b', 'c']) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d253fc2049462..3bcc058316f77 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -210,8 +210,8 @@ def test_constructor_dict(self): 'col2': self.ts2}) # col2 is padded with NaN - self.assertEqual(len(self.ts1), 30) - self.assertEqual(len(self.ts2), 25) + assert len(self.ts1) == 30 + assert len(self.ts2) == 25 tm.assert_series_equal(self.ts1, frame['col1'], check_names=False) @@ -223,12 +223,12 @@ def test_constructor_dict(self): 'col2': self.ts2}, columns=['col2', 'col3', 'col4']) - self.assertEqual(len(frame), len(self.ts2)) - self.assertNotIn('col1', frame) - self.assertTrue(isnull(frame['col3']).all()) + assert len(frame) == len(self.ts2) + assert 'col1' not in frame + assert isnull(frame['col3']).all() # Corner cases - self.assertEqual(len(DataFrame({})), 0) + assert len(DataFrame({})) == 0 # mix dict and array, wrong size - no spec for which error should raise # first @@ -242,14 +242,14 @@ def test_constructor_dict(self): # empty dict plus index idx = Index([0, 1, 2]) frame = DataFrame({}, index=idx) - self.assertIs(frame.index, idx) + assert frame.index is idx # empty with index and columns idx = Index([0, 1, 2]) frame = DataFrame({}, index=idx, columns=idx) - self.assertIs(frame.index, idx) - self.assertIs(frame.columns, idx) - self.assertEqual(len(frame._series), 3) + assert frame.index is idx + assert frame.columns is idx + assert len(frame._series) == 3 # with dict of empty list and Series frame = DataFrame({'A': [], 'B': []}, columns=['A', 'B']) @@ -1533,11 +1533,11 @@ def test_from_records_to_records(self): # what to do? 
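
# [Illustrative aside -- not from the patch. A sketch of the to_records()
#  behavior the assertions below exercise: the index travels along as a
#  leading record field unless ``index=False`` is passed:
#
#      import pandas as pd
#
#      df = pd.DataFrame({'A': [1, 2]}, index=pd.Index([10, 20], name='idx'))
#      df.to_records().dtype.names             # ('idx', 'A')
#      df.to_records(index=False).dtype.names  # ('A',)
# ]
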
records = indexed_frame.to_records() - self.assertEqual(len(records.dtype.names), 3) + assert len(records.dtype.names) == 3 records = indexed_frame.to_records(index=False) - self.assertEqual(len(records.dtype.names), 2) - self.assertNotIn('index', records.dtype.names) + assert len(records.dtype.names) == 2 + assert 'index' not in records.dtype.names def test_from_records_nones(self): tuples = [(1, 2, None, 3), diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index a7da704e73764..dfaeaea49cf75 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -7,9 +7,7 @@ from pandas import DataFrame, Series, Index, MultiIndex -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_frame_equal, assertRaisesRegexp import pandas.util.testing as tm @@ -163,7 +161,7 @@ def test_insert(self): def test_delitem(self): del self.frame['A'] - self.assertNotIn('A', self.frame) + assert 'A' not in self.frame def test_delitem_multiindex(self): midx = MultiIndex.from_product([['A', 'B'], [1, 2]]) @@ -194,15 +192,14 @@ def test_pop(self): self.frame.columns.name = 'baz' self.frame.pop('A') - self.assertNotIn('A', self.frame) + assert 'A' not in self.frame self.frame['foo'] = 'bar' self.frame.pop('foo') - self.assertNotIn('foo', self.frame) + assert 'foo' not in self.frame # TODO self.assertEqual(self.frame.columns.name, 'baz') - # 10912 - # inplace ops cause caching issue + # gh-10912: inplace ops cause caching issue a = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[ 'A', 'B', 'C'], index=['X', 'Y']) b = a.pop('B') @@ -211,11 +208,11 @@ def test_pop(self): # original frame expected = DataFrame([[1, 3], [4, 6]], columns=[ 'A', 'C'], index=['X', 'Y']) - assert_frame_equal(a, expected) + tm.assert_frame_equal(a, expected) # result expected = Series([2, 5], index=['X', 'Y'], name='B') + 1 - assert_series_equal(b, expected) + tm.assert_series_equal(b, expected) def test_pop_non_unique_cols(self): df = DataFrame({0: [0, 1], 1: [0, 1], 2: [4, 5]}) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 695d3aa7ebe3a..9a166aa3340e3 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -851,8 +851,8 @@ def test_add_string(self): index = Index(['a', 'b', 'c']) index2 = index + 'foo' - self.assertNotIn('a', index2) - self.assertIn('afoo', index2) + assert 'a' not in index2 + assert 'afoo' in index2 def test_iadd_string(self): index = pd.Index(['a', 'b', 'c']) diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 53b5c01c40899..c34d93cb21b0f 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -458,20 +458,20 @@ def test_get_indexer_subintervals(self): tm.assert_numpy_array_equal(actual, expected) def test_contains(self): - # only endpoints are valid + # Only endpoints are valid. 
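
# [Illustrative aside -- not from the patch. The containment semantics
#  asserted below, per the 0.20-era IntervalIndex: scalar endpoints are
#  not members, while Interval objects can be:
#
#      import pandas as pd
#
#      ii = pd.IntervalIndex.from_arrays([0, 1], [1, 2])
#      1 in ii                  # False
#      pd.Interval(0, 1) in ii  # True
# ]
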
i = IntervalIndex.from_arrays([0, 1], [1, 2]) - # invalid - self.assertNotIn(0, i) - self.assertNotIn(1, i) - self.assertNotIn(2, i) + # Invalid + assert 0 not in i + assert 1 not in i + assert 2 not in i - # valid - self.assertIn(Interval(0, 1), i) - self.assertIn(Interval(0, 2), i) - self.assertIn(Interval(0, 0.5), i) - self.assertNotIn(Interval(3, 5), i) - self.assertNotIn(Interval(-1, 0, closed='left'), i) + # Valid + assert Interval(0, 1) in i + assert Interval(0, 2) in i + assert Interval(0, 0.5) in i + assert Interval(3, 5) not in i + assert Interval(-1, 0, closed='left') not in i def testcontains(self): # can select values that are IN the range of a value @@ -509,7 +509,7 @@ def test_non_contiguous(self): expected = np.array([0, -1, 1], dtype='intp') tm.assert_numpy_array_equal(actual, expected) - self.assertNotIn(1.5, index) + assert 1.5 not in index def test_union(self): other = IntervalIndex.from_arrays([2], [3]) @@ -651,11 +651,12 @@ def test_datetime(self): expected = pd.date_range('2000-01-01T12:00', periods=2) tm.assert_index_equal(idx.mid, expected) - self.assertNotIn(pd.Timestamp('2000-01-01T12'), idx) - self.assertNotIn(pd.Timestamp('2000-01-01T12'), idx) + assert pd.Timestamp('2000-01-01T12') not in idx + assert pd.Timestamp('2000-01-01T12') not in idx target = pd.date_range('1999-12-31T12:00', periods=7, freq='12H') actual = idx.get_indexer(target) + expected = np.array([-1, -1, 0, 0, 1, 1, -1], dtype='intp') tm.assert_numpy_array_equal(actual, expected) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index c671a33e2f171..5000a71dfc756 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1058,9 +1058,9 @@ def test_from_tuples_index_values(self): self.assertTrue((result.values == self.index.values).all()) def test_contains(self): - self.assertIn(('foo', 'two'), self.index) - self.assertNotIn(('bar', 'two'), self.index) - self.assertNotIn(None, self.index) + assert ('foo', 'two') in self.index + assert ('bar', 'two') not in self.index + assert None not in self.index def test_contains_top_level(self): midx = MultiIndex.from_product([['A', 'B'], [1, 2]]) @@ -1287,15 +1287,15 @@ def test_truncate(self): labels=[major_labels, minor_labels]) result = index.truncate(before=1) - self.assertNotIn('foo', result.levels[0]) - self.assertIn(1, result.levels[0]) + assert 'foo' not in result.levels[0] + assert 1 in result.levels[0] result = index.truncate(after=1) - self.assertNotIn(2, result.levels[0]) - self.assertIn(1, result.levels[0]) + assert 2 not in result.levels[0] + assert 1 in result.levels[0] result = index.truncate(before=1, after=2) - self.assertEqual(len(result.levels[0]), 2) + assert len(result.levels[0]) == 2 # after < before pytest.raises(ValueError, index.truncate, 3, 1) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 97c4a1df95963..5201af3af3531 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -564,10 +564,10 @@ def test_nonunique_contains(self): tm.assertIn(idx[0], idx) def test_unknown_attribute(self): - # GH 9680 + # see gh-9680 tdi = pd.timedelta_range(start=0, periods=10, freq='1s') ts = pd.Series(np.random.normal(size=10), index=tdi) - self.assertNotIn('foo', ts.__dict__.keys()) + assert 'foo' not in ts.__dict__.keys() pytest.raises(AttributeError, lambda: ts.foo) def test_order(self): diff --git a/pandas/tests/indexing/test_indexing_slow.py 
b/pandas/tests/indexing/test_indexing_slow.py index 42b50e37f0492..21cdbb17f52ce 100644 --- a/pandas/tests/indexing/test_indexing_slow.py +++ b/pandas/tests/indexing/test_indexing_slow.py @@ -27,10 +27,10 @@ def validate(mi, df, key): mask &= df.iloc[:, i] == k if not mask.any(): - self.assertNotIn(key[:i + 1], mi.index) + assert key[:i + 1] not in mi.index continue - self.assertIn(key[:i + 1], mi.index) + assert key[:i + 1] in mi.index right = df[mask].copy() if i + 1 != len(key): # partial key diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 3784840fbfd28..ea796a497bd19 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -183,15 +183,15 @@ def test_repr_truncation(self): for line, value in lzip(r.split('\n'), df['B']): if adj.len(value) + 1 > max_len: - self.assertIn('...', line) + assert '...' in line else: - self.assertNotIn('...', line) + assert '...' not in line with option_context("display.max_colwidth", 999999): - self.assertNotIn('...', repr(df)) + assert '...' not in repr(df) with option_context("display.max_colwidth", max_len + 2): - self.assertNotIn('...', repr(df)) + assert '...' not in repr(df) def test_repr_chop_threshold(self): df = DataFrame([[0.1, 0.5], [0.5, -0.1]]) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 8b71fb047456e..90c6db1dcc7fb 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1466,7 +1466,7 @@ def test_to_html_with_no_bold(self): def test_to_html_columns_arg(self): frame = DataFrame(tm.getSeriesData()) result = frame.to_html(columns=['A']) - self.assertNotIn('
<th>B</th>', result)
+        assert '<th>B</th>' not in result

[placeholder: the literal HTML table fixtures in the test_to_html_index
hunks below were destroyed by text extraction; only the assertion
rewrites and hunk headers are recoverable.]

                               ...')
-        self.assertEqual(df.to_html(), expected_with_index)
+        assert df.to_html() == expected_with_index

         expected_without_index = ('...
@@ -1690,8 +1690,8 @@ def test_to_html_index(self):
                                   ...')
         result = df.to_html(index=False)
         for i in index:
-            self.assertNotIn(i, result)
-        self.assertEqual(result, expected_without_index)
+            assert i not in result
+        assert result == expected_without_index

         df.index = Index(['foo', 'bar', 'baz'], name='idx')
         expected_with_index = ('...
@@ -1729,8 +1729,8 @@ def test_to_html_index(self):
                                ...')
-        self.assertEqual(df.to_html(), expected_with_index)
-        self.assertEqual(df.to_html(index=False), expected_without_index)
+        assert df.to_html() == expected_with_index
+        assert df.to_html(index=False) == expected_without_index

         tuples = [('foo', 'car'), ('foo', 'bike'), ('bar', 'car')]
         df.index = MultiIndex.from_tuples(tuples)
@@ -1768,13 +1768,13 @@ def test_to_html_index(self):
                                ...')
-        self.assertEqual(df.to_html(), expected_with_index)
+        assert df.to_html() == expected_with_index

         result = df.to_html(index=False)
         for i in ['foo', 'bar', 'car', 'bike']:
-            self.assertNotIn(i, result)
+            assert i not in result

         # must be the same result as normal index
-        self.assertEqual(result, expected_without_index)
+        assert result == expected_without_index

         df.index = MultiIndex.from_tuples(tuples, names=['idx1', 'idx2'])
         expected_with_index = ('...
@@ -1817,8 +1817,8 @@ def test_to_html_index(self):
                                ...
') - self.assertEqual(df.to_html(), expected_with_index) - self.assertEqual(df.to_html(index=False), expected_without_index) + assert df.to_html() == expected_with_index + assert df.to_html(index=False) == expected_without_index def test_to_html_with_classes(self): df = DataFrame() diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index 6dd92783eac60..cdc4f9fa9d84f 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -60,26 +60,26 @@ def func(*date_cols): prefix='X', parse_dates={'nominal': [1, 2], 'actual': [1, 3]}) - self.assertIn('nominal', df) - self.assertIn('actual', df) - self.assertNotIn('X1', df) - self.assertNotIn('X2', df) - self.assertNotIn('X3', df) + assert 'nominal' in df + assert 'actual' in df + assert 'X1' not in df + assert 'X2' not in df + assert 'X3' not in df d = datetime(1999, 1, 27, 19, 0) - self.assertEqual(df.loc[0, 'nominal'], d) + assert df.loc[0, 'nominal'] == d df = self.read_csv(StringIO(data), header=None, date_parser=func, parse_dates={'nominal': [1, 2], 'actual': [1, 3]}, keep_date_col=True) - self.assertIn('nominal', df) - self.assertIn('actual', df) + assert 'nominal' in df + assert 'actual' in df - self.assertIn(1, df) - self.assertIn(2, df) - self.assertIn(3, df) + assert 1 in df + assert 2 in df + assert 3 in df data = """\ KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 @@ -92,23 +92,23 @@ def func(*date_cols): df = self.read_csv(StringIO(data), header=None, prefix='X', parse_dates=[[1, 2], [1, 3]]) - self.assertIn('X1_X2', df) - self.assertIn('X1_X3', df) - self.assertNotIn('X1', df) - self.assertNotIn('X2', df) - self.assertNotIn('X3', df) + assert 'X1_X2' in df + assert 'X1_X3' in df + assert 'X1' not in df + assert 'X2' not in df + assert 'X3' not in df d = datetime(1999, 1, 27, 19, 0) - self.assertEqual(df.loc[0, 'X1_X2'], d) + assert df.loc[0, 'X1_X2'] == d df = self.read_csv(StringIO(data), header=None, parse_dates=[[1, 2], [1, 3]], keep_date_col=True) - self.assertIn('1_2', df) - self.assertIn('1_3', df) - self.assertIn(1, df) - self.assertIn(2, df) - self.assertIn(3, df) + assert '1_2' in df + assert '1_3' in df + assert 1 in df + assert 2 in df + assert 3 in df data = '''\ KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 @@ -120,7 +120,7 @@ def func(*date_cols): df = self.read_csv(StringIO(data), sep=',', header=None, parse_dates=[1], index_col=1) d = datetime(1999, 1, 27, 19, 0) - self.assertEqual(df.index[0], d) + assert df.index[0] == d def test_multiple_date_cols_int_cast(self): data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" @@ -402,7 +402,7 @@ def test_multiple_date_cols_chunked(self): chunks = list(reader) - self.assertNotIn('nominalTime', df) + assert 'nominalTime' not in df tm.assert_frame_equal(chunks[0], df[:2]) tm.assert_frame_equal(chunks[1], df[2:4]) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 153ac749b4b17..d077dd879d44a 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -439,19 +439,18 @@ def test_contains(self): store['a'] = tm.makeTimeSeries() store['b'] = tm.makeDataFrame() store['foo/bar'] = tm.makeDataFrame() - self.assertIn('a', store) - self.assertIn('b', store) - self.assertNotIn('c', store) - self.assertIn('foo/bar', store) - self.assertIn('/foo/bar', store) - self.assertNotIn('/foo/b', store) - self.assertNotIn('bar', store) - - # GH 2694 - # tables.NaturalNameWarning + assert 'a' in store + 
assert 'b' in store + assert 'c' not in store + assert 'foo/bar' in store + assert '/foo/bar' in store + assert '/foo/b' not in store + assert 'bar' not in store + + # gh-2694: tables.NaturalNameWarning with catch_warnings(record=True): store['node())'] = tm.makeDataFrame() - self.assertIn('node())', store) + assert 'node())' in store def test_versioning(self): @@ -4288,7 +4287,7 @@ def _check_roundtrip_table(self, obj, comparator, compression=False): comparator(retrieved, obj) def test_multiple_open_close(self): - # GH 4409, open & close multiple times + # gh-4409: open & close multiple times with ensure_clean_path(self.path) as path: @@ -4297,11 +4296,12 @@ def test_multiple_open_close(self): # single store = HDFStore(path) - self.assertNotIn('CLOSED', str(store)) - self.assertTrue(store.is_open) + assert 'CLOSED' not in str(store) + assert store.is_open + store.close() - self.assertIn('CLOSED', str(store)) - self.assertFalse(store.is_open) + assert 'CLOSED' in str(store) + assert not store.is_open with ensure_clean_path(self.path) as path: @@ -4321,22 +4321,22 @@ def f(): store1 = HDFStore(path) store2 = HDFStore(path) - self.assertNotIn('CLOSED', str(store1)) - self.assertNotIn('CLOSED', str(store2)) - self.assertTrue(store1.is_open) - self.assertTrue(store2.is_open) + assert 'CLOSED' not in str(store1) + assert 'CLOSED' not in str(store2) + assert store1.is_open + assert store2.is_open store1.close() - self.assertIn('CLOSED', str(store1)) - self.assertFalse(store1.is_open) - self.assertNotIn('CLOSED', str(store2)) - self.assertTrue(store2.is_open) + assert 'CLOSED' in str(store1) + assert not store1.is_open + assert 'CLOSED' not in str(store2) + assert store2.is_open store2.close() - self.assertIn('CLOSED', str(store1)) - self.assertIn('CLOSED', str(store2)) - self.assertFalse(store1.is_open) - self.assertFalse(store2.is_open) + assert 'CLOSED' in str(store1) + assert 'CLOSED' in str(store2) + assert not store1.is_open + assert not store2.is_open # nested close store = HDFStore(path, mode='w') @@ -4345,12 +4345,12 @@ def f(): store2 = HDFStore(path) store2.append('df2', df) store2.close() - self.assertIn('CLOSED', str(store2)) - self.assertFalse(store2.is_open) + assert 'CLOSED' in str(store2) + assert not store2.is_open store.close() - self.assertIn('CLOSED', str(store)) - self.assertFalse(store.is_open) + assert 'CLOSED' in str(store) + assert not store.is_open # double closing store = HDFStore(path, mode='w') @@ -4358,12 +4358,12 @@ def f(): store2 = HDFStore(path) store.close() - self.assertIn('CLOSED', str(store)) - self.assertFalse(store.is_open) + assert 'CLOSED' in str(store) + assert not store.is_open store2.close() - self.assertIn('CLOSED', str(store2)) - self.assertFalse(store2.is_open) + assert 'CLOSED' in str(store2) + assert not store2.is_open # ops on a closed store with ensure_clean_path(self.path) as path: diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/test_interval.py index 717ef38a5b447..bd73a7d173962 100644 --- a/pandas/tests/scalar/test_interval.py +++ b/pandas/tests/scalar/test_interval.py @@ -26,19 +26,19 @@ def test_repr(self): self.assertEqual(str(interval_left), "[0, 1)") def test_contains(self): - self.assertIn(0.5, self.interval) - self.assertIn(1, self.interval) - self.assertNotIn(0, self.interval) + assert 0.5 in self.interval + assert 1 in self.interval + assert 0 not in self.interval pytest.raises(TypeError, lambda: self.interval in self.interval) interval = Interval(0, 1, closed='both') - self.assertIn(0, interval) - 
self.assertIn(1, interval) + assert 0 in interval + assert 1 in interval interval = Interval(0, 1, closed='neither') - self.assertNotIn(0, interval) - self.assertIn(0.5, interval) - self.assertNotIn(1, interval) + assert 0 not in interval + assert 0.5 in interval + assert 1 not in interval def test_equal(self): self.assertEqual(Interval(0, 1), Interval(0, 1, closed='right')) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index ae351b9d1cab9..055da8b2cc07f 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -310,40 +310,40 @@ def test_repr(self): tz_repr = tz date_only = Timestamp(date) - self.assertIn(date, repr(date_only)) - self.assertNotIn(tz_repr, repr(date_only)) - self.assertNotIn(freq_repr, repr(date_only)) - self.assertEqual(date_only, eval(repr(date_only))) + assert date in repr(date_only) + assert tz_repr not in repr(date_only) + assert freq_repr not in repr(date_only) + assert date_only == eval(repr(date_only)) date_tz = Timestamp(date, tz=tz) - self.assertIn(date, repr(date_tz)) - self.assertIn(tz_repr, repr(date_tz)) - self.assertNotIn(freq_repr, repr(date_tz)) - self.assertEqual(date_tz, eval(repr(date_tz))) + assert date in repr(date_tz) + assert tz_repr in repr(date_tz) + assert freq_repr not in repr(date_tz) + assert date_tz == eval(repr(date_tz)) date_freq = Timestamp(date, freq=freq) - self.assertIn(date, repr(date_freq)) - self.assertNotIn(tz_repr, repr(date_freq)) - self.assertIn(freq_repr, repr(date_freq)) - self.assertEqual(date_freq, eval(repr(date_freq))) + assert date in repr(date_freq) + assert tz_repr not in repr(date_freq) + assert freq_repr in repr(date_freq) + assert date_freq == eval(repr(date_freq)) date_tz_freq = Timestamp(date, tz=tz, freq=freq) - self.assertIn(date, repr(date_tz_freq)) - self.assertIn(tz_repr, repr(date_tz_freq)) - self.assertIn(freq_repr, repr(date_tz_freq)) - self.assertEqual(date_tz_freq, eval(repr(date_tz_freq))) + assert date in repr(date_tz_freq) + assert tz_repr in repr(date_tz_freq) + assert freq_repr in repr(date_tz_freq) + assert date_tz_freq == eval(repr(date_tz_freq)) - # this can cause the tz field to be populated, but it's redundant to - # information in the datestring + # This can cause the tz field to be populated, but it's redundant to + # include this information in the date-string. 
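
# [Illustrative aside -- not from the patch. The repr behavior checked
#  below: a fixed-offset Timestamp carries the offset both in the
#  date-string and as a pytz.FixedOffset, with no 'tzoffset' mention:
#
#      import pandas as pd
#
#      ts = pd.Timestamp('2014-03-13 00:00:00-0400', tz=None)
#      repr(ts)  # contains '2014-03-13 00:00:00-0400'
#                # and 'pytz.FixedOffset(-240)'
# ]
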
tm._skip_if_no_pytz() import pytz # noqa date_with_utc_offset = Timestamp('2014-03-13 00:00:00-0400', tz=None) - self.assertIn('2014-03-13 00:00:00-0400', repr(date_with_utc_offset)) - self.assertNotIn('tzoffset', repr(date_with_utc_offset)) - self.assertIn('pytz.FixedOffset(-240)', repr(date_with_utc_offset)) + assert '2014-03-13 00:00:00-0400' in repr(date_with_utc_offset) + assert 'tzoffset' not in repr(date_with_utc_offset) + assert 'pytz.FixedOffset(-240)' in repr(date_with_utc_offset) expr = repr(date_with_utc_offset).replace("'pytz.FixedOffset(-240)'", 'pytz.FixedOffset(-240)') - self.assertEqual(date_with_utc_offset, eval(expr)) + assert date_with_utc_offset == eval(expr) def test_bounds_with_different_units(self): out_of_bounds_dates = ('1677-09-21', '2262-04-12', ) diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 1003e84a1a942..1181e3a44f295 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -653,22 +653,20 @@ def test_slice(self): numSliceEnd = self.series[-10:] objSlice = self.objSeries[10:20] - self.assertNotIn(self.series.index[9], numSlice.index) - self.assertNotIn(self.objSeries.index[9], objSlice.index) + assert self.series.index[9] not in numSlice.index + assert self.objSeries.index[9] not in objSlice.index - self.assertEqual(len(numSlice), len(numSlice.index)) - self.assertEqual(self.series[numSlice.index[0]], - numSlice[numSlice.index[0]]) + assert len(numSlice) == len(numSlice.index) + assert self.series[numSlice.index[0]] == numSlice[numSlice.index[0]] - self.assertEqual(numSlice.index[1], self.series.index[11]) + assert numSlice.index[1] == self.series.index[11] + assert tm.equalContents(numSliceEnd, np.array(self.series)[-10:]) - self.assertTrue(tm.equalContents(numSliceEnd, np.array(self.series)[ - -10:])) - - # test return view + # Test return view. sl = self.series[10:20] sl[:] = 0 - self.assertTrue((self.series[10:20] == 0).all()) + + assert (self.series[10:20] == 0).all() def test_slice_can_reorder_not_uniquely_indexed(self): s = Series(1, index=['a', 'a', 'b', 'b', 'c']) diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 188b96638344c..a80c5edcc21bd 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -37,21 +37,26 @@ def test_multilevel_name_print(self): self.assertEqual(repr(s), expected) def test_name_printing(self): - # test small series + # Test small Series. s = Series([0, 1, 2]) + s.name = "test" - self.assertIn("Name: test", repr(s)) + assert "Name: test" in repr(s) + s.name = None - self.assertNotIn("Name:", repr(s)) - # test big series (diff code path) + assert "Name:" not in repr(s) + + # Test big Series (diff code path). 
s = Series(lrange(0, 1000)) + s.name = "test" - self.assertIn("Name: test", repr(s)) + assert "Name: test" in repr(s) + s.name = None - self.assertNotIn("Name:", repr(s)) + assert "Name:" not in repr(s) s = Series(index=date_range('20010101', '20020101'), name='test') - self.assertIn("Name: test", repr(s)) + assert "Name: test" in repr(s) def test_repr(self): str(self.ts) diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index d5df744648ff3..ccb72d1f0d788 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -545,15 +545,15 @@ def test_delitem(self): C = self.frame['C'] del self.frame['B'] - self.assertNotIn('B', self.frame) + assert 'B' not in self.frame tm.assert_sp_series_equal(self.frame['A'], A) tm.assert_sp_series_equal(self.frame['C'], C) del self.frame['D'] - self.assertNotIn('D', self.frame) + assert 'D' not in self.frame del self.frame['A'] - self.assertNotIn('A', self.frame) + assert 'A' not in self.frame def test_set_columns(self): self.frame.columns = self.frame.columns @@ -829,22 +829,22 @@ def _check_frame(frame): # length zero length_zero = frame.reindex([]) - self.assertEqual(len(length_zero), 0) - self.assertEqual(len(length_zero.columns), len(frame.columns)) - self.assertEqual(len(length_zero['A']), 0) + assert len(length_zero) == 0 + assert len(length_zero.columns) == len(frame.columns) + assert len(length_zero['A']) == 0 # frame being reindexed has length zero length_n = length_zero.reindex(index) - self.assertEqual(len(length_n), len(frame)) - self.assertEqual(len(length_n.columns), len(frame.columns)) - self.assertEqual(len(length_n['A']), len(frame)) + assert len(length_n) == len(frame) + assert len(length_n.columns) == len(frame.columns) + assert len(length_n['A']) == len(frame) # reindex columns reindexed = frame.reindex(columns=['A', 'B', 'Z']) - self.assertEqual(len(reindexed.columns), 3) + assert len(reindexed.columns) == 3 tm.assert_almost_equal(reindexed['Z'].fill_value, frame.default_fill_value) - self.assertTrue(np.isnan(reindexed['Z'].sp_values).all()) + assert np.isnan(reindexed['Z'].sp_values).all() _check_frame(self.frame) _check_frame(self.iframe) @@ -854,11 +854,11 @@ def _check_frame(frame): # with copy=False reindexed = self.frame.reindex(self.frame.index, copy=False) reindexed['F'] = reindexed['A'] - self.assertIn('F', self.frame) + assert 'F' in self.frame reindexed = self.frame.reindex(self.frame.index) reindexed['G'] = reindexed['A'] - self.assertNotIn('G', self.frame) + assert 'G' not in self.frame def test_reindex_fill_value(self): rng = bdate_range('20110110', periods=20) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index d91aab6bc3ceb..5814ae3494b44 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -45,8 +45,8 @@ class CheckImmutable(object): mutable_regex = re.compile('does not support mutable operations') def check_mutable_error(self, *args, **kwargs): - # pass whatever functions you normally would to assertRaises (after the - # Exception kind) + # Pass whatever function you normally would to assertRaisesRegexp + # (after the Exception kind). 
tm.assertRaisesRegexp(TypeError, self.mutable_regex, *args, **kwargs) def test_no_mutable_funcs(self): @@ -70,6 +70,7 @@ def delslice(): self.check_mutable_error(delslice) mutable_methods = getattr(self, "mutable_methods", []) + for meth in mutable_methods: self.check_mutable_error(getattr(self.container, meth)) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 703c8cc80e8f9..af56f0c33df2e 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -205,22 +205,22 @@ def test_set_axis(self): self.panel.items = new_items if hasattr(self.panel, '_item_cache'): - self.assertNotIn('ItemA', self.panel._item_cache) - self.assertIs(self.panel.items, new_items) + assert 'ItemA' not in self.panel._item_cache + assert self.panel.items is new_items # TODO: unused? item = self.panel[0] # noqa self.panel.major_axis = new_major - self.assertIs(self.panel[0].index, new_major) - self.assertIs(self.panel.major_axis, new_major) + assert self.panel[0].index is new_major + assert self.panel.major_axis is new_major # TODO: unused? item = self.panel[0] # noqa self.panel.minor_axis = new_minor - self.assertIs(self.panel[0].columns, new_minor) - self.assertIs(self.panel.minor_axis, new_minor) + assert self.panel[0].columns is new_minor + assert self.panel.minor_axis is new_minor def test_get_axis_number(self): self.assertEqual(self.panel._get_axis_number('items'), 0) @@ -447,10 +447,10 @@ def test_delitem_and_pop(self): expected = self.panel['ItemA'] result = self.panel.pop('ItemA') assert_frame_equal(expected, result) - self.assertNotIn('ItemA', self.panel.items) + assert 'ItemA' not in self.panel.items del self.panel['ItemB'] - self.assertNotIn('ItemB', self.panel.items) + assert 'ItemB' not in self.panel.items pytest.raises(Exception, self.panel.__delitem__, 'ItemB') values = np.empty((3, 3, 3)) @@ -464,18 +464,18 @@ def test_delitem_and_pop(self): panelc = panel.copy() del panelc[0] - assert_frame_equal(panelc[1], panel[1]) - assert_frame_equal(panelc[2], panel[2]) + tm.assert_frame_equal(panelc[1], panel[1]) + tm.assert_frame_equal(panelc[2], panel[2]) panelc = panel.copy() del panelc[1] - assert_frame_equal(panelc[0], panel[0]) - assert_frame_equal(panelc[2], panel[2]) + tm.assert_frame_equal(panelc[0], panel[0]) + tm.assert_frame_equal(panelc[2], panel[2]) panelc = panel.copy() del panelc[2] - assert_frame_equal(panelc[1], panel[1]) - assert_frame_equal(panelc[0], panel[0]) + tm.assert_frame_equal(panelc[1], panel[1]) + tm.assert_frame_equal(panelc[0], panel[0]) def test_setitem(self): with catch_warnings(record=True): diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index f55452b44731c..33b17bc04cd79 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -182,16 +182,16 @@ def test_set_axis(self): self.panel4d.labels = new_labels if hasattr(self.panel4d, '_item_cache'): - self.assertNotIn('l1', self.panel4d._item_cache) - self.assertIs(self.panel4d.labels, new_labels) + assert 'l1' not in self.panel4d._item_cache + assert self.panel4d.labels is new_labels self.panel4d.major_axis = new_major - self.assertIs(self.panel4d[0].major_axis, new_major) - self.assertIs(self.panel4d.major_axis, new_major) + assert self.panel4d[0].major_axis is new_major + assert self.panel4d.major_axis is new_major self.panel4d.minor_axis = new_minor - self.assertIs(self.panel4d[0].minor_axis, new_minor) - self.assertIs(self.panel4d.minor_axis, new_minor) + assert self.panel4d[0].minor_axis is new_minor + assert self.panel4d.minor_axis is 
new_minor def test_get_axis_number(self): self.assertEqual(self.panel4d._get_axis_number('labels'), 0) @@ -315,10 +315,10 @@ def test_delitem_and_pop(self): expected = self.panel4d['l2'] result = self.panel4d.pop('l2') assert_panel_equal(expected, result) - self.assertNotIn('l2', self.panel4d.labels) + assert 'l2' not in self.panel4d.labels del self.panel4d['l3'] - self.assertNotIn('l3', self.panel4d.labels) + assert 'l3' not in self.panel4d.labels pytest.raises(Exception, self.panel4d.__delitem__, 'l3') values = np.empty((4, 4, 4, 4)) @@ -333,27 +333,27 @@ def test_delitem_and_pop(self): # did we delete the right row? panel4dc = panel4d.copy() del panel4dc[0] - assert_panel_equal(panel4dc[1], panel4d[1]) - assert_panel_equal(panel4dc[2], panel4d[2]) - assert_panel_equal(panel4dc[3], panel4d[3]) + tm.assert_panel_equal(panel4dc[1], panel4d[1]) + tm.assert_panel_equal(panel4dc[2], panel4d[2]) + tm.assert_panel_equal(panel4dc[3], panel4d[3]) panel4dc = panel4d.copy() del panel4dc[1] - assert_panel_equal(panel4dc[0], panel4d[0]) - assert_panel_equal(panel4dc[2], panel4d[2]) - assert_panel_equal(panel4dc[3], panel4d[3]) + tm.assert_panel_equal(panel4dc[0], panel4d[0]) + tm.assert_panel_equal(panel4dc[2], panel4d[2]) + tm.assert_panel_equal(panel4dc[3], panel4d[3]) panel4dc = panel4d.copy() del panel4dc[2] - assert_panel_equal(panel4dc[1], panel4d[1]) - assert_panel_equal(panel4dc[0], panel4d[0]) - assert_panel_equal(panel4dc[3], panel4d[3]) + tm.assert_panel_equal(panel4dc[1], panel4d[1]) + tm.assert_panel_equal(panel4dc[0], panel4d[0]) + tm.assert_panel_equal(panel4dc[3], panel4d[3]) panel4dc = panel4d.copy() del panel4dc[3] - assert_panel_equal(panel4dc[1], panel4d[1]) - assert_panel_equal(panel4dc[2], panel4d[2]) - assert_panel_equal(panel4dc[0], panel4d[0]) + tm.assert_panel_equal(panel4dc[1], panel4d[1]) + tm.assert_panel_equal(panel4dc[2], panel4d[2]) + tm.assert_panel_equal(panel4dc[0], panel4d[0]) def test_setitem(self): with catch_warnings(record=True): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 7f62d319aa096..7565cbb859201 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1072,12 +1072,6 @@ def assertIn(first, second, msg=''): assert a in b, "%s: %r is not in %r" % (msg.format(a, b), a, b) -def assertNotIn(first, second, msg=''): - """Checks that 'first' is not in 'second'""" - a, b = first, second - assert a not in b, "%s: %r is in %r" % (msg.format(a, b), a, b) - - def assertIsNone(expr, msg=''): """Checks that 'expr' is None""" return assertIs(expr, None, msg) @@ -2500,40 +2494,79 @@ def assertRaisesRegexp(_exception, _regexp, _callable=None, *args, **kwargs): class _AssertRaisesContextmanager(object): """ - Handles the behind the scenes work - for assertRaises and assertRaisesRegexp + Context manager behind assertRaisesRegexp. """ - def __init__(self, exception, regexp=None, *args, **kwargs): + def __init__(self, exception, regexp=None): + """ + Initialize an _AssertRaisesContextManager instance. + + Parameters + ---------- + exception : class + The expected Exception class. + regexp : str, default None + The regex to compare against the Exception message. 
+ """ + self.exception = exception + if regexp is not None and not hasattr(regexp, "search"): regexp = re.compile(regexp, re.DOTALL) + self.regexp = regexp def __enter__(self): return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__(self, exc_type, exc_value, trace_back): expected = self.exception - if not exc_type: - name = getattr(expected, "__name__", str(expected)) - raise AssertionError("{0} not raised.".format(name)) - if issubclass(exc_type, expected): - return self.handle_success(exc_type, exc_value, traceback) - return self.handle_failure(exc_type, exc_value, traceback) - - def handle_failure(*args, **kwargs): - # Failed, so allow Exception to bubble up - return False - def handle_success(self, exc_type, exc_value, traceback): - if self.regexp is not None: - val = str(exc_value) - if not self.regexp.search(val): - e = AssertionError('"%s" does not match "%s"' % - (self.regexp.pattern, str(val))) - raise_with_traceback(e, traceback) - return True + if not exc_type: + exp_name = getattr(expected, "__name__", str(expected)) + raise AssertionError("{0} not raised.".format(exp_name)) + + return self.exception_matches(exc_type, exc_value, trace_back) + + def exception_matches(self, exc_type, exc_value, trace_back): + """ + Check that the Exception raised matches the expected Exception + and expected error message regular expression. + + Parameters + ---------- + exc_type : class + The type of Exception raised. + exc_value : Exception + The instance of `exc_type` raised. + trace_back : stack trace object + The traceback object associated with `exc_value`. + + Returns + ------- + is_matched : bool + Whether or not the Exception raised matches the expected + Exception class and expected error message regular expression. + + Raises + ------ + AssertionError : The error message provided does not match + the expected error message regular expression. + """ + + if issubclass(exc_type, self.exception): + if self.regexp is not None: + val = str(exc_value) + + if not self.regexp.search(val): + e = AssertionError('"%s" does not match "%s"' % + (self.regexp.pattern, str(val))) + raise_with_traceback(e, trace_back) + + return True + else: + # Failed, so allow Exception to bubble up. 
+ return False @contextmanager From 12f0762f65274544ef39304f7d6d7848087cd7e0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 23 Apr 2017 10:18:29 -0400 Subject: [PATCH 454/933] TST: fix various deprecation warnings in tests suite (#16100) --- pandas/tests/frame/test_quantile.py | 2 +- pandas/tests/io/formats/test_style.py | 3 ++- pandas/tests/plotting/test_misc.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 738ddb89db652..1a5ba3ccad400 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -431,7 +431,7 @@ def test_quantile_empty(self): # res = df.quantile(0.5) # datetimes - df = DataFrame(columns=['a', 'b'], dtype='datetime64') + df = DataFrame(columns=['a', 'b'], dtype='datetime64[ns]') # FIXME (gives NaNs instead of NaT in 0.18.1 or 0.19.0) # res = df.quantile(0.5, numeric_only=False) diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 3b8bbf239d941..96bf2b605ffa1 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -752,5 +752,6 @@ def test_from_custom_template(tmpdir): def test_shim(): # https://github.com/pandas-dev/pandas/pull/16059 # Remove in 0.21 - with pytest.warns(FutureWarning): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): from pandas.formats.style import Styler as _styler # noqa diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 07abd0190a417..9b8569e8680e4 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -245,7 +245,7 @@ def test_parallel_coordinates(self): def test_parallel_coordinates_with_sorted_labels(self): """ For #15908 """ - from pandas.tools.plotting import parallel_coordinates + from pandas.plotting import parallel_coordinates df = DataFrame({"feat": [i for i in range(30)], "class": [2 for _ in range(10)] + From e501e1d7d161da9596156e07201b1370ed2ee10e Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 23 Apr 2017 16:38:02 -0400 Subject: [PATCH 455/933] MAINT: Remove assertIn from testing (#16101) --- pandas/tests/frame/test_alter_axes.py | 2 +- .../tests/frame/test_axis_select_reindex.py | 2 +- pandas/tests/frame/test_convert_to.py | 6 ++-- pandas/tests/frame/test_indexing.py | 4 +-- pandas/tests/frame/test_operators.py | 2 +- pandas/tests/frame/test_repr_info.py | 2 +- pandas/tests/frame/test_to_csv.py | 4 +-- pandas/tests/groupby/test_groupby.py | 32 +++++++++---------- .../tests/indexes/datetimes/test_datetime.py | 2 +- pandas/tests/indexes/datetimes/test_ops.py | 2 +- pandas/tests/indexes/test_base.py | 8 ++--- pandas/tests/indexes/test_multi.py | 4 +-- pandas/tests/indexes/timedeltas/test_ops.py | 2 +- .../indexing/test_chaining_and_caching.py | 8 ++--- pandas/tests/io/formats/test_format.py | 8 ++--- pandas/tests/io/parser/c_parser_only.py | 2 +- pandas/tests/io/parser/parse_dates.py | 12 +++---- pandas/tests/io/test_html.py | 4 +-- pandas/tests/reshape/test_join.py | 10 +++--- pandas/tests/reshape/test_merge.py | 4 +-- pandas/tests/scalar/test_period.py | 8 ++--- pandas/tests/scalar/test_timestamp.py | 14 ++++---- pandas/tests/series/test_alter_axes.py | 4 +-- pandas/tests/series/test_indexing.py | 2 +- pandas/tests/series/test_missing.py | 2 +- pandas/tests/series/test_repr.py | 4 +-- pandas/tests/series/test_timeseries.py | 2 +- pandas/tests/tseries/test_timezones.py | 4 +-- pandas/util/testing.py | 8 
+---- 29 files changed, 81 insertions(+), 87 deletions(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index f32e001ea984a..1a3de7b463a19 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -138,7 +138,7 @@ def test_set_index_nonuniq(self): 'E': np.random.randn(5)}) with assertRaisesRegexp(ValueError, 'Index has duplicate keys'): df.set_index('A', verify_integrity=True, inplace=True) - self.assertIn('A', df) + assert 'A' in df def test_set_index_bug(self): # GH1590 diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 5b3a0a9e01f35..636194d32ad46 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -734,7 +734,7 @@ def test_filter_regex_search(self): # regex filtered = fcopy.filter(regex='[A]+') self.assertEqual(len(filtered.columns), 2) - self.assertIn('AA', filtered) + assert 'AA' in filtered # doesn't have to be at beginning df = DataFrame({'aBBa': [1, 2], diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 64edc52508216..6a49c88f17526 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -156,16 +156,16 @@ def test_to_records_index_name(self): df = DataFrame(np.random.randn(3, 3)) df.index.name = 'X' rs = df.to_records() - self.assertIn('X', rs.dtype.fields) + assert 'X' in rs.dtype.fields df = DataFrame(np.random.randn(3, 3)) rs = df.to_records() - self.assertIn('index', rs.dtype.fields) + assert 'index' in rs.dtype.fields df.index = MultiIndex.from_tuples([('a', 'x'), ('a', 'y'), ('b', 'z')]) df.index.names = ['A', None] rs = df.to_records() - self.assertIn('level_0', rs.dtype.fields) + assert 'level_0' in rs.dtype.fields def test_to_records_with_unicode_index(self): # GH13172 diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 780cb3d0457bd..12c5b33fcbce9 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -422,7 +422,7 @@ def test_setitem(self): # not sure what else to do here series = self.frame['A'][::2] self.frame['col5'] = series - self.assertIn('col5', self.frame) + assert 'col5' in self.frame self.assertEqual(len(series), 15) self.assertEqual(len(self.frame), 30) @@ -600,7 +600,7 @@ def test_setitem_corner(self): index=np.arange(3)) del df['B'] df['B'] = [1., 2., 3.] 
- self.assertIn('B', df) + assert 'B' in df self.assertEqual(len(df.columns), 2) df['A'] = 'beginning' diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index ce756ca188bf0..18639990662b0 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -831,7 +831,7 @@ def test_combineSeries(self): for key, s in compat.iteritems(self.frame): assert_series_equal(larger_added[key], s + series[key]) - self.assertIn('E', larger_added) + assert 'E' in larger_added self.assertTrue(np.isnan(larger_added['E']).all()) # vs mix (upcast) as needed diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index be55efac2992b..efbdc05ba23c8 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -171,7 +171,7 @@ def test_repr_column_name_unicode_truncation_bug(self): ' the File through the code..')}) result = repr(df) - self.assertIn('StringCol', result) + assert 'StringCol' in result def test_latex_repr(self): result = r"""\begin{tabular}{llll} diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 2df2e23c3f877..0fd1df0b733f8 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -909,7 +909,7 @@ def test_to_csv_compression_gzip(self): text = f.read().decode('utf8') f.close() for col in df.columns: - self.assertIn(col, text) + assert col in text def test_to_csv_compression_bz2(self): # GH7615 @@ -932,7 +932,7 @@ def test_to_csv_compression_bz2(self): text = f.read().decode('utf8') f.close() for col in df.columns: - self.assertIn(col, text) + assert col in text def test_to_csv_compression_xz(self): # GH11852 diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 752c0689b0660..05fe1c6f58e9a 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2483,14 +2483,14 @@ def test_groupby_series_with_name(self): result = self.df.groupby(self.df['A']).mean() result2 = self.df.groupby(self.df['A'], as_index=False).mean() self.assertEqual(result.index.name, 'A') - self.assertIn('A', result2) + assert 'A' in result2 result = self.df.groupby([self.df['A'], self.df['B']]).mean() result2 = self.df.groupby([self.df['A'], self.df['B']], as_index=False).mean() self.assertEqual(result.index.names, ('A', 'B')) - self.assertIn('A', result2) - self.assertIn('B', result2) + assert 'A' in result2 + assert 'B' in result2 def test_seriesgroupby_name_attr(self): # GH 6265 @@ -3357,10 +3357,10 @@ def test_groupby_with_small_elem(self): 'change': [1234, 5678]}, index=pd.DatetimeIndex(['2014-09-10', '2013-10-10'])) grouped = df.groupby([pd.TimeGrouper(freq='M'), 'event']) - self.assertEqual(len(grouped.groups), 2) - self.assertEqual(grouped.ngroups, 2) - self.assertIn((pd.Timestamp('2014-09-30'), 'start'), grouped.groups) - self.assertIn((pd.Timestamp('2013-10-31'), 'start'), grouped.groups) + assert len(grouped.groups) == 2 + assert grouped.ngroups == 2 + assert (pd.Timestamp('2014-09-30'), 'start') in grouped.groups + assert (pd.Timestamp('2013-10-31'), 'start') in grouped.groups res = grouped.get_group((pd.Timestamp('2014-09-30'), 'start')) tm.assert_frame_equal(res, df.iloc[[0], :]) @@ -3372,10 +3372,10 @@ def test_groupby_with_small_elem(self): index=pd.DatetimeIndex(['2014-09-10', '2013-10-10', '2014-09-15'])) grouped = df.groupby([pd.TimeGrouper(freq='M'), 'event']) - self.assertEqual(len(grouped.groups), 2) - 
self.assertEqual(grouped.ngroups, 2) - self.assertIn((pd.Timestamp('2014-09-30'), 'start'), grouped.groups) - self.assertIn((pd.Timestamp('2013-10-31'), 'start'), grouped.groups) + assert len(grouped.groups) == 2 + assert grouped.ngroups == 2 + assert (pd.Timestamp('2014-09-30'), 'start') in grouped.groups + assert (pd.Timestamp('2013-10-31'), 'start') in grouped.groups res = grouped.get_group((pd.Timestamp('2014-09-30'), 'start')) tm.assert_frame_equal(res, df.iloc[[0, 2], :]) @@ -3388,11 +3388,11 @@ def test_groupby_with_small_elem(self): index=pd.DatetimeIndex(['2014-09-10', '2013-10-10', '2014-08-05'])) grouped = df.groupby([pd.TimeGrouper(freq='M'), 'event']) - self.assertEqual(len(grouped.groups), 3) - self.assertEqual(grouped.ngroups, 3) - self.assertIn((pd.Timestamp('2014-09-30'), 'start'), grouped.groups) - self.assertIn((pd.Timestamp('2013-10-31'), 'start'), grouped.groups) - self.assertIn((pd.Timestamp('2014-08-31'), 'start'), grouped.groups) + assert len(grouped.groups) == 3 + assert grouped.ngroups == 3 + assert (pd.Timestamp('2014-09-30'), 'start') in grouped.groups + assert (pd.Timestamp('2013-10-31'), 'start') in grouped.groups + assert (pd.Timestamp('2014-08-31'), 'start') in grouped.groups res = grouped.get_group((pd.Timestamp('2014-09-30'), 'start')) tm.assert_frame_equal(res, df.iloc[[0], :]) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 7cef5eeb94915..93fc855178800 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -101,7 +101,7 @@ def test_reasonable_keyerror(self): try: index.get_loc('1/1/2000') except KeyError as e: - self.assertIn('2000', str(e)) + assert '2000' in str(e) def test_roundtrip_pickle_with_tz(self): diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 2eff8a12dee77..235c8f1de1fae 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -632,7 +632,7 @@ def test_nonunique_contains(self): for idx in map(DatetimeIndex, ([0, 1, 0], [0, 0, -1], [0, -1, -1], ['2015', '2015', '2016'], ['2015', '2015', '2014'])): - tm.assertIn(idx[0], idx) + assert idx[0] in idx def test_order(self): # with freq diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 9a166aa3340e3..7db7410d79349 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -857,10 +857,10 @@ def test_add_string(self): def test_iadd_string(self): index = pd.Index(['a', 'b', 'c']) # doesn't fail test unless there is a check before `+=` - self.assertIn('a', index) + assert 'a' in index index += '_x' - self.assertIn('a_x', index) + assert 'a_x' in index def test_difference(self): @@ -963,8 +963,8 @@ def test_summary(self): ind = Index(['{other}%s', "~:{range}:0"], name='A') result = ind.summary() # shouldn't be formatted accidentally. 
- self.assertIn('~:{range}:0', result) - self.assertIn('{other}%s', result) + assert '~:{range}:0' in result + assert '{other}%s' in result def test_format(self): self._check_method_works(Index.format) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 5000a71dfc756..98fce437d26c8 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1597,8 +1597,8 @@ def test_union(self): # other = Index(['A', 'B', 'C']) # result = other.union(self.index) - # self.assertIn(('foo', 'one'), result) - # self.assertIn('B', result) + # assert ('foo', 'one') in result + # assert 'B' in result # result2 = self.index.union(other) # self.assertTrue(result.equals(result2)) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 5201af3af3531..adf164977205f 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -561,7 +561,7 @@ def test_nonunique_contains(self): for idx in map(TimedeltaIndex, ([0, 1, 0], [0, 0, -1], [0, -1, -1], ['00:01:00', '00:01:00', '00:02:00'], ['00:01:00', '00:01:00', '00:00:01'])): - tm.assertIn(idx[0], idx) + assert idx[0] in idx def test_unknown_attribute(self): # see gh-9680 diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index c0d83c580d1d1..b776d3c2d08ea 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -373,15 +373,15 @@ def test_cache_updating(self): df['A'] # cache series with catch_warnings(record=True): df.ix["Hello Friend"] = df.ix[0] - self.assertIn("Hello Friend", df['A'].index) - self.assertIn("Hello Friend", df['B'].index) + assert "Hello Friend" in df['A'].index + assert "Hello Friend" in df['B'].index with catch_warnings(record=True): panel = tm.makePanel() panel.ix[0] # get first item into cache panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1 - self.assertIn("A+1", panel.ix[0].columns) - self.assertIn("A+1", panel.ix[1].columns) + assert "A+1" in panel.ix[0].columns + assert "A+1" in panel.ix[1].columns # 5216 # make sure that we don't try to set a dead cache diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index ea796a497bd19..ba59add4305d8 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -959,7 +959,7 @@ def test_wide_repr_named(self): self.assertTrue(len(wider_repr) < len(wide_repr)) for line in wide_repr.splitlines()[1::13]: - self.assertIn('DataFrame Index', line) + assert 'DataFrame Index' in line reset_option('display.expand_frame_repr') @@ -981,7 +981,7 @@ def test_wide_repr_multiindex(self): self.assertTrue(len(wider_repr) < len(wide_repr)) for line in wide_repr.splitlines()[1::13]: - self.assertIn('Level 0 Level 1', line) + assert 'Level 0 Level 1' in line reset_option('display.expand_frame_repr') @@ -1875,9 +1875,9 @@ def test_float_trim_zeros(self): if line.startswith('dtype:'): continue if _three_digit_exp(): - self.assertIn('+010', line) + assert '+010' in line else: - self.assertIn('+10', line) + assert '+10' in line def test_datetimeindex(self): diff --git a/pandas/tests/io/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py index 6d3dc8f637012..f4ca632e09f39 100644 --- a/pandas/tests/io/parser/c_parser_only.py +++ b/pandas/tests/io/parser/c_parser_only.py @@ -33,7 +33,7 @@ def test_buffer_overflow(self): try: 
self.read_table(StringIO(malf)) except Exception as err: - self.assertIn(cperr, str(err)) + assert cperr in str(err) def test_buffer_rd_bytes(self): # see gh-12098: src->buffer in the C parser can be freed twice leading diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index cdc4f9fa9d84f..b7147cd77f4f6 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -135,7 +135,7 @@ def test_multiple_date_cols_int_cast(self): # it works! df = self.read_csv(StringIO(data), header=None, parse_dates=date_spec, date_parser=conv.parse_date_time) - self.assertIn('nominal', df) + assert 'nominal' in df def test_multiple_date_col_timestamp_parse(self): data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25 @@ -530,7 +530,7 @@ def test_parse_date_time(self): df = self.read_csv(StringIO(data), sep=',', header=0, parse_dates=datecols, date_parser=conv.parse_date_time) - self.assertIn('date_time', df) + assert 'date_time' in df self.assertEqual(df.date_time.loc[0], datetime(2001, 1, 5, 10, 0, 0)) data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" @@ -558,7 +558,7 @@ def test_parse_date_fields(self): df = self.read_csv(StringIO(data), sep=',', header=0, parse_dates=datecols, date_parser=conv.parse_date_fields) - self.assertIn('ymd', df) + assert 'ymd' in df self.assertEqual(df.ymd.loc[0], datetime(2001, 1, 10)) def test_datetime_six_col(self): @@ -585,7 +585,7 @@ def test_datetime_six_col(self): df = self.read_csv(StringIO(data), sep=',', header=0, parse_dates=datecols, date_parser=conv.parse_all_fields) - self.assertIn('ymdHMS', df) + assert 'ymdHMS' in df self.assertEqual(df.ymdHMS.loc[0], datetime(2001, 1, 5, 10, 0, 0)) def test_datetime_fractional_seconds(self): @@ -598,7 +598,7 @@ def test_datetime_fractional_seconds(self): df = self.read_csv(StringIO(data), sep=',', header=0, parse_dates=datecols, date_parser=conv.parse_all_fields) - self.assertIn('ymdHMS', df) + assert 'ymdHMS' in df self.assertEqual(df.ymdHMS.loc[0], datetime(2001, 1, 5, 10, 0, 0, microsecond=123456)) self.assertEqual(df.ymdHMS.loc[1], datetime(2001, 1, 5, 10, 0, 0, @@ -611,7 +611,7 @@ def test_generic(self): df = self.read_csv(StringIO(data), sep=',', header=0, parse_dates=datecols, date_parser=dateconverter) - self.assertIn('ym', df) + assert 'ym' in df self.assertEqual(df.ym.loc[0], date(2001, 1, 1)) def test_dateparser_resolution_if_not_ns(self): diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 866ed2cf2f359..e7eaab098fe4d 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -566,10 +566,10 @@ def test_gold_canyon(self): with open(self.banklist_data, 'r') as f: raw_text = f.read() - self.assertIn(gc, raw_text) + assert gc in raw_text df = self.read_html(self.banklist_data, 'Gold Canyon', attrs={'id': 'table'})[0] - self.assertIn(gc, df.to_string()) + assert gc in df.to_string() def test_different_number_of_rows(self): expected = """ diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py index aa42f190926ae..7e39806b42cbf 100644 --- a/pandas/tests/reshape/test_join.py +++ b/pandas/tests/reshape/test_join.py @@ -153,15 +153,15 @@ def test_handle_overlap(self): joined = merge(self.df, self.df2, on='key2', suffixes=['.foo', '.bar']) - self.assertIn('key1.foo', joined) - self.assertIn('key1.bar', joined) + assert 'key1.foo' in joined + assert 'key1.bar' in joined def test_handle_overlap_arbitrary_key(self): joined = merge(self.df, self.df2, left_on='key2', 
right_on='key1', suffixes=['.foo', '.bar']) - self.assertIn('key1.foo', joined) - self.assertIn('key2.bar', joined) + assert 'key1.foo' in joined + assert 'key2.bar' in joined def test_join_on(self): target = self.target @@ -251,7 +251,7 @@ def test_join_with_len0(self): # nothing to merge merged = self.target.join(self.source.reindex([]), on='C') for col in self.source: - self.assertIn(col, merged) + assert col in merged self.assertTrue(merged[col].isnull().all()) merged2 = self.target.join(self.source.reindex([]), on='C', diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 8ddeb4bdda14a..73d0346546b97 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -128,8 +128,8 @@ def test_merge_overlap(self): merged = merge(self.left, self.left, on='key') exp_len = (self.left['key'].value_counts() ** 2).sum() self.assertEqual(len(merged), exp_len) - self.assertIn('v1_x', merged) - self.assertIn('v1_y', merged) + assert 'v1_x' in merged + assert 'v1_y' in merged def test_merge_different_column_key_names(self): left = DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index 50c8bcbb68fcb..1635d90189b50 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -54,7 +54,7 @@ def test_period_cons_quarterly(self): for month in MONTHS: freq = 'Q-%s' % month exp = Period('1989Q3', freq=freq) - self.assertIn('1989Q3', str(exp)) + assert '1989Q3' in str(exp) stamp = exp.to_timestamp('D', how='end') p = Period(stamp, freq=freq) self.assertEqual(p, exp) @@ -544,14 +544,14 @@ def test_hash(self): def test_repr(self): p = Period('Jan-2000') - self.assertIn('2000-01', repr(p)) + assert '2000-01' in repr(p) p = Period('2000-12-15') - self.assertIn('2000-12-15', repr(p)) + assert '2000-12-15' in repr(p) def test_repr_nat(self): p = Period('nat', freq='M') - self.assertIn(repr(tslib.NaT), repr(p)) + assert repr(tslib.NaT) in repr(p) def test_millisecond_repr(self): p = Period('2000-01-01 12:15:02.123') diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 055da8b2cc07f..c1d5a069c248b 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1133,24 +1133,24 @@ def test_nanosecond_string_parsing(self): expected_repr = '2013-05-01 07:15:45.123456789' expected_value = 1367392545123456789 self.assertEqual(ts.value, expected_value) - self.assertIn(expected_repr, repr(ts)) + assert expected_repr in repr(ts) ts = Timestamp('2013-05-01 07:15:45.123456789+09:00', tz='Asia/Tokyo') self.assertEqual(ts.value, expected_value - 9 * 3600 * 1000000000) - self.assertIn(expected_repr, repr(ts)) + assert expected_repr in repr(ts) ts = Timestamp('2013-05-01 07:15:45.123456789', tz='UTC') self.assertEqual(ts.value, expected_value) - self.assertIn(expected_repr, repr(ts)) + assert expected_repr in repr(ts) ts = Timestamp('2013-05-01 07:15:45.123456789', tz='US/Eastern') self.assertEqual(ts.value, expected_value + 4 * 3600 * 1000000000) - self.assertIn(expected_repr, repr(ts)) + assert expected_repr in repr(ts) # GH 10041 ts = Timestamp('20130501T071545.123456789') self.assertEqual(ts.value, expected_value) - self.assertIn(expected_repr, repr(ts)) + assert expected_repr in repr(ts) def test_nanosecond_timestamp(self): # GH 7610 @@ -1365,7 +1365,7 @@ def test_timestamp_repr(self): iso8601 = '1850-01-01 01:23:45.012345' stamp = Timestamp(iso8601, tz='US/Eastern') 
result = repr(stamp) - self.assertIn(iso8601, result) + assert iso8601 in result def test_timestamp_from_ordinal(self): @@ -1440,7 +1440,7 @@ def test_to_html_timestamp(self): df = DataFrame(np.random.randn(10, 4), index=rng) result = df.to_html() - self.assertIn('2000-01-01', result) + assert '2000-01-01' in result def test_series_map_box_timestamps(self): # #2689, #2627 diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index a7b8d285bbe54..17a270c3a9346 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -118,10 +118,10 @@ def test_reset_index(self): ser.name = 'value' df = ser.reset_index() - self.assertIn('value', df) + assert 'value' in df df = ser.reset_index(name='value2') - self.assertIn('value2', df) + assert 'value2' in df # check inplace s = ser.reset_index(drop=True) diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 1181e3a44f295..22869fca35931 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -2397,7 +2397,7 @@ def test_indexing_over_size_cutoff(self): pos = n * 3 timestamp = df.index[pos] - self.assertIn(timestamp, df.index) + assert timestamp in df.index # it works! df.loc[timestamp] diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 2f7bf6902f5af..e7c1b22216dcb 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -459,7 +459,7 @@ def test_fillna_invalid_method(self): try: self.ts.fillna(method='ffil') except ValueError as inst: - self.assertIn('ffil', str(inst)) + assert 'ffil' in str(inst) def test_ffill(self): ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5)) diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index a80c5edcc21bd..b4ad90f6f35af 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -95,12 +95,12 @@ def test_repr(self): # 0 as name ser = Series(np.random.randn(100), name=0) rep_str = repr(ser) - self.assertIn("Name: 0", rep_str) + assert "Name: 0" in rep_str # tidy repr ser = Series(np.random.randn(1001), name=0) rep_str = repr(ser) - self.assertIn("Name: 0", rep_str) + assert "Name: 0" in rep_str ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"]) self.assertFalse("\t" in repr(ser)) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index bafb8ebfd7145..486f129b6814c 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -796,7 +796,7 @@ def test_to_csv_numpy_16_bug(self): frame.to_csv(buf) result = buf.getvalue() - self.assertIn('2000-01-01', result) + assert '2000-01-01' in result def test_series_map_box_timedelta(self): # GH 11349 diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index c9ac4f36e7769..40ff2421a9f63 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -372,7 +372,7 @@ def test_utc_box_timestamp_and_localize(self): rng = date_range('3/13/2012', '3/14/2012', freq='H', tz='utc') rng_eastern = rng.tz_convert(self.tzstr('US/Eastern')) # test not valid for dateutil timezones. 
- # self.assertIn('EDT', repr(rng_eastern[0].tzinfo)) + # assert 'EDT' in repr(rng_eastern[0].tzinfo) self.assertTrue('EDT' in repr(rng_eastern[0].tzinfo) or 'tzfile' in repr(rng_eastern[0].tzinfo)) @@ -683,7 +683,7 @@ def test_index_with_timezone_repr(self): rng_eastern = rng.tz_localize(self.tzstr('US/Eastern')) rng_repr = repr(rng_eastern) - self.assertIn('2010-04-13 00:00:00', rng_repr) + assert '2010-04-13 00:00:00' in rng_repr def test_index_astype_asobject_tzinfos(self): # #1345 diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 7565cbb859201..d6f3e00b7594d 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -894,7 +894,7 @@ def _check_types(l, r, obj='Index'): assert_attr_equal('dtype', l, r, obj=obj) # allow string-like to have different inferred_types if l.inferred_type in ('string', 'unicode'): - assertIn(r.inferred_type, ('string', 'unicode')) + assert r.inferred_type in ('string', 'unicode') else: assert_attr_equal('inferred_type', l, r, obj=obj) @@ -1066,12 +1066,6 @@ def assertIs(first, second, msg=''): assert a is b, "%s: %r is not %r" % (msg.format(a, b), a, b) -def assertIn(first, second, msg=''): - """Checks that 'first' is in 'second'""" - a, b = first, second - assert a in b, "%s: %r is not in %r" % (msg.format(a, b), a, b) - - def assertIsNone(expr, msg=''): """Checks that 'expr' is None""" return assertIs(expr, None, msg) From 60a926bf7f2727a7ba1ff31ac371410662886793 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 23 Apr 2017 19:19:41 -0400 Subject: [PATCH 456/933] MAINT: Remove assertIsNone from testing (#16102) --- pandas/tests/computation/test_eval.py | 6 ++-- pandas/tests/frame/test_api.py | 4 +-- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/frame/test_indexing.py | 4 +-- pandas/tests/frame/test_repr_info.py | 2 +- pandas/tests/frame/test_timeseries.py | 4 +-- pandas/tests/groupby/test_groupby.py | 8 ++--- pandas/tests/indexes/common.py | 2 +- .../tests/indexes/datetimes/test_datetime.py | 2 +- pandas/tests/indexes/datetimes/test_ops.py | 30 ++++++++-------- pandas/tests/indexes/datetimes/test_setops.py | 12 +++---- pandas/tests/indexes/test_base.py | 12 +++---- pandas/tests/indexes/test_multi.py | 34 +++++++++---------- pandas/tests/indexes/test_numeric.py | 16 ++++----- pandas/tests/indexes/test_range.py | 8 ++--- pandas/tests/indexes/timedeltas/test_ops.py | 20 +++++------ .../indexes/timedeltas/test_timedelta.py | 2 +- pandas/tests/io/formats/test_format.py | 4 +-- pandas/tests/io/formats/test_printing.py | 2 +- pandas/tests/io/formats/test_to_html.py | 2 +- pandas/tests/io/parser/common.py | 4 +-- pandas/tests/io/test_pytables.py | 8 ++--- pandas/tests/plotting/test_boxplot_method.py | 2 +- pandas/tests/plotting/test_datetimelike.py | 10 +++--- pandas/tests/scalar/test_timestamp.py | 2 +- pandas/tests/series/test_api.py | 12 +++---- pandas/tests/series/test_combine_concat.py | 2 +- pandas/tests/series/test_indexing.py | 2 +- pandas/tests/series/test_io.py | 4 +-- pandas/tests/series/test_timeseries.py | 18 +++++----- pandas/tests/sparse/test_indexing.py | 6 ++-- pandas/tests/sparse/test_series.py | 2 +- pandas/tests/test_categorical.py | 10 +++--- pandas/tests/test_panel.py | 2 +- pandas/tests/tseries/test_frequencies.py | 6 ++-- pandas/tests/tseries/test_offsets.py | 2 +- pandas/util/testing.py | 5 --- 37 files changed, 134 insertions(+), 139 deletions(-) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index eacbd2b390154..6ec06f75de06d 100644 --- 
a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1336,7 +1336,7 @@ def test_multi_line_expression(self): c = a + b d = c + b""", inplace=True) assert_frame_equal(expected, df) - self.assertIsNone(ans) + assert ans is None expected['a'] = expected['a'] - 1 expected['e'] = expected['a'] + 2 @@ -1344,7 +1344,7 @@ def test_multi_line_expression(self): a = a - 1 e = a + 2""", inplace=True) assert_frame_equal(expected, df) - self.assertIsNone(ans) + assert ans is None # multi-line not valid if not all assignments with pytest.raises(ValueError): @@ -1384,7 +1384,7 @@ def test_multi_line_expression_local_variable(self): d = c + @local_var """, inplace=True) assert_frame_equal(expected, df) - self.assertIsNone(ans) + assert ans is None def test_assignment_in_query(self): # GH 8664 diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 879458a38770d..e060e863c0431 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -38,7 +38,7 @@ def test_copy_index_name_checking(self): ind.name = None cp = self.frame.copy() getattr(cp, attr).name = 'foo' - self.assertIsNone(getattr(self.frame, attr).name) + assert getattr(self.frame, attr).name is None def test_getitem_pop_assign_name(self): s = self.frame['A'] @@ -116,7 +116,7 @@ def test_new_empty_index(self): df1 = DataFrame(randn(0, 3)) df2 = DataFrame(randn(0, 3)) df1.index.name = 'foo' - self.assertIsNone(df2.index.name) + assert df2.index.name is None def test_array_interface(self): with np.errstate(all='ignore'): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 3bcc058316f77..737d9f8e50477 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -91,7 +91,7 @@ def test_constructor_dtype_nocast_view(self): def test_constructor_dtype_list_data(self): df = DataFrame([[1, '2'], [None, 'a']], dtype=object) - self.assertIsNone(df.loc[1, 0]) + assert df.loc[1, 0] is None self.assertEqual(df.loc[0, 1], '2') def test_constructor_list_frames(self): diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 12c5b33fcbce9..088f863e5358b 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -81,7 +81,7 @@ def test_get(self): b = self.frame.get('B') assert_series_equal(b, self.frame['B']) - self.assertIsNone(self.frame.get('foo')) + assert self.frame.get('foo') is None assert_series_equal(self.frame.get('foo', self.frame['B']), self.frame['B']) # None @@ -89,7 +89,7 @@ def test_get(self): for df in [DataFrame(), DataFrame(columns=list('AB')), DataFrame(columns=list('AB'), index=range(3))]: result = df.get(None) - self.assertIsNone(result) + assert result is None def test_getitem_iterator(self): idx = iter(['A', 'B', 'C']) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index efbdc05ba23c8..630fa5ad57fad 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -189,7 +189,7 @@ def test_latex_repr(self): self.assertEqual(result, df._repr_latex_()) # GH 12182 - self.assertIsNone(df._repr_latex_()) + assert df._repr_latex_() is None @tm.capture_stdout def test_info(self): diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index cf42f751f390a..090f742a69b63 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -415,8 +415,8 @@ def 
test_first_last_valid(self): # GH12800 empty = DataFrame() - self.assertIsNone(empty.last_valid_index()) - self.assertIsNone(empty.first_valid_index()) + assert empty.last_valid_index() is None + assert empty.first_valid_index() is None def test_at_time_frame(self): rng = date_range('1/1/2000', '1/5/2000', freq='5min') diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 05fe1c6f58e9a..02ef9e614150c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1972,11 +1972,11 @@ def test_apply_frame_yield_constant(self): # GH13568 result = self.df.groupby(['A', 'B']).apply(len) self.assertTrue(isinstance(result, Series)) - self.assertIsNone(result.name) + assert result.name is None result = self.df.groupby(['A', 'B'])[['C', 'D']].apply(len) self.assertTrue(isinstance(result, Series)) - self.assertIsNone(result.name) + assert result.name is None def test_apply_frame_to_series(self): grouped = self.df.groupby(['A', 'B']) @@ -2954,7 +2954,7 @@ def test_no_nonsense_name(self): s.name = None result = s.groupby(self.frame['A']).agg(np.sum) - self.assertIsNone(result.name) + assert result.name is None def test_multifunc_sum_bug(self): # GH #1065 @@ -3060,7 +3060,7 @@ def test_no_dummy_key_names(self): # GH #1291 result = self.df.groupby(self.df['A'].values).sum() - self.assertIsNone(result.index.name) + assert result.index.name is None result = self.df.groupby([self.df['A'].values, self.df['B'].values ]).sum() diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index ff90c924933f3..fa1f31ded5f1c 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -195,7 +195,7 @@ def test_set_name_methods(self): res = ind.rename(new_name, inplace=True) # should return None - self.assertIsNone(res) + assert res is None self.assertEqual(ind.name, new_name) self.assertEqual(ind.names, [new_name]) # with assertRaisesRegexp(TypeError, "list-like"): diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 93fc855178800..31e795a80f2e6 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -480,7 +480,7 @@ def test_take(self): for taken in [taken1, taken2]: tm.assert_index_equal(taken, expected) assert isinstance(taken, DatetimeIndex) - self.assertIsNone(taken.freq) + assert taken.freq is None self.assertEqual(taken.tz, expected.tz) self.assertEqual(taken.name, expected.name) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 235c8f1de1fae..de41f321b245b 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -197,7 +197,7 @@ def test_repeat_range(self): rng = date_range('1/1/2000', '1/1/2001') result = rng.repeat(5) - self.assertIsNone(result.freq) + assert result.freq is None self.assertEqual(len(result), 5 * len(rng)) for tz in self.tz: @@ -206,14 +206,14 @@ def test_repeat_range(self): '2001-01-02', '2001-01-02'], tz=tz) for res in [index.repeat(2), np.repeat(index, 2)]: tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) + assert res.freq is None index = pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz) exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', '2001-01-03', '2001-01-03'], tz=tz) for res in [index.repeat(2), np.repeat(index, 2)]: tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) + assert res.freq is None index = 
pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'], tz=tz) @@ -223,7 +223,7 @@ def test_repeat_range(self): tz=tz) for res in [index.repeat(3), np.repeat(index, 3)]: tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) + assert res.freq is None def test_repeat(self): reps = 2 @@ -242,7 +242,7 @@ def test_repeat(self): res = rng.repeat(reps) tm.assert_index_equal(res, expected_rng) - self.assertIsNone(res.freq) + assert res.freq is None tm.assert_index_equal(np.repeat(rng, reps), expected_rng) tm.assertRaisesRegexp(ValueError, msg, np.repeat, @@ -694,18 +694,18 @@ def test_order(self): for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: ordered = idx.sort_values() tm.assert_index_equal(ordered, expected) - self.assertIsNone(ordered.freq) + assert ordered.freq is None ordered = idx.sort_values(ascending=False) tm.assert_index_equal(ordered, expected[::-1]) - self.assertIsNone(ordered.freq) + assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) + assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) @@ -713,7 +713,7 @@ def test_order(self): exp = np.array([2, 1, 3, 4, 0]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) + assert ordered.freq is None def test_getitem(self): idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') @@ -757,10 +757,10 @@ def test_drop_duplicates_metadata(self): self.assertEqual(idx.freq, result.freq) idx_dup = idx.append(idx) - self.assertIsNone(idx_dup.freq) # freq is reset + assert idx_dup.freq is None # freq is reset result = idx_dup.drop_duplicates() tm.assert_index_equal(idx, result) - self.assertIsNone(result.freq) + assert result.freq is None def test_drop_duplicates(self): # to check Index/Series compat @@ -816,14 +816,14 @@ def test_take(self): '2011-01-06'], freq=None, tz=idx.tz, name='idx') tm.assert_index_equal(result, expected) - self.assertIsNone(result.freq) + assert result.freq is None result = idx.take([-3, 2, 5]) expected = DatetimeIndex(['2011-01-29', '2011-01-03', '2011-01-06'], freq=None, tz=idx.tz, name='idx') tm.assert_index_equal(result, expected) - self.assertIsNone(result.freq) + assert result.freq is None def test_take_invalid_kwargs(self): idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') @@ -1145,7 +1145,7 @@ def test_getitem(self): fancy_indexed = self.rng[[4, 3, 2, 1, 0]] self.assertEqual(len(fancy_indexed), 5) assert isinstance(fancy_indexed, DatetimeIndex) - self.assertIsNone(fancy_indexed.freq) + assert fancy_indexed.freq is None # 32-bit vs. 64-bit platforms self.assertEqual(self.rng[4], self.rng[np.int_(4)]) @@ -1240,7 +1240,7 @@ def test_getitem(self): fancy_indexed = self.rng[[4, 3, 2, 1, 0]] self.assertEqual(len(fancy_indexed), 5) assert isinstance(fancy_indexed, DatetimeIndex) - self.assertIsNone(fancy_indexed.freq) + assert fancy_indexed.freq is None # 32-bit vs. 
64-bit platforms self.assertEqual(self.rng[4], self.rng[np.int_(4)]) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 3191b0e2c586d..3e6ed7756b9bd 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -65,7 +65,7 @@ def test_union_freq_both_none(self): result = expected.union(expected) tm.assert_index_equal(result, expected) - self.assertIsNone(result.freq) + assert result.freq is None def test_union_dataframe_index(self): rng1 = date_range('1/1/1999', '1/1/2012', freq='MS') @@ -137,7 +137,7 @@ def test_intersection(self): result = base.intersection(rng) tm.assert_index_equal(result, expected) self.assertEqual(result.name, expected.name) - self.assertIsNone(result.freq) + assert result.freq is None self.assertEqual(result.tz, expected.tz) # empty same freq GH2129 @@ -251,7 +251,7 @@ def test_outer_join(self): the_join = left.join(right, how='outer') assert isinstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) + assert the_join.freq is None # non-overlapping, no gap left = self.rng[:5] @@ -265,7 +265,7 @@ def test_outer_join(self): the_join = self.rng.join(rng, how='outer') assert isinstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) + assert the_join.freq is None def test_union_not_cacheable(self): rng = date_range('1/1/2000', periods=50, freq=Minute()) @@ -395,7 +395,7 @@ def test_outer_join(self): the_join = left.join(right, how='outer') assert isinstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) + assert the_join.freq is None # non-overlapping, no gap left = self.rng[:5] @@ -409,7 +409,7 @@ def test_outer_join(self): the_join = self.rng.join(rng, how='outer') assert isinstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) + assert the_join.freq is None def test_intersection_bug(self): # GH #771 diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 7db7410d79349..91d5068ee9f19 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -659,12 +659,12 @@ def test_intersection(self): second.name = 'B' intersect = first.intersection(second) - self.assertIsNone(intersect.name) + assert intersect.name is None first.name = None second.name = 'B' intersect = first.intersection(second) - self.assertIsNone(intersect.name) + assert intersect.name is None def test_union(self): first = self.strIndex[5:20] @@ -844,7 +844,7 @@ def test_append_empty_preserve_name(self): right = Index([1, 2, 3], name='bar') result = left.append(right) - self.assertIsNone(result.name) + assert result.name is None def test_add_string(self): # from bug report @@ -896,12 +896,12 @@ def test_symmetric_difference(self): result = idx1.symmetric_difference(idx2) expected = Index([1, 5]) self.assertTrue(tm.equalContents(result, expected)) - self.assertIsNone(result.name) + assert result.name is None # __xor__ syntax expected = idx1 ^ idx2 self.assertTrue(tm.equalContents(result, expected)) - self.assertIsNone(result.name) + assert result.name is None # multiIndex idx1 = MultiIndex.from_tuples(self.tuples) @@ -1015,7 +1015,7 @@ def test_format_none(self): idx = Index(values) idx.format() - self.assertIsNone(idx[3]) + assert idx[3] is None def test_logical_compat(self): idx = self.create_index() diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 98fce437d26c8..7a9b3c1c4b5d6 100644 --- a/pandas/tests/indexes/test_multi.py +++ 
b/pandas/tests/indexes/test_multi.py @@ -138,7 +138,7 @@ def test_set_name_methods(self): ind.set_names(new_names + new_names) new_names2 = [name + "SUFFIX2" for name in new_names] res = ind.set_names(new_names2, inplace=True) - self.assertIsNone(res) + assert res is None self.assertEqual(ind.names, new_names2) # set names for specific level (# GH7792) @@ -147,7 +147,7 @@ def test_set_name_methods(self): self.assertEqual(ind.names, [new_names[0], self.index_names[1]]) res = ind.set_names(new_names2[0], level=0, inplace=True) - self.assertIsNone(res) + assert res is None self.assertEqual(ind.names, [new_names2[0], self.index_names[1]]) # set names for multiple levels @@ -156,7 +156,7 @@ def test_set_name_methods(self): self.assertEqual(ind.names, new_names) res = ind.set_names(new_names2, level=[0, 1], inplace=True) - self.assertIsNone(res) + assert res is None self.assertEqual(ind.names, new_names2) def test_set_levels(self): @@ -182,7 +182,7 @@ def assert_matching(actual, expected, check_dtype=False): # level changing [w/ mutation] ind2 = self.index.copy() inplace_return = ind2.set_levels(new_levels, inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.levels, new_levels) # level changing specific level [w/o mutation] @@ -202,13 +202,13 @@ def assert_matching(actual, expected, check_dtype=False): # level changing specific level [w/ mutation] ind2 = self.index.copy() inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.levels, [new_levels[0], levels[1]]) assert_matching(self.index.levels, levels) ind2 = self.index.copy() inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.levels, [levels[0], new_levels[1]]) assert_matching(self.index.levels, levels) @@ -216,7 +216,7 @@ def assert_matching(actual, expected, check_dtype=False): ind2 = self.index.copy() inplace_return = ind2.set_levels(new_levels, level=[0, 1], inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.levels, new_levels) assert_matching(self.index.levels, levels) @@ -271,7 +271,7 @@ def assert_matching(actual, expected): # label changing [w/ mutation] ind2 = self.index.copy() inplace_return = ind2.set_labels(new_labels, inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.labels, new_labels) # label changing specific level [w/o mutation] @@ -291,13 +291,13 @@ def assert_matching(actual, expected): # label changing specific level [w/ mutation] ind2 = self.index.copy() inplace_return = ind2.set_labels(new_labels[0], level=0, inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.labels, [new_labels[0], labels[1]]) assert_matching(self.index.labels, labels) ind2 = self.index.copy() inplace_return = ind2.set_labels(new_labels[1], level=1, inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.labels, [labels[0], new_labels[1]]) assert_matching(self.index.labels, labels) @@ -305,7 +305,7 @@ def assert_matching(actual, expected): ind2 = self.index.copy() inplace_return = ind2.set_labels(new_labels, level=[0, 1], inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.labels, new_labels) assert_matching(self.index.labels, labels) @@ 
-457,10 +457,10 @@ def test_set_value_keeps_names(self): columns=['one', 'two', 'three', 'four'], index=idx) df = df.sort_index() - self.assertIsNone(df.is_copy) + assert df.is_copy is None self.assertEqual(df.index.names, ('Name', 'Number')) df = df.set_value(('grethe', '4'), 'one', 99.34) - self.assertIsNone(df.is_copy) + assert df.is_copy is None self.assertEqual(df.index.names, ('Name', 'Number')) def test_copy_names(self): @@ -1158,7 +1158,7 @@ def test_get_loc_level(self): loc, new_index = index.get_loc_level((0, 1, 0)) expected = 1 self.assertEqual(loc, expected) - self.assertIsNone(new_index) + assert new_index is None pytest.raises(KeyError, index.get_loc_level, (2, 2)) @@ -2067,12 +2067,12 @@ def test_join_multi(self): exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1], dtype=np.intp) tm.assert_index_equal(jidx, midx) - self.assertIsNone(lidx) + assert lidx is None tm.assert_numpy_array_equal(ridx, exp_ridx) # flip jidx, ridx, lidx = idx.join(midx, how='right', return_indexers=True) tm.assert_index_equal(jidx, midx) - self.assertIsNone(lidx) + assert lidx is None tm.assert_numpy_array_equal(ridx, exp_ridx) def test_reindex(self): @@ -2082,7 +2082,7 @@ def test_reindex(self): result, indexer = self.index.reindex(list(self.index)) assert isinstance(result, MultiIndex) - self.assertIsNone(indexer) + assert indexer is None self.check_level_names(result, self.index.names) def test_reindex_level(self): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 62aabaaef100c..56e0d858f1f0f 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -791,7 +791,7 @@ def test_join_left(self): assert isinstance(res, Int64Index) tm.assert_index_equal(res, eres) - self.assertIsNone(lidx) + assert lidx is None tm.assert_numpy_array_equal(ridx, eridx) # monotonic @@ -801,7 +801,7 @@ def test_join_left(self): dtype=np.intp) assert isinstance(res, Int64Index) tm.assert_index_equal(res, eres) - self.assertIsNone(lidx) + assert lidx is None tm.assert_numpy_array_equal(ridx, eridx) # non-unique @@ -828,7 +828,7 @@ def test_join_right(self): assert isinstance(other, Int64Index) tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) - self.assertIsNone(ridx) + assert ridx is None # monotonic res, lidx, ridx = self.index.join(other_mono, how='right', @@ -838,7 +838,7 @@ def test_join_right(self): assert isinstance(other, Int64Index) tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) - self.assertIsNone(ridx) + assert ridx is None # non-unique idx = Index([1, 1, 2, 5]) @@ -1031,7 +1031,7 @@ def test_join_left(self): assert isinstance(res, UInt64Index) tm.assert_index_equal(res, eres) - self.assertIsNone(lidx) + assert lidx is None tm.assert_numpy_array_equal(ridx, eridx) # monotonic @@ -1041,7 +1041,7 @@ def test_join_left(self): assert isinstance(res, UInt64Index) tm.assert_index_equal(res, eres) - self.assertIsNone(lidx) + assert lidx is None tm.assert_numpy_array_equal(ridx, eridx) # non-unique @@ -1074,7 +1074,7 @@ def test_join_right(self): tm.assert_numpy_array_equal(lidx, elidx) assert isinstance(other, UInt64Index) tm.assert_index_equal(res, eres) - self.assertIsNone(ridx) + assert ridx is None # monotonic res, lidx, ridx = self.index.join(other_mono, how='right', @@ -1085,7 +1085,7 @@ def test_join_right(self): assert isinstance(other, UInt64Index) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_index_equal(res, eres) - self.assertIsNone(ridx) + assert 
ridx is None # non-unique idx = UInt64Index(2**63 + np.array([1, 1, 2, 5], dtype='uint64')) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 0b27eb0c34111..4b622ad3fce49 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -487,7 +487,7 @@ def test_join_left(self): assert isinstance(res, RangeIndex) tm.assert_index_equal(res, eres) - self.assertIsNone(lidx) + assert lidx is None tm.assert_numpy_array_equal(ridx, eridx) # Join withRangeIndex @@ -498,7 +498,7 @@ def test_join_left(self): assert isinstance(res, RangeIndex) tm.assert_index_equal(res, eres) - self.assertIsNone(lidx) + assert lidx is None tm.assert_numpy_array_equal(ridx, eridx) def test_join_right(self): @@ -514,7 +514,7 @@ def test_join_right(self): assert isinstance(other, Int64Index) tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) - self.assertIsNone(ridx) + assert ridx is None # Join withRangeIndex other = RangeIndex(25, 14, -1) @@ -526,7 +526,7 @@ def test_join_right(self): assert isinstance(other, RangeIndex) tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) - self.assertIsNone(ridx) + assert ridx is None def test_join_non_int_index(self): other = Index([3, 6, 7, 8, 10], dtype=object) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index adf164977205f..a847467518b92 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -620,18 +620,18 @@ def test_order(self): for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]: ordered = idx.sort_values() tm.assert_index_equal(ordered, expected) - self.assertIsNone(ordered.freq) + assert ordered.freq is None ordered = idx.sort_values(ascending=False) tm.assert_index_equal(ordered, expected[::-1]) - self.assertIsNone(ordered.freq) + assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) + assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) @@ -639,7 +639,7 @@ def test_order(self): exp = np.array([2, 1, 3, 4, 0]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) + assert ordered.freq is None def test_getitem(self): idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') @@ -681,10 +681,10 @@ def test_drop_duplicates_metadata(self): self.assertEqual(idx.freq, result.freq) idx_dup = idx.append(idx) - self.assertIsNone(idx_dup.freq) # freq is reset + assert idx_dup.freq is None # freq is reset result = idx_dup.drop_duplicates() tm.assert_index_equal(idx, result) - self.assertIsNone(result.freq) + assert result.freq is None def test_drop_duplicates(self): # to check Index/Series compat @@ -739,12 +739,12 @@ def test_take(self): result = idx.take([3, 2, 5]) expected = TimedeltaIndex(['4 day', '3 day', '6 day'], name='idx') tm.assert_index_equal(result, expected) - self.assertIsNone(result.freq) + assert result.freq is None result = idx.take([-3, 2, 5]) expected = TimedeltaIndex(['29 day', '3 day', '6 day'], name='idx') tm.assert_index_equal(result, expected) - self.assertIsNone(result.freq) + assert result.freq is None def test_take_invalid_kwargs(self): idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') @@ -808,7 +808,7 @@ def 
test_repeat(self): exp = pd.TimedeltaIndex(['1 days', '1 days', '2 days', '2 days']) for res in [index.repeat(2), np.repeat(index, 2)]: tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) + assert res.freq is None index = TimedeltaIndex(['1 days', 'NaT', '3 days']) exp = TimedeltaIndex(['1 days', '1 days', '1 days', @@ -816,7 +816,7 @@ def test_repeat(self): '3 days', '3 days', '3 days']) for res in [index.repeat(3), np.repeat(index, 3)]: tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) + assert res.freq is None def test_nat(self): self.assertIs(pd.TimedeltaIndex._na_value, pd.NaT) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 9ca207876520f..b93f76c14dcfe 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -208,7 +208,7 @@ def test_take(self): for taken in [taken1, taken2]: tm.assert_index_equal(taken, expected) assert isinstance(taken, TimedeltaIndex) - self.assertIsNone(taken.freq) + assert taken.freq is None self.assertEqual(taken.name, expected.name) def test_take_fill_value(self): diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index ba59add4305d8..35a71efbbf5ba 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1126,7 +1126,7 @@ def test_to_string(self): buf = StringIO() retval = biggie.to_string(buf=buf) - self.assertIsNone(retval) + assert retval is None self.assertEqual(buf.getvalue(), s) assert isinstance(s, compat.string_types) @@ -1631,7 +1631,7 @@ def test_to_string(self): s = self.ts.to_string() retval = self.ts.to_string(buf=buf) - self.assertIsNone(retval) + assert retval is None self.assertEqual(buf.getvalue().strip(), s) # pass float_format diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 882c9b9bd42f6..f9d911f523699 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -151,7 +151,7 @@ def test_publishes(self): with opt, make_patch as mock_display: handle = obj._ipython_display_() self.assertEqual(mock_display.call_count, 1) - self.assertIsNone(handle) + assert handle is None args, kwargs = mock_display.call_args arg, = args # just one argument diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 90c6db1dcc7fb..28c6a0e95e0f1 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1423,7 +1423,7 @@ def test_to_html(self): buf = StringIO() retval = biggie.to_html(buf=buf) - self.assertIsNone(retval) + assert retval is None self.assertEqual(buf.getvalue(), s) assert isinstance(s, compat.string_types) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index a0c50bb3a573d..deeeaef63da39 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -641,7 +641,7 @@ def test_no_unnamed_index(self): 2 2 2 e f """ df = self.read_table(StringIO(data), sep=' ') - self.assertIsNone(df.index.name) + assert df.index.name is None def test_read_csv_parse_simple_list(self): text = """foo @@ -1243,7 +1243,7 @@ def test_regex_separator(self): df = self.read_table(StringIO(data), sep=r'\s+') expected = self.read_csv(StringIO(re.sub('[ ]+', ',', data)), index_col=0) - self.assertIsNone(expected.index.name) + assert expected.index.name is None tm.assert_frame_equal(df, expected) data 
= ' a b c\n1 2 3 \n4 5 6\n 7 8 9' diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index d077dd879d44a..ea9911d8ee66c 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -878,12 +878,12 @@ def test_append_series(self): store.append('ss', ss) result = store['ss'] tm.assert_series_equal(result, ss) - self.assertIsNone(result.name) + assert result.name is None store.append('ts', ts) result = store['ts'] tm.assert_series_equal(result, ts) - self.assertIsNone(result.name) + assert result.name is None ns.name = 'foo' store.append('ns', ns) @@ -3573,7 +3573,7 @@ def test_retain_index_attributes(self): periods=3, freq='D')))) store.append('data', df2) - self.assertIsNone(store.get_storer('data').info['index']['freq']) + assert store.get_storer('data').info['index']['freq'] is None # this is ok _maybe_remove(store, 'df2') @@ -3618,7 +3618,7 @@ def test_retain_index_attributes2(self): df2 = DataFrame(dict(A=Series(lrange(3), index=idx2))) df2.to_hdf(path, 'data', append=True) - self.assertIsNone(read_hdf(path, 'data').index.name) + assert read_hdf(path, 'data').index.name is None def test_panel_select(self): diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 64d0fec2b5646..ce341a1e02e0d 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -150,7 +150,7 @@ def _check_ax_limits(col, ax): _check_ax_limits(df['age'], age_ax) self.assertEqual(weight_ax._sharey, height_ax) self.assertEqual(age_ax._sharey, height_ax) - self.assertIsNone(dummy_ax._sharey) + assert dummy_ax._sharey is None @slow def test_boxplot_empty_column(self): diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 364a561669ed2..f0a56592158d3 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -145,7 +145,7 @@ def test_high_freq(self): def test_get_datevalue(self): from pandas.plotting._converter import get_datevalue - self.assertIsNone(get_datevalue(None, 'D')) + assert get_datevalue(None, 'D') is None self.assertEqual(get_datevalue(1987, 'A'), 1987) self.assertEqual(get_datevalue(Period(1987, 'A'), 'M'), Period('1987-12', 'M').ordinal) @@ -1069,7 +1069,7 @@ def test_secondary_legend(self): self.assertEqual(leg.get_texts()[1].get_text(), 'B (right)') self.assertEqual(leg.get_texts()[2].get_text(), 'C') self.assertEqual(leg.get_texts()[3].get_text(), 'D') - self.assertIsNone(ax.right_ax.get_legend()) + assert ax.right_ax.get_legend() is None colors = set() for line in leg.get_lines(): colors.add(line.get_color()) @@ -1105,7 +1105,7 @@ def test_secondary_legend(self): ax = df.plot(secondary_y=['C', 'D']) leg = ax.get_legend() self.assertEqual(len(leg.get_lines()), 4) - self.assertIsNone(ax.right_ax.get_legend()) + assert ax.right_ax.get_legend() is None colors = set() for line in leg.get_lines(): colors.add(line.get_color()) @@ -1120,7 +1120,7 @@ def test_secondary_legend(self): ax = df.plot(secondary_y=['A', 'B']) leg = ax.get_legend() self.assertEqual(len(leg.get_lines()), 4) - self.assertIsNone(ax.right_ax.get_legend()) + assert ax.right_ax.get_legend() is None colors = set() for line in leg.get_lines(): colors.add(line.get_color()) @@ -1133,7 +1133,7 @@ def test_secondary_legend(self): ax = df.plot(secondary_y=['C', 'D']) leg = ax.get_legend() self.assertEqual(len(leg.get_lines()), 4) - self.assertIsNone(ax.right_ax.get_legend()) + assert 
ax.right_ax.get_legend() is None colors = set() for line in leg.get_lines(): colors.add(line.get_color()) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index c1d5a069c248b..2cf40335f3ded 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1467,7 +1467,7 @@ def test_dti_slicing(self): self.assertEqual(v3, Timestamp('6/30/2005')) # don't carry freq through irregular slicing - self.assertIsNone(dti2.freq) + assert dti2.freq is None def test_woy_boundary(self): # make sure weeks at year boundaries are correct diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 68d8e382ef046..a477bb325061f 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -34,13 +34,13 @@ def test_copy_index_name_checking(self): # making a copy self.ts.index.name = None - self.assertIsNone(self.ts.index.name) + assert self.ts.index.name is None self.assertIs(self.ts, self.ts) cp = self.ts.copy() cp.index.name = 'foo' printing.pprint_thing(self.ts.index.name) - self.assertIsNone(self.ts.index.name) + assert self.ts.index.name is None def test_append_preserve_name(self): result = self.ts[:5].append(self.ts[5:]) @@ -60,9 +60,9 @@ def test_binop_maybe_preserve_name(self): cp = self.ts.copy() cp.name = 'something else' result = self.ts + cp - self.assertIsNone(result.name) + assert result.name is None result = self.ts.add(cp) - self.assertIsNone(result.name) + assert result.name is None ops = ['add', 'sub', 'mul', 'div', 'truediv', 'floordiv', 'mod', 'pow'] ops = ops + ['r' + op for op in ops] @@ -76,7 +76,7 @@ def test_binop_maybe_preserve_name(self): cp = self.ts.copy() cp.name = 'changed' result = getattr(s, op)(cp) - self.assertIsNone(result.name) + assert result.name is None def test_combine_first_name(self): result = self.ts.combine_first(self.ts[:5]) @@ -170,7 +170,7 @@ def test_iter_box(self): for res, exp in zip(s, vals): assert isinstance(res, pd.Timestamp) self.assertEqual(res, exp) - self.assertIsNone(res.tz) + assert res.tz is None vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'), pd.Timestamp('2011-01-02', tz='US/Eastern')] diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index cc29c45d74c9d..51a2a5ed6c574 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -246,7 +246,7 @@ def test_append_concat(self): rng1.name = 'foo' rng2.name = 'bar' self.assertEqual(rng1.append(rng1).name, 'foo') - self.assertIsNone(rng1.append(rng2).name) + assert rng1.append(rng2).name is None def test_append_concat_tz(self): # GH 2938 diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 22869fca35931..9872a1982a770 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -164,7 +164,7 @@ def test_getitem_get(self): # GH 5652 for s in [Series(), Series(index=list('abc'))]: result = s.get(None) - self.assertIsNone(result) + assert result is None def test_iloc(self): diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index d45a6ba347c3e..3df32992a4d74 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -34,8 +34,8 @@ def test_from_csv(self): self.series.to_csv(path) series = Series.from_csv(path) - self.assertIsNone(series.name) - self.assertIsNone(series.index.name) + assert series.name is None + assert series.index.name is None 
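The hunks in this commit all apply one mechanical rewrite: the unittest-style helper `self.assertIsNone(x)` becomes a plain `assert x is None`, the idiom that pytest's assertion rewriting can introspect (on failure it reports the actual value of `x` rather than a bare AssertionError). A minimal sketch of the pattern, with a hypothetical test name and data that are not part of the patch:

    import pandas as pd

    def test_irregular_slice_drops_freq():
        idx = pd.date_range('2017-01-01', periods=10, freq='D')
        sliced = idx[[0, 2, 3]]  # irregular positions, so no inferable freq

        # unittest idiom being removed throughout this commit:
        #     self.assertIsNone(sliced.freq)
        # pytest idiom replacing it:
        assert sliced.freq is None

The replacement is behavior-preserving: `assertIsNone` is itself an `is None` check, so the conversion can proceed hunk by hunk without touching test logic.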
assert_series_equal(self.series, series, check_names=False) self.assertTrue(series.name is None) self.assertTrue(series.index.name is None) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 486f129b6814c..430be97845fcb 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -373,17 +373,17 @@ def test_first_last_valid(self): self.assertEqual(index, ts.index[-6]) ts[:] = np.nan - self.assertIsNone(ts.last_valid_index()) - self.assertIsNone(ts.first_valid_index()) + assert ts.last_valid_index() is None + assert ts.first_valid_index() is None ser = Series([], index=[]) - self.assertIsNone(ser.last_valid_index()) - self.assertIsNone(ser.first_valid_index()) + assert ser.last_valid_index() is None + assert ser.first_valid_index() is None # GH12800 empty = Series() - self.assertIsNone(empty.last_valid_index()) - self.assertIsNone(empty.first_valid_index()) + assert empty.last_valid_index() is None + assert empty.first_valid_index() is None def test_mpl_compat_hack(self): result = self.ts[:, np.newaxis] @@ -860,7 +860,7 @@ def test_setops_preserve_freq(self): result = rng[:50].union(rng[60:100]) self.assertEqual(result.name, rng.name) - self.assertIsNone(result.freq) + assert result.freq is None self.assertEqual(result.tz, rng.tz) result = rng[:50].intersection(rng[25:75]) @@ -870,12 +870,12 @@ def test_setops_preserve_freq(self): nofreq = DatetimeIndex(list(rng[25:75]), name='other') result = rng[:50].union(nofreq) - self.assertIsNone(result.name) + assert result.name is None self.assertEqual(result.freq, rng.freq) self.assertEqual(result.tz, rng.tz) result = rng[:50].intersection(nofreq) - self.assertIsNone(result.name) + assert result.name is None self.assertEqual(result.freq, rng.freq) self.assertEqual(result.tz, rng.tz) diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py index d0e5196570adc..cf91e41624276 100644 --- a/pandas/tests/sparse/test_indexing.py +++ b/pandas/tests/sparse/test_indexing.py @@ -303,20 +303,20 @@ def test_get(self): s = pd.SparseSeries([1, np.nan, np.nan, 3, np.nan]) self.assertEqual(s.get(0), 1) self.assertTrue(np.isnan(s.get(1))) - self.assertIsNone(s.get(5)) + assert s.get(5) is None s = pd.SparseSeries([1, np.nan, 0, 3, 0], index=list('ABCDE')) self.assertEqual(s.get('A'), 1) self.assertTrue(np.isnan(s.get('B'))) self.assertEqual(s.get('C'), 0) - self.assertIsNone(s.get('XX')) + assert s.get('XX') is None s = pd.SparseSeries([1, np.nan, 0, 3, 0], index=list('ABCDE'), fill_value=0) self.assertEqual(s.get('A'), 1) self.assertTrue(np.isnan(s.get('B'))) self.assertEqual(s.get('C'), 0) - self.assertIsNone(s.get('XX')) + assert s.get('XX') is None def test_take(self): orig = pd.Series([1, np.nan, np.nan, 3, np.nan], diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index dcac0f599fdeb..2fd7ac3d13b98 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -435,7 +435,7 @@ def _check_getitem(sp, dense): def test_get_get_value(self): tm.assert_almost_equal(self.bseries.get(10), self.bseries[10]) - self.assertIsNone(self.bseries.get(len(self.bseries) + 1)) + assert self.bseries.get(len(self.bseries) + 1) is None dt = self.btseries.index[10] result = self.btseries.get(dt) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index b26c015133697..2b9afb8e1dd6b 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -931,7 
+931,7 @@ def test_rename_categories(self): res = cat.rename_categories([1, 2, 3], inplace=True) # and now inplace - self.assertIsNone(res) + assert res is None tm.assert_numpy_array_equal(cat.__array__(), np.array([1, 2, 3, 1], dtype=np.int64)) tm.assert_index_equal(cat.categories, Index([1, 2, 3])) @@ -959,7 +959,7 @@ def test_reorder_categories(self): # inplace == True res = cat.reorder_categories(["c", "b", "a"], inplace=True) - self.assertIsNone(res) + assert res is None tm.assert_categorical_equal(cat, new) # not all "old" included in "new" @@ -1000,7 +1000,7 @@ def test_add_categories(self): # inplace == True res = cat.add_categories("d", inplace=True) tm.assert_categorical_equal(cat, new) - self.assertIsNone(res) + assert res is None # new is in old categories def f(): @@ -1040,7 +1040,7 @@ def test_remove_categories(self): # inplace == True res = cat.remove_categories("c", inplace=True) tm.assert_categorical_equal(cat, new) - self.assertIsNone(res) + assert res is None # removal is not in categories def f(): @@ -1062,7 +1062,7 @@ def test_remove_unused_categories(self): res = c.remove_unused_categories(inplace=True) tm.assert_index_equal(c.categories, exp_categories_dropped) - self.assertIsNone(res) + assert res is None # with NaN values (GH11599) c = Categorical(["a", "b", "c", np.nan], diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index af56f0c33df2e..24248d735adb0 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -74,7 +74,7 @@ def test_copy_names(self): getattr(self.panel, attr).name = None cp = self.panel.copy() getattr(cp, attr).name = 'foo' - self.assertIsNone(getattr(self.panel, attr).name) + assert getattr(self.panel, attr).name is None def test_iter(self): tm.equalContents(list(self.panel), self.panel.items) diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index e3ab01499e676..c9de997cdf067 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -545,12 +545,12 @@ def _check_tick(self, base_delta, code): index = _dti([b + base_delta * 7] + [b + base_delta * j for j in range( 3)]) - self.assertIsNone(frequencies.infer_freq(index)) + assert frequencies.infer_freq(index) is None index = _dti([b + base_delta * j for j in range(3)] + [b + base_delta * 7]) - self.assertIsNone(frequencies.infer_freq(index)) + assert frequencies.infer_freq(index) is None def test_weekly(self): days = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] @@ -690,7 +690,7 @@ def test_infer_freq_tz_transition(self): index = date_range("2013-11-03", periods=5, freq="3H").tz_localize("America/Chicago") - self.assertIsNone(index.inferred_freq) + assert index.inferred_freq is None def test_infer_freq_businesshour(self): # GH 7905 diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index 335a7f3513da9..ff38d5ec46a44 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -147,7 +147,7 @@ def test_apply_out_of_range(self): result = Timestamp('20080101') + offset assert isinstance(result, datetime) - self.assertIsNone(result.tzinfo) + assert result.tzinfo is None tm._skip_if_no_pytz() tm._skip_if_no_dateutil() diff --git a/pandas/util/testing.py b/pandas/util/testing.py index d6f3e00b7594d..b28bd6a696bda 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1066,11 +1066,6 @@ def assertIs(first, second, msg=''): assert a is b, "%s: %r is not %r" % (msg.format(a, b), a, b) -def 
assertIsNone(expr, msg=''): - """Checks that 'expr' is None""" - return assertIs(expr, None, msg) - - def assert_categorical_equal(left, right, check_dtype=True, obj='Categorical', check_category_order=True): """Test that Categoricals are equivalent. From e8edf13023746d09aa2f63c51059984449fa6c60 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 24 Apr 2017 06:04:40 -0400 Subject: [PATCH 457/933] MAINT: Remove assertIs from testing (#16105) --- pandas/tests/dtypes/test_common.py | 4 +- pandas/tests/dtypes/test_dtypes.py | 22 ++--- pandas/tests/frame/test_alter_axes.py | 2 +- pandas/tests/frame/test_api.py | 14 +-- pandas/tests/frame/test_apply.py | 19 ++-- .../tests/frame/test_axis_select_reindex.py | 2 +- pandas/tests/frame/test_indexing.py | 18 ++-- pandas/tests/frame/test_operators.py | 18 ++-- pandas/tests/groupby/test_groupby.py | 11 +-- .../tests/indexes/datetimes/test_datetime.py | 4 +- pandas/tests/indexes/datetimes/test_ops.py | 4 +- pandas/tests/indexes/datetimes/test_setops.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 6 +- pandas/tests/indexes/period/test_indexing.py | 4 +- pandas/tests/indexes/period/test_ops.py | 20 ++--- pandas/tests/indexes/period/test_setops.py | 2 +- pandas/tests/indexes/test_base.py | 10 +-- pandas/tests/indexes/test_multi.py | 8 +- pandas/tests/indexes/test_numeric.py | 2 +- pandas/tests/indexes/test_range.py | 2 +- pandas/tests/indexes/timedeltas/test_ops.py | 4 +- pandas/tests/io/parser/parse_dates.py | 2 +- pandas/tests/io/test_packers.py | 2 +- pandas/tests/plotting/test_boxplot_method.py | 10 +-- pandas/tests/plotting/test_frame.py | 12 +-- pandas/tests/plotting/test_hist_method.py | 4 +- pandas/tests/scalar/test_period.py | 90 +++++++++---------- pandas/tests/scalar/test_timestamp.py | 3 +- pandas/tests/series/test_analytics.py | 4 +- pandas/tests/series/test_api.py | 4 +- pandas/tests/series/test_constructors.py | 4 +- pandas/tests/series/test_datetime_values.py | 2 +- pandas/tests/series/test_indexing.py | 12 +-- pandas/tests/series/test_period.py | 4 +- pandas/tests/series/test_sorting.py | 13 +-- pandas/tests/sparse/test_frame.py | 2 +- pandas/tests/sparse/test_libsparse.py | 4 +- pandas/tests/sparse/test_series.py | 2 +- pandas/tests/test_base.py | 14 +-- pandas/tests/test_categorical.py | 2 +- pandas/tests/test_internals.py | 2 +- pandas/tests/test_multilevel.py | 12 +-- pandas/tests/test_panel.py | 14 +-- pandas/tests/test_panel4d.py | 12 +-- pandas/tests/test_strings.py | 2 +- pandas/tests/test_util.py | 4 +- pandas/tests/tseries/test_timezones.py | 26 +++--- pandas/util/testing.py | 6 -- 48 files changed, 219 insertions(+), 227 deletions(-) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 1017f93b8241c..86233c5d2b192 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -26,7 +26,7 @@ def test_datetimetz_dtype(self): for dtype in ['datetime64[ns, US/Eastern]', 'datetime64[ns, Asia/Tokyo]', 'datetime64[ns, UTC]']: - self.assertIs(pandas_dtype(dtype), DatetimeTZDtype(dtype)) + assert pandas_dtype(dtype) is DatetimeTZDtype(dtype) self.assertEqual(pandas_dtype(dtype), DatetimeTZDtype(dtype)) self.assertEqual(pandas_dtype(dtype), dtype) @@ -36,7 +36,7 @@ def test_categorical_dtype(self): def test_period_dtype(self): for dtype in ['period[D]', 'period[3M]', 'period[U]', 'Period[D]', 'Period[3M]', 'Period[U]']: - self.assertIs(pandas_dtype(dtype), PeriodDtype(dtype)) + assert pandas_dtype(dtype) is PeriodDtype(dtype) self.assertEqual(pandas_dtype(dtype), 
PeriodDtype(dtype)) self.assertEqual(pandas_dtype(dtype), dtype) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 32ec1194639ae..e3bae3675a9e4 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -249,20 +249,14 @@ def test_subclass(self): self.assertTrue(issubclass(type(a), type(b))) def test_identity(self): - self.assertEqual(PeriodDtype('period[D]'), - PeriodDtype('period[D]')) - self.assertIs(PeriodDtype('period[D]'), - PeriodDtype('period[D]')) + assert PeriodDtype('period[D]') == PeriodDtype('period[D]') + assert PeriodDtype('period[D]') is PeriodDtype('period[D]') - self.assertEqual(PeriodDtype('period[3D]'), - PeriodDtype('period[3D]')) - self.assertIs(PeriodDtype('period[3D]'), - PeriodDtype('period[3D]')) + assert PeriodDtype('period[3D]') == PeriodDtype('period[3D]') + assert PeriodDtype('period[3D]') is PeriodDtype('period[3D]') - self.assertEqual(PeriodDtype('period[1S1U]'), - PeriodDtype('period[1000001U]')) - self.assertIs(PeriodDtype('period[1S1U]'), - PeriodDtype('period[1000001U]')) + assert PeriodDtype('period[1S1U]') == PeriodDtype('period[1000001U]') + assert PeriodDtype('period[1S1U]') is PeriodDtype('period[1000001U]') def test_coerce_to_dtype(self): self.assertEqual(_coerce_to_dtype('period[D]'), @@ -371,12 +365,12 @@ def test_construction(self): def test_construction_generic(self): # generic i = IntervalDtype('interval') - self.assertIs(i.subtype, None) + assert i.subtype is None self.assertTrue(is_interval_dtype(i)) self.assertTrue(str(i) == 'interval') i = IntervalDtype() - self.assertIs(i.subtype, None) + assert i.subtype is None self.assertTrue(is_interval_dtype(i)) self.assertTrue(str(i) == 'interval') diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 1a3de7b463a19..b4b86d8ea1907 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -35,7 +35,7 @@ def test_set_index(self): # cache it _ = self.mixed_frame['foo'] # noqa self.mixed_frame.index = idx - self.assertIs(self.mixed_frame['foo'].index, idx) + assert self.mixed_frame['foo'].index is idx with assertRaisesRegexp(ValueError, 'Length mismatch'): self.mixed_frame.index = idx[::2] diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index e060e863c0431..9e16698bab39c 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -88,8 +88,8 @@ def test_get_axis(self): self.assertEqual(f._get_axis_name('rows'), 'index') self.assertEqual(f._get_axis_name('columns'), 'columns') - self.assertIs(f._get_axis(0), f.index) - self.assertIs(f._get_axis(1), f.columns) + assert f._get_axis(0) is f.index + assert f._get_axis(1) is f.columns assertRaisesRegexp(ValueError, 'No axis named', f._get_axis_number, 2) assertRaisesRegexp(ValueError, 'No axis.*foo', f._get_axis_name, 'foo') @@ -99,7 +99,7 @@ def test_get_axis(self): def test_keys(self): getkeys = self.frame.keys - self.assertIs(getkeys(), self.frame.columns) + assert getkeys() is self.frame.columns def test_column_contains_typeerror(self): try: @@ -122,17 +122,17 @@ def test_array_interface(self): with np.errstate(all='ignore'): result = np.sqrt(self.frame) assert isinstance(result, type(self.frame)) - self.assertIs(result.index, self.frame.index) - self.assertIs(result.columns, self.frame.columns) + assert result.index is self.frame.index + assert result.columns is self.frame.columns assert_frame_equal(result, self.frame.apply(np.sqrt)) def 
test_get_agg_axis(self): cols = self.frame._get_agg_axis(0) - self.assertIs(cols, self.frame.columns) + assert cols is self.frame.columns idx = self.frame._get_agg_axis(1) - self.assertIs(idx, self.frame.index) + assert idx is self.frame.index pytest.raises(ValueError, self.frame._get_agg_axis, 2) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index d31fb4218adeb..9d0f00c6eeffe 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -25,29 +25,30 @@ def test_apply(self): with np.errstate(all='ignore'): # ufunc applied = self.frame.apply(np.sqrt) - assert_series_equal(np.sqrt(self.frame['A']), applied['A']) + tm.assert_series_equal(np.sqrt(self.frame['A']), applied['A']) # aggregator applied = self.frame.apply(np.mean) - self.assertEqual(applied['A'], np.mean(self.frame['A'])) + assert applied['A'] == np.mean(self.frame['A']) d = self.frame.index[0] applied = self.frame.apply(np.mean, axis=1) - self.assertEqual(applied[d], np.mean(self.frame.xs(d))) - self.assertIs(applied.index, self.frame.index) # want this + assert applied[d] == np.mean(self.frame.xs(d)) + assert applied.index is self.frame.index # want this # invalid axis df = DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c']) pytest.raises(ValueError, df.apply, lambda x: x, 2) - # GH9573 + # see gh-9573 df = DataFrame({'c0': ['A', 'A', 'B', 'B'], 'c1': ['C', 'C', 'D', 'D']}) df = df.apply(lambda ts: ts.astype('category')) - self.assertEqual(df.shape, (4, 2)) - self.assertTrue(isinstance(df['c0'].dtype, CategoricalDtype)) - self.assertTrue(isinstance(df['c1'].dtype, CategoricalDtype)) + + assert df.shape == (4, 2) + assert isinstance(df['c0'].dtype, CategoricalDtype) + assert isinstance(df['c1'].dtype, CategoricalDtype) def test_apply_mixed_datetimelike(self): # mixed datetimelike @@ -190,7 +191,7 @@ def _checkit(axis=0, raw=False): if is_reduction: agg_axis = df._get_agg_axis(axis) assert isinstance(res, Series) - self.assertIs(res.index, agg_axis) + assert res.index is agg_axis else: assert isinstance(res, DataFrame) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 636194d32ad46..e8f34b977a707 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -204,7 +204,7 @@ def test_reindex(self): # Same index, copies values but not index if copy=False newFrame = self.frame.reindex(self.frame.index, copy=False) - self.assertIs(newFrame.index, self.frame.index) + assert newFrame.index is self.frame.index # length zero newFrame = self.frame.reindex([]) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 088f863e5358b..5f8d04fdb16bd 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1171,29 +1171,29 @@ def test_getitem_fancy_1d(self): # return self if no slicing...for now with catch_warnings(record=True): - self.assertIs(f.ix[:, :], f) + assert f.ix[:, :] is f # low dimensional slice with catch_warnings(record=True): xs1 = f.ix[2, ['C', 'B', 'A']] xs2 = f.xs(f.index[2]).reindex(['C', 'B', 'A']) - assert_series_equal(xs1, xs2) + tm.assert_series_equal(xs1, xs2) with catch_warnings(record=True): ts1 = f.ix[5:10, 2] ts2 = f[f.columns[2]][5:10] - assert_series_equal(ts1, ts2) + tm.assert_series_equal(ts1, ts2) # positional xs with catch_warnings(record=True): xs1 = f.ix[0] xs2 = f.xs(f.index[0]) - assert_series_equal(xs1, xs2) + tm.assert_series_equal(xs1, xs2) 
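Patch 457 applies the companion rewrite: `self.assertIs(a, b)` becomes `assert a is b`. The operative word in these tests is identity, not equality; each hunk asserts either that two names refer to the very same object (caching, as in `groups is grouped.groups`, or shared axes, as in `f._get_axis(0) is f.index`) or that a value is a singleton such as `pd.NaT`. A short standalone sketch of the distinction, not part of the patch:

    import pandas as pd

    i1 = pd.Index([1, 2, 3])
    i2 = pd.Index([1, 2, 3])

    assert i1.equals(i2)      # equal contents
    assert i1 is not i2       # but two distinct objects: `is` checks identity
    assert (i1 == i2).all()   # elementwise `==` yields a boolean array

    # NaT is a singleton, so identity is the right comparison for it;
    # that is why the Period and Timestamp hunks can assert `x is pd.NaT`:
    assert pd.Timestamp('NaT') is pd.NaT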
with catch_warnings(record=True): xs1 = f.ix[f.index[5]] xs2 = f.xs(f.index[5]) - assert_series_equal(xs1, xs2) + tm.assert_series_equal(xs1, xs2) # single column with catch_warnings(record=True): @@ -1204,18 +1204,18 @@ def test_getitem_fancy_1d(self): exp = f.copy() exp.values[5] = 4 f.ix[5][:] = 4 - assert_frame_equal(exp, f) + tm.assert_frame_equal(exp, f) with catch_warnings(record=True): exp.values[:, 1] = 6 f.ix[:, 1][:] = 6 - assert_frame_equal(exp, f) + tm.assert_frame_equal(exp, f) # slice of mixed-frame with catch_warnings(record=True): xs = self.mixed_frame.ix[5] exp = self.mixed_frame.xs(self.mixed_frame.index[5]) - assert_series_equal(xs, exp) + tm.assert_series_equal(xs, exp) def test_setitem_fancy_1d(self): @@ -1676,7 +1676,7 @@ def test_set_value(self): def test_set_value_resize(self): res = self.frame.set_value('foobar', 'B', 0) - self.assertIs(res, self.frame) + assert res is self.frame self.assertEqual(res.index[-1], 'foobar') self.assertEqual(res.get_value('foobar', 'B'), 0) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 18639990662b0..3f77bc754a525 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -907,7 +907,7 @@ def test_combineFunc(self): _check_mixed_float(result, dtype=dict(C=None)) result = self.empty * 2 - self.assertIs(result.index, self.empty.index) + assert result.index is self.empty.index self.assertEqual(len(result.columns), 0) def test_comparisons(self): @@ -1117,16 +1117,16 @@ def test_inplace_ops_identity(self): s += 1 assert_series_equal(s, s2) assert_series_equal(s_orig + 1, s) - self.assertIs(s, s2) - self.assertIs(s._data, s2._data) + assert s is s2 + assert s._data is s2._data df = df_orig.copy() df2 = df df += 1 assert_frame_equal(df, df2) assert_frame_equal(df_orig + 1, df) - self.assertIs(df, df2) - self.assertIs(df._data, df2._data) + assert df is df2 + assert df._data is df2._data # dtype change s = s_orig.copy() @@ -1140,8 +1140,8 @@ def test_inplace_ops_identity(self): df += 1.5 assert_frame_equal(df, df2) assert_frame_equal(df_orig + 1.5, df) - self.assertIs(df, df2) - self.assertIs(df._data, df2._data) + assert df is df2 + assert df._data is df2._data # mixed dtype arr = np.random.randint(0, 10, size=5) @@ -1152,7 +1152,7 @@ def test_inplace_ops_identity(self): expected = DataFrame({'A': arr.copy() + 1, 'B': 'foo'}) assert_frame_equal(df, expected) assert_frame_equal(df2, expected) - self.assertIs(df._data, df2._data) + assert df._data is df2._data df = df_orig.copy() df2 = df @@ -1160,7 +1160,7 @@ def test_inplace_ops_identity(self): expected = DataFrame({'A': arr.copy() + 1.5, 'B': 'foo'}) assert_frame_equal(df, expected) assert_frame_equal(df2, expected) - self.assertIs(df._data, df2._data) + assert df._data is df2._data def test_alignment_non_pandas(self): index = ['A', 'B', 'C'] diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 02ef9e614150c..2d673b2dac259 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -759,17 +759,18 @@ def test_len(self): def test_groups(self): grouped = self.df.groupby(['A']) groups = grouped.groups - self.assertIs(groups, grouped.groups) # caching works + assert groups is grouped.groups # caching works for k, v in compat.iteritems(grouped.groups): - self.assertTrue((self.df.loc[v]['A'] == k).all()) + assert (self.df.loc[v]['A'] == k).all() grouped = self.df.groupby(['A', 'B']) groups = grouped.groups - self.assertIs(groups, 
grouped.groups) # caching works + assert groups is grouped.groups # caching works + for k, v in compat.iteritems(grouped.groups): - self.assertTrue((self.df.loc[v]['A'] == k[0]).all()) - self.assertTrue((self.df.loc[v]['B'] == k[1]).all()) + assert (self.df.loc[v]['A'] == k[0]).all() + assert (self.df.loc[v]['B'] == k[1]).all() def test_basic_regression(self): # regression diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 31e795a80f2e6..66dcb195611e1 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -156,7 +156,7 @@ def test_time_overflow_for_32bit_machines(self): self.assertEqual(len(idx2), periods) def test_nat(self): - self.assertIs(DatetimeIndex([np.nan])[0], pd.NaT) + assert DatetimeIndex([np.nan])[0] is pd.NaT def test_ufunc_coercions(self): idx = date_range('2011-01-01', periods=3, freq='2D', name='x') @@ -617,7 +617,7 @@ def test_join_self(self): kinds = 'outer', 'inner', 'left', 'right' for kind in kinds: joined = index.join(index, how=kind) - self.assertIs(index, joined) + assert index is joined def assert_index_parameters(self, index): assert index.freq == '40960N' diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index de41f321b245b..d531d0913df77 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -879,8 +879,8 @@ def test_shift(self): tm.assert_index_equal(idx.shift(-3, freq='H'), exp) def test_nat(self): - self.assertIs(pd.DatetimeIndex._na_value, pd.NaT) - self.assertIs(pd.DatetimeIndex([])._na_value, pd.NaT) + assert pd.DatetimeIndex._na_value is pd.NaT + assert pd.DatetimeIndex([])._na_value is pd.NaT for tz in [None, 'US/Eastern', 'UTC']: idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 3e6ed7756b9bd..84a1adce2c0aa 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -185,7 +185,7 @@ def test_datetimeindex_union_join_empty(self): result = dti.union(empty) assert isinstance(result, DatetimeIndex) - self.assertIs(result, result) + assert result is result result = dti.join(empty) assert isinstance(result, DatetimeIndex) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index b7427f1935a8c..c637b36d1bbb5 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -179,14 +179,14 @@ def test_to_datetime_dt64s(self): ] for dt in in_bound_dts: - self.assertEqual(pd.to_datetime(dt), Timestamp(dt)) + assert pd.to_datetime(dt) == Timestamp(dt) oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ] for dt in oob_dts: pytest.raises(ValueError, pd.to_datetime, dt, errors='raise') pytest.raises(ValueError, Timestamp, dt) - self.assertIs(pd.to_datetime(dt, errors='coerce'), NaT) + assert pd.to_datetime(dt, errors='coerce') is NaT def test_to_datetime_array_of_dt64s(self): dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] @@ -681,7 +681,7 @@ def test_to_datetime_types(self): # empty string result = to_datetime('') - self.assertIs(result, NaT) + assert result is NaT result = to_datetime(['', '']) self.assertTrue(isnull(result).all()) diff --git a/pandas/tests/indexes/period/test_indexing.py 
b/pandas/tests/indexes/period/test_indexing.py index 982ac7d96a9cc..79998d5a6ad7f 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -121,7 +121,7 @@ def test_getitem_datetime(self): def test_getitem_nat(self): idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') self.assertEqual(idx[0], pd.Period('2011-01', freq='M')) - self.assertIs(idx[1], tslib.NaT) + assert idx[1] is tslib.NaT s = pd.Series([0, 1, 2], index=idx) self.assertEqual(s[pd.NaT], 1) @@ -129,7 +129,7 @@ def test_getitem_nat(self): s = pd.Series(idx, index=idx) self.assertEqual(s[pd.Period('2011-01', freq='M')], pd.Period('2011-01', freq='M')) - self.assertIs(s[pd.NaT], tslib.NaT) + assert s[pd.NaT] is tslib.NaT def test_getitem_list_periods(self): # GH 7710 diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 70a27eada7774..4f54f44b7bdab 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -56,13 +56,13 @@ def test_asobject_tolist(self): tm.assert_index_equal(result, expected) for i in [0, 1, 3]: self.assertEqual(result[i], expected[i]) - self.assertIs(result[2], pd.NaT) + assert result[2] is pd.NaT self.assertEqual(result.name, expected.name) result_list = idx.tolist() for i in [0, 1, 3]: self.assertEqual(result_list[i], expected_list[i]) - self.assertIs(result_list[2], pd.NaT) + assert result_list[2] is pd.NaT def test_minmax(self): @@ -88,15 +88,15 @@ def test_minmax(self): # Return NaT obj = PeriodIndex([], freq='M') result = getattr(obj, op)() - self.assertIs(result, tslib.NaT) + assert result is tslib.NaT obj = PeriodIndex([pd.NaT], freq='M') result = getattr(obj, op)() - self.assertIs(result, tslib.NaT) + assert result is tslib.NaT obj = PeriodIndex([pd.NaT, pd.NaT, pd.NaT], freq='M') result = getattr(obj, op)() - self.assertIs(result, tslib.NaT) + assert result is tslib.NaT def test_numpy_minmax(self): pr = pd.period_range(start='2016-01-15', end='2016-01-20') @@ -787,8 +787,8 @@ def test_repeat(self): tm.assert_index_equal(res, exp) def test_nat(self): - self.assertIs(pd.PeriodIndex._na_value, pd.NaT) - self.assertIs(pd.PeriodIndex([], freq='M')._na_value, pd.NaT) + assert pd.PeriodIndex._na_value is pd.NaT + assert pd.PeriodIndex([], freq='M')._na_value is pd.NaT idx = pd.PeriodIndex(['2011-01-01', '2011-01-02'], freq='D') self.assertTrue(idx._can_hold_na) @@ -898,7 +898,7 @@ def test_pi_ops_errors(self): np.add(obj, ng) if _np_version_under1p10: - self.assertIs(np.add(ng, obj), NotImplemented) + assert np.add(ng, obj) is NotImplemented else: with pytest.raises(TypeError): np.add(ng, obj) @@ -907,7 +907,7 @@ def test_pi_ops_errors(self): np.subtract(obj, ng) if _np_version_under1p10: - self.assertIs(np.subtract(ng, obj), NotImplemented) + assert np.subtract(ng, obj) is NotImplemented else: with pytest.raises(TypeError): np.subtract(ng, obj) @@ -1014,7 +1014,7 @@ def test_pi_sub_period(self): result = np.subtract(pd.Period('2012-01', freq='M'), idx) if _np_version_under1p10: - self.assertIs(result, NotImplemented) + assert result is NotImplemented else: tm.assert_index_equal(result, exp) diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index 54dafd832fd23..97f9cff2d193e 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -31,7 +31,7 @@ def test_join_self(self): for kind in ['inner', 'outer', 'left', 'right']: res = index.join(index, how=kind) - 
self.assertIs(index, res) + assert index is res def test_join_does_not_recur(self): df = tm.makeCustomDataframe( diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 91d5068ee9f19..2b6d69a90a55f 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -608,7 +608,7 @@ def test_intersection(self): # Corner cases inter = first.intersection(first) - self.assertIs(inter, first) + assert inter is first idx1 = Index([1, 2, 3, 4, 5], name='idx') # if target has the same name, it is preserved @@ -681,13 +681,13 @@ def test_union(self): # Corner cases union = first.union(first) - self.assertIs(union, first) + assert union is first union = first.union([]) - self.assertIs(union, first) + assert union is first union = Index([]).union(first) - self.assertIs(union, first) + assert union is first # preserve names first = Index(list('ab'), name='A') @@ -1434,7 +1434,7 @@ def test_join_self(self): for kind in kinds: joined = res.join(res, how=kind) - self.assertIs(res, joined) + assert res is joined def test_str_attribute(self): # GH9068 diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 7a9b3c1c4b5d6..34051a9749af8 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1582,10 +1582,10 @@ def test_union(self): # corner case, pass self or empty thing: the_union = self.index.union(self.index) - self.assertIs(the_union, self.index) + assert the_union is self.index the_union = self.index.union(self.index[:0]) - self.assertIs(the_union, self.index) + assert the_union is self.index # won't work in python 3 # tuples = self.index.values @@ -1614,7 +1614,7 @@ def test_intersection(self): # corner case, pass self the_int = self.index.intersection(self.index) - self.assertIs(the_int, self.index) + assert the_int is self.index # empty intersection: disjoint empty = self.index[:2] & self.index[2:] @@ -2039,7 +2039,7 @@ def test_join_self(self): for kind in kinds: res = self.index joined = res.join(res, how=kind) - self.assertIs(res, joined) + assert res is joined def test_join_multi(self): # GH 10665 diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 56e0d858f1f0f..f0c42a3dae239 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -518,7 +518,7 @@ def test_join_self(self): kinds = 'outer', 'inner', 'left', 'right' for kind in kinds: joined = self.index.join(self.index, how=kind) - self.assertIs(self.index, joined) + assert self.index is joined def test_union_noncomparable(self): from datetime import datetime, timedelta diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 4b622ad3fce49..6f87687f822e9 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -573,7 +573,7 @@ def test_join_self(self): kinds = 'outer', 'inner', 'left', 'right' for kind in kinds: joined = self.index.join(self.index, how=kind) - self.assertIs(self.index, joined) + assert self.index is joined def test_intersection(self): # intersect with Int64Index diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index a847467518b92..6da6653b752c9 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -819,8 +819,8 @@ def test_repeat(self): assert res.freq is None def test_nat(self): - self.assertIs(pd.TimedeltaIndex._na_value, pd.NaT) - 
self.assertIs(pd.TimedeltaIndex([])._na_value, pd.NaT) + assert pd.TimedeltaIndex._na_value is pd.NaT + assert pd.TimedeltaIndex([])._na_value is pd.NaT idx = pd.TimedeltaIndex(['1 days', '2 days']) self.assertTrue(idx._can_hold_na) diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index b7147cd77f4f6..f0a1f58be4026 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -363,7 +363,7 @@ def test_parse_tz_aware(self): stamp = result.index[0] self.assertEqual(stamp.minute, 39) try: - self.assertIs(result.index.tz, pytz.utc) + assert result.index.tz is pytz.utc except AssertionError: # hello Yaroslav arr = result.index.to_pydatetime() result = tools.to_datetime(arr, utc=True)[0] diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index ca6d0605c193b..f8923035b3a63 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -272,7 +272,7 @@ def test_timestamp(self): def test_nat(self): nat_rec = self.encode_decode(NaT) - self.assertIs(NaT, nat_rec) + assert NaT is nat_rec def test_datetimes(self): diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index ce341a1e02e0d..018cbbe170313 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -71,19 +71,19 @@ def test_boxplot_legacy(self): fig, ax = self.plt.subplots() axes = df.boxplot('Col1', by='X', ax=ax) ax_axes = ax.axes if self.mpl_ge_1_5_0 else ax.get_axes() - self.assertIs(ax_axes, axes) + assert ax_axes is axes fig, ax = self.plt.subplots() axes = df.groupby('Y').boxplot(ax=ax, return_type='axes') ax_axes = ax.axes if self.mpl_ge_1_5_0 else ax.get_axes() - self.assertIs(ax_axes, axes['A']) + assert ax_axes is axes['A'] # Multiple columns with an ax argument should use same figure fig, ax = self.plt.subplots() with tm.assert_produces_warning(UserWarning): axes = df.boxplot(column=['Col1', 'Col2'], by='X', ax=ax, return_type='axes') - self.assertIs(axes['Col1'].get_figure(), fig) + assert axes['Col1'].get_figure() is fig # When by is None, check that all relevant lines are present in the # dict @@ -357,7 +357,7 @@ def test_grouped_box_multiple_axes(self): returned = np.array(list(returned.values)) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) tm.assert_numpy_array_equal(returned, axes[0]) - self.assertIs(returned[0].figure, fig) + assert returned[0].figure is fig # draw on second row with tm.assert_produces_warning(UserWarning): @@ -367,7 +367,7 @@ def test_grouped_box_multiple_axes(self): returned = np.array(list(returned.values)) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) tm.assert_numpy_array_equal(returned, axes[1]) - self.assertIs(returned[0].figure, fig) + assert returned[0].figure is fig with pytest.raises(ValueError): fig, axes = self.plt.subplots(2, 3) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 0e9aa3355a658..c72bce28b5862 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -139,7 +139,7 @@ def test_plot(self): result = ax.axes else: result = ax.get_axes() # deprecated - self.assertIs(result, axes[0]) + assert result is axes[0] # GH 15516 def test_mpl2_color_cycle_str(self): @@ -443,13 +443,13 @@ def test_subplots_multiple_axes(self): sharey=False) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) self.assertEqual(returned.shape, (3, )) - 
self.assertIs(returned[0].figure, fig) + assert returned[0].figure is fig # draw on second row returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) self.assertEqual(returned.shape, (3, )) - self.assertIs(returned[0].figure, fig) + assert returned[0].figure is fig self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) tm.close() @@ -933,11 +933,11 @@ def test_plot_scatter_with_c(self): # verify turning off colorbar works ax = df.plot.scatter(x='x', y='y', c='z', colorbar=False) - self.assertIs(ax.collections[0].colorbar, None) + assert ax.collections[0].colorbar is None # verify that we can still plot a solid color ax = df.plot.scatter(x=0, y=1, c='red') - self.assertIs(ax.collections[0].colorbar, None) + assert ax.collections[0].colorbar is None self._check_colors(ax.collections, facecolors=['r']) # Ensure that we can pass an np.array straight through to matplotlib, @@ -2065,7 +2065,7 @@ def test_no_color_bar(self): df = self.hexbin_df ax = df.plot.hexbin(x='A', y='B', colorbar=None) - self.assertIs(ax.collections[0].colorbar, None) + assert ax.collections[0].colorbar is None @slow def test_allow_cmap(self): diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 6dd97a1181f22..79d5f74e6ea06 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -376,11 +376,11 @@ def test_grouped_hist_multiple_axes(self): returned = df.hist(column=['height', 'weight', 'category'], ax=axes[0]) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) tm.assert_numpy_array_equal(returned, axes[0]) - self.assertIs(returned[0].figure, fig) + assert returned[0].figure is fig returned = df.hist(by='classroom', ax=axes[1]) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) tm.assert_numpy_array_equal(returned, axes[1]) - self.assertIs(returned[0].figure, fig) + assert returned[0].figure is fig with pytest.raises(ValueError): fig, axes = self.plt.subplots(2, 3) diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index 1635d90189b50..dff954b25f839 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -92,25 +92,25 @@ def test_period_from_ordinal(self): def test_period_cons_nat(self): p = Period('NaT', freq='M') - self.assertIs(p, pd.NaT) + assert p is pd.NaT p = Period('nat', freq='W-SUN') - self.assertIs(p, pd.NaT) + assert p is pd.NaT p = Period(tslib.iNaT, freq='D') - self.assertIs(p, pd.NaT) + assert p is pd.NaT p = Period(tslib.iNaT, freq='3D') - self.assertIs(p, pd.NaT) + assert p is pd.NaT p = Period(tslib.iNaT, freq='1D1H') - self.assertIs(p, pd.NaT) + assert p is pd.NaT p = Period('NaT') - self.assertIs(p, pd.NaT) + assert p is pd.NaT p = Period(tslib.iNaT) - self.assertIs(p, pd.NaT) + assert p is pd.NaT def test_period_cons_mult(self): p1 = Period('2011-01', freq='3M') @@ -858,7 +858,7 @@ def test_constructor_corner(self): pytest.raises(ValueError, Period, 1.6, freq='D') pytest.raises(ValueError, Period, ordinal=1.6, freq='D') pytest.raises(ValueError, Period, ordinal=2, value=1, freq='D') - self.assertIs(Period(None), pd.NaT) + assert Period(None) is pd.NaT pytest.raises(ValueError, Period, month=1) p = Period('2007-01-01', freq='D') @@ -1020,12 +1020,12 @@ def test_add(self): def test_add_pdnat(self): p = pd.Period('2011-01', freq='M') - self.assertIs(p + pd.NaT, pd.NaT) - self.assertIs(pd.NaT + p, pd.NaT) + assert p + pd.NaT is pd.NaT + 
assert pd.NaT + p is pd.NaT p = pd.Period('NaT', freq='M') - self.assertIs(p + pd.NaT, pd.NaT) - self.assertIs(pd.NaT + p, pd.NaT) + assert p + pd.NaT is pd.NaT + assert pd.NaT + p is pd.NaT def test_add_raises(self): # GH 4731 @@ -1187,41 +1187,41 @@ def test_add_offset_nat(self): for freq in ['A', '2A', '3A']: p = Period('NaT', freq=freq) for o in [offsets.YearEnd(2)]: - self.assertIs(p + o, tslib.NaT) - self.assertIs(o + p, tslib.NaT) + assert p + o is tslib.NaT + assert o + p is tslib.NaT for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - self.assertIs(p + o, tslib.NaT) + assert p + o is tslib.NaT if isinstance(o, np.timedelta64): with pytest.raises(TypeError): o + p else: - self.assertIs(o + p, tslib.NaT) + assert o + p is tslib.NaT for freq in ['M', '2M', '3M']: p = Period('NaT', freq=freq) for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: - self.assertIs(p + o, tslib.NaT) + assert p + o is tslib.NaT if isinstance(o, np.timedelta64): with pytest.raises(TypeError): o + p else: - self.assertIs(o + p, tslib.NaT) + assert o + p is tslib.NaT for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - self.assertIs(p + o, tslib.NaT) + assert p + o is tslib.NaT if isinstance(o, np.timedelta64): with pytest.raises(TypeError): o + p else: - self.assertIs(o + p, tslib.NaT) + assert o + p is tslib.NaT # freq is Tick for freq in ['D', '2D', '3D']: @@ -1229,55 +1229,55 @@ def test_add_offset_nat(self): for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), np.timedelta64(3600 * 24, 's'), timedelta(-2), timedelta(hours=48)]: - self.assertIs(p + o, tslib.NaT) + assert p + o is tslib.NaT if isinstance(o, np.timedelta64): with pytest.raises(TypeError): o + p else: - self.assertIs(o + p, tslib.NaT) + assert o + p is tslib.NaT for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(4, 'h'), timedelta(hours=23)]: - self.assertIs(p + o, tslib.NaT) + assert p + o is tslib.NaT if isinstance(o, np.timedelta64): with pytest.raises(TypeError): o + p else: - self.assertIs(o + p, tslib.NaT) + assert o + p is tslib.NaT for freq in ['H', '2H', '3H']: p = Period('NaT', freq=freq) for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), np.timedelta64(3600, 's'), timedelta(minutes=120), timedelta(days=4, minutes=180)]: - self.assertIs(p + o, tslib.NaT) + assert p + o is tslib.NaT if not isinstance(o, np.timedelta64): - self.assertIs(o + p, tslib.NaT) + assert o + p is tslib.NaT for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(3200, 's'), timedelta(hours=23, minutes=30)]: - self.assertIs(p + o, tslib.NaT) + assert p + o is tslib.NaT if isinstance(o, np.timedelta64): with pytest.raises(TypeError): o + p else: - self.assertIs(o + p, tslib.NaT) + assert o + p is tslib.NaT def test_sub_pdnat(self): # GH 13071 p = pd.Period('2011-01', freq='M') - self.assertIs(p - pd.NaT, pd.NaT) - self.assertIs(pd.NaT - p, pd.NaT) + assert p - pd.NaT is pd.NaT + assert pd.NaT - p is pd.NaT p = pd.Period('NaT', freq='M') - self.assertIs(p - pd.NaT, pd.NaT) - self.assertIs(pd.NaT - p, pd.NaT) + assert p - pd.NaT is pd.NaT + assert pd.NaT - p is pd.NaT def test_sub_offset(self): # freq is DateOffset @@ -1352,22 +1352,22 @@ def test_sub_offset_nat(self): for freq in ['A', '2A', '3A']: p = Period('NaT', freq=freq) for o in [offsets.YearEnd(2)]: - self.assertIs(p - o, tslib.NaT) + assert p - o is tslib.NaT for o in 
[offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - self.assertIs(p - o, tslib.NaT) + assert p - o is tslib.NaT for freq in ['M', '2M', '3M']: p = Period('NaT', freq=freq) for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: - self.assertIs(p - o, tslib.NaT) + assert p - o is tslib.NaT for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - self.assertIs(p - o, tslib.NaT) + assert p - o is tslib.NaT # freq is Tick for freq in ['D', '2D', '3D']: @@ -1375,33 +1375,33 @@ def test_sub_offset_nat(self): for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), np.timedelta64(3600 * 24, 's'), timedelta(-2), timedelta(hours=48)]: - self.assertIs(p - o, tslib.NaT) + assert p - o is tslib.NaT for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(4, 'h'), timedelta(hours=23)]: - self.assertIs(p - o, tslib.NaT) + assert p - o is tslib.NaT for freq in ['H', '2H', '3H']: p = Period('NaT', freq=freq) for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), np.timedelta64(3600, 's'), timedelta(minutes=120), timedelta(days=4, minutes=180)]: - self.assertIs(p - o, tslib.NaT) + assert p - o is tslib.NaT for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(3200, 's'), timedelta(hours=23, minutes=30)]: - self.assertIs(p - o, tslib.NaT) + assert p - o is tslib.NaT def test_nat_ops(self): for freq in ['M', '2M', '3M']: p = Period('NaT', freq=freq) - self.assertIs(p + 1, tslib.NaT) - self.assertIs(1 + p, tslib.NaT) - self.assertIs(p - 1, tslib.NaT) - self.assertIs(p - Period('2011-01', freq=freq), tslib.NaT) - self.assertIs(Period('2011-01', freq=freq) - p, tslib.NaT) + assert p + 1 is tslib.NaT + assert 1 + p is tslib.NaT + assert p - 1 is tslib.NaT + assert p - Period('2011-01', freq=freq) is tslib.NaT + assert Period('2011-01', freq=freq) - p is tslib.NaT def test_period_ops_offset(self): p = Period('2011-04-01', freq='D') diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 2cf40335f3ded..2b00ac68ee555 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -416,8 +416,7 @@ def test_tz_localize_nonexistent(self): tz) pytest.raises(NonExistentTimeError, ts.tz_localize, tz, errors='raise') - self.assertIs(ts.tz_localize(tz, errors='coerce'), - NaT) + assert ts.tz_localize(tz, errors='coerce') is NaT def test_tz_localize_errors_ambiguous(self): # See issue 13057 diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index eaa88da94ccd2..63e291836b472 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1276,8 +1276,8 @@ def test_ptp(self): def test_empty_timeseries_redections_return_nat(self): # covers #11245 for dtype in ('m8[ns]', 'm8[ns]', 'M8[ns]', 'M8[ns, UTC]'): - self.assertIs(Series([], dtype=dtype).min(), pd.NaT) - self.assertIs(Series([], dtype=dtype).max(), pd.NaT) + assert Series([], dtype=dtype).min() is pd.NaT + assert Series([], dtype=dtype).max() is pd.NaT def test_unique_data_ownership(self): # it works! 
#1807 diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index a477bb325061f..341d4006c5fcc 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -35,7 +35,7 @@ def test_copy_index_name_checking(self): self.ts.index.name = None assert self.ts.index.name is None - self.assertIs(self.ts, self.ts) + assert self.ts is self.ts cp = self.ts.copy() cp.index.name = 'foo' @@ -203,7 +203,7 @@ def test_keys(self): # HACK: By doing this in two stages, we avoid 2to3 wrapping the call # to .keys() in a list() getkeys = self.ts.keys - self.assertIs(getkeys(), self.ts.index) + assert getkeys() is self.ts.index def test_values(self): tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index e02698f96ca49..57cce1d1cf199 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -55,7 +55,7 @@ def test_constructor(self): # Mixed type Series mixed = Series(['hello', np.NaN], index=[0, 1]) self.assertEqual(mixed.dtype, np.object_) - self.assertIs(mixed[1], np.NaN) + assert mixed[1] is np.NaN self.assertFalse(self.empty.index.is_all_dates) self.assertFalse(Series({}).index.is_all_dates) @@ -226,7 +226,7 @@ def test_series_ctor_plus_datetimeindex(self): data = dict((k, 1) for k in rng) result = Series(data, index=rng) - self.assertIs(result.index, rng) + assert result.index is rng def test_constructor_default_index(self): s = Series([0, 1, 2]) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index ac3e3a2abbd69..ecb457b4ff1b0 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -368,7 +368,7 @@ def test_dt_accessor_api(self): # GH 9322 from pandas.core.indexes.accessors import ( CombinedDatetimelikeProperties, DatetimeProperties) - self.assertIs(Series.dt, CombinedDatetimelikeProperties) + assert Series.dt is CombinedDatetimelikeProperties s = Series(date_range('2000-01-01', periods=3)) assert isinstance(s.dt, DatetimeProperties) diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 9872a1982a770..135e208877f2d 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -786,13 +786,13 @@ def test_setitem_dtypes(self): def test_set_value(self): idx = self.ts.index[10] res = self.ts.set_value(idx, 0) - self.assertIs(res, self.ts) + assert res is self.ts self.assertEqual(self.ts[idx], 0) # equiv s = self.series.copy() res = s.set_value('foobar', 0) - self.assertIs(res, s) + assert res is s self.assertEqual(res.index[-1], 'foobar') self.assertEqual(res['foobar'], 0) @@ -2659,16 +2659,16 @@ def setUp(self): def test_set_none_nan(self): self.series[3] = None - self.assertIs(self.series[3], NaT) + assert self.series[3] is NaT self.series[3:5] = None - self.assertIs(self.series[4], NaT) + assert self.series[4] is NaT self.series[5] = np.nan - self.assertIs(self.series[5], NaT) + assert self.series[5] is NaT self.series[5:7] = np.nan - self.assertIs(self.series[6], NaT) + assert self.series[6] is NaT def test_nat_operations(self): # GH 8617 diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index e0f1c8e060378..a67d097f341db 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -103,10 +103,10 @@ def test_NaT_cast(self): def test_set_none_nan(self): # 
currently Period is stored as object dtype, not as NaT self.series[3] = None - self.assertIs(self.series[3], None) + assert self.series[3] is None self.series[3:5] = None - self.assertIs(self.series[4], None) + assert self.series[4] is None self.series[5] = np.nan self.assertTrue(np.isnan(self.series[5])) diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index 7ab2ec245f611..6fe18e712a29d 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -129,16 +129,17 @@ def test_sort_index_inplace(self): # descending random_order = self.ts.reindex(rindex) result = random_order.sort_index(ascending=False, inplace=True) - self.assertIs(result, None, - msg='sort_index() inplace should return None') - assert_series_equal(random_order, self.ts.reindex(self.ts.index[::-1])) + + assert result is None + tm.assert_series_equal(random_order, self.ts.reindex( + self.ts.index[::-1])) # ascending random_order = self.ts.reindex(rindex) result = random_order.sort_index(ascending=True, inplace=True) - self.assertIs(result, None, - msg='sort_index() inplace should return None') - assert_series_equal(random_order, self.ts) + + assert result is None + tm.assert_series_equal(random_order, self.ts) def test_sort_index_multiindex(self): diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index ccb72d1f0d788..cf6d80e9c0133 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -593,7 +593,7 @@ def test_apply(self): exp = self.frame.to_dense().apply(np.sum, broadcast=True) tm.assert_frame_equal(broadcasted.to_dense(), exp) - self.assertIs(self.empty.apply(np.sqrt), self.empty) + assert self.empty.apply(np.sqrt) is self.empty from pandas.core import nanops applied = self.frame.apply(np.sum) diff --git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py index 00e01b8ba14bc..14038777fdd02 100644 --- a/pandas/tests/sparse/test_libsparse.py +++ b/pandas/tests/sparse/test_libsparse.py @@ -469,7 +469,7 @@ def test_to_int_index(self): def test_to_block_index(self): index = BlockIndex(10, [0, 5], [4, 5]) - self.assertIs(index.to_block_index(), index) + assert index.to_block_index() is index class TestIntIndex(tm.TestCase): @@ -554,7 +554,7 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): def test_to_int_index(self): index = IntIndex(10, [2, 3, 4, 5, 6]) - self.assertIs(index.to_int_index(), index) + assert index.to_int_index() is index class TestSparseOperators(tm.TestCase): diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index 2fd7ac3d13b98..1502aaa7e0b9e 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -664,7 +664,7 @@ def _check(values, index1, index2, fill_value): first_series = SparseSeries(values, sparse_index=index1, fill_value=fill_value) reindexed = first_series.sparse_reindex(index2) - self.assertIs(reindexed.sp_index, index2) + assert reindexed.sp_index is index2 int_indices1 = index1.to_int_index().indices int_indices2 = index2.to_int_index().indices diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 5814ae3494b44..bec743fac18e1 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -525,7 +525,7 @@ def test_value_counts_unique_nunique_null(self): # unable to compare NaT / nan tm.assert_numpy_array_equal(result[1:], values[2:].asobject.values) - self.assertIs(result[0], pd.NaT) + assert result[0] is pd.NaT else: 
tm.assert_numpy_array_equal(result[1:], values[2:]) @@ -1018,15 +1018,17 @@ class T(NoNewAttributesMixin): pass t = T() - self.assertFalse(hasattr(t, "__frozen")) + assert not hasattr(t, "__frozen") + t.a = "test" - self.assertEqual(t.a, "test") + assert t.a == "test" + t._freeze() - # self.assertTrue("__frozen" not in dir(t)) - self.assertIs(getattr(t, "__frozen"), True) + assert "__frozen" in dir(t) + assert getattr(t, "__frozen") def f(): t.b = "test" pytest.raises(AttributeError, f) - self.assertFalse(hasattr(t, "b")) + assert not hasattr(t, "b") diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 2b9afb8e1dd6b..d516448acd876 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -4218,7 +4218,7 @@ def get_dir(s): def test_cat_accessor_api(self): # GH 9322 from pandas.core.categorical import CategoricalAccessor - self.assertIs(Series.cat, CategoricalAccessor) + assert Series.cat is CategoricalAccessor s = Series(list('aabbcde')).astype('category') assert isinstance(s.cat, CategoricalAccessor) diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index adca47488413d..f58a6d4b146bd 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -382,7 +382,7 @@ def test_pickle(self, mgr): assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) # share ref_items - # self.assertIs(mgr2.blocks[0].ref_items, mgr2.blocks[1].ref_items) + # assert mgr2.blocks[0].ref_items is mgr2.blocks[1].ref_items # GH2431 assert hasattr(mgr2, "_is_consolidated") diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index a7a80c635a364..173ac97691b3b 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -198,21 +198,21 @@ def test_reindex(self): def test_reindex_preserve_levels(self): new_index = self.ymd.index[::10] chunk = self.ymd.reindex(new_index) - self.assertIs(chunk.index, new_index) + assert chunk.index is new_index chunk = self.ymd.loc[new_index] - self.assertIs(chunk.index, new_index) + assert chunk.index is new_index with catch_warnings(record=True): chunk = self.ymd.ix[new_index] - self.assertIs(chunk.index, new_index) + assert chunk.index is new_index ymdT = self.ymd.T chunk = ymdT.reindex(columns=new_index) - self.assertIs(chunk.columns, new_index) + assert chunk.columns is new_index chunk = ymdT.loc[:, new_index] - self.assertIs(chunk.columns, new_index) + assert chunk.columns is new_index def test_repr_to_string(self): repr(self.frame) @@ -939,7 +939,7 @@ def test_stack_mixed_dtype(self): stacked = df.stack() result = df['foo'].stack().sort_index() tm.assert_series_equal(stacked['foo'], result, check_names=False) - self.assertIs(result.name, None) + assert result.name is None self.assertEqual(stacked['bar'].dtype, np.float_) def test_unstack_bug(self): diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 24248d735adb0..1d0788ae160dd 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1447,9 +1447,9 @@ def test_reindex_multi(self): major=self.panel.major_axis, minor=self.panel.minor_axis, copy=False) - self.assertIs(result.items, self.panel.items) - self.assertIs(result.major_axis, self.panel.major_axis) - self.assertIs(result.minor_axis, self.panel.minor_axis) + assert result.items is self.panel.items + assert result.major_axis is self.panel.major_axis + assert result.minor_axis is self.panel.minor_axis result = self.panel.reindex( items=self.panel.items, @@ -1612,13 +1612,13 @@ def 
test_truncate_fillna_bug(self): def test_swapaxes(self): with catch_warnings(record=True): result = self.panel.swapaxes('items', 'minor') - self.assertIs(result.items, self.panel.minor_axis) + assert result.items is self.panel.minor_axis result = self.panel.swapaxes('items', 'major') - self.assertIs(result.items, self.panel.major_axis) + assert result.items is self.panel.major_axis result = self.panel.swapaxes('major', 'minor') - self.assertIs(result.major_axis, self.panel.minor_axis) + assert result.major_axis is self.panel.minor_axis panel = self.panel.copy() result = panel.swapaxes('major', 'minor') @@ -1628,7 +1628,7 @@ def test_swapaxes(self): # this should also work result = self.panel.swapaxes(0, 1) - self.assertIs(result.items, self.panel.major_axis) + assert result.items is self.panel.major_axis # this works, but return a copy result = self.panel.swapaxes('items', 'items') diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 33b17bc04cd79..05b42cdf00e94 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -867,23 +867,23 @@ def test_fillna(self): def test_swapaxes(self): with catch_warnings(record=True): result = self.panel4d.swapaxes('labels', 'items') - self.assertIs(result.items, self.panel4d.labels) + assert result.items is self.panel4d.labels result = self.panel4d.swapaxes('labels', 'minor') - self.assertIs(result.labels, self.panel4d.minor_axis) + assert result.labels is self.panel4d.minor_axis result = self.panel4d.swapaxes('items', 'minor') - self.assertIs(result.items, self.panel4d.minor_axis) + assert result.items is self.panel4d.minor_axis result = self.panel4d.swapaxes('items', 'major') - self.assertIs(result.items, self.panel4d.major_axis) + assert result.items is self.panel4d.major_axis result = self.panel4d.swapaxes('major', 'minor') - self.assertIs(result.major_axis, self.panel4d.minor_axis) + assert result.major_axis is self.panel4d.minor_axis # this should also work result = self.panel4d.swapaxes(0, 1) - self.assertIs(result.labels, self.panel4d.items) + assert result.labels is self.panel4d.items # this works, but return a copy result = self.panel4d.swapaxes('items', 'items') diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 107720d90e489..761bfa6bfe29a 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -24,7 +24,7 @@ class TestStringMethods(tm.TestCase): def test_api(self): # GH 6106, GH 9322 - self.assertIs(Series.str, strings.StringMethods) + assert Series.str is strings.StringMethods assert isinstance(Series(['']).str, strings.StringMethods) # GH 9184 diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py index c9b2e1c9c1c75..375463ec25c36 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/test_util.py @@ -42,7 +42,7 @@ def test_deprecate_kwarg(self): x = 78 with tm.assert_produces_warning(FutureWarning): result = self.f1(old=x) - self.assertIs(result, x) + assert result is x with tm.assert_produces_warning(None): self.f1(new=x) @@ -338,7 +338,7 @@ def test_more_than_one_ref(self): with pytest.raises(BadMove) as e: def handle_success(type_, value, tb): - self.assertIs(value.args[0], b) + assert value.args[0] is b return type(e).handle_success(e, type_, value, tb) # super e.handle_success = handle_success diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 40ff2421a9f63..e3f2c242e3294 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ 
-291,7 +291,7 @@ def test_create_with_tz(self): self.assertEqual(stamp, rng[1]) utc_stamp = Timestamp('3/11/2012 05:00', tz='utc') - self.assertIs(utc_stamp.tzinfo, pytz.utc) + assert utc_stamp.tzinfo is pytz.utc self.assertEqual(utc_stamp.hour, 5) stamp = Timestamp('3/11/2012 05:00').tz_localize('utc') @@ -414,7 +414,7 @@ def test_with_tz(self): # just want it to work start = datetime(2011, 3, 12, tzinfo=pytz.utc) dr = bdate_range(start, periods=50, freq=offsets.Hour()) - self.assertIs(dr.tz, pytz.utc) + assert dr.tz is pytz.utc # DateRange with naive datetimes dr = bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc) @@ -422,15 +422,15 @@ def test_with_tz(self): # normalized central = dr.tz_convert(tz) - self.assertIs(central.tz, tz) + assert central.tz is tz comp = self.localize(tz, central[0].to_pydatetime().replace( tzinfo=None)).tzinfo - self.assertIs(central[0].tz, comp) + assert central[0].tz is comp # compare vs a localized tz comp = self.localize(tz, dr[0].to_pydatetime().replace(tzinfo=None)).tzinfo - self.assertIs(central[0].tz, comp) + assert central[0].tz is comp # datetimes with tzinfo set dr = bdate_range(datetime(2005, 1, 1, tzinfo=pytz.utc), @@ -762,14 +762,14 @@ def test_convert_tz_aware_datetime_datetime(self): converted = to_datetime(dates_aware, utc=True) ex_vals = np.array([Timestamp(x).value for x in dates_aware]) tm.assert_numpy_array_equal(converted.asi8, ex_vals) - self.assertIs(converted.tz, pytz.utc) + assert converted.tz is pytz.utc def test_to_datetime_utc(self): from dateutil.parser import parse arr = np.array([parse('2012-06-13T01:39:00Z')], dtype=object) result = to_datetime(arr, utc=True) - self.assertIs(result.tz, pytz.utc) + assert result.tz is pytz.utc def test_to_datetime_tzlocal(self): from dateutil.parser import parse @@ -780,12 +780,12 @@ def test_to_datetime_tzlocal(self): arr = np.array([dt], dtype=object) result = to_datetime(arr, utc=True) - self.assertIs(result.tz, pytz.utc) + assert result.tz is pytz.utc rng = date_range('2012-11-03 03:00', '2012-11-05 03:00', tz=tzlocal()) arr = rng.to_pydatetime() result = to_datetime(arr, utc=True) - self.assertIs(result.tz, pytz.utc) + assert result.tz is pytz.utc def test_frame_no_datetime64_dtype(self): @@ -1554,18 +1554,18 @@ def test_equal_join_ensure_utc(self): ts_moscow = ts.tz_convert('Europe/Moscow') result = ts + ts_moscow - self.assertIs(result.index.tz, pytz.utc) + assert result.index.tz is pytz.utc result = ts_moscow + ts - self.assertIs(result.index.tz, pytz.utc) + assert result.index.tz is pytz.utc df = DataFrame({'a': ts}) df_moscow = df.tz_convert('Europe/Moscow') result = df + df_moscow - self.assertIs(result.index.tz, pytz.utc) + assert result.index.tz is pytz.utc result = df_moscow + df - self.assertIs(result.index.tz, pytz.utc) + assert result.index.tz is pytz.utc def test_arith_utc_convert(self): rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') diff --git a/pandas/util/testing.py b/pandas/util/testing.py index b28bd6a696bda..08b298fabdd57 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1060,12 +1060,6 @@ def is_sorted(seq): return assert_numpy_array_equal(seq, np.sort(np.array(seq))) -def assertIs(first, second, msg=''): - """Checks that 'first' is 'second'""" - a, b = first, second - assert a is b, "%s: %r is not %r" % (msg.format(a, b), a, b) - - def assert_categorical_equal(left, right, check_dtype=True, obj='Categorical', check_category_order=True): """Test that Categoricals are equivalent. 
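[Illustrative note] The preceding patch removes the `assertIs` helper from `pandas/util/testing.py` and switches call sites to plain `assert` statements. A minimal standalone sketch of the resulting idiom (illustrative only — the Series, values, and names below are assumptions, not code from any patch in this series):

    import numpy as np
    import pandas as pd
    import pandas.util.testing as tm

    s = pd.Series([1.0, np.nan], name='x')
    idx = s.index

    # identity check: a bare assert replaces self.assertIs(s.index, idx)
    assert s.index is idx

    # module-level comparison helpers replace the removed TestCase-bound ones
    tm.assert_series_equal(s, s.copy())

Under pytest, bare `assert` statements are rewritten to provide the same failure introspection the `unittest.TestCase` methods gave, which is why the helpers can be dropped without losing diagnostics.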
From 844013b20c2a6230b1baeec1d5442e26b7930656 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 24 Apr 2017 07:43:11 -0400 Subject: [PATCH 458/933] API: expose dtypes in pandas.api.types (#16099) * API: expose dtypes in pandas.api.types xref #16015 xref https://github.com/apache/arrow/pull/585 xref https://github.com/pandas-dev/pandas/issues/16042 xref https://github.com/pandas-dev/pandas/pull/15541#issuecomment-286008496 --- pandas/api/types/__init__.py | 4 ++++ pandas/tests/api/test_types.py | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/api/types/__init__.py b/pandas/api/types/__init__.py index dcf010dcf4bc2..8bda0c75f8540 100644 --- a/pandas/api/types/__init__.py +++ b/pandas/api/types/__init__.py @@ -1,6 +1,10 @@ """ public toolkit API """ from pandas.core.dtypes.api import * # noqa +from pandas.core.dtypes.dtypes import (CategoricalDtype, # noqa + DatetimeTZDtype, + PeriodDtype, + IntervalDtype) from pandas.core.dtypes.concat import union_categoricals # noqa from pandas._libs.lib import infer_dtype # noqa del np # noqa diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 3b9148a1c91c6..6b37501045d40 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -33,10 +33,12 @@ class TestTypes(Base, tm.TestCase): 'is_list_like', 'is_hashable', 'is_named_tuple', 'is_sequence', 'pandas_dtype', 'union_categoricals', 'infer_dtype'] + dtypes = ['CategoricalDtype', 'DatetimeTZDtype', + 'PeriodDtype', 'IntervalDtype'] def test_types(self): - self.check(types, self.allowed) + self.check(types, self.allowed + self.dtypes) def check_deprecation(self, fold, fnew): with tm.assert_produces_warning(DeprecationWarning): From f8b25c2829d414f7073ee3a25ba8130e75b4124f Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 24 Apr 2017 15:59:42 -0400 Subject: [PATCH 459/933] MAINT: Remove self.assertRaisesRegexp from testing (#16113) * MAINT: Remove self.assertRaisesRegexp from testing * MAINT: Do not add static assert methods to TestCase --- pandas/tests/groupby/test_groupby.py | 12 +++---- pandas/tests/groupby/test_transform.py | 8 ++--- pandas/tests/indexes/common.py | 6 ++-- .../indexes/datetimes/test_date_range.py | 32 +++++++++---------- .../tests/indexes/datetimes/test_datetime.py | 12 +++---- .../indexes/datetimes/test_partial_slicing.py | 16 +++++----- pandas/tests/indexes/period/test_indexing.py | 8 ++--- .../indexes/period/test_partial_slicing.py | 12 +++---- pandas/tests/indexes/period/test_period.py | 2 +- pandas/tests/indexes/period/test_tools.py | 6 ++-- pandas/tests/indexes/test_base.py | 4 +-- pandas/tests/indexes/test_interval.py | 4 +-- pandas/tests/indexes/timedeltas/test_ops.py | 2 +- .../timedeltas/test_partial_slicing.py | 12 +++---- pandas/tests/indexing/test_floats.py | 6 ++-- pandas/tests/indexing/test_indexing.py | 12 +++---- pandas/tests/indexing/test_scalar.py | 4 +-- pandas/tests/io/json/test_pandas.py | 2 +- pandas/tests/io/parser/compression.py | 12 +++---- pandas/tests/io/test_pytables.py | 2 +- pandas/tests/scalar/test_interval.py | 2 +- pandas/tests/scalar/test_period.py | 6 ++-- pandas/tests/scalar/test_period_asfreq.py | 18 +++++------ pandas/tests/scalar/test_timestamp.py | 2 +- pandas/tests/series/test_analytics.py | 4 +-- pandas/tests/series/test_api.py | 2 +- pandas/tests/test_panel.py | 4 +-- pandas/tests/test_strings.py | 6 ++-- pandas/tests/tools/test_numeric.py | 4 +-- pandas/util/testing.py | 6 ---- 30 files changed, 111 insertions(+), 117 deletions(-) diff --git 
a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 2d673b2dac259..8ca8ddded3073 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1874,16 +1874,16 @@ def test_groupby_args(self): def j(): frame.groupby() - self.assertRaisesRegexp(TypeError, - "You have to supply one of 'by' and 'level'", - j) + tm.assertRaisesRegexp(TypeError, + "You have to supply one of 'by' and 'level'", + j) def k(): frame.groupby(by=None, level=None) - self.assertRaisesRegexp(TypeError, - "You have to supply one of 'by' and 'level'", - k) + tm.assertRaisesRegexp(TypeError, + "You have to supply one of 'by' and 'level'", + k) def test_groupby_level_mapper(self): frame = self.mframe diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 57ea8eb067a2b..4624d43df6128 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -556,7 +556,7 @@ def test_transform_with_non_scalar_group(self): df = pd.DataFrame(np.random.randint(1, 10, (4, 12)), columns=cols, index=['A', 'C', 'G', 'T']) - self.assertRaisesRegexp(ValueError, 'transform must return a scalar ' - 'value for each group.*', df.groupby - (axis=1, level=1).transform, - lambda z: z.div(z.sum(axis=1), axis=0)) + tm.assertRaisesRegexp(ValueError, 'transform must return a scalar ' + 'value for each group.*', df.groupby + (axis=1, level=1).transform, + lambda z: z.div(z.sum(axis=1), axis=0)) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index fa1f31ded5f1c..746932b7c2975 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -875,7 +875,7 @@ def test_fillna(self): elif isinstance(index, MultiIndex): idx = index.copy() msg = "isnull is not defined for MultiIndex" - with self.assertRaisesRegexp(NotImplementedError, msg): + with tm.assertRaisesRegexp(NotImplementedError, msg): idx.fillna(idx[0]) else: idx = index.copy() @@ -884,7 +884,7 @@ def test_fillna(self): self.assertFalse(result is idx) msg = "'value' must be a scalar, passed: " - with self.assertRaisesRegexp(TypeError, msg): + with tm.assertRaisesRegexp(TypeError, msg): idx.fillna([idx[0]]) idx = index.copy() @@ -918,7 +918,7 @@ def test_nulls(self): elif isinstance(index, MultiIndex): idx = index.copy() msg = "isnull is not defined for MultiIndex" - with self.assertRaisesRegexp(NotImplementedError, msg): + with tm.assertRaisesRegexp(NotImplementedError, msg): idx.isnull() else: diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 665dcc45050f3..6e49b1612b4c5 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -212,31 +212,31 @@ def test_naive_aware_conflicts(self): naive = bdate_range(START, END, freq=BDay(), tz=None) aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong") - self.assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", - naive.join, aware) - self.assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", - aware.join, naive) + tm.assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", + naive.join, aware) + tm.assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", + aware.join, naive) def test_cached_range(self): DatetimeIndex._cached_range(START, END, offset=BDay()) DatetimeIndex._cached_range(START, periods=20, offset=BDay()) DatetimeIndex._cached_range(end=START, periods=20, offset=BDay()) - self.assertRaisesRegexp(TypeError, "offset", - 
DatetimeIndex._cached_range, - START, END) + tm.assertRaisesRegexp(TypeError, "offset", + DatetimeIndex._cached_range, + START, END) - self.assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, START, - offset=BDay()) + tm.assertRaisesRegexp(TypeError, "specify period", + DatetimeIndex._cached_range, START, + offset=BDay()) - self.assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, end=END, - offset=BDay()) + tm.assertRaisesRegexp(TypeError, "specify period", + DatetimeIndex._cached_range, end=END, + offset=BDay()) - self.assertRaisesRegexp(TypeError, "start or end", - DatetimeIndex._cached_range, periods=20, - offset=BDay()) + tm.assertRaisesRegexp(TypeError, "start or end", + DatetimeIndex._cached_range, periods=20, + offset=BDay()) def test_cached_range_bug(self): rng = date_range('2010-09-01 05:00:00', periods=50, diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 66dcb195611e1..abfc52728ef0f 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -755,12 +755,12 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): ts = Series(np.arange(20), date_range('2014-01-01', periods=20, freq='MS')) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) + tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts[::0]) + tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) def test_slice_bounds_empty(self): # GH 14354 diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 9c41e2d823238..352e066c6c90c 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -122,8 +122,8 @@ def test_partial_slice_second_precision(self): tm.assert_series_equal(s['2005-1-1 00:01:00'], s.iloc[10:]) self.assertEqual(s[Timestamp('2005-1-1 00:00:59.999990')], s.iloc[0]) - self.assertRaisesRegexp(KeyError, '2005-1-1 00:00:00', - lambda: s['2005-1-1 00:00:00']) + tm.assertRaisesRegexp(KeyError, '2005-1-1 00:00:00', + lambda: s['2005-1-1 00:00:00']) def test_partial_slicing_dataframe(self): # GH14856 @@ -249,14 +249,14 @@ def test_partial_slice_doesnt_require_monotonicity(self): timestamp = pd.Timestamp('2014-01-10') tm.assert_series_equal(nonmonotonic['2014-01-10':], expected) - self.assertRaisesRegexp(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic[timestamp:]) + tm.assertRaisesRegexp(KeyError, + r"Timestamp\('2014-01-10 00:00:00'\)", + lambda: nonmonotonic[timestamp:]) tm.assert_series_equal(nonmonotonic.loc['2014-01-10':], expected) - self.assertRaisesRegexp(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic.loc[timestamp:]) + tm.assertRaisesRegexp(KeyError, + r"Timestamp\('2014-01-10 00:00:00'\)", + lambda: nonmonotonic.loc[timestamp:]) def test_loc_datetime_length_one(self): # GH16071 diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 79998d5a6ad7f..4d5fdd748219c 100644 --- 
a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -103,10 +103,10 @@ def test_getitem_partial(self): tm.assert_series_equal(exp, result) ts = ts[10:].append(ts[10:]) - self.assertRaisesRegexp(KeyError, - "left slice bound for non-unique " - "label: '2008'", - ts.__getitem__, slice('2008', '2009')) + tm.assertRaisesRegexp(KeyError, + "left slice bound for non-unique " + "label: '2008'", + ts.__getitem__, slice('2008', '2009')) def test_getitem_datetime(self): rng = period_range(start='2012-01-01', periods=10, freq='W-MON') diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index a8ac6a3e55fe5..b13e231db6c1c 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -42,12 +42,12 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): ts = Series(np.arange(20), period_range('2014-01', periods=20, freq='M')) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) + tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts[::0]) + tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) def test_slice_keep_name(self): idx = period_range('20010101', periods=10, freq='D', name='bob') diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 9f72c10316c90..6036d6c0fb19b 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -151,7 +151,7 @@ def test_get_indexer(self): np.array([0, -1, 1], dtype=np.intp)) msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): idx.get_indexer(target, 'nearest', tolerance='1 minute') tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 49bc4a4cabe59..3887463cae827 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -386,7 +386,7 @@ def test_to_period_monthish(self): self.assertEqual(prng.freq, 'M') msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): date_range('01-Jan-2012', periods=8, freq='EOM') def test_period_dt64_round_trip(self): @@ -439,11 +439,11 @@ def test_searchsorted(self): self.assertEqual(pidx.searchsorted(p2), 3) msg = "Input has different freq=H from PeriodIndex" - with self.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): pidx.searchsorted(pd.Period('2014-01-01', freq='H')) msg = "Input has different freq=5D from PeriodIndex" - with self.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): pidx.searchsorted(pd.Period('2014-01-01', freq='5D')) with tm.assert_produces_warning(FutureWarning): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 
2b6d69a90a55f..06f98527deefb 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1450,8 +1450,8 @@ def test_str_attribute(self): MultiIndex.from_tuples([('foo', '1'), ('bar', '3')]), PeriodIndex(start='2000', end='2010', freq='A')] for idx in indices: - with self.assertRaisesRegexp(AttributeError, - 'only use .str accessor'): + with tm.assertRaisesRegexp(AttributeError, + 'only use .str accessor'): idx.str.repeat(2) idx = Index(['a b c', 'd e', 'f']) diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index c34d93cb21b0f..8e020846f5c50 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -602,9 +602,9 @@ def test_comparison(self): actual = self.index == self.index.left tm.assert_numpy_array_equal(actual, np.array([False, False])) - with self.assertRaisesRegexp(TypeError, 'unorderable types'): + with tm.assertRaisesRegexp(TypeError, 'unorderable types'): self.index > 0 - with self.assertRaisesRegexp(TypeError, 'unorderable types'): + with tm.assertRaisesRegexp(TypeError, 'unorderable types'): self.index <= 0 with pytest.raises(TypeError): self.index > np.arange(2) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 6da6653b752c9..da23f8698f4dc 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -115,7 +115,7 @@ def test_round(self): self.assertEqual(elt.round(freq='H'), expected_elt) msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): td.round(freq='foo') with tm.assertRaisesRegexp(ValueError, msg): elt.round(freq='foo') diff --git a/pandas/tests/indexes/timedeltas/test_partial_slicing.py b/pandas/tests/indexes/timedeltas/test_partial_slicing.py index f7560c37e814b..ccb9a7f1803e7 100644 --- a/pandas/tests/indexes/timedeltas/test_partial_slicing.py +++ b/pandas/tests/indexes/timedeltas/test_partial_slicing.py @@ -75,9 +75,9 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) + tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts[::0]) + tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index ef64c6e0475e9..02df4de010fbe 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -48,9 +48,9 @@ def test_scalar_error(self): def f(): s.iloc[3.0] - self.assertRaisesRegexp(TypeError, - 'cannot do positional indexing', - f) + tm.assertRaisesRegexp(TypeError, + 'cannot do positional indexing', + f) def f(): s.iloc[3.0] = 0 diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index fe934e7b2a7e0..f7a9c9fe59594 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -716,13 +716,13 @@ def assert_slices_equivalent(l_slc, i_slc): def 
test_slice_with_zero_step_raises(self): s = Series(np.arange(20), index=_mklbl('A', 20)) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: s[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: s.loc[::0]) + tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: s[::0]) + tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: s.loc[::0]) with catch_warnings(record=True): - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: s.ix[::0]) + tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: s.ix[::0]) def test_indexing_assignment_dict_already_exists(self): df = pd.DataFrame({'x': [1, 2, 6], diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 3522974c18061..a583bf1c5ef16 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -154,8 +154,8 @@ def test_at_to_fail(self): df.columns = ['x', 'x', 'z'] # Check that we get the correct value in the KeyError - self.assertRaisesRegexp(KeyError, r"\['y'\] not in index", - lambda: df[['x', 'y', 'z']]) + tm.assertRaisesRegexp(KeyError, r"\['y'\] not in index", + lambda: df[['x', 'y', 'z']]) def test_at_with_tz(self): # gh-15822 diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index b152f7bb089de..a8ea76ff9459c 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1036,7 +1036,7 @@ def test_to_jsonl(self): def test_latin_encoding(self): if compat.PY2: - self.assertRaisesRegexp( + tm.assertRaisesRegexp( TypeError, r'\[unicode\] is not implemented as a table column') return diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py index c7e9401665cdf..26e216cfbcffa 100644 --- a/pandas/tests/io/parser/compression.py +++ b/pandas/tests/io/parser/compression.py @@ -45,18 +45,18 @@ def test_zip(self): tmp.writestr(file_name, data) tmp.close() - self.assertRaisesRegexp(ValueError, 'Multiple files', - self.read_csv, path, compression='zip') + tm.assertRaisesRegexp(ValueError, 'Multiple files', + self.read_csv, path, compression='zip') - self.assertRaisesRegexp(ValueError, 'Multiple files', - self.read_csv, path, compression='infer') + tm.assertRaisesRegexp(ValueError, 'Multiple files', + self.read_csv, path, compression='infer') with tm.ensure_clean() as path: tmp = zipfile.ZipFile(path, mode='w') tmp.close() - self.assertRaisesRegexp(ValueError, 'Zero files', - self.read_csv, path, compression='zip') + tm.assertRaisesRegexp(ValueError, 'Zero files', + self.read_csv, path, compression='zip') with tm.ensure_clean() as path: with open(path, 'wb') as f: diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index ea9911d8ee66c..7d2c7a97fce0a 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -971,7 +971,7 @@ def test_encoding(self): def test_latin_encoding(self): if compat.PY2: - self.assertRaisesRegexp( + tm.assertRaisesRegexp( TypeError, r'\[unicode\] is not implemented as a table column') return diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/test_interval.py index bd73a7d173962..ea267fabd13ed 100644 --- a/pandas/tests/scalar/test_interval.py +++ b/pandas/tests/scalar/test_interval.py @@ -46,7 +46,7 @@ def test_equal(self): self.assertNotEqual(Interval(0, 1), 0) def test_comparison(self): - with self.assertRaisesRegexp(TypeError, 'unorderable types'): + 
with tm.assertRaisesRegexp(TypeError, 'unorderable types'): Interval(0, 1) < 2 self.assertTrue(Interval(0, 1) < Interval(1, 2)) diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index dff954b25f839..f8b103836e9a1 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -518,9 +518,9 @@ def test_period_deprecated_freq(self): msg = pd.tseries.frequencies._INVALID_FREQ_ERROR for exp, freqs in iteritems(cases): for freq in freqs: - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): Period('2016-03-01 09:00', freq=freq) - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): Period(ordinal=1, freq=freq) # check supported freq-aliases still works @@ -762,7 +762,7 @@ def test_properties_weekly_legacy(self): self.assertEqual(exp.days_in_month, 29) msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): Period(freq='WK', year=2007, month=1, day=7) def test_properties_daily(self): diff --git a/pandas/tests/scalar/test_period_asfreq.py b/pandas/tests/scalar/test_period_asfreq.py index d311fef8a826d..611a1cc97e9c6 100644 --- a/pandas/tests/scalar/test_period_asfreq.py +++ b/pandas/tests/scalar/test_period_asfreq.py @@ -295,26 +295,26 @@ def test_conv_weekly(self): self.assertEqual(ival_W.asfreq('W'), ival_W) msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): ival_W.asfreq('WK') def test_conv_weekly_legacy(self): # frequency conversion tests: from Weekly Frequency msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): Period(freq='WK', year=2007, month=1, day=1) - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): Period(freq='WK-SAT', year=2007, month=1, day=6) - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): Period(freq='WK-FRI', year=2007, month=1, day=5) - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): Period(freq='WK-THU', year=2007, month=1, day=4) - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): Period(freq='WK-WED', year=2007, month=1, day=3) - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): Period(freq='WK-TUE', year=2007, month=1, day=2) - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): Period(freq='WK-MON', year=2007, month=1, day=1) def test_conv_business(self): @@ -712,7 +712,7 @@ def test_asfreq_MS(self): Period('2013-01', 'M')) msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): initial.asfreq(freq="MS", how="S") with tm.assertRaisesRegexp(ValueError, msg): diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 2b00ac68ee555..b7b1b4c198701 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -710,7 +710,7 @@ def _check_round(freq, expected): _check_round(freq, expected) msg = frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): stamp.round('foo') 
def test_class_ops_pytz(self): diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 63e291836b472..02748aee4628d 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -550,8 +550,8 @@ def testit(): # Unimplemented numeric_only parameter. if 'numeric_only' in compat.signature(f).args: - self.assertRaisesRegexp(NotImplementedError, name, f, - self.series, numeric_only=True) + tm.assertRaisesRegexp(NotImplementedError, name, f, + self.series, numeric_only=True) testit() diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 341d4006c5fcc..f1a904498b727 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -341,7 +341,7 @@ def test_str_attribute(self): # str accessor only valid with string values s = Series(range(5)) - with self.assertRaisesRegexp(AttributeError, 'only use .str accessor'): + with tm.assertRaisesRegexp(AttributeError, 'only use .str accessor'): s.str.repeat(2) def test_empty_method(self): diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 1d0788ae160dd..17bc06b5ee9c7 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -180,8 +180,8 @@ def wrapper(x): # Unimplemented numeric_only parameter. if 'numeric_only' in signature(f).args: - self.assertRaisesRegexp(NotImplementedError, name, f, - numeric_only=True) + tm.assertRaisesRegexp(NotImplementedError, name, f, + numeric_only=True) class SafeForSparse(object): diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 761bfa6bfe29a..a818bf84b8e9b 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -2722,9 +2722,9 @@ def test_index_str_accessor_visibility(self): for values, tp in cases: idx = Index(values) message = 'Can only use .str accessor with string values' - with self.assertRaisesRegexp(AttributeError, message): + with tm.assertRaisesRegexp(AttributeError, message): Series(values).str - with self.assertRaisesRegexp(AttributeError, message): + with tm.assertRaisesRegexp(AttributeError, message): idx.str self.assertEqual(idx.inferred_type, tp) @@ -2732,7 +2732,7 @@ def test_index_str_accessor_visibility(self): idx = MultiIndex.from_tuples([('a', 'b'), ('a', 'b')]) self.assertEqual(idx.inferred_type, 'mixed') message = 'Can only use .str accessor with Index, not MultiIndex' - with self.assertRaisesRegexp(AttributeError, message): + with tm.assertRaisesRegexp(AttributeError, message): idx.str def test_str_accessor_no_new_attributes(self): diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py index 5fa3f38f31ae1..1376101412112 100644 --- a/pandas/tests/tools/test_numeric.py +++ b/pandas/tests/tools/test_numeric.py @@ -253,7 +253,7 @@ def test_non_hashable(self): res = pd.to_numeric(s, errors='ignore') tm.assert_series_equal(res, pd.Series([[10.0, 2], 1.0, 'apple'])) - with self.assertRaisesRegexp(TypeError, "Invalid object type"): + with tm.assertRaisesRegexp(TypeError, "Invalid object type"): pd.to_numeric(s) def test_downcast(self): @@ -274,7 +274,7 @@ def test_downcast(self): smallest_float_dtype = float_32_char for data in (mixed_data, int_data, date_data): - with self.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(ValueError, msg): pd.to_numeric(data, downcast=invalid_downcast) expected = np.array([1, 2, 3], dtype=np.int64) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 08b298fabdd57..75d24938ed309 100644 --- 
a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2665,12 +2665,6 @@ def use_numexpr(use, min_elements=expr._MIN_ELEMENTS): expr.set_use_numexpr(olduse) -# Also provide all assert_* functions in the TestCase class -for name, obj in inspect.getmembers(sys.modules[__name__]): - if inspect.isfunction(obj) and name.startswith('assert'): - setattr(TestCase, name, staticmethod(obj)) - - def test_parallel(num_threads=2, kwargs_list=None): """Decorator to run the same function multiple times in parallel. From 72248e759a2193bbe14ede2bb1534659ded7ccd1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 24 Apr 2017 21:53:13 -0400 Subject: [PATCH 460/933] REGR: assure .unique of mixed strings does not stringize (#16108) closes #16107 --- pandas/core/algorithms.py | 6 +++++- pandas/tests/indexes/common.py | 2 +- pandas/tests/test_algos.py | 8 +++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 8437861bea19e..3df82b6c13259 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -161,7 +161,11 @@ def _ensure_arraylike(values): """ if not isinstance(values, (np.ndarray, ABCCategorical, ABCIndexClass, ABCSeries)): - values = np.array(values) + inferred = lib.infer_dtype(values) + if inferred in ['mixed', 'string', 'unicode']: + values = np.asarray(values, dtype=object) + else: + values = np.asarray(values) return values diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 746932b7c2975..b62cab6cc8710 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -323,6 +323,7 @@ def test_get_unique_index(self): idx = ind[[0] * 5] idx_unique = ind[[0]] + # We test against `idx_unique`, so first we make sure it's unique # and doesn't contain nans. self.assertTrue(idx_unique.is_unique) @@ -336,7 +337,6 @@ def test_get_unique_index(self): tm.assert_index_equal(result, idx_unique) # nans: - if not ind._can_hold_na: continue diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index df267f2374051..01c18dc64f578 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -387,6 +387,12 @@ def test_uint64_overflow(self): exp = np.array([1, 2, 2**63], dtype=np.uint64) tm.assert_numpy_array_equal(algos.unique(s), exp) + def test_nan_in_object_array(self): + l = ['a', np.nan, 'c', 'c'] + result = pd.unique(l) + expected = np.array(['a', np.nan, 'c'], dtype=object) + tm.assert_numpy_array_equal(result, expected) + def test_categorical(self): # we are expecting to return in the order @@ -1378,8 +1384,8 @@ def test_no_mode(self): exp = Series([], dtype=np.float64) tm.assert_series_equal(algos.mode([]), exp) - # GH 15714 def test_mode_single(self): + # GH 15714 exp_single = [1] data_single = [1] From 008e9ec4d2df01dc7325d626a196c0128f6976be Mon Sep 17 00:00:00 2001 From: chris-b1 Date: Mon, 24 Apr 2017 20:55:28 -0500 Subject: [PATCH 461/933] PERF: maybe_convert_numeric speedup (#16104) --- pandas/_libs/src/inference.pyx | 52 +++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index f7dbae4ab736e..d87a0641291b1 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -96,7 +96,7 @@ cdef class Seen(object): encountered when trying to perform type conversions. 
""" - cdef public: + cdef: bint int_ # seen_int bint bool_ # seen_bool bint null_ # seen_null @@ -185,7 +185,7 @@ cdef class Seen(object): self.null_ = 1 self.float_ = 1 - def saw_int(self, val): + cdef saw_int(self, object val): """ Set flags indicating that an integer value was encountered. @@ -196,7 +196,7 @@ cdef class Seen(object): """ self.int_ = 1 self.sint_ = self.sint_ or (val < 0) - self.uint_ = self.uint_ or (val > iINT64_MAX) + self.uint_ = self.uint_ or (val > oINT64_MAX) @property def numeric_(self): @@ -908,11 +908,15 @@ cpdef bint is_interval_array(ndarray[object] values): cdef extern from "parse_helper.h": inline int floatify(object, double *result, int *maybe_int) except -1 -cdef int64_t iINT64_MAX = INT64_MAX -cdef int64_t iINT64_MIN = INT64_MIN -cdef uint64_t iUINT64_MAX = UINT64_MAX +# constants that will be compared to potentially arbitrarily large +# python int +cdef object oINT64_MAX = INT64_MAX +cdef object oINT64_MIN = INT64_MIN +cdef object oUINT64_MAX = UINT64_MAX +@cython.boundscheck(False) +@cython.wraparound(False) def maybe_convert_numeric(ndarray[object] values, set na_values, bint convert_empty=True, bint coerce_numeric=False): """ @@ -943,6 +947,17 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, ------- numeric_array : array of converted object values to numerical ones """ + # fastpath for ints - try to convert all based on first value + cdef object val = values[0] + if util.is_integer_object(val): + try: + maybe_ints = values.astype('i8') + if (maybe_ints == values).all(): + return maybe_ints + except (ValueError, OverflowError, TypeError): + pass + + # otherwise, iterate and do full infererence cdef: int status, maybe_int Py_ssize_t i, n = values.size @@ -952,7 +967,6 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, ndarray[int64_t] ints = np.empty(n, dtype='i8') ndarray[uint64_t] uints = np.empty(n, dtype='u8') ndarray[uint8_t] bools = np.empty(n, dtype='u1') - object val float64_t fval for i in range(n): @@ -962,21 +976,23 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, seen.saw_null() floats[i] = complexes[i] = nan elif util.is_float_object(val): - if val != val: + fval = val + if fval != fval: seen.null_ = True - floats[i] = complexes[i] = val + floats[i] = complexes[i] = fval seen.float_ = True elif util.is_integer_object(val): floats[i] = complexes[i] = val - as_int = int(val) - seen.saw_int(as_int) + val = int(val) + seen.saw_int(val) + + if val >= 0: + uints[i] = val - if as_int >= 0: - uints[i] = as_int - if as_int <= iINT64_MAX: - ints[i] = as_int + if val <= oINT64_MAX: + ints[i] = val elif util.is_bool_object(val): floats[i] = uints[i] = ints[i] = bools[i] = val seen.bool_ = True @@ -1017,12 +1033,12 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, seen.saw_int(as_int) if not (seen.float_ or as_int in na_values): - if as_int < iINT64_MIN or as_int > iUINT64_MAX: + if as_int < oINT64_MIN or as_int > oUINT64_MAX: raise ValueError('Integer out of range.') if as_int >= 0: uints[i] = as_int - if as_int <= iINT64_MAX: + if as_int <= oINT64_MAX: ints[i] = as_int else: seen.float_ = True @@ -1053,6 +1069,8 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, return ints +@cython.boundscheck(False) +@cython.wraparound(False) def maybe_convert_objects(ndarray[object] objects, bint try_float=0, bint safe=0, bint convert_datetime=0, bint convert_timedelta=0): From 4ca4fca839ced3483f37b3ae2434033e2721e1d5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 
24 Apr 2017 22:10:13 -0400 Subject: [PATCH 462/933] BUG: Compare names with None in DataFrame ctor Closes https://github.com/pandas-dev/pandas/issues/16114 Author: Tom Augspurger Closes #16117 from TomAugspurger/int_dtypes and squashes the following commits: 7f7a32e [Tom Augspurger] DOC: Add a release note 0663098 [Tom Augspurger] BUG: Compare names with None in DataFrame ctor --- pandas/core/frame.py | 2 +- pandas/tests/frame/test_constructors.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b3da897b97e5c..983a6ef3e045a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -296,7 +296,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, if columns is None: columns = data_columns mgr = self._init_dict(data, index, columns, dtype=dtype) - elif getattr(data, 'name', None): + elif getattr(data, 'name', None) is not None: mgr = self._init_dict({data.name: data}, index, columns, dtype=dtype) else: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 737d9f8e50477..f6cdb37a2477a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1888,6 +1888,15 @@ def test_from_records_len0_with_columns(self): self.assertEqual(len(result), 0) self.assertEqual(result.index.name, 'foo') + def test_to_frame_with_falsey_names(self): + # GH 16114 + result = Series(name=0).to_frame().dtypes + expected = Series({0: np.float64}) + tm.assert_series_equal(result, expected) + + result = DataFrame(Series(name=0)).dtypes + tm.assert_series_equal(result, expected) + class TestDataFrameConstructorWithDatetimeTZ(tm.TestCase, TestData): From 6ad32d23c52416b4e6195c873a23be5065b85228 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 25 Apr 2017 03:56:34 -0400 Subject: [PATCH 463/933] REGR: bug in moments when using bottleneck (#16124) closes #16116 --- pandas/core/nanops.py | 3 +++ pandas/tests/frame/test_analytics.py | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 5ce302967de24..e9be43b184537 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -381,6 +381,7 @@ def nanstd(values, axis=None, skipna=True, ddof=1): @bottleneck_switch(ddof=1) def nanvar(values, axis=None, skipna=True, ddof=1): + values = _values_from_object(values) dtype = values.dtype mask = isnull(values) if is_any_int_dtype(values): @@ -489,6 +490,7 @@ def nanskew(values, axis=None, skipna=True): """ + values = _values_from_object(values) mask = isnull(values) if not is_float_dtype(values.dtype): values = values.astype('f8') @@ -543,6 +545,7 @@ def nankurt(values, axis=None, skipna=True): central moment. 
""" + values = _values_from_object(values) mask = isnull(values) if not is_float_dtype(values.dtype): values = values.astype('f8') diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index cd98460d8609c..0941f0af6bec5 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -591,6 +591,23 @@ def test_numeric_only_flag(self): pytest.raises(TypeError, lambda: getattr(df2, meth)( axis=1, numeric_only=False)) + def test_mixed_ops(self): + # GH 16116 + df = DataFrame({'int': [1, 2, 3, 4], + 'float': [1., 2., 3., 4.], + 'str': ['a', 'b', 'c', 'd']}) + + for op in ['mean', 'std', 'var', 'skew', + 'kurt', 'sem']: + result = getattr(df, op)() + assert len(result) == 2 + + if nanops._USE_BOTTLENECK: + nanops._USE_BOTTLENECK = False + result = getattr(df, op)() + assert len(result) == 2 + nanops._USE_BOTTLENECK = True + def test_cumsum(self): self.tsframe.loc[5:10, 0] = nan self.tsframe.loc[10:15, 1] = nan From 8ab8ad027a98b3219d9bbdb1cc39f7e577deed55 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 25 Apr 2017 12:35:14 +0200 Subject: [PATCH 464/933] BUG: fix error for reset_index() with index.name in MultiIndex columns (#16126) closes #16120 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/internals.py | 2 +- pandas/tests/test_multilevel.py | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 945922b5f9ba8..c07760a94d3f1 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1623,6 +1623,7 @@ Indexing - Bug in ``Series.iloc`` where a ``Categorical`` object for list-like indexes input was returned, where a ``Series`` was expected. (:issue:`14580`) - Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) - Bug in ``.reset_index()`` when an all ``NaN`` level of a ``MultiIndex`` would fail (:issue:`6322`) +- Bug in ``.reset_index()`` when raising error for index name already present in ``MultiIndex`` columns (:issue:`16120`) - Bug in creating a ``MultiIndex`` with tuples and not passing a list of names; this will now raise ``ValueError`` (:issue:`15110`) - Bug in the HTML display with with a ``MultiIndex`` and truncation (:issue:`14882`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f265f5f438280..840206977cf30 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -3807,7 +3807,7 @@ def insert(self, loc, item, value, allow_duplicates=False): """ if not allow_duplicates and item in self.items: # Should this be a different kind of error?? 
- raise ValueError('cannot insert %s, already exists' % item) + raise ValueError('cannot insert {}, already exists'.format(item)) if not isinstance(loc, int): raise TypeError("loc must be int") diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 173ac97691b3b..e81e6e2d987c6 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2235,6 +2235,22 @@ def test_reset_index_period(self): }, columns=['month', 'feature', 'a']) tm.assert_frame_equal(df.reset_index(), expected) + def test_reset_index_multiindex_columns(self): + levels = [['A', ''], ['B', 'b']] + df = pd.DataFrame([[0, 2], [1, 3]], + columns=pd.MultiIndex.from_tuples(levels)) + expected = df.copy() + df.index.name = 'A' + result = df[['B']].reset_index() + tm.assert_frame_equal(result, expected) + + # GH 16120 + # already existing column + with tm.assertRaisesRegexp(ValueError, + ("cannot insert \('A', ''\), " + "already exists")): + df.reset_index() + def test_set_index_period(self): # GH 6631 df = DataFrame(np.random.random(6)) From 8d122e633a6b08e3f9dfad80050da42d789210d4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 25 Apr 2017 07:16:33 -0400 Subject: [PATCH 465/933] PERF: use StringHashtable in data algos (#16128) xref #16107 --- pandas/core/algorithms.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3df82b6c13259..a745ec616eda8 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -216,10 +216,7 @@ def _get_data_algo(values, func_map): # its cheaper to use a String Hash Table than Object if lib.infer_dtype(values) in ['string']: - try: - f = func_map['string'] - except KeyError: - pass + ndtype = 'string' f = func_map.get(ndtype, func_map['object']) From 186957ef153adaba367ffcd5cb0162488416b59f Mon Sep 17 00:00:00 2001 From: chris-b1 Date: Tue, 25 Apr 2017 17:55:22 -0500 Subject: [PATCH 466/933] BUG: segfault in concat of CategoricalIndex (#16133) * BUG: segfault in concat of cat-idx * lint --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/indexes/category.py | 7 +++++-- pandas/tests/reshape/test_concat.py | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c07760a94d3f1..c9c22de9141fe 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1629,6 +1629,7 @@ Indexing - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``pd.concat()`` where the names of ``MultiIndex`` of resulting ``DataFrame`` are not handled correctly when ``None`` is presented in the names of ``MultiIndex`` of input ``DataFrame`` (:issue:`15787`) - Bug in ``DataFrame.sort_index()`` and ``Series.sort_index()`` where ``na_position`` doesn't work with a ``MultiIndex`` (:issue:`14784`, :issue:`16604`) + - Bug in in ``pd.concat()`` when combining objects with a ``CategoricalIndex`` (:issue:`16111`) I/O ^^^ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 257ca86947f2b..5f38b19742f71 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -12,6 +12,7 @@ is_scalar) from pandas.core.common import _asarray_tuplesafe from pandas.core.dtypes.missing import array_equivalent +from pandas.core.algorithms import take_1d from pandas.util.decorators import Appender, cache_readonly @@ 
-470,8 +471,10 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): codes = target.codes else: if isinstance(target, CategoricalIndex): - target = target.categories - codes = self.categories.get_indexer(target) + code_indexer = self.categories.get_indexer(target.categories) + codes = take_1d(code_indexer, target.codes, fill_value=-1) + else: + codes = self.categories.get_indexer(target) indexer, _ = self._engine.get_indexer_non_unique(codes) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index b877a9d181848..cc71cf6b1a4dc 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1928,6 +1928,27 @@ def test_concat_multiindex_dfs_with_deepcopy(self): result_no_copy = pd.concat(example_dict, names=['testname']) tm.assert_frame_equal(result_no_copy, expected) + def test_concat_categoricalindex(self): + # GH 16111, categories that aren't lexsorted + categories = [9, 0, 1, 2, 3] + + a = pd.Series(1, index=pd.CategoricalIndex([9, 0], + categories=categories)) + b = pd.Series(2, index=pd.CategoricalIndex([0, 1], + categories=categories)) + c = pd.Series(3, index=pd.CategoricalIndex([1, 2], + categories=categories)) + + result = pd.concat([a, b, c], axis=1) + + exp_idx = pd.CategoricalIndex([0, 1, 2, 9]) + exp = pd.DataFrame({0: [1, np.nan, np.nan, 1], + 1: [2, 2, np.nan, np.nan], + 2: [np.nan, 3, 3, np.nan]}, + columns=[0, 1, 2], + index=exp_idx) + tm.assert_frame_equal(result, exp) + @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) @pytest.mark.parametrize('dt', np.sctypes['float']) From d50b1620ca57a37c0c005ae946b9fc45020e2155 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 25 Apr 2017 18:55:19 -0500 Subject: [PATCH 467/933] BUG/API: Catch exceptions in _ipython_display_ (#16132) We raise an UnserializableWarning when we fail to generate the table schema and do not publish a `table_schema` repr. --- pandas/core/generic.py | 8 +++++++- pandas/errors/__init__.py | 8 ++++++++ pandas/tests/io/formats/test_printing.py | 12 ++++++++++-- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 74d3053821e39..f078cbb435d25 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -43,6 +43,7 @@ import pandas.core.algorithms as algos import pandas.core.common as com import pandas.core.missing as missing +from pandas.errors import UnserializableWarning from pandas.io.formats.printing import pprint_thing from pandas.io.formats.format import format_percentiles from pandas.tseries.frequencies import to_offset @@ -138,7 +139,12 @@ def _ipython_display_(self): # Series doesn't define _repr_html_ or _repr_latex_ latex = self._repr_latex_() if hasattr(self, '_repr_latex_') else None html = self._repr_html_() if hasattr(self, '_repr_html_') else None - table_schema = self._repr_table_schema_() + try: + table_schema = self._repr_table_schema_() + except Exception as e: + warnings.warn("Cannot create table schema representation. " + "{}".format(e), UnserializableWarning) + table_schema = None # We need the inital newline since we aren't going through the # usual __repr__. 
See # https://github.com/pandas-dev/pandas/pull/14904#issuecomment-277829277 diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index f6719e7be421b..8540d8776fbaa 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -55,3 +55,11 @@ class ParserWarning(Warning): one specified by the user due to lack of support or functionality for parsing particular attributes of a CSV file with the requsted engine """ + + +class UnserializableWarning(Warning): + """ + Warnng that is raised when a DataFrame cannot be serialzed. + + .. versionadded:: 0.20.0 + """ diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index f9d911f523699..63cd08545610f 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -5,6 +5,7 @@ import pandas as pd from pandas import compat +from pandas.errors import UnserializableWarning import pandas.io.formats.printing as printing import pandas.io.formats.format as fmt import pandas.util.testing as tm @@ -177,9 +178,16 @@ def test_publishes_not_implemented(self): make_patch = self.mock.patch('IPython.display.display') opt = pd.option_context('display.html.table_schema', True) - with opt, make_patch as mock_display: # noqa - with pytest.raises(NotImplementedError): + with opt, make_patch as mock_display: + with pytest.warns(UnserializableWarning) as record: df._ipython_display_() + args, _ = mock_display.call_args + arg, = args # just one argument + + expected = {'text/plain', 'text/html'} + assert set(arg.keys()) == expected + assert "orient='table' is not supported for MultiIndex" in ( + record[-1].message.args[0]) def test_config_on(self): df = pd.DataFrame({"A": [1, 2]}) From b555c432fd874968a8745fff07158947a18482c2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 26 Apr 2017 11:12:02 +0200 Subject: [PATCH 468/933] DOC: fix some typos (#16144) --- doc/source/whatsnew/v0.20.0.txt | 13 +++++++------ pandas/errors/__init__.py | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c9c22de9141fe..0b66b90afec67 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1241,7 +1241,7 @@ If indicated, a deprecation warning will be issued if you reference theses modul Some new subpackages are created with public functionality that is not directly exposed in the top-level namespace: ``pandas.errors``, ``pandas.plotting`` and ``pandas.testing`` (more details below). Together with ``pandas.api.types`` and -certain functions in the ``pandas.io`` and ``pandas.tseries`` submodules, +certain functions in the ``pandas.io`` and ``pandas.tseries`` submodules, these are now the public subpackages. @@ -1276,7 +1276,7 @@ The following are now part of this API: ``pandas.testing`` ^^^^^^^^^^^^^^^^^^ -We are adding a standard module that exposes the public testing functions in ``pandas.testing`` (:issue:`9895`. Those functions can be used when writing tests for functionality using pandas objects. +We are adding a standard module that exposes the public testing functions in ``pandas.testing`` (:issue:`9895`). Those functions can be used when writing tests for functionality using pandas objects. The following testing functions are now part of this API: @@ -1295,13 +1295,14 @@ A new public ``pandas.plotting`` module has been added that holds plotting funct .. 
_whatsnew_0200.privacy.development: -Other Developement Changes -^^^^^^^^^^^^^^^^^^^^^^^^^^ +Other Development Changes +^^^^^^^^^^^^^^^^^^^^^^^^^ - Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`) - Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`) -- Reorganization of timeseries tests (:issue:`14854`) -- Reorganization of date converter tests (:issue:`15707`) +- Switched the test framework to use `pytest `__ (:issue:`13097`) +- Reorganization of tests directory layout (:issue:`14854`, :issue:`15707`). + .. _whatsnew_0200.deprecations: diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 8540d8776fbaa..9b6c9c5be319c 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -59,7 +59,7 @@ class ParserWarning(Warning): class UnserializableWarning(Warning): """ - Warnng that is raised when a DataFrame cannot be serialzed. + Warning that is raised when a DataFrame cannot be serialized. .. versionadded:: 0.20.0 """ From b8d98618d3dd9a2f912e8ed67b835836406dcb7c Mon Sep 17 00:00:00 2001 From: chris-b1 Date: Wed, 26 Apr 2017 08:20:35 -0500 Subject: [PATCH 469/933] MAINT: asv with py3 on windows (#16139) --- asv_bench/asv.conf.json | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 62f1c090a7462..59c05400d06b0 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -46,12 +46,14 @@ "numexpr": [], "pytables": [null, ""], // platform dependent, see excludes below "tables": [null, ""], - "libpython": [null, ""], "openpyxl": [], "xlsxwriter": [], "xlrd": [], "xlwt": [], "pytest": [], + // If using Windows with python 2.7 and want to build using the + // mingw toolchain (rather than MSVC), uncomment the following line. 
+ // "libpython": [], }, // Combinations of libraries/python versions can be excluded/included @@ -80,10 +82,6 @@ {"environment_type": "conda", "pytables": null}, {"environment_type": "(?!conda).*", "tables": null}, {"environment_type": "(?!conda).*", "pytables": ""}, - // On conda&win32, install libpython - {"sys_platform": "(?!win32).*", "libpython": ""}, - {"environment_type": "conda", "sys_platform": "win32", "libpython": null}, - {"environment_type": "(?!conda).*", "libpython": ""} ], "include": [], From 61ca02274b898cc2dd15f305a0b885e2f6348dd8 Mon Sep 17 00:00:00 2001 From: Akash Tandon Date: Wed, 26 Apr 2017 09:30:55 -0400 Subject: [PATCH 470/933] BUG: raise for invalid dtypes per issue #15520 closes #15520 Author: Akash Tandon Author: root Author: analyticalmonk Author: Akash Tandon Closes #16047 from analyticalmonk/patch_for_15520 and squashes the following commits: 3646eb6 [analyticalmonk] TST: check for invalid dtype for Series constructor per GH15520 73d980a [Akash Tandon] Merge branch 'master' into patch_for_15520 b3c2fbb [root] BUG: Added 'O' to pandas_dtype's valid list c3699fb [root] DOC: added whatsnew entry for PR#16047 addressing GH15520 fbed5a6 [Akash Tandon] TST: Added list to invalid dtype ad9f345 [Akash Tandon] CLN: refactored code related to issue GH15520 a358181 [Akash Tandon] BUG: Added numpy.dtype_ to valid pandas_dtype() type list 3eaa432 [Akash Tandon] TST: Added numpy.object_ dtype to valid pandas_dtype list f858726 [Akash Tandon] style fix d4971cd [Akash Tandon] BUG: pandas_dtype() to raise error for invalid dtype per GH15520 ee0030f [Akash Tandon] TST: added more test-cases for pandas_dtype() test 3700259 [Akash Tandon] CLN: Replace _coerce_to_dtype() with pandas_dtype() c10e1d4 [Akash Tandon] TST: maintain list containing dtypes in TestPandasDtype fecba12 [Akash Tandon] BUG: Raise when invalid dtype passed to pandas_dtype 99fb660 [Akash Tandon] TST: wrote test representing bug fix result for #15520 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/dtypes/cast.py | 4 ++-- pandas/core/dtypes/common.py | 17 ++++++++++++++++- pandas/core/generic.py | 7 ++++--- pandas/core/series.py | 7 ++++--- pandas/tests/dtypes/test_common.py | 15 +++++++++++++++ pandas/tests/series/test_constructors.py | 8 ++++++++ pandas/tests/test_strings.py | 16 +++++++--------- 8 files changed, 57 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 0b66b90afec67..3a48c94830597 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1605,6 +1605,7 @@ Conversion - Bug in the return type of ``pd.unique`` on a ``Categorical``, which was returning an ndarray and not a ``Categorical`` (:issue:`15903`) - Bug in ``Index.to_series()`` where the index was not copied (and so mutating later would change the original), (:issue:`15949`) - Bug in indexing with partial string indexing with a len-1 DataFrame (:issue:`16071`) +- Bug in ``Series`` construction where passing invalid dtype didn't raise an error. 
(:issue:`15520`) Indexing ^^^^^^^^ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index a5e12e8262579..19d3792f73de7 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -19,7 +19,7 @@ is_datetime_or_timedelta_dtype, is_bool_dtype, is_scalar, _string_dtypes, - _coerce_to_dtype, + pandas_dtype, _ensure_int8, _ensure_int16, _ensure_int32, _ensure_int64, _NS_DTYPE, _TD_DTYPE, _INT64_DTYPE, @@ -576,7 +576,7 @@ def astype_nansafe(arr, dtype, copy=True): """ return a view if copy is False, but need to be very careful as the result shape could change! """ if not isinstance(dtype, np.dtype): - dtype = _coerce_to_dtype(dtype) + dtype = pandas_dtype(dtype) if issubclass(dtype.type, text_type): # in Py3 that's str, in Py2 that's unicode diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 156e43fc4e5fb..ba822071a3b72 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -788,4 +788,19 @@ def pandas_dtype(dtype): elif isinstance(dtype, ExtensionDtype): return dtype - return np.dtype(dtype) + try: + npdtype = np.dtype(dtype) + except (TypeError, ValueError): + raise + + # Any invalid dtype (such as pd.Timestamp) should raise an error. + # np.dtype(invalid_type).kind = 0 for such objects. However, this will + # also catch some valid dtypes such as object, np.object_ and 'object' + # which we safeguard against by catching them earlier and returning + # np.dtype(valid_dtype) before this condition is evaluated. + if dtype in [object, np.object_, 'object', 'O']: + return npdtype + elif npdtype.kind == 'O': + raise TypeError('dtype {0} not understood'.format(dtype)) + + return npdtype diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f078cbb435d25..70862015dff5b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11,7 +11,6 @@ from pandas._libs import tslib, lib from pandas.core.dtypes.common import ( - _coerce_to_dtype, _ensure_int64, needs_i8_conversion, is_scalar, @@ -23,7 +22,8 @@ is_datetime64tz_dtype, is_list_like, is_dict_like, - is_re_compilable) + is_re_compilable, + pandas_dtype) from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask from pandas.core.dtypes.missing import isnull, notnull from pandas.core.dtypes.generic import ABCSeries, ABCPanel @@ -170,13 +170,14 @@ def _validate_dtype(self, dtype): """ validate the passed dtype """ if dtype is not None: - dtype = _coerce_to_dtype(dtype) + dtype = pandas_dtype(dtype) # a compound dtype if dtype.kind == 'V': raise NotImplementedError("compound dtypes are not implemented" "in the {0} constructor" .format(self.__class__.__name__)) + return dtype def _init_mgr(self, mgr, axes=None, dtype=None, copy=False): diff --git a/pandas/core/series.py b/pandas/core/series.py index d4511fb58b2f3..f03091d7e6a66 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -14,7 +14,7 @@ import numpy.ma as ma from pandas.core.dtypes.common import ( - _coerce_to_dtype, is_categorical_dtype, + is_categorical_dtype, is_bool, is_integer, is_integer_dtype, is_float_dtype, @@ -28,7 +28,8 @@ is_dict_like, is_scalar, _is_unorderable_exception, - _ensure_platform_int) + _ensure_platform_int, + pandas_dtype) from pandas.core.dtypes.generic import ABCSparseArray, ABCDataFrame from pandas.core.dtypes.cast import ( maybe_upcast, infer_dtype_from_scalar, @@ -2872,7 +2873,7 @@ def _sanitize_array(data, index, dtype=None, copy=False, """ if dtype is not None: - dtype = _coerce_to_dtype(dtype) + dtype = pandas_dtype(dtype) if 
isinstance(data, ma.MaskedArray): mask = ma.getmaskarray(data) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 86233c5d2b192..c4ef5e48b4db9 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -2,6 +2,7 @@ import pytest import numpy as np +import pandas as pd from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, PeriodDtype, CategoricalDtype) @@ -13,6 +14,20 @@ class TestPandasDtype(tm.TestCase): + # Passing invalid dtype, both as a string or object, must raise TypeError + # Per issue GH15520 + def test_invalid_dtype_error(self): + msg = 'not understood' + invalid_list = [pd.Timestamp, 'pd.Timestamp', list] + for dtype in invalid_list: + with tm.assertRaisesRegexp(TypeError, msg): + pandas_dtype(dtype) + + valid_list = [object, 'float64', np.object_, np.dtype('object'), 'O', + np.float64, float, np.dtype('float64')] + for dtype in valid_list: + pandas_dtype(dtype) + def test_numpy_dtype(self): for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']: self.assertEqual(pandas_dtype(dtype), np.dtype(dtype)) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 57cce1d1cf199..74c2544d900ea 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -30,6 +30,14 @@ class TestSeriesConstructors(TestData, tm.TestCase): + def test_invalid_dtype(self): + # GH15520 + msg = 'not understood' + invalid_list = [pd.Timestamp, 'pd.Timestamp', list] + for dtype in invalid_list: + with tm.assertRaisesRegexp(TypeError, msg): + Series([], name='time', dtype=dtype) + def test_scalar_conversion(self): # Pass in scalar is disabled diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index a818bf84b8e9b..6733fbdc3b9c6 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1208,10 +1208,9 @@ def test_extractall_same_as_extract_subject_index(self): tm.assert_frame_equal(extract_one_noname, no_match_index) def test_empty_str_methods(self): - empty_str = empty = Series(dtype=str) + empty_str = empty = Series(dtype=object) empty_int = Series(dtype=int) empty_bool = Series(dtype=bool) - empty_list = Series(dtype=list) empty_bytes = Series(dtype=object) # GH7241 @@ -1242,25 +1241,24 @@ def test_empty_str_methods(self): DataFrame(columns=[0, 1], dtype=str), empty.str.extract('()()', expand=False)) tm.assert_frame_equal(DataFrame(dtype=str), empty.str.get_dummies()) - tm.assert_series_equal(empty_str, empty_list.str.join('')) + tm.assert_series_equal(empty_str, empty_str.str.join('')) tm.assert_series_equal(empty_int, empty.str.len()) - tm.assert_series_equal(empty_list, empty_list.str.findall('a')) + tm.assert_series_equal(empty_str, empty_str.str.findall('a')) tm.assert_series_equal(empty_int, empty.str.find('a')) tm.assert_series_equal(empty_int, empty.str.rfind('a')) tm.assert_series_equal(empty_str, empty.str.pad(42)) tm.assert_series_equal(empty_str, empty.str.center(42)) - tm.assert_series_equal(empty_list, empty.str.split('a')) - tm.assert_series_equal(empty_list, empty.str.rsplit('a')) - tm.assert_series_equal(empty_list, + tm.assert_series_equal(empty_str, empty.str.split('a')) + tm.assert_series_equal(empty_str, empty.str.rsplit('a')) + tm.assert_series_equal(empty_str, empty.str.partition('a', expand=False)) - tm.assert_series_equal(empty_list, + tm.assert_series_equal(empty_str, empty.str.rpartition('a', expand=False)) tm.assert_series_equal(empty_str, empty.str.slice(stop=1)) 
tm.assert_series_equal(empty_str, empty.str.slice(step=1)) tm.assert_series_equal(empty_str, empty.str.strip()) tm.assert_series_equal(empty_str, empty.str.lstrip()) tm.assert_series_equal(empty_str, empty.str.rstrip()) - tm.assert_series_equal(empty_str, empty.str.rstrip()) tm.assert_series_equal(empty_str, empty.str.wrap(42)) tm.assert_series_equal(empty_str, empty.str.get(0)) tm.assert_series_equal(empty_str, empty_bytes.str.decode('ascii')) From a94086880c12124e135fd8f12ebc494bcd0ef0d0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 26 Apr 2017 11:15:03 -0400 Subject: [PATCH 471/933] REGR: Bug in indexing with a CategoricalIndex (#16123) * REGR: Bug in indexing with a CategoricalIndex closes #16115 * some cleaning * BUG: scalar getitem with a CI closes #16131 --- doc/source/whatsnew/v0.20.0.txt | 3 +- pandas/core/indexes/category.py | 21 +++- pandas/tests/indexing/test_categorical.py | 137 ++++++++++++++-------- pandas/tests/reshape/test_reshape.py | 2 +- 4 files changed, 108 insertions(+), 55 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 3a48c94830597..025ac7673622b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1631,7 +1631,8 @@ Indexing - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``pd.concat()`` where the names of ``MultiIndex`` of resulting ``DataFrame`` are not handled correctly when ``None`` is presented in the names of ``MultiIndex`` of input ``DataFrame`` (:issue:`15787`) - Bug in ``DataFrame.sort_index()`` and ``Series.sort_index()`` where ``na_position`` doesn't work with a ``MultiIndex`` (:issue:`14784`, :issue:`16604`) - - Bug in ``pd.concat()`` when combining objects with a ``CategoricalIndex`` (:issue:`16111`) +- Bug in ``pd.concat()`` when combining objects with a ``CategoricalIndex`` (:issue:`16111`) +- Bug in indexing with a scalar and a ``CategoricalIndex`` (:issue:`16123`) I/O ^^^ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 5f38b19742f71..760db4ba20675 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -10,7 +10,8 @@ is_list_like, is_interval_dtype, is_scalar) -from pandas.core.common import _asarray_tuplesafe +from pandas.core.common import (_asarray_tuplesafe, + _values_from_object) from pandas.core.dtypes.missing import array_equivalent from pandas.core.algorithms import take_1d @@ -353,6 +354,22 @@ def get_loc(self, key, method=None): raise KeyError(key) return self._engine.get_loc(codes) + def get_value(self, series, key): + """ + Fast lookup of value from 1-dimensional ndarray. 
Only use this if you + know what you're doing + """ + try: + k = _values_from_object(key) + k = self._convert_scalar_indexer(k, kind='getitem') + indexer = self.get_loc(k) + return series.iloc[indexer] + except (KeyError, TypeError): + pass + + # we might be a positional indexer + return super(CategoricalIndex, self).get_value(series, key) + def _can_reindex(self, indexer): """ always allow reindexing """ pass @@ -507,7 +524,7 @@ def _convert_list_indexer(self, keyarr, kind=None): indexer = self.categories._convert_list_indexer(keyarr, kind=kind) return Index(self.codes).get_indexer_for(indexer) - indexer = self.categories.get_indexer(keyarr) + indexer = self.categories.get_indexer(np.asarray(keyarr)) if (indexer == -1).any(): raise KeyError( "a list-indexer must only " diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index fd5557dfcb99c..e0f95a1fd5c0d 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -4,7 +4,8 @@ import pandas as pd import numpy as np -from pandas import Series, DataFrame +from pandas import (Series, DataFrame, Timestamp, + Categorical, CategoricalIndex) from pandas.util.testing import assert_series_equal, assert_frame_equal from pandas.util import testing as tm @@ -66,6 +67,17 @@ def f(): pytest.raises(TypeError, f) + def test_getitem_scalar(self): + + cats = Categorical([Timestamp('12-31-1999'), + Timestamp('12-31-2000')]) + + s = Series([1, 2], index=cats) + + expected = s.iloc[0] + result = s[cats[0]] + assert result == expected + def test_loc_listlike(self): # list of labels @@ -74,7 +86,7 @@ def test_loc_listlike(self): assert_frame_equal(result, expected, check_index_type=True) result = self.df2.loc[['a', 'b', 'e']] - exp_index = pd.CategoricalIndex(
check_index_type=True) @@ -130,14 +142,14 @@ def test_loc_listlike_dtypes(self): df.loc[['a', 'x']] # duplicated categories and codes - index = pd.CategoricalIndex(['a', 'b', 'a']) + index = CategoricalIndex(['a', 'b', 'a']) df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index) # unique slice res = df.loc[['a', 'b']] exp = DataFrame({'A': [1, 3, 2], 'B': [4, 6, 5]}, - index=pd.CategoricalIndex(['a', 'a', 'b'])) + index=CategoricalIndex(['a', 'a', 'b'])) tm.assert_frame_equal(res, exp, check_index_type=True) # duplicated slice @@ -145,7 +157,7 @@ def test_loc_listlike_dtypes(self): exp = DataFrame( {'A': [1, 3, 1, 3, 2], 'B': [4, 6, 4, 6, 5 - ]}, index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b'])) + ]}, index=CategoricalIndex(['a', 'a', 'a', 'a', 'b'])) tm.assert_frame_equal(res, exp, check_index_type=True) with tm.assertRaisesRegexp( @@ -155,27 +167,27 @@ def test_loc_listlike_dtypes(self): df.loc[['a', 'x']] # contains unused category - index = pd.CategoricalIndex( + index = CategoricalIndex( ['a', 'b', 'a', 'c'], categories=list('abcde')) df = DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=index) res = df.loc[['a', 'b']] - exp = DataFrame({'A': [1, 3, 2], - 'B': [5, 7, 6]}, index=pd.CategoricalIndex( - ['a', 'a', 'b'], categories=list('abcde'))) + exp = DataFrame({'A': [1, 3, 2], 'B': [5, 7, 6]}, + index=CategoricalIndex(['a', 'a', 'b'], + categories=list('abcde'))) tm.assert_frame_equal(res, exp, check_index_type=True) res = df.loc[['a', 'e']] exp = DataFrame({'A': [1, 3, np.nan], 'B': [5, 7, np.nan]}, - index=pd.CategoricalIndex(['a', 'a', 'e'], - categories=list('abcde'))) + index=CategoricalIndex(['a', 'a', 'e'], + categories=list('abcde'))) tm.assert_frame_equal(res, exp, check_index_type=True) # duplicated slice res = df.loc[['a', 'a', 'b']] exp = DataFrame({'A': [1, 3, 1, 3, 2], 'B': [5, 7, 5, 7, 6]}, - index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b'], - categories=list('abcde'))) + index=CategoricalIndex(['a', 'a', 'a', 'a', 'b'], + categories=list('abcde'))) tm.assert_frame_equal(res, exp, check_index_type=True) with tm.assertRaisesRegexp( @@ -184,54 +196,77 @@ def test_loc_listlike_dtypes(self): 'that are in the categories'): df.loc[['a', 'x']] + def test_get_indexer_array(self): + arr = np.array([Timestamp('1999-12-31 00:00:00'), + Timestamp('2000-12-31 00:00:00')], dtype=object) + cats = [Timestamp('1999-12-31 00:00:00'), + Timestamp('2000-12-31 00:00:00')] + ci = CategoricalIndex(cats, + categories=cats, + ordered=False, dtype='category') + result = ci.get_indexer(arr) + expected = np.array([0, 1], dtype='intp') + tm.assert_numpy_array_equal(result, expected) + + def test_getitem_with_listlike(self): + # GH 16115 + cats = Categorical([Timestamp('12-31-1999'), + Timestamp('12-31-2000')]) + + expected = DataFrame([[1, 0], [0, 1]], dtype='uint8', + index=[0, 1], columns=cats) + dummies = pd.get_dummies(cats) + result = dummies[[c for c in dummies.columns]] + assert_frame_equal(result, expected) + def test_ix_categorical_index(self): # GH 12531 - df = pd.DataFrame(np.random.randn(3, 3), - index=list('ABC'), columns=list('XYZ')) + df = DataFrame(np.random.randn(3, 3), + index=list('ABC'), columns=list('XYZ')) cdf = df.copy() - cdf.index = pd.CategoricalIndex(df.index) - cdf.columns = pd.CategoricalIndex(df.columns) + cdf.index = CategoricalIndex(df.index) + cdf.columns = CategoricalIndex(df.columns) - expect = pd.Series(df.loc['A', :], index=cdf.columns, name='A') + expect = Series(df.loc['A', :], index=cdf.columns, name='A') assert_series_equal(cdf.loc['A', 
:], expect) - expect = pd.Series(df.loc[:, 'X'], index=cdf.index, name='X') + expect = Series(df.loc[:, 'X'], index=cdf.index, name='X') assert_series_equal(cdf.loc[:, 'X'], expect) - exp_index = pd.CategoricalIndex(list('AB'), categories=['A', 'B', 'C']) - expect = pd.DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns, - index=exp_index) + exp_index = CategoricalIndex(list('AB'), categories=['A', 'B', 'C']) + expect = DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns, + index=exp_index) assert_frame_equal(cdf.loc[['A', 'B'], :], expect) - exp_columns = pd.CategoricalIndex(list('XY'), - categories=['X', 'Y', 'Z']) - expect = pd.DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index, - columns=exp_columns) + exp_columns = CategoricalIndex(list('XY'), + categories=['X', 'Y', 'Z']) + expect = DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index, + columns=exp_columns) assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect) # non-unique - df = pd.DataFrame(np.random.randn(3, 3), - index=list('ABA'), columns=list('XYX')) + df = DataFrame(np.random.randn(3, 3), + index=list('ABA'), columns=list('XYX')) cdf = df.copy() - cdf.index = pd.CategoricalIndex(df.index) - cdf.columns = pd.CategoricalIndex(df.columns) + cdf.index = CategoricalIndex(df.index) + cdf.columns = CategoricalIndex(df.columns) - exp_index = pd.CategoricalIndex(list('AA'), categories=['A', 'B']) - expect = pd.DataFrame(df.loc['A', :], columns=cdf.columns, - index=exp_index) + exp_index = CategoricalIndex(list('AA'), categories=['A', 'B']) + expect = DataFrame(df.loc['A', :], columns=cdf.columns, + index=exp_index) assert_frame_equal(cdf.loc['A', :], expect) - exp_columns = pd.CategoricalIndex(list('XX'), categories=['X', 'Y']) - expect = pd.DataFrame(df.loc[:, 'X'], index=cdf.index, - columns=exp_columns) + exp_columns = CategoricalIndex(list('XX'), categories=['X', 'Y']) + expect = DataFrame(df.loc[:, 'X'], index=cdf.index, + columns=exp_columns) assert_frame_equal(cdf.loc[:, 'X'], expect) - expect = pd.DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns, - index=pd.CategoricalIndex(list('AAB'))) + expect = DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns, + index=CategoricalIndex(list('AAB'))) assert_frame_equal(cdf.loc[['A', 'B'], :], expect) - expect = pd.DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index, - columns=pd.CategoricalIndex(list('XXY'))) + expect = DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index, + columns=CategoricalIndex(list('XXY'))) assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect) def test_read_only_source(self): @@ -281,13 +316,13 @@ def test_reindexing(self): # then return a Categorical cats = list('cabe') - result = self.df2.reindex(pd.Categorical(['a', 'd'], categories=cats)) + result = self.df2.reindex(Categorical(['a', 'd'], categories=cats)) expected = DataFrame({'A': [0, 1, 5, np.nan], 'B': Series(list('aaad')).astype( 'category', categories=cats)}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(pd.Categorical(['a'], categories=cats)) + result = self.df2.reindex(Categorical(['a'], categories=cats)) expected = DataFrame({'A': [0, 1, 5], 'B': Series(list('aaa')).astype( 'category', categories=cats)}).set_index('B') @@ -309,7 +344,7 @@ def test_reindexing(self): assert_frame_equal(result, expected, check_index_type=True) # give back the type of categorical that we received - result = self.df2.reindex(pd.Categorical( + result = self.df2.reindex(Categorical( ['a', 'd'], categories=cats, ordered=True)) expected = DataFrame( {'A': [0, 1, 5, np.nan], @@ -317,7 
+352,7 @@ def test_reindexing(self): ordered=True)}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(pd.Categorical( + result = self.df2.reindex(Categorical( ['a', 'd'], categories=['a', 'd'])) expected = DataFrame({'A': [0, 1, 5, np.nan], 'B': Series(list('aaad')).astype( diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 8960ae610f8d7..f41c977cc03e1 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -490,8 +490,8 @@ def test_dataframe_dummies_with_categorical(self): 'cat_x', 'cat_y']] assert_frame_equal(result, expected) - # GH12402 Add a new parameter `drop_first` to avoid collinearity def test_basic_drop_first(self): + # GH12402 Add a new parameter `drop_first` to avoid collinearity # Basic case s_list = list('abc') s_series = Series(s_list) From 1a937593086fc6ea187bb20b5e073e59cb2324f9 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 26 Apr 2017 11:42:18 -0400 Subject: [PATCH 472/933] MAINT: Rename assertRaisesRegexp to assert_raises_regex (#16119) --- pandas/tests/computation/test_eval.py | 46 ++-- pandas/tests/dtypes/test_inference.py | 6 +- pandas/tests/frame/test_alter_axes.py | 14 +- pandas/tests/frame/test_analytics.py | 13 +- pandas/tests/frame/test_api.py | 16 +- .../tests/frame/test_axis_select_reindex.py | 16 +- pandas/tests/frame/test_block_internals.py | 5 +- pandas/tests/frame/test_combine_concat.py | 16 +- pandas/tests/frame/test_constructors.py | 60 ++--- pandas/tests/frame/test_dtypes.py | 31 +-- pandas/tests/frame/test_indexing.py | 51 +++-- pandas/tests/frame/test_join.py | 9 +- pandas/tests/frame/test_missing.py | 9 +- pandas/tests/frame/test_mutate_columns.py | 8 +- pandas/tests/frame/test_nonunique_indexes.py | 10 +- pandas/tests/frame/test_operators.py | 31 ++- pandas/tests/frame/test_period.py | 3 +- pandas/tests/frame/test_quantile.py | 14 +- pandas/tests/frame/test_query_eval.py | 20 +- pandas/tests/frame/test_replace.py | 8 +- pandas/tests/frame/test_reshape.py | 6 +- pandas/tests/frame/test_sorting.py | 24 +- pandas/tests/frame/test_subclass.py | 2 +- pandas/tests/frame/test_timeseries.py | 12 +- pandas/tests/frame/test_to_csv.py | 21 +- pandas/tests/groupby/test_aggregate.py | 3 +- pandas/tests/groupby/test_filters.py | 6 +- pandas/tests/groupby/test_groupby.py | 22 +- pandas/tests/groupby/test_transform.py | 9 +- pandas/tests/groupby/test_whitelist.py | 2 +- pandas/tests/indexes/common.py | 116 +++++----- .../indexes/datetimes/test_construction.py | 6 +- .../indexes/datetimes/test_date_range.py | 32 +-- .../tests/indexes/datetimes/test_datetime.py | 31 +-- pandas/tests/indexes/datetimes/test_ops.py | 38 ++-- .../indexes/datetimes/test_partial_slicing.py | 16 +- pandas/tests/indexes/datetimes/test_tools.py | 4 +- .../tests/indexes/period/test_construction.py | 30 +-- pandas/tests/indexes/period/test_indexing.py | 12 +- pandas/tests/indexes/period/test_ops.py | 83 ++++--- .../indexes/period/test_partial_slicing.py | 12 +- pandas/tests/indexes/period/test_period.py | 13 +- pandas/tests/indexes/period/test_setops.py | 2 +- pandas/tests/indexes/period/test_tools.py | 10 +- pandas/tests/indexes/test_base.py | 40 ++-- pandas/tests/indexes/test_category.py | 28 +-- pandas/tests/indexes/test_interval.py | 4 +- pandas/tests/indexes/test_multi.py | 207 +++++++++--------- pandas/tests/indexes/test_numeric.py | 16 +- pandas/tests/indexes/test_range.py | 13 +- pandas/tests/indexes/timedeltas/test_ops.py | 48 ++-- 
.../timedeltas/test_partial_slicing.py | 12 +- .../indexes/timedeltas/test_timedelta.py | 10 +- .../timedeltas/test_timedelta_range.py | 4 +- pandas/tests/indexes/timedeltas/test_tools.py | 4 +- pandas/tests/indexing/test_categorical.py | 6 +- pandas/tests/indexing/test_coercion.py | 18 +- pandas/tests/indexing/test_floats.py | 6 +- pandas/tests/indexing/test_iloc.py | 9 +- pandas/tests/indexing/test_indexing.py | 13 +- pandas/tests/indexing/test_multiindex.py | 6 +- pandas/tests/indexing/test_scalar.py | 4 +- pandas/tests/io/formats/test_to_csv.py | 4 +- pandas/tests/io/json/test_pandas.py | 7 +- pandas/tests/io/json/test_ujson.py | 2 +- pandas/tests/io/parser/c_parser_only.py | 2 +- pandas/tests/io/parser/common.py | 41 ++-- pandas/tests/io/parser/compression.py | 15 +- pandas/tests/io/parser/converters.py | 2 +- pandas/tests/io/parser/dialect.py | 2 +- pandas/tests/io/parser/header.py | 2 +- pandas/tests/io/parser/parse_dates.py | 23 +- pandas/tests/io/parser/python_parser_only.py | 16 +- pandas/tests/io/parser/quoting.py | 34 +-- pandas/tests/io/parser/skiprows.py | 4 +- pandas/tests/io/parser/test_read_fwf.py | 16 +- pandas/tests/io/parser/test_unsupported.py | 22 +- pandas/tests/io/parser/usecols.py | 6 +- pandas/tests/io/test_common.py | 5 +- pandas/tests/io/test_excel.py | 4 +- pandas/tests/io/test_html.py | 17 +- pandas/tests/io/test_pickle.py | 4 +- pandas/tests/io/test_pytables.py | 11 +- pandas/tests/io/test_sql.py | 6 +- pandas/tests/reshape/test_concat.py | 14 +- pandas/tests/reshape/test_join.py | 4 +- pandas/tests/reshape/test_merge_ordered.py | 2 +- pandas/tests/reshape/test_pivot.py | 10 +- pandas/tests/reshape/test_reshape.py | 2 +- pandas/tests/reshape/test_tile.py | 4 +- .../tests/reshape/test_union_categoricals.py | 14 +- pandas/tests/reshape/test_util.py | 2 +- pandas/tests/scalar/test_interval.py | 2 +- pandas/tests/scalar/test_period.py | 38 ++-- pandas/tests/scalar/test_period_asfreq.py | 20 +- pandas/tests/scalar/test_timedelta.py | 21 +- pandas/tests/scalar/test_timestamp.py | 22 +- pandas/tests/series/test_analytics.py | 36 +-- pandas/tests/series/test_api.py | 3 +- pandas/tests/series/test_combine_concat.py | 4 +- pandas/tests/series/test_constructors.py | 8 +- pandas/tests/series/test_datetime_values.py | 10 +- pandas/tests/series/test_dtypes.py | 2 +- pandas/tests/series/test_indexing.py | 12 +- pandas/tests/series/test_operators.py | 16 +- pandas/tests/series/test_period.py | 11 +- pandas/tests/series/test_quantile.py | 6 +- pandas/tests/series/test_replace.py | 4 +- pandas/tests/sparse/test_array.py | 79 +++---- pandas/tests/sparse/test_frame.py | 18 +- pandas/tests/sparse/test_indexing.py | 2 +- pandas/tests/sparse/test_libsparse.py | 14 +- pandas/tests/sparse/test_series.py | 24 +- pandas/tests/test_algos.py | 37 ++-- pandas/tests/test_base.py | 17 +- pandas/tests/test_categorical.py | 44 ++-- pandas/tests/test_common.py | 2 +- pandas/tests/test_expressions.py | 12 +- pandas/tests/test_internals.py | 4 +- pandas/tests/test_multilevel.py | 30 +-- pandas/tests/test_panel.py | 76 ++++--- pandas/tests/test_resample.py | 27 +-- pandas/tests/test_strings.py | 106 +++++---- pandas/tests/test_take.py | 8 +- pandas/tests/test_testing.py | 115 +++++----- pandas/tests/test_util.py | 45 ++-- pandas/tests/test_window.py | 38 ++-- pandas/tests/tools/test_numeric.py | 14 +- pandas/tests/tseries/test_frequencies.py | 39 ++-- pandas/tests/tseries/test_offsets.py | 45 ++-- pandas/tests/tseries/test_timezones.py | 8 +- pandas/util/testing.py | 18 +- 132 files changed, 1397 
insertions(+), 1283 deletions(-) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 6ec06f75de06d..cc14282934f16 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -29,9 +29,8 @@ import pandas.core.computation.expr as expr import pandas.util.testing as tm from pandas.util.testing import (assert_frame_equal, randbool, - assertRaisesRegexp, assert_numpy_array_equal, - assert_produces_warning, assert_series_equal, - slow) + assert_numpy_array_equal, assert_series_equal, + assert_produces_warning, slow) from pandas.compat import PY3, reduce _series_frame_incompatible = _bool_ops_syms @@ -1677,17 +1676,17 @@ def test_result_types2(self): def test_undefined_func(self): df = DataFrame({'a': np.random.randn(10)}) - with tm.assertRaisesRegexp(ValueError, - "\"mysin\" is not a supported function"): + with tm.assert_raises_regex( + ValueError, "\"mysin\" is not a supported function"): df.eval("mysin(a)", engine=self.engine, parser=self.parser) def test_keyword_arg(self): df = DataFrame({'a': np.random.randn(10)}) - with tm.assertRaisesRegexp(TypeError, - "Function \"sin\" does not support " - "keyword arguments"): + with tm.assert_raises_regex(TypeError, + "Function \"sin\" does not support " + "keyword arguments"): df.eval("sin(x=a)", engine=self.engine, parser=self.parser) @@ -1748,16 +1747,16 @@ def test_no_new_globals(self, engine, parser): def test_invalid_engine(): tm.skip_if_no_ne() - assertRaisesRegexp(KeyError, 'Invalid engine \'asdf\' passed', - pd.eval, 'x + y', local_dict={'x': 1, 'y': 2}, - engine='asdf') + tm.assert_raises_regex(KeyError, 'Invalid engine \'asdf\' passed', + pd.eval, 'x + y', local_dict={'x': 1, 'y': 2}, + engine='asdf') def test_invalid_parser(): tm.skip_if_no_ne() - assertRaisesRegexp(KeyError, 'Invalid parser \'asdf\' passed', - pd.eval, 'x + y', local_dict={'x': 1, 'y': 2}, - parser='asdf') + tm.assert_raises_regex(KeyError, 'Invalid parser \'asdf\' passed', + pd.eval, 'x + y', local_dict={'x': 1, 'y': 2}, + parser='asdf') _parsers = {'python': PythonExprVisitor, 'pytables': pytables.ExprVisitor, @@ -1795,18 +1794,20 @@ def test_invalid_local_variable_reference(engine, parser): for _expr in exprs: if parser != 'pandas': - with tm.assertRaisesRegexp(SyntaxError, "The '@' prefix is only"): + with tm.assert_raises_regex(SyntaxError, + "The '@' prefix is only"): pd.eval(_expr, engine=engine, parser=parser) else: - with tm.assertRaisesRegexp(SyntaxError, "The '@' prefix is not"): + with tm.assert_raises_regex(SyntaxError, + "The '@' prefix is not"): pd.eval(_expr, engine=engine, parser=parser) def test_numexpr_builtin_raises(engine, parser): sin, dotted_line = 1, 2 if engine == 'numexpr': - with tm.assertRaisesRegexp(NumExprClobberingError, - 'Variables in expression .+'): + with tm.assert_raises_regex(NumExprClobberingError, + 'Variables in expression .+'): pd.eval('sin + dotted_line', engine=engine, parser=parser) else: res = pd.eval('sin + dotted_line', engine=engine, parser=parser) @@ -1815,20 +1816,21 @@ def test_numexpr_builtin_raises(engine, parser): def test_bad_resolver_raises(engine, parser): cannot_resolve = 42, 3.0 - with tm.assertRaisesRegexp(TypeError, 'Resolver of type .+'): + with tm.assert_raises_regex(TypeError, 'Resolver of type .+'): pd.eval('1 + 2', resolvers=cannot_resolve, engine=engine, parser=parser) def test_empty_string_raises(engine, parser): # GH 13139 - with tm.assertRaisesRegexp(ValueError, 'expr cannot be an empty string'): + with 
tm.assert_raises_regex(ValueError, + 'expr cannot be an empty string'): pd.eval('', engine=engine, parser=parser) def test_more_than_one_expression_raises(engine, parser): - with tm.assertRaisesRegexp(SyntaxError, - 'only a single expression is allowed'): + with tm.assert_raises_regex(SyntaxError, + 'only a single expression is allowed'): pd.eval('1 + 1; 2 + 2', engine=engine, parser=parser) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index dd8f65a8e48ff..35720b32d756c 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -257,7 +257,7 @@ def test_maybe_convert_numeric_infinities(self): tm.assert_numpy_array_equal(out, pos) # too many characters - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): lib.maybe_convert_numeric( np.array(['foo_' + infinity], dtype=object), na_values, maybe_int) @@ -320,7 +320,7 @@ def test_convert_numeric_uint64_nan(self): for coerce in (True, False): for arr, na_values in cases: if coerce: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): lib.maybe_convert_numeric(arr, na_values, coerce_numeric=coerce) else: @@ -339,7 +339,7 @@ def test_convert_numeric_int64_uint64(self): for coerce in (True, False): for case in cases: if coerce: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce) else: diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index b4b86d8ea1907..0a00d7e018f33 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -18,9 +18,7 @@ is_interval_dtype) import pandas as pd -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm @@ -36,7 +34,7 @@ def test_set_index(self): _ = self.mixed_frame['foo'] # noqa self.mixed_frame.index = idx assert self.mixed_frame['foo'].index is idx - with assertRaisesRegexp(ValueError, 'Length mismatch'): + with tm.assert_raises_regex(ValueError, 'Length mismatch'): self.mixed_frame.index = idx[::2] def test_set_index_cast(self): @@ -111,7 +109,8 @@ def test_set_index2(self): assert_frame_equal(df3, expected_nodrop) # corner case - with assertRaisesRegexp(ValueError, 'Index has duplicate keys'): + with tm.assert_raises_regex(ValueError, + 'Index has duplicate keys'): df.set_index('A', verify_integrity=True) # append @@ -136,7 +135,8 @@ def test_set_index_nonuniq(self): 'C': ['a', 'b', 'c', 'd', 'e'], 'D': np.random.randn(5), 'E': np.random.randn(5)}) - with assertRaisesRegexp(ValueError, 'Index has duplicate keys'): + with tm.assert_raises_regex(ValueError, + 'Index has duplicate keys'): df.set_index('A', verify_integrity=True, inplace=True) assert 'A' in df @@ -338,7 +338,7 @@ def test_set_index_empty_column(self): def test_set_columns(self): cols = Index(np.arange(len(self.mixed_frame.columns))) self.mixed_frame.columns = cols - with assertRaisesRegexp(ValueError, 'Length mismatch'): + with tm.assert_raises_regex(ValueError, 'Length mismatch'): self.mixed_frame.columns = cols[::2] def test_dti_set_index_reindex(self): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 0941f0af6bec5..45d93c187e0b7 100644 --- a/pandas/tests/frame/test_analytics.py +++ 
b/pandas/tests/frame/test_analytics.py @@ -779,7 +779,7 @@ def wrapper(x): # assert_series_equal(result, comp) # bad axis - tm.assertRaisesRegexp(ValueError, 'No axis named 2', f, axis=2) + tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) # make sure works on mixed-type frame getattr(self.mixed_frame, name)(axis=0) getattr(self.mixed_frame, name)(axis=1) @@ -1749,7 +1749,7 @@ def test_numpy_round(self): tm.assert_frame_equal(out, expected) msg = "the 'out' parameter is not supported" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): np.round(df, decimals=0, out=df) def test_round_mixed_type(self): @@ -1897,7 +1897,8 @@ def test_dot(self): exp = a.dot(a.iloc[0]) tm.assert_series_equal(result, exp) - with tm.assertRaisesRegexp(ValueError, 'Dot product shape mismatch'): + with tm.assert_raises_regex(ValueError, + 'Dot product shape mismatch'): a.dot(row[:-1]) a = np.random.rand(1, 5) @@ -1914,7 +1915,7 @@ def test_dot(self): df = DataFrame(randn(3, 4), index=[1, 2, 3], columns=lrange(4)) df2 = DataFrame(randn(5, 3), index=lrange(5), columns=[1, 2, 3]) - with tm.assertRaisesRegexp(ValueError, 'aligned'): + with tm.assert_raises_regex(ValueError, 'aligned'): df.dot(df2) @@ -1986,7 +1987,7 @@ def test_n(self, df_strings, method, n, order): error_msg = self.dtype_error_msg_template.format( column='b', method=method, dtype='object') - with tm.assertRaisesRegexp(TypeError, error_msg): + with tm.assert_raises_regex(TypeError, error_msg): getattr(df, method)(n, order) else: ascending = method == 'nsmallest' @@ -2003,7 +2004,7 @@ def test_n_error(self, df_main_dtypes, method, columns): df = df_main_dtypes error_msg = self.dtype_error_msg_template.format( column=columns[1], method=method, dtype=df[columns[1]].dtype) - with tm.assertRaisesRegexp(TypeError, error_msg): + with tm.assert_raises_regex(TypeError, error_msg): getattr(df, method)(2, columns) def test_n_all_dtypes(self, df_main_dtypes): diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 9e16698bab39c..bd4abd6fcd822 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -20,8 +20,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) + assert_frame_equal) import pandas.util.testing as tm @@ -91,11 +90,14 @@ def test_get_axis(self): assert f._get_axis(0) is f.index assert f._get_axis(1) is f.columns - assertRaisesRegexp(ValueError, 'No axis named', f._get_axis_number, 2) - assertRaisesRegexp(ValueError, 'No axis.*foo', f._get_axis_name, 'foo') - assertRaisesRegexp(ValueError, 'No axis.*None', f._get_axis_name, None) - assertRaisesRegexp(ValueError, 'No axis named', f._get_axis_number, - None) + tm.assert_raises_regex( + ValueError, 'No axis named', f._get_axis_number, 2) + tm.assert_raises_regex( + ValueError, 'No axis.*foo', f._get_axis_name, 'foo') + tm.assert_raises_regex( + ValueError, 'No axis.*None', f._get_axis_name, None) + tm.assert_raises_regex(ValueError, 'No axis named', + f._get_axis_number, None) def test_keys(self): getkeys = self.frame.keys diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index e8f34b977a707..b8be7c19203fa 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -699,23 +699,23 @@ def test_filter(self): tm.assert_frame_equal(filtered, expected) # pass in None - with tm.assertRaisesRegexp(TypeError, 'Must 
pass'): + with tm.assert_raises_regex(TypeError, 'Must pass'): self.frame.filter() - with tm.assertRaisesRegexp(TypeError, 'Must pass'): + with tm.assert_raises_regex(TypeError, 'Must pass'): self.frame.filter(items=None) - with tm.assertRaisesRegexp(TypeError, 'Must pass'): + with tm.assert_raises_regex(TypeError, 'Must pass'): self.frame.filter(axis=1) # test mutually exclusive arguments - with tm.assertRaisesRegexp(TypeError, 'mutually exclusive'): + with tm.assert_raises_regex(TypeError, 'mutually exclusive'): self.frame.filter(items=['one', 'three'], regex='e$', like='bbi') - with tm.assertRaisesRegexp(TypeError, 'mutually exclusive'): + with tm.assert_raises_regex(TypeError, 'mutually exclusive'): self.frame.filter(items=['one', 'three'], regex='e$', axis=1) - with tm.assertRaisesRegexp(TypeError, 'mutually exclusive'): + with tm.assert_raises_regex(TypeError, 'mutually exclusive'): self.frame.filter(items=['one', 'three'], regex='e$') - with tm.assertRaisesRegexp(TypeError, 'mutually exclusive'): + with tm.assert_raises_regex(TypeError, 'mutually exclusive'): self.frame.filter(items=['one', 'three'], like='bbi', axis=0) - with tm.assertRaisesRegexp(TypeError, 'mutually exclusive'): + with tm.assert_raises_regex(TypeError, 'mutually exclusive'): self.frame.filter(items=['one', 'three'], like='bbi') # objects diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 63c1f0a50fbed..5e85b890be569 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -17,8 +17,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) + assert_frame_equal) import pandas.util.testing as tm @@ -481,7 +480,7 @@ def test_convert_objects(self): # via astype, but errors converted = self.mixed_frame.copy() - with assertRaisesRegexp(ValueError, 'invalid literal'): + with tm.assert_raises_regex(ValueError, 'invalid literal'): converted['H'].astype('int32') # mixed in a single column diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 6f06a55ad065e..0e4184b07f22e 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -15,9 +15,7 @@ from pandas.tests.frame.common import TestData import pandas.util.testing as tm -from pandas.util.testing import (assertRaisesRegexp, - assert_frame_equal, - assert_series_equal) +from pandas.util.testing import assert_frame_equal, assert_series_equal class TestDataFrameConcatCommon(tm.TestCase, TestData): @@ -78,11 +76,13 @@ def test_append_series_dict(self): columns=['foo', 'bar', 'baz', 'qux']) series = df.loc[4] - with assertRaisesRegexp(ValueError, 'Indexes have overlapping values'): + with tm.assert_raises_regex(ValueError, + 'Indexes have overlapping values'): df.append(series, verify_integrity=True) series.name = None - with assertRaisesRegexp(TypeError, 'Can only append a Series if ' - 'ignore_index=True'): + with tm.assert_raises_regex(TypeError, + 'Can only append a Series if ' + 'ignore_index=True'): df.append(series, verify_integrity=True) result = df.append(series[::-1], ignore_index=True) @@ -270,7 +270,7 @@ def test_update_raise(self): other = DataFrame([[2., nan], [nan, 7]], index=[1, 3], columns=[1, 2]) - with assertRaisesRegexp(ValueError, "Data overlaps"): + with tm.assert_raises_regex(ValueError, "Data overlaps"): df.update(other, raise_conflict=True) def test_update_from_non_df(self): @@ -419,7 
+419,7 @@ def test_concat_axis_parameter(self): assert_frame_equal(concatted_1_series, expected_columns_series) # Testing ValueError - with assertRaisesRegexp(ValueError, 'No axis named'): + with tm.assert_raises_regex(ValueError, 'No axis named'): pd.concat([series1, series2], axis='something') def test_concat_numerical_names(self): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index f6cdb37a2477a..db0293b71c3a3 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -290,37 +290,40 @@ def test_constructor_multi_index(self): def test_constructor_error_msgs(self): msg = "Empty data passed with indices specified." # passing an empty array with columns specified. - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): DataFrame(np.empty(0), columns=list('abc')) msg = "Mixing dicts with non-Series may lead to ambiguous ordering." # mix dict and array, wrong size - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): DataFrame({'A': {'a': 'a', 'b': 'b'}, 'B': ['a', 'b', 'c']}) # wrong size ndarray, GH 3105 msg = r"Shape of passed values is \(3, 4\), indices imply \(3, 3\)" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): DataFrame(np.arange(12).reshape((4, 3)), columns=['foo', 'bar', 'baz'], index=pd.date_range('2000-01-01', periods=3)) # higher dim raise exception - with tm.assertRaisesRegexp(ValueError, 'Must pass 2-d input'): + with tm.assert_raises_regex(ValueError, 'Must pass 2-d input'): DataFrame(np.zeros((3, 3, 3)), columns=['A', 'B', 'C'], index=[1]) # wrong size axis labels - with tm.assertRaisesRegexp(ValueError, "Shape of passed values is " - r"\(3, 2\), indices imply \(3, 1\)"): + with tm.assert_raises_regex(ValueError, "Shape of passed values " + "is \(3, 2\), indices " + "imply \(3, 1\)"): DataFrame(np.random.rand(2, 3), columns=['A', 'B', 'C'], index=[1]) - with tm.assertRaisesRegexp(ValueError, "Shape of passed values is " - r"\(3, 2\), indices imply \(2, 2\)"): + with tm.assert_raises_regex(ValueError, "Shape of passed values " + "is \(3, 2\), indices " + "imply \(2, 2\)"): DataFrame(np.random.rand(2, 3), columns=['A', 'B'], index=[1, 2]) - with tm.assertRaisesRegexp(ValueError, 'If using all scalar values, ' - 'you must pass an index'): + with tm.assert_raises_regex(ValueError, "If using all scalar " + "values, you must pass " + "an index"): DataFrame({'a': False, 'b': True}) def test_constructor_with_embedded_frames(self): @@ -542,14 +545,14 @@ def _check_basic_constructor(self, empty): # wrong size axis labels msg = r'Shape of passed values is \(3, 2\), indices imply \(3, 1\)' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): DataFrame(mat, columns=['A', 'B', 'C'], index=[1]) msg = r'Shape of passed values is \(3, 2\), indices imply \(2, 2\)' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): DataFrame(mat, columns=['A', 'B'], index=[1, 2]) # higher dim raise exception - with tm.assertRaisesRegexp(ValueError, 'Must pass 2-d input'): + with tm.assert_raises_regex(ValueError, 'Must pass 2-d input'): DataFrame(empty((3, 3, 3)), columns=['A', 'B', 'C'], index=[1]) @@ -739,7 +742,7 @@ def test_constructor_arrays_and_scalars(self): exp = DataFrame({'a': df['a'].values, 'b': [True] * 10}) tm.assert_frame_equal(df, exp) - with tm.assertRaisesRegexp(ValueError, 'must pass an 
index'): + with tm.assert_raises_regex(ValueError, 'must pass an index'): DataFrame({'a': False, 'b': True}) def test_constructor_DataFrame(self): @@ -772,13 +775,13 @@ def test_constructor_more(self): # corner, silly # TODO: Fix this Exception to be better... - with tm.assertRaisesRegexp(ValueError, 'constructor not ' - 'properly called'): + with tm.assert_raises_regex(ValueError, 'constructor not ' + 'properly called'): DataFrame((1, 2, 3)) # can't cast mat = np.array(['foo', 'bar'], dtype=object).reshape(2, 1) - with tm.assertRaisesRegexp(ValueError, 'cast'): + with tm.assert_raises_regex(ValueError, 'cast'): DataFrame(mat, index=[0, 1], columns=[0], dtype=float) dm = DataFrame(DataFrame(self.frame._series)) @@ -1004,8 +1007,8 @@ class CustomDict(dict): def test_constructor_ragged(self): data = {'A': randn(10), 'B': randn(8)} - with tm.assertRaisesRegexp(ValueError, - 'arrays must all be same length'): + with tm.assert_raises_regex(ValueError, + 'arrays must all be same length'): DataFrame(data) def test_constructor_scalar(self): @@ -1027,7 +1030,7 @@ def test_constructor_mixed_dict_and_Series(self): self.assertTrue(result.index.is_monotonic) # ordering ambiguous, raise exception - with tm.assertRaisesRegexp(ValueError, 'ambiguous ordering'): + with tm.assert_raises_regex(ValueError, 'ambiguous ordering'): DataFrame({'A': ['a', 'b'], 'B': {'a': 'a', 'b': 'b'}}) # this is OK though @@ -1155,8 +1158,9 @@ def test_constructor_from_items(self): tm.assert_frame_equal(recons, self.mixed_frame) self.assertEqual(recons['A'].dtype, np.float64) - with tm.assertRaisesRegexp(TypeError, - "Must pass columns with orient='index'"): + with tm.assert_raises_regex(TypeError, + "Must pass columns with " + "orient='index'"): DataFrame.from_items(row_items, orient='index') # orient='index', but thar be tuples @@ -1183,7 +1187,8 @@ def test_constructor_mix_series_nonseries(self): 'B': list(self.frame['B'])}, columns=['A', 'B']) tm.assert_frame_equal(df, self.frame.loc[:, ['A', 'B']]) - with tm.assertRaisesRegexp(ValueError, 'does not match index length'): + with tm.assert_raises_regex(ValueError, 'does not match ' + 'index length'): DataFrame({'A': self.frame['A'], 'B': list(self.frame['B'])[:-2]}) def test_constructor_miscast_na_int_dtype(self): @@ -1192,8 +1197,8 @@ def test_constructor_miscast_na_int_dtype(self): tm.assert_frame_equal(df, expected) def test_constructor_iterator_failure(self): - with tm.assertRaisesRegexp(TypeError, 'iterator'): - df = DataFrame(iter([1, 2, 3])) # noqa + with tm.assert_raises_regex(TypeError, 'iterator'): + DataFrame(iter([1, 2, 3])) def test_constructor_column_duplicates(self): # it works! 
#2079 @@ -1242,7 +1247,8 @@ def test_constructor_single_value(self): pytest.raises(ValueError, DataFrame, 'a', [1, 2]) pytest.raises(ValueError, DataFrame, 'a', columns=['a', 'c']) - with tm.assertRaisesRegexp(TypeError, 'incompatible data and dtype'): + with tm.assert_raises_regex(TypeError, 'incompatible data ' + 'and dtype'): DataFrame('a', [1, 2], ['a', 'c'], float) def test_constructor_with_datetimes(self): @@ -1526,7 +1532,7 @@ def test_from_records_to_records(self): # wrong length msg = r'Shape of passed values is \(3, 2\), indices imply \(3, 1\)' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): DataFrame.from_records(arr, index=index[:-1]) indexed_frame = DataFrame.from_records(arr, index='f1') diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 99141e3a8e1c4..ed6d72c08fdae 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -200,17 +200,21 @@ def test_select_dtypes_not_an_attr_but_still_valid_dtype(self): def test_select_dtypes_empty(self): df = DataFrame({'a': list('abc'), 'b': list(range(1, 4))}) - with tm.assertRaisesRegexp(ValueError, 'at least one of include or ' - 'exclude must be nonempty'): + with tm.assert_raises_regex(ValueError, 'at least one of ' + 'include or exclude ' + 'must be nonempty'): df.select_dtypes() def test_select_dtypes_raises_on_string(self): df = DataFrame({'a': list('abc'), 'b': list(range(1, 4))}) - with tm.assertRaisesRegexp(TypeError, 'include and exclude .+ non-'): + with tm.assert_raises_regex(TypeError, 'include and exclude ' + '.+ non-'): df.select_dtypes(include='object') - with tm.assertRaisesRegexp(TypeError, 'include and exclude .+ non-'): + with tm.assert_raises_regex(TypeError, 'include and exclude ' + '.+ non-'): df.select_dtypes(exclude='object') - with tm.assertRaisesRegexp(TypeError, 'include and exclude .+ non-'): + with tm.assert_raises_regex(TypeError, 'include and exclude ' + '.+ non-'): df.select_dtypes(include=int, exclude='object') def test_select_dtypes_bad_datetime64(self): @@ -220,10 +224,10 @@ def test_select_dtypes_bad_datetime64(self): 'd': np.arange(4.0, 7.0, dtype='float64'), 'e': [True, False, True], 'f': pd.date_range('now', periods=3).values}) - with tm.assertRaisesRegexp(ValueError, '.+ is too specific'): + with tm.assert_raises_regex(ValueError, '.+ is too specific'): df.select_dtypes(include=['datetime64[D]']) - with tm.assertRaisesRegexp(ValueError, '.+ is too specific'): + with tm.assert_raises_regex(ValueError, '.+ is too specific'): df.select_dtypes(exclude=['datetime64[as]']) def test_select_dtypes_datetime_with_tz(self): @@ -251,11 +255,11 @@ def test_select_dtypes_str_raises(self): except NameError: pass for dt in string_dtypes: - with tm.assertRaisesRegexp(TypeError, - 'string dtypes are not allowed'): + with tm.assert_raises_regex(TypeError, + 'string dtypes are not allowed'): df.select_dtypes(include=[dt]) - with tm.assertRaisesRegexp(TypeError, - 'string dtypes are not allowed'): + with tm.assert_raises_regex(TypeError, + 'string dtypes are not allowed'): df.select_dtypes(exclude=[dt]) def test_select_dtypes_bad_arg_raises(self): @@ -266,7 +270,8 @@ def test_select_dtypes_bad_arg_raises(self): 'd': np.arange(4.0, 7.0, dtype='float64'), 'e': [True, False, True], 'f': pd.date_range('now', periods=3).values}) - with tm.assertRaisesRegexp(TypeError, 'data type.*not understood'): + with tm.assert_raises_regex(TypeError, 'data type.' 
+ '*not understood'): df.select_dtypes(['blargy, blarg, blarg']) def test_select_dtypes_typecodes(self): @@ -396,7 +401,7 @@ def test_astype_cast_nan_inf_int(self): for this_type in types: for this_val in values: df = DataFrame([this_val]) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): df.astype(this_type) def test_astype_str(self): diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 5f8d04fdb16bd..be4e69fe99a4e 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -28,8 +28,7 @@ is_scalar) from pandas.util.testing import (assert_almost_equal, assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) + assert_frame_equal) from pandas.core.indexing import IndexingError import pandas.util.testing as tm @@ -53,7 +52,7 @@ def test_getitem(self): assert self.frame[key] is not None assert 'random' not in self.frame - with assertRaisesRegexp(KeyError, 'random'): + with tm.assert_raises_regex(KeyError, 'random'): self.frame['random'] df = self.frame.copy() @@ -116,9 +115,9 @@ def test_getitem_list(self): self.assertEqual(result.columns.name, 'foo') - with assertRaisesRegexp(KeyError, 'not in index'): + with tm.assert_raises_regex(KeyError, 'not in index'): self.frame[['B', 'A', 'food']] - with assertRaisesRegexp(KeyError, 'not in index'): + with tm.assert_raises_regex(KeyError, 'not in index'): self.frame[Index(['B', 'A', 'foo'])] # tuples @@ -152,12 +151,13 @@ def test_setitem_list(self): assert_series_equal(self.frame['B'], data['A'], check_names=False) assert_series_equal(self.frame['A'], data['B'], check_names=False) - with assertRaisesRegexp(ValueError, - 'Columns must be same length as key'): + with tm.assert_raises_regex(ValueError, + 'Columns must be same length as key'): data[['A']] = self.frame[['A', 'B']] - with assertRaisesRegexp(ValueError, 'Length of values does not match ' - 'length of index'): + with tm.assert_raises_regex(ValueError, 'Length of values ' + 'does not match ' + 'length of index'): data['A'] = range(len(data.index) - 1) df = DataFrame(0, lrange(3), ['tt1', 'tt2'], dtype=np.int_) @@ -239,13 +239,13 @@ def test_getitem_boolean(self): subframe = self.tsframe[indexer] tm.assert_index_equal(subindex, subframe.index) - with assertRaisesRegexp(ValueError, 'Item wrong length'): + with tm.assert_raises_regex(ValueError, 'Item wrong length'): self.tsframe[indexer[:-1]] subframe_obj = self.tsframe[indexer_obj] assert_frame_equal(subframe_obj, subframe) - with tm.assertRaisesRegexp(ValueError, 'boolean values only'): + with tm.assert_raises_regex(ValueError, 'boolean values only'): self.tsframe[self.tsframe] # test that Series work @@ -522,8 +522,9 @@ def test_setitem_boolean(self): values[values == 2] = 3 assert_almost_equal(df.values, values) - with assertRaisesRegexp(TypeError, 'Must pass DataFrame with boolean ' - 'values only'): + with tm.assert_raises_regex(TypeError, 'Must pass ' + 'DataFrame with ' + 'boolean values only'): df[df * 0] = 2 # index with DataFrame @@ -1350,7 +1351,7 @@ def test_getitem_fancy_ints(self): def test_getitem_setitem_fancy_exceptions(self): ix = self.frame.iloc - with assertRaisesRegexp(IndexingError, 'Too many indexers'): + with tm.assert_raises_regex(IndexingError, 'Too many indexers'): ix[:, :, :] with pytest.raises(IndexingError): @@ -1664,7 +1665,7 @@ def testit(df): with pytest.raises(KeyError): self.frame.lookup([self.frame.index[0]], ['xyz']) - with tm.assertRaisesRegexp(ValueError, 'same size'): + with 
tm.assert_raises_regex(ValueError, 'same size'): self.frame.lookup(['a', 'b', 'c'], ['a']) def test_set_value(self): @@ -2289,7 +2290,7 @@ def test_boolean_indexing(self): df1[df1 > 2.0 * df2] = -1 assert_frame_equal(df1, expected) - with assertRaisesRegexp(ValueError, 'Item wrong length'): + with tm.assert_raises_regex(ValueError, 'Item wrong length'): df1[df1.index[:-1] > 2] = -1 def test_boolean_indexing_mixed(self): @@ -2320,7 +2321,8 @@ def test_boolean_indexing_mixed(self): assert_frame_equal(df2, expected) df['foo'] = 'test' - with tm.assertRaisesRegexp(TypeError, 'boolean setting on mixed-type'): + with tm.assert_raises_regex(TypeError, 'boolean setting ' + 'on mixed-type'): df[df > 0.3] = 1 def test_where(self): @@ -2498,7 +2500,7 @@ def test_where_invalid_input(self): ] for cond in conds: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): df.where(cond) df['b'] = 2 @@ -2514,7 +2516,7 @@ def test_where_invalid_input(self): ] for cond in conds: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): df.where(cond) def test_where_dataframe_col_match(self): @@ -2527,7 +2529,7 @@ def test_where_dataframe_col_match(self): cond.columns = ["a", "b", "c"] # Columns no longer match. msg = "Boolean array expected for the condition" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): df.where(cond) def test_where_ndframe_align(self): @@ -2535,7 +2537,7 @@ def test_where_ndframe_align(self): df = DataFrame([[1, 2, 3], [4, 5, 6]]) cond = [True] - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): df.where(cond) expected = DataFrame([[1, 2, 3], [np.nan, np.nan, np.nan]]) @@ -2544,7 +2546,7 @@ def test_where_ndframe_align(self): tm.assert_frame_equal(out, expected) cond = np.array([False, True, False, True]) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): df.where(cond) expected = DataFrame([[np.nan, np.nan, np.nan], [4, 5, 6]]) @@ -2632,7 +2634,8 @@ def test_where_none(self): df = DataFrame([{'A': 1, 'B': np.nan, 'C': 'Test'}, { 'A': np.nan, 'B': 'Test', 'C': np.nan}]) expected = df.where(~isnull(df), None) - with tm.assertRaisesRegexp(TypeError, 'boolean setting on mixed-type'): + with tm.assert_raises_regex(TypeError, 'boolean setting ' + 'on mixed-type'): df.where(~isnull(df), None, inplace=True) def test_where_align(self): @@ -2890,7 +2893,7 @@ def test_type_error_multiindex(self): dg = df.pivot_table(index='i', columns='c', values=['x', 'y']) - with assertRaisesRegexp(TypeError, "is an invalid key"): + with tm.assert_raises_regex(TypeError, "is an invalid key"): str(dg[:, 0]) index = Index(range(2), name='i') diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index f7a510023ca07..21807cb42aa6e 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -86,12 +86,13 @@ def test_join_index(frame): tm.assert_index_equal(joined.index, frame.index.sort_values()) tm.assert_index_equal(joined.columns, expected_columns) - tm.assertRaisesRegexp(ValueError, 'join method', f.join, f2, how='foo') + tm.assert_raises_regex( + ValueError, 'join method', f.join, f2, how='foo') # corner case - overlapping columns for how in ('outer', 'left', 'inner'): - with tm.assertRaisesRegexp(ValueError, 'columns overlap but ' - 'no suffix'): + with tm.assert_raises_regex(ValueError, 'columns overlap but ' + 'no suffix'): frame.join(frame, how=how) @@ 
-122,7 +123,7 @@ def test_join_index_series(frame): tm.assert_frame_equal(joined, frame, check_names=False) s.name = None - tm.assertRaisesRegexp(ValueError, 'must have a name', df.join, s) + tm.assert_raises_regex(ValueError, 'must have a name', df.join, s) def test_join_overlap(frame): diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 74f1d3292fa4e..721cee7f3141b 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -13,9 +13,7 @@ date_range) import pandas as pd -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm from pandas.tests.frame.common import TestData, _check_mixed_float @@ -439,7 +437,8 @@ def test_fillna_dict_series(self): assert_frame_equal(result, expected) # disable this for now - with assertRaisesRegexp(NotImplementedError, 'column by column'): + with tm.assert_raises_regex(NotImplementedError, + 'column by column'): df.fillna(df.max(1), axis=1) def test_fillna_dataframe(self): @@ -479,7 +478,7 @@ def test_fillna_columns(self): assert_frame_equal(result, expected) def test_fillna_invalid_method(self): - with assertRaisesRegexp(ValueError, 'ffil'): + with tm.assert_raises_regex(ValueError, 'ffil'): self.frame.fillna(method='ffil') def test_fillna_invalid_value(self): diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index dfaeaea49cf75..b82a549bae3a0 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -7,7 +7,7 @@ from pandas import DataFrame, Series, Index, MultiIndex -from pandas.util.testing import assert_frame_equal, assertRaisesRegexp +from pandas.util.testing import assert_frame_equal import pandas.util.testing as tm @@ -91,7 +91,7 @@ def test_insert_error_msmgs(self): s = DataFrame({'foo': ['a', 'b', 'c', 'a'], 'fiz': [ 'g', 'h', 'i', 'j']}).set_index('foo') msg = 'cannot reindex from a duplicate axis' - with assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): df['newcol'] = s # GH 4107, more descriptive error message @@ -99,7 +99,7 @@ def test_insert_error_msmgs(self): columns=['a', 'b', 'c', 'd']) msg = 'incompatible index of inserted column with frame index' - with assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): df['gr'] = df.groupby(['b', 'c']).count() def test_insert_benchmark(self): @@ -143,7 +143,7 @@ def test_insert(self): result = Series(dict(float64=4, float32=2, int32=1)) self.assertTrue((df.get_dtype_counts() == result).all()) - with assertRaisesRegexp(ValueError, 'already exists'): + with tm.assert_raises_regex(ValueError, 'already exists'): df.insert(1, 'a', df['b']) pytest.raises(ValueError, df.insert, 1, 'c', df['b']) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 9d5a99eaf2e9d..5c141b6a46eec 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -9,9 +9,7 @@ from pandas import DataFrame, Series, MultiIndex, date_range import pandas as pd -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm @@ -53,7 +51,7 @@ def check(result, expected=None): [2, 1, 3, 5, 'bah']], columns=['foo', 'bar', 
'foo', 'hello', 'string']) check(df, expected) - with assertRaisesRegexp(ValueError, 'Length of value'): + with tm.assert_raises_regex(ValueError, 'Length of value'): df.insert(0, 'AnotherColumn', range(len(df.index) - 1)) # insert same dtype @@ -103,8 +101,8 @@ def check(result, expected=None): check(df, expected) # insert a dup - assertRaisesRegexp(ValueError, 'cannot insert', - df.insert, 2, 'new_col', 4.) + tm.assert_raises_regex(ValueError, 'cannot insert', + df.insert, 2, 'new_col', 4.) df.insert(2, 'new_col', 4., allow_duplicates=True) expected = DataFrame([[1, 1, 4., 5., 'bah', 3], [1, 2, 4., 5., 'bah', 3], diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 3f77bc754a525..d90e859509454 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -20,8 +20,7 @@ from pandas.util.testing import (assert_numpy_array_equal, assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) + assert_frame_equal) import pandas.util.testing as tm @@ -423,10 +422,10 @@ def test_arith_flex_frame(self): # ndim >= 3 ndim_5 = np.ones(self.frame.shape + (3, 4, 5)) msg = "Unable to coerce to Series/DataFrame" - with assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): f(self.frame, ndim_5) - with assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): getattr(self.frame, op)(ndim_5) # res_add = self.frame.add(self.frame) @@ -448,9 +447,9 @@ def test_arith_flex_frame(self): result = self.frame[:0].add(self.frame) assert_frame_equal(result, self.frame * np.nan) - with assertRaisesRegexp(NotImplementedError, 'fill_value'): + with tm.assert_raises_regex(NotImplementedError, 'fill_value'): self.frame.add(self.frame.iloc[0], fill_value=3) - with assertRaisesRegexp(NotImplementedError, 'fill_value'): + with tm.assert_raises_regex(NotImplementedError, 'fill_value'): self.frame.add(self.frame.iloc[0], axis='index', fill_value=3) def test_binary_ops_align(self): @@ -589,7 +588,7 @@ def _check_unaligned_frame(meth, op, df, other): # NAs msg = "Unable to coerce to Series/DataFrame" assert_frame_equal(f(np.nan), o(df, np.nan)) - with assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): f(ndim_5) # Series @@ -921,8 +920,8 @@ def test_comp(func): result = func(df1, df2) tm.assert_numpy_array_equal(result.values, func(df1.values, df2.values)) - with tm.assertRaisesRegexp(ValueError, - 'Wrong number of dimensions'): + with tm.assert_raises_regex(ValueError, + 'Wrong number of dimensions'): func(df1, ndim_5) result2 = func(self.simple, row) @@ -933,9 +932,9 @@ def test_comp(func): tm.assert_numpy_array_equal(result3.values, func(self.frame.values, 0)) - with tm.assertRaisesRegexp(ValueError, - 'Can only compare identically' - '-labeled DataFrame'): + with tm.assert_raises_regex(ValueError, + 'Can only compare identically' + '-labeled DataFrame'): func(self.simple, self.simple[:2]) test_comp(operator.eq) @@ -1179,10 +1178,10 @@ def test_alignment_non_pandas(self): # length mismatch msg = 'Unable to coerce to Series, length must be 3: given 2' for val in [[1, 2], (1, 2), np.array([1, 2])]: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): align(df, val, 'index') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): align(df, val, 'columns') val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) @@ -1196,10 +1195,10 @@ def test_alignment_non_pandas(self): # shape mismatch 
msg = 'Unable to coerce to DataFrame, shape must be' val = np.array([[1, 2, 3], [4, 5, 6]]) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): align(df, val, 'index') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): align(df, val, 'columns') val = np.zeros((3, 3, 3)) diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py index c1b5a92725868..194b6c0e251bc 100644 --- a/pandas/tests/frame/test_period.py +++ b/pandas/tests/frame/test_period.py @@ -106,7 +106,8 @@ def _get_with_delta(delta, freq='A-DEC'): tm.assert_index_equal(result.columns, exp_index) # invalid axis - tm.assertRaisesRegexp(ValueError, 'axis', df.to_timestamp, axis=2) + tm.assert_raises_regex( + ValueError, 'axis', df.to_timestamp, axis=2) result1 = df.to_timestamp('5t', axis=1) result2 = df.to_timestamp('t', axis=1) diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 1a5ba3ccad400..406f8107952ef 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -9,9 +9,7 @@ from pandas import (DataFrame, Series, Timestamp, _np_version_under1p11) import pandas as pd -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm from pandas import _np_version_under1p9 @@ -189,21 +187,21 @@ def test_quantile_interpolation_np_lt_1p9(self): # interpolation method other than default linear expErrMsg = "Interpolation methods other than linear" df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) - with assertRaisesRegexp(ValueError, expErrMsg): + with tm.assert_raises_regex(ValueError, expErrMsg): df.quantile(.5, axis=1, interpolation='nearest') - with assertRaisesRegexp(ValueError, expErrMsg): + with tm.assert_raises_regex(ValueError, expErrMsg): df.quantile([.5, .75], axis=1, interpolation='lower') # test degenerate case df = DataFrame({'x': [], 'y': []}) - with assertRaisesRegexp(ValueError, expErrMsg): + with tm.assert_raises_regex(ValueError, expErrMsg): q = df.quantile(0.1, axis=0, interpolation='higher') # multi df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=['a', 'b', 'c']) - with assertRaisesRegexp(ValueError, expErrMsg): + with tm.assert_raises_regex(ValueError, expErrMsg): df.quantile([.25, .5], interpolation='midpoint') def test_quantile_multi(self): @@ -268,7 +266,7 @@ def test_quantile_datetime(self): def test_quantile_invalid(self): msg = 'percentiles should all be in the interval \\[0, 1\\]' for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.tsframe.quantile(invalid) def test_quantile_box(self): diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index b787d63d3c754..2232205a57326 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -138,10 +138,10 @@ def test_query_non_str(self): df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'b']}) msg = "expr must be a string to be evaluated" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): df.query(lambda x: x.B == "b") - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): df.query(111) def test_query_empty_string(self): @@ -149,7 +149,7 @@ def 
test_query_empty_string(self): df = pd.DataFrame({'A': [1, 2, 3]}) msg = "expr cannot be an empty string" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): df.query('') def test_eval_resolvers_as_list(self): @@ -552,8 +552,8 @@ def test_query_builtin(self): df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list('abc')) df.index.name = 'sin' - with tm.assertRaisesRegexp(NumExprClobberingError, - 'Variables in expression.+'): + with tm.assert_raises_regex(NumExprClobberingError, + 'Variables in expression.+'): df.query('sin > 5', engine=engine, parser=parser) def test_query(self): @@ -686,8 +686,8 @@ def test_query_undefined_local(self): engine, parser = self.engine, self.parser skip_if_no_pandas_parser(parser) df = DataFrame(np.random.rand(10, 2), columns=list('ab')) - with tm.assertRaisesRegexp(UndefinedVariableError, - "local variable 'c' is not defined"): + with tm.assert_raises_regex(UndefinedVariableError, + "local variable 'c' is not defined"): df.query('a == @c', engine=engine, parser=parser) def test_index_resolvers_come_after_columns_with_the_same_name(self): @@ -1119,9 +1119,9 @@ def test_invalid_type_for_operator_raises(self): df = DataFrame({'a': [1, 2], 'b': ['c', 'd']}) ops = '+', '-', '*', '/' for op in ops: - with tm.assertRaisesRegexp(TypeError, - r"unsupported operand type\(s\) for " - r".+: '.+' and '.+'"): + with tm.assert_raises_regex(TypeError, + "unsupported operand type\(s\) " + "for .+: '.+' and '.+'"): df.eval('a {0} b'.format(op), engine=self.engine, parser=self.parser) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index bb2baaf0e02e2..262734d093d4e 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -918,7 +918,7 @@ def test_replace_bool_with_bool(self): def test_replace_with_dict_with_bool_keys(self): df = DataFrame({0: [True, False], 1: [False, True]}) - with tm.assertRaisesRegexp(TypeError, 'Cannot compare types .+'): + with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'): df.replace({'asdf': 'asdb', True: 'yes'}) def test_replace_truthy(self): @@ -929,7 +929,8 @@ def test_replace_truthy(self): def test_replace_int_to_int_chain(self): df = DataFrame({'a': lrange(1, 5)}) - with tm.assertRaisesRegexp(ValueError, "Replacement not allowed .+"): + with tm.assert_raises_regex(ValueError, + "Replacement not allowed .+"): df.replace({'a': dict(zip(range(1, 5), range(2, 6)))}) def test_replace_str_to_str_chain(self): @@ -937,7 +938,8 @@ def test_replace_str_to_str_chain(self): astr = a.astype(str) bstr = np.arange(2, 6).astype(str) df = DataFrame({'a': astr}) - with tm.assertRaisesRegexp(ValueError, "Replacement not allowed .+"): + with tm.assert_raises_regex(ValueError, + "Replacement not allowed .+"): df.replace({'a': dict(zip(astr, bstr))}) def test_replace_swapping_bug(self): diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 60f19b7e6c87b..c1905fa0476c4 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -17,9 +17,7 @@ Timedelta, Period) import pandas as pd -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm @@ -67,7 +65,7 @@ def test_pivot_duplicates(self): data = DataFrame({'a': ['bar', 'bar', 'foo', 'foo', 'foo'], 'b': ['one', 'two', 'one', 'one', 'two'], 'c': [1., 2., 3., 3., 4.]}) - with 
assertRaisesRegexp(ValueError, 'duplicate entries'): + with tm.assert_raises_regex(ValueError, 'duplicate entries'): data.pivot('a', 'b', 'c') def test_pivot_empty(self): diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index f1bca0d6a99b8..bdb5fd0e8354c 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -11,9 +11,7 @@ from pandas import (DataFrame, Series, MultiIndex, Timestamp, date_range, NaT, IntervalIndex) -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm @@ -88,7 +86,7 @@ def test_sort_values(self): assert_frame_equal(sorted_df, expected) msg = r'Length of ascending \(5\) != length of by \(2\)' - with assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): frame.sort_values(by=['A', 'B'], axis=0, ascending=[True] * 5) def test_sort_values_inplace(self): @@ -264,7 +262,7 @@ def test_sort_datetimes(self): def test_frame_column_inplace_sort_exception(self): s = self.frame['A'] - with assertRaisesRegexp(ValueError, "This Series is a view"): + with tm.assert_raises_regex(ValueError, "This Series is a view"): s.sort_values(inplace=True) cp = s.copy() @@ -420,26 +418,26 @@ def test_sort_index_duplicates(self): df = DataFrame([lrange(5, 9), lrange(4)], columns=['a', 'a', 'b', 'b']) - with assertRaisesRegexp(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'duplicate'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): df.sort_index(by='a') - with assertRaisesRegexp(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'duplicate'): df.sort_values(by='a') - with assertRaisesRegexp(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'duplicate'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): df.sort_index(by=['a']) - with assertRaisesRegexp(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'duplicate'): df.sort_values(by=['a']) - with assertRaisesRegexp(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'duplicate'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): # multi-column 'by' is separate codepath df.sort_index(by=['a', 'b']) - with assertRaisesRegexp(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'duplicate'): # multi-column 'by' is separate codepath df.sort_values(by=['a', 'b']) @@ -447,11 +445,11 @@ def test_sort_index_duplicates(self): # GH4370 df = DataFrame(np.random.randn(4, 2), columns=MultiIndex.from_tuples([('a', 0), ('a', 1)])) - with assertRaisesRegexp(ValueError, 'levels'): + with tm.assert_raises_regex(ValueError, 'levels'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): df.sort_index(by='a') - with assertRaisesRegexp(ValueError, 'levels'): + with tm.assert_raises_regex(ValueError, 'levels'): df.sort_values(by='a') # convert tuples to a list of tuples diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 995abfb1ca01a..db4f4b909f7cb 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -156,7 +156,7 @@ class A(DataFrame): @property def bar(self): return self.i_dont_exist - with tm.assertRaisesRegexp(AttributeError, '.*i_dont_exist.*'): + with tm.assert_raises_regex(AttributeError, '.*i_dont_exist.*'): A().bar def 
test_subclass_align(self): diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 090f742a69b63..7a5afa178208a 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -16,9 +16,7 @@ import pandas as pd import pandas.tseries.offsets as offsets -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm from pandas.compat import product @@ -220,8 +218,9 @@ def test_shift(self): assert_frame_equal(shifted2, shifted3) assert_frame_equal(ps, shifted2.shift(-1, 'B')) - assertRaisesRegexp(ValueError, 'does not match PeriodIndex freq', - ps.shift, freq='D') + tm.assert_raises_regex(ValueError, + 'does not match PeriodIndex freq', + ps.shift, freq='D') # shift other axis # GH 6371 @@ -281,7 +280,8 @@ def test_tshift(self): shifted3 = ps.tshift(freq=offsets.BDay()) assert_frame_equal(shifted, shifted3) - assertRaisesRegexp(ValueError, 'does not match', ps.tshift, freq='M') + tm.assert_raises_regex( + ValueError, 'does not match', ps.tshift, freq='M') # DatetimeIndex shifted = self.tsframe.tshift(1) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 0fd1df0b733f8..ffce525434ab5 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -17,9 +17,8 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, - ensure_clean, - makeCustomDataframe as mkdf, - assertRaisesRegexp, slow) + ensure_clean, slow, + makeCustomDataframe as mkdf) import pandas.util.testing as tm from pandas.tests.frame.common import TestData @@ -588,13 +587,13 @@ def _make_frame(names=None): for i in [6, 7]: msg = 'len of {i}, but only 5 lines in file'.format(i=i) - with assertRaisesRegexp(ParserError, msg): + with tm.assert_raises_regex(ParserError, msg): read_csv(path, tupleize_cols=False, header=lrange(i), index_col=0) # write with cols - with assertRaisesRegexp(TypeError, 'cannot specify cols with a ' - 'MultiIndex'): + with tm.assert_raises_regex(TypeError, 'cannot specify cols ' + 'with a MultiIndex'): df.to_csv(path, tupleize_cols=False, columns=['foo', 'bar']) with ensure_clean('__tmp_to_csv_multiindex__') as path: @@ -1106,11 +1105,11 @@ def test_to_csv_quoting(self): self.assertEqual(result, expected) msg = "need to escape, but no escapechar set" - tm.assertRaisesRegexp(csv.Error, msg, df.to_csv, - quoting=csv.QUOTE_NONE) - tm.assertRaisesRegexp(csv.Error, msg, df.to_csv, - quoting=csv.QUOTE_NONE, - escapechar=None) + tm.assert_raises_regex(csv.Error, msg, df.to_csv, + quoting=csv.QUOTE_NONE) + tm.assert_raises_regex(csv.Error, msg, df.to_csv, + quoting=csv.QUOTE_NONE, + escapechar=None) expected = """\ ,c_bool,c_float,c_int,c_string diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index e32dbb7846061..e3f166d2294e2 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -577,7 +577,8 @@ def test_cython_agg_nothing_to_agg_with_dates(self): 'b': ['foo', 'bar'] * 25, 'dates': pd.date_range('now', periods=50, freq='T')}) - with tm.assertRaisesRegexp(DataError, "No numeric types to aggregate"): + with tm.assert_raises_regex(DataError, + "No numeric types to aggregate"): frame.groupby('b').dates.mean() def test_cython_agg_frame_columns(self): diff --git a/pandas/tests/groupby/test_filters.py 
b/pandas/tests/groupby/test_filters.py index 5f39f320b1ea1..2cfbe0ab68c8e 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -578,7 +578,8 @@ def test_filter_enforces_scalarness(self): ['worst', 'd', 'y'], ['best', 'd', 'z'], ], columns=['a', 'b', 'c']) - with tm.assertRaisesRegexp(TypeError, 'filter function returned a.*'): + with tm.assert_raises_regex(TypeError, + 'filter function returned a.*'): df.groupby('c').filter(lambda g: g['a'] == 'best') def test_filter_non_bool_raises(self): @@ -591,7 +592,8 @@ def test_filter_non_bool_raises(self): ['worst', 'd', 1], ['best', 'd', 1], ], columns=['a', 'b', 'c']) - with tm.assertRaisesRegexp(TypeError, 'filter function returned a.*'): + with tm.assert_raises_regex(TypeError, + 'filter function returned a.*'): df.groupby('a').filter(lambda g: g.c.mean()) def test_filter_dropna_with_empty_groups(self): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 8ca8ddded3073..177c2345ea143 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -14,7 +14,7 @@ from pandas.errors import UnsupportedFunctionCall, PerformanceWarning from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal, - assert_index_equal, assertRaisesRegexp) + assert_index_equal) from pandas.compat import (range, long, lrange, StringIO, lmap, lzip, map, zip, builtins, OrderedDict, product as cart_product) from pandas import compat @@ -82,7 +82,7 @@ def test_select_bad_cols(self): pytest.raises(KeyError, g.__getitem__, ['C']) # g[['C']] pytest.raises(KeyError, g.__getitem__, ['A', 'C']) # g[['A', 'C']] - with assertRaisesRegexp(KeyError, '^[^A]+$'): + with tm.assert_raises_regex(KeyError, '^[^A]+$'): # A should not be referenced as a bad column... # will have to rethink regex if you change message! 
g[['A', 'C']] @@ -1874,16 +1874,14 @@ def test_groupby_args(self): def j(): frame.groupby() - tm.assertRaisesRegexp(TypeError, - "You have to supply one of 'by' and 'level'", - j) + tm.assert_raises_regex(TypeError, "You have to supply one of " + "'by' and 'level'", j) def k(): frame.groupby(by=None, level=None) - tm.assertRaisesRegexp(TypeError, - "You have to supply one of 'by' and 'level'", - k) + tm.assert_raises_regex(TypeError, "You have to supply one of " + "'by' and 'level'", k) def test_groupby_level_mapper(self): frame = self.mframe @@ -3753,10 +3751,10 @@ def test_numpy_compat(self): msg = "numpy operations are not valid with groupby" for func in ('mean', 'var', 'std', 'cumprod', 'cumsum'): - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(g, func), 1, 2, 3) - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(g, func), foo=1) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(g, func), 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(g, func), foo=1) def test_grouping_string_repr(self): # GH 13394 diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 4624d43df6128..e0d81003e325f 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -556,7 +556,8 @@ def test_transform_with_non_scalar_group(self): df = pd.DataFrame(np.random.randint(1, 10, (4, 12)), columns=cols, index=['A', 'C', 'G', 'T']) - tm.assertRaisesRegexp(ValueError, 'transform must return a scalar ' - 'value for each group.*', df.groupby - (axis=1, level=1).transform, - lambda z: z.div(z.sum(axis=1), axis=0)) + tm.assert_raises_regex(ValueError, 'transform must return ' + 'a scalar value for each ' + 'group.*', + df.groupby(axis=1, level=1).transform, + lambda z: z.div(z.sum(axis=1), axis=0)) diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index 5a4f282789eeb..5d131717f8345 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -223,7 +223,7 @@ def test_groupby_blacklist(df_letters): for obj in (df, s): gb = obj.groupby(df.letters) msg = fmt.format(bl, type(gb).__name__) - with tm.assertRaisesRegexp(AttributeError, msg): + with tm.assert_raises_regex(AttributeError, msg): getattr(gb, bl) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index b62cab6cc8710..56a9af73e904a 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -13,7 +13,6 @@ notnull, isnull) from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas.core.dtypes.common import needs_i8_conversion -from pandas.util.testing import assertRaisesRegexp from pandas._libs.tslib import iNaT import pandas.util.testing as tm @@ -91,26 +90,26 @@ def test_create_index_existing_name(self): def test_numeric_compat(self): idx = self.create_index() - tm.assertRaisesRegexp(TypeError, "cannot perform __mul__", - lambda: idx * 1) - tm.assertRaisesRegexp(TypeError, "cannot perform __mul__", - lambda: 1 * idx) + tm.assert_raises_regex(TypeError, "cannot perform __mul__", + lambda: idx * 1) + tm.assert_raises_regex(TypeError, "cannot perform __mul__", + lambda: 1 * idx) div_err = "cannot perform __truediv__" if PY3 \ else "cannot perform __div__" - tm.assertRaisesRegexp(TypeError, div_err, lambda: idx / 1) - tm.assertRaisesRegexp(TypeError, div_err, lambda: 1 / idx) - tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__", - lambda: idx // 1) - 
tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__", - lambda: 1 // idx) + tm.assert_raises_regex(TypeError, div_err, lambda: idx / 1) + tm.assert_raises_regex(TypeError, div_err, lambda: 1 / idx) + tm.assert_raises_regex(TypeError, "cannot perform __floordiv__", + lambda: idx // 1) + tm.assert_raises_regex(TypeError, "cannot perform __floordiv__", + lambda: 1 // idx) def test_logical_compat(self): idx = self.create_index() - tm.assertRaisesRegexp(TypeError, 'cannot perform all', - lambda: idx.all()) - tm.assertRaisesRegexp(TypeError, 'cannot perform any', - lambda: idx.any()) + tm.assert_raises_regex(TypeError, 'cannot perform all', + lambda: idx.all()) + tm.assert_raises_regex(TypeError, 'cannot perform any', + lambda: idx.any()) def test_boolean_context_compat(self): @@ -121,7 +120,7 @@ def f(): if idx: pass - tm.assertRaisesRegexp(ValueError, 'The truth value of a', f) + tm.assert_raises_regex(ValueError, 'The truth value of a', f) def test_reindex_base(self): idx = self.create_index() @@ -130,7 +129,7 @@ def test_reindex_base(self): actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) - with tm.assertRaisesRegexp(ValueError, 'Invalid fill method'): + with tm.assert_raises_regex(ValueError, 'Invalid fill method'): idx.get_indexer(idx, method='invalid') def test_ndarray_compat_properties(self): @@ -178,7 +177,7 @@ def testit(ind): ind.names = ["apple", "banana", "carrot"] for ind in self.indices.values(): - assertRaisesRegexp(ValueError, "^Length", testit, ind) + tm.assert_raises_regex(ValueError, "^Length", testit, ind) def test_set_name_methods(self): new_name = "This is the new name for this index" @@ -198,10 +197,10 @@ def test_set_name_methods(self): assert res is None self.assertEqual(ind.name, new_name) self.assertEqual(ind.names, [new_name]) - # with assertRaisesRegexp(TypeError, "list-like"): + # with tm.assert_raises_regex(TypeError, "list-like"): # # should still fail even if it would be the right length # ind.set_names("a") - with assertRaisesRegexp(ValueError, "Level must be None"): + with tm.assert_raises_regex(ValueError, "Level must be None"): ind.set_names("a", level=0) # rename in place just leaves tuples and other containers alone @@ -212,8 +211,8 @@ def test_set_name_methods(self): def test_hash_error(self): for ind in self.indices.values(): - with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % - type(ind).__name__): + with tm.assert_raises_regex(TypeError, "unhashable type: %r" % + type(ind).__name__): hash(ind) def test_copy_name(self): @@ -427,16 +426,16 @@ def test_numpy_argsort(self): # backwards compatibility concerns if isinstance(type(ind), (CategoricalIndex, RangeIndex)): msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, - np.argsort, ind, axis=1) + tm.assert_raises_regex(ValueError, msg, + np.argsort, ind, axis=1) msg = "the 'kind' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.argsort, - ind, kind='mergesort') + tm.assert_raises_regex(ValueError, msg, np.argsort, + ind, kind='mergesort') msg = "the 'order' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.argsort, - ind, order=('a', 'b')) + tm.assert_raises_regex(ValueError, msg, np.argsort, + ind, order=('a', 'b')) def test_pickle(self): for ind in self.indices.values(): @@ -467,16 +466,16 @@ def test_take_invalid_kwargs(self): indices = [1, 2] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, idx.take, - indices, foo=2) + 
tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, out=indices) + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, mode='clip') + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') def test_repeat(self): rep = 2 @@ -496,8 +495,8 @@ def test_numpy_repeat(self): tm.assert_index_equal(np.repeat(i, rep), expected) msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.repeat, - i, rep, axis=0) + tm.assert_raises_regex(ValueError, msg, np.repeat, + i, rep, axis=0) def test_where(self): i = self.create_index() @@ -533,9 +532,10 @@ def test_setops_errorcases(self): for method in methods: for case in cases: - assertRaisesRegexp(TypeError, - "Input must be Index or array-like", - method, case) + tm.assert_raises_regex(TypeError, + "Input must be Index " + "or array-like", + method, case) def test_intersection_base(self): for name, idx in compat.iteritems(self.indices): @@ -554,7 +554,7 @@ def test_intersection_base(self): for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): result = first.intersection(case) elif isinstance(idx, CategoricalIndex): pass @@ -564,7 +564,7 @@ def test_intersection_base(self): if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): result = first.intersection([1, 2, 3]) def test_union_base(self): @@ -581,7 +581,7 @@ def test_union_base(self): for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): result = first.union(case) elif isinstance(idx, CategoricalIndex): pass @@ -591,7 +591,7 @@ def test_union_base(self): if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): result = first.union([1, 2, 3]) def test_difference_base(self): @@ -612,7 +612,7 @@ def test_difference_base(self): for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): result = first.difference(case) elif isinstance(idx, CategoricalIndex): pass @@ -625,7 +625,7 @@ def test_difference_base(self): if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): result = first.difference([1, 2, 3]) def test_symmetric_difference(self): @@ -645,7 +645,7 @@ def test_symmetric_difference(self): for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): result = first.symmetric_difference(case) elif isinstance(idx, CategoricalIndex): pass @@ -655,7 +655,7 @@ def test_symmetric_difference(self): if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list 
of tuples" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): result = first.symmetric_difference([1, 2, 3]) # 12591 deprecated @@ -728,7 +728,7 @@ def test_equals_op(self): index_b = index_a[0:-1] index_c = index_a[0:-1].append(index_a[-2:-1]) index_d = index_a[0:1] - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == index_b expected1 = np.array([True] * n) expected2 = np.array([True] * (n - 1) + [False]) @@ -740,7 +740,7 @@ def test_equals_op(self): array_b = np.array(index_a[0:-1]) array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) array_d = np.array(index_a[0:1]) - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == array_b tm.assert_numpy_array_equal(index_a == array_a, expected1) tm.assert_numpy_array_equal(index_a == array_c, expected2) @@ -750,22 +750,22 @@ def test_equals_op(self): series_b = Series(array_b) series_c = Series(array_c) series_d = Series(array_d) - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == series_b tm.assert_numpy_array_equal(index_a == series_a, expected1) tm.assert_numpy_array_equal(index_a == series_c, expected2) # cases where length is 1 for one of them - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == index_d - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == series_d - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == array_d msg = "Can only compare identically-labeled Series objects" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): series_a == series_d - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): series_a == array_d # comparing with a scalar should broadcast; note that we are excluding @@ -875,7 +875,7 @@ def test_fillna(self): elif isinstance(index, MultiIndex): idx = index.copy() msg = "isnull is not defined for MultiIndex" - with tm.assertRaisesRegexp(NotImplementedError, msg): + with tm.assert_raises_regex(NotImplementedError, msg): idx.fillna(idx[0]) else: idx = index.copy() @@ -884,7 +884,7 @@ def test_fillna(self): self.assertFalse(result is idx) msg = "'value' must be a scalar, passed: " - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): idx.fillna([idx[0]]) idx = index.copy() @@ -918,7 +918,7 @@ def test_nulls(self): elif isinstance(index, MultiIndex): idx = index.copy() msg = "isnull is not defined for MultiIndex" - with tm.assertRaisesRegexp(NotImplementedError, msg): + with tm.assert_raises_regex(NotImplementedError, msg): idx.isnull() else: diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index ca673e3059ea2..ea9f7c65fb49b 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -246,7 +246,8 @@ def test_construction_dti_with_mixed_timezones(self): Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') - with tm.assertRaisesRegexp(TypeError, 'data is already 
tz-aware'): + with tm.assert_raises_regex(TypeError, + 'data is already tz-aware'): DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], tz='Asia/Tokyo', name='idx') @@ -256,7 +257,8 @@ def test_construction_dti_with_mixed_timezones(self): Timestamp('2011-01-02 10:00', tz='US/Eastern')], tz='US/Eastern', name='idx') - with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'): + with tm.assert_raises_regex(TypeError, + 'data is already tz-aware'): # passing tz should results in DatetimeIndex, then mismatch raises # TypeError Index([pd.NaT, Timestamp('2011-01-01 10:00'), diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 6e49b1612b4c5..e570313b716cb 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -212,31 +212,31 @@ def test_naive_aware_conflicts(self): naive = bdate_range(START, END, freq=BDay(), tz=None) aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong") - tm.assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", - naive.join, aware) - tm.assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", - aware.join, naive) + tm.assert_raises_regex(TypeError, "tz-naive.*tz-aware", + naive.join, aware) + tm.assert_raises_regex(TypeError, "tz-naive.*tz-aware", + aware.join, naive) def test_cached_range(self): DatetimeIndex._cached_range(START, END, offset=BDay()) DatetimeIndex._cached_range(START, periods=20, offset=BDay()) DatetimeIndex._cached_range(end=START, periods=20, offset=BDay()) - tm.assertRaisesRegexp(TypeError, "offset", - DatetimeIndex._cached_range, - START, END) + tm.assert_raises_regex(TypeError, "offset", + DatetimeIndex._cached_range, + START, END) - tm.assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, START, - offset=BDay()) + tm.assert_raises_regex(TypeError, "specify period", + DatetimeIndex._cached_range, START, + offset=BDay()) - tm.assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, end=END, - offset=BDay()) + tm.assert_raises_regex(TypeError, "specify period", + DatetimeIndex._cached_range, end=END, + offset=BDay()) - tm.assertRaisesRegexp(TypeError, "start or end", - DatetimeIndex._cached_range, periods=20, - offset=BDay()) + tm.assert_raises_regex(TypeError, "start or end", + DatetimeIndex._cached_range, periods=20, + offset=BDay()) def test_cached_range_bug(self): rng = date_range('2010-09-01 05:00:00', periods=50, diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index abfc52728ef0f..8a4cff2974b0d 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -40,7 +40,7 @@ def test_get_loc(self): tolerance=np.timedelta64(1, 'D')), 1) self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', tolerance=timedelta(1)), 1) - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): + with tm.assert_raises_regex(ValueError, 'must be convertible'): idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') with pytest.raises(KeyError): idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') @@ -212,8 +212,8 @@ def test_week_of_month_frequency(self): def test_hash_error(self): index = date_range('20010101', periods=10) - with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % - type(index).__name__): + with tm.assert_raises_regex(TypeError, "unhashable type: %r" % + 
type(index).__name__): hash(index) def test_stringified_slice_with_tz(self): @@ -508,9 +508,9 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -539,9 +539,9 @@ def test_take_fill_value_with_timezone(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -645,8 +645,9 @@ def test_join_with_period_index(self): joins = 'left', 'right', 'inner', 'outer' for join in joins: - with tm.assertRaisesRegexp(ValueError, 'can only call with other ' - 'PeriodIndex-ed objects'): + with tm.assert_raises_regex(ValueError, + 'can only call with other ' + 'PeriodIndex-ed objects'): df.columns.join(s.index, how=join) def test_factorize(self): @@ -755,12 +756,12 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): ts = Series(np.arange(20), date_range('2014-01-01', periods=20, freq='MS')) - tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) def test_slice_bounds_empty(self): # GH 14354 diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index d531d0913df77..020bb0e27d9de 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -131,16 +131,18 @@ def test_numpy_minmax(self): Timestamp('2016-01-20 00:00:00', freq='D')) errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.min, dr, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.max, dr, out=0) + tm.assert_raises_regex(ValueError, errmsg, np.min, dr, out=0) + tm.assert_raises_regex(ValueError, errmsg, np.max, dr, out=0) self.assertEqual(np.argmin(dr), 0) self.assertEqual(np.argmax(dr), 5) if not _np_version_under1p10: errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, dr, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, dr, out=0) + tm.assert_raises_regex( + ValueError, errmsg, np.argmin, dr, out=0) + tm.assert_raises_regex( + ValueError, errmsg, np.argmax, dr, out=0) def test_round(self): for tz in self.tz: @@ -161,14 +163,14 @@ def test_round(self): self.assertEqual(elt.round(freq='H'), expected_elt) msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): rng.round(freq='foo') - with tm.assertRaisesRegexp(ValueError, msg): + 
with tm.assert_raises_regex(ValueError, msg): elt.round(freq='foo') msg = " is a non-fixed frequency" - tm.assertRaisesRegexp(ValueError, msg, rng.round, freq='M') - tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') + tm.assert_raises_regex(ValueError, msg, rng.round, freq='M') + tm.assert_raises_regex(ValueError, msg, elt.round, freq='M') # GH 14440 & 15578 index = pd.DatetimeIndex(['2016-10-17 12:00:00.0015'], tz=tz) @@ -245,8 +247,8 @@ def test_repeat(self): assert res.freq is None tm.assert_index_equal(np.repeat(rng, reps), expected_rng) - tm.assertRaisesRegexp(ValueError, msg, np.repeat, - rng, reps, axis=1) + tm.assert_raises_regex(ValueError, msg, np.repeat, + rng, reps, axis=1) def test_representation(self): @@ -433,10 +435,10 @@ def test_add_iadd(self): idx = DatetimeIndex(['2011-01-01', '2011-01-02']) msg = "cannot add a datelike to a DatetimeIndex" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): idx + Timestamp('2011-01-01') - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): Timestamp('2011-01-01') + idx def test_add_dti_dti(self): @@ -830,16 +832,16 @@ def test_take_invalid_kwargs(self): indices = [1, 6, 5, 9, 10, 13, 15, 3] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, idx.take, - indices, foo=2) + tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, out=indices) + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, mode='clip') + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') def test_infer_freq(self): # GH 11018 diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 352e066c6c90c..c3eda8b378c96 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -122,8 +122,8 @@ def test_partial_slice_second_precision(self): tm.assert_series_equal(s['2005-1-1 00:01:00'], s.iloc[10:]) self.assertEqual(s[Timestamp('2005-1-1 00:00:59.999990')], s.iloc[0]) - tm.assertRaisesRegexp(KeyError, '2005-1-1 00:00:00', - lambda: s['2005-1-1 00:00:00']) + tm.assert_raises_regex(KeyError, '2005-1-1 00:00:00', + lambda: s['2005-1-1 00:00:00']) def test_partial_slicing_dataframe(self): # GH14856 @@ -249,14 +249,14 @@ def test_partial_slice_doesnt_require_monotonicity(self): timestamp = pd.Timestamp('2014-01-10') tm.assert_series_equal(nonmonotonic['2014-01-10':], expected) - tm.assertRaisesRegexp(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic[timestamp:]) + tm.assert_raises_regex(KeyError, + r"Timestamp\('2014-01-10 00:00:00'\)", + lambda: nonmonotonic[timestamp:]) tm.assert_series_equal(nonmonotonic.loc['2014-01-10':], expected) - tm.assertRaisesRegexp(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic.loc[timestamp:]) + tm.assert_raises_regex(KeyError, + r"Timestamp\('2014-01-10 00:00:00'\)", + lambda: nonmonotonic.loc[timestamp:]) def test_loc_datetime_length_one(self): # GH16071 diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index c637b36d1bbb5..715825417cd31 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ 
b/pandas/tests/indexes/datetimes/test_tools.py @@ -1308,13 +1308,13 @@ def test_parsers_monthfreq(self): def test_parsers_quarterly_with_freq(self): msg = ('Incorrect quarterly string is given, quarter ' 'must be between 1 and 4: 2013Q5') - with tm.assertRaisesRegexp(tslib.DateParseError, msg): + with tm.assert_raises_regex(tslib.DateParseError, msg): tools.parse_time_string('2013Q5') # GH 5418 msg = ('Unable to retrieve month information from given freq: ' 'INVLD-L-DEC-SAT') - with tm.assertRaisesRegexp(tslib.DateParseError, msg): + with tm.assert_raises_regex(tslib.DateParseError, msg): tools.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT') cases = {('2013Q2', None): datetime(2013, 4, 1), diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 8f2b03829b128..434271cbe22ec 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -181,7 +181,7 @@ def test_constructor_dtype(self): self.assertEqual(res.dtype, 'period[M]') msg = 'specified freq and dtype are different' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): PeriodIndex(['2011-01'], freq='M', dtype='period[D]') def test_constructor_empty(self): @@ -190,7 +190,7 @@ def test_constructor_empty(self): self.assertEqual(len(idx), 0) self.assertEqual(idx.freq, 'M') - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + with tm.assert_raises_regex(ValueError, 'freq not specified'): pd.PeriodIndex([]) def test_constructor_pi_nat(self): @@ -216,35 +216,35 @@ def test_constructor_pi_nat(self): idx = PeriodIndex([pd.NaT, pd.NaT, '2011-01', '2011-01'], freq='M') tm.assert_index_equal(idx, exp) - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + with tm.assert_raises_regex(ValueError, 'freq not specified'): PeriodIndex([pd.NaT, pd.NaT]) - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + with tm.assert_raises_regex(ValueError, 'freq not specified'): PeriodIndex(np.array([pd.NaT, pd.NaT])) - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + with tm.assert_raises_regex(ValueError, 'freq not specified'): PeriodIndex(['NaT', 'NaT']) - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + with tm.assert_raises_regex(ValueError, 'freq not specified'): PeriodIndex(np.array(['NaT', 'NaT'])) def test_constructor_incompat_freq(self): msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): PeriodIndex([Period('2011-01', freq='M'), pd.NaT, Period('2011-01', freq='D')]) - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT, Period('2011-01', freq='D')])) # first element is pd.NaT - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): PeriodIndex([pd.NaT, Period('2011-01', freq='M'), Period('2011-01', freq='D')]) - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): PeriodIndex(np.array([pd.NaT, Period('2011-01', freq='M'), Period('2011-01', freq='D')])) @@ -332,15 +332,15 @@ def test_constructor_freq_mult(self): msg = ('Frequency must be positive, because it' ' represents 
span: -1M') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): PeriodIndex(['2011-01'], freq='-1M') msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): PeriodIndex(['2011-01'], freq='0M') msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): period_range('2011-01', periods=3, freq='0M') def test_constructor_freq_mult_dti_compat(self): @@ -437,11 +437,11 @@ def test_constructor_error(self): end_intv = Period('2006-12-31', ('w', 1)) msg = 'Start and end must have same freq' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start, end=end_intv) msg = 'Must specify 2 of start, end, periods' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start) def test_recreate_from_data(self): diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 4d5fdd748219c..7af9e9ae3b14c 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -103,10 +103,10 @@ def test_getitem_partial(self): tm.assert_series_equal(exp, result) ts = ts[10:].append(ts[10:]) - tm.assertRaisesRegexp(KeyError, - "left slice bound for non-unique " - "label: '2008'", - ts.__getitem__, slice('2008', '2009')) + tm.assert_raises_regex(KeyError, + "left slice bound for non-unique " + "label: '2008'", + ts.__getitem__, slice('2008', '2009')) def test_getitem_datetime(self): rng = period_range(start='2012-01-01', periods=10, freq='W-MON') @@ -311,9 +311,9 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 4f54f44b7bdab..70c0879a0871a 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -105,16 +105,18 @@ def test_numpy_minmax(self): self.assertEqual(np.max(pr), Period('2016-01-20', freq='D')) errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.min, pr, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.max, pr, out=0) + tm.assert_raises_regex(ValueError, errmsg, np.min, pr, out=0) + tm.assert_raises_regex(ValueError, errmsg, np.max, pr, out=0) self.assertEqual(np.argmin(pr), 0) self.assertEqual(np.argmax(pr), 5) if not _np_version_under1p10: errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, pr, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, pr, out=0) + tm.assert_raises_regex( + ValueError, errmsg, np.argmin, pr, out=0) + tm.assert_raises_regex( + ValueError, errmsg, np.argmax, pr, out=0) def test_representation(self): # GH 7601 @@ -309,7 +311,8 @@ def test_add_iadd(self): timedelta(365), Timedelta(days=365)]: msg = ('Input has different freq(=.+)? 
' 'from PeriodIndex\\(freq=A-DEC\\)') - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): rng + o rng = pd.period_range('2014-01', '2016-12', freq='M') @@ -324,7 +327,8 @@ def test_add_iadd(self): timedelta(365), Timedelta(days=365)]: rng = pd.period_range('2014-01', '2016-12', freq='M') msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=M\\)' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): rng + o # Tick @@ -345,7 +349,8 @@ def test_add_iadd(self): timedelta(hours=23), Timedelta('23:00:00')]: rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=D\\)' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): rng + o offsets = [pd.offsets.Hour(2), timedelta(hours=2), @@ -367,9 +372,11 @@ def test_add_iadd(self): rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=H\\)' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - result = rng + delta - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): + rng + delta + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): rng += delta # int @@ -415,7 +422,8 @@ def test_sub_isub(self): rng = pd.period_range('2014', '2024', freq='A') msg = ('Input has different freq(=.+)? ' 'from PeriodIndex\\(freq=A-DEC\\)') - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): rng - o rng = pd.period_range('2014-01', '2016-12', freq='M') @@ -430,7 +438,8 @@ def test_sub_isub(self): timedelta(365)]: rng = pd.period_range('2014-01', '2016-12', freq='M') msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=M\\)' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): rng - o # Tick @@ -450,7 +459,8 @@ def test_sub_isub(self): timedelta(hours=23)]: rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=D\\)' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): rng - o offsets = [pd.offsets.Hour(2), timedelta(hours=2), @@ -471,9 +481,11 @@ def test_sub_isub(self): rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') msg = 'Input has different freq(=.+)? 
from PeriodIndex\\(freq=H\\)' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - result = rng + delta - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): + rng + delta + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): rng += delta # int @@ -884,14 +896,14 @@ def test_pi_ops_errors(self): for obj in [idx, s]: for ng in ["str", 1.5]: - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): obj + ng with pytest.raises(TypeError): # error message differs between PY2 and 3 ng + obj - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): obj - ng with pytest.raises(TypeError): @@ -987,13 +999,16 @@ def test_pi_offset_errors(self): msg_idx = r"Input has different freq from PeriodIndex\(freq=D\)" msg_s = r"Input cannot be converted to Period\(freq=D\)" for obj, msg in [(idx, msg_idx), (s, msg_s)]: - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): obj + offsets.Hour(2) - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): offsets.Hour(2) + obj - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): obj - offsets.Hour(2) def test_pi_sub_period(self): @@ -1247,25 +1262,31 @@ def test_pi_pi_comp(self): # different base freq msg = "Input has different freq=A-DEC from PeriodIndex" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): base <= Period('2011', freq='A') - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): Period('2011', freq='A') >= base - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='A') base <= idx - # different mult + # Different frequency msg = "Input has different freq=4M from PeriodIndex" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): base <= Period('2011', freq='4M') - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): Period('2011', freq='4M') >= base - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='4M') base <= idx @@ -1317,8 +1338,10 @@ def test_pi_nat_comp(self): diff = PeriodIndex(['2011-02', '2011-01', '2011-04', 'NaT'], freq='4M') msg = "Input has different freq=4M from PeriodIndex" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): idx1 > diff - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): idx1 == diff diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index b13e231db6c1c..7c1279a12450c 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ 
-42,12 +42,12 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): ts = Series(np.arange(20), period_range('2014-01', periods=20, freq='M')) - tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) def test_slice_keep_name(self): idx = period_range('20010101', periods=10, freq='D', name='bob') diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 6036d6c0fb19b..e563f683bf8ca 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -82,11 +82,11 @@ def test_get_loc(self): tolerance=np.timedelta64(1, 'D')), 1) self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', tolerance=timedelta(1)), 1) - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): + with tm.assert_raises_regex(ValueError, 'must be convertible'): idx.get_loc('2000-01-10', method='nearest', tolerance='foo') msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') with pytest.raises(KeyError): idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') @@ -151,7 +151,7 @@ def test_get_indexer(self): np.array([0, -1, 1], dtype=np.intp)) msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.get_indexer(target, 'nearest', tolerance='1 minute') tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', @@ -223,8 +223,8 @@ def test_difference_freq(self): def test_hash_error(self): index = period_range('20010101', periods=10) - with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % - type(index).__name__): + with tm.assert_raises_regex(TypeError, "unhashable type: %r" % + type(index).__name__): hash(index) def test_make_time_series(self): @@ -679,7 +679,8 @@ def test_numpy_repeat(self): tm.assert_index_equal(np.repeat(index, 2), expected) msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.repeat, index, 2, axis=1) + tm.assert_raises_regex( + ValueError, msg, np.repeat, index, 2, axis=1) def test_pindex_multiples(self): pi = PeriodIndex(start='1/1/11', end='12/31/11', freq='2M') diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index 97f9cff2d193e..e1fdc85d670d4 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -112,7 +112,7 @@ def test_union_misc(self): index.union(index2) msg = 'can only call with other PeriodIndex-ed objects' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): index.join(index.to_timestamp()) index3 = period_range('1/1/2000', '1/20/2000', freq='2D') diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 3887463cae827..60ad8fed32399 100644 --- a/pandas/tests/indexes/period/test_tools.py 
+++ b/pandas/tests/indexes/period/test_tools.py @@ -271,7 +271,7 @@ def test_to_timestamp_pi_nat(self): msg = ('Frequency must be positive, because it' ' represents span: -2A') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): result.to_period(freq='-2A') def test_to_timestamp_pi_mult(self): @@ -386,7 +386,7 @@ def test_to_period_monthish(self): self.assertEqual(prng.freq, 'M') msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): date_range('01-Jan-2012', periods=8, freq='EOM') def test_period_dt64_round_trip(self): @@ -439,11 +439,13 @@ def test_searchsorted(self): self.assertEqual(pidx.searchsorted(p2), 3) msg = "Input has different freq=H from PeriodIndex" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): pidx.searchsorted(pd.Period('2014-01-01', freq='H')) msg = "Input has different freq=5D from PeriodIndex" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): pidx.searchsorted(pd.Period('2014-01-01', freq='5D')) with tm.assert_produces_warning(FutureWarning): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 06f98527deefb..caf2dde249600 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -206,10 +206,10 @@ def test_constructor_int_dtype_nan(self): data = [np.nan] msg = "cannot convert" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): Index(data, dtype='int64') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): Index(data, dtype='uint64') # This, however, should not break @@ -1062,10 +1062,10 @@ def test_get_indexer_invalid(self): # GH10411 idx = Index(np.arange(10)) - with tm.assertRaisesRegexp(ValueError, 'tolerance argument'): + with tm.assert_raises_regex(ValueError, 'tolerance argument'): idx.get_indexer([1, 0], tolerance=1) - with tm.assertRaisesRegexp(ValueError, 'limit argument'): + with tm.assert_raises_regex(ValueError, 'limit argument'): idx.get_indexer([1, 0], limit=1) def test_get_indexer_nearest(self): @@ -1099,7 +1099,7 @@ def test_get_indexer_nearest(self): tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) - with tm.assertRaisesRegexp(ValueError, 'limit argument'): + with tm.assert_raises_regex(ValueError, 'limit argument'): idx.get_indexer([1, 0], method='nearest', limit=1) def test_get_indexer_nearest_decreasing(self): @@ -1154,9 +1154,9 @@ def test_get_loc(self): with pytest.raises(KeyError): idx.get_loc(1.1, method, tolerance=0.05) - with tm.assertRaisesRegexp(ValueError, 'must be numeric'): + with tm.assert_raises_regex(ValueError, 'must be numeric'): idx.get_loc(1.1, 'nearest', tolerance='invalid') - with tm.assertRaisesRegexp(ValueError, 'tolerance .* valid if'): + with tm.assert_raises_regex(ValueError, 'tolerance .* valid if'): idx.get_loc(1.1, tolerance=1) idx = pd.Index(['a', 'c']) @@ -1450,8 +1450,8 @@ def test_str_attribute(self): MultiIndex.from_tuples([('foo', '1'), ('bar', '3')]), PeriodIndex(start='2000', end='2010', freq='A')] for idx in indices: - with tm.assertRaisesRegexp(AttributeError, - 'only use .str accessor'): + with tm.assert_raises_regex(AttributeError, + 'only use .str accessor'): idx.str.repeat(2) idx = Index(['a b c', 'd e', 'f']) @@ -1526,9 +1526,9 @@ def 
test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -1537,8 +1537,8 @@ def test_take_fill_value(self): def test_reshape_raise(self): msg = "reshaping is not supported" idx = pd.Index([0, 1, 2]) - tm.assertRaisesRegexp(NotImplementedError, msg, - idx.reshape, idx.shape) + tm.assert_raises_regex(NotImplementedError, msg, + idx.reshape, idx.shape) def test_reindex_preserves_name_if_target_is_list_or_ndarray(self): # GH6552 @@ -1617,11 +1617,11 @@ def test_equals_op_multiindex(self): mi2 = MultiIndex.from_tuples([(1, 2), (4, 6)]) tm.assert_numpy_array_equal(df.index == mi2, np.array([True, False])) mi3 = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]) - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): df.index == mi3 index_a = Index(['foo', 'bar', 'baz']) - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): df.index == index_a tm.assert_numpy_array_equal(index_a == mi3, np.array([False, False, False])) @@ -1821,10 +1821,10 @@ def create_index(self): def test_argsort(self): idx = self.create_index() if PY36: - with tm.assertRaisesRegexp(TypeError, "'>' not supported"): + with tm.assert_raises_regex(TypeError, "'>' not supported"): result = idx.argsort() elif PY3: - with tm.assertRaisesRegexp(TypeError, "unorderable types"): + with tm.assert_raises_regex(TypeError, "unorderable types"): result = idx.argsort() else: result = idx.argsort() @@ -1834,10 +1834,10 @@ def test_argsort(self): def test_numpy_argsort(self): idx = self.create_index() if PY36: - with tm.assertRaisesRegexp(TypeError, "'>' not supported"): + with tm.assert_raises_regex(TypeError, "'>' not supported"): result = np.argsort(idx) elif PY3: - with tm.assertRaisesRegexp(TypeError, "unorderable types"): + with tm.assert_raises_regex(TypeError, "unorderable types"): result = np.argsort(idx) else: result = np.argsort(idx) @@ -2002,7 +2002,7 @@ def test_dropna(self): tm.assert_index_equal(nanidx.dropna(), idx) msg = "invalid how option: xxx" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): pd.Index([1, 2, 3]).dropna(how='xxx') def test_get_combined_index(self): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 057ea1a416275..5dcd45e8c85b0 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -373,7 +373,7 @@ def test_reindex_base(self): actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) - with tm.assertRaisesRegexp(ValueError, 'Invalid fill method'): + with tm.assert_raises_regex(ValueError, 'Invalid fill method'): idx.get_indexer(idx, method='invalid') def test_reindexing(self): @@ -579,7 +579,7 @@ def test_equals_categorical(self): self.assertTrue((ci1 == ci1.values).all()) # invalid comparisons - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): ci1 == Index(['a', 'b', 'c']) pytest.raises(TypeError, lambda: ci1 == ci2) pytest.raises( @@ -806,8 +806,8 @@ def 
test_fillna_categorical(self): tm.assert_index_equal(idx.fillna(1.0), exp) # fill by value not in categories raises ValueError - with tm.assertRaisesRegexp(ValueError, - 'fill value must be in categories'): + with tm.assert_raises_regex(ValueError, + 'fill value must be in categories'): idx.fillna(2.0) def test_take_fill_value(self): @@ -861,9 +861,9 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -899,9 +899,9 @@ def test_take_fill_value_datetime(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -912,13 +912,13 @@ def test_take_invalid_kwargs(self): indices = [1, 0, -1] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, idx.take, - indices, foo=2) + tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, out=indices) + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, mode='clip') + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 8e020846f5c50..ec56791a6ec67 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -602,9 +602,9 @@ def test_comparison(self): actual = self.index == self.index.left tm.assert_numpy_array_equal(actual, np.array([False, False])) - with tm.assertRaisesRegexp(TypeError, 'unorderable types'): + with tm.assert_raises_regex(TypeError, 'unorderable types'): self.index > 0 - with tm.assertRaisesRegexp(TypeError, 'unorderable types'): + with tm.assert_raises_regex(TypeError, 'unorderable types'): self.index <= 0 with pytest.raises(TypeError): self.index > np.arange(2) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 34051a9749af8..ab403cf56e033 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -22,9 +22,7 @@ import pandas.util.testing as tm -from pandas.util.testing import (assertRaisesRegexp, - assert_almost_equal, assert_copy) - +from pandas.util.testing import assert_almost_equal, assert_copy from .common import Base @@ -61,7 +59,7 @@ def f(): if common: pass - tm.assertRaisesRegexp(ValueError, 'The truth value of a', f) + tm.assert_raises_regex(ValueError, 'The truth value of a', f) def test_labels_dtypes(self): @@ -125,7 +123,8 @@ def test_numpy_repeat(self): tm.assert_index_equal(np.repeat(m, reps), expected) msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.repeat, m, reps, axis=1) + tm.assert_raises_regex( + ValueError, msg, np.repeat, m, 
reps, axis=1) def test_set_name_methods(self): # so long as these are synonyms, we don't need to test set_names @@ -134,7 +133,7 @@ def test_set_name_methods(self): ind = self.index.set_names(new_names) self.assertEqual(self.index.names, self.index_names) self.assertEqual(ind.names, new_names) - with assertRaisesRegexp(ValueError, "^Length"): + with tm.assert_raises_regex(ValueError, "^Length"): ind.set_names(new_names + new_names) new_names2 = [name + "SUFFIX2" for name in new_names] res = ind.set_names(new_names2, inplace=True) @@ -224,23 +223,23 @@ def assert_matching(actual, expected, check_dtype=False): # GH 13754 original_index = self.index.copy() for inplace in [True, False]: - with assertRaisesRegexp(ValueError, "^On"): + with tm.assert_raises_regex(ValueError, "^On"): self.index.set_levels(['c'], level=0, inplace=inplace) assert_matching(self.index.levels, original_index.levels, check_dtype=True) - with assertRaisesRegexp(ValueError, "^On"): + with tm.assert_raises_regex(ValueError, "^On"): self.index.set_labels([0, 1, 2, 3, 4, 5], level=0, inplace=inplace) assert_matching(self.index.labels, original_index.labels, check_dtype=True) - with assertRaisesRegexp(TypeError, "^Levels"): + with tm.assert_raises_regex(TypeError, "^Levels"): self.index.set_levels('c', level=0, inplace=inplace) assert_matching(self.index.levels, original_index.levels, check_dtype=True) - with assertRaisesRegexp(TypeError, "^Labels"): + with tm.assert_raises_regex(TypeError, "^Labels"): self.index.set_labels(1, level=0, inplace=inplace) assert_matching(self.index.labels, original_index.labels, check_dtype=True) @@ -313,46 +312,46 @@ def test_set_levels_labels_names_bad_input(self): levels, labels = self.index.levels, self.index.labels names = self.index.names - with tm.assertRaisesRegexp(ValueError, 'Length of levels'): + with tm.assert_raises_regex(ValueError, 'Length of levels'): self.index.set_levels([levels[0]]) - with tm.assertRaisesRegexp(ValueError, 'Length of labels'): + with tm.assert_raises_regex(ValueError, 'Length of labels'): self.index.set_labels([labels[0]]) - with tm.assertRaisesRegexp(ValueError, 'Length of names'): + with tm.assert_raises_regex(ValueError, 'Length of names'): self.index.set_names([names[0]]) # shouldn't scalar data error, instead should demand list-like - with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): + with tm.assert_raises_regex(TypeError, 'list of lists-like'): self.index.set_levels(levels[0]) # shouldn't scalar data error, instead should demand list-like - with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): + with tm.assert_raises_regex(TypeError, 'list of lists-like'): self.index.set_labels(labels[0]) # shouldn't scalar data error, instead should demand list-like - with tm.assertRaisesRegexp(TypeError, 'list-like'): + with tm.assert_raises_regex(TypeError, 'list-like'): self.index.set_names(names[0]) # should have equal lengths - with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): + with tm.assert_raises_regex(TypeError, 'list of lists-like'): self.index.set_levels(levels[0], level=[0, 1]) - with tm.assertRaisesRegexp(TypeError, 'list-like'): + with tm.assert_raises_regex(TypeError, 'list-like'): self.index.set_levels(levels, level=0) # should have equal lengths - with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): + with tm.assert_raises_regex(TypeError, 'list of lists-like'): self.index.set_labels(labels[0], level=[0, 1]) - with tm.assertRaisesRegexp(TypeError, 'list-like'): + with tm.assert_raises_regex(TypeError, 
'list-like'): self.index.set_labels(labels, level=0) # should have equal lengths - with tm.assertRaisesRegexp(ValueError, 'Length of names'): + with tm.assert_raises_regex(ValueError, 'Length of names'): self.index.set_names(names[0], level=[0, 1]) - with tm.assertRaisesRegexp(TypeError, 'string'): + with tm.assert_raises_regex(TypeError, 'string'): self.index.set_names(names, level=0) def test_set_levels_categorical(self): @@ -375,18 +374,18 @@ def test_metadata_immutable(self): levels, labels = self.index.levels, self.index.labels # shouldn't be able to set at either the top level or base level mutable_regex = re.compile('does not support mutable operations') - with assertRaisesRegexp(TypeError, mutable_regex): + with tm.assert_raises_regex(TypeError, mutable_regex): levels[0] = levels[0] - with assertRaisesRegexp(TypeError, mutable_regex): + with tm.assert_raises_regex(TypeError, mutable_regex): levels[0][0] = levels[0][0] # ditto for labels - with assertRaisesRegexp(TypeError, mutable_regex): + with tm.assert_raises_regex(TypeError, mutable_regex): labels[0] = labels[0] - with assertRaisesRegexp(TypeError, mutable_regex): + with tm.assert_raises_regex(TypeError, mutable_regex): labels[0][0] = labels[0][0] # and for names names = self.index.names - with assertRaisesRegexp(TypeError, mutable_regex): + with tm.assert_raises_regex(TypeError, mutable_regex): names[0] = names[0] def test_inplace_mutation_resets_values(self): @@ -494,22 +493,23 @@ def test_names(self): # setting bad names on existing index = self.index - assertRaisesRegexp(ValueError, "^Length of names", setattr, index, - "names", list(index.names) + ["third"]) - assertRaisesRegexp(ValueError, "^Length of names", setattr, index, - "names", []) + tm.assert_raises_regex(ValueError, "^Length of names", + setattr, index, "names", + list(index.names) + ["third"]) + tm.assert_raises_regex(ValueError, "^Length of names", + setattr, index, "names", []) # initializing with bad names (should always be equivalent) major_axis, minor_axis = self.index.levels major_labels, minor_labels = self.index.labels - assertRaisesRegexp(ValueError, "^Length of names", MultiIndex, - levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], - names=['first']) - assertRaisesRegexp(ValueError, "^Length of names", MultiIndex, - levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], - names=['first', 'second', 'third']) + tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, + levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first']) + tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, + levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first', 'second', 'third']) # names are assigned index.names = ["a", "b"] @@ -533,7 +533,7 @@ def test_astype(self): assert_copy(actual.labels, expected.labels) self.check_level_names(actual, expected.names) - with assertRaisesRegexp(TypeError, "^Setting.*dtype.*object"): + with tm.assert_raises_regex(TypeError, "^Setting.*dtype.*object"): self.index.astype(np.dtype(int)) def test_constructor_single_level(self): @@ -548,46 +548,47 @@ def test_constructor_single_level(self): assert single_level.name is None def test_constructor_no_levels(self): - tm.assertRaisesRegexp(ValueError, "non-zero number of levels/labels", - MultiIndex, levels=[], labels=[]) + tm.assert_raises_regex(ValueError, "non-zero number " + "of levels/labels", + MultiIndex, levels=[], labels=[]) both_re = re.compile('Must pass both 
levels and labels') - with tm.assertRaisesRegexp(TypeError, both_re): + with tm.assert_raises_regex(TypeError, both_re): MultiIndex(levels=[]) - with tm.assertRaisesRegexp(TypeError, both_re): + with tm.assert_raises_regex(TypeError, both_re): MultiIndex(labels=[]) def test_constructor_mismatched_label_levels(self): labels = [np.array([1]), np.array([2]), np.array([3])] levels = ["a"] - assertRaisesRegexp(ValueError, "Length of levels and labels must be" - " the same", MultiIndex, levels=levels, - labels=labels) + tm.assert_raises_regex(ValueError, "Length of levels and labels " + "must be the same", MultiIndex, + levels=levels, labels=labels) length_error = re.compile('>= length of level') label_error = re.compile(r'Unequal label lengths: \[4, 2\]') # important to check that it's looking at the right thing. - with tm.assertRaisesRegexp(ValueError, length_error): + with tm.assert_raises_regex(ValueError, length_error): MultiIndex(levels=[['a'], ['b']], labels=[[0, 1, 2, 3], [0, 3, 4, 1]]) - with tm.assertRaisesRegexp(ValueError, label_error): + with tm.assert_raises_regex(ValueError, label_error): MultiIndex(levels=[['a'], ['b']], labels=[[0, 0, 0, 0], [0, 0]]) # external API - with tm.assertRaisesRegexp(ValueError, length_error): + with tm.assert_raises_regex(ValueError, length_error): self.index.copy().set_levels([['a'], ['b']]) - with tm.assertRaisesRegexp(ValueError, label_error): + with tm.assert_raises_regex(ValueError, label_error): self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]]) # deprecated properties with warnings.catch_warnings(): warnings.simplefilter('ignore') - with tm.assertRaisesRegexp(ValueError, length_error): + with tm.assert_raises_regex(ValueError, length_error): self.index.copy().levels = [['a'], ['b']] - with tm.assertRaisesRegexp(ValueError, label_error): + with tm.assert_raises_regex(ValueError, label_error): self.index.copy().labels = [[0, 0, 0, 0], [0, 0]] def assert_multiindex_copied(self, copy, original): @@ -650,16 +651,16 @@ def test_changing_names(self): def test_duplicate_names(self): self.index.names = ['foo', 'foo'] - assertRaisesRegexp(KeyError, 'Level foo not found', - self.index._get_level_number, 'foo') + tm.assert_raises_regex(KeyError, 'Level foo not found', + self.index._get_level_number, 'foo') def test_get_level_number_integer(self): self.index.names = [1, 0] self.assertEqual(self.index._get_level_number(1), 0) self.assertEqual(self.index._get_level_number(0), 1) pytest.raises(IndexError, self.index._get_level_number, 2) - assertRaisesRegexp(KeyError, 'Level fourth not found', - self.index._get_level_number, 'fourth') + tm.assert_raises_regex(KeyError, 'Level fourth not found', + self.index._get_level_number, 'fourth') def test_from_arrays(self): arrays = [] @@ -762,7 +763,7 @@ def test_from_arrays_index_series_categorical(self): def test_from_arrays_empty(self): # 0 levels - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( ValueError, "Must pass non-zero number of levels/labels"): MultiIndex.from_arrays(arrays=[]) @@ -787,21 +788,24 @@ def test_from_arrays_invalid_input(self): pytest.raises(TypeError, MultiIndex.from_arrays, arrays=i) def test_from_arrays_different_lengths(self): - # GH13599 + # see gh-13599 idx1 = [1, 2, 3] idx2 = ['a', 'b'] - assertRaisesRegexp(ValueError, '^all arrays must be same length$', - MultiIndex.from_arrays, [idx1, idx2]) + tm.assert_raises_regex(ValueError, '^all arrays must ' + 'be same length$', + MultiIndex.from_arrays, [idx1, idx2]) idx1 = [] idx2 = ['a', 'b'] - assertRaisesRegexp(ValueError, '^all 
arrays must be same length$', - MultiIndex.from_arrays, [idx1, idx2]) + tm.assert_raises_regex(ValueError, '^all arrays must ' + 'be same length$', + MultiIndex.from_arrays, [idx1, idx2]) idx1 = [1, 2, 3] idx2 = [] - assertRaisesRegexp(ValueError, '^all arrays must be same length$', - MultiIndex.from_arrays, [idx1, idx2]) + tm.assert_raises_regex(ValueError, '^all arrays must ' + 'be same length$', + MultiIndex.from_arrays, [idx1, idx2]) def test_from_product(self): @@ -820,7 +824,7 @@ def test_from_product(self): def test_from_product_empty(self): # 0 levels - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( ValueError, "Must pass non-zero number of levels/labels"): MultiIndex.from_product([]) @@ -990,8 +994,8 @@ def test_get_level_values_na(self): def test_reorder_levels(self): # this blows up - assertRaisesRegexp(IndexError, '^Too many levels', - self.index.reorder_levels, [2, 1, 0]) + tm.assert_raises_regex(IndexError, '^Too many levels', + self.index.reorder_levels, [2, 1, 0]) def test_nlevels(self): self.assertEqual(self.index.nlevels, 2) @@ -1189,17 +1193,19 @@ def test_slice_locs_with_type_mismatch(self): df = tm.makeTimeDataFrame() stacked = df.stack() idx = stacked.index - assertRaisesRegexp(TypeError, '^Level type mismatch', idx.slice_locs, - (1, 3)) - assertRaisesRegexp(TypeError, '^Level type mismatch', idx.slice_locs, - df.index[5] + timedelta(seconds=30), (5, 2)) + tm.assert_raises_regex(TypeError, '^Level type mismatch', + idx.slice_locs, (1, 3)) + tm.assert_raises_regex(TypeError, '^Level type mismatch', + idx.slice_locs, + df.index[5] + timedelta( + seconds=30), (5, 2)) df = tm.makeCustomDataframe(5, 5) stacked = df.stack() idx = stacked.index - with assertRaisesRegexp(TypeError, '^Level type mismatch'): + with tm.assert_raises_regex(TypeError, '^Level type mismatch'): idx.slice_locs(timedelta(seconds=30)) # TODO: Try creating a UnicodeDecodeError in exception message - with assertRaisesRegexp(TypeError, '^Level type mismatch'): + with tm.assert_raises_regex(TypeError, '^Level type mismatch'): idx.slice_locs(df.index[1], (16, "a")) def test_slice_locs_not_sorted(self): @@ -1207,9 +1213,9 @@ def test_slice_locs_not_sorted(self): lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - assertRaisesRegexp(KeyError, "[Kk]ey length.*greater than MultiIndex" - " lexsort depth", index.slice_locs, (1, 0, 1), - (2, 1, 0)) + tm.assert_raises_regex(KeyError, "[Kk]ey length.*greater than " + "MultiIndex lexsort depth", + index.slice_locs, (1, 0, 1), (2, 1, 0)) # works sorted_index, _ = index.sortlevel(0) @@ -1348,7 +1354,7 @@ def test_get_indexer(self): idx2 = Index(lrange(20)) msg = "Reindexing only valid with uniquely valued Index objects" - with assertRaisesRegexp(InvalidIndexError, msg): + with tm.assert_raises_regex(InvalidIndexError, msg): idx1.get_indexer(idx2) def test_get_indexer_nearest(self): @@ -1695,12 +1701,14 @@ def test_difference(self): 'foo', 'two'), ('qux', 'one'), ('qux', 'two')]) expected.names = first.names self.assertEqual(first.names, result.names) - assertRaisesRegexp(TypeError, "other must be a MultiIndex or a list" - " of tuples", first.difference, [1, 2, 3, 4, 5]) + tm.assert_raises_regex(TypeError, "other must be a MultiIndex " + "or a list of tuples", + first.difference, [1, 2, 3, 4, 5]) def test_from_tuples(self): - assertRaisesRegexp(TypeError, 'Cannot infer number of levels from' - ' empty list', MultiIndex.from_tuples, []) + tm.assert_raises_regex(TypeError, 'Cannot 
infer number of levels ' + 'from empty list', + MultiIndex.from_tuples, []) idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) self.assertEqual(len(idx), 2) @@ -1880,7 +1888,7 @@ def test_insert(self): # key wrong length msg = "Item must have length equal to number of levels" - with assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.index.insert(0, ('foo2', )) left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]], @@ -1964,9 +1972,9 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -1979,16 +1987,16 @@ def take_invalid_kwargs(self): indices = [1, 2] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, idx.take, - indices, foo=2) + tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, out=indices) + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, mode='clip') + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') def test_join_level(self): def _check_how(other, how): @@ -2031,8 +2039,8 @@ def _check_all(other): result = idx.join(self.index, level='second') assert isinstance(result, MultiIndex) - assertRaisesRegexp(TypeError, "Join.*MultiIndex.*ambiguous", - self.index.join, self.index, level=1) + tm.assert_raises_regex(TypeError, "Join.*MultiIndex.*ambiguous", + self.index.join, self.index, level=1) def test_join_self(self): kinds = 'outer', 'inner', 'left', 'right' @@ -2102,12 +2110,13 @@ def test_reindex_level(self): exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False) - assertRaisesRegexp(TypeError, "Fill method not supported", - self.index.reindex, self.index, method='pad', - level='second') + tm.assert_raises_regex(TypeError, "Fill method not supported", + self.index.reindex, self.index, + method='pad', level='second') - assertRaisesRegexp(TypeError, "Fill method not supported", idx.reindex, - idx, method='bfill', level='first') + tm.assert_raises_regex(TypeError, "Fill method not supported", + idx.reindex, idx, method='bfill', + level='first') def test_duplicates(self): self.assertFalse(self.index.has_duplicates) @@ -2760,7 +2769,7 @@ def test_dropna(self): tm.assert_index_equal(idx.dropna(how='all'), exp) msg = "invalid how option: xxx" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.dropna(how='xxx') def test_unsortedindex(self): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index f0c42a3dae239..8a46da37572ff 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -349,7 +349,7 @@ def test_get_loc(self): pytest.raises(KeyError, idx.get_loc, 1.5, method='pad', tolerance=0.1) - with tm.assertRaisesRegexp(ValueError, 'must be numeric'): + with tm.assert_raises_regex(ValueError, 'must be numeric'): idx.get_loc(1.4, method='nearest', 
tolerance='foo') def test_get_loc_na(self): @@ -438,9 +438,9 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -566,7 +566,7 @@ def test_take_fill_value(self): "{name} cannot contain NA").format(name=name) # fill_value=True - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -1]), fill_value=True) # allow_fill=False @@ -575,9 +575,9 @@ def test_take_fill_value(self): expected = self._holder([2, 1, 3], name='xxx') tm.assert_index_equal(result, expected) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -672,11 +672,11 @@ def test_constructor_corner(self): # preventing casting arr = np.array([1, '2', 3, '4'], dtype=object) - with tm.assertRaisesRegexp(TypeError, 'casting'): + with tm.assert_raises_regex(TypeError, 'casting'): Int64Index(arr) arr_with_floats = [0, 2, 3, 4, 5, 1.25, 3, -1] - with tm.assertRaisesRegexp(TypeError, 'casting'): + with tm.assert_raises_regex(TypeError, 'casting'): Int64Index(arr_with_floats) def test_coerce_list(self): diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 6f87687f822e9..c3ffb32c36e3b 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -12,7 +12,6 @@ from pandas import (notnull, Series, Index, Float64Index, Int64Index, RangeIndex) -from pandas.util.testing import assertRaisesRegexp import pandas.util.testing as tm @@ -65,7 +64,7 @@ def test_too_many_names(self): def testit(): self.index.names = ["roger", "harold"] - assertRaisesRegexp(ValueError, "^Length", testit) + tm.assert_raises_regex(ValueError, "^Length", testit) def test_constructor(self): index = RangeIndex(5) @@ -90,7 +89,7 @@ def test_constructor(self): tm.assert_index_equal(Index(expected), index) msg = "RangeIndex\\(\\.\\.\\.\\) must be called with integers" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): RangeIndex() for index in [RangeIndex(0), RangeIndex(start=0), RangeIndex(stop=0), @@ -102,7 +101,7 @@ def test_constructor(self): self.assertEqual(index._step, 1) tm.assert_index_equal(Index(expected), index) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): RangeIndex(name='Foo') for index in [RangeIndex(0, name='Foo'), @@ -724,7 +723,7 @@ def test_take_fill_value(self): # fill_value msg = "Unable to fill values because RangeIndex cannot contain NA" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -1]), fill_value=True) # allow_fill=False @@ -734,9 +733,9 @@ def test_take_fill_value(self): tm.assert_index_equal(result, expected) msg = "Unable to fill values because RangeIndex cannot contain NA" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), 
fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index da23f8698f4dc..c3cc05271e978 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -87,16 +87,18 @@ def test_numpy_minmax(self): self.assertEqual(np.max(td), Timedelta('16820 days')) errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.min, td, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.max, td, out=0) + tm.assert_raises_regex(ValueError, errmsg, np.min, td, out=0) + tm.assert_raises_regex(ValueError, errmsg, np.max, td, out=0) self.assertEqual(np.argmin(td), 0) self.assertEqual(np.argmax(td), 5) if not _np_version_under1p10: errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, td, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, td, out=0) + tm.assert_raises_regex( + ValueError, errmsg, np.argmin, td, out=0) + tm.assert_raises_regex( + ValueError, errmsg, np.argmax, td, out=0) def test_round(self): td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min') @@ -115,14 +117,14 @@ def test_round(self): self.assertEqual(elt.round(freq='H'), expected_elt) msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): td.round(freq='foo') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): elt.round(freq='foo') msg = " is a non-fixed frequency" - tm.assertRaisesRegexp(ValueError, msg, td.round, freq='M') - tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') + tm.assert_raises_regex(ValueError, msg, td.round, freq='M') + tm.assert_raises_regex(ValueError, msg, elt.round, freq='M') def test_representation(self): idx1 = TimedeltaIndex([], freq='D') @@ -262,7 +264,7 @@ def test_sub_isub(self): idx = TimedeltaIndex(['1 day', '2 day']) msg = "cannot subtract a datelike from a TimedeltaIndex" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): idx - Timestamp('2011-01-01') result = Timestamp('2011-01-01') + idx @@ -497,7 +499,7 @@ def test_addition_ops(self): def test_comp_nat(self): left = pd.TimedeltaIndex([pd.Timedelta('1 days'), pd.NaT, - pd.Timedelta('3 days')]) + pd.Timedelta('3 days')]) right = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta('3 days')]) for l, r in [(left, right), (left.asobject, right.asobject)]: @@ -751,16 +753,16 @@ def test_take_invalid_kwargs(self): indices = [1, 6, 5, 9, 10, 13, 15, 3] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, idx.take, - indices, foo=2) + tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, out=indices) + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, mode='clip') + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') def test_infer_freq(self): # GH 11018 @@ -1248,22 +1250,22 @@ def test_tdi_ops_attributes(self): def test_add_overflow(self): # see gh-14068 msg = "too (big|large) to 
convert" - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): to_timedelta(106580, 'D') + Timestamp('2000') - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): Timestamp('2000') + to_timedelta(106580, 'D') _NaT = int(pd.NaT) + 1 msg = "Overflow in int64 addition" - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): to_timedelta([106580], 'D') + Timestamp('2000') - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): Timestamp('2000') + to_timedelta([106580], 'D') - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): to_timedelta([_NaT]) - Timedelta('1 days') - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): to_timedelta(['5 days', _NaT]) - Timedelta('1 days') - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): (to_timedelta([_NaT, '5 days', '1 hours']) - to_timedelta(['7 seconds', _NaT, '4 hours'])) diff --git a/pandas/tests/indexes/timedeltas/test_partial_slicing.py b/pandas/tests/indexes/timedeltas/test_partial_slicing.py index ccb9a7f1803e7..230dbe91b4e34 100644 --- a/pandas/tests/indexes/timedeltas/test_partial_slicing.py +++ b/pandas/tests/indexes/timedeltas/test_partial_slicing.py @@ -75,9 +75,9 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) - tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index b93f76c14dcfe..b5bdf031180ec 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -59,7 +59,7 @@ def test_get_loc(self): idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1) self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1) - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): + with tm.assert_raises_regex(ValueError, 'must be convertible'): idx.get_loc(idx[1], method='nearest', tolerance='foo') for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: @@ -235,9 +235,9 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -461,8 +461,8 @@ def test_pickle(self): def test_hash_error(self): index = timedelta_range('1 days', periods=10) - with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % - type(index).__name__): + with tm.assert_raises_regex(TypeError, 
"unhashable type: %r" % + type(index).__name__): hash(index) def test_append_join_nondatetimeindex(self): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 8bd56b5885bba..55f16c10e9945 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -37,10 +37,10 @@ def test_timedelta_range(self): arr = np.arange(10).reshape(2, 5) df = pd.DataFrame(np.arange(10).reshape(2, 5)) for arg in (arr, df): - with tm.assertRaisesRegexp(TypeError, "1-d array"): + with tm.assert_raises_regex(TypeError, "1-d array"): to_timedelta(arg) for errors in ['ignore', 'raise', 'coerce']: - with tm.assertRaisesRegexp(TypeError, "1-d array"): + with tm.assert_raises_regex(TypeError, "1-d array"): to_timedelta(arg, errors=errors) # issue10583 diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index b4f6f33a6e06a..12ed8a2e38f92 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -113,8 +113,8 @@ def test_to_timedelta_invalid(self): # bad value for errors parameter msg = "errors must be one of" - tm.assertRaisesRegexp(ValueError, msg, to_timedelta, - ['foo'], errors='never') + tm.assert_raises_regex(ValueError, msg, to_timedelta, + ['foo'], errors='never') # these will error pytest.raises(ValueError, lambda: to_timedelta([1, 2], unit='foo')) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index e0f95a1fd5c0d..f9fcef16c12d4 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -135,7 +135,7 @@ def test_loc_listlike_dtypes(self): exp = DataFrame({'A': [1, 1, 2], 'B': [4, 4, 5]}, index=exp_index) tm.assert_frame_equal(res, exp, check_index_type=True) - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( KeyError, 'a list-indexer must only include values that are ' 'in the categories'): @@ -160,7 +160,7 @@ def test_loc_listlike_dtypes(self): ]}, index=CategoricalIndex(['a', 'a', 'a', 'a', 'b'])) tm.assert_frame_equal(res, exp, check_index_type=True) - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( KeyError, 'a list-indexer must only include values ' 'that are in the categories'): @@ -190,7 +190,7 @@ def test_loc_listlike_dtypes(self): categories=list('abcde'))) tm.assert_frame_equal(res, exp, check_index_type=True) - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( KeyError, 'a list-indexer must only include values ' 'that are in the categories'): diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 15a56d97eeaec..b8030d84e7929 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -423,12 +423,12 @@ def test_insert_index_datetime64(self): # ToDo: must coerce to object msg = "Passed item and index have different timezone" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): obj.insert(1, pd.Timestamp('2012-01-01', tz='US/Eastern')) # ToDo: must coerce to object msg = "cannot insert DatetimeIndex with incompatible label" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): obj.insert(1, 1) def test_insert_index_datetime64tz(self): @@ -445,17 +445,17 @@ def test_insert_index_datetime64tz(self): # ToDo: must coerce to object msg = "Passed item and index have 
different timezone" -        with tm.assertRaisesRegexp(ValueError, msg): +        with tm.assert_raises_regex(ValueError, msg): obj.insert(1, pd.Timestamp('2012-01-01'))          # ToDo: must coerce to object         msg = "Passed item and index have different timezone" -        with tm.assertRaisesRegexp(ValueError, msg): +        with tm.assert_raises_regex(ValueError, msg): obj.insert(1, pd.Timestamp('2012-01-01', tz='Asia/Tokyo'))          # ToDo: must coerce to object         msg = "cannot insert DatetimeIndex with incompatible label" -        with tm.assertRaisesRegexp(TypeError, msg): +        with tm.assert_raises_regex(TypeError, msg): obj.insert(1, 1)      def test_insert_index_timedelta64(self): @@ -469,12 +469,12 @@ def test_insert_index_timedelta64(self): # ToDo: must coerce to object msg = "cannot insert TimedeltaIndex with incompatible label" -        with tm.assertRaisesRegexp(TypeError, msg): +        with tm.assert_raises_regex(TypeError, msg): obj.insert(1, pd.Timestamp('2012-01-01')) # ToDo: must coerce to object msg = "cannot insert TimedeltaIndex with incompatible label" -        with tm.assertRaisesRegexp(TypeError, msg): +        with tm.assert_raises_regex(TypeError, msg): obj.insert(1, 1) def test_insert_index_period(self): @@ -778,7 +778,7 @@ def test_where_series_datetime64(self): # ToDo: coerce to object msg = "cannot coerce a Timestamp with a tz on a naive Block" -        with tm.assertRaisesRegexp(TypeError, msg): +        with tm.assert_raises_regex(TypeError, msg): obj.where(cond, pd.Timestamp('2012-01-01', tz='US/Eastern')) # ToDo: do not coerce to UTC, must be object @@ -819,7 +819,7 @@ def test_where_index_datetime64(self): # ToDo: coerce to object msg = ("Index\\(\\.\\.\\.\\) must be called with a collection " "of some kind") -        with tm.assertRaisesRegexp(TypeError, msg): +        with tm.assert_raises_regex(TypeError, msg): obj.where(cond, pd.Timestamp('2012-01-01', tz='US/Eastern')) # ToDo: do not ignore timezone, must be object diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 02df4de010fbe..bdee41acbc8fd 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -48,9 +48,9 @@ def test_scalar_error(self): def f(): s.iloc[3.0] -        tm.assertRaisesRegexp(TypeError, -                              'cannot do positional indexing', -                              f) +        tm.assert_raises_regex(TypeError, +                               'cannot do positional indexing', +                               f) def f(): s.iloc[3.0] = 0 diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 174026a00fcdd..18b169559b2d4 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -22,8 +22,9 @@ def test_iloc_exceeds_bounds(self): expected = df # lists of positions should raise IndexError!
- with tm.assertRaisesRegexp(IndexError, - 'positional indexers are out-of-bounds'): + with tm.assert_raises_regex(IndexError, + 'positional indexers ' + 'are out-of-bounds'): df.iloc[:, [0, 1, 2, 3, 4, 5]] pytest.raises(IndexError, lambda: df.iloc[[1, 30]]) pytest.raises(IndexError, lambda: df.iloc[[1, -30]]) @@ -35,14 +36,14 @@ def test_iloc_exceeds_bounds(self): # still raise on a single indexer msg = 'single positional indexer is out-of-bounds' - with tm.assertRaisesRegexp(IndexError, msg): + with tm.assert_raises_regex(IndexError, msg): df.iloc[30] pytest.raises(IndexError, lambda: df.iloc[-30]) # GH10779 # single positive/negative indexer exceeding Series bounds should raise # an IndexError - with tm.assertRaisesRegexp(IndexError, msg): + with tm.assert_raises_regex(IndexError, msg): s.iloc[30] pytest.raises(IndexError, lambda: s.iloc[-30]) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index f7a9c9fe59594..f8a7c57ad5061 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -716,13 +716,14 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): s = Series(np.arange(20), index=_mklbl('A', 20)) - tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: s[::0]) - tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: s.loc[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: s[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: s.loc[::0]) with catch_warnings(record=True): - tm.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: s.ix[::0]) + tm.assert_raises_regex(ValueError, + 'slice step cannot be zero', + lambda: s.ix[::0]) def test_indexing_assignment_dict_already_exists(self): df = pd.DataFrame({'x': [1, 2, 6], diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index 18cb9a3a063b9..a85c6bb446140 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -294,9 +294,9 @@ def test_getitem_partial_int(self): tm.assert_frame_equal(result, expected) # missing item: - with tm.assertRaisesRegexp(KeyError, '1'): + with tm.assert_raises_regex(KeyError, '1'): df[1] - with tm.assertRaisesRegexp(KeyError, "'\[1\] not in index'"): + with tm.assert_raises_regex(KeyError, "'\[1\] not in index'"): df[[1]] def test_loc_multiindex_indexer_none(self): @@ -801,7 +801,7 @@ def f(): self.assertEqual(df.index.lexsort_depth, 2) df = df.sort_index(level=1, axis=0) self.assertEqual(df.index.lexsort_depth, 0) - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( UnsortedIndexError, 'MultiIndex Slicing requires the index to be fully ' r'lexsorted tuple len \(2\), lexsort depth \(0\)'): diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index a583bf1c5ef16..70c7eaf7446db 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -154,8 +154,8 @@ def test_at_to_fail(self): df.columns = ['x', 'x', 'z'] # Check that we get the correct value in the KeyError - tm.assertRaisesRegexp(KeyError, r"\['y'\] not in index", - lambda: df[['x', 'y', 'z']]) + tm.assert_raises_regex(KeyError, r"\['y'\] not in index", + lambda: df[['x', 'y', 'z']]) def test_at_with_tz(self): # gh-15822 diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 51295fd750602..02c73019b0f65 100644 --- 
a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -31,7 +31,7 @@ def test_to_csv_quotechar(self): self.assertEqual(f.read(), expected) with tm.ensure_clean('test.csv') as path: - with tm.assertRaisesRegexp(TypeError, 'quotechar'): + with tm.assert_raises_regex(TypeError, 'quotechar'): df.to_csv(path, quoting=1, quotechar=None) def test_to_csv_doublequote(self): @@ -49,7 +49,7 @@ def test_to_csv_doublequote(self): from _csv import Error with tm.ensure_clean('test.csv') as path: - with tm.assertRaisesRegexp(Error, 'escapechar'): + with tm.assert_raises_regex(Error, 'escapechar'): df.to_csv(path, doublequote=False) # no escapechar set def test_to_csv_escapechar(self): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index a8ea76ff9459c..0dfae0fb88bf6 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -342,7 +342,8 @@ def test_frame_from_json_bad_data(self): json = StringIO('{"badkey":["A","B"],' '"index":["2","3"],' '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}') - with tm.assertRaisesRegexp(ValueError, r"unexpected key\(s\): badkey"): + with tm.assert_raises_regex(ValueError, + r"unexpected key\(s\): badkey"): read_json(json, orient="split") def test_frame_from_json_nones(self): @@ -783,7 +784,7 @@ def test_misc_example(self): DataFrame\\.index values are different \\(100\\.0 %\\) \\[left\\]: Index\\(\\[u?'a', u?'b'\\], dtype='object'\\) \\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)""" - with tm.assertRaisesRegexp(AssertionError, error_msg): + with tm.assert_raises_regex(AssertionError, error_msg): assert_frame_equal(result, expected, check_index_type=False) result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]') @@ -1036,7 +1037,7 @@ def test_to_jsonl(self): def test_latin_encoding(self): if compat.PY2: - tm.assertRaisesRegexp( + tm.assert_raises_regex( TypeError, r'\[unicode\] is not implemented as a table column') return diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index f0ccce5830a65..037e47bfc2a46 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -923,7 +923,7 @@ def my_handler(obj): def my_handler_raises(obj): raise TypeError("I raise for anything") - with tm.assertRaisesRegexp(TypeError, "I raise for anything"): + with tm.assert_raises_regex(TypeError, "I raise for anything"): ujson.encode(_TestObject("foo"), default_handler=my_handler_raises) def my_int_handler(obj): diff --git a/pandas/tests/io/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py index f4ca632e09f39..7ce8c61777bc7 100644 --- a/pandas/tests/io/parser/c_parser_only.py +++ b/pandas/tests/io/parser/c_parser_only.py @@ -96,7 +96,7 @@ def test_dtype_and_names_error(self): 3.0 3 """ # fallback casting, but not castable - with tm.assertRaisesRegexp(ValueError, 'cannot safely convert'): + with tm.assert_raises_regex(ValueError, 'cannot safely convert'): self.read_csv(StringIO(data), sep=r'\s+', header=None, names=['a', 'b'], dtype={'a': np.int32}) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index deeeaef63da39..9abd3c5bfe993 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -44,7 +44,7 @@ def test_empty_decimal_marker(self): """ # Parsers support only length-1 decimals msg = 'Only length-1 decimal markers supported' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): 
             self.read_csv(StringIO(data), decimal='')

     def test_bad_stream_exception(self):
@@ -64,7 +64,7 @@ def test_bad_stream_exception(self):
             msg = "'utf-8' codec can't decode byte"
         else:
             msg = "'utf8' codec can't decode byte"
-        with tm.assertRaisesRegexp(UnicodeDecodeError, msg):
+        with tm.assert_raises_regex(UnicodeDecodeError, msg):
             self.read_csv(stream)
         stream.close()
@@ -126,7 +126,7 @@ def test_malformed(self):
 2,3,4
 """
         msg = 'Expected 3 fields in line 4, saw 5'
-        with tm.assertRaisesRegexp(Exception, msg):
+        with tm.assert_raises_regex(Exception, msg):
             self.read_table(StringIO(data), sep=',',
                             header=1, comment='#')
@@ -140,7 +140,7 @@ def test_malformed(self):
 2,3,4
 """
         msg = 'Expected 3 fields in line 6, saw 5'
-        with tm.assertRaisesRegexp(Exception, msg):
+        with tm.assert_raises_regex(Exception, msg):
             it = self.read_table(StringIO(data), sep=',',
                                  header=1, comment='#',
                                  iterator=True, chunksize=1,
@@ -157,7 +157,7 @@ def test_malformed(self):
 2,3,4
 """
         msg = 'Expected 3 fields in line 6, saw 5'
-        with tm.assertRaisesRegexp(Exception, msg):
+        with tm.assert_raises_regex(Exception, msg):
             it = self.read_table(StringIO(data), sep=',',
                                  header=1, comment='#',
                                  iterator=True, chunksize=1,
                                  skiprows=[2])
@@ -173,7 +173,7 @@ def test_malformed(self):
 2,3,4
 """
         msg = 'Expected 3 fields in line 6, saw 5'
-        with tm.assertRaisesRegexp(Exception, msg):
+        with tm.assert_raises_regex(Exception, msg):
             it = self.read_table(StringIO(data), sep=',',
                                  header=1, comment='#',
                                  iterator=True, chunksize=1,
                                  skiprows=[2])
@@ -190,7 +190,7 @@ def test_malformed(self):
 footer
 """
         msg = 'Expected 3 fields in line 4, saw 5'
-        with tm.assertRaisesRegexp(Exception, msg):
+        with tm.assert_raises_regex(Exception, msg):
             self.read_table(StringIO(data), sep=',',
                             header=1, comment='#',
                             skipfooter=1)
@@ -385,13 +385,13 @@ def test_read_nrows(self):

         msg = r"'nrows' must be an integer >=0"

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(StringIO(self.data1), nrows=1.2)

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(StringIO(self.data1), nrows='foo')

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(StringIO(self.data1), nrows=-1)

     def test_read_chunksize(self):
@@ -407,13 +407,13 @@ def test_read_chunksize(self):

         # with invalid chunksize value:
         msg = r"'chunksize' must be an integer >=1"

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(StringIO(self.data1), chunksize=1.3)

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(StringIO(self.data1), chunksize='foo')

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(StringIO(self.data1), chunksize=0)

     def test_read_chunksize_and_nrows(self):
@@ -1104,7 +1104,7 @@ def test_uneven_lines_with_usecols(self):
         # make sure that an error is still thrown
         # when the 'usecols' parameter is not provided
         msg = r"Expected \d+ fields in line \d+, saw \d+"
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             df = self.read_csv(StringIO(csv))

         expected = DataFrame({
@@ -1130,10 +1130,10 @@ def test_read_empty_with_usecols(self):
         # throws the correct error, with or without usecols
         errmsg = "No columns to parse from file"

-        with tm.assertRaisesRegexp(EmptyDataError, errmsg):
+        with tm.assert_raises_regex(EmptyDataError, errmsg):
             self.read_csv(StringIO(''))

-        with tm.assertRaisesRegexp(EmptyDataError, errmsg):
+        with tm.assert_raises_regex(EmptyDataError, errmsg):
             self.read_csv(StringIO(''), usecols=usecols)

         expected = DataFrame(columns=usecols, index=[0], dtype=np.float64)
@@ -1172,7 +1172,8 @@ def test_trailing_spaces(self):

     def test_raise_on_sep_with_delim_whitespace(self):
         # see gh-6607
         data = 'a b c\n1 2 3'
-        with tm.assertRaisesRegexp(ValueError, 'you can only specify one'):
+        with tm.assert_raises_regex(ValueError,
+                                    'you can only specify one'):
             self.read_table(StringIO(data), sep=r'\s', delim_whitespace=True)

     def test_single_char_leading_whitespace(self):
@@ -1563,7 +1564,7 @@ def test_null_byte_char(self):
             tm.assert_frame_equal(out, expected)
         else:
             msg = "NULL byte detected"
-            with tm.assertRaisesRegexp(ParserError, msg):
+            with tm.assert_raises_regex(ParserError, msg):
                 self.read_csv(StringIO(data), names=cols)

     def test_utf8_bom(self):
@@ -1681,13 +1682,13 @@ class InvalidBuffer(object):

         msg = "Invalid file path or buffer object type"

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(InvalidBuffer())

         if PY3:
             from unittest import mock

-            with tm.assertRaisesRegexp(ValueError, msg):
+            with tm.assert_raises_regex(ValueError, msg):
                 self.read_csv(mock.Mock())

     @tm.capture_stderr
diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py
index 26e216cfbcffa..55c0506acb132 100644
--- a/pandas/tests/io/parser/compression.py
+++ b/pandas/tests/io/parser/compression.py
@@ -45,18 +45,19 @@ def test_zip(self):
                 tmp.writestr(file_name, data)
             tmp.close()

-            tm.assertRaisesRegexp(ValueError, 'Multiple files',
-                                  self.read_csv, path, compression='zip')
+            tm.assert_raises_regex(ValueError, 'Multiple files',
+                                   self.read_csv, path, compression='zip')

-            tm.assertRaisesRegexp(ValueError, 'Multiple files',
-                                  self.read_csv, path, compression='infer')
+            tm.assert_raises_regex(ValueError, 'Multiple files',
+                                   self.read_csv, path,
+                                   compression='infer')

         with tm.ensure_clean() as path:
             tmp = zipfile.ZipFile(path, mode='w')
             tmp.close()

-            tm.assertRaisesRegexp(ValueError, 'Zero files',
-                                  self.read_csv, path, compression='zip')
+            tm.assert_raises_regex(ValueError, 'Zero files',
+                                   self.read_csv, path, compression='zip')

         with tm.ensure_clean() as path:
             with open(path, 'wb') as f:
@@ -167,5 +168,5 @@ def test_read_csv_infer_compression(self):

     def test_invalid_compression(self):
         msg = 'Unrecognized compression type: sfark'
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv('test_file.zip', compression='sfark')
diff --git a/pandas/tests/io/parser/converters.py b/pandas/tests/io/parser/converters.py
index f2b3ce532b59e..6cea0f3e7b36c 100644
--- a/pandas/tests/io/parser/converters.py
+++ b/pandas/tests/io/parser/converters.py
@@ -24,7 +24,7 @@ def test_converters_type_must_be_dict(self):
         data = """index,A,B,C,D
 foo,2,3,4,5
 """
-        with tm.assertRaisesRegexp(TypeError, 'Type converters.+'):
+        with tm.assert_raises_regex(TypeError, 'Type converters.+'):
             self.read_csv(StringIO(data), converters=0)

     def test_converters(self):
diff --git a/pandas/tests/io/parser/dialect.py b/pandas/tests/io/parser/dialect.py
index 82871628e54d6..f756fe71bf684 100644
--- a/pandas/tests/io/parser/dialect.py
+++ b/pandas/tests/io/parser/dialect.py
@@ -61,7 +61,7 @@ class InvalidDialect(object):
         data = 'a\n1'

         msg = 'Invalid dialect'
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(StringIO(data), dialect=InvalidDialect)

     def test_dialect_conflict(self):
diff --git a/pandas/tests/io/parser/header.py b/pandas/tests/io/parser/header.py
index 2f0ca5b311aef..f7967f4fe9765 100644
--- a/pandas/tests/io/parser/header.py
+++ b/pandas/tests/io/parser/header.py
@@ -19,7 +19,7 @@ class HeaderTests(object):

     def test_read_with_bad_header(self):
         errmsg = r"but only \d+ lines in file"

-        with tm.assertRaisesRegexp(ValueError, errmsg):
+        with tm.assert_raises_regex(ValueError, errmsg):
             s = StringIO(',,')
             self.read_csv(s, header=[10])
diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py
index f0a1f58be4026..3833fa3d7ff4e 100644
--- a/pandas/tests/io/parser/parse_dates.py
+++ b/pandas/tests/io/parser/parse_dates.py
@@ -435,11 +435,11 @@ def test_read_with_parse_dates_scalar_non_bool(self):
         data = """A,B,C
 1,2,2003-11-1"""

-        tm.assertRaisesRegexp(TypeError, errmsg, self.read_csv,
-                              StringIO(data), parse_dates="C")
-        tm.assertRaisesRegexp(TypeError, errmsg, self.read_csv,
-                              StringIO(data), parse_dates="C",
-                              index_col="C")
+        tm.assert_raises_regex(TypeError, errmsg, self.read_csv,
+                               StringIO(data), parse_dates="C")
+        tm.assert_raises_regex(TypeError, errmsg, self.read_csv,
+                               StringIO(data), parse_dates="C",
+                               index_col="C")

     def test_read_with_parse_dates_invalid_type(self):
         errmsg = ("Only booleans, lists, and "
@@ -448,12 +448,13 @@ def test_read_with_parse_dates_invalid_type(self):
         data = """A,B,C
 1,2,2003-11-1"""

-        tm.assertRaisesRegexp(TypeError, errmsg, self.read_csv,
-                              StringIO(data), parse_dates=(1,))
-        tm.assertRaisesRegexp(TypeError, errmsg, self.read_csv,
-                              StringIO(data), parse_dates=np.array([4, 5]))
-        tm.assertRaisesRegexp(TypeError, errmsg, self.read_csv,
-                              StringIO(data), parse_dates=set([1, 3, 3]))
+        tm.assert_raises_regex(TypeError, errmsg, self.read_csv,
+                               StringIO(data), parse_dates=(1,))
+        tm.assert_raises_regex(TypeError, errmsg,
+                               self.read_csv, StringIO(data),
+                               parse_dates=np.array([4, 5]))
+        tm.assert_raises_regex(TypeError, errmsg, self.read_csv,
+                               StringIO(data), parse_dates=set([1, 3, 3]))

     def test_parse_dates_empty_string(self):
         # see gh-2263
diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py
index c5fa64d067ee6..1356ace4bb38a 100644
--- a/pandas/tests/io/parser/python_parser_only.py
+++ b/pandas/tests/io/parser/python_parser_only.py
@@ -24,17 +24,17 @@ def test_invalid_skipfooter(self):

         # see gh-15925 (comment)
         msg = "skipfooter must be an integer"

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(StringIO(text), skipfooter="foo")

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(StringIO(text), skipfooter=1.5)

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(StringIO(text), skipfooter=True)

         msg = "skipfooter cannot be negative"

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(StringIO(text), skipfooter=-1)

     def test_sniff_delimiter(self):
@@ -208,13 +208,13 @@ def test_multi_char_sep_quotes(self):
         data = 'a,,b\n1,,a\n2,,"2,,b"'

         msg = 'ignored when a multi-char delimiter is used'
-        with tm.assertRaisesRegexp(ParserError, msg):
+        with tm.assert_raises_regex(ParserError, msg):
             self.read_csv(StringIO(data), sep=',,')

         # We expect no match, so there should be an assertion
         # error out of the inner context manager.
         with pytest.raises(AssertionError):
-            with tm.assertRaisesRegexp(ParserError, msg):
+            with tm.assert_raises_regex(ParserError, msg):
                 self.read_csv(StringIO(data), sep=',,',
                               quoting=csv.QUOTE_NONE)
@@ -226,11 +226,11 @@ def test_skipfooter_bad_row(self):

         for data in ('a\n1\n"b"a',
                      'a,b,c\ncat,foo,bar\ndog,foo,"baz'):
-            with tm.assertRaisesRegexp(ParserError, msg):
+            with tm.assert_raises_regex(ParserError, msg):
                 self.read_csv(StringIO(data), skipfooter=1)

             # We expect no match, so there should be an assertion
             # error out of the inner context manager.
             with pytest.raises(AssertionError):
-                with tm.assertRaisesRegexp(ParserError, msg):
+                with tm.assert_raises_regex(ParserError, msg):
                     self.read_csv(StringIO(data))
diff --git a/pandas/tests/io/parser/quoting.py b/pandas/tests/io/parser/quoting.py
index a692e03e868c7..15427aaf9825c 100644
--- a/pandas/tests/io/parser/quoting.py
+++ b/pandas/tests/io/parser/quoting.py
@@ -20,29 +20,29 @@ def test_bad_quote_char(self):
         # Python 2.x: "...must be an 1-character..."
         # Python 3.x: "...must be a 1-character..."
         msg = '"quotechar" must be a(n)? 1-character string'
-        tm.assertRaisesRegexp(TypeError, msg, self.read_csv,
-                              StringIO(data), quotechar='foo')
+        tm.assert_raises_regex(TypeError, msg, self.read_csv,
+                               StringIO(data), quotechar='foo')

         msg = 'quotechar must be set if quoting enabled'
-        tm.assertRaisesRegexp(TypeError, msg, self.read_csv,
-                              StringIO(data), quotechar=None,
-                              quoting=csv.QUOTE_MINIMAL)
+        tm.assert_raises_regex(TypeError, msg, self.read_csv,
+                               StringIO(data), quotechar=None,
+                               quoting=csv.QUOTE_MINIMAL)

         msg = '"quotechar" must be string, not int'
-        tm.assertRaisesRegexp(TypeError, msg, self.read_csv,
-                              StringIO(data), quotechar=2)
+        tm.assert_raises_regex(TypeError, msg, self.read_csv,
+                               StringIO(data), quotechar=2)

     def test_bad_quoting(self):
         data = '1,2,3'

         msg = '"quoting" must be an integer'
-        tm.assertRaisesRegexp(TypeError, msg, self.read_csv,
-                              StringIO(data), quoting='foo')
+        tm.assert_raises_regex(TypeError, msg, self.read_csv,
+                               StringIO(data), quoting='foo')

         # quoting must in the range [0, 3]
         msg = 'bad "quoting" value'
-        tm.assertRaisesRegexp(TypeError, msg, self.read_csv,
-                              StringIO(data), quoting=5)
+        tm.assert_raises_regex(TypeError, msg, self.read_csv,
+                               StringIO(data), quoting=5)

     def test_quote_char_basic(self):
         data = 'a,b,c\n1,2,"cat"'
@@ -68,13 +68,13 @@ def test_null_quote_char(self):

         # sanity checks
         msg = 'quotechar must be set if quoting enabled'

-        tm.assertRaisesRegexp(TypeError, msg, self.read_csv,
-                              StringIO(data), quotechar=None,
-                              quoting=csv.QUOTE_MINIMAL)
+        tm.assert_raises_regex(TypeError, msg, self.read_csv,
+                               StringIO(data), quotechar=None,
+                               quoting=csv.QUOTE_MINIMAL)

-        tm.assertRaisesRegexp(TypeError, msg, self.read_csv,
-                              StringIO(data), quotechar='',
-                              quoting=csv.QUOTE_MINIMAL)
+        tm.assert_raises_regex(TypeError, msg, self.read_csv,
+                               StringIO(data), quotechar='',
+                               quoting=csv.QUOTE_MINIMAL)

         # no errors should be raised if quoting is None
         expected = DataFrame([[1, 2, 3]],
diff --git a/pandas/tests/io/parser/skiprows.py b/pandas/tests/io/parser/skiprows.py
index cb1b656e42be2..fb08ec0447267 100644
--- a/pandas/tests/io/parser/skiprows.py
+++ b/pandas/tests/io/parser/skiprows.py
@@ -215,11 +215,11 @@ def test_skiprows_callable(self):
         skiprows = lambda x: True

         msg = "No columns to parse from file"
-        with tm.assertRaisesRegexp(EmptyDataError, msg):
+        with tm.assert_raises_regex(EmptyDataError, msg):
             self.read_csv(StringIO(data), skiprows=skiprows)

         # This is a bad callable and should raise.
         msg = "by zero"
         skiprows = lambda x: 1 / 0
-        with tm.assertRaisesRegexp(ZeroDivisionError, msg):
+        with tm.assert_raises_regex(ZeroDivisionError, msg):
             self.read_csv(StringIO(data), skiprows=skiprows)
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
index 9498a7d83e0de..ffb04c52e8d93 100644
--- a/pandas/tests/io/parser/test_read_fwf.py
+++ b/pandas/tests/io/parser/test_read_fwf.py
@@ -67,10 +67,11 @@ def test_fwf(self):
             StringIO(data3), colspecs=colspecs, delimiter='~', header=None)
         tm.assert_frame_equal(df, expected)

-        with tm.assertRaisesRegexp(ValueError, "must specify only one of"):
+        with tm.assert_raises_regex(ValueError,
+                                    "must specify only one of"):
             read_fwf(StringIO(data3), colspecs=colspecs, widths=[6, 10, 10, 7])

-        with tm.assertRaisesRegexp(ValueError, "Must specify either"):
+        with tm.assert_raises_regex(ValueError, "Must specify either"):
             read_fwf(StringIO(data3), colspecs=None, widths=None)

     def test_BytesIO_input(self):
@@ -93,9 +94,9 @@ def test_fwf_colspecs_is_list_or_tuple(self):
 bar2,12,13,14,15
 """

-        with tm.assertRaisesRegexp(TypeError,
-                                   'column specifications must be a list or '
-                                   'tuple.+'):
+        with tm.assert_raises_regex(TypeError,
+                                    'column specifications must '
+                                    'be a list or tuple.+'):
             pd.io.parsers.FixedWidthReader(StringIO(data),
                                            {'a': 1}, ',', '#')
@@ -109,8 +110,9 @@ def test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples(self):
 bar2,12,13,14,15
 """

-        with tm.assertRaisesRegexp(TypeError,
-                                   'Each column specification must be.+'):
+        with tm.assert_raises_regex(TypeError,
+                                    'Each column specification '
+                                    'must be.+'):
             read_fwf(StringIO(data), [('a', 1)])

     def test_fwf_colspecs_None(self):
diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py
index 9637b449de6da..6c2d883aeb16b 100644
--- a/pandas/tests/io/parser/test_unsupported.py
+++ b/pandas/tests/io/parser/test_unsupported.py
@@ -25,7 +25,7 @@ def test_mangle_dupe_cols_false(self):
         msg = 'is not supported'

         for engine in ('c', 'python'):
-            with tm.assertRaisesRegexp(ValueError, msg):
+            with tm.assert_raises_regex(ValueError, msg):
                 read_csv(StringIO(data), engine=engine,
                          mangle_dupe_cols=False)
@@ -35,14 +35,14 @@ def test_c_engine(self):
         msg = 'does not support'

         # specify C engine with unsupported options (raise)
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             read_table(StringIO(data), engine='c',
                        sep=None, delim_whitespace=False)
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             read_table(StringIO(data), engine='c', sep=r'\s')
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             read_table(StringIO(data), engine='c', quotechar=chr(128))
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             read_table(StringIO(data), engine='c', skipfooter=1)

         # specify C-unsupported options without python-unsupported options
@@ -62,9 +62,9 @@ def test_c_engine(self):
 x q 30 3 -0.6662 -0.5243 -0.3580  0.89145  2.5838"""
         msg = 'Error tokenizing data'

-        with tm.assertRaisesRegexp(ParserError, msg):
+        with tm.assert_raises_regex(ParserError, msg):
             read_table(StringIO(text), sep='\\s+')
-        with tm.assertRaisesRegexp(ParserError, msg):
+        with tm.assert_raises_regex(ParserError, msg):
             read_table(StringIO(text), engine='c', sep='\\s+')

         msg = "Only length-1 thousands markers supported"
         data = """A|B|C
 1|2,334|5
 10|13|10.
 """
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             read_csv(StringIO(data), thousands=',,')
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             read_csv(StringIO(data), thousands='')

         msg = "Only length-1 line terminators supported"
         data = 'a,b,c~~1,2,3~~4,5,6'
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             read_csv(StringIO(data), lineterminator='~~')

     def test_python_engine(self):
@@ -98,7 +98,7 @@ def test_python_engine(self):
                    'with the %r engine' % (default, engine))

             kwargs = {default: object()}
-            with tm.assertRaisesRegexp(ValueError, msg):
+            with tm.assert_raises_regex(ValueError, msg):
                 read_csv(StringIO(data), engine=engine, **kwargs)
diff --git a/pandas/tests/io/parser/usecols.py b/pandas/tests/io/parser/usecols.py
index 1ea7353427c30..db8e5b7653a51 100644
--- a/pandas/tests/io/parser/usecols.py
+++ b/pandas/tests/io/parser/usecols.py
@@ -28,7 +28,7 @@ def test_raise_on_mixed_dtype_usecols(self):
                "all integers or a callable")
         usecols = [0, 'b', 2]

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(StringIO(data), usecols=usecols)

     def test_usecols(self):
@@ -351,10 +351,10 @@ def test_usecols_with_mixed_encoding_strings(self):
         msg = ("'usecols' must either be all strings, all unicode, "
               "all integers or a callable")

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(StringIO(s), usecols=[u'AAA', b'BBB'])

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(StringIO(s), usecols=[b'AAA', u'BBB'])

     def test_usecols_with_multibyte_characters(self):
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index fd29caefb8cb5..82819b94413b4 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -108,13 +108,14 @@ def test_constructor_bad_file(self):
             msg = "[Errno 22]"
             err = mmap.error

-        tm.assertRaisesRegexp(err, msg, common.MMapWrapper, non_file)
+        tm.assert_raises_regex(err, msg, common.MMapWrapper, non_file)

         target = open(self.mmap_file, 'r')
         target.close()

         msg = "I/O operation on closed file"
-        tm.assertRaisesRegexp(ValueError, msg, common.MMapWrapper, target)
+        tm.assert_raises_regex(
+            ValueError, msg, common.MMapWrapper, target)

     def test_get_attr(self):
         with open(self.mmap_file, 'r') as target:
diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
index 02652be2153f1..2a3a4992ead71 100644
--- a/pandas/tests/io/test_excel.py
+++ b/pandas/tests/io/test_excel.py
@@ -1870,7 +1870,7 @@ def wrapped(self, *args, **kwargs):
                 else:
                     msg = (r'Installed openpyxl is not supported at this '
                            r'time\. Use.+')
-                    with tm.assertRaisesRegexp(ValueError, msg):
+                    with tm.assert_raises_regex(ValueError, msg):
                         orig_method(self, *args, **kwargs)
             return wrapped
         return versioned_raise_wrapper
@@ -2292,7 +2292,7 @@ class XlsxWriterTests_NoMerge(ExcelWriterBase, tm.TestCase):
 class ExcelWriterEngineTests(tm.TestCase):

     def test_ExcelWriter_dispatch(self):
-        with tm.assertRaisesRegexp(ValueError, 'No engine'):
+        with tm.assert_raises_regex(ValueError, 'No engine'):
             ExcelWriter('nothing')

         try:
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index e7eaab098fe4d..5a30ff2afe7e5 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -219,8 +219,8 @@ def test_skiprows_ndarray(self):
         assert_framelist_equal(df1, df2)

     def test_skiprows_invalid(self):
-        with tm.assertRaisesRegexp(TypeError,
-                                   'is not a valid type for skipping rows'):
+        with tm.assert_raises_regex(TypeError, 'is not a valid type '
+                                    'for skipping rows'):
             self.read_html(self.spam_data, '.*Water.*', skiprows='asdf')

     def test_index(self):
@@ -302,7 +302,7 @@ def test_file_url(self):
     @tm.slow
     def test_invalid_table_attrs(self):
         url = self.banklist_data
-        with tm.assertRaisesRegexp(ValueError, 'No tables found'):
+        with tm.assert_raises_regex(ValueError, 'No tables found'):
             self.read_html(url, 'First Federal Bank of Florida',
                            attrs={'id': 'tasdfable'})
@@ -353,8 +353,8 @@ def test_regex_idempotency(self):
         assert isinstance(df, DataFrame)

     def test_negative_skiprows(self):
-        with tm.assertRaisesRegexp(ValueError,
-                                   r'\(you passed a negative value\)'):
+        with tm.assert_raises_regex(ValueError,
+                                    r'\(you passed a negative value\)'):
             self.read_html(self.spam_data, 'Water', skiprows=-1)

     @network
@@ -652,9 +652,10 @@ def test_parse_dates_combine(self):

     def test_computer_sales_page(self):
         data = os.path.join(DATA_PATH, 'computer_sales_page.html')
-        with tm.assertRaisesRegexp(ParserError, r"Passed header=\[0,1\] are "
-                                   "too many rows for this multi_index "
-                                   "of columns"):
+        with tm.assert_raises_regex(ParserError,
+                                    r"Passed header=\[0,1\] are "
+                                    r"too many rows for this "
+                                    r"multi_index of columns"):
             self.read_html(data, header=[0, 1])

     def test_wikipedia_states_table(self):
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index 94e4fec07e775..875b5bd3055b9 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -403,8 +403,8 @@ def test_write_explicit(self, compression, get_random_path):

     @pytest.mark.parametrize('compression', ['', 'None', 'bad', '7z'])
     def test_write_explicit_bad(self, compression, get_random_path):
-        with tm.assertRaisesRegexp(ValueError,
-                                   "Unrecognized compression type"):
+        with tm.assert_raises_regex(ValueError,
+                                    "Unrecognized compression type"):
             with tm.ensure_clean(get_random_path) as path:
                 df = tm.makeDataFrame()
                 df.to_pickle(path, compression=compression)
diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py
index 7d2c7a97fce0a..1b656e7b1b004 100644
--- a/pandas/tests/io/test_pytables.py
+++ b/pandas/tests/io/test_pytables.py
@@ -971,7 +971,7 @@ def test_encoding(self):

     def test_latin_encoding(self):
         if compat.PY2:
-            tm.assertRaisesRegexp(
+            tm.assert_raises_regex(
                 TypeError, r'\[unicode\] is not implemented as a table column')
             return
@@ -2563,7 +2563,7 @@ def test_terms(self):
         for t in terms:
             store.select('p4d', t)

-        with tm.assertRaisesRegexp(
+        with tm.assert_raises_regex(
                 TypeError, 'Only named functions are supported'):
             store.select(
                 'wp',
@@ -2575,8 +2575,9 @@ def test_terms(self):
         expected = Panel({-1: wpneg[-1]})
         tm.assert_panel_equal(res, expected)

-        with tm.assertRaisesRegexp(NotImplementedError,
-                                   'Unary addition not supported'):
+        with tm.assert_raises_regex(NotImplementedError,
+                                    'Unary addition '
+                                    'not supported'):
             store.select('wpneg', 'items == +1')

     def test_term_compat(self):
@@ -4388,7 +4389,7 @@ def f():
         def f():
             store.select('df')

-        tm.assertRaisesRegexp(ClosedFileError, 'file is not open', f)
+        tm.assert_raises_regex(ClosedFileError, 'file is not open', f)

     def test_pytables_native_read(self):
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 63fcfb0d2f5ac..36ff3bdbb24b5 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -984,7 +984,7 @@ def test_database_uri_string(self):
         # using driver that will not be installed on Travis to trigger error
         # in sqlalchemy.create_engine -> test passing of this error to user
         db_uri = "postgresql+pg8000://user:pass@host/dbname"
-        with tm.assertRaisesRegexp(ImportError, "pg8000"):
+        with tm.assert_raises_regex(ImportError, "pg8000"):
             sql.read_sql("select * from table", db_uri)

     def _make_iris_table_metadata(self):
@@ -2380,8 +2380,8 @@ def test_unsupported_flavor(self):
         msg = 'is not supported'

         for func in self.funcs:
-            tm.assertRaisesRegexp(ValueError, msg, getattr(sql, func),
-                                  self.con, flavor='mysql')
+            tm.assert_raises_regex(ValueError, msg, getattr(sql, func),
+                                   self.con, flavor='mysql')

     def test_deprecated_flavor(self):
         for func in self.funcs:
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index cc71cf6b1a4dc..2bde4349f6000 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -124,10 +124,12 @@ def test_concatlike_same_dtypes(self):
         tm.assert_index_equal(res, exp)

         # cannot append non-index
-        with tm.assertRaisesRegexp(TypeError, 'all inputs must be Index'):
+        with tm.assert_raises_regex(TypeError,
+                                    'all inputs must be Index'):
             pd.Index(vals1).append(vals2)

-        with tm.assertRaisesRegexp(TypeError, 'all inputs must be Index'):
+        with tm.assert_raises_regex(TypeError,
+                                    'all inputs must be Index'):
             pd.Index(vals1).append([pd.Index(vals2), vals3])

         # ----- Series ----- #
@@ -175,16 +177,16 @@ def test_concatlike_same_dtypes(self):

         # cannot append non-index
         msg = "cannot concatenate a non-NDFrame object"
-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             pd.Series(vals1).append(vals2)

-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             pd.Series(vals1).append([pd.Series(vals2), vals3])

-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             pd.concat([pd.Series(vals1), vals2])

-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             pd.concat([pd.Series(vals1), pd.Series(vals2), vals3])

     def test_concatlike_dtypes_coercion(self):
diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py
index 7e39806b42cbf..475b17d9fe792 100644
--- a/pandas/tests/reshape/test_join.py
+++ b/pandas/tests/reshape/test_join.py
@@ -234,9 +234,9 @@ def test_join_on_fails_with_wrong_object_type(self):
         df = DataFrame({'a': [1, 1]})

         for obj in wrongly_typed:
-            with tm.assertRaisesRegexp(ValueError, str(type(obj))):
+            with tm.assert_raises_regex(ValueError, str(type(obj))):
                 merge(obj, df, left_on='a', right_on='a')
-            with tm.assertRaisesRegexp(ValueError, str(type(obj))):
+            with tm.assert_raises_regex(ValueError, str(type(obj))):
                 merge(df, obj, left_on='a', right_on='a')

     def test_join_on_pass_vector(self):
diff --git a/pandas/tests/reshape/test_merge_ordered.py b/pandas/tests/reshape/test_merge_ordered.py
index e445ad9323b51..77f47ff0a76e9 100644
--- a/pandas/tests/reshape/test_merge_ordered.py
+++ b/pandas/tests/reshape/test_merge_ordered.py
@@ -83,7 +83,7 @@ def test_empty_sequence_concat(self):
             ([None, None], none_pat)
         ]
         for df_seq, pattern in test_cases:
-            tm.assertRaisesRegexp(ValueError, pattern, pd.concat, df_seq)
+            tm.assert_raises_regex(ValueError, pattern, pd.concat, df_seq)

         pd.concat([pd.DataFrame()])
         pd.concat([None, pd.DataFrame()])
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 242011d6f23da..f15616a16678f 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -1321,22 +1321,22 @@ def test_crosstab_errors(self):
                        'c': [1, 1, np.nan, 1, 1]})

         error = 'values cannot be used without an aggfunc.'
-        with tm.assertRaisesRegexp(ValueError, error):
+        with tm.assert_raises_regex(ValueError, error):
             pd.crosstab(df.a, df.b, values=df.c)

         error = 'aggfunc cannot be used without values'
-        with tm.assertRaisesRegexp(ValueError, error):
+        with tm.assert_raises_regex(ValueError, error):
             pd.crosstab(df.a, df.b, aggfunc=np.mean)

         error = 'Not a valid normalize argument'
-        with tm.assertRaisesRegexp(ValueError, error):
+        with tm.assert_raises_regex(ValueError, error):
             pd.crosstab(df.a, df.b, normalize='42')

-        with tm.assertRaisesRegexp(ValueError, error):
+        with tm.assert_raises_regex(ValueError, error):
             pd.crosstab(df.a, df.b, normalize=42)

         error = 'Not a valid margins argument'
-        with tm.assertRaisesRegexp(ValueError, error):
+        with tm.assert_raises_regex(ValueError, error):
             pd.crosstab(df.a, df.b, normalize='all', margins=42)

     def test_crosstab_with_categorial_columns(self):
diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py
index f41c977cc03e1..87f16cfaf31ec 100644
--- a/pandas/tests/reshape/test_reshape.py
+++ b/pandas/tests/reshape/test_reshape.py
@@ -117,7 +117,7 @@ def test_tuple_vars_fail_with_multiindex(self):
         for id_vars, value_vars in ((tuple_a, list_b), (list_a, tuple_b),
                                     (tuple_a, tuple_b)):
-            with tm.assertRaisesRegexp(ValueError, r'MultiIndex'):
+            with tm.assert_raises_regex(ValueError, r'MultiIndex'):
                 self.df1.melt(id_vars=id_vars, value_vars=value_vars)

     def test_custom_var_name(self):
diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py
index 8aaa8a51d7020..1cc5c5f229bce 100644
--- a/pandas/tests/reshape/test_tile.py
+++ b/pandas/tests/reshape/test_tile.py
@@ -192,8 +192,8 @@ def test_qcut_specify_quantiles(self):
         tm.assert_categorical_equal(factor, expected)

     def test_qcut_all_bins_same(self):
-        tm.assertRaisesRegexp(ValueError, "edges.*unique", qcut,
-                              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3)
+        tm.assert_raises_regex(ValueError, "edges.*unique", qcut,
+                               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3)

     def test_cut_out_of_bounds(self):
         arr = np.random.randn(100)
diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py
index 1c67b13a9c1c9..5cc476718add2 100644
--- a/pandas/tests/reshape/test_union_categoricals.py
+++ b/pandas/tests/reshape/test_union_categoricals.py
@@ -58,11 +58,11 @@ def test_union_categorical(self):
         s = Categorical([0, 1.2, 2])
         s2 = Categorical([2, 3, 4])
         msg = 'dtype of categories must be the same'
-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             union_categoricals([s, s2])

         msg = 'No Categoricals to union'
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             union_categoricals([])

     def test_union_categoricals_nan(self):
@@ -139,7 +139,7 @@ def test_union_categoricals_ordered(self):
         c2 = Categorical([1, 2, 3], ordered=False)

         msg = 'Categorical.ordered must be the same'
-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             union_categoricals([c1, c2])

         res = union_categoricals([c1, c1])
@@ -157,7 +157,7 @@ def test_union_categoricals_ordered(self):
         c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)

         msg = "to union ordered Categoricals, all categories must be the same"
-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             union_categoricals([c1, c2])

     def test_union_categoricals_ignore_order(self):
@@ -170,7 +170,7 @@ def test_union_categoricals_ignore_order(self):
         tm.assert_categorical_equal(res, exp)

         msg = 'Categorical.ordered must be the same'
-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             union_categoricals([c1, c2], ignore_order=False)

         res = union_categoricals([c1, c1], ignore_order=True)
@@ -208,10 +208,10 @@ def test_union_categoricals_ignore_order(self):
         tm.assert_categorical_equal(result, expected)

         msg = "to union ordered Categoricals, all categories must be the same"
-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             union_categoricals([c1, c2], ignore_order=False)

-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             union_categoricals([c1, c2])

     def test_union_categoricals_sort(self):
diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py
index fd3a683e80397..a7fbe8d305011 100644
--- a/pandas/tests/reshape/test_util.py
+++ b/pandas/tests/reshape/test_util.py
@@ -46,4 +46,4 @@ def test_invalid_input(self):
                           'a', ['a'], ['a', 'b'], [['a'], 'b']]
         msg = "Input must be a list-like of list-likes"
         for X in invalid_inputs:
-            tm.assertRaisesRegexp(TypeError, msg, cartesian_product, X=X)
+            tm.assert_raises_regex(TypeError, msg, cartesian_product, X=X)
diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/test_interval.py
index ea267fabd13ed..526a2916e2924 100644
--- a/pandas/tests/scalar/test_interval.py
+++ b/pandas/tests/scalar/test_interval.py
@@ -46,7 +46,7 @@ def test_equal(self):
         self.assertNotEqual(Interval(0, 1), 0)

     def test_comparison(self):
-        with tm.assertRaisesRegexp(TypeError, 'unorderable types'):
+        with tm.assert_raises_regex(TypeError, 'unorderable types'):
             Interval(0, 1) < 2

         self.assertTrue(Interval(0, 1) < Interval(1, 2))
diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py
index f8b103836e9a1..b5c2439524e34 100644
--- a/pandas/tests/scalar/test_period.py
+++ b/pandas/tests/scalar/test_period.py
@@ -135,11 +135,11 @@ def test_period_cons_mult(self):

         msg = ('Frequency must be positive, because it'
                ' represents span: -3M')
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period('2011-01', freq='-3M')

         msg = ('Frequency must be positive, because it'
                ' represents span: 0M')
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period('2011-01', freq='0M')

     def test_period_cons_combined(self):
@@ -185,28 +185,28 @@ def test_period_cons_combined(self):

         msg = ('Frequency must be positive, because it'
                ' represents span: -25H')
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period('2011-01', freq='-1D1H')
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period('2011-01', freq='-1H1D')
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period(ordinal=1, freq='-1D1H')
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period(ordinal=1, freq='-1H1D')

         msg = ('Frequency must be positive, because it'
                ' represents span: 0D')
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period('2011-01', freq='0D0H')
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period(ordinal=1, freq='0D0H')

         # You can only combine together day and intraday offsets
         msg = ('Invalid frequency: 1W1D')
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period('2011-01', freq='1W1D')
         msg = ('Invalid frequency: 1D1W')
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period('2011-01', freq='1D1W')

     def test_timestamp_tz_arg(self):
@@ -518,9 +518,9 @@ def test_period_deprecated_freq(self):
         msg = pd.tseries.frequencies._INVALID_FREQ_ERROR
         for exp, freqs in iteritems(cases):
             for freq in freqs:
-                with tm.assertRaisesRegexp(ValueError, msg):
+                with tm.assert_raises_regex(ValueError, msg):
                     Period('2016-03-01 09:00', freq=freq)
-                with tm.assertRaisesRegexp(ValueError, msg):
+                with tm.assert_raises_regex(ValueError, msg):
                     Period(ordinal=1, freq=freq)

         # check supported freq-aliases still works
@@ -762,7 +762,7 @@ def test_properties_weekly_legacy(self):
         self.assertEqual(exp.days_in_month, 29)

         msg = pd.tseries.frequencies._INVALID_FREQ_ERROR
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period(freq='WK', year=2007, month=1, day=7)

     def test_properties_daily(self):
@@ -1032,14 +1032,14 @@ def test_add_raises(self):
         dt1 = Period(freq='D', year=2008, month=1, day=1)
         dt2 = Period(freq='D', year=2008, month=1, day=2)
         msg = r"unsupported operand type\(s\)"
-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             dt1 + "str"

         msg = r"unsupported operand type\(s\)"
-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             "str" + dt1

-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             dt1 + dt2

     def test_sub(self):
@@ -1050,7 +1050,7 @@ def test_sub(self):
         self.assertEqual(dt2 - dt1, 14)

         msg = r"Input has different freq=M from Period\(freq=D\)"
-        with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg):
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
             dt1 - pd.Period('2011-02', freq='M')

     def test_add_offset(self):
@@ -1414,8 +1414,8 @@ def test_period_ops_offset(self):
         self.assertEqual(result, exp)

         msg = r"Input cannot be converted to Period\(freq=D\)"
-        with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg):
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
             p + offsets.Hour(2)

-        with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg):
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
             p - offsets.Hour(2)
diff --git a/pandas/tests/scalar/test_period_asfreq.py b/pandas/tests/scalar/test_period_asfreq.py
index 611a1cc97e9c6..84793658a6537 100644
--- a/pandas/tests/scalar/test_period_asfreq.py
+++ b/pandas/tests/scalar/test_period_asfreq.py
@@ -295,26 +295,26 @@ def test_conv_weekly(self):
         self.assertEqual(ival_W.asfreq('W'), ival_W)

         msg = pd.tseries.frequencies._INVALID_FREQ_ERROR
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             ival_W.asfreq('WK')

     def test_conv_weekly_legacy(self):
         # frequency conversion tests: from Weekly Frequency
         msg = pd.tseries.frequencies._INVALID_FREQ_ERROR
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period(freq='WK', year=2007, month=1, day=1)

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period(freq='WK-SAT', year=2007, month=1, day=6)
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period(freq='WK-FRI', year=2007, month=1, day=5)
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period(freq='WK-THU', year=2007, month=1, day=4)
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period(freq='WK-WED', year=2007, month=1, day=3)
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period(freq='WK-TUE', year=2007, month=1, day=2)
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             Period(freq='WK-MON', year=2007, month=1, day=1)

     def test_conv_business(self):
@@ -712,10 +712,10 @@ def test_asfreq_MS(self):
                          Period('2013-01', 'M'))

         msg = pd.tseries.frequencies._INVALID_FREQ_ERROR
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             initial.asfreq(freq="MS", how="S")

-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             pd.Period('2013-01', 'MS')

         self.assertTrue(_period_code_map.get("MS") is None)
diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py
index 4cced3e480c78..86b02d20b6996 100644
--- a/pandas/tests/scalar/test_timedelta.py
+++ b/pandas/tests/scalar/test_timedelta.py
@@ -125,16 +125,17 @@ def test_construction(self):
         pytest.raises(ValueError, lambda: Timedelta('3.1415'))

         # invalid construction
-        tm.assertRaisesRegexp(ValueError, "cannot construct a Timedelta",
-                              lambda: Timedelta())
-        tm.assertRaisesRegexp(ValueError, "unit abbreviation w/o a number",
-                              lambda: Timedelta('foo'))
-        tm.assertRaisesRegexp(ValueError,
-                              "cannot construct a Timedelta from the passed "
-                              "arguments, allowed keywords are ",
-                              lambda: Timedelta(day=10))
-
-        # roundtripping both for string and value
+        tm.assert_raises_regex(ValueError, "cannot construct a Timedelta",
+                               lambda: Timedelta())
+        tm.assert_raises_regex(ValueError,
+                               "unit abbreviation w/o a number",
+                               lambda: Timedelta('foo'))
+        tm.assert_raises_regex(ValueError,
+                               "cannot construct a Timedelta from the "
+                               "passed arguments, allowed keywords are ",
+                               lambda: Timedelta(day=10))
+
+        # round-trip both for string and value
         for v in ['1s', '-1s', '1us', '-1us', '1 day', '-1 day',
                   '-23:59:59.999999', '-1 days +23:59:59.999999', '-1ns',
                   '1ns', '-23:59:59.999999999']:
diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py
index b7b1b4c198701..bad0b697eef6c 100644
--- a/pandas/tests/scalar/test_timestamp.py
+++ b/pandas/tests/scalar/test_timestamp.py
@@ -172,9 +172,9 @@ def test_constructor_with_stringoffset(self):
         self.assertEqual(result, eval(repr(result)))

     def test_constructor_invalid(self):
-        with tm.assertRaisesRegexp(TypeError, 'Cannot convert input'):
+        with tm.assert_raises_regex(TypeError, 'Cannot convert input'):
             Timestamp(slice(2))
-        with tm.assertRaisesRegexp(ValueError, 'Cannot convert Period'):
+        with tm.assert_raises_regex(ValueError, 'Cannot convert Period'):
             Timestamp(Period('1000-01-01'))

     def test_constructor_positional(self):
@@ -245,7 +245,7 @@ def test_constructor_offset_depr(self):
             self.assertEqual(ts.offset, 'D')

         msg = "Can only specify freq or offset, not both"
-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             Timestamp('2011-01-01', offset='D', freq='D')

     def test_constructor_offset_depr_fromordinal(self):
@@ -260,7 +260,7 @@ def test_constructor_offset_depr_fromordinal(self):
         self.assertEqual(base.toordinal(), ts.toordinal())

         msg = "Can only specify freq or offset, not both"
-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             Timestamp.fromordinal(base.toordinal(), offset='D', freq='D')

     def test_conversion(self):
@@ -393,14 +393,14 @@ def test_tz_localize_ambiguous(self):
                                 ambiguous='infer')

         # GH 8025
-        with tm.assertRaisesRegexp(TypeError,
-                                   'Cannot localize tz-aware Timestamp, use '
-                                   'tz_convert for conversions'):
+        with tm.assert_raises_regex(TypeError,
+                                    'Cannot localize tz-aware Timestamp, '
+                                    'use tz_convert for conversions'):
             Timestamp('2011-01-01', tz='US/Eastern').tz_localize('Asia/Tokyo')

-        with tm.assertRaisesRegexp(TypeError,
-                                   'Cannot convert tz-naive Timestamp, use '
-                                   'tz_localize to localize'):
+        with tm.assert_raises_regex(TypeError,
+                                    'Cannot convert tz-naive Timestamp, '
+                                    'use tz_localize to localize'):
             Timestamp('2011-01-01').tz_convert('Asia/Tokyo')

     def test_tz_localize_nonexistent(self):
@@ -710,7 +710,7 @@ def _check_round(freq, expected):
             _check_round(freq, expected)

         msg = frequencies._INVALID_FREQ_ERROR
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             stamp.round('foo')

     def test_class_ops_pytz(self):
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index 02748aee4628d..eb8a83bb85847 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -550,8 +550,8 @@ def testit():

             # Unimplemented numeric_only parameter.
             if 'numeric_only' in compat.signature(f).args:
-                tm.assertRaisesRegexp(NotImplementedError, name, f,
-                                      self.series, numeric_only=True)
+                tm.assert_raises_regex(NotImplementedError, name, f,
+                                       self.series, numeric_only=True)

         testit()
@@ -596,12 +596,12 @@ def test_numpy_compress(self):
         tm.assert_series_equal(np.compress(cond, s), expected)

         msg = "the 'axis' parameter is not supported"
-        tm.assertRaisesRegexp(ValueError, msg, np.compress,
-                              cond, s, axis=1)
+        tm.assert_raises_regex(ValueError, msg, np.compress,
+                               cond, s, axis=1)

         msg = "the 'out' parameter is not supported"
-        tm.assertRaisesRegexp(ValueError, msg, np.compress,
-                              cond, s, out=s)
+        tm.assert_raises_regex(ValueError, msg, np.compress,
+                               cond, s, out=s)

     def test_round(self):
         self.ts.index.name = "index_name"
@@ -619,7 +619,7 @@ def test_numpy_round(self):
         assert_series_equal(out, expected)

         msg = "the 'out' parameter is not supported"
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             np.round(s, decimals=0, out=s)

     def test_built_in_round(self):
@@ -1184,8 +1184,8 @@ def test_numpy_argmin(self):

         if not _np_version_under1p10:
             msg = "the 'out' parameter is not supported"
-            tm.assertRaisesRegexp(ValueError, msg, np.argmin,
-                                  Series(data), out=data)
+            tm.assert_raises_regex(ValueError, msg, np.argmin,
+                                   Series(data), out=data)

     def test_idxmax(self):
         # test idxmax
@@ -1240,8 +1240,8 @@ def test_numpy_argmax(self):

         if not _np_version_under1p10:
             msg = "the 'out' parameter is not supported"
-            tm.assertRaisesRegexp(ValueError, msg, np.argmax,
-                                  Series(data), out=data)
+            tm.assert_raises_regex(ValueError, msg, np.argmax,
+                                   Series(data), out=data)

     def test_ptp(self):
         N = 1000
@@ -1307,7 +1307,7 @@ def test_numpy_repeat(self):
         assert_series_equal(np.repeat(s, 2), expected)

         msg = "the 'axis' parameter is not supported"
-        tm.assertRaisesRegexp(ValueError, msg, np.repeat, s, 2, axis=0)
+        tm.assert_raises_regex(ValueError, msg, np.repeat, s, 2, axis=0)

     def test_searchsorted(self):
         s = Series([1, 2, 3])
@@ -1483,11 +1483,13 @@ def test_reshape_bad_kwarg(self):
         with tm.assert_produces_warning(FutureWarning,
                                         check_stacklevel=False):
             msg = "'foo' is an invalid keyword argument for this function"
-            tm.assertRaisesRegexp(TypeError, msg, a.reshape, (2, 2), foo=2)
+            tm.assert_raises_regex(
+                TypeError, msg, a.reshape, (2, 2), foo=2)

         with tm.assert_produces_warning(FutureWarning,
                                         check_stacklevel=False):
             msg = r"reshape\(\) got an unexpected keyword argument 'foo'"
-            tm.assertRaisesRegexp(TypeError, msg, a.reshape, a.shape, foo=2)
+            tm.assert_raises_regex(
+                TypeError, msg, a.reshape, a.shape, foo=2)

     def test_numpy_reshape(self):
         a = Series([1, 2, 3, 4])
@@ -1697,7 +1699,7 @@ def test_error(self, r):
         args = 2, len(r), 0, -1
         methods = r.nlargest, r.nsmallest
         for method, arg in product(methods, args):
-            with tm.assertRaisesRegexp(TypeError, msg):
+            with tm.assert_raises_regex(TypeError, msg):
                 method(arg)

     @pytest.mark.parametrize(
@@ -1729,9 +1731,9 @@ def test_misc(self):
         assert_series_equal(s.nsmallest(), s.iloc[[2, 3, 0, 4]])

         msg = 'keep must be either "first", "last"'
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             s.nsmallest(keep='invalid')
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             s.nlargest(keep='invalid')

         # GH 15297
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
index f1a904498b727..397058c4bb8ce 100644
--- a/pandas/tests/series/test_api.py
+++ b/pandas/tests/series/test_api.py
@@ -341,7 +341,8 @@ def test_str_attribute(self):

         # str accessor only valid with string values
         s = Series(range(5))
-        with tm.assertRaisesRegexp(AttributeError, 'only use .str accessor'):
+        with tm.assert_raises_regex(AttributeError,
+                                    'only use .str accessor'):
             s.str.repeat(2)

     def test_empty_method(self):
diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py
index 51a2a5ed6c574..b4615e5420a81 100644
--- a/pandas/tests/series/test_combine_concat.py
+++ b/pandas/tests/series/test_combine_concat.py
@@ -55,9 +55,9 @@ def test_append_duplicates(self):
                               exp, check_index_type=True)

         msg = 'Indexes have overlapping values:'
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             s1.append(s2, verify_integrity=True)
-        with tm.assertRaisesRegexp(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg):
             pd.concat([s1, s2], verify_integrity=True)

     def test_combine_first(self):
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 74c2544d900ea..c461556644275 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -851,10 +851,10 @@ def test_auto_conversion(self):

     def test_constructor_cant_cast_datetime64(self):
         msg = "Cannot cast datetime64 to "
-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             Series(date_range('1/1/2000', periods=10), dtype=float)

-        with tm.assertRaisesRegexp(TypeError, msg):
+        with tm.assert_raises_regex(TypeError, msg):
             Series(date_range('1/1/2000', periods=10), dtype=int)

     def test_constructor_cast_object(self):
@@ -882,9 +882,9 @@ def test_constructor_generic_timestamp_deprecated(self):

         # These timestamps have the wrong frequencies,
         # so an Exception should be raised now.
msg = "cannot convert timedeltalike" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): Series([], dtype='m8[ps]') msg = "cannot convert datetimelike" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): Series([], dtype='M8[ps]') diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index ecb457b4ff1b0..74a4e37f0923a 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -251,7 +251,7 @@ def get_dir(s): # no setting allowed s = Series(date_range('20130101', periods=5, freq='D'), name='xxx') - with tm.assertRaisesRegexp(ValueError, "modifications"): + with tm.assert_raises_regex(ValueError, "modifications"): s.dt.hour = 5 # trying to set a copy @@ -265,8 +265,8 @@ def f(): def test_dt_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 s = Series(date_range('20130101', periods=5, freq='D')) - with tm.assertRaisesRegexp(AttributeError, - "You cannot add any new attribute"): + with tm.assert_raises_regex(AttributeError, + "You cannot add any new attribute"): s.dt.xlabel = "a" def test_strftime(self): @@ -375,8 +375,8 @@ def test_dt_accessor_api(self): for s in [Series(np.arange(5)), Series(list('abcde')), Series(np.random.randn(5))]: - with tm.assertRaisesRegexp(AttributeError, - "only use .dt accessor"): + with tm.assert_raises_regex(AttributeError, + "only use .dt accessor"): s.dt self.assertFalse(hasattr(s, 'dt')) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 6bbf00d6cab22..e084fa58d6c51 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -50,7 +50,7 @@ def test_astype_cast_nan_inf_int(self, dtype, value): msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer' s = Series([value]) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): s.astype(dtype) @pytest.mark.parametrize("dtype", [int, np.int8, np.int64]) diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 135e208877f2d..6907cc194f0f0 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -808,9 +808,9 @@ def test_setslice(self): def test_basic_getitem_setitem_corner(self): # invalid tuples, e.g. self.ts[:, None] vs. self.ts[:, 2] - with tm.assertRaisesRegexp(ValueError, 'tuple-index'): + with tm.assert_raises_regex(ValueError, 'tuple-index'): self.ts[:, 2] - with tm.assertRaisesRegexp(ValueError, 'tuple-index'): + with tm.assert_raises_regex(ValueError, 'tuple-index'): self.ts[:, 2] = 2 # weird lists. 
[slice(0, 5)] will work but not two slices @@ -1206,11 +1206,11 @@ def test_where_invalid_input(self): ] for cond in conds: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): s.where(cond) msg = "Array conditional must be same shape as self" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): s.where([True]) def test_where_ndframe_align(self): @@ -1218,7 +1218,7 @@ def test_where_ndframe_align(self): s = Series([1, 2, 3]) cond = [True] - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): s.where(cond) expected = Series([1, np.nan, np.nan]) @@ -1227,7 +1227,7 @@ def test_where_ndframe_align(self): tm.assert_series_equal(out, expected) cond = np.array([False, True, False, True]) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): s.where(cond) expected = Series([np.nan, 2, np.nan]) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 159b29aca1e7c..89ed7975e8017 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -612,7 +612,7 @@ def run_ops(ops, get_ser, test_ser): # defined for op_str in ops: op = getattr(get_ser, op_str, None) - with tm.assertRaisesRegexp(TypeError, 'operate'): + with tm.assert_raises_regex(TypeError, 'operate'): op(test_ser) # ## timedelta64 ### @@ -1260,7 +1260,7 @@ def test_comparison_flex_basic(self): # msg = 'No axis named 1 for object type' for op in ['eq', 'ne', 'le', 'le', 'gt', 'ge']: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): getattr(left, op)(right, axis=1) def test_comparison_flex_alignment(self): @@ -1539,23 +1539,23 @@ def test_comp_ops_df_compat(self): for l, r in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]: msg = "Can only compare identically-labeled Series objects" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): l == r - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): l != r - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): l < r msg = "Can only compare identically-labeled DataFrame objects" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): l.to_frame() == r.to_frame() - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): l.to_frame() != r.to_frame() - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): l.to_frame() < r.to_frame() def test_bool_ops_df_compat(self): diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index a67d097f341db..fdc12459f8c59 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -161,10 +161,12 @@ def test_comp_series_period_scalar(self): # different base freq msg = "Input has different freq=A-DEC from Period" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): base <= Period('2011', freq='A') - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): Period('2011', freq='A') >= base def test_comp_series_period_series(self): @@ -199,7 +201,8 @@ def test_comp_series_period_series(self): # different base freq msg = "Input has different 
freq=A-DEC from Period" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex( + period.IncompatibleFrequency, msg): base <= s2 def test_comp_series_period_object(self): @@ -244,5 +247,5 @@ def test_align_series(self): for kind in ['inner', 'outer', 'left', 'right']: ts.align(ts[::2], join=kind) msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): ts + ts.asfreq('D', how="end") diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index e61297bdcce3e..6f9c65e37533d 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -43,7 +43,7 @@ def test_quantile(self): msg = 'percentiles should all be in the interval \\[0, 1\\]' for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.ts.quantile(invalid) def test_quantile_multi(self): @@ -109,11 +109,11 @@ def test_quantile_interpolation_np_lt_1p9(self): # interpolation other than linear expErrMsg = "Interpolation methods other than " - with tm.assertRaisesRegexp(ValueError, expErrMsg): + with tm.assert_raises_regex(ValueError, expErrMsg): self.ts.quantile(0.9, interpolation='nearest') # object dtype - with tm.assertRaisesRegexp(ValueError, expErrMsg): + with tm.assert_raises_regex(ValueError, expErrMsg): q = Series(self.ts, dtype=object).quantile(0.7, interpolation='higher') diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index a9a9204cf7f67..ee7b264bde8bc 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -78,7 +78,7 @@ def test_replace(self): # make sure that we aren't just masking a TypeError because bools don't # implement indexing - with tm.assertRaisesRegexp(TypeError, 'Cannot compare types .+'): + with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'): ser.replace([1, 2], [np.nan, 0]) ser = pd.Series([0, 1, 2, 3, 4]) @@ -186,7 +186,7 @@ def test_replace_bool_with_bool(self): def test_replace_with_dict_with_bool_keys(self): s = pd.Series([True, False, True]) - with tm.assertRaisesRegexp(TypeError, 'Cannot compare types .+'): + with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'): s.replace({'asdf': 'asdb', True: 'yes'}) def test_replace2(self): diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py index df14a3139edab..bb6ff7a0c728f 100644 --- a/pandas/tests/sparse/test_array.py +++ b/pandas/tests/sparse/test_array.py @@ -11,7 +11,7 @@ from pandas import _np_version_under1p8 from pandas.core.sparse.api import SparseArray, SparseSeries from pandas.core.sparse.libsparse import IntIndex -from pandas.util.testing import assert_almost_equal, assertRaisesRegexp +from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm @@ -142,8 +142,8 @@ def test_get_item(self): self.assertEqual(self.zarr[7], 5) errmsg = re.compile("bounds") - assertRaisesRegexp(IndexError, errmsg, lambda: self.arr[11]) - assertRaisesRegexp(IndexError, errmsg, lambda: self.arr[-11]) + tm.assert_raises_regex(IndexError, errmsg, lambda: self.arr[11]) + tm.assert_raises_regex(IndexError, errmsg, lambda: self.arr[-11]) self.assertEqual(self.arr[-1], self.arr[len(self.arr) - 1]) def test_take(self): @@ -179,21 +179,22 @@ def test_take_negative(self): 
tm.assert_sp_array_equal(self.arr.take([-4, -3, -2]), exp) def test_bad_take(self): - assertRaisesRegexp(IndexError, "bounds", lambda: self.arr.take(11)) + tm.assert_raises_regex( + IndexError, "bounds", lambda: self.arr.take(11)) pytest.raises(IndexError, lambda: self.arr.take(-11)) def test_take_invalid_kwargs(self): msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, self.arr.take, - [2, 3], foo=2) + tm.assert_raises_regex(TypeError, msg, self.arr.take, + [2, 3], foo=2) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, self.arr.take, - [2, 3], out=self.arr) + tm.assert_raises_regex(ValueError, msg, self.arr.take, + [2, 3], out=self.arr) msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, self.arr.take, - [2, 3], mode='clip') + tm.assert_raises_regex(ValueError, msg, self.arr.take, + [2, 3], mode='clip') def test_take_filling(self): # similar tests as GH 12631 @@ -215,9 +216,9 @@ def test_take_filling(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): sparse.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): sparse.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -247,9 +248,9 @@ def test_take_filling_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): sparse.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): sparse.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -283,12 +284,12 @@ def setitem(): def setslice(): self.arr[1:5] = 2 - assertRaisesRegexp(TypeError, "item assignment", setitem) - assertRaisesRegexp(TypeError, "item assignment", setslice) + tm.assert_raises_regex(TypeError, "item assignment", setitem) + tm.assert_raises_regex(TypeError, "item assignment", setslice) def test_constructor_from_too_large_array(self): - assertRaisesRegexp(TypeError, "expected dimension <= 1 data", - SparseArray, np.arange(10).reshape((2, 5))) + tm.assert_raises_regex(TypeError, "expected dimension <= 1 data", + SparseArray, np.arange(10).reshape((2, 5))) def test_constructor_from_sparse(self): res = SparseArray(self.zarr) @@ -354,16 +355,16 @@ def test_astype(self): self.assertFalse((self.arr.sp_values[:3] == 27).any()) msg = "unable to coerce current fill_value nan to int64 dtype" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.arr.astype('i8') arr = SparseArray([0, np.nan, 0, 1]) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): arr.astype('i8') arr = SparseArray([0, np.nan, 0, 1], fill_value=0) msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): arr.astype('i8') def test_astype_all(self): @@ -390,11 +391,11 @@ def test_set_fill_value(self): # coerces to int msg = "unable to set fill_value 3\\.1 to int64 dtype" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): arr.fill_value = 3.1 msg = "unable to set fill_value nan to 
int64 dtype" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): arr.fill_value = np.nan arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool) @@ -403,17 +404,17 @@ def test_set_fill_value(self): # coerces to bool msg = "unable to set fill_value 0 to bool dtype" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): arr.fill_value = 0 msg = "unable to set fill_value nan to bool dtype" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): arr.fill_value = np.nan # invalid msg = "fill_value must be a scalar" for val in [[1, 2, 3], np.array([1, 2]), (1, 2, 3)]: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): arr.fill_value = val def test_copy_shallow(self): @@ -682,12 +683,12 @@ def test_numpy_sum(self): self.assertEqual(out, 40.0) msg = "the 'dtype' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.sum, - SparseArray(data), dtype=np.int64) + tm.assert_raises_regex(ValueError, msg, np.sum, + SparseArray(data), dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.sum, - SparseArray(data), out=out) + tm.assert_raises_regex(ValueError, msg, np.sum, + SparseArray(data), out=out) def test_cumsum(self): non_null_data = np.array([1, 2, 3, 4, 5], dtype=float) @@ -711,7 +712,7 @@ def test_cumsum(self): axis = 1 # SparseArray currently 1-D, so only axis = 0 is valid. msg = "axis\\(={axis}\\) out of bounds".format(axis=axis) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): SparseArray(data).cumsum(axis=axis) def test_numpy_cumsum(self): @@ -735,12 +736,12 @@ def test_numpy_cumsum(self): tm.assert_sp_array_equal(out, expected) msg = "the 'dtype' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.cumsum, - SparseArray(data), dtype=np.int64) + tm.assert_raises_regex(ValueError, msg, np.cumsum, + SparseArray(data), dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.cumsum, - SparseArray(data), out=out) + tm.assert_raises_regex(ValueError, msg, np.cumsum, + SparseArray(data), out=out) def test_mean(self): data = np.arange(10).astype(float) @@ -761,12 +762,12 @@ def test_numpy_mean(self): self.assertEqual(out, 40.0 / 9) msg = "the 'dtype' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.mean, - SparseArray(data), dtype=np.int64) + tm.assert_raises_regex(ValueError, msg, np.mean, + SparseArray(data), dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.mean, - SparseArray(data), out=out) + tm.assert_raises_regex(ValueError, msg, np.mean, + SparseArray(data), out=out) def test_ufunc(self): # GH 13853 make sure ufunc is applied to fill_value diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index cf6d80e9c0133..a5080bbd81005 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -151,10 +151,10 @@ def test_constructor_ndarray(self): level=1) # wrong length index / columns - with tm.assertRaisesRegexp(ValueError, "^Index length"): + with tm.assert_raises_regex(ValueError, "^Index length"): SparseDataFrame(self.frame.values, index=self.frame.index[:-1]) - with tm.assertRaisesRegexp(ValueError, "^Column length"): + with tm.assert_raises_regex(ValueError, "^Column length"): 
SparseDataFrame(self.frame.values, columns=self.frame.columns[:-1]) # GH 9272 @@ -798,8 +798,8 @@ def test_join(self): right = self.frame.loc[:, ['B', 'D']] pytest.raises(Exception, left.join, right) - with tm.assertRaisesRegexp(ValueError, - 'Other Series must have a name'): + with tm.assert_raises_regex(ValueError, + 'Other Series must have a name'): self.frame.join(Series( np.random.randn(len(self.frame)), index=self.frame.index)) @@ -1042,7 +1042,7 @@ def test_numpy_transpose(self): tm.assert_sp_frame_equal(result, sdf) msg = "the 'axes' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.transpose, sdf, axes=1) + tm.assert_raises_regex(ValueError, msg, np.transpose, sdf, axes=1) def test_combine_first(self): df = self.frame @@ -1303,12 +1303,12 @@ def test_numpy_cumsum(self): tm.assert_sp_frame_equal(result, expected) msg = "the 'dtype' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.cumsum, - self.frame, dtype=np.int64) + tm.assert_raises_regex(ValueError, msg, np.cumsum, + self.frame, dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.cumsum, - self.frame, out=result) + tm.assert_raises_regex(ValueError, msg, np.cumsum, + self.frame, out=result) def test_numpy_func_call(self): # no exception should be raised even though diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py index cf91e41624276..bfa0a0440761f 100644 --- a/pandas/tests/sparse/test_indexing.py +++ b/pandas/tests/sparse/test_indexing.py @@ -440,7 +440,7 @@ def tests_indexing_with_sparse(self): msg = ("iLocation based boolean indexing cannot use an " "indexable as a mask") - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): s.iloc[indexer] diff --git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py index 14038777fdd02..63ed11845a896 100644 --- a/pandas/tests/sparse/test_libsparse.py +++ b/pandas/tests/sparse/test_libsparse.py @@ -479,37 +479,37 @@ def test_check_integrity(self): # Too many indices than specified in self.length msg = "Too many indices" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): IntIndex(length=1, indices=[1, 2, 3]) # No index can be negative. msg = "No index can be less than zero" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): IntIndex(length=5, indices=[1, -2, 3]) # No index can be negative. msg = "No index can be less than zero" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): IntIndex(length=5, indices=[1, -2, 3]) # All indices must be less than the length. msg = "All indices must be less than the length" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): IntIndex(length=5, indices=[1, 2, 5]) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): IntIndex(length=5, indices=[1, 2, 6]) # Indices must be strictly ascending. 
msg = "Indices must be strictly increasing" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): IntIndex(length=5, indices=[1, 3, 2]) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): IntIndex(length=5, indices=[1, 3, 3]) def test_int_internal(self): diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index 1502aaa7e0b9e..e0b0809c756b1 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -506,12 +506,12 @@ def test_numpy_take(self): np.take(sp.to_dense(), indices, axis=0)) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.take, - sp, indices, out=np.empty(sp.shape)) + tm.assert_raises_regex(ValueError, msg, np.take, + sp, indices, out=np.empty(sp.shape)) msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.take, - sp, indices, mode='clip') + tm.assert_raises_regex(ValueError, msg, np.take, + sp, indices, mode='clip') def test_setitem(self): self.bseries[5] = 7. @@ -703,8 +703,8 @@ def _check_all(values, first, second): first_series = SparseSeries(values1, sparse_index=IntIndex(length, index1), fill_value=nan) - with tm.assertRaisesRegexp(TypeError, - 'new index must be a SparseIndex'): + with tm.assert_raises_regex(TypeError, + 'new index must be a SparseIndex'): reindexed = first_series.sparse_reindex(0) # noqa def test_repr(self): @@ -797,7 +797,7 @@ def _check_matches(indices, expected): # must have NaN fill value data = {'a': SparseSeries(np.arange(7), sparse_index=expected2, fill_value=0)} - with tm.assertRaisesRegexp(TypeError, "NaN fill value"): + with tm.assert_raises_regex(TypeError, "NaN fill value"): spf.homogenize(data) def test_fill_value_corner(self): @@ -1332,7 +1332,7 @@ def test_cumsum(self): axis = 1 # Series is 1-D, so only axis = 0 is valid. 
msg = "No axis named {axis}".format(axis=axis) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.bseries.cumsum(axis=axis) def test_numpy_cumsum(self): @@ -1345,12 +1345,12 @@ def test_numpy_cumsum(self): tm.assert_series_equal(result, expected) msg = "the 'dtype' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.cumsum, - self.bseries, dtype=np.int64) + tm.assert_raises_regex(ValueError, msg, np.cumsum, + self.bseries, dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.cumsum, - self.zbseries, out=result) + tm.assert_raises_regex(ValueError, msg, np.cumsum, + self.zbseries, out=result) def test_numpy_func_call(self): # no exception should be raised even though diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 01c18dc64f578..96628322e4ee2 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -133,15 +133,16 @@ def test_unsortable(self): pytest.raises(TypeError, algos.safe_sort, arr) def test_exceptions(self): - with tm.assertRaisesRegexp(TypeError, - "Only list-like objects are allowed"): + with tm.assert_raises_regex(TypeError, + "Only list-like objects are allowed"): algos.safe_sort(values=1) - with tm.assertRaisesRegexp(TypeError, - "Only list-like objects or None"): + with tm.assert_raises_regex(TypeError, + "Only list-like objects or None"): algos.safe_sort(values=[0, 1, 2], labels=1) - with tm.assertRaisesRegexp(ValueError, "values should be unique"): + with tm.assert_raises_regex(ValueError, + "values should be unique"): algos.safe_sort(values=[0, 1, 2, 1], labels=[0, 1]) @@ -1152,7 +1153,7 @@ def test_too_many_ndims(self): arr = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]) msg = "Array with ndim > 2 are not supported" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): algos.rank(arr) @@ -1335,27 +1336,27 @@ def test_int64_add_overflow(): m = np.iinfo(np.int64).max n = np.iinfo(np.int64).min - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), m) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m])) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([n, n]), n) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([n, n]), np.array([n, n])) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, n]), np.array([n, n])) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), arr_mask=np.array([False, True])) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), b_mask=np.array([False, True])) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), arr_mask=np.array([False, True]), b_mask=np.array([False, True])) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, 
msg): with tm.assert_produces_warning(RuntimeWarning): algos.checked_add_with_arr(np.array([m, m]), np.array([np.nan, m])) @@ -1364,15 +1365,15 @@ def test_int64_add_overflow(): # the addition overflows. We don't check the result but just # the fact that an OverflowError is not raised. with pytest.raises(AssertionError): - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), arr_mask=np.array([True, True])) with pytest.raises(AssertionError): - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), b_mask=np.array([True, True])) with pytest.raises(AssertionError): - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), arr_mask=np.array([True, False]), b_mask=np.array([False, True])) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index bec743fac18e1..86343e441f49a 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -47,7 +47,8 @@ class CheckImmutable(object): def check_mutable_error(self, *args, **kwargs): # Pass whatever function you normally would to assertRaisesRegexp # (after the Exception kind). - tm.assertRaisesRegexp(TypeError, self.mutable_regex, *args, **kwargs) + tm.assert_raises_regex( + TypeError, self.mutable_regex, *args, **kwargs) def test_no_mutable_funcs(self): def setitem(): @@ -831,7 +832,7 @@ def test_duplicated_drop_duplicates_index(self): result = idx.drop_duplicates(keep=False) tm.assert_index_equal(result, idx[~expected]) - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( TypeError, r"drop_duplicates\(\) got an unexpected " "keyword argument"): idx.drop_duplicates(inplace=True) @@ -995,10 +996,10 @@ def test_transpose(self): def test_transpose_non_default_axes(self): for obj in self.objs: - tm.assertRaisesRegexp(ValueError, self.errmsg, - obj.transpose, 1) - tm.assertRaisesRegexp(ValueError, self.errmsg, - obj.transpose, axes=1) + tm.assert_raises_regex(ValueError, self.errmsg, + obj.transpose, 1) + tm.assert_raises_regex(ValueError, self.errmsg, + obj.transpose, axes=1) def test_numpy_transpose(self): for obj in self.objs: @@ -1007,8 +1008,8 @@ def test_numpy_transpose(self): else: tm.assert_series_equal(np.transpose(obj), obj) - tm.assertRaisesRegexp(ValueError, self.errmsg, - np.transpose, obj, axes=1) + tm.assert_raises_regex(ValueError, self.errmsg, + np.transpose, obj, axes=1) class TestNoNewAttributesMixin(tm.TestCase): diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index d516448acd876..bbcd42b147654 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -452,10 +452,10 @@ def test_validate_ordered(self): # This should be a boolean. 
ordered = np.array([0, 1, 2]) - with tm.assertRaisesRegexp(exp_err, exp_msg): + with tm.assert_raises_regex(exp_err, exp_msg): Categorical([1, 2, 3], ordered=ordered) - with tm.assertRaisesRegexp(exp_err, exp_msg): + with tm.assert_raises_regex(exp_err, exp_msg): Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'], ordered=ordered) @@ -587,16 +587,16 @@ def test_numpy_argsort(self): check_dtype=False) msg = "the 'kind' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.argsort, - c, kind='mergesort') + tm.assert_raises_regex(ValueError, msg, np.argsort, + c, kind='mergesort') msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.argsort, - c, axis=0) + tm.assert_raises_regex(ValueError, msg, np.argsort, + c, axis=0) msg = "the 'order' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.argsort, - c, order='C') + tm.assert_raises_regex(ValueError, msg, np.argsort, + c, order='C') def test_na_flags_int_categories(self): # #1457 @@ -835,9 +835,9 @@ def test_set_ordered(self): # removed in 0.19.0 msg = "can\'t set attribute" - with tm.assertRaisesRegexp(AttributeError, msg): + with tm.assert_raises_regex(AttributeError, msg): cat.ordered = True - with tm.assertRaisesRegexp(AttributeError, msg): + with tm.assert_raises_regex(AttributeError, msg): cat.ordered = False def test_set_categories(self): @@ -1955,7 +1955,7 @@ def test_series_delegations(self): # invalid accessor pytest.raises(AttributeError, lambda: Series([1, 2, 3]).cat) - tm.assertRaisesRegexp( + tm.assert_raises_regex( AttributeError, r"Can only use .cat accessor with a 'category' dtype", lambda: Series([1, 2, 3]).cat) @@ -3987,7 +3987,7 @@ def test_numpy_repeat(self): tm.assert_categorical_equal(np.repeat(cat, 2), exp) msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.repeat, cat, 2, axis=1) + tm.assert_raises_regex(ValueError, msg, np.repeat, cat, 2, axis=1) def test_reshape(self): cat = pd.Categorical([], categories=["a", "b"]) @@ -4012,7 +4012,7 @@ def test_reshape(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): msg = "can only specify one unknown dimension" cat = pd.Categorical(["a", "b"], categories=["a", "b"]) - tm.assertRaisesRegexp(ValueError, msg, cat.reshape, (-2, -1)) + tm.assert_raises_regex(ValueError, msg, cat.reshape, (-2, -1)) def test_numpy_reshape(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -4021,8 +4021,8 @@ def test_numpy_reshape(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): msg = "the 'order' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.reshape, - cat, cat.shape, order='F') + tm.assert_raises_regex(ValueError, msg, np.reshape, + cat, cat.shape, order='F') def test_na_actions(self): @@ -4223,15 +4223,16 @@ def test_cat_accessor_api(self): assert isinstance(s.cat, CategoricalAccessor) invalid = Series([1]) - with tm.assertRaisesRegexp(AttributeError, "only use .cat accessor"): + with tm.assert_raises_regex(AttributeError, + "only use .cat accessor"): invalid.cat self.assertFalse(hasattr(invalid, 'cat')) def test_cat_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 c = Series(list('aabbcde')).astype('category') - with tm.assertRaisesRegexp(AttributeError, - "You cannot add any new attribute"): + with tm.assert_raises_regex(AttributeError, + "You cannot add any new attribute"): c.cat.xlabel = "a" def 
test_str_accessor_api_for_categorical(self): @@ -4304,8 +4305,9 @@ def test_str_accessor_api_for_categorical(self): tm.assert_series_equal(res, exp) invalid = Series([1, 2, 3]).astype('category') - with tm.assertRaisesRegexp(AttributeError, - "Can only use .str accessor with string"): + with tm.assert_raises_regex(AttributeError, + "Can only use .str " + "accessor with string"): invalid.str self.assertFalse(hasattr(invalid, 'str')) @@ -4385,7 +4387,7 @@ def test_dt_accessor_api_for_categorical(self): tm.assert_almost_equal(res, exp) invalid = Series([1, 2, 3]).astype('category') - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( AttributeError, "Can only use .dt accessor with datetimelike"): invalid.dt self.assertFalse(hasattr(invalid, 'str')) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 435fca14d164f..d7dbaccb87ee8 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -12,7 +12,7 @@ def test_mut_exclusive(): msg = "mutually exclusive arguments: '[ab]' and '[ab]'" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): com._mut_exclusive(a=1, b=2) assert com._mut_exclusive(a=1, b=None) == 1 assert com._mut_exclusive(major=None, major_axis=None) is None diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index ddbaedc3ef919..14e08411fa106 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -390,22 +390,22 @@ def test_bool_ops_raise_on_arithmetic(self): f = getattr(operator, name) err_msg = re.escape(msg % op) - with tm.assertRaisesRegexp(NotImplementedError, err_msg): + with tm.assert_raises_regex(NotImplementedError, err_msg): f(df, df) - with tm.assertRaisesRegexp(NotImplementedError, err_msg): + with tm.assert_raises_regex(NotImplementedError, err_msg): f(df.a, df.b) - with tm.assertRaisesRegexp(NotImplementedError, err_msg): + with tm.assert_raises_regex(NotImplementedError, err_msg): f(df.a, True) - with tm.assertRaisesRegexp(NotImplementedError, err_msg): + with tm.assert_raises_regex(NotImplementedError, err_msg): f(False, df.a) - with tm.assertRaisesRegexp(TypeError, err_msg): + with tm.assert_raises_regex(TypeError, err_msg): f(False, df) - with tm.assertRaisesRegexp(TypeError, err_msg): + with tm.assert_raises_regex(TypeError, err_msg): f(df, True) def test_bool_ops_warn_on_arithmetic(self): diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index f58a6d4b146bd..61b4369d21ab4 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -1090,8 +1090,8 @@ def test_zero_step_raises(self): def test_unbounded_slice_raises(self): def assert_unbounded_slice_error(slc): - tm.assertRaisesRegexp(ValueError, "unbounded slice", - lambda: BlockPlacement(slc)) + tm.assert_raises_regex(ValueError, "unbounded slice", + lambda: BlockPlacement(slc)) assert_unbounded_slice_error(slice(None, None)) assert_unbounded_slice_error(slice(10, None)) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index e81e6e2d987c6..f350ef4351585 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -320,7 +320,7 @@ def test_frame_getitem_setitem_boolean(self): np.putmask(values[:-1], values[:-1] < 0, 2) tm.assert_almost_equal(df.values, values) - with tm.assertRaisesRegexp(TypeError, 'boolean values only'): + with tm.assert_raises_regex(TypeError, 'boolean values only'): df[df * 0] = 2 def test_frame_getitem_setitem_slice(self): @@ -755,7 +755,8 
@@ def _check_counts(frame, axis=0): # can't call with level on regular DataFrame df = tm.makeTimeDataFrame() - tm.assertRaisesRegexp(TypeError, 'hierarchical', df.count, level=0) + tm.assert_raises_regex( + TypeError, 'hierarchical', df.count, level=0) self.frame['D'] = 'foo' result = self.frame.count(level=0, numeric_only=True) @@ -792,9 +793,10 @@ def test_count_level_corner(self): tm.assert_frame_equal(result, expected) def test_get_level_number_out_of_bounds(self): - with tm.assertRaisesRegexp(IndexError, "Too many levels"): + with tm.assert_raises_regex(IndexError, "Too many levels"): self.frame.index._get_level_number(2) - with tm.assertRaisesRegexp(IndexError, "not a valid level number"): + with tm.assert_raises_regex(IndexError, + "not a valid level number"): self.frame.index._get_level_number(-3) def test_unstack(self): @@ -1011,16 +1013,17 @@ def test_stack_names_and_numbers(self): unstacked = self.ymd.unstack(['year', 'month']) # Can't use mixture of names and numbers to stack - with tm.assertRaisesRegexp(ValueError, "level should contain"): + with tm.assert_raises_regex(ValueError, "level should contain"): unstacked.stack([0, 'month']) def test_stack_multiple_out_of_bounds(self): # nlevels == 3 unstacked = self.ymd.unstack(['year', 'month']) - with tm.assertRaisesRegexp(IndexError, "Too many levels"): + with tm.assert_raises_regex(IndexError, "Too many levels"): unstacked.stack([2, 3]) - with tm.assertRaisesRegexp(IndexError, "not a valid level number"): + with tm.assert_raises_regex(IndexError, + "not a valid level number"): unstacked.stack([-4, -3]) def test_unstack_period_series(self): @@ -1275,10 +1278,10 @@ def test_reorder_levels(self): expected = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) tm.assert_frame_equal(result, expected) - with tm.assertRaisesRegexp(TypeError, 'hierarchical axis'): + with tm.assert_raises_regex(TypeError, 'hierarchical axis'): self.ymd.reorder_levels([1, 2], axis=1) - with tm.assertRaisesRegexp(IndexError, 'Too many levels'): + with tm.assert_raises_regex(IndexError, 'Too many levels'): self.ymd.index.reorder_levels([1, 2, 3]) def test_insert_index(self): @@ -2244,11 +2247,10 @@ def test_reset_index_multiindex_columns(self): result = df[['B']].reset_index() tm.assert_frame_equal(result, expected) - # GH 16120 - # already existing column - with tm.assertRaisesRegexp(ValueError, - ("cannot insert \('A', ''\), " - "already exists")): + # gh-16120: already existing column + with tm.assert_raises_regex(ValueError, + ("cannot insert \('A', ''\), " + "already exists")): df.reset_index() def test_set_index_period(self): diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 17bc06b5ee9c7..322ea32a93562 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -23,9 +23,8 @@ from pandas.tseries.offsets import BDay, MonthEnd from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal, - ensure_clean, assertRaisesRegexp, - makeCustomDataframe as mkdf, - makeMixedDataFrame) + ensure_clean, makeMixedDataFrame, + makeCustomDataframe as mkdf) import pandas.core.panel as panelm import pandas.util.testing as tm @@ -180,8 +179,8 @@ def wrapper(x): # Unimplemented numeric_only parameter. 
if 'numeric_only' in signature(f).args: - tm.assertRaisesRegexp(NotImplementedError, name, f, - numeric_only=True) + tm.assert_raises_regex(NotImplementedError, name, f, + numeric_only=True) class SafeForSparse(object): @@ -227,10 +226,10 @@ def test_get_axis_number(self): self.assertEqual(self.panel._get_axis_number('major'), 1) self.assertEqual(self.panel._get_axis_number('minor'), 2) - with tm.assertRaisesRegexp(ValueError, "No axis named foo"): + with tm.assert_raises_regex(ValueError, "No axis named foo"): self.panel._get_axis_number('foo') - with tm.assertRaisesRegexp(ValueError, "No axis named foo"): + with tm.assert_raises_regex(ValueError, "No axis named foo"): self.panel.__ge__(self.panel, axis='foo') def test_get_axis_name(self): @@ -514,9 +513,10 @@ def test_setitem(self): # bad shape p = Panel(np.random.randn(4, 3, 2)) - with tm.assertRaisesRegexp(ValueError, - r"shape of value must be \(3, 2\), " - r"shape of given object was \(4, 2\)"): + with tm.assert_raises_regex(ValueError, + r"shape of value must be " + r"\(3, 2\), shape of given " + r"object was \(4, 2\)"): p[0] = np.random.randn(4, 2) def test_setitem_ndarray(self): @@ -874,8 +874,9 @@ def test_get_value(self): result = self.panel.get_value(item, mjr, mnr) expected = self.panel[item][mnr][mjr] assert_almost_equal(result, expected) - with tm.assertRaisesRegexp(TypeError, - "There must be an argument for each axis"): + with tm.assert_raises_regex(TypeError, + "There must be an argument " + "for each axis"): self.panel.get_value('a') def test_set_value(self): @@ -897,7 +898,7 @@ def test_set_value(self): msg = ("There must be an argument for each " "axis plus the value provided") - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): self.panel.set_value('a') @@ -1041,7 +1042,7 @@ def _check_dtype(panel, dtype): def test_constructor_fails_with_not_3d_input(self): with catch_warnings(record=True): - with tm.assertRaisesRegexp(ValueError, "The number of dimensions required is 3"): # noqa + with tm.assert_raises_regex(ValueError, "The number of dimensions required is 3"): # noqa Panel(np.random.randn(10, 2)) def test_consolidate(self): @@ -1181,28 +1182,31 @@ def testit(): Panel(np.random.randn(3, 4, 5), lrange(4), lrange(5), lrange(5)) - assertRaisesRegexp(ValueError, - r"Shape of passed values is \(3, 4, 5\), " - r"indices imply \(4, 5, 5\)", - testit) + tm.assert_raises_regex(ValueError, + r"Shape of passed values is " + r"\(3, 4, 5\), indices imply " + r"\(4, 5, 5\)", + testit) def testit(): Panel(np.random.randn(3, 4, 5), lrange(5), lrange(4), lrange(5)) - assertRaisesRegexp(ValueError, - r"Shape of passed values is \(3, 4, 5\), " - r"indices imply \(5, 4, 5\)", - testit) + tm.assert_raises_regex(ValueError, + r"Shape of passed values is " + r"\(3, 4, 5\), indices imply " + r"\(5, 4, 5\)", + testit) def testit(): Panel(np.random.randn(3, 4, 5), lrange(5), lrange(5), lrange(4)) - assertRaisesRegexp(ValueError, - r"Shape of passed values is \(3, 4, 5\), " - r"indices imply \(5, 5, 4\)", - testit) + tm.assert_raises_regex(ValueError, + r"Shape of passed values is " + r"\(3, 4, 5\), indices imply " + r"\(5, 5, 4\)", + testit) def test_conform(self): with catch_warnings(record=True): @@ -1660,12 +1664,12 @@ def test_transpose(self): assert_panel_equal(result, expected) # duplicate axes - with tm.assertRaisesRegexp(TypeError, - 'not enough/duplicate arguments'): + with tm.assert_raises_regex(TypeError, + 'not enough/duplicate arguments'): self.panel.transpose('minor', maj='major', 
minor='items') - with tm.assertRaisesRegexp(ValueError, - 'repeated axis in transpose'): + with tm.assert_raises_regex(ValueError, + 'repeated axis in transpose'): self.panel.transpose('minor', 'major', major='minor', minor='items') @@ -1867,7 +1871,7 @@ def test_to_panel_duplicates(self): with catch_warnings(record=True): df = DataFrame({'a': [0, 0, 1], 'b': [1, 1, 1], 'c': [1, 2, 3]}) idf = df.set_index(['a', 'b']) - assertRaisesRegexp( + tm.assert_raises_regex( ValueError, 'non-uniquely indexed', idf.to_panel) def test_panel_dups(self): @@ -1992,8 +1996,8 @@ def test_tshift(self): shifted3 = ps.tshift(freq=BDay()) assert_panel_equal(shifted, shifted3) - assertRaisesRegexp(ValueError, 'does not match', - ps.tshift, freq='M') + tm.assert_raises_regex(ValueError, 'does not match', + ps.tshift, freq='M') # DatetimeIndex panel = make_test_panel() @@ -2108,7 +2112,7 @@ def test_numpy_round(self): assert_panel_equal(expected, result) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.round, p, out=p) + tm.assert_raises_regex(ValueError, msg, np.round, p, out=p) def test_multiindex_get(self): with catch_warnings(record=True): @@ -2540,8 +2544,8 @@ def test_to_string(self): def test_to_sparse(self): if isinstance(self.panel, Panel): msg = 'sparsifying is not supported' - tm.assertRaisesRegexp(NotImplementedError, msg, - self.panel.to_sparse) + tm.assert_raises_regex(NotImplementedError, msg, + self.panel.to_sparse) def test_truncate(self): with catch_warnings(record=True): diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index e5795eea12135..f5309a985a499 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -272,7 +272,7 @@ def test_select_bad_cols(self): pytest.raises(KeyError, g.__getitem__, ['D']) pytest.raises(KeyError, g.__getitem__, ['A', 'D']) - with tm.assertRaisesRegexp(KeyError, '^[^A]+$'): + with tm.assert_raises_regex(KeyError, '^[^A]+$'): # A should not be referenced as a bad column... # will have to rethink regex if you change message! 
g[['A', 'D']] @@ -983,11 +983,11 @@ def test_numpy_compat(self): for func in ('min', 'max', 'sum', 'prod', 'mean', 'var', 'std'): - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(r, func), - func, 1, 2, 3) - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(r, func), axis=1) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(r, func), + func, 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(r, func), axis=1) def test_resample_how_callables(self): # GH 7929 @@ -3081,17 +3081,18 @@ def test_fails_on_no_datetime_index(self): for name, func in zip(index_names, index_funcs): index = func(n) df = DataFrame({'a': np.random.randn(n)}, index=index) - with tm.assertRaisesRegexp(TypeError, - "Only valid with DatetimeIndex, " - "TimedeltaIndex or PeriodIndex, " - "but got an instance of %r" % name): + with tm.assert_raises_regex(TypeError, + "Only valid with " + "DatetimeIndex, TimedeltaIndex " + "or PeriodIndex, but got an " + "instance of %r" % name): df.groupby(TimeGrouper('D')) # PeriodIndex gives a specific error message df = DataFrame({'a': np.random.randn(n)}, index=tm.makePeriodIndex(n)) - with tm.assertRaisesRegexp(TypeError, - "axis must be a DatetimeIndex, but " - "got an instance of 'PeriodIndex'"): + with tm.assert_raises_regex(TypeError, + "axis must be a DatetimeIndex, but " + "got an instance of 'PeriodIndex'"): df.groupby(TimeGrouper('D')) def test_aaa_group_order(self): diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 6733fbdc3b9c6..db0c2fdc80fd2 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -29,7 +29,8 @@ def test_api(self): # GH 9184 invalid = Series([1]) - with tm.assertRaisesRegexp(AttributeError, "only use .str accessor"): + with tm.assert_raises_regex(AttributeError, + "only use .str accessor"): invalid.str self.assertFalse(hasattr(invalid, 'str')) @@ -451,15 +452,15 @@ def test_replace_callable(self): r'(?(3)required )positional arguments?') repl = lambda: None - with tm.assertRaisesRegexp(TypeError, p_err): + with tm.assert_raises_regex(TypeError, p_err): values.str.replace('a', repl) repl = lambda m, x: None - with tm.assertRaisesRegexp(TypeError, p_err): + with tm.assert_raises_regex(TypeError, p_err): values.str.replace('a', repl) repl = lambda m, x, y=None: None - with tm.assertRaisesRegexp(TypeError, p_err): + with tm.assert_raises_regex(TypeError, p_err): values.str.replace('a', repl) # test regex named groups @@ -512,13 +513,16 @@ def test_replace_compiled_regex(self): values = Series(['fooBAD__barBAD__bad', NA]) pat = re.compile(r'BAD[_]*') - with tm.assertRaisesRegexp(ValueError, "case and flags cannot be"): + with tm.assert_raises_regex(ValueError, + "case and flags cannot be"): result = values.str.replace(pat, '', flags=re.IGNORECASE) - with tm.assertRaisesRegexp(ValueError, "case and flags cannot be"): + with tm.assert_raises_regex(ValueError, + "case and flags cannot be"): result = values.str.replace(pat, '', case=False) - with tm.assertRaisesRegexp(ValueError, "case and flags cannot be"): + with tm.assert_raises_regex(ValueError, + "case and flags cannot be"): result = values.str.replace(pat, '', case=True) # test with callable @@ -648,7 +652,7 @@ def test_extract_expand_False(self): # Index only works with one regex group since # multi-group would expand to a frame idx = Index(['A1', 'A2', 'A3', 'A4', 'B5']) - with tm.assertRaisesRegexp(ValueError, "supported"): + with tm.assert_raises_regex(ValueError, "supported"): 
idx.str.extract('([AB])([123])', expand=False) # these should work for both Series and Index @@ -1124,7 +1128,7 @@ def test_extractall_errors(self): # no capture groups. (it returns DataFrame with one column for # each capture group) s = Series(['a3', 'b3', 'd4c2'], name='series_name') - with tm.assertRaisesRegexp(ValueError, "no capture groups"): + with tm.assert_raises_regex(ValueError, "no capture groups"): s.str.extractall(r'[a-z]') def test_extract_index_one_two_groups(self): @@ -1504,12 +1508,12 @@ def test_find(self): dtype=np.int64) tm.assert_numpy_array_equal(result.values, expected) - with tm.assertRaisesRegexp(TypeError, - "expected a string object, not int"): + with tm.assert_raises_regex(TypeError, + "expected a string object, not int"): result = values.str.find(0) - with tm.assertRaisesRegexp(TypeError, - "expected a string object, not int"): + with tm.assert_raises_regex(TypeError, + "expected a string object, not int"): result = values.str.rfind(0) def test_find_nan(self): @@ -1579,11 +1583,13 @@ def _check(result, expected): dtype=np.int64) tm.assert_numpy_array_equal(result.values, expected) - with tm.assertRaisesRegexp(ValueError, "substring not found"): + with tm.assert_raises_regex(ValueError, + "substring not found"): result = s.str.index('DE') - with tm.assertRaisesRegexp(TypeError, - "expected a string object, not int"): + with tm.assert_raises_regex(TypeError, + "expected a string " + "object, not int"): result = s.str.index(0) # test with nan @@ -1667,12 +1673,14 @@ def test_pad_fillchar(self): exp = Series(['XXaXX', 'XXbXX', NA, 'XXcXX', NA, 'eeeeee']) tm.assert_almost_equal(result, exp) - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not str"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not str"): result = values.str.pad(5, fillchar='XY') - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not int"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not int"): result = values.str.pad(5, fillchar=5) def test_pad_width(self): @@ -1680,8 +1688,9 @@ def test_pad_width(self): s = Series(['1', '22', 'a', 'bb']) for f in ['center', 'ljust', 'rjust', 'zfill', 'pad']: - with tm.assertRaisesRegexp(TypeError, - "width must be of integer type, not*"): + with tm.assert_raises_regex(TypeError, + "width must be of " + "integer type, not*"): getattr(s.str, f)('f') def test_translate(self): @@ -1713,7 +1722,7 @@ def _check(result, expected): expected = klass(['abcde', 'abcc', 'cddd', 'cde']) _check(result, expected) else: - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( ValueError, "deletechars is not a valid argument"): result = s.str.translate(table, deletechars='fg') @@ -1802,28 +1811,34 @@ def test_center_ljust_rjust_fillchar(self): # If fillchar is not a charatter, normal str raises TypeError # 'aaa'.ljust(5, 'XY') # TypeError: must be char, not str - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not str"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not str"): result = values.str.center(5, fillchar='XY') - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not str"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not str"): result = values.str.ljust(5, fillchar='XY') - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not str"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not str"): 
result = values.str.rjust(5, fillchar='XY') - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not int"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not int"): result = values.str.center(5, fillchar=1) - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not int"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not int"): result = values.str.ljust(5, fillchar=1) - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not int"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not int"): result = values.str.rjust(5, fillchar=1) def test_zfill(self): @@ -2005,7 +2020,7 @@ def test_split_to_dataframe(self): index=['preserve', 'me']) tm.assert_frame_equal(result, exp) - with tm.assertRaisesRegexp(ValueError, "expand must be"): + with tm.assert_raises_regex(ValueError, "expand must be"): s.str.split('_', expand="not_a_boolean") def test_split_to_multiindex_expand(self): @@ -2030,7 +2045,7 @@ def test_split_to_multiindex_expand(self): tm.assert_index_equal(result, exp) self.assertEqual(result.nlevels, 6) - with tm.assertRaisesRegexp(ValueError, "expand must be"): + with tm.assert_raises_regex(ValueError, "expand must be"): idx.str.split('_', expand="not_a_boolean") def test_rsplit_to_dataframe_expand(self): @@ -2651,7 +2666,8 @@ def test_normalize(self): result = s.str.normalize('NFC') tm.assert_series_equal(result, expected) - with tm.assertRaisesRegexp(ValueError, "invalid normalization form"): + with tm.assert_raises_regex(ValueError, + "invalid normalization form"): s.str.normalize('xxx') s = Index([u'ABC', u'123', u'アイエ']) @@ -2680,9 +2696,9 @@ def test_str_cat_raises_intuitive_error(self): # https://github.com/pandas-dev/pandas/issues/11334 s = Series(['a', 'b', 'c', 'd']) message = "Did you mean to supply a `sep` keyword?" 
- with tm.assertRaisesRegexp(ValueError, message): + with tm.assert_raises_regex(ValueError, message): s.str.cat('|') - with tm.assertRaisesRegexp(ValueError, message): + with tm.assert_raises_regex(ValueError, message): s.str.cat(' ') def test_index_str_accessor_visibility(self): @@ -2720,9 +2736,9 @@ def test_index_str_accessor_visibility(self): for values, tp in cases: idx = Index(values) message = 'Can only use .str accessor with string values' - with tm.assertRaisesRegexp(AttributeError, message): + with tm.assert_raises_regex(AttributeError, message): Series(values).str - with tm.assertRaisesRegexp(AttributeError, message): + with tm.assert_raises_regex(AttributeError, message): idx.str self.assertEqual(idx.inferred_type, tp) @@ -2730,14 +2746,14 @@ def test_index_str_accessor_visibility(self): idx = MultiIndex.from_tuples([('a', 'b'), ('a', 'b')]) self.assertEqual(idx.inferred_type, 'mixed') message = 'Can only use .str accessor with Index, not MultiIndex' - with tm.assertRaisesRegexp(AttributeError, message): + with tm.assert_raises_regex(AttributeError, message): idx.str def test_str_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 s = Series(list('aabbcde')) - with tm.assertRaisesRegexp(AttributeError, - "You cannot add any new attribute"): + with tm.assert_raises_regex(AttributeError, + "You cannot add any new attribute"): s.str.xlabel = "a" def test_method_on_bytes(self): diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index 4d60750fb4a20..9fb61998f6c54 100644 --- a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -32,7 +32,7 @@ def _test_dtype(dtype, can_hold_na, writeable=True): expected[3] = np.nan tm.assert_almost_equal(out, expected) else: - with tm.assertRaisesRegexp(TypeError, self.fill_error): + with tm.assert_raises_regex(TypeError, self.fill_error): algos.take_1d(data, indexer, out=out) # no exception o/w data.take(indexer, out=out) @@ -123,7 +123,8 @@ def _test_dtype(dtype, can_hold_na, writeable=True): tm.assert_almost_equal(out1, expected1) else: for i, out in enumerate([out0, out1]): - with tm.assertRaisesRegexp(TypeError, self.fill_error): + with tm.assert_raises_regex(TypeError, + self.fill_error): algos.take_nd(data, indexer, out=out, axis=i) # no exception o/w data.take(indexer, out=out, axis=i) @@ -235,7 +236,8 @@ def _test_dtype(dtype, can_hold_na): tm.assert_almost_equal(out2, expected2) else: for i, out in enumerate([out0, out1, out2]): - with tm.assertRaisesRegexp(TypeError, self.fill_error): + with tm.assert_raises_regex(TypeError, + self.fill_error): algos.take_nd(data, indexer, out=out, axis=i) # no exception o/w data.take(indexer, out=out, axis=i) diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 4a9e4f4fbd37a..45994fd400912 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -6,10 +6,9 @@ import sys from pandas import Series, DataFrame import pandas.util.testing as tm -from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp, - raise_with_traceback, assert_index_equal, - assert_series_equal, assert_frame_equal, - assert_numpy_array_equal, +from pandas.util.testing import (assert_almost_equal, raise_with_traceback, + assert_index_equal, assert_series_equal, + assert_frame_equal, assert_numpy_array_equal, RNGContext) from pandas.compat import is_platform_windows @@ -144,13 +143,13 @@ def test_assert_almost_equal_object(self): class TestUtilTesting(tm.TestCase): def test_raise_with_traceback(self): - with 
assertRaisesRegexp(LookupError, "error_text"): + with tm.assert_raises_regex(LookupError, "error_text"): try: raise ValueError("THIS IS AN ERROR") except ValueError as e: e = LookupError("error_text") raise_with_traceback(e) - with assertRaisesRegexp(LookupError, "error_text"): + with tm.assert_raises_regex(LookupError, "error_text"): try: raise ValueError("This is another error") except ValueError: @@ -173,18 +172,18 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\(2,\\) \\[right\\]: \\(3,\\)""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5])) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5])) # scalar comparison expected = """Expected type """ - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(1, 2) expected = """expected 2\\.00000 but got 1\\.00000, with decimal 5""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(1, 2) # array / scalar array comparison @@ -194,10 +193,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: ndarray \\[right\\]: int""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): # numpy_array_equal only accepts np.ndarray assert_numpy_array_equal(np.array([1]), 1) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([1]), 1) # scalar / array comparison @@ -207,9 +206,9 @@ def test_numpy_array_equal_message(self): \\[left\\]: int \\[right\\]: ndarray""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(1, np.array([1])) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(1, np.array([1])) expected = """numpy array are different @@ -218,10 +217,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[nan, 2\\.0, 3\\.0\\] \\[right\\]: \\[1\\.0, nan, 3\\.0\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) @@ -231,9 +230,9 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[1, 2\\] \\[right\\]: \\[1, 3\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([1, 2]), np.array([1, 3])) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([1, 2]), np.array([1, 3])) expected = """numpy array are different @@ -242,7 +241,7 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[1\\.1, 2\\.000001\\] \\[right\\]: \\[1\\.1, 2.0\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal( np.array([1.1, 2.000001]), np.array([1.1, 2.0])) @@ -255,10 +254,10 @@ def 
test_numpy_array_equal_message(self): \\[left\\]: \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\] \\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([[1, 2], [3, 4], [5, 6]]), np.array([[1, 3], [3, 4], [5, 6]])) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([[1, 2], [3, 4], [5, 6]]), np.array([[1, 3], [3, 4], [5, 6]])) @@ -268,10 +267,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[\\[1, 2\\], \\[3, 4\\]\\] \\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([[1, 2], [3, 4]]), np.array([[1, 3], [3, 4]])) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([[1, 2], [3, 4]]), np.array([[1, 3], [3, 4]])) @@ -282,10 +281,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\(2,\\) \\[right\\]: \\(3,\\)""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]), obj='Index') - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]), obj='Index') @@ -304,9 +303,9 @@ def test_numpy_array_equal_object_message(self): \\[left\\]: \\[2011-01-01 00:00:00, 2011-01-01 00:00:00\\] \\[right\\]: \\[2011-01-01 00:00:00, 2011-01-02 00:00:00\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(a, b) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(a, b) def test_numpy_array_equal_copy_flag(self): @@ -314,10 +313,10 @@ def test_numpy_array_equal_copy_flag(self): b = a.copy() c = a.view() expected = r'array\(\[1, 2, 3\]\) is not array\(\[1, 2, 3\]\)' - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(a, b, check_same='same') expected = r'array\(\[1, 2, 3\]\) is array\(\[1, 2, 3\]\)' - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(a, c, check_same='copy') def test_assert_almost_equal_iterable_message(self): @@ -328,7 +327,7 @@ def test_assert_almost_equal_iterable_message(self): \\[left\\]: 2 \\[right\\]: 3""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal([1, 2], [3, 4, 5]) expected = """Iterable are different @@ -337,7 +336,7 @@ def test_assert_almost_equal_iterable_message(self): \\[left\\]: \\[1, 2\\] \\[right\\]: \\[1, 3\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal([1, 2], [1, 3]) @@ -355,7 +354,7 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, exact=False) expected = 
"""MultiIndex level \\[1\\] are different @@ -368,9 +367,9 @@ def test_index_equal_message(self): ('B', 3), ('B', 4)]) idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, check_exact=False) expected = """Index are different @@ -381,9 +380,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3, 4]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, check_exact=False) expected = """Index are different @@ -394,9 +393,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3.0]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, exact=True) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, exact=True, check_exact=False) expected = """Index are different @@ -407,7 +406,7 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3.]) idx2 = pd.Index([1, 2, 3.0000000001]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) # must success @@ -421,9 +420,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3.]) idx2 = pd.Index([1, 2, 3.0001]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, check_exact=False) # must success assert_index_equal(idx1, idx2, check_exact=False, @@ -437,9 +436,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 4]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, check_less_precise=True) expected = """MultiIndex level \\[1\\] are different @@ -452,9 +451,9 @@ def test_index_equal_message(self): ('B', 3), ('B', 4)]) idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, check_exact=False) def test_index_equal_metadata_message(self): @@ -467,7 +466,7 @@ def test_index_equal_metadata_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3], name='x') - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) # same name, should pass @@ -484,7 +483,7 @@ def test_index_equal_metadata_message(self): idx1 = 
pd.Index([1, 2, 3], name=np.nan) idx2 = pd.Index([1, 2, 3], name=pd.NaT) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) @@ -566,7 +565,7 @@ def test_series_equal_message(self): \\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\) \\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 3, 4])) expected = """Series are different @@ -575,9 +574,9 @@ def test_series_equal_message(self): \\[left\\]: \\[1, 2, 3\\] \\[right\\]: \\[1, 2, 4\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4])) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]), check_less_precise=True) @@ -637,7 +636,7 @@ def test_frame_equal_message(self): \\[left\\]: \\(3, 2\\) \\[right\\]: \\(3, 1\\)""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'A': [1, 2, 3]})) @@ -647,7 +646,7 @@ def test_frame_equal_message(self): \\[left\\]: Index\\(\\[u?'a', u?'b', u?'c'\\], dtype='object'\\) \\[right\\]: Index\\(\\[u?'a', u?'b', u?'d'\\], dtype='object'\\)""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['a', 'b', 'c']), pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, @@ -659,7 +658,7 @@ def test_frame_equal_message(self): \\[left\\]: Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) \\[right\\]: Index\\(\\[u?'A', u?'b'\\], dtype='object'\\)""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['a', 'b', 'c']), pd.DataFrame({'A': [1, 2, 3], 'b': [4, 5, 6]}, @@ -671,11 +670,11 @@ def test_frame_equal_message(self): \\[left\\]: \\[4, 5, 6\\] \\[right\\]: \\[4, 5, 7\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]})) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]}), by_blocks=True) @@ -693,7 +692,7 @@ def test_categorical_equal_message(self): a = pd.Categorical([1, 2, 3, 4]) b = pd.Categorical([1, 2, 3, 5]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): tm.assert_categorical_equal(a, b) expected = """Categorical\\.codes are different @@ -704,7 +703,7 @@ def test_categorical_equal_message(self): a = pd.Categorical([1, 2, 4, 3], categories=[1, 2, 3, 4]) b = pd.Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): tm.assert_categorical_equal(a, b) expected = """Categorical are different @@ -715,7 +714,7 @@ def 
test_categorical_equal_message(self): a = pd.Categorical([1, 2, 3, 4], ordered=False) b = pd.Categorical([1, 2, 3, 4], ordered=True) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): tm.assert_categorical_equal(a, b) diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py index 375463ec25c36..6581e7688a32f 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/test_util.py @@ -93,7 +93,7 @@ class TestValidateArgs(tm.TestCase): def test_bad_min_fname_arg_count(self): msg = "'max_fname_arg_count' must be non-negative" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): validate_args(self.fname, (None,), -1, 'foo') def test_bad_arg_length_max_value_single(self): @@ -108,7 +108,7 @@ def test_bad_arg_length_max_value_single(self): .format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): validate_args(self.fname, args, min_fname_arg_count, compat_args) @@ -125,7 +125,7 @@ def test_bad_arg_length_max_value_multiple(self): .format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): validate_args(self.fname, args, min_fname_arg_count, compat_args) @@ -144,7 +144,7 @@ def test_not_all_defaults(self): arg_vals = (1, -1, 3) for i in range(1, 3): - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): validate_args(self.fname, arg_vals[:i], 2, compat_args) def test_validation(self): @@ -173,7 +173,7 @@ def test_bad_kwarg(self): r"keyword argument '{arg}'".format( fname=self.fname, arg=badarg)) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): validate_kwargs(self.fname, kwargs, compat_args) def test_not_all_none(self): @@ -194,7 +194,7 @@ def test_not_all_none(self): kwargs = dict(zip(kwarg_keys[:i], kwarg_vals[:i])) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): validate_kwargs(self.fname, kwargs, compat_args) def test_validation(self): @@ -213,10 +213,11 @@ def test_validate_bool_kwarg(self): for name in arg_names: for value in invalid_values: - with tm.assertRaisesRegexp(ValueError, - ("For argument \"%s\" expected " - "type bool, received type %s") % - (name, type(value).__name__)): + with tm.assert_raises_regex(ValueError, + "For argument \"%s\" " + "expected type bool, " + "received type %s" % + (name, type(value).__name__)): validate_bool_kwarg(value, name) for value in valid_values: @@ -239,7 +240,7 @@ def test_invalid_total_length_max_length_one(self): .format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): validate_args_and_kwargs(self.fname, args, kwargs, min_fname_arg_count, compat_args) @@ -257,7 +258,7 @@ def test_invalid_total_length_max_length_multiple(self): .format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): validate_args_and_kwargs(self.fname, args, kwargs, min_fname_arg_count, compat_args) @@ -276,17 +277,17 @@ def test_no_args_with_kwargs(self): args = () kwargs = {'foo': -5, bad_arg: 2} - tm.assertRaisesRegexp(ValueError, msg, - validate_args_and_kwargs, - self.fname, args, kwargs, - 
min_fname_arg_count, compat_args) + tm.assert_raises_regex(ValueError, msg, + validate_args_and_kwargs, + self.fname, args, kwargs, + min_fname_arg_count, compat_args) args = (-5, 2) kwargs = {} - tm.assertRaisesRegexp(ValueError, msg, - validate_args_and_kwargs, - self.fname, args, kwargs, - min_fname_arg_count, compat_args) + tm.assert_raises_regex(ValueError, msg, + validate_args_and_kwargs, + self.fname, args, kwargs, + min_fname_arg_count, compat_args) def test_duplicate_argument(self): min_fname_arg_count = 2 @@ -300,7 +301,7 @@ def test_duplicate_argument(self): msg = (r"{fname}\(\) got multiple values for keyword " r"argument '{arg}'".format(fname=self.fname, arg='foo')) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): validate_args_and_kwargs(self.fname, args, kwargs, min_fname_arg_count, compat_args) @@ -327,7 +328,7 @@ def test_cannot_create_instance_of_stolenbuffer(self): ``move_into_mutable_buffer`` which has a bunch of checks in it. """ msg = "cannot create 'pandas.util._move.stolenbuf' instances" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): stolenbuf() def test_more_than_one_ref(self): diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index aea2525a3a1f9..adfecc90129e9 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -74,7 +74,7 @@ def test_select_bad_cols(self): pytest.raises(KeyError, g.__getitem__, ['C']) # g[['C']] pytest.raises(KeyError, g.__getitem__, ['A', 'C']) # g[['A', 'C']] - with tm.assertRaisesRegexp(KeyError, '^[^A]+$'): + with tm.assert_raises_regex(KeyError, '^[^A]+$'): # A should not be referenced as a bad column... # will have to rethink regex if you change message! g[['A', 'C']] @@ -352,10 +352,10 @@ def test_numpy_compat(self): msg = "numpy operations are not valid with window objects" for func in ('sum', 'mean'): - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(w, func), 1, 2, 3) - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(w, func), dtype=np.float64) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(w, func), 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(w, func), dtype=np.float64) class TestRolling(Base): @@ -430,10 +430,10 @@ def test_numpy_compat(self): msg = "numpy operations are not valid with window objects" for func in ('std', 'mean', 'sum', 'max', 'min', 'var'): - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(r, func), 1, 2, 3) - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(r, func), dtype=np.float64) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(r, func), 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(r, func), dtype=np.float64) def test_closed(self): df = DataFrame({'A': [0, 1, 2, 3, 4]}) @@ -478,10 +478,10 @@ def test_numpy_compat(self): msg = "numpy operations are not valid with window objects" for func in ('std', 'mean', 'sum', 'max', 'min', 'var'): - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(e, func), 1, 2, 3) - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(e, func), dtype=np.float64) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(e, func), 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(e, func), dtype=np.float64) class TestEWM(Base): @@ -540,10 +540,10 @@ def test_numpy_compat(self): msg = "numpy operations are not valid with window objects" for func in ('std', 
'mean', 'var'): - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(e, func), 1, 2, 3) - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(e, func), dtype=np.float64) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(e, func), 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(e, func), dtype=np.float64) class TestDeprecations(Base): @@ -1825,10 +1825,10 @@ def test_no_pairwise_with_other(self, f): tm.assert_index_equal(result.index, expected_index) tm.assert_index_equal(result.columns, expected_columns) else: - tm.assertRaisesRegexp( + tm.assert_raises_regex( ValueError, "'arg1' columns are not unique", f, df, self.df2) - tm.assertRaisesRegexp( + tm.assert_raises_regex( ValueError, "'arg2' columns are not unique", f, self.df2, df) diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py index 1376101412112..290c03af3be4b 100644 --- a/pandas/tests/tools/test_numeric.py +++ b/pandas/tests/tools/test_numeric.py @@ -39,7 +39,7 @@ def test_series_numeric(self): def test_error(self): s = pd.Series([1, -3.14, 'apple']) msg = 'Unable to parse string "apple" at position 2' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): to_numeric(s, errors='raise') res = to_numeric(s, errors='ignore') @@ -52,13 +52,13 @@ def test_error(self): s = pd.Series(['orange', 1, -3.14, 'apple']) msg = 'Unable to parse string "orange" at position 0' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): to_numeric(s, errors='raise') def test_error_seen_bool(self): s = pd.Series([True, False, 'apple']) msg = 'Unable to parse string "apple" at position 2' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): to_numeric(s, errors='raise') res = to_numeric(s, errors='ignore') @@ -149,10 +149,10 @@ def test_all_nan(self): def test_type_check(self): # GH 11776 df = pd.DataFrame({'a': [1, -3.14, 7], 'b': ['4', '5', '6']}) - with tm.assertRaisesRegexp(TypeError, "1-d array"): + with tm.assert_raises_regex(TypeError, "1-d array"): to_numeric(df) for errors in ['ignore', 'raise', 'coerce']: - with tm.assertRaisesRegexp(TypeError, "1-d array"): + with tm.assert_raises_regex(TypeError, "1-d array"): to_numeric(df, errors=errors) def test_scalar(self): @@ -253,7 +253,7 @@ def test_non_hashable(self): res = pd.to_numeric(s, errors='ignore') tm.assert_series_equal(res, pd.Series([[10.0, 2], 1.0, 'apple'])) - with tm.assertRaisesRegexp(TypeError, "Invalid object type"): + with tm.assert_raises_regex(TypeError, "Invalid object type"): pd.to_numeric(s) def test_downcast(self): @@ -274,7 +274,7 @@ def test_downcast(self): smallest_float_dtype = float_32_char for data in (mixed_data, int_data, date_data): - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): pd.to_numeric(data, downcast=invalid_downcast) expected = np.array([1, 2, 3], dtype=np.int64) diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index c9de997cdf067..af544d10a737c 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -101,7 +101,8 @@ def test_to_offset_multiple(self): assert (result == expected) # malformed - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: 2h20m'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: 2h20m'): frequencies.to_offset('2h20m') def test_to_offset_negative(self): @@ -123,17 
+124,23 @@ def test_to_offset_negative(self): def test_to_offset_invalid(self): # GH 13930 - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: U1'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: U1'): frequencies.to_offset('U1') - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: -U'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: -U'): frequencies.to_offset('-U') - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: 3U1'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: 3U1'): frequencies.to_offset('3U1') - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: -2-3U'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: -2-3U'): frequencies.to_offset('-2-3U') - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: -2D:3H'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: -2D:3H'): frequencies.to_offset('-2D:3H') - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: 1.5.0S'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: 1.5.0S'): frequencies.to_offset('1.5.0S') # split offsets with spaces are valid @@ -146,10 +153,11 @@ def test_to_offset_invalid(self): # special cases assert frequencies.to_offset('2SMS-15') == offsets.SemiMonthBegin(2) - with tm.assertRaisesRegexp(ValueError, - 'Invalid frequency: 2SMS-15-15'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: 2SMS-15-15'): frequencies.to_offset('2SMS-15-15') - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: 2SMS-15D'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: 2SMS-15D'): frequencies.to_offset('2SMS-15D') def test_to_offset_leading_zero(self): @@ -244,7 +252,8 @@ def test_anchored_shortcuts(self): 'SMS-1', 'SMS-28', 'SMS-30', 'SMS-BAR', 'BSMS', 'SMS--2'] for invalid_anchor in invalid_anchors: - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: '): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: '): frequencies.to_offset(invalid_anchor) @@ -306,7 +315,7 @@ def _assert_depr(freq, expected, aliases): msg = frequencies._INVALID_FREQ_ERROR for alias in aliases: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): frequencies._period_str_to_code(alias) _assert_depr("M", 3000, ["MTH", "MONTH", "MONTHLY"]) @@ -780,7 +789,7 @@ def test_series(self): for freq in ['Y']: msg = frequencies._INVALID_FREQ_ERROR - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): s = Series(period_range('2013', periods=10, freq=freq)) pytest.raises(TypeError, lambda: frequencies.infer_freq(s)) @@ -807,10 +816,10 @@ def test_legacy_offset_warnings(self): msg = frequencies._INVALID_FREQ_ERROR for freq in freqs: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): frequencies.get_offset(freq) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): date_range('2011-01-01', periods=5, freq=freq) diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index ff38d5ec46a44..cb3fc3b60226f 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -35,7 +35,6 @@ from pandas.io.pickle import read_pickle from pandas._libs.tslib import normalize_date, NaT, Timestamp, Timedelta import pandas._libs.tslib as tslib -from pandas.util.testing import assertRaisesRegexp import pandas.util.testing as tm from pandas.tseries.holiday import 
USFederalHolidayCalendar @@ -2226,7 +2225,8 @@ def test_repr(self): def test_corner(self): pytest.raises(ValueError, Week, weekday=7) - assertRaisesRegexp(ValueError, "Day must be", Week, weekday=-1) + tm.assert_raises_regex( + ValueError, "Day must be", Week, weekday=-1) def test_isAnchored(self): self.assertTrue(Week(weekday=0).isAnchored()) @@ -2291,16 +2291,16 @@ class TestWeekOfMonth(Base): _offset = WeekOfMonth def test_constructor(self): - assertRaisesRegexp(ValueError, "^N cannot be 0", WeekOfMonth, n=0, - week=1, weekday=1) - assertRaisesRegexp(ValueError, "^Week", WeekOfMonth, n=1, week=4, - weekday=0) - assertRaisesRegexp(ValueError, "^Week", WeekOfMonth, n=1, week=-1, - weekday=0) - assertRaisesRegexp(ValueError, "^Day", WeekOfMonth, n=1, week=0, - weekday=-1) - assertRaisesRegexp(ValueError, "^Day", WeekOfMonth, n=1, week=0, - weekday=7) + tm.assert_raises_regex(ValueError, "^N cannot be 0", + WeekOfMonth, n=0, week=1, weekday=1) + tm.assert_raises_regex(ValueError, "^Week", WeekOfMonth, + n=1, week=4, weekday=0) + tm.assert_raises_regex(ValueError, "^Week", WeekOfMonth, + n=1, week=-1, weekday=0) + tm.assert_raises_regex(ValueError, "^Day", WeekOfMonth, + n=1, week=0, weekday=-1) + tm.assert_raises_regex(ValueError, "^Day", WeekOfMonth, + n=1, week=0, weekday=7) def test_repr(self): self.assertEqual(repr(WeekOfMonth(weekday=1, week=2)), @@ -2377,12 +2377,13 @@ class TestLastWeekOfMonth(Base): _offset = LastWeekOfMonth def test_constructor(self): - assertRaisesRegexp(ValueError, "^N cannot be 0", LastWeekOfMonth, n=0, - weekday=1) + tm.assert_raises_regex(ValueError, "^N cannot be 0", + LastWeekOfMonth, n=0, weekday=1) - assertRaisesRegexp(ValueError, "^Day", LastWeekOfMonth, n=1, - weekday=-1) - assertRaisesRegexp(ValueError, "^Day", LastWeekOfMonth, n=1, weekday=7) + tm.assert_raises_regex(ValueError, "^Day", LastWeekOfMonth, n=1, + weekday=-1) + tm.assert_raises_regex( + ValueError, "^Day", LastWeekOfMonth, n=1, weekday=7) def test_offset(self): # Saturday @@ -4567,9 +4568,9 @@ def test_get_offset_name(self): def test_get_offset(): - with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR): + with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): get_offset('gibberish') - with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR): + with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): get_offset('QS-JAN-B') pairs = [ @@ -4597,7 +4598,7 @@ def test_get_offset(): def test_get_offset_legacy(): pairs = [('w@Sat', Week(weekday=5))] for name, expected in pairs: - with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR): + with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): get_offset(name) @@ -4637,7 +4638,7 @@ def test_get_standard_freq(): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): assert fstr == get_standard_freq(('W', 1)) - with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR): + with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): get_standard_freq('WeEk') @@ -4646,7 +4647,7 @@ def test_get_standard_freq(): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): assert fstr == get_standard_freq('5q') - with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR): + with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): get_standard_freq('5QuarTer') diff --git a/pandas/tests/tseries/test_timezones.py 
b/pandas/tests/tseries/test_timezones.py index e3f2c242e3294..807d6866cbf74 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -1335,8 +1335,8 @@ def test_series_frame_tz_localize(self): # Can't localize if already tz-aware rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') ts = Series(1, index=rng) - tm.assertRaisesRegexp(TypeError, 'Already tz-aware', ts.tz_localize, - 'US/Eastern') + tm.assert_raises_regex(TypeError, 'Already tz-aware', + ts.tz_localize, 'US/Eastern') def test_series_frame_tz_convert(self): rng = date_range('1/1/2011', periods=200, freq='D', tz='US/Eastern') @@ -1359,8 +1359,8 @@ def test_series_frame_tz_convert(self): # can't convert tz-naive rng = date_range('1/1/2011', periods=200, freq='D') ts = Series(1, index=rng) - tm.assertRaisesRegexp(TypeError, "Cannot convert tz-naive", - ts.tz_convert, 'US/Eastern') + tm.assert_raises_regex(TypeError, "Cannot convert tz-naive", + ts.tz_convert, 'US/Eastern') def test_tz_convert_roundtrip(self): for tz in self.timezones: diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 75d24938ed309..3f07937a6e552 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2423,7 +2423,8 @@ def stdin_encoding(encoding=None): sys.stdin = _stdin -def assertRaisesRegexp(_exception, _regexp, _callable=None, *args, **kwargs): +def assert_raises_regex(_exception, _regexp, _callable=None, + *args, **kwargs): """ Check that the specified Exception is raised and that the error message matches a given regular expression pattern. This may be a regular @@ -2440,27 +2441,26 @@ def assertRaisesRegexp(_exception, _regexp, _callable=None, *args, **kwargs): Examples -------- - >>> assertRaisesRegexp(ValueError, 'invalid literal for.*XYZ', - ... int, 'XYZ') + >>> assert_raises_regex(ValueError, 'invalid literal for.*XYZ', int, 'XYZ') >>> import re - >>> assertRaisesRegexp(ValueError, re.compile('literal'), int, 'XYZ') + >>> assert_raises_regex(ValueError, re.compile('literal'), int, 'XYZ') If an exception of a different type is raised, it bubbles up. - >>> assertRaisesRegexp(TypeError, 'literal', int, 'XYZ') + >>> assert_raises_regex(TypeError, 'literal', int, 'XYZ') Traceback (most recent call last): ... ValueError: invalid literal for int() with base 10: 'XYZ' >>> dct = dict() - >>> assertRaisesRegexp(KeyError, 'pear', dct.__getitem__, 'apple') + >>> assert_raises_regex(KeyError, 'pear', dct.__getitem__, 'apple') Traceback (most recent call last): ... AssertionError: "pear" does not match "'apple'" You can also use this in a with statement. - >>> with assertRaisesRegexp(TypeError, 'unsupported operand type\(s\)'): + >>> with assert_raises_regex(TypeError, 'unsupported operand type\(s\)'): ... 1 + {} - >>> with assertRaisesRegexp(TypeError, 'banana'): + >>> with assert_raises_regex(TypeError, 'banana'): ... 'apple'[0] = 'b' Traceback (most recent call last): ... @@ -2477,7 +2477,7 @@ def assertRaisesRegexp(_exception, _regexp, _callable=None, *args, **kwargs): class _AssertRaisesContextmanager(object): """ - Context manager behind assertRaisesRegexp. + Context manager behind `assert_raises_regex`. """ def __init__(self, exception, regexp=None): From a8aa2e1304e4c4837c50c95bc036325b49875034 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 26 Apr 2017 11:11:05 -0500 Subject: [PATCH 473/933] DOC: Fix table styling in main docs (#16145) When switching to nbsphinx, I modified the site's CSS so that the converted notebook looks decent. 
This caused some unfortunate changes to tables elsewhere in the docs. This change fixes the headers to be left-aligned in the main site, and right-aligned for the tables generated by `df.style` in the nbsphinx-converted notebook. xref https://github.com/pandas-dev/pandas/pull/15581/ --- doc/source/themes/nature_with_gtoc/static/nature.css_t | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/themes/nature_with_gtoc/static/nature.css_t b/doc/source/themes/nature_with_gtoc/static/nature.css_t index 1adaaf58d79c5..b61068ee28bef 100644 --- a/doc/source/themes/nature_with_gtoc/static/nature.css_t +++ b/doc/source/themes/nature_with_gtoc/static/nature.css_t @@ -315,7 +315,6 @@ thead { vertical-align: bottom; } tr, th, td { - text-align: right; vertical-align: middle; padding: 0.5em 0.5em; line-height: normal; @@ -326,6 +325,9 @@ tr, th, td { th { font-weight: bold; } +th.col_heading { + text-align: right; +} tbody tr:nth-child(odd) { background: #f5f5f5; } From 3b80ed33e00957fec4a79cde19df66d809b5d3a7 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 26 Apr 2017 12:26:32 -0400 Subject: [PATCH 474/933] MAINT: Remove vestigial assertRaisesRegexp (#16148) Removes the remaining assertRaisesRegexp usages that slipped in before gh-16119 was merged. --- pandas/tests/dtypes/test_common.py | 2 +- pandas/tests/series/test_constructors.py | 2 +- pandas/tests/test_base.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index c4ef5e48b4db9..2aad1b6baaac0 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -20,7 +20,7 @@ def test_invalid_dtype_error(self): msg = 'not understood' invalid_list = [pd.Timestamp, 'pd.Timestamp', list] for dtype in invalid_list: - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): pandas_dtype(dtype) valid_list = [object, 'float64', np.object_, np.dtype('object'), 'O', diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index c461556644275..a870667ff3f96 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -35,7 +35,7 @@ def test_invalid_dtype(self): msg = 'not understood' invalid_list = [pd.Timestamp, 'pd.Timestamp', list] for dtype in invalid_list: - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): Series([], name='time', dtype=dtype) def test_scalar_conversion(self): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 86343e441f49a..e4f39197421a0 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -45,7 +45,7 @@ class CheckImmutable(object): mutable_regex = re.compile('does not support mutable operations') def check_mutable_error(self, *args, **kwargs): - # Pass whatever function you normally would to assertRaisesRegexp + # Pass whatever function you normally would to assert_raises_regex # (after the Exception kind).
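            # A minimal, hedged sketch of the renamed helper's two calling
            # forms; `SomeError`, `pattern`, and `do_thing` are hypothetical
            # stand-ins, not names from this patch. Per the signature
            # assert_raises_regex(_exception, _regexp, _callable=None,
            # *args, **kwargs), it works both as a context manager and
            # with an explicit callable:
            #
            #     with tm.assert_raises_regex(SomeError, pattern):
            #         do_thing()
            #
            #     tm.assert_raises_regex(SomeError, pattern,
            #                            do_thing, *args, **kwargs)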
tm.assert_raises_regex( TypeError, self.mutable_regex, *args, **kwargs) From cefc8c0ed8c80346527aeeeebb9bc8a8861dd27b Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 26 Apr 2017 20:42:33 -0400 Subject: [PATCH 475/933] MAINT: Remove self.assertFalse from testing (#16151) --- pandas/tests/computation/test_eval.py | 4 +- pandas/tests/dtypes/test_dtypes.py | 120 +++++----- pandas/tests/dtypes/test_inference.py | 224 +++++++++--------- pandas/tests/dtypes/test_missing.py | 16 +- pandas/tests/frame/test_analytics.py | 18 +- pandas/tests/frame/test_api.py | 12 +- .../tests/frame/test_axis_select_reindex.py | 4 +- pandas/tests/frame/test_block_internals.py | 6 +- pandas/tests/frame/test_constructors.py | 8 +- pandas/tests/frame/test_indexing.py | 5 +- pandas/tests/frame/test_mutate_columns.py | 2 +- pandas/tests/frame/test_operators.py | 22 +- pandas/tests/frame/test_repr_info.py | 16 +- pandas/tests/frame/test_timeseries.py | 2 +- pandas/tests/groupby/test_groupby.py | 24 +- pandas/tests/indexes/common.py | 18 +- pandas/tests/indexes/datetimelike.py | 2 +- pandas/tests/indexes/datetimes/test_astype.py | 2 +- .../indexes/datetimes/test_construction.py | 2 +- .../tests/indexes/datetimes/test_datetime.py | 4 +- pandas/tests/indexes/datetimes/test_misc.py | 2 +- pandas/tests/indexes/datetimes/test_ops.py | 44 ++-- pandas/tests/indexes/datetimes/test_tools.py | 3 +- pandas/tests/indexes/period/test_ops.py | 32 +-- pandas/tests/indexes/period/test_period.py | 18 +- pandas/tests/indexes/test_base.py | 54 ++--- pandas/tests/indexes/test_category.py | 30 +-- pandas/tests/indexes/test_interval.py | 23 +- pandas/tests/indexes/test_multi.py | 57 +++-- pandas/tests/indexes/test_numeric.py | 16 +- pandas/tests/indexes/test_range.py | 18 +- .../tests/indexes/timedeltas/test_astype.py | 2 +- pandas/tests/indexes/timedeltas/test_ops.py | 22 +- .../indexes/timedeltas/test_timedelta.py | 4 +- pandas/tests/indexing/test_floats.py | 2 +- pandas/tests/indexing/test_indexing.py | 2 +- pandas/tests/indexing/test_loc.py | 4 +- pandas/tests/indexing/test_multiindex.py | 6 +- pandas/tests/io/formats/test_format.py | 58 ++--- pandas/tests/io/formats/test_to_html.py | 2 +- pandas/tests/io/json/test_pandas.py | 2 +- pandas/tests/io/parser/common.py | 8 +- pandas/tests/io/parser/test_network.py | 20 +- pandas/tests/io/test_common.py | 2 +- pandas/tests/io/test_html.py | 6 +- pandas/tests/io/test_pytables.py | 16 +- pandas/tests/io/test_s3.py | 2 +- pandas/tests/io/test_sql.py | 13 +- pandas/tests/plotting/common.py | 4 +- pandas/tests/plotting/test_datetimelike.py | 24 +- pandas/tests/plotting/test_frame.py | 2 +- pandas/tests/plotting/test_hist_method.py | 10 +- pandas/tests/plotting/test_series.py | 8 +- pandas/tests/reshape/test_hashing.py | 6 +- pandas/tests/reshape/test_merge.py | 4 +- pandas/tests/reshape/test_merge_asof.py | 6 +- pandas/tests/reshape/test_pivot.py | 2 +- pandas/tests/scalar/test_period.py | 4 +- pandas/tests/scalar/test_timedelta.py | 6 +- pandas/tests/scalar/test_timestamp.py | 50 ++-- pandas/tests/series/test_analytics.py | 12 +- pandas/tests/series/test_api.py | 4 +- pandas/tests/series/test_asof.py | 2 +- pandas/tests/series/test_constructors.py | 8 +- pandas/tests/series/test_datetime_values.py | 2 +- pandas/tests/series/test_indexing.py | 20 +- pandas/tests/series/test_missing.py | 8 +- pandas/tests/series/test_operators.py | 2 +- pandas/tests/series/test_repr.py | 6 +- pandas/tests/sparse/test_array.py | 8 +- pandas/tests/sparse/test_libsparse.py | 4 +- pandas/tests/test_base.py | 26 +- 
pandas/tests/test_categorical.py | 62 ++--- pandas/tests/test_config.py | 3 +- pandas/tests/test_expressions.py | 10 +- pandas/tests/test_lib.py | 2 +- pandas/tests/test_multilevel.py | 12 +- pandas/tests/test_nanops.py | 8 +- pandas/tests/test_panel.py | 4 +- pandas/tests/test_panel4d.py | 6 +- pandas/tests/test_resample.py | 4 +- pandas/tests/test_strings.py | 4 +- pandas/tests/test_window.py | 28 +-- pandas/tests/tseries/test_offsets.py | 67 +++--- pandas/tests/tseries/test_timezones.py | 10 +- 85 files changed, 704 insertions(+), 723 deletions(-) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index cc14282934f16..52061f7f1e0ae 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1443,7 +1443,7 @@ def test_simple_in_ops(self): res = pd.eval('3 in (1, 2)', engine=self.engine, parser=self.parser) - self.assertFalse(res) + assert not res res = pd.eval('3 not in (1, 2)', engine=self.engine, parser=self.parser) @@ -1467,7 +1467,7 @@ def test_simple_in_ops(self): res = pd.eval('(3,) not in [(3,), 2]', engine=self.engine, parser=self.parser) - self.assertFalse(res) + assert not res res = pd.eval('[(3,)] in [[(3,)], 2]', engine=self.engine, parser=self.parser) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index e3bae3675a9e4..718efc08394b1 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -59,7 +59,7 @@ def test_hash_vs_equality(self): def test_equality(self): self.assertTrue(is_dtype_equal(self.dtype, 'category')) self.assertTrue(is_dtype_equal(self.dtype, CategoricalDtype())) - self.assertFalse(is_dtype_equal(self.dtype, 'foo')) + assert not is_dtype_equal(self.dtype, 'foo') def test_construction_from_string(self): result = CategoricalDtype.construct_from_string('category') @@ -71,8 +71,8 @@ def test_is_dtype(self): self.assertTrue(CategoricalDtype.is_dtype(self.dtype)) self.assertTrue(CategoricalDtype.is_dtype('category')) self.assertTrue(CategoricalDtype.is_dtype(CategoricalDtype())) - self.assertFalse(CategoricalDtype.is_dtype('foo')) - self.assertFalse(CategoricalDtype.is_dtype(np.float64)) + assert not CategoricalDtype.is_dtype('foo') + assert not CategoricalDtype.is_dtype(np.float64) def test_basic(self): @@ -85,12 +85,12 @@ def test_basic(self): # dtypes self.assertTrue(is_categorical_dtype(s.dtype)) self.assertTrue(is_categorical_dtype(s)) - self.assertFalse(is_categorical_dtype(np.dtype('float64'))) + assert not is_categorical_dtype(np.dtype('float64')) self.assertTrue(is_categorical(s.dtype)) self.assertTrue(is_categorical(s)) - self.assertFalse(is_categorical(np.dtype('float64'))) - self.assertFalse(is_categorical(1.0)) + assert not is_categorical(np.dtype('float64')) + assert not is_categorical(1.0) class TestDatetimeTZDtype(Base, tm.TestCase): @@ -136,8 +136,8 @@ def test_compat(self): self.assertTrue(is_datetime64_any_dtype('datetime64[ns, US/Eastern]')) self.assertTrue(is_datetime64_ns_dtype(self.dtype)) self.assertTrue(is_datetime64_ns_dtype('datetime64[ns, US/Eastern]')) - self.assertFalse(is_datetime64_dtype(self.dtype)) - self.assertFalse(is_datetime64_dtype('datetime64[ns, US/Eastern]')) + assert not is_datetime64_dtype(self.dtype) + assert not is_datetime64_dtype('datetime64[ns, US/Eastern]') def test_construction_from_string(self): result = DatetimeTZDtype('datetime64[ns, US/Eastern]') @@ -149,25 +149,23 @@ def test_construction_from_string(self): lambda: DatetimeTZDtype.construct_from_string('foo')) 
def test_is_dtype(self): - self.assertFalse(DatetimeTZDtype.is_dtype(None)) + assert not DatetimeTZDtype.is_dtype(None) self.assertTrue(DatetimeTZDtype.is_dtype(self.dtype)) self.assertTrue(DatetimeTZDtype.is_dtype('datetime64[ns, US/Eastern]')) - self.assertFalse(DatetimeTZDtype.is_dtype('foo')) + assert not DatetimeTZDtype.is_dtype('foo') self.assertTrue(DatetimeTZDtype.is_dtype(DatetimeTZDtype( 'ns', 'US/Pacific'))) - self.assertFalse(DatetimeTZDtype.is_dtype(np.float64)) + assert not DatetimeTZDtype.is_dtype(np.float64) def test_equality(self): self.assertTrue(is_dtype_equal(self.dtype, 'datetime64[ns, US/Eastern]')) self.assertTrue(is_dtype_equal(self.dtype, DatetimeTZDtype( 'ns', 'US/Eastern'))) - self.assertFalse(is_dtype_equal(self.dtype, 'foo')) - self.assertFalse(is_dtype_equal(self.dtype, DatetimeTZDtype('ns', - 'CET'))) - self.assertFalse(is_dtype_equal( - DatetimeTZDtype('ns', 'US/Eastern'), DatetimeTZDtype( - 'ns', 'US/Pacific'))) + assert not is_dtype_equal(self.dtype, 'foo') + assert not is_dtype_equal(self.dtype, DatetimeTZDtype('ns', 'CET')) + assert not is_dtype_equal(DatetimeTZDtype('ns', 'US/Eastern'), + DatetimeTZDtype('ns', 'US/Pacific')) # numpy compat self.assertTrue(is_dtype_equal(np.dtype("M8[ns]"), "datetime64[ns]")) @@ -182,13 +180,13 @@ def test_basic(self): # dtypes self.assertTrue(is_datetime64tz_dtype(s.dtype)) self.assertTrue(is_datetime64tz_dtype(s)) - self.assertFalse(is_datetime64tz_dtype(np.dtype('float64'))) - self.assertFalse(is_datetime64tz_dtype(1.0)) + assert not is_datetime64tz_dtype(np.dtype('float64')) + assert not is_datetime64tz_dtype(1.0) self.assertTrue(is_datetimetz(s)) self.assertTrue(is_datetimetz(s.dtype)) - self.assertFalse(is_datetimetz(np.dtype('float64'))) - self.assertFalse(is_datetimetz(1.0)) + assert not is_datetimetz(np.dtype('float64')) + assert not is_datetimetz(1.0) def test_dst(self): @@ -265,10 +263,10 @@ def test_coerce_to_dtype(self): PeriodDtype('period[3M]')) def test_compat(self): - self.assertFalse(is_datetime64_ns_dtype(self.dtype)) - self.assertFalse(is_datetime64_ns_dtype('period[D]')) - self.assertFalse(is_datetime64_dtype(self.dtype)) - self.assertFalse(is_datetime64_dtype('period[D]')) + assert not is_datetime64_ns_dtype(self.dtype) + assert not is_datetime64_ns_dtype('period[D]') + assert not is_datetime64_dtype(self.dtype) + assert not is_datetime64_dtype('period[D]') def test_construction_from_string(self): result = PeriodDtype('period[D]') @@ -297,14 +295,14 @@ def test_is_dtype(self): self.assertTrue(PeriodDtype.is_dtype(PeriodDtype('U'))) self.assertTrue(PeriodDtype.is_dtype(PeriodDtype('S'))) - self.assertFalse(PeriodDtype.is_dtype('D')) - self.assertFalse(PeriodDtype.is_dtype('3D')) - self.assertFalse(PeriodDtype.is_dtype('U')) - self.assertFalse(PeriodDtype.is_dtype('S')) - self.assertFalse(PeriodDtype.is_dtype('foo')) - self.assertFalse(PeriodDtype.is_dtype(np.object_)) - self.assertFalse(PeriodDtype.is_dtype(np.int64)) - self.assertFalse(PeriodDtype.is_dtype(np.float64)) + assert not PeriodDtype.is_dtype('D') + assert not PeriodDtype.is_dtype('3D') + assert not PeriodDtype.is_dtype('U') + assert not PeriodDtype.is_dtype('S') + assert not PeriodDtype.is_dtype('foo') + assert not PeriodDtype.is_dtype(np.object_) + assert not PeriodDtype.is_dtype(np.int64) + assert not PeriodDtype.is_dtype(np.float64) def test_equality(self): self.assertTrue(is_dtype_equal(self.dtype, 'period[D]')) @@ -312,8 +310,8 @@ def test_equality(self): self.assertTrue(is_dtype_equal(self.dtype, PeriodDtype('D'))) 
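        # A brief, hedged restatement of the dtype-equality semantics these
        # assertions exercise (drawn from the surrounding test lines): a
        # PeriodDtype compares equal to its own string alias, but not to a
        # bare frequency code or to a different frequency:
        #
        #     assert is_dtype_equal(PeriodDtype('D'), 'period[D]')
        #     assert not is_dtype_equal(PeriodDtype('D'), 'D')
        #     assert not is_dtype_equal(PeriodDtype('D'), PeriodDtype('2D'))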
self.assertTrue(is_dtype_equal(PeriodDtype('D'), PeriodDtype('D'))) - self.assertFalse(is_dtype_equal(self.dtype, 'D')) - self.assertFalse(is_dtype_equal(PeriodDtype('D'), PeriodDtype('2D'))) + assert not is_dtype_equal(self.dtype, 'D') + assert not is_dtype_equal(PeriodDtype('D'), PeriodDtype('2D')) def test_basic(self): self.assertTrue(is_period_dtype(self.dtype)) @@ -328,14 +326,14 @@ def test_basic(self): # dtypes # series results in object dtype currently, # is_period checks period_arraylike - self.assertFalse(is_period_dtype(s.dtype)) - self.assertFalse(is_period_dtype(s)) + assert not is_period_dtype(s.dtype) + assert not is_period_dtype(s) self.assertTrue(is_period(s)) - self.assertFalse(is_period_dtype(np.dtype('float64'))) - self.assertFalse(is_period_dtype(1.0)) - self.assertFalse(is_period(np.dtype('float64'))) - self.assertFalse(is_period(1.0)) + assert not is_period_dtype(np.dtype('float64')) + assert not is_period_dtype(1.0) + assert not is_period(np.dtype('float64')) + assert not is_period(1.0) def test_empty(self): dt = PeriodDtype() @@ -344,7 +342,7 @@ def test_empty(self): def test_not_string(self): # though PeriodDtype has object kind, it cannot be string - self.assertFalse(is_string_dtype(PeriodDtype('D'))) + assert not is_string_dtype(PeriodDtype('D')) class TestIntervalDtype(Base, tm.TestCase): @@ -388,14 +386,14 @@ def test_is_dtype(self): self.assertTrue(IntervalDtype.is_dtype(IntervalDtype('int64'))) self.assertTrue(IntervalDtype.is_dtype(IntervalDtype(np.int64))) - self.assertFalse(IntervalDtype.is_dtype('D')) - self.assertFalse(IntervalDtype.is_dtype('3D')) - self.assertFalse(IntervalDtype.is_dtype('U')) - self.assertFalse(IntervalDtype.is_dtype('S')) - self.assertFalse(IntervalDtype.is_dtype('foo')) - self.assertFalse(IntervalDtype.is_dtype(np.object_)) - self.assertFalse(IntervalDtype.is_dtype(np.int64)) - self.assertFalse(IntervalDtype.is_dtype(np.float64)) + assert not IntervalDtype.is_dtype('D') + assert not IntervalDtype.is_dtype('3D') + assert not IntervalDtype.is_dtype('U') + assert not IntervalDtype.is_dtype('S') + assert not IntervalDtype.is_dtype('foo') + assert not IntervalDtype.is_dtype(np.object_) + assert not IntervalDtype.is_dtype(np.int64) + assert not IntervalDtype.is_dtype(np.float64) def test_identity(self): self.assertEqual(IntervalDtype('interval[int64]'), @@ -424,9 +422,9 @@ def test_equality(self): self.assertTrue(is_dtype_equal(IntervalDtype('int64'), IntervalDtype('int64'))) - self.assertFalse(is_dtype_equal(self.dtype, 'int64')) - self.assertFalse(is_dtype_equal(IntervalDtype('int64'), - IntervalDtype('float64'))) + assert not is_dtype_equal(self.dtype, 'int64') + assert not is_dtype_equal(IntervalDtype('int64'), + IntervalDtype('float64')) def test_basic(self): self.assertTrue(is_interval_dtype(self.dtype)) @@ -440,8 +438,8 @@ def test_basic(self): # dtypes # series results in object dtype currently, - self.assertFalse(is_interval_dtype(s.dtype)) - self.assertFalse(is_interval_dtype(s)) + assert not is_interval_dtype(s.dtype) + assert not is_interval_dtype(s) def test_basic_dtype(self): self.assertTrue(is_interval_dtype('interval[int64]')) @@ -450,9 +448,9 @@ def test_basic_dtype(self): (IntervalIndex.from_breaks(np.arange(4)))) self.assertTrue(is_interval_dtype( IntervalIndex.from_breaks(date_range('20130101', periods=3)))) - self.assertFalse(is_interval_dtype('U')) - self.assertFalse(is_interval_dtype('S')) - self.assertFalse(is_interval_dtype('foo')) - self.assertFalse(is_interval_dtype(np.object_)) - 
self.assertFalse(is_interval_dtype(np.int64)) - self.assertFalse(is_interval_dtype(np.float64)) + assert not is_interval_dtype('U') + assert not is_interval_dtype('S') + assert not is_interval_dtype('foo') + assert not is_interval_dtype(np.object_) + assert not is_interval_dtype(np.int64) + assert not is_interval_dtype(np.float64) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 35720b32d756c..8dcf75e8a1aec 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -213,15 +213,15 @@ def test_isinf_scalar(self): # GH 11352 self.assertTrue(lib.isposinf_scalar(float('inf'))) self.assertTrue(lib.isposinf_scalar(np.inf)) - self.assertFalse(lib.isposinf_scalar(-np.inf)) - self.assertFalse(lib.isposinf_scalar(1)) - self.assertFalse(lib.isposinf_scalar('a')) + assert not lib.isposinf_scalar(-np.inf) + assert not lib.isposinf_scalar(1) + assert not lib.isposinf_scalar('a') self.assertTrue(lib.isneginf_scalar(float('-inf'))) self.assertTrue(lib.isneginf_scalar(-np.inf)) - self.assertFalse(lib.isneginf_scalar(np.inf)) - self.assertFalse(lib.isneginf_scalar(1)) - self.assertFalse(lib.isneginf_scalar('a')) + assert not lib.isneginf_scalar(np.inf) + assert not lib.isneginf_scalar(1) + assert not lib.isneginf_scalar('a') def test_maybe_convert_numeric_infinities(self): # see gh-13274 @@ -639,24 +639,24 @@ def test_is_datetimelike_array_all_nan_nat_like(self): arr = np.array([np.nan, pd.NaT, np.datetime64('nat')]) self.assertTrue(lib.is_datetime_array(arr)) self.assertTrue(lib.is_datetime64_array(arr)) - self.assertFalse(lib.is_timedelta_array(arr)) - self.assertFalse(lib.is_timedelta64_array(arr)) - self.assertFalse(lib.is_timedelta_or_timedelta64_array(arr)) + assert not lib.is_timedelta_array(arr) + assert not lib.is_timedelta64_array(arr) + assert not lib.is_timedelta_or_timedelta64_array(arr) arr = np.array([np.nan, pd.NaT, np.timedelta64('nat')]) - self.assertFalse(lib.is_datetime_array(arr)) - self.assertFalse(lib.is_datetime64_array(arr)) + assert not lib.is_datetime_array(arr) + assert not lib.is_datetime64_array(arr) self.assertTrue(lib.is_timedelta_array(arr)) self.assertTrue(lib.is_timedelta64_array(arr)) self.assertTrue(lib.is_timedelta_or_timedelta64_array(arr)) arr = np.array([np.nan, pd.NaT, np.datetime64('nat'), np.timedelta64('nat')]) - self.assertFalse(lib.is_datetime_array(arr)) - self.assertFalse(lib.is_datetime64_array(arr)) - self.assertFalse(lib.is_timedelta_array(arr)) - self.assertFalse(lib.is_timedelta64_array(arr)) - self.assertFalse(lib.is_timedelta_or_timedelta64_array(arr)) + assert not lib.is_datetime_array(arr) + assert not lib.is_datetime64_array(arr) + assert not lib.is_timedelta_array(arr) + assert not lib.is_timedelta64_array(arr) + assert not lib.is_timedelta_or_timedelta64_array(arr) arr = np.array([np.nan, pd.NaT]) self.assertTrue(lib.is_datetime_array(arr)) @@ -666,11 +666,11 @@ def test_is_datetimelike_array_all_nan_nat_like(self): self.assertTrue(lib.is_timedelta_or_timedelta64_array(arr)) arr = np.array([np.nan, np.nan], dtype=object) - self.assertFalse(lib.is_datetime_array(arr)) - self.assertFalse(lib.is_datetime64_array(arr)) - self.assertFalse(lib.is_timedelta_array(arr)) - self.assertFalse(lib.is_timedelta64_array(arr)) - self.assertFalse(lib.is_timedelta_or_timedelta64_array(arr)) + assert not lib.is_datetime_array(arr) + assert not lib.is_datetime64_array(arr) + assert not lib.is_timedelta_array(arr) + assert not lib.is_timedelta64_array(arr) + assert not 
lib.is_timedelta_or_timedelta64_array(arr) def test_date(self): @@ -720,10 +720,10 @@ def test_to_object_array_width(self): def test_is_period(self): self.assertTrue(lib.is_period(pd.Period('2011-01', freq='M'))) - self.assertFalse(lib.is_period(pd.PeriodIndex(['2011-01'], freq='M'))) - self.assertFalse(lib.is_period(pd.Timestamp('2011-01'))) - self.assertFalse(lib.is_period(1)) - self.assertFalse(lib.is_period(np.nan)) + assert not lib.is_period(pd.PeriodIndex(['2011-01'], freq='M')) + assert not lib.is_period(pd.Timestamp('2011-01')) + assert not lib.is_period(1) + assert not lib.is_period(np.nan) def test_categorical(self): @@ -758,18 +758,17 @@ def test_is_number(self): self.assertTrue(is_number(np.complex128(1 + 3j))) self.assertTrue(is_number(np.nan)) - self.assertFalse(is_number(None)) - self.assertFalse(is_number('x')) - self.assertFalse(is_number(datetime(2011, 1, 1))) - self.assertFalse(is_number(np.datetime64('2011-01-01'))) - self.assertFalse(is_number(Timestamp('2011-01-01'))) - self.assertFalse(is_number(Timestamp('2011-01-01', - tz='US/Eastern'))) - self.assertFalse(is_number(timedelta(1000))) - self.assertFalse(is_number(Timedelta('1 days'))) + assert not is_number(None) + assert not is_number('x') + assert not is_number(datetime(2011, 1, 1)) + assert not is_number(np.datetime64('2011-01-01')) + assert not is_number(Timestamp('2011-01-01')) + assert not is_number(Timestamp('2011-01-01', tz='US/Eastern')) + assert not is_number(timedelta(1000)) + assert not is_number(Timedelta('1 days')) # questionable - self.assertFalse(is_number(np.bool_(False))) + assert not is_number(np.bool_(False)) self.assertTrue(is_number(np.timedelta64(1, 'D'))) def test_is_bool(self): @@ -777,45 +776,43 @@ def test_is_bool(self): self.assertTrue(is_bool(np.bool(False))) self.assertTrue(is_bool(np.bool_(False))) - self.assertFalse(is_bool(1)) - self.assertFalse(is_bool(1.1)) - self.assertFalse(is_bool(1 + 3j)) - self.assertFalse(is_bool(np.int64(1))) - self.assertFalse(is_bool(np.float64(1.1))) - self.assertFalse(is_bool(np.complex128(1 + 3j))) - self.assertFalse(is_bool(np.nan)) - self.assertFalse(is_bool(None)) - self.assertFalse(is_bool('x')) - self.assertFalse(is_bool(datetime(2011, 1, 1))) - self.assertFalse(is_bool(np.datetime64('2011-01-01'))) - self.assertFalse(is_bool(Timestamp('2011-01-01'))) - self.assertFalse(is_bool(Timestamp('2011-01-01', - tz='US/Eastern'))) - self.assertFalse(is_bool(timedelta(1000))) - self.assertFalse(is_bool(np.timedelta64(1, 'D'))) - self.assertFalse(is_bool(Timedelta('1 days'))) + assert not is_bool(1) + assert not is_bool(1.1) + assert not is_bool(1 + 3j) + assert not is_bool(np.int64(1)) + assert not is_bool(np.float64(1.1)) + assert not is_bool(np.complex128(1 + 3j)) + assert not is_bool(np.nan) + assert not is_bool(None) + assert not is_bool('x') + assert not is_bool(datetime(2011, 1, 1)) + assert not is_bool(np.datetime64('2011-01-01')) + assert not is_bool(Timestamp('2011-01-01')) + assert not is_bool(Timestamp('2011-01-01', tz='US/Eastern')) + assert not is_bool(timedelta(1000)) + assert not is_bool(np.timedelta64(1, 'D')) + assert not is_bool(Timedelta('1 days')) def test_is_integer(self): self.assertTrue(is_integer(1)) self.assertTrue(is_integer(np.int64(1))) - self.assertFalse(is_integer(True)) - self.assertFalse(is_integer(1.1)) - self.assertFalse(is_integer(1 + 3j)) - self.assertFalse(is_integer(np.bool(False))) - self.assertFalse(is_integer(np.bool_(False))) - self.assertFalse(is_integer(np.float64(1.1))) - 
self.assertFalse(is_integer(np.complex128(1 + 3j))) - self.assertFalse(is_integer(np.nan)) - self.assertFalse(is_integer(None)) - self.assertFalse(is_integer('x')) - self.assertFalse(is_integer(datetime(2011, 1, 1))) - self.assertFalse(is_integer(np.datetime64('2011-01-01'))) - self.assertFalse(is_integer(Timestamp('2011-01-01'))) - self.assertFalse(is_integer(Timestamp('2011-01-01', - tz='US/Eastern'))) - self.assertFalse(is_integer(timedelta(1000))) - self.assertFalse(is_integer(Timedelta('1 days'))) + assert not is_integer(True) + assert not is_integer(1.1) + assert not is_integer(1 + 3j) + assert not is_integer(np.bool(False)) + assert not is_integer(np.bool_(False)) + assert not is_integer(np.float64(1.1)) + assert not is_integer(np.complex128(1 + 3j)) + assert not is_integer(np.nan) + assert not is_integer(None) + assert not is_integer('x') + assert not is_integer(datetime(2011, 1, 1)) + assert not is_integer(np.datetime64('2011-01-01')) + assert not is_integer(Timestamp('2011-01-01')) + assert not is_integer(Timestamp('2011-01-01', tz='US/Eastern')) + assert not is_integer(timedelta(1000)) + assert not is_integer(Timedelta('1 days')) # questionable self.assertTrue(is_integer(np.timedelta64(1, 'D'))) @@ -825,23 +822,22 @@ def test_is_float(self): self.assertTrue(is_float(np.float64(1.1))) self.assertTrue(is_float(np.nan)) - self.assertFalse(is_float(True)) - self.assertFalse(is_float(1)) - self.assertFalse(is_float(1 + 3j)) - self.assertFalse(is_float(np.bool(False))) - self.assertFalse(is_float(np.bool_(False))) - self.assertFalse(is_float(np.int64(1))) - self.assertFalse(is_float(np.complex128(1 + 3j))) - self.assertFalse(is_float(None)) - self.assertFalse(is_float('x')) - self.assertFalse(is_float(datetime(2011, 1, 1))) - self.assertFalse(is_float(np.datetime64('2011-01-01'))) - self.assertFalse(is_float(Timestamp('2011-01-01'))) - self.assertFalse(is_float(Timestamp('2011-01-01', - tz='US/Eastern'))) - self.assertFalse(is_float(timedelta(1000))) - self.assertFalse(is_float(np.timedelta64(1, 'D'))) - self.assertFalse(is_float(Timedelta('1 days'))) + assert not is_float(True) + assert not is_float(1) + assert not is_float(1 + 3j) + assert not is_float(np.bool(False)) + assert not is_float(np.bool_(False)) + assert not is_float(np.int64(1)) + assert not is_float(np.complex128(1 + 3j)) + assert not is_float(None) + assert not is_float('x') + assert not is_float(datetime(2011, 1, 1)) + assert not is_float(np.datetime64('2011-01-01')) + assert not is_float(Timestamp('2011-01-01')) + assert not is_float(Timestamp('2011-01-01', tz='US/Eastern')) + assert not is_float(timedelta(1000)) + assert not is_float(np.timedelta64(1, 'D')) + assert not is_float(Timedelta('1 days')) def test_is_datetime_dtypes(self): @@ -851,9 +847,9 @@ def test_is_datetime_dtypes(self): self.assertTrue(is_datetime64_dtype('datetime64')) self.assertTrue(is_datetime64_dtype('datetime64[ns]')) self.assertTrue(is_datetime64_dtype(ts)) - self.assertFalse(is_datetime64_dtype(tsa)) + assert not is_datetime64_dtype(tsa) - self.assertFalse(is_datetime64_ns_dtype('datetime64')) + assert not is_datetime64_ns_dtype('datetime64') self.assertTrue(is_datetime64_ns_dtype('datetime64[ns]')) self.assertTrue(is_datetime64_ns_dtype(ts)) self.assertTrue(is_datetime64_ns_dtype(tsa)) @@ -863,14 +859,14 @@ def test_is_datetime_dtypes(self): self.assertTrue(is_datetime64_any_dtype(ts)) self.assertTrue(is_datetime64_any_dtype(tsa)) - self.assertFalse(is_datetime64tz_dtype('datetime64')) - 
self.assertFalse(is_datetime64tz_dtype('datetime64[ns]')) - self.assertFalse(is_datetime64tz_dtype(ts)) + assert not is_datetime64tz_dtype('datetime64') + assert not is_datetime64tz_dtype('datetime64[ns]') + assert not is_datetime64tz_dtype(ts) self.assertTrue(is_datetime64tz_dtype(tsa)) for tz in ['US/Eastern', 'UTC']: dtype = 'datetime64[ns, {}]'.format(tz) - self.assertFalse(is_datetime64_dtype(dtype)) + assert not is_datetime64_dtype(dtype) self.assertTrue(is_datetime64tz_dtype(dtype)) self.assertTrue(is_datetime64_ns_dtype(dtype)) self.assertTrue(is_datetime64_any_dtype(dtype)) @@ -878,7 +874,7 @@ def test_is_datetime_dtypes(self): def test_is_timedelta(self): self.assertTrue(is_timedelta64_dtype('timedelta64')) self.assertTrue(is_timedelta64_dtype('timedelta64[ns]')) - self.assertFalse(is_timedelta64_ns_dtype('timedelta64')) + assert not is_timedelta64_ns_dtype('timedelta64') self.assertTrue(is_timedelta64_ns_dtype('timedelta64[ns]')) tdi = TimedeltaIndex([1e14, 2e14], dtype='timedelta64') @@ -887,8 +883,8 @@ def test_is_timedelta(self): self.assertTrue(is_timedelta64_ns_dtype(tdi.astype('timedelta64[ns]'))) # Conversion to Int64Index: - self.assertFalse(is_timedelta64_ns_dtype(tdi.astype('timedelta64'))) - self.assertFalse(is_timedelta64_ns_dtype(tdi.astype('timedelta64[h]'))) + assert not is_timedelta64_ns_dtype(tdi.astype('timedelta64')) + assert not is_timedelta64_ns_dtype(tdi.astype('timedelta64[h]')) class Testisscalar(tm.TestCase): @@ -909,13 +905,13 @@ def test_isscalar_builtin_scalars(self): self.assertTrue(is_scalar(pd.NaT)) def test_isscalar_builtin_nonscalars(self): - self.assertFalse(is_scalar({})) - self.assertFalse(is_scalar([])) - self.assertFalse(is_scalar([1])) - self.assertFalse(is_scalar(())) - self.assertFalse(is_scalar((1, ))) - self.assertFalse(is_scalar(slice(None))) - self.assertFalse(is_scalar(Ellipsis)) + assert not is_scalar({}) + assert not is_scalar([]) + assert not is_scalar([1]) + assert not is_scalar(()) + assert not is_scalar((1, )) + assert not is_scalar(slice(None)) + assert not is_scalar(Ellipsis) def test_isscalar_numpy_array_scalars(self): self.assertTrue(is_scalar(np.int64(1))) @@ -933,13 +929,13 @@ def test_isscalar_numpy_zerodim_arrays(self): np.array(np.datetime64('2014-01-01')), np.array(np.timedelta64(1, 'h')), np.array(np.datetime64('NaT'))]: - self.assertFalse(is_scalar(zerodim)) + assert not is_scalar(zerodim) self.assertTrue(is_scalar(lib.item_from_zerodim(zerodim))) def test_isscalar_numpy_arrays(self): - self.assertFalse(is_scalar(np.array([]))) - self.assertFalse(is_scalar(np.array([[]]))) - self.assertFalse(is_scalar(np.matrix('1; 2'))) + assert not is_scalar(np.array([])) + assert not is_scalar(np.array([[]])) + assert not is_scalar(np.matrix('1; 2')) def test_isscalar_pandas_scalars(self): self.assertTrue(is_scalar(Timestamp('2014-01-01'))) @@ -947,15 +943,15 @@ def test_isscalar_pandas_scalars(self): self.assertTrue(is_scalar(Period('2014-01-01'))) def test_lisscalar_pandas_containers(self): - self.assertFalse(is_scalar(Series())) - self.assertFalse(is_scalar(Series([1]))) - self.assertFalse(is_scalar(DataFrame())) - self.assertFalse(is_scalar(DataFrame([[1]]))) + assert not is_scalar(Series()) + assert not is_scalar(Series([1])) + assert not is_scalar(DataFrame()) + assert not is_scalar(DataFrame([[1]])) with catch_warnings(record=True): - self.assertFalse(is_scalar(Panel())) - self.assertFalse(is_scalar(Panel([[[1]]]))) - self.assertFalse(is_scalar(Index([]))) - self.assertFalse(is_scalar(Index([1]))) + assert not 
is_scalar(Panel()) + assert not is_scalar(Panel([[[1]]])) + assert not is_scalar(Index([])) + assert not is_scalar(Index([1])) def test_datetimeindex_from_empty_datetime64_array(): diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index c03ba2b7daf50..3e1a12d439b9a 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -49,12 +49,12 @@ class TestIsNull(tm.TestCase): def test_0d_array(self): self.assertTrue(isnull(np.array(np.nan))) - self.assertFalse(isnull(np.array(0.0))) - self.assertFalse(isnull(np.array(0))) + assert not isnull(np.array(0.0)) + assert not isnull(np.array(0)) # test object dtype self.assertTrue(isnull(np.array(np.nan, dtype=object))) - self.assertFalse(isnull(np.array(0.0, dtype=object))) - self.assertFalse(isnull(np.array(0, dtype=object))) + assert not isnull(np.array(0.0, dtype=object)) + assert not isnull(np.array(0, dtype=object)) def test_empty_object(self): @@ -65,12 +65,12 @@ def test_empty_object(self): tm.assert_numpy_array_equal(result, expected) def test_isnull(self): - self.assertFalse(isnull(1.)) + assert not isnull(1.) self.assertTrue(isnull(None)) self.assertTrue(isnull(np.NaN)) self.assertTrue(isnull(float('nan'))) - self.assertFalse(isnull(np.inf)) - self.assertFalse(isnull(-np.inf)) + assert not isnull(np.inf) + assert not isnull(-np.inf) # series for s in [tm.makeFloatSeries(), tm.makeStringSeries(), @@ -135,7 +135,7 @@ def test_isnull_numpy_nat(self): tm.assert_numpy_array_equal(result, expected) def test_isnull_datetime(self): - self.assertFalse(isnull(datetime.now())) + assert not isnull(datetime.now()) self.assertTrue(notnull(datetime.now())) idx = date_range('1/1/1990', periods=20) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 45d93c187e0b7..6268ccc27c7a6 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -558,11 +558,11 @@ def test_var_std(self): arr = np.repeat(np.random.random((1, 1000)), 1000, 0) result = nanops.nanvar(arr, axis=0) - self.assertFalse((result < 0).any()) + assert not (result < 0).any() if nanops._USE_BOTTLENECK: nanops._USE_BOTTLENECK = False result = nanops.nanvar(arr, axis=0) - self.assertFalse((result < 0).any()) + assert not (result < 0).any() nanops._USE_BOTTLENECK = True def test_numeric_only_flag(self): @@ -671,11 +671,11 @@ def test_sem(self): arr = np.repeat(np.random.random((1, 1000)), 1000, 0) result = nanops.nansem(arr, axis=0) - self.assertFalse((result < 0).any()) + assert not (result < 0).any() if nanops._USE_BOTTLENECK: nanops._USE_BOTTLENECK = False result = nanops.nansem(arr, axis=0) - self.assertFalse((result < 0).any()) + assert not (result < 0).any() nanops._USE_BOTTLENECK = True def test_skew(self): @@ -1131,8 +1131,8 @@ def __nonzero__(self): r0 = getattr(all_na, name)(axis=0) r1 = getattr(all_na, name)(axis=1) if name == 'any': - self.assertFalse(r0.any()) - self.assertFalse(r1.any()) + assert not r0.any() + assert not r1.any() else: self.assertTrue(r0.all()) self.assertTrue(r1.all()) @@ -1801,13 +1801,13 @@ def test_clip(self): median = self.frame.median().median() capped = self.frame.clip_upper(median) - self.assertFalse((capped.values > median).any()) + assert not (capped.values > median).any() floored = self.frame.clip_lower(median) - self.assertFalse((floored.values < median).any()) + assert not (floored.values < median).any() double = self.frame.clip(upper=median, lower=median) - self.assertFalse((double.values != median).any()) + assert
not (double.values != median).any() def test_dataframe_clip(self): # GH #2747 diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index bd4abd6fcd822..7669de17885f8 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -141,15 +141,15 @@ def test_get_agg_axis(self): def test_nonzero(self): self.assertTrue(self.empty.empty) - self.assertFalse(self.frame.empty) - self.assertFalse(self.mixed_frame.empty) + assert not self.frame.empty + assert not self.mixed_frame.empty # corner case df = DataFrame({'A': [1., 2., 3.], 'B': ['a', 'b', 'c']}, index=np.arange(3)) del df['A'] - self.assertFalse(df.empty) + assert not df.empty def test_iteritems(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) @@ -208,7 +208,7 @@ def test_itertuples(self): df3 = DataFrame(dict(('f' + str(i), [i]) for i in range(1024))) # will raise SyntaxError if trying to create namedtuple tup3 = next(df3.itertuples()) - self.assertFalse(hasattr(tup3, '_fields')) + assert not hasattr(tup3, '_fields') assert isinstance(tup3, tuple) def test_len(self): @@ -319,9 +319,9 @@ def test_series_put_names(self): def test_empty_nonzero(self): df = DataFrame([1, 2, 3]) - self.assertFalse(df.empty) + assert not df.empty df = pd.DataFrame(index=[1], columns=[1]) - self.assertFalse(df.empty) + assert not df.empty df = DataFrame(index=['a', 'b'], columns=['c', 'd']).dropna() self.assertTrue(df.empty) self.assertTrue(df.T.empty) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index b8be7c19203fa..61d0694eea382 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -129,7 +129,7 @@ def test_drop_multiindex_not_lexsorted(self): not_lexsorted_df = not_lexsorted_df.pivot_table( index='a', columns=['b', 'c'], values='d') not_lexsorted_df = not_lexsorted_df.reset_index() - self.assertFalse(not_lexsorted_df.columns.is_lexsorted()) + assert not not_lexsorted_df.columns.is_lexsorted() # compare the results tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) @@ -224,7 +224,7 @@ def test_reindex(self): # copy with no axes result = self.frame.reindex() assert_frame_equal(result, self.frame) - self.assertFalse(result is self.frame) + assert result is not self.frame def test_reindex_nan(self): df = pd.DataFrame([[1, 2], [3, 5], [7, 11], [9, 23]], diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 5e85b890be569..37615179a3f26 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -69,7 +69,7 @@ def test_consolidate_inplace(self): def test_as_matrix_consolidate(self): self.frame['E'] = 7. 
- self.assertFalse(self.frame._data.is_consolidated()) + assert not self.frame._data.is_consolidated() _ = self.frame.as_matrix() # noqa self.assertTrue(self.frame._data.is_consolidated()) @@ -326,7 +326,7 @@ def test_copy_blocks(self): _df.loc[:, column] = _df[column] + 1 # make sure we did not change the original DataFrame - self.assertFalse(_df[column].equals(df[column])) + assert not _df[column].equals(df[column]) def test_no_copy_blocks(self): # API/ENH 9607 @@ -399,7 +399,7 @@ def test_consolidate_datetime64(self): tm.assert_index_equal(pd.DatetimeIndex(df.ending), ser_ending.index) def test_is_mixed_type(self): - self.assertFalse(self.frame._is_mixed_type) + assert not self.frame._is_mixed_type self.assertTrue(self.mixed_frame._is_mixed_type) def test_get_numeric_data(self): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index db0293b71c3a3..e9a6f03abbe8d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1452,7 +1452,7 @@ def test_constructor_frame_copy(self): cop = DataFrame(self.frame, copy=True) cop['A'] = 5 self.assertTrue((cop['A'] == 5).all()) - self.assertFalse((self.frame['A'] == 5).all()) + assert not (self.frame['A'] == 5).all() def test_constructor_ndarray_copy(self): df = DataFrame(self.frame.values) @@ -1462,7 +1462,7 @@ def test_constructor_ndarray_copy(self): df = DataFrame(self.frame.values, copy=True) self.frame.values[6] = 6 - self.assertFalse((df.values[6] == 6).all()) + assert not (df.values[6] == 6).all() def test_constructor_series_copy(self): series = self.frame._series @@ -1470,7 +1470,7 @@ def test_constructor_series_copy(self): df = DataFrame({'A': series['A']}) df['A'][:] = 5 - self.assertFalse((series['A'] == 5).all()) + assert not (series['A'] == 5).all() def test_constructor_with_nas(self): # GH 5016 @@ -1512,7 +1512,7 @@ def test_constructor_lists_to_object_dtype(self): # from #1074 d = DataFrame({'a': [np.nan, False]}) self.assertEqual(d['a'].dtype, np.object_) - self.assertFalse(d['a'][1]) + assert not d['a'][1] def test_from_records_to_records(self): # from numpy documentation diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index be4e69fe99a4e..ebc125ae09818 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1938,7 +1938,7 @@ def test_reindex_frame_add_nat(self): mask = com.isnull(result)['B'] self.assertTrue(mask[-5:].all()) - self.assertFalse(mask[:-5].any()) + assert not mask[:-5].any() def test_set_dataframe_column_ns_dtype(self): x = DataFrame([datetime.now(), datetime.now()]) @@ -2940,8 +2940,7 @@ def test_setitem(self): b1 = df._data.blocks[1] b2 = df._data.blocks[2] self.assertTrue(b1.values.equals(b2.values)) - self.assertFalse(id(b1.values.values.base) == - id(b2.values.values.base)) + assert id(b1.values.values.base) != id(b2.values.values.base) # with nan df2 = df.copy() diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index b82a549bae3a0..d5035f2908528 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -223,7 +223,7 @@ def test_pop_non_unique_cols(self): self.assertEqual(len(res), 2) self.assertEqual(len(df.columns), 1) self.assertTrue("b" in df.columns) - self.assertFalse("a" in df.columns) + assert "a" not in df.columns self.assertEqual(len(df.index), 2) def test_insert_column_bug_4032(self): diff --git a/pandas/tests/frame/test_operators.py 
b/pandas/tests/frame/test_operators.py index d90e859509454..7f87666d5ecc4 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -236,7 +236,7 @@ def test_modulo(self): s = p[0] res = s % p res2 = p % s - self.assertFalse(np.array_equal(res.fillna(0), res2.fillna(0))) + assert not np.array_equal(res.fillna(0), res2.fillna(0)) def test_div(self): @@ -270,7 +270,7 @@ def test_div(self): s = p[0] res = s / p res2 = p / s - self.assertFalse(np.array_equal(res.fillna(0), res2.fillna(0))) + assert not np.array_equal(res.fillna(0), res2.fillna(0)) def test_logical_operators(self): @@ -574,7 +574,7 @@ def _check_unaligned_frame(meth, op, df, other): # DataFrame self.assertTrue(df.eq(df).values.all()) - self.assertFalse(df.ne(df).values.any()) + assert not df.ne(df).values.any() for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: f = getattr(df, op) o = getattr(operator, op) @@ -634,17 +634,17 @@ def _test_seq(df, idx_ser, col_ser): # NA df.loc[0, 0] = np.nan rs = df.eq(df) - self.assertFalse(rs.loc[0, 0]) + assert not rs.loc[0, 0] rs = df.ne(df) self.assertTrue(rs.loc[0, 0]) rs = df.gt(df) - self.assertFalse(rs.loc[0, 0]) + assert not rs.loc[0, 0] rs = df.lt(df) - self.assertFalse(rs.loc[0, 0]) + assert not rs.loc[0, 0] rs = df.ge(df) - self.assertFalse(rs.loc[0, 0]) + assert not rs.loc[0, 0] rs = df.le(df) - self.assertFalse(rs.loc[0, 0]) + assert not rs.loc[0, 0] # complex arr = np.array([np.nan, 1, 6, np.nan]) @@ -652,14 +652,14 @@ def _test_seq(df, idx_ser, col_ser): df = DataFrame({'a': arr}) df2 = DataFrame({'a': arr2}) rs = df.gt(df2) - self.assertFalse(rs.values.any()) + assert not rs.values.any() rs = df.ne(df2) self.assertTrue(rs.values.all()) arr3 = np.array([2j, np.nan, None]) df3 = DataFrame({'a': arr3}) rs = df3.gt(2j) - self.assertFalse(rs.values.any()) + assert not rs.values.any() # corner, dtype=object df1 = DataFrame({'col': ['foo', np.nan, 'bar']}) @@ -1021,7 +1021,7 @@ def test_boolean_comparison(self): assert_numpy_array_equal(result, expected.values) pytest.raises(ValueError, lambda: df == b_c) - self.assertFalse(np.array_equal(df.values, b_c)) + assert not np.array_equal(df.values, b_c) # with alignment df = DataFrame(np.arange(6).reshape((3, 2)), diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 630fa5ad57fad..bcb85b6e44d54 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -72,9 +72,9 @@ def test_repr(self): self.empty.info(buf=buf) df = DataFrame(["a\n\r\tb"], columns=["a\n\r\td"], index=["a\n\r\tf"]) - self.assertFalse("\t" in repr(df)) - self.assertFalse("\r" in repr(df)) - self.assertFalse("a\n" in repr(df)) + assert "\t" not in repr(df) + assert "\r" not in repr(df) + assert "a\n" not in repr(df) def test_repr_dimensions(self): df = DataFrame([[1, 2, ], [3, 4]]) @@ -82,10 +82,10 @@ def test_repr_dimensions(self): self.assertTrue("2 rows x 2 columns" in repr(df)) with option_context('display.show_dimensions', False): - self.assertFalse("2 rows x 2 columns" in repr(df)) + assert "2 rows x 2 columns" not in repr(df) with option_context('display.show_dimensions', 'truncate'): - self.assertFalse("2 rows x 2 columns" in repr(df)) + assert "2 rows x 2 columns" not in repr(df) @tm.slow def test_repr_big(self): @@ -320,7 +320,7 @@ def test_info_memory_usage(self): res = buf.getvalue().splitlines() # excluded column with object dtype, so estimate is accurate - self.assertFalse(re.match(r"memory usage: [^+]+\+", res[-1])) + assert not re.match(r"memory 
usage: [^+]+\+", res[-1]) df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo']) df_with_object_index.info(buf=buf, memory_usage=True) @@ -388,7 +388,7 @@ def test_info_memory_usage_qualified(self): df = DataFrame(1, columns=list('ab'), index=[1, 2, 3]) df.info(buf=buf) - self.assertFalse('+' in buf.getvalue()) + assert '+' not in buf.getvalue() buf = StringIO() df = DataFrame(1, columns=list('ab'), @@ -401,7 +401,7 @@ def test_info_memory_usage_qualified(self): index=pd.MultiIndex.from_product( [range(3), range(3)])) df.info(buf=buf) - self.assertFalse('+' in buf.getvalue()) + assert '+' not in buf.getvalue() buf = StringIO() df = DataFrame(1, columns=list('ab'), diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 7a5afa178208a..66af6aaca6513 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -350,7 +350,7 @@ def test_truncate_copy(self): index = self.tsframe.index truncated = self.tsframe.truncate(index[5], index[10]) truncated.values[:] = 5. - self.assertFalse((self.tsframe.values[5:11] == 5).any()) + assert not (self.tsframe.values[5:11] == 5).any() def test_asfreq(self): offset_monthly = self.tsframe.asfreq(offsets.BMonthEnd()) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 177c2345ea143..0696473d0449f 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -330,7 +330,7 @@ def test_grouper_column_index_level_precedence(self): expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean() assert_frame_equal(result, expected) not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() - self.assertFalse(result.index.equals(not_expected.index)) + assert not result.index.equals(not_expected.index) # Group single Index by single key with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -339,7 +339,7 @@ def test_grouper_column_index_level_precedence(self): expected = df_single_both.groupby([pd.Grouper(key='inner')]).mean() assert_frame_equal(result, expected) not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() - self.assertFalse(result.index.equals(not_expected.index)) + assert not result.index.equals(not_expected.index) # Group MultiIndex by single key list with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -348,7 +348,7 @@ def test_grouper_column_index_level_precedence(self): expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean() assert_frame_equal(result, expected) not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() - self.assertFalse(result.index.equals(not_expected.index)) + assert not result.index.equals(not_expected.index) # Group single Index by single key list with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -357,7 +357,7 @@ def test_grouper_column_index_level_precedence(self): expected = df_single_both.groupby([pd.Grouper(key='inner')]).mean() assert_frame_equal(result, expected) not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() - self.assertFalse(result.index.equals(not_expected.index)) + assert not result.index.equals(not_expected.index) # Group MultiIndex by two keys (1) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -369,7 +369,7 @@ def test_grouper_column_index_level_precedence(self): not_expected = df_multi_both.groupby(['B', pd.Grouper(level='inner') ]).mean() - 
self.assertFalse(result.index.equals(not_expected.index)) + assert not result.index.equals(not_expected.index) # Group MultiIndex by two keys (2) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -380,7 +380,7 @@ def test_grouper_column_index_level_precedence(self): assert_frame_equal(result, expected) not_expected = df_multi_both.groupby([pd.Grouper(level='inner'), 'B']).mean() - self.assertFalse(result.index.equals(not_expected.index)) + assert not result.index.equals(not_expected.index) # Group single Index by two keys (1) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -392,7 +392,7 @@ def test_grouper_column_index_level_precedence(self): not_expected = df_single_both.groupby(['B', pd.Grouper(level='inner') ]).mean() - self.assertFalse(result.index.equals(not_expected.index)) + assert not result.index.equals(not_expected.index) # Group single Index by two keys (2) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -403,7 +403,7 @@ def test_grouper_column_index_level_precedence(self): assert_frame_equal(result, expected) not_expected = df_single_both.groupby([pd.Grouper(level='inner'), 'B']).mean() - self.assertFalse(result.index.equals(not_expected.index)) + assert not result.index.equals(not_expected.index) def test_grouper_getting_correct_binner(self): @@ -2626,7 +2626,7 @@ def f(g): group_keys = grouper._get_group_keys() values, mutated = splitter.fast_apply(f, group_keys) - self.assertFalse(mutated) + assert not mutated def test_apply_with_mixed_dtype(self): # GH3480, apply with mixed dtype on axis=1 breaks in 0.11 @@ -3263,7 +3263,7 @@ def test_groupby_multiindex_not_lexsorted(self): not_lexsorted_df = not_lexsorted_df.pivot_table( index='a', columns=['b', 'c'], values='d') not_lexsorted_df = not_lexsorted_df.reset_index() - self.assertFalse(not_lexsorted_df.columns.is_lexsorted()) + assert not not_lexsorted_df.columns.is_lexsorted() # compare the results tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) @@ -3278,7 +3278,7 @@ def test_groupby_multiindex_not_lexsorted(self): df = DataFrame({'x': ['a', 'a', 'b', 'a'], 'y': [1, 1, 2, 2], 'z': [1, 2, 3, 4]}).set_index(['x', 'y']) - self.assertFalse(df.index.is_lexsorted()) + assert not df.index.is_lexsorted() for level in [0, 1, [0, 1]]: for sort in [False, True]: @@ -3595,7 +3595,7 @@ def test_max_nan_bug(self): r = gb[['File']].max() e = gb['File'].max().to_frame() tm.assert_frame_equal(r, e) - self.assertFalse(r['File'].isnull().any()) + assert not r['File'].isnull().any() def test_nlargest(self): a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 56a9af73e904a..23b1de76234c3 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -170,7 +170,7 @@ def test_repr_max_seq_item_setting(self): idx = idx.repeat(50) with pd.option_context("display.max_seq_items", None): repr(idx) - self.assertFalse('...' in str(idx)) + assert '...' not in str(idx) def test_wrong_number_names(self): def testit(ind): @@ -303,7 +303,7 @@ def test_duplicates(self): if isinstance(ind, MultiIndex): continue idx = self._holder([ind[0]] * 5) - self.assertFalse(idx.is_unique) + assert not idx.is_unique self.assertTrue(idx.has_duplicates) # GH 10115 @@ -327,7 +327,7 @@ def test_get_unique_index(self): # and doesn't contain nans. 
self.assertTrue(idx_unique.is_unique) try: - self.assertFalse(idx_unique.hasnans) + assert not idx_unique.hasnans except NotImplementedError: pass @@ -705,8 +705,8 @@ def test_equals(self): self.assertTrue(idx.equals(idx.copy())) self.assertTrue(idx.equals(idx.astype(object))) - self.assertFalse(idx.equals(list(idx))) - self.assertFalse(idx.equals(np.array(idx))) + assert not idx.equals(list(idx)) + assert not idx.equals(np.array(idx)) # Cannot pass in non-int64 dtype to RangeIndex if not isinstance(idx, RangeIndex): @@ -716,7 +716,7 @@ def test_equals(self): if idx.nlevels == 1: # do not test MultiIndex - self.assertFalse(idx.equals(pd.Series(idx))) + assert not idx.equals(pd.Series(idx)) def test_equals_op(self): # GH9947, GH10637 @@ -843,7 +843,7 @@ def test_hasnans_isnans(self): # cases in indices doesn't include NaN expected = np.array([False] * len(idx), dtype=bool) tm.assert_numpy_array_equal(idx._isnan, expected) - self.assertFalse(idx.hasnans) + assert not idx.hasnans idx = index.copy() values = idx.values @@ -881,7 +881,7 @@ def test_fillna(self): idx = index.copy() result = idx.fillna(idx[0]) tm.assert_index_equal(result, idx) - self.assertFalse(result is idx) + assert result is not idx msg = "'value' must be a scalar, passed: " with tm.assert_raises_regex(TypeError, msg): @@ -935,5 +935,5 @@ def test_nulls(self): def test_empty(self): # GH 15270 index = self.create_index() - self.assertFalse(index.empty) + assert not index.empty self.assertTrue(index[:0].empty) diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 470c0c2aad01a..338dba9ef6c4f 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -16,7 +16,7 @@ def test_str(self): # test the string repr idx = self.create_index() idx.name = 'foo' - self.assertFalse("length=%s" % len(idx) in str(idx)) + assert "length=%s" % len(idx) not in str(idx) self.assertTrue("'foo'" in str(idx)) self.assertTrue(idx.__class__.__name__ in str(idx)) diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 755944d342ed4..7e695164db971 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -101,7 +101,7 @@ def test_astype_datetime64(self): result = idx.astype('datetime64[ns]') tm.assert_index_equal(result, idx) - self.assertFalse(result is idx) + assert result is not idx result = idx.astype('datetime64[ns]', copy=False) tm.assert_index_equal(result, idx) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index ea9f7c65fb49b..8ce2085032ca1 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -493,7 +493,7 @@ def test_is_(self): dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') self.assertTrue(dti.is_(dti)) self.assertTrue(dti.is_(dti.view())) - self.assertFalse(dti.is_(dti.copy())) + assert not dti.is_(dti.copy()) def test_index_cast_datetime64_other_units(self): arr = np.arange(0, 100, 10, dtype=np.int64).view('M8[D]') diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 8a4cff2974b0d..7ba9bf53abc4d 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -399,10 +399,10 @@ def test_misc_coverage(self): assert isinstance(list(result.values())[0][0], Timestamp) idx =
DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02']) - self.assertFalse(idx.equals(list(idx))) + assert not idx.equals(list(idx)) non_datetime = Index(list('abc')) - self.assertFalse(idx.equals(list(non_datetime))) + assert not idx.equals(list(non_datetime)) def test_string_index_series_name_converted(self): # #1644 diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 4c7235fea63e8..22e77eebec06b 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -167,7 +167,7 @@ def test_normalize(self): tm.assert_index_equal(rng_ns_normalized, expected) self.assertTrue(result.is_normalized) - self.assertFalse(rng.is_normalized) + assert not rng.is_normalized class TestDatetime64(tm.TestCase): diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 020bb0e27d9de..7e42e5e3db7ef 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -103,7 +103,7 @@ def test_minmax(self): # non-monotonic idx2 = pd.DatetimeIndex(['2011-01-01', pd.NaT, '2011-01-03', '2011-01-02', pd.NaT], tz=tz) - self.assertFalse(idx2.is_monotonic) + assert not idx2.is_monotonic for idx in [idx1, idx2]: self.assertEqual(idx.min(), Timestamp('2011-01-01', tz=tz)) @@ -889,7 +889,7 @@ def test_nat(self): self.assertTrue(idx._can_hold_na) tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - self.assertFalse(idx.hasnans) + assert not idx.hasnans tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) @@ -910,27 +910,27 @@ def test_equals(self): self.assertTrue(idx.equals(idx.asobject)) self.assertTrue(idx.asobject.equals(idx)) self.assertTrue(idx.asobject.equals(idx.asobject)) - self.assertFalse(idx.equals(list(idx))) - self.assertFalse(idx.equals(pd.Series(idx))) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'], tz='US/Pacific') - self.assertFalse(idx.equals(idx2)) - self.assertFalse(idx.equals(idx2.copy())) - self.assertFalse(idx.equals(idx2.asobject)) - self.assertFalse(idx.asobject.equals(idx2)) - self.assertFalse(idx.equals(list(idx2))) - self.assertFalse(idx.equals(pd.Series(idx2))) + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.asobject) + assert not idx.asobject.equals(idx2) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) # same internal, different tz idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific') tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) - self.assertFalse(idx.equals(idx3)) - self.assertFalse(idx.equals(idx3.copy())) - self.assertFalse(idx.equals(idx3.asobject)) - self.assertFalse(idx.asobject.equals(idx3)) - self.assertFalse(idx.equals(list(idx3))) - self.assertFalse(idx.equals(pd.Series(idx3))) + assert not idx.equals(idx3) + assert not idx.equals(idx3.copy()) + assert not idx.equals(idx3.asobject) + assert not idx.asobject.equals(idx3) + assert not idx.equals(list(idx3)) + assert not idx.equals(pd.Series(idx3)) class TestDateTimeIndexToJulianDate(tm.TestCase): @@ -1119,7 +1119,7 @@ def test_comparison(self): comp = self.rng > d self.assertTrue(comp[11]) - self.assertFalse(comp[9]) + assert not comp[9] def test_pickle_unpickle(self): unpickled = tm.round_trip_pickle(self.rng) @@ -1189,7 +1189,7 @@ def test_summary_dateutil(self): bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() def 
test_equals(self): - self.assertFalse(self.rng.equals(list(self.rng))) + assert not self.rng.equals(list(self.rng)) def test_identical(self): t1 = self.rng.copy() @@ -1199,14 +1199,14 @@ def test_identical(self): # name t1 = t1.rename('foo') self.assertTrue(t1.equals(t2)) - self.assertFalse(t1.identical(t2)) + assert not t1.identical(t2) t2 = t2.rename('foo') self.assertTrue(t1.identical(t2)) # freq t2v = Index(t2.values) self.assertTrue(t1.equals(t2v)) - self.assertFalse(t1.identical(t2v)) + assert not t1.identical(t2v) class TestCustomDatetimeIndex(tm.TestCase): @@ -1219,7 +1219,7 @@ def test_comparison(self): comp = self.rng > d self.assertTrue(comp[11]) - self.assertFalse(comp[9]) + assert not comp[9] def test_copy(self): cp = self.rng.copy() @@ -1291,4 +1291,4 @@ def test_summary_dateutil(self): cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() def test_equals(self): - self.assertFalse(self.rng.equals(list(self.rng))) + assert not self.rng.equals(list(self.rng)) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 715825417cd31..941c9767e7a3a 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1027,8 +1027,7 @@ def test_does_not_convert_mixed_integer(self): bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') for bad_date_string in bad_date_strings: - self.assertFalse(tslib._does_string_look_like_datetime( - bad_date_string)) + assert not tslib._does_string_look_like_datetime(bad_date_string) good_date_strings = ('2012-01-01', '01/01/2012', diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 70c0879a0871a..f133845f8404a 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -74,7 +74,7 @@ def test_minmax(self): # non-monotonic idx2 = pd.PeriodIndex(['2011-01-01', pd.NaT, '2011-01-03', '2011-01-02', pd.NaT], freq='D') - self.assertFalse(idx2.is_monotonic) + assert not idx2.is_monotonic for idx in [idx1, idx2]: self.assertEqual(idx.min(), pd.Period('2011-01-01', freq='D')) @@ -806,7 +806,7 @@ def test_nat(self): self.assertTrue(idx._can_hold_na) tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - self.assertFalse(idx.hasnans) + assert not idx.hasnans tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) @@ -828,27 +828,27 @@ def test_equals(self): self.assertTrue(idx.equals(idx.asobject)) self.assertTrue(idx.asobject.equals(idx)) self.assertTrue(idx.asobject.equals(idx.asobject)) - self.assertFalse(idx.equals(list(idx))) - self.assertFalse(idx.equals(pd.Series(idx))) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) idx2 = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'], freq='H') - self.assertFalse(idx.equals(idx2)) - self.assertFalse(idx.equals(idx2.copy())) - self.assertFalse(idx.equals(idx2.asobject)) - self.assertFalse(idx.asobject.equals(idx2)) - self.assertFalse(idx.equals(list(idx2))) - self.assertFalse(idx.equals(pd.Series(idx2))) + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.asobject) + assert not idx.asobject.equals(idx2) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) # same internal, different freq idx3 = pd.PeriodIndex._simple_new(idx.asi8, freq='H') tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) - self.assertFalse(idx.equals(idx3)) - self.assertFalse(idx.equals(idx3.copy())) -
self.assertFalse(idx.equals(idx3.asobject)) - self.assertFalse(idx.asobject.equals(idx3)) - self.assertFalse(idx.equals(list(idx3))) - self.assertFalse(idx.equals(pd.Series(idx3))) + assert not idx.equals(idx3) + assert not idx.equals(idx3.copy()) + assert not idx.equals(idx3.asobject) + assert not idx.asobject.equals(idx3) + assert not idx.equals(list(idx3)) + assert not idx.equals(pd.Series(idx3)) class TestPeriodIndexSeriesMethods(tm.TestCase): diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index e563f683bf8ca..df3f6023a6506 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -512,16 +512,16 @@ def test_contains(self): rng = period_range('2007-01', freq='M', periods=10) self.assertTrue(Period('2007-01', freq='M') in rng) - self.assertFalse(Period('2007-01', freq='D') in rng) - self.assertFalse(Period('2007-01', freq='2M') in rng) + assert Period('2007-01', freq='D') not in rng + assert Period('2007-01', freq='2M') not in rng def test_contains_nat(self): - # GH13582 + # see gh-13582 idx = period_range('2007-01', freq='M', periods=10) - self.assertFalse(pd.NaT in idx) - self.assertFalse(None in idx) - self.assertFalse(float('nan') in idx) - self.assertFalse(np.nan in idx) + assert pd.NaT not in idx + assert None not in idx + assert float('nan') not in idx + assert np.nan not in idx idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') self.assertTrue(pd.NaT in idx) @@ -709,13 +709,13 @@ def test_iteration(self): def test_is_full(self): index = PeriodIndex([2005, 2007, 2009], freq='A') - self.assertFalse(index.is_full) + assert not index.is_full index = PeriodIndex([2005, 2006, 2007], freq='A') self.assertTrue(index.is_full) index = PeriodIndex([2005, 2005, 2007], freq='A') - self.assertFalse(index.is_full) + assert not index.is_full index = PeriodIndex([2005, 2005, 2006], freq='A') self.assertTrue(index.is_full) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index caf2dde249600..2f07cf3c8270f 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -414,13 +414,13 @@ def test_equals_object(self): self.assertTrue(Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'c']))) # different length - self.assertFalse(Index(['a', 'b', 'c']).equals(Index(['a', 'b']))) + assert not Index(['a', 'b', 'c']).equals(Index(['a', 'b'])) # same length, different values - self.assertFalse(Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'd']))) + assert not Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'd'])) # Must also be an Index - self.assertFalse(Index(['a', 'b', 'c']).equals(['a', 'b', 'c'])) + assert not Index(['a', 'b', 'c']).equals(['a', 'b', 'c']) def test_insert(self): @@ -470,25 +470,25 @@ def test_identical(self): i1 = i1.rename('foo') self.assertTrue(i1.equals(i2)) - self.assertFalse(i1.identical(i2)) + assert not i1.identical(i2) i2 = i2.rename('foo') self.assertTrue(i1.identical(i2)) i3 = Index([('a', 'a'), ('a', 'b'), ('b', 'a')]) i4 = Index([('a', 'a'), ('a', 'b'), ('b', 'a')], tupleize_cols=False) - self.assertFalse(i3.identical(i4)) + assert not i3.identical(i4) def test_is_(self): ind = Index(range(10)) self.assertTrue(ind.is_(ind)) self.assertTrue(ind.is_(ind.view().view().view().view())) - self.assertFalse(ind.is_(Index(range(10)))) - self.assertFalse(ind.is_(ind.copy())) - self.assertFalse(ind.is_(ind.copy(deep=False))) - self.assertFalse(ind.is_(ind[:])) -
self.assertFalse(ind.is_(ind.view(np.ndarray).view(Index))) - self.assertFalse(ind.is_(np.array(range(10)))) + assert not ind.is_(Index(range(10))) + assert not ind.is_(ind.copy()) + assert not ind.is_(ind.copy(deep=False)) + assert not ind.is_(ind[:]) + assert not ind.is_(ind.view(np.ndarray).view(Index)) + assert not ind.is_(np.array(range(10))) # quasi-implementation dependent self.assertTrue(ind.is_(ind.view())) @@ -497,11 +497,11 @@ def test_is_(self): self.assertTrue(ind.is_(ind2)) self.assertTrue(ind2.is_(ind)) # doesn't matter if Indices are *actually* views of underlying data, - self.assertFalse(ind.is_(Index(ind.values))) + assert not ind.is_(Index(ind.values)) arr = np.array(range(1, 11)) ind1 = Index(arr, copy=False) ind2 = Index(arr, copy=False) - self.assertFalse(ind1.is_(ind2)) + assert not ind1.is_(ind2) def test_asof(self): d = self.dateIndex[0] @@ -519,7 +519,7 @@ def test_asof_datetime_partial(self): expected = Timestamp('2010-02-28') result = idx.asof('2010-02') self.assertEqual(result, expected) - self.assertFalse(isinstance(result, Index)) + assert not isinstance(result, Index) def test_nanosecond_index_access(self): s = Series([Timestamp('20130101')]).values.view('i8')[0] @@ -938,24 +938,24 @@ def test_symmetric_difference(self): self.assertEqual(result.name, 'new_name') def test_is_numeric(self): - self.assertFalse(self.dateIndex.is_numeric()) - self.assertFalse(self.strIndex.is_numeric()) + assert not self.dateIndex.is_numeric() + assert not self.strIndex.is_numeric() self.assertTrue(self.intIndex.is_numeric()) self.assertTrue(self.floatIndex.is_numeric()) - self.assertFalse(self.catIndex.is_numeric()) + assert not self.catIndex.is_numeric() def test_is_object(self): self.assertTrue(self.strIndex.is_object()) self.assertTrue(self.boolIndex.is_object()) - self.assertFalse(self.catIndex.is_object()) - self.assertFalse(self.intIndex.is_object()) - self.assertFalse(self.dateIndex.is_object()) - self.assertFalse(self.floatIndex.is_object()) + assert not self.catIndex.is_object() + assert not self.intIndex.is_object() + assert not self.dateIndex.is_object() + assert not self.floatIndex.is_object() def test_is_all_dates(self): self.assertTrue(self.dateIndex.is_all_dates) - self.assertFalse(self.strIndex.is_all_dates) - self.assertFalse(self.intIndex.is_all_dates) + assert not self.strIndex.is_all_dates + assert not self.intIndex.is_all_dates def test_summary(self): self._check_method_works(Index.summary) @@ -1331,8 +1331,8 @@ def test_tuple_union_bug(self): def test_is_monotonic_incomparable(self): index = Index([5, datetime.now(), 7]) - self.assertFalse(index.is_monotonic) - self.assertFalse(index.is_monotonic_decreasing) + assert not index.is_monotonic + assert not index.is_monotonic_decreasing def test_get_set_value(self): values = np.random.randn(100) @@ -2031,8 +2031,8 @@ def test_is_monotonic_na(self): pd.to_datetime(['2000-01-01', 'NaT', '2000-01-02']), pd.to_timedelta(['1 day', 'NaT']), ] for index in examples: - self.assertFalse(index.is_monotonic_increasing) - self.assertFalse(index.is_monotonic_decreasing) + assert not index.is_monotonic_increasing + assert not index.is_monotonic_decreasing def test_repr_summary(self): with cf.option_context('display.max_seq_items', 10): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 5dcd45e8c85b0..5c9df55d2b508 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -183,8 +183,8 @@ def test_contains(self): self.assertTrue(np.nan not in 
ci) # assert codes NOT in index - self.assertFalse(0 in ci) - self.assertFalse(1 in ci) + assert 0 not in ci + assert 1 not in ci ci = CategoricalIndex( list('aabbca') + [np.nan], categories=list('cabdef')) @@ -423,7 +423,7 @@ def test_reindex_dtype(self): def test_duplicates(self): idx = CategoricalIndex([0, 0, 0], name='foo') - self.assertFalse(idx.is_unique) + assert not idx.is_unique self.assertTrue(idx.has_duplicates) expected = CategoricalIndex([0], name='foo') @@ -539,7 +539,7 @@ def test_identical(self): ordered=True) self.assertTrue(ci1.identical(ci1)) self.assertTrue(ci1.identical(ci1.copy())) - self.assertFalse(ci1.identical(ci2)) + assert not ci1.identical(ci2) def test_ensure_copied_data(self): # gh-12309: Check the "copy" argument of each @@ -563,18 +563,18 @@ def test_equals_categorical(self): ordered=True) self.assertTrue(ci1.equals(ci1)) - self.assertFalse(ci1.equals(ci2)) + assert not ci1.equals(ci2) self.assertTrue(ci1.equals(ci1.astype(object))) self.assertTrue(ci1.astype(object).equals(ci1)) self.assertTrue((ci1 == ci1).all()) - self.assertFalse((ci1 != ci1).all()) - self.assertFalse((ci1 > ci1).all()) - self.assertFalse((ci1 < ci1).all()) + assert not (ci1 != ci1).all() + assert not (ci1 > ci1).all() + assert not (ci1 < ci1).all() self.assertTrue((ci1 <= ci1).all()) self.assertTrue((ci1 >= ci1).all()) - self.assertFalse((ci1 == 1).all()) + assert not (ci1 == 1).all() self.assertTrue((ci1 == Index(['a', 'b'])).all()) self.assertTrue((ci1 == ci1.values).all()) @@ -591,20 +591,20 @@ def test_equals_categorical(self): # tests # make sure that we are testing for category inclusion properly ci = CategoricalIndex(list('aabca'), categories=['c', 'a', 'b']) - self.assertFalse(ci.equals(list('aabca'))) - self.assertFalse(ci.equals(CategoricalIndex(list('aabca')))) + assert not ci.equals(list('aabca')) + assert not ci.equals(CategoricalIndex(list('aabca'))) self.assertTrue(ci.equals(ci.copy())) ci = CategoricalIndex(list('aabca') + [np.nan], categories=['c', 'a', 'b']) - self.assertFalse(ci.equals(list('aabca'))) - self.assertFalse(ci.equals(CategoricalIndex(list('aabca')))) + assert not ci.equals(list('aabca')) + assert not ci.equals(CategoricalIndex(list('aabca'))) self.assertTrue(ci.equals(ci.copy())) ci = CategoricalIndex(list('aabca') + [np.nan], categories=['c', 'a', 'b']) - self.assertFalse(ci.equals(list('aabca') + [np.nan])) - self.assertFalse(ci.equals(CategoricalIndex(list('aabca') + [np.nan]))) + assert not ci.equals(list('aabca') + [np.nan]) + assert not ci.equals(CategoricalIndex(list('aabca') + [np.nan])) self.assertTrue(ci.equals(ci.copy())) def test_string_categorical_index_repr(self): diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index ec56791a6ec67..2e16e16e0b2c4 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -30,7 +30,7 @@ def test_constructors(self): self.assertTrue(expected.equals(actual)) alternate = IntervalIndex.from_breaks(np.arange(3), closed='left') - self.assertFalse(expected.equals(alternate)) + assert not expected.equals(alternate) actual = IntervalIndex.from_intervals([Interval(0, 1), Interval(1, 2)]) self.assertTrue(expected.equals(actual)) @@ -151,7 +151,7 @@ def test_properties(self): def test_with_nans(self): index = self.index - self.assertFalse(index.hasnans) + assert not index.hasnans tm.assert_numpy_array_equal(index.isnull(), np.array([False, False])) tm.assert_numpy_array_equal(index.notnull(), @@ -196,14 +196,13 @@ def test_equals(self): 
self.assertTrue(idx.equals(idx)) self.assertTrue(idx.equals(idx.copy())) - self.assertFalse(idx.equals(idx.astype(object))) - self.assertFalse(idx.equals(np.array(idx))) - self.assertFalse(idx.equals(list(idx))) + assert not idx.equals(idx.astype(object)) + assert not idx.equals(np.array(idx)) + assert not idx.equals(list(idx)) - self.assertFalse(idx.equals([1, 2])) - self.assertFalse(idx.equals(np.array([1, 2]))) - self.assertFalse(idx.equals( - pd.date_range('20130101', periods=2))) + assert not idx.equals([1, 2]) + assert not idx.equals(np.array([1, 2])) + assert not idx.equals(pd.date_range('20130101', periods=2)) def test_astype(self): @@ -216,7 +215,7 @@ def test_astype(self): result = idx.astype(object) tm.assert_index_equal(result, Index(idx.values, dtype='object')) - self.assertFalse(idx.equals(result)) + assert not idx.equals(result) self.assertTrue(idx.equals(IntervalIndex.from_intervals(result))) result = idx.astype('interval') @@ -272,11 +271,11 @@ def test_monotonic_and_unique(self): self.assertTrue(idx.is_unique) idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (1, 2)]) - self.assertFalse(idx.is_monotonic) + assert not idx.is_monotonic self.assertTrue(idx.is_unique) idx = IntervalIndex.from_tuples([(0, 2), (0, 2)]) - self.assertFalse(idx.is_unique) + assert not idx.is_unique self.assertTrue(idx.is_monotonic) @pytest.mark.xfail(reason='not a valid repr as we use interval notation') diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index ab403cf56e033..6f6e1f1544219 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -524,7 +524,7 @@ def test_reference_duplicate_name(self): idx = MultiIndex.from_tuples( [('a', 'b'), ('c', 'd')], names=['x', 'y']) - self.assertFalse(idx._reference_duplicate_name('x')) + assert not idx._reference_duplicate_name('x') def test_astype(self): expected = self.index.copy() @@ -1082,11 +1082,11 @@ def test_contains_with_nat(self): self.assertTrue(val in mi) def test_is_all_dates(self): - self.assertFalse(self.index.is_all_dates) + assert not self.index.is_all_dates def test_is_numeric(self): # MultiIndex is never numeric - self.assertFalse(self.index.is_numeric()) + assert not self.index.is_numeric() def test_getitem(self): # scalar @@ -1280,7 +1280,7 @@ def test_consistency(self): index = MultiIndex(levels=[major_axis, minor_axis], labels=[major_labels, minor_labels]) - self.assertFalse(index.is_unique) + assert not index.is_unique def test_truncate(self): major_axis = Index(lrange(4)) @@ -1526,9 +1526,9 @@ def test_equals_missing_values(self): i = pd.MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp('20130101'))]) result = i[0:1].equals(i[0]) - self.assertFalse(result) + assert not result result = i[1:2].equals(i[1]) - self.assertFalse(result) + assert not result def test_identical(self): mi = self.index.copy() @@ -1537,7 +1537,7 @@ def test_identical(self): mi = mi.set_names(['new1', 'new2']) self.assertTrue(mi.equals(mi2)) - self.assertFalse(mi.identical(mi2)) + assert not mi.identical(mi2) mi2 = mi2.set_names(['new1', 'new2']) self.assertTrue(mi.identical(mi2)) @@ -1545,7 +1545,7 @@ def test_identical(self): mi3 = Index(mi.tolist(), names=mi.names) mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False) self.assertTrue(mi.identical(mi3)) - self.assertFalse(mi.identical(mi4)) + assert not mi.identical(mi4) self.assertTrue(mi.equals(mi4)) def test_is_(self): @@ -1565,15 +1565,15 @@ def test_is_(self): self.assertTrue(mi.is_(mi2)) # levels are inherent properties, 
they change identity mi3 = mi2.set_levels([lrange(10), lrange(10)]) - self.assertFalse(mi3.is_(mi2)) + assert not mi3.is_(mi2) # shouldn't change self.assertTrue(mi2.is_(mi)) mi4 = mi3.view() mi4.set_levels([[1 for _ in range(10)], lrange(10)], inplace=True) - self.assertFalse(mi4.is_(mi3)) + assert not mi4.is_(mi3) mi5 = mi.view() mi5.set_levels(mi5.levels, inplace=True) - self.assertFalse(mi5.is_(mi)) + assert not mi5.is_(mi) def test_union(self): piece1 = self.index[:5][::-1] @@ -1862,7 +1862,7 @@ def test_drop_not_lexsorted(self): df = df.pivot_table(index='a', columns=['b', 'c'], values='d') df = df.reset_index() not_lexsorted_mi = df.columns - self.assertFalse(not_lexsorted_mi.is_lexsorted()) + assert not not_lexsorted_mi.is_lexsorted() # compare the results tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) @@ -2119,7 +2119,7 @@ def test_reindex_level(self): level='first') def test_duplicates(self): - self.assertFalse(self.index.has_duplicates) + assert not self.index.has_duplicates self.assertTrue(self.index.append(self.index).has_duplicates) index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ @@ -2147,7 +2147,7 @@ def test_duplicates(self): (u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)] index = pd.MultiIndex.from_tuples(t) - self.assertFalse(index.has_duplicates) + assert not index.has_duplicates # handle int64 overflow if possible def check(nlevels, with_nulls): @@ -2168,7 +2168,7 @@ def check(nlevels, with_nulls): # no dups index = MultiIndex(levels=levels, labels=labels) - self.assertFalse(index.has_duplicates) + assert not index.has_duplicates # with a dup if with_nulls: @@ -2203,7 +2203,7 @@ def check(nlevels, with_nulls): # GH5873 for a in [101, 102]: mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) - self.assertFalse(mi.has_duplicates) + assert not mi.has_duplicates self.assertEqual(mi.get_duplicates(), []) tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( 2, dtype='bool')) @@ -2215,7 +2215,7 @@ def check(nlevels, with_nulls): mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]], labels=np.random.permutation(list(lab)).T) self.assertEqual(len(mi), (n + 1) * (m + 1)) - self.assertFalse(mi.has_duplicates) + assert not mi.has_duplicates self.assertEqual(mi.get_duplicates(), []) tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( len(mi), dtype='bool')) @@ -2281,8 +2281,7 @@ def test_repr_with_unicode_data(self): with pd.core.config.option_context("display.encoding", 'UTF-8'): d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} index = pd.DataFrame(d).set_index(["a", "b"]).index - self.assertFalse("\\u" in repr(index) - ) # we don't want unicode-escaped + assert "\\u" not in repr(index) # we don't want unicode-escaped def test_repr_roundtrip(self): @@ -2376,7 +2375,7 @@ def test_level_setting_resets_attributes(self): inplace=True) # if this fails, probably didn't reset the cache correctly. 
- self.assertFalse(ind.is_monotonic) + assert not ind.is_monotonic def test_is_monotonic(self): i = MultiIndex.from_product([np.arange(10), @@ -2386,18 +2385,18 @@ def test_is_monotonic(self): i = MultiIndex.from_product([np.arange(10, 0, -1), np.arange(10)], names=['one', 'two']) - self.assertFalse(i.is_monotonic) - self.assertFalse(Index(i.values).is_monotonic) + assert not i.is_monotonic + assert not Index(i.values).is_monotonic i = MultiIndex.from_product([np.arange(10), np.arange(10, 0, -1)], names=['one', 'two']) - self.assertFalse(i.is_monotonic) - self.assertFalse(Index(i.values).is_monotonic) + assert not i.is_monotonic + assert not Index(i.values).is_monotonic i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']]) - self.assertFalse(i.is_monotonic) - self.assertFalse(Index(i.values).is_monotonic) + assert not i.is_monotonic + assert not Index(i.values).is_monotonic # string ordering i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], @@ -2405,8 +2404,8 @@ def test_is_monotonic(self): labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) - self.assertFalse(i.is_monotonic) - self.assertFalse(Index(i.values).is_monotonic) + assert not i.is_monotonic + assert not Index(i.values).is_monotonic i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['mom', 'next', 'zenith']], @@ -2424,7 +2423,7 @@ def test_is_monotonic(self): labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], names=['household_id', 'asset_id']) - self.assertFalse(i.is_monotonic) + assert not i.is_monotonic def test_reconstruct_sort(self): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 8a46da37572ff..8b4179dbf2e0e 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -381,8 +381,8 @@ def test_contains_not_nans(self): def test_doesnt_contain_all_the_things(self): i = Float64Index([np.nan]) - self.assertFalse(i.isin([0]).item()) - self.assertFalse(i.isin([1]).item()) + assert not i.isin([0]).item() + assert not i.isin([1]).item() self.assertTrue(i.isin([np.nan]).item()) def test_nan_multiple_containment(self): @@ -465,10 +465,10 @@ def test_view(self): def test_is_monotonic(self): self.assertTrue(self.index.is_monotonic) self.assertTrue(self.index.is_monotonic_increasing) - self.assertFalse(self.index.is_monotonic_decreasing) + assert not self.index.is_monotonic_decreasing index = self._holder([4, 3, 2, 1]) - self.assertFalse(index.is_monotonic) + assert not index.is_monotonic self.assertTrue(index.is_monotonic_decreasing) index = self._holder([1]) @@ -486,19 +486,19 @@ def test_identical(self): self.assertTrue(i.identical(self.index)) same_values_different_type = Index(i, dtype=object) - self.assertFalse(i.identical(same_values_different_type)) + assert not i.identical(same_values_different_type) i = self.index.copy(dtype=object) i = i.rename('foo') same_values = Index(i, dtype=object) self.assertTrue(same_values.identical(i)) - self.assertFalse(i.identical(self.index)) + assert not i.identical(self.index) self.assertTrue(Index(same_values, name='foo', dtype=object).identical( i)) - self.assertFalse(self.index.copy(dtype=object) - .identical(self.index.copy(dtype=self._dtype))) + assert not self.index.copy(dtype=object).identical( + self.index.copy(dtype=self._dtype)) def test_join_non_unique(self): left = Index([4, 4, 3, 3]) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index c3ffb32c36e3b..0baf6636806f6 100644 --- 
a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -330,10 +330,10 @@ def test_dtype(self): def test_is_monotonic(self): self.assertTrue(self.index.is_monotonic) self.assertTrue(self.index.is_monotonic_increasing) - self.assertFalse(self.index.is_monotonic_decreasing) + assert not self.index.is_monotonic_decreasing index = RangeIndex(4, 0, -1) - self.assertFalse(index.is_monotonic) + assert not index.is_monotonic self.assertTrue(index.is_monotonic_decreasing) index = RangeIndex(1, 2) @@ -374,19 +374,19 @@ def test_identical(self): return same_values_different_type = Index(i, dtype=object) - self.assertFalse(i.identical(same_values_different_type)) + assert not i.identical(same_values_different_type) i = self.index.copy(dtype=object) i = i.rename('foo') same_values = Index(i, dtype=object) self.assertTrue(same_values.identical(self.index.copy(dtype=object))) - self.assertFalse(i.identical(self.index)) + assert not i.identical(self.index) self.assertTrue(Index(same_values, name='foo', dtype=object).identical( i)) - self.assertFalse(self.index.copy(dtype=object) - .identical(self.index.copy(dtype='int64'))) + assert not self.index.copy(dtype=object).identical( + self.index.copy(dtype='int64')) def test_get_indexer(self): target = RangeIndex(10) @@ -423,7 +423,7 @@ def test_join_outer(self): 5, 4, 3, 2, 1, 0], dtype=np.intp) assert isinstance(res, Int64Index) - self.assertFalse(isinstance(res, RangeIndex)) + assert not isinstance(res, RangeIndex) tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -437,7 +437,7 @@ def test_join_outer(self): tm.assert_index_equal(res, noidx_res) assert isinstance(res, Int64Index) - self.assertFalse(isinstance(res, RangeIndex)) + assert not isinstance(res, RangeIndex) tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -785,7 +785,7 @@ def test_duplicates(self): continue idx = self.indices[ind] self.assertTrue(idx.is_unique) - self.assertFalse(idx.has_duplicates) + assert not idx.has_duplicates def test_ufunc_compat(self): idx = RangeIndex(5) diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index d269cddcbb5c8..b17433d3aeb51 100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -51,7 +51,7 @@ def test_astype_timedelta64(self): result = idx.astype('timedelta64[ns]') tm.assert_index_equal(result, idx) - self.assertFalse(result is idx) + assert result is not idx result = idx.astype('timedelta64[ns]', copy=False) tm.assert_index_equal(result, idx) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index c3cc05271e978..9747902f316a6 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -60,7 +60,7 @@ def test_minmax(self): # non-monotonic idx2 = TimedeltaIndex(['1 days', np.nan, '3 days', 'NaT']) - self.assertFalse(idx2.is_monotonic) + assert not idx2.is_monotonic for idx in [idx1, idx2]: self.assertEqual(idx.min(), Timedelta('1 days')) @@ -828,7 +828,7 @@ def test_nat(self): self.assertTrue(idx._can_hold_na) tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - self.assertFalse(idx.hasnans) + assert not idx.hasnans tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) @@ -848,17 +848,17 @@ def test_equals(self):
self.assertTrue(idx.equals(idx.asobject)) self.assertTrue(idx.asobject.equals(idx)) self.assertTrue(idx.asobject.equals(idx.asobject)) - self.assertFalse(idx.equals(list(idx))) - self.assertFalse(idx.equals(pd.Series(idx))) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) idx2 = pd.TimedeltaIndex(['2 days', '1 days', 'NaT']) - self.assertFalse(idx.equals(idx2)) - self.assertFalse(idx.equals(idx2.copy())) - self.assertFalse(idx.equals(idx2.asobject)) - self.assertFalse(idx.asobject.equals(idx2)) - self.assertFalse(idx.asobject.equals(idx2.asobject)) - self.assertFalse(idx.equals(list(idx2))) - self.assertFalse(idx.equals(pd.Series(idx2))) + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.asobject) + assert not idx.asobject.equals(idx2) + assert not idx.asobject.equals(idx2.asobject) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) class TestTimedeltas(tm.TestCase): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index b5bdf031180ec..c90c61170ca93 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -346,10 +346,10 @@ def test_misc_coverage(self): assert isinstance(list(result.values())[0][0], Timedelta) idx = TimedeltaIndex(['3d', '1d', '2d']) - self.assertFalse(idx.equals(list(idx))) + assert not idx.equals(list(idx)) non_td = Index(list('abc')) - self.assertFalse(idx.equals(list(non_td))) + assert not idx.equals(list(non_td)) def test_map(self): diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index bdee41acbc8fd..498604aaac853 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -102,7 +102,7 @@ def f(): pytest.raises(error, f) # contains - self.assertFalse(3.0 in s) + assert 3.0 not in s # setting with a float fails with iloc def f(): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index f8a7c57ad5061..d0f089f0804c3 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -438,7 +438,7 @@ def test_string_slice(self): df.loc['2011', 0] df = pd.DataFrame() - self.assertFalse(df.index.is_all_dates) + assert not df.index.is_all_dates with pytest.raises(KeyError): df['2011'] diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b2a5e6147cd28..862a6e6326ddd 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -588,7 +588,7 @@ def gen_expected(df, mask): df.take(mask[1:], convert=False)]) df = gen_test(900, 100) - self.assertFalse(df.index.is_unique) + assert not df.index.is_unique mask = np.arange(100) result = df.loc[mask] @@ -596,7 +596,7 @@ def gen_expected(df, mask): tm.assert_frame_equal(result, expected) df = gen_test(900000, 100000) - self.assertFalse(df.index.is_unique) + assert not df.index.is_unique mask = np.arange(100000) result = df.loc[mask] diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index a85c6bb446140..dbd0f5a9e6e1c 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -816,7 +816,7 @@ def test_multiindex_slicers_non_unique(self): C=[1, 2, 1, 3], D=[1, 2, 3, 4])) .set_index(['A', 'B', 'C']).sort_index()) - self.assertFalse(df.index.is_unique) + assert not df.index.is_unique expected = 
(DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'], C=[1, 1], D=[1, 3])) .set_index(['A', 'B', 'C']).sort_index()) @@ -832,12 +832,12 @@ def test_multiindex_slicers_non_unique(self): C=[1, 2, 1, 2], D=[1, 2, 3, 4])) .set_index(['A', 'B', 'C']).sort_index()) - self.assertFalse(df.index.is_unique) + assert not df.index.is_unique expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'], C=[1, 1], D=[1, 3])) .set_index(['A', 'B', 'C']).sort_index()) result = df.loc[(slice(None), slice(None), 1), :] - self.assertFalse(result.index.is_unique) + assert not result.index.is_unique tm.assert_frame_equal(result, expected) # GH12896 diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 35a71efbbf5ba..ccc1372495106 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -252,20 +252,20 @@ def test_expand_frame_repr(self): 'display.max_rows', 20, 'display.show_dimensions', True): with option_context('display.expand_frame_repr', True): - self.assertFalse(has_truncated_repr(df_small)) - self.assertFalse(has_expanded_repr(df_small)) - self.assertFalse(has_truncated_repr(df_wide)) + assert not has_truncated_repr(df_small) + assert not has_expanded_repr(df_small) + assert not has_truncated_repr(df_wide) self.assertTrue(has_expanded_repr(df_wide)) self.assertTrue(has_vertically_truncated_repr(df_tall)) self.assertTrue(has_expanded_repr(df_tall)) with option_context('display.expand_frame_repr', False): - self.assertFalse(has_truncated_repr(df_small)) - self.assertFalse(has_expanded_repr(df_small)) - self.assertFalse(has_horizontally_truncated_repr(df_wide)) - self.assertFalse(has_expanded_repr(df_wide)) + assert not has_truncated_repr(df_small) + assert not has_expanded_repr(df_small) + assert not has_horizontally_truncated_repr(df_wide) + assert not has_expanded_repr(df_wide) self.assertTrue(has_vertically_truncated_repr(df_tall)) - self.assertFalse(has_expanded_repr(df_tall)) + assert not has_expanded_repr(df_tall) def test_repr_non_interactive(self): # in non interactive mode, there can be no dependency on the @@ -274,8 +274,8 @@ def test_repr_non_interactive(self): with option_context('mode.sim_interactive', False, 'display.width', 0, 'display.height', 0, 'display.max_rows', 5000): - self.assertFalse(has_truncated_repr(df)) - self.assertFalse(has_expanded_repr(df)) + assert not has_truncated_repr(df) + assert not has_expanded_repr(df) def test_repr_max_columns_max_rows(self): term_width, term_height = get_terminal_size() @@ -293,29 +293,29 @@ def mkframe(n): with option_context('display.width', term_width * 2): with option_context('display.max_rows', 5, 'display.max_columns', 5): - self.assertFalse(has_expanded_repr(mkframe(4))) - self.assertFalse(has_expanded_repr(mkframe(5))) - self.assertFalse(has_expanded_repr(df6)) + assert not has_expanded_repr(mkframe(4)) + assert not has_expanded_repr(mkframe(5)) + assert not has_expanded_repr(df6) self.assertTrue(has_doubly_truncated_repr(df6)) with option_context('display.max_rows', 20, 'display.max_columns', 10): # Out off max_columns boundary, but no extending # since not exceeding width - self.assertFalse(has_expanded_repr(df6)) - self.assertFalse(has_truncated_repr(df6)) + assert not has_expanded_repr(df6) + assert not has_truncated_repr(df6) with option_context('display.max_rows', 9, 'display.max_columns', 10): # out vertical bounds can not result in exanded repr - self.assertFalse(has_expanded_repr(df10)) + assert not has_expanded_repr(df10) 
self.assertTrue(has_vertically_truncated_repr(df10)) # width=None in terminal, auto detection with option_context('display.max_columns', 100, 'display.max_rows', term_width * 20, 'display.width', None): df = mkframe((term_width // 7) - 2) - self.assertFalse(has_expanded_repr(df)) + assert not has_expanded_repr(df) df = mkframe((term_width // 7) + 2) printing.pprint_thing(df._repr_fits_horizontal_()) self.assertTrue(has_expanded_repr(df)) @@ -755,14 +755,14 @@ def test_to_string_truncate_indices(self): self.assertTrue( has_vertically_truncated_repr(df)) else: - self.assertFalse( - has_vertically_truncated_repr(df)) + assert not has_vertically_truncated_repr( + df) with option_context("display.max_columns", 15): if w == 20: self.assertTrue( has_horizontally_truncated_repr(df)) else: - self.assertFalse( + assert not ( has_horizontally_truncated_repr(df)) with option_context("display.max_rows", 15, "display.max_columns", 15): @@ -770,8 +770,8 @@ def test_to_string_truncate_indices(self): self.assertTrue(has_doubly_truncated_repr( df)) else: - self.assertFalse(has_doubly_truncated_repr( - df)) + assert not has_doubly_truncated_repr( + df) def test_to_string_truncate_multilevel(self): arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], @@ -802,7 +802,7 @@ def test_truncate_with_different_dtypes(self): 'display.max_columns', 3): result = str(df) self.assertTrue('None' in result) - self.assertFalse('NaN' in result) + assert 'NaN' not in result def test_datetimelike_frame(self): @@ -1358,8 +1358,8 @@ def test_show_dimensions(self): with option_context('display.max_rows', 10, 'display.max_columns', 40, 'display.width', 500, 'display.expand_frame_repr', 'info', 'display.show_dimensions', False): - self.assertFalse('5 rows' in str(df)) - self.assertFalse('5 rows' in df._repr_html_()) + assert '5 rows' not in str(df) + assert '5 rows' not in df._repr_html_() with option_context('display.max_rows', 2, 'display.max_columns', 2, 'display.width', 500, 'display.expand_frame_repr', 'info', 'display.show_dimensions', 'truncate'): @@ -1368,8 +1368,8 @@ def test_show_dimensions(self): with option_context('display.max_rows', 10, 'display.max_columns', 40, 'display.width', 500, 'display.expand_frame_repr', 'info', 'display.show_dimensions', 'truncate'): - self.assertFalse('5 rows' in str(df)) - self.assertFalse('5 rows' in df._repr_html_()) + assert '5 rows' not in str(df) + assert '5 rows' not in df._repr_html_() def test_repr_html(self): self.frame._repr_html_() @@ -1386,7 +1386,7 @@ def test_repr_html(self): fmt.set_option('display.show_dimensions', True) self.assertTrue('2 rows' in df._repr_html_()) fmt.set_option('display.show_dimensions', False) - self.assertFalse('2 rows' in df._repr_html_()) + assert '2 rows' not in df._repr_html_() tm.reset_display_options() @@ -1518,7 +1518,7 @@ def test_info_repr_max_cols(self): with option_context('display.large_repr', 'info', 'display.max_columns', 1, 'display.max_info_columns', 5): - self.assertFalse(has_non_verbose_info_repr(df)) + assert not has_non_verbose_info_repr(df) # test verbose overrides # fmt.set_option('display.max_info_columns', 4) # exceeded diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 28c6a0e95e0f1..a67bb2fd8eb5c 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1461,7 +1461,7 @@ def test_to_html_filename(self): def test_to_html_with_no_bold(self): x = DataFrame({'x': np.random.randn(5)}) ashtml = x.to_html(bold_rows=False) - 
self.assertFalse('")]) + assert '")] def test_to_html_columns_arg(self): frame = DataFrame(tm.getSeriesData()) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 0dfae0fb88bf6..ac9e4f77db6ac 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -415,7 +415,7 @@ def test_frame_to_json_except(self): def test_frame_empty(self): df = DataFrame(columns=['jim', 'joe']) - self.assertFalse(df._is_mixed_type) + assert not df._is_mixed_type assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df, check_index_type=False) # GH 7445 diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 9abd3c5bfe993..afb23f540264e 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -113,7 +113,7 @@ def test_squeeze_no_view(self): # Series should not be a view data = """time,data\n0,10\n1,11\n2,12\n4,14\n5,15\n3,13""" result = self.read_csv(StringIO(data), index_col='time', squeeze=True) - self.assertFalse(result._is_view) + assert not result._is_view def test_malformed(self): # see gh-6607 @@ -1656,11 +1656,11 @@ def test_file_handles(self): fh = StringIO('a,b\n1,2') self.read_csv(fh) - self.assertFalse(fh.closed) + assert not fh.closed with open(self.csv1, 'r') as f: self.read_csv(f) - self.assertFalse(f.closed) + assert not f.closed # mmap not working with python engine if self.engine != 'python': @@ -1671,7 +1671,7 @@ def test_file_handles(self): self.read_csv(m) # closed attribute new in python 3.2 if PY3: - self.assertFalse(m.closed) + assert not m.closed m.close() def test_invalid_file_buffer(self): diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 4a8d2e997ee06..b9920983856d4 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -61,14 +61,14 @@ def test_parse_public_s3_bucket(self): df = read_csv('s3://pandas-test/tips.csv' + ext, compression=comp) self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) # Read public file from bucket with not-public contents df = read_csv('s3://cant_get_it/tips.csv') self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert not df.empty tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df) @tm.network @@ -76,7 +76,7 @@ def test_parse_public_s3n_bucket(self): # Read from AWS s3 as "s3n" URL df = read_csv('s3n://pandas-test/tips.csv', nrows=10) self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) @@ -85,7 +85,7 @@ def test_parse_public_s3a_bucket(self): # Read from AWS s3 as "s3a" URL df = read_csv('s3a://pandas-test/tips.csv', nrows=10) self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) @@ -95,7 +95,7 @@ def test_parse_public_s3_bucket_nrows(self): df = read_csv('s3://pandas-test/tips.csv' + ext, nrows=10, compression=comp) self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) @@ -113,7 +113,7 @@ def test_parse_public_s3_bucket_chunked(self): # properly. 
df = df_reader.get_chunk() self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert not df.empty true_df = local_tips.iloc[ chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) @@ -132,7 +132,7 @@ def test_parse_public_s3_bucket_chunked_python(self): # Read a couple of chunks and make sure we see them properly. df = df_reader.get_chunk() self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert not df.empty true_df = local_tips.iloc[ chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) @@ -143,7 +143,7 @@ def test_parse_public_s3_bucket_python(self): df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression=comp) self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) @@ -153,7 +153,7 @@ def test_infer_s3_compression(self): df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression='infer') self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) @@ -163,7 +163,7 @@ def test_parse_public_s3_bucket_nrows_python(self): df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', nrows=10, compression=comp) self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 82819b94413b4..700915b81dd31 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -129,7 +129,7 @@ def test_get_attr(self): for attr in attrs: self.assertTrue(hasattr(wrapper, attr)) - self.assertFalse(hasattr(wrapper, 'foo')) + assert not hasattr(wrapper, 'foo') def test_next(self): with open(self.mmap_file, 'r') as target: diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 5a30ff2afe7e5..cf08754a18527 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -168,7 +168,7 @@ def test_banklist_no_match(self): def test_spam_header(self): df = self.read_html(self.spam_data, '.*Water.*', header=1)[0] self.assertEqual(df.columns[0], 'Proximates') - self.assertFalse(df.empty) + assert not df.empty def test_skiprows_int(self): df1 = self.read_html(self.spam_data, '.*Water.*', skiprows=1) @@ -378,7 +378,7 @@ def test_thousands_macau_stats(self): attrs={'class': 'style1'}) df = dfs[all_non_nan_table_index] - self.assertFalse(any(s.isnull().any() for _, s in df.iteritems())) + assert not any(s.isnull().any() for _, s in df.iteritems()) @tm.slow def test_thousands_macau_index_col(self): @@ -387,7 +387,7 @@ def test_thousands_macau_index_col(self): dfs = self.read_html(macau_data, index_col=0, header=0) df = dfs[all_non_nan_table_index] - self.assertFalse(any(s.isnull().any() for _, s in df.iteritems())) + assert not any(s.isnull().any() for _, s in df.iteritems()) def test_empty_tables(self): """ diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 1b656e7b1b004..6e7fca9a29e98 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -332,7 +332,7 @@ def test_api_default_format(self): pandas.set_option('io.hdf.default_format', 'fixed') _maybe_remove(store, 'df') store.put('df', df) - self.assertFalse(store.get_storer('df').is_table) + assert not 
store.get_storer('df').is_table pytest.raises(ValueError, store.append, 'df2', df) pandas.set_option('io.hdf.default_format', 'table') @@ -352,7 +352,7 @@ def test_api_default_format(self): pandas.set_option('io.hdf.default_format', 'fixed') df.to_hdf(path, 'df') with HDFStore(path) as store: - self.assertFalse(store.get_storer('df').is_table) + assert not store.get_storer('df').is_table pytest.raises(ValueError, df.to_hdf, path, 'df2', append=True) pandas.set_option('io.hdf.default_format', 'table') @@ -545,14 +545,14 @@ def test_reopen_handle(self): # invalid mode change pytest.raises(PossibleDataLossError, store.open, 'w') store.close() - self.assertFalse(store.is_open) + assert not store.is_open # truncation ok here store.open('w') self.assertTrue(store.is_open) self.assertEqual(len(store), 0) store.close() - self.assertFalse(store.is_open) + assert not store.is_open store = HDFStore(path, mode='a') store['a'] = tm.makeTimeSeries() @@ -563,7 +563,7 @@ def test_reopen_handle(self): self.assertEqual(len(store), 1) self.assertEqual(store._mode, 'r') store.close() - self.assertFalse(store.is_open) + assert not store.is_open # reopen as append store.open('a') @@ -571,7 +571,7 @@ def test_reopen_handle(self): self.assertEqual(len(store), 1) self.assertEqual(store._mode, 'a') store.close() - self.assertFalse(store.is_open) + assert not store.is_open # reopen as append (again) store.open('a') @@ -579,7 +579,7 @@ def test_reopen_handle(self): self.assertEqual(len(store), 1) self.assertEqual(store._mode, 'a') store.close() - self.assertFalse(store.is_open) + assert not store.is_open def test_open_args(self): @@ -599,7 +599,7 @@ def test_open_args(self): store.close() # the file should not have actually been written - self.assertFalse(os.path.exists(path)) + assert not os.path.exists(path) def test_flush(self): diff --git a/pandas/tests/io/test_s3.py b/pandas/tests/io/test_s3.py index 2983fa647445c..cff8eef74a607 100644 --- a/pandas/tests/io/test_s3.py +++ b/pandas/tests/io/test_s3.py @@ -7,4 +7,4 @@ class TestS3URL(tm.TestCase): def test_is_s3_url(self): self.assertTrue(_is_s3_url("s3://pandas/somethingelse.com")) - self.assertFalse(_is_s3_url("s4://pandas/somethingelse.com")) + assert not _is_s3_url("s4://pandas/somethingelse.com") diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 36ff3bdbb24b5..0930d99ea5c30 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -625,9 +625,7 @@ def test_date_parsing(self): # Test date parsing in read_sq # No Parsing df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn) - self.assertFalse( - issubclass(df.DateCol.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") + assert not issubclass(df.DateCol.dtype.type, np.datetime64) df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates=['DateCol']) @@ -1230,8 +1228,7 @@ def test_drop_table(self): pandasSQL.drop_table('temp_frame') - self.assertFalse( - temp_conn.has_table('temp_frame'), 'Table not deleted from DB') + assert not temp_conn.has_table('temp_frame') def test_roundtrip(self): self._roundtrip() @@ -1727,8 +1724,7 @@ def test_default_date_load(self): df = sql.read_sql_table("types_test_data", self.conn) # IMPORTANT - sqlite has no native date type, so shouldn't parse, but - self.assertFalse(issubclass(df.DateCol.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") + assert not issubclass(df.DateCol.dtype.type, np.datetime64) def test_bigint_warning(self): # test no warning for BIGINT (to 
support int64) is raised (GH7433) @@ -1988,8 +1984,7 @@ def test_create_and_drop_table(self): self.pandasSQL.drop_table('drop_test_frame') - self.assertFalse(self.pandasSQL.has_table('drop_test_frame'), - 'Table not deleted from DB') + assert not self.pandasSQL.has_table('drop_test_frame') def test_roundtrip(self): self._roundtrip() diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 22f471a01b9d2..35625670f0641 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -491,13 +491,13 @@ def is_grid_on(): spndx += 1 mpl.rc('axes', grid=False) obj.plot(kind=kind, **kws) - self.assertFalse(is_grid_on()) + assert not is_grid_on() self.plt.subplot(1, 4 * len(kinds), spndx) spndx += 1 mpl.rc('axes', grid=True) obj.plot(kind=kind, grid=False, **kws) - self.assertFalse(is_grid_on()) + assert not is_grid_on() if kind != 'pie': self.plt.subplot(1, 4 * len(kinds), spndx) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index f0a56592158d3..7534d9363f267 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -223,7 +223,7 @@ def test_fake_inferred_business(self): ts = Series(lrange(len(rng)), rng) ts = ts[:3].append(ts[5:]) ax = ts.plot() - self.assertFalse(hasattr(ax, 'freq')) + assert not hasattr(ax, 'freq') @slow def test_plot_offset_freq(self): @@ -334,7 +334,7 @@ def test_nonzero_base(self): df = DataFrame(np.arange(24), index=idx) ax = df.plot() rs = ax.get_lines()[0].get_xdata() - self.assertFalse(Index(rs).is_normalized) + assert not Index(rs).is_normalized def test_dataframe(self): bts = DataFrame({'a': tm.makeTimeSeries()}) @@ -568,14 +568,14 @@ def test_secondary_y(self): ser2 = Series(np.random.randn(10)) ax = ser.plot(secondary_y=True) self.assertTrue(hasattr(ax, 'left_ax')) - self.assertFalse(hasattr(ax, 'right_ax')) + assert not hasattr(ax, 'right_ax') fig = ax.get_figure() axes = fig.get_axes() l = ax.get_lines()[0] xp = Series(l.get_ydata(), l.get_xdata()) assert_series_equal(ser, xp) self.assertEqual(ax.get_yaxis().get_ticks_position(), 'right') - self.assertFalse(axes[0].get_yaxis().get_visible()) + assert not axes[0].get_yaxis().get_visible() plt.close(fig) ax2 = ser2.plot() @@ -586,10 +586,10 @@ def test_secondary_y(self): ax = ser2.plot() ax2 = ser.plot(secondary_y=True) self.assertTrue(ax.get_yaxis().get_visible()) - self.assertFalse(hasattr(ax, 'left_ax')) + assert not hasattr(ax, 'left_ax') self.assertTrue(hasattr(ax, 'right_ax')) self.assertTrue(hasattr(ax2, 'left_ax')) - self.assertFalse(hasattr(ax2, 'right_ax')) + assert not hasattr(ax2, 'right_ax') @slow def test_secondary_y_ts(self): @@ -599,14 +599,14 @@ def test_secondary_y_ts(self): ser2 = Series(np.random.randn(10), idx) ax = ser.plot(secondary_y=True) self.assertTrue(hasattr(ax, 'left_ax')) - self.assertFalse(hasattr(ax, 'right_ax')) + assert not hasattr(ax, 'right_ax') fig = ax.get_figure() axes = fig.get_axes() l = ax.get_lines()[0] xp = Series(l.get_ydata(), l.get_xdata()).to_timestamp() assert_series_equal(ser, xp) self.assertEqual(ax.get_yaxis().get_ticks_position(), 'right') - self.assertFalse(axes[0].get_yaxis().get_visible()) + assert not axes[0].get_yaxis().get_visible() plt.close(fig) ax2 = ser2.plot() @@ -627,7 +627,7 @@ def test_secondary_kde(self): ser = Series(np.random.randn(10)) ax = ser.plot(secondary_y=True, kind='density') self.assertTrue(hasattr(ax, 'left_ax')) - self.assertFalse(hasattr(ax, 'right_ax')) + assert not hasattr(ax, 
'right_ax') fig = ax.get_figure() axes = fig.get_axes() self.assertEqual(axes[1].get_yaxis().get_ticks_position(), 'right') @@ -684,7 +684,7 @@ def test_mixed_freq_irregular_first(self): s2 = s1[[0, 5, 10, 11, 12, 13, 14, 15]] s2.plot(style='g') ax = s1.plot() - self.assertFalse(hasattr(ax, 'freq')) + assert not hasattr(ax, 'freq') lines = ax.get_lines() x1 = lines[0].get_xdata() tm.assert_numpy_array_equal(x1, s2.index.asobject.values) @@ -716,7 +716,7 @@ def test_mixed_freq_irregular_first_df(self): s2 = s1.iloc[[0, 5, 10, 11, 12, 13, 14, 15], :] ax = s2.plot(style='g') ax = s1.plot(ax=ax) - self.assertFalse(hasattr(ax, 'freq')) + assert not hasattr(ax, 'freq') lines = ax.get_lines() x1 = lines[0].get_xdata() tm.assert_numpy_array_equal(x1, s2.index.asobject.values) @@ -1049,7 +1049,7 @@ def test_secondary_upsample(self): for l in ax.get_lines(): self.assertEqual(PeriodIndex(l.get_xdata()).freq, 'D') self.assertTrue(hasattr(ax, 'left_ax')) - self.assertFalse(hasattr(ax, 'right_ax')) + assert not hasattr(ax, 'right_ax') for l in ax.left_ax.get_lines(): self.assertEqual(PeriodIndex(l.get_xdata()).freq, 'D') diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index c72bce28b5862..c5b43cd1a300b 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -664,7 +664,7 @@ def test_line_lim(self): self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) for ax in axes: self.assertTrue(hasattr(ax, 'left_ax')) - self.assertFalse(hasattr(ax, 'right_ax')) + assert not hasattr(ax, 'right_ax') xmin, xmax = ax.get_xlim() lines = ax.get_lines() self.assertEqual(xmin, lines[0].get_data()[0][0]) diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 79d5f74e6ea06..a77c1edd258e3 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -154,7 +154,7 @@ def test_hist_df_legacy(self): with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(df.hist, grid=False) self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - self.assertFalse(axes[1, 1].get_visible()) + assert not axes[1, 1].get_visible() df = DataFrame(randn(100, 1)) _check_plot_works(df.hist) @@ -398,8 +398,8 @@ def test_axis_share_x(self): self.assertTrue(ax2._shared_x_axes.joined(ax1, ax2)) # don't share y - self.assertFalse(ax1._shared_y_axes.joined(ax1, ax2)) - self.assertFalse(ax2._shared_y_axes.joined(ax1, ax2)) + assert not ax1._shared_y_axes.joined(ax1, ax2) + assert not ax2._shared_y_axes.joined(ax1, ax2) @slow def test_axis_share_y(self): @@ -411,8 +411,8 @@ def test_axis_share_y(self): self.assertTrue(ax2._shared_y_axes.joined(ax1, ax2)) # don't share x - self.assertFalse(ax1._shared_x_axes.joined(ax1, ax2)) - self.assertFalse(ax2._shared_x_axes.joined(ax1, ax2)) + assert not ax1._shared_x_axes.joined(ax1, ax2) + assert not ax2._shared_x_axes.joined(ax1, ax2) @slow def test_axis_share_xy(self): diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 38ce5f44b812f..b84e50c4ec827 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -454,7 +454,7 @@ def test_hist_secondary_legend(self): # left axis must be invisible, right axis must be visible self._check_legend_labels(ax.left_ax, labels=['a (right)', 'b (right)']) - self.assertFalse(ax.left_ax.get_yaxis().get_visible()) + assert not ax.left_ax.get_yaxis().get_visible() self.assertTrue(ax.get_yaxis().get_visible()) tm.close() @@ 
-502,7 +502,7 @@ def test_df_series_secondary_legend(self): # left axis must be invisible and right axis must be visible expected = ['a (right)', 'b (right)', 'c (right)', 'x (right)'] self._check_legend_labels(ax.left_ax, labels=expected) - self.assertFalse(ax.left_ax.get_yaxis().get_visible()) + assert not ax.left_ax.get_yaxis().get_visible() self.assertTrue(ax.get_yaxis().get_visible()) tm.close() @@ -513,7 +513,7 @@ def test_df_series_secondary_legend(self): # left axis must be invisible and right axis must be visible expected = ['a (right)', 'b (right)', 'c (right)', 'x (right)'] self._check_legend_labels(ax.left_ax, expected) - self.assertFalse(ax.left_ax.get_yaxis().get_visible()) + assert not ax.left_ax.get_yaxis().get_visible() self.assertTrue(ax.get_yaxis().get_visible()) tm.close() @@ -524,7 +524,7 @@ def test_df_series_secondary_legend(self): # left axis must be invisible and right axis must be visible expected = ['a', 'b', 'c', 'x (right)'] self._check_legend_labels(ax.left_ax, expected) - self.assertFalse(ax.left_ax.get_yaxis().get_visible()) + assert not ax.left_ax.get_yaxis().get_visible() self.assertTrue(ax.get_yaxis().get_visible()) tm.close() diff --git a/pandas/tests/reshape/test_hashing.py b/pandas/tests/reshape/test_hashing.py index cba70bba6823f..4857d3ac8310b 100644 --- a/pandas/tests/reshape/test_hashing.py +++ b/pandas/tests/reshape/test_hashing.py @@ -67,7 +67,7 @@ def check_not_equal_with_index(self, obj): a = hash_pandas_object(obj, index=True) b = hash_pandas_object(obj, index=False) if len(obj): - self.assertFalse((a == b).all()) + assert not (a == b).all() def test_hash_tuples(self): tups = [(1, 'one'), (1, 'two'), (2, 'one')] @@ -240,13 +240,13 @@ def test_same_len_hash_collisions(self): length = 2**(l + 8) + 1 s = tm.rands_array(length, 2) result = hash_array(s, 'utf8') - self.assertFalse(result[0] == result[1]) + assert not result[0] == result[1] for l in range(8): length = 2**(l + 8) s = tm.rands_array(length, 2) result = hash_array(s, 'utf8') - self.assertFalse(result[0] == result[1]) + assert not result[0] == result[1] def test_hash_collisions(self): diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 73d0346546b97..80056b973a2fc 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -790,8 +790,8 @@ def run_asserts(left, right): res = left.join(right, on=icols, how='left', sort=sort) self.assertTrue(len(left) < len(res) + 1) - self.assertFalse(res['4th'].isnull().any()) - self.assertFalse(res['5th'].isnull().any()) + assert not res['4th'].isnull().any() + assert not res['5th'].isnull().any() tm.assert_series_equal( res['4th'], - res['5th'], check_names=False) diff --git a/pandas/tests/reshape/test_merge_asof.py b/pandas/tests/reshape/test_merge_asof.py index 0b5b580563741..f2aef409324f8 100644 --- a/pandas/tests/reshape/test_merge_asof.py +++ b/pandas/tests/reshape/test_merge_asof.py @@ -531,8 +531,8 @@ def test_non_sorted(self): quotes = self.quotes.sort_values('time', ascending=False) # we require that we are already sorted on time & quotes - self.assertFalse(trades.time.is_monotonic) - self.assertFalse(quotes.time.is_monotonic) + assert not trades.time.is_monotonic + assert not quotes.time.is_monotonic with pytest.raises(ValueError): merge_asof(trades, quotes, on='time', @@ -540,7 +540,7 @@ def test_non_sorted(self): trades = self.trades.sort_values('time') self.assertTrue(trades.time.is_monotonic) - self.assertFalse(quotes.time.is_monotonic) + assert not 
quotes.time.is_monotonic with pytest.raises(ValueError): merge_asof(trades, quotes, on='time', diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index f15616a16678f..416e729944d39 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1494,7 +1494,7 @@ def test_isleapyear_deprecate(self): self.assertTrue(isleapyear(2000)) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertFalse(isleapyear(2001)) + assert not isleapyear(2001) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): self.assertTrue(isleapyear(2004)) diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index b5c2439524e34..c8f3833c2c964 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -25,13 +25,13 @@ def test_is_leap_year(self): assert isinstance(p.is_leap_year, bool) p = Period('1999-01-01 00:00:00', freq=freq) - self.assertFalse(p.is_leap_year) + assert not p.is_leap_year p = Period('2004-01-01 00:00:00', freq=freq) self.assertTrue(p.is_leap_year) p = Period('2100-01-01 00:00:00', freq=freq) - self.assertFalse(p.is_leap_year) + assert not p.is_leap_year def test_quarterly_negative_ordinals(self): p = Period(ordinal=-1, freq='Q-DEC') diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index 86b02d20b6996..788c204ca3eb3 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -451,7 +451,7 @@ def test_contains(self): # GH 13603 td = to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) for v in [pd.NaT, None, float('nan'), np.nan]: - self.assertFalse((v in td)) + assert not (v in td) td = to_timedelta([pd.NaT]) for v in [pd.NaT, None, float('nan'), np.nan]: @@ -658,7 +658,7 @@ def test_components(self): s[1] = np.nan result = s.dt.components - self.assertFalse(result.iloc[0].isnull().all()) + assert not result.iloc[0].isnull().all() self.assertTrue(result.iloc[1].isnull().all()) def test_isoformat(self): @@ -707,5 +707,5 @@ def test_ops_error_str(self): with pytest.raises(TypeError): l > r - self.assertFalse(l == r) + assert not l == r self.assertTrue(l != r) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index bad0b697eef6c..cfc4cf93e720c 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -862,18 +862,18 @@ def test_comparison(self): val = Timestamp(stamp) self.assertEqual(val, val) - self.assertFalse(val != val) - self.assertFalse(val < val) + assert not val != val + assert not val < val self.assertTrue(val <= val) - self.assertFalse(val > val) + assert not val > val self.assertTrue(val >= val) other = datetime(2012, 5, 18) self.assertEqual(val, other) - self.assertFalse(val != other) - self.assertFalse(val < other) + assert not val != other + assert not val < other self.assertTrue(val <= other) - self.assertFalse(val > other) + assert not val > other self.assertTrue(val >= other) other = Timestamp(stamp + 100) @@ -889,14 +889,14 @@ def test_compare_invalid(self): # GH 8058 val = Timestamp('20130101 12:01:02') - self.assertFalse(val == 'foo') - self.assertFalse(val == 10.0) - self.assertFalse(val == 1) - self.assertFalse(val == long(1)) - self.assertFalse(val == []) - self.assertFalse(val == {'foo': 1}) - self.assertFalse(val == np.float64(1)) - self.assertFalse(val == np.int64(1)) + assert not val == 'foo' + assert not val == 10.0 + assert not val == 1 + assert 
not val == long(1) + assert not val == [] + assert not val == {'foo': 1} + assert not val == np.float64(1) + assert not val == np.int64(1) self.assertTrue(val != 'foo') self.assertTrue(val != 10.0) @@ -933,8 +933,8 @@ def test_cant_compare_tz_naive_w_aware(self): pytest.raises(Exception, a.__eq__, b.to_pydatetime()) pytest.raises(Exception, a.to_pydatetime().__eq__, b) else: - self.assertFalse(a == b.to_pydatetime()) - self.assertFalse(a.to_pydatetime() == b) + assert not a == b.to_pydatetime() + assert not a.to_pydatetime() == b def test_cant_compare_tz_naive_w_aware_explicit_pytz(self): tm._skip_if_no_pytz() @@ -956,8 +956,8 @@ def test_cant_compare_tz_naive_w_aware_explicit_pytz(self): pytest.raises(Exception, a.__eq__, b.to_pydatetime()) pytest.raises(Exception, a.to_pydatetime().__eq__, b) else: - self.assertFalse(a == b.to_pydatetime()) - self.assertFalse(a.to_pydatetime() == b) + assert not a == b.to_pydatetime() + assert not a.to_pydatetime() == b def test_cant_compare_tz_naive_w_aware_dateutil(self): tm._skip_if_no_dateutil() @@ -980,8 +980,8 @@ def test_cant_compare_tz_naive_w_aware_dateutil(self): pytest.raises(Exception, a.__eq__, b.to_pydatetime()) pytest.raises(Exception, a.to_pydatetime().__eq__, b) else: - self.assertFalse(a == b.to_pydatetime()) - self.assertFalse(a.to_pydatetime() == b) + assert not a == b.to_pydatetime() + assert not a.to_pydatetime() == b def test_delta_preserve_nanos(self): val = Timestamp(long(1337299200000000123)) @@ -1090,13 +1090,13 @@ def test_is_leap_year(self): assert isinstance(dt.is_leap_year, bool) dt = Timestamp('1999-01-01 00:00:00', tz=tz) - self.assertFalse(dt.is_leap_year) + assert not dt.is_leap_year dt = Timestamp('2004-01-01 00:00:00', tz=tz) self.assertTrue(dt.is_leap_year) dt = Timestamp('2100-01-01 00:00:00', tz=tz) - self.assertFalse(dt.is_leap_year) + assert not dt.is_leap_year class TestTimestampNsOperations(tm.TestCase): @@ -1383,9 +1383,9 @@ def test_timestamp_compare_with_early_datetime(self): # e.g. datetime.min stamp = Timestamp('2012-01-01') - self.assertFalse(stamp == datetime.min) - self.assertFalse(stamp == datetime(1600, 1, 1)) - self.assertFalse(stamp == datetime(2700, 1, 1)) + assert not stamp == datetime.min + assert not stamp == datetime(1600, 1, 1) + assert not stamp == datetime(2700, 1, 1) self.assertNotEqual(stamp, datetime.min) self.assertNotEqual(stamp, datetime(1600, 1, 1)) self.assertNotEqual(stamp, datetime(2700, 1, 1)) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index eb8a83bb85847..f5bccdd55e944 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -646,7 +646,7 @@ def test_prod_numpy16_bug(self): def test_all_any(self): ts = tm.makeTimeSeries() bool_series = ts > 0 - self.assertFalse(bool_series.all()) + assert not bool_series.all() self.assertTrue(bool_series.any()) # Alternative types, with implicit 'object' dtype. @@ -660,7 +660,7 @@ def test_all_any_params(self): self.assertTrue(s1.all(skipna=False)) # nan && True => True self.assertTrue(s1.all(skipna=True)) self.assertTrue(np.isnan(s2.any(skipna=False))) # nan || False => nan - self.assertFalse(s2.any(skipna=True)) + assert not s2.any(skipna=True) # Check level. 
s = pd.Series([False, False, True, True, False, True], @@ -699,7 +699,7 @@ def test_modulo(self): p = p.astype('float64') result = p['first'] % p['second'] result2 = p['second'] % p['first'] - self.assertFalse(np.array_equal(result, result2)) + assert not np.array_equal(result, result2) # GH 9144 s = Series([0, 1]) @@ -1362,14 +1362,14 @@ def test_searchsorted_sorter(self): def test_is_unique(self): # GH11946 s = Series(np.random.randint(0, 10, size=1000)) - self.assertFalse(s.is_unique) + assert not s.is_unique s = Series(np.arange(1000)) self.assertTrue(s.is_unique) def test_is_monotonic(self): s = Series(np.random.randint(0, 10, size=1000)) - self.assertFalse(s.is_monotonic) + assert not s.is_monotonic s = Series(np.arange(1000)) self.assertTrue(s.is_monotonic) self.assertTrue(s.is_monotonic_increasing) @@ -1380,7 +1380,7 @@ def test_is_monotonic(self): self.assertTrue(s.is_monotonic) self.assertTrue(s.is_monotonic_increasing) s = Series(list(reversed(s.tolist()))) - self.assertFalse(s.is_monotonic) + assert not s.is_monotonic self.assertTrue(s.is_monotonic_decreasing) def test_sort_index_level(self): diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 397058c4bb8ce..5b7ac9bc2b33c 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -216,7 +216,7 @@ def test_iteritems(self): self.assertEqual(val, self.ts[idx]) # assert is lazy (genrators don't define reverse, lists do) - self.assertFalse(hasattr(self.series.iteritems(), 'reverse')) + assert not hasattr(self.series.iteritems(), 'reverse') def test_raise_on_info(self): s = Series(np.random.randn(10)) @@ -239,7 +239,7 @@ def test_copy(self): if deep is None or deep is True: # Did not modify original Series self.assertTrue(np.isnan(s2[0])) - self.assertFalse(np.isnan(s[0])) + assert not np.isnan(s[0]) else: # we DID modify the original Series self.assertTrue(np.isnan(s2[0])) diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/test_asof.py index 9c1e4626e1736..137390b6427eb 100644 --- a/pandas/tests/series/test_asof.py +++ b/pandas/tests/series/test_asof.py @@ -140,7 +140,7 @@ def test_errors(self): Timestamp('20130102')]) # non-monotonic - self.assertFalse(s.index.is_monotonic) + assert not s.index.is_monotonic with pytest.raises(ValueError): s.asof(s.index[0]) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index a870667ff3f96..b08653b0001ca 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -65,8 +65,8 @@ def test_constructor(self): self.assertEqual(mixed.dtype, np.object_) assert mixed[1] is np.NaN - self.assertFalse(self.empty.index.is_all_dates) - self.assertFalse(Series({}).index.is_all_dates) + assert not self.empty.index.is_all_dates + assert not Series({}).index.is_all_dates pytest.raises(Exception, Series, np.random.randn(3, 3), index=np.arange(3)) @@ -265,7 +265,7 @@ def test_constructor_copy(self): # changes to origin of copy does not affect the copy x[0] = 2. - self.assertFalse(x.equals(y)) + assert not x.equals(y) self.assertEqual(x[0], 2.) self.assertEqual(y[0], 1.) 
@@ -354,7 +354,7 @@ def test_constructor_dtype_datetime64(self): # in theory this should be all nulls, but since # we are not specifying a dtype is ambiguous s = Series(iNaT, index=lrange(5)) - self.assertFalse(isnull(s).all()) + assert not isnull(s).all() s = Series(nan, dtype='M8[ns]', index=lrange(5)) self.assertTrue(isnull(s).all()) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 74a4e37f0923a..c56a5baac12af 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -378,7 +378,7 @@ def test_dt_accessor_api(self): with tm.assert_raises_regex(AttributeError, "only use .dt accessor"): s.dt - self.assertFalse(hasattr(s, 'dt')) + assert not hasattr(s, 'dt') def test_sub_of_datetime_from_TimeSeries(self): from pandas.core.tools.timedeltas import to_timedelta diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 6907cc194f0f0..601262df89260 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -728,7 +728,7 @@ def test_setitem(self): self.assertTrue(np.isnan(self.ts[6])) self.assertTrue(np.isnan(self.ts[2])) self.ts[np.isnan(self.ts)] = 5 - self.assertFalse(np.isnan(self.ts[2])) + assert not np.isnan(self.ts[2]) # caught this bug when writing tests series = Series(tm.makeIntIndex(20).astype(float), @@ -1514,21 +1514,21 @@ def test_where_numeric_with_string(self): s = pd.Series([1, 2, 3]) w = s.where(s > 1, 'X') - self.assertFalse(is_integer(w[0])) + assert not is_integer(w[0]) self.assertTrue(is_integer(w[1])) self.assertTrue(is_integer(w[2])) self.assertTrue(isinstance(w[0], str)) self.assertTrue(w.dtype == 'object') w = s.where(s > 1, ['X', 'Y', 'Z']) - self.assertFalse(is_integer(w[0])) + assert not is_integer(w[0]) self.assertTrue(is_integer(w[1])) self.assertTrue(is_integer(w[2])) self.assertTrue(isinstance(w[0], str)) self.assertTrue(w.dtype == 'object') w = s.where(s > 1, np.array(['X', 'Y', 'Z'])) - self.assertFalse(is_integer(w[0])) + assert not is_integer(w[0]) self.assertTrue(is_integer(w[1])) self.assertTrue(is_integer(w[2])) self.assertTrue(isinstance(w[0], str)) @@ -1716,7 +1716,7 @@ def test_underlying_data_conversion(self): def test_preserveRefs(self): seq = self.ts[[5, 10, 15]] seq[1] = np.NaN - self.assertFalse(np.isnan(self.ts[10])) + assert not np.isnan(self.ts[10]) def test_drop(self): @@ -1851,7 +1851,7 @@ def test_align_nocopy(self): a = self.ts.copy() ra, _ = a.align(b, join='left') ra[:5] = 5 - self.assertFalse((a[:5] == 5).any()) + assert not (a[:5] == 5).any() # do not copy a = self.ts.copy() @@ -1864,7 +1864,7 @@ def test_align_nocopy(self): b = self.ts[:5].copy() _, rb = a.align(b, join='right') rb[:3] = 5 - self.assertFalse((b[:3] == 5).any()) + assert not (b[:3] == 5).any() # do not copy a = self.ts.copy() @@ -1952,7 +1952,7 @@ def test_reindex(self): # return a copy the same index here result = self.ts.reindex() - self.assertFalse((result is self.ts)) + assert not (result is self.ts) def test_reindex_nan(self): ts = Series([2, 3, 5, 7], index=[1, 4, nan, 8]) @@ -1974,7 +1974,7 @@ def test_reindex_series_add_nat(self): mask = result.isnull() self.assertTrue(mask[-5:].all()) - self.assertFalse(mask[:-5].any()) + assert not mask[:-5].any() def test_reindex_with_datetimes(self): rng = date_range('1/1/2000', periods=20) @@ -2279,7 +2279,7 @@ def test_constructor(self): assert isinstance(self.dups.index, DatetimeIndex) def test_is_unique_monotonic(self): - 
self.assertFalse(self.dups.index.is_unique) + assert not self.dups.index.is_unique def test_index_unique(self): uniques = self.dups.index.unique() diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index e7c1b22216dcb..53c8c518eb3eb 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -487,19 +487,19 @@ def test_timedelta64_nan(self): self.assertTrue(isnull(td1[0])) self.assertEqual(td1[0].value, iNaT) td1[0] = td[0] - self.assertFalse(isnull(td1[0])) + assert not isnull(td1[0]) td1[1] = iNaT self.assertTrue(isnull(td1[1])) self.assertEqual(td1[1].value, iNaT) td1[1] = td[1] - self.assertFalse(isnull(td1[1])) + assert not isnull(td1[1]) td1[2] = NaT self.assertTrue(isnull(td1[2])) self.assertEqual(td1[2].value, iNaT) td1[2] = td[2] - self.assertFalse(isnull(td1[2])) + assert not isnull(td1[2]) # boolean setting # this doesn't work, not sure numpy even supports it @@ -552,7 +552,7 @@ def test_dropna_no_nan(self): result = s.dropna() tm.assert_series_equal(result, s) - self.assertFalse(result is s) + assert result is not s s2 = s.copy() s2.dropna(inplace=True) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 89ed7975e8017..eb840faac05e0 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -122,7 +122,7 @@ def test_div(self): assert_series_equal(result, p['first'].astype('float64'), check_names=False) self.assertTrue(result.name is None) - self.assertFalse(np.array_equal(result, p['second'] / p['first'])) + assert not np.array_equal(result, p['second'] / p['first']) # inf signing s = Series([np.nan, 1., -1.]) diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index b4ad90f6f35af..c92a82e287120 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -103,9 +103,9 @@ def test_repr(self): assert "Name: 0" in rep_str ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"]) - self.assertFalse("\t" in repr(ser)) - self.assertFalse("\r" in repr(ser)) - self.assertFalse("a\n" in repr(ser)) + assert "\t" not in repr(ser) + assert "\r" not in repr(ser) + assert "a\n" not in repr(ser) # with empty series (#4651) s = Series([], dtype=np.int64, name='foo') diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py index bb6ff7a0c728f..33df4b5e59bc9 100644 --- a/pandas/tests/sparse/test_array.py +++ b/pandas/tests/sparse/test_array.py @@ -299,7 +299,7 @@ def test_constructor_from_sparse(self): def test_constructor_copy(self): cp = SparseArray(self.arr, copy=True) cp.sp_values[:3] = 0 - self.assertFalse((self.arr.sp_values[:3] == 0).any()) + assert not (self.arr.sp_values[:3] == 0).any() not_copy = SparseArray(self.arr) not_copy.sp_values[:3] = 0 @@ -323,11 +323,11 @@ def test_constructor_bool(self): def test_constructor_bool_fill_value(self): arr = SparseArray([True, False, True], dtype=None) self.assertEqual(arr.dtype, np.bool) - self.assertFalse(arr.fill_value) + assert not arr.fill_value arr = SparseArray([True, False, True], dtype=np.bool) self.assertEqual(arr.dtype, np.bool) - self.assertFalse(arr.fill_value) + assert not arr.fill_value arr = SparseArray([True, False, True], dtype=np.bool, fill_value=True) self.assertEqual(arr.dtype, np.bool) @@ -352,7 +352,7 @@ def test_constructor_float32(self): def test_astype(self): res = self.arr.astype('f8') res.sp_values[:3] = 27 - self.assertFalse((self.arr.sp_values[:3] == 27).any()) + assert not 
(self.arr.sp_values[:3] == 27).any() msg = "unable to coerce current fill_value nan to int64 dtype" with tm.assert_raises_regex(ValueError, msg): diff --git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py index 63ed11845a896..55115f45ff740 100644 --- a/pandas/tests/sparse/test_libsparse.py +++ b/pandas/tests/sparse/test_libsparse.py @@ -437,7 +437,7 @@ def test_equals(self): index = BlockIndex(10, [0, 4], [2, 5]) self.assertTrue(index.equals(index)) - self.assertFalse(index.equals(BlockIndex(10, [0, 4], [2, 6]))) + assert not index.equals(BlockIndex(10, [0, 4], [2, 6])) def test_check_integrity(self): locs = [] @@ -535,7 +535,7 @@ def test_int_internal(self): def test_equals(self): index = IntIndex(10, [0, 1, 2, 3, 4]) self.assertTrue(index.equals(index)) - self.assertFalse(index.equals(IntIndex(10, [0, 1, 2, 3]))) + assert not index.equals(IntIndex(10, [0, 1, 2, 3])) def test_to_block_index(self): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index e4f39197421a0..e058a62ea3089 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -277,8 +277,8 @@ def test_none_comparison(self): # noinspection PyComparisonWithNone result = o == None # noqa - self.assertFalse(result.iat[0]) - self.assertFalse(result.iat[1]) + assert not result.iat[0] + assert not result.iat[1] # noinspection PyComparisonWithNone result = o != None # noqa @@ -286,8 +286,8 @@ def test_none_comparison(self): self.assertTrue(result.iat[1]) result = None == o # noqa - self.assertFalse(result.iat[0]) - self.assertFalse(result.iat[1]) + assert not result.iat[0] + assert not result.iat[1] # this fails for numpy < 1.9 # and oddly for *some* platforms @@ -296,12 +296,12 @@ def test_none_comparison(self): # self.assertTrue(result.iat[1]) result = None > o - self.assertFalse(result.iat[0]) - self.assertFalse(result.iat[1]) + assert not result.iat[0] + assert not result.iat[1] result = o < None - self.assertFalse(result.iat[0]) - self.assertFalse(result.iat[1]) + assert not result.iat[0] + assert not result.iat[1] def test_ndarray_compat_properties(self): @@ -796,10 +796,10 @@ def test_duplicated_drop_duplicates_index(self): self.assertTrue(duplicated.dtype == bool) result = original.drop_duplicates() tm.assert_index_equal(result, original) - self.assertFalse(result is original) + assert result is not original # has_duplicates - self.assertFalse(original.has_duplicates) + assert not original.has_duplicates # create repeated values, 3rd and 5th values are duplicated idx = original[list(range(len(original))) + [5, 3]] @@ -843,7 +843,7 @@ def test_duplicated_drop_duplicates_index(self): tm.assert_series_equal(original.duplicated(), expected) result = original.drop_duplicates() tm.assert_series_equal(result, original) - self.assertFalse(result is original) + assert result is not original idx = original.index[list(range(len(original))) + [5, 3]] values = original._values[list(range(len(original))) + [5, 3]] @@ -907,7 +907,7 @@ def test_fillna(self): else: tm.assert_series_equal(o, result) # check shallow_copied - self.assertFalse(o is result) + assert o is not result for null_obj in [np.nan, None]: for orig in self.objs: @@ -941,7 +941,7 @@ def test_fillna(self): else: tm.assert_series_equal(result, expected) # check shallow_copied - self.assertFalse(o is result) + assert o is not result def test_memory_usage(self): for o in self.objs: diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index bbcd42b147654..252b32e264c1b 100644 --- 
a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -117,7 +117,7 @@ def test_constructor_unsortable(self): # it works! arr = np.array([1, 2, 3, datetime.now()], dtype='O') factor = Categorical(arr, ordered=False) - self.assertFalse(factor.ordered) + assert not factor.ordered # this however will raise as cannot be sorted pytest.raises( @@ -143,14 +143,14 @@ def test_is_equal_dtype(self): self.assertTrue(c1.is_dtype_equal(c1)) self.assertTrue(c2.is_dtype_equal(c2)) self.assertTrue(c3.is_dtype_equal(c3)) - self.assertFalse(c1.is_dtype_equal(c2)) - self.assertFalse(c1.is_dtype_equal(c3)) - self.assertFalse(c1.is_dtype_equal(Index(list('aabca')))) - self.assertFalse(c1.is_dtype_equal(c1.astype(object))) + assert not c1.is_dtype_equal(c2) + assert not c1.is_dtype_equal(c3) + assert not c1.is_dtype_equal(Index(list('aabca'))) + assert not c1.is_dtype_equal(c1.astype(object)) self.assertTrue(c1.is_dtype_equal(CategoricalIndex(c1))) - self.assertFalse(c1.is_dtype_equal( + assert not (c1.is_dtype_equal( CategoricalIndex(c1, categories=list('cab')))) - self.assertFalse(c1.is_dtype_equal(CategoricalIndex(c1, ordered=True))) + assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True)) def test_constructor(self): @@ -175,7 +175,7 @@ def f(): # The default should be unordered c1 = Categorical(["a", "b", "c", "a"]) - self.assertFalse(c1.ordered) + assert not c1.ordered # Categorical as input c1 = Categorical(["a", "b", "c", "a"]) @@ -534,7 +534,7 @@ def f(): # Only categories with same ordering information can be compared cat_unorderd = cat.set_ordered(False) - self.assertFalse((cat > cat).any()) + assert not (cat > cat).any() def f(): cat > cat_unorderd @@ -788,9 +788,9 @@ def f(): def test_construction_with_ordered(self): # GH 9347, 9190 cat = Categorical([0, 1, 2]) - self.assertFalse(cat.ordered) + assert not cat.ordered cat = Categorical([0, 1, 2], ordered=False) - self.assertFalse(cat.ordered) + assert not cat.ordered cat = Categorical([0, 1, 2], ordered=True) self.assertTrue(cat.ordered) @@ -798,12 +798,12 @@ def test_ordered_api(self): # GH 9347 cat1 = pd.Categorical(["a", "c", "b"], ordered=False) tm.assert_index_equal(cat1.categories, Index(['a', 'b', 'c'])) - self.assertFalse(cat1.ordered) + assert not cat1.ordered cat2 = pd.Categorical(["a", "c", "b"], categories=['b', 'c', 'a'], ordered=False) tm.assert_index_equal(cat2.categories, Index(['b', 'c', 'a'])) - self.assertFalse(cat2.ordered) + assert not cat2.ordered cat3 = pd.Categorical(["a", "c", "b"], ordered=True) tm.assert_index_equal(cat3.categories, Index(['a', 'b', 'c'])) @@ -818,20 +818,20 @@ def test_set_ordered(self): cat = Categorical(["a", "b", "c", "a"], ordered=True) cat2 = cat.as_unordered() - self.assertFalse(cat2.ordered) + assert not cat2.ordered cat2 = cat.as_ordered() self.assertTrue(cat2.ordered) cat2.as_unordered(inplace=True) - self.assertFalse(cat2.ordered) + assert not cat2.ordered cat2.as_ordered(inplace=True) self.assertTrue(cat2.ordered) self.assertTrue(cat2.set_ordered(True).ordered) - self.assertFalse(cat2.set_ordered(False).ordered) + assert not cat2.set_ordered(False).ordered cat2.set_ordered(True, inplace=True) self.assertTrue(cat2.ordered) cat2.set_ordered(False, inplace=True) - self.assertFalse(cat2.ordered) + assert not cat2.ordered # removed in 0.19.0 msg = "can\'t set attribute" @@ -1876,7 +1876,7 @@ def test_sideeffects_free(self): # other one, IF you specify copy! 
cat = Categorical(["a", "b", "c", "a"]) s = pd.Series(cat, copy=True) - self.assertFalse(s.cat is cat) + assert s.cat is not cat s.cat.categories = [1, 2, 3] exp_s = np.array([1, 2, 3, 1], dtype=np.int64) exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) @@ -3783,17 +3783,17 @@ def test_cat_equality(self): f = Categorical(list('acb')) # vs scalar - self.assertFalse((a == 'a').all()) + assert not (a == 'a').all() self.assertTrue(((a != 'a') == ~(a == 'a')).all()) - self.assertFalse(('a' == a).all()) + assert not ('a' == a).all() self.assertTrue((a == 'a')[0]) self.assertTrue(('a' == a)[0]) - self.assertFalse(('a' != a)[0]) + assert not ('a' != a)[0] # vs list-like self.assertTrue((a == a).all()) - self.assertFalse((a != a).all()) + assert not (a != a).all() self.assertTrue((a == list(a)).all()) self.assertTrue((a == b).all()) @@ -3801,16 +3801,16 @@ def test_cat_equality(self): self.assertTrue(((~(a == b)) == (a != b)).all()) self.assertTrue(((~(b == a)) == (b != a)).all()) - self.assertFalse((a == c).all()) - self.assertFalse((c == a).all()) - self.assertFalse((a == d).all()) - self.assertFalse((d == a).all()) + assert not (a == c).all() + assert not (c == a).all() + assert not (a == d).all() + assert not (d == a).all() # vs a cat-like self.assertTrue((a == e).all()) self.assertTrue((e == a).all()) - self.assertFalse((a == f).all()) - self.assertFalse((f == a).all()) + assert not (a == f).all() + assert not (f == a).all() self.assertTrue(((~(a == e) == (a != e)).all())) self.assertTrue(((~(e == a) == (e != a)).all())) @@ -4226,7 +4226,7 @@ def test_cat_accessor_api(self): with tm.assert_raises_regex(AttributeError, "only use .cat accessor"): invalid.cat - self.assertFalse(hasattr(invalid, 'cat')) + assert not hasattr(invalid, 'cat') def test_cat_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 @@ -4309,7 +4309,7 @@ def test_str_accessor_api_for_categorical(self): "Can only use .str " "accessor with string"): invalid.str - self.assertFalse(hasattr(invalid, 'str')) + assert not hasattr(invalid, 'str') def test_dt_accessor_api_for_categorical(self): # https://github.com/pandas-dev/pandas/issues/10661 @@ -4390,7 +4390,7 @@ def test_dt_accessor_api_for_categorical(self): with tm.assert_raises_regex( AttributeError, "Can only use .dt accessor with datetimelike"): invalid.dt - self.assertFalse(hasattr(invalid, 'str')) + assert not hasattr(invalid, 'str') def test_concat_categorical(self): # See GH 10177 diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py index f260895e74dda..0e614fdbfe008 100644 --- a/pandas/tests/test_config.py +++ b/pandas/tests/test_config.py @@ -114,8 +114,7 @@ def test_describe_option(self): self.assertTrue( 'foo' in self.cf.describe_option('l', _print_desc=False)) # current value is reported - self.assertFalse( - 'bar' in self.cf.describe_option('l', _print_desc=False)) + assert 'bar' not in self.cf.describe_option('l', _print_desc=False) self.cf.set_option("l", "bar") self.assertTrue( 'bar' in self.cf.describe_option('l', _print_desc=False)) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 14e08411fa106..782d2682145d8 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -254,17 +254,17 @@ def test_invalid(self): # no op result = expr._can_use_numexpr(operator.add, None, self.frame, self.frame, 'evaluate') - self.assertFalse(result) + assert not result # mixed result = expr._can_use_numexpr(operator.add, '+', self.mixed, 
self.frame, 'evaluate') - self.assertFalse(result) + assert not result # min elements result = expr._can_use_numexpr(operator.add, '+', self.frame2, self.frame2, 'evaluate') - self.assertFalse(result) + assert not result # ok, we only check on first part of expression result = expr._can_use_numexpr(operator.add, '+', self.frame, @@ -308,7 +308,7 @@ def testit(): result = expr._can_use_numexpr(op, op_str, f2, f2, 'evaluate') - self.assertFalse(result) + assert not result expr.set_use_numexpr(False) testit() @@ -349,7 +349,7 @@ def testit(): result = expr._can_use_numexpr(op, op_str, f21, f22, 'evaluate') - self.assertFalse(result) + assert not result expr.set_use_numexpr(False) testit() diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index 5c3e6adb48808..621f624c41a19 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -152,7 +152,7 @@ def test_maybe_indices_to_slice_both_edges(self): for case in [[4, 2, 0, -2], [2, 2, 1, 0], [0, 1, 2, 1]]: indices = np.array(case, dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - self.assertFalse(isinstance(maybe_slice, slice)) + assert not isinstance(maybe_slice, slice) tm.assert_numpy_array_equal(maybe_slice, indices) tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index f350ef4351585..668f5b2a5a962 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -876,7 +876,7 @@ def test_stack(self): # GH10417 def check(left, right): tm.assert_series_equal(left, right) - self.assertFalse(left.index.is_unique) + assert not left.index.is_unique li, ri = left.index, right.index tm.assert_index_equal(li, ri) @@ -1225,7 +1225,7 @@ def test_join(self): expected = self.frame.copy() expected.values[np.isnan(joined.values)] = np.nan - self.assertFalse(np.isnan(joined.values).all()) + assert not np.isnan(joined.values).all() # TODO what should join do with names ? 
tm.assert_frame_equal(joined, expected, check_names=False) @@ -1235,7 +1235,7 @@ def test_swaplevel(self): swapped2 = self.frame['A'].swaplevel(0) swapped3 = self.frame['A'].swaplevel(0, 1) swapped4 = self.frame['A'].swaplevel('first', 'second') - self.assertFalse(swapped.index.equals(self.frame.index)) + assert not swapped.index.equals(self.frame.index) tm.assert_series_equal(swapped, swapped2) tm.assert_series_equal(swapped, swapped3) tm.assert_series_equal(swapped, swapped4) @@ -1831,7 +1831,7 @@ def test_drop_level_nonunique_datetime(self): df['tstamp'] = idxdt df = df.set_index('tstamp', append=True) ts = pd.Timestamp('201603231600') - self.assertFalse(df.index.is_unique) + assert not df.index.is_unique result = df.drop(ts, level='tstamp') expected = df.loc[idx != 4] @@ -2430,11 +2430,11 @@ def test_is_lexsorted(self): index = MultiIndex(levels=levels, labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]) - self.assertFalse(index.is_lexsorted()) + assert not index.is_lexsorted() index = MultiIndex(levels=levels, labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) - self.assertFalse(index.is_lexsorted()) + assert not index.is_lexsorted() self.assertEqual(index.lexsort_depth, 0) def test_getitem_multilevel_index_tuple_not_sorted(self): diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 1aad2f5224c0d..a108749db8e6a 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -659,7 +659,7 @@ def check_bool(self, func, value, correct, *args, **kwargs): if correct: self.assertTrue(res0) else: - self.assertFalse(res0) + assert not res0 except BaseException as exc: exc.args += ('dim: %s' % getattr(value, 'ndim', value), ) raise @@ -742,9 +742,9 @@ def test__bn_ok_dtype(self): self.assertTrue(nanops._bn_ok_dtype(self.arr_bool.dtype, 'test')) self.assertTrue(nanops._bn_ok_dtype(self.arr_str.dtype, 'test')) self.assertTrue(nanops._bn_ok_dtype(self.arr_utf.dtype, 'test')) - self.assertFalse(nanops._bn_ok_dtype(self.arr_date.dtype, 'test')) - self.assertFalse(nanops._bn_ok_dtype(self.arr_tdelta.dtype, 'test')) - self.assertFalse(nanops._bn_ok_dtype(self.arr_obj.dtype, 'test')) + assert not nanops._bn_ok_dtype(self.arr_date.dtype, 'test') + assert not nanops._bn_ok_dtype(self.arr_tdelta.dtype, 'test') + assert not nanops._bn_ok_dtype(self.arr_obj.dtype, 'test') class TestEnsureNumeric(tm.TestCase): diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 322ea32a93562..802acc86d3359 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1050,7 +1050,7 @@ def test_consolidate(self): self.assertTrue(self.panel._data.is_consolidated()) self.panel['foo'] = 1. - self.assertFalse(self.panel._data.is_consolidated()) + assert not self.panel._data.is_consolidated() panel = self.panel._consolidate() self.assertTrue(panel._data.is_consolidated()) @@ -1425,7 +1425,7 @@ def test_reindex(self): # this ok result = self.panel.reindex() assert_panel_equal(result, self.panel) - self.assertFalse(result is self.panel) + assert result is not self.panel # with filling smaller_major = self.panel.major_axis[::5] diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 05b42cdf00e94..5b4f09009c9db 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -684,7 +684,7 @@ def test_consolidate(self): self.assertTrue(self.panel4d._data.is_consolidated()) self.panel4d['foo'] = 1. 
- self.assertFalse(self.panel4d._data.is_consolidated()) + assert not self.panel4d._data.is_consolidated() panel4d = self.panel4d._consolidate() self.assertTrue(panel4d._data.is_consolidated()) @@ -803,7 +803,7 @@ def test_reindex(self): # don't necessarily copy result = self.panel4d.reindex() assert_panel4d_equal(result, self.panel4d) - self.assertFalse(result is self.panel4d) + assert result is not self.panel4d # with filling smaller_major = self.panel4d.major_axis[::5] @@ -857,7 +857,7 @@ def test_sort_index(self): def test_fillna(self): with catch_warnings(record=True): - self.assertFalse(np.isfinite(self.panel4d.values).all()) + assert not np.isfinite(self.panel4d.values).all() filled = self.panel4d.fillna(0) self.assertTrue(np.isfinite(filled.values).all()) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index f5309a985a499..42a6a2a784a0e 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -134,8 +134,8 @@ def f(): # masquerade as Series/DataFrame as needed for API compat self.assertTrue(isinstance(self.series.resample('H'), ABCSeries)) - self.assertFalse(isinstance(self.frame.resample('H'), ABCSeries)) - self.assertFalse(isinstance(self.series.resample('H'), ABCDataFrame)) + assert not isinstance(self.frame.resample('H'), ABCSeries) + assert not isinstance(self.series.resample('H'), ABCDataFrame) self.assertTrue(isinstance(self.frame.resample('H'), ABCDataFrame)) # bin numeric ops diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index db0c2fdc80fd2..45e8aa3a367db 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -32,7 +32,7 @@ def test_api(self): with tm.assert_raises_regex(AttributeError, "only use .str accessor"): invalid.str - self.assertFalse(hasattr(invalid, 'str')) + assert not hasattr(invalid, 'str') def test_iter(self): # GH3638 @@ -76,7 +76,7 @@ def test_iter_single_element(self): for i, s in enumerate(ds.str): pass - self.assertFalse(i) + assert not i assert_series_equal(ds, s) def test_iter_object_try_string(self): diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index adfecc90129e9..13d471f368693 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1298,9 +1298,9 @@ def get_result(arr, window, min_periods=None, center=False): # min_periods is working correctly result = get_result(arr, 20, min_periods=15) self.assertTrue(np.isnan(result[23])) - self.assertFalse(np.isnan(result[24])) + assert not np.isnan(result[24]) - self.assertFalse(np.isnan(result[-6])) + assert not np.isnan(result[-6]) self.assertTrue(np.isnan(result[-5])) arr2 = randn(20) @@ -1660,18 +1660,18 @@ def _check_ew_ndarray(self, func, preserve_nan=False, name=None): # GH 7898 result = func(s, 50, min_periods=2) self.assertTrue(np.isnan(result.values[:11]).all()) - self.assertFalse(np.isnan(result.values[11:]).any()) + assert not np.isnan(result.values[11:]).any() for min_periods in (0, 1): result = func(s, 50, min_periods=min_periods) if func == mom.ewma: self.assertTrue(np.isnan(result.values[:10]).all()) - self.assertFalse(np.isnan(result.values[10:]).any()) + assert not np.isnan(result.values[10:]).any() else: # ewmstd, ewmvol, ewmvar (with bias=False) require at least two # values self.assertTrue(np.isnan(result.values[:11]).all()) - self.assertFalse(np.isnan(result.values[11:]).any()) + assert not np.isnan(result.values[11:]).any() # check series of length 0 result = func(Series([]), 50, min_periods=min_periods) @@ -2010,11 +2010,11 @@ def 
_non_null_values(x): # check that var(x), std(x), and cov(x) are all >= 0 var_x = var(x) std_x = std(x) - self.assertFalse((var_x < 0).any().any()) - self.assertFalse((std_x < 0).any().any()) + assert not (var_x < 0).any().any() + assert not (std_x < 0).any().any() if cov: cov_x_x = cov(x, x) - self.assertFalse((cov_x_x < 0).any().any()) + assert not (cov_x_x < 0).any().any() # check that var(x) == cov(x, x) assert_equal(var_x, cov_x_x) @@ -2029,7 +2029,7 @@ def _non_null_values(x): if is_constant: # check that variance of constant series is identically 0 - self.assertFalse((var_x > 0).any().any()) + assert not (var_x > 0).any().any() expected = x * np.nan expected[count_x >= max(min_periods, 1)] = 0. if var is var_unbiased: @@ -2466,7 +2466,7 @@ def func(A, B, com, **kwargs): result = func(A, B, 20, min_periods=5) self.assertTrue(np.isnan(result.values[:14]).all()) - self.assertFalse(np.isnan(result.values[14:]).any()) + assert not np.isnan(result.values[14:]).any() # GH 7898 for min_periods in (0, 1, 2): @@ -2474,7 +2474,7 @@ def func(A, B, com, **kwargs): # binary functions (ewmcov, ewmcorr) with bias=False require at # least two values self.assertTrue(np.isnan(result.values[:11]).all()) - self.assertFalse(np.isnan(result.values[11:]).any()) + assert not np.isnan(result.values[11:]).any() # check series of length 0 result = func(Series([]), Series([]), 50, min_periods=min_periods) @@ -2891,7 +2891,7 @@ def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True, # min_periods is working correctly result = func(arr, min_periods=15) self.assertTrue(np.isnan(result[13])) - self.assertFalse(np.isnan(result[14])) + assert not np.isnan(result[14]) arr2 = randn(20) result = func(arr2, min_periods=5) @@ -3050,7 +3050,7 @@ def f(): pytest.raises(TypeError, f) g = self.frame.groupby('A') - self.assertFalse(g.mutated) + assert not g.mutated g = self.frame.groupby('A', mutated=True) self.assertTrue(g.mutated) @@ -3277,7 +3277,7 @@ def test_monotonic_on(self): # non-monotonic df.index = reversed(df.index.tolist()) - self.assertFalse(df.index.is_monotonic) + assert not df.index.is_monotonic with pytest.raises(ValueError): df.rolling('2s').sum() diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index cb3fc3b60226f..1332be2567b56 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -446,7 +446,7 @@ def test_onOffset(self): # when normalize=True, onOffset checks time is 00:00:00 offset_n = self._get_offset(offset, normalize=True) - self.assertFalse(offset_n.onOffset(dt)) + assert not offset_n.onOffset(dt) if offset in (BusinessHour, CustomBusinessHour): # In default BusinessHour (9:00-17:00), normalized time @@ -718,7 +718,7 @@ def test_offsets_compare_equal(self): # root cause of #456 offset1 = BDay() offset2 = BDay() - self.assertFalse(offset1 != offset2) + assert not offset1 != offset2 class TestBusinessHour(Base): @@ -1389,7 +1389,7 @@ def test_offsets_compare_equal(self): # root cause of #456 offset1 = self._offset() offset2 = self._offset() - self.assertFalse(offset1 != offset2) + assert not offset1 != offset2 def test_datetimeindex(self): idx1 = DatetimeIndex(start='2014-07-04 15:00', end='2014-07-08 10:00', @@ -1859,7 +1859,7 @@ def test_offsets_compare_equal(self): # root cause of #456 offset1 = CDay() offset2 = CDay() - self.assertFalse(offset1 != offset2) + assert not offset1 != offset2 def test_holidays(self): # Define a TradingDay offset @@ -1964,7 +1964,7 @@ def testMult2(self): def 
test_offsets_compare_equal(self): offset1 = self._object() offset2 = self._object() - self.assertFalse(offset1 != offset2) + assert not offset1 != offset2 def test_roundtrip_pickle(self): def _check_roundtrip(obj): @@ -2230,9 +2230,9 @@ def test_corner(self): def test_isAnchored(self): self.assertTrue(Week(weekday=0).isAnchored()) - self.assertFalse(Week().isAnchored()) - self.assertFalse(Week(2, weekday=2).isAnchored()) - self.assertFalse(Week(2).isAnchored()) + assert not Week().isAnchored() + assert not Week(2, weekday=2).isAnchored() + assert not Week(2).isAnchored() def test_offset(self): tests = [] @@ -2284,7 +2284,7 @@ def test_offsets_compare_equal(self): # root cause of #456 offset1 = Week() offset2 = Week() - self.assertFalse(offset1 != offset2) + assert not offset1 != offset2 class TestWeekOfMonth(Base): @@ -2507,7 +2507,7 @@ def test_offsets_compare_equal(self): # root cause of #456 offset1 = BMonthBegin() offset2 = BMonthBegin() - self.assertFalse(offset1 != offset2) + assert not offset1 != offset2 class TestBMonthEnd(Base): @@ -2570,7 +2570,7 @@ def test_offsets_compare_equal(self): # root cause of #456 offset1 = BMonthEnd() offset2 = BMonthEnd() - self.assertFalse(offset1 != offset2) + assert not offset1 != offset2 class TestMonthBegin(Base): @@ -3043,7 +3043,7 @@ def test_repr(self): def test_isAnchored(self): self.assertTrue(BQuarterBegin(startingMonth=1).isAnchored()) self.assertTrue(BQuarterBegin().isAnchored()) - self.assertFalse(BQuarterBegin(2, startingMonth=1).isAnchored()) + assert not BQuarterBegin(2, startingMonth=1).isAnchored() def test_offset(self): tests = [] @@ -3137,7 +3137,7 @@ def test_repr(self): def test_isAnchored(self): self.assertTrue(BQuarterEnd(startingMonth=1).isAnchored()) self.assertTrue(BQuarterEnd().isAnchored()) - self.assertFalse(BQuarterEnd(2, startingMonth=1).isAnchored()) + assert not BQuarterEnd(2, startingMonth=1).isAnchored() def test_offset(self): tests = [] @@ -3512,9 +3512,9 @@ def test_isAnchored(self): self.assertTrue( makeFY5253LastOfMonthQuarter(weekday=WeekDay.SAT, startingMonth=3, qtr_with_extra_week=4).isAnchored()) - self.assertFalse(makeFY5253LastOfMonthQuarter( + assert not makeFY5253LastOfMonthQuarter( 2, startingMonth=1, weekday=WeekDay.SAT, - qtr_with_extra_week=4).isAnchored()) + qtr_with_extra_week=4).isAnchored() def test_equality(self): self.assertEqual(makeFY5253LastOfMonthQuarter(startingMonth=1, @@ -3676,20 +3676,17 @@ def test_year_has_extra_week(self): .year_has_extra_week(datetime(2010, 12, 26))) # End of year before year with long Q1 - self.assertFalse( - makeFY5253LastOfMonthQuarter( - 1, startingMonth=12, weekday=WeekDay.SAT, - qtr_with_extra_week=1) - .year_has_extra_week(datetime(2010, 12, 25))) + assert not makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, + qtr_with_extra_week=1).year_has_extra_week(datetime(2010, 12, 25)) for year in [x for x in range(1994, 2011 + 1) if x not in [2011, 2005, 2000, 1994]]: - self.assertFalse( - makeFY5253LastOfMonthQuarter( - 1, startingMonth=12, weekday=WeekDay.SAT, - qtr_with_extra_week=1) - .year_has_extra_week(datetime(year, 4, 2))) + assert not makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, + qtr_with_extra_week=1).year_has_extra_week( + datetime(year, 4, 2)) # Other long years self.assertTrue( @@ -3825,7 +3822,7 @@ def test_repr(self): def test_isAnchored(self): self.assertTrue(QuarterBegin(startingMonth=1).isAnchored()) self.assertTrue(QuarterBegin().isAnchored()) - self.assertFalse(QuarterBegin(2, 
startingMonth=1).isAnchored()) + assert not QuarterBegin(2, startingMonth=1).isAnchored() def test_offset(self): tests = [] @@ -3903,7 +3900,7 @@ def test_repr(self): def test_isAnchored(self): self.assertTrue(QuarterEnd(startingMonth=1).isAnchored()) self.assertTrue(QuarterEnd().isAnchored()) - self.assertFalse(QuarterEnd(2, startingMonth=1).isAnchored()) + assert not QuarterEnd(2, startingMonth=1).isAnchored() def test_offset(self): tests = [] @@ -4527,7 +4524,7 @@ def test_tick_operators(self): def test_tick_offset(self): for t in self.ticks: - self.assertFalse(t().isAnchored()) + assert not t().isAnchored() def test_compare_ticks(self): for kls in self.ticks: @@ -4758,7 +4755,7 @@ def setUp(self): def run_X_index_creation(self, cls): inst1 = cls() if not inst1.isAnchored(): - self.assertFalse(inst1._should_cache(), cls) + assert not inst1._should_cache(), cls return self.assertTrue(inst1._should_cache(), cls) @@ -4768,13 +4765,13 @@ def run_X_index_creation(self, cls): self.assertTrue(cls() in _daterange_cache, cls) def test_should_cache_month_end(self): - self.assertFalse(MonthEnd()._should_cache()) + assert not MonthEnd()._should_cache() def test_should_cache_bmonth_end(self): - self.assertFalse(BusinessMonthEnd()._should_cache()) + assert not BusinessMonthEnd()._should_cache() def test_should_cache_week_month(self): - self.assertFalse(WeekOfMonth(weekday=1, week=2)._should_cache()) + assert not WeekOfMonth(weekday=1, week=2)._should_cache() def test_all_cacheableoffsets(self): for subclass in get_all_subclasses(CacheableOffset): @@ -4786,19 +4783,19 @@ def test_all_cacheableoffsets(self): def test_month_end_index_creation(self): DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 31), freq=MonthEnd(), normalize=True) - self.assertFalse(MonthEnd() in _daterange_cache) + assert not MonthEnd() in _daterange_cache def test_bmonth_end_index_creation(self): DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 29), freq=BusinessMonthEnd(), normalize=True) - self.assertFalse(BusinessMonthEnd() in _daterange_cache) + assert not BusinessMonthEnd() in _daterange_cache def test_week_of_month_index_creation(self): inst1 = WeekOfMonth(weekday=1, week=2) DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 29), freq=inst1, normalize=True) inst2 = WeekOfMonth(weekday=1, week=2) - self.assertFalse(inst2 in _daterange_cache) + assert inst2 not in _daterange_cache class TestReprNames(tm.TestCase): diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 807d6866cbf74..65db858a6ccf1 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -1284,7 +1284,7 @@ def test_index_equals_with_tz(self): left = date_range('1/1/2011', periods=100, freq='H', tz='utc') right = date_range('1/1/2011', periods=100, freq='H', tz='US/Eastern') - self.assertFalse(left.equals(right)) + assert not left.equals(right) def test_tz_localize_naive(self): rng = date_range('1/1/2011', periods=100, freq='H') @@ -1627,7 +1627,7 @@ def test_normalize_tz(self): tm.assert_index_equal(result, expected) self.assertTrue(result.is_normalized) - self.assertFalse(rng.is_normalized) + assert not rng.is_normalized rng = date_range('1/1/2000 9:30', periods=10, freq='D', tz='UTC') @@ -1636,7 +1636,7 @@ def test_normalize_tz(self): tm.assert_index_equal(result, expected) self.assertTrue(result.is_normalized) - self.assertFalse(rng.is_normalized) + assert not rng.is_normalized from dateutil.tz import tzlocal rng = 
date_range('1/1/2000 9:30', periods=10, freq='D', tz=tzlocal()) @@ -1645,7 +1645,7 @@ def test_normalize_tz(self): tm.assert_index_equal(result, expected) self.assertTrue(result.is_normalized) - self.assertFalse(rng.is_normalized) + assert not rng.is_normalized def test_normalize_tz_local(self): # GH 13459 @@ -1665,7 +1665,7 @@ def test_normalize_tz_local(self): tm.assert_index_equal(result, expected) self.assertTrue(result.is_normalized) - self.assertFalse(rng.is_normalized) + assert not rng.is_normalized def test_tzaware_offset(self): dates = date_range('2012-11-01', periods=3, tz='US/Pacific') From 2d9909c62b73ea89c556aee22516cb3d0596e827 Mon Sep 17 00:00:00 2001 From: Andreas Winkler Date: Thu, 27 Apr 2017 03:32:13 +0200 Subject: [PATCH 476/933] CLN: remove unused TimeGrouper._get_binner_for_resample() method (#16152) --- pandas/core/resample.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 203ae0cb17e02..1685a5d75245d 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1099,23 +1099,6 @@ def _get_grouper(self, obj): r._set_binner() return r.binner, r.grouper, r.obj - def _get_binner_for_resample(self, kind=None): - # create the BinGrouper - # assume that self.set_grouper(obj) has already been called - - ax = self.ax - if kind is None: - kind = self.kind - if kind is None or kind == 'timestamp': - self.binner, bins, binlabels = self._get_time_bins(ax) - elif kind == 'timedelta': - self.binner, bins, binlabels = self._get_time_delta_bins(ax) - else: - self.binner, bins, binlabels = self._get_time_period_bins(ax) - - self.grouper = BinGrouper(bins, binlabels) - return self.binner, self.grouper, self.obj - def _get_binner_for_grouping(self, obj): # return an ordering of the transformed group labels, # suitable for multi-grouping, e.g the labels for From a16fc8d443e4a07c15b6f300185d257a27c8b563 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 27 Apr 2017 05:59:56 -0400 Subject: [PATCH 477/933] API: Relax is-file-like conditions (#16150) Previously, we were requiring that all file-like objects had "read," "write," "seek," and "tell" methods, but that was too strict (e.g. read-only buffers). This commit relaxes those requirements to having EITHER "read" or "write" as attributes. Closes gh-16135. --- pandas/core/dtypes/inference.py | 15 ++++--------- pandas/tests/dtypes/test_inference.py | 30 ++++++++++++++++++++++++++ pandas/tests/io/parser/common.py | 20 +++++++++++++++++ pandas/tests/io/parser/test_network.py | 19 ++++++++++++++++ 4 files changed, 73 insertions(+), 11 deletions(-) diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 66f4d87aa8e33..a5316a83612cb 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -142,12 +142,8 @@ def is_file_like(obj): Check if the object is a file-like object. For objects to be considered file-like, they must - be an iterator AND have the following four methods: - - 1) read - 2) write - 3) seek - 4) tell + be an iterator AND have either a `read` and/or `write` + method as an attribute. Note: file-like objects must be iterable, but iterable objects need not be file-like. 
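A minimal sketch of the relaxed contract described above, assuming the module path shown in this diff (`pandas.core.dtypes.inference`); `ReadOnlyBuffer` is a hypothetical stand-in for a read-only, non-seekable stream and is not part of this patch:

from pandas.core.dtypes.inference import is_file_like

class ReadOnlyBuffer(object):
    # Exposes "read" but deliberately no write/seek/tell, like a
    # read-only stream (e.g. an HTTP response body).
    def read(self, n=-1):
        return b""

    # is_file_like() still requires the object to be an iterator.
    def __iter__(self):
        return self

    def __next__(self):
        raise StopIteration

    next = __next__  # Python 2 spelling of __next__

assert is_file_like(ReadOnlyBuffer())    # read-only buffer now qualifies
assert not is_file_like(iter([1, 2]))    # iterator without read/write does not

Under the old four-method check, `ReadOnlyBuffer` would have been rejected for lacking `write`, `seek`, and `tell`; after this change, `seek` and `tell` are not consulted at all, which is what the `NoSeekTellBuffer` test added below exercises for `read_csv`.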
@@ -172,11 +168,8 @@ def is_file_like(obj): False """ - file_attrs = ('read', 'write', 'seek', 'tell') - - for attr in file_attrs: - if not hasattr(obj, attr): - return False + if not (hasattr(obj, 'read') or hasattr(obj, 'write')): + return False if not is_iterator(obj): return False diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 8dcf75e8a1aec..1d3a956829a3c 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -100,11 +100,41 @@ def test_is_dict_like(): def test_is_file_like(): + class MockFile(object): + pass + is_file = inference.is_file_like data = StringIO("data") assert is_file(data) + # No read / write attributes + # No iterator attributes + m = MockFile() + assert not is_file(m) + + MockFile.write = lambda self: 0 + + # Write attribute but not an iterator + m = MockFile() + assert not is_file(m) + + MockFile.__iter__ = lambda self: self + MockFile.__next__ = lambda self: 0 + MockFile.next = MockFile.__next__ + + # Valid write-only file + m = MockFile() + assert is_file(m) + + del MockFile.write + MockFile.read = lambda self: 0 + + # Valid read-only file + m = MockFile() + assert is_file(m) + + # Iterator but no read / write attributes data = [1, 2, 3] assert not is_file(data) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index afb23f540264e..e3df02a948080 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -1685,6 +1685,26 @@ class InvalidBuffer(object): with tm.assert_raises_regex(ValueError, msg): self.read_csv(InvalidBuffer()) + # gh-16135: we want to ensure that "tell" and "seek" + # aren't actually being used when we call `read_csv` + # + # Thus, while the object may look "invalid" (these + # methods are attributes of the `StringIO` class), + # it is still a valid file-object for our purposes. + class NoSeekTellBuffer(StringIO): + def tell(self): + raise AttributeError("No tell method") + + def seek(self, pos, whence=0): + raise AttributeError("No seek method") + + data = "a\n1" + + expected = pd.DataFrame({"a": [1]}) + result = self.read_csv(NoSeekTellBuffer(data)) + + tm.assert_frame_equal(result, expected) + if PY3: from unittest import mock diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index b9920983856d4..e3a1b42fd4d45 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -176,3 +176,22 @@ def test_s3_fails(self): # It's irrelevant here that this isn't actually a table. 
with pytest.raises(IOError): read_csv('s3://cant_get_it/') + + @tm.network + def boto3_client_s3(self): + # see gh-16135 + + # boto3 is a dependency of s3fs + import boto3 + client = boto3.client("s3") + + key = "/tips.csv" + bucket = "pandas-test" + s3_object = client.get_object(Bucket=bucket, Key=key) + + result = read_csv(s3_object["Body"]) + assert isinstance(result, DataFrame) + assert not result.empty + + expected = read_csv(tm.get_data_path('tips.csv')) + tm.assert_frame_equal(result, expected) From 8879e8495b1af038c054787ad3597a27c1c07903 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 27 Apr 2017 08:17:48 -0400 Subject: [PATCH 478/933] MAINT: Remove self.assertTrue from testing (#16158) --- pandas/tests/computation/test_eval.py | 26 +-- pandas/tests/dtypes/test_cast.py | 16 +- pandas/tests/dtypes/test_dtypes.py | 199 +++++++++--------- pandas/tests/dtypes/test_inference.py | 166 +++++++-------- pandas/tests/dtypes/test_missing.py | 16 +- pandas/tests/frame/test_alter_axes.py | 8 +- pandas/tests/frame/test_analytics.py | 45 ++-- pandas/tests/frame/test_api.py | 22 +- pandas/tests/frame/test_apply.py | 10 +- pandas/tests/frame/test_asof.py | 6 +- .../tests/frame/test_axis_select_reindex.py | 23 +- pandas/tests/frame/test_block_internals.py | 22 +- pandas/tests/frame/test_combine_concat.py | 12 +- pandas/tests/frame/test_constructors.py | 38 ++-- pandas/tests/frame/test_convert_to.py | 4 +- pandas/tests/frame/test_dtypes.py | 12 +- pandas/tests/frame/test_indexing.py | 83 ++++---- pandas/tests/frame/test_missing.py | 13 +- pandas/tests/frame/test_mutate_columns.py | 8 +- pandas/tests/frame/test_nonunique_indexes.py | 4 +- pandas/tests/frame/test_operators.py | 35 ++- pandas/tests/frame/test_period.py | 4 +- pandas/tests/frame/test_query_eval.py | 8 +- pandas/tests/frame/test_replace.py | 2 +- pandas/tests/frame/test_repr_info.py | 24 +-- pandas/tests/frame/test_reshape.py | 2 +- pandas/tests/frame/test_subclass.py | 12 +- pandas/tests/frame/test_timeseries.py | 30 +-- pandas/tests/frame/test_to_csv.py | 2 +- pandas/tests/groupby/test_groupby.py | 28 +-- pandas/tests/groupby/test_nth.py | 8 +- pandas/tests/groupby/test_timegrouper.py | 14 +- pandas/tests/indexes/common.py | 60 +++--- pandas/tests/indexes/datetimelike.py | 8 +- pandas/tests/indexes/datetimes/test_astype.py | 14 +- .../indexes/datetimes/test_construction.py | 20 +- .../indexes/datetimes/test_date_range.py | 18 +- .../tests/indexes/datetimes/test_datetime.py | 18 +- .../indexes/datetimes/test_datetimelike.py | 8 +- pandas/tests/indexes/datetimes/test_misc.py | 2 +- pandas/tests/indexes/datetimes/test_ops.py | 42 ++-- pandas/tests/indexes/datetimes/test_setops.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 41 ++-- .../tests/indexes/period/test_construction.py | 12 +- pandas/tests/indexes/period/test_indexing.py | 2 +- pandas/tests/indexes/period/test_ops.py | 22 +- pandas/tests/indexes/period/test_period.py | 22 +- pandas/tests/indexes/test_base.py | 85 ++++---- pandas/tests/indexes/test_category.py | 38 ++-- pandas/tests/indexes/test_interval.py | 52 ++--- pandas/tests/indexes/test_multi.py | 165 ++++++++------- pandas/tests/indexes/test_numeric.py | 53 +++-- pandas/tests/indexes/test_range.py | 59 +++--- .../tests/indexes/timedeltas/test_astype.py | 2 +- pandas/tests/indexes/timedeltas/test_ops.py | 48 ++--- .../indexes/timedeltas/test_timedelta.py | 8 +- pandas/tests/indexes/timedeltas/test_tools.py | 5 +- pandas/tests/indexing/test_floats.py | 14 +- pandas/tests/indexing/test_iloc.py | 2 +- 
pandas/tests/indexing/test_indexing.py | 28 +-- pandas/tests/indexing/test_ix.py | 2 +- pandas/tests/indexing/test_loc.py | 4 +- .../tests/io/formats/test_eng_formatting.py | 2 +- pandas/tests/io/formats/test_format.py | 152 +++++++------ pandas/tests/io/formats/test_style.py | 50 +++-- pandas/tests/io/formats/test_to_html.py | 14 +- .../tests/io/json/test_json_table_schema.py | 8 +- pandas/tests/io/json/test_pandas.py | 28 +-- pandas/tests/io/json/test_ujson.py | 82 ++++---- pandas/tests/io/parser/c_parser_only.py | 12 +- pandas/tests/io/parser/common.py | 18 +- pandas/tests/io/parser/converters.py | 2 +- pandas/tests/io/parser/index_col.py | 2 +- pandas/tests/io/parser/na_values.py | 2 +- pandas/tests/io/parser/parse_dates.py | 8 +- pandas/tests/io/parser/test_network.py | 20 +- pandas/tests/io/parser/test_read_fwf.py | 2 +- pandas/tests/io/parser/test_textreader.py | 12 +- pandas/tests/io/parser/usecols.py | 4 +- pandas/tests/io/sas/test_sas7bdat.py | 2 +- pandas/tests/io/sas/test_xport.py | 4 +- pandas/tests/io/test_common.py | 6 +- pandas/tests/io/test_excel.py | 20 +- pandas/tests/io/test_html.py | 2 +- pandas/tests/io/test_packers.py | 28 +-- pandas/tests/io/test_pytables.py | 69 +++--- pandas/tests/io/test_s3.py | 2 +- pandas/tests/io/test_sql.py | 198 +++++++---------- pandas/tests/io/test_stata.py | 34 +-- pandas/tests/plotting/common.py | 16 +- pandas/tests/plotting/test_boxplot_method.py | 6 +- pandas/tests/plotting/test_datetimelike.py | 47 ++--- pandas/tests/plotting/test_frame.py | 22 +- pandas/tests/plotting/test_hist_method.py | 16 +- pandas/tests/plotting/test_series.py | 31 ++- pandas/tests/reshape/test_concat.py | 22 +- pandas/tests/reshape/test_hashing.py | 6 +- pandas/tests/reshape/test_join.py | 12 +- pandas/tests/reshape/test_merge.py | 30 +-- pandas/tests/reshape/test_merge_asof.py | 6 +- pandas/tests/reshape/test_merge_ordered.py | 2 +- pandas/tests/reshape/test_pivot.py | 6 +- pandas/tests/reshape/test_tile.py | 14 +- pandas/tests/scalar/test_interval.py | 12 +- pandas/tests/scalar/test_period.py | 12 +- pandas/tests/scalar/test_period_asfreq.py | 2 +- pandas/tests/scalar/test_timedelta.py | 40 ++-- pandas/tests/scalar/test_timestamp.py | 76 +++---- pandas/tests/series/test_alter_axes.py | 8 +- pandas/tests/series/test_analytics.py | 116 +++++----- pandas/tests/series/test_api.py | 34 +-- pandas/tests/series/test_apply.py | 8 +- pandas/tests/series/test_asof.py | 16 +- pandas/tests/series/test_combine_concat.py | 2 +- pandas/tests/series/test_constructors.py | 88 ++++---- pandas/tests/series/test_datetime_values.py | 8 +- pandas/tests/series/test_indexing.py | 109 +++++----- pandas/tests/series/test_io.py | 14 +- pandas/tests/series/test_missing.py | 8 +- pandas/tests/series/test_operators.py | 16 +- pandas/tests/series/test_period.py | 12 +- pandas/tests/series/test_quantile.py | 20 +- pandas/tests/series/test_replace.py | 44 ++-- pandas/tests/series/test_repr.py | 4 +- pandas/tests/series/test_sorting.py | 4 +- pandas/tests/series/test_timeseries.py | 50 ++--- pandas/tests/sparse/test_array.py | 22 +- pandas/tests/sparse/test_frame.py | 14 +- pandas/tests/sparse/test_indexing.py | 82 ++++---- pandas/tests/sparse/test_libsparse.py | 30 +-- pandas/tests/sparse/test_series.py | 16 +- pandas/tests/test_algos.py | 17 +- pandas/tests/test_base.py | 46 ++-- pandas/tests/test_categorical.py | 151 +++++++------ pandas/tests/test_config.py | 84 +++----- pandas/tests/test_expressions.py | 2 +- pandas/tests/test_lib.py | 16 +- pandas/tests/test_multilevel.py | 64 +++--- 
pandas/tests/test_nanops.py | 52 ++--- pandas/tests/test_panel.py | 56 ++--- pandas/tests/test_panel4d.py | 26 +-- pandas/tests/test_resample.py | 36 ++-- pandas/tests/test_strings.py | 23 +- pandas/tests/test_testing.py | 2 +- pandas/tests/test_window.py | 77 +++---- pandas/tests/tools/test_numeric.py | 2 +- pandas/tests/tseries/test_frequencies.py | 28 +-- pandas/tests/tseries/test_offsets.py | 150 ++++++------- pandas/tests/tseries/test_timezones.py | 68 +++--- 149 files changed, 2204 insertions(+), 2340 deletions(-) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 52061f7f1e0ae..827a4668ed0bc 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -662,17 +662,17 @@ def test_identical(self): x = 1 result = pd.eval('x', engine=self.engine, parser=self.parser) self.assertEqual(result, 1) - self.assertTrue(is_scalar(result)) + assert is_scalar(result) x = 1.5 result = pd.eval('x', engine=self.engine, parser=self.parser) self.assertEqual(result, 1.5) - self.assertTrue(is_scalar(result)) + assert is_scalar(result) x = False result = pd.eval('x', engine=self.engine, parser=self.parser) self.assertEqual(result, False) - self.assertTrue(is_scalar(result)) + assert is_scalar(result) x = np.array([1]) result = pd.eval('x', engine=self.engine, parser=self.parser) @@ -708,7 +708,7 @@ def test_float_truncation(self): 1000000000.0015]}) cutoff = 1000000000.0006 result = df.query("A < %.4f" % cutoff) - self.assertTrue(result.empty) + assert result.empty cutoff = 1000000000.0010 result = df.query("A > %.4f" % cutoff) @@ -1281,7 +1281,7 @@ def f(): df.eval('a = a + b', inplace=True) result = old_a + df.b assert_series_equal(result, df.a, check_names=False) - self.assertTrue(result.name is None) + assert result.name is None f() @@ -1435,11 +1435,11 @@ def test_simple_in_ops(self): if self.parser != 'python': res = pd.eval('1 in [1, 2]', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res res = pd.eval('2 in (1, 2)', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res res = pd.eval('3 in (1, 2)', engine=self.engine, parser=self.parser) @@ -1447,23 +1447,23 @@ def test_simple_in_ops(self): res = pd.eval('3 not in (1, 2)', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res res = pd.eval('[3] not in (1, 2)', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res res = pd.eval('[3] in ([3], 2)', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res res = pd.eval('[[3]] in [[[3]], 2]', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res res = pd.eval('(3,) in [(3,), 2]', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res res = pd.eval('(3,) not in [(3,), 2]', engine=self.engine, parser=self.parser) @@ -1471,7 +1471,7 @@ def test_simple_in_ops(self): res = pd.eval('[(3,)] in [[(3,)], 2]', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res else: with pytest.raises(NotImplementedError): pd.eval('1 in [1, 2]', engine=self.engine, parser=self.parser) diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index bf3668111b9f9..22640729c262f 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -161,7 +161,7 @@ class TestMaybe(tm.TestCase): def test_maybe_convert_string_to_array(self): result = maybe_convert_string_to_object('x') tm.assert_numpy_array_equal(result, np.array(['x'], 
dtype=object)) - self.assertTrue(result.dtype == object) + assert result.dtype == object result = maybe_convert_string_to_object(1) self.assertEqual(result, 1) @@ -169,19 +169,19 @@ def test_maybe_convert_string_to_array(self): arr = np.array(['x', 'y'], dtype=str) result = maybe_convert_string_to_object(arr) tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object)) - self.assertTrue(result.dtype == object) + assert result.dtype == object # unicode arr = np.array(['x', 'y']).astype('U') result = maybe_convert_string_to_object(arr) tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object)) - self.assertTrue(result.dtype == object) + assert result.dtype == object # object arr = np.array(['x', 2], dtype=object) result = maybe_convert_string_to_object(arr) tm.assert_numpy_array_equal(result, np.array(['x', 2], dtype=object)) - self.assertTrue(result.dtype == object) + assert result.dtype == object def test_maybe_convert_scalar(self): @@ -220,17 +220,17 @@ def test_maybe_convert_objects_copy(self): values = np.array([1, 2]) out = maybe_convert_objects(values, copy=False) - self.assertTrue(values is out) + assert values is out out = maybe_convert_objects(values, copy=True) - self.assertTrue(values is not out) + assert values is not out values = np.array(['apply', 'banana']) out = maybe_convert_objects(values, copy=False) - self.assertTrue(values is out) + assert values is out out = maybe_convert_objects(values, copy=True) - self.assertTrue(values is not out) + assert values is not out class TestCommonTypes(tm.TestCase): diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 718efc08394b1..b02c846d50c89 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -50,45 +50,45 @@ def test_hash_vs_equality(self): # make sure that we satisfy is semantics dtype = self.dtype dtype2 = CategoricalDtype() - self.assertTrue(dtype == dtype2) - self.assertTrue(dtype2 == dtype) - self.assertTrue(dtype is dtype2) - self.assertTrue(dtype2 is dtype) - self.assertTrue(hash(dtype) == hash(dtype2)) + assert dtype == dtype2 + assert dtype2 == dtype + assert dtype is dtype2 + assert dtype2 is dtype + assert hash(dtype) == hash(dtype2) def test_equality(self): - self.assertTrue(is_dtype_equal(self.dtype, 'category')) - self.assertTrue(is_dtype_equal(self.dtype, CategoricalDtype())) + assert is_dtype_equal(self.dtype, 'category') + assert is_dtype_equal(self.dtype, CategoricalDtype()) assert not is_dtype_equal(self.dtype, 'foo') def test_construction_from_string(self): result = CategoricalDtype.construct_from_string('category') - self.assertTrue(is_dtype_equal(self.dtype, result)) + assert is_dtype_equal(self.dtype, result) pytest.raises( TypeError, lambda: CategoricalDtype.construct_from_string('foo')) def test_is_dtype(self): - self.assertTrue(CategoricalDtype.is_dtype(self.dtype)) - self.assertTrue(CategoricalDtype.is_dtype('category')) - self.assertTrue(CategoricalDtype.is_dtype(CategoricalDtype())) + assert CategoricalDtype.is_dtype(self.dtype) + assert CategoricalDtype.is_dtype('category') + assert CategoricalDtype.is_dtype(CategoricalDtype()) assert not CategoricalDtype.is_dtype('foo') assert not CategoricalDtype.is_dtype(np.float64) def test_basic(self): - self.assertTrue(is_categorical_dtype(self.dtype)) + assert is_categorical_dtype(self.dtype) factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']) s = Series(factor, name='A') # dtypes - self.assertTrue(is_categorical_dtype(s.dtype)) - 
self.assertTrue(is_categorical_dtype(s)) + assert is_categorical_dtype(s.dtype) + assert is_categorical_dtype(s) assert not is_categorical_dtype(np.dtype('float64')) - self.assertTrue(is_categorical(s.dtype)) - self.assertTrue(is_categorical(s)) + assert is_categorical(s.dtype) + assert is_categorical(s) assert not is_categorical(np.dtype('float64')) assert not is_categorical(1.0) @@ -103,14 +103,14 @@ def test_hash_vs_equality(self): dtype = self.dtype dtype2 = DatetimeTZDtype('ns', 'US/Eastern') dtype3 = DatetimeTZDtype(dtype2) - self.assertTrue(dtype == dtype2) - self.assertTrue(dtype2 == dtype) - self.assertTrue(dtype3 == dtype) - self.assertTrue(dtype is dtype2) - self.assertTrue(dtype2 is dtype) - self.assertTrue(dtype3 is dtype) - self.assertTrue(hash(dtype) == hash(dtype2)) - self.assertTrue(hash(dtype) == hash(dtype3)) + assert dtype == dtype2 + assert dtype2 == dtype + assert dtype3 == dtype + assert dtype is dtype2 + assert dtype2 is dtype + assert dtype3 is dtype + assert hash(dtype) == hash(dtype2) + assert hash(dtype) == hash(dtype3) def test_construction(self): pytest.raises(ValueError, @@ -120,8 +120,8 @@ def test_subclass(self): a = DatetimeTZDtype('datetime64[ns, US/Eastern]') b = DatetimeTZDtype('datetime64[ns, CET]') - self.assertTrue(issubclass(type(a), type(a))) - self.assertTrue(issubclass(type(a), type(b))) + assert issubclass(type(a), type(a)) + assert issubclass(type(a), type(b)) def test_coerce_to_dtype(self): self.assertEqual(_coerce_to_dtype('datetime64[ns, US/Eastern]'), @@ -130,61 +130,58 @@ def test_coerce_to_dtype(self): DatetimeTZDtype('ns', 'Asia/Tokyo')) def test_compat(self): - self.assertTrue(is_datetime64tz_dtype(self.dtype)) - self.assertTrue(is_datetime64tz_dtype('datetime64[ns, US/Eastern]')) - self.assertTrue(is_datetime64_any_dtype(self.dtype)) - self.assertTrue(is_datetime64_any_dtype('datetime64[ns, US/Eastern]')) - self.assertTrue(is_datetime64_ns_dtype(self.dtype)) - self.assertTrue(is_datetime64_ns_dtype('datetime64[ns, US/Eastern]')) + assert is_datetime64tz_dtype(self.dtype) + assert is_datetime64tz_dtype('datetime64[ns, US/Eastern]') + assert is_datetime64_any_dtype(self.dtype) + assert is_datetime64_any_dtype('datetime64[ns, US/Eastern]') + assert is_datetime64_ns_dtype(self.dtype) + assert is_datetime64_ns_dtype('datetime64[ns, US/Eastern]') assert not is_datetime64_dtype(self.dtype) assert not is_datetime64_dtype('datetime64[ns, US/Eastern]') def test_construction_from_string(self): result = DatetimeTZDtype('datetime64[ns, US/Eastern]') - self.assertTrue(is_dtype_equal(self.dtype, result)) + assert is_dtype_equal(self.dtype, result) result = DatetimeTZDtype.construct_from_string( 'datetime64[ns, US/Eastern]') - self.assertTrue(is_dtype_equal(self.dtype, result)) + assert is_dtype_equal(self.dtype, result) pytest.raises(TypeError, lambda: DatetimeTZDtype.construct_from_string('foo')) def test_is_dtype(self): assert not DatetimeTZDtype.is_dtype(None) - self.assertTrue(DatetimeTZDtype.is_dtype(self.dtype)) - self.assertTrue(DatetimeTZDtype.is_dtype('datetime64[ns, US/Eastern]')) + assert DatetimeTZDtype.is_dtype(self.dtype) + assert DatetimeTZDtype.is_dtype('datetime64[ns, US/Eastern]') assert not DatetimeTZDtype.is_dtype('foo') - self.assertTrue(DatetimeTZDtype.is_dtype(DatetimeTZDtype( - 'ns', 'US/Pacific'))) + assert DatetimeTZDtype.is_dtype(DatetimeTZDtype('ns', 'US/Pacific')) assert not DatetimeTZDtype.is_dtype(np.float64) def test_equality(self): - self.assertTrue(is_dtype_equal(self.dtype, - 'datetime64[ns, US/Eastern]')) - 
self.assertTrue(is_dtype_equal(self.dtype, DatetimeTZDtype( - 'ns', 'US/Eastern'))) + assert is_dtype_equal(self.dtype, 'datetime64[ns, US/Eastern]') + assert is_dtype_equal(self.dtype, DatetimeTZDtype('ns', 'US/Eastern')) assert not is_dtype_equal(self.dtype, 'foo') assert not is_dtype_equal(self.dtype, DatetimeTZDtype('ns', 'CET')) assert not is_dtype_equal(DatetimeTZDtype('ns', 'US/Eastern'), DatetimeTZDtype('ns', 'US/Pacific')) # numpy compat - self.assertTrue(is_dtype_equal(np.dtype("M8[ns]"), "datetime64[ns]")) + assert is_dtype_equal(np.dtype("M8[ns]"), "datetime64[ns]") def test_basic(self): - self.assertTrue(is_datetime64tz_dtype(self.dtype)) + assert is_datetime64tz_dtype(self.dtype) dr = date_range('20130101', periods=3, tz='US/Eastern') s = Series(dr, name='A') # dtypes - self.assertTrue(is_datetime64tz_dtype(s.dtype)) - self.assertTrue(is_datetime64tz_dtype(s)) + assert is_datetime64tz_dtype(s.dtype) + assert is_datetime64tz_dtype(s) assert not is_datetime64tz_dtype(np.dtype('float64')) assert not is_datetime64tz_dtype(1.0) - self.assertTrue(is_datetimetz(s)) - self.assertTrue(is_datetimetz(s.dtype)) + assert is_datetimetz(s) + assert is_datetimetz(s.dtype) assert not is_datetimetz(np.dtype('float64')) assert not is_datetimetz(1.0) @@ -192,11 +189,11 @@ def test_dst(self): dr1 = date_range('2013-01-01', periods=3, tz='US/Eastern') s1 = Series(dr1, name='A') - self.assertTrue(is_datetimetz(s1)) + assert is_datetimetz(s1) dr2 = date_range('2013-08-01', periods=3, tz='US/Eastern') s2 = Series(dr2, name='A') - self.assertTrue(is_datetimetz(s2)) + assert is_datetimetz(s2) self.assertEqual(s1.dtype, s2.dtype) def test_parser(self): @@ -226,25 +223,25 @@ def test_construction(self): for s in ['period[D]', 'Period[D]', 'D']: dt = PeriodDtype(s) self.assertEqual(dt.freq, pd.tseries.offsets.Day()) - self.assertTrue(is_period_dtype(dt)) + assert is_period_dtype(dt) for s in ['period[3D]', 'Period[3D]', '3D']: dt = PeriodDtype(s) self.assertEqual(dt.freq, pd.tseries.offsets.Day(3)) - self.assertTrue(is_period_dtype(dt)) + assert is_period_dtype(dt) for s in ['period[26H]', 'Period[26H]', '26H', 'period[1D2H]', 'Period[1D2H]', '1D2H']: dt = PeriodDtype(s) self.assertEqual(dt.freq, pd.tseries.offsets.Hour(26)) - self.assertTrue(is_period_dtype(dt)) + assert is_period_dtype(dt) def test_subclass(self): a = PeriodDtype('period[D]') b = PeriodDtype('period[3D]') - self.assertTrue(issubclass(type(a), type(a))) - self.assertTrue(issubclass(type(a), type(b))) + assert issubclass(type(a), type(a)) + assert issubclass(type(a), type(b)) def test_identity(self): assert PeriodDtype('period[D]') == PeriodDtype('period[D]') @@ -270,9 +267,9 @@ def test_compat(self): def test_construction_from_string(self): result = PeriodDtype('period[D]') - self.assertTrue(is_dtype_equal(self.dtype, result)) + assert is_dtype_equal(self.dtype, result) result = PeriodDtype.construct_from_string('period[D]') - self.assertTrue(is_dtype_equal(self.dtype, result)) + assert is_dtype_equal(self.dtype, result) with pytest.raises(TypeError): PeriodDtype.construct_from_string('foo') with pytest.raises(TypeError): @@ -286,14 +283,14 @@ def test_construction_from_string(self): PeriodDtype.construct_from_string('datetime64[ns, US/Eastern]') def test_is_dtype(self): - self.assertTrue(PeriodDtype.is_dtype(self.dtype)) - self.assertTrue(PeriodDtype.is_dtype('period[D]')) - self.assertTrue(PeriodDtype.is_dtype('period[3D]')) - self.assertTrue(PeriodDtype.is_dtype(PeriodDtype('3D'))) - self.assertTrue(PeriodDtype.is_dtype('period[U]')) 
- self.assertTrue(PeriodDtype.is_dtype('period[S]')) - self.assertTrue(PeriodDtype.is_dtype(PeriodDtype('U'))) - self.assertTrue(PeriodDtype.is_dtype(PeriodDtype('S'))) + assert PeriodDtype.is_dtype(self.dtype) + assert PeriodDtype.is_dtype('period[D]') + assert PeriodDtype.is_dtype('period[3D]') + assert PeriodDtype.is_dtype(PeriodDtype('3D')) + assert PeriodDtype.is_dtype('period[U]') + assert PeriodDtype.is_dtype('period[S]') + assert PeriodDtype.is_dtype(PeriodDtype('U')) + assert PeriodDtype.is_dtype(PeriodDtype('S')) assert not PeriodDtype.is_dtype('D') assert not PeriodDtype.is_dtype('3D') @@ -305,22 +302,22 @@ def test_is_dtype(self): assert not PeriodDtype.is_dtype(np.float64) def test_equality(self): - self.assertTrue(is_dtype_equal(self.dtype, 'period[D]')) - self.assertTrue(is_dtype_equal(self.dtype, PeriodDtype('D'))) - self.assertTrue(is_dtype_equal(self.dtype, PeriodDtype('D'))) - self.assertTrue(is_dtype_equal(PeriodDtype('D'), PeriodDtype('D'))) + assert is_dtype_equal(self.dtype, 'period[D]') + assert is_dtype_equal(self.dtype, PeriodDtype('D')) + assert is_dtype_equal(self.dtype, PeriodDtype('D')) + assert is_dtype_equal(PeriodDtype('D'), PeriodDtype('D')) assert not is_dtype_equal(self.dtype, 'D') assert not is_dtype_equal(PeriodDtype('D'), PeriodDtype('2D')) def test_basic(self): - self.assertTrue(is_period_dtype(self.dtype)) + assert is_period_dtype(self.dtype) pidx = pd.period_range('2013-01-01 09:00', periods=5, freq='H') - self.assertTrue(is_period_dtype(pidx.dtype)) - self.assertTrue(is_period_dtype(pidx)) - self.assertTrue(is_period(pidx)) + assert is_period_dtype(pidx.dtype) + assert is_period_dtype(pidx) + assert is_period(pidx) s = Series(pidx, name='A') # dtypes @@ -328,7 +325,7 @@ def test_basic(self): # is_period checks period_arraylike assert not is_period_dtype(s.dtype) assert not is_period_dtype(s) - self.assertTrue(is_period(s)) + assert is_period(s) assert not is_period_dtype(np.dtype('float64')) assert not is_period_dtype(1.0) @@ -358,33 +355,33 @@ def test_construction(self): for s in ['interval[int64]', 'Interval[int64]', 'int64']: i = IntervalDtype(s) self.assertEqual(i.subtype, np.dtype('int64')) - self.assertTrue(is_interval_dtype(i)) + assert is_interval_dtype(i) def test_construction_generic(self): # generic i = IntervalDtype('interval') assert i.subtype is None - self.assertTrue(is_interval_dtype(i)) - self.assertTrue(str(i) == 'interval') + assert is_interval_dtype(i) + assert str(i) == 'interval' i = IntervalDtype() assert i.subtype is None - self.assertTrue(is_interval_dtype(i)) - self.assertTrue(str(i) == 'interval') + assert is_interval_dtype(i) + assert str(i) == 'interval' def test_subclass(self): a = IntervalDtype('interval[int64]') b = IntervalDtype('interval[int64]') - self.assertTrue(issubclass(type(a), type(a))) - self.assertTrue(issubclass(type(a), type(b))) + assert issubclass(type(a), type(a)) + assert issubclass(type(a), type(b)) def test_is_dtype(self): - self.assertTrue(IntervalDtype.is_dtype(self.dtype)) - self.assertTrue(IntervalDtype.is_dtype('interval')) - self.assertTrue(IntervalDtype.is_dtype(IntervalDtype('float64'))) - self.assertTrue(IntervalDtype.is_dtype(IntervalDtype('int64'))) - self.assertTrue(IntervalDtype.is_dtype(IntervalDtype(np.int64))) + assert IntervalDtype.is_dtype(self.dtype) + assert IntervalDtype.is_dtype('interval') + assert IntervalDtype.is_dtype(IntervalDtype('float64')) + assert IntervalDtype.is_dtype(IntervalDtype('int64')) + assert IntervalDtype.is_dtype(IntervalDtype(np.int64)) assert not 
IntervalDtype.is_dtype('D') assert not IntervalDtype.is_dtype('3D') @@ -405,9 +402,9 @@ def test_coerce_to_dtype(self): def test_construction_from_string(self): result = IntervalDtype('interval[int64]') - self.assertTrue(is_dtype_equal(self.dtype, result)) + assert is_dtype_equal(self.dtype, result) result = IntervalDtype.construct_from_string('interval[int64]') - self.assertTrue(is_dtype_equal(self.dtype, result)) + assert is_dtype_equal(self.dtype, result) with pytest.raises(TypeError): IntervalDtype.construct_from_string('foo') with pytest.raises(TypeError): @@ -416,23 +413,22 @@ def test_construction_from_string(self): IntervalDtype.construct_from_string('foo[int64]') def test_equality(self): - self.assertTrue(is_dtype_equal(self.dtype, 'interval[int64]')) - self.assertTrue(is_dtype_equal(self.dtype, IntervalDtype('int64'))) - self.assertTrue(is_dtype_equal(self.dtype, IntervalDtype('int64'))) - self.assertTrue(is_dtype_equal(IntervalDtype('int64'), - IntervalDtype('int64'))) + assert is_dtype_equal(self.dtype, 'interval[int64]') + assert is_dtype_equal(self.dtype, IntervalDtype('int64')) + assert is_dtype_equal(self.dtype, IntervalDtype('int64')) + assert is_dtype_equal(IntervalDtype('int64'), IntervalDtype('int64')) assert not is_dtype_equal(self.dtype, 'int64') assert not is_dtype_equal(IntervalDtype('int64'), IntervalDtype('float64')) def test_basic(self): - self.assertTrue(is_interval_dtype(self.dtype)) + assert is_interval_dtype(self.dtype) ii = IntervalIndex.from_breaks(range(3)) - self.assertTrue(is_interval_dtype(ii.dtype)) - self.assertTrue(is_interval_dtype(ii)) + assert is_interval_dtype(ii.dtype) + assert is_interval_dtype(ii) s = Series(ii, name='A') @@ -442,12 +438,11 @@ def test_basic(self): assert not is_interval_dtype(s) def test_basic_dtype(self): - self.assertTrue(is_interval_dtype('interval[int64]')) - self.assertTrue(is_interval_dtype(IntervalIndex.from_tuples([(0, 1)]))) - self.assertTrue(is_interval_dtype - (IntervalIndex.from_breaks(np.arange(4)))) - self.assertTrue(is_interval_dtype( - IntervalIndex.from_breaks(date_range('20130101', periods=3)))) + assert is_interval_dtype('interval[int64]') + assert is_interval_dtype(IntervalIndex.from_tuples([(0, 1)])) + assert is_interval_dtype(IntervalIndex.from_breaks(np.arange(4))) + assert is_interval_dtype(IntervalIndex.from_breaks( + date_range('20130101', periods=3))) assert not is_interval_dtype('U') assert not is_interval_dtype('S') assert not is_interval_dtype('foo') diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 1d3a956829a3c..3449d6c56167e 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -241,14 +241,14 @@ def test_infer_dtype_bytes(self): def test_isinf_scalar(self): # GH 11352 - self.assertTrue(lib.isposinf_scalar(float('inf'))) - self.assertTrue(lib.isposinf_scalar(np.inf)) + assert lib.isposinf_scalar(float('inf')) + assert lib.isposinf_scalar(np.inf) assert not lib.isposinf_scalar(-np.inf) assert not lib.isposinf_scalar(1) assert not lib.isposinf_scalar('a') - self.assertTrue(lib.isneginf_scalar(float('-inf'))) - self.assertTrue(lib.isneginf_scalar(-np.inf)) + assert lib.isneginf_scalar(float('-inf')) + assert lib.isneginf_scalar(-np.inf) assert not lib.isneginf_scalar(np.inf) assert not lib.isneginf_scalar(1) assert not lib.isneginf_scalar('a') @@ -305,17 +305,17 @@ def test_maybe_convert_numeric_post_floatify_nan(self): def test_convert_infs(self): arr = np.array(['inf', 'inf', 'inf'], dtype='O') result = 
lib.maybe_convert_numeric(arr, set(), False) - self.assertTrue(result.dtype == np.float64) + assert result.dtype == np.float64 arr = np.array(['-inf', '-inf', '-inf'], dtype='O') result = lib.maybe_convert_numeric(arr, set(), False) - self.assertTrue(result.dtype == np.float64) + assert result.dtype == np.float64 def test_scientific_no_exponent(self): # See PR 12215 arr = np.array(['42E', '2E', '99e', '6e'], dtype='O') result = lib.maybe_convert_numeric(arr, set(), False, True) - self.assertTrue(np.all(np.isnan(result))) + assert np.all(np.isnan(result)) def test_convert_non_hashable(self): # GH13324 @@ -667,8 +667,8 @@ def test_infer_dtype_all_nan_nat_like(self): def test_is_datetimelike_array_all_nan_nat_like(self): arr = np.array([np.nan, pd.NaT, np.datetime64('nat')]) - self.assertTrue(lib.is_datetime_array(arr)) - self.assertTrue(lib.is_datetime64_array(arr)) + assert lib.is_datetime_array(arr) + assert lib.is_datetime64_array(arr) assert not lib.is_timedelta_array(arr) assert not lib.is_timedelta64_array(arr) assert not lib.is_timedelta_or_timedelta64_array(arr) @@ -676,9 +676,9 @@ def test_is_datetimelike_array_all_nan_nat_like(self): arr = np.array([np.nan, pd.NaT, np.timedelta64('nat')]) assert not lib.is_datetime_array(arr) assert not lib.is_datetime64_array(arr) - self.assertTrue(lib.is_timedelta_array(arr)) - self.assertTrue(lib.is_timedelta64_array(arr)) - self.assertTrue(lib.is_timedelta_or_timedelta64_array(arr)) + assert lib.is_timedelta_array(arr) + assert lib.is_timedelta64_array(arr) + assert lib.is_timedelta_or_timedelta64_array(arr) arr = np.array([np.nan, pd.NaT, np.datetime64('nat'), np.timedelta64('nat')]) @@ -689,11 +689,11 @@ def test_is_datetimelike_array_all_nan_nat_like(self): assert not lib.is_timedelta_or_timedelta64_array(arr) arr = np.array([np.nan, pd.NaT]) - self.assertTrue(lib.is_datetime_array(arr)) - self.assertTrue(lib.is_datetime64_array(arr)) - self.assertTrue(lib.is_timedelta_array(arr)) - self.assertTrue(lib.is_timedelta64_array(arr)) - self.assertTrue(lib.is_timedelta_or_timedelta64_array(arr)) + assert lib.is_datetime_array(arr) + assert lib.is_datetime64_array(arr) + assert lib.is_timedelta_array(arr) + assert lib.is_timedelta64_array(arr) + assert lib.is_timedelta_or_timedelta64_array(arr) arr = np.array([np.nan, np.nan], dtype=object) assert not lib.is_datetime_array(arr) @@ -749,7 +749,7 @@ def test_to_object_array_width(self): tm.assert_numpy_array_equal(out, expected) def test_is_period(self): - self.assertTrue(lib.is_period(pd.Period('2011-01', freq='M'))) + assert lib.is_period(pd.Period('2011-01', freq='M')) assert not lib.is_period(pd.PeriodIndex(['2011-01'], freq='M')) assert not lib.is_period(pd.Timestamp('2011-01')) assert not lib.is_period(1) @@ -778,15 +778,15 @@ class TestNumberScalar(tm.TestCase): def test_is_number(self): - self.assertTrue(is_number(True)) - self.assertTrue(is_number(1)) - self.assertTrue(is_number(1.1)) - self.assertTrue(is_number(1 + 3j)) - self.assertTrue(is_number(np.bool(False))) - self.assertTrue(is_number(np.int64(1))) - self.assertTrue(is_number(np.float64(1.1))) - self.assertTrue(is_number(np.complex128(1 + 3j))) - self.assertTrue(is_number(np.nan)) + assert is_number(True) + assert is_number(1) + assert is_number(1.1) + assert is_number(1 + 3j) + assert is_number(np.bool(False)) + assert is_number(np.int64(1)) + assert is_number(np.float64(1.1)) + assert is_number(np.complex128(1 + 3j)) + assert is_number(np.nan) assert not is_number(None) assert not is_number('x') @@ -799,12 +799,12 @@ def 
test_is_number(self): # questionable assert not is_number(np.bool_(False)) - self.assertTrue(is_number(np.timedelta64(1, 'D'))) + assert is_number(np.timedelta64(1, 'D')) def test_is_bool(self): - self.assertTrue(is_bool(True)) - self.assertTrue(is_bool(np.bool(False))) - self.assertTrue(is_bool(np.bool_(False))) + assert is_bool(True) + assert is_bool(np.bool(False)) + assert is_bool(np.bool_(False)) assert not is_bool(1) assert not is_bool(1.1) @@ -824,8 +824,8 @@ def test_is_bool(self): assert not is_bool(Timedelta('1 days')) def test_is_integer(self): - self.assertTrue(is_integer(1)) - self.assertTrue(is_integer(np.int64(1))) + assert is_integer(1) + assert is_integer(np.int64(1)) assert not is_integer(True) assert not is_integer(1.1) @@ -845,12 +845,12 @@ def test_is_integer(self): assert not is_integer(Timedelta('1 days')) # questionable - self.assertTrue(is_integer(np.timedelta64(1, 'D'))) + assert is_integer(np.timedelta64(1, 'D')) def test_is_float(self): - self.assertTrue(is_float(1.1)) - self.assertTrue(is_float(np.float64(1.1))) - self.assertTrue(is_float(np.nan)) + assert is_float(1.1) + assert is_float(np.float64(1.1)) + assert is_float(np.nan) assert not is_float(True) assert not is_float(1) @@ -874,43 +874,43 @@ def test_is_datetime_dtypes(self): ts = pd.date_range('20130101', periods=3) tsa = pd.date_range('20130101', periods=3, tz='US/Eastern') - self.assertTrue(is_datetime64_dtype('datetime64')) - self.assertTrue(is_datetime64_dtype('datetime64[ns]')) - self.assertTrue(is_datetime64_dtype(ts)) + assert is_datetime64_dtype('datetime64') + assert is_datetime64_dtype('datetime64[ns]') + assert is_datetime64_dtype(ts) assert not is_datetime64_dtype(tsa) assert not is_datetime64_ns_dtype('datetime64') - self.assertTrue(is_datetime64_ns_dtype('datetime64[ns]')) - self.assertTrue(is_datetime64_ns_dtype(ts)) - self.assertTrue(is_datetime64_ns_dtype(tsa)) + assert is_datetime64_ns_dtype('datetime64[ns]') + assert is_datetime64_ns_dtype(ts) + assert is_datetime64_ns_dtype(tsa) - self.assertTrue(is_datetime64_any_dtype('datetime64')) - self.assertTrue(is_datetime64_any_dtype('datetime64[ns]')) - self.assertTrue(is_datetime64_any_dtype(ts)) - self.assertTrue(is_datetime64_any_dtype(tsa)) + assert is_datetime64_any_dtype('datetime64') + assert is_datetime64_any_dtype('datetime64[ns]') + assert is_datetime64_any_dtype(ts) + assert is_datetime64_any_dtype(tsa) assert not is_datetime64tz_dtype('datetime64') assert not is_datetime64tz_dtype('datetime64[ns]') assert not is_datetime64tz_dtype(ts) - self.assertTrue(is_datetime64tz_dtype(tsa)) + assert is_datetime64tz_dtype(tsa) for tz in ['US/Eastern', 'UTC']: dtype = 'datetime64[ns, {}]'.format(tz) assert not is_datetime64_dtype(dtype) - self.assertTrue(is_datetime64tz_dtype(dtype)) - self.assertTrue(is_datetime64_ns_dtype(dtype)) - self.assertTrue(is_datetime64_any_dtype(dtype)) + assert is_datetime64tz_dtype(dtype) + assert is_datetime64_ns_dtype(dtype) + assert is_datetime64_any_dtype(dtype) def test_is_timedelta(self): - self.assertTrue(is_timedelta64_dtype('timedelta64')) - self.assertTrue(is_timedelta64_dtype('timedelta64[ns]')) + assert is_timedelta64_dtype('timedelta64') + assert is_timedelta64_dtype('timedelta64[ns]') assert not is_timedelta64_ns_dtype('timedelta64') - self.assertTrue(is_timedelta64_ns_dtype('timedelta64[ns]')) + assert is_timedelta64_ns_dtype('timedelta64[ns]') tdi = TimedeltaIndex([1e14, 2e14], dtype='timedelta64') - self.assertTrue(is_timedelta64_dtype(tdi)) - self.assertTrue(is_timedelta64_ns_dtype(tdi)) - 
self.assertTrue(is_timedelta64_ns_dtype(tdi.astype('timedelta64[ns]'))) + assert is_timedelta64_dtype(tdi) + assert is_timedelta64_ns_dtype(tdi) + assert is_timedelta64_ns_dtype(tdi.astype('timedelta64[ns]')) # Conversion to Int64Index: assert not is_timedelta64_ns_dtype(tdi.astype('timedelta64')) @@ -920,19 +920,19 @@ def test_is_timedelta(self): class Testisscalar(tm.TestCase): def test_isscalar_builtin_scalars(self): - self.assertTrue(is_scalar(None)) - self.assertTrue(is_scalar(True)) - self.assertTrue(is_scalar(False)) - self.assertTrue(is_scalar(0.)) - self.assertTrue(is_scalar(np.nan)) - self.assertTrue(is_scalar('foobar')) - self.assertTrue(is_scalar(b'foobar')) - self.assertTrue(is_scalar(u('efoobar'))) - self.assertTrue(is_scalar(datetime(2014, 1, 1))) - self.assertTrue(is_scalar(date(2014, 1, 1))) - self.assertTrue(is_scalar(time(12, 0))) - self.assertTrue(is_scalar(timedelta(hours=1))) - self.assertTrue(is_scalar(pd.NaT)) + assert is_scalar(None) + assert is_scalar(True) + assert is_scalar(False) + assert is_scalar(0.) + assert is_scalar(np.nan) + assert is_scalar('foobar') + assert is_scalar(b'foobar') + assert is_scalar(u('efoobar')) + assert is_scalar(datetime(2014, 1, 1)) + assert is_scalar(date(2014, 1, 1)) + assert is_scalar(time(12, 0)) + assert is_scalar(timedelta(hours=1)) + assert is_scalar(pd.NaT) def test_isscalar_builtin_nonscalars(self): assert not is_scalar({}) @@ -944,15 +944,15 @@ def test_isscalar_builtin_nonscalars(self): assert not is_scalar(Ellipsis) def test_isscalar_numpy_array_scalars(self): - self.assertTrue(is_scalar(np.int64(1))) - self.assertTrue(is_scalar(np.float64(1.))) - self.assertTrue(is_scalar(np.int32(1))) - self.assertTrue(is_scalar(np.object_('foobar'))) - self.assertTrue(is_scalar(np.str_('foobar'))) - self.assertTrue(is_scalar(np.unicode_(u('foobar')))) - self.assertTrue(is_scalar(np.bytes_(b'foobar'))) - self.assertTrue(is_scalar(np.datetime64('2014-01-01'))) - self.assertTrue(is_scalar(np.timedelta64(1, 'h'))) + assert is_scalar(np.int64(1)) + assert is_scalar(np.float64(1.)) + assert is_scalar(np.int32(1)) + assert is_scalar(np.object_('foobar')) + assert is_scalar(np.str_('foobar')) + assert is_scalar(np.unicode_(u('foobar'))) + assert is_scalar(np.bytes_(b'foobar')) + assert is_scalar(np.datetime64('2014-01-01')) + assert is_scalar(np.timedelta64(1, 'h')) def test_isscalar_numpy_zerodim_arrays(self): for zerodim in [np.array(1), np.array('foobar'), @@ -960,7 +960,7 @@ def test_isscalar_numpy_zerodim_arrays(self): np.array(np.timedelta64(1, 'h')), np.array(np.datetime64('NaT'))]: assert not is_scalar(zerodim) - self.assertTrue(is_scalar(lib.item_from_zerodim(zerodim))) + assert is_scalar(lib.item_from_zerodim(zerodim)) def test_isscalar_numpy_arrays(self): assert not is_scalar(np.array([])) @@ -968,9 +968,9 @@ def test_isscalar_numpy_arrays(self): assert not is_scalar(np.matrix('1; 2')) def test_isscalar_pandas_scalars(self): - self.assertTrue(is_scalar(Timestamp('2014-01-01'))) - self.assertTrue(is_scalar(Timedelta(hours=1))) - self.assertTrue(is_scalar(Period('2014-01-01'))) + assert is_scalar(Timestamp('2014-01-01')) + assert is_scalar(Timedelta(hours=1)) + assert is_scalar(Period('2014-01-01')) def test_lisscalar_pandas_containers(self): assert not is_scalar(Series()) diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 3e1a12d439b9a..78396a8d89d91 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -48,11 +48,11 @@ def test_notnull(): class 
TestIsNull(tm.TestCase): def test_0d_array(self): - self.assertTrue(isnull(np.array(np.nan))) + assert isnull(np.array(np.nan)) assert not isnull(np.array(0.0)) assert not isnull(np.array(0)) # test object dtype - self.assertTrue(isnull(np.array(np.nan, dtype=object))) + assert isnull(np.array(np.nan, dtype=object)) assert not isnull(np.array(0.0, dtype=object)) assert not isnull(np.array(0, dtype=object)) @@ -66,9 +66,9 @@ def test_empty_object(self): def test_isnull(self): assert not isnull(1.) - self.assertTrue(isnull(None)) - self.assertTrue(isnull(np.NaN)) - self.assertTrue(float('nan')) + assert isnull(None) + assert isnull(np.NaN) + assert float('nan') assert not isnull(np.inf) assert not isnull(-np.inf) @@ -136,7 +136,7 @@ def test_isnull_numpy_nat(self): def test_isnull_datetime(self): assert not isnull(datetime.now()) - self.assertTrue(notnull(datetime.now())) + assert notnull(datetime.now()) idx = date_range('1/1/1990', periods=20) exp = np.ones(len(idx), dtype=bool) @@ -146,14 +146,14 @@ def test_isnull_datetime(self): idx[0] = iNaT idx = DatetimeIndex(idx) mask = isnull(idx) - self.assertTrue(mask[0]) + assert mask[0] exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) tm.assert_numpy_array_equal(mask, exp) # GH 9129 pidx = idx.to_period(freq='M') mask = isnull(pidx) - self.assertTrue(mask[0]) + assert mask[0] exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) tm.assert_numpy_array_equal(mask, exp) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 0a00d7e018f33..303c8cb6e858a 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -496,7 +496,7 @@ def test_rename_multiindex(self): def test_rename_nocopy(self): renamed = self.frame.rename(columns={'C': 'foo'}, copy=False) renamed['foo'] = 1. 
- self.assertTrue((self.frame['C'] == 1.).all()) + assert (self.frame['C'] == 1.).all() def test_rename_inplace(self): self.frame.rename(columns={'C': 'foo'}) @@ -763,15 +763,15 @@ def test_set_index_names(self): self.assertEqual(df.set_index(df.index).index.names, ['A', 'B']) # Check that set_index isn't converting a MultiIndex into an Index - self.assertTrue(isinstance(df.set_index(df.index).index, MultiIndex)) + assert isinstance(df.set_index(df.index).index, MultiIndex) # Check actual equality tm.assert_index_equal(df.set_index(df.index).index, mi) # Check that [MultiIndex, MultiIndex] yields a MultiIndex rather # than a pair of tuples - self.assertTrue(isinstance(df.set_index( - [df.index, df.index]).index, MultiIndex)) + assert isinstance(df.set_index( + [df.index, df.index]).index, MultiIndex) # Check equality tm.assert_index_equal(df.set_index([df.index, df.index]).index, mi2) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 6268ccc27c7a6..8f46f055343d4 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -81,11 +81,11 @@ def test_corr_nooverlap(self): 'C': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]}) rs = df.corr(meth) - self.assertTrue(isnull(rs.loc['A', 'B'])) - self.assertTrue(isnull(rs.loc['B', 'A'])) + assert isnull(rs.loc['A', 'B']) + assert isnull(rs.loc['B', 'A']) self.assertEqual(rs.loc['A', 'A'], 1) self.assertEqual(rs.loc['B', 'B'], 1) - self.assertTrue(isnull(rs.loc['C', 'C'])) + assert isnull(rs.loc['C', 'C']) def test_corr_constant(self): tm._skip_if_no_scipy() @@ -96,7 +96,7 @@ def test_corr_constant(self): df = DataFrame({'A': [1, 1, 1, np.nan, np.nan, np.nan], 'B': [np.nan, np.nan, np.nan, 1, 1, 1]}) rs = df.corr(meth) - self.assertTrue(isnull(rs.values).all()) + assert isnull(rs.values).all() def test_corr_int(self): # dtypes other than float64 #1761 @@ -136,7 +136,7 @@ def test_cov(self): tm.assert_frame_equal(expected, result) result = self.frame.cov(min_periods=len(self.frame) + 1) - self.assertTrue(isnull(result.values).all()) + assert isnull(result.values).all() # with NAs frame = self.frame.copy() @@ -234,7 +234,7 @@ def test_corrwith_matches_corrcoef(self): c2 = np.corrcoef(df1['a'], df2['a'])[0][1] tm.assert_almost_equal(c1, c2) - self.assertTrue(c1 < 1) + assert c1 < 1 def test_bool_describe_in_mixed_frame(self): df = DataFrame({ @@ -710,7 +710,7 @@ def alt(x): kurt = df.kurt() kurt2 = df.kurt(level=0).xs('bar') tm.assert_series_equal(kurt, kurt2, check_names=False) - self.assertTrue(kurt.name is None) + assert kurt.name is None self.assertEqual(kurt2.name, 'bar') def _check_stat_op(self, name, alternative, frame=None, has_skipna=True, @@ -733,7 +733,7 @@ def _check_stat_op(self, name, alternative, frame=None, has_skipna=True, df['a'] = lrange(len(df)) result = getattr(df, name)() assert isinstance(result, Series) - self.assertTrue(len(result)) + assert len(result) if has_skipna: def skipna_wrapper(x): @@ -796,8 +796,8 @@ def wrapper(x): r0 = getattr(all_na, name)(axis=0) r1 = getattr(all_na, name)(axis=1) if not tm._incompat_bottleneck_version(name): - self.assertTrue(np.isnan(r0).all()) - self.assertTrue(np.isnan(r1).all()) + assert np.isnan(r0).all() + assert np.isnan(r1).all() def test_mode(self): df = pd.DataFrame({"A": [12, 12, 11, 12, 19, 11], @@ -864,7 +864,7 @@ def test_operators_timedelta64(self): self.assertEqual(result[1], diffs.loc[0, 'B']) result = diffs.min(axis=1) - self.assertTrue((result == diffs.loc[0, 'B']).all()) + assert (result == 
diffs.loc[0, 'B']).all() # max result = diffs.max() @@ -872,7 +872,7 @@ def test_operators_timedelta64(self): self.assertEqual(result[1], diffs.loc[2, 'B']) result = diffs.max(axis=1) - self.assertTrue((result == diffs['A']).all()) + assert (result == diffs['A']).all() # abs result = diffs.abs() @@ -924,8 +924,8 @@ def test_operators_timedelta64(self): df['off2'] = df['time'] - df['time2'] df._consolidate_inplace() - self.assertTrue(df['off1'].dtype == 'timedelta64[ns]') - self.assertTrue(df['off2'].dtype == 'timedelta64[ns]') + assert df['off1'].dtype == 'timedelta64[ns]' + assert df['off2'].dtype == 'timedelta64[ns]' def test_sum_corner(self): axis0 = self.empty.sum(0) @@ -953,7 +953,7 @@ def test_mean_corner(self): the_mean = self.mixed_frame.mean(axis=0) the_sum = self.mixed_frame.sum(axis=0, numeric_only=True) tm.assert_index_equal(the_sum.index, the_mean.index) - self.assertTrue(len(the_mean.index) < len(self.mixed_frame.columns)) + assert len(the_mean.index) < len(self.mixed_frame.columns) # xs sum mixed type, just want to know it works... the_mean = self.mixed_frame.mean(axis=1) @@ -1134,8 +1134,8 @@ def __nonzero__(self): assert not r0.any() assert not r1.any() else: - self.assertTrue(r0.all()) - self.assertTrue(r1.all()) + assert r0.all() + assert r1.all() # ---------------------------------------------------------------------- # Isin @@ -1820,10 +1820,9 @@ def test_dataframe_clip(self): lb_mask = df.values <= lb ub_mask = df.values >= ub mask = ~lb_mask & ~ub_mask - self.assertTrue((clipped_df.values[lb_mask] == lb).all()) - self.assertTrue((clipped_df.values[ub_mask] == ub).all()) - self.assertTrue((clipped_df.values[mask] == - df.values[mask]).all()) + assert (clipped_df.values[lb_mask] == lb).all() + assert (clipped_df.values[ub_mask] == ub).all() + assert (clipped_df.values[mask] == df.values[mask]).all() def test_clip_against_series(self): # GH #6966 @@ -1884,11 +1883,11 @@ def test_dot(self): # Check series argument result = a.dot(b['one']) tm.assert_series_equal(result, expected['one'], check_names=False) - self.assertTrue(result.name is None) + assert result.name is None result = a.dot(b1['one']) tm.assert_series_equal(result, expected['one'], check_names=False) - self.assertTrue(result.name is None) + assert result.name is None # can pass correct-length arrays row = a.iloc[0].values diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 7669de17885f8..6b1e9d66d2071 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -139,7 +139,7 @@ def test_get_agg_axis(self): pytest.raises(ValueError, self.frame._get_agg_axis, 2) def test_nonzero(self): - self.assertTrue(self.empty.empty) + assert self.empty.empty assert not self.frame.empty assert not self.mixed_frame.empty @@ -157,7 +157,7 @@ def test_iteritems(self): self.assertEqual(type(v), Series) def test_iter(self): - self.assertTrue(tm.equalContents(list(self.frame), self.frame.columns)) + assert tm.equalContents(list(self.frame), self.frame.columns) def test_iterrows(self): for i, (k, v) in enumerate(self.frame.iterrows()): @@ -223,7 +223,7 @@ def test_as_matrix(self): for j, value in enumerate(row): col = frameCols[j] if np.isnan(value): - self.assertTrue(np.isnan(frame[col][i])) + assert np.isnan(frame[col][i]) else: self.assertEqual(value, frame[col][i]) @@ -242,7 +242,7 @@ def test_as_matrix(self): def test_values(self): self.frame.values[:, 0] = 5. 
- self.assertTrue((self.frame.values[:, 0] == 5).all()) + assert (self.frame.values[:, 0] == 5).all() def test_deepcopy(self): cp = deepcopy(self.frame) @@ -260,7 +260,7 @@ def test_transpose(self): for idx, series in compat.iteritems(dft): for col, value in compat.iteritems(series): if np.isnan(value): - self.assertTrue(np.isnan(frame[col][idx])) + assert np.isnan(frame[col][idx]) else: self.assertEqual(value, frame[col][idx]) @@ -276,7 +276,7 @@ def test_transpose_get_view(self): dft = self.frame.T dft.values[:, 5:10] = 5 - self.assertTrue((self.frame.values[5:10] == 5).all()) + assert (self.frame.values[5:10] == 5).all() def test_swapaxes(self): df = DataFrame(np.random.randn(10, 5)) @@ -323,15 +323,15 @@ def test_empty_nonzero(self): df = pd.DataFrame(index=[1], columns=[1]) assert not df.empty df = DataFrame(index=['a', 'b'], columns=['c', 'd']).dropna() - self.assertTrue(df.empty) - self.assertTrue(df.T.empty) + assert df.empty + assert df.T.empty empty_frames = [pd.DataFrame(), pd.DataFrame(index=[1]), pd.DataFrame(columns=[1]), pd.DataFrame({1: []})] for df in empty_frames: - self.assertTrue(df.empty) - self.assertTrue(df.T.empty) + assert df.empty + assert df.T.empty def test_with_datetimelikes(self): @@ -352,7 +352,7 @@ def test_inplace_return_self(self): def _check_f(base, f): result = f(base) - self.assertTrue(result is None) + assert result is None # -----DataFrame----- diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 9d0f00c6eeffe..0bccca5cecb27 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -61,10 +61,10 @@ def test_apply_mixed_datetimelike(self): def test_apply_empty(self): # empty applied = self.empty.apply(np.sqrt) - self.assertTrue(applied.empty) + assert applied.empty applied = self.empty.apply(np.mean) - self.assertTrue(applied.empty) + assert applied.empty no_rows = self.frame[:0] result = no_rows.apply(lambda x: x.mean()) @@ -125,12 +125,12 @@ def test_apply_broadcast(self): agged = self.frame.apply(np.mean) for col, ts in compat.iteritems(broadcasted): - self.assertTrue((ts == agged[col]).all()) + assert (ts == agged[col]).all() broadcasted = self.frame.apply(np.mean, axis=1, broadcast=True) agged = self.frame.apply(np.mean, axis=1) for idx in broadcasted.index: - self.assertTrue((broadcasted.xs(idx) == agged[idx]).all()) + assert (broadcasted.xs(idx) == agged[idx]).all() def test_apply_raw(self): result0 = self.frame.apply(np.mean, raw=True) @@ -452,7 +452,7 @@ def test_frame_apply_dont_convert_datetime64(self): df = df.applymap(lambda x: x + BDay()) df = df.applymap(lambda x: x + BDay()) - self.assertTrue(df.x1.dtype == 'M8[ns]') + assert df.x1.dtype == 'M8[ns]' # See gh-12244 def test_apply_non_numpy_dtype(self): diff --git a/pandas/tests/frame/test_asof.py b/pandas/tests/frame/test_asof.py index dd03f8f7cb7a9..ba3e239756f51 100644 --- a/pandas/tests/frame/test_asof.py +++ b/pandas/tests/frame/test_asof.py @@ -23,17 +23,17 @@ def test_basic(self): freq='25s') result = df.asof(dates) - self.assertTrue(result.notnull().all(1).all()) + assert result.notnull().all(1).all() lb = df.index[14] ub = df.index[30] dates = list(dates) result = df.asof(dates) - self.assertTrue(result.notnull().all(1).all()) + assert result.notnull().all(1).all() mask = (result.index >= lb) & (result.index < ub) rs = result[mask] - self.assertTrue((rs == 14).all(1).all()) + assert (rs == 14).all(1).all() def test_subset(self): N = 10 diff --git a/pandas/tests/frame/test_axis_select_reindex.py 
b/pandas/tests/frame/test_axis_select_reindex.py index 61d0694eea382..2c285c6261415 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -120,7 +120,7 @@ def test_drop_multiindex_not_lexsorted(self): lexsorted_mi = MultiIndex.from_tuples( [('a', ''), ('b1', 'c1'), ('b2', 'c2')], names=['b', 'c']) lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi) - self.assertTrue(lexsorted_df.columns.is_lexsorted()) + assert lexsorted_df.columns.is_lexsorted() # define the non-lexsorted version not_lexsorted_df = DataFrame(columns=['a', 'b', 'c', 'd'], @@ -172,14 +172,14 @@ def test_reindex(self): for idx, val in compat.iteritems(newFrame[col]): if idx in self.frame.index: if np.isnan(val): - self.assertTrue(np.isnan(self.frame[col][idx])) + assert np.isnan(self.frame[col][idx]) else: self.assertEqual(val, self.frame[col][idx]) else: - self.assertTrue(np.isnan(val)) + assert np.isnan(val) for col, series in compat.iteritems(newFrame): - self.assertTrue(tm.equalContents(series.index, newFrame.index)) + assert tm.equalContents(series.index, newFrame.index) emptyFrame = self.frame.reindex(Index([])) self.assertEqual(len(emptyFrame.index), 0) @@ -190,15 +190,14 @@ def test_reindex(self): for idx, val in compat.iteritems(nonContigFrame[col]): if idx in self.frame.index: if np.isnan(val): - self.assertTrue(np.isnan(self.frame[col][idx])) + assert np.isnan(self.frame[col][idx]) else: self.assertEqual(val, self.frame[col][idx]) else: - self.assertTrue(np.isnan(val)) + assert np.isnan(val) for col, series in compat.iteritems(nonContigFrame): - self.assertTrue(tm.equalContents(series.index, - nonContigFrame.index)) + assert tm.equalContents(series.index, nonContigFrame.index) # corner cases @@ -208,7 +207,7 @@ def test_reindex(self): # length zero newFrame = self.frame.reindex([]) - self.assertTrue(newFrame.empty) + assert newFrame.empty self.assertEqual(len(newFrame.columns), len(self.frame.columns)) # length zero with columns reindexed with non-empty index @@ -355,7 +354,7 @@ def test_reindex_fill_value(self): # axis=0 result = df.reindex(lrange(15)) - self.assertTrue(np.isnan(result.values[-5:]).all()) + assert np.isnan(result.values[-5:]).all() result = df.reindex(lrange(15), fill_value=0) expected = df.reindex(lrange(15)).fillna(0) @@ -847,11 +846,11 @@ def test_reindex_boolean(self): reindexed = frame.reindex(np.arange(10)) self.assertEqual(reindexed.values.dtype, np.object_) - self.assertTrue(isnull(reindexed[0][1])) + assert isnull(reindexed[0][1]) reindexed = frame.reindex(columns=lrange(3)) self.assertEqual(reindexed.values.dtype, np.object_) - self.assertTrue(isnull(reindexed[1]).all()) + assert isnull(reindexed[1]).all() def test_reindex_objects(self): reindexed = self.mixed_frame.reindex(columns=['foo', 'A', 'B']) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 37615179a3f26..2a319348aca3f 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -71,16 +71,16 @@ def test_as_matrix_consolidate(self): self.frame['E'] = 7. assert not self.frame._data.is_consolidated() _ = self.frame.as_matrix() # noqa - self.assertTrue(self.frame._data.is_consolidated()) + assert self.frame._data.is_consolidated() def test_modify_values(self): self.frame.values[5] = 5 - self.assertTrue((self.frame.values[5] == 5).all()) + assert (self.frame.values[5] == 5).all() # unconsolidated self.frame['E'] = 7. 
self.frame.values[6] = 6 - self.assertTrue((self.frame.values[6] == 6).all()) + assert (self.frame.values[6] == 6).all() def test_boolean_set_uncons(self): self.frame['E'] = 7. @@ -307,12 +307,12 @@ def test_equals_different_blocks(self): df1 = df0.reset_index()[["A", "B", "C"]] # this assert verifies that the above operations have # induced a block rearrangement - self.assertTrue(df0._data.blocks[0].dtype != - df1._data.blocks[0].dtype) + assert (df0._data.blocks[0].dtype != df1._data.blocks[0].dtype) + # do the real tests assert_frame_equal(df0, df1) - self.assertTrue(df0.equals(df1)) - self.assertTrue(df1.equals(df0)) + assert df0.equals(df1) + assert df1.equals(df0) def test_copy_blocks(self): # API/ENH 9607 @@ -340,7 +340,7 @@ def test_no_copy_blocks(self): _df.loc[:, column] = _df[column] + 1 # make sure we did change the original DataFrame - self.assertTrue(_df[column].equals(df[column])) + assert _df[column].equals(df[column]) def test_copy(self): cop = self.frame.copy() @@ -400,7 +400,7 @@ def test_consolidate_datetime64(self): def test_is_mixed_type(self): assert not self.frame._is_mixed_type - self.assertTrue(self.mixed_frame._is_mixed_type) + assert self.mixed_frame._is_mixed_type def test_get_numeric_data(self): # TODO(wesm): unused? @@ -507,7 +507,7 @@ def test_stale_cached_series_bug_473(self): repr(Y) result = Y.sum() # noqa exp = Y['g'].sum() # noqa - self.assertTrue(pd.isnull(Y['g']['c'])) + assert pd.isnull(Y['g']['c']) def test_get_X_columns(self): # numeric and object columns @@ -542,4 +542,4 @@ def test_strange_column_corruption_issue(self): first = len(df.loc[pd.isnull(df[myid]), [myid]]) second = len(df.loc[pd.isnull(df[myid]), [myid]]) - self.assertTrue(first == second == 0) + assert first == second == 0 diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 0e4184b07f22e..5452792def1ac 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -464,7 +464,7 @@ def test_combine_first(self): combined = head.combine_first(tail) reordered_frame = self.frame.reindex(combined.index) assert_frame_equal(combined, reordered_frame) - self.assertTrue(tm.equalContents(combined.columns, self.frame.columns)) + assert tm.equalContents(combined.columns, self.frame.columns) assert_series_equal(combined['A'], reordered_frame['A']) # same index @@ -478,7 +478,7 @@ def test_combine_first(self): combined = fcopy.combine_first(fcopy2) - self.assertTrue((combined['A'] == 1).all()) + assert (combined['A'] == 1).all() assert_series_equal(combined['B'], fcopy['B']) assert_series_equal(combined['C'], fcopy2['C']) assert_series_equal(combined['D'], fcopy['D']) @@ -488,12 +488,12 @@ def test_combine_first(self): head['A'] = 1 combined = head.combine_first(tail) - self.assertTrue((combined['A'][:10] == 1).all()) + assert (combined['A'][:10] == 1).all() # reverse overlap tail['A'][:10] = 0 combined = tail.combine_first(head) - self.assertTrue((combined['A'][:10] == 0).all()) + assert (combined['A'][:10] == 0).all() # no overlap f = self.frame[:10] @@ -510,13 +510,13 @@ def test_combine_first(self): assert_frame_equal(comb, self.frame) comb = self.frame.combine_first(DataFrame(index=["faz", "boo"])) - self.assertTrue("faz" in comb.index) + assert "faz" in comb.index # #2525 df = DataFrame({'a': [1]}, index=[datetime(2012, 1, 1)]) df2 = DataFrame({}, columns=['b']) result = df.combine_first(df2) - self.assertTrue('b' in result) + assert 'b' in result def test_combine_first_mixed_bug(self): idx = 
Index(['a', 'b', 'c', 'e']) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index e9a6f03abbe8d..588182eb30336 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -280,12 +280,12 @@ def test_constructor_multi_index(self): tuples = [(2, 3), (3, 3), (3, 3)] mi = MultiIndex.from_tuples(tuples) df = DataFrame(index=mi, columns=mi) - self.assertTrue(pd.isnull(df).values.ravel().all()) + assert pd.isnull(df).values.ravel().all() tuples = [(3, 3), (2, 3), (3, 3)] mi = MultiIndex.from_tuples(tuples) df = DataFrame(index=mi, columns=mi) - self.assertTrue(pd.isnull(df).values.ravel().all()) + assert pd.isnull(df).values.ravel().all() def test_constructor_error_msgs(self): msg = "Empty data passed with indices specified." @@ -594,7 +594,7 @@ def test_constructor_maskedarray(self): # what is this even checking?? mat = ma.masked_all((2, 3), dtype=float) frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertTrue(np.all(~np.asarray(frame == frame))) + assert np.all(~np.asarray(frame == frame)) def test_constructor_maskedarray_nonfloat(self): # masked int promoted to float @@ -604,7 +604,7 @@ def test_constructor_maskedarray_nonfloat(self): self.assertEqual(len(frame.index), 2) self.assertEqual(len(frame.columns), 3) - self.assertTrue(np.all(~np.asarray(frame == frame))) + assert np.all(~np.asarray(frame == frame)) # cast type frame = DataFrame(mat, columns=['A', 'B', 'C'], @@ -626,7 +626,7 @@ def test_constructor_maskedarray_nonfloat(self): self.assertEqual(len(frame.index), 2) self.assertEqual(len(frame.columns), 3) - self.assertTrue(isnull(frame).values.all()) + assert isnull(frame).values.all() # cast type frame = DataFrame(mat, columns=['A', 'B', 'C'], @@ -648,7 +648,7 @@ def test_constructor_maskedarray_nonfloat(self): self.assertEqual(len(frame.index), 2) self.assertEqual(len(frame.columns), 3) - self.assertTrue(np.all(~np.asarray(frame == frame))) + assert np.all(~np.asarray(frame == frame)) # cast type frame = DataFrame(mat, columns=['A', 'B', 'C'], @@ -817,7 +817,7 @@ def test_constructor_list_of_lists(self): # GH #484 l = [[1, 'a'], [2, 'b']] df = DataFrame(data=l, columns=["num", "str"]) - self.assertTrue(is_integer_dtype(df['num'])) + assert is_integer_dtype(df['num']) self.assertEqual(df['str'].dtype, np.object_) # GH 4851 @@ -1027,7 +1027,7 @@ def test_constructor_mixed_dict_and_Series(self): data['B'] = Series([4, 3, 2, 1], index=['bar', 'qux', 'baz', 'foo']) result = DataFrame(data) - self.assertTrue(result.index.is_monotonic) + assert result.index.is_monotonic # ordering ambiguous, raise exception with tm.assert_raises_regex(ValueError, 'ambiguous ordering'): @@ -1344,13 +1344,13 @@ def test_constructor_with_datetimes(self): # GH 8411 dr = date_range('20130101', periods=3) df = DataFrame({'value': dr}) - self.assertTrue(df.iat[0, 0].tz is None) + assert df.iat[0, 0].tz is None dr = date_range('20130101', periods=3, tz='UTC') df = DataFrame({'value': dr}) - self.assertTrue(str(df.iat[0, 0].tz) == 'UTC') + assert str(df.iat[0, 0].tz) == 'UTC' dr = date_range('20130101', periods=3, tz='US/Eastern') df = DataFrame({'value': dr}) - self.assertTrue(str(df.iat[0, 0].tz) == 'US/Eastern') + assert str(df.iat[0, 0].tz) == 'US/Eastern' # GH 7822 # preserver an index with a tz on dict construction @@ -1451,14 +1451,14 @@ def test_constructor_for_list_with_dtypes(self): def test_constructor_frame_copy(self): cop = DataFrame(self.frame, copy=True) cop['A'] = 5 - 
self.assertTrue((cop['A'] == 5).all()) + assert (cop['A'] == 5).all() assert not (self.frame['A'] == 5).all() def test_constructor_ndarray_copy(self): df = DataFrame(self.frame.values) self.frame.values[5] = 5 - self.assertTrue((df.values[5] == 5).all()) + assert (df.values[5] == 5).all() df = DataFrame(self.frame.values, copy=True) self.frame.values[6] = 6 @@ -1551,7 +1551,7 @@ def test_from_records_nones(self): (None, 2, 5, 3)] df = DataFrame.from_records(tuples, columns=['a', 'b', 'c', 'd']) - self.assertTrue(np.isnan(df['c'][0])) + assert np.isnan(df['c'][0]) def test_from_records_iterator(self): arr = np.array([(1.0, 1.0, 2, 2), (3.0, 3.0, 4, 4), (5., 5., 6, 6), @@ -1628,7 +1628,7 @@ def test_from_records_decimal(self): df = DataFrame.from_records(tuples, columns=['a'], coerce_float=True) self.assertEqual(df['a'].dtype, np.float64) - self.assertTrue(np.isnan(df['a'].values[-1])) + assert np.isnan(df['a'].values[-1]) def test_from_records_duplicates(self): result = DataFrame.from_records([(1, 2, 3), (4, 5, 6)], @@ -1890,7 +1890,7 @@ def test_from_records_len0_with_columns(self): result = DataFrame.from_records([], index='foo', columns=['foo', 'bar']) - self.assertTrue(np.array_equal(result.columns, ['bar'])) + assert np.array_equal(result.columns, ['bar']) self.assertEqual(len(result), 0) self.assertEqual(result.index.name, 'foo') @@ -1917,8 +1917,8 @@ def test_from_dict(self): # construction df = DataFrame({'A': idx, 'B': dr}) - self.assertTrue(df['A'].dtype, 'M8[ns, US/Eastern') - self.assertTrue(df['A'].name == 'A') + assert df['A'].dtype, 'M8[ns, US/Eastern' + assert df['A'].name == 'A' tm.assert_series_equal(df['A'], Series(idx, name='A')) tm.assert_series_equal(df['B'], Series(dr, name='B')) @@ -1951,7 +1951,7 @@ def test_frame_datetime64_mixed_index_ctor_1681(self): # it works! 
d = DataFrame({'A': 'foo', 'B': ts}, index=dr) - self.assertTrue(d['B'].isnull().all()) + assert d['B'].isnull().all() def test_frame_timeseries_to_records(self): index = date_range('1/1/2000', periods=10) diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 6a49c88f17526..d3a675e3dc1a3 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -129,8 +129,8 @@ def test_to_records_with_multindex(self): data = np.zeros((8, 4)) df = DataFrame(data, index=index) r = df.to_records(index=True)['level_0'] - self.assertTrue('bar' in r) - self.assertTrue('one' not in r) + assert 'bar' in r + assert 'one' not in r def test_to_records_with_Mapping_type(self): import email diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index ed6d72c08fdae..427834b3dbf38 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -624,9 +624,9 @@ def test_astype_str(self): tm.assert_frame_equal(result, expected) result = str(self.tzframe) - self.assertTrue('0 2013-01-01 2013-01-01 00:00:00-05:00 ' - '2013-01-01 00:00:00+01:00' in result) - self.assertTrue('1 2013-01-02 ' - 'NaT NaT' in result) - self.assertTrue('2 2013-01-03 2013-01-03 00:00:00-05:00 ' - '2013-01-03 00:00:00+01:00' in result) + assert ('0 2013-01-01 2013-01-01 00:00:00-05:00 ' + '2013-01-01 00:00:00+01:00') in result + assert ('1 2013-01-02 ' + 'NaT NaT') in result + assert ('2 2013-01-03 2013-01-03 00:00:00-05:00 ' + '2013-01-03 00:00:00+01:00') in result diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index ebc125ae09818..8f6128ad4e525 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -391,11 +391,11 @@ def test_getitem_setitem_ix_negative_integers(self): with catch_warnings(record=True): self.frame.ix[:, [-1]] = 0 - self.assertTrue((self.frame['D'] == 0).all()) + assert (self.frame['D'] == 0).all() df = DataFrame(np.random.randn(8, 4)) with catch_warnings(record=True): - self.assertTrue(isnull(df.ix[:, [-1]].values).all()) + assert isnull(df.ix[:, [-1]].values).all() # #1942 a = DataFrame(randn(20, 2), index=[chr(x + 65) for x in range(20)]) @@ -416,7 +416,7 @@ def test_setattr_column(self): df = DataFrame({'foobar': 1}, index=lrange(10)) df.foobar = 5 - self.assertTrue((df.foobar == 5).all()) + assert (df.foobar == 5).all() def test_setitem(self): # not sure what else to do here @@ -441,7 +441,7 @@ def test_setitem(self): # set ndarray arr = randn(len(self.frame)) self.frame['col9'] = arr - self.assertTrue((self.frame['col9'] == arr).all()) + assert (self.frame['col9'] == arr).all() self.frame['col7'] = 5 assert((self.frame['col7'] == 5).all()) @@ -460,7 +460,7 @@ def f(): smaller['col10'] = ['1', '2'] pytest.raises(com.SettingWithCopyError, f) self.assertEqual(smaller['col10'].dtype, np.object_) - self.assertTrue((smaller['col10'] == ['1', '2']).all()) + assert (smaller['col10'] == ['1', '2']).all() # with a dtype for dtype in ['int32', 'int64', 'float32', 'float64']: @@ -487,7 +487,7 @@ def test_setitem_always_copy(self): self.frame['E'] = s self.frame['E'][5:10] = nan - self.assertTrue(notnull(s[5:10]).all()) + assert notnull(s[5:10]).all() def test_setitem_boolean(self): df = self.frame.copy() @@ -552,7 +552,7 @@ def test_setitem_cast(self): # cast if pass array of course self.frame['B'] = np.arange(len(self.frame)) - self.assertTrue(issubclass(self.frame['B'].dtype.type, np.integer)) + assert 
issubclass(self.frame['B'].dtype.type, np.integer) self.frame['foo'] = 'bar' self.frame['foo'] = 0 @@ -795,7 +795,7 @@ def test_getitem_fancy_slice_integers_step(self): # this is OK result = df.iloc[:8:2] # noqa df.iloc[:8:2] = np.nan - self.assertTrue(isnull(df.iloc[:8:2]).values.all()) + assert isnull(df.iloc[:8:2]).values.all() def test_getitem_setitem_integer_slice_keyerrors(self): df = DataFrame(np.random.randn(10, 5), index=lrange(0, 20, 2)) @@ -803,12 +803,12 @@ def test_getitem_setitem_integer_slice_keyerrors(self): # this is OK cp = df.copy() cp.iloc[4:10] = 0 - self.assertTrue((cp.iloc[4:10] == 0).values.all()) + assert (cp.iloc[4:10] == 0).values.all() # so is this cp = df.copy() cp.iloc[3:11] = 0 - self.assertTrue((cp.iloc[3:11] == 0).values.all()) + assert (cp.iloc[3:11] == 0).values.all() result = df.iloc[2:6] result2 = df.loc[3:11] @@ -939,7 +939,7 @@ def test_fancy_getitem_slice_mixed(self): def f(): sliced['C'] = 4. pytest.raises(com.SettingWithCopyError, f) - self.assertTrue((self.frame['C'] == 4).all()) + assert (self.frame['C'] == 4).all() def test_fancy_setitem_int_labels(self): # integer index defers to label-based indexing @@ -1017,10 +1017,10 @@ def test_setitem_fancy_mixed_2d(self): with catch_warnings(record=True): self.mixed_frame.ix[:5, ['C', 'B', 'A']] = 5 result = self.mixed_frame.ix[:5, ['C', 'B', 'A']] - self.assertTrue((result.values == 5).all()) + assert (result.values == 5).all() self.mixed_frame.ix[5] = np.nan - self.assertTrue(isnull(self.mixed_frame.ix[5]).all()) + assert isnull(self.mixed_frame.ix[5]).all() self.mixed_frame.ix[5] = self.mixed_frame.ix[6] assert_series_equal(self.mixed_frame.ix[5], self.mixed_frame.ix[6], @@ -1030,7 +1030,7 @@ def test_setitem_fancy_mixed_2d(self): with catch_warnings(record=True): df = DataFrame({1: [1., 2., 3.], 2: [3, 4, 5]}) - self.assertTrue(df._is_mixed_type) + assert df._is_mixed_type df.ix[1] = [5, 10] @@ -1413,7 +1413,7 @@ def test_getitem_setitem_float_labels(self): df.loc[1:2] = 0 result = df[1:2] - self.assertTrue((result == 0).all().all()) + assert (result == 0).all().all() # #2727 index = Index([1.0, 2.5, 3.5, 4.5, 5.0]) @@ -1437,13 +1437,13 @@ def f(): result = cp.iloc[1.0:5] == 0 # noqa pytest.raises(TypeError, f) - self.assertTrue(result.values.all()) - self.assertTrue((cp.iloc[0:1] == df.iloc[0:1]).values.all()) + assert result.values.all() + assert (cp.iloc[0:1] == df.iloc[0:1]).values.all() cp = df.copy() cp.iloc[4:5] = 0 - self.assertTrue((cp.iloc[4:5] == 0).values.all()) - self.assertTrue((cp.iloc[0:4] == df.iloc[0:4]).values.all()) + assert (cp.iloc[4:5] == 0).values.all() + assert (cp.iloc[0:4] == df.iloc[0:4]).values.all() # float slicing result = df.loc[1.0:5] @@ -1469,7 +1469,7 @@ def f(): cp = df.copy() cp.loc[1.0:5.0] = 0 result = cp.loc[1.0:5.0] - self.assertTrue((result == 0).values.all()) + assert (result == 0).values.all() def test_setitem_single_column_mixed(self): df = DataFrame(randn(5, 3), index=['a', 'b', 'c', 'd', 'e'], @@ -1492,15 +1492,15 @@ def test_setitem_single_column_mixed_datetime(self): # set an allowable datetime64 type df.loc['b', 'timestamp'] = iNaT - self.assertTrue(isnull(df.loc['b', 'timestamp'])) + assert isnull(df.loc['b', 'timestamp']) # allow this syntax df.loc['c', 'timestamp'] = nan - self.assertTrue(isnull(df.loc['c', 'timestamp'])) + assert isnull(df.loc['c', 'timestamp']) # allow this syntax df.loc['d', :] = nan - self.assertTrue(isnull(df.loc['c', :]).all() == False) # noqa + assert not isnull(df.loc['c', :]).all() # as of GH 3216 this will now work! 
# try to set with a list like item @@ -1694,8 +1694,8 @@ def test_set_value_resize(self): res = self.frame.copy() res3 = res.set_value('foobar', 'baz', 5) - self.assertTrue(is_float_dtype(res3['baz'])) - self.assertTrue(isnull(res3['baz'].drop(['foobar'])).all()) + assert is_float_dtype(res3['baz']) + assert isnull(res3['baz'].drop(['foobar'])).all() pytest.raises(ValueError, res3.set_value, 'foobar', 'baz', 'sam') def test_set_value_with_index_dtype_change(self): @@ -1733,15 +1733,14 @@ def test_get_set_value_no_partial_indexing(self): def test_single_element_ix_dont_upcast(self): self.frame['E'] = 1 - self.assertTrue(issubclass(self.frame['E'].dtype.type, - (int, np.integer))) + assert issubclass(self.frame['E'].dtype.type, (int, np.integer)) with catch_warnings(record=True): result = self.frame.ix[self.frame.index[5], 'E'] - self.assertTrue(is_integer(result)) + assert is_integer(result) result = self.frame.loc[self.frame.index[5], 'E'] - self.assertTrue(is_integer(result)) + assert is_integer(result) # GH 11617 df = pd.DataFrame(dict(a=[1.23])) @@ -1749,9 +1748,9 @@ def test_single_element_ix_dont_upcast(self): with catch_warnings(record=True): result = df.ix[0, "b"] - self.assertTrue(is_integer(result)) + assert is_integer(result) result = df.loc[0, "b"] - self.assertTrue(is_integer(result)) + assert is_integer(result) expected = Series([666], [0], name='b') with catch_warnings(record=True): @@ -1812,7 +1811,7 @@ def test_iloc_col(self): def f(): result[8] = 0. pytest.raises(com.SettingWithCopyError, f) - self.assertTrue((df[8] == 0).all()) + assert (df[8] == 0).all() # list of integers result = df.iloc[:, [1, 2, 4, 6]] @@ -1867,7 +1866,7 @@ def test_iloc_duplicates(self): def test_iloc_sparse_propegate_fill_value(self): from pandas.core.sparse.api import SparseDataFrame df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999) - self.assertTrue(len(df['A'].sp_values) == len(df.iloc[:, 0].sp_values)) + assert len(df['A'].sp_values) == len(df.iloc[:, 0].sp_values) def test_iat(self): @@ -1934,10 +1933,10 @@ def test_reindex_frame_add_nat(self): df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng}) result = df.reindex(lrange(15)) - self.assertTrue(np.issubdtype(result['B'].dtype, np.dtype('M8[ns]'))) + assert np.issubdtype(result['B'].dtype, np.dtype('M8[ns]')) mask = com.isnull(result)['B'] - self.assertTrue(mask[-5:].all()) + assert mask[-5:].all() assert not mask[:-5].any() def test_set_dataframe_column_ns_dtype(self): @@ -2178,7 +2177,7 @@ def test_xs(self): xs = self.frame.xs(idx) for item, value in compat.iteritems(xs): if np.isnan(value): - self.assertTrue(np.isnan(self.frame[item][idx])) + assert np.isnan(self.frame[item][idx]) else: self.assertEqual(value, self.frame[item][idx]) @@ -2204,7 +2203,7 @@ def test_xs(self): # view is returned if possible series = self.frame.xs('A', axis=1) series[:] = 5 - self.assertTrue((expected == 5).all()) + assert (expected == 5).all() def test_xs_corner(self): # pathological mixed-type reordering case @@ -2254,7 +2253,7 @@ def test_xs_view(self): index=lrange(4), columns=lrange(5)) dm.xs(2)[:] = 10 - self.assertTrue((dm.xs(2) == 10).all()) + assert (dm.xs(2) == 10).all() def test_index_namedtuple(self): from collections import namedtuple @@ -2350,7 +2349,7 @@ def _check_get(df, cond, check_dtypes=True): # dtypes if check_dtypes: - self.assertTrue((rs.dtypes == df.dtypes).all()) + assert (rs.dtypes == df.dtypes).all() # check getting for df in [default_frame, self.mixed_frame, @@ -2399,7 +2398,7 @@ def _check_align(df, cond, other, 
check_dtypes=True): # can't check dtype when other is an ndarray if check_dtypes and not isinstance(other, np.ndarray): - self.assertTrue((rs.dtypes == df.dtypes).all()) + assert (rs.dtypes == df.dtypes).all() for df in [self.mixed_frame, self.mixed_float, self.mixed_int]: @@ -2939,7 +2938,7 @@ def test_setitem(self): # are copies) b1 = df._data.blocks[1] b2 = df._data.blocks[2] - self.assertTrue(b1.values.equals(b2.values)) + assert b1.values.equals(b2.values) assert id(b1.values.values.base) != id(b2.values.values.base) # with nan @@ -2958,7 +2957,7 @@ def test_set_reset(self): # set/reset df = DataFrame({'A': [0, 1, 2]}, index=idx) result = df.reset_index() - self.assertTrue(result['foo'].dtype, 'M8[ns, US/Eastern') + assert result['foo'].dtype, 'M8[ns, US/Eastern' df = result.set_index('foo') tm.assert_index_equal(df.index, idx) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 721cee7f3141b..17f12679ae92e 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -78,7 +78,7 @@ def test_dropIncompleteRows(self): samesize_frame = frame.dropna(subset=['bar']) assert_series_equal(frame['foo'], original) - self.assertTrue((frame['bar'] == 5).all()) + assert (frame['bar'] == 5).all() inp_frame2.dropna(subset=['bar'], inplace=True) tm.assert_index_equal(samesize_frame.index, self.frame.index) tm.assert_index_equal(inp_frame2.index, self.frame.index) @@ -187,13 +187,12 @@ def test_fillna(self): tf.loc[tf.index[-5:], 'A'] = nan zero_filled = self.tsframe.fillna(0) - self.assertTrue((zero_filled.loc[zero_filled.index[:5], 'A'] == 0 - ).all()) + assert (zero_filled.loc[zero_filled.index[:5], 'A'] == 0).all() padded = self.tsframe.fillna(method='pad') - self.assertTrue(np.isnan(padded.loc[padded.index[:5], 'A']).all()) - self.assertTrue((padded.loc[padded.index[-5:], 'A'] == - padded.loc[padded.index[-5], 'A']).all()) + assert np.isnan(padded.loc[padded.index[:5], 'A']).all() + assert (padded.loc[padded.index[-5:], 'A'] == + padded.loc[padded.index[-5], 'A']).all() # mixed type mf = self.mixed_frame @@ -502,7 +501,7 @@ def test_fill_corner(self): mf.loc[mf.index[-10:], 'A'] = nan filled = self.mixed_frame.fillna(value=0) - self.assertTrue((filled.loc[filled.index[5:20], 'foo'] == 0).all()) + assert (filled.loc[filled.index[5:20], 'foo'] == 0).all() del self.mixed_frame['foo'] empty_float = self.frame.reindex(columns=[]) diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index d5035f2908528..fbd1b7be3e431 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -132,16 +132,16 @@ def test_insert(self): # new item df['x'] = df['a'].astype('float32') result = Series(dict(float64=5, float32=1)) - self.assertTrue((df.get_dtype_counts() == result).all()) + assert (df.get_dtype_counts() == result).all() # replacing current (in different block) df['a'] = df['a'].astype('float32') result = Series(dict(float64=4, float32=2)) - self.assertTrue((df.get_dtype_counts() == result).all()) + assert (df.get_dtype_counts() == result).all() df['y'] = df['a'].astype('int32') result = Series(dict(float64=4, float32=2, int32=1)) - self.assertTrue((df.get_dtype_counts() == result).all()) + assert (df.get_dtype_counts() == result).all() with tm.assert_raises_regex(ValueError, 'already exists'): df.insert(1, 'a', df['b']) @@ -222,7 +222,7 @@ def test_pop_non_unique_cols(self): self.assertEqual(type(res), DataFrame) self.assertEqual(len(res), 2) 
self.assertEqual(len(df.columns), 1) - self.assertTrue("b" in df.columns) + assert "b" in df.columns assert "a" not in df.columns self.assertEqual(len(df.index), 2) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 5c141b6a46eec..61dd92fcd1fab 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -151,7 +151,7 @@ def check(result, expected=None): df = DataFrame([[1, 2.5], [3, 4.5]], index=[1, 2], columns=['x', 'x']) result = df.values expected = np.array([[1, 2.5], [3, 4.5]]) - self.assertTrue((result == expected).all().all()) + assert (result == expected).all().all() # rename, GH 4403 df4 = DataFrame( @@ -448,7 +448,7 @@ def test_as_matrix_duplicates(self): expected = np.array([[1, 2, 'a', 'b'], [1, 2, 'a', 'b']], dtype=object) - self.assertTrue(np.array_equal(result, expected)) + assert np.array_equal(result, expected) def test_set_value_by_index(self): # See gh-12344 diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 7f87666d5ecc4..efe167297627a 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -43,7 +43,7 @@ def test_operators(self): if not np.isnan(val): self.assertEqual(val, origVal) else: - self.assertTrue(np.isnan(origVal)) + assert np.isnan(origVal) for col, series in compat.iteritems(seriesSum): for idx, val in compat.iteritems(series): @@ -51,7 +51,7 @@ def test_operators(self): if not np.isnan(val): self.assertEqual(val, origVal) else: - self.assertTrue(np.isnan(origVal)) + assert np.isnan(origVal) added = self.frame2 + self.frame2 expected = self.frame2 * 2 @@ -68,7 +68,7 @@ def test_operators(self): DataFrame(index=[0], dtype=dtype), ] for df in frames: - self.assertTrue((df + df).equals(df)) + assert (df + df).equals(df) assert_frame_equal(df + df, df) def test_ops_np_scalar(self): @@ -573,7 +573,7 @@ def _check_unaligned_frame(meth, op, df, other): assert_frame_equal(rs, xp) # DataFrame - self.assertTrue(df.eq(df).values.all()) + assert df.eq(df).values.all() assert not df.ne(df).values.any() for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: f = getattr(df, op) @@ -636,7 +636,7 @@ def _test_seq(df, idx_ser, col_ser): rs = df.eq(df) assert not rs.loc[0, 0] rs = df.ne(df) - self.assertTrue(rs.loc[0, 0]) + assert rs.loc[0, 0] rs = df.gt(df) assert not rs.loc[0, 0] rs = df.lt(df) @@ -654,7 +654,7 @@ def _test_seq(df, idx_ser, col_ser): rs = df.gt(df2) assert not rs.values.any() rs = df.ne(df2) - self.assertTrue(rs.values.all()) + assert rs.values.all() arr3 = np.array([2j, np.nan, None]) df3 = DataFrame({'a': arr3}) @@ -766,31 +766,30 @@ def test_combineFrame(self): exp.loc[~exp.index.isin(indexer)] = np.nan tm.assert_series_equal(added['A'], exp.loc[added['A'].index]) - self.assertTrue( - np.isnan(added['C'].reindex(frame_copy.index)[:5]).all()) + assert np.isnan(added['C'].reindex(frame_copy.index)[:5]).all() # assert(False) - self.assertTrue(np.isnan(added['D']).all()) + assert np.isnan(added['D']).all() self_added = self.frame + self.frame tm.assert_index_equal(self_added.index, self.frame.index) added_rev = frame_copy + self.frame - self.assertTrue(np.isnan(added['D']).all()) - self.assertTrue(np.isnan(added_rev['D']).all()) + assert np.isnan(added['D']).all() + assert np.isnan(added_rev['D']).all() # corner cases # empty plus_empty = self.frame + self.empty - self.assertTrue(np.isnan(plus_empty.values).all()) + assert np.isnan(plus_empty.values).all() empty_plus = 
self.empty + self.frame - self.assertTrue(np.isnan(empty_plus.values).all()) + assert np.isnan(empty_plus.values).all() empty_empty = self.empty + self.empty - self.assertTrue(empty_empty.empty) + assert empty_empty.empty # out of order reverse = self.frame.reindex(columns=self.frame.columns[::-1]) @@ -831,7 +830,7 @@ def test_combineSeries(self): for key, s in compat.iteritems(self.frame): assert_series_equal(larger_added[key], s + series[key]) assert 'E' in larger_added - self.assertTrue(np.isnan(larger_added['E']).all()) + assert np.isnan(larger_added['E']).all() # vs mix (upcast) as needed added = self.mixed_float + series @@ -866,7 +865,7 @@ def test_combineSeries(self): if col.name == ts.name: self.assertEqual(result.name, 'A') else: - self.assertTrue(result.name is None) + assert result.name is None smaller_frame = self.tsframe[:-5] smaller_added = smaller_frame.add(ts, axis='index') @@ -1045,8 +1044,8 @@ def test_combine_generic(self): combined = df1.combine(df2, np.add) combined2 = df2.combine(df1, np.add) - self.assertTrue(combined['D'].isnull().all()) - self.assertTrue(combined2['D'].isnull().all()) + assert combined['D'].isnull().all() + assert combined2['D'].isnull().all() chunk = combined.loc[combined.index[:-5], ['A', 'B', 'C']] chunk2 = combined2.loc[combined2.index[:-5], ['A', 'B', 'C']] diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py index 194b6c0e251bc..0ca37de6bf2d4 100644 --- a/pandas/tests/frame/test_period.py +++ b/pandas/tests/frame/test_period.py @@ -112,8 +112,8 @@ def _get_with_delta(delta, freq='A-DEC'): result1 = df.to_timestamp('5t', axis=1) result2 = df.to_timestamp('t', axis=1) expected = pd.date_range('2001-01-01', '2009-01-01', freq='AS') - self.assertTrue(isinstance(result1.columns, DatetimeIndex)) - self.assertTrue(isinstance(result2.columns, DatetimeIndex)) + assert isinstance(result1.columns, DatetimeIndex) + assert isinstance(result2.columns, DatetimeIndex) tm.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) tm.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) # PeriodIndex.to_timestamp always use 'infer' diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 2232205a57326..575906fb5c8b2 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -157,10 +157,10 @@ def test_eval_resolvers_as_list(self): df = DataFrame(randn(10, 2), columns=list('ab')) dict1 = {'a': 1} dict2 = {'b': 2} - self.assertTrue(df.eval('a + b', resolvers=[dict1, dict2]) == - dict1['a'] + dict2['b']) - self.assertTrue(pd.eval('a + b', resolvers=[dict1, dict2]) == - dict1['a'] + dict2['b']) + assert (df.eval('a + b', resolvers=[dict1, dict2]) == + dict1['a'] + dict2['b']) + assert (pd.eval('a + b', resolvers=[dict1, dict2]) == + dict1['a'] + dict2['b']) class TestDataFrameQueryWithMultiIndex(tm.TestCase): diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index 262734d093d4e..87075e6d6e631 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -781,7 +781,7 @@ def test_replace_dtypes(self): # bools df = DataFrame({'bools': [True, False, True]}) result = df.replace(False, True) - self.assertTrue(result.values.all()) + assert result.values.all() # complex blocks df = DataFrame({'complex': [1j, 2j, 3j]}) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index bcb85b6e44d54..dbdbebddcc0b5 100644 --- a/pandas/tests/frame/test_repr_info.py 
+++ b/pandas/tests/frame/test_repr_info.py @@ -79,7 +79,7 @@ def test_repr(self): def test_repr_dimensions(self): df = DataFrame([[1, 2, ], [3, 4]]) with option_context('display.show_dimensions', True): - self.assertTrue("2 rows x 2 columns" in repr(df)) + assert "2 rows x 2 columns" in repr(df) with option_context('display.show_dimensions', False): assert "2 rows x 2 columns" not in repr(df) @@ -211,7 +211,7 @@ def test_info_wide(self): io = StringIO() df.info(buf=io, max_cols=101) rs = io.getvalue() - self.assertTrue(len(rs.splitlines()) > 100) + assert len(rs.splitlines()) > 100 xp = rs set_option('display.max_info_columns', 101) @@ -303,18 +303,18 @@ def test_info_memory_usage(self): # display memory usage case df.info(buf=buf, memory_usage=True) res = buf.getvalue().splitlines() - self.assertTrue("memory usage: " in res[-1]) + assert "memory usage: " in res[-1] # do not display memory usage cas df.info(buf=buf, memory_usage=False) res = buf.getvalue().splitlines() - self.assertTrue("memory usage: " not in res[-1]) + assert "memory usage: " not in res[-1] df.info(buf=buf, memory_usage=True) res = buf.getvalue().splitlines() # memory usage is a lower bound, so print it as XYZ+ MB - self.assertTrue(re.match(r"memory usage: [^+]+\+", res[-1])) + assert re.match(r"memory usage: [^+]+\+", res[-1]) df.iloc[:, :5].info(buf=buf, memory_usage=True) res = buf.getvalue().splitlines() @@ -325,11 +325,11 @@ def test_info_memory_usage(self): df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo']) df_with_object_index.info(buf=buf, memory_usage=True) res = buf.getvalue().splitlines() - self.assertTrue(re.match(r"memory usage: [^+]+\+", res[-1])) + assert re.match(r"memory usage: [^+]+\+", res[-1]) df_with_object_index.info(buf=buf, memory_usage='deep') res = buf.getvalue().splitlines() - self.assertTrue(re.match(r"memory usage: [^+]+$", res[-1])) + assert re.match(r"memory usage: [^+]+$", res[-1]) self.assertGreater(df_with_object_index.memory_usage(index=True, deep=True).sum(), @@ -380,7 +380,7 @@ def test_info_memory_usage(self): # sys.getsizeof will call the .memory_usage with # deep=True, and add on some GC overhead diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df) - self.assertTrue(abs(diff) < 100) + assert abs(diff) < 100 def test_info_memory_usage_qualified(self): @@ -394,7 +394,7 @@ def test_info_memory_usage_qualified(self): df = DataFrame(1, columns=list('ab'), index=list('ABC')) df.info(buf=buf) - self.assertTrue('+' in buf.getvalue()) + assert '+' in buf.getvalue() buf = StringIO() df = DataFrame(1, columns=list('ab'), @@ -408,7 +408,7 @@ def test_info_memory_usage_qualified(self): index=pd.MultiIndex.from_product( [range(3), ['foo', 'bar']])) df.info(buf=buf) - self.assertTrue('+' in buf.getvalue()) + assert '+' in buf.getvalue() def test_info_memory_usage_bug_on_multiindex(self): # GH 14308 @@ -429,10 +429,10 @@ def memory_usage(f): unstacked = df.unstack('id') self.assertEqual(df.values.nbytes, unstacked.values.nbytes) - self.assertTrue(memory_usage(df) > memory_usage(unstacked)) + assert memory_usage(df) > memory_usage(unstacked) # high upper bound - self.assertTrue(memory_usage(unstacked) - memory_usage(df) < 2000) + assert memory_usage(unstacked) - memory_usage(df) < 2000 def test_info_categorical(self): # GH14298 diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index c1905fa0476c4..9c48233ff29cd 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -445,7 +445,7 @@ def 
test_unstack_to_series(self): # check reversibility data = self.frame.unstack() - self.assertTrue(isinstance(data, Series)) + assert isinstance(data, Series) undo = data.unstack().T assert_frame_equal(undo, self.frame) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index db4f4b909f7cb..ade696885c2e0 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -50,26 +50,26 @@ def custom_frame_function(self): cdf = CustomDataFrame(data) # Did we get back our own DF class? - self.assertTrue(isinstance(cdf, CustomDataFrame)) + assert isinstance(cdf, CustomDataFrame) # Do we get back our own Series class after selecting a column? cdf_series = cdf.col1 - self.assertTrue(isinstance(cdf_series, CustomSeries)) + assert isinstance(cdf_series, CustomSeries) self.assertEqual(cdf_series.custom_series_function(), 'OK') # Do we get back our own DF class after slicing row-wise? cdf_rows = cdf[1:5] - self.assertTrue(isinstance(cdf_rows, CustomDataFrame)) + assert isinstance(cdf_rows, CustomDataFrame) self.assertEqual(cdf_rows.custom_frame_function(), 'OK') # Make sure sliced part of multi-index frame is custom class mcol = pd.MultiIndex.from_tuples([('A', 'A'), ('A', 'B')]) cdf_multi = CustomDataFrame([[0, 1], [2, 3]], columns=mcol) - self.assertTrue(isinstance(cdf_multi['A'], CustomDataFrame)) + assert isinstance(cdf_multi['A'], CustomDataFrame) mcol = pd.MultiIndex.from_tuples([('A', ''), ('B', '')]) cdf_multi2 = CustomDataFrame([[0, 1], [2, 3]], columns=mcol) - self.assertTrue(isinstance(cdf_multi2['A'], CustomSeries)) + assert isinstance(cdf_multi2['A'], CustomSeries) def test_dataframe_metadata(self): df = tm.SubclassedDataFrame({'X': [1, 2, 3], 'Y': [1, 2, 3]}, @@ -142,7 +142,7 @@ class SubclassedPanel(Panel): index = MultiIndex.from_tuples([(0, 0), (0, 1), (0, 2)]) df = SubclassedFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]}, index=index) result = df.to_panel() - self.assertTrue(isinstance(result, SubclassedPanel)) + assert isinstance(result, SubclassedPanel) expected = SubclassedPanel([[[1, 2, 3]], [[4, 5, 6]]], items=['X', 'Y'], major_axis=[0], minor_axis=[0, 1, 2], diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 66af6aaca6513..910f04f0d63c6 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -122,14 +122,14 @@ def test_frame_ctor_datetime64_column(self): dates = np.asarray(rng) df = DataFrame({'A': np.random.randn(len(rng)), 'B': dates}) - self.assertTrue(np.issubdtype(df['B'].dtype, np.dtype('M8[ns]'))) + assert np.issubdtype(df['B'].dtype, np.dtype('M8[ns]')) def test_frame_add_datetime64_column(self): rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') df = DataFrame(index=np.arange(len(rng))) df['A'] = rng - self.assertTrue(np.issubdtype(df['A'].dtype, np.dtype('M8[ns]'))) + assert np.issubdtype(df['A'].dtype, np.dtype('M8[ns]')) def test_frame_datetime64_pre1900_repr(self): df = DataFrame({'year': date_range('1/1/1700', periods=50, @@ -154,7 +154,7 @@ def test_frame_add_datetime64_col_other_units(self): ex_vals = to_datetime(vals.astype('O')).values self.assertEqual(df[unit].dtype, ns_dtype) - self.assertTrue((df[unit].values == ex_vals).all()) + assert (df[unit].values == ex_vals).all() # Test insertion into existing datetime64 column df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) @@ -169,7 +169,7 @@ def test_frame_add_datetime64_col_other_units(self): tmp['dates'] = vals ex_vals = 
to_datetime(vals.astype('O')).values
-        self.assertTrue((tmp['dates'].values == ex_vals).all())
+        assert (tmp['dates'].values == ex_vals).all()

     def test_shift(self):
         # naive shift
@@ -422,9 +422,9 @@ def test_at_time_frame(self):
         rng = date_range('1/1/2000', '1/5/2000', freq='5min')
         ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
         rs = ts.at_time(rng[1])
-        self.assertTrue((rs.index.hour == rng[1].hour).all())
-        self.assertTrue((rs.index.minute == rng[1].minute).all())
-        self.assertTrue((rs.index.second == rng[1].second).all())
+        assert (rs.index.hour == rng[1].hour).all()
+        assert (rs.index.minute == rng[1].minute).all()
+        assert (rs.index.second == rng[1].second).all()

         result = ts.at_time('9:30')
         expected = ts.at_time(time(9, 30))
@@ -467,14 +467,14 @@ def test_between_time_frame(self):
             for rs in filtered.index:
                 t = rs.time()
                 if inc_start:
-                    self.assertTrue(t >= stime)
+                    assert t >= stime
                 else:
-                    self.assertTrue(t > stime)
+                    assert t > stime

                 if inc_end:
-                    self.assertTrue(t <= etime)
+                    assert t <= etime
                 else:
-                    self.assertTrue(t < etime)
+                    assert t < etime

         result = ts.between_time('00:00', '01:00')
         expected = ts.between_time(stime, etime)
@@ -499,14 +499,14 @@ def test_between_time_frame(self):
             for rs in filtered.index:
                 t = rs.time()
                 if inc_start:
-                    self.assertTrue((t >= stime) or (t <= etime))
+                    assert (t >= stime) or (t <= etime)
                 else:
-                    self.assertTrue((t > stime) or (t <= etime))
+                    assert (t > stime) or (t <= etime)

                 if inc_end:
-                    self.assertTrue((t <= etime) or (t >= stime))
+                    assert (t <= etime) or (t >= stime)
                 else:
-                    self.assertTrue((t < etime) or (t >= stime))
+                    assert (t < etime) or (t >= stime)

     def test_operation_on_NaT(self):
         # Both NaT and Timestamp are in DataFrame.
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
index ffce525434ab5..11c10f1982558 100644
--- a/pandas/tests/frame/test_to_csv.py
+++ b/pandas/tests/frame/test_to_csv.py
@@ -548,7 +548,7 @@ def _make_frame(names=None):
         df = _make_frame(True)
         df.to_csv(path, tupleize_cols=False, index=False)
         result = read_csv(path, header=[0, 1], tupleize_cols=False)
-        self.assertTrue(all([x is None for x in result.columns.names]))
+        assert all([x is None for x in result.columns.names])
         result.columns.names = df.columns.names
         assert_frame_equal(df, result)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 0696473d0449f..278682ccb8d45 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -446,8 +446,8 @@ def test_groupby_duplicated_column_errormsg(self):
         grouped = df.groupby('B')
         c = grouped.count()
-        self.assertTrue(c.columns.nlevels == 1)
-        self.assertTrue(c.columns.size == 3)
+        assert c.columns.nlevels == 1
+        assert c.columns.size == 3

     def test_groupby_dict_mapping(self):
         # GH #679
@@ -798,7 +798,7 @@ def test_with_na(self):

             assert_series_equal(agged, expected, check_dtype=False)

-            # self.assertTrue(issubclass(agged.dtype.type, np.integer))
+            # assert issubclass(agged.dtype.type, np.integer)

             # explicitly return a float from my function
             def f(x):
@@ -808,7 +808,7 @@ def f(x):
             expected = Series([4, 2], index=['bar', 'foo'])
             assert_series_equal(agged, expected, check_dtype=False)

-            self.assertTrue(issubclass(agged.dtype.type, np.dtype(dtype).type))
+            assert issubclass(agged.dtype.type, np.dtype(dtype).type)

     def test_indices_concatenation_order(self):

@@ -995,7 +995,7 @@ def test_frame_groupby(self):

         for k, v in compat.iteritems(groups):
             samething = self.tsframe.index.take(indices[k])
-            
self.assertTrue((samething == v).all()) + assert (samething == v).all() def test_grouping_is_iterable(self): # this code path isn't used anywhere else @@ -1637,16 +1637,16 @@ def test_max_min_non_numeric(self): 'ss': 4 * ['mama']}) result = aa.groupby('nn').max() - self.assertTrue('ss' in result) + assert 'ss' in result result = aa.groupby('nn').max(numeric_only=False) - self.assertTrue('ss' in result) + assert 'ss' in result result = aa.groupby('nn').min() - self.assertTrue('ss' in result) + assert 'ss' in result result = aa.groupby('nn').min(numeric_only=False) - self.assertTrue('ss' in result) + assert 'ss' in result def test_arg_passthru(self): # make sure that we are passing thru kwargs @@ -1970,11 +1970,11 @@ def test_apply_series_yield_constant(self): def test_apply_frame_yield_constant(self): # GH13568 result = self.df.groupby(['A', 'B']).apply(len) - self.assertTrue(isinstance(result, Series)) + assert isinstance(result, Series) assert result.name is None result = self.df.groupby(['A', 'B'])[['C', 'D']].apply(len) - self.assertTrue(isinstance(result, Series)) + assert isinstance(result, Series) assert result.name is None def test_apply_frame_to_series(self): @@ -2459,7 +2459,7 @@ def f(g): return g result = grouped.apply(f) - self.assertTrue('value3' in result) + assert 'value3' in result def test_groupby_wrong_multi_labels(self): from pandas import read_csv @@ -2562,7 +2562,7 @@ def test_cython_grouper_series_bug_noncontig(self): inds = np.tile(lrange(10), 10) result = obj.groupby(inds).agg(Series.median) - self.assertTrue(result.isnull().all()) + assert result.isnull().all() def test_series_grouper_noncontig_index(self): index = Index(tm.rands_array(10, 100)) @@ -3254,7 +3254,7 @@ def test_groupby_multiindex_not_lexsorted(self): lexsorted_mi = MultiIndex.from_tuples( [('a', ''), ('b1', 'c1'), ('b2', 'c2')], names=['b', 'c']) lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi) - self.assertTrue(lexsorted_df.columns.is_lexsorted()) + assert lexsorted_df.columns.is_lexsorted() # define the non-lexsorted version not_lexsorted_df = DataFrame(columns=['a', 'b', 'c', 'd'], diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index bf2f1f1f9cbc5..f583fa7aa7e86 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -42,9 +42,9 @@ def test_first_last_nth(self): grouped['B'].nth(0) self.df.loc[self.df['A'] == 'foo', 'B'] = np.nan - self.assertTrue(isnull(grouped['B'].first()['foo'])) - self.assertTrue(isnull(grouped['B'].last()['foo'])) - self.assertTrue(isnull(grouped['B'].nth(0)['foo'])) + assert isnull(grouped['B'].first()['foo']) + assert isnull(grouped['B'].last()['foo']) + assert isnull(grouped['B'].nth(0)['foo']) # v0.14.0 whatsnew df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) @@ -154,7 +154,7 @@ def test_nth(self): expected = s.groupby(g).first() expected2 = s.groupby(g).apply(lambda x: x.iloc[0]) assert_series_equal(expected2, expected, check_names=False) - self.assertTrue(expected.name, 0) + assert expected.name, 0 self.assertEqual(expected.name, 1) # validate first diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index ae0413615f738..db3fdfa605b5b 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -80,11 +80,11 @@ def test_groupby_with_timegrouper_methods(self): for df in [df_original, df_sorted]: df = df.set_index('Date', drop=False) g = df.groupby(pd.TimeGrouper('6M')) - 
self.assertTrue(g.group_keys) - self.assertTrue(isinstance(g.grouper, pd.core.groupby.BinGrouper)) + assert g.group_keys + assert isinstance(g.grouper, pd.core.groupby.BinGrouper) groups = g.groups - self.assertTrue(isinstance(groups, dict)) - self.assertTrue(len(groups) == 3) + assert isinstance(groups, dict) + assert len(groups) == 3 def test_timegrouper_with_reg_groups(self): @@ -528,15 +528,15 @@ def test_groupby_first_datetime64(self): df = DataFrame([(1, 1351036800000000000), (2, 1351036800000000000)]) df[1] = df[1].view('M8[ns]') - self.assertTrue(issubclass(df[1].dtype.type, np.datetime64)) + assert issubclass(df[1].dtype.type, np.datetime64) result = df.groupby(level=0).first() got_dt = result[1].dtype - self.assertTrue(issubclass(got_dt.type, np.datetime64)) + assert issubclass(got_dt.type, np.datetime64) result = df[1].groupby(level=0).first() got_dt = result.dtype - self.assertTrue(issubclass(got_dt.type, np.datetime64)) + assert issubclass(got_dt.type, np.datetime64) def test_groupby_max_datetime64(self): # GH 5869 diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 23b1de76234c3..d9dccc39f469f 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -31,7 +31,7 @@ def setup_indices(self): def verify_pickle(self, index): unpickled = tm.round_trip_pickle(index) - self.assertTrue(index.equals(unpickled)) + assert index.equals(unpickled) def test_pickle_compat_construction(self): # this is testing for pickle compat @@ -134,8 +134,8 @@ def test_reindex_base(self): def test_ndarray_compat_properties(self): idx = self.create_index() - self.assertTrue(idx.T.equals(idx)) - self.assertTrue(idx.transpose().equals(idx)) + assert idx.T.equals(idx) + assert idx.transpose().equals(idx) values = idx.values for prop in self._compat_props: @@ -155,8 +155,8 @@ def test_str(self): # test the string repr idx = self.create_index() idx.name = 'foo' - self.assertTrue("'foo'" in str(idx)) - self.assertTrue(idx.__class__.__name__ in str(idx)) + assert "'foo'" in str(idx) + assert idx.__class__.__name__ in str(idx) def test_dtype_str(self): for idx in self.indices.values(): @@ -304,7 +304,7 @@ def test_duplicates(self): continue idx = self._holder([ind[0]] * 5) assert not idx.is_unique - self.assertTrue(idx.has_duplicates) + assert idx.has_duplicates # GH 10115 # preserve names @@ -325,7 +325,7 @@ def test_get_unique_index(self): # We test against `idx_unique`, so first we make sure it's unique # and doesn't contain nans. 
- self.assertTrue(idx_unique.is_unique) + assert idx_unique.is_unique try: assert not idx_unique.hasnans except NotImplementedError: @@ -349,7 +349,7 @@ def test_get_unique_index(self): vals_unique = vals[:2] idx_nan = ind._shallow_copy(vals) idx_unique_nan = ind._shallow_copy(vals_unique) - self.assertTrue(idx_unique_nan.is_unique) + assert idx_unique_nan.is_unique self.assertEqual(idx_nan.dtype, ind.dtype) self.assertEqual(idx_unique_nan.dtype, ind.dtype) @@ -390,10 +390,10 @@ def test_memory_usage(self): # RangeIndex, IntervalIndex # don't have engines if not isinstance(index, (RangeIndex, IntervalIndex)): - self.assertTrue(result2 > result) + assert result2 > result if index.inferred_type == 'object': - self.assertTrue(result3 > result2) + assert result3 > result2 else: @@ -453,7 +453,7 @@ def test_take(self): result = ind.take(indexer) expected = ind[indexer] - self.assertTrue(result.equals(expected)) + assert result.equals(expected) if not isinstance(ind, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): @@ -546,7 +546,7 @@ def test_intersection_base(self): if isinstance(idx, CategoricalIndex): pass else: - self.assertTrue(tm.equalContents(intersect, second)) + assert tm.equalContents(intersect, second) # GH 10149 cases = [klass(second.values) @@ -560,7 +560,7 @@ def test_intersection_base(self): pass else: result = first.intersection(case) - self.assertTrue(tm.equalContents(result, second)) + assert tm.equalContents(result, second) if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" @@ -573,7 +573,7 @@ def test_union_base(self): second = idx[:5] everything = idx union = first.union(second) - self.assertTrue(tm.equalContents(union, everything)) + assert tm.equalContents(union, everything) # GH 10149 cases = [klass(second.values) @@ -587,7 +587,7 @@ def test_union_base(self): pass else: result = first.union(case) - self.assertTrue(tm.equalContents(result, everything)) + assert tm.equalContents(result, everything) if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" @@ -604,7 +604,7 @@ def test_difference_base(self): if isinstance(idx, CategoricalIndex): pass else: - self.assertTrue(tm.equalContents(result, answer)) + assert tm.equalContents(result, answer) # GH 10149 cases = [klass(second.values) @@ -621,7 +621,7 @@ def test_difference_base(self): tm.assert_numpy_array_equal(result.asi8, answer.asi8) else: result = first.difference(case) - self.assertTrue(tm.equalContents(result, answer)) + assert tm.equalContents(result, answer) if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" @@ -637,7 +637,7 @@ def test_symmetric_difference(self): else: answer = idx[[0, -1]] result = first.symmetric_difference(second) - self.assertTrue(tm.equalContents(result, answer)) + assert tm.equalContents(result, answer) # GH 10149 cases = [klass(second.values) @@ -651,7 +651,7 @@ def test_symmetric_difference(self): pass else: result = first.symmetric_difference(case) - self.assertTrue(tm.equalContents(result, answer)) + assert tm.equalContents(result, answer) if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" @@ -671,7 +671,7 @@ def test_insert_base(self): continue # test 0th element - self.assertTrue(idx[0:4].equals(result.insert(0, idx[0]))) + assert idx[0:4].equals(result.insert(0, idx[0])) def test_delete_base(self): @@ -686,12 +686,12 @@ def test_delete_base(self): expected = idx[1:] result = idx.delete(0) - self.assertTrue(result.equals(expected)) + assert 
result.equals(expected) self.assertEqual(result.name, expected.name) expected = idx[:-1] result = idx.delete(-1) - self.assertTrue(result.equals(expected)) + assert result.equals(expected) self.assertEqual(result.name, expected.name) with pytest.raises((IndexError, ValueError)): @@ -701,9 +701,9 @@ def test_delete_base(self): def test_equals(self): for name, idx in compat.iteritems(self.indices): - self.assertTrue(idx.equals(idx)) - self.assertTrue(idx.equals(idx.copy())) - self.assertTrue(idx.equals(idx.astype(object))) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) assert not idx.equals(list(idx)) assert not idx.equals(np.array(idx)) @@ -711,8 +711,8 @@ def test_equals(self): # Cannot pass in non-int64 dtype to RangeIndex if not isinstance(idx, RangeIndex): same_values = Index(idx, dtype=object) - self.assertTrue(idx.equals(same_values)) - self.assertTrue(same_values.equals(idx)) + assert idx.equals(same_values) + assert same_values.equals(idx) if idx.nlevels == 1: # do not test MultiIndex @@ -865,7 +865,7 @@ def test_hasnans_isnans(self): expected = np.array([False] * len(idx), dtype=bool) expected[1] = True tm.assert_numpy_array_equal(idx._isnan, expected) - self.assertTrue(idx.hasnans) + assert idx.hasnans def test_fillna(self): # GH 11343 @@ -905,7 +905,7 @@ def test_fillna(self): expected = np.array([False] * len(idx), dtype=bool) expected[1] = True tm.assert_numpy_array_equal(idx._isnan, expected) - self.assertTrue(idx.hasnans) + assert idx.hasnans def test_nulls(self): # this is really a smoke test for the methods @@ -936,4 +936,4 @@ def test_empty(self): # GH 15270 index = self.create_index() assert not index.empty - self.assertTrue(index[:0].empty) + assert index[:0].empty diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 338dba9ef6c4f..114940009377c 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -17,14 +17,14 @@ def test_str(self): idx = self.create_index() idx.name = 'foo' assert not "length=%s" % len(idx) in str(idx) - self.assertTrue("'foo'" in str(idx)) - self.assertTrue(idx.__class__.__name__ in str(idx)) + assert "'foo'" in str(idx) + assert idx.__class__.__name__ in str(idx) if hasattr(idx, 'tz'): if idx.tz is not None: - self.assertTrue(idx.tz in str(idx)) + assert idx.tz in str(idx) if hasattr(idx, 'freq'): - self.assertTrue("freq='%s'" % idx.freqstr in str(idx)) + assert "freq='%s'" % idx.freqstr in str(idx) def test_view(self): super(DatetimeLike, self).test_view() diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 7e695164db971..35031746efebe 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -105,7 +105,7 @@ def test_astype_datetime64(self): result = idx.astype('datetime64[ns]', copy=False) tm.assert_index_equal(result, idx) - self.assertTrue(result is idx) + assert result is idx idx_tz = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN], tz='EST') result = idx_tz.astype('datetime64[ns]') @@ -251,7 +251,7 @@ def test_to_period_tz_explicit_pytz(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertTrue(result == expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc) @@ -259,7 +259,7 @@ def test_to_period_tz_explicit_pytz(self): result = ts.to_period()[0] expected = ts[0].to_period() - 
self.assertTrue(result == expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) @@ -267,7 +267,7 @@ def test_to_period_tz_explicit_pytz(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertTrue(result == expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) def test_to_period_tz_dateutil(self): @@ -282,7 +282,7 @@ def test_to_period_tz_dateutil(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertTrue(result == expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) @@ -290,7 +290,7 @@ def test_to_period_tz_dateutil(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertTrue(result == expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) @@ -298,7 +298,7 @@ def test_to_period_tz_dateutil(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertTrue(result == expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) def test_astype_object(self): diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 8ce2085032ca1..098d4755b385c 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -205,7 +205,7 @@ def test_construction_dti_with_mixed_timezones(self): exp = DatetimeIndex( [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') tm.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) + assert isinstance(result, DatetimeIndex) # same tz results in DatetimeIndex result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), @@ -216,7 +216,7 @@ def test_construction_dti_with_mixed_timezones(self): Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx') tm.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) + assert isinstance(result, DatetimeIndex) # same tz results in DatetimeIndex (DST) result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'), @@ -227,7 +227,7 @@ def test_construction_dti_with_mixed_timezones(self): Timestamp('2011-08-01 10:00')], tz='US/Eastern', name='idx') tm.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) + assert isinstance(result, DatetimeIndex) # different tz coerces tz-naive to tz-awareIndex(dtype=object) result = DatetimeIndex([Timestamp('2011-01-01 10:00'), @@ -237,7 +237,7 @@ def test_construction_dti_with_mixed_timezones(self): Timestamp('2011-01-02 10:00')], tz='US/Eastern', name='idx') tm.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) + assert isinstance(result, DatetimeIndex) # tz mismatch affecting to tz-aware raises TypeError/ValueError @@ -491,15 +491,15 @@ def test_ctor_str_intraday(self): def test_is_(self): dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') - self.assertTrue(dti.is_(dti)) - self.assertTrue(dti.is_(dti.view())) + assert dti.is_(dti) + assert dti.is_(dti.view()) assert not dti.is_(dti.copy()) def test_index_cast_datetime64_other_units(self): arr = np.arange(0, 100, 10, dtype=np.int64).view('M8[D]') idx = Index(arr) - self.assertTrue((idx.values == tslib.cast_to_nanoseconds(arr)).all()) + assert 
(idx.values == tslib.cast_to_nanoseconds(arr)).all() def test_constructor_int64_nocopy(self): # #1624 @@ -507,13 +507,13 @@ def test_constructor_int64_nocopy(self): index = DatetimeIndex(arr) arr[50:100] = -1 - self.assertTrue((index.asi8[50:100] == -1).all()) + assert (index.asi8[50:100] == -1).all() arr = np.arange(1000, dtype=np.int64) index = DatetimeIndex(arr, copy=True) arr[50:100] = -1 - self.assertTrue((index.asi8[50:100] != -1).all()) + assert (index.asi8[50:100] != -1).all() def test_from_freq_recreate_from_data(self): freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N', @@ -560,7 +560,7 @@ def test_datetimeindex_constructor_misc(self): tm.assert_index_equal(idx7, idx8) for other in [idx2, idx3, idx4, idx5, idx6]: - self.assertTrue((idx1.values == other.values).all()) + assert (idx1.values == other.values).all() sdate = datetime(1999, 12, 25) edate = datetime(2000, 1, 1) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index e570313b716cb..6b011ad6db98e 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -359,19 +359,19 @@ def test_range_tz_dateutil(self): end = datetime(2011, 1, 3, tzinfo=tz('US/Eastern')) dr = date_range(start=start, periods=3) - self.assertTrue(dr.tz == tz('US/Eastern')) - self.assertTrue(dr[0] == start) - self.assertTrue(dr[2] == end) + assert dr.tz == tz('US/Eastern') + assert dr[0] == start + assert dr[2] == end dr = date_range(end=end, periods=3) - self.assertTrue(dr.tz == tz('US/Eastern')) - self.assertTrue(dr[0] == start) - self.assertTrue(dr[2] == end) + assert dr.tz == tz('US/Eastern') + assert dr[0] == start + assert dr[2] == end dr = date_range(start=start, end=end) - self.assertTrue(dr.tz == tz('US/Eastern')) - self.assertTrue(dr[0] == start) - self.assertTrue(dr[2] == end) + assert dr.tz == tz('US/Eastern') + assert dr[0] == start + assert dr[2] == end def test_range_closed(self): begin = datetime(2011, 1, 1) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 7ba9bf53abc4d..83f9119377b19 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -451,17 +451,17 @@ def test_sort_values(self): idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02']) ordered = idx.sort_values() - self.assertTrue(ordered.is_monotonic) + assert ordered.is_monotonic ordered = idx.sort_values(ascending=False) - self.assertTrue(ordered[::-1].is_monotonic) + assert ordered[::-1].is_monotonic ordered, dexer = idx.sort_values(return_indexer=True) - self.assertTrue(ordered.is_monotonic) + assert ordered.is_monotonic tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp)) ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) - self.assertTrue(ordered[::-1].is_monotonic) + assert ordered[::-1].is_monotonic tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp)) def test_take(self): @@ -570,15 +570,15 @@ def test_append_numpy_bug_1681(self): c = DataFrame({'A': 'foo', 'B': dr}, index=dr) result = a.append(c) - self.assertTrue((result['B'] == dr).all()) + assert (result['B'] == dr).all() def test_isin(self): index = tm.makeDateIndex(4) result = index.isin(index) - self.assertTrue(result.all()) + assert result.all() result = index.isin(list(index)) - self.assertTrue(result.all()) + assert result.all() assert_almost_equal(index.isin([index[2], 5]), 
np.array([False, False, True, False])) @@ -587,13 +587,13 @@ def test_time(self): rng = pd.date_range('1/1/2000', freq='12min', periods=10) result = pd.Index(rng).time expected = [t.time() for t in rng] - self.assertTrue((result == expected).all()) + assert (result == expected).all() def test_date(self): rng = pd.date_range('1/1/2000', freq='12H', periods=10) result = pd.Index(rng).date expected = [t.date() for t in rng] - self.assertTrue((result == expected).all()) + assert (result == expected).all() def test_does_not_convert_mixed_integer(self): df = tm.makeCustomDataframe(10, 10, diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index 3e6fe10223216..0eb565bf0ec55 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -49,13 +49,13 @@ def test_intersection(self): first = self.index second = self.index[5:] intersect = first.intersection(second) - self.assertTrue(tm.equalContents(intersect, second)) + assert tm.equalContents(intersect, second) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: result = first.intersection(case) - self.assertTrue(tm.equalContents(result, second)) + assert tm.equalContents(result, second) third = Index(['a', 'b', 'c']) result = first.intersection(third) @@ -67,10 +67,10 @@ def test_union(self): second = self.index[5:] everything = self.index union = first.union(second) - self.assertTrue(tm.equalContents(union, everything)) + assert tm.equalContents(union, everything) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: result = first.union(case) - self.assertTrue(tm.equalContents(result, everything)) + assert tm.equalContents(result, everything) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 22e77eebec06b..55165aa39a1a4 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -166,7 +166,7 @@ def test_normalize(self): "datetime64[ns]")) tm.assert_index_equal(rng_ns_normalized, expected) - self.assertTrue(result.is_normalized) + assert result.is_normalized assert not rng.is_normalized diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 7e42e5e3db7ef..fa1b2c0d7c68d 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -59,7 +59,7 @@ def test_asobject_tolist(self): Timestamp('2013-04-30')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.asobject - self.assertTrue(isinstance(result, Index)) + assert isinstance(result, Index) self.assertEqual(result.dtype, object) tm.assert_index_equal(result, expected) @@ -74,7 +74,7 @@ def test_asobject_tolist(self): Timestamp('2013-04-30', tz='Asia/Tokyo')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.asobject - self.assertTrue(isinstance(result, Index)) + assert isinstance(result, Index) self.assertEqual(result.dtype, object) tm.assert_index_equal(result, expected) self.assertEqual(result.name, expected.name) @@ -87,7 +87,7 @@ def test_asobject_tolist(self): Timestamp('2013-01-04')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.asobject - self.assertTrue(isinstance(result, Index)) + assert isinstance(result, Index) self.assertEqual(result.dtype, object) tm.assert_index_equal(result, expected) 
self.assertEqual(result.name, expected.name) @@ -98,7 +98,7 @@ def test_minmax(self): # monotonic idx1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], tz=tz) - self.assertTrue(idx1.is_monotonic) + assert idx1.is_monotonic # non-monotonic idx2 = pd.DatetimeIndex(['2011-01-01', pd.NaT, '2011-01-03', @@ -114,13 +114,13 @@ def test_minmax(self): for op in ['min', 'max']: # Return NaT obj = DatetimeIndex([]) - self.assertTrue(pd.isnull(getattr(obj, op)())) + assert pd.isnull(getattr(obj, op)()) obj = DatetimeIndex([pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) + assert pd.isnull(getattr(obj, op)()) obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) + assert pd.isnull(getattr(obj, op)()) def test_numpy_minmax(self): dr = pd.date_range(start='2016-01-15', end='2016-01-20') @@ -886,7 +886,7 @@ def test_nat(self): for tz in [None, 'US/Eastern', 'UTC']: idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz) - self.assertTrue(idx._can_hold_na) + assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) assert not idx.hasnans @@ -894,10 +894,10 @@ def test_nat(self): np.array([], dtype=np.intp)) idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz) - self.assertTrue(idx._can_hold_na) + assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - self.assertTrue(idx.hasnans) + assert idx.hasnans tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) @@ -905,11 +905,11 @@ def test_equals(self): # GH 13107 for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT']) - self.assertTrue(idx.equals(idx)) - self.assertTrue(idx.equals(idx.copy())) - self.assertTrue(idx.equals(idx.asobject)) - self.assertTrue(idx.asobject.equals(idx)) - self.assertTrue(idx.asobject.equals(idx.asobject)) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.asobject) + assert idx.asobject.equals(idx) + assert idx.asobject.equals(idx.asobject) assert not idx.equals(list(idx)) assert not idx.equals(pd.Series(idx)) @@ -1118,7 +1118,7 @@ def test_comparison(self): d = self.rng[10] comp = self.rng > d - self.assertTrue(comp[11]) + assert comp[11] assert not comp[9] def test_pickle_unpickle(self): @@ -1194,18 +1194,18 @@ def test_equals(self): def test_identical(self): t1 = self.rng.copy() t2 = self.rng.copy() - self.assertTrue(t1.identical(t2)) + assert t1.identical(t2) # name t1 = t1.rename('foo') - self.assertTrue(t1.equals(t2)) + assert t1.equals(t2) assert not t1.identical(t2) t2 = t2.rename('foo') - self.assertTrue(t1.identical(t2)) + assert t1.identical(t2) # freq t2v = Index(t2.values) - self.assertTrue(t1.equals(t2v)) + assert t1.equals(t2v) assert not t1.identical(t2v) @@ -1218,7 +1218,7 @@ def test_comparison(self): d = self.rng[10] comp = self.rng > d - self.assertTrue(comp[11]) + assert comp[11] assert not comp[9] def test_copy(self): diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 84a1adce2c0aa..6612ab844b849 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -196,7 +196,7 @@ def test_join_nonunique(self): idx2 = to_datetime(['2012-11-06 15:11:09.006507', '2012-11-06 15:11:09.006507']) rs = idx1.join(idx2, how='outer') - self.assertTrue(rs.is_monotonic) + assert rs.is_monotonic class TestBusinessDatetimeIndex(tm.TestCase): diff --git 
a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py
index 941c9767e7a3a..4c32f41db207c 100644
--- a/pandas/tests/indexes/datetimes/test_tools.py
+++ b/pandas/tests/indexes/datetimes/test_tools.py
@@ -296,7 +296,7 @@ def test_to_datetime_tz_psycopg2(self):
         i = pd.DatetimeIndex([
             '2000-01-01 08:00:00+00:00'
         ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None))
-        self.assertTrue(is_datetime64_ns_dtype(i))
+        assert is_datetime64_ns_dtype(i)

         # tz coercion
         result = pd.to_datetime(i, errors='coerce')
@@ -311,11 +311,11 @@ def test_datetime_bool(self):
         # GH13176
         with pytest.raises(TypeError):
             to_datetime(False)
-        self.assertTrue(to_datetime(False, errors="coerce") is NaT)
+        assert to_datetime(False, errors="coerce") is NaT
         self.assertEqual(to_datetime(False, errors="ignore"), False)
         with pytest.raises(TypeError):
             to_datetime(True)
-        self.assertTrue(to_datetime(True, errors="coerce") is NaT)
+        assert to_datetime(True, errors="coerce") is NaT
         self.assertEqual(to_datetime(True, errors="ignore"), True)
         with pytest.raises(TypeError):
             to_datetime([False, datetime.today()])
@@ -626,7 +626,7 @@ def test_to_datetime_iso8601(self):
     def test_to_datetime_default(self):
         rs = to_datetime('2001')
         xp = datetime(2001, 1, 1)
-        self.assertTrue(rs, xp)
+        assert rs, xp

         # dayfirst is essentially broken

@@ -684,7 +684,7 @@ def test_to_datetime_types(self):
         assert result is NaT

         result = to_datetime(['', ''])
-        self.assertTrue(isnull(result).all())
+        assert isnull(result).all()

         # ints
         result = Timestamp(0)
@@ -889,7 +889,7 @@ def test_guess_datetime_format_invalid_inputs(self):
         ]

         for invalid_dt in invalid_dts:
-            self.assertTrue(tools._guess_datetime_format(invalid_dt) is None)
+            assert tools._guess_datetime_format(invalid_dt) is None

     def test_guess_datetime_format_nopadding(self):
         # GH 11142
@@ -926,7 +926,7 @@ def test_guess_datetime_format_for_array(self):
         format_for_string_of_nans = tools._guess_datetime_format_for_array(
             np.array(
                 [np.nan, np.nan, np.nan], dtype='O'))
-        self.assertTrue(format_for_string_of_nans is None)
+        assert format_for_string_of_nans is None


 class TestToDatetimeInferFormat(tm.TestCase):
@@ -993,13 +993,13 @@ class TestDaysInMonth(tm.TestCase):
     # tests for issue #10154
     def test_day_not_in_month_coerce(self):
-        self.assertTrue(isnull(to_datetime('2015-02-29', errors='coerce')))
-        self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d",
-                                           errors='coerce')))
-        self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d",
-                                           errors='coerce')))
-        self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d",
-                                           errors='coerce')))
+        assert isnull(to_datetime('2015-02-29', errors='coerce'))
+        assert isnull(to_datetime('2015-02-29', format="%Y-%m-%d",
+                                  errors='coerce'))
+        assert isnull(to_datetime('2015-02-32', format="%Y-%m-%d",
+                                  errors='coerce'))
+        assert isnull(to_datetime('2015-04-31', format="%Y-%m-%d",
+                                  errors='coerce'))

     def test_day_not_in_month_raise(self):
         pytest.raises(ValueError, to_datetime, '2015-02-29',
@@ -1037,8 +1037,7 @@ def test_does_not_convert_mixed_integer(self):
             '1-1',
         )
         for good_date_string in good_date_strings:
-            self.assertTrue(tslib._does_string_look_like_datetime(
-                good_date_string))
+            assert tslib._does_string_look_like_datetime(good_date_string)

     def test_parsers(self):

@@ -1129,10 +1128,10 @@ def test_parsers(self):
         result2 = to_datetime('NaT')
         result3 = Timestamp('NaT')
         result4 = DatetimeIndex(['NaT'])[0]
-        self.assertTrue(result1 is tslib.NaT)
-        self.assertTrue(result1 is tslib.NaT)
-        
self.assertTrue(result1 is tslib.NaT) - self.assertTrue(result1 is tslib.NaT) + assert result1 is tslib.NaT + assert result1 is tslib.NaT + assert result1 is tslib.NaT + assert result1 is tslib.NaT def test_parsers_quarter_invalid(self): @@ -1388,7 +1387,7 @@ def test_try_parse_dates(self): result = lib.try_parse_dates(arr, dayfirst=True) expected = [parse(d, dayfirst=True) for d in arr] - self.assertTrue(np.array_equal(result, expected)) + assert np.array_equal(result, expected) def test_parsing_valid_dates(self): arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 434271cbe22ec..6ab42f14efae6 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -135,15 +135,15 @@ def test_constructor_fromarraylike(self): result = PeriodIndex(idx, freq=offsets.MonthEnd()) tm.assert_index_equal(result, idx) - self.assertTrue(result.freq, 'M') + assert result.freq, 'M' result = PeriodIndex(idx, freq='2M') tm.assert_index_equal(result, idx.asfreq('2M')) - self.assertTrue(result.freq, '2M') + assert result.freq, '2M' result = PeriodIndex(idx, freq=offsets.MonthEnd(2)) tm.assert_index_equal(result, idx.asfreq('2M')) - self.assertTrue(result.freq, '2M') + assert result.freq, '2M' result = PeriodIndex(idx, freq='D') exp = idx.asfreq('D', 'e') @@ -405,13 +405,13 @@ def test_constructor(self): end_intv = Period('2006-12-31', '1w') i2 = PeriodIndex(end=end_intv, periods=10) self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) + assert (i1 == i2).all() self.assertEqual(i1.freq, i2.freq) end_intv = Period('2006-12-31', ('w', 1)) i2 = PeriodIndex(end=end_intv, periods=10) self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) + assert (i1 == i2).all() self.assertEqual(i1.freq, i2.freq) end_intv = Period('2005-05-01', 'B') @@ -467,7 +467,7 @@ def test_map_with_string_constructor(self): assert isinstance(res, Index) # preserve element types - self.assertTrue(all(isinstance(resi, t) for resi in res)) + assert all(isinstance(resi, t) for resi in res) # lastly, values should compare equal tm.assert_index_equal(res, expected) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 7af9e9ae3b14c..cf5f741fb09ed 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -81,7 +81,7 @@ def test_getitem_partial(self): pytest.raises(KeyError, ts.__getitem__, '2006') result = ts['2008'] - self.assertTrue((result.index.year == 2008).all()) + assert (result.index.year == 2008).all() result = ts['2008':'2009'] self.assertEqual(len(result), 24) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index f133845f8404a..af377c1b69922 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -37,7 +37,7 @@ def test_asobject_tolist(self): pd.Period('2013-04-30', freq='M')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.asobject - self.assertTrue(isinstance(result, Index)) + assert isinstance(result, Index) self.assertEqual(result.dtype, object) tm.assert_index_equal(result, expected) self.assertEqual(result.name, expected.name) @@ -51,7 +51,7 @@ def test_asobject_tolist(self): pd.Period('2013-01-04', freq='D')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.asobject - 
self.assertTrue(isinstance(result, Index)) + assert isinstance(result, Index) self.assertEqual(result.dtype, object) tm.assert_index_equal(result, expected) for i in [0, 1, 3]: @@ -69,7 +69,7 @@ def test_minmax(self): # monotonic idx1 = pd.PeriodIndex([pd.NaT, '2011-01-01', '2011-01-02', '2011-01-03'], freq='D') - self.assertTrue(idx1.is_monotonic) + assert idx1.is_monotonic # non-monotonic idx2 = pd.PeriodIndex(['2011-01-01', pd.NaT, '2011-01-03', @@ -803,7 +803,7 @@ def test_nat(self): assert pd.PeriodIndex([], freq='M')._na_value is pd.NaT idx = pd.PeriodIndex(['2011-01-01', '2011-01-02'], freq='D') - self.assertTrue(idx._can_hold_na) + assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) assert not idx.hasnans @@ -811,10 +811,10 @@ def test_nat(self): np.array([], dtype=np.intp)) idx = pd.PeriodIndex(['2011-01-01', 'NaT'], freq='D') - self.assertTrue(idx._can_hold_na) + assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - self.assertTrue(idx.hasnans) + assert idx.hasnans tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) @@ -823,11 +823,11 @@ def test_equals(self): for freq in ['D', 'M']: idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'], freq=freq) - self.assertTrue(idx.equals(idx)) - self.assertTrue(idx.equals(idx.copy())) - self.assertTrue(idx.equals(idx.asobject)) - self.assertTrue(idx.asobject.equals(idx)) - self.assertTrue(idx.asobject.equals(idx.asobject)) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.asobject) + assert idx.asobject.equals(idx) + assert idx.asobject.equals(idx.asobject) assert not idx.equals(list(idx)) assert not idx.equals(pd.Series(idx)) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index df3f6023a6506..8ee3e9d6707b4 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -319,13 +319,13 @@ def test_period_index_length(self): end_intv = Period('2006-12-31', '1w') i2 = PeriodIndex(end=end_intv, periods=10) self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) + assert (i1 == i2).all() self.assertEqual(i1.freq, i2.freq) end_intv = Period('2006-12-31', ('w', 1)) i2 = PeriodIndex(end=end_intv, periods=10) self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) + assert (i1 == i2).all() self.assertEqual(i1.freq, i2.freq) try: @@ -511,7 +511,7 @@ def test_comp_period(self): def test_contains(self): rng = period_range('2007-01', freq='M', periods=10) - self.assertTrue(Period('2007-01', freq='M') in rng) + assert Period('2007-01', freq='M') in rng assert not Period('2007-01', freq='D') in rng assert not Period('2007-01', freq='2M') in rng @@ -524,10 +524,10 @@ def test_contains_nat(self): assert np.nan not in idx idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') - self.assertTrue(pd.NaT in idx) - self.assertTrue(None in idx) - self.assertTrue(float('nan') in idx) - self.assertTrue(np.nan in idx) + assert pd.NaT in idx + assert None in idx + assert float('nan') in idx + assert np.nan in idx def test_periods_number_check(self): with pytest.raises(ValueError): @@ -552,7 +552,7 @@ def test_index_duplicate_periods(self): expected = ts[1:3] tm.assert_series_equal(result, expected) result[:] = 1 - self.assertTrue((ts[1:3] == 1).all()) + assert (ts[1:3] == 1).all() # not monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN') @@ -712,18 +712,18 @@ def test_is_full(self): 
assert not index.is_full index = PeriodIndex([2005, 2006, 2007], freq='A') - self.assertTrue(index.is_full) + assert index.is_full index = PeriodIndex([2005, 2005, 2007], freq='A') assert not index.is_full index = PeriodIndex([2005, 2005, 2006], freq='A') - self.assertTrue(index.is_full) + assert index.is_full index = PeriodIndex([2006, 2005, 2005], freq='A') pytest.raises(ValueError, getattr, index, 'is_full') - self.assertTrue(index[:0].is_full) + assert index[:0].is_full def test_with_multi_index(self): # #1705 diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 2f07cf3c8270f..8ac1ef3e1911b 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -411,7 +411,7 @@ def test_astype(self): def test_equals_object(self): # same - self.assertTrue(Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'c']))) + assert Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'c'])) # different length assert not Index(['a', 'b', 'c']).equals(Index(['a', 'b'])) @@ -466,14 +466,14 @@ def test_identical(self): i1 = Index(['a', 'b', 'c']) i2 = Index(['a', 'b', 'c']) - self.assertTrue(i1.identical(i2)) + assert i1.identical(i2) i1 = i1.rename('foo') - self.assertTrue(i1.equals(i2)) + assert i1.equals(i2) assert not i1.identical(i2) i2 = i2.rename('foo') - self.assertTrue(i1.identical(i2)) + assert i1.identical(i2) i3 = Index([('a', 'a'), ('a', 'b'), ('b', 'a')]) i4 = Index([('a', 'a'), ('a', 'b'), ('b', 'a')], tupleize_cols=False) @@ -481,8 +481,8 @@ def test_identical(self): def test_is_(self): ind = Index(range(10)) - self.assertTrue(ind.is_(ind)) - self.assertTrue(ind.is_(ind.view().view().view().view())) + assert ind.is_(ind) + assert ind.is_(ind.view().view().view().view()) assert not ind.is_(Index(range(10))) assert not ind.is_(ind.copy()) assert not ind.is_(ind.copy(deep=False)) @@ -491,11 +491,11 @@ def test_is_(self): assert not ind.is_(np.array(range(10))) # quasi-implementation dependent - self.assertTrue(ind.is_(ind.view())) + assert ind.is_(ind.view()) ind2 = ind.view() ind2.name = 'bob' - self.assertTrue(ind.is_(ind2)) - self.assertTrue(ind2.is_(ind)) + assert ind.is_(ind2) + assert ind2.is_(ind) # doesn't matter if Indices are *actually* views of underlying data, assert not ind.is_(Index(ind.values)) arr = np.array(range(1, 11)) @@ -506,7 +506,7 @@ def test_is_(self): def test_asof(self): d = self.dateIndex[0] self.assertEqual(self.dateIndex.asof(d), d) - self.assertTrue(isnull(self.dateIndex.asof(d - timedelta(1)))) + assert isnull(self.dateIndex.asof(d - timedelta(1))) d = self.dateIndex[-1] self.assertEqual(self.dateIndex.asof(d + timedelta(1)), d) @@ -585,9 +585,9 @@ def test_empty_fancy(self): for idx in [self.strIndex, self.intIndex, self.floatIndex]: empty_idx = idx.__class__([]) - self.assertTrue(idx[[]].identical(empty_idx)) - self.assertTrue(idx[empty_iarr].identical(empty_idx)) - self.assertTrue(idx[empty_barr].identical(empty_idx)) + assert idx[[]].identical(empty_idx) + assert idx[empty_iarr].identical(empty_idx) + assert idx[empty_barr].identical(empty_idx) # np.ndarray only accepts ndarray of int & bool dtypes, so should # Index. 
@@ -604,7 +604,7 @@ def test_intersection(self):
         first = self.strIndex[:20]
         second = self.strIndex[:10]
         intersect = first.intersection(second)
-        self.assertTrue(tm.equalContents(intersect, second))
+        assert tm.equalContents(intersect, second)

         # Corner cases
         inter = first.intersection(first)
@@ -671,13 +671,13 @@ def test_union(self):
         second = self.strIndex[:10]
         everything = self.strIndex[:20]
         union = first.union(second)
-        self.assertTrue(tm.equalContents(union, everything))
+        assert tm.equalContents(union, everything)

         # GH 10149
         cases = [klass(second.values)
                  for klass in [np.array, Series, list]]
         for case in cases:
             result = first.union(case)
-            self.assertTrue(tm.equalContents(result, everything))
+            assert tm.equalContents(result, everything)

         # Corner cases
         union = first.union(first)
@@ -753,8 +753,8 @@ def test_union(self):
         else:
             appended = np.append(self.strIndex, self.dateIndex.astype('O'))

-        self.assertTrue(tm.equalContents(firstCat, appended))
-        self.assertTrue(tm.equalContents(secondCat, self.strIndex))
+        assert tm.equalContents(firstCat, appended)
+        assert tm.equalContents(secondCat, self.strIndex)
         tm.assert_contains_all(self.strIndex, firstCat)
         tm.assert_contains_all(self.strIndex, secondCat)
         tm.assert_contains_all(self.dateIndex, firstCat)
@@ -871,7 +871,7 @@ def test_difference(self):

         # different names
         result = first.difference(second)
-        self.assertTrue(tm.equalContents(result, answer))
+        assert tm.equalContents(result, answer)
         self.assertEqual(result.name, None)

         # same names
@@ -881,7 +881,7 @@ def test_difference(self):

         # with empty
         result = first.difference([])
-        self.assertTrue(tm.equalContents(result, first))
+        assert tm.equalContents(result, first)
         self.assertEqual(result.name, first.name)

         # with everything
@@ -895,12 +895,12 @@ def test_symmetric_difference(self):
         idx2 = Index([2, 3, 4, 5])
         result = idx1.symmetric_difference(idx2)
         expected = Index([1, 5])
-        self.assertTrue(tm.equalContents(result, expected))
+        assert tm.equalContents(result, expected)
         assert result.name is None

         # __xor__ syntax
         expected = idx1 ^ idx2
-        self.assertTrue(tm.equalContents(result, expected))
+        assert tm.equalContents(result, expected)
         assert result.name is None

         # multiIndex
         idx1 = MultiIndex.from_tuples(self.tuples)
         idx2 = MultiIndex.from_tuples([('foo', 1), ('bar', 3)])
         result = idx1.symmetric_difference(idx2)
         expected = MultiIndex.from_tuples([('bar', 2), ('baz', 3), ('bar', 3)])
-        self.assertTrue(tm.equalContents(result, expected))
+        assert tm.equalContents(result, expected)

         # nans:
         # GH 13514 change: {nan} - {nan} == {}
@@ -930,30 +930,30 @@ def test_symmetric_difference(self):
         idx2 = np.array([2, 3, 4, 5])
         expected = Index([1, 5])
         result = idx1.symmetric_difference(idx2)
-        self.assertTrue(tm.equalContents(result, expected))
+        assert tm.equalContents(result, expected)
         self.assertEqual(result.name, 'idx1')

         result = idx1.symmetric_difference(idx2, result_name='new_name')
-        self.assertTrue(tm.equalContents(result, expected))
+        assert tm.equalContents(result, expected)
         self.assertEqual(result.name, 'new_name')

     def test_is_numeric(self):
         assert not self.dateIndex.is_numeric()
         assert not self.strIndex.is_numeric()
-        self.assertTrue(self.intIndex.is_numeric())
-        self.assertTrue(self.floatIndex.is_numeric())
+        assert self.intIndex.is_numeric()
+        assert self.floatIndex.is_numeric()
         assert not self.catIndex.is_numeric()

     def test_is_object(self):
-        self.assertTrue(self.strIndex.is_object())
-        self.assertTrue(self.boolIndex.is_object())
+        assert self.strIndex.is_object()
+        assert 
self.boolIndex.is_object() assert not self.catIndex.is_object() assert not self.intIndex.is_object() assert not self.dateIndex.is_object() assert not self.floatIndex.is_object() def test_is_all_dates(self): - self.assertTrue(self.dateIndex.is_all_dates) + assert self.dateIndex.is_all_dates assert not self.strIndex.is_all_dates assert not self.intIndex.is_all_dates @@ -1475,17 +1475,16 @@ def test_str_attribute(self): def test_tab_completion(self): # GH 9910 idx = Index(list('abcd')) - self.assertTrue('str' in dir(idx)) + assert 'str' in dir(idx) idx = Index(range(4)) - self.assertTrue('str' not in dir(idx)) + assert 'str' not in dir(idx) def test_indexing_doesnt_change_class(self): idx = Index([1, 2, 3, 'a', 'b', 'c']) - self.assertTrue(idx[1:3].identical(pd.Index([2, 3], dtype=np.object_))) - self.assertTrue(idx[[0, 1]].identical(pd.Index( - [1, 2], dtype=np.object_))) + assert idx[1:3].identical(pd.Index([2, 3], dtype=np.object_)) + assert idx[[0, 1]].identical(pd.Index([1, 2], dtype=np.object_)) def test_outer_join_sort(self): left_idx = Index(np.random.permutation(15)) @@ -1876,19 +1875,19 @@ def test_copy_name2(self): idx = pd.Index([1, 2], name='MyName') idx1 = idx.copy() - self.assertTrue(idx.equals(idx1)) + assert idx.equals(idx1) self.assertEqual(idx.name, 'MyName') self.assertEqual(idx1.name, 'MyName') idx2 = idx.copy(name='NewName') - self.assertTrue(idx.equals(idx2)) + assert idx.equals(idx2) self.assertEqual(idx.name, 'MyName') self.assertEqual(idx2.name, 'NewName') idx3 = idx.copy(names=['NewName']) - self.assertTrue(idx.equals(idx3)) + assert idx.equals(idx3) self.assertEqual(idx.name, 'MyName') self.assertEqual(idx.names, ['MyName']) self.assertEqual(idx3.name, 'NewName') @@ -1918,10 +1917,10 @@ def test_union_base(self): with tm.assert_produces_warning(RuntimeWarning): # unorderable types result = first.union(case) - self.assertTrue(tm.equalContents(result, idx)) + assert tm.equalContents(result, idx) else: result = first.union(case) - self.assertTrue(tm.equalContents(result, idx)) + assert tm.equalContents(result, idx) def test_intersection_base(self): # (same results for py2 and py3 but sortedness not tested elsewhere) @@ -1937,7 +1936,7 @@ def test_intersection_base(self): for klass in [np.array, Series, list]] for case in cases: result = first.intersection(case) - self.assertTrue(tm.equalContents(result, second)) + assert tm.equalContents(result, second) def test_difference_base(self): # (same results for py2 and py3 but sortedness not tested elsewhere) @@ -2037,8 +2036,8 @@ def test_is_monotonic_na(self): def test_repr_summary(self): with cf.option_context('display.max_seq_items', 10): r = repr(pd.Index(np.arange(1000))) - self.assertTrue(len(r) < 200) - self.assertTrue("..." in r) + assert len(r) < 200 + assert "..." 
in r def test_int_name_format(self): index = Index(['a', 'b', 'c'], name=0) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 5c9df55d2b508..7b2d27c9b51a4 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -177,10 +177,10 @@ def test_contains(self): ci = self.create_index(categories=list('cabdef')) - self.assertTrue('a' in ci) - self.assertTrue('z' not in ci) - self.assertTrue('e' not in ci) - self.assertTrue(np.nan not in ci) + assert 'a' in ci + assert 'z' not in ci + assert 'e' not in ci + assert np.nan not in ci # assert codes NOT in index assert 0 not in ci @@ -188,7 +188,7 @@ def test_contains(self): ci = CategoricalIndex( list('aabbca') + [np.nan], categories=list('cabdef')) - self.assertTrue(np.nan in ci) + assert np.nan in ci def test_min_max(self): @@ -424,7 +424,7 @@ def test_duplicates(self): idx = CategoricalIndex([0, 0, 0], name='foo') assert not idx.is_unique - self.assertTrue(idx.has_duplicates) + assert idx.has_duplicates expected = CategoricalIndex([0], name='foo') tm.assert_index_equal(idx.drop_duplicates(), expected) @@ -537,8 +537,8 @@ def test_identical(self): ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], ordered=True) - self.assertTrue(ci1.identical(ci1)) - self.assertTrue(ci1.identical(ci1.copy())) + assert ci1.identical(ci1) + assert ci1.identical(ci1.copy()) assert not ci1.identical(ci2) def test_ensure_copied_data(self): @@ -562,21 +562,21 @@ def test_equals_categorical(self): ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], ordered=True) - self.assertTrue(ci1.equals(ci1)) + assert ci1.equals(ci1) assert not ci1.equals(ci2) - self.assertTrue(ci1.equals(ci1.astype(object))) - self.assertTrue(ci1.astype(object).equals(ci1)) + assert ci1.equals(ci1.astype(object)) + assert ci1.astype(object).equals(ci1) - self.assertTrue((ci1 == ci1).all()) + assert (ci1 == ci1).all() assert not (ci1 != ci1).all() assert not (ci1 > ci1).all() assert not (ci1 < ci1).all() - self.assertTrue((ci1 <= ci1).all()) - self.assertTrue((ci1 >= ci1).all()) + assert (ci1 <= ci1).all() + assert (ci1 >= ci1).all() assert not (ci1 == 1).all() - self.assertTrue((ci1 == Index(['a', 'b'])).all()) - self.assertTrue((ci1 == ci1.values).all()) + assert (ci1 == Index(['a', 'b'])).all() + assert (ci1 == ci1.values).all() # invalid comparisons with tm.assert_raises_regex(ValueError, "Lengths must match"): @@ -593,19 +593,19 @@ def test_equals_categorical(self): ci = CategoricalIndex(list('aabca'), categories=['c', 'a', 'b']) assert not ci.equals(list('aabca')) assert not ci.equals(CategoricalIndex(list('aabca'))) - self.assertTrue(ci.equals(ci.copy())) + assert ci.equals(ci.copy()) ci = CategoricalIndex(list('aabca') + [np.nan], categories=['c', 'a', 'b']) assert not ci.equals(list('aabca')) assert not ci.equals(CategoricalIndex(list('aabca'))) - self.assertTrue(ci.equals(ci.copy())) + assert ci.equals(ci.copy()) ci = CategoricalIndex(list('aabca') + [np.nan], categories=['c', 'a', 'b']) assert not ci.equals(list('aabca') + [np.nan]) assert not ci.equals(CategoricalIndex(list('aabca') + [np.nan])) - self.assertTrue(ci.equals(ci.copy())) + assert ci.equals(ci.copy()) def test_string_categorical_index_repr(self): # short diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 2e16e16e0b2c4..815fefa813a9d 100644 --- a/pandas/tests/indexes/test_interval.py +++ 
b/pandas/tests/indexes/test_interval.py @@ -27,28 +27,28 @@ def create_index(self): def test_constructors(self): expected = self.index actual = IntervalIndex.from_breaks(np.arange(3), closed='right') - self.assertTrue(expected.equals(actual)) + assert expected.equals(actual) alternate = IntervalIndex.from_breaks(np.arange(3), closed='left') assert not expected.equals(alternate) actual = IntervalIndex.from_intervals([Interval(0, 1), Interval(1, 2)]) - self.assertTrue(expected.equals(actual)) + assert expected.equals(actual) actual = IntervalIndex([Interval(0, 1), Interval(1, 2)]) - self.assertTrue(expected.equals(actual)) + assert expected.equals(actual) actual = IntervalIndex.from_arrays(np.arange(2), np.arange(2) + 1, closed='right') - self.assertTrue(expected.equals(actual)) + assert expected.equals(actual) actual = Index([Interval(0, 1), Interval(1, 2)]) assert isinstance(actual, IntervalIndex) - self.assertTrue(expected.equals(actual)) + assert expected.equals(actual) actual = Index(expected) assert isinstance(actual, IntervalIndex) - self.assertTrue(expected.equals(actual)) + assert expected.equals(actual) def test_constructors_other(self): @@ -106,8 +106,8 @@ def test_constructors_datetimelike(self): expected_scalar_type = type(idx[0]) i = result[0] - self.assertTrue(isinstance(i.left, expected_scalar_type)) - self.assertTrue(isinstance(i.right, expected_scalar_type)) + assert isinstance(i.left, expected_scalar_type) + assert isinstance(i.right, expected_scalar_type) def test_constructors_error(self): @@ -158,7 +158,7 @@ def test_with_nans(self): np.array([True, True])) index = self.index_with_nan - self.assertTrue(index.hasnans) + assert index.hasnans tm.assert_numpy_array_equal(index.notnull(), np.array([True, False, True])) tm.assert_numpy_array_equal(index.isnull(), @@ -193,8 +193,8 @@ def test_ensure_copied_data(self): def test_equals(self): idx = self.index - self.assertTrue(idx.equals(idx)) - self.assertTrue(idx.equals(idx.copy())) + assert idx.equals(idx) + assert idx.equals(idx.copy()) assert not idx.equals(idx.astype(object)) assert not idx.equals(np.array(idx)) @@ -216,11 +216,11 @@ def test_astype(self): result = idx.astype(object) tm.assert_index_equal(result, Index(idx.values, dtype='object')) assert not idx.equals(result) - self.assertTrue(idx.equals(IntervalIndex.from_intervals(result))) + assert idx.equals(IntervalIndex.from_intervals(result)) result = idx.astype('interval') tm.assert_index_equal(result, idx) - self.assertTrue(result.equals(idx)) + assert result.equals(idx) result = idx.astype('category') expected = pd.Categorical(idx, ordered=True) @@ -243,12 +243,12 @@ def test_where_array_like(self): def test_delete(self): expected = IntervalIndex.from_breaks([1, 2]) actual = self.index.delete(0) - self.assertTrue(expected.equals(actual)) + assert expected.equals(actual) def test_insert(self): expected = IntervalIndex.from_breaks(range(4)) actual = self.index.insert(2, Interval(2, 3)) - self.assertTrue(expected.equals(actual)) + assert expected.equals(actual) pytest.raises(ValueError, self.index.insert, 0, 1) pytest.raises(ValueError, self.index.insert, 0, @@ -256,27 +256,27 @@ def test_insert(self): def test_take(self): actual = self.index.take([0, 1]) - self.assertTrue(self.index.equals(actual)) + assert self.index.equals(actual) expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2]) actual = self.index.take([0, 0, 1]) - self.assertTrue(expected.equals(actual)) + assert expected.equals(actual) def test_monotonic_and_unique(self): - 
self.assertTrue(self.index.is_monotonic) - self.assertTrue(self.index.is_unique) + assert self.index.is_monotonic + assert self.index.is_unique idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)]) - self.assertTrue(idx.is_monotonic) - self.assertTrue(idx.is_unique) + assert idx.is_monotonic + assert idx.is_unique idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (1, 2)]) assert not idx.is_monotonic - self.assertTrue(idx.is_unique) + assert idx.is_unique idx = IntervalIndex.from_tuples([(0, 2), (0, 2)]) assert not idx.is_unique - self.assertTrue(idx.is_monotonic) + assert idx.is_monotonic @pytest.mark.xfail(reason='not a valid repr as we use interval notation') def test_repr(self): @@ -514,10 +514,10 @@ def test_union(self): other = IntervalIndex.from_arrays([2], [3]) expected = IntervalIndex.from_arrays(range(3), range(1, 4)) actual = self.index.union(other) - self.assertTrue(expected.equals(actual)) + assert expected.equals(actual) actual = other.union(self.index) - self.assertTrue(expected.equals(actual)) + assert expected.equals(actual) tm.assert_index_equal(self.index.union(self.index), self.index) tm.assert_index_equal(self.index.union(self.index[:1]), @@ -527,7 +527,7 @@ def test_intersection(self): other = IntervalIndex.from_breaks([1, 2, 3]) expected = IntervalIndex.from_breaks([1, 2]) actual = self.index.intersection(other) - self.assertTrue(expected.equals(actual)) + assert expected.equals(actual) tm.assert_index_equal(self.index.intersection(self.index), self.index) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 6f6e1f1544219..714e901532ed9 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -65,19 +65,19 @@ def test_labels_dtypes(self): # GH 8456 i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - self.assertTrue(i.labels[0].dtype == 'int8') - self.assertTrue(i.labels[1].dtype == 'int8') + assert i.labels[0].dtype == 'int8' + assert i.labels[1].dtype == 'int8' i = MultiIndex.from_product([['a'], range(40)]) - self.assertTrue(i.labels[1].dtype == 'int8') + assert i.labels[1].dtype == 'int8' i = MultiIndex.from_product([['a'], range(400)]) - self.assertTrue(i.labels[1].dtype == 'int16') + assert i.labels[1].dtype == 'int16' i = MultiIndex.from_product([['a'], range(40000)]) - self.assertTrue(i.labels[1].dtype == 'int32') + assert i.labels[1].dtype == 'int32' i = pd.MultiIndex.from_product([['a'], range(1000)]) - self.assertTrue((i.labels[0] >= 0).all()) - self.assertTrue((i.labels[1] >= 0).all()) + assert (i.labels[0] >= 0).all() + assert (i.labels[1] >= 0).all() def test_where(self): i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) @@ -468,19 +468,19 @@ def test_copy_names(self): multi_idx = pd.Index([(1, 2), (3, 4)], names=['MyName1', 'MyName2']) multi_idx1 = multi_idx.copy() - self.assertTrue(multi_idx.equals(multi_idx1)) + assert multi_idx.equals(multi_idx1) self.assertEqual(multi_idx.names, ['MyName1', 'MyName2']) self.assertEqual(multi_idx1.names, ['MyName1', 'MyName2']) multi_idx2 = multi_idx.copy(names=['NewName1', 'NewName2']) - self.assertTrue(multi_idx.equals(multi_idx2)) + assert multi_idx.equals(multi_idx2) self.assertEqual(multi_idx.names, ['MyName1', 'MyName2']) self.assertEqual(multi_idx2.names, ['NewName1', 'NewName2']) multi_idx3 = multi_idx.copy(name=['NewName1', 'NewName2']) - self.assertTrue(multi_idx.equals(multi_idx3)) + assert multi_idx.equals(multi_idx3) self.assertEqual(multi_idx.names, ['MyName1', 'MyName2']) self.assertEqual(multi_idx3.names, ['NewName1', 'NewName2']) @@ 
-520,7 +520,7 @@ def test_names(self): def test_reference_duplicate_name(self): idx = MultiIndex.from_tuples( [('a', 'b'), ('c', 'd')], names=['x', 'x']) - self.assertTrue(idx._reference_duplicate_name('x')) + assert idx._reference_duplicate_name('x') idx = MultiIndex.from_tuples( [('a', 'b'), ('c', 'd')], names=['x', 'y']) @@ -673,9 +673,8 @@ def test_from_arrays(self): # infer correctly result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')], ['a', 'b']]) - self.assertTrue(result.levels[0].equals(Index([Timestamp('20130101') - ]))) - self.assertTrue(result.levels[1].equals(Index(['a', 'b']))) + assert result.levels[0].equals(Index([Timestamp('20130101')])) + assert result.levels[1].equals(Index(['a', 'b'])) def test_from_arrays_index_series_datetimetz(self): idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3, @@ -895,15 +894,15 @@ def test_values_boxed(self): def test_append(self): result = self.index[:3].append(self.index[3:]) - self.assertTrue(result.equals(self.index)) + assert result.equals(self.index) foos = [self.index[:1], self.index[1:3], self.index[3:]] result = foos[0].append(foos[1:]) - self.assertTrue(result.equals(self.index)) + assert result.equals(self.index) # empty result = self.index.append([]) - self.assertTrue(result.equals(self.index)) + assert result.equals(self.index) def test_append_mixed_dtypes(self): # GH 13660 @@ -1015,7 +1014,7 @@ def test_legacy_pickle(self): obj = pd.read_pickle(path) obj2 = MultiIndex.from_tuples(obj.values) - self.assertTrue(obj.equals(obj2)) + assert obj.equals(obj2) res = obj.get_indexer(obj) exp = np.arange(len(obj), dtype=np.intp) @@ -1034,7 +1033,7 @@ def test_legacy_v2_unpickle(self): obj = pd.read_pickle(path) obj2 = MultiIndex.from_tuples(obj.values) - self.assertTrue(obj.equals(obj2)) + assert obj.equals(obj2) res = obj.get_indexer(obj) exp = np.arange(len(obj), dtype=np.intp) @@ -1055,11 +1054,11 @@ def test_roundtrip_pickle_with_tz(self): tz='US/Eastern') ], names=['one', 'two', 'three']) unpickled = tm.round_trip_pickle(index) - self.assertTrue(index.equal_levels(unpickled)) + assert index.equal_levels(unpickled) def test_from_tuples_index_values(self): result = MultiIndex.from_tuples(self.index) - self.assertTrue((result.values == self.index.values).all()) + assert (result.values == self.index.values).all() def test_contains(self): assert ('foo', 'two') in self.index @@ -1077,9 +1076,9 @@ def test_contains_with_nat(self): pd.date_range('2012-01-01', periods=5)], labels=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], names=[None, 'B']) - self.assertTrue(('C', pd.Timestamp('2012-01-01')) in mi) + assert ('C', pd.Timestamp('2012-01-01')) in mi for val in mi.values: - self.assertTrue(val in mi) + assert val in mi def test_is_all_dates(self): assert not self.index.is_all_dates @@ -1095,14 +1094,14 @@ def test_getitem(self): # slice result = self.index[2:5] expected = self.index[[2, 3, 4]] - self.assertTrue(result.equals(expected)) + assert result.equals(expected) # boolean result = self.index[[True, False, True, False, True, True]] result2 = self.index[np.array([True, False, True, False, True, True])] expected = self.index[[0, 2, 4, 5]] - self.assertTrue(result.equals(expected)) - self.assertTrue(result2.equals(expected)) + assert result.equals(expected) + assert result2.equals(expected) def test_getitem_group_select(self): sorted_idx, _ = self.index.sortlevel(0) @@ -1157,7 +1156,7 @@ def test_get_loc_level(self): expected = slice(1, 2) exp_index = index[expected].droplevel(0).droplevel(0) self.assertEqual(loc, 
expected)
-        self.assertTrue(new_index.equals(exp_index))
+        assert new_index.equals(exp_index)

         loc, new_index = index.get_loc_level((0, 1, 0))
         expected = 1
@@ -1171,7 +1170,7 @@ def test_get_loc_level(self):
         result, new_index = index.get_loc_level((2000, slice(None, None)))
         expected = slice(None, None)
         self.assertEqual(result, expected)
-        self.assertTrue(new_index.equals(index.droplevel(0)))
+        assert new_index.equals(index.droplevel(0))

     def test_slice_locs(self):
         df = tm.makeTimeDataFrame()
@@ -1347,7 +1346,7 @@ def test_get_indexer(self):
         assert_almost_equal(r1, rexp1)

         r1 = idx1.get_indexer([1, 2, 3])
-        self.assertTrue((r1 == [-1, -1, -1]).all())
+        assert (r1 == [-1, -1, -1]).all()

         # create index with duplicates
         idx1 = Index(lrange(10) + lrange(10))
@@ -1533,41 +1532,41 @@ def test_equals_missing_values(self):

     def test_identical(self):
         mi = self.index.copy()
         mi2 = self.index.copy()
-        self.assertTrue(mi.identical(mi2))
+        assert mi.identical(mi2)

         mi = mi.set_names(['new1', 'new2'])
-        self.assertTrue(mi.equals(mi2))
+        assert mi.equals(mi2)
         assert not mi.identical(mi2)

         mi2 = mi2.set_names(['new1', 'new2'])
-        self.assertTrue(mi.identical(mi2))
+        assert mi.identical(mi2)

         mi3 = Index(mi.tolist(), names=mi.names)
         mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False)
-        self.assertTrue(mi.identical(mi3))
+        assert mi.identical(mi3)
         assert not mi.identical(mi4)
-        self.assertTrue(mi.equals(mi4))
+        assert mi.equals(mi4)

     def test_is_(self):
         mi = MultiIndex.from_tuples(lzip(range(10), range(10)))
-        self.assertTrue(mi.is_(mi))
-        self.assertTrue(mi.is_(mi.view()))
-        self.assertTrue(mi.is_(mi.view().view().view().view()))
+        assert mi.is_(mi)
+        assert mi.is_(mi.view())
+        assert mi.is_(mi.view().view().view().view())

         mi2 = mi.view()
         # names are metadata, they don't change id
         mi2.names = ["A", "B"]
-        self.assertTrue(mi2.is_(mi))
-        self.assertTrue(mi.is_(mi2))
+        assert mi2.is_(mi)
+        assert mi.is_(mi2)

-        self.assertTrue(mi.is_(mi.set_names(["C", "D"])))
+        assert mi.is_(mi.set_names(["C", "D"]))
         mi2 = mi.view()
         mi2.set_names(["E", "F"], inplace=True)
-        self.assertTrue(mi.is_(mi2))
+        assert mi.is_(mi2)

         # levels are inherent properties, they change identity
         mi3 = mi2.set_levels([lrange(10), lrange(10)])
         assert not mi3.is_(mi2)
         # shouldn't change
-        self.assertTrue(mi2.is_(mi))
+        assert mi2.is_(mi)
         mi4 = mi3.view()
         mi4.set_levels([[1 for _ in range(10)], lrange(10)], inplace=True)
         assert not mi4.is_(mi3)
@@ -1584,7 +1583,7 @@ def test_union(self):
         tups = sorted(self.index.values)

         expected = MultiIndex.from_tuples(tups)
-        self.assertTrue(the_union.equals(expected))
+        assert the_union.equals(expected)

         # corner case, pass self or empty thing:
         the_union = self.index.union(self.index)
@@ -1596,7 +1595,7 @@ def test_union(self):
         # won't work in python 3
         # tuples = self.index.values
         # result = self.index[:4] | tuples[4:]
-        # self.assertTrue(result.equals(tuples))
+        # assert result.equals(tuples)

     # not valid for python 3
     # def test_union_with_regular_index(self):

     # assert 'B' in result

     # result2 = self.index.union(other)
-    # self.assertTrue(result.equals(result2))
+    # assert result.equals(result2)

     def test_intersection(self):
         piece1 = self.index[:5][::-1]
         piece2 = self.index[3:]

         the_int = piece1 & piece2
         tups = sorted(self.index[3:5].values)

         expected = MultiIndex.from_tuples(tups)
-        self.assertTrue(the_int.equals(expected))
+        assert the_int.equals(expected)

         # corner case, pass self
         the_int = self.index.intersection(self.index)
@@ -1625,12 +1624,12
@@ def test_intersection(self): # empty intersection: disjoint empty = self.index[:2] & self.index[2:] expected = self.index[:0] - self.assertTrue(empty.equals(expected)) + assert empty.equals(expected) # can't do in python 3 # tuples = self.index.values # result = self.index & tuples - # self.assertTrue(result.equals(tuples)) + # assert result.equals(tuples) def test_sub(self): @@ -1655,25 +1654,25 @@ def test_difference(self): names=self.index.names) assert isinstance(result, MultiIndex) - self.assertTrue(result.equals(expected)) + assert result.equals(expected) self.assertEqual(result.names, self.index.names) # empty difference: reflexive result = self.index.difference(self.index) expected = self.index[:0] - self.assertTrue(result.equals(expected)) + assert result.equals(expected) self.assertEqual(result.names, self.index.names) # empty difference: superset result = self.index[-3:].difference(self.index) expected = self.index[:0] - self.assertTrue(result.equals(expected)) + assert result.equals(expected) self.assertEqual(result.names, self.index.names) # empty difference: degenerate result = self.index[:0].difference(self.index) expected = self.index[:0] - self.assertTrue(result.equals(expected)) + assert result.equals(expected) self.assertEqual(result.names, self.index.names) # names not the same @@ -1688,11 +1687,11 @@ def test_difference(self): # raise Exception called with non-MultiIndex result = first.difference(first.values) - self.assertTrue(result.equals(first[:0])) + assert result.equals(first[:0]) # name from empty array result = first.difference([]) - self.assertTrue(first.equals(result)) + assert first.equals(result) self.assertEqual(first.names, result.names) # name from non-empty array @@ -1728,23 +1727,23 @@ def test_sortlevel(self): sorted_idx, _ = index.sortlevel(0) expected = MultiIndex.from_tuples(sorted(tuples)) - self.assertTrue(sorted_idx.equals(expected)) + assert sorted_idx.equals(expected) sorted_idx, _ = index.sortlevel(0, ascending=False) - self.assertTrue(sorted_idx.equals(expected[::-1])) + assert sorted_idx.equals(expected[::-1]) sorted_idx, _ = index.sortlevel(1) by1 = sorted(tuples, key=lambda x: (x[1], x[0])) expected = MultiIndex.from_tuples(by1) - self.assertTrue(sorted_idx.equals(expected)) + assert sorted_idx.equals(expected) sorted_idx, _ = index.sortlevel(1, ascending=False) - self.assertTrue(sorted_idx.equals(expected[::-1])) + assert sorted_idx.equals(expected[::-1]) def test_sortlevel_not_sort_remaining(self): mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) sorted_idx, _ = mi.sortlevel('A', sort_remaining=False) - self.assertTrue(sorted_idx.equals(mi)) + assert sorted_idx.equals(mi) def test_sortlevel_deterministic(self): tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'), @@ -1754,18 +1753,18 @@ def test_sortlevel_deterministic(self): sorted_idx, _ = index.sortlevel(0) expected = MultiIndex.from_tuples(sorted(tuples)) - self.assertTrue(sorted_idx.equals(expected)) + assert sorted_idx.equals(expected) sorted_idx, _ = index.sortlevel(0, ascending=False) - self.assertTrue(sorted_idx.equals(expected[::-1])) + assert sorted_idx.equals(expected[::-1]) sorted_idx, _ = index.sortlevel(1) by1 = sorted(tuples, key=lambda x: (x[1], x[0])) expected = MultiIndex.from_tuples(by1) - self.assertTrue(sorted_idx.equals(expected)) + assert sorted_idx.equals(expected) sorted_idx, _ = index.sortlevel(1, ascending=False) - self.assertTrue(sorted_idx.equals(expected[::-1])) + assert sorted_idx.equals(expected[::-1]) def test_dims(self): 
pass @@ -1836,7 +1835,7 @@ def test_droplevel_with_names(self): dropped = index.droplevel('two') expected = index.droplevel(1) - self.assertTrue(dropped.equals(expected)) + assert dropped.equals(expected) def test_droplevel_multiple(self): index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( @@ -1846,7 +1845,7 @@ def test_droplevel_multiple(self): dropped = index[:2].droplevel(['three', 'one']) expected = index[:2].droplevel(2).droplevel(0) - self.assertTrue(dropped.equals(expected)) + assert dropped.equals(expected) def test_drop_not_lexsorted(self): # GH 12078 @@ -1854,7 +1853,7 @@ def test_drop_not_lexsorted(self): # define the lexsorted version of the multi-index tuples = [('a', ''), ('b1', 'c1'), ('b2', 'c2')] lexsorted_mi = MultiIndex.from_tuples(tuples, names=['b', 'c']) - self.assertTrue(lexsorted_mi.is_lexsorted()) + assert lexsorted_mi.is_lexsorted() # and the not-lexsorted version df = pd.DataFrame(columns=['a', 'b', 'c', 'd'], @@ -1873,7 +1872,7 @@ def test_drop_not_lexsorted(self): def test_insert(self): # key contained in all levels new_index = self.index.insert(0, ('bar', 'two')) - self.assertTrue(new_index.equal_levels(self.index)) + assert new_index.equal_levels(self.index) self.assertEqual(new_index[0], ('bar', 'two')) # key not contained in all levels @@ -2005,8 +2004,8 @@ def _check_how(other, how): return_indexers=True) exp_level = other.join(self.index.levels[1], how=how) - self.assertTrue(join_index.levels[0].equals(self.index.levels[0])) - self.assertTrue(join_index.levels[1].equals(exp_level)) + assert join_index.levels[0].equals(self.index.levels[0]) + assert join_index.levels[1].equals(exp_level) # pare down levels mask = np.array( @@ -2019,7 +2018,7 @@ def _check_how(other, how): self.index.join(other, how=how, level='second', return_indexers=True) - self.assertTrue(join_index.equals(join_index2)) + assert join_index.equals(join_index2) tm.assert_numpy_array_equal(lidx, lidx2) tm.assert_numpy_array_equal(ridx, ridx2) tm.assert_numpy_array_equal(join_index2.values, exp_values) @@ -2102,11 +2101,11 @@ def test_reindex_level(self): exp_index = self.index.join(idx, level='second', how='right') exp_index2 = self.index.join(idx, level='second', how='left') - self.assertTrue(target.equals(exp_index)) + assert target.equals(exp_index) exp_indexer = np.array([0, 2, 4]) tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False) - self.assertTrue(target2.equals(exp_index2)) + assert target2.equals(exp_index2) exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False) @@ -2120,11 +2119,11 @@ def test_reindex_level(self): def test_duplicates(self): assert not self.index.has_duplicates - self.assertTrue(self.index.append(self.index).has_duplicates) + assert self.index.append(self.index).has_duplicates index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]) - self.assertTrue(index.has_duplicates) + assert index.has_duplicates # GH 9075 t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169), @@ -2179,7 +2178,7 @@ def check(nlevels, with_nulls): values = index.values.tolist() index = MultiIndex.from_tuples(values + [values[0]]) - self.assertTrue(index.has_duplicates) + assert index.has_duplicates # no overflow check(4, False) @@ -2228,7 +2227,7 @@ def test_duplicate_meta_data(self): index.set_names([None, None]), index.set_names([None, 'Num']), index.set_names(['Upper', 'Num']), ]: - self.assertTrue(idx.has_duplicates) + assert 
idx.has_duplicates self.assertEqual(idx.drop_duplicates().names, idx.names) def test_get_unique_index(self): @@ -2237,7 +2236,7 @@ def test_get_unique_index(self): for dropna in [False, True]: result = idx._get_unique_index(dropna=dropna) - self.assertTrue(result.unique) + assert result.unique tm.assert_index_equal(result, expected) def test_unique(self): @@ -2370,7 +2369,7 @@ def test_level_setting_resets_attributes(self): ind = MultiIndex.from_arrays([ ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] ]) - self.assertTrue(ind.is_monotonic) + assert ind.is_monotonic ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], inplace=True) @@ -2380,8 +2379,8 @@ def test_level_setting_resets_attributes(self): def test_is_monotonic(self): i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=['one', 'two']) - self.assertTrue(i.is_monotonic) - self.assertTrue(Index(i.values).is_monotonic) + assert i.is_monotonic + assert Index(i.values).is_monotonic i = MultiIndex.from_product([np.arange(10, 0, -1), np.arange(10)], names=['one', 'two']) @@ -2412,8 +2411,8 @@ def test_is_monotonic(self): labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) - self.assertTrue(i.is_monotonic) - self.assertTrue(Index(i.values).is_monotonic) + assert i.is_monotonic + assert Index(i.values).is_monotonic # mixed levels, hits the TypeError i = MultiIndex( @@ -2617,7 +2616,7 @@ def test_index_name_retained(self): def test_equals_operator(self): # GH9785 - self.assertTrue((self.index == self.index).all()) + assert (self.index == self.index).all() def test_large_multiindex_error(self): # GH12527 diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 8b4179dbf2e0e..68a329a7f741f 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -228,11 +228,11 @@ def test_constructor(self): # nan handling result = Float64Index([np.nan, np.nan]) - self.assertTrue(pd.isnull(result.values).all()) + assert pd.isnull(result.values).all() result = Float64Index(np.array([np.nan])) - self.assertTrue(pd.isnull(result.values).all()) + assert pd.isnull(result.values).all() result = Index(np.array([np.nan])) - self.assertTrue(pd.isnull(result.values).all()) + assert pd.isnull(result.values).all() def test_constructor_invalid(self): @@ -260,15 +260,15 @@ def test_constructor_explicit(self): def test_astype(self): result = self.float.astype(object) - self.assertTrue(result.equals(self.float)) - self.assertTrue(self.float.equals(result)) + assert result.equals(self.float) + assert self.float.equals(result) self.check_is_index(result) i = self.mixed.copy() i.name = 'foo' result = i.astype(object) - self.assertTrue(result.equals(i)) - self.assertTrue(i.equals(result)) + assert result.equals(i) + assert i.equals(result) self.check_is_index(result) # GH 12881 @@ -307,18 +307,18 @@ def test_astype(self): def test_equals_numeric(self): i = Float64Index([1.0, 2.0]) - self.assertTrue(i.equals(i)) - self.assertTrue(i.identical(i)) + assert i.equals(i) + assert i.identical(i) i2 = Float64Index([1.0, 2.0]) - self.assertTrue(i.equals(i2)) + assert i.equals(i2) i = Float64Index([1.0, np.nan]) - self.assertTrue(i.equals(i)) - self.assertTrue(i.identical(i)) + assert i.equals(i) + assert i.identical(i) i2 = Float64Index([1.0, np.nan]) - self.assertTrue(i.equals(i2)) + assert i.equals(i2) def test_get_indexer(self): idx = Float64Index([0.0, 1.0, 2.0]) @@ -363,7 +363,7 @@ def test_get_loc_na(self): # representable by slice [0:2:2] # 
pytest.raises(KeyError, idx.slice_locs, np.nan)
         sliced = idx.slice_locs(np.nan)
-        self.assertTrue(isinstance(sliced, tuple))
+        assert isinstance(sliced, tuple)
         self.assertEqual(sliced, (0, 3))

         # not representable by slice
@@ -373,17 +373,17 @@ def test_get_loc_na(self):

     def test_contains_nans(self):
         i = Float64Index([1.0, 2.0, np.nan])
-        self.assertTrue(np.nan in i)
+        assert np.nan in i

     def test_contains_not_nans(self):
         i = Float64Index([1.0, 2.0, np.nan])
-        self.assertTrue(1.0 in i)
+        assert 1.0 in i

     def test_doesnt_contain_all_the_things(self):
         i = Float64Index([np.nan])
         assert not i.isin([0]).item()
         assert not i.isin([1]).item()
-        self.assertTrue(i.isin([np.nan]).item())
+        assert i.isin([np.nan]).item()

     def test_nan_multiple_containment(self):
         i = Float64Index([1.0, np.nan])
@@ -463,18 +463,18 @@ def test_view(self):
         tm.assert_index_equal(i, self._holder(i_view, name='Foo'))

     def test_is_monotonic(self):
-        self.assertTrue(self.index.is_monotonic)
-        self.assertTrue(self.index.is_monotonic_increasing)
+        assert self.index.is_monotonic
+        assert self.index.is_monotonic_increasing
         assert not self.index.is_monotonic_decreasing

         index = self._holder([4, 3, 2, 1])
         assert not index.is_monotonic
-        self.assertTrue(index.is_monotonic_decreasing)
+        assert index.is_monotonic_decreasing

         index = self._holder([1])
-        self.assertTrue(index.is_monotonic)
-        self.assertTrue(index.is_monotonic_increasing)
-        self.assertTrue(index.is_monotonic_decreasing)
+        assert index.is_monotonic
+        assert index.is_monotonic_increasing
+        assert index.is_monotonic_decreasing

     def test_logical_compat(self):
         idx = self.create_index()
@@ -483,7 +483,7 @@ def test_logical_compat(self):

     def test_identical(self):
         i = Index(self.index.copy())
-        self.assertTrue(i.identical(self.index))
+        assert i.identical(self.index)

         same_values_different_type = Index(i, dtype=object)
         assert not i.identical(same_values_different_type)
@@ -491,11 +491,10 @@ def test_identical(self):
         i = self.index.copy(dtype=object)
         i = i.rename('foo')
         same_values = Index(i, dtype=object)
-        self.assertTrue(same_values.identical(i))
+        assert same_values.identical(i)

         assert not i.identical(self.index)
-        self.assertTrue(Index(same_values, name='foo', dtype=object).identical(
-            i))
+        assert Index(same_values, name='foo', dtype=object).identical(i)

         assert not self.index.copy(dtype=object).identical(
             self.index.copy(dtype=self._dtype))

diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py
index 0baf6636806f6..49536be1aa57c 100644
--- a/pandas/tests/indexes/test_range.py
+++ b/pandas/tests/indexes/test_range.py
@@ -125,7 +125,7 @@ def test_constructor_same(self):
         # pass thru w and w/o copy
         index = RangeIndex(1, 5, 2)
         result = RangeIndex(index, copy=False)
-        self.assertTrue(result.identical(index))
+        assert result.identical(index)

         result = RangeIndex(index, copy=True)
         tm.assert_index_equal(result, index, exact=True)
@@ -172,16 +172,16 @@ def test_constructor_name(self):
         copy = RangeIndex(orig)
         copy.name = 'copy'

-        self.assertTrue(orig.name, 'original')
-        self.assertTrue(copy.name, 'copy')
+        assert orig.name == 'original'
+        assert copy.name == 'copy'

         new = Index(copy)
-        self.assertTrue(new.name, 'copy')
+        assert new.name == 'copy'

         new.name = 'new'
-        self.assertTrue(orig.name, 'original')
-        self.assertTrue(new.name, 'copy')
-        self.assertTrue(new.name, 'new')
+        assert orig.name == 'original'
+        assert copy.name == 'copy'
+        assert new.name == 'new'

     def test_numeric_compat2(self):
         # validate that we are handling the RangeIndex overrides to numeric ops
@@ -259,8 +259,8
@@ def test_constructor_corner(self):

     def test_copy(self):
         i = RangeIndex(5, name='Foo')
         i_copy = i.copy()
-        self.assertTrue(i_copy is not i)
-        self.assertTrue(i_copy.identical(i))
+        assert i_copy is not i
+        assert i_copy.identical(i)
         self.assertEqual(i_copy._start, 0)
         self.assertEqual(i_copy._stop, 5)
         self.assertEqual(i_copy._step, 1)
@@ -273,7 +273,7 @@ def test_repr(self):
             expected = "RangeIndex(start=0, stop=5, step=1, name='Foo')"
         else:
             expected = "RangeIndex(start=0, stop=5, step=1, name=u'Foo')"
-        self.assertTrue(result, expected)
+        assert result == expected

         result = eval(result)
         tm.assert_index_equal(result, i, exact=True)
@@ -328,28 +328,28 @@ def test_dtype(self):
         self.assertEqual(self.index.dtype, np.int64)

     def test_is_monotonic(self):
-        self.assertTrue(self.index.is_monotonic)
-        self.assertTrue(self.index.is_monotonic_increasing)
+        assert self.index.is_monotonic
+        assert self.index.is_monotonic_increasing
         assert not self.index.is_monotonic_decreasing

         index = RangeIndex(4, 0, -1)
         assert not index.is_monotonic
-        self.assertTrue(index.is_monotonic_decreasing)
+        assert index.is_monotonic_decreasing

         index = RangeIndex(1, 2)
-        self.assertTrue(index.is_monotonic)
-        self.assertTrue(index.is_monotonic_increasing)
-        self.assertTrue(index.is_monotonic_decreasing)
+        assert index.is_monotonic
+        assert index.is_monotonic_increasing
+        assert index.is_monotonic_decreasing

         index = RangeIndex(2, 1)
-        self.assertTrue(index.is_monotonic)
-        self.assertTrue(index.is_monotonic_increasing)
-        self.assertTrue(index.is_monotonic_decreasing)
+        assert index.is_monotonic
+        assert index.is_monotonic_increasing
+        assert index.is_monotonic_decreasing

         index = RangeIndex(1, 1)
-        self.assertTrue(index.is_monotonic)
-        self.assertTrue(index.is_monotonic_increasing)
-        self.assertTrue(index.is_monotonic_decreasing)
+        assert index.is_monotonic
+        assert index.is_monotonic_increasing
+        assert index.is_monotonic_decreasing

     def test_equals_range(self):
         equiv_pairs = [(RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)),
@@ -357,8 +357,8 @@ def test_equals_range(self):
                        (RangeIndex(1, 2, 3), RangeIndex(1, 3, 4)),
                        (RangeIndex(0, -9, -2), RangeIndex(0, -10, -2))]
         for left, right in equiv_pairs:
-            self.assertTrue(left.equals(right))
-            self.assertTrue(right.equals(left))
+            assert left.equals(right)
+            assert right.equals(left)

     def test_logical_compat(self):
         idx = self.create_index()
@@ -367,7 +367,7 @@ def test_logical_compat(self):

     def test_identical(self):
         i = Index(self.index.copy())
-        self.assertTrue(i.identical(self.index))
+        assert i.identical(self.index)

         # we don't allow object dtype for RangeIndex
         if isinstance(self.index, RangeIndex):
@@ -379,11 +379,10 @@ def test_identical(self):
         i = self.index.copy(dtype=object)
         i = i.rename('foo')
         same_values = Index(i, dtype=object)
-        self.assertTrue(same_values.identical(self.index.copy(dtype=object)))
+        assert same_values.identical(self.index.copy(dtype=object))

         assert not i.identical(self.index)
-        self.assertTrue(Index(same_values, name='foo', dtype=object).identical(
-            i))
+        assert Index(same_values, name='foo', dtype=object).identical(i)

         assert not self.index.copy(dtype=object).identical(
             self.index.copy(dtype='int64'))

@@ -689,7 +688,7 @@ def test_nbytes(self):

         # memory savings vs int index
         i = RangeIndex(0, 1000)
-        self.assertTrue(i.nbytes < i.astype(int).nbytes / 10)
+        assert i.nbytes < i.astype(int).nbytes / 10

         # constant memory usage
         i2 = RangeIndex(0, 10)
@@ -784,7 +783,7 @@ def test_duplicates(self):
             if not len(ind):
                 continue
             idx = self.indices[ind]
-
self.assertTrue(idx.is_unique) + assert idx.is_unique assert not idx.has_duplicates def test_ufunc_compat(self): diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index b17433d3aeb51..6e82f165e4909 100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -55,7 +55,7 @@ def test_astype_timedelta64(self): result = idx.astype('timedelta64[ns]', copy=False) tm.assert_index_equal(result, idx) - self.assertTrue(result is idx) + assert result is idx def test_astype_raises(self): # GH 13149, GH 13209 diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 9747902f316a6..feaec50264872 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -33,7 +33,7 @@ def test_asobject_tolist(self): Timedelta('3 days'), Timedelta('4 days')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.asobject - self.assertTrue(isinstance(result, Index)) + assert isinstance(result, Index) self.assertEqual(result.dtype, object) tm.assert_index_equal(result, expected) @@ -46,7 +46,7 @@ def test_asobject_tolist(self): Timedelta('4 days')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.asobject - self.assertTrue(isinstance(result, Index)) + assert isinstance(result, Index) self.assertEqual(result.dtype, object) tm.assert_index_equal(result, expected) self.assertEqual(result.name, expected.name) @@ -56,7 +56,7 @@ def test_minmax(self): # monotonic idx1 = TimedeltaIndex(['1 days', '2 days', '3 days']) - self.assertTrue(idx1.is_monotonic) + assert idx1.is_monotonic # non-monotonic idx2 = TimedeltaIndex(['1 days', np.nan, '3 days', 'NaT']) @@ -71,13 +71,13 @@ def test_minmax(self): for op in ['min', 'max']: # Return NaT obj = TimedeltaIndex([]) - self.assertTrue(pd.isnull(getattr(obj, op)())) + assert pd.isnull(getattr(obj, op)()) obj = TimedeltaIndex([pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) + assert pd.isnull(getattr(obj, op)()) obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) + assert pd.isnull(getattr(obj, op)()) def test_numpy_minmax(self): dr = pd.date_range(start='2016-01-15', end='2016-01-20') @@ -825,7 +825,7 @@ def test_nat(self): assert pd.TimedeltaIndex([])._na_value is pd.NaT idx = pd.TimedeltaIndex(['1 days', '2 days']) - self.assertTrue(idx._can_hold_na) + assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) assert not idx.hasnans @@ -833,21 +833,21 @@ def test_nat(self): np.array([], dtype=np.intp)) idx = pd.TimedeltaIndex(['1 days', 'NaT']) - self.assertTrue(idx._can_hold_na) + assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - self.assertTrue(idx.hasnans) + assert idx.hasnans tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) def test_equals(self): # GH 13107 idx = pd.TimedeltaIndex(['1 days', '2 days', 'NaT']) - self.assertTrue(idx.equals(idx)) - self.assertTrue(idx.equals(idx.copy())) - self.assertTrue(idx.equals(idx.asobject)) - self.assertTrue(idx.asobject.equals(idx)) - self.assertTrue(idx.asobject.equals(idx.asobject)) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.asobject) + assert idx.asobject.equals(idx) + assert idx.asobject.equals(idx.asobject) assert not idx.equals(list(idx)) assert not idx.equals(pd.Series(idx)) @@ -870,18 +870,18 @@ 
def test_ops(self): self.assertEqual(-td, Timedelta(-10, unit='d')) self.assertEqual(+td, Timedelta(10, unit='d')) self.assertEqual(td - td, Timedelta(0, unit='ns')) - self.assertTrue((td - pd.NaT) is pd.NaT) + assert (td - pd.NaT) is pd.NaT self.assertEqual(td + td, Timedelta(20, unit='d')) - self.assertTrue((td + pd.NaT) is pd.NaT) + assert (td + pd.NaT) is pd.NaT self.assertEqual(td * 2, Timedelta(20, unit='d')) - self.assertTrue((td * pd.NaT) is pd.NaT) + assert (td * pd.NaT) is pd.NaT self.assertEqual(td / 2, Timedelta(5, unit='d')) self.assertEqual(td // 2, Timedelta(5, unit='d')) self.assertEqual(abs(td), td) self.assertEqual(abs(-td), td) self.assertEqual(td / td, 1) - self.assertTrue((td / pd.NaT) is np.nan) - self.assertTrue((td // pd.NaT) is np.nan) + assert (td / pd.NaT) is np.nan + assert (td // pd.NaT) is np.nan # invert self.assertEqual(-td, Timedelta('-10d')) @@ -995,11 +995,11 @@ class Other: other = Other() td = Timedelta('1 day') - self.assertTrue(td.__add__(other) is NotImplemented) - self.assertTrue(td.__sub__(other) is NotImplemented) - self.assertTrue(td.__truediv__(other) is NotImplemented) - self.assertTrue(td.__mul__(other) is NotImplemented) - self.assertTrue(td.__floordiv__(other) is NotImplemented) + assert td.__add__(other) is NotImplemented + assert td.__sub__(other) is NotImplemented + assert td.__truediv__(other) is NotImplemented + assert td.__mul__(other) is NotImplemented + assert td.__floordiv__(other) is NotImplemented def test_ops_error_str(self): # GH 13624 diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index c90c61170ca93..8a327d2ecb08f 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -247,10 +247,10 @@ def test_isin(self): index = tm.makeTimedeltaIndex(4) result = index.isin(index) - self.assertTrue(result.all()) + assert result.all() result = index.isin(list(index)) - self.assertTrue(result.all()) + assert result.all() assert_almost_equal(index.isin([index[2], 5]), np.array([False, False, True, False])) @@ -483,7 +483,7 @@ def test_append_numpy_bug_1681(self): str(c) result = a.append(c) - self.assertTrue((result['B'] == td).all()) + assert (result['B'] == td).all() def test_fields(self): rng = timedelta_range('1 days, 10:11:12.100123456', periods=2, @@ -569,7 +569,7 @@ def test_timedelta(self): index = date_range('1/1/2000', periods=50, freq='B') shifted = index + timedelta(1) back = shifted + timedelta(-1) - self.assertTrue(tm.equalContents(index, back)) + assert tm.equalContents(index, back) self.assertEqual(shifted.freq, index.freq) self.assertEqual(shifted.freq, back.freq) diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index 12ed8a2e38f92..d69f78bfd73b1 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -32,7 +32,7 @@ def conv(v): self.assertEqual(result.astype('int64'), iNaT) result = to_timedelta(['', '']) - self.assertTrue(isnull(result).all()) + assert isnull(result).all() # pass thru result = to_timedelta(np.array([np.timedelta64(1, 's')])) @@ -122,8 +122,7 @@ def test_to_timedelta_invalid(self): # time not supported ATM pytest.raises(ValueError, lambda: to_timedelta(time(second=1))) - self.assertTrue(to_timedelta( - time(second=1), errors='coerce') is pd.NaT) + assert to_timedelta(time(second=1), errors='coerce') is pd.NaT pytest.raises(ValueError, lambda: to_timedelta(['foo', 
'bar'])) tm.assert_index_equal(TimedeltaIndex([pd.NaT, pd.NaT]), diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 498604aaac853..4d4ef65b40074 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -130,14 +130,14 @@ def f(): s2 = s.copy() s2.loc[3.0] = 10 - self.assertTrue(s2.index.is_object()) + assert s2.index.is_object() for idxr in [lambda x: x.ix, lambda x: x]: s2 = s.copy() with catch_warnings(record=True): idxr(s2)[3.0] = 0 - self.assertTrue(s2.index.is_object()) + assert s2.index.is_object() # fallsback to position selection, series only s = Series(np.arange(len(i)), index=i) @@ -239,7 +239,7 @@ def test_scalar_integer(self): # contains # coerce to equal int - self.assertTrue(3.0 in s) + assert 3.0 in s def test_scalar_float(self): @@ -275,7 +275,7 @@ def f(): pytest.raises(KeyError, lambda: idxr(s)[3.5]) # contains - self.assertTrue(3.0 in s) + assert 3.0 in s # iloc succeeds with an integer expected = s.iloc[3] @@ -440,7 +440,7 @@ def f(): with catch_warnings(record=True): idxr(sc)[l] = 0 result = idxr(sc)[l].values.ravel() - self.assertTrue((result == 0).all()) + assert (result == 0).all() # positional indexing def f(): @@ -534,7 +534,7 @@ def f(): with catch_warnings(record=True): idxr(sc)[l] = 0 result = idxr(sc)[l].values.ravel() - self.assertTrue((result == 0).all()) + assert (result == 0).all() # positional indexing def f(): @@ -570,7 +570,7 @@ def test_slice_float(self): with catch_warnings(record=True): idxr(s2)[l] = 0 result = idxr(s2)[l].values.ravel() - self.assertTrue((result == 0).all()) + assert (result == 0).all() def test_floating_index_doc_example(self): diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 18b169559b2d4..baced46923fd4 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -191,7 +191,7 @@ def test_iloc_getitem_dups(self): # cross-sectional indexing result = df.iloc[0, 0] - self.assertTrue(isnull(result)) + assert isnull(result) result = df.iloc[0, :] expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'], diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index d0f089f0804c3..5924dba488043 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -87,8 +87,8 @@ def test_setitem_dtype_upcast(self): columns=['foo', 'bar', 'baz']) tm.assert_frame_equal(left, right) - self.assertTrue(is_integer_dtype(left['foo'])) - self.assertTrue(is_integer_dtype(left['baz'])) + assert is_integer_dtype(left['foo']) + assert is_integer_dtype(left['baz']) left = DataFrame(np.arange(6, dtype='int64').reshape(2, 3) / 10.0, index=list('ab'), @@ -99,8 +99,8 @@ def test_setitem_dtype_upcast(self): columns=['foo', 'bar', 'baz']) tm.assert_frame_equal(left, right) - self.assertTrue(is_float_dtype(left['foo'])) - self.assertTrue(is_float_dtype(left['baz'])) + assert is_float_dtype(left['foo']) + assert is_float_dtype(left['baz']) def test_dups_fancy_indexing(self): @@ -430,7 +430,7 @@ def test_string_slice(self): # dtype should properly raises KeyError df = pd.DataFrame([1], pd.Index([pd.Timestamp('2011-01-01')], dtype=object)) - self.assertTrue(df.index.is_all_dates) + assert df.index.is_all_dates with pytest.raises(KeyError): df['2011'] @@ -556,15 +556,15 @@ def test_index_type_coercion(self): for s in [Series(range(5)), Series(range(5), index=range(1, 6))]: - self.assertTrue(s.index.is_integer()) + assert 
s.index.is_integer() for indexer in [lambda x: x.ix, lambda x: x.loc, lambda x: x]: s2 = s.copy() indexer(s2)[0.1] = 0 - self.assertTrue(s2.index.is_floating()) - self.assertTrue(indexer(s2)[0.1] == 0) + assert s2.index.is_floating() + assert indexer(s2)[0.1] == 0 s2 = s.copy() indexer(s2)[0.0] = 0 @@ -575,11 +575,11 @@ def test_index_type_coercion(self): s2 = s.copy() indexer(s2)['0'] = 0 - self.assertTrue(s2.index.is_object()) + assert s2.index.is_object() for s in [Series(range(5), index=np.arange(5.))]: - self.assertTrue(s.index.is_floating()) + assert s.index.is_floating() for idxr in [lambda x: x.ix, lambda x: x.loc, @@ -587,8 +587,8 @@ def test_index_type_coercion(self): s2 = s.copy() idxr(s2)[0.1] = 0 - self.assertTrue(s2.index.is_floating()) - self.assertTrue(idxr(s2)[0.1] == 0) + assert s2.index.is_floating() + assert idxr(s2)[0.1] == 0 s2 = s.copy() idxr(s2)[0.0] = 0 @@ -596,7 +596,7 @@ def test_index_type_coercion(self): s2 = s.copy() idxr(s2)['0'] = 0 - self.assertTrue(s2.index.is_object()) + assert s2.index.is_object() class TestMisc(Base, tm.TestCase): @@ -776,7 +776,7 @@ def test_non_reducing_slice(self): ] for slice_ in slices: tslice_ = _non_reducing_slice(slice_) - self.assertTrue(isinstance(df.loc[tslice_], DataFrame)) + assert isinstance(df.loc[tslice_], DataFrame) def test_list_slice(self): # like dataframe getitem diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py index c3ce21343b8d1..433b44c952ca1 100644 --- a/pandas/tests/indexing/test_ix.py +++ b/pandas/tests/indexing/test_ix.py @@ -84,7 +84,7 @@ def compare(result, expected): if is_scalar(expected): self.assertEqual(result, expected) else: - self.assertTrue(expected.equals(result)) + assert expected.equals(result) # failure cases for .loc, but these work for .ix df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD')) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 862a6e6326ddd..b430f458d48b5 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -325,8 +325,8 @@ def test_loc_general(self): # want this to work result = df.loc[:, "A":"B"].iloc[0:2, :] - self.assertTrue((result.columns == ['A', 'B']).all()) - self.assertTrue((result.index == ['A', 'B']).all()) + assert (result.columns == ['A', 'B']).all() + assert (result.index == ['A', 'B']).all() # mixed type result = DataFrame({'a': [Timestamp('20130101')], 'b': [1]}).iloc[0] diff --git a/pandas/tests/io/formats/test_eng_formatting.py b/pandas/tests/io/formats/test_eng_formatting.py index 8eb4ed576fff1..41bb95964b4a2 100644 --- a/pandas/tests/io/formats/test_eng_formatting.py +++ b/pandas/tests/io/formats/test_eng_formatting.py @@ -184,7 +184,7 @@ def test_nan(self): pt = df.pivot_table(values='a', index='b', columns='c') fmt.set_eng_float_format(accuracy=1) result = pt.to_string() - self.assertTrue('NaN' in result) + assert 'NaN' in result tm.reset_display_options() def test_inf(self): diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index ccc1372495106..6f19a4a126118 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -148,7 +148,7 @@ def test_show_null_counts(self): def check(null_counts, result): buf = StringIO() df.info(buf=buf, null_counts=null_counts) - self.assertTrue(('non-null' in buf.getvalue()) is result) + assert ('non-null' in buf.getvalue()) is result with option_context('display.max_info_rows', 20, 'display.max_info_columns', 20): @@ -209,10 
+209,10 @@ def test_repr_chop_threshold(self): def test_repr_obeys_max_seq_limit(self): with option_context("display.max_seq_items", 2000): - self.assertTrue(len(printing.pprint_thing(lrange(1000))) > 1000) + assert len(printing.pprint_thing(lrange(1000))) > 1000 with option_context("display.max_seq_items", 5): - self.assertTrue(len(printing.pprint_thing(lrange(1000))) < 100) + assert len(printing.pprint_thing(lrange(1000))) < 100 def test_repr_set(self): self.assertEqual(printing.pprint_thing(set([1])), '{1}') @@ -235,12 +235,12 @@ def test_repr_should_return_str(self): index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), u("\u03c6")] cols = [u("\u03c8")] df = DataFrame(data, columns=cols, index=index1) - self.assertTrue(type(df.__repr__()) == str) # both py2 / 3 + assert type(df.__repr__()) == str # both py2 / 3 def test_repr_no_backslash(self): with option_context('mode.sim_interactive', True): df = DataFrame(np.random.randn(10, 4)) - self.assertTrue('\\' not in repr(df)) + assert '\\' not in repr(df) def test_expand_frame_repr(self): df_small = DataFrame('hello', [0], [0]) @@ -255,16 +255,16 @@ def test_expand_frame_repr(self): assert not has_truncated_repr(df_small) assert not has_expanded_repr(df_small) assert not has_truncated_repr(df_wide) - self.assertTrue(has_expanded_repr(df_wide)) - self.assertTrue(has_vertically_truncated_repr(df_tall)) - self.assertTrue(has_expanded_repr(df_tall)) + assert has_expanded_repr(df_wide) + assert has_vertically_truncated_repr(df_tall) + assert has_expanded_repr(df_tall) with option_context('display.expand_frame_repr', False): assert not has_truncated_repr(df_small) assert not has_expanded_repr(df_small) assert not has_horizontally_truncated_repr(df_wide) assert not has_expanded_repr(df_wide) - self.assertTrue(has_vertically_truncated_repr(df_tall)) + assert has_vertically_truncated_repr(df_tall) assert not has_expanded_repr(df_tall) def test_repr_non_interactive(self): @@ -296,7 +296,7 @@ def mkframe(n): assert not has_expanded_repr(mkframe(4)) assert not has_expanded_repr(mkframe(5)) assert not has_expanded_repr(df6) - self.assertTrue(has_doubly_truncated_repr(df6)) + assert has_doubly_truncated_repr(df6) with option_context('display.max_rows', 20, 'display.max_columns', 10): @@ -309,7 +309,7 @@ def mkframe(n): 'display.max_columns', 10): # out vertical bounds can not result in exanded repr assert not has_expanded_repr(df10) - self.assertTrue(has_vertically_truncated_repr(df10)) + assert has_vertically_truncated_repr(df10) # width=None in terminal, auto detection with option_context('display.max_columns', 100, 'display.max_rows', @@ -318,7 +318,7 @@ def mkframe(n): assert not has_expanded_repr(df) df = mkframe((term_width // 7) + 2) printing.pprint_thing(df._repr_fits_horizontal_()) - self.assertTrue(has_expanded_repr(df)) + assert has_expanded_repr(df) def test_str_max_colwidth(self): # GH 7856 @@ -330,15 +330,14 @@ def test_str_max_colwidth(self): 'c': 'stuff', 'd': 1}]) df.set_index(['a', 'b', 'c']) - self.assertTrue( - str(df) == + assert str(df) == ( ' a b c d\n' '0 foo bar uncomfortably long line with lots of stuff 1\n' '1 foo bar stuff 1') with option_context('max_colwidth', 20): - self.assertTrue(str(df) == ' a b c d\n' - '0 foo bar uncomfortably lo... 1\n' - '1 foo bar stuff 1') + assert str(df) == (' a b c d\n' + '0 foo bar uncomfortably lo... 
1\n' + '1 foo bar stuff 1') def test_auto_detect(self): term_width, term_height = get_terminal_size() @@ -350,24 +349,24 @@ def test_auto_detect(self): with option_context('max_rows', None): with option_context('max_columns', None): # Wrap around with None - self.assertTrue(has_expanded_repr(df)) + assert has_expanded_repr(df) with option_context('max_rows', 0): with option_context('max_columns', 0): # Truncate with auto detection. - self.assertTrue(has_horizontally_truncated_repr(df)) + assert has_horizontally_truncated_repr(df) index = range(int(term_height * fac)) df = DataFrame(index=index, columns=cols) with option_context('max_rows', 0): with option_context('max_columns', None): # Wrap around with None - self.assertTrue(has_expanded_repr(df)) + assert has_expanded_repr(df) # Truncate vertically - self.assertTrue(has_vertically_truncated_repr(df)) + assert has_vertically_truncated_repr(df) with option_context('max_rows', None): with option_context('max_columns', 0): - self.assertTrue(has_horizontally_truncated_repr(df)) + assert has_horizontally_truncated_repr(df) def test_to_string_repr_unicode(self): buf = StringIO() @@ -732,7 +731,7 @@ def test_to_string_with_col_space(self): c10 = len(df.to_string(col_space=10).split("\n")[1]) c20 = len(df.to_string(col_space=20).split("\n")[1]) c30 = len(df.to_string(col_space=30).split("\n")[1]) - self.assertTrue(c10 < c20 < c30) + assert c10 < c20 < c30 # GH 8230 # col_space wasn't being applied with header=False @@ -752,23 +751,20 @@ def test_to_string_truncate_indices(self): df = DataFrame(index=index(h), columns=column(w)) with option_context("display.max_rows", 15): if h == 20: - self.assertTrue( - has_vertically_truncated_repr(df)) + assert has_vertically_truncated_repr(df) else: assert not has_vertically_truncated_repr( df) with option_context("display.max_columns", 15): if w == 20: - self.assertTrue( - has_horizontally_truncated_repr(df)) + assert has_horizontally_truncated_repr(df) else: assert not ( has_horizontally_truncated_repr(df)) with option_context("display.max_rows", 15, "display.max_columns", 15): if h == 20 and w == 20: - self.assertTrue(has_doubly_truncated_repr( - df)) + assert has_doubly_truncated_repr(df) else: assert not has_doubly_truncated_repr( df) @@ -778,7 +774,7 @@ def test_to_string_truncate_multilevel(self): ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] df = DataFrame(index=arrays, columns=arrays) with option_context("display.max_rows", 7, "display.max_columns", 7): - self.assertTrue(has_doubly_truncated_repr(df)) + assert has_doubly_truncated_repr(df) def test_truncate_with_different_dtypes(self): @@ -793,7 +789,7 @@ def test_truncate_with_different_dtypes(self): with pd.option_context('display.max_rows', 8): result = str(s) - self.assertTrue('object' in result) + assert 'object' in result # 12045 df = DataFrame({'text': ['some words'] + [None] * 9}) @@ -801,7 +797,7 @@ def test_truncate_with_different_dtypes(self): with pd.option_context('display.max_rows', 8, 'display.max_columns', 3): result = str(df) - self.assertTrue('None' in result) + assert 'None' in result assert 'NaN' not in result def test_datetimelike_frame(self): @@ -813,10 +809,10 @@ def test_datetimelike_frame(self): with option_context("display.max_rows", 5): result = str(df) - self.assertTrue('2013-01-01 00:00:00+00:00' in result) - self.assertTrue('NaT' in result) - self.assertTrue('...' 
in result) - self.assertTrue('[6 rows x 1 columns]' in result) + assert '2013-01-01 00:00:00+00:00' in result + assert 'NaT' in result + assert '...' in result + assert '[6 rows x 1 columns]' in result dts = [pd.Timestamp('2011-01-01', tz='US/Eastern')] * 5 + [pd.NaT] * 5 df = pd.DataFrame({"dt": dts, @@ -930,7 +926,7 @@ def test_wide_repr(self): with option_context('display.width', 120): wider_repr = repr(df) - self.assertTrue(len(wider_repr) < len(wide_repr)) + assert len(wider_repr) < len(wide_repr) reset_option('display.expand_frame_repr') @@ -956,7 +952,7 @@ def test_wide_repr_named(self): with option_context('display.width', 150): wider_repr = repr(df) - self.assertTrue(len(wider_repr) < len(wide_repr)) + assert len(wider_repr) < len(wide_repr) for line in wide_repr.splitlines()[1::13]: assert 'DataFrame Index' in line @@ -978,7 +974,7 @@ def test_wide_repr_multiindex(self): with option_context('display.width', 150): wider_repr = repr(df) - self.assertTrue(len(wider_repr) < len(wide_repr)) + assert len(wider_repr) < len(wide_repr) for line in wide_repr.splitlines()[1::13]: assert 'Level 0 Level 1' in line @@ -1002,7 +998,7 @@ def test_wide_repr_multiindex_cols(self): with option_context('display.width', 150): wider_repr = repr(df) - self.assertTrue(len(wider_repr) < len(wide_repr)) + assert len(wider_repr) < len(wide_repr) reset_option('display.expand_frame_repr') @@ -1018,7 +1014,7 @@ def test_wide_repr_unicode(self): with option_context('display.width', 150): wider_repr = repr(df) - self.assertTrue(len(wider_repr) < len(wide_repr)) + assert len(wider_repr) < len(wide_repr) reset_option('display.expand_frame_repr') @@ -1028,8 +1024,8 @@ def test_wide_repr_wide_long_columns(self): 'b': ['c' * 70, 'd' * 80]}) result = repr(df) - self.assertTrue('ccccc' in result) - self.assertTrue('ddddd' in result) + assert 'ccccc' in result + assert 'ddddd' in result def test_long_series(self): n = 1000 @@ -1141,8 +1137,8 @@ def test_to_string(self): header=None, sep=' ') tm.assert_series_equal(recons['B'], biggie['B']) self.assertEqual(recons['A'].count(), biggie['A'].count()) - self.assertTrue((np.abs(recons['A'].dropna() - biggie['A'].dropna()) < - 0.1).all()) + assert (np.abs(recons['A'].dropna() - + biggie['A'].dropna()) < 0.1).all() # expected = ['B', 'A'] # self.assertEqual(header, expected) @@ -1289,7 +1285,7 @@ def test_to_string_ascii_error(self): def test_to_string_int_formatting(self): df = DataFrame({'x': [-15, 20, 25, -35]}) - self.assertTrue(issubclass(df['x'].dtype.type, np.integer)) + assert issubclass(df['x'].dtype.type, np.integer) output = df.to_string() expected = (' x\n' '0 -15\n' '1 20\n' '2 25\n' '3 -35') @@ -1353,8 +1349,8 @@ def test_show_dimensions(self): with option_context('display.max_rows', 10, 'display.max_columns', 40, 'display.width', 500, 'display.expand_frame_repr', 'info', 'display.show_dimensions', True): - self.assertTrue('5 rows' in str(df)) - self.assertTrue('5 rows' in df._repr_html_()) + assert '5 rows' in str(df) + assert '5 rows' in df._repr_html_() with option_context('display.max_rows', 10, 'display.max_columns', 40, 'display.width', 500, 'display.expand_frame_repr', 'info', 'display.show_dimensions', False): @@ -1363,8 +1359,8 @@ def test_show_dimensions(self): with option_context('display.max_rows', 2, 'display.max_columns', 2, 'display.width', 500, 'display.expand_frame_repr', 'info', 'display.show_dimensions', 'truncate'): - self.assertTrue('5 rows' in str(df)) - self.assertTrue('5 rows' in df._repr_html_()) + assert '5 rows' in str(df) + assert '5 
rows' in df._repr_html_() with option_context('display.max_rows', 10, 'display.max_columns', 40, 'display.width', 500, 'display.expand_frame_repr', 'info', 'display.show_dimensions', 'truncate'): @@ -1384,7 +1380,7 @@ def test_repr_html(self): df = DataFrame([[1, 2], [3, 4]]) fmt.set_option('display.show_dimensions', True) - self.assertTrue('2 rows' in df._repr_html_()) + assert '2 rows' in df._repr_html_() fmt.set_option('display.show_dimensions', False) assert '2 rows' not in df._repr_html_() @@ -1513,7 +1509,7 @@ def test_info_repr_max_cols(self): with option_context('display.large_repr', 'info', 'display.max_columns', 1, 'display.max_info_columns', 4): - self.assertTrue(has_non_verbose_info_repr(df)) + assert has_non_verbose_info_repr(df) with option_context('display.large_repr', 'info', 'display.max_columns', 1, @@ -1576,17 +1572,17 @@ def test_float_trim_zeros(self): if line.startswith('dtype:'): continue if _three_digit_exp(): - self.assertTrue(('+010' in line) or skip) + assert ('+010' in line) or skip else: - self.assertTrue(('+10' in line) or skip) + assert ('+10' in line) or skip skip = False def test_dict_entries(self): df = DataFrame({'A': [{'a': 1, 'b': 2}]}) val = df.to_string() - self.assertTrue("'a': 1" in val) - self.assertTrue("'b': 2" in val) + assert "'a': 1" in val + assert "'b': 2" in val def test_period(self): # GH 12615 @@ -1662,7 +1658,7 @@ def test_freq_name_separation(self): index=date_range('1/1/2000', periods=10), name=0) result = repr(s) - self.assertTrue('Freq: D, Name: 0' in result) + assert 'Freq: D, Name: 0' in result def test_to_string_mixed(self): s = Series(['foo', np.nan, -1.23, 4.56]) @@ -1884,17 +1880,17 @@ def test_datetimeindex(self): index = date_range('20130102', periods=6) s = Series(1, index=index) result = s.to_string() - self.assertTrue('2013-01-02' in result) + assert '2013-01-02' in result # nat in index s2 = Series(2, index=[Timestamp('20130111'), NaT]) s = s2.append(s) result = s.to_string() - self.assertTrue('NaT' in result) + assert 'NaT' in result # nat in summary result = str(s2.index) - self.assertTrue('NaT' in result) + assert 'NaT' in result def test_timedelta64(self): @@ -1909,47 +1905,47 @@ def test_timedelta64(self): # adding NaTs y = s - s.shift(1) result = y.to_string() - self.assertTrue('1 days' in result) - self.assertTrue('00:00:00' not in result) - self.assertTrue('NaT' in result) + assert '1 days' in result + assert '00:00:00' not in result + assert 'NaT' in result # with frac seconds o = Series([datetime(2012, 1, 1, microsecond=150)] * 3) y = s - o result = y.to_string() - self.assertTrue('-1 days +23:59:59.999850' in result) + assert '-1 days +23:59:59.999850' in result # rounding? 
o = Series([datetime(2012, 1, 1, 1)] * 3) y = s - o result = y.to_string() - self.assertTrue('-1 days +23:00:00' in result) - self.assertTrue('1 days 23:00:00' in result) + assert '-1 days +23:00:00' in result + assert '1 days 23:00:00' in result o = Series([datetime(2012, 1, 1, 1, 1)] * 3) y = s - o result = y.to_string() - self.assertTrue('-1 days +22:59:00' in result) - self.assertTrue('1 days 22:59:00' in result) + assert '-1 days +22:59:00' in result + assert '1 days 22:59:00' in result o = Series([datetime(2012, 1, 1, 1, 1, microsecond=150)] * 3) y = s - o result = y.to_string() - self.assertTrue('-1 days +22:58:59.999850' in result) - self.assertTrue('0 days 22:58:59.999850' in result) + assert '-1 days +22:58:59.999850' in result + assert '0 days 22:58:59.999850' in result # neg time td = timedelta(minutes=5, seconds=3) s2 = Series(date_range('2012-1-1', periods=3, freq='D')) + td y = s - s2 result = y.to_string() - self.assertTrue('-1 days +23:54:57' in result) + assert '-1 days +23:54:57' in result td = timedelta(microseconds=550) s2 = Series(date_range('2012-1-1', periods=3, freq='D')) + td y = s - td result = y.to_string() - self.assertTrue('2012-01-01 23:59:59.999450' in result) + assert '2012-01-01 23:59:59.999450' in result # no boxing of the actual elements td = Series(pd.timedelta_range('1 days', periods=3)) @@ -1961,7 +1957,7 @@ def test_mixed_datetime64(self): df['B'] = pd.to_datetime(df.B) result = repr(df.loc[0]) - self.assertTrue('2012-01-01' in result) + assert '2012-01-01' in result def test_period(self): # GH 12615 @@ -2166,7 +2162,7 @@ class TestFloatArrayFormatter(tm.TestCase): def test_misc(self): obj = fmt.FloatArrayFormatter(np.array([], dtype=np.float64)) result = obj.get_result() - self.assertTrue(len(result) == 0) + assert len(result) == 0 def test_format(self): obj = fmt.FloatArrayFormatter(np.array([12, 0], dtype=np.float64)) @@ -2493,14 +2489,14 @@ class TestDatetimeIndexUnicode(tm.TestCase): def test_dates(self): text = str(pd.to_datetime([datetime(2013, 1, 1), datetime(2014, 1, 1) ])) - self.assertTrue("['2013-01-01'," in text) - self.assertTrue(", '2014-01-01']" in text) + assert "['2013-01-01'," in text + assert ", '2014-01-01']" in text def test_mixed(self): text = str(pd.to_datetime([datetime(2013, 1, 1), datetime( 2014, 1, 1, 12), datetime(2014, 1, 1)])) - self.assertTrue("'2013-01-01 00:00:00'," in text) - self.assertTrue("'2014-01-01 00:00:00']" in text) + assert "'2013-01-01 00:00:00'," in text + assert "'2014-01-01 00:00:00']" in text class TestStringRepTimestamp(tm.TestCase): diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 96bf2b605ffa1..7d8ac6f81c31e 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -68,9 +68,9 @@ def test_update_ctx_flatten_multi_traliing_semi(self): def test_copy(self): s2 = copy.copy(self.styler) - self.assertTrue(self.styler is not s2) - self.assertTrue(self.styler.ctx is s2.ctx) # shallow - self.assertTrue(self.styler._todo is s2._todo) + assert self.styler is not s2 + assert self.styler.ctx is s2.ctx # shallow + assert self.styler._todo is s2._todo self.styler._update_ctx(self.attrs) self.styler.highlight_max() @@ -79,9 +79,9 @@ def test_copy(self): def test_deepcopy(self): s2 = copy.deepcopy(self.styler) - self.assertTrue(self.styler is not s2) - self.assertTrue(self.styler.ctx is not s2.ctx) - self.assertTrue(self.styler._todo is not s2._todo) + assert self.styler is not s2 + assert self.styler.ctx is not s2.ctx + assert 
self.styler._todo is not s2._todo self.styler._update_ctx(self.attrs) self.styler.highlight_max() @@ -91,11 +91,11 @@ def test_deepcopy(self): def test_clear(self): s = self.df.style.highlight_max()._compute() - self.assertTrue(len(s.ctx) > 0) - self.assertTrue(len(s._todo) > 0) + assert len(s.ctx) > 0 + assert len(s._todo) > 0 s.clear() - self.assertTrue(len(s.ctx) == 0) - self.assertTrue(len(s._todo) == 0) + assert len(s.ctx) == 0 + assert len(s._todo) == 0 def test_render(self): df = pd.DataFrame({"A": [0, 1]}) @@ -367,42 +367,42 @@ def test_nonunique_raises(self): def test_caption(self): styler = Styler(self.df, caption='foo') result = styler.render() - self.assertTrue(all(['caption' in result, 'foo' in result])) + assert all(['caption' in result, 'foo' in result]) styler = self.df.style result = styler.set_caption('baz') - self.assertTrue(styler is result) + assert styler is result self.assertEqual(styler.caption, 'baz') def test_uuid(self): styler = Styler(self.df, uuid='abc123') result = styler.render() - self.assertTrue('abc123' in result) + assert 'abc123' in result styler = self.df.style result = styler.set_uuid('aaa') - self.assertTrue(result is styler) + assert result is styler self.assertEqual(result.uuid, 'aaa') def test_table_styles(self): style = [{'selector': 'th', 'props': [('foo', 'bar')]}] styler = Styler(self.df, table_styles=style) result = ' '.join(styler.render().split()) - self.assertTrue('th { foo: bar; }' in result) + assert 'th { foo: bar; }' in result styler = self.df.style result = styler.set_table_styles(style) - self.assertTrue(styler is result) + assert styler is result self.assertEqual(styler.table_styles, style) def test_table_attributes(self): attributes = 'class="foo" data-bar' styler = Styler(self.df, table_attributes=attributes) result = styler.render() - self.assertTrue('class="foo" data-bar' in result) + assert 'class="foo" data-bar' in result result = self.df.style.set_table_attributes(attributes).render() - self.assertTrue('class="foo" data-bar' in result) + assert 'class="foo" data-bar' in result def test_precision(self): with pd.option_context('display.precision', 10): @@ -412,7 +412,7 @@ def test_precision(self): self.assertEqual(s.precision, 2) s2 = s.set_precision(4) - self.assertTrue(s is s2) + assert s is s2 self.assertEqual(s.precision, 4) def test_apply_none(self): @@ -485,12 +485,10 @@ def test_display_format(self): df = pd.DataFrame(np.random.random(size=(2, 2))) ctx = df.style.format("{:0.1f}")._translate() - self.assertTrue(all(['display_value' in c for c in row] - for row in ctx['body'])) - self.assertTrue(all([len(c['display_value']) <= 3 for c in row[1:]] - for row in ctx['body'])) - self.assertTrue( - len(ctx['body'][0][1]['display_value'].lstrip('-')) <= 3) + assert all(['display_value' in c for c in row] for row in ctx['body']) + assert (all([len(c['display_value']) <= 3 for c in row[1:]] + for row in ctx['body'])) + assert len(ctx['body'][0][1]['display_value'].lstrip('-')) <= 3 def test_display_format_raises(self): df = pd.DataFrame(np.random.randn(2, 2)) @@ -711,7 +709,7 @@ def test_background_gradient(self): for axis in [0, 1, 'index', 'columns']: for cmap in [None, 'YlOrRd']: result = df.style.background_gradient(cmap=cmap)._compute().ctx - self.assertTrue(all("#" in x[0] for x in result.values())) + assert all("#" in x[0] for x in result.values()) self.assertEqual(result[(0, 0)], result[(0, 1)]) self.assertEqual(result[(1, 0)], result[(1, 1)]) diff --git a/pandas/tests/io/formats/test_to_html.py 
b/pandas/tests/io/formats/test_to_html.py index a67bb2fd8eb5c..fd9ae0851635a 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -30,10 +30,10 @@ def check_with_width(df, col_space): # and be very brittle about it. html = df.to_html(col_space=col_space) hdrs = [x for x in html.split(r"\n") if re.search(r"\s]", x)] - self.assertTrue(len(hdrs) > 0) + assert len(hdrs) > 0 for h in hdrs: - self.assertTrue("min-width" in h) - self.assertTrue(str(col_space) in h) + assert "min-width" in h + assert str(col_space) in h df = DataFrame(np.random.random(size=(1, 3))) @@ -45,7 +45,7 @@ def test_to_html_with_empty_string_label(self): data = {'c1': ['a', 'b'], 'c2': ['a', ''], 'data': [1, 2]} df = DataFrame(data).set_index(['c1', 'c2']) res = df.to_html() - self.assertTrue("rowspan" not in res) + assert "rowspan" not in res def test_to_html_unicode(self): df = DataFrame({u('\u03c3'): np.arange(10.)}) @@ -1403,13 +1403,13 @@ def test_to_html_border_option(self): df = DataFrame({'A': [1, 2]}) with pd.option_context('html.border', 0): result = df.to_html() - self.assertTrue('border="0"' in result) - self.assertTrue('border="0"' in df._repr_html_()) + assert 'border="0"' in result + assert 'border="0"' in df._repr_html_() def test_to_html_border_zero(self): df = DataFrame({'A': [1, 2]}) result = df.to_html(border=0) - self.assertTrue('border="0"' in result) + assert 'border="0"' in result def test_to_html(self): # big mixed diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index cbb302ad39dd6..4ec13fa667452 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -41,7 +41,7 @@ def test_build_table_schema(self): } self.assertEqual(result, expected) result = build_table_schema(self.df) - self.assertTrue("pandas_version" in result) + assert "pandas_version" in result def test_series(self): s = pd.Series([1, 2, 3], name='foo') @@ -51,7 +51,7 @@ def test_series(self): 'primaryKey': ['index']} self.assertEqual(result, expected) result = build_table_schema(s) - self.assertTrue('pandas_version' in result) + assert 'pandas_version' in result def tets_series_unnamed(self): result = build_table_schema(pd.Series([1, 2, 3]), version=False) @@ -194,7 +194,7 @@ def test_build_series(self): result = s.to_json(orient='table', date_format='iso') result = json.loads(result, object_pairs_hook=OrderedDict) - self.assertTrue("pandas_version" in result['schema']) + assert "pandas_version" in result['schema'] result['schema'].pop('pandas_version') fields = [{'name': 'id', 'type': 'integer'}, @@ -217,7 +217,7 @@ def test_to_json(self): result = df.to_json(orient='table', date_format='iso') result = json.loads(result, object_pairs_hook=OrderedDict) - self.assertTrue("pandas_version" in result['schema']) + assert "pandas_version" in result['schema'] result['schema'].pop('pandas_version') fields = [ diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index ac9e4f77db6ac..e7a04e12d7fa4 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -349,38 +349,38 @@ def test_frame_from_json_bad_data(self): def test_frame_from_json_nones(self): df = DataFrame([[1, 2], [4, 5, 6]]) unser = read_json(df.to_json()) - self.assertTrue(np.isnan(unser[2][0])) + assert np.isnan(unser[2][0]) df = DataFrame([['1', '2'], ['4', '5', '6']]) unser = read_json(df.to_json()) - self.assertTrue(np.isnan(unser[2][0])) + 
assert np.isnan(unser[2][0]) unser = read_json(df.to_json(), dtype=False) - self.assertTrue(unser[2][0] is None) + assert unser[2][0] is None unser = read_json(df.to_json(), convert_axes=False, dtype=False) - self.assertTrue(unser['2']['0'] is None) + assert unser['2']['0'] is None unser = read_json(df.to_json(), numpy=False) - self.assertTrue(np.isnan(unser[2][0])) + assert np.isnan(unser[2][0]) unser = read_json(df.to_json(), numpy=False, dtype=False) - self.assertTrue(unser[2][0] is None) + assert unser[2][0] is None unser = read_json(df.to_json(), numpy=False, convert_axes=False, dtype=False) - self.assertTrue(unser['2']['0'] is None) + assert unser['2']['0'] is None # infinities get mapped to nulls which get mapped to NaNs during # deserialisation df = DataFrame([[1, 2], [4, 5, 6]]) df.loc[0, 2] = np.inf unser = read_json(df.to_json()) - self.assertTrue(np.isnan(unser[2][0])) + assert np.isnan(unser[2][0]) unser = read_json(df.to_json(), dtype=False) - self.assertTrue(np.isnan(unser[2][0])) + assert np.isnan(unser[2][0]) df.loc[0, 2] = np.NINF unser = read_json(df.to_json()) - self.assertTrue(np.isnan(unser[2][0])) + assert np.isnan(unser[2][0]) unser = read_json(df.to_json(), dtype=False) - self.assertTrue(np.isnan(unser[2][0])) + assert np.isnan(unser[2][0]) @pytest.mark.skipif(is_platform_32bit(), reason="not compliant on 32-bit, xref #15865") @@ -427,7 +427,7 @@ def test_frame_empty_mixedtype(self): # mixed type df = DataFrame(columns=['jim', 'joe']) df['joe'] = df['joe'].astype('i8') - self.assertTrue(df._is_mixed_type) + assert df._is_mixed_type assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df, check_index_type=False) @@ -440,7 +440,7 @@ def test_frame_mixedtype_orient(self): # GH10289 df = DataFrame(vals, index=list('abcd'), columns=['1st', '2nd', '3rd', '4th', '5th']) - self.assertTrue(df._is_mixed_type) + assert df._is_mixed_type right = df.copy() for orient in ['split', 'index', 'columns']: @@ -637,7 +637,7 @@ def test_axis_dates(self): json = self.ts.to_json() result = read_json(json, typ='series') assert_series_equal(result, self.ts, check_names=False) - self.assertTrue(result.name is None) + assert result.name is None def test_convert_dates(self): diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 037e47bfc2a46..12d5cd14197b8 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -157,7 +157,7 @@ def test_encodeDoubleTinyExponential(self): num = -1e-45 self.assertEqual(num, ujson.decode(ujson.encode(num))) num = -1e-145 - self.assertTrue(np.allclose(num, ujson.decode(ujson.encode(num)))) + assert np.allclose(num, ujson.decode(ujson.encode(num))) def test_encodeDictWithUnicodeKeys(self): input = {u("key1"): u("value1"), u("key1"): @@ -1189,15 +1189,15 @@ def testArrayNumpyExcept(self): def testArrayNumpyLabelled(self): input = {'a': []} output = ujson.loads(ujson.dumps(input), numpy=True, labelled=True) - self.assertTrue((np.empty((1, 0)) == output[0]).all()) - self.assertTrue((np.array(['a']) == output[1]).all()) - self.assertTrue(output[2] is None) + assert (np.empty((1, 0)) == output[0]).all() + assert (np.array(['a']) == output[1]).all() + assert output[2] is None input = [{'a': 42}] output = ujson.loads(ujson.dumps(input), numpy=True, labelled=True) - self.assertTrue((np.array([42]) == output[0]).all()) - self.assertTrue(output[1] is None) - self.assertTrue((np.array([u('a')]) == output[2]).all()) + assert (np.array([42]) == output[0]).all() + assert output[1] is None + 
assert (np.array([u('a')]) == output[2]).all() # Write out the dump explicitly so there is no dependency on iteration # order GH10837 @@ -1206,18 +1206,18 @@ def testArrayNumpyLabelled(self): output = ujson.loads(input_dumps, numpy=True, labelled=True) expectedvals = np.array( [42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2)) - self.assertTrue((expectedvals == output[0]).all()) - self.assertTrue(output[1] is None) - self.assertTrue((np.array([u('a'), 'b']) == output[2]).all()) + assert (expectedvals == output[0]).all() + assert output[1] is None + assert (np.array([u('a'), 'b']) == output[2]).all() input_dumps = ('{"1": {"a": 42, "b":31}, "2": {"a": 24, "c": 99}, ' '"3": {"a": 2.4, "b": 78}}') output = ujson.loads(input_dumps, numpy=True, labelled=True) expectedvals = np.array( [42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2)) - self.assertTrue((expectedvals == output[0]).all()) - self.assertTrue((np.array(['1', '2', '3']) == output[1]).all()) - self.assertTrue((np.array(['a', 'b']) == output[2]).all()) + assert (expectedvals == output[0]).all() + assert (np.array(['1', '2', '3']) == output[1]).all() + assert (np.array(['a', 'b']) == output[2]).all() class PandasJSONTests(TestCase): @@ -1228,27 +1228,27 @@ def testDataFrame(self): # column indexed outp = DataFrame(ujson.decode(ujson.encode(df))) - self.assertTrue((df == outp).values.all()) + assert (df == outp).values.all() tm.assert_index_equal(df.columns, outp.columns) tm.assert_index_equal(df.index, outp.index) dec = _clean_dict(ujson.decode(ujson.encode(df, orient="split"))) outp = DataFrame(**dec) - self.assertTrue((df == outp).values.all()) + assert (df == outp).values.all() tm.assert_index_equal(df.columns, outp.columns) tm.assert_index_equal(df.index, outp.index) outp = DataFrame(ujson.decode(ujson.encode(df, orient="records"))) outp.index = df.index - self.assertTrue((df == outp).values.all()) + assert (df == outp).values.all() tm.assert_index_equal(df.columns, outp.columns) outp = DataFrame(ujson.decode(ujson.encode(df, orient="values"))) outp.index = df.index - self.assertTrue((df.values == outp.values).all()) + assert (df.values == outp.values).all() outp = DataFrame(ujson.decode(ujson.encode(df, orient="index"))) - self.assertTrue((df.transpose() == outp).values.all()) + assert (df.transpose() == outp).values.all() tm.assert_index_equal(df.transpose().columns, outp.columns) tm.assert_index_equal(df.transpose().index, outp.index) @@ -1258,20 +1258,20 @@ def testDataFrameNumpy(self): # column indexed outp = DataFrame(ujson.decode(ujson.encode(df), numpy=True)) - self.assertTrue((df == outp).values.all()) + assert (df == outp).values.all() tm.assert_index_equal(df.columns, outp.columns) tm.assert_index_equal(df.index, outp.index) dec = _clean_dict(ujson.decode(ujson.encode(df, orient="split"), numpy=True)) outp = DataFrame(**dec) - self.assertTrue((df == outp).values.all()) + assert (df == outp).values.all() tm.assert_index_equal(df.columns, outp.columns) tm.assert_index_equal(df.index, outp.index) outp = DataFrame(ujson.decode(ujson.encode(df, orient="index"), numpy=True)) - self.assertTrue((df.transpose() == outp).values.all()) + assert (df.transpose() == outp).values.all() tm.assert_index_equal(df.transpose().columns, outp.columns) tm.assert_index_equal(df.transpose().index, outp.index) @@ -1283,27 +1283,23 @@ def testDataFrameNested(self): exp = {'df1': ujson.decode(ujson.encode(df)), 'df2': ujson.decode(ujson.encode(df))} - self.assertTrue(ujson.decode(ujson.encode(nested)) == exp) + assert 
ujson.decode(ujson.encode(nested)) == exp exp = {'df1': ujson.decode(ujson.encode(df, orient="index")), 'df2': ujson.decode(ujson.encode(df, orient="index"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="index")) == exp) + assert ujson.decode(ujson.encode(nested, orient="index")) == exp exp = {'df1': ujson.decode(ujson.encode(df, orient="records")), 'df2': ujson.decode(ujson.encode(df, orient="records"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="records")) == exp) + assert ujson.decode(ujson.encode(nested, orient="records")) == exp exp = {'df1': ujson.decode(ujson.encode(df, orient="values")), 'df2': ujson.decode(ujson.encode(df, orient="values"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="values")) == exp) + assert ujson.decode(ujson.encode(nested, orient="values")) == exp exp = {'df1': ujson.decode(ujson.encode(df, orient="split")), 'df2': ujson.decode(ujson.encode(df, orient="split"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="split")) == exp) + assert ujson.decode(ujson.encode(nested, orient="split")) == exp def testDataFrameNumpyLabelled(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[ @@ -1312,19 +1308,19 @@ def testDataFrameNumpyLabelled(self): # column indexed outp = DataFrame(*ujson.decode(ujson.encode(df), numpy=True, labelled=True)) - self.assertTrue((df.T == outp).values.all()) + assert (df.T == outp).values.all() tm.assert_index_equal(df.T.columns, outp.columns) tm.assert_index_equal(df.T.index, outp.index) outp = DataFrame(*ujson.decode(ujson.encode(df, orient="records"), numpy=True, labelled=True)) outp.index = df.index - self.assertTrue((df == outp).values.all()) + assert (df == outp).values.all() tm.assert_index_equal(df.columns, outp.columns) outp = DataFrame(*ujson.decode(ujson.encode(df, orient="index"), numpy=True, labelled=True)) - self.assertTrue((df == outp).values.all()) + assert (df == outp).values.all() tm.assert_index_equal(df.columns, outp.columns) tm.assert_index_equal(df.index, outp.index) @@ -1384,27 +1380,23 @@ def testSeriesNested(self): exp = {'s1': ujson.decode(ujson.encode(s)), 's2': ujson.decode(ujson.encode(s))} - self.assertTrue(ujson.decode(ujson.encode(nested)) == exp) + assert ujson.decode(ujson.encode(nested)) == exp exp = {'s1': ujson.decode(ujson.encode(s, orient="split")), 's2': ujson.decode(ujson.encode(s, orient="split"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="split")) == exp) + assert ujson.decode(ujson.encode(nested, orient="split")) == exp exp = {'s1': ujson.decode(ujson.encode(s, orient="records")), 's2': ujson.decode(ujson.encode(s, orient="records"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="records")) == exp) + assert ujson.decode(ujson.encode(nested, orient="records")) == exp exp = {'s1': ujson.decode(ujson.encode(s, orient="values")), 's2': ujson.decode(ujson.encode(s, orient="values"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="values")) == exp) + assert ujson.decode(ujson.encode(nested, orient="values")) == exp exp = {'s1': ujson.decode(ujson.encode(s, orient="index")), 's2': ujson.decode(ujson.encode(s, orient="index"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="index")) == exp) + assert ujson.decode(ujson.encode(nested, orient="index")) == exp def testIndex(self): i = Index([23, 45, 18, 98, 43, 11], name="index") @@ -1419,13 +1411,13 @@ def testIndex(self): dec = _clean_dict(ujson.decode(ujson.encode(i, orient="split"))) outp = Index(**dec) 
tm.assert_index_equal(i, outp) - self.assertTrue(i.name == outp.name) + assert i.name == outp.name dec = _clean_dict(ujson.decode(ujson.encode(i, orient="split"), numpy=True)) outp = Index(**dec) tm.assert_index_equal(i, outp) - self.assertTrue(i.name == outp.name) + assert i.name == outp.name outp = Index(ujson.decode(ujson.encode(i, orient="values")), name='index') @@ -1634,7 +1626,7 @@ def test_encodeSet(self): dec = ujson.decode(enc) for v in dec: - self.assertTrue(v in s) + assert v in s def _clean_dict(d): diff --git a/pandas/tests/io/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py index 7ce8c61777bc7..ac2aaf1f5e4ed 100644 --- a/pandas/tests/io/parser/c_parser_only.py +++ b/pandas/tests/io/parser/c_parser_only.py @@ -154,8 +154,8 @@ def error(val): # round-trip should match float() self.assertEqual(roundtrip_val, float(text[2:])) - self.assertTrue(sum(precise_errors) <= sum(normal_errors)) - self.assertTrue(max(precise_errors) <= max(normal_errors)) + assert sum(precise_errors) <= sum(normal_errors) + assert max(precise_errors) <= max(normal_errors) def test_pass_dtype_as_recarray(self): if compat.is_platform_windows() and self.low_memory: @@ -195,8 +195,8 @@ def test_usecols_dtypes(self): converters={'a': str}, dtype={'b': int, 'c': float}, ) - self.assertTrue((result.dtypes == [object, np.int, np.float]).all()) - self.assertTrue((result2.dtypes == [object, np.float]).all()) + assert (result.dtypes == [object, np.int, np.float]).all() + assert (result2.dtypes == [object, np.float]).all() def test_disable_bool_parsing(self): # #2090 @@ -208,7 +208,7 @@ def test_disable_bool_parsing(self): No,No,No""" result = self.read_csv(StringIO(data), dtype=object) - self.assertTrue((result.dtypes == object).all()) + assert (result.dtypes == object).all() result = self.read_csv(StringIO(data), dtype=object, na_filter=False) self.assertEqual(result['B'][2], '') @@ -388,7 +388,7 @@ def test_read_nrows_large(self): df = self.read_csv(StringIO(test_input), sep='\t', nrows=1010) - self.assertTrue(df.size == 1010 * 10) + assert df.size == 1010 * 10 def test_float_precision_round_trip_with_text(self): # gh-15140 - This should not segfault on Python 2.7+ diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index e3df02a948080..9677106f37232 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -693,7 +693,7 @@ def test_missing_trailing_delimiters(self): 1,3,3, 1,4,5""" result = self.read_csv(StringIO(data)) - self.assertTrue(result['D'].isnull()[1:].all()) + assert result['D'].isnull()[1:].all() def test_skipinitialspace(self): s = ('"09-Apr-2012", "01:10:18.300", 2456026.548822908, 12849, ' @@ -707,7 +707,7 @@ def test_skipinitialspace(self): # it's 33 columns result = self.read_csv(sfile, names=lrange(33), na_values=['-9999.0'], header=None, skipinitialspace=True) - self.assertTrue(pd.isnull(result.iloc[0, 29])) + assert pd.isnull(result.iloc[0, 29]) def test_utf16_bom_skiprows(self): # #2298 @@ -794,8 +794,8 @@ def test_escapechar(self): quotechar='"', encoding='utf-8') self.assertEqual(result['SEARCH_TERM'][2], 'SLAGBORD, "Bergslagen", IKEA:s 1700-tals serie') - self.assertTrue(np.array_equal(result.columns, - ['SEARCH_TERM', 'ACTUAL_URL'])) + tm.assert_index_equal(result.columns, + Index(['SEARCH_TERM', 'ACTUAL_URL'])) def test_int64_min_issues(self): # #2599 @@ -831,7 +831,7 @@ def test_parse_integers_above_fp_precision(self): 17007000002000192, 17007000002000194]}) - self.assertTrue(np.array_equal(result['Numbers'], 
expected['Numbers'])) + assert np.array_equal(result['Numbers'], expected['Numbers']) def test_chunks_have_consistent_numerical_type(self): integers = [str(i) for i in range(499999)] @@ -840,7 +840,7 @@ def test_chunks_have_consistent_numerical_type(self): with tm.assert_produces_warning(False): df = self.read_csv(StringIO(data)) # Assert that types were coerced. - self.assertTrue(type(df.a[0]) is np.float64) + assert type(df.a[0]) is np.float64 self.assertEqual(df.a.dtype, np.float) def test_warn_if_chunks_have_mismatched_type(self): @@ -862,10 +862,10 @@ def test_integer_overflow_bug(self): data = "65248E10 11\n55555E55 22\n" result = self.read_csv(StringIO(data), header=None, sep=' ') - self.assertTrue(result[0].dtype == np.float64) + assert result[0].dtype == np.float64 result = self.read_csv(StringIO(data), header=None, sep=r'\s+') - self.assertTrue(result[0].dtype == np.float64) + assert result[0].dtype == np.float64 def test_catch_too_many_names(self): # see gh-5156 @@ -953,7 +953,7 @@ def test_int64_overflow(self): # 13007854817840016671868 > UINT64_MAX, so this # will overflow and return object as the dtype. result = self.read_csv(StringIO(data)) - self.assertTrue(result['ID'].dtype == object) + assert result['ID'].dtype == object # 13007854817840016671868 > UINT64_MAX, so attempts # to cast to either int64 or uint64 will result in diff --git a/pandas/tests/io/parser/converters.py b/pandas/tests/io/parser/converters.py index 6cea0f3e7b36c..e10ee016b749a 100644 --- a/pandas/tests/io/parser/converters.py +++ b/pandas/tests/io/parser/converters.py @@ -133,7 +133,7 @@ def convert_score(x): result = self.read_csv(fh, converters={'score': convert_score, 'days': convert_days}, na_values=['', None]) - self.assertTrue(pd.isnull(result['days'][1])) + assert pd.isnull(result['days'][1]) fh = StringIO(data) result2 = self.read_csv(fh, converters={'score': convert_score, diff --git a/pandas/tests/io/parser/index_col.py b/pandas/tests/io/parser/index_col.py index 168f6eda46ed1..6283104dffd70 100644 --- a/pandas/tests/io/parser/index_col.py +++ b/pandas/tests/io/parser/index_col.py @@ -63,7 +63,7 @@ def test_infer_index_col(self): baz,7,8,9 """ data = self.read_csv(StringIO(data)) - self.assertTrue(data.index.equals(Index(['foo', 'bar', 'baz']))) + assert data.index.equals(Index(['foo', 'bar', 'baz'])) def test_empty_index_col_scenarios(self): data = 'x,y,z' diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index cf29dbdfef49d..787fa304f84b2 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -249,7 +249,7 @@ def test_na_trailing_columns(self): result = self.read_csv(StringIO(data)) self.assertEqual(result['Date'][1], '2012-05-12') - self.assertTrue(result['UnitPrice'].isnull().all()) + assert result['UnitPrice'].isnull().all() def test_na_values_scalar(self): # see gh-12224 diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index 3833fa3d7ff4e..dfccf48b03be3 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -461,7 +461,7 @@ def test_parse_dates_empty_string(self): data = "Date, test\n2012-01-01, 1\n,2" result = self.read_csv(StringIO(data), parse_dates=["Date"], na_filter=False) - self.assertTrue(result['Date'].isnull()[1]) + assert result['Date'].isnull()[1] def test_parse_dates_noconvert_thousands(self): # see gh-14066 @@ -520,7 +520,7 @@ def test_parse_date_time(self): datetime(2008, 2, 4, 6, 8, 0)]) result = 
conv.parse_date_time(dates, times) - self.assertTrue((result == expected).all()) + assert (result == expected).all() data = """\ date, time, a, b @@ -551,7 +551,7 @@ def test_parse_date_fields(self): days = np.array([3, 4]) result = conv.parse_date_fields(years, months, days) expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)]) - self.assertTrue((result == expected).all()) + assert (result == expected).all() data = ("year, month, day, a\n 2001 , 01 , 10 , 10.\n" "2001 , 02 , 1 , 11.") @@ -575,7 +575,7 @@ def test_datetime_six_col(self): result = conv.parse_all_fields(years, months, days, hours, minutes, seconds) - self.assertTrue((result == expected).all()) + assert (result == expected).all() data = """\ year, month, day, hour, minute, second, a, b diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index e3a1b42fd4d45..046590a3ae4c9 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -60,14 +60,14 @@ def test_parse_public_s3_bucket(self): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, compression=comp) - self.assertTrue(isinstance(df, DataFrame)) + assert isinstance(df, DataFrame) assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) # Read public file from bucket with not-public contents df = read_csv('s3://cant_get_it/tips.csv') - self.assertTrue(isinstance(df, DataFrame)) + assert isinstance(df, DataFrame) assert not df.empty tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df) @@ -75,7 +75,7 @@ def test_parse_public_s3_bucket(self): def test_parse_public_s3n_bucket(self): # Read from AWS s3 as "s3n" URL df = read_csv('s3n://pandas-test/tips.csv', nrows=10) - self.assertTrue(isinstance(df, DataFrame)) + assert isinstance(df, DataFrame) assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) @@ -84,7 +84,7 @@ def test_parse_public_s3n_bucket(self): def test_parse_public_s3a_bucket(self): # Read from AWS s3 as "s3a" URL df = read_csv('s3a://pandas-test/tips.csv', nrows=10) - self.assertTrue(isinstance(df, DataFrame)) + assert isinstance(df, DataFrame) assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) @@ -94,7 +94,7 @@ def test_parse_public_s3_bucket_nrows(self): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, nrows=10, compression=comp) - self.assertTrue(isinstance(df, DataFrame)) + assert isinstance(df, DataFrame) assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) @@ -112,7 +112,7 @@ def test_parse_public_s3_bucket_chunked(self): # Read a couple of chunks and make sure we see them # properly. df = df_reader.get_chunk() - self.assertTrue(isinstance(df, DataFrame)) + assert isinstance(df, DataFrame) assert not df.empty true_df = local_tips.iloc[ chunksize * i_chunk: chunksize * (i_chunk + 1)] @@ -131,7 +131,7 @@ def test_parse_public_s3_bucket_chunked_python(self): for i_chunk in [0, 1, 2]: # Read a couple of chunks and make sure we see them properly. 
df = df_reader.get_chunk() - self.assertTrue(isinstance(df, DataFrame)) + assert isinstance(df, DataFrame) assert not df.empty true_df = local_tips.iloc[ chunksize * i_chunk: chunksize * (i_chunk + 1)] @@ -142,7 +142,7 @@ def test_parse_public_s3_bucket_python(self): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression=comp) - self.assertTrue(isinstance(df, DataFrame)) + assert isinstance(df, DataFrame) assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) @@ -152,7 +152,7 @@ def test_infer_s3_compression(self): for ext in ['', '.gz', '.bz2']: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression='infer') - self.assertTrue(isinstance(df, DataFrame)) + assert isinstance(df, DataFrame) assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) @@ -162,7 +162,7 @@ def test_parse_public_s3_bucket_nrows_python(self): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', nrows=10, compression=comp) - self.assertTrue(isinstance(df, DataFrame)) + assert isinstance(df, DataFrame) assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index ffb04c52e8d93..90231e01d0173 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -166,7 +166,7 @@ def test_fwf_regression(self): for c in df.columns: res = df.loc[:, c] - self.assertTrue(len(res)) + assert len(res) def test_fwf_for_uint8(self): data = """1421302965.213420 PRI=3 PGN=0xef00 DST=0x17 SRC=0x28 04 154 00 00 00 00 00 127 diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index bf1d8d4f3e27c..ad37f828bba6f 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -253,14 +253,14 @@ def _make_reader(**kwds): self.assertEqual(result[0].dtype, 'S5') ex_values = np.array(['a', 'aa', 'aaa', 'aaaa', 'aaaaa'], dtype='S5') - self.assertTrue((result[0] == ex_values).all()) + assert (result[0] == ex_values).all() self.assertEqual(result[1].dtype, 'i4') reader = _make_reader(dtype='S4') result = reader.read() self.assertEqual(result[0].dtype, 'S4') ex_values = np.array(['a', 'aa', 'aaa', 'aaaa', 'aaaa'], dtype='S4') - self.assertTrue((result[0] == ex_values).all()) + assert (result[0] == ex_values).all() self.assertEqual(result[1].dtype, 'S4') def test_numpy_string_dtype_as_recarray(self): @@ -279,7 +279,7 @@ def _make_reader(**kwds): result = reader.read() self.assertEqual(result['0'].dtype, 'S4') ex_values = np.array(['a', 'aa', 'aaa', 'aaaa', 'aaaa'], dtype='S4') - self.assertTrue((result['0'] == ex_values).all()) + assert (result['0'] == ex_values).all() self.assertEqual(result['1'].dtype, 'S4') def test_pass_dtype(self): @@ -325,8 +325,8 @@ def _make_reader(**kwds): exp = _make_reader().read() self.assertEqual(len(result), 2) - self.assertTrue((result[1] == exp[1]).all()) - self.assertTrue((result[2] == exp[2]).all()) + assert (result[1] == exp[1]).all() + assert (result[2] == exp[2]).all() def test_cr_delimited(self): def _test(text, **kwargs): @@ -392,7 +392,7 @@ def test_empty_csv_input(self): # GH14867 df = read_csv(StringIO(), chunksize=20, header=None, names=['a', 'b', 'c']) - self.assertTrue(isinstance(df, TextFileReader)) + 
assert isinstance(df, TextFileReader) def assert_array_dicts_equal(left, right): diff --git a/pandas/tests/io/parser/usecols.py b/pandas/tests/io/parser/usecols.py index db8e5b7653a51..b52106d9e8595 100644 --- a/pandas/tests/io/parser/usecols.py +++ b/pandas/tests/io/parser/usecols.py @@ -44,8 +44,8 @@ def test_usecols(self): exp = self.read_csv(StringIO(data)) self.assertEqual(len(result.columns), 2) - self.assertTrue((result['b'] == exp['b']).all()) - self.assertTrue((result['c'] == exp['c']).all()) + assert (result['b'] == exp['b']).all() + assert (result['c'] == exp['c']).all() tm.assert_frame_equal(result, result2) diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 69073a90e9669..afd40e7017cff 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -75,7 +75,7 @@ def test_iterator_loop(self): y = 0 for x in rdr: y += x.shape[0] - self.assertTrue(y == rdr.row_count) + assert y == rdr.row_count rdr.close() def test_iterator_read_too_much(self): diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index fe2f7cb4bf4be..2ed7ebbbfce32 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -40,7 +40,7 @@ def test1_basic(self): # Test reading beyond end of file reader = read_sas(self.file01, format="xport", iterator=True) data = reader.read(num_rows + 100) - self.assertTrue(data.shape[0] == num_rows) + assert data.shape[0] == num_rows reader.close() # Test incremental read with `read` method. @@ -61,7 +61,7 @@ def test1_basic(self): for x in reader: m += x.shape[0] reader.close() - self.assertTrue(m == num_rows) + assert m == num_rows # Read full file with `read_sas` method data = read_sas(self.file01) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 700915b81dd31..3eee3f619f33d 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -39,7 +39,7 @@ def test_expand_user(self): expanded_name = common._expand_user(filename) self.assertNotEqual(expanded_name, filename) - self.assertTrue(isabs(expanded_name)) + assert isabs(expanded_name) self.assertEqual(os.path.expanduser(filename), expanded_name) def test_expand_user_normal_path(self): @@ -69,7 +69,7 @@ def test_get_filepath_or_buffer_with_path(self): filename = '~/sometest' filepath_or_buffer, _, _ = common.get_filepath_or_buffer(filename) self.assertNotEqual(filepath_or_buffer, filename) - self.assertTrue(isabs(filepath_or_buffer)) + assert isabs(filepath_or_buffer) self.assertEqual(os.path.expanduser(filename), filepath_or_buffer) def test_get_filepath_or_buffer_with_buffer(self): @@ -127,7 +127,7 @@ def test_get_attr(self): attrs.append('__next__') for attr in attrs: - self.assertTrue(hasattr(wrapper, attr)) + assert hasattr(wrapper, attr) assert not hasattr(wrapper, 'foo') diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 2a3a4992ead71..6092cd4180675 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -656,7 +656,7 @@ def test_reader_closes_file(self): # parses okay read_excel(xlsx, 'Sheet1', index_col=0) - self.assertTrue(f.closed) + assert f.closed def test_creating_and_reading_multiple_sheets(self): # Test reading multiple sheets, from a runtime created excel file @@ -1630,7 +1630,7 @@ def test_to_excel_unicode_filename(self): # xlsaddrs += ["B1", "D1", "F1"] # for xlsaddr in xlsaddrs: # cell = ws.cell(xlsaddr) - # self.assertTrue(cell.style.font.bold) + # assert 
cell.style.font.bold # self.assertEqual(openpyxl.style.Border.BORDER_THIN, # cell.style.borders.top.border_style) # self.assertEqual(openpyxl.style.Border.BORDER_THIN, @@ -1643,7 +1643,7 @@ def test_to_excel_unicode_filename(self): # cell.style.alignment.horizontal) # mergedcells_addrs = ["C1", "E1", "G1"] # for maddr in mergedcells_addrs: - # self.assertTrue(ws.cell(maddr).merged) + # assert ws.cell(maddr).merged # os.remove(filename) def test_excel_010_hemstring(self): @@ -1689,15 +1689,15 @@ def roundtrip(df, header=True, parser_hdr=0, index=True): # no nans for r in range(len(res.index)): for c in range(len(res.columns)): - self.assertTrue(res.iloc[r, c] is not np.nan) + assert res.iloc[r, c] is not np.nan res = roundtrip(DataFrame([0])) self.assertEqual(res.shape, (1, 1)) - self.assertTrue(res.iloc[0, 0] is not np.nan) + assert res.iloc[0, 0] is not np.nan res = roundtrip(DataFrame([0]), False, None) self.assertEqual(res.shape, (1, 2)) - self.assertTrue(res.iloc[0, 0] is not np.nan) + assert res.iloc[0, 0] is not np.nan def test_excel_010_hemstring_raises_NotImplementedError(self): # This test was failing only for j>1 and header=False, @@ -1908,7 +1908,7 @@ def test_to_excel_styleconverter(self): "alignment": {"horizontal": "center", "vertical": "top"}} xlsx_style = _Openpyxl1Writer._convert_to_style(hstyle) - self.assertTrue(xlsx_style.font.bold) + assert xlsx_style.font.bold self.assertEqual(openpyxl.style.Border.BORDER_THIN, xlsx_style.borders.top.border_style) self.assertEqual(openpyxl.style.Border.BORDER_THIN, @@ -2200,7 +2200,7 @@ def test_to_excel_styleconverter(self): "alignment": {"horizontal": "center", "vertical": "top"}} xls_style = _XlwtWriter._convert_to_style(hstyle) - self.assertTrue(xls_style.font.bold) + assert xls_style.font.bold self.assertEqual(xlwt.Borders.THIN, xls_style.borders.top) self.assertEqual(xlwt.Borders.THIN, xls_style.borders.right) self.assertEqual(xlwt.Borders.THIN, xls_style.borders.bottom) @@ -2332,8 +2332,8 @@ def write_cells(self, *args, **kwargs): def check_called(func): func() - self.assertTrue(len(called_save) >= 1) - self.assertTrue(len(called_write_cells) >= 1) + assert len(called_save) >= 1 + assert len(called_write_cells) >= 1 del called_save[:] del called_write_cells[:] diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index cf08754a18527..db6ab236ee793 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -361,7 +361,7 @@ def test_negative_skiprows(self): def test_multiple_matches(self): url = 'https://docs.python.org/2/' dfs = self.read_html(url, match='Python') - self.assertTrue(len(dfs) > 1) + assert len(dfs) > 1 @network def test_python_docs_table(self): diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index f8923035b3a63..ae1cadcd41496 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -163,7 +163,7 @@ def test_numpy_scalar_float(self): def test_numpy_scalar_complex(self): x = np.complex64(np.random.rand() + 1j * np.random.rand()) x_rec = self.encode_decode(x) - self.assertTrue(np.allclose(x, x_rec)) + assert np.allclose(x, x_rec) def test_scalar_float(self): x = np.random.rand() @@ -173,7 +173,7 @@ def test_scalar_float(self): def test_scalar_complex(self): x = np.random.rand() + 1j * np.random.rand() x_rec = self.encode_decode(x) - self.assertTrue(np.allclose(x, x_rec)) + assert np.allclose(x, x_rec) def test_list_numpy_float(self): x = [np.float32(np.random.rand()) for i in range(5)] @@ -192,7 +192,7 @@ def 
test_list_numpy_float_complex(self): [np.complex128(np.random.rand() + 1j * np.random.rand()) for i in range(5)] x_rec = self.encode_decode(x) - self.assertTrue(np.allclose(x, x_rec)) + assert np.allclose(x, x_rec) def test_list_float(self): x = [np.random.rand() for i in range(5)] @@ -207,7 +207,7 @@ def test_list_float_complex(self): x = [np.random.rand() for i in range(5)] + \ [(np.random.rand() + 1j * np.random.rand()) for i in range(5)] x_rec = self.encode_decode(x) - self.assertTrue(np.allclose(x, x_rec)) + assert np.allclose(x, x_rec) def test_dict_float(self): x = {'foo': 1.0, 'bar': 2.0} @@ -247,8 +247,8 @@ def test_numpy_array_float(self): def test_numpy_array_complex(self): x = (np.random.rand(5) + 1j * np.random.rand(5)).astype(np.complex128) x_rec = self.encode_decode(x) - self.assertTrue(all(map(lambda x, y: x == y, x, x_rec)) and - x.dtype == x_rec.dtype) + assert (all(map(lambda x, y: x == y, x, x_rec)) and + x.dtype == x_rec.dtype) def test_list_mixed(self): x = [1.0, np.float32(3.5), np.complex128(4.25), u('foo')] @@ -613,7 +613,7 @@ def _test_compression(self, compress): assert_frame_equal(value, expected) # make sure that we can write to the new frames for block in value._data.blocks: - self.assertTrue(block.values.flags.writeable) + assert block.values.flags.writeable def test_compression_zlib(self): if not _ZLIB_INSTALLED: @@ -662,7 +662,7 @@ def decompress(ob): # make sure that we can write to the new frames even though # we needed to copy the data for block in value._data.blocks: - self.assertTrue(block.values.flags.writeable) + assert block.values.flags.writeable # mutate the data in some way block.values[0] += rhs[block.dtype] @@ -695,14 +695,14 @@ def _test_small_strings_no_warn(self, compress): empty_unpacked = self.encode_decode(empty, compress=compress) tm.assert_numpy_array_equal(empty_unpacked, empty) - self.assertTrue(empty_unpacked.flags.writeable) + assert empty_unpacked.flags.writeable char = np.array([ord(b'a')], dtype='uint8') with tm.assert_produces_warning(None): char_unpacked = self.encode_decode(char, compress=compress) tm.assert_numpy_array_equal(char_unpacked, char) - self.assertTrue(char_unpacked.flags.writeable) + assert char_unpacked.flags.writeable # if this test fails I am sorry because the interpreter is now in a # bad state where b'a' points to 98 == ord(b'b'). char_unpacked[0] = ord(b'b') @@ -732,15 +732,15 @@ def test_readonly_axis_blosc(self): pytest.skip('no blosc') df1 = DataFrame({'A': list('abcd')}) df2 = DataFrame(df1, index=[1., 2., 3., 4.]) - self.assertTrue(1 in self.encode_decode(df1['A'], compress='blosc')) - self.assertTrue(1. in self.encode_decode(df2['A'], compress='blosc')) + assert 1 in self.encode_decode(df1['A'], compress='blosc') + assert 1. in self.encode_decode(df2['A'], compress='blosc') def test_readonly_axis_zlib(self): # GH11880 df1 = DataFrame({'A': list('abcd')}) df2 = DataFrame(df1, index=[1., 2., 3., 4.]) - self.assertTrue(1 in self.encode_decode(df1['A'], compress='zlib')) - self.assertTrue(1. in self.encode_decode(df2['A'], compress='zlib')) + assert 1 in self.encode_decode(df1['A'], compress='zlib') + assert 1. 
in self.encode_decode(df2['A'], compress='zlib') def test_readonly_axis_blosc_to_sql(self): # GH11880 diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 6e7fca9a29e98..ae1b4137c354f 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -338,10 +338,10 @@ def test_api_default_format(self): pandas.set_option('io.hdf.default_format', 'table') _maybe_remove(store, 'df') store.put('df', df) - self.assertTrue(store.get_storer('df').is_table) + assert store.get_storer('df').is_table _maybe_remove(store, 'df2') store.append('df2', df) - self.assertTrue(store.get_storer('df').is_table) + assert store.get_storer('df').is_table pandas.set_option('io.hdf.default_format', None) @@ -358,10 +358,10 @@ def test_api_default_format(self): pandas.set_option('io.hdf.default_format', 'table') df.to_hdf(path, 'df3') with HDFStore(path) as store: - self.assertTrue(store.get_storer('df3').is_table) + assert store.get_storer('df3').is_table df.to_hdf(path, 'df4', append=True) with HDFStore(path) as store: - self.assertTrue(store.get_storer('df4').is_table) + assert store.get_storer('df4').is_table pandas.set_option('io.hdf.default_format', None) @@ -376,14 +376,14 @@ def test_keys(self): store['foo/bar'] = tm.makePanel() self.assertEqual(len(store), 5) expected = set(['/a', '/b', '/c', '/d', '/foo/bar']) - self.assertTrue(set(store.keys()) == expected) - self.assertTrue(set(store) == expected) + assert set(store.keys()) == expected + assert set(store) == expected def test_iter_empty(self): with ensure_clean_store(self.path) as store: # GH 12221 - self.assertTrue(list(store) == []) + assert list(store) == [] def test_repr(self): @@ -549,7 +549,7 @@ def test_reopen_handle(self): # truncation ok here store.open('w') - self.assertTrue(store.is_open) + assert store.is_open self.assertEqual(len(store), 0) store.close() assert not store.is_open @@ -559,7 +559,7 @@ def test_reopen_handle(self): # reopen as read store.open('r') - self.assertTrue(store.is_open) + assert store.is_open self.assertEqual(len(store), 1) self.assertEqual(store._mode, 'r') store.close() @@ -567,7 +567,7 @@ def test_reopen_handle(self): # reopen as append store.open('a') - self.assertTrue(store.is_open) + assert store.is_open self.assertEqual(len(store), 1) self.assertEqual(store._mode, 'a') store.close() @@ -575,7 +575,7 @@ def test_reopen_handle(self): # reopen as append (again) store.open('a') - self.assertTrue(store.is_open) + assert store.is_open self.assertEqual(len(store), 1) self.assertEqual(store._mode, 'a') store.close() @@ -1232,7 +1232,7 @@ def test_ndim_indexables(self): def check_indexers(key, indexers): for i, idx in enumerate(indexers): descr = getattr(store.root, key).table.description - self.assertTrue(getattr(descr, idx)._v_pos == i) + assert getattr(descr, idx)._v_pos == i # append then change (will take existing schema) indexers = ['items', 'major_axis', 'minor_axis'] @@ -2280,7 +2280,7 @@ def test_remove_where(self): # deleted number (entire table) n = store.remove('wp', []) - self.assertTrue(n == 120) + assert n == 120 # non - empty where _maybe_remove(store, 'wp') @@ -2300,7 +2300,7 @@ def test_remove_startstop(self): _maybe_remove(store, 'wp1') store.put('wp1', wp, format='t') n = store.remove('wp1', start=32) - self.assertTrue(n == 120 - 32) + assert n == 120 - 32 result = store.select('wp1') expected = wp.reindex(major_axis=wp.major_axis[:32 // 4]) assert_panel_equal(result, expected) @@ -2308,7 +2308,7 @@ def test_remove_startstop(self): 
_maybe_remove(store, 'wp2') store.put('wp2', wp, format='t') n = store.remove('wp2', start=-32) - self.assertTrue(n == 32) + assert n == 32 result = store.select('wp2') expected = wp.reindex(major_axis=wp.major_axis[:-32 // 4]) assert_panel_equal(result, expected) @@ -2317,7 +2317,7 @@ def test_remove_startstop(self): _maybe_remove(store, 'wp3') store.put('wp3', wp, format='t') n = store.remove('wp3', stop=32) - self.assertTrue(n == 32) + assert n == 32 result = store.select('wp3') expected = wp.reindex(major_axis=wp.major_axis[32 // 4:]) assert_panel_equal(result, expected) @@ -2325,7 +2325,7 @@ def test_remove_startstop(self): _maybe_remove(store, 'wp4') store.put('wp4', wp, format='t') n = store.remove('wp4', stop=-32) - self.assertTrue(n == 120 - 32) + assert n == 120 - 32 result = store.select('wp4') expected = wp.reindex(major_axis=wp.major_axis[-32 // 4:]) assert_panel_equal(result, expected) @@ -2334,7 +2334,7 @@ def test_remove_startstop(self): _maybe_remove(store, 'wp5') store.put('wp5', wp, format='t') n = store.remove('wp5', start=16, stop=-16) - self.assertTrue(n == 120 - 32) + assert n == 120 - 32 result = store.select('wp5') expected = wp.reindex( major_axis=(wp.major_axis[:16 // 4] @@ -2344,7 +2344,7 @@ def test_remove_startstop(self): _maybe_remove(store, 'wp6') store.put('wp6', wp, format='t') n = store.remove('wp6', start=16, stop=16) - self.assertTrue(n == 0) + assert n == 0 result = store.select('wp6') expected = wp.reindex(major_axis=wp.major_axis) assert_panel_equal(result, expected) @@ -2358,7 +2358,7 @@ def test_remove_startstop(self): crit = 'major_axis=date' store.put('wp7', wp, format='t') n = store.remove('wp7', where=[crit], stop=80) - self.assertTrue(n == 28) + assert n == 28 result = store.select('wp7') expected = wp.reindex(major_axis=wp.major_axis.difference( wp.major_axis[np.arange(0, 20, 3)])) @@ -2377,7 +2377,7 @@ def test_remove_crit(self): crit4 = 'major_axis=date4' store.put('wp3', wp, format='t') n = store.remove('wp3', where=[crit4]) - self.assertTrue(n == 36) + assert n == 36 result = store.select('wp3') expected = wp.reindex( @@ -2392,10 +2392,10 @@ def test_remove_crit(self): crit1 = 'major_axis>date' crit2 = "minor_axis=['A', 'D']" n = store.remove('wp', where=[crit1]) - self.assertTrue(n == 56) + assert n == 56 n = store.remove('wp', where=[crit2]) - self.assertTrue(n == 32) + assert n == 32 result = store['wp'] expected = wp.truncate(after=date).reindex(minor=['B', 'C']) @@ -2819,7 +2819,7 @@ def test_frame(self): df['foo'] = np.random.randn(len(df)) store['df'] = df recons = store['df'] - self.assertTrue(recons._data.is_consolidated()) + assert recons._data.is_consolidated() # empty self._check_roundtrip(df[:0], tm.assert_frame_equal) @@ -4184,7 +4184,7 @@ def test_start_stop_table(self): # out of range result = store.select( 'df', "columns=['A']", start=30, stop=40) - self.assertTrue(len(result) == 0) + assert len(result) == 0 expected = df.loc[30:40, ['A']] tm.assert_frame_equal(result, expected) @@ -4495,8 +4495,7 @@ def do_copy(f=None, new_f=None, keys=None, if propindexes: for a in orig_t.axes: if a.is_indexed: - self.assertTrue( - new_t[a.name].is_indexed) + assert new_t[a.name].is_indexed finally: safe_close(store) @@ -4803,8 +4802,8 @@ def test_duplicate_column_name(self): other = read_hdf(path, 'df') tm.assert_frame_equal(df, other) - self.assertTrue(df.equals(other)) - self.assertTrue(other.equals(df)) + assert df.equals(other) + assert other.equals(df) def test_round_trip_equals(self): # GH 9330 @@ -4814,8 +4813,8 @@ def 
test_round_trip_equals(self): df.to_hdf(path, 'df', format='table') other = read_hdf(path, 'df') tm.assert_frame_equal(df, other) - self.assertTrue(df.equals(other)) - self.assertTrue(other.equals(df)) + assert df.equals(other) + assert other.equals(df) def test_preserve_timedeltaindex_type(self): # GH9635 @@ -4851,7 +4850,7 @@ def test_colums_multiindex_modified(self): cols2load = list('BCD') cols2load_original = list(cols2load) df_loaded = read_hdf(path, 'df', columns=cols2load) # noqa - self.assertTrue(cols2load_original == cols2load) + assert cols2load_original == cols2load def test_to_hdf_with_object_column_names(self): # GH9057 @@ -4902,7 +4901,7 @@ def test_read_hdf_open_store(self): store = HDFStore(path, mode='r') indirect = read_hdf(store, 'df') tm.assert_frame_equal(direct, indirect) - self.assertTrue(store.is_open) + assert store.is_open store.close() def test_read_hdf_iterator(self): @@ -4916,7 +4915,7 @@ def test_read_hdf_iterator(self): df.to_hdf(path, 'df', mode='w', format='t') direct = read_hdf(path, 'df') iterator = read_hdf(path, 'df', iterator=True) - self.assertTrue(isinstance(iterator, TableIterator)) + assert isinstance(iterator, TableIterator) indirect = next(iterator.__iter__()) tm.assert_frame_equal(direct, indirect) iterator.store.close() @@ -5023,7 +5022,7 @@ def test_query_long_float_literal(self): cutoff = 1000000000.0006 result = store.select('test', "A < %.4f" % cutoff) - self.assertTrue(result.empty) + assert result.empty cutoff = 1000000000.0010 result = store.select('test', "A > %.4f" % cutoff) diff --git a/pandas/tests/io/test_s3.py b/pandas/tests/io/test_s3.py index cff8eef74a607..36a0304bddfaf 100644 --- a/pandas/tests/io/test_s3.py +++ b/pandas/tests/io/test_s3.py @@ -6,5 +6,5 @@ class TestS3URL(tm.TestCase): def test_is_s3_url(self): - self.assertTrue(_is_s3_url("s3://pandas/somethingelse.com")) + assert _is_s3_url("s3://pandas/somethingelse.com") assert not _is_s3_url("s4://pandas/somethingelse.com") diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 0930d99ea5c30..fd883c9c0ff00 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -272,8 +272,7 @@ def _check_iris_loaded_frame(self, iris_frame): pytype = iris_frame.dtypes[0].type row = iris_frame.iloc[0] - self.assertTrue( - issubclass(pytype, np.floating), 'Loaded frame has incorrect type') + assert issubclass(pytype, np.floating) tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) def _load_test1_data(self): @@ -372,8 +371,7 @@ def _to_sql(self): self.drop_table('test_frame1') self.pandasSQL.to_sql(self.test_frame1, 'test_frame1') - self.assertTrue(self.pandasSQL.has_table( - 'test_frame1'), 'Table not written to DB') + assert self.pandasSQL.has_table('test_frame1') # Nuke table self.drop_table('test_frame1') @@ -387,8 +385,7 @@ def _to_sql_fail(self): self.pandasSQL.to_sql( self.test_frame1, 'test_frame1', if_exists='fail') - self.assertTrue(self.pandasSQL.has_table( - 'test_frame1'), 'Table not written to DB') + assert self.pandasSQL.has_table('test_frame1') pytest.raises(ValueError, self.pandasSQL.to_sql, self.test_frame1, 'test_frame1', if_exists='fail') @@ -403,8 +400,7 @@ def _to_sql_replace(self): # Add to table again self.pandasSQL.to_sql( self.test_frame1, 'test_frame1', if_exists='replace') - self.assertTrue(self.pandasSQL.has_table( - 'test_frame1'), 'Table not written to DB') + assert self.pandasSQL.has_table('test_frame1') num_entries = len(self.test_frame1) num_rows = self._count_rows('test_frame1') @@ -424,8 +420,7 @@ 
def _to_sql_append(self): # Add to table again self.pandasSQL.to_sql( self.test_frame1, 'test_frame1', if_exists='append') - self.assertTrue(self.pandasSQL.has_table( - 'test_frame1'), 'Table not written to DB') + assert self.pandasSQL.has_table('test_frame1') num_entries = 2 * len(self.test_frame1) num_rows = self._count_rows('test_frame1') @@ -528,16 +523,12 @@ def test_read_sql_view(self): def test_to_sql(self): sql.to_sql(self.test_frame1, 'test_frame1', self.conn) - self.assertTrue( - sql.has_table('test_frame1', self.conn), - 'Table not written to DB') + assert sql.has_table('test_frame1', self.conn) def test_to_sql_fail(self): sql.to_sql(self.test_frame1, 'test_frame2', self.conn, if_exists='fail') - self.assertTrue( - sql.has_table('test_frame2', self.conn), - 'Table not written to DB') + assert sql.has_table('test_frame2', self.conn) pytest.raises(ValueError, sql.to_sql, self.test_frame1, 'test_frame2', self.conn, if_exists='fail') @@ -548,9 +539,7 @@ def test_to_sql_replace(self): # Add to table again sql.to_sql(self.test_frame1, 'test_frame3', self.conn, if_exists='replace') - self.assertTrue( - sql.has_table('test_frame3', self.conn), - 'Table not written to DB') + assert sql.has_table('test_frame3', self.conn) num_entries = len(self.test_frame1) num_rows = self._count_rows('test_frame3') @@ -565,9 +554,7 @@ def test_to_sql_append(self): # Add to table again sql.to_sql(self.test_frame1, 'test_frame4', self.conn, if_exists='append') - self.assertTrue( - sql.has_table('test_frame4', self.conn), - 'Table not written to DB') + assert sql.has_table('test_frame4', self.conn) num_entries = 2 * len(self.test_frame1) num_rows = self._count_rows('test_frame4') @@ -629,27 +616,21 @@ def test_date_parsing(self): df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates=['DateCol']) - self.assertTrue( - issubclass(df.DateCol.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") + assert issubclass(df.DateCol.dtype.type, np.datetime64) df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'}) - self.assertTrue( - issubclass(df.DateCol.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") + assert issubclass(df.DateCol.dtype.type, np.datetime64) df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates=['IntDateCol']) - self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), - "IntDateCol loaded with incorrect type") + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates={'IntDateCol': 's'}) - self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), - "IntDateCol loaded with incorrect type") + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) def test_date_and_index(self): # Test case where same column appears in parse_date and index_col @@ -658,11 +639,8 @@ def test_date_and_index(self): index_col='DateCol', parse_dates=['DateCol', 'IntDateCol']) - self.assertTrue(issubclass(df.index.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") - - self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), - "IntDateCol loaded with incorrect type") + assert issubclass(df.index.dtype.type, np.datetime64) + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) def test_timedelta(self): @@ -778,27 +756,27 @@ def test_integer_col_names(self): def test_get_schema(self): create_sql = sql.get_schema(self.test_frame1, 'test', 
con=self.conn) - self.assertTrue('CREATE' in create_sql) + assert 'CREATE' in create_sql def test_get_schema_dtypes(self): float_frame = DataFrame({'a': [1.1, 1.2], 'b': [2.1, 2.2]}) dtype = sqlalchemy.Integer if self.mode == 'sqlalchemy' else 'INTEGER' create_sql = sql.get_schema(float_frame, 'test', con=self.conn, dtype={'b': dtype}) - self.assertTrue('CREATE' in create_sql) - self.assertTrue('INTEGER' in create_sql) + assert 'CREATE' in create_sql + assert 'INTEGER' in create_sql def test_get_schema_keys(self): frame = DataFrame({'Col1': [1.1, 1.2], 'Col2': [2.1, 2.2]}) create_sql = sql.get_schema(frame, 'test', con=self.conn, keys='Col1') constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("Col1")' - self.assertTrue(constraint_sentence in create_sql) + assert constraint_sentence in create_sql # multiple columns as key (GH10385) create_sql = sql.get_schema(self.test_frame1, 'test', con=self.conn, keys=['A', 'B']) constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("A", "B")' - self.assertTrue(constraint_sentence in create_sql) + assert constraint_sentence in create_sql def test_chunksize_read(self): df = DataFrame(np.random.randn(22, 5), columns=list('abcde')) @@ -957,8 +935,7 @@ def test_sqlalchemy_type_mapping(self): utc=True)}) db = sql.SQLDatabase(self.conn) table = sql.SQLTable("test_type", db, frame=df) - self.assertTrue(isinstance( - table.table.c['time'].type, sqltypes.DateTime)) + assert isinstance(table.table.c['time'].type, sqltypes.DateTime) def test_database_uri_string(self): @@ -1100,7 +1077,7 @@ def test_safe_names_warning(self): def test_get_schema2(self): # without providing a connection object (available for backwards comp) create_sql = sql.get_schema(self.test_frame1, 'test') - self.assertTrue('CREATE' in create_sql) + assert 'CREATE' in create_sql def _get_sqlite_column_type(self, schema, column): @@ -1211,8 +1188,7 @@ def test_create_table(self): pandasSQL = sql.SQLDatabase(temp_conn) pandasSQL.to_sql(temp_frame, 'temp_frame') - self.assertTrue( - temp_conn.has_table('temp_frame'), 'Table not written to DB') + assert temp_conn.has_table('temp_frame') def test_drop_table(self): temp_conn = self.connect() @@ -1223,8 +1199,7 @@ def test_drop_table(self): pandasSQL = sql.SQLDatabase(temp_conn) pandasSQL.to_sql(temp_frame, 'temp_frame') - self.assertTrue( - temp_conn.has_table('temp_frame'), 'Table not written to DB') + assert temp_conn.has_table('temp_frame') pandasSQL.drop_table('temp_frame') @@ -1253,19 +1228,14 @@ def test_read_table_absent(self): def test_default_type_conversion(self): df = sql.read_sql_table("types_test_data", self.conn) - self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating), - "FloatCol loaded with incorrect type") - self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer), - "IntCol loaded with incorrect type") - self.assertTrue(issubclass(df.BoolCol.dtype.type, np.bool_), - "BoolCol loaded with incorrect type") + assert issubclass(df.FloatCol.dtype.type, np.floating) + assert issubclass(df.IntCol.dtype.type, np.integer) + assert issubclass(df.BoolCol.dtype.type, np.bool_) # Int column with NA values stays as float - self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating), - "IntColWithNull loaded with incorrect type") + assert issubclass(df.IntColWithNull.dtype.type, np.floating) # Bool column with NA values becomes object - self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.object), - "BoolColWithNull loaded with incorrect type") + assert issubclass(df.BoolColWithNull.dtype.type, np.object) def 
test_bigint(self): # int64 should be converted to BigInteger, GH7433 @@ -1280,8 +1250,7 @@ def test_default_date_load(self): # IMPORTANT - sqlite has no native date type, so shouldn't parse, but # MySQL SHOULD be converted. - self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") + assert issubclass(df.DateCol.dtype.type, np.datetime64) def test_datetime_with_timezone(self): # edge case that converts postgresql datetime with time zone types @@ -1302,7 +1271,7 @@ def check(col): self.assertEqual(col[1], Timestamp('2000-06-01 07:00:00')) elif is_datetime64tz_dtype(col.dtype): - self.assertTrue(str(col.dt.tz) == 'UTC') + assert str(col.dt.tz) == 'UTC' # "2000-01-01 00:00:00-08:00" should convert to # "2000-01-01 08:00:00" @@ -1327,11 +1296,9 @@ def check(col): # even with the same versions of psycopg2 & sqlalchemy, possibly a # Postgrsql server version difference col = df.DateColWithTz - self.assertTrue(is_object_dtype(col.dtype) or - is_datetime64_dtype(col.dtype) or - is_datetime64tz_dtype(col.dtype), - "DateCol loaded with incorrect type -> {0}" - .format(col.dtype)) + assert (is_object_dtype(col.dtype) or + is_datetime64_dtype(col.dtype) or + is_datetime64tz_dtype(col.dtype)) df = pd.read_sql_query("select * from types_test_data", self.conn, parse_dates=['DateColWithTz']) @@ -1343,10 +1310,8 @@ def check(col): self.conn, chunksize=1)), ignore_index=True) col = df.DateColWithTz - self.assertTrue(is_datetime64tz_dtype(col.dtype), - "DateCol loaded with incorrect type -> {0}" - .format(col.dtype)) - self.assertTrue(str(col.dt.tz) == 'UTC') + assert is_datetime64tz_dtype(col.dtype) + assert str(col.dt.tz) == 'UTC' expected = sql.read_sql_table("types_test_data", self.conn) tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz @@ -1363,33 +1328,27 @@ def test_date_parsing(self): df = sql.read_sql_table("types_test_data", self.conn, parse_dates=['DateCol']) - self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") + assert issubclass(df.DateCol.dtype.type, np.datetime64) df = sql.read_sql_table("types_test_data", self.conn, parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'}) - self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") + assert issubclass(df.DateCol.dtype.type, np.datetime64) df = sql.read_sql_table("types_test_data", self.conn, parse_dates={ 'DateCol': {'format': '%Y-%m-%d %H:%M:%S'}}) - self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64), - "IntDateCol loaded with incorrect type") + assert issubclass(df.DateCol.dtype.type, np.datetime64) df = sql.read_sql_table( "types_test_data", self.conn, parse_dates=['IntDateCol']) - self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), - "IntDateCol loaded with incorrect type") + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) df = sql.read_sql_table( "types_test_data", self.conn, parse_dates={'IntDateCol': 's'}) - self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), - "IntDateCol loaded with incorrect type") + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) df = sql.read_sql_table("types_test_data", self.conn, parse_dates={'IntDateCol': {'unit': 's'}}) - self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), - "IntDateCol loaded with incorrect type") + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) def test_datetime(self): df = DataFrame({'A': date_range('2013-01-01 09:00:00', periods=3), @@ 
-1405,7 +1364,7 @@ def test_datetime(self): result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn) result = result.drop('index', axis=1) if self.flavor == 'sqlite': - self.assertTrue(isinstance(result.loc[0, 'A'], string_types)) + assert isinstance(result.loc[0, 'A'], string_types) result['A'] = to_datetime(result['A']) tm.assert_frame_equal(result, df) else: @@ -1424,7 +1383,7 @@ def test_datetime_NaT(self): # with read_sql -> no type information -> sqlite has no native result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn) if self.flavor == 'sqlite': - self.assertTrue(isinstance(result.loc[0, 'A'], string_types)) + assert isinstance(result.loc[0, 'A'], string_types) result['A'] = to_datetime(result['A'], errors='coerce') tm.assert_frame_equal(result, df) else: @@ -1557,7 +1516,7 @@ def test_dtype(self): meta = sqlalchemy.schema.MetaData(bind=self.conn) meta.reflect() sqltype = meta.tables['dtype_test2'].columns['B'].type - self.assertTrue(isinstance(sqltype, sqlalchemy.TEXT)) + assert isinstance(sqltype, sqlalchemy.TEXT) pytest.raises(ValueError, df.to_sql, 'error', self.conn, dtype={'B': str}) @@ -1565,7 +1524,7 @@ def test_dtype(self): df.to_sql('dtype_test3', self.conn, dtype={'B': sqlalchemy.String(10)}) meta.reflect() sqltype = meta.tables['dtype_test3'].columns['B'].type - self.assertTrue(isinstance(sqltype, sqlalchemy.String)) + assert isinstance(sqltype, sqlalchemy.String) self.assertEqual(sqltype.length, 10) # single dtype @@ -1574,8 +1533,8 @@ def test_dtype(self): meta.reflect() sqltypea = meta.tables['single_dtype_test'].columns['A'].type sqltypeb = meta.tables['single_dtype_test'].columns['B'].type - self.assertTrue(isinstance(sqltypea, sqlalchemy.TEXT)) - self.assertTrue(isinstance(sqltypeb, sqlalchemy.TEXT)) + assert isinstance(sqltypea, sqlalchemy.TEXT) + assert isinstance(sqltypeb, sqlalchemy.TEXT) def test_notnull_dtype(self): cols = {'Bool': Series([True, None]), @@ -1597,10 +1556,10 @@ def test_notnull_dtype(self): col_dict = meta.tables[tbl].columns - self.assertTrue(isinstance(col_dict['Bool'].type, my_type)) - self.assertTrue(isinstance(col_dict['Date'].type, sqltypes.DateTime)) - self.assertTrue(isinstance(col_dict['Int'].type, sqltypes.Integer)) - self.assertTrue(isinstance(col_dict['Float'].type, sqltypes.Float)) + assert isinstance(col_dict['Bool'].type, my_type) + assert isinstance(col_dict['Date'].type, sqltypes.DateTime) + assert isinstance(col_dict['Int'].type, sqltypes.Integer) + assert isinstance(col_dict['Float'].type, sqltypes.Float) def test_double_precision(self): V = 1.23456789101112131415 @@ -1626,10 +1585,10 @@ def test_double_precision(self): col_dict = meta.tables['test_dtypes'].columns self.assertEqual(str(col_dict['f32'].type), str(col_dict['f64_as_f32'].type)) - self.assertTrue(isinstance(col_dict['f32'].type, sqltypes.Float)) - self.assertTrue(isinstance(col_dict['f64'].type, sqltypes.Float)) - self.assertTrue(isinstance(col_dict['i32'].type, sqltypes.Integer)) - self.assertTrue(isinstance(col_dict['i64'].type, sqltypes.BigInteger)) + assert isinstance(col_dict['f32'].type, sqltypes.Float) + assert isinstance(col_dict['f64'].type, sqltypes.Float) + assert isinstance(col_dict['i32'].type, sqltypes.Integer) + assert isinstance(col_dict['i64'].type, sqltypes.BigInteger) def test_connectable_issue_example(self): # This tests the example raised in issue @@ -1705,20 +1664,17 @@ def setup_driver(cls): def test_default_type_conversion(self): df = sql.read_sql_table("types_test_data", self.conn) - 
self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating), - "FloatCol loaded with incorrect type") - self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer), - "IntCol loaded with incorrect type") + assert issubclass(df.FloatCol.dtype.type, np.floating) + assert issubclass(df.IntCol.dtype.type, np.integer) + # sqlite has no boolean type, so integer type is returned - self.assertTrue(issubclass(df.BoolCol.dtype.type, np.integer), - "BoolCol loaded with incorrect type") + assert issubclass(df.BoolCol.dtype.type, np.integer) # Int column with NA values stays as float - self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating), - "IntColWithNull loaded with incorrect type") + assert issubclass(df.IntColWithNull.dtype.type, np.floating) + # Non-native Bool column with NA values stays as float - self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating), - "BoolColWithNull loaded with incorrect type") + assert issubclass(df.BoolColWithNull.dtype.type, np.floating) def test_default_date_load(self): df = sql.read_sql_table("types_test_data", self.conn) @@ -1760,20 +1716,17 @@ def setup_driver(cls): def test_default_type_conversion(self): df = sql.read_sql_table("types_test_data", self.conn) - self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating), - "FloatCol loaded with incorrect type") - self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer), - "IntCol loaded with incorrect type") + assert issubclass(df.FloatCol.dtype.type, np.floating) + assert issubclass(df.IntCol.dtype.type, np.integer) + # MySQL has no real BOOL type (it's an alias for TINYINT) - self.assertTrue(issubclass(df.BoolCol.dtype.type, np.integer), - "BoolCol loaded with incorrect type") + assert issubclass(df.BoolCol.dtype.type, np.integer) # Int column with NA values stays as float - self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating), - "IntColWithNull loaded with incorrect type") + assert issubclass(df.IntColWithNull.dtype.type, np.floating) + # Bool column with NA = int column with NA values => becomes float - self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating), - "BoolColWithNull loaded with incorrect type") + assert issubclass(df.BoolColWithNull.dtype.type, np.floating) def test_read_procedure(self): # see GH7324. 
Although it is more an api test, it is added to the @@ -1979,8 +1932,7 @@ def test_create_and_drop_table(self): self.pandasSQL.to_sql(temp_frame, 'drop_test_frame') - self.assertTrue(self.pandasSQL.has_table('drop_test_frame'), - 'Table not written to DB') + assert self.pandasSQL.has_table('drop_test_frame') self.pandasSQL.drop_table('drop_test_frame') @@ -2208,12 +2160,12 @@ def test_schema(self): for l in lines: tokens = l.split(' ') if len(tokens) == 2 and tokens[0] == 'A': - self.assertTrue(tokens[1] == 'DATETIME') + assert tokens[1] == 'DATETIME' frame = tm.makeTimeDataFrame() create_sql = sql.get_schema(frame, 'test', keys=['A', 'B']) lines = create_sql.splitlines() - self.assertTrue('PRIMARY KEY ("A", "B")' in create_sql) + assert 'PRIMARY KEY ("A", "B")' in create_sql cur = self.conn.cursor() cur.execute(create_sql) @@ -2514,13 +2466,13 @@ def test_schema(self): for l in lines: tokens = l.split(' ') if len(tokens) == 2 and tokens[0] == 'A': - self.assertTrue(tokens[1] == 'DATETIME') + assert tokens[1] == 'DATETIME' frame = tm.makeTimeDataFrame() drop_sql = "DROP TABLE IF EXISTS test" create_sql = sql.get_schema(frame, 'test', keys=['A', 'B']) lines = create_sql.splitlines() - self.assertTrue('PRIMARY KEY (`A`, `B`)' in create_sql) + assert 'PRIMARY KEY (`A`, `B`)' in create_sql cur = self.conn.cursor() cur.execute(drop_sql) cur.execute(create_sql) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 9dc2bd589bf9b..72023c77e7c88 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -647,10 +647,10 @@ def test_variable_labels(self): keys = ('var1', 'var2', 'var3') labels = ('label1', 'label2', 'label3') for k, v in compat.iteritems(sr_115): - self.assertTrue(k in sr_117) - self.assertTrue(v == sr_117[k]) - self.assertTrue(k in keys) - self.assertTrue(v in labels) + assert k in sr_117 + assert v == sr_117[k] + assert k in keys + assert v in labels def test_minimal_size_col(self): str_lens = (1, 100, 244) @@ -667,8 +667,8 @@ def test_minimal_size_col(self): variables = sr.varlist formats = sr.fmtlist for variable, fmt, typ in zip(variables, formats, typlist): - self.assertTrue(int(variable[1:]) == int(fmt[1:-1])) - self.assertTrue(int(variable[1:]) == typ) + assert int(variable[1:]) == int(fmt[1:-1]) + assert int(variable[1:]) == typ def test_excessively_long_string(self): str_lens = (1, 244, 500) @@ -694,21 +694,21 @@ def test_missing_value_generator(self): offset = valid_range[t][1] for i in range(0, 27): val = StataMissingValue(offset + 1 + i) - self.assertTrue(val.string == expected_values[i]) + assert val.string == expected_values[i] # Test extremes for floats val = StataMissingValue(struct.unpack(' 0) + assert len(ax.get_children()) > 0 if layout is not None: result = self._get_axes_layout(_flatten(axes)) @@ -437,7 +437,7 @@ def _check_box_return_type(self, returned, return_type, expected_keys=None, if return_type is None: return_type = 'dict' - self.assertTrue(isinstance(returned, types[return_type])) + assert isinstance(returned, types[return_type]) if return_type == 'both': assert isinstance(returned.ax, Axes) assert isinstance(returned.lines, dict) @@ -448,11 +448,11 @@ def _check_box_return_type(self, returned, return_type, expected_keys=None, assert isinstance(r, Axes) return - self.assertTrue(isinstance(returned, Series)) + assert isinstance(returned, Series) self.assertEqual(sorted(returned.keys()), sorted(expected_keys)) for key, value in iteritems(returned): - self.assertTrue(isinstance(value, types[return_type])) 
+ assert isinstance(value, types[return_type]) # check returned dict has correct mapping if return_type == 'axes': if check_ax_title: @@ -504,13 +504,13 @@ def is_grid_on(): spndx += 1 mpl.rc('axes', grid=True) obj.plot(kind=kind, **kws) - self.assertTrue(is_grid_on()) + assert is_grid_on() self.plt.subplot(1, 4 * len(kinds), spndx) spndx += 1 mpl.rc('axes', grid=False) obj.plot(kind=kind, grid=True, **kws) - self.assertTrue(is_grid_on()) + assert is_grid_on() def _maybe_unpack_cycler(self, rcParams, field='color'): """ diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 018cbbe170313..fe6d5e5cf148f 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -96,7 +96,7 @@ def test_boxplot_legacy(self): def test_boxplot_return_type_none(self): # GH 12216; return_type=None & by=None -> axes result = self.hist_df.boxplot() - self.assertTrue(isinstance(result, self.plt.Axes)) + assert isinstance(result, self.plt.Axes) @slow def test_boxplot_return_type_legacy(self): @@ -129,8 +129,8 @@ def test_boxplot_axis_limits(self): def _check_ax_limits(col, ax): y_min, y_max = ax.get_ylim() - self.assertTrue(y_min <= col.min()) - self.assertTrue(y_max >= col.max()) + assert y_min <= col.min() + assert y_max >= col.max() df = self.hist_df.copy() df['age'] = np.random.randint(1, 20, df.shape[0]) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 7534d9363f267..30d67630afa41 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -278,8 +278,7 @@ def test_irreg_hf(self): diffs = Series(ax.get_lines()[0].get_xydata()[:, 0]).diff() sec = 1. / 24 / 60 / 60 - self.assertTrue((np.fabs(diffs[1:] - [sec, sec * 2, sec]) < 1e-8).all( - )) + assert (np.fabs(diffs[1:] - [sec, sec * 2, sec]) < 1e-8).all() plt.clf() fig.add_subplot(111) @@ -287,7 +286,7 @@ def test_irreg_hf(self): df2.index = df.index.asobject ax = df2.plot() diffs = Series(ax.get_lines()[0].get_xydata()[:, 0]).diff() - self.assertTrue((np.fabs(diffs[1:] - sec) < 1e-8).all()) + assert (np.fabs(diffs[1:] - sec) < 1e-8).all() def test_irregular_datetime64_repr_bug(self): import matplotlib.pyplot as plt @@ -509,7 +508,7 @@ def test_gaps(self): data = l.get_xydata() assert isinstance(data, np.ma.core.MaskedArray) mask = data.mask - self.assertTrue(mask[5:25, 1].all()) + assert mask[5:25, 1].all() plt.close(ax.get_figure()) # irregular @@ -523,7 +522,7 @@ def test_gaps(self): data = l.get_xydata() assert isinstance(data, np.ma.core.MaskedArray) mask = data.mask - self.assertTrue(mask[2:5, 1].all()) + assert mask[2:5, 1].all() plt.close(ax.get_figure()) # non-ts @@ -537,7 +536,7 @@ def test_gaps(self): data = l.get_xydata() assert isinstance(data, np.ma.core.MaskedArray) mask = data.mask - self.assertTrue(mask[2:5, 1].all()) + assert mask[2:5, 1].all() @slow def test_gap_upsample(self): @@ -558,7 +557,7 @@ def test_gap_upsample(self): assert isinstance(data, np.ma.core.MaskedArray) mask = data.mask - self.assertTrue(mask[5:25, 1].all()) + assert mask[5:25, 1].all() @slow def test_secondary_y(self): @@ -567,7 +566,7 @@ def test_secondary_y(self): ser = Series(np.random.randn(10)) ser2 = Series(np.random.randn(10)) ax = ser.plot(secondary_y=True) - self.assertTrue(hasattr(ax, 'left_ax')) + assert hasattr(ax, 'left_ax') assert not hasattr(ax, 'right_ax') fig = ax.get_figure() axes = fig.get_axes() @@ -585,10 +584,10 @@ def test_secondary_y(self): 
ax = ser2.plot()
         ax2 = ser.plot(secondary_y=True)
-        self.assertTrue(ax.get_yaxis().get_visible())
+        assert ax.get_yaxis().get_visible()
         assert not hasattr(ax, 'left_ax')
-        self.assertTrue(hasattr(ax, 'right_ax'))
-        self.assertTrue(hasattr(ax2, 'left_ax'))
+        assert hasattr(ax, 'right_ax')
+        assert hasattr(ax2, 'left_ax')
         assert not hasattr(ax2, 'right_ax')

     @slow
@@ -598,7 +597,7 @@ def test_secondary_y_ts(self):
         ser = Series(np.random.randn(10), idx)
         ser2 = Series(np.random.randn(10), idx)
         ax = ser.plot(secondary_y=True)
-        self.assertTrue(hasattr(ax, 'left_ax'))
+        assert hasattr(ax, 'left_ax')
         assert not hasattr(ax, 'right_ax')
         fig = ax.get_figure()
         axes = fig.get_axes()
@@ -616,7 +615,7 @@ def test_secondary_y_ts(self):
         ax = ser2.plot()
         ax2 = ser.plot(secondary_y=True)
-        self.assertTrue(ax.get_yaxis().get_visible())
+        assert ax.get_yaxis().get_visible()

     @slow
     def test_secondary_kde(self):
@@ -626,7 +625,7 @@
         import matplotlib.pyplot as plt  # noqa
         ser = Series(np.random.randn(10))
         ax = ser.plot(secondary_y=True, kind='density')
-        self.assertTrue(hasattr(ax, 'left_ax'))
+        assert hasattr(ax, 'left_ax')
         assert not hasattr(ax, 'right_ax')
         fig = ax.get_figure()
         axes = fig.get_axes()
@@ -670,8 +669,8 @@ def test_mixed_freq_regular_first(self):
         lines = ax2.get_lines()
         idx1 = PeriodIndex(lines[0].get_xdata())
         idx2 = PeriodIndex(lines[1].get_xdata())
-        self.assertTrue(idx1.equals(s1.index.to_period('B')))
-        self.assertTrue(idx2.equals(s2.index.to_period('B')))
+        assert idx1.equals(s1.index.to_period('B'))
+        assert idx2.equals(s2.index.to_period('B'))
         left, right = ax2.get_xlim()
         pidx = s1.index.to_period()
         self.assertEqual(left, pidx[0].ordinal)
@@ -701,8 +700,8 @@ def test_mixed_freq_regular_first_df(self):
         lines = ax2.get_lines()
         idx1 = PeriodIndex(lines[0].get_xdata())
         idx2 = PeriodIndex(lines[1].get_xdata())
-        self.assertTrue(idx1.equals(s1.index.to_period('B')))
-        self.assertTrue(idx2.equals(s2.index.to_period('B')))
+        assert idx1.equals(s1.index.to_period('B'))
+        assert idx2.equals(s2.index.to_period('B'))
         left, right = ax2.get_xlim()
         pidx = s1.index.to_period()
         self.assertEqual(left, pidx[0].ordinal)
@@ -833,7 +832,7 @@ def test_to_weekly_resampling(self):
         tsplot(high, plt.Axes.plot)
         lines = tsplot(low, plt.Axes.plot)
         for l in lines:
-            self.assertTrue(PeriodIndex(data=l.get_xdata()).freq, idxh.freq)
+            assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq

     @slow
     def test_from_weekly_resampling(self):
@@ -848,7 +847,7 @@
         expected_l = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544,
                                1549, 1553, 1558, 1562], dtype=np.float64)
         for l in ax.get_lines():
-            self.assertTrue(PeriodIndex(data=l.get_xdata()).freq, idxh.freq)
+            assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq
             xdata = l.get_xdata(orig=False)
             if len(xdata) == 12:  # idxl lines
                 tm.assert_numpy_array_equal(xdata, expected_l)
@@ -863,7 +862,7 @@ def test_from_weekly_resampling(self):
         tsplot(low, plt.Axes.plot)
         lines = tsplot(high, plt.Axes.plot)
         for l in lines:
-            self.assertTrue(PeriodIndex(data=l.get_xdata()).freq, idxh.freq)
+            assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq
             xdata = l.get_xdata(orig=False)
             if len(xdata) == 12:  # idxl lines
                 tm.assert_numpy_array_equal(xdata, expected_l)
@@ -1048,7 +1047,7 @@ def test_secondary_upsample(self):
         ax = high.plot(secondary_y=True)
         for l in ax.get_lines():
             self.assertEqual(PeriodIndex(l.get_xdata()).freq, 'D')
-        self.assertTrue(hasattr(ax, 'left_ax'))
+        assert hasattr(ax, 'left_ax')
         assert not hasattr(ax,
'right_ax') for l in ax.left_ax.get_lines(): self.assertEqual(PeriodIndex(l.get_xdata()).freq, 'D') @@ -1213,7 +1212,7 @@ def test_secondary_y_non_ts_xlim(self): left_after, right_after = ax.get_xlim() self.assertEqual(left_before, left_after) - self.assertTrue(right_before < right_after) + assert right_before < right_after @slow def test_secondary_y_regular_ts_xlim(self): @@ -1229,7 +1228,7 @@ def test_secondary_y_regular_ts_xlim(self): left_after, right_after = ax.get_xlim() self.assertEqual(left_before, left_after) - self.assertTrue(right_before < right_after) + assert right_before < right_after @slow def test_secondary_y_mixed_freq_ts_xlim(self): diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index c5b43cd1a300b..c550504063b3e 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -333,7 +333,7 @@ def test_subplots(self): axes = df.plot(kind=kind, subplots=True, legend=False) for ax in axes: - self.assertTrue(ax.get_legend() is None) + assert ax.get_legend() is None @slow def test_subplots_timeseries(self): @@ -663,7 +663,7 @@ def test_line_lim(self): axes = df.plot(secondary_y=True, subplots=True) self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) for ax in axes: - self.assertTrue(hasattr(ax, 'left_ax')) + assert hasattr(ax, 'left_ax') assert not hasattr(ax, 'right_ax') xmin, xmax = ax.get_xlim() lines = ax.get_lines() @@ -955,8 +955,8 @@ def test_plot_scatter_with_c(self): # identical to the values we supplied, normally we'd be on shaky ground # comparing floats for equality but here we expect them to be # identical. - self.assertTrue(np.array_equal(ax.collections[0].get_facecolor(), - rgba_array)) + tm.assert_numpy_array_equal(ax.collections[0] + .get_facecolor(), rgba_array) # we don't test the colors of the faces in this next plot because they # are dependent on the spring colormap, which may change its colors # later. 
@@ -1057,7 +1057,7 @@ def _check_bar_alignment(self, df, kind='bar', stacked=False, raise ValueError # Check the ticks locates on integer - self.assertTrue((axis.get_ticklocs() == np.arange(len(df))).all()) + assert (axis.get_ticklocs() == np.arange(len(df))).all() if align == 'center': # Check whether the bar locates on center @@ -1511,7 +1511,7 @@ def test_df_legend_labels(self): self._check_text_labels(ax.xaxis.get_label(), 'a') ax = df5.plot(y='c', label='LABEL_c', ax=ax) self._check_legend_labels(ax, labels=['LABEL_b', 'LABEL_c']) - self.assertTrue(df5.columns.tolist() == ['b', 'c']) + assert df5.columns.tolist() == ['b', 'c'] def test_legend_name(self): multi = DataFrame(randn(4, 4), @@ -1733,7 +1733,7 @@ def test_area_colors(self): self._check_colors(linehandles, linecolors=custom_colors) for h in handles: - self.assertTrue(h.get_alpha() is None) + assert h.get_alpha() is None tm.close() ax = df.plot.area(colormap='jet') @@ -1750,7 +1750,7 @@ def test_area_colors(self): if not isinstance(x, PolyCollection)] self._check_colors(linehandles, linecolors=jet_colors) for h in handles: - self.assertTrue(h.get_alpha() is None) + assert h.get_alpha() is None tm.close() # When stacked=False, alpha is set to 0.5 @@ -1974,7 +1974,7 @@ def test_unordered_ts(self): columns=['test']) ax = df.plot() xticks = ax.lines[0].get_xdata() - self.assertTrue(xticks[0] < xticks[1]) + assert xticks[0] < xticks[1] ydata = ax.lines[0].get_ydata() tm.assert_numpy_array_equal(ydata, np.array([1.0, 2.0, 3.0])) @@ -2300,9 +2300,9 @@ def test_table(self): _check_plot_works(df.plot, table=df) ax = df.plot() - self.assertTrue(len(ax.tables) == 0) + assert len(ax.tables) == 0 plotting.table(ax, df.T) - self.assertTrue(len(ax.tables) == 1) + assert len(ax.tables) == 1 def test_errorbar_scatter(self): df = DataFrame( diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index a77c1edd258e3..7002321908ef0 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -394,8 +394,8 @@ def test_axis_share_x(self): ax1, ax2 = df.hist(column='height', by=df.gender, sharex=True) # share x - self.assertTrue(ax1._shared_x_axes.joined(ax1, ax2)) - self.assertTrue(ax2._shared_x_axes.joined(ax1, ax2)) + assert ax1._shared_x_axes.joined(ax1, ax2) + assert ax2._shared_x_axes.joined(ax1, ax2) # don't share y assert not ax1._shared_y_axes.joined(ax1, ax2) @@ -407,8 +407,8 @@ def test_axis_share_y(self): ax1, ax2 = df.hist(column='height', by=df.gender, sharey=True) # share y - self.assertTrue(ax1._shared_y_axes.joined(ax1, ax2)) - self.assertTrue(ax2._shared_y_axes.joined(ax1, ax2)) + assert ax1._shared_y_axes.joined(ax1, ax2) + assert ax2._shared_y_axes.joined(ax1, ax2) # don't share x assert not ax1._shared_x_axes.joined(ax1, ax2) @@ -421,8 +421,8 @@ def test_axis_share_xy(self): sharey=True) # share both x and y - self.assertTrue(ax1._shared_x_axes.joined(ax1, ax2)) - self.assertTrue(ax2._shared_x_axes.joined(ax1, ax2)) + assert ax1._shared_x_axes.joined(ax1, ax2) + assert ax2._shared_x_axes.joined(ax1, ax2) - self.assertTrue(ax1._shared_y_axes.joined(ax1, ax2)) - self.assertTrue(ax2._shared_y_axes.joined(ax1, ax2)) + assert ax1._shared_y_axes.joined(ax1, ax2) + assert ax2._shared_y_axes.joined(ax1, ax2) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index b84e50c4ec827..8ae301a0b7b4c 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -443,8 +443,8 @@ def 
test_hist_secondary_legend(self):
         # both legends are drawn on left ax
         # left and right axis must be visible
         self._check_legend_labels(ax, labels=['a', 'b (right)'])
-        self.assertTrue(ax.get_yaxis().get_visible())
-        self.assertTrue(ax.right_ax.get_yaxis().get_visible())
+        assert ax.get_yaxis().get_visible()
+        assert ax.right_ax.get_yaxis().get_visible()
         tm.close()

         # secondary -> secondary
@@ -455,7 +455,7 @@ def test_hist_secondary_legend(self):
         self._check_legend_labels(ax.left_ax,
                                   labels=['a (right)', 'b (right)'])
         assert not ax.left_ax.get_yaxis().get_visible()
-        self.assertTrue(ax.get_yaxis().get_visible())
+        assert ax.get_yaxis().get_visible()
         tm.close()

         # secondary -> primary
@@ -465,8 +465,8 @@ def test_hist_secondary_legend(self):
         # both legends are drawn on left ax
         # left and right axis must be visible
         self._check_legend_labels(ax.left_ax, labels=['a (right)', 'b'])
-        self.assertTrue(ax.left_ax.get_yaxis().get_visible())
-        self.assertTrue(ax.get_yaxis().get_visible())
+        assert ax.left_ax.get_yaxis().get_visible()
+        assert ax.get_yaxis().get_visible()
         tm.close()

     @slow
@@ -481,8 +481,8 @@ def test_df_series_secondary_legend(self):
         # both legends are drawn on left ax
         # left and right axis must be visible
         self._check_legend_labels(ax, labels=['a', 'b', 'c', 'x (right)'])
-        self.assertTrue(ax.get_yaxis().get_visible())
-        self.assertTrue(ax.right_ax.get_yaxis().get_visible())
+        assert ax.get_yaxis().get_visible()
+        assert ax.right_ax.get_yaxis().get_visible()
         tm.close()

         # primary -> secondary (with passing ax)
@@ -491,8 +491,8 @@ def test_df_series_secondary_legend(self):
         # both legends are drawn on left ax
         # left and right axis must be visible
         self._check_legend_labels(ax, labels=['a', 'b', 'c', 'x (right)'])
-        self.assertTrue(ax.get_yaxis().get_visible())
-        self.assertTrue(ax.right_ax.get_yaxis().get_visible())
+        assert ax.get_yaxis().get_visible()
+        assert ax.right_ax.get_yaxis().get_visible()
         tm.close()

         # secondary -> secondary (without passing ax)
@@ -503,7 +503,7 @@ def test_df_series_secondary_legend(self):
         expected = ['a (right)', 'b (right)', 'c (right)', 'x (right)']
         self._check_legend_labels(ax.left_ax, labels=expected)
         assert not ax.left_ax.get_yaxis().get_visible()
-        self.assertTrue(ax.get_yaxis().get_visible())
+        assert ax.get_yaxis().get_visible()
         tm.close()

         # secondary -> secondary (with passing ax)
@@ -514,7 +514,7 @@ def test_df_series_secondary_legend(self):
         expected = ['a (right)', 'b (right)', 'c (right)', 'x (right)']
         self._check_legend_labels(ax.left_ax, expected)
         assert not ax.left_ax.get_yaxis().get_visible()
-        self.assertTrue(ax.get_yaxis().get_visible())
+        assert ax.get_yaxis().get_visible()
         tm.close()

         # secondary -> secondary (with passing ax)
@@ -525,7 +525,7 @@ def test_df_series_secondary_legend(self):
         expected = ['a', 'b', 'c', 'x (right)']
         self._check_legend_labels(ax.left_ax, expected)
         assert not ax.left_ax.get_yaxis().get_visible()
-        self.assertTrue(ax.get_yaxis().get_visible())
+        assert ax.get_yaxis().get_visible()
         tm.close()

     @slow
@@ -576,10 +576,9 @@ def test_kde_missing_vals(self):
         s = Series(np.random.uniform(size=50))
         s[0] = np.nan
         axes = _check_plot_works(s.plot.kde)
-        # check if the values have any missing values
-        # GH14821
-        self.assertTrue(any(~np.isnan(axes.lines[0].get_xdata())),
-                        msg='Missing Values not dropped')
+
+        # gh-14821: check if the values have any missing values
+        assert any(~np.isnan(axes.lines[0].get_xdata()))

     @slow
     def test_hist_kwargs(self):
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 2bde4349f6000..9854245cf1abd 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -788,8 +788,8 @@ def test_append_different_columns(self): b = df[5:].loc[:, ['strings', 'ints', 'floats']] appended = a.append(b) - self.assertTrue(isnull(appended['strings'][0:4]).all()) - self.assertTrue(isnull(appended['bools'][5:]).all()) + assert isnull(appended['strings'][0:4]).all() + assert isnull(appended['bools'][5:]).all() def test_append_many(self): chunks = [self.frame[:5], self.frame[5:10], @@ -802,8 +802,8 @@ def test_append_many(self): chunks[-1]['foo'] = 'bar' result = chunks[0].append(chunks[1:]) tm.assert_frame_equal(result.loc[:, self.frame.columns], self.frame) - self.assertTrue((result['foo'][15:] == 'bar').all()) - self.assertTrue(result['foo'][:15].isnull().all()) + assert (result['foo'][15:] == 'bar').all() + assert result['foo'][:15].isnull().all() def test_append_preserve_index_name(self): # #980 @@ -1479,8 +1479,8 @@ def test_concat_series_axis1(self): s2.name = None result = concat([s, s2], axis=1) - self.assertTrue(np.array_equal( - result.columns, Index(['A', 0], dtype='object'))) + tm.assert_index_equal(result.columns, + Index(['A', 0], dtype='object')) # must reindex, #2603 s = Series(randn(3), index=['c', 'a', 'b'], name='A') @@ -1512,8 +1512,8 @@ def test_concat_datetime64_block(self): df = DataFrame({'time': rng}) result = concat([df, df]) - self.assertTrue((result.iloc[:10]['time'] == rng).all()) - self.assertTrue((result.iloc[10:]['time'] == rng).all()) + assert (result.iloc[:10]['time'] == rng).all() + assert (result.iloc[10:]['time'] == rng).all() def test_concat_timedelta64_block(self): from pandas import to_timedelta @@ -1523,8 +1523,8 @@ def test_concat_timedelta64_block(self): df = DataFrame({'time': rng}) result = concat([df, df]) - self.assertTrue((result.iloc[:10]['time'] == rng).all()) - self.assertTrue((result.iloc[10:]['time'] == rng).all()) + assert (result.iloc[:10]['time'] == rng).all() + assert (result.iloc[10:]['time'] == rng).all() def test_concat_keys_with_none(self): # #1649 @@ -1593,7 +1593,7 @@ def test_concat_series_axis1_same_names_ignore_index(self): s2 = Series(randn(len(dates)), index=dates, name='value') result = concat([s1, s2], axis=1, ignore_index=True) - self.assertTrue(np.array_equal(result.columns, [0, 1])) + assert np.array_equal(result.columns, [0, 1]) def test_concat_iterables(self): from collections import deque, Iterable diff --git a/pandas/tests/reshape/test_hashing.py b/pandas/tests/reshape/test_hashing.py index 4857d3ac8310b..f19f6b1374978 100644 --- a/pandas/tests/reshape/test_hashing.py +++ b/pandas/tests/reshape/test_hashing.py @@ -86,9 +86,9 @@ def test_hash_tuples_err(self): def test_multiindex_unique(self): mi = MultiIndex.from_tuples([(118, 472), (236, 118), (51, 204), (102, 51)]) - self.assertTrue(mi.is_unique) + assert mi.is_unique result = hash_pandas_object(mi) - self.assertTrue(result.is_unique) + assert result.is_unique def test_multiindex_objects(self): mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]], @@ -215,7 +215,7 @@ def test_hash_keys(self): obj = Series(list('abc')) a = hash_pandas_object(obj, hash_key='9876543210123456') b = hash_pandas_object(obj, hash_key='9876543210123465') - self.assertTrue((a != b).all()) + assert (a != b).all() def test_invalid_key(self): # this only matters for object dtypes diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py index 475b17d9fe792..1da187788e99d 100644 --- a/pandas/tests/reshape/test_join.py 
+++ b/pandas/tests/reshape/test_join.py @@ -190,8 +190,8 @@ def test_join_on(self): columns=['three']) joined = df_a.join(df_b, on='one') joined = joined.join(df_c, on='one') - self.assertTrue(np.isnan(joined['two']['c'])) - self.assertTrue(np.isnan(joined['three']['c'])) + assert np.isnan(joined['two']['c']) + assert np.isnan(joined['three']['c']) # merge column not p resent pytest.raises(KeyError, target.join, source, on='E') @@ -252,7 +252,7 @@ def test_join_with_len0(self): merged = self.target.join(self.source.reindex([]), on='C') for col in self.source: assert col in merged - self.assertTrue(merged[col].isnull().all()) + assert merged[col].isnull().all() merged2 = self.target.join(self.source.reindex([]), on='C', how='inner') @@ -422,7 +422,7 @@ def test_join_inner_multiindex(self): expected = expected.drop(['first', 'second'], axis=1) expected.index = joined.index - self.assertTrue(joined.index.is_monotonic) + assert joined.index.is_monotonic assert_frame_equal(joined, expected) # _assert_same_contents(expected, expected2.loc[:, expected.columns]) @@ -437,8 +437,8 @@ def test_join_hierarchical_mixed(self): # GH 9455, 12219 with tm.assert_produces_warning(UserWarning): result = merge(new_df, other_df, left_index=True, right_index=True) - self.assertTrue(('b', 'mean') in result) - self.assertTrue('b' in result) + assert ('b', 'mean') in result + assert 'b' in result def test_join_float64_float32(self): diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 80056b973a2fc..86580e5a84d92 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -162,10 +162,10 @@ def test_merge_copy(self): right_index=True, copy=True) merged['a'] = 6 - self.assertTrue((left['a'] == 0).all()) + assert (left['a'] == 0).all() merged['d'] = 'peekaboo' - self.assertTrue((right['d'] == 'bar').all()) + assert (right['d'] == 'bar').all() def test_merge_nocopy(self): left = DataFrame({'a': 0, 'b': 1}, index=lrange(10)) @@ -175,10 +175,10 @@ def test_merge_nocopy(self): right_index=True, copy=False) merged['a'] = 6 - self.assertTrue((left['a'] == 6).all()) + assert (left['a'] == 6).all() merged['d'] = 'peekaboo' - self.assertTrue((right['d'] == 'peekaboo').all()) + assert (right['d'] == 'peekaboo').all() def test_intelligently_handle_join_key(self): # #733, be a bit more 1337 about not returning unconsolidated DataFrame @@ -229,8 +229,8 @@ def test_handle_join_key_pass_array(self): merged2 = merge(right, left, left_on=key, right_on='key', how='outer') assert_series_equal(merged['key'], merged2['key']) - self.assertTrue(merged['key'].notnull().all()) - self.assertTrue(merged2['key'].notnull().all()) + assert merged['key'].notnull().all() + assert merged2['key'].notnull().all() left = DataFrame({'value': lrange(5)}, columns=['value']) right = DataFrame({'rvalue': lrange(6)}) @@ -425,7 +425,7 @@ def test_merge_nosort(self): exp = merge(df, new, on='var3', sort=False) assert_frame_equal(result, exp) - self.assertTrue((df.var3.unique() == result.var3.unique()).all()) + assert (df.var3.unique() == result.var3.unique()).all() def test_merge_nan_right(self): df1 = DataFrame({"i1": [0, 1], "i2": [0, 1]}) @@ -671,19 +671,19 @@ def test_indicator(self): # Check result integrity test2 = merge(df1, df2, on='col1', how='left', indicator=True) - self.assertTrue((test2._merge != 'right_only').all()) + assert (test2._merge != 'right_only').all() test2 = df1.merge(df2, on='col1', how='left', indicator=True) - self.assertTrue((test2._merge != 'right_only').all()) 
+ assert (test2._merge != 'right_only').all() test3 = merge(df1, df2, on='col1', how='right', indicator=True) - self.assertTrue((test3._merge != 'left_only').all()) + assert (test3._merge != 'left_only').all() test3 = df1.merge(df2, on='col1', how='right', indicator=True) - self.assertTrue((test3._merge != 'left_only').all()) + assert (test3._merge != 'left_only').all() test4 = merge(df1, df2, on='col1', how='inner', indicator=True) - self.assertTrue((test4._merge == 'both').all()) + assert (test4._merge == 'both').all() test4 = df1.merge(df2, on='col1', how='inner', indicator=True) - self.assertTrue((test4._merge == 'both').all()) + assert (test4._merge == 'both').all() # Check if working name in df for i in ['_right_indicator', '_left_indicator', '_merge']: @@ -789,7 +789,7 @@ def run_asserts(left, right): for sort in [False, True]: res = left.join(right, on=icols, how='left', sort=sort) - self.assertTrue(len(left) < len(res) + 1) + assert len(left) < len(res) + 1 assert not res['4th'].isnull().any() assert not res['5th'].isnull().any() @@ -797,7 +797,7 @@ def run_asserts(left, right): res['4th'], - res['5th'], check_names=False) result = bind_cols(res.iloc[:, :-2]) tm.assert_series_equal(res['4th'], result, check_names=False) - self.assertTrue(result.name is None) + assert result.name is None if sort: tm.assert_frame_equal( diff --git a/pandas/tests/reshape/test_merge_asof.py b/pandas/tests/reshape/test_merge_asof.py index f2aef409324f8..7934b8abf85a8 100644 --- a/pandas/tests/reshape/test_merge_asof.py +++ b/pandas/tests/reshape/test_merge_asof.py @@ -539,7 +539,7 @@ def test_non_sorted(self): by='ticker') trades = self.trades.sort_values('time') - self.assertTrue(trades.time.is_monotonic) + assert trades.time.is_monotonic assert not quotes.time.is_monotonic with pytest.raises(ValueError): merge_asof(trades, quotes, @@ -547,8 +547,8 @@ def test_non_sorted(self): by='ticker') quotes = self.quotes.sort_values('time') - self.assertTrue(trades.time.is_monotonic) - self.assertTrue(quotes.time.is_monotonic) + assert trades.time.is_monotonic + assert quotes.time.is_monotonic # ok, though has dupes merge_asof(trades, self.quotes, diff --git a/pandas/tests/reshape/test_merge_ordered.py b/pandas/tests/reshape/test_merge_ordered.py index 77f47ff0a76e9..1f1eee0e9980b 100644 --- a/pandas/tests/reshape/test_merge_ordered.py +++ b/pandas/tests/reshape/test_merge_ordered.py @@ -57,7 +57,7 @@ def test_multigroup(self): assert_frame_equal(result, result2.loc[:, result.columns]) result = merge_ordered(left, self.right, on='key', left_by='group') - self.assertTrue(result['group'].notnull().all()) + assert result['group'].notnull().all() def test_merge_type(self): class NotADataFrame(DataFrame): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 416e729944d39..3b3b4fe247b72 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -514,7 +514,7 @@ def test_pivot_columns_lexsorted(self): columns=['Index', 'Symbol', 'Year'], aggfunc='mean') - self.assertTrue(pivoted.columns.is_monotonic) + assert pivoted.columns.is_monotonic def test_pivot_complex_aggfunc(self): f = OrderedDict([('D', ['std']), ('E', ['sum'])]) @@ -1491,10 +1491,10 @@ def test_period_weekly(self): def test_isleapyear_deprecate(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(isleapyear(2000)) + assert isleapyear(2000) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): assert not isleapyear(2001) with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(isleapyear(2004)) + assert isleapyear(2004) diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 1cc5c5f229bce..923615c93d98b 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -171,9 +171,9 @@ def test_qcut(self): labels, bins = qcut(arr, 4, retbins=True) ex_bins = quantile(arr, [0, .25, .5, .75, 1.]) result = labels.categories.left.values - self.assertTrue(np.allclose(result, ex_bins[:-1], atol=1e-2)) + assert np.allclose(result, ex_bins[:-1], atol=1e-2) result = labels.categories.right.values - self.assertTrue(np.allclose(result, ex_bins[1:], atol=1e-2)) + assert np.allclose(result, ex_bins[1:], atol=1e-2) ex_levels = cut(arr, ex_bins, include_lowest=True) tm.assert_categorical_equal(labels, ex_levels) @@ -236,7 +236,7 @@ def test_qcut_nas(self): arr[:20] = np.nan result = qcut(arr, 4) - self.assertTrue(isnull(result[:20]).all()) + assert isnull(result[:20]).all() def test_qcut_index(self): result = qcut([0, 2], 2) @@ -274,16 +274,16 @@ def test_qcut_binning_issues(self): for lev in np.unique(result): s = lev.left e = lev.right - self.assertTrue(s != e) + assert s != e starts.append(float(s)) ends.append(float(e)) for (sp, sn), (ep, en) in zip(zip(starts[:-1], starts[1:]), zip(ends[:-1], ends[1:])): - self.assertTrue(sp < sn) - self.assertTrue(ep < en) - self.assertTrue(ep <= sn) + assert sp < sn + assert ep < en + assert ep <= sn def test_cut_return_intervals(self): s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/test_interval.py index 526a2916e2924..d77deabee58d4 100644 --- a/pandas/tests/scalar/test_interval.py +++ b/pandas/tests/scalar/test_interval.py @@ -49,12 +49,12 @@ def test_comparison(self): with tm.assert_raises_regex(TypeError, 'unorderable types'): Interval(0, 1) < 2 - self.assertTrue(Interval(0, 1) < Interval(1, 2)) - self.assertTrue(Interval(0, 1) < Interval(0, 2)) - self.assertTrue(Interval(0, 1) < Interval(0.5, 1.5)) - self.assertTrue(Interval(0, 1) <= Interval(0, 1)) - self.assertTrue(Interval(0, 1) > Interval(-1, 2)) - self.assertTrue(Interval(0, 1) >= Interval(0, 1)) + assert Interval(0, 1) < Interval(1, 2) + assert Interval(0, 1) < Interval(0, 2) + assert Interval(0, 1) < Interval(0.5, 1.5) + assert Interval(0, 1) <= Interval(0, 1) + assert Interval(0, 1) > Interval(-1, 2) + assert Interval(0, 1) >= Interval(0, 1) def test_hash(self): # should not raise diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index c8f3833c2c964..fc0921451c133 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -21,14 +21,14 @@ def test_is_leap_year(self): # GH 13727 for freq in ['A', 'M', 'D', 'H']: p = Period('2000-01-01 00:00:00', freq=freq) - self.assertTrue(p.is_leap_year) + assert p.is_leap_year assert isinstance(p.is_leap_year, bool) p = Period('1999-01-01 00:00:00', freq=freq) assert not p.is_leap_year p = Period('2004-01-01 00:00:00', freq=freq) - self.assertTrue(p.is_leap_year) + assert p.is_leap_year p = Period('2100-01-01 00:00:00', freq=freq) assert not p.is_leap_year @@ -946,7 +946,7 @@ def test_notEqual(self): self.assertNotEqual(self.january1, self.february) def test_greater(self): - self.assertTrue(self.february > self.january1) + assert self.february > self.january1 def test_greater_Raises_Value(self): with pytest.raises(period.IncompatibleFrequency): @@ -957,7 +957,7 @@ def 
test_greater_Raises_Type(self): self.january1 > 1 def test_greaterEqual(self): - self.assertTrue(self.january1 >= self.january2) + assert self.january1 >= self.january2 def test_greaterEqual_Raises_Value(self): with pytest.raises(period.IncompatibleFrequency): @@ -967,7 +967,7 @@ def test_greaterEqual_Raises_Value(self): print(self.january1 >= 1) def test_smallerEqual(self): - self.assertTrue(self.january1 <= self.january2) + assert self.january1 <= self.january2 def test_smallerEqual_Raises_Value(self): with pytest.raises(period.IncompatibleFrequency): @@ -978,7 +978,7 @@ def test_smallerEqual_Raises_Type(self): self.january1 <= 1 def test_smaller(self): - self.assertTrue(self.january1 < self.february) + assert self.january1 < self.february def test_smaller_Raises_Value(self): with pytest.raises(period.IncompatibleFrequency): diff --git a/pandas/tests/scalar/test_period_asfreq.py b/pandas/tests/scalar/test_period_asfreq.py index 84793658a6537..d31eeda5c8e3c 100644 --- a/pandas/tests/scalar/test_period_asfreq.py +++ b/pandas/tests/scalar/test_period_asfreq.py @@ -718,4 +718,4 @@ def test_asfreq_MS(self): with tm.assert_raises_regex(ValueError, msg): pd.Period('2013-01', 'MS') - self.assertTrue(_period_code_map.get("MS") is None) + assert _period_code_map.get("MS") is None diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index 788c204ca3eb3..9efd180afc2da 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -55,11 +55,9 @@ def test_construction(self): # rounding cases self.assertEqual(Timedelta(82739999850000).value, 82739999850000) - self.assertTrue('0 days 22:58:59.999850' in str(Timedelta( - 82739999850000))) + assert ('0 days 22:58:59.999850' in str(Timedelta(82739999850000))) self.assertEqual(Timedelta(123072001000000).value, 123072001000000) - self.assertTrue('1 days 10:11:12.001' in str(Timedelta( - 123072001000000))) + assert ('1 days 10:11:12.001' in str(Timedelta(123072001000000))) # string conversion with/without leading zero # GH 9570 @@ -184,7 +182,7 @@ def test_total_seconds_scalar(self): tm.assert_almost_equal(rng.total_seconds(), expt) rng = Timedelta(np.nan) - self.assertTrue(np.isnan(rng.total_seconds())) + assert np.isnan(rng.total_seconds()) def test_repr(self): @@ -202,20 +200,20 @@ def test_conversion(self): for td in [Timedelta(10, unit='d'), Timedelta('1 days, 10:11:12.012345')]: pydt = td.to_pytimedelta() - self.assertTrue(td == Timedelta(pydt)) + assert td == Timedelta(pydt) self.assertEqual(td, pydt) - self.assertTrue(isinstance(pydt, timedelta) and not isinstance( + assert (isinstance(pydt, timedelta) and not isinstance( pydt, Timedelta)) self.assertEqual(td, np.timedelta64(td.value, 'ns')) td64 = td.to_timedelta64() self.assertEqual(td64, np.timedelta64(td.value, 'ns')) self.assertEqual(td, td64) - self.assertTrue(isinstance(td64, np.timedelta64)) + assert isinstance(td64, np.timedelta64) # this is NOT equal and cannot be roundtriped (because of the nanos) td = Timedelta('1 days, 10:11:12.012345678') - self.assertTrue(td != td.to_pytimedelta()) + assert td != td.to_pytimedelta() def test_freq_conversion(self): @@ -240,7 +238,7 @@ def test_freq_conversion(self): def test_fields(self): def check(value): # that we are int/long like - self.assertTrue(isinstance(value, (int, compat.long))) + assert isinstance(value, (int, compat.long)) # compat to datetime.timedelta rng = to_timedelta('1 days, 10:11:12') @@ -261,7 +259,7 @@ def check(value): td = Timedelta('-1 days, 10:11:12') 
self.assertEqual(abs(td), Timedelta('13:48:48')) - self.assertTrue(str(td) == "-1 days +10:11:12") + assert str(td) == "-1 days +10:11:12" self.assertEqual(-td, Timedelta('0 days 13:48:48')) self.assertEqual(-Timedelta('-1 days, 10:11:12').value, 49728000000000) self.assertEqual(Timedelta('-1 days, 10:11:12').value, -49728000000000) @@ -455,13 +453,13 @@ def test_contains(self): td = to_timedelta([pd.NaT]) for v in [pd.NaT, None, float('nan'), np.nan]: - self.assertTrue((v in td)) + assert (v in td) def test_identity(self): td = Timedelta(10, unit='d') - self.assertTrue(isinstance(td, Timedelta)) - self.assertTrue(isinstance(td, timedelta)) + assert isinstance(td, Timedelta) + assert isinstance(td, timedelta) def test_short_format_converters(self): def conv(v): @@ -547,10 +545,9 @@ def test_overflow(self): expected = pd.Timedelta((pd.DatetimeIndex((s - s.min())).asi8 / len(s) ).sum()) - # the computation is converted to float so might be some loss of - # precision - self.assertTrue(np.allclose(result.value / 1000, expected.value / - 1000)) + # the computation is converted to float so + # might be some loss of precision + assert np.allclose(result.value / 1000, expected.value / 1000) # sum pytest.raises(ValueError, lambda: (s - s.min()).sum()) @@ -575,8 +572,7 @@ def test_timedelta_hash_equality(self): self.assertEqual(d[v], 2) tds = timedelta_range('1 second', periods=20) - self.assertTrue(all(hash(td) == hash(td.to_pytimedelta()) for td in - tds)) + assert all(hash(td) == hash(td.to_pytimedelta()) for td in tds) # python timedeltas drop ns resolution ns_td = Timedelta(1, 'ns') @@ -659,7 +655,7 @@ def test_components(self): result = s.dt.components assert not result.iloc[0].isnull().all() - self.assertTrue(result.iloc[1].isnull().all()) + assert result.iloc[1].isnull().all() def test_isoformat(self): td = Timedelta(days=6, minutes=50, seconds=3, @@ -708,4 +704,4 @@ def test_ops_error_str(self): l > r assert not l == r - self.assertTrue(l != r) + assert l != r diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index cfc4cf93e720c..72b1e4d450b84 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -438,7 +438,7 @@ def test_tz_localize_roundtrip(self): reset = localized.tz_localize(None) self.assertEqual(reset, ts) - self.assertTrue(reset.tzinfo is None) + assert reset.tzinfo is None def test_tz_convert_roundtrip(self): for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: @@ -449,7 +449,7 @@ def test_tz_convert_roundtrip(self): reset = converted.tz_convert(None) self.assertEqual(reset, Timestamp(t)) - self.assertTrue(reset.tzinfo is None) + assert reset.tzinfo is None self.assertEqual(reset, converted.tz_convert('UTC').tz_localize(None)) @@ -487,11 +487,11 @@ def test_now(self): # Check that the delta between the times is less than 1s (arbitrarily # small) delta = Timedelta(seconds=1) - self.assertTrue(abs(ts_from_method - ts_from_string) < delta) - self.assertTrue(abs(ts_datetime - ts_from_method) < delta) - self.assertTrue(abs(ts_from_method_tz - ts_from_string_tz) < delta) - self.assertTrue(abs(ts_from_string_tz.tz_localize(None) - - ts_from_method_tz.tz_localize(None)) < delta) + assert abs(ts_from_method - ts_from_string) < delta + assert abs(ts_datetime - ts_from_method) < delta + assert abs(ts_from_method_tz - ts_from_string_tz) < delta + assert (abs(ts_from_string_tz.tz_localize(None) - + ts_from_method_tz.tz_localize(None)) < delta) def test_today(self): @@ -505,11 +505,11 @@ def 
test_today(self): # Check that the delta between the times is less than 1s (arbitrarily # small) delta = Timedelta(seconds=1) - self.assertTrue(abs(ts_from_method - ts_from_string) < delta) - self.assertTrue(abs(ts_datetime - ts_from_method) < delta) - self.assertTrue(abs(ts_from_method_tz - ts_from_string_tz) < delta) - self.assertTrue(abs(ts_from_string_tz.tz_localize(None) - - ts_from_method_tz.tz_localize(None)) < delta) + assert abs(ts_from_method - ts_from_string) < delta + assert abs(ts_datetime - ts_from_method) < delta + assert abs(ts_from_method_tz - ts_from_string_tz) < delta + assert (abs(ts_from_string_tz.tz_localize(None) - + ts_from_method_tz.tz_localize(None)) < delta) def test_asm8(self): np.random.seed(7960929) @@ -523,7 +523,7 @@ def test_asm8(self): def test_fields(self): def check(value, equal): # that we are int/long like - self.assertTrue(isinstance(value, (int, compat.long))) + assert isinstance(value, (int, compat.long)) self.assertEqual(value, equal) # GH 10050 @@ -564,11 +564,11 @@ def check(value, equal): ts = Timestamp('2014-01-01 00:00:00+01:00') starts = ['is_month_start', 'is_quarter_start', 'is_year_start'] for start in starts: - self.assertTrue(getattr(ts, start)) + assert getattr(ts, start) ts = Timestamp('2014-12-31 23:59:59+01:00') ends = ['is_month_end', 'is_year_end', 'is_quarter_end'] for end in ends: - self.assertTrue(getattr(ts, end)) + assert getattr(ts, end) def test_pprint(self): # GH12622 @@ -864,26 +864,26 @@ def test_comparison(self): self.assertEqual(val, val) assert not val != val assert not val < val - self.assertTrue(val <= val) + assert val <= val assert not val > val - self.assertTrue(val >= val) + assert val >= val other = datetime(2012, 5, 18) self.assertEqual(val, other) assert not val != other assert not val < other - self.assertTrue(val <= other) + assert val <= other assert not val > other - self.assertTrue(val >= other) + assert val >= other other = Timestamp(stamp + 100) self.assertNotEqual(val, other) self.assertNotEqual(val, other) - self.assertTrue(val < other) - self.assertTrue(val <= other) - self.assertTrue(other > val) - self.assertTrue(other >= val) + assert val < other + assert val <= other + assert other > val + assert other >= val def test_compare_invalid(self): @@ -898,14 +898,14 @@ def test_compare_invalid(self): assert not val == np.float64(1) assert not val == np.int64(1) - self.assertTrue(val != 'foo') - self.assertTrue(val != 10.0) - self.assertTrue(val != 1) - self.assertTrue(val != long(1)) - self.assertTrue(val != []) - self.assertTrue(val != {'foo': 1}) - self.assertTrue(val != np.float64(1)) - self.assertTrue(val != np.int64(1)) + assert val != 'foo' + assert val != 10.0 + assert val != 1 + assert val != long(1) + assert val != [] + assert val != {'foo': 1} + assert val != np.float64(1) + assert val != np.int64(1) # ops testing df = DataFrame(np.random.randn(5, 2)) @@ -1086,14 +1086,14 @@ def test_is_leap_year(self): # GH 13727 for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: dt = Timestamp('2000-01-01 00:00:00', tz=tz) - self.assertTrue(dt.is_leap_year) + assert dt.is_leap_year assert isinstance(dt.is_leap_year, bool) dt = Timestamp('1999-01-01 00:00:00', tz=tz) assert not dt.is_leap_year dt = Timestamp('2004-01-01 00:00:00', tz=tz) - self.assertTrue(dt.is_leap_year) + assert dt.is_leap_year dt = Timestamp('2100-01-01 00:00:00', tz=tz) assert not dt.is_leap_year @@ -1389,10 +1389,10 @@ def test_timestamp_compare_with_early_datetime(self): self.assertNotEqual(stamp, datetime.min) 
self.assertNotEqual(stamp, datetime(1600, 1, 1)) self.assertNotEqual(stamp, datetime(2700, 1, 1)) - self.assertTrue(stamp > datetime(1600, 1, 1)) - self.assertTrue(stamp >= datetime(1600, 1, 1)) - self.assertTrue(stamp < datetime(2700, 1, 1)) - self.assertTrue(stamp <= datetime(2700, 1, 1)) + assert stamp > datetime(1600, 1, 1) + assert stamp >= datetime(1600, 1, 1) + assert stamp < datetime(2700, 1, 1) + assert stamp <= datetime(2700, 1, 1) def test_timestamp_equality(self): @@ -1498,7 +1498,7 @@ def test_woy_boundary(self): result = np.array([Timestamp(datetime(*args)).week for args in [(2000, 1, 1), (2000, 1, 2), ( 2005, 1, 1), (2005, 1, 2)]]) - self.assertTrue((result == [52, 52, 53, 53]).all()) + assert (result == [52, 52, 53, 53]).all() class TestTsUtil(tm.TestCase): diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 17a270c3a9346..e0964fea95cc9 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -70,7 +70,7 @@ def test_rename_set_name(self): result = s.rename(name) self.assertEqual(result.name, name) tm.assert_numpy_array_equal(result.index.values, s.index.values) - self.assertTrue(s.name is None) + assert s.name is None def test_rename_set_name_inplace(self): s = Series(range(3), index=list('abc')) @@ -94,8 +94,8 @@ def test_set_name(self): s = Series([1, 2, 3]) s2 = s._set_name('foo') self.assertEqual(s2.name, 'foo') - self.assertTrue(s.name is None) - self.assertTrue(s is not s2) + assert s.name is None + assert s is not s2 def test_rename_inplace(self): renamer = lambda x: x.strftime('%Y%m%d') @@ -109,7 +109,7 @@ def test_set_index_makes_timeseries(self): s = Series(lrange(10)) s.index = idx - self.assertTrue(s.index.is_all_dates) + assert s.index.is_all_dates def test_reset_index(self): df = tm.makeDataFrame()[:5] diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index f5bccdd55e944..233d71cb1d8a5 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -35,14 +35,14 @@ def test_sum_zero(self): self.assertEqual(nanops.nansum(arr), 0) arr = np.empty((10, 0)) - self.assertTrue((nanops.nansum(arr, axis=1) == 0).all()) + assert (nanops.nansum(arr, axis=1) == 0).all() # GH #844 s = Series([], index=[]) self.assertEqual(s.sum(), 0) df = DataFrame(np.empty((10, 0))) - self.assertTrue((df.sum(1) == 0).all()) + assert (df.sum(1) == 0).all() def test_nansum_buglet(self): s = Series([1.0, np.nan], index=[0, 1]) @@ -80,17 +80,17 @@ def test_overflow(self): result = s.sum(skipna=False) self.assertEqual(result, v.sum(dtype=dtype)) result = s.min(skipna=False) - self.assertTrue(np.allclose(float(result), 0.0)) + assert np.allclose(float(result), 0.0) result = s.max(skipna=False) - self.assertTrue(np.allclose(float(result), v[-1])) + assert np.allclose(float(result), v[-1]) # use bottleneck if available result = s.sum() self.assertEqual(result, v.sum(dtype=dtype)) result = s.min() - self.assertTrue(np.allclose(float(result), 0.0)) + assert np.allclose(float(result), 0.0) result = s.max() - self.assertTrue(np.allclose(float(result), v[-1])) + assert np.allclose(float(result), v[-1]) def test_sum(self): self._check_stat_op('sum', np.sum, check_allna=True) @@ -104,7 +104,7 @@ def test_sum_inf(self): s[5:8] = np.inf s2[5:8] = np.nan - self.assertTrue(np.isinf(s.sum())) + assert np.isinf(s.sum()) arr = np.random.randn(100, 100).astype('f4') arr[:, 2] = np.inf @@ -113,7 +113,7 @@ def test_sum_inf(self): 
assert_almost_equal(s.sum(), s2.sum()) res = nanops.nansum(arr, axis=1) - self.assertTrue(np.isinf(res).all()) + assert np.isinf(res).all() def test_mean(self): self._check_stat_op('mean', np.mean) @@ -248,10 +248,10 @@ def test_var_std(self): # 1 - element series with ddof=1 s = self.ts.iloc[[0]] result = s.var(ddof=1) - self.assertTrue(isnull(result)) + assert isnull(result) result = s.std(ddof=1) - self.assertTrue(isnull(result)) + assert isnull(result) def test_sem(self): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) @@ -265,7 +265,7 @@ def test_sem(self): # 1 - element series with ddof=1 s = self.ts.iloc[[0]] result = s.sem(ddof=1) - self.assertTrue(isnull(result)) + assert isnull(result) def test_skew(self): tm._skip_if_no_scipy() @@ -281,11 +281,11 @@ def test_skew(self): s = Series(np.ones(i)) df = DataFrame(np.ones((i, i))) if i < min_N: - self.assertTrue(np.isnan(s.skew())) - self.assertTrue(np.isnan(df.skew()).all()) + assert np.isnan(s.skew()) + assert np.isnan(df.skew()).all() else: self.assertEqual(0, s.skew()) - self.assertTrue((df.skew() == 0).all()) + assert (df.skew() == 0).all() def test_kurt(self): tm._skip_if_no_scipy() @@ -307,11 +307,11 @@ def test_kurt(self): s = Series(np.ones(i)) df = DataFrame(np.ones((i, i))) if i < min_N: - self.assertTrue(np.isnan(s.kurt())) - self.assertTrue(np.isnan(df.kurt()).all()) + assert np.isnan(s.kurt()) + assert np.isnan(df.kurt()).all() else: self.assertEqual(0, s.kurt()) - self.assertTrue((df.kurt() == 0).all()) + assert (df.kurt() == 0).all() def test_describe(self): s = Series([0, 1, 2, 3, 4], name='int_data') @@ -337,14 +337,14 @@ def test_describe(self): def test_argsort(self): self._check_accum_op('argsort', check_dtype=False) argsorted = self.ts.argsort() - self.assertTrue(issubclass(argsorted.dtype.type, np.integer)) + assert issubclass(argsorted.dtype.type, np.integer) # GH 2967 (introduced bug in 0.11-dev I think) s = Series([Timestamp('201301%02d' % (i + 1)) for i in range(5)]) self.assertEqual(s.dtype, 'datetime64[ns]') shifted = s.shift(-1) self.assertEqual(shifted.dtype, 'datetime64[ns]') - self.assertTrue(isnull(shifted[4])) + assert isnull(shifted[4]) result = s.argsort() expected = Series(lrange(5), dtype='int64') @@ -503,8 +503,8 @@ def testit(): pytest.raises(TypeError, f, ds) # skipna or no - self.assertTrue(notnull(f(self.series))) - self.assertTrue(isnull(f(self.series, skipna=False))) + assert notnull(f(self.series)) + assert isnull(f(self.series, skipna=False)) # check the result is correct nona = self.series.dropna() @@ -517,12 +517,12 @@ def testit(): # xref 9422 # bottleneck >= 1.0 give 0.0 for an allna Series sum try: - self.assertTrue(nanops._USE_BOTTLENECK) + assert nanops._USE_BOTTLENECK import bottleneck as bn # noqa - self.assertTrue(bn.__version__ >= LooseVersion('1.0')) + assert bn.__version__ >= LooseVersion('1.0') self.assertEqual(f(allna), 0.0) except: - self.assertTrue(np.isnan(f(allna))) + assert np.isnan(f(allna)) # dtype=object with None, it works! s = Series([1, 2, 3, None, 5]) @@ -647,7 +647,7 @@ def test_all_any(self): ts = tm.makeTimeSeries() bool_series = ts > 0 assert not bool_series.all() - self.assertTrue(bool_series.any()) + assert bool_series.any() # Alternative types, with implicit 'object' dtype. s = Series(['abc', True]) @@ -657,9 +657,9 @@ def test_all_any_params(self): # Check skipna, with implicit 'object' dtype. 
s1 = Series([np.nan, True]) s2 = Series([np.nan, False]) - self.assertTrue(s1.all(skipna=False)) # nan && True => True - self.assertTrue(s1.all(skipna=True)) - self.assertTrue(np.isnan(s2.any(skipna=False))) # nan || False => nan + assert s1.all(skipna=False) # nan && True => True + assert s1.all(skipna=True) + assert np.isnan(s2.any(skipna=False)) # nan || False => nan assert not s2.any(skipna=True) # Check level. @@ -722,20 +722,20 @@ def test_ops_consistency_on_empty(self): self.assertEqual(result, 0) result = Series(dtype=float).mean() - self.assertTrue(isnull(result)) + assert isnull(result) result = Series(dtype=float).median() - self.assertTrue(isnull(result)) + assert isnull(result) # timedelta64[ns] result = Series(dtype='m8[ns]').sum() self.assertEqual(result, Timedelta(0)) result = Series(dtype='m8[ns]').mean() - self.assertTrue(result is pd.NaT) + assert result is pd.NaT result = Series(dtype='m8[ns]').median() - self.assertTrue(result is pd.NaT) + assert result is pd.NaT def test_corr(self): tm._skip_if_no_scipy() @@ -748,19 +748,19 @@ def test_corr(self): # partial overlap self.assertAlmostEqual(self.ts[:15].corr(self.ts[5:]), 1) - self.assertTrue(isnull(self.ts[:15].corr(self.ts[5:], min_periods=12))) + assert isnull(self.ts[:15].corr(self.ts[5:], min_periods=12)) ts1 = self.ts[:15].reindex(self.ts.index) ts2 = self.ts[5:].reindex(self.ts.index) - self.assertTrue(isnull(ts1.corr(ts2, min_periods=12))) + assert isnull(ts1.corr(ts2, min_periods=12)) # No overlap - self.assertTrue(np.isnan(self.ts[::2].corr(self.ts[1::2]))) + assert np.isnan(self.ts[::2].corr(self.ts[1::2])) # all NA cp = self.ts[:10].copy() cp[:] = np.nan - self.assertTrue(isnull(cp.corr(cp))) + assert isnull(cp.corr(cp)) A = tm.makeTimeSeries() B = tm.makeTimeSeries() @@ -812,19 +812,19 @@ def test_cov(self): self.ts[5:15].std() ** 2) # No overlap - self.assertTrue(np.isnan(self.ts[::2].cov(self.ts[1::2]))) + assert np.isnan(self.ts[::2].cov(self.ts[1::2])) # all NA cp = self.ts[:10].copy() cp[:] = np.nan - self.assertTrue(isnull(cp.cov(cp))) + assert isnull(cp.cov(cp)) # min_periods - self.assertTrue(isnull(self.ts[:15].cov(self.ts[5:], min_periods=12))) + assert isnull(self.ts[:15].cov(self.ts[5:], min_periods=12)) ts1 = self.ts[:15].reindex(self.ts.index) ts2 = self.ts[5:].reindex(self.ts.index) - self.assertTrue(isnull(ts1.cov(ts2, min_periods=12))) + assert isnull(ts1.cov(ts2, min_periods=12)) def test_count(self): self.assertEqual(self.ts.count(), len(self.ts)) @@ -859,7 +859,7 @@ def test_dot(self): # Check ndarray argument result = a.dot(b.values) - self.assertTrue(np.all(result == expected.values)) + assert np.all(result == expected.values) assert_almost_equal(a.dot(b['2'].values), expected['2']) # Check series argument @@ -1154,7 +1154,7 @@ def test_idxmin(self): # skipna or no self.assertEqual(self.series[self.series.idxmin()], self.series.min()) - self.assertTrue(isnull(self.series.idxmin(skipna=False))) + assert isnull(self.series.idxmin(skipna=False)) # no NaNs nona = self.series.dropna() @@ -1164,7 +1164,7 @@ def test_idxmin(self): # all NaNs allna = self.series * nan - self.assertTrue(isnull(allna.idxmin())) + assert isnull(allna.idxmin()) # datetime64[ns] from pandas import date_range @@ -1196,7 +1196,7 @@ def test_idxmax(self): # skipna or no self.assertEqual(self.series[self.series.idxmax()], self.series.max()) - self.assertTrue(isnull(self.series.idxmax(skipna=False))) + assert isnull(self.series.idxmax(skipna=False)) # no NaNs nona = self.series.dropna() @@ -1206,7 +1206,7 @@ def 
test_idxmax(self): # all NaNs allna = self.series * nan - self.assertTrue(isnull(allna.idxmax())) + assert isnull(allna.idxmax()) from pandas import date_range s = Series(date_range('20130102', periods=6)) @@ -1252,7 +1252,7 @@ def test_ptp(self): # GH11163 s = Series([3, 5, np.nan, -3, 10]) self.assertEqual(s.ptp(), 13) - self.assertTrue(pd.isnull(s.ptp(skipna=False))) + assert pd.isnull(s.ptp(skipna=False)) mi = pd.MultiIndex.from_product([['a', 'b'], [1, 2, 3]]) s = pd.Series([1, np.nan, 7, 3, 5, np.nan], index=mi) @@ -1364,24 +1364,24 @@ def test_is_unique(self): s = Series(np.random.randint(0, 10, size=1000)) assert not s.is_unique s = Series(np.arange(1000)) - self.assertTrue(s.is_unique) + assert s.is_unique def test_is_monotonic(self): s = Series(np.random.randint(0, 10, size=1000)) assert not s.is_monotonic s = Series(np.arange(1000)) - self.assertTrue(s.is_monotonic) - self.assertTrue(s.is_monotonic_increasing) + assert s.is_monotonic + assert s.is_monotonic_increasing s = Series(np.arange(1000, 0, -1)) - self.assertTrue(s.is_monotonic_decreasing) + assert s.is_monotonic_decreasing s = Series(pd.date_range('20130101', periods=10)) - self.assertTrue(s.is_monotonic) - self.assertTrue(s.is_monotonic_increasing) + assert s.is_monotonic + assert s.is_monotonic_increasing s = Series(list(reversed(s.tolist()))) assert not s.is_monotonic - self.assertTrue(s.is_monotonic_decreasing) + assert s.is_monotonic_decreasing def test_sort_index_level(self): mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) @@ -1433,13 +1433,13 @@ def test_shift_categorical(self): sp1 = s.shift(1) assert_index_equal(s.index, sp1.index) - self.assertTrue(np.all(sp1.values.codes[:1] == -1)) - self.assertTrue(np.all(s.values.codes[:-1] == sp1.values.codes[1:])) + assert np.all(sp1.values.codes[:1] == -1) + assert np.all(s.values.codes[:-1] == sp1.values.codes[1:]) sn2 = s.shift(-2) assert_index_equal(s.index, sn2.index) - self.assertTrue(np.all(sn2.values.codes[-2:] == -1)) - self.assertTrue(np.all(s.values.codes[2:] == sn2.values.codes[:-2])) + assert np.all(sn2.values.codes[-2:] == -1) + assert np.all(s.values.codes[2:] == sn2.values.codes[:-2]) assert_index_equal(s.values.categories, sp1.values.categories) assert_index_equal(s.values.categories, sn2.values.categories) @@ -1452,7 +1452,7 @@ def test_reshape_non_2d(self): # see gh-4554 with tm.assert_produces_warning(FutureWarning): x = Series(np.random.random(201), name='x') - self.assertTrue(x.reshape(x.shape, ) is x) + assert x.reshape(x.shape, ) is x # see gh-2719 with tm.assert_produces_warning(FutureWarning): diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 5b7ac9bc2b33c..7d331f0643b18 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -124,28 +124,28 @@ def test_tab_completion(self): # GH 9910 s = Series(list('abcd')) # Series of str values should have .str but not .dt/.cat in __dir__ - self.assertTrue('str' in dir(s)) - self.assertTrue('dt' not in dir(s)) - self.assertTrue('cat' not in dir(s)) + assert 'str' in dir(s) + assert 'dt' not in dir(s) + assert 'cat' not in dir(s) # similiarly for .dt s = Series(date_range('1/1/2015', periods=5)) - self.assertTrue('dt' in dir(s)) - self.assertTrue('str' not in dir(s)) - self.assertTrue('cat' not in dir(s)) + assert 'dt' in dir(s) + assert 'str' not in dir(s) + assert 'cat' not in dir(s) - # similiarly for .cat, but with the twist that str and dt should be - # there if the categories are of that type first cat and str + # 
Similarly for .cat, but with the twist that str and dt should be + # there if the categories are of that type first cat and str. s = Series(list('abbcd'), dtype="category") - self.assertTrue('cat' in dir(s)) - self.assertTrue('str' in dir(s)) # as it is a string categorical - self.assertTrue('dt' not in dir(s)) + assert 'cat' in dir(s) + assert 'str' in dir(s) # as it is a string categorical + assert 'dt' not in dir(s) # similar to cat and str s = Series(date_range('1/1/2015', periods=5)).astype("category") - self.assertTrue('cat' in dir(s)) - self.assertTrue('str' not in dir(s)) - self.assertTrue('dt' in dir(s)) # as it is a datetime categorical + assert 'cat' in dir(s) + assert 'str' not in dir(s) + assert 'dt' in dir(s) # as it is a datetime categorical def test_not_hashable(self): s_empty = Series() @@ -238,12 +238,12 @@ def test_copy(self): if deep is None or deep is True: # Did not modify original Series - self.assertTrue(np.isnan(s2[0])) + assert np.isnan(s2[0]) assert not np.isnan(s[0]) else: # we DID modify the original Series - self.assertTrue(np.isnan(s2[0])) - self.assertTrue(np.isnan(s[0])) + assert np.isnan(s2[0]) + assert np.isnan(s[0]) # GH 11794 # copy of tz-aware diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index afe46e5dcf480..c764d7b856bb8 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -373,17 +373,17 @@ def test_map_int(self): right = Series({1: 11, 2: 22, 3: 33}) self.assertEqual(left.dtype, np.float_) - self.assertTrue(issubclass(right.dtype.type, np.integer)) + assert issubclass(right.dtype.type, np.integer) merged = left.map(right) self.assertEqual(merged.dtype, np.float_) - self.assertTrue(isnull(merged['d'])) - self.assertTrue(not isnull(merged['c'])) + assert isnull(merged['d']) + assert not isnull(merged['c']) def test_map_type_inference(self): s = Series(lrange(3)) s2 = s.map(lambda x: np.where(x == 0, 0, 1)) - self.assertTrue(issubclass(s2.dtype.type, np.integer)) + assert issubclass(s2.dtype.type, np.integer) def test_map_decimal(self): from decimal import Decimal diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/test_asof.py index 137390b6427eb..80556a5e5ffdb 100644 --- a/pandas/tests/series/test_asof.py +++ b/pandas/tests/series/test_asof.py @@ -23,18 +23,18 @@ def test_basic(self): dates = date_range('1/1/1990', periods=N * 3, freq='25s') result = ts.asof(dates) - self.assertTrue(notnull(result).all()) + assert notnull(result).all() lb = ts.index[14] ub = ts.index[30] result = ts.asof(list(dates)) - self.assertTrue(notnull(result).all()) + assert notnull(result).all() lb = ts.index[14] ub = ts.index[30] mask = (result.index >= lb) & (result.index < ub) rs = result[mask] - self.assertTrue((rs == ts[lb]).all()) + assert (rs == ts[lb]).all() val = result[result.index[result.index >= ub][0]] self.assertEqual(ts[ub], val) @@ -63,7 +63,7 @@ def test_scalar(self): # no as of value d = ts.index[0] - offsets.BDay() - self.assertTrue(np.isnan(ts.asof(d))) + assert np.isnan(ts.asof(d)) def test_with_nan(self): # basic asof test @@ -98,19 +98,19 @@ def test_periodindex(self): dates = date_range('1/1/1990', periods=N * 3, freq='37min') result = ts.asof(dates) - self.assertTrue(notnull(result).all()) + assert notnull(result).all() lb = ts.index[14] ub = ts.index[30] result = ts.asof(list(dates)) - self.assertTrue(notnull(result).all()) + assert notnull(result).all() lb = ts.index[14] ub = ts.index[30] pix = PeriodIndex(result.index.values, freq='H') mask = (pix >= lb) & 
(pix < ub) rs = result[mask] - self.assertTrue((rs == ts[lb]).all()) + assert (rs == ts[lb]).all() ts[5:10] = np.nan ts[15:20] = np.nan @@ -130,7 +130,7 @@ def test_periodindex(self): # no as of value d = ts.index[0].to_timestamp() - offsets.BDay() - self.assertTrue(isnull(ts.asof(d))) + assert isnull(ts.asof(d)) def test_errors(self): diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index b4615e5420a81..6042a8c0a2e9d 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -74,7 +74,7 @@ def test_combine_first(self): # Holes filled from input combined = series_copy.combine_first(series) - self.assertTrue(np.isfinite(combined).all()) + assert np.isfinite(combined).all() tm.assert_series_equal(combined[::2], series[::2]) tm.assert_series_equal(combined[1::2], series_copy[1::2]) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index b08653b0001ca..966861fe3c1e4 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -50,13 +50,13 @@ def test_scalar_conversion(self): assert long(Series([1.])) == 1 def test_constructor(self): - self.assertTrue(self.ts.index.is_all_dates) + assert self.ts.index.is_all_dates # Pass in Series derived = Series(self.ts) - self.assertTrue(derived.index.is_all_dates) + assert derived.index.is_all_dates - self.assertTrue(tm.equalContents(derived.index, self.ts.index)) + assert tm.equalContents(derived.index, self.ts.index) # Ensure new index is not created self.assertEqual(id(self.ts.index), id(derived.index)) @@ -152,11 +152,11 @@ def test_constructor_categorical(self): ValueError, lambda: Series(pd.Categorical([1, 2, 3]), dtype='int64')) cat = Series(pd.Categorical([1, 2, 3]), dtype='category') - self.assertTrue(is_categorical_dtype(cat)) - self.assertTrue(is_categorical_dtype(cat.dtype)) + assert is_categorical_dtype(cat) + assert is_categorical_dtype(cat.dtype) s = Series([1, 2, 3], dtype='category') - self.assertTrue(is_categorical_dtype(s)) - self.assertTrue(is_categorical_dtype(s.dtype)) + assert is_categorical_dtype(s) + assert is_categorical_dtype(s.dtype) def test_constructor_maskedarray(self): data = ma.masked_all((3, ), dtype=float) @@ -320,7 +320,7 @@ def test_constructor_datelike_coercion(self): s = Series([Timestamp('20130101'), 'NOV'], dtype=object) self.assertEqual(s.iloc[0], Timestamp('20130101')) self.assertEqual(s.iloc[1], 'NOV') - self.assertTrue(s.dtype == object) + assert s.dtype == object # the dtype was being reset on the slicing and re-inferred to datetime # even thought the blocks are mixed @@ -334,9 +334,9 @@ def test_constructor_datelike_coercion(self): 'mat': mat}, index=belly) result = df.loc['3T19'] - self.assertTrue(result.dtype == object) + assert result.dtype == object result = df.loc['216'] - self.assertTrue(result.dtype == object) + assert result.dtype == object def test_constructor_datetimes_with_nulls(self): # gh-15869 @@ -349,7 +349,7 @@ def test_constructor_datetimes_with_nulls(self): def test_constructor_dtype_datetime64(self): s = Series(iNaT, dtype='M8[ns]', index=lrange(5)) - self.assertTrue(isnull(s).all()) + assert isnull(s).all() # in theory this should be all nulls, but since # we are not specifying a dtype is ambiguous @@ -357,14 +357,14 @@ def test_constructor_dtype_datetime64(self): assert not isnull(s).all() s = Series(nan, dtype='M8[ns]', index=lrange(5)) - self.assertTrue(isnull(s).all()) + assert isnull(s).all() s = 
Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype='M8[ns]') - self.assertTrue(isnull(s[1])) + assert isnull(s[1]) self.assertEqual(s.dtype, 'M8[ns]') s = Series([datetime(2001, 1, 2, 0, 0), nan], dtype='M8[ns]') - self.assertTrue(isnull(s[1])) + assert isnull(s[1]) self.assertEqual(s.dtype, 'M8[ns]') # GH3416 @@ -441,29 +441,29 @@ def test_constructor_dtype_datetime64(self): # tz-aware (UTC and other tz's) # GH 8411 dr = date_range('20130101', periods=3) - self.assertTrue(Series(dr).iloc[0].tz is None) + assert Series(dr).iloc[0].tz is None dr = date_range('20130101', periods=3, tz='UTC') - self.assertTrue(str(Series(dr).iloc[0].tz) == 'UTC') + assert str(Series(dr).iloc[0].tz) == 'UTC' dr = date_range('20130101', periods=3, tz='US/Eastern') - self.assertTrue(str(Series(dr).iloc[0].tz) == 'US/Eastern') + assert str(Series(dr).iloc[0].tz) == 'US/Eastern' # non-convertible s = Series([1479596223000, -1479590, pd.NaT]) - self.assertTrue(s.dtype == 'object') - self.assertTrue(s[2] is pd.NaT) - self.assertTrue('NaT' in str(s)) + assert s.dtype == 'object' + assert s[2] is pd.NaT + assert 'NaT' in str(s) # if we passed a NaT it remains s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), pd.NaT]) - self.assertTrue(s.dtype == 'object') - self.assertTrue(s[2] is pd.NaT) - self.assertTrue('NaT' in str(s)) + assert s.dtype == 'object' + assert s[2] is pd.NaT + assert 'NaT' in str(s) # if we passed a nan it remains s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan]) - self.assertTrue(s.dtype == 'object') - self.assertTrue(s[2] is np.nan) - self.assertTrue('NaN' in str(s)) + assert s.dtype == 'object' + assert s[2] is np.nan + assert 'NaN' in str(s) def test_constructor_with_datetime_tz(self): @@ -472,15 +472,15 @@ def test_constructor_with_datetime_tz(self): dr = date_range('20130101', periods=3, tz='US/Eastern') s = Series(dr) - self.assertTrue(s.dtype.name == 'datetime64[ns, US/Eastern]') - self.assertTrue(s.dtype == 'datetime64[ns, US/Eastern]') - self.assertTrue(is_datetime64tz_dtype(s.dtype)) - self.assertTrue('datetime64[ns, US/Eastern]' in str(s)) + assert s.dtype.name == 'datetime64[ns, US/Eastern]' + assert s.dtype == 'datetime64[ns, US/Eastern]' + assert is_datetime64tz_dtype(s.dtype) + assert 'datetime64[ns, US/Eastern]' in str(s) # export result = s.values assert isinstance(result, np.ndarray) - self.assertTrue(result.dtype == 'datetime64[ns]') + assert result.dtype == 'datetime64[ns]' exp = pd.DatetimeIndex(result) exp = exp.tz_localize('UTC').tz_convert(tz=s.dt.tz) @@ -524,16 +524,16 @@ def test_constructor_with_datetime_tz(self): assert_series_equal(result, expected) # short str - self.assertTrue('datetime64[ns, US/Eastern]' in str(s)) + assert 'datetime64[ns, US/Eastern]' in str(s) # formatting with NaT result = s.shift() - self.assertTrue('datetime64[ns, US/Eastern]' in str(result)) - self.assertTrue('NaT' in str(result)) + assert 'datetime64[ns, US/Eastern]' in str(result) + assert 'NaT' in str(result) # long str t = Series(date_range('20130101', periods=1000, tz='US/Eastern')) - self.assertTrue('datetime64[ns, US/Eastern]' in str(t)) + assert 'datetime64[ns, US/Eastern]' in str(t) result = pd.DatetimeIndex(s, freq='infer') tm.assert_index_equal(result, dr) @@ -541,13 +541,13 @@ def test_constructor_with_datetime_tz(self): # inference s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')]) - self.assertTrue(s.dtype == 'datetime64[ns, US/Pacific]') - self.assertTrue(lib.infer_dtype(s) == 'datetime64') 
+ assert s.dtype == 'datetime64[ns, US/Pacific]' + assert lib.infer_dtype(s) == 'datetime64' s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')]) - self.assertTrue(s.dtype == 'object') - self.assertTrue(lib.infer_dtype(s) == 'datetime') + assert s.dtype == 'object' + assert lib.infer_dtype(s) == 'datetime' # with all NaT s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]') @@ -676,7 +676,7 @@ def test_orderedDict_ctor(self): import random data = OrderedDict([('col%s' % i, random.random()) for i in range(12)]) s = pandas.Series(data) - self.assertTrue(all(s.values == list(data.values()))) + assert all(s.values == list(data.values())) def test_orderedDict_subclass_ctor(self): # GH3283 @@ -688,7 +688,7 @@ class A(OrderedDict): data = A([('col%s' % i, random.random()) for i in range(12)]) s = pandas.Series(data) - self.assertTrue(all(s.values == list(data.values()))) + assert all(s.values == list(data.values())) def test_constructor_list_of_tuples(self): data = [(1, 1), (2, 2), (2, 3)] @@ -710,7 +710,7 @@ def test_fromDict(self): data = {'a': 0, 'b': 1, 'c': 2, 'd': 3} series = Series(data) - self.assertTrue(tm.is_sorted(series.index)) + assert tm.is_sorted(series.index) data = {'a': 0, 'b': '1', 'c': '2', 'd': datetime.now()} series = Series(data) @@ -823,10 +823,10 @@ def test_NaT_scalar(self): series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') val = series[3] - self.assertTrue(isnull(val)) + assert isnull(val) series[2] = val - self.assertTrue(isnull(series[2])) + assert isnull(series[2]) def test_NaT_cast(self): # GH10747 diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index c56a5baac12af..13fa3bc782f89 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -71,7 +71,7 @@ def compare(s, name): result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) - self.assertTrue(result.dtype == object) + assert result.dtype == object result = s.dt.tz_localize('US/Eastern') exp_values = DatetimeIndex(s.values).tz_localize('US/Eastern') @@ -141,7 +141,7 @@ def compare(s, name): result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) - self.assertTrue(result.dtype == object) + assert result.dtype == object result = s.dt.tz_convert('CET') expected = Series(s._values.tz_convert('CET'), @@ -176,11 +176,11 @@ def compare(s, name): result = s.dt.to_pytimedelta() assert isinstance(result, np.ndarray) - self.assertTrue(result.dtype == object) + assert result.dtype == object result = s.dt.total_seconds() assert isinstance(result, pd.Series) - self.assertTrue(result.dtype == 'float64') + assert result.dtype == 'float64' freq_result = s.dt.freq self.assertEqual(freq_result, TimedeltaIndex(s.values, diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 601262df89260..954e80facf848 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -123,7 +123,7 @@ def test_getitem_setitem_ellipsis(self): assert_series_equal(result, s) s[...] 
= 5 - self.assertTrue((result == 5).all()) + assert (result == 5).all() def test_getitem_negative_out_of_bounds(self): s = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10)) @@ -182,7 +182,7 @@ def test_iloc(self): # test slice is a view result[:] = 0 - self.assertTrue((s[1:3] == 0).all()) + assert (s[1:3] == 0).all() # list of integers result = s.iloc[[0, 2, 3, 4, 5]] @@ -211,10 +211,10 @@ def test_getitem_setitem_slice_bug(self): s = Series(lrange(10), lrange(10)) s[-12:] = 0 - self.assertTrue((s == 0).all()) + assert (s == 0).all() s[:-12] = 5 - self.assertTrue((s == 0).all()) + assert (s == 0).all() def test_getitem_int64(self): idx = np.int64(5) @@ -335,8 +335,8 @@ def test_getitem_setitem_slice_integers(self): assert_series_equal(result, expected) s[:4] = 0 - self.assertTrue((s[:4] == 0).all()) - self.assertTrue(not (s[4:] == 0).any()) + assert (s[:4] == 0).all() + assert not (s[4:] == 0).any() def test_getitem_setitem_datetime_tz_pytz(self): tm._skip_if_no_pytz() @@ -572,7 +572,7 @@ def test_getitem_ambiguous_keyerror(self): def test_getitem_unordered_dup(self): obj = Series(lrange(5), index=['c', 'a', 'a', 'b', 'b']) - self.assertTrue(is_scalar(obj['c'])) + assert is_scalar(obj['c']) self.assertEqual(obj['c'], 0) def test_getitem_dups_with_missing(self): @@ -725,8 +725,8 @@ def test_setitem(self): self.ts[self.ts.index[5]] = np.NaN self.ts[[1, 2, 17]] = np.NaN self.ts[6] = np.NaN - self.assertTrue(np.isnan(self.ts[6])) - self.assertTrue(np.isnan(self.ts[2])) + assert np.isnan(self.ts[6]) + assert np.isnan(self.ts[2]) self.ts[np.isnan(self.ts)] = 5 assert not np.isnan(self.ts[2]) @@ -735,7 +735,7 @@ def test_setitem(self): index=tm.makeIntIndex(20)) series[::2] = 0 - self.assertTrue((series[::2] == 0).all()) + assert (series[::2] == 0).all() # set item that's not contained s = self.series.copy() @@ -804,7 +804,7 @@ def test_set_value(self): def test_setslice(self): sl = self.ts[5:20] self.assertEqual(len(sl), len(sl.index)) - self.assertTrue(sl.index.is_unique) + assert sl.index.is_unique def test_basic_getitem_setitem_corner(self): # invalid tuples, e.g. self.ts[:, None] vs. 
self.ts[:, 2] @@ -949,12 +949,12 @@ def test_loc_getitem_setitem_integer_slice_keyerrors(self): # this is OK cp = s.copy() cp.iloc[4:10] = 0 - self.assertTrue((cp.iloc[4:10] == 0).all()) + assert (cp.iloc[4:10] == 0).all() # so is this cp = s.copy() cp.iloc[3:11] = 0 - self.assertTrue((cp.iloc[3:11] == 0).values.all()) + assert (cp.iloc[3:11] == 0).values.all() result = s.iloc[2:6] result2 = s.loc[3:11] @@ -1173,7 +1173,7 @@ def f(): s = Series(range(10)).astype(float) s[8] = None result = s[8] - self.assertTrue(isnull(result)) + assert isnull(result) s = Series(range(10)).astype(float) s[s > 8] = None @@ -1515,24 +1515,24 @@ def test_where_numeric_with_string(self): w = s.where(s > 1, 'X') assert not is_integer(w[0]) - self.assertTrue(is_integer(w[1])) - self.assertTrue(is_integer(w[2])) - self.assertTrue(isinstance(w[0], str)) - self.assertTrue(w.dtype == 'object') + assert is_integer(w[1]) + assert is_integer(w[2]) + assert isinstance(w[0], str) + assert w.dtype == 'object' w = s.where(s > 1, ['X', 'Y', 'Z']) assert not is_integer(w[0]) - self.assertTrue(is_integer(w[1])) - self.assertTrue(is_integer(w[2])) - self.assertTrue(isinstance(w[0], str)) - self.assertTrue(w.dtype == 'object') + assert is_integer(w[1]) + assert is_integer(w[2]) + assert isinstance(w[0], str) + assert w.dtype == 'object' w = s.where(s > 1, np.array(['X', 'Y', 'Z'])) assert not is_integer(w[0]) - self.assertTrue(is_integer(w[1])) - self.assertTrue(is_integer(w[2])) - self.assertTrue(isinstance(w[0], str)) - self.assertTrue(w.dtype == 'object') + assert is_integer(w[1]) + assert is_integer(w[2]) + assert isinstance(w[0], str) + assert w.dtype == 'object' def test_setitem_boolean(self): mask = self.series > self.series.median() @@ -1761,7 +1761,7 @@ def test_drop(self): # GH 8522 s = Series([2, 3], index=[True, False]) - self.assertTrue(s.index.is_object()) + assert s.index.is_object() result = s.drop(True) expected = Series([3], index=[False]) assert_series_equal(result, expected) @@ -1775,9 +1775,9 @@ def _check_align(a, b, how='left', fill=None): diff_a = aa.index.difference(join_index) diff_b = ab.index.difference(join_index) if len(diff_a) > 0: - self.assertTrue((aa.reindex(diff_a) == fill).all()) + assert (aa.reindex(diff_a) == fill).all() if len(diff_b) > 0: - self.assertTrue((ab.reindex(diff_b) == fill).all()) + assert (ab.reindex(diff_b) == fill).all() ea = a.reindex(join_index) eb = b.reindex(join_index) @@ -1857,7 +1857,7 @@ def test_align_nocopy(self): a = self.ts.copy() ra, _ = a.align(b, join='left', copy=False) ra[:5] = 5 - self.assertTrue((a[:5] == 5).all()) + assert (a[:5] == 5).all() # do copy a = self.ts.copy() @@ -1871,7 +1871,7 @@ def test_align_nocopy(self): b = self.ts[:5].copy() _, rb = a.align(b, join='right', copy=False) rb[:2] = 5 - self.assertTrue((b[:2] == 5).all()) + assert (b[:2] == 5).all() def test_align_same_index(self): a, b = self.ts.align(self.ts, copy=False) @@ -1921,13 +1921,12 @@ def test_reindex(self): # __array_interface__ is not defined for older numpies # and on some pythons try: - self.assertTrue(np.may_share_memory(self.series.index, - identity.index)) - except (AttributeError): + assert np.may_share_memory(self.series.index, identity.index) + except AttributeError: pass - self.assertTrue(identity.index.is_(self.series.index)) - self.assertTrue(identity.index.identical(self.series.index)) + assert identity.index.is_(self.series.index) + assert identity.index.identical(self.series.index) subIndex = self.series.index[10:20] subSeries = self.series.reindex(subIndex) @@ 
-1942,7 +1941,7 @@ def test_reindex(self): self.assertEqual(val, self.ts[idx]) stuffSeries = self.ts.reindex(subIndex) - self.assertTrue(np.isnan(stuffSeries).all()) + assert np.isnan(stuffSeries).all() # This is extremely important for the Cython code to not screw up nonContigIndex = self.ts.index[::2] @@ -1970,10 +1969,10 @@ def test_reindex_series_add_nat(self): series = Series(rng) result = series.reindex(lrange(15)) - self.assertTrue(np.issubdtype(result.dtype, np.dtype('M8[ns]'))) + assert np.issubdtype(result.dtype, np.dtype('M8[ns]')) mask = result.isnull() - self.assertTrue(mask[-5:].all()) + assert mask[-5:].all() assert not mask[:-5].any() def test_reindex_with_datetimes(self): @@ -2098,7 +2097,7 @@ def test_reindex_bool_pad(self): ts = self.ts[5:] bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index) filled_bool = bool_ts.reindex(self.ts.index, method='pad') - self.assertTrue(isnull(filled_bool[:5]).all()) + assert isnull(filled_bool[:5]).all() def test_reindex_like(self): other = self.ts[::2] @@ -2140,7 +2139,7 @@ def test_reindex_fill_value(self): # don't upcast result = ints.reindex([1, 2, 3], fill_value=0) expected = Series([2, 3, 0], index=[1, 2, 3]) - self.assertTrue(issubclass(result.dtype.type, np.integer)) + assert issubclass(result.dtype.type, np.integer) assert_series_equal(result, expected) # ----------------------------------------------------------- @@ -2256,11 +2255,7 @@ def test_setitem_slice_into_readonly_backing_data(self): with pytest.raises(ValueError): series[1:3] = 1 - self.assertTrue( - not array.any(), - msg='even though the ValueError was raised, the underlying' - ' array was still mutated!', - ) + assert not array.any() class TestTimeSeriesDuplicates(tm.TestCase): @@ -2290,14 +2285,14 @@ def test_index_unique(self): self.assertEqual(self.dups.index.nunique(), 4) # #2563 - self.assertTrue(isinstance(uniques, DatetimeIndex)) + assert isinstance(uniques, DatetimeIndex) dups_local = self.dups.index.tz_localize('US/Eastern') dups_local.name = 'foo' result = dups_local.unique() expected = DatetimeIndex(expected, name='foo') expected = expected.tz_localize('US/Eastern') - self.assertTrue(result.tz is not None) + assert result.tz is not None self.assertEqual(result.name, 'foo') tm.assert_index_equal(result, expected) @@ -2318,7 +2313,7 @@ def test_index_unique(self): def test_index_dupes_contains(self): d = datetime(2011, 12, 5, 20, 30) ix = DatetimeIndex([d, d]) - self.assertTrue(d in ix) + assert d in ix def test_duplicate_dates_indexing(self): ts = self.dups @@ -2401,7 +2396,7 @@ def test_indexing_over_size_cutoff(self): # it works! 
df.loc[timestamp] - self.assertTrue(len(df.loc[[timestamp]]) > 0) + assert len(df.loc[[timestamp]]) > 0 finally: _index._SIZE_CUTOFF = old_cutoff @@ -2417,7 +2412,7 @@ def test_indexing_unordered(self): expected = ts[t] result = ts2[t] - self.assertTrue(expected == result) + assert expected == result # GH 3448 (ranges) def compare(slobj): @@ -2447,7 +2442,7 @@ def compare(slobj): result = ts['2005'] for t in result.index: - self.assertTrue(t.year == 2005) + assert t.year == 2005 def test_indexing(self): @@ -2541,7 +2536,7 @@ def test_fancy_setitem(self): s['1/2/2009'] = -2 self.assertEqual(s[48], -2) s['1/2/2009':'2009-06-05'] = -3 - self.assertTrue((s[48:54] == -3).all()) + assert (s[48:54] == -3).all() def test_dti_snap(self): dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002', @@ -2550,13 +2545,13 @@ def test_dti_snap(self): res = dti.snap(freq='W-MON') exp = date_range('12/31/2001', '1/7/2002', freq='w-mon') exp = exp.repeat([3, 4]) - self.assertTrue((res == exp).all()) + assert (res == exp).all() res = dti.snap(freq='B') exp = date_range('1/1/2002', '1/7/2002', freq='b') exp = exp.repeat([1, 1, 1, 2, 2]) - self.assertTrue((res == exp).all()) + assert (res == exp).all() def test_dti_reset_index_round_trip(self): dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D') @@ -2642,11 +2637,11 @@ def test_frame_datetime64_duplicated(self): tst = DataFrame({'symbol': 'AAA', 'date': dates}) result = tst.duplicated(['date', 'symbol']) - self.assertTrue((-result).all()) + assert (-result).all() tst = DataFrame({'date': dates}) result = tst.duplicated() - self.assertTrue((-result).all()) + assert (-result).all() class TestNatIndexing(tm.TestCase): diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 3df32992a4d74..7a9d0390a2cfa 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -24,25 +24,25 @@ def test_from_csv(self): self.ts.to_csv(path) ts = Series.from_csv(path) assert_series_equal(self.ts, ts, check_names=False) - self.assertTrue(ts.name is None) - self.assertTrue(ts.index.name is None) + assert ts.name is None + assert ts.index.name is None # GH10483 self.ts.to_csv(path, header=True) ts_h = Series.from_csv(path, header=0) - self.assertTrue(ts_h.name == 'ts') + assert ts_h.name == 'ts' self.series.to_csv(path) series = Series.from_csv(path) assert series.name is None assert series.index.name is None assert_series_equal(self.series, series, check_names=False) - self.assertTrue(series.name is None) - self.assertTrue(series.index.name is None) + assert series.name is None + assert series.index.name is None self.series.to_csv(path, header=True) series_h = Series.from_csv(path, header=0) - self.assertTrue(series_h.name == 'series') + assert series_h.name == 'series' outfile = open(path, 'w') outfile.write('1998-01-01|1.0\n1999-01-01|2.0') @@ -163,7 +163,7 @@ class SubclassedFrame(DataFrame): s = SubclassedSeries([1, 2, 3], name='X') result = s.to_frame() - self.assertTrue(isinstance(result, SubclassedFrame)) + assert isinstance(result, SubclassedFrame) expected = SubclassedFrame({'X': [1, 2, 3]}) assert_frame_equal(result, expected) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 53c8c518eb3eb..251954b5da05e 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -484,19 +484,19 @@ def test_timedelta64_nan(self): # nan ops on timedeltas td1 = td.copy() td1[0] = np.nan - self.assertTrue(isnull(td1[0])) + assert isnull(td1[0]) 
         self.assertEqual(td1[0].value, iNaT)
         td1[0] = td[0]
         assert not isnull(td1[0])
 
         td1[1] = iNaT
-        self.assertTrue(isnull(td1[1]))
+        assert isnull(td1[1])
         self.assertEqual(td1[1].value, iNaT)
         td1[1] = td[1]
         assert not isnull(td1[1])
 
         td1[2] = NaT
-        self.assertTrue(isnull(td1[2]))
+        assert isnull(td1[2])
         self.assertEqual(td1[2].value, iNaT)
         td1[2] = td[2]
         assert not isnull(td1[2])
@@ -599,7 +599,7 @@ def test_pad_nan(self):
         expected = Series([np.nan, 1.0, 1.0, 3.0, 3.0],
                           ['z', 'a', 'b', 'c', 'd'], dtype=float)
         assert_series_equal(x[1:], expected[1:])
-        self.assertTrue(np.isnan(x[0]), np.isnan(expected[0]))
+        assert np.isnan(x[0]) and np.isnan(expected[0])
 
     def test_pad_require_monotonicity(self):
         rng = date_range('1/1/2000', '3/1/2000', freq='B')
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index eb840faac05e0..f48a3474494a4 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -121,7 +121,7 @@ def test_div(self):
         result = p['first'] / p['second']
         assert_series_equal(result, p['first'].astype('float64'),
                             check_names=False)
-        self.assertTrue(result.name is None)
+        assert result.name is None
         assert not np.array_equal(result, p['second'] / p['first'])
 
         # inf signing
@@ -565,11 +565,11 @@ def test_timedelta64_conversions(self):
         s = Series(date_range('20130101', periods=3))
         result = s.astype(object)
         assert isinstance(result.iloc[0], datetime)
-        self.assertTrue(result.dtype == np.object_)
+        assert result.dtype == np.object_
 
         result = s1.astype(object)
         assert isinstance(result.iloc[0], timedelta)
-        self.assertTrue(result.dtype == np.object_)
+        assert result.dtype == np.object_
 
     def test_timedelta64_equal_timedelta_supported_ops(self):
         ser = Series([Timestamp('20130301'), Timestamp('20130228 23:00:00'),
@@ -1466,7 +1466,7 @@ def test_operators_corner(self):
         empty = Series([], index=Index([]))
 
         result = series + empty
-        self.assertTrue(np.isnan(result).all())
+        assert np.isnan(result).all()
 
         result = empty + Series([], index=Index([]))
         self.assertEqual(len(result), 0)
@@ -1777,8 +1777,8 @@ def _check_fill(meth, op, a, b, fill_value=0):
     def test_ne(self):
         ts = Series([3, 4, 5, 6, 7], [3, 4, 5, 6, 7], dtype=float)
         expected = [True, True, False, True, True]
-        self.assertTrue(tm.equalContents(ts.index != 5, expected))
-        self.assertTrue(tm.equalContents(~(ts.index == 5), expected))
+        assert tm.equalContents(ts.index != 5, expected)
+        assert tm.equalContents(~(ts.index == 5), expected)
 
     def test_operators_na_handling(self):
         from decimal import Decimal
@@ -1788,8 +1788,8 @@ def test_operators_na_handling(self):
 
         result = s + s.shift(1)
         result2 = s.shift(1) + s
-        self.assertTrue(isnull(result[0]))
-        self.assertTrue(isnull(result2[0]))
+        assert isnull(result[0])
+        assert isnull(result2[0])
 
         s = Series(['foo', 'bar', 'baz', np.nan])
         result = 'prefix_' + s
diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py
index fdc12459f8c59..72a85086d4e24 100644
--- a/pandas/tests/series/test_period.py
+++ b/pandas/tests/series/test_period.py
@@ -89,10 +89,10 @@ def test_NaT_scalar(self):
         series = Series([0, 1000, 2000, iNaT], dtype='period[D]')
 
         val = series[3]
-        self.assertTrue(isnull(val))
+        assert isnull(val)
 
         series[2] = val
-        self.assertTrue(isnull(series[2]))
+        assert isnull(series[2])
 
     def test_NaT_cast(self):
         result = Series([np.nan]).astype('period[D]')
@@ -109,10 +109,10 @@ def test_set_none_nan(self):
         assert self.series[4] is None
 
         self.series[5] = np.nan
-        self.assertTrue(np.isnan(self.series[5]))
+        assert
np.isnan(self.series[5]) self.series[5:7] = np.nan - self.assertTrue(np.isnan(self.series[6])) + assert np.isnan(self.series[6]) def test_intercept_astype_object(self): expected = self.series.astype('object') @@ -121,12 +121,12 @@ def test_intercept_astype_object(self): 'b': np.random.randn(len(self.series))}) result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) + assert (result[:, 0] == expected.values).all() df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)}) result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) + assert (result[:, 0] == expected.values).all() def test_comp_series_period_scalar(self): # GH 13200 diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index 6f9c65e37533d..9fb87a914a0ac 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -39,7 +39,7 @@ def test_quantile(self): # GH7661 result = Series([np.timedelta64('NaT')]).sum() - self.assertTrue(result is pd.NaT) + assert result is pd.NaT msg = 'percentiles should all be in the interval \\[0, 1\\]' for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: @@ -90,11 +90,11 @@ def test_quantile_interpolation_dtype(self): # interpolation = linear (default case) q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='lower') self.assertEqual(q, np.percentile(np.array([1, 3, 4]), 50)) - self.assertTrue(is_integer(q)) + assert is_integer(q) q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='higher') self.assertEqual(q, np.percentile(np.array([1, 3, 4]), 50)) - self.assertTrue(is_integer(q)) + assert is_integer(q) @pytest.mark.skipif(not _np_version_under1p9, reason="Numpy version is greater 1.9") @@ -130,7 +130,7 @@ def test_quantile_nan(self): for s in cases: res = s.quantile(0.5) - self.assertTrue(np.isnan(res)) + assert np.isnan(res) res = s.quantile([0.5]) tm.assert_series_equal(res, pd.Series([np.nan], index=[0.5])) @@ -167,12 +167,12 @@ def test_quantile_box(self): def test_datetime_timedelta_quantiles(self): # covers #9694 - self.assertTrue(pd.isnull(Series([], dtype='M8[ns]').quantile(.5))) - self.assertTrue(pd.isnull(Series([], dtype='m8[ns]').quantile(.5))) + assert pd.isnull(Series([], dtype='M8[ns]').quantile(.5)) + assert pd.isnull(Series([], dtype='m8[ns]').quantile(.5)) def test_quantile_nat(self): res = Series([pd.NaT, pd.NaT]).quantile(0.5) - self.assertTrue(res is pd.NaT) + assert res is pd.NaT res = Series([pd.NaT, pd.NaT]).quantile([0.5]) tm.assert_series_equal(res, pd.Series([pd.NaT], index=[0.5])) @@ -183,7 +183,7 @@ def test_quantile_empty(self): s = Series([], dtype='float64') res = s.quantile(0.5) - self.assertTrue(np.isnan(res)) + assert np.isnan(res) res = s.quantile([0.5]) exp = Series([np.nan], index=[0.5]) @@ -193,7 +193,7 @@ def test_quantile_empty(self): s = Series([], dtype='int64') res = s.quantile(0.5) - self.assertTrue(np.isnan(res)) + assert np.isnan(res) res = s.quantile([0.5]) exp = Series([np.nan], index=[0.5]) @@ -203,7 +203,7 @@ def test_quantile_empty(self): s = Series([], dtype='datetime64[ns]') res = s.quantile(0.5) - self.assertTrue(res is pd.NaT) + assert res is pd.NaT res = s.quantile([0.5]) exp = Series([pd.NaT], index=[0.5]) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index ee7b264bde8bc..19a99c8351db8 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -37,18 +37,18 @@ def test_replace(self): # replace list with a single value rs = 
ser.replace([np.nan, 'foo', 'bar'], -1)
 
-        self.assertTrue((rs[:5] == -1).all())
-        self.assertTrue((rs[6:10] == -1).all())
-        self.assertTrue((rs[20:30] == -1).all())
-        self.assertTrue((pd.isnull(ser[:5])).all())
+        assert (rs[:5] == -1).all()
+        assert (rs[6:10] == -1).all()
+        assert (rs[20:30] == -1).all()
+        assert (pd.isnull(ser[:5])).all()
 
         # replace with different values
         rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3})
 
-        self.assertTrue((rs[:5] == -1).all())
-        self.assertTrue((rs[6:10] == -2).all())
-        self.assertTrue((rs[20:30] == -3).all())
-        self.assertTrue((pd.isnull(ser[:5])).all())
+        assert (rs[:5] == -1).all()
+        assert (rs[6:10] == -2).all()
+        assert (rs[20:30] == -3).all()
+        assert (pd.isnull(ser[:5])).all()
 
         # replace with different values with 2 lists
         rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3])
@@ -57,9 +57,9 @@ def test_replace(self):
 
         # replace inplace
         ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True)
-        self.assertTrue((ser[:5] == -1).all())
-        self.assertTrue((ser[6:10] == -1).all())
-        self.assertTrue((ser[20:30] == -1).all())
+        assert (ser[:5] == -1).all()
+        assert (ser[6:10] == -1).all()
+        assert (ser[20:30] == -1).all()
 
         ser = pd.Series([np.nan, 0, np.inf])
         tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
@@ -200,18 +200,18 @@ def test_replace2(self):
         # replace list with a single value
         rs = ser.replace([np.nan, 'foo', 'bar'], -1)
 
-        self.assertTrue((rs[:5] == -1).all())
-        self.assertTrue((rs[6:10] == -1).all())
-        self.assertTrue((rs[20:30] == -1).all())
-        self.assertTrue((pd.isnull(ser[:5])).all())
+        assert (rs[:5] == -1).all()
+        assert (rs[6:10] == -1).all()
+        assert (rs[20:30] == -1).all()
+        assert (pd.isnull(ser[:5])).all()
 
         # replace with different values
         rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3})
 
-        self.assertTrue((rs[:5] == -1).all())
-        self.assertTrue((rs[6:10] == -2).all())
-        self.assertTrue((rs[20:30] == -3).all())
-        self.assertTrue((pd.isnull(ser[:5])).all())
+        assert (rs[:5] == -1).all()
+        assert (rs[6:10] == -2).all()
+        assert (rs[20:30] == -3).all()
+        assert (pd.isnull(ser[:5])).all()
 
         # replace with different values with 2 lists
         rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3])
@@ -219,9 +219,9 @@ def test_replace2(self):
 
         # replace inplace
         ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True)
-        self.assertTrue((ser[:5] == -1).all())
-        self.assertTrue((ser[6:10] == -1).all())
-        self.assertTrue((ser[20:30] == -1).all())
+        assert (ser[:5] == -1).all()
+        assert (ser[6:10] == -1).all()
+        assert (ser[20:30] == -1).all()
 
     def test_replace_with_empty_dictlike(self):
         # GH 15289
diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py
index c92a82e287120..2decffce0f2fe 100644
--- a/pandas/tests/series/test_repr.py
+++ b/pandas/tests/series/test_repr.py
@@ -148,7 +148,7 @@ def test_repr_should_return_str(self):
         data = [8, 5, 3, 5]
         index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), u("\u03c6")]
         df = Series(data, index=index1)
-        self.assertTrue(type(df.__repr__() == str))  # both py2 / 3
+        assert type(df.__repr__()) == str  # both py2 / 3
 
     def test_repr_max_rows(self):
         # GH 6863
@@ -176,7 +176,7 @@ def test_timeseries_repr_object_dtype(self):
         repr(ts)
 
         ts = tm.makeTimeSeries(1000)
-        self.assertTrue(repr(ts).splitlines()[-1].startswith('Freq:'))
+        assert repr(ts).splitlines()[-1].startswith('Freq:')
 
         ts2 = ts.iloc[np.random.randint(0, len(ts) - 1, 400)]
         repr(ts2).splitlines()[-1]
diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py
index 6fe18e712a29d..791a7d5db9a26 100644
---
a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -35,12 +35,12 @@ def test_sort_values(self): vals = ts.values result = ts.sort_values() - self.assertTrue(np.isnan(result[-5:]).all()) + assert np.isnan(result[-5:]).all() tm.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:])) # na_position result = ts.sort_values(na_position='first') - self.assertTrue(np.isnan(result[:5]).all()) + assert np.isnan(result[:5]).all() tm.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:])) # something object-type diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 430be97845fcb..1c94bc3db9990 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -343,8 +343,8 @@ def test_autocorr(self): # corr() with lag needs Series of at least length 2 if len(self.ts) <= 2: - self.assertTrue(np.isnan(corr1)) - self.assertTrue(np.isnan(corr2)) + assert np.isnan(corr1) + assert np.isnan(corr2) else: self.assertEqual(corr1, corr2) @@ -356,8 +356,8 @@ def test_autocorr(self): # corr() with lag needs Series of at least length 2 if len(self.ts) <= 2: - self.assertTrue(np.isnan(corr1)) - self.assertTrue(np.isnan(corr2)) + assert np.isnan(corr1) + assert np.isnan(corr2) else: self.assertEqual(corr1, corr2) @@ -393,7 +393,7 @@ def test_mpl_compat_hack(self): def test_timeseries_coercion(self): idx = tm.makeDateIndex(10000) ser = Series(np.random.randn(len(idx)), idx.astype(object)) - self.assertTrue(ser.index.is_all_dates) + assert ser.index.is_all_dates assert isinstance(ser.index, DatetimeIndex) def test_empty_series_ops(self): @@ -487,7 +487,7 @@ def test_series_ctor_datetime64(self): dates = np.asarray(rng) series = Series(dates) - self.assertTrue(np.issubdtype(series.dtype, np.dtype('M8[ns]'))) + assert np.issubdtype(series.dtype, np.dtype('M8[ns]')) def test_series_repr_nat(self): series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') @@ -602,9 +602,9 @@ def test_at_time(self): rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = Series(np.random.randn(len(rng)), index=rng) rs = ts.at_time(rng[1]) - self.assertTrue((rs.index.hour == rng[1].hour).all()) - self.assertTrue((rs.index.minute == rng[1].minute).all()) - self.assertTrue((rs.index.second == rng[1].second).all()) + assert (rs.index.hour == rng[1].hour).all() + assert (rs.index.minute == rng[1].minute).all() + assert (rs.index.second == rng[1].second).all() result = ts.at_time('9:30') expected = ts.at_time(time(9, 30)) @@ -667,14 +667,14 @@ def test_between_time(self): for rs in filtered.index: t = rs.time() if inc_start: - self.assertTrue(t >= stime) + assert t >= stime else: - self.assertTrue(t > stime) + assert t > stime if inc_end: - self.assertTrue(t <= etime) + assert t <= etime else: - self.assertTrue(t < etime) + assert t < etime result = ts.between_time('00:00', '01:00') expected = ts.between_time(stime, etime) @@ -699,14 +699,14 @@ def test_between_time(self): for rs in filtered.index: t = rs.time() if inc_start: - self.assertTrue((t >= stime) or (t <= etime)) + assert (t >= stime) or (t <= etime) else: - self.assertTrue((t > stime) or (t <= etime)) + assert (t > stime) or (t <= etime) if inc_end: - self.assertTrue((t <= etime) or (t >= stime)) + assert (t <= etime) or (t >= stime) else: - self.assertTrue((t < etime) or (t >= stime)) + assert (t < etime) or (t >= stime) def test_between_time_types(self): # GH11818 @@ -830,13 +830,13 @@ def test_pickle(self): # GH4606 p = tm.round_trip_pickle(NaT) - 
self.assertTrue(p is NaT)
+        assert p is NaT
 
         idx = pd.to_datetime(['2013-01-01', NaT, '2014-01-06'])
         idx_p = tm.round_trip_pickle(idx)
-        self.assertTrue(idx_p[0] == idx[0])
-        self.assertTrue(idx_p[1] is NaT)
-        self.assertTrue(idx_p[2] == idx[2])
+        assert idx_p[0] == idx[0]
+        assert idx_p[1] is NaT
+        assert idx_p[2] == idx[2]
 
         # GH11002
         # don't infer freq
@@ -900,12 +900,12 @@ def test_min_max_series(self):
 
         result = df.TS.max()
         exp = Timestamp(df.TS.iat[-1])
-        self.assertTrue(isinstance(result, Timestamp))
+        assert isinstance(result, Timestamp)
         self.assertEqual(result, exp)
 
         result = df.TS.min()
         exp = Timestamp(df.TS.iat[0])
-        self.assertTrue(isinstance(result, Timestamp))
+        assert isinstance(result, Timestamp)
         self.assertEqual(result, exp)
 
     def test_from_M8_structured(self):
@@ -918,7 +918,7 @@ def test_from_M8_structured(self):
         self.assertEqual(df['Forecasting'][0], dates[0][1])
 
         s = Series(arr['Date'])
-        self.assertTrue(s[0], Timestamp)
+        assert isinstance(s[0], Timestamp)
         self.assertEqual(s[0], dates[0][0])
 
         s = Series.from_array(arr['Date'], Index([0]))
@@ -933,4 +933,4 @@ def test_get_level_values_box(self):
 
         index = MultiIndex(levels=levels, labels=labels)
 
-        self.assertTrue(isinstance(index.get_level_values(0)[0], Timestamp))
+        assert isinstance(index.get_level_values(0)[0], Timestamp)
diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py
index 33df4b5e59bc9..b8dff5606f979 100644
--- a/pandas/tests/sparse/test_array.py
+++ b/pandas/tests/sparse/test_array.py
@@ -25,7 +25,7 @@ def setUp(self):
     def test_constructor_dtype(self):
         arr = SparseArray([np.nan, 1, 2, np.nan])
         self.assertEqual(arr.dtype, np.float64)
-        self.assertTrue(np.isnan(arr.fill_value))
+        assert np.isnan(arr.fill_value)
 
         arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0)
         self.assertEqual(arr.dtype, np.float64)
@@ -33,7 +33,7 @@ def test_constructor_dtype(self):
 
         arr = SparseArray([0, 1, 2, 4], dtype=np.float64)
         self.assertEqual(arr.dtype, np.float64)
-        self.assertTrue(np.isnan(arr.fill_value))
+        assert np.isnan(arr.fill_value)
 
         arr = SparseArray([0, 1, 2, 4], dtype=np.int64)
         self.assertEqual(arr.dtype, np.int64)
@@ -55,7 +55,7 @@ def test_constructor_object_dtype(self):
         # GH 11856
         arr = SparseArray(['A', 'A', np.nan, 'B'], dtype=np.object)
         self.assertEqual(arr.dtype, np.object)
-        self.assertTrue(np.isnan(arr.fill_value))
+        assert np.isnan(arr.fill_value)
 
         arr = SparseArray(['A', 'A', np.nan, 'B'], dtype=np.object,
                           fill_value='A')
@@ -66,7 +66,7 @@ def test_constructor_spindex_dtype(self):
         arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
         tm.assert_sp_array_equal(arr, SparseArray([np.nan, 1, 2, np.nan]))
         self.assertEqual(arr.dtype, np.float64)
-        self.assertTrue(np.isnan(arr.fill_value))
+        assert np.isnan(arr.fill_value)
 
         arr = SparseArray(data=[1, 2, 3],
                           sparse_index=IntIndex(4, [1, 2, 3]),
@@ -133,7 +133,7 @@ def test_sparseseries_roundtrip(self):
 
     def test_get_item(self):
 
-        self.assertTrue(np.isnan(self.arr[1]))
+        assert np.isnan(self.arr[1])
         self.assertEqual(self.arr[2], 1)
         self.assertEqual(self.arr[7], 5)
 
@@ -147,8 +147,8 @@ def test_get_item(self):
         self.assertEqual(self.arr[-1], self.arr[len(self.arr) - 1])
 
     def test_take(self):
-        self.assertTrue(np.isnan(self.arr.take(0)))
-        self.assertTrue(np.isscalar(self.arr.take(2)))
+        assert np.isnan(self.arr.take(0))
+        assert np.isscalar(self.arr.take(2))
 
         # np.take in < 1.8 doesn't support scalar indexing
         if not _np_version_under1p8:
@@ -303,7 +303,7 @@ def test_constructor_copy(self):
 
         not_copy = SparseArray(self.arr)
         not_copy.sp_values[:3] = 0
-
self.assertTrue((self.arr.sp_values[:3] == 0).all())
+        assert (self.arr.sp_values[:3] == 0).all()

     def test_constructor_bool(self):
         # GH 10648
@@ -331,7 +331,7 @@ def test_constructor_bool_fill_value(self):
         arr = SparseArray([True, False, True], dtype=np.bool,
                           fill_value=True)
         self.assertEqual(arr.dtype, np.bool)
-        self.assertTrue(arr.fill_value)
+        assert arr.fill_value

     def test_constructor_float32(self):
         # GH 10648
@@ -400,7 +400,7 @@ def test_set_fill_value(self):
         arr = SparseArray([True, False, True], fill_value=False,
                           dtype=np.bool)
         arr.fill_value = True
-        self.assertTrue(arr.fill_value)
+        assert arr.fill_value

         # coerces to bool
         msg = "unable to set fill_value 0 to bool dtype"
@@ -637,7 +637,7 @@ def test_fillna(self):
         # only fill_value will be changed
         s = SparseArray([0, 0, 0, 0], fill_value=np.nan)
         self.assertEqual(s.dtype, np.int64)
-        self.assertTrue(np.isnan(s.fill_value))
+        assert np.isnan(s.fill_value)
         res = s.fillna(-1)
         exp = SparseArray([0, 0, 0, 0], fill_value=-1)
         tm.assert_sp_array_equal(res, exp)
diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py
index a5080bbd81005..6b54dca8e93d5 100644
--- a/pandas/tests/sparse/test_frame.py
+++ b/pandas/tests/sparse/test_frame.py
@@ -91,7 +91,7 @@ def test_copy(self):

         # as of v0.15.0
         # this is now identical (but not is_a )
-        self.assertTrue(cp.index.identical(self.frame.index))
+        assert cp.index.identical(self.frame.index)

     def test_constructor(self):
         for col, series in compat.iteritems(self.frame):
@@ -171,7 +171,7 @@ def test_constructor_dataframe(self):
     def test_constructor_convert_index_once(self):
         arr = np.array([1.5, 2.5, 3.5])
         sdf = SparseDataFrame(columns=lrange(4), index=arr)
-        self.assertTrue(sdf[0].index is sdf[1].index)
+        assert sdf[0].index is sdf[1].index

     def test_constructor_from_series(self):

@@ -290,7 +290,7 @@ def test_dense_to_sparse(self):
                         'B': [1, 2, nan, nan, nan]})
         sdf = df.to_sparse()
         assert isinstance(sdf, SparseDataFrame)
-        self.assertTrue(np.isnan(sdf.default_fill_value))
+        assert np.isnan(sdf.default_fill_value)
         assert isinstance(sdf['A'].sp_index, BlockIndex)
         tm.assert_frame_equal(sdf.to_dense(), df)

@@ -385,7 +385,7 @@ def _compare_to_dense(a, b, da, db, op):

     def test_op_corners(self):
         empty = self.empty + self.empty
-        self.assertTrue(empty.empty)
+        assert empty.empty

         foo = self.frame + self.empty
         assert isinstance(foo.index, DatetimeIndex)
@@ -411,7 +411,7 @@ def test_iloc(self):

         # 2227
         result = self.frame.iloc[:, 0]
-        self.assertTrue(isinstance(result, SparseSeries))
+        assert isinstance(result, SparseSeries)
         tm.assert_sp_series_equal(result, self.frame['A'])

         # preserve sparse index type. #2251
@@ -515,7 +515,7 @@ def _check_frame(frame, orig):
         # scalar value
         frame['J'] = 5
         self.assertEqual(len(frame['J'].sp_values), N)
-        self.assertTrue((frame['J'].sp_values == 5).all())
+        assert (frame['J'].sp_values == 5).all()

         frame['K'] = frame.default_fill_value
         self.assertEqual(len(frame['K'].sp_values), 0)
@@ -1099,7 +1099,7 @@ def test_nan_columnname(self):
         # GH 8822
         nan_colname = DataFrame(Series(1.0, index=[0]), columns=[nan])
         nan_colname_sparse = nan_colname.to_sparse()
-        self.assertTrue(np.isnan(nan_colname_sparse.columns[0]))
+        assert np.isnan(nan_colname_sparse.columns[0])

     def test_isnull(self):
         # GH 8276
diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py
index bfa0a0440761f..6dd012ad46db9 100644
--- a/pandas/tests/sparse/test_indexing.py
+++ b/pandas/tests/sparse/test_indexing.py
@@ -17,7 +17,7 @@ def test_getitem(self):
         sparse = self.sparse

         self.assertEqual(sparse[0], 1)
-        self.assertTrue(np.isnan(sparse[1]))
+        assert np.isnan(sparse[1])
         self.assertEqual(sparse[3], 3)

         result = sparse[[1, 3, 4]]
@@ -67,7 +67,7 @@ def test_getitem_fill_value(self):
         sparse = orig.to_sparse(fill_value=0)

         self.assertEqual(sparse[0], 1)
-        self.assertTrue(np.isnan(sparse[1]))
+        assert np.isnan(sparse[1])
         self.assertEqual(sparse[2], 0)
         self.assertEqual(sparse[3], 3)

@@ -114,7 +114,7 @@ def test_loc(self):
         sparse = self.sparse

         self.assertEqual(sparse.loc[0], 1)
-        self.assertTrue(np.isnan(sparse.loc[1]))
+        assert np.isnan(sparse.loc[1])

         result = sparse.loc[[1, 3, 4]]
         exp = orig.loc[[1, 3, 4]].to_sparse()
@@ -125,7 +125,7 @@ def test_loc(self):
         exp = orig.loc[[1, 3, 4, 5]].to_sparse()
         tm.assert_sp_series_equal(result, exp)
         # padded with NaN
-        self.assertTrue(np.isnan(result[-1]))
+        assert np.isnan(result[-1])

         # dense array
         result = sparse.loc[orig % 2 == 1]
@@ -146,7 +146,7 @@ def test_loc_index(self):
         sparse = orig.to_sparse()

         self.assertEqual(sparse.loc['A'], 1)
-        self.assertTrue(np.isnan(sparse.loc['B']))
+        assert np.isnan(sparse.loc['B'])

         result = sparse.loc[['A', 'C', 'D']]
         exp = orig.loc[['A', 'C', 'D']].to_sparse()
@@ -171,7 +171,7 @@ def test_loc_index_fill_value(self):
         sparse = orig.to_sparse(fill_value=0)

         self.assertEqual(sparse.loc['A'], 1)
-        self.assertTrue(np.isnan(sparse.loc['B']))
+        assert np.isnan(sparse.loc['B'])

         result = sparse.loc[['A', 'C', 'D']]
         exp = orig.loc[['A', 'C', 'D']].to_sparse(fill_value=0)
@@ -210,7 +210,7 @@ def test_iloc(self):
         sparse = self.sparse

         self.assertEqual(sparse.iloc[3], 3)
-        self.assertTrue(np.isnan(sparse.iloc[2]))
+        assert np.isnan(sparse.iloc[2])

         result = sparse.iloc[[1, 3, 4]]
         exp = orig.iloc[[1, 3, 4]].to_sparse()
@@ -228,7 +228,7 @@ def test_iloc_fill_value(self):
         sparse = orig.to_sparse(fill_value=0)

         self.assertEqual(sparse.iloc[3], 3)
-        self.assertTrue(np.isnan(sparse.iloc[1]))
+        assert np.isnan(sparse.iloc[1])
         self.assertEqual(sparse.iloc[4], 0)

         result = sparse.iloc[[1, 3, 4]]
@@ -250,26 +250,26 @@ def test_at(self):
         orig = pd.Series([1, np.nan, np.nan, 3, np.nan])
         sparse = orig.to_sparse()
         self.assertEqual(sparse.at[0], orig.at[0])
-        self.assertTrue(np.isnan(sparse.at[1]))
-        self.assertTrue(np.isnan(sparse.at[2]))
+        assert np.isnan(sparse.at[1])
+        assert np.isnan(sparse.at[2])
         self.assertEqual(sparse.at[3], orig.at[3])
-        self.assertTrue(np.isnan(sparse.at[4]))
+        assert np.isnan(sparse.at[4])

         orig = pd.Series([1, np.nan, np.nan, 3, np.nan],
                          index=list('abcde'))
         sparse = orig.to_sparse()
         self.assertEqual(sparse.at['a'], orig.at['a'])
-        self.assertTrue(np.isnan(sparse.at['b']))
-        self.assertTrue(np.isnan(sparse.at['c']))
+        assert np.isnan(sparse.at['b'])
+        assert np.isnan(sparse.at['c'])
         self.assertEqual(sparse.at['d'], orig.at['d'])
-        self.assertTrue(np.isnan(sparse.at['e']))
+        assert np.isnan(sparse.at['e'])

     def test_at_fill_value(self):
         orig = pd.Series([1, np.nan, 0, 3, 0],
                          index=list('abcde'))
         sparse = orig.to_sparse(fill_value=0)
         self.assertEqual(sparse.at['a'], orig.at['a'])
-        self.assertTrue(np.isnan(sparse.at['b']))
+        assert np.isnan(sparse.at['b'])
         self.assertEqual(sparse.at['c'], orig.at['c'])
         self.assertEqual(sparse.at['d'], orig.at['d'])
         self.assertEqual(sparse.at['e'], orig.at['e'])
@@ -279,19 +279,19 @@ def test_iat(self):
         sparse = self.sparse

         self.assertEqual(sparse.iat[0], orig.iat[0])
-        self.assertTrue(np.isnan(sparse.iat[1]))
-        self.assertTrue(np.isnan(sparse.iat[2]))
+        assert np.isnan(sparse.iat[1])
+        assert np.isnan(sparse.iat[2])
         self.assertEqual(sparse.iat[3], orig.iat[3])
-        self.assertTrue(np.isnan(sparse.iat[4]))
+        assert np.isnan(sparse.iat[4])

-        self.assertTrue(np.isnan(sparse.iat[-1]))
+        assert np.isnan(sparse.iat[-1])
         self.assertEqual(sparse.iat[-5], orig.iat[-5])

     def test_iat_fill_value(self):
         orig = pd.Series([1, np.nan, 0, 3, 0])
         sparse = orig.to_sparse()
         self.assertEqual(sparse.iat[0], orig.iat[0])
-        self.assertTrue(np.isnan(sparse.iat[1]))
+        assert np.isnan(sparse.iat[1])
         self.assertEqual(sparse.iat[2], orig.iat[2])
         self.assertEqual(sparse.iat[3], orig.iat[3])
         self.assertEqual(sparse.iat[4], orig.iat[4])
@@ -302,19 +302,19 @@ def test_iat_fill_value(self):
     def test_get(self):
         s = pd.SparseSeries([1, np.nan, np.nan, 3, np.nan])
         self.assertEqual(s.get(0), 1)
-        self.assertTrue(np.isnan(s.get(1)))
+        assert np.isnan(s.get(1))
         assert s.get(5) is None

         s = pd.SparseSeries([1, np.nan, 0, 3, 0], index=list('ABCDE'))
         self.assertEqual(s.get('A'), 1)
-        self.assertTrue(np.isnan(s.get('B')))
+        assert np.isnan(s.get('B'))
         self.assertEqual(s.get('C'), 0)
         assert s.get('XX') is None

         s = pd.SparseSeries([1, np.nan, 0, 3, 0], index=list('ABCDE'),
                             fill_value=0)
         self.assertEqual(s.get('A'), 1)
-        self.assertTrue(np.isnan(s.get('B')))
+        assert np.isnan(s.get('B'))
         self.assertEqual(s.get('C'), 0)
         assert s.get('XX') is None

@@ -458,7 +458,7 @@ def test_getitem_multi(self):
         sparse = self.sparse

         self.assertEqual(sparse[0], orig[0])
-        self.assertTrue(np.isnan(sparse[1]))
+        assert np.isnan(sparse[1])
         self.assertEqual(sparse[3], orig[3])

         tm.assert_sp_series_equal(sparse['A'], orig['A'].to_sparse())
@@ -487,8 +487,8 @@ def test_getitem_multi_tuple(self):
         sparse = self.sparse

         self.assertEqual(sparse['C', 0], orig['C', 0])
-        self.assertTrue(np.isnan(sparse['A', 1]))
-        self.assertTrue(np.isnan(sparse['B', 0]))
+        assert np.isnan(sparse['A', 1])
+        assert np.isnan(sparse['B', 0])

     def test_getitems_slice_multi(self):
         orig = self.orig
@@ -545,8 +545,8 @@ def test_loc_multi_tuple(self):
         sparse = self.sparse

         self.assertEqual(sparse.loc['C', 0], orig.loc['C', 0])
-        self.assertTrue(np.isnan(sparse.loc['A', 1]))
-        self.assertTrue(np.isnan(sparse.loc['B', 0]))
+        assert np.isnan(sparse.loc['A', 1])
+        assert np.isnan(sparse.loc['B', 0])

     def test_loc_slice(self):
         orig = self.orig
@@ -646,7 +646,7 @@ def test_loc(self):
         sparse = orig.to_sparse()

         self.assertEqual(sparse.loc[0, 'x'], 1)
-        self.assertTrue(np.isnan(sparse.loc[1, 'z']))
+        assert np.isnan(sparse.loc[1, 'z'])
         self.assertEqual(sparse.loc[2, 'z'], 4)

         tm.assert_sp_series_equal(sparse.loc[0], orig.loc[0].to_sparse())
@@ -703,7 +703,7 @@ def test_loc_index(self):
         sparse = orig.to_sparse()

         self.assertEqual(sparse.loc['a', 'x'], 1)
-        self.assertTrue(np.isnan(sparse.loc['b', 'z']))
+        assert np.isnan(sparse.loc['b', 'z'])
         self.assertEqual(sparse.loc['c', 'z'], 4)

         tm.assert_sp_series_equal(sparse.loc['a'], orig.loc['a'].to_sparse())
@@ -763,7 +763,7 @@ def test_iloc(self):
         sparse = orig.to_sparse()

         self.assertEqual(sparse.iloc[1, 1], 3)
-        self.assertTrue(np.isnan(sparse.iloc[2, 0]))
+        assert np.isnan(sparse.iloc[2, 0])

         tm.assert_sp_series_equal(sparse.iloc[0], orig.loc[0].to_sparse())
         tm.assert_sp_series_equal(sparse.iloc[1], orig.loc[1].to_sparse())
@@ -811,8 +811,8 @@ def test_at(self):
                         index=list('ABCD'), columns=list('xyz'))
         sparse = orig.to_sparse()
         self.assertEqual(sparse.at['A', 'x'], orig.at['A', 'x'])
-        self.assertTrue(np.isnan(sparse.at['B', 'z']))
-        self.assertTrue(np.isnan(sparse.at['C', 'y']))
+        assert np.isnan(sparse.at['B', 'z'])
+        assert np.isnan(sparse.at['C', 'y'])
         self.assertEqual(sparse.at['D', 'x'], orig.at['D', 'x'])

     def test_at_fill_value(self):
@@ -823,8 +823,8 @@ def test_at_fill_value(self):
                         index=list('ABCD'), columns=list('xyz'))
         sparse = orig.to_sparse(fill_value=0)
         self.assertEqual(sparse.at['A', 'x'], orig.at['A', 'x'])
-        self.assertTrue(np.isnan(sparse.at['B', 'z']))
-        self.assertTrue(np.isnan(sparse.at['C', 'y']))
+        assert np.isnan(sparse.at['B', 'z'])
+        assert np.isnan(sparse.at['C', 'y'])
         self.assertEqual(sparse.at['D', 'x'], orig.at['D', 'x'])

     def test_iat(self):
@@ -835,11 +835,11 @@ def test_iat(self):
                         index=list('ABCD'), columns=list('xyz'))
         sparse = orig.to_sparse()
         self.assertEqual(sparse.iat[0, 0], orig.iat[0, 0])
-        self.assertTrue(np.isnan(sparse.iat[1, 2]))
-        self.assertTrue(np.isnan(sparse.iat[2, 1]))
+        assert np.isnan(sparse.iat[1, 2])
+        assert np.isnan(sparse.iat[2, 1])
         self.assertEqual(sparse.iat[2, 0], orig.iat[2, 0])

-        self.assertTrue(np.isnan(sparse.iat[-1, -2]))
+        assert np.isnan(sparse.iat[-1, -2])
         self.assertEqual(sparse.iat[-1, -1], orig.iat[-1, -1])

     def test_iat_fill_value(self):
@@ -850,11 +850,11 @@ def test_iat_fill_value(self):
                         index=list('ABCD'), columns=list('xyz'))
         sparse = orig.to_sparse(fill_value=0)
         self.assertEqual(sparse.iat[0, 0], orig.iat[0, 0])
-        self.assertTrue(np.isnan(sparse.iat[1, 2]))
-        self.assertTrue(np.isnan(sparse.iat[2, 1]))
+        assert np.isnan(sparse.iat[1, 2])
+        assert np.isnan(sparse.iat[2, 1])
         self.assertEqual(sparse.iat[2, 0], orig.iat[2, 0])

-        self.assertTrue(np.isnan(sparse.iat[-1, -2]))
+        assert np.isnan(sparse.iat[-1, -2])
         self.assertEqual(sparse.iat[-1, -1], orig.iat[-1, -1])

     def test_take(self):
diff --git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py
index 55115f45ff740..c7e1be968c148 100644
--- a/pandas/tests/sparse/test_libsparse.py
+++ b/pandas/tests/sparse/test_libsparse.py
@@ -162,25 +162,25 @@ def test_intindex_make_union(self):
         b = IntIndex(5, np.array([0, 2], dtype=np.int32))
         res = a.make_union(b)
         exp = IntIndex(5, np.array([0, 2, 3, 4], np.int32))
-        self.assertTrue(res.equals(exp))
+        assert res.equals(exp)

         a = IntIndex(5, np.array([], dtype=np.int32))
         b = IntIndex(5, np.array([0, 2], dtype=np.int32))
         res = a.make_union(b)
         exp = IntIndex(5, np.array([0, 2], np.int32))
-        self.assertTrue(res.equals(exp))
+        assert res.equals(exp)

         a = IntIndex(5, np.array([], dtype=np.int32))
         b = IntIndex(5, np.array([], dtype=np.int32))
         res = a.make_union(b)
         exp = IntIndex(5, np.array([], np.int32))
-        self.assertTrue(res.equals(exp))
+        assert res.equals(exp)

         a = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32))
         b = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32))
         res = a.make_union(b)
         exp = IntIndex(5, np.array([0, 1, 2, 3, 4], np.int32))
-        self.assertTrue(res.equals(exp))
+        assert res.equals(exp)

         a = IntIndex(5, np.array([0, 1], dtype=np.int32))
         b = IntIndex(4, np.array([0, 1], dtype=np.int32))
@@ -219,13 +219,13 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
     def test_intersect_empty(self):
         xindex = IntIndex(4, np.array([], dtype=np.int32))
         yindex = IntIndex(4, np.array([2, 3], dtype=np.int32))
-        self.assertTrue(xindex.intersect(yindex).equals(xindex))
-        self.assertTrue(yindex.intersect(xindex).equals(xindex))
+        assert xindex.intersect(yindex).equals(xindex)
+        assert yindex.intersect(xindex).equals(xindex)

         xindex = xindex.to_block_index()
         yindex = yindex.to_block_index()
-        self.assertTrue(xindex.intersect(yindex).equals(xindex))
-        self.assertTrue(yindex.intersect(xindex).equals(xindex))
+        assert xindex.intersect(yindex).equals(xindex)
+        assert yindex.intersect(xindex).equals(xindex)

     def test_intersect_identical(self):
         cases = [IntIndex(5, np.array([1, 2], dtype=np.int32)),
@@ -234,9 +234,9 @@ def test_intersect_identical(self):
                  IntIndex(5, np.array([], dtype=np.int32))]

         for case in cases:
-            self.assertTrue(case.intersect(case).equals(case))
+            assert case.intersect(case).equals(case)
             case = case.to_block_index()
-            self.assertTrue(case.intersect(case).equals(case))
+            assert case.intersect(case).equals(case)


 class TestSparseIndexCommon(tm.TestCase):
@@ -436,7 +436,7 @@ def test_make_block_boundary(self):

     def test_equals(self):
         index = BlockIndex(10, [0, 4], [2, 5])
-        self.assertTrue(index.equals(index))
+        assert index.equals(index)
         assert not index.equals(BlockIndex(10, [0, 4], [2, 6]))

     def test_check_integrity(self):
@@ -534,7 +534,7 @@ def test_int_internal(self):

     def test_equals(self):
         index = IntIndex(10, [0, 1, 2, 3, 4])
-        self.assertTrue(index.equals(index))
+        assert index.equals(index)
         assert not index.equals(IntIndex(10, [0, 1, 2, 3]))

     def test_to_block_index(self):
@@ -547,8 +547,8 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
             xbindex = xindex.to_int_index().to_block_index()
             ybindex = yindex.to_int_index().to_block_index()
             assert isinstance(xbindex, BlockIndex)
-            self.assertTrue(xbindex.equals(xindex))
-            self.assertTrue(ybindex.equals(yindex))
+            assert xbindex.equals(xindex)
+            assert ybindex.equals(yindex)

         check_cases(_check_case)

@@ -578,7 +578,7 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
             result_int_vals, ri_index, ifill = sparse_op(x, xdindex, xfill,
                                                          y, ydindex, yfill)

-            self.assertTrue(rb_index.to_int_index().equals(ri_index))
+            assert rb_index.to_int_index().equals(ri_index)
             tm.assert_numpy_array_equal(result_block_vals, result_int_vals)
             self.assertEqual(bfill, ifill)
diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py
index e0b0809c756b1..b8c12c2d64277 100644
--- a/pandas/tests/sparse/test_series.py
+++ b/pandas/tests/sparse/test_series.py
@@ -91,7 +91,7 @@ def setUp(self):
     def test_constructor_dtype(self):
         arr = SparseSeries([np.nan, 1, 2, np.nan])
         self.assertEqual(arr.dtype, np.float64)
-        self.assertTrue(np.isnan(arr.fill_value))
+        assert np.isnan(arr.fill_value)

         arr = SparseSeries([np.nan, 1, 2, np.nan], fill_value=0)
         self.assertEqual(arr.dtype, np.float64)
@@ -99,7 +99,7 @@ def test_constructor_dtype(self):

         arr = SparseSeries([0, 1, 2, 4], dtype=np.int64, fill_value=np.nan)
         self.assertEqual(arr.dtype, np.int64)
-        self.assertTrue(np.isnan(arr.fill_value))
+        assert np.isnan(arr.fill_value)

         arr = SparseSeries([0, 1, 2, 4], dtype=np.int64)
         self.assertEqual(arr.dtype, np.int64)
@@ -230,9 +230,9 @@ def test_to_dense_preserve_name(self):

     def test_constructor(self):
         # test setup guys
-        self.assertTrue(np.isnan(self.bseries.fill_value))
+        assert np.isnan(self.bseries.fill_value)
         assert isinstance(self.bseries.sp_index, BlockIndex)
-        self.assertTrue(np.isnan(self.iseries.fill_value))
+        assert np.isnan(self.iseries.fill_value)
         assert isinstance(self.iseries.sp_index, IntIndex)

         self.assertEqual(self.zbseries.fill_value, 0)
@@ -289,8 +289,8 @@ def test_constructor_scalar(self):
         data = 5
         sp = SparseSeries(data, np.arange(100))
         sp = sp.reindex(np.arange(200))
-        self.assertTrue((sp.loc[:99] == data).all())
-        self.assertTrue(isnull(sp.loc[100:]).all())
+        assert (sp.loc[:99] == data).all()
+        assert isnull(sp.loc[100:]).all()

         data = np.nan
         sp = SparseSeries(data, np.arange(100))
@@ -805,13 +805,13 @@ def test_fill_value_corner(self):
         cop.fill_value = 0
         result = self.bseries / cop

-        self.assertTrue(np.isnan(result.fill_value))
+        assert np.isnan(result.fill_value)

         cop2 = self.zbseries.copy()
         cop2.fill_value = 1
         result = cop2 / cop
         # 1 / 0 is inf
-        self.assertTrue(np.isinf(result.fill_value))
+        assert np.isinf(result.fill_value)

     def test_fill_value_when_combine_const(self):
         # GH12723
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 96628322e4ee2..1b03c4e86b23f 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -264,8 +264,8 @@ def test_factorize_nan(self):
             ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel)
             expected = np.array([0, 1, 0, na_sentinel], dtype='int32')
             self.assertEqual(len(set(key)), len(set(expected)))
-            self.assertTrue(np.array_equal(
-                pd.isnull(key), expected == na_sentinel))
+            tm.assert_numpy_array_equal(pd.isnull(key),
+                                        expected == na_sentinel)

         # nan still maps to na_sentinel when sort=False
         key = np.array([0, np.nan, 1], dtype='O')
@@ -276,8 +276,7 @@ def test_factorize_nan(self):

         expected = np.array([2, -1, 0], dtype='int32')
         self.assertEqual(len(set(key)), len(set(expected)))
-        self.assertTrue(
-            np.array_equal(pd.isnull(key), expected == na_sentinel))
+        tm.assert_numpy_array_equal(pd.isnull(key), expected == na_sentinel)

     def test_complex_sorting(self):
         # gh 12666 - check no segfault
@@ -926,7 +925,7 @@ def test_datetime_likes(self):
     def test_unique_index(self):
         cases = [pd.Index([1, 2, 3]), pd.RangeIndex(0, 3)]
         for case in cases:
-            self.assertTrue(case.is_unique)
+            assert case.is_unique
             tm.assert_numpy_array_equal(case.duplicated(),
                                         np.array([False, False, False]))

@@ -947,7 +946,7 @@ def test_group_var_generic_1d(self):
         expected_counts = counts + 3

         self.algo(out, counts, values, labels)
-        self.assertTrue(np.allclose(out, expected_out, self.rtol))
+        assert np.allclose(out, expected_out, self.rtol)
         tm.assert_numpy_array_equal(counts, expected_counts)

     def test_group_var_generic_1d_flat_labels(self):
@@ -963,7 +962,7 @@ def test_group_var_generic_1d_flat_labels(self):

         self.algo(out, counts, values, labels)

-        self.assertTrue(np.allclose(out, expected_out, self.rtol))
+        assert np.allclose(out, expected_out, self.rtol)
         tm.assert_numpy_array_equal(counts, expected_counts)

     def test_group_var_generic_2d_all_finite(self):
@@ -978,7 +977,7 @@ def test_group_var_generic_2d_all_finite(self):
         expected_counts = counts + 2

         self.algo(out, counts, values, labels)
-        self.assertTrue(np.allclose(out, expected_out, self.rtol))
+        assert np.allclose(out, expected_out, self.rtol)
         tm.assert_numpy_array_equal(counts, expected_counts)

     def test_group_var_generic_2d_some_nan(self):
@@ -1011,7 +1010,7 @@ def test_group_var_constant(self):
         self.algo(out, counts, values, labels)

         self.assertEqual(counts[0], 3)
-        self.assertTrue(out[0, 0] >= 0)
+        assert out[0, 0] >= 0
         tm.assert_almost_equal(out[0, 0], 0.0)

diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index e058a62ea3089..cbcc4dc84e6d0 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -250,13 +250,11 @@ def test_binary_ops_docs(self):
             operand2 = 'other'
             op = op_map[op_name]
             expected_str = ' '.join([operand1, op, operand2])
-            self.assertTrue(expected_str in getattr(klass,
-                                                    op_name).__doc__)
+            assert expected_str in getattr(klass, op_name).__doc__

             # reverse version of the binary ops
             expected_str = ' '.join([operand2, op, operand1])
-            self.assertTrue(expected_str in getattr(klass, 'r' +
-                                                    op_name).__doc__)
+            assert expected_str in getattr(klass, 'r' + op_name).__doc__


 class TestIndexOps(Ops):
@@ -282,8 +280,8 @@ def test_none_comparison(self):

             # noinspection PyComparisonWithNone
             result = o != None  # noqa
-            self.assertTrue(result.iat[0])
-            self.assertTrue(result.iat[1])
+            assert result.iat[0]
+            assert result.iat[1]

             result = None == o  # noqa
             assert not result.iat[0]
@@ -292,8 +290,8 @@ def test_none_comparison(self):
             # this fails for numpy < 1.9
             # and oddly for *some* platforms
             # result = None != o  # noqa
-            # self.assertTrue(result.iat[0])
-            # self.assertTrue(result.iat[1])
+            # assert result.iat[0]
+            # assert result.iat[1]

             result = None > o
             assert not result.iat[0]
@@ -355,10 +353,10 @@ def test_nanops(self):
             self.assertEqual(getattr(obj, op)(), 2.0)

             obj = klass([np.nan])
-            self.assertTrue(pd.isnull(getattr(obj, op)()))
+            assert pd.isnull(getattr(obj, op)())

             obj = klass([])
-            self.assertTrue(pd.isnull(getattr(obj, op)()))
+            assert pd.isnull(getattr(obj, op)())

             obj = klass([pd.NaT, datetime(2011, 11, 1)])
             # check DatetimeIndex monotonic path
@@ -423,12 +421,12 @@ def test_value_counts_unique_nunique(self):

             result = o.value_counts()
             tm.assert_series_equal(result, expected_s)
-            self.assertTrue(result.index.name is None)
+            assert result.index.name is None
             self.assertEqual(result.name, 'a')

             result = o.unique()
             if isinstance(o, Index):
-                self.assertTrue(isinstance(result, o.__class__))
+                assert isinstance(result, o.__class__)
                 tm.assert_index_equal(result, orig)
             elif is_datetimetz(o):
                 # datetimetz Series returns array of Timestamp
@@ -511,11 +509,11 @@ def test_value_counts_unique_nunique_null(self):
             result_s_na = o.value_counts(dropna=False)
             tm.assert_series_equal(result_s_na, expected_s_na)
-            self.assertTrue(result_s_na.index.name is None)
+            assert result_s_na.index.name is None
             self.assertEqual(result_s_na.name, 'a')
             result_s = o.value_counts()
             tm.assert_series_equal(o.value_counts(), expected_s)
-            self.assertTrue(result_s.index.name is None)
+            assert result_s.index.name is None
             self.assertEqual(result_s.name, 'a')

             result = o.unique()
@@ -530,7 +528,7 @@ def test_value_counts_unique_nunique_null(self):
                 else:
                     tm.assert_numpy_array_equal(result[1:], values[2:])

-                self.assertTrue(pd.isnull(result[0]))
+                assert pd.isnull(result[0])
                 self.assertEqual(result.dtype, orig.dtype)

             self.assertEqual(o.nunique(), 8)
@@ -691,7 +689,7 @@ def test_value_counts_datetime64(self):
             tm.assert_index_equal(unique, exp_idx)
         else:
             tm.assert_numpy_array_equal(unique[:3], expected)
-            self.assertTrue(pd.isnull(unique[3]))
+            assert pd.isnull(unique[3])

         self.assertEqual(s.nunique(), 3)
         self.assertEqual(s.nunique(dropna=False), 4)
@@ -793,7 +791,7 @@ def test_duplicated_drop_duplicates_index(self):
                 expected = np.array([False] * len(original), dtype=bool)
                 duplicated = original.duplicated()
                 tm.assert_numpy_array_equal(duplicated, expected)
-                self.assertTrue(duplicated.dtype == bool)
+                assert duplicated.dtype == bool
                 result = original.drop_duplicates()
                 tm.assert_index_equal(result, original)
                 assert result is not original
@@ -807,7 +805,7 @@ def test_duplicated_drop_duplicates_index(self):
                                     dtype=bool)
                 duplicated = idx.duplicated()
                 tm.assert_numpy_array_equal(duplicated, expected)
-                self.assertTrue(duplicated.dtype == bool)
+                assert duplicated.dtype == bool
                 tm.assert_index_equal(idx.drop_duplicates(), original)

                 base = [False] * len(idx)
@@ -817,7 +815,7 @@ def test_duplicated_drop_duplicates_index(self):

                 duplicated = idx.duplicated(keep='last')
                 tm.assert_numpy_array_equal(duplicated, expected)
-                self.assertTrue(duplicated.dtype == bool)
+                assert duplicated.dtype == bool
                 result = idx.drop_duplicates(keep='last')
                 tm.assert_index_equal(result, idx[~expected])

@@ -828,7 +826,7 @@ def test_duplicated_drop_duplicates_index(self):

                 duplicated = idx.duplicated(keep=False)
                 tm.assert_numpy_array_equal(duplicated, expected)
-                self.assertTrue(duplicated.dtype == bool)
+                assert duplicated.dtype == bool
                 result = idx.drop_duplicates(keep=False)
                 tm.assert_index_equal(result, idx[~expected])

@@ -951,7 +949,7 @@ def test_memory_usage(self):
             if (is_object_dtype(o) or (isinstance(o, Series) and
                                        is_object_dtype(o.index))):
                 # if there are objects, only deep will pick them up
-                self.assertTrue(res_deep > res)
+                assert res_deep > res
             else:
                 self.assertEqual(res, res_deep)

@@ -965,16 +963,16 @@ def test_memory_usage(self):
             # sys.getsizeof will call the .memory_usage with
             # deep=True, and add on some GC overhead
             diff = res_deep - sys.getsizeof(o)
-            self.assertTrue(abs(diff) < 100)
+            assert abs(diff) < 100

     def test_searchsorted(self):
         # See gh-12238
         for o in self.objs:
             index = np.searchsorted(o, max(o))
-            self.assertTrue(0 <= index <= len(o))
+            assert 0 <= index <= len(o)

             index = np.searchsorted(o, max(o), sorter=range(len(o)))
-            self.assertTrue(0 <= index <= len(o))
+            assert 0 <= index <= len(o)

     def test_validate_bool_args(self):
         invalid_values = [1, "True", [1, 2, 3], 5.0]
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 252b32e264c1b..708ca92c30cac 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -140,14 +140,14 @@ def test_is_equal_dtype(self):
         c1 = Categorical(list('aabca'), categories=list('abc'), ordered=False)
         c2 = Categorical(list('aabca'), categories=list('cab'), ordered=False)
         c3 = Categorical(list('aabca'), categories=list('cab'), ordered=True)
-        self.assertTrue(c1.is_dtype_equal(c1))
-        self.assertTrue(c2.is_dtype_equal(c2))
-        self.assertTrue(c3.is_dtype_equal(c3))
+        assert c1.is_dtype_equal(c1)
+        assert c2.is_dtype_equal(c2)
+        assert c3.is_dtype_equal(c3)
         assert not c1.is_dtype_equal(c2)
         assert not c1.is_dtype_equal(c3)
         assert not c1.is_dtype_equal(Index(list('aabca')))
         assert not c1.is_dtype_equal(c1.astype(object))
-        self.assertTrue(c1.is_dtype_equal(CategoricalIndex(c1)))
+        assert c1.is_dtype_equal(CategoricalIndex(c1))
         assert not (c1.is_dtype_equal(
             CategoricalIndex(c1, categories=list('cab'))))
         assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True))
@@ -216,51 +216,51 @@ def f():

         # This should result in integer categories, not float!
         cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
-        self.assertTrue(is_integer_dtype(cat.categories))
+        assert is_integer_dtype(cat.categories)

         # https://github.com/pandas-dev/pandas/issues/3678
         cat = pd.Categorical([np.nan, 1, 2, 3])
-        self.assertTrue(is_integer_dtype(cat.categories))
+        assert is_integer_dtype(cat.categories)

         # this should result in floats
         cat = pd.Categorical([np.nan, 1, 2., 3])
-        self.assertTrue(is_float_dtype(cat.categories))
+        assert is_float_dtype(cat.categories)

         cat = pd.Categorical([np.nan, 1., 2., 3.])
-        self.assertTrue(is_float_dtype(cat.categories))
+        assert is_float_dtype(cat.categories)

         # This doesn't work -> this would probably need some kind of "remember
         # the original type" feature to try to cast the array interface result
         # to...

         # vals = np.asarray(cat[cat.notnull()])
-        # self.assertTrue(is_integer_dtype(vals))
+        # assert is_integer_dtype(vals)

         # corner cases
         cat = pd.Categorical([1])
-        self.assertTrue(len(cat.categories) == 1)
-        self.assertTrue(cat.categories[0] == 1)
-        self.assertTrue(len(cat.codes) == 1)
-        self.assertTrue(cat.codes[0] == 0)
+        assert len(cat.categories) == 1
+        assert cat.categories[0] == 1
+        assert len(cat.codes) == 1
+        assert cat.codes[0] == 0

         cat = pd.Categorical(["a"])
-        self.assertTrue(len(cat.categories) == 1)
-        self.assertTrue(cat.categories[0] == "a")
-        self.assertTrue(len(cat.codes) == 1)
-        self.assertTrue(cat.codes[0] == 0)
+        assert len(cat.categories) == 1
+        assert cat.categories[0] == "a"
+        assert len(cat.codes) == 1
+        assert cat.codes[0] == 0

         # Scalars should be converted to lists
         cat = pd.Categorical(1)
-        self.assertTrue(len(cat.categories) == 1)
-        self.assertTrue(cat.categories[0] == 1)
-        self.assertTrue(len(cat.codes) == 1)
-        self.assertTrue(cat.codes[0] == 0)
+        assert len(cat.categories) == 1
+        assert cat.categories[0] == 1
+        assert len(cat.codes) == 1
+        assert cat.codes[0] == 0

         cat = pd.Categorical([1], categories=1)
-        self.assertTrue(len(cat.categories) == 1)
-        self.assertTrue(cat.categories[0] == 1)
-        self.assertTrue(len(cat.codes) == 1)
-        self.assertTrue(cat.codes[0] == 0)
+        assert len(cat.categories) == 1
+        assert cat.categories[0] == 1
+        assert len(cat.codes) == 1
+        assert cat.codes[0] == 0

         # Catch old style constructor useage: two arrays, codes + categories
         # We can only catch two cases:
@@ -360,7 +360,7 @@ def test_constructor_with_datetimelike(self):
             tm.assert_numpy_array_equal(c.codes, exp)

             result = repr(c)
-            self.assertTrue('NaT' in result)
+            assert 'NaT' in result

     def test_constructor_from_index_series_datetimetz(self):
         idx = pd.date_range('2015-01-01 10:00', freq='D', periods=3,
@@ -618,7 +618,7 @@ def test_categories_none(self):
     def test_describe(self):
         # string type
         desc = self.factor.describe()
-        self.assertTrue(self.factor.ordered)
+        assert self.factor.ordered
         exp_index = pd.CategoricalIndex(['a', 'b', 'c'], name='categories',
                                         ordered=self.factor.ordered)
         expected = DataFrame({'counts': [3, 2, 3],
@@ -792,7 +792,7 @@ def test_construction_with_ordered(self):
         cat = Categorical([0, 1, 2], ordered=False)
         assert not cat.ordered
         cat = Categorical([0, 1, 2], ordered=True)
-        self.assertTrue(cat.ordered)
+        assert cat.ordered

     def test_ordered_api(self):
         # GH 9347
@@ -807,12 +807,12 @@ def test_ordered_api(self):

         cat3 = pd.Categorical(["a", "c", "b"], ordered=True)
         tm.assert_index_equal(cat3.categories, Index(['a', 'b', 'c']))
-        self.assertTrue(cat3.ordered)
+        assert cat3.ordered

         cat4 = pd.Categorical(["a", "c", "b"], categories=['b', 'c', 'a'],
                               ordered=True)
         tm.assert_index_equal(cat4.categories, Index(['b', 'c', 'a']))
-        self.assertTrue(cat4.ordered)
+        assert cat4.ordered

     def test_set_ordered(self):

@@ -820,16 +820,16 @@ def test_set_ordered(self):
         cat2 = cat.as_unordered()
         assert not cat2.ordered
         cat2 = cat.as_ordered()
-        self.assertTrue(cat2.ordered)
+        assert cat2.ordered

         cat2.as_unordered(inplace=True)
         assert not cat2.ordered
         cat2.as_ordered(inplace=True)
-        self.assertTrue(cat2.ordered)
+        assert cat2.ordered

-        self.assertTrue(cat2.set_ordered(True).ordered)
+        assert cat2.set_ordered(True).ordered
         assert not cat2.set_ordered(False).ordered
         cat2.set_ordered(True, inplace=True)
-        self.assertTrue(cat2.ordered)
+        assert cat2.ordered
         cat2.set_ordered(False, inplace=True)
         assert not cat2.ordered

@@ -1168,7 +1168,7 @@ def test_min_max(self):
                           categories=['d', 'c', 'b', 'a'], ordered=True)
         _min = cat.min()
         _max = cat.max()
-        self.assertTrue(np.isnan(_min))
+        assert np.isnan(_min)
         self.assertEqual(_max, "b")

         _min = cat.min(numeric_only=True)
@@ -1180,7 +1180,7 @@ def test_min_max(self):
                           ordered=True)
         _min = cat.min()
         _max = cat.max()
-        self.assertTrue(np.isnan(_min))
+        assert np.isnan(_min)
         self.assertEqual(_max, 1)

         _min = cat.min(numeric_only=True)
@@ -1433,17 +1433,16 @@ def test_memory_usage(self):

         cat = pd.Categorical([1, 2, 3])

         # .categories is an index, so we include the hashtable
-        self.assertTrue(cat.nbytes > 0 and cat.nbytes <= cat.memory_usage())
-        self.assertTrue(cat.nbytes > 0 and
-                        cat.nbytes <= cat.memory_usage(deep=True))
+        assert 0 < cat.nbytes <= cat.memory_usage()
+        assert 0 < cat.nbytes <= cat.memory_usage(deep=True)

         cat = pd.Categorical(['foo', 'foo', 'bar'])
-        self.assertTrue(cat.memory_usage(deep=True) > cat.nbytes)
+        assert cat.memory_usage(deep=True) > cat.nbytes

         # sys.getsizeof will call the .memory_usage with
         # deep=True, and add on some GC overhead
         diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
-        self.assertTrue(abs(diff) < 100)
+        assert abs(diff) < 100

     def test_searchsorted(self):
         # https://github.com/pandas-dev/pandas/issues/8420
@@ -1640,23 +1639,23 @@ def test_codes_dtypes(self):

         # GH 8453
         result = Categorical(['foo', 'bar', 'baz'])
-        self.assertTrue(result.codes.dtype == 'int8')
+        assert result.codes.dtype == 'int8'

         result = Categorical(['foo%05d' % i for i in range(400)])
-        self.assertTrue(result.codes.dtype == 'int16')
+        assert result.codes.dtype == 'int16'

         result = Categorical(['foo%05d' % i for i in range(40000)])
-        self.assertTrue(result.codes.dtype == 'int32')
+        assert result.codes.dtype == 'int32'

         # adding cats
         result = Categorical(['foo', 'bar', 'baz'])
-        self.assertTrue(result.codes.dtype == 'int8')
+        assert result.codes.dtype == 'int8'
         result = result.add_categories(['foo%05d' % i for i in range(400)])
-        self.assertTrue(result.codes.dtype == 'int16')
+        assert result.codes.dtype == 'int16'

         # removing cats
         result = result.remove_categories(['foo%05d' % i for i in range(300)])
-        self.assertTrue(result.codes.dtype == 'int8')
+        assert result.codes.dtype == 'int8'

     def test_basic(self):

@@ -1893,7 +1892,7 @@ def test_sideeffects_free(self):
         # so this WILL change values
         cat = Categorical(["a", "b", "c", "a"])
         s = pd.Series(cat)
-        self.assertTrue(s.values is cat)
+        assert s.values is cat
         s.cat.categories = [1, 2, 3]
         exp_s = np.array([1, 2, 3, 1], dtype=np.int64)
         tm.assert_numpy_array_equal(s.__array__(), exp_s)
@@ -2816,14 +2815,14 @@ def test_min_max(self):
         ], ordered=True))
         _min = cat.min()
         _max = cat.max()
-        self.assertTrue(np.isnan(_min))
+        assert np.isnan(_min)
         self.assertEqual(_max, "b")

         cat = Series(Categorical(
             [np.nan, 1, 2, np.nan],
             categories=[5, 4, 3, 2, 1], ordered=True))
         _min = cat.min()
         _max = cat.max()
-        self.assertTrue(np.isnan(_min))
+        assert np.isnan(_min)
         self.assertEqual(_max, 1)

     def test_mode(self):
@@ -3188,7 +3187,7 @@ def test_slicing_and_getting_ops(self):
         # frame
         res_df = df.iloc[2:4, :]
         tm.assert_frame_equal(res_df, exp_df)
-        self.assertTrue(is_categorical_dtype(res_df["cats"]))
+        assert is_categorical_dtype(res_df["cats"])

         # row
         res_row = df.iloc[2, :]
@@ -3198,7 +3197,7 @@ def test_slicing_and_getting_ops(self):
         # col
         res_col = df.iloc[:, 0]
         tm.assert_series_equal(res_col, exp_col)
-        self.assertTrue(is_categorical_dtype(res_col))
+        assert is_categorical_dtype(res_col)

         # single value
         res_val = df.iloc[2, 0]
@@ -3208,7 +3207,7 @@ def test_slicing_and_getting_ops(self):
         # frame
         res_df = df.loc["j":"k", :]
         tm.assert_frame_equal(res_df, exp_df)
-        self.assertTrue(is_categorical_dtype(res_df["cats"]))
+        assert is_categorical_dtype(res_df["cats"])

         # row
         res_row = df.loc["j", :]
@@ -3218,7 +3217,7 @@ def test_slicing_and_getting_ops(self):
         # col
         res_col = df.loc[:, "cats"]
         tm.assert_series_equal(res_col, exp_col)
-        self.assertTrue(is_categorical_dtype(res_col))
+        assert is_categorical_dtype(res_col)

         # single value
         res_val = df.loc["j", "cats"]
@@ -3229,7 +3228,7 @@ def test_slicing_and_getting_ops(self):
         # res_df = df.loc["j":"k",[0,1]] # doesn't work?
         res_df = df.loc["j":"k", :]
         tm.assert_frame_equal(res_df, exp_df)
-        self.assertTrue(is_categorical_dtype(res_df["cats"]))
+        assert is_categorical_dtype(res_df["cats"])

         # row
         res_row = df.loc["j", :]
@@ -3239,7 +3238,7 @@ def test_slicing_and_getting_ops(self):
         # col
         res_col = df.loc[:, "cats"]
         tm.assert_series_equal(res_col, exp_col)
-        self.assertTrue(is_categorical_dtype(res_col))
+        assert is_categorical_dtype(res_col)

         # single value
         res_val = df.loc["j", df.columns[0]]
@@ -3272,23 +3271,23 @@ def test_slicing_and_getting_ops(self):

         res_df = df.iloc[slice(2, 4)]
         tm.assert_frame_equal(res_df, exp_df)
-        self.assertTrue(is_categorical_dtype(res_df["cats"]))
+        assert is_categorical_dtype(res_df["cats"])

         res_df = df.iloc[[2, 3]]
         tm.assert_frame_equal(res_df, exp_df)
-        self.assertTrue(is_categorical_dtype(res_df["cats"]))
+        assert is_categorical_dtype(res_df["cats"])

         res_col = df.iloc[:, 0]
         tm.assert_series_equal(res_col, exp_col)
-        self.assertTrue(is_categorical_dtype(res_col))
+        assert is_categorical_dtype(res_col)

         res_df = df.iloc[:, slice(0, 2)]
         tm.assert_frame_equal(res_df, df)
-        self.assertTrue(is_categorical_dtype(res_df["cats"]))
+        assert is_categorical_dtype(res_df["cats"])

         res_df = df.iloc[:, [0, 1]]
         tm.assert_frame_equal(res_df, df)
-        self.assertTrue(is_categorical_dtype(res_df["cats"]))
+        assert is_categorical_dtype(res_df["cats"])

     def test_slicing_doc_examples(self):

@@ -3784,22 +3783,22 @@ def test_cat_equality(self):

         # vs scalar
         assert not (a == 'a').all()
-        self.assertTrue(((a != 'a') == ~(a == 'a')).all())
+        assert ((a != 'a') == ~(a == 'a')).all()

         assert not ('a' == a).all()
-        self.assertTrue((a == 'a')[0])
-        self.assertTrue(('a' == a)[0])
+        assert (a == 'a')[0]
+        assert ('a' == a)[0]
         assert not ('a' != a)[0]

         # vs list-like
-        self.assertTrue((a == a).all())
+        assert (a == a).all()
         assert not (a != a).all()

-        self.assertTrue((a == list(a)).all())
-        self.assertTrue((a == b).all())
-        self.assertTrue((b == a).all())
-        self.assertTrue(((~(a == b)) == (a != b)).all())
-        self.assertTrue(((~(b == a)) == (b != a)).all())
+        assert (a == list(a)).all()
+        assert (a == b).all()
+        assert (b == a).all()
+        assert ((~(a == b)) == (a != b)).all()
+        assert ((~(b == a)) == (b != a)).all()
         assert not (a == c).all()
         assert not (c == a).all()
@@ -3807,15 +3806,15 @@ def test_cat_equality(self):
         assert not (d == a).all()

         # vs a cat-like
-        self.assertTrue((a == e).all())
-        self.assertTrue((e == a).all())
+        assert (a == e).all()
+        assert (e == a).all()
         assert not (a == f).all()
         assert not (f == a).all()

-        self.assertTrue(((~(a == e) == (a != e)).all()))
-        self.assertTrue(((~(e == a) == (e != a)).all()))
-        self.assertTrue(((~(a == f) == (a != f)).all()))
-        self.assertTrue(((~(f == a) == (f != a)).all()))
+        assert ((~(a == e) == (a != e)).all())
+        assert ((~(e == a) == (e != a)).all())
+        assert ((~(a == f) == (a != f)).all())
+        assert ((~(f == a) == (f != a)).all())

         # non-equality is not comparable
         pytest.raises(TypeError, lambda: a < b)
diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py
index 0e614fdbfe008..ad5418f4a4a29 100644
--- a/pandas/tests/test_config.py
+++ b/pandas/tests/test_config.py
@@ -32,10 +32,10 @@ def tearDown(self):

     def test_api(self):

         # the pandas object exposes the user API
-        self.assertTrue(hasattr(pd, 'get_option'))
-        self.assertTrue(hasattr(pd, 'set_option'))
-        self.assertTrue(hasattr(pd, 'reset_option'))
-        self.assertTrue(hasattr(pd, 'describe_option'))
+        assert hasattr(pd, 'get_option')
+        assert hasattr(pd, 'set_option')
+        assert hasattr(pd, 'reset_option')
+        assert hasattr(pd, 'describe_option')

     def test_is_one_of_factory(self):
         v = self.cf.is_one_of_factory([None, 12])
@@ -87,43 +87,30 @@ def test_describe_option(self):
         pytest.raises(KeyError, self.cf.describe_option, 'no.such.key')

         # we can get the description for any key we registered
-        self.assertTrue(
-            'doc' in self.cf.describe_option('a', _print_desc=False))
-        self.assertTrue(
-            'doc2' in self.cf.describe_option('b', _print_desc=False))
-        self.assertTrue(
-            'precated' in self.cf.describe_option('b', _print_desc=False))
-
-        self.assertTrue(
-            'doc3' in self.cf.describe_option('c.d.e1', _print_desc=False))
-        self.assertTrue(
-            'doc4' in self.cf.describe_option('c.d.e2', _print_desc=False))
+        assert 'doc' in self.cf.describe_option('a', _print_desc=False)
+        assert 'doc2' in self.cf.describe_option('b', _print_desc=False)
+        assert 'precated' in self.cf.describe_option('b', _print_desc=False)
+        assert 'doc3' in self.cf.describe_option('c.d.e1', _print_desc=False)
+        assert 'doc4' in self.cf.describe_option('c.d.e2', _print_desc=False)

         # if no doc is specified we get a default message
         # saying "description not available"
-        self.assertTrue(
-            'vailable' in self.cf.describe_option('f', _print_desc=False))
-        self.assertTrue(
-            'vailable' in self.cf.describe_option('g.h', _print_desc=False))
-        self.assertTrue(
-            'precated' in self.cf.describe_option('g.h', _print_desc=False))
-        self.assertTrue(
-            'k' in self.cf.describe_option('g.h', _print_desc=False))
+        assert 'vailable' in self.cf.describe_option('f', _print_desc=False)
+        assert 'vailable' in self.cf.describe_option('g.h', _print_desc=False)
+        assert 'precated' in self.cf.describe_option('g.h', _print_desc=False)
+        assert 'k' in self.cf.describe_option('g.h', _print_desc=False)

         # default is reported
-        self.assertTrue(
-            'foo' in self.cf.describe_option('l', _print_desc=False))
+        assert 'foo' in self.cf.describe_option('l', _print_desc=False)
         # current value is reported
         assert 'bar' not in self.cf.describe_option('l', _print_desc=False)
         self.cf.set_option("l", "bar")
-        self.assertTrue(
-            'bar' in self.cf.describe_option('l', _print_desc=False))
+        assert 'bar' in self.cf.describe_option('l', _print_desc=False)

     def test_case_insensitive(self):
         self.cf.register_option('KanBAN', 1, 'doc')

-        self.assertTrue(
-            'doc' in self.cf.describe_option('kanbaN', _print_desc=False))
+        assert 'doc' in self.cf.describe_option('kanbaN', _print_desc=False)
         self.assertEqual(self.cf.get_option('kanBaN'), 1)
         self.cf.set_option('KanBan', 2)
         self.assertEqual(self.cf.get_option('kAnBaN'), 2)
@@ -132,7 +119,7 @@ def test_case_insensitive(self):
         pytest.raises(KeyError, self.cf.get_option, 'no_such_option')
         self.cf.deprecate_option('KanBan')

-        self.assertTrue(self.cf._is_deprecated('kAnBaN'))
+        assert self.cf._is_deprecated('kAnBaN')

     def test_get_option(self):
         self.cf.register_option('a', 1, 'doc')
@@ -142,7 +129,7 @@ def test_get_option(self):
         # gets of existing keys succeed
         self.assertEqual(self.cf.get_option('a'), 1)
         self.assertEqual(self.cf.get_option('b.c'), 'hullo')
-        self.assertTrue(self.cf.get_option('b.b') is None)
+        assert self.cf.get_option('b.b') is None

         # gets of non-existent keys fail
         pytest.raises(KeyError, self.cf.get_option, 'no_such_option')
@@ -154,7 +141,7 @@ def test_set_option(self):

         self.assertEqual(self.cf.get_option('a'), 1)
         self.assertEqual(self.cf.get_option('b.c'), 'hullo')
-        self.assertTrue(self.cf.get_option('b.b') is None)
+        assert self.cf.get_option('b.b') is None

         self.cf.set_option('a', 2)
         self.cf.set_option('b.c', 'wurld')
@@ -182,12 +169,12 @@ def test_set_option_multiple(self):

         self.assertEqual(self.cf.get_option('a'), 1)
         self.assertEqual(self.cf.get_option('b.c'), 'hullo')
-        self.assertTrue(self.cf.get_option('b.b') is None)
+        assert self.cf.get_option('b.b') is None

         self.cf.set_option('a', '2', 'b.c', None, 'b.b', 10.0)

         self.assertEqual(self.cf.get_option('a'), '2')
-        self.assertTrue(self.cf.get_option('b.c') is None)
+        assert self.cf.get_option('b.c') is None
         self.assertEqual(self.cf.get_option('b.b'), 10.0)

     def test_validation(self):
@@ -251,7 +238,7 @@ def test_deprecate_option(self):
         # we can deprecate non-existent options
         self.cf.deprecate_option('foo')

-        self.assertTrue(self.cf._is_deprecated('foo'))
+        assert self.cf._is_deprecated('foo')
         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter('always')
             try:
@@ -262,8 +249,7 @@ def test_deprecate_option(self):
                 self.fail("Nonexistent option didn't raise KeyError")

             self.assertEqual(len(w), 1)  # should have raised one warning
-            self.assertTrue(
-                'deprecated' in str(w[-1]))  # we get the default message
+            assert 'deprecated' in str(w[-1])  # we get the default message

         self.cf.register_option('a', 1, 'doc', validator=self.cf.is_int)
         self.cf.register_option('b.c', 'hullo', 'doc2')
@@ -275,10 +261,8 @@ def test_deprecate_option(self):
             self.cf.get_option('a')

             self.assertEqual(len(w), 1)  # should have raised one warning
-            self.assertTrue(
-                'eprecated' in str(w[-1]))  # we get the default message
-            self.assertTrue(
-                'nifty_ver' in str(w[-1]))  # with the removal_ver quoted
+            assert 'eprecated' in str(w[-1])  # we get the default message
+            assert 'nifty_ver' in str(w[-1])  # with the removal_ver quoted

         pytest.raises(
             KeyError, self.cf.deprecate_option, 'a')  # can't depr. twice
@@ -289,8 +273,7 @@ def test_deprecate_option(self):
             self.cf.get_option('b.c')

             self.assertEqual(len(w), 1)  # should have raised one warning
-            self.assertTrue(
-                'zounds!' in str(w[-1]))  # we get the custom message
+            assert 'zounds!' in str(w[-1])  # we get the custom message
         # test rerouting keys
         self.cf.register_option('d.a', 'foo', 'doc2')
@@ -304,24 +287,21 @@ def test_deprecate_option(self):
             self.assertEqual(self.cf.get_option('d.dep'), 'foo')

             self.assertEqual(len(w), 1)  # should have raised one warning
-            self.assertTrue(
-                'eprecated' in str(w[-1]))  # we get the custom message
+            assert 'eprecated' in str(w[-1])  # we get the custom message

         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter('always')
             self.cf.set_option('d.dep', 'baz')  # should overwrite "d.a"

             self.assertEqual(len(w), 1)  # should have raised one warning
-            self.assertTrue(
-                'eprecated' in str(w[-1]))  # we get the custom message
+            assert 'eprecated' in str(w[-1])  # we get the custom message

         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter('always')
             self.assertEqual(self.cf.get_option('d.dep'), 'baz')

             self.assertEqual(len(w), 1)  # should have raised one warning
-            self.assertTrue(
-                'eprecated' in str(w[-1]))  # we get the custom message
+            assert 'eprecated' in str(w[-1])  # we get the custom message

     def test_config_prefix(self):
         with self.cf.config_prefix("base"):
@@ -337,10 +317,8 @@ def test_config_prefix(self):
             self.assertEqual(self.cf.get_option('base.a'), 3)
             self.assertEqual(self.cf.get_option('base.b'), 4)

-            self.assertTrue(
-                'doc1' in self.cf.describe_option('base.a', _print_desc=False))
-            self.assertTrue(
-                'doc2' in self.cf.describe_option('base.b', _print_desc=False))
+            assert 'doc1' in self.cf.describe_option('base.a', _print_desc=False)
+            assert 'doc2' in self.cf.describe_option('base.b', _print_desc=False)

             self.cf.reset_option('base.a')
             self.cf.reset_option('base.b')
diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py
index 782d2682145d8..ae505a66ad75a 100644
--- a/pandas/tests/test_expressions.py
+++ b/pandas/tests/test_expressions.py
@@ -269,7 +269,7 @@ def test_invalid(self):
         # ok, we only check on first part of expression
         result = expr._can_use_numexpr(operator.add, '+', self.frame,
                                        self.frame2, 'evaluate')
-        self.assertTrue(result)
+        assert result

     def test_binary_ops(self):
         def testit():
diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py
index 621f624c41a19..0ac05bae624e5 100644
--- a/pandas/tests/test_lib.py
+++ b/pandas/tests/test_lib.py
@@ -13,15 +13,15 @@ class TestMisc(tm.TestCase):

     def test_max_len_string_array(self):

         arr = a = np.array(['foo', 'b', np.nan], dtype='object')
-        self.assertTrue(lib.max_len_string_array(arr), 3)
+        assert lib.max_len_string_array(arr) == 3

         # unicode
         arr = a.astype('U').astype(object)
-        self.assertTrue(lib.max_len_string_array(arr), 3)
+        assert lib.max_len_string_array(arr) == 3

         # bytes for python3
         arr = a.astype('S').astype(object)
-        self.assertTrue(lib.max_len_string_array(arr), 3)
+        assert lib.max_len_string_array(arr) == 3

         # raises
         pytest.raises(TypeError,
@@ -139,13 +139,13 @@ def test_maybe_indices_to_slice_both_edges(self):
         for step in [1, 2, 4, 5, 8, 9]:
             indices = np.arange(0, 9, step, dtype=np.int64)
             maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
-            self.assertTrue(isinstance(maybe_slice, slice))
+            assert isinstance(maybe_slice, slice)
             tm.assert_numpy_array_equal(target[indices], target[maybe_slice])

             # reverse
             indices = indices[::-1]
             maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
-            self.assertTrue(isinstance(maybe_slice, slice))
+            assert isinstance(maybe_slice, slice)
             tm.assert_numpy_array_equal(target[indices], target[maybe_slice])

         # not slice
@@ -189,16 +189,16 @@ def test_maybe_indices_to_slice_middle(self):
     def test_maybe_booleans_to_slice(self):
         arr = np.array([0, 0, 1, 1, 1, 0, 1], dtype=np.uint8)
         result = lib.maybe_booleans_to_slice(arr)
-        self.assertTrue(result.dtype == np.bool_)
+        assert result.dtype == np.bool_

         result = lib.maybe_booleans_to_slice(arr[:0])
-        self.assertTrue(result == slice(0, 0))
+        assert result == slice(0, 0)

     def test_get_reverse_indexer(self):
         indexer = np.array([-1, -1, 1, 2, 0, -1, 3, 4], dtype=np.int64)
         result = lib.get_reverse_indexer(indexer, 5)
         expected = np.array([4, 2, 3, 6, 7], dtype=np.int64)
-        self.assertTrue(np.array_equal(result, expected))
+        assert np.array_equal(result, expected)


 class TestNullObj(tm.TestCase):
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 668f5b2a5a962..1a4603978ce38 100755
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -233,7 +233,7 @@ def test_repr_name_coincide(self):
         df = DataFrame({'value': [0, 1]}, index=index)

         lines = repr(df).split('\n')
-        self.assertTrue(lines[2].startswith('a 0 foo'))
+        assert lines[2].startswith('a 0 foo')

     def test_getitem_simple(self):
         df = self.frame.T
@@ -289,12 +289,12 @@ def test_series_setitem(self):
         s = self.ymd['A']

         s[2000, 3] = np.nan
-        self.assertTrue(isnull(s.values[42:65]).all())
-        self.assertTrue(notnull(s.values[:42]).all())
-        self.assertTrue(notnull(s.values[65:]).all())
+        assert isnull(s.values[42:65]).all()
+        assert notnull(s.values[:42]).all()
+        assert notnull(s.values[65:]).all()

         s[2000, 3, 10] = np.nan
-        self.assertTrue(isnull(s[49]))
+        assert isnull(s[49])

     def test_series_slice_partial(self):
         pass
@@ -333,8 +333,8 @@ def test_frame_getitem_setitem_slice(self):
         cp = self.frame.copy()
         cp.iloc[:4] = 0

-        self.assertTrue((cp.values[:4] == 0).all())
-        self.assertTrue((cp.values[4:] != 0).all())
+        assert (cp.values[:4] == 0).all()
+        assert (cp.values[4:] != 0).all()

     def test_frame_getitem_setitem_multislice(self):
         levels = [['t1', 't2'], ['a', 'b', 'c']]
@@ -393,7 +393,7 @@ def test_frame_setitem_multi_column(self):

         # Works, but adds a column instead of updating the two existing ones
         df['A'] = 0.0  # Doesn't work
-        self.assertTrue((df['A'].values == 0).all())
+        assert (df['A'].values == 0).all()

         # it broadcasts
         df['B', '1'] = [1, 2, 3]
@@ -616,7 +616,7 @@ def test_getitem_setitem_slice_integers(self):
         tm.assert_frame_equal(res, exp)

         frame.loc[1:2] = 7
-        self.assertTrue((frame.loc[1:2] == 7).values.all())
+        assert (frame.loc[1:2] == 7).values.all()

         series = Series(np.random.randn(len(index)), index=index)

@@ -625,7 +625,7 @@ def test_getitem_setitem_slice_integers(self):
         tm.assert_series_equal(res, exp)

         series.loc[1:2] = 7
-        self.assertTrue((series.loc[1:2] == 7).values.all())
+        assert (series.loc[1:2] == 7).values.all()

     def test_getitem_int(self):
         levels = [[0, 1], [0, 1, 2]]
@@ -719,8 +719,8 @@ def test_delevel_infer_dtype(self):
         df = DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'],
                        index=index)
         deleveled = df.reset_index()
-        self.assertTrue(is_integer_dtype(deleveled['prm1']))
-        self.assertTrue(is_float_dtype(deleveled['prm2']))
+        assert is_integer_dtype(deleveled['prm1'])
+        assert is_float_dtype(deleveled['prm2'])

     def test_reset_index_with_drop(self):
         deleveled = self.ymd.reset_index(drop=True)
@@ -1136,7 +1136,7 @@ def test_stack_dropna(self):
         df = df.set_index(['A', 'B'])

         stacked = df.unstack().stack(dropna=False)
-        self.assertTrue(len(stacked) > len(stacked.dropna()))
+        assert len(stacked) > len(stacked.dropna())

         stacked = df.unstack().stack(dropna=True)
         tm.assert_frame_equal(stacked, stacked.dropna())
@@ -1215,7 +1215,7 @@ def test_groupby_level_no_obs(self):
         grouped = df1.groupby(axis=1, level=0)
         result = grouped.sum()
-        self.assertTrue((result.columns == ['f2', 'f3']).all())
+        assert (result.columns == ['f2', 'f3']).all()

     def test_join(self):
         a = self.frame.loc[self.frame.index[:5], ['A']]
@@ -1244,7 +1244,7 @@ def test_swaplevel(self):
         back2 = swapped.swaplevel(0)
         back3 = swapped.swaplevel(0, 1)
         back4 = swapped.swaplevel('second', 'first')
-        self.assertTrue(back.index.equals(self.frame.index))
+        assert back.index.equals(self.frame.index)
         tm.assert_series_equal(back, back2)
         tm.assert_series_equal(back, back3)
         tm.assert_series_equal(back, back4)
@@ -1288,7 +1288,7 @@ def test_insert_index(self):
         df = self.ymd[:5].T
         df[2000, 1, 10] = df[2000, 1, 7]
         assert isinstance(df.columns, MultiIndex)
-        self.assertTrue((df[2000, 1, 10] == df[2000, 1, 7]).all())
+        assert (df[2000, 1, 10] == df[2000, 1, 7]).all()

     def test_alignment(self):
         x = Series(data=[1, 2, 3], index=MultiIndex.from_tuples([("A", 1), (
@@ -1314,7 +1314,7 @@ def test_frame_getitem_view(self):
         # this works because we are modifying the underlying array
         # really a no-no
         df['foo'].values[:] = 0
-        self.assertTrue((df['foo'].values == 0).all())
+        assert (df['foo'].values == 0).all()

         # but not if it's mixed-type
         df['foo', 'four'] = 'foo'
@@ -1331,7 +1331,7 @@ def f():
             df = f()
         except:
             pass
-        self.assertTrue((df['foo', 'one'] == 0).all())
+        assert (df['foo', 'one'] == 0).all()

     def test_count(self):
         frame = self.frame.copy()
@@ -1574,7 +1574,7 @@ def test_partial_ix_missing(self):

         # need to put in some work here

         # self.ymd.loc[2000, 0] = 0
-        # self.assertTrue((self.ymd.loc[2000]['A'] == 0).all())
+        # assert (self.ymd.loc[2000]['A'] == 0).all()

         # Pretty sure the second (and maybe even the first) is already wrong.
         pytest.raises(Exception, self.ymd.loc.__getitem__, (2000, 6))
@@ -1874,7 +1874,7 @@ def test_dataframe_insert_column_all_na(self):
         df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix)
         s = Series({(1, 1): 1, (1, 2): 2})
         df['new'] = s
-        self.assertTrue(df['new'].isnull().all())
+        assert df['new'].isnull().all()

     def test_join_segfault(self):
         # 1532
@@ -1890,11 +1890,11 @@ def test_set_column_scalar_with_ix(self):
         subset = self.frame.index[[1, 4, 5]]

         self.frame.loc[subset] = 99
-        self.assertTrue((self.frame.loc[subset].values == 99).all())
+        assert (self.frame.loc[subset].values == 99).all()

         col = self.frame['B']
         col[subset] = 97
-        self.assertTrue((self.frame.loc[subset, 'B'] == 97).all())
+        assert (self.frame.loc[subset, 'B'] == 97).all()

     def test_frame_dict_constructor_empty_series(self):
         s1 = Series([
@@ -1932,7 +1932,7 @@ def test_nonunique_assignment_1750(self):

         df.loc[ix, "C"] = '_'

-        self.assertTrue((df.xs((1, 1))['C'] == '_').all())
+        assert (df.xs((1, 1))['C'] == '_').all()

     def test_indexing_over_hashtable_size_cutoff(self):
         n = 10000
@@ -1986,8 +1986,8 @@ def test_tuples_have_na(self):
                            labels=[[1, 1, 1, 1, -1, 0, 0, 0],
                                    [0, 1, 2, 3, 0, 1, 2, 3]])

-        self.assertTrue(isnull(index[4][0]))
-        self.assertTrue(isnull(index.values[4][0]))
+        assert isnull(index[4][0])
+        assert isnull(index.values[4][0])

     def test_duplicate_groupby_issues(self):
         idx_tp = [('600809', '20061231'), ('600809', '20070331'),
@@ -2023,21 +2023,21 @@ def test_duplicated_drop_duplicates(self):
             [False, False, False, True, False, False], dtype=bool)
         duplicated = idx.duplicated()
         tm.assert_numpy_array_equal(duplicated, expected)
-        self.assertTrue(duplicated.dtype == bool)
+        assert duplicated.dtype == bool
         expected = MultiIndex.from_arrays(([1, 2, 3, 2, 3], [1, 1, 1, 2, 2]))
         tm.assert_index_equal(idx.drop_duplicates(), expected)

         expected = np.array([True, False, False, False, False, False])
         duplicated = idx.duplicated(keep='last')
         tm.assert_numpy_array_equal(duplicated, expected)
-        self.assertTrue(duplicated.dtype == bool)
+        assert duplicated.dtype == bool
         expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2]))
         tm.assert_index_equal(idx.drop_duplicates(keep='last'), expected)

         expected = np.array([True, False, False, True, False, False])
         duplicated = idx.duplicated(keep=False)
         tm.assert_numpy_array_equal(duplicated, expected)
-        self.assertTrue(duplicated.dtype == bool)
+        assert duplicated.dtype == bool
         expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2]))
         tm.assert_index_equal(idx.drop_duplicates(keep=False), expected)

@@ -2387,7 +2387,7 @@ def test_sort_index_level_large_cardinality(self):

         # it works!
         result = df.sort_index(level=0)
-        self.assertTrue(result.index.lexsort_depth == 3)
+        assert result.index.lexsort_depth == 3

         # #2684 (int32)
         index = MultiIndex.from_arrays([np.arange(4000)] * 3)
@@ -2395,8 +2395,8 @@ def test_sort_index_level_large_cardinality(self):

         # it works!
         result = df.sort_index(level=0)
-        self.assertTrue((result.dtypes.values == df.dtypes.values).all())
-        self.assertTrue(result.index.lexsort_depth == 3)
+        assert (result.dtypes.values == df.dtypes.values).all()
+        assert result.index.lexsort_depth == 3

     def test_sort_index_level_by_name(self):
         self.frame.index.names = ['first', 'second']
@@ -2426,7 +2426,7 @@ def test_is_lexsorted(self):

         index = MultiIndex(levels=levels,
                            labels=[[0, 0, 0, 1, 1, 1],
                                    [0, 1, 2, 0, 1, 2]])
-        self.assertTrue(index.is_lexsorted())
+        assert index.is_lexsorted()

         index = MultiIndex(levels=levels,
                            labels=[[0, 0, 0, 1, 1, 1],
                                    [0, 1, 2, 0, 2, 1]])
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
index a108749db8e6a..dda466a6937dd 100644
--- a/pandas/tests/test_nanops.py
+++ b/pandas/tests/test_nanops.py
@@ -347,7 +347,7 @@ def test_nanmean_overflow(self):
             np_result = s.values.mean()
             self.assertEqual(result, a)
             self.assertEqual(result, np_result)
-            self.assertTrue(result.dtype == np.float64)
+            assert result.dtype == np.float64

     def test_returned_dtype(self):

@@ -362,15 +362,9 @@ def test_returned_dtype(self):
             for method in group_a + group_b:
                 result = getattr(s, method)()
                 if is_integer_dtype(dtype) and method in group_a:
-                    self.assertTrue(
-                        result.dtype == np.float64,
-                        "return dtype expected from %s is np.float64, "
-                        "got %s instead" % (method, result.dtype))
+                    assert result.dtype == np.float64
                 else:
-                    self.assertTrue(
-                        result.dtype == dtype,
-                        "return dtype expected from %s is %s, "
-                        "got %s instead" % (method, dtype, result.dtype))
+                    assert result.dtype == dtype

     def test_nanmedian(self):
         with warnings.catch_warnings(record=True):
@@ -657,7 +651,7 @@ def check_bool(self, func, value, correct, *args, **kwargs):
         try:
             res0 = func(value, *args, **kwargs)
             if correct:
-                self.assertTrue(res0)
+                assert res0
             else:
                 assert not res0
         except BaseException as exc:
@@ -736,12 +730,12 @@ def test__isfinite(self):
                 raise

     def test__bn_ok_dtype(self):
-        self.assertTrue(nanops._bn_ok_dtype(self.arr_float.dtype, 'test'))
-        self.assertTrue(nanops._bn_ok_dtype(self.arr_complex.dtype, 'test'))
-        self.assertTrue(nanops._bn_ok_dtype(self.arr_int.dtype, 'test'))
-        self.assertTrue(nanops._bn_ok_dtype(self.arr_bool.dtype, 'test'))
-        self.assertTrue(nanops._bn_ok_dtype(self.arr_str.dtype, 'test'))
-        self.assertTrue(nanops._bn_ok_dtype(self.arr_utf.dtype, 'test'))
+        assert nanops._bn_ok_dtype(self.arr_float.dtype, 'test')
+        assert nanops._bn_ok_dtype(self.arr_complex.dtype, 'test')
+        assert nanops._bn_ok_dtype(self.arr_int.dtype, 'test')
+        assert nanops._bn_ok_dtype(self.arr_bool.dtype, 'test')
+        assert nanops._bn_ok_dtype(self.arr_str.dtype, 'test')
+        assert nanops._bn_ok_dtype(self.arr_utf.dtype, 'test')
         assert not nanops._bn_ok_dtype(self.arr_date.dtype, 'test')
         assert not nanops._bn_ok_dtype(self.arr_tdelta.dtype, 'test')
         assert not nanops._bn_ok_dtype(self.arr_obj.dtype, 'test')
@@ -761,30 +755,24 @@ def test_numeric_values(self):
     def test_ndarray(self):
         # Test numeric ndarray
         values = np.array([1, 2, 3])
-        self.assertTrue(np.allclose(nanops._ensure_numeric(values), values),
-                        'Failed for numeric ndarray')
+        assert np.allclose(nanops._ensure_numeric(values), values)

         # Test object ndarray
        o_values = values.astype(object)
-        self.assertTrue(np.allclose(nanops._ensure_numeric(o_values), values),
-                        'Failed for object ndarray')
+        assert np.allclose(nanops._ensure_numeric(o_values), values)

         # Test convertible string ndarray
         s_values = np.array(['1', '2', '3'], dtype=object)
-        self.assertTrue(np.allclose(nanops._ensure_numeric(s_values), values),
-                        'Failed for convertible string ndarray')
+        assert np.allclose(nanops._ensure_numeric(s_values), values)

         # Test non-convertible string ndarray
         s_values = np.array(['foo', 'bar', 'baz'], dtype=object)
         pytest.raises(ValueError, lambda: nanops._ensure_numeric(s_values))

     def test_convertable_values(self):
-        self.assertTrue(np.allclose(nanops._ensure_numeric('1'), 1.0),
-                        'Failed for convertible integer string')
-        self.assertTrue(np.allclose(nanops._ensure_numeric('1.1'), 1.1),
-                        'Failed for convertible float string')
-        self.assertTrue(np.allclose(nanops._ensure_numeric('1+1j'), 1 + 1j),
-                        'Failed for convertible complex string')
+        assert np.allclose(nanops._ensure_numeric('1'), 1.0)
+        assert np.allclose(nanops._ensure_numeric('1.1'), 1.1)
+        assert np.allclose(nanops._ensure_numeric('1+1j'), 1 + 1j)

     def test_non_convertable_values(self):
         pytest.raises(TypeError, lambda: nanops._ensure_numeric('foo'))
@@ -883,14 +871,14 @@ def test_ground_truth(self):
             for ddof in range(3):
                 var = nanops.nanvar(samples, skipna=True, axis=axis,
                                     ddof=ddof)
                 tm.assert_almost_equal(var[:3], variance[axis, ddof])
-                self.assertTrue(np.isnan(var[3]))
+                assert np.isnan(var[3])

         # Test nanstd.
         for axis in range(2):
             for ddof in range(3):
                 std = nanops.nanstd(samples, skipna=True, axis=axis,
                                     ddof=ddof)
                 tm.assert_almost_equal(std[:3], variance[axis, ddof] ** 0.5)
-                self.assertTrue(np.isnan(std[3]))
+                assert np.isnan(std[3])

     def test_nanstd_roundoff(self):
         # Regression test for GH 10242 (test data taken from GH 10489). Ensure
@@ -943,7 +931,7 @@ def test_axis(self):
     def test_nans(self):
         samples = np.hstack([self.samples, np.nan])
         skew = nanops.nanskew(samples, skipna=False)
-        self.assertTrue(np.isnan(skew))
+        assert np.isnan(skew)

     def test_nans_skipna(self):
         samples = np.hstack([self.samples, np.nan])
@@ -993,7 +981,7 @@ def test_axis(self):
     def test_nans(self):
         samples = np.hstack([self.samples, np.nan])
         kurt = nanops.nankurt(samples, skipna=False)
-        self.assertTrue(np.isnan(kurt))
+        assert np.isnan(kurt)

     def test_nans_skipna(self):
         samples = np.hstack([self.samples, np.nan])
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index 802acc86d3359..c9894ad9a9acf 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -808,7 +808,7 @@ def _check_view(self, indexer, comp):
         cp = self.panel.copy()
         obj = cp.loc[indexer]
         obj.values[:] = 0
-        self.assertTrue((obj.values == 0).all())
+        assert (obj.values == 0).all()
         comp(cp.loc[indexer].reindex_like(obj), obj)

     def test_logical_with_nas(self):
@@ -1047,13 +1047,13 @@ def test_constructor_fails_with_not_3d_input(self):

     def test_consolidate(self):
         with catch_warnings(record=True):
-            self.assertTrue(self.panel._data.is_consolidated())
+            assert self.panel._data.is_consolidated()

             self.panel['foo'] = 1.
             assert not self.panel._data.is_consolidated()

             panel = self.panel._consolidate()
-            self.assertTrue(panel._data.is_consolidated())
+            assert panel._data.is_consolidated()

     def test_ctor_dict(self):
         with catch_warnings(record=True):
@@ -1134,10 +1134,10 @@ def test_ctor_orderedDict(self):
             :50]  # unique random int keys
         d = OrderedDict([(k, mkdf(10, 5)) for k in keys])
         p = Panel(d)
-        self.assertTrue(list(p.items) == keys)
+        assert list(p.items) == keys

         p = Panel.from_dict(d)
-        self.assertTrue(list(p.items) == keys)
+        assert list(p.items) == keys

     def test_constructor_resize(self):
         with catch_warnings(record=True):
@@ -1440,7 +1440,7 @@ def test_reindex(self):
             result = self.panel.reindex(
                 major=self.panel.major_axis, copy=False)
             assert_panel_equal(result, self.panel)
-            self.assertTrue(result is self.panel)
+            assert result is self.panel

     def test_reindex_multi(self):
         with catch_warnings(record=True):
@@ -1550,7 +1550,7 @@ def test_sort_index(self):
     def test_fillna(self):
         with catch_warnings(record=True):
             filled = self.panel.fillna(0)
-            self.assertTrue(np.isfinite(filled.values).all())
+            assert np.isfinite(filled.values).all()

             filled = self.panel.fillna(method='backfill')
             assert_frame_equal(filled['ItemA'],
@@ -1695,7 +1695,7 @@ def test_transpose_copy(self):
             assert_panel_equal(result, expected)

             panel.values[0, 1, 1] = np.nan
-            self.assertTrue(notnull(result.values[1, 0, 1]))
+            assert notnull(result.values[1, 0, 1])

     def test_to_frame(self):
         with catch_warnings(record=True):
@@ -1864,7 +1864,7 @@ def test_to_panel_na_handling(self):
                                  [0, 1, 2, 3, 4, 5, 2, 3, 4, 5]])

         panel = df.to_panel()
-        self.assertTrue(isnull(panel[0].loc[1, [0, 1]]).all())
+        assert isnull(panel[0].loc[1, [0, 1]]).all()

     def test_to_panel_duplicates(self):
         # #2441
@@ -2127,8 +2127,8 @@ def test_multiindex_get(self):
             f2 = wp.loc['a']
             assert_panel_equal(f1, f2)

-            self.assertTrue((f1.items == [1, 2]).all())
-            self.assertTrue((f2.items == [1, 2]).all())
+            assert (f1.items == [1, 2]).all()
+            assert (f2.items == [1, 2]).all()

             ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)],
                                          names=['first', 'second'])
@@ -2140,10 +2140,10 @@ def test_multiindex_blocks(self):
             wp = Panel(self.panel._data)
             wp.items = ind
             f1 = wp['a']
-            self.assertTrue((f1.items == [1, 2]).all())
+            assert (f1.items == [1, 2]).all()

             f1 = wp[('b', 1)]
-            self.assertTrue((f1.columns == ['A', 'B', 'C', 'D']).all())
+            assert (f1.columns == ['A', 'B', 'C', 'D']).all()

     def test_repr_empty(self):
         with catch_warnings(record=True):
@@ -2165,7 +2165,7 @@ def test_rename(self):
             # don't copy
             renamed_nocopy = self.panel.rename_axis(mapper, axis=0,
                                                     copy=False)
             renamed_nocopy['foo'] = 3.
-            self.assertTrue((self.panel['ItemA'].values == 3).all())
+            assert (self.panel['ItemA'].values == 3).all()

     def test_get_attr(self):
         assert_frame_equal(self.panel['ItemA'], self.panel.ItemA)
@@ -2413,18 +2413,18 @@ def test_update_raise(self):
                       **{'raise_conflict': True})

     def test_all_any(self):
-        self.assertTrue((self.panel.all(axis=0).values == nanall(
-            self.panel, axis=0)).all())
-        self.assertTrue((self.panel.all(axis=1).values == nanall(
-            self.panel, axis=1).T).all())
-        self.assertTrue((self.panel.all(axis=2).values == nanall(
-            self.panel, axis=2).T).all())
-        self.assertTrue((self.panel.any(axis=0).values == nanany(
-            self.panel, axis=0)).all())
-        self.assertTrue((self.panel.any(axis=1).values == nanany(
-            self.panel, axis=1).T).all())
-        self.assertTrue((self.panel.any(axis=2).values == nanany(
-            self.panel, axis=2).T).all())
+        assert (self.panel.all(axis=0).values == nanall(
+            self.panel, axis=0)).all()
+        assert (self.panel.all(axis=1).values == nanall(
+            self.panel, axis=1).T).all()
+        assert (self.panel.all(axis=2).values == nanall(
+            self.panel, axis=2).T).all()
+        assert (self.panel.any(axis=0).values == nanany(
+            self.panel, axis=0)).all()
+        assert (self.panel.any(axis=1).values == nanany(
+            self.panel, axis=1).T).all()
+        assert (self.panel.any(axis=2).values == nanany(
+            self.panel, axis=2).T).all()

     def test_all_any_unhandled(self):
         pytest.raises(NotImplementedError, self.panel.all, bool_only=True)
@@ -2532,10 +2532,10 @@ def is_sorted(arr):
             return (arr[1:] > arr[:-1]).any()

         sorted_minor = self.panel.sort_index(level=1)
-        self.assertTrue(is_sorted(sorted_minor.index.labels[1]))
+        assert is_sorted(sorted_minor.index.labels[1])

         sorted_major = sorted_minor.sort_index(level=0)
-        self.assertTrue(is_sorted(sorted_major.index.labels[0]))
+        assert is_sorted(sorted_major.index.labels[0])

     def test_to_string(self):
         buf = StringIO()
diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py
index 5b4f09009c9db..05ce239b9c5a3 100644
--- a/pandas/tests/test_panel4d.py
+++ b/pandas/tests/test_panel4d.py
@@ -402,23 +402,23 @@ def func():
             df = panel4dc.iloc[0, 0]
             df.iloc[:] = 1
             panel4dc.iloc[0, 0] = df
-            self.assertTrue((panel4dc.iloc[0, 0].values == 1).all())
+            assert (panel4dc.iloc[0, 0].values == 1).all()

             # Series
             panel4dc = self.panel4d.copy()
             s = panel4dc.iloc[0, 0, :, 0]
             s.iloc[:] = 1
             panel4dc.iloc[0, 0, :, 0] = s
-            self.assertTrue((panel4dc.iloc[0, 0, :, 0].values == 1).all())
+            assert (panel4dc.iloc[0, 0, :, 0].values == 1).all()

             # scalar
             panel4dc = self.panel4d.copy()
             panel4dc.iloc[0] = 1
             panel4dc.iloc[1] = True
             panel4dc.iloc[2] = 'foo'
-            self.assertTrue((panel4dc.iloc[0].values == 1).all())
-            self.assertTrue(panel4dc.iloc[1].values.all())
-            self.assertTrue((panel4dc.iloc[2].values == 'foo').all())
+            assert (panel4dc.iloc[0].values == 1).all()
+            assert panel4dc.iloc[1].values.all()
+            assert (panel4dc.iloc[2].values == 'foo').all()

     def test_setitem_by_indexer_mixed_type(self):

@@ -431,9 +431,9 @@ def test_setitem_by_indexer_mixed_type(self):
             panel4dc.iloc[0] = 1
             panel4dc.iloc[1] = True
             panel4dc.iloc[2] = 'foo'
-            self.assertTrue((panel4dc.iloc[0].values == 1).all())
-            self.assertTrue(panel4dc.iloc[1].values.all())
-            self.assertTrue((panel4dc.iloc[2].values == 'foo').all())
+            assert (panel4dc.iloc[0].values == 1).all()
+            assert panel4dc.iloc[1].values.all()
+            assert (panel4dc.iloc[2].values == 'foo').all()

     def test_comparisons(self):
         with catch_warnings(record=True):
@@ -681,13 +681,13 @@ def test_constructor_cast(self):

     def test_consolidate(self):
         with catch_warnings(record=True):
catch_warnings(record=True):
-            self.assertTrue(self.panel4d._data.is_consolidated())
+            assert self.panel4d._data.is_consolidated()

            self.panel4d['foo'] = 1.
            assert not self.panel4d._data.is_consolidated()

            panel4d = self.panel4d._consolidate()
-            self.assertTrue(panel4d._data.is_consolidated())
+            assert panel4d._data.is_consolidated()

    def test_ctor_dict(self):
        with catch_warnings(record=True):
@@ -819,7 +819,7 @@ def test_reindex(self):
            result = self.panel4d.reindex(
                major=self.panel4d.major_axis, copy=False)
            assert_panel4d_equal(result, self.panel4d)
-            self.assertTrue(result is self.panel4d)
+            assert result is self.panel4d

    def test_not_hashable(self):
        with catch_warnings(record=True):
@@ -859,7 +859,7 @@ def test_fillna(self):
        with catch_warnings(record=True):
            assert not np.isfinite(self.panel4d.values).all()
            filled = self.panel4d.fillna(0)
-            self.assertTrue(np.isfinite(filled.values).all())
+            assert np.isfinite(filled.values).all()

            pytest.raises(NotImplementedError,
                          self.panel4d.fillna, method='pad')
@@ -949,7 +949,7 @@ def test_rename(self):
                                                   axis=0, copy=False)
            renamed_nocopy['foo'] = 3.
-            self.assertTrue((self.panel4d['l1'].values == 3).all())
+            assert (self.panel4d['l1'].values == 3).all()

    def test_get_attr(self):
        assert_panel_equal(self.panel4d['l1'], self.panel4d.l1)
diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py
index 42a6a2a784a0e..37e22f101612b 100644
--- a/pandas/tests/test_resample.py
+++ b/pandas/tests/test_resample.py
@@ -63,9 +63,8 @@ def setUp(self):

    def test_str(self):
        r = self.series.resample('H')
-        self.assertTrue(
-            'DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, '
-            'label=left, convention=start, base=0]' in str(r))
+        assert ('DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, '
+                'label=left, convention=start, base=0]' in str(r))

    def test_api(self):

@@ -133,10 +132,10 @@ def f():
        tm.assert_numpy_array_equal(np.array(r), np.array(r.mean()))

        # masquerade as Series/DataFrame as needed for API compat
-        self.assertTrue(isinstance(self.series.resample('H'), ABCSeries))
+        assert isinstance(self.series.resample('H'), ABCSeries)
        assert not isinstance(self.frame.resample('H'), ABCSeries)
        assert not isinstance(self.series.resample('H'), ABCDataFrame)
-        self.assertTrue(isinstance(self.frame.resample('H'), ABCDataFrame))
+        assert isinstance(self.frame.resample('H'), ABCDataFrame)

        # bin numeric ops
        for op in ['__add__', '__mul__', '__truediv__', '__div__', '__sub__']:
@@ -886,7 +885,7 @@ def test_custom_grouper(self):
        g._cython_agg_general(f)

        self.assertEqual(g.ngroups, 2593)
-        self.assertTrue(notnull(g.mean()).all())
+        assert notnull(g.mean()).all()

        # construct expected val
        arr = [1] + [5] * 2592
@@ -1118,47 +1117,46 @@ def test_resample_basic_from_daily(self):

        result = s.resample('w-sun').last()
        self.assertEqual(len(result), 3)
-        self.assertTrue((result.index.dayofweek == [6, 6, 6]).all())
+        assert (result.index.dayofweek == [6, 6, 6]).all()
        self.assertEqual(result.iloc[0], s['1/2/2005'])
        self.assertEqual(result.iloc[1], s['1/9/2005'])
        self.assertEqual(result.iloc[2], s.iloc[-1])

        result = s.resample('W-MON').last()
        self.assertEqual(len(result), 2)
-        self.assertTrue((result.index.dayofweek == [0, 0]).all())
+        assert (result.index.dayofweek == [0, 0]).all()
        self.assertEqual(result.iloc[0], s['1/3/2005'])
        self.assertEqual(result.iloc[1], s['1/10/2005'])

        result = s.resample('W-TUE').last()
        self.assertEqual(len(result), 2)
-        self.assertTrue((result.index.dayofweek == [1, 1]).all())
+        assert (result.index.dayofweek == [1, 1]).all()
        self.assertEqual(result.iloc[0], s['1/4/2005'])
        self.assertEqual(result.iloc[1], s['1/10/2005'])

        result = s.resample('W-WED').last()
        self.assertEqual(len(result), 2)
-        self.assertTrue((result.index.dayofweek == [2, 2]).all())
+        assert (result.index.dayofweek == [2, 2]).all()
        self.assertEqual(result.iloc[0], s['1/5/2005'])
        self.assertEqual(result.iloc[1], s['1/10/2005'])

        result = s.resample('W-THU').last()
        self.assertEqual(len(result), 2)
-        self.assertTrue((result.index.dayofweek == [3, 3]).all())
+        assert (result.index.dayofweek == [3, 3]).all()
        self.assertEqual(result.iloc[0], s['1/6/2005'])
        self.assertEqual(result.iloc[1], s['1/10/2005'])

        result = s.resample('W-FRI').last()
        self.assertEqual(len(result), 2)
-        self.assertTrue((result.index.dayofweek == [4, 4]).all())
+        assert (result.index.dayofweek == [4, 4]).all()
        self.assertEqual(result.iloc[0], s['1/7/2005'])
        self.assertEqual(result.iloc[1], s['1/10/2005'])

        # to biz day
        result = s.resample('B').last()
        self.assertEqual(len(result), 7)
-        self.assertTrue((result.index.dayofweek == [
-            4, 0, 1, 2, 3, 4, 0
-        ]).all())
+        assert (result.index.dayofweek == [4, 0, 1, 2, 3, 4, 0]).all()
+
        self.assertEqual(result.iloc[0], s['1/2/2005'])
        self.assertEqual(result.iloc[1], s['1/3/2005'])
        self.assertEqual(result.iloc[5], s['1/9/2005'])
@@ -1451,13 +1449,13 @@ def _ohlc(group):

        resampled = ts.resample('5min', closed='right',
                                label='right').ohlc()

-        self.assertTrue((resampled.loc['1/1/2000 00:00'] == ts[0]).all())
+        assert (resampled.loc['1/1/2000 00:00'] == ts[0]).all()

        exp = _ohlc(ts[1:31])
-        self.assertTrue((resampled.loc['1/1/2000 00:05'] == exp).all())
+        assert (resampled.loc['1/1/2000 00:05'] == exp).all()

        exp = _ohlc(ts['1/1/2000 5:55:01':])
-        self.assertTrue((resampled.loc['1/1/2000 6:00:00'] == exp).all())
+        assert (resampled.loc['1/1/2000 6:00:00'] == exp).all()

    def test_downsample_non_unique(self):
        rng = date_range('1/1/2000', '2/29/2000')
@@ -2588,7 +2586,7 @@ def test_resample_weekly_all_na(self):

        result = ts.resample('W-THU').asfreq()

-        self.assertTrue(result.isnull().all())
+        assert result.isnull().all()

        result = ts.resample('W-THU').asfreq().ffill()[:-1]
        expected = ts.asfreq('W-THU').ffill()
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index 45e8aa3a367db..5b9797ce76a45 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -49,8 +49,7 @@ def test_iter(self):

        for el in s:
            # each element of the series is either a basestring/str or nan
-            self.assertTrue(isinstance(el, compat.string_types) or
-                            isnull(el))
+            assert isinstance(el, compat.string_types) or isnull(el)

        # desired behavior is to iterate until everything would be nan on the
        # next iter so make sure the last element of the iterator was 'l' in
@@ -2114,12 +2113,12 @@ def test_split_with_name(self):
        idx = Index(['a,b', 'c,d'], name='xxx')
        res = idx.str.split(',')
        exp = Index([['a', 'b'], ['c', 'd']], name='xxx')
-        self.assertTrue(res.nlevels, 1)
+        assert res.nlevels == 1
        tm.assert_index_equal(res, exp)

        res = idx.str.split(',', expand=True)
        exp = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')])
-        self.assertTrue(res.nlevels, 2)
+        assert res.nlevels == 2
        tm.assert_index_equal(res, exp)

    def test_partition_series(self):
@@ -2207,13 +2206,13 @@ def test_partition_index(self):
        result = values.str.partition('_')
        exp = Index([('a', '_', 'b_c'), ('c', '_', 'd_e'), ('f', '_', 'g_h')])
        tm.assert_index_equal(result, exp)
-        self.assertTrue(isinstance(result, MultiIndex))
+        assert isinstance(result, MultiIndex)
        self.assertEqual(result.nlevels, 3)

        result = values.str.rpartition('_')
        exp = Index([('a_b', '_', 'c'), ('c_d', '_', 'e'), ('f_g', '_', 'h')])
        tm.assert_index_equal(result, exp)
-        self.assertTrue(isinstance(result, MultiIndex))
+        assert isinstance(result, MultiIndex)
        self.assertEqual(result.nlevels, 3)

    def test_partition_to_dataframe(self):
@@ -2259,13 +2258,13 @@ def test_partition_with_name(self):
        idx = Index(['a,b', 'c,d'], name='xxx')
        res = idx.str.partition(',')
        exp = MultiIndex.from_tuples([('a', ',', 'b'), ('c', ',', 'd')])
-        self.assertTrue(res.nlevels, 3)
+        assert res.nlevels == 3
        tm.assert_index_equal(res, exp)

        # should preserve name
        res = idx.str.partition(',', expand=False)
        exp = Index(np.array([('a', ',', 'b'), ('c', ',', 'd')]), name='xxx')
-        self.assertTrue(res.nlevels, 1)
+        assert res.nlevels == 1
        tm.assert_index_equal(res, exp)

    def test_pipe_failures(self):
@@ -2720,14 +2719,14 @@ def test_index_str_accessor_visibility(self):
                 (['aa', datetime(2011, 1, 1)], 'mixed')]
        for values, tp in cases:
            idx = Index(values)
-            self.assertTrue(isinstance(Series(values).str, StringMethods))
-            self.assertTrue(isinstance(idx.str, StringMethods))
+            assert isinstance(Series(values).str, StringMethods)
+            assert isinstance(idx.str, StringMethods)
            self.assertEqual(idx.inferred_type, tp)

        for values, tp in cases:
            idx = Index(values)
-            self.assertTrue(isinstance(Series(values).str, StringMethods))
-            self.assertTrue(isinstance(idx.str, StringMethods))
+            assert isinstance(Series(values).str, StringMethods)
+            assert isinstance(idx.str, StringMethods)
            self.assertEqual(idx.inferred_type, tp)

        cases = [([1, np.nan], 'floating'),
diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py
index 45994fd400912..80db5eb49c127 100644
--- a/pandas/tests/test_testing.py
+++ b/pandas/tests/test_testing.py
@@ -739,4 +739,4 @@ def test_locale(self):

        # GH9744
        locales = tm.get_locales()
-        self.assertTrue(len(locales) >= 1)
+        assert len(locales) >= 1
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index 13d471f368693..7979e7d77a49d 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -853,7 +853,7 @@ def test_cmov_window_corner(self):
        vals.fill(np.nan)
        with catch_warnings(record=True):
            rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
-        self.assertTrue(np.isnan(rs).all())
+        assert np.isnan(rs).all()

        # empty
        vals = np.array([])
@@ -865,7 +865,7 @@ def test_cmov_window_corner(self):
        vals = np.random.randn(5)
        with catch_warnings(record=True):
            rs = mom.rolling_window(vals, 10, 'boxcar')
-        self.assertTrue(np.isnan(rs).all())
+        assert np.isnan(rs).all()
        self.assertEqual(len(rs), 5)

    def test_cmov_window_frame(self):
@@ -1144,7 +1144,7 @@ def test_rolling_apply_out_of_bounds(self):

        # it works!
with catch_warnings(record=True): result = mom.rolling_apply(arr, 10, np.sum) - self.assertTrue(isnull(result).all()) + assert isnull(result).all() with catch_warnings(record=True): result = mom.rolling_apply(arr, 10, np.sum, min_periods=1) @@ -1172,7 +1172,7 @@ def test_rolling_std_1obs(self): with catch_warnings(record=True): result = mom.rolling_std(np.array([np.nan, np.nan, 3., 4., 5.]), 3, min_periods=2) - self.assertTrue(np.isnan(result[2])) + assert np.isnan(result[2]) def test_rolling_std_neg_sqrt(self): # unit test from Bottleneck @@ -1184,11 +1184,11 @@ def test_rolling_std_neg_sqrt(self): 0.00028718669878572767]) with catch_warnings(record=True): b = mom.rolling_std(a, window=3) - self.assertTrue(np.isfinite(b[2:]).all()) + assert np.isfinite(b[2:]).all() with catch_warnings(record=True): b = mom.ewmstd(a, span=3) - self.assertTrue(np.isfinite(b[2:]).all()) + assert np.isfinite(b[2:]).all() def test_rolling_var(self): self._check_moment_func(mom.rolling_var, lambda x: np.var(x, ddof=1), @@ -1226,25 +1226,25 @@ def test_fperr_robustness(self): with catch_warnings(record=True): result = mom.rolling_sum(arr, 2) - self.assertTrue((result[1:] >= 0).all()) + assert (result[1:] >= 0).all() with catch_warnings(record=True): result = mom.rolling_mean(arr, 2) - self.assertTrue((result[1:] >= 0).all()) + assert (result[1:] >= 0).all() with catch_warnings(record=True): result = mom.rolling_var(arr, 2) - self.assertTrue((result[1:] >= 0).all()) + assert (result[1:] >= 0).all() # #2527, ugh arr = np.array([0.00012456, 0.0003, 0]) with catch_warnings(record=True): result = mom.rolling_mean(arr, 1) - self.assertTrue(result[-1] >= 0) + assert result[-1] >= 0 with catch_warnings(record=True): result = mom.rolling_mean(-arr, 1) - self.assertTrue(result[-1] <= 0) + assert result[-1] <= 0 def _check_moment_func(self, f, static_comp, name=None, window=50, has_min_periods=True, has_center=True, @@ -1297,16 +1297,16 @@ def get_result(arr, window, min_periods=None, center=False): # min_periods is working correctly result = get_result(arr, 20, min_periods=15) - self.assertTrue(np.isnan(result[23])) + assert np.isnan(result[23]) assert not np.isnan(result[24]) assert not np.isnan(result[-6]) - self.assertTrue(np.isnan(result[-5])) + assert np.isnan(result[-5]) arr2 = randn(20) result = get_result(arr2, 10, min_periods=5) - self.assertTrue(isnull(result[3])) - self.assertTrue(notnull(result[4])) + assert isnull(result[3]) + assert notnull(result[4]) # min_periods=0 result0 = get_result(arr, 20, min_periods=0) @@ -1344,8 +1344,8 @@ def get_result(arr, window, min_periods=None, center=False): expected = get_result(self.arr, len(self.arr), min_periods=minp) nan_mask = np.isnan(result) - self.assertTrue(np.array_equal(nan_mask, np.isnan( - expected))) + tm.assert_numpy_array_equal(nan_mask, np.isnan(expected)) + nan_mask = ~nan_mask tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) @@ -1353,7 +1353,8 @@ def get_result(arr, window, min_periods=None, center=False): result = get_result(self.arr, len(self.arr) + 1) expected = get_result(self.arr, len(self.arr)) nan_mask = np.isnan(result) - self.assertTrue(np.array_equal(nan_mask, np.isnan(expected))) + tm.assert_numpy_array_equal(nan_mask, np.isnan(expected)) + nan_mask = ~nan_mask tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) @@ -1459,7 +1460,7 @@ def test_ewma(self): arr[5] = 1 with catch_warnings(record=True): result = mom.ewma(arr, span=100, adjust=False).sum() - self.assertTrue(np.abs(result - 1) < 1e-2) + assert np.abs(result - 1) 
< 1e-2 s = Series([1.0, 2.0, 4.0, 8.0]) @@ -1659,18 +1660,18 @@ def _check_ew_ndarray(self, func, preserve_nan=False, name=None): # check min_periods # GH 7898 result = func(s, 50, min_periods=2) - self.assertTrue(np.isnan(result.values[:11]).all()) + assert np.isnan(result.values[:11]).all() assert not np.isnan(result.values[11:]).any() for min_periods in (0, 1): result = func(s, 50, min_periods=min_periods) if func == mom.ewma: - self.assertTrue(np.isnan(result.values[:10]).all()) + assert np.isnan(result.values[:10]).all() assert not np.isnan(result.values[10:]).any() else: # ewmstd, ewmvol, ewmvar (with bias=False) require at least two # values - self.assertTrue(np.isnan(result.values[:11]).all()) + assert np.isnan(result.values[:11]).all() assert not np.isnan(result.values[11:]).any() # check series of length 0 @@ -1980,7 +1981,8 @@ def _non_null_values(x): # check that correlation of a series with itself is either 1 or NaN corr_x_x = corr(x, x) - # self.assertTrue(_non_null_values(corr_x_x).issubset(set([1.]))) # + + # assert _non_null_values(corr_x_x).issubset(set([1.])) # restore once rolling_cov(x, x) is identically equal to var(x) if is_constant: @@ -2406,16 +2408,15 @@ def test_corr_sanity(self): [0.84780328, 0.33394331], [0.78369152, 0.63919667]])) res = df[0].rolling(5, center=True).corr(df[1]) - self.assertTrue(all([np.abs(np.nan_to_num(x)) <= 1 for x in res])) + assert all([np.abs(np.nan_to_num(x)) <= 1 for x in res]) # and some fuzzing - for i in range(10): + for _ in range(10): df = DataFrame(np.random.rand(30, 2)) res = df[0].rolling(5, center=True).corr(df[1]) try: - self.assertTrue(all([np.abs(np.nan_to_num(x)) <= 1 for x in res - ])) - except: + assert all([np.abs(np.nan_to_num(x)) <= 1 for x in res]) + except AssertionError: print(res) def test_flex_binary_frame(self): @@ -2465,7 +2466,7 @@ def func(A, B, com, **kwargs): B[-10:] = np.NaN result = func(A, B, 20, min_periods=5) - self.assertTrue(np.isnan(result.values[:14]).all()) + assert np.isnan(result.values[:14]).all() assert not np.isnan(result.values[14:]).any() # GH 7898 @@ -2473,7 +2474,7 @@ def func(A, B, com, **kwargs): result = func(A, B, 20, min_periods=min_periods) # binary functions (ewmcov, ewmcorr) with bias=False require at # least two values - self.assertTrue(np.isnan(result.values[:11]).all()) + assert np.isnan(result.values[:11]).all() assert not np.isnan(result.values[11:]).any() # check series of length 0 @@ -2890,13 +2891,13 @@ def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True, # min_periods is working correctly result = func(arr, min_periods=15) - self.assertTrue(np.isnan(result[13])) + assert np.isnan(result[13]) assert not np.isnan(result[14]) arr2 = randn(20) result = func(arr2, min_periods=5) - self.assertTrue(isnull(result[3])) - self.assertTrue(notnull(result[4])) + assert isnull(result[3]) + assert notnull(result[4]) # min_periods=0 result0 = func(arr, min_periods=0) @@ -3052,7 +3053,7 @@ def f(): g = self.frame.groupby('A') assert not g.mutated g = self.frame.groupby('A', mutated=True) - self.assertTrue(g.mutated) + assert g.mutated def test_getitem(self): g = self.frame.groupby('A') @@ -3268,11 +3269,11 @@ def test_monotonic_on(self): freq='s'), 'B': range(5)}) - self.assertTrue(df.A.is_monotonic) + assert df.A.is_monotonic df.rolling('2s', on='A').sum() df = df.set_index('A') - self.assertTrue(df.index.is_monotonic) + assert df.index.is_monotonic df.rolling('2s').sum() # non-monotonic @@ -3666,11 +3667,11 @@ def test_perf_min(self): freq='s')) expected = 
dfp.rolling(2, min_periods=1).min() result = dfp.rolling('2s').min() - self.assertTrue(((result - expected) < 0.01).all().bool()) + assert ((result - expected) < 0.01).all().bool() expected = dfp.rolling(200, min_periods=1).min() result = dfp.rolling('200s').min() - self.assertTrue(((result - expected) < 0.01).all().bool()) + assert ((result - expected) < 0.01).all().bool() def test_ragged_max(self): diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py index 290c03af3be4b..45b736102aa3d 100644 --- a/pandas/tests/tools/test_numeric.py +++ b/pandas/tests/tools/test_numeric.py @@ -166,7 +166,7 @@ def test_scalar(self): to_numeric('XX', errors='raise') self.assertEqual(to_numeric('XX', errors='ignore'), 'XX') - self.assertTrue(np.isnan(to_numeric('XX', errors='coerce'))) + assert np.isnan(to_numeric('XX', errors='coerce')) def test_numeric_dtypes(self): idx = pd.Index([1, 2, 3], name='xxx') diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index af544d10a737c..894269aaf451a 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -628,25 +628,29 @@ def _check_generated_range(self, start, freq): self.assertEqual(frequencies.infer_freq(index), gen.freqstr) else: inf_freq = frequencies.infer_freq(index) - self.assertTrue((inf_freq == 'Q-DEC' and gen.freqstr in ( - 'Q', 'Q-DEC', 'Q-SEP', 'Q-JUN', 'Q-MAR')) or ( - inf_freq == 'Q-NOV' and gen.freqstr in ( - 'Q-NOV', 'Q-AUG', 'Q-MAY', 'Q-FEB')) or ( - inf_freq == 'Q-OCT' and gen.freqstr in ( - 'Q-OCT', 'Q-JUL', 'Q-APR', 'Q-JAN'))) + is_dec_range = inf_freq == 'Q-DEC' and gen.freqstr in ( + 'Q', 'Q-DEC', 'Q-SEP', 'Q-JUN', 'Q-MAR') + is_nov_range = inf_freq == 'Q-NOV' and gen.freqstr in ( + 'Q-NOV', 'Q-AUG', 'Q-MAY', 'Q-FEB') + is_oct_range = inf_freq == 'Q-OCT' and gen.freqstr in ( + 'Q-OCT', 'Q-JUL', 'Q-APR', 'Q-JAN') + assert is_dec_range or is_nov_range or is_oct_range gen = date_range(start, periods=5, freq=freq) index = _dti(gen.values) + if not freq.startswith('Q-'): self.assertEqual(frequencies.infer_freq(index), gen.freqstr) else: inf_freq = frequencies.infer_freq(index) - self.assertTrue((inf_freq == 'Q-DEC' and gen.freqstr in ( - 'Q', 'Q-DEC', 'Q-SEP', 'Q-JUN', 'Q-MAR')) or ( - inf_freq == 'Q-NOV' and gen.freqstr in ( - 'Q-NOV', 'Q-AUG', 'Q-MAY', 'Q-FEB')) or ( - inf_freq == 'Q-OCT' and gen.freqstr in ( - 'Q-OCT', 'Q-JUL', 'Q-APR', 'Q-JAN'))) + is_dec_range = inf_freq == 'Q-DEC' and gen.freqstr in ( + 'Q', 'Q-DEC', 'Q-SEP', 'Q-JUN', 'Q-MAR') + is_nov_range = inf_freq == 'Q-NOV' and gen.freqstr in ( + 'Q-NOV', 'Q-AUG', 'Q-MAY', 'Q-FEB') + is_oct_range = inf_freq == 'Q-OCT' and gen.freqstr in ( + 'Q-OCT', 'Q-JUL', 'Q-APR', 'Q-JAN') + + assert is_dec_range or is_nov_range or is_oct_range def test_infer_freq(self): rng = period_range('1959Q2', '2009Q3', freq='Q') diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index 1332be2567b56..08f17fc358a47 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -221,11 +221,11 @@ def test_return_type(self): assert isinstance(result, Timestamp) # make sure that we are returning NaT - self.assertTrue(NaT + offset is NaT) - self.assertTrue(offset + NaT is NaT) + assert NaT + offset is NaT + assert offset + NaT is NaT - self.assertTrue(NaT - offset is NaT) - self.assertTrue((-offset).apply(NaT) is NaT) + assert NaT - offset is NaT + assert (-offset).apply(NaT) is NaT def test_offset_n(self): for offset_klass 
in self.offset_types: @@ -255,11 +255,11 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected, func = getattr(offset_s, funcname) result = func(dt) - self.assertTrue(isinstance(result, Timestamp)) + assert isinstance(result, Timestamp) self.assertEqual(result, expected) result = func(Timestamp(dt)) - self.assertTrue(isinstance(result, Timestamp)) + assert isinstance(result, Timestamp) self.assertEqual(result, expected) # see gh-14101 @@ -275,7 +275,7 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected, with tm.assert_produces_warning(exp_warning, check_stacklevel=False): result = func(ts) - self.assertTrue(isinstance(result, Timestamp)) + assert isinstance(result, Timestamp) if normalize is False: self.assertEqual(result, expected + Nano(5)) else: @@ -294,11 +294,11 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected, dt_tz = tslib._localize_pydatetime(dt, tz_obj) result = func(dt_tz) - self.assertTrue(isinstance(result, Timestamp)) + assert isinstance(result, Timestamp) self.assertEqual(result, expected_localize) result = func(Timestamp(dt, tz=tz)) - self.assertTrue(isinstance(result, Timestamp)) + assert isinstance(result, Timestamp) self.assertEqual(result, expected_localize) # see gh-14101 @@ -314,7 +314,7 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected, with tm.assert_produces_warning(exp_warning, check_stacklevel=False): result = func(ts) - self.assertTrue(isinstance(result, Timestamp)) + assert isinstance(result, Timestamp) if normalize is False: self.assertEqual(result, expected_localize + Nano(5)) else: @@ -442,7 +442,7 @@ def test_onOffset(self): for offset in self.offset_types: dt = self.expecteds[offset.__name__] offset_s = self._get_offset(offset) - self.assertTrue(offset_s.onOffset(dt)) + assert offset_s.onOffset(dt) # when normalize=True, onOffset checks time is 00:00:00 offset_n = self._get_offset(offset, normalize=True) @@ -453,7 +453,7 @@ def test_onOffset(self): # cannot be in business hour range continue date = datetime(dt.year, dt.month, dt.day) - self.assertTrue(offset_n.onOffset(date)) + assert offset_n.onOffset(date) def test_add(self): dt = datetime(2011, 1, 1, 9, 0) @@ -465,14 +465,14 @@ def test_add(self): result_dt = dt + offset_s result_ts = Timestamp(dt) + offset_s for result in [result_dt, result_ts]: - self.assertTrue(isinstance(result, Timestamp)) + assert isinstance(result, Timestamp) self.assertEqual(result, expected) tm._skip_if_no_pytz() for tz in self.timezones: expected_localize = expected.tz_localize(tz) result = Timestamp(dt, tz=tz) + offset_s - self.assertTrue(isinstance(result, Timestamp)) + assert isinstance(result, Timestamp) self.assertEqual(result, expected_localize) # normalize=True @@ -482,13 +482,13 @@ def test_add(self): result_dt = dt + offset_s result_ts = Timestamp(dt) + offset_s for result in [result_dt, result_ts]: - self.assertTrue(isinstance(result, Timestamp)) + assert isinstance(result, Timestamp) self.assertEqual(result, expected) for tz in self.timezones: expected_localize = expected.tz_localize(tz) result = Timestamp(dt, tz=tz) + offset_s - self.assertTrue(isinstance(result, Timestamp)) + assert isinstance(result, Timestamp) self.assertEqual(result, expected_localize) def test_pickle_v0_15_2(self): @@ -2229,7 +2229,7 @@ def test_corner(self): ValueError, "Day must be", Week, weekday=-1) def test_isAnchored(self): - self.assertTrue(Week(weekday=0).isAnchored()) + assert Week(weekday=0).isAnchored() assert not Week().isAnchored() assert not Week(2, 
weekday=2).isAnchored() assert not Week(2).isAnchored() @@ -3041,8 +3041,8 @@ def test_repr(self): "") def test_isAnchored(self): - self.assertTrue(BQuarterBegin(startingMonth=1).isAnchored()) - self.assertTrue(BQuarterBegin().isAnchored()) + assert BQuarterBegin(startingMonth=1).isAnchored() + assert BQuarterBegin().isAnchored() assert not BQuarterBegin(2, startingMonth=1).isAnchored() def test_offset(self): @@ -3135,8 +3135,8 @@ def test_repr(self): "") def test_isAnchored(self): - self.assertTrue(BQuarterEnd(startingMonth=1).isAnchored()) - self.assertTrue(BQuarterEnd().isAnchored()) + assert BQuarterEnd(startingMonth=1).isAnchored() + assert BQuarterEnd().isAnchored() assert not BQuarterEnd(2, startingMonth=1).isAnchored() def test_offset(self): @@ -3506,12 +3506,12 @@ def test_apply(self): class TestFY5253LastOfMonthQuarter(Base): def test_isAnchored(self): - self.assertTrue( - makeFY5253LastOfMonthQuarter(startingMonth=1, weekday=WeekDay.SAT, - qtr_with_extra_week=4).isAnchored()) - self.assertTrue( - makeFY5253LastOfMonthQuarter(weekday=WeekDay.SAT, startingMonth=3, - qtr_with_extra_week=4).isAnchored()) + assert makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SAT, + qtr_with_extra_week=4).isAnchored() + assert makeFY5253LastOfMonthQuarter( + weekday=WeekDay.SAT, startingMonth=3, + qtr_with_extra_week=4).isAnchored() assert not makeFY5253LastOfMonthQuarter( 2, startingMonth=1, weekday=WeekDay.SAT, qtr_with_extra_week=4).isAnchored() @@ -3662,18 +3662,14 @@ def test_onOffset(self): def test_year_has_extra_week(self): # End of long Q1 - self.assertTrue( - makeFY5253LastOfMonthQuarter(1, startingMonth=12, - weekday=WeekDay.SAT, - qtr_with_extra_week=1) - .year_has_extra_week(datetime(2011, 4, 2))) + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, + qtr_with_extra_week=1).year_has_extra_week(datetime(2011, 4, 2)) # Start of long Q1 - self.assertTrue( - makeFY5253LastOfMonthQuarter( - 1, startingMonth=12, weekday=WeekDay.SAT, - qtr_with_extra_week=1) - .year_has_extra_week(datetime(2010, 12, 26))) + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, + qtr_with_extra_week=1).year_has_extra_week(datetime(2010, 12, 26)) # End of year before year with long Q1 assert not makeFY5253LastOfMonthQuarter( @@ -3689,23 +3685,17 @@ def test_year_has_extra_week(self): datetime(year, 4, 2)) # Other long years - self.assertTrue( - makeFY5253LastOfMonthQuarter( - 1, startingMonth=12, weekday=WeekDay.SAT, - qtr_with_extra_week=1) - .year_has_extra_week(datetime(2005, 4, 2))) + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, + qtr_with_extra_week=1).year_has_extra_week(datetime(2005, 4, 2)) - self.assertTrue( - makeFY5253LastOfMonthQuarter( - 1, startingMonth=12, weekday=WeekDay.SAT, - qtr_with_extra_week=1) - .year_has_extra_week(datetime(2000, 4, 2))) + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, + qtr_with_extra_week=1).year_has_extra_week(datetime(2000, 4, 2)) - self.assertTrue( - makeFY5253LastOfMonthQuarter( - 1, startingMonth=12, weekday=WeekDay.SAT, - qtr_with_extra_week=1) - .year_has_extra_week(datetime(1994, 4, 2))) + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, + qtr_with_extra_week=1).year_has_extra_week(datetime(1994, 4, 2)) def test_get_weeks(self): sat_dec_1 = makeFY5253LastOfMonthQuarter(1, startingMonth=12, @@ -3820,8 +3810,8 @@ def test_repr(self): "") def test_isAnchored(self): - 
self.assertTrue(QuarterBegin(startingMonth=1).isAnchored()) - self.assertTrue(QuarterBegin().isAnchored()) + assert QuarterBegin(startingMonth=1).isAnchored() + assert QuarterBegin().isAnchored() assert not QuarterBegin(2, startingMonth=1).isAnchored() def test_offset(self): @@ -3898,8 +3888,8 @@ def test_repr(self): "") def test_isAnchored(self): - self.assertTrue(QuarterEnd(startingMonth=1).isAnchored()) - self.assertTrue(QuarterEnd().isAnchored()) + assert QuarterEnd(startingMonth=1).isAnchored() + assert QuarterEnd().isAnchored() assert not QuarterEnd(2, startingMonth=1).isAnchored() def test_offset(self): @@ -4398,7 +4388,7 @@ def test_ticks(self): for kls, expected in offsets: offset = kls(3) result = offset + Timedelta(hours=2) - self.assertTrue(isinstance(result, Timedelta)) + assert isinstance(result, Timedelta) self.assertEqual(result, expected) def test_Hour(self): @@ -4532,12 +4522,12 @@ def test_compare_ticks(self): four = kls(4) for _ in range(10): - self.assertTrue(three < kls(4)) - self.assertTrue(kls(3) < four) - self.assertTrue(four > kls(3)) - self.assertTrue(kls(4) > three) - self.assertTrue(kls(3) == kls(3)) - self.assertTrue(kls(3) != kls(4)) + assert three < kls(4) + assert kls(3) < four + assert four > kls(3) + assert kls(4) > three + assert kls(3) == kls(3) + assert kls(3) != kls(4) class TestOffsetNames(tm.TestCase): @@ -4700,7 +4690,7 @@ def test_rule_code(self): lst = ['M', 'D', 'B', 'H', 'T', 'S', 'L', 'U'] for k in lst: code, stride = get_freq_code('3' + k) - self.assertTrue(isinstance(code, int)) + assert isinstance(code, int) self.assertEqual(stride, 3) self.assertEqual(k, _get_freq_str(code)) @@ -4758,11 +4748,11 @@ def run_X_index_creation(self, cls): assert not inst1._should_cache(), cls return - self.assertTrue(inst1._should_cache(), cls) + assert inst1._should_cache(), cls DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 31), freq=inst1, normalize=True) - self.assertTrue(cls() in _daterange_cache, cls) + assert cls() in _daterange_cache, cls def test_should_cache_month_end(self): assert not MonthEnd()._should_cache() @@ -4859,34 +4849,34 @@ def _test_offset(self, offset_name, offset_n, tstart, expected_utc_offset): t = tstart + offset if expected_utc_offset is not None: - self.assertTrue(get_utc_offset_hours(t) == expected_utc_offset) + assert get_utc_offset_hours(t) == expected_utc_offset if offset_name == 'weeks': # dates should match - self.assertTrue(t.date() == timedelta(days=7 * offset.kwds[ - 'weeks']) + tstart.date()) + assert t.date() == timedelta(days=7 * offset.kwds[ + 'weeks']) + tstart.date() # expect the same day of week, hour of day, minute, second, ... - self.assertTrue(t.dayofweek == tstart.dayofweek and t.hour == - tstart.hour and t.minute == tstart.minute and - t.second == tstart.second) + assert (t.dayofweek == tstart.dayofweek and + t.hour == tstart.hour and + t.minute == tstart.minute and + t.second == tstart.second) elif offset_name == 'days': # dates should match - self.assertTrue(timedelta(offset.kwds['days']) + tstart.date() == - t.date()) + assert timedelta(offset.kwds['days']) + tstart.date() == t.date() # expect the same hour of day, minute, second, ... 
- self.assertTrue(t.hour == tstart.hour and - t.minute == tstart.minute and - t.second == tstart.second) + assert (t.hour == tstart.hour and + t.minute == tstart.minute and + t.second == tstart.second) elif offset_name in self.valid_date_offsets_singular: # expect the signular offset value to match between tstart and t datepart_offset = getattr(t, offset_name if offset_name != 'weekday' else 'dayofweek') - self.assertTrue(datepart_offset == offset.kwds[offset_name]) + assert datepart_offset == offset.kwds[offset_name] else: # the offset should be the same as if it was done in UTC - self.assertTrue(t == (tstart.tz_convert('UTC') + offset - ).tz_convert('US/Pacific')) + assert (t == (tstart.tz_convert('UTC') + offset) + .tz_convert('US/Pacific')) def _make_timestamp(self, string, hrs_offset, tz): if hrs_offset >= 0: diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 65db858a6ccf1..2c3aa03e85904 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -78,9 +78,9 @@ def test_utc_to_local_no_modify(self): rng_eastern = rng.tz_convert(self.tzstr('US/Eastern')) # Values are unmodified - self.assertTrue(np.array_equal(rng.asi8, rng_eastern.asi8)) + assert np.array_equal(rng.asi8, rng_eastern.asi8) - self.assertTrue(self.cmptz(rng_eastern.tz, self.tz('US/Eastern'))) + assert self.cmptz(rng_eastern.tz, self.tz('US/Eastern')) def test_utc_to_local_no_modify_explicit(self): rng = date_range('3/11/2012', '3/12/2012', freq='H', tz='utc') @@ -116,7 +116,7 @@ def test_localize_utc_conversion_explicit(self): rng = date_range('3/10/2012', '3/11/2012', freq='30T') converted = rng.tz_localize(self.tz('US/Eastern')) expected_naive = rng + offsets.Hour(5) - self.assertTrue(np.array_equal(converted.asi8, expected_naive.asi8)) + assert np.array_equal(converted.asi8, expected_naive.asi8) # DST ambiguity, this should fail rng = date_range('3/11/2012', '3/12/2012', freq='30T') @@ -269,10 +269,10 @@ def test_tz_localize_empty_series(self): ts = Series() ts2 = ts.tz_localize('utc') - self.assertTrue(ts2.index.tz == pytz.utc) + assert ts2.index.tz == pytz.utc ts2 = ts.tz_localize(self.tzstr('US/Eastern')) - self.assertTrue(self.cmptz(ts2.index.tz, self.tz('US/Eastern'))) + assert self.cmptz(ts2.index.tz, self.tz('US/Eastern')) def test_astimezone(self): utc = Timestamp('3/11/2012 22:00', tz='UTC') @@ -309,7 +309,7 @@ def test_create_with_fixed_tz(self): rng3 = date_range('3/11/2012 05:00:00+07:00', '6/11/2012 05:00:00+07:00') - self.assertTrue((rng.values == rng3.values).all()) + assert (rng.values == rng3.values).all() def test_create_with_fixedoffset_noname(self): off = fixed_off_no_name @@ -373,8 +373,8 @@ def test_utc_box_timestamp_and_localize(self): rng_eastern = rng.tz_convert(self.tzstr('US/Eastern')) # test not valid for dateutil timezones. 
# assert 'EDT' in repr(rng_eastern[0].tzinfo) - self.assertTrue('EDT' in repr(rng_eastern[0].tzinfo) or 'tzfile' in - repr(rng_eastern[0].tzinfo)) + assert ('EDT' in repr(rng_eastern[0].tzinfo) or + 'tzfile' in repr(rng_eastern[0].tzinfo)) def test_timestamp_tz_convert(self): strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] @@ -399,7 +399,7 @@ def test_pass_dates_localize_to_utc(self): def test_field_access_localize(self): strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] rng = DatetimeIndex(strdates, tz=self.tzstr('US/Eastern')) - self.assertTrue((rng.hour == 0).all()) + assert (rng.hour == 0).all() # a more unusual time zone, #1946 dr = date_range('2011-10-02 00:00', freq='h', periods=10, @@ -715,14 +715,14 @@ def test_localized_at_time_between_time(self): expected = ts.at_time(time(10, 0)).tz_localize(self.tzstr( 'US/Eastern')) assert_series_equal(result, expected) - self.assertTrue(self.cmptz(result.index.tz, self.tz('US/Eastern'))) + assert self.cmptz(result.index.tz, self.tz('US/Eastern')) t1, t2 = time(10, 0), time(11, 0) result = ts_local.between_time(t1, t2) expected = ts.between_time(t1, t2).tz_localize(self.tzstr('US/Eastern')) assert_series_equal(result, expected) - self.assertTrue(self.cmptz(result.index.tz, self.tz('US/Eastern'))) + assert self.cmptz(result.index.tz, self.tz('US/Eastern')) def test_string_index_alias_tz_aware(self): rng = date_range('1/1/2000', periods=10, tz=self.tzstr('US/Eastern')) @@ -757,7 +757,7 @@ def test_convert_tz_aware_datetime_datetime(self): dates_aware = [self.localize(tz, x) for x in dates] result = to_datetime(dates_aware) - self.assertTrue(self.cmptz(result.tz, self.tz('US/Eastern'))) + assert self.cmptz(result.tz, self.tz('US/Eastern')) converted = to_datetime(dates_aware, utc=True) ex_vals = np.array([Timestamp(x).value for x in dates_aware]) @@ -851,7 +851,7 @@ def test_tzaware_datetime_to_index(self): d = [datetime(2012, 8, 19, tzinfo=self.tz('US/Eastern'))] index = DatetimeIndex(d) - self.assertTrue(self.cmptz(index.tz, self.tz('US/Eastern'))) + assert self.cmptz(index.tz, self.tz('US/Eastern')) def test_date_range_span_dst_transition(self): # #1778 @@ -860,10 +860,10 @@ def test_date_range_span_dst_transition(self): dr = date_range('03/06/2012 00:00', periods=200, freq='W-FRI', tz='US/Eastern') - self.assertTrue((dr.hour == 0).all()) + assert (dr.hour == 0).all() dr = date_range('2012-11-02', periods=10, tz=self.tzstr('US/Eastern')) - self.assertTrue((dr.hour == 0).all()) + assert (dr.hour == 0).all() def test_convert_datetime_list(self): dr = date_range('2012-06-02', periods=10, @@ -916,7 +916,7 @@ def test_index_drop_dont_lose_tz(self): ind = date_range("2012-12-01", periods=10, tz="utc") ind = ind.drop(ind[-1]) - self.assertTrue(ind.tz is not None) + assert ind.tz is not None def test_datetimeindex_tz(self): """ Test different DatetimeIndex constructions with timezone @@ -938,8 +938,8 @@ def test_datetimeindex_tz_nat(self): idx = to_datetime([Timestamp("2013-1-1", tz=self.tzstr('US/Eastern')), NaT]) - self.assertTrue(isnull(idx[1])) - self.assertTrue(idx[0].tzinfo is not None) + assert isnull(idx[1]) + assert idx[0].tzinfo is not None class TestTimeZoneSupportDateutil(TestTimeZoneSupportPytz): @@ -1141,7 +1141,7 @@ def test_tzlocal(self): # GH 13583 ts = Timestamp('2011-01-01', tz=dateutil.tz.tzlocal()) self.assertEqual(ts.tz, dateutil.tz.tzlocal()) - self.assertTrue("tz='tzlocal()')" in repr(ts)) + assert "tz='tzlocal()')" in repr(ts) tz = tslib.maybe_get_tz('tzlocal()') self.assertEqual(tz, dateutil.tz.tzlocal()) @@ -1311,7 +1311,7 @@ 
def test_tz_localize_roundtrip(self): reset = localized.tz_localize(None) tm.assert_index_equal(reset, idx) - self.assertTrue(reset.tzinfo is None) + assert reset.tzinfo is None def test_series_frame_tz_localize(self): @@ -1385,7 +1385,7 @@ def test_tz_convert_roundtrip(self): converted = idx.tz_convert(tz) reset = converted.tz_convert(None) tm.assert_index_equal(reset, expected) - self.assertTrue(reset.tzinfo is None) + assert reset.tzinfo is None tm.assert_index_equal(reset, converted.tz_convert( 'UTC').tz_localize(None)) @@ -1425,7 +1425,7 @@ def test_join_aware(self): ex_index = test1.index.union(test2.index) tm.assert_index_equal(result.index, ex_index) - self.assertTrue(result.index.tz.zone == 'US/Central') + assert result.index.tz.zone == 'US/Central' # non-overlapping rng = date_range("2012-11-15 00:00:00", periods=6, freq="H", @@ -1435,7 +1435,7 @@ def test_join_aware(self): tz="US/Eastern") result = rng.union(rng2) - self.assertTrue(result.tz.zone == 'UTC') + assert result.tz.zone == 'UTC' def test_align_aware(self): idx1 = date_range('2001', periods=5, freq='H', tz='US/Eastern') @@ -1535,8 +1535,8 @@ def test_append_aware_naive(self): ts2 = Series(np.random.randn(len(rng2)), index=rng2) ts_result = ts1.append(ts2) - self.assertTrue(ts_result.index.equals(ts1.index.asobject.append( - ts2.index.asobject))) + assert ts_result.index.equals(ts1.index.asobject.append( + ts2.index.asobject)) # mixed rng1 = date_range('1/1/2011 01:00', periods=1, freq='H') @@ -1544,8 +1544,8 @@ def test_append_aware_naive(self): ts1 = Series(np.random.randn(len(rng1)), index=rng1) ts2 = Series(np.random.randn(len(rng2)), index=rng2) ts_result = ts1.append(ts2) - self.assertTrue(ts_result.index.equals(ts1.index.asobject.append( - ts2.index))) + assert ts_result.index.equals(ts1.index.asobject.append( + ts2.index)) def test_equal_join_ensure_utc(self): rng = date_range('1/1/2011', periods=10, freq='H', tz='US/Eastern') @@ -1607,9 +1607,9 @@ def test_timestamp_equality_different_timezones(self): self.assertEqual(b, c) self.assertEqual(a, c) - self.assertTrue((utc_range == eastern_range).all()) - self.assertTrue((utc_range == berlin_range).all()) - self.assertTrue((berlin_range == eastern_range).all()) + assert (utc_range == eastern_range).all() + assert (utc_range == berlin_range).all() + assert (berlin_range == eastern_range).all() def test_datetimeindex_tz(self): rng = date_range('03/12/2012 00:00', periods=10, freq='W-FRI', @@ -1626,7 +1626,7 @@ def test_normalize_tz(self): tz='US/Eastern') tm.assert_index_equal(result, expected) - self.assertTrue(result.is_normalized) + assert result.is_normalized assert not rng.is_normalized rng = date_range('1/1/2000 9:30', periods=10, freq='D', tz='UTC') @@ -1635,7 +1635,7 @@ def test_normalize_tz(self): expected = date_range('1/1/2000', periods=10, freq='D', tz='UTC') tm.assert_index_equal(result, expected) - self.assertTrue(result.is_normalized) + assert result.is_normalized assert not rng.is_normalized from dateutil.tz import tzlocal @@ -1644,7 +1644,7 @@ def test_normalize_tz(self): expected = date_range('1/1/2000', periods=10, freq='D', tz=tzlocal()) tm.assert_index_equal(result, expected) - self.assertTrue(result.is_normalized) + assert result.is_normalized assert not rng.is_normalized def test_normalize_tz_local(self): @@ -1664,7 +1664,7 @@ def test_normalize_tz_local(self): tz=tzlocal()) tm.assert_index_equal(result, expected) - self.assertTrue(result.is_normalized) + assert result.is_normalized assert not rng.is_normalized def test_tzaware_offset(self): 
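The hunks above are part of a tree-wide conversion of unittest-style self.assertTrue(...) calls into bare assert statements, presumably so that pytest's assertion rewriting can report the compared operands when a test fails. A minimal runnable sketch of the two idioms, assuming pytest as the runner (the test names and values below are illustrative only and are not part of this patch series):

import numpy as np


def test_bare_assert_reports_operands():
    # pytest rewrites a failing bare assert so the report shows the
    # actual operands (e.g. "assert 3 == 4"); no hand-written failure
    # message is needed after the conversion.
    result = np.array([0, 1, 2]).sum()
    assert result == 3


def test_assert_message_pitfall():
    # `assert expr, msg` attaches msg as a failure message; it does NOT
    # compare expr with msg, so `assert nlevels, 1` would pass for any
    # truthy nlevels. An equality check therefore has to be spelled out
    # as `assert nlevels == 1` when rewriting assertTrue(x, y) calls
    # that were written with equality intent.
    nlevels = 1
    assert nlevels == 1, 'expected a single-level index'

The same caveat applies to the original unittest spelling: assertTrue(x, y) treats y as a failure message, not as an expected value, so both forms pass silently for any truthy x.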
From 5a04376dcaf596615fc1a32e2498bd05999bcb79 Mon Sep 17 00:00:00 2001
From: Jonathan Whitmore
Date: Thu, 27 Apr 2017 06:58:59 -0700
Subject: [PATCH 479/933] DOC: cheatsheet - make various minor corrections (#16136)

---
 doc/cheatsheet/Pandas_Cheat_Sheet.pdf  | Bin 685284 -> 175124 bytes
 doc/cheatsheet/Pandas_Cheat_Sheet.pptx | Bin 105196 -> 178327 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)

diff --git a/doc/cheatsheet/Pandas_Cheat_Sheet.pdf b/doc/cheatsheet/Pandas_Cheat_Sheet.pdf
index d504926d225809d21b3f900c7956a2f9cea421cc..0492805a1408b1cd5b398eaad3bb0df2852c4232 100644
GIT binary patch
[base85-encoded binary payload omitted: the updated Pandas_Cheat_Sheet.pdf (literal 175124) and Pandas_Cheat_Sheet.pptx, with no human-readable content]
zA2!1sfou=SBj+#590h1RzF);q7>tDW3JMD(1V=}qVG}0|-yYyUb~@S2#)f(*(=t2J zt^y8~0oQ%az<*rAqvY_FM5E@xCG*e2W2u9?3Gm6X zA_Mmcgha!~@;*(H6rof2m1GOZa@~G)#B2uF@_Wt#o5W}Q@&@STkC61~g#fb%Q2%|Y zk^>uDq#%>q*OHj665;|%>#!ir0_?Eu1-n_mz?Oc*UlCmV)KC>}A zgYVOieEqqq0#(v79%l$%J)F-csT;yvN!zPiALlz4pLX#&VPp{Dvv9Tu}C;c^dOdrUr2OG)S=EK)gkZ^e=m4#1d{9G?Nac~ ztyIny-m2~93J?=Y$w|-;)=$w7(f8YC>XRdGjrEOfK$I!wJ*u20&L_a+pUHQdZmgB6 zWnVU0ro|Cq$$OcJJLIv;4n%!V{h&^pHX-?ZRMD8YCX6k+yyC2oYKl8=t(2?Wpr}UGBv-m-97!vdT7!kJ zL9;=@gDu~@N)At{B$*>y$T=84(~Xp4PQ62?-dmVungRW-FT z;rT4K9?n6}T({JxZ%|xes$uD!_k=GbFLYpBL9Rh1yNT<1pj7P|#?XXtPx4_EUCSjo9`v)P~gII z0_#23dH{^;2gjS!33>^^6Y?4P03hBa_ZKoU9$lqs(EsQH1^5-IQJH5uBl$p%-r^V>NfFT<|1>2$9c!Tm{BSFZO0xBXX(oWD+?VP zRz5R;5x{+5+_=cttv-VfvKHlhR(*P_TV*{HCppK>9VR|HK7wbWS1}VJ0V4SW-BC_+ z9J327MJ(~m-4U_(o)$nUA(XP z`vb1RLePTPRUEmSYj>y9D=I72M|=~1X&R>U zZ3?UyPMc@co%PN1&ERw=hZItVBBO+-{B7=W!MM2X_B3ljccLcPBr32`faW9N>6c6o z=tS|vH{n|0W07)^iSVcJHTSC9zRQvD#F?Fd4)XW+EW*)l$}JV%u$(B??qB2kaBmS3Bq5m#%0S-mue%2yK? zXE=IFG%h&}XgB7C;QV80cM^YCS{Q_SM1n0qBsAy zink68o@g0b71DFd3l*CtEJ}L`;_EfAcIDbx;!Pw$-x~9^+K?}ST{3tLCqQA?fl@@^ zuQ{E2%z+VsXe*CQB5@OaS!__0r3w59uAr`-RY5clo6fMZoiedltFwwiw}ck+;wj@e z0v=64O(JeAjd?7xH8(51PJ<=mIQ>r0bsh4Qy`UK&+Hz1KY?~tAe=*esNP=xttW05F z8VJ6pM;v>CxU$%m7~-aZ1>$w6ndLPBe^J+bK1yZ}^*AOhJsYX&JM<%26sX4peoh?K~&?i8o^vvjR;exu4SS}wH|ef)TGz!?;` zV2hzA3fbuAn47~NJB8^Z5s{ggSbBQ(Xdf?&lq(YjOVH$}Mt1U;(zWx2EAFUTN+1cxV>MD%D5#Mh`ZbLEu{3rVVM6gG)? zKXu!WLP8 zZw6I!cP`CxHuTq_*&(bh;uea70~V1bY?9B-=x($&x&&!)!=n*banc~9LE#`)y?*+& z=%#k19FD=@o8Yw38y2&dH>xH|c-#=-wht0O$776vH~+ZIqvM>Z4N&9ip;k zcC(A^bx}C|+MMjr9@K87!a438X!@b!`rvh8H-IVl4Rva82bU$#We@lQG2MgTDru~q z=lZGrYqhw?1M5S3HrcukPlGzk6)u)*nCZ=!@=4}{M)?@rrjOs@k9WMROi!i%hgkb9 ziGE9`Poh}1e?-N- z*;*QX+Pr8K%-oGW4K;KO%uK&6H-A}jAb;C?{#bJ4^{hXQbbkeZNBtfBzfJS!y1{gi zQ~&@jqg3*gO3HK1apsiE`>G$-JuXmqnTh-*tC3=(g-DhlqWuZ5c=&H3InLMDJn*tU zHhAv1Vt)k2ZD81Ri@B4z2}rN{Z+7*<+-{P%+In%=+D5edoQZT?U+A0_Tx7r8R}5Z# z!^>sy-iy$w9-wZkl7MUHa2IQ}6 zu(ZuRSsO51s9JaGEpx!B;)rq-EWZi9kv-v@J;ye`oZ|?}@?*+(&Qlz{j}MhIlOb4^ z7uOKili0Rak88BIkhhllj_RCD47ese7(9Ijvg7NRYR(0o03g}nU>>z;N82v`XZt@G)bRT1q#VZo73(8^K@OQTO`s$ZG$NbcN7D!wd| zx+if~&fHK;N>S*iVvNu$sF6QIYZl1C1x5GGC7Td>Ge!omz>$vKUbfmRhMTQ$AU73M zY<`y+s|@C@7_h9^cagMMkjXw@v1T86=ydoOK7{SYL79z3924x7MIq$#NWy2~fKAb7 z_UFsyD9TG1fi?jk>2{Z3*1?CIUv6OYoQ-c5xV9)&Hgf`w59Q4-%2{pStb2`IE~;CC z&X3gsooSkETYhS8Zflt=5H>kXL1@`J%X6TnrE%cg=yDjZyuz{uEwUeE`8d_m*LynQ zkF%>A5nBay^4dceFsd|uTa z$MvG7D-?-(v1x#Pl4yur&3^%!IG>iQThi*4wqm}@l%{eIx@SI?T^HVAN^+lciYGR#zWuV%9^%u`Bbt(@&FKsG; zvRJNPvG0K&W#Z3eh;L^3{+a}CkUsJRdNhfg1l-h#@O@DuTSnVDS3cxOe)&g)`y%;- zrXhz2cX}bg4m)~uj||?*SHj8&4YZ}AkDJ!nmw>O{+0kvhu6UXs5G+1#BCn&bccY$s z#<<&UoPb;^wo!CnRd{zzcI6!&F?#XrA3rWW0F^k(ApRfH?Vm*fI$C<>f214zZ=>t~ zlw5z?um7pVG0XeX{9$;@-A>%OFsvAyj)(-uA&B1cwVwu+uycS{Xe{2 zM;5rZJe*d3YOUREShU=`ngQKB|FXHaPzKFh#`+=`WrCL67<%2^rVDlXCwA3J5XL)?CIBY-fq@-x& z#LATxmczAhcD0f@B+iresN-Q4#sDSJQ|ZqI{$nBbb86!F9tc%FR3hI;`OkNr-2eYFo zTQXiS2iK8@XJ#XX$}wrmp904U3i0y}ON>??aolOCc#8B0ZqXY;dwlUiB+@185y$f z*O+$x)Sf0*rAqKY!-&x5-lzIVwKiDo(F?#AIUk>s-0-X(j?D0S{WA6)hf`O81qY10yJ-i>L;B=!g3C!6P%c#kW^6q{IS`zMFRtIHY;5^btBR(9jykw<4Pw6-l*TV;US7*``zZ3OEckAmQ5#i zg$5OHhE)c)IW)p66~?x2qa$}WT9$p;VLc3oCRuIDyeq!ox;z07F&NzFSx=P@=c0VQ zUF}lL;^L3XhHMedPAeKozVSNc@t^wvrM1lL?Kgkr0*lNKV2!iOXZf`yal1CZUn0W6@BX1Q5kaa!h8S*5n{p0D0w!%j=th|~(X1#lCUzSs{?wVD>MK(DgP zvhD&JaysUlDrA%4uLD`x)(*UnN_5 zNzX}x$YPae5MFSNLs${3cQ?(i4xV=Rx0G~m&$pHx9^4Ba>v23>9`eYCtAQD`w_kW~ zLLW7+m-s$!n_?d)GbS=TobNMNcfTTa+RuYHXY-k%pC;kdA`ONpU#5;}jQp1{OZ%b9K1xl%UYa#XtYQ$EA53c4!E3`Z`%<%{y6-4hV^#T5$+l$;t@e 
zT*%L=NyULv4hM*?5%PW0F~KOHBCoPYwT}0BTuBT`6#GZ9VJGX414yGDMS4|Mps2BN zyt)Sm($_5R85L|let(Q3RhWM3a3uQZF*Od8P;m?#{`cgE3yLoHCa2hRRZVGoPTA%a zZEFn0aZ$f>Y&eNublD}j%Kid0?$1Jo@!hYRq`W*&5RTjShBk#o zK3*rGquOgRu)P3s$o!kK9fRW5U1l@ANepTG_iujA(5%6Iw?pUCT6BFbMJ13Oiad@M zIFRspg!A4$`6(eB_WgQhhxFYfO&50j>RpA{L|ygf4QvZ|;B{e3CdlJOqR^*`ga;13 zU0Xt<{dK?RF*}4OAP(Ok^1%gCa}GoiR>{i>Jo}gS zjF1xolU~-5wmTnn18$P6Yawj zB!y=Kqh31hJ~tEk4lO2P@M&;4!mwl!kMTK`&+x?)m9apDLLJXEm9Z%>z3In^DV58U z4N!@?L-25u4-5P<&=ZGz9#+tcE~&B<`s$GiikZzvM}`hb0%i+arFG0=bDYV0afbd? z$%IaSx_h>>B3b`6&&|&LeMV(b^c)@wkm~+NIT8$(mvK(OB&VdK;5^F%UR4wGDB{z1 zvwVXmSkvKLrZB~DMBp}bDh1WC;h;`;*&KnLYge6O+gGzk{OK!g9%(rw=%TL@WtknAlt@{ z7tQ9FnKHi`5@zqr*t^I52kxpY6U}$CAG8(#Ck}kdqR>v^7>YWaoY(KJzOi`j!k)#Dz>}sVM*90pSp-7 zOKLXw_9obZ19c=Wju{1`mGLNG^lc}%7;SQ-^yeGv|4|{QwLCLw8OJq5F`^8ze?UeQibeoQ^pRcCZK{&IPB9 z*}~VPyD^#c39{4%QbeAp{saShglc61J3xUW9`zk-eV+qV$QZgyFf4l#>EI0?3-)^Wo998)kDt zmwM-V!)A(!9`}*+N~%1>jrKgN1OatY{a3}ffYYj;w)3xNJ@tp!M$;W0~RZPks2%#Q3Q^FA2rt6zKLB38-+Nv zZCdrIMM70}RD5IOq|~V8>FB$BO+pZCVso*em{7GD=NzQ{?wTyCmXHyn((#zh`u+}f zgS5#fw*Kfww~aL40cErEeiYOI#+arsW^r`o*>9_DhE#31hMJ3WSFt;4^OIRP|>PFDkOj@Xkyd$b!H!KuVN$W_FQ#e zW%X2&M`1+q5mN9f=!~ONAQ7Sa!!#lxs$yvB`uBe4An+A$HPLca@)tusK)NBjOky=? zi2S}nw)ZyDt&0tQZj9jPd*2N>)AI|grEa2|w_!_{lxZ)Yx^F%U3yKoLwGh;IsDaof zjvO`M%)fg{gl)WUZHN$y_NGDCX&1QTX= z@(Ql^bYTHxB(@~jgiTXgr*xyz=GwPY9JFHb@D*Dw(0kyR#DUI$FsO0!Ygw~40(bQ^ zlNk6_xvC{b(y0kZJM+(9)ztx`uHal%7U1j`xU`{_w#8BW^f6IX@tl4nvA)n`MF6W* z@glcFayIYS+xI%;njGD?KbWU)SZmB5C_}vmWl~=BNW`=l&DeP>qUw`moKMHRk9?SN z?hZ@5$JSJL&78JbNi4&93)8?I`*~fguJWiHNe=9@rv@Z!5z2bUMn#ubDR(*NWD(l( zSgVrutnGbShpqs|upU0fCxD7rs9=A%q)sn`+Mq?ZA((P_Z<^#y#VdD#2 z;z1@Y0GsI6Mj?$KY52R<&gA9ufD)K_UL;9wRfMXU<9>Qgj`i2D?F>o;>Npd9Xxma> zusdo}@3L&Z#kYEd4{?{NjiNOVH;ZMy>#td53(xUfRB#{Lih2cJFDEShV530Rc*Y)R zM=7ivm5E2G5r!+8Q5EluiaG%W&;lR&GLp=Jq*r}+7H4zZsx&nsv$!j`iyDz(upEp z32`r+zRpjj&biY)ujntWi|M>9D$@q~gpP7QPd3toU$|~y&S!fiog3khz7S+UzPvMF z<@gE?Etx1uT+JtWT?Kt>$I~w7uq4bB@a)GZcfQW)Q6-TybB<_004;oqgl`=!uP{+b z9Ig*K!`{KJKv|Tlo39VvdCySedDDR%3TxbUbk58LJ;j=NNqRaa_3gWA zuRwQ-wNC-r^R0XCGa-oh5@%XhE9(TsA{Hy7h7S&^FGy!fa^ zD74AHhU&~Oq>)BrC^hfuMpWQ6^m9m==Y7a!__2#4X6F9vmm;JO8aFMZ7SbB|#5B{o z^SB##vtuvfZF|mADPo##!!M{V0vs9OWKH6pCB`^(dw!=&%o~Hmy9oQ?GZD#Hx zJX5T`Fe_jV?fb5IXQf6`_7Jb3{simbv^xqXlbs1zPzNUXHPWyaNs_S)^<^VjXp*Z( zE#ZE^?FsYMr?|y7wy5ctum`Y-%y!Gru@z_zcw-EH5f4X;fe*ZR$?-QS#$-!TPC4a7 z?{j$4Z`2?`X`xPbcK-R4l#wl_Yn7~s5z5T&WHoA?7CG7%+U>6I`|sDOCO%iC_be0b zPjfQvH1kX{1{>obJMa*mYu}ym{bw-lE_O%x**UW6ISFn{3QN(p!wzl?-BDT!1G0xc2tibsOSYerzqE zoL~08M}RH9J3ZxmW?s@ZLWl<%RD0J`#b16r z+kSAPQkZ+Es+2r2s^GtfTg|+@Z=KuxkpEKHz%+9gEOKD`3*hT%Jt$)LJrwf9kGq>- zQ1)Y>CjDOrZY`Sgag2(0=+^6AM}7xSm~m8-qI@IsNr+Dtw{6WzuW)JeXc}0huQxh zOV=2rNz-h1cXn*s&)BwY?AW$#+qP}nwr$%s?z~^zKh@dQ9nl?KRi_fCGpXG^dm{T7 z#yLo94Rf)h1B|EkRvmML(+<}7)9DUUE6v}a;FEEIHXaQ7_}ae4;ytS;&RFzOK0-G2 z&O4o+Z-C&CyIg9D9niP9EF_~>{6A9`_$C?eke%NN$#!ql`!}&+uY&Icod9}_X)(c*mP9hXpbU zpF+m321Rjwps-uFoP4ferQmcU;lp4V1TJ@=%3s_j_<-&^R~Ys?_(y%77fR4elp|J? 
zaT5(5R9F|MP}pH2Lesf(Qb>#785`{oF7ZhTV;8YEJvi><*RR8|i?^I^ zpo%^5pLo)FFX|eAh@>m6cl-7ws`^YTVmgR;*TB;E<_k@7psH< zZairV-kQH|n4?(m*GqergvMJ-gF0LYi;+s1&zSs}gQkq6*aridvH7vH_3ZmPJk)VG z7tR1sLb*5uzYw>^732sZerIj0kZBh7s3glpeP$7-;_SY zHRy!Iens9i35Lc!zS22SiqU})!RWefe5uXu=NaU)8rF{gJI)EjNHsK8F$bKm>ibj0 zGHON`%PlI1MQ#XRq0x%VjY=e=cd-Q%mL(RHfb)@X!2n(P|D2RTJ+MNxz($p5l#Ffc z)9!Gy=Jg4(2WVv)GC?2!iJG|n-Dp7C@>Eb0Qdjy5vIZ`spgR6H(AS9{9(jp7qmCrr z*NWelTd-^}3@zPZp4}mPG3YK%$iqiK_g&`(c-TQL18iW|>RKY}mo-6herQ>l$!J?O z)Ixp>27zOO@^c3qnVvtLsoa5&Eb0VtLAPJ>=ourn5c!MXo4%cYu*w5BKlyKAE2HXj z=w2xU^Q2$TE9keHc`^_odT<+hyFQ@&s=s657t%WKjS7s0jf-1xa0gS4v~MUjJVp7W zd@M)69M>)ec#j&5rBu57f-z+RkObPVAAD-%~@qa^u5aA)QnP{iGIS z#vDP?DUYVJQL{HaO~cXU84UC?Q;t__EF?3N(M}zIf0yH{h>o#|2+EMkv}N$=Kw-Av zqrN4YdOm&}8f(WPIURF}&w*snv$@T4r9F=v+XE2Yhaver6dop1x>N{)^%WwyTBy8Y zmNBx9inmhV0M=EsPd6OUM)kvmtH}i%=Y8BK@p0`6FZTbitPqrsD<9mMiQGq0U3AW0 zsK(^gCErq4byGFIjq11<{kbs6}tSzrx2s<$j z(0)@!Fp?lEVL+63$h2I9U~~DUP2q_{sT%a=9>CNrZ*TV)884ep82wA4HsqKy{oH$v z7j)WaP`d+}MO_E07pHrrp6KoI2Of^#7x<)uoMOwTv{6>Mu>T6a$Afy947TR>_?n$( zd7d<;Mbk@Unv3ht-p|w1&;n(EPXzW-CMgv=loJqNiJVVNn8S9R?Q;ZvS0M2z(%SfIFCB)zifrR_$bu+vOwt`&!sG0H!3tx+bV|fl#RbKaD(+_q1+M3XxCoEgaXyQ(PvC4g zPIU8fAo+sJmH2Pv20}?Lt%= z?b}RZq}Rft;#&0DChvP2#mc}695IJ6!{xB%>&eg7^|bHA{U-V@v>hlA2TL2`IItwo z&AV-=S8xOZ(*BbUW8y_YZ^Qww@XPPI55)ZrEqJ8Wgk5<3>~i#X@bBkdzGY733^5}M zCzUc36wZl0aKC)ME-4Qyn5}MA&FVLgi1yw!jjqB zD28q}=MA^jkSl@DY^P(x-Ajql-YfLz-c=e7Z`I)|fw#dd`uX2#%G_VN64>42-_Q@* zsCAJ0xO3i>6u93tg;6|;#7Ykc`QK$JLhhYe1#Z0Gi&YjM`AMTM`7kxRL@P#q$Zmkv zaJJD&j&Y+=uAy6Qz%y1+UyqwZ;ZeIN!Pn(x0@q>CfO5aW4dR~&%X5wtqhF=E z75Qq{W__F0k!GYP4hmrGhzqMfJ>>2R0=I?v;)BXhB}?sN*o&g1oAhOiRm>wS+q#_| zh^vU!MQ0J*EvX3I^{QQyyC-ac3fnd6`cfQZJykIlIJFzyqZq9)B2Yi08Z4InH@MfS zPxp?v_rIaF+1|;L#v9~iWOc|!W?^Ykm_WC|#p)_YP>r9V@ZA#puLw19uU-v~ND~x7 zG%xvt(P&O6wn|GUaz-yGcD$X0Rx^}sQ(v!#!nvQmo<349eD#fG5*c;3B zpN~kSk(%|V*;2eJQTjn0(8ok%jY~6ss5|VZFM}DZD3`EFZAP{}b?D^q&GZH_I5f(1 zSgJf)-S`~Z6>d_=B>*Xu6Cy-}753?4h`pqczd-YN5SyBioIZ6yDHYcB;H26l7RE-&jeFyjr zEJCl%OdD{?{zvdlSC;t!rR#Gk_QX03aotvb3KUX=Y@AV75vlf#B2iP6#ZEGU_JIsF z9bvZd#`k#Lnfb;(Rxsi?WS!$nyc>v>Q*lQvBonf8Nke&y?F<=>f_bxYh{k+3G<>}tB+I|EnqOji9M(KyL2@KBxCKWp?|eUX_xuNlXN}`4 zK~1eZ`gBSGqx!M9X)ZA%|8{F?P>REQ4)7L_B?8Sk4PZdQGnsGz)cIobyK-?f!T$&| zsQD~vY@!Olh_^I&YU-7y;S+QuY>ARQ9rCKg9?EL{1v8Os2XV0qPW0{X`Roqvc4kef z_I%u*{SAHtFT*yuLvdweT$F?y68>wbU_nK{-g67>>4 zTW@M28D=rk)=7r^ixG!TA|?uR)VSwxNQg+*P0Xp+Syxw<|HfIC%HyU$i6(GKpL1d@h~2Y_U#x zjiM_pc^ZqDLK)2pnJjeu$wp>SYyRc7-6t#oeW?Wz79$9M_bP*p8NDVYZqb#6Gz#jzW4ykE_<+aO~WiW(<6E!^p9 z9D+z;k((@2PPr=YP3Gu;9Z`FRONsum%$~}qle<041MBTa)o<+sP!zP__H-80`?sIz zl#=1oMqiIUKmdZ7T-g$?m^fq%36#@)nY*7yg=1#|tp z>)66%v_WtimMQDS)n?F)zVftIo01wIv0yvk{3#?t5;I%^4l%r#Da|GD_OWl1`HFSs z0GKpEyUXJqAHyUWT}9+WG_?P?Hi3~e0d`_zS5Td{;9&won_M6J&%Ay1fGE8>=Qd1} zytZKEGV1J7qjsJ_@OpB@$WRRQAg-QwTUVF1g^D&A>ho_A?2GGtpZU(1;+LU83U!E6 zU6}PYm*b+BtI1E$>FX{J7qvy1Y}CX1Htpd?KulsK;jVfz1A2^`eSkUTSZRnjtt&bBhhfm}=wVXm#xArA*05wKgRGDeabW}$Nn=Tpa7ca(gMY~gMTrroshipADU9Sbf<%>S7Yjyir<=v^y#v6V1TH@kxQ*|67 zvF^FjG_IG>htpxTv&P#iJMv8=Y@F3tj;4K_0TXJ8jen7n-+4|aMNsElV$;Ki%+m!Y zPlem+3@k=oiOfqXJ;*|?TNKgXrc>8s1;`4w@OU?8^wAeMh;Daqy*-;Y&N><)ylsS}U0 zP;Dd_B7cC?&d8mC9b5Mj9#Pi{O@& zuZ_R?y4iW@05Fv#Y^gF!2C+a~A*yY=BiCl9$_+uF6zwEopUX@+AT0U*Y>o9ze}DZ8RMC23U~pWk3Sm9K+w%*o zMkvj~7Ze+y5@x;c75{N}z$tg79C5zYG>iCO2IQC07$=a1dN5;ww1b0YjT&s418!At zTmAi$UpUEr^c?UnPpb1Y0=gd?++%Ujyr42Ij2;R6|Fu|4p*@LG-58x8{XyTqSgAXn z2FuTYqP$&!LDESvHa7~1CaqL#!t~1E4*bZ)L1n?EJ(02?Ff3!drpSv5QiY_7D!b=e zM8#iV!gQ5q(inU7O|aU!07p8UB2yb@Pvg3Kw5@CfDj3bE65}O0mK^(djI6GEOaNu2 zy+RM^FVLipplm6(SZ#1?>8BJ;Y=$=FhX@3BOT)w@N 
z5zn+h5d-GlTAPVBU?gt{e=9y^z}sh3>JL-Et!s{OaJQKhaFnA^9$_p+D_L1!l3dM{ z%dJ1-kglj?BcbLz0PA!yN$$!$+q^8`*|qVoV46v@@BL>~o?7v#dy5$rsn^gxBJF;g zM`4J(gB|Mm4KzZ&EJZlsuW2l>))`JfAp9ee99f8#rBRAWp^n9)Td`!&?T`0v>scjF z{Dj%0{xCj9AOu6`q!&Gu#Xg~PJEV-$dKjN08588M`*9u3V#S^!f@AnwF465(v|xOe zunQmjqMbXaF)CsHM+DTrL6`uQ{Ui%~GaQ(n2$j7D2duv5fZ^&O&zy>3I+9o#cN;TO ze(n{)osBCndQ%Gh69~Tv819nr%{mSEa@r3F+T)g`abe}44|Dz}Sdl956LnLisaTC< z7{O^)D3XtP<09<kYYJW#ZXt`rs16IQMB+kEWrLE6<(iZpE*#j=oGMleq9>(mODT|S9>AlX)bU*aCZf*(x>Vzkc zzJ!B+RyQ%`NzLJp7(Fb&{}{f?++LpZJ&}g3qz%#CD6b-Yd!t{eGv8(sgc#Nk1tEh*o zsjP7(B=F&|*1lW_{+EG(H9jrvFA$bJ3$bX#w z#hyD?6Xsu3)x`CG6lLxh8Ne1HZ$G%upi|EvIti1V zJVk!NjecI&GRQOF3nKQw2fAEPqrV5(YdS}2VEKBSTNDeY*-&Y@g4>yuBV9WhcfW!r zvTuC^$4`2I+l|4%6G zM_Kvy{BQoET;+v>3s%h6lRNLK3^6beKzrfn?WV1(bbGy-cRPQb8^Uo=^*)2Pw6$O^2W_SZrUv!NEX7|^&04<3g<@J zubISI@1I%R9f?WVX>b~DqoaZe>5hh7lOMzLQ{s}}DKV_g8WSq^cFuABlblaaxfCh% zx3ydiBA`m=*?Owf3L8IKw%izgg^OHn8@dSCqR>v(CY_X^_BgM&33eSv3x zK4CeFQP*z(FIJc4CeHzZhW+Pij^9muiy5gQgrO|q2dyKVS}cD-q?80p21f_m&ju1% z@soj^^OR+u4`V5QC@7gWtgSJUEv)~t8t*Xif@FiQ8mVvTlQ{QcOq*dP5sr&@Xy5_` zepY@9H$Xwk=|0*P7))cX=1Yaerza+cs#*G}mc_?r53fnQ_t^$0Bmi3{rTZ6U2iO63 zB`FtS({uSZ#cdv=$Ml{~bQ@J2Fn;XYh?d35U9wx`?C$A^9S_km$T8u&evRWE_T}ASvmMgMvNq&_EjV$Ito*_#L15qDi1g>)E zo8-x~%ZDS@{~+31@~llPXY%K87WdCiZlW0}J7XTUr@qH}n>N)d1w(7Ug^H7b=hk}Z6f+SOEY zcK-%zzW#MoFiV-X^d>Gm=pdmt?EyEx2(l5A(zjSz$we8*2p+PsE}$U* zT?G_m^Dc|8c!qq&mF$;b??0B*K&!1xNMePqZXyCe)*(Yn94&5pC4VA&2(}ZsOA2|% z`&C6h^q?Ic2`boKh|?##6KM%+anG>99|F~Fp-5r1j7uxYl~f#+fNyi`#H?&{!H`dq z1_?%SOtH#VG~;am{7*83lgu3Um1zWHo)K^r&3)nP!_3;ruI%w1Rji ze%+RAwoGR!qGACeysFl_biy~aII2nFnLTJ*>x;Apd&k}3sQv8zPCSM``hs|lz z@+G&Mv|P}V36Ns^&FJ1qd`gTB^YrAXocS$+ew97kcdjIYcxvr2+IRhVy<1K8n>{hYKWFYPIry4wy##QVWa|BvK!(y}u9S1Id z(qJfqW9)*d;V_9+E^aj_Fdt3nb;q80y&rNlyp59+w%m%bo|CajDrS8PxVBtEfGOf8 zgJyeWeg2-#FgKbB5uq4MGb+#0x9984k(6S$i}b61+b0^y&D*^69Rn^sbBqRPA?HqN z&O?;wK#9yd4s=R>am2CHG89RH;tjB>5lE#W4SI!X=lw91Ix(foh~xYT$P z5qu^=y73`rWO?)#CnS0F6{r7~7TOFcrVP39ekFhR1>O3aad?pyOwsVX-`(s5N<@Wa@&$L1lhOX1WY8 zz)McH89DJ`arGxoi1N^GI#9;Hl4;ENxa;Gx#$^+(hr_-ozf_yEMLHn?Ao;!lY z5S1ZBOC>?uQCubbb%$s>!}qII&D=)`$=7e;aW&5N^j0NRhHV40&V-SA(;&9e^@~WVD@+tJYRB@N3!pVL!pC}5>ibTr#bPs{5<@ke`X;N@XuzMKP2W(A@Y{kY5eEfx*FLI| ztJax0jXRW&U;Yv}Y@F@|RIu0ybt!mEJi-EQjfF7NZ-7tzdAk`BM7eFz^Lw-L6=RGN zxeUPQ35}@k?6D>(i}BXnZv-ac!tgiBa?JYxwN03J)j; z-b&<^HQ0&bA9Me)CQQu_?Q+=DLq2a6T~WPhtzQVoqRdrQgzE z8z^E9ws1TGz#IAncw}1AW3U`TR`mh5)IddC#z+Hw_G^KJnF>M(4DJ{`2A6)^ zaYA=*G(^%P4;7qLQ=qW1W_q}(gJe@4_7e#JYL!Rq{U_Ih>D!!n|F_vpb|MVhwvA~D z(K7*;a9tiS^-h=%iD-L5%|N%5y+^x4O`0J!K1)bJ-32E`M7BYIaHDljAg?MvSbY8y zBl@Xcw0_GS-T|e#8&-P9&{u;9lg`tue0y*mjU7U2tHYncdL`o8<< zo|aNNG80%bm@q(OfQKQx!QNS0x@#A{E=`R?p52(A5Eynv@RH@Z#^tWsMY**Rt`6Pp z`4Dd*gUFsV4&4+Mhji2#*id{iQb-`_q(}D>x*=T4ID~80f7GmI5%%Fhux#%oT%)1; zN`=N2?$7(_)LKLE;yDpH_e0I*+uCrm=uU7McR zds*tNmimF$xQ9oR{K(O0RYKO^I-gKA{N6>Tn(+jMImnM%aIqdAG?5<1b@sz5niDo3=^lirm6H4h17wKL?rGmr;D$ z(~lT(4gQo;RqC@R7-!$$NirNvyCxft(?Mz3HA-UAh|LPc0tti{6Y_s0apYFSsT%~Y zF;?wAOwm_WaPllEd>Rx4Kw<6AQ0fZ}pqYgoI>nLks6aNB2nsFQn9+u-Sp*W3i(N=} z$>2e&o~G#sHvRG87gh{sYNYRpm5&8%?MEk!MMk#l{@Nd#s_fO_eId3*Tzkg%9e@w$ zdQ&oRC*#2S{oX|ofgvPMGv+QbK2*2vIH$}55=cvpUFN&B>HuZHFtkYaF!@)%78&njA>WRN7C{{AN;kS*d^`Ak-;~l;@JHY6(A+|qY(d**=%6G+ ztXbX|q?{wxhoAsxBK{=X5LGqK|k~C!!bM~ZGyCHtni;}0lB|qsiWxv z6w`!J4pQkh18itx8!WS;Nj(2~{Rv!nuW-dF<54hn+sS3z=Pp06r*xV<~8_P$Ug-fof7647uNf zdCNaTi~Zdr5?}vDc!tJ($p)=soqJKA+27PG`_u}p=##T?L6zufCScGkdU*a8Bx6|u z6i^z81L9O7iUVjlz(6-)Q7?Xa7Q6c}nZF-1m)C7iK+q=&Oq5CK(fNnUX$v`F4w47a z;#U(X*Q3#aPLpy-`|c2I2Z8C4$ZmYXm|-)BH#m$8;ua?wYB}|u-iPOV;3MKjERgz} 
zXglJbUDi^GZeo3l|3%k|?G8a5rf38K2=KeNH=MufNrmxt{#+tjqH z{kghltoJR;TGuOXar$*A9=x=QI-l%AG$vuh5FWKi}*zOQHR zzJa%t--yszCdg#w#?T;wjYiM-{?5*x*Zd42 zHmI!8Jb|u~tvU)rcWLqz+`AMT^+dhQ;9>ZT#mo^o3S~UNO>3ouZTvo=(?!q(EFz0X z9G@VxqkOit-c2%oP`-k#Ur7txy>%ZuZd*_kG4rKMrq!+iRh@J|Ph!%oRcf|$%HFA< zL5~EnsJ?R4gP8<&mehd;Tq|#3{UdjElfn-yR{-6ja3J?YB~ZJv4G8nl&Xr?&QOGY{ z&35)XYpCryn!_ZE0|~`$HJ9&r0)LfV+T&B1Job;uudV<?NRUuRT=U|GkSzZ}uWpX7_l#+@JgYs^YqJ_A{}4W--ai0fh)xK9o307~$Eevrt^j(dXz?tBXFd_*g7kSv~4~fCd>f1CJZ0z>d^B z=p!y+d!M_y*Agmac^fu$sLk9R$ z`TewfJ1PGzCql)BDdIQD+~~gMs^DQ;L2wME&0G#D_XP^dEgBuV9cfVyp@4QJP5)~7 zJFRUI7gAR3O_eR(0@-TP5H1x=e@kVSqZq@HPj!ltCrJ;ES}ldiMa41a|V+ z&z~@MxF?bY`7%m<4bvP1;bLe=5)pEMi4fL66;lMp zNFLXK13w?x)4ku#Irx-Z+Y{_}pqYX2 z$dnf$gGOYm_UWr<;dv?U9OIBpS;gza^tP4&wDr^zlR+cCl+|S?zY^=U$b0^l2MRa zv1A8l7B|uh7SNGT|LclF^9*3CQy}5Mm+e@T$G4+~TLE)8hBe1QWN}e#uZkr&R8H#Z zA9(6JVhvYQFG-LTj^RuLN zKF-)fm(K((4AZD@6{^zgClO0O)VwgNRoaPET|T?1AFB40@BaD{=%5i*zH1<;4+CU{ z(3Ad?hz$=rm`F+FOJgqJiB64EGn@h5iX`H4v9&3V;B-g(>Pm-_H@KDqiInYdMo;j! z=J{zXWqukOb&y6}&_@$Ru+yakGuLB8r2((yY3joYgd*2yZ4?>pF0(6~sm1H5;^S0Q z`oX9?zh$HLRo0t}$J5u>dzR-%83D4^=<+jtk5~Sq+rxft;SNU2vawzFMT#>*WJV;2 z_fvt}KaZ>=+pY_nc104tWuS8+KY1n zUkIb>-vyT3o}p!b-YWb|ZXp11d6u@yX@@1T1ykMDvo?F!9{Y~#Oc@7F1nAuC6Gbof z!5Zmzn7J#(`I|%{_h+N*ODqM_j8TF1EbFP^4p~DbAtV)@q@s-CZCj-`LhW?GQx)Jz zNtje}q^Jc&Zf$8}#N-D2sJ2q0`Qr{Q2;MV~&mPDHA!k~(oqVY1vxFrO`EJQ8gEy7B zRK^PkRs-n4%GK+H234lWEr|(8Qz>~qFaNv^SjbWcK53!>4xB2TVT=kSfb=QDrWMU< zjMD+G-pK)qRDWJ3fzUx@XUqjLk3`y-`13G&H^3SGK)>z)L&KhkL{eqb(Fh{S4PGf+ z$P!)wub&C@-4p2-g;XF%eDBZybCjD7djZwk2E2m%vVkRS16#=a+O|-0$xQ@uPo)BJ zV%iY?`*$%w1m~GN@MgTknXEznj4*IVXu}xnV6^;77Q$;&#+98E2ePMVIH?E3f5|A> zukHp;1h2&?hozc&Qb#C7JqJV>UC^O(;}W<6P9AV+L%_ECn9BR^g}Z=)CGdh{j)O)@ zej}~L#T6ySoE1>fn&ri{4%?98!4Ns1q%#sxpY?)@W5%9=JeLM9wN;5_MfwR{d5$rS zfkFAex}lZ)Ryn7oDblY=Kv{bJg*Mz%qc@iBVXP!y0Y9u5AEwp8-Y(%mvR6@}Du(s` z8jUM&R!KCwaEs`5YCgXQjj_7bOw)qU?^3!r+Wo`t^Sb$))MGu}XN?w=`FX~6_!<9= z(yvM^5b=SwPF-sW&;jqC*A6+{J53|p^N%24>pU=t`I3~!d&(LH`|Hg^d2vukP>)%}8--)YkIU)$YlrL$wGeXL%lF0Dg# z_Q}{$^H%&nGiosMR9p;!c}4sH{Tj}+dei=oAI@y%wwug&D}LPT?!%@$f=l$H`Zc+; z*E!eWRJ8uD;asKr%XfzS@`+c&hlF+>iLl9J0Mwek*U!JpQ`Viu8eatHVrO~P2K21n zX@*?iABGNz9NiIoY~PuqlY7V20If4gg?pUy<{yIV$CCTlR(47B$@0Ii(yS}YCF{=e zjn~TOgh}m@J;}~-I9r?ZFOB~mZq79cvKpuCH+N>Zz@ z?d{!zM}QXxlDlM4B0zhp{ZNmgD+J+F;0=5^?|qT-j{G@lUoNQKY$(9kAyAjuYgD{L zDtk*&1r&t$)usGQ>T^)Fn>r4MWN1gPF#p)aFX6ul!V3P~2>J!0wxcSq!5>G|8V9+~ zThDUCEut>3W^;_G_aBYs!=ip^;7DR4^HLpjQ%TAAJ@18rX5e z3~JVnjUqkcgG=7e)?bjRXd&xFp}?w%h3LCpwaBTQ4ai~=AuA;J6Ybm_b{*FKw)oU? z8@;-LfIxU>o2um#b*EiEdz_8t-8(%Ufqb0ihxFFKtyUC9=WXI4;KN2kZDWVV3b^+d zeEL@@)o7R8+uQY$B$^HGURz}bL=7cY4?lEGw#p-`jnTiZ$N4GsniOTd_Tl(KXXm0B^gP_VaI*SwtQ@n z00OaLeT9;%#cdSP~QaiOd$YDr$1YUOp;9kOzJj3SAKoVK&A2{?hPH! z4`=WXb~~1T?HT1dohtuOf}F>y6cy|AXfgZhm$W*#xPyC=$9)SUwurunjrp)mW0sCT zpht_!bWOSK(lqV#@233yGjQ+{50ID10>06Fd%HhNuEx>&nA3Vg{X{1f?BP5Ntw6A6 z@|@ysJWns@KrHLuVsM900_AdsX>!&lvpJxFU(T!~$w!yu7oBwZT4y%yaY{(YH! 
zW7z#%#rPwZ9dadkZUtB_`+ba-i@Y!zdI3UrI12VzjmEs@_=il`r&fNIp`N)8ze#45 zxdB!CMGEjWSvJn2LKt@DA&i*SAVdQ_I3w((!CHOz z;xm!U(L#6(hP~UwSybG(oY2Xkto`sz2g5fch6Aotmc+LFA^d#jSd~@dH>sg1GJm~@ zCT2^a3_EN%scy?3Giv*Kzk5c60h*Y}=(VsJ(5L#|on1Ly<6)RdfXP+|=GzV4hp-e{ zure?$VMiS6qcA~d*CV{}kE>`oX$DcU;ABdftf+e!WVC4-?sw=o7SxyAezYWQNqWA5 z_5jnp6~{4@^Y_DK=Zw5(Jv}2;;j+(Z)YJhmlfRF|6(FYGS->{yRs>$tKTu7H^_H~I z04<2R)gZIhXu^ypYzRsEL%naP2Dl!!NFpVRw*b2@EFklV1h#I$v5`l$Oln?9q2^o$ z(#|%KTK)ZxJ6d`8)61TQ@f^fK}Sf3$0pzLJ;=Yh8Z@vqA?DAsVVt)F#vZs}%-Wp!6p z8IiUWJggF}6pagdreq<15ID4tDyi1$7)bpd>`!Nzo?i66@|w@e;@tiBoX+X&MM!t} zw>SFyf^t0DA}^aF^l`{+K^_4!HevNNmt$g+ONjKruk1S}fK_d}OXZe6f9=r)HszrY4rS+VNw@hI?vS3Jm?4SNq%vQZ1m)YY|* zw$vBT+o@RJ=yx*?B4HJh$gqg2)SuPig4ecyZNtqU#zc!3oPnxo+9Ok2nF2kVC5K7d zq$CO5;O?1iD&3;s8So-NWNJ*XgqQ;4AAO-t@0Q!hWzUIR-7I+znhs+p;C2L&*utS! zE5cwc)3X=?6mdM|Cx$i^S5{DQjWBe*hCTfkKj151966qSsSB7C+tUVpAB-uayY|bXu$%u87{8w%EC+gv5zZoXMru zi(zEfimheSD=k2pPFIv<>Ll}RHG9(677QqYKTNxws;+~Cg5bJywJ1JM9WiB|8lcBx z227ExzEcAnOPIm2>%VlDHju=sEs+H3BD+FRj4ksxW~POsORhB~U@d#ti!Jnfp2~WrF0?De~nKOfEYmORU5V|cE0d4F~OUQ|q;JAVId~dEu zhWYeoanbzL12^9oHa=(*?_Vqpa#28Zk*2G??8*>54RYvnNQ%3;Pb>>A1}W7sXK(SsU#ZPi;Yr4^veQ$+uxH0}APn zDbKgJQ_y@VXOs6NA3-FWMH?hbcNV_PUyHaHT(WsCXX9e7XBHzK&mdhccGcJ2 zS^3-PSAqFF=r+}?HG=>m$3)^u^Nyu3J_W8uLnO>kQ3a127t|~zziS)x4(|uw? zJi@R!zwUz&qn~Z_6m=mCQs@uEmc5l?pW_~)8%7UG#2+ns^(DuOU^T=-w&n^?V$B%6 zl6=;ja$&c#`Ux1Y)vV?nKEX0I1}Cj!s@bbjm4N09T&<3kF;%-8DmNx%S106kW)o7R z?T(VbiN^7TuG450Ox18p>uFX!b1PiCscLLD4w7xDRFP`|`e{xB(7fO3JmnLFZ0Sxu z`&&-VX%0^g15o z`7Sn~9Q3fIdu2?8XUV{=upYXYQb~Cauk`mHA>*r%wG)@@G#|SXid4N2TOXnu2RqaC z$*S1H@ok>zC1lbyidH|e&4KXc!U?7XwIw8u!0QA~aH;%;v^j+4YGFRVxBt&GV;yz*KXad>EHGb8kj4F`( zu;xqET|!rgwtv}Bv}*Tg=raGu+*?3Z)pc#dfHX=N=r)hxtEM;)U7dbZ7^o{a?q@0X&w>stYCr4hnAW6h@AXPkRqn0h4W^ncuQ=zD zZP(_%_BxM#J!!h}ovXNvjQ>#&(?Y5rHRP8<2e8=6b0;w(fr zq+n_|>MKWehK(?n7C%Gs*aqC0Tj9m05|Z?*PpxqBKPNZ6>C9k@bo4agN1pZ0kQ_Tt2^ab2MYyL3C-60Z3g zSurkLaCDhIqVAiEg{WIh$OvtIoT}mH(TlD0FLY~=2RYLc#g+2L!5>0_)2X&RtYFd? z1Q9b&)V+OK6!qg>EMmyH!?YjKh2MMfX6cidFdJWDo#u1ICL(ME)tyuvM3K?Zg~ZkA zu1R_&+gA^>9k>GmzfuTr_nBjzPss2z3Bxu$!mi6@3HMLUaP!9?KHO0zvT@2}8)hTU4ESBkfMAE%=Ge-YCcI^U`Zc!!zw=!&WRgFd04HB!25UqW>oIFuvSyQYPV`=3s|4ciQ?w7uz|3 z@HIE<&PwG3b?P@dqL9_0`#6VB7h1Xz?>UNUJ2dP}b~k+)z9pXj7zU^M1kF?$2lrQQ zANJy7QepHEimKV4NTOR+$j`-$3|BMQb4sd@-@2W;2e{LGqqDm=sr>@OWqMyZl_U8ZM15P+_@(TC>aSnRfd(K5<^+2K^;z}x@0@IxuU!`WvCh^Q4bRI3R%n6 zX+G6tBZMUy+BVN)-DCVr(ZdT{!%vuX&^E>H&KD7XM=Z@}Z(01M%!^Jbd^tSD>KgwC z*=a$%$jP*Z=Ay4C3`*bB;INiuSKr!_h=VAFjb7V!i0fE!55OoN)}OO>R8JGHALP$x-j;(m3qdgJzQ}}_67c=-6cBGyQb!ACC3G~%4kaBSmo%Zcm2@g zEK<~RH5{rBI;pZp3iTZ>wQR;;dw3 zWRsT1XNdqZW%CGH6#Z4{fx2md$IBls;a3ON>^-)6T!gwb3j~YES5BSA{aIu;1YId! 
z?dMo&gw$j0r3gEfaKK5bmKI#Hu@~0zd`@A2f5@NoC3$(GM&3wzSm%BY_cJo(daG`} z0L#&o;hayZn1~Ld>3@KwwMfcTR1ZzAZH?>Gk8z^C99&5d4c(Zqk5fA;lybf$KQdNf5$@_DVCOi|qA<0+%0S+z;QZd0 znn%%2hG%&-9#Q6z`-!0Tcr@@Qd~&n%i;Q36DIAuQr$dRB1DaN^Gq&tgKPDF=Nib*Y zIZ4foUA}*&z%)Rg6{_^^O?QNvt6g;Xvt$nc41GI7G;s`(VxwviL#>rcwQPICi>Jm5 z97H*p4Y@Nq>EL1V%-6g7FfSrl zPEm>gFUFL5@_>bquq)WVyVd)O8WHzNrlAbFJk=bE?Mv?C_}Bdvr8weJ!EbXBFE!ck zwyF8udW&Z=Asl-yhtQ*c%5CX~a;WGZDoy&3ogzja<2`j0mDh>G?sn?c7$;%F(UiKe zV+*^=Kyx=^O~1)EVM~MO2HDLpB(xV|Tu66EQrr0)tJHPP6YJF6H_pDX1y>cM_kFI; z;3&TAtvQd{=Bus3uZK4jkZ7ZOyIdM#+4^~(%1pTe{n8+-y=r%P`umADU&;LT4+WwX z{0)pLe#yT3L-L%}TgNMhE~8KIL(>kqt9MoiOYPb013q@MYf)v^h28V$!%$555*3JI z_4v_g`qk$znR^UK$$>&RuULtunJBHl+$Abkp6T+JK7UW=Sya19PsQex0_BjY=bnxiSI-)PS8bb&rXN_(rih9o=BKk}PhLIn zlM)M1{B~57qByR)>)(ytOHfN9y8Try9gB_>;l(_TA`RROD}Pk3c1j!FmpOiARD6B5 z*WHC|pRRf=7|_K~LRlS^x0f5wraR`#?)`h8n(J4gLt4_Xv2$~=|87*n{?~R7(8k;U zefN3}NUJQ+D*I3S`s>%OgD-yfuSflR|N6f)XutmH@1{8av4K6al%>A$OVDlm`d`tT zf2IDr#`dV7$Nhh7Y=7Of{(r5p{h!|SkPh~sx%|x(|K8>PPcwVabe>sV{)O2~1BdHp zNLjLybKEprl(J+a=eX|kg9^Frrk(xuZOSH`(E@xQgOzrOyPL;pK1 z?AbWk+1PkN4|{G79(F+gpa1{w_ps-HXoS*V#Reht|7)WD*~I>u;lGKx>Eh0D(;5z{ z#=k|xtnB#0;gt>O=6rUZb7I0fis2 zUI+&MGs=YL*(MtPLopkh-dm?g;MqPV$mBd3@$|RvLT+dw_ntvEAZ0;?5cMWN2#>g= z;dJc|_k_(_er}50H#+klA9&r5Z8+dq6t3y^nZiz+NyF%Pe09?6by%o-F)nDp z4Sek2kVA z7{xV3Fvyrb^=?N4Y9_j_tJ%Sb2pt*XK`Trj&>nu)o2+qc+m?aFzqyf^*Uo>cU~PN5 zSJ47oI{UTB+;BYi$#VQ*%}R#!!)pJv1-FV2Ac*z!KvLJ!)m-OUSP0GJGx0F-Od1af ztvlaUrxfedI*pdUw65EgScLhtIWD>VLSXj`PIf(Yx*rmhV;CsmyD=Z)7FT7tCmnu` z8n0WAh*qTlu;3=$nt4p6srAWXm$+h=Q+~v1_s-bW8=ve*)>0U08raJ4t{BGH;3~tm zdatMXwm&qC>wk@zClu>?T{xZ|`%P8T4^!x5`Cc+d&#yK$!I0;{dyBsmp=p9+jD{a- z5n_xLJj@~Mo2a&5PV68Z?KL6gb6APdvU&w*INF|WH0THo0#j~5V?N($#gG<(Q&haC ziY&i6&Ps8c>LV+R^kOg!TB!=Q0@|%I?b8L{hcxfPkVsv;C8T$4+dv|!QND0IAC}uI zxPlWtCr_|o2d5Yj_8=)FTRJv?J1Oz&(WHccypl>5Fw0V5Pi+ZPYcm59~hMDTj8`x39un- z&4V{oFTRbtw(&Ez?~c|6QN##bsY(wWly3?Fi{BK|dqO=>rYegKks&msm17o)w^6p* zz06ZEMxuep5tAGi@(6M7?^L4{{T|jPiigMPjiYkq_OP?C*lEh5P2GHkdB`@evUsW4 zb{ZORR_j)F{rJS4vZ@y{l$jty?UcFfO}Fm#yVO~(l^K42yOkhy^mu)?G1ECDvzRb{ zt&9adxXN5l@#OpOc{ayBXCPcFBSu4i8WQGu9CkIP8viG|9vjz|lem9no|hym}s#Sv(JQ_lz=c?TVDY=uJtj*U?*;2pt8*jH969x*uv zFExNJC|PydeX(bhr}TySLYGu>okMCL(raER%O#-@O&|PVP6m zI#*WmJgD|G`wXPGcn;JTm~M%RH)Z%nLZ#=2jR)6>h=gY8aE}Vgx!`rlg*Yr?%3RzG zV(f6pT|7z8I9`gvH0z6EbmRyLK?a^RsF#(MJu%_>#dW9-=oX0i`Jn#r*hgIa0dzhc z4e-v=d4_q=$*M%kq+_ZNX~g_dncAXfY(r3r=%sQLkQvt)Z`dh}g63oB=l7b6FC^l6 zj+E_(RCZm+=n;dcis(>{A(mrED5vnADNPSFP$s`-&Q95sgoK1U2a#K%D`O{OAgT05 z)RgOaQ$|MA?}xHIdSo&ErVj7mnqs_H-kf8wk}*^juuoQA{D5pM9-mbePx7Iaar3XJ;2o2&tD9h%xjN4Ws!bWi8;T zr`X^5M?EI7>4AD&izk{|nHC<;G3MoOMaLw(Q3|G9Ib*fleMO8bMQ4(WCU<#w>+Oc& zN-HvT6^Bt?Y<@zBO+lDr!AnCx%=oe2G#E-0YcW}bfrQPQq-SsJpp?(rX!gIq1Kju> z0pdisYq7o#JN)!&--xWceJY1ky1Y5P1N$tiiu7@5-pU`F`=Hx6)qwSiX$4B^UW`vV zrJkbj%{=~xa(SX(FP@IojMc`WAAUoyU<~Y5jDnPE7|rNRpFuu#ya+{UUIpWC#W8&G zqUHZM89+VSJ32i6JM}1cJv93t8z|ZUDJD1sxsT&XL5f z@y1@h;;cDZG-b@)>9bqy&U=l9-*7M>%`xV2_Dk_i-A)VI$yesujq*h6241fOOZdDD z4w2$q0|NtXoxq8b8kSyICsE8FGuF+qxLhP}z<{-G{(9W0KTzzM(=dx(ap*_q}xl=5c zUBTrk%{o7c8AXZwro!uZkxrXjb|n`+0kv6Y$=Zp^pcy@##pSy_&BO>Lx)IS4)e-qd zT+$_*`-=%-;pX!>iB^1P8x@_Y`>-=b1psTgO2DWQ3y6k?VaEY)7`y550=^6LQtDPF+O#DgsrFOgY=?${YMf1;gGWP)`S$9 zv0+$iAck>f{eBO>JpblLNivW3mrAjh#7F1*y&b9Z1^eo1B5JXYwMq|1pw2Z?hovb) z&V#&J*o#k`%om>wWOO1|9xj9%W~>(-4^|0|g?;=TF9xbVk#Uvg-To62`sBIBxhYE!pmd2( zv9YMYGt$%khEDnv;TATfeG<8>(D3}_v!_@nA*kBXr1(Yw6-=wCxH1*Obi;4wHF|=j*SStKC$yOEc2?QJ`XrLc#sh z=p&B6@F%h!t_}YRVfi$}#Pf`bXUH_fqcZPZ(tDVocZ$C)tqvjZ5l`_6;I^i)JmGV2b~WD5T2*)F3qYF`R46 z^)XevT9T4yp@-?cxC|7l?XvB0Ilu>Ue>5TZ*IjL=R1Y3JSa+5v?a;E3nPW$HJT=f| 
zDxiEo)0FYzPx^v=X5Ht477E}}P0fpczs$zZZ$6D|fHvi8fY+QZXZ~+9P?%%l*-=wf z*Cy{vfjYqp#%}`H%@od7>5MPnX4SWPlKEHbzkn3#p?ay__$NzV<K3| zlg|HIS)n!9{-xuF7ODsz$j4t+f~Oi01tJYAqDi%x4Ws5qlij&Cd~KWN8pri1urxxe zlIJHTg$6g}dy6czD(((39Nd*O`T5LGa2;gEy6(H;HMgr*GJ+mfFGPZv|9**C2sayR z&XgZ6AE5(S!xKAZvcbW@%^XC@Y#Hwulib1dt5zR{nmJl@i|GcBk4JAGq|o9GIOV+P zjHEf(b-h$Cdm+B7w)EiseG|3aro;6S>!~_k=5&d?maZ)n4*1}CI+!|enuW`E)H5KKt9&{)bt-3UKP&s7 zKmykYm8Lbh=gI8qf%fZs5tMpk)$=;PhG}6Y3tES#NBty>L%Gtpr}g0d0B;KwFUkis zct)({uh!T3S-+!y_;p?vz`j6h*GS1gWlyEtZSkqGF*U}Nv02y-vX0}nUC%udZE4ty z_5K=Y;TeRQ-{!h7Vs~TYIoM|A9MkGTKNO0YChq0SzM{Ky{k3h9d-4{@t_&P-GVl?9 zjgz)$R?PDpvLukm;JREuO$}kHoj^7|inb2_>WG$2-^E<4EYlX)Rh-)r+IXsgsRfSD_^Q55ejP z8gm_3DtOF@f+8tys<$C;;Ds~}9^m%uo|YU353q<7Ic`10<^Y3gdBGEEaYP-3g)mO@ zAH;(0$L|pV&DndrcDD9f;(4Tr&6ftvM@%Y!*G3PFHVh4~7d1w^dM25; zoDWGYsP7T0Ta)WSQ#^g1mr|;tHYO#(8|-(zRHKJZ7G$oc*)p1iYKGkGGlTgtY4OSR z(~i|u+fsQB`Wv>n>JcP&oo@|~Y`$$!#f8L&dw-1&{+{rI4Os1XbqGioOCNxKN!>&d zg@4_JL^I_})%)gKh?w84^*ivlq1fkkE@QqKM68N0*Es3F1kOt(%lCZw2FZYvWDC*) zFo}`Tg}-Zr-o2JrlIo%yitnxFSu4(T8e^#JL+(2Q2(RMoad33xTx9UvhtHDZ$=NrK zPd)vCmqAk@Q%Rll7$$dk`+Iq8tV6!xqA}9PY<$VO8rH#z{;Hslo^-peWcv%Rfl{#$L$Q+2q?TxhC?yQ_a0*MRB29Gyy;&_8isnEG9i<=t;Epv_X#%i(5k@)nVf(nHvmgc=PfsQ2VG%}K(FCi`_iW*DLTjlzOm$5sUd3`P+Tu%vFO6c zIDOM&lX2(?>-i^v5H2vSuEjsu1ziPj- zvhshhk!DtfmxzZ{>< zQ|MUA1PU9fQr|kgCBT3fWt#CZg7#lx2@XD1FD|PB(!31U_u*aF&+zD?!78*q&PKH#bk3{w>Waqpx3+x7N`;Pxe( zXC==)CLKo>fuyXqfrM^nzTs!y~zbfsNm-kkccAeVe3ocgGxTK^mEdnAUB4XmM)B!|<)Qh}^ z!y;?kjrR@Tx%yV`Zl?<$-R||WevvbnK0T6y8ilY;vhIM0Cg16(=FB($I`VC`=sT;r zhQSl3+zOlG>8UHp!uN>EyanB_^_4l5o<{CR3>p)*^)}l-dzDPdz(;RTneJUhQizvX zQ`l#^n73i9Kr$@KXg@aM{33=#G0BhLY2!^B;rR)!2B9$dZ_H_4+U!$R@Gd>d+Su0m z7hh5^hqL1MkPSzk3;D~53Q!vNGk@_oC~&bZ&TlX2ysKb>y!8X4 zd^Jl&TH5_Gmo{PLOi!Lm00m=n7$I=8#v@@hnZF~*NWEHCP(foVLm@JGt?Y?XLF!7R zk^r8E@{(JAY@9C@t((I-b&(cYhaCn9-vL;NdVDltf0sCs%Q9p077}`RG0?*OkAre< z2mO*@T~pR8)a~+QdA`>cG>14@ov1YGG_3jItOT3P*Q`E*j5hVvH?y)9RgfiV#$~Vb z0Ug^)=FWMUQFlV{d3y*^1vAWZu&;OwN6sp2^SL&-z(EC%A1DI6x=KG~XA&i`pH^Ck@J#JIoVZQ3(@&?73eb z0l<+pbGN+GDCf0LSYjxWvgu3hPe|=QO`zaO^z>hHJ5;Bp8)m1Y8#d_Ye%ZiXK9kl+ z$n}^TC!Y?IWV7hn$@;dQJ8WrI7_#ZC^XGfZCF^}K_eZccY}WUDk)x~(iH!OUYna4a z4>RdWqTA#KSMP#L#Ymbrm&71YNfKUt0QH|=hgwwZ+4-SrY;YvYVG%DayQbZcDV-%{ zbNlnYx>Qe22c?v$4v*0yO|~oJX-?OX1Rn9{#W!ZMe_VJ1JklgV-+Azl5ytr7hVsJt z6uw~tz={+pWFXv?4(sbVon`(=e%l#y9WT1@^`mB39w*oSxZONjgFYaIKAijO_pd<|YD+aP zcW?Xpg6%gMUVkBj_Hr-bM2f!l<`LPPDCnoAkN4R%b=l#K2gCZ@OGtyLvt951WwHl_ z$xF*dl)o8MEM*gy+M{D0Ba>t9@c_3kA1Bx`q;w{Pk*7<(%fSq)7iuKFt&=ZRvNOwn zn(_fG_g9rc{?D3(?Asb)`Ruus0!2`A&LAD&gf_;<8#3PlU?A*F%FsD_0Cc4Yt>Z&o z7ErM1_a{KJ4Qy=s!13X%n|nJs!OW1N zGF@6UtpaB(Vz6&x1S*LtY!-;)5idP3;=7q|A$qf-BUXRU7y2*}8hmCOmmZh2Nno3xka;_^n zg+oC{23g}ZQ=FVWZOO=d(uNub@^8O@-1EaRT#jDoT54kdoayWqTfx8tCULa7Htv5G zn%LjJ2Ify-x4?2>7S(3>$g!YfE?@bA;TMGA{bAGYz(OHV$AtQZT*X@r>Wq*2%oQ*QxUIil1E5+{ubt ztsz@O?e{Us+~>R%5RJp`^p#*EWUA`tIgGAWfn?87sdY9zt!}5&VB?5;gv_@!D*r|W z4?OXjpFjNW7!)Agre$5vX*@H%>fkv(?uCX)4BnJ7uxo<{(vYglj=p=bi#5^Tvgg!o zg3mb0zTppd5F&NFuPrN!3kJST{gI<4Z}cmDh*2$wR!+QO2%d!06#MZF+%*jAl zWUEoE<*H2&@rA}~GKi$lCEKNV(VoKxYF!zagnYM~e7dOQGb3d4nW_nbbt!o!PRT^g zwPV(w(Z{|c@HK$%d3K#D$w73rt&3Th(? 
zC08K<%#1naJ(%WQjE|$Cj64B>fs=pZNMJY0bG}c8Y}xfGDH+Q0dKE?*JHU}zbw8DW zJUxaq78O3Q_Io^^o1rb2gC;kL+&3&@tBbI7P9QRdXlVq9(+CyfGkrZ{v+i~Knw0!& zQV4mpAhz!C2;vgjIu{Wk6nAxX{exnkYl>;2Oh=v!X5Of3{a-cp-z4J@`QHX9a7qac+TA}1Icwi4+{>`sB(8QHJZF3&O%W-;)QpX+tROH4>C)=)*P& zEibE=7U*~ckE(lD*X{FxHq|EOvlF_wkPtptWOTtIc6}VA#~izukeNQivziqZ^H9eX z?5*l}T^xe-GKH{zsGtf)&<6+|tOA$E^NnYd+rKtO_X8iF&kjg43jo{$Adm`Q5>SEG zWXm7*o<{#t5Sm*-+jOyupQsLd__ve5$uY1YtGl`*(+1h(T!OjY;dk7ie(%68lR^9VE_@~{j1uUGL(cF9UWLKZ<4_H0B9vkx~`^XvZ{Z!`r z4%yLcQKr|>$&;6Vu!K&R5_SOlayLwCg)x&Wh#M<|Ojey|gn2XLR_-#r46ZBTrb9re zK(>KS$XX+6^0Ux_V|Jla#s1Pel8*N2gH9jlCB@a*mc*Z(}mSMGUTlahpKao!qLt?n4#6m)x00l1I%%}qF6Q5AyjUv|8yPxchTh$hE)G_ju zXRD7b9-7`(BR2dLFFJO`yz2NF67f%8G2h^?qbuLmb#QLn*0p=JfR1(3M#GBV8=RDz z0tduAFZWYp(U+C5ME@%c@YKpLSIip-b9T^iMOj{;((CFnH`UX1@mn-F}q$u11;E3UI(- z;2}XYl`}m>~<5kK}1<8DgC=Ehr;Q9hRuKXTTwoNBN4VrJ-+zOvp=o~ zH0OZ{D^;tI^8ij5noXv`h&&4T?MofI4o~QZUB%IsZ>7< z2_P$pY||h}zTZ-x`%z123GwA~a}Y`N6~%P*ns@-$v%*k2ww|vnfG3~47VqB&u(;>> zk{cBodW~$9RwMRxU=#0vaN<8>&mxV^>liUG4`PIkGq18S+s+Ojo7^{zoxcT&8*xAu zd+z>iwjeaMrAn8&hkX05&}U7c*8ICFh*OsimKML=%<3Kc1>)dDc5Lj)KILmD6?D89 zRNVdf)z{Cn>FbS;o6MNMo+!p9Zm&k-W@P72j+dSvZp5cX@>lVxA^aomTxUc7$HfN` zkZ*-5WD%}x5E80w?LtID0G>KnAGW%9Dg=pXcqG>m_2W0Lab6)nfz`sAqAO$M`+~6` z8i<;<-Lm{dg%;Z1uUO?tlLlc)P$+~Y`u4uDp-k~IrX{(#D?>OxKWO~NIFi#ts*!Xj z`G;&^l-+*_@8NPFQ~O77N4*a2G$a@hTer2V7KTs^7KHvmaoRP-orYta0|&G~mC9V! zR7wQ=0rWYT=Z5IuObk_lj=e@PA9f=4HQeF&uOOX_y!`m*sILaq02#?ZZk62iubg-o zANw5}xwENt{VNZhadXh$PzogdbcTm49xA8Iq?H(i zD;^wE9P*R`P^m+J)piGy1^C~BNk!!Ta4|$v)UTHV)^G5)dwdq`b z`VBp^e?w2^!vS-C0X*VO_)q-ryjf@+&5IlX@Uoh88H={he*+c*lrGv%I)*dbb)r?iEJcoz> zaFz9`;-#^`#bwMwEZ-vF-)#+_xTx$u(DXxX)QBGmxp{OfL|fn~#C@u|m1O~FnC+6* z&E=PDgvT1I!6Y$t56y4D32*Fx_ThI+J~>N_H*9w)J++(b4_l>=t z5q$SKou5*@YR%a9m(|7XsgNZASHeph6vfnUHP~<>Q|FkJBsY6Q#IiJo}IQk{IsDL<#6E4+O|gaj##3PYp1$l$It-@{C5!!Mvgk*kTnA z;+Z9cE6@n&gx6QTp*g6J;IM|7Gj?~UK!!M`bS8UuQ0m>x3MnIUXi6OKcMWejlGC`q zT&3cv$weH7FeC|Ek9WF1U$o;{;!`AM-&R^qOk&Mm;*l-YUS`?1S<3PdQFZ$d6#rfa zlTJ<`Gf^JQ8Ty=SQ|x;Ms!B?>Bc&gn?5AF7zr%qCgWMk4Hgnigco|dCbmIYT{qX<+X+!QG zRVp~~@Ul5(bdGJ3*ygGlFE)2O$gY}hVSod1lP>ciL3m6F@$qyx16-Vp99q5D z?|4!59K7_L(b(rEuu(F_Kw`MG?MJS03R+8!%OC3M?;}6zActarV{V{$0GxWzO1TTl zCR#a1w4(sJ)wZ2^mzg4zExuCZz32{^Pt4OuCR9;{F_WLy@-;;c4VwbQgB9~f;85Kw zCHJG+X&rDl;PO3Jisz~6)1Rc%7XbrnVP5A{hvTas)*UXrtgo2j)?*uPOEV~6ZGW`F zJqW}YLJ@t?jmK@g%SocTFj--3S9`II z@H`^7QEqBCpilXae{16W1bg}14Z9&=557Ne6>Rxmf`!u6l2=+)_g9<9E>I^BDZ(5)|;$Q6uy!lz&UT!TqH3L28}L z?u#1LHgNFd#b{b^tJleQUC&TcL4VsT~eY*ZGGXk(Q6Zq1cpD)H;m`mI@AGa5TlxDQAH}tjYW<*u|HZmO)0J zOsf^=Z$a;D(3ka+5uc9Cj9r0K68#A;46S;CmQwxy{%;lRV=^1@CEfo}HVY{#gcKUG za}qQMq=WA?@o#f%dpA|?Js>uG;ziz;#IhAk$TvEaKDZcePLr9QF@_~covv&b zFA@0d>-OaHAM>N;Mcb#v-;F1yr&(Y9s7P6^NaJrBgLCPiW2v9K`=J&&p^ zFr4@IBX7Y5U5x=>3rzGBfQXrank&uEHWZYcF08}UUtWN9t5iNiU-&29kk1^o-mY%c z19?(5!#DjflU~CkBO^mYD|h(80mcV1acRp0t5+Z}fJGV7%gWRj=qTYZDB<3WK9q^t zlU_yYWf`J@=#U%k)HR=vj9WMA&_|#C+SJOun8BMJI;Ogw2C@{)k_vmVBg-e2YL>ck zD^{EVhpX4SO<+@yliQ2ubt@>U5XIV_4}|O^>+M0Mib(`ZFeT$cJSwqvxekgCuN2Nm zTxNVg8N&X9RCiwnSKUVaFS_hTUC1ZF5iX98W~rwTBM=uvBiv1V(7OXEheQ^vmBo;Q znd%JF=Snumt4cI0&A`&ojxH$jV+QYaw)*HI;cOfqE4*CrY+OliS6wx&ir@ehp8iJOBCXG65J(Q;yvqr$Hvc7DK zdKTLeLS($5D+tySzi;aH-05;Zf1NSEm;OZXqn@*RuuOU2$>)4K1G}ht!r;PXL?gj1 zONXa|cRK^|HbOam(hdi>PpDM0Ru|}E%Pd@9oj^HJpeEBS<;Uib{UyCTpYtuGv(j1N zoVVKNEtB=`V9?yV>l@O%_S2Co-^|H##sT^!^wz%rpp?-_c`pGmkcaRlb~-c10^)I= zZ_6G*WC(v>D2j+6xt>ygES@D0q6w6gLQxFJF)RrBN_Qc5e8x%ZbX=ZaXVgD!X={Nf z7iIvAtQCQ4OpU)OY>OyFLEnVJ$QF}pwcPcWQiR+wBjj^NmBWeT8v`*YQ9ON4^ykUr+RTx!f4+JZjQeBGQ z&phL)= zLiDFYj>-V6CGe`AzQs|!fb0#NreC7z5f~d6>9;3lBBQ4OjZolldGA3JMbVor`_uVs$;MciV}= 
zc;?=C98KcqLXLyVCUVg|1!oNi_?;|joi>5ebGjezfa4c=%B~sSERKiq_krBrz~EWf zT*W3^!pd&>BB0%uOX4x^Nm^SoTWkc?wp?I&9^g>YqfStJnu z0mV`0muK6lHZ3T(5N|8w9*Du}udT7mLd0T=O03H93T8^bvTSn1i9&u80r`#Nu5L;FF91`zJ&%fLjW$VX5y=#9Z z_B8G5{gHkzeF~&!&cyaztP(p2#|OF*Q$Ba)AQUts_j#>O%+&P&{0?063WLf;DA|wU z;pSTfWD}{LC^-J6q>^v=xFcxbA9u5g_Mh z3Nec(C_ZrQkOgJkO0(f_(G0VVTlGiJv$(A$ke3}nrLXhqKz4e%e#OZ+sF&)OHE0iB z{ZTXy7QLT+Q15T_@+wZ1@0|Q@6fUF$e=J;_^>uxODebC->!Hg-Mmm*7#Y_pypQXaR z90S%3Cppci416G8xbZpFUK)+B*2V9g*^4-8Hs9O(8pKj>Qdsll<`_qDS^`w zXSpZx+h_a_T-Pfa%%%ET{k5gA2eQR_so2-v!{;0ed4Fn>#c^M9pzjS}oH5in3SA<{ zorU_OuB@o8B_iO(>z043{wRPe?Ot3Lr`uC_V>%f|$dk52U(7`5-3@CgJ}Cis_3q zEAN2~Z?K9qL?@sq6jyU91AO~u9G2x$UkG(NN6%g!SwLOMOzu)wyt0l?(OC(da?VG~ zai`1k6Y!P=LM{vSFj4`htqIeEjg|gP;G1e(cIQCecjb>GN5G=zZ4ic!nL~wS6bXaE zgAJVo2~m_^7l0bl=dZt-k9{kaxK`w+=a4$mFmGS=c#VReD~y5*I(O*F0Qm^10O!Mi$n}J>_yM;$B@G-)r@a zhO;f@Kzc>PI8-_RTj*Px@M6qAy+gj-UHe@3lmA)Sp~r*1|>tkddnwaDiA z*=ANGluTj-?Y#_~_iY70HSa52?5_f_5q(nNk&5_zUiw7%kMINFud$EBv}^)L=g-DR zE<1YPFBfg++X>HN!RtV@w{&~vih8)Bg!oY6l+}c}I2l49eAJM-+vg=d!s$;)qV@E? zPaTw2YI}cEJA~R`DEzql&iiY-Ar=K;iyUw`8aEC{11?SlwK1JX1J};W5%{Ku_o2Q? z3X68>`An8cVD-bl;*+l+#4?HH-D%TMd1P;y1B{35#(0o@T8hQx{hhuB4J-t}=czrA zNk$x`4i|@eJ&Bgo)&sFK&fmAfw(Gvt!*Ehen9!RbHpj}L!ZXT=f*Bc!UdOk;WEcvV zvulZGe&T&UmIVPtLFGQOjYqN@cjp#@+&M4BF5>4wbo-b~hT8(tIxQbzX7vFpcEtlm zM%whETDNcR5%ahjTtLOXo(wJ)th%Q=TfWLQ(!jX&GCN%NG9<6ksm#aH?)Jmpfw3>C zIM~+S2yc!l0P<40rqUPQVKDjd&=JOrUgbpbgp#QG_U$9Fi$vbnEi@l<%;i0!I`qJJb zrzEKX{rju~>#nBH=kDwbd<}nTVkZhWREU!!=ePO=<$Z-7D4!0TwG3|gb?*1^-92pU z3b$L|Oc&T*EidfhiLO6LacQ~G#O-}%5xhEy?I!o64i=X%fDzzO?6l{}EROeN9>_Iy zI#~O!N(heOd3xwjqMlZE%GP%|XDeZL+@bI`+Kt?Dft7S3X+X2=SZW)EiTxz*`tH6O zCbK?jOIF?vd_sbiL8X37%soDAjk?WjzuDmiG26t??!Dn2 zScZ>XqN}I0^saeIiISi#ym75#V#p>rM0jUpp?F#gBCKe;FQXD`>dJ|bh&_Bz`t$?P z=o5``$S54Fo_8f; zbJ$N?@oy6-jHqXS_%4lC(gC9o7+%2v{D0uJ|Hr1DVbH{?vnmj));hB@Cc045h&85- zyF2v`$0mP{OxyrGCm$LqU~S=y{1;g!)Ozqgcj(SB9nh2}^}c?5Nb{ifH87vEP>A`c zu%a~GA6Gk-O1B~0Vpv9#<1;0{e}~oaC(lk?4T>0k7$X8xw;dSNE%5sGeq^49sc+y! zs%Hfyb!X=GGE5S6++Q`rXmO^sWKv3H&{IgoiVrV532)KIo*y9@KiKCqkI5mB0|+d++q5{fE} zss=T15YG>CWB1)Qd{DvE0+t$&K()3$2Yw; zp=J~pvfGT@?)XGKZVvCAOn}Bh5_*Uv%3x%7e{hqY9lTKr#ywX9XRd z`jO{upwQ~JK|uQ`C?2G=2gjBRcHUr7NrV#T-web;2{+ZTNNFa-TPE9DTt4neQkb}} zi_UPNR9KQp*%cfD@~hB${m)__>VRzd5WHTXD~>f2gyg|oc}dt)=49+a6l{|*mWhH=YZ2wT}_Q! 
z`1x%i4^#-ZlOB54yk@{wgOv~(J{8iqy zVTU`2g38!VMz->eBHU_T(Ix+WXju5f^E--Da3cF^?HSF!}x5qL;6Y`foPuqQdu3dVQl1{T> zf_p~8|Ga_Tz}C>ciLJCWcG2C@P&^VRyHav6I%I8$+eFf8Ra_`P8Tb@e7Gx^kfV6#p zo_rO)`VNob@%4Gr&B+K$MOTqrV)V@jv=q~=WoYQ@dE}y%9~lFu7hD9hvsG8;LbT3a z1o0hM&HeL^@TsU$lB1iELpcR^*c&QPGj=%sWxrU_fv<6_Ju3;`ahjf*iLS%1Cnn77 zgVMr-)eFaA`9mIk*o=v+uUyQTju84tLoO72S#0%QeQPghFN*4b(E2=Uk@8hwk54q8 z`~ESPb`DFSRWc4USMi(DoG9SIWtaFX;&5l%ricx@-in@vPMI#feo~y8Vg$IgOP-Ih`Zl z55a;ENfMS8WKBuy)3%6)$S47n^X|RiY{R5?B~UMWO~%3_0XqVn6#rk7dPo+AYf&(` zKBn4W2K;^@<@<$u0vkq2Cf1BELA~!u-q0#~wLriO>9px^17hh4s8Wb}5(^xfgx|L* z)WKR<$}oi;B0M@JFx=eza1WI!8g2SXUdA}Xi=(B|Z!JOnlA}LADNu~t(1anfvKbcC zHa!GVcJ5wAvm^IC4l6?P%K9XaMvQ6pLDJ`vc`F6X6pjvRMc`QzB#?b!lVM;pWDZj( zD!npuL;(uc`Ch@ggi-G1lLj`JxyCw^yQ8Jbs$dK(U<0l31q@lgTJZ2{lB{!$%|SHc z8+QM-FVr3K1Sje_s~v|^?!xBCHmte8!!0tjJP3tfJ@I-ClYnXfb&&Rfb#B!y)lUwc z7=K~%e903J{PNQ2X|Y{fFuJM9J-Q%IjV6b2-XO(b^SWc#S6Pfgku6eSCcA zXqceGA>Yq)7fm8^f#o=)SINjCSH0Y$95=spR7__IJPxY+$MH(^OV1m#^QghykF34^ zN=6%jMDQ%UH7lccwSw<(9hR8}jewIj-+aH4vJ^rH8T|OYrJOGGDfyi+_a`XC+}aoJ zwmC}`DbOd9(c0QT=gIO11qsr`Vnw>hYI>b+9Y}syS?##gbi~%tQ(VHFaGY8?UOMzM zWXOQB=WO70F{qD*U|8}bvnA(9zLmYMphUmoJ=J{@13E_ z2dW-k9DI52u<#BfAgmTX{L5MSNdCuF(d>}NED`TUK2*gOy3t zOBU`Jlb+`W_m#}6VEtL7M2X{3)AO&| z8X6pKTZ$f^Wiju;uP$YZhq-34Z8vpau9Gw#P;$)k=CqIDxs5)!t~!o1^i;!2Qv}K= zdmR!^`U2$8WP2`qBat|FsmYN3?88wZu*M1n2y_+jVDQLzomYPyug~WHQE3S;mGcaf zic(SDeC{FlfA8&4Zv{Je4A+pqJ4rs*jwDJ@1U7;)x*A?dlVkhO&meIf%72tq$rUC= z1dK_2psYZ3AuX>ypq*r5HN&NY|J*V^EQc&ghpb9!$&^xQE*gI02ewC%U*k+a)HtmK z!~`VnGi%;SrsWOIeghtZ8NjZeZD-h5@2+%;Jo5j|5-s{@HsI&apYjwq)T+90N`*zM zQD@&_GUd6ibp+y||#`L=Q z;soSQ*VL?Zq0NEFsSR#z%}N@Veac@p1SJYRoPH8@t*pvxF6{HN?HLUfh2Ok6yKZ;7 zZowo`#)9s2z)CsQ>+s_X!=zKmxdvf7VZasy#(@ujZ(`s;ETF2YDx1g2SV98+^Tfs8 zY}Ed?hr42;s1#gDf2^IeN}SJkGITyX0|EosvCMsYV^Hf!odQS4eJm@07!Kd$50S`A^T3_SJn5zpyFaD=aB24?|rie&p|o zb(893vB`jG<;^Xa$O9Kt>hwVtqX}-ECB73W!w@Sv4D((NJf$;z^x#2yhb>WrmT^(4 zD{gm&HQa{$mFe~QOu-QP__%f=>vP$(cv!fO0T^_D$)50;M~D5>*6wTH_@+0)tjnmu zev1Do36q#3l?)#T>v}0&(TaN5Ct(H(bfc)&D5vqqM3=y4p1s|vY_)Pj9=RaMlf5p+ znKmfi20-9H(dOb>KhwkWBKA4&n6rHQ945ohG|86HAkn!$ZBI5+}y0o)Yto zUsF+`s0uItmZek$W;vf-JTc!PWEq|=9UiCRPhh)V4wP#l?|g{SX*_jfhyjP%N5OD3 znu^t$v_(ZOWcj#YE)yRazl*jX@WlI)BLB$z<#8VUbOrVa>Fm(O!FJg@Q>)t@ylw#K z!WywTnXWTO%IKp`fng=) zXT1&Fjq`dNCXX~7p9S`2d{A-$)L?o#Te{}@4sp8XvIKI`u03`Xn!t2qsV}p^$IXwV zR_$$tp>I^bPOv({(z&=ZH@W0kDbVAo-K?!go!RdiaHZFf!avH;?vQTS#_I`y)1Fp0 zkfb#ZYFo-zxD{_$pq8*ywTsYQVP}5niKQj2!t;xbfh+etZTqlQ!mphw{|oc=?8?x2 zMyj?{x4d=~r@{zZ`)K+iFuJym0`Gi}mfHcqA#CyS4G5y=0;EQMXe^O0ZMj(rpV7-n z;vEeH0S314d9X<_Pi|yCNyyVlCfTRk8F65N1<76SNbXcO4 z3490FkCfrx0c@Rydj%*s(a^ZHZHwJ;DaXUKirI(IbxMvL~WH6y)K4pAKlGlN{q_yfzmTZIVSoT z=Re7i-vH4B(m}^eg+Wjc)M%g{5iwxBd6$6x_}W;g{6s0&Xj6oQu}o*WgeZzR>lJQl zR3x%XJo&Gr)UPb>V=&&n*OX#8E2_pT&2mUzglaf|mVKYE13*oK>gBg^a)88G4MqQE zHDG%1Md)bwldgQB<2)0*znQ3F#?>0gb{Dp)3)%xQ{I~6@P;1sKcL(?>D2`gn zs$$&=y>q4BtD34Bk;4ek!5yi<;B~K`;RkX_8xyESpC1I)l@p$bA8Le9hSu|=AUpv; zLv*G&Q;&OJ!mXm)?S9-+w*M6wPQUi%St0$LI8J;oE~NR{C|yt)%H#$=cE^5oD~PmI zgRq3hi?zPj01gEP$mU<(?#WLC$Q~zqQlrYH`AHN2;ksQZpIO7>QPE;kf=LfP`U<7H zfM^2!U8tuPWaaGo_vzeQy;q*A7dxNKx5u;3BZ0S^kWVsF)Jf=ajg>@6C$Wdj{eBUb z@&vI)Tn!-M!1W8A2Lv5xG1g8H<^f^PcPAzhZ8IZy#{?>rDi)k59^>AY@p*?w?N^M{?z_x zjL4^V&}8Vny}hwap2~zL($NHbu0{NsC_S3=Z;-79JW!;+G-MI#@|m*Dz(1JrUSUsh zgk=ZOQsX7=ia-8R(jDa;)me?_iH~RKJistC=9szd zxK6|4dL>n8h=TJg)=bJ1?#v`|B8$%r)B4*8IZW2WJv=-l-kbRQ zx3@@&VP_Bye(kh2#b=49lK(HBvyfEA#zzFHr`vb2oygr7&C}w0Fzck5Lcd#rbG(+r_m{q3c zlmPu8A;vS3goadZ3ZL7{O3W=X5ZXZWlxa{thP>dvbz*S;4F!cOP#&l5mnDgZ(1(SNwE zo34{@YSLXcJ53W6KrP|aWO!I{?BlIndPNC;&zFSc_*_bLJ&$ypyo+q{XQ|qmL$`Ba 
zZcXktX!5J~Ays6d`;(@a;i_ie>BTDOrlYg-AHz z(&-CXa*Iphp&zO`hQ(6I2pZhZeicL$2q;Cl;Po^m#{xJSz8XXvJ0gZs(B0PWr3NfX zwO+rc-(H<}^moBvvpQL@D<7^wL%_@h!J*WTgQ&XWrK}{w14yfpb6s0l-!aI9{XPcE zj%NY&F_)y$kEJ%uUmlJ{oWAu8CuSG+&>5yC*Rt3S@W`Qo%?6|X6>&W?UUHIMGjesH zDI^Y#V!n-3c6FHd&KJZ2m-;^D*A+cp{jh*r(i=mrkA+m)6ivd}Dv6pTSDI)cRBX7V zpZX0(VrD|e2>&R-cRP8XXA4$o<1{^#A0pN?226&Q`Py1SML#Q5OJ!SN)IZ{Q(3;_Z zR1CVFwY;QX(>SHYiv2z{$>kuig>133QS#8M1TtW&qcLmG#zVU=;fy8qg6#s1B^3iM zox9m~KUlGMw8cp2>zHb_&~Gp02r6*T(Z8uxV?|4p9Qle9h9@{j{pM$jT9)TLrUeng zzrv`nJF9eXnnAE-L!ie{^%*i}c%<1jRx;0Xcs|RPa5ijDY$kqglniL)|qifQMBFf{IXB;30&+2knOgd0ymt zar$mNOpq@^zganydH~{zyT}~(gU6;?m3YEgb&bRfEvMI6^Z#F?na+`y6N*9t2>x?6lZkmhl}_8FM+ArHhK>jhvtgu=_wQw!GC`QI%K$*% zL`8Lkh+I!n2{Um{R+;7)TiG#EXDy7fkpg`I=deozM!_$KTD+q>kFu=gC|01O-eVCy zSeqTA61lkRtcCbFAQZT8KV1{xBu__`xgO8Gvm9Zq)&<0ZT%SGLv5Z+@L-t0uWS+XJ zr2rm}EDG2(?N4Y`au#b$I&aRu9=(Kr#0yGln=YKyMd&AQC!JgX0qaO;S9q zqCjGq8wgp_R(6byuC!};cOZ*~|1gtH1?(LiX8J@szCos9 zS@lOK^Wd?4NhjblXmYHosPF+?$Y_Z_F76cIAyt-_r}Md$6f*- zh8lSqjyk@VJ<)77CjT>eZ!}QR(>tAS#RwO3HS0$jim3OgiuD!vsNe8`haPLx=zHN9 z4Gq>4S_6#qp$W7~?tk)Q6Zkm`b41$kK08sIUc9N)6#rtI=AxoXyJMI|)h)E163op0 zw5N?3r6>rz2<9x&WI`>z7l)w!0sHvniOCP2KU!s-CXJ93_NpkYj#Yx(s&Dqd@eyBe zmDx27l;Qz{V5-cu&W}6r7$f9|Sx6zWA)vb&wYdKV&YtF@K@7Zy<&tO$CydL(%O|dm z@4h)(5O&AelP?CiRP|j-$+&l?Dd4U$Ov6Pi0FC(-s`x$Hrywpkx2xBT9SQGE(IBQ zU3?HWiwPh;`4;1WHywPaEOdR^W}}$i4(Mk-Gnwff0epA z1qMGERdFf=1g8YaFB4H#v7)g`j@SP23TaqOo-$o{z58zKok)@1{Sl4{W<5opmr*vR zHhgqU8ZDS18NL2JBTJ|PSSceEGzFQG=M#xjCo&YNv^?dG!{nnoZJ@wB)fSiR>qA<( zJ&g3Zv4=s|>5B+M+Za~v#sfn;OEJy0pd>F>H(B)AlsCG1L8~0q!J~&0u=j@v&;+t( zw?)q@d{Od!6og)^p6kc55fMS%*W*v!>cc@bi+wGzRa`Gl5lG7;cG4VD?oH@-wN4&v zVmkAf$_&qJUtP{ClBrvuJHYmsf<~(_{?iimi-b{1SSc_=9b0@is#I1kmU=qz;Nk?M z&D&vy8cu0rXXDQFyVs#!Y%4I#sW=$wimCArOUZvKt-+;~Mk?Ol+M_YH(Vb;c2C_8j z*(?(4pfEk8f57)C=91Fd<{qPlC>$Sm^YH~OB=bH=M3tTkCU@nSreJU||m#ly+qh5PUsNHCqie<%`791p3`@o`3J3lMn zu|UN0Sm;Kfu;_6Qd2QM~LgW=MwAVziOvNe`lbWT17FXQU25>yYKX(* z8doI_3svv9|y$TTCa$IVe zRZj&dn+3*LJbyS=55hW&9%(Y?Age~UB_LC-eiA<-O?9xMn5H9(Hg&MD9VI%y@?{(z>c70Lo?I)Vz+GNeQ(AaaH! 
z_?A~ZEz_HR8cX0mp5yqXp}eAE55SFTp`dBP03ekb|LYurK*G7pW6Mkcf8VVnh4q+Z_fieMr z_uHdzahB)D#hz<$H+Fp|;DVp9=re|5 zVyUU|(XFO`p^jHJYGUj1(|4RH8=4k*Vub>3ju*8mXW~8xCYd3I*gl#(0PGugpcCqw#BS*#ilowO8f5GpRF)ii8NOv!05szgf&>_8W*UObfawuk~I-vW=~ z)>zAG0(yoBeVfbamKZRB98!FyfyMqI1{P!v>B)o?V7!pX;u`Spr1PT!1`lqMXuMLj zS1LZC)pSswdhO5fH6igJFeV?ZszoBha#e_4Lvny_&}%+nW(}-6jRH~ZA5d%nl_t&- zdHP8i2`-MGi}oWMRYYLALdDGsfNZ9_s%`n@b_Fyg*$;2Ui`v~=l;|L`p;%v5Lfq86 z!ZhaJ-?*6$jyF955%W>bD7(uw#@;}ePJ;~J=;tGJQ2!+W>O;-E zrUoQwTYCZ>{#0RKBU7PQpj9PF+<)6?<0xk8dXczeBmsfGMku&_8`?$nAZ7tj!8Tx` z0J7Hi*~&TK2muEm)?ugDetYIs69b1iFajOmGuFsA`UcLAD6ut!7ZxHB-~#8htGVD`6nt zxiAN~fEg{gz<%l;sFpBYZ>>`JxF=?7EYk^yGhRPXA?j;`?J2nSyyau06280sCYAXk zxGg|Q>XXdlrJci9mDB}BA4kRFi?7dRNIjA&Vqt()X$ROz&tzyK;S-&5RkOztz|yEg zm!=)ZxXIB7R8R+vYvc-twRDWQEIwjt-GD6r;JY5tf?>7S_lvAK#~}vj$s$2(X^SX0 zv|UH@fq-kDHmSDJdAY~%cID-SI$ywzj*-!l>Vb}lNsS~MI2^qGm}xM7g&k7lX}s#X z;Of|hGFfpomh;tv10yi$H1m0?j=#PNw3$n(DurtSUT^{algU1*1 zw05>q$NbAa@CQEp{A@sa70OCl$C?Noq<{i|NUHvzNf9enNkAH{gg)%x?Iy>Bpc@dA zp{FeX4npk+)Pq3odHmSaqIWdNxzMob{|)N)DH2W%L-+p0agKzT{2x9x6~c+Nt5hul zP6AyTm?5cfLa=8Pp-9p3_-|8P-Kame^oA~x7?IqB{S;yPx@Qg$tRX(ZHJ_hydvG>)o3H$%|cm z|72aa;;iaHH(*4JsH38z18{Ydh|{7w4Smv~3{rJ*3LaV>bU`q9)?H66B~?wJ^DP59 z55ZTcO^Wj|acK6ZJasl*n(lLC{Lj3RAGXPhnr~U-Gb8bVoTHbaP@!$gimlt+#-N2# zr*#}IFjyo3VKqwhRHDODRc2_?1C`JNN*-FBhr1?smvi79tZ!-2 z|IOl6391hJfiy^Tbaa-^>l45U4;*1?;x%UGVFfv4du66u?5EJYhpw?i45>0HpYPzY zR3odm?H|?lKDq*?(Ah84bO0c(08`54HRy%V00=waLQyx7%|YpLovQAte-a}ewf4)h z(vkCMfyeqw00L+<;C2E4iJ!+|KJTgq)EhY{D~we?5xKi|%QAXzmki{>V%O+kdrZJk zIMwgT;H7*tDI)AYi5I;q*g$+-0=^IB;94it8l#j5qyG)cKI2^DKfuai;Ls z;Fk1LZ$1txB@-ta9H*qQLg21rZ^Fx&{TzRRI~D;YXQD$1r9+ChI@_*lnpOgcbB8n7 z%pa6lUFzuQr0$+q4*W?UQ3g|)5x?=L$$429Iu!b?%I2=#c=a!71EBwX&||BcI`y)9 z4D`-21}UFU8_tCTet}rS_urqN1VW!B;MZfl{5IC&`*7hFy5bV27l745Nq+^+;QTpf zBJmwi`B&#=E@KACe9`uKR!_d`ghV0838GQo>h?SUZ_pdjLzJ%ho{gR#U#eUnZj7fU z1e)KMT6s#kSPLR-YXB~>`q{vViIL>lJu#m8jXvBDlgnF{2Ar$Zx~C5Rl07lD?`9fv z(f_3PfOqwO^%wL8fz|TGpQb47AjcJucrrL`6FNjct?vg)G>TPRzQPcmuQF)L$jDfa z6>tQdyVT8|B)^K;2ATq_&kd{C>nhB7ntT(-KPbH992ya)4KTnj%V$cCTK1V8)}U3= zwNUGe5kDUqdwmN9%L)I2F|bGc!|2msTgSIE?6+kgLgRgP(8PfEM%LRC^vOaHND}ba z3u(1ETx>Sh?*QCqz<(|n`FFm=&U!*@o%%pn{LaGCL5& z$ysAa{Npfbce=F3sP*xY?tpwI$R1ncdMf=5a1caM-G?){?8DHBKNrei_?hKpWhHH_ z*TGuVgJ}J5prt|-dB88&I8m~)*BM!^{z~tYcV%GVxyF%OIbb)wJtn-yI3b`}xM_#LFeO zpa5hbsM~ndam7qlfBd~7BSV}?^Xt2W58FAuKf0_MK40wzPUn$2hS^s)57-R-+c7E0 zhb(`E36HWK2V}vKvfw|2Z6Z(Mwc&IQYoFA|fkPnw;}~ev0MJiQ86}2NlPr{?3@P8xgs#S{%Z%$x8eOX@b)}<>xoH2Ot~2+u3jp{d5*4 z8gngRz{-85$6Vyl@8>ht4Kxy7=j&g`9S8%41nZRGK?*TfT( zVXF!$ZRX_r+6IJA{vfU8$4vM&=*d%}hpI3#?Tvi30AhO}pfhEYb1_FpTm2|3-X#~> z8K8oOz*-n^GiD)|I1z+w0cB*j0N0AVA~bod%0$j)*7q^~$t>gxcI}%qCU#nt^0`linpBXF#S-)mBY8lPo+6Ozk z%h#-~mNLp%7mR+b;rQEwX(ku%v8!p!Fxw!BIfBVYmFCA4o5ys9#&?w+man;V(wbS9 zFDvyw9!rE$?ezgq3H{Zpg%=)_sjxU&v_dH6jif491HMJaJobXufq;!6Yi)yzE1?4A ziojigfb8ms5AP_*^8p!?pm;&;)EY>M=LMwzI7&kue;fTO_1jRI>)FB5HBx+T8Z!-8 zs5O`LoBCxhN?3zFqK@weqcV5ZzFHPMs=1rBr%7%E*5g7mc$|rL(KY)$-udrYf2Qc4 zqu2|o84MR~ID=}vTLMKF*W?;yX{fyg@|-LZ`g!j`l(>Var=OCRL`Cpe65cfoSF9^E z)q{9!pz{w~lD#yv|0H!i_aqtPF%#N$X#6?YaLOK-*iYxgo$M;6BHCCTyuR3$QU)zN zUbk*wHK@7k{~|c5UH0?z{rmSI2U4j*Re;+HkU@XZ!@P(}`_2P$xF@9L-sXlp2Kupg z%I9sa3{P?3P1`3|u7HSM2iqVT2!VJ=_ubjF%CV30cRF@$VG9K;v?2-wU%ygD_&y>xbA>?|WOoznsfjFd$TCna_}m;xU~OI4GQ^>rF(y|+-I)>HTZx3PqYykbhLaAsKuU39;u(cLoJ1)m_eDOcg0XJ7OLJ= zMd4W5&#?VI1EmojHxI2$=M;El086QAd9PrD7_qx_IDS}4`}zC?@44oR+He7t1IPW{ zh~^`&8stW_oiF=3`JFYZMrz!f!W2P@IJ)&#=)#=Mfai7tYA=j8P&wQufW8rRK{oBG z1xn=??@`iI4d61y;gR-#%Faj-g@L=nc$NPC;eV5}4$VZr&spcwj9X&V@rRS`PotgP*9s%HhIRL>ft 
zQ{ak|lw|m)l1p|KFYejTbuHW!IWLUQ1T)O}T~AjRyk*W`<#AlWx^|~;dRXFnaiB8q4IUqk%1o*p zqUF(@4kd2gewTY?zB_RprR(Ehho!*e$Nqg?p?X2Y2y188%AP!_9CSfpOXg5xZ0i z%6lB-FVp{Ny&z6M!G`{~^oTG#4o^~OjHO>gFfpYPURKi{msm!n@wbdQ!N=v^d~8M1 zHRd066-eP7Odki>!x#F#jj2CGCfX#(-PpdFG7sv3*nDNGTNf8#4$m@5E!=mN zcjs@_)KPKtTtV4&zK{{}54WaZ;C5CGU!>>8J%6OOr8C1obQj64qINy=XkXy{Y0iTG zo4)ky=TF8R`gol0crt%9k6{q;HX)8z^{iCE-mZEw{KBw*GlzW%dpC`379ObIXRO&YPn$%gbc&cr~CLf2dN!91lpldQY6wY)pHOE3N0|0)3Pw>4A%@N%;L zYx_f6X#b~@6UD@!OO)8l(&SF2k2(cPQjvJYvBfTJ3IJUoq_{uMW_Rv5A?XDqXl#n zF6b|ROO`*^mVmfJ9Gt8vSpEzG9bEk%XR!Z0K|w)^zv|^kJAuJiG(61Qy`3$=m#!X` ze=a2sakKpMqmG`Kn1&XEn5!+sK}i!>7vL00OE+^@TW5DC*N0qxMq!b1GGq2l=OZ=`Kq-P|Q?Ag&Y~;Kmdo|M;L|YvFElf1f8~qXWd+?S3%mIpX*0%)||hC?*beE(&&5u-ah7xs4vO$bl8JH5YTV1_l?fz{K3l zEy0TLv2p=x3*0310~0$Nn6(7N`LDu3JZzwdkcPYEb8QMfaG0Vc1bW|~;RXeZ?q7#+ zvGM&mM3#b=6I}I$_5FQPaB_m0LrM^@KcDXBalf9xVzXqY;NtvG5A6PnzaR16wj;DM zk~(x)M{9Q*HVSSYUNG3)LLv)sY!qZ{IZ@4!{K&2!?`G7yk%TJ>wE5C0vSCu_?HD4RlkNP2ZEC*MAYc01 z`EM=;=B88Vb}rYS*1SL0x9i*KxECO#KST%5{OweehAnGz9eGA-tNa~JDWwPKQm6)94MFKf4>DoJPCY0qCi zqB48K=P!n{Yy3|1of?XfT+y}K&=Z`!DM$I|mj@j(gJW{pzlynZm|yL5W60pj-}WrI z)m*-~z&LfVP?F~A{*n5ipylXhmGeeelH&}TjcBu>XM{$Ez1ZrMs|3dBN)qTZ+XjT68BC5d7>`F5<8nVs&?xSt?t`m z`ItC{tadcv%XGTbG*u1H;`gu7iW6T24yX(=Ub|1^HB4n|b#9c}@Hk^)UgHiUUBm15 zs|=^T(uNZVdex=2ie|b+r>30OHnCEwc}tcIliWpOO6QEp3$OdjH;?qyn zdrUZgja(zX>O{!FxPsi8QLl(;$<^X~neTeON>iKpt6r#gn$$;&NF)N8UsU0xyy-Jm zq4woH$;A3p!8 zmfY9w%|I=;*z`Pv`t?=G>>Fwq7Q+W}*q-}vvI!ro$AYL|``SIBena1Rq|z(HJ6vKs zGE<=$0AJw#*Wc6Y2YNfiQsc*mvJgtD#@>BGy&dEO^L%(l&g@V!i+t|K`a|NCdNx$3 zMq}b+kKfg+3Zr`K*@zH_tA?a`P9V-6FEc*o9~D=5r%H#~tXK9jG_3y3@Nk>jVYFdb z>7!BW+MH*@gED2g=tgDlL$fz9Tb4T>ttD6BTD zHB3EQHb>gS(*8Wx$kv$G$uG6xG%RDt#R%`v88V~ZE$C2cIDI{5=JHLtLnJw#~ zitQ9T?K2*9x$+rQpiyJsL|RmP(5S*?=zFkEqC&=PsMA~fWKH+!g7F~Jxm>LML0CSM z)rS@wE~g6f7T*+e_RGrm6CPdypSF!U4bi zHu{d<{^GG-quzB&d5VdyJd@Pp(tgNL{f2&+?sOIRK85`|u1|LFGpZt!4TkusD{{|# zHVm917QB9^$4^G%N;E>kA+DLQG~;ix0_47!s!i-$Yu5Wz)r9VCSA5aoFjPz7eV$_F zs>Q+7p0h_>`Q9bxvP>$sI@x=VOQE;sdMWz(BhkuZ4>jqSTl(o@JEiIr00T=@!TEN=hm{5st@diM2z)BB6Ap3avG}w!c2SxS@M$Q#U@Z^ z>^rmbxqtgE;D3Kx)0pbdtk<=|@N6ExV^^K!nH_x4C4KAEk0;6gk3=lc2(I6Hu_=hC zSN&$|BxUP_YY1O)9$HO(BZ?SiA|@XEt(0|lZi#PE=c^D2f^hmX4|=1sjc<)hP~VGU}h6y%o%P*`-v%WH?8-rG!*C+w2R%% zOa}=(O}4N)234QeDzsWhVqS=_@yNL~S2kv5_{H2F(mEMDFIqG>bmN+P;6uw5{j}Be z?a%E2qUj$T=3F1g2R_!(5*`GwYo~LnB$y`;)eUdxCblI;*M0nW{2;I?xdNW+Ltw(i zk0Qwpd)7I^vhi9EO@bkV!%_Y8R*ceQqofTKo1tnuCd4=1Muu93G)i72MYfnTSssVi5WeetPJHipZt)r z^FWN#t$OA?G-RgEj(Bqho2*b)&asoDUOo8^YJPZKQ@YdI@u1}mezbHmqMmmGNR#h5 znfuK@;cbW6mxc>w!&4IOj6Zp0iTOm^h|J-6iGLFMMjBbML%-6m+fO17gyY)+J8oY$ zD2(5|j$tN3`o!#$Fjh*pOCdBkUNGypS1d#wC;HwUqbg6pmdV(@K2J|&T1|0sW$4{c zpYC7yCdSCj=ko~C`#+>w;A6XME{{Atsj z)T8#z;T+Gt!B3IN-Wi75iaSxcBnc*ZnX?JSz`2!TvxZ-!&r5L}?0cp+`spdXmK79i z!qci){c~=atX7MNS&>3ZCb2#b3hlJV2wUkngqDf1VC-hx3buXc%=}`DbYO12Kz+!` zwX1RVBq@k3rEu;5H@}@;%_nTxYN(|ClKiYbNlD6`@N<2*v88{(UHI%xazrMI8j;*O zL{BlWq;ryO%WUGLzr;eLe8|2R?^K@;IJSXapuEII# zU}j3DH}fNr6XcCG1?Qf&#FgH-0+RK+wUr1(Hbt*KT{bm@KC@A~st@p+w#PvUg(O<; zea$lsV-3>{BWEGIeA9Yv?>6VW20Kq~Bt&p;e*}p9tl3}rOuoBP7$_>?f^mG_MK>=w z+CgXudw9WZvFPZNP4nah-`h7@wIf*Rd=9X!IZQItPUZshy#tH6W=NRDJPU-Qe4$@s z_)JCzPwa&8Htm)NY6c+=3@g1Wt1AzqWUcu`_XjB zd`gz=&qvc(jFd?)8dXbqcJY*)fjl!qktOpMqRh#{zTz00Au?EA(lce6Zk7Ig!EC{v z&k>zU0OQm28J)#yb5u5NWvGgvQs!?P8sc2m8mioF)ylM<_;NpdgFz>V?=RG5OzDq-l2R!KY&#sMJ`Z>n1`3;&H47?m=mMiak`AK3Rj?xeI2ck`_+ zzx^PNGwPD^+LR#J>K9FT1)46wH9gf+Awthtat{6Hkt+IdZQi}% zOMSv(Wfndkp@cl(b6#Tx7?xr;XeM?juS8tSGe53m_{A8j(dusKHkZip z*??gCwqUwawk%FmlxD8NZ0t2h-6+@m~DY|zgK}5oT~nM2k$Ee 
zf-6)i4L*~dBictr>3XW0#Z;Cs)qRL)!AYc`6`Nu=B3)vv4<>}0I8)l$vQX;AeovxP zr84p&|7i;rV#k(t6(nOtClhp!jEv&co?gqSj2C?BjfP*r7WQeBE2E!4Iy;UdR+@O` zu%P3|Kv~Sz1EHa+>d#xWowOOB!c`PbO&9f2%k^Il;38|DJz82=jv2}C|X`|saIL&J~L)Q%(PfD45xoLQshnn6d6EHEQI<_Z#k!m+W@ZCN) zuP*w|%rRRz~_{}8zPmc{Dy5@8nu=pgY zH$xffFGcF(XR6eq@bqkL()3Fgf)mX--|V;#N5mUTZv!_U$-ECL}zR&QPFSZt(Qf)ew=@#Y0sJQnywxu@YeV7`VKe@%Yk zexk3UQlUtIL3GHQI5Jn{J5@2BljxwR{ErNT^1nOLer7#*8bcLW4$GFm>h478TEaK| zQ1nOTBmbJnj-z(k%UI1~4C8?~0nAy2a^egU0NV0!_5QDT z9`_%7>EGzx|1qA&!vm1Gf8u#iX!HNeKOS}{#tHsCWToKdhK5)F{g026^FP9&(0Tuj zmHm4${*6@r6EFNPYw+LUg>3hDAuA6B8#^dG&c@El_CLo9c>yH&54;e3`UhUf@fR4V z4QBW^MEH=C2S9=U1_rqSO!zNgkd2)e1b?B=@-JZUPmS|`V84HdnxRPU-@xD>i1=T? zAQ$)l2{2f!>+6N9l_GMXtp!^9%ISC-D-o&BYGPAo7iIDU$?C}R3+flcX1Y`43xt?@ zrHhJ7n}#=3_9(uZVqZrc#m|b~@e9b+R2>|^Kd$SEeP?0VueT(Y6;rS;=_inzah9_u z|I76J>ypU%^6hN%c9y}#vfE*ET=n>YfIbCjLD$Qvw)f?v>O&P2yAFqYN$QM0kYGN% zCjR2IP;t5&2VdK?^3HPmL3Yz43X}OSQw_%3-{^|!4b<{ZEoi5DK38Qo?Y6zgwLMR1 z++u70X}dAq#J$A*S>#&aWU5NZz~Jn%Ie4I{$-{lmZya-K>imGo{HGjpSKdz-tziU; zkFG!PG<&#gk#;gZpaR3Knc&)*gelXE{F2#$0P$YS8?M97dD~W_?Cyn{;nG@ zcYUXOKW3=^K`aoX#2iTt5_^)fFhPRk~frB#$2AZspT}9 zKzPOX_SNZV{&)yh4%Wnlm(;P;J0xNhdzCejy3^=1iUk$K?bqsBv_=SIlDJqhenG2U z5;^dpf~6LpdMhU)uq1a9Io3*vdgk{~j!)ZVaqB;L&JiAOIu{che)`4vsa{%7VlKq; zJH?s91mQ7mLHn~z!w`C^*k5qn1@ZV|qO!;s;$qH4Vse9*nGK%Y6FB)5!Jl6eV@XS5 zs`a__@%34TU_BU=$CYHna@dD!^nitnfbFALqv~;%A`+`6#gZbCLZcY4_ZMTPA8MfE z&c(`C*-5+<^p`iun}89cq(>AIqai>hT8(g)#~B>H!RfaEs)GRyyVYU zt~i}}>M>j_YLv(1JVb4DVyMo{mZKOaHKfV-D*e^tSEUNO1vy5;%tQx~o}YHAe=%Q5 zk+T^Isyut};?ZHzryMVgpJ((!oJ6r5Bp8DxR#Ms&Hl-|aT%_haTRJ1hX$R}G+QdXSWr{tn= z>Eb%$KI0l$Mmy^oixeboBjMQC3_4#aa~ zFWlN4SW=l?#ltca+1s+{%n3^^LYhUTiSdVaw!UyYa{Ux-%$(zrC6M6~slhSs0#8Wp z{EG1R8zxJJg-;HT?d)CT=HE(N)+JHkt1#xoZjffTRV6mLyK2)p?QLjfbM9EYR~F|l zpO#al8}WFjNQcPpvO{e`=Pl21euchf-gT`hk`wWUK8fru5KkIQd;)#^cUJ_x%GW#f`t`mx-A!d z`MPtv-^u%!n~du=nLg6K;8&BMAkDi(9M}&lD-9y-C<7}r>!Ej<*elXwA4SVnkWu$5 ziQO-8xk|sidGg5mrpGYfU(D|MH2YjAc3YPs?$v@;P00xAELw<_zMhs|a!HSpvW8{B zJbTHzqx2(pK^Zb~4HWvu0{DD$&>nL8ks>K6X;EQ)q1*GpFUq)(sUiWxY*GP!fk~Jf zmwxP+gT-&I2hJWZ<^8KFLR8OXH2()>ZvoZD*FNeNFJ8PjgaF0e-3eOUtps;>_u|35 z#frOoTcE+ASaA#Pu7%6*`^)*xJ?GwY?^-jNS(80`&tzu5D|zzFenB;Am_2idE1Yzh z%=6Acq4p=-(aU+_nQPyrgMs;Og0h_{CSS*6uIwz?DA*~#jc3_W8YCT0$qcj6gF7SesGk9iJZM<2&t_v#g_XtxXV=ccBjwt zPL+9HiXK#G6l=->T=xL{96RF|M!NmE0CVfxy0?$Go`ff@8(ZshlK63h{R90BD<{D# z)$l{;p{c-8l*Spsc}jp1xuz6LOAFp=h?p8e{@MOV1il}FZFmV&vc7C>^``@M#DeP_ zdr?l&X&8+muvFN0*jLy-jMm--XBp98L(Llpnf(QCXC$1)0>&2C?@lX}B9}6;6}`ev z#9|oHtn!_zD{<~g+7d!PMAfcz>@KRzKGGHm7Ap=xf2@pp+hkT?R7dIK%|L>OA+w$==3P5OH^I6K&Yg8hx{$wjen8Eybu-9e3K_y_7flq=WgqgS_wp`}E;mb!Mhvl406%T9=RyTsM+;rTH11S)T|8`2|NH8VB#l z7c%@S15DXj zjBE8moBNY*Bk_!4u+M}Y`}?mm2E)i+30w$&t1yV3cn_7yw=hXb!eo>l9jXkJCxQCr z=J7U=Uy6>RK+P`LA9mGTdpb%4~^nX;+GUiI=IhL$D)ciFnJ)C6v>hM{X%6u%2s3(2Cf7 zhvt>-7webvIQfpd3w#$+%eN>YW02owYhkC7JL)b$$`D4lBNJD3`z6s(HP;>Op5(-A zqA{q{krySs;W8$^J>sg+YPFxoF9htY><;g;g(f;+zkb55giO?n4WyYTr1uW%O0vR!|1W zr+w7ZX1oW3L=0%QW=*dz-ld<`Bp=dx@N4yOXj#T;C?RvI(DzW^A64@}6aWy_>SNpZ z``3K(CBY%5fZ=v03E1~&zPno3KJ4>1#0 z0(5k|*f|OUii*3*V~38?84^l!+MxVf~$)d@|D`cDQ! 
[GIT binary patch: base85-encoded data omitted]
literal 685284
[GIT binary patch: base85-encoded data omitted]
zl5O{{#aicxx^=b@KTVeECD1)R;n8tTnO`oGR2~`FwMX|D%w#1a()qSjyq-KsSiQ}PE&rGBq!_q&ODs*;HoA}g$b+lwFB_21WwWmxAo z?RR|s$gasTcGvUF{$js7p}R`AJOx%NI;iHe8QD>#o2U1(NwNM>1MDw`=|_q5`pSI8 zB{CM?Cw|c|a2_hBTauJtVinsxQj)WixT$WYeT$$_Z`%XCf#K^ni~@TZkDzoHSvI?O z#EHO5q?P=G+)Yv$Y`N7}$a(`o^?A*$`zCO^=Zh#f-Ch3NEz5te=U z@Y*s&8`8*jU_0jlDX#WhhTQGN(_M~DJ{9R$eTOTBWmK=Lp+d6->U$|@Jy{Onru6k? zyWiE603kQd652p|a~O&lH4*-rRBJ>3<=Zn+Y_czJn7j70+No((%`ujdN+%Y<$V1T& z=L`w)7S1Bl`+VQu-(OoLe;R}|r%cI3Zej}=N}fy&4RSg}v|0Nativ#2EQlFS%U|Ky za?8x@oE%m_MgHO$O(;J{4+lZX1ik%4N_LgB+1Om zRuobP3EN|-$Y@vNDmMH^DC{v4+nO%Mjff~Rp1;Tnr7-p00~0k&#QOV-EDyc!g4|_r z4ruW5sq$&mb8pmF8hb}rK1|9D}x}QKr^>foFzFze@MUPEP$%+z1j^YWt zo$;%9SE`Z_ocFTL->aIDH50Y465dV13Uz;#B^=Gc^>L9*#>fOd*k~kQdh?~Efo85Y z!)Zd#d^q18BduA@Yg7L?yMvjYPt#;OJ0a)QD$$L1ji<|AYofk4Fn;|tHLo4*oy#+M zZpSa5o=(5x8s+kTNkS$b{94n(se>$lLb%lWnDE8U@Es5~v`)>Glv&%r;f4y5&wELW z>ucDi%>>(ieujvijrASxv$Ukng+LK7=bPNZu*Y_JXirC_wSCITgl|+@^9m5< zsjNwk@O8X;DCfRYsl&k8To66ye9J12qP1gNFr{0%!ck$@J`A9nIh6#3ks|GxMUl@t!^fmXb?rQ4LIU7Y^&HNo9WbUotM^Ub5 zmQ%wQEwy%r&Xj1`E%9wZk3XO(G`|6HoZa7N^14(13FjuI{1iTHb$@%E+{V5miD5)% zBKfm7@jAC$O&xVpitiBXO*t*_%~^QFyZb|t?*x<~RAR4lBL$-KW|U6NQq;(gOPyIz z!lhpsIPA#0P%aj@dxsL-?o0lHa3hxxDSVdb$t=!l$V)7Bu==;@Wdyl6b9y~L`m-sT z2(tpi`8tXoao-zj=u%IZRumZnQ+9RlB|rEWt21(4QJvz(TUJJLqIFo0iRF&4mBQ=% z+tFfGAm&tV97Zzw{+H=E$m*Y_ztj>-;SpqTt2ui$r$I#%r=7GLp3c^XHSpPGpC{dS z3*lQ-@8;oau1&4&00n2_4KSt&pBWn;mmjU4ql&9#+3KS5C~Z%(f5JmTrksXT#<##c zYR1wDk`e^)2t`|AK95J5-i6(I%$}zPJnNGF5+|=cl0nrQ3~eek2+ydXsqL_(swH&B zLq)s?3h*kcdEn6+xJizPrs7@Z%tVP4qTkxU|Gq+$7j^c_xGb#* zFXGfbgg~_5m3{hf_R7oQ4E_!#u?b2d_mHq&a(Z4(*F3DvCrg;27fHQOsu?8WlR;7h zO7ucb+X`?g)|dwpdT6B|yTEIhyz}!5`m=1_5nRR@FvBf3EDVUwsNUuqwFH0d2~%wc zym?w&r8~ZnI#lb|P5C^In_g2iN?4+4*x#EjbG?J%>~W1J;IV}oux+TW(LWD=Kx4jM z#%%nkw9T2$YSs7=x@yDG7#`&pQ`W-pY4f8&+=nBajAc*;Iy2iRnab$Fy9cOYN!?!* zGUaf?-a&gy#&lceMsMsno!P0}Jh3=a9nx;V7SS)UcyJ%*DXVtjsqfeKew0PU46^5B z{ih$kM&NxTIURpAEpSjIhg_KR);W-N9jkD}m!p<>*ZKY6@`pR575sWuPNZV<&;g!y zl((o{ruLW=&tT-}C~}EpYljB>l*%V)sYjV_?r^;kGtD6(X(OCOQ7%uJAdkJ+X5^_9 z=f*|YqZO*iQH`pR*?*rM(#i$7)5V9uuFIw@BAAVkLvep-a10h9ne~b_q!k}O#q-n6 z741wehzLQhi2iUI`7^RD6%l9iLHxGIn~%k@6^MI)DjkCH0D56oW| z-@7*VZb#vek8oaaoC#jdDbjuZTFoXhcL?O7x+KHoVwIG>lVE};!?#)q9k0-g8gPPk zAMs)ZHd17LDJt`^0EWxvwtnhMlbRvExr?72LY9y)i+am&&IDZ9!RKSF-c}D6%|?Gb zS^nwW>JzTdiZ3;FmQUeqY($T&ow9wVMVhCn`L}Dy&!q2VKcz^B_vpn*$S{h% zmar{ZPuS)8ktY9?woH}O@5u^Io`f)IA5XoWP=9a>S{P&4Rj%=GsinWA9bYEk{+5|~ zmGJs|W-wS%8LR?xaWJz7TbR*7eokk-%n|;NgQO&W9dw!0`^%uq3}djSn>qY4vsw#C zh`w51#moT;WF3Pg&0L{ZJtQQhq-A8-rGN}wHy1Ckj3baTYvX7I*0phzaCEi#y?xnP z8tQ7{V&epdxzPPcLbh>ng-cnRxzO?R0h!uAt_1{u(Ux#)S0EFZ=SODpADP&fBmV^G zA2VNNMqfesFOUHly;iPtJltRfAW7WDLc-C?9!kdrmTW=PJkEa)BWC)mx~Lm_hS?{9~Y2v40X@} zhFtZLyvlFpgz)pxT`oWe;pXMPoEyk+zN8J1cMRpeMEFOB_a6vf=5_xG;lJYy_n$cf zU~BGT!zl@~xBSQKf&54f|D8Vo-G1i}gq!;(2K~qe{{!Buv3~;i-*M(5s1OTf3 z&ZbKe|HLMs^*fvRc=&%}lYrnaY!diOHu3TN%qBjF5FID50Kg`G9y(yze`V8^0ROCY zS6TOeLi_Ld#PfH2`sXbAUN%7XKeOn2()};6UXJ|}tpAxsmud0;Mv4A0ceo)xbBFgk z<^Qb`@p1pc9WI{VxC0b1_%nCDS2p+s<3Ckk_|?7vMvm)0vqZrXuoX6NFHZR%QfO%g zXfB|=fT{Xkoj?nw?PzmZHQ-W}uPQS90>$r@1`O#SSC-9~?kC0h+1mffz!P808`KM+d4pj4GpN34X}rKu}A=AGoY6h zH{6_@?4b^qCia(-3s=kh|4Rb@a^w1)pZ~JM^8HyDVUDgado$O6ZXkXX1o_=S@cv}O zf8JxiSL67F&wsh+{2?V*Li5We`x|io!9W12{*(C@_}P2|xBf8SfGz)HmxXW%T`l`J`ZLDdmmA)1@+j-(2n1VTj+e?s2S5yS zFtc&ItW5#|=DG4{pn(2fw*PnocO-#gFqdUuxIkRo+>p!SDWJ=@kbr=o;3WV*W(9h1 zUuhN{_xEZj+~2FAa9;uSYu%Wuj(-Uj_rHK81-V?9mq+MwQV_p@;3Zc<+?Sug+EQHq z?ykzOuHY3||FuO01^KxFn*tI7oFza7KtNd+p-Xc7>!Lp!=-)_n#el!Y^zWwqpJDpD zJq8NO{DVdRJLLV@%>O93^8-;$YqLw|830BKW)E}GbTYGm0t)k^mJRsF?H`?`U@(_U 
zaT5>_00Ew)05?boF!Ov6pmfev1E4W*S^!QGhFA5BuKxWbhICgV5198~)Zk|W{|iC> zPt@RQuc70<@}=mwuj~~a&z0->FKTev@xLaKf5*jtmB^nB_Aj*fYl-~3X}?S4-^uI0 zF8Z_O{RfNw-L(HBHMk=A|JUTbswnl_qW?WL;1dLl?v)wi8SU{(*4(dx`wJyZ<7QS780u7X3SU{ntf* zwvqn;)4!Ycf2Ib1YyAE<3ja^Z`?G2L@2LSV4+tm(_hUD>Z2TuRxDxq)*$u8r#r?+m z%U$Qn#riv!mH%&D*8f@T;rp=({C5Xi{y$<5zm=`~N11IP6{i2o>nR9zMm3*VwaG> z&!Hl&pV)P^-T!Q7ug3lf?tjXzAAtTP(>Q_1+>g*6;Qs$F4jq7#`tzaVpRkQbK=23K z0N3g-4jor+&M)Zyyv_U;YQ1CQXa}65E#N>sNiJT{WtafO4@9CaD>n*(`1t4qf#b#H zt|qAo_=&og$7--F9WPh{XkF>3#MRjasCcNYp>X+Tv4+E)Zh^s0W{#F-t{^8bOQ1|9 z2-Z2yXy!1F)NuJGN*+#)C}%w z4TZu%PL}unj1u=R6#v=b{|l6a_=SKo)9+Y8xOf2s1?dDpg8wU$Ut;A1#7HclaF`>^ z32x(H<85|%vb@6a&#?0R0_)Fv;D5Bff`3-(A7R!1sXkv>m!I?*!Ug#|g}(CGe}Vbu zE%bMo|D8hr3gmy(<$oIg{UyREB*^=-F8_6y{?`bj0C0T&E&lsg5k?>~{4cSwtFo`Z zY0DMrmnCBV8#P{kFYWqA@m64OzX~CMWq&K<3TW~V?*Xs_Y5;iOAp|bD>44W2uwXq^ zb6dc&06nfMwes=M0WaXzk0vi2-<1z~IoJQZwku$ofT`&CuH=Ufh`j(Uz)bza+OAi| z@xN5umG>8s{N)(;_tjm2u!PX%xZfVZT~>EBu9&KhTWKgfrOorD<$WA)jYM~iyXs@% zo0!M;C9V~(U^{em8gk1~&mS)P9{TFJ(CpWkzy%P37;nqdL?sxD>D_MS9gi;ElFb=x zI{Mt77I0DnZa&(HHF4+;xH#yAwH&+b&ReMhwOtdStlgpVF#9k+!Z47aQ+p z{j2JZH{Ox9TvX3AHl1u-n{2G&><&xJ&fxdjE$YuW9kWv9Z#h}&>Fs>|j<>Jeno0A* zJ-xj}xu#B=77C>o$X=2eu?IQ4V$-WKYhCeo1qf9$q{NSZ&1;t5T}m4|kNq@8!cx-WR**uEs3EzDJ@MNx)GJ z6Cci>x-}&EysguGq?8^zo>V#K6Sth2Turvzd)N`)rd%jR(_DC`fm}eyrhv{}vdHYG z3`WdqIwQ0yraB0Zvj3ehOF7h#IGJKb_)6X!`0yNKBxPUz=WD5s`ZkdzG;V=)fIQWs zp@;C~(mb*&N3)O(J_NVFOXM3ALRA&DJE@BxF=`~GWWl63RaBu#C*64|(lKkN098GC z>ZH^ijiscCssuaKAM7=kO1Xo45ICASfg@5|M56egBHw*PzhbbmLvCTus_c-JV-VJ%J9>6hHk!7cvTi;Pnkf6;rWuDVWsBnL^U3S}@MJU&BZv0P zS13XnF=iDeclu|m3c~2{q0HE(GoWrPZ5!;x|6ydIJZi)Goo&oTA2sHg@ zsPM|6)D$2UGQ}4&r@lxCW*VP?uNPXGvbiN16}=8i7m9Pb@~OuhW|qL8^{6Ep>D6nW z7JWwEZv-$)k9A$0+m$0;rM|vC%KboYY3dF0Qp&ZS>rNOt6uCsA;GiO#Ansmg^%#dA zEkjxgY;2(&<_!kv$qX8ghBS134d|Kdvu$t-M9Qfn=9+X-bHR!lZUKD8Fh-%Tu$OIi z;B6kNOjL*l$FD~ZM;Al2Yr0oU`*=>vQ*B=F+%5WDD#{Ms>Q$8)IuV<8$wx0Y{~Y3dl&+AR#j#sKG9V_FYsG=^J87hCtPU42`;Czc*+hE@F+S6MpnQ9Wzh*NYN{OK;LmQ~$BH@0}y=Xs#sU*b_NeQQgOZws+5QoJ4vt>VvX z?3bzq*4CXlZ;mJg5tqJ0jctgY9oVqodlq;eeleU{&BrOT6mK06Hn0M z(Z=`nUaj&KK3ihwFm$S%albzIN4~Xc$5-cO~jc_E&xzqs_c_CA#Pw z=v`L09eatjx?5}O@LqJxvgM$@vp&5rb?jQwXwzDlD3oIy+Bfub^YwE7plW3;C@^Zj zGM!@*N7iz~dZZU7lco+|u=%Z}j@_;Jr$?zc~=t#h-ms{i_1EW%zrUihI|O6x0^F3&+cKlEtllTlejKT@P@62O?%l-tJl$ zVi2$oU1&jBWR#$TH?7r6{uA;-tH@Z$a6rVp#nq_nMt`4s6WCYaR=Y)H;}62IFFQ`! zTa3b%Hswg6Ibt65td3Vq&6Y;<{TeQW8k%Rw%N*mvYnbg}oPZLvYmZ$$3x=NHz=+m! 
zsZ&)o$B()V`jTe{KU!*V-6p}d;@7~98v7~)t%=$>;r&P2z^XuEySvXyrHhFxH88be zJr(dLQRz2yTl9;+No4H=U+4lVoy|J?D)l~+c9IX67NPst^_s6$^+ti#{jFkQN~?b5 z2l{P8By5bI1(N~u=3eEwu>_SBQJp(+Kq~izOPv4AvK1r?1cLSDD8aGO_l;qGp z#8dl}=a>s|c%(4=a28Z9cRuidf3U6!D%$!zx-PE3{(fc;OeyHId?4%BEzF+!D<0?r zqfYD29C~NH%IV-f;OIB<)6)+HG&7On8kR zMLBhj?MF~c;X}HsT153y<+3wX+a!h!3ro|I$EuM+a$)S2chcA@p>zT%;a9}sLEC?N zZdTy`bo6fkFp*U6vjjEq9|GIzyS~1Um|#W6=x+4BF2XVxCscKUB|r27kl)&?h3i>F z##!;0xeqDTS?Y<)+#BlJ#Rx9(5~^oYO`J5O{GSl(b?g$#Dm1LKr!2rDa)(Z);HdXa z!dTU;(l;gR zQ2-n=7xz1bc!9H~CbekwVc}9qLh%XO`{P09$M1EHITMZ!u}vPe9n(MxY#lBFuLLH+ zTA~x8yrynP9kadPm7VMEWK&M3vz<=UDxl&f*knf@+g3L9U*HZO(5}!Pnz2ch|?;QFnr{mjYZrys*~K&`PY!tQrG9&ZNcqDRizozN5O7-*Ng~e9yfqx zzoXiJoW8l%!U}29>8s?(O>tP-B)YoQmxj_-iOx8ro)2c8QLEuu(I~`$Y)RE}X1I(% zYcx+#f8Fg}o`2eJ_O18QO*jRt%=A0UAVi%#)K5RLH50VoI&^=rM3(46N3Iawu0BXE zLaGiJunC;gd>HhoSqu1;Cqpq(I^F!dZOY0zn2Bqulc<9cyc`YtI(nQMT9UJ#xmj9A z*%*2#!rp3rsvhpRE0Sk0O_gPy3!q@Z4^CHHZimaJ>BUx;xvP`r0j&x`t{XzJ zIn{fhcBc9<01M_nz9XFR?8CWS*_`EiX88c`sUxB$J*P@Uq5lGb~)ER zYso0N(*`RkwJN5Pg4T^2{kXcOJOu(_9r-X{vQ<(gNxxOW5k$$B9RpmPY##6iPtbv%2IL7X_65}JOQW3KH2lht}$Z2KS#C3R-4-8cs zrQU6}@Ouc*<6>Z)`r=Vl!&$!p_4%tZd$j^DG)L7s3ZZfsa3_ZD zz$nt2HYt3MEbMd7)HwuqxN=(z)-0~I=_x#@xhHA3(Vs!|6&TN;DYza3AXz&whiF~< zHcdgh`iue&xcsviqS$#Y!}6>V05Vj=GYBYj8_9oUu2obi;48fX318Vh zDyHVIV8Rf>YYCnfCs$cQC+@L2nH z9;_o8|H_ecMno6<;x*ySwD{AR>6zPwDSGkb(v}Mg&=cy%5Tm8K{;wTu%L9Pk5Ypg{ z{zkt%|#bq*mFTx)JXfOcbG_NFsPuy_suLQ|vCe~&nB-elKpc}7pk zRA0seNO(~Li3Wj)jT%EFO$Q5W?WaA5_h5fD1V`wA&Pl(%2Urm7N&IB8Ig)sKExGqkVEL z`g!u%$cN(_2`dA1S#`$;-SqV}g58}a>kYgY_yYdrebtqn?UNZ}Z20r#?}w4KB82XR zw+|S4*gWIb3!JIYd;2dgr#=&Cz*5z6ysbaNCvqaAuK7AT@w{k zkv&m+!@WFm|5k=;v!$aFPPtTb+aTtA-H!D;=Qare`<_q!!EB3NDc*tY83=$Lw_}JT zoFV+pKpCB-@diCk_#KL%05YNCp&0ZSpOv))#V`<}o^L~&x5z(#tZC)#U@NLlQjKRD zxk@T(!v)i1p=9eDk=aG(qs_PI^?gsu+x~-y0MH=Y+1QUry8ep+(sIJvfxUx3bUDHV z>JMPm^gg|c7}d1pd3AO}qrr}$K7B6#kD}H%po491=^}sg7LxgD!{!Ug@2sc-iq&5Vs4up`IHUS&jHUldWhNSc{V7=l%fhxK*@0J5a=}|ZRJ0tN zVq-S|+9-e-qS52pF;+0B{@tmD@zZxT1Vw5i(KG=BX&8~|h!~DOhamK)FjnU5Ux9NB z5Eu|;?KhbOhk`S~T(a%>Lyc{EFs058KRkenAlC;^9s7|~ny45nI%}!APD`J*h!$lA zIYY9R3PU*&r86RkDH|_*MwNx->NAMVh&h-KMOuBLv-Mepr-fV%$0IJ^u)YtNhGvJn zizJ~MQ6W15D*7Ec)X>w|E%~Fgt3;;Flj?qN;5eAmGH2+=xu$^iC9yQKLTVXSIj|By zRsp0IK-K}I5p&X(6Flo?5MO0!3Tz4OyHE4xh(2CDCj8PpCVYxM6VFe59F|;06DC>O zJw;FIFQ!$D--tmr#9jcU5i-bT@0w#35Hm2q>1x?lF>-<7T0^?#^|i_maIIn8N=93i!&X)eQB)QdCXpA6 ziEI`P?KX9wF{kzEa4)4mP25TXaQ>@gh30)Q&4zfBWk;{+a;x|+RXE#h^SwK1dVBl0 zS~`B03s^ilzJ8bTfl!<&PnZ`R*`3}%i)*ElX&4~EeTS)|(%zj%JDV(ho;F>+KZ)K* z&hoQfpW+})A-PGf>00~oP{iE~Lji#{qA9}?^NxNH*e@;UDCir@%!4%)t{VqO`K>f4 zfrUYq05&a_!?|g=-+!cC7~XReRZ@-m;s>g>aW;kqIZpOaXf`yB#>V~}MhLa5x^a+R zl`WKZCW+4)+MDR(4+qb*IWD7j_t#1o-Jqq&kk$onc0xOJ>W1i4s0*q>X7gIFrcntht0hmydUvEf-)LQLl zPzulK`oY&q-|!vCE`An>!ZgU2Igc@J@a~PtA;DBva+=El!|>bEF|i4QS)BP`y_#z_ zY)EW!ps5Gsj@B=WX+(woEEIS&Ykxu6>y)ckuAF~bAH_wxT&iWFJ>Xs%a{I=Bz#Kac z>y3NpsEfz%{d+BrWD2bL)TkiDk(-z6T-PlV%HFd%B~^ibD}BZj1(R;f!(glbt}Kq3 zGg3NI6p45q49UPYos2OQ&td0W#tt)NzIM_4zE-B}sol%X-RaA6GVy6dw$|E;ejD&y zg2@o!ZH;y%s?HH6dClD)%^3#j7&n(-gN+2NMJpd^8JSy)^tMUVKp`3Np(UUSo4Gn4 zw^9!8Z*D7xanJi_BcJDTU3?{F)Jn!Mtg=gNqQ54u^s3{BW$!^R>1d_Kl$OU-gHUL+ zdz7~D?Aj=dM4(tv1II{(+)ej>+dr`JzGoPUr0<1U3ofX&7Tl@HNWk-sQ>8rwtP=62 zHE&D?q2g3IEy8^?aPg96?6aq@QPJ)w-E=cPidLx#1FaaN?q8lf9RJ94DX&tBtGM6X zRy2o(i@uM#v2}>xFU|?glo?>s_ei_7ueleE4{uE4v!t^`?4h$?eIK_mKa~SZ%#tO8 zt~6&%k)c}hn=xPT4}bQJO&>>_6S9LW{Y*ok+1pdFLudoA4~|3_F8p&RMo}G2@QW%v z4VW{>I4Fn`mm`StRv_Zu_-cs0TV*uUyU*lz_KIx$yjX#SPVYG-#5PzwP@7Dd{#I3F z;iG6ePkB+QSon^1Zfr;683n4;gqiZx?--FWj31Znw0BY&G>lfxEQE-tZJ?bUqdnNF`lRO)WaV{2l>K1t9zY 
zf>YtapKjp{SO>kA)hXXOB+Zj48t(6m0mKww#=t*I=ED5xHY9X+Dc^`}@0*XtI?MQb z`KqBs?ym>T!+!BayM95w`r%x2{2tun`Z5ulwT@=uSY#kC`5{2e=IC~_$qSN&lAUn<;Lb1iD}Zkb(4-K z4LXc^@d9RdpkN8^W1xaR#ngYN+XWNTg&&=_#!M~&qo2geoD<#3a3@Ma{QOA<${zpk zveb!Jr?d^UlSan=OyCTL@j;aWI~p{@IKNs35a*V0i1p4T^Q%jl!X>{TcLOven^;Z_C4i}mc?(8JhtTUx;JI7VPtjQ-FI0zzXD zQ*bDyJHDlYlMCHF3=*Bbcj`%bl|rBVBsVfsxE8f+qFCtc+|l1vagJlIcmw9Dwn~0u z{p-z71Jb9vhZRSxtasKWk`4j{SOk=_i&@FxlGc(eR9py=|&NJJx(IJwh-Bq~DAxGp>lxVYF0_YTM{ zEt0bmDcVZhb!${twR@y4x>B*gm_taP1<5*~p5DzlGh$`Xim!#?sA!?Lc@1!a4X((6 zXi*pYU?If&U?c0IP?hPgmdW`{VBIC)fn2eK1hN|K_(4Z!ScLEtBeS@0-skxYHit_0 zTLhKjhXMoNC#x~pj9FyN<L%#e!I{_%w&57Q=QlcT9W5loxby~?*ZH!{M;Yh% z9%tZpnHo8j*6AkC;RnWTCoGcdG zy0yx98r;fwm`VjpqiDnyf~&Ht*FuWX^XVky)WGgy;2}k%NgU`-kucvfvC(QKk0FH1 zA)y594p<~b#Xi8u3iDEnr^i{(Df=U|cDUyOdevOAa=eStNHQ4I?tQ7T4IWzGPpo|M z3YOn44C%epKeX`ad!_CCvC@j5_ikm}*Mgz+pMgOz$}M>ou;uNNfdtQDP@cAu&~-bE zkle8A<@V%}!{Z%S6_W=gxcfSysX!$0%BbLw=N-cf0j(nn(;T6M78y;007mOLbb!&iP-F^~&IpWch}EX=q%y5Hz|hC^qU%;@M+87&5_zO9%p2BbpwUZy zNZG6PIf|L|-m7aE=UuV~4BJ$B`iGOT07^f(c`wz!~h$ZQziw`~2n!S};1e2nb zBZK!%0Ik3y{zzzZl9ns-1GTl+JaQ0;_)HRQWPOCHa_AUS_6HV;+>dIV=t3HEKW4NL zVj;I+k$=)8+x{j-5#W)h(JfIi+U+?X&MR8^bhYyh0&`)W<)4!X$>+KC8(=Qb4{ByK zVhXZ$K_um-=73EnhY+$dFB@R9lobM1>G(^?b=B+q&4UWxD1|C>I)DXzo6ohUC6dWi z*a}^6uAf^$8F{|4GC+07;IOLXnC6dIg{H;t*=l7#o+7xkDKj-aU#3)IVlv8-K-oD4 z>U`=g7L}y9Vo7CVg|hjtDzc}OG^vCH*)kL?!4-Ki&j4ekN}>`Q|Hxw17M6}&H#w$# zJ*%9K)N*B7r8w}DVZb3}#!a!!fIC!~ETtl4qLRl+C;_qRXWAmQ$Bf2FBjd&QyQG9e z9Clns#$Og)@qU7EaX_Eotxa4>2^#aAj@6d2UKiyD6PYK5 zW!N$A7m^MQX^m{f0MU8{PB4+MQJ-PY~S4J^-*q~rv^V{zb+y2s_bhW;VuVi zWX&Pwr2Sb7SCk!n+teK)&#hZHG(xvo$7<$(RmsA#Jd;SvqlBW(dW6z*H)h=3HA?O0 zmq`6okPY`1RW|mK)qS_Sdxl@U`{ z?+JSyvkOd7RzRaGw>d?8ykJ6Dwgxee4^h~HjLz&kct4tQiUT*d ze>FAQ4d;GHkV~TC*qU5G?0nE>GP2G-P8NY4rW8c38{r0&rg+@k-k0d zr=4(Fv};VV7I#@p;;{^sCqXtd=tGlmNqEsLF^#79L;ibvyayhh9Cacnl^##eNc1UH zzN375FXf|AZ?9q^@?9f#BC}e&Ej{)C?@SW0J`J{0<60{+^5z#PnZ5sSrHGQ+qjukS zb$QCb3!>#D3tTY}$k)u|{YVE2VmZJq*pb>2pcPU^=14i)g#r2THcM|+U z$;111iSB{R^~Z-2{JG5X4oPfs2h&6mW#f`KkfYqxF>0kCP*O%MyS%Mhq%nGj=#j+% zVD@~x+B#Qu@-#6Y;h0hfE^XX`7S*G{@>?Q&v22eAz4d25Ud1<)p^q53Rc)W1zP0&F z6WV1jDqz_r3{t(7xUzBu^|ii`$>5o7pC0PCvF!hn{Kb zp66YkOot4aMU;OT1(-*TCry}8lmyJaE)^>2kAK4H`%L!>SkG8*Gv~OmOem6Q4i1KD zroVC&NrATwV&f`qRzrlNNie4n`u1KJ zPfTX;kkM4XB@L01(*?Xe6)_@roFJI??w>UD1iH_oe|)?lSnMw`EJKvxAWdbHq65C~ zdHLmXrz<6?Y0U>93O2Ba63&3xP0w9{A0Z)wx|~>Vw%@U|qCE})>9pF)YGNs)xF2fK zr-eTpP75PvH64=th+=Sm3@BZZBkvmP0D~%eEs31zouwU{9`{pg3P&HZML`=TetxqR zgX3i}JZs;TW-mwCerx)Ja)yfQe08C~?HGE4zf*)NMVRkMk4+3-$U>TWsjN03GDKfHDl5bRIS{ybY z^n2*T=B2456g}x(6XWr4I?V;&s|lld`&J!h;Kl3ysii0P8?;2{WlKgQe@pxX;z+bi zuG}0Io z_UDt!XI#B@8}xoK9=0%NdoY$V4XqAy zLWQ`|OL=&v=3&3MGs17ot#%g$OlUBlW=SVH2Zj z&*(yV*di-@-s&bZ-`(0OPI)sa4);VW1yOc5*{dx`0P6DFzmK^rV4o^yF~q4kaBhDj zl2lay&YQ+)R=vFoDUKl*G(;kzmQx5kzG70O z?=zW!DEFQZuNy^&9S0vgn|R4x{+CZ*KJp#G_WH9&S4gpI3m3b+yTbVly=vfhdJyEF zIbwRD;zWNqXn^IFjdsEXPVH_Ha+5}TkwdYHiG2GcP=Kh889T*-YVARuL6+*hIrEc_Pyq-*KpX5Pv{m>KwLXLc!$-JIWNH>YZK)RE}pL=XSE#|q-ORwgH z^Y0i5LHMC3RCY_R3yAgOSq=7(pnmykd(8(>+^HQ9Gd-sv2IK`7i&k8;WXHM@ZSE1W ze@}jfYs4N7{xSBWcyvTXD*$0>4&UI!my<5)R~RYMpNM)Ymb`F*S<1o0alok7K#(rZ zZT%0Au7TsQ5~KrYR)V__5^WaTxWG5!aC8|6W1@8DG*3%wOngI{P)R~hA{}P(wVz}` z)~#{QSBUE?36J7?Z)VdnIB5 z6vZBn9N2M3WcGhNpyT6lz^_XW=Zfmp>H4YiGtbdssat<-Re$qDOHIlbACMP{q`yy$ z6F2BE?vJ*%kzgs-9lBSH7B>Kg_C$LpQMBsx%}lYLbq=wks+4Xc=Pmy)5+Q?bpdy+z{~WjV6?;bd#e)yrZL z&C#Y=_J_?hNSI-!rqxe**PmL_Qh4b?(la*=OG8L;yk!`##_0%=doEB%&-Ux)pS3Xq z0bN@SQn;IlD+;WW3yEp5n8qY0+54pN2iu?N5}+sFzU0`7$u9KmUhv(>$j)Iv zMTFs4W|N9`;6YCr{Y|kygpl;lwIbl#ANQH31lzDjCyS;F5#>Q&KFWCl1qbo5=nzRi 
zuw_aq87L&&)-<4KUd(@3{`kS|Ud~ep9@bv~J~@EC*U4b$%&$5T63Y&<2wMt<>m|bn zbA2qF7(;B_>jgjU9T|B@JS-bl3@K7kKjBnBiC4?zBW+&~j}*dQm0ineB^Hzqo!&=b5A?p81j(FLMt&|ZA%be0dfI>m z5#A}!GER>AVT&c;SCA}dDyatrI*Z9A=Vq}dFGBRb%>QB1LUHk&Mv9dBnfX1pAl|X% z5}VaSe}4A~AzhhZU{`>PNgW^*z5~SIX~SuN^T;gh5|Yi>0C^^l9K1voE9~1VxNao_ z??3yIF9V!K70}ud@=lG`BG_xoWL8C-t;ho|t|F#WrKySg7RbmYDaI*h;E2xVaO^xs zd41!~E=Z;+?O;=t1aixOVG^4LC=p@Mgmq{jXR~qx96NausZWwH!6&I_jdsh3dJrh& zVI)Wx+yt&IU8eIQ4~ppy|L)CahEj-64zpb*{HQLJ!v*J!L?(FEQe7mVNo<`E+$QwY zXo!Tti2_2GyeG3u>I~4&(nK=x#a0XPL$rc8uaZg>>&1Rvdf#l;jI}s*rTV5R+l-8p z7#Rzt1h#5t?zw5_|0|If44qdU2a0pUXkZ^n2(UJ!5(!1i`wyEyjCqHQG+2X*9TV>= zX>e+KH`1 jca1WIS0NPI2x4Ftq_2Q^Pio1-PJ5nslXLCN`dbq9;FU(|SS-qJ!rl zYo}Py=RC7~XKYq??rfxm1Ak@2)kXI_r`?wNTR$z+9IuXP7i_^zLi#UV^C(;q8|(?eDG&C1jVVp`o5Y?) zw7oH|l^Ptr)T?p_)%&`9zd!<4^bP!^k=x8}pRME4kZsBrfKTBG*(>PF^+zsU=G6Y1t>TIRi(!E0!QdBh@7 zdorY;)l_w?PY=1xBtx0d$)Tm7*>whuYf#+#nGuCvqA-|N~4`+!TxqAmWu zg1ruNR#3+o;xQOy)d&e#tG4s`Zu$1-Cd~^EW90H1qpmgI44z6Hi@-EsrB2TK$Lo3c zA?_;o*v0+ zcowtJ1xPL!d*n1{Xttu#lqHzIIJ%4fZIT8-CQCG0iZxteIYda?P}sG{PV)gRB0qYW zA}=$81k%MpG7VuP!h7h)c=QRm1YXre@0vqy&*VDwfG{`&C1y;lXM%FAjnHC6AIYXp zL!-YJk++DCMN}f5aq@6TBA2-}R)byCC%qOJjzzw>i!g@MQ;JiCdMKxOOU(w7F)7RG#y{1beQ|igvaqs9f7O*bY zn#|Ug_-MC+av5ED^DTzTjTj!`0CYoVu{&A49vqfQn876mO4eltF;g_Xc$sYjked-> zu?^Aj=+>xTTy~=FDr5f)@aM3tFxNzHY22n5B3l8e5zO4j(3*9TIag*DKxqpIGxE!` zfMbeSBN=(JRXS`j0rLmraBWW8lvuHf75Dg_gILvtK`>XeM$cg(ESwMuNXUJdTDvNs zamKVt%z-4!*MC5OPcS4CV`tx_m(5G!j{-*&na7oaB&LWlaDvpIj17Yt99nSHU_G zBRAaO!mB^8|?PkPUts0)NDFA1xFb=+BM#8@C~2(Z;Spa~POZOzos%x?Q8fop$qiDo`5{GD;bT3>yj0i|kj<2I!URq%}U zqS68UHe}cQ^1*<_7HKv9K?I$Cp>$YLbl-DyG+YIFbka_934FjW8_#9YnPg5j$H%%; zTAtt-+1R)U%UbcU8It08h8DO$R_D|Edf&glzc@H?iW$z>2Cm{&RXDVe$KcFSIRXy+ zy{$XYm!(54vpb%>Ou=9r4&`GCsk9%&-uQ_DR~MIANDWO78wXY>j(fuI8uQ5Dxp`!0 zPN9A_xd^kS@yb;D!FkE(I>VibUL5O~1MalpZ#2xbxAqcyF>ocw_@I1pgCFiYgVA0{ zRUWx5eHyRne7btqK~n1&{>Ect@yoejRN*EmFICSSy8eB?{ma77JK>2!R$!Qfe;Jgd zr*k|i4i<`@-L9Blo^^yvDb%n>J*EEhP=;Axc+4acm_%*}jq=9fk9GL+4AxurBzm+c zsL>peQF1EtpbBGgnSkR_r&(AUzPUaFe@nX*7W^$4>_i(XjP#U&Y6 z!YP{p?d0N?HL5%-75AQ)QmCcciN~{V*d$DYdv!(9cLWhH&Rg*FVYo8SY7!gNEOrPm zh9^CyA2INXJ3Eb?Ln~Hl9u}6GhlIeDd>9*MaNth9q zC~=05*22^_nh;XgN0gvf+?-*v|O0q_0Z0p;wVE06fpyFH1SQ5O7}OjP@HiV}+`$W#PJp7;`<1|o?$cgn?(yAW3wpd5 zrJsFo7;+w~>0Cw#gc9KR91mx74?iH>mQv(g`Zuk()g9A~yFk$2d|#xKlJGx}mdnQ= zBSg|rn1HRS&UhFlk@F3rMc{@Z%VQQ*c3tq7(n(vf>O}6P1q7afQ zY&YNLWHWZJ!JmJP%Fo9H6R)|ONu3i=s>FkHd3inz!KOp0E)wkpf~b!fu0ZtNd^i6i zDz&kVd-{@FZaa8GAx~kwAL~hTu|n>;rGE_UGuL;?tWo4$50t!(H_LMox1XAt{)qEF zG0}9L?!0y@(9dp!Sa-FAdI>2djJdnP4E;HzKhv(5!N15M#KX5DI(8IpLtkSteKd;C z2qqj@C%FMGLV>Jhm__)D>>fKL7OxNp5@UMq48o$HF z-R>~UB{Zud>)Y63aP+-}!gAV1AAYT?p_fC=w`(tQpv16r3Z-;Hb`+f$!3c_kK5q`c zH$)CIHO$wKosD0JjC<5eJW|J0iL#|o43_K0Jr)5=^j$bjhWK*)gtpeXJGy}nbg%Tf z%UB`$&O^ar`%MKN*^BK#K0tDWyD=+lwODs}3(v!RHjBO0ZYZ4D>!%m!anLEy=4A+c z>auE(7;X%%z7Pk(nRjL|{z6?)g+o*8qHQZ<2YWO(j~1fwU|vDK>_#ibkr9FmbX;*D zryHyqh-2vivr(OW_Gdd>k6xCKuD?9*@}WZ1LE?b!3)C^KT)dPpvpPBJ)iidl1&|YltB=HaB*NM4!-?F zH6zGtu0%KIDYztB;65;@J>DI$XjEFmE1Mli*;DcQ3HJDnapr{lSpRO%_Yzm<`^b{v zs{msy2Y@&{t|c&jcvYxLuY-SX=y>+i`tyhRSuh(~?lXU10qc{~h|`tlW?#t$LK47( z9xyn7T+xha@#3WGm9GDNXqa~FPfHdo7N(s67w1q~U5tpuz}bCj4j$}{*%-i0Nh1(z zt{OOz@$(h~#BqjICl!ul0^k^d?DwatGl3_FHos6-s6B& zadCs}w3TIt%_W?a5!bFCgNAy(ksLmqo4$YbawRXJzoiKHc}nD$ zni5+n=3+;H@AL;TS;g zJ-;%6tf5?#zl^v}QD-;=+rpZI93Df)Svuo7&LS(dCZS(Z(kDEfy-lfy++)@!jl=0} zhwFpz#$N2@`!>ayjp4LG;Xpg-@~+KK<0{oa?Cr%QbX)|j*a@vv{B}t7B#!X+`Sfn` zAfCln)>3_$?M`di?M|6x%X{o*LtMPkLmP7@3Oiw*gt?T{8u{0$(UP?OFIC03Gly{LrzUEg(gFmDjt#^(u+styb+ 
z_6`}V(1O3EEr`NSU?*>r@IplLUuHHeaQ<}jD&}U*we2pvRrV3sjyNKz(xT{A-y?1W zjg+tyS^54h&l?N|Mt+`&y-d|~d_6s5KDA|r(6Au#lfwpwqnS+)X3CUFZBsQ)XnwWa;~SNQ;KNms zDh0VL2o`YpUg^sc%}BI1PPMO(BZQ1I+t(^?eVyCS4XUWsE=$QcRa9SoPHOdwL~tU0 zAx2*DC_?`3OHr%@c)ael^rX|u`m^WHPuCkj@D_IhobE)a5?NbECQWPBjZ%k6HT%v; z3p5z2XxMXXyTQzs>dyVyshrq_nXQqQ9TpXqp_rf67m_Hu`$i-d#bU z5gmp^$~;gHkrp_#2mKN>7E?}-o-$jaFmZaZ@MczKTj?K#s1nm=8!Za@4VtuE|3VCO z$IBmwNF0d1uBN_~#q3W31w4=#l1^^lGEgFo%2o4lmK_0WXJ{MTNLnL7)R017`$OUg z-R0%CO$}V%KrLNsUcAU{0D=+*xF_a4L4SqHoe_NREC!wzL3cWSn(k|Vfrj+(licjP zxsY0&qwul=qTxCDMw|h=e5+rS#}*Xw0BhZuZPf}WWIlB}$Oi{Bw+M005t=MV@$|2( zs2KB5%r+u$#c+ER$?L{R6L=;!(J6AU8WoZf#y* zhvGfU8u34Z>8rj--N+T6m%Rfvg|rJ7hLJqW($b3A=5C?Ca0jRd?kEWS={**)AlmK} zgwTmI=GcbZJ~l9`(>gXZ9L)@oI_P5BekYl-n<+7JEte)Z5}U)j%)r&P=}PEeQgTzN zPcG@aq0jnVzr;r-rGVTrqfk0(M2|`Gt~4CfCs4PS@dDlT&t2%Ztmh~ws3D-5kK-J$ zTy8tiYxAo%Qi-$JA*$+=EHUXVEFquk*Kb!MoS)PkVXj-Zd;JAV{3{OjUnp9(|CUSy z0G(QR0*Gkr%q)O-UlEgk@P~hCX@Cgezl1bEt|A~@|G#7kSvWf>m^dmK*Z>%TM4bPL zX!bAA8X%OJ`5*AwU)~@QfaLk##_)2me*51eczqV1II@7zM(_M;YcNWrpA@#1EU#oQ zSA&tTg}@?YArgp!8H%p_7H_6}j)J+SU*40GlDcntW_;fL-rfPR81i2xzDD|wLX@Da zhPTlN17}8)*y4j;Y{r~uN?}9$X}4I_xp~VI$%Le+=WkS-=eX7foJh4n%qzY2*Bd#| zd;Phfu9sZQ7?`c?ldYs;ZDY)V^lj+OGH~J}v&Up~;qDL(d)VrEiXR4cghJcByUup@ zos|9=*X(mGTv5~Lguf>F?i2frCU$zmJ6gf#xcGMhJ6=q`Kr+3fn5G!MP3u<}cvcW> zA9Ye!FM6t4Qiwhb(^PI&r5sq5+t;LYfgDW>WRNPHL#3_|<7~4%a9H5Cg`Bll1%k%a}$GsvO=K$ZyDyD52nh7QcddNToPKq+sjK#G4 z-nc$GjhNLEVtN&uk5(VK6IpdAP#tyB(1b4bp+Q7-LP&#l(qyF5Ds}oC8i*NSdIR;5 z5T~F6h`$AgK_QdviB3akwrh|i#5wrVc1|*j?zbe&N0mE7UnCOh&o96kbB8+ z%f88WF{t>g2cMiL4D#p|P%747s@BSDYp%4HpWGozHD~@$F6V!~P5*}d{|iI#e}E^% z4gkur1MWa}08i)}fR6ERI^cf%e}&8awHE&gC&d1bG5A;2{x_)8|Ft0h*Mj_C3-bRL z3-TYSME?gY2rGb3`H$83#sFZId}sfc{{Lk`{#uLwY(f4bB=j$1<===V|Bkx)%Qg9* z$SERL0PyzTXza|Kod3^A&3|)2gzaqX?QH*{6mv340-S6AGwmskV@s#{P% z0>El@x<1c)VLp6$=VCxWFuM@twYA%wE*D-mJu7Q#6`iC&{ygJAN~JOyU9MLVsFb=M zM=2(gsTJ2yKtzaqkmfcvB^zw0>3lw(dU|>1r9eQ5SYO}!k0#S5fOvU%E3-g=BtRg5 zh;p#MZ1lCXHm3_o=)eA zD!V{|#{Emfold50Y;7q(jg5`b(9kY7+U)f81zhxi{F4u!-Yh`sEfy#`WxafCZ0IZU z+uPkI63O=vV|WP=r!dSb>p_6>LDmT^>YTlw%5=PzDplT|ZV0ilvHP->nT$rGOG`^{ zw@T&nbd{72<|4bhd6wwI1+S-OX4JMqu+kWG);nFQ&KZWO7#VkZyxIU-evHTc@l-6X z{_n#HRmn~mFt7M!F=k-_((ENetGp48I%J>upp#w5OK-K#%9snmuKh3o``nohr!uCpSKyS6WWAuB$ zHB*}$AD2*5OA7*((l<9h-R|+K>_P&Cs2A|?=s3q1hJpwxlgo)zcR!iVfh5cxQHL}t zYP|@POv3ZU_u<{!)n7)Abxh z4WK#n{hLRQAb=eGf$8$MIxGVOh+Ye=({S_K+SI!4FU;W=7Z=N~h2Q{Z{P_3)G!8|l zSsJc?b;(x9At@=T^X)%P`X9=$zugrCTpr~6YJEThku-6`D-L)Qq>ASx+R zf^|rfx+%> zg^q#I($ex*_y40UH{DODC_l=~gb@(?*g+h~uh$PzWIsk;21+zPxi$V|_Qc)2YPZ;& z2!#w1SKy0_uZmY$R`zir#LNsdY-?jP`ThHWsGS2m`VHAn4-`6XS=?e?#M$xw+qZ9l za?Rpyp5)5K4rd5=@5!L2iFiH!^J(e_;spwrtlF!Sow@aq?DbP&uip6xuR^IC7EdF z^lSH)h$_lH97)S19`BLHpl|v#d@iZHYy2*w_ad29(x0fs%&ti8DuirJS6N=MNNmX6 z&z?K_Gi7>&-Zc{5!Fg4R=`X*W+E`K&13oaV$%LDd8oJi4eh?gezIXLaJ#gjMQug5o5mC`@nK7BOz0`4a)1W`ZroJnEI5)ja zIC7`IWX34gmMFAJkCo5Q<0qi}h;>A*>X};XH?2JMGa_tP%UZ$>>(%J{6Oe_q7B%0& zLi7#S0|kCQzWsISgk0j!g4nYXn~(Mtf=(BF=0}cMFN|Oqvl2er(+8axw4*m^m?*wC zA^yHSRCHIApVEhsnB^^^2G=2q*l%QF)*u9%%f~56+1VqK7&)i@3z8V4yXpLl5ucHw z>2vRd>cyQzLQwP}(e#Aa@E&|1r6f`oQK)|j8dPc)h#cfUe+Nfk$(-=4B z93bVhQvZ^2(56(2Wt#?LgyP@#f`E5W2%!wP+kdG4e|!h7en$iT7xY1cj~~&4_?Io3 zDY?5Bp^WU<1O$^v2F&$TnL&&6Yf{rd_nWJ80xVKaW8d5B4$pEV&@&7&DK5_Et)Hh8 zsL>nRP?dB7+-v&oQCiLJrFYeKw7g5Fs{wU_+kk^Gkj8s6&EN znL*m%Nbnjk6Y6PXZzU9v{u*lzX&MT|_64wxQ0|@xq*i}&FlT>R%l&lzQuKQ~-b!45 z&l>#s4_1iKvl#5r+1X~`i2G?nT8oP8CKQn~1)OJpe=qg)^z6U2D=$C??ORylx*7ca z!$+AYPD}0AI}J-T{9x0`8x3{!Pp(Vt2a=G|Vx$Mm5z|nqyyxg(b*O%k!B6}}Bph5^ zI5|0=S6e+bHKpx98Sj?+)_uA?V@Z}XcyDi4;6$R*J&u;|x0t0*T(}rU;Ao=>X*%SL 
zTy#JmZ^@QCfD5Mk>H2v88c*VE^k;9Z9G8C70PW4bg8!r<-js-V29bainS*a%9JzUe zqX@=bUsnR*dBqth#^&1%5meiznWi6Q>) zjhZ~HRaI4WbtiAGFTK?8^zPb7MBF@}{NWR~;AK%Xj6sto_t%j2P2kTP|Fl5XadP}s z3bBAq|2L8He<2KUBLS`lGvSXDnVI50CmQW;R)jEFzYqQ`k2$15n2=%MilxjgqT)S~ zT{o?P*nLJq{{-)PmxY5evGI@3qgYgEcZ>OT|MHqI@2DrYww7WvvAN|)vd@~3EK*8w%tB0FSizCL!ja72FbF!7p#lka(*|<= z3i&S)MqqzMnxH`3G$Ri{P|^sdAY+@9yMj_mL^2tIf2+{lm#fX>bl;=thbjLS#IFbq zxFO24sU97<8HD#I#gAMUbVVt=;`W?@2Vw=bS!1RduO1h*T0wZUnb$TCH!wsMq|}#H};Tb zRP6We@qR`IRX3zNj#|*7&VI$oW; z($seu?Q&oigpoDrG!jW)j`#7%1G=CH;l6a~1QDAu*5;t(D#s6X2VO-lm{*1amp4b% z$i{d@_E%_cy_dVfPo{{gY=#Ugr`R~eGwNm18|cKv;%2Dm0t+JRJ>d+Ydyb-HMt#%H zEM0Y{s>eOAjpD=|j&;>MSpv37UQ`J$HxS;Z<7QNr^%r`_At{+1f-AN6=OP4`&d>_J zt>0uqQUc4RMSG1z!|P-p!kRvQ6{(D}ajzZgZrNQhU1GlA3U(1Oh=X5q7o)hg2+rKe0qZPqvmN)J2e?~S?;rXn`BSw!A{T!_-AhPq(|gfQx7Bhppy?Q0pQ zh34-~J@|rTcT;tshavcvzDNBmL)u{7W|2Hcv+S^S%DQgeYwtj4NR5Z*K}_B$qVM~P z#aTy%sJrThtQy}9!!C)8P(Z5kj6aQlY3`prVUA$^OX^+VQX7%-9H~xs#{Gda4xOP3 zR0{U@q-aQ4m_d$WEa73HGLHn|uWZp)W~ImxkhO4Osl#w#cm;S%+z-jUtfs`qzn%iI zWDdcUg)Qq0`aM+S;11K-Y)@1&MC(s7gtSG}VICx&P^N1MEh;OBTB920??r^$Jp$eM zx7E69P!KIr)8$SZfl%0tN@Trx^CmMh^ZomPjnNPL9?m%GOhQSNKnh+bh4tX`+R=s_ z4zLKi%r}By|A188@xMyZ8%y|y4<8(-D)cFFg9q$XNNAi*9tPkH7?@)nJ6NLA?=6+3PA+M$NZIG*@ z)n!41aJa&xb}Ta4`3>4+v%Y{?@4f=+DELdT{eq^e6*PLHP9t zw+FAu{#xJve1(*SqAWeP6*iFMfBA$#U{P7A&=}FtpK{aZHxa!^0qn&A6_hLi)$;zy(&m zzP>~0{BhR_2eGf*OZv{}d(KWjTvX=fKD;y$M#eUK4pg$8`AkFVGXx`ZBgTOU8vzW} zKV@hlGdq<<|96zU=$W9m^bY5Q_Aci;Rxj0~;zPro4B`n#P;YyqE7qVVr*YSwxh-Q$ z7ln-Sc~d`WGd5xbPdr7Je8dhvGgP);IF#T(9e6N6c!Yz4Gc+_r1A$|1m^8MvnS>(Q zb#*w+@TA{oH*QpKaV$atNZ4ykbaZrFT8UNwH{nTMsprue-5d^bVPRnn0*FQMZQ8}f z1&2|6>+Q`YfKo~Vo+?Ah#5~uOH8jR-vx5o-M|S_5p3(}rnE@AG`&Lv$M7tsJ0Z3t) zn3yU%ZZ2B(+UI6xOFj`9T7o6|%#)2X;r%5xP?QjOb zb-3EkEC6VAhn9&dNzvgKm6SZ@;K-B=k#Np0nR|+9SgMdf(*~?zm1TdCGsif}fZ_3^ zRDh|q_3C_+r>Ll?jt`&GmK#`R0Ji-2{Dxsy5W$ii_%=0F&_PI&jcx1V&lYGo1^{Ox z?*-LJQC@yHtzORxaB*`}_hNYZq%?I6!lqXdxSr_~5gEC9rIG%-lBT~ep7QeILS9~8 z;`Yk(_s9HvIaSI58AqGBdgqmZ;H~n8vuD^SA91O1KKbk|60_;Tyu6BC7!5YygIj-e zwY9~LzVfPOrdT4)zPBR+OOmg%esr!~M~^v_9upfoVCd}ZYz6(kljd=LXq~=tkOv1d zJ9({ulowG3GHk(slkL7fc@7E8oE>kF?N_#)P^gsHd#ePb{C-6!L%H7@OImEsBV}V_ zqotKtqAEr+vNrVo2**w-rvyhs;$nkg(bL@g!(bil4WoMOLbi8ybu~}Bi+y*_nvPEr znLo=AURBVgw;+`k%g461Ab?}R9!T9Ldn)}4C;d-v5JOkAo>&b3m*=tm`}y(zZvdKG z;9mf=Fwg%l05mrbFZ914(A$|9+S>i6{d>FPvJ+&9+5LM28Oo1pvkU;~zyxXJ;osxVk&$Zv_MdTxP1nFPA{<-Q3tH zBrI%gZ5{nx9{I)oZ4ONWmGIBi)$q!tR=<_?bz3K=?VX*4@$sKOe&B-_C8Z+(mPSYE zT%O%W?UFQZ@s2N@u^q{3YH9-M4lciuiOG;njYiWMfWCs=-MJf}?OKM0vj75|pO33t z!oPP7w~4vEIqd+qtTgQ)DKvi6m|k94Y1-t$B;UoV6Zq!s+wl)Jyu7@TIFthZ#~a+1 zeP7eKyI)FeY>`n=1OudYdRgHW)%nqgWNK<^tWIS)05$;v3eamk9UWrkPoO7}xYQm# zK9!}VbG7zFU%(UB#^>j$6v$l8ySk)7SfULpP;~S1B36;fbyZDr&MzqN_w|h`Yijo= zv$U`PU4^0&SK7GFH-gBXLVI-}k)>(R4Egd$CraRpmHRP1U>ZlVCD)Tf4t}CVb}v6| zY&bYLAoxdX=EnV^9S?IVY5F8@uZTo%O+&7ks!75_qxb_h$C<|X7%$djXGqqB0Mt)x z;2HD>N;rgx9I=~guXsH4CY#yanP7fVw=%C z$u-_zKg!7#_xM$mk-?9+#*>`Dy5Gch_nUOPlg=;2BL;OOvh zze7hy2UrDnFE6>w!!JZbg#N^^y5QhK05gY$?Y$1Hm(kSJ#HA720D>fA3|{8u=KA_+ z(a}$Vg{Y~i0r=e)zdygf$H&LdAIjFe48~I7o13-C#N;e8rlh2VRXYh#Vu%M~XkllS zGNm&e8*LQNl$EhtTCQAOUHO!Agr)0fYqz?u$g8WXdw5h&$1&r}qV;8fV0L;LEKMaa z+TKHxQ&ZW(p6ExDv$M0u$1oEUldD*l2j83BSNdMRetmZ4i=A9W)*$AftD;h#lao_h ztG$i4O=O4VUHIjTlDiQ-6|}sfs0e|Zosv>eT>Qv;C^r#cYdsrNb90yb%iSU(B4lKH zOC7XwF-_%(iA0YD`T3LevU>Vnsx}2*ubnPA24B~K1;SWt^k< zD7dv%X%2mbB1X$&WiKtA+sN@SoeBR;LraS{Kwzzxt7sif01Xv2YVIMBkQcPBi><=` z6Fxtra3HA$HM;GA=bG~*ck-AyIx+EpmX`Lx18qY?rO3-xAUB~_U@#jad;$W8OB0RX zkaHvLQbt{)I;9`MCPnVWbmKKpMHW59vm^+gKKgkC5ncMwtB{&pb z`$5LGAyvx6WFhI%Amky^+eo}@JuNK;645!CyDO8DljF(rkkrve@8ygIdssewI?zvR 
z`N9E?PEH;Vl(}kph9=!^vcF$JL*oEcsm?&PN>4@xrTp;EpCAwr~vqQV#Z`{?xa^zhIhcsk&A zfkA+RXGQXS85rQ^=I-nO=EL9LABg{7a+~#SVj}6?yLTBGUw0Iz`xpI5D6u{k78ZW| zxbJTWusVDDV&8R|Z?ZMzw-s71Fcd&JAz-7dyu7=+du+hNHHMsw>;W;cJ64`e$mLE0 z1|E&9yu6LAZR|G}>1@Q1^ZInvc4ddIr03dI%a;eFq+CWed>)KotfG>Vrb%mcn%#01 z77xA85C|!Le*OzP5*%KB{$7+&5s-~3FnGaW>Ro5;6&0MW?Sc;8Gu75Yke@=)ev!T$ zEQ%rI#x#25K^4SoJf4~+PaS4uG3Dgo z@R$bmfxWRY6)r=4eSO;3*_j!j4nJad<{D_ld`E|d;#;8FY|-)erNhU!3E`S>Y*^ji z_%^698kFTh>Xg^yK&_F0av+Sk%3WHzOD?9Ui1qjr0RaIL5>n;!HivwVn9>5J;Ub|_4bXU8M^t2#V^@Wv-em_Tf3~xX@5*1qsf!MDb_U>E#0oZ6xhhwu$zO; zZ-r@WUaq=smyYT?om4nmEK@nnNMr2F9HR#+*%jDkH=P(Y3=DZa&Z=C}K(r{SL}2$2 zk^W!!pMgb@c-DE38YomM0&-}QYS%NE%%->ciyfjwhV2><5Fo;i{OuXI5o-TJ5GaOH ztn?LAi>}R;;+W4yiPt=9uLizussbe@hDuAzargTmZl^Xd2H!!vIx}&coXglg>YEj+f1Wy0Yv7}u5e9DrSaW#SGCAql` zQ%u1(t%$if$~K3~ExDrp*;&IyoF_EEY=R2YJeF2?c(`du+k+n^abF8?C^P0rNl?qa z%+1*%;QWv&$#Yq@$4VbJd4VsH^x2CT&MZCc#-kE!$drj-M`!JN>3-$=eQ{39a(&o@ zebOB6!2AukHfLwaF8Pov4K#$2u6JJikj;|^sc+siSw(kF6a($?(T3UCu_F8*?{J)- z+j^h=48(@;@$(~j6#2gbV(^JF8Ab_g3GJuE+cO4AI`sUKoc;HinccS8RV>P*LVn|w zrxXkAL86J980sTd7ox(#b=Rz%oVWNm-C$oFWtrA5WWT5UTT9l>gy z!IAcRqA4I~kUI7hc~w>1_HT3V(^pqlN8WV@-v$lGkvmLV%q1u(DfPu^APb?x7rh6k zs|GDBER1e-zl9{eeG8lzG_J#Yr|)Mk6+>Pfz#)sOiP>OCnvnD~<#iCI=gb2H(!|&p z+E`vfcpUkoAv82pNBh{zYlw&_(Xcr1WP7H@sNTtM{|EhmE!qn^T`ONd>VVtpUJsAo zYinE0sn=d8Fk9akKckloOx)ZDAX`E4RqW085pz*Oi=S!AxY6A z_^qu`%9d}gF&N~(Ca0_w6}f$C@EOShb+ivM>kupCmtQlA=@-A-0Q;XAYQX5);3D#4 zbMq0$PE=RYn<_vx(AUr)s7tXIN0>r9ktWqWI4^Gk1OzuLN=w;TSvLmYiErK@<)DmW z?_<`4xj34#f_dz@XOjt%o|^jl-SK(~-$jsO6c+T&H0^O053dpu5`fS)HJx8yo*uX@H#&f< z!P;8K(C`a~3?G|;8Rq=WB7WA!^(Eft&q6tq5@%AUAjS#Ri&~PaJLHsF=(tta`L1ZP zAjHFi$wUQa`Q5vLHf$#LQG@>8-u(Q0FS4pwLJ9{GJboX%)0`Z-uLq{>fzc$B6B9YF zgMx0GlGXUw9+}zr|LiG#n#k9FChfV@9_AYKvAkTxSupy;_5cb7?(s+ZZEOp@M7x|1 zHtzd9-)uoyJU^e9ii(Ps*45QDVJ|W=vZb-nZ4iG5laz`D;9eW+;NYYz`kwql?r+BJ z#;&6|29Pv75*(V=MrYq1W-#$FTm%k-fOk zP)_*z%li(+#l_OaBu82x{X}26hromz!!^;+te<;#gBe*p*)jVXL?aozy;kpofM~&G zz7Z9z{hhKJ{Nr*rsd>~ZU30tm zrQiEcUw|N7CF|>gMfID331VHR(MPP~d+pucgGE)n((K$c;{0=uWNW(6Om+Z12#hWY zGBP)McN{SH&sq6bgrzs{d)z0rZnW&j1%enAr3kYHnTeLVKB`$xaq-+T6}s98112Wc zJ7txi>m|99T6Te2i*SN4cig;dpss6cYcT~2^+3aXb2QaWJ=e;ts)T@E$HWFaB%w5X z-jl=`H}Zi|y#>DBDJdd!Vo6CMVZ@ zW+rQ3p{Lg{S%B>tXL_!4MvY-p03T`h%*(4Tth;`%eWj?R;xQ-55tZd`tL%++dV0Ey z^e(_<0AUA{rZ_hb8w;!3)qH}(y>t|eW)b<)O-Co0HP3>Ib$7mr=mG5Vd+8U>AOgAH zTLB(Fq!=K3AgBT9!#lWXYw-2O<=)nh2KfY=XYwrBX*o+l8osNS3Yxz}-vyPtG1nM^I}4V&^lkm8$jjgF4K zKGw2^^jBFXTUye_4q}TX-=5=9=Ah?1d7fpWu1RTk43qt|tgJ|D z>z#e0Fbqo57VkErh*%uZmDu=r97NEUsrj0^I^tCF4Yop6lP_j3A+oP11uIKW!wRh>DxvuW7(Q(l4JhyaaD-i`1q^m&lP&? 
zp32LwfqZr7>sKvJ&6k?y9v+8{?lBM%tpawyDGZTTr-z{D^c6k0Eb|c_FE1s9zj^M=&DZyWuKQ#$2gur7RhE%#v0nFnsM&8)N8$z4X&Ol< zP~>yF%qyv>Rr;6^E>&4#AgtlQjus~^uU2+c+q2^MJCI;BG&VN2`0Rlcp*op|(6lf= z{~*EwG>xK!Z8^c|w=8+fetYfno;Ku{zJc(v55V%!aWo5bO())@W*2_!0Sa!GdQwyb z(tAEgeYHLjL<|EPI#ag=!NK4I)-Yil;A_BpdICgh!rsIr69AjVT7}9gDt>0` zmO`JNEW6M6yw%r!*Vu$OS;yRbIkQ<+J0rYT1bqclDm*Uk`zLc=2rf3oN3Z435WNL( zgj+})%{6sm6KzgbXJZS%5h$2TkfmjYEZF%hdhWrBU%Yq$qG&Qk09gLK1VJzm$K@3I zEp5ezNgZ0Kis(=ZZj0)+YmytbZZoyf@81iVCHat?Ap5!FU$2tc_*=M*ZLO`j401)C z$ZUUWuh!!o<@$om90V_@rn$!Kqkfr~$2C|a>4^FJ2(|Q1Ym$sofcC zrZ=EaiXMjJhHA!uz)C3wV)~SHVFNH9Z|}`TIh$?6 zY0t&q0yj6Qk=O?J%!FOD44*#TY-xW+Yqw#0z?dcM*#`pn&!0B{cSvX&083^(rGA?I>Bx+LN&SHFHW ztSg$e9{=d<=ISs;HRE@fklu9>4Gj~2(n#ttc0@sL0LggLQJIrNKz*$km3&HgJmFM$ zY&-OGUq@G$xgr3+C>_udEq{3~OZO+#fwU!c;26!$3QALEP*c!7SOZ*X0*&}%PR@ev z%%t303nHS@a4cbfz7tIG-k5~i=~hcA?04fifMHEm`aD=2Z`Zc-zXp6Npz`u@qezV$ zTpg2hbCGP#cTv2JA9$`i2xB3S)4poGdZoIjB-V6UZW&JY3e$0?*d;6sZ&oT2NftlK zhUuPzhL#Nc9=yZK>MOA3qxqGosd_n56~;~D-^2Zib$;Rv4wNGav9Jgx$1rkVKAZaf zy?6R>vK&FqE3_0mDD2VG@@J3Z*K-5?X|;D<8hV032R~O^et#Vv&Vrhw>9VppZm!iU z{G<^hWst2gK}*m9EJF~X$c8vVf?mYLNJz0uW!Bcpr3y4Y(d)9IeD+khMxV2!B*Fwd zZeWMgAor;=ezfMF*KZ8&xoJHQ{&TRxGTNs4E?0Ho)gx_neZ&GYgbnc0esx%k$5RSI z>eY!!M&9?Y^u$<&d(>TBT`a2FG!n*h^YiHelj_t!hX9lGia&H<<#MkE$w6Z; z45KDCCgx1=c*>NYl?vOGySF=g^XE@yjvZ|Ln=u84tgNhxiVA>%_K^?~5=z|o64VvL zl539-RZCZ2@F!5zVY4N@odtYYn_aTdux{6mD)P7?505@o)j9j!z+&6=DRoP1?HBRr%l5`?FMcNTT3m*;$c7;>7eEf(SaPLA$duzU_ z1X3S{{^;^NbM=TrO!r$sF_a7myKiS8k@QFsHcgw9d|+#cN7ZQ}xUUO8pVapOx0;B!t(Ib`j0?q)6lipnaInHjAM8v{r z0i^*hj<%AqFP3FKuz=%mT0BRH?DEIlsHKQ$tJ z5nUAJRj{^g6pg5^hE?W=2L9OMYycg{$E!FwvE#mVcXtP1?r*<3i?6oX;?MYbjhvmF z(vy-HFDCt*=IZ19-LRQzGSUg%4AorgKFzDUxa~L{wW%ikbWyAD`>UhkV*B~w*J-TH z!=rQQz()uE`AxhRgW=;XJ5ox#=e0tDNgx-ujCxL!FEcgkc=Qs;VGL?>^Y~$r?kgl( zbB&T9ej!*8sC*D)OE(qI6bf>IV;}ZO4fGYlh#s%2NOZE2NHE^YCLgCIqY46_yRM8KBOF=fbGk`%{q3R6Skw*sy0E<>uQ_F^$(}%ey_a18N>G1)uE`Py1i;$jSAV>dmp6d}(r9CM#Em)YBzvF#l3!OYilR@;<#J~EWH2xBy0jHi91A-Q>k`cVzhliQd)FhH%k_u;-@*Qtg)au7F@uI`rr1z8E?xs##?JmEHN_ zYVEkwR>#Wy3j-MH*w`2VQj6vRM0<$t*4*0%>V7EGa$cXidSm&|ebuh+qoc2BYBE<> zUrbJ(oGk&M#b;+41nS&@XAj|w%-@bqmpDXm9v#$H&oDzoA;(CDkmo_?Of5TFV$&I^ zsSTlBn2`5!v3!r&*g)ziZqY-ChIW$9ubXD&v$u{Fn!RM;(5X4+cQgSN+-r-t5aWxB z_sg+ewuu53D{WvL1W?L^A!kpB5jszvTpw?GUgga+H;crbb8mLj+F91PpShJ+wj0ur z`L+ZNync-j5#7C*?zrk&6VLk+nUP@}6m(SCo<3~m^fr;#8#aHTs~a214J4n3htN@` zLDZSzVwDOm^5{`{J=^;OOiMfhrHklfu&d%_WaKwb8Jzo}!|AIi3w*H;IoR2o{f?Mz z970dsv@7D`X#SL`bj>%3?6h&*eBJyu#zI2@MNBLNWy958x#>N|!fXb!g3^y4 z=gjp&0F4LO%CY5>@dw8Nr_#1V1gSK4}T>SP8Bnv^fOins4wI4Ms z38j&pjBmrl(Aub#foS29=OX{$@%oL~xwmI!YdexwTDaTRt& z{5)9s-qIY3^F%7j=G=2TtsyRsp$=ZZ1&p|VOVeUbRh5QJm`YQJE@TflQlHHh-y*bZ z2OtTeZa=FT>)EE0QvoB`{Zb~k1Wu4ES#Nj&L+l0sS) zs3nz_dR|=(-&^fdads}x%BqTsn+&<(U0Ui`ndyD`JQUn_A|rAcFjb9DbaN^~16jt{eCQ}HCdQ&l8Hoa^+Gc3HV9s+R z2!ON8q5VhQc&S>6$1xHYc5-uDP(Uayp!edfeXjIn^GhC1T}euLsXxDV%oSx5{;J{hU?7qIU@4lYaPfkbX?5o7JBpQl zPj(J!Um^kkv`C&wQ6e0V?Yw1sZujzI4FN)#cQn}tDlf-kXkZvKi#h4J-3g;MVr#9A z>StXO4>Db;yzDQUfhz(oLSJ7Wa1KhhT;$}2o4qK<3@o{?azFZ-OIO|{VYkOeM{}T7 zdVaLtBuZ-zg!5Ygz}^pKl{|;M@2jbC8O+2(1~4%Te*aDn!{9=i233bdD=R0Y=REH) zc6O+Ney4D)`})&tj;qO6fsICqqFh1P{tkB*8{`SV?Z{+WKnikm0q?`s)paCbx0=ho z9-y9|-{dhfGfPOcD=GcHtr)dHbIbC7Q8Ox{<8>;OjrqkVN;D zBtk;MfIlqwviEWL2fK%cIOyo;9z7bEoFr-60g~F_x-@)U#ATm?5i`}(L-<2T>HzWL zH-mg2-Y5T_C}CckuQ@c93fIqJK0c(M1Xx%KLu|vuK>V<+Gk8DuWqmq8^3DLTu8~Ye zHaBzKwt`~gx8c{6oofTU)aK4k(9fP|u=}W;KRO;1Q7brO0N7XWUA{A(p8@t;t!$m0 zodLTHKoD%&S4cH0J<)@+dl@&JfX>6tv4gDskg3jds$#)JFk6a?f|3#h0@2hrKRUwz z*E=uSW-OmZ-2Z`zgPa^%bJpRZ2Pk1}H?x|DgoL<|UtG>05Oayl#lcrqDPi4foHGL^ 
z8J~7mnFBMElZh!P47Ifx&#a5?HrGTtd#<@TIc3N;eS7$!;lqdQUey*r{rV2v_w@99 zix1wnIZz)9Yzw&!UhLIRcO((t-kzay(3|YAvaW+_Q6f(bY!U>AFEVRn*iJrc90J&*xWia(+Bb)Kz>sl$dzw)ElFdJ1-c| zY~~Qw|5#YKiL$hS2$VV5ggQHY76V-G!@o5wRcyeprgR-Toc#Ql;1=*XERv0AM@<(M zm7zoyHL%M}?i{LNU@+6_=K#UQ#AHjL7FD$gyGH=*=!X<~Ym{FaK593MVR_#w#e%QM z5NWe@t!CpNt=rln+#-`wQXW6f^`2}!%^Lq+Iwi_+8POnd6ZWe6v7xKqas z1EBhz$Rn0Q73@AF_(Q*QozSup6Ft2Vn=RxY9kd5~AD?#Q7urT4ChCowJkZQUeU?|l zvG~uqEW+M&#K#Xs;80dqFE*{0!5NvDG>DopA*!B2HjH8<4`pjY!>AXBhwXbYUsdis zMG~ba`8G9$aSzGP99jYPTb$OvaRqsquv?CRh$y^9UY;b7# z^_Oo6ZNHazrHvmxGSJrkm;>(&p_Ur@WbX7~nxbWze7BI&-DstEbb{7Wo4~iY(D^nYvK9F;}OY0?%Ou+o%E=wHhS3oTm$nLufBf$8rs>2 zyRW1>NfFS_YSfR^i9kpj`ZL7V$;rtj_zp;5kClBw%&Qm~;n5-GycvK=23^4PI&QWN zegj!(Zs_j^jQB{rey3wDIJ&T~H^`6$AJZ$M%GO9TGBbPs`rK4qogo)n3#wKk93yl4 z6t@6q!6if3R4McE<1}-hBjNQPZw`{j8A0$QE|sI%OC}@V+#AZB+TY(tD91Nka|2LY zshrVRUw_)(E(VSB`LQ4rI^7narlq9?WU$eFgNY3(9a8Jz;n6$>1KUEuXs61JB5#_F z>WLUpE*t@D1sD??+}!Tp3RF_LZfmaio%z}5K#5&b9g6(fstVY-CV)X)U)4|`#P3S{ zAv-5|o}nWaO3KRBRaNdjr>q5fQCyYd?Q1NcC0US137)0oskwd~*nG?B?`nTSu^YwPXNvjjt|FBJ}#qDNN~ zS!xC{siSk6nuLXg$wjX~F`N7FV~q_P8{7Bqw~J&4J<&wZpFf`$)TjabJf8x*Yje}$ zUBHqKz!$k`026jQmNdj@FsN(~Y>Z2w>8AjpmJF|6lj#DDS!Uvpv<17Ldm6huz5-M8FdQ zk3$~U)^StvB_vk@xA17a=AAO0y*&m4!{Bp5Z2*3Pnh>Z|FV^AmzNjCS6xhWVu_H0zRy?8^qa^c?{Z z%E&~pY6NhbUaXQ5O9)zVYnU`^XAc~sM9~O);NBYagR+lh|1iouYaXtltfAeBXrc>B z*_s7rZf*huKIE1Y0zJPvLIa4+-ebV`+1}oEGB>CHeE8$hqY=PR0kt#k5Q36{m+g=- zr-shS87FacOJoG~?}{N4BO@d3S(yzMfN!JnA$oh|lpHYXjvKaKpt!mcs?dSL{0xnz ziHWmqgY_`gtIW(AvdT`2FUHTGtM1MoV?@~m;QDO3YE(F!k@)!mhrShffY#q*CH{_X zY#ji%u+avPWpR^DK}tU6o2X{`2jn?O(}vmUj@!Gq6nJOiqS%>Loq+UP8BksPNpoi; z5H+?V%&Nx6ivbx-&tL-&BZFviXR@TSR}T)<0SUvWS)K9&9w;=3e;FKXaGrBC9B6X-o4iITV5YCW7gci7`9Q$c3@p73eQ zl?JId&)fX}K+t?Hv1`G;@4ds-L1$tmK-Xv(RjEO~<83s60@EEmt_%)JN6odhN!wvE z_@clK!TW6&n&b22-P>=U^D{H+^2HP0qXxG89zDH}wG}(W0n(*c!S|3`_(>e_hFt*e z{+Q2!CcZP8%8-+1lD3VMgiK*oE0q5C(bXI&A@C z3NWpQjun+O@&M8XSQFT351ur2Iy3}Am3FX;qH(f#|GeX5_z!6X2lh&c z)-5VA?PEoLPVRSfPZqf3jze{Iy4=Z6i=M^bH*_|6*dsnWVIM@d>q8)KqeP$(R!}R{ z@iito<5!^YR1BUCT9W+E z$C_SPa0c{3M@Qaw$~|QZE~F#HbYk)Hd7fEuvHiM%Q)mGHyTIR6D`c4h8-M{O;sTrU zB6k*;>(w8n1*E4CW5kaiVBfZ_lG2x<_k!JBU48xiVPRpQ^6<`FT^(PQ^tP_GHE^Xb z9)UoZnINo=>BqBG)i}jAe*Z3^T<|iH59|D~yBqBF{3|0317Hiv1kBIQUI5b!YEGbt zaR`pQuQ(dnsJ2TJ6cp@WWxcWi6RvM+dK)mSu zQu-a>l4*eXmzg;vk*eYXtO5+iH^kdk3u^{@SU!LL92Ir%AP&ZXs22yYD2UN>a^MR{ zMkv@Xt8#Ot^GIWXT624xie*xDLqtRb#_e>w8r(>9d3hNkjQ`-lOD?eM{rA?Ea4)rCt3E2ydu8U( zqU!3mvM(X{G+n{BH*GM^6CqT4Tif+tzZegU3=J>VhA2Z*`B<;%8yp=S&!YGMIxsms z9VI;dd3LcQ#MGr-E}#bx5(G6F^NGqo;g^7s0fN}WZ-h?((Nhy!OhO`p4GMkj%#Otx zTqK>ov%C9TU!Sfj@2a)6_2lGa&~pI!sAnN0%hJk9c0M04Gvk5Xz<)V4KHhatlN5un z^MVe=c>m9qo~fy66YUM2y0Zq4gp#T%fb4BhrxZh~?imzSR#sM2ysER<1gAKt)LCb! 
zmlPMj8T49fbobku8_QxJAZ)Ik)N+`ZGHV)2gP!541zu%*en7NtIB7EmTE6W*-2tYvq;jivri8WHh=%_ zBB@v0TWovw^l9gx?X-_eBK=daE*P$?7GMj-!Q6`m?fAx5EV6b zevhQ@KG1Uv&5Nf^#@cTFiu&OBmbzmi($x6)bf;+C>~!}5DF>|rECBE;T^3L>R;CU7 zXVKuvR3YnA_c>86IIA-X3t4pD&mv(J(34R}s&eDVfBpEhMJ(0Z%vx#0ykU-jeoo(G zVSYY?1SX4fo;a}+coYSLdYTU(t&cn+iu*)bT3-JBVN^y=K`buS_c!_O&GJ(%0G8c& zow@Xx8f>BBEB*dBGwQrsE_4T+NkDFiT4zZi9%uYeDQlnfUB>i_2P9*X5JQHp7%SHB z*spzs_nlvdXM7=bU}4G)oIJr?tk3}*IOmP|1_oZ0uIKKX?h8IX zo7ejPJkXDy|KE<|((7rrekdQEuplQNKb;__ zkRU%exkp+{OpHs94k{tRB~Qo4^#pwSYd6FbOPIZ@3zrs`j^@+5KUO;!%uR%g%gx-y z%G{IF&CkjlX3pvAVaxT14*XUN=HX=tQ#bdxJ9&ry?(DvQo;JuY@IQBsmz$Rp97y<& z&Iv%FoWlRh-g$!V`S|#7dU(RTtn6JmEnS_teB3xJUBNLxE-)@HHz!wfD^D&SC^tVB zl!xnX031FZ;5b7kdly>{OFL_Gn5Uh!HH_2E%I3ch>aPO{|1~JV|9MaXg5Z=jp?{4^ zkWYY9SP;xP`2K$}=XZm0^RRcZw1&C5xVpjYo$dY2@0RDUA^!KS3jU}02i^GBS#8?( zFehu)M|50IykK^&9;`~Pb}n?<;IoVe7(HlF7MzvDDk}o!NC0d|;pP^E3P1%opaOs` z&H^q|akct?T%qpaYUO2V?ZJ9;3sHgCLo6X45LbvN__Kk)AQccgh&p)h1F;6*-@UVk zxcuXL9tae|4dMM~XF%`%_f7o!1PJ`QcKjvVY8DP!FlR1RI({e=90>*Fg$^*_|EKdP z1)%&qcWwUjj6UCta+9$Njt1u-uF;S*$wr1Zd-qz}xc0ou-b4iMoz@v-K4lZE@|2l7hsgd!~Z? z(c?&~D-FD+eFb6dgMM$je%i`B)*JnV^wDPuK08GliWg(5S4IgbuXHQ&R`arV>^zQ< zU%WaXY|m-*7B&>u91nHJsM+L-D0A%CeZo&W4OfQkJ9R-e|%L?Rj(j5=4O@nYC!Mv z6Azh}Ntx~0ck`MuVRu41<5~+*4}vSS69o1?^>I?E^keH;qH9K&ug;BJ!aPq$7_L#? zgNTi;j-2da`;S|%Yt&0?#m=P7>$|&}cYLRdf^zgQ`KNum7pNomKTUOBT=o<5s zM|qaaBT^=fCDS^d6q>-PGkf*g`;Y;9tmrX59d#S?w6chD;-?<}#~mo_*eHYH=){Ts zc5AIUF*ap>$X42zdON7WW}iQz61o?jk4jToT-V_fj7qVIJo}XHK~9t+SJ@Q|)eWm7 zR4>FaQ-;QS7rlc%l_Jz6E6DhgfQrKjdj#=&@!*E-rT60)^cP5WBxz1R+V35pO%sQ6 z78m3_fYUDrpxx%U)8vn{6N~GGlF_k}n!mJ9jTLxV1-&1PLG7I$&N<0#mK&0xk1s`p zQ$adkI_`8osw~>BGs$7iqkE?$1Es@Gsu&fo3=^Npms$!p6RFWKO1VfAYixXDj0$U5 zqf&MvuRB&h+}iQk6|TPv=`}>$#|#7a1$y7p{e~@6PR+c_yad#dmd3UFo-Jp7p?fCe zTlY_&go#)(r{XW4WyjXO{^6sy%nnH^?W)OI<@Lot5;^;@La(kG;i z^km3KUtT+{4f+45VXCgM*Ub_$25Z|UEeR*DmO8$I?i)l3{TR<^N z83SPxHx*D2;bcbL*OkFCd1=Ewju9U=d6tyXtHyJ^^6U4Dj?Y2c@y~o8Goj-=E|gOc z!dllsN5Q>+qL4l$@)JE;Mc$E}r2~UOCRE(#IDNBOJvK>*tm>1b$2Fa*zfG>%Dv#Y; z3Mty!iYK{N%F58G&P;@V#Yd&^@CdvxVmb|B`OYieFrpF`%$cVaj>cSKU}b=ZF#j%Bfki&Fm?Jbm&=3sy`Zwq+ z)^k>eSa5R0G1xoqG&r~)9d#Dk?>@h19OMfT-yuXjO9MHBDz%xCOwxFiFf<4Tnb;dB zDy*7JcycKNeIqF*XX1FfwduURP{{f#5Bz{>wXcKNFU%VOl+XOp!oShJ<~WZ@u`C0n zLz$k?Un2L=B(YDWW`p{uB8`H)$=&)AZzn>gaK9pQrBii3^%RI<7mP-UJt*V+lniVs zK}ZO&uLj^8V|sKzpe`6f+Hm2VvE+@oDd*Lwl^}>Bs~~8>*@abSZs>$b#-XKQFC4#O zT;tLYR2m4ndy4CeMO2q`^%@5@r$2#s`HnB9%XolE8jd6zuhu1=xMfDfyoZXP?+|rl zvZ$G=&qRgvesqYHpEosGSv_-iYf;Ab{+4$>#B0Vb6Nx%_=kM)po)4K%@T_%o2un1T zn2iW>pkBuy0?Sr77=umX6eymfSAm9wK#n~n6b_>zRYxT(XhRB>2jmQv&INMT_=XV5MK>z@M`seF>sN!XyGUrbLdC}kzH?)_BJ#AJTqrn#?X>06Xhe67czo9b zV$d!-teuNXEl=%eJ857n_L*bViN`EGA|^D$Tga`ELtZT7$`m7%?UJ)Y;5@AI4~roE zISu=Y_)=HEQbbY)_wC04Y#6n~PCg4OUNLvhW8N(L2OeTq0f zEDm2x$|m;ElH#nUCXAEiw5Nil#LWOEjB9>JQX1h^0B;$&KUX$#*J(>6mfgHbI8Z&v zp96Nk3h4$-(iI1U5Tr<=V~eJK2HAb=HIX_svp&X+g8@c7g;L3>R2Ye|sEmqi+ntPQ z;bp02DV3m2RnsnPX?r?JONSG&kCED>aS*okk1A`h+#6X|hP#aSACK1E4|y^Nht!YO zwV9bX)c@KW4YWD+ag7_v5|81)OfYMe)NskjSt#@LZK!8Ou#&MuNYB(ZY&b)>nrZB#Y?jn<}MlvIOUd!KFzOUx)aQA`PrZ6URmbK5tvbFw8Ec zqXYHHwNqyBQzkea>|>mqUpG>7`h6l_&5T?mIc?VP(OcYhNzhvyjg}Z3ZzbXnkO`r; z`i`}t5Y&td0GV7mxZPmy)7yBQ#zDw2I#?ht26j7+t~r{~anAJ>_yPI%6Y^BE6$!7( z@j`%ieM=p8TobG!%QY#~95cJ+qnRu?;apjl8Qa`n&MjOB>{5>t)qhPQT6f}B(3XNG zkP?;s%MmyZ{QV$G_}6P{eMQ41EG)w`(}>cdRCfw?lUS66v^>e&)^ahSTN7GSd#=1# z(!>?|?tV}+Yh`_vJ2zWqQI^>g9c1*r-p3=^;`6@UUuE3^7HxDEj&-iif3 zFAjZPjzc~_ZpJTBaDbmDTt3gEou93pn9qlVKA(qz?^m26oiB}~@2}SsuM93$$Vr=G zzO}#h@GQ`)CH!)&<1W3QMkhBvC-vyMEylX$ss(30&EP2%Q6ZbAROQi@r|R1Nl(wYn 
z(#l6rD|510RsMEw_Kzvrd>=r^r{y2#a$;+ByLwr->HHiHv><3v6qkp%OBz2}jL_C& zoaxXls2tyBG;Nh%o$(re6c<5|%+A`-EJ#W^7&eV1m^Z?wbEWmMcE1s5-|rzyKy@x! zRJ3vT8crH^JS#`jsX42$NO@tXH3k~Lh;QZqFJ=^dd`)K+U5-NqBQJZD(Vjk6 zQuC_S)F%}-P+(m-j|I0_)4|Oi@B!DV7yCA3E;Dtb#EA64L?8;aVjl2R&RT6%`GL=yQu}(F0sCX zxn>l|16Ar5VG~^1U`|a_9yR+n*=jlvq;+wZ9nCiAW1{G+Gq>d*3gh~b=r0o-Bk7oZ z=p*?=GoH|N92bY~1EyF+LdOm4&#{+L(3BxaTkCEFq$h_y9+)_463vY_6CCA#RL(ZO zpch(5x@L9EKSJnWQ7ynt*vqB-cC9gHKRCUsWTtxKsyLP35V6hDrH2f_AMUM$&frkL zm`9hia;LI5OBMJE*N%L{lGWmwx1;i^$EG_+dq5_bO6 z4Xt)xZ448y)@hi(f54LHj>q&;`GBkOp~mDiBDK8Iu%RMshLC9~xW%5rezv7f_q>i7 zRV%=%K7P>{80CK3Q$u0u0=f7+Bw`z-qj$yfBW0y##WgK9BG57%@!!83C=rgSFT~%> z;$p|m{QmXR!O%viS4W-J6&6E)HQO$_%@+Udz_{wDTl~7w2sd!N_PTQ5WIcMp^U9Ux zp$G7JzvUEOdDZcsOX<&laHqOt<_~gDMsa6wNji7pIlm~12k3BOvuFT(A6&*ZqzCt;Em)1PCc%h5|*c_b};Z}ECJkV4|bgvf#ds?rd4|}t7&2cuf zC%T|6?*}#C#YhvXotHA^&ddAnnx{mSzD~(Z&v}`ZNA#`lEo3_9?5l?vdHs{_Uq`&- zd%-Ky=mE&RO*3G)C8xdQJOCMr9Pn|+}{&ZjJ`WG=2u#-Nep}A za4js#2Z^K%&0?5Vd#8gip4satUN{S-5QIKM?Z5rJbCJst=yQatc3eTzabYSF7!V2a ziY$*9Ij-BSy6_!iq#cBRC7Qkeku!_jx59zWfN&Cuz9H2{P#JrS#M8draGrl-Q4jP4 zhPm0dQ%>=3ypIjeHI^v~WH}$8N-tmY#@C4Il9|9{ON|tLN{vu#ON|_q3(+j^p-C@) z$CqBVMwf06M0$PFrMkaC;}baJ&deD@KksN{`F`70$*PcKp}X3qpxAzi9Vy9sd3shk z@$qMJ?w)w!Dt0qO#k;wJds5&q*TZUaIwgYKastEk-@wqu&B86=oL# zEi1S>57gaj&Qtrq5o8Q&ftI}fmIFg}K}xGrzHx!A)K!J-g*U$P@<~;bPghsuXiLzl zcZXHsB%j~BKbIucP?St0OFIH+I>!iT&VX2xr@vilVjK@TdNsYjx6q&>@%UkGVU@G4WEK&lJVTZqPRo~AI*x(de$*+0kZk=d@(SQOgsj|%MK zr_drqaxTNK3W*2NoYwqhvs6!Y!1BlR|!( zGsb8Y0{{AmPR3}1PR_hr)3&alLDQzOC}qG2Om@N%T&AowDUN3JCJ_9&G=h^|UL2;*>Hz0{xUKQZ zRdbXMJntRKO+-478i95oGYR01t%*SmQRYY`EnV#9i@3}C*2dG#8}VSj*@SIV)KUqg z+M#^yxN8B zyZQeQ%I|0G0^V4HJr#rn3*4PvWSS=m6MFBNozTz9_YS>{<~+~wdY>sd@ixGM!xQ3Z0nHyQ7;s+b~+EZ~yl`2NbW z@C*B$hJjBpT941yfwp@ltJe}sXB^eLbCuf~m1(TbJVtsduO|#IxD8MtmQor5SUQWo z)1Kv*mH#jmq$je1G4Yn?2dSdw)fdcHX{cjfCwNc>nzeTx@jcAjd%+t^%R^1K>q}E9 zrR_$-w%9EA&L<3SH=ndnBM3qh;h;_LBT;9Xw;lB-qad9`P{(R4a?rEUtfY$J_px8k zN$0f0BIQlOdsRd>-aM1e+>B@t7LMW;=B@59WVg|(q=^Ry_{bP)C2x|~J?-nuN2>3? z(3)&|TT8jnyq>5(hz7lnbX1@mQog2B1VP-W*SWL*4ViTP(wJ)I zC3Lj}YQ5$tO3qeee2x`N^gs`H2(sv}c2%BPC@tHVhAw7}bi5?JYk<^O7(}K^?%!no z#r;d5_it0ag)GvS6T)c}jE9(`5Nl=|ez||W65i!U!{^H`;m0V)sZQ6APT!9WX}&uXMVU6~ z^Nt>>`UkaymcI%oz~;|%X(tcm6FHei8!phlItuo`gC~T?i}$Np6u|=?OP3ZI{6%i- zu7`ai&Fgy>`N5Wgt&OBj6{CzN>kqq@e@?}wy-Vm4YouA@Yh1p@C1y=XDm#C{mu`O( zK%PG?FsX4!qsoLScS=sLe{y9!ef(HPnFv&NUcr)H4pSyG5oL<6A;y+&_k7R+IO+XY zI9zrv_jcXvK+2dzxwd#`NwpO8&b^fUhUxIf`-|#g$md+A@Rfi(CC0T4>QTL1Yt8#q z=t5TAs1Kl}W0Sn<@MM!{sxhUQ7k(9Q&H~2vqx)9Fpc4|6VIr{BCtqmM#(MH$mZRnN z#`!+t@Wt7qC!;oIzd<7DJtsz&x8w#T8`PdizV(W?P+&bbQrFQphAw-{Nv}>|PBSK- zuAal%Ob8VH@}=RGsUuK*?!VF4h7yI2VAFff`>=x3Nja%~_e{A+Jeg;lw?J@C^Dfo( z0OjpH1!0cZt?of6Lc}!h(35iR(_1KfwiLV@0A9gi zFnF5_C^yl`+-{SsQ!lDs*Yn3u;P%T|W@Eoh-StAEEM-406=}hEy%M)8jmTk;P`~2T zDvdiclvsiBc!LfUZ6w)UyGirT4A3$+`vKCQWot+XT35Zrw$z$CRC$FL@(3 zXId&5%t8ALcD^4Ee%k6rbG@X7gaWoNCMcub@|^?HE>(k0Kj{xW27u}Yf;JZ38|iOt!%uFlSGNP3 z*D5oQP7j=q%jOrgvul9j=E-cv2!c**FTLWowY_om?C8zlj>_9trxU^PG*is10~?4! 
zw>(lEhqh-U;oJMz-No$7Q+yi+vGkz1XYZEl-O)#R8hfj&S zmy(kvt~v0>{f*lAXkb+*$=z96r}9X*GtvGQPi%Zj0lqPe@;-T~qOe2o%JZ0b+N5@( zw4;fD!w=If>f7WW&#t3GgiG{mdW^CXTe|aaR@3)*`85Gk^rt8O`3r&~u>_$o%E)~X z)G#}Ki4fO9BYv(#`$BxN_DYyi+^`4*e75w3dn#M$72FP;EDrJiEo8Dd{1cRGP-bc( z!*+bxV(o=+rMQWZ3h-m0m64@DC<{O+XM{$;&58DX9q9{!;KyqkiD{@QZk(lKQ4(o* zQ7oLeCe~~ta!sQNLh9GTxROXw@dkP~HeD}12)t<#|NE&jj{iP$^M9NT;NoKazwH&l z#m@XcX9Eg#Ml0x>t+HT-1m_5~+r_^GOWILnv#s$Xuv7&EqjiNLeDc z?o5ZR*gZk|JZO{n-88~3(XCdgsnot_;om6RQn2MlokI0gsIxMKd@@3vON>KAq@02* zjrrHNOHb!+q)=G_R&%I4wkBR@Gzo9nj`P5K|yB z9cm{!Fr6I*wM3H{o74bI*_?>!OW3-|qfCWoQvj*}ACpuRiX!Sr$UaSdE)YI1H#%-s zjVF*0=M3FJKXugiiZnkg?HO_DkjxLhFy|nW0dJB_)Ng?`*>>{+L=NKPWk#xVd`l{Fj#)_Z`jLP1 z!6eB2a`7;%sI6nd8_>V85DCj>^|fg!lmUFfDxjhN8fY`o!GGx$WYR!-BU+iq^<|xs zJH`H(zOp4O!4aKZx)HL&1Do+(v+_5pL!7G)Q}8DZ4$iRe!%bI;)___0Dp_W9!Fupp zU(Ed=)7}u?otp;^1gE2sEc-TyUk&cb7QF-p>0!bT_Dz@#mQjEU8pEHIBlHRC8Oj`S zZfnh1j=PZ`vs_7I`-rzDbzKsI~WIw3-3gZ5JCiwFV_ z*HVD69c9LrzPtcpeUIRpu8(b3Zm7{P#MVS#daf30weZo-*r@@P)d8FE{lS;7Y$gmu z!aj#zzM}Hs1v}@=Ec-qB2?OiCK^U=xA=E*LXauGox?{7;Quw8tRKc>Jg5tCm{Uy>p zv2u6vEWucY{y9oYY}8R=oq-VS*NP|Z8jRo#La?yTP$N`c{D>K%N4X9}OFC2;a{9I+ z-)@1Q1QXm<`>*xFe@lG zN{J{gw+w!-1|pxN!+@z%5)SAj$$eoT)ko7>ytf7KW)4Oc3Gc@7i;~5Gs-5vXV3(~* zu2L`1Ussl=E0iza^W70*M<^AO(SYAgdD)?=YY!{53^>Yd+3(XbhqcecNR6@@BI@U0 zt4*|`;+~3${3AV`My9}CM10uB1wZa?aLjTg2C0gn0P{&HVQ-!GXm*dc4%3zHP)SEn zFvPqB&2Ep}&^?@huG)MAAW#ao8E~Pr0 zgzQ>n;sg642fHey^eAqPKT{dfoGZ#&AQR16PUhNFx@c-0n)ILxGWQ*2X%ppG(Id5A z=WT;pxK4=S2)}C_o6kmGps7u$S97it`Z!96;+LQc1Al;#XQ8hur3a01B;uzb3#uv8 zLZc?Xli&&Q8P7*Vzfo$roJB3^iPtusHRVCcV{eCU4$xg+l?gj<<`m{zi%vhJD_ ziNTid_B1qI2~Rs=sVVR!%XL-^<`szwG4JNr0#8NT2!LdQZ&Q~>+A#8pR0;g4KjRHw z?=ce($L{^LT!fa1I&P{1Y(zvQT0bgVr{p@-b&T;GoC4*sbNNDK^zjPn5?lZd!M@~S>A$b zx7x*I++wy}jthQ@Wou|j$3b8gbRAVW{)o-={v+hK9gAZQkeIODETEun5C+OTqWrc) z41UTgEJ*y`TBXzfM&;P4kLs$}EGJVCIHymI1z}>Rs*G&~&2#0|#aEPJ0S%2Bi#E5% zUqNaHhw&*yExSCRfgk&aNnpBuEC)C>vJi@DRTgt^VdO>A1{0mmbD7&Q5=1~G9_|2H zPE#KX;ru1BCQ&tr!~7R5_QaQ$!Ft3C;RIq&(j;kcS}=-|YAzx#9dIQ*^sADr^O*)S zKx-z(xuqyf>EcdkVZz*wuPV}PKPsH6L0Ql=BvdBN3N;cfj#5Bkya=AgOclBn(ZNo; z*l+tjO;&Ye9x3Jv;W4Lar+{?NXw0A+qME%U`{7kpjqrsnY1f#DhBhu5>IoQA#A>_k zcIj|@k!TrYxAI4g@O0kA0-CbIS|kJdIpdv@yXqBG7&NR33?m17_(V*E~~iVrU! 
zW|To`M7hq-go>L8<7XnB4mx`fZaCC{J{cUmP}2}pc)G~?;Gc-XfPdHqBPbd6m`jv9 z)Ppv%jV-eiHwc3lNZys3d#NtdBoPa#$cato*MzP~z<<#?g&4c@2pfr#vWg2c)=SEj z4V6+4U6rcH0;hQ@&@|o}r4Os4`4;amFJxcF!|(moPC}(wc*i#ENUmE(5fmT$A$tZ7mY>A??2~ zwlwMgTBv%L|FxL&-cR>E1&_6&r!Z4wg8vI0|9%gak<$pxEj4pI1wr~!;Br_)h#8Wr zl?OGw57BHY;`*r-9ElKj=MDyse4{41t2^)oZVoip4;4QFkky-1h)puk?%5<#=QI#P zC9JeYw)U@^AMcKe>qr)ig(EXcW*b={*F>qR$6#0a@mtFec@Z488Y>(cugLCczOv)p z)`=~nV$ZU{7y zxg{(jDSemE-V2Zp3nJKr`=w)mqzZ&PC4v=Lv{+Dzs5K3lJ#)-pzZ*Hkg`R_8fH(k8 z0l)Sg^>yhoSz7yFS0O;xJ-O}hD9=bkUQ2n2HSM3(@RNqTm52Jv)1^i809Bn`LB!Y> z?O8B$kT({#EfyhMTO1;!Cd621ZD=VF-U1Nb8Q)V{qIoQ?v~&-h+m2+f^4I@7-&|?v zpE&F%YQuUTK-;<@ku5x8skd|-XJyl?x>d2YneFzsmsdLu$R4OU1QBONiW*_*(HztK zeC#sWh6lq)3_PW>MkSr3nGm;GJ2!-39&2e?_YN}&JqtKp?qJ&XAgY0>kTQnR-j=hH z_l7ff?7DZpu_Q|C6*y`z0+T^$IrnXS*>Z1~7%LwnQHRlv8=41KXHzj*ok)q5Xv|nW z2R!!FT?sc5s8x_3H9%aPfY3Hx0}x?Ic(>riox3o`Ro6Evc}@dehlliv+aiygD;QaA z1I{k|r`wGVsc*XHL5u3}v2F&5ueI4=zR+KZjaS=@J7xcVdUf96%Yg7xG9GkW028p{ zojfqpb~(?$WftYFwpp*X0c8c9*P&8yKRSTEe?7Ls#X#gH=A5(Fr~58eIN1$@y$v^8 zj5gi3!df6MwgX159^WCgd%@MoZfyVIYjdKOcfz4*LM1BIAd_oyyW|FzZ`!tt-_sW7 z!#mU0f(ulD2a;O-q5KMlTEXKAe|I#$C^yc8*+vIhZXHsfh@d5cSzK|_#+K^ zH7+xK%;mUSV;Zu@a1(RgY0&TLFaS`RDcRkh)yWof29P_Z*R{OCwhG~a@Or%8w?^x=JQ|*rwEoB5WxqM&QcMz`{otA) zqZNPrc{yX!zUq5czp&^vmdot0s+^!o>}@S4kh0YOxB%WTjevHy7~bCd;oz1`oheIO zn5s5`_Q^NV?*j|=!mmAu{pE7E4142^%$zB!pc~P7t2luz)q92rt2YB^j8v4s9%|xj zyWp9L(VMaOEt4Gv5I1&?z_pqd&0p16CGiQ7iL=p`wKcX(1EfFc45-E28oN3;hXjmw zYf{w)unMxrMr@BO+C$6^BY|q8*$)+9wOeNO4<7!m;`zys%!)FRSRAAer=I zZ?tCE&1u~6a&Qu+|J@!R6c%yhS$(<9;;T08*K9?+H{ob|ex*Fi9-9>wrK&YDDeB}X z4+aUzV)nR9o-{41`~yV_=g7xz>-*9#jeJt@2}BzKMOzMn%<+;>*4~WCZ<%2M zwMwQi0O5-qT#&?En6lRX5hpLK9S7&c!9d#WM0X+>Aeb}dTPC;1u}a1SLk!oU-LCriSqi!R>~6I*Wt&=|WY!622#f4K_e57zpF zrxCz7!RalU*1x)pV!UCG6ZL8l^l;WsPEdx?2LhQ0_$B(i{HJ<(zHXf@M~q4b3uht* z6#P>dd*50BBf~Q!U}D0X%E8$-86uP4!)}<8%fb1l9~#acdSJB6KAsho5`a4T29>!VRa;|s&~{El4ys+< zj>LZiaisi@ApQzWL_TH~W^au>hq;1;)*4F$&~G!Kr{spPvxlw*>AU7K*Izuqe7EMk zooO2-!##+>?=5rxaI6Iq|6=2Rv`^te#6MG*6QmduLYbMubfDMtC+EZiqiVlf>$b64 z&F1Sm@&5UZO@im&v2}8zwm0zxyBwh*Q`h^@aoaL9(1(nKqn8}ga)9=kIKu#&l%qFW z%raR9#xnUQlqFC!?B4KWn9;Ccy@d?qfIcnwuKZV}=RD8Fh0}jNEkn253L+My=FrhM*P7nu*`KOvInU*f&81UHlWhBqM_9` z9tVav^(<=J>Wj7OdLG`cy%AT#kb~zSrUoaeGmkWVXo6O3j0;($rfu1;SS==GlDLF9 zf3DIX(7RTD#moWkL~+zY9yT?7eLa zxg)Q6g_g`N9*a_ofmb4tfb@@59otY4HlQWYa#DcL*uy!WIK%F0TAJwZD-qE;4EXU! 
zb0?2Dv4-xmm1iEWqK!z zI^q*E(oP^K`P;0rRswj6({yb?x8m5C03t+;$`8f&_ z!oT$QBad>kG`J7dpI5%b?6o1~@^__dKfl6tWCHa=Sa@wj`yA@qOKyJ#Ixmi3?3y8; zQ~s>r{aFF?eb@~_q&1?_5dSpy9mHAXHRxmFDeh2T3v~j*(tDrR&-FuWFbs2b)2=>av^~27e5fCAeyj%at+Qrq(c@cTYHhtbcCuui@Z{cQ zU+D5a$j2T8W|kjjjfK3xU}5Yo#q~N9JiXw=7~&JyFI~WSwsEs~&hYM{NobH4qi_fs zC`HcF-jNyPDD(&gjJr5}g+8G~47YO+$|ggclg0+gdf|?rKiFv*4&tACKqS3L723P^ zynt1k!3ZzQ0Ni)ywiM_mmIW32fB57}T$Yv&bf|x{n3t9>Y5D7GnCY48UITBdfI8zM z(|6JsV1smJQr2(>5&$NsClF9+R`D+ri?2+aiyR%*f#v z+sxJXLt8^HlZ%4ahV5U3K!lb{zm3lcVdjZ;4ZTiY)V1YdLDaR?UX(+tOS+ACt@THp zGP}Tmmz+)MM}9RFg{5Yb+9rrD{z2$d(Ev(+58nu=N5f{bjR%D*`cmS6Pc0GV=xL2j z1-_$>+U8Dy(j%8mH+BQBqG^mbH|~~>W7mJP#8?q=VAYvwEBmrcW$wl5$UEZs&Hk@_ zXSE%#3Z+xM0I?e`AI5X^q#!~mIxWZ$8`0d$ein`U#$>`x>J%X>du!kt_tYU9#J~d; z=32zdA40!P(O1m*lMoy9sC=FUN*TqMOPqd8?%D%4gHP20h9=bB%_nvo*=UATA0SSzWkLRt_jeanT!$cp z>b?FBF6m(-CLr;kKSe<16Ly!W9*MYjsnSd8#M?NGwmi6dTj-!)u)oxq?Pz zi~H%77adfBc66+~ORmPSkW>i-Ct=Y>vz(y4S7oo;%mLTB31S!DHXEZoo(Bs|a_7m8 zsGlJh%Z<#xj~yQRhl%o2=ClaDUjF?i7>x-{X(Z=$Rn9Jgucc(7{lg4%Mr1~7S~_xF z@09N3wJFzadTuc3fE>vya!NwBG-g^qh0kz4nX%buztD}kd=y)PI^pnP@z1VVcYkib z^6HEsP&414Y}~3>H>9C#FN>p1`?r^v{pnKZT+d0zv+1oN8{XRdf%~of>xpxt+wgS-9ZktoJ%KOQxP3OmNHUb>}oSJxvWhE91xpL(!+z&uL%Q9$8n%0kf$?)YTIXl#nG?w-yOVdwM{>4gMYaV+j|f(u z@PU_Hd(2bs-`eUqsrKFj z#x&?{FGtoaoQ)(T?hganziXzx*z3|9PKKX4E5;{tc04_MIy>I?j;|^{9-cjF}i zvlmxP8V>I`oZpTsKigK`?oNElu?g2RdpWcjeB%|)BNaeiVLg1lFXs^9&3vIoj%G*| z66kDU>uj-azP+JTALMSu>e4fHx_>*lIXfZKK5Ikvv34S9XerF1T!Nf{pB-Y?!Jn!s zpxeN`9lj8O3VD7}J?FCX^0+-H^K=~1I8SrK%vSMaN92Q0+zwk8yLjkRxFUTZ5JUqV z;Xrh^9mZ`&1E3qwO|idt`6K$%-zOZmuxQ%ftJ|8Vp_%PGoPoJB(bXO;wh2Z`<$g1J zg!MM_&G>5A9GBCfj@~ZaCQW2Z=7z7>JIHPRnC^fE&(-9!VoM`|A7OG>&%`_FS?xf* zyY%PZfS`o#9A`wukl?IA@J{wJhZ-Ym7Qy9-v*j1`RvPLM&YwY8wSN|hwFombI#~-0 z4DslU+<8mg6T%t=HjsHX-XKr$%N$Z?=fc2rH!hvkU*CHVe*3h(+Pzk9)IwkpH`=3Rlv}bdg3#`5$(0P4Kd2XB&I%gcd@4mKj4l)+)oUWgSjBmQEa<;yf zu3MG8$F#0yw65Q`S^>>{u2WhM3|?((J9j}8#(f&gnY~{hKR(|7zJI(Qw7#Dz60T$p zaCWw>o_~KKBjgaY*4w;P?D#y_dF6ZlSRa3I{(RW$Po#Z^rq6P zdJ(Qs?7jSZEXNVg{U z9hIACI{^>Z<|Dn`dsFoFcGCnI@}Se%k~aD@m9UCfey9Xm6bUYQ$?c%$8xbz~kW6e6 z5v}k~v3$s1a1CEA)o8J<0?ZQiX}f>=m;W17|4mzqmbXe$8=g8*UIr~gBz|=C+Z+DK zw==Y25gPFVnHg0M@*qo@uWF!+XLxG8R+=a`tI|Z3H2F}PR0~`@g;Y9`xqksDGL*(O zM)s7l^{eIS)gjR!C#)NJQ2b4~Z(^s7 zQW2eFv8M%rkE*OQ76tw}$5)b=5Ix?(3W;Dzz8Rr{|V!Y_Nv(U@3$ix zwZJ5_E%mO-oYwFSnkTg*oga+63Gy`Tp2fyyKcp-%FwKe#%xNo8Q@(;gj+^}d#7*x zh(DZcG(SB|AHl8NWTN^0n9%DJpnao=lJeHz^HlnNw=y7;dk?e}{3O4-CB*h|88i~V zIzdtEZ2e%q6@2TudA`J?#S-B-+9vr~HhE2w+?rL{`64LDu$A<`?;tX>{+H3^FCtDB z7LW<*zYmUot-ttxQ%pqMY-~D2|I1jInVI#!n+r>rTRNK9GfG&3tWw2HjBJcSX3Zwn z07sDJ4=XDN`+pM5L~Lx#Z2zC$)R8+Ev;QbGO&9DGdKUWk0Es^=?=s;dFPH2tLDWD#r3o64bFB&&XwnN zL7s=}k6(oI7wnQRAK&e>AAM3>Hrhd3@?A7PcIbbB1S3J^VoG7iWZzw-9n?m)eOCvw zYp=zk&n(?tA^;m91WV;TpdTBtL*jk~8v+xkceOjt{Jq+q(hiSW_l@Wb5M{+NsJ42j z3PwjSmK{7`opP0t2ZIb&RtCMbtIqYa!Atq68{OArXwpjC{M1p1uB5tsR|9BmVlfH7 z`d1AI+S(UP^;>$z%0m0A=RJ+@6Q3`MTJ8FRV`%d!e;M9si1fHd&T8ITpxa)^-T}vA zi=BEkIn`g})z{PCt+fO*d@c}o_`R<$3(*p*nYe2vG|gph^@!1X`9M7<}3Vq zV#ih`n#oFdOzn5wEWIEXeMGTV^U41pH=R$^Ba2MBL`urtI6z((yhFI>>~(2VB~NDR z^mo)Z%LXS%)u;Jw1=k>Zs!vG#-en|cz3;%>sTqz#U#8!~9lVW>KY5EdYOrj&`R8yf zgBOJ^7FOY7H7*jd=19kVqe_2G~{C zMUeia(QL}dJtuUFZpXa&SgwJj5(0;x>PNe!6@iVNRR_+7B0>laacfM(BPjVY1Z(Jr zN%4(-g|9m7?Ff>q`%bUBP!B#C^Kq6NijBpdmmTmy(ehLP@(}9e2D8%I40=n&?gtvuFzE2 zFNR;qNYYU|gN&s_=7LZGwg_^hH}B(@aYK)$cbbkK8Hdn`Eo&B%dQEBYrv|YGWyA) z7I(Atk;wV@wf2zo8CDS~gn;~?rbcVPlQ$vI1}OL0Q;BAR?}xG)1Rt`dLe-)&7Mcag zq5%Rh?2yNeawCjy#7G0+ctJ^P?Sfn6dRzD=Qq{V*|C?2SQaip|K%qX=B*=f7UIo=;6Ytfa`f-LC3ft=6~l5QasF`zjkTk7 
zZLXl9mB|7Ju#^kyt_7H&j2Nr&0Igw3HrZ*8E%ijB&uS_zuTNhxBkp9mRH7eAzshkQ!iuWY#L;BYBFzFtIfI>&_a>)Z|FtWtc&t!?tn#8VrHQG`VMz30_Y1&BBt z0=^gB#W%&P1>5e7I4T7uxefATvOYsY9G&{rQCwq0(sqAI5nkX&bvcx-StJ29XUdu* zxEE1O573y3w2!&f-E^XJO!d^KUMs*gl|@M|-{DWiLh~G%hX-KaL7C?7HmjXy9>?wp z&Z-v!8Lm0BJ#87e1@iJ8!hjlLCK>dvLe@uyB|^BpMn#Cgr+p9KBgM!? z+^ZMqb*9+k-e%Z1m-xiHtrhgBQqQlzz0DugERg+#c2YxVcOjSZl2ggn60>O6|4^}L zM;^31Ps(NAKSE?D6|}raT4&$S#l4M!!m~o^Y}cPkWTzUm+(mk8*Pn>OvqXw&*Y8Jk z!5qXtNxH}T`nLQXqSyJt(fVB58-eVznJ!Pi;wgM#TlO z*l-$(Ym!Hgu&WTj&(qG?&DqroW`5knyV2in&39eiIXNXB)pZtBNWXM86sTAU(=a*!3HR%Dm4gx36`JwY!YV9KZiJ@r=UKU-MesA-#4HD4+oU9?aS~yqo^_GaVP| z0~{GeD0q&6(ugn|$f!N|MJ{|aM`iDBW+Y0gp5vY)pRt~6Xt@<{t@G*T3m$YYNv1ZI zcYfQQ>;7&I!P1&2`g)i&-thG{iTO(qDFUoaAiHJY_%payHID-idif5lnq=8zf)k)P z^6)Y2dt}&nzSwUuo{!_1nLD)>I0fTmHpjgdATp15VD_xk%8xUvLnj1n&0BND%&y!u zOs#m2m5m#F>(}pf(mPM^u$&NID;^YXt~t4SJpO^SrYk1dDNnUJO!lN~9vb%A(IJ1%C0J9mZWMvfdgw^as1J^z#&Bg-x~HkdGms#5kv=Qi zls}+4O&xRen$=Cc1^<>Go)JS|$$FNr@V0+7wuSB?`-&&n#oV*%!Q^)E?WjDVu+`dQ zYfqv)rUKi;H{3WLVrKd(W_u?S($PKBxSOV;wQ?`4C)!GWb-E8sSbO`@v(AYDPxg>K zWS7&3Qa|Y1nB}Ugw3#eZ5aS3@f~J~`{oKxLTfphj_q>QwN!AF1Plk~5yfqjW!&GjW z+xDS*3u8?Bkx`I#Eq;x)&C@IXarlufnm;;8CW+a+H#_PpA5|=t19d;+ywW#L9@mTK z0i4|GR92wmMof+(r{J&0&o?m1i1zPE=$5tabo3WqQ-H)2tmMT>uW*zcRG&=Q<;f&- znl2`p6XYFxj^FCt!l%30O!+m1W`-1JhGj;I5roLfSo5^#K>`nq+3-ir1h-o8E=}H= zKulIS?2m_|duV#BAKUY5CU{r_Kzu!YOz<3;!y#>iM-|)fZn}Y0V*$6AnsbMQ-i3X? ze|-Ri3!bP6+)@R{P z#czZ>#Vj($l)iNk3>b3StnUIiAXXs6TXwQ4pJ+Jg)l8x29f8KTo>RbR>60BZ=c(;X zg~abCh%Fr5^&KiDKDzr)9siU^GyDy#5#rb~bPP3!sbW=n9Aa3`=9`q?8{2TneZk$< zSp30aQmskT7~s|o zB>WYaB7#oT_FZ1YvXB;IVGQ_Q!%$=lx$?>){*=)NprA-iNg)p1R9%9GR|&A-*Xqh*8>#&C{NfYu zjDaSJQqw$SXU)cOD|h63d5Ol9iBYhyZzZr+j6C^H5XneFMEDrjNL$3Wwvr>`2Fpm> zTmIw2LzPH)?6c2$F%1^*`z`9*%3VReailF%IMd_t=1-rdO9hk4{NLsrI7z)!e&gFo zXIvS3?<7!kFZoz`HHhTgr)c{yBON3h$s46pV8SH{R*TtCxg>r_K}_JtDNKy=jQKuc z&Nm*G&o+`f-CR(uNHY@r`i!ZHIpOiC&}21J7-*ujmNHDuVMCE{j`?UNELPVM5GJH7#U58bp(F!70+7^ z>3lrAk#VG?WYYY*df}1v4~iqbjALVNxsJRt*S3-~qHTyx#FKOfR^N1*Qd(G*2ie!y zlr)?gH=fF!+yj9f98KSY#m?(|PaE9e=bGS{+bljv+3dgz+P8FQFvFNWzbS~Rx10O1 zZW;#TJ%+UISt``wSqyL|RU2@6OLxya>yK&*tZ0{-g`OJP@n z#t>iYrd)W{%;vcU05V{UcT}T6w%n<7%;w}$ui&s60+%V}9wzpq30+GhW*0WQ8iZB( z)=A(JinqLIUgk&SmlvIO>h|n)G?gC;GH}vw_@SuZO7~!#RIl?L34KP>JEZ?B9YP377+~clZ*! ziBKJpjSfni)Qc^sKZ3%YN4ir-4P>-XD+1Bz{7yxSq4{CvhaS-$pdQ|kKtun4`JvD* z)K1zi@uTYp`7p`ByEySU*tOtCUop`*zZY~H&O2G9PfdVA*nP*N!hC|ym%cgjTTokX zZeb|lL83Bmep_XJIK?RMUO01Ec$qspC z=%%FMZDfCF*Tc-_{p42(Uc+Cqa@qDclLw=HZ*R$7HzX^a8EZ9Q9Z-<96i_dG%1#W6 zk`V)jYW4Z*W;yU_<&9SXf<{wk1vnYA`)Y*?j~z}`1#U#NT;B?mIot4D)wK$A=GW|U zd;NNf6u_kaFY1Gy>KWMsydKwj`j1O`Zp*iRVEhPKe{&;qhT()_L;UUC9rl^_naGU*idk(G@5fZW z%cP^~S+Rvp`6S0%2eoom1M6~r0}?&Kb&=0dHc;OXxe>VE-XYlF+Pt-)bz#}GZ#Qqp z`C-={|AXyE8IiNxQQa##jvT=iP^&buEByWBbWu`+hywOIu z6S(X7LGgp?jrV5=o$aIgSy4wD)U()`<{Pstvub(vMQVIY%qRJBS_m_gC1yBhh&X{* zd`4J0RK%Nc@riVJRMB)e4tz@dQaTX$tmS^!q1_``CsPAvs)>{M;RtE3D;a-tyfKr3(!lty z@w1dlCI#I@RIFJKlm^R>xFXRa*m2;SY848NLUabK&qZ&?@!w2C#QsG<9$*b2hej}d ziDEIs%5t{cG4Vppl1XFIxfIj2%v`T^w0 zAABqSr7ggL4qW$bgkk>U{8y5rY*eR7c)kO_)T<6zh#r+^c{Jcpo2SONL^`lUT0C7K z1mKS8c|#33Lc3bH+^^c3FdfSpOOr?!Ky^?00dW7|dGir+By_dlw_jyu9aamYZhtk! 
zG$A)MA=f4<$l$zFt~7`6nV6v9ct5s03otKB@;wjrHCw{cf#2~}hZ97P&$B$W^<{Tu zbnB3Ds2TL)CYwYlBdj(ltZsxgMEOnSjhLvskurQQW8mWIhdh$bZU|@DQr%cjp)qJ$QTa>OMf%wUu=cp)g@9Rdaw6-)sK+18Q z*4(YT_MB(B2=MrOxf7=vA3<>{vRj25)6GE-pru~^)N;NT7l1~QaW8(Lxr9R5vl~|SHDtaPn>YI*fT=pp8zsFD=ge8YhDht5 z(pIi{da!nU5Nl2)8f7@`lOnbZUQJ_X`<~{2i_(Y2jV`g##f{&$t~T6h%S}wRE}G&W zeU6smt7wZ42mx(4co3SdV{l`ncjwt)1H#AFzk6Wst!t$OUE03!LJy=A@Rc~n^$P4e zf9n-NHQ}3A{y}aFkG|7@zN6|6)+>1Lj9;fc)a4zMp*e0&|9E!oxZnW?eIH_TMC__J z`IW3jfKziY%Hyb)0S9k|_?k|!T^nX2oepXi7Tr0VN(Pa2CQMrwut{LEB=+*$2DdOaPe zK=?j6?+7O_*8}Qd3yKg|Jde=d&lUmjQ>28Dq*L1dkeTqLp4^7s#MLupOtxk*W%vA2 zznT_}Q$_L-QGrz)s!I+2b_IsqR7sg6M~HXl(sJGkGHbZ6Xs zFiGMF+OQ$?A-BZ@gk{+xMfA`U25s3w%XWX|#v|#V;l|3_$hyOo?dy|IGEHw!y!!+Q zs5&$EfHU2y;)dVbQS=G&Rx&Ie27`=oG`8Ly$c%ChB19l+om$T2;DPsTW8rh!6UY%3-V9} zbWs*0M7B5>Om&Jale+xjnAH_O*HG>#0B?dZ);VBfut_H4K231_UW8Qp-iEy<3O-l8 z^};rNAm@T6J-9((IM$$)J%=v+{lefL>8j*;lHL8FD&Y&$s{72|(8pIpC8SCrC0Dog z0=cjGpbDkTJ7{LV8{a|fI(Fscm_R4zb)84cswPga%%I24Y0s{vw((+vrofo16-e70 zq^WqJ8{2Un_Ie(xN#FU% zC(sP|ueIAWmb<81onrJqYWHwrA-PR(KVV|*RgqzYV%WyR?_eV zH|(C97x5_SLE-xpE~+q9PxbHd4@Y#PP0;qX6dqG0m1V^c%H4)jnR>iB?q`(SsY^=7 zR`44TYQt>T-8cT49HL9o}ep5*-*6mhqR3$9?tYxNfFF}%Yg?!&m_NfVX& zGDD1}Ag_>Z7`)Jm9KR{3{8(|f)lH+{@){F~PZ5&Q1(A}6OJIhJVF>Rb(@g%?iZ3X- z-7lSj0WykZ-OBa&bDM6<#0CvLcA-1iLkw}6^mS@h70drehSudmdBHAWO6a8xfU!N` zPHXVtX9$xE&VB-595X!dRHk>8pg!C{y1SkCwI*Y!g%jI&?fb)eQ_G)9nam^JwXw{ zXzLK1B}J&cxsNUpuahWs@WOv8o|%Q7u9#Cb(%7)O9BZhK=34)bYYY4JNA4-9M_%i?)#sc|2{3|~+){CC_n2yaj`WSmV1<@hHy zDH+lNhnv6d=(b5#^tm<5fo>1Z0gW@`CArH2T!Da=r99_{E35R?V+?ot8?~c~dBci6 z15gifr~vl=#4tA*K_SR*N$V^*9Vu22C&*N11)krNcmhVlS**fr4Tme*oR3%{1Z480 zV-KWLS(R(qW+qm07REAIP{Ic)#o`z$zegH}(rX$mkPTt0*Y^Lr`+ zh6#t~;KsFb>MA3Z zz&Pcq;-vozs~BV&|8QrdUL4yqgz+L+TV1AxJJVmM-Fj?i89&F)BXjwBaJzZDT$9J* z&J<(%O+KqF3@BOYj7x;mHo5Aw7C=w^)O)g$8obo0#y;TfEE>dPAh9Wa$`QY&8-3@c zlv=;CaB~q1pQ(+3>iV)#US&dZ6#8`b=IGCcM@~Ib{1#pPANTv#kfO8Kbj*LYyU&od zKm;FKldOfMISdN#Rp!-4P4m7e2D@f_jCa_4{w2px{TzxhmuRupj=b6;A2(9ZQ!KOW zd%E6K&#=gJ{&aTO?a{YF0KpLK1lf`hBNlDf6;kTQ{r7FP|7aJ5dkRMeis56Mt7Yr| z9g2AM{2k>D#|LTEgx-oT!G}@bU@xS^KM!rFpIOIq*b!L?o5Sdaa65qJ{BV0Y07(TXb3O*g_$8KCAcZBjP_46dTI2$7xgA-axh&l0s)!y8V*&=pF0MbFzc(>~?A;`?~dH zdz~3Rh=+T)Lm$OzYdxR$VAmip6pwMYK4HrsB9!O<3Z%psu^N=N6(avvz{9vxAG4(p zA@v`C$7oz1v!M_u^_L+yVq+3FzISlNMN?@kfDA{xfqBq}1tq4HD_p@jrZH=pDCdOQ zQKEoPe1!!ZA@3a4XgE!jb%N{Im;YC-RC#cif@utI)-+zuD7B+_0UJJ%6>C^B(pK&n z_Tl|>cWPyQk(OZQ?c*E%?aQF9`UrwK+bv!zt-kQXdRro{(yd;{W$uzba{<{yTMf{Z zTgvQKHF4hcU$H=?zpp=*z>B3|*qVFgI0|XtWIGBq7Qg87>9x|f&9OhCd4)ab!KYI{ zRi3Y@K715}yJzVK33(x182%#hd&J%D`->K=J?5}HvqT5sKR|&i+fhX0e;*((83YQ1 zlD5=68JIzT8I*Ol-aF+aXJV`+>)a-D-$Yf9O+3Ghi8f%yZ`gUjzIHlY82eHP?d^sK zSg8I9S~3OLR-Z9i-(6GwVcyisutM4dqNmO)Ts6U0ENqTP-e5HMTT1t%rRnqAnZwky zHRCe&wYpRI%$2N5n)p1e)@Hi&pkzYF38G%e}%`aH5X`9wJg&6g8& zq(_3H-e|I)lwlEAiq~k88#fy$x}Rggr0ssNC^e_kcr#fOU)thu{MRgHMOY5yZ~q#8 zy9b`&HJa|HW&9K3PShJs57Khn|1u2W$2+V^ZV5^>QNv4J-(}sJT`uLFVwRsvFR6o-ht_v=oQ+ly; zpT|mZA+p}DR!%uMYo5}%vXs4XITE8xitE?tspYdCzUN~;v69FAA}fSd9Db4}OMLjG z&W%FlZ#kgYwY%8+tEh#(dd>@LSLB$3xIkViT;uZh%@Xlmfb{Vn{Xn_RKy?&}Y- z$DXTAB&cqsZa@;p@Hl3brvB^nu1kH3Pg> zDm=_zl$-G?d=4li$WO`z^A<*Yb0kuplV_8A?V{Mn49l}h3=m>Df2;V?LxvTKN&n3T z19nc{h6tAHk1&NPt@vt8d1tLtBHk#Lr~5wr>2S&ajkP3Npsx|9=@iPoG(h)+VA3fR ze<>i#35lWoMk`T3m}3_+DF2_q5kt~4Q%UfT7bp~dDbRAzGq6oaR>M!rnU>_ZJH>=h zD6;1pFo8?hG4sRT+y)oj4;NLnLEP~Cz`rM;M6H_;15 zKHA(-6}IGFj49if41fZtqD}p!sJtS<>32ojbkX)*)(%1zN{1&c9$?4YwT2~Z9f!uH zUqyRbw$n3i+9mlxvNIy4qD?>Dht(}oYo^q5o^dfw=}pVh_uIqj1AA)9=&l-t2eI5T z8|`w3j#E|<#`w0lO~2csMm0{0ear;0lxab@9RXmRECe8he``qx34 
zI`NS^X@{R`;O!*?-MV?t7iFX}1LZsWdutcdq8|hztJjL9>2^;_$6Y#t*cYYIA+#wq zirevgS$W&V4HewY1D9`3Z(QowG8zXVmh4RUX3p~awkv67wN0yA`#Gn>6lDPU!1*~e$NJV7Di4RuN8j8`g)7cyxt~T`? zt_XnstwbG?z^tRPh`on{v~)Csi4=yUs%x!~M!te3#jzaf=Bk^;{EVB!a>cC9=0$6F z0MoKausgLas8vedov)@&(cS&nBYkbJ<>CDI9O{Pto&Z<_V{wcKdL0vW z$V6?EXd8p$x9a>>U&Dx4HY0-~1%4gQfp_zyE=A(wu>q}0+1k6Wz!18AK;59aGgmVZ zv!YYo{E#=s>bT1BH|;G$M)576M&uK#@jK<2Amz87t{R`^FqGW-_WgnPsYn_u@7~WUOf}KSu%B{KXa*c z`Up(E1o3JgK21KVSt}o19zWdP!EpX2A?P{|eghJjFMh6dc_g3g7hRKQV4{59zE|1S zGMqOghP_=0%-kc#MdnKSx+ZV=aOlQ8fp_PLK|w;YIMzH8Ku(p z6QXVGTxjmmKzZxa<5B!0TN39LHlWga?t9y@F@(DPu%VGDnO*Y(rmD8sW^Tym3#kNx z*%&YKvQ&cES)E{uENzDYM&4}|qRLR(2BFyH-;!4OPZ48np(N`lj~|@HrYuf-1rkp0 zU{~FtP~DzB5IdzxGw>rh{V?`@R0vu{Za2AT+f@<)LMC3|% ziC#OLN#2xq}(1Yj#r?%a7}#GGSuX%mL?#z}eoN$XktsP1=#5zYdWWTh$@ zpPt;Aqecm>y}Z&(wmFZ>P z!k5Tk@2#s4uy@YmOLf_Mi%f6!GVBne3@i)fO&*J@$#H!v0dp$>n!?GeN7X6&hJ%LX z!5T9IWa}z86Vav~@QledkgW|n_|egxmf~V`5|I*I?ON5-yg+slzU$c5-#2{Z#n$9) z5a7N@uV#czkG|t1ucHW2#aAg)u6bD^zO5WLfwideEvfXaX{nPiyMXkbZFlSFXFO}= z3*u;WsLViC&NzPw}i?_I=f2G^A7>L6&&;mx1rPbWE`oL zX;+$ZkQvLtq{%WBMU0|3GRKB5+KdXJG&QwiC6@9Qn$s$hmO0Zi*p4+h&5c9`RQDuw zTFFToW&xVUJ8Go>>;g~>WFCD*FILSwR;_^8D+~KIZFL8@T!BGLFUdK1H;3k#SNWl! zF|N>JraP4K7RtCu8f-P_Bws6Ki8+Mf=H{}*?)56o@L>gVKA8o8Vy(47+pKYAZBlUw zrA1Max?xhQaY!jE&l)G1YSK@-0-fJ(K8vszHaYL=C*NwBLzl9@QB|nb1DCL0t0Xzl z$y6)ns-@G+nue6H_Grjds9=1LeRmW0D)Z|&^BGg#>qJ@4T*dT*X&`S+l59!tlxD6% zdcm|QZB3G7Nv?dldG#Q>TQZ+ErBvO( zDH=lsH7Obvv`7h1e-?P#k`=Xiy>lXT3$|3jcd@))e?v`2_adv)sbkN^7P-1Q@M|_Y z9q>g>W3y(w^WlY+_Uf^_P48%Zbshi^CPB4dsV2bz&WGg%`b4<-jWINu!0X~%IFjn( zfaBP8FFmw7rzrTfGsz9KKU`)8;4tw1u2!^?Kd+$G{XK}2e*xQUCcjfb%XA#`N1@Oa zz9nwlJQ%ntR76odI7M4l=sZqW7Ueu{O6rL{`5GT9GW8$#04U<}R&>uas2NhEEzyYfG53fJd`VmiI&vJHrtDD*O4PWdkQG7Cxye+j%WQPWZa|#a_FW zX6;sm%3?hh-f50F$hPu{m1Bu<#QaSf~ef(#GFtk#}u_}?I>GD?eq7o#`4m$n9+gY`4o=}vQbE7u1v zc8qbm>pl^5Bkdvrak~q0L=SbB#Tx7{WWM1$x&*XCFN`;m&N^G87L?Daci7 zJafFB7GHzCv0}YMa*UN|d!Btm7-_Cq%cPoo`D=7?XNNA-cM9d|7g-sd7S-caHsfjT zSgE9xpDStQiZ0kC_;%_fYw~vLbs`RT>eGr+oqp2rb;qpenFr{I9~j5zePLobWKWpETv;V@}a+b-G+ z3KWp^<6wQwBb{MU!!1c0dxV<1L~#rG=|hffcO##HDm!px1!&3+BqFRpiBNkG&jj*? 
z6Op-2k7lYp!`|NgvYnf3`{EMlHx}bvQ&NdTLrKu>p>%)SF|xBpCu?&fHjy{nhAN6x zTG8MUp4G*dv!-o4B-n?_6d=PoI>i0b zstb4dGQ#wlI@Q|`gkqC7S=Q0zup&Ii;C&ft)2kh5&U=K?b;A7Es%sUrVw{lM#l9ju zwb6Ee?exm0Pi{-62|{H-(w($6#>yne^^O^hm@(uFh{5$pfx=mYtpBWuz1LSRub4K9=LD#u*n~Rf;f&t_V_lv(T zq+rH9NlmdIX08ozr$g-uuBW9he*D`QHYva2!rMmGQPh=KZGEvFwf!_ZGc}eSk%Ztb z=J$OY>{^6rNp|mAF?Z!$KQQ24xnTshSJ5n-4aW#(tXIt+Rxr2GqBRypmA9(Oh*KQt z7QL!?O?mdT5Pv!orRg{E>=mtrTP#Nl5Y^*TaXv4s?#-hesLsV>af@x9xUopP$PH9Z z1_?5A#;SwI|J!5&z(0{2DQF8J#n0ZBc(f0;{zH1#-Z`uX+DlM9+iMoP#)p?gp1sjT zz6P#%ihke+nEF^1@)Eg<+Qq^vEH1FZTd`s_vG(xSQ=n+6li_?njQ{q{tPR8|!D0|X z2+^=@phv4`RAu+kiYrGFR)gj_HKm*Y#%gG$zX;ym8=a)`*pDW%wb^^ES$~W!`rW4f zd7JU09nOc>ZO(6q^{J8kHkhHslLAC9g)?1ACz^km>Eu%;XD30ULRAT4bt)ymi7^uq z`{K2S$@iRb?~Mewnvgns3K{u*6;IHPWdH6jx1c-13By6S_NHmUx%(3l{P>J4CchbM zNd|@kTyKXy;m?QwS=CxWLwCxh6C32K0E-L~B-ZyoQ(#3$p~+0&M6xCJCmz|fpv+0V zWpJ=Y?iI((D9G&+VTLggX8Bzw`n&G=t#_d5hNTPqYQXKscSU*}#XdFP-kf&vei4dQ z?@lWJ$)S&CAsoHF53PExfJ0!O0)Fr<9pw>V6>p``;?78*)rN_NPDGzo}(Ky`{Sld zg^f}4?xAnl2cI`=%1(rFlgOojs7CuR_O^mA&)2rp749>U3Z+?`s}uM%hKU$}+aN724@wh@02zmhrphO*$Jvare}#&BJ@1uK5CE)=O;Sp9WF0x!8`58&=FP>4HmYNghZ9Q9sBxL z7dX|kqZ0KaYnNvgKve;=2i#m<%nK5&PlDlY46D6lBF|6)&(Iahgwm9V98{XG^p1?(X*=xA!GE8tPMD1U| zj9jl{&CCCmO^*_@i#`DvwTt{4wEeog3lc7gHRG@!)%w>2CJoaS3Jv3M-w9^8-p&;p z)aCbEdi=}qw_R@&5Tf;1rG4Pn|9r21{2hjU7tYGfu=vEPMyVOU}i%s6CRJ|=y_no2d0LJNYNR=m1sQtr6{1^`XB@1ej`Um@efyExK zucG!rlKkk0-v9e8>wkgelGuOhXR?h3%(@9Kf}bAqGISl>iz<;-5TD-9hD%M=>BwBZ zB6DYzsgvT|6?K`WswB7KT;igFJ|MW_-|=-uyIE6awS@9iCEB@YplN1%1$be_@ekW% zXT)%4g!~1vt(*RM813zOQ6~3%J(T-4SWQ#dncYex;O*3XU?${FF%jKgUgq!87z7mtebQQ68O4LMHi$ckD5vN=y_!e_@J>mXLjJ{^0Vrkx^#uuk_}L(U*O{5A`iHmvCRn50Pl*i(FY%o(IcMH9 zkGhC|Lz{QzTEh78zd9(g2i8}}lfyuYU$!tOjV_>BU4{GsV*40)Eqr(aJ{Dgj!1v$7 zP7%u6;b_o}Aa5hc!n3|?QBQJ*uqO%%?aHmwPjZJ=PeoMr2@^l?o;R?S&u+3#Qv01y z`}y0`_Q@H#H}-cqX3Hb2$RoT~+nyB{u&Xit}LpCPe&(0!tkP9ppHzPvod3 zkxKdvN@OH8KbWsg38OgUZUIx0EI8^5FX-Kvt8BGXS`d!w3+ocsd?}}Kepq%_OZRPV z%6f20U(%-#^5D%cncPt>8?4PXcslijOj;4pUpIq>&by2u(iS0W0=db`^4nmDWb{QuJ1b^S6cLUW!;OFfyjpzGi#JY~-X;*j~ZI>WmvmeO$ z>>LGv*e9~#;(|bex^XxVla3Bp7?-$qxqC+IeqR$^AtvqzeX1W;RCUmwx(d*m!?+;BePKW8c(KGwmsm z{?LVdCULXUe?y52#nn|p=_f3eIUYq_o$b!GCvE(~7V{KMh_N_CjSGAB1M+5(Bc-vIRmilQh6J)6ktF0FPr*1#paBOA# z0nLzfqLv9{NK(;-z z#T8VdD|~2gwvu%%R;@#-m6+WPg!o6tTO7CtsMD;OuDP*p!bx`TFY#R+6a7D|y>(Pv zF&78e7KZ}G9a<=*SaG*f+}+*XePEd44#gdc+u-iS-QC^Y1|8V>zVGbrKfCAbAA8Qr z%gryjH&2qhXJt)cOt!KMAGnOUY0Hi zVCt(Duke|h?o^TKaadI9vdwKixyJrTxvttnzHsfxQ@i5671nzs_NGq=Ob=(ap~9p! zU;JtvKCH^6KNzphW{Cl&#AV7()=H}Y*g*82`0n03F-U4$vGnV-!gJRb2z?8qjnL^ zc2U7B%=qWQ-kbV_NIP4CO%&h$iKdvVYyYqF&Wsaol%!>qZT)RYZ%bn`zLjg}>)Ft1 zs@cBBY9jGAIEm0Zi%)6ha#TWaVZQU*eLAVvUqMF9d5uAlyNThJrv!f8PEB$at(q)o zVTZWbv4KkTkW!0a`RWrYUO7nJ>y9>2ty8m4HYvPTDBT%h---h_xC_=__UM*uLKE+@ zB!n}0Z&xIcwtO{P=ub}GVun|uiYGoIyC)s?OROyHH5VtCF&Y!4) zzcII{9tfw+mGj5>&m=$lW`@$brB&Nucsa6WAQt7JjT=7yE^!Pu;>>QAKcHQA5x`j3 zas1DTf1u-F3*mvj?PJ7}0iw(%-roxhpvU??l<)G##YVtau>W0E7uZ#BR}--T)_vG? zSwwp_>@1mF`t$qSB(8R9Vhcx|U&ACuORmcJ@0GEyK%`al`=40+7&oI4EeU77Y(oNF zs!c_Ic6>)6stp7^)NP%^VAsk6Pf*kQPzevtXNq8nzwpvI$G1=fo{y&uy_;@2! 
z{&$$azQ_hy{dchUpYnn~8XPj#wn%HP>^?g>0{J{LiW5Hf=B54`sVnhgd(tyzOg9XLg=}tEArS|`K5W4AQIG_G4%A&lh zc79XK<`T@|IZy0!sPZsbbeU=8V?n%;+HjCA&AL2eTEpmGzGQU4#n-)I3&7iG4!5ma z&L1*V*x8c4axCy{Qs&o1C$B)nBzP$L`brxHlHN}B;g658xW1AV%o-YCd-Tj#_EDr2 zEWlMz4iEX>C-UGKxQUW0DEJqVLNQN)(+Vj4z&^Pt_17;+A-6W(^5m;FcNn64E(myA z8S{EK)hYP2Ao2EeXg~oy$@_l?^~;4T=l^N|*{v@F$Ju#jB?9LE@&RuPW7ff8`zh?i zH?-{gDwf0SX{=-Go5Fw)1i$d6};x#X^N>bR&9=20E z0|D8d^HlH^P^06U_8{Iid@8O0;R$(8r2+kT*9y(2QV<_rkwjPJJR0`naa9>)b9$uRD&7&9$ZS|>x$FWhy&&=kpk4&VQ#L#|l zJwK=p6$xiFu9EJ~Xgm2U&XWJ9?WKI+%WuLO^e627l8#^bNzTmS#$$;9ktJMa6kxq9 zDOaZEH>e?EMEEfT%aJBBu+{Lt^+0`hXYc0f^wAUI!w(Q77obqcig_C!@IKgSJd#;l zU}ay!?zb*^lucrbNJ_`FbV50Hr&-YdEdS+On)58Cv7gNZ));thThk`$aQ5MC+Wt>F z?c8PlzKTB$$B6LOg=;%iNRC59pOiCxLTdTy(T)-;Q*m}gyc*Y3*wisq4G&eiyKnaz z!zKm;2cOkZW>o<lL0ijCP@HPvZwzM0Rjs}#_Xv*s<$~t- zwY0c$+NHEOL25{ti6~y#cVcNzVlJ2m`$-a*1QlO z(;y`*FlqlxKDs)LDv)~>{$Cw!C}3C;rG68)(} zns5Fd*_Oa~-heiug1!Br45NklR5I^425C5xFGFcBkui>Ka6oP?H@ur8VRY#D>?)J1 zj0W381(wF3uWh9XA0dqk5(g$5{qFa3Qmwxsz)X(jzRr^)Uo zV^i)M5ZYB$;*|$+wpI2P={#q}U*d+eI!nfP;=&uOscoL*J98 zZ+1{KC(!^@ZAp*$0`w&4=|wKq;$9zbCyGF?$+h`h9CtxqZtcF`7sRoZ9eQu(k*u0K zx@9AR;KKfwNDz8+l&$dQL-W9u_~XOD5BuLj-I9VE|83QC_JcdqT(NCb-FI9L1Mm(= z=Zm!>uPMbJ-L`8Yn_du{ygGc6Jj?Sf)O2+gNv>6cy&L-S!-m*GhuCJdz7>!Y=*{Y; z(6orAh2BtQ*OVKz!nsHM7?R=kQ7#V#MF?V+z9qERpO&c6 zq){lH{Cz^NN#au*SPCgQ3N2B^a#8dul+Tg)t|c`z)9pgw%-)xUTYrW&AvY`n#_4L7 z2u1ASQOfCYG4ZUjZJNbh;QV1zu);(Dc&H%5=Y2Z(_Qj_95Mu`}c6av{z$@69Xx@?c@oM<99ybFTIbfaD8_4a)fV6Gs*IOv5^G#hXI#1s%KuOHn!)0hGx`bhps7;QTsX8CUwdmr*QW* zWM||f9lFDH78_0pN1r0!n{{_67P6Vi8y29Qt5A|Is8beQ3C>N$kUquA31M$7l2052 zKdVYmI3J3*J8E=KMX+bp-`lifRT0wlfOPn&(?nF2y(fCY+m|=Ld>DSx>R63lgq6uXh|e zI-SAwRcQ3l#%g?W6Zrg!!?tI!J;66-ml}cUfu({-iRlrggP6YdG0Qe&uhe<0WtH4B z$28-O6*wwjp{n+*TK`Ty*8(q*S8|Tokx`U4-gT+B+*uN%5~kPf=?o?zfww)HmGGV4 zt;kqQKXHX2^&;dd^3&VnnHdvhC*9CtJ4Dlmo>W8FPiDu=+Q$o7qp;#2lJm# z@T)h7t;g^~z!IK&+Af~;jpRFS8Fv;ZT4e$CtJ-FJOGV3%^GEI)&mT`L+__rQ=f++O zp!6rQ&s>n^c`e8z5)9?QczpiL7KYX`oo3E*FUl5|hV1lREkXPMe12*v4vr5|w~actuG<@R;b%`_g_ z=&4$V*vjp$86IWI#IqPD8r+8sMhtXXcbl(uZ5rDo^^G5|otH+I>Agoz$^)GLkjl<@ z3~srLM#tryGVq8;SDVymj@Rrs=jd9}>_%LRwTAg|Ai->L{O# za!@p`uy-|1B=3=Wpu4U6rImMq`+k@$^J*M=&1(_ICI;~+=l8E>-;8hsr%P7hy+Wk~C`&h={sPC*Fo0sW)z`?WbHRGis@2bvQw)eHD zTZ<+l16^Kv+)>a~FrjapW?Y@uu zUZA=ow7Ok;A}_dw0>YhdRRwhVZmFKI+#<8`4vLeWSfEYT1=+JVY9xB4$;Iw~ApMe- zLk>X9ap`@{px%+YJ71X)Y_k!XvXeE6Rv{&jYio<%oZR`un!WY!XS90;j!9^_w;%DT z{-J)cy$h2&lTu@Aaqx)KQx}sS!PZ7?oaEaUa@(T_v*x)tVedM z!k9B=6VmMI_lQ&gcWJJ4qNtl{8IMxOY~GY;T^N92nSc4$SX4uFgSvwFiug*&#^rSrN>d6Z_>!J^;>;$a04w3wQ-A2DfEbRmYwCl`9tlO>I9aF0cPClelryGudc=<+1_Hw++H+F_|$& zj1Flh*3(?8$_{9YsrSo=WfDAopQm?M$>-!`c2CbU@#sa(@U+#m4V?MP$;vrxMNWd( zIx!R`7m3H)Ty0ftC1137yZEul1CbQ+Ih zjAC?qrBNrR_xKUoPV7B$gyqfEFyG7o@jV3=dDsFO_HM&~ge!-)Q9%5av)dRTIo~F3 zFGe2*AC<|S(^h2kiqlFnU&VH`ivmA!4ByU<-1Y!>{C4?vd4|;$=asv>5BVV6??6B7tG#{Sd*MPJr4T7}$dAPoF0}hXE6ApQn$S-XEp+oPEGg z0AhWOyfJRb%sgWlw@+G|_F`+bgH(^VE%+V@6J#L~ zBgjZ(w&}R@%%o*U^O{S;@@bi0HN;O5Yb*O0qn`G0L&%0f=Q*l2AdhJ0=4qfh!y#cA zVW+dU2xp7Q;4vc?f-j6F0lgm0`RlRovX1m7(a?H0dT^V}Vf%Qkx7S$L*zaT8kmlj3 zmbO-W_Xa7Wb{j{xsbJ+W-yI(lU~r zjq&9?U2k@sFvx%?m1+3Gq!*F`XHK^F1Gy3+E8bC^Y@WW%4wV1XUt-_`by8H=E#58qWhI%DO(T&OdCKPTd(K0hjr) zd!;^!Y`l%wVpRM&e0m^}5PTkY@#tx~T=NHY1auwWl38d7Yc){}t(eLL-O&Ql71#I3H@&Jgs zSPfTNJ@>&HJ5vrL(KJ`q6DRqVcb7+(&!xc&o!rlL4g#&bFC9InW@pRZbrsGGnsW`Q zG5%hJ88cPET)X`0tqWr-`xg=d&-7Yzve;yru}s*>m#-6BS*X{u^xxcOYdhl_M_sRf z+Q4)k;)!t2A48ijpM=y8^6JS<6}P{0x6)VCJ9TQ=>VF#`chh?s(B61RG<*TnzKvfV zD&KI`t}@s#wC<2tfT(NPl*5=Y5w!NY9Vbre^v#E(mmx~7!oa`C>ns|nS>}J6bE{9e z)LGfWoPZ;37M#9znbl$jEEpj_E>BbfHh8hnL}eLpnIdrdu-m+S57$87)Hl~AK2dD0 
zd2dcLPK_K}+~J5gPITfR_&MPBd>CK|Ug25^=*#rnNFcNWnSv<$7ZcqRn_z}E#UdLU zmsa|KekAxfr`q0~DW(fcZpPA^!F_NrD5z ztjf#{nximdC|9sOHagtO4Z}}nBh{Nmmwy<4N&JTiHYYMeYK7<&Jg>r<| z*SzBEe9|#&CoT&=YtTny@2KaUe3*awxuZh?PrUKOJ^7&iAE3yuzmXG(c%u{rmVycf zoWoK)!$IxIkZ)Vvm?tmPf$r>lBS}k)H9E`k5WiCq_k<=bg8nGH{wTRs+`L_jFo{Vn zYGXm~tnGrnnQg7DaOK=#l)T|DrTSP()d`E&-HX?u<%y>toi?VLVmGpbiUT~FWkLKU zN=CX=6E)FwCrTdWge7*}l6FMm))CCgIw7&ktDFs3V+Eh} zT|za|FMPuAMa%aE8ujYplqk7rA~!ijFXwXO+k&~zWl8UMpl+fr2h3?JSYuX3B?`0V zMrGECSkcS5?%@peGG}-LrNC14ud}{xn!BY`y}$wH^BA<94n4yAv+gDbf_@<1_A3{q zM0VK(p4JmTFiY{t$NX1ds^fcaz#;i*v!cQGx(-Q$uev}Mwp~y2!GesHcrR@911bEQ zU1_X^PKyRpHl3%?o!hx5;@C-`o57Tl!N`jh@pyLO)=PDY5FC%qpN=Tj#uL6pHc?Eh zh98q6R%R*W;$uCjCR(G_t=u*VKVn=XJ!+F8L!#_58UmXf|P}%1a9tpq?3-7AqyUOm79)N)HeBS%;M!!8d?)g?Qdj41&aglcc8H1ZX9Q?d8SJ~sO;!sG#{2uY^N1h(!!9Py1YU&@G z$_x#KrONz&OX3PEs{Z_mLKjD%p(y$_!ua1PB?Wk}1&ywT5)uCs7R#;2IZ&X-``;(T zy*Fqp=%OP{XKrrpu2dm&WovX|@tJ9yX>s1eN7|S)N4FnKa}$}t5gwoW0~)CxLiE26 z5{7c8nfqo%0nTF{Fd+ct;a}0R6K4V<<~c6Gm@ix%=iWs_M>bm zq#+ZZFY#AFHGWLlN-gW<9xB1FF|U|ncLz@{!?WJE@7thBEKWtM#Yn*zLrf=1 zPpL<%jk0%>>m>l-hZ5yvE(=<)}}{(K%K69e=iGwTeSG_Eci~{J`Xc? zJ$er7lnvpT75-To1rh`!5T0w`pKB1Fo#CJ3B5|pL>ZXSq zlfy&wN-Dlm4<3FXatJmvjYMW`@sCsz?s2`V-+m+MLQbJWPAT+N7gDJ)GzceR6-hvy zb%~8m!MYwDH@BcOac5~&)Dmvj6>fLlkJ^vA+%R2-7osw(_-x2io4%AOP4H!toYfB) zGWh-&Dr(f#O4`+`?4yQzU_nxZp6YLd0ox)-v!7L^Y80Dc#F>|uY|Ds&Ewe1M%!rR? z;qTWxO@roO zPQlzF@KeFL{8VXhq9kFiA4kOb?kUGY_mOi`{Vk0w+Stf-ZeV?pw_16VQbXF5jTq07UU-qV&ri7=VgUI2c;vayNu}ZN@myuDC)Ea*zspX^q zkqL&fjhl+2Gg*tvNh9iWnnTVDeiiR5o(h$a?MtWkqjYp67tHaE? zEBU^k{Gv7~99s%xG#CSe>Xryazh;K-T)ia|y#4Kr5Q-E0@^MAbS4U}-yjzCty>x-$IJN&Rr{1Yvxx$A_`2bYNyF9qGM( zi-wHf@@iM#`DCW-pMb>{hA}9QSN1M8*KdUw=-f^h&?v`u6$RMuT>#1*p)CYtgxeImj+Q4tn*LI5An0dP& znkXL!X~@V#m944ufX)Ob)T713vGOjC;dZeCL7`wf3#I$chbOb&K!ag2 zn33sE9rQI{)rDSt$u=CWkd6XRQeTaYeXJEdRe!FP|13#T?RsN&r}_6by76@b2d(ll za|IjZLGOM)2x>UIpn(d$!ue`|1f~}VyjC6>pKMGEc`6nZf|a@b>t6wpSH_i@NmXk| z2v+{{E;9K8I~L_|R~r4$7wD(S1*haKG{5H?U`WIe=dfVa^H(MCibDYs%_=Fmr z0e97hgA>)*>P{p;4W?io6y?EJxQzrG0{H*{{spO_EH5o&4=qOR zyy2Gokc|2j$UI{Z$m19XDi8{yLiFS@_Z-ir;Inwis&NwEg3kV}DCb`3dMQcEH&Igm z*YQx1#V^#olLtTXd$5?crIsu`BH$JS)i~S4iIm#8_Ig0*{&I!siZt$G=yBr`3>s#P z-nl=pkZP)=^_Yh`$dC2rCr>yeh+jd&u3SVtXFZCdG2u<5C&#xhgtEoR4(dTKTCaql zTp`Kn)=yDCZYj{5J0NdgHa6acb@KGDO4lWGzJgtB4>~`2+o4wUovRUr?@0lKK;e{~G;k?#=XP;s98?3VF95}qU_PEoU;nsO= zK?7m_%%_ZHyZ`-`*EyVh%GF0&gF+}+WZIb_B+@~bsUl<2d2-TaAdE}Wiv81RYPOll z;!0P;iQ_bx+>~@3tpm_q8Tk^!ef(T+l31(v4-Co0tME%vpsDJVC zm5iC!m%G!OzqR1g9kf#3$M=@O`cbaZ4o7Rprgl+guh=GTCDYSAc*|3c9iVdxEz|ZUW(!qt`x>56=0cNIe|22qCcuYs;I{U% zux_vOai^zkNOZOvG!e)T6O^v6Q z+XDGTYX`@U5dly$h3W&p*ejZ8U@N!est`ue>bNvB4bUf;CqH-)|^#_GloAxb70L_g^=Dk2CQ1O zJlyO)58!0Cau1RCBfU8Irk6cli%OpA&$&Bhl!wYU1{e^u6H$5CK1lo0zr|eNxB7w- zj(0kvsI>R)(JE}dt(W7qAg&n~k_%1E!1WSv)D8eA1UaXpflitY|(o-qbt zC4xwvi&;0wu8|&qeIA6s*J>f8ftqfEVR{L4WTm#^7#+w1wP7C>*5b*{-&qn zH^I60Jei9#Ov24`eRTrCo-=;;tUC`>PSHA00Ia`Ol5>)CPopr{6Py?T3U+!u-8I$bI%6jg?rf0&g84mP+>`b3u4Sa$w zVmnDSzTG4TbU;>~TK!~BpdG~~!9rLA_4C=EHt1C!1n&pbVuSRMWtXryzYjzZ${Wu;LFW$ACYm5N&e4*w?9{rr*Q8;d6); z8M}O@2kfx7h-AX8pGo=065mh-9&Z~8TlVb&tv>z!c;iOrwTJcM= zMPBv0YImpos!QSsIux77&2rnpj+)F|+-q0buUo&T z_LfLNRp{KiCrNa5k;4t^I6$C#DT4L;Gi8Mm>f2Q967?w!>nLrJjbgWBv*jXA*!bx; zK%!k_qqMV3&Q;P*{Y@lfPQ5-cy4cQ|1Zrr_CM_X1A)A-2Nf7EYg+MulP&3NUHsL5r zFo?(a_iJtJ(qL#fzG98#d#_xCgl}{|kIXGNyX=;bO0!^q zaEMNzU-N|TL}Eu!eV^r96>AJjRH9d;7I@TT)ZC2RM3DZ5FhVnRz%vYnCTqcnY#3n& zi+<@H<`142=9jK!jbYVFm4LD2*?LC{7+B5kDv{e<7pubj0rrd|Qlhe}VhFfD?}rHoxf>cR;P=8AOngeGQSznYjMut2 zefL|*mVNk136_0udX9L@l6`c`0OLuLTHYF}y_g%*$w6^7I6Lp$1WJ1MTW6!ZlH3@N 
zj3Z0UbrxQIN(r;Id~M#E)#j*}7F>{5Sd$c;*00TLjR*s1R>|!ZIO6MFXBj;H@+pp) ziIH){;JMBEsLv{)ts<==&8nPxdbB;IKUeWd35m}7h7m5Nidp-k=19w7*Xe^ zmC7__B+AVh$?Ah)Km;t4`Rb%-Ko~4A0t@J)90a<|oq%)m)?t@*$60n*JO&QWdks-@ zn!N{yn_!o1r~H69edVJ3b8|S~_#WGyQtthNU^wo=eEF$a9YVW=6vPv5rVAAoy50v6vf3?tJ2L-fIj z6&PV}4wEASlhasB9{tD!Em=479k%A&w*Z@)oSSoyxGZk>TU(yq<=swXANhbUvCrZR|VvLvY|se1E;K zZ1zxQ=vGWO_chu2{Z#j2?mHI)HtT4fdW1Dpa0dh0!f3>-ySx5`+t zczx8dIQoMPF-L3#!s;mYDj06{6xkbLvNrL1jTsTwJCPIA;CZ}{>p3H=U`w_H;L|H;lO6XEMFxbqI!17`ke8 z9oG0y;rUIaU5|eM(^B)sr`Nmk7^HtT~c9EyL6@bF zk`uWPJg0#h6b#GF8OpJ8HL?U)A{GRbqmAW?=PczktJ1ME-7=U3B?YQ0a!!Kq=FHkT zODA03acuGYrsPNZ2S}0_1FO;}9$(7-bYsDJ36MR3JArHYw`^{gSA3Uq_az8?HZL^P z{qf4d(_NEqbII!L>%i0EQ;O~h&Iyk6L25vp=l;)BCNR_YzGa&;YRsDf4O2}6oee{S zXYiYHY5m5sLV>4vS4T%}N1p6Sm0QY>nvTW}iLz)-iODz$FvTB1YcjDXi4zHHYp5+W zG7VIXSFl`2mRs0hrekVI?u^WA-&OQ|a&776&;#X061-8jWQt zw5`E45_dRdXPz>n~o6d+^Z;1Ez+9_WO zGtQOtskm~8dfhPv1j+J%)0-r*L8wkBKN#Zf&DZcjpAqhIJ=n2qJk(S^!fN7`zeUV%DXF)yeYrIyqaqXkK+r}4b;J6wfGed>v#d2LD?{Ip z-bOLRzwh%^;vL1g1CE1af@G$|z%-I~Jq0}lC9X}fO&_P5w{&OW)J5;Q!Yc}QC5t3! zYN!@OD}M3({Y+0xk{hKCdD2hrK)&JIIjCY`=XuQPd%)*J1e#W~ z*De6_i=lZ{&z<8U){{M5p2V?taT|GoMBZ-ulYRne9&C%+om-L1$ORlFwnY*MtC)fx zP*f`N!&MP>}4?sQ>Lj?`={jg_ooGK|)k}aB9d&{spK3`iB4$}cplik=R!0%EDtnls?)-#FLok zKU||OTqA?IN~J7cJ%^vET({|}TAc+OzlIv`Ru)IZp@RJBclfg?d$7kPf zVqXcI-^$;j0PcsGk|3~9;nQYvc5Vag-@2Mnl9X8&tAtAGVfz$|>K9(laQzQEq!Ju? zn4(h}Q&&@0sy}$Y@V?I)nxa*udXl`wvL2GfC1+MD9g-M{)US7O}bl@HY)Y| z90E-#wQOBEvl2Badq1~{E*ovMmiQ4bSZOPv6Gz@linQpZ-EsIM%m`VaiaBc0Xtatz zmFV3+{5d!oRZQ^Ds~|N@2BrT%pD9a`l8VVSa{-27$A15dulsMee%L6+b@L!h^U+T$&0y~2Pn+uU$(EXaX2|l<$1q#YW{u#`OC4VW@ zjAv;1IFvi`=NruHwtxG4+E5MnDIhtf>6TLQDLS1#vY`+lFCZ~DXU)7U-EBsXmRw%C zu5#hm%o5TZLNjg2T60ma zsmNl>Z>dt3`Mn*Z676efvE z+VX|tU5I_8D^4xS94pg@_zFhZmaL_ZvMmiu)3#wxsZtC07{CI@!?HjT16tB2I-?1@ zuJj^5E(IhFdAy1YT3sHACg;3KX~(^Ye~Da*!iQ0l;?}g0&2AZSLDbUt+6fH&N;QX{ zNuRK{CK*NNax?1I){s)d|7O}08l)zMXpvPRW*UAi_@ z_k zSH@??1Pk+2s?nNQ=8+FZN#$pZ5d7JHFEv4zBR*Tn2;EHSu>7cAYUQ6`3IZ_;+ti;BzHT%Oil~zlK(;{6=HV+xq|Zdv4va{9&&%Qn4>stj!1!tsX zM5Bf49w%l*BZuogE&9I@^#>HG$g7ZiRw=v^r|AMI4dvSW^!VUuJU?tg39@gLQE9kw znz@C&*mHM`*fO{Cz<;hhzQvzoMdU;=fU)OA7w-Zn@{ziFSVX7=WHAFQ0?AQ0C!Btn zh2dCBXDE_6bq8GX2U_N|h@z?8^*k9R9B}6pW|bCw@!;JKEDgmW7qKIyQBC5`@fSr- z{Q9f7{NT8`3O1Fk8U2Y^I?%D%ml*4pfGG0Bzc;Y{FE{F6#WYyI;KDu;Q~vUNpS1@- zn46}*lTOghTlfddl&l@Y>6aa}#+Rs>8-Z~EVko-%--gW4Ps7rIiW;yV<6XWy^2JOrVbD~>1?;gr;@muSIZ#Pw zaL8TE@CgtgG!6DoprMo86GJEoF8-WIfS2zKOQQY;1_hgmsc` z``jml0)8dMQc6^5ygbd=5ngGp+&oo%*91E8XVK@+uUnzTtdzM*2@#>lJ6`~xTi?>~ zP)q(7QUs1a4+qY@LMpLY_?g0P)Tq{oHVM(Z zeDlFlI63=uHD?c?+9^>Q@%h9o8lybm3~%|BzAsN(o^rd@gksoj2Hy}1}ChXaL= z4H%E=l_w&M7s7y2rz6G&IA>Z^^QfqF*ZKZVK$PPFasML#DA zuLYC=UfI)2<7n@=y07lHD&a=~H2HgMeS)-S+0^b0wtNz{J{t@PCSKCZAY&l`Du`C~ z#K7GuniI6~*XgkFlXB{UB5Fa(4D{^P9W@oSiEn6IceRoNHOp|1n~IOga8`|et293q zvgy{RxpR%f$A3i)f%ZJ$Ekdvr9HchBL16Z-_B7A`xPq$Tdf60DKVN+;L>8O8$snh5 zPuayF;FIS6?|mx`6F`K#3nhmns0Gwc*{j2#qgh_Y1mOo(QcT#bnx|~a6Sc5UC`)MNbwdzd0GUSDC$T2fy&yr6;)HaVT2+7}4Rw%@kHYCCZD z4Eyk>58C$Lf!c6jpyW_P>vx_ipXL*aN z8zWjnslf|9I(5`$?Bvf#bsMdc&)_N38;VB@Po`}p&M=?8MNzfJEmHpk5QXJPH ziF6iMcWb)ze^go%_SP;0;PbP7J1KRg2RbPYrEd%T@>*AQAXdA|+TGajUe=MWZzE{i zOSmY%R#&2Oh`L9u+T(b!XT&n{*d!Xr+7mCveG3WSaI8jsix*vGV-rf)wCiC9<9Zh= zKiz#D8R8g|ugUK{didIxg&qHvfZZrCLG=nUz9>fq;Lg~CYJ{4r1o)%P`bhr`jB%?Z zG}0?JKG2<$Aqry3yOXwE^n@v_Ket2IEnIEe3`tR65$^fQ0q8@_rsbw~_OLCZIwC1eVMra5hc=3`llcwftlKp{nn zW}S}NC|>l~Z&X2%M4zupj`8vl@_kN-e?y93BiHz$uJ7rCceCwL#Y$7Wum3ymIbON|7+5Z%~tRg$Epu(X=2x54Zt54QH5_$}4aX#e@aC@Fzv zj}X!{)=Jh4Z12E@JQ=A+d3yXcJF;BLtO{rozTDVb3u?eXif$+KO>UZh^1YxqRm2S0{hkwUf|)wawjQU$6ooRdhQ}SkVx*4KcIJX 
zRdw66gMNwF-6L==86&X+SdAt3C>RjS<7K6aK=g#DR#3tRkmEO8pvjEQ|4O*uc zjBfAPJ)~>C}x*Yvd5JV#{7!VkJ~bqX&#O@TWRNC;2k)FzAMZ-`(@jl ze{P_+M#(X=n`E``La^*YS6wH$h#H9ewo(SjSEZVhZD^;y-0Rj*s+YWMHZ3)U%`JuA z6C+6xNvP(*Z^*MotqrT=;j}sAMc2VkNGGgVf!XwBE{#=*98B~q`QtY!B$BFC=fnE* zPW-a3v z3ufp2K>$~|%wTh@ZHvNu`c-Q3pMjmynAIJj{T-*iiH{%UFRz=dscnpesH5oxZ+PJ? z^hL+p76v)n47K`Xu=8!4~z5`+y1^)Kv*NJ;+v>D$}?Xwt;GrkasCsb%k+M zHAwL0V;{SXra+E_qsIAT85^-yG&ZVGLXJX?Yb-N189yl%pF>~ZyqrP%j2XsqW0&zo zLf%GlJKsqS3JJjfDQ8 zuBfet;UT9}g1K`vS zzOLAVAxEj?C~Y~)h#aMsqfC*b^yDZ@$x&v?QRc`|mXV{(Ejqpha(u7icj9(e_9|l^ z%{=^;UTd#)5m(udPwY(U6?k@HzPyw^Pw>5q^8vF9&_CY09KU(91kXHdn{3ob@J+<` zQH+$#1TT<0m|qG#JmQ;((pW$#T7FAo zL=755!iR>BATFqg1Q3ZK$cMxSBBCN9A*h7w4~>@?LX4LfFEK_FM8eK||2o|>+q*lc zXjs2{zq|G8+M0TtI(6#QsdMVlE$&rFg+19nPmCSRVG>o~NiepPIIcUMUVydMZG#gl z=m_lxbY1Gkra3L$zSbHsUo5}(-!98bX;rjl%V$ct{$JJ_Bka(!C&1JJk@8rex2M!X|Fpn7|1C(vmlqelhYJTW9a+9f5L|m zC475*`w`yM`@dCJZ9&-RV=LDKtPN9t^(ulC?r?jP=h)#TfVv@!lW{Bu1e%Y9w_{d_%; zT8|pX_{TR=#NXB5)mPx};Dcr8y9KG`{%TLjpz@JlRxoS%h68TU-^Jg(v8?Zi?})O` z_r3zV$OlW?cf_CTFKVQYqxt(vL8lM=K2JRB{XVG6zJ0z%$|%Yl-%_FHD<2|di=b>K zWxQ{eG6gC7kh0RZ-2*G8K<`C8if^KCp3h5>gwjia-m9!YnaRHSjji(a_VtDhBX+&! zBCXap#Z$_XPbiS7%6Jh*&PGo4DJPT@@*x?zg8~g&IU&*;F@a?OUQ)Uv<)Cuh*Vcpm zPI;ZYk?kj0IpmW)5Z5c~edM-EPq8u}&qs~JT>HsMYm3QFmoWxlWyr9v zhI!HmcEK^}L!>3x+H0%y@w8o>Z5AsT3PMSxail8J26;Q|8cBw}BJY;pS3DzC-XJZO z*Gdnt1tp8rM&5$Wv{;%c-y_xzBf0*qh zxv!^&cQN-u{y{^OrFZ2Ho-#Y69hhS_@N6$jJLEuPDOw<{B(ZV=`6oRlD4Z#^kfy=b zf&62gVqYV5q#4o-s)MZq+)$@j$!RQAM}4IPYg~|jQk}HiQ?9Snm)c3a2=b30|L}XZ zo7$TC`hOLhN$8wIG0%Pz+sSJ+xhpQbg6fQs8@Z0lOGZ(8C9`>sshJ#*mnAl@wF>CI z7CCQflC!*|!o@~my;|ruWq6$NK8@oRkg#_p=Av}m9WFfEjZ5>zsjRP_`fBWY_8({M zvp;j=3|HTp&iIc9USiB%?THn7>ORIa9e3*W4q}>}TQcAzE7>B?<*lacRdTDwQ9XVw zb6Yio<7#=ua9)$Rel(l=(;L^7du()*IGxuI?M&RMQ_0RyN&Kp!#`W_k``2W_`{pco z-<$>So3r5U#Es?kP&*TM>Ufto!Fw?K73?Rzu)iBeR*dV$+<+X}WL9r*z$LIsgDkBI zSXvdav?^x#5c&os6?bHcQM#D%_#VfJ-hift|95y|g*7w=A#qY-yeHln%Rc-UF5#w} zx}8tlp^JMtPc{m64&9Yr|-eR;Sg`F#T11KV6}}t^WsIZ`2w?>3ha7V+7r1OfV+U zY-5sf1KnxdW&E7xT3fBx=y8FW%SGbUP7mtiuKzUoOZ9;h=Oxm)C08;+y6c5H#%5&aDbUOb8e#s-Q>kl^#?k{loOBJRHlbY7|@b8i8(V|Ut&U@s%e+*+DT(@MmCh@>K{h#}a-1A8* zTvr0u87}=heRkI$dy8&)Z za*a1#&`i)G!((ZBA9Wm;6J?B)aWZu~=8x7Kwc&h5*FT@Aa$8QRo>)Pm$m7NlcjB?$3uAgQl^ONM8X9Wtz{tK1Wi*t*?5JI!T|fctLhTZS`)QXV z++Vv4;Q`tJga>K^5iSGt96&FJKNxk$T!+kc__+>0*P)<}diYCFlOHwhz<7Vf+C{n6 zFHtM3a+=PvP4dV#DKt6+X&jmqf;0==7!olzg)nP{eh|WZ7Mc^9LlWfL_2gH_tFWKc zDe4sRLB>r*`1|UQDM!6mT>}3B^#Sln-eb2KH^VHt^5T zI=~k+E77jhrXc)7Z4vy3wKeb`*Pf&z?I~?5!mnuusX+Tw`wa5*n0A5+^c=l_a#*ev z>sRW-5gw&qOL_WOeImlQ>a*eBt>257#rk4|>-DAZAJl=B{7OgfM0D?!;c$r_(O~#BpXAGD-a%OTumy=`hal*WW8+MXh63&rW#X`I^CEF|4~T) zTw}fQIF%S1j4f1PJZHQ}#l}m|0H73MZT5VJyY`0Q|BGQu_CvG8vV&x8MP zcm*lpRpBQnH~fe2A1FWkxA3Rnq^;Hqq=Ktnf&Z%YD*WBnZuonwJ@EHg`zYUf(|Qx( z{nmc?Z(DD}f5&vz`g$h6+K-lsh4uyq*mf42S%|1Z{GNR5<4RH0o+mYAQU zn6!-ojc*S8%JYple+OBZiaFoVQR>jbXXB7j80KX{waZG^TM#pI;Ps2^=ub zWb#60bDB3VWDV;W#5zU}MpZA;m^aUZMll4kM(|f})+H`rZW20!%HvnT_#KZiJ3*a@ z5j}~=@(q9`xnubz^(N$*rcOh6x;g{#H>=PP)$ggW)YV&2pUn07xIRDECvkl;*C%m( zLduxmK+5E@)R0*UC_K*nTwe~4A|Kb6$6V`Yt_?ER2AOMv%$+{wvjEBRA@Gnaf2@?@ z*O#6fJiUL;=|DbPnf96gT%XUDHt{jDx}}QUJm94?H#lcm`OXcw^BbRVbivL})veb_bpJ`F^_EMAf^2j=Adl`=RbmzaI-jlW0L)(_^ z&C=tiNN3oO{ovQaHHG8U<-~jQ$UL)*AOhFF%+^aGeh)?L<;LY0HN!AoB%{u_5~F4~ z@+ru-6&k_w*7GD=+pKK}zku-~TQ6EKBK0NfC5)Zz)^>znwqE9uyz(22WRtSX=irIs z#tDQQj0S|0hQJLOPAzd67hfS7^MSrl`S zIS65)xhUpfvjX8tvl3z3v=Oc{s}Qa>t5GUyMiC>l8^0Mh;|M3rAqWpOha%7A&~*If zFmo8?GIt^VSLSQ*-!R{R|7-Kt@b{VjhSdEgbaL}I z<~#5Yn7@NB=Bh%Tt3o_ig?O$Cu?|(tI#e<1P{pi66|)YN!#Y$k>rln4Llv_QRm^i+ 
z0qam9)}e}7hbm?rs+e`CVqPN<>tpG)QKDP%JHRtP@tp6>@CwiE&P<-eb9kOuMX=sr ze>Sfoh;x0@GJEE?@vcZW>VeqN&~?1AZOKAxg;4bSnFL(Fuig&3V-aMB&0JGG{dVdB%i?bOE?X8o zVNEWpo{b)`d_FI+Z26SB3#1b{8H9s`t!Ig zV_RSlM(=bghgSbK4L1JM_)n_9=>3c;G2RNuHv3{MRk2ndV@*EJ9Grj-zk+JbUzoq3 zA?6F_3pCVx(|nUIH&bSchJokKrYpd6HB<+E{$aWj`ur*y9{ySQXEY+bJ-nSphIfZ| z(&b%4f!7k@|BTZgPeG=X_>BJ<)T z=EBL`o4Pte@RF*tA7PH_#OtT?cye<%bpXwDIqCb%F<)j)uc0!wqsO`W@OW^_MAtT* z27Z|Vy)~%b0*$qhdAkIZo*jCP-(Zz| zYJCcx{D<`q*fO74pJBXxZhcOL)<3O(QjyhQC8;5*V0pUg;M zq>!vgaio|c5iO!oX~c*a)Fu*+SkyMsCeoJLMcPN&Q-?^$NGIwT=^W`yXOej1D&(+q zi!va9s32S60pifJFAao~aBx9h5_7V9JD<}6P*C6M%KRnL+b&l&{OM)*-+?2C71ypML+-4_$ft(M%D!eWsX%^~}xjvcelej*K>yuGm7lhA7tuogtbFC8BDsins6HDt{3hP{cN9STZ zWX3~gJY>d0VmuVaL+Dv;m?I_TNQpVJfH_iPj?7_>%w>)YGDqeyN6O5R5;(Gf{8kct z8DPGYm@o60FMZ(4ASsc2aA%OYQ)BK_nL7)aJ2h};m`WlRcr?U3sxgo1%%jE3qeVP+ z#Vv&kGje~HSS@ckS}{WFsS}S-u`8O>Y=lk)ZN$1ru4Buw{0s7WgVn>I0@4 z`5DI`kDz?Uu8^@SX6#CU-C3k&ucbtK}>?h$Tk}uZwkPMT{tOe$+XIeMe@E zUfAMdE}o3K6|Pn2?*Z;};=Yx*Z)KJ?3iqm?do`yCT<6T@4>E9N%vsl=y%Y2ae4i*_ zeQbRUh{vttfOx`!W@@D@a9%`-_|Td_BmjBi(9YRQb#AK6W_>}d3y@>^^W5NLd%@3^ zLV#_A93kbczk)K28iR9>#_5AQxvjz)5Gxipx$6jm;}zyta`{zop2Dsh_2QkNG>^@3 z_xi+cO(GK~YF%vI$8aPIan=M2RIu)}mav5PF}}q3N{p|}_zGA>Jer6{lf zJ8Anp@w_VG)-}fC(s129C=RcJ^TIg^%&cv5^QP-*UVkRdb(y#!a67X*Zyn8fF;o6W zsUPwG_2Mf>jd7WBmpI>#N$(k*?5_!)phG$=_NBhC+;TWC*4{o#w*#X*v?a8iD6}KA zhw?(Nhu(lpg&aZb7Lr2Gg`Oihv^BI9`JN9!Du=d(wvj*dLg)nwgkB82NI9XGLN8Hn zXgi?f0m=@PdL{G9i*Qqe{ z2I>|klgzJK_sgM|QO6fykk1$tFb0K9FnBHW8p;YhiWrY#SDFe5yN?`cI+rbgkQ&1L zq6;fS4a3S1R%92?H}4dNYiB_=p#_F&pw*=+G*`8!+7q*FrD~H;y;2>Ewij7Nq*@^>M4DwHyrrUO%^BU7dmQ>HUhrVCT1YYWPJonH3d&!}U%%_Hyq4BK5BabnZ=Gx}0@ zH&$pgdl?fkKNIHXdJ4OGj+n#mvi=vRF14P+jNCPH4t2>u>W-8JE+mmlftFkcz3f%} zFm-0lxB{`oYKdBcHfgGcHkqo4HnmaPq7OT%ozN3!sb@h8>!tRB#(JT8A!u;1+L!Xw zOVvv$Umc(h02frL)l{g))i|Kls<3s{>(p^9FMO=u66?1L^up^Y0KIS`1zkR?@SIGV z@9-ID(#vx;X?}AkY+D7`YAJGCRnDz?3tT2zb>?Yq6-xlA14hV&E{=C2raSLI6YoI# zdHp!x>_D@1m&?1(`Mm2~puMHN4PO3t?cagmZ?)f&rXAD%iILq#KLcFcUT+W1?WlJI z=bou|0q1trzXLw)q4%J(n1)@LhTZhr^*gA$K3ktnJ$OCnyL!F8gnIH$b1&Xk?#=5y z7xP|nU*1c;g!gOv@qX<F70p4svQusjF^AG%-mk6XRh}x|udU|&+NhZdOB4$$ zVTEeK{;;2F!#Uv~4G9;7i|L9mh6KN@;alnx*UEAF_Y84!XwUxeQwSeudL+T$<<6-qMM%{<% zhp6Qv^&<*sPifD9m!H+1<985zJ2dU)%25a3|Lw|)GL{!*nbb3p zB-XMPF{X>fc=t*pUYkFYWzt}Qja6V3fUd<>F|;Pt0)|%DvPiMoS?!>?b+kG{I`pu5 zKy&M5^@8ShfpsC}Sr=OuQ@+*D>PH3E0Bax>TIE(bpjBEnl~^OKQQTIEcY7%dUiW>w zc<~N1xl&V{fZyTvY`rTrHTO2b%0LOhnz2p%W>* zJLywrs54O0%*-BT{%OZIH_N_4(%n~($j7VPe%>t#@LG4SbH_!7^(;^2H`_Q;R@}XG z?q-TsJ}BbkLwIbGl-x|5EcX`gmyt2D-Mf?z0@~0fnwQYI0_s|I9oqA_`Z%zEMtug@ zA5!0=BA)d^Jm0D6C+goxQ;(^~$kfEkQNz5`Z)p>?8>m#fQJYHbw41eCsDrjjdxSb` zk7_@qvw5e#oA$i6jk>cQ*Ms%Ap4#uVLv)_@N9~VvKD4>x)LU!NQgjjT`uAl$?h^eR z{T#YX|E~UB8lYdK_oadQrTV2bn0HnxcxT0iG@49R`c!=?MfqevOutjVlj5x3C7|Ec zQw{XHhp1M6Sbvx<*H`P0&@lZ8eG}EOt~Z=@y%9#x2+~NS$S9&wtnrOzjqh64^~SQU zcO7)SAvBJ4z3W-mo4~r>M01KcjV75ln>W)`^9SY+=q7WHIftg1bIrLl-Mrhphh~@y z%mp;lTx2ey@9~*{TUZmkHCz}jq}#(OWZnziyFnmGy z0=heVN%#`FCp;)zK|cytg{$bte4b!Y_{#7|x;K1P_$pcwzAk(nEe+ogo=W$Jr-!H0 zL#Mope>(XedS>Jd9tBzU!Xm_bVG8)4_rfB1kvz(YoPiR&Bgb|D@t&~4d%`~64fgX6 za4zov=bd!tuA%7&6EchUiOIOaxRPXJ1gs-JTStMfIZA!hmfz=4&9wgj_48 z^Y~2D`Fy6SH=k*`fX_5tXj-O47n$dp=TaZDuX!n5%<}G1mT~?0eA8v-4dztXze27J zW?5DNS#~dMUUBZJiqAb&n+MHy&F6T2) z!@`@wn_v+?8-AAR_zcvQ;XjA}OvA&UhmX?;>l*7C8s+IBhcD9S3s!;t`Fy6Hc%5A0 zb#jGIrTO`cTMn;m=kdyRKA-$5gxs4;MaB|iDJ%wY>P^k?m9PWFe*QEU{_2zV`oFT) zro8?L^V?lwl!pEkN_niSQcB;)UXvw{z`Y&kbl^PdNp*62*Vu*361mo9g=Dh!Z@LfB zPqV-hRzfYO=i3`P?6%FDV;$!h^94D6&N;4amZK?s)@QA!X`aWk z*3>l5hAee8r}O{uq&l^>%(Jmso-tYSJkc!AwORXXQ`VL>&-II}ZEK$E$*iqw-uF*s 
zZC~?TPiJjm^IXqlX=6)1+MK1vmbsqIQe(?pTe4DByl<}^LhYO{6d>)ntaNReZ);Y{ zw#@f@R@z$pz9-)A<1$3svQoEYz8A96w`IN;vr^c~Xqda(57BU&T^c%Bb>R*douS`@(8#0TXK{C$Fch= z{m{J6zHOY-?qA*iD_S}_U2j2HyhIVB=^>%bH|ZyH@bHj z`4%HvC~xo`2$|pd;e#f&3E@ukFZq)!>L+!lg;J4tdn;UbI1A52w51!g%#PlVYz zvH12x8@3|a^2raGtq6s0Pvr6Ki6EZ>$>ZA-K|c9W#8yNxpZo~16`}I&35~4?olk)n z?!9brd*TeXBF^BmAo+ZIBA<2Fc6@uHJ==ZlGdHr6=t-f?N_=~w1M`cY&+7X5_Jq!s zMh;sVIc#a5lwj2ui)Mf|1JyFWHCoJZoj(mHfkZ(`4<=Ybq-=65mH+hQrCQk|9 zq3FnWD0IFyWh2b_D#3g1(f`JS@EYpKpXWd(54_mq{)-Qd+!)`Rde8kX$ry1@7Svynsuw-#r1Sk%k$$Qt z-h*GIAGo}8|6Aem5|Gi43oO9-X3wk&+Vi)S!d`1*sAfT3S0&?yf zNPOSG!BP4mC^VJd&(<7@bGty|HE@v%p69PZI5z+=e(_txh%XV1qm!Igr|!t3@|zMI z_ZwKBPk4SiapM@m={c+4h^e?weKJg)H$n;9vguhwjt7;q#Y@sEYI$N%oVbg!R6M=w z+`uc@zcp$}1zM$+_N`LOk6WP@sa0x`8xFJzkFELdQ%h>pX|DyNKlNjNcgtz7hi*=Z zRj$){zQw8Mfz;&FUXS2`X|2*b&$L4Cw3eiiG55B{=sf*B(>n8v=>PV~{?xehDOxAq z(Jl5z9Bv<%3V44@S2ujxM5a&MVtzx|&-*s#3Gjt1tC z$dl@m3-->|sJdM4sNo0BE($!2k)hTW$eBd}=O68f<%S>iry z8Fyk>mbl=K1@fYJe z<9p)!;%~>_jlUQFApTMO@A2aaN(2%G2{mCQ+9f(Cx+QuhE==@Gl*M}|?1cCm#^I5P zF^TbsDTx`0+Y+-A^AZbM`YyD^i3buZ5^ECc5*rgwC$=WGCw3)XPrR9UCvhg-kSY2LLbfkCB@cw^Yj++4ta2;DXiP-GJa~~Q{yw^ zv*L5&^WzKS_3>r#mGQOl`sz>O>*Jf^o8#M}vDmQqj`;5A()ix!miYeof%u{L;rNmG zC$8HaKNe3We2E|$WX7i^+9o;yo`5J|&QElWFHH1^*C%?%XC``7e^UKP?0vun^h95{ z0Svt~Q4!zkz)r*f{cs`%H#B}EA-+JD{`O;Q(CBI2RV?7SkgD2&KK=jScGQ~RscAwT zn;@^c#Jb0N#rnkh$I4^XvD#Q&Y&8D&kByB@M0hH~Gh?&xzYcCrY<_GZ(niPXW6NB( zGPV{mb#DItND=VX1L7w9+Kjkfv2B&dtGc7qHpKL2*gKGWH{4W)E@0PYV2_666i+?e z#8#zFcfS4OQm5iQDu`~5Zj0`~vn{$C;l0uQ(F1sPXRylSuhtSU0jB(QS_QS=LO%D~ zuaA~IO1b>i*F65-YmmR|nlHXqA&ED<7f`8q|3$?u{786eqf??YqPIn7N9RQsL>EUN zh^~mPiLQ%oj6NOR8r>e<6@5MWX7rut!RY(=Ez&-WejGg-Jt5*_ax7QG$BJTltTfhv zf4?5xu{kb|X*|m3f^y6`)b z$HhCg-hs`trg(sF8uv8b(e1#!8Qu&Jvuyf5w?p6sSd{%cbKio5E?>=0?cn{D>#C1d zpNPuQT>EgesN$Zej{l|64ui)=y9^$SaQA4h%68E{(f-l$XmzwUS{EH1?NYwZaYz{( zofw@Oof(}KofDmpXJPfxXgwe<1I(4twbAue!>boZH$^vB4F}v(@N0+Y4nW@xw^u-C z&OHR!hZ(l(2Jep^89bI+^P6HI;gUf>c{xR$*3+tD0Ojt?HJ_VO4jOAFY~;|Myfa8a%aX>7acGFRxlXcwyCJ zRU4|FtlCobV%5&7JxDPfhm?I)Z&$rr^&awnRP}c}$E(IwQ`O|^K-IMBf@-zes@zq5 zTXnnY&Xv0W_nzu*Rg0>74xU;)8?K*#UR_pgS0}2rR1ag=t}8FA9yyrTs`-mztuFNj z9-)G&vMReOfyXYNkMJ;id08iWdHMVdR$sp-#_<`s0;(3@svkJ#Yd;0{4tuw~7td~c zKf;^r1NI?2`{|p1wdU)$uF2>hu!f)7K@;tMcA0J434545a!{!~#vX5vw5JTJw`bV5 zRgSY~+w<%N_ToVuY}J0iUSY4X*V!BGr|qq-TWoK)ciFEGT5G>qzQumWJ~(gy;0cHV zX2O0Sa6bgZk5SKu_7u3IgX#tJO0&|mPYCE`3#w#5XV_J_fWEORw<@>n9sB8ly{YkQ zG_5WnFPqIzZRLlR9}n12d9?C`E!(+vk*(XMc5X#)yMx_jP)XSlySv@1a-H30z|hNP z+5PQuySi+aU2E6bqg~g>9&1mur`j_I1?^e(9DBaKaKHwJ2$+Cc54g(!d1d9t_OgoJ zfV>v40o`5?w@E-Bc+lPq=nm{{fL;rK8+fa3z%Abi12bt+Xk0Z?ylThHoS6wW$xg%l?N&hUDl)WaAj%b!pb9+6DvQdJXV=>-67j& z2kjEuwAhgNQ6IBC?p}Fqp$~B4QRX zVq`JPixH8R$H`&_Sws#8vk_x9VtDu7>TUv#JL7vE&pYQmzVrLl<*TZ%tG>G5z4xp8 zr5jqFHXJin>hhrFUC=hR-d@{otijf6MvZ!Co!T~(W{Fkm8)*6Zp>kS(Wn+#hGRj{pni8mPa>kJly{lzJLjlpXO>n<5$hLoYh z&}Ha0^wgN;V(`|~)JQe%nofOD&2B?q%~`$jrK{8;wpnZ#fOZc+i-U%I*w#Wh`Vx<= z8;(O6rq)LcqtH6F{c=9E-u-euY{v{e+FR_8{T-)2yWf6coRF*EsoxD{r+!b>8QAu} za96*x&itap9{*m`ANOlOPku^}kgMN_V^(Xgi&i;7s?^tL?Ju6Kt@zV?O64LUS2wG> z4P{n$Pd5+SMcuM)Ry$gKDQgqoqn;W?xJq@ zi&u1K0q3E;aa~e3@p6G~O1DmT?ZuRCMt4(p=fzZ2uqycS4ru$nZny3MlqGDvu20YO zsuwD1C2Tueg8E$DZuWoj8%%%pIdH{8hz7_0?=DRKU8g@w@4REa2*05)SJB6Q3bFDq zU<5D<7z11cT+VK}x+}1qPS@Q4+ydMM%mEev4`Bv`Ga)rV11JC#0!jd-fO5cQ#%n7G zOj}LhwOW7)WC7p=_yG|>9Iy?r9k4_GY%N5D5SudX-WA(@EB=2l-PU2iF~AAHsTJEZ zfOCKg>2q*<2`~w`3b+oK&2D+^Z33sg2bc$WFHiyE(%Bj+5w#am`&jbq|e&}az9`{;1B?2 zqq-r$$@KoyX`F?+^MG;2=_UYE*?VESYk(QRO~4(%eZT|2lHBk3l{9))dYk)rn>ill zZ=?gb#pCfj{C{@;T0k)XY^r_(U=v{L<1IcveHEZ~#nuS0tk~KC-t3mwhvnbiit>w19u>IQ%h)C~gTSx4)R*A3T=)Q#4S 
z)m^N+Tz92zy6#5ZE#kXbd9H2&823KZ@>;c4qb<-D0?V?@CE8MLxpuR*g1GuAS*tYx z3tA_zUmF3AYqtU8S=+Tcw7ayu+P&I++JoA|+GD^cw5Nd2XwLy-{RQnM?WFdq_PTZ! z__p?*c3!)vUDhdet0DeGUPD|2yiQjJyivCWxKdXGtk;==C7l~M2xmpXgMlwZKPdY@ zVDwHy;O~y;PV3I<&V!em01r2=y9WMk96a24-5v03=hN~y`Zx4!`dod!eyzSZ%j@+U zfU$m4nz!n!^tE~;>_xU@WxL)B9M;ExQ~D0zE`4{Fd-Q$!0sR5}p#G@-xPDkaq94_d z0b^?y(>0g%SAeI9vF3*Umj14OPQRe7Ca%cJSpU$#8`K7kp#Wlj4ap^jQbRe&n++9) zYJ(QydSoFhI}Lu|2+R(u4ciRcfp;#XPq^kH;x-8j8n#I#u?*H;~nFD;{)T8@qRt4S0R&}Tb~d0u(y6~ zeR2JI-BA68`c1&HEc4d-s`UE~WcyRHw>}IUt4{%U)OP`Q*Y^P9S$*{b^#|$)>yOqS zuOF@-sUHO%tG@_*x&8_;)=$^psJ~Tzw|=gE0r;VbH>ph;Q-P__RAMS6E+^g$Tw$sP z)|yPfg2@T&H${NsrftA@)^^hl(=Jo5X|HLY>7ePb=@{?{(<###;&Y}8rc3GfX!r*0 zHZa!QOV`UhZ(1}hHz*rcH{>;}X(-C_x`r}ftlyaCEe(|oH4XX(GqIGF-3>wDXhRaX zy`d9$XTxsbJq`W9`x_259BCM8IN5Nz;cUbChVh1phACid?OM8Krr~CmvF1+0{e}k( zOJ;a>$YxbW#(HzEIp4e%@5@|lnk4z2dA)f9$eYYt%~j@Fn1Ui(va;Rm1rD2Iz$tSF zaF@9oxX0WFJYYTmjOPxTkD8C0hs`7AQS+GjqWN-`uMkh0Z?iZc7k2YDogaS(bK7r)8&Qw`Gr|-?HCw$Z`aD$Z`_+wB;=DdCNHP zgk=goAknCcdc{Q1?xjgk-!V;ENg@UOA*!!g+hr?DwL#e!a3oBa7j1^@+8Pth3l3jVOF?Jd=D6EthK_t zuqZ5xN^$j*jP+ukxJE1z*NJ7~MpL1KAxIHUt#7;|* zxKrE>yhrQ@-Y*^kJ|YgmGs+WBf(+*hh2m-PtbR^BFOG{7;*@wT%QM6`#XG?F#RuY& z#3U8!fRsy|pJ8dOR1Cac+8}L`wn|l5u9b|y7RfGorEp!X6q8a?N1YL41F1{umU^`1 zQlB&cd_WqMJ-Bof{h)MQ8kR=%wbH0Grmsa$Bwds)OIOe4srhx+UF}=8Q^d zL3(IZ;&`#~HnmODSdDsXE3g&XO0>PUQd>FjW?O}W*^eH_w%4}LcF-6^%WpeuJ7zmkUu!#MI|F>qc0sn! zwo7Q`Y?HRDw(AWwwprWlh8napwtKdD+alT=+p@U|Es0%eUv1Ab&)L`5i_CMV`PN$d zI(wOE*}l=f#a?NzvFq(-yJUA8H1?o9S~qP^+S|1nd#8P;eK#Dl2af5t@3$YaAF&T< z=j|u$ryDQX&)U!1$L$mLDeXb~HT#U=l>Mgtj{UyBMhuz_IgdH(Q%F!Xm8;&kVx1-0==NNDtpopTxG3YqzIPMsBjNnsn zj5@|-&+fSBxD0&7F%5jfam#VnG3QutJaqC-wNv9Pa27gCoTbik=VoVxv)ZY3nw)~u zDaW+B)lR=oDaTH7l;VsyMy5a2Ar<{A8z0Q5kgU-XwW6l%K zQ_eHabMPq_=*UaXNqxTas`I*Y)_L1`59E30Vx7^s>{8YlU8`Mr+H%(#R}t_!7sQjU zjV_2MU6rmHeXUFHG6PF4w|>MGbVY%au69GUtJAd;c(-eht6zKFwO@PHb;xzZHRL+! 
zIt{T0^nmNE>%41R*6qf9t_jx^v~$fhV^q3sy6(8{yB@ff+{~@Q2-BVG&UdeM7rWQH zH{iH%Z*p&SS3#5xvAVmK*yy&n?Q#U}_7aDQW5g-^6nBTr?k;z?w%6U`?$aK054aC# zuet}dlkTJLy)6aRf zdv*XrJ=E;-^vY4IXD{(S;)6KqJ%@>p$?Q4dIb}HIIb%5CIp?|Hxn#KDnKYd9T=iV{ z%o^rBw>=PrdgeWgMx|%jtAyARo{M)iah`XLx5#c5E_l~jio9jsjovNZN^gx<4>e|} zk-YH4y+Lo(oAkDOJH0!-yS;m?gWi7ce(xdg5$}-qB-Eewp7oyhj(aD(Q{HP})yaB$ zXS_GPcf9w#55yXYd6#@lwuC;FFV~mvTk9+Kt@my4ZSrmPRrzXtMxO;^04wv^!3)S< zhU~WNqkUdqxbYZzTVKqVLT~Hq@O4=S(Q5g+g>AkbU$?K%H{d(q8}uEucKD9_hJEX0 zzbx>+5lfM8)HjB)gzsWwwePa;3i@>4G_-ufcPry$~U4F2W{=NQv z{)7I*{$u_V-WmTX{~7-|{{{ag|D^w_|GIzHf7^c#`^`U3?JQcm{L9vkfHJV!couDV zATJ&500-6(7iFTF#^t~|;xgim#9Qzh0+qxy0e!%1+3nvIkSzNHZj43(LCciQABYB$ zvK(k9?nGY@*ojs*ursh5dl;jOz#ig$j2vv$f&GC)wrZ3EM~HU@h5{$8bAi)=vk*~0 zToE`Q7!ORyQH0Hp9GD{R6!-vW2<)8!z8Sb9dwlRdz=8XL2Z1Ho%LiFd70eZugZaU= z!ZOxFO>uC&?B#8vJ-L9E=51;!?09 z*d;DmP6xZeJ8up4fG>tV5B3ocP(Mrs4+ICrn&8pkad9Fz92~Lim!r_&XmHF_7`zy~ z9J~^o4&Dgf3f>LQ1s8%3Lwrab(u4{^g`tvAsctA#9@>n@gero!Le(K{{b0xx5<Ivt(9kaO~p;?fj2a5GMzzR-?X)%y{W2cYhz(kZIiLd z(qs=0HF=xD;j{H4bY!e4)zs0{)zsb8BhRyp(WX9QuxX&_0K^i;rosB!rlU>A>uce2 zhfRe|BTb{3Pj4D)x(M?rnk6+|Zo1Mip1z*xrW?Swn(j8u)z3A-yt3(Ggpa5rnn;1R z7keO57%7RA%CSm)U!**;xuGUf5vexMMYIu9M2I*e{zycg4@TmVZISJf9g$s;UW{`h zdn5ZI2f?qw%sO(I_*mpbIBJ+{Ow7iVmti*V!d9~b=ID2i*BVL17)m+rPuDPtah~$mLTbe7IYfQ_{ z`ew6f8RxLgQnR}`D9>1%qr^$#_J*0}&W7>koz1&5jB`wz7P)y(bAR)Gd0yFksQF0q zP{UI5$>!6*P!Ba{o6pO0%;qBEjl^3pifbMxo*Tg6hMYl$)qP3>Us4;3Wk45cKuX!#S zj>e3^Xe!##SnX|(c3Fy|-O(PQ+A`623go`%fG#9k zV{A*TGFB7Q$IQT6Vp7Z<3&x_cByoGJGqy9f8+Z?Je{8?2ANY`~A8l#uNNgx}GIlz4 zHg-NX9-D|w#jeF>VmD)VV)tVYVoPX6TUo2BHMcdtb!}_0xvF)2>xR}%ty^2G^mDDX ztwwRezpK>}Qn%V$y{%!{Lm{`ur1k#N)|9lqwWGC5j@;bEt=+9Xt$nQntp{2MTaUIL z*Kchd*4MUk$>Yi457OQe~-)sV%9>R82~sGA9nDBYDcolmK|5iSiy_L`7pJ8v~pX1lC zxATSkJJ>V)I{rQES-y;aKYNb>@ds%7rD zYL#j|cUAR1)%&;ys`slla=%u6L{-WCR#l~{;U1~#R9arC(yJPImCCB}^BPr?D#{nB zVyX_lMD=Sw+*inrywnlr3;Hs@5%h~oL2Z|0m; z6y=QOd{>ow`(gT47iJ>Mie9yr%iB=C{f` z#<4CoM0xlPEQhi8Y5@XZZyLL@a;doR(H`-k#7pYRa!Df{nv~G zeP05tf5bgv3efR9rqp~?^HH`+^P=WO`WDl7^2hjNjPbAXuQHDRI{$UX^QZWKfTO?7 zf19o1$NBHWNckcEL#F0`#QzA!%a8f1Y&Ad4PqSS9r~FT0)V#*O#x(pb{$FA2{F47A zjGYDkH!NSlDL7W3fPd^c#VUo0tx@DEa@ktN8pRs+7mBrtwX9I_4n+}rhoVeT#@?y; zpkgyCR#YjfSc#%qQO(}1s8PJk*5Q+9>vO-F`&IT{xT9Py|MB_;G0Jv|Q86jDi=FsJ z5Z?#l98Wv~JS3h(`vS7Qx734g_@o2bGAJFTw|kGv2)^Z`_j^4n$`xrE_=fzJPg=;9hw@uKDh1L3TOri- z*h)YuwUvXk*;WB<_SmZN9iMay*G;@7m9{BLuqn@^zxxos4FSKmPsvKz``9X04kd^E zB@{KQgz^^p9);Dc8p>Olmg%53@jD|0RY!~PMQq01AjQbd~a-ZZnnZSLT+s!1fti8+)w)Jz&!+oCH%Y57b zH^2hiA?^?hf}K6fLfj}f%9^<`?z=3?UF80W#ke1EKVWh0pSjB{LHgSUHlmEB)Em?r z*vHfx)f-v6`h)7t?BnV!>MiUO>PmGb+pexwSF=y5Usk`&I@M-%Bl|0LSlz@vrEXR? 
zv%iLCdmru|b$=^7JtlYoVL%L!0(7L&l}0z9hxvrC5ED{DhtMT-3q3-gFd!Td28E-- zabZ{(fiemh6D|sug)73ea6`By+!f}81>vE{i)v9L7KnvniC8L@i<`v?v0BuMCMbgF z6#ZgEjEmdE?cxq`m)Hx(?k!jl_lXC^!{RaV1jwhvGvYb%0!Wv{N%5+9U7QtfL%Ao; zi;Lp2xGX89)ddSup0q|PlGaIOP&P_i-W9~ZX;>9ml>k>6&;;o(rR)D>dfo`-?WFB_ zr0vg;wm(bSUO?LZ9BKO+()P8a?S-W6?;ve2B5f}wZGRVOdkJa#I@0#{khZTUZGSIm zdnsxA`$*g0&n`pRKsx`QNaxE*=Qonh|0U`CCery2lFomKbpFGj^E}(iDLEzk2x)r- zX?rF2POcdAzl1AcFOs%bleWJ^+Fk?N9$+tXAuhyfNzdy@&$XoII?{7J==rCak^2ny z8CK7I4)okadfq^K-pGA{`vQy$)OIUq`!`@*e3Sbo6G_`8(sny{8T8!AO>&dWrFN(t z%&qpSz09Lds1x`N*Ll10`slBL@b67I>q*9}rvYaH=KzlAY}qWE z43y2HB)V4s{~si88l^2l_N#9UJnpggeG*4jc;c0iz4BvVZtl%sg$I8+~up(x_Na1P7k;9cY#Po_-Vs1qou@aawd>SPX zSFA)iL>O-t6>=2vdk}0+$~QiRr~BF#w{3e<*lyc_SxU-Jz8-MjK{;bd@+sSyH-&Sy z3$Ld;)W#(MWjSrLZwj~N3}_b1c4Zc@-@Y19 z)Si6e{C0ci6Tp1x)7bMkw~Con+Mczaf4%GZLy+^Z_G`a~nRHI}X*{qmtpw(4Gq(K+ zTb#C)85`%wb>wGk5N38)+L5e|tbDHNXOih7%miny88gBwuL*65qe{Gn`sk>Y_0o|U zzc8*G7J%L1MSXXK0Wn7^qtA{G*uv+m%pzy<$LL=iBaTt5r+Wh*;TUsVL|@{#jCthW z&myC3$9v9XlhHn}%qWvB#x)br7nof}+v)T>sa?z`JGWyN+1cyd`(*Bz`WO0?d;;{L z^PF|sdBJ%J{g`u7IGpKc=T&EfvdYi~?Yr%~hq-0+I~hOw#52mYiOBerv@gKxLf?p4 zY0ODuMmqDH>3Dc{`0O!9jTvgpPh)nP>Ll>tH)No{M!&rh)Sm#C8~n1CR;~zHUu;SP z^atmv~v-?-9OuJn-^KbeLr34RcLA^JY>hpu*U z2tj&_e(t;s%)Fys%DHzJt;WdY-brV{ez;D{*>~DE;hLiDHP;Mg-f`Sw_8rF)-4|uy z^nHeLPqer`i=Vxu;(_`zYah0(znXyG}fZcRgIfmV} zZX;e}=6pIH#vA7S-ClRt9doDT+`hX9ubXTV+=IIhbNuc>I`55aQ2M&uM`b&nt-rM5 znDjN{y*_P^v-g9mQPTK>w(bwuQ}^-ob{OpEk;nKwJGs(UqJ5lsyly1}>O*V?kAZ#I z2ksI3A@`_z44<+4qWdyEw=3z@40qjgSdTUf*D~Nr28>@&@91G=|)&mz@f+?Yl-PF#sM6AR+CWn#LOaa{%&&t>p* zOqY()UN1KDEX#Pq*bHN@mGKwGTMd~#*U>y0;@A3R=Zf~!n?s?2!9 zbtYLE*PCS4oB(j_W!IkI*66cj#-PuRt4(m7iLWC&{$O1Bx_sSuJ-!}aAKENjZGyfh z1B@wg^$D&$!F%?N;F=TPMYLULKXBCv`jpJ^zG+;0f@@BE3%J?@^%2*b;A)f1Die$g z{1yIcYzO-Oa4XwGOj?u`A8ryumH1y$G%r37kj&PW#3K6FBBj#_JAD1+E2VaD_TC@pSn+tXlSAo#F4|~unul)#ipuG!r1-pYi z!9LVo>@Spq1Hl8R&!`K*L0tC{9L9dcZ@$v>Y6ckl|M{2yhw?A@k5kZZyYPR5%+Yt} z@zcZxs`)O-2Z?j2=CdTfo#bx%{VHo1W#Sj8<{QNF@6o&s`H_N z^7qQo-UGCkes$#$_2jaAZJo6DU+GimT*a4(d15OabDp?@YVt{@C&K-fWa?p_dYGdg zR&1l1e<7y3Q2dT$`ZZc!)&Y9@`~zb7_}7R9YG;Cu5vk@V$%DjiMOJR4`d?AaapLEx z<`rW32|B5rDcVb)$bXD#enE06$zLR<-r(u?*E#9|?j>TmXU~ z0n$D07V)p?tb25ptX;A`Z=$^?=z6G6c+wNThHB_~c)A{b4;}d%;*Zn)(iJLXO`v;J zsEK=sNn7~C#MI9UI#=<1VlAmpJV5*t;xEXpJ$eNE1Cl3+ zX?#5*{b%*`M0Sx(SNQ1kqlduXq?$^qp`K?SASRu8G)Hn5@#lz-b9uzMcy}cKkmO;K zhe`ep$)6&5JIUKgo+Wvf-TD9NKF?;!bfnsc##PzJ`mm#F>{ z$tOsTlKfecKTGo0h`;r}SUdZ8pQ`MSf6v+bobL?@-8>}`S5o9zBzg8d=X@V-(jAmW zo_@kDJyD2iBqSk&sXG`yl^T^KPsOA>rBWeDlFZEzNs`2M{q}nAZ%wae{`|ep%ldtO z>#V)@+H0@9_S*YQj;cpCT<@F(D>z)ykCf#1zi;cNwO1y=^25$ZeyXEZnl z{+PEffNNp%(kvCu3f`^|?2x-0yOx-E zmmUnYxt)xvs0S3gt$KyG^sX(RszXs~3x!IPAHKIFH#_BEzd5%u5PP zoH#S#*xm3m`l4oVZ9T(|px7xCt5x;qEo;GDg$7$i5Xs8ckaiE{E4|7}vJ1G(f?FGE z=YncbT?Ks`dLcG23QZNEi7Tz3tR_vxsBBEK+qy)EqB>RVDX#S%<5m5j)IW;Iacyi< zdnl@9MXV}vj=BiSj%n(&W9UCd?mCA51;-_Sx_?2kTee5T*VEwGS)gt-HQ_Ph(9lw9 z%hH?mwTAZgh8S8FTn5}6++1*Myz#eF+6+z;ofhgn1ishMrrvDuwcsJ(E5Q-1t}?Vy zR_BfA94l{;zeU#<>U9^%bMjr$m^aE$b&ej-y<}*vt}E2L&(Ni`nkU-Mg3qo&^CR$w zf}2@tW=#6J2b_R!@%i9R;OoHsjh?T>=Ck{}p4i`xFC-h$*8X#RrUFQ9iLbM`}-ud^R|55w;S-|A_J zo?nB!?f=gQ&Jk~Q2;cOaREOdkBRqKAH~(62)CQ)pIb-sL@Q`d#GREf~%cEO~}rmf)Wf zwC!RPC1~4)wh1y9dLDBzfwmK9JC3%06VG^m7V7>@-Z~XU-eK;Xik2gi&YzKA4;>As zBD1zzs8f+Kek{~r8RL4PMn_xnd@ixYc^q_6b+i&peLJgKyF-)QjuvN9%P=t!tr#Js$Q=ZmnU zh<;zf1Lf(r2oIFU14VeCJRT^*1Lg5R5%K&I9w?HPZm^+VYs+EQitu(HdVH21`$&&! 
zzHs6fdtf;DKYpM87r~YzJsQsard4w^R7OK{Y|bTqO5o&L4x^|FeSlHqlBpgb+7IFX z#dzruUOI&2d3fm%t9vo)v5tPv;J!lLI%uxr|H05kLe*+$OE`7>YvEiAZUAloE(0zD z?gj1zo=)58g7M5IZKB0{La4imzG?_oW8u_*(_KqOdR?g7-Ey$u2sShaKMwBhU&X3f zWN?0?%Pi>2n2R)R)3mKj+rH)m z;OL`ziC{*NBa+TUXpZGDmdVJ^hxUS#!>oNI)L@x4=FVW5wdWaW4pz-zq&Z@na{`)! zZLR1%qW4zxbu0ZI5E}c|a_I3ns7qY^gR!{0-HpfA!1;`}Yv8Xz{seZeArn3*)LFw$ zT9=qvh@b1?lm2+A8G8C#4qjS-p8hgMgJrz^vAmEOsm_cPB43F7qtFAiDx}o`S`~_C z3~m4}11@)e2-9ATv7aPtiOC+SmF}zi99}p>6@bx&mJnF!lmQ zT^~A$u@^A*NsPUKu}@;`1&nP!$C3ePg(7%VVJP4fyeOuoK7Q%eWpE8lyspozYuG zvryNOUKBoe9mZ0geiul;PI=2g{-_QG)8l+BIm{@=Fp9%?=CJpUQ2cgSTA8&FVuP&W zN9AqoOuV;bW1)s`&UdPvS8Co~!0PBAbLSmp9y>6ndEm9oX&!!Vh7EZ#YTZnzn@60K z;Q9SXens1QwA}%xp5>rHLkFU#9(o3%rydb95Kq=4LI&c=dT1VqC+iV)Q<=wl`l#GH z33cn0ejs-vtc4Hg_m7O|6KlD6(%5`L?yuz3qdsF*4&!aH#YAikD0Vt?c)OLibMWvS z-rmUDL5%tu*5*oVy9Svd;Ma)2+TP^`kJRQ&?XX`swfzxBewI+j?h(_y7Wnx#tP=V@ z{%L5b^)Q}kh-dQfOhY`Ahi4knV;-5|eWCg>@sCrCwv|HNGeW&}7Bd&bv)|ksV`MzW z<%xd7k^Hlhwqu1lJG1LfIIEz`u$u$=SsDNmln3 zSSAf7y9jpRt2Zo0+uuZw{z0hO#a=MDqEKfilIxMI&WhVb zUpwfl59j?dLY-Vr+?QHR^frgn3r_Rw$y!G7l2A2OsDI4j?CE%xM4eD|MfUvrf}CE3 zsup6K-;PnYK>kTa*Pf9+38y{U-lgC6r5jDZUt#|j*l+V{BmDC!ts22ug`P*?tgdx(>tg}OCLFOwOJr^y>>ve4DWlCwe$uEw2PHS*8Zy5TIoM(3%+Ae;D;cxMu6--uXS1NVs2y7UL4I|)>u>7-I zz-7t&LKPa;qhS};mSF7!G|WT81hV%8^i1F`Vjs6C6H0G{mSuMkZB6l5K9Wu8F$Rti zb+4m2mgO6H#jT*fjoXrO(xX3AdQ{_RwE!E|VZ#E;q3r@`s|VPdB|N_mnd=3+HzIkX zNIJ#5y-D6W#bnpw`0GSe6TvxUz!UJf^i=^#b`wMIf|g}p>k1XzhdZS>`&S=4(?{gh ze9;iU82jgQzc(LC5@@cBh0XEavv@BT+qgS*4#nk+vq-3Oh~5t|(tw((8}v4z?$zL2 z?v}46658m`4CfEtJk#m~aoZ5d+Eg!pVE?^NIDSuBU6-vqzZ9K9-Rh-z)VDv8^9^;{ zqoKdZJMGDA$NkoFPVr@>9H*CNw9R8(za!Wi3+ElHU-X!2@E+=&7)nL$&a*a1 z#*06Vo!g|kGv~+e|FeDoaHCR^qrMBio#`1TF zPt*f=W=H8Y@)rA>VSl0bJ*UX;1)r6(6Fe9fp`+v{b z{}efG4))CEJhgyRSRqk2n0|jgD>YT4Qn|-SvKD++lW#G3tUBxQe4EF}LjB?igMSvP zIR(4p(3vNvGvl#9R;5rAx0aTrdX%<@@#J}|zQfXsInBup@MA(_WCPWK-FXOAeFyg9 zqf&3H4z|L?pKGbW^F&_XMl5~88F?5!F9!dS+%gBwRycF$caHxQC!hMR2FMWPsOvsR2fNjST-~Zm77|a?8VM3#UAsi@a_|=MVaN z!S?JUzv`;H^gje6GtLWT2lS>0hLb^0fS$jAPjMUeqflp)aMTdq-tKiaZy(fe2}b{W z!ci{^$9WIVV&sR?*8U>1Q+o6|)8pM)J7pKXU+zVn8r+UvYB3td%3JpYoYly?$X_oU zl@RO?wboD?9yfb~ z;jD+ljhxT#gZi&xq4y*=ghS!{B)~A9dWAb~4&{wj*BlPF=C%?_D&)&TYehgYD)L3Gu zQ;Ouj;1|H31b+jw)Z={hjFSK|^cM z<5+sEL4K=2?rjQxFZ`zPyOMdnhJH+4HEN_V&i<`=RV^3KG|ET{mzFrWKXXL?kCjLU~-#1M)x*&z20l^28&CNv&TuLagMs8 zG4igR*_r!!;O4rjf-SumXUsr;LxeSWazT3t5T&CvqOqoS6I%z{O&xSC6+7 zgyYpaXI71xO>OW`sSOl&$Ywv{Oo|6SmibcOiHtWIneSxP>yh%-JLKIj7@3hoU-H^S3lzWoF+>i0wne!8OET_3``zd?dCcXC)&8FHnQLQUwvBblLc;GLx z_T3BV9>mTr%#^($+>M>@Vae|7IbB-)0c~yI z^dJW=m1t3KYqL|kUG!3^rsQw@Tz*k?J8Suilp^(+U^p|8`4SoK3DqC*z^AO-J&a{1 zW7$KEG0=?R82MX39| zP(NldZzJz@^LD;aH!79tOQWGX8n{1HtaN`Q-n%4Qk#1(!kbYm*o5)(51lwPT7|St6 zdYYVeD|iB}y3>lkIx`tebbe*M&A9TJi*00yr{HhH&f9pK$J^WRSRL}`M5@+?vVxqA zo@4-i<8Y?2%04IZnQOh8U$Wi+^ZTrc5PAC|cse5;1BVl_Q-Xz$%CA@IBKh@7t>HJF zceB3)jg-@?{+yah^5-mWyc-C0W@RH#PBiNo@As_7KlsgzRkOJ{e2H;YB0k4}@6uvh zl2P9UX9%43aE8E1+TWwIzY9yQy9(Te%&>*A``A+qe<$+Kd9s?$l95`#-yu2BJta|X zYJkz~`+{HP_t_1|Y>>YTRo$>3jysHB0e;|S>=`n~Gx1u~7=j!7hmCv_@a0rQ&vPG} zJIiUyWT!^NhRj_X?;M_&+4)+qYk$F9 z$gJ_pyHhGO*3@ld-cI)9T;E*ZE>WTX*U&A3-H)`?MN5UM<68daVTVY@4hVGy(<&j< zTPsxi&_-JB6B20aj-Ex5om4yYl!%PqgSOpmze3&J(9XKLcw6CfwadI61Repur%*Lq zN%Rg99NQ+;xr(>Dg?hQjjE62(PZ;W=p|f@vi||K5(dNyvJwo?FAA&Ab!{xUgp{jvU zcP6-&XR1fHSg>cACgvX&#bj_kD-O?T|!lN=wfVBt|L@( zLd*rbt}7g8EpO%T+nYK2`O9Rf0EfAg4Q=N>CL?m?Z)xTVc7AZ5kyYc$-)c^A#~M1p zHCfTADDuuM=+jUoREtJAiNBezm6+738>H}q$xp5f#<-$>qY%o=wqI_8|^#-U0$ z&Q)$>>@*{aoz}Vhuhq<3$AeCGTMEZrY4k^E$b;6);@xO7H1;Ff=0F>xKhOCmo-|%^ z&p2|IyA#gWXr3)Jc2wkJKZy;o*ts_RFCCf)_4^2wzf?DzCA_T)=Uq5If^P@^nYS;Q 
[GIT binary patch payload omitted: base85-encoded binary data, not reproducible as text]
z3w(?3`(+=p7GIJUU*Z9T;O3>lV)Mc7P2?c(IB7{5KR4_QFhUQpAtqlX~1tH zRHhE6qXxg-s`;BFrez(w3k(Fgy|Dk-Ps|wr{9CYWTj%RA)p#Vz|0M zipeMFc53Hc&A^B?X4b^#{yins79*@CpZsYLfb`kuVrw5kiQ+`PY{ z5?lCK>sPxSY;Bo8eRzKgtZrNBwcq|p1e4vi&Nr4<$#iAZV`pTpcH5P17AhS7xX(}O z(r#-xHqqyzPi*1o^m8i(m4dX{PN`^o_?3_lbak>U^A#IoUoQX`SR6Xp7ogZ6O9`6yGRN3 z5kYH4Z;cO)z>j3=HoufoSw%XEi}MI$BkW?)bqmbi51XMEfCU-Gdy0$P*$dK;hBhm3 z376dUA3xMC*@NCslXR|`PZyKV{OdK|^MB<7EevF3i)@Q*BVT~xi|R4zPG}}8W4@tL zXb=1jkY9AeEHJxz#b3P59(2>^-FFEw^;>j!0MhXz|I(sQal3)MCkGw27hP>w85{0& zJ|;Z1e&3h4*LXQG8RPkK@(KwwKV8PI;b#xc+$s@S@+ZM+#PX{ChetuSdOX`!ebVJ> z93Lhvtxu#6zA47qwVy%$#ZSg@V^mJsmO|Q=Pip$?(UMO(sdq0eCKF9^WvU+boUWSd zZ^t+iuR{Dkr`oNHzbQvkK8x`lmyjz)KGu9i;Db3&4fa&XvU@bO_Rj9T1COH0d5$&s|X zyDEits+}m$udE)rStHJJGGT^Y`Iyk~+6Lp4+{V#8B#fX@c%W=RyFw+7<-7|Aml1^*`xndz2MhomQ>SgT~AcNbHHY(6I=Qq`8=<@2KV0D-FpRxe z`#?X*bOolSv-8!w)XChnXK4RXkZ*1yg)`uxBeG$Pa#1WU$Z*mV;5W3G*5;n6!9@Iz z_C*{DTk>!bTz({{7b@hqv+sBNkMLtJ5Y2x78Kw`m78UY5ZFTrSpxETLYcRBbc<;R5 zh-i{4{o^kLZMU1Ys(n^O%CyPZG~rgfDNcAa0eiu1ztLlB+nuvX2!Pi++|}WB%URnr zotG&aKXEcTs@7P%gJk{5;k~y82@fBCKCxyr!PytNuj8Y(C$55d8xb{qCGtI6#!^@# z%d)=)4X_rpgwiegu>e0jzXN_8Zq>M%R)`#zj%}W~2a+N{zM<#@ihedYFmfOi0Hfpk z)wWfZ+Xd&GvK;#vqQOq&6;pZF2xCe8%<_YcX%&ASEw^b^s!xs_ARSsqqbAoqAe%j=ngT zIh88xzMqIq%{#`gp8kjX-<kXk#61hJ9gkr#@N71Q{tehDTRqaC<&Hn zcDnN;!vM+hyiC-#$m^;0u5ktfiR}c{lBW`-U6zq6Cdz~`M~@5=aVrm=y&27V6e=H) zQ;c3(nyk{OZN15ljk|{6m>9PW}p|tV1w~2d5 z%RN_#svh^1yg{m?#|E_?#3}0PBr~YJQU{3N2r%DaimlnF-~!PM;ZnG{g%LO;&D{`O z1Yf^(CpDlWvXsvc2|FjIe37@bv9ysjkj>7*(G<`U(92R+pk0f!M9#L~))dye09n`D zzgJJEQR6)Zh);``@wtHR!Db?q!AnLKfQW{V$gvemli!QTl;RPDIgG~~j~>1`>xcsY zL-4PmZ22{brqF@p$E6^VkxNp`(o?%4u|urV%~m6jB;Z0Y9o)N9>E}Y%=m@s%k}Dyh zNOm!|`AgeSmr{eFW^YAHEZOrlBiNO(wSimbgQGPEecESLU*B$W(ye#cRlmHLs7g^5 zkIlzOg&K{E!UYB*$G_PE(g|t=8Qeg}pDr&C);Z zF{+wwK>svf3}@X2~ zt@xloY8$RfayHjlIaEZC`*gs0Kh>D6cx0dqZauHeY7H&y$e?Mswl&tLCNW+jhmYj3 z9h3NR3esT!xCy1g4>rVZjYjuZqvG0e&c#-{;L4>W?%<%GHok10i55(Wy-(g)ibBX8 zh2zgReLu(j{R6RfyPRB}Z&vd%qPpq->pbgys#gp)4ZB)*7nD zgQaZ%gdt4M#bbd>A7~6PV#mKD_$_5~dfJtyrV5yH-uNJ1Keow!k3Sq1PH}I3bb8x| ziGX}p;blE;aBje+u|R$lNft#mcnz1W@7YPTOuCGF>7Bml0l!wt7?o|Fs0a8G_?gIA z8Xxr>e1Iv*myet0H-+ePbo$QA-pZ%9U&TU$c4nPlV(AQ-q(kYV6)DU+&1BpBkP*ue zgS%12?JC98Hl~)$(UI(W@Vw^jt*>8GX!3PFuoM9oOF2DtIXx*m>8+4_A+W8ZrR-!& zH76OiRyze4x*OSD!Zs~3l;+VeR}~!EEk#V}-1^iqJ{acVw!Jhldb2#e!8tlI;ypUI zoqEJPRo$9FGyWh|*AUg=<41CRl!!9HI^9==^V|!kV4Xv5KHw)>8KI4Dh z6&3il!4vBuXG?kPfRtKs{MiLsg$O;TW){!Jy(9}PQrHr+_Qz*JOaegizVGcg_U@A!#+y?)yXe`U3+SZC}*W1RH@;*|q_ z@!tbk<(cIqlAXhCSFd&$urN!{u$;@JPuSg2&<8N^eY_hbGFj8|*VRl@;yaQj=ltbx zmtIjf)cn&y(AzE;U?~~xOpy>i+2`BI!rrQf_%i{!M8+`Xb0JNVPMEK!{veI<4+6C0 zOEMH?dby3_FmAz80}q+q?c#g%sn|jwnbIli4Da{8Z0n&m{jboRcQU=~`QGS_RD>O_ zRCdT-s(&p6`oDf^YUfBgJSQo2hEr%_1|qMjOJk@B9JcP*EJqoD3O?waVdlOEG*o*i z&;Z_CCvDT}12gz=vpat$UQ-}kh|W^4%hTAzPlL5NIctJ{MC&zPw~ z^g(=d%%mR~4he~UDJIP9v99ftRao0^gu-6a3zo2Ek3@CA(7+cbl>P2^+VHkp6wL%L z=7Xxh+~e0hBk-2P?rb4Tf!6zHtgkq{3a76iZ)0<oErI!4gk7Eg*y`M{dxY zoE@_=Xqz3O6PZs^! zNFl+?*K@wbC=B2A1HApQz3S?zCp7zs9O^`$GTS6t8Km@}HMJ$mj~)=hV9eM4t}Co- zrtK|o<>}Oy+N>%{SsCp04MD+}$1XHHuA+_q}>Z6!dF!)!t4~l;Ibp-J!sCZKjq(U zXTSG3IhbN$O$5SzyavR>3s$Taqym#P<A$p! 
zRAeeD24b6{VeGL^!p2C(1jZ3gHqryE4R25Is+9Rt2tAAF-a{SB5!krOyYKaY0kNz+9nNg z)L<=T^7xKR0eJp#@!r@`L*FYj&oFmZE?t~I$O+w8aGSPwDmOzKV?v@Ar*}ETJVYb< zm{3co4E3bCW^Y2K7a64uWfOde6G9xK6GGivGS8#n;Ea!LAj=PSqS%(`CFw=#-J6eQ zF4Z^goe!}^S;TgrIi>ZDa1z>9>ODXKG>uh(pvN@A;txpMdLirDa=%W9wvj^^P<&9I z=vv(*`2GZNe7I-t#SY0sQ9~WVR-($1A_9T5bap4`Sg2SkQ|~RY({MC#Hl(x_np3JH zt3%%~vu=C$KA{Sod@3bf!@{OWJe)?IMNxoKd99!<;=1FA$}~qiQEoHe9tvekhJ>TW zi^b5`hmGoBtcd7%e(B z!N;_Z6LjTLSzBfFyFXWu}xBbHg!zuN1NVRhA6w0rVAofTne&Zi+IwRTkY%%G9Q61TSiF3X5;x~Mm ztR$LHvkb$0RJ&)o8Cq{?_D?7v@=)@-9eC!=Vz$2$+H|(R(5N@&KQG)K(Jrx=#4t_qEU$$t68|2&eAx zFIQ%%hIp_Xd|nqTi7K&`!_|;E5=Cj%qsuY`AHPlFfWYfv%)iZ>O<^*H1AENkwRYzR3g$zKuG9J4qxRVD_)J&(AR`I@7z5@-&@^H0w z(F_Ji3%{bJd+6Q-&Z$2YLEBW+PU;Dqm{`&ppTRU%6_WrJ%WLa++RJ2|^jfZ95yFH2GI&gp#KZcsLEwN%od#^X8zb_WN{2O5UfDeqe;pZorS9~YmR zo;;qg9^)V5pPqK5pCJ#@QZ&mk%%R6WKY9K@yr0 z#uC;NUnN|7S3+1(S8?4L$Q1cAoP_^~snEtLl7Z7xwbQjTw3C;znt7cScJBJuBaw0A z=_!;?m2u<%$%Ay}bolhC^z*bl3>9BjlIlgQJ!X3H^W=t-ACsl8AW-LFo#D{of?=NF z@nPHKCR#`3jbFa(FsrLe=!>z*62y2n%p8Nczaf%LwumE#*1q~MUiXHNz%Ns}>BjPuG1|a(nos8@ zqU9gB%TlRJg6{-3lf~OLCT`l(WGU%I@F?T?bt9I-;ycOJYHCVu-T_wM{2x4ChOsx^#R@*2YJrO=BHnAp3=20_G2*b_)!8+L<#Z(|Y6Hj7HO}c{n5W zhXiG) zGJAa+Zc6c09yXBN|3o=VUYLLE;SiRjmYP)nm7pU{wNq1_&nY{ejAewidf{EEj+Gx^ z)E(xrcvnhS#ZK2emngYD&ap%rSS>gbIZWs9ow!=)-0TyPuZmgn95FScjrIPsqOPox zckZAMuw6qhGiJ5>!Fuu0F5L2Mn@`VYPu#d(4t|Gv_X#)o3AI|y;p(K56!jNj%kU(& zPq4zIbH-&;NXzg1+QOERjQuz@PK99!y3v$!HJ64Y8@vP2{QTOx7HqB)zD_HPqd|w6 zWTz@$i*}-d_T2p1ane-#jbgjHxyrZp%(Q&zgNC4v+#aByRI4Cre#U}MgO+){Jim5& z<{(A=j8UW<>NA&_{_xnLjIM*U?+>n^TSovmX{00JQsGK8=X~mcFL}RLVqSX3i7z+O z$Xp^^uQ+c|Xu~E{2$hpevz32LC9`(uYd`tsfuUn1jd4VFzbdx`^e)VmljE7dKVgig zThKVwbm>ZRWQ4_~#*ZY*7iOI0x=^H&X1Zo3DQE187nsNWI_J4J@eh;6`gx1UYT=h? zG2K(XiPv~&%kroj?awbGo95^(CNuQ1Mpl35Iu3JmXvp`-ygdeSxy}p8d6ZrfT>$_Z z%U>Eu5|07$on`q^Y&@Ti7mA;G9JPn?8b1$P*p5_;L_7Slb+9IX?6e^H=WVWWTasc@ zlN*LBe@E&PS$jaoFuu3pz7_4hQoLi7m!Uzj=+809xhFOnN8;i&Q)1@@lcLjrKKXwc z4q{7NN+~l7Oj<2p<7N`g#8ipzGD~sLx;%1km}u1>C|R8cZItKQ)cGNc|NmV zi<)$mJCALB+43L#3iXxL_4+b_YoIxkhtBE0%4+HJ`olJsM`R1X*#C^y^+GZx(L3hA zL)bFz{r%GLAX)$GRJlohORA!(>iG4^AajYP{fYYKBK}BM>05f%4S<<7?*&$^^_rHu zTUBkD&l7r!fk@L!ifE%s1t#yOeZEg@%MXR~qK%9SD@iu{7NagBM{zYz!|eD`#=OP& zGX&;7dUL|H(hc9Ebpg)`1xYq(XX@zRKb;(^i>wItt`t_-d~I;+61Z~o>&wkypH$aV z9z4~&G+AY^_}7Ey`hpy;_ubYzL;5Z6>+E=2Px$N@^Nj2>fxh=owMcV_yiq2<*zP5I_DYY(~B9m;4BNju7VfvHH&whv}4WTnuHrbUI_9`Wy?U}a#ne4cyYy@5$z_ZE3CQ2Dc+0f6I=*{2^~~%33TAL##d?IH4Y;AA>IwFPI>+XuGKaJPyPI839=<;qZgu$NKlI#OFrq zAc&?DrKmuh=8vOq$eGS;s~9;ow06O`da2NVK&uU5M-G!t52W1F^(i|cY|a6nZXU75 zBQZJm$a@v~ycew`{hhB<--Ke8=r$zQ;{Du6Z6&8;-;&WC2PB;0od5R4z7EBwHYzxE{>J=BxWKsG)6)J8#X_JY9p)+*B{X@e6uIHUvOV=2XB%5m}l+r8$a-F@NV+v`1Lh*H~#oq&05Q9vROaB zdb}*PinEGy$GU3YxT+m?#&^eiXMd0GA@Bt83lFdHuiDJaW4)fZbeN{N*LZZ8dD++7 z*30vw=iY`YIsa)+7>bPUmtJE761)X;IW%MTPgQI+w)k)xhT%^=W3@&NwM2T?^3 zQzi5u0@dW5GB)mjlz%1UBe8FDIxYR>wNxPl3Xn>J>>Nb@M!8BfsZAea`$&zw1E0un z^$?n?o^qv^}WC+xZ)bP!+!nJ??Fb!PsYJK?gK;5Of#`<@EcR;WvE+uwstoXi1>Z0g1c*5}n+weMGQJMfoqUGjFgtDc3PUxfR+02BmR z*DROCo+*Z`6u9x1zn6fG*LIg7obI-_!jt7w(rWdN0?kd~>ya4E8G}b`4Z`vWY$TO0&MAlZ_ zfShyEBz8`OG7QAM;Syyio)}#-xnxN+fV2cBgOzE4@auU@#@~WL%+;!|VS8&%#yYRN zpunbNYu}4#UAWP!(QB(gW93tiWm&C8U^Qz!YY6{aZc5DOMkGCi{?)XVG>IUT2sqj( z6_dGu8V`zZs(5<4T^jRTJhSU++|?`!AF;vp-AP(56I3{eU`FEi(xHfnEI4={si{#j2S8E`F5ogCCd!A1g3c(ifvzRWX#veN+9aqV_u_ zqplXMHyKASgySi?Wr z6zHOA%-b}Ol$P+=V5n=I%n25MzB|{b4d9t}cL-gaa(4*UwGq;tT(LpcT@|!p&|Ph{ zA=F*0489nCY*3_EUu*$70 zJDE~nSLgflv$euO()HdSn;Zk5Mo$REZFDH3nqGS?qXEgHoKqA2UhGZ`37Zk0G1UEG zGtbWo;kC@~bzW5KX(svZNQZ1<d~!TLdzZ^7H8;H!|W@DvZU 
zyWVy*hugw-g=(xl{a`;PWt6+TcD)P0SliAy(zgw|2aY!~!W$Vgw_WcoVuL*O8LKaT1|yazO|g`YTt%bRVj7X zg0p-s5B<%YC|N0m+1BdPpE{mY}- zEQuo^p5b!ECJ%j+Ky;-Pumz{(R37S~MWdx3338=#Sx<#vXwmTLN3OZkxeTR3^tEX2 zL6K`7bXQZU5P%jvF*n##p`EQY%3%j#S^=FvvY%l`jYA#1kMSX&pH3;7Yp0x_ zVImrBC%2gv7`-`w@2mmBT^zu0R$Ieql63Z5!?WXCjb%t5V?e;2mv-CAEi6WrsK=@3 z`FJMfa}j;t@AL7az7!!!)ceFAfWW`GLfW}Shp3H*tfD~)=~VRMHZ?NODA<2`|MVsx z#Aj0|0+KIt6PG=QAtvp1QlvVub~0}{*V}Vf;U@R#*AB?x!mPrcXxHP1rG}t}!foe| zxFURXf;8}eUQhUdUwsgG%_V=igIM+%qC^>8z{nTNd_GPh;>)JV$x&(&RicuZxvn$&(ax^Nx_1f2NKOW=v$xSv>ia>`mw5Zi5%O0eEru7IxAO?+Dr4x@^aG9Lu~Y zY$tY{*o3lrDr`hG?c00mZNxPlkU!|P1J3(5_s=gTp7ljjDYuYe)c)y|+p;hY|E!#? zuFK?SJhAb~i@Ikru}QXz=x3ZR7R48#E*cj9RKwjx7>0lH=1vog+#kHTdk;f+Nf6tK zhvB~9ef@|FLNP|kxj)NpWEW8_8sRu3Vz3w;~P3oUO(6*SDPj+ zs;b|mM%S0sJu;`?;j(TC;E(MyP-T*{#D*CWypzDs5*LxTlg;fQvdRfTi-WLJ+xHMC!HY}8kp(|x~@4bMEQd06Bo`8l@^6VbEfZ7BY z4Jy&{1(>D)Rc%r9rvs1TyA*)vv_`A;NMNxb=EdP#m*6!sjE;Am$qkv zzX@WHUstj%3NN%Cqd9vNu;?}BfYc|bzn3^gN;uTmONt_O2 zIZxh{x8!YkN8UBNn;)1x%n!|;W-qh1*~jc__A~pNADIKpkIjMBhgMIkm(|nm%ywZht9ZL~I7o2@O@R_j}9o3+FG!TQnKW&LFB zwtlwuSie|%tzWHu)&c7`>y&lII%i$5E?HNt>()){mUY{@W8JmxS@*36)(2UT` z(5z5mXm)50J)=Q$`!vVihfm>XL22fEny7~U{a5iS`uF`JUNnM(qN!*Ghsm2B6`e(A zIEEgt9~>7W#Yi|M#))xonmqd%@>^%gZ=EB*b)Njz1+h}Bf{WxKE*nLSVhm^op8HIn z$@2LepTqLg9D4!(Z~n9Fyv!#Hu`4v+e$yOePT<^}Wp3bU&F{^7ya3I*x1jlUeYfNd zxWHEubyEM&m#vWVF7+zxP|)F+FH~kLh7+XYz0N zo}%AS-x+xOe$~ z%^N0rJM@d=(0_+}#j-g1&|{CJk%u04)WVU+eRtgP5Jw$v;D}=~jX9>^h~oij?dzzi z&9q)V^PRkt*KgX6ozhNar?w;PGJc1HUJJCpsQo!NfL&SJl8N84HL zY<6}#hn>@o`9p8}@gMtJ6;!6O{*>2U@b`XKrBIGa`FGy;J8Qf3J^A3Bf9QqxqaQwK z9kz~I$DjFQ6{*saPyYRX^~#U`-Y=^tl}@Gqm%OvbH`|Z@9p2g5@(=v;YmBor7|f<; zIUs`7V)fu>Rf!(asejE{sdw0{02AIs6+Qny-9O$xiRJQ7@y}y<{R{ldS#keL|29^^zumu^ z)%Wl7?_*8<2mOaxGyhTlan=Gy2aj26|5F?t^p|n6I9n}C%F=9oAZH+kZ3^TLMo`3{(nKVm}6|2CA}Mff|7t?59Az zKs~lQ&>+x&{Tzr7#IrqtW`UOMmq7bKdv+j*fj>KFwlG_=LuPBU9Xn!nFuSl5W`dc( zE}^HLz%HYw-N0_6r@hCXSlnvFrPajh!QZr}*e7|PQ1wuCz9lp)G@frwy3Ylp(nYlL z(uB2fOIC)JFepa%B}dU+pZZ=@j$U^brHgQMFpF~i*E;vFb@fJFzSl z-Chw^1dmynRYoUMomB^ejbr2JZkz4!rZ2rOgD<1+1z#rLi@wahmwZ`#FZ-f>S$)}j z*?l>DIejs{T)y1CJib_8USB?6eqRA!LG}yV%YJ41*nW0^9b|{tVRnQaWyjcYc7pxJ zPO?+%G&{r2vUBV_yTC58OYE{C3`5)zcf~z%Upx>G#Ut@pJP}X-N|}LG(XZo#SbDF6 z{vV0-9FTs8oS?VK87R5zSkv5~?}XC-b#(xxS`5dFCGcZHDgBmKdcTDY)$yaC0e(`@ z^WhFO#ZM~wZ_86dJN!h@dk50c8wb)tAN-`lKFT3IOTiq-fIT%myudQCOpuB0l6et( zZ7s-*y*3_RqSf6j)L&m_9a%?+ru$~HvaT!vvaw#Q7xwHSYzX9JpR%D4!$z3=jcgO-W!uTa*X3~IC8AOE8xhn8n1>U$2zHENn^Tx?_+2Ph@ zDApyDr!GZ#QT(m0k>Is3rZK2SKB|!)$9eG}sa^pb#k~tbs+URi3Q@gMQN1F}MdlK$ z(K2%-WS}}`raERZ&zTn>n|aB+iuKYpjis9Ar<%r5O^cu(oP}}1Y>YNaQ>`mftt+E{ zx&zhGJ3Xf76*!L4JGiT`HL!oSq8bg;b!2s@-$QZ<(3~b`?O2)QGS2F;+`#A^GD&6F z1O7l3C3p#xg_ejxZR{ht_(?93dfc`iJSOH_u=O2Vg z)b^{X?Y~A({u`_z4fuvMU@d9DI?{mkqyZa912&?EzY3es%U_4h=;`S`GQ9N%Ex&t0CBGOt{E38F^KW`DJr~gL{w~Rv>hzVSDlI&1` z>`;*GkV$sPB0FT09a3b6Qji^T$PT32YQtQxC>SJ#4dV4Yb4j$rz;!E87ijc1+864?T_jICzt*;clL z?MB%gK{=dfSJ*A~0FA$&+dLJI;+c3fk3s3iu^6m<4wOY6%Hz@d>AVRmN_kUOjPhoz zIOWY*3CiDOB`I&gN>SdDm8QHE_B@@pW@RaF!zxhTo>ii}BYISwzr$Xme7rBM&W)A6 zD9Ts)(&=113|Kn%r4HCS_s#PI^hT2;ZcGpmZ}yQ~_f@39(~j%Tl8I+4}Hbb>DeIMz1} zrmK8<2IOG?8wP8NT8;HIFkKhG@dnn>ukUIKNKBsu0+>Dx1c77SaP$)>4LOsa-|PZC zVE|hB(J&Ea!U9+hYtfJGguQSCPGih}>)E;H1GPfrrhKoZYL5@%qf z0J^(dkgWjf=`4kQorr$=wI)4zlmiN&FLr07SS0CMTGEjyeQtdemM(d#oz^66d(obZ zC0(IZuaq$dEflac7}+qgM4ar4_GwWtgw07#PnsX*U3qUln2!m+lsImMi6PBW*QGt8N2Yd4vj%`N6u^ILP9`JK64-Bnx(WhkHW zE2#qJee;3&(0pV*HlLVJEi`-9=hjGTlr`EKV~w@Gu*O+mS}Uzp)@ti(YmN1dwboi^ zt+x(YN33Jk3G1YF+B$2Uw=PxM1ufE~0=+p=vtWGg#`oy*Q`=dok$ymmf2 zzg@sCXcw{z+i~_Qb`iU%UCb_Sm#|CPrR>sn8M~}q&Mt3Puq!HCg_KgMRB9EW?y39g 
zfy$sVD!mq&jbcBbc+ZC;hwA9lI`EF@wYy+kXY*bd*M;aVXhl(63X07fnuAR#mWrj2 z%D>pZ1XBA~`By<2|6TuG9JgV_0%>)`g5#TG<{8MT;}#sv3`XBw3}g2dP#&Z1gHQt_ z@5Asa`Rt-8i~o0hOwt`ZLQ2vUAWad}!hUK`MeUh_+S8%- zOiAsTirOP5{_H%dvqe9jlC#n}Yh(a+N|r#X%0o|lVIp=6RY2Gk?@dNkVm%q=NFPfQH@#Sc@OdqqmmW31TL<2ZEot#(_>!2@p3OhJfaY{NRA?TEMs^f2JIyE7+Q`e~r z5l$ng5u|aNIn5xFj#5KE@$nCiqW3guPmA^`Lunq}8hkCkh%Iytzjx@`v|ScF5kGse z>=1rWgq05$x%pZw+l=4s$>j!5;0Ypv&&1Ms_+64rnEoFO`3TGd$z_~Z;a9Ma9NV2;L9(0g)7!~=U=5j;LXEJpaAaj_CP^GyTN9{w5|9AJl<5tS{@r^gA*1cWBsNaoF=)u?|?G zzn#PO$_N=*DU20rxl!5cE=$t-8mRTUcjR>HppM5l^_}{V+lhCYKpyHN`JJK8P$)q9 zp45|Z%nXh%=nLYW&2^7=S~#t-zqEBafZyrpbi$tUj`J?q&il^$kizNed;pH~q4Od3 zx?WBnh;;fn{UMz*z!?A;oPo|@$mk4n#=?uvcxNVFMg3c8|=SoB8ilTF+r*ma+K6O5YOvz`QL1&x~1)YU>-lEA?49`Jn z9ZFxNv>~Palb)_ayP*FnjA>-D#Ty0&TD*=2xIpp&eS!F3J zEP<665p0I-unS{?LvRAlVpMPg?!gmgV0`JYNR|;J%p5G16~Y*^46DRyV3gSq$Jed? zHqJzg$x(x{Vw%;-hG{k@JEqwkwBAk*w4@yEX$+<@4n|u}F0DCEZnT=*$wL-4*1?#| z$?IUu<>YfP=5q2o7;`xV9E`b~f)2)9P9c=Aw#688IdKlgT+S;_5lo9XMKLXkHkv!d zoZ^@kchHkKC9qvMTJ2JpmU2pCS{ki2cgi?rF)iz0yyld1FkVB?Pyy2l4n}QGMU)?R zDmfUtIh7rZ-JB{8#%@kkY)S4^Lm$GO>RJz-8rWAj_Lw@PpLKQHp_X#&H}ROpqle(= zCmLhg82b=+nqVK|*pr50s{1SR1;d^sh@!sudGcJwKi4DEQ)^Si;55ZC!N)oc6(ZeB z0H)&MBaHO2VJpU?)fmp^u+8iUyTcuf_$%Q7NVyZE0%~I z;UE)8b%wVuQA?OZtOD7`?xQoFV0uX*TFZyH`%w^x5szc@Aqf+7xUNizwICF zpYC7l-{-$3O&Kjq$%gVhIaDUfjq;GZ6;Od3fpUSyfrP+_z`VfLz_Gx+V5(s5V5MO5 zV2|MF;G*F6;K|@4Gt$gwRx?}S2yUFY%-m_7#nEbdtB_UGYG?JgCgLb{w{_7r>`XY~ ztYdey2ijBZHTGWnN+=M@5-JgTJ=8h$NoZzheds{w297+lsWQ)EFqAz?NvEMglootO zv@WCkqBWuPImt>SE0L@ol8s0D%k-|g@dq^CSI3jT#@(Phxh`iz>*W% zR3TEuL#h&~N~EfXR3lQ2NHq_sPNX`K>K;;qNDU%2JftR(nnY@PNF5?|h}7|rxrSBhrmXHxKDfq&t!B9`XT^4~Tr=As-U?kjRG~(u+thBE3AMH<8{% zdV5G8B7KPT@sNH*`Vr~pA^nN;C(_?T1`ruQWPpbZBr=f5Ko1#2WDt=-9x|B7U?PJ( zWGIoLM233EFe1Z<4D*oTM1~U??jfHM`HaYC9x{T+2qGgqFu|924yiZ#c^V1f^{j^1~KW$O`Pg^txKwC5qKwGck)~mEdvjVh5@xGofcgkZ4 zP|R)vWAPA4>wz_)IlSjy=X6*AD`7kAfg^AhuE7J0*CSbG7Q>RRW4xYaa3lN)G~+N) z&o#Iulc;2JSjiMBnHp9yjY_6di9X^CI-=f6Ox~KEyfr6zYi`(?7tk>lCLd!_@~@UA zZ!JsS`YL&AdDt;l&@omfA7fSWuU02-eN9_QdKamC2=1{Cgwp`?^_VwIKH3Ii8{K+_ zXdd$U++aKKIUP{nBlwS@Y+mex_(md~T%wKU1zC9RT;r*tNzGm_U1{@GQBvLHwvx;JJ?eo}c9vGSDmcIQ>~cICgB z7oA6Tc)n}98A?d62km6%X~jyfj7`xEWSjj%P_)v0@|Hv9R(O{VS&Lb377O8o?f5 zNgVaICC|&@5j&Z*PQ@05m7djohy|#(^$#oGMe9!bje>o{N>?-ecIQK+pY_7Zr!uX5 zo87xCpOt<>M?}v6hgi0-^7m*B3VR)k4l8Yp-j4ysfw;%Lu<{DjpR}y+gq6lJ-SV|? 
zt2gWjE01KlWworgyQNw?Y3;+Qg6%6vrzB|*z1vaGK>Sl{yBsy9rPB(3t-T1x9@P=g z&1SAP}^CJo-E|{i|VeX2kENx z-ReH46KWw~-|ULlqdgIK=AwoQXE}OIKVA1Ec)kLa)N?vgI_KHkKBmhkZAWP$%11yp z`VXoZB!K>>B^E)e@vYoGGv+U^mvi!U{c-;_j$TFOxW_*_UU@uTH5@}(bUd4mr|;Ux z2+!VaY&eZ+1;n5glg4E0;+@8jv`%v8f7tsD@T#h;?R9Q))3eFl<(!iQLI@Cg2)*~J zBE1WU^d_Bvh=|Id@{I*`5CKI%KtYryAc%ruMH$C}4N>glsE8;6Qogm{dlPN|9h~`o zpZWj)<2>Bltn9PPs_$B7pL5tRPjfc#5x&wpKSDpi^nrHYmGf~i`U~v&+dF$Bk)n`l z_ovl#wmKV%Q5Q0`r1P*dzZi8kQ{&hk=MH$uP9QhW2y_c_k=Ekk{LX&1Rf_Bz9)rZV!GI84%ikkm&RPmZo=N5?BM=n`&?QFcMr6Tc#M1D zMLJ-Yc77BshP`lo$g5-#m1PmS!FuHIQ$Ru!L)TJLXm;o}(xJ_vExwSlftv>58khsB;vqo}de8i&1lTCs@EzA4l{)SpKl7`h6+>9=RHH}pkh zCN_TMMf$l9GpUc6lq{lgZ=nI9D=)H7ZY>=dh zO4w)Q@z16>Y_E$VRG&FhF>-|I=kug+jsZqjZfI-h$h| zJr~*?dOq|*=*7@Wp_fCig!Y794cTXv#9%&z`6OXJI?~U?(4^4h(3DU?XliI$=$g>< z(6!v^PoX)Xn|Xz|hUSL;EA;2kZ8*oyVQUD_F0uNkpV2$?({`qMtzJu{pViOVnL3;3 z9XU>pmlNbfIY~~IQ)GdhDyPY<@=3W(ZkJEV9r9`UjNB=A$!Fzra<_b5z93(eFUgnX zD{_x~RmG@S6;yF5UL~kRm86nYib_>Nl~8Faq@+?xt8%Km%2gFqMO8^vR#jA0RZZoo z>MCE=P&HK>)mF7r?bRjfQgxZ?pe|P(RVUS1bx~baH`QJBP(9T&b&Z;?u2nPCb!w)% zUd>WBs2kN^)O>ZXTA=Pz3)L$1uzEzTR?n&3>Us5odQrWkURJNDzpJm+Kh!tsTlJkf zq`p^&)ekzLV|1(z>Nwp>x7KZRTis5#*O%x^^<}z)zFc?Iopfj2MR(QRba&lD_tbs$ zcs)T+)RXjNJw+GjoAhk`Cp||m)64ZkdWBx8SLuiKBYL&oqPOZN^)|gz@6x;Z|4zM6 zzoXyP@9FpT2YSE$P#@4A>5uh6{fYimf2KdzUziM&X-b+>Cd-sI*`|!iF=b6TQ{Lp7 z3Z|l|WGb7grn<>D^-O)1VY(bD$FkSCMzV&;*W>{zt&A#5HB>FtnwqO#>MH80Zc?{U zf3-+GK!ep9wSk7KJ?bqQr;ezf=^7ocGiavnrF+p``U-s&-L0qU>9kPatnZ))^cwvb zt2A4PJ|tJj zm2#DQSUw_G%Qf;*xmG?VAD8RodbvSvl$+!ea)5TAop)92HPktFdaF8m}g(iE5IXtfr^}HC5fA?o{*CUFvRik6NxCQY+L-wOu`> zcBrS-Gis;WrJhwEsgKn`^@;jaeWpHFU#KtDSL&2Ht*%_= zp02MO=!UwHZmgT=rn;GKu3PAqx{n^IN9oaejJ{fr)#LQ_dX~OH->C1?3-uzsSl_Q7 z&`b1#da2&1H|ZzzW|L@=OtMKasYaL*Ce4J5G|FgWjB7mOn{-ph*ymQIL4E}J8OW>x zWL6V0s}GqqhRj++W*s53Zjjk<$ZP~;HV!hI1ewi%Y-T|o^TIU||4JJ(^u~yEZkKn+ zJLNoim%LlvBmW}j%X{Sld7oS;7siNmtfYbX8qV=jq;hkRGgu=%IR;9F=QMQFfk_91WlZYHwmVesr?5@ zhyAiY(7(zbga|S!G>X0cb)lIQejiD|AM6i7v>F+@n(#z0O9^dZ|31x3FjLseY%$l^ zw%lSQpL;d8b2u(AjpM;B97*PK+>7OTOr;=uteYr~z0($o=QE$A1k=bg!n<{N9^qaQ zeeC&!^BtUTt0)67quSB9$#muTJI-|D&+(=^e@^gDQoy_4^x*Fki?!^Dmc7ujH(H)T z%RXqyD`Y8-q_g|Aau8P+BU%mCBRE=((&HG1@k}^YO{25VTr0#S_uH9sFN3)4Nbbuq zW6d~wz8zUkU^!0W^XyqYanDFL$J5qfZ0B#Yv+%c5z>JAdo(AQaD9Tfy?1-Z51Z7u* za-p#c=Qutx#|M~Jf;~6SpS^WY(vU?@$f6hf+FjI}?%tFHd`6V%d?LvW=UNM|oF;=~rf-w^qvBt$rkC{&KxO+Dt)GyQz+BJ%jpr)xoNH(UjvZ-uF<#|=@se-&rcA!eKqwGXgWLM55ssXQR3H-_Z)B{hh;4FpwG(U~~ zbZ5Ua+k98QE7qS8(UisPZ8ZDPW#n<3TS*y=a#m9*ovK?>HY1k4tU-g=I!3r>yt}Bt z5BPCTlAquwJ0<*7UpR_;4>`tHzUz2?x?jr4^h^8MPPU)p=QufjCBK?e*01i@a4Pz> z{Mt?xzn)*;spdEI8#&eerhYT0hTqa}<<#=q_-&m!{w01NdruyP>_|L!9*Kf>Buca+ zQL-J0Qte3GhDRdTjzqp4iPG&zlx0VvGIk^?Ye%B;b|k7`N1{r0B&uRZqH2(JUC25> zt08eq*+ggWdtF9_cQZJVirO-(;f#fPvPC}67FEuFmhGf1>@Q8Gv8b6EQ2WqFE|HDyhTmvv=b&gmM+hO7@wWE0kh=B%|T@)CI|YsKaAa_+6O?81>^ zFzc~|JV!|O;c^V4GwTtCvX*gFLQYr;k%Sm6YaYH9$CGl5X>Xx!jA0kj5X6&Fj9p)$ ziHs>%({!Hs&vX+b!Tk{l5Mc&S=Xk+dE5`duQ$B-dQ`jcaB-yJIkDVr#Ra?#oOK~!S>Etz`e5; zaPRCTxp&qD?wxz%-YL!Y&b$gsEEm-pE3xHt_U@C#k!mC=a%P6D)YuG#`8tuKMSydZ z$t>M+DmP-!@r>Q-Q39j3CX~o{z7-`giocAK8NYX-6pP;|m9g7cDq%4^r7?b=LXr`8 zCOcxr+oTz9=a9=7yDE7WXOj<(XPBPscWnhNo>A>8p24H?QT8AA>HAo+i}WIjXRMdR zk(7}hB{I?rp*j<5Ga8n4N7a!%&oa##$~nSA9C>2GddeAP0M{nN+U$Z4_8Q0xto)&3 z`ui4@;|$_+JYyr7@f)9IksIspz4Bh}`5t`_b7H=p&of)E*YnJd>0`{5)A}^ejGZ#i zEMVd&3CPBJe}{3~7{rMHR!fn*B0#yk&PTcbC*%{HSsa!>@`y*}3D)$}ilxK8bOreq zX;UWS>w{E^(eC;LJoP1qOYgw;A1}c zSc7r&{Zx~Y^iry2ku=q{IGXB%vkk%7#*CspY6A4!(jDzirq=Ft?se4Bo#oD=PVSBF zjnvtl?arny?i_b6b#-rd@1S1pJa-=Taqo8TrM~V$cM)9)o@>-!EV8j2w@)sKXO6dI 
zskK(!DUCDEp1dN<{Ty(=ChV>ydy*Gu2xOe0GPrdaRfhRlNmZf%XS4b6-2r4p1e_PH zPgF*&{=10B3fGZcK4vlX{UYig7mEVaz~%uw66XPIQ=A8|_gWwqu=iUeS+8v-z~1jc z`5^1TGP#Urz*!x8KhEk%*sPBI-deeqBxiN&NZGuOH1x*h%x(*LjKS|ApY`&7j#gH# zB2CK0{R*bvsneqHfBwsGIWHl2Vz=X(o+jFO8&nHe+H9;EahipcYHLv3b|L zOU=!Gv!7a+1Lh-YX+ANZQET&s`GVS+ugurf-h6AmrOV9s<}h_IN6b;`XpWoX)YY6a zr>L7b;}UguW85J1bmQH4){I0qnKeVWg8I85SJD7iyB-a6Gu#Xs;%2#7G}O&@%g`{l zoSVyWwxU~+M!S{Wsx-!}?pCL9ZcVoqjd$y~^~s`H#;9jyPNyT%T@@{J`nrhvN6I{; zT;;LOW~fZo)>5pwv8uLWpRF3Hrj(+Zv;L>C{(J^D>p$3C|1clCR0-TU=ts3SUZDAt?DUd!Hdb}X{qhf!CwSwWV)7t#5o zh+c>j|JMTkU$KC29`I;H!jGVCbzbzcl?UegV!4F*zEmz}UOysNvz%?DEg>J1o0!X+ zEjJMl(>br7M@=~9&8Mb%pavLs(^UOxe(LA#WoG}LE6gdSt9KKh~z8WtL@tVS!Rphtu zeTy=(%GKOg&IMPwgvVHFwMh9KH&RteRg!J3j4I1kSzpy>TWhEql2Nazz2vIb)$1H* z-%xK+x~-B>N%g*ZpR&|`WovyW)k(IyES*JVb+*ptn46<>Si36d3LJB*>S|O$57Wb_ zlD>Pm)c~JrGoKQu4iG|9Gs#S19hP_?!L}6mvF54A;-s*mh>0*~fiYy>?imKMjurv`j$7GU~kWE}2I$ z_B-YsKJPvA0iXAwImndHS>qFbNN?7gdA(cpR>oD^^fn+i zLPa7#V;xZ>e_4ba>{HHCep}+;e=7Jd!1)k3FTr^O&b#1zX`A(wdk&<%{$deR)0>==5CC0jrn7?aJR?WjpSr+4bzv>WL6Wi!Q0 zqu2bW{ay68^Q8aJxbrx;vz+akVAl@pItF$f3%gE+6{o_A1!KNM)*)-H64q*9tu8R% z|Bc)U*I?R5su+uimJd9#IebdZ|LwtuU-hwbz}juiEn6X|S&cHLco*xOh; zf~N}Skv3nHW;yF`hPTLj2G`AtYxsJ59&GMn&dWxg9WBA|EH|LKm8lsSnCP>BZ(d!x|l}%R5Sr1r$^d+nf8nVyv5BLYDu7AcqL-o@6AJzXS z^6*gQBJ&cHo+XRixY_PTl9sH)e)>%p?=MLrfj0&$jqfdtFHK%(Ve zAj$GCkm4=zJ`PClxOXDZ9eOkkx-~bVF;5ivfPyGWfR5r>6GNhD&ZUmUo1LR*F;3k; z4xSm&IJ#xvL`*CyPK7jw0{$bu#cK}iku;X2ca$*e3HDZA1;4 z5oRPMm@#GyC3*LI51_J@&4}qe=Fn%HbHsAa@g=wWhxvx$&3EPpN;E&26C~Uq=N6iC ziv%)miknKFTf$Y8?z(O!Wx2VWLzLqjq7s#NtGRhp!OeGTQzf?^WAQw9vAcxw-PP`D zs_j1NuBAHedUpfWb2qtLse!xQ-APT|J?MaNc(}uXR3Wd_m-Qacn<#a_qQE-$e=b zY9S?ZrgogM$tit`QrJ5_LaF9wUYo|+e5@@UYsqZC1|Y!tqnkHW1zLM(AprcbveaB z7vrIe2}PM);k%bNBPZC%NSXXA{VVbI&kLbK89_TlTcBa^WAId!;iDSEM-4Fhm^Xvi zCp<&X1GQV+M&2>+820Shp2{)3EhCk)amZHh!n+B#^524C6b0mDfSj^`oT|G%_gy0j zXI*V^7TRT_T_v<@h|#QH4~B6TD)17~C=)BIik0Odqp?_xfYn@_mlK)6tC1O0M$fg- zb8F0?ZBfOsaS<|pGQyK(;K>4>)iKTkvXBQ9j2B%88ErC)##Vy(fgx9B_3KFkLz@T?^KNZPW>PxR*QHT}1ud1FUzq{f~0|;xs3` z3(W4IYa7wi?N(YGl{p8LePkczk-gf6jMeU2z}z^awYwFVa)-JTyMn{l=8AfA?u_*I z)UWo|i03ztsmtX`M%f?9gUqkP@(5@8_WGm1^~W^3|A8bb=bEGVJB%Z3{XAVAQzsZf z+WL8ET%*iE6}>F3QI^9s%JLlJujRbn*32uwQ&mLeyb{Oz$C)D=^+t{@Pv|E&wpg!K z&F*ZVJY1Q`NBz78vg=yxssF*5{?F!T_Npmv3e{x^`|Mk*xz(t?+t6*oe#v^Iuor5| zUT6UOqS5YH_CmJC-o~Bl&SfuT>+0>;Q!i$pyWU;TzUY8^fG%VIbA&p$XWTP%Iioy9 z9lZ)(1?uEA@)}WRuann_x_E=V!PJ#ywuHKQtGrdz-CO6aqaNNCZwvMG4tfWvm-lz? 
z@6_Ak1L^}0a|L*BaTGCjs>cz#mT&Km3-4Y}!jpXL8&OJ{zMRPqVyEs z{B*<;SMVBFvA)N^W;AT36l?~zZ~<$vePeeRRYTv^&)Z4y4E*X~d(B6UWqV9IYj@~F zqK~rZqa6CkJ+F_Q=wq(Bo%^_xeP8fj=wlb1zoX;gIrJHFQSj|igtr5_Ub(0>U_-aIy)TiHmPlV_EU`S z<1g~;aQ*8Acru$25POkh>{m{xQ=Fxq;p`y&cg^EP$fcysoOM2DYU+2L{SwZep=a{h zvp5^MSRRCTh`bCd+HjHQge}Zo9}eivW+p0!(Pv$2t}RyU3y-k}W30bmeI~45VITi0 zG;AgNaa)mLKfn>cfxp@tPe=t^`k(>E+Cm6?%2l zSQ{ek_S$NeoQKF^M|9-J@?-WJKgb_|F$S@0?dU;_9*5E6Iif7#{9?IU&S-9<+Q>P< zcD0=&&2e>{Qq*a6no<8sk6y*RYXQC+IT1t9y6y-ZmQlcmy zgW^O{J^>{rit-sK!6?e-pu|N{z5pdYit=|*63$Z6lkE5OTuWyii}k-O`u26vw{P$* z+`~ci5QysYQ&3_-;nq=oeOUDEt4I%-{~=Zu?JZ_d7BF7vh|f6pUAV8KzGC+$+6*+w zpYJc=tZ|{gkYfA?{0Dh;%lu^&@2~OKP=deKUrUMpe=DV) z=hgpa?V7F7R&q9_S1h`#=E2C=Zy*xQckFMsN6}*FMn1mb-=`N*|A>08o#%e5;eTND zFdh|OLdCZPD!#Qa%O0rs_D9WV7%IMn`)_TxfIO{;8e)p;GYRas<>84=061s(j*KaHO^`?o@-_KgN-YprkNEPH?RpeRbf@tpy5 zw$d7=O~dnZ8%NP%oFRd|PF*^?Gy842P^C4+Kg5V;R_UeL9r^WchWma8Qy8{G@eraf2LMj=ilEe2oE1Z%~zu4>!RlCq2`;8 znr|j*zS*dY=AtfI0d>(zsEbxXT{Itc(YmOM)4Er1$xj8de8%U&=-0z5PEPG^k6XbU?^;2 z6l`KFY+^iYVghVpB5Yz3Y+^EOVhU`cfNf$lUF%M8C(=xJvOAe(VJGYj?i_ax-RR!x z-by#Qcer=ZZ0v{q6ZXT-aTl@;{TFO#9&G3?*w8(&q4}_(MbNFK(5+?At%smnE1_F! zpj+#qTN|KTo1j~pp+yI)~2%6j+ z+-wVOwgWdW0XHv$CU=G=cZVkTgeLcbCijLW_kkw&g(hDCP3{Lxz6zQ=1e!byd>;-? z9sx}r2~8da?vDod$3T-OK$E9JlczzGr$duxK$CBPCf^KAz6F{*7n=NMX!1PB>|V%h z0c5riGFuFpErqm}Tbz1+OkmV@fMfVEb&_>3Q|ED(G)Rx2%8cM!k~{Cx}f`Eul!{4`szi)@#EbSTJ9jAW38#(9;#Ir$qFl*Ov zJ)HH*T2~URD+RMo#jFKpT>`TXVb&6}R+zPqS*K&x8JKk@W?d4q&cdv1_8R|dC=D1Z z78olCj1-4VB)0I)<`jNTLYP+g+=pRP98G8(1y&Og3qkb#kzlUdU-%;n$`_8K%r+7-&ZF3ZPCrzd~w$2nAcD#TU$G0R&~ zwfN-Rm8gxF@yMbXhh<|wU%=l7ML6*9ON$W#R&W=83%{`<{B1Q;*YPNu&y!YjmY3zj zK>il)CH!p~Q)fn2_u;>@I<}s(a{PB!>X6-$U5*N`f2T!ecmVT3@5Vl{%I92Xf|+P0 znaO5~DKJybG~7)cgZs8g-=ycTU$XZ|Cs6o)YQA5-gbwz#AP&5`zMRU4TiTH_7vvbjN zMTpvp+I~rdmWP)wXxQ4_`UwjG=LbP9)-*&(VK0hkrX-(0MrJR@*qWDDBAHb}PaDrv z7FL$Pcr8k39O4Yhhv9158SmWUZCN;ZNiCyGW(mKOReflNCD zQ&s)L0;VLxlxjuEZQ9_rSeHc;z6jQ^1X~)T-UJcWzU$cyKApA;IO%^l0 zG>sY=B7v$X)J#=x44sw6U<|dsBq)X9mx`QSeM_o&=!X}Xs?Mv{YwB^xYX6z4#u(a7 zNmh%_n%rH#0l=Wdw?$)32BF^zC}2*G87elX`QbHIOlcUZDJ<8oT$_*PvS(|JO%0C00!9pw6Xj6jSlSSO6?FpbjmCAxcMm#a3{ zO!p8@0N$KR2gOF7{)a<|L}!0HQ16&*1M5P?!0sD<4jq6Kh<8kd7%DPnfR7n$_M3|| zYAO*(b%nM}gky00$VFUrIZgUlv}hG=S4=(7b%Vb+vs9L$ zWVgTynGtJ8gq)>`Vo^QG!*bvC3@;rvUrV_Vd1v^xbh{L>G`191d*A6SZ>eq>Xsec5 zKTip!lL^}$$t~MT&m081`3v9HyC56<$(P6Xc`XlPmItEki#z1=muHBeFX9jbRCn@i zQ6TRgvZHe#)PwSCiO+j%L*ZjjzW$@B)5LVrf%@yA<9qC_*OmX`HJ!vYLqC$f!&Uxd zVAO77KhnPPmC>iD`feod1MG>x5(Dl5gF{IHy;n&o-UCU&gCQDk(FLq5CifhW*U4?* zCXZG2VKNLl&OnUUvi&5m#T5O4nO{8pu=^vA=wn*cEhyE&y0_EK?qZyu^*~%SEx=t0 zZ5|y#d0vr}6j-=XRv?7@F-IM_prlN@->?!gm!mwbjHr@UOUJht~nwrXd zMP6FsChKG>pp$C4`OF8@J$Z5S!k7IdaFfA$Gq%Qk<9wcK@{5W48)CfCZa@l>h{xeG zT+Jv`FIe{%8;82aB|O*EL$ymb5fUc~C*DGi%GG&v6Z+gGpOASw%-ZI*HDCAO+xas@ z4+|tqB>PH53XO-tHCFg{B!y|4oDU>F%y&n#93Opd*Z){Nj}G#|)Ul z)qX9X+fV%T^56xexiq9LBd2yML(%k zB5#lq#asn|+Q$BMV#`IRiq3R}BSL#2_PX4q8A`MXH`0m9mAV?%Wtj6tD{4hSCEMnB z?AnoPJ9C8QHtbSzYQd%X^Nvn&X@&b`Psg54wQaI`g;grybB2wg>!s&P59Psg`R1wa z(hM4O^Ki!$9}aST!t`C3KH;-@X#In@Q()QAf7C-NQ7%L+{g*In2@E{|)PQ#6d=w*O zUgC|&C6p^dfF{5Lg_p!x{M65t$PnL917IDpjJb)*2mlGMg|@(5sR0&|UC6qkTA;2F0G%N($TBFPfEJ=FIRJQw4a_T1jfI5`>VC^{ z%YMuG`m1p%kz9z)*tA@~%=yXZ7jiMyX$e`eSy zpcexlp|9wHgCQ6p*~IFk-eInYSFC`WkU^A9ae5I?sH-o)_7EzRO%cAp$9JRw{>Vo# zVCpL14f#q?MOeTb1E>*_jNC2W5%Gv%$PN@lsV4TOn~II(B4B~~*PnxA;=QLO93@*w z*FL?a9mN2&joPcH9NB0S7S9DhY%eo{&*^7LN^uc=i%pH?Q1d~x!v(_El4a-zJkkS| zTi&^X-0=JOqJB^sri5bu%7Zr2)}!yT(2i_$(?6osE|Nz%*kZJ1$`d5ABh+6G;=-z? 
zmq35HbGx7jW|de>2Q0aa8_7e*0aL^y^cI;DL@Ekc?weB-fKFl>PT}YIFUN9D<<_%r zfOjp!2YP-9QRQ^ox;JE9!o2ct z72go~wJ)fhLuW2#+e15ZZ@<*ip`XLtf!uAXidsImSWA?00D3sDNxMRA_L(jjUXeOd zK(RJ^OxISg6#Bge`>RiMyNfF}G%=b0X=Ym>sA=^AUsQ^Cfuw|gVlcEe8ggGZoft&H zUJ&u@A$)VwfxPJXI>m?y+p%~zCvSFPQQU~sz`7Ew-FhFpBq&-wB`{6fh}FQbLeyY* z7QLc=CT5j>AE%zzSz)!PKkaM^Dmt)LYfwPK8S9gpIcgN5X?}T@(aE2-|3dYcDg?@Bfu;4p}c752M;VKtZcurM>P31 zFETbjo$V;GZX~}p>=H?Oi(ezju^*BRPfEYn?qbCG%$UllG&;ojP(Ha03nst%?HWjW zD_t{DRL8y24Kqo-H|{1=fW)uOhe0$?7{lF(uVwTnI_*`rBrc|l4JRuuD+p&EmEteS z&MK$!0BV(#5&E&D!Z?dfCal&#S0< z+m5BIx|+V}l<*fa%w#QKT1=YCG?+9NuGlmfuQ;sOTLm`7xt^oVq%AdWnwK}sui&1! ztZ1J-tni$*+~oyNIY_{5ZwC(VapH0y5DHro=NkAB$18TKHbJm9>Pg77rEvY^31%>KvHK7EIi*)d!f_*4 zC~wY-p^NR>SN)ApmwmlQFFwTEH$C2>Hxb^d&a+fTB4Txj8&!c2qHan_T?Cx~f#^1ASq(-X%his^vvQTjbaGjnY428PdHVK@HqXBLe* z)y<$AX7)h+lVet&$))a_5vSeG6Skn83j&MABZEP|hJ)R6s9QC--GhrK1Bl;10{8Sv zR3YX@wnFV_psn83xm$C#&%F*x>+?y!dwsAb(Lypsoi}~xoyoA}*U_IUORD!=qo^mY zOO8o>ziM>vzl<8OF4J?Q@6~f#GNung)}V956xIn@GUJghMAm%1Cr_WmtO-1&X$s*V z$E^SHVZ_lRUZ>Pcn8-geP*Y1E$v-f&%*Bx<_uWu#3iX~|XT+OK-MDto5<+L#dsMx4 z-V))3KxfpONj)O$POBoO(gbfwTphqsJAMk$6!_+gQ7>3mc`DWv*-5UBSbKJg(Iiqq z_>oPb{FJ>Zy>sHehI2{3DLS!Eu=ed#&iQv|KVqF=y~rukDf0^*Z}0m+R9$zyk)acc zM|xtd3jmhPIj(b1zV>0+>>iA_PvGa?DVcMU*BI>*RYjcF$i#=iuiHI$`=74wSj)*3 zVO|vK#I>WRcSR~ouMFNXpj0BD&vx*W!4t|mr*{G< zlV}IDHTbmJ5z*bJaOv_S^-d_*zj;vn1oa9Jib?)3n$mh_sU{X&C-lLSdc&El-=6(g z8$p&e+O0>qTf9^Y+O&Dfe$=5}ISFMgQ+2&CW7voN!@Gmm@FS9mULN3s5D|b9o+9?0 zx^J;{qIlD2tQxNoty-Z+tHR{4Jyzk9zDX&AyyMCDXT&Dus;K24hmN;hW^Q@;I99IF zcG0R{Pu<+6XE%Me$insS`dMFCZ*qaMv^=4|rgeFe8S_RDaoc<`MYQ{$X#wy6oR6TW zn9Mqc+Y_Ns7eET&1B{2HGta@7TI5I6E4`P^yWSZCJ_C|3C-8B+{opmIaNL-yIw@5u zX2~Df-e`ui0qxaf6Fn9;#wqZ_CWqw+CmNS{PbqY7Kx^NQrXI4yz3UCwVT}`;p|z^} zkwB9cqUHoF=O)HiAjSKY6x&-kRn~=aH0UxRhs~?5CZAQBLh|*2ykPilt%V1}juc1= zNDR5@uc9dV5jMvRVI&`mTO^5~qS%N2QDc`CLQn~Sj1TP6Ts84`$Ae^TrH*ED@eky$ zb2a~6jw04JIpmlF~@Vm5z1d4jdHqpRt! 
z{PR)KS^HZ>0a=;R5XF!X_zNJ1Z5}k&s5DTzb8>Y&8Uhi*w4KTQ_4Nx*mHgLh;wrhA zBg}aV)HPE(x}V`Xz6{n7O)VHxzy*)MOt1_djqlK8L=J@pYYZ`q9Igd*x*O$i^@KVZ zLzT_HCT#_~Zv2*@*zvWsVR6Dv2wArxNrT6FU5qC|qn#S)859RYV#;z=@#%AZlj^T0 z8om^2e-+TQ`qpgOpZEOFK5`HOqp+Up7?%mEDEiVkA$TYTLLU(+@)ZKS1z)6jC>rts zvAo1bWDE8Yf}!L|e#|phjigaClAcddX8&JQTsrccjl@~^wz8gZaH05%6`-kIsQ?E0 z*=7ZWdjX6;9}sXKxK zS^qAP&6^mWp+A9a;I+^!NOt@{ad>uck#)cl=QNe!W=XeSCLEfvcXC6dPgrx`&-=A0f*vUW9K{V7Zc55PJ_yW5|Nrq7`Mi z9_ypKxg6oM>u=zddh0%|B6#*RJQWbWnp3N#^8>o8MGUOqcbJ@hRVD&=@6&)uG(b)I zX3E0h63}d+bTD2IuXNCV4vU72MT$w>74*nw2s2l#|Gc9-@+~8R3uAh!7VhKvzz2w7 z+xT_;PH`beQE!u*-X}(#>^L{f2tHdO9U$@5{Hga;`t1hNo+E!auj}5z< zd~86UU!8ZFP>dEo8;QAPGBnjP{pLcdI5)@~U^o=Z=zm64HtCK8-L!(QjjuU3_nkRH zwX$=%oSg5UBfH2v5S-h(6trVJIh|EHwGbXUx8nSyU0UxYI7W1kh{-ltH_34GGmvSl z-Rg4u)h>?Omp1qko@_ES4FFa+kHdwLJ7Kbs+&v7$P*%~ijF|W7nFU#aX?I0XIu$2K zSeTJhn}%6{%_XBCf9*@eoEz2JPNSMIDUDy!tAY@u(fzm~``y!o-kFJ-N5_5ZoC(2c z7-dJf4dqPe6EcLVgFrYJ_t>ax&bb__7fAcUUNp&#=ws})2DzPwhoq4mq*T_Wopzs+ zR24Zp(0-9{j}p`vAV?zJK{_FeO4q`UJ&hg6Bk5nt^>I5V`1FnrYvaK@`%dtDvYSzIeE})h*jiLR(p+@dw1v3Hxqw%iF;GNchT*ZZ1%%My-S)R*PKrNZqw%Ln{AM z;}zW$PWt!f&QwDU{4Ez+#!CxWsP3vi(B)s~#(RkJ*1+CvDL%i(!oETlM$TZqo}vFp zUeh;2dZ(sJRM{U|x7^@!WcN z{_=AB*~cb3IOKSC(1iXYrlIuniJN4*GQLpt`F-9_X-96{`tMnmjN099wDF(wgVIT ztP95slHp@#qKW4S&4@%*U%tP5zYK4bFY_w#qJAW5LIMZbzSv%d&nuL9H4vC1XWdn- z3@&9ufRta9FF^w^HXiZ@COmUHb}QdpD@vz|(9e`i`Bm}YCiSNYcQ(;REL<5H^lM8o zWQA|7rxY9)+x-nRmrI_hz4#ufLI|}s>Fz7+HfhYG52-@AHv3A1V3z}uDCYX=0`iPu z#70rWk|@v_WT>^16dUMsO|^ul>jKKWwnP7S?O(e^#NCR$poZO3pr6T5uOurrFy)#W z2v2tfc^!u9z<2XeEE|dFRS8LU;MnKYh9XL^wjD% zNxQBzgh`G1?I&{%K7;X~gO6UGAHAx7_1(-{wj>QeS~(yVuw(73%iX#$OI-J)hC`Sz zVn%;_?DOb=^}o~8ju|@p8H{PsGn=^iYu1SA)UdCbVrBOMOf2u0*ii46#!hA9I7vL# ze%7%X>1uOuCvbC^(3S>VEzP|6&N0sv`OI80qw-R7lSJHN$HAtEN?I}8BFcT=HKTZw zgTdmp&o`);ZBu!+x`dn;e}0o>HltG)yT@myj#z>!yodMaZf-H@Q7UU)iiDtHVLl2;*p|rr-s~wF7)CyoiGM+Acp*5S!C@A)->=;`UAs!Nm^T5lW zVNuG9Ng!aR$Ze2JFb`%x^zgbwbnJkykOG#^sGm>~_TydRqoR%FCiA94YD4To_>n!x zC`pRs36uE~5k=@l*TP+KugbnrV6q_ALK)fs$VKOJav+V_40(VBG)DDQbXp}{16Msc=o) z5r6O3TrW_-G^Y3>1b3TOF zIkm4l)r5VE$8(F)GxaQz(;z}3irp|~Yw906)!q=4Re*4ddzQl);)e;cFnw(Fi4Wux zjoX&&j`E!PMU^w$I+)elTMitI?64eHcFr`{H9fGsd7&% zktj1F$3B939)D3kS3JeJ#JS;8Y!k@7koKLxQm%Pl6H8k|t64KzQ$feVvbIT9r^~9; zs`{+tEaEKhEGT=NX{kEXAY;?Co3FdyJN?c1nc$Xp+vW!8-sT|tRB*+o45SM(e%E=U z`%Dj(3pNc#0age$_!%7B11ie5wXgLL-8lWY?l@_BKZlGdM~jwzeeW-*w9IBCF)Ctc6Ws}@`fYveJKD~=Nw}s!Uzp!7PUmFAkL?rA8{0NdZt}3=Fsw%D{eFGbOlpOiL1lC?$vjXy zRK1K~cBGNEGU%xu8MD8wmYgHZxX!aF0izC8eI%;Zz64yft5vvsJ_v>GoZ}41iS-L8 zl32U82d*5Ui*=n0F~Z5B$!)Y+t7C6nYU3JDuVc>(L%*)~bEolJPBZ@6tWHz_-;4WD zr7++53u>W5Tl1O~)sXrVhWaaM897eJk+q@brj*>3h@5?BF#N;!m&H zU1?9bq=E&Z>#VrZSF@wI1F0^_YxcO&=bW>|*OuJ_@@_-heLA$L>b#Gx5LV+q#?5Zx z40@QoAG7Vzz10-{*i7f|p2q^S$F<@{l^sWm4>w4k5QiUR7-eP|X6+4PbeqDz%NSX4 zw20b2$@)c0>qq6b$9XncPZtzjnra88+sH zOQn7C95#MKj6Dj#|L72koGKq8TO9b$VyVFJvwGDqZWkhM^t~~#0*v8a(p%Pk*;{7_ z7mwB>Pcc>Av|_iDf{3K`l|Q%c04{M4lXlgjM3(9{WVe_ixtp$GV2i;xdiATWeX9Pb zKwR{-bu)jcOFIJZ_c(==UD^wl%O2S17h)<=kI_zH+$I3?@<`wAA&T=fx`{*|m&?%W zCX1%M5!)?_iPj(bW`x*DCf$YEOX}fvNyZZe*AXiqhSUdXp}(-@1_ma(C5wo9=DTq; zo`xXA*~IIbM{)t*(A|r-WqV_MNzM6w+QyEF1l6*CT_nG_OIoFJA9We0SfKnEGaDyX z?^o@uwI4vHv(P=Yu3kpaJt9AGEsoz<;U(e-j)<>hJCA#~I7~B+dvHA{KS40OGuu6K zj8~{nKQ(SMmSS|6JARd@RN7p^+gBNgY=vlnK4cP&|Mm4lL&NWi; z4TgVc&bX~YL9!HD&PR72S(cq_jhy42Qq;ADm@S5ECQkY{^j?EJy&su(4Uv@g{) zM5k5~>M&Ef)-ab+G`1aRte_esa`JNs~xNhR@{*S!XDe47AM7FHDT zZ;K~2(FHd~;#Evq`?frbsB6vG!^w#As<6EzZAr*|tR)x^Xz|}O}2v14w7hlm3a(W?^-?% z$(lUE^E2753Sn()EOWIq;{hpI6&pOOa?!O;iH=k6q(x z!mwZ6#|)DpH%SihoqJRrgLY}0!{kKwNhS<;NhVlsqfcSYe1q0;tetzUMX%$md)0ql 
ztP%xMPuTTmo{Bs7Af_qZa$Q~_Ogi?82VA35$f8^$$eBM!+&`Fd3UfrJ7%{qznk8;r z8&PtvlSj|;&w%4DLP=Duj^@kRM-5(}T%-QA-_y(_%b9PcNd%w`S$nZyx_ zPrDD(o)9SZ9WNmj>ywU^-wG@$Q!}Jz@1vD(kf*Jd!gBvtf$ExlMt$yK_%>U_7FgJ@jSmxel z9>ZFI5(XKDu~2r`&`;c9M7k)y80*ecrS>Tk3qT(k%bLcmh1Uh}S0^E`Oc;5Lzo#lI;mXRjTRRXhaA;r~^`@%D(rfzKGP}m52u`>uB90H^SRDRMT#hLeZxN|eF z$-t$=OhaEF?@6YFPC2ztxL^gj#@L8j-xE^^m%9{K@WSk|R@neL?&{ zPx9T^1vGjzaak_@ZE=Fhkk2dJ^8)OAbW9fB3%(U26=fCWv-Gn(@-itL-r5TdTCP4IO6eVNKvOYh4H4aWw3`9P;fw z4w81Wd`P!L@!UwMa}zoz}~i`q6r&g zy%*{Ytd@#LvR;@MDD$!8%1#8ux)C*q=ly6O)LY`piXFq`1SaSQxPtHp=)FbCe{1hm z;CUU3W0*kKui^SeXwwXEz+0h^5T6U}qq_hwP^5KTg1#io&#Uq;;d~~`Q2}6i?;Ka! z%{h=`xb-NhM5Pmw6{E4G3=XU5=NqNs!^dK->_Kxh{YDibv?|ATP6*=luFx9r{3T;@IhfCrrCulF&0SgKBZo_p(FdEnV z{c7!v?dyovnBHejmj^lc3w<KuuzvKqgrXS$4 zi8BZg`ce~9MD(J%BW}o22z)QTKkRYw(v)QPL(>u}euStw-^RWx(k9*D_Zh&~=SbU$ zjhTve{FLI_8SZIC3FvR{|Gd6R;#iv^CuB3t(~5tf=N6}YDQ&@WB1}Wks(UMQO^7_U z#`8y4Zfr`lL+lW|*G(I9;((ZbV($3B;!XKnqej$RR11ELMLMsY>78(>Hg?uqLaABO z4CPq3bAzFF8||(F$|dV7IZy`a!=CzOF{w;MlGO?ON~7MQTj?QwLe!VnnAj_*i`Gf< zj9VD28U^Cg=D23%D^Nnetdb5&jS96$svhRlq9x-pU;teUl_K$zZ{xFw4MK6niTP#Koz_^ z3wY(3^Bfw?SXm_LwmU9v>{gkKMLaes$baTGtM*oUiE>w3g7&@tI;Af~zojzwgR?t$ zt>Uws52afHjug4Gtz7}b5iDcpJ;7JRHCNkN3qdOGMvWnO+t#hDPsvc$lvh;s<_a;( zTh~3~V4PTMpF_Hl*HpGJeLZm33~eWW>wG@fvX$xS6mpO2+^X^iQMPE$B%#`(>%tyM zL;1--a6+$rw?!NN3ThK7*nO5yhI0kJmOZe$Z_ihrtpS4owIQ6r`--`{;{sz z3hA3wI#Qz*@>~;JiIrDw)7|eWot!SwauJK?gvX3JN2;W_qCH8y$>?+Z+sfO)mX)L-SWLfbce zYTz$dU(okmBT!@dX7T2?R;ZuAumGmL>bnq{)5vOi2BA$RWP~?$&j|3u($@EROcV*p zKK=0y!5@04<*k4@ryd))mG!w9dTn+K)87Mf4b^tiPY3qg^vvnn#dh$|8~B=?t&Kk& z_yb}K+SUey3+kH3UuQu{(|xyF^TQ+8iTUP^!mk-}IZ9pWR?Glf6~%Aouq9iCpQStV zLad!dmHb1X?Pv8&VS2?G)ljHO^FG_xi-n4zz(Bw<(D)XRn+?qcaS+7GW*11MnS5o~ zy67-*-s`{5xZyd-c6fi}YCfo2cxYdEe#_F#Ydjn**9^YHhNjC$82Ul_Gk^Tq6BPzF zMR?kA75Gr;`jNm%?p1`I{SH&BE6I#Flu46q}oagNMy=~TR-eEzXf94 zl7zEA@&*al+GD#}ibkMB7@`d@N5NPQkOP#TkOFG#S0ZZao&ky#@7VJFRLoTH>OLDM zM_jWP1_N4;56BvvKugb6o6Q?{lCF!l!WQ`CmscYF1Gmg*i;iwg&l3HuJ1Tvju=Gw4 zw{=ft%lO^J^_xv+_uxx#$A{X-v`C=8-r@C92>GpisP0YOLvayPhvHuEq+!YHk)U=iUH(>PB7UdYi{teEX z=icg$e8P(gyTj_$rWSjDmkpPtRs5fTo(e95-nQ;fKRs4`2AwU#Xd`_yYzD0@?PwxB zXPqjOa;M}S>3sMHVJ|Y5o&GEaht8nLBGdmFqh57PnMLP(sjDO{iRx$<*WvxWjQs8Lla%fp)AgZQa<`U-{F%8X4(=Sqe}^XmuiiGd z1OdHbg_pY{95NBc*2#>6o;#ySq(#jDyXGctg$=?&BV=b>eG|Lp>ir@e_lb`G z^Mx`<@3>p$&7~CNbnKJ)tT5+e_?G_TEj{<$!MsamoXT9l1i`ZAom~@zTj7IH=#GTe zkX0ktPV*W9PL+OeGIkTrZTFSx{O$yy?9J#%%~@G*o9O8L@U+fDOX7jMY%$DDrKPvH zKC^wKmFeYP^k8FmZEu2qzEyAHMOn46GP>(<$Xddg`+DyBy-bz$A+g#qYEy9c0qbpJ z=V3?hh^PC#CsE*3+N$byYkPS+HDU&V!Xu+96($oS{MmZU41Nax6< zLKAnseN?T=JN1OmU<2=sXNCRE)zR^p=Dj-lzTVttg3&H0(F3U+@HQht*;fO7-L;BM zV8!3f%(oYzcS`fXPV$i&kR>s*uuvUikzuDCuwNigM&9Ye!o#t?QIlU8sdm2xQgQwRS$5u6TAkpN zrZk8`yg%Qn8(`Su??rE{Lw{!z@Sg-aJbJ2HT&VxE@OIuBbgC-5QXP1@ugV)~E$2Ha zY4o@EK8N|+GH*maY$4-^^IIh1yKw{dnA|d!}o;k{zLFc;T_n1Y4Hl#-M`)f^#F8fL3-E$;|mGC^q}uG z{t)WuyF@`A=ipv!lgQ~#ivQg?%o#b$P&5aHM7 zF97Yr26sY=aP#K5X&3VnkDdjaQApKl*;{w@KUCysE2^dPQD+qc&F8-Jm@5UtAJ-Bz z4I;+ajKiOPaa$Z}7vEsNu8MVu?6A21Lw^c$wHVSZ{>l!lHv7}Hg!HoW37@)%tk;5F z0{E8OzR!h!(T>LQaR|G4&n>GrFIPDU9vIlRb#+ghTR-x$8WLhYzFQA&9z4E?7>t7p z2!_CHQv7(kg&lHwme2KQ<14zr*H78> zS|FN%9Iw3*9pK%7YFjMt$jzzjc8ID69e z6t>Oy{Eb~PM{7S%<|EKuRjcgYUaF})!=L=Qrv$+)_@2pu&}7dhq|vHp0Of2~v~nH) zH67{CQX(86x|HDa!+eF2xlCqOe!_ka8@%G=^Ht{yEp&xd*ukL}@kl8BIO)Vv&3MHd zXMc%1%{mV-E@8C)cqpkl4q{ezG72fjboD8Jfb07{f3`qQm{7c2{YIHw=J7_kPs^ea z+B%X@<<4bU@(fORr2{9ea;`K8T6T$kdaHI_JLne5Py^B{bngi?KUoznVtFn`VO^o} zxaTA#=LqrxQ7uDKDKe$W~j?HG5&cwSqC9Z zxn<-0U3ZanLaCIP6T6wJA#RysB(AIk!*Prde^cN65*V)`q;v0BNz<=?Tx)}bDS`YX 
z6S#H@A@$-J>&4|mM${IGaM8cTHZ0j@m!#f$Fi(+|92snW}>PW3HZ*Qjwm2moG~se=~a~SlT}pkqb&w` zp7{WIUa=Q}CEtg4BBH550A&OR0Ou057oZg;QV3q z&AXK;@kRNcFtak+3tSn!5*Zte?lrVyz*toNk1%scj>u-0BV`GWZ;HB^0Gp;N6c0Sb zlB$+;pDhL%ef1g;L@k%$H$kj#n{zm*v6_4wRb4DNPD9tLK6Dpbq-)ckz6+~LW|hk@ z1InVAq;bsxQYk0c?21DFClc(fa!&H+PV~dVZ%h#GG*dFZ`V(woM7hFb7`Sd%d>FV- zSDwearcBF|v2PhxJ~o}+t2ehjs!M^e63RcYXz^RmssRST`I`>+ERD&VV>k1$+yVT5 z0XF6QSlq}t(*kiacHGiw$w+bY|B1vBjO$SH|AJ|3wemnI9Fw1~DmVtDJ2+oEgoBdB z+hB zuQCWHkFN07r6-S|o}Cgm#*fxB{sE!t@?To)qh(s^oe{R&y7IG3yG?UZ6=GmXFUUF+(O#6qDK)GGUnBT@j$KLaSleV1IY_x#{H`=` z`)=-R`;O;j^`o?PULu}GdQJWIx^f!K@VD7Yf5c3)W$cQ)xg^Mlz5ssqM8X1+dJoXnHs}+Ekw*F9L#8VbTN>|Rh&0|%;D!UK2t+io_-`i{X>R7PEWy{wSDf8ZzVpp+7b_)d{3B)t*Gya7%f&MGX$M%53EW} zFM7pa)c0BJ7^~Evz&rN8OKb|?)sO!pl+ViPEHA zUYkrOS8|a?$$5GHSqcBQv%9$sbiHpo>bT8iwl7Xn72)x0aN zjS`7&w&esGu5%NSc1qKG?FnkMp(+-@%ANk0ci#FfU*bo%xy_B|s}nX}mn2G5@|o`` z(M4;!&5isICAL5LV1x*lroSb?`Ea+UKSe`2N>+#%5}=;MXa)m#$QMXz5ksB+v*flm zvZGGeeBF{bSjlI-XGa&UFC(~>^8&Ue0V#~I!JZ%8h4fF67(2mH0Mi{jRn zPzSG!)uC3UiAa0v1c&_B$x9H*dH-6t%HqqpxI7bdI2RY7hXfJP&NjrK3?E;EO2M`^ zJN!*Iq7~bLW#4eDKP8+6%RzU{j%BbP|G!Ic_hzV*#ar1%3Y8cm?adNo@^$|UDeNN< zquEy;D@>7LNj7Bc*F8Q^7EFF_otrH5X1sGK&6aulHzYxKres)M+JyG2ZrzmKUM{Qw zZpE!{N3CWVZ98)W#phg3gQdLk>z|$A<`@*4zx{W~Kq;$yrrmLbib)2~&KyqhgsWrC z@MX|PIghS9Xe>Q3w)3YSnfCCZ!_>a8X||rMD_=$P*d|!Qd&vQN0iHy0Lv8-yykb}q znsEP&29)--vtQKM78S+AY}0TgoMze>LV4i~)DMA0njN zyVi~%SQ{^Wj!`@wWKNa%Q;(Y>tjlMtP8?Szcq%#4ZWcsam(2OFG+Y_GS30ZUjA%W^ zro}qBe>r72F65#!D!N`^B1)Zsy?I~6RNBmfwGInQ;zXeyo}0hWbYzNyIAHq zi=GGGv(Lk%73jpI8GKG!jlW9Uyi^SpF6Pggt%|R5yhmQz1T+;=r3=W*=3fwdL5<$n z_)ZEwMA(LB&IoU=6G9&GNG=MsA^J z8m^KGnYp>BTx3Zp3OjpVk@(yk^g7g$faHTv8`6wd%(=fKQNauGza@RwqnQl1gJJA9 zZowCX_BW1fwmbG2*tje}1_P(8BP_A9D@z1Cx~sXg zeJ+XdI5D8}Az2}L>!+?i+eAk+uGg)aXd|AWo^(6{0of{({P%+6C z#N{AwI_Zh`OLV_op9y+-?BP4*Pwzvgszti*NSS6q7_1Itj>f-yH*M}Smy=t^4+0+x zE!NPQeiGWg1z!c%aNkJq0oyUNiEe0%WrHtxYGKR0&czm9*j?xC$hPOle#r)EoR|bn zKWEm$@ewR|@KkJWJ{$zyyL&vl+szx`QJ&bqr=QsLoVD;qh!;d98*GkyF8k)CSQf<4;FXSiT5so_Z zZ3>W}H#MGKijWY9idNRke45hQ$=-Tw#hwc;_pCrX<$`bhmnXM4+iQ+0u6ef?$v^Kj zSdVDR0j!#Y@8?m~S4Ne*`)faz)CP6!I_a!uWCl+Pkz7eVV~i96 z+c9HCIEck;+c%iS?4h;un`+@6C(1l&6S=TgVUr4``OeHs|6jwXt$b)ap6MSM>&Pe& z7K8ZKdGuTwPpYCOY!@M-vWkl#N9M1SnHSd&cpNfNb!ckmF(9c?s99Pot0`$%QjZbB z^=ic^oC*x*Rl-xZGj18z{u(3dwpDWX!AIsO+U!5@w}pN<%zBU6RGv;6^bgiGL3NizVJSIXq#aFe|UQfptzcK-}_B~kl^kTAh>(* z;O_1g+}$C#4G@9^cXxLQ?hb>yGr*vO510M!v(MS5uGFnt^?g;p?)mrAYpmC*wO03f zx*uS8^hy5PB-4hPmM(+^%ZrC^88#vlsE9XPe{nYhHwl1?n_;2%aOX(T`MlUXxde+Y zk5Y4o^8ciYw0v%J{@CH+csTbFa|Wq4CQGcJTP0Jp$egjMplKKgO* z7T2~o!-bK=uhnH`>A)`%{d5lfAR4aFXcbeuJOB6Tq6y3M#?E(XYn{CJi01RrrHe0+ z4;x5|1<-9ytx)=(B3AzDWldHanyIRd#s+PZPjdGPURicvH~7;C{1p6H%}VNWV1hbm z1PHX5^$C%ktAB2yhQxLy+F!IWL)fn&L+`ffk74}$sWduUh8StLigfdJ+yuAX!ZzO{Wf^s*L@t7Fh}hjpwgmb;cL9w zouXeL$|RqSf0itWD8Eg4eS4Q$G#$@_iyN}&HUi?^v*6uxQ)e?w8rP(}X1{*%$A)4q zZV$DPtx&p9*!R94+T3Z^bxNN?bZQ5!ZC_V+Y+!dKJRqMB3T&s1cW$ecFDIqK`}u$D z9?E0U)tqSR)h~U2SpMe$0>7xWUp4+rtCyr5BuRXi0w(cT+w2HPPOa=uvLH}x=H03({3bX8e`(~7U9>u>Yf)xXx4Rs4 zc98NPqni6moiAOe1Zx3?o2fNLGaZlgqTwjnh5>dBvI5m0Fa0UdBghHH{$_PzTP0Iy zvmnh*WrC~JJpKK(eEIUNN6Td9PS+SG*!{)Fw+pws&u=@gq$jauI`Q7r%gn#6BTL8z z1L@=*j`wNP6@sDvR8!@*k(Pe6MD2F^qv~`%#;)z+lm2~O)l(oh# zu)cFNlb1GbLmAXZ{k2%_Rb|2Okr_Ky`I1@OB3%1AuB}5-e8nc;GDcSH>zdLl&a8`8Is68&&Nz7txzPFK`xVL@avnceJFvXtlmDZD zSu+QF%JGhmw!LQ_#Bs|j-==_BxrhZ~A9$3jHBhZs$Fkw>@s_XAlzXH64Z*q6Dc2MB z%tyIm_qoz_S8MJ}1Jt0Z zDeX<5GNRkO2=OgP_Th0Fqwfo`q3-=kp6-<|@fOG=i|OFlgjjhVPTXL8cYvQ*X>W)h z+fZ#EQT;(gz3V6LwVIc567JsxB~FLjMH5~|jR{@L#uN>h1_Gr8-6PJNU(M6Sp@NJz zR=HC@tT*z~nx{)bB^YnCE4-u2JaR|Z$q#XO1TpQ^YE{`hdy6Mb6nlfMUF;gd$GM&o 
z6P9bn!_2s-%H3T)GuOMPHb&dmAFYTlGPOAKIljd>aTBwWR`TQy(m2AQjqg_5$)446S*?N+rh1qZ4& zcOQ4%`#K>Ftf9@L^~o6MMau~qxdh09q!E05DK-_1d~?S~73u|8yWd@NZ1_RHyQdv` z8=)r=i_iyOLjwo$>p}v~66AoS1&i<3oV*ItPK?Bhv$Z$Aoyq=~CimrdS=fE`CZ5U3 zuP?1WI1t~}YmeaFyTxdD@6NePkB6$8aecDG8B5K}m}o0KIl_Skr&U8ou1J3^z4ZP@ zX`=N0T4|*8HJ`Q4p*MG2KkhcL#NGOlKjFGVzK@-Aw$t4b-Wm8LZVbwkTJ5=-c$Rlf z?m?GWY;E^o81-zK{=0g(tMrE18_MJNy`xv3)MIgI{r2*8e~(r^?ZZ$~(RiSNEatAl z5ERs(VqhT7y$Wx8X{$f5IM)Za3UwQjJ%28|>c$fgS+gO2u&8SEVLC#8O;Gc`x5F%G zV7pN{Q84%oTwg44qi#G;oJy$j?ks0KFlq19MTG+wY-MKH7Vl*8=4ndaI5+6~E1pCd z@Kh!3>hM(UTek~n#l7*OG1V;>Yrxd(()v<4GP6bS6wbE~E$CQuZtdh3Fjp3ky|sO) zMvtVeqvWSJK_9gc=O^Ep-E@>F$t-l&NTeVq%5_d2)uyv3%_#hbK(JW13hC88F)vLo zv`=o+)=x@l)2;L`lMJQY9-%SAUb8N4pp;pSVYsteEAA=jDOR3h5r0ZheuI8LG0AyU zhNtn&A;ilknf2*D51ZMca7_2OsKQkh0unFR-yrF zOR^gsY-{w5mh!hcBWX1M4RNb(1<}BR54P6#OWD?~ zg;KMuei31q;3M(TDUcG|IrMrSM51_PZaPMx*dL#2(ELj8+Rb<^ovN5ze}1tNsXlCi{GGa9I#qtu zmThHd-JrlhWOLz0{{=$rau>T!DfzaJqVxG%pV8_${>t##xk28tJ*{aZz(>UJ%*ElM z`o})C+e(%D9g&@8_luQ4*-tNwv+Sxnd-Iktd#m}sQIm^{I##x(beHRMmn-b9ikY^` z&zzxw-`M&iO5{20vWGMkUl>5Fy>g4RdW#edQf^N-{x4FE+f$7%`K_thZE@M@iX_iy z!u|uQuzc&8=b23IcGDSR?)Ft258Y#X`urCtypzRyV(}#CnXX%Td0BaRABo1hWn65f ze)Itrg~23oHLQ2<;IVpO-@PLv+lmdU1AmSt7gG}6DY3N&?2IO(a3J^8h+@)3oqqD` zE>1r>ipXbwWf=#zp05z^UzPP#szO3LVTNjKAzATgJh`6)_z9iG` zuo=+H$YUMm;K^)tq?K^0q&E&u!>09`Ir8Yeo5rUj)c zoXZiW;G6C}``X8VK(%}NHCXOGMo0R!56z-{mZ-!z6gQYjjdAngD?=v&XP8>3uRj!x zgLR7iPz?Y4zDUxs7}rbyz}292ZF^DJz8|#i|5h5=_4X*Fx+Cpa)a*%VSx~L1noTXR zOeu%z0b#@isY^9sd~h0JcAkNq_`{!k7Bxne@ORht3QFk{6V{p1{id8spo*|*sZI;d zu(w~ERUKYsU9^DgT@ndI(t?HK2M2ckNz;~`e@~Af-oe*%P0Kk0Trk%SdMC*8uFx_c z%TDMk+F!UqKVi*s7!rf8l-^aTgHD0d2~XiIf<11Z7Hw~{{8a29s=vG6Ki>N*8`xoC z{{^C^6J}Ej3!DB(Ao{k_)xZAv+Plmxg%-r~z^9zJw6V#|zagd!EBQU3g!dt?&v963 z`Sifn-G!^2Cw)D`4xmmL<8(pZ0lRDbUM@tM1;6NzTFQqxW5Io#KO#vJWmC6ynuLDs zpGmty-_N2t3&4hU2z}U$cQ>%(kgxfJit>+EA|?3i+K3JHG}gABa#2ICde@tM4OJkA z`X-2j^LHlue-f3+KKi|bYS#?jY&CB1P(n^6ec*e_dsSlsx$uD+*t%^uEd;|}%?nOn z7(!!A+c3r&Y-ci9FSIZ}Yf3KTzu88-gvt1k;E15mr`<@zSNor=eXZakl>Ce!nTr;o zfP@;g#dl8<3xgi*^nPdGBxl}va#aRV*T@ONIN3I!-Td`X1O7JDjeBu&I*EThCtSx~ zp!XUL58i4_xEOP$g*BTEVc*|fc>?r?>GsMXi zT%a;d2>EE%=gfz#HO9p%v@jZKU!x#6uKS+AzgjNiQbN91GX5HHo0c@I8)9GHO+CHL zJ-vkfJ0`H4-%DH}Uc68InSNQ5f{M6`(QG^To;X>-tQY(pf0-H+b$l!rldmO!lp<+x z1Y=Jai5J)9Emv4_(WgyBnb+qn?;r^Xme*-5FlcZ_t52Ay*EcNFDW=1?7?X^Lza8-g zfttqX9Ur2NKYi~5)nFy}5MEGqz>OKhbs*D|!EY#gdAle}IFS9iP1bw!rcNr|EF+n7 z2v^|0Q&l{9QBQ+@EGXsRSUsytCkV1{*N5!!iuuuomoYlmUyLy3<&BHq9-t>VV8*T$ z*X{c^48NO^BH8n%fi8d3i;KD%L{px`L+IOb!b73>48vyp<$Dhe9`dz1FNB=fC*Ix` z+YF%hQJBOw_zW$K7%ecf2JtHJ;d`@1?P`4uXKI%-U7UV)V2tz~F*XW>67%U-q@VO1 zcC>R5EF@W*gzlTZWyS0Sm7eY&b#nalU-kxlercoq{@+c6*lI~rm*R5&2D%gnrScgW z{|oYqM|MB89`iLwaHG zwe~fGFx+L=(Vl2Brmxk>An20i9o73?H>y6Y6ofe=-tix)EQscPL@9{Oef&oL8UL|k z<7qI5L*jqJPFd@{ljgRJZ!R!{63c8{TdLNPV z7dg$3j|iXtC#lti@CHpFjm+mLjEFqgy#IwXp?vx$QQ=9{kHP_-u(O~4KHH*w=rQt? zyY0(E>Hl}qicI8HYl}*?=F}>z=z#M*X}nsh@HQt#^28OT!JKY2MJRt8@7MMJXUL&< zv9_MX$-Q*S*kiibub}M!f#>S**V~J366j*ztM|Pj>VGo=z0v;W=T|WXAH3LaSg|5x zL@@txo+-!If(-mOZqc>=4a-NCG(kWSYW{KG{||D7m|PnK>(Q(4$UUw>#;^QQ5vvz7 z^g`_aT}Eh;hijQI*7oa|L^k$g*hEqSF=Y49{tZh*M9HTQT)kA=TLXF>Dbcttx4_GQ z{Lx!xJqP9dOLO82b7G8*igdd9r*A?Xt5Y3mSyc#^wsse`FaOLV2dIaK=F^>H9>x4W z|Job!?0%bZt{XAuB5u~9hs8|2@4oYdm^B$*T>kqWFhX-IdC4yt68+od(wWI%a$v00 zlLwdiaA#kyBjaC2acN2AS>;4mEK1`4t&G7RWjF6-NI`ZnV(LJ;HtLcYa4Fj1QH^YO zj`*94JQRcZo@icAWq9V3Epg(Gl3-l7e=}wt=dug97KKmVOsA6Y&WT!hAIYKqyD`g2 z1)HFFiz6ej-Vr4#7xpWr=K=cPba@`2WqF}p8m8Pc%RS>#K@l-uLnB{%#Xh0ug?v;N zNu(hQf>*pf(trM7pD^~s|? 
zqKk(_kc)_+_guGo`u`p#7x|9PYV-3BqeNE6FN^T6reqq5AT}v+{1$VUje2-xc{5qq)rgqB@e8vo@c~ecl%-?2pm{D(4I;Co9?; z*zq#_gc8yrN*H7=20NIkzb3FR6n0eKUKLyIHF!z@KSy*h?!qSK$ngizMVvyEh{&)c zBecGT{{v6P+6x^lsR&O`#!JUow@k-~NS6pi{+;)6Eq^GOE+P=!b5(FKaXtItNUY7! ztj&Hd=ws{LiK21_-T$edZ8?Iu}kij(y%%LzYmilFA-`4s2bOObV}$z7;Od0tUmk=Ync_RwH2(j6`J)w#RwS?6%!O5dE3|jXl(Mr zQtO|hB_vltO7(v-E9;Y~X?|S57I6zz`oEIHUB3kN;TFq?+Bt6u&Nuk)0^zsGW*?pX z@cwbOR1;UWB*W7A7>ZMuoih$Q`}-%iZe^q4p+9YgnsGPU2u;h+?AizV5~2Xg?XMoC zev|8tsIJPyO76rN=WD2%p_3nnLr9Z064rzeO+%u0T$Fv8qNcI8%(PYuJIpj9>(-F|9dvhpLTtj29xE5R9=xQUc9uvhOitJ0wqO@ovnBy)@4EGy7rn+FeOst=I`XRs%lC5uO|uqk4}I=ZMlGYuBzKW|B4bN|`ZpcDDmjf#8; zT;sM%35;7V$$a>SX|!{t2Oa7b%=4gjGDL|UOV9UNA;nu(o*%M84~>xcQ85=#>@eiQ zXj!m}dR$X--H#>~5IQLfzER(HL4%@>Ow~j5H&WGUzh@`a3~AJ0gr3&Mp4MV`(Yb47 z9f)NpC-W2XF-{G*kD8c`{#qjQGAJCzJC;%B^sVg+E4AlfFvE90ZP<=G;~+942wWoT zQ>T^qdGqB8S-BjL%YsA_zRmKU?K7r?2o4}<<|1mvsskZ#2cCqgLGQRHb#sU!KhoJ_a6nr6paL)VVcUYC741!qc-)pf9ykv z_@l4{_h21btyRfJSrZ_itz(*esPpdRZ870@nx&ZsoO*_84+pL(Bka2zhPzppDS)Bx z{8YQFzZ06?Ylxt9z?VxI{^L8QJrcUo4V_=HeJ5VHYtXrRxR$EiQ_wevgYqu~h{8#? zU^7DI=r(17*pzTag|Qp^$`BTEc;|m4TNk?_BdO3=^jyI5e@ZYCWKh$sFw;{k$95M@ za^P0yJEL#mI$U|j>l~21VtjYtN9u`Y@Y#>*xZa3sJkU(3!u>vU^zQfW=);vu)$>A5 z*vj#Z270kjLC;@!A4Pv$pF3_4Pqz1Yd3*(<9IktP8tj8j`FOLKilk~pCl`Fo0`qTI z!yl}?h(=@`@2h*s8_6BhqhRgB-KR--ze%>h@Cp}s3;o@R2ZT{?NqZt>g;{KAd!h{f zxxRQ02>q`P

L0Q7=ad3cC^2xC*#kej(~hx3|zp;16#@y!$9@EojL&rl!$v*?x}r z+=B=?yieLJ2$N+WFI__Jc|@Lx$oyl`gnas^NG|IopHI>Mzn23-p|Vc^e|x3S{)&Gi zmpxL{0>k6rpPWR`Z}-2FdTqb*uH1j+F|batpyhD$$Qz=5e%A)&8eiY-2O=LH3)h;-A(eiDEjJsWQh6#$_)^>sSf|& z8@(IRI&~rpZr?`OAxbCyK~I#Iit?%?-gBZ3IFIzsS#2e$mgA|Gqk0@>Rq2H&?fFIx zyhIfx5LkL)w)voyi!aC3Wdb5Yu)g7iaOr_i>w$3TF0=E*98|K3*^oFzgEES$J#yci>RYiPzD{CxDw>k8aAQN< zipOY*8g;c|uB^FAp2p5DkP_ZKT(x&X)iy#BIh6vUnwuZGRHXdXH*dhB-|&mQpR-}N zcJ@}8%>jQgiarLI$5X!`KVKjwzhX6Fje_22b*$C#A|~523+7z@+^+iQe|5L>_}3!Y zY0ZQ+JV7g(pD$N!K+~sm6_u1r2N5z4HlBo$W9nkbSn_xVh_U#L(#~IPRxFGIi>@i7 z;vTqlCuTJ*x)`^iT0Q(S$`*R)Pow1+Cij1{hhg%cY+|gOR-g>M4q$R}Z5L=ou3XK3 zQhva-b00Fl4RRgif~D+|_EQ8$;e`*ymW!=Y)85jaeXzV62R9Mstao05&2yWs8bJAj z3G-S~G1Mh9igug7WAj2>fT{EMhj8waBX>bP9r)yBDzl1l@-kU@-#<|X-%#RYbIRbj z&_N>8|1qLyvAq2Ea3Vi2mpxt(~H?@gyACVF0up6OcU39=h z-oG?NN%8gEV);b}KuysF_{LJEX6vZ{PYeP!y)EUe=Pd&@_|<({s3T{2wkC^}h5git zONx=!|IF?&TgPS(gCjGW)v@Qm?1^jHG}tn}%)**0ph1Q6o<{1$T&;o5JQSSjkA%BM z_~j<}W5!@0RhKe#mp{6*v`Um#_~e?KbVkg5@iyhy(2{!GE*NDN+}GeK<`wy(@vZeG z(WPX)$UKu9U0MA`c{BR$M$r?n@HQXNz0gp=h=z|n%ZIdm?=!n!gdj0l7eHl9`T0Ac z^0KCeADBMG^u&i9dDi$ZeuTGU{FW-opIRv;jbffCK9034cj>vaVpq_8#$Q3nq+w~h zZ+r&*){PveTJAOK*pT&wo{8A+*!ZXP2)`JQ@Wb-sog>L+$n zodU(4>o$4R{MUPMen$)DWqP8K1R=%K{c~U_UMAY?Eo!7~avc77$ix-vSZ0m3Y|3%c z_zf@Q_ygp)@^sGKxiqamrK?wzqh*@#S)+qHTp4pQ3V1pLlpw_BI*3*?eaC=R+EtgA zS^dL?Y|->0Q;L&NlwJz+Y}^&^uQ-v80Ip76^)`o_S-*U0Ae~^aN>YPn~)orqC0i-G|lCL#$1g#I&-Pr_Eyh$D*FsJd`~UnN?KmW`m4u!NJb zNjJ+RE1Fh6$nI-37lEK#TFV6hU5R?QH9muza@2o(m9fBl`KV-03(MrZC)~7BaQXf49it4g2HGT;{1lZz3o{+j@T(xTXMf8+2gjv z4e4XmDU{)yU;ya&5(n%qWg8+Uo;F9}*@A4q;n&B59s@Fj+FF-xSR4u>h zB(XHmyd~r*t)qOPowuWW$i;7ib4y?JInemaoQT^7@IWV<(gq=J@a!E$YT~Gp_n$(Osk}`boNRoo@8@TyiR@{75dF({9Sx}gv> zwK$R#e?-qAwKuT`UFb2h23s77ia(-7R7d@ctktB?JVfk_o63>{Jc&f+%^HC6#sk`^ zzc!Lw-!VT0&27lRzRqZ-XS#V5t7cl zns?V=7*YW*yI0shZK6FZ-pTitasMhdk*q1`toh1WV|}*Ys)ep`R$cc6XW;Rsr)*)c ztdYyNmO^XZKr5d>s}T|=q!FV2RE{1YUMuHUW8GM5T^SQqt2#csKQ3izUQnwXdRTVC zX8tA8pwUQ!fQ9}xCaqwR+|cey55di|O18^;uUxU~!=BA>)g@+YYrh^X8?;jpBn~%k zFP>;Gpf->_jgXxO8U51r<}Lsf)hoWm;C|~bI<0KBK&YI-6$hg}5#s0S<;o;3n44;g z_nUi<9P#u}VyRWkQX#?0`(tV9U$5Vuq$;J>2D;bnnLW%Relg>UG*D65kweEFT%6il zWE;@#B7CGfQ5iitJ+(S$>@jx-R4z-j7zMYEDg3D;nFqL7wL=~?Rm%Pxx zc%`B(xk~jG!bV4nD>0@qqC?K0YJbeUE)`N~OIAIP@LhIUW|dxd4AcNq7tyL+M`+6l+>&)K6Z?Rw{CvS zj0D&giFNSk4$m~E8}t%$7lV3pJjgvr^*d}X%lcr@4&m34Be0XSh2u-EV=4?(k=2-@$pz|!O+4<8K zr-qN0WElH9BoFf>*SNO!*Hx@ImoEIVrhPTmw`q)T)zB@kMA8kYwNK`&Sgk;DZeAEU z`EsZ2q3bdB0u%rWT>V&BIH5(=>8up2n?A9)O~|^9;MBv>#s|`PZFu*3&rVk!R9npZ9wfwV;v0vhnv%6`0HZ)}5Mq9Okw zP)vCxCXgq;Q{9Ai*S#FZbnK51`p)5y0(920)lemW@;@Fe&oNU?rycgOi*DQ+bgFm#H<*N10rv8+9~ z3&Dh_4m$b(UV`GS;_3}KB$hExKMi6+zWdZ&76`0onev-4*Ve}zo|!?|OetcO-B`m}vTZ7D zFv%ZOMCexD3mAYMZLgL)jBXD`%Z@_QBcAv?4lcn0}cRM)6ZvAeJ^FaQb*CW%b z{VDe;cX{$<>1E9p?0;s@$=6QC{oUQ8xz+gEpL@sO2I73zvYCq7D%4^zZhX$u+)s2o z7}QHdJRjWlpf>$6?O;xr504`oF&UXEo%usKYc;Bi}+g%%-9F_i~TJT7qg3nEsX2V zF9Nh9dg*F{rF9SDDz^o)tfOCN;&dt@rNMcvyyGvE z(FYNwJk~tj;?@j`9wqjP4~gx4g<#Zjn%8yJN7ob0{auIr6WH!oUkm+$Oj4s0FoYWu z_(E>R&G=?x0@9n{SWle^3?Ka$^BD?Osgb9a+iD2@8N^JV@5+ zc|N6i%eP$25kO^+O&!09mzZ+L3A$>)IKg-{VmQ3!a+TSgl0s0No(5J8%M6sSTLe*Q zPY^iOkhjk*uJ3n`TIrwFk5eAyh)v-Ain{VhD@1)E{@N$S9fq%yY9Dvh$bMmX?8>V0 zpFj_WIv=j}mD8Mq;z=#eJ<3R@pUkAJN9dmC+j!Hoa`S}_pl^P9ZXxUkFJE^ry8%>H z?fzMZcaQpM%vaIH!rh*a&v6SqiqT@w+03G*djF;sC?8`sba~mH40^-Yx~a5$iCHQ5 z)aUnT_7Hrzj=ovn?VWH3FT>7utG9E$9w0Dv;t-I-?<3ZkP@~(v3xZ@rOBOMKH*N>F zi$W06vRtW31+5987G@@uzDuF5awe6w`QEb1SM_7;$XEL~-PqTMDfNJymtb7qR629g zmkZM7E$C`9^7f<{y^wc@0It96=3<*v>fkeQr}>iEyS3$}?YI}*O}n|1w=Zk)Ti+Q2 zKY2d{xYfNtC=lbpeWr@RVt`LTB5X;|0?nDD|5CoIY8)yGTE4!yKnLn#bg%MY! 
z^Dce=)eZMiNzqR_p!DYD&>H6Y)$Dk#`s}Xt?c)nw)n^R)layqvR(Knu2sWN4Aa{oR z0)tTlXZa@uyd{EVrdH<dko070K4$!0h;|^Jpv#uK>rQS zGmq+9{mnz-uEUzE;2$PaX}A*z*G}loc@S42WbqSLg2AcrX+*qMc1yvfZE_tEx4jAx zl1}0B5*Y-) zj5j%t8h(2Y2t)^Uw>cMuaIZed=2pWX(+4QG^t0 zy-iEg9{5^RPe)d?YW3y?I;m9&!{=C89{l;E?&90?w7M!(l^_mKQI3{U?+Mm-ILb;U z;nEzy0w)crT0DQ-yKmp;q5Y|64_pDY6ax z5CJfb$VRLEE3gLQL}Yw+__de!k5h<>bZ$Y!RYDYHlG1iTr*t=6S5b|Q1b~u4!nhwG zBF37Q)tlm3e#rCcJHHp6Vl*oV8l#Mztc*Qa;tdb=I5v%%ruL)- z)TtXSWY7FI(zLOpI9Q?rwAMAG`u77iBQ<#*BOyv&E76*8kD(|^;=B>)spJ%Fk`e`s z6e?p8^LX*i%5rW4E%R(ls;o?lN=}U$m4zIQ3U)ad*7++^_|}+%Hq!Xj%WMYh-#c&; zF4HJ?Ipy)qXeK_q6uSa+;G$!qj87k>c87}P_(GgAn0CJs?%orWeQzGJtOCrey*mc6 z7SWHTO!lN0h$hzqAJP)9W9n1+Eu!l)$tO`KndtVPPfv@!Tg268%kaUoJkpOIG1KP@ zNlFC70-!|rFFbfJ7OHKVYYqzN-JzR7MBS7J!O;(ng8Y@w8W(z7hTpmZas$ZVpyDai z`>cv9Ii~|DjAW^d=z=gJ%H6~Vau^5W3u?gbK?lVVXd|>yZKfNgo<`hUIcl67vHnOS zv26kz#_#dCzJ0uX_j&*B1}s9~`D{?q%5AO#%U;PWb9?>Nv$+Y5d%iO{7dJcC_7#Nl z3cE+mT@Qbetno+@Ldx2PIwlfq$9>BhdQrXrKqT|2O`lmWjQmwn4yjM@5935jFQK0I z)NdZ4Q&<5A&I)(Meb+%kFlL5VhP_TH$5TD=sUYTWLb#8JPI5R8(bMJJ&)hSs&Cec6 zhv9+}MX!-@tjE<5AxS&Kdr%3Wdt)=i^eU+v&d%<(yU-cx0p%hZ0l%E7^}P1HzGFPU z@W`8Lf7TyifQi> zLSM@QWqANpHQz9`ay~Ik!{S)JN8|{EOZ)R)-YAgj`&)4M5Mm4@vVjy46jKCk0Bu1a z?TgVd2z%$Rg{gq&uR)CSF-yQEuc=-`2Zf>=u4zbsOThUezI;&h^b|k ztf_ABtMp@hUL0As0|fH~1s9$Xo)Xfwqrr10Aj7VSzpHT&3Wd9^gT0Nq*47=LD)Lty z-!S)QuY=ot|4L+(0RylJ<;YKGJUaN)W_Dls0=!^pTZR@w3u}Hq*u^KXPNKWgGacdJ zxQ@MVbNhU@xwZM1PksQ!)#m9Y(&`8pc1#NQ<7_CV)Z!fY)0F-+1e5**j;K~3P3*Qe#Wi= z_`RzEyAEAA199ehg!N4yp)Mq#0P!)T?0@tzo7Q^m__)YBI_| zsrW5)xToC?gW8y5WKS zH*wz7g$P~!u-dbM3_d?M$^&x08;uH&y9_2KmJm16;#=;h{@Ri%+mhl%`+{vH@w34) z((==u@u^hYftbq{bLOAex{|&kt^~aj}x6BQqnzLU1MVbS1e` z3(Gwg2A}{Jcals+&1dwf*H=?^-Ej?nYQah&m9>FGuRXoZOW+H3_V%ygC-sMCap80m z%`bUVc0>U(43DkLmRzLX5{E};I2pO@>#^P8#qR8tXkJ&FLV!+G`dYYn$SSqJK~YVX zUH9vEf2{Ni3XQc;iuM`Xq4tY$gLo;9PO0Omc#!JOH3{WUnG9owh^p5-M!lu51$1!yA)ImbGgOQ>70DyZts%nfrN2c;CpUB1Nm**y1CFs_f;#C%hpfd0C)Y9 z*Chz<)Pw0;Nc9AowS$khb_H~J!G#KWK%vdDpGNe%rA8;{+(vz@hNAorjXNH<#Ixmj zE?~T|DK^>h@&4ODSN07CbTE~{dUVwWbw8;f99I=Cv_`unG6%m}+FSstpt!ffw79x4 z@Fd^y*2D)oKN%7bCMTHsbS`2a(7?49Sq>U$cm1zU8REm(XY_I1#o@QUAhPW5$ehf$ zm#(Y{@|xFZ+56nh(b=MqUSu~ z=C9p;+6o5{rX(_O%_MmY+!=mKQDLlH=BAao%8J^hsHa~SNCc;u3F*JfdVYkgx*dsD zYvR;k9T?Qv2ZSy{`dSWox3eTu&Qd0d@!De>UQnhqNSXp(!2+LNGbJH&mwJ*y=M%2{ zlkYEm7{XF|uI}K4e=I*soEVxL@KZh^+^6b+`cV9oJq$g33JhS${YC9wM4D7UVqdCI5DWXy7jWUm(w zH+!yfH1$CX`_5jqAdfV|9k8jzc=&v$?19g=zHoSzN}U3(L|3ox;+P+*IJl+6>xh^> zV8Q)JU9UCx_$mKRtZT;j&hQ0$H8-AtR2{IL9UGQ?1#?rsHOW&jlw68=`V!$|57IUck`pZHCrQUL)B-S5u;Q!`!Q2_1 zM;ofGskFs8C?uWbJ0&RoK1u^`Sp|FP^ z(RXs+S56k7A~6ZPPR;ZDLL`*0>dE-^+2XP`V|AjSYL7UeT?Go8b;Xu$c6NWJT^Hoy zJVdZ6ew4B6aOe-(N-TKPBZ~x zIE#Im@lD6Bsb^0F(8{++_EAaSosu&C{eI19p|8Cg`3nKNDhP1#uBd z>vn@Q;}ub%8WzHa_gpMYrydbYfvOsd0P1$Omb5Zsj&{(%7z zes91avzfSb5qsR3*91kPQ2Z0A#?^K5)dulf1OX@D^VAI4rM?V_cQ^XrV%&kKS6m^+ zR)H?2W&&{NumRA>G_*OF?;CfI-9$dqc5LLGemL=U2AQU#c&LG`KDPeAECAW`KoIK0 zcd!*-MK@GU1D(9vovGf*IHI~6{_=DA6gGS5lmAWW!!!36w$-g&n?dC9{VOjni9d96 z8yzFr6Drq=*Erd%zy0D!RqoQ>t~a+OjmOp>(ylSHzl5SMNzYEDjui0?_Gr~KhH827 z4Dqusj!Rdp__}v}UHD})>5;zLkK0Y^MC5tP{_ImDBeJdkEj$B}ts9GfB0%rOZ&psc z0;0Av=WOLuvGwfC^TGGlt!UZIAOY#J7=r16_eH{~^X&iuuRy64O-VOK!`}Bt8G1n5 zn=o4yLJc2%=$wJMN@Uwwz$}iPCl971SLsplQ*DV+e`MfgV+GZ++M;g|{%AF)c_k0b zbs?q|rr{!{6P>vQe-T~$JwLrup@{M}=GR@(3Hgnl$xO5`zISS<=)_zY(TOZKO zHIasS`WpHleqo6Oz=7HQ>9GAvv2VC6rK;(T!OUp>(UN}BbSTsnjS-YS64yDL=y1 zT~&Tuh+Qsi$i_Yxj_4(d?9H&ztfANHq8W$Fqr$ zyAx@>Z&p{JXMJ+bV0vRB=SBs#fe)AzXszL)ic{WB8@{lfICf$`YT?F>?TOhjLKo?_ zu0{uHE9C&XsZwe$$|qUqbE_}=3_Yo-n~8I~eV}hE4cjmW^DU%cudAtQxT@mT<)vHq 
z1CDJt*Cg_{yx5M3UvQ3zf7g{=%e4m*kjYR$>?9Lu_B|}Q?{GpY!kUj@@Z4CNpo0tT6bz5@&x?UT7J&H<<~m) zSoKijYxT7UFrn9F)@GWCU1DBhW}Efc^w8pKH)uCZm|eJ(0vuUf8JHf29`JayPPd}clwHibW>Kc&mH_O|tASYO3zl{aIX;5H`cj+sZ43wb9z z+cZosx3wL2bQaW8>0Ur}2HTb4Uqm#uIuz%Qb9QDsP&T{eD5ToEN6aQ|@Tyu)dUU2c zP;6atcE(>7HM_S>FI>C(ZYZCg5o}1SCUtzUk+4>Z@sbk~E}-Hrdh;qqxcAOxqV8yW zW}Kdb>Hm0i`^lJg`lyIL<+r2mnT~yCqEF9viKb7=v|QGyhIPiOPsy~{i`i=DB6+YlggK`EC|XXgM}f;L%OU zBzx`SAporG*YEIS>MFuR%)4r5+=1M8BmR1{seNbKfpY4a(*xf-d++dNT=0Xot9zUH zG%?lx^Zv_gBayzB_?gf-hRPgLaT1H4#@0*axj(fDyCBW~uGDTQ)rEkc=KVmLho~S8 zeP6nRh@WOHlIj@ht~4)7zk$S~hullW0YY(d;;wWXryxxg=mN`6!xc_-YyngV3et4@ zQ(b8JX(|R%U3mFvp!cPDzzfpmf~k(>yi^2%tLP4PU?f33jIahC7zLPH?16L#FxxI*Ql3C>P{m0K;7aHPX|lli zXkIFez=*(Z2lg)S4KT6eyHY<~ewr6x)>HQm-iEHnlYP4wg9%CpW$=iN;`4Ur9T7t!|0qR&F!>mnsQV!GJ2oqRJkKj4$xy7*ItN2da41ihDnRzO*v3Roksn zzW<=e81D5WTPkjSfa=jMN|TjSF`Mfh`Bu zfnC*}-KBY99Ht~$89-JN4;aOX-!@@qU9~dHTNt+okR8SY9JuhZjo4Wytd5XMk|zLU zo_O(cEO;HkqkB7CXm}l%c-jBhI9O+_ju=al&w<1E@zczB+1BhX4ZqU00c`jAKN3bQy=>W2tI6#0Yd(-Cnq$+^THqN4e zGsk<>>O{LFyA?oo2^_R#Z`!pw@-9h61dt^PlO>9fDGsU>yl3NWSg}9wl_bjp$e8fr z?Ktr^OxPb%7RILlWDPj+b{u#c7VHl>3*(mnvOs3M4F`4=TlRhY=&ED2DWObql zVB?Fo*aa>)W_1E-b@VbnUNb+AZ%6ZC_;=fpU)z=mZ=&Ythj$$TW6iM?k4!@P;Sq#) zWFCFRr1gUXkJyby=E%yQzFnwsP`4~YhK3VMK$8r$cWP;m*e<;!Ok%ghE~(#xhIdWl z@%4iTDizhF^dQr1nyPq6x2ysc?CqqdL@}-Aak23>zOy6D$6m!<&DIJ8hqAniXXV6& z0*8dW`qICN=uQoN*`00!^(FPt4nAzEURb_NNl}h_DDWq%zY}QL<>Im#B$BELO2}1` z&d1Kh5nDH6Ha~r|7*zT+Vpqs)qIIe>KE=9XuFCp#>;z00er4!P)k@YKubOJaKmBeU z^cmE#7O<{s3NWo}D`;BPQq-N(n$xkZJFRtJb?oVTW_()p&h#dJB67cKZ6P#0Zqu#vi zZ`1ZxmQ!-0>AKROFy$*eqxyJ{5KaMq21s@|a|`KmI$ooVLhwW_e$)6C2F1ZVE!&0rX@@M99t%fru%U(-L$^q!Xa(eK{Vb< z(oS^5+348fh!%f!@GO-a!l_df&;CyGZtR@!Y3oaKp0BH8Q|};4(>X`FRdj8P@hHD# zFPBY!F(j<)ol;SWB{U-|sI9QjS(U7qrEL3m!S-~Why6nK3hu%*@ItvsAZKjdg* z4BgIYk`}&<8(RF__8%`nV|lPuv!R@i#`3qua;U~~z)9y0!24^@MB*i24t-X%>myL> zUEGpIM2EjmevL#e+HZPXTkT9kEpG>0D~&z4!2Do3T9 za!W}SNxpGA?r35#md%!5muVxjW7tA(OD3$0PgqPoRo;}2Fe57KV5Og{$ibcm;ZvIa zxt=U2B%jsv`xfJ9V%NJCy7SrOK_EVkBE zuv_4QtY729`11FdD^P$-l|Z_1%*)~aqwjf^fwP)4?r@KbZSUsXnyyD_i=oHvwtdHd z`Uh5v1Po^>iMm>`BRMIuNeLuE?>{YeJhI_CrJp$%wF@B8l$7HAX|iRIuy}t;645F% zvE!9P{uMeRx7jY>6r3OBIdE0%uswbU_GPnK&aUln+caDa1*kEDC55s^e3J0QeTMl( zeM&yCJWE1+{>n>J4cV2WJZ{MwcyVQ=I4d*Lu{Z)t$Vn$2ABHmtU&kK9I=p%gYY@5Q z%)ck6{-$IPgdeF^&3n!Ri|-4uWn99fEvj*gm-!v(TNyWoN&+i5IaW!~=`V`umzSto zHmjgP174l3DxEJ;dB34nsezVK#49Zwb5W-0Qzi0NR*2`{hHjH0f?H})cpav5B%vqK z_J%;UOxb0IUFIixvFEY?wLNZ?EC|mSqC$)7B z9<%4cUG(dhWb!Ax<}F!&7C4a;{8Z0=_2jFr50r_v3uP~E7JJ7rR9(z*aXadIOgvIN z*tDMGwTE8xFtUe6Ck))bJxn!HMjbD7+KPnDPTpX3um?h5;d5mlDe8=g< z#Q?Is7b&y+U>elx`UgKJs3T!;iNcTs8!P-CMx<}rcl*u*;0UYa)MkZ2!9S(pRKHY^ zM)~Ph^GR0ylT}^lXJ=;3jjY-d+)9fIN1cj%jrvlljZR5x-OA>wwf$vl8)CI}iOptd z4rfg{gOU*;19)BJxp^F%p@fi7F~$}&Si&NX)H5g`3V3Gl1A(0{{IO6Jkx@eY9qDl7 zmc`^FI^d_!nIh-D&MsIJ4WSpu%fXVU9j*ck_3>I1>$|#a+4{U^up9y-OXaW;+NI1P z4-v~do5RTJ$=a7OxVi8TDF1()-u)O*%IiWTm zYe9;j-z#1|9|iWILh%qxn5_nWK)xeD7(s-hToo>>@hg?pJd9GPauh~qmRV`hvoFiT&0-hvO-%7MU7mB~E)B{_pgj6mPTi#0aKNo&} zov8)7RS9KO3w4%sIc-fN5wGm8$S&~B1@F1u=2b9Qgl4mz&ctzx3LLM7J%ocBs)6sbA zR-?Vx3FE<93*R|UFWK2llXu;(`hjC_PHy(khHJX6Yi7ZQ*#z;xgO!A_fK4cfjY-Ex zzHY;@kVAgd()&k3Szt-W0Y~%9Pk7lzwyu5E8@z6k2`Q67hx0`F0NYG{MdSxLkDkaJ|K9W2m z{X-Zn#R!d>JWMyq2t{1@7tR0(DeZDi$ZB;fQ7$A0jENc&6MxK;RnNSRtS$e7ij>Fq zo_B993aQRj!`SzWt;J*Z3)|!LG}~kTVfN{UKPWQ@jEEv=eQ`DHVQzEX3ure62#U9p zpR)4=Q#W!rNS+s_untRaarZ^%RkBS`y-JXTXJSJ&jg^n};Zi^28-@|L9Ag15(O?j0 z7v2zxl0kZk;Bm|~9>_C61b$8iJ7+_WXC(9w2kfASM0vW5Emt@&bPyRlW z0q1-WX?)O@(bsr}86!xl{@9(YJ?_B!YVX3K19TAc7gJ|ntZDna$JhIiW34Z8{=yCr zvX&b8W= 
z+l`(Qf*Q*%axFu@pIz?ghq4@nW922=Z^^cp2TVl@$vrBbw#&NZccQI1`hy8hRDtzW zC~~W^*78s4)OIN|m*=Vu#p47q@O~AJhKBLPCxKaUTzkBBB9M>p_Y$WSDeqQK_mh@H zE!Ny2K*wiXuIS8SqG|ep&=L!`C)UF=cT74!^Y!B@xGT-=cZ06azU&QeSEO++s3`Nw ziA*Sg|FI|o9Zgp(nQMFd}mgnd&J6W>`7%XnmeLq2&3IsCI+ahyl&{I*9L3q+(H3G7a}p}7J*lElvHBjR`j zTa3S`!1~Uiy=l1r@~v6dWq8;tl!Y$Ewr&8}KUV}g3BOsJ2t~eyo$&j`@RF_?9yu@5 ze0LmvUbr$9D2fC;-%k4mbJ|~f(r>?7bLz`SO9tO>NQgWf5^cMEj@W+%dR2K8Tt(CR^efR`{g1VwM^Lq5LZ zyZ7ed(9+lMK7THzkE(AzHH7aQL(TVZLZoYBUapT@U#L;rtv-C{-ABUrA+G1=+y55u zOObb?0p_g-P16`AvD87SQOI6 zO#cq$Dx!xzzU$Rt7gZaFhs4FjOyX#H9Eep*_7uN~*|y}`%)PF;TH9z48uD-HLxAAR zz=xJjf;0U%&KM-G9CWD=Bh#bLyga9Sai;g=&(KXcR^Y4HK7QtSp0UKqnDW~*(rxm= zKlg3>h9)y%u7};+Q1%%2tTFlZFE8tUXYQVc7%Bh<=FT|CrQuuC^v?^z3NtvK(JP27 zBS8O+j6P7>5xEh{V8X>4vMZ*}G*^o-cf#VX!w=MO9Q!`@)ucDfa4_@1f|F49_QY2U zU3adc(bfx>DU!iSz%Cr$2!9)fYhb34(4&8DAL}=!L+I84Qd@AH5&kQA0HNbyZs~#_ z+umTqsqQPHAAVo*-mv4zmdHqNy6wYtzu~BH)_b7j*g+&A-xih}KOxzhPS}`BF7sD+ z-Jn-s7syASuWCsPwE)DpF*weMTWwmH_Hbwuv_Yw1Q1 z$lLX7e4Eta-&HxPZSLwiI-Gd;MXG=VVFEnQN$_lP8A95X%dsB2LqUhtg2{zp?0xN{ z|FhYto{51dD6dSyOEl3wwhgftLpf z?zm~YTC`n)Aj>u_@|XgVfLNh zEsN1kW5l{(q}Z~#Y9zdcX@5_BQ^uf2mi=Q89mTKN8T{ew*ZZ`_-&C#0?XUzxDJ@49ywMmVM&bbhRJx6f#+iF?%4yLZLh^{<>c+B*B% zz?D1N^#kZJEGlc=E4XuaNM^xgf>1pcpy6w5j_JSbZjwZ~*x6rQJ5*z0#e1$s>=7l@ zM3gg{n>?7z{s~$UTen}&1|=&|T^u0M4J$;`In?B{%@OVx<%S-*7WAy1ICWjj`?=3< zcDNU3<^enhp}lw_{Ums>+w6rec880PFSO!&GPX*27j$uqnMCHanV}SMesvucoh^j0 zJnLA5?wWaF2EsYJDe&MVnZV1A&`6sx(6{nj5F7;*$#4edr~Pt}?`w*~SO)u{~gW}32?KtzNMkTvSb`}rUxXT^;orb{Twv7 z%)6|0{euIP~&?Y8=zo)HbC1ZVAa$&%rLdUiG?uk#R8%gKDK%U0$p5@Eah# zK*~br70ml??v^`esJBL^{o#Wwf770Z{F>$ZX>A`CVGH zw|42-&x|0#yGY>9!n$teskfTixvZ40*0<`DFq%iOYG~*u<%Pj3igcyg*V=jhidTPX z^^FI4j$dgdn)Nak6VZQH;@9k2OMvepn3AZC!T{}piApIq5hkl>zjg<$O`W4Xg5lX- z9uQ7g`a8;Eb+M0Oc@=+%Q&Gg7NkB#Loa7F=iM_UI@=mb8WqX0u!m%8k4Qafd_-|}` zng?lno2NeCSv^SiG*5Sbl@DIruOMo!#49~Y^RUv$7$KevtF#S@icy6-$7W#Eia@4V z$~SUOQ~#3N=Cay#pF20Wy20HR-T6RsRAJcdFdUEBr`uOwwWJbyc{!&0VjWiAo!i0} zewZA6TZdlck_l8bZQOfUYgkTxoohYV<;xw%siDSojUpy?m9aK;tZ4zUY*z2egH{!n ziZM-P&BCL63LX74n9(CN2^*RtO+Y)5Eymhh z+82TqAwP%6GUPX>k-EfUg#=4FA@hni4cKi;x}7VFl<{7cjM1`d!sO49}^cK)zz+uU}v=) zmOl`xDU-4>5ocq-(JSNgvY4=fO#@ z6WfVV5~*JR^hqC~%|H8b-Q=}Fg58*7&^M1X_>~}?$j$HzasQyjpp1sGmNlbIO&~h` z8)NPRV`?v^w{m3Vj0>i)VJJ=0SO)uThT4x(ObBx&DCM8dT;4|xaKoQ!4?ABdzG3vp z_DH67K-`vj0nabX3J{2Wq0W;Q@%S4>Gx7zO$I+A1l=25V}|h2vW4j>hsIJbj}p15Ec;h!nog z*a5$&1n`EiZ%3U@1$RT--S5r2mYQUkJ+i3=l@|x5_&unQa-+jw!&DUA2u%2YhACY| z%Lrqj!ofm@`Gv$LN#aQ2$l{3M$d6MTp)|p2hsh064@!wbk>n=J!)Os`d0{{VjuDkOdNE9L7>wdDbyz%e7>Yt88hseEC^bnKiX1b# z90b{bFbpXK8W@aENJ0^X6^u4Xa-aSWFHZCcn9i(bYYpWlCVsu2ISqsUIljq2QUZB z2Pg+W4zvf^!YW|%!zy5)VStD5(VtNZeq2$wp%78=1H952BO7z~Nm|gi!zB5|;82m# ziAedS{&5_D4mQBNpx=;f%l@M{AR1(X@kGBNy&}CL-9kvD2j^;q}5!V>n znD4}PKsgv+a7Q}KIfxim7j_A&3zG|*3u7O434;%d53`1*1G|Q@>O1I)B2+fNLR8bw zJE$H;2Wty!3sVEDiK2nZNLBk+i;_eGg^`S&ss)9Sf}V^WJp+xAgr0<+Qcq4xTuVwz zI7=~0GD|i~%!+iHlHF(U0u?b#J1je_5XAze9Q^{;Jxpg%BFs7rPqdY)1y)%iOQw=s z%}eyoV{{T$6(&_+;8&D1kt=C&9CI>ru#yapA~gvHdJIfB5qkYwZBUY;(}XW`#{cKF zf|3F&9VR~LBnoDXLsEbu8AX9W!41m-0X?7)MG4$|gGGhFH-;g}GosLfF%DA_g(1l@ zqRfH;gao6A!;nLu{DsJ0fA(Nz5D`;CSA(Ds5lMo{>xbEYAPMiTBvRwLK)7JPaI93V z>;slA_7CNae#3jhypUbSuAEaY{Mo5gSo{_8#&!X5p;)P?6e#W=_Xd7Jwj5b`Sc$B3 zTe2(G6YB&0th_8+iLK03R3Ir3 z$s58O#~a2Q<%9KvaABQxSu$JnaaN%O&HR_#M#zTv0(RLz4f?`)*|btp*;lDcnMlod zKhBEl0(E()GExb-I3V5!uXAqa3Wb!{3 z0gT-M)rn;tYTwi{x29scn{9N*Rteu#S?^YD#7FD;>9y^QU0>JEcwx2MBwh*0;npo% zAhMv$9a&ZDk#_BhET}7UuM@{EmwfCK$AC%dT9#invn6pRB2oClV=alx6EUJC(eLnZ zdE)Yx<&gM)!Lk2z1^$B$dysmyrA25tI_zorD6y}*#aL(&PUdnkGAI!`93FCRf8NAI 
zD*&h$OIh9--}&b0O4tU!f~Vj(3uf7DA=>~O;AYqd&%j}L8$JRb3t^?KLH?Iq{zpz} z6W<;B{_I`kZS^H6zw&yy)C{InC5L?g5nkL3?HcG`4W6z z@EkDpmY`S-VIoesNZG3ND33{#yrJR>>O22|YR!a&uoOyReZTL5x4`9a1+2yJT8tIn zr)`(NNxL0>0r$Z5a2srb&B$$V2m18=+D1%qFWir|d;lJVhu~p&3_9d*TKB_~@HE@B+LDFTqiG8D7CM z_si!FVGkWS=gc1YntS`M+FQ;ueI-3NpsZKnH8=*pg*V_$cnkacckp|72i}GE;C=W2 zj${9S41a__!JpwX_yWF!ui>Pe<8{nKIgZOIKJ}OMy1%S}C4B^4IKIrNL^cYQ!Lo3) z<*+>FVhh+>wt;PgtI*b)@N+Z#9JZp(Z$|6hjs7k?{w3P$KC~BE?bm3pd(djV_G`y* zuN5Cf8;G{=!!}dv_saGXHaLP76Lt_5p!Oe>t$5ZRFZ3O~2EUOlbFO`;O}$n-YqNK; zg@1s*pdGrU_y*Y$!ze4N2C+GUmpGxHIpXzft z-AVWs{tBm|8@zZA;!MGmOoMqbgy+7I&z^$6VTv=DrU$$@hebL`PN`xVre!++^?oRQ zZ|WdsU`A$=d(X*qEPw^d$6;&;voQyYW$~%UJ98&dc-st22Bydx@Xw@v8e)F`WN5Nn^$DF??UZ4E$X0 zZ4e8ADpREgo${}sR~CX%Y2z>ZzGcT$?|@!qQH1(Vez)xJI$UkcSng-u791bPaD_4} zMQs%@1-)=QD}fs9FcN0$2M2o!5+RV)?=c!RL(~V@MZh71Ed=pzZq>T13O=OWrV2_; zjWgaJ7^qO)A?3#@?qG$G*Yk2-mnp~CWzI=?+4x=;OL@2Jy)I+VVN*^@*Xvzrsm$as z$qEVLPPJ3YNY5NKas+oeM`ff(aY@9+jNqe2j^iS%2=5*x<@8MB_3;xH=BwHQR!=C1 z%xf&n$-4BH3)4@TqT<8ChDDi7QNzN*;-hduJ^iuLc4m>nbmnvZWY&zrxK_psMnq>P zSbvsQv7qOku((L0F)|JlN0>|z@#42V3T*vsY`q$EaI4tS)YQ5Xb>d&qQfs-SkrZ~7 zQlkmb>$LB=m?l-D#ilgh1@VZ(KT6@JH(WkdIs{aXl@nj)=i=c$Lhd)WFN$9I%@PsLPjoV#}Mqx<=tx14%pmeN*s#}|(+ zy0>La_tkHSUr2Re3-@E4v`|AedX#gGiYYqS-yT!3_Z12i;3L`|mpWg*w751J04vsW z(0DK>1?%?c_ootdG&w>YCMWvL!#?wxPoq7{ygOd#Iia+@DqhN?P1JaOJzPfDZ&yqE z*f-ewdiKrZ8c^s9b@^(1badffnMZRTAaSlx<}2ju;6-N5{E4tf9!q)sAdY9zk9#gO zkLW*>s9gs7879f(E&BFbw*On6({n^=>-m}op8go?xI_G_YH}T`{5FzQI;K^kja&wu zE+9~;`#@8u(Bs(2r|wD5qk)#v^ZG&K>+;yrpEnWKAb6Cp5Nedev_s1Ee!%SAhnZb5 z_4((!SH1Y6v;k{y2y5^O)<6lP{Z>*)c=g@D@+I*+?SkHMfg|Q9Ue5EK6L_Tz(az9A zr};3PiygjH&KRK2*U5&>kf(+vCp$YkBQLLao>>p3A51spat1qsEm`Q{^O`nq+imN!)M2l=SXfI|)+%Pp=O3~7=e&jN$>+T9_gIuR@0QmN zv8&|1ob5Xybt}^#2ewg*cgM#^z&;)c!H~w)9&JS26Kn{?`#SdonM>o7(1?$w1uGo| zntjZPvm}TGdtAyQ*{D5lbm3^q>3Jg&=fh!~sU~?s?1Q*}poHG}91|BmDhgNMak$ow zQ1!02me3KBGbV_KhD52P?(*voE}!2ybHtpvb5crXkBi8vZ&}$=pOt<2{?_=Ks>ycy zq{^wW`SrO&vZlAJYMGwJHeGz@{B&!?*T#s@0PFBvXKH@>uo1b_R#s1Ln?F{qH)xIr zgcvlTv6+s9(J2WjW2Rju)`oH%9Ts^7F6f<`tb?6#1uAfj(1P@#OPSAm=R1w}!##)5 z0*?@N>ecpucLuV|0oOvwT4ODu2kaqV;Z%@ z^MT2$Fpdfw7BslT5}tv7@_;&DGBIWx%wdxp{I}jGyvvy7-SHED&s0iVcbC`8Op*du zG-c|&`P)LKD{9|Is5WTa5>vx-2Y+?tuxT5to-;f~|<4bHP#2MqbMv9Az8d_;N z8C5Z&$EBLuyD0k?7jupn3i9fmeo(B0qJ~lbND-n1@j+7Gtv5ryC&#GNqcStaxCk}* zhea4J{aKPmWiP328a-u2MZ(y%`>t#aOPi8Y|H#w{Pt9L;_rmm(+s94I46i88xTK&p zCTF;1#MI>_rB~FZkB!euvL?hn8yS~4?doYg6WP<&;aSn+@+TIH`|u{;XNpMiyo46= zuYc|O$bDQd_8i>e3DJaiur!y}P@%1|ce|9mEfIIG*ZnKyvUlQ+*yS~dC=yUHsm`Zl=a>{ z&yAl}+WO?W>J^imbEbKV^g*e$SK%0I^?fEitExgIl^m{Br? 
zi1)IA6|j$GVZtaj(&Lh*_D$5DH?UviPCYC)>Br^6md2`7XZKKHme9C3|CHvUnOV}) zrMFeD>b!RRAjb{i`qM3wsw zI~;e+_=Y@S<=fQ{CmlF0u{qd(Om0eMmWf&>=~VWjDRr5m%gQ%*E{mHSb+TuvCAnbu zxMu9S#L+Vghwg}XC5M-`9=L4t(GB@J4cl^hH9J|o54!Q=_;#lLn{)B&lakLfJys#9vL1pV2UT#PW`HMMZ1cmn^=wIrkK^XH6JBV}+hi z46kq&&MO{j8S;$Y9HcI|`oP*XFK#O@y!>9*n7Z7E)y@9jvMX%LTfhbpePh(94ea27 zF0%pe#0EoTlv#V)5F8L-4YGAGhO2Ck%UT^&NvkZ@-Go)o$?z}52VZ{~C*MK3+g?~c z(Q5yo4E1l@Mmc)>mnm#XtMU>jW=6BE-euo<-)8Z7EqBi;SebF=W5r`Z!It#NS@#_1 zspa<{xNY(6O(RSJ-nHVF;*-2H6tRi`7z-890DtuNUaCH$u7ka~TN)c4w2wWE4>gtS z4VN}!SVkg#oH#Xup#;WplPk&DSpA&F9QyUdNfY&{l9WAcqCpd_;RZ=FQ8O{SaqKH& z%4fWgU6KESD{5Nb7!_^8J%_xK;vT}3VeCpbcF7~Wi*_UUEQFso-oXCO5k?yRPrHgR zV^~TpJc|DH0Ccjps`d2XmEF$_L$3rHw2A54(X90@9ZsO&kv-M_w6I)TZGuV ztH()nFXSiI;7#G=$$)6r#Azd^ET0n3cicB;TF+W#tM{Ic*wMp6Gk&tZvS%93wryC) zuapntT`dx>>U|c|@i90HtvKk+krAPh;o*^?5sIj2YvgHLrJ;kBxdIK!XeGBw%8=8p zuyPs(?}^o&_WKqb3n?ka9{HK=(F@DgcLzMXyfE=7AU_cBP5HXIZic3d6&+jhhErx$&~ zdwBIcOLLqtPyjxEc+I)&8*= z9wvUy5aQ78WB1`BUIM${6%KlRV$$$RRd-y4soND*5$*`Hl!sO7&o-Fmzg#CVV$)ym zk_{Ac0hws@`nNdFen%M=F_6Q^$DLiRBES+7JtpooMSwMAh%5fsql&Pk^qiqJbt-F0 z#^|9-7x6uDIdQ?&b9DQ zIG3?PuhEv%WHaaZpD3|`!nQWg4<$QyPFe;`$z!VM!YTFH{idXJO5FOb{t1aQvJ~&} zQCP=d+(J#JT0VvGp%4NQ=`789ZTgbjwBzwQ-{CVb=dSqPRrWv=FQAO*X zSh>uzrl9!hCoa3xvnJoWa=EKM*S2oy*!t`UuDJZj^-~JgA6mca@aD;d*F1aeojcPn zT3)d8uC$9*$ope&iy~6CZG7+E#1p8o3-jv42KiBo4tHK+6XnvIEA-{M8fX74uzU~k zFMDDL?f$!o`|p|0n)0gX#@>CU z1^1O<%n>g3mAU|)XsL+pHdRDnz4y4(Q=!KH;3q%Mh#oqaL9elXIHmlUn04-6%aS#4 zTggx8Bl5~>GenCD)*oCSK4wf-==}pXmfjXN<*+k%xJ7I(S3Q42xk?+Tjy*kJx_ZFw zlEEHa6HM5H7Fg<^9f8_?+$4q^5Ac9aBYp`I_=2*7sa-mkxx!-6VbAyne~PI6njUM{ z*?F=TG#8RVv^I0V!==H9E7U`1HHl@Vv=0vnQ{tjF$&*zKU-+}x7*Rb|!aJzr>#e~B?J`@Zvj7NAh;f@Z*T}Fs8#&EL~6Z_RLgEFc@ z^90KUt*_Ii4KkH#1OD1O$i%A%dL=;*I|t?clH4fAKU_@zcN=1O%5RqAM@Z*93DGtw zN|IiieAQjm&3h_l?yG8;6_PclvTWJJ*zBeEE^W9u=b~H7%BGt~&6-ks`2;&lylC}g zXK2Vf@y@YnmN;juC3IBjEZ4Y8s!~G&jzm~8Q!PVdV#C67N@s|bU)V`G7Q%aH=^j(K z)?gO*97}*!ho2!ft@(6NfT0|G;bFf1k8l}B>HdTFcjLjH-dB2tA4=Zuc~9)3OkRhA0j9|J?mOU(xu`BAQ1tP2 zSohycr~2)v0(4@5Q3Deb(NeCUb=fo(TT-blKWF*wqI;9)hf>Kik1B_EaA)tMor$d- zmp4{kzwHH+r)N#MY)ZUzs%KGE&D?2xeYZw_ddrx(zLGzHwY1ld!n=~T>M02O}JCbLEz7$D)sJ5bx<+vSl` zbx-)-0)y`@@dPGm6#)S%vq_^aS29q^HOTPSAw_;mnVs>2FFo7;(Ng8sN%3mQA!Udr zk|!PYe!1A;Sm6EQhCfX(CCqzbNlNh8cUeJmi}(N0_AP)-Rav`xpOcekoA?rupfAYefTU+4(ph^P#Xf}rAKX4Fxz4@waRe-z=LtLWt- zI*eD*anMmHApYae06D#DpCoN9c*lGHdz-dNcC+_hd+oK?`qtx|9)?xF_4H=!=ZbEMyvFG78aAdObYB27WJs^XH9*=S-*ISCkRT%#BCLj|vlR4Ez_y zkHmrjW+rJg1Z&5k)G|Ukm7{C)H@@Yof zqCAK^HRS1c7%An634;{(H=!$(9of3IYU&@QC-FpnVW zp5z7rPX|H&`#e77r@18Jbr9iy-GihV`~-ySaFXr`{2iA-yErqx@CtSJiC4p$K8{8@ z=#$JgkjZ3J$4W#Z#-Y|e86u#lz5#rqA zkxd!Ne2ob-37ooMNvS1!a?pmia~;@y{e7(9PGAooZo1DW=oiIHgpqI)r$2NEe;W-r z$qBvSMU(@*E;M_<<+5AT6YYc(Qms@ZhDTbuhLCE|8p2Ac^{U;XOcIM}<#ZHwrCb9G zUK+{BctTMbBTI;y{rAA`zt1y`x9h3?u{7((hRvczex-Y{|F^Gk|AFIb?tN{+lr6rj zP4(F`C#K^;uDYZwwJBX7Q@;KpUJ{{RCx1t7iZ1Ha=6Z#kNZp-a_s{cD3Ky!{kE~R2 zuU0IB1?|;{U@Z<3T+k^@c-VwZG9fL#CT&1rjdElqB)0Y0yp=A>EY>SH5#E%tNSThV|3n>${VQ~PXlmlEzEd<=a>&Q>_<}I7;h1?Lyxq6CUT1RAEhh>VM7r3XQiv+&%s|3t%LwP+416iHk zCKTF}lwn2ORgr<#G@`K0GDe_7W0o??+?x|lU-&691wTy`#s;4A8;}mA^+>5fDvnj? 
zbb5pAs#G*gVdYdj4H4}fBV>r^0iSAIfE^qDkr-(7P1@|&Pi&l(aoh8qIn0LZ8vl&a zP;j~MDD>MVrBY9U#GQfWY~>YY2?eb{%hl6Dpu$>KTA-(;aXN*VP{PN3fDEZzLgaWVXlueT zH>&G*yDeHLVV5OND9rZ6ClEGSkPX#>nZ9GMa`ZmAj~`+ zV?Q;q>wEZ1PANG3w0$Ku$aM6LJPEcxu>`@Rpq~H z?tFelK`J}lqYW14wmvd<=7Y`NyryNL|0$PK;WO4ieU5*=i~AzC7ctwDjolV3_KVG{5)j#-D4r~u9AgS7?*@t1oY%G6Y=mRuyL zQZj>-hSgZr8b`r3Q2!c~S?I|PXHEmL#yzH~1pI`L#uyb+9_PbS5o{L83gR)z*kU@M zx-nbxqpYpoN0nAAt#R2(o7u{?(!`6U_rATpyZ`Po=ft_eISWfxzp^xYex|p@8*Iqc z-_=e1DrLglpm$nPQd)Ie!3S@?QXJki@8x@I3%Z`1S9|BYk~rr>9Wx(p_lqQlB_dlc zn>3G)?wsUyQJ3**z@2)M&8^vs;#BBp0s{R>YEHa420foK5{%ti$H}73$yq9MqFJTS zD0X@4^DX5MzWd-|O6@92PfSm;Iox(zVYbCRxjld0i+5BZ3a=afhWc7S@kqnb!NQjv9? zx)t}IK0Q2)u2TWJi&j!1O#VO9{lKejK+^|c>1fcPj)RDzGU26!3ifJ9-Q1);`BaM7 zy72qh!>59<13dTfWbQoS#0ihsLm4JhM#v7&Mq@?@KS5u-wi8|?B}E$eXOMqv0(ya5Zo=E91nzItjh_wVMcqU7Yl zEVDVQFgdv>%S@k~zM-{9b`bx87`j9C@3Y`#%0NR&K+J3ny;bw1?~Z{7Y`$ zYaj<_VeC2l*p~e$4u8%{U}%LzJOeG`2S4&*ZZ087uU2U(p-|+?SNroQ7he63OV#)F zFMaq3d@(2A<6PXuZQSKcC+2M0gSUPDSN!5`G&~HHzeR5phA1IkhZcb>orU*v`1kjs zMX)k2*G-o)y-0+ty)>zzJjF^F=BlVkaFtpbJ#Isv9^tEVc~=R0DLHo%ySZ*r>7lE| zhhPjbjG>K=fdxW-44S^mETyiZrXP(FakErdj5E1YR2)ZhqHQq8W|$)zdG{LJsR}X3`5snjO?62ADHa>tSF)Q7SM(8ziF}2K zmNCLO=Dpsrv(tajkk?eYr?#ugC60UaajtAe9%u=m z_cYM^L6ly%!733bczRQ<4)H%F5@y$HPH}aBl8s2}vzkiV5fsK@tJ_F}pg^C9RLL9G@+99X*CNbHecT(l4sJ2WDf(-|<(K007v7?BG!k>%P#Y~sFN zc+GMm@%jSp@7TEDI_1Rv{@wT8x4R$vxpyP4fQ}|=r+{1^BSgOa!099XtPtC&LA;ZV zV@cdlB*usmlmQL7BUO5znup9v_~p%!P!8qFM!PNlC9^y^<2wYZX>gmB-YD)`(wyd1vW8S$v)I0T?my|dIW%jUuJ4(NHhUjU5;-pblIJS27#^`J!+xU*X6t5-b{a zL4Ums&B1-3@8h+A~{Fwu0c&|9g4{5(d`igig4*}4TK`78IVwY_fh zcX;yG7QQ}za(UTFp(Q;xF;r>hjG^gknp(HC=L@f1m!y1aaEWo3J~VC3^vZkMS>b!9 z7^UoEm03thCk5RlCc?w!VWB|De2mgjE()P1h%{dWHzyyTXA=@q3etTco7JEPL?@Bv zW@vR-hp}Y8#4+WvzqHB6)+#4V^oq;3bT-V z#}Jy?vCEB+YUhz?sO13^<)tr)cddMtCp@=M?R(b1btkRtunx z>P4a@NGVvfYU3bs{Gax|NmT)()f%EMI+K^`$I(2zw{qiKcP|}SUsLtKk-L@-++TD3 zDuuswbk_%4s?sSX#hL7@CI?%q(x|P^aeL<0y}T6s1OzHRWJ%BE%yLEhpb48&-JBQivY1#urh>9?u(B0z(v!-5z`FoCV6T; zCvziuX2vgS#3_vUMU>ALB#1R=vj+%34N%s~jc?x-S>M}tll2Xw&JHhtI7fsTh}&T*t8uPv$;r#Lphbw$pU= zu4A8E8#v9!T!FqTz&CrWnqfZ@;fsAnqs=-UUMof19fnowP<7^*Tq!F#jHH@Sq(DTO*tmoNSjPdRy&*s7b}yo8fUdk zeld2eqjd_Tf-^M42YfLp360tvbx6sVt(g9+gN{)bkqU@0E?2a|Qz-IBYo@e9k(H*e zSO4a(+mmZ+DlMf?H%N|?foy+RCO>s}1I^DhWh2_CzZKaDQx$vW{ zJC+sDjEMO#PGRxadowG&32)f)s$7lJtfu1%tz4MhbVu!)tuCo|@&an#RI5ydYY{q7Ly_z~zGb1ybSzI}<4p z``0~2t_8tKDxVJHBzpCQ@Gi{Z#-m&V_b^_CrQ8o#dV*%JAN`nihI{!Mm-F-mpw)(M zk4b4ZzTCWEVAC}G{Nfi@6bOV8=9FC3 zy{M~$+V>UrL(1%JWDnXv$C>zb`C>KSJ;uyIJk&2%D#fs}Z~N@xJn-wCtjg#d(kl}Z z7}a;mHqXT$*(IVOK8Q5+ZT2emy!MWL< z8F$o9Ts)&Z(OgpBQe1U+bAD#y@`)u2rj$+4y7O6AS$Sn;YLPGNdtFXJMP^;2I<+u8 zV`4Cuuh-=5pcf^9>_)HM+?Ska^oYd-n+^3UJuZ-6R&LD8IIomzG~(ooiR}_{mm*fY zV2l)ojz`M!bBXM#qQ&@e|FTa+ZUs>l8VHQNw#G_1N9#KSgj-zR{PoZ6YkRVH_0z2$ zG39@-V*bvRB~|P8bS`>rT@{x=q;*AmLUD0GXYkLUK6&cw^PNTd6X^}NPd1tCSuNX_ zwmo-SX~CwiKC>50$?fWpstxh8ADovHLt}z1QG8l8;4r2b1X@Id#}07pE7l0`^rF!) 
zG>R<0Rdl${F)>W`BTO3pkjTbO;LJ~;%esdSuO^_TZWDQqpv%+AODkgNGSksDfk&6+ z51x5uM){*>c0TyoGczW_vpz$OrXyWiwfc8kJdxb!h?)YI0w2bObJpBF=d$Z;LJL{~6{Szv`Pn zxi3r^fH{MtLl7`$hUw{X40yToiMH(Q`CHmMpJ>m{ZrL(|IiqV&8H+ik(lpZrMTg@; z40Q76{pX<0pF!&Qt+|mh9@Pnig8RW*hk7JnrN1V46G!moHxcZ(h{ujSpax5Cf;EqN zbH;$jUAU3^q!I6W55Ob!%C2`lxjy4G%yJ*Wj<9|kD&|q+fKdq#YvLeP*$0>_5n$jx zGYu%vwt#@Z9!&>|*}-Cm}!Qsm3PHEgNU9`F2IKiJ-uThZE`RKwA*ah~Ph|C)MQf zm{Msm5B_W#F%SMOWJGWpZZTm1X&D{j?{#Vxr5-d;dDWv;7%T&mGa1} zkBtXpM)Al@E@of{GFWL8XDz~=XtX$r>_+YwT;}{Kz*SEGR~PptE+csB*q;G!^#QM6 z9X8QtVeTYUz8@vv*9QzHct|y%*I5aYCL7LKEKCBqQ=M@Uz$RzEOnE8B;ZZMB9TNxV z2zs*8QS{^(MNemEJ@@6qy8K|kU@9&S>0bWBV^4oaeZq+wy9UOp#FQAT{<<4fzehb{xwLaI));<20M;5=gZ|6$({hIDaD%7_03Ab%;^GqvA zNj2a4`0UiAC<+=*r+)z63sDLyrG$Whm?#A56D&exmm}~eHxh4TqCn?M?s5d^d@NXW z{(}*uvw_#*4XEoN3gErS1W2c!l{&PDKpcmnj+iM1v0$bW6^(UlFiT*SB9vw;l4-Rz znJxD#>k?Vz*HXeYAAEy-HUZ~A$UG$1OhQRRAC8NR{0vMpsw52r)kK&|nyD*!Et4{9 z?tQg&$xGeEY2|Zr-BXGj?5gKl+n-%glvFb7AMPrz+3c(Jx~sirdwxw^TBXlw3pVEP z;HGRQbta*F_KNDcTNd~OE%(=!bj_);nkP>0ENghMJ>;9crlM#;(4K#LfJ$nQurnrLQA|E`WJJ6u5ulWx0xZI@c;)(S9k*QDz3cU^?VUyO z!h&BEwm-eJpz7|IT3cVcx0XvV%kQX5^kGH?E!(1Vd2l^Qx zPOYhO6$3@*VHdioI?&E@_oGC7iH*

a$by+1dIOS^{SKJ2puvN$GEJ%y&=@dw;su zu4=FRrXA(F-ZQG~{+6-J%&e;HYUfxAEZRG%Nn(=|Qd zD|acCSq&w#$^(@yxu>LLVbEDrSdv<^dP?TR2@7YK?_N&{82UAJoX)0I+YRwg%1z0d zx$E_{Yd$4T57 z#Mj&<(5qqABoyp3n+@P}>_;kO!B4SjgG~QCG16}sTcmqs@?K{CSn~`V{3ufO;v65@ z;BO=Z##zT3=helxJ~>C{&GYHf7KfT=saf(ZCdr>O~h^FY1x43$v z2Qnh}$5s3_aj|Gd6q=369gB2)Ial;jJ`rDXOK-@VduP=Pd(xL4+N&$ z8l5N)Rie6sC=+i-YLtq%vrx5@x+=!HT zjf*TWqwE-GT?sBFEa6RS39TWSn~}Hyz0QBj9hJ}h-kzPORlCzHu7)*@)vKmt8eKUC zOMZs=uX(;Mv&*fKCYaQ5DLFauBCE|%5;A1kHG)u6$g;RKtIX?=$JyM)895CFc48{i zOLI~cij>?k+-$HE>+B}2M50Sb;Wk;ECY4yIPH-hSoMN>lA`$5z5j8woEAETL2@duu zbDTsXG)9jyxi?Xn@gn&BqQq8dpQw4HbmcfnSn#(~84=nM&)NF=7LFFUrh6^B2@Yq|ujb{YSsYpsiQ`#ASqZS3-wi)cAI5LJV715Xx91zk6G4c!% zy?TUCw?}K+>BHC0(h1}Tiw}g)ya8v|!|CX?bRD^`O0Pzy9K;7$3CqZ&w`%NYIVv@q zjvGr&$Fb344u5o%FSq3Pn+HsK18rJ0aLf(FIJoP-b7`{aBrjsJr0)0%ZI~;>Gv>}2m_?EMLOHu?|@Ga+|S2yj5U<rD@SEdNi!W|lBZi;2Jv0@a%K48+6t*L!_GC5GyNX|9etbdb^b1WwQQV( zqNT)R5sL+&+S;L+#!akRKyCI&N+k^vS|TAx2LY=#07*WNkGJcmek)fCx_bMYPfT5-jtA)tLh?F_ZFB_19M7{doM-7ia?Jwv0&UcV=P24Yh+HJ zXXL&!iQa4aO~jab?I)cC)DuOC-WKa1$&O zhG!@kSs)qDJTgA4%SRSQE+L9ua7exM&67`l^MpW$t=wO!9o!Wx4ljf`s$mXmWR8BJ zkOrBBv6V831R)tC*$mqTix9`?&s7q0N2rGvku|_UfI0XIbX<);diGf^<5^+Z^5tCl ziWLB|F>ZqyGGPV=*?Ix)4B{CBnBEM3K>Vn=FciNNgkBPg!fo4ejz9;SdU_RKnF;;7 zpuZYTWfkP+b1DE+GO3bMsnq1(10V#zc?v6+0y35TFZR9ztch%GcqTm|gc5oU1W0Jf zB!SS2AV`-cprRO(P^1Jys5U_CsMrv7Rj{F82YX+|zSdoP@4EKh!2ir7ly%*E_xr#9 z{`)-lk@sQdoik_7dC%L=loJNJVw{#1MC;8G%}YYe?mjv=loYROA zK!&eCODfP31>$W+zy8IM(I0)FpzFEFHv|8RBf{MRQwyp~K5We=uDBUH?ldbMT?!ge z)#w~IZYo@a+B*|>5FbyBfh-{|G;ljeb>(f^u$7hVHaIiTg34r4nYwkxR<#H{kRD6N z6X}L@dpe#@H!$9Y8`;LSuZ0Oaa242ZR0W`t2@nDIgS{}*B^eQBN!{h6;$Hk~xPyj(CM1Q{LVpB3fQED7~zvxRXMxRV}iD{J?D9LWgXffi=- z^vuoN;>4W;U>$*znXMHI9gzX<2tI=(26;OR@<2UUMytNQ2@T~G)x@xluCoI-5~AWt z+Dt7lm8!s03Zq`_TAuc35-?11PhQ-;7z94UbwF8^PiPi;TYNY z@hk`iF(Z>)jZElchFoCoBh!&j8NY z3|Sb@)z;Ys2%-aaq7+)P=Asmi)KI7)5y%&ntmgfB^ve|v##|50{SJ5aV+|P3ODc^p zL5!Oh;o}uP1AVR5x(b{o^^pTy;|rtH6KQ6StoIZ$ z(=`l@MPC7}8N?_s|FwtuZX|%TGPBi?PDhn4P}kbpkWgzMNM(fC+mmf)nW6eXC(qT^ z?dB-zt&N$mYK6k+T0R)!CeK>aZMaHoE&;}cekIc(6kbmQI*{(_OjuA3n6M^=ZPWygguD-SmcX5rGv5kj7EJXhaCpbb; zyf;gWp{1*($HI{bQE|>O-fUBzzk_2RGh>mZgNLPMptCT}n?=NzbnIan(hbdZ%@}wx z(-uA};JG^?Tp{YYaVPN&K#%E=|0X()v<;R8GC&ec1E+{&9Gwo+_TWZAL|h3-!^T1| z<{!{WI`B%UMxbevF81;Q@K9Kg6^lK<1ij(<^VO?;lwMaluj5Xl@6dwDptWy_Pv=$O z1^0tKmjhqLhK6o5(#L{@1qFz&l}cr@!G1FYF$r_6ftbLI(8$P?tfxnxYubkj49EZw z!^Es=tj1ZP9};Wc=M7LRDH{BR>N|^?FP+dN?vLMO85wK7Y{TPq8BABpMr*2}iKYJ1 znXp-$i-$kMB%Yrb0VqFaWGD{loB$tp(y~PCJ160alAS%=oH!mshjsd+w)GL%Rvs9O z4zMmR=A}02>KX&BY^CVeX``BER;PAO>YHjJe)rR{EmYnLjhZAK<_l4k z!_sv?sOEDv^z(2JG>0@az`k5d;U_y)%v!gu88+#BSZWz7!d<1)@|Cb&O#}Nt&AU7; zbnOt>6i6po?_g``o#F$}?VMQZB!cGv9$W_cUjuq3LC9tj z5eFt$=r2y;@i6-9R}hZ06a9rq?57XTK<3XB!R*Vs7B_C~>{v?-?hJ(Y zbxcLaqX#(4176+|azfX9%GgTMfpqli7+R=Dw>0|$H#PF^X5pW|;X?Vy6BP&-H}jK6 zMY|10#4<;=Ki^_#0#_=+J!a}P{uEK3#MjenG&eJH4dtizH#JkwyiS4)#}CXp!Z%YW zBrGY-2-KqM(6zPc5WddXa2u|;jY_X0>7YLbL#2m-I6ek&8)dL_FXAwH^VM=j$?95p zv3o55W4y|vzgaPi@9bE!cduh~ZcsoPaRFVc!n>wZ3NNbZ7ceZ4rH^uk1!xKd{08%q zKy5skOr_w^`;7$DFh{lGD0LtMAFKY%RMRz@+Jk;=x>jt!hjv_seLGL!Xr0Gk;1`Nu zRn5rG*}klRL_3EHnIM2BUq z+S;ZjU^JVJwe{<8Ok|(9i1%6b-{;T*PZ`_PSe8aiR3f{7Vj3%xf8 z-1t^HokSrJiDY!bO9IKzm^LuYC4x!dm{u(c1sK*?2&XnI;JeZJ31(b+*vtg=>F856 z{!?_)i)Z8UXaCqo}EW<(VYoELi=FsUDW;XW&C_V znE@2CSr=w9^&r&33=BZ9$!yc3(ew;ZBAB|mdSKs1)2^eEuxtR;KVWD9-O~}w?0}H) zn)OF82j~H#WXykP7d50XQcu7(;Tr~|vN}JtZ3VpQcp0aPV8i%D1cH|zP6()4i!PEe zRs#=FfQ0~*01o1a7zl99J%A6ys{pkA_W>c8u##v@98Y{tl9E%&>nK(L@+i$zduj>w z94(mErp451q7&&U^i|pp+N&5gjNJbP9MUoRmk{)i!UA1bSMn!Vpr`#ONY=pgUU0Do 
z^j$PCLIao8z?@)!`(J<!#zr1v9$YShM5i=H}HFe2eiGFD*+go2~j= zowN3_-p(SiyjT-iUv2zsX4-tVwYJ^Ic4Q0LKJ32iXm%1ijh)3F!7gWy!=Q#em%W(% z$j-!0Vt3wN&pyq5r~M6wEQeDVJm;`E$(&)FY>tvMl2gr@#;M~h;jH3p{0lgP!Aq{* z59kH)|1~fRgA)Kga6dRYIS$5Pq~p;ZaPo8-0$|Dy_yIrQ2mFBl8hEV+=OE{L=QnC_ zSqb2#Ybb!p8o175^M+wC^&f|mYT%pvUqNU$RAF#X4Tv5x=m-3OAMgWyzz_HV|9t>K zkQ3aAu7ipF2(ApJ!O2i* zcRLiw30JGp_SfKAP!hJP8;a8bJ#89X8@B+yuE7~lE}>NYD}~@sNVu!PVTeLB z(%?9VK+MqKc*ukpufYkBHgSjsCqi`Ma1BlZ<;5DD4Ec7qLxBtlziV(Rq)Qy9!L=Y~ z;%p60hYX2(G`Kb#O+2T;8IYLF0=>fos)nCxT+L54uI8s2SMyVitNE$M)%;ZBYJRG5 zH9yt3nxAT1%}+J1=BFB0^HYth`KiX${9FyOAR(YY0Eqz>4P`@8NC_#RLhzpfsX$pU zz?4t{wv~Vq^w-n#As#r>2g(J&0_E9ICX@xv6=Fwnu$O~bw8dP|0$L{< z+e*N{3TsscYUY8x63Ri>%K$y2XR<))_i4(+j*9?QUA3iPp9hX5pr>q%0^WD7^wx~$ zkJJQVxC~0ds4moukOerxgS^1@+bdkVuZRLPNFgp(>IkloVHEen`cz;ePyr|a{qg~n zV88=(eoGfmLq8wjWf-qBF}@Ul7Ro@W6qILRRG>WmW2;@|8K8bTCX1Zz_6A~HVqv^3 z1N$OOGHTwcWzcmsOT&u-j0Y%%XdkHjSfJIiUh)Wm27qz*(Oj$Sb*2CtlMLgD3X?L* z#S*M9DcF9$-)xOWQSljB24#+uq#n&i?Pwj zyW2r)=3Qs~H% z$yxoB3WX|5r7G~|^GixfczIoI@j&~0Rat=|Qz1EhBLIykFK$0&5S7n0=Qcz2tui~*-U7aiPbIVxVY)6(n zFCD$Ar>*?1+TSG!t1ipV&qRfxQf5n0A@%AWT;AQ54@Lnu8}y=*=b;i+W`ka3ijw?X zg`{_4zhWSC%{JC-t=&d~xEb_gNzws!U`^72yCPxV z<6s<)DFP(ce0_D##LCFV79+R@(e9=eCIY%jFf#E22qY$MK!SjY=%EJ`CM{V~m<0r_ zQsfJC5e8aFW|HIOvOGn;Okjmrpd~b>;djkeV1rnr=kZLFp7V*>d2-hTl_al#6&u_K zu`<>cxC;=(L*OZJLqvnXp$8)9K13>8|09%XBU@w_jL1c4fPNUT-`zeVfF}HJtB+mjwk_mCZMKB6>Qs^!!QxTSHU_ED5v47 zU>Lfkn5tj$EJJa~_Dyr*$2}h${qS$bU)db_?8&>hREw3VGv}n98Zmf#!-fx?rgvHh z#V?{SJV|{UHVQwqZsrl2psD2-^lFdVOxcPzN@1OQSa|yLz1xrSeTPgxnv)@_mMwoj z%;ggK13B&Ur|mmm1U$CYb~Z_l9BX!OYVGlaJC|+_kDT<_&wO|`r1N&Vuk|3}`YGjo z3YH)JZ2kO>OUJC|u4$7b&Ic_h=T|QzTHkzf9h$jL%g0OLxgsRxLQD!pLDOHgAR;~G zOQYia#XC2fm34f|sOI0&Tqd2kc4o7w)N|pYs*hRa^pq9D+UE?HEq@+z>|<*}Y+D(x zpl`%YM@TWwW;G5E>}XjPOa&YvB9?%qmJEUs!Qg)Q>Vo~$p9}B4y1HI|_!!GPy-9}Y zkR_X7f*4g8u-!gfiVrQIJq`R^{JGV6!vXhJ9V8L0U`>cdB9ZWhehncLf;A37s?7by zAr$0fqosU}A1&l}ixHI~CPbi>JWvHmAyI%~5{YCOCPX0-NLbe~f}7y`mmUY}K(723 zEvpbFO0PYEj?lWA#8bX8NIa?~Q}xDE-L_R%Dk83>ExvN;GhebSDCgqn@bFK2hU_Ta zXPXj1*(aI}?YiA)b2I>V>~O){Pqmpke0o$>^|U6k1C!dRFu(gUcr;->xox7HMS**B(|_hXfOdS!m7f|aU3BtYEyv?lyZ^7HjD!_BrhTJ^ z{B-d59V=UFf*$^J?(^R{nk9|78*f|*o8>erhi7_g_w76F^W)gD zYfoM?iLujpcBbh})LIq7()*qKTjE^9@Hyr|v)0w8A`X|KoQETKKbcI{dCwr!znFZ& z`nbq$;-Xh?GA&%bjC?T3^6`V1WsUpU3CE^=3OPkRJ#_u)4MBvZA6Mqi&b+|A8k(?S z!s&b5P#$N^g#H8K>38uipNH4hBKZ?v4@4Gy8gqVL>qDD)V?LZ^zNTzV$ct|asa+BV zMfA(i<2Yulns@gcsWM{e$Lc1%eg@R4CDl&{ly<@kEMqC-AzdW&>DKG)(47Ze6PK*F zD(xdEIkE7D&$!u*65M7>?TugFFWdy5w2e&s`jL3xXO>o%qiF#gX+m^R!(+nmR{{~i zgIzDr^IgY+#t2It0s-jX1Vooa)dXro7%hR2%6heDs;osSr&Sm*)>I7(OybPDXV3iN zbc>cSci`RSjnd_k|G0Wq>6WcEYK&-Du{NqOxg z`!t&8qP`4shl#!1VHgsM^zG#iJ^v@}5FJOby4Anw3IVTm=S?|~itj7F_NZ-b$(563 zanbNbo@&J4JUX-G7c^2|u0?L=+4BoRwNf9vAq1 z9ZzH4_?ae|{^G(m$^Q8rPY=H;N$oSC*qPGE-ILlyT&W(*LP)mn@!5o9iJ~<=;%f&F-t${XO(AN^+M~g z%g$55t@l1Y8NK<=@>MGLZ87a5*!m8|TE6i$BL;;A8|-Rr-4LC5WJ%E1iZYvu#fC`6 z!yx^kW=9s=+MEu4&q z7gp`(ByQO_+;4JYv7{}(k-2hjbH5k*ijFD5+)bS~;*Qj?k7n#%WI0h^hVye>KXAsj zyEgYWZ#X7xD@`Q++J_grX3mBcr7f)ubwy^EW=&)k+46;}DESS8YwY(nJg+`xbN+X$ z{zn%)3%~gumMbP}jXsinSwk#9ti18(@@kl)0<}gaTa< z3O@A8Xl4DX=S;ZL$JA4ON3^%rXn~k8=0D<+xrl?>Fs*x^2b02t?96<0LIj*LI*}E0 zCALCOAqW$0VohS(eTYON(BW5^dn~zP$R2 zub#mB`Q)D?T>GpD8*)v5@$;4puG?>;od%4Iik|Xn-^)$s;ti*6X{BtN8mo0}?C0Us zhO3!`gD-FFDl%aO?kRPdpLFfnTi!MQppS#Bk#(@alnqsz;~H+~$vfYh(54}I(cVq% zuf?v<(p(y5K^rZiZXbXD)y{tGg&F&<&C5Tz$#~0Qi@r^E$Hr9Nzc1KsbZLnHnXG4f zehE80!DZc5hRyzcQw}^cpFNQI?vPJ~(qvM?=a#*Qe?!Uc_(^-qFFxEbdu^k6&97rt z>c;Bsxpl-s_Gu#T^TB=@M*FHqir1&@7~eM+r{j8a1H*i&zkOHy7z;QuQk{v~qnT*8 
zx59T%-=ZjRuXvJ@G=lJA_fFB~d1Q!PC8Bh>t61tOb@Ol+NZr!Ke@KqQ@@4m8iRY@C zjXgYUH|MQ7R)m}T*U9mBowcH%5DNxCTYy>vr3U(ps&g8;aYZ~_5l<`{NP0zs0SK7K z_KF4}|F(m!fbcK6P$Bg0V2wkdZ>$@O36pNqWx>FcXt@apjqL1A!ppLhnmX1oU=C%JywjHBwQDp z_zC~93T~g;Dm96a-rLw|;6p!f?p4J$U;9VHHoX|I{i=He(e?NIOzAH7J-h5a?z{Hj z;B&lftO0TJi6w1Ex$wKeTlP}i+?v+<*nTcC-s)5xnd6hFT=7tnA5=S|W^dUpiwDhh z7E27p0$;0?G&`#Aa%#G@Nb7vZ>AA=I@3<`5Ui-eLfpIHi?yC7833K`t{kCIX-^H~1 z8ASNL``fFhX3Uc-p1f{P7$sj;WHTexb;sI~@6{a!?(de7pQ$ zGG%$y&4Q^{nVd*sa@!Pk2du15LUfj_ffue1;o3Q~-mXD>koN?3)?SiT3_|n6Ehyb+ zNotDmYjiXgo1dBbZrxLOhg)qzn9(nU^w5{IG4}|!oT?3GA6<6#q4OR4nHzM%qqjFF zGoHOWUU~O+J)vFuw*J>6GZq*QCVW{Mw)o@ANcW?^Ro=~+9;iKz1|1zUYHL_?-h;!< zM?Ct<8W8i{2$Y)H80$=Fyyp2vJK<`TGIs9MtG_c#iWcPS!>JYZW0|ZA;g_>Bg>iRo z4jwyqUx%?v%Qm01UGiI-vfP&57}0bxdy}Ne_^QEU1gRpIBUQxouIT}z_N@Am(zl_Y za?*dq)d~?r?OPrHPLkFW6$D@^0m6X0mwF;0#tsDtdiak=tt#A~;{gth2RIN9fX%hM z_@vae;H|xw-&~~|?Y869mK2*MLFP_5j|RmyZzFk{5yE$jJwUg*=8>&^^3!QdXzQv?2Uy9n`kZv)?Zrfysn(O{zBcrW5djd zk1~oM3gaF0`43tsv8P)5ZXI&*AP-;ElJ)v{-fQpFMx!^OJ8pW)*5u3FOIJ2XbzIK{ z&i-)cI$8U-)Up-fjt{i=G%!o{%<+5v`Hu4-UF+y1?$UDQO?~gJ;X^MzeHuJ-+@+D5 zMouulJM%N@?Hf-zhxTiuc>NY5gqEF=rQ5xV()?nrY*1 zf50nWHa20$PdaN&+11D2?7&Z$_CD>!>G-`hb0+TEXQQ%DGvRJI!Qpz^*Ly{XPmS6* zYn=tVX?4aEiS_VX-0($dlWyA&IcF2;AAfLja)2HF#hLQK{NLDj3Wn&!g_g8_fNt$t zgR4rrvd^$}xB0IFA|H4)>O5kH?=soecT~vT{RfogH$Fv35tI zG(?mp6k@?{Xs=)wkHjJ|y@FlP-<$gUi}qDW1U4so**vMiw}NDgH}5-1l+!=BdUVo!9+ zz!Cc7OQLyFrFja(vix53&3rXsMg7ghGBe(}i>gf9g<5rbx1_V?2i1)}TSl+lFHhsS z_E|iBJD0B} z(ic9>`jo|++u#)7l#>!JwWen052~-bJMPWC8LvZ~uYd75z1#hHzWus;>p4$PUwh41 zKc73VKAPdD^@=k2y!8R0$?X^It|tdCZVUIK9ishwXzjXtn=V~3oD>(5;wc=#F&n$_ z4d=sk7jIT}{icDFv+@;9TU7@FiKLaV6W6~gfEk^kwXZe$?X4MOEfj{MLz;^31v<%> z9T*ayKH-3+R6K9Ojmxh;yfSKBz`6Cyig~A>4UzV_Gno9-M1N8V=?rOOk+s1diDdJO ztB1@9dv5eO%-}w|F6Td)_pULu?jm%)F?9F9*Yj3TBg1s(S6H8h9NRZ8ToDjbVK z?9!!6%FAs(h0V2Y`P`3P@pkcty*XPW=iUCjsMPGqV~_b|CXrvyx3aT}?yvv!W$N!* z6_2xh)_*~s5~8NxxKWfRo#}UGQBq9*y%ou}jiq`*oAT#6crdLZET^Mv$^PAWdBcy!7c#ZWV}B7;5jG%Igf%!AMk?q2(}C;zQ^KBS zB@LAaQRkykVk%xh@BJVKP|Ls1Ef%ZJ}=ynesI zUEhKm+bTXCT4%k7tTSp$3OB-Vt4~psb>;B}yuNQ`SuvV7)t+8{Qa#lPeT-?J)u#MMePN7|b!;ly5@s)aEQs3%hR7-Bj%C9Czvg$itR}!^P zT}T{~LCV?QH15mSi#`_|D6O;C%ywxUbuj7bGN0u|d$$NB*F={XMXLiB9GXfy;P7a; zNBz+?AvQ&?2tyrN5AwW*FP43j1Rr)E8}$3k4VS#_&XuKdgIfl9%A1Cr{#s&J=rG$> zbn;n$>&?MAPH#PI+Qvs#t}8cA{*wJ>#W>xwHH8xv8uwqkbR_?h_rw`PsF{sbI4(#} z?0RHG5>(*~L6IJ&n$!QWo3r14@YYKwhae`sRZ^?x0U``K?><8m=zy$IjDT68P#_e$ zfi&@3jl5iQXs4>|%J{8&hAn-Pw&UKzty%5g+(tunCG@3Y+?jq>dMZJ?c+}xnZ?=7% zA4}7{va!^!gDNu9w_GwzzN`KDe#(h4XMejo#k_t*wPN^4(*f~kDpf7~@KZ}}&pJ3* z`&IJx2kDo>F7Wf$Jfb<<-kEWz)vqNqx!6$jAa)1$XGGR=9`Zfo=(qdW)^~m*Ztn_| zmmeB1R<+7<-A}b6c#MvE!%G(iwVnvEz{kW{F?McXts32Ab<(_a%iI0>sukx;TqE9; zB@UXQH}dPL+lJ9L>B}CnCp8*5Tja7}FL82id}QJ4*(df_w*S&uJJ`=+_?CM^`0hUi zWvyNvwB=s=E9I=~xM}}~y|00<@~HBj=j$fiq$$OeQcS}QODUx(q`4o7DNX4O&Hc#D z{mM;1HcJta#TIE6k!6uZ#7Ge-8`-`TSr(D96cLfK6fv^AEQ={pq=-ln^jlL87clDRSA2PnJG^5qHs0`zhaS8lo<1+Xw#uf0L zrdt_ij+vu0GuIpi{=-=qf7U};7=KnU>uvBCv(Pr{QWn}~UCu(=tSeb)o7I`s3I3g| zca)Z`vhnzot!3kqm#t?H1b>%XSDM>!=P1LS>n;X=uls)R!`zs=JM69jAIr&yT}{Wj zx*CP?M>z8D0${{ira`VWCW(tpG`wgR^rTY=k* zZNTlucHjGFVJuLfn{bH@I&T@fcKmC z10OIS06u6w2znR_5OqEXh$kfS7 ze&JcGO9wsjKE*rluzr@qX&9`@w>0FFu6n56%7`SBVdY6DYCURjCSBuEKBrvX*RVNL zMTtZw=+(|tAiN`+im^w?mD5#mlJYpYWe_}D;Kb1L3 zzZUw*!e{rNh*WHnqjLWs?j}{B2B>1#d6*i3r-zJcRO9*n)=Ycf>+N>lce+2M&E44-5!j%ZpPXiM%i4()SD1#-c}yQ&asS|pJ2Rf zW#sz~W8N`FySI3XAzs{?x%D{qM6NMM{aC5cQP2rcJ4p+jMScNvS*b9bHH>36oCm_G z7VfL`um|}-5YCHmsom~9^KwTI@^IyEenzf`M}Utylms<&%Ns%CoVF94d@^XNqq{4g z!CVi|c4#h13oigI0>9*{%%oEl23b&@_C?zS93k`lr!d~pcSCi 
zpmm^)pe>+npq-%IpnX?!J#x?p^2omiD>H{hS4d|8+cH||XfbI9Pw$WjZPONt{Xs=r|Y?nvldPnuq>J!!Nz_Y*$ z)t5thC_9uF%C9~e>MQb)N93V_Asu`%C8fl2VkL1zH$F;C5*utj)Cis9$WJ6rCQc>J zAkHSvB`zQ?s%{T02`wkCBCZ8)2yMP9*&5mb+!fjjJPK`NeLj%L((dtpuYzj}KhMB}USNRR$c~>P{!waeTDQaG7TR3zKZC9|)YTLH3 z;gaw=)Y%x`V%t2_7~U2d5#DLrAEmpm;)d`(do4nZg5iUZA7SZn;t|$4byc!8e1>B< zPszpV_K1qOBDs-Xkv@^aa8qOe`YnhI?m}5j8Be4{>PG^^5_^3k!@vjaRhHf(VV1^; znXCMU$mpw*t&y?Pdt`#FV`Ng-7{o(h;U$qNtohqGy{qPR{H!hv505lMPg`U@X4Dc{ z3|tmj8PdV8p?p0gnXFAGL>%&{$+P0XVt zzq&ozH|n`oGdhr1Oe`gqvp)7qbVM{tOkT$~L>qzQq7$!5CP$~*a|sV(1N$g$SUQP~-3gK&p zNA}cI*TjJ7no+j3LyagM0}bPAu!CxvYNoN~%uu5sBy(!!)hvXzr>OHPinY;pY0V0o z4RtOvr8*KXk>i3P6ux8-eXuI(Zv0*ZjSg=bH3&-MJ*F_FBEVy*4|t zADXdiq_h@0r8d8|uRTXfuwQBi*7n7jjNMUNTw7XO&bF0Za}=MEQ?Yi0oU64_BKF0@ z8*33|Y8z|E)lP&2k)?KO?PQ$BO|>&v2a%X~@sjc2H-h$m3?*U41m(lYDPMIpqZ71@ZoIczb+E+$UCw zS9HZP@WUmN#;d6Zql(Ak>G-JlnD}_$qw%KrwD`>UocO%>!uV71rSTQ<)$w&*vCoMe zN4n0;_{R8__%=H($9Kke1NX%bI??u6Xlwk4oSboZa{N^M4D_FmUyP3mbx6&GO1Kia zwYw6%5`DU&MxyX)B(>umBc0PPW0TYSuEf6B81X5>pb>6SES{)khQaDQQVeiH=AtPAn6@O{_%y zHTEq<{ImKjHLPbO%8M-%MBD);HW6E;mo7w2_;#W#u?LnPp0K^Q7_F8h4%vKUm>}zy zCR(q?vP$lSN|ugD9K~o)B#sIn?oElz^sGdg#D%W<;4j;emu;hL8x@_>S9o|*CuZ9* zlp`M}b4l9wkcqKn$^0btMbeWT$eMYgGil$uCR68R>YU8cPED2y+B|yMkhE`QGuX<$ zkIg2}8$vQlok{Zc-E0wcE{Y6`b|xj_bS806OHNE;A0?+IXVgx#qfab136D+AO)f|- zN-jw*PxOIgRadM`u1zAEK)yNDnB1D&5zpA)F3%C zQ$tcdUc{kT~W-AR!*Fac8h}>y7kz-UmMSK!>bXe#+Qnv(xD}(QDV2$nACXm z_$VVnWvVH9EH%v@Gi-Y-HIpNsBT+0hFEu{7B(*R#KJ`?rBsCrqx%IWBR)iW;t5fR| zQ^any#iA#Cu%ZaV(u3{$QfwLclCE=J5Z((+w4!DJXZx}AFk;Y-uu>o}Brc~{64!(pf$+ohrgW?Le0n>vjku@lo>|?N#=b}&N*_(1 zh<2viYl~yK>9eeJp$pTO>vY*ob=e}R!wFEAUxx^oK2(QjSU0e)IDGKog7n$Qp6a%` z(#RgjvAgRk>qaCG)J5x(;(>Jy7*DSBD40ADX{l=zx!kYn#?@5RO+=l^GOD_%-LbZ~ zZbsegy16J_Ks^g&w?@vUn#68h2>Buzo7{Elmek>HS+}Zgt@wZ4hEQYO<~rOP>vq)P z-UxngstL0=P76M6-#;@bGbH1)S2j~o zRL4kCJb)S1l3GOv}v7%&Bh6%*!k!J{292S(;gqS)Eyz z*_hdq*%scJ+?v^$*h z?ZEdEZS{TX3+o5e53Vn%57ZB<57vi)@%l{t==!ns6Y3|`PpO{{epY=m`1z0wt8W1= zu3uKavVKiHJO{W*)~mj?e!HO9xgP#gzo&kG{h{QN`lB+o`V(Tk`gZbXyWs`mWx)nL z?E+?#&m-m&`w~5(p<$r-RYP$ozoE1N{?ky|fLPEFZAh}Tfi>YXb*mb1@;6Lun0&3T z%taeRI_gYiO?g_M(lDEdr}fn82HSr|uyhe|3Fe|2mZQg24Qs7555K8Q)lEtYeo|#SZJUG&K^?C3FkOy%v8u; z82PC5CA(l`)5vLZ8uRQ0%IgUVyPV!WvyFwPopt0=$W&6v679qJ?I z-R3CuNq)oo48P&s#lKVizyPM6i2?n#(W@m)66$#rS}BlX1IIp!GER8~wF>V~{aeyVWQ$yjmaQE5=u}0`tRW zvv!C1MRTndF~4DcL;JY-O>>j>sQGW^cCE?WVYX>g&F`7JwZAj>m_O7$ZT`qSu6@q@ ziP^4w$vka#X#dN+WSQDZ>t-uoTW9sQZr3(i1=av-1dfo7VICO;)S5RnND!Tif;9tXX#n zPv{BPF4qNphr8K5U;l;DR7S}!#pJ3&`CC#|BWN6GB51PeTe?nC)i^u1lXpVZ z#L}lERZX^YI|T+*O_f~!k_f*TqLlkS_kButm%&F2Nb|L0+A;X*o7$W3)l=GA@YOTg z8D(m})P4zHy`)`&uU^sKQCYf6cPY1ivwpM6ksr>6f8MU&u5Qu`^gC3Z-e2#pdg^!S zcd1+Sq54qOOTSO|st@QDdWGt(SLs#iHdz_fr^jb{d`8`l`5jV6Im=%17qZ}LH7>Y1 zxGuOcxFxtPxHGsrxG#7xcqDi{cq(`X{Q2O;;Q1<5<*Lf9>Q&XJs<3JRq-UxISCv!+ zs)khstHM=re9Baft{Pi4p=wgql&a}Sv#Of$c|KB0)#9pUz?G>D|(L zwzb>0RBb|UqOrAVJ96o}4fXe+-hR|LR3$UgvasV-0oLvg?7PpYud2S-X{*$Su+yGZ z73w+luj+pFJW{3Fp?0bVRU1-J?MA9nud3J72<*c*R0KQmO%+w=)deMgt?5v4&Cm>$ z&@9bTNm?^y{n~n4rL9ZWB~@>|W4)squuO&;>9Smw`migZ2UUVbFxP@nJAy-kzW)}g z_#O0K^@jY8^7pF$ZtdUC7>2xy{M~fFAHCP>{6psX`_AdT#_^|0f9N>>E873RvekcA zzwhOL*U$gGeDe>P*YN+uy#H8sc`v=cYp=bVpS<6_`@7D$cU!05XCA++-tW`j|7}k1 zRquD5p}+4O-fz4APWoev;}6yOZu{x|_Ids1;lD?n|Lk1+d4?UyXV}q51!}T-9J0Sb>c^*Ak9rl) zvlZ$n(lB*i{YpKcevR~?en$Vf8V)bItb)b|jRG|SK6AIKhW1fvOg9U_bG=F5Am0!n z;XslP>6Uyb8+3P@EoWH;XgK6u)0<)r7LoA@x+BN4iBlfz(TV0qFzki%9wENwpAdTGW@+ZR#oYw7OmW zhuR5y>_h5@xnF=iG?(deUz?wIlHTE+bUq1T1F8TUmTy0EyB>6nv{m?68ye|0$qAxqOJ87{bU$L*$SMI9>jqpX0Cw&dx zmA<664Sb_-oNpqm5k_e-v=^gfGFn`pvO(oen(CY3n@xScndU;fohsQ&_p|`C$k!k} 
zOW#hK!J2mMB^b-~so1w1W1Elph9&t{`PTY2peJ2M`>tcO(_|;vpEh&dx~#v|x0QC? z3XAOkwzJMw&LAnJ=yw-odwr8pDtZq14*QNFo%Egd&hnk}uJLt1L#N-wN`e=wc~|<~ z(7Vas)8E@)fHm0f?~l)iuu_ZtgZx9harn~-zt3OcAMUT_c%*baJ!PdohBQcOcw78w z|0sAvw=vTb!~?L({xQ%u-v6k-$M{Ac{<{TF@Z0Tpn;zb1;e2XZl{N*Q&amw#TMPoU88 z{H@|^fdQC@V}-zAq>@0u-eGnd|2*u8ap+Sl8OChI^Ta>8TLE?vuc0^7&rmj#UMZh8 zV5Qash9Lz5;XoXHW{^hvIs;<^69SV0Qv%bGW(Ar9^8+m?T`Y5z(|~iH7+B_Q3#<&R zLCJcY4~?+n>A)uJmKlN8!1h2J#+HW~nZTaFexyUbanN}bwNC`vVZ**SGui`ZeR=-f zfeV4lWqMh5V6nHgEUzrTtgo-K%!58V{96K-;io6c2Ktg^eani=O8stMxp#4y%wb|# zC1%!EHli$AmZay;@HLh-V7=QgN}OEy)L1sIY+~7D-(G(~*;Htsf&Scq3E*ZU%?(WO z<@t&OC(0K1-DQhlrOL7;C~bzHRQg7suS&6T*U3}1ylhq3TJL(Xf^S^ehO*6NTg!Ho z?E>vBI{?}N>EW_tWhedH%69k*`MY8Y$EOf^38j1`PXqJO-d`7rt(XsAWbisesB2RaPgqxK_#mb8?#P_`Ip8PZB5;n$R`5&6*JLx{V&tXB%k_@Il_kYD=my=O8+3zi&<(ml zH|Pf4p#K>p-}m!h#oze8|IhMy(kwLP|B9EZ?@-@RDk~q<7vymWPjXqMpmIW|oO;Whd=+S|qqi&Ha4pYvgL;B` zg9>c@@>$BWhuGuJ_JJxu!$H;E%G01xc0bu;K#zi&x|PoZE$o&*1zHMP0b1QHUw7T- zji4>>lCOJ@@@=4<*K&?$ALtV8N19q)H4JJ9b;zw`YrdX&fI$@TOi_37E$Q|KAs8SE(m1w6y> zIf&0;q_`(T8tobDnb5PhyTUWcGsQDKSNF`4+|%rt?`c8Z#hzuJm7X=8^~g7&eJg0Y zr_Hm+v)^+F{87&dPrK)==K^XAT`tm#vWxPH@{9Twd5Q*tEB3S$l@^s3RZ4l$h@xmw zvZ#U7*t0ijT+zg$$wgC(W)#gXnp?D>Xi?FUqUA!;ll#2;e9i!b}u z8{Avm6}ft@UbOkTv=wOw(k?6GSyQyvGeM~5n4$wkhl`H6E1(}O-3NQj^&Gu5+g(w# z7AJb&cYP1No42Vdv%tJVJ#5}>R;!R1HZ$sDW`j9WP07}??^ILymi`jH zrT7-x-Nsy?Rt2vmXD!*r{DW-s$rRb=)xKdA0BLuN?bW!9VZ zYLGd~9Hs8&Z;uWBqv_T3zSe4BkmwaO{@wf(`4-|L;`Dg6}*D%r>V9 zTK^;x^D{(S&(o|kThMhA<)bN=noqOl)7G<;e2KW8b?zj7h;26!zd^}ISm)c6u+=+v z6TeRUN8*0s_lc#%FB6|5Qor>t)K*2_Bn}l+Jz0k{m0!FAevYO2l#p~UNYof2BZwU#_>>w1;^tHNgul=^}ggLwG!oNp~PKhOC-PR(~x z^BdIsxa%#kiR&MP?-U#6u+ANy8C9RC((#kmR=#h~G9JIBG9XjmsTiC7U zd_kj?+AcfzI&m%WRiccif;Bl>;}ypO`SRP4f_g~irH5ojW+_KoYFnmL-k%yu$=^8m zI&m#g#@1y|Y5TY!T8aOEowgcFTYa6L&|!TLk`GePT^z}qc3ZKM*^}}#$9a+bFb7{J zuBBXNbWv8s{Iu)SzzM8VP8>w*{X4zaYn=tZ)|xJM6R$Roirlzj9RZ$You{P^RudBK zXPkSr-S*01)afB|{m0bvHQ~*>sO=K@N@{zXc$|2I_)9@VX5pU|)Si<)YkW`InxA0l zRAPvjV9j4SV}4D>FK8Nqki1O(W%6stuc5Uck(w^~ZO#v~?c;3q7i{&H)N{t+ej`H9$vl6?A%H%oqDG(wwrtzU9)ex7y{ zogZK?-*noF&0m&YjDHdR#uK*OTp{%Z%}VPnu{I_1aifu#B^sWiwsK0mS-nKFAS@ui zdpb`>Vpv?kU&~5iy~uMlITHOD*WbwQqqW`C7Nh1<#LEtETVNx3v2%+v^1HyhtUJUr zxl;ROj%Jj43fT4v@mb=FPOG1SUnQuwT0cY0u%PxF?f(eJ{5|UlTI@BB^JSvFvh^b8 z$~LpFW|rQ;nvW0@4vPF{_WQDfSREtZx(N9Qv8R^m{5S3|#*VBX@H9(^#|omaU-A5r z6XSH&*W}!D}YAk(% z(dr5IK8KM=@J^25-)#+@OXOr@3@Uddifs|K&A+!8yRFa3nMPb={k@#|te?Xe*XY7- zGXIGuWFyDc$Q578G4q5pA7s4+P~5?@E}G!(?(Vh_+=9CYcNTYd2?W;=+}+*X-5nNp zcXzn_&wJ;c`|hpUt?BOHboW;6Z12?6H~sCktc=Vq?^u{3x@mr&s>Cg43tJJdp2S;B z8R6N@kM<2SdoshhX96U?Q!31So7(f;yCg^2U-nugifQ=vZ{&fCacyz@xP4)K&ms$@ zr7_-WUd6iD=O5ryB05vu{q~5aH_Ola$9tCjjXO8 z`KlKW+J-|$%Rjv zao36RG9CJ{w1BYpI@uFTj_Y4A%P|?W?XJ94$7NZc5ed^QZL5D|>uBfBJU&-bB6v#CUj7IKi4aI8B zXsN3gz-uGt86n!V8z~o~SSMy&`@zbl@{8ynvLKXRHaw?bJS5A z^mtXbH^8Y}yZE9|`vF(+5f18Bd9etK(U{{kaeWj)#|HtAH8Y=MH=VN#WnGtf`(YkHgE=;<78 zdTKh@oMe7zrYR`jF8ZeaqKfn!)xRVf^Wif1ScH4M9yy$9At*SC$8S{C7WQPcp>Srn ziE^}x-J*wmUKivzDEhAIDIew%bCpp`zBBKK-M}yJTb%dI&FwcH%a=vTgOHcG6#Agm zF5cd+=EK!S=os*Q+inm)eJm-Axi&g0HEWybc*No2FK=Drp9kDt!fPD68K$;7xW{bv zq*n(f{&TVl(v_2Rhw#l#`8SSzaO)Nqw6~D3UD}oe&ebHF%?oeHH-OpYRm)}gTkD(P zErR!-qMo?)^&;~oYO&l7p^^3eKAni6s`ftZ3G{kd_L+x3X6e9O!L>>4Yv0Lh0qJ4n-lf+wVPZR?;{7vMfW~Z>tOm&Gv0BRaf#p zH4kPMt0ML1Ty4MJ=FS+YlRcPi98*&-Ydv21>N~q}u4y+ow_r_U-}%K4<^B@v@!yu! 
zd$1qgtG=w7wd|O24P`mS7{~fWuwQ2HfybZa=V!i2)dW8`#qXgGrRM!^g6+*@bhJ(W zev?Jv$NM}LVE1GH;MlNB&}5Z0ZZqU)6YIVIm8L5SC(aIh9BKz8({X~Jk2!R6M@M-6 zfzV7{fV%B>K~eEGX)amn+Ab$|lSxhEBxcJnf11R2&Ysu~<)O3g5a22;k^bhBNyCJa zHi=gRpZrE*c^C6yKkFLda>6GTO+P5(aHjG0NbJhVhrz}Roe}Hb3rOD)%D3;~mOg3k z2ghgZX71H0#jTguRaX{tQc^{efrLL+^tCJ+m9wh8{VHk>^FnSGz$+mwsa!REoP-554@kcAyW<2gd-8 z)$iF3V#k_G3DW4?sD zxSGyK(9G@_FHRJ-AQR*TF(^&qhr|bt(bcbCMz$AAPx4+=b9>b}T6xR}#AVkh(IPjg z>Yt1CE9+g)9TkT46Kh9T9GzJ&UK^Yu3ZC#ruxdfZiMk6_{p3_WD=vqzsn#&7lEeIP z=jE}6n_Ailnn$@SVI}TG6A;yrKDd~iEf3nmFN;-hEn+1jgWBXDW?OJu>fP(0Hk9VNi((tXojG!s?^b`EBE-P&Azmx|^WT1R zE7Mi#pIWD%p3qs#<1fX<7FF!)oFUuPa2H^;HR0_?N@SXE8Rh4L#gI?64yS<7&fL%w zp?Mz{BN)Hh(+=f4gbzQ*oyYzl@WQ}};-h99t#Pz{BfJq23e*R~N^c-HGS1M^|792N zgg;q?Uyb#3gJml1H(_k%fqPv#vdG&HXR1yKFDs=Ry*)k$SDeX-!MKy)9hx>Ir&iV(#cQhc%ZdcfvY>kDx3>M!8)4Zw}wr9vmv`79_=MW^Re|t&IPJ| z{hmweH#qaJ~PY%-RNAa zu`aHBjg21j81dCahbrLd5%EI~YCy7kWrj0GtVH0Y!cw!^P%oX4;bAfS`8sr@NTv_h zVVtC9*lc2s?!wl|kDK&p5Qo+ZqMNlpvKnOw|n%RRvWr{bFZ zG-b1pM$GS)n@9-LUAzvHys@9!cfha-+6HZ?8*2rrm=K#wpfNw{HVS2Wpzd>QO)wTF zsQ~tI!w)D2uPUDa1xskVSvSUj@O9FU;P+&}V?(Dp>Mv%?eJL*`Ny_MBW8+oJu zQAp|DzHkTil&|@tR{R4}klhF* z+o^r#4yjzYi8zV?#Mc+nnDFEXTVeHc;-BYTL-1KnIe`Q5dY1=tI z_v>?5kGLcFuKSaD?>NS}JT`a(+K6b!?+;h!^Nmt!Oj)EK`>ey|!ERgrv{wJQssXp@ z%$Tljhrbk7M_uR?cg$t)+qQXz$%o~g zR%VP{#2eMm`Z1BV_F?bt4P45A&#^Tv2+!?tsYs!5vPlMro!7Zm+-^e29?mo~7H0l0 zYLbV^b~rvrZ|qRKsV&}Ewki;2B#%brTZl*P=XuPJQd6Nk%Q26rQsH7L%AZNasO)*> zmSQ%=ICt?M^g7wjo=D1R2=PVk>&PC>mxd;Bf`?jF#3h5qMwgy8@Uml6$$u&)5 zqzW}GV{ApDN+!y$>g4yN5;VC3I(@$_$g)G2(YKUZqG@zJJ zC}UVi=eS#v3XrGW9HF)TW6IlbX4I8fnuEEPK`#zBoh2M5&ZSbe<6+N!kXNFIwL_3e z_@|{TGMY3kL>=1uf>?=P*Iw8enIf4l9AuY%p91TxeWaT&P@_Tvq=eOquKj>{0D$?IEk7 znhljro<-|$o4+V;r9o`G{$BnK-Q?ZCZgf>ng>n{Ze`4@1h!4!CKVk~1sxD`Hklim| ziQrfeL0T+JV^9a+ZD3wtGXCI@x9x`Aj$7_qzqYIm8Vov{V45+S!8E|vzO6BUmRF(H zzOTWq!LK3nF=%WNLJ~pye&d6D!+1*AqTFce=7}H$M*zqF76$<@Qqt{gfW-oxjLcA) z%27)3CvD>|q zmUy)po)y{EW6TwqhC3p{qPROupQ-RZf+5=UNgM8@h-T;{`xMo!|R6y zjzp0U6;X%@6dg2IPY0>-tM?ut>S$m{*=@`PQ8Ja=Z@XiwwKjD`=1rM2-eL}}Ow^+K zh(AnPS5)>q#pEgeN-Jjg^!g0>{l@cpYH~5ETL44zB229&M9p!drvJySsyI}dk@kW- zlK$G%y20)IO=XfzLV$Xtyi(NE(aAhWqo+vOi{vm4Wg+a>FSU3A@`eN0M*b%1zyID_ zNkkoGaEIR*6%QW+YpC(OPV}3pO9`dN#4XO585rrHy?6{1B+dYi8xE{mb0!B>1syIz z==e|%C2r@RBw1-IEZOjm98TP6ZRr+FX7EXK*q@6U2PvAfD_|#h`S3Cs#(pyR(jN%0 zPWw*uUv9WgtN*y_DZVp)VW8CDQ<$bxRh268SZ!c?E3hY#4`ukZKv8iYTtyh^R1zz= zRUHqiW3JE>Nj6OD^s5=HaM+_nueNDz=JYmsZ5Ev8vHT$>+LD)FP_7vHk%wA`tf*yJvFS>)sW7kPH|%|E>8oldFz06L+d_9&805%O1& zXEBkpnr=ZQ8aln&z<8->LbdYiyuhMm*UFUmLaElnJgG~l$VoNNcqdP8w0-u5%B4d* zPieHH2BFGObEdCq*XWFA(#8Pb#pRu6Y02x3D0;!}h_l(>SEg$K@qYEiCVCF~?tAnS z^u3V$^yP^gw^G((5x10IzTl4d3;H`O=VI3J6qlx7PS+O3JN7%aZEn}hrhVy$yx)*q zMduIBy3yG+ovZ~j?ulXICKgik9cT39-`c4-=zX|Ui~WxDbJIWe77Jr7`ctTuDV!25 zEK?ov<4k9pE2KD&*Cy*(E4WsTQ)O-KR#F?&fwgiRZYkzp6 z4|RYGUbRq!!sRwAC^8oh6$r_q^esWu2r)b&;z1!kuaF3Y9{LIy{T+V)vqzFL7BEf{ z2OZc^Jp|t83pzaQRobQ@!l=Zq9Qre`1KoEy4S52BI|1RHfS|s*RL|FlC=Na3e31|m z@_tzji-AjjcwB$X*8sDlb%{?vHHt&F->Q~S^q7%OK-Y>x^1pdp$>wX~vZL2wPC!vP z(HRIXXBfpmPJbR(y7`(kVZt`tuR(;67x4td_{GVA3+qX+P^`skqJAjyEMr6a>#=2fLRCq`r)*`?O~DUw6IU(`5$#QTP?ejw#X1I+8wJ4)h<3ED#<=v!dN zyFZM~+bPP!H8d&1eTbw6*|Tp*b(0{5VVEC(ufPXOV8Mqy?5kuaKI+4Rov_AE%umn| zaai>{b5ixMn<#P6YSqs#qtKTzqI&+4`F3feQzy)16EVQ&O(~$DD(LBKM=m*rU0cv{zwX=ZQQ9$h=pmq_Uyo=*854cGN?%A?95;N=@ zvedq)v(`=mYBvD2J%HL7z)c{~I1ac+J2Y%MG&~m_%NQM75gltrNWPk0@wNe|TL;{x z0}nig#^Iw=2%}?@2*{av#q}H_+fQ(ok8wN*BKcs&GyMzNpXhNtCIC0BK;tH$aSYJ7 z3b-dXG^`yR>k}Q@L`eQhNZt?~`zJb9Bsx}*OB{(;Jeo^UNNhxIAY#Ln5V=6r1Sx#p-Wq8-i4ymLWX5#tBh({+k$tS zW3G7FjAJTr!8nDzxy`95SYm#*Q8YR|W@>zH@=(%^o7EbZPwwHbRt1OAfx|_pWJlQo 
zUW3{~vBU!K#GygHLLJ{%xeCqexR7`*=G4WrVF(aVIeLP+BgQ%*(Bx$o_?Q_i6X z^9$oULRYL-@x?1Q(mel(Uc)!L8r~v-dc{dE)OBVX@1NFOe-cL$#bTzVEk(*K5F4OR z3Fk4`gLq0xEIIZ|ix#j>avMu*oKxaeB}Xg^vC!>} zr>BZ%)2g6yNlQu#!)~W&Zc(R$>1F6cG| zhm@DLD-t37bwuc!FW)nA6$aXT=|dpbexo-7iX8FG?6J(8n6Mo=FGeWWVxVsnJHv*; z1#36VE8TO%+T+#j5$(1n+B5XGJ9Ne*o^U>;#N9ALAWv&_(QTM&oG1T%OvIwW@2UKt zh8?V~PdcsBpdNCpH2_a+Q?nhGgV^O#Wywr~qq1kCrI!2Hmrv4m#P30{r3Zzt!ZsdQ z_sH5ZyEt$0b&B>zr%wf;x$aDQmk=zA;eqr%oc6l@c>BtKoVz{k{8_+JCDkaPlL{*9 zbHqeuhGC9>J;YF$?6YtuE4#^L^MS}a3d+p@v%o?WGAUiQ+u!d0jo+F-h5SEw}IAhN4^3=XvWI<#z& z_JD5J`Z|q#>3@y?RV-ATW8h6AGEQQ)MF;6NNKvMizj<;%us$d@l8oJa;e4j{^|5o@}r!rzS?d$$C-Iq zrZ7>jF-Z=0V}E0Rew6+pyrjYB52A_Z*KaW1WYa*zrhUW_dwH?5A$Y}OuH7QuW>ZPz zyaq28_%#x?-*0%Hn14wbXS|j)OPzVaH|-f4@}o_5MLCljX&Y(3489&Fxf6R4tB|U( zGBteJbFbd6yid?H{g%oM?*DAXx=ibdn?dUUtz}>tO_H4MgSel$YgWJ)m;9l@kP(RD z$GQ%@KYm=RDjD?<_Sf=AUFuWwzh~dA6CGE9Ke+;C@f)FWyy>fV{1Z@;uX;p(tBL0% z7}nPkY2h*!AQUtbBs#<~kyC1kOZwpAA{vYKk4Q?Qj7_0L*fAz%QZKHjw1qY>q0rkf zCu-6)HmX^lN!3)C_2)i>V=j1Rg|7F9T>5_snlV0dTPbm=BU%Bf<8ad52+k-_JF)^d z?7m)+8w%oGMcJEa*$OH%MRGGm6VpW#Ges6hb6Dm!JX))06%#+Lv~v`+^Ay--iZqYr zIA+25Y4zJ(DCLqgw+UJ%r$Vr`>4@wx7ecAMgiS-WO`@Ac+Lx^yKVI`8Loy_INpA;K zSb=ZporRYr(FsotM_KM1?$8&b>jjO~ZxU}95lKeZiVPoN1H3g)R9I8}J4qVtXb}oV z*OK1;f<}#IYib{>+pv0A%>6t58tuqgw|qM)8hmi8n?c-my*n}*d|@|Gp~OlXTk!3n zH?|F3;k9-V)t4?Pmpu=N@DELLJDi}-IUzo%h$e$;3KNDR7;BJ(7f1r>ED!uFkNhkT{fr#@A@;B{2tTj%tQgCWeW2i1=;w#ms4rfQ^t?cW z{zB=0_WrpWDiRBT_tK35{jVK9{#WsD6v#$Cal5y8kNpt!E9{!~o7p_NyxY9P^;yJ0 zXY~&Q)D-4e82^(~`Pz!h(Z^Y=q*})TdGKB&+7PeBnJb;G}r&=q6%e{GEWNCYGi{{0!R#(wBi+V=Fu)t^+iXKqsTyN>pSS(Ik*c^RR`IghWJ7#I$E>m0u&H6^{Al`lJg zXzU1K=TP@FTx7N`s~rZ7J0ataD;UtS<>||6&Yb1v3h(>4D;NE8{V=T1WN;G0S+alF zl=|7gZ(8*sxRUvHjAi|F{lGl&aIUi{fN7k?6u!m|5;Vt4h{@s_9Kcr>+gZ2a<2?@_ zg+;HMt4!DA7T`TU8J%e|E=q__=DGd?trLg-;gLp!GqE*xa&|N^wD~X5&d3rG4gmPU zNy0+%U$Qm{J3Gt&_x>O0Ki2=z{vYXo*Z)WUzx;Jc{(q^s{yP;PAG5fHwX=yMv$(aP zvx%q)(9YO|S=PkX%-NiTjr|7~>;J0&NVwS9IamY)5aIs&qr0b{rNbKhT3C3R^09m1 zXB!!(h%+|5`3Gx=MnyqEYMoLJp;-$=YYeO&LxWJa9GD`*QNvV3Nvc|rUO1tEfl~S@ zILUsY;J|svK^7!26gX6*{9Oxca&I@j8`}{@C`|>2=xmxaQQv3EusU1dACC zCUHRy=W|)|@%`KCpKnqULrDqh^B3ujs9WD5e{)%TyQr?Wj(ympcmE;(hGmDRzodx9 zwciYx?2KVhY~tu~?p_TJ&hZS%u%M`lJ)GkB%mM$+3$2gQMn-FO)wfRa+m#E@1Gz!B zeO4tg%LnjtIpf=C!R^W+^Yz*>Z=^{D#I)8yQ>8gHwWLOt^|)P1*Hf*>fY;PG7#A}Z z30t#)?P(F$RJqh(THb=m&yGQKijm5 zm6~~VGjcbZ|CEgPjSuiOkqsNA8$~Xo5xAK~o+)&?4<|UhdB5$;m~nLd$gBUfKw4$W zW)MF9K(c6Sk@F65_N^TAeYlJAVClR~+a)i8%E#v?X}>34*xu}+J*WvMZbkYp0HI$; zUybgk$dciIz=V(INJAwTA(effRne2UvDlw3`!y|}rM3Sj-VopU^O4|UUj{1hR*@?E z2SVthkI079CJHBJLAieOxxVJ+CpO{9&({T(_FIvKg04PK@7b=Z|1HWR@p*qS#q4GB z7Dn(ibvrcNR+r!y6!Hth5t}faZ18h63|1d&GY*qUUl5p~g0jVIVE%|ygH)8%+&@>! 
z^^2Ap8B(;^G1dTVr}|#u@fr-K$b+L^g57;NiY0vBUqgsKtLIa(D!SzLNAKMr|i6VKwcdpm??!RZ*CtfCBSU$m)`&$_> zmjAH=v=eb`xuB0?HOtG_U^e!+Hlx}KfBMZ{niBEm?asyxL4+;l^ zUs_jEPfBUysO9+Vg!iAqZ)#`%?HJ57*8s(1=m<3bW?|zbCUEyBCDIi%XVd_&>XfSH zEa&M8gfVc86J;i2;Ol1wFJw%km z&5!Ttj357^L;}{b^6&qnc5auwajrW^6~D7%h)dv4rykQejZ)GPhlo359K(|D{j2C39=V^j zq%6H%26}0w1p~8_M~9pvLHwvm(60j02AxDR(kiy{M`u%_3(&NU|1zU*AAyDZ>D}O8 zPRJZ(Zk$Qv3KM@{49cZPh~I?2rFsqgg68E;2r zfQlI$)zp2bExnC?0ru^8Jwb!@>R^ySRnn4)j~j|9kfgq~Ft(Q{`TQinpnDSoP3tM6 zufuHz?ui!6dI(1$R3sRd!8sfL7L{fxVt+sL%CIaooHQhx3sj)am|H)Y>GYqNh<_yu)$$7Hu7; zX@NrzJlze&YS@J(3ie5xoem03VO+Z}D)k%meH~Ea_YyDk+r+ZDADQAklpNO^y)X)p zdinnV?tKC4l+YeAiILYoSh4S@`iewV>^Id%dwwUYX^q&0@mX0EjIHibJ~FC0ik;@m z!dtu~nDm5p^2|7W*e=kpS1AS;dE07s!TA;_n$!o(%yzb9f-Q;N^(2e*wbj)=CME}y zWA8#Rh)3^q9Nh(GuguX*B&1T*=NPz<+u<)YtaYKE=7+IQL^;|?ROElqleP-hUh2(b zqe;(vqkpU15J>3T-M81TcI-Zf?P%gB#IrEcg=W?a$ATdu5C9n^*a@G$#4e(8G11#w z#?fg}#tf*;%&ZV<^0(!&;hdQ>+i9|dCy~k}&yg;$M>~wDmf5uTx^Vl&V^YGiKiz7K-^qRR zt=K|HLI#~!(_qNAUl7Y^mJ0@UwppI47=bk7M-xSUlC--wY7BF(J7$b}o+=Bk$o>^< zNuF4*6kJ6Ks!rsY;+~jiD|zgT_~jSQoQip6**X4T@b6*_dKc?CpDbsl47_*@a)qtc z{{8P@@l5p$j+_cuo5rS}vaF=}18DU#Xe?qu=^?9Rrj~u3JxKAlRf9jxT~vxkrifRX ztEC!EWMJqDltAad3TKCiG0ab@rMYjq`yH}nw8wG=T7`7U2bx(zk*&xmOAs|~IbiJ? z8r&6E;mpfKc#Fb*9p)yta1qmaJDJS1)yO5rkCH4NSLIymW0tU zhb)StZ`Co9@-GfADJ0Aw6ce_y`!7L$E*hzN%u09w*w+>5b;IZ2s_J$A;n{^mTA*t= z92C8IM#hRim@Zzy3KKK#(hQV(j3E}g0V+Cq9h00>nzF`5Y%Gg35^m1sk(=rMcLjh4RPweUe%(aCg>6pRn9ep+CiPhm!}fRc-%CM0TyRKZ zORIZj&{bthf#%H}%xlryI1o>&FoqwmIYQ0I&oX-zEE3I-W8m$qhSb5X!#RY>BJ^va z+i5J;gY6c3hRs3xK^4i5P~-UhR=jYTKd(}wW&_CJ zmRu%iZI7@jFX^J2BbZ6|Y!A)Tyldf2FagV(Mjyp(>){ zU&WT~pE5kdz0o-vjNUt{EdGHM>kel)pG~EIfR6}*Z*^SjvE5$b#}V-Uso~!e(0jw* z2T#Z1>TxwIYBvy64(gCaXikC~xCqjO$2%~auu4v2FUn!GauuEAwBx@2Pz)yA$lPi z9?X3pk1%6Su%cWsnJlR+l`NSoMY=@t&ti&5>R%|MZD=9nqc8=5Xu<$eNgA?9@<_@^ z(nzXEG7L0vG&q>&K)zlWVJ6ZLmh``JFet$;EjK8dfwBQvEb)JZaD*kPpI|Hke+7t2 zlDfkZ1*H9zl_aA;(+PwS=IfENM>z}R5@zTTuSPS1!3sz*Aj5%$3dqbRH;2UwNX;gr zfrSf@M1hI?#;OsBLUo5SiPlMxPWl#QpL(U|oDef)(1iy{mOWQjXD2OhP zCP4NUc15}k-b)O7iM~PJF5@0@#j(xUn-<6kQ-!Wa-Y)$FzWo@e4#SM@E8!k?MYi1( zhz&yx(?!as;2wR&u#Ma6&|3wofbJ{i9)3l!4ciOo-3`nO9DLK z<@)I!TAej5=l-`kraI~Bn;~*e#IzI)>IL=M5~hCm6XmvEFMnVgY!}HH)tcB7_qJIe zFRUAiuk@2$?|I-VOcR04N>&_p;nTWQ|U-O0k+|KV_?c)lF*sRb)=gE z?E%|lz0$pKy+MKdfv&x*y~@34y+*x|fnvH3rv1XbV7=bGe7!oo1j5?tI`~E$ISJFV znCdbASs}4(FmteTFcX1P!sOfDy}yLPb28z|PQwR+5*N@cVHR{7e~4llN+&JEB+Dj? 
zCQEX~u=p!V;V2Xzh4<_SC7}QJQlYuL)<88RFk~=9F#KVNk@J`3pW@G9>OE9(6h~O@ z!0kY5;omHgf5nC=o?t~_aRW&ModaV2ic3;T24VyxZIL#^js#K&ll6#6lJujeLqPY) zhm!ZBSwqm11{;tj!%_vr8ju`4`?jVX!)OHrXHyl!NCyOvph19<|6~c+LlZw1;*PkX z9L0_A%1IZ0rXR(o$;$H%zM&n}R}>K94!WuFNPeB7p`Q6f9nDe{5Pkl(A47AQ*B)}i zG|Ee3m)9P2Lp6#|QcY&?)ACarW$LWxL(JK%;!`s%#Toj=eKWgj&@L@)Ee`29l~L6H{peWduT z+&9kAbDE}{wQxuHQKmk!#|auni z&F-f#85?ygIwy5F>5^g6G$A)Z`tImxrF|P1Q_q=W8%LY1)B2^dUP`x~!V{Me3{~?Q zW)DOeAu|t9j3twpW@oPMYUkgTQPfbxq|SHw#T{!mI9rMC>(*Uh2pLX@N4I57qqe#> zCXe?tkyUL|fFR4@iDdDWMlGXI1WY0$R!{$>mgCJ{n^a#nsw_0MQ9Amvmeg4!N#8*enw3^ z%DjO5l1wb4Q(+62H@tESq>{yL*bWaZAT=;8l_y0}i#UASE^+Uxz~hsa7#q=)RMyC} zzdz*vD-6q7SR+Ol{eXD{r?SwhxyenTe`7=S_1(ul&>!L7Q_~CdMfme6*k4233ifG> zW)5 zsfT$BrG3QOnnHVcu~!QOmvP0#2Wck~I8ZCSNcO2u@|myF+Kq(r%+gNNPW3P6P3CF+ z$~l2NSGhCaTb)ntG2W^hOAqz=+gv-rh$0_;CeinWc1iKQctsdM2PKV~f5s3kHtiBk zTRe5Y)Hc@##C^qPp4ONF6+>XzYp2xtZ^UX;rh#O77F*16p?4kR)xAq4yI@|!xHq{| z4p301O*OhNQk#;WTVKbOOf&L>5a-}}w8LYs0^r`Z8QRt0iv|;aa3jc2{sZx#?+S(w zPJ(3$Jo!`z#5_b8*!M+4VUOXp1$*{Kzh~~siVJC~yDs_Cu-)s->iThO?&1&1gBJE# z&&|Uqr7JL{LxCrhf30iJ$>a?y^UvWn#EqCctS>ZkAN`fr6L>pZRUiHp)k=5Um4G*j zoCvLN%;QG?)8iI77p&h!88q`T=lkrnvga6={{tlpA{!h1%NqkSWm1%>7{jpiGFW{( zJxHq&Nv(U|p7NpBvqeFzmvZ=i8fmPCdeP|9I#}C;&~`wt1@r!cM8V*5zn@>cd82ubPdI#GEADhFP#XavIG+$FhdN{O!#rLu zRuw+qRl0)lwqLeZxPlm|*E@n3{g4|Tl?B%wGujs~KX@HGAGc$QcagUhl%jki3%tSn z{1u?@Myz}~ZXR#1b+UR6uOQzLC%ZLaz9?P|qzjGk5JFN=I$qVe7>k}*Q^gI8?05=I zcnZye%?Ue6`6m(hLUQ#j(bGN;&^!e)?o>CByg58bS|%}M zOk|&0qL^UkOJlFW9yX9GgLY4{!Ia=MWS6@I_?O|?!5BE-mk7YZ2q6S!FjiQ>_QKwlL;BiHT$u`EmEvmIhR_ZB)mIE=8f%mxA4g9m7928NUW)ba>p>bKy zC|`pdP@H!fu$qj!X*k5SGTfqoCtzeifMB3LM&IrJX=m@w0n{6`){7lTg^*(WBKnO) zzY#}3r?`;A+B?F~-w1U8L#Au(Gn+gBG_X8BXO>LCUluI9HP3k2ytKDp`&zAq7S;+k zefHwxtMqZM@Zmm`$8t<5w(&Felsjj5s2{&L#*!V3#RREK)XTx@xGWyyXQCvfSZ-n^ zvs4_uJVjEB9py}tN&yo4e;K^_aVW0^uG9*1ZgR2z_yV65r zjwRP5c{G!ds`!D%lr8w%$$ZQ_SsuGTnZK2>QG#z z8_Q;0sqOTfF^+kxwMy57Max;ioB)$8@vC!KLFYBL?1&r(MkGO_!GU{T-YYh5vb%v{ zI&H;avso`BC*yGCgBgh14X$_=#;^g{QIF-L@~hxQe(s z`^5N5@?AB|6i$-Ct1cX(WOcY!m&CtvDd-@E>E8*M{)p7kYfMh;&UI@(reOo2Z%b^? zx*c*y9;i0BW`D31MmvbRsRZA;#zSos*us9m(lmcBe!chd^>>%@6nV#(D!Y{4kKl0C zV*z^kE)F9OU}Dc>%hD%a$2jh)q-NAqj>ydnx_#wMZM+UCY9Epj;p+iif^3D5T~ z0g;D9sLIb8>^rn7ez)PN|oZL5@Ilw`2;)O3GFVcJeq$(4NEZTq_FlBKES_^p{dZXr9Atn`!?wW&Te+wPrd<_B09@`W5O8ZPicn zu`}HW3HG(OQTD^C41l-JP+1cmRwdD8IU{|tfWTLSWc;;83mc?YAupqcak)Akd14LL ze8&sWi24u2uBj9OGJu_q_`oMFY_RxvUtT{QX zXWm@7qYP7$CYXO$6 zu{%CmON0|Vov!eq0@HEMUZiK}7iqBO7H9eKXmo5!A_@?jQ8W|SMvyaf9gbE-TS)r ztX>t{g|#(;-?aq`i&KE`VYynCYM5i2%S`M&UgMflXmz_Q>)t(;!P!=F#D zcrxTkC-OOUgR!J228G3x6)L;y$tZQhx&=;F`|ZFTo4CssstyoGz5ytFbf-e)qI0l` z1Hi%|hQ~f1o6OHd*}&2_d159v`4>r2Gebh7;7o4f%=4E<0uK|tfIuDzw~Mu!@Y}Ta zys|O%N(N@bUQfs6gWU$N{JoP?*^1g<1{KdOjxJbvcq}Yw3EVCq53!2IKwT1QX z4fzHl&HBgGJNgVKPS**q=Wv_F@i!y=SN3ZwIT0CU>P5LRbQQFaD3N=U;gdK;pB?h! z5B?CZN*$L+NPl|QA!(+u6Jl65+2rgPr`5nGe`(_@7_Ajo9r_6t>QbSI<$eUJxY2F| z9;b{F`3da;Fo)laTU)eyDjFoENzk7TH&xnlOuUApv?*7&1x{I7^>!VXP5RXVqt_`S z$cPhbUK+8Je(;w=x^1xPY{)tU75X|l?OH`_pmA{pRxN*kyd+i>T<7jAGtyJf>Zaty(QdV$AF- z8{*H{VX0K3*V(oj{91W>wz^DacY=PQ!_sjwQ#h@E8@wi0HZmor{~O(1erl@FaXLtE z7D=-Cu)rn`gTJ!EI+|+MWYRb>LSvW8AM!YhaaI5TMelX$vG%8?TUMw|A!A9sNst>T zI_jncTrbpcpcAD7KC~K$(P33LmyV%Uy4qtYY~{-AP`6xWblyH z?OiMiVzykY$yf4^m2r{-)TA)-Ce=zHv5Gl7b@$jiTPTcRKuKl_a4ap?vMU;V}xt? 
GIT binary patch
[base85-encoded literal binary payload; the encoded data is not reproducible as readable text and is omitted here]
zJt*vlH;C>EM|WBuN7RBC>b+sCqpzcX%;gc1&(!F%#Gn8@K?P4GlrXZ&RC@`rYUEy! z`X+GXL^!)nqW5VcD5@xK{(UVEM)Q0}Hb&d&QJI=Te@}f5$i0-e@d>WQ*DN znE&vQa!t9^?-N=b_X+M%^o%{-Qhba%6#n~a)R#8>FO5(=RxX)%|07QS(?A`y%$&}^ z+X=_RB{3`^dsi2#71W#}|AS5pp_#^u?EVjj;3BbYTK5?GzBmAW3sUBa@pkz6iqUCZ zHnWuG<3DS|{yM4anR|%#RY)?EtIvJ*GCTqAQeevCYqwN!PH;{9!GG3^y6jzg6+}?P zE6%B?`d|IC)D9tcbc@=gJTo{hl-}ufI)eSo4h8$RFj3>*`F7xn@V-Yoqxg<1Q{G-0 zutEsgMl>1ePUV@IV7wdr_>g|(ue^fok&qmmQt`M%z*V8k}r@4g7 z=YwSn&m2=huq*wz#meP#(=w-L4oEdv5wjUXNY?XrZO$c3A8MZ_eB9S=nMc?i>pu2i z!C*&pgYTb4y#Hte6}*ICO1Og%OD7p!BXlowhJEWnh(UrA7ftbfh`wI$SUEaFdak&p zAi3sK?BxJcg1>gC`iT3a zY!r8ubj8x&F+1t4m*xyOJNJarGZK{M%y>Hi-c$1|SNU~>>1Q^VYE|am^T*B<-)wYWY`)0^WyZh2Ak=NO|2aNuKbq0}``#2M*Vtwck9EpM-Zo z^4<<=(w#nmwn{F+utIT{;Wx-tA29 zc?jN#yM9=z$!BPBAi@Bb+Y+9;yg0x2IDCSSXme9*Ch6tF!&4{iWBIA$*qC8WspIj< zx3Spa`CWETnY>tJaZM8eoxZ8CdvWwDOweN)lfNEE^s;ThV-MOiHK*n` zNvZ0VVax`u7Zc0Cn9a}5d_h4U7X807f2UGOR>a_Aiqp#eKCi+$aFq{d6C1%G%r z<|8A{AH%T~I2W;OD=AFqm+hLL-1BrbmPaX=O4znj!lgKnyKRcQ`Z4`*8)i3WdBlTP z(H|Egnc(ogev;l@&WJ=xx#rD$Pb?evAsmKwN4XOe_v)z%noeT8s4|@!_7}u3JT?_( z3$RHX{7gQb;ORngC(YXvD%4`bZT1lNSd>kI=@{$h8pX$**7ZF6CY+M*8uT87EWG&S z=5&;8CH?NmLUM(kfyTZT(EmD=XP4BZznYXjnLC?nlJSYkQ9H9b)u@nKNYn@5{MMj> zc2L}~Dn22V*7z}OV_kA>dtvHcZ|(j}$DlEzcU3px zU|390vp8~@ZYDS7+4!*d2t^RDiwL$t2`7j%XD$BFi!)v>OQ>)gGY&YKF z1k>DKGq18nzq&=U6M$Y}E};*slwXSPWp7ib=Iif6?_tlfXBCX&dinr4`8kUU{e5^~ zWEr3B90t!)oVyQo{;-)h4gR>9t_WWJ#mzrU0((vT+f5$%$8%1bR!iHx$P;%Lg&#|- zAo^9fRZ?%@;T!8J3K&*run{+@K`V8lkpZR~NNyMI46s<-MjjA z?TPOtkn}lB6jIYLvV{1=7v-CChu*C~VdyimbbRTC#=-rfDSz#l>5gaT6@AYS_P`y- z1|u@K8kQ#12=1N!0za%?B#=zQ{yp@6NZ6VixgfsoknmTe?kP>!VjsP*M(DXIwTIz? zRY5l1KAg7RTJZmu-mJYAohm%z|EDqzz7nYsX8Jwmp8Mzy!yZB6$=)8@=A-NWq3l5n z9QJr00C>ZFrEzFNta`*wU3)VT4Z~)uCTmdnwKBcmU?ocZ@7$y5BA`zYv8*n^Nv!Wg zZ3MZVmo>jUNIortwjE*lx=BPklIs(P&N+FA>URB_s!zX`ywK8ihJ7>0XC~+)XaVF5 z=IIdLG{_$LS*HgmXhx&@WwdRQua#49!y>)+k>~7NuW@8rz(Sf@dIV&X=3%ZdF^;@v zv5S_TIg{*a5bc=GDA)j+?Xb);87oHmGpYuElOYJ6q@|29vW%+zxtv~*gUVkStR25k z`;32dNO9bkjpfIBCYeC>1CX33mz=CR_Kk`vt=3i!``XK7DUg@eymXp=2w1?Ny%ynk ziorj@gkTd%tO@5%SRSG;J#+JMF&8=y&zqrAk4(8nTV)2OrEEwaTU5$KT1ikDimQaV zmHh~hCDD8x4K%0K@z+~W!oOBMaUPuNl#gI%4gz*QCioqj#_<|}?E5w!{l8~>spK?f z-YaKLW1BX4$Yhc-%FM*}p44E{YG&pFaB2#QJ4d^D*ln*Ksfw^dRKrVs#BN=gV$qaIXV5^lw^!YtyWS8 ztSFU6Nh8ZPCpkSP8`@{TT6W273c}yWMadky+142H&eg4?;ys9rREV}t>hMvjDl!MG z>J#X$!6E-t>d1>HL~9EA-0C>QbwmBKap;`)rSLd`B;rRBbXe`;yv09%Mo2DG6Jo=P zv$h564|uJNRKv%JRyrxPrmHMY&K_mNgZ@e6%S!RuEE^EsRkhkM;4~K>wHO)ltqs-S zi;BsNBA!+Lq%p1xEQ#MG;UZW(K){zNDLInQ?tp6TM%#{5Nl{^gI6T)bVjFSGcWRaU z>SBFviXi{0hvj_0IR9-&P8Z+@Wx2=QA|HAyFx?>crdx3fN8 zo4Y{9FUDu|F5{$_g`$5NRIfMFwE>L-kM723kTEJn$`u@A$oR!+61#@agif4T>f!n4~cP_kd25?25gdA)hzuY1RZs<(H(B zkdb@ug0S{l^sm|;+A}^KMr3+wvM!3BG6D4E{U;rT&j``vLoKa7-B&%(=O2MxAdiU8RMWR zy9x?V!aw&CEgB2`M_a#0rYxerrk+wB{IRs>;2zn_M>0p@znr^|_%(0vOX(Q){IT`S z@gF2brd7A8!)rr(TG*JJ%0{y2mmq*t#{(%vwIm7jkikJ>GQUgxwWn=Kl#P(_${hF0Z1v zn`286f>}~s@?bhLpJat0HHN3FZ{x zOm@IsJ2w2cE1A zr+m4ZK8z-zIW>cVDv-LVQjI>P&Ze9Vwb)i!r<{=ZD)cA1Pn ziI7cJMIpXwLJ)%2W0dOZUDAboR4rrnGz3T;X(`l~DdTl18k_ygL31Cujf#QW$T2c5>HQ}KB9^ion$oiP)Wu)r0xhCru&dD@ zqu4-Ou6~(;Gm7Bn#IM~iW9>yz(p6mHcjTVI%l$WHD>?Jk1{FA^5j@X>sL6|ko#sWw zT4W(lT~N+&V#DYHCGM=1)+itlIW{I%8SKQFPbYCR2P_*4f4`y`x8YeO`Y z$eD?Fn`)!Y3)Z(CJt{5Fx7rOiWc2 z5eHo+dj5QI46jVGu{47H2CeroK<+G(h{LY^86f7%9MOag);+>`W*r7ShpiZVrBAw_ z4r;?$5@z|Se1!g=`xY@L-SmFiju|nhfNwUk3NsV8$Mw%?UTu_*XX}D@Gp;R>P>ZvI zv)ur{XqVDdiE&j>w{hO90#K^b8ld77;k@%qEr;1m)l`W^x1yDw{INUk^wz&ck6GN3 zRP&*!&F&0b3HQ2J)aIwG$;9x`J3_U2C5*TLg{{V|Oy^8w*{Y!~{kv0H`|+UD=Jq7U zNnJdzS6ZcbD?x5B`6N&D-M?Ei!GQ>Lsvj6zK(-MXEXbZuz{eys`nI6x&wc;7w;B+t 
z#OonmqJCVYnE&uP!V$K#q_pN$6qFDon)SPW^H1}=Z&}TZYfYLweZ6dU?z)&OJ7DWx zwU&#n$o!?;Zn_*_P-#Mc{35rAJ@>%RE}ka;fHrBoLH#frYRr7q^G;3B7Vr+$m$zQ9 zmE<^cG1_wUu%~;=U!5iPAod@+i--1P|A06r!JUJ+w{A%wfCbzE+cf9sHK`LzvCRsQ zVL!6jFRoOl(yx-3yCXYQ`~*@2$_Yir%*W}>Ua@1!p{!v`>vZF1#t;Umm>47q`7@2* z+6EYyyy?HtE^2R4`=)+ykzK2Y;%JsAM~6(Yvr0d1^~sY{HP1Ntw6gp1_;UD?`|_<5 zbbsw8JV!zccI2r~+OblIO}fD18w5g^PL%($A>2UVS5;!$$k?BdKf>XO$|(jSS~fK- zu58=6H}kFK=mUu@^IZS&9ud04@(gDuDa5$~+m7^{t9h674=Spch(HJJrW?-Zo*o6d z=NfHp)yM7UY968dP~X_Hi9MS6r}U2&JI2>j9D+K48;{0vT5G!wt{$x&8(q^)?e-2~ zYy0jU!yBs0{n`(mKBHYH_hD~eLsOF9FO3e z%gz82T6Q;#Xzwz39Oyy;=`3R!Hdo9P5%OH{bEteO8|H+LIQSWX{-qnozfh@Fl3eF` zce35RXRaY#(sG2BLEqXYGk{;tHk}SS>a%v%YheL_mz2w)cTW1PK)mfmu&<`8yUUtG zY!=!n*g;4ofA08OL&6rbV_2MyHo;T4KMZnASOD9HYw6mEjN^6evAJ@`7BO;^u_6z9 zcFX{4Tr^DK&9&ZQ&=hkgipE(!HO}mDMhLPumob*1v#}e6 ziJ_M-)uLt+e0-U{WmdDTVw2@^1K8_cv}4o2qNb7H%q>3uRl;Zda%=63=-$DH`B)ds zHS4jg@806KE@Yn9G_{s?R!OnNz97c`Tyj-4vo-pM>(vKuQ?uT0GqwfqWJ`%8?3s5S zwl5BU6XzX#{XV-T<`(*#Q!6j|E%;e@{vqFAOlbZhD}=aMc1_>z>89XkZ#DM6{+MIBe6v2VmOkh@m@ebsOWe%~^_rj@3;AR1QTOxeDYL zZQ HHoj8TCT3R8~p8GHNnwZJ0rIZ(>F<5#m?`WChk~WC8BH8`An1l_i#CN!9o&D zbJEu?IGJlbg;y__eD-ut?t{>g(^Cm=M_x66DRf;!d8IQ45ZGwCj<+Be9?o=R{4*hU zvQ{dzq*CMkPWX)zZi0Y}HyM3aly_)Z$^X*nFVb4ykD%gEs^GEs8f3088&Pi>!b}Xn zRFhLy9GwUxM)n^jc+Fyt5N56}8OaZKA<5eVxLZGWaJpYKIwJvP>tsEF8eZu7J96yFqtyl`L&w)zaIe;P_htZ>llx1 zc1@1G9@`L}W{kh7({+fB-niV%ZF+`9b4ICQ1hwfj154BdM#DUSrE>DOb%yePpJi0n z%o+6tojemD$KgBxwHmQH(-*JVcq2QGBSyDolpANko!X8$@Xl@|9UBR9hIUqkKxNK-Jo;r7?-eA|XeVJT1YAeMkPUBZ zL36iBfh*o|{!Fzjo@BTS%nIlI&TjHO4lH7vokQu{1)~`f)vR2(x3)aw!vvTyE`?)d zbMz>4PR3}TZ?_*}Zdv{l>GUtD>7{v~MU_a`L>Hp+tQx za)$ECE9{;dq#DnSYHQsCYFlNh>5Rg35< zDN#I=6!#v_9@ZYip1>Xk=9{K;dJhTP$iSCHTL?4_#f*46DdM6pD$uln$)a?-Z-{Ec zFJIw(2Npl$<(lV6*)i=8ejJE7CBvrvNF9D5&`MdZ4*YEm%O!#jYR1rIorZ>pLg5h8 ziQf_iM9K0OvFGz6^JY%xNsSx_--2Wc z5YvRG4VC+yEJ8=ddF2Op_ox+QJc2cH`?&n4VfPIIC=Xw4**h3tEC*lx8=!DYw3J1v zVcClkKN(*Y502OW3a|@$A%!D_BMT#y#8@+xzEc$S##r-9@FZGN5`6d;Q0gG=$IuKS1%XWGs71#>TP+4D%ZqP2j1T(P8r)<|W^`t- zX71~`u{UCd>C-oiKun^;u-KcQj#AnDdOfi}=bfvMcrMhH-&|Z?++WbHy)y;0e6)Tf ze=zJk?YvTcsFN~-AJAZ(x?$L$Rv;7w@}~D-5Fy0_6MxXwk}C;jGw1t~8bK5WTK~%k z;J=-JGyf2_kAFCBi*CF8 zWoG%>ik=Jw)f@d+A{}Z1?HgoN5aw_5UKt~lgLa6_eOS-R?OzijuGR*+kt=qb7WLS^ zO#O00&iqa|7Il8TQ2&B@#eP)tVhBh)dmfJ(DPHPbnq10U>Rh(RnyP3bbUou*{atcJ z7(M~rUOoTp(yM`7Jf@m1epT~>b!rwuYgMX&W17=MH4W-k*eY|JX*uS<-FP_^@gG4(IZr5$oaFsKjE-#bA+S5j*W&TG8%()Dh-l8zJ?1j z)cj}fE2AluRyU0Q)QuMJ7v7qdXwD(;ZK_MJC=DcmE#y7>FioVZCPUDmUyX3@;DMd1GG z0PDcn0_Bn6^ZRAua|I5_w$X2J<6HQw@6!(_q7JZcbaov=(#};q#Z1LO@LvADR^;(% zsbLkOZoO4x?0b-27p+yXDQT}0I3liAGN7N}0yJP&c6KdU(q!w*8?2O{Q>z?(cXu~wGfq7MPWVI>yz&m|K&^U!+ zDY$Egb!Yu@`LgxWvoE6GtC|^2Gvu_-Vi`pd=K3}I^6awnvd_uSQ`^(LmF!olspp21 zzo{pZlddP6lZPjBE$2Gy8N3HXNB?kOC%F7_P;b6 zbsLM9QlVURM-3A;^yamtWwyw0D>VKCk=yc!o2L|gdG8FGOEvccwk6j;ME8QwJr?qb z+37UL;1DvtWw)mfg@^mG#-VyBK;0E-!^8u!Ho7*kc4#56ljBUmxwNCWoOzpZH?q8CxwN{4zWmjQ<4NX?w~Rv+zWMlZ=I7_Dw$OAh zM8b~nRji1$gtds(Br=?1NGhQOoi+HYs0+|Hz|w+ki(Oo1E}Vm$e7}TaA5ujIowYb! 
zWdN&sTt~3dV0*dpeu0x9 zU1W;2pXHBfb^dPeHGJCMw%k0lF4*e}p8+HcaY z(l67WiFe9d&2G27jBv(g2WlO!U2^*8v^?E^1%27LLpU52Iv!fNmxL>B+Rl2j<8qA> zy)q;~@jxFvI8;X{8-vM#F$(2>cS-4aj?WtgQzue&5vu>v`P z+)SA|R(9-6c{)-zgRNt&izA0PqgBp@&JA@P)eEf~tyv%;P_K8`l~mWLXY|mL=d9np%1){ad<@ysXq=y2d<^Lf%Bj-iF(4f`-$c z&3>mfMfM$bPGOFC&VP<$C@`mOHcaF?8 z;<(kTvu#~qMbEyPJ!=BBVTq89dZJ;{b>e0MiGHobJqt&1!`{Hgz}mpnz;e^*9FRJx zzo30!f1rQRr3%bMOGnd;VS7+ZNmoo)oI2vpV>je57&LKqzSSisw9J%ckWXz(eM%Ki)R=nn?fgQ!I{w1!n3{QzPB%@D+B3mqGRT4mbQKz!krokHkVGF20Wwq4KqJPlV1t0;c7Ag1Iv^BLfh4mcl zTil)2e+pH&MY+)r=PU}QXaZiU!gz?Ah->KTa_e*VJ$qMm7tFy8brnomo2jf#LtK5S z<8}YI#t7?U>QdCs8)8~g)~MDh?A=nSP9U;I4{E(QSE_lfkybF&DVtZCc~z0@Z|sol zqwHnv>FsSf(B_(f&Ap4(r}g#?-~un>HCuio{)w(%=ap;Tzp3p??GHK7*4@$JKi1t- zrkPY|-8~QP<$Rh2YwDK`teC6q8seJLT$nR`=I;^i5q-=Pt0Cw(dd|LeM0qE^8h4Q3 z=Ig9ju4bLNg4so;;F#)LR?-xC%?-^B$KQ<4vDcQ?xL3Jtg=)J9T8UaaDoWb{731Ag z{_!Am(Q6&>@Na6c<=G*p;)4IE|KO&f+YA|hsD}Rc?zcpW@q|LJs#sqmbSI8U?_nOJ zjL_JKSm||~ed6-MG38ShC+6grV$`gD5WJctuSu*nVHOdi9+iD8%zDcnXx9pc$r<1T zaBo~1BqUlFb)M8)I$Pdb?gLv))TPYv6ZsInoDTCyrP0L}#)4v1V;f|eL4OLDW)961 zy4d@9`!Y@$oMK&Kol~va_2)K$S3s9TvrluD!iJgF83!+s0<*QKiRubY=R* zRK2yYccOQ!2huBj`T|4&0=>f;q5#pZ1jqJ=O!YIsE+7=JA4q(NaCqF#R^JSC1xB$W zO55@>)5GOx=A`MHY#?fSzu9S8_{Gt$r5x%V(G{gfj;nuajekm)Gjou+U9qioTv2YT z9^01Ou8}slU*XaES`9y(QB7rDWm|14G%Q}t1=K#*8Fa3)6`V4r&_-Oc&}^~oux+s= zc_3+JHnw*o`bAX7p?s=0tVgYH$svErchSDr-gk5CF&>&?markVp&dDx0|ILDuZy40 zoI9VhJ^Xz5?vc{5Xz#REwzW4;Nus(VI?E!Lj=Un3f!;^pg(7EPIb@#!- z@;dk*>NeH(w~8R2%Fy$ib3wP#^_PvX6YmYlofxO70l#&>F+YvM6fP&G_@15f{Y~+- zj-`!5<4nVy87THL>|FF*J;gvvtRq${&6?5yc7yV(WFIQmxIZu0xi*S9!$n@`=b%p! z-v;-c%btj9s0UbgGODzFr{K2aQ3{NW#k|F+T7Y2$ z^=*s?T60zi@Xd#V1q&?FQ~eBez>8h*8t zLXYM>>Yas`YY9;`@6lup?*Zd=?0sw{?Op1EnkTh4wO3_llR?nZ!JQ}6=JKzgv>LX~ z$<1#UkdBoe5gs8PO)VR133^1yKHLi$J}sUt-X7i$^P!zDU16P)UBg|aotCA%7Fa&a zMXyedH7!%azTSpI$UTn1%uAZv1p5Ra`m30SlozU(3O_fYq^X^oXVLCp$7aV^$6UvJ z$GT?7s`6XLs`6gXQDX3SNok%@+G-j4h_EW<2yQgDr?H zW5c+M;ETqKY{%_bcuTW-qd~Y;Tya*zYm{F@_obf!_ZduR31+ zCpE7mU)kT}=LFx|-k;cI-lAUZ-}m3u-<#g=R=uZtW_nh(z+2~^Mbba^j z72u6J(DL>0E$y))qvr3M=I=Kx`4&vBF*O;G5(+^BzH133{*5hvDUkz%4dLWcr|;Vu zPjwx@*(5O+8bBUUUZ-&?t<%MY4&|<6IoKgZ=ubGa6YHE0HWb!2>t3jX*^eJwY6Iu8n}M z?zCrqT^hxqtAgQgYukz6#OV-(A_jV-I9gu_v2ihJcIYL${gI>5lHsV942spw{_-i| zuuRe*^FSKJ=KnB}ur_ixijA9Tg+745Squ*xbPq7HHZr$&GmfT78fjWa!@a1p>?j&2sJ4bXxx}r|?SX%al#n{fmpo&(k7hZ6;bou7Xz{L4z(kfPE!N zo{*7HGO|AMI#M8|``@Dn`_EGcQ#02h{(8ZB{(5E?7MJfX0AuAnvpwYDn*`Z}I_XY~ zslgqYIbsf0bX5gVuBh#Ov*5oDxk{f&gg{qi%)pF!+ApVDQ_}H~v*q zSyTE_Kx|b+Rg`_W+fcj7nn{O=PbsAdoJp9eU>#=NT%BuORNZ)8rL%I=iu>RkD5F>X z>de!ilf{6Lgr5nQP=@8MuAnzO?fRU$d8J@i8wJU$scJ(A9?yKC% z*e%da*Ui-})Xmz>yGgV~yhXHyGZ}HAfpQgeMR8?%m2|}(s(*D83UwuWMSt}koQ<0~ zBD44X8dE%CCc-(QeqdnWyQEHb#$^r$<1M|6Ngx+! 
zWH^0RfjC6x<#ldVqJSxWbgI+iMyT7eR{=;0;7ny^8qhxI;` zep%;~5h0p)iAIG4Rq*e&Kw-XtY?p;K2nDlYAZtZ_m(c{3faxHjr7mlb{0jA+(z($S zr$gLV!#8z}c zk?{m{BGP3MaP_41vG!cKba78!8v1AEOTqH_of2D~oEQjYMY?J5{$?a>#&l279L#7Y zyF#)|e$V9-2e!OG5?kRH4RyjK_!^4T0{-zck4I0tJCL&XVj~#XTuZjD9&FU z*;dnz3!jKwcY@EvS8LCv&#Jq2Z+M>(RzA?~cy~SI+I!KYdj`BDxLrem(?; zS6Q2^e!YUT?w@rTG`FUhLmWJujhr2d5LWEqwoh#3t>OYJJ8Q0XHd+qeSc9#>DiW*h zjfPU(#critQ0cF>5m75N?i$)B$168>x z)3^b9X*pjJ-#~=jsNE7AZdIDVHK-+Ms&U5zzN85Jvz&fR>m85>%`ly^nKtzg-%JmIi2Eoec`92g!9e-n0E65KIy9G z3CFnAni$5>ErP74j@};LbG3)~O{KVs;RV&(HS%Etg@-E-zuOn`w|1!>##!d5M{(y% z*SDp?c@T>y3YnP6G&}zjf$2!C=d@T4H`9Uh()NSIMN=te53l=f{-?umXY%N%A&KIp zTB%p;q32kXz`1$9I-ek`84`&Y=Tpj5)(myq>!*jlRR(rL4I2@`cpk?+=X((d$x}>= zHz2eg@g3PT#SYOSI|s;_5+&*n(vzGpi@4Q3Zdj&hRkhG~E!_{8D;Ocm|A zMRMNsMEWk;J6AYuo}Ra8^ryb)1CawuKyF*^3=Tu-n_H|i4m-sLmLCIXZ>;b|K+j9 zKXs7Zn3COqNN)^-CenT!T!c()!DaO1jdzX=A=Q&M)QGK8|5&9)qzHjz`Unvvd!Z`o zf}DLPE%L)Szw?o@`ij47HuBUTej6duN+_$SQ`ra-Ace96>h#u_WVBSAQyV`%*x5VC`2vI9#tC)+rY*k})v%;k9 zYLW{~sAc}J^>Gh+IIV`WKa6!hv+6kyKx<5s!Kay{TFFe_LzqgDZQYr5~=N*c;EJ0f55kq?ovD7Fqi>W5Evey$G{El~WHie*t%>oaz1cZ})(}A1~#NSvCynneWzrJusuOmvt$1 zSarIUoUDCn@(_|GG0~R8IGg?QYxUL&yS`baabX|%aigupbckHA6LS!=689T0rns5% zICcj2Xhy%~;mFln(|`|96%=3E^AH<5e8hgNvf0<9xqDwNSzN!)J-by z=3ZL%AeYQ^a#CG>(a@srd@z^vdN4a?O) zVm0bn$`X)X-esF3U-9vmxs|=w{hP60y|wXYu%jL2X<!Qy{PkFgH21@+`71* z2>(VAf1R*`F*DdJ%g3t6=hA^2?5g@vpy*|&LZWlsnj^Q$E=$%TJnG+!l_lSZrr@=$ z?;@Oi2IS=D_l!>TBHt>KmtKLaz+91tGO1JAU|@<8<^+aM4WpV)f!S6BY_cd#GNc7$ zFGK1B0$vXTOHA9>{3iG>QO;Z9{{<7kO^ zrASot_QaeY`A?uuA?JSbfkmhVY2qElK=MK;IWJ%0P8m+A_@0b6GOi5)K1a7ztU&0|KvAtU1!U%jUXFs%cZj+bTZ`n zxe)C{2%Q9>b0D^P@W+z+lG4IAOZrXld6GT~@=E4G@96Xh=}W5DHNP(9hxJ|8!3ZfI zCh2=nUgVF_=@asIY-?=mQt2r^wFm#t`wM;+ZyTLW0gwE~R7m-WgTsx!V9NI!9FqO| z8NV(S95`#{{il9i%18SrzL~8{3!m$ok?%Fw|KdK^HNR_+k41GwS{^6WC!d*HeE!i+ z&5iSlPVc0AEZbpQ51rj1U6hJvJ7>E{a&5b8yGrtOO^;C=nsZ$K=&xP0r_*U3(0O#8 z$3B+A;Q3}RwND@&L1%kNKJ7JjFrRE^b3Hx08 zd^+!QZT|6jWnXSzMf1(RRy+4&@3wCz+mi~^URq;OKT(_QyXpLo{Rrhv`on(A4jr6| zubuXx^FMU@$B{+4$dTj7cN95BJH~0Jd&vG#fS%+&fp*TvVIzG^r+i3G$rjRCA3Evd zSm0=-{z{$ip>=|z!?B6_$+5+;mEvn>dmM-8R1f)pr2kX8nNIoOj1L8#AK4j_Z94Nq z{hm7aLvluEf2bd69O(=Y-i>#bQyz5whjh8KPCNhe>9aqnlRwT4)W6P7XBYGsZ@+V! 
za~J;ZbsivjNS*wlGe6GD&Z{Z;As?Ll50~glcV*I|GO~%&+Wp{e}Y7J(6Y4{$B;CVYkN-$4Ht&<4&v z)JH%nd__+@3Y>yCuL8e~m_fn;q!Mev{|)edaBc&=7&r-erh(rK&PC9F1A4(ZhTQP= z?EX!JpF9Wtdq4#x?niulyIhQe|KFhTRX`E>3r~U`g7}{xPpzzO;GKwroLL>-PDY7Z zonv_WF3^ZNqX#5>IZ*m0;-GF~F*w6O&qe%R&_4v-2W&zdANYrWHQ@Xe^j^>yaZyV< z6L<=!B8N8-XDaBEpq~Y1AP&A6F5d^v&wyFrqtC_DK(s>mPtYF&Ar-<2(0b6{MQ=b( zgyG<0EW~Nx{1*5W;yj8TLobQih=`yy`Md`)e}S4{#Km6$?*a#MD`Iqne*jIWKT4GF zHFaqX@Jm3g_s$~KPk@kK34JTQ1%ymWs0l8Sqi;JCqNUO@AnG7B66*Y*A#d_{pbQSa zASpY5LxH$yLi!`>ZYzICM+yVS5a)TtW2B0>wX3(f7 z`w!sP5mTd~TLv)e8QR4#%h-cJ^cZ^t2uWtg7=3a3UEC1xSJ3A{_kex}^f}Of0sRc< z)wt^*pJ?GW@K=I=5%?iE?}F|jdSDCD122L9XYkK}J_`D6&@X`A2>N}{r$7&Y?njAP zC~-LW&x5}n^cv95fPM?~LC{A){{i%Q&>w*Q1o=D#`V{!@p`M@zK#S0pVj21zk|{s} z1?U0sZqT^DL4dXqwE1fRVm1k5LEj4ce}K@R!UiD5T|5BP^d#~THO)2=cpj)(3f%M{ zY@=IAga+V;gyOw;YXS!&E1>4WKD_-h-eRr^e+Qihz7_OiNOc8oA)mr-pd0*mKtsZX z_klS`n+qE9CO`)Y9l+O;y+LVVg9OxHco7%@Le2#X;$SWaXqN~*AYy)r&{~2vXEl4F zNj|;-Ec_lE%r#*<=t3aozW6$%8n#k+8JtA2e~7PHWZcFfVFaa{!GS#$G0P15*uFX$BDbR>HVT&`z>Oqg_%t5PD349LkS@hE|uf-l#{;^Fc%AMaYAM zv6eJlik_4-yR4xpiv$lrz8&;OKzLBnNW}aAs96(eO9`b5=v#3G;eeJu{5SC= z-oi$R{g52=gSZ8yJcO9A)S_lbAh!~HJP8_G8Uxho0~wb70@QQ@JQ)!;sz^mB3*#ul z%1f~FQY#P|Tf|%w;d2U_-G_}3mmtm^Kv+f*qbx&~WXPVJ4%DnXX1WCFm7sYf^tqT1 z%mfEoPSi#amQ;f8B*K!4&{Vtu8<0AvVBzr*|#&mR};pljG=0B0&NP2NEL z1H2hH6?h*cq#ivvOr8KVaQrf@Ui-gJJtL}g=Zo|Vp{Vlp0Z07@obzvl;*SXByTN%k z$>|2C8}vrd8$q{%ZUub-H2F?HO3S6RgreI25T~!|yxgBZM<|t{#Ct&RClv1i=lj&> z@&kn8_rdu!YBCL&ftvhU{{qF)+i9&tIg8DdgB(OY%}BKZ@&6t1A4DlH;q8NX`ykrY zg16rW{fZWoTEyv%pj$z=g1!Kn+BJi;rwNTB}nXcXk8&6VEnw{Uk_H@#DY$@TNcAXi zdXht<_G3a$Q|o3Ca++EAgx++3Wzd#-xAv=T`PD2fgQHNK_~2&sOoYvehPF=DV_*kKzWfH{zXNGIkoGK6^$_ar1bsfq=?148^hVGdLAQc# z1$_bZ1>{!A>AMKUj{bc-=TSNyNB=77zY8Va4EiKw=H?_P!Z~l~a!@NiTSXsgH4Jrl z7$f)>s=x3K)m$>`sJGui+I!K5?T9%9bR*(Fj`$y8UStspA0a+kC%uC>uTdO18#P&m z@%j&p&N9SYhB`ckoIeKL4f?l9>;|VB^hVGdLAQc#1$_ZIP|v(adO&^*xC5MLFd|b> z?(-;j3Y9DUJJD&k=|F27Dlu)Oo+RWo)PFy472du^So+87lieZY^bXKFkd|uo03qk|x(PoeWP4Dzn;hO2Oe-&~5j<;`MM(;rD-UWXL_&bp5U8LFpEt&%@3XLa8keu}x zI|+R<4gGK*@E)zD)GqlRwDDF=+XKxQi#&{l8EMTFU#fCFOye*+WsCg_CD(T1C2&YPoYm@axu|beMnmh?_-qL zpa*(^Z=tU?pyq!i6gD7-4T$*wY7Pk(Jc#)s%8H?^7ZLMC#H>J^?;*}6nOk=0dxXL! 
zls*-Gco#-ws>VUj+>QLFqG#?#&rC(n+>M@@ik`U}Ju?+E;sSbRDrWr!LQYdJS!vYO7N?)$^bSqZ(5~HxzX_b(8V4=?4sbKt zyBqU&Gv@Eh=$UHt%*%-XGGh7>^JUE9YRuvfQsVPf|=BU;PP7A^&>vz@_&IMLng}-Kl$n(|Nkxod2QjHKLJcKkCqm zw)blsl=4&HQRLr`HXcPAN1%;I2|53D(6594BxtHbKgQj!+t2xX2!-7K3d{n&4wCi~ za!+XttR9uJ7cI(0ZhMhiHgelbxp8_M%_c%#f9j=n>LuZR*tOerdpPG;@Lg|*Cj5ZX z3QGvt2T2aK;&GHx0D1`MZvr2sapZBRb%lhSrV)GqZ5*QeJ*US~t=KEnF7~|aqx^v< zQO`#S#e8ZJ`}qH`cRp}7UDx72|Lz@`$TeglOeWq$g>x?vnT)yToO|a+MMYIbJW&-@ zRq;ebr>dgjiK^#`swXPyiHN7Fq9P(H$V5d&M0BDmDysAIQx#QJQ4v)cZ>{e+!(=k~ zy}Zx+{XU=H=kwlYf7ZIoI%}`J_u6Z(z4pH2Tv}g7>yN{EE|O31R9oyxAC57W$&6)v z6d^^@enBP8Uq(F_8S|gO59R5vAbv}+dAD+`^*X0^sm@*O6!RG7t3}XT6pO{w!oQ8Y z_bF(>=5hSwHJu&jb4Y$CtP8&w4g261!!Jfpjbi5pv=yr!p`U`Tgsz1CGW3_BUxn5= z?W;5BbYLEfozWd9`z^JS{We~DKm4Q6-=QZ*vF94a z2J6dw9GPll$O+BI>HpP8ZUXlXM;dzrkJ`lys%^gkJyo%B9?td7CG``<<`>X*ef&D9 zRjAl}6g^)@ridAQ8@!kn-=>EixCr~djm3&sQ*UFjBG%N~SgeRu@HQ4JB5o{JEb&3D zcaviCP4sWXuij*K4k;abMX_}V&MIv&_KM!UScho+H29FVD15_MVv}Fs^~^O$1eD3UW3+xg0Ijw8P!_L7YB zWoGjMWZvWH27Ma)GjZuH_?(g?i`Emt&B!<^W4_1J8{-wMQ$2V56;2m8`{C3wr~6q` zNSbddHZBnXey;Ko54BcbYSm~hr`nk+{EP7Wz<*o$F#~=TIya$n4^KDI)*-ZAh%X_> z?xRrx&dYE<2j>Mc_PfJ;PxIg$&Be5}aP_?Co!7yOR!?{t16b(f=m z8#>o=3fM+lYgNX;PpoyE)WVyDb@*O;I5{}&;goQ?xjI&;ba?vX&cPsV-xl7V=D=U36Ji`wd{Q#Xq1TW=H$x_N&F%7bknEtHx$QqvDg2Allw?^?U|fR^PzvidA1Mt5+^u) zpNI2Po^l7}gnRu=BJD}e>Bo^QM$fZoK78p(X>nt0o%H|5+`Vk1Cl5JmbVQD1dhzdu zH&pbq3Y%ARuks@IDsgQ41Z~~J4S>aY|2y%0tP4G#UIXV#N=qyfY=$!TbNkdEnQwqs zpy7CQZ?+Fw`OV>9O|37$zZ(AI@Hd2a4)8ai|M$^6z99VVSc4l*@pkR`MtVMvp65k3 z2AibT``GGxocf>SzU~NjcdtR;iVYoXcu(v&etTSLy&Jdw3C<7U{F8FbzrY!<*!&Be zFov^NnsY-h{XoAH`EvY#p|>0}g#S09N*cU`*6*kFzHr8Xd(>Ls5*?{xiTou*d)4!} z>aotz+n>_gwdi>oJ$=xymOBvcb*(9kx{qow_Mm43wsMgyC7K-wY3@j8a({RZ{t=b8 z7qS*cabvSk+loJ^ZP_Vmy+-S=(E3;5JOO@~w(i8*hk5!1rLFeS7!_Z|(|fsr`BAvD zJ+SIsb;eDch$tojyuc92tv?h&`firBM+W3?iO zwAg1`5iw2@hg*<+omI~?e;MVvn#Y!)vxvNxy+C%T8}wwSzg}-140jOnz~7R^wa2%g zR@(U;oTsUE4H?(dVOB;fU*Np6O0na!H;sV*Fqz3T#r9p$>*?nOjdFRt$dvZd?s@t) zLE}{{OYC}!ShtT%{t_8f2HY3ip4{eac#hNjEuJ5{M)<^1tB~`~dBs*CF|bRXM?RxB zrA;^q)^N%=i=J(avX|#nX}|(dh#mttK=b@IkkoORn877 z(_697noo8F7b8={(;jH3LBsuU{!E7X6C{sDS?FfwTAy}CavPQ$f+e@rcGDHdxPHsH z@;H0Guh?9{h;C%3n9Rv&KP$8goPBWCk^OGM4ik`X&MbeExwDzyrVWd6JyN*@s7g(8TgQ% ziA1OJ_i#$uq_lN6yV7KKrBQLsY^TN_)xYf0bDZVrU650t_h6Ps40)eBq66HFy&rK@ z!;_A_S>t6#Zr=^FRMM!$|3}gmbL8Bp_pSzat>&MUHp6?HccG7RC$=l3X}z^_V%5sA zThnf@@V=D$+m}_p^KI>e^}3EO)|t`0j$~`bbsb}&u6d5WeU?$2)9lm^p!dUH9@0pz z=ALae{U3y$ndljW=AQJSBfa`Fz1@fgms;d$<~wk{1!uRT`^7_!?h(7m;@ax1UEXWl z6Sjr3O6ewj;jChZ8ci$b!3A)tH5ZCKNDSwGBZegUOm@$_IQeQDdixdb z35V!CXRHIao#C!j8QyR6bQd?JpW=I{9(+@9e|WEWX|d?RH}YQ7vBVQ$Z#9c+(pR(4 zSS|Oix8V;y{&1=GyP{!K^i71`se0Tz_uw0c-*MNv7ub>(pg4&j<~uV4Zk5`Jj3W(GP{oLsrzGQ>UZ?>F~<15(pEA0S3>^@{!}K;Sq%ji7DefSL@2`a=9p^7A`&2PFU5K)!bZA9#wp z>(4khC=KTqaQ+R>FD_lH-;}ZQ7Ll#3C1>~z8a_b&1M_o&8;c-TITEQ^;P)B5&a7stDmEHJIxha{m@yhcdd3PUrmN^T>pYaV$^V@qo_EX zJz}`hDr1}?Z`m2$Fs~H48`>U0&o{^+i`o6Z0e>8vTj2aF^b#`URx!P&T0(Afo7Rmt z;r@;L5c@XxrL5T^vaBRe-zT$8B2%IfGWMKeJG@tUo!j8ij(#)sBsu&X?nA!J=>COqNnw-I zJWWM&s+@2xs)_m2!<$n5J1EHw#4f{-*#c`FNNR5 zu*c2f^u3bP_blX3I4ZNrxn6I)v420#O7G}d$@&y6{*iuu3i(CkU7yC6T7{=1talgI z`(?z@TDRI&tdxiF1l_@ZrRya=7;8*rjI5*hcsQ>iLk1q71n2!rcZr?}YENgW^Q_X0 zs3m8&9HU;wdE_mXiQS3JTddQJK8;O`O;j40p7_;$tVh0Kw@$$sz`UFa^C<3%3~r*{ z=LX|t?wyKgF{au~tYLpc*PvCUbz>$PxQn*S$n6VQ*HbOiP8$3G5YPN(^~2929ka*QLnHD=-;+EQ|&)1jr;)nPr`ZB)}3X(^BJK} z=-=a6^Jyig{ucisku!(3_09;PzoJ-tjBjVGCzV!g?qhVb7~Q@ory7EVmGe%-32m-p zc^W>aX{#%%?3!@ZI1Zx_Z2eAgY*R?nt5VGqVqa7&(MiT)5-sh&f%6o{TZS|({+K@W zrmY``@eqq0fio^#YeeH#BUq67; z7nya)e3Sc!cZqV}40kA2cL}R|yq*r?F8(~2y4~4Ra#(mEoY8Q;3}-aw$9Iq)L(jj3 
z&O>m@;rtxV=ZIj{oME4d@^c;05%m*$w_@{%#&&aBOy<=3m11)mb&oh#iOfHT^6UjA z`h8QpHFt4*M{KMNzlFqCmp~7pw+~^BuV4-CW3AzEdczqGr%(7zF>$z6_^lau0MXzv zEK>rfD*8@&fzSsT?mUa3 z`^H}o$)?<1t|p>R;r4qg*&Tl;VD*S!jc4jPs9I_373ee00j#QjDRGIj!w1~7Cu3E_ zd|juju*|E>9XCj_D*wSSFP*cx82U|LzUDd31A?bJKNP$bx~223=2VKExU)hyXV{f4 zC^lzMYoy}XUd578=X1K8%*uU$2=;*P@6PLnp7q}4tf93k!TrH4(N^o|9&H=^t5c_+&Q&YE?NV1N)LwMR=ajYM; z4uW@qXMvB}dY5>_R_h%E54R`jDDcdcaDv!iB=vjSD&-hUl{VJHnGbzI<(+fL?*X4u z92>0IJZo>&@!AVDTcFkw#m;8vso-MpEb6uZ4+789{#adqu6*)v2CNF z7uxQ%bcf2f-b80e|4!qx(&lO;`=Ftgx_<|K4E&0@Om`ysaKhBv_0Fc=GU@*zGtG!9a?TbMP-*JwtQ~#DHaXWs4a^g3@8O+nc&^@4ggtSUJC*hoga}v%a zIG2=TA6MEw9@6^MeiqKN%84~qI@UC#se6&S7twZ6Ik9fYbW@JyC~Y|*jr=_1=fU|N zPrnz^Jau{M()yQZ_e&v7PlksWP9;w(X?G~KhAJKZHFo$lJxtM)R7lgeJUDr9`at&y zX`bE<=Wg`;89jeSe_QmoMRGVY^uZYfe^5w=wgMlcZ^!88F*Ktg7HVkANM})ZmU8mi zD4o|PqYFMm19nG=zJZWufsnK|1dQ6v9OPI8u~Qy zZz2B{_K#!#IQkz)|Krdjp+`cmf?fr^8G18%UPaHV@SlhOJp4bw{}VKGZ8O*Q7ooq1 zJU;2*llBMjKcGL1D;|z(H~ihu-+}%P^aIcjAb%_Jx1zHQon^@2KhEqB^K>-yXrBHJ z4S&NjU&k_Ehrb>E_D~xhawoO!WG?nI7yFUiisV-KC*Yre|3mmc4CPgWRgC^(IJ9eq z?J{aB9Q6oBF@koN(c&^_588vyLT8c3GI16~9c(NUO^3%{yLnon=&_pMj=oFhz-TBZvpm)&!rSyL(^j_$_ z)IG~uo2u9hZ8qAh$7t~}TKOX~ ze?-Ig(eQm}cE)(PGvYrM{$r0r&$uuqFvgo$ziaT!cWIYc;e@f`6!NEN>sRo91-%@4 zIWp`%HoK4g7wEsBzm#@M=?`=qI!51O^v$^)J-1W$KI+~FjVIW6f;|^{E;M7b8Kb=r zdLfpq#*)=|+P(P3y|hL45+i$wVb2)$jK4{L-lQ%wlE;k1sT-%RLt9SR7BSW)#@ar# zA4Yy`_#~VsX@&hO7UoV}(a;q-1D%1s2l^f~?4j-+EcOl-dk6Zj(0@gK2J$oL;aBP5 zSK%0~(K8133)Qc zI2ofuw0DU14tu4;UTL#`+3a7jX4GwlObcXMK(j|X?9nzc%_gSV^v$Mk7Fm-OW=-S_ zHaSC#wqms9j7QIS=x3mxfnEu{5_zIiEX+dT+wg5X(8dE}uOagq{2KT*jF(s%Czjge z05&;*b%ws3p)KODLmZAF6ASYM=EVu;fhM_}NiG+E z6rGRK)+4m_2>LIe{{nn62b;{nnT(#v&K_aW zh6W>q8XZ&y6N1U|Y+6);U}i8Um>(>XI$@~{mIf<=)xkP?Dg8oDp3thaBcn90Ic-=* zpZ_aw+5fzeFz%MpoVV<^;4S;t@|OMU41LRfOJkHV+PK~rBc+IU?ccz=_HX1}`+vu~ z_Dguz{!P4V|5Lncza8(|znORKci>(7F7Mh;@~-_9@7nLkyY@R7yNumNXQNumZM5U%mKZ&GE-qH26%(LVNynVW4v+RL~n{W-J9jjr7Z9kd&|6)-Wtk!@MdqT;2qv> zZ?6{hB&(x%(k+oEwV-|VAxmbhgZPtrk0UcuD*mM26UTe<#CJX;UZua$aFzJWVDT#T zn5*%aru;6$HR3f7if^fZ6o_{`E*_?y(L(%Ug?O0yM4|D#lMD-GA2 zATyfcHxxRWvt<@LF^jh_i=Fw!gj>xQ&5cGE$v?`QWb4gGU#EGx??=jj^x*Wc^vKI) zOnO{YCVr$$Nl#DDipt#d0x64y)37W{uS~CzvR=w&%GUG_!MoFYFP8)9nnub|>er-C zMCEi;&ZRF(*_yuSS&ft?UcxK9T#AsTcqLJ3|B=$kEA>1tce(WNdPk+-N6J8N$mO<% zd!wWj38!Hh>y7s&NzwMEQf7Fw1<&&qUM@=-(caHnPW^s$mi1OeWv#bCN|nI2M#@eZ zUv*uPTKg`SgHb8@NIC2slRlliT+T%N^YtuIS89F7&#x;gm7e7nG$<_viv6|}S4!uI zU*-pbyIuB`S?LpgPrpwirN2K&%20mcQvdMxh@SmATIuZ}e?L9ikM>ZLc#6M^ zIU3?0@sBGdSgh}#QcKJD&&t{v=3kI?Gfph*957c zOHiIZ?O8!K=pOWv&%R>O(;}M<1_Xn>>R?zfG8iL07XO#0|iY~2urX#*sJR`IN;okZ)(stGbA%S zGfJ#pApI}NjP;J;b#7+7lu3Tu%#hFnrG#@;z)Xq9C@(WrV20l(Gh06A34cXqVP;8Y zd1h5+t#>lBAyehI4Z3Hxd9yP+Gu0xqFLN++ICCr`&(z0-!#c>E&(vmW#fx`koos%# zAlouq?03_Wo|SEzsm{9i>Iiw(*{{r&sh2gM**@9+-m2^%e^GX*c<*HO z#O#Rd=xn7wQ#^EqjBi?Yg6Cx?XQzo(dS_>*M~cK8zfX3)c;r&?s3BfSc9F!1e%Ynv zd$KFaFUV*sGaIt2v+HzSi_eI)vKzBovfHz}vU{@o#X}`>h)0$Zol3HYvWG8kIfeC4x0x`|MeHvQOl4vTinHF9_wS6};ZT;tHc8p4nE> zw4w!3a8y{-R@m%dMJxYQMVpFt66fZHv92Oj(M4O+mMY4@ri+BQrgM=|Okt z*-(9wYm!Uk3Nt-&MY)n(`&=hki{r(EhE{C$MpcZ=mFB$iIsOINX=Vp)ayf63Xr7uq zF7iFRRk_}|ez}3UA-UnXQR4SQ#2XGute>45n;V~-RDK~h)$5m=A?sM;Zf<5-+)Y%*Vmb4`r1 zOq|R&kF!h@&N5eVez}_SOH(r61etF$GT#DDF3mZ!Tx*yGBMQbcrZ##mC>tST8ZDod z@;Sk2UN*X{@)N-Xfyn~XJ_cq+FeiffpAZ%aENuiN0$U>39>J~%_C%li z1r7-u5jfrmoDw+u3E)CmWmmCc*LVa?1zLO<7+VAA+NuGx`AVr4hXfxym?$vi6T)QU_k_nqt9gmD+SgFtZxK1 z3vB%aup`p3JA%Ce2O2<4BXG0|5z6j%~{F8`SRd>pJ2Slb9}5U6Sdwl#pA^29jw>q!yoR`<#%7~NcmIB4$9*}Mu#daO9_odwFm@uq{& z4(V=X#7or)^c3hL&|hGXz)*n^0(JI{?5TCxC+qgG?N{uKp2|>%K1+*SdUDjShV#^5 
zjxO&h(L6C$*&EZ-qQ1?nSO07{7XCdg-18XQoQOBM9?Wb68t%(08i957_SgD*Y#kh_ z2MzYu%lqYjWuI(U_Tjy-OWBn@u-<-mIc5o@=SQD9=Sw5L#E-PvQF4s5j48b_;?EBE z57zC@a6P3Zr$}!PV^Mlnv@Yt_L0w!-gnEc`{jRJLoiB}B)Q@7|N7h!ubyXK%>iVA_ zeeQ|oe1Ei39gM06 zAJ^7@6i*j6fQE6heyrS3b|psEkB5a(-L+9{QX5E&_9Tb%;vr~mqw}pfjn_2dNzC%Z zM?LkeD1Y(92fcPt9Fx4mOGWvF#4}I)&?~=^6L{G$ma+fu4da|A`IYX`p5$1bwCN2H z&^EO#X~WYtBp>o7M)9*QUTU1|UUnsKntLUVqASutAE!j|a(yFlxIqlQoWn$MSaoSW z=1ngn-|=SEtG_iGPd2m{d6taLn;Y?Z)q??zK*L;cTq6+K^&@#*9jvJb>l;8_j`m;4 z%}$qnIRCm>b|vqspJR=S>Yl4-lLl)`*PplGqwC6Bd?m;7Bv18LhOwwl&RY}uwYNU> zr^|6*MYyii|M-*~*4rAwRBxiA*_5`%r|kFW9r!~IWz@k&nMOAhF34yf&q3jI{~LSJ$}U+myd3hm`f zZs*Im^_<|(4(;U63*~(oqc6Fezr2hbP~(4HZ1!cm{@MtXR}%WRzag}{uXflL`kB8o zw4=_WUmZQ$7wI?{#&TcsCSP(NU;8e3jxX!hm;5G>ye7yG?Hd$CaZJV^w2VHBLw^V) zzX>F#2_%;Z%If(~r!XG~B(Di1rwMvS{3zxIlG6nJL;bo=0?BOx$!&rWSA6yIIUw4j zlnzFRGC^g?8QO~0#yRr1a=Bk3+xj( zC~#QdSoC?ap8Oes^U-H@RPTJf=e35Bas=`P3ItjT6kqvN`=r_mxB{I8${P81!_PpV zn?O&2KA|mB{iDx8|M92Vr2c1WsK5w;(E^nM6GDBd$pX_NzJAUWm?JP>U{N&ps$*#c zD+E@5OdIP2HX6^lh8uUAx-HyRZX2MTo9a;Pc5%zytlQn~<@R+4xP#qc?nrlxJI~3{;xVwS9?g9Q?Bju=jLg2J}&b^qll1<$1$waa+ zS(GeEwg);TOOswQm+X=3o$Qw!m>iNEo*b1Nn;f5OYTfoC-)@}CJ!f%B~K>LB+n;nQ%)*hpQj2^EmOs*wkbE&IaQVlQr%KLQ+-nX zQ-e}NQzHaMrz%quQj=5DpcQ7O=A`DQ7NwR-Ss`V$6#dkHE4@yN{(Ga8EvfCPT~hW) z*`GR;I+8k`I+Z$`y3o<+81LA$V~dWhI=1Q9u4AfWmyYEfvmLw3&d}OCt-tPMzGt2@ zEc1f-H^VV6$>NJ!F)MB~v97k78u?a%)m);r+Mx%e9h5q4kvY zl+n`eWq;PVJ~lb_k4Edn$i&0O4T*0hW*N66o=L1R?ntalyl8wTu{rTG<8z7aiC2w# z61x&_7=NGmbu-hrx0&6{F{U<4G`r21)~u{q)>!fXanJOBA64?VE1QjuQ3U)yMZr3H zN4WWl(cu_P|T|~qkVMGKB z#Op@;W>z!HaGS-N#f)UL(q^Sb>i@BJ(m~D(1<97l;$+*T zo9vt{O9shq$)3qR$^OYf$)WDSZ@)ZUof z(tN-4PfBuoa#x3=$vw?SC-;lwq3c>Dk0g&LPsx+B$qOmt<38)!NySr5Q!P@hQf*T0 zQmIszRC)7_sjNFH)jid#uv@CH>P-zu4NeV9jqI=^H6}GKH8C~CZE|g=)b!M>)ZEm9 z)Z)~#)XF;heZ-Cp?HPTh)}+?Et5Tcm?3>z}+EGxI+FfVI)ZWyAR88vNiTe(?Dvs^{ z-MxhlDay!ZKk-ur$0%-M6#&YV3nbLPzKEPm&P`5v*Z!u;g*{a5X) zT+Njh!D4T5ebwH7YbZYxciJu%gC)okW@%_?ZfW(p-T$J!E$u8Z zmTs2#SMC1l`bz%#+Rrk8Xik0AFV&qbnZ)v2#Oqb!MDa`Fu?6C6&OuxzF5_J27gJ>{ zEEh?96jzF?iT|z@i#e^hQQX8iid)F&a2B_V+c`HfMs{d@i^xro+6{Bws=*%$C$~DHm2)O z8jcZ8SNVsi(uw|FaaN~ZL;RdY~ zWIQ5;8BZ9`8c%qXNnwG#rH00f#xjx`6YfsnS!22JF-b4<-r{g6)x?>!BsojX^{!GY zle?H`ayQkKT8ZUSJ8`+mPh9RAEw$5sCwYnmCOt`J)X&u<8wZ(Wr4BKs`lhBNMG`Jr zcTDGQ>O@jk-Lb&slBcPs=_6B5(i^ngzyIqktEFzHWYZ9mG6*-4Jbj!g*EG(hjP;Re zl4%-Avrx|8w7|54q?N2aUDT#^#(dLe)}E#v;t|sh(;k}BbWmSnI!4k-szvW@Ds`J< zDm7iERvRjvZ<%h8bnl=2#Z+N>VyZMfk*=Br=ksrSkJ;YrYW6g{25vKZyKa8lmvj|o zf3v~tFBX`CsMY4MYD>&v=7#9oJ?7@-RwT7E#~6~$-K0!&yp*XokYqLYGsl<*m{ZM} z=F!F$<}7obd9pcA%$KrsJBW8Vnx~UAhx*yu<^JGjfB0pnX)0fBTnVhx0A-Rg!MOKmUbC2A(zFH$Nij8S_DD ztJx}T^;|%G(4w|DT11PZ$=Tv%@%d*Tv;>F+uCxB!2Q5b8gIel?_!VM_ur#K>EG?*i zTB1ymmJV(uxEEX9)zahDJ&Vz0mq|5f@?TM{k3O*^O;1Rk*rGIb)Ut0g^fxpAyM z%QD=UU+o}m2@*XW#K9tYLl@J<46>>kCXVLp#B4E#a}jgJ&q&0aAm($`#QEZU&O=-x ze#Lo;C&iQaTcK)7++8Y?RU4wOfat42^tD6ubwc!YLG%?7eXAk*dLjDOK=k!S^z}jX z^+ojcNAxu#`qm?F1o?ak{+b7?JG+`lymI|l>CHSb0nH5ocHfcdI>ZoRXl!U2o zbTGu$*!-DVhMNH+|}@3Dp)|22ku!xaA>zPk(sx(UK~$m+B`9j^!QwHE}3tO z#G?P%+NA~84QrRi^!#h}Gmfjh==Vf_`^j%7DkoPVaAV-cw^m&R%BqXv(Fmj_MOYc# z!MjBysSiCroF*dIpZ`D-qoj!M2kebUF13_FflOwFRsIt z>)5}!<|xw$yHxUuZlGji8Wt{q(#MP;xtZ$ zX!T~jGGBY3N~ssEfg}3S1NHJmJ*ty>^x*Q~BhBefy>Oc2bca}?y!T{|)Awe-^@$`6 zVsd>tNy9m3eLH;&`PWY0O&?FL{RnNRAE2jyV`%p`7ST3#ge^}0VQzXt{8rf9jjYN! 
zdY-lJ(eKxHPBQ+T3SxgTkv(I-ai8!yE8}a?`jEc!=}E4RoV$+G9VDxple$vfW!(+k zJza(F3E?Vqm3l#MuUnvZ)qA3)hbZ&H$Hcl};xO{IJ%Y>*TFefPm>rzOd@?(@VqS2= zyio1`qa@FHa{;I?{t}g!k$Bx8jKzf_J#B+0v4rPF2oE@lBuWtt7ilyc#T)aa{~H@< zQUa9-cOjobkjP( zi7c_ziXZRj($%Om`Xtb(WHj5bbc$9g>Al(E&ZKkm?wq^TjEqBM?5mIa-9;ROciJ(< z#X6QSy)-x>%jPIET+44hdOq5LP1uBPqfFx}=aGhZ#k0!&M+GNp1E!my$lY*Jp` zJe=1WJqz8QobOpk{y=D3yqCA7zWAbkzWAYIi>*_ zRsKy_@2QXW!9|udmUzxZ!;|{<%;a0UC=M#phWqoM-g_mL#DWaqR9>5@Bj0y+&Bb$z znBf=Q?Jo}qm~XY_x+mSObNT1&U3a-dKY2Vk(|NX8v6agF8rk1VDA~t)hoY9I?;Q+hDRJoV2-)e!1}3xtNQl7u$r0IH?swHtlFK3Q;DzF z*ls94|2k$&U;8j#PIEHRR6ix=t*+>UVZMkk=gfOQ9_M+S$hzC7822@yJG6>+?s`P3 zZqhHA0snmoWKje1>*ejC_A(haCWl&XsSCW`_rzS%YfDn1)P%el?w7&K7T&H&$x7{8 z78BR46v~f&u}X@F)0f9co_qhb(Vq3DwCcvA2A_f|l__4MNv)(uE;FTxIXPsT#4z> zwjstQzi1||mI%wLq~utOj8?iMtnU1wO(EtPR*jlgKd<;)xVXHi?JJ(>#4sEpau~OF zDnB~=%a$T$EhQ;#9~*3b&~K54#5|>W`z4y6O9|$oxjEm@t91*yh%G$Pex1S@_O;q; zYuMUAq=Gr!=`L?){|^_&o*y1axyBv$X4h?3A6vz^%7lhcEI>LK=909tKx8P?wN|-qe?%dRlGx=> zt7kc^@UH7Vrzq78KFMWaTdjLAe0MIf2o_|Oy?&rP^+XYu*1ILbhW@LdKqpNNZg?pv zL)raKe)-!O^VHi1e)stIFDglql5f>~;H6^XMxN(v!JQqTLcPW|DeSIpP5I*axeb4ANP64rkvc$&*nNRfAoRJX`;Ki&7|6Mnw1 zrm0hiZ}aV;{Vn})@F_vbnERcA?3I+Ux9o&GgO%6kk6S689MiVw_KVQnuwhBVKDxl% zX?!MSfA%}^xbB9ckYGl=n{{*#@(gaUgtw}SRTlDD&p%I-SU+`N@QAW8&5toXfkp-j zJ2Up~@}-Y{G`J5-uiz(Jy_c4wjlY~t>(Nf{+6wy}9C!K4 z8MV%B=eWRMzH7%tZp9tfbnejad^EU;$&>S&sXDj89>-TcquIE2r=u!+m0g9ed{pz% z3VRrb;*d#)B)xZt(e&2Mz`-fZ3mLzhs>TJDD>8nsY1di(lB#CrS%#(ko>eu@-3hPS zJI79pyBkn7GsFIcu`3|4AJZeXb2iwhcdLNkV7ExMGyPXavNL^Iuu(gv<+z_#mE<`4 zah~#*Ciid9`vwm-V1_AvlvfE~F)@^Z&yv;@zoS(%!|Y)(cLk~%huFXD(+xDL5IN#N z&lfCEj$tygurKx2+&ge*qtd>YMU3S4y>c%fhZ$F67yH?0n$vQ7h+OA7*?GAXjZN6C z={{MqUThdDe3B1Gv;{Ae?2a+%+@%*PTF*rZ_%IvS~ zsT}1vlhbDO<$;Njm>#U3cICM*?6Mq+LYk5_9k1o-BzIu_FqMrJELSl60U|oarC!V| zb&}GN0v_8+l*@LT>@uR~3QWAal_^lb8$&O-bAzq`^I5`gq2gRIdvWC5$qLDjcZMqV zeqfmt_v@;#&%V?0^6HJPu@46wF(<|SDqbeoZ}n1@?=ya?N1qrzXgxT#r{I8Mt8rZA z-e`8e1Fnw-%|z~B{z>o)tC-1Q#~vvEe4#O$-RzJOZF%v9b6M;{hZT$N68&r|>@(Pt z9^W;sIG27$v*J-2OWn~t-trXVUE1`~0f`qd(ntMHR!F{P`7JVV7V}xekMJ@~gz;_bZg+YY+L^=v>8QqCu1~gwF8R0^yQNNox{2OhP!0B z@!K?*%vqw!6EhfO_WS5C?~cOiWN8gTD!JnV4~w_%z960h#b24YW>?3B8Yn#%3Y@MXW$D`waF%)7cTKg!1&FYR65ncDx0aXE6}dK}($ zF+D%>$?B-}`{INLdD=#LqGq{IfB1xP0uK!Zxkg^(3q_2NUB>AtcVfJr&zPrYxO%@Z#RY_fv8EIJd8;RX@+8QBdx>x!vxr4PSz@ zT+{lZv&G8`3YR*&pYvVsREV&79(&*1<`pla%=X05?K8Ntw~4~-`(gzX{O8gdHEr?? 
z9zA-TyFfQ(m&e1Ra#*kQ?bdi2SzRaY1rN0;|B{^WI-A7k6p8R*i}Jc{}@F{g&9^SMfVG z_JxHfMDPVHas-@zWVRON@}|l8=}2D2%iAY3PsnYtu@tmRY1vfH8&PLTEs@GG47O$IEmpgc}T{0DPw{bPN*ZLst zj44A#(rgu5L|T&IrKC~g%ZeNs2kr9P#F?Hjf5JpBj z&qGa{PTH+|4KF8d4okAIV_g)V;|VQCXLr+I`P;Kr8{j>MZhkK%ag(ke{uu zHQYAEShKrvNuKYnP@>pkf4WB~j?r;a(!RqhmuF{#btdEPZl+tOJPY3Y#$F4as>ty; z-BRJVp)I-?o_0V}_kjmHU3=DJp3o6`phH%T!$P7u%Xrh#@?^>nw;mMv|#GpyRUmXIcYS;K>2_1KYOPJ5;ki9zL0^Mhzhwz0g=>M=WaQKC&q zB*8afM)a)0W#aMGJHLEntkn{gdW6ooBrtpkx*WWCNf7%wo3FlH!TS`KOBS;7y7mDV z7om_?3@<^O;U2ft;JR~TL4u%sj&Rmp&$uaJJ$@spn2!7*j6va_0f9lu#oLn|bnI&% zb~_(Gaku!QNedm9{gU#z$-{eQNUX(QW;;Cc6-xGCR7zjJ9h_jjv_q!vk=;(4t-HH_ zD3#1Fe<^41xmTebsIv9uAv=+Xs*Xznwlm8Y%H@1_{Ncaa5cDU0N71VtyVFFB88;b* z)ylI9rC1$*zqvEbX1SajuyPG^=sx>4C5DFLuVvxBf`t!{r|w<3?c!VNe0gww+q!yr z@$bRth&$EKl^Rkd+ge`7+V5wHOIPR_*z4OeL)t1UG~gB<8l;YrQ7FsSFJY&T*xg{uC5W- z)3o6=!j(_PvN?%ed?ml#OolK|JCeg5UBI9cqN%!fqfj&ZU0@o&W8g*B;^5uvAq7^A z>vS(IiMMhw6#~K^v`)lmC5Dje1n-i5=N3Di?8WqH-`BDnZh0fYKf_0e9`ZafB`F%J z8!l|gH7H)0a_5;A&vs81U7Fm-0YQY{&jW%)+@EO+e4n)*#@5PhGwrwUR4Y@$7pu%n z(n%=g?cK#28lcRVvNvewZyqL@qv;2=;-5d(;eHhAf15Bj|2CAiYq@+NYe?J-!}>7u z=~v(Sgk`0KxfAL+h_f6 z)A&^0!BW};v~|2L8r5~RIdR9G*lZFVRV_ijkL$~dc%+kiqyvg#Ji zK6TdnZc;Ujxj13NLF?Ijr*3$o`?yx1EGrV z@Yt`yp@-F&OP@v8-t0SCPYd`5*i`2;0n3`z~3@+*=w_kQui8?f=o=WT{w@ zmR@Zt{@D}GqhB($EJlskq9dM$J&9r;7hXH};JNZb@b>-KZ+^*#%~#*5)t=U<{TWSp z6!BWHJ$Xr+m*7>>fAgx!1^X*z{Sh}dpFWwc@g!VPn5Z2bbhLP$-I%Q;Q`11>xX~{6 zV^V#Swf4$2Vb}f|#oinzHS5=hXy3$prW~V9W;*?4#*VeUtK{aDSCyfOp$vDQkCX22 z3AXITis8?-<#4X7Uk~~ohV^4oJ00eG(A-tnaim>9IU{2`;`taA+LU)?G$;a zIGT69T-dX}(bF}-$R__wMO>?s-=gX+cdb4nwfJ|U(V6dl#J6s=?N~Chja2T=(C^Aq zpVf%}alkohWAHJhQvXV1qdecJOCHDk+{{y9LRUd6Usg{3&}-s5p8d%N{oFr4+>pP& zQu8IFym*P@vcsb_*ZBxho&v9|3>A`Gv}00m|MKLa(VfShu%zR-)MSL;>b0cV`R~+c zV8niljuo|JO{(x-ZSTE60RfS$Rgpsi$6ng54tQ(nl4JyrIp`>iG!|vO4_!QXJk-T>SIxnt#;$Ph zl%|f*$Zw2}Qk2zJyUW#9snSvylu|BZixagO$kFMLj3Ie=El&MV394DI~pKRGa?Ml=6NpD^7TI z)YWt2^;};pg_2WVnJR8LnPeD!{FYiQ^we&)sbeR92GH*DRyW-lC_&P)7N8ZzYh zOxLFWE1me+_#LdnHzy5Ev{D>?fBiI*(Z_quk1b>sgoyJIg`LNrt-opQf7%`x9buEY zV6xh7Ya1OFxzK|(HFvLRUQ&&F=os74a`& zY2A6eN)5yCE{Zfh-JnqIxt7imPrusVqnrB6>+etG7FtyMiLxcVimqa0ygk}g|7L=A z#JQ;=UB)xTO!%YTN{FcEWHwQAF>5im_gN+`?d`GGtQ3#l7?)1_I4*bdy85;KQ$2A- zsiRtI(ihh(3Ozr4QVv;L+kt!)QtY(bwDYU1fW`6e!S1t33rvi{u32*Dxl;`0+*clP zYgybSMxDh^tUcv$(@$+dUeU$ zjBq`a^$iz~3-9m7i$UoJBS{Av;%N>XS6(hkY5lx8ckTQ{f?7-K?W2RklN2$msYZBi zN%h1y z!|Vr`yT8~BxA`8U>#ucN>Sub8WoQ0K_?wQOeaAwOJ^L}`nvA|({tTPp_NOPB!me+S z6a}tZIV`^Lm`%O7Ako|V!TGcA(DR}j!=!uHJbOedI!zX4^-Yp(=_pgh*Bn(!YYM)nz7>Pj%(s|Dgmyb1m{B2gyz%7&MKH70hr9z|Ia=mA4UW2J6 z_xB&mrG~@tEp2+jBuE@?!Rlis^aA#1~Xb&Hi8C`F?;pP~kUUt(?Zc}VpC(`!T zfn+xN%gwLL$Aq6dXQfh#;>QNVR+4v3?V9%Vk6wA%QlDbKD*LmWPuI3+W&Mw^UZ#Zp zv&(jRueS3|^e~jiMpd5uQ)sTG+7PB59U!$Rmq-5ggphi+Eot0wF81_NU!VVhoh_86 zxy&EG-iAC6*%>R&P&yh zGMcZ>jB1|t{hTgx#DM6yko@U#`G*Gecx2vgf^CP&TFWPc@a?zQWb0%uj7~Oq#xgaC z+PN)=AWJ|rjfE?2XX;h?_||xF_FVYag%erJ--7c4R1916f7@2C8Tc@{xoe+F-iGCQ z9rrOON!>bJ@6oNnkGD9D&z^Q~wu-5ak^EJ{92WG(o1w1Du1`a4E`+N??y<#7`A#`C zMuv({1qQFV+av?k_U~Kh{6ze{ph0n4-!wK#yLM^g)M81joY}n?iI1)B_3w0TA9x(k zoNQ{!t!Zw0_gFp6m*`2JeeYa0eeB_%nXt7a|FrO+ede;561il>sxu*r>u+YYtQQ*I z+P>*uydBv5Qp1qZ@yLe`!;hPhqKC^ZSKoFJuokv&RezNDUa{m|`Cx8yn`fGmLN-^8 z{PZT`&_r`oupzQMIad~W#%g0=PFCKx`5l8S&zE4?{ITtF8`sCYeEE|LVnku6Tm4NqFvVoBoZA+*=JVJbvVxxev?^Uky(Wj9PJ!xHSDF znk~44Bf7-x##rok4y%Wj>asGn@7M)@6**T6&sN-f6uH)Y$Ze{IN7KtAo+mz^^I+jl z!(+}F`7dq{q^%5TemiK=p@M&udD2i>v~{BWE#u-k*Zi3fTfQ_=XHTv_-OF36Y|T%} zOcnpRcBjX;FEd%ls^;aPJAZPrs#TdbQf4Ln{o6uz<(|Z=SU%kw$IZ)qz#!|q{YjtU 
z)iRIrwZl8xH7uU)ZAvh(P&iq#YIiL7x#oVmN{h2U(o>rfleX@RhEG+zY@xB%kCal~^U8j7D@GH>MaAc28U~r%i$xO6 z-rtMnN{>^IS&DUy$j`Vw&>KR$_@QLO_@i^wK)mLfa70VDc9K`|=(nj2Che`5xTs@Q z2kd06hvR>D6?MxVG_L3p+A5*kvRoO{?P(vL$x-|3w|Y^JQuWzf_Vs40&8d0_f{ z>DF7rC?~?k{6pRgZt6>4!*6K2SH{@B>HbbwUth=wa(%dIXEiJ|ra4O*J*8nxbc>C(?yzm6?jMTy8ic@YIioFu!Y*h}V*7mx0aX3b=n{&ZGTn8d6 zY;B}$4O@Hj*K3+4xlEl}Xs>+fvGGr?PwP|Fe|N~WY2L7_UCLc(t#Ux**p*5tNB^pz z4Ix+UFH_T(cq|)_=A6sS*Btr2##W&-`B0qTKFNFwBP?h(;`30>Es&jvJaZJ%5H+9T zzo}uPlIu-rkVHey1pw0)z8EwhQ1Oi3RH=a%p4Hhc(m}aOT17jVW~!R zeu#LSW=hZNmsM?16XS$e)IwQk2(o}exPSldhlIlRj~}~aNBq?@idV94c-^<`7~fjL ztG<%&;2zpO^qS`qmr&*z^>mWsZKJz-Gbsmfar!0JqmDwJEpG~vyT#w{kx`mFYOy%; zBYJ%p%gCWSi-4v`o65ls`RBgG}ZuW37sf=KOC7IoB_EYH2O`a5YEm z$fBEe7>FKe%hrt6`bxCEBKL8yFq|&;uIEI{RPFJFZA%0d^>w^$)wZ|!(?1)<~ug@ib~QhX|I!uF7@JyCdT-RZa(158RNK>aV_Xk^XR9z@o49^;~So9 z-9Z68zs9?swDh!DT2FAfEtv)uM>*(unciyY+0IllKjvw)&bA~~fP5cnoZ}DAtcnRy z30C3Asn}SVk+M$ODZ96Xj2TIB!`U;N>3ZR-L6f;Tw!bub**;J68sY!U@tejwST?pC zU^qVgaq)86gEgMx+~kkxroDB~G&JLbeoyZko*h)ijS{B0ho-0eZ`hf zdi5@Fpi<#y-ji+_LTqNQ-mJ&ZkfI0s95skvn|NC5eR=4*NAovA7VlIS9)A9WzO5a1 zo{#cDLx)}HeV?70bB<$R&sb26_*}o+!V23e?#Eh-J<|3$?_S~yv+bIzW`QcF`8cusxKp;Xy59WC%f2gXjy#2jGUeJMxjron ze#@zmexy2}6l(aJrMkGo{;Yt=V|#%=#a_gn!OdL?Y`$%^$*3(pOs^8U#x=SkP7-1trviH~oz3)};KJdYF+*0&Z-jgX^j z7mq&ot!M2=9%p!TTcWskuj<7&b9J84v!aT25f2t;k?w%J+TzN?SwAQ`z#!h9_(Ll}8CH(N}uWWBhtnq6WbWSG}>^LD2uZAMFlzll#+7h|nZw7T7UuZj5(4P!d<6glblH&%Ub z<&|(ctADLq=cvRR35>&pzf{$W%Zt6Yr0ZYJ{xV4Y6Ft^{pi=F9nY~S7 zcVBfwbEKoI+paq6M!`@=dGBm$I^7%9!M>46d z!OKgr@Y|<7@`4qqOTkyTro6JqxrY`9J_UCU`WkuiX41x-trvedVmVX&M~|{O(AU$G z9xh+;>4WxGbB1UgB0D}O-N^VF-Xi9BBP-jYxPWoyHTj`^=LWZ{e~zj1ldj&6IdO^K zrN_^mrut11&6iz4Vt%DbHi;7(+22J+=1C(qR)JA#A3gtEsDDT!a$47gzMNj+fCa%q za``Uq;y%fT-Xo6R?sT5<7TEE$>BKW}o+EL)u9-?|iECM%D|?o!c$99g`5`kM_UJ;> zxY-jSC7PG!ywVSL^Tec{fASziJ@}|hMjSTn_0fBZzmx>$1-(pJ`Qfs-YMSUt^Ad$U;^VLTJ6GO|ZOptmb4j#cc)qaY ztY)wM_fU=9{hxhvSVN^3VsesFE=|R4?6+I{lD2vKhG@0DN4IU%t=BW(>V{@wV>s?v+@ql-|bLNLHw|U8mWc z_~v{0*WJQ8&2K|$8s3Jq&#yC7`l_pOmX18wb1=ArKKkiNZ-u)9FFHwAeR4a5`yPF} zmhQd7F>F{#jUT)1n15|H=bVpljhOfD-nx!&)%ZPvr`~wF8;Ur|nXELs%sRIvUCm$r zeVEyz+#(_Glh)UoIo-Tj*7lHG(H{i{=hhsm*2;@KY|G+uf7KTkjOZLs4{l z_H+xK^|Ci#(D)-0($iBX_eLiDcD6!7$xG$_iSy?w9zWK!IUtsvNB)?5)hYSAY@$?+ zc55@Pw@&dEvJOiNj&j4bUhXF2;^q09yfU$Ge*YYM$t8h5rpiz)c0Bj3sjE+BOyO>zXN`;c*>5Jp+mc_NQn+>Z_kIrcEyoPhJ~8YuwY}za!_m~p=AU2fj4c`V zU@!y>za;-Zze`I?Xx+9qmC(6hY9^t1-PM$z`d5iddi)rEJU>>SU*dwD-3{bFlKdAW z&e)pS@e|1Z{(e$VT8W5N!sAb#l9VUm6|i_E3Km0DAY)G|DPV{v$v8#X|Mv*w07}=b zZlGZ(zk5007OCf8w z>u=Y;t9`e$UOc)K6B^o7m6kXCGn=oWV;#IwfmkduqW)7- z6WGKPN4C}B)aMu}Bi99WVz??zn1zhDjeLrEx}DD0pw53KXUl<42X)vV8e_z~D{@$# zd_JnfDQbd!Qi0Xstu@+n@l$}#)-+>6dN3h}rma#}hjzkP#`?}e&OYMj$_|$3tD@F^ zDILsuSDnl`yIn>1jB`xj zt$|qb&R5^{$qdt#V=hefpPrGqrpw(N4)=wGeBVxH8yzTL-Ya`QWWYVfzhQ8wa0_3# z@A4s8?}mZqWfs|o4;}UudrJ(=7w(AsuDX1%I3R1FcKLX*U+s*EJ6-?hAIsv!K6XQE z%eK5A1)`*vt_nZwgt%7OM* zrGx{U=fkxb(VmC>1MP);UX|OIWjy`m1_Y$bb11H~WCs7|?yDKn zI%~rK-?CI(#k>pS<_+npJ-?X^KGQ8T8B~_c9yKWQ`WP_t((k&-YUH1nUSm4gr!9xw zYp%{cOHTIMZJ6{FDNNB_jpP*askw^$pcZmQem@*gbM->x%weB1`kx}{h2l9F!^`eT z45~6TSLPkvP-WR)aZiGv#uZ+6P4a;nO?X*eP7WR0hU(_Gl6w+mY9h@gza=(QmznD# zur(?Sn&mB$97t5dJ&E&bEdOqk_N}Bv(nbyWC@~MSq0052{WYilMs_Q*zxW#Rx6KRX z5!emtAx|EyQP~WuK8YcyEi+e8cZO`&A~}d0p?FwqLsd7tjJh37b54uohUz#ovUy6a zP8@x5#F!zH_Tj7l{6Mlt{&(l4LyD2*)2GRPDMpS@DqkW$IFYpdYAy#vet#Ks{mQo} zqL8hKK|mxi*+s;l>*+P>>57Va7)VVCN$_{p!{ zr*0JXm^0aB%+T^_z`s9`^C)79?EdhD6r%v-W$+U*pq}b~Larg_-^exkJN3tZ{)X*H zZwbO_y8mb~LcUEMzt4!f(u7zSy!+`rilp$!r=F*5^H*iX?yThOJ@EMr(%r6}7xSyg z+41D_1s%@Xs|2-57BT-HIomFNR?*=|yQ*DRI$^cY 
z`wA)#bZ|!-iKDc;DcUycKJT*fzdA}F4U&=&y(oa8!Zn4~;W53%~!%3fy4w42B zH)c7x?`?P35?q?-!oyfL;Xr0A=b9Db_7!xX53bB{!3CGUvX}0A5N?0)Y-#t*#Sc5D zOKV)D>OZ}=KYF%o%!PEe{29{WO8sYLKd_k(U5W8o8d6;5m%T$byDuhW-hQjBU�H zPkGH-eOtg!ThSF>!=zHBpM zfBZ^}#V&7__RmU4hm5+v$+@$^B{6tvoo%zM)|q{xKh32=g-3Nctn4J_r5ert3TngT z+Mj(5X|(@z7o^QISDeIzs!*HKe_E6TM*Tnko27Oj{WF`)+4m&W)mi@SN2OuaEs_Ma ze?qpZ9;9}sf4kZd4z->OA6O(-)MhVy$ir+!`orQd?w`Nm#W~bO!i(-n1gQ#9J4ukL zWOF56oS?=Ko{JYRQ{!qbr0#PlY;FhcKi!bJIZ`vElZRJdJIJB-pQHchu>YsJJH&d` zkUsrS<(aep*=O_De=Ar>y+(|O745)r)cuez3j4e3MN_*T^)h6*?cc6f^bF~G9HZgL z+4{G0QO^i-_Wz!k4b}Q@|Ll7xtnAA1L;rWbEPRRdw5X`>FI}#id<&mBY<~U9(I{f_ zKfOEkrArX22G{;YExe7|i+`>M)R*Li_@F9s8Df!eP^HoQVDyD}T!1(BO@8%)pvl`l zGi7%F*{2NqF1><*j?TA}M#^(_JcR`^0=JvdK?M!*mX)nsA8u*F3%Z zBj@17QcIoPq9&~A0j)VgZKcO`=&VdU(jP8lZzXNwDxEkdjm>Y&#%cCz+P>7daw znrPy?IcM)xB%;bV>9kT>T>fQNLq=}qcYW?t`_TyR_kQX^Ju+-(qbfdggwRjoO$L0o z+t-Q-#Zr2-<8QQm4h`Mhx;sg}61?4VaWpk!*ds~3vBDxh|MQPP;U^LZI08TQZ~ys+ zA@Tq78^7)UmSM02bQu}B1*87^zv76J*uTm!1fnD!T}Jr(7#IQxBZ-P5lPG_e5iwX) z91%+)qT&d69IA|j`8y8`k%F!ZNs@xxYW`Pl7!n4HF2j;h#~|T={ShdrJV+!GYJX%& z3~GO53=wty$XGIZTO8%@*8@XF=4Jk!2bqXNy+&jb>b1m>DL7P}DUt-#d8Q!iy!?F( z3YLO;jVO3Y)N4*55YgL`FsOVfWE?7A3I+dnzF0{~;@|mVB{Af`^TkTyaOiCbn7?zw zN|I3ZL&gI5zw^K%O%ins437ABov}zGN5_%yf9H$EP_U>nEQW;24NE|^KOBjK^y&X@ zyErnD^7sC5WHKHVMaTIhrz!OMFumA635XeZSQDr#PF$iP=5gkXu{oM|by%QzT z*CyngqOMoS{?KJOpbQU`5r8rxP(}jE$UqrzJtYBoAblTIXQXYQ%YZycKpsfjKyM4= zK?3q10eK+TUDW=_Kpx0-2ptFHfwWU}9FPYY$b$^zK?d?bu3_k70C^zSCR7}9y+@Y; zc~F2nkiLc97RUoJ3LOXJfn3MYap*iSNFV$=4kL-q1A|nU5U0^)KpuD?59FQ!y)BRj;w37M0OWzZb&8Gy@*n_t5P&=g zKpx1w9{LzS9>`m<=r|w`z;g`XIR@|?gWQLqj)B}4qsxFih(I1hAP*vt2NB2v@EjQ( zpz8v7PQ6z~)fw=ddas9$1M&bo#{iyV0M9Xi=NQ0q>isjSE`aA40MBuN z=QzM~9N;+)@EiwtjsrZ$0iNRk&yi1K(B}{E90z!g13bq8p5p+|ae(JIV0?}PJV!n) zLf0Aa9QgzZhy(fu;5iQP90z!g13bq8p5p+|ae(JIV0=#f+!WQu0nhP(=Xk(#Jm5JV z@Ei|#jt4x)1D;bqOZ~gfc))W!;5i=f91nPo2Rz3Ep5p<}@qp)ez;is{IUevF4|t9T zJjVl`;{ng{faiF?b3EWV9`GCwc#a1=#{-_@0nhP(=Xk(#Jm5JV@Ei|#jt4x)1D@jn z&+&lgc))W!;5i=f91nPo2Rz3Ep5p<}@qp)ez;is{IUevF4|t9TJjVl`;{ng{faiF? zbL5kDbUz2i=Xk(#Jm5JV@Ei|#jt4x)1D@jn&+&lgc))W!;5i=f91nPo2Rz3Ep5p<} z@qp)ez;is{IUevF4|t9TJjVl`;{ng{faiF?b3EWV9`GCwc#a1=#{-_@f$=#W@Ei|# zjt4x)1D@jn&+&lgc))W!;5i=f91nO-06ZrEo)ZAi34rGWz;goNIRWsT0C-LSJSPC2 z69CT%fae6ja{}Nw0q~sq9NFLZ8`S5e&}HcB6#?*^0C-LSJSPC269CT%fae6ja{}Nw z0q~pvcuoL3Cjg!k0M7}4=LEoW0^m6T@SOU@ChFV+o)ZAi34rGWz;goNIRWsT0C-M) z_7YV;z;goNIRWsT0C-LSJSPC269CT%fae6jbL7!jbRK}`1i*6w;5h;CoB()E06a$? 
zq(#>S@SOU59qQZzo)ZAi34rGWz;goNIRWsT0CO-WL6OP@h{z$Dv;jBH%d@@SF&E zP6RwB0-h5A&yfe=(e(p7Cjy=m0ndqm=R{z9P6RwB0-h5A&xwHNM8I<*;5iZSoCtVM z1Ux4Ko)ZDjk%79 z69LbOfagTOb0Xk5b&kp3$0Y%tlK{_2!1$a5cuoSw=On;$65u%r@En;qhORT+kx0MAK)=On;$65u%r@SFsA zP69k90iKfp&&h!2$b@IqIUoa`lL61kfaheub28vL8StD8cuod9Cj*|70nf>R=VV}f zP6j+D1D+!j!O`am@SF^Ij!e!)Zwq)%20SMNo|6I3$$;l%z;iO-IT`RAndA=C8OQ_h zoH}3p@4iI_JSPL5lL61kfahdjd`<>DCj*|70nf>R=VZWhGT=EG@SF^IP6j+D1D=xs z&&h!2)H&p+^9*>7EJOgrq3?Iefaheub28vLGO-*u2Kw_TGT=EG@SF^IP6j+D1D+#` zETHQGjL*q{=VZWhGT=EG@En=wk3KHoIT`Sr40uikJSPL5lL61k!1$aDc#bSS0n{1D z1Mr*-cuod9Cj*`%3m~A63wTZjJSPL5lL61k!1$aDcuod9Cj*`%i*lgr40uikJSPL5 zlL61kfaheua|+-&1@N2#cuoO4rvRQK3yz@jr2w8&0M99a=M=zm3g9^f@SFm8P60fp z0G?9-&nbZC$YLt!d;!lXfaeszb7Uz*^!@KtNDS+qHHBwOL3hY6p^`k}A86u@%|;5h~GoC1u`DS+n`z;g=VIkGqqy3WA(oC0`G0X(Mwo>KtNDS+n` zz;g=VIR)^X0(edVJf{GjQvlB?faesza|+-&1@N2#cuoO4rvRQ)0MC&{gwU@WFg~XM zo>KtNDS+q5f>7vV0G?9-&nbZC6u@%|;5h~GoC0`G0X(Mwo+FDZq4NMdrvRQ)0M99a z=M=zm3fgmIIT_UZ?MNBQ52Orr-;R`_@<7T^ejsHi&yg}zKS#G|ZpgqS(qCH2-fOduU9Ek(k720zo4ro_s&yhHwU7G|Zpgl*H&I0m4dyd2bd7wQ<;($ERo+EKU z9%#>zI3N$S=NL(}=SUgQKhT~daX|k-dyd2b{R8be5(o4TwC6}1&_B?gBg>Wnd7wQ< z;($ERo+EKU9%#>zI3N$S=SUnn4+O`*?=3J0K0q0|T_G1hAP(KGkj@Ikq1zSGh=4eB zyFwB{#{r&W0M9Xi=NQ0q4B$DkC@yeZAP>NE4B$Bi@SM7C?O$Usq{_hS0eFr9JjVc@ zQ`hN59|On(@EikpjsZN!0G?w2&oO}K7{GH3+H>mCqJQNYTb$;Q9mqG9BC5Ed=-i24I88xQqSDqHAgWNfz=81{gFLEEpIVDVVz3ip?%K7?=Z64GskapwP#HD*h<- zCc4LHy?z7DY1U&hAPXVbO)a7E)nmNbJ}gX`hOwY+&EG|BMfpVtt(j5Cwvbg|1? z&>K1!P?VnQSn?gO^=lIu;D<^6w{7EsG&Wjddbx)I2LF!PkcJ9AaLm0`KTe4UVKVZN ztt1T2COHZUjb_T+(`-F5ct#~hj(yuF@7sa)Nnu>=q8ha2Fte znfZkgAqk_-DE?ARFAm=;Di6jlG)=xK`Ee@frV*)_A|DRTzWi?S1l;cyf6I|HVm-RS{?zC%^>t{BfMEm-XA zJ{?v|3n{uuiM^r#eto_j5Z$V#Y4e-j`B6KhbPU zPs{ER?uR{TT)8OX)%8@SSwmrH6E7^g)t@x0 z4BdPZ`w6757u1_EU?v5U70)zjRZ=G?ZG>#?One?k*BEN>^iWS>2w%Ji+LP$$!HHyx zLt`#~sqH60_BDgm)E;8GCk3KWjOBB-nCihbAYzD1R%pAI_DV2);*_)XXfz(xwsCbZ z{XjlJTcKLzfimw0V?jlosqG7%ND6}irQ(VJW1RR6h5=BUk?CW>@Wp-$O?afTb5%9n z0IA^|MUJDuj@aQZ?uAe^WEwbVaQ~{N4Xhbjr#Q@VcCx3cOSVr$`2otMl~!O+@c&Aa zTq1kZ>7j5by#vXe&}5lIVRL25)zO*JE<7gMzK3t-i>4gKER7!EYd9VW&GCrAnAWVC zJICK&SO%o;8;{r+il;=UuMLsR8KjNqrSwniBS~Y~;PZXPMynTo;m))qq>j7eo8t&JV38({3di%U&zmj~*DB7<$-He8eVXr|ObsYnQ z(R1z$bOXHU@4Q;i_SMT(iY6~m|FhN%O-{2CiKDQLz=jMYD{Am1)Rxi6JJs4tq{|vU zCcGE+q!HY_iDJk0(bj7VJ zEeJdIxnda2%E2{~NTw!)!hG(wZUmt|<4M(8PYPGnhfZ7+<046>=%)g8I5>Zo8V;Yl zPgDauu*%?yWUYs>M0dg7e}~VWxu(+J(t)~SOxS#LX*pUxO0~X%aGu0Y4tin8Xf|P`wcMguXp?{@z?X*`kP5pyh3t>`erjDDq(z z#;Kfd{CV$--s2~S^vZj;r;gk zf2@;)X95SxlV%hGSujE_<$Yjgy=ik4OrYCXsoGdr?FGNXz*q(~9`h=AdgDuQNl8zYqn=#Dq7~_dl(XE89WdCQr5pxfa(RC!hQB($3%(Lt{Uk)_0$m(g8 zO4rA3Vpw(??0D;HVv@{h=e-Pch9N$&Yp;W_%tV*AG?iYitL%iRfW0Sx7+HvYTD^ZQ7h&#w*Fv>)=) zH3}Ci@XJ!d&&kWf*~apZVBnDGIOXo{8R;_c`RA(;X_lLBIjbzL3wk!U<^wjFXu&*_ zJNz=LBo**&5S=*Bku9G<_EpWix-*7!t+I<{zW!AiZs%&5hYK39Y(;j*3z(-QMV~Y> z_&xe~KvW!qE2%GOfsxmh>@5Wfgw$&5SArq8od>(+kc;LPBbYia#b1mE_5KYDGD!Ib z^!tw12od2c=c|S6Zrk25-dD%PtxR8Sg`Wj5$R&w}*QeX=*(pMR+h1`wF=oxTMt3?4 z3xQB5J4Kj%7`;u^*ci*!)Yc~F`5YWl!ODoNC~_+Sp=d5{jGk+1jE9RWn9Q%E$xVN% zGkFQRP;dr8_L@`t_dh=HN_S+b&DWRbW&&oop(SENbh=pQU-}`x3*$~3gBb@K;$$sT zP{hFbyuiifvw=kb3jUxmQ?h=>l%GocWJ&wVa(`i1zV2X+eW&653ZKtF&YKR~1H=xn zp~^@GNns6oe)|d`^4}=m*N6lTN>7ge^D6W;!ZBjkr}gX{$)N@@XgEwMg@Pi}-dlFq z*pcw&)x3^5=b9kH4#$&kA?4_?=_F4lM41C&wpb-&V4E#{pJ&v3uQsCe3p4YNAJ;I# zYfB_yOKjY%>Ny7cs$6jeaQ-7U}QXr@|=W#uEVZW?ZGR=D>1A9f#zk3-L=9?)Jb>B z`8Omr14GSAehxfHojeA4)q;=$eK&4Ao=>Cfhs)mvwFahCRjpdJ6r%}sFselqAhyJ> z@s7`AI}V@M9LPzMLJAVf%T^`S=P5g(9mPWMmwXilogiX9MoQ_9>Psl)BX^xCzu6Zc zR`K%|c(Zd#`Qi(u89I`_WQ8>6;^gL)aToHjEUSZAmh)r})r7q9e41xaf_lhS-BvA`4M6k)mzQG`&sUSg6Ntc_Eq9 
zw|CsDNjUq1DzU2Oa!&c_i8pIq!7m0_nGzEGAfV3|Tr4XL+Qb=~{e~$NHn%*4`e*xZ zz1y>|8};Rx6>6-|`vhO#deR>Sgvd|1&mFSMaX==S=(ASSXAS>@V|RV?%6f3+&k7_< zz*{Z{yObzh}jQa*ECQ+H!fYnP<|E?;Go!)?s~ehVeZH-`22~DVH|BH>l46tLf+~^KO6xw2Uu-O4h+~ zAcjeC%veEl5vn=<(Sr&4fr6Wwy%+kP-Lc%(!whcOK6lf%YhX`$@zK4i8?&kMlFu`t z6>AwBO@j~X?cFc^Si_AQrjkeC6R%yrsjst1dEIOJ3^I39-hT+@ux4<&b6mZd5mNSk4&Dh*KFkAA3ecrg**p8 z{%{F7u`)Z-LtNG6%7sO}IYairzpR2>;kLh?B2a!4&TE>eq^1BV`6rFmZ1KjeMU#|j zq&(^iKRD+0AqHyX^d~eJ(c6qYW$As73$uDPtoM-^3ztS<`8rI{z5w5K0>7Fr$lpcV zVYx3$4X_F%>G#6kzkd7?s+zb(Xs~G+TEyIpb@a6(Xp8alE&qm{nsEZs7Zv;v->G)O zeGSO!xA(yIiID`B+rnHVe6Kv)Q-5@y8aa1Dn zK6Qu5a%Zx8Bs?T=~b8MU&!8pB&*eft-)XnHX zAARP6 zOCJh}(-Q#RAFV`$UA*f4s3%-ZvnR(B@8b76$;-;#NkZ2{5^1K;$6eRMy@e_66bpen zGWE$Og_cv&ai#OjGK^_!{uj!#FVW5Vmx@{9TDogZoC?#D6ro*@RZP~c1(-T*4^l&4 z12M&j_%_nv#6NKfu*4X3X)W3c#wjtNA9p`}*;4^&qli9*CZuQvCUei@BKX_LvGZ}P zDFVlIhpL@LbTXn8j+RfUIyiEt`D<1)h5At*GHEM) zjLW=*ahKP4bWv_heQEUv&-zR+=Ghcc^Y`r$pGY4DGZ>>0GPA7kuA#lCVC;^(vjCdr z*hqj#jJfLIC__l^3F@lTiF-)``o%5@c?9*+d!$H0+jY>+G-)IEvT*c#`f-k%UuHXK zr_7Vh^CIw=HjT|>`0qs2hvP6VAo$AskM`nc93vU0qG_YSDj+hR9L5gI;{57 zsyY0kRVe?4nD&cTG7@E~i!tiwyTPaC@nGB+)RL;s)SaQ4Wm+paiehbs9J&wC6{!MJ zEW-UlGv0+@XNh9yW%=ks%yCtHNovb#HIk-*X%#w%;mP5dWNU-P0z)#O#Z!$-I*La9Ezu6A4*W}f$gzVoP^w7T8;7JJCZ z%!|1N21p%h2j?AhecLwCLYvK5Bti1%$mvf18?^4*W8d)^2Ms}TRJsqZ-N6ME^;EkC z+ES4jgj*SYF$f?jhXADo`h!iR+S9NiIbSKF%~jWRkVoCsW$&j*+vp` zgPTWwX>^B<1iq@R$~e8wkU**q7=AzFcbbL2lE;huF+9l~rp=|1pV%PE15P?#2iGqG zVt0MY>J*eezy*h#GsAig^7oE;80e*?KG5b#Vd456bfq}0J1|}qvTFf~478|jKGEtn z0P_LYv~x~IWeZ`JH2Q1R06r8u&`Xcdiyc@dOMx~KW6TgQHtzzhQsWVLj7TB2<77RK zau9Du1j<%S5l?%+H--Df>}2hsLg?%!Q;X5!oWNiqJ;pD{pUr>^#kzEkXKJ-_Wh7(8 z`+N48X~anoK0NPt0}p_Tuw+YuY)bNTxseB{w8-om6M{@qqiv`dHUIe~DVodveF*(u zT`dkPw;wk%k&;PAT5uGM5EtXf^0fJ4((RI_Y2@&{GU${qSoY@r9k8CuMrIzP))^scj}VGBrcMaMiDYZBv8!6dGz)Jy$f{bq2VF-9wPVb%bdoH{$>#-tCg^Zba1 zGwUd4P3fZ?}%gF8b}K{NcA-?bmx-sRco) zYTp=PLdG}v8X$;oUOEz~Id8#oPQkeJ$zx}}2~(!tMqWfz&_ z8q@YNUI_k3GgSj>3s4^6UqM#Pj25f1d8WZ4Ja!#IYnt9G!++B<<R>%gWRM}kNNkcL9WIQkQ`lBl57n$v&y*<+4# zvF5e)#}e}z*W`6n*qhrNG16Z zpBq&O62gb`u@8>~rsN3;wo3qv4beA>)sIerZ4(Qz_JY^_@qHch*8R$=2ndWJ@pH z{))1-Xf);lf~*}KD>CWW#NInnJnT08U&Ev}@YFvt9AF^9z)?*Jp6qiC(NS^-ra1}E z>n<{$_ZGF2SWQy;trxJkn<8s+V$lYV&dXIPPj&Ih$m5&8e-<>7%gOmD*LB3oZe{SL<>uCW(%Et1%V_ zjgDxa3^FgBWwxB@uOhEp$$Ea+_x$*)Lz}?l#XZrMJv*Npkv|5X8@Lt!6Cr*>gWQ7M zZJsXva*gZ8G^6Cja=mio?3K8%Pm2kAA9bj1$8N?m(mBXs27c{9kjniGVdk-DW?Wx* zrfR+BaF_LtmOrGEmLEFDDr3gYV}6ar4)VPGyFjocPVy-M#D zn-vn87f|-wS}@?lQu!}s@&gUFWh}&Q7N^Y2@s^G^MxcnB^IHB}6=gm-!V%bG)sEA5 zTn}CRZcXy{$wt@Axt|eCG)z0Om?G15L$gq%9C}}@ZNN@(3w`iOf=UetXN}ErI(|MX zk$-t;jqP==SNZ;}v^lcb6rxwZ^fTX3 zB|pO1T!rFOjZ(;974d1=rtJr1BztRaQWq$gO*zS4$ zyG5eD!F6l;Wrs|pw(@h!2r@4{8_*HzKa2-0Nn!{y-)z*DY86t!dO+zOr;lvyp6GlB ziQE*8--?q;L*#XY94bp_hWP?_zZ%ND(27l8u<;jViU9DsC6^DaU8oOiaP=kHMtN5S z3){O>VHJA(4jCvm+mSn`KiP@A{Cq+V;cQ3=$JiHlN_!ed;n9;5=_vj^#-48xxPCCi zwd4g|W?>WNC|`FKY^TLXRVA?Oz($sX zO`&e4EkhprL28EhwZWjTlos6Mixwni4e8p|hi$N|bl%AwPiI!}6; z;97fvI1dbJNaQ9jI}@OLU)g)Z-j^htd`pw~aw9PRnChv7H6EMx4<&~(qxd_tm};?D z$hl>E)-t6=m6IT%YYHI+(MH-Xxl>gZ+3XJ&TAAr{Hc4};giW7xwzA(baXr{2^nu1S zX~n`g4ona26Rm?#bA@yjEpjtZ$W|vA&mSRUf@f?>CGZEzxd0QZO#8Kv3PuLI`k!nz z750Q;xJK}7qWCa$P5fW8M<60*r(+=K?DUtne!-MJGyRCOUC#>)xoMMvz_a-b80CjIVw8WE&bqp*kKM5Qu64k0yFL* zCUM+L$)GKUJ_Cxe#3xp0J0B_A!+<}aDH`0LSu;E23b{IH#dg=XqRX+gVg@@fAg%&f2#v{7(T|Tu9)4sMRsxPdpR6ayszZ03 zyTHq9VlV0QP!QQ-YZ~}&ubtVq%iiNfifi-RvdErwRY2L*aBuLH&RFS&ui`>PRqdv2 z?~zB`kRdSBPB%;1@6Y>v0M@URyjs=$7s9Qn#+okdHtfdJtIq%ZH~jNk|K?`<(^mV( z&EkP3^`j4QnZ3hvC3O?G&QBT~IDTfkZC&_W8K*-V-J@4jIc~YKp`N8PLt5p5hL-bI 
z*+MShF^}u9v0w}Bt3}@T40Z5Tt>TK#if8%PA5N^GmkE zs=OXbcxLLWw2mV6=)y#e+XN%wi$mHWQY;YA*nwzl?}bT!C$cv7ys|!AFx4BOeQ{8f z;1S&z$pHc#V3KmJX=wX>k8chY^V_>8MING))1_nc>qp2IwZXSdZJ5w`7$+VV==byj zT1LG$79!1St0}oM4up=gquxo(hIJ0W%>8*kP<#$mB{(0AmIlYmMQ&13e~s(noQ4AT z5-V4u4=|@lO}XOlm25I+npbJz04OgYz&z1$70CyM6&buk;@y6~Zb;@(^P;O7L6nof zcZdG?50TBo5Z#3&yq6ONbu#`9IQdRi$$An&IIW!j2Z&Orhe2fy*E#?j*^~Gg&JO9? z9i0#%T}hu$sK>qx*}BfO7_NY9J_&#h&oHd-knw_8BB!*5&=*){a)njk2+zn#<;8s1 ze5ggGula4~y}~cwYTWcyJziN2S3FsH9%Eu-MPhOe*M$(eT>XARqR8)so1 zpbV?bABk$@!C@F=YbCua?UUreurp2VPAr9Nx-2 zZv7V@^k+EFKR?TxFv3{!S*#iJ;=FO^u0n?|ixKzB7?M|y zNs$KQp5gFqMVEsL9&08Mt^wiEI{9Uk69$BB_1lboJ9`# zI2y1)_Jau&e39~g4+OHyx$i=XMbS89?W`;tgmF%l2Nb?YamHIZTGbPp#$%>Y7j4)x zj2*k7y@Cw1uY>nXRHv2xo;-<#%a78?k&D}s{yTgk@ z`YKPkQaLYZ_kM~=EH4C#LD0cHcYZ$Q2AsU)9TB~ju(UOU!N6IVJJ$2&?#7 zj*4b%G{PV6v!lb4>jqcXg${|*n#vMDnD2M-;0n(Pu%($bD4h0d+xZWt+}y~@F>PIP z`<^NP$$&B%(rhA#LogaTPV5}N;7|eV0ao&8sXfm!t^)7L^G6a$6o8P%ohwe#+wYGqfoPj0mf`kx!N4Jm6+C$i?+3(m$1wX z;LMq(#AY0c;xN&WOGE`7V zsa`}f0`1PvW)MtPYdXamysLfDdmM#4F)qQcZBhVta|Xe(P=OW1Kwi%s&# z6H@;<>`kz$PerjF=-uwqdXgo~KKrKhwk|JZD&1d@IaWkqU)}pp%oU3czmPT{&8ruH z*Bqw&B2o1c|4BsuJNOlfG%}q^OVY`KLTx&93wRwQ{=q;1qdhsVx*Tcf-m~Je0t!5e zfJjeAAsXONPR4DOVmsE4RmyvgOf8rnkMey!8ah^dvOpE{K#AuO!LFzIFj;(b5j3uU zYAEy({fl#-^?CnyKybC#mzYY-CgdEfIK5V}sHn|9Y%n?dVpJ#?W!L@-C3r#bb&V(@ zCSbQ(+b2vA{YfArQ>|et3Eu3!?;=c`2N03Y}uvmDEuo2SG_ALa$K1W^E`B z-Pa>|4v%}g+S*xTt)T3#n2u|V+A1)TiL%n)im8!_yir|wOGK^TTktu8ATI>bfi`E$ zFyf^6CjGT-Mg$V>B{?T1e6i_4JvJK z6k$%mFzW=SpQXE}NTD;*E;Q%^DjI7d6(3Kwk#**9&(z4v1b*p0D4e=< zjQkRpMy_wIQZN!wa`GtrRu9%45OS+`PYb!Lbbv%kSp?Prjcd^svc~0P`YSgRq9CNY zdN#k+uywb1IS>~2FfV~%YNU7X;2tNnn+Gl`v76_ouQy+J^THJ?7f+|LIqUbeHkLXP zRPB**h9;MPrT-1LwG^lsc>v0hfe>se!=G{2#8j9dfE!yQg&{0)x5v3_Qh3)Sr91Yl zVN8chl!xDE+)Z@NTmB*%<8%;y{&>{|+ zAeP#(oIZVd7+bBf{5Bt9)tfX-fRQa*$WD9WVL9!!QS()Kh5$pH8c4fb{fZ}9Cnvh= zDGqJ@ux5^=%ZxufBQjNkrtzB5c50gYt8Z?~G+zG4Fi{g)3fCOz%^nw|)4z$#{-T&2 z-q1J0N#vlW4AWbbf^a^=5$jyKY4}Q7d;PcTx8}gei%0H=h`DCf($JU_A$rBA_&xlb ziwAQU1WZ_x{ohTcLO|@{8sgA%1p3wE+~z(aHsX;(=G%#GmGMkuK&WFHjknscu^<@D zC6VXuqe?Raq=C_Ot!5yAYF|X;7j?^-MyA;_wj9IGj=}Q~qj}qOZ}8T949-hzF3XT$F@VkVG-v2W!Cp$89>t;I)E`E| zHSf0b8wojl&PnzTD%q>jWL72_ltiLma>X~7_iV`k?i+O$RA(#+||TJ#Nsw$ z?7GYvp@lIgAF6mf(VfJjc!Z@VR_cCVc*GPH*S#UFCDbff36_1@klwfd(SzYo?vXs_ zuWrF*=sFTG>s~L6sSb@SzG$CXN5ap$0%&Pk1JA7RH<8r^sWBpyzFAMY^l6SmG#L6=D zSYho(`?^<7rz*v6J^m9&YT*@cCl5tit||Ymg>*#MvkXao_xb4TXytL|-5XpIkp7wi zL{{Suird&Eh#HIxdkI8q9-)bdPPEz27u&|ujYKEu$yOk=h^G?$!ukgP`k`uf4q}Vn z3=GEK?jd9)i73%#2FkciXMelNM&8C|*i+Nnp*0S3zbd%d1Tv=0Gi zfV3YbH#5EaPEmnepJ;v#%EoVEYT_mRpN@C1stVf(KfNR6uNF>OO@FUh3Z}{}Y z9m*Y&m@Axpvr!YuoTfrNV9qvU^G71eK$+h~P2R6gLy|s1244qIF6XvqhVnmwyArll zQ2F(YLuUV@d7fwwkV;Y)HG+Af{%j3?gE!L@R(Dv^K`-JRf#*8|nKaa?67W>6S)vFd zH$|F8cZS33><2B6HORj8x)BzK5bZ=ClLTG0t@ zZ#{k8+=sP%Dhx&iMn$6k_Q~HdmYjeh0}abVZNA!~VuT*4)%Zu9XgL^*2~k;WljoX= z#mh3x7H?VQBAP*0rJHjM;&>twDdu=rSI#s$J95VeG3V`n)uAQPxNHWnILv9ropC1? 
[GIT binary patch: base85-encoded binary payload omitted]
zAHez##4uL3bbjZ8fX`|u7EM(OAhR5o@c+t4ytMcoQ~7(|6=%h z@a%2FzQr8nTPZ!^j@NrQjz}{}@!QvXsJ%TIE{TABk+lfEucuS+=DmT+a3^r1D(qFH zJD0N{pknyA+$95m7WM6W`r>XptB6elRu?~)I_2JtIAvgh@Vw5y{LLu=!$u#m(D1f6 zjbms*iVrAusELQ{;J&HHj-OdZcgZIKZ(WrZ-WNiAP9n|H66JzBKF4gG^{DY`Nd z^83oC3iE(phiuMm|9f%9nUi5IHDZx}eLh*&K}^D^T1|7o#>q7`DU|B}GR=D(C>mXx zurxizCiLLk6w7Vj{djEy$b!pLe%MnL9kecp2SQKg>}(E_7QYX1;^2@vW!oXphUF0JUrjtg{gw<&#yW z809&ZV?Wb(->S;u6$WZmQBqR&!HCKx&J)*#IMRcw?9KigV6rQ09&me%oyS;9YJ+*u zhm6y(zcnv}RL|~!Y~1Z=Cl~a3t}R^|Y0|W(^zASAQLc>-f?I*a3urNuPrmgAAJS(X zLsET0s^#{ySv#%)R%&l4`BeMA*h>Qob;kB^hu#kuCEeBz*Rq?|bc+45oR|9C$i;q7 zLgdH4S_hmmnG*Mbx--kK*aKINuLfpZB&taskKOIr?Z*yiP%a&bPBK^W&DGX{tjd^i zwJAn&-=rqvN$T{XLAql!h?|t+H9JwfdBN;a8!1#W!Cx)`2al*#k5>2ldfT61UfPX7 z{88oULq+|BA%kdJ$gp3BQi=vYcQ7Pl-rCIPG<7MB zlftqXsS-gQy>*?d=CBYGO1U@QOV2%x0*$O7w=7lZ7O>g=DSd))F*m$EI2xmQwTam@JaTFa^N3c0NxqH87TdKn)|l`XKY zrlPN{_vAn1hQ5OG%I#_dq^KUSlgqYN#b2)jt8V)*F#;1+n(EVgebDDCc^7JV&(Pl5 z-P1bSzHPp7{*4Wf>_8Gh z?4G3@@(zl$nYE?tmlUvhAEOg5Cx?dyh7k30Q z0byQb58DkPQi?tNmi8Y~_B02Tj4w5CdOH3{%Du*~jh2rn848+EO>RX2YCBOhXtuR# z1Uim$N#ldR&d0Wro0<1HSOZ+aizF3ywc=8*@4GGW<~_y!n%}^`t*=v}n2f8&!p3*% zn}WmZWmun#C5SJ-PS1y)1Z0ETgU6H-y?DJioieVk$oj z$zjnd^r+r@>BF|gUzU}Xp1O{srXID)u5yaFLcB0eU=QO@kSYf($erQo$vt;JZXG*& zwtZqCp#oBhb(b;jZ42*^`Vv}_d(pZSrVyiYIez!u=a{erjz3x_<1-XT6yn9@Jek$k z8T4TD3`|@%PjZ|~X;Bmpr~}e5wdlP~K#y&W@_>bbl$I^fzL5tIA=W!eGxe?3sNoXmI@m|j|uqx}%f>^^E#$v2Uul)FLDdXuDY2{x2j!EcJbuT+nxJ)88i-Y@& zuX1qCbUdAce+QgY?b4{^>IuYYY`&nAnqO3H0W*3_%K1_U!+)8{WX56MXh z^_+9HEsMz?IAq7da=U)Vx6~e5M|t^;s(6Q@7q*#8J6+a^X$4Vz^FLhOR2&Q(l;U0R z!oWl#VxbQ1+!uIrUS{(J$=5Ebvqbh@eD9vxor~52-@q;3mijUCD(BBQi$>CT$F&OF z4HyE4-qU1s$etwCk-D4Oz`k2nTG9jgfahAk1@D4 z?@>9c^*@5aU~Gl)+P8@$GYGvy*?ia`$l>W?e6*B( zS(vq#MbxELMuTKjgc=UXS&qXQmVsy!f!?E!g9bQhvlm-h_wzb1SBRA>;LzJ(BCxh5 zd3VgO-w_oA~vQ-yM*EkD0(Z zfz#f;F;Y$D`>$;qYT`eN`I>w4PN)egJ>fQ?PnP%8=bG<+x%P75Mq;nHX_f1!k_~=x zF+SH%cChVpyvxTQ&?ok#_aDqlzFGk4blwM=+wz`lh!zYTgWL1STn)}(tbs!H>?5*V zM_6_J^tU>N==Bacc33Yr`01`q?460qb@}X`)e^*0x>c&bKiUU*?k}inyjHf<;E8h4 zAHmFjcILs%9ZZk1=HzQ$R9kA;Kj|$ zoy9i)-N9xg8<>RG+?~BjBPIMgBo#4g(;pt_Px!);q}{-RTBqK?Z%Z zI~bWuCX~u*jTP1EvmoSQ9>Bf#Y5mjgHD$P-pJ8v$*0@d&F+0g5Mn#`^P}`_vcD={5 zNcV$AXAyYotAm>gj|$V8>$ZG`@w`RI!@#Wj^D!R-9pbVEnAfP+TIHp%S?RSOI(-A) zdOrC<5Z5bD^~HvmO?%Yx;;j;rMy`*qGG8>0Rju=t7aC*ND^MsO0AxZvM5sz2-Hyqf zxE)*yBR==Dv-K*NP0}sS5W2--Ha4d-Gg!3l2}W5|w=UC1?b1+AXTAZ;ES6WVdCn&u zyJJ_R{c0jel2#T5YQ{7uL>rTVT#M50h((8mV<%&rUMniaUn8na#JFiHVNISTS$x_} zze5A;%hp_p#~OeqQPQ@#N_NJ!@B_gKl@zzTVA9d2@38z=(P`Ia{)K#DvuvT^=2%sQ zyF$l(LoyCdv-x2~^ws_RR%b)%>P&=Ghs)rA2D9Fv&NOXaM;(>C?Jco zuF;{!C?D*2VZm-t$K|)?Bi05VH*H0U+G|hr2A*`=rGWeyFs*rgO{xRqKEN6wm}&XS zv8|L*Fw?d%O=En7R(o^nb-V#j|1CH;)r~2KtwN)VZKqb6zH#X>Ro*2GtITUw>|x?t z73hd=P0B_ouRY6ybF%N#`|rD$3-8v~{R1P@^+bEdvy&_u8dv67G=k(=vnHNx&t+dxhEtY>)&MxhBPO>K0swG1kiz&6K&}RPo=7*2zSay{G%VWSln|T|FWVr zNB6gFNb?q3Q2Cz!r%u?_kF~nFDo?!}O9VtA;#N6}=C=Zi`s8D+A&c6HJJWaeFA zPs=!TgmEl4HII-#Y1+S5>oCCSi*LNZl$NB>cFj@uffIuvhKo#SFVWRN4k&+bL763s zVU{a`v7HOZfjxb~P*}T5eu~M!mPD2*G{COD0cOF)wO42lBbC3j`V)}g2NgZvAd7lK zW^?~bBC7Y1<0fDFcKws5)ncFOoN)Wvw{@RI5(V%Lg3P2q-(F6i_G;i}kZQz(D`^rEHNlSc-+padq*)!5gZAGWaSU;L2wn5bcZx zYT}LF?HuMdeCn{QcHzn3v@3=PhNRIKQfvKXzEX(X2MKphKBq|;)q0;wOO3k}b%kHi zv;^Jlk{Av!sK!p_dJp}T^yrgw5L?GJxIRtB$!1#gMem*wMiD8X<_9xZb8hG#d2+Ty zPY08+U<8z`v?Vv}4&#g)ZSQe?eyUiiX0~gNMGW^Uvp>KA37hI6c&vKV{_3 z;F)9iA&k%Nj(ng;GI0ld$}n=LcDU#rWfnN|nRoPS^KQE^8VSbQRT}2Fe17x67@564 z;?_01UIykzJp7F{!Ty0;t1fDvkj(4MR|TIxGbm2jB%`XPz>7ZWU*XeiqPc-f{jUx z^}s!|ngMt$YHnI2I$!SKBG0T}PDDeulCQg;E!#?5(TX_wN%bOUpjjte*`8w{RE%hj z5OH#1g%ZM4)ZjnVo$6kC?Wcy3H`Q^C5ILn(XBR+_Iuj3IWbTxs>;s-Bq}=lj 
zc3-tcg@FVk=yp&QWUJ>;|LCZ|VSQ(Z8rZ6$$@ z?;CUM33um_lkR-2&pmXRTi3zR&*{$gv>C;_Wjev#@K@SDMLA}w7!Ttu{9la-YV>|a zAZAoaZD@PDfeZNU{&eb!{kr;$qm)yyANOpVCGS@e?x7?oS#GW$ zL7}Vmz;h3&$>z@4eRLU|WvY;$y(fB@ukHiz?vmMnVLCZBEIb=ERMKy@AKmeYCP5>! zOK`GK=^+qo#E6*|bL}M#oCTOp&4JR6BGCdzU#otP9>W%8lsDw>4!(lguiQEq(ry2m z?&Gt!Hx*`r3T=nk!Vfdubb7(}*%%2R)wip8Te_+HAiFHXrr-}E+Ife~vU_L$U^_nZ z2fSr-{R?=I-cWZmH0aCLIEpfFQaz&%%@f4zpP`26oJFN2G~}rW@L=fgcT#V-5wS3ERQWEUI{ z{9gQEJ^AY#@QA1&E75Z3NeJSiOz3@5MCx3^;=Ia=R{D<|%e1Tnfc^T}ltt4iGLQ}e zn&MWD9yny1?E4(&JZ^&=L;#;P$b`u?7GJZ;o9D%pR}RcyS3i~K)haX^`_p5r+ zg-GtxKbA!<9J$EBnd4@!KGC<-SCgZuS|}e}SFxj(zj{%l&s1C?U)&z92V2q&{&JEW zf4Pt3#eTWn82GL=BMhYR{c#`$xbfXOIm^#b5sElhp01Wc-rTUCJMJdBGSI}by=Dsd!zmKR*B^k%qJna${ zC#xx(Lba8mXf!-rprqlPcjS*Mg6e|ySJ&L(0wzuvTF?<#~+D7+#-OjZqerB_ni#M^z!yFclge$_l(R{it6ma2g9AlC1W)vmSNZ**1SJ# zTnFq#;^7q(^1&Z_2&CEII_gQA?%u_uw#q@{sjzE55Q7)U^Cu#QR<=8)ab8BD3!&kU zS??e$mo2bQ@CQ!$6iZcZ3VHsQbjt1oCOLKh+gU@sga$h|tDrhNZ?21NZKB>oxL3i+Gar0QY4f9qTktS{W~*#B#Z*-jx!ZiP_r(IJU@!quQU!%1r6~ zGeIKDdo3ySeai^Rl-BWOBGI-HN4K8cnB88iUt(s+MyDnnGuK20biZ#Og1NLK#t2Ot zON*1>_22_D7RHA453|V@VQ5iEnCJb-`Mj9jm{_Bi^QyN8uLD!Oa_jo4m^kG}_oS1Q zj?dh*s`D>LjD3p)@5s2R{f7vJb*0t)9mK{DWI4o=L}N|1bX7qd)=F3Q-(e7l!mg(2 zrS#W7Gh@~}bdd!sMLmvfhQwI4ssTdr;2W3=*bABVt)&0gSr^}>qKaAPpKQ&`yQnPxjB|Del)Hn|(z+Iq)FbPm3w=Ln9? z9|m_z6{2RP-~#(KFLpoK3p5I9nuB|ruZ8*habQ29j52~}lKX1#Mn^f`6OW9J@vIek zQ;S$l5i(Rhj3Wa&zeGfN8^%Xj5f{dS$}YAy0FC1vx&^MRHt&I?GKOG>dAalFZiTpu z56;f>ijMLkbzyLDu8sgRVz|7Q4R zKpV5v<4A6BOS}sWlvE-A$*>f#j)b*MVyY}4GuJM#X}Qe5|6q%(i*pX!H^u)t#Pk7T zBLr}|ZZ78=#)2I8-|%&-egDJaExO`22SbI6N)b@ErKAvrXq=qpKr?u(;A*boL^m8x zpGnh_%FU?o`;8@gme-C_PwudrAC7{Wk3&ZD5_g=Vgq9hoE3IT}l{ zybc#-xD7FnRz1cUhWMbpOX!2)z@POo(<2>jw=Kv1)vq#UHuuW7tX~}Ox`n|CM+e3I{)yPcl27;9|LKZ-3l zh1opkvN|i>AT>}&1kFm|R~#l}zCEl$`Ot9*<3_XoFy?lV`F>7ShrZW>T&)}O2On3P z2R@*>rBV=)e@{kbJp|dMG9rDF+uIW>p~10**5c6BAus<@wYmaEXJ$;z-=K`M5#0yb zV+gvkppt7l1z5N}g#^xm+mQB)g2-aNB_F1{--G#CZ0L-E{QRcytTz8Ln1y)$7+u)L zu@8N|(ngvY&P(_A3>xEv z^D!YBs6VU9G#rZ@v+e4~kGs!USFAW@J*^*ec4gHRBJRr;5V4zYcLxaj_+s8rPgY5_ zf0D8lN8)988E(wYiu_G$GUdyUBhf#5w{Kr_nYnX4V?z9pf&cuzSHHU$4QKd<*Y3o= zQc*rVh(ZC(;XkMEw$n6%_U@i`Gbi>}f~-5$;o+QMiK~IC4Z{!Bk`{|@haBm6_Z!E` zND+Hc)vJ399QCT;$P$ZS-=wZ&b7TkKM#ypuvf_Dx{cYU#a$nJYrtxV858?*8m18jb z>OI<7Rhcmwf1rLB+i83;tc-V^CTNMMr39#blI*Gj3U(V@YbI$j{iiToHj2$6teMJ zdoWuWF9<{COBnm+dT#b6|EN z1K9=N!*Ac2(3DOat33E};cwwcGg^dVa}@5kkQ(=hiMGsm4YD}T+KwZ(O%~FC2|4xA ztzKdZ+$`tatp8y<#dTbro4GjY2I(}lM8hObPxq{Z5n0#2Am|R&FG~*r;R$o(vp^3> z+;x*X#vARiCbEJ<5%rvs=>52mFqz8l-LhrGsJ71Lwh8r)jEr%I7Fg3`5B|DSyu|2xp?h94jVgS1^!Z6XlAG;(Wa-`2XrK0a({8GRXiGTHGubTsno z$g}xWfd@&Eu$c(*1qn63brq`Y25XjjIcs)St2Ike0+P`FN~P-lZDmD7HChc-rMh6( zh0K0&KFQt1M)9DFZRGYz#lX0jSH!gHL=<8JO;h`!0ABcI1%iA=1K4|3@dfd|_zKCm z*X_d3X{7PdBq(!SIX^+Kvi@7Mo==~UfK}=}5(gEfWu(wNj8=G16J``PJL6gPh_U|b zPzZ9kC^>ok^7KTem1PZo^y%c3`*;88Ob0cz&7@|P+=3U6*iAl;ySWf11RDq)$*{VB9fv5Rk4^LS0Un&N`W*)Dty4uh`>2w{^KoH{wiKu?wX1iW$b=jY-Uj&Q6nxc|r; zAMJw(dG0RsbzQFG{28OFu_q^D?Wd^ze0(vo(R(<2p^6)-z@_BU5rZ>-{#e7!vWIkH>^H6xpw9aFkB`UNZ|1F9t{3Jr&~YX!o`eEV?%%R0gkiRI9id+ z7aBn9Y8fxJ`!M$!2i|LZa+Ri6+;wuYOLlr?oIyADsA5S)M5-og%Zk@EP`V}!J7zC@ z$6Y02)g5Er9j0pFlk@A1Ulc$LZ*Nn|EnI~wCvjUDaDlOF$RIR3@m7i=i|u76M!D+p_+v=CUsDXanYVc3d$IrOOaH9t%M}>RlV|rL;KD}dVYus@ORQb-F^=14WTioX*VvjS z5;B7}Z~nAp39+1@Ex;$vw~WP(?;1!u{ouBT_>gCLe#J=ULZ^tCyB9!INdBGrGxnp%sv znzqH1wiz`P^Kaozd5M{`Cnq|z9>#~G?{!c&;v+V0I4 zT9HyniVzzn98hEpd9V=wIXM|#{h%GsyM{#%jy&IpLicd<6zlR2PC>|LS@bU%aB}Nd zbt4HCp#1M~qZ65P`MoGBV-cJ(+vlyS^8A-B%~)6mhd^FO5|Is42;f8qdLBq}vTExh zaSStG_<_6P@JbU*`Kr{zRN!)e`MhGs4W^Kl^y;wkz1l<|16gI>&3O^wZRsPj3o&i)q_+?uHn|u>=fb#PWRIV>UvDO 
zcudL*&;^R6hkxB2f)+PHirxMVQ~jTf!T(6b0g#aYzi^j-f>oUPo+-fkU)f}U9~Vg8 zJM$I3i_goy@c-By45}DEFrLbWP&KOjdgq#-Rc$2+FZVXLIhMA08Sdw9N0H2EeC=?V zFR9=ff{^M>vK{BT$gW_HR{7DL@Qqz@^{2Yum)zD$nr|RptKN)G~Fsf`Q zm*qQ*vj#9<51erAz^Sg^`8u5NVkruE!N>$J;E#?n@#1{F&d&NUX(2hLQG4mP0|g`H zEE(1HB9!l5|Ev`IpsWo0Y%bpgXqZ)+IkTl|rQfXNANsx8J6JX&6~UmbfKbPLm6V)} z%uMC_^t@^%){I7j?<9owjxB2_DPkiOSC1FZVxKR&aE^S;lLZywoBUgTb!i^Qb;C6B z8&Uctn}yUzjS+hdp(`l+YPA4IeB;w6Q|nYlcux3iJniXN4mq+zbx#}T#L+4!1&m_% zH2Hr|8d!3C1~c_C<2uAW7UZO`nRc}a^YvJ~C9=~f{L1bsob$3V?D(%kr&w9E(zfuy zSd->hjkB84!+=#Y$=o)Zo|aR*F^=L(84^N*+o%7TnN&5W#FJPiiWLeEx|AR7niy6# zPeN{#a76x%kmZ3jSQ|(ImB0cJt{2CRy3|>)YNW|ID45?ov;TY%tg5ep^F38-n`%ED zIfri+`#zmLJq90;m8bTyoUSs-CvNX&FTh1set)`fR{pAz-@2~lfLoTivUX?ZvV1Tm z!dt>3t&pBjO3qNmRKmVf1NG%g@b*)F)`xJSv@ZEa+g&%nny3hUm{u1rx&|4F8L}>a^kJJGogWSMinNxP42uakoCD`l zs|$?s-_*qTj5kKf3p{vJxLJ&*S#q1=xYXJ8D`}p#wyRfu(i{&Q!n5}vqmaitFE{dz zfJ#aNdha~gZ`-ZY2(4rsn12I4FB$Jf+7BuJpIx{AtL+BHz*6bHSu)kp2!NSdy|Vou zwhg8KR!&qyJ?22Uj?K*!+UneXp}x751h;U($^S8V%GtN#LdKUbvrvm$owa!kCcfS% zu$jbrNn}AaLA)aHr~;B}c9#=AocFryU;;R%(AGlnQ@!^Ep_MVtdk$H55C46_0cfK( zLP)RIex?#vM$6l#JKO#_)H{ccs~-YnQ;89jwvjl0;iRxP*zCLeg%PKFQDu*ymYvc4 zO{qQ?8U0}>*>*G#C^{R}l5O8o-8WY}onhVVyafSTHCDr&EVGo`Ghw4wu8MBc>IU_0WkLrM{+8K{IXFp- z*&0@MwT_b$$Gp~PB?c0Bk^SpskAi6pU~yo^4PQ!o?AeTbw3H>}r=9SjLuxpway^L4 zgg70d@oMJw7(SG=<0!h{U-mJlDi)Lna&dfN9mM9k+!QnkTj@ zzM&dBZ;cC{&$ex93pcvTOj(42%7;miN~JD}eXoMjsKL97PW0!;YitT4jfD0Kjf6qz zcG(;Zh8IC-MS#oay3xeKkuj%R4eSyEA+xEyy_u@MfbZ9@Q71Q{-`?=yg){f%>^V;> z!aepNh^mTjSwK^gwDGzsyZIls!PBgm*F@OrI1g=Vbg|7%t{?jF^}GuX0@j(2YTa&D z%(8Z=$z`7OrV81t-&V*cHY3i1V|ZJmF+n0xWXTRBx{M`zx@(M-51wqDiI8Pr6KvWj zU2?pmNIer8xr$f*!9(xIB%+mO=f+mjsEoY9^9t=e8%FybV zC_w`O`H~v7m+~29%$lRCz6y}Ex)s@LDq}d*1&eAl$Tu z#LXM|LfO&XG=k%qBU&GL`YbuRGcFuhm#YH8{40zj@-P-Y338|((XrH-H^>8 zBR`l~_VZAnl(03@t-uP#`+^YF$B&)*}w4b1ouCn-=CwHghuD|Emg`xnsnzrD@N3ic+K z&ckw6v;nx_febc}zSn!wcbiVDn#WIx%yB5{Te+}4?YzqziBi8y`-QfvcWOK1mwLDh zmg6OHa~z5E5~QuKH9>$)If&2(LwbE7fy8scqEDWR)BKZ+QOU#eqgNprSG7|*y)RQ7 zsPy{I*Hh6P^yPuFjw_aZ4pjZtJp;AhUCY#fhxCJSm@2Hje#SRBd5>Gju5JkSVwxKr z=VhTqS8P^xuaK%-Bi;dj)R1nV%Wwa&YyDYcZNd1lw~t+V{WA$s093I`gbOsSlb!ZD)3wRmInIwP42;x+T)5TpSQ8DXgcN5Im&2d{V{Ji&z9=0O0r`yRw2S>`$<@`o zAEB@~mfw(ZvoD@_sq_uIH6T06*Os6Fb`KG=R^rOGM4bVHPiaiTnf4!!08bI8R-#K21_O0&4LSSn0Yu~XDmlK&6C{q^A9;kc00 z6eR1F&^h~@On%~Q*)1oh6-q03i61%5`9jkpd|ntjv7Di; zn<(}OHm$AwQqg3hvzVavfKo_GTsUo{=v|+h7$AE&Ev;-YMdxwn$QBTj=IH&A@)J=} zf&5rFk%j_)p);Jke4=@kua=yY^qmQh1ZT>|=-NDc%2N+8tj1rZFh&x%uP&&XY>{Yh zwqM>_wvIPmYtNl9ojgr35E}7yBF4l@JaMg{R!E7hsQ5i0D2=Ci95V+z_<@1=%W&na zlr*$Nj(vyC&;<+3;(wfr1gq={-3~SP1u6eT0@z@FHyl=%DF1OB2gdwEoWIZ9J~*~R zH0)1VF#;~~BGJAY;n!ym+|0N*9uT5CsiM);+(nstKFv+5OZvGFIR3?Ql7hKT4;RBs z{E5yi(f#__O$e9Fgtkl`#JVbj83>obUx#$@>IawL;7##~YzRJL9t(gN7CbiL;JDqM zrix7^S-8s(qBM|jxu`c^_|oY+Vgmn}8Yu#`iTd8p|3tM;rzVbpxe4k3`KR7IoUb%u zyz^0xBgKOsMDJ`Kh~78e`T7(P0p_Z~X7*qoUJmS?(URswk}{x!c)s^M@CQONt2%a0 zv9aCKu0Lxr`5DobWjn<;?dnLde*WWL4D7w?;~F*FDQ_&w@3MVd2cPk6r+-Hau*>o} zlQSJtKL0(ZUFJ3O>ZiV?>Q0L@CsbYC9)ZK4CAqo|>3Y}gDJ;9lYzw=1Kby6o4)^(w zqkp|!Z2U|2qw><3k7~yfS@Ud*G+*ZGt!Q0)|BnjtRyRs>zIf;=&Xm3{#)G*3dcZF~ zSr7ddh&TR?@3|{Kn6M?z_8AC20(4v@&UEFLhDY{=Hup$VC;Ry{NvJ1%$_;D1X9Uq* z{vUXQZ8GwoEM?`+{8DU4Z_DiE9-mj|)Hf)|Md#N}nU|hpu@CKM6eMkPa!TraEo_$J z@A*0gMB*1`FL16LkMVykp)bBSsC2ekN(b9IfO5)%**MtQm)|rMIX1TdD4aDP)|oN& zS$x_iz=Lo)b5#a6l-kDzSq(`Uf9VIi6o&cl*4q@*2WAf z8lSBPWy2gAP|t9-gxEDTQL}q&S+u`awVq6W8}Ik_ zKhpI2f!c$y^-4eVm5faOErX3;hc+qe(^I#}`#{@z`wsfj!6jh`diz-BB)k$r%E)(w zSn_ANJhSPD8ThF=%fBtlaQ8b?Skf((bmr)sty<>g23Ebx7Z^Yw_DXyCYZgnkVLB4R zB`^L}3 z3e27_FOS<_S?$RiG)Mpi!`@}@!`S(tUXkj!V7@2M%?y+K_vGiDP`8YVLid}$U-Xp% 
z;cZ5WAlYJm`m8_y6az`GpF4$ zxM40H^^`3cqcyO83 z`=GK>zlIk=Anu<8#DRHO5qJ#Sl+VBQc=_NtxU$c+fUh|~AN>8VL*I_o90>n9v>`x| zt3waGBR|?q>@wi2lVjoCUf`L>iAL+AZrETuN->3$Fu^2?F;+ns^8dltmxr^per-Fa zgSLv6qNSxRswi4xso`|uppn)rh8$Hxq>-9xwoh9%7A-{^Ls~>kq2^g?t}1FKK_rTr z6GH?^`t9?+zw5hxzwf=??|;{hy`N`2&%N%o?t7_{7d7i)h%!OG>G67_d;b)*6Pg!ZpKnWi7Gv1sF8yBPOHw=V(BA;_uIZ(%0fs4Z+gDPD9uTkbMZ@54;yBDIF%AgTBP@lB5B+#?KV(W^woU63>$sES zlBCSg6&LASm}%)k+i=_Pyigz)+=u0y3VVkMsqxbN`;G(8l9 z0rEiI-#NE2njQ|%Gz~Pk#ivlbV-q~n3>6m?g6fJ7a0k==z?y*C(1#3>>Y|{hzpd}d zkD8@8gR{hi7UN3zJjN`!9U=HZ48w5Z(WiYMS(3epQTkae$w$>xc4TecMo0SxalhaG zF|d_g|J-Ci9jNDIgIPmYTQ0-qg$rP-YnIP}6y19~E@Q~$)^cHEz#Au}k0KUk&SR3&AwRSZHmiuO~> z3A6@kmN54E@AbN(T?c^TN3GC4C!(@)WmX}g^p89 z#0yvnBm&|0zs=QzQowZN68JU z|3qt`dEy_E6tO3kNG7(9o};2!mSUO&zeR84!DLeJL5KtgF6h2HefPyTT(Uc>IMz19 z3|1!G_V60;R=>Q!{M*#!UaGv657R7nGHB2=b7ENa6!gBfe-Qv3R3BKxC}=+CZ` zMuPU^^~o&-wIM-wjWTa3I5cb&k$Tb>;=r!ZPgTdqf$M6i z(+G20ldwb2*9@uWhq@@ut5FUYvGCHR9g zX`?c^`ADtu4Ihr+(tL)>-lWi>DnoGf+7?>iPl25yw!0-tUiRcDRCn&^~g!q898*Uc+| zil#J+jZh2xmpzLK2Z!5*x0(AgwL?GZp4;bNd^6`rX9skq;4fRvPJS&Gl@hT?3S`qB zEabc-SifDpBH=Jx*ec?z=%n`iElTFq5*>fsA~r=|q-U{*^gT-b>boP&u#|$K477Lw z>B-xVvXwcPGLyaZ?`DUdVwsnFyu$JmfkzGL$%lHsp|V*PZK8=G7d};`ukLA5UezSQ z+CSi?x>CoQ-LOGkk&pIO9j7R3zwo}%HR z^g;~5r|JW%Ox?%U?S^T*+}Gip`ug~ik14Yr(v4YIDMlN_;DoOsrUH7k4~AI7 z1e&U|yB(xki>zAhJdtjn?7+D(?Qra*>evjrrB2GYHl1p)5)YsXmLP7O~SO2AJs5kHf;pxWwsCihJ=yfagp)~pw0`og3`Ppf8_15&TW`B7F2dk#dS;69W?uZo z@d%oIMD^?UB;NyXUFP3E<zfO!94?IdVu0%bGi*zSW1w%8v^BCcv5-Us@N3}!5iXcO4*n6rGO3Q|BR|<{~1(ecrMUO zsQ|-AgK`wP$GbwNO7SlT8zhAC{xedzG%EDup}s@_IjeNc9(8LvMCoD37jV9gwJokX z()i7=I5G)+wK$h+ex#Xsq}aW}t5ul&}I*oiJraX(If3-^4_^$`B7hZV4)kGHBGiyZTPDGsF671jsF zW|XME#hv;HsgajGXKsVq;0n(Q3e0C1#eSjJK~6uscFMyAIgD2r&fgfcy1KPnS7dTB z*|+F)ey17CF}F%zlXYW8DpvnQ6XP5uTK@?4^gkU~ML{*ut zx~?_PEFdd6XW?RfFQ z9>ykKr}9Z4DKOaI+3cGClXYRI2*#<5?85IaVTer=U1A1(orj@s0YAGbAcae8oj(&= zP8xQ3H}}2Zm=&W|;a%)ock|*D-8)algXwLSCR+?)oSw^>oY5*v1`yt)w^k#!(w##_ z7P&H|PEk5$i4*A-v8|3aR6+Iyrp;(~0OS2*^&Ss@6x4(vnfYNFfjTr&DejenESP}9 z@9jt#MAGi1^9dH+j`QBW>gPFB6qO53hIZ{a&doxulnLRZQV@OB{HA=m4&Em*%#Dao z+xeYXm4SzWE;l*=$V0gnmbmTL;X?CvD=Qz&(k69rPM<6&{_fpi#N}Dp5%E)$qY|?uBiNrW!h3m&*fgFa^{gErhDy0`#^)=#$ zi}wx_vICzY{8*{zQ%ggr;o%r1($6-A^?{==b0XRpsTVVWEBXT0J%s|#cH&1oujKH* zn2(n?|It2}<$SC*){-Fvx4StXsZ^@P-iWsQ z>EegxMdE9=p09hB9<*5%MZs6AT(n{wVodxc{$!Q%#x%MINwuH?43j*|0W4M@f)j*}7B%GA@F${0}RlF~Z7y2oZwEI^|GLB)y-+1Y8 z#AdWjs$@x+dDU4nVT~da4YiE!*-tEzmAi=*(xto>l*fxGw&)a{jLu&$?)LQs3Pa7x z-tE!5U88Jc?f7*>K8;Ui#oL%(>ajj>@_B)Y1}6nLdl(^m{BE#qkijGRc(A92wCJo7 zGlr&B$FQFa@3pWE?6I|9{2j)&;$Zwb2iN@hg&dEnINPJ+H20b?Q;vD{{(8DSn(vED zI(<=KEhj*B_;#F6J^H{+Z!6;&dRB*7=3@bOm_K5$qtU3m;8-Qz5P!w_*6HBB6Q#E& z`Y=F&jzt8*fn}hXUp5}q>PLOmu^(`BpvzUgV4%Q;cN>j7Iz%w5)CMV+_6HVOlElh* zBeP6fGylhVZ;CxP?EyFLSIrvO(;WW-N83efxCqhRG*i>@h9jfvB?VGD`1mhYi){Wd znYxW-UjUu+1~?@P)+0*(9&0khKv1FzgxdW5N*YudXI-*tN<9Z%Q+oYM|D<1XZ_-&wcZn`s5XCg}=_nZ#g z?tbK@EaS+cw8mSnJnOgh%a?RwtaJlt*w(~z%k|7>V$YiSYodxj(lg{aDM1WgfcbdP zUvad8!Ouy=1x_b*D*;lv1&@haY$pe(}+DUHwbi z)&3S@F@#qIP9hhKu9cdpyPLdnG0Q+YU)I%iu@(V*d9AVPnJ^#R(Ki0u3%(S7){8UI ztv#2&EK;uyKk@r%ma#nNr(n|P35+EdXn*grwv91)TM)j7mvyo&^cS%kOjSI&17`~x zQO6RSB|=SEo4KJz^vw0=+YQ76p%;dW{n9x$ZQ|c{f-Om4S5Am?K4B~Z#BC=RKea2b z;HyP<#0KDEhObG4x>!3vl@X=i6BwB$NZY8dD2>8dsI~R^=2*Y5$~g=08%dfJsv?t$ zrBodn2;!j>e4uvQE?&TjcKi~`5Onz~n0tqL^3v04C`aC9QQmhTgVwZX;Xv3#gvik5 z_HeTXg2-^`A(WD?!7RT;ix{(>koSSqoa~b4sU=JX2e#?ISIVkD2joGJp%sU)tJZ1y zws(`J9@hV<{1;o71AN)2WZh+420LpFR{3X58}?5VE)4mHlpYRMPu=)(8$UTdHGKs_ z^>w*K$mc5vUA0=}AGYpQ%8-z*Zw&gZS;CbF{X;V~HCUxfMTF(Ur%ecrR;L^aRZJaK z2U}uRFpbvLF*Cn-R#-#rZRUg7&8Z=# 
zMLn1Oj#%7lsL~oU`+Vj^s2D2-nQOZ^3RG&vKQ3LZTo*#v=&a(O>z``LcN+Jsfmw3J zs{_Ych%Ps00tr>MTc%Mp*!%Uh49$X|!s)3$0TG7-+2x=KrYZ$8ys8X%NQpa9oQynD z#p76N)O$vTAxPjv%a!Wt?is8yqcnHK4@oAj8a0uFs3?OKz_nu7zenlM-<8xrM4+T- zKD7RjwMy$rmp~4MGwkiw@t}SVjbS&wt>%Swrstye4~Oug=J{j$oZ&tt^~$H zZ?&m0wW5tf(2IDjF)aRQQ$ad-T;BfBD-KgAo?&~=+t%OFbVob#b+ef8mod_yF3EG( zJW=$b?l?wr4#AxjW{a1k5QBi1EP{2K#$KrP7_u4tj(s#cUbsD#wD77~f_;{5l{~qu zRqs?Z14J8cw%62ew_9TZGEwl+AR1h-;5Iy#>;?KW&|Hz8Up$!pd~T<~PbYAarYPhWPwxnDcAfjeCy}jX^ou9+ z!pazXw-cO-<3QXk{?X`-I8UD%c<~3r6UtoW>kfl=Z0NrQ8}iVIEW2S)v|REyV0P8s z?)2@(;%LL|<(A%k1SqbYGgzx#EoXr|mw+4>XSy&qFg@6I5)yKqyCA_1)na$_aGxW9 zVY?qNqvwrCU=C3v*U}&PPz`w{O>@sEK_mGTO16^!?A~M0Z^zzl{%y>i)K70vx0!W*lV%jerjl@c@6%)8nnKYm;%+{(K`tbaF2O2Sp zvN5Zy)C?fYCjEIhr{MEnWdmNi`*$CBn(uQ?=GIi!@1yGaKq^k_GMFm+0lS0$zPh~- zmD{XRDmhH+4!x@leHbBnRJb|^j{r_ILUQdrqP@0FA<7T~lcDWv0!4g_qB;tIK}M+l zp&vS#@eO)}Ov2Frx*eDXm5VpCZ#&aCo9`m{K*+LkBTvn2^m}v^`^xW+)Esii?I)yE z{XLTX{>J^U4=HBXfoMDEb%J4>(D)0Z@5jRH!aR z1I=tK2UhH&8cYwIwoKbUEd3{RUo-JagDL428JG$yE^Ltnb^owxX})5Hd4|nCL?zY$ zmZrKfmzFdBYnaeLb@#9^HL3FD{8v{PXx??zHKG+Vq%h?8aeB}Mg1q2XXvN>-E?*ad`V zHJ<|OqBJB#NleY;En#eMjjY(D3CnT&L0p(y zCsx94>?)Vq?{HBLh)K`jLpUI>w8&{Zyn9c!YI}7wy_Dn--kYVX3PBVsev)m%#;$=% z`%0$?DPbo^NJz+~OqX2B&EIDqJezIOI-H8$F~!rdj*oXjmX}F8hr$eEaG4yud;NfH z9tot5F+ZCMO=lc!%+u};bgkfyTkkceh?HRS!@udP)}@Y!FLfyU0aLeVra!khm};!Q zvkTsneiPRzQ!D&Onz1Kp+)Ha{!imY12=j-cAvF^XhYOIapxJekF|eq&HeJeSL@YI5 zX8~wJPe2ic`)!q`1hpH(jaB7wG8)m|v>>)40007SRI9o-lOI5N?H41tzj*9)a3kwX zFI=d&<2ld_z`1rk*rs9LE4d+=+}v1A32|Ct#<-(SF^6_yC!3xoj1keMi}F?H-0zO5 zH}LhK44BppS#BehloOLnQWeAmig{&o11lNejaJ^`C@|J?O)ob}7dTGC`wM#YN(yF= zRGo7J)wX**_31++uiOOgt$981in_fJy*FTxUHB z!O)BcmEchQ^nHWCs5_Bg$wmV!KUn@DxznBj-ghv>NbW^&4#3YU z@t5CH>(BKz*2}=cIIq&S1|bt6%C4pKX^ay;yf6tJFy{pi(j0797+q$?P9<#nHxYKzSkYb~7E@#;?-07nYi;aoN8JDhC~w8O>@l zM|sPRslXxi)n#FJInC8o&#A`=xrp4RpDk5YA5v1e!4yZGWnA&?>}&KPm?+~sBu z)vSP}%|lmH$NsDZNd4{K^U9oMS2NP2z;Uv*(r=XFvmH7I>rTpkQ=PthHnn5)uVtN8 zPJ4p^P62H4ca(pq^11flCstz7F!FgVij?DDXTB^max81R9*NA^xc&qV#8xn$(!Lq3 zBld0O+TV|Tu2TWLbH^$Z(w1>6P8FP#f9MC;t4(|GYOCt33-k8+mL|#ss*F193ugRI zRum81RO}YM*n8B~-#pv4`?KFvKZt@2J_CCfJ{hngo>1|7@iZbw1jNP+**@Z~9r(`2 zgTOZe-!mth0RUW?{?20tG&z>wFP=M^yr7wB!dby>i4I^-VHp&tnPvhT<%z8-&!noy z+9mp<>mfUw2Yq}s9n-Ji}H#` zRVOuQF>@i-FPEc1-`vKWeM!sO15woDV^{F$gSJ4Qz?Vl$GuM0zOpjV)N6y_f^9l@w zR^s4E2t&S9UG_i_^q4pI#{Nxf8hg~d#YOg6+DL7bhRJozwO5Q21>4aU^Re+T-DG^P z#8(@y(X`@!tW%%H#g~T=-=}D!$W*XNFY?vCB z;EW#;yS@~}bwhp3Ot``PSgcyw;U&cq`ESyfJ4SFTdC55%+P4y`t>kMCnR{X4VV-Jl zONcALKB|iIF^-we`S8~eWTc#O1zWij4|uIhe-gK94=DApR=n;;Ot#xs8p(gxg5r-N zDdpMCU+cFk6>e0vEcAT>J=RwqF{SKu`0L_OicR#3yh0~`oVY3fsjneH1i zp_PcfzG70o}X ze3&*Oa>Ee9<@EJWYwbKd;|c)U($qqM;CNL5#dB2;L6a&fr`7D!~`7;}Wirxl32 zpi31Aef|xv9UUN#ze8j$k&pU&l?Q#-(Q*@RCmx|%nX1irN$yp_tQ4HNxE5{2BDw7- zGXZeKQ|C3yqMJB`_FYpdq9^@ii~a?Z%ul*DnEgP@2xORhjIV(k(FedOL)``3ek>eS z{FyRVKZ5Hdb9rV{+-{0sT z-G?3xECEl8n$LQ?ot-;f;*;ho=}mtw zV={rr9KXHDQoJtdbx2Q9t(TY9U3j1NNa$f{9a{Z+H{nA@zeUsTSxYOo*!VrU+UB}0 z2F?=^SC^-K8b%nt6t9#I5jW4&kvBY1dCGz4(Ifxxr)!;vu9x&l8!;ofk49%=atj~g z;9}yZML>(|3Yz>D_;Q&j$bNv%4|LyGV*>P1!7XY0Nu{pOgolm~`AmuY9`$Y+2#(uo`tpU!c_nnptzseD9nYbH4X!97Ja~?m@T8e?J*4jO)F45A?GGKrG8MlFPzi%w{mg1nh#FVS-=;$?Vq7mS5p>S4K_4@Q*)<(@sP3WJ(^u&+}SbJ#Z8|3zYzRrkd@qs)?RlDNZ_?*E+wp!EQ zNz;q%Oq(kfP@p_4$uJVFNt+(RCc(Fkz6GfM2HX(wO%>3+CIO7URy$zxf-U<}O%`oU zv2+66=Hxj2hPr>FD7q3NOOPZ@`CrH0tBzp*{@k{t(co04Vn{?)MJ#iAt@nIW+=`pT zfj3~PbqU8Wt`7lc!w#DltOh$?!b&9M1XRVr>LCPwX0B0QSwG!y zX!XO8**;h$7V46D4PUa*{`VyX{$ouOZ?R-S8K$lAwv;GaWJsOg%4_>_P8^hFheNvU zwQ^f$A@ynZn-Zy_uP?<}4Xu|DRJP*s(Mqvuxr0;Bj(_zrXu?3;a z^8pBY4t3JI?YRDl;Zc(dv 
zjL-^z`uod86c6$RqCR2}IAs3s7q4xKi4YVmn1fkTih0ci@rim@9?3m0$rETi|MAd! zYoE9H%boHTnUmFkn}Sn#uVINQnoOPR(bdfn&56dz`;-KCK7L;G=Oo4t z5%-_+LNvy{ps+w2cof^R%*ky7Yzs!!_I6Fgn%_~4UMJABdCC*k(K~E|rg03B5Gk`! z3!D4Niwu|ti5rh2Ee0gZrVvUB(0QIraXZpvKN1{(vlA=_7?yHIlNFYB=1lF$j$&Y* zRnkb7x*k~33P|Z5qOKmE(7(nGW)TQ-aa+B}b|8)WJIkFeE)j2P)LdROsNAR*h7WAp zmoO@$hc)gY8bzm%*S+Y(1QAnz@mP#hx!0v~HoXf@HSKn9URo6oK_QKCUkQ~cWy04v z{86p7TME50b9(E|X`!QBwExQ6$M1iD(>WP5lu;|jasK^2;x-;}GEjNlyte5(_xJ=1 z2#nm25|$vi7=k7RYBWt^X*K~Hy)ams46PpW+b2leiB}2r_twNBY2?dI5RoDTa`9kR z4Wde3zj|=2DkHNl2V&R{vk5478X!ZM6{NkIBG%(wmn~&1W1%ZpG@i=QK~MBEV;Hze zYo_=WQdM)}48MW(u(JC86!hjWpbIU9El=yvnaQ|`B@_V#9eps)qc8!G!GQ^x1 z;UCfH{7@OzOFH-K(op*j9L*>Xgo!4>1()hYQ`}Ei(}^*<=BR`QKmZpImRC}vpBSc* zmFTUJ8NUjSbyQGAv4vtQMy-DiRwvj~Oq)<);ooX=n=kjEN=G?^B+^6^JJGsG=JJ6g z78sXuY7A5E7n>G<1Lf+BYOk}S%Zd9EZj`;K_0 z6k>p7-RAHXSPY(9nU-XntBK2@Z&g<;>1cc(RZ`AoxfZ{n>D^L{02}xnkIhe?1C_j1 z5lHdpGQn9N6I3;bRQnw}I6J-o9sL;u4AOe8q2@bHmQH&f{nil$zj(?*Sp7TltG9(g z0dRWwy48GU_4LoG+(jp)L1hc8!SGWoL8B4zZ35usRf!d1->t4oHcS|AIT%ww4)4>f z`JH*Vw+K=G2QzO%Fbz1G{3Wh?j_`{|-v`T<{|s!}k$a)L3?d5}1NK3<*LQe7dKldw zH2ZGh+DFXw%2LtZQ$8t&@5H*(R~vf7o2_I$&Ld78_Q;WYm{A2XX5bfld0mi;6z~M+ zf5(HYZzvRW@G~#~Rct|x?(VY`64nAdg`@oVO(9T{6iox1>$9vgmRKou+7T({@|pMk zKxw)$%aoR{P7yT14G@<*@rUf??%@w8rySAy#z$Mn|lOI6q1uAch zzeJ!LhC;LHSfoXnwKe^C^Jf1>jF(2>IN`&PTV!kB_0eKs zuxGPoQ&>>VgC5`Iz_DY9CZv<2172_S?j(9e#F-BK?@1D|f@QOKQc zXbPwafs9PjIy?Q(nw7E>}@%xs-{N%E&j+mFu}_`U@p43zuu ziNV&nS7c7<##Nj`I4~r23KZ_s=XaEyk_sUqMvw%H?C)6OVO-f*HEYwCUemz`b%1*XwA#%-c z**;O!9*)q$Jcf-H@F&EDp_oNM8h>h>sf?6Ze>$e=3WojJHY6uNgg(*5+Sc@3EE#=T zJ@$RZz~9LWTe|LH_0Vk4utjtQVs13qf5A9f*`0WXs7tXWtA0^7;u~_?x-C)K&MkV8`{i zkMyya`H@P9l1x4DmU+;`K1;yuiK%W300$1Q|8RJ=j&?7__%RYEx;a@QvS6TDij3;zV89)M#o` z$8d24tY^CUvhuz?)IiG z^Gg}k4K&^w1T@l(Q2KQQN2HFq$EymcD>~19$tJ*CmQho~LqN<@;s^V4I)dn@U^o5! zaW{*nO56L7O~xx6^_Ng|bYt>)WD6}_Bilj3@)6$^NxcE*avw}dD8}U+s^x9?{*U?= z^b#B8_Jv;px_eo2+%HCn!1iCEx}_D}20v~mfc#fkGZ^ak8C_lOQXBVYh$|5ABpF^g zIyMTncbyJ3WQEb0H;#q|fr40;vXVJsb!$S`hNdUo#-Q{g-Tm_?JKRWwslrLUrk5#G zKWG=Ht*RjWHT zr~%HUvEVfs0dEgR$K1uggsm1EP!)#-UV=tE>C`(-#soKp#%|U34_SQg7Kkyc_@fHB zl0$KWd+J+jD^qFd)|)Oru(sVulm+Y;j{*@cvt9}j>?7|3M?5L?jKCN7SV8o(4#{KX zHN(mG8L8NP56~J6{5{CjSUW&DiH|Jx%XD5RQl(fndA)={v{gk~+q4ME9LoR2g@te} zxSJDrsS7ej4=sJa(Sid)$R=a+Pb&s)Ta`C8pUXL&uK2T5W~#X9l|9R9EJ!1aWW$=u zNPJBHi<6@NTj1DDdSptmhm!~^od!{BCuOB%d%{mN%ZL8)49tQg0Ef^UjYE;~)P5mW zGAyc7C$a#IrrIk)g*_;bhVc;b1yfKfbw_+2k0Vcpy_HEBOm3)ni*r|YOb#Z#yo4cr z9eZSeP$xq=H5(qhdYrPC-{3Vxz*}>sy$W~QoHSV-)K#yccI;8Mmh*=|FS2FwX95-a zDqIVsb)BjZAAx0Y+5V{Qe&N9IeQH2V;|&pP^!60mlsHhetoLx})-W5qmXH2U){6P? 
zz3=apADJ^raDKdEO$7vhxC(8!<+*pmCwEvfeu}gc;Rl&s4C#54IW%$uHa5R!L&d*z zp?AI1)jm2X)@IYy!aXrmg+5O56!O$@dQT&tu(lB{ca`2;$gyk$TB{zeU9J9wvIb*QOvqKJ;?nz_@`IIl~e zgZfr1+rN18|HI`*0IgcCAvr%qUr>DYsM_aw5Fv$A-$@Zy>!fr@{?6*Z7NP z`y?2-txzsQ2lR1i>#E1GMW-*&``Zop?9^4OI_{7vfLe%V7TB_LjK2a`xXIs?WpHpjr)c<+-Dh=M!g`#93q> zYltCi64ymrHXdBxzEnLNehzt+_51R@TDsNLYJ|K}4O`a7Q$XnoP_u1VJ&?Dk34@knsBzo8EJ0WofEQ=@&jY{|Rsm+iLpekomSIYp129Kgwa7MD5 zlz`|Cc>A^>RhY&kXyMKk!kXbuTG>td)Zvf*?kX#F06KTlzL$TbH?QtPQ~VDlXQrU^ zzA#rLEZ4lp_uxsgqr^LB!|KaAs4|`-_n(^FNn+Ghk z&qL9kCp*7ej+c`h;r6?aT|LlyWhb1v4sbjEK~(*rUtvIvizc`$9qYG8xhB`mvFq0i zh+TlB!6vvN?~jLpovT0(!^77wnYe{k|o^FAj9Iq3{#7$ke?EZ_v+< zG&w=GonR}KqZFZoSBkfQGGm`sd&=t35BvN{i_cA`o$i8kgtig)EDoeNT*869Xprau z@nIl{+sJ~P<5}Qw(RvNorTlAu{m<^&nzr`OpTTbeuh2=WDQg#!Xz7OO6{_`GM(ArZZeJqd*O8wktM=cDkf+n}-Cdd#zxQeCJ^0V@u4 zIfI}2S%2t9uxvFh+tJ?MWA88)S?o|`JHZnSZmx@%j;;FY6z30MzBVmA+-RrrZ2PxD z_GyOI>n4U2TOn@!gl%Q=Vz@%xu+%peDlG3OsAeIAJfchhWXzr5N@jnq#~2TA279Dg zgO$@UEmAWQE*W}-Q zG^|`DLuZpvxJltZ==VNL;)3wpfz>Ng6VnPTjf2+^Fi0b)eF0j+RU5T*8tfLOorZH` z$({Ayrgw)gjN0EB`l~+XW#(Lxr`TY>$NJ@iVN34y!$T{ae@hvww&D z{X|ubazOtDjz@_H_%E;55+aI7x&caoL!07Q42vc+;I@C zLTIk$C!VBAoP%7hw=C-Jc|V0e0t!>7qvjhCSzjjT1{biU6|Oh)ETLZ5*wDC9ggK)`c1yhz5;IwCj+5W&Qx=H zzVR7*`^A-cR-$^qC(>7I#Fb2o)#&)g%1Xzx=T!>ACEcGs?QlhGg;T`#W~AnJq?8*? zz7T<*!kWmMvR33Tp0suyB3nd)X~a0l)Y92$g~#ko^}YA!=caoXu>`4EIDKpw$m&dx zr)_&e7(zix${#96w(V`blqN34QY?p^4Np2v<|MW?&aKCM#S$LdgVyS@Q%p)pKEMzv z*QgfLvZ3r;hgf3(;;!<(@>tws;!HjOFTa!-z|Y0fqiCp?+f6U4Q_vxBw*ZjQ{{4DkwlN`UmzTG&7ivC(s$yHWDkU8BuKyPzu zEzoWP-gZC5z=W^mI#gvZVFT`SX$Y7p0LtB74OYLQ=g_j=4t=W;u1=pp1CiBA;4qNM zW!=0nnv9`wDsq1})u<;1?e*ih6cx|d+nrHa(BHWmT#6Hxo#&L_e znE<>>xKPRY_+E139PFr<#;SnvYN^I$rP;3#xoE&0yRw1n zwr`Mq|2Q)%=(w=4?yJcSda2GUPt987j#qYh`$GM`0iw6wns^D*ure7gZObZpW8ybc z;LW=(ebkX!9{vH=I;G;5P!B)BJ~>ipuX0Y9L4&9~NOl@p9bo(N6IMIc zl1n{=@JAIyEi6Myu-y(~CjQOICcf3xzwBfi zm7$s?T8+@rt;Ki}t`}IB8hV)sIqW<)u`zuwD<4J_TsU<-QQ0o*Sc!i8QJd0RS34{| zch+c1hWGh+T+s59NwUf)!i)(lyz(Sc8QTcdZUcl$f^v`5uu=lv9i2y7e5w3FLr-s?+u1V@ z%i^l6YD0?$e(`(^7yQL@*aeJ!0^rCL{P!R5my~8z(2d#yo`%beiOserO)puH&ku#( zrZ$Od4XGv!(`4g*-FuYp8V7CZ%v)^y063qAT{xNIp++I@{fEQ z;t2Bn(5NkyfFF3)=BlpvpbdIzF0KgEnE~V+&iRTnY_gNrHbx&#Y5+IX?wTy^xB=fx z{%l#rH^o6hOyn6OKIrdD@@A@OYE~ajlohT9`%X0|sr2&oGZLA>aZZ{bY-qckhtqw- z%5;r_(HWH~Te+z>p4Prv_LjS7cd#B&l@6)BAya3UDeOL0%I8~fpUFt%_faUz0c5Y3 z1?S>b=1dk_;546C#5%nE2DC#rL4y-fz~H@s(LDIDv#_4_J%s@_t` z4U#AFsTB>CQ;RHL<(9_wTP39csi^@d&yg*V8ocbJ@WVRYulAXb$+aQ=w>N5dm8)_d zVUQtBQzap8ar@q(7X~U79hE6QDjW|tpp{#1;lxV%+;@tPN+Y> zTUBmne-K70dPi#|MzD>d+rJi>?<;E&)BX8e%294M~7HrY_I~R~=;| z>qIN^iBZJLX7HMO2(t@Jv}1@(RQ4QS1V^KeT4VHv)9xe6!1g1DZqQgYXK`$EiFvgt zsg)iTEpE>J-o3J<1E6gJBzaFI8R^;4FA|$b{(lD zvSKGRhJlI-J=d?9pPF3Gz&W$f7{!pOp#gbcHIUss`!sD2m)6lwn(nt~j?~^y4 z)>l0-O&v{}OGVk9m}?-ycoD5vh~s=rc5;XZ43G&mjU*6UIfwW(B{yUF$HA^QaTAm(ApGD8}_cp_m} zOoKx3-AZNlMM{#UaY)4wq{RC>PdW$E_dm*x;P&kn)5HNi^fBK_y<;p)EgL@pIDhhI z3^Dz$CFL*PFVAp3^H+}8gRjOnO>!^J79}BW-T2P1xY%x!6N!J;n*w8zuAkN(Y$^04 zgu5R#d(dfOf_a6O6Zc6SPauOZz=LgzsvaWMJ&_=cdPlf&wIol!bNNJrd zr8uxIIeLcK7>1cGv_9wr4X$|q288rfNA;BXIw@y^bNZBBQ+qZxw+8<)K#b6pbw<$k z6Qe1WbKKNDu^9Dkbvu6`s=`PaiwiugWP+S?&VxC$(5@>rTI3>O;bokUdf&37>1@rK zq)g?7`^n}!xYAHJdg)7Lu!wZLfkoW64dU{Hye>Fq&WSApP9IIq0gxZh*EWxA8wkjZ zVLdCVQ`^iDTXW6LRhcr$_Z7#xUB88E*l&;kXe${l-PHTEBEOAQ?)bC+W)Gsvu=5~OjR(6zaq0}Vu>`1%aGM| zmDiAye1toMsv0g;=>qzro{EyPo@0d!km?8x`|gEiot?I2GD~{QIacI(+k;`0mltaF z@my89MOj7sLvhFXqA+)NAHO&~$YpT_>5b@VLBAuJIuAWD% zKhM1`{@D0L%5e8sT2j)b)!?6yi&9b)Y1}U7`qpE-t|t#*CUEmi?Bc#r>_1` z#LXg66ImFHR zdvB)<+~Xvc1&OtAp;W`G^ap}_A7t*Fe{*m-CDd~HGyQw(ikta?S@K`NSHS+b(%btE 
z7i~smXB_o1^DG(7j_nUiabKrqt$-^bLBr1@8&bmPu>aH8nTJF5{SW-!vF}S`-;#+W znUEq$l8h~seMy8UMwXE*V{4L~p=+(|qO2iHGV7nboNL~`8ZV52=Mp5#=2w>o}FUZK)` z1L?A=N{qpbUZ2hMV`1~7kB{@*9Da9dh%+NNDAGwZ`@s{t;Dzmlku_C=_DzGewwbJ} zf-f$nn~ja$*L|C3mr3%+H=85uG&hTstbJnay>2)m)0$5RZFfS7$(0{ zEL${}Im;M2+>&nnD*P5t9bs5zsD1SAyQQZ7X0EEwJ`5(qnc|zAyX%u{1PN4swO6L# zcNQTh#9*^xz`>_Vf^-M<=~`x78^Pmo3Vw#`ESQveyFkOuiP1P3e7x_DB&IJ&WID=; zsq-`qf5G5Fe1=ykTdk7%vqe&?^;JGaio=g+3D=|NCZu&2#k$m<5WR=HMA+)p=RA&1 zS|}csrSonijftc3Xwws7O2g>TgcRkFYL#4^vh^ z4)`1zxg3@Oy5eH+gB)x|B|;6<$j%BJve%mY{?l8gwqGllTa(DYBtCql0<_6*tyP%& zOMMJD7PN%(xcQ}`nX$C%#{oLA+@jbYkGxHM^oj}uv{t@VA#&gJlzsDT7mNI&T&7e$ zDBImur>mGIXHNEFXsCm-!ksX&Wo%uETkE5L;lW*a8!!zVz`Gd~Q-64?ryuZzYp1=K z^1|2g20N2W$+2kjgKI0@RhMT6#R``TlwNj<*Y~ZCzfle6`=YehLS}aHR6wh!I7+8F zPe-FhmeV**zv-rCER=sEjgH0S+2CLEK>9-Dg6?IvN|l7U6RdC@tF?4qQg!AFrXK4{!RZ&JFZ zXn^FlI%|GM?$lYi#M~@9dj7yd-FAvMV&sSuE`4u3416nx#ZBSuAY?XfUoAfJrnum9 z&d#=Vnu|jqTd`G#dcG$7v$c#Z#rrc^E9jSA62dkHc(sth0ZMD($(`IwD+pz)ZsIE$^yfAy}sLUZAD)HooHsH)oSe#N*w3M+V}Qb5AO!< z9B>+L{8*Y}98{4RARR6j_d`IJ3ub#+$LDW}ys<^d!oMjkIFSM4tSJhG!)L^BNN4ASsq3xG!){LwUvy|IX zj!R>5R}n**zj0^xa(jqS_CZ@bi&d(8tGn6upwIsL%g)W$H0~wRuMQ3>3u&U8- zZf#8eA)A!7K^sUnlt`aQH=TF&GpWSuIuGM9ZMO58Rp)F2&SxiHy8uB z$&y!CDbE%|S8IN<9&ua(dG$l^A9_ya5TDX+dP`Mz@kP_J0F z*gh1ZKg5u8cG4|%B=Ny$3d{IRcDlqulS01)X&1v%1NY-SB8rNILiA)wjD8t;D2M{_ zUeP+YYSGC1BUIm+4lg()$hpxu+;`ZNzK*uG!U~q#8xjeq;_U^f)R^rCwtVBUm+VyO z*6sY9n^gTR#vDN?(O|_DW7XH9UG}mO0y-1&z5aS71O(rk6eABj3}}|gn(KliLfM43MOUDkc%y9pgW!DLfaLUFQtDb z>)9E~A)^jYK~M~x(O~uOG-lE-Pvod*xvz`CqXm1r=MS4K`kc<*&|<93w2QWRH%Prp zvuj-C?W$w-Y}o9MqV@69Ydr?qlbc@jB94ArL+f)X)Wgr?RzeCkOUReJs8@J&UUwB0 z5a*hXVzM|}7cHQCJfMu^?Nu1DTb8}&Vb-A&pK*hgcuaWQTIJ@pa_@}$?DAKi#HZNo zv}bz{drx}@6)W=UNHTWOjnQ=3R_xwtJ@>QQ;h~e0)`H{!P$>)mN8mZ1las52le3+p ztF`kLYgbo0M_ZS_k53%8v~!#=R>ux0GG~^Pu1zu^{9J8SUW7iAYf4Bhv}VTy;i|KD?XC^N9=i0q?!t; zmFfnyyG6!lG(c|Z+wmu+_>?YRzRcI=?l$S#a_XL{`?CwCrH_F^8Qy8mHto>_ z-+Q5ah9}!(XUh_X@Ipo#e#)wCcQg~-T&oTWeLa3c-rVbH-d5NUy~#j*FY^dfQ|~z zLmD0ez4|p}S%Q0hz2XziR&h~L_c>TD6$r$rve(Zo*sIEK)8h}3QV8V335K4ZSPk+p zZy4#T)J)kQCV5oQ;+mO*Z<%Jl;2E$m(m0W9MQ2*BV53Tnq(Ds9xZYW0DN3TD%+^zA zWj-pYBw8G0ae7p?uTxdl|6>0UjOyLoN+aK5M#>L0C+9rx@!Z(?_A>q2^tSetC)K-y z&|OTu0*x%5;!-J|Vzo9sS!Fd-xF9O`1NB$rUHWFEDyb~cx;_xC@^tzARkks&qJA3b z{uciw2HL|;Ra|~DA2@!SIb7`3_ISH50_&?LhNp`{sejOjb>_6P>7mGb zweE5w@}0wYsUBkFvWn63RRt-2eSRUzYgNK7`VG>no(Sg)cuWgJ^+UN+@z;a#w4Zd= znI^{QwKm^kJs%WWi=Kb&#rQ#2(-(8nJup4c?($;Tz&w9LD94brZTZ04b>Hz#=N5V3 z>Is4|g)vX3MWB1G}9B;q{+|e|LPhILG zXPPhfdDLQND(JC=7_fG-Azp!ysaNGod)~&qte9$nJyAh5D(d}E)IFuq_=)6dszrq3 zr27-pkd8t6bhvarf{FN?T_;MGZK^jkSX4jy3MreggC8`kDIp_P3cTRfm?yYa`x_@v zuwJOr;!}HJ@kk_i_J~CY(zJB&ql`a>)Oslmedu$VJE8|@FSI6e=HWYDcGhe?m_;@A+ogv?>?FBu-@OxY9;Lq~q-NvcYI z5EJD%MQ!3@K)!Wgh4p@Iv+LQ$Gybt!S1@$x={YKj8cqxjQAjnJ2J?-l?oKc9ce}(o z_ZV0)tqYH=Ds8~%=wOqJj!{#!O_YYqVH_mMtnE(ibH~KZ3zlQerUu70L;%P!9Au}} z!wudVhX-^l0~8((p5jgW<}b_uACJNJdOCQ(`Biwyq0w-Vk4M*yj>-}yyy0Q#;IGs0 zH)Dt^0+gaSR_5HJ0_jA6Y~VIz3rqOz{dfqnaFq?XiohRLd}slHs6{IP1xJ9)JhyYW ztNUTYfeZk=XT|Sj2SP-GBD~yWEOoJ30C4CuIl%V&d4QamsB@A7dJ+jT^IDfaER;SA z0L{yYzo<5IVi*$#WC~LhecjPZ@TQPNkPRV~1nI5OB zL{9{HekV1a?28v7L4NSR`mR3R7o|o05}Bs_6cHsHX!k?q?KaQ%`8lwY=;3Joh~!2p z@q7wD)Di=-|7HekfLP){Br)<13z66Uze=Rr%R2<%c5R{pfMb6sY0o0^I~|AK5<%oY zPRTsmcZx*ZcWOT(g`9oSk0_9b*TDbd`2jLG@o?7vbm&sp-8u>0wV zVeRl z0HLZl@Gvj!?51L#JP7=JbNr)f|8vd^@{0#Wh5ucffxf1xv+iU{{Zk_EARjS delta 70642 zcmXuJ18^qK_dOijwry-|+qRQUHnyMGwr$(ClZ}%n#>U=Q?|wh;?_a0t%$=&4?yjzT zZuOkG>349Ktx!11a^MgcAW$GMARr*5AWYA9^X1s*@CO2lv!_6vGM zG_AL(k(vDkgbGW*J{LYntyVQ69^IL6SCkyTboV2OA1B(JPtKmJWYKYe=d+G}5*-P@ 
z;@?UBSq)$7iS!cqS3K~cx}O=VLE$s|yoA#~OVg%mkcd;QV+ft?m|2V!!ptrFf-*jJhe5|uSqxRRs3;fZ7#Fsc^fn>ZC96ORMe{R)a28TTH1BYFpn@;xa z5;kpTCj=pTx+?>UwR+8a)t&{KEAcvYkS&8vpG5zB8kL#4kDJJ+Q<`_tkuJHxX5#&e zXi3$YL*#$7Q<_(4OvAWUB@-Nj)DRTub~ z{%i%AETgM5(t0kCMFQe)#Cp7@KPG70q+22?<*C~&uz#DLM`C6a5XCzNGU#dp7dFl>SZ@uSSJLua^>0O}2w(04#6I4zpkk-1!W8&~!R1YH~@e zUP*=q?o9XwB;>T2%tB9A8u)oP^ZXv7f-_53NdA6J6}60xwNK`{712>k%CV#XC9hF0 zW174;XYHkw5w@prs0~a)K$$T0_hR)4Of%!hz&jR2qo+zK5png1f&3ltCr}rfs;{zU zjo`4j9*}uxI{V95B26LldxPXbg7}%XY|_T@HodKuj&g4&N-=m4oIGRE$sNJj z2s|xBwM>NWtJ6@~%4P_OJ*t`Yei!GJ7Pn9XE$_vckHEygCZ2P9f%_oC)>~EwU9L~V z^7Crb>bUDIl{F%xlQh9&A$pvlgX5qNXVPMOxqP#+r7TFY}CKf}dE7BpEf7t0G1|#4+17pI zGa20`hg6LkZ8fgBiW2_0!I%e+~Oq2#p46V%yzhn=IX z#v|i&5=%ZZUO8}NC4Vo+tp4;&1b6>SKlOV3wNuKMr%J%yEvN)Oes(*^4H<$Ykh$t> z58mGF+>>c~lReY_-yWjSu?w!F|0}y15FjALATXfmh_ztE&E0S?pn#qf6}f*I8@dbm zE!J_vX}3b#6T)MhCza7X z3GHHGBz5ymF^tA<7?8`bYZ1(^v;_Hlh(LqHw$X*Qx3)srpm1}`ywO&t5&7Kqak>zK zF6FpbHH1Us7c4TM_|@TswiJ*b){%)$Ho+CkT)5Ua1!`fg7gLt7vu zx{(ddN7abcY^a(;=+>O<bf@wkJQI1^~~#vN8W(xj^k9vNCC(VmR$#0KYKqtPL~j^MOjlJa2THl;1`a& zp4-P%j^~v>AXZoOC2^pW=*elQfa^V8Vn+1^y+`>3-cTD#$nER;JmUIv8m9Afr@poY z-R7c?dS-|3q_hfpR|_510-_&MFshH@7z1N5Qg$O-KNvM#p;1s+0!MA$OEM=*SO8r- z*`VmN(MIa{W!F)moY>TgI`Z2f64oKZ6rhVXTR8y`ojbM%pW`3C+dPp=d2S2Z`O!{F z1bI2n2DLjmzQ|tT6cVgZ!5<)eLX;E=y2;)QB_^Bal-y%|Hy$<+5-QEst#EB+diF%? zJ2f+b%^M+{R@0?T)$gUw+;(tLk$~TaRSMMtS_-@GGP^Kb*jO}Dx+rvHOg@YO>lEx21>T0*QJGJ_9 zhFCh2UftGHp?)V*8!8c&PuuVMKU-A@T`Km!c+^MRWn((5f1G9fh6gRqB>}8q2kGZ~ zZgKJVc#xziIGUm-(~zBkOsF;n451fzz=0QHMN?ut!jy8yz~I5?Fr+E_l3|2c%d+79 zce6+?zDP1ODLi4?7_x|7$fAfAG#r>PY6`oLUI;@;Gc-K^6)}oNCLn4}knH*bByMa` zxE!}^CK}#sYi)3DtIE;s0{EMAZScbkC+3H2(7^q=bARVbbrdMla0P;XUZXYeQ0u!e1fR-n@O7hKs z|I!;t4J4*FkrD(%^Z^V66C{1R77QD3>8Iy%$enck9?AU;y6@NUYZx1hM5MuV8^5i( zs+_CK$9tNC9hs02Jq1SQvP_yo=9LZK$NwEvSlMGTNm`~Xr7j{#KAD6mGjc}!Px|*= zKJzDxK;#ejXWk6c(Q!&1&&Slw6 z_+6%#pux#&$r3E^Ky8ueNg@Y3wuJ27W#gd+8>y+Ob&$6fQ8jVW^6!$G!JA@S%_k1z zIXk|uT&VKyLHW(n;Nf1n`u8i~q})gq1IoE?PWkfrxFu&hLns6IxBf2q%bESbv;E^7 z;Coi$_eeV)W@tZubktwTm5E@l<1tihIAwEr$0Lt6gR36 z{AVZIDfxVmA#!L>4oBh*TY~3+R5sC9vu(v*&UEli;djHg8XCqL<%YH;?2$z)+Z!7* z^R9f0`UmH063xz0ug*LGS=?zur~EVf0FvuCX=46^18p?AKPpb&iRY%|XZOJ`x8U>Z zIN!$#yQP7HwrIyZC z&!)-?&loKEX_g0lx&B+vOstNxiB9WLByBhF7Q*Uv5ZJ~>f#&ax6rYTYZ6H;Or>mJF zn;20eG|GiYq=p(mq$@$aw1!8Fu~sM!2Fv2}gA~o5fAReOhA8~AFF))X?KC<6z3L_u zW?A9Jxf2=H*=mDWI3Hu}7smS%C$b(XEhVjki9NB)p zNhsW(ZjMS?cxh?RK4V%^9yp2~U1{>VC`V+}@Rv;CF;xa_f$`l7r})E-gx6n>X-EuB z?$_x2+`<4LK3U$dF~ZG(bvikJq=~WWD;Umi%d?bP@=ne{c25h&q`V%vG#Bi zG>4#IxA_eI_jT=oSYm6|=cM02>+%DX+7Pa^@Wo0m>W-()egIA1H{;xeRCl%HE0qkn zG9Q|Qo`&d2k4!=ax^&VdEN{3cJY@H}(1hTNXPFO>{2`+E_jrBd#9{ z_SAa%=%^<~0nF>_3`$&vqDLGdw&-V=r){5T-I#VD&D7&tDpQs2D^X=# zZxdLWYx#gmgG{pv-#qQ4gV?o@S{BwOb1x3rg|B?O3 zBxMtDA#8s2!`6_Yv0Ms9jA~xm z$^r#fQea)z8H3H&`wv{CYom+HyT4>LZFMJBUY%~+sgf4VwilmGF`PI?nXMfg69<}f zjEfg`rpRT)_vLtBtk&O&h1{=#gc*Ksmiuae(8M~y(#qQTt-oA9>G$dLe+;zu_wCb*vsxl}rfll4HE z-4Ty1ge0p(q$K5VPvhy4$e+-NtvGFwax(j`i`#TU(ZhpHH&jQa?#ApyP>P-9qDU=( z@!29Gr*Z=XRf_Hq{Pb)y7fb95Zu^Oun-Q(6h~H|hPU7-)X=?B(J*D&P$Km?-X~fV* z$tTXqD<0MkL&$?3--rAt2kTGUsqe4s-A$DWb=Qp9MouSROTo{_=>7#B-_@1w_pE** z9QHf+et1x$I754+kqN%IPRFu%L-qRr=ewbM0k0_UbMcwcnuLPRydfQ{X*8uxy1HVy z4Ev_L9s#tBd&}43U$4h{ugCsJi#kqyTjy9vi<8Y3TCXB&5i`nuEJk>IUqky-Q@@GM zUZw)V%n0*$1U2K8{4k0Re!e_)tfkCOQu;W}!0WLmTxP7}mRVj&SMruag}}f7P*KQ8 z#DyaQP(T$yNu3=@RXZsDo`l?3-g_i-vtMNcXS7Zf-_KhbebM@Ofn*qJwn?6J-RCgF zGlRaJ^>ZC30~*kC?dWJJysOtG>Sargyqg6Q!TDIyrq6YDB$0%blUM04zei$(D(ah~ zSeY>o$N3+J?U|)Kth4z4)x|h~1fP;>2`2w%gtakKfN#-0CF{}1y=~l;s9lS}A2tVF zyOJ2war4}MRKdK=xHxZZ8ZWt^$zts@;~3f0quG@iqp6YL%5G<&7{T#CBnFjwZ2y$! 
znb=?E?)8Z#joch&v6oSP<(4s4KSgZ9*kWaE^zzF0X8+KhYFG2#W6-n#8Yh8H2H}mJ zBe|2}knrgq1X4vqwm82F<~;Es`I07OWb%fD%_q`hOAG?p5~~B+@yrfp-`R5QU!!=3 zJ8DfN&Iu5veEKe*g-iUIl+mWUDalKcSQm&*2+VDygH10 zLu)~}Sfrn-loTf9Bf_2my2SI1mTTk=i6)W%azv6nev+YswTh?k16~TgNh(=z+9>Zr zc{k=EbmG7cj}DFn0=ufhH#pv;ltw0q5sH+hLGz>+?PgRB&jU-y@FjWR*k2im%Nhy7 z>bVz`e>P4X?qg2_7^#CoL}Q=bwKOe-@GCDQ|Uqgch<3x{DmPrNSq5C z8OncB3NWQIBHaUw4QSJ>4vf}FD|(^MvJ^IQE zR^#tnvcW{HB2!7bS_CEqAncvNcv)ZJ3YVxcW36BfuxjvL9FG9QFE4 zF#mXtc)kqaZlxx;@1MUTCachhz2TPP=&iKRc4q0f=%cTQBj0-&{ypk{>aST8YuZ{a z)*+R+4UOT-LMkOsPr?%*U@U4Zx)YoNvH?;9f)3UM`U6I}VGxH87ih*D!SAWQywGtw zo{A8^9jc^nYo;V(xOOn;0qe$t>vhZeWH|+Hc8LMVRNEVZp7(DScppC^4?6_q{`(V2 zq#pDUA`F@)nT2DoiJMh-gRzYXM#C6610^G&I6J`%C7XmH5c7YN3u_1MBdyX-z!zY$ zY&{19YX(wwLF}2>Lg>i2*dc}IX7|b5^sm93A0o-Ps?m>dW3UaLi=bk#fvwzT_97ND ze*jo(Fig_}*^w41MQ69%tcFM#3;3>C>GM8%jnmM%*T00OhwQvcppi04i=5jXsUJk=(>iVB< zxBc&maQoZ@&fIdvi zWy>@(9u7+Vjjf_}43YA@Sc7qryVM<bi&GHV*k}3~O2Zx>5;2;&|hUs-c>Cpq%Y=8>_aNajXat!eta7ffa1Os=z@E z3*`S^;u!PcsbLr_J$`fp8_PKTiY>6VTF0XJDq)pGKm{=$b&X91QFtgB1n7Kxu!h3()oA9N1KBshjcoS z+V(ssJLaa)<*fzFU^S)yFgJ|w4R_>*Pcq0~W#2peK{Nx1$ z$0;zZvD(8a3REG_3yTLtfS~^7M`dNAb{IID>?rWHaP%guDsqFhWVe|;MN`}Ro1>ej zHPT7|$@$3uv09*5K`(^lOIx#eM2jFE=DQ)iiAjUbBg?CCr;OcmW1XemtQ&~oQ1%)_g~cx#A7%Q^XVan=jGBKEiz%tbTtQt&6VdW zpCN7-N4yFV-DPdXlB? z9LiW^Z4s0kDnh*TJUzVup$TdXcmbhhZ$q-t9{B)wiqJ?-mqro<^5h<-fZR%pxv_wR zDo%?b>JCANTi$z?7(pfRx6^NIFJ@T#d2m##gAZ}o(S&7?(J=o1u^2O5)R1k8x6Yy zbZ8eU)&@m?P1cF4R0Lot$}hO->?TWZ-kD>K-qflMSw*7UyrU^;2^P>@jX7bmwL3A_ zjj?S}>{TYurPqZCU;g=>#6NH11wW=Q_Cn7!;wR)+iid8wW2>ZcbmPprA#0=`TbOJC zsNp6y)=OFJif|a4qt*sYql&0Qu+|0~C2n>Y?ag_<284Y<(+ds2D_n(?Lt;{+l#j?V3xVJ>ZGmheD@9o=x|2Be!z1;N4l0ej~o5}smR)PC_{ zghZ#x2)hF2hEah79Z3GTan_S&+^ae(+F@;-KF<9uZ}2zK%@%=`Mh%n2>NF|B zemSg`XxS4Pgu%(!IFfR!-(sgVijM|o?;)QKz2|ntb{;guGe(wEl^~#Tvs1uft&Z(jD*0s_TVgms@vzMPcB%z_fGZ zXD8D`Pdf`zc7(nra=d)fy;&Vf!y?G17TVaxQ{uG-Z!^($tI>g!m^bqI1;Jx1*b(u= z^4e)u62}1;b!=Sm@$9e*2KVn%3RD}{>HhS8pAk|~v|4nE&ptanM)OC zF?Q-txk^^J!-ElDQjR`=NmY%BB14ih>bjm`mQ>Ryo*?7ViEJU`lyu9U+&iPs1~{%3wR2``+{Rmmwy zv;;KZ#%+u?->LFkZGEYN-Cie!cji%c-ZGX+W2o^WP!3bSDq$6xNzE}cXc|MmETP}) z1DES%6D%6Ww{Fh`GP0RDol%;=aZ^;2_1{HW_*H zgv$Z{Alt(Rb;120YgqUf^uxSJ3^0TFquxdb7(slH0az0Eumdm6CltdXLlXKoo03T- zv1o{b^arPSf=nf!^j9LtW}4EfP}}t3*I03eiP{a+ez);Y7KC^)X$tSqL8U0q5FkeR z5oD}g6ph?~Ui+cwKTB+4UL0=KAK!0@%`?-vzXl`=8hI9jKKMmu@$J`2D!!ujbvMfuks52L7YpraOowe(GlFosusX=;z!0u}RF z;w0d7C*xX^2IfD1{na??v!#zLUK15a>~Bfx2|xPTEO0sAP&rfvmrZ9|)lhlusiqLt z36RDY!FyXc!Qs>pic$}lQ#Mb3>)tfNwyVreO=+Lh4~T^=tt>3ze_+5AECyMu7?;jO zhNnEPPEASIqIjmoRzCbS1$32A@X`x6w^vj1?@H<^Puk_cWSe%525G94y-%}HQF|ZV zw;-UwND`7J^@3z%fn72Fb#+0Al5%4sbRTj4475B}rce zX5-91y@2g|sslJk?_M@W1(jFyKYoT|0% zV8wA)+MCGD>xu<4PPpr^#?%41Ansu6upkql0R-cP{9ftAtgnp}@ z{Bp@ZZ8mqgA?QXJpV@$Q&X&Eu=+P?gpy{VhNI*Zo367P)`hULk2aw%5ndDO2f)w24 zCMK`f!?t0ikt>wTSVE{Nk2vC>$r+u&fA`=vhvUHei7*d6c9)+Rkk>r0hnvkk5fAL8 z$Ze=!V;jah>sT+GS+V>& zZ~;j-4IkhE$pQa=Gy*TugYthw2wONbNtaVYMA0ydD`xmBdKDMkkQiW`EtlaP7&nPy9)8E0nfl4ph9d{Rm@e` z@p^1XHbKW z%gZz3hmB8_RkdifR_!bfv-x=)&Ca<9As`BrmH3AJkJ+>%trY~+-`Dv9W;m<@qQ#J8p?O*?wm}8D5q-~s@#2LMeargl;Y#A&~JCfBIoH14a%{VZk8GISSnR zQwi^sT%qy&zGPmG@=uih4|p6)*5McvM7c0raD2^ComN_p*wPb|aMNTFQ10p#M@?c0 zf6Pz-%-^*@X-W&q*)d%=$p{|!27qQRtFIzvjmGqGj3)6tEU%Gbn9Cu?s z3&#N2`a?1>E2Lm9o&FcxI`)xRpdmP1U}G2I+P?N(zAawNQ8(W zL%ffJ!)Fp>;P{sVSD&GsFCPCkM7L41^TVm}DVwPco!^!o`v0^E_)nY(W{5ni|D-Ac z55djRT-pJw8(h9yiG^YOC%Bj$LTtdm;>1?-`vO8F@ zx;zNy#!gz_q{BTKuEd7_m5@Ogzy>-6L4$tdGUPM)0J!pEOG$bp_z|6&6$`g3KV|VUbvg;_AchUx4hbJFc^0K>}$J_bj<%vo5eMH!Y*zg!GBXZ zt>ruK#$@JBQW^pb00dO8mu)R~%EP0ij zczX0L4>I0kR0 
z$?H7Llt9&bkf|C&tyr^btihR>uC05^piG0yMz%Z{HSA_FipJtJW()ay{riUd0GkFjG3(D~6VJ0h6QsKL{u-ai>&fhHd}W*@K9f4qzo%3n>Znc=jF=oN?`)-7LG z0M&&li%tuFO>K1xtP9TmEdF(>=Ue#k|KpTMcu9E4m>f(2Tp{=x^atV(#4hNC9~1u` zTq-+bG-24G`!OboX(kmqcD?w+E02b2Y>d*JJ_Y|FA~u>gu}-2<%<_CkrHlz{e8B-k z*SxH_miOm2^{NIQ3%?OgjJ2N1|uvyk&{@|Oq6l_bZz_qMo)i%t$|bhZkc%j^)X zRHA3RL-_1BHrH?r8-6+q)5@WM?RY7aNLXi+n@`vh6}TXh#umbX0Lc#M81fy_-ZZ!$ zo~`(;56J(PybJ!*oB1}^7O787ZXR!sFY*Vb!8qsO6V{*R!KV=8>&>z<0B>5ms;Uk7 zc=V1J3jfTUn^tK{3ton&_wS_kmgLh*7!dy>B>@m2GU77Udyk}-isbxjuF{m1$@qWH z@?>Yav|1sI@`Xm^P~n7nS*8A#iO{Mljj(qmC=s&ww4k^gMP3NT=j>=8>|sHmG8i@8 zj zE1L&l^Ui7m8=8R{dT*L1owhpT5_=s)qOSC!V)mouH;E7v2>wO_!2XqB`~30>?{w9j zl2%o9k7qqZnK$@En1+gplltVJxgkR!BpZDX;^VOk=%f+0#tSTOCUTBD=Ak6C>$3BY z6E_-pU2(~P-Pn@LZb$57(A{;gUT1b= z5K6aMHO`ohrN-0&Fh>U2-n@nJ)n#f=T{*3c*f?i*68K=TJL99*Bd~yU_EL8ej*+yd zPhGJtSFsJJv2YOg+p;5^kawOtfDWc2sF~q5o&_oC zMB(q70tuuRfMsY4wO?-yRj6!+GiFc4-sD#t>Ji7>ENs8|uSqz^q5f<$?mIF)_@NHq zKrlBqFrSYo>i3DW$m~W8hKwqzO8>l1*#m8FiNGcDiYavuT#N3i~Td zSn)>7&XOXrA7e$K`9ai4%dhDGUT9K1f2I^{ivS!o0EJ+HtdIxU`KkvQujX0hg^6JC zq%>t750c;<`fm>s@1|QLu;l<)a{v!<)W*O6rkQ5PpUuE6kvhFK5hl_QR9$mnm#Ij0 z_!)rehr6)b=nbce(ni(v{yv|f{iT8sVH4TQ(Kj2Dj@|a(y<%@?T0dX zU0qy7JE#m356P;oXYF2`QQ;LFt|TR8SGduM#CSX8`ZFB7kkI(9$NKIAjnYdHaMTV> zZl%uK`osCfb#U2x2wY&Q)9M$w<&eZQcca`TzvJ&~X8?ipTi11{&Q8SX6|RL3G;}Kf zkf@gEs@5ORE2Eo}xLo;+1!w>0XQSWVYCh+5ycz|I8PmPBrz`u1;^8E9AjijIuIB2b zQyRZw$Udvt9VgS-$1pyyx%^r%-$)Z;WF6S^H0MEGhn;7Oom2F#&O=bf?SuPbQYEJL zqvF;zG`j@IJketL3@H+0B0cRfX)Ia;#XHyuW?|(Xp6y0r0GJ1;f zbYF2Nb0*le9yKHaOIX%d{I~eDJ>*j4P0U!_JFQ%_?nK+e0Uwq3ZFtsPY#Ngn8MwvR zvr05RQp)RT*<)oKuCm7Ash>l8Q9hN&x=CX%>C)>wC3W{!OiL_vcJpg0mb||XKF`a=DAnAq}S!Ml8434N3LiIf5kmza(O51Q0@V@c5r}4gu znfI{?JdQGq$$8&(9nw{L;hFLlt-$zei&6&pGT-K)o2dQ7F&!?FvCIP%JNg~3{`!?d z))1^HRPp+IsoCNXpLnK6;PUY>zYM4=2`C-NE$Wv?GbZ#id@r4!pI@^Eh>&&Zjy&vD zF&<~7Byx#ao4tyZO-XQbVRfo}I6mq#y{DF$^h&M;BO*EMvS{kQ^Qo(Vljn2 z69fy1!*z(XXkBM}AurR&7zS7;mp=oId&L2l=nv z3$9;hD$AZ71cX*NnE-_l;Nwy}(RA{S-?&>?cQEGkY%pC`ad7OyPi?z#4MY`)|M|TA zTHHPl%aAaN00xyn%+81Z%y((@{&-Ej;SGXz%1AmXy@75<3L&A9_&QxPQ!~^5`Dw9N zOwFa(!_r~9&t4k3Re;ZHcyRFD8tJU)XAuUV^A>wtzg{kZK z^iHA6$DTSLboX#@E@=z$#tR_rh1cS95rqSzZdoghJNjx&NB45+&H6;wbKkH~@~PZq z*9VQ~8;aEPZ>2`S%WHUfzUdKF-|%Z zhv%F$j|wgUw01FLA@aA0d~j_g)$ba3KBeE<4yTqjXHFuaURMO3W{R$A0?+KGEt@Vz z`OY8L8L+wNqyM#Zp(r5*|DFYBhB>rGd?}`$zBq=MjErPjOutz0I(`;tP?BtLI`eC)k8n2BH-kNTZS7mu3su#Wv6c!#Mn-canDZD8}J zw*JW>A~KN>(l0dWp>~~QJ1KL-p*daaOsg?A|)*V@$ueh~2Q=4Fe5b12Z` z?aJ;1pjetx@uGwjlwpVWYpI{tHa?ha5Rt;TQ^}n%W8jEKOM*YR)&C7Jx_rJhMSd7HB)o|R91}ZZL5Q2lscqcQ_uqQ4Tc%RkmLSdIL?y+G z68TOd*TEk<#Z`UY(qM)()iD5#(lfAQ5hYtfQLiGH_P2XKQG01GL3%nF`|;^f2J<9F|OI^<-UmWi^pC7G8dc-YhDZ(ToqH zru`*GOlEpHk_e&x^bF`i<6AfvknxljT8pQ=+ZObnRb)L*Qu)0t^R1TSis$DR8#r2W z%zS=piogAK-Ilnvre|O%tsWQv-flRkj=-f9CZ%TiIm>UJu&>$64>~iuK9&PZe~DpU z%nr6N2z6@p6$&)vRL9Bgs>J>%C}XMJYC=CGxeWar%?0JHWPguuypkt5i{E`eJS<`h zN^i!C=vUsqFND6C&qtVy-i7M6v78UNlT&z<;X(Do`XR9*Bd=?RSH^4sb*sM)ZETdI z)#a~}s@*16x=iVy+6FwJGn$2Vx3dUB;N!n2;7#t5IBx^0@sd4)A`whg&4#}1Q0lPc zzS~PLvi5WmVxVbpL%Fk=aQi*y=+Vx*;>2aQtCGF((*?D%RiE3nb>iYaWOgBY0cQlGUbP}%9u=>-c}1I9mM!{%qe+K zKGDUpeZPsp&5Kji(2ckXMiiLS^to%W76a%>5nnt-@jOAm2D?&C9>-V_myN41QnRo| zehABhkNgO@z&FJsDFZdd8<2uFG>@AERw^NsGn#1Z%kvqYqq7vGZ(-yO$zYo2k@XI_;;q6+2kMzZQM)08#G zA{LGoGVQnoU6mGUx3B8=6fo@MJro5u8pjnL={Y%3I}RxUG>!TC;Xb6jKScIjl$D^n z3xx;!J&ws#`Zh%_>SvOHjW#dWg$SET$vvkLH22$}<>HWoF0jB`&o}|m z7se*xM%K#*9D;g8|7?o02rH3+)yC0-La!B05l&%`aK0lGv(a@!qhyUJQb(TxQ5ixj z9_1oTedKPfqYSIl$r?>(F^x2_UPi+sV_@Z6-Wv`diOn}3IS8AMsjX$hDX{AqQKnAA zu`!TCP_ylAgs?u}+EU$)AOHEgXajGU?9YMYCzM(lAfg--HDTKk<&{Z^IK4(eCv}^M 
zE$FHL>)~6Y_@|a3s)J;@S9=-|KJjoNCUhw|A#mrWu34wt<_rOjoGVS|i!a%xw+;T0p-3YA-1dN#n96Nps+d(M+vpwjrG#z z_yr>X2*0a8z+#pTH#`^Utl4hdPOz#Lf0!A_b8`$^*zm!NP|o69Um7T`ElssMcNeOm zQD1q<(_X*v*n0mI?6%z`@4E@`@lW4<=5pILWPJa6yVS*IMY8l@Wl~aE7js9Y>ra*T zV%v0QPX3O~S%M~}tv4Aifm)^fb|9dQf6c=N%qGn#84RvZm^&99PGmSlq=c_O7b7JK zj!ADGMuzbnw0B#|Z$zPWH#AzI`IsvUc}CWnE~%<3T&vfKm{x~`hfefnk~E5#4)0(m ztp6`qV`zkjHm7Qo+8GN@U*|2vYs0V(trjsw7mc(J8i9&Le4iFE4gMGOTJPwv0Z`*b z&ZEW$)>SJ_W$U?C5M=4!kt#$Kaxf>@)WueiNb1u5wHHYU<3U|Y0A*lh>T#2rMtcX; zhrTrvQlOD%!rlNt?w3G$2%&_oAYztPgkYQ`B$i=EIn>{Xhi4HAx#l4ahaEY!s6#30 zeS9P#O$L61q6JeBq!H~J(4;Z=fE)@^b+VQKlykC>rcO&nQ5v$#=#-=5kZ61vsG75| zMAV=%V_a*g%_Nro9jTR#z5n6OkM)y=#VzNI3tom1)_Vrs9B1I~|G;PQ zwxaLef`&!^sWrae1UO9m;?-#O*3PC*u#__xPLXKT$VKTrZyJUJ7iZTySM8Hgbsn?# zb9dQx)s@oVkgeYwyQ{GS4@`39J_OpS4sFLX1JYmWwn6eAhkV3|E7lFq`Cw<(Qr`$CI>fWOPuh++0OmV_IEBJf>gR}y z3=7MbHIC~N8Q;QCXL2T+zuRE|UlA4A!O86S1hH~g72e*15R?c|srh<6l~`ZzYe9id z$Tc}t>;&J+6-aqEr|W}pZko9Ze~k7*_&)&t(I?2n{_Xghqa1w#-+y>-KLY>33 z$}j+;fpuQfG~kh=`DDM^8ktU)CfkIUE?x{(|Ef~Nwz9YXrNiVy=l($;&IIr=A&=^YJ`~y zFGGkc{6F$k8H|Pl7^>eEf%Sv`hhTi%t%*HJC}-&Zfs3spF>otzi`2#x_-^jCE;@P0 zJ|*aLN<>^`3ZD&?s29xj{F0$gt)2S%Qs(VTLauShrW#0n+Q6 z{&+`^k_CVLg3zgL4f})*E25KagAe-|7H8pj>lw>hbCD6QGyD~@S$sNAc0+4B0Ts%i z{pma$3kMJXzPkIE?ER#9S$v`7zBTYFcs#-7nk!HrM>j8K zuNq*LzT}6|-PU}@MBUdYWA8L2KH=`pbCznL_;z>cJhZIJIBVLaZ>OQnVCW^GUNpwf zWm~#)`ocu*=oRPy@2fX{1q5qm%&;4@g-1f#!`MrqT1{P`zjJ;=9i66yTKy57#bF^$ zQ%mToKpOVb^KDb4xJ=RH3gj2GHs8P|=nnwdLWD3mnBu=a_y2>}L-pkbsoZGLTXRH0 zcidH7v}E;pC96|3+7n$^VWsY$RMj6EtaO#?w45}l^l~*BS&58{R^fDaeUWvsU}+;{ zGn2dyeEkinwhnv1bVknc2s~SbOvdH0I+gKyT&1nlR&I{IIODc09p*lyAi$}th^yMc z_lS6YK5M#0Y5397e4;S`To^d&+!#4akv-N1C)QDzw-fgZPfRZTmwsLw&1qA) zQCpXF4&bb$vFArj_w`CDH{d!fctTMK@>zuo@Sbzo*Yp2z_m;7BL`%D-nVFd}W@bBf zOo=IGW@ct+bIgvJneCXFnVA`4W`>x@$@%VpH2-F#D;+&Q_U^52Z7p?I^;@-S)q1fS zlo!9IHB2P#dF~*LoPME^l+&T>y>*M%woReoBl4wTqq{<491F`oE)3C0Kl_t~NcSX4 zk*}jlL1C=nU!ex3!)`@Ws=}frT)1y-VsRcUyl5jyv#VP4LoOrp$=VY(xH327>N`UY zm%iNiX& zlaiuOoG@5Zoa<&;gaOk;5%+p^(0KwlYF%AmP*@+g|Nj-3#!Qo@dAeXteQW+Ge<#u2 zL}P<5EnS=+vID@3j7Z;36sA)*s6nw@Ds7fPXW(%_KVuy1tP6F4F72+^d}dS*@NSz8 zwha4>6iNPLWoO&TQA01Ck!*P(DuRvPg0}8E7|nDINj;ea4~tB?Y5`@~rw1cpJpzaU zVLh!lupJdm6WnjcDU?lbG>Pg6C}JaHUe&es%eGR}JWfFPkWf!ZJy_LG6zFY35<#di zQr~slh_au*@cG%hXuD`-?x!vx7dAm+||E;@+dDBe|o@_&qe=I z*0=C+OhW=~2^Z4*g9F{nAEgZZ8!OFlRXW7q6&xt$``fWQpRgkarWZ5PfRPP z7!=t`)`0GEeHk`mhLCDr9W}#IiXk1)WxB9a%%UNmt5~U*g<3TuW5d+=Ap=e*jCpb) zwl{<5OKtX2tY+CuSQ6Ni`Cq3hr=l0j(uNCBLG@{o4LVVgj?8htN>*h8O3XE;8$%D z4Z0y{9QX&e0mI?DiiXmH-#*tP#hoOYm}`BXG;k29%DLv&)pPjZnvmJOTHZclSRxIR z%h;i$LUpa%n5qbiGa((vZ}=~krVfTZpQu%)IMnu%XKaq*e@3iwh;gvti*}y@EkU6v z3=_BqbYb;owz4;84$sBEPj6FIT|H)Jc0DuBYbhdTfdzROAL3WQl{JJ2Uw?AF4(El4 zsz)pqW8CTpu=UmxPtB*vi#U&LHQDvS14Sqj%UcA<9 zZ0>U@aLSo1x7gqphzSA=*39$k|FQ+4a$=;noXk^Xy<`-|Wh^=hJhmT!ERq^q0fh7s4V!C$Bd z2-vZ{HmNV@8d#(be|dXM6mrqS;W;<07MrU>$S{ftRH7`cr_o4ujd`$j@j?wmvB41G z4jaUe?o6xRbNPm_W@Ym|NHnWaV`wqH7rNr=b?9wC4uKa(gMQKxxTbYn1s-_3WK~%3 z!nC3r>U`SR4`D#IX@(b&)zfFavpgm1zjXm`!XZ!IC)IE z1D#sfx&x%rBYtvTne;M41VqE@hT$MC+#xt@RNaP_2oqH37JbpkkW~)T%xFkEN%rzi zhSc8f8GmR4#2MTAw;(n5wH6O1W@e*jho&6lol!8asR zATSXB;MalagVQ7V$JWM4>Dlxf7c!|bh~z(pxNPpye(2-r;rwIiiqT2N_wzrx$Q$m2 zLE!&$q5SGoVt5oONWySDwWp3B}_#zg{F?tB4hEpS%x0s4MJ602=6okW;jN?q49z;Je`M%iS|6= z7bY|Wk(4{bO4}NXkt+dZl%=XPK3PX%9@7)7b2oVB%mq2ePU7RGc<%ddm&Z;ajYz%E ztw0dt)Z+d)n^;j~@}|R@C3)dJM#A{Lt+Pd^%p8|SbJPJ+k*kTkobN_U4mJn42QkEL zqN^_#1j(49QiCP9ILHTco0}C~AtP8v!M+d!r|&i0592+3S#*ovyzu)F=vuUegX#U< zn~yZiB{CQX)wv*;=MdTUdO zO~f*N784_T+zbSshP>4XO)p_-`Q`qI2!RqoQMKE<;~}VjJQOMh7DChodBmsz^_JFky~M_2{ET4G;u 
zeRgGqUIvZ<;~?DAF_uaR8Tc%H_|^34r$ecbl9^v?kN2{2F0XKRID3*1Mo?AKkQ)kIoi zJ*+R^=i`7m=n}w4C7rrU;2-VcTN@5BkSKB>B+!2`gZN*_C;3YKMe6qqf4D$c@4<73 zZOPC?8;55i_&q%c3kXY4%jE~4?3L{clZ(L z7CFHY{sZ64%srJa=|6s>rs0wL08q>*A6#0e;BEe#uhg=D2SNMCb$ReNrp8N1IVe56 z4_5RcWRgZP!T!h42}|U9!I&Zb2kFi-j6f)Mh<~i*xPHOHz|1lfQylCJ$19sJZK_eR zxpX2{rrXwV7h9V|xWZw5VnlF2n102V4~Dt%XvYAx6P<{clcs0$pL+)vtm{^IBH zFzC5@{GBq6=$R-W{e?v{p|mEk;=4guZq3|Ou|1jt&!`=!7hH9FUC-t14)yNU>XDAo*gXt< z=@M#ISlmizEj&!7P0w(TFH4gTepZ<+KSj;ciS)J%3<#L9jY;lW8A z(zvPKESCyNuxSSD=Kxb3)-18Bey>k&QRNiQVzFEG^aQRFJ=LPRs8PbCD z&V7rcx2Ah%qrXY4hmO6lufKe-mD7XwK?*H>dRM4P$;`LOrrw`Bgy5POKaM7KRyr?M z2@dmJ*k+|w*%=S?Z`Z2LM}j$1X&+M-f9_jNEVq}|63Gp$h#r*8+wv0!!D#1o>=pr zn2zJ7cBcE@*NbJdM_@_Oz1|XltJu-)zmhETu0j`dYzurnG({SZ@bbZf(w=S;<=}K% zem$34SL(3mXEYkDY;y6Ow}cnwcso&+L)nQqsnI14j_B)X3o&=c*?89>*r?XW(Dl)) z;W8ZgF+Db#!k~1o`vcq- z;9xEfLzt_@(qOO#vD0Mrt7qS5%+f6zt6V%2JwN{!Q4F+K*3#eN`m_vqxXiJ#S$wkEkO`te`kq3ZQF!E2Z3wU5?IJNJi zr~slZ7S-F|CmDr}>%KF)MP>RK2SQT;5V}>oASWJHXF`I0m;MoRK{eRcvBAXd!+^Y7 z&UzN==n%G<)v=R>-!mq-pLW-VJA2CM=+T*g2$3ykjm`e^(%&j%rL~3^5HOEl=HccM zpiOcDYW1$pUY{RMZa#8(iEp0bmKjSxy`rl!XG-P1QDmj%Le}PvDbpFbCmkUXtqdKL zo$0+h!#g!C*^66t{z@MioC>@U6zJ zgXdgs=?%4K3JFoTZAEg1iS)apf+0A<}CO54hzLA)x6V^bMAEdX4xIK0BQp^wmW+F!U!U?3e z^dw!`E&6bY>Y?O+y7S@?yIlYu^PG*YMkdV&5|jlDHcRH#2DHyrvN*N80ukz>s*DJR zP%i7#trI&`%3O}zIvGF0s;>Bp#j%}KzkH9hFc|&LM%0s9Vv^4#P&_b2BM&)CB>sKe zYZoCdzRW)=BirxuG(^7*Z$tpY5%a2iODsx(H=i*DVWBq{V>FEO8HfPTG!#X)X}FYr z9j?te7I>s+3gdm`340ACPaxRrb=zPlZ~ppxYNm?2dy&E$}% z$p_96X^Umk4%TE;JB6XF=EZu#_S8Z7k2aT4SzflEP4%ddZ?3CFi0Qj*=IK%`@e5*J zysI&XICil*mOE$rc=9qJKEMH@a|+7zLh9S!(A8s0sk=#t zt9}uN_I928c869?#QqarpQ8!Cm}c?@&tzR&)1in*snp!vzX;jTv{V+0f2Da$70#^A zy4gcD=0zfX*zwLQBu~L3bJ*0r5#fCd-7O@K{$vjzpX#Y-dEZE|v#xA03`Z>NsuFDJ#N;z!D>P3`lm z@!#vc%fHuJTP$)Bo}5Fu4$Wuzvkcgd#)=vv3jX;!3;%kbE_-+b!B?6S=Y`bZs&P*a zw9$|1nm(QwwN8e?CC4P$@>7S84XW;LkFL4Hd+!1oXYS?Lk;vkZc`Fyi8w5X; zk6+md)ZDNtEVdA%D^^`rlyKe!Q*gx*>pHssAj5+>?pDgm#4yzp0BT060&0SOuzs*S zO|U!^?8}I6V6feRT_1oP_w*1a`Z8PK+}SfdEdBzW@rFIGMbswrQFG?HmL8Z{cgq@R zcvzB)0g7+@c=b`5*4ZOsXj|xQMpBcP);Sh#BM^03M5? zARlAneieyf8{73qMC$l-y(;of$M-W?i;tW*{7$BDwiY_WPPxTCchxeG+rAx-kUEuE zyE7S>Mvz$^8xok{X)oWueEt&VPdJ!v?cK3pg_Ix5?NL6X+dYEF{T(aF@j0TiWu=c5)-dV~ zzlsSDPkKA!fu%wQogB@FauB9_MuyS6t8&+ngMpb!=In71H4C6sf1^ZdTE> z90Q#(Wm_BPPi65a)u7`Vx3D^1_QM@e+n3qZcCnJ?Ff1rZXSNpg`waE1{e*2E^^U)g zKKJv~S@%!shas*Qe*BnJea}Byg=4%oqcyl&q8+3ojzlH3s%1q5+KiKrVYHBDJWXKX zi1!kxTEfaQgJGeF%|jd9E&ggsi`812nx7~O6-wS@Wu!UibT`ohG{SB#&e+=k5&yS? 
znYf@N)@RuP!9NGxG>D^6u^Ai+F$Z)Buzx-kX9T{pXz*~Q%a?FevHE|OqyBb2!`hfq z#yM%0of*o#kGB+?urk-IAl}t&41CfFyIxq9nOt96y#rJ(uy^A=aXkffP^RA^hNWJ$duM#vY2fZV(SNkj&Rge z+%zd+{;$We)Qazec&w4b^q$JRg7I-{vQ_?SbI0UH>U@sNvsM?wlR>%8Nhp6N>`lGL z&Obo)wDs$p0Djf~Dg~bdJ^sW~&C)xO@tCXu;t0X<5p8ITQUaD<2c1Kg)~lD(V)mZv zlhDWI9=@nLPSl2#e$|rMyFw3dD1LeHzQ@%BXE8e@nuPON&7{5VX41YNg~rbCUs98m zRR6iPv9ZSgoz5}z;g)IQZ6bHd;YgaMIz$p|`n|*KLR$4e&Xfrt|4sKQd!Qp)M-6)n zjddDPHwCo4PXDu95H`3X(?Y)KuA&P?3+<+4xQha1Ul=dq6!mP z%kN*MNav94ZS4@WZgbD5hvr`WccIjnWYYM}y&m?&tUyjY5nCeHo3hX#_0W_zo=kaH zU@rJ?Y}m)Mo;+=L-)~N0-lp3VTyPbsTlpfhaW3ETV;{myP9Nd0U1gjO$*Tx~@?Yp> z`lfx4=(^C8vu;s1UNpZfo1q6SsPdszFEOnH?AM`ThLl4-oRQEV_Rvbt(uvSU9LVVY0?|dj9RP}7y3H<%l>A0vmkM8md3T9KcpK18@ zukW@C<4+Piud=~jEAVgsr>!%MX(9^Z_}izAT2O21MyXr5S_lXQL^Mb|KqKDqMnFRl zDx#EPA>IZxXfP;73mGJu24e6kg@RBcC`1i0s8M4i#*28PUkm|7F)_{->mK_0ZQt+x zXQyv=cHWnr#o=XPIfcf7_3S5YMhZ9Gx5BSFGD`a`F4Xp;@2;ivQked6MP}6Lu;_%x ziIuMkcfQ@wZ#(cgvVBf*Z1U#VWx@T1xWLKGt>w^GGl)rYsC_L%!^wE!smu%my>-_x zi4J-bqQ+WHI#TDEVem5bQ*8NOP&EvE?UYsap;DT~n3Ep?Dko(psp}HsSE0gwYzFBk zSso0zb{frb{Rhs)Az;RZQouD5%yv4_Y8a)2ZaY;?3XPuqRE_9b)bf>RMS`-H;^*W? zUkG$zGTA$tpcCRz3`w;#m|IPbT|cQOB%zNIRyLqErCnRBuWNMHV>TshtP?K3$i~YCXsM?v#gj*;djHUGw(>O8*3rom zThaIobR}GJ6Fu7?-bwY)?xY&=$}c%?J7=nt(CL(lel@wLIy;n5-$+%Q+unc0f9ATy zt+r|f{BlrMkzJkU&%(ocbcE(vKb<%HBc8sUUy|T)H_PI7;2HM!frn126yKsL-#s_q zK{ecippM9=xSh8#Y62t<<`*EPh86YHpX+H|+I9}9_g-?UWS58XwiaGefTjT*`_Z%` z=m>7SZXADP5D)V{5AFqnjg;lisu$ibLS+XkOVMcu1+=jN(OHh&K1rx*86!sriae+{ z48uL=;`<#jdq!YWys2{Z_))9xjX*v!R9s?LPWL^uoibKVEjZ~B^nyiK(DsRuYwiqJ z5R&X8M^}~$Xa$e1Hr=e&p=t*nn4}(+RSM`w9u+#V%h7?e0y-VTmlHZv%TaxmfF9;i zp`WW9T~i~VnfRBAgJSDtoYV3B5H;GrJq0!LXEV=Epu8Ew{$enr62(hP!%)>`GG=C{ zM`WAQvy4%Oyxc5!*F-~U$Qw{K(;!aNV~St_C>jBJ=xU}RTIump1dz>#7&zmi0oqfs zJumlS;wZtSAkA|#ix^0CQJq#^PYOC8jxfOSDnczo2!4D`G)@;Y@Z3fHhhAO4T&3)k m-V%mnnls$>XheqDV&o6HE0!p*?;@rj{B6R=r=S#*1o;g`vr_y3 From 669973aafdcf239c2e657601231879659ee3249c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 27 Apr 2017 17:27:53 -0400 Subject: [PATCH 480/933] DEPR: provide deprecations and exposure for NaTType (#16146) xref #16137 --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/__init__.py | 1 + pandas/tslib.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 025ac7673622b..b6feb5cf8cedd 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1246,7 +1246,7 @@ these are now the public subpackages. - The function :func:`~pandas.api.types.union_categoricals` is now importable from ``pandas.api.types``, formerly from ``pandas.types.concat`` (:issue:`15998`) - +- The type import ``pandas.tslib.NaTType`` is deprecated and can be replaced by using ``type(pandas.NaT)`` (:issue:`16146`) .. 
From 075eca1fa085359b71f26d02c2f44156068e123c Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Thu, 27 Apr 2017 17:28:35 -0400
Subject: [PATCH 481/933] DEPR: allow options for using bottleneck/numexpr
 (#16157)

* DEPR: allow options for using bottleneck/numexpr

deprecate pd.computation.expressions.set_use_numexpr()

* DEPR: pandas.types.concat.union_categoricals in favor of
  pandas.api.types.union_categoricals

closes #16140
---
 doc/source/basics.rst                  | 11 +++-
 doc/source/options.rst                 |  6 +-
 doc/source/whatsnew/v0.20.0.txt        |  3 +-
 pandas/computation/__init__.py         |  0
 pandas/computation/expressions.py      | 11 ++++
 pandas/core/computation/expressions.py |  3 +-
 pandas/core/config_init.py             | 35 ++++++++++-
 pandas/core/frame.py                   |  5 +-
 pandas/core/indexes/base.py            |  3 +-
 pandas/core/nanops.py                  | 28 ++++++---
 pandas/io/formats/console.py           | 84 ++++++++++++++++++++++++++
 pandas/io/formats/format.py            | 77 -----------------------
 pandas/tests/api/test_api.py           | 14 +++++
 pandas/tests/test_nanops.py            | 16 ++++-
 pandas/types/__init__.py               |  0
 pandas/types/concat.py                 | 11 ++++
 setup.py                               |  2 +
 17 files changed, 215 insertions(+), 94 deletions(-)
 create mode 100644 pandas/computation/__init__.py
 create mode 100644 pandas/computation/expressions.py
 create mode 100644 pandas/io/formats/console.py
 create mode 100644 pandas/types/__init__.py
 create mode 100644 pandas/types/concat.py

diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index 5789f39266927..7a056203ed447 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -93,7 +93,7 @@ Accelerated operations
 ----------------------

 pandas has support for accelerating certain types of binary numerical and boolean operations using
-the ``numexpr`` library (starting in 0.11.0) and the ``bottleneck`` libraries.
+the ``numexpr`` library and the ``bottleneck`` libraries.

 These libraries are especially useful when dealing with large data sets, and provide large
 speedups. ``numexpr`` uses smart chunking, caching, and multiple cores. ``bottleneck`` is
@@ -114,6 +114,15 @@ Here is a sample (using 100 column x 100,000 row ``DataFrames``):
 You are highly encouraged to install both libraries. See the section
 :ref:`Recommended Dependencies ` for more installation info.

+These are both enabled to be used by default; you can control this by setting the options:
+
+.. versionadded:: 0.20.0
+
+.. code-block:: python
+
+   pd.set_option('compute.use_bottleneck', False)
+   pd.set_option('compute.use_numexpr', False)
+
 ..
_basics.binop: Flexible binary operations diff --git a/doc/source/options.rst b/doc/source/options.rst index 1b219f640cc87..5f6bf2fbb9662 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -425,6 +425,10 @@ mode.use_inf_as_null False True means treat None, NaN, -IN INF as null (old way), False means None and NaN are null, but INF, -INF are not null (new way). +compute.use_bottleneck True Use the bottleneck library to accelerate + computation if it is installed +compute.use_numexpr True Use the numexpr library to accelerate + computation if it is installed =================================== ============ ================================== @@ -538,4 +542,4 @@ Only ``'display.max_rows'`` are serialized and published. .. ipython:: python :suppress: - pd.reset_option('display.html.table_schema') \ No newline at end of file + pd.reset_option('display.html.table_schema') diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index b6feb5cf8cedd..86a598183517c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -521,6 +521,7 @@ Other Enhancements - The ``display.show_dimensions`` option can now also be used to specify whether the length of a ``Series`` should be shown in its repr (:issue:`7117`). - ``parallel_coordinates()`` has gained a ``sort_labels`` keyword arg that sorts class labels and the colours assigned to them (:issue:`15908`) +- Options added to allow one to turn on/off using ``bottleneck`` and ``numexpr``, see :ref:`here ` (:issue:`16157`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations @@ -1217,7 +1218,7 @@ If indicated, a deprecation warning will be issued if you reference theses modul "pandas.lib", "pandas._libs.lib", "X" "pandas.tslib", "pandas._libs.tslib", "X" - "pandas.computation", "pandas.core.computation", "" + "pandas.computation", "pandas.core.computation", "X" "pandas.msgpack", "pandas.io.msgpack", "" "pandas.index", "pandas._libs.index", "" "pandas.algos", "pandas._libs.algos", "" diff --git a/pandas/computation/__init__.py b/pandas/computation/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/computation/expressions.py b/pandas/computation/expressions.py new file mode 100644 index 0000000000000..f46487cfa1b79 --- /dev/null +++ b/pandas/computation/expressions.py @@ -0,0 +1,11 @@ +import warnings + + +def set_use_numexpr(v=True): + warnings.warn("pandas.computation.expressions.set_use_numexpr is " + "deprecated and will be removed in a future version.\n" + "you can toggle usage of numexpr via " + "pandas.get_option('compute.use_numexpr')", + FutureWarning, stacklevel=2) + from pandas import set_option + set_option('compute.use_numexpr', v) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 4eeefb183001e..83d02af65cc85 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -10,6 +10,7 @@ import numpy as np from pandas.core.common import _values_from_object from pandas.core.computation import _NUMEXPR_INSTALLED +from pandas.core.config import get_option if _NUMEXPR_INSTALLED: import numexpr as ne @@ -156,7 +157,7 @@ def _where_numexpr(cond, a, b, raise_on_error=False): # turn myself on -set_use_numexpr(True) +set_use_numexpr(get_option('compute.use_numexpr')) def _has_bool_dtype(x): diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index f8cbdffa27bb4..70ebb170cb763 100644 --- a/pandas/core/config_init.py +++ 
b/pandas/core/config_init.py @@ -15,8 +15,41 @@ from pandas.core.config import (is_int, is_bool, is_text, is_instance_factory, is_one_of_factory, get_default_val, is_callable) -from pandas.io.formats.format import detect_console_encoding +from pandas.io.formats.console import detect_console_encoding +# compute + +use_bottleneck_doc = """ +: bool + Use the bottleneck library to accelerate if it is installed, + the default is True + Valid values: False,True +""" + + +def use_bottleneck_cb(key): + from pandas.core import nanops + nanops.set_use_bottleneck(cf.get_option(key)) + + +use_numexpr_doc = """ +: bool + Use the numexpr library to accelerate computation if it is installed, + the default is True + Valid values: False,True +""" + + +def use_numexpr_cb(key): + from pandas.core.computation import expressions + expressions.set_use_numexpr(cf.get_option(key)) + + +with cf.config_prefix('compute'): + cf.register_option('use_bottleneck', True, use_bottleneck_doc, + validator=is_bool, cb=use_bottleneck_cb) + cf.register_option('use_numexpr', True, use_numexpr_doc, + validator=is_bool, cb=use_numexpr_cb) # # options from the "display" namespace diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 983a6ef3e045a..06bd8f8fc51bc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -91,6 +91,7 @@ import pandas.core.nanops as nanops import pandas.core.ops as ops import pandas.io.formats.format as fmt +import pandas.io.formats.console as console from pandas.io.formats.printing import pprint_thing import pandas.plotting._core as gfx @@ -513,7 +514,7 @@ def _repr_fits_horizontal_(self, ignore_width=False): GH3541, GH3573 """ - width, height = fmt.get_console_size() + width, height = console.get_console_size() max_columns = get_option("display.max_columns") nb_columns = len(self.columns) @@ -577,7 +578,7 @@ def __unicode__(self): max_cols = get_option("display.max_columns") show_dimensions = get_option("display.show_dimensions") if get_option("display.expand_frame_repr"): - width, _ = fmt.get_console_size() + width, _ = console.get_console_size() else: width = None self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols, diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 04458d684d795..4345c74664bf5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -837,7 +837,8 @@ def _format_data(self): """ Return the formatted data as a unicode string """ - from pandas.io.formats.format import get_console_size, _get_adjustment + from pandas.io.formats.console import get_console_size + from pandas.io.formats.format import _get_adjustment display_width, _ = get_console_size() if display_width is None: display_width = get_option('display.width') or 80 diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index e9be43b184537..1d64f87b15761 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,14 +1,8 @@ import itertools import functools -import numpy as np import operator -try: - import bottleneck as bn - _USE_BOTTLENECK = True -except ImportError: # pragma: no cover - _USE_BOTTLENECK = False - +import numpy as np from pandas import compat from pandas._libs import tslib, algos, lib from pandas.core.dtypes.common import ( @@ -23,9 +17,27 @@ is_int_or_datetime_dtype, is_any_int_dtype) from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask from pandas.core.dtypes.missing import isnull, notnull - +from pandas.core.config import get_option from pandas.core.common import _values_from_object +try: + import bottleneck 
as bn + _BOTTLENECK_INSTALLED = True +except ImportError: # pragma: no cover + _BOTTLENECK_INSTALLED = False + +_USE_BOTTLENECK = False + + +def set_use_bottleneck(v=True): + # set/unset to use bottleneck + global _USE_BOTTLENECK + if _BOTTLENECK_INSTALLED: + _USE_BOTTLENECK = v + + +set_use_bottleneck(get_option('compute.use_bottleneck')) + class disallow(object): diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py new file mode 100644 index 0000000000000..0e46b0073a53d --- /dev/null +++ b/pandas/io/formats/console.py @@ -0,0 +1,84 @@ +""" +Internal module for console introspection +""" + +import sys +import locale +from pandas.util.terminal import get_terminal_size + +# ----------------------------------------------------------------------------- +# Global formatting options +_initial_defencoding = None + + +def detect_console_encoding(): + """ + Try to find the most capable encoding supported by the console. + slighly modified from the way IPython handles the same issue. + """ + global _initial_defencoding + + encoding = None + try: + encoding = sys.stdout.encoding or sys.stdin.encoding + except AttributeError: + pass + + # try again for something better + if not encoding or 'ascii' in encoding.lower(): + try: + encoding = locale.getpreferredencoding() + except Exception: + pass + + # when all else fails. this will usually be "ascii" + if not encoding or 'ascii' in encoding.lower(): + encoding = sys.getdefaultencoding() + + # GH3360, save the reported defencoding at import time + # MPL backends may change it. Make available for debugging. + if not _initial_defencoding: + _initial_defencoding = sys.getdefaultencoding() + + return encoding + + +def get_console_size(): + """Return console size as tuple = (width, height). + + Returns (None,None) in non-interactive session. + """ + from pandas import get_option + from pandas.core import common as com + + display_width = get_option('display.width') + # deprecated. + display_height = get_option('display.height', silent=True) + + # Consider + # interactive shell terminal, can detect term size + # interactive non-shell terminal (ipnb/ipqtconsole), cannot detect term + # size non-interactive script, should disregard term size + + # in addition + # width,height have default values, but setting to 'None' signals + # should use Auto-Detection, But only in interactive shell-terminal. + # Simple. yeah. + + if com.in_interactive_session(): + if com.in_ipython_frontend(): + # sane defaults for interactive non-shell terminal + # match default for width,height in config_init + from pandas.core.config import get_default_val + terminal_width = get_default_val('display.width') + terminal_height = get_default_val('display.height') + else: + # pure terminal + terminal_width, terminal_height = get_terminal_size() + else: + terminal_width, terminal_height = None, None + + # Note if the User sets width/Height to None (auto-detection) + # and we're in a script (non-inter), this will return (None,None) + # caller needs to deal. 
+ return (display_width or terminal_width, display_height or terminal_height) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 1a9b3526a7503..43b0b5fbeee90 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -8,7 +8,6 @@ from distutils.version import LooseVersion # pylint: disable=W0141 -import sys from textwrap import dedent from pandas.core.dtypes.missing import isnull, notnull @@ -2290,82 +2289,6 @@ def _has_names(index): return index.name is not None -# ----------------------------------------------------------------------------- -# Global formatting options -_initial_defencoding = None - - -def detect_console_encoding(): - """ - Try to find the most capable encoding supported by the console. - slighly modified from the way IPython handles the same issue. - """ - import locale - global _initial_defencoding - - encoding = None - try: - encoding = sys.stdout.encoding or sys.stdin.encoding - except AttributeError: - pass - - # try again for something better - if not encoding or 'ascii' in encoding.lower(): - try: - encoding = locale.getpreferredencoding() - except Exception: - pass - - # when all else fails. this will usually be "ascii" - if not encoding or 'ascii' in encoding.lower(): - encoding = sys.getdefaultencoding() - - # GH3360, save the reported defencoding at import time - # MPL backends may change it. Make available for debugging. - if not _initial_defencoding: - _initial_defencoding = sys.getdefaultencoding() - - return encoding - - -def get_console_size(): - """Return console size as tuple = (width, height). - - Returns (None,None) in non-interactive session. - """ - display_width = get_option('display.width') - # deprecated. - display_height = get_option('display.height', silent=True) - - # Consider - # interactive shell terminal, can detect term size - # interactive non-shell terminal (ipnb/ipqtconsole), cannot detect term - # size non-interactive script, should disregard term size - - # in addition - # width,height have default values, but setting to 'None' signals - # should use Auto-Detection, But only in interactive shell-terminal. - # Simple. yeah. - - if com.in_interactive_session(): - if com.in_ipython_frontend(): - # sane defaults for interactive non-shell terminal - # match default for width,height in config_init - from pandas.core.config import get_default_val - terminal_width = get_default_val('display.width') - terminal_height = get_default_val('display.height') - else: - # pure terminal - terminal_width, terminal_height = get_terminal_size() - else: - terminal_width, terminal_height = None, None - - # Note if the User sets width/Height to None (auto-detection) - # and we're in a script (non-inter), this will return (None,None) - # caller needs to deal. - return (display_width or terminal_width, display_height or terminal_height) - - class EngFormatter(object): """ Formats float values according to engineering format. 
diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py
index 026a36fd9f4f9..4678db4a52c5a 100644
--- a/pandas/tests/api/test_api.py
+++ b/pandas/tests/api/test_api.py
@@ -217,3 +217,17 @@ class TestTSLib(tm.TestCase):
     def test_deprecation_access_func(self):
         with catch_warnings(record=True):
             pd.tslib.Timestamp('20160101')
+
+
+class TestTypes(tm.TestCase):
+
+    def test_deprecation_access_func(self):
+        with tm.assert_produces_warning(
+                FutureWarning, check_stacklevel=False):
+            from pandas.types.concat import union_categoricals
+            c1 = pd.Categorical(list('aabc'))
+            c2 = pd.Categorical(list('abcd'))
+            union_categoricals(
+                [c1, c2],
+                sort_categories=True,
+                ignore_order=True)
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
index dda466a6937dd..92d7f29366c69 100644
--- a/pandas/tests/test_nanops.py
+++ b/pandas/tests/test_nanops.py
@@ -4,9 +4,10 @@
 from functools import partial

 import pytest
-
 import warnings
 import numpy as np
+
+import pandas as pd
 from pandas import Series, isnull, _np_version_under1p9
 from pandas.core.dtypes.common import is_integer_dtype
 import pandas.core.nanops as nanops
@@ -991,3 +992,16 @@ def test_nans_skipna(self):
     @property
     def prng(self):
         return np.random.RandomState(1234)
+
+
+def test_use_bottleneck():
+    use_bn = pd.get_option('use_bottleneck')
+    if nanops._BOTTLENECK_INSTALLED:
+
+        pd.set_option('use_bottleneck', True)
+        assert pd.get_option('use_bottleneck')
+
+        pd.set_option('use_bottleneck', False)
+        assert not pd.get_option('use_bottleneck')
+
+        pd.set_option('use_bottleneck', use_bn)
diff --git a/pandas/types/__init__.py b/pandas/types/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/pandas/types/concat.py b/pandas/types/concat.py
new file mode 100644
index 0000000000000..477156b38d56d
--- /dev/null
+++ b/pandas/types/concat.py
@@ -0,0 +1,11 @@
+import warnings
+
+
+def union_categoricals(to_union, sort_categories=False, ignore_order=False):
+    warnings.warn("pandas.types.concat.union_categoricals is "
+                  "deprecated and will be removed in a future version.\n"
+                  "use pandas.api.types.union_categoricals",
+                  FutureWarning, stacklevel=2)
+    from pandas.api.types import union_categoricals
+    return union_categoricals(
+        to_union, sort_categories=sort_categories, ignore_order=ignore_order)
diff --git a/setup.py b/setup.py
index 5647e18aa227c..6f3ddbe2ad9d0 100755
--- a/setup.py
+++ b/setup.py
@@ -645,6 +645,7 @@ def pxd(name):
           'pandas.core.reshape',
           'pandas.core.sparse',
           'pandas.core.tools',
+          'pandas.computation',
           'pandas.errors',
           'pandas.io',
           'pandas.io.json',
@@ -654,6 +655,7 @@ def pxd(name):
           'pandas._libs',
           'pandas.plotting',
           'pandas.stats',
+          'pandas.types',
           'pandas.util',
           'pandas.tests',
           'pandas.tests.api',
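Both shims added above (``pandas.computation.expressions`` and ``pandas.types.concat``) warn and then forward to the new locations, so old call sites keep working. A rough sketch of the runtime behaviour (illustrative only; assumes pandas 0.20 with these shims in place):

.. code-block:: python

   import warnings
   import pandas as pd
   import pandas.computation.expressions as expr

   with warnings.catch_warnings(record=True) as w:
       warnings.simplefilter('always')
       expr.set_use_numexpr(False)  # emits FutureWarning, then flips the option
   assert issubclass(w[-1].category, FutureWarning)
   assert not pd.get_option('compute.use_numexpr')
   pd.set_option('compute.use_numexpr', True)  # restore the default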
From b6f65eb98f3d29d9ed61109d1a83f8fb72954c60 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Fri, 28 Apr 2017 06:20:15 -0400
Subject: [PATCH 482/933] DEPR: deprecate is_any_int_dtype and
 is_floating_dtype from pandas.api.types (#16163)

* DEPR: deprecate is_any_int_dtype and is_floating_dtype from pandas.api.types

closes #16042

* is_ docs
---
 doc/source/api.rst              | 58 +++++++++++++++++++++++++++++
 doc/source/whatsnew/v0.20.0.txt |  1 +
 pandas/api/types/__init__.py    |  1 -
 pandas/core/dtypes/api.py       | 25 ++++++++++--
 pandas/tests/api/test_types.py  | 14 ++++--
 5 files changed, 92 insertions(+), 7 deletions(-)

diff --git a/doc/source/api.rst b/doc/source/api.rst
index ab14c2758ae49..7102258318b5b 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -1939,3 +1939,61 @@
    api.types.union_categoricals
    api.types.infer_dtype
    api.types.pandas_dtype
+
+Dtype introspection
+
+.. autosummary::
+   :toctree: generated/
+
+   api.types.is_bool_dtype
+   api.types.is_categorical_dtype
+   api.types.is_complex_dtype
+   api.types.is_datetime64_any_dtype
+   api.types.is_datetime64_dtype
+   api.types.is_datetime64_ns_dtype
+   api.types.is_datetime64tz_dtype
+   api.types.is_extension_type
+   api.types.is_float_dtype
+   api.types.is_int64_dtype
+   api.types.is_integer_dtype
+   api.types.is_interval_dtype
+   api.types.is_numeric_dtype
+   api.types.is_object_dtype
+   api.types.is_period_dtype
+   api.types.is_signed_integer_dtype
+   api.types.is_string_dtype
+   api.types.is_timedelta64_dtype
+   api.types.is_timedelta64_ns_dtype
+   api.types.is_unsigned_integer_dtype
+   api.types.is_sparse
+
+Iterable introspection
+
+.. autosummary::
+   :toctree: generated/
+
+   api.types.is_dict_like
+   api.types.is_file_like
+   api.types.is_list_like
+   api.types.is_named_tuple
+   api.types.is_iterator
+   api.types.is_sequence
+
+Scalar introspection
+
+.. autosummary::
+   :toctree: generated/
+
+   api.types.is_bool
+   api.types.is_categorical
+   api.types.is_complex
+   api.types.is_datetimetz
+   api.types.is_float
+   api.types.is_hashable
+   api.types.is_integer
+   api.types.is_interval
+   api.types.is_number
+   api.types.is_period
+   api.types.is_re
+   api.types.is_re_compilable
+   api.types.is_scalar
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 86a598183517c..720e4a588034e 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -1520,6 +1520,7 @@ Other Deprecations
   * ``pd.match()``, is removed.
   * ``pd.groupby()``, replaced by using the ``.groupby()`` method directly on a ``Series/DataFrame``
   * ``pd.get_store()``, replaced by a direct call to ``pd.HDFStore(...)``
+- ``is_any_int_dtype`` and ``is_floating_dtype`` are deprecated from ``pandas.api.types`` (:issue:`16042`)

 ..
_whatsnew_0200.prior_deprecations: diff --git a/pandas/api/types/__init__.py b/pandas/api/types/__init__.py index 8bda0c75f8540..438e4afa3f580 100644 --- a/pandas/api/types/__init__.py +++ b/pandas/api/types/__init__.py @@ -7,4 +7,3 @@ IntervalDtype) from pandas.core.dtypes.concat import union_categoricals # noqa from pandas._libs.lib import infer_dtype # noqa -del np # noqa diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py index 6dbd3dc6b640c..242c62125664c 100644 --- a/pandas/core/dtypes/api.py +++ b/pandas/core/dtypes/api.py @@ -1,6 +1,6 @@ # flake8: noqa -import numpy as np +import sys from .common import (pandas_dtype, is_dtype_equal, @@ -40,12 +40,10 @@ is_float, is_complex, is_number, - is_any_int_dtype, is_integer_dtype, is_int64_dtype, is_numeric_dtype, is_float_dtype, - is_floating_dtype, is_bool_dtype, is_complex_dtype, is_signed_integer_dtype, @@ -61,3 +59,24 @@ is_hashable, is_named_tuple, is_sequence) + + +# deprecated +m = sys.modules['pandas.core.dtypes.api'] + +for t in ['is_any_int_dtype', 'is_floating_dtype']: + + def outer(t=t): + + def wrapper(arr_or_dtype): + import warnings + import pandas + warnings.warn("{t} is deprecated and will be " + "removed in a future version".format(t=t), + FutureWarning, stacklevel=3) + return getattr(pandas.core.dtypes.common, t)(arr_or_dtype) + return wrapper + + setattr(m, t, outer(t)) + +del sys, m, t, outer diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 6b37501045d40..b9198c42e2eff 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -15,13 +15,13 @@ class TestTypes(Base, tm.TestCase): - allowed = ['is_any_int_dtype', 'is_bool', 'is_bool_dtype', + allowed = ['is_bool', 'is_bool_dtype', 'is_categorical', 'is_categorical_dtype', 'is_complex', 'is_complex_dtype', 'is_datetime64_any_dtype', 'is_datetime64_dtype', 'is_datetime64_ns_dtype', 'is_datetime64tz_dtype', 'is_datetimetz', 'is_dtype_equal', 'is_extension_type', 'is_float', 'is_float_dtype', - 'is_floating_dtype', 'is_int64_dtype', 'is_integer', + 'is_int64_dtype', 'is_integer', 'is_integer_dtype', 'is_number', 'is_numeric_dtype', 'is_object_dtype', 'is_scalar', 'is_sparse', 'is_string_dtype', 'is_signed_integer_dtype', @@ -33,12 +33,13 @@ class TestTypes(Base, tm.TestCase): 'is_list_like', 'is_hashable', 'is_named_tuple', 'is_sequence', 'pandas_dtype', 'union_categoricals', 'infer_dtype'] + deprecated = ['is_any_int_dtype', 'is_floating_dtype'] dtypes = ['CategoricalDtype', 'DatetimeTZDtype', 'PeriodDtype', 'IntervalDtype'] def test_types(self): - self.check(types, self.allowed + self.dtypes) + self.check(types, self.allowed + self.dtypes + self.deprecated) def check_deprecation(self, fold, fnew): with tm.assert_produces_warning(DeprecationWarning): @@ -87,6 +88,13 @@ def test_removed_from_core_common(self): 'ensure_float']: pytest.raises(AttributeError, lambda: getattr(com, t)) + def test_deprecated_from_api_types(self): + + for t in ['is_any_int_dtype', 'is_floating_dtype']: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + getattr(types, t)(1) + def test_moved_infer_dtype(): From a7a0574a0bcb8a4c65ed7f5006972e24f90ae3df Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Fri, 28 Apr 2017 17:25:01 -0400 Subject: [PATCH 483/933] ENH: Support resetting index with tuple name closes #16164 Author: Pietro Battiston Closes #16165 from toobaz/reix_col_name and squashes the following commits: 9e1bdba [Pietro Battiston] REF: reorganize reinsertion code 3b0bb1f [Pietro 
Battiston] ENH: Handle tuples shorter than nlevels gracefully c958de7 [Pietro Battiston] TST: additional test for reset_index with tuple-named index level e12bca1 [Pietro Battiston] ENH: allow tuple index names to be interpreted as full column keys 6315d07 [Pietro Battiston] REF: Avoid duplication in reset_index() when reinserting index columns --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 68 ++++++++++++++++----------------- pandas/tests/test_multilevel.py | 44 ++++++++++++++++++--- 3 files changed, 72 insertions(+), 41 deletions(-) mode change 100755 => 100644 pandas/tests/test_multilevel.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 720e4a588034e..aded04e82ee7e 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -490,6 +490,7 @@ Other Enhancements - ``Series.interpolate()`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) - Addition of a ``level`` keyword to ``DataFrame/Series.rename`` to rename labels in the specified level of a MultiIndex (:issue:`4160`). +- ``DataFrame.reset_index()`` will now interpret a tuple ``index.name`` as a key spanning across levels of ``columns``, if this is a ``MultiIndex`` (:issue:`16164`) - ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs ` (:issue:`15136`) - ``.select_dtypes()`` now allows the string ``datetimetz`` to generically select datetimes with tz (:issue:`14910`) - The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 06bd8f8fc51bc..9a62259202653 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3019,44 +3019,40 @@ def _maybe_casted_values(index, labels=None): if len(level) < len(self.index.levels): new_index = self.index.droplevel(level) - if not drop: - names = self.index.names - zipped = lzip(self.index.levels, self.index.labels) - - multi_col = isinstance(self.columns, MultiIndex) - for i, (lev, lab) in reversed(list(enumerate(zipped))): - col_name = names[i] - if col_name is None: - col_name = 'level_%d' % i - - if multi_col: - if col_fill is None: - col_name = tuple([col_name] * self.columns.nlevels) - else: - name_lst = [col_fill] * self.columns.nlevels - lev_num = self.columns._get_level_number(col_level) - name_lst[lev_num] = col_name - col_name = tuple(name_lst) - - # to ndarray and maybe infer different dtype - level_values = _maybe_casted_values(lev, lab) - if level is None or i in level: - new_obj.insert(0, col_name, level_values) - - elif not drop: - name = self.index.name - if name is None or name == 'index': - name = 'index' if 'index' not in self else 'level_0' - if isinstance(self.columns, MultiIndex): - if col_fill is None: - name = tuple([name] * self.columns.nlevels) - else: - name_lst = [col_fill] * self.columns.nlevels + if not drop: + if isinstance(self.index, MultiIndex): + names = [n if n is not None else ('level_%d' % i) + for (i, n) in enumerate(self.index.names)] + to_insert = lzip(self.index.levels, self.index.labels) + else: + default = 'index' if 'index' not in self else 'level_0' + names = ([default] if self.index.name is None + else [self.index.name]) + to_insert = ((self.index, None),) + + multi_col = isinstance(self.columns, MultiIndex) + for i, (lev, lab) in reversed(list(enumerate(to_insert))): + name = names[i] + if multi_col: + col_name = (list(name) if
isinstance(name, tuple) + else [name]) + if col_fill is None: + if len(col_name) not in (1, self.columns.nlevels): + raise ValueError("col_fill=None is incompatible " + "with incomplete column name " + "{}".format(name)) + col_fill = col_name[0] + lev_num = self.columns._get_level_number(col_level) - name_lst[lev_num] = name + name_lst = [col_fill] * lev_num + col_name + missing = self.columns.nlevels - len(name_lst) + name_lst += [col_fill] * missing name = tuple(name_lst) - values = _maybe_casted_values(self.index) - new_obj.insert(0, name, values) + + # to ndarray and maybe infer different dtype + level_values = _maybe_casted_values(lev, lab) + if level is None or i in level: + new_obj.insert(0, name, level_values) new_obj.index = new_index if not inplace: diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py old mode 100755 new mode 100644 index 1a4603978ce38..e4ed194b75bcd --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2242,16 +2242,50 @@ def test_reset_index_multiindex_columns(self): levels = [['A', ''], ['B', 'b']] df = pd.DataFrame([[0, 2], [1, 3]], columns=pd.MultiIndex.from_tuples(levels)) - expected = df.copy() - df.index.name = 'A' - result = df[['B']].reset_index() - tm.assert_frame_equal(result, expected) + result = df[['B']].rename_axis('A').reset_index() + tm.assert_frame_equal(result, df) # gh-16120: already existing column with tm.assert_raises_regex(ValueError, ("cannot insert \('A', ''\), " "already exists")): - df.reset_index() + df.rename_axis('A').reset_index() + + # gh-16164: multiindex (tuple) full key + result = df.set_index([('A', '')]).reset_index() + tm.assert_frame_equal(result, df) + + # with additional (unnamed) index level + idx_col = pd.DataFrame([[0], [1]], + columns=pd.MultiIndex.from_tuples([('level_0', + '')])) + expected = pd.concat([idx_col, df[[('B', 'b'), ('A', '')]]], axis=1) + result = df.set_index([('B', 'b')], append=True).reset_index() + tm.assert_frame_equal(result, expected) + + # with an index name which is too long a tuple... + with tm.assert_raises_regex(ValueError, + ("Item must have length equal to number " + "of levels.")): + df.rename_axis([('C', 'c', 'i')]).reset_index() + + # or too short... + levels = [['A', 'a', ''], ['B', 'b', 'i']] + df2 = pd.DataFrame([[0, 2], [1, 3]], + columns=pd.MultiIndex.from_tuples(levels)) + idx_col = pd.DataFrame([[0], [1]], + columns=pd.MultiIndex.from_tuples([('C', + 'c', + 'ii')])) + expected = pd.concat([idx_col, df2], axis=1) + result = df2.rename_axis([('C', 'c')]).reset_index(col_fill='ii') + tm.assert_frame_equal(result, expected) + + # ...
which is incompatible with col_fill=None + with tm.assert_raises_regex(ValueError, + ("col_fill=None is incompatible with " + "incomplete column name \('C', 'c'\)")): + df2.rename_axis([('C', 'c')]).reset_index(col_fill=None) def test_set_index_period(self): # GH 6631 From 5bc736c421ecb96e744ddf3261e50bf94ae5b6da Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 29 Apr 2017 16:04:08 -0400 Subject: [PATCH 484/933] MAINT: Remove self.assertEqual from testing (#16169) --- pandas/compat/__init__.py | 2 + pandas/tests/computation/test_eval.py | 99 +- pandas/tests/dtypes/test_cast.py | 45 +- pandas/tests/dtypes/test_common.py | 16 +- pandas/tests/dtypes/test_concat.py | 10 +- pandas/tests/dtypes/test_dtypes.py | 38 +- pandas/tests/dtypes/test_inference.py | 144 +-- pandas/tests/frame/test_alter_axes.py | 42 +- pandas/tests/frame/test_analytics.py | 60 +- pandas/tests/frame/test_api.py | 72 +- pandas/tests/frame/test_apply.py | 33 +- .../tests/frame/test_axis_select_reindex.py | 42 +- pandas/tests/frame/test_block_internals.py | 46 +- pandas/tests/frame/test_combine_concat.py | 38 +- pandas/tests/frame/test_constructors.py | 191 ++-- pandas/tests/frame/test_convert_to.py | 22 +- pandas/tests/frame/test_dtypes.py | 17 +- pandas/tests/frame/test_indexing.py | 152 ++-- pandas/tests/frame/test_missing.py | 2 +- pandas/tests/frame/test_mutate_columns.py | 12 +- pandas/tests/frame/test_nonunique_indexes.py | 4 +- pandas/tests/frame/test_operators.py | 16 +- pandas/tests/frame/test_period.py | 8 +- pandas/tests/frame/test_quantile.py | 40 +- pandas/tests/frame/test_query_eval.py | 2 +- pandas/tests/frame/test_replace.py | 2 +- pandas/tests/frame/test_repr_info.py | 29 +- pandas/tests/frame/test_reshape.py | 45 +- pandas/tests/frame/test_subclass.py | 22 +- pandas/tests/frame/test_timeseries.py | 18 +- pandas/tests/frame/test_to_csv.py | 44 +- pandas/tests/groupby/test_aggregate.py | 6 +- pandas/tests/groupby/test_categorical.py | 2 +- pandas/tests/groupby/test_groupby.py | 153 ++-- pandas/tests/groupby/test_nth.py | 10 +- pandas/tests/groupby/test_timegrouper.py | 6 +- pandas/tests/groupby/test_transform.py | 4 +- pandas/tests/indexes/common.py | 34 +- pandas/tests/indexes/datetimes/test_astype.py | 32 +- .../indexes/datetimes/test_construction.py | 32 +- .../indexes/datetimes/test_date_range.py | 66 +- .../tests/indexes/datetimes/test_datetime.py | 102 +-- .../tests/indexes/datetimes/test_indexing.py | 36 +- pandas/tests/indexes/datetimes/test_misc.py | 157 ++-- pandas/tests/indexes/datetimes/test_ops.py | 120 ++- .../indexes/datetimes/test_partial_slicing.py | 24 +- pandas/tests/indexes/datetimes/test_setops.py | 22 +- pandas/tests/indexes/datetimes/test_tools.py | 143 ++- pandas/tests/indexes/period/test_asfreq.py | 116 +-- .../tests/indexes/period/test_construction.py | 56 +- pandas/tests/indexes/period/test_indexing.py | 76 +- pandas/tests/indexes/period/test_ops.py | 88 +- .../indexes/period/test_partial_slicing.py | 2 +- pandas/tests/indexes/period/test_period.py | 150 ++- pandas/tests/indexes/period/test_setops.py | 14 +- pandas/tests/indexes/period/test_tools.py | 184 ++-- pandas/tests/indexes/test_base.py | 274 +++--- pandas/tests/indexes/test_category.py | 68 +- pandas/tests/indexes/test_interval.py | 96 +- pandas/tests/indexes/test_multi.py | 223 +++-- pandas/tests/indexes/test_numeric.py | 40 +- pandas/tests/indexes/test_range.py | 94 +- .../indexes/timedeltas/test_construction.py | 4 +- .../tests/indexes/timedeltas/test_indexing.py | 12 +- pandas/tests/indexes/timedeltas/test_ops.py | 170 
++-- .../timedeltas/test_partial_slicing.py | 4 +- .../tests/indexes/timedeltas/test_setops.py | 4 +- .../indexes/timedeltas/test_timedelta.py | 51 +- pandas/tests/indexes/timedeltas/test_tools.py | 36 +- pandas/tests/indexing/common.py | 2 +- pandas/tests/indexing/test_callable.py | 4 +- .../indexing/test_chaining_and_caching.py | 8 +- pandas/tests/indexing/test_coercion.py | 90 +- pandas/tests/indexing/test_datetime.py | 8 +- pandas/tests/indexing/test_floats.py | 37 +- pandas/tests/indexing/test_iloc.py | 14 +- pandas/tests/indexing/test_indexing.py | 16 +- pandas/tests/indexing/test_ix.py | 18 +- pandas/tests/indexing/test_loc.py | 14 +- pandas/tests/indexing/test_multiindex.py | 29 +- pandas/tests/indexing/test_panel.py | 18 +- pandas/tests/indexing/test_partial.py | 2 +- pandas/tests/indexing/test_scalar.py | 20 +- .../tests/io/formats/test_eng_formatting.py | 16 +- pandas/tests/io/formats/test_format.py | 494 +++++----- pandas/tests/io/formats/test_printing.py | 70 +- pandas/tests/io/formats/test_style.py | 150 +-- pandas/tests/io/formats/test_to_csv.py | 82 +- pandas/tests/io/formats/test_to_html.py | 58 +- .../tests/io/json/test_json_table_schema.py | 88 +- pandas/tests/io/json/test_normalize.py | 8 +- pandas/tests/io/json/test_pandas.py | 85 +- pandas/tests/io/json/test_ujson.py | 320 +++---- pandas/tests/io/parser/c_parser_only.py | 8 +- pandas/tests/io/parser/common.py | 52 +- pandas/tests/io/parser/converters.py | 12 +- pandas/tests/io/parser/dtypes.py | 4 +- pandas/tests/io/parser/header.py | 2 +- pandas/tests/io/parser/index_col.py | 4 +- pandas/tests/io/parser/na_values.py | 4 +- pandas/tests/io/parser/parse_dates.py | 24 +- pandas/tests/io/parser/python_parser_only.py | 2 +- pandas/tests/io/parser/test_network.py | 4 +- pandas/tests/io/parser/test_textreader.py | 34 +- pandas/tests/io/parser/usecols.py | 2 +- pandas/tests/io/test_clipboard.py | 2 +- pandas/tests/io/test_common.py | 18 +- pandas/tests/io/test_excel.py | 131 ++- pandas/tests/io/test_gbq.py | 2 +- pandas/tests/io/test_html.py | 20 +- pandas/tests/io/test_packers.py | 30 +- pandas/tests/io/test_pytables.py | 122 +-- pandas/tests/io/test_sql.py | 160 ++-- pandas/tests/io/test_stata.py | 22 +- pandas/tests/plotting/common.py | 38 +- pandas/tests/plotting/test_boxplot_method.py | 16 +- pandas/tests/plotting/test_converter.py | 50 +- pandas/tests/plotting/test_datetimelike.py | 242 +++-- pandas/tests/plotting/test_frame.py | 234 +++-- pandas/tests/plotting/test_groupby.py | 4 +- pandas/tests/plotting/test_hist_method.py | 10 +- pandas/tests/plotting/test_misc.py | 4 +- pandas/tests/plotting/test_series.py | 80 +- pandas/tests/reshape/test_concat.py | 81 +- pandas/tests/reshape/test_hashing.py | 2 +- pandas/tests/reshape/test_join.py | 24 +- pandas/tests/reshape/test_merge.py | 20 +- pandas/tests/reshape/test_pivot.py | 50 +- pandas/tests/reshape/test_reshape.py | 64 +- pandas/tests/reshape/test_tile.py | 24 +- pandas/tests/scalar/test_interval.py | 44 +- pandas/tests/scalar/test_period.py | 708 +++++++-------- pandas/tests/scalar/test_period_asfreq.py | 519 ++++++----- pandas/tests/scalar/test_timedelta.py | 414 ++++----- pandas/tests/scalar/test_timestamp.py | 469 +++++----- pandas/tests/series/test_alter_axes.py | 18 +- pandas/tests/series/test_analytics.py | 126 +-- pandas/tests/series/test_api.py | 81 +- pandas/tests/series/test_apply.py | 34 +- pandas/tests/series/test_asof.py | 18 +- pandas/tests/series/test_combine_concat.py | 77 +- pandas/tests/series/test_constructors.py | 126 +-- 
pandas/tests/series/test_datetime_values.py | 21 +- pandas/tests/series/test_indexing.py | 194 ++-- pandas/tests/series/test_internals.py | 4 +- pandas/tests/series/test_io.py | 6 +- pandas/tests/series/test_missing.py | 24 +- pandas/tests/series/test_operators.py | 28 +- pandas/tests/series/test_period.py | 12 +- pandas/tests/series/test_quantile.py | 39 +- pandas/tests/series/test_repr.py | 6 +- pandas/tests/series/test_subclass.py | 12 +- pandas/tests/series/test_timeseries.py | 100 +- pandas/tests/sparse/test_arithmetics.py | 34 +- pandas/tests/sparse/test_array.py | 122 +-- pandas/tests/sparse/test_format.py | 20 +- pandas/tests/sparse/test_frame.py | 92 +- pandas/tests/sparse/test_indexing.py | 125 +-- pandas/tests/sparse/test_libsparse.py | 68 +- pandas/tests/sparse/test_list.py | 14 +- pandas/tests/sparse/test_series.py | 130 +-- pandas/tests/test_algos.py | 30 +- pandas/tests/test_base.py | 80 +- pandas/tests/test_categorical.py | 262 +++--- pandas/tests/test_config.py | 118 +-- pandas/tests/test_multilevel.py | 89 +- pandas/tests/test_nanops.py | 19 +- pandas/tests/test_panel.py | 83 +- pandas/tests/test_panel4d.py | 36 +- pandas/tests/test_resample.py | 142 ++- pandas/tests/test_strings.py | 121 ++- pandas/tests/test_take.py | 4 +- pandas/tests/test_testing.py | 4 +- pandas/tests/test_util.py | 20 +- pandas/tests/test_window.py | 22 +- pandas/tests/tools/test_numeric.py | 10 +- pandas/tests/tseries/test_frequencies.py | 243 +++-- pandas/tests/tseries/test_holiday.py | 123 ++- pandas/tests/tseries/test_offsets.py | 855 +++++++++--------- pandas/tests/tseries/test_timezones.py | 208 ++--- 180 files changed, 6578 insertions(+), 6853 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 7ebdd9735b967..2fe6359fd1ea6 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -104,6 +104,7 @@ def signature(f): map = map zip = zip filter = filter + intern = sys.intern reduce = functools.reduce long = int unichr = chr @@ -146,6 +147,7 @@ def signature(f): # import iterator versions of these functions range = xrange + intern = intern zip = itertools.izip filter = itertools.ifilter map = itertools.imap diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 827a4668ed0bc..f8f84985142a8 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -8,7 +8,7 @@ from numpy.random import randn, rand, randint import numpy as np -from pandas.core.dtypes.common import is_list_like, is_scalar +from pandas.core.dtypes.common import is_bool, is_list_like, is_scalar import pandas as pd from pandas.core import common as com from pandas.errors import PerformanceWarning @@ -209,7 +209,7 @@ def check_equal(self, result, expected): elif isinstance(result, np.ndarray): tm.assert_numpy_array_equal(result, expected) else: - self.assertEqual(result, expected) + assert result == expected def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): skip_these = _scalar_skip @@ -610,30 +610,28 @@ def test_scalar_unary(self): with pytest.raises(TypeError): pd.eval('~1.0', engine=self.engine, parser=self.parser) - self.assertEqual( - pd.eval('-1.0', parser=self.parser, engine=self.engine), -1.0) - self.assertEqual( - pd.eval('+1.0', parser=self.parser, engine=self.engine), +1.0) - - self.assertEqual( - pd.eval('~1', parser=self.parser, engine=self.engine), ~1) - self.assertEqual( - pd.eval('-1', parser=self.parser, engine=self.engine), -1) - self.assertEqual( - pd.eval('+1', 
parser=self.parser, engine=self.engine), +1) - - self.assertEqual( - pd.eval('~True', parser=self.parser, engine=self.engine), ~True) - self.assertEqual( - pd.eval('~False', parser=self.parser, engine=self.engine), ~False) - self.assertEqual( - pd.eval('-True', parser=self.parser, engine=self.engine), -True) - self.assertEqual( - pd.eval('-False', parser=self.parser, engine=self.engine), -False) - self.assertEqual( - pd.eval('+True', parser=self.parser, engine=self.engine), +True) - self.assertEqual( - pd.eval('+False', parser=self.parser, engine=self.engine), +False) + assert pd.eval('-1.0', parser=self.parser, + engine=self.engine) == -1.0 + assert pd.eval('+1.0', parser=self.parser, + engine=self.engine) == +1.0 + assert pd.eval('~1', parser=self.parser, + engine=self.engine) == ~1 + assert pd.eval('-1', parser=self.parser, + engine=self.engine) == -1 + assert pd.eval('+1', parser=self.parser, + engine=self.engine) == +1 + assert pd.eval('~True', parser=self.parser, + engine=self.engine) == ~True + assert pd.eval('~False', parser=self.parser, + engine=self.engine) == ~False + assert pd.eval('-True', parser=self.parser, + engine=self.engine) == -True + assert pd.eval('-False', parser=self.parser, + engine=self.engine) == -False + assert pd.eval('+True', parser=self.parser, + engine=self.engine) == +True + assert pd.eval('+False', parser=self.parser, + engine=self.engine) == +False def test_unary_in_array(self): # GH 11235 @@ -658,50 +656,51 @@ def test_disallow_scalar_bool_ops(self): pd.eval(ex, engine=self.engine, parser=self.parser) def test_identical(self): - # GH 10546 + # see gh-10546 x = 1 result = pd.eval('x', engine=self.engine, parser=self.parser) - self.assertEqual(result, 1) + assert result == 1 assert is_scalar(result) x = 1.5 result = pd.eval('x', engine=self.engine, parser=self.parser) - self.assertEqual(result, 1.5) + assert result == 1.5 assert is_scalar(result) x = False result = pd.eval('x', engine=self.engine, parser=self.parser) - self.assertEqual(result, False) + assert not result + assert is_bool(result) assert is_scalar(result) x = np.array([1]) result = pd.eval('x', engine=self.engine, parser=self.parser) tm.assert_numpy_array_equal(result, np.array([1])) - self.assertEqual(result.shape, (1, )) + assert result.shape == (1, ) x = np.array([1.5]) result = pd.eval('x', engine=self.engine, parser=self.parser) tm.assert_numpy_array_equal(result, np.array([1.5])) - self.assertEqual(result.shape, (1, )) + assert result.shape == (1, ) x = np.array([False]) # noqa result = pd.eval('x', engine=self.engine, parser=self.parser) tm.assert_numpy_array_equal(result, np.array([False])) - self.assertEqual(result.shape, (1, )) + assert result.shape == (1, ) def test_line_continuation(self): # GH 11149 exp = """1 + 2 * \ 5 - 1 + 2 """ result = pd.eval(exp, engine=self.engine, parser=self.parser) - self.assertEqual(result, 12) + assert result == 12 def test_float_truncation(self): # GH 14241 exp = '1000000000.006' result = pd.eval(exp, engine=self.engine, parser=self.parser) expected = np.float64(exp) - self.assertEqual(result, expected) + assert result == expected df = pd.DataFrame({'A': [1000000000.0009, 1000000000.0011, @@ -1121,7 +1120,7 @@ def test_simple_bool_ops(self): ex = '{0} {1} {2}'.format(lhs, op, rhs) res = self.eval(ex) exp = eval(ex) - self.assertEqual(res, exp) + assert res == exp def test_bool_ops_with_constants(self): for op, lhs, rhs in product(expr._bool_ops_syms, ('True', 'False'), @@ -1129,7 +1128,7 @@ def test_bool_ops_with_constants(self): ex = '{0} {1} 
{2}'.format(lhs, op, rhs) res = self.eval(ex) exp = eval(ex) - self.assertEqual(res, exp) + assert res == exp def test_panel_fails(self): with catch_warnings(record=True): @@ -1169,19 +1168,19 @@ def test_truediv(self): res = self.eval('1 / 2', truediv=True) expec = 0.5 - self.assertEqual(res, expec) + assert res == expec res = self.eval('1 / 2', truediv=False) expec = 0.5 - self.assertEqual(res, expec) + assert res == expec res = self.eval('s / 2', truediv=False) expec = 0.5 - self.assertEqual(res, expec) + assert res == expec res = self.eval('s / 2', truediv=True) expec = 0.5 - self.assertEqual(res, expec) + assert res == expec else: res = self.eval(ex, truediv=False) tm.assert_numpy_array_equal(res, np.array([1])) @@ -1191,19 +1190,19 @@ def test_truediv(self): res = self.eval('1 / 2', truediv=True) expec = 0.5 - self.assertEqual(res, expec) + assert res == expec res = self.eval('1 / 2', truediv=False) expec = 0 - self.assertEqual(res, expec) + assert res == expec res = self.eval('s / 2', truediv=False) expec = 0 - self.assertEqual(res, expec) + assert res == expec res = self.eval('s / 2', truediv=True) expec = 0.5 - self.assertEqual(res, expec) + assert res == expec def test_failing_subscript_with_name_error(self): df = DataFrame(np.random.randn(5, 3)) # noqa @@ -1549,7 +1548,7 @@ def test_bool_ops_with_constants(self): else: res = self.eval(ex) exp = eval(ex) - self.assertEqual(res, exp) + assert res == exp def test_simple_bool_ops(self): for op, lhs, rhs in product(expr._bool_ops_syms, (True, False), @@ -1561,7 +1560,7 @@ def test_simple_bool_ops(self): else: res = pd.eval(ex, engine=self.engine, parser=self.parser) exp = eval(ex) - self.assertEqual(res, exp) + assert res == exp class TestOperationsPythonPython(TestOperationsNumExprPython): @@ -1650,14 +1649,14 @@ def test_df_arithmetic_subexpression(self): def check_result_type(self, dtype, expect_dtype): df = DataFrame({'a': np.random.randn(10).astype(dtype)}) - self.assertEqual(df.a.dtype, dtype) + assert df.a.dtype == dtype df.eval("b = sin(a)", engine=self.engine, parser=self.parser, inplace=True) got = df.b expect = np.sin(df.a) - self.assertEqual(expect.dtype, got.dtype) - self.assertEqual(expect_dtype, got.dtype) + assert expect.dtype == got.dtype + assert expect_dtype == got.dtype tm.assert_series_equal(got, expect, check_names=False) def test_result_types(self): diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index 22640729c262f..cbf049b95b6ef 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -164,7 +164,7 @@ def test_maybe_convert_string_to_array(self): assert result.dtype == object result = maybe_convert_string_to_object(1) - self.assertEqual(result, 1) + assert result == 1 arr = np.array(['x', 'y'], dtype=str) result = maybe_convert_string_to_object(arr) @@ -187,31 +187,31 @@ def test_maybe_convert_scalar(self): # pass thru result = maybe_convert_scalar('x') - self.assertEqual(result, 'x') + assert result == 'x' result = maybe_convert_scalar(np.array([1])) - self.assertEqual(result, np.array([1])) + assert result == np.array([1]) # leave scalar dtype result = maybe_convert_scalar(np.int64(1)) - self.assertEqual(result, np.int64(1)) + assert result == np.int64(1) result = maybe_convert_scalar(np.int32(1)) - self.assertEqual(result, np.int32(1)) + assert result == np.int32(1) result = maybe_convert_scalar(np.float32(1)) - self.assertEqual(result, np.float32(1)) + assert result == np.float32(1) result = maybe_convert_scalar(np.int64(1)) - 
self.assertEqual(result, np.float64(1)) + assert result == np.float64(1) # coerce result = maybe_convert_scalar(1) - self.assertEqual(result, np.int64(1)) + assert result == np.int64(1) result = maybe_convert_scalar(1.0) - self.assertEqual(result, np.float64(1)) + assert result == np.float64(1) result = maybe_convert_scalar(Timestamp('20130101')) - self.assertEqual(result, Timestamp('20130101').value) + assert result == Timestamp('20130101').value result = maybe_convert_scalar(datetime(2013, 1, 1)) - self.assertEqual(result, Timestamp('20130101').value) + assert result == Timestamp('20130101').value result = maybe_convert_scalar(Timedelta('1 day 1 min')) - self.assertEqual(result, Timedelta('1 day 1 min').value) + assert result == Timedelta('1 day 1 min').value class TestConvert(tm.TestCase): @@ -291,7 +291,7 @@ def test_numpy_dtypes(self): ((np.dtype('datetime64[ns]'), np.int64), np.object) ) for src, common in testcases: - self.assertEqual(find_common_type(src), common) + assert find_common_type(src) == common with pytest.raises(ValueError): # empty @@ -299,26 +299,25 @@ def test_numpy_dtypes(self): def test_categorical_dtype(self): dtype = CategoricalDtype() - self.assertEqual(find_common_type([dtype]), 'category') - self.assertEqual(find_common_type([dtype, dtype]), 'category') - self.assertEqual(find_common_type([np.object, dtype]), np.object) + assert find_common_type([dtype]) == 'category' + assert find_common_type([dtype, dtype]) == 'category' + assert find_common_type([np.object, dtype]) == np.object def test_datetimetz_dtype(self): dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern') - self.assertEqual(find_common_type([dtype, dtype]), - 'datetime64[ns, US/Eastern]') + assert find_common_type([dtype, dtype]) == 'datetime64[ns, US/Eastern]' for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'), np.dtype('datetime64[ns]'), np.object, np.int64]: - self.assertEqual(find_common_type([dtype, dtype2]), np.object) - self.assertEqual(find_common_type([dtype2, dtype]), np.object) + assert find_common_type([dtype, dtype2]) == np.object + assert find_common_type([dtype2, dtype]) == np.object def test_period_dtype(self): dtype = PeriodDtype(freq='D') - self.assertEqual(find_common_type([dtype, dtype]), 'period[D]') + assert find_common_type([dtype, dtype]) == 'period[D]' for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'), PeriodDtype(freq='2D'), PeriodDtype(freq='H'), np.dtype('datetime64[ns]'), np.object, np.int64]: - self.assertEqual(find_common_type([dtype, dtype2]), np.object) - self.assertEqual(find_common_type([dtype2, dtype]), np.object) + assert find_common_type([dtype, dtype2]) == np.object + assert find_common_type([dtype2, dtype]) == np.object diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 2aad1b6baaac0..0472f0599cd9b 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -30,30 +30,30 @@ def test_invalid_dtype_error(self): def test_numpy_dtype(self): for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']: - self.assertEqual(pandas_dtype(dtype), np.dtype(dtype)) + assert pandas_dtype(dtype) == np.dtype(dtype) def test_numpy_string_dtype(self): # do not parse freq-like string as period dtype - self.assertEqual(pandas_dtype('U'), np.dtype('U')) - self.assertEqual(pandas_dtype('S'), np.dtype('S')) + assert pandas_dtype('U') == np.dtype('U') + assert pandas_dtype('S') == np.dtype('S') def test_datetimetz_dtype(self): for dtype in ['datetime64[ns, US/Eastern]', 'datetime64[ns, 
Asia/Tokyo]', 'datetime64[ns, UTC]']: assert pandas_dtype(dtype) is DatetimeTZDtype(dtype) - self.assertEqual(pandas_dtype(dtype), DatetimeTZDtype(dtype)) - self.assertEqual(pandas_dtype(dtype), dtype) + assert pandas_dtype(dtype) == DatetimeTZDtype(dtype) + assert pandas_dtype(dtype) == dtype def test_categorical_dtype(self): - self.assertEqual(pandas_dtype('category'), CategoricalDtype()) + assert pandas_dtype('category') == CategoricalDtype() def test_period_dtype(self): for dtype in ['period[D]', 'period[3M]', 'period[U]', 'Period[D]', 'Period[3M]', 'Period[U]']: assert pandas_dtype(dtype) is PeriodDtype(dtype) - self.assertEqual(pandas_dtype(dtype), PeriodDtype(dtype)) - self.assertEqual(pandas_dtype(dtype), dtype) + assert pandas_dtype(dtype) == PeriodDtype(dtype) + assert pandas_dtype(dtype) == dtype dtypes = dict(datetime_tz=pandas_dtype('datetime64[ns, US/Eastern]'), diff --git a/pandas/tests/dtypes/test_concat.py b/pandas/tests/dtypes/test_concat.py index e8eb042d78f30..c0be0dc38d27f 100644 --- a/pandas/tests/dtypes/test_concat.py +++ b/pandas/tests/dtypes/test_concat.py @@ -11,7 +11,7 @@ def check_concat(self, to_concat, exp): for klass in [pd.Index, pd.Series]: to_concat_klass = [klass(c) for c in to_concat] res = _concat.get_dtype_kinds(to_concat_klass) - self.assertEqual(res, set(exp)) + assert res == set(exp) def test_get_dtype_kinds(self): to_concat = [['a'], [1, 2]] @@ -60,19 +60,19 @@ def test_get_dtype_kinds_period(self): to_concat = [pd.PeriodIndex(['2011-01'], freq='M'), pd.PeriodIndex(['2011-01'], freq='M')] res = _concat.get_dtype_kinds(to_concat) - self.assertEqual(res, set(['period[M]'])) + assert res == set(['period[M]']) to_concat = [pd.Series([pd.Period('2011-01', freq='M')]), pd.Series([pd.Period('2011-02', freq='M')])] res = _concat.get_dtype_kinds(to_concat) - self.assertEqual(res, set(['object'])) + assert res == set(['object']) to_concat = [pd.PeriodIndex(['2011-01'], freq='M'), pd.PeriodIndex(['2011-01'], freq='D')] res = _concat.get_dtype_kinds(to_concat) - self.assertEqual(res, set(['period[M]', 'period[D]'])) + assert res == set(['period[M]', 'period[D]']) to_concat = [pd.Series([pd.Period('2011-01', freq='M')]), pd.Series([pd.Period('2011-02', freq='D')])] res = _concat.get_dtype_kinds(to_concat) - self.assertEqual(res, set(['object'])) + assert res == set(['object']) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index b02c846d50c89..da3120145fe38 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -124,10 +124,10 @@ def test_subclass(self): assert issubclass(type(a), type(b)) def test_coerce_to_dtype(self): - self.assertEqual(_coerce_to_dtype('datetime64[ns, US/Eastern]'), - DatetimeTZDtype('ns', 'US/Eastern')) - self.assertEqual(_coerce_to_dtype('datetime64[ns, Asia/Tokyo]'), - DatetimeTZDtype('ns', 'Asia/Tokyo')) + assert (_coerce_to_dtype('datetime64[ns, US/Eastern]') == + DatetimeTZDtype('ns', 'US/Eastern')) + assert (_coerce_to_dtype('datetime64[ns, Asia/Tokyo]') == + DatetimeTZDtype('ns', 'Asia/Tokyo')) def test_compat(self): assert is_datetime64tz_dtype(self.dtype) @@ -194,16 +194,14 @@ def test_dst(self): dr2 = date_range('2013-08-01', periods=3, tz='US/Eastern') s2 = Series(dr2, name='A') assert is_datetimetz(s2) - self.assertEqual(s1.dtype, s2.dtype) + assert s1.dtype == s2.dtype def test_parser(self): # pr #11245 for tz, constructor in product(('UTC', 'US/Eastern'), ('M8', 'datetime64')): - self.assertEqual( - DatetimeTZDtype('%s[ns, %s]' % (constructor, tz)), - 
DatetimeTZDtype('ns', tz), - ) + assert (DatetimeTZDtype('%s[ns, %s]' % (constructor, tz)) == + DatetimeTZDtype('ns', tz)) def test_empty(self): dt = DatetimeTZDtype() @@ -222,18 +220,18 @@ def test_construction(self): for s in ['period[D]', 'Period[D]', 'D']: dt = PeriodDtype(s) - self.assertEqual(dt.freq, pd.tseries.offsets.Day()) + assert dt.freq == pd.tseries.offsets.Day() assert is_period_dtype(dt) for s in ['period[3D]', 'Period[3D]', '3D']: dt = PeriodDtype(s) - self.assertEqual(dt.freq, pd.tseries.offsets.Day(3)) + assert dt.freq == pd.tseries.offsets.Day(3) assert is_period_dtype(dt) for s in ['period[26H]', 'Period[26H]', '26H', 'period[1D2H]', 'Period[1D2H]', '1D2H']: dt = PeriodDtype(s) - self.assertEqual(dt.freq, pd.tseries.offsets.Hour(26)) + assert dt.freq == pd.tseries.offsets.Hour(26) assert is_period_dtype(dt) def test_subclass(self): @@ -254,10 +252,8 @@ def test_identity(self): assert PeriodDtype('period[1S1U]') is PeriodDtype('period[1000001U]') def test_coerce_to_dtype(self): - self.assertEqual(_coerce_to_dtype('period[D]'), - PeriodDtype('period[D]')) - self.assertEqual(_coerce_to_dtype('period[3M]'), - PeriodDtype('period[3M]')) + assert _coerce_to_dtype('period[D]') == PeriodDtype('period[D]') + assert _coerce_to_dtype('period[3M]') == PeriodDtype('period[3M]') def test_compat(self): assert not is_datetime64_ns_dtype(self.dtype) @@ -354,7 +350,7 @@ def test_construction(self): for s in ['interval[int64]', 'Interval[int64]', 'int64']: i = IntervalDtype(s) - self.assertEqual(i.subtype, np.dtype('int64')) + assert i.subtype == np.dtype('int64') assert is_interval_dtype(i) def test_construction_generic(self): @@ -393,12 +389,12 @@ def test_is_dtype(self): assert not IntervalDtype.is_dtype(np.float64) def test_identity(self): - self.assertEqual(IntervalDtype('interval[int64]'), - IntervalDtype('interval[int64]')) + assert (IntervalDtype('interval[int64]') == + IntervalDtype('interval[int64]')) def test_coerce_to_dtype(self): - self.assertEqual(_coerce_to_dtype('interval[int64]'), - IntervalDtype('interval[int64]')) + assert (_coerce_to_dtype('interval[int64]') == + IntervalDtype('interval[int64]')) def test_construction_from_string(self): result = IntervalDtype('interval[int64]') diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 3449d6c56167e..ec02a5a200308 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -233,11 +233,11 @@ def test_infer_dtype_bytes(self): # string array of bytes arr = np.array(list('abc'), dtype='S1') - self.assertEqual(lib.infer_dtype(arr), compare) + assert lib.infer_dtype(arr) == compare # object array of bytes arr = arr.astype(object) - self.assertEqual(lib.infer_dtype(arr), compare) + assert lib.infer_dtype(arr) == compare def test_isinf_scalar(self): # GH 11352 @@ -409,58 +409,58 @@ class TestTypeInference(tm.TestCase): def test_length_zero(self): result = lib.infer_dtype(np.array([], dtype='i4')) - self.assertEqual(result, 'integer') + assert result == 'integer' result = lib.infer_dtype([]) - self.assertEqual(result, 'empty') + assert result == 'empty' def test_integers(self): arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O') result = lib.infer_dtype(arr) - self.assertEqual(result, 'integer') + assert result == 'integer' arr = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'], dtype='O') result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed-integer') + assert result == 'mixed-integer' arr = np.array([1, 2, 3, 4, 5], 
dtype='i4') result = lib.infer_dtype(arr) - self.assertEqual(result, 'integer') + assert result == 'integer' def test_bools(self): arr = np.array([True, False, True, True, True], dtype='O') result = lib.infer_dtype(arr) - self.assertEqual(result, 'boolean') + assert result == 'boolean' arr = np.array([np.bool_(True), np.bool_(False)], dtype='O') result = lib.infer_dtype(arr) - self.assertEqual(result, 'boolean') + assert result == 'boolean' arr = np.array([True, False, True, 'foo'], dtype='O') result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed') + assert result == 'mixed' arr = np.array([True, False, True], dtype=bool) result = lib.infer_dtype(arr) - self.assertEqual(result, 'boolean') + assert result == 'boolean' def test_floats(self): arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O') result = lib.infer_dtype(arr) - self.assertEqual(result, 'floating') + assert result == 'floating' arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'], dtype='O') result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed-integer') + assert result == 'mixed-integer' arr = np.array([1, 2, 3, 4, 5], dtype='f4') result = lib.infer_dtype(arr) - self.assertEqual(result, 'floating') + assert result == 'floating' arr = np.array([1, 2, 3, 4, 5], dtype='f8') result = lib.infer_dtype(arr) - self.assertEqual(result, 'floating') + assert result == 'floating' def test_string(self): pass @@ -472,198 +472,198 @@ def test_datetime(self): dates = [datetime(2012, 1, x) for x in range(1, 20)] index = Index(dates) - self.assertEqual(index.inferred_type, 'datetime64') + assert index.inferred_type == 'datetime64' def test_infer_dtype_datetime(self): arr = np.array([Timestamp('2011-01-01'), Timestamp('2011-01-02')]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') + assert lib.infer_dtype(arr) == 'datetime' arr = np.array([np.datetime64('2011-01-01'), np.datetime64('2011-01-01')], dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'datetime64') + assert lib.infer_dtype(arr) == 'datetime64' arr = np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') + assert lib.infer_dtype(arr) == 'datetime' # starts with nan for n in [pd.NaT, np.nan]: arr = np.array([n, pd.Timestamp('2011-01-02')]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') + assert lib.infer_dtype(arr) == 'datetime' arr = np.array([n, np.datetime64('2011-01-02')]) - self.assertEqual(lib.infer_dtype(arr), 'datetime64') + assert lib.infer_dtype(arr) == 'datetime64' arr = np.array([n, datetime(2011, 1, 1)]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') + assert lib.infer_dtype(arr) == 'datetime' arr = np.array([n, pd.Timestamp('2011-01-02'), n]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') + assert lib.infer_dtype(arr) == 'datetime' arr = np.array([n, np.datetime64('2011-01-02'), n]) - self.assertEqual(lib.infer_dtype(arr), 'datetime64') + assert lib.infer_dtype(arr) == 'datetime64' arr = np.array([n, datetime(2011, 1, 1), n]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') + assert lib.infer_dtype(arr) == 'datetime' # different type of nat arr = np.array([np.timedelta64('nat'), np.datetime64('2011-01-02')], dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'mixed') + assert lib.infer_dtype(arr) == 'mixed' arr = np.array([np.datetime64('2011-01-02'), np.timedelta64('nat')], dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'mixed') + assert lib.infer_dtype(arr) == 'mixed' # mixed datetime arr = np.array([datetime(2011, 1, 1), 
pd.Timestamp('2011-01-02')]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') + assert lib.infer_dtype(arr) == 'datetime' # should be datetime? arr = np.array([np.datetime64('2011-01-01'), pd.Timestamp('2011-01-02')]) - self.assertEqual(lib.infer_dtype(arr), 'mixed') + assert lib.infer_dtype(arr) == 'mixed' arr = np.array([pd.Timestamp('2011-01-02'), np.datetime64('2011-01-01')]) - self.assertEqual(lib.infer_dtype(arr), 'mixed') + assert lib.infer_dtype(arr) == 'mixed' arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1]) - self.assertEqual(lib.infer_dtype(arr), 'mixed-integer') + assert lib.infer_dtype(arr) == 'mixed-integer' arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1.1]) - self.assertEqual(lib.infer_dtype(arr), 'mixed') + assert lib.infer_dtype(arr) == 'mixed' arr = np.array([np.nan, '2011-01-01', pd.Timestamp('2011-01-02')]) - self.assertEqual(lib.infer_dtype(arr), 'mixed') + assert lib.infer_dtype(arr) == 'mixed' def test_infer_dtype_timedelta(self): arr = np.array([pd.Timedelta('1 days'), pd.Timedelta('2 days')]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') + assert lib.infer_dtype(arr) == 'timedelta' arr = np.array([np.timedelta64(1, 'D'), np.timedelta64(2, 'D')], dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') + assert lib.infer_dtype(arr) == 'timedelta' arr = np.array([timedelta(1), timedelta(2)]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') + assert lib.infer_dtype(arr) == 'timedelta' # starts with nan for n in [pd.NaT, np.nan]: arr = np.array([n, Timedelta('1 days')]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') + assert lib.infer_dtype(arr) == 'timedelta' arr = np.array([n, np.timedelta64(1, 'D')]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') + assert lib.infer_dtype(arr) == 'timedelta' arr = np.array([n, timedelta(1)]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') + assert lib.infer_dtype(arr) == 'timedelta' arr = np.array([n, pd.Timedelta('1 days'), n]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') + assert lib.infer_dtype(arr) == 'timedelta' arr = np.array([n, np.timedelta64(1, 'D'), n]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') + assert lib.infer_dtype(arr) == 'timedelta' arr = np.array([n, timedelta(1), n]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') + assert lib.infer_dtype(arr) == 'timedelta' # different type of nat arr = np.array([np.datetime64('nat'), np.timedelta64(1, 'D')], dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'mixed') + assert lib.infer_dtype(arr) == 'mixed' arr = np.array([np.timedelta64(1, 'D'), np.datetime64('nat')], dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'mixed') + assert lib.infer_dtype(arr) == 'mixed' def test_infer_dtype_period(self): # GH 13664 arr = np.array([pd.Period('2011-01', freq='D'), pd.Period('2011-02', freq='D')]) - self.assertEqual(lib.infer_dtype(arr), 'period') + assert lib.infer_dtype(arr) == 'period' arr = np.array([pd.Period('2011-01', freq='D'), pd.Period('2011-02', freq='M')]) - self.assertEqual(lib.infer_dtype(arr), 'period') + assert lib.infer_dtype(arr) == 'period' # starts with nan for n in [pd.NaT, np.nan]: arr = np.array([n, pd.Period('2011-01', freq='D')]) - self.assertEqual(lib.infer_dtype(arr), 'period') + assert lib.infer_dtype(arr) == 'period' arr = np.array([n, pd.Period('2011-01', freq='D'), n]) - self.assertEqual(lib.infer_dtype(arr), 'period') + assert lib.infer_dtype(arr) == 'period' # different type of nat arr = np.array([np.datetime64('nat'), pd.Period('2011-01', freq='M')], 
dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'mixed') + assert lib.infer_dtype(arr) == 'mixed' arr = np.array([pd.Period('2011-01', freq='M'), np.datetime64('nat')], dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'mixed') + assert lib.infer_dtype(arr) == 'mixed' def test_infer_dtype_all_nan_nat_like(self): arr = np.array([np.nan, np.nan]) - self.assertEqual(lib.infer_dtype(arr), 'floating') + assert lib.infer_dtype(arr) == 'floating' # nan and None mix are result in mixed arr = np.array([np.nan, np.nan, None]) - self.assertEqual(lib.infer_dtype(arr), 'mixed') + assert lib.infer_dtype(arr) == 'mixed' arr = np.array([None, np.nan, np.nan]) - self.assertEqual(lib.infer_dtype(arr), 'mixed') + assert lib.infer_dtype(arr) == 'mixed' # pd.NaT arr = np.array([pd.NaT]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') + assert lib.infer_dtype(arr) == 'datetime' arr = np.array([pd.NaT, np.nan]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') + assert lib.infer_dtype(arr) == 'datetime' arr = np.array([np.nan, pd.NaT]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') + assert lib.infer_dtype(arr) == 'datetime' arr = np.array([np.nan, pd.NaT, np.nan]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') + assert lib.infer_dtype(arr) == 'datetime' arr = np.array([None, pd.NaT, None]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') + assert lib.infer_dtype(arr) == 'datetime' # np.datetime64(nat) arr = np.array([np.datetime64('nat')]) - self.assertEqual(lib.infer_dtype(arr), 'datetime64') + assert lib.infer_dtype(arr) == 'datetime64' for n in [np.nan, pd.NaT, None]: arr = np.array([n, np.datetime64('nat'), n]) - self.assertEqual(lib.infer_dtype(arr), 'datetime64') + assert lib.infer_dtype(arr) == 'datetime64' arr = np.array([pd.NaT, n, np.datetime64('nat'), n]) - self.assertEqual(lib.infer_dtype(arr), 'datetime64') + assert lib.infer_dtype(arr) == 'datetime64' arr = np.array([np.timedelta64('nat')], dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') + assert lib.infer_dtype(arr) == 'timedelta' for n in [np.nan, pd.NaT, None]: arr = np.array([n, np.timedelta64('nat'), n]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') + assert lib.infer_dtype(arr) == 'timedelta' arr = np.array([pd.NaT, n, np.timedelta64('nat'), n]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') + assert lib.infer_dtype(arr) == 'timedelta' # datetime / timedelta mixed arr = np.array([pd.NaT, np.datetime64('nat'), np.timedelta64('nat'), np.nan]) - self.assertEqual(lib.infer_dtype(arr), 'mixed') + assert lib.infer_dtype(arr) == 'mixed' arr = np.array([np.timedelta64('nat'), np.datetime64('nat')], dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'mixed') + assert lib.infer_dtype(arr) == 'mixed' def test_is_datetimelike_array_all_nan_nat_like(self): arr = np.array([np.nan, pd.NaT, np.datetime64('nat')]) @@ -706,7 +706,7 @@ def test_date(self): dates = [date(2012, 1, x) for x in range(1, 20)] index = Index(dates) - self.assertEqual(index.inferred_type, 'date') + assert index.inferred_type == 'date' def test_to_object_array_tuples(self): r = (5, 6) @@ -729,7 +729,7 @@ def test_object(self): # cannot infer more than this as only a single element arr = np.array([None], dtype='O') result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed') + assert result == 'mixed' def test_to_object_array_width(self): # see gh-13320 @@ -761,17 +761,17 @@ def test_categorical(self): from pandas import Categorical, Series arr = Categorical(list('abc')) result = lib.infer_dtype(arr) - 
self.assertEqual(result, 'categorical') + assert result == 'categorical' result = lib.infer_dtype(Series(arr)) - self.assertEqual(result, 'categorical') + assert result == 'categorical' arr = Categorical(list('abc'), categories=['cegfab'], ordered=True) result = lib.infer_dtype(arr) - self.assertEqual(result, 'categorical') + assert result == 'categorical' result = lib.infer_dtype(Series(arr)) - self.assertEqual(result, 'categorical') + assert result == 'categorical' class TestNumberScalar(tm.TestCase): diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 303c8cb6e858a..34ab0b72f9b9a 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -69,7 +69,7 @@ def test_set_index2(self): assert_frame_equal(result, expected) assert_frame_equal(result_nodrop, expected_nodrop) - self.assertEqual(result.index.name, index.name) + assert result.index.name == index.name # inplace, single df2 = df.copy() @@ -97,7 +97,7 @@ def test_set_index2(self): assert_frame_equal(result, expected) assert_frame_equal(result_nodrop, expected_nodrop) - self.assertEqual(result.index.names, index.names) + assert result.index.names == index.names # inplace df2 = df.copy() @@ -127,7 +127,7 @@ def test_set_index2(self): # Series result = df.set_index(df.C) - self.assertEqual(result.index.name, 'C') + assert result.index.name == 'C' def test_set_index_nonuniq(self): df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], @@ -174,7 +174,7 @@ def test_construction_with_categorical_index(self): idf = df.set_index('B') str(idf) tm.assert_index_equal(idf.index, ci, check_names=False) - self.assertEqual(idf.index.name, 'B') + assert idf.index.name == 'B' # from a CategoricalIndex df = DataFrame({'A': np.random.randn(10), @@ -182,17 +182,17 @@ def test_construction_with_categorical_index(self): idf = df.set_index('B') str(idf) tm.assert_index_equal(idf.index, ci, check_names=False) - self.assertEqual(idf.index.name, 'B') + assert idf.index.name == 'B' idf = df.set_index('B').reset_index().set_index('B') str(idf) tm.assert_index_equal(idf.index, ci, check_names=False) - self.assertEqual(idf.index.name, 'B') + assert idf.index.name == 'B' new_df = idf.reset_index() new_df.index = df.B tm.assert_index_equal(new_df.index, ci, check_names=False) - self.assertEqual(idf.index.name, 'B') + assert idf.index.name == 'B' def test_set_index_cast_datetimeindex(self): df = DataFrame({'A': [datetime(2000, 1, 1) + timedelta(i) @@ -224,7 +224,7 @@ def test_set_index_cast_datetimeindex(self): df['B'] = i result = df['B'] assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, 'B') + assert result.name == 'B' # keep the timezone result = i.to_series(keep_tz=True) @@ -241,7 +241,7 @@ def test_set_index_cast_datetimeindex(self): df['D'] = i.to_pydatetime() result = df['D'] assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, 'D') + assert result.name == 'D' # GH 6785 # set the index manually @@ -279,9 +279,9 @@ def test_set_index_timezone(self): i = pd.to_datetime(["2014-01-01 10:10:10"], utc=True).tz_convert('Europe/Rome') df = DataFrame({'i': i}) - self.assertEqual(df.set_index(i).index[0].hour, 11) - self.assertEqual(pd.DatetimeIndex(pd.Series(df.i))[0].hour, 11) - self.assertEqual(df.set_index(df.i).index[0].hour, 11) + assert df.set_index(i).index[0].hour == 11 + assert pd.DatetimeIndex(pd.Series(df.i))[0].hour == 11 + assert df.set_index(df.i).index[0].hour == 11 def test_set_index_dst(self): 
di = pd.date_range('2006-10-29 00:00:00', periods=3, @@ -365,7 +365,7 @@ def test_dti_set_index_reindex(self): # TODO: unused? result = df.set_index(new_index) # noqa - self.assertEqual(new_index.freq, index.freq) + assert new_index.freq == index.freq # Renaming @@ -416,7 +416,7 @@ def test_rename(self): renamed = renamer.rename(index={'foo': 'bar', 'bar': 'foo'}) tm.assert_index_equal(renamed.index, pd.Index(['bar', 'foo'], name='name')) - self.assertEqual(renamed.index.name, renamer.index.name) + assert renamed.index.name == renamer.index.name def test_rename_multiindex(self): @@ -440,8 +440,8 @@ def test_rename_multiindex(self): names=['fizz', 'buzz']) tm.assert_index_equal(renamed.index, new_index) tm.assert_index_equal(renamed.columns, new_columns) - self.assertEqual(renamed.index.names, df.index.names) - self.assertEqual(renamed.columns.names, df.columns.names) + assert renamed.index.names == df.index.names + assert renamed.columns.names == df.columns.names # # with specifying a level (GH13766) @@ -609,7 +609,7 @@ def test_reset_index(self): # preserve column names self.frame.columns.name = 'columns' resetted = self.frame.reset_index() - self.assertEqual(resetted.columns.name, 'columns') + assert resetted.columns.name == 'columns' # only remove certain columns frame = self.frame.reset_index().set_index(['index', 'A', 'B']) @@ -649,10 +649,10 @@ def test_reset_index_right_dtype(self): df = DataFrame(s1) resetted = s1.reset_index() - self.assertEqual(resetted['time'].dtype, np.float64) + assert resetted['time'].dtype == np.float64 resetted = df.reset_index() - self.assertEqual(resetted['time'].dtype, np.float64) + assert resetted['time'].dtype == np.float64 def test_reset_index_multiindex_col(self): vals = np.random.randn(3, 3).astype(object) @@ -752,7 +752,7 @@ def test_set_index_names(self): df = pd.util.testing.makeDataFrame() df.index.name = 'name' - self.assertEqual(df.set_index(df.index).index.names, ['name']) + assert df.set_index(df.index).index.names == ['name'] mi = MultiIndex.from_arrays(df[['A', 'B']].T.values, names=['A', 'B']) mi2 = MultiIndex.from_arrays(df[['A', 'B', 'A', 'B']].T.values, @@ -760,7 +760,7 @@ def test_set_index_names(self): df = df.set_index(['A', 'B']) - self.assertEqual(df.set_index(df.index).index.names, ['A', 'B']) + assert df.set_index(df.index).index.names == ['A', 'B'] # Check that set_index isn't converting a MultiIndex into an Index assert isinstance(df.set_index(df.index).index, MultiIndex) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 8f46f055343d4..89ee096b4434e 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -83,8 +83,8 @@ def test_corr_nooverlap(self): rs = df.corr(meth) assert isnull(rs.loc['A', 'B']) assert isnull(rs.loc['B', 'A']) - self.assertEqual(rs.loc['A', 'A'], 1) - self.assertEqual(rs.loc['B', 'B'], 1) + assert rs.loc['A', 'A'] == 1 + assert rs.loc['B', 'B'] == 1 assert isnull(rs.loc['C', 'C']) def test_corr_constant(self): @@ -335,8 +335,8 @@ def test_describe_datetime_columns(self): '50%', '75%', 'max']) expected.columns = exp_columns tm.assert_frame_equal(result, expected) - self.assertEqual(result.columns.freq, 'MS') - self.assertEqual(result.columns.tz, expected.columns.tz) + assert result.columns.freq == 'MS' + assert result.columns.tz == expected.columns.tz def test_describe_timedelta_values(self): # GH 6145 @@ -373,7 +373,7 @@ def test_describe_timedelta_values(self): "50% 3 days 00:00:00 0 days 03:00:00\n" "75% 4 days 00:00:00 
0 days 04:00:00\n" "max 5 days 00:00:00 0 days 05:00:00") - self.assertEqual(repr(res), exp_repr) + assert repr(res) == exp_repr def test_reduce_mixed_frame(self): # GH 6806 @@ -462,7 +462,7 @@ def test_stat_operators_attempt_obj_array(self): for df in [df1, df2]: for meth in methods: - self.assertEqual(df.values.dtype, np.object_) + assert df.values.dtype == np.object_ result = getattr(df, meth)(1) expected = getattr(df.astype('f8'), meth)(1) @@ -508,7 +508,7 @@ def test_cummin(self): # fix issue cummin_xs = self.tsframe.cummin(axis=1) - self.assertEqual(np.shape(cummin_xs), np.shape(self.tsframe)) + assert np.shape(cummin_xs) == np.shape(self.tsframe) def test_cummax(self): self.tsframe.loc[5:10, 0] = nan @@ -531,7 +531,7 @@ def test_cummax(self): # fix issue cummax_xs = self.tsframe.cummax(axis=1) - self.assertEqual(np.shape(cummax_xs), np.shape(self.tsframe)) + assert np.shape(cummax_xs) == np.shape(self.tsframe) def test_max(self): self._check_stat_op('max', np.max, check_dates=True) @@ -629,7 +629,7 @@ def test_cumsum(self): # fix issue cumsum_xs = self.tsframe.cumsum(axis=1) - self.assertEqual(np.shape(cumsum_xs), np.shape(self.tsframe)) + assert np.shape(cumsum_xs) == np.shape(self.tsframe) def test_cumprod(self): self.tsframe.loc[5:10, 0] = nan @@ -648,7 +648,7 @@ def test_cumprod(self): # fix issue cumprod_xs = self.tsframe.cumprod(axis=1) - self.assertEqual(np.shape(cumprod_xs), np.shape(self.tsframe)) + assert np.shape(cumprod_xs) == np.shape(self.tsframe) # ints df = self.tsframe.fillna(0).astype(int) @@ -711,7 +711,7 @@ def alt(x): kurt2 = df.kurt(level=0).xs('bar') tm.assert_series_equal(kurt, kurt2, check_names=False) assert kurt.name is None - self.assertEqual(kurt2.name, 'bar') + assert kurt2.name == 'bar' def _check_stat_op(self, name, alternative, frame=None, has_skipna=True, has_numeric_only=False, check_dtype=True, @@ -771,8 +771,8 @@ def wrapper(x): # check dtypes if check_dtype: lcd_dtype = frame.values.dtype - self.assertEqual(lcd_dtype, result0.dtype) - self.assertEqual(lcd_dtype, result1.dtype) + assert lcd_dtype == result0.dtype + assert lcd_dtype == result1.dtype # result = f(axis=1) # comp = frame.apply(alternative, axis=1).reindex(result.index) @@ -860,16 +860,16 @@ def test_operators_timedelta64(self): # min result = diffs.min() - self.assertEqual(result[0], diffs.loc[0, 'A']) - self.assertEqual(result[1], diffs.loc[0, 'B']) + assert result[0] == diffs.loc[0, 'A'] + assert result[1] == diffs.loc[0, 'B'] result = diffs.min(axis=1) assert (result == diffs.loc[0, 'B']).all() # max result = diffs.max() - self.assertEqual(result[0], diffs.loc[2, 'A']) - self.assertEqual(result[1], diffs.loc[2, 'B']) + assert result[0] == diffs.loc[2, 'A'] + assert result[1] == diffs.loc[2, 'B'] result = diffs.max(axis=1) assert (result == diffs['A']).all() @@ -920,7 +920,7 @@ def test_operators_timedelta64(self): df = DataFrame({'time': date_range('20130102', periods=5), 'time2': date_range('20130105', periods=5)}) df['off1'] = df['time2'] - df['time'] - self.assertEqual(df['off1'].dtype, 'timedelta64[ns]') + assert df['off1'].dtype == 'timedelta64[ns]' df['off2'] = df['time'] - df['time2'] df._consolidate_inplace() @@ -932,8 +932,8 @@ def test_sum_corner(self): axis1 = self.empty.sum(1) assert isinstance(axis0, Series) assert isinstance(axis1, Series) - self.assertEqual(len(axis0), 0) - self.assertEqual(len(axis1), 0) + assert len(axis0) == 0 + assert len(axis1) == 0 def test_sum_object(self): values = self.frame.values.astype(int) @@ -963,7 +963,7 @@ def test_mean_corner(self): 
# take mean of boolean column self.frame['bool'] = self.frame['A'] > 0 means = self.frame.mean(0) - self.assertEqual(means['bool'], self.frame['bool'].values.mean()) + assert means['bool'] == self.frame['bool'].values.mean() def test_stats_mixed_type(self): # don't blow up @@ -999,7 +999,7 @@ def test_cumsum_corner(self): def test_sum_bools(self): df = DataFrame(index=lrange(1), columns=lrange(10)) bools = isnull(df) - self.assertEqual(bools.sum(axis=1)[0], 10) + assert bools.sum(axis=1)[0] == 10 # Index of max / min @@ -1307,7 +1307,7 @@ def test_drop_duplicates(self): result = df.drop_duplicates('AAA', keep=False) expected = df.loc[[]] tm.assert_frame_equal(result, expected) - self.assertEqual(len(result), 0) + assert len(result) == 0 # multi column expected = df.loc[[0, 1, 2, 3]] @@ -1380,7 +1380,7 @@ def test_drop_duplicates(self): df = df.append([[1] + [0] * 8], ignore_index=True) for keep in ['first', 'last', False]: - self.assertEqual(df.duplicated(keep=keep).sum(), 0) + assert df.duplicated(keep=keep).sum() == 0 def test_drop_duplicates_for_take_all(self): df = DataFrame({'AAA': ['foo', 'bar', 'baz', 'bar', @@ -1435,7 +1435,7 @@ def test_drop_duplicates_tuple(self): result = df.drop_duplicates(('AA', 'AB'), keep=False) expected = df.loc[[]] # empty df - self.assertEqual(len(result), 0) + assert len(result) == 0 tm.assert_frame_equal(result, expected) # multi column @@ -1464,7 +1464,7 @@ def test_drop_duplicates_NA(self): result = df.drop_duplicates('A', keep=False) expected = df.loc[[]] # empty df tm.assert_frame_equal(result, expected) - self.assertEqual(len(result), 0) + assert len(result) == 0 # multi column result = df.drop_duplicates(['A', 'B']) @@ -1499,7 +1499,7 @@ def test_drop_duplicates_NA(self): result = df.drop_duplicates('C', keep=False) expected = df.loc[[]] # empty df tm.assert_frame_equal(result, expected) - self.assertEqual(len(result), 0) + assert len(result) == 0 # multi column result = df.drop_duplicates(['C', 'B']) @@ -1574,7 +1574,7 @@ def test_drop_duplicates_inplace(self): expected = orig.loc[[]] result = df tm.assert_frame_equal(result, expected) - self.assertEqual(len(df), 0) + assert len(df) == 0 # multi column df = orig.copy() @@ -1840,11 +1840,11 @@ def test_clip_against_series(self): result = clipped_df.loc[lb_mask, i] tm.assert_series_equal(result, lb[lb_mask], check_names=False) - self.assertEqual(result.name, i) + assert result.name == i result = clipped_df.loc[ub_mask, i] tm.assert_series_equal(result, ub[ub_mask], check_names=False) - self.assertEqual(result.name, i) + assert result.name == i tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i]) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 6b1e9d66d2071..d2a1e32f015b2 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -41,16 +41,16 @@ def test_copy_index_name_checking(self): def test_getitem_pop_assign_name(self): s = self.frame['A'] - self.assertEqual(s.name, 'A') + assert s.name == 'A' s = self.frame.pop('A') - self.assertEqual(s.name, 'A') + assert s.name == 'A' s = self.frame.loc[:, 'B'] - self.assertEqual(s.name, 'B') + assert s.name == 'B' s2 = s.loc[:] - self.assertEqual(s2.name, 'B') + assert s2.name == 'B' def test_get_value(self): for idx in self.frame.index: @@ -75,17 +75,17 @@ class TestDataFrameMisc(tm.TestCase, SharedWithSparse, TestData): def test_get_axis(self): f = self.frame - self.assertEqual(f._get_axis_number(0), 0) - self.assertEqual(f._get_axis_number(1), 1) - 
self.assertEqual(f._get_axis_number('index'), 0) - self.assertEqual(f._get_axis_number('rows'), 0) - self.assertEqual(f._get_axis_number('columns'), 1) - - self.assertEqual(f._get_axis_name(0), 'index') - self.assertEqual(f._get_axis_name(1), 'columns') - self.assertEqual(f._get_axis_name('index'), 'index') - self.assertEqual(f._get_axis_name('rows'), 'index') - self.assertEqual(f._get_axis_name('columns'), 'columns') + assert f._get_axis_number(0) == 0 + assert f._get_axis_number(1) == 1 + assert f._get_axis_number('index') == 0 + assert f._get_axis_number('rows') == 0 + assert f._get_axis_number('columns') == 1 + + assert f._get_axis_name(0) == 'index' + assert f._get_axis_name(1) == 'columns' + assert f._get_axis_name('index') == 'index' + assert f._get_axis_name('rows') == 'index' + assert f._get_axis_name('columns') == 'columns' assert f._get_axis(0) is f.index assert f._get_axis(1) is f.columns @@ -154,7 +154,7 @@ def test_nonzero(self): def test_iteritems(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) for k, v in compat.iteritems(df): - self.assertEqual(type(v), Series) + assert type(v) == Series def test_iter(self): assert tm.equalContents(list(self.frame), self.frame.columns) @@ -183,27 +183,25 @@ def test_itertuples(self): df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]}) dfaa = df[['a', 'a']] - self.assertEqual(list(dfaa.itertuples()), [ - (0, 1, 1), (1, 2, 2), (2, 3, 3)]) - self.assertEqual(repr(list(df.itertuples(name=None))), - '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]') + assert (list(dfaa.itertuples()) == + [(0, 1, 1), (1, 2, 2), (2, 3, 3)]) + assert (repr(list(df.itertuples(name=None))) == + '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]') tup = next(df.itertuples(name='TestName')) - # no support for field renaming in Python 2.6, regular tuples are - # returned if sys.version >= LooseVersion('2.7'): - self.assertEqual(tup._fields, ('Index', 'a', 'b')) - self.assertEqual((tup.Index, tup.a, tup.b), tup) - self.assertEqual(type(tup).__name__, 'TestName') + assert tup._fields == ('Index', 'a', 'b') + assert (tup.Index, tup.a, tup.b) == tup + assert type(tup).__name__ == 'TestName' df.columns = ['def', 'return'] tup2 = next(df.itertuples(name='TestName')) - self.assertEqual(tup2, (0, 1, 4)) + assert tup2 == (0, 1, 4) if sys.version >= LooseVersion('2.7'): - self.assertEqual(tup2._fields, ('Index', '_1', '_2')) + assert tup2._fields == ('Index', '_1', '_2') df3 = DataFrame(dict(('f' + str(i), [i]) for i in range(1024))) # will raise SyntaxError if trying to create namedtuple @@ -212,7 +210,7 @@ def test_itertuples(self): assert isinstance(tup3, tuple) def test_len(self): - self.assertEqual(len(self.frame), len(self.frame.index)) + assert len(self.frame) == len(self.frame.index) def test_as_matrix(self): frame = self.frame @@ -225,15 +223,15 @@ def test_as_matrix(self): if np.isnan(value): assert np.isnan(frame[col][i]) else: - self.assertEqual(value, frame[col][i]) + assert value == frame[col][i] # mixed type mat = self.mixed_frame.as_matrix(['foo', 'A']) - self.assertEqual(mat[0, 0], 'bar') + assert mat[0, 0] == 'bar' df = DataFrame({'real': [1, 2, 3], 'complex': [1j, 2j, 3j]}) mat = df.as_matrix() - self.assertEqual(mat[0, 0], 1j) + assert mat[0, 0] == 1j # single block corner case mat = self.frame.as_matrix(['A', 'B']) @@ -262,7 +260,7 @@ def test_transpose(self): if np.isnan(value): assert np.isnan(frame[col][idx]) else: - self.assertEqual(value, frame[col][idx]) + assert value == frame[col][idx] # mixed type index, data = tm.getMixedTypeDict() @@ -270,7 +268,7 @@ 
def test_transpose(self): mixed_T = mixed.T for col, s in compat.iteritems(mixed_T): - self.assertEqual(s.dtype, np.object_) + assert s.dtype == np.object_ def test_transpose_get_view(self): dft = self.frame.T @@ -299,23 +297,23 @@ def test_axis_aliases(self): def test_more_asMatrix(self): values = self.mixed_frame.as_matrix() - self.assertEqual(values.shape[1], len(self.mixed_frame.columns)) + assert values.shape[1] == len(self.mixed_frame.columns) def test_repr_with_mi_nat(self): df = DataFrame({'X': [1, 2]}, index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']]) res = repr(df) exp = ' X\nNaT a 1\n2013-01-01 b 2' - self.assertEqual(res, exp) + assert res == exp def test_iteritems_names(self): for k, v in compat.iteritems(self.mixed_frame): - self.assertEqual(v.name, k) + assert v.name == k def test_series_put_names(self): series = self.mixed_frame._series for k, v in compat.iteritems(series): - self.assertEqual(v.name, k) + assert v.name == k def test_empty_nonzero(self): df = DataFrame([1, 2, 3]) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 0bccca5cecb27..5febe8c62abe8 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -97,7 +97,7 @@ def test_apply_empty(self): [], index=pd.Index([], dtype=object))) # Ensure that x.append hasn't been called - self.assertEqual(x, []) + assert x == [] def test_apply_standard_nonunique(self): df = DataFrame( @@ -150,7 +150,7 @@ def test_apply_raw(self): def test_apply_axis1(self): d = self.frame.index[0] tapplied = self.frame.apply(np.mean, axis=1) - self.assertEqual(tapplied[d], np.mean(self.frame.xs(d))) + assert tapplied[d] == np.mean(self.frame.xs(d)) def test_apply_ignore_failures(self): result = self.mixed_frame._apply_standard(np.mean, 0, @@ -284,12 +284,11 @@ def transform2(row): return row try: - transformed = data.apply(transform, axis=1) # noqa + data.apply(transform, axis=1) except AttributeError as e: - self.assertEqual(len(e.args), 2) - self.assertEqual(e.args[1], 'occurred at index 4') - self.assertEqual( - e.args[0], "'float' object has no attribute 'startswith'") + assert len(e.args) == 2 + assert e.args[1] == 'occurred at index 4' + assert e.args[0] == "'float' object has no attribute 'startswith'" def test_apply_bug(self): @@ -383,23 +382,23 @@ def test_apply_dict(self): def test_applymap(self): applied = self.frame.applymap(lambda x: x * 2) - assert_frame_equal(applied, self.frame * 2) - result = self.frame.applymap(type) + tm.assert_frame_equal(applied, self.frame * 2) + self.frame.applymap(type) - # GH #465, function returning tuples + # gh-465: function returning tuples result = self.frame.applymap(lambda x: (x, x)) assert isinstance(result['A'][0], tuple) - # GH 2909, object conversion to float in constructor? + # gh-2909: object conversion to float in constructor? 
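# The mechanical pattern behind this whole series, shown as a standalone,
# hypothetical test (not part of the patch itself): unittest's
# self.assertEqual needs a TestCase, while a bare ``assert`` is rewritten by
# pytest to report both operands on failure, so the conversion loses nothing.
import pandas as pd

def test_applymap_doubles_values():
    df = pd.DataFrame({'A': [1, 2]})
    result = df.applymap(lambda x: x * 2)
    # was: self.assertEqual(result['A'][1], 4)
    assert result['A'][1] == 4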
df = DataFrame(data=[1, 'a']) result = df.applymap(lambda x: x) - self.assertEqual(result.dtypes[0], object) + assert result.dtypes[0] == object df = DataFrame(data=[1., 'a']) result = df.applymap(lambda x: x) - self.assertEqual(result.dtypes[0], object) + assert result.dtypes[0] == object - # GH2786 + # see gh-2786 df = DataFrame(np.random.random((3, 4))) df2 = df.copy() cols = ['a', 'a', 'a', 'a'] @@ -408,16 +407,16 @@ def test_applymap(self): expected = df2.applymap(str) expected.columns = cols result = df.applymap(str) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # datetime/timedelta df['datetime'] = Timestamp('20130101') df['timedelta'] = pd.Timedelta('1 min') result = df.applymap(str) for f in ['datetime', 'timedelta']: - self.assertEqual(result.loc[0, f], str(df.loc[0, f])) + assert result.loc[0, f] == str(df.loc[0, f]) - # GH 8222 + # see gh-8222 empty_frames = [pd.DataFrame(), pd.DataFrame(columns=list('ABC')), pd.DataFrame(index=list('ABC')), diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 2c285c6261415..a563b678a3786 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -37,9 +37,9 @@ def test_drop_names(self): df_inplace_b.drop('b', inplace=True) df_inplace_e.drop('e', axis=1, inplace=True) for obj in (df_dropped_b, df_dropped_e, df_inplace_b, df_inplace_e): - self.assertEqual(obj.index.name, 'first') - self.assertEqual(obj.columns.name, 'second') - self.assertEqual(list(df.columns), ['d', 'e', 'f']) + assert obj.index.name == 'first' + assert obj.columns.name == 'second' + assert list(df.columns) == ['d', 'e', 'f'] pytest.raises(ValueError, df.drop, ['g']) pytest.raises(ValueError, df.drop, ['g'], 1) @@ -174,14 +174,14 @@ def test_reindex(self): if np.isnan(val): assert np.isnan(self.frame[col][idx]) else: - self.assertEqual(val, self.frame[col][idx]) + assert val == self.frame[col][idx] else: assert np.isnan(val) for col, series in compat.iteritems(newFrame): assert tm.equalContents(series.index, newFrame.index) emptyFrame = self.frame.reindex(Index([])) - self.assertEqual(len(emptyFrame.index), 0) + assert len(emptyFrame.index) == 0 # Cython code should be unit-tested directly nonContigFrame = self.frame.reindex(self.ts1.index[::2]) @@ -192,7 +192,7 @@ def test_reindex(self): if np.isnan(val): assert np.isnan(self.frame[col][idx]) else: - self.assertEqual(val, self.frame[col][idx]) + assert val == self.frame[col][idx] else: assert np.isnan(val) @@ -208,13 +208,13 @@ def test_reindex(self): # length zero newFrame = self.frame.reindex([]) assert newFrame.empty - self.assertEqual(len(newFrame.columns), len(self.frame.columns)) + assert len(newFrame.columns) == len(self.frame.columns) # length zero with columns reindexed with non-empty index newFrame = self.frame.reindex([]) newFrame = newFrame.reindex(self.frame.index) - self.assertEqual(len(newFrame.index), len(self.frame.index)) - self.assertEqual(len(newFrame.columns), len(self.frame.columns)) + assert len(newFrame.index) == len(self.frame.index) + assert len(newFrame.columns) == len(self.frame.columns) # pass non-Index newFrame = self.frame.reindex(list(self.ts1.index)) @@ -255,27 +255,27 @@ def test_reindex_name_remains(self): i = Series(np.arange(10), name='iname') df = df.reindex(i) - self.assertEqual(df.index.name, 'iname') + assert df.index.name == 'iname' df = df.reindex(Index(np.arange(10), name='tmpname')) - self.assertEqual(df.index.name, 'tmpname') + 
assert df.index.name == 'tmpname' s = Series(random.rand(10)) df = DataFrame(s.T, index=np.arange(len(s))) i = Series(np.arange(10), name='iname') df = df.reindex(columns=i) - self.assertEqual(df.columns.name, 'iname') + assert df.columns.name == 'iname' def test_reindex_int(self): smaller = self.intframe.reindex(self.intframe.index[::2]) - self.assertEqual(smaller['A'].dtype, np.int64) + assert smaller['A'].dtype == np.int64 bigger = smaller.reindex(self.intframe.index) - self.assertEqual(bigger['A'].dtype, np.float64) + assert bigger['A'].dtype == np.float64 smaller = self.intframe.reindex(columns=['A', 'B']) - self.assertEqual(smaller['A'].dtype, np.int64) + assert smaller['A'].dtype == np.int64 def test_reindex_like(self): other = self.frame.reindex(index=self.frame.index[:10], @@ -346,8 +346,8 @@ def test_reindex_axes(self): both_freq = df.reindex(index=time_freq, columns=some_cols).index.freq seq_freq = df.reindex(index=time_freq).reindex( columns=some_cols).index.freq - self.assertEqual(index_freq, both_freq) - self.assertEqual(index_freq, seq_freq) + assert index_freq == both_freq + assert index_freq == seq_freq def test_reindex_fill_value(self): df = DataFrame(np.random.randn(10, 4)) @@ -732,7 +732,7 @@ def test_filter_regex_search(self): # regex filtered = fcopy.filter(regex='[A]+') - self.assertEqual(len(filtered.columns), 2) + assert len(filtered.columns) == 2 assert 'AA' in filtered # doesn't have to be at beginning @@ -845,11 +845,11 @@ def test_reindex_boolean(self): columns=[0, 2]) reindexed = frame.reindex(np.arange(10)) - self.assertEqual(reindexed.values.dtype, np.object_) + assert reindexed.values.dtype == np.object_ assert isnull(reindexed[0][1]) reindexed = frame.reindex(columns=lrange(3)) - self.assertEqual(reindexed.values.dtype, np.object_) + assert reindexed.values.dtype == np.object_ assert isnull(reindexed[1]).all() def test_reindex_objects(self): @@ -867,7 +867,7 @@ def test_reindex_corner(self): # ints are weird smaller = self.intframe.reindex(columns=['A', 'B', 'E']) - self.assertEqual(smaller['E'].dtype, np.float64) + assert smaller['E'].dtype == np.float64 def test_reindex_axis(self): cols = ['A', 'B', 'E'] diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 2a319348aca3f..44dc6df756f3d 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -95,47 +95,47 @@ def test_as_matrix_numeric_cols(self): self.frame['foo'] = 'bar' values = self.frame.as_matrix(['A', 'B', 'C', 'D']) - self.assertEqual(values.dtype, np.float64) + assert values.dtype == np.float64 def test_as_matrix_lcd(self): # mixed lcd values = self.mixed_float.as_matrix(['A', 'B', 'C', 'D']) - self.assertEqual(values.dtype, np.float64) + assert values.dtype == np.float64 values = self.mixed_float.as_matrix(['A', 'B', 'C']) - self.assertEqual(values.dtype, np.float32) + assert values.dtype == np.float32 values = self.mixed_float.as_matrix(['C']) - self.assertEqual(values.dtype, np.float16) + assert values.dtype == np.float16 # GH 10364 # B uint64 forces float because there are other signed int types values = self.mixed_int.as_matrix(['A', 'B', 'C', 'D']) - self.assertEqual(values.dtype, np.float64) + assert values.dtype == np.float64 values = self.mixed_int.as_matrix(['A', 'D']) - self.assertEqual(values.dtype, np.int64) + assert values.dtype == np.int64 # B uint64 forces float because there are other signed int types values = self.mixed_int.as_matrix(['A', 'B', 'C']) - 
self.assertEqual(values.dtype, np.float64) + assert values.dtype == np.float64 # as B and C are both unsigned, no forcing to float is needed values = self.mixed_int.as_matrix(['B', 'C']) - self.assertEqual(values.dtype, np.uint64) + assert values.dtype == np.uint64 values = self.mixed_int.as_matrix(['A', 'C']) - self.assertEqual(values.dtype, np.int32) + assert values.dtype == np.int32 values = self.mixed_int.as_matrix(['C', 'D']) - self.assertEqual(values.dtype, np.int64) + assert values.dtype == np.int64 values = self.mixed_int.as_matrix(['A']) - self.assertEqual(values.dtype, np.int32) + assert values.dtype == np.int32 values = self.mixed_int.as_matrix(['C']) - self.assertEqual(values.dtype, np.uint8) + assert values.dtype == np.uint8 def test_constructor_with_convert(self): # this is actually mostly a test of lib.maybe_convert_objects @@ -220,8 +220,8 @@ def test_construction_with_mixed(self): # mixed-type frames self.mixed_frame['datetime'] = datetime.now() self.mixed_frame['timedelta'] = timedelta(days=1, seconds=1) - self.assertEqual(self.mixed_frame['datetime'].dtype, 'M8[ns]') - self.assertEqual(self.mixed_frame['timedelta'].dtype, 'm8[ns]') + assert self.mixed_frame['datetime'].dtype == 'M8[ns]' + assert self.mixed_frame['timedelta'].dtype == 'm8[ns]' result = self.mixed_frame.get_dtype_counts().sort_values() expected = Series({'float64': 4, 'object': 1, @@ -452,7 +452,7 @@ def test_convert_objects(self): oops = self.mixed_frame.T.T converted = oops._convert(datetime=True) assert_frame_equal(converted, self.mixed_frame) - self.assertEqual(converted['A'].dtype, np.float64) + assert converted['A'].dtype == np.float64 # force numeric conversion self.mixed_frame['H'] = '1.' @@ -464,19 +464,19 @@ def test_convert_objects(self): self.mixed_frame['K'] = '1' self.mixed_frame.loc[0:5, ['J', 'K']] = 'garbled' converted = self.mixed_frame._convert(datetime=True, numeric=True) - self.assertEqual(converted['H'].dtype, 'float64') - self.assertEqual(converted['I'].dtype, 'int64') - self.assertEqual(converted['J'].dtype, 'float64') - self.assertEqual(converted['K'].dtype, 'float64') - self.assertEqual(len(converted['J'].dropna()), l - 5) - self.assertEqual(len(converted['K'].dropna()), l - 5) + assert converted['H'].dtype == 'float64' + assert converted['I'].dtype == 'int64' + assert converted['J'].dtype == 'float64' + assert converted['K'].dtype == 'float64' + assert len(converted['J'].dropna()) == l - 5 + assert len(converted['K'].dropna()) == l - 5 # via astype converted = self.mixed_frame.copy() converted['H'] = converted['H'].astype('float64') converted['I'] = converted['I'].astype('int64') - self.assertEqual(converted['H'].dtype, 'float64') - self.assertEqual(converted['I'].dtype, 'int64') + assert converted['H'].dtype == 'float64' + assert converted['I'].dtype == 'int64' # via astype, but errors converted = self.mixed_frame.copy() diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 5452792def1ac..44f17faabe20d 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -303,7 +303,7 @@ def test_join_str_datetime(self): tst = A.join(C, on='aa') - self.assertEqual(len(tst.columns), 3) + assert len(tst.columns) == 3 def test_join_multiindex_leftright(self): # GH 10741 @@ -538,7 +538,7 @@ def test_combine_first_mixed_bug(self): "col5": ser3}) combined = frame1.combine_first(frame2) - self.assertEqual(len(combined.columns), 5) + assert len(combined.columns) == 5 # gh 3016 (same as in update) df = 
DataFrame([[1., 2., False, True], [4., 5., True, False]], @@ -603,28 +603,28 @@ def test_combine_first_align_nan(self): dfa = pd.DataFrame([[pd.Timestamp('2011-01-01'), 2]], columns=['a', 'b']) dfb = pd.DataFrame([[4], [5]], columns=['b']) - self.assertEqual(dfa['a'].dtype, 'datetime64[ns]') - self.assertEqual(dfa['b'].dtype, 'int64') + assert dfa['a'].dtype == 'datetime64[ns]' + assert dfa['b'].dtype == 'int64' res = dfa.combine_first(dfb) exp = pd.DataFrame({'a': [pd.Timestamp('2011-01-01'), pd.NaT], 'b': [2., 5.]}, columns=['a', 'b']) tm.assert_frame_equal(res, exp) - self.assertEqual(res['a'].dtype, 'datetime64[ns]') + assert res['a'].dtype == 'datetime64[ns]' # ToDo: this must be int64 - self.assertEqual(res['b'].dtype, 'float64') + assert res['b'].dtype == 'float64' res = dfa.iloc[:0].combine_first(dfb) exp = pd.DataFrame({'a': [np.nan, np.nan], 'b': [4, 5]}, columns=['a', 'b']) tm.assert_frame_equal(res, exp) # ToDo: this must be datetime64 - self.assertEqual(res['a'].dtype, 'float64') + assert res['a'].dtype == 'float64' # ToDo: this must be int64 - self.assertEqual(res['b'].dtype, 'int64') + assert res['b'].dtype == 'int64' def test_combine_first_timezone(self): - # GH 7630 + # see gh-7630 data1 = pd.to_datetime('20100101 01:01').tz_localize('UTC') df1 = pd.DataFrame(columns=['UTCdatetime', 'abc'], data=data1, @@ -644,10 +644,10 @@ def test_combine_first_timezone(self): index=pd.date_range('20140627', periods=2, freq='D')) tm.assert_frame_equal(res, exp) - self.assertEqual(res['UTCdatetime'].dtype, 'datetime64[ns, UTC]') - self.assertEqual(res['abc'].dtype, 'datetime64[ns, UTC]') + assert res['UTCdatetime'].dtype == 'datetime64[ns, UTC]' + assert res['abc'].dtype == 'datetime64[ns, UTC]' - # GH 10567 + # see gh-10567 dts1 = pd.date_range('2015-01-01', '2015-01-05', tz='UTC') df1 = pd.DataFrame({'DATE': dts1}) dts2 = pd.date_range('2015-01-03', '2015-01-05', tz='UTC') @@ -655,7 +655,7 @@ def test_combine_first_timezone(self): res = df1.combine_first(df2) tm.assert_frame_equal(res, df1) - self.assertEqual(res['DATE'].dtype, 'datetime64[ns, UTC]') + assert res['DATE'].dtype == 'datetime64[ns, UTC]' dts1 = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03', '2011-01-04'], tz='US/Eastern') @@ -680,7 +680,7 @@ def test_combine_first_timezone(self): # if df1 doesn't have NaN, keep its dtype res = df1.combine_first(df2) tm.assert_frame_equal(res, df1) - self.assertEqual(res['DATE'].dtype, 'datetime64[ns, US/Eastern]') + assert res['DATE'].dtype == 'datetime64[ns, US/Eastern]' dts1 = pd.date_range('2015-01-01', '2015-01-02', tz='US/Eastern') df1 = pd.DataFrame({'DATE': dts1}) @@ -693,7 +693,7 @@ def test_combine_first_timezone(self): pd.Timestamp('2015-01-03')] exp = pd.DataFrame({'DATE': exp_dts}) tm.assert_frame_equal(res, exp) - self.assertEqual(res['DATE'].dtype, 'object') + assert res['DATE'].dtype == 'object' def test_combine_first_timedelta(self): data1 = pd.TimedeltaIndex(['1 day', 'NaT', '3 day', '4day']) @@ -706,7 +706,7 @@ def test_combine_first_timedelta(self): '11 day', '3 day', '4 day']) exp = pd.DataFrame({'TD': exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) - self.assertEqual(res['TD'].dtype, 'timedelta64[ns]') + assert res['TD'].dtype == 'timedelta64[ns]' def test_combine_first_period(self): data1 = pd.PeriodIndex(['2011-01', 'NaT', '2011-03', @@ -722,7 +722,7 @@ def test_combine_first_period(self): freq='M') exp = pd.DataFrame({'P': exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) - self.assertEqual(res['P'].dtype, 'object') + 
assert res['P'].dtype == 'object' # different freq dts2 = pd.PeriodIndex(['2012-01-01', '2012-01-02', @@ -738,7 +738,7 @@ def test_combine_first_period(self): pd.Period('2011-04', freq='M')] exp = pd.DataFrame({'P': exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) - self.assertEqual(res['P'].dtype, 'object') + assert res['P'].dtype == 'object' def test_combine_first_int(self): # GH14687 - integer series that do no align exactly @@ -748,7 +748,7 @@ def test_combine_first_int(self): res = df1.combine_first(df2) tm.assert_frame_equal(res, df1) - self.assertEqual(res['a'].dtype, 'int64') + assert res['a'].dtype == 'int64' def test_concat_datetime_datetime64_frame(self): # #2624 diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 588182eb30336..5b00ddc51da46 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -36,10 +36,10 @@ class TestDataFrameConstructors(tm.TestCase, TestData): def test_constructor(self): df = DataFrame() - self.assertEqual(len(df.index), 0) + assert len(df.index) == 0 df = DataFrame(data={}) - self.assertEqual(len(df.index), 0) + assert len(df.index) == 0 def test_constructor_mixed(self): index, data = tm.getMixedTypeDict() @@ -48,11 +48,11 @@ def test_constructor_mixed(self): indexed_frame = DataFrame(data, index=index) # noqa unindexed_frame = DataFrame(data) # noqa - self.assertEqual(self.mixed_frame['foo'].dtype, np.object_) + assert self.mixed_frame['foo'].dtype == np.object_ def test_constructor_cast_failure(self): foo = DataFrame({'a': ['a', 'b', 'c']}, dtype=np.float64) - self.assertEqual(foo['a'].dtype, object) + assert foo['a'].dtype == object # GH 3010, constructing with odd arrays df = DataFrame(np.ones((4, 2))) @@ -76,29 +76,28 @@ def test_constructor_dtype_copy(self): new_df = pd.DataFrame(orig_df, dtype=float, copy=True) new_df['col1'] = 200. - self.assertEqual(orig_df['col1'][0], 1.) + assert orig_df['col1'][0] == 1. 
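# Bridging the two constructor tests here with a self-contained sketch
# (values are hypothetical): copy=True gives the new frame its own data, so
# mutating it cannot leak back into the source; the next test covers the
# no-cast path, where the constructor may instead return a view.
import pandas as pd

orig = pd.DataFrame({'col1': [1., 2.]})
owned = pd.DataFrame(orig, dtype=float, copy=True)
owned['col1'] = 200.
assert orig['col1'][0] == 1.  # the source frame is untouched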
def test_constructor_dtype_nocast_view(self): df = DataFrame([[1, 2]]) should_be_view = DataFrame(df, dtype=df[0].dtype) should_be_view[0][0] = 99 - self.assertEqual(df.values[0, 0], 99) + assert df.values[0, 0] == 99 should_be_view = DataFrame(df.values, dtype=df[0].dtype) should_be_view[0][0] = 97 - self.assertEqual(df.values[0, 0], 97) + assert df.values[0, 0] == 97 def test_constructor_dtype_list_data(self): df = DataFrame([[1, '2'], [None, 'a']], dtype=object) assert df.loc[1, 0] is None - self.assertEqual(df.loc[0, 1], '2') + assert df.loc[0, 1] == '2' def test_constructor_list_frames(self): - - # GH 3243 + # see gh-3243 result = DataFrame([DataFrame([])]) - self.assertEqual(result.shape, (1, 0)) + assert result.shape == (1, 0) result = DataFrame([DataFrame(dict(A=lrange(5)))]) assert isinstance(result.iloc[0, 0], DataFrame) @@ -149,8 +148,8 @@ def test_constructor_complex_dtypes(self): b = np.random.rand(10).astype(np.complex128) df = DataFrame({'a': a, 'b': b}) - self.assertEqual(a.dtype, df.a.dtype) - self.assertEqual(b.dtype, df.b.dtype) + assert a.dtype == df.a.dtype + assert b.dtype == df.b.dtype def test_constructor_rec(self): rec = self.frame.to_records(index=False) @@ -175,7 +174,7 @@ def test_constructor_rec(self): def test_constructor_bool(self): df = DataFrame({0: np.ones(10, dtype=bool), 1: np.zeros(10, dtype=bool)}) - self.assertEqual(df.values.dtype, np.bool_) + assert df.values.dtype == np.bool_ def test_constructor_overflow_int64(self): # see gh-14881 @@ -183,7 +182,7 @@ def test_constructor_overflow_int64(self): dtype=np.uint64) result = DataFrame({'a': values}) - self.assertEqual(result['a'].dtype, np.uint64) + assert result['a'].dtype == np.uint64 # see gh-2355 data_scores = [(6311132704823138710, 273), (2685045978526272070, 23), @@ -194,7 +193,7 @@ def test_constructor_overflow_int64(self): data = np.zeros((len(data_scores),), dtype=dtype) data[:] = data_scores df_crawls = DataFrame(data) - self.assertEqual(df_crawls['uid'].dtype, np.uint64) + assert df_crawls['uid'].dtype == np.uint64 def test_constructor_ordereddict(self): import random @@ -203,7 +202,7 @@ def test_constructor_ordereddict(self): random.shuffle(nums) expected = ['A%d' % i for i in nums] df = DataFrame(OrderedDict(zip(expected, [[0]] * nitems))) - self.assertEqual(expected, list(df.columns)) + assert expected == list(df.columns) def test_constructor_dict(self): frame = DataFrame({'col1': self.ts1, @@ -378,14 +377,14 @@ def test_constructor_dict_cast(self): 'B': {'1': '1', '2': '2', '3': '3'}, } frame = DataFrame(test_data, dtype=float) - self.assertEqual(len(frame), 3) - self.assertEqual(frame['B'].dtype, np.float64) - self.assertEqual(frame['A'].dtype, np.float64) + assert len(frame) == 3 + assert frame['B'].dtype == np.float64 + assert frame['A'].dtype == np.float64 frame = DataFrame(test_data) - self.assertEqual(len(frame), 3) - self.assertEqual(frame['B'].dtype, np.object_) - self.assertEqual(frame['A'].dtype, np.float64) + assert len(frame) == 3 + assert frame['B'].dtype == np.object_ + assert frame['A'].dtype == np.float64 # can't cast to float test_data = { @@ -393,9 +392,9 @@ def test_constructor_dict_cast(self): 'B': dict(zip(range(15), randn(15))) } frame = DataFrame(test_data, dtype=float) - self.assertEqual(len(frame), 20) - self.assertEqual(frame['A'].dtype, np.object_) - self.assertEqual(frame['B'].dtype, np.float64) + assert len(frame) == 20 + assert frame['A'].dtype == np.object_ + assert frame['B'].dtype == np.float64 def test_constructor_dict_dont_upcast(self): d = {'Col1': 
{'Row1': 'A String', 'Row2': np.nan}} @@ -494,14 +493,14 @@ def test_constructor_period(self): a = pd.PeriodIndex(['2012-01', 'NaT', '2012-04'], freq='M') b = pd.PeriodIndex(['2012-02-01', '2012-03-01', 'NaT'], freq='D') df = pd.DataFrame({'a': a, 'b': b}) - self.assertEqual(df['a'].dtype, 'object') - self.assertEqual(df['b'].dtype, 'object') + assert df['a'].dtype == 'object' + assert df['b'].dtype == 'object' # list of periods df = pd.DataFrame({'a': a.asobject.tolist(), 'b': b.asobject.tolist()}) - self.assertEqual(df['a'].dtype, 'object') - self.assertEqual(df['b'].dtype, 'object') + assert df['a'].dtype == 'object' + assert df['b'].dtype == 'object' def test_nested_dict_frame_constructor(self): rng = pd.period_range('1/1/2000', periods=5) @@ -530,18 +529,18 @@ def _check_basic_constructor(self, empty): # 2-D input frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(len(frame.index), 2) - self.assertEqual(len(frame.columns), 3) + assert len(frame.index) == 2 + assert len(frame.columns) == 3 # 1-D input frame = DataFrame(empty((3,)), columns=['A'], index=[1, 2, 3]) - self.assertEqual(len(frame.index), 3) - self.assertEqual(len(frame.columns), 1) + assert len(frame.index) == 3 + assert len(frame.columns) == 1 # cast type frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2], dtype=np.int64) - self.assertEqual(frame.values.dtype, np.int64) + assert frame.values.dtype == np.int64 # wrong size axis labels msg = r'Shape of passed values is \(3, 2\), indices imply \(3, 1\)' @@ -569,16 +568,16 @@ def _check_basic_constructor(self, empty): # 0-length axis frame = DataFrame(empty((0, 3))) - self.assertEqual(len(frame.index), 0) + assert len(frame.index) == 0 frame = DataFrame(empty((3, 0))) - self.assertEqual(len(frame.columns), 0) + assert len(frame.columns) == 0 def test_constructor_ndarray(self): self._check_basic_constructor(np.ones) frame = DataFrame(['foo', 'bar'], index=[0, 1], columns=['A']) - self.assertEqual(len(frame), 2) + assert len(frame) == 2 def test_constructor_maskedarray(self): self._check_basic_constructor(ma.masked_all) @@ -588,8 +587,8 @@ def test_constructor_maskedarray(self): mat[0, 0] = 1.0 mat[1, 2] = 2.0 frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(1.0, frame['A'][1]) - self.assertEqual(2.0, frame['C'][2]) + assert 1.0 == frame['A'][1] + assert 2.0 == frame['C'][2] # what is this even checking?? 
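# What the fully masked case below exercises, as a standalone sketch
# (illustrative values only): every masked slot becomes NaN in the frame, and
# since NaN never compares equal to itself, ``frame == frame`` is False
# everywhere.
import numpy as np
import numpy.ma as ma
import pandas as pd

mat = ma.masked_all((2, 3), dtype=float)
frame = pd.DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2])
assert frame.isnull().values.all()          # masked slots surface as NaN
assert np.all(~np.asarray(frame == frame))  # NaN != NaN, so all False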
mat = ma.masked_all((2, 3), dtype=float) @@ -602,66 +601,66 @@ def test_constructor_maskedarray_nonfloat(self): # 2-D input frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(len(frame.index), 2) - self.assertEqual(len(frame.columns), 3) + assert len(frame.index) == 2 + assert len(frame.columns) == 3 assert np.all(~np.asarray(frame == frame)) # cast type frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2], dtype=np.float64) - self.assertEqual(frame.values.dtype, np.float64) + assert frame.values.dtype == np.float64 # Check non-masked values mat2 = ma.copy(mat) mat2[0, 0] = 1 mat2[1, 2] = 2 frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(1, frame['A'][1]) - self.assertEqual(2, frame['C'][2]) + assert 1 == frame['A'][1] + assert 2 == frame['C'][2] # masked np.datetime64 stays (use lib.NaT as null) mat = ma.masked_all((2, 3), dtype='M8[ns]') # 2-D input frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(len(frame.index), 2) - self.assertEqual(len(frame.columns), 3) + assert len(frame.index) == 2 + assert len(frame.columns) == 3 assert isnull(frame).values.all() # cast type frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2], dtype=np.int64) - self.assertEqual(frame.values.dtype, np.int64) + assert frame.values.dtype == np.int64 # Check non-masked values mat2 = ma.copy(mat) mat2[0, 0] = 1 mat2[1, 2] = 2 frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(1, frame['A'].view('i8')[1]) - self.assertEqual(2, frame['C'].view('i8')[2]) + assert 1 == frame['A'].view('i8')[1] + assert 2 == frame['C'].view('i8')[2] # masked bool promoted to object mat = ma.masked_all((2, 3), dtype=bool) # 2-D input frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(len(frame.index), 2) - self.assertEqual(len(frame.columns), 3) + assert len(frame.index) == 2 + assert len(frame.columns) == 3 assert np.all(~np.asarray(frame == frame)) # cast type frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2], dtype=object) - self.assertEqual(frame.values.dtype, object) + assert frame.values.dtype == object # Check non-masked values mat2 = ma.copy(mat) mat2[0, 0] = True mat2[1, 2] = False frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(True, frame['A'][1]) - self.assertEqual(False, frame['C'][2]) + assert frame['A'][1] is True + assert frame['C'][2] is False def test_constructor_mrecarray(self): # Ensure mrecarray produces frame identical to dict of masked arrays @@ -708,34 +707,34 @@ def test_constructor_mrecarray(self): def test_constructor_corner(self): df = DataFrame(index=[]) - self.assertEqual(df.values.shape, (0, 0)) + assert df.values.shape == (0, 0) # empty but with specified dtype df = DataFrame(index=lrange(10), columns=['a', 'b'], dtype=object) - self.assertEqual(df.values.dtype, np.object_) + assert df.values.dtype == np.object_ # does not error but ends up float df = DataFrame(index=lrange(10), columns=['a', 'b'], dtype=int) - self.assertEqual(df.values.dtype, np.object_) + assert df.values.dtype == np.object_ # #1783 empty dtype object df = DataFrame({}, columns=['foo', 'bar']) - self.assertEqual(df.values.dtype, np.object_) + assert df.values.dtype == np.object_ df = DataFrame({'b': 1}, index=lrange(10), columns=list('abc'), dtype=int) - self.assertEqual(df.values.dtype, np.object_) + assert df.values.dtype == np.object_ def test_constructor_scalar_inference(self): data = {'int': 1, 'bool': True, 'float': 
3., 'complex': 4j, 'object': 'foo'} df = DataFrame(data, index=np.arange(10)) - self.assertEqual(df['int'].dtype, np.int64) - self.assertEqual(df['bool'].dtype, np.bool_) - self.assertEqual(df['float'].dtype, np.float64) - self.assertEqual(df['complex'].dtype, np.complex128) - self.assertEqual(df['object'].dtype, np.object_) + assert df['int'].dtype == np.int64 + assert df['bool'].dtype == np.bool_ + assert df['float'].dtype == np.float64 + assert df['complex'].dtype == np.complex128 + assert df['object'].dtype == np.object_ def test_constructor_arrays_and_scalars(self): df = DataFrame({'a': randn(10), 'b': True}) @@ -750,28 +749,28 @@ def test_constructor_DataFrame(self): tm.assert_frame_equal(df, self.frame) df_casted = DataFrame(self.frame, dtype=np.int64) - self.assertEqual(df_casted.values.dtype, np.int64) + assert df_casted.values.dtype == np.int64 def test_constructor_more(self): # used to be in test_matrix.py arr = randn(10) dm = DataFrame(arr, columns=['A'], index=np.arange(10)) - self.assertEqual(dm.values.ndim, 2) + assert dm.values.ndim == 2 arr = randn(0) dm = DataFrame(arr) - self.assertEqual(dm.values.ndim, 2) - self.assertEqual(dm.values.ndim, 2) + assert dm.values.ndim == 2 + assert dm.values.ndim == 2 # no data specified dm = DataFrame(columns=['A', 'B'], index=np.arange(10)) - self.assertEqual(dm.values.shape, (10, 2)) + assert dm.values.shape == (10, 2) dm = DataFrame(columns=['A', 'B']) - self.assertEqual(dm.values.shape, (0, 2)) + assert dm.values.shape == (0, 2) dm = DataFrame(index=np.arange(10)) - self.assertEqual(dm.values.shape, (10, 0)) + assert dm.values.shape == (10, 0) # corner, silly # TODO: Fix this Exception to be better... @@ -792,8 +791,8 @@ def test_constructor_more(self): 'B': np.ones(10, dtype=np.float64)}, index=np.arange(10)) - self.assertEqual(len(dm.columns), 2) - self.assertEqual(dm.values.dtype, np.float64) + assert len(dm.columns) == 2 + assert dm.values.dtype == np.float64 def test_constructor_empty_list(self): df = DataFrame([], index=[]) @@ -818,7 +817,7 @@ def test_constructor_list_of_lists(self): l = [[1, 'a'], [2, 'b']] df = DataFrame(data=l, columns=["num", "str"]) assert is_integer_dtype(df['num']) - self.assertEqual(df['str'].dtype, np.object_) + assert df['str'].dtype == np.object_ # GH 4851 # list of 0-dim ndarrays @@ -1075,7 +1074,7 @@ def test_constructor_orient(self): def test_constructor_Series_named(self): a = Series([1, 2, 3], index=['a', 'b', 'c'], name='x') df = DataFrame(a) - self.assertEqual(df.columns[0], 'x') + assert df.columns[0] == 'x' tm.assert_index_equal(df.index, a.index) # ndarray like @@ -1095,7 +1094,7 @@ def test_constructor_Series_named(self): # #2234 a = Series([], name='x') df = DataFrame(a) - self.assertEqual(df.columns[0], 'x') + assert df.columns[0] == 'x' # series with name and w/o s1 = Series(arr, name='x') @@ -1120,12 +1119,12 @@ def test_constructor_Series_differently_indexed(self): df1 = DataFrame(s1, index=other_index) exp1 = DataFrame(s1.reindex(other_index)) - self.assertEqual(df1.columns[0], 'x') + assert df1.columns[0] == 'x' tm.assert_frame_equal(df1, exp1) df2 = DataFrame(s2, index=other_index) exp2 = DataFrame(s2.reindex(other_index)) - self.assertEqual(df2.columns[0], 0) + assert df2.columns[0] == 0 tm.assert_index_equal(df2.index, other_index) tm.assert_frame_equal(df2, exp2) @@ -1156,7 +1155,7 @@ def test_constructor_from_items(self): columns=self.mixed_frame.columns, orient='index') tm.assert_frame_equal(recons, self.mixed_frame) - self.assertEqual(recons['A'].dtype, np.float64) + assert 
recons['A'].dtype == np.float64 with tm.assert_raises_regex(TypeError, "Must pass columns with " @@ -1305,7 +1304,7 @@ def test_constructor_with_datetimes(self): ind = date_range(start="2000-01-01", freq="D", periods=10) datetimes = [ts.to_pydatetime() for ts in ind] datetime_s = Series(datetimes) - self.assertEqual(datetime_s.dtype, 'M8[ns]') + assert datetime_s.dtype == 'M8[ns]' df = DataFrame({'datetime_s': datetime_s}) result = df.get_dtype_counts() expected = Series({datetime64name: 1}) @@ -1331,12 +1330,12 @@ def test_constructor_with_datetimes(self): dt = tz.localize(datetime(2012, 1, 1)) df = DataFrame({'End Date': dt}, index=[0]) - self.assertEqual(df.iat[0, 0], dt) + assert df.iat[0, 0] == dt tm.assert_series_equal(df.dtypes, Series( {'End Date': 'datetime64[ns, US/Eastern]'})) df = DataFrame([{'End Date': dt}]) - self.assertEqual(df.iat[0, 0], dt) + assert df.iat[0, 0] == dt tm.assert_series_equal(df.dtypes, Series( {'End Date': 'datetime64[ns, US/Eastern]'})) @@ -1511,7 +1510,7 @@ def f(): def test_constructor_lists_to_object_dtype(self): # from #1074 d = DataFrame({'a': [np.nan, False]}) - self.assertEqual(d['a'].dtype, np.object_) + assert d['a'].dtype == np.object_ assert not d['a'][1] def test_from_records_to_records(self): @@ -1616,7 +1615,7 @@ def test_from_records_columns_not_modified(self): df = DataFrame.from_records(tuples, columns=columns, index='a') # noqa - self.assertEqual(columns, original_columns) + assert columns == original_columns def test_from_records_decimal(self): from decimal import Decimal @@ -1624,10 +1623,10 @@ def test_from_records_decimal(self): tuples = [(Decimal('1.5'),), (Decimal('2.5'),), (None,)] df = DataFrame.from_records(tuples, columns=['a']) - self.assertEqual(df['a'].dtype, object) + assert df['a'].dtype == object df = DataFrame.from_records(tuples, columns=['a'], coerce_float=True) - self.assertEqual(df['a'].dtype, np.float64) + assert df['a'].dtype == np.float64 assert np.isnan(df['a'].values[-1]) def test_from_records_duplicates(self): @@ -1648,12 +1647,12 @@ def create_dict(order_id): documents.append({'order_id': 10, 'quantity': 5}) result = DataFrame.from_records(documents, index='order_id') - self.assertEqual(result.index.name, 'order_id') + assert result.index.name == 'order_id' # MultiIndex result = DataFrame.from_records(documents, index=['order_id', 'quantity']) - self.assertEqual(result.index.names, ('order_id', 'quantity')) + assert result.index.names == ('order_id', 'quantity') def test_from_records_misc_brokenness(self): # #2179 @@ -1702,13 +1701,13 @@ def test_from_records_empty_with_nonempty_fields_gh3682(self): a = np.array([(1, 2)], dtype=[('id', np.int64), ('value', np.int64)]) df = DataFrame.from_records(a, index='id') tm.assert_index_equal(df.index, Index([1], name='id')) - self.assertEqual(df.index.name, 'id') + assert df.index.name == 'id' tm.assert_index_equal(df.columns, Index(['value'])) b = np.array([], dtype=[('id', np.int64), ('value', np.int64)]) df = DataFrame.from_records(b, index='id') tm.assert_index_equal(df.index, Index([], name='id')) - self.assertEqual(df.index.name, 'id') + assert df.index.name == 'id' def test_from_records_with_datetimes(self): @@ -1804,13 +1803,13 @@ def test_from_records_sequencelike(self): # empty case result = DataFrame.from_records([], columns=['foo', 'bar', 'baz']) - self.assertEqual(len(result), 0) + assert len(result) == 0 tm.assert_index_equal(result.columns, pd.Index(['foo', 'bar', 'baz'])) result = DataFrame.from_records([]) - self.assertEqual(len(result), 0) - 
self.assertEqual(len(result.columns), 0) + assert len(result) == 0 + assert len(result.columns) == 0 def test_from_records_dictlike(self): @@ -1891,8 +1890,8 @@ def test_from_records_len0_with_columns(self): columns=['foo', 'bar']) assert np.array_equal(result.columns, ['bar']) - self.assertEqual(len(result), 0) - self.assertEqual(result.index.name, 'foo') + assert len(result) == 0 + assert result.index.name == 'foo' def test_to_frame_with_falsey_names(self): # GH 16114 diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index d3a675e3dc1a3..353b4b873332e 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -22,19 +22,19 @@ def test_to_dict(self): for k, v in compat.iteritems(test_data): for k2, v2 in compat.iteritems(v): - self.assertEqual(v2, recons_data[k][k2]) + assert v2 == recons_data[k][k2] recons_data = DataFrame(test_data).to_dict("l") for k, v in compat.iteritems(test_data): for k2, v2 in compat.iteritems(v): - self.assertEqual(v2, recons_data[k][int(k2) - 1]) + assert v2 == recons_data[k][int(k2) - 1] recons_data = DataFrame(test_data).to_dict("s") for k, v in compat.iteritems(test_data): for k2, v2 in compat.iteritems(v): - self.assertEqual(v2, recons_data[k][k2]) + assert v2 == recons_data[k][k2] recons_data = DataFrame(test_data).to_dict("sp") expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'], @@ -46,7 +46,7 @@ def test_to_dict(self): {'A': 2.0, 'B': '2'}, {'A': np.nan, 'B': '3'}] assert isinstance(recons_data, list) - self.assertEqual(len(recons_data), 3) + assert len(recons_data) == 3 for l, r in zip(recons_data, expected_records): tm.assert_dict_equal(l, r) @@ -55,7 +55,7 @@ def test_to_dict(self): for k, v in compat.iteritems(test_data): for k2, v2 in compat.iteritems(v): - self.assertEqual(v2, recons_data[k2][k]) + assert v2 == recons_data[k2][k] def test_to_dict_timestamp(self): @@ -72,10 +72,10 @@ def test_to_dict_timestamp(self): expected_records_mixed = [{'A': tsmp, 'B': 1}, {'A': tsmp, 'B': 2}] - self.assertEqual(test_data.to_dict(orient='records'), - expected_records) - self.assertEqual(test_data_mixed.to_dict(orient='records'), - expected_records_mixed) + assert (test_data.to_dict(orient='records') == + expected_records) + assert (test_data_mixed.to_dict(orient='records') == + expected_records_mixed) expected_series = { 'A': Series([tsmp, tsmp], name='A'), @@ -117,10 +117,10 @@ def test_to_records_dt64(self): df = DataFrame([["one", "two", "three"], ["four", "five", "six"]], index=date_range("2012-01-01", "2012-01-02")) - self.assertEqual(df.to_records()['index'][0], df.index[0]) + assert df.to_records()['index'][0] == df.index[0] rs = df.to_records(convert_datetime64=False) - self.assertEqual(rs['index'][0], df.index.values[0]) + assert rs['index'][0] == df.index.values[0] def test_to_records_with_multindex(self): # GH3189 diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 427834b3dbf38..2d39db16dbd8d 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -28,14 +28,14 @@ def test_concat_empty_dataframe_dtypes(self): df['c'] = df['c'].astype(np.float64) result = pd.concat([df, df]) - self.assertEqual(result['a'].dtype, np.bool_) - self.assertEqual(result['b'].dtype, np.int32) - self.assertEqual(result['c'].dtype, np.float64) + assert result['a'].dtype == np.bool_ + assert result['b'].dtype == np.int32 + assert result['c'].dtype == np.float64 result = pd.concat([df, df.astype(np.float64)]) - 
self.assertEqual(result['a'].dtype, np.object_) - self.assertEqual(result['b'].dtype, np.float64) - self.assertEqual(result['c'].dtype, np.float64) + assert result['a'].dtype == np.object_ + assert result['b'].dtype == np.float64 + assert result['c'].dtype == np.float64 def test_empty_frame_dtypes_ftypes(self): empty_df = pd.DataFrame() @@ -326,9 +326,8 @@ def test_astype(self): # mixed casting def _check_cast(df, v): - self.assertEqual( - list(set([s.dtype.name - for _, s in compat.iteritems(df)]))[0], v) + assert (list(set([s.dtype.name for + _, s in compat.iteritems(df)]))[0] == v) mn = self.all_mixed._get_numeric_data().copy() mn['little_float'] = np.array(12345., dtype='float16') diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 8f6128ad4e525..cd1529d04c991 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -113,7 +113,7 @@ def test_getitem_list(self): assert_frame_equal(result, expected) assert_frame_equal(result2, expected) - self.assertEqual(result.columns.name, 'foo') + assert result.columns.name == 'foo' with tm.assert_raises_regex(KeyError, 'not in index'): self.frame[['B', 'A', 'food']] @@ -128,7 +128,7 @@ def test_getitem_list(self): result = df[[('foo', 'bar'), ('baz', 'qux')]] expected = df.iloc[:, :2] assert_frame_equal(result, expected) - self.assertEqual(result.columns.names, ['sth', 'sth2']) + assert result.columns.names == ['sth', 'sth2'] def test_getitem_callable(self): # GH 12533 @@ -282,7 +282,7 @@ def test_getitem_boolean(self): assert_frame_equal(bif, bifw, check_dtype=False) for c in df.columns: if bif[c].dtype != bifw[c].dtype: - self.assertEqual(bif[c].dtype, df[c].dtype) + assert bif[c].dtype == df[c].dtype def test_getitem_boolean_casting(self): @@ -404,8 +404,8 @@ def test_getitem_setitem_ix_negative_integers(self): with catch_warnings(record=True): assert_series_equal(a.ix[-1], a.ix[-2], check_names=False) - self.assertEqual(a.ix[-1].name, 'T') - self.assertEqual(a.ix[-2].name, 'S') + assert a.ix[-1].name == 'T' + assert a.ix[-2].name == 'S' def test_getattr(self): assert_series_equal(self.frame.A, self.frame['A']) @@ -424,8 +424,8 @@ def test_setitem(self): self.frame['col5'] = series assert 'col5' in self.frame - self.assertEqual(len(series), 15) - self.assertEqual(len(self.frame), 30) + assert len(series) == 15 + assert len(self.frame) == 30 exp = np.ravel(np.column_stack((series.values, [np.nan] * 15))) exp = Series(exp, index=self.frame.index, name='col5') @@ -459,13 +459,13 @@ def test_setitem(self): def f(): smaller['col10'] = ['1', '2'] pytest.raises(com.SettingWithCopyError, f) - self.assertEqual(smaller['col10'].dtype, np.object_) + assert smaller['col10'].dtype == np.object_ assert (smaller['col10'] == ['1', '2']).all() # with a dtype for dtype in ['int32', 'int64', 'float32', 'float64']: self.frame[dtype] = np.array(arr, dtype=dtype) - self.assertEqual(self.frame[dtype].dtype.name, dtype) + assert self.frame[dtype].dtype.name == dtype # dtype changing GH4204 df = DataFrame([[0, 0]]) @@ -542,13 +542,13 @@ def test_setitem_boolean(self): def test_setitem_cast(self): self.frame['D'] = self.frame['D'].astype('i8') - self.assertEqual(self.frame['D'].dtype, np.int64) + assert self.frame['D'].dtype == np.int64 # #669, should not cast? 
# this is now set to int64, which means a replacement of the column to # the value dtype (and nothing to do with the existing dtype) self.frame['B'] = 0 - self.assertEqual(self.frame['B'].dtype, np.int64) + assert self.frame['B'].dtype == np.int64 # cast if pass array of course self.frame['B'] = np.arange(len(self.frame)) @@ -556,18 +556,18 @@ def test_setitem_cast(self): self.frame['foo'] = 'bar' self.frame['foo'] = 0 - self.assertEqual(self.frame['foo'].dtype, np.int64) + assert self.frame['foo'].dtype == np.int64 self.frame['foo'] = 'bar' self.frame['foo'] = 2.5 - self.assertEqual(self.frame['foo'].dtype, np.float64) + assert self.frame['foo'].dtype == np.float64 self.frame['something'] = 0 - self.assertEqual(self.frame['something'].dtype, np.int64) + assert self.frame['something'].dtype == np.int64 self.frame['something'] = 2 - self.assertEqual(self.frame['something'].dtype, np.int64) + assert self.frame['something'].dtype == np.int64 self.frame['something'] = 2.5 - self.assertEqual(self.frame['something'].dtype, np.float64) + assert self.frame['something'].dtype == np.float64 # GH 7704 # dtype conversion on setting @@ -581,9 +581,9 @@ def test_setitem_cast(self): # Test that data type is preserved . #5782 df = DataFrame({'one': np.arange(6, dtype=np.int8)}) df.loc[1, 'one'] = 6 - self.assertEqual(df.dtypes.one, np.dtype(np.int8)) + assert df.dtypes.one == np.dtype(np.int8) df.one = np.int8(7) - self.assertEqual(df.dtypes.one, np.dtype(np.int8)) + assert df.dtypes.one == np.dtype(np.int8) def test_setitem_boolean_column(self): expected = self.frame.copy() @@ -602,7 +602,7 @@ def test_setitem_corner(self): del df['B'] df['B'] = [1., 2., 3.] assert 'B' in df - self.assertEqual(len(df.columns), 2) + assert len(df.columns) == 2 df['A'] = 'beginning' df['E'] = 'foo' @@ -614,29 +614,29 @@ def test_setitem_corner(self): dm = DataFrame(index=self.frame.index) dm['A'] = 'foo' dm['B'] = 'bar' - self.assertEqual(len(dm.columns), 2) - self.assertEqual(dm.values.dtype, np.object_) + assert len(dm.columns) == 2 + assert dm.values.dtype == np.object_ # upcast dm['C'] = 1 - self.assertEqual(dm['C'].dtype, np.int64) + assert dm['C'].dtype == np.int64 dm['E'] = 1. 
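# Side note, sketched with hypothetical values: the converted assertions lean
# on numpy's dtype equality accepting type objects and strings, so
# ``dtype == np.float64`` behaves the same as ``dtype == np.dtype('float64')``.
import numpy as np

dt = np.dtype('float64')
assert dt == np.float64            # a type object compares equal
assert dt == np.dtype(np.float64)  # identical after normalization
assert dt == 'float64'             # dtype strings are coerced too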
- self.assertEqual(dm['E'].dtype, np.float64) + assert dm['E'].dtype == np.float64 # set existing column dm['A'] = 'bar' - self.assertEqual('bar', dm['A'][0]) + assert 'bar' == dm['A'][0] dm = DataFrame(index=np.arange(3)) dm['A'] = 1 dm['foo'] = 'bar' del dm['foo'] dm['foo'] = 'bar' - self.assertEqual(dm['foo'].dtype, np.object_) + assert dm['foo'].dtype == np.object_ dm['coercable'] = ['1', '2', '3'] - self.assertEqual(dm['coercable'].dtype, np.object_) + assert dm['coercable'].dtype == np.object_ def test_setitem_corner2(self): data = {"title": ['foobar', 'bar', 'foobar'] + ['foobar'] * 17, @@ -648,8 +648,8 @@ def test_setitem_corner2(self): df.loc[ix, ['title']] = 'foobar' df.loc[ix, ['cruft']] = 0 - self.assertEqual(df.loc[1, 'title'], 'foobar') - self.assertEqual(df.loc[1, 'cruft'], 0) + assert df.loc[1, 'title'] == 'foobar' + assert df.loc[1, 'cruft'] == 0 def test_setitem_ambig(self): # Difficulties with mixed-type data @@ -731,10 +731,10 @@ def test_getitem_empty_frame_with_boolean(self): def test_delitem_corner(self): f = self.frame.copy() del f['D'] - self.assertEqual(len(f.columns), 3) + assert len(f.columns) == 3 pytest.raises(KeyError, f.__delitem__, 'D') del f['B'] - self.assertEqual(len(f.columns), 2) + assert len(f.columns) == 2 def test_getitem_fancy_2d(self): f = self.frame @@ -781,13 +781,13 @@ def test_slice_floats(self): df = DataFrame(np.random.rand(3, 2), index=index) s1 = df.loc[52195.1:52196.5] - self.assertEqual(len(s1), 2) + assert len(s1) == 2 s1 = df.loc[52195.1:52196.6] - self.assertEqual(len(s1), 2) + assert len(s1) == 2 s1 = df.loc[52195.1:52198.9] - self.assertEqual(len(s1), 3) + assert len(s1) == 3 def test_getitem_fancy_slice_integers_step(self): df = DataFrame(np.random.randn(10, 5)) @@ -930,7 +930,7 @@ def test_setitem_fancy_2d(self): def test_fancy_getitem_slice_mixed(self): sliced = self.mixed_frame.iloc[:, -3:] - self.assertEqual(sliced['D'].dtype, np.float64) + assert sliced['D'].dtype == np.float64 # get view with single block # setting it triggers setting with copy @@ -1282,7 +1282,7 @@ def test_getitem_fancy_scalar(self): for col in f.columns: ts = f[col] for idx in f.index[::5]: - self.assertEqual(ix[idx, col], ts[idx]) + assert ix[idx, col] == ts[idx] def test_setitem_fancy_scalar(self): f = self.frame @@ -1394,17 +1394,17 @@ def test_getitem_setitem_float_labels(self): result = df.loc[1.5:4] expected = df.reindex([1.5, 2, 3, 4]) assert_frame_equal(result, expected) - self.assertEqual(len(result), 4) + assert len(result) == 4 result = df.loc[4:5] expected = df.reindex([4, 5]) # reindex with int assert_frame_equal(result, expected, check_index_type=False) - self.assertEqual(len(result), 2) + assert len(result) == 2 result = df.loc[4:5] expected = df.reindex([4.0, 5.0]) # reindex with float assert_frame_equal(result, expected) - self.assertEqual(len(result), 2) + assert len(result) == 2 # loc_float changes this to work properly result = df.loc[1:2] @@ -1425,7 +1425,7 @@ def test_getitem_setitem_float_labels(self): result = df.iloc[4:5] expected = df.reindex([5.0]) assert_frame_equal(result, expected) - self.assertEqual(len(result), 1) + assert len(result) == 1 cp = df.copy() @@ -1449,22 +1449,22 @@ def f(): result = df.loc[1.0:5] expected = df assert_frame_equal(result, expected) - self.assertEqual(len(result), 5) + assert len(result) == 5 result = df.loc[1.1:5] expected = df.reindex([2.5, 3.5, 4.5, 5.0]) assert_frame_equal(result, expected) - self.assertEqual(len(result), 4) + assert len(result) == 4 result = df.loc[4.51:5] expected = 
df.reindex([5.0]) assert_frame_equal(result, expected) - self.assertEqual(len(result), 1) + assert len(result) == 1 result = df.loc[1.0:5.0] expected = df.reindex([1.0, 2.5, 3.5, 4.5, 5.0]) assert_frame_equal(result, expected) - self.assertEqual(len(result), 5) + assert len(result) == 5 cp = df.copy() cp.loc[1.0:5.0] = 0 @@ -1621,7 +1621,7 @@ def test_getitem_list_duplicates(self): df.columns.name = 'foo' result = df[['B', 'C']] - self.assertEqual(result.columns.name, 'foo') + assert result.columns.name == 'foo' expected = df.iloc[:, 2:] assert_frame_equal(result, expected) @@ -1631,7 +1631,7 @@ def test_get_value(self): for col in self.frame.columns: result = self.frame.get_value(idx, col) expected = self.frame[col][idx] - self.assertEqual(result, expected) + assert result == expected def test_lookup(self): def alt(df, rows, cols, dtype): @@ -1657,7 +1657,7 @@ def testit(df): df['mask'] = df.lookup(df.index, 'mask_' + df['label']) exp_mask = alt(df, df.index, 'mask_' + df['label'], dtype=np.bool_) tm.assert_series_equal(df['mask'], pd.Series(exp_mask, name='mask')) - self.assertEqual(df['mask'].dtype, np.bool_) + assert df['mask'].dtype == np.bool_ with pytest.raises(KeyError): self.frame.lookup(['xyz'], ['A']) @@ -1672,25 +1672,25 @@ def test_set_value(self): for idx in self.frame.index: for col in self.frame.columns: self.frame.set_value(idx, col, 1) - self.assertEqual(self.frame[col][idx], 1) + assert self.frame[col][idx] == 1 def test_set_value_resize(self): res = self.frame.set_value('foobar', 'B', 0) assert res is self.frame - self.assertEqual(res.index[-1], 'foobar') - self.assertEqual(res.get_value('foobar', 'B'), 0) + assert res.index[-1] == 'foobar' + assert res.get_value('foobar', 'B') == 0 self.frame.loc['foobar', 'qux'] = 0 - self.assertEqual(self.frame.get_value('foobar', 'qux'), 0) + assert self.frame.get_value('foobar', 'qux') == 0 res = self.frame.copy() res3 = res.set_value('foobar', 'baz', 'sam') - self.assertEqual(res3['baz'].dtype, np.object_) + assert res3['baz'].dtype == np.object_ res = self.frame.copy() res3 = res.set_value('foobar', 'baz', True) - self.assertEqual(res3['baz'].dtype, np.object_) + assert res3['baz'].dtype == np.object_ res = self.frame.copy() res3 = res.set_value('foobar', 'baz', 5) @@ -1705,24 +1705,24 @@ def test_set_value_with_index_dtype_change(self): # so column is not created df = df_orig.copy() df.set_value('C', 2, 1.0) - self.assertEqual(list(df.index), list(df_orig.index) + ['C']) - # self.assertEqual(list(df.columns), list(df_orig.columns) + [2]) + assert list(df.index) == list(df_orig.index) + ['C'] + # assert list(df.columns) == list(df_orig.columns) + [2] df = df_orig.copy() df.loc['C', 2] = 1.0 - self.assertEqual(list(df.index), list(df_orig.index) + ['C']) - # self.assertEqual(list(df.columns), list(df_orig.columns) + [2]) + assert list(df.index) == list(df_orig.index) + ['C'] + # assert list(df.columns) == list(df_orig.columns) + [2] # create both new df = df_orig.copy() df.set_value('C', 'D', 1.0) - self.assertEqual(list(df.index), list(df_orig.index) + ['C']) - self.assertEqual(list(df.columns), list(df_orig.columns) + ['D']) + assert list(df.index) == list(df_orig.index) + ['C'] + assert list(df.columns) == list(df_orig.columns) + ['D'] df = df_orig.copy() df.loc['C', 'D'] = 1.0 - self.assertEqual(list(df.index), list(df_orig.index) + ['C']) - self.assertEqual(list(df.columns), list(df_orig.columns) + ['D']) + assert list(df.index) == list(df_orig.index) + ['C'] + assert list(df.columns) == list(df_orig.columns) + ['D'] def 
test_get_set_value_no_partial_indexing(self): # partial w/ MultiIndex raise exception @@ -1874,7 +1874,7 @@ def test_iat(self): for j, col in enumerate(self.frame.columns): result = self.frame.iat[i, j] expected = self.frame.at[row, col] - self.assertEqual(result, expected) + assert result == expected def test_nested_exception(self): # Ignore the strange way of triggering the problem @@ -1941,7 +1941,7 @@ def test_reindex_frame_add_nat(self): def test_set_dataframe_column_ns_dtype(self): x = DataFrame([datetime.now(), datetime.now()]) - self.assertEqual(x[0].dtype, np.dtype('M8[ns]')) + assert x[0].dtype == np.dtype('M8[ns]') def test_non_monotonic_reindex_methods(self): dr = pd.date_range('2013-08-01', periods=6, freq='B') @@ -2095,13 +2095,13 @@ def test_setitem_with_unaligned_tz_aware_datetime_column(self): assert_series_equal(df['dates'], column) def test_setitem_datetime_coercion(self): - # GH 1048 + # gh-1048 df = pd.DataFrame({'c': [pd.Timestamp('2010-10-01')] * 3}) df.loc[0:1, 'c'] = np.datetime64('2008-08-08') - self.assertEqual(pd.Timestamp('2008-08-08'), df.loc[0, 'c']) - self.assertEqual(pd.Timestamp('2008-08-08'), df.loc[1, 'c']) + assert pd.Timestamp('2008-08-08') == df.loc[0, 'c'] + assert pd.Timestamp('2008-08-08') == df.loc[1, 'c'] df.loc[2, 'c'] = date(2005, 5, 5) - self.assertEqual(pd.Timestamp('2005-05-05'), df.loc[2, 'c']) + assert pd.Timestamp('2005-05-05') == df.loc[2, 'c'] def test_setitem_datetimelike_with_inference(self): # GH 7592 @@ -2139,14 +2139,14 @@ def test_at_time_between_time_datetimeindex(self): expected2 = df.iloc[ainds] assert_frame_equal(result, expected) assert_frame_equal(result, expected2) - self.assertEqual(len(result), 4) + assert len(result) == 4 result = df.between_time(bkey.start, bkey.stop) expected = df.loc[bkey] expected2 = df.iloc[binds] assert_frame_equal(result, expected) assert_frame_equal(result, expected2) - self.assertEqual(len(result), 12) + assert len(result) == 12 result = df.copy() result.loc[akey] = 0 @@ -2179,7 +2179,7 @@ def test_xs(self): if np.isnan(value): assert np.isnan(self.frame[item][idx]) else: - self.assertEqual(value, self.frame[item][idx]) + assert value == self.frame[item][idx] # mixed-type xs test_data = { @@ -2188,9 +2188,9 @@ def test_xs(self): } frame = DataFrame(test_data) xs = frame.xs('1') - self.assertEqual(xs.dtype, np.object_) - self.assertEqual(xs['A'], 1) - self.assertEqual(xs['B'], '1') + assert xs.dtype == np.object_ + assert xs['A'] == 1 + assert xs['B'] == '1' with pytest.raises(KeyError): self.tsframe.xs(self.tsframe.index[0] - BDay()) @@ -2266,10 +2266,10 @@ def test_index_namedtuple(self): with catch_warnings(record=True): result = df.ix[IndexType("foo", "bar")]["A"] - self.assertEqual(result, 1) + assert result == 1 result = df.loc[IndexType("foo", "bar")]["A"] - self.assertEqual(result, 1) + assert result == 1 def test_boolean_indexing(self): idx = lrange(3) @@ -2442,7 +2442,7 @@ def _check_set(df, cond, check_dtypes=True): for k, v in compat.iteritems(df.dtypes): if issubclass(v.type, np.integer) and not cond[k].all(): v = np.dtype('float64') - self.assertEqual(dfi[k].dtype, v) + assert dfi[k].dtype == v for df in [default_frame, self.mixed_frame, self.mixed_float, self.mixed_int]: @@ -3011,7 +3011,7 @@ def test_set_reset(self): # set/reset df = DataFrame({'A': [0, 1, 2]}, index=idx) result = df.reset_index() - self.assertEqual(result['foo'].dtype, np.dtype('uint64')) + assert result['foo'].dtype == np.dtype('uint64') df = result.set_index('foo') tm.assert_index_equal(df.index, idx) diff 
--git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 17f12679ae92e..ffba141ddc15d 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -493,7 +493,7 @@ def test_fillna_col_reordering(self): data = np.random.rand(20, 5) df = DataFrame(index=lrange(20), columns=cols, data=data) filled = df.fillna(method='ffill') - self.assertEqual(df.columns.tolist(), filled.columns.tolist()) + assert df.columns.tolist() == filled.columns.tolist() def test_fill_corner(self): mf = self.mixed_frame diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index fbd1b7be3e431..ac76970aaa901 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -150,7 +150,7 @@ def test_insert(self): df.columns.name = 'some_name' # preserve columns name field df.insert(0, 'baz', df['c']) - self.assertEqual(df.columns.name, 'some_name') + assert df.columns.name == 'some_name' # GH 13522 df = DataFrame(index=['A', 'B', 'C']) @@ -197,7 +197,7 @@ def test_pop(self): self.frame['foo'] = 'bar' self.frame.pop('foo') assert 'foo' not in self.frame - # TODO self.assertEqual(self.frame.columns.name, 'baz') + # TODO assert self.frame.columns.name == 'baz' # gh-10912: inplace ops cause caching issue a = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[ @@ -219,12 +219,12 @@ def test_pop_non_unique_cols(self): df.columns = ["a", "b", "a"] res = df.pop("a") - self.assertEqual(type(res), DataFrame) - self.assertEqual(len(res), 2) - self.assertEqual(len(df.columns), 1) + assert type(res) == DataFrame + assert len(res) == 2 + assert len(df.columns) == 1 assert "b" in df.columns assert "a" not in df.columns - self.assertEqual(len(df.index), 2) + assert len(df.index) == 2 def test_insert_column_bug_4032(self): diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 61dd92fcd1fab..4bc0176b570e3 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -425,8 +425,8 @@ def test_columns_with_dups(self): columns=df_float.columns) df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1) - self.assertEqual(len(df._data._blknos), len(df.columns)) - self.assertEqual(len(df._data._blklocs), len(df.columns)) + assert len(df._data._blknos) == len(df.columns) + assert len(df._data._blklocs) == len(df.columns) # testing iloc for i in range(len(df.columns)): diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index efe167297627a..9083b7952909e 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -41,7 +41,7 @@ def test_operators(self): for idx, val in compat.iteritems(series): origVal = self.frame[col][idx] * 2 if not np.isnan(val): - self.assertEqual(val, origVal) + assert val == origVal else: assert np.isnan(origVal) @@ -49,7 +49,7 @@ def test_operators(self): for idx, val in compat.iteritems(series): origVal = self.frame[col][idx] + colSeries[col] if not np.isnan(val): - self.assertEqual(val, origVal) + assert val == origVal else: assert np.isnan(origVal) @@ -278,14 +278,14 @@ def _check_bin_op(op): result = op(df1, df2) expected = DataFrame(op(df1.values, df2.values), index=df1.index, columns=df1.columns) - self.assertEqual(result.values.dtype, np.bool_) + assert result.values.dtype == np.bool_ assert_frame_equal(result, expected) def _check_unary_op(op): result = op(df1) expected = 
DataFrame(op(df1.values), index=df1.index, columns=df1.columns) - self.assertEqual(result.values.dtype, np.bool_) + assert result.values.dtype == np.bool_ assert_frame_equal(result, expected) df1 = {'a': {'a': True, 'b': False, 'c': False, 'd': True, 'e': True}, @@ -861,9 +861,9 @@ def test_combineSeries(self): for key, col in compat.iteritems(self.tsframe): result = col + ts assert_series_equal(added[key], result, check_names=False) - self.assertEqual(added[key].name, key) + assert added[key].name == key if col.name == ts.name: - self.assertEqual(result.name, 'A') + assert result.name == 'A' else: assert result.name is None @@ -891,7 +891,7 @@ def test_combineSeries(self): # empty but with non-empty index frame = self.tsframe[:1].reindex(columns=[]) result = frame.mul(ts, axis='index') - self.assertEqual(len(result), len(ts)) + assert len(result) == len(ts) def test_combineFunc(self): result = self.frame * 2 @@ -906,7 +906,7 @@ def test_combineFunc(self): result = self.empty * 2 assert result.index is self.empty.index - self.assertEqual(len(result.columns), 0) + assert len(result.columns) == 0 def test_comparisons(self): df1 = tm.makeTimeDataFrame() diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py index 0ca37de6bf2d4..826ece2ed2c9b 100644 --- a/pandas/tests/frame/test_period.py +++ b/pandas/tests/frame/test_period.py @@ -37,8 +37,8 @@ def test_frame_setitem(self): df['Index'] = rng rs = Index(df['Index']) tm.assert_index_equal(rs, rng, check_names=False) - self.assertEqual(rs.name, 'Index') - self.assertEqual(rng.name, 'index') + assert rs.name == 'Index' + assert rng.name == 'index' rs = df.reset_index().set_index('index') assert isinstance(rs.index, PeriodIndex) @@ -117,8 +117,8 @@ def _get_with_delta(delta, freq='A-DEC'): tm.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) tm.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) # PeriodIndex.to_timestamp always use 'infer' - self.assertEqual(result1.columns.freqstr, 'AS-JAN') - self.assertEqual(result2.columns.freqstr, 'AS-JAN') + assert result1.columns.freqstr == 'AS-JAN' + assert result2.columns.freqstr == 'AS-JAN' def test_frame_index_to_string(self): index = PeriodIndex(['2011-1', '2011-2', '2011-3'], freq='M') diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 406f8107952ef..33f72cde1b9a3 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -23,12 +23,12 @@ def test_quantile(self): from numpy import percentile q = self.tsframe.quantile(0.1, axis=0) - self.assertEqual(q['A'], percentile(self.tsframe['A'], 10)) + assert q['A'] == percentile(self.tsframe['A'], 10) tm.assert_index_equal(q.index, self.tsframe.columns) q = self.tsframe.quantile(0.9, axis=1) - self.assertEqual(q['2000-01-17'], - percentile(self.tsframe.loc['2000-01-17'], 90)) + assert (q['2000-01-17'] == + percentile(self.tsframe.loc['2000-01-17'], 90)) tm.assert_index_equal(q.index, self.tsframe.index) # test degenerate case @@ -102,7 +102,7 @@ def test_quantile_axis_parameter(self): pytest.raises(ValueError, df.quantile, 0.1, axis="column") def test_quantile_interpolation(self): - # GH #10174 + # see gh-10174 if _np_version_under1p9: pytest.skip("Numpy version under 1.9") @@ -110,32 +110,32 @@ def test_quantile_interpolation(self): # interpolation = linear (default case) q = self.tsframe.quantile(0.1, axis=0, interpolation='linear') - self.assertEqual(q['A'], percentile(self.tsframe['A'], 10)) + assert q['A'] == 
percentile(self.tsframe['A'], 10) q = self.intframe.quantile(0.1) - self.assertEqual(q['A'], percentile(self.intframe['A'], 10)) + assert q['A'] == percentile(self.intframe['A'], 10) # test with and without interpolation keyword q1 = self.intframe.quantile(0.1) - self.assertEqual(q1['A'], np.percentile(self.intframe['A'], 10)) - assert_series_equal(q, q1) + assert q1['A'] == np.percentile(self.intframe['A'], 10) + tm.assert_series_equal(q, q1) # interpolation method other than default linear df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) result = df.quantile(.5, axis=1, interpolation='nearest') expected = Series([1, 2, 3], index=[1, 2, 3], name=0.5) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # cross-check interpolation=nearest results in original dtype exp = np.percentile(np.array([[1, 2, 3], [2, 3, 4]]), .5, axis=0, interpolation='nearest') expected = Series(exp, index=[1, 2, 3], name=0.5, dtype='int64') - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # float df = DataFrame({"A": [1., 2., 3.], "B": [2., 3., 4.]}, index=[1, 2, 3]) result = df.quantile(.5, axis=1, interpolation='nearest') expected = Series([1., 2., 3.], index=[1, 2, 3], name=0.5) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) exp = np.percentile(np.array([[1., 2., 3.], [2., 3., 4.]]), .5, axis=0, interpolation='nearest') expected = Series(exp, index=[1, 2, 3], name=0.5, dtype='float64') @@ -167,7 +167,7 @@ def test_quantile_interpolation(self): assert_frame_equal(result, expected) def test_quantile_interpolation_np_lt_1p9(self): - # GH #10174 + # see gh-10174 if not _np_version_under1p9: pytest.skip("Numpy version is greater than 1.9") @@ -175,33 +175,33 @@ def test_quantile_interpolation_np_lt_1p9(self): # interpolation = linear (default case) q = self.tsframe.quantile(0.1, axis=0, interpolation='linear') - self.assertEqual(q['A'], percentile(self.tsframe['A'], 10)) + assert q['A'] == percentile(self.tsframe['A'], 10) q = self.intframe.quantile(0.1) - self.assertEqual(q['A'], percentile(self.intframe['A'], 10)) + assert q['A'] == percentile(self.intframe['A'], 10) # test with and without interpolation keyword q1 = self.intframe.quantile(0.1) - self.assertEqual(q1['A'], np.percentile(self.intframe['A'], 10)) + assert q1['A'] == np.percentile(self.intframe['A'], 10) assert_series_equal(q, q1) # interpolation method other than default linear - expErrMsg = "Interpolation methods other than linear" + msg = "Interpolation methods other than linear" df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) - with tm.assert_raises_regex(ValueError, expErrMsg): + with tm.assert_raises_regex(ValueError, msg): df.quantile(.5, axis=1, interpolation='nearest') - with tm.assert_raises_regex(ValueError, expErrMsg): + with tm.assert_raises_regex(ValueError, msg): df.quantile([.5, .75], axis=1, interpolation='lower') # test degenerate case df = DataFrame({'x': [], 'y': []}) - with tm.assert_raises_regex(ValueError, expErrMsg): + with tm.assert_raises_regex(ValueError, msg): q = df.quantile(0.1, axis=0, interpolation='higher') # multi df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=['a', 'b', 'c']) - with tm.assert_raises_regex(ValueError, expErrMsg): + with tm.assert_raises_regex(ValueError, msg): df.quantile([.25, .5], interpolation='midpoint') def test_quantile_multi(self): diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 
575906fb5c8b2..80db2c50c3eb6 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -808,7 +808,7 @@ def test_nested_scope(self): # smoke test x = 1 # noqa result = pd.eval('x + 1', engine=engine, parser=parser) - self.assertEqual(result, 2) + assert result == 2 df = DataFrame(np.random.randn(5, 3)) df2 = DataFrame(np.random.randn(5, 3)) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index 87075e6d6e631..3f160012cb446 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -548,7 +548,7 @@ def test_regex_replace_numeric_to_object_conversion(self): expec = DataFrame({'a': ['a', 1, 2, 3], 'b': mix['b'], 'c': mix['c']}) res = df.replace(0, 'a') assert_frame_equal(res, expec) - self.assertEqual(res.a.dtype, np.object_) + assert res.a.dtype == np.object_ def test_replace_regex_metachar(self): metachars = '[]', '()', r'\d', r'\w', r'\s' diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index dbdbebddcc0b5..74301b918bd02 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -132,11 +132,11 @@ def test_repr_unicode(self): result = repr(df) ex_top = ' A' - self.assertEqual(result.split('\n')[0].rstrip(), ex_top) + assert result.split('\n')[0].rstrip() == ex_top df = DataFrame({'A': [uval, uval]}) result = repr(df) - self.assertEqual(result.split('\n')[0].rstrip(), ex_top) + assert result.split('\n')[0].rstrip() == ex_top def test_unicode_string_with_unicode(self): df = DataFrame({'A': [u("\u05d0")]}) @@ -186,7 +186,7 @@ def test_latex_repr(self): with option_context("display.latex.escape", False, 'display.latex.repr', True): df = DataFrame([[r'$\alpha$', 'b', 'c'], [1, 2, 3]]) - self.assertEqual(result, df._repr_latex_()) + assert result == df._repr_latex_() # GH 12182 assert df._repr_latex_() is None @@ -217,7 +217,7 @@ def test_info_wide(self): set_option('display.max_info_columns', 101) io = StringIO() df.info(buf=io) - self.assertEqual(rs, xp) + assert rs == xp reset_option('display.max_info_columns') def test_info_duplicate_columns(self): @@ -237,8 +237,8 @@ def test_info_duplicate_columns_shows_correct_dtypes(self): frame.info(buf=io) io.seek(0) lines = io.readlines() - self.assertEqual('a 1 non-null int64\n', lines[3]) - self.assertEqual('a 1 non-null float64\n', lines[4]) + assert 'a 1 non-null int64\n' == lines[3] + assert 'a 1 non-null float64\n' == lines[4] def test_info_shows_column_dtypes(self): dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]', @@ -263,7 +263,7 @@ def test_info_max_cols(self): buf = StringIO() df.info(buf=buf, verbose=verbose) res = buf.getvalue() - self.assertEqual(len(res.strip().split('\n')), len_) + assert len(res.strip().split('\n')) == len_ for len_, verbose in [(10, None), (5, False), (10, True)]: @@ -272,7 +272,7 @@ def test_info_max_cols(self): buf = StringIO() df.info(buf=buf, verbose=verbose) res = buf.getvalue() - self.assertEqual(len(res.strip().split('\n')), len_) + assert len(res.strip().split('\n')) == len_ for len_, max_cols in [(10, 5), (5, 4)]: # setting truncates @@ -280,14 +280,14 @@ def test_info_max_cols(self): buf = StringIO() df.info(buf=buf, max_cols=max_cols) res = buf.getvalue() - self.assertEqual(len(res.strip().split('\n')), len_) + assert len(res.strip().split('\n')) == len_ # setting wouldn't truncate with option_context('max_info_columns', 5): buf = StringIO() df.info(buf=buf, max_cols=max_cols) res = buf.getvalue() - 
self.assertEqual(len(res.strip().split('\n')), len_) + assert len(res.strip().split('\n')) == len_ def test_info_memory_usage(self): # Ensure memory usage is displayed, when asserted, on the last line @@ -352,15 +352,14 @@ def test_info_memory_usage(self): # (cols * rows * bytes) + index size df_size = df.memory_usage().sum() exp_size = len(dtypes) * n * 8 + df.index.nbytes - self.assertEqual(df_size, exp_size) + assert df_size == exp_size # Ensure number of cols in memory_usage is the same as df size_df = np.size(df.columns.values) + 1 # index=True; default - self.assertEqual(size_df, np.size(df.memory_usage())) + assert size_df == np.size(df.memory_usage()) # assert deep works only on object - self.assertEqual(df.memory_usage().sum(), - df.memory_usage(deep=True).sum()) + assert df.memory_usage().sum() == df.memory_usage(deep=True).sum() # test for validity DataFrame(1, index=['a'], columns=['A'] @@ -428,7 +427,7 @@ def memory_usage(f): df = DataFrame({'value': np.random.randn(N * M)}, index=index) unstacked = df.unstack('id') - self.assertEqual(df.values.nbytes, unstacked.values.nbytes) + assert df.values.nbytes == unstacked.values.nbytes assert memory_usage(df) > memory_usage(unstacked) # high upper bound diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 9c48233ff29cd..79ee76ee362c3 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -41,25 +41,25 @@ def test_pivot(self): 'One': {'A': 1., 'B': 2., 'C': 3.}, 'Two': {'A': 1., 'B': 2., 'C': 3.} }) - expected.index.name, expected.columns.name = 'index', 'columns' - assert_frame_equal(pivoted, expected) + expected.index.name, expected.columns.name = 'index', 'columns' + tm.assert_frame_equal(pivoted, expected) # name tracking - self.assertEqual(pivoted.index.name, 'index') - self.assertEqual(pivoted.columns.name, 'columns') + assert pivoted.index.name == 'index' + assert pivoted.columns.name == 'columns' # don't specify values pivoted = frame.pivot(index='index', columns='columns') - self.assertEqual(pivoted.index.name, 'index') - self.assertEqual(pivoted.columns.names, (None, 'columns')) + assert pivoted.index.name == 'index' + assert pivoted.columns.names == (None, 'columns') with catch_warnings(record=True): # pivot multiple columns wp = tm.makePanel() lp = wp.to_frame() df = lp.reset_index() - assert_frame_equal(df.pivot('major', 'minor'), lp.unstack()) + tm.assert_frame_equal(df.pivot('major', 'minor'), lp.unstack()) def test_pivot_duplicates(self): data = DataFrame({'a': ['bar', 'bar', 'foo', 'foo', 'foo'], @@ -72,7 +72,7 @@ def test_pivot_empty(self): df = DataFrame({}, columns=['a', 'b', 'c']) result = df.pivot('a', 'b', 'c') expected = DataFrame({}) - assert_frame_equal(result, expected, check_names=False) + tm.assert_frame_equal(result, expected, check_names=False) def test_pivot_integer_bug(self): df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")]) @@ -106,21 +106,14 @@ def test_pivot_index_none(self): ('values', 'Two')], names=[None, 'columns']) expected.index.name = 'index' - assert_frame_equal(result, expected, check_names=False) - self.assertEqual(result.index.name, 'index',) - self.assertEqual(result.columns.names, (None, 'columns')) + tm.assert_frame_equal(result, expected, check_names=False) + assert result.index.name == 'index' + assert result.columns.names == (None, 'columns') expected.columns = expected.columns.droplevel(0) - - data = { - 'index': range(7), - 'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'], - 'values': [1., 2., 
3., 3., 2., 1.] - } - result = frame.pivot(columns='columns', values='values') expected.columns.name = 'columns' - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_stack_unstack(self): f = self.frame.copy() @@ -516,8 +509,8 @@ def test_unstack_dtypes(self): right = right.set_index(['A', 'B']).unstack(0) right[('D', 'a')] = right[('D', 'a')].astype('int64') - self.assertEqual(left.shape, (3, 2)) - assert_frame_equal(left, right) + assert left.shape == (3, 2) + tm.assert_frame_equal(left, right) def test_unstack_non_unique_index_names(self): idx = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')], @@ -540,7 +533,7 @@ def verify(df): left = sorted(df.iloc[i, j].split('.')) right = mk_list(df.index[i]) + mk_list(df.columns[j]) right = sorted(list(map(cast, right))) - self.assertEqual(left, right) + assert left == right df = DataFrame({'jim': ['a', 'b', nan, 'd'], 'joe': ['w', 'x', 'y', 'z'], @@ -554,7 +547,7 @@ def verify(df): mi = df.set_index(list(idx)) for lev in range(2): udf = mi.unstack(level=lev) - self.assertEqual(udf.notnull().values.sum(), len(df)) + assert udf.notnull().values.sum() == len(df) verify(udf['jolie']) df = DataFrame({'1st': ['d'] * 3 + [nan] * 5 + ['a'] * 2 + @@ -572,7 +565,7 @@ def verify(df): mi = df.set_index(list(idx)) for lev in range(3): udf = mi.unstack(level=lev) - self.assertEqual(udf.notnull().values.sum(), 2 * len(df)) + assert udf.notnull().values.sum() == 2 * len(df) for col in ['4th', '5th']: verify(udf[col]) @@ -677,12 +670,12 @@ def verify(df): df.loc[1, '3rd'] = df.loc[4, '3rd'] = nan left = df.set_index(['1st', '2nd', '3rd']).unstack(['2nd', '3rd']) - self.assertEqual(left.notnull().values.sum(), 2 * len(df)) + assert left.notnull().values.sum() == 2 * len(df) for col in ['jim', 'joe']: for _, r in df.iterrows(): key = r['1st'], (col, r['2nd'], r['3rd']) - self.assertEqual(r[col], left.loc[key]) + assert r[col] == left.loc[key] def test_stack_datetime_column_multiIndex(self): # GH 8039 diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index ade696885c2e0..40a8ece852623 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -55,12 +55,12 @@ def custom_frame_function(self): # Do we get back our own Series class after selecting a column? cdf_series = cdf.col1 assert isinstance(cdf_series, CustomSeries) - self.assertEqual(cdf_series.custom_series_function(), 'OK') + assert cdf_series.custom_series_function() == 'OK' # Do we get back our own DF class after slicing row-wise? 
cdf_rows = cdf[1:5] assert isinstance(cdf_rows, CustomDataFrame) - self.assertEqual(cdf_rows.custom_frame_function(), 'OK') + assert cdf_rows.custom_frame_function() == 'OK' # Make sure sliced part of multi-index frame is custom class mcol = pd.MultiIndex.from_tuples([('A', 'A'), ('A', 'B')]) @@ -76,19 +76,19 @@ def test_dataframe_metadata(self): index=['a', 'b', 'c']) df.testattr = 'XXX' - self.assertEqual(df.testattr, 'XXX') - self.assertEqual(df[['X']].testattr, 'XXX') - self.assertEqual(df.loc[['a', 'b'], :].testattr, 'XXX') - self.assertEqual(df.iloc[[0, 1], :].testattr, 'XXX') + assert df.testattr == 'XXX' + assert df[['X']].testattr == 'XXX' + assert df.loc[['a', 'b'], :].testattr == 'XXX' + assert df.iloc[[0, 1], :].testattr == 'XXX' - # GH9776 - self.assertEqual(df.iloc[0:1, :].testattr, 'XXX') + # see gh-9776 + assert df.iloc[0:1, :].testattr == 'XXX' - # GH10553 + # see gh-10553 unpickled = tm.round_trip_pickle(df) tm.assert_frame_equal(df, unpickled) - self.assertEqual(df._metadata, unpickled._metadata) - self.assertEqual(df.testattr, unpickled.testattr) + assert df._metadata == unpickled._metadata + assert df.testattr == unpickled.testattr def test_indexing_sliced(self): # GH 11559 diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 910f04f0d63c6..f52f4697b1b08 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -38,7 +38,7 @@ def test_diff(self): s = Series([a, b]) rs = DataFrame({'s': s}).diff() - self.assertEqual(rs.s[1], 1) + assert rs.s[1] == 1 # mixed numeric tf = self.tsframe.astype('float32') @@ -71,7 +71,7 @@ def test_diff_mixed_dtype(self): df['A'] = np.array([1, 2, 3, 4, 5], dtype=object) result = df.diff() - self.assertEqual(result[0].dtype, np.float64) + assert result[0].dtype == np.float64 def test_diff_neg_n(self): rs = self.tsframe.diff(-1) @@ -153,7 +153,7 @@ def test_frame_add_datetime64_col_other_units(self): ex_vals = to_datetime(vals.astype('O')).values - self.assertEqual(df[unit].dtype, ns_dtype) + assert df[unit].dtype == ns_dtype assert (df[unit].values == ex_vals).all() # Test insertion into existing datetime64 column @@ -191,7 +191,7 @@ def test_shift(self): # shift by DateOffset shiftedFrame = self.tsframe.shift(5, freq=offsets.BDay()) - self.assertEqual(len(shiftedFrame), len(self.tsframe)) + assert len(shiftedFrame) == len(self.tsframe) shiftedFrame2 = self.tsframe.shift(5, freq='B') assert_frame_equal(shiftedFrame, shiftedFrame2) @@ -408,10 +408,10 @@ def test_first_last_valid(self): frame = DataFrame({'foo': mat}, index=self.frame.index) index = frame.first_valid_index() - self.assertEqual(index, frame.index[5]) + assert index == frame.index[5] index = frame.last_valid_index() - self.assertEqual(index, frame.index[-6]) + assert index == frame.index[-6] # GH12800 empty = DataFrame() @@ -446,7 +446,7 @@ def test_at_time_frame(self): rng = date_range('1/1/2012', freq='23Min', periods=384) ts = DataFrame(np.random.randn(len(rng), 2), rng) rs = ts.at_time('16:00') - self.assertEqual(len(rs), 0) + assert len(rs) == 0 def test_between_time_frame(self): rng = date_range('1/1/2000', '1/5/2000', freq='5min') @@ -463,7 +463,7 @@ def test_between_time_frame(self): if not inc_end: exp_len -= 4 - self.assertEqual(len(filtered), exp_len) + assert len(filtered) == exp_len for rs in filtered.index: t = rs.time() if inc_start: @@ -495,7 +495,7 @@ def test_between_time_frame(self): if not inc_end: exp_len -= 4 - self.assertEqual(len(filtered), exp_len) + assert len(filtered) 
== exp_len for rs in filtered.index: t = rs.time() if inc_start: diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 11c10f1982558..3e38f2a71d99d 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -433,13 +433,13 @@ def test_to_csv_no_index(self): assert_frame_equal(df, result) def test_to_csv_with_mix_columns(self): - # GH11637, incorrect output when a mix of integer and string column + # gh-11637: incorrect output when a mix of integer and string column # names passed as columns parameter in to_csv df = DataFrame({0: ['a', 'b', 'c'], 1: ['aa', 'bb', 'cc']}) df['test'] = 'txt' - self.assertEqual(df.to_csv(), df.to_csv(columns=[0, 1, 'test'])) + assert df.to_csv() == df.to_csv(columns=[0, 1, 'test']) def test_to_csv_headers(self): # GH6186, the presence or absence of `index` incorrectly @@ -475,7 +475,7 @@ def test_to_csv_multiindex(self): # TODO to_csv drops column name assert_frame_equal(frame, df, check_names=False) - self.assertEqual(frame.index.names, df.index.names) + assert frame.index.names == df.index.names # needed if setUP becomes a classmethod self.frame.index = old_index @@ -494,7 +494,7 @@ def test_to_csv_multiindex(self): # do not load index tsframe.to_csv(path) recons = DataFrame.from_csv(path, index_col=None) - self.assertEqual(len(recons.columns), len(tsframe.columns) + 2) + assert len(recons.columns) == len(tsframe.columns) + 2 # no index tsframe.to_csv(path, index=False) @@ -604,7 +604,7 @@ def _make_frame(names=None): exp.index = [] tm.assert_index_equal(recons.columns, exp.columns) - self.assertEqual(len(recons), 0) + assert len(recons) == 0 def test_to_csv_float32_nanrep(self): df = DataFrame(np.random.randn(1, 4).astype(np.float32)) @@ -615,7 +615,7 @@ def test_to_csv_float32_nanrep(self): with open(path) as f: lines = f.readlines() - self.assertEqual(lines[1].split(',')[2], '999') + assert lines[1].split(',')[2] == '999' def test_to_csv_withcommas(self): @@ -813,7 +813,7 @@ def test_to_csv_unicodewriter_quoting(self): '2,"bar"\n' '3,"baz"\n') - self.assertEqual(result, expected) + assert result == expected def test_to_csv_quote_none(self): # GH4328 @@ -824,7 +824,7 @@ def test_to_csv_quote_none(self): encoding=encoding, index=False) result = buf.getvalue() expected = 'A\nhello\n{"hello"}\n' - self.assertEqual(result, expected) + assert result == expected def test_to_csv_index_no_leading_comma(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, @@ -836,7 +836,7 @@ def test_to_csv_index_no_leading_comma(self): 'one,1,4\n' 'two,2,5\n' 'three,3,6\n') - self.assertEqual(buf.getvalue(), expected) + assert buf.getvalue() == expected def test_to_csv_line_terminators(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, @@ -848,7 +848,7 @@ def test_to_csv_line_terminators(self): 'one,1,4\r\n' 'two,2,5\r\n' 'three,3,6\r\n') - self.assertEqual(buf.getvalue(), expected) + assert buf.getvalue() == expected buf = StringIO() df.to_csv(buf) # The default line terminator remains \n @@ -856,7 +856,7 @@ def test_to_csv_line_terminators(self): 'one,1,4\n' 'two,2,5\n' 'three,3,6\n') - self.assertEqual(buf.getvalue(), expected) + assert buf.getvalue() == expected def test_to_csv_from_csv_categorical(self): @@ -868,7 +868,7 @@ def test_to_csv_from_csv_categorical(self): s.to_csv(res) exp = StringIO() s2.to_csv(exp) - self.assertEqual(res.getvalue(), exp.getvalue()) + assert res.getvalue() == exp.getvalue() df = DataFrame({"s": s}) df2 = DataFrame({"s": s2}) @@ -876,7 +876,7 @@ def 
test_to_csv_from_csv_categorical(self): df.to_csv(res) exp = StringIO() df2.to_csv(exp) - self.assertEqual(res.getvalue(), exp.getvalue()) + assert res.getvalue() == exp.getvalue() def test_to_csv_path_is_none(self): # GH 8215 @@ -1078,13 +1078,13 @@ def test_to_csv_quoting(self): 1,False,3.2,,"b,c" """ result = df.to_csv() - self.assertEqual(result, expected) + assert result == expected result = df.to_csv(quoting=None) - self.assertEqual(result, expected) + assert result == expected result = df.to_csv(quoting=csv.QUOTE_MINIMAL) - self.assertEqual(result, expected) + assert result == expected expected = """\ "","c_bool","c_float","c_int","c_string" @@ -1092,7 +1092,7 @@ def test_to_csv_quoting(self): "1","False","3.2","","b,c" """ result = df.to_csv(quoting=csv.QUOTE_ALL) - self.assertEqual(result, expected) + assert result == expected # see gh-12922, gh-13259: make sure changes to # the formatters do not break this behaviour @@ -1102,7 +1102,7 @@ def test_to_csv_quoting(self): 1,False,3.2,"","b,c" """ result = df.to_csv(quoting=csv.QUOTE_NONNUMERIC) - self.assertEqual(result, expected) + assert result == expected msg = "need to escape, but no escapechar set" tm.assert_raises_regex(csv.Error, msg, df.to_csv, @@ -1118,7 +1118,7 @@ def test_to_csv_quoting(self): """ result = df.to_csv(quoting=csv.QUOTE_NONE, escapechar='!') - self.assertEqual(result, expected) + assert result == expected expected = """\ ,c_bool,c_ffloat,c_int,c_string @@ -1127,7 +1127,7 @@ def test_to_csv_quoting(self): """ result = df.to_csv(quoting=csv.QUOTE_NONE, escapechar='f') - self.assertEqual(result, expected) + assert result == expected # see gh-3503: quoting Windows line terminators # presents with encoding? @@ -1135,14 +1135,14 @@ def test_to_csv_quoting(self): df = pd.read_csv(StringIO(text)) buf = StringIO() df.to_csv(buf, encoding='utf-8', index=False) - self.assertEqual(buf.getvalue(), text) + assert buf.getvalue() == text # xref gh-7791: make sure the quoting parameter is passed through # with multi-indexes df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]}) df = df.set_index(['a', 'b']) expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n' - self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected) + assert df.to_csv(quoting=csv.QUOTE_ALL) == expected def test_period_index_date_overflow(self): # see gh-15982 diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index e3f166d2294e2..310a5aca77b77 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -197,7 +197,7 @@ def test_agg_ser_multi_key(self): def test_agg_apply_corner(self): # nothing to group, all NA grouped = self.ts.groupby(self.ts * np.nan) - self.assertEqual(self.ts.dtype, np.float64) + assert self.ts.dtype == np.float64 # groupby float64 values results in Float64Index exp = Series([], dtype=np.float64, index=pd.Index( @@ -445,7 +445,7 @@ def test_aggregate_item_by_item(self): # def aggfun(ser): # return len(ser + 'a') # result = grouped.agg(aggfun) - # self.assertEqual(len(result.columns), 1) + # assert len(result.columns) == 1 aggfun = lambda ser: ser.size result = grouped.agg(aggfun) @@ -468,7 +468,7 @@ def aggfun(ser): result = DataFrame().groupby(self.df.A).agg(aggfun) assert isinstance(result, DataFrame) - self.assertEqual(len(result), 0) + assert len(result) == 0 def test_agg_item_by_item_raise_typeerror(self): from numpy.random import randint diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 
b9a731f2204da..9d2134927389d 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -48,7 +48,7 @@ def get_stats(group): 'mean': group.mean()} result = self.df.groupby(cats).D.apply(get_stats) - self.assertEqual(result.index.names[0], 'C') + assert result.index.names[0] == 'C' def test_apply_categorical_data(self): # GH 10138 diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 278682ccb8d45..09643e918af31 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -41,10 +41,10 @@ def checkit(dtype): grouped = data.groupby(lambda x: x // 3) for k, v in grouped: - self.assertEqual(len(v), 3) + assert len(v) == 3 agged = grouped.aggregate(np.mean) - self.assertEqual(agged[1], 1) + assert agged[1] == 1 assert_series_equal(agged, grouped.agg(np.mean)) # shorthand assert_series_equal(agged, grouped.mean()) @@ -52,7 +52,7 @@ def checkit(dtype): expected = grouped.apply(lambda x: x * x.sum()) transformed = grouped.transform(lambda x: x * x.sum()) - self.assertEqual(transformed[7], 12) + assert transformed[7] == 12 assert_series_equal(transformed, expected) value_grouped = data.groupby(data) @@ -68,7 +68,7 @@ def checkit(dtype): group_constants = {0: 10, 1: 20, 2: 30} agged = grouped.agg(lambda x: group_constants[x.name] + x.mean()) - self.assertEqual(agged[1], 21) + assert agged[1] == 21 # corner cases pytest.raises(Exception, grouped.aggregate, lambda x: x * 2) @@ -423,10 +423,10 @@ def test_grouper_getting_correct_binner(self): assert_frame_equal(result, expected) def test_grouper_iter(self): - self.assertEqual(sorted(self.df.groupby('A').grouper), ['bar', 'foo']) + assert sorted(self.df.groupby('A').grouper) == ['bar', 'foo'] def test_empty_groups(self): - # GH # 1048 + # see gh-1048 pytest.raises(ValueError, self.df.groupby, []) def test_groupby_grouper(self): @@ -434,7 +434,7 @@ def test_groupby_grouper(self): result = self.df.groupby(grouped.grouper).mean() expected = grouped.mean() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_groupby_duplicated_column_errormsg(self): # GH7511 @@ -744,17 +744,17 @@ def test_len(self): df = tm.makeTimeDataFrame() grouped = df.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]) - self.assertEqual(len(grouped), len(df)) + assert len(grouped) == len(df) grouped = df.groupby([lambda x: x.year, lambda x: x.month]) expected = len(set([(x.year, x.month) for x in df.index])) - self.assertEqual(len(grouped), expected) + assert len(grouped) == expected # issue 11016 df = pd.DataFrame(dict(a=[np.nan] * 3, b=[1, 2, 3])) - self.assertEqual(len(df.groupby(('a'))), 0) - self.assertEqual(len(df.groupby(('b'))), 3) - self.assertEqual(len(df.groupby(('a', 'b'))), 3) + assert len(df.groupby(('a'))) == 0 + assert len(df.groupby(('b'))) == 3 + assert len(df.groupby(('a', 'b'))) == 3 def test_groups(self): grouped = self.df.groupby(['A']) @@ -900,7 +900,7 @@ def test_series_describe_single(self): def test_series_index_name(self): grouped = self.df.loc[:, ['C']].groupby(self.df['A']) result = grouped.agg(lambda x: x.mean()) - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' def test_frame_describe_multikey(self): grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) @@ -962,8 +962,8 @@ def test_frame_groupby(self): # aggregate aggregated = grouped.aggregate(np.mean) - self.assertEqual(len(aggregated), 5) - self.assertEqual(len(aggregated.columns), 4) + 
assert len(aggregated) == 5 + assert len(aggregated.columns) == 4 # by string tscopy = self.tsframe.copy() @@ -974,8 +974,8 @@ def test_frame_groupby(self): # transform grouped = self.tsframe.head(30).groupby(lambda x: x.weekday()) transformed = grouped.transform(lambda x: x - x.mean()) - self.assertEqual(len(transformed), 30) - self.assertEqual(len(transformed.columns), 4) + assert len(transformed) == 30 + assert len(transformed.columns) == 4 # transform propagate transformed = grouped.transform(lambda x: x.mean()) @@ -987,7 +987,7 @@ def test_frame_groupby(self): # iterate for weekday, group in grouped: - self.assertEqual(group.index[0].weekday(), weekday) + assert group.index[0].weekday() == weekday # groups / group_indices groups = grouped.groups @@ -1013,8 +1013,8 @@ def test_frame_groupby_columns(self): # aggregate aggregated = grouped.aggregate(np.mean) - self.assertEqual(len(aggregated), len(self.tsframe)) - self.assertEqual(len(aggregated.columns), 2) + assert len(aggregated) == len(self.tsframe) + assert len(aggregated.columns) == 2 # transform tf = lambda x: x - x.mean() @@ -1023,34 +1023,34 @@ def test_frame_groupby_columns(self): # iterate for k, v in grouped: - self.assertEqual(len(v.columns), 2) + assert len(v.columns) == 2 def test_frame_set_name_single(self): grouped = self.df.groupby('A') result = grouped.mean() - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' result = self.df.groupby('A', as_index=False).mean() self.assertNotEqual(result.index.name, 'A') result = grouped.agg(np.mean) - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' result = grouped.agg({'C': np.mean, 'D': np.std}) - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' result = grouped['C'].mean() - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' result = grouped['C'].agg(np.mean) - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' result = grouped['C'].agg([np.mean, np.std]) - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = grouped['C'].agg({'foo': np.mean, 'bar': np.std}) - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' def test_multi_iter(self): s = Series(np.arange(6)) @@ -1064,8 +1064,8 @@ def test_multi_iter(self): ('b', '1', s[[4]]), ('b', '2', s[[3, 5]])] for i, ((one, two), three) in enumerate(iterated): e1, e2, e3 = expected[i] - self.assertEqual(e1, one) - self.assertEqual(e2, two) + assert e1 == one + assert e2 == two assert_series_equal(three, e3) def test_multi_iter_frame(self): @@ -1087,8 +1087,8 @@ def test_multi_iter_frame(self): ('b', '2', df.loc[idx[[1]]])] for i, ((one, two), three) in enumerate(iterated): e1, e2, e3 = expected[i] - self.assertEqual(e1, one) - self.assertEqual(e2, two) + assert e1 == one + assert e2 == two assert_frame_equal(three, e3) # don't iterate through groups with no data @@ -1098,7 +1098,7 @@ def test_multi_iter_frame(self): groups = {} for key, gp in grouped: groups[key] = gp - self.assertEqual(len(groups), 2) + assert len(groups) == 2 # axis = 1 three_levels = self.three_group.groupby(['A', 'B', 'C']).mean() @@ -1563,7 +1563,7 @@ def test_empty_groups_corner(self): agged = grouped.apply(lambda x: x.mean()) agged_A = grouped['A'].apply(np.mean) assert_series_equal(agged['A'], agged_A) - self.assertEqual(agged.index.name, 'first') + assert agged.index.name == 'first' def 
test_apply_concat_preserve_names(self): grouped = self.three_group.groupby(['A', 'B']) @@ -1591,13 +1591,13 @@ def desc3(group): return result result = grouped.apply(desc) - self.assertEqual(result.index.names, ('A', 'B', 'stat')) + assert result.index.names == ('A', 'B', 'stat') result2 = grouped.apply(desc2) - self.assertEqual(result2.index.names, ('A', 'B', 'stat')) + assert result2.index.names == ('A', 'B', 'stat') result3 = grouped.apply(desc3) - self.assertEqual(result3.index.names, ('A', 'B', None)) + assert result3.index.names == ('A', 'B', None) def test_nonsense_func(self): df = DataFrame([0]) @@ -1789,7 +1789,7 @@ def aggfun(ser): return ser.sum() agged2 = df.groupby(keys).aggregate(aggfun) - self.assertEqual(len(agged2.columns) + 1, len(df.columns)) + assert len(agged2.columns) + 1 == len(df.columns) def test_groupby_level(self): frame = self.mframe @@ -1804,13 +1804,13 @@ def test_groupby_level(self): expected0 = expected0.reindex(frame.index.levels[0]) expected1 = expected1.reindex(frame.index.levels[1]) - self.assertEqual(result0.index.name, 'first') - self.assertEqual(result1.index.name, 'second') + assert result0.index.name == 'first' + assert result1.index.name == 'second' assert_frame_equal(result0, expected0) assert_frame_equal(result1, expected1) - self.assertEqual(result0.index.name, frame.index.names[0]) - self.assertEqual(result1.index.name, frame.index.names[1]) + assert result0.index.name == frame.index.names[0] + assert result1.index.name == frame.index.names[1] # groupby level name result0 = frame.groupby(level='first').sum() @@ -1860,12 +1860,12 @@ def test_groupby_level_apply(self): frame = self.mframe result = frame.groupby(level=0).count() - self.assertEqual(result.index.name, 'first') + assert result.index.name == 'first' result = frame.groupby(level=1).count() - self.assertEqual(result.index.name, 'second') + assert result.index.name == 'second' result = frame['A'].groupby(level=0).count() - self.assertEqual(result.index.name, 'first') + assert result.index.name == 'first' def test_groupby_args(self): # PR8618 and issue 8015 @@ -1965,7 +1965,7 @@ def f(piece): def test_apply_series_yield_constant(self): result = self.df.groupby(['A', 'B'])['C'].apply(len) - self.assertEqual(result.index.names[:2], ('A', 'B')) + assert result.index.names[:2] == ('A', 'B') def test_apply_frame_yield_constant(self): # GH13568 @@ -1999,7 +1999,7 @@ def trans2(group): result = df.groupby('A').apply(trans) exp = df.groupby('A')['C'].apply(trans2) assert_series_equal(result, exp, check_names=False) - self.assertEqual(result.name, 'C') + assert result.name == 'C' def test_apply_transform(self): grouped = self.ts.groupby(lambda x: x.month) @@ -2161,17 +2161,17 @@ def test_size(self): grouped = self.df.groupby(['A', 'B']) result = grouped.size() for key, group in grouped: - self.assertEqual(result[key], len(group)) + assert result[key] == len(group) grouped = self.df.groupby('A') result = grouped.size() for key, group in grouped: - self.assertEqual(result[key], len(group)) + assert result[key] == len(group) grouped = self.df.groupby('B') result = grouped.size() for key, group in grouped: - self.assertEqual(result[key], len(group)) + assert result[key] == len(group) df = DataFrame(np.random.choice(20, (1000, 3)), columns=list('abc')) for sort, key in cart_product((False, True), ('a', 'b', ['a', 'b'])): @@ -2481,24 +2481,24 @@ def test_groupby_wrong_multi_labels(self): def test_groupby_series_with_name(self): result = self.df.groupby(self.df['A']).mean() result2 = 
self.df.groupby(self.df['A'], as_index=False).mean() - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' assert 'A' in result2 result = self.df.groupby([self.df['A'], self.df['B']]).mean() result2 = self.df.groupby([self.df['A'], self.df['B']], as_index=False).mean() - self.assertEqual(result.index.names, ('A', 'B')) + assert result.index.names == ('A', 'B') assert 'A' in result2 assert 'B' in result2 def test_seriesgroupby_name_attr(self): # GH 6265 result = self.df.groupby('A')['C'] - self.assertEqual(result.count().name, 'C') - self.assertEqual(result.mean().name, 'C') + assert result.count().name == 'C' + assert result.mean().name == 'C' testFunc = lambda x: np.sum(x) * 2 - self.assertEqual(result.agg(testFunc).name, 'C') + assert result.agg(testFunc).name == 'C' def test_consistency_name(self): # GH 12363 @@ -2530,11 +2530,11 @@ def summarize_random_name(df): }, name=df.iloc[0]['A']) metrics = self.df.groupby('A').apply(summarize) - self.assertEqual(metrics.columns.name, None) + assert metrics.columns.name is None metrics = self.df.groupby('A').apply(summarize, 'metrics') - self.assertEqual(metrics.columns.name, 'metrics') + assert metrics.columns.name == 'metrics' metrics = self.df.groupby('A').apply(summarize_random_name) - self.assertEqual(metrics.columns.name, None) + assert metrics.columns.name is None def test_groupby_nonstring_columns(self): df = DataFrame([np.arange(10) for x in range(10)]) @@ -2595,11 +2595,11 @@ def convert_force_pure(x): grouped = s.groupby(labels) result = grouped.agg(convert_fast) - self.assertEqual(result.dtype, np.object_) + assert result.dtype == np.object_ assert isinstance(result[0], Decimal) result = grouped.agg(convert_force_pure) - self.assertEqual(result.dtype, np.object_) + assert result.dtype == np.object_ assert isinstance(result[0], Decimal) def test_fast_apply(self): @@ -2670,7 +2670,7 @@ def test_groupby_aggregation_mixed_dtype(self): def test_groupby_dtype_inference_empty(self): # GH 6733 df = DataFrame({'x': [], 'range': np.arange(0, dtype='int64')}) - self.assertEqual(df['x'].dtype, np.float64) + assert df['x'].dtype == np.float64 result = df.groupby('x').first() exp_index = Index([], name='x', dtype=np.float64) @@ -2725,7 +2725,7 @@ def test_groupby_nat_exclude(self): expected = [pd.Index([1, 7]), pd.Index([3, 5])] keys = sorted(grouped.groups.keys()) - self.assertEqual(len(keys), 2) + assert len(keys) == 2 for k, e in zip(keys, expected): # grouped.groups keys are np.datetime64 with system tz # not to be affected by tz, only compare values @@ -2733,7 +2733,7 @@ def test_groupby_nat_exclude(self): # confirm obj is not filtered tm.assert_frame_equal(grouped.grouper.groupings[0].obj, df) - self.assertEqual(grouped.ngroups, 2) + assert grouped.ngroups == 2 expected = { Timestamp('2013-01-01 00:00:00'): np.array([1, 7], dtype=np.int64), @@ -2752,14 +2752,14 @@ def test_groupby_nat_exclude(self): nan_df = DataFrame({'nan': [np.nan, np.nan, np.nan], 'nat': [pd.NaT, pd.NaT, pd.NaT]}) - self.assertEqual(nan_df['nan'].dtype, 'float64') - self.assertEqual(nan_df['nat'].dtype, 'datetime64[ns]') + assert nan_df['nan'].dtype == 'float64' + assert nan_df['nat'].dtype == 'datetime64[ns]' for key in ['nan', 'nat']: grouped = nan_df.groupby(key) - self.assertEqual(grouped.groups, {}) - self.assertEqual(grouped.ngroups, 0) - self.assertEqual(grouped.indices, {}) + assert grouped.groups == {} + assert grouped.ngroups == 0 + assert grouped.indices == {} pytest.raises(KeyError, grouped.get_group, np.nan) pytest.raises(KeyError, 
grouped.get_group, pd.NaT) @@ -2837,7 +2837,7 @@ def test_int32_overflow(self): left = df.groupby(['A', 'B', 'C', 'D']).sum() right = df.groupby(['D', 'C', 'B', 'A']).sum() - self.assertEqual(len(left), len(right)) + assert len(left) == len(right) def test_groupby_sort_multi(self): df = DataFrame({'a': ['foo', 'bar', 'baz'], @@ -2963,7 +2963,7 @@ def test_multifunc_sum_bug(self): grouped = x.groupby('test') result = grouped.agg({'fl': 'sum', 2: 'size'}) - self.assertEqual(result['fl'].dtype, np.float64) + assert result['fl'].dtype == np.float64 def test_handle_dict_return_value(self): def f(group): @@ -3056,14 +3056,13 @@ def f(group): assert names == expected_names def test_no_dummy_key_names(self): - # GH #1291 - + # see gh-1291 result = self.df.groupby(self.df['A'].values).sum() assert result.index.name is None result = self.df.groupby([self.df['A'].values, self.df['B'].values ]).sum() - self.assertEqual(result.index.names, (None, None)) + assert result.index.names == (None, None) def test_groupby_sort_multiindex_series(self): # series multiindex groupby sort argument was not being passed through @@ -3121,16 +3120,16 @@ def test_multiindex_columns_empty_level(self): df = DataFrame([[long(1), 'A']], columns=midx) grouped = df.groupby('to filter').groups - self.assertEqual(grouped['A'], [0]) + assert grouped['A'] == [0] grouped = df.groupby([('to filter', '')]).groups - self.assertEqual(grouped['A'], [0]) + assert grouped['A'] == [0] df = DataFrame([[long(1), 'A'], [long(2), 'B']], columns=midx) expected = df.groupby('to filter').groups result = df.groupby([('to filter', '')]).groups - self.assertEqual(result, expected) + assert result == expected df = DataFrame([[long(1), 'A'], [long(2), 'A']], columns=midx) @@ -3230,7 +3229,7 @@ def test_groupby_non_arithmetic_agg_intlike_precision(self): grpd = df.groupby('a') res = getattr(grpd, method)(*data['args']) - self.assertEqual(res.iloc[0].b, data['expected']) + assert res.iloc[0].b == data['expected'] def test_groupby_multiindex_missing_pair(self): # GH9049 diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index f583fa7aa7e86..0b6aeaf155f86 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -87,9 +87,9 @@ def test_first_last_nth_dtypes(self): idx = lrange(10) idx.append(9) s = Series(data=lrange(11), index=idx, name='IntCol') - self.assertEqual(s.dtype, 'int64') + assert s.dtype == 'int64' f = s.groupby(level=0).first() - self.assertEqual(f.dtype, 'int64') + assert f.dtype == 'int64' def test_nth(self): df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) @@ -155,12 +155,12 @@ def test_nth(self): expected2 = s.groupby(g).apply(lambda x: x.iloc[0]) assert_series_equal(expected2, expected, check_names=False) assert expected.name, 0 - self.assertEqual(expected.name, 1) + assert expected.name == 1 # validate first v = s[g == 1].iloc[0] - self.assertEqual(expected.iloc[0], v) - self.assertEqual(expected2.iloc[0], v) + assert expected.iloc[0] == v + assert expected2.iloc[0] == v # this is NOT the same as .first (as sorted is default!) 
# as it keeps the order in the series (and not the group order) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index db3fdfa605b5b..42caecbdb700e 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -444,7 +444,7 @@ def test_frame_datetime64_handling_groupby(self): (3, np.datetime64('2012-07-04'))], columns=['a', 'date']) result = df.groupby('a').first() - self.assertEqual(result['date'][3], Timestamp('2012-07-03')) + assert result['date'][3] == Timestamp('2012-07-03') def test_groupby_multi_timezone(self): @@ -575,10 +575,10 @@ def test_timezone_info(self): import pytz df = pd.DataFrame({'a': [1], 'b': [datetime.now(pytz.utc)]}) - self.assertEqual(df['b'][0].tzinfo, pytz.utc) + assert df['b'][0].tzinfo == pytz.utc df = pd.DataFrame({'a': [1, 2, 3]}) df['b'] = datetime.now(pytz.utc) - self.assertEqual(df['b'][0].tzinfo, pytz.utc) + assert df['b'][0].tzinfo == pytz.utc def test_datetime_count(self): df = DataFrame({'a': [1, 2, 3] * 2, diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index e0d81003e325f..0b81235ef2117 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -29,7 +29,7 @@ def test_transform(self): grouped = data.groupby(lambda x: x // 3) transformed = grouped.transform(lambda x: x * x.sum()) - self.assertEqual(transformed[7], 12) + assert transformed[7] == 12 # GH 8046 # make sure that we preserve the input order @@ -408,7 +408,7 @@ def f(group): grouped = df.groupby('c') result = grouped.apply(f) - self.assertEqual(result['d'].dtype, np.float64) + assert result['d'].dtype == np.float64 # this is by definition a mutating operation! with option_context('mode.chained_assignment', None): diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index d9dccc39f469f..bbde902fb87bf 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -139,7 +139,7 @@ def test_ndarray_compat_properties(self): values = idx.values for prop in self._compat_props: - self.assertEqual(getattr(idx, prop), getattr(values, prop)) + assert getattr(idx, prop) == getattr(values, prop) # test for validity idx.nbytes @@ -162,7 +162,7 @@ def test_dtype_str(self): for idx in self.indices.values(): dtype = idx.dtype_str assert isinstance(dtype, compat.string_types) - self.assertEqual(dtype, str(idx.dtype)) + assert dtype == str(idx.dtype) def test_repr_max_seq_item_setting(self): # GH10182 @@ -189,14 +189,14 @@ def test_set_name_methods(self): original_name = ind.name new_ind = ind.set_names([new_name]) - self.assertEqual(new_ind.name, new_name) - self.assertEqual(ind.name, original_name) + assert new_ind.name == new_name + assert ind.name == original_name res = ind.rename(new_name, inplace=True) # should return None assert res is None - self.assertEqual(ind.name, new_name) - self.assertEqual(ind.names, [new_name]) + assert ind.name == new_name + assert ind.names == [new_name] # with tm.assert_raises_regex(TypeError, "list-like"): # # should still fail even if it would be the right length # ind.set_names("a") @@ -206,8 +206,8 @@ def test_set_name_methods(self): # rename in place just leaves tuples and other containers alone name = ('A', 'B') ind.rename(name, inplace=True) - self.assertEqual(ind.name, name) - self.assertEqual(ind.names, [name]) + assert ind.name == name + assert ind.names == [name] def test_hash_error(self): for ind in self.indices.values(): @@ -310,7 +310,7 @@ def 
test_duplicates(self): # preserve names idx.name = 'foo' result = idx.drop_duplicates() - self.assertEqual(result.name, 'foo') + assert result.name == 'foo' tm.assert_index_equal(result, Index([ind[0]], name='foo')) def test_get_unique_index(self): @@ -351,8 +351,8 @@ def test_get_unique_index(self): idx_unique_nan = ind._shallow_copy(vals_unique) assert idx_unique_nan.is_unique - self.assertEqual(idx_nan.dtype, ind.dtype) - self.assertEqual(idx_unique_nan.dtype, ind.dtype) + assert idx_nan.dtype == ind.dtype + assert idx_unique_nan.dtype == ind.dtype for dropna, expected in zip([False, True], [idx_unique_nan, idx_unique]): @@ -373,11 +373,11 @@ def test_mutability(self): def test_view(self): for ind in self.indices.values(): i_view = ind.view() - self.assertEqual(i_view.name, ind.name) + assert i_view.name == ind.name def test_compat(self): for ind in self.indices.values(): - self.assertEqual(ind.tolist(), list(ind)) + assert ind.tolist() == list(ind) def test_memory_usage(self): for name, index in compat.iteritems(self.indices): @@ -398,7 +398,7 @@ def test_memory_usage(self): else: # we report 0 for no-length - self.assertEqual(result, 0) + assert result == 0 def test_argsort(self): for k, ind in self.indices.items(): @@ -617,7 +617,7 @@ def test_difference_base(self): elif isinstance(idx, CategoricalIndex): pass elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): - self.assertEqual(result.__class__, answer.__class__) + assert result.__class__ == answer.__class__ tm.assert_numpy_array_equal(result.asi8, answer.asi8) else: result = first.difference(case) @@ -687,12 +687,12 @@ def test_delete_base(self): expected = idx[1:] result = idx.delete(0) assert result.equals(expected) - self.assertEqual(result.name, expected.name) + assert result.name == expected.name expected = idx[:-1] result = idx.delete(-1) assert result.equals(expected) - self.assertEqual(result.name, expected.name) + assert result.name == expected.name with pytest.raises((IndexError, ValueError)): # either depending on numpy version diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 35031746efebe..1c8189d0c75ac 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -131,8 +131,8 @@ def _check_rng(rng): assert isinstance(converted, np.ndarray) for x, stamp in zip(converted, rng): assert isinstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) + assert x == stamp.to_pydatetime() + assert x.tzinfo == stamp.tzinfo rng = date_range('20090415', '20090519') rng_eastern = date_range('20090415', '20090519', tz='US/Eastern') @@ -151,8 +151,8 @@ def _check_rng(rng): assert isinstance(converted, np.ndarray) for x, stamp in zip(converted, rng): assert isinstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) + assert x == stamp.to_pydatetime() + assert x.tzinfo == stamp.tzinfo rng = date_range('20090415', '20090519') rng_eastern = date_range('20090415', '20090519', @@ -172,8 +172,8 @@ def _check_rng(rng): assert isinstance(converted, np.ndarray) for x, stamp in zip(converted, rng): assert isinstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) + assert x == stamp.to_pydatetime() + assert x.tzinfo == stamp.tzinfo rng = date_range('20090415', '20090519') rng_eastern = date_range('20090415', '20090519', @@ -196,17 +196,17 @@ def 
test_to_period_millisecond(self): index = self.index period = index.to_period(freq='L') - self.assertEqual(2, len(period)) - self.assertEqual(period[0], Period('2007-01-01 10:11:12.123Z', 'L')) - self.assertEqual(period[1], Period('2007-01-01 10:11:13.789Z', 'L')) + assert 2 == len(period) + assert period[0] == Period('2007-01-01 10:11:12.123Z', 'L') + assert period[1] == Period('2007-01-01 10:11:13.789Z', 'L') def test_to_period_microsecond(self): index = self.index period = index.to_period(freq='U') - self.assertEqual(2, len(period)) - self.assertEqual(period[0], Period('2007-01-01 10:11:12.123456Z', 'U')) - self.assertEqual(period[1], Period('2007-01-01 10:11:13.789123Z', 'U')) + assert 2 == len(period) + assert period[0] == Period('2007-01-01 10:11:12.123456Z', 'U') + assert period[1] == Period('2007-01-01 10:11:13.789123Z', 'U') def test_to_period_tz_pytz(self): tm._skip_if_no_pytz() @@ -220,7 +220,7 @@ def test_to_period_tz_pytz(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertEqual(result, expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) ts = date_range('1/1/2000', '4/1/2000', tz=UTC) @@ -228,7 +228,7 @@ def test_to_period_tz_pytz(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertEqual(result, expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) @@ -236,7 +236,7 @@ def test_to_period_tz_pytz(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertEqual(result, expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) def test_to_period_tz_explicit_pytz(self): @@ -309,4 +309,4 @@ def test_astype_object(self): exp_values = list(rng) tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_)) - self.assertEqual(casted.tolist(), exp_values) + assert casted.tolist() == exp_values diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 098d4755b385c..9af4136afd025 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -436,14 +436,14 @@ def test_constructor_dtype(self): def test_constructor_name(self): idx = DatetimeIndex(start='2000-01-01', periods=1, freq='A', name='TEST') - self.assertEqual(idx.name, 'TEST') + assert idx.name == 'TEST' def test_000constructor_resolution(self): # 2252 t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1) idx = DatetimeIndex([t1]) - self.assertEqual(idx.nanosecond[0], t1.nanosecond) + assert idx.nanosecond[0] == t1.nanosecond class TestTimeSeries(tm.TestCase): @@ -452,7 +452,7 @@ def test_dti_constructor_preserve_dti_freq(self): rng = date_range('1/1/2000', '1/2/2000', freq='5min') rng2 = DatetimeIndex(rng) - self.assertEqual(rng.freq, rng2.freq) + assert rng.freq == rng2.freq def test_dti_constructor_years_only(self): # GH 6961 @@ -487,7 +487,7 @@ def test_dti_constructor_small_int(self): def test_ctor_str_intraday(self): rng = DatetimeIndex(['1-1-2000 00:00:01']) - self.assertEqual(rng[0].second, 1) + assert rng[0].second == 1 def test_is_(self): dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') @@ -565,29 +565,29 @@ def test_datetimeindex_constructor_misc(self): sdate = datetime(1999, 12, 25) edate = datetime(2000, 1, 1) idx = DatetimeIndex(start=sdate, freq='1B', periods=20) - self.assertEqual(len(idx), 20) - self.assertEqual(idx[0], sdate + 0 * offsets.BDay()) - 
self.assertEqual(idx.freq, 'B') + assert len(idx) == 20 + assert idx[0] == sdate + 0 * offsets.BDay() + assert idx.freq == 'B' idx = DatetimeIndex(end=edate, freq=('D', 5), periods=20) - self.assertEqual(len(idx), 20) - self.assertEqual(idx[-1], edate) - self.assertEqual(idx.freq, '5D') + assert len(idx) == 20 + assert idx[-1] == edate + assert idx.freq == '5D' idx1 = DatetimeIndex(start=sdate, end=edate, freq='W-SUN') idx2 = DatetimeIndex(start=sdate, end=edate, freq=offsets.Week(weekday=6)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) + assert len(idx1) == len(idx2) + assert idx1.offset == idx2.offset idx1 = DatetimeIndex(start=sdate, end=edate, freq='QS') idx2 = DatetimeIndex(start=sdate, end=edate, freq=offsets.QuarterBegin(startingMonth=1)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) + assert len(idx1) == len(idx2) + assert idx1.offset == idx2.offset idx1 = DatetimeIndex(start=sdate, end=edate, freq='BQ') idx2 = DatetimeIndex(start=sdate, end=edate, freq=offsets.BQuarterEnd(startingMonth=12)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) + assert len(idx1) == len(idx2) + assert idx1.offset == idx2.offset diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 6b011ad6db98e..a9fdd40406770 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -30,7 +30,7 @@ class TestDateRanges(TestData, tm.TestCase): def test_date_range_gen_error(self): rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min') - self.assertEqual(len(rng), 4) + assert len(rng) == 4 def test_date_range_negative_freq(self): # GH 11018 @@ -38,20 +38,20 @@ def test_date_range_negative_freq(self): exp = pd.DatetimeIndex(['2011-12-31', '2009-12-31', '2007-12-31'], freq='-2A') tm.assert_index_equal(rng, exp) - self.assertEqual(rng.freq, '-2A') + assert rng.freq == '-2A' rng = date_range('2011-01-31', freq='-2M', periods=3) exp = pd.DatetimeIndex(['2011-01-31', '2010-11-30', '2010-09-30'], freq='-2M') tm.assert_index_equal(rng, exp) - self.assertEqual(rng.freq, '-2M') + assert rng.freq == '-2M' def test_date_range_bms_bug(self): # #1645 rng = date_range('1/1/2000', periods=10, freq='BMS') ex_first = Timestamp('2000-01-03') - self.assertEqual(rng[0], ex_first) + assert rng[0] == ex_first def test_date_range_normalize(self): snap = datetime.today() @@ -68,13 +68,13 @@ def test_date_range_normalize(self): freq='B') the_time = time(8, 15) for val in rng: - self.assertEqual(val.time(), the_time) + assert val.time() == the_time def test_date_range_fy5252(self): dr = date_range(start="2013-01-01", periods=2, freq=offsets.FY5253( startingMonth=1, weekday=3, variation="nearest")) - self.assertEqual(dr[0], Timestamp('2013-01-31')) - self.assertEqual(dr[1], Timestamp('2014-01-30')) + assert dr[0] == Timestamp('2013-01-31') + assert dr[1] == Timestamp('2014-01-30') def test_date_range_ambiguous_arguments(self): # #2538 @@ -138,7 +138,7 @@ def test_compat_replace(self): freq='QS-JAN'), periods=f(76), freq='QS-JAN') - self.assertEqual(len(result), 76) + assert len(result) == 76 def test_catch_infinite_loop(self): offset = offsets.DateOffset(minute=5) @@ -152,12 +152,12 @@ class TestGenRangeGeneration(tm.TestCase): def test_generate(self): rng1 = list(generate_range(START, END, offset=BDay())) rng2 = list(generate_range(START, END, time_rule='B')) - self.assertEqual(rng1, rng2) + 
+        assert rng1 == rng2

     def test_generate_cday(self):
         rng1 = list(generate_range(START, END, offset=CDay()))
         rng2 = list(generate_range(START, END, time_rule='C'))
-        self.assertEqual(rng1, rng2)
+        assert rng1 == rng2

     def test_1(self):
         eq_gen_range(dict(start=datetime(2009, 3, 25), periods=2),
@@ -241,14 +241,14 @@ def test_cached_range(self):

     def test_cached_range_bug(self):
         rng = date_range('2010-09-01 05:00:00', periods=50,
                          freq=DateOffset(hours=6))
-        self.assertEqual(len(rng), 50)
-        self.assertEqual(rng[0], datetime(2010, 9, 1, 5))
+        assert len(rng) == 50
+        assert rng[0] == datetime(2010, 9, 1, 5)

     def test_timezone_comparaison_bug(self):
         # smoke test
         start = Timestamp('20130220 10:00', tz='US/Eastern')
         result = date_range(start, periods=2, tz='US/Eastern')
-        self.assertEqual(len(result), 2)
+        assert len(result) == 2

     def test_timezone_comparaison_assert(self):
         start = Timestamp('20130220 10:00', tz='US/Eastern')
@@ -308,19 +308,19 @@ def test_range_tz_pytz(self):
         end = tz.localize(datetime(2011, 1, 3))

         dr = date_range(start=start, periods=3)
-        self.assertEqual(dr.tz.zone, tz.zone)
-        self.assertEqual(dr[0], start)
-        self.assertEqual(dr[2], end)
+        assert dr.tz.zone == tz.zone
+        assert dr[0] == start
+        assert dr[2] == end

         dr = date_range(end=end, periods=3)
-        self.assertEqual(dr.tz.zone, tz.zone)
-        self.assertEqual(dr[0], start)
-        self.assertEqual(dr[2], end)
+        assert dr.tz.zone == tz.zone
+        assert dr[0] == start
+        assert dr[2] == end

         dr = date_range(start=start, end=end)
-        self.assertEqual(dr.tz.zone, tz.zone)
-        self.assertEqual(dr[0], start)
-        self.assertEqual(dr[2], end)
+        assert dr.tz.zone == tz.zone
+        assert dr[0] == start
+        assert dr[2] == end

     def test_range_tz_dst_straddle_pytz(self):

@@ -333,20 +333,20 @@ def test_range_tz_dst_straddle_pytz(self):
                   tz.localize(datetime(2013, 11, 6)))]
         for (start, end) in dates:
             dr = date_range(start, end, freq='D')
-            self.assertEqual(dr[0], start)
-            self.assertEqual(dr[-1], end)
-            self.assertEqual(np.all(dr.hour == 0), True)
+            assert dr[0] == start
+            assert dr[-1] == end
+            assert np.all(dr.hour == 0)

             dr = date_range(start, end, freq='D', tz='US/Eastern')
-            self.assertEqual(dr[0], start)
-            self.assertEqual(dr[-1], end)
-            self.assertEqual(np.all(dr.hour == 0), True)
+            assert dr[0] == start
+            assert dr[-1] == end
+            assert np.all(dr.hour == 0)

             dr = date_range(start.replace(tzinfo=None), end.replace(
                 tzinfo=None), freq='D', tz='US/Eastern')
-            self.assertEqual(dr[0], start)
-            self.assertEqual(dr[-1], end)
-            self.assertEqual(np.all(dr.hour == 0), True)
+            assert dr[0] == start
+            assert dr[-1] == end
+            assert np.all(dr.hour == 0)

     def test_range_tz_dateutil(self):
         # GH 2906
@@ -461,8 +461,8 @@ def test_range_closed_boundary(self):
     def test_years_only(self):
         # GH 6961
         dr = date_range('2014', '2015', freq='M')
-        self.assertEqual(dr[0], datetime(2014, 1, 31))
-        self.assertEqual(dr[-1], datetime(2014, 12, 31))
+        assert dr[0] == datetime(2014, 1, 31)
+        assert dr[-1] == datetime(2014, 12, 31)

     def test_freq_divides_end_in_nanos(self):
         # GH 10885
diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py
index 83f9119377b19..7b22d1615fbeb 100644
--- a/pandas/tests/indexes/datetimes/test_datetime.py
+++ b/pandas/tests/indexes/datetimes/test_datetime.py
@@ -21,35 +21,35 @@ def test_get_loc(self):
         idx = pd.date_range('2000-01-01', periods=3)

         for method in [None, 'pad', 'backfill', 'nearest']:
-            self.assertEqual(idx.get_loc(idx[1], method), 1)
-            self.assertEqual(idx.get_loc(idx[1].to_pydatetime(), method), 1)
-            self.assertEqual(idx.get_loc(str(idx[1]), method), 1)
+            assert idx.get_loc(idx[1], method) == 1
+            assert idx.get_loc(idx[1].to_pydatetime(), method) == 1
+            assert idx.get_loc(str(idx[1]), method) == 1
+
             if method is not None:
-                self.assertEqual(idx.get_loc(idx[1], method,
-                                             tolerance=pd.Timedelta('0 days')),
-                                 1)
-
-        self.assertEqual(idx.get_loc('2000-01-01', method='nearest'), 0)
-        self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest'), 1)
-
-        self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest',
-                                     tolerance='1 day'), 1)
-        self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest',
-                                     tolerance=pd.Timedelta('1D')), 1)
-        self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest',
-                                     tolerance=np.timedelta64(1, 'D')), 1)
-        self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest',
-                                     tolerance=timedelta(1)), 1)
+                assert idx.get_loc(idx[1], method,
+                                   tolerance=pd.Timedelta('0 days')) == 1
+
+        assert idx.get_loc('2000-01-01', method='nearest') == 0
+        assert idx.get_loc('2000-01-01T12', method='nearest') == 1
+
+        assert idx.get_loc('2000-01-01T12', method='nearest',
+                           tolerance='1 day') == 1
+        assert idx.get_loc('2000-01-01T12', method='nearest',
+                           tolerance=pd.Timedelta('1D')) == 1
+        assert idx.get_loc('2000-01-01T12', method='nearest',
+                           tolerance=np.timedelta64(1, 'D')) == 1
+        assert idx.get_loc('2000-01-01T12', method='nearest',
+                           tolerance=timedelta(1)) == 1
         with tm.assert_raises_regex(ValueError, 'must be convertible'):
             idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo')
         with pytest.raises(KeyError):
             idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours')

-        self.assertEqual(idx.get_loc('2000', method='nearest'), slice(0, 3))
-        self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 3))
+        assert idx.get_loc('2000', method='nearest') == slice(0, 3)
+        assert idx.get_loc('2000-01', method='nearest') == slice(0, 3)

-        self.assertEqual(idx.get_loc('1999', method='nearest'), 0)
-        self.assertEqual(idx.get_loc('2001', method='nearest'), 2)
+        assert idx.get_loc('1999', method='nearest') == 0
+        assert idx.get_loc('2001', method='nearest') == 2

         with pytest.raises(KeyError):
             idx.get_loc('1999', method='pad')
@@ -62,9 +62,9 @@ def test_get_loc(self):
             idx.get_loc(slice(2))

         idx = pd.to_datetime(['2000-01-01', '2000-01-04'])
-        self.assertEqual(idx.get_loc('2000-01-02', method='nearest'), 0)
-        self.assertEqual(idx.get_loc('2000-01-03', method='nearest'), 1)
-        self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 2))
+        assert idx.get_loc('2000-01-02', method='nearest') == 0
+        assert idx.get_loc('2000-01-03', method='nearest') == 1
+        assert idx.get_loc('2000-01', method='nearest') == slice(0, 2)

         # time indexing
         idx = pd.date_range('2000-01-01', periods=24, freq='H')
@@ -114,8 +114,8 @@ def test_roundtrip_pickle_with_tz(self):
     def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self):
         # GH7774
         index = date_range('20130101', periods=3, tz='US/Eastern')
-        self.assertEqual(str(index.reindex([])[0].tz), 'US/Eastern')
-        self.assertEqual(str(index.reindex(np.array([]))[0].tz), 'US/Eastern')
+        assert str(index.reindex([])[0].tz) == 'US/Eastern'
+        assert str(index.reindex(np.array([]))[0].tz) == 'US/Eastern'

     def test_time_loc(self):  # GH8667
         from datetime import time
@@ -150,10 +150,10 @@ def test_time_overflow_for_32bit_machines(self):
         periods = np.int_(1000)

         idx1 = pd.date_range(start='2000', periods=periods, freq='S')
-        self.assertEqual(len(idx1), periods)
+        assert len(idx1) == periods
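#
# [Editor's note -- not part of the original patch] Every hunk in this
# commit applies the same mechanical rewrite: unittest's ``TestCase``
# helpers become plain ``assert`` statements, relying on pytest's
# assertion rewriting to report both operands when a test fails. A
# minimal sketch of the mapping, with illustrative values rather than
# lines taken from any one file:
#
#     self.assertEqual(a, b)       becomes  assert a == b
#     self.assertEqual(len(x), n)  becomes  assert len(x) == n
#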
         idx2 = pd.date_range(end='2000', periods=periods, freq='S')
-        self.assertEqual(len(idx2), periods)
+        assert len(idx2) == periods

     def test_nat(self):
         assert DatetimeIndex([np.nan])[0] is pd.NaT
@@ -166,13 +166,13 @@ def test_ufunc_coercions(self):
             assert isinstance(result, DatetimeIndex)
             exp = date_range('2011-01-02', periods=3, freq='2D', name='x')
             tm.assert_index_equal(result, exp)
-            self.assertEqual(result.freq, '2D')
+            assert result.freq == '2D'

         for result in [idx - delta, np.subtract(idx, delta)]:
             assert isinstance(result, DatetimeIndex)
             exp = date_range('2010-12-31', periods=3, freq='2D', name='x')
             tm.assert_index_equal(result, exp)
-            self.assertEqual(result.freq, '2D')
+            assert result.freq == '2D'

         delta = np.array([np.timedelta64(1, 'D'), np.timedelta64(2, 'D'),
                           np.timedelta64(3, 'D')])
@@ -181,14 +181,14 @@ def test_ufunc_coercions(self):
             exp = DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-08'],
                                 freq='3D', name='x')
             tm.assert_index_equal(result, exp)
-            self.assertEqual(result.freq, '3D')
+            assert result.freq == '3D'

         for result in [idx - delta, np.subtract(idx, delta)]:
             assert isinstance(result, DatetimeIndex)
             exp = DatetimeIndex(['2010-12-31', '2011-01-01', '2011-01-02'],
                                 freq='D', name='x')
             tm.assert_index_equal(result, exp)
-            self.assertEqual(result.freq, 'D')
+            assert result.freq == 'D'

     def test_week_of_month_frequency(self):
         # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise
@@ -240,14 +240,14 @@ def test_to_period_nofreq(self):

         idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'],
                             freq='infer')
-        self.assertEqual(idx.freqstr, 'D')
+        assert idx.freqstr == 'D'
         expected = pd.PeriodIndex(['2000-01-01', '2000-01-02',
                                    '2000-01-03'], freq='D')
         tm.assert_index_equal(idx.to_period(), expected)

         # GH 7606
         idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'])
-        self.assertEqual(idx.freqstr, None)
+        assert idx.freqstr is None
         tm.assert_index_equal(idx.to_period(), expected)

     def test_comparisons_coverage(self):
@@ -373,7 +373,7 @@ def test_iteration_preserves_tz(self):
         for i, ts in enumerate(index):
             result = ts
             expected = index[i]
-            self.assertEqual(result, expected)
+            assert result == expected

         index = date_range("2012-01-01", periods=3, freq='H',
                            tz=dateutil.tz.tzoffset(None, -28800))
@@ -381,8 +381,8 @@ def test_iteration_preserves_tz(self):
         for i, ts in enumerate(index):
             result = ts
             expected = index[i]
-            self.assertEqual(result._repr_base, expected._repr_base)
-            self.assertEqual(result, expected)
+            assert result._repr_base == expected._repr_base
+            assert result == expected

         # 9100
         index = pd.DatetimeIndex(['2014-12-01 03:32:39.987000-08:00',
@@ -390,8 +390,8 @@ def test_iteration_preserves_tz(self):
         for i, ts in enumerate(index):
             result = ts
             expected = index[i]
-            self.assertEqual(result._repr_base, expected._repr_base)
-            self.assertEqual(result, expected)
+            assert result._repr_base == expected._repr_base
+            assert result == expected

     def test_misc_coverage(self):
         rng = date_range('1/1/2000', periods=5)
@@ -410,10 +410,10 @@ def test_string_index_series_name_converted(self):
                        index=date_range('1/1/2000', periods=10))

         result = df.loc['1/3/2000']
-        self.assertEqual(result.name, df.index[2])
+        assert result.name == df.index[2]

         result = df.T['1/3/2000']
-        self.assertEqual(result.name, df.index[2])
+        assert result.name == df.index[2]

     def test_overflow_offset(self):
         # xref https://github.com/statsmodels/statsmodels/issues/3374
@@ -444,8 +444,8 @@ def test_get_duplicates(self):

     def test_argmin_argmax(self):
         idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02'])
-        self.assertEqual(idx.argmin(), 1)
-        self.assertEqual(idx.argmax(), 0)
+        assert idx.argmin() == 1
+        assert idx.argmax() == 0

     def test_sort_values(self):
         idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02'])
@@ -481,8 +481,8 @@ def test_take(self):
         tm.assert_index_equal(taken, expected)
         assert isinstance(taken, DatetimeIndex)
         assert taken.freq is None
-        self.assertEqual(taken.tz, expected.tz)
-        self.assertEqual(taken.name, expected.name)
+        assert taken.tz == expected.tz
+        assert taken.name == expected.name

     def test_take_fill_value(self):
         # GH 12631
@@ -601,8 +601,8 @@ def test_does_not_convert_mixed_integer(self):
                                r_idx_type='i', c_idx_type='dt')
         cols = df.columns.join(df.index, how='outer')
         joined = cols.join(df.columns)
-        self.assertEqual(cols.dtype, np.dtype('O'))
-        self.assertEqual(cols.dtype, joined.dtype)
+        assert cols.dtype == np.dtype('O')
+        assert cols.dtype == joined.dtype
         tm.assert_numpy_array_equal(cols.values, joined.values)

     def test_slice_keeps_name(self):
@@ -610,7 +610,7 @@ def test_slice_keeps_name(self):
         st = pd.Timestamp('2013-07-01 00:00:00', tz='America/Los_Angeles')
         et = pd.Timestamp('2013-07-02 00:00:00', tz='America/Los_Angeles')
         dr = pd.date_range(st, et, freq='H', name='timebucket')
-        self.assertEqual(dr[1:].name, dr.name)
+        assert dr[1:].name == dr.name

     def test_join_self(self):
         index = date_range('1/1/2000', periods=10)
@@ -769,8 +769,8 @@ def test_slice_bounds_empty(self):
         right = empty_idx._maybe_cast_slice_bound('2015-01-02', 'right', 'loc')
         exp = Timestamp('2015-01-02 23:59:59.999999999')
-        self.assertEqual(right, exp)
+        assert right == exp

         left = empty_idx._maybe_cast_slice_bound('2015-01-02', 'left', 'loc')
         exp = Timestamp('2015-01-02 00:00:00')
-        self.assertEqual(left, exp)
+        assert left == exp
diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py
index 568e045d9f5e7..92134a296b08f 100644
--- a/pandas/tests/indexes/datetimes/test_indexing.py
+++ b/pandas/tests/indexes/datetimes/test_indexing.py
@@ -164,8 +164,8 @@ def test_delete(self):
         for n, expected in compat.iteritems(cases):
             result = idx.delete(n)
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.name, expected.name)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.name == expected.name
+            assert result.freq == expected.freq

         with pytest.raises((IndexError, ValueError)):
             # either depending on numpy version
             idx.delete(5)
@@ -179,17 +179,17 @@ def test_delete(self):
                                   freq='H', name='idx', tz=tz)
             result = idx.delete(0)
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.name, expected.name)
-            self.assertEqual(result.freqstr, 'H')
-            self.assertEqual(result.tz, expected.tz)
+            assert result.name == expected.name
+            assert result.freqstr == 'H'
+            assert result.tz == expected.tz

             expected = date_range(start='2000-01-01 09:00', periods=9,
                                   freq='H', name='idx', tz=tz)
             result = idx.delete(-1)
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.name, expected.name)
-            self.assertEqual(result.freqstr, 'H')
-            self.assertEqual(result.tz, expected.tz)
+            assert result.name == expected.name
+            assert result.freqstr == 'H'
+            assert result.tz == expected.tz

     def test_delete_slice(self):
         idx = date_range(start='2000-01-01', periods=10, freq='D', name='idx')
@@ -211,13 +211,13 @@ def test_delete_slice(self):
         for n, expected in compat.iteritems(cases):
             result = idx.delete(n)
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.name, expected.name)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.name == expected.name
+            assert result.freq == expected.freq

             result = idx.delete(slice(n[0], n[-1] + 1))
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.name, expected.name)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.name == expected.name
+            assert result.freq == expected.freq

         for tz in [None, 'Asia/Tokyo', 'US/Pacific']:
             ts = pd.Series(1, index=pd.date_range(
                 '2000-01-01 09:00', periods=10, freq='H', name='idx', tz=tz))
             result = ts.drop(ts.index[:5]).index
             expected = pd.date_range('2000-01-01 14:00', periods=5, freq='H',
                                      name='idx', tz=tz)
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.name, expected.name)
-            self.assertEqual(result.freq, expected.freq)
-            self.assertEqual(result.tz, expected.tz)
+            assert result.name == expected.name
+            assert result.freq == expected.freq
+            assert result.tz == expected.tz

             # reset freq to None
             result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index
             expected = DatetimeIndex(['2000-01-01 09:00', '2000-01-01 11:00',
                                       '2000-01-01 13:00',
                                       '2000-01-01 15:00', '2000-01-01 17:00'],
                                      freq=None, name='idx', tz=tz)
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.name, expected.name)
-            self.assertEqual(result.freq, expected.freq)
-            self.assertEqual(result.tz, expected.tz)
+            assert result.name == expected.name
+            assert result.freq == expected.freq
+            assert result.tz == expected.tz
diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py
index 55165aa39a1a4..ae5d29ca426b4 100644
--- a/pandas/tests/indexes/datetimes/test_misc.py
+++ b/pandas/tests/indexes/datetimes/test_misc.py
@@ -181,81 +181,80 @@ def test_datetimeindex_accessors(self):
                                 periods=365, tz='US/Eastern')
         for dti in [dti_naive, dti_tz]:
-            self.assertEqual(dti.year[0], 1998)
-            self.assertEqual(dti.month[0], 1)
-            self.assertEqual(dti.day[0], 1)
-            self.assertEqual(dti.hour[0], 0)
-            self.assertEqual(dti.minute[0], 0)
-            self.assertEqual(dti.second[0], 0)
-            self.assertEqual(dti.microsecond[0], 0)
-            self.assertEqual(dti.dayofweek[0], 3)
-
-            self.assertEqual(dti.dayofyear[0], 1)
-            self.assertEqual(dti.dayofyear[120], 121)
-
-            self.assertEqual(dti.weekofyear[0], 1)
-            self.assertEqual(dti.weekofyear[120], 18)
-
-            self.assertEqual(dti.quarter[0], 1)
-            self.assertEqual(dti.quarter[120], 2)
-
-            self.assertEqual(dti.days_in_month[0], 31)
-            self.assertEqual(dti.days_in_month[90], 30)
-
-            self.assertEqual(dti.is_month_start[0], True)
-            self.assertEqual(dti.is_month_start[1], False)
-            self.assertEqual(dti.is_month_start[31], True)
-            self.assertEqual(dti.is_quarter_start[0], True)
-            self.assertEqual(dti.is_quarter_start[90], True)
-            self.assertEqual(dti.is_year_start[0], True)
-            self.assertEqual(dti.is_year_start[364], False)
-            self.assertEqual(dti.is_month_end[0], False)
-            self.assertEqual(dti.is_month_end[30], True)
-            self.assertEqual(dti.is_month_end[31], False)
-            self.assertEqual(dti.is_month_end[364], True)
-            self.assertEqual(dti.is_quarter_end[0], False)
-            self.assertEqual(dti.is_quarter_end[30], False)
-            self.assertEqual(dti.is_quarter_end[89], True)
-            self.assertEqual(dti.is_quarter_end[364], True)
-            self.assertEqual(dti.is_year_end[0], False)
-            self.assertEqual(dti.is_year_end[364], True)
+            assert dti.year[0] == 1998
+            assert dti.month[0] == 1
+            assert dti.day[0] == 1
+            assert dti.hour[0] == 0
+            assert dti.minute[0] == 0
+            assert dti.second[0] == 0
+            assert dti.microsecond[0] == 0
+            assert dti.dayofweek[0] == 3
+
+            assert dti.dayofyear[0] == 1
+            assert dti.dayofyear[120] == 121
+
+            assert dti.weekofyear[0] == 1
+            assert dti.weekofyear[120] == 18
+
+            assert dti.quarter[0] == 1
+            assert dti.quarter[120] == 2
+
+            assert dti.days_in_month[0] == 31
+            assert dti.days_in_month[90] == 30
+
+            assert dti.is_month_start[0]
+            assert not dti.is_month_start[1]
+            assert dti.is_month_start[31]
+            assert dti.is_quarter_start[0]
+            assert dti.is_quarter_start[90]
+            assert dti.is_year_start[0]
+            assert not dti.is_year_start[364]
+            assert not dti.is_month_end[0]
+            assert dti.is_month_end[30]
+            assert not dti.is_month_end[31]
+            assert dti.is_month_end[364]
+            assert not dti.is_quarter_end[0]
+            assert not dti.is_quarter_end[30]
+            assert dti.is_quarter_end[89]
+            assert dti.is_quarter_end[364]
+            assert not dti.is_year_end[0]
+            assert dti.is_year_end[364]

             # GH 11128
-            self.assertEqual(dti.weekday_name[4], u'Monday')
-            self.assertEqual(dti.weekday_name[5], u'Tuesday')
-            self.assertEqual(dti.weekday_name[6], u'Wednesday')
-            self.assertEqual(dti.weekday_name[7], u'Thursday')
-            self.assertEqual(dti.weekday_name[8], u'Friday')
-            self.assertEqual(dti.weekday_name[9], u'Saturday')
-            self.assertEqual(dti.weekday_name[10], u'Sunday')
-
-            self.assertEqual(Timestamp('2016-04-04').weekday_name, u'Monday')
-            self.assertEqual(Timestamp('2016-04-05').weekday_name, u'Tuesday')
-            self.assertEqual(Timestamp('2016-04-06').weekday_name,
-                             u'Wednesday')
-            self.assertEqual(Timestamp('2016-04-07').weekday_name, u'Thursday')
-            self.assertEqual(Timestamp('2016-04-08').weekday_name, u'Friday')
-            self.assertEqual(Timestamp('2016-04-09').weekday_name, u'Saturday')
-            self.assertEqual(Timestamp('2016-04-10').weekday_name, u'Sunday')
-
-            self.assertEqual(len(dti.year), 365)
-            self.assertEqual(len(dti.month), 365)
-            self.assertEqual(len(dti.day), 365)
-            self.assertEqual(len(dti.hour), 365)
-            self.assertEqual(len(dti.minute), 365)
-            self.assertEqual(len(dti.second), 365)
-            self.assertEqual(len(dti.microsecond), 365)
-            self.assertEqual(len(dti.dayofweek), 365)
-            self.assertEqual(len(dti.dayofyear), 365)
-            self.assertEqual(len(dti.weekofyear), 365)
-            self.assertEqual(len(dti.quarter), 365)
-            self.assertEqual(len(dti.is_month_start), 365)
-            self.assertEqual(len(dti.is_month_end), 365)
-            self.assertEqual(len(dti.is_quarter_start), 365)
-            self.assertEqual(len(dti.is_quarter_end), 365)
-            self.assertEqual(len(dti.is_year_start), 365)
-            self.assertEqual(len(dti.is_year_end), 365)
-            self.assertEqual(len(dti.weekday_name), 365)
+            assert dti.weekday_name[4] == u'Monday'
+            assert dti.weekday_name[5] == u'Tuesday'
+            assert dti.weekday_name[6] == u'Wednesday'
+            assert dti.weekday_name[7] == u'Thursday'
+            assert dti.weekday_name[8] == u'Friday'
+            assert dti.weekday_name[9] == u'Saturday'
+            assert dti.weekday_name[10] == u'Sunday'
+
+            assert Timestamp('2016-04-04').weekday_name == u'Monday'
+            assert Timestamp('2016-04-05').weekday_name == u'Tuesday'
+            assert Timestamp('2016-04-06').weekday_name == u'Wednesday'
+            assert Timestamp('2016-04-07').weekday_name == u'Thursday'
+            assert Timestamp('2016-04-08').weekday_name == u'Friday'
+            assert Timestamp('2016-04-09').weekday_name == u'Saturday'
+            assert Timestamp('2016-04-10').weekday_name == u'Sunday'
+
+            assert len(dti.year) == 365
+            assert len(dti.month) == 365
+            assert len(dti.day) == 365
+            assert len(dti.hour) == 365
+            assert len(dti.minute) == 365
+            assert len(dti.second) == 365
+            assert len(dti.microsecond) == 365
+            assert len(dti.dayofweek) == 365
+            assert len(dti.dayofyear) == 365
+            assert len(dti.weekofyear) == 365
+            assert len(dti.quarter) == 365
+            assert len(dti.is_month_start) == 365
+            assert len(dti.is_month_end) == 365
+            assert len(dti.is_quarter_start) == 365
+            assert len(dti.is_quarter_end) == 365
+            assert len(dti.is_year_start) == 365
+            assert len(dti.is_year_end) == 365
+            assert len(dti.weekday_name) == 365

         dti.name = 'name'
@@ -283,10 +282,10 @@ def test_datetimeindex_accessors(self):

         dti = DatetimeIndex(freq='BQ-FEB', start=datetime(1998, 1, 1),
                             periods=4)

-        self.assertEqual(sum(dti.is_quarter_start), 0)
-        self.assertEqual(sum(dti.is_quarter_end), 4)
-        self.assertEqual(sum(dti.is_year_start), 0)
-        self.assertEqual(sum(dti.is_year_end), 1)
+        assert sum(dti.is_quarter_start) == 0
+        assert sum(dti.is_quarter_end) == 4
+        assert sum(dti.is_year_start) == 0
+        assert sum(dti.is_year_end) == 1

         # Ensure is_start/end accessors throw ValueError for CustomBusinessDay,
         # CBD requires np >= 1.7
@@ -296,7 +295,7 @@ def test_datetimeindex_accessors(self):

         dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'])

-        self.assertEqual(dti.is_month_start[0], 1)
+        assert dti.is_month_start[0] == 1

         tests = [
             (Timestamp('2013-06-01', freq='M').is_month_start, 1),
@@ -333,7 +332,7 @@ def test_datetimeindex_accessors(self):
             (Timestamp('2013-02-01').days_in_month, 28)]

         for ts, value in tests:
-            self.assertEqual(ts, value)
+            assert ts == value

     def test_nanosecond_field(self):
         dti = DatetimeIndex(np.arange(10))
diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py
index fa1b2c0d7c68d..e25e3d448190e 100644
--- a/pandas/tests/indexes/datetimes/test_ops.py
+++ b/pandas/tests/indexes/datetimes/test_ops.py
@@ -45,9 +45,9 @@ def test_ops_properties_basic(self):

         # attribute access should still work!
         s = Series(dict(year=2000, month=1, day=10))
-        self.assertEqual(s.year, 2000)
-        self.assertEqual(s.month, 1)
-        self.assertEqual(s.day, 10)
+        assert s.year == 2000
+        assert s.month == 1
+        assert s.day == 10
         pytest.raises(AttributeError, lambda: s.weekday)

     def test_asobject_tolist(self):
@@ -61,10 +61,10 @@ def test_asobject_tolist(self):

         result = idx.asobject
         assert isinstance(result, Index)
-        self.assertEqual(result.dtype, object)
+        assert result.dtype == object
         tm.assert_index_equal(result, expected)
-        self.assertEqual(result.name, expected.name)
-        self.assertEqual(idx.tolist(), expected_list)
+        assert result.name == expected.name
+        assert idx.tolist() == expected_list

         idx = pd.date_range(start='2013-01-01', periods=4, freq='M',
                             name='idx', tz='Asia/Tokyo')
@@ -75,10 +75,10 @@ def test_asobject_tolist(self):
         expected = pd.Index(expected_list, dtype=object, name='idx')
         result = idx.asobject
         assert isinstance(result, Index)
-        self.assertEqual(result.dtype, object)
+        assert result.dtype == object
         tm.assert_index_equal(result, expected)
-        self.assertEqual(result.name, expected.name)
-        self.assertEqual(idx.tolist(), expected_list)
+        assert result.name == expected.name
+        assert idx.tolist() == expected_list

         idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2),
                              pd.NaT, datetime(2013, 1, 4)], name='idx')
@@ -88,10 +88,10 @@ def test_asobject_tolist(self):
         expected = pd.Index(expected_list, dtype=object, name='idx')
         result = idx.asobject
         assert isinstance(result, Index)
-        self.assertEqual(result.dtype, object)
+        assert result.dtype == object
         tm.assert_index_equal(result, expected)
-        self.assertEqual(result.name, expected.name)
-        self.assertEqual(idx.tolist(), expected_list)
+        assert result.name == expected.name
+        assert idx.tolist() == expected_list

     def test_minmax(self):
         for tz in self.tz:
@@ -106,10 +106,10 @@ def test_minmax(self):
             assert not idx2.is_monotonic

             for idx in [idx1, idx2]:
-                self.assertEqual(idx.min(), Timestamp('2011-01-01', tz=tz))
-                self.assertEqual(idx.max(), Timestamp('2011-01-03', tz=tz))
-                self.assertEqual(idx.argmin(), 0)
-                self.assertEqual(idx.argmax(), 2)
+                assert idx.min() == Timestamp('2011-01-01', tz=tz)
+                assert idx.max() == Timestamp('2011-01-03', tz=tz)
+                assert idx.argmin() == 0
+                assert idx.argmax() == 2

         for op in ['min', 'max']:
             # Return NaT
@@ -125,17 +125,15 @@ def test_minmax(self):

     def test_numpy_minmax(self):
         dr = pd.date_range(start='2016-01-15', end='2016-01-20')

-        self.assertEqual(np.min(dr),
-                         Timestamp('2016-01-15 00:00:00', freq='D'))
-        self.assertEqual(np.max(dr),
-                         Timestamp('2016-01-20 00:00:00', freq='D'))
+        assert np.min(dr) == Timestamp('2016-01-15 00:00:00', freq='D')
+        assert np.max(dr) == Timestamp('2016-01-20 00:00:00', freq='D')

         errmsg = "the 'out' parameter is not supported"
         tm.assert_raises_regex(ValueError, errmsg, np.min, dr, out=0)
         tm.assert_raises_regex(ValueError, errmsg, np.max, dr, out=0)

-        self.assertEqual(np.argmin(dr), 0)
-        self.assertEqual(np.argmax(dr), 5)
+        assert np.argmin(dr) == 0
+        assert np.argmax(dr) == 5

         if not _np_version_under1p10:
             errmsg = "the 'out' parameter is not supported"
@@ -160,7 +158,7 @@ def test_round(self):
         expected_elt = expected_rng[1]

         tm.assert_index_equal(rng.round(freq='H'), expected_rng)
-        self.assertEqual(elt.round(freq='H'), expected_elt)
+        assert elt.round(freq='H') == expected_elt

         msg = pd.tseries.frequencies._INVALID_FREQ_ERROR
         with tm.assert_raises_regex(ValueError, msg):
@@ -200,7 +198,7 @@ def test_repeat_range(self):
         result = rng.repeat(5)
         assert result.freq is None
-        self.assertEqual(len(result), 5 * len(rng))
+        assert len(result) == 5 * len(rng)

         for tz in self.tz:
             index = pd.date_range('2001-01-01', periods=2, freq='D', tz=tz)
@@ -288,7 +286,7 @@ def test_representation(self):
             for indx, expected in zip(idx, exp):
                 for func in ['__repr__', '__unicode__', '__str__']:
                     result = getattr(indx, func)()
-                    self.assertEqual(result, expected)
+                    assert result == expected

     def test_representation_to_series(self):
         idx1 = DatetimeIndex([], freq='D')
@@ -336,7 +334,7 @@ def test_representation_to_series(self):
                                  [exp1, exp2, exp3, exp4,
                                   exp5, exp6, exp7]):
                 result = repr(Series(idx))
-                self.assertEqual(result, expected)
+                assert result == expected

     def test_summary(self):
         # GH9116
@@ -372,7 +370,7 @@ def test_summary(self):
         for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6],
                                  [exp1, exp2, exp3, exp4, exp5, exp6]):
             result = idx.summary()
-            self.assertEqual(result, expected)
+            assert result == expected

     def test_resolution(self):
         for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T',
@@ -383,7 +381,7 @@ def test_resolution(self):
             for tz in self.tz:
                 idx = pd.date_range(start='2013-04-01', periods=30, freq=freq,
                                     tz=tz)
-                self.assertEqual(idx.resolution, expected)
+                assert idx.resolution == expected

     def test_union(self):
         for tz in self.tz:
@@ -724,39 +722,39 @@ def test_getitem(self):

         for idx in [idx1, idx2]:
             result = idx[0]
-            self.assertEqual(result, Timestamp('2011-01-01', tz=idx.tz))
+            assert result == Timestamp('2011-01-01', tz=idx.tz)

             result = idx[0:5]
             expected = pd.date_range('2011-01-01', '2011-01-05', freq='D',
                                      tz=idx.tz, name='idx')
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.freq == expected.freq

             result = idx[0:10:2]
             expected = pd.date_range('2011-01-01', '2011-01-09', freq='2D',
                                      tz=idx.tz, name='idx')
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.freq == expected.freq
             result = idx[-20:-5:3]
             expected = pd.date_range('2011-01-12', '2011-01-24', freq='3D',
                                      tz=idx.tz, name='idx')
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.freq == expected.freq

             result = idx[4::-1]
             expected = DatetimeIndex(['2011-01-05', '2011-01-04', '2011-01-03',
                                       '2011-01-02', '2011-01-01'],
                                      freq='-1D', tz=idx.tz, name='idx')
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.freq == expected.freq

     def test_drop_duplicates_metadata(self):
         # GH 10115
         idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
         result = idx.drop_duplicates()
         tm.assert_index_equal(idx, result)
-        self.assertEqual(idx.freq, result.freq)
+        assert idx.freq == result.freq

         idx_dup = idx.append(idx)
         assert idx_dup.freq is None  # freq is reset
@@ -793,25 +791,25 @@ def test_take(self):

         for idx in [idx1, idx2]:
             result = idx.take([0])
-            self.assertEqual(result, Timestamp('2011-01-01', tz=idx.tz))
+            assert result == Timestamp('2011-01-01', tz=idx.tz)

             result = idx.take([0, 1, 2])
             expected = pd.date_range('2011-01-01', '2011-01-03', freq='D',
                                      tz=idx.tz, name='idx')
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.freq == expected.freq

             result = idx.take([0, 2, 4])
             expected = pd.date_range('2011-01-01', '2011-01-05', freq='2D',
                                      tz=idx.tz, name='idx')
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.freq == expected.freq

             result = idx.take([7, 4, 1])
             expected = pd.date_range('2011-01-08', '2011-01-02', freq='-3D',
                                      tz=idx.tz, name='idx')
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.freq == expected.freq

             result = idx.take([3, 2, 5])
             expected = DatetimeIndex(['2011-01-04', '2011-01-03',
@@ -851,7 +849,7 @@ def test_infer_freq(self):
             idx = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10)
             result = pd.DatetimeIndex(idx.asi8, freq='infer')
             tm.assert_index_equal(idx, result)
-            self.assertEqual(result.freq, freq)
+            assert result.freq == freq

     def test_nat_new(self):
         idx = pd.date_range('2011-01-01', freq='D', periods=5, name='x')
@@ -1139,18 +1137,18 @@ def test_getitem(self):
         exp = DatetimeIndex(self.rng.view(np.ndarray)[:5])
         tm.assert_index_equal(smaller, exp)

-        self.assertEqual(smaller.offset, self.rng.offset)
+        assert smaller.offset == self.rng.offset

         sliced = self.rng[::5]
-        self.assertEqual(sliced.offset, BDay() * 5)
+        assert sliced.offset == BDay() * 5

         fancy_indexed = self.rng[[4, 3, 2, 1, 0]]
-        self.assertEqual(len(fancy_indexed), 5)
+        assert len(fancy_indexed) == 5
         assert isinstance(fancy_indexed, DatetimeIndex)
         assert fancy_indexed.freq is None
         # 32-bit vs. 64-bit platforms
-        self.assertEqual(self.rng[4], self.rng[np.int_(4)])
+        assert self.rng[4] == self.rng[np.int_(4)]

     def test_getitem_matplotlib_hackaround(self):
         values = self.rng[:, None]
@@ -1159,20 +1157,20 @@ def test_getitem_matplotlib_hackaround(self):
     def test_shift(self):
         shifted = self.rng.shift(5)
-        self.assertEqual(shifted[0], self.rng[5])
-        self.assertEqual(shifted.offset, self.rng.offset)
+        assert shifted[0] == self.rng[5]
+        assert shifted.offset == self.rng.offset

         shifted = self.rng.shift(-5)
-        self.assertEqual(shifted[5], self.rng[0])
-        self.assertEqual(shifted.offset, self.rng.offset)
+        assert shifted[5] == self.rng[0]
+        assert shifted.offset == self.rng.offset

         shifted = self.rng.shift(0)
-        self.assertEqual(shifted[0], self.rng[0])
-        self.assertEqual(shifted.offset, self.rng.offset)
+        assert shifted[0] == self.rng[0]
+        assert shifted.offset == self.rng.offset

         rng = date_range(START, END, freq=BMonthEnd())
         shifted = rng.shift(1, freq=BDay())
-        self.assertEqual(shifted[0], rng[0] + BDay())
+        assert shifted[0] == rng[0] + BDay()

     def test_summary(self):
         self.rng.summary()
@@ -1234,18 +1232,18 @@ def test_getitem(self):
         smaller = self.rng[:5]
         exp = DatetimeIndex(self.rng.view(np.ndarray)[:5])
         tm.assert_index_equal(smaller, exp)
-        self.assertEqual(smaller.offset, self.rng.offset)
+        assert smaller.offset == self.rng.offset

         sliced = self.rng[::5]
-        self.assertEqual(sliced.offset, CDay() * 5)
+        assert sliced.offset == CDay() * 5

         fancy_indexed = self.rng[[4, 3, 2, 1, 0]]
-        self.assertEqual(len(fancy_indexed), 5)
+        assert len(fancy_indexed) == 5
         assert isinstance(fancy_indexed, DatetimeIndex)
         assert fancy_indexed.freq is None

         # 32-bit vs. 64-bit platforms
-        self.assertEqual(self.rng[4], self.rng[np.int_(4)])
+        assert self.rng[4] == self.rng[np.int_(4)]

     def test_getitem_matplotlib_hackaround(self):
         values = self.rng[:, None]
@@ -1255,22 +1253,22 @@ def test_getitem_matplotlib_hackaround(self):
     def test_shift(self):

         shifted = self.rng.shift(5)
-        self.assertEqual(shifted[0], self.rng[5])
-        self.assertEqual(shifted.offset, self.rng.offset)
+        assert shifted[0] == self.rng[5]
+        assert shifted.offset == self.rng.offset

         shifted = self.rng.shift(-5)
-        self.assertEqual(shifted[5], self.rng[0])
-        self.assertEqual(shifted.offset, self.rng.offset)
+        assert shifted[5] == self.rng[0]
+        assert shifted.offset == self.rng.offset

         shifted = self.rng.shift(0)
-        self.assertEqual(shifted[0], self.rng[0])
-        self.assertEqual(shifted.offset, self.rng.offset)
+        assert shifted[0] == self.rng[0]
+        assert shifted.offset == self.rng.offset

         # PerformanceWarning
         with warnings.catch_warnings(record=True):
             rng = date_range(START, END, freq=BMonthEnd())
             shifted = rng.shift(1, freq=CDay())
-            self.assertEqual(shifted[0], rng[0] + CDay())
+            assert shifted[0] == rng[0] + CDay()

     def test_pickle_unpickle(self):
         unpickled = tm.round_trip_pickle(self.rng)
diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py
index c3eda8b378c96..b3661ae0e7a97 100644
--- a/pandas/tests/indexes/datetimes/test_partial_slicing.py
+++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py
@@ -30,24 +30,24 @@ def test_slice_year(self):

         result = rng.get_loc('2009')
         expected = slice(3288, 3653)
-        self.assertEqual(result, expected)
+        assert result == expected

     def test_slice_quarter(self):
         dti = DatetimeIndex(freq='D', start=datetime(2000, 6, 1), periods=500)

         s = Series(np.arange(len(dti)), index=dti)
-        self.assertEqual(len(s['2001Q1']), 90)
+        assert len(s['2001Q1']) == 90
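#
# [Editor's note -- not part of the original patch] Where the compared
# value is a singleton (None, True, False, NaT), the conversion in these
# hunks switches to an identity check rather than equality, e.g.:
#
#     self.assertEqual(idx.freqstr, None)  becomes  assert idx.freqstr is None
#
# For everything else (scalars, slices, Timestamps), plain ``==`` keeps
# the original assertEqual semantics.
#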
         df = DataFrame(np.random.rand(len(dti), 5), index=dti)
-        self.assertEqual(len(df.loc['1Q01']), 90)
+        assert len(df.loc['1Q01']) == 90

     def test_slice_month(self):
         dti = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500)
         s = Series(np.arange(len(dti)), index=dti)
-        self.assertEqual(len(s['2005-11']), 30)
+        assert len(s['2005-11']) == 30

         df = DataFrame(np.random.rand(len(dti), 5), index=dti)
-        self.assertEqual(len(df.loc['2005-11']), 30)
+        assert len(df.loc['2005-11']) == 30

         tm.assert_series_equal(s['2005-11'], s['11-2005'])

@@ -68,7 +68,7 @@ def test_partial_slice(self):
         tm.assert_series_equal(result, expected)

         result = s['2005-1-1']
-        self.assertEqual(result, s.iloc[0])
+        assert result == s.iloc[0]

         pytest.raises(Exception, s.__getitem__, '2004-12-31')

@@ -92,7 +92,7 @@ def test_partial_slice_hourly(self):
         result = s['2005-1-1 20']
         tm.assert_series_equal(result, s.iloc[:60])

-        self.assertEqual(s['2005-1-1 20:00'], s.iloc[0])
+        assert s['2005-1-1 20:00'] == s.iloc[0]
         pytest.raises(Exception, s.__getitem__, '2004-12-31 00:15')

     def test_partial_slice_minutely(self):
@@ -106,7 +106,7 @@ def test_partial_slice_minutely(self):
         result = s['2005-1-1']
         tm.assert_series_equal(result, s.iloc[:60])

-        self.assertEqual(s[Timestamp('2005-1-1 23:59:00')], s.iloc[0])
+        assert s[Timestamp('2005-1-1 23:59:00')] == s.iloc[0]
         pytest.raises(Exception, s.__getitem__, '2004-12-31 00:00:00')

     def test_partial_slice_second_precision(self):
@@ -121,7 +121,7 @@ def test_partial_slice_second_precision(self):
         tm.assert_series_equal(s['2005-1-1 00:01'], s.iloc[10:])
         tm.assert_series_equal(s['2005-1-1 00:01:00'], s.iloc[10:])

-        self.assertEqual(s[Timestamp('2005-1-1 00:00:59.999990')], s.iloc[0])
+        assert s[Timestamp('2005-1-1 00:00:59.999990')] == s.iloc[0]
         tm.assert_raises_regex(KeyError, '2005-1-1 00:00:00',
                                lambda: s['2005-1-1 00:00:00'])

@@ -144,7 +144,7 @@ def test_partial_slicing_dataframe(self):
                                        middate, middate + unit])
                 values = [1, 2, 3]
                 df = DataFrame({'a': values}, index, dtype=np.int64)
-                self.assertEqual(df.index.resolution, resolution)
+                assert df.index.resolution == resolution

                 # Timestamp with the same resolution as index
                 # Should be exact match for Series (return scalar)
@@ -154,7 +154,7 @@ def test_partial_slicing_dataframe(self):
                     # make ts_string as precise as index
                     result = df['a'][ts_string]
                     assert isinstance(result, np.int64)
-                    self.assertEqual(result, expected)
+                    assert result == expected
                     pytest.raises(KeyError, df.__getitem__, ts_string)

                 # Timestamp with resolution less precise than index
@@ -181,7 +181,7 @@ def test_partial_slicing_dataframe(self):
                     ts_string = index[1].strftime(fmt)
                     result = df['a'][ts_string]
                     assert isinstance(result, np.int64)
-                    self.assertEqual(result, 2)
+                    assert result == 2
                     pytest.raises(KeyError, df.__getitem__, ts_string)

                 # Not compatible with existing key
diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py
index 6612ab844b849..b25fdaf6be3b0 100644
--- a/pandas/tests/indexes/datetimes/test_setops.py
+++ b/pandas/tests/indexes/datetimes/test_setops.py
@@ -29,7 +29,7 @@ def test_union_coverage(self):

         result = ordered[:0].union(ordered)
         tm.assert_index_equal(result, ordered)
-        self.assertEqual(result.freq, ordered.freq)
+        assert result.freq == ordered.freq

     def test_union_bug_1730(self):
         rng_a = date_range('1/1/2012', periods=4, freq='3H')
@@ -106,9 +106,9 @@ def test_intersection(self):
                               (rng4, expected4)]:
             result = base.intersection(rng)
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.name, expected.name)
-            self.assertEqual(result.freq, expected.freq)
-            self.assertEqual(result.tz, expected.tz)
+            assert result.name == expected.name
+            assert result.freq == expected.freq
+            assert result.tz == expected.tz

         # non-monotonic
         base = DatetimeIndex(['2011-01-05', '2011-01-04',
@@ -136,17 +136,17 @@ def test_intersection(self):
                               (rng4, expected4)]:
             result = base.intersection(rng)
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.name, expected.name)
+            assert result.name == expected.name
             assert result.freq is None
-            self.assertEqual(result.tz, expected.tz)
+            assert result.tz == expected.tz

         # empty same freq GH2129
         rng = date_range('6/1/2000', '6/15/2000', freq='T')
         result = rng[0:0].intersection(rng)
-        self.assertEqual(len(result), 0)
+        assert len(result) == 0

         result = rng.intersection(rng[0:0])
-        self.assertEqual(len(result), 0)
+        assert len(result) == 0

     def test_intersection_bug_1708(self):
         from pandas import DateOffset
@@ -154,7 +154,7 @@ def test_intersection_bug_1708(self):
         index_2 = index_1 + DateOffset(hours=1)

         result = index_1 & index_2
-        self.assertEqual(len(result), 0)
+        assert len(result) == 0

     def test_difference_freq(self):
         # GH14323: difference of DatetimeIndex should not preserve frequency
@@ -177,7 +177,7 @@ def test_datetimeindex_diff(self):
                              periods=100)
         dti2 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31),
                              periods=98)
-        self.assertEqual(len(dti1.difference(dti2)), 2)
+        assert len(dti1.difference(dti2)) == 2

     def test_datetimeindex_union_join_empty(self):
         dti = DatetimeIndex(start='1/1/2001', end='2/1/2001', freq='D')
@@ -288,7 +288,7 @@ def test_intersection(self):
         expected = rng[10:25]
         tm.assert_index_equal(the_int, expected)
         assert isinstance(the_int, DatetimeIndex)
-        self.assertEqual(the_int.offset, rng.offset)
+        assert the_int.offset == rng.offset

         the_int = rng1.intersection(rng2.view(DatetimeIndex))
         tm.assert_index_equal(the_int, expected)
diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py
index 4c32f41db207c..3c7f2e424f779 100644
--- a/pandas/tests/indexes/datetimes/test_tools.py
+++ b/pandas/tests/indexes/datetimes/test_tools.py
@@ -45,7 +45,7 @@ def test_to_datetime_format(self):
             if isinstance(expected, Series):
                 assert_series_equal(result, Series(expected))
             elif isinstance(expected, Timestamp):
-                self.assertEqual(result, expected)
+                assert result == expected
             else:
                 tm.assert_index_equal(result, expected)

@@ -112,7 +112,7 @@ def test_to_datetime_format_microsecond(self):
         format = '%d-%b-%Y %H:%M:%S.%f'
         result = to_datetime(val, format=format)
         exp = datetime.strptime(val, format)
-        self.assertEqual(result, exp)
+        assert result == exp

     def test_to_datetime_format_time(self):
         data = [
@@ -130,7 +130,7 @@ def test_to_datetime_format_time(self):
             #  Timestamp('2010-01-10 09:12:56')]
        ]
         for s, format, dt in data:
-            self.assertEqual(to_datetime(s, format=format), dt)
+            assert to_datetime(s, format=format) == dt

     def test_to_datetime_with_non_exact(self):
         # GH 10834
@@ -159,7 +159,7 @@ def test_parse_nanoseconds_with_formula(self):
                   "2012-01-01 09:00:00.001000000", ]:
            expected = pd.to_datetime(v)
            result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f")
-            self.assertEqual(result, expected)
+            assert result == expected

     def test_to_datetime_format_weeks(self):
         data = [
@@ -167,7 +167,7 @@ def test_to_datetime_format_weeks(self):
             ['2013020', '%Y%U%w', Timestamp('2013-01-13')]
        ]
         for s, format, dt in data:
-            self.assertEqual(to_datetime(s, format=format), dt)
+            assert to_datetime(s, format=format) == dt
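#
# [Editor's note -- not part of the original patch] Exception checks are
# left on the pytest idioms already in use throughout these files:
# ``pytest.raises`` for the exception type and ``tm.assert_raises_regex``
# when the message matters. A small sketch mirroring the get_loc tests
# above (hypothetical standalone usage, assuming a pandas of this era):
#
#     import pytest
#     import pandas as pd
#
#     idx = pd.date_range('2000-01-01', periods=3)
#     with pytest.raises(KeyError):
#         idx.get_loc('1999', method='pad')
#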
 class TestToDatetime(tm.TestCase):
@@ -312,11 +312,11 @@ def test_datetime_bool(self):
         with pytest.raises(TypeError):
             to_datetime(False)
         assert to_datetime(False, errors="coerce") is NaT
-        self.assertEqual(to_datetime(False, errors="ignore"), False)
+        assert to_datetime(False, errors="ignore") is False
         with pytest.raises(TypeError):
             to_datetime(True)
         assert to_datetime(True, errors="coerce") is NaT
-        self.assertEqual(to_datetime(True, errors="ignore"), True)
+        assert to_datetime(True, errors="ignore") is True
         with pytest.raises(TypeError):
             to_datetime([False, datetime.today()])
         with pytest.raises(TypeError):
@@ -390,15 +390,15 @@ def test_unit_consistency(self):
         # consistency of conversions
         expected = Timestamp('1970-05-09 14:25:11')
         result = pd.to_datetime(11111111, unit='s', errors='raise')
-        self.assertEqual(result, expected)
+        assert result == expected
         assert isinstance(result, Timestamp)

         result = pd.to_datetime(11111111, unit='s', errors='coerce')
-        self.assertEqual(result, expected)
+        assert result == expected
         assert isinstance(result, Timestamp)

         result = pd.to_datetime(11111111, unit='s', errors='ignore')
-        self.assertEqual(result, expected)
+        assert result == expected
         assert isinstance(result, Timestamp)

     def test_unit_with_numeric(self):
@@ -617,11 +617,11 @@ def test_index_to_datetime(self):
     def test_to_datetime_iso8601(self):
         result = to_datetime(["2012-01-01 00:00:00"])
         exp = Timestamp("2012-01-01 00:00:00")
-        self.assertEqual(result[0], exp)
+        assert result[0] == exp

         result = to_datetime(['20121001'])  # bad iso 8601
         exp = Timestamp('2012-10-01')
-        self.assertEqual(result[0], exp)
+        assert result[0] == exp

     def test_to_datetime_default(self):
         rs = to_datetime('2001')
@@ -639,7 +639,7 @@ def test_to_datetime_on_datetime64_series(self):
         s = Series(date_range('1/1/2000', periods=10))

         result = to_datetime(s)
-        self.assertEqual(result[0], s[0])
+        assert result[0] == s[0]

     def test_to_datetime_with_space_in_series(self):
         # GH 6428
@@ -689,12 +689,12 @@ def test_to_datetime_types(self):
         # ints
         result = Timestamp(0)
         expected = to_datetime(0)
-        self.assertEqual(result, expected)
+        assert result == expected

         # GH 3888 (strings)
         expected = to_datetime(['2012'])[0]
         result = to_datetime('2012')
-        self.assertEqual(result, expected)
+        assert result == expected

         # array = ['2012','20120101','20120101 12:01:01']
         array = ['20120101', '20120101 12:01:01']
@@ -705,7 +705,7 @@ def test_to_datetime_types(self):
         # currently fails ###
         # result = Timestamp('2012')
         # expected = to_datetime('2012')
-        # self.assertEqual(result, expected)
+        # assert result == expected

     def test_to_datetime_unprocessable_input(self):
         # GH 4928
@@ -721,10 +721,10 @@ def test_to_datetime_other_datetime64_units(self):
         as_obj = scalar.astype('O')

         index = DatetimeIndex([scalar])
-        self.assertEqual(index[0], scalar.astype('O'))
+        assert index[0] == scalar.astype('O')

         value = Timestamp(scalar)
-        self.assertEqual(value, as_obj)
+        assert value == as_obj

     def test_to_datetime_list_of_integers(self):
         rng = date_range('1/1/2000', periods=20)
@@ -739,8 +739,8 @@ def test_to_datetime_freq(self):
         xp = bdate_range('2000-1-1', periods=10, tz='UTC')
         rs = xp.to_datetime()
-        self.assertEqual(xp.freq, rs.freq)
-        self.assertEqual(xp.tzinfo, rs.tzinfo)
+        assert xp.freq == rs.freq
+        assert xp.tzinfo == rs.tzinfo

     def test_string_na_nat_conversion(self):
         # GH #999, #858
@@ -794,10 +794,10 @@ def test_string_na_nat_conversion(self):
                 expected[i] = to_datetime(x)
         assert_series_equal(result, expected, check_names=False)
-        self.assertEqual(result.name, 'foo')
+        assert result.name == 'foo'

         assert_series_equal(dresult, expected, check_names=False)
-        self.assertEqual(dresult.name, 'foo')
+        assert dresult.name == 'foo'

     def test_dti_constructor_numpy_timeunits(self):
         # GH 9114
@@ -842,21 +842,14 @@ def test_guess_datetime_format_with_parseable_formats(self):
              '%Y-%m-%d %H:%M:%S.%f'),
        )

         for dt_string, dt_format in dt_string_to_format:
-            self.assertEqual(
-                tools._guess_datetime_format(dt_string),
-                dt_format
-            )
+            assert tools._guess_datetime_format(dt_string) == dt_format

     def test_guess_datetime_format_with_dayfirst(self):
         ambiguous_string = '01/01/2011'
-        self.assertEqual(
-            tools._guess_datetime_format(ambiguous_string, dayfirst=True),
-            '%d/%m/%Y'
-        )
-        self.assertEqual(
-            tools._guess_datetime_format(ambiguous_string, dayfirst=False),
-            '%m/%d/%Y'
-        )
+        assert tools._guess_datetime_format(
+            ambiguous_string, dayfirst=True) == '%d/%m/%Y'
+        assert tools._guess_datetime_format(
+            ambiguous_string, dayfirst=False) == '%m/%d/%Y'

     def test_guess_datetime_format_with_locale_specific_formats(self):
         # The month names will vary depending on the locale, in which
@@ -868,10 +861,7 @@ def test_guess_datetime_format_with_locale_specific_formats(self):
             ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'),
        )
         for dt_string, dt_format in dt_string_to_format:
-            self.assertEqual(
-                tools._guess_datetime_format(dt_string),
-                dt_format
-            )
+            assert tools._guess_datetime_format(dt_string) == dt_format

     def test_guess_datetime_format_invalid_inputs(self):
         # A datetime string must include a year, month and a day for it
@@ -901,10 +891,7 @@ def test_guess_datetime_format_nopadding(self):
             ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S'))

         for dt_string, dt_format in dt_string_to_format:
-            self.assertEqual(
-                tools._guess_datetime_format(dt_string),
-                dt_format
-            )
+            assert tools._guess_datetime_format(dt_string) == dt_format

     def test_guess_datetime_format_for_array(self):
         tm._skip_if_not_us_locale()
@@ -918,10 +905,8 @@ def test_guess_datetime_format_for_array(self):
        ]

         for test_array in test_arrays:
-            self.assertEqual(
-                tools._guess_datetime_format_for_array(test_array),
-                expected_format
-            )
+            assert tools._guess_datetime_format_for_array(
+                test_array) == expected_format

         format_for_string_of_nans = tools._guess_datetime_format_for_array(
             np.array(
@@ -1012,14 +997,13 @@ def test_day_not_in_month_raise(self):
                       errors='raise', format="%Y-%m-%d")

     def test_day_not_in_month_ignore(self):
-        self.assertEqual(to_datetime(
-            '2015-02-29', errors='ignore'), '2015-02-29')
-        self.assertEqual(to_datetime(
-            '2015-02-29', errors='ignore', format="%Y-%m-%d"), '2015-02-29')
-        self.assertEqual(to_datetime(
-            '2015-02-32', errors='ignore', format="%Y-%m-%d"), '2015-02-32')
-        self.assertEqual(to_datetime(
-            '2015-04-31', errors='ignore', format="%Y-%m-%d"), '2015-04-31')
+        assert to_datetime('2015-02-29', errors='ignore') == '2015-02-29'
+        assert to_datetime('2015-02-29', errors='ignore',
+                           format="%Y-%m-%d") == '2015-02-29'
+        assert to_datetime('2015-02-32', errors='ignore',
+                           format="%Y-%m-%d") == '2015-02-32'
+        assert to_datetime('2015-04-31', errors='ignore',
+                           format="%Y-%m-%d") == '2015-04-31'


 class TestDatetimeParsingWrappers(tm.TestCase):
@@ -1110,7 +1094,7 @@ def test_parsers(self):
             result9 = DatetimeIndex(Series([date_str]), yearfirst=yearfirst)

             for res in [result1, result2]:
-                self.assertEqual(res, expected)
+                assert res == expected

             for res in [result3, result4, result6, result8, result9]:
                 exp = DatetimeIndex([pd.Timestamp(expected)])
                 tm.assert_index_equal(res, exp)

@@ -1118,10 +1102,10 @@ def test_parsers(self):
             # these really need to have yearfirst, but we don't support
             if not yearfirst:
                 result5 = Timestamp(date_str)
-                self.assertEqual(result5, expected)
+                assert result5 == expected
                 result7 = date_range(date_str, freq='S', periods=1,
                                      yearfirst=yearfirst)
-                self.assertEqual(result7, expected)
+                assert result7 == expected

         # NaT
         result1, _, _ = tools.parse_time_string('NaT')
@@ -1215,7 +1199,7 @@ def test_parsers_dayfirst_yearfirst(self):
             # compare with dateutil result
             dateutil_result = parse(date_str, dayfirst=dayfirst,
                                     yearfirst=yearfirst)
-            self.assertEqual(dateutil_result, expected)
+            assert dateutil_result == expected

             result1, _, _ = tools.parse_time_string(date_str,
                                                     dayfirst=dayfirst,
@@ -1224,7 +1208,7 @@ def test_parsers_dayfirst_yearfirst(self):
             # we don't support dayfirst/yearfirst here:
             if not dayfirst and not yearfirst:
                 result2 = Timestamp(date_str)
-                self.assertEqual(result2, expected)
+                assert result2 == expected

             result3 = to_datetime(date_str, dayfirst=dayfirst,
                                   yearfirst=yearfirst)

             result4 = DatetimeIndex([date_str], dayfirst=dayfirst,
                                     yearfirst=yearfirst)[0]

-            self.assertEqual(result1, expected)
-            self.assertEqual(result3, expected)
-            self.assertEqual(result4, expected)
+            assert result1 == expected
+            assert result3 == expected
+            assert result4 == expected

     def test_parsers_timestring(self):
         tm._skip_if_no_dateutil()
@@ -1253,11 +1237,11 @@ def test_parsers_timestring(self):
             # parse time string return time string based on default date
            # others are not, and can't be changed because it is used in
            # time series plot
-            self.assertEqual(result1, exp_def)
-            self.assertEqual(result2, exp_now)
-            self.assertEqual(result3, exp_now)
-            self.assertEqual(result4, exp_now)
-            self.assertEqual(result5, exp_now)
+            assert result1 == exp_def
+            assert result2 == exp_now
+            assert result3 == exp_now
+            assert result4 == exp_now
+            assert result5 == exp_now

     def test_parsers_time(self):
         # GH11818
@@ -1267,20 +1251,19 @@ def test_parsers_time(self):
         expected = time(14, 15)

         for time_string in strings:
-            self.assertEqual(tools.to_time(time_string), expected)
+            assert tools.to_time(time_string) == expected

         new_string = "14.15"
         pytest.raises(ValueError, tools.to_time, new_string)
-        self.assertEqual(tools.to_time(new_string, format="%H.%M"), expected)
+        assert tools.to_time(new_string, format="%H.%M") == expected

         arg = ["14:15", "20:20"]
         expected_arr = [time(14, 15), time(20, 20)]
-        self.assertEqual(tools.to_time(arg), expected_arr)
-        self.assertEqual(tools.to_time(arg, format="%H:%M"), expected_arr)
-        self.assertEqual(tools.to_time(arg, infer_time_format=True),
-                         expected_arr)
-        self.assertEqual(tools.to_time(arg, format="%I:%M%p", errors="coerce"),
-                         [None, None])
+        assert tools.to_time(arg) == expected_arr
+        assert tools.to_time(arg, format="%H:%M") == expected_arr
+        assert tools.to_time(arg, infer_time_format=True) == expected_arr
+        assert tools.to_time(arg, format="%I:%M%p",
+                             errors="coerce") == [None, None]

         res = tools.to_time(arg, format="%I:%M%p", errors="ignore")
         tm.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_))
@@ -1301,7 +1284,7 @@ def test_parsers_monthfreq(self):

         for date_str, expected in compat.iteritems(cases):
             result1, _, _ = tools.parse_time_string(date_str, freq='M')
-            self.assertEqual(result1, expected)
+            assert result1 == expected

     def test_parsers_quarterly_with_freq(self):
         msg = ('Incorrect quarterly string is given, quarter '
@@ -1321,7 +1304,7 @@ def test_parsers_quarterly_with_freq(self):

         for (date_str, freq), exp in compat.iteritems(cases):
             result, _, _ = tools.parse_time_string(date_str, freq=freq)
-            self.assertEqual(result, exp)
+            assert result == exp

     def test_parsers_timezone_minute_offsets_roundtrip(self):
         # GH11708
@@ -1337,9 +1320,9 @@ def test_parsers_timezone_minute_offsets_roundtrip(self):

         for dt_string, tz, dt_string_repr in dt_strings:
             dt_time = to_datetime(dt_string)
-            self.assertEqual(base, dt_time)
+            assert base == dt_time
             converted_time = dt_time.tz_localize('UTC').tz_convert(tz)
-            self.assertEqual(dt_string_repr, repr(converted_time))
+            assert dt_string_repr == repr(converted_time)

     def test_parsers_iso8601(self):
         # GH 12060
@@ -1358,7 +1341,7 @@ def test_parsers_iso8601(self):
                  '2013-1-1 5:30:00': datetime(2013, 1, 1, 5, 30)}
         for date_str, exp in compat.iteritems(cases):
             actual = tslib._test_parse_iso8601(date_str)
-            self.assertEqual(actual, exp)
+            assert actual == exp

         # separators must all match - YYYYMM not valid
         invalid_cases = ['2011-01/02', '2011^11^11',
diff --git a/pandas/tests/indexes/period/test_asfreq.py b/pandas/tests/indexes/period/test_asfreq.py
index 4d1fe9c46f126..f9effd3d1aea6 100644
--- a/pandas/tests/indexes/period/test_asfreq.py
+++ b/pandas/tests/indexes/period/test_asfreq.py
@@ -20,64 +20,64 @@ def test_asfreq(self):
         pi6 = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 00:00')
         pi7 = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 00:00:00')

-        self.assertEqual(pi1.asfreq('Q', 'S'), pi2)
-        self.assertEqual(pi1.asfreq('Q', 's'), pi2)
-        self.assertEqual(pi1.asfreq('M', 'start'), pi3)
-        self.assertEqual(pi1.asfreq('D', 'StarT'), pi4)
-        self.assertEqual(pi1.asfreq('H', 'beGIN'), pi5)
-        self.assertEqual(pi1.asfreq('Min', 'S'), pi6)
-        self.assertEqual(pi1.asfreq('S', 'S'), pi7)
-
-        self.assertEqual(pi2.asfreq('A', 'S'), pi1)
-        self.assertEqual(pi2.asfreq('M', 'S'), pi3)
-        self.assertEqual(pi2.asfreq('D', 'S'), pi4)
-        self.assertEqual(pi2.asfreq('H', 'S'), pi5)
-        self.assertEqual(pi2.asfreq('Min', 'S'), pi6)
-        self.assertEqual(pi2.asfreq('S', 'S'), pi7)
-
-        self.assertEqual(pi3.asfreq('A', 'S'), pi1)
-        self.assertEqual(pi3.asfreq('Q', 'S'), pi2)
-        self.assertEqual(pi3.asfreq('D', 'S'), pi4)
-        self.assertEqual(pi3.asfreq('H', 'S'), pi5)
-        self.assertEqual(pi3.asfreq('Min', 'S'), pi6)
-        self.assertEqual(pi3.asfreq('S', 'S'), pi7)
-
-        self.assertEqual(pi4.asfreq('A', 'S'), pi1)
-        self.assertEqual(pi4.asfreq('Q', 'S'), pi2)
-        self.assertEqual(pi4.asfreq('M', 'S'), pi3)
-        self.assertEqual(pi4.asfreq('H', 'S'), pi5)
-        self.assertEqual(pi4.asfreq('Min', 'S'), pi6)
-        self.assertEqual(pi4.asfreq('S', 'S'), pi7)
-
-        self.assertEqual(pi5.asfreq('A', 'S'), pi1)
-        self.assertEqual(pi5.asfreq('Q', 'S'), pi2)
-        self.assertEqual(pi5.asfreq('M', 'S'), pi3)
-        self.assertEqual(pi5.asfreq('D', 'S'), pi4)
-        self.assertEqual(pi5.asfreq('Min', 'S'), pi6)
-        self.assertEqual(pi5.asfreq('S', 'S'), pi7)
-
-        self.assertEqual(pi6.asfreq('A', 'S'), pi1)
-        self.assertEqual(pi6.asfreq('Q', 'S'), pi2)
-        self.assertEqual(pi6.asfreq('M', 'S'), pi3)
-        self.assertEqual(pi6.asfreq('D', 'S'), pi4)
-        self.assertEqual(pi6.asfreq('H', 'S'), pi5)
-        self.assertEqual(pi6.asfreq('S', 'S'), pi7)
-
-        self.assertEqual(pi7.asfreq('A', 'S'), pi1)
-        self.assertEqual(pi7.asfreq('Q', 'S'), pi2)
-        self.assertEqual(pi7.asfreq('M', 'S'), pi3)
-        self.assertEqual(pi7.asfreq('D', 'S'), pi4)
-        self.assertEqual(pi7.asfreq('H', 'S'), pi5)
-        self.assertEqual(pi7.asfreq('Min', 'S'), pi6)
+        assert pi1.asfreq('Q', 'S') == pi2
+        assert pi1.asfreq('Q', 's') == pi2
+        assert pi1.asfreq('M', 'start') == pi3
+        assert pi1.asfreq('D', 'StarT') == pi4
+        assert pi1.asfreq('H', 'beGIN') == pi5
+        assert pi1.asfreq('Min', 'S') == pi6
+        assert pi1.asfreq('S', 'S') == pi7
+
+        assert pi2.asfreq('A', 'S') == pi1
+        assert pi2.asfreq('M', 'S') == pi3
+        assert pi2.asfreq('D', 'S') == pi4
+        assert pi2.asfreq('H', 'S') == pi5
+        assert pi2.asfreq('Min', 'S') == pi6
+        assert pi2.asfreq('S', 'S') == pi7
+
+        assert pi3.asfreq('A', 'S') == pi1
+        assert pi3.asfreq('Q', 'S') == pi2
+        assert pi3.asfreq('D', 'S') == pi4
+        assert pi3.asfreq('H', 'S') == pi5
+        assert pi3.asfreq('Min', 'S') == pi6
+        assert pi3.asfreq('S', 'S') == pi7
+
+        assert pi4.asfreq('A', 'S') == pi1
+        assert pi4.asfreq('Q', 'S') == pi2
+        assert pi4.asfreq('M', 'S') == pi3
+        assert pi4.asfreq('H', 'S') == pi5
+        assert pi4.asfreq('Min', 'S') == pi6
+        assert pi4.asfreq('S', 'S') == pi7
+
+        assert pi5.asfreq('A', 'S') == pi1
+        assert pi5.asfreq('Q', 'S') == pi2
+        assert pi5.asfreq('M', 'S') == pi3
+        assert pi5.asfreq('D', 'S') == pi4
+        assert pi5.asfreq('Min', 'S') == pi6
+        assert pi5.asfreq('S', 'S') == pi7
+
+        assert pi6.asfreq('A', 'S') == pi1
+        assert pi6.asfreq('Q', 'S') == pi2
+        assert pi6.asfreq('M', 'S') == pi3
+        assert pi6.asfreq('D', 'S') == pi4
+        assert pi6.asfreq('H', 'S') == pi5
+        assert pi6.asfreq('S', 'S') == pi7
+
+        assert pi7.asfreq('A', 'S') == pi1
+        assert pi7.asfreq('Q', 'S') == pi2
+        assert pi7.asfreq('M', 'S') == pi3
+        assert pi7.asfreq('D', 'S') == pi4
+        assert pi7.asfreq('H', 'S') == pi5
+        assert pi7.asfreq('Min', 'S') == pi6

         pytest.raises(ValueError, pi7.asfreq, 'T', 'foo')
         result1 = pi1.asfreq('3M')
         result2 = pi1.asfreq('M')
         expected = PeriodIndex(freq='M', start='2001-12', end='2001-12')
         tm.assert_numpy_array_equal(result1.asi8, expected.asi8)
-        self.assertEqual(result1.freqstr, '3M')
+        assert result1.freqstr == '3M'
         tm.assert_numpy_array_equal(result2.asi8, expected.asi8)
-        self.assertEqual(result2.freqstr, 'M')
+        assert result2.freqstr == 'M'

     def test_asfreq_nat(self):
         idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M')
@@ -93,13 +93,13 @@ def test_asfreq_mult_pi(self):
             exp = PeriodIndex(['2001-02-28', '2001-03-31', 'NaT',
                                '2001-04-30'], freq=freq)
             tm.assert_index_equal(result, exp)
-            self.assertEqual(result.freq, exp.freq)
+            assert result.freq == exp.freq

             result = pi.asfreq(freq, how='S')
             exp = PeriodIndex(['2001-01-01', '2001-02-01', 'NaT',
                                '2001-03-01'], freq=freq)
             tm.assert_index_equal(result, exp)
-            self.assertEqual(result.freq, exp.freq)
+            assert result.freq == exp.freq

     def test_asfreq_combined_pi(self):
         pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'],
@@ -109,7 +109,7 @@ def test_asfreq_combined_pi(self):
         for freq, how in zip(['1D1H', '1H1D'], ['S', 'E']):
             result = pi.asfreq(freq, how=how)
             tm.assert_index_equal(result, exp)
-            self.assertEqual(result.freq, exp.freq)
+            assert result.freq == exp.freq

         for freq in ['1D1H', '1H1D']:
             pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00',
@@ -118,7 +118,7 @@ def test_asfreq_combined_pi(self):
             exp = PeriodIndex(['2001-01-02 00:00', '2001-01-03 02:00', 'NaT'],
                               freq='H')
             tm.assert_index_equal(result, exp)
-            self.assertEqual(result.freq, exp.freq)
+            assert result.freq == exp.freq

             pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00',
                                  'NaT'], freq=freq)
@@ -126,7 +126,7 @@ def test_asfreq_combined_pi(self):
             result = pi.asfreq('H', how='S')
             exp = PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'],
                               freq='H')
             tm.assert_index_equal(result, exp)
-            self.assertEqual(result.freq, exp.freq)
 
     def test_asfreq_ts(self):
         index = PeriodIndex(freq='A', start='1/1/2001', end='12/31/2010')
@@ -136,12 +136,12 @@ def test_asfreq_ts(self):
         result = ts.asfreq('D', how='end')
         df_result = df.asfreq('D', how='end')
         exp_index = index.asfreq('D', how='end')
-        self.assertEqual(len(result), len(ts))
+        assert len(result) == len(ts)
         tm.assert_index_equal(result.index, exp_index)
         tm.assert_index_equal(df_result.index, exp_index)
 
         result = ts.asfreq('D', how='start')
-        self.assertEqual(len(result), len(ts))
+        assert len(result) == len(ts)
         tm.assert_index_equal(result.index, index.asfreq('D', how='start'))
 
     def test_astype_asfreq(self):
diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py
index 6ab42f14efae6..a95ad808cadce 100644
--- a/pandas/tests/indexes/period/test_construction.py
+++ b/pandas/tests/indexes/period/test_construction.py
@@ -160,12 +160,12 @@ def test_constructor_dtype(self):
         idx = PeriodIndex(['2013-01', '2013-03'], dtype='period[M]')
         exp = PeriodIndex(['2013-01', '2013-03'], freq='M')
         tm.assert_index_equal(idx, exp)
-        self.assertEqual(idx.dtype, 'period[M]')
+        assert idx.dtype == 'period[M]'
 
         idx = PeriodIndex(['2013-01-05', '2013-03-05'], dtype='period[3D]')
         exp = PeriodIndex(['2013-01-05', '2013-03-05'], freq='3D')
         tm.assert_index_equal(idx, exp)
-        self.assertEqual(idx.dtype, 'period[3D]')
+        assert idx.dtype == 'period[3D]'
 
         # if we already have a freq and its not the same, then asfreq
         # (not changed)
@@ -174,11 +174,11 @@ def test_constructor_dtype(self):
         res = PeriodIndex(idx, dtype='period[M]')
         exp = PeriodIndex(['2013-01', '2013-01'], freq='M')
         tm.assert_index_equal(res, exp)
-        self.assertEqual(res.dtype, 'period[M]')
+        assert res.dtype == 'period[M]'
 
         res = PeriodIndex(idx, freq='M')
         tm.assert_index_equal(res, exp)
-        self.assertEqual(res.dtype, 'period[M]')
+        assert res.dtype == 'period[M]'
 
         msg = 'specified freq and dtype are different'
         with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
@@ -187,8 +187,8 @@ def test_constructor_empty(self):
         idx = pd.PeriodIndex([], freq='M')
         assert isinstance(idx, PeriodIndex)
-        self.assertEqual(len(idx), 0)
-        self.assertEqual(idx.freq, 'M')
+        assert len(idx) == 0
+        assert idx.freq == 'M'
 
         with tm.assert_raises_regex(ValueError, 'freq not specified'):
             pd.PeriodIndex([])
@@ -367,64 +367,64 @@ def test_constructor_freq_combined(self):
 
     def test_constructor(self):
         pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')
-        self.assertEqual(len(pi), 9)
+        assert len(pi) == 9
 
         pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009')
-        self.assertEqual(len(pi), 4 * 9)
+        assert len(pi) == 4 * 9
 
         pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009')
-        self.assertEqual(len(pi), 12 * 9)
+        assert len(pi) == 12 * 9
 
         pi = PeriodIndex(freq='D', start='1/1/2001', end='12/31/2009')
-        self.assertEqual(len(pi), 365 * 9 + 2)
+        assert len(pi) == 365 * 9 + 2
 
         pi = PeriodIndex(freq='B', start='1/1/2001', end='12/31/2009')
-        self.assertEqual(len(pi), 261 * 9)
+        assert len(pi) == 261 * 9
 
         pi = PeriodIndex(freq='H', start='1/1/2001', end='12/31/2001 23:00')
-        self.assertEqual(len(pi), 365 * 24)
+        assert len(pi) == 365 * 24
 
         pi = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 23:59')
-        self.assertEqual(len(pi), 24 * 60)
+        assert len(pi) == 24 * 60
 
         pi = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 23:59:59')
-        self.assertEqual(len(pi), 24 * 60 * 60)
+        assert len(pi) == 24 * 60 * 60
 
         start = Period('02-Apr-2005', 
'B') i1 = PeriodIndex(start=start, periods=20) - self.assertEqual(len(i1), 20) - self.assertEqual(i1.freq, start.freq) - self.assertEqual(i1[0], start) + assert len(i1) == 20 + assert i1.freq == start.freq + assert i1[0] == start end_intv = Period('2006-12-31', 'W') i1 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), 10) - self.assertEqual(i1.freq, end_intv.freq) - self.assertEqual(i1[-1], end_intv) + assert len(i1) == 10 + assert i1.freq == end_intv.freq + assert i1[-1] == end_intv end_intv = Period('2006-12-31', '1w') i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) + assert len(i1) == len(i2) assert (i1 == i2).all() - self.assertEqual(i1.freq, i2.freq) + assert i1.freq == i2.freq end_intv = Period('2006-12-31', ('w', 1)) i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) + assert len(i1) == len(i2) assert (i1 == i2).all() - self.assertEqual(i1.freq, i2.freq) + assert i1.freq == i2.freq end_intv = Period('2005-05-01', 'B') i1 = PeriodIndex(start=start, end=end_intv) # infer freq from first element i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) - self.assertEqual(len(i2), 2) - self.assertEqual(i2[0], end_intv) + assert len(i2) == 2 + assert i2[0] == end_intv i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')])) - self.assertEqual(len(i2), 2) - self.assertEqual(i2[0], end_intv) + assert len(i2) == 2 + assert i2[0] == end_intv # Mixed freq should fail vals = [end_intv, Period('2006-12-31', 'w')] diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index cf5f741fb09ed..ebbe05d51598c 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -22,17 +22,17 @@ def test_getitem(self): for idx in [idx1]: result = idx[0] - self.assertEqual(result, pd.Period('2011-01-01', freq='D')) + assert result == pd.Period('2011-01-01', freq='D') result = idx[-1] - self.assertEqual(result, pd.Period('2011-01-31', freq='D')) + assert result == pd.Period('2011-01-31', freq='D') result = idx[0:5] expected = pd.period_range('2011-01-01', '2011-01-05', freq='D', name='idx') tm.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') + assert result.freq == expected.freq + assert result.freq == 'D' result = idx[0:10:2] expected = pd.PeriodIndex(['2011-01-01', '2011-01-03', @@ -40,8 +40,8 @@ def test_getitem(self): '2011-01-07', '2011-01-09'], freq='D', name='idx') tm.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') + assert result.freq == expected.freq + assert result.freq == 'D' result = idx[-20:-5:3] expected = pd.PeriodIndex(['2011-01-12', '2011-01-15', @@ -49,16 +49,16 @@ def test_getitem(self): '2011-01-21', '2011-01-24'], freq='D', name='idx') tm.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') + assert result.freq == expected.freq + assert result.freq == 'D' result = idx[4::-1] expected = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-03', '2011-01-02', '2011-01-01'], freq='D', name='idx') tm.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') + assert result.freq == expected.freq + assert result.freq == 'D' def test_getitem_index(self): idx = period_range('2007-01', periods=10, freq='M', name='x') @@ -84,19 +84,19 @@ def 
test_getitem_partial(self): assert (result.index.year == 2008).all() result = ts['2008':'2009'] - self.assertEqual(len(result), 24) + assert len(result) == 24 result = ts['2008-1':'2009-12'] - self.assertEqual(len(result), 24) + assert len(result) == 24 result = ts['2008Q1':'2009Q4'] - self.assertEqual(len(result), 24) + assert len(result) == 24 result = ts[:'2009'] - self.assertEqual(len(result), 36) + assert len(result) == 36 result = ts['2009':] - self.assertEqual(len(result), 50 - 24) + assert len(result) == 50 - 24 exp = result result = ts[24:] @@ -120,15 +120,15 @@ def test_getitem_datetime(self): def test_getitem_nat(self): idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') - self.assertEqual(idx[0], pd.Period('2011-01', freq='M')) + assert idx[0] == pd.Period('2011-01', freq='M') assert idx[1] is tslib.NaT s = pd.Series([0, 1, 2], index=idx) - self.assertEqual(s[pd.NaT], 1) + assert s[pd.NaT] == 1 s = pd.Series(idx, index=idx) - self.assertEqual(s[pd.Period('2011-01', freq='M')], - pd.Period('2011-01', freq='M')) + assert (s[pd.Period('2011-01', freq='M')] == + pd.Period('2011-01', freq='M')) assert s[pd.NaT] is tslib.NaT def test_getitem_list_periods(self): @@ -210,7 +210,7 @@ def test_get_loc_msg(self): try: idx.get_loc(bad_period) except KeyError as inst: - self.assertEqual(inst.args[0], bad_period) + assert inst.args[0] == bad_period def test_get_loc_nat(self): didx = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03']) @@ -218,10 +218,10 @@ def test_get_loc_nat(self): # check DatetimeIndex compat for idx in [didx, pidx]: - self.assertEqual(idx.get_loc(pd.NaT), 1) - self.assertEqual(idx.get_loc(None), 1) - self.assertEqual(idx.get_loc(float('nan')), 1) - self.assertEqual(idx.get_loc(np.nan), 1) + assert idx.get_loc(pd.NaT) == 1 + assert idx.get_loc(None) == 1 + assert idx.get_loc(float('nan')) == 1 + assert idx.get_loc(np.nan) == 1 def test_take(self): # GH 10295 @@ -230,46 +230,46 @@ def test_take(self): for idx in [idx1]: result = idx.take([0]) - self.assertEqual(result, pd.Period('2011-01-01', freq='D')) + assert result == pd.Period('2011-01-01', freq='D') result = idx.take([5]) - self.assertEqual(result, pd.Period('2011-01-06', freq='D')) + assert result == pd.Period('2011-01-06', freq='D') result = idx.take([0, 1, 2]) expected = pd.period_range('2011-01-01', '2011-01-03', freq='D', name='idx') tm.assert_index_equal(result, expected) - self.assertEqual(result.freq, 'D') - self.assertEqual(result.freq, expected.freq) + assert result.freq == 'D' + assert result.freq == expected.freq result = idx.take([0, 2, 4]) expected = pd.PeriodIndex(['2011-01-01', '2011-01-03', '2011-01-05'], freq='D', name='idx') tm.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') + assert result.freq == expected.freq + assert result.freq == 'D' result = idx.take([7, 4, 1]) expected = pd.PeriodIndex(['2011-01-08', '2011-01-05', '2011-01-02'], freq='D', name='idx') tm.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') + assert result.freq == expected.freq + assert result.freq == 'D' result = idx.take([3, 2, 5]) expected = PeriodIndex(['2011-01-04', '2011-01-03', '2011-01-06'], freq='D', name='idx') tm.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') + assert result.freq == expected.freq + assert result.freq == 'D' result = idx.take([-3, 2, 5]) expected = PeriodIndex(['2011-01-29', 
'2011-01-03', '2011-01-06'], freq='D', name='idx') tm.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') + assert result.freq == expected.freq + assert result.freq == 'D' def test_take_misc(self): index = PeriodIndex(start='1/1/10', end='12/31/12', freq='D', @@ -284,8 +284,8 @@ def test_take_misc(self): for taken in [taken1, taken2]: tm.assert_index_equal(taken, expected) assert isinstance(taken, PeriodIndex) - self.assertEqual(taken.freq, index.freq) - self.assertEqual(taken.name, expected.name) + assert taken.freq == index.freq + assert taken.name == expected.name def test_take_fill_value(self): # GH 12631 diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index af377c1b69922..fb688bda58ae8 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -38,10 +38,10 @@ def test_asobject_tolist(self): expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.asobject assert isinstance(result, Index) - self.assertEqual(result.dtype, object) + assert result.dtype == object tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) + assert result.name == expected.name + assert idx.tolist() == expected_list idx = PeriodIndex(['2013-01-01', '2013-01-02', 'NaT', '2013-01-04'], freq='D', name='idx') @@ -52,16 +52,16 @@ def test_asobject_tolist(self): expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.asobject assert isinstance(result, Index) - self.assertEqual(result.dtype, object) + assert result.dtype == object tm.assert_index_equal(result, expected) for i in [0, 1, 3]: - self.assertEqual(result[i], expected[i]) + assert result[i] == expected[i] assert result[2] is pd.NaT - self.assertEqual(result.name, expected.name) + assert result.name == expected.name result_list = idx.tolist() for i in [0, 1, 3]: - self.assertEqual(result_list[i], expected_list[i]) + assert result_list[i] == expected_list[i] assert result_list[2] is pd.NaT def test_minmax(self): @@ -77,12 +77,12 @@ def test_minmax(self): assert not idx2.is_monotonic for idx in [idx1, idx2]: - self.assertEqual(idx.min(), pd.Period('2011-01-01', freq='D')) - self.assertEqual(idx.max(), pd.Period('2011-01-03', freq='D')) - self.assertEqual(idx1.argmin(), 1) - self.assertEqual(idx2.argmin(), 0) - self.assertEqual(idx1.argmax(), 3) - self.assertEqual(idx2.argmax(), 2) + assert idx.min() == pd.Period('2011-01-01', freq='D') + assert idx.max() == pd.Period('2011-01-03', freq='D') + assert idx1.argmin() == 1 + assert idx2.argmin() == 0 + assert idx1.argmax() == 3 + assert idx2.argmax() == 2 for op in ['min', 'max']: # Return NaT @@ -101,15 +101,15 @@ def test_minmax(self): def test_numpy_minmax(self): pr = pd.period_range(start='2016-01-15', end='2016-01-20') - self.assertEqual(np.min(pr), Period('2016-01-15', freq='D')) - self.assertEqual(np.max(pr), Period('2016-01-20', freq='D')) + assert np.min(pr) == Period('2016-01-15', freq='D') + assert np.max(pr) == Period('2016-01-20', freq='D') errmsg = "the 'out' parameter is not supported" tm.assert_raises_regex(ValueError, errmsg, np.min, pr, out=0) tm.assert_raises_regex(ValueError, errmsg, np.max, pr, out=0) - self.assertEqual(np.argmin(pr), 0) - self.assertEqual(np.argmax(pr), 5) + assert np.argmin(pr) == 0 + assert np.argmax(pr) == 5 if not _np_version_under1p10: errmsg = "the 'out' parameter is not supported" @@ -167,7 +167,7 @@ 
def test_representation(self): exp6, exp7, exp8, exp9, exp10]): for func in ['__repr__', '__unicode__', '__str__']: result = getattr(idx, func)() - self.assertEqual(result, expected) + assert result == expected def test_representation_to_series(self): # GH 10971 @@ -225,7 +225,7 @@ def test_representation_to_series(self): [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9]): result = repr(pd.Series(idx)) - self.assertEqual(result, expected) + assert result == expected def test_summary(self): # GH9116 @@ -274,7 +274,7 @@ def test_summary(self): [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9]): result = idx.summary() - self.assertEqual(result, expected) + assert result == expected def test_resolution(self): for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', @@ -284,7 +284,7 @@ def test_resolution(self): 'millisecond', 'microsecond']): idx = pd.period_range(start='2013-04-01', periods=30, freq=freq) - self.assertEqual(idx.resolution, expected) + assert idx.resolution == expected def test_add_iadd(self): rng = pd.period_range('1/1/2000', freq='D', periods=5) @@ -569,12 +569,12 @@ def test_drop_duplicates_metadata(self): idx = pd.period_range('2011-01-01', '2011-01-31', freq='D', name='idx') result = idx.drop_duplicates() tm.assert_index_equal(idx, result) - self.assertEqual(idx.freq, result.freq) + assert idx.freq == result.freq idx_dup = idx.append(idx) # freq will not be reset result = idx_dup.drop_duplicates() tm.assert_index_equal(idx, result) - self.assertEqual(idx.freq, result.freq) + assert idx.freq == result.freq def test_drop_duplicates(self): # to check Index/Series compat @@ -601,7 +601,7 @@ def test_drop_duplicates(self): def test_order_compat(self): def _check_freq(index, expected_index): if isinstance(index, PeriodIndex): - self.assertEqual(index.freq, expected_index.freq) + assert index.freq == expected_index.freq pidx = PeriodIndex(['2011', '2012', '2013'], name='pidx', freq='A') # for compatibility check @@ -666,13 +666,13 @@ def _check_freq(index, expected_index): expected = PeriodIndex(['NaT', '2011', '2011', '2013'], name='pidx', freq='D') tm.assert_index_equal(result, expected) - self.assertEqual(result.freq, 'D') + assert result.freq == 'D' result = pidx.sort_values(ascending=False) expected = PeriodIndex( ['2013', '2011', '2011', 'NaT'], name='pidx', freq='D') tm.assert_index_equal(result, expected) - self.assertEqual(result.freq, 'D') + assert result.freq == 'D' def test_order(self): for freq in ['D', '2D', '4D']: @@ -681,20 +681,20 @@ def test_order(self): ordered = idx.sort_values() tm.assert_index_equal(ordered, idx) - self.assertEqual(ordered.freq, idx.freq) + assert ordered.freq == idx.freq ordered = idx.sort_values(ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq, freq) + assert ordered.freq == expected.freq + assert ordered.freq == freq ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, idx) tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) - self.assertEqual(ordered.freq, idx.freq) - self.assertEqual(ordered.freq, freq) + assert ordered.freq == idx.freq + assert ordered.freq == freq ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) @@ -702,8 +702,8 @@ def test_order(self): tm.assert_index_equal(ordered, expected) tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) - self.assertEqual(ordered.freq, expected.freq) - 
self.assertEqual(ordered.freq, freq) + assert ordered.freq == expected.freq + assert ordered.freq == freq idx1 = PeriodIndex(['2011-01-01', '2011-01-03', '2011-01-05', '2011-01-02', '2011-01-01'], freq='D', name='idx1') @@ -725,18 +725,18 @@ def test_order(self): for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: ordered = idx.sort_values() tm.assert_index_equal(ordered, expected) - self.assertEqual(ordered.freq, 'D') + assert ordered.freq == 'D' ordered = idx.sort_values(ascending=False) tm.assert_index_equal(ordered, expected[::-1]) - self.assertEqual(ordered.freq, 'D') + assert ordered.freq == 'D' ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertEqual(ordered.freq, 'D') + assert ordered.freq == 'D' ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) @@ -744,7 +744,7 @@ def test_order(self): exp = np.array([2, 1, 3, 4, 0]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertEqual(ordered.freq, 'D') + assert ordered.freq == 'D' def test_nat_new(self): @@ -1144,7 +1144,7 @@ def test_ops_series_timedelta(self): # GH 13043 s = pd.Series([pd.Period('2015-01-01', freq='D'), pd.Period('2015-01-02', freq='D')], name='xxx') - self.assertEqual(s.dtype, object) + assert s.dtype == object exp = pd.Series([pd.Period('2015-01-02', freq='D'), pd.Period('2015-01-03', freq='D')], name='xxx') @@ -1158,7 +1158,7 @@ def test_ops_series_period(self): # GH 13043 s = pd.Series([pd.Period('2015-01-01', freq='D'), pd.Period('2015-01-02', freq='D')], name='xxx') - self.assertEqual(s.dtype, object) + assert s.dtype == object p = pd.Period('2015-01-10', freq='D') # dtype will be object because of original dtype @@ -1168,7 +1168,7 @@ def test_ops_series_period(self): s2 = pd.Series([pd.Period('2015-01-05', freq='D'), pd.Period('2015-01-04', freq='D')], name='xxx') - self.assertEqual(s2.dtype, object) + assert s2.dtype == object exp = pd.Series([4, 2], name='xxx', dtype=object) tm.assert_series_equal(s2 - s, exp) @@ -1183,8 +1183,8 @@ def test_ops_frame_period(self): pd.Period('2015-02', freq='M')], 'B': [pd.Period('2014-01', freq='M'), pd.Period('2014-02', freq='M')]}) - self.assertEqual(df['A'].dtype, object) - self.assertEqual(df['B'].dtype, object) + assert df['A'].dtype == object + assert df['B'].dtype == object p = pd.Period('2015-03', freq='M') # dtype will be object because of original dtype @@ -1197,8 +1197,8 @@ def test_ops_frame_period(self): pd.Period('2015-06', freq='M')], 'B': [pd.Period('2015-05', freq='M'), pd.Period('2015-06', freq='M')]}) - self.assertEqual(df2['A'].dtype, object) - self.assertEqual(df2['B'].dtype, object) + assert df2['A'].dtype == object + assert df2['B'].dtype == object exp = pd.DataFrame({'A': np.array([4, 4], dtype=object), 'B': np.array([16, 16], dtype=object)}) diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index 7c1279a12450c..04b4e6795e770 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -51,7 +51,7 @@ def test_slice_with_zero_step_raises(self): def test_slice_keep_name(self): idx = period_range('20010101', periods=10, freq='D', name='bob') - self.assertEqual(idx.name, idx[1:].name) + assert idx.name == idx[1:].name def test_pindex_slice_index(self): pi = PeriodIndex(start='1/1/10', end='12/31/12', freq='M') diff --git 
a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 8ee3e9d6707b4..6ec567509cd76 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -56,8 +56,8 @@ def test_pickle_compat_construction(self): pass def test_pickle_round_trip(self): - for freq in ['D', 'M', 'Y']: - idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') + for freq in ['D', 'M', 'A']: + idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq=freq) result = tm.round_trip_pickle(idx) tm.assert_index_equal(result, idx) @@ -65,23 +65,22 @@ def test_get_loc(self): idx = pd.period_range('2000-01-01', periods=3) for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual( - idx.get_loc(idx[1].asfreq('H', how='start'), method), 1) - self.assertEqual(idx.get_loc(idx[1].to_timestamp(), method), 1) - self.assertEqual( - idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method), 1) - self.assertEqual(idx.get_loc(str(idx[1]), method), 1) + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].asfreq('H', how='start'), method) == 1 + assert idx.get_loc(idx[1].to_timestamp(), method) == 1 + assert idx.get_loc(idx[1].to_timestamp() + .to_pydatetime(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 idx = pd.period_range('2000-01-01', periods=5)[::2] - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance='1 day'), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=pd.Timedelta('1D')), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=np.timedelta64(1, 'D')), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=timedelta(1)), 1) + assert idx.get_loc('2000-01-02T12', method='nearest', + tolerance='1 day') == 1 + assert idx.get_loc('2000-01-02T12', method='nearest', + tolerance=pd.Timedelta('1D')) == 1 + assert idx.get_loc('2000-01-02T12', method='nearest', + tolerance=np.timedelta64(1, 'D')) == 1 + assert idx.get_loc('2000-01-02T12', method='nearest', + tolerance=timedelta(1)) == 1 with tm.assert_raises_regex(ValueError, 'must be convertible'): idx.get_loc('2000-01-10', method='nearest', tolerance='foo') @@ -164,7 +163,7 @@ def test_repeat(self): res = idx.repeat(3) exp = PeriodIndex(idx.values.repeat(3), freq='D') tm.assert_index_equal(res, exp) - self.assertEqual(res.freqstr, 'D') + assert res.freqstr == 'D' def test_period_index_indexer(self): # GH4125 @@ -243,12 +242,12 @@ def test_shallow_copy_empty(self): def test_dtype_str(self): pi = pd.PeriodIndex([], freq='M') - self.assertEqual(pi.dtype_str, 'period[M]') - self.assertEqual(pi.dtype_str, str(pi.dtype)) + assert pi.dtype_str == 'period[M]' + assert pi.dtype_str == str(pi.dtype) pi = pd.PeriodIndex([], freq='3M') - self.assertEqual(pi.dtype_str, 'period[3M]') - self.assertEqual(pi.dtype_str, str(pi.dtype)) + assert pi.dtype_str == 'period[3M]' + assert pi.dtype_str == str(pi.dtype) def test_view_asi8(self): idx = pd.PeriodIndex([], freq='M') @@ -296,37 +295,37 @@ def test_values(self): def test_period_index_length(self): pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 9) + assert len(pi) == 9 pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 4 * 9) + assert len(pi) == 4 * 9 pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 12 * 9) + assert len(pi) == 12 * 9 
start = Period('02-Apr-2005', 'B') i1 = PeriodIndex(start=start, periods=20) - self.assertEqual(len(i1), 20) - self.assertEqual(i1.freq, start.freq) - self.assertEqual(i1[0], start) + assert len(i1) == 20 + assert i1.freq == start.freq + assert i1[0] == start end_intv = Period('2006-12-31', 'W') i1 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), 10) - self.assertEqual(i1.freq, end_intv.freq) - self.assertEqual(i1[-1], end_intv) + assert len(i1) == 10 + assert i1.freq == end_intv.freq + assert i1[-1] == end_intv end_intv = Period('2006-12-31', '1w') i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) + assert len(i1) == len(i2) assert (i1 == i2).all() - self.assertEqual(i1.freq, i2.freq) + assert i1.freq == i2.freq end_intv = Period('2006-12-31', ('w', 1)) i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) + assert len(i1) == len(i2) assert (i1 == i2).all() - self.assertEqual(i1.freq, i2.freq) + assert i1.freq == i2.freq try: PeriodIndex(start=start, end=end_intv) @@ -346,12 +345,12 @@ def test_period_index_length(self): # infer freq from first element i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) - self.assertEqual(len(i2), 2) - self.assertEqual(i2[0], end_intv) + assert len(i2) == 2 + assert i2[0] == end_intv i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')])) - self.assertEqual(len(i2), 2) - self.assertEqual(i2[0], end_intv) + assert len(i2) == 2 + assert i2[0] == end_intv # Mixed freq should fail vals = [end_intv, Period('2006-12-31', 'w')] @@ -402,17 +401,17 @@ def _check_all_fields(self, periodindex): for field in fields: field_idx = getattr(periodindex, field) - self.assertEqual(len(periodindex), len(field_idx)) + assert len(periodindex) == len(field_idx) for x, val in zip(periods, field_idx): - self.assertEqual(getattr(x, field), val) + assert getattr(x, field) == val if len(s) == 0: continue field_s = getattr(s.dt, field) - self.assertEqual(len(periodindex), len(field_s)) + assert len(periodindex) == len(field_s) for x, val in zip(periods, field_s): - self.assertEqual(getattr(x, field), val) + assert getattr(x, field) == val def test_indexing(self): @@ -421,7 +420,7 @@ def test_indexing(self): s = Series(randn(10), index=index) expected = s[index[0]] result = s.iat[0] - self.assertEqual(expected, result) + assert expected == result def test_period_set_index_reindex(self): # GH 6631 @@ -486,20 +485,19 @@ def test_is_(self): create_index = lambda: PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') index = create_index() - self.assertEqual(index.is_(index), True) - self.assertEqual(index.is_(create_index()), False) - self.assertEqual(index.is_(index.view()), True) - self.assertEqual( - index.is_(index.view().view().view().view().view()), True) - self.assertEqual(index.view().is_(index), True) + assert index.is_(index) + assert not index.is_(create_index()) + assert index.is_(index.view()) + assert index.is_(index.view().view().view().view().view()) + assert index.view().is_(index) ind2 = index.view() index.name = "Apple" - self.assertEqual(ind2.is_(index), True) - self.assertEqual(index.is_(index[:]), False) - self.assertEqual(index.is_(index.asfreq('M')), False) - self.assertEqual(index.is_(index.asfreq('A')), False) - self.assertEqual(index.is_(index - 2), False) - self.assertEqual(index.is_(index - 0), False) + assert ind2.is_(index) + assert not index.is_(index[:]) + assert not index.is_(index.asfreq('M')) + assert not index.is_(index.asfreq('A')) + assert not index.is_(index 
- 2) + assert not index.is_(index - 0) def test_comp_period(self): idx = period_range('2007-01', periods=20, freq='M') @@ -566,14 +564,14 @@ def test_index_unique(self): idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN') expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN') tm.assert_index_equal(idx.unique(), expected) - self.assertEqual(idx.nunique(), 3) + assert idx.nunique() == 3 idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN', tz='US/Eastern') expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN', tz='US/Eastern') tm.assert_index_equal(idx.unique(), expected) - self.assertEqual(idx.nunique(), 3) + assert idx.nunique() == 3 def test_shift_gh8083(self): @@ -591,32 +589,32 @@ def test_shift(self): tm.assert_index_equal(pi1.shift(0), pi1) - self.assertEqual(len(pi1), len(pi2)) + assert len(pi1) == len(pi2) tm.assert_index_equal(pi1.shift(1), pi2) pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') pi2 = PeriodIndex(freq='A', start='1/1/2000', end='12/1/2008') - self.assertEqual(len(pi1), len(pi2)) + assert len(pi1) == len(pi2) tm.assert_index_equal(pi1.shift(-1), pi2) pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') pi2 = PeriodIndex(freq='M', start='2/1/2001', end='1/1/2010') - self.assertEqual(len(pi1), len(pi2)) + assert len(pi1) == len(pi2) tm.assert_index_equal(pi1.shift(1), pi2) pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') pi2 = PeriodIndex(freq='M', start='12/1/2000', end='11/1/2009') - self.assertEqual(len(pi1), len(pi2)) + assert len(pi1) == len(pi2) tm.assert_index_equal(pi1.shift(-1), pi2) pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') pi2 = PeriodIndex(freq='D', start='1/2/2001', end='12/2/2009') - self.assertEqual(len(pi1), len(pi2)) + assert len(pi1) == len(pi2) tm.assert_index_equal(pi1.shift(1), pi2) pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') pi2 = PeriodIndex(freq='D', start='12/31/2000', end='11/30/2009') - self.assertEqual(len(pi1), len(pi2)) + assert len(pi1) == len(pi2) tm.assert_index_equal(pi1.shift(-1), pi2) def test_shift_nat(self): @@ -626,7 +624,7 @@ def test_shift_nat(self): expected = PeriodIndex(['2011-02', '2011-03', 'NaT', '2011-05'], freq='M', name='idx') tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) + assert result.name == expected.name def test_ndarray_compat_properties(self): if compat.is_platform_32bit(): @@ -669,7 +667,7 @@ def test_pindex_qaccess(self): pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') s = Series(np.random.rand(len(pi)), index=pi).cumsum() # Todo: fix these accessors! 
- self.assertEqual(s['05Q4'], s[2]) + assert s['05Q4'] == s[2] def test_numpy_repeat(self): index = period_range('20010101', periods=2) @@ -687,25 +685,25 @@ def test_pindex_multiples(self): expected = PeriodIndex(['2011-01', '2011-03', '2011-05', '2011-07', '2011-09', '2011-11'], freq='2M') tm.assert_index_equal(pi, expected) - self.assertEqual(pi.freq, offsets.MonthEnd(2)) - self.assertEqual(pi.freqstr, '2M') + assert pi.freq == offsets.MonthEnd(2) + assert pi.freqstr == '2M' pi = period_range(start='1/1/11', end='12/31/11', freq='2M') tm.assert_index_equal(pi, expected) - self.assertEqual(pi.freq, offsets.MonthEnd(2)) - self.assertEqual(pi.freqstr, '2M') + assert pi.freq == offsets.MonthEnd(2) + assert pi.freqstr == '2M' pi = period_range(start='1/1/11', periods=6, freq='2M') tm.assert_index_equal(pi, expected) - self.assertEqual(pi.freq, offsets.MonthEnd(2)) - self.assertEqual(pi.freqstr, '2M') + assert pi.freq == offsets.MonthEnd(2) + assert pi.freqstr == '2M' def test_iteration(self): index = PeriodIndex(start='1/1/10', periods=4, freq='B') result = list(index) assert isinstance(result[0], Period) - self.assertEqual(result[0].freq, index.freq) + assert result[0].freq == index.freq def test_is_full(self): index = PeriodIndex([2005, 2007, 2009], freq='A') @@ -757,14 +755,14 @@ def test_append_concat(self): # drops index result = pd.concat([s1, s2]) assert isinstance(result.index, PeriodIndex) - self.assertEqual(result.index[0], s1.index[0]) + assert result.index[0] == s1.index[0] def test_pickle_freq(self): # GH2891 prng = period_range('1/1/2011', '1/1/2012', freq='M') new_prng = tm.round_trip_pickle(prng) - self.assertEqual(new_prng.freq, offsets.MonthEnd()) - self.assertEqual(new_prng.freqstr, 'M') + assert new_prng.freq == offsets.MonthEnd() + assert new_prng.freqstr == 'M' def test_map(self): index = PeriodIndex([2005, 2007, 2009], freq='A') diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index e1fdc85d670d4..025ee7e732a7c 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -24,7 +24,7 @@ def test_joins(self): joined = index.join(index[:-5], how=kind) assert isinstance(joined, PeriodIndex) - self.assertEqual(joined.freq, index.freq) + assert joined.freq == index.freq def test_join_self(self): index = period_range('1/1/2000', '1/20/2000', freq='D') @@ -172,8 +172,8 @@ def test_intersection_cases(self): (rng4, expected4)]: result = base.intersection(rng) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) + assert result.name == expected.name + assert result.freq == expected.freq # non-monotonic base = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-02', @@ -198,16 +198,16 @@ def test_intersection_cases(self): (rng4, expected4)]: result = base.intersection(rng) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, 'D') + assert result.name == expected.name + assert result.freq == 'D' # empty same freq rng = date_range('6/1/2000', '6/15/2000', freq='T') result = rng[0:0].intersection(rng) - self.assertEqual(len(result), 0) + assert len(result) == 0 result = rng.intersection(rng[0:0]) - self.assertEqual(len(result), 0) + assert len(result) == 0 def test_difference(self): # diff diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 60ad8fed32399..9e5994dd54f50 100644 --- 
a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -65,7 +65,7 @@ def test_negone_ordinals(self): for freq in freqs: period = Period(ordinal=-1, freq=freq) repr(period) - self.assertEqual(period.year, 1969) + assert period.year == 1969 period = Period(ordinal=-1, freq='B') repr(period) @@ -75,97 +75,79 @@ def test_negone_ordinals(self): class TestTslib(tm.TestCase): def test_intraday_conversion_factors(self): - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('H'), False), 24) - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('T'), False), 1440) - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('S'), False), 86400) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('L'), False), 86400000) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('U'), False), 86400000000) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('N'), False), 86400000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('H'), get_freq('T'), False), 60) - self.assertEqual(period_asfreq( - 1, get_freq('H'), get_freq('S'), False), 3600) - self.assertEqual(period_asfreq(1, get_freq('H'), - get_freq('L'), False), 3600000) - self.assertEqual(period_asfreq(1, get_freq( - 'H'), get_freq('U'), False), 3600000000) - self.assertEqual(period_asfreq(1, get_freq( - 'H'), get_freq('N'), False), 3600000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('T'), get_freq('S'), False), 60) - self.assertEqual(period_asfreq( - 1, get_freq('T'), get_freq('L'), False), 60000) - self.assertEqual(period_asfreq(1, get_freq( - 'T'), get_freq('U'), False), 60000000) - self.assertEqual(period_asfreq(1, get_freq( - 'T'), get_freq('N'), False), 60000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('S'), get_freq('L'), False), 1000) - self.assertEqual(period_asfreq(1, get_freq('S'), - get_freq('U'), False), 1000000) - self.assertEqual(period_asfreq(1, get_freq( - 'S'), get_freq('N'), False), 1000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('L'), get_freq('U'), False), 1000) - self.assertEqual(period_asfreq(1, get_freq('L'), - get_freq('N'), False), 1000000) - - self.assertEqual(period_asfreq( - 1, get_freq('U'), get_freq('N'), False), 1000) + assert period_asfreq(1, get_freq('D'), get_freq('H'), False) == 24 + assert period_asfreq(1, get_freq('D'), get_freq('T'), False) == 1440 + assert period_asfreq(1, get_freq('D'), get_freq('S'), False) == 86400 + assert period_asfreq(1, get_freq('D'), + get_freq('L'), False) == 86400000 + assert period_asfreq(1, get_freq('D'), + get_freq('U'), False) == 86400000000 + assert period_asfreq(1, get_freq('D'), + get_freq('N'), False) == 86400000000000 + + assert period_asfreq(1, get_freq('H'), get_freq('T'), False) == 60 + assert period_asfreq(1, get_freq('H'), get_freq('S'), False) == 3600 + assert period_asfreq(1, get_freq('H'), + get_freq('L'), False) == 3600000 + assert period_asfreq(1, get_freq('H'), + get_freq('U'), False) == 3600000000 + assert period_asfreq(1, get_freq('H'), + get_freq('N'), False) == 3600000000000 + + assert period_asfreq(1, get_freq('T'), get_freq('S'), False) == 60 + assert period_asfreq(1, get_freq('T'), get_freq('L'), False) == 60000 + assert period_asfreq(1, get_freq('T'), + get_freq('U'), False) == 60000000 + assert period_asfreq(1, get_freq('T'), + get_freq('N'), False) == 60000000000 + + assert period_asfreq(1, get_freq('S'), get_freq('L'), False) == 1000 + assert period_asfreq(1, get_freq('S'), + get_freq('U'), False) == 1000000 + assert 
period_asfreq(1, get_freq('S'), + get_freq('N'), False) == 1000000000 + + assert period_asfreq(1, get_freq('L'), get_freq('U'), False) == 1000 + assert period_asfreq(1, get_freq('L'), + get_freq('N'), False) == 1000000 + + assert period_asfreq(1, get_freq('U'), get_freq('N'), False) == 1000 def test_period_ordinal_start_values(self): # information for 1.1.1970 - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('A'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('M'))) - self.assertEqual(1, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('D'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('B'))) + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('A')) == 0 + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('M')) == 0 + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('W')) == 1 + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('D')) == 0 + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('B')) == 0 def test_period_ordinal_week(self): - self.assertEqual(1, period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(2, period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0, - get_freq('W'))) - - self.assertEqual(2284, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(2285, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, - get_freq('W'))) + assert period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0, get_freq('W')) == 1 + assert period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0, get_freq('W')) == 2 + assert period_ordinal(2013, 10, 6, 0, + 0, 0, 0, 0, get_freq('W')) == 2284 + assert period_ordinal(2013, 10, 7, 0, + 0, 0, 0, 0, get_freq('W')) == 2285 def test_period_ordinal_business_day(self): # Thursday - self.assertEqual(11415, period_ordinal(2013, 10, 3, 0, 0, 0, 0, 0, - get_freq('B'))) + assert period_ordinal(2013, 10, 3, 0, + 0, 0, 0, 0, get_freq('B')) == 11415 # Friday - self.assertEqual(11416, period_ordinal(2013, 10, 4, 0, 0, 0, 0, 0, - get_freq('B'))) + assert period_ordinal(2013, 10, 4, 0, + 0, 0, 0, 0, get_freq('B')) == 11416 # Saturday - self.assertEqual(11417, period_ordinal(2013, 10, 5, 0, 0, 0, 0, 0, - get_freq('B'))) + assert period_ordinal(2013, 10, 5, 0, + 0, 0, 0, 0, get_freq('B')) == 11417 # Sunday - self.assertEqual(11417, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, - get_freq('B'))) + assert period_ordinal(2013, 10, 6, 0, + 0, 0, 0, 0, get_freq('B')) == 11417 # Monday - self.assertEqual(11417, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, - get_freq('B'))) + assert period_ordinal(2013, 10, 7, 0, + 0, 0, 0, 0, get_freq('B')) == 11417 # Tuesday - self.assertEqual(11418, period_ordinal(2013, 10, 8, 0, 0, 0, 0, 0, - get_freq('B'))) + assert period_ordinal(2013, 10, 8, 0, + 0, 0, 0, 0, get_freq('B')) == 11418 class TestPeriodIndex(tm.TestCase): @@ -189,7 +171,7 @@ def test_to_timestamp(self): exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') result = series.to_timestamp(how='end') tm.assert_index_equal(result.index, exp_index) - self.assertEqual(result.name, 'foo') + assert result.name == 'foo' exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN') result = series.to_timestamp(how='start') @@ -221,7 +203,7 @@ def _get_with_delta(delta, freq='A-DEC'): freq='H') result = series.to_timestamp(how='end') tm.assert_index_equal(result.index, exp_index) - self.assertEqual(result.name, 'foo') + assert result.name == 'foo' def 
test_to_timestamp_quarterly_bug(self):
         years = np.arange(1960, 2000).repeat(4)
@@ -236,10 +218,10 @@ def test_to_timestamp_quarterly_bug(self):
 
     def test_to_timestamp_preserve_name(self):
         index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009',
                             name='foo')
-        self.assertEqual(index.name, 'foo')
+        assert index.name == 'foo'
 
         conv = index.to_timestamp('D')
-        self.assertEqual(conv.name, 'foo')
+        assert conv.name == 'foo'
 
     def test_to_timestamp_repr_is_code(self):
         zs = [Timestamp('99-04-17 00:00:00', tz='UTC'),
@@ -247,7 +229,7 @@ def test_to_timestamp_repr_is_code(self):
               Timestamp('2001-04-17 00:00:00', tz='America/Los_Angeles'),
               Timestamp('2001-04-17 00:00:00', tz=None)]
         for z in zs:
-            self.assertEqual(eval(repr(z)), z)
+            assert eval(repr(z)) == z
 
     def test_to_timestamp_pi_nat(self):
         # GH 7228
@@ -258,16 +240,16 @@ def test_to_timestamp_pi_nat(self):
         expected = DatetimeIndex([pd.NaT, datetime(2011, 1, 1),
                                   datetime(2011, 2, 1)], name='idx')
         tm.assert_index_equal(result, expected)
-        self.assertEqual(result.name, 'idx')
+        assert result.name == 'idx'
 
         result2 = result.to_period(freq='M')
         tm.assert_index_equal(result2, index)
-        self.assertEqual(result2.name, 'idx')
+        assert result2.name == 'idx'
 
         result3 = result.to_period(freq='3M')
         exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M',
                           name='idx')
         tm.assert_index_equal(result3, exp)
-        self.assertEqual(result3.freqstr, '3M')
+        assert result3.freqstr == '3M'
 
         msg = ('Frequency must be positive, because it'
                ' represents span: -2A')
@@ -317,13 +299,13 @@ def test_dti_to_period(self):
         pi2 = dti.to_period(freq='D')
         pi3 = dti.to_period(freq='3D')
 
-        self.assertEqual(pi1[0], Period('Jan 2005', freq='M'))
-        self.assertEqual(pi2[0], Period('1/31/2005', freq='D'))
-        self.assertEqual(pi3[0], Period('1/31/2005', freq='3D'))
+        assert pi1[0] == Period('Jan 2005', freq='M')
+        assert pi2[0] == Period('1/31/2005', freq='D')
+        assert pi3[0] == Period('1/31/2005', freq='3D')
 
-        self.assertEqual(pi1[-1], Period('Nov 2005', freq='M'))
-        self.assertEqual(pi2[-1], Period('11/30/2005', freq='D'))
-        self.assertEqual(pi3[-1], Period('11/30/2005', freq='3D'))
+        assert pi1[-1] == Period('Nov 2005', freq='M')
+        assert pi2[-1] == Period('11/30/2005', freq='D')
+        assert pi3[-1] == Period('11/30/2005', freq='3D')
 
         tm.assert_index_equal(pi1, period_range('1/1/2005', '11/1/2005',
                                                 freq='M'))
@@ -365,25 +347,25 @@ def test_to_period_quarterlyish(self):
         for off in offsets:
             rng = date_range('01-Jan-2012', periods=8, freq=off)
             prng = rng.to_period()
-            self.assertEqual(prng.freq, 'Q-DEC')
+            assert prng.freq == 'Q-DEC'
 
     def test_to_period_annualish(self):
         offsets = ['BA', 'AS', 'BAS']
         for off in offsets:
             rng = date_range('01-Jan-2012', periods=8, freq=off)
             prng = rng.to_period()
-            self.assertEqual(prng.freq, 'A-DEC')
+            assert prng.freq == 'A-DEC'
 
     def test_to_period_monthish(self):
         offsets = ['MS', 'BM']
         for off in offsets:
             rng = date_range('01-Jan-2012', periods=8, freq=off)
             prng = rng.to_period()
-            self.assertEqual(prng.freq, 'M')
+            assert prng.freq == 'M'
 
         rng = date_range('01-Jan-2012', periods=8, freq='M')
         prng = rng.to_period()
-        self.assertEqual(prng.freq, 'M')
+        assert prng.freq == 'M'
 
         msg = pd.tseries.frequencies._INVALID_FREQ_ERROR
         with tm.assert_raises_regex(ValueError, msg):
@@ -402,7 +384,7 @@ def test_to_timestamp_1703(self):
         index = period_range('1/1/2012', periods=4, freq='D')
 
         result = index.to_timestamp()
-        self.assertEqual(result[0], Timestamp('1/1/2012'))
+        assert result[0] == Timestamp('1/1/2012')
 
     def test_to_datetime_depr(self):
         index = period_range('1/1/2012', 
periods=4, freq='D') @@ -410,7 +392,7 @@ def test_to_datetime_depr(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = index.to_datetime() - self.assertEqual(result[0], Timestamp('1/1/2012')) + assert result[0] == Timestamp('1/1/2012') def test_combine_first(self): # GH 3367 @@ -433,10 +415,10 @@ def test_searchsorted(self): '2014-01-04', '2014-01-05'], freq=freq) p1 = pd.Period('2014-01-01', freq=freq) - self.assertEqual(pidx.searchsorted(p1), 0) + assert pidx.searchsorted(p1) == 0 p2 = pd.Period('2014-01-04', freq=freq) - self.assertEqual(pidx.searchsorted(p2), 3) + assert pidx.searchsorted(p2) == 3 msg = "Input has different freq=H from PeriodIndex" with tm.assert_raises_regex( diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 8ac1ef3e1911b..23c72e511d2b3 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -54,14 +54,14 @@ def create_index(self): def test_new_axis(self): new_index = self.dateIndex[None, :] - self.assertEqual(new_index.ndim, 2) + assert new_index.ndim == 2 assert isinstance(new_index, np.ndarray) def test_copy_and_deepcopy(self): super(TestIndex, self).test_copy_and_deepcopy() new_copy2 = self.intIndex.copy(dtype=int) - self.assertEqual(new_copy2.dtype.kind, 'i') + assert new_copy2.dtype.kind == 'i' def test_constructor(self): # regular instance creation @@ -78,7 +78,7 @@ def test_constructor(self): arr = np.array(self.strIndex) index = Index(arr, copy=True, name='name') assert isinstance(index, Index) - self.assertEqual(index.name, 'name') + assert index.name == 'name' tm.assert_numpy_array_equal(arr, index.values) arr[0] = "SOMEBIGLONGSTRING" self.assertNotEqual(index[0], "SOMEBIGLONGSTRING") @@ -107,11 +107,11 @@ def test_constructor_from_index_datetimetz(self): tz='US/Eastern') result = pd.Index(idx) tm.assert_index_equal(result, idx) - self.assertEqual(result.tz, idx.tz) + assert result.tz == idx.tz result = pd.Index(idx.asobject) tm.assert_index_equal(result, idx) - self.assertEqual(result.tz, idx.tz) + assert result.tz == idx.tz def test_constructor_from_index_timedelta(self): idx = pd.timedelta_range('1 days', freq='D', periods=3) @@ -134,7 +134,7 @@ def test_constructor_from_series_datetimetz(self): tz='US/Eastern') result = pd.Index(pd.Series(idx)) tm.assert_index_equal(result, idx) - self.assertEqual(result.tz, idx.tz) + assert result.tz == idx.tz def test_constructor_from_series_timedelta(self): idx = pd.timedelta_range('1 days', freq='D', periods=3) @@ -172,7 +172,7 @@ def test_constructor_from_series(self): result = DatetimeIndex(df['date'], freq='MS') expected.name = 'date' tm.assert_index_equal(result, expected) - self.assertEqual(df['date'].dtype, object) + assert df['date'].dtype == object exp = pd.Series(['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990'], name='date') @@ -181,7 +181,7 @@ def test_constructor_from_series(self): # GH 6274 # infer freq of same result = pd.infer_freq(df['date']) - self.assertEqual(result, 'MS') + assert result == 'MS' def test_constructor_ndarray_like(self): # GH 5460#issuecomment-44474502 @@ -221,17 +221,17 @@ def test_constructor_int_dtype_nan(self): def test_index_ctor_infer_nan_nat(self): # GH 13467 exp = pd.Float64Index([np.nan, np.nan]) - self.assertEqual(exp.dtype, np.float64) + assert exp.dtype == np.float64 tm.assert_index_equal(Index([np.nan, np.nan]), exp) tm.assert_index_equal(Index(np.array([np.nan, np.nan])), exp) exp = pd.DatetimeIndex([pd.NaT, pd.NaT]) - self.assertEqual(exp.dtype, 
'datetime64[ns]') + assert exp.dtype == 'datetime64[ns]' tm.assert_index_equal(Index([pd.NaT, pd.NaT]), exp) tm.assert_index_equal(Index(np.array([pd.NaT, pd.NaT])), exp) exp = pd.DatetimeIndex([pd.NaT, pd.NaT]) - self.assertEqual(exp.dtype, 'datetime64[ns]') + assert exp.dtype == 'datetime64[ns]' for data in [[pd.NaT, np.nan], [np.nan, pd.NaT], [np.nan, np.datetime64('nat')], @@ -240,7 +240,7 @@ def test_index_ctor_infer_nan_nat(self): tm.assert_index_equal(Index(np.array(data, dtype=object)), exp) exp = pd.TimedeltaIndex([pd.NaT, pd.NaT]) - self.assertEqual(exp.dtype, 'timedelta64[ns]') + assert exp.dtype == 'timedelta64[ns]' for data in [[np.nan, np.timedelta64('nat')], [np.timedelta64('nat'), np.nan], @@ -407,7 +407,7 @@ def test_astype(self): # pass on name self.intIndex.name = 'foobar' casted = self.intIndex.astype('i8') - self.assertEqual(casted.name, 'foobar') + assert casted.name == 'foobar' def test_equals_object(self): # same @@ -449,12 +449,12 @@ def test_delete(self): expected = Index(['b', 'c', 'd'], name='idx') result = idx.delete(0) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) + assert result.name == expected.name expected = Index(['a', 'b', 'c'], name='idx') result = idx.delete(-1) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) + assert result.name == expected.name with pytest.raises((IndexError, ValueError)): # either depending on numpy version @@ -505,11 +505,11 @@ def test_is_(self): def test_asof(self): d = self.dateIndex[0] - self.assertEqual(self.dateIndex.asof(d), d) + assert self.dateIndex.asof(d) == d assert isnull(self.dateIndex.asof(d - timedelta(1))) d = self.dateIndex[-1] - self.assertEqual(self.dateIndex.asof(d + timedelta(1)), d) + assert self.dateIndex.asof(d + timedelta(1)) == d d = self.dateIndex[0].to_pydatetime() assert isinstance(self.dateIndex.asof(d), Timestamp) @@ -518,7 +518,7 @@ def test_asof_datetime_partial(self): idx = pd.date_range('2010-01-01', periods=2, freq='m') expected = Timestamp('2010-02-28') result = idx.asof('2010-02') - self.assertEqual(result, expected) + assert result == expected assert not isinstance(result, Index) def test_nanosecond_index_access(self): @@ -529,12 +529,11 @@ def test_nanosecond_index_access(self): first_value = x.asof(x.index[0]) # this does not yet work, as parsing strings is done via dateutil - # self.assertEqual(first_value, - # x['2013-01-01 00:00:00.000000050+0000']) + # assert first_value == x['2013-01-01 00:00:00.000000050+0000'] exp_ts = np_datetime64_compat('2013-01-01 00:00:00.000000050+0000', 'ns') - self.assertEqual(first_value, x[Timestamp(exp_ts)]) + assert first_value == x[Timestamp(exp_ts)] def test_comparators(self): index = self.dateIndex @@ -564,16 +563,16 @@ def test_booleanindex(self): subIndex = self.strIndex[boolIdx] for i, val in enumerate(subIndex): - self.assertEqual(subIndex.get_loc(val), i) + assert subIndex.get_loc(val) == i subIndex = self.strIndex[list(boolIdx)] for i, val in enumerate(subIndex): - self.assertEqual(subIndex.get_loc(val), i) + assert subIndex.get_loc(val) == i def test_fancy(self): sl = self.strIndex[[1, 2, 3]] for i in sl: - self.assertEqual(i, sl[sl.get_loc(i)]) + assert i == sl[sl.get_loc(i)] def test_empty_fancy(self): empty_farr = np.array([], dtype=np.float_) @@ -598,7 +597,7 @@ def test_getitem(self): exp = self.dateIndex[5] exp = _to_m8(exp) - self.assertEqual(exp, arr[5]) + assert exp == arr[5] def test_intersection(self): first = self.strIndex[:20] @@ -616,14 +615,14 @@ def 
test_intersection(self): expected2 = Index([3, 4, 5], name='idx') result2 = idx1.intersection(idx2) tm.assert_index_equal(result2, expected2) - self.assertEqual(result2.name, expected2.name) + assert result2.name == expected2.name # if target name is different, it will be reset idx3 = Index([3, 4, 5, 6, 7], name='other') expected3 = Index([3, 4, 5], name=None) result3 = idx1.intersection(idx3) tm.assert_index_equal(result3, expected3) - self.assertEqual(result3.name, expected3.name) + assert result3.name == expected3.name # non monotonic idx1 = Index([5, 3, 2, 4, 1], name='idx') @@ -655,7 +654,7 @@ def test_intersection(self): first.name = 'A' second.name = 'A' intersect = first.intersection(second) - self.assertEqual(intersect.name, 'A') + assert intersect.name == 'A' second.name = 'B' intersect = first.intersection(second) @@ -838,7 +837,7 @@ def test_append_empty_preserve_name(self): right = Index([1, 2, 3], name='foo') result = left.append(right) - self.assertEqual(result.name, 'foo') + assert result.name == 'foo' left = Index([], name='foo') right = Index([1, 2, 3], name='bar') @@ -872,22 +871,22 @@ def test_difference(self): result = first.difference(second) assert tm.equalContents(result, answer) - self.assertEqual(result.name, None) + assert result.name is None # same names second.name = 'name' result = first.difference(second) - self.assertEqual(result.name, 'name') + assert result.name == 'name' # with empty result = first.difference([]) assert tm.equalContents(result, first) - self.assertEqual(result.name, first.name) + assert result.name == first.name - # with everythin + # with everything result = first.difference(first) - self.assertEqual(len(result), 0) - self.assertEqual(result.name, first.name) + assert len(result) == 0 + assert result.name == first.name def test_symmetric_difference(self): # smoke @@ -931,11 +930,11 @@ def test_symmetric_difference(self): expected = Index([1, 5]) result = idx1.symmetric_difference(idx2) assert tm.equalContents(result, expected) - self.assertEqual(result.name, 'idx1') + assert result.name == 'idx1' result = idx1.symmetric_difference(idx2, result_name='new_name') assert tm.equalContents(result, expected) - self.assertEqual(result.name, 'new_name') + assert result.name == 'new_name' def test_is_numeric(self): assert not self.dateIndex.is_numeric() @@ -978,19 +977,19 @@ def test_format(self): index = Index([now]) formatted = index.format() expected = [str(index[0])] - self.assertEqual(formatted, expected) + assert formatted == expected # 2845 index = Index([1, 2.0 + 3.0j, np.nan]) formatted = index.format() expected = [str(index[0]), str(index[1]), u('NaN')] - self.assertEqual(formatted, expected) + assert formatted == expected # is this really allowed? 
index = Index([1, 2.0 + 3.0j, None]) formatted = index.format() expected = [str(index[0]), str(index[1]), u('NaN')] - self.assertEqual(formatted, expected) + assert formatted == expected self.strIndex[:0].format() @@ -1000,15 +999,15 @@ def test_format_with_name_time_info(self): dates = Index([dt + inc for dt in self.dateIndex], name='something') formatted = dates.format(name=True) - self.assertEqual(formatted[0], 'something') + assert formatted[0] == 'something' def test_format_datetime_with_time(self): t = Index([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)]) result = t.format() expected = ['2012-02-07 00:00:00', '2012-02-07 23:00:00'] - self.assertEqual(len(result), 2) - self.assertEqual(result, expected) + assert len(result) == 2 + assert result == expected def test_format_none(self): values = ['a', 'b', 'c', None] @@ -1019,8 +1018,8 @@ def test_format_none(self): def test_logical_compat(self): idx = self.create_index() - self.assertEqual(idx.all(), idx.values.all()) - self.assertEqual(idx.any(), idx.values.any()) + assert idx.all() == idx.values.all() + assert idx.any() == idx.values.any() def _check_method_works(self, method): method(self.empty) @@ -1138,17 +1137,17 @@ def test_get_loc(self): idx = pd.Index([0, 1, 2]) all_methods = [None, 'pad', 'backfill', 'nearest'] for method in all_methods: - self.assertEqual(idx.get_loc(1, method=method), 1) + assert idx.get_loc(1, method=method) == 1 if method is not None: - self.assertEqual(idx.get_loc(1, method=method, tolerance=0), 1) + assert idx.get_loc(1, method=method, tolerance=0) == 1 with pytest.raises(TypeError): idx.get_loc([1, 2], method=method) for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: - self.assertEqual(idx.get_loc(1.1, method), loc) + assert idx.get_loc(1.1, method) == loc for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: - self.assertEqual(idx.get_loc(1.1, method, tolerance=1), loc) + assert idx.get_loc(1.1, method, tolerance=1) == loc for method in ['pad', 'backfill', 'nearest']: with pytest.raises(KeyError): @@ -1170,26 +1169,26 @@ def test_slice_locs(self): idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) n = len(idx) - self.assertEqual(idx.slice_locs(start=2), (2, n)) - self.assertEqual(idx.slice_locs(start=3), (3, n)) - self.assertEqual(idx.slice_locs(3, 8), (3, 6)) - self.assertEqual(idx.slice_locs(5, 10), (3, n)) - self.assertEqual(idx.slice_locs(end=8), (0, 6)) - self.assertEqual(idx.slice_locs(end=9), (0, 7)) + assert idx.slice_locs(start=2) == (2, n) + assert idx.slice_locs(start=3) == (3, n) + assert idx.slice_locs(3, 8) == (3, 6) + assert idx.slice_locs(5, 10) == (3, n) + assert idx.slice_locs(end=8) == (0, 6) + assert idx.slice_locs(end=9) == (0, 7) # reversed idx2 = idx[::-1] - self.assertEqual(idx2.slice_locs(8, 2), (2, 6)) - self.assertEqual(idx2.slice_locs(7, 3), (2, 5)) + assert idx2.slice_locs(8, 2) == (2, 6) + assert idx2.slice_locs(7, 3) == (2, 5) # float slicing idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=float)) n = len(idx) - self.assertEqual(idx.slice_locs(5.0, 10.0), (3, n)) - self.assertEqual(idx.slice_locs(4.5, 10.5), (3, 8)) + assert idx.slice_locs(5.0, 10.0) == (3, n) + assert idx.slice_locs(4.5, 10.5) == (3, 8) idx2 = idx[::-1] - self.assertEqual(idx2.slice_locs(8.5, 1.5), (2, 6)) - self.assertEqual(idx2.slice_locs(10.5, -1), (0, n)) + assert idx2.slice_locs(8.5, 1.5) == (2, 6) + assert idx2.slice_locs(10.5, -1) == (0, n) # int slicing with floats # GH 4892, these are all TypeErrors @@ -1206,35 +1205,35 @@ def test_slice_locs(self): 
def test_slice_locs_dup(self): idx = Index(['a', 'a', 'b', 'c', 'd', 'd']) - self.assertEqual(idx.slice_locs('a', 'd'), (0, 6)) - self.assertEqual(idx.slice_locs(end='d'), (0, 6)) - self.assertEqual(idx.slice_locs('a', 'c'), (0, 4)) - self.assertEqual(idx.slice_locs('b', 'd'), (2, 6)) + assert idx.slice_locs('a', 'd') == (0, 6) + assert idx.slice_locs(end='d') == (0, 6) + assert idx.slice_locs('a', 'c') == (0, 4) + assert idx.slice_locs('b', 'd') == (2, 6) idx2 = idx[::-1] - self.assertEqual(idx2.slice_locs('d', 'a'), (0, 6)) - self.assertEqual(idx2.slice_locs(end='a'), (0, 6)) - self.assertEqual(idx2.slice_locs('d', 'b'), (0, 4)) - self.assertEqual(idx2.slice_locs('c', 'a'), (2, 6)) + assert idx2.slice_locs('d', 'a') == (0, 6) + assert idx2.slice_locs(end='a') == (0, 6) + assert idx2.slice_locs('d', 'b') == (0, 4) + assert idx2.slice_locs('c', 'a') == (2, 6) for dtype in [int, float]: idx = Index(np.array([10, 12, 12, 14], dtype=dtype)) - self.assertEqual(idx.slice_locs(12, 12), (1, 3)) - self.assertEqual(idx.slice_locs(11, 13), (1, 3)) + assert idx.slice_locs(12, 12) == (1, 3) + assert idx.slice_locs(11, 13) == (1, 3) idx2 = idx[::-1] - self.assertEqual(idx2.slice_locs(12, 12), (1, 3)) - self.assertEqual(idx2.slice_locs(13, 11), (1, 3)) + assert idx2.slice_locs(12, 12) == (1, 3) + assert idx2.slice_locs(13, 11) == (1, 3) def test_slice_locs_na(self): idx = Index([np.nan, 1, 2]) pytest.raises(KeyError, idx.slice_locs, start=1.5) pytest.raises(KeyError, idx.slice_locs, end=1.5) - self.assertEqual(idx.slice_locs(1), (1, 3)) - self.assertEqual(idx.slice_locs(np.nan), (0, 3)) + assert idx.slice_locs(1) == (1, 3) + assert idx.slice_locs(np.nan) == (0, 3) idx = Index([0, np.nan, np.nan, 1, 2]) - self.assertEqual(idx.slice_locs(np.nan), (1, 5)) + assert idx.slice_locs(np.nan) == (1, 5) def test_slice_locs_negative_step(self): idx = Index(list('bcdxy')) @@ -1320,13 +1319,13 @@ def test_tuple_union_bug(self): int_idx = idx1.intersection(idx2) # needs to be 1d like idx1 and idx2 expected = idx1[:4] # pandas.Index(sorted(set(idx1) & set(idx2))) - self.assertEqual(int_idx.ndim, 1) + assert int_idx.ndim == 1 tm.assert_index_equal(int_idx, expected) # union broken union_idx = idx1.union(idx2) expected = idx2 - self.assertEqual(union_idx.ndim, 1) + assert union_idx.ndim == 1 tm.assert_index_equal(union_idx, expected) def test_is_monotonic_incomparable(self): @@ -1341,7 +1340,7 @@ def test_get_set_value(self): assert_almost_equal(self.dateIndex.get_value(values, date), values[67]) self.dateIndex.set_value(values, date, 10) - self.assertEqual(values[67], 10) + assert values[67] == 10 def test_isin(self): values = ['foo', 'bar', 'quux'] @@ -1358,8 +1357,8 @@ def test_isin(self): # empty, return dtype bool idx = Index([]) result = idx.isin(values) - self.assertEqual(len(result), 0) - self.assertEqual(result.dtype, np.bool_) + assert len(result) == 0 + assert result.dtype == np.bool_ def test_isin_nan(self): tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([np.nan]), @@ -1423,7 +1422,7 @@ def test_get_level_values(self): def test_slice_keep_name(self): idx = Index(['a', 'b'], name='asdf') - self.assertEqual(idx.name, idx[1:].name) + assert idx.name == idx[1:].name def test_join_self(self): # instance attributes of the form self.Index @@ -1546,28 +1545,28 @@ def test_reindex_preserves_name_if_target_is_list_or_ndarray(self): dt_idx = pd.date_range('20130101', periods=3) idx.name = None - self.assertEqual(idx.reindex([])[0].name, None) - self.assertEqual(idx.reindex(np.array([]))[0].name, None) - 
self.assertEqual(idx.reindex(idx.tolist())[0].name, None) - self.assertEqual(idx.reindex(idx.tolist()[:-1])[0].name, None) - self.assertEqual(idx.reindex(idx.values)[0].name, None) - self.assertEqual(idx.reindex(idx.values[:-1])[0].name, None) + assert idx.reindex([])[0].name is None + assert idx.reindex(np.array([]))[0].name is None + assert idx.reindex(idx.tolist())[0].name is None + assert idx.reindex(idx.tolist()[:-1])[0].name is None + assert idx.reindex(idx.values)[0].name is None + assert idx.reindex(idx.values[:-1])[0].name is None # Must preserve name even if dtype changes. - self.assertEqual(idx.reindex(dt_idx.values)[0].name, None) - self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, None) + assert idx.reindex(dt_idx.values)[0].name is None + assert idx.reindex(dt_idx.tolist())[0].name is None idx.name = 'foobar' - self.assertEqual(idx.reindex([])[0].name, 'foobar') - self.assertEqual(idx.reindex(np.array([]))[0].name, 'foobar') - self.assertEqual(idx.reindex(idx.tolist())[0].name, 'foobar') - self.assertEqual(idx.reindex(idx.tolist()[:-1])[0].name, 'foobar') - self.assertEqual(idx.reindex(idx.values)[0].name, 'foobar') - self.assertEqual(idx.reindex(idx.values[:-1])[0].name, 'foobar') + assert idx.reindex([])[0].name == 'foobar' + assert idx.reindex(np.array([]))[0].name == 'foobar' + assert idx.reindex(idx.tolist())[0].name == 'foobar' + assert idx.reindex(idx.tolist()[:-1])[0].name == 'foobar' + assert idx.reindex(idx.values)[0].name == 'foobar' + assert idx.reindex(idx.values[:-1])[0].name == 'foobar' # Must preserve name even if dtype changes. - self.assertEqual(idx.reindex(dt_idx.values)[0].name, 'foobar') - self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, 'foobar') + assert idx.reindex(dt_idx.values)[0].name == 'foobar' + assert idx.reindex(dt_idx.tolist())[0].name == 'foobar' def test_reindex_preserves_type_if_target_is_empty_list_or_array(self): # GH7774 @@ -1576,10 +1575,9 @@ def test_reindex_preserves_type_if_target_is_empty_list_or_array(self): def get_reindex_type(target): return idx.reindex(target)[0].dtype.type - self.assertEqual(get_reindex_type([]), np.object_) - self.assertEqual(get_reindex_type(np.array([])), np.object_) - self.assertEqual(get_reindex_type(np.array([], dtype=np.int64)), - np.object_) + assert get_reindex_type([]) == np.object_ + assert get_reindex_type(np.array([])) == np.object_ + assert get_reindex_type(np.array([], dtype=np.int64)) == np.object_ def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self): # GH7774 @@ -1588,14 +1586,14 @@ def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self): def get_reindex_type(target): return idx.reindex(target)[0].dtype.type - self.assertEqual(get_reindex_type(pd.Int64Index([])), np.int64) - self.assertEqual(get_reindex_type(pd.Float64Index([])), np.float64) - self.assertEqual(get_reindex_type(pd.DatetimeIndex([])), np.datetime64) + assert get_reindex_type(pd.Int64Index([])) == np.int64 + assert get_reindex_type(pd.Float64Index([])) == np.float64 + assert get_reindex_type(pd.DatetimeIndex([])) == np.datetime64 reindexed = idx.reindex(pd.MultiIndex( [pd.Int64Index([]), pd.Float64Index([])], [[], []]))[0] - self.assertEqual(reindexed.levels[0].dtype.type, np.int64) - self.assertEqual(reindexed.levels[1].dtype.type, np.float64) + assert reindexed.levels[0].dtype.type == np.int64 + assert reindexed.levels[1].dtype.type == np.float64 def test_groupby(self): idx = Index(range(5)) @@ -1628,8 +1626,8 @@ def test_equals_op_multiindex(self): def 
test_conversion_preserves_name(self): # GH 10875 i = pd.Index(['01:02:03', '01:02:04'], name='label') - self.assertEqual(i.name, pd.to_datetime(i).name) - self.assertEqual(i.name, pd.to_timedelta(i).name) + assert i.name == pd.to_datetime(i).name + assert i.name == pd.to_timedelta(i).name def test_string_index_repr(self): # py3/py2 repr can differ because of "u" prefix @@ -1644,10 +1642,10 @@ def test_string_index_repr(self): idx = pd.Index(['a', 'bb', 'ccc']) if PY3: expected = u"""Index(['a', 'bb', 'ccc'], dtype='object')""" - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""Index([u'a', u'bb', u'ccc'], dtype='object')""" - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # multiple lines idx = pd.Index(['a', 'bb', 'ccc'] * 10) @@ -1658,7 +1656,7 @@ def test_string_index_repr(self): 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], dtype='object')""" - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""\ Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', @@ -1666,7 +1664,7 @@ def test_string_index_repr(self): u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], dtype='object')""" - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # truncated idx = pd.Index(['a', 'bb', 'ccc'] * 100) @@ -1677,7 +1675,7 @@ def test_string_index_repr(self): 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], dtype='object', length=300)""" - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""\ Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', @@ -1685,16 +1683,16 @@ def test_string_index_repr(self): u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], dtype='object', length=300)""" - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # short idx = pd.Index([u'あ', u'いい', u'ううう']) if PY3: expected = u"""Index(['あ', 'いい', 'ううう'], dtype='object')""" - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""Index([u'あ', u'いい', u'ううう'], dtype='object')""" - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # multiple lines idx = pd.Index([u'あ', u'いい', u'ううう'] * 10) @@ -1706,7 +1704,7 @@ def test_string_index_repr(self): u" 'あ', 'いい', 'ううう', 'あ', 'いい', " u"'ううう'],\n" u" dtype='object')") - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = (u"Index([u'あ', u'いい', u'ううう', u'あ', u'いい', " u"u'ううう', u'あ', u'いい', u'ううう', u'あ',\n" @@ -1715,7 +1713,7 @@ def test_string_index_repr(self): u" u'ううう', u'あ', u'いい', u'ううう', u'あ', " u"u'いい', u'ううう', u'あ', u'いい', u'ううう'],\n" u" dtype='object')") - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # truncated idx = pd.Index([u'あ', u'いい', u'ううう'] * 100) @@ -1726,7 +1724,7 @@ def test_string_index_repr(self): u" 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', " u"'ううう', 'あ', 'いい', 'ううう'],\n" u" dtype='object', length=300)") - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = (u"Index([u'あ', u'いい', u'ううう', u'あ', u'いい', " u"u'ううう', u'あ', u'いい', u'ううう', u'あ',\n" @@ -1735,7 +1733,7 @@ def test_string_index_repr(self): u"u'いい', u'ううう', u'あ', u'いい', u'ううう'],\n" u" dtype='object', length=300)") - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # Enable Unicode option ----------------------------------------- with
cf.option_context('display.unicode.east_asian_width', True): @@ -1745,11 +1743,11 @@ def test_string_index_repr(self): if PY3: expected = (u"Index(['あ', 'いい', 'ううう'], " u"dtype='object')") - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = (u"Index([u'あ', u'いい', u'ううう'], " u"dtype='object')") - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # multiple lines idx = pd.Index([u'あ', u'いい', u'ううう'] * 10) @@ -1763,7 +1761,7 @@ def test_string_index_repr(self): u" 'あ', 'いい', 'ううう'],\n" u" dtype='object')""") - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = (u"Index([u'あ', u'いい', u'ううう', u'あ', u'いい', " u"u'ううう', u'あ', u'いい',\n" @@ -1775,7 +1773,7 @@ def test_string_index_repr(self): u"u'あ', u'いい', u'ううう'],\n" u" dtype='object')") - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # truncated idx = pd.Index([u'あ', u'いい', u'ううう'] * 100) @@ -1789,7 +1787,7 @@ def test_string_index_repr(self): u" 'ううう'],\n" u" dtype='object', length=300)") - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = (u"Index([u'あ', u'いい', u'ううう', u'あ', u'いい', " u"u'ううう', u'あ', u'いい',\n" @@ -1800,7 +1798,7 @@ def test_string_index_repr(self): u" u'いい', u'ううう'],\n" u" dtype='object', length=300)") - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected class TestMixedIntIndex(Base, tm.TestCase): @@ -1876,22 +1874,22 @@ def test_copy_name2(self): idx1 = idx.copy() assert idx.equals(idx1) - self.assertEqual(idx.name, 'MyName') - self.assertEqual(idx1.name, 'MyName') + assert idx.name == 'MyName' + assert idx1.name == 'MyName' idx2 = idx.copy(name='NewName') assert idx.equals(idx2) - self.assertEqual(idx.name, 'MyName') - self.assertEqual(idx2.name, 'NewName') + assert idx.name == 'MyName' + assert idx2.name == 'NewName' idx3 = idx.copy(names=['NewName']) assert idx.equals(idx3) - self.assertEqual(idx.name, 'MyName') - self.assertEqual(idx.names, ['MyName']) - self.assertEqual(idx3.name, 'NewName') - self.assertEqual(idx3.names, ['NewName']) + assert idx.name == 'MyName' + assert idx.names == ['MyName'] + assert idx3.name == 'NewName' + assert idx3.names == ['NewName'] def test_union_base(self): idx = self.create_index() @@ -1960,8 +1958,8 @@ def test_symmetric_difference(self): def test_logical_compat(self): idx = self.create_index() - self.assertEqual(idx.all(), idx.values.all()) - self.assertEqual(idx.any(), idx.values.any()) + assert idx.all() == idx.values.all() + assert idx.any() == idx.values.any() def test_dropna(self): # GH 6194 @@ -2074,4 +2072,4 @@ def test_intersect_str_dates(self): i2 = Index(['aa'], dtype=object) res = i2.intersection(i1) - self.assertEqual(len(res), 0) + assert len(res) == 0 diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 7b2d27c9b51a4..6a2eea0b84b72 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -198,8 +198,8 @@ def test_min_max(self): ci = self.create_index(ordered=True) - self.assertEqual(ci.min(), 'c') - self.assertEqual(ci.max(), 'b') + assert ci.min() == 'c' + assert ci.max() == 'b' def test_map(self): ci = pd.CategoricalIndex(list('ABABC'), categories=list('CBA'), @@ -450,8 +450,8 @@ def test_get_loc(self): # GH 12531 cidx1 = CategoricalIndex(list('abcde'), categories=list('edabc')) idx1 = Index(list('abcde')) - self.assertEqual(cidx1.get_loc('a'), idx1.get_loc('a')) - self.assertEqual(cidx1.get_loc('e'), 
idx1.get_loc('e')) + assert cidx1.get_loc('a') == idx1.get_loc('a') + assert cidx1.get_loc('e') == idx1.get_loc('e') for i in [cidx1, idx1]: with pytest.raises(KeyError): @@ -468,8 +468,8 @@ def test_get_loc(self): True, False, True])) # unique element results in scalar res = cidx2.get_loc('e') - self.assertEqual(res, idx2.get_loc('e')) - self.assertEqual(res, 4) + assert res == idx2.get_loc('e') + assert res == 4 for i in [cidx2, idx2]: with pytest.raises(KeyError): @@ -481,12 +481,12 @@ def test_get_loc(self): # results in slice res = cidx3.get_loc('a') - self.assertEqual(res, idx3.get_loc('a')) - self.assertEqual(res, slice(0, 2, None)) + assert res == idx3.get_loc('a') + assert res == slice(0, 2, None) res = cidx3.get_loc('b') - self.assertEqual(res, idx3.get_loc('b')) - self.assertEqual(res, slice(2, 5, None)) + assert res == idx3.get_loc('b') + assert res == slice(2, 5, None) for i in [cidx3, idx3]: with pytest.raises(KeyError): @@ -612,10 +612,10 @@ def test_string_categorical_index_repr(self): idx = pd.CategoricalIndex(['a', 'bb', 'ccc']) if PY3: expected = u"""CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'a', u'bb', u'ccc'], categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # multiple lines idx = pd.CategoricalIndex(['a', 'bb', 'ccc'] * 10) @@ -625,7 +625,7 @@ def test_string_categorical_index_repr(self): 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', @@ -633,7 +633,7 @@ def test_string_categorical_index_repr(self): u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # truncated idx = pd.CategoricalIndex(['a', 'bb', 'ccc'] * 100) @@ -643,7 +643,7 @@ def test_string_categorical_index_repr(self): 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', @@ -652,7 +652,7 @@ def test_string_categorical_index_repr(self): u'bb', u'ccc'], categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category', length=300)""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # larger categories idx = pd.CategoricalIndex(list('abcdefghijklmmo')) @@ -661,22 +661,22 @@ def test_string_categorical_index_repr(self): 'm', 'm', 'o'], categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', u'i', u'j', u'k', u'l', u'm', u'm', u'o'], categories=[u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', ...], ordered=False, dtype='category')""" # noqa - self.assertEqual(unicode(idx), expected) + 
assert unicode(idx) == expected # short idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう']) if PY3: expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # multiple lines idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 10) @@ -686,7 +686,7 @@ def test_string_categorical_index_repr(self): 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', @@ -694,7 +694,7 @@ def test_string_categorical_index_repr(self): u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # truncated idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 100) @@ -704,7 +704,7 @@ def test_string_categorical_index_repr(self): 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', @@ -713,7 +713,7 @@ def test_string_categorical_index_repr(self): u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # larger categories idx = pd.CategoricalIndex(list(u'あいうえおかきくけこさしすせそ')) @@ -722,13 +722,13 @@ def test_string_categorical_index_repr(self): 'す', 'せ', 'そ'], categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', u'け', u'こ', u'さ', u'し', u'す', u'せ', u'そ'], categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # Enable Unicode option ----------------------------------------- with cf.option_context('display.unicode.east_asian_width', True): @@ -737,10 +737,10 @@ def test_string_categorical_index_repr(self): idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう']) if PY3: expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # multiple lines idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 10) @@ -751,7 +751,7 @@ def test_string_categorical_index_repr(self): 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa -
self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', @@ -760,7 +760,7 @@ def test_string_categorical_index_repr(self): u'いい', u'ううう', u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # truncated idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 100) @@ -772,7 +772,7 @@ def test_string_categorical_index_repr(self): 'あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', @@ -781,7 +781,7 @@ def test_string_categorical_index_repr(self): u'ううう', u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # larger categories idx = pd.CategoricalIndex(list(u'あいうえおかきくけこさしすせそ')) @@ -790,13 +790,13 @@ def test_string_categorical_index_repr(self): 'さ', 'し', 'す', 'せ', 'そ'], categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', u'け', u'こ', u'さ', u'し', u'す', u'せ', u'そ'], categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected def test_fillna_categorical(self): # GH 11343 diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 815fefa813a9d..00897f290f292 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -118,15 +118,15 @@ def f(): def test_properties(self): index = self.index - self.assertEqual(len(index), 2) - self.assertEqual(index.size, 2) - self.assertEqual(index.shape, (2, )) + assert len(index) == 2 + assert index.size == 2 + assert index.shape == (2, ) tm.assert_index_equal(index.left, Index([0, 1])) tm.assert_index_equal(index.right, Index([1, 2])) tm.assert_index_equal(index.mid, Index([0.5, 1.5])) - self.assertEqual(index.closed, 'right') + assert index.closed == 'right' expected = np.array([Interval(0, 1), Interval(1, 2)], dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) @@ -134,15 +134,15 @@ def test_properties(self): # with nans index = self.index_with_nan - self.assertEqual(len(index), 3) - self.assertEqual(index.size, 3) - self.assertEqual(index.shape, (3, )) + assert len(index) == 3 + assert index.size == 3 + assert index.shape == (3, ) tm.assert_index_equal(index.left, Index([0, np.nan, 1])) tm.assert_index_equal(index.right, Index([1, np.nan, 2])) tm.assert_index_equal(index.mid, Index([0.5, np.nan, 1.5])) - self.assertEqual(index.closed, 'right') + assert index.closed == 'right' expected = np.array([Interval(0, 1), np.nan, Interval(1, 2)], dtype=object) @@ -285,7 +285,7 @@ def test_repr(self): "\n right=[1, 2]," "\n closed='right'," "\n dtype='interval[int64]')") - self.assertEqual(repr(i), expected) + assert repr(i) == expected i = IntervalIndex.from_tuples((Timestamp('20130101'), Timestamp('20130102')), @@ -296,7 +296,7 @@ def 
test_repr(self): "\n right=['2013-01-02', '2013-01-03']," "\n closed='right'," "\n dtype='interval[datetime64[ns]]')") - self.assertEqual(repr(i), expected) + assert repr(i) == expected @pytest.mark.xfail(reason='not a valid repr as we use interval notation') def test_repr_max_seq_item_setting(self): @@ -328,21 +328,21 @@ def test_get_item(self): def test_get_loc_value(self): pytest.raises(KeyError, self.index.get_loc, 0) - self.assertEqual(self.index.get_loc(0.5), 0) - self.assertEqual(self.index.get_loc(1), 0) - self.assertEqual(self.index.get_loc(1.5), 1) - self.assertEqual(self.index.get_loc(2), 1) + assert self.index.get_loc(0.5) == 0 + assert self.index.get_loc(1) == 0 + assert self.index.get_loc(1.5) == 1 + assert self.index.get_loc(2) == 1 pytest.raises(KeyError, self.index.get_loc, -1) pytest.raises(KeyError, self.index.get_loc, 3) idx = IntervalIndex.from_tuples([(0, 2), (1, 3)]) - self.assertEqual(idx.get_loc(0.5), 0) - self.assertEqual(idx.get_loc(1), 0) + assert idx.get_loc(0.5) == 0 + assert idx.get_loc(1) == 0 tm.assert_numpy_array_equal(idx.get_loc(1.5), np.array([0, 1], dtype='int64')) tm.assert_numpy_array_equal(np.sort(idx.get_loc(2)), np.array([0, 1], dtype='int64')) - self.assertEqual(idx.get_loc(3), 1) + assert idx.get_loc(3) == 1 pytest.raises(KeyError, idx.get_loc, 3.5) idx = IntervalIndex.from_arrays([0, 2], [1, 3]) @@ -351,29 +351,29 @@ def test_get_loc_value(self): def slice_locs_cases(self, breaks): # TODO: same tests for more index types index = IntervalIndex.from_breaks([0, 1, 2], closed='right') - self.assertEqual(index.slice_locs(), (0, 2)) - self.assertEqual(index.slice_locs(0, 1), (0, 1)) - self.assertEqual(index.slice_locs(1, 1), (0, 1)) - self.assertEqual(index.slice_locs(0, 2), (0, 2)) - self.assertEqual(index.slice_locs(0.5, 1.5), (0, 2)) - self.assertEqual(index.slice_locs(0, 0.5), (0, 1)) - self.assertEqual(index.slice_locs(start=1), (0, 2)) - self.assertEqual(index.slice_locs(start=1.2), (1, 2)) - self.assertEqual(index.slice_locs(end=1), (0, 1)) - self.assertEqual(index.slice_locs(end=1.1), (0, 2)) - self.assertEqual(index.slice_locs(end=1.0), (0, 1)) - self.assertEqual(*index.slice_locs(-1, -1)) + assert index.slice_locs() == (0, 2) + assert index.slice_locs(0, 1) == (0, 1) + assert index.slice_locs(1, 1) == (0, 1) + assert index.slice_locs(0, 2) == (0, 2) + assert index.slice_locs(0.5, 1.5) == (0, 2) + assert index.slice_locs(0, 0.5) == (0, 1) + assert index.slice_locs(start=1) == (0, 2) + assert index.slice_locs(start=1.2) == (1, 2) + assert index.slice_locs(end=1) == (0, 1) + assert index.slice_locs(end=1.1) == (0, 2) + assert index.slice_locs(end=1.0) == (0, 1) + assert index.slice_locs(-1, -1) == (0, 0) index = IntervalIndex.from_breaks([0, 1, 2], closed='neither') - self.assertEqual(index.slice_locs(0, 1), (0, 1)) - self.assertEqual(index.slice_locs(0, 2), (0, 2)) - self.assertEqual(index.slice_locs(0.5, 1.5), (0, 2)) - self.assertEqual(index.slice_locs(1, 1), (1, 1)) - self.assertEqual(index.slice_locs(1, 2), (1, 2)) + assert index.slice_locs(0, 1) == (0, 1) + assert index.slice_locs(0, 2) == (0, 2) + assert index.slice_locs(0.5, 1.5) == (0, 2) + assert index.slice_locs(1, 1) == (1, 1) + assert index.slice_locs(1, 2) == (1, 2) index = IntervalIndex.from_breaks([0, 1, 2], closed='both') - self.assertEqual(index.slice_locs(1, 1), (0, 2)) - self.assertEqual(index.slice_locs(1, 2), (0, 2)) + assert index.slice_locs(1, 1) == (0, 2) + assert index.slice_locs(1, 2) == (0, 2) def test_slice_locs_int64(self): self.slice_locs_cases([0, 1, 2]) @@ 
-383,14 +383,16 @@ def test_slice_locs_float64(self): def slice_locs_decreasing_cases(self, tuples): index = IntervalIndex.from_tuples(tuples) - self.assertEqual(index.slice_locs(1.5, 0.5), (1, 3)) - self.assertEqual(index.slice_locs(2, 0), (1, 3)) - self.assertEqual(index.slice_locs(2, 1), (1, 3)) - self.assertEqual(index.slice_locs(3, 1.1), (0, 3)) - self.assertEqual(index.slice_locs(3, 3), (0, 2)) - self.assertEqual(index.slice_locs(3.5, 3.3), (0, 1)) - self.assertEqual(index.slice_locs(1, -3), (2, 3)) - self.assertEqual(*index.slice_locs(-1, -1)) + assert index.slice_locs(1.5, 0.5) == (1, 3) + assert index.slice_locs(2, 0) == (1, 3) + assert index.slice_locs(2, 1) == (1, 3) + assert index.slice_locs(3, 1.1) == (0, 3) + assert index.slice_locs(3, 3) == (0, 2) + assert index.slice_locs(3.5, 3.3) == (0, 1) + assert index.slice_locs(1, -3) == (2, 3) + + slice_locs = index.slice_locs(-1, -1) + assert slice_locs[0] == slice_locs[1] def test_slice_locs_decreasing_int64(self): self.slice_locs_cases([(2, 4), (1, 3), (0, 2)]) @@ -404,9 +406,9 @@ def test_slice_locs_fails(self): index.slice_locs(1, 2) def test_get_loc_interval(self): - self.assertEqual(self.index.get_loc(Interval(0, 1)), 0) - self.assertEqual(self.index.get_loc(Interval(0, 0.5)), 0) - self.assertEqual(self.index.get_loc(Interval(0, 1, 'left')), 0) + assert self.index.get_loc(Interval(0, 1)) == 0 + assert self.index.get_loc(Interval(0, 0.5)) == 0 + assert self.index.get_loc(Interval(0, 1, 'left')) == 0 pytest.raises(KeyError, self.index.get_loc, Interval(2, 3)) pytest.raises(KeyError, self.index.get_loc, Interval(-1, 0, 'left')) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 714e901532ed9..a840711e37fb0 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -128,35 +128,35 @@ def test_numpy_repeat(self): def test_set_name_methods(self): # so long as these are synonyms, we don't need to test set_names - self.assertEqual(self.index.rename, self.index.set_names) + assert self.index.rename == self.index.set_names new_names = [name + "SUFFIX" for name in self.index_names] ind = self.index.set_names(new_names) - self.assertEqual(self.index.names, self.index_names) - self.assertEqual(ind.names, new_names) + assert self.index.names == self.index_names + assert ind.names == new_names with tm.assert_raises_regex(ValueError, "^Length"): ind.set_names(new_names + new_names) new_names2 = [name + "SUFFIX2" for name in new_names] res = ind.set_names(new_names2, inplace=True) assert res is None - self.assertEqual(ind.names, new_names2) + assert ind.names == new_names2 # set names for specific level (# GH7792) ind = self.index.set_names(new_names[0], level=0) - self.assertEqual(self.index.names, self.index_names) - self.assertEqual(ind.names, [new_names[0], self.index_names[1]]) + assert self.index.names == self.index_names + assert ind.names == [new_names[0], self.index_names[1]] res = ind.set_names(new_names2[0], level=0, inplace=True) assert res is None - self.assertEqual(ind.names, [new_names2[0], self.index_names[1]]) + assert ind.names == [new_names2[0], self.index_names[1]] # set names for multiple levels ind = self.index.set_names(new_names, level=[0, 1]) - self.assertEqual(self.index.names, self.index_names) - self.assertEqual(ind.names, new_names) + assert self.index.names == self.index_names + assert ind.names == new_names res = ind.set_names(new_names2, level=[0, 1], inplace=True) assert res is None - self.assertEqual(ind.names, new_names2) + assert 
ind.names == new_names2 def test_set_levels(self): # side note - you probably wouldn't want to use levels and labels @@ -167,7 +167,7 @@ def test_set_levels(self): def assert_matching(actual, expected, check_dtype=False): # avoid specifying internal representation # as much as possible - self.assertEqual(len(actual), len(expected)) + assert len(actual) == len(expected) for act, exp in zip(actual, expected): act = np.asarray(act) exp = np.asarray(exp) @@ -256,7 +256,7 @@ def test_set_labels(self): def assert_matching(actual, expected): # avoid specifying internal representation # as much as possible - self.assertEqual(len(actual), len(expected)) + assert len(actual) == len(expected) for act, exp in zip(actual, expected): act = np.asarray(act) exp = np.asarray(exp, dtype=np.int8) @@ -439,12 +439,12 @@ def test_copy_in_constructor(self): val = labels[0] mi = MultiIndex(levels=[levels, levels], labels=[labels, labels], copy=True) - self.assertEqual(mi.labels[0][0], val) + assert mi.labels[0][0] == val labels[0] = 15 - self.assertEqual(mi.labels[0][0], val) + assert mi.labels[0][0] == val val = levels[0] levels[0] = "PANDA" - self.assertEqual(mi.levels[0][0], val) + assert mi.levels[0][0] == val def test_set_value_keeps_names(self): # motivating example from #3742 @@ -457,10 +457,10 @@ def test_set_value_keeps_names(self): index=idx) df = df.sort_index() assert df.is_copy is None - self.assertEqual(df.index.names, ('Name', 'Number')) + assert df.index.names == ('Name', 'Number') df = df.set_value(('grethe', '4'), 'one', 99.34) assert df.is_copy is None - self.assertEqual(df.index.names, ('Name', 'Number')) + assert df.index.names == ('Name', 'Number') def test_copy_names(self): # Check that adding a "names" parameter to the copy is honored @@ -469,27 +469,27 @@ def test_copy_names(self): multi_idx1 = multi_idx.copy() assert multi_idx.equals(multi_idx1) - self.assertEqual(multi_idx.names, ['MyName1', 'MyName2']) - self.assertEqual(multi_idx1.names, ['MyName1', 'MyName2']) + assert multi_idx.names == ['MyName1', 'MyName2'] + assert multi_idx1.names == ['MyName1', 'MyName2'] multi_idx2 = multi_idx.copy(names=['NewName1', 'NewName2']) assert multi_idx.equals(multi_idx2) - self.assertEqual(multi_idx.names, ['MyName1', 'MyName2']) - self.assertEqual(multi_idx2.names, ['NewName1', 'NewName2']) + assert multi_idx.names == ['MyName1', 'MyName2'] + assert multi_idx2.names == ['NewName1', 'NewName2'] multi_idx3 = multi_idx.copy(name=['NewName1', 'NewName2']) assert multi_idx.equals(multi_idx3) - self.assertEqual(multi_idx.names, ['MyName1', 'MyName2']) - self.assertEqual(multi_idx3.names, ['NewName1', 'NewName2']) + assert multi_idx.names == ['MyName1', 'MyName2'] + assert multi_idx3.names == ['NewName1', 'NewName2'] def test_names(self): # names are assigned in __init__ names = self.index_names level_names = [level.name for level in self.index.levels] - self.assertEqual(names, level_names) + assert names == level_names # setting bad names on existing index = self.index @@ -515,7 +515,7 @@ def test_names(self): index.names = ["a", "b"] ind_names = list(index.names) level_names = [level.name for level in index.levels] - self.assertEqual(ind_names, level_names) + assert ind_names == level_names def test_reference_duplicate_name(self): idx = MultiIndex.from_tuples( @@ -623,7 +623,7 @@ def test_view(self): self.assert_multiindex_copied(i_view, self.index) def check_level_names(self, index, names): - self.assertEqual([level.name for level in index.levels], list(names)) + assert [level.name for level in 
index.levels] == list(names) def test_changing_names(self): @@ -656,8 +656,8 @@ def test_duplicate_names(self): def test_get_level_number_integer(self): self.index.names = [1, 0] - self.assertEqual(self.index._get_level_number(1), 0) - self.assertEqual(self.index._get_level_number(0), 1) + assert self.index._get_level_number(1) == 0 + assert self.index._get_level_number(0) == 1 pytest.raises(IndexError, self.index._get_level_number, 2) tm.assert_raises_regex(KeyError, 'Level fourth not found', self.index._get_level_number, 'fourth') @@ -668,7 +668,7 @@ def test_from_arrays(self): arrays.append(np.asarray(lev).take(lab)) result = MultiIndex.from_arrays(arrays) - self.assertEqual(list(result), list(self.index)) + assert list(result) == list(self.index) # infer correctly result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')], @@ -819,7 +819,7 @@ def test_from_product(self): expected = MultiIndex.from_tuples(tuples, names=names) tm.assert_index_equal(result, expected) - self.assertEqual(result.names, names) + assert result.names == names def test_from_product_empty(self): # 0 levels @@ -914,7 +914,7 @@ def test_append_mixed_dtypes(self): [1.1, np.nan, 3.3], ['a', 'b', 'c'], dti, dti_tz, pi]) - self.assertEqual(mi.nlevels, 6) + assert mi.nlevels == 6 res = mi.append(mi) exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3], @@ -943,7 +943,7 @@ def test_get_level_values(self): expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'], name='first') tm.assert_index_equal(result, expected) - self.assertEqual(result.name, 'first') + assert result.name == 'first' result = self.index.get_level_values('first') expected = self.index.get_level_values(0) @@ -989,7 +989,7 @@ def test_get_level_values_na(self): arrays = [[], []] index = pd.MultiIndex.from_arrays(arrays) values = index.get_level_values(0) - self.assertEqual(values.shape, (0, )) + assert values.shape == (0, ) def test_reorder_levels(self): # this blows up @@ -997,13 +997,13 @@ def test_reorder_levels(self): self.index.reorder_levels, [2, 1, 0]) def test_nlevels(self): - self.assertEqual(self.index.nlevels, 2) + assert self.index.nlevels == 2 def test_iter(self): result = list(self.index) expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'), ('baz', 'two'), ('qux', 'one'), ('qux', 'two')] - self.assertEqual(result, expected) + assert result == expected def test_legacy_pickle(self): if PY3: @@ -1089,7 +1089,7 @@ def test_is_numeric(self): def test_getitem(self): # scalar - self.assertEqual(self.index[2], ('bar', 'one')) + assert self.index[2] == ('bar', 'one') # slice result = self.index[2:5] @@ -1105,12 +1105,12 @@ def test_getitem(self): def test_getitem_group_select(self): sorted_idx, _ = self.index.sortlevel(0) - self.assertEqual(sorted_idx.get_loc('baz'), slice(3, 4)) - self.assertEqual(sorted_idx.get_loc('foo'), slice(0, 2)) + assert sorted_idx.get_loc('baz') == slice(3, 4) + assert sorted_idx.get_loc('foo') == slice(0, 2) def test_get_loc(self): - self.assertEqual(self.index.get_loc(('foo', 'two')), 1) - self.assertEqual(self.index.get_loc(('baz', 'two')), 3) + assert self.index.get_loc(('foo', 'two')) == 1 + assert self.index.get_loc(('baz', 'two')) == 3 pytest.raises(KeyError, self.index.get_loc, ('bar', 'two')) pytest.raises(KeyError, self.index.get_loc, 'quux') @@ -1122,19 +1122,19 @@ def test_get_loc(self): lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) pytest.raises(KeyError, index.get_loc, (1, 1)) - self.assertEqual(index.get_loc((2, 
0)), slice(3, 5)) + assert index.get_loc((2, 0)) == slice(3, 5) def test_get_loc_duplicates(self): index = Index([2, 2, 2, 2]) result = index.get_loc(2) expected = slice(0, 4) - self.assertEqual(result, expected) + assert result == expected # pytest.raises(Exception, index.get_loc, 2) index = Index(['c', 'a', 'a', 'b', 'b']) rs = index.get_loc('c') xp = 0 - assert (rs == xp) + assert rs == xp def test_get_value_duplicates(self): index = MultiIndex(levels=[['D', 'B', 'C'], @@ -1155,12 +1155,12 @@ def test_get_loc_level(self): loc, new_index = index.get_loc_level((0, 1)) expected = slice(1, 2) exp_index = index[expected].droplevel(0).droplevel(0) - self.assertEqual(loc, expected) + assert loc == expected assert new_index.equals(exp_index) loc, new_index = index.get_loc_level((0, 1, 0)) expected = 1 - self.assertEqual(loc, expected) + assert loc == expected assert new_index is None pytest.raises(KeyError, index.get_loc_level, (2, 2)) @@ -1169,7 +1169,7 @@ def test_get_loc_level(self): [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) result, new_index = index.get_loc_level((2000, slice(None, None))) expected = slice(None, None) - self.assertEqual(result, expected) + assert result == expected assert new_index.equals(index.droplevel(0)) def test_slice_locs(self): @@ -1225,16 +1225,16 @@ def test_slice_locs_partial(self): sorted_idx, _ = self.index.sortlevel(0) result = sorted_idx.slice_locs(('foo', 'two'), ('qux', 'one')) - self.assertEqual(result, (1, 5)) + assert result == (1, 5) result = sorted_idx.slice_locs(None, ('qux', 'one')) - self.assertEqual(result, (0, 5)) + assert result == (0, 5) result = sorted_idx.slice_locs(('foo', 'two'), None) - self.assertEqual(result, (1, len(sorted_idx))) + assert result == (1, len(sorted_idx)) result = sorted_idx.slice_locs('bar', 'baz') - self.assertEqual(result, (2, 4)) + assert result == (2, 4) def test_slice_locs_not_contained(self): # some searchsorted action @@ -1244,22 +1244,22 @@ def test_slice_locs_not_contained(self): [0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0) result = index.slice_locs((1, 0), (5, 2)) - self.assertEqual(result, (3, 6)) + assert result == (3, 6) result = index.slice_locs(1, 5) - self.assertEqual(result, (3, 6)) + assert result == (3, 6) result = index.slice_locs((2, 2), (5, 2)) - self.assertEqual(result, (3, 6)) + assert result == (3, 6) result = index.slice_locs(2, 5) - self.assertEqual(result, (3, 6)) + assert result == (3, 6) result = index.slice_locs((1, 0), (6, 3)) - self.assertEqual(result, (3, 8)) + assert result == (3, 8) result = index.slice_locs(-1, 10) - self.assertEqual(result, (0, len(index))) + assert result == (0, len(index)) def test_consistency(self): # need to construct an overflow @@ -1374,7 +1374,7 @@ def test_hash_collisions(self): for i in [0, 1, len(index) - 2, len(index) - 1]: result = index.get_loc(index[i]) - self.assertEqual(result, i) + assert result == i def test_format(self): self.index.format() @@ -1391,7 +1391,7 @@ def test_format_sparse_display(self): [0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]]) result = index.format() - self.assertEqual(result[3], '1 0 0 0') + assert result[3] == '1 0 0 0' def test_format_sparse_config(self): warn_filters = warnings.filters @@ -1401,7 +1401,7 @@ def test_format_sparse_config(self): pd.set_option('display.multi_sparse', False) result = self.index.format() - self.assertEqual(result[1], 'foo two') + assert result[1] == 'foo two' tm.reset_display_options() @@ -1452,7 +1452,7 @@ def test_to_hierarchical(self): labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], [0, 0, 0, 1, 1, 1, 0, 0, 
0, 1, 1, 1]]) tm.assert_index_equal(result, expected) - self.assertEqual(result.names, index.names) + assert result.names == index.names # K > 1 result = index.to_hierarchical(3, 2) @@ -1460,7 +1460,7 @@ def test_to_hierarchical(self): labels=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]]) tm.assert_index_equal(result, expected) - self.assertEqual(result.names, index.names) + assert result.names == index.names # non-sorted index = MultiIndex.from_tuples([(2, 'c'), (1, 'b'), @@ -1474,7 +1474,7 @@ def test_to_hierarchical(self): (2, 'b'), (2, 'b')], names=['N1', 'N2']) tm.assert_index_equal(result, expected) - self.assertEqual(result.names, index.names) + assert result.names == index.names def test_bounds(self): self.index._bounds @@ -1655,35 +1655,35 @@ def test_difference(self): assert isinstance(result, MultiIndex) assert result.equals(expected) - self.assertEqual(result.names, self.index.names) + assert result.names == self.index.names # empty difference: reflexive result = self.index.difference(self.index) expected = self.index[:0] assert result.equals(expected) - self.assertEqual(result.names, self.index.names) + assert result.names == self.index.names # empty difference: superset result = self.index[-3:].difference(self.index) expected = self.index[:0] assert result.equals(expected) - self.assertEqual(result.names, self.index.names) + assert result.names == self.index.names # empty difference: degenerate result = self.index[:0].difference(self.index) expected = self.index[:0] assert result.equals(expected) - self.assertEqual(result.names, self.index.names) + assert result.names == self.index.names # names not the same chunklet = self.index[-3:] chunklet.names = ['foo', 'baz'] result = first.difference(chunklet) - self.assertEqual(result.names, (None, None)) + assert result.names == (None, None) # empty, but non-equal result = self.index.difference(self.index.sortlevel(1)[0]) - self.assertEqual(len(result), 0) + assert len(result) == 0 # raise Exception called with non-MultiIndex result = first.difference(first.values) @@ -1692,14 +1692,14 @@ def test_difference(self): # name from empty array result = first.difference([]) assert first.equals(result) - self.assertEqual(first.names, result.names) + assert first.names == result.names # name from non-empty array result = first.difference([('foo', 'one')]) expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ( 'foo', 'two'), ('qux', 'one'), ('qux', 'two')]) expected.names = first.names - self.assertEqual(first.names, result.names) + assert first.names == result.names tm.assert_raises_regex(TypeError, "other must be a MultiIndex " "or a list of tuples", first.difference, [1, 2, 3, 4, 5]) @@ -1710,7 +1710,7 @@ def test_from_tuples(self): MultiIndex.from_tuples, []) idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) - self.assertEqual(len(idx), 2) + assert len(idx) == 2 def test_argsort(self): result = self.index.argsort() @@ -1824,14 +1824,14 @@ def test_drop(self): def test_droplevel_with_names(self): index = self.index[self.index.get_loc('foo')] dropped = index.droplevel(0) - self.assertEqual(dropped.name, 'second') + assert dropped.name == 'second' index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], names=['one', 'two', 'three']) dropped = index.droplevel(0) - self.assertEqual(dropped.names, ('two', 'three')) + assert dropped.names 
== ('two', 'three') dropped = index.droplevel('two') expected = index.droplevel(1) @@ -1873,7 +1873,7 @@ def test_insert(self): # key contained in all levels new_index = self.index.insert(0, ('bar', 'two')) assert new_index.equal_levels(self.index) - self.assertEqual(new_index[0], ('bar', 'two')) + assert new_index[0] == ('bar', 'two') # key not contained in all levels new_index = self.index.insert(0, ('abc', 'three')) @@ -1883,7 +1883,7 @@ def test_insert(self): exp1 = Index(list(self.index.levels[1]) + ['three'], name='second') tm.assert_index_equal(new_index.levels[1], exp1) - self.assertEqual(new_index[0], ('abc', 'three')) + assert new_index[0] == ('abc', 'three') # key wrong length msg = "Item must have length equal to number of levels" @@ -1937,7 +1937,7 @@ def test_insert(self): def test_take_preserve_name(self): taken = self.index.take([3, 0, 1]) - self.assertEqual(taken.names, self.index.names) + assert taken.names == self.index.names def test_take_fill_value(self): # GH 12631 @@ -2203,7 +2203,7 @@ def check(nlevels, with_nulls): for a in [101, 102]: mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) assert not mi.has_duplicates - self.assertEqual(mi.get_duplicates(), []) + assert mi.get_duplicates() == [] tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( 2, dtype='bool')) @@ -2213,9 +2213,9 @@ def check(nlevels, with_nulls): lab = product(range(-1, n), range(-1, m)) mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]], labels=np.random.permutation(list(lab)).T) - self.assertEqual(len(mi), (n + 1) * (m + 1)) + assert len(mi) == (n + 1) * (m + 1) assert not mi.has_duplicates - self.assertEqual(mi.get_duplicates(), []) + assert mi.get_duplicates() == [] tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( len(mi), dtype='bool')) @@ -2228,7 +2228,7 @@ def test_duplicate_meta_data(self): index.set_names([None, 'Num']), index.set_names(['Upper', 'Num']), ]: assert idx.has_duplicates - self.assertEqual(idx.drop_duplicates().names, idx.names) + assert idx.drop_duplicates().names == idx.names def test_get_unique_index(self): idx = self.index[[0, 1, 0, 1, 1, 0, 0]] @@ -2274,7 +2274,7 @@ def test_unique_datetimelike(self): def test_tolist(self): result = self.index.tolist() exp = list(self.index.values) - self.assertEqual(result, exp) + assert result == exp def test_repr_with_unicode_data(self): with pd.core.config.option_context("display.encoding", 'UTF-8'): @@ -2294,10 +2294,8 @@ def test_repr_roundtrip(self): result = eval(repr(mi)) # string coerces to unicode tm.assert_index_equal(result, mi, exact=False) - self.assertEqual( - mi.get_level_values('first').inferred_type, 'string') - self.assertEqual( - result.get_level_values('first').inferred_type, 'unicode') + assert mi.get_level_values('first').inferred_type == 'string' + assert result.get_level_values('first').inferred_type == 'unicode' mi_u = MultiIndex.from_product( [list(u'ab'), range(3)], names=['first', 'second']) @@ -2313,7 +2311,6 @@ def test_repr_roundtrip(self): # long format mi = MultiIndex.from_product([list('abcdefg'), range(10)], names=['first', 'second']) - result = str(mi) if PY3: tm.assert_index_equal(eval(repr(mi)), mi, exact=True) @@ -2321,13 +2318,9 @@ def test_repr_roundtrip(self): result = eval(repr(mi)) # string coerces to unicode tm.assert_index_equal(result, mi, exact=False) - self.assertEqual( - mi.get_level_values('first').inferred_type, 'string') - self.assertEqual( - result.get_level_values('first').inferred_type, 'unicode') + assert mi.get_level_values('first').inferred_type == 
'string' + assert result.get_level_values('first').inferred_type == 'unicode' - mi = MultiIndex.from_product( - [list(u'abcdefg'), range(10)], names=['first', 'second']) result = eval(repr(mi_u)) tm.assert_index_equal(result, mi_u, exact=True) @@ -2356,7 +2349,7 @@ def test_bytestring_with_unicode(self): def test_slice_keep_name(self): x = MultiIndex.from_tuples([('a', 'b'), (1, 2), ('c', 'd')], names=['x', 'y']) - self.assertEqual(x[1:].names, x.names) + assert x[1:].names == x.names def test_isnull_behavior(self): # should not segfault GH5123 @@ -2510,8 +2503,8 @@ def test_isin(self): # empty, return dtype bool idx = MultiIndex.from_arrays([[], []]) result = idx.isin(values) - self.assertEqual(len(result), 0) - self.assertEqual(result.dtype, np.bool_) + assert len(result) == 0 + assert result.dtype == np.bool_ def test_isin_nan(self): idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) @@ -2556,39 +2549,33 @@ def test_reindex_preserves_names_when_target_is_list_or_ndarray(self): other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]]) # list & ndarray cases - self.assertEqual(idx.reindex([])[0].names, [None, None]) - self.assertEqual(idx.reindex(np.array([]))[0].names, [None, None]) - self.assertEqual(idx.reindex(target.tolist())[0].names, [None, None]) - self.assertEqual(idx.reindex(target.values)[0].names, [None, None]) - self.assertEqual( - idx.reindex(other_dtype.tolist())[0].names, [None, None]) - self.assertEqual( - idx.reindex(other_dtype.values)[0].names, [None, None]) + assert idx.reindex([])[0].names == [None, None] + assert idx.reindex(np.array([]))[0].names == [None, None] + assert idx.reindex(target.tolist())[0].names == [None, None] + assert idx.reindex(target.values)[0].names == [None, None] + assert idx.reindex(other_dtype.tolist())[0].names == [None, None] + assert idx.reindex(other_dtype.values)[0].names == [None, None] idx.names = ['foo', 'bar'] - self.assertEqual(idx.reindex([])[0].names, ['foo', 'bar']) - self.assertEqual(idx.reindex(np.array([]))[0].names, ['foo', 'bar']) - self.assertEqual(idx.reindex(target.tolist())[0].names, ['foo', 'bar']) - self.assertEqual(idx.reindex(target.values)[0].names, ['foo', 'bar']) - self.assertEqual( - idx.reindex(other_dtype.tolist())[0].names, ['foo', 'bar']) - self.assertEqual( - idx.reindex(other_dtype.values)[0].names, ['foo', 'bar']) + assert idx.reindex([])[0].names == ['foo', 'bar'] + assert idx.reindex(np.array([]))[0].names == ['foo', 'bar'] + assert idx.reindex(target.tolist())[0].names == ['foo', 'bar'] + assert idx.reindex(target.values)[0].names == ['foo', 'bar'] + assert idx.reindex(other_dtype.tolist())[0].names == ['foo', 'bar'] + assert idx.reindex(other_dtype.values)[0].names == ['foo', 'bar'] def test_reindex_lvl_preserves_names_when_target_is_list_or_array(self): # GH7774 idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']], names=['foo', 'bar']) - self.assertEqual(idx.reindex([], level=0)[0].names, ['foo', 'bar']) - self.assertEqual(idx.reindex([], level=1)[0].names, ['foo', 'bar']) + assert idx.reindex([], level=0)[0].names == ['foo', 'bar'] + assert idx.reindex([], level=1)[0].names == ['foo', 'bar'] def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(self): # GH7774 idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) - self.assertEqual(idx.reindex([], level=0)[0].levels[0].dtype.type, - np.int64) - self.assertEqual(idx.reindex([], level=1)[0].levels[1].dtype.type, - np.object_) + assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64 + assert idx.reindex([], 
level=1)[0].levels[1].dtype.type == np.object_ def test_groupby(self): groups = self.index.groupby(np.array([1, 1, 1, 2, 2, 2])) @@ -2781,7 +2768,7 @@ def test_unsortedindex(self): with pytest.raises(UnsortedIndexError): df.loc(axis=0)['z', :] df.sort_index(inplace=True) - self.assertEqual(len(df.loc(axis=0)['z', :]), 2) + assert len(df.loc(axis=0)['z', :]) == 2 with pytest.raises(KeyError): df.loc(axis=0)['q', :] diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 68a329a7f741f..19bca875e650d 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -216,15 +216,15 @@ def test_constructor(self): assert isinstance(index, Float64Index) index = Float64Index(np.array([1., 2, 3, 4, 5])) assert isinstance(index, Float64Index) - self.assertEqual(index.dtype, float) + assert index.dtype == float index = Float64Index(np.array([1., 2, 3, 4, 5]), dtype=np.float32) assert isinstance(index, Float64Index) - self.assertEqual(index.dtype, np.float64) + assert index.dtype == np.float64 index = Float64Index(np.array([1, 2, 3, 4, 5]), dtype=np.float32) assert isinstance(index, Float64Index) - self.assertEqual(index.dtype, np.float64) + assert index.dtype == np.float64 # nan handling result = Float64Index([np.nan, np.nan]) @@ -336,13 +336,13 @@ def test_get_indexer(self): def test_get_loc(self): idx = Float64Index([0.0, 1.0, 2.0]) for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(1, method), 1) + assert idx.get_loc(1, method) == 1 if method is not None: - self.assertEqual(idx.get_loc(1, method, tolerance=0), 1) + assert idx.get_loc(1, method, tolerance=0) == 1 for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: - self.assertEqual(idx.get_loc(1.1, method), loc) - self.assertEqual(idx.get_loc(1.1, method, tolerance=0.9), loc) + assert idx.get_loc(1.1, method) == loc + assert idx.get_loc(1.1, method, tolerance=0.9) == loc pytest.raises(KeyError, idx.get_loc, 'foo') pytest.raises(KeyError, idx.get_loc, 1.5) @@ -354,21 +354,21 @@ def test_get_loc(self): def test_get_loc_na(self): idx = Float64Index([np.nan, 1, 2]) - self.assertEqual(idx.get_loc(1), 1) - self.assertEqual(idx.get_loc(np.nan), 0) + assert idx.get_loc(1) == 1 + assert idx.get_loc(np.nan) == 0 idx = Float64Index([np.nan, 1, np.nan]) - self.assertEqual(idx.get_loc(1), 1) + assert idx.get_loc(1) == 1 # representable by slice [0:2:2] # pytest.raises(KeyError, idx.slice_locs, np.nan) sliced = idx.slice_locs(np.nan) assert isinstance(sliced, tuple) - self.assertEqual(sliced, (0, 3)) + assert sliced == (0, 3) # not representable by slice idx = Float64Index([np.nan, 1, np.nan, np.nan]) - self.assertEqual(idx.get_loc(1), 1) + assert idx.get_loc(1) == 1 pytest.raises(KeyError, idx.slice_locs, np.nan) def test_contains_nans(self): @@ -400,7 +400,7 @@ def test_astype_from_object(self): index = Index([1.0, np.nan, 0.2], dtype='object') result = index.astype(float) expected = Float64Index([1.0, np.nan, 0.2]) - self.assertEqual(result.dtype, expected.dtype) + assert result.dtype == expected.dtype tm.assert_index_equal(result, expected) def test_fillna_float64(self): @@ -454,7 +454,7 @@ def test_view(self): i = self._holder([], name='Foo') i_view = i.view() - self.assertEqual(i_view.name, 'Foo') + assert i_view.name == 'Foo' i_view = i.view(self._dtype) tm.assert_index_equal(i, self._holder(i_view, name='Foo')) @@ -478,8 +478,8 @@ def test_is_monotonic(self): def test_logical_compat(self): idx = self.create_index() - 
self.assertEqual(idx.all(), idx.values.all())
-        self.assertEqual(idx.any(), idx.values.any())
+        assert idx.all() == idx.values.all()
+        assert idx.any() == idx.values.any()

     def test_identical(self):
         i = Index(self.index.copy())
@@ -546,12 +546,12 @@ def test_view_index(self):

     def test_prevent_casting(self):
         result = self.index.astype('O')
-        self.assertEqual(result.dtype, np.object_)
+        assert result.dtype == np.object_

     def test_take_preserve_name(self):
         index = self._holder([1, 2, 3, 4], name='foo')
         taken = index.take([3, 0, 1])
-        self.assertEqual(index.name, taken.name)
+        assert index.name == taken.name

     def test_take_fill_value(self):
         # see gh-12631
@@ -584,7 +584,7 @@ def test_take_fill_value(self):

     def test_slice_keep_name(self):
         idx = self._holder([1, 2], name='asdf')
-        self.assertEqual(idx.name, idx[1:].name)
+        assert idx.name == idx[1:].name

     def test_ufunc_coercions(self):
         idx = self._holder([1, 2, 3, 4, 5], name='x')
@@ -666,7 +666,7 @@ def test_constructor(self):
     def test_constructor_corner(self):
         arr = np.array([1, 2, 3, 4], dtype=object)
         index = Int64Index(arr)
-        self.assertEqual(index.values.dtype, np.int64)
+        assert index.values.dtype == np.int64
         tm.assert_index_equal(index, Index(arr))

         # preventing casting
diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py
index 49536be1aa57c..0379718b004e1 100644
--- a/pandas/tests/indexes/test_range.py
+++ b/pandas/tests/indexes/test_range.py
@@ -70,22 +70,22 @@ def test_constructor(self):
         index = RangeIndex(5)
         expected = np.arange(5, dtype=np.int64)
         assert isinstance(index, RangeIndex)
-        self.assertEqual(index._start, 0)
-        self.assertEqual(index._stop, 5)
-        self.assertEqual(index._step, 1)
-        self.assertEqual(index.name, None)
+        assert index._start == 0
+        assert index._stop == 5
+        assert index._step == 1
+        assert index.name is None
         tm.assert_index_equal(Index(expected), index)

         index = RangeIndex(1, 5)
         expected = np.arange(1, 5, dtype=np.int64)
         assert isinstance(index, RangeIndex)
-        self.assertEqual(index._start, 1)
+        assert index._start == 1
         tm.assert_index_equal(Index(expected), index)

         index = RangeIndex(1, 5, 2)
         expected = np.arange(1, 5, 2, dtype=np.int64)
         assert isinstance(index, RangeIndex)
-        self.assertEqual(index._step, 2)
+        assert index._step == 2
         tm.assert_index_equal(Index(expected), index)

         msg = "RangeIndex\\(\\.\\.\\.\\) must be called with integers"
@@ -96,9 +96,9 @@ def test_constructor(self):
                       RangeIndex(0, 0)]:
             expected = np.empty(0, dtype=np.int64)
             assert isinstance(index, RangeIndex)
-            self.assertEqual(index._start, 0)
-            self.assertEqual(index._stop, 0)
-            self.assertEqual(index._step, 1)
+            assert index._start == 0
+            assert index._stop == 0
+            assert index._step == 1
             tm.assert_index_equal(Index(expected), index)

         with tm.assert_raises_regex(TypeError, msg):
@@ -109,7 +109,7 @@ def test_constructor(self):
                       RangeIndex(stop=0, name='Foo'),
                       RangeIndex(0, 0, name='Foo')]:
             assert isinstance(index, RangeIndex)
-            self.assertEqual(index.name, 'Foo')
+            assert index.name == 'Foo'

         # we don't allow on a bare Index
         pytest.raises(TypeError, lambda: Index(0, 1000))
@@ -246,7 +246,7 @@ def test_numeric_compat2(self):
     def test_constructor_corner(self):
         arr = np.array([1, 2, 3, 4], dtype=object)
         index = RangeIndex(1, 5)
-        self.assertEqual(index.values.dtype, np.int64)
+        assert index.values.dtype == np.int64
         tm.assert_index_equal(index, Index(arr))

         # non-int raise Exception
@@ -261,10 +261,10 @@ def test_copy(self):
         i_copy = i.copy()
         assert i_copy is not i
         assert i_copy.identical(i)
-        self.assertEqual(i_copy._start, 0)
-        self.assertEqual(i_copy._stop, 5)
-        self.assertEqual(i_copy._step, 1)
-        self.assertEqual(i_copy.name, 'Foo')
+        assert i_copy._start == 0
+        assert i_copy._stop == 5
+        assert i_copy._step == 1
+        assert i_copy.name == 'Foo'

     def test_repr(self):
         i = RangeIndex(5, name='Foo')
@@ -281,7 +281,7 @@ def test_repr(self):
         i = RangeIndex(5, 0, -1)
         result = repr(i)
         expected = "RangeIndex(start=5, stop=0, step=-1)"
-        self.assertEqual(result, expected)
+        assert result == expected

         result = eval(result)
         tm.assert_index_equal(result, i, exact=True)
@@ -300,12 +300,12 @@ def test_delete(self):
         expected = idx[1:].astype(int)
         result = idx.delete(0)
         tm.assert_index_equal(result, expected)
-        self.assertEqual(result.name, expected.name)
+        assert result.name == expected.name

         expected = idx[:-1].astype(int)
         result = idx.delete(-1)
         tm.assert_index_equal(result, expected)
-        self.assertEqual(result.name, expected.name)
+        assert result.name == expected.name

         with pytest.raises((IndexError, ValueError)):
             # either depending on numpy version
@@ -316,7 +316,7 @@ def test_view(self):
         i = RangeIndex(0, name='Foo')
         i_view = i.view()
-        self.assertEqual(i_view.name, 'Foo')
+        assert i_view.name == 'Foo'

         i_view = i.view('i8')
         tm.assert_numpy_array_equal(i.values, i_view)
@@ -325,7 +325,7 @@ def test_view(self):
         tm.assert_index_equal(i, i_view)

     def test_dtype(self):
-        self.assertEqual(self.index.dtype, np.int64)
+        assert self.index.dtype == np.int64

     def test_is_monotonic(self):
         assert self.index.is_monotonic
@@ -362,8 +362,8 @@ def test_equals_range(self):

     def test_logical_compat(self):
         idx = self.create_index()
-        self.assertEqual(idx.all(), idx.values.all())
-        self.assertEqual(idx.any(), idx.values.any())
+        assert idx.all() == idx.values.all()
+        assert idx.any() == idx.values.any()

     def test_identical(self):
         i = Index(self.index.copy())
@@ -636,7 +636,7 @@ def test_intersect_str_dates(self):
         i2 = Index(['aa'], dtype=object)
         res = i2.intersection(i1)
-        self.assertEqual(len(res), 0)
+        assert len(res) == 0

     def test_union_noncomparable(self):
         from datetime import datetime, timedelta
@@ -692,7 +692,7 @@ def test_nbytes(self):

         # constant memory usage
         i2 = RangeIndex(0, 10)
-        self.assertEqual(i.nbytes, i2.nbytes)
+        assert i.nbytes == i2.nbytes

     def test_cant_or_shouldnt_cast(self):
         # can't
@@ -706,12 +706,12 @@ def test_view_Index(self):

     def test_prevent_casting(self):
         result = self.index.astype('O')
-        self.assertEqual(result.dtype, np.object_)
+        assert result.dtype == np.object_

     def test_take_preserve_name(self):
         index = RangeIndex(1, 5, name='foo')
         taken = index.take([3, 0, 1])
-        self.assertEqual(index.name, taken.name)
+        assert index.name == taken.name

     def test_take_fill_value(self):
         # GH 12631
@@ -751,7 +751,7 @@ def test_repr_roundtrip(self):

     def test_slice_keep_name(self):
         idx = RangeIndex(1, 2, name='asdf')
-        self.assertEqual(idx.name, idx[1:].name)
+        assert idx.name == idx[1:].name

     def test_explicit_conversions(self):

@@ -794,48 +794,48 @@ def test_ufunc_compat(self):

     def test_extended_gcd(self):
         result = self.index._extended_gcd(6, 10)
-        self.assertEqual(result[0], result[1] * 6 + result[2] * 10)
-        self.assertEqual(2, result[0])
+        assert result[0] == result[1] * 6 + result[2] * 10
+        assert 2 == result[0]

         result = self.index._extended_gcd(10, 6)
-        self.assertEqual(2, result[1] * 10 + result[2] * 6)
-        self.assertEqual(2, result[0])
+        assert 2 == result[1] * 10 + result[2] * 6
+        assert 2 == result[0]

     def test_min_fitting_element(self):
         result = RangeIndex(0, 20, 2)._min_fitting_element(1)
-        self.assertEqual(2, result)
+        assert 2 == result
         result = RangeIndex(1, 6)._min_fitting_element(1)
-        self.assertEqual(1, result)
+        assert 1 == result

         result = RangeIndex(18, -2, -2)._min_fitting_element(1)
-        self.assertEqual(2, result)
+        assert 2 == result

         result = RangeIndex(5, 0, -1)._min_fitting_element(1)
-        self.assertEqual(1, result)
+        assert 1 == result

         big_num = 500000000000000000000000
         result = RangeIndex(5, big_num * 2, 1)._min_fitting_element(big_num)
-        self.assertEqual(big_num, result)
+        assert big_num == result

     def test_max_fitting_element(self):
         result = RangeIndex(0, 20, 2)._max_fitting_element(17)
-        self.assertEqual(16, result)
+        assert 16 == result

         result = RangeIndex(1, 6)._max_fitting_element(4)
-        self.assertEqual(4, result)
+        assert 4 == result

         result = RangeIndex(18, -2, -2)._max_fitting_element(17)
-        self.assertEqual(16, result)
+        assert 16 == result

         result = RangeIndex(5, 0, -1)._max_fitting_element(4)
-        self.assertEqual(4, result)
+        assert 4 == result

         big_num = 500000000000000000000000
         result = RangeIndex(5, big_num * 2, 1)._max_fitting_element(big_num)
-        self.assertEqual(big_num, result)
+        assert big_num == result

     def test_pickle_compat_construction(self):
         # RangeIndex() is a valid constructor
@@ -846,11 +846,11 @@ def test_slice_specialised(self):
         # scalar indexing
         res = self.index[1]
         expected = 2
-        self.assertEqual(res, expected)
+        assert res == expected

         res = self.index[-1]
         expected = 18
-        self.assertEqual(res, expected)
+        assert res == expected

         # slicing
         # slice value completion
@@ -903,19 +903,19 @@ def test_len_specialised(self):
             arr = np.arange(0, 5, step)
             i = RangeIndex(0, 5, step)
-            self.assertEqual(len(i), len(arr))
+            assert len(i) == len(arr)

             i = RangeIndex(5, 0, step)
-            self.assertEqual(len(i), 0)
+            assert len(i) == 0

         for step in np.arange(-6, -1, 1):
             arr = np.arange(5, 0, step)
             i = RangeIndex(5, 0, step)
-            self.assertEqual(len(i), len(arr))
+            assert len(i) == len(arr)

             i = RangeIndex(0, 5, step)
-            self.assertEqual(len(i), 0)
+            assert len(i) == 0

     def test_where(self):
         i = self.create_index()
diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py
index 6681a03a3b271..bdaa62c5ce221 100644
--- a/pandas/tests/indexes/timedeltas/test_construction.py
+++ b/pandas/tests/indexes/timedeltas/test_construction.py
@@ -81,8 +81,8 @@ def test_constructor_coverage(self):
     def test_constructor_name(self):
         idx = TimedeltaIndex(start='1 days', periods=1, freq='D',
                              name='TEST')
-        self.assertEqual(idx.name, 'TEST')
+        assert idx.name == 'TEST'

         # GH10025
         idx2 = TimedeltaIndex(idx, name='something else')
-        self.assertEqual(idx2.name, 'something else')
+        assert idx2.name == 'something else'
diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py
index 58b83dde5f402..6ffe3516c4a94 100644
--- a/pandas/tests/indexes/timedeltas/test_indexing.py
+++ b/pandas/tests/indexes/timedeltas/test_indexing.py
@@ -76,8 +76,8 @@ def test_delete(self):
         for n, expected in compat.iteritems(cases):
             result = idx.delete(n)
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.name, expected.name)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.name == expected.name
+            assert result.freq == expected.freq

         with pytest.raises((IndexError, ValueError)):
             # either depending on numpy version
@@ -103,10 +103,10 @@ def test_delete_slice(self):
         for n, expected in compat.iteritems(cases):
             result = idx.delete(n)
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.name, expected.name)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.name == expected.name
+            assert result.freq == expected.freq

             result = idx.delete(slice(n[0], n[-1] + 1))
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.name, expected.name)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.name == expected.name
+            assert result.freq == expected.freq
diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py
index feaec50264872..474dd283530c5 100644
--- a/pandas/tests/indexes/timedeltas/test_ops.py
+++ b/pandas/tests/indexes/timedeltas/test_ops.py
@@ -35,10 +35,10 @@ def test_asobject_tolist(self):
         result = idx.asobject
         assert isinstance(result, Index)

-        self.assertEqual(result.dtype, object)
+        assert result.dtype == object
         tm.assert_index_equal(result, expected)
-        self.assertEqual(result.name, expected.name)
-        self.assertEqual(idx.tolist(), expected_list)
+        assert result.name == expected.name
+        assert idx.tolist() == expected_list

         idx = TimedeltaIndex([timedelta(days=1), timedelta(days=2), pd.NaT,
                               timedelta(days=4)], name='idx')
@@ -47,10 +47,10 @@ def test_asobject_tolist(self):
         expected = pd.Index(expected_list, dtype=object, name='idx')
         result = idx.asobject
         assert isinstance(result, Index)
-        self.assertEqual(result.dtype, object)
+        assert result.dtype == object
         tm.assert_index_equal(result, expected)
-        self.assertEqual(result.name, expected.name)
-        self.assertEqual(idx.tolist(), expected_list)
+        assert result.name == expected.name
+        assert idx.tolist() == expected_list

     def test_minmax(self):

@@ -63,10 +63,10 @@ def test_minmax(self):
         assert not idx2.is_monotonic

         for idx in [idx1, idx2]:
-            self.assertEqual(idx.min(), Timedelta('1 days')),
-            self.assertEqual(idx.max(), Timedelta('3 days')),
-            self.assertEqual(idx.argmin(), 0)
-            self.assertEqual(idx.argmax(), 2)
+            assert idx.min() == Timedelta('1 days')
+            assert idx.max() == Timedelta('3 days')
+            assert idx.argmin() == 0
+            assert idx.argmax() == 2

         for op in ['min', 'max']:
             # Return NaT
@@ -83,15 +83,15 @@ def test_numpy_minmax(self):
         dr = pd.date_range(start='2016-01-15', end='2016-01-20')
         td = TimedeltaIndex(np.asarray(dr))

-        self.assertEqual(np.min(td), Timedelta('16815 days'))
-        self.assertEqual(np.max(td), Timedelta('16820 days'))
+        assert np.min(td) == Timedelta('16815 days')
+        assert np.max(td) == Timedelta('16820 days')

         errmsg = "the 'out' parameter is not supported"
         tm.assert_raises_regex(ValueError, errmsg, np.min, td, out=0)
         tm.assert_raises_regex(ValueError, errmsg, np.max, td, out=0)

-        self.assertEqual(np.argmin(td), 0)
-        self.assertEqual(np.argmax(td), 5)
+        assert np.argmin(td) == 0
+        assert np.argmax(td) == 5

         if not _np_version_under1p10:
             errmsg = "the 'out' parameter is not supported"
@@ -114,7 +114,7 @@ def test_round(self):
         expected_elt = expected_rng[1]

         tm.assert_index_equal(td.round(freq='H'), expected_rng)
-        self.assertEqual(elt.round(freq='H'), expected_elt)
+        assert elt.round(freq='H') == expected_elt

         msg = pd.tseries.frequencies._INVALID_FREQ_ERROR
         with tm.assert_raises_regex(ValueError, msg):
@@ -152,7 +152,7 @@ def test_representation(self):
                                  [exp1, exp2, exp3, exp4, exp5]):
             for func in ['__repr__', '__unicode__', '__str__']:
                 result = getattr(idx, func)()
-                self.assertEqual(result, expected)
+                assert result == expected

     def test_representation_to_series(self):
         idx1 = TimedeltaIndex([], freq='D')
@@ -184,7 +184,7 @@ def test_representation_to_series(self):
         for idx, expected in zip([idx1, idx2, idx3, idx4, idx5],
                                  [exp1, exp2, exp3, exp4, exp5]):
             result = repr(pd.Series(idx))
-            self.assertEqual(result, expected)
+            assert result == expected

     def test_summary(self):
         # GH9116
@@ -212,7 +212,7 @@ def test_summary(self):
         for idx, expected in zip([idx1, idx2, idx3, idx4, idx5],
                                  [exp1, exp2, exp3, exp4, exp5]):
             result = idx.summary()
-            self.assertEqual(result, expected)
+            assert result == expected

     def test_add_iadd(self):

@@ -355,7 +355,7 @@ def test_subtraction_ops_with_tz(self):
         td = Timedelta('1 days')

         def _check(result, expected):
-            self.assertEqual(result, expected)
+            assert result == expected
             assert isinstance(result, Timedelta)

         # scalars
@@ -491,11 +491,11 @@ def test_addition_ops(self):

         result = dt + td
         expected = Timestamp('20130102')
-        self.assertEqual(result, expected)
+        assert result == expected

         result = td + dt
         expected = Timestamp('20130102')
-        self.assertEqual(result, expected)
+        assert result == expected

     def test_comp_nat(self):
         left = pd.TimedeltaIndex([pd.Timedelta('1 days'), pd.NaT,
@@ -582,25 +582,25 @@ def test_order(self):
         for idx in [idx1, idx2]:
             ordered = idx.sort_values()
             tm.assert_index_equal(ordered, idx)
-            self.assertEqual(ordered.freq, idx.freq)
+            assert ordered.freq == idx.freq

             ordered = idx.sort_values(ascending=False)
             expected = idx[::-1]
             tm.assert_index_equal(ordered, expected)
-            self.assertEqual(ordered.freq, expected.freq)
-            self.assertEqual(ordered.freq.n, -1)
+            assert ordered.freq == expected.freq
+            assert ordered.freq.n == -1

             ordered, indexer = idx.sort_values(return_indexer=True)
             tm.assert_index_equal(ordered, idx)
             tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]),
                                         check_dtype=False)
-            self.assertEqual(ordered.freq, idx.freq)
+            assert ordered.freq == idx.freq

             ordered, indexer = idx.sort_values(return_indexer=True,
                                                ascending=False)
             tm.assert_index_equal(ordered, idx[::-1])
-            self.assertEqual(ordered.freq, expected.freq)
-            self.assertEqual(ordered.freq.n, -1)
+            assert ordered.freq == expected.freq
+            assert ordered.freq.n == -1

         idx1 = TimedeltaIndex(['1 hour', '3 hour', '5 hour',
                                '2 hour ', '1 hour'], name='idx1')
@@ -648,39 +648,39 @@ def test_getitem(self):

         for idx in [idx1]:
             result = idx[0]
-            self.assertEqual(result, pd.Timedelta('1 day'))
+            assert result == pd.Timedelta('1 day')

             result = idx[0:5]
             expected = pd.timedelta_range('1 day', '5 day', freq='D',
                                           name='idx')
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.freq == expected.freq

             result = idx[0:10:2]
             expected = pd.timedelta_range('1 day', '9 day', freq='2D',
                                           name='idx')
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.freq == expected.freq

             result = idx[-20:-5:3]
             expected = pd.timedelta_range('12 day', '24 day', freq='3D',
                                           name='idx')
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.freq == expected.freq

             result = idx[4::-1]
             expected = TimedeltaIndex(['5 day', '4 day', '3 day',
                                        '2 day', '1 day'],
                                       freq='-1D', name='idx')
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.freq == expected.freq

     def test_drop_duplicates_metadata(self):
         # GH 10115
         idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx')
         result = idx.drop_duplicates()
         tm.assert_index_equal(idx, result)
-        self.assertEqual(idx.freq, result.freq)
+        assert idx.freq == result.freq

         idx_dup = idx.append(idx)
         assert idx_dup.freq is None  # freq is reset
@@ -715,28 +715,28 @@ def test_take(self):

         for idx in [idx1]:
             result = idx.take([0])
-            self.assertEqual(result, pd.Timedelta('1 day'))
+            assert result == pd.Timedelta('1 day')

             result = idx.take([-1])
-            self.assertEqual(result, pd.Timedelta('31 day'))
+            assert result == pd.Timedelta('31 day')

             result = idx.take([0, 1, 2])
             expected = pd.timedelta_range('1 day', '3 day', freq='D',
                                           name='idx')
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.freq == expected.freq

             result = idx.take([0, 2, 4])
             expected = pd.timedelta_range('1 day', '5 day', freq='2D',
                                           name='idx')
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.freq == expected.freq

             result = idx.take([7, 4, 1])
             expected = pd.timedelta_range('8 day', '2 day', freq='-3D',
                                           name='idx')
             tm.assert_index_equal(result, expected)
-            self.assertEqual(result.freq, expected.freq)
+            assert result.freq == expected.freq

             result = idx.take([3, 2, 5])
             expected = TimedeltaIndex(['4 day', '3 day', '6 day'], name='idx')
@@ -771,7 +771,7 @@ def test_infer_freq(self):
             idx = pd.timedelta_range('1', freq=freq, periods=10)
             result = pd.TimedeltaIndex(idx.asi8, freq='infer')
             tm.assert_index_equal(idx, result)
-            self.assertEqual(result.freq, freq)
+            assert result.freq == freq

     def test_nat_new(self):

@@ -867,27 +867,27 @@ class TestTimedeltas(tm.TestCase):

     def test_ops(self):

         td = Timedelta(10, unit='d')
-        self.assertEqual(-td, Timedelta(-10, unit='d'))
-        self.assertEqual(+td, Timedelta(10, unit='d'))
-        self.assertEqual(td - td, Timedelta(0, unit='ns'))
+        assert -td == Timedelta(-10, unit='d')
+        assert +td == Timedelta(10, unit='d')
+        assert td - td == Timedelta(0, unit='ns')

         assert (td - pd.NaT) is pd.NaT

-        self.assertEqual(td + td, Timedelta(20, unit='d'))
+        assert td + td == Timedelta(20, unit='d')

         assert (td + pd.NaT) is pd.NaT

-        self.assertEqual(td * 2, Timedelta(20, unit='d'))
+        assert td * 2 == Timedelta(20, unit='d')

         assert (td * pd.NaT) is pd.NaT

-        self.assertEqual(td / 2, Timedelta(5, unit='d'))
-        self.assertEqual(td // 2, Timedelta(5, unit='d'))
-        self.assertEqual(abs(td), td)
-        self.assertEqual(abs(-td), td)
-        self.assertEqual(td / td, 1)
+        assert td / 2 == Timedelta(5, unit='d')
+        assert td // 2 == Timedelta(5, unit='d')
+        assert abs(td) == td
+        assert abs(-td) == td
+        assert td / td == 1

         assert (td / pd.NaT) is np.nan
         assert (td // pd.NaT) is np.nan

         # invert
-        self.assertEqual(-td, Timedelta('-10d'))
-        self.assertEqual(td * -1, Timedelta('-10d'))
-        self.assertEqual(-1 * td, Timedelta('-10d'))
-        self.assertEqual(abs(-td), Timedelta('10d'))
+        assert -td == Timedelta('-10d')
+        assert td * -1 == Timedelta('-10d')
+        assert -1 * td == Timedelta('-10d')
+        assert abs(-td) == Timedelta('10d')

         # invalid multiply with another timedelta
         pytest.raises(TypeError, lambda: td * td)
@@ -898,12 +898,12 @@ def test_ops(self):

     def test_ops_offsets(self):
         td = Timedelta(10, unit='d')
-        self.assertEqual(Timedelta(241, unit='h'), td + pd.offsets.Hour(1))
-        self.assertEqual(Timedelta(241, unit='h'), pd.offsets.Hour(1) + td)
-        self.assertEqual(240, td / pd.offsets.Hour(1))
-        self.assertEqual(1 / 240.0, pd.offsets.Hour(1) / td)
-        self.assertEqual(Timedelta(239, unit='h'), td - pd.offsets.Hour(1))
-        self.assertEqual(Timedelta(-239, unit='h'), pd.offsets.Hour(1) - td)
+        assert Timedelta(241, unit='h') == td + pd.offsets.Hour(1)
+        assert Timedelta(241, unit='h') == pd.offsets.Hour(1) + td
+        assert 240 == td / pd.offsets.Hour(1)
+        assert 1 / 240.0 == pd.offsets.Hour(1) / td
+        assert Timedelta(239, unit='h') == td - pd.offsets.Hour(1)
+        assert Timedelta(-239, unit='h') == pd.offsets.Hour(1) - td

     def test_ops_ndarray(self):
         td = Timedelta('1 day')
@@ -961,7 +961,7 @@ def test_ops_series_object(self):
         s = pd.Series([pd.Timestamp('2015-01-01', tz='US/Eastern'),
                        pd.Timestamp('2015-01-01', tz='Asia/Tokyo')],
                       name='xxx')
-        self.assertEqual(s.dtype, object)
+        assert s.dtype == object

         exp = pd.Series([pd.Timestamp('2015-01-02', tz='US/Eastern'),
                          pd.Timestamp('2015-01-02', tz='Asia/Tokyo')],
@@ -973,7 +973,7 @@ def test_ops_series_object(self):
         s2 = pd.Series([pd.Timestamp('2015-01-03', tz='US/Eastern'),
                         pd.Timestamp('2015-01-05', tz='Asia/Tokyo')],
                        name='xxx')
-        self.assertEqual(s2.dtype, object)
+        assert s2.dtype == object
         exp = pd.Series([pd.Timedelta('2 days'), pd.Timedelta('4 days')],
                         name='xxx')
         tm.assert_series_equal(s2 - s, exp)
@@ -981,7 +981,7 @@ def test_ops_series_object(self):
         s = pd.Series([pd.Timedelta('01:00:00'), pd.Timedelta('02:00:00')],
                       name='xxx', dtype=object)
-        self.assertEqual(s.dtype, object)
+        assert s.dtype == object

         exp = pd.Series([pd.Timedelta('01:30:00'), pd.Timedelta('02:30:00')],
                         name='xxx')
@@ -1027,38 +1027,38 @@ def test_timedelta_ops(self):

         result = td.mean()
         expected = to_timedelta(timedelta(seconds=9))
-        self.assertEqual(result, expected)
+        assert result == expected

         result = td.to_frame().mean()
-        self.assertEqual(result[0], expected)
+        assert result[0] == expected

         result = td.quantile(.1)
         expected = Timedelta(np.timedelta64(2600, 'ms'))
-        self.assertEqual(result, expected)
+        assert result == expected

         result = td.median()
         expected = to_timedelta('00:00:09')
-        self.assertEqual(result, expected)
+        assert result == expected

         result = td.to_frame().median()
-        self.assertEqual(result[0], expected)
+        assert result[0] == expected

         # GH 6462
         # consistency in returned values for sum
         result = td.sum()
         expected = to_timedelta('00:01:21')
-        self.assertEqual(result, expected)
+        assert result == expected

         result = td.to_frame().sum()
-        self.assertEqual(result[0], expected)
+        assert result[0] == expected

         # std
         result = td.std()
         expected = to_timedelta(Series(td.dropna().values).std())
-        self.assertEqual(result, expected)
+        assert result == expected

         result = td.to_frame().std()
-        self.assertEqual(result[0], expected)
+        assert result[0] == expected

         # invalid ops
         for op in ['skew', 'kurt', 'sem', 'prod']:
@@ -1067,11 +1067,11 @@ def test_timedelta_ops(self):
         # GH 10040
         # make sure NaT is properly handled by median()
         s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')])
-        self.assertEqual(s.diff().median(), timedelta(days=4))
+        assert s.diff().median() == timedelta(days=4)

         s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'),
                     Timestamp('2015-02-15')])
-        self.assertEqual(s.diff().median(), timedelta(days=6))
+        assert s.diff().median() == timedelta(days=6)

     def test_timedelta_ops_scalar(self):
         # GH 6808
@@ -1084,10 +1084,10 @@ def test_timedelta_ops_scalar(self):
                        np.timedelta64(10000000000, 'ns'),
                        pd.offsets.Second(10)]:
             result = base + offset
-            self.assertEqual(result, expected_add)
+            assert result == expected_add

             result = base - offset
-            self.assertEqual(result, expected_sub)
+            assert result == expected_sub

         base = pd.to_datetime('20130102 09:01:12.123456')
         expected_add = pd.to_datetime('20130103 09:01:22.123456')
@@ -1099,10 +1099,10 @@ def test_timedelta_ops_scalar(self):
                        np.timedelta64(1, 'D') + np.timedelta64(10, 's'),
                        pd.offsets.Day() + pd.offsets.Second(10)]:
             result = base + offset
-            self.assertEqual(result, expected_add)
+            assert result == expected_add

             result = base - offset
-            self.assertEqual(result, expected_sub)
+            assert result == expected_sub

     def test_timedelta_ops_with_missing_values(self):
         # setup
@@ -1118,9 +1118,9 @@ def test_timedelta_ops_with_missing_values(self):
         NA = np.nan

         actual = scalar1 + scalar1
-        self.assertEqual(actual, scalar2)
+        assert actual == scalar2
         actual = scalar2 - scalar1
-        self.assertEqual(actual, scalar1)
+        assert actual == scalar1

         actual = s1 + s1
         assert_series_equal(actual, s2)
@@ -1217,27 +1217,27 @@ def test_tdi_ops_attributes(self):
         result = rng + 1
         exp = timedelta_range('4 days', periods=5, freq='2D', name='x')
         tm.assert_index_equal(result, exp)
-        self.assertEqual(result.freq, '2D')
+        assert result.freq == '2D'

         result = rng - 2
         exp = timedelta_range('-2 days', periods=5, freq='2D', name='x')
         tm.assert_index_equal(result, exp)
-        self.assertEqual(result.freq, '2D')
+        assert result.freq == '2D'

         result = rng * 2
         exp = timedelta_range('4 days', periods=5, freq='4D', name='x')
         tm.assert_index_equal(result, exp)
-        self.assertEqual(result.freq, '4D')
+        assert result.freq == '4D'

         result = rng / 2
         exp = timedelta_range('1 days', periods=5, freq='D', name='x')
         tm.assert_index_equal(result, exp)
-        self.assertEqual(result.freq, 'D')
+        assert result.freq == 'D'

         result = -rng
         exp = timedelta_range('-2 days', periods=5, freq='-2D', name='x')
         tm.assert_index_equal(result, exp)
-        self.assertEqual(result.freq, '-2D')
+        assert result.freq == '-2D'

         rng = pd.timedelta_range('-2 days', periods=5, freq='D', name='x')

@@ -1245,7 +1245,7 @@ def test_tdi_ops_attributes(self):
         exp = TimedeltaIndex(['2 days', '1 days', '0 days', '1 days',
                               '2 days'], name='x')
         tm.assert_index_equal(result, exp)
-        self.assertEqual(result.freq, None)
+        assert result.freq is None

     def test_add_overflow(self):
         # see gh-14068
diff --git a/pandas/tests/indexes/timedeltas/test_partial_slicing.py b/pandas/tests/indexes/timedeltas/test_partial_slicing.py
index 230dbe91b4e34..5e6e1440a7c04 100644
--- a/pandas/tests/indexes/timedeltas/test_partial_slicing.py
+++ b/pandas/tests/indexes/timedeltas/test_partial_slicing.py
@@ -27,7 +27,7 @@ def test_partial_slice(self):
         assert_series_equal(result, expected)

         result = s['6 days, 23:11:12']
-        self.assertEqual(result, s.iloc[133])
+        assert result == s.iloc[133]

         pytest.raises(KeyError, s.__getitem__, '50 days')

@@ -46,7 +46,7 @@ def test_partial_slice_high_reso(self):
         assert_series_equal(result, expected)

         result = s['1 days, 10:11:12.001001']
-        self.assertEqual(result, s.iloc[1001])
+        assert result == s.iloc[1001]

     def test_slice_with_negative_step(self):
         ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H'))
diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py
index 45900788f7bda..8779f6d49cdd5 100644
--- a/pandas/tests/indexes/timedeltas/test_setops.py
+++ b/pandas/tests/indexes/timedeltas/test_setops.py
@@ -30,7 +30,7 @@ def test_union_coverage(self):

         result = ordered[:0].union(ordered)
         tm.assert_index_equal(result, ordered)
-        self.assertEqual(result.freq, ordered.freq)
+        assert result.freq == ordered.freq

     def test_union_bug_1730(self):

@@ -66,7 +66,7 @@ def test_intersection_bug_1708(self):
         index_2 = index_1 + pd.offsets.Hour(5)

         result = index_1 & index_2
-        self.assertEqual(len(result), 0)
+        assert len(result) == 0

         index_1 = timedelta_range('1 day', periods=4, freq='h')
         index_2 = index_1 + pd.offsets.Hour(1)
diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py
index 8a327d2ecb08f..d1379973dfec5 100644
--- a/pandas/tests/indexes/timedeltas/test_timedelta.py
+++ b/pandas/tests/indexes/timedeltas/test_timedelta.py
@@ -49,29 +49,30 @@ def test_get_loc(self):
         idx = pd.to_timedelta(['0 days', '1 days', '2 days'])

         for method in [None, 'pad', 'backfill', 'nearest']:
-            self.assertEqual(idx.get_loc(idx[1], method), 1)
-            self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1)
-            self.assertEqual(idx.get_loc(str(idx[1]), method), 1)
+            assert idx.get_loc(idx[1], method) == 1
+            assert idx.get_loc(idx[1].to_pytimedelta(), method) == 1
+            assert idx.get_loc(str(idx[1]), method) == 1

-        self.assertEqual(
-            idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1)
-        self.assertEqual(
-            idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1)
-        self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1)
+        assert idx.get_loc(idx[1], 'pad',
+                           tolerance=pd.Timedelta(0)) == 1
+        assert idx.get_loc(idx[1], 'pad',
+                           tolerance=np.timedelta64(0, 's')) == 1
+        assert idx.get_loc(idx[1], 'pad',
+                           tolerance=timedelta(0)) == 1

         with tm.assert_raises_regex(ValueError, 'must be convertible'):
             idx.get_loc(idx[1], method='nearest', tolerance='foo')

         for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]:
-            self.assertEqual(idx.get_loc('1 day 1 hour', method), loc)
+            assert idx.get_loc('1 day 1 hour', method) == loc

     def test_get_loc_nat(self):
         tidx = TimedeltaIndex(['1 days 01:00:00', 'NaT', '2 days 01:00:00'])

-        self.assertEqual(tidx.get_loc(pd.NaT), 1)
-        self.assertEqual(tidx.get_loc(None), 1)
-        self.assertEqual(tidx.get_loc(float('nan')), 1)
-        self.assertEqual(tidx.get_loc(np.nan), 1)
+        assert tidx.get_loc(pd.NaT) == 1
+        assert tidx.get_loc(None) == 1
+        assert tidx.get_loc(float('nan')) == 1
+        assert tidx.get_loc(np.nan) == 1

     def test_get_indexer(self):
         idx = pd.to_timedelta(['0 days', '1 days', '2 days'])
@@ -138,14 +139,14 @@ def test_ufunc_coercions(self):
             exp = TimedeltaIndex(['4H', '8H', '12H', '16H', '20H'],
                                  freq='4H', name='x')
             tm.assert_index_equal(result, exp)
-            self.assertEqual(result.freq, '4H')
+            assert result.freq == '4H'

         for result in [idx / 2, np.divide(idx, 2)]:
             assert isinstance(result, TimedeltaIndex)
             exp = TimedeltaIndex(['1H', '2H', '3H', '4H', '5H'],
                                  freq='H', name='x')
             tm.assert_index_equal(result, exp)
-            self.assertEqual(result.freq, 'H')
+            assert result.freq == 'H'

         idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'],
                              freq='2H', name='x')
@@ -154,7 +155,7 @@ def test_ufunc_coercions(self):
             exp = TimedeltaIndex(['-2H', '-4H', '-6H', '-8H', '-10H'],
                                  freq='-2H', name='x')
             tm.assert_index_equal(result, exp)
-            self.assertEqual(result.freq, '-2H')
+            assert result.freq == '-2H'

         idx = TimedeltaIndex(['-2H', '-1H', '0H', '1H', '2H'],
                              freq='H', name='x')
@@ -163,7 +164,7 @@ def test_ufunc_coercions(self):
             exp = TimedeltaIndex(['2H', '1H', '0H', '1H', '2H'],
                                  freq=None, name='x')
             tm.assert_index_equal(result, exp)
-            self.assertEqual(result.freq, None)
+            assert result.freq is None

     def test_fillna_timedelta(self):
         # GH 11343
@@ -209,7 +210,7 @@ def test_take(self):
         tm.assert_index_equal(taken, expected)
         assert isinstance(taken, TimedeltaIndex)
         assert taken.freq is None
-        self.assertEqual(taken.name, expected.name)
+        assert taken.name == expected.name

     def test_take_fill_value(self):
         # GH 12631
@@ -289,7 +290,7 @@ def test_slice_keeps_name(self):

         # GH4226
         dr = pd.timedelta_range('1d', '5d', freq='H', name='timebucket')
-        self.assertEqual(dr[1:].name, dr.name)
+        assert dr[1:].name == dr.name

     def test_does_not_convert_mixed_integer(self):
         df = tm.makeCustomDataframe(10, 10,
@@ -299,8 +300,8 @@ def test_does_not_convert_mixed_integer(self):
         cols = df.columns.join(df.index, how='outer')
         joined = cols.join(df.columns)
-        self.assertEqual(cols.dtype, np.dtype('O'))
-        self.assertEqual(cols.dtype, joined.dtype)
+        assert cols.dtype == np.dtype('O')
+        assert cols.dtype == joined.dtype
         tm.assert_index_equal(cols, joined)

     def test_sort_values(self):
@@ -336,8 +337,8 @@ def test_get_duplicates(self):
     def test_argmin_argmax(self):
         idx = TimedeltaIndex(['1 day 00:00:05', '1 day 00:00:01',
                               '1 day 00:00:02'])
-        self.assertEqual(idx.argmin(), 1)
-        self.assertEqual(idx.argmax(), 0)
+        assert idx.argmin() == 1
+        assert idx.argmax() == 0

     def test_misc_coverage(self):

@@ -570,8 +571,8 @@ def test_timedelta(self):
         shifted = index + timedelta(1)
         back = shifted + timedelta(-1)
         assert tm.equalContents(index, back)
-        self.assertEqual(shifted.freq, index.freq)
-        self.assertEqual(shifted.freq, back.freq)
+        assert shifted.freq == index.freq
+        assert shifted.freq == back.freq

         result = index - timedelta(1)
         expected = index + timedelta(-1)
diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py
index d69f78bfd73b1..faee627488dc0 100644
--- a/pandas/tests/indexes/timedeltas/test_tools.py
+++ b/pandas/tests/indexes/timedeltas/test_tools.py
@@ -20,16 +20,15 @@ def conv(v):

         d1 = np.timedelta64(1, 'D')

-        self.assertEqual(to_timedelta('1 days 06:05:01.00003', box=False),
-                         conv(d1 + np.timedelta64(6 * 3600 +
-                                                  5 * 60 + 1, 's') +
-                              np.timedelta64(30, 'us')))
-        self.assertEqual(to_timedelta('15.5us', box=False),
-                         conv(np.timedelta64(15500, 'ns')))
+        assert (to_timedelta('1 days 06:05:01.00003', box=False) ==
+                conv(d1 + np.timedelta64(6 * 3600 + 5 * 60 + 1, 's') +
+                     np.timedelta64(30, 'us')))
+        assert (to_timedelta('15.5us', box=False) ==
+                conv(np.timedelta64(15500, 'ns')))

         # empty string
         result = to_timedelta('', box=False)
-        self.assertEqual(result.astype('int64'), iNaT)
+        assert result.astype('int64') == iNaT

         result = to_timedelta(['', ''])
         assert isnull(result).all()
@@ -42,7 +41,7 @@ def conv(v):
         # ints
         result = np.timedelta64(0, 'ns')
         expected = to_timedelta(0, box=False)
-        self.assertEqual(result, expected)
+        assert result == expected

         # Series
         expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)])
@@ -59,12 +58,12 @@ def conv(v):
         v = timedelta(seconds=1)
         result = to_timedelta(v, box=False)
         expected = np.timedelta64(timedelta(seconds=1))
-        self.assertEqual(result, expected)
+        assert result == expected

         v = np.timedelta64(timedelta(seconds=1))
         result = to_timedelta(v, box=False)
         expected = np.timedelta64(timedelta(seconds=1))
-        self.assertEqual(result, expected)
+        assert result == expected

         # arrays of various dtypes
         arr = np.array([1] * 5, dtype='int64')
@@ -134,8 +133,7 @@ def test_to_timedelta_invalid(self):

         # gh-13613: these should not error because errors='ignore'
         invalid_data = 'apple'
-        self.assertEqual(invalid_data, to_timedelta(
-            invalid_data, errors='ignore'))
+        assert invalid_data == to_timedelta(invalid_data, errors='ignore')

         invalid_data = ['apple', '1 days']
         tm.assert_numpy_array_equal(
@@ -172,32 +170,32 @@ def test_to_timedelta_on_missing_values(self):
         assert_series_equal(actual, expected)

         actual = pd.to_timedelta(np.nan)
-        self.assertEqual(actual.value, timedelta_NaT.astype('int64'))
+        assert actual.value == timedelta_NaT.astype('int64')

         actual = pd.to_timedelta(pd.NaT)
-        self.assertEqual(actual.value, timedelta_NaT.astype('int64'))
+        assert actual.value == timedelta_NaT.astype('int64')

     def test_to_timedelta_on_nanoseconds(self):
         # GH 9273
         result = Timedelta(nanoseconds=100)
         expected = Timedelta('100ns')
-        self.assertEqual(result, expected)
+        assert result == expected

         result = Timedelta(days=1, hours=1, minutes=1, weeks=1,
                            seconds=1, milliseconds=1, microseconds=1,
                            nanoseconds=1)
         expected = Timedelta(694861001001001)
-        self.assertEqual(result, expected)
+        assert result == expected

         result = Timedelta(microseconds=1) + Timedelta(nanoseconds=1)
         expected = Timedelta('1us1ns')
-        self.assertEqual(result, expected)
+        assert result == expected

         result = Timedelta(microseconds=1) - Timedelta(nanoseconds=1)
         expected = Timedelta('999ns')
-        self.assertEqual(result, expected)
+        assert result == expected

         result = Timedelta(microseconds=1) + 5 * Timedelta(nanoseconds=-2)
         expected = Timedelta('990ns')
-        self.assertEqual(result, expected)
+        assert result == expected

         pytest.raises(TypeError, lambda: Timedelta(nanoseconds='abc'))
diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py
index b555a9c1fd0df..bd5b7f45a6f4c 100644
--- a/pandas/tests/indexing/common.py
+++ b/pandas/tests/indexing/common.py
@@ -201,7 +201,7 @@ def _print(result, error=None):

             try:
                 if is_scalar(rs) and is_scalar(xp):
-                    self.assertEqual(rs, xp)
+                    assert rs == xp
                 elif xp.ndim == 1:
                     tm.assert_series_equal(rs, xp)
                 elif xp.ndim == 2:
diff --git a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py
index 1d70205076b86..727c87ac90872 100644
--- a/pandas/tests/indexing/test_callable.py
+++ b/pandas/tests/indexing/test_callable.py
@@ -59,10 +59,10 @@ def test_frame_loc_ix_callable(self):

         # scalar
         res = df.loc[lambda x: 1, lambda x: 'A']
-        self.assertEqual(res, df.loc[1, 'A'])
+        assert res == df.loc[1, 'A']

         res = df.loc[lambda x: 1, lambda x: 'A']
-        self.assertEqual(res, df.loc[1, 'A'])
+        assert res == df.loc[1, 'A']

     def test_frame_loc_ix_callable_mixture(self):
         # GH 11485
diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py
index b776d3c2d08ea..c39876a8c6e44 100644
--- a/pandas/tests/indexing/test_chaining_and_caching.py
+++ b/pandas/tests/indexing/test_chaining_and_caching.py
@@ -50,8 +50,8 @@ def test_setitem_cache_updating(self):

         # set it
         df.loc[7, 'c'] = 1
-        self.assertEqual(df.loc[0, 'c'], 0.0)
-        self.assertEqual(df.loc[7, 'c'], 1.0)
+        assert df.loc[0, 'c'] == 0.0
+        assert df.loc[7, 'c'] == 1.0

         # GH 7084
         # not updating cache on series setting with slices
@@ -395,12 +395,12 @@ def test_cache_updating(self):
         # but actually works, since everything is a view
         df.loc[0]['z'].iloc[0] = 1.
         result = df.loc[(0, 0), 'z']
-        self.assertEqual(result, 1)
+        assert result == 1

         # correct setting
         df.loc[(0, 0), 'z'] = 2
         result = df.loc[(0, 0), 'z']
-        self.assertEqual(result, 2)
+        assert result == 2

         # 10264
         df = DataFrame(np.zeros((5, 5), dtype='int64'), columns=[
diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
index b8030d84e7929..56bc8c1d72bb8 100644
--- a/pandas/tests/indexing/test_coercion.py
+++ b/pandas/tests/indexing/test_coercion.py
@@ -31,8 +31,8 @@ def _assert(self, left, right, dtype):
             tm.assert_index_equal(left, right)
         else:
             raise NotImplementedError
-        self.assertEqual(left.dtype, dtype)
-        self.assertEqual(right.dtype, dtype)
+        assert left.dtype == dtype
+        assert right.dtype == dtype

     def test_has_comprehensive_tests(self):
         for klass in self.klasses:
@@ -55,7 +55,7 @@ def _assert_setitem_series_conversion(self, original_series, loc_value,
         temp[1] = loc_value
         tm.assert_series_equal(temp, expected_series)
         # check dtype explicitly for sure
-        self.assertEqual(temp.dtype, expected_dtype)
+        assert temp.dtype == expected_dtype

         # .loc works different rule, temporary disable
         # temp = original_series.copy()
@@ -64,7 +64,7 @@ def _assert_setitem_series_conversion(self, original_series, loc_value,

     def test_setitem_series_object(self):
         obj = pd.Series(list('abcd'))
-        self.assertEqual(obj.dtype, np.object)
+        assert obj.dtype == np.object

         # object + int -> object
         exp = pd.Series(['a', 1, 'c', 'd'])
@@ -84,7 +84,7 @@ def test_setitem_series_object(self):

     def test_setitem_series_int64(self):
         obj = pd.Series([1, 2, 3, 4])
-        self.assertEqual(obj.dtype, np.int64)
+        assert obj.dtype == np.int64

         # int + int -> int
         exp = pd.Series([1, 1, 3, 4])
@@ -93,7 +93,7 @@ def test_setitem_series_int64(self):
         # int + float -> float
         # TODO_GH12747 The result must be float
         # tm.assert_series_equal(temp, pd.Series([1, 1.1, 3, 4]))
-        # self.assertEqual(temp.dtype, np.float64)
+        # assert temp.dtype == np.float64
         exp = pd.Series([1, 1, 3, 4])
         self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64)

@@ -107,7 +107,7 @@ def test_setitem_series_int64(self):

     def test_setitem_series_float64(self):
         obj = pd.Series([1.1, 2.2, 3.3, 4.4])
-        self.assertEqual(obj.dtype, np.float64)
+        assert obj.dtype == np.float64

         # float + int -> float
         exp = pd.Series([1.1, 1.0, 3.3, 4.4])
@@ -128,7 +128,7 @@ def test_setitem_series_float64(self):

     def test_setitem_series_complex128(self):
         obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j])
-        self.assertEqual(obj.dtype, np.complex128)
+        assert obj.dtype == np.complex128

         # complex + int -> complex
         exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j])
@@ -148,33 +148,33 @@ def test_setitem_series_complex128(self):

     def test_setitem_series_bool(self):
         obj = pd.Series([True, False, True, False])
-        self.assertEqual(obj.dtype, np.bool)
+        assert obj.dtype == np.bool

         # bool + int -> int
         # TODO_GH12747 The result must be int
         # tm.assert_series_equal(temp, pd.Series([1, 1, 1, 0]))
-        # self.assertEqual(temp.dtype, np.int64)
+        # assert temp.dtype == np.int64
         exp = pd.Series([True, True, True, False])
         self._assert_setitem_series_conversion(obj, 1, exp, np.bool)

         # TODO_GH12747 The result must be int
         # assigning int greater than bool
         # tm.assert_series_equal(temp, pd.Series([1, 3, 1, 0]))
-        # self.assertEqual(temp.dtype, np.int64)
+        # assert temp.dtype == np.int64
         exp = pd.Series([True, True, True, False])
         self._assert_setitem_series_conversion(obj, 3, exp, np.bool)

         # bool + float -> float
         # TODO_GH12747 The result must be float
         # tm.assert_series_equal(temp, pd.Series([1., 1.1, 1., 0.]))
-        # self.assertEqual(temp.dtype, np.float64)
+        # assert temp.dtype == np.float64
         exp = pd.Series([True, True, True, False])
         self._assert_setitem_series_conversion(obj, 1.1, exp, np.bool)

         # bool + complex -> complex (buggy, results in bool)
         # TODO_GH12747 The result must be complex
         # tm.assert_series_equal(temp, pd.Series([1, 1 + 1j, 1, 0]))
-        # self.assertEqual(temp.dtype, np.complex128)
+        # assert temp.dtype == np.complex128
         exp = pd.Series([True, True, True, False])
         self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.bool)

@@ -187,7 +187,7 @@ def test_setitem_series_datetime64(self):
                          pd.Timestamp('2011-01-02'),
                          pd.Timestamp('2011-01-03'),
                          pd.Timestamp('2011-01-04')])
-        self.assertEqual(obj.dtype, 'datetime64[ns]')
+        assert obj.dtype == 'datetime64[ns]'

         # datetime64 + datetime64 -> datetime64
         exp = pd.Series([pd.Timestamp('2011-01-01'),
@@ -213,7 +213,7 @@ def test_setitem_series_datetime64tz(self):
                          pd.Timestamp('2011-01-02', tz=tz),
                          pd.Timestamp('2011-01-03', tz=tz),
                          pd.Timestamp('2011-01-04', tz=tz)])
-        self.assertEqual(obj.dtype, 'datetime64[ns, US/Eastern]')
+        assert obj.dtype == 'datetime64[ns, US/Eastern]'

         # datetime64tz + datetime64tz -> datetime64tz
         exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
@@ -249,18 +249,18 @@ def _assert_setitem_index_conversion(self, original_series, loc_key,
         exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
         tm.assert_series_equal(temp, exp)
         # check dtype explicitly for sure
-        self.assertEqual(temp.index.dtype, expected_dtype)
+        assert temp.index.dtype == expected_dtype

         temp = original_series.copy()
         temp.loc[loc_key] = 5
         exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
         tm.assert_series_equal(temp, exp)
         # check dtype explicitly for sure
-        self.assertEqual(temp.index.dtype, expected_dtype)
+        assert temp.index.dtype == expected_dtype

     def test_setitem_index_object(self):
         obj = pd.Series([1, 2, 3, 4], index=list('abcd'))
-        self.assertEqual(obj.index.dtype, np.object)
+        assert obj.index.dtype == np.object

         # object + object -> object
         exp_index = pd.Index(list('abcdx'))
@@ -278,7 +278,7 @@ def test_setitem_index_object(self):
     def test_setitem_index_int64(self):
         # tests setitem with non-existing numeric key
         obj = pd.Series([1, 2, 3, 4])
-        self.assertEqual(obj.index.dtype, np.int64)
+        assert obj.index.dtype == np.int64

         # int + int -> int
         exp_index = pd.Index([0, 1, 2, 3, 5])
@@ -295,7 +295,7 @@ def test_setitem_index_int64(self):
     def test_setitem_index_float64(self):
         # tests setitem with non-existing numeric key
         obj = pd.Series([1, 2, 3, 4], index=[1.1, 2.1, 3.1, 4.1])
-        self.assertEqual(obj.index.dtype, np.float64)
+        assert obj.index.dtype == np.float64

         # float + int -> int
         temp = obj.copy()
@@ -341,11 +341,11 @@ def _assert_insert_conversion(self, original, value,
         target = original.copy()
         res = target.insert(1, value)
         tm.assert_index_equal(res, expected)
-        self.assertEqual(res.dtype, expected_dtype)
+        assert res.dtype == expected_dtype

     def test_insert_index_object(self):
         obj = pd.Index(list('abcd'))
-        self.assertEqual(obj.dtype, np.object)
+        assert obj.dtype == np.object

         # object + int -> object
         exp = pd.Index(['a', 1, 'b', 'c', 'd'])
@@ -358,7 +358,7 @@ def test_insert_index_object(self):
         # object + bool -> object
         res = obj.insert(1, False)
         tm.assert_index_equal(res, pd.Index(['a', False, 'b', 'c', 'd']))
-        self.assertEqual(res.dtype, np.object)
+        assert res.dtype == np.object

         # object + object -> object
         exp = pd.Index(['a', 'x', 'b', 'c', 'd'])
@@ -366,7 +366,7 @@ def test_insert_index_object(self):

     def test_insert_index_int64(self):
         obj = pd.Int64Index([1, 2, 3, 4])
-        self.assertEqual(obj.dtype, np.int64)
+        assert obj.dtype == np.int64

         # int + int -> int
         exp = pd.Index([1, 1, 2, 3, 4])
@@ -386,7 +386,7 @@ def test_insert_index_int64(self):

     def test_insert_index_float64(self):
         obj = pd.Float64Index([1., 2., 3., 4.])
-        self.assertEqual(obj.dtype, np.float64)
+        assert obj.dtype == np.float64

         # float + int -> int
         exp = pd.Index([1., 1., 2., 3., 4.])
@@ -413,7 +413,7 @@ def test_insert_index_bool(self):
     def test_insert_index_datetime64(self):
         obj = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03',
                                 '2011-01-04'])
-        self.assertEqual(obj.dtype, 'datetime64[ns]')
+        assert obj.dtype == 'datetime64[ns]'

         # datetime64 + datetime64 => datetime64
         exp = pd.DatetimeIndex(['2011-01-01', '2012-01-01', '2011-01-02',
@@ -434,7 +434,7 @@ def test_insert_index_datetime64(self):
     def test_insert_index_datetime64tz(self):
         obj = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03',
                                 '2011-01-04'], tz='US/Eastern')
-        self.assertEqual(obj.dtype, 'datetime64[ns, US/Eastern]')
+        assert obj.dtype == 'datetime64[ns, US/Eastern]'

         # datetime64tz + datetime64tz => datetime64
         exp = pd.DatetimeIndex(['2011-01-01', '2012-01-01', '2011-01-02',
@@ -460,7 +460,7 @@ def test_insert_index_datetime64tz(self):

     def test_insert_index_timedelta64(self):
         obj = pd.TimedeltaIndex(['1 day', '2 day', '3 day', '4 day'])
-        self.assertEqual(obj.dtype, 'timedelta64[ns]')
+        assert obj.dtype == 'timedelta64[ns]'

         # timedelta64 + timedelta64 => timedelta64
         exp = pd.TimedeltaIndex(['1 day', '10 day', '2 day', '3 day', '4 day'])
@@ -480,7 +480,7 @@ def test_insert_index_timedelta64(self):
     def test_insert_index_period(self):
         obj = pd.PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
                              freq='M')
-        self.assertEqual(obj.dtype, 'period[M]')
+        assert obj.dtype == 'period[M]'

         # period + period => period
         exp = pd.PeriodIndex(['2011-01', '2012-01', '2011-02',
@@ -527,7 +527,7 @@ def _assert_where_conversion(self, original, cond, values,

     def _where_object_common(self, klass):
         obj = klass(list('abcd'))
-        self.assertEqual(obj.dtype, np.object)
+        assert obj.dtype == np.object
         cond = klass([True, False, True, False])

         # object + int -> object
@@ -580,7 +580,7 @@ def test_where_index_object(self):

     def _where_int64_common(self, klass):
         obj = klass([1, 2, 3, 4])
-        self.assertEqual(obj.dtype, np.int64)
+        assert obj.dtype == np.int64
         cond = klass([True, False, True, False])

         # int + int -> int
@@ -626,7 +626,7 @@ def test_where_index_int64(self):

     def _where_float64_common(self, klass):
         obj = klass([1.1, 2.2, 3.3, 4.4])
-        self.assertEqual(obj.dtype, np.float64)
+        assert obj.dtype == np.float64
         cond = klass([True, False, True, False])

         # float + int -> float
@@ -672,7 +672,7 @@ def test_where_index_float64(self):

     def test_where_series_complex128(self):
         obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j])
-        self.assertEqual(obj.dtype, np.complex128)
+        assert obj.dtype == np.complex128
         cond = pd.Series([True, False, True, False])

         # complex + int -> complex
@@ -712,7 +712,7 @@ def test_where_index_complex128(self):

     def test_where_series_bool(self):
         obj = pd.Series([True, False, True, False])
-        self.assertEqual(obj.dtype, np.bool)
+        assert obj.dtype == np.bool
         cond = pd.Series([True, False, True, False])

         # bool + int -> int
@@ -755,7 +755,7 @@ def test_where_series_datetime64(self):
                          pd.Timestamp('2011-01-02'),
                          pd.Timestamp('2011-01-03'),
                          pd.Timestamp('2011-01-04')])
-        self.assertEqual(obj.dtype, 'datetime64[ns]')
+        assert obj.dtype == 'datetime64[ns]'
         cond = pd.Series([True, False, True, False])

         # datetime64 + datetime64 -> datetime64
@@ -797,7 +797,7 @@ def test_where_index_datetime64(self):
                         pd.Timestamp('2011-01-02'),
                         pd.Timestamp('2011-01-03'),
                         pd.Timestamp('2011-01-04')])
-        self.assertEqual(obj.dtype, 'datetime64[ns]')
+        assert obj.dtype == 'datetime64[ns]'
         cond = pd.Index([True, False, True, False])

         # datetime64 + datetime64 -> datetime64
@@ -867,7 +867,7 @@ def _assert_fillna_conversion(self, original, value,

     def _fillna_object_common(self, klass):
         obj = klass(['a', np.nan, 'c', 'd'])
-        self.assertEqual(obj.dtype, np.object)
+        assert obj.dtype == np.object

         # object + int -> object
         exp = klass(['a', 1, 'c', 'd'])
@@ -900,7 +900,7 @@ def test_fillna_index_int64(self):

     def _fillna_float64_common(self, klass):
         obj = klass([1.1, np.nan, 3.3, 4.4])
-        self.assertEqual(obj.dtype, np.float64)
+        assert obj.dtype == np.float64

         # float + int -> float
         exp = klass([1.1, 1.0, 3.3, 4.4])
@@ -933,7 +933,7 @@ def test_fillna_index_float64(self):

     def test_fillna_series_complex128(self):
         obj = pd.Series([1 + 1j, np.nan, 3 + 3j, 4 + 4j])
-        self.assertEqual(obj.dtype, np.complex128)
+        assert obj.dtype == np.complex128

         # complex + int -> complex
         exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j])
@@ -966,7 +966,7 @@ def test_fillna_series_datetime64(self):
                          pd.NaT,
                          pd.Timestamp('2011-01-03'),
                          pd.Timestamp('2011-01-04')])
-        self.assertEqual(obj.dtype, 'datetime64[ns]')
+        assert obj.dtype == 'datetime64[ns]'

         # datetime64 + datetime64 => datetime64
         exp = pd.Series([pd.Timestamp('2011-01-01'),
@@ -1006,7 +1006,7 @@ def test_fillna_series_datetime64tz(self):
                          pd.NaT,
                          pd.Timestamp('2011-01-03', tz=tz),
                          pd.Timestamp('2011-01-04', tz=tz)])
-        self.assertEqual(obj.dtype, 'datetime64[ns, US/Eastern]')
+        assert obj.dtype == 'datetime64[ns, US/Eastern]'

         # datetime64tz + datetime64tz => datetime64tz
         exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
@@ -1058,7 +1058,7 @@ def test_fillna_series_period(self):

     def test_fillna_index_datetime64(self):
         obj = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03',
                                 '2011-01-04'])
-        self.assertEqual(obj.dtype, 'datetime64[ns]')
+        assert obj.dtype == 'datetime64[ns]'

         # datetime64 + datetime64 => datetime64
         exp = pd.DatetimeIndex(['2011-01-01', '2012-01-01',
@@ -1093,7 +1093,7 @@ def test_fillna_index_datetime64tz(self):

         obj = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03',
                                 '2011-01-04'], tz=tz)
-        self.assertEqual(obj.dtype, 'datetime64[ns, US/Eastern]')
+        assert obj.dtype == 'datetime64[ns, US/Eastern]'

         # datetime64tz + datetime64tz => datetime64tz
         exp = pd.DatetimeIndex(['2011-01-01', '2012-01-01',
@@ -1168,7 +1168,7 @@ def setUp(self):
     def _assert_replace_conversion(self, from_key, to_key, how):
         index = pd.Index([3, 4], name='xxx')
         obj = pd.Series(self.rep[from_key], index=index, name='yyy')
-        self.assertEqual(obj.dtype, from_key)
+        assert obj.dtype == from_key

         if (from_key.startswith('datetime') and to_key.startswith('datetime')):
             # different tz, currently mask_missing raises SystemError
@@ -1198,7 +1198,7 @@ def _assert_replace_conversion(self, from_key, to_key, how):

         else:
             exp = pd.Series(self.rep[to_key], index=index, name='yyy')
-            self.assertEqual(exp.dtype, to_key)
+            assert exp.dtype == to_key

         tm.assert_series_equal(result, exp)

diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py
index 9b224ba796268..3089bc1dbddea 100644
--- a/pandas/tests/indexing/test_datetime.py
+++ b/pandas/tests/indexing/test_datetime.py
@@ -37,10 +37,10 @@ def test_indexing_with_datetime_tz(self):
         df = DataFrame({'a': date_range('2014-01-01', periods=10, tz='UTC')})
         result = df.iloc[5]
         expected = Timestamp('2014-01-06 00:00:00+0000', tz='UTC', freq='D')
-        self.assertEqual(result, expected)
+        assert result == expected

         result = df.loc[5]
-        self.assertEqual(result, expected)
+        assert result == expected

         # indexing - boolean
         result = df[df.a > df.a[3]]
@@ -129,7 +129,7 @@ def test_indexing_with_datetimeindex_tz(self):

             # single element indexing

             # getitem
-            self.assertEqual(ser[index[1]], 1)
+            assert ser[index[1]] == 1

             # setitem
             result = ser.copy()
@@ -138,7 +138,7 @@ def test_indexing_with_datetimeindex_tz(self):
             tm.assert_series_equal(result, expected)

             # .loc getitem
-            self.assertEqual(ser.loc[index[1]], 1)
+            assert ser.loc[index[1]] == 1

             # .loc setitem
             result = ser.copy()
diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py
index 4d4ef65b40074..1701dd9f6ba90 100644
--- a/pandas/tests/indexing/test_floats.py
+++ b/pandas/tests/indexing/test_floats.py
@@ -165,7 +165,7 @@ def f():

         result = s2.loc['b']
         expected = 2
-        self.assertEqual(result, expected)
+        assert result == expected

         # mixed index so we have label
         # indexing
@@ -180,14 +180,14 @@ def f():

             result = idxr(s3)[1]
             expected = 2
-            self.assertEqual(result, expected)
+            assert result == expected

         pytest.raises(TypeError, lambda: s3.iloc[1.0])
         pytest.raises(KeyError, lambda: s3.loc[1.0])

         result = s3.loc[1.5]
         expected = 3
-        self.assertEqual(result, expected)
+        assert result == expected

     def test_scalar_integer(self):

@@ -216,7 +216,8 @@ def test_scalar_integer(self):
                              (lambda x: x, True)]:

             if isinstance(s, Series):
-                compare = self.assertEqual
+                def compare(x, y):
+                    assert x == y
                 expected = 100
             else:
                 compare = tm.assert_series_equal
@@ -576,10 +577,10 @@ def test_floating_index_doc_example(self):

         index = Index([1.5, 2, 3, 4.5, 5])
         s = Series(range(5), index=index)
-        self.assertEqual(s[3], 2)
-        self.assertEqual(s.loc[3], 2)
-        self.assertEqual(s.loc[3], 2)
-        self.assertEqual(s.iloc[3], 3)
+        assert s[3] == 2
+        assert s.loc[3] == 2
+        assert s.loc[3] == 2
+        assert s.iloc[3] == 3

     def test_floating_misc(self):

@@ -598,16 +599,16 @@ def test_floating_misc(self):
         result1 = s[5.0]
         result2 = s.loc[5.0]
         result3 = s.loc[5.0]
-        self.assertEqual(result1, result2)
-        self.assertEqual(result1, result3)
+        assert result1 == result2
+        assert result1 == result3

         result1 = s[5]
         result2 = s.loc[5]
         result3 = s.loc[5]
-        self.assertEqual(result1, result2)
-        self.assertEqual(result1, result3)
+        assert result1 == result2
+        assert result1 == result3

-        self.assertEqual(s[5.0], s[5])
+        assert s[5.0] == s[5]

         # value not found (and no fallbacking at all)

@@ -702,15 +703,17 @@ def test_floating_misc(self):
         assert_series_equal(result1, Series([1], index=[2.5]))

     def test_floating_tuples(self):
-        # GH13509
+        # see gh-13509
         s = Series([(1, 1), (2, 2), (3, 3)],
                    index=[0.0, 0.1, 0.2], name='foo')
+
         result = s[0.0]
-        self.assertEqual(result, (1, 1))
+        assert result == (1, 1)
+
+        expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name='foo')

         s = Series([(1, 1), (2, 2), (3, 3)],
                    index=[0.0, 0.0, 0.2], name='foo')
+
         result = s[0.0]
-        expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name='foo')
-        assert_series_equal(result, expected)
+        tm.assert_series_equal(result, expected)

     def test_float64index_slicing_bug(self):
         # GH 5557, related to slicing a float index
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index baced46923fd4..3e625fa483f7b 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -166,7 +166,7 @@ def test_iloc_getitem_neg_int_can_reach_first_index(self):

         expected = s.iloc[0]
         result = s.iloc[-3]
-        self.assertEqual(result, expected)
+        assert result == expected

         expected = s.iloc[[0]]
         result = s.iloc[[-3]]
@@ -256,7 +256,7 @@ def test_iloc_setitem(self):

         df.iloc[1, 1] = 1
         result = df.iloc[1, 1]
-        self.assertEqual(result, 1)
+        assert result == 1

         df.iloc[:, 2:3] = 0
         expected = df.iloc[:, 2:3]
@@ -326,7 +326,7 @@ def test_iloc_getitem_frame(self):
         result = df.iloc[2, 2]
         with catch_warnings(record=True):
             exp = df.ix[4, 4]
-        self.assertEqual(result, exp)
+        assert result == exp

         # slice
         result = df.iloc[4:8]
@@ -376,7 +376,7 @@ def test_iloc_getitem_labelled_frame(self):
         result = df.iloc[1, 1]
         exp = df.loc['b', 'B']
-        self.assertEqual(result, exp)
+        assert result == exp

         result = df.iloc[:, 2:3]
         expected = df.loc[:, ['C']]
@@ -385,7 +385,7 @@ def test_iloc_getitem_labelled_frame(self):
         # negative indexing
         result = df.iloc[-1, -1]
         exp = df.loc['j', 'D']
-        self.assertEqual(result, exp)
+        assert result == exp

         # out-of-bounds exception
         pytest.raises(IndexError, df.iloc.__getitem__, tuple([10, 5]))
@@ -444,7 +444,7 @@ def test_iloc_setitem_series(self):

         df.iloc[1, 1] = 1
         result = df.iloc[1, 1]
-        self.assertEqual(result, 1)
+        assert result == 1

         df.iloc[:, 2:3] = 0
         expected = df.iloc[:, 2:3]
@@ -455,7 +455,7 @@ def test_iloc_setitem_series(self):

         s.iloc[1] = 1
         result = s.iloc[1]
-        self.assertEqual(result, 1)
+        assert result == 1

         s.iloc[:4] = 0
         expected = s.iloc[:4]
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index 5924dba488043..0759dc2333ad5 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -68,7 +68,7 @@ def test_setitem_dtype_upcast(self):
         # GH3216
         df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
         df['c'] = np.nan
-        self.assertEqual(df['c'].dtype, np.float64)
+        assert df['c'].dtype == np.float64

         df.loc[0, 'c'] = 'foo'
         expected = DataFrame([{"a": 1, "c": 'foo'},
@@ -231,7 +231,7 @@ def test_indexing_mixed_frame_bug(self):
         idx = df['test'] == '_'
         temp = df.loc[idx, 'a'].apply(lambda x: '-----' if x == 'aaa' else x)
         df.loc[idx, 'test'] = temp
-        self.assertEqual(df.iloc[0, 2], '-----')
+        assert df.iloc[0, 2] == '-----'

         # if I look at df, then element [0,2] equals '_'. If instead I type
         # df.ix[idx,'test'], I get '-----', finally by typing df.iloc[0,2] I
@@ -244,7 +244,7 @@ def test_multitype_list_index_access(self):
         with pytest.raises(KeyError):
             df[[22, 26, -8]]
-        self.assertEqual(df[21].shape[0], df.shape[0])
+        assert df[21].shape[0] == df.shape[0]

     def test_set_index_nan(self):

@@ -638,9 +638,9 @@ def test_float_index_non_scalar_assignment(self):
     def test_float_index_at_iat(self):
         s = pd.Series([1, 2, 3], index=[0.1, 0.2, 0.3])
         for el, item in s.iteritems():
-            self.assertEqual(s.at[el], item)
+            assert s.at[el] == item
         for i in range(len(s)):
-            self.assertEqual(s.iat[i], i + 1)
+            assert s.iat[i] == i + 1

     def test_rhs_alignment(self):
         # GH8258, tests that both rows & columns are aligned to what is
@@ -741,7 +741,7 @@ def test_indexing_dtypes_on_empty(self):
         with catch_warnings(record=True):
             df2 = df.ix[[], :]

-        self.assertEqual(df2.loc[:, 'a'].dtype, np.int64)
+        assert df2.loc[:, 'a'].dtype == np.int64
         tm.assert_series_equal(df2.loc[:, 'a'], df2.iloc[:, 0])
         with catch_warnings(record=True):
             tm.assert_series_equal(df2.loc[:, 'a'], df2.ix[:, 0])
@@ -791,13 +791,13 @@ def test_maybe_numeric_slice(self):
         df = pd.DataFrame({'A': [1, 2], 'B': ['c', 'd'], 'C': [True, False]})
         result = _maybe_numeric_slice(df, slice_=None)
         expected = pd.IndexSlice[:, ['A']]
-        self.assertEqual(result, expected)
+        assert result == expected

         result = _maybe_numeric_slice(df, None, include_bool=True)
         expected = pd.IndexSlice[:, ['A', 'C']]
         result = _maybe_numeric_slice(df, [1])
         expected = [1]
-        self.assertEqual(result, expected)
+        assert result == expected


 class TestSeriesNoneCoercion(tm.TestCase):
diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py
index 433b44c952ca1..8290bc80edac1 100644
--- a/pandas/tests/indexing/test_ix.py
+++ b/pandas/tests/indexing/test_ix.py
@@ -82,7 +82,7 @@ def test_ix_loc_consistency(self):

         def compare(result, expected):
             if is_scalar(expected):
-                self.assertEqual(result, expected)
+                assert result == expected
             else:
                 assert expected.equals(result)

@@ -216,7 +216,7 @@ def test_ix_assign_column_mixed(self):
             indexer = i * 2
             v = 1000 + i * 200
             expected.loc[indexer, 'y'] = v
-            self.assertEqual(expected.loc[indexer, 'y'], v)
+            assert expected.loc[indexer, 'y'] == v

         df.loc[df.x % 2 == 0, 'y'] = df.loc[df.x % 2 == 0, 'y'] * 100
         tm.assert_frame_equal(df, expected)
@@ -252,21 +252,21 @@ def test_ix_get_set_consistency(self):
                        index=['e', 7, 'f', 'g'])

         with catch_warnings(record=True):
-            self.assertEqual(df.ix['e', 8], 2)
-            self.assertEqual(df.loc['e', 8], 2)
+            assert df.ix['e', 8] == 2
+            assert df.loc['e', 8] == 2

         with catch_warnings(record=True):
             df.ix['e', 8] = 42
-            self.assertEqual(df.ix['e', 8], 42)
-            self.assertEqual(df.loc['e', 8], 42)
+            assert df.ix['e', 8] == 42
+            assert df.loc['e', 8] == 42

         df.loc['e', 8] = 45
         with catch_warnings(record=True):
-            self.assertEqual(df.ix['e', 8], 45)
-            self.assertEqual(df.loc['e', 8], 45)
+            assert df.ix['e', 8] == 45
+            assert df.loc['e', 8] == 45

     def test_ix_slicing_strings(self):
-        # GH3836
+        # see gh-3836
         data = {'Classification': ['SA EQUITY CFD', 'bbb', 'SA EQUITY',
                                    'SA SSF', 'aaa'],
                 'Random': [1, 2, 3, 4, 5],
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index b430f458d48b5..410d01431ef5a 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -58,7 +58,7 @@ def test_loc_setitem_dups(self):
         indexer = tuple(['r', 'bar'])
         df = df_orig.copy()
         df.loc[indexer] *= 2.0
-        self.assertEqual(df.loc[indexer], 2.0 * df_orig.loc[indexer])
+        assert df.loc[indexer] == 2.0 * df_orig.loc[indexer]

         indexer = tuple(['t', ['bar', 'bar2']])
         df = df_orig.copy()
@@ -332,7 +332,7 @@ def test_loc_general(self):
         result = DataFrame({'a': [Timestamp('20130101')], 'b': [1]}).iloc[0]
         expected = Series([Timestamp('20130101'), 1], index=['a', 'b'], name=0)
         tm.assert_series_equal(result, expected)
-        self.assertEqual(result.dtype, object)
+        assert result.dtype == object

     def test_loc_setitem_consistency(self):
         # GH 6149
@@ -415,10 +415,10 @@ def test_loc_setitem_frame(self):

         df.loc['a', 'A'] = 1
         result = df.loc['a', 'A']
-        self.assertEqual(result, 1)
+        assert result == 1

         result = df.iloc[0, 0]
-        self.assertEqual(result, 1)
+        assert result == 1

         df.loc[:, 'B':'D'] = 0
         expected = df.loc[:, 'B':'D']
@@ -608,14 +608,14 @@ def test_loc_name(self):
         df = DataFrame([[1, 1], [1, 1]])
         df.index.name = 'index_name'
         result = df.iloc[[0, 1]].index.name
-        self.assertEqual(result, 'index_name')
+        assert result == 'index_name'

         with catch_warnings(record=True):
             result = df.ix[[0, 1]].index.name
-            self.assertEqual(result, 'index_name')
+            assert result == 'index_name'

         result = df.loc[[0, 1]].index.name
-        self.assertEqual(result, 'index_name')
+        assert result == 'index_name'

     def test_loc_empty_list_indexer_is_ok(self):
         from pandas.util.testing import makeCustomDataframe as mkdf
diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py
index dbd0f5a9e6e1c..b8c34f9f28d83 100644
--- a/pandas/tests/indexing/test_multiindex.py
+++ b/pandas/tests/indexing/test_multiindex.py
@@ -30,7 +30,7 @@ def test_iloc_getitem_multiindex2(self):
         rs = df.iloc[2, 2]
         xp = df.values[2, 2]
-        self.assertEqual(rs, xp)
+        assert rs == xp

         # for multiple items
         # GH 5528
@@ -50,6 +50,9 @@ def test_setitem_multiindex(self):

         for index_fn in ('ix', 'loc'):

+            def assert_equal(a, b):
+                assert a == b
+
             def check(target, indexers, value, compare_fn, expected=None):
                 fn = getattr(target, index_fn)
                 fn.__setitem__(indexers, value)
@@ -66,28 +69,28 @@ def check(target, indexers, value, compare_fn, expected=None):
                                           'X', 'd', 'profit'],
                            index=index)
             check(target=df, indexers=((t, n), 'X'), value=0,
-                  compare_fn=self.assertEqual)
+                  compare_fn=assert_equal)

             df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x',
                                           'X', 'd', 'profit'],
                            index=index)
             check(target=df, indexers=((t, n), 'X'), value=1,
-                  compare_fn=self.assertEqual)
+                  compare_fn=assert_equal)

             df = DataFrame(columns=['A', 'w', 'l', 'a', 'x',
                                     'X', 'd', 'profit'],
                            index=index)
             check(target=df, indexers=((t, n), 'X'), value=2,
-                  compare_fn=self.assertEqual)
+                  compare_fn=assert_equal)

-            # GH 7218, assinging with 0-dim arrays
+            # gh-7218: assigning with 0-dim arrays
             df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x',
                                           'X', 'd', 'profit'],
                            index=index)
             check(target=df, indexers=((t, n), 'X'),
                   value=np.array(3),
-                  compare_fn=self.assertEqual,
+                  compare_fn=assert_equal,
                   expected=3, )

             # GH5206
@@ -215,8 +218,8 @@ def test_iloc_getitem_multiindex(self):
         with catch_warnings(record=True):
             xp = mi_int.ix[4].ix[8]
         tm.assert_series_equal(rs, xp, check_names=False)
-        self.assertEqual(rs.name, (4, 8))
-        self.assertEqual(xp.name, 8)
+        assert rs.name == (4, 8)
+        assert xp.name == 8

         # 2nd (last) columns
         rs = mi_int.iloc[:, 2]
@@ -228,13 +231,13 @@ def test_iloc_getitem_multiindex(self):
         rs = mi_int.iloc[2, 2]
         with catch_warnings(record=True):
             xp = mi_int.ix[:, 2].ix[2]
-        self.assertEqual(rs, xp)
+        assert rs == xp

         # this is basically regular indexing
         rs = mi_labels.iloc[2, 2]
         with catch_warnings(record=True):
             xp = mi_labels.ix['j'].ix[:, 'j'].ix[0, 0]
-        self.assertEqual(rs, xp)
+        assert rs == xp

     def test_loc_multiindex(self):

@@ -572,7 +575,7 @@ def f():
                                             ('functs', 'median')]),
                           index=['function', 'name'])
         result = df.loc['function', ('functs', 'mean')]
-        self.assertEqual(result, np.mean)
+        assert result == np.mean

     def test_multiindex_assignment(self):

@@ -798,9 +801,9 @@ def f():
             tm.assert_frame_equal(result, expected)

             # not lexsorted
-            self.assertEqual(df.index.lexsort_depth, 2)
+            assert df.index.lexsort_depth == 2
             df = df.sort_index(level=1, axis=0)
-            self.assertEqual(df.index.lexsort_depth, 0)
+            assert df.index.lexsort_depth == 0

             with tm.assert_raises_regex(
                     UnsortedIndexError,
                     'MultiIndex Slicing requires the index to be fully '
diff --git a/pandas/tests/indexing/test_panel.py b/pandas/tests/indexing/test_panel.py
index 8aa35a163babc..b704e15b81502 100644
--- a/pandas/tests/indexing/test_panel.py
+++ b/pandas/tests/indexing/test_panel.py
@@ -27,7 +27,7 @@ def test_iloc_getitem_panel(self):

             result = p.iloc[1, 1, 1]
             expected = p.loc['B', 'b', 'two']
-            self.assertEqual(result, expected)
+            assert result == expected

             # slice
             result = p.iloc[1:3]
@@ -99,16 +99,16 @@ def f():

     def test_iloc_panel_issue(self):

         with catch_warnings(record=True):
-            # GH 3617
+            # see gh-3617
             p = Panel(np.random.randn(4, 4, 4))
-            self.assertEqual(p.iloc[:3, :3, :3].shape, (3, 3, 3))
-            self.assertEqual(p.iloc[1, :3, :3].shape, (3, 3))
-            self.assertEqual(p.iloc[:3, 1, :3].shape, (3, 3))
-            self.assertEqual(p.iloc[:3, :3, 1].shape, (3, 3))
-            self.assertEqual(p.iloc[1, 1, :3].shape, (3, ))
-            self.assertEqual(p.iloc[1, :3, 1].shape, (3, ))
-            self.assertEqual(p.iloc[:3, 1, 1].shape, (3, ))
+            assert p.iloc[:3, :3, :3].shape == (3, 3, 3)
+            assert p.iloc[1, :3, :3].shape == (3, 3)
+            assert p.iloc[:3, 1, :3].shape == (3, 3)
+            assert p.iloc[:3, :3, 1].shape == (3, 3)
+            assert p.iloc[1, 1, :3].shape == (3, )
+            assert p.iloc[1, :3, 1].shape == (3, )
+            assert p.iloc[:3, 1, 1].shape == (3, )

     def test_panel_getitem(self):
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index 80d2d5729c610..20cec2a3aa7db 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -392,7 +392,7 @@ def f():
         tm.assert_frame_equal(df, exp)
         tm.assert_index_equal(df.index,
                               pd.Index(orig.index.tolist() + ['a']))
-        self.assertEqual(df.index.dtype, 'object')
+        assert df.index.dtype == 'object'

     def test_partial_set_empty_series(self):
diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py
index 70c7eaf7446db..fb40c539e16ba 100644
--- a/pandas/tests/indexing/test_scalar.py
+++ b/pandas/tests/indexing/test_scalar.py
@@ -77,7 +77,7 @@ def test_at_iat_coercion(self):

         result = s.at[dates[5]]
         xp = s.values[5]
-        self.assertEqual(result, xp)
+        assert result == xp

         # GH 7729
         # make sure we are boxing the returns
@@ -86,14 +86,14 @@ def test_at_iat_coercion(self):

         for r in [lambda: s.iat[1], lambda: s.iloc[1]]:
             result = r()
-            self.assertEqual(result, expected)
+            assert result == expected

         s = Series(['1 days', '2 days'], dtype='timedelta64[ns]')
         expected = Timedelta('2 days')

         for r in [lambda: s.iat[1], lambda: s.iloc[1]]:
             result = r()
-            self.assertEqual(result, expected)
+            assert result == expected

     def test_iat_invalid_args(self):
         pass
@@ -105,9 +105,9 @@ def test_imethods_with_dups(self):
         s = Series(range(5), index=[1, 1, 2, 2, 3], dtype='int64')

         result = s.iloc[2]
-        self.assertEqual(result, 2)
+        assert result == 2
         result = s.iat[2]
-        self.assertEqual(result, 2)
+        assert result == 2
pytest.raises(IndexError, lambda: s.iat[10]) pytest.raises(IndexError, lambda: s.iat[-10]) @@ -123,29 +123,29 @@ def test_imethods_with_dups(self): result = df.iat[2, 0] expected = 2 - self.assertEqual(result, 2) + assert result == 2 def test_at_to_fail(self): # at should not fallback # GH 7814 s = Series([1, 2, 3], index=list('abc')) result = s.at['a'] - self.assertEqual(result, 1) + assert result == 1 pytest.raises(ValueError, lambda: s.at[0]) df = DataFrame({'A': [1, 2, 3]}, index=list('abc')) result = df.at['a', 'A'] - self.assertEqual(result, 1) + assert result == 1 pytest.raises(ValueError, lambda: df.at['a', 0]) s = Series([1, 2, 3], index=[3, 2, 1]) result = s.at[1] - self.assertEqual(result, 3) + assert result == 3 pytest.raises(ValueError, lambda: s.at['a']) df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) result = df.at[1, 0] - self.assertEqual(result, 3) + assert result == 3 pytest.raises(ValueError, lambda: df.at['a', 0]) # GH 13822, incorrect error string with non-unique columns when missing diff --git a/pandas/tests/io/formats/test_eng_formatting.py b/pandas/tests/io/formats/test_eng_formatting.py index 41bb95964b4a2..e064d1200d672 100644 --- a/pandas/tests/io/formats/test_eng_formatting.py +++ b/pandas/tests/io/formats/test_eng_formatting.py @@ -18,7 +18,7 @@ def test_eng_float_formatter(self): '1 141.000E+00\n' '2 14.100E+03\n' '3 1.410E+06') - self.assertEqual(result, expected) + assert result == expected fmt.set_eng_float_format(use_eng_prefix=True) result = df.to_string() @@ -27,7 +27,7 @@ def test_eng_float_formatter(self): '1 141.000\n' '2 14.100k\n' '3 1.410M') - self.assertEqual(result, expected) + assert result == expected fmt.set_eng_float_format(accuracy=0) result = df.to_string() @@ -36,15 +36,13 @@ def test_eng_float_formatter(self): '1 141E+00\n' '2 14E+03\n' '3 1E+06') - self.assertEqual(result, expected) + assert result == expected tm.reset_display_options() def compare(self, formatter, input, output): formatted_input = formatter(input) - msg = ("formatting of %s results in '%s', expected '%s'" % - (str(input), formatted_input, output)) - self.assertEqual(formatted_input, output, msg) + assert formatted_input == output def compare_all(self, formatter, in_out): """ @@ -169,14 +167,14 @@ def test_rounding(self): formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) result = formatter(0) - self.assertEqual(result, u(' 0.000')) + assert result == u(' 0.000') def test_nan(self): # Issue #11981 formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) result = formatter(np.nan) - self.assertEqual(result, u('NaN')) + assert result == u('NaN') df = pd.DataFrame({'a': [1.5, 10.3, 20.5], 'b': [50.3, 60.67, 70.12], @@ -192,4 +190,4 @@ def test_inf(self): formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) result = formatter(np.inf) - self.assertEqual(result, u('inf')) + assert result == u('inf') diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 6f19a4a126118..dee645e9d70ec 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -196,16 +196,16 @@ def test_repr_truncation(self): def test_repr_chop_threshold(self): df = DataFrame([[0.1, 0.5], [0.5, -0.1]]) pd.reset_option("display.chop_threshold") # default None - self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1') + assert repr(df) == ' 0 1\n0 0.1 0.5\n1 0.5 -0.1' with option_context("display.chop_threshold", 0.2): - self.assertEqual(repr(df), ' 0 1\n0 0.0 0.5\n1 0.5 0.0') + assert repr(df) == ' 0 1\n0 
0.0 0.5\n1 0.5 0.0' with option_context("display.chop_threshold", 0.6): - self.assertEqual(repr(df), ' 0 1\n0 0.0 0.0\n1 0.0 0.0') + assert repr(df) == ' 0 1\n0 0.0 0.0\n1 0.0 0.0' with option_context("display.chop_threshold", None): - self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1') + assert repr(df) == ' 0 1\n0 0.1 0.5\n1 0.5 -0.1' def test_repr_obeys_max_seq_limit(self): with option_context("display.max_seq_items", 2000): @@ -215,7 +215,7 @@ def test_repr_obeys_max_seq_limit(self): assert len(printing.pprint_thing(lrange(1000))) < 100 def test_repr_set(self): - self.assertEqual(printing.pprint_thing(set([1])), '{1}') + assert printing.pprint_thing(set([1])) == '{1}' def test_repr_is_valid_construction_code(self): # for the case of Index, where the repr is traditional rather then @@ -389,7 +389,7 @@ def test_to_string_repr_unicode(self): except: pass if not line.startswith('dtype:'): - self.assertEqual(len(line), line_len) + assert len(line) == line_len # it works even if sys.stdin in None _stdin = sys.stdin @@ -441,11 +441,11 @@ def test_to_string_with_formatters(self): ('object', lambda x: '-%s-' % str(x))] result = df.to_string(formatters=dict(formatters)) result2 = df.to_string(formatters=lzip(*formatters)[1]) - self.assertEqual(result, (' int float object\n' - '0 0x1 [ 1.0] -(1, 2)-\n' - '1 0x2 [ 2.0] -True-\n' - '2 0x3 [ 3.0] -False-')) - self.assertEqual(result, result2) + assert result == (' int float object\n' + '0 0x1 [ 1.0] -(1, 2)-\n' + '1 0x2 [ 2.0] -True-\n' + '2 0x3 [ 3.0] -False-') + assert result == result2 def test_to_string_with_datetime64_monthformatter(self): months = [datetime(2016, 1, 1), datetime(2016, 2, 2)] @@ -455,7 +455,7 @@ def format_func(x): return x.strftime('%Y-%m') result = x.to_string(formatters={'months': format_func}) expected = 'months\n0 2016-01\n1 2016-02' - self.assertEqual(result.strip(), expected) + assert result.strip() == expected def test_to_string_with_datetime64_hourformatter(self): @@ -467,12 +467,12 @@ def format_func(x): result = x.to_string(formatters={'hod': format_func}) expected = 'hod\n0 10:10\n1 12:12' - self.assertEqual(result.strip(), expected) + assert result.strip() == expected def test_to_string_with_formatters_unicode(self): df = DataFrame({u('c/\u03c3'): [1, 2, 3]}) result = df.to_string(formatters={u('c/\u03c3'): lambda x: '%s' % x}) - self.assertEqual(result, u(' c/\u03c3\n') + '0 1\n1 2\n2 3') + assert result == u(' c/\u03c3\n') + '0 1\n1 2\n2 3' def test_east_asian_unicode_frame(self): if PY3: @@ -489,7 +489,7 @@ def test_east_asian_unicode_frame(self): expected = (u" a b\na あ 1\n" u"bb いいい 222\nc う 33333\n" u"ddd ええええええ 4") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # last col df = DataFrame({'a': [1, 222, 33333, 4], @@ -498,7 +498,7 @@ def test_east_asian_unicode_frame(self): expected = (u" a b\na 1 あ\n" u"bb 222 いいい\nc 33333 う\n" u"ddd 4 ええええええ") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # all col df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], @@ -507,7 +507,7 @@ def test_east_asian_unicode_frame(self): expected = (u" a b\na あああああ あ\n" u"bb い いいい\nc う う\n" u"ddd えええ ええええええ") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # column name df = DataFrame({u'あああああ': [1, 222, 33333, 4], @@ -516,7 +516,7 @@ def test_east_asian_unicode_frame(self): expected = (u" b あああああ\na あ 1\n" u"bb いいい 222\nc う 33333\n" u"ddd ええええええ 4") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # index df = DataFrame({'a': [u'あああああ', u'い', 
u'う', u'えええ'], @@ -525,7 +525,7 @@ def test_east_asian_unicode_frame(self): expected = (u" a b\nあああ あああああ あ\n" u"いいいいいい い いいい\nうう う う\n" u"え えええ ええええええ") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # index name df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], @@ -538,7 +538,7 @@ def test_east_asian_unicode_frame(self): u"い い いいい\n" u"うう う う\n" u"え えええ ええええええ") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # all df = DataFrame({u'あああ': [u'あああ', u'い', u'う', u'えええええ'], @@ -551,7 +551,7 @@ def test_east_asian_unicode_frame(self): u"いいい い いいい\n" u"うう う う\n" u"え えええええ ええ") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # MultiIndex idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( @@ -564,7 +564,7 @@ def test_east_asian_unicode_frame(self): u"う え い いいい\n" u"おおお かかかか う う\n" u"き くく えええ ええええええ") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # truncate with option_context('display.max_rows', 3, 'display.max_columns', 3): @@ -577,13 +577,13 @@ def test_east_asian_unicode_frame(self): expected = (u" a ... ああああ\n0 あああああ ... さ\n" u".. ... ... ...\n3 えええ ... せ\n" u"\n[4 rows x 4 columns]") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected df.index = [u'あああ', u'いいいい', u'う', 'aaa'] expected = (u" a ... ああああ\nあああ あああああ ... さ\n" u".. ... ... ...\naaa えええ ... せ\n" u"\n[4 rows x 4 columns]") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # Emable Unicode option ----------------------------------------- with option_context('display.unicode.east_asian_width', True): @@ -595,7 +595,7 @@ def test_east_asian_unicode_frame(self): expected = (u" a b\na あ 1\n" u"bb いいい 222\nc う 33333\n" u"ddd ええええええ 4") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # last col df = DataFrame({'a': [1, 222, 33333, 4], @@ -604,7 +604,7 @@ def test_east_asian_unicode_frame(self): expected = (u" a b\na 1 あ\n" u"bb 222 いいい\nc 33333 う\n" u"ddd 4 ええええええ") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # all col df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], @@ -615,7 +615,7 @@ def test_east_asian_unicode_frame(self): u"bb い いいい\n" u"c う う\n" u"ddd えええ ええええええ") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # column name df = DataFrame({u'あああああ': [1, 222, 33333, 4], @@ -626,7 +626,7 @@ def test_east_asian_unicode_frame(self): u"bb いいい 222\n" u"c う 33333\n" u"ddd ええええええ 4") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # index df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], @@ -637,7 +637,7 @@ def test_east_asian_unicode_frame(self): u"いいいいいい い いいい\n" u"うう う う\n" u"え えええ ええええええ") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # index name df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], @@ -650,7 +650,7 @@ def test_east_asian_unicode_frame(self): u"い い いいい\n" u"うう う う\n" u"え えええ ええええええ") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # all df = DataFrame({u'あああ': [u'あああ', u'い', u'う', u'えええええ'], @@ -663,7 +663,7 @@ def test_east_asian_unicode_frame(self): u"いいい い いいい\n" u"うう う う\n" u"え えええええ ええ") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # MultiIndex idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( @@ -676,7 +676,7 @@ def test_east_asian_unicode_frame(self): u"う え い いいい\n" u"おおお かかかか う う\n" u"き くく えええ ええええええ") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # truncate with 
option_context('display.max_rows', 3, 'display.max_columns', @@ -693,7 +693,7 @@ def test_east_asian_unicode_frame(self): u".. ... ... ...\n" u"3 えええ ... せ\n" u"\n[4 rows x 4 columns]") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected df.index = [u'あああ', u'いいいい', u'う', 'aaa'] expected = (u" a ... ああああ\n" @@ -701,7 +701,7 @@ def test_east_asian_unicode_frame(self): u"... ... ... ...\n" u"aaa えええ ... せ\n" u"\n[4 rows x 4 columns]") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected # ambiguous unicode df = DataFrame({u'あああああ': [1, 222, 33333, 4], @@ -712,7 +712,7 @@ def test_east_asian_unicode_frame(self): u"bb いいい 222\n" u"c ¡¡ 33333\n" u"¡¡¡ ええええええ 4") - self.assertEqual(_rep(df), expected) + assert _rep(df) == expected def test_to_string_buffer_all_unicode(self): buf = StringIO() @@ -738,7 +738,7 @@ def test_to_string_with_col_space(self): with_header = df.to_string(col_space=20) with_header_row1 = with_header.splitlines()[1] no_header = df.to_string(col_space=20, header=False) - self.assertEqual(len(with_header_row1), len(no_header)) + assert len(with_header_row1) == len(no_header) def test_to_string_truncate_indices(self): for index in [tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeIntIndex, @@ -825,7 +825,7 @@ def test_datetimelike_frame(self): '8 NaT 9\n' '9 NaT 10\n\n' '[10 rows x 2 columns]') - self.assertEqual(repr(df), expected) + assert repr(df) == expected dts = [pd.NaT] * 5 + [pd.Timestamp('2011-01-01', tz='US/Eastern')] * 5 df = pd.DataFrame({"dt": dts, @@ -838,7 +838,7 @@ def test_datetimelike_frame(self): '8 2011-01-01 00:00:00-05:00 9\n' '9 2011-01-01 00:00:00-05:00 10\n\n' '[10 rows x 2 columns]') - self.assertEqual(repr(df), expected) + assert repr(df) == expected dts = ([pd.Timestamp('2011-01-01', tz='Asia/Tokyo')] * 5 + [pd.Timestamp('2011-01-01', tz='US/Eastern')] * 5) @@ -852,13 +852,13 @@ def test_datetimelike_frame(self): '8 2011-01-01 00:00:00-05:00 9\n' '9 2011-01-01 00:00:00-05:00 10\n\n' '[10 rows x 2 columns]') - self.assertEqual(repr(df), expected) + assert repr(df) == expected def test_nonunicode_nonascii_alignment(self): df = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]]) rep_str = df.to_string() lines = rep_str.split('\n') - self.assertEqual(len(lines[1]), len(lines[2])) + assert len(lines[1]) == len(lines[2]) def test_unicode_problem_decoding_as_ascii(self): dm = DataFrame({u('c/\u03c3'): Series({'test': np.nan})}) @@ -890,25 +890,21 @@ def test_pprint_thing(self): if PY3: pytest.skip("doesn't work on Python 3") - self.assertEqual(pp_t('a'), u('a')) - self.assertEqual(pp_t(u('a')), u('a')) - self.assertEqual(pp_t(None), 'None') - self.assertEqual(pp_t(u('\u05d0'), quote_strings=True), u("u'\u05d0'")) - self.assertEqual(pp_t(u('\u05d0'), quote_strings=False), u('\u05d0')) - self.assertEqual(pp_t((u('\u05d0'), - u('\u05d1')), quote_strings=True), - u("(u'\u05d0', u'\u05d1')")) - self.assertEqual(pp_t((u('\u05d0'), (u('\u05d1'), - u('\u05d2'))), - quote_strings=True), - u("(u'\u05d0', (u'\u05d1', u'\u05d2'))")) - self.assertEqual(pp_t(('foo', u('\u05d0'), (u('\u05d0'), - u('\u05d0'))), - quote_strings=True), - u("(u'foo', u'\u05d0', (u'\u05d0', u'\u05d0'))")) - - # escape embedded tabs in string - # GH #2038 + assert pp_t('a') == u('a') + assert pp_t(u('a')) == u('a') + assert pp_t(None) == 'None' + assert pp_t(u('\u05d0'), quote_strings=True) == u("u'\u05d0'") + assert pp_t(u('\u05d0'), quote_strings=False) == u('\u05d0') + assert (pp_t((u('\u05d0'), u('\u05d1')), quote_strings=True) == + u("(u'\u05d0', 
u'\u05d1')")) + assert (pp_t((u('\u05d0'), (u('\u05d1'), u('\u05d2'))), + quote_strings=True) == u("(u'\u05d0', " + "(u'\u05d1', u'\u05d2'))")) + assert (pp_t(('foo', u('\u05d0'), (u('\u05d0'), u('\u05d0'))), + quote_strings=True) == u("(u'foo', u'\u05d0', " + "(u'\u05d0', u'\u05d0'))")) + + # gh-2038: escape embedded tabs in string assert "\t" not in pp_t("a\tb", escape_chars=("\t", )) def test_wide_repr(self): @@ -936,7 +932,7 @@ def test_wide_repr_wide_columns(self): columns=['a' * 90, 'b' * 90, 'c' * 90]) rep_str = repr(df) - self.assertEqual(len(rep_str.splitlines()), 20) + assert len(rep_str.splitlines()) == 20 def test_wide_repr_named(self): with option_context('mode.sim_interactive', True): @@ -1036,7 +1032,7 @@ def test_long_series(self): import re str_rep = str(s) nmatches = len(re.findall('dtype', str_rep)) - self.assertEqual(nmatches, 1) + assert nmatches == 1 def test_index_with_nan(self): # GH 2850 @@ -1055,7 +1051,7 @@ def test_index_with_nan(self): expected = u( ' value\nid1 id2 id3 \n' '1a3 NaN 78d 123\n9h4 d67 79d 64') - self.assertEqual(result, expected) + assert result == expected # index y = df.set_index('id2') @@ -1063,7 +1059,7 @@ def test_index_with_nan(self): expected = u( ' id1 id3 value\nid2 \n' 'NaN 1a3 78d 123\nd67 9h4 79d 64') - self.assertEqual(result, expected) + assert result == expected # with append (this failed in 0.12) y = df.set_index(['id1', 'id2']).set_index('id3', append=True) @@ -1071,7 +1067,7 @@ def test_index_with_nan(self): expected = u( ' value\nid1 id2 id3 \n' '1a3 NaN 78d 123\n9h4 d67 79d 64') - self.assertEqual(result, expected) + assert result == expected # all-nan in mi df2 = df.copy() @@ -1081,7 +1077,7 @@ def test_index_with_nan(self): expected = u( ' id1 id3 value\nid2 \n' 'NaN 1a3 78d 123\nNaN 9h4 79d 64') - self.assertEqual(result, expected) + assert result == expected # partial nan in mi df2 = df.copy() @@ -1091,7 +1087,7 @@ def test_index_with_nan(self): expected = u( ' id1 value\nid2 id3 \n' 'NaN 78d 1a3 123\n 79d 9h4 64') - self.assertEqual(result, expected) + assert result == expected df = DataFrame({'id1': {0: np.nan, 1: '9h4'}, @@ -1107,7 +1103,7 @@ def test_index_with_nan(self): expected = u( ' value\nid1 id2 id3 \n' 'NaN NaN NaN 123\n9h4 d67 79d 64') - self.assertEqual(result, expected) + assert result == expected def test_to_string(self): @@ -1123,7 +1119,7 @@ def test_to_string(self): buf = StringIO() retval = biggie.to_string(buf=buf) assert retval is None - self.assertEqual(buf.getvalue(), s) + assert buf.getvalue() == s assert isinstance(s, compat.string_types) @@ -1136,17 +1132,17 @@ def test_to_string(self): recons = read_table(StringIO(joined), names=header, header=None, sep=' ') tm.assert_series_equal(recons['B'], biggie['B']) - self.assertEqual(recons['A'].count(), biggie['A'].count()) + assert recons['A'].count() == biggie['A'].count() assert (np.abs(recons['A'].dropna() - biggie['A'].dropna()) < 0.1).all() # expected = ['B', 'A'] - # self.assertEqual(header, expected) + # assert header == expected result = biggie.to_string(columns=['A'], col_space=17) header = result.split('\n')[0].strip().split() expected = ['A'] - self.assertEqual(header, expected) + assert header == expected biggie.to_string(columns=['B', 'A'], formatters={'A': lambda x: '%.1f' % x}) @@ -1163,7 +1159,7 @@ def test_to_string_no_header(self): df_s = df.to_string(header=False) expected = "0 1 4\n1 2 5\n2 3 6" - self.assertEqual(df_s, expected) + assert df_s == expected def test_to_string_specified_header(self): df = DataFrame({'x': [1, 2, 3], 
'y': [4, 5, 6]}) @@ -1171,7 +1167,7 @@ def test_to_string_specified_header(self): df_s = df.to_string(header=['X', 'Y']) expected = ' X Y\n0 1 4\n1 2 5\n2 3 6' - self.assertEqual(df_s, expected) + assert df_s == expected with pytest.raises(ValueError): df.to_string(header=['X']) @@ -1182,7 +1178,7 @@ def test_to_string_no_index(self): df_s = df.to_string(index=False) expected = "x y\n1 4\n2 5\n3 6" - self.assertEqual(df_s, expected) + assert df_s == expected def test_to_string_line_width_no_index(self): df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) @@ -1190,7 +1186,7 @@ def test_to_string_line_width_no_index(self): df_s = df.to_string(line_width=1, index=False) expected = "x \\\n1 \n2 \n3 \n\ny \n4 \n5 \n6" - self.assertEqual(df_s, expected) + assert df_s == expected def test_to_string_float_formatting(self): tm.reset_display_options() @@ -1214,16 +1210,16 @@ def test_to_string_float_formatting(self): '2 3.45600e+03\n3 1.20000e+46\n4 1.64000e+06\n' '5 1.70000e+08\n6 1.25346e+00\n7 3.14159e+00\n' '8 -1.00000e+06') - self.assertEqual(df_s, expected) + assert df_s == expected df = DataFrame({'x': [3234, 0.253]}) df_s = df.to_string() expected = (' x\n' '0 3234.000\n' '1 0.253') - self.assertEqual(df_s, expected) + assert df_s == expected tm.reset_display_options() - self.assertEqual(get_option("display.precision"), 6) + assert get_option("display.precision") == 6 df = DataFrame({'x': [1e9, 0.2512]}) df_s = df.to_string() @@ -1237,7 +1233,7 @@ def test_to_string_float_formatting(self): expected = (' x\n' '0 1.000000e+09\n' '1 2.512000e-01') - self.assertEqual(df_s, expected) + assert df_s == expected def test_to_string_small_float_values(self): df = DataFrame({'a': [1.5, 1e-17, -5.5e-7]}) @@ -1254,7 +1250,7 @@ def test_to_string_small_float_values(self): '0 1.500000e+00\n' '1 1.000000e-17\n' '2 -5.500000e-07') - self.assertEqual(result, expected) + assert result == expected # but not all exactly zero df = df * 0 @@ -1272,7 +1268,7 @@ def test_to_string_float_index(self): '3.0 2\n' '4.0 3\n' '5.0 4') - self.assertEqual(result, expected) + assert result == expected def test_to_string_ascii_error(self): data = [('0 ', u(' .gitignore '), u(' 5 '), @@ -1289,7 +1285,7 @@ def test_to_string_int_formatting(self): output = df.to_string() expected = (' x\n' '0 -15\n' '1 20\n' '2 25\n' '3 -35') - self.assertEqual(output, expected) + assert output == expected def test_to_string_index_formatter(self): df = DataFrame([lrange(5), lrange(5, 10), lrange(10, 15)]) @@ -1303,14 +1299,14 @@ def test_to_string_index_formatter(self): c 10 11 12 13 14\ """ - self.assertEqual(rs, xp) + assert rs == xp def test_to_string_left_justify_cols(self): tm.reset_display_options() df = DataFrame({'x': [3234, 0.253]}) df_s = df.to_string(justify='left') expected = (' x \n' '0 3234.000\n' '1 0.253') - self.assertEqual(df_s, expected) + assert df_s == expected def test_to_string_format_na(self): tm.reset_display_options() @@ -1324,7 +1320,7 @@ def test_to_string_format_na(self): '2 -2.1234 foooo\n' '3 3.0000 fooooo\n' '4 4.0000 bar') - self.assertEqual(result, expected) + assert result == expected df = DataFrame({'A': [np.nan, -1., -2., 3., 4.], 'B': [np.nan, 'foo', 'foooo', 'fooooo', 'bar']}) @@ -1336,12 +1332,12 @@ def test_to_string_format_na(self): '2 -2.0 foooo\n' '3 3.0 fooooo\n' '4 4.0 bar') - self.assertEqual(result, expected) + assert result == expected def test_to_string_line_width(self): df = DataFrame(123, lrange(10, 15), lrange(30)) s = df.to_string(line_width=80) - self.assertEqual(max(len(l) for l in 
s.split('\n')), 80) + assert max(len(l) for l in s.split('\n')) == 80 def test_show_dimensions(self): df = DataFrame(123, lrange(10, 15), lrange(30)) @@ -1596,7 +1592,7 @@ def test_period(self): exp = (" A B C\n0 2013-01 2011-01 a\n" "1 2013-02 2011-02-01 b\n2 2013-03 2011-03-01 09:00 c\n" "3 2013-04 2011-04 d") - self.assertEqual(str(df), exp) + assert str(df) == exp def gen_series_formatting(): @@ -1628,30 +1624,29 @@ def test_to_string(self): retval = self.ts.to_string(buf=buf) assert retval is None - self.assertEqual(buf.getvalue().strip(), s) + assert buf.getvalue().strip() == s # pass float_format format = '%.4f'.__mod__ result = self.ts.to_string(float_format=format) result = [x.split()[1] for x in result.split('\n')[:-1]] expected = [format(x) for x in self.ts] - self.assertEqual(result, expected) + assert result == expected # empty string result = self.ts[:0].to_string() - self.assertEqual(result, 'Series([], Freq: B)') + assert result == 'Series([], Freq: B)' result = self.ts[:0].to_string(length=0) - self.assertEqual(result, 'Series([], Freq: B)') + assert result == 'Series([], Freq: B)' # name and length cp = self.ts.copy() cp.name = 'foo' result = cp.to_string(length=True, name=True, dtype=True) last_line = result.split('\n')[-1].strip() - self.assertEqual(last_line, - "Freq: B, Name: foo, Length: %d, dtype: float64" % - len(cp)) + assert last_line == ("Freq: B, Name: foo, " + "Length: %d, dtype: float64" % len(cp)) def test_freq_name_separation(self): s = Series(np.random.randn(10), @@ -1665,18 +1660,18 @@ def test_to_string_mixed(self): result = s.to_string() expected = (u('0 foo\n') + u('1 NaN\n') + u('2 -1.23\n') + u('3 4.56')) - self.assertEqual(result, expected) + assert result == expected # but don't count NAs as floats s = Series(['foo', np.nan, 'bar', 'baz']) result = s.to_string() expected = (u('0 foo\n') + '1 NaN\n' + '2 bar\n' + '3 baz') - self.assertEqual(result, expected) + assert result == expected s = Series(['foo', 5, 'bar', 'baz']) result = s.to_string() expected = (u('0 foo\n') + '1 5\n' + '2 bar\n' + '3 baz') - self.assertEqual(result, expected) + assert result == expected def test_to_string_float_na_spacing(self): s = Series([0., 1.5678, 2., -3., 4.]) @@ -1685,14 +1680,14 @@ def test_to_string_float_na_spacing(self): result = s.to_string() expected = (u('0 NaN\n') + '1 1.5678\n' + '2 NaN\n' + '3 -3.0000\n' + '4 NaN') - self.assertEqual(result, expected) + assert result == expected def test_to_string_without_index(self): # GH 11729 Test index=False option s = Series([1, 2, 3, 4]) result = s.to_string(index=False) expected = (u('1\n') + '2\n' + '3\n' + '4') - self.assertEqual(result, expected) + assert result == expected def test_unicode_name_in_footer(self): s = Series([1, 2], name=u('\u05e2\u05d1\u05e8\u05d9\u05ea')) @@ -1711,21 +1706,21 @@ def test_east_asian_unicode_series(self): index=[u'あ', u'いい', u'ううう', u'ええええ']) expected = (u"あ a\nいい bb\nううう CCC\n" u"ええええ D\ndtype: object") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # unicode values s = Series([u'あ', u'いい', u'ううう', u'ええええ'], index=['a', 'bb', 'c', 'ddd']) expected = (u"a あ\nbb いい\nc ううう\n" u"ddd ええええ\ndtype: object") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # both s = Series([u'あ', u'いい', u'ううう', u'ええええ'], index=[u'ああ', u'いいいい', u'う', u'えええ']) expected = (u"ああ あ\nいいいい いい\nう ううう\n" u"えええ ええええ\ndtype: object") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # unicode footer s = Series([u'あ', u'いい', u'ううう', u'ええええ'], @@ -1733,7 
+1728,7 @@ def test_east_asian_unicode_series(self): name=u'おおおおおおお') expected = (u"ああ あ\nいいいい いい\nう ううう\n" u"えええ ええええ\nName: おおおおおおお, dtype: object") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # MultiIndex idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( @@ -1743,13 +1738,13 @@ def test_east_asian_unicode_series(self): u"う え 22\n" u"おおお かかかか 3333\n" u"き くく 44444\ndtype: int64") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # object dtype, shorter than unicode repr s = Series([1, 22, 3333, 44444], index=[1, 'AB', np.nan, u'あああ']) expected = (u"1 1\nAB 22\nNaN 3333\n" u"あああ 44444\ndtype: int64") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # object dtype, longer than unicode repr s = Series([1, 22, 3333, 44444], @@ -1758,7 +1753,7 @@ def test_east_asian_unicode_series(self): u"AB 22\n" u"2011-01-01 00:00:00 3333\n" u"あああ 44444\ndtype: int64") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # truncate with option_context('display.max_rows', 3): @@ -1768,13 +1763,13 @@ def test_east_asian_unicode_series(self): expected = (u"0 あ\n ... \n" u"3 ええええ\n" u"Name: おおおおおおお, Length: 4, dtype: object") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected s.index = [u'ああ', u'いいいい', u'う', u'えええ'] expected = (u"ああ あ\n ... \n" u"えええ ええええ\n" u"Name: おおおおおおお, Length: 4, dtype: object") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # Emable Unicode option ----------------------------------------- with option_context('display.unicode.east_asian_width', True): @@ -1784,14 +1779,14 @@ def test_east_asian_unicode_series(self): index=[u'あ', u'いい', u'ううう', u'ええええ']) expected = (u"あ a\nいい bb\nううう CCC\n" u"ええええ D\ndtype: object") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # unicode values s = Series([u'あ', u'いい', u'ううう', u'ええええ'], index=['a', 'bb', 'c', 'ddd']) expected = (u"a あ\nbb いい\nc ううう\n" u"ddd ええええ\ndtype: object") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # both s = Series([u'あ', u'いい', u'ううう', u'ええええ'], @@ -1800,7 +1795,7 @@ def test_east_asian_unicode_series(self): u"いいいい いい\n" u"う ううう\n" u"えええ ええええ\ndtype: object") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # unicode footer s = Series([u'あ', u'いい', u'ううう', u'ええええ'], @@ -1811,7 +1806,7 @@ def test_east_asian_unicode_series(self): u"う ううう\n" u"えええ ええええ\n" u"Name: おおおおおおお, dtype: object") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # MultiIndex idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( @@ -1822,13 +1817,13 @@ def test_east_asian_unicode_series(self): u"おおお かかかか 3333\n" u"き くく 44444\n" u"dtype: int64") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # object dtype, shorter than unicode repr s = Series([1, 22, 3333, 44444], index=[1, 'AB', np.nan, u'あああ']) expected = (u"1 1\nAB 22\nNaN 3333\n" u"あああ 44444\ndtype: int64") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # object dtype, longer than unicode repr s = Series([1, 22, 3333, 44444], @@ -1837,7 +1832,7 @@ def test_east_asian_unicode_series(self): u"AB 22\n" u"2011-01-01 00:00:00 3333\n" u"あああ 44444\ndtype: int64") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # truncate with option_context('display.max_rows', 3): @@ -1846,14 +1841,14 @@ def test_east_asian_unicode_series(self): expected = (u"0 あ\n ... 
\n" u"3 ええええ\n" u"Name: おおおおおおお, Length: 4, dtype: object") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected s.index = [u'ああ', u'いいいい', u'う', u'えええ'] expected = (u"ああ あ\n" u" ... \n" u"えええ ええええ\n" u"Name: おおおおおおお, Length: 4, dtype: object") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected # ambiguous unicode s = Series([u'¡¡', u'い¡¡', u'ううう', u'ええええ'], @@ -1862,7 +1857,7 @@ def test_east_asian_unicode_series(self): u"¡¡¡¡いい い¡¡\n" u"¡¡ ううう\n" u"えええ ええええ\ndtype: object") - self.assertEqual(_rep(s), expected) + assert _rep(s) == expected def test_float_trim_zeros(self): vals = [2.08430917305e+10, 3.52205017305e+10, 2.30674817305e+10, @@ -1950,7 +1945,7 @@ def test_timedelta64(self): # no boxing of the actual elements td = Series(pd.timedelta_range('1 days', periods=3)) result = td.to_string() - self.assertEqual(result, u("0 1 days\n1 2 days\n2 3 days")) + assert result == u("0 1 days\n1 2 days\n2 3 days") def test_mixed_datetime64(self): df = DataFrame({'A': [1, 2], 'B': ['2012-01-01', '2012-01-02']}) @@ -1965,12 +1960,12 @@ def test_period(self): s = Series(np.arange(6, dtype='int64'), index=index) exp = ("2013-01 0\n2013-02 1\n2013-03 2\n2013-04 3\n" "2013-05 4\n2013-06 5\nFreq: M, dtype: int64") - self.assertEqual(str(s), exp) + assert str(s) == exp s = Series(index) exp = ("0 2013-01\n1 2013-02\n2 2013-03\n3 2013-04\n" "4 2013-05\n5 2013-06\ndtype: object") - self.assertEqual(str(s), exp) + assert str(s) == exp # periods with mixed freq s = Series([pd.Period('2011-01', freq='M'), @@ -1978,7 +1973,7 @@ def test_period(self): pd.Period('2011-03-01 09:00', freq='H')]) exp = ("0 2011-01\n1 2011-02-01\n" "2 2011-03-01 09:00\ndtype: object") - self.assertEqual(str(s), exp) + assert str(s) == exp def test_max_multi_index_display(self): # GH 7101 @@ -1993,29 +1988,29 @@ def test_max_multi_index_display(self): s = Series(np.random.randn(8), index=index) with option_context("display.max_rows", 10): - self.assertEqual(len(str(s).split('\n')), 10) + assert len(str(s).split('\n')) == 10 with option_context("display.max_rows", 3): - self.assertEqual(len(str(s).split('\n')), 5) + assert len(str(s).split('\n')) == 5 with option_context("display.max_rows", 2): - self.assertEqual(len(str(s).split('\n')), 5) + assert len(str(s).split('\n')) == 5 with option_context("display.max_rows", 1): - self.assertEqual(len(str(s).split('\n')), 4) + assert len(str(s).split('\n')) == 4 with option_context("display.max_rows", 0): - self.assertEqual(len(str(s).split('\n')), 10) + assert len(str(s).split('\n')) == 10 # index s = Series(np.random.randn(8), None) with option_context("display.max_rows", 10): - self.assertEqual(len(str(s).split('\n')), 9) + assert len(str(s).split('\n')) == 9 with option_context("display.max_rows", 3): - self.assertEqual(len(str(s).split('\n')), 4) + assert len(str(s).split('\n')) == 4 with option_context("display.max_rows", 2): - self.assertEqual(len(str(s).split('\n')), 4) + assert len(str(s).split('\n')) == 4 with option_context("display.max_rows", 1): - self.assertEqual(len(str(s).split('\n')), 3) + assert len(str(s).split('\n')) == 3 with option_context("display.max_rows", 0): - self.assertEqual(len(str(s).split('\n')), 9) + assert len(str(s).split('\n')) == 9 # Make sure #8532 is fixed def test_consistent_format(self): @@ -2027,7 +2022,7 @@ def test_consistent_format(self): '1.0000\n4 1.0000\n ... 
\n125 ' '1.0000\n126 1.0000\n127 0.9999\n128 ' '1.0000\n129 1.0000\ndtype: float64') - self.assertEqual(res, exp) + assert res == exp def chck_ncols(self, s): with option_context("display.max_rows", 10): @@ -2036,7 +2031,7 @@ def chck_ncols(self, s): lines = [line for line in repr(s).split('\n') if not re.match(r'[^\.]*\.+', line)][:-1] ncolsizes = len(set(len(line.strip()) for line in lines)) - self.assertEqual(ncolsizes, 1) + assert ncolsizes == 1 def test_format_explicit(self): test_sers = gen_series_formatting() @@ -2044,19 +2039,19 @@ def test_format_explicit(self): "display.show_dimensions", False): res = repr(test_sers['onel']) exp = '0 a\n1 a\n ..\n98 a\n99 a\ndtype: object' - self.assertEqual(exp, res) + assert exp == res res = repr(test_sers['twol']) exp = ('0 ab\n1 ab\n ..\n98 ab\n99 ab\ndtype:' ' object') - self.assertEqual(exp, res) + assert exp == res res = repr(test_sers['asc']) exp = ('0 a\n1 ab\n ... \n4 abcde\n5' ' abcdef\ndtype: object') - self.assertEqual(exp, res) + assert exp == res res = repr(test_sers['desc']) exp = ('5 abcdef\n4 abcde\n ... \n1 ab\n0' ' a\ndtype: object') - self.assertEqual(exp, res) + assert exp == res def test_ncols(self): test_sers = gen_series_formatting() @@ -2069,10 +2064,10 @@ def test_max_rows_eq_one(self): strrepr = repr(s).split('\n') exp1 = ['0', '0'] res1 = strrepr[0].split() - self.assertEqual(exp1, res1) + assert exp1 == res1 exp2 = ['..'] res2 = strrepr[1].split() - self.assertEqual(exp2, res2) + assert exp2 == res2 def test_truncate_ndots(self): def getndots(s): @@ -2081,12 +2076,12 @@ def getndots(s): s = Series([0, 2, 3, 6]) with option_context("display.max_rows", 2): strrepr = repr(s).replace('\n', '') - self.assertEqual(getndots(strrepr), 2) + assert getndots(strrepr) == 2 s = Series([0, 100, 200, 400]) with option_context("display.max_rows", 2): strrepr = repr(s).replace('\n', '') - self.assertEqual(getndots(strrepr), 3) + assert getndots(strrepr) == 3 def test_show_dimensions(self): # gh-7117 @@ -2109,48 +2104,48 @@ def test_to_string_name(self): s.name = 'myser' res = s.to_string(max_rows=2, name=True) exp = '0 0\n ..\n99 99\nName: myser' - self.assertEqual(res, exp) + assert res == exp res = s.to_string(max_rows=2, name=False) exp = '0 0\n ..\n99 99' - self.assertEqual(res, exp) + assert res == exp def test_to_string_dtype(self): s = Series(range(100), dtype='int64') res = s.to_string(max_rows=2, dtype=True) exp = '0 0\n ..\n99 99\ndtype: int64' - self.assertEqual(res, exp) + assert res == exp res = s.to_string(max_rows=2, dtype=False) exp = '0 0\n ..\n99 99' - self.assertEqual(res, exp) + assert res == exp def test_to_string_length(self): s = Series(range(100), dtype='int64') res = s.to_string(max_rows=2, length=True) exp = '0 0\n ..\n99 99\nLength: 100' - self.assertEqual(res, exp) + assert res == exp def test_to_string_na_rep(self): s = pd.Series(index=range(100)) res = s.to_string(na_rep='foo', max_rows=2) exp = '0 foo\n ..\n99 foo' - self.assertEqual(res, exp) + assert res == exp def test_to_string_float_format(self): s = pd.Series(range(10), dtype='float64') res = s.to_string(float_format=lambda x: '{0:2.1f}'.format(x), max_rows=2) exp = '0 0.0\n ..\n9 9.0' - self.assertEqual(res, exp) + assert res == exp def test_to_string_header(self): s = pd.Series(range(10), dtype='int64') s.index.name = 'foo' res = s.to_string(header=True, max_rows=2) exp = 'foo\n0 0\n ..\n9 9' - self.assertEqual(res, exp) + assert res == exp res = s.to_string(header=False, max_rows=2) exp = '0 0\n ..\n9 9' - self.assertEqual(res, exp) + assert 
res == exp def _three_digit_exp(): @@ -2167,8 +2162,8 @@ def test_misc(self): def test_format(self): obj = fmt.FloatArrayFormatter(np.array([12, 0], dtype=np.float64)) result = obj.get_result() - self.assertEqual(result[0], " 12.0") - self.assertEqual(result[1], " 0.0") + assert result[0] == " 12.0" + assert result[1] == " 0.0" def test_output_significant_digits(self): # Issue #9764 @@ -2228,7 +2223,7 @@ def test_output_significant_digits(self): } for (start, stop), v in expected_output.items(): - self.assertEqual(str(d[start:stop]), v) + assert str(d[start:stop]) == v def test_too_long(self): # GH 10451 @@ -2236,12 +2231,11 @@ def test_too_long(self): # need both a number > 1e6 and something that normally formats to # having length > display.precision + 6 df = pd.DataFrame(dict(x=[12345.6789])) - self.assertEqual(str(df), ' x\n0 12345.6789') + assert str(df) == ' x\n0 12345.6789' df = pd.DataFrame(dict(x=[2e6])) - self.assertEqual(str(df), ' x\n0 2000000.0') + assert str(df) == ' x\n0 2000000.0' df = pd.DataFrame(dict(x=[12345.6789, 2e6])) - self.assertEqual( - str(df), ' x\n0 1.2346e+04\n1 2.0000e+06') + assert str(df) == ' x\n0 1.2346e+04\n1 2.0000e+06' class TestRepr_timedelta64(tm.TestCase): @@ -2253,14 +2247,13 @@ def test_none(self): delta_500ms = pd.to_timedelta(500, unit='ms') drepr = lambda x: x._repr_base() - self.assertEqual(drepr(delta_1d), "1 days") - self.assertEqual(drepr(-delta_1d), "-1 days") - self.assertEqual(drepr(delta_0d), "0 days") - self.assertEqual(drepr(delta_1s), "0 days 00:00:01") - self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000") - self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01") - self.assertEqual( - drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000") + assert drepr(delta_1d) == "1 days" + assert drepr(-delta_1d) == "-1 days" + assert drepr(delta_0d) == "0 days" + assert drepr(delta_1s) == "0 days 00:00:01" + assert drepr(delta_500ms) == "0 days 00:00:00.500000" + assert drepr(delta_1d + delta_1s) == "1 days 00:00:01" + assert drepr(delta_1d + delta_500ms) == "1 days 00:00:00.500000" def test_even_day(self): delta_1d = pd.to_timedelta(1, unit='D') @@ -2269,14 +2262,13 @@ def test_even_day(self): delta_500ms = pd.to_timedelta(500, unit='ms') drepr = lambda x: x._repr_base(format='even_day') - self.assertEqual(drepr(delta_1d), "1 days") - self.assertEqual(drepr(-delta_1d), "-1 days") - self.assertEqual(drepr(delta_0d), "0 days") - self.assertEqual(drepr(delta_1s), "0 days 00:00:01") - self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000") - self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01") - self.assertEqual( - drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000") + assert drepr(delta_1d) == "1 days" + assert drepr(-delta_1d) == "-1 days" + assert drepr(delta_0d) == "0 days" + assert drepr(delta_1s) == "0 days 00:00:01" + assert drepr(delta_500ms) == "0 days 00:00:00.500000" + assert drepr(delta_1d + delta_1s) == "1 days 00:00:01" + assert drepr(delta_1d + delta_500ms) == "1 days 00:00:00.500000" def test_sub_day(self): delta_1d = pd.to_timedelta(1, unit='D') @@ -2285,14 +2277,13 @@ def test_sub_day(self): delta_500ms = pd.to_timedelta(500, unit='ms') drepr = lambda x: x._repr_base(format='sub_day') - self.assertEqual(drepr(delta_1d), "1 days") - self.assertEqual(drepr(-delta_1d), "-1 days") - self.assertEqual(drepr(delta_0d), "00:00:00") - self.assertEqual(drepr(delta_1s), "00:00:01") - self.assertEqual(drepr(delta_500ms), "00:00:00.500000") - self.assertEqual(drepr(delta_1d + delta_1s), "1 days 
00:00:01") - self.assertEqual( - drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000") + assert drepr(delta_1d) == "1 days" + assert drepr(-delta_1d) == "-1 days" + assert drepr(delta_0d) == "00:00:00" + assert drepr(delta_1s) == "00:00:01" + assert drepr(delta_500ms) == "00:00:00.500000" + assert drepr(delta_1d + delta_1s) == "1 days 00:00:01" + assert drepr(delta_1d + delta_500ms) == "1 days 00:00:00.500000" def test_long(self): delta_1d = pd.to_timedelta(1, unit='D') @@ -2301,14 +2292,13 @@ def test_long(self): delta_500ms = pd.to_timedelta(500, unit='ms') drepr = lambda x: x._repr_base(format='long') - self.assertEqual(drepr(delta_1d), "1 days 00:00:00") - self.assertEqual(drepr(-delta_1d), "-1 days +00:00:00") - self.assertEqual(drepr(delta_0d), "0 days 00:00:00") - self.assertEqual(drepr(delta_1s), "0 days 00:00:01") - self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000") - self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01") - self.assertEqual( - drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000") + assert drepr(delta_1d) == "1 days 00:00:00" + assert drepr(-delta_1d) == "-1 days +00:00:00" + assert drepr(delta_0d) == "0 days 00:00:00" + assert drepr(delta_1s) == "0 days 00:00:01" + assert drepr(delta_500ms) == "0 days 00:00:00.500000" + assert drepr(delta_1d + delta_1s) == "1 days 00:00:01" + assert drepr(delta_1d + delta_500ms) == "1 days 00:00:00.500000" def test_all(self): delta_1d = pd.to_timedelta(1, unit='D') @@ -2316,9 +2306,9 @@ def test_all(self): delta_1ns = pd.to_timedelta(1, unit='ns') drepr = lambda x: x._repr_base(format='all') - self.assertEqual(drepr(delta_1d), "1 days 00:00:00.000000000") - self.assertEqual(drepr(delta_0d), "0 days 00:00:00.000000000") - self.assertEqual(drepr(delta_1ns), "0 days 00:00:00.000000001") + assert drepr(delta_1d) == "1 days 00:00:00.000000000" + assert drepr(delta_0d) == "0 days 00:00:00.000000000" + assert drepr(delta_1ns) == "0 days 00:00:00.000000001" class TestTimedelta64Formatter(tm.TestCase): @@ -2326,45 +2316,45 @@ class TestTimedelta64Formatter(tm.TestCase): def test_days(self): x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='D') result = fmt.Timedelta64Formatter(x, box=True).get_result() - self.assertEqual(result[0].strip(), "'0 days'") - self.assertEqual(result[1].strip(), "'1 days'") + assert result[0].strip() == "'0 days'" + assert result[1].strip() == "'1 days'" result = fmt.Timedelta64Formatter(x[1:2], box=True).get_result() - self.assertEqual(result[0].strip(), "'1 days'") + assert result[0].strip() == "'1 days'" result = fmt.Timedelta64Formatter(x, box=False).get_result() - self.assertEqual(result[0].strip(), "0 days") - self.assertEqual(result[1].strip(), "1 days") + assert result[0].strip() == "0 days" + assert result[1].strip() == "1 days" result = fmt.Timedelta64Formatter(x[1:2], box=False).get_result() - self.assertEqual(result[0].strip(), "1 days") + assert result[0].strip() == "1 days" def test_days_neg(self): x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='D') result = fmt.Timedelta64Formatter(-x, box=True).get_result() - self.assertEqual(result[0].strip(), "'0 days'") - self.assertEqual(result[1].strip(), "'-1 days'") + assert result[0].strip() == "'0 days'" + assert result[1].strip() == "'-1 days'" def test_subdays(self): y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='s') result = fmt.Timedelta64Formatter(y, box=True).get_result() - self.assertEqual(result[0].strip(), "'00:00:00'") - self.assertEqual(result[1].strip(), "'00:00:01'") + assert result[0].strip() == 
"'00:00:00'" + assert result[1].strip() == "'00:00:01'" def test_subdays_neg(self): y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='s') result = fmt.Timedelta64Formatter(-y, box=True).get_result() - self.assertEqual(result[0].strip(), "'00:00:00'") - self.assertEqual(result[1].strip(), "'-1 days +23:59:59'") + assert result[0].strip() == "'00:00:00'" + assert result[1].strip() == "'-1 days +23:59:59'" def test_zero(self): x = pd.to_timedelta(list(range(1)) + [pd.NaT], unit='D') result = fmt.Timedelta64Formatter(x, box=True).get_result() - self.assertEqual(result[0].strip(), "'0 days'") + assert result[0].strip() == "'0 days'" x = pd.to_timedelta(list(range(1)), unit='D') result = fmt.Timedelta64Formatter(x, box=True).get_result() - self.assertEqual(result[0].strip(), "'0 days'") + assert result[0].strip() == "'0 days'" class TestDatetime64Formatter(tm.TestCase): @@ -2372,19 +2362,19 @@ class TestDatetime64Formatter(tm.TestCase): def test_mixed(self): x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), pd.NaT]) result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "2013-01-01 00:00:00") - self.assertEqual(result[1].strip(), "2013-01-01 12:00:00") + assert result[0].strip() == "2013-01-01 00:00:00" + assert result[1].strip() == "2013-01-01 12:00:00" def test_dates(self): x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT]) result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "2013-01-01") - self.assertEqual(result[1].strip(), "2013-01-02") + assert result[0].strip() == "2013-01-01" + assert result[1].strip() == "2013-01-02" def test_date_nanos(self): x = Series([Timestamp(200)]) result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "1970-01-01 00:00:00.000000200") + assert result[0].strip() == "1970-01-01 00:00:00.000000200" def test_dates_display(self): @@ -2393,37 +2383,37 @@ def test_dates_display(self): x = Series(date_range('20130101 09:00:00', periods=5, freq='D')) x.iloc[1] = np.nan result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "2013-01-01 09:00:00") - self.assertEqual(result[1].strip(), "NaT") - self.assertEqual(result[4].strip(), "2013-01-05 09:00:00") + assert result[0].strip() == "2013-01-01 09:00:00" + assert result[1].strip() == "NaT" + assert result[4].strip() == "2013-01-05 09:00:00" x = Series(date_range('20130101 09:00:00', periods=5, freq='s')) x.iloc[1] = np.nan result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "2013-01-01 09:00:00") - self.assertEqual(result[1].strip(), "NaT") - self.assertEqual(result[4].strip(), "2013-01-01 09:00:04") + assert result[0].strip() == "2013-01-01 09:00:00" + assert result[1].strip() == "NaT" + assert result[4].strip() == "2013-01-01 09:00:04" x = Series(date_range('20130101 09:00:00', periods=5, freq='ms')) x.iloc[1] = np.nan result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "2013-01-01 09:00:00.000") - self.assertEqual(result[1].strip(), "NaT") - self.assertEqual(result[4].strip(), "2013-01-01 09:00:00.004") + assert result[0].strip() == "2013-01-01 09:00:00.000" + assert result[1].strip() == "NaT" + assert result[4].strip() == "2013-01-01 09:00:00.004" x = Series(date_range('20130101 09:00:00', periods=5, freq='us')) x.iloc[1] = np.nan result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "2013-01-01 09:00:00.000000") - self.assertEqual(result[1].strip(), "NaT") - 
self.assertEqual(result[4].strip(), "2013-01-01 09:00:00.000004") + assert result[0].strip() == "2013-01-01 09:00:00.000000" + assert result[1].strip() == "NaT" + assert result[4].strip() == "2013-01-01 09:00:00.000004" x = Series(date_range('20130101 09:00:00', periods=5, freq='N')) x.iloc[1] = np.nan result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "2013-01-01 09:00:00.000000000") - self.assertEqual(result[1].strip(), "NaT") - self.assertEqual(result[4].strip(), "2013-01-01 09:00:00.000000004") + assert result[0].strip() == "2013-01-01 09:00:00.000000000" + assert result[1].strip() == "NaT" + assert result[4].strip() == "2013-01-01 09:00:00.000000004" def test_datetime64formatter_yearmonth(self): x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)]) @@ -2433,7 +2423,7 @@ def format_func(x): formatter = fmt.Datetime64Formatter(x, formatter=format_func) result = formatter.get_result() - self.assertEqual(result, ['2016-01', '2016-02']) + assert result == ['2016-01', '2016-02'] def test_datetime64formatter_hoursecond(self): @@ -2445,43 +2435,43 @@ def format_func(x): formatter = fmt.Datetime64Formatter(x, formatter=format_func) result = formatter.get_result() - self.assertEqual(result, ['10:10', '12:12']) + assert result == ['10:10', '12:12'] class TestNaTFormatting(tm.TestCase): def test_repr(self): - self.assertEqual(repr(pd.NaT), "NaT") + assert repr(pd.NaT) == "NaT" def test_str(self): - self.assertEqual(str(pd.NaT), "NaT") + assert str(pd.NaT) == "NaT" class TestDatetimeIndexFormat(tm.TestCase): def test_datetime(self): formatted = pd.to_datetime([datetime(2003, 1, 1, 12), pd.NaT]).format() - self.assertEqual(formatted[0], "2003-01-01 12:00:00") - self.assertEqual(formatted[1], "NaT") + assert formatted[0] == "2003-01-01 12:00:00" + assert formatted[1] == "NaT" def test_date(self): formatted = pd.to_datetime([datetime(2003, 1, 1), pd.NaT]).format() - self.assertEqual(formatted[0], "2003-01-01") - self.assertEqual(formatted[1], "NaT") + assert formatted[0] == "2003-01-01" + assert formatted[1] == "NaT" def test_date_tz(self): formatted = pd.to_datetime([datetime(2013, 1, 1)], utc=True).format() - self.assertEqual(formatted[0], "2013-01-01 00:00:00+00:00") + assert formatted[0] == "2013-01-01 00:00:00+00:00" formatted = pd.to_datetime( [datetime(2013, 1, 1), pd.NaT], utc=True).format() - self.assertEqual(formatted[0], "2013-01-01 00:00:00+00:00") + assert formatted[0] == "2013-01-01 00:00:00+00:00" def test_date_explict_date_format(self): formatted = pd.to_datetime([datetime(2003, 2, 1), pd.NaT]).format( date_format="%m-%d-%Y", na_rep="UT") - self.assertEqual(formatted[0], "02-01-2003") - self.assertEqual(formatted[1], "UT") + assert formatted[0] == "02-01-2003" + assert formatted[1] == "UT" class TestDatetimeIndexUnicode(tm.TestCase): @@ -2503,19 +2493,19 @@ class TestStringRepTimestamp(tm.TestCase): def test_no_tz(self): dt_date = datetime(2013, 1, 2) - self.assertEqual(str(dt_date), str(Timestamp(dt_date))) + assert str(dt_date) == str(Timestamp(dt_date)) dt_datetime = datetime(2013, 1, 2, 12, 1, 3) - self.assertEqual(str(dt_datetime), str(Timestamp(dt_datetime))) + assert str(dt_datetime) == str(Timestamp(dt_datetime)) dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45) - self.assertEqual(str(dt_datetime_us), str(Timestamp(dt_datetime_us))) + assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us)) ts_nanos_only = Timestamp(200) - self.assertEqual(str(ts_nanos_only), "1970-01-01 00:00:00.000000200") + assert str(ts_nanos_only) == 
"1970-01-01 00:00:00.000000200" ts_nanos_micros = Timestamp(1200) - self.assertEqual(str(ts_nanos_micros), "1970-01-01 00:00:00.000001200") + assert str(ts_nanos_micros) == "1970-01-01 00:00:00.000001200" def test_tz_pytz(self): tm._skip_if_no_pytz() @@ -2523,13 +2513,13 @@ def test_tz_pytz(self): import pytz dt_date = datetime(2013, 1, 2, tzinfo=pytz.utc) - self.assertEqual(str(dt_date), str(Timestamp(dt_date))) + assert str(dt_date) == str(Timestamp(dt_date)) dt_datetime = datetime(2013, 1, 2, 12, 1, 3, tzinfo=pytz.utc) - self.assertEqual(str(dt_datetime), str(Timestamp(dt_datetime))) + assert str(dt_datetime) == str(Timestamp(dt_datetime)) dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45, tzinfo=pytz.utc) - self.assertEqual(str(dt_datetime_us), str(Timestamp(dt_datetime_us))) + assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us)) def test_tz_dateutil(self): tm._skip_if_no_dateutil() @@ -2537,17 +2527,17 @@ def test_tz_dateutil(self): utc = dateutil.tz.tzutc() dt_date = datetime(2013, 1, 2, tzinfo=utc) - self.assertEqual(str(dt_date), str(Timestamp(dt_date))) + assert str(dt_date) == str(Timestamp(dt_date)) dt_datetime = datetime(2013, 1, 2, 12, 1, 3, tzinfo=utc) - self.assertEqual(str(dt_datetime), str(Timestamp(dt_datetime))) + assert str(dt_datetime) == str(Timestamp(dt_datetime)) dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45, tzinfo=utc) - self.assertEqual(str(dt_datetime_us), str(Timestamp(dt_datetime_us))) + assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us)) def test_nat_representations(self): for f in (str, repr, methodcaller('isoformat')): - self.assertEqual(f(pd.NaT), 'NaT') + assert f(pd.NaT) == 'NaT' def test_format_percentiles(): diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 63cd08545610f..7725b2063c7b6 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -44,13 +44,13 @@ def test_adjoin(self): adjoined = printing.adjoin(2, *data) - self.assertEqual(adjoined, expected) + assert adjoined == expected def test_adjoin_unicode(self): data = [[u'あ', 'b', 'c'], ['dd', u'ええ', 'ff'], ['ggg', 'hhh', u'いいい']] expected = u'あ dd ggg\nb ええ hhh\nc ff いいい' adjoined = printing.adjoin(2, *data) - self.assertEqual(adjoined, expected) + assert adjoined == expected adj = fmt.EastAsianTextAdjustment() @@ -59,22 +59,22 @@ def test_adjoin_unicode(self): c ff いいい""" adjoined = adj.adjoin(2, *data) - self.assertEqual(adjoined, expected) + assert adjoined == expected cols = adjoined.split('\n') - self.assertEqual(adj.len(cols[0]), 13) - self.assertEqual(adj.len(cols[1]), 13) - self.assertEqual(adj.len(cols[2]), 16) + assert adj.len(cols[0]) == 13 + assert adj.len(cols[1]) == 13 + assert adj.len(cols[2]) == 16 expected = u"""あ dd ggg b ええ hhh c ff いいい""" adjoined = adj.adjoin(7, *data) - self.assertEqual(adjoined, expected) + assert adjoined == expected cols = adjoined.split('\n') - self.assertEqual(adj.len(cols[0]), 23) - self.assertEqual(adj.len(cols[1]), 23) - self.assertEqual(adj.len(cols[2]), 26) + assert adj.len(cols[0]) == 23 + assert adj.len(cols[1]) == 23 + assert adj.len(cols[2]) == 26 def test_justify(self): adj = fmt.EastAsianTextAdjustment() @@ -83,45 +83,45 @@ def just(x, *args, **kwargs): # wrapper to test single str return adj.justify([x], *args, **kwargs)[0] - self.assertEqual(just('abc', 5, mode='left'), 'abc ') - self.assertEqual(just('abc', 5, mode='center'), ' abc ') - self.assertEqual(just('abc', 5, mode='right'), ' abc') - 
self.assertEqual(just(u'abc', 5, mode='left'), 'abc ') - self.assertEqual(just(u'abc', 5, mode='center'), ' abc ') - self.assertEqual(just(u'abc', 5, mode='right'), ' abc') + assert just('abc', 5, mode='left') == 'abc ' + assert just('abc', 5, mode='center') == ' abc ' + assert just('abc', 5, mode='right') == ' abc' + assert just(u'abc', 5, mode='left') == 'abc ' + assert just(u'abc', 5, mode='center') == ' abc ' + assert just(u'abc', 5, mode='right') == ' abc' - self.assertEqual(just(u'パンダ', 5, mode='left'), u'パンダ') - self.assertEqual(just(u'パンダ', 5, mode='center'), u'パンダ') - self.assertEqual(just(u'パンダ', 5, mode='right'), u'パンダ') + assert just(u'パンダ', 5, mode='left') == u'パンダ' + assert just(u'パンダ', 5, mode='center') == u'パンダ' + assert just(u'パンダ', 5, mode='right') == u'パンダ' - self.assertEqual(just(u'パンダ', 10, mode='left'), u'パンダ ') - self.assertEqual(just(u'パンダ', 10, mode='center'), u' パンダ ') - self.assertEqual(just(u'パンダ', 10, mode='right'), u' パンダ') + assert just(u'パンダ', 10, mode='left') == u'パンダ ' + assert just(u'パンダ', 10, mode='center') == u' パンダ ' + assert just(u'パンダ', 10, mode='right') == u' パンダ' def test_east_asian_len(self): adj = fmt.EastAsianTextAdjustment() - self.assertEqual(adj.len('abc'), 3) - self.assertEqual(adj.len(u'abc'), 3) + assert adj.len('abc') == 3 + assert adj.len(u'abc') == 3 - self.assertEqual(adj.len(u'パンダ'), 6) - self.assertEqual(adj.len(u'ﾊﾟﾝﾀﾞ'), 5) - self.assertEqual(adj.len(u'パンダpanda'), 11) - self.assertEqual(adj.len(u'ﾊﾟﾝﾀﾞpanda'), 10) + assert adj.len(u'パンダ') == 6 + assert adj.len(u'ﾊﾟﾝﾀﾞ') == 5 + assert adj.len(u'パンダpanda') == 11 + assert adj.len(u'ﾊﾟﾝﾀﾞpanda') == 10 def test_ambiguous_width(self): adj = fmt.EastAsianTextAdjustment() - self.assertEqual(adj.len(u'¡¡ab'), 4) + assert adj.len(u'¡¡ab') == 4 with cf.option_context('display.unicode.ambiguous_as_wide', True): adj = fmt.EastAsianTextAdjustment() - self.assertEqual(adj.len(u'¡¡ab'), 6) + assert adj.len(u'¡¡ab') == 6 data = [[u'あ', 'b', 'c'], ['dd', u'ええ', 'ff'], ['ggg', u'¡¡ab', u'いいい']] expected = u'あ dd ggg \nb ええ ¡¡ab\nc ff いいい' adjoined = adj.adjoin(2, *data) - self.assertEqual(adjoined, expected) + assert adjoined == expected class TestTableSchemaRepr(tm.TestCase): @@ -151,13 +151,13 @@ def test_publishes(self): for obj, expected in zip(objects, expected_keys): with opt, make_patch as mock_display: handle = obj._ipython_display_() - self.assertEqual(mock_display.call_count, 1) + assert mock_display.call_count == 1 assert handle is None args, kwargs = mock_display.call_args arg, = args # just one argument - self.assertEqual(kwargs, {"raw": True}) - self.assertEqual(set(arg.keys()), expected) + assert kwargs == {"raw": True} + assert set(arg.keys()) == expected with_latex = pd.option_context('display.latex.repr', True) @@ -168,7 +168,7 @@ def test_publishes(self): expected = {'text/plain', 'text/html', 'text/latex', 'application/vnd.dataresource+json'} - self.assertEqual(set(arg.keys()), expected) + assert set(arg.keys()) == expected def test_publishes_not_implemented(self): # column MultiIndex diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 7d8ac6f81c31e..371cc2b61634a 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -39,7 +39,7 @@ def test_init_non_pandas(self): def test_init_series(self): result = Styler(pd.Series([1, 2])) - self.assertEqual(result.data.ndim, 2) + assert result.data.ndim == 2 def test_repr_html_ok(self): self.styler._repr_html_() @@ -48,7 +48,7 @@ def test_update_ctx(self):
self.styler._update_ctx(self.attrs) expected = {(0, 0): ['color: red'], (1, 0): ['color: blue']} - self.assertEqual(self.styler.ctx, expected) + assert self.styler.ctx == expected def test_update_ctx_flatten_multi(self): attrs = DataFrame({"A": ['color: red; foo: bar', @@ -56,7 +56,7 @@ def test_update_ctx_flatten_multi(self): self.styler._update_ctx(attrs) expected = {(0, 0): ['color: red', ' foo: bar'], (1, 0): ['color: blue', ' foo: baz']} - self.assertEqual(self.styler.ctx, expected) + assert self.styler.ctx == expected def test_update_ctx_flatten_multi_traliing_semi(self): attrs = DataFrame({"A": ['color: red; foo: bar;', @@ -64,7 +64,7 @@ def test_update_ctx_flatten_multi_traliing_semi(self): self.styler._update_ctx(attrs) expected = {(0, 0): ['color: red', ' foo: bar'], (1, 0): ['color: blue', ' foo: baz']} - self.assertEqual(self.styler.ctx, expected) + assert self.styler.ctx == expected def test_copy(self): s2 = copy.copy(self.styler) @@ -74,8 +74,8 @@ def test_copy(self): self.styler._update_ctx(self.attrs) self.styler.highlight_max() - self.assertEqual(self.styler.ctx, s2.ctx) - self.assertEqual(self.styler._todo, s2._todo) + assert self.styler.ctx == s2.ctx + assert self.styler._todo == s2._todo def test_deepcopy(self): s2 = copy.deepcopy(self.styler) @@ -86,7 +86,7 @@ def test_deepcopy(self): self.styler._update_ctx(self.attrs) self.styler.highlight_max() self.assertNotEqual(self.styler.ctx, s2.ctx) - self.assertEqual(s2._todo, []) + assert s2._todo == [] self.assertNotEqual(self.styler._todo, s2._todo) def test_clear(self): @@ -119,16 +119,16 @@ def test_set_properties(self): # order is deterministic v = ["color: white", "size: 10px"] expected = {(0, 0): v, (1, 0): v} - self.assertEqual(result.keys(), expected.keys()) + assert result.keys() == expected.keys() for v1, v2 in zip(result.values(), expected.values()): - self.assertEqual(sorted(v1), sorted(v2)) + assert sorted(v1) == sorted(v2) def test_set_properties_subset(self): df = pd.DataFrame({'A': [0, 1]}) result = df.style.set_properties(subset=pd.IndexSlice[0, 'A'], color='white')._compute().ctx expected = {(0, 0): ['color: white']} - self.assertEqual(result, expected) + assert result == expected def test_empty_index_name_doesnt_display(self): # https://github.com/pandas-dev/pandas/pull/12090#issuecomment-180695902 @@ -156,7 +156,7 @@ def test_empty_index_name_doesnt_display(self): 'is_visible': True, }]] - self.assertEqual(result['head'], expected) + assert result['head'] == expected def test_index_name(self): # https://github.com/pandas-dev/pandas/issues/11655 @@ -174,7 +174,7 @@ def test_index_name(self): {'class': 'blank', 'type': 'th', 'value': ''}, {'class': 'blank', 'type': 'th', 'value': ''}]] - self.assertEqual(result['head'], expected) + assert result['head'] == expected def test_multiindex_name(self): # https://github.com/pandas-dev/pandas/issues/11655 @@ -194,7 +194,7 @@ def test_multiindex_name(self): 'value': 'B'}, {'class': 'blank', 'type': 'th', 'value': ''}]] - self.assertEqual(result['head'], expected) + assert result['head'] == expected def test_numeric_columns(self): # https://github.com/pandas-dev/pandas/issues/12125 @@ -206,21 +206,21 @@ def test_apply_axis(self): df = pd.DataFrame({'A': [0, 0], 'B': [1, 1]}) f = lambda x: ['val: %s' % x.max() for v in x] result = df.style.apply(f, axis=1) - self.assertEqual(len(result._todo), 1) - self.assertEqual(len(result.ctx), 0) + assert len(result._todo) == 1 + assert len(result.ctx) == 0 result._compute() expected = {(0, 0): ['val: 1'], 
(0, 1): ['val: 1'], (1, 0): ['val: 1'], (1, 1): ['val: 1']} - self.assertEqual(result.ctx, expected) + assert result.ctx == expected result = df.style.apply(f, axis=0) expected = {(0, 0): ['val: 0'], (0, 1): ['val: 1'], (1, 0): ['val: 0'], (1, 1): ['val: 1']} result._compute() - self.assertEqual(result.ctx, expected) + assert result.ctx == expected result = df.style.apply(f) # default result._compute() - self.assertEqual(result.ctx, expected) + assert result.ctx == expected def test_apply_subset(self): axes = [0, 1] @@ -236,7 +236,7 @@ def test_apply_subset(self): for c, col in enumerate(self.df.columns) if row in self.df.loc[slice_].index and col in self.df.loc[slice_].columns) - self.assertEqual(result, expected) + assert result == expected def test_applymap_subset(self): def f(x): @@ -253,7 +253,7 @@ def f(x): for c, col in enumerate(self.df.columns) if row in self.df.loc[slice_].index and col in self.df.loc[slice_].columns) - self.assertEqual(result, expected) + assert result == expected def test_empty(self): df = pd.DataFrame({'A': [1, 0]}) @@ -264,7 +264,7 @@ def test_empty(self): result = s._translate()['cellstyle'] expected = [{'props': [['color', ' red']], 'selector': 'row0_col0'}, {'props': [['', '']], 'selector': 'row1_col0'}] - self.assertEqual(result, expected) + assert result == expected def test_bar(self): df = pd.DataFrame({'A': [0, 1, 2]}) @@ -278,7 +278,7 @@ def test_bar(self): 'background: linear-gradient(' '90deg,#d65f5f 100.0%, transparent 0%)'] } - self.assertEqual(result, expected) + assert result == expected result = df.style.bar(color='red', width=50)._compute().ctx expected = { @@ -290,14 +290,14 @@ def test_bar(self): 'background: linear-gradient(' '90deg,red 50.0%, transparent 0%)'] } - self.assertEqual(result, expected) + assert result == expected df['C'] = ['a'] * len(df) result = df.style.bar(color='red', width=50)._compute().ctx - self.assertEqual(result, expected) + assert result == expected df['C'] = df['C'].astype('category') result = df.style.bar(color='red', width=50)._compute().ctx - self.assertEqual(result, expected) + assert result == expected def test_bar_0points(self): df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) @@ -323,7 +323,7 @@ def test_bar_0points(self): (2, 2): ['width: 10em', ' height: 80%', 'background: linear-gradient(90deg,#d65f5f 100.0%' ', transparent 0%)']} - self.assertEqual(result, expected) + assert result == expected result = df.style.bar(axis=1)._compute().ctx expected = {(0, 0): ['width: 10em', ' height: 80%'], @@ -347,14 +347,14 @@ def test_bar_0points(self): (2, 2): ['width: 10em', ' height: 80%', 'background: linear-gradient(90deg,#d65f5f 100.0%' ', transparent 0%)']} - self.assertEqual(result, expected) + assert result == expected def test_highlight_null(self, null_color='red'): df = pd.DataFrame({'A': [0, np.nan]}) result = df.style.highlight_null()._compute().ctx expected = {(0, 0): [''], (1, 0): ['background-color: red']} - self.assertEqual(result, expected) + assert result == expected def test_nonunique_raises(self): df = pd.DataFrame([[1, 2]], columns=['A', 'A']) @@ -372,7 +372,7 @@ def test_caption(self): styler = self.df.style result = styler.set_caption('baz') assert styler is result - self.assertEqual(styler.caption, 'baz') + assert styler.caption == 'baz' def test_uuid(self): styler = Styler(self.df, uuid='abc123') @@ -382,7 +382,7 @@ def test_uuid(self): styler = self.df.style result = styler.set_uuid('aaa') assert result is styler - self.assertEqual(result.uuid, 'aaa') + assert result.uuid == 'aaa' def 
test_table_styles(self): style = [{'selector': 'th', 'props': [('foo', 'bar')]}] @@ -393,7 +393,7 @@ def test_table_styles(self): styler = self.df.style result = styler.set_table_styles(style) assert styler is result - self.assertEqual(styler.table_styles, style) + assert styler.table_styles == style def test_table_attributes(self): attributes = 'class="foo" data-bar' @@ -407,13 +407,13 @@ def test_table_attributes(self): def test_precision(self): with pd.option_context('display.precision', 10): s = Styler(self.df) - self.assertEqual(s.precision, 10) + assert s.precision == 10 s = Styler(self.df, precision=2) - self.assertEqual(s.precision, 2) + assert s.precision == 2 s2 = s.set_precision(4) assert s is s2 - self.assertEqual(s.precision, 4) + assert s.precision == 4 def test_apply_none(self): def f(x): @@ -421,14 +421,14 @@ def f(x): index=x.index, columns=x.columns) result = (pd.DataFrame([[1, 2], [3, 4]]) .style.apply(f, axis=None)._compute().ctx) - self.assertEqual(result[(1, 1)], ['color: red']) + assert result[(1, 1)] == ['color: red'] def test_trim(self): result = self.df.style.render() # trim=True - self.assertEqual(result.count('#'), 0) + assert result.count('#') == 0 result = self.df.style.highlight_max().render() - self.assertEqual(result.count('#'), len(self.df.columns)) + assert result.count('#') == len(self.df.columns) def test_highlight_max(self): df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) @@ -440,25 +440,25 @@ def test_highlight_max(self): df = -df attr = 'highlight_min' result = getattr(df.style, attr)()._compute().ctx - self.assertEqual(result[(1, 1)], ['background-color: yellow']) + assert result[(1, 1)] == ['background-color: yellow'] result = getattr(df.style, attr)(color='green')._compute().ctx - self.assertEqual(result[(1, 1)], ['background-color: green']) + assert result[(1, 1)] == ['background-color: green'] result = getattr(df.style, attr)(subset='A')._compute().ctx - self.assertEqual(result[(1, 0)], ['background-color: yellow']) + assert result[(1, 0)] == ['background-color: yellow'] result = getattr(df.style, attr)(axis=0)._compute().ctx expected = {(1, 0): ['background-color: yellow'], (1, 1): ['background-color: yellow'], (0, 1): [''], (0, 0): ['']} - self.assertEqual(result, expected) + assert result == expected result = getattr(df.style, attr)(axis=1)._compute().ctx expected = {(0, 1): ['background-color: yellow'], (1, 1): ['background-color: yellow'], (0, 0): [''], (1, 0): ['']} - self.assertEqual(result, expected) + assert result == expected # separate since we cant negate the strs df['C'] = ['a', 'b'] @@ -478,7 +478,7 @@ def test_export(self): result = style1.export() style2 = self.df.style style2.use(result) - self.assertEqual(style1._todo, style2._todo) + assert style1._todo == style2._todo style2.render() def test_display_format(self): @@ -503,48 +503,48 @@ def test_display_subset(self): ctx = df.style.format({"a": "{:0.1f}", "b": "{0:.2%}"}, subset=pd.IndexSlice[0, :])._translate() expected = '0.1' - self.assertEqual(ctx['body'][0][1]['display_value'], expected) - self.assertEqual(ctx['body'][1][1]['display_value'], '1.1234') - self.assertEqual(ctx['body'][0][2]['display_value'], '12.34%') + assert ctx['body'][0][1]['display_value'] == expected + assert ctx['body'][1][1]['display_value'] == '1.1234' + assert ctx['body'][0][2]['display_value'] == '12.34%' raw_11 = '1.1234' ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice[0, :])._translate() - self.assertEqual(ctx['body'][0][1]['display_value'], expected) - 
self.assertEqual(ctx['body'][1][1]['display_value'], raw_11) + assert ctx['body'][0][1]['display_value'] == expected + assert ctx['body'][1][1]['display_value'] == raw_11 ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice[0, :])._translate() - self.assertEqual(ctx['body'][0][1]['display_value'], expected) - self.assertEqual(ctx['body'][1][1]['display_value'], raw_11) + assert ctx['body'][0][1]['display_value'] == expected + assert ctx['body'][1][1]['display_value'] == raw_11 ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice['a'])._translate() - self.assertEqual(ctx['body'][0][1]['display_value'], expected) - self.assertEqual(ctx['body'][0][2]['display_value'], '0.1234') + assert ctx['body'][0][1]['display_value'] == expected + assert ctx['body'][0][2]['display_value'] == '0.1234' ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice[0, 'a'])._translate() - self.assertEqual(ctx['body'][0][1]['display_value'], expected) - self.assertEqual(ctx['body'][1][1]['display_value'], raw_11) + assert ctx['body'][0][1]['display_value'] == expected + assert ctx['body'][1][1]['display_value'] == raw_11 ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice[[0, 1], ['a']])._translate() - self.assertEqual(ctx['body'][0][1]['display_value'], expected) - self.assertEqual(ctx['body'][1][1]['display_value'], '1.1') - self.assertEqual(ctx['body'][0][2]['display_value'], '0.1234') - self.assertEqual(ctx['body'][1][2]['display_value'], '1.1234') + assert ctx['body'][0][1]['display_value'] == expected + assert ctx['body'][1][1]['display_value'] == '1.1' + assert ctx['body'][0][2]['display_value'] == '0.1234' + assert ctx['body'][1][2]['display_value'] == '1.1234' def test_display_dict(self): df = pd.DataFrame([[.1234, .1234], [1.1234, 1.1234]], columns=['a', 'b']) ctx = df.style.format({"a": "{:0.1f}", "b": "{0:.2%}"})._translate() - self.assertEqual(ctx['body'][0][1]['display_value'], '0.1') - self.assertEqual(ctx['body'][0][2]['display_value'], '12.34%') + assert ctx['body'][0][1]['display_value'] == '0.1' + assert ctx['body'][0][2]['display_value'] == '12.34%' df['c'] = ['aaa', 'bbb'] ctx = df.style.format({"a": "{:0.1f}", "c": str.upper})._translate() - self.assertEqual(ctx['body'][0][1]['display_value'], '0.1') - self.assertEqual(ctx['body'][0][3]['display_value'], 'AAA') + assert ctx['body'][0][1]['display_value'] == '0.1' + assert ctx['body'][0][3]['display_value'] == 'AAA' def test_bad_apply_shape(self): df = pd.DataFrame([[1, 2], [3, 4]]) @@ -629,7 +629,7 @@ def test_mi_sparse(self): 'is_visible': True, 'display_value': ''}, {'type': 'th', 'class': 'col_heading level0 col0', 'value': 'A', 'is_visible': True, 'display_value': 'A'}] - self.assertEqual(head, expected) + assert head == expected def test_mi_sparse_disabled(self): with pd.option_context('display.multi_sparse', False): @@ -655,7 +655,7 @@ def test_mi_sparse_index_names(self): 'type': 'th'}, {'class': 'blank', 'value': '', 'type': 'th'}] - self.assertEqual(head, expected) + assert head == expected def test_mi_sparse_column_names(self): df = pd.DataFrame( @@ -698,7 +698,7 @@ def test_mi_sparse_column_names(self): 'type': 'th', 'value': 0}, ] - self.assertEqual(head, expected) + assert head == expected @tm.mplskip @@ -706,16 +706,16 @@ class TestStylerMatplotlibDep(TestCase): def test_background_gradient(self): df = pd.DataFrame([[1, 2], [2, 4]], columns=['A', 'B']) - for axis in [0, 1, 'index', 'columns']: - for cmap in [None, 'YlOrRd']: - result = df.style.background_gradient(cmap=cmap)._compute().ctx - assert all("#" in x[0] for x in 
result.values()) - self.assertEqual(result[(0, 0)], result[(0, 1)]) - self.assertEqual(result[(1, 0)], result[(1, 1)]) - - result = (df.style.background_gradient(subset=pd.IndexSlice[1, 'A']) - ._compute().ctx) - self.assertEqual(result[(1, 0)], ['background-color: #fff7fb']) + + for c_map in [None, 'YlOrRd']: + result = df.style.background_gradient(cmap=c_map)._compute().ctx + assert all("#" in x[0] for x in result.values()) + assert result[(0, 0)] == result[(0, 1)] + assert result[(1, 0)] == result[(1, 1)] + + result = df.style.background_gradient( + subset=pd.IndexSlice[1, 'A'])._compute().ctx + assert result[(1, 0)] == ['background-color: #fff7fb'] def test_block_names(): diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 02c73019b0f65..552fb77bb54cc 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -17,7 +17,7 @@ def test_to_csv_quotechar(self): with tm.ensure_clean('test.csv') as path: df.to_csv(path, quoting=1) # 1=QUOTE_ALL with open(path, 'r') as f: - self.assertEqual(f.read(), expected) + assert f.read() == expected expected = """\ $$,$col$ @@ -28,7 +28,7 @@ def test_to_csv_quotechar(self): with tm.ensure_clean('test.csv') as path: df.to_csv(path, quoting=1, quotechar="$") with open(path, 'r') as f: - self.assertEqual(f.read(), expected) + assert f.read() == expected with tm.ensure_clean('test.csv') as path: with tm.assert_raises_regex(TypeError, 'quotechar'): @@ -45,7 +45,7 @@ def test_to_csv_doublequote(self): with tm.ensure_clean('test.csv') as path: df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL with open(path, 'r') as f: - self.assertEqual(f.read(), expected) + assert f.read() == expected from _csv import Error with tm.ensure_clean('test.csv') as path: @@ -63,7 +63,7 @@ def test_to_csv_escapechar(self): with tm.ensure_clean('test.csv') as path: # QUOTE_ALL df.to_csv(path, quoting=1, doublequote=False, escapechar='\\') with open(path, 'r') as f: - self.assertEqual(f.read(), expected) + assert f.read() == expected df = DataFrame({'col': ['a,a', ',bb,']}) expected = """\ @@ -75,76 +75,71 @@ def test_to_csv_escapechar(self): with tm.ensure_clean('test.csv') as path: df.to_csv(path, quoting=3, escapechar='\\') # QUOTE_NONE with open(path, 'r') as f: - self.assertEqual(f.read(), expected) + assert f.read() == expected def test_csv_to_string(self): df = DataFrame({'col': [1, 2]}) expected = ',col\n0,1\n1,2\n' - self.assertEqual(df.to_csv(), expected) + assert df.to_csv() == expected def test_to_csv_decimal(self): # GH 781 df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]}) expected_default = ',col1,col2,col3\n0,1,a,10.1\n' - self.assertEqual(df.to_csv(), expected_default) + assert df.to_csv() == expected_default expected_european_excel = ';col1;col2;col3\n0;1;a;10,1\n' - self.assertEqual( - df.to_csv(decimal=',', sep=';'), expected_european_excel) + assert df.to_csv(decimal=',', sep=';') == expected_european_excel expected_float_format_default = ',col1,col2,col3\n0,1,a,10.10\n' - self.assertEqual( - df.to_csv(float_format='%.2f'), expected_float_format_default) + assert df.to_csv(float_format='%.2f') == expected_float_format_default expected_float_format = ';col1;col2;col3\n0;1;a;10,10\n' - self.assertEqual( - df.to_csv(decimal=',', sep=';', - float_format='%.2f'), expected_float_format) + assert df.to_csv(decimal=',', sep=';', + float_format='%.2f') == expected_float_format # GH 11553: testing if decimal is taken into account for '0.0' df = pd.DataFrame({'a': [0, 
1.1], 'b': [2.2, 3.3], 'c': 1}) expected = 'a,b,c\n0^0,2^2,1\n1^1,3^3,1\n' - self.assertEqual(df.to_csv(index=False, decimal='^'), expected) + assert df.to_csv(index=False, decimal='^') == expected # same but for an index - self.assertEqual(df.set_index('a').to_csv(decimal='^'), expected) + assert df.set_index('a').to_csv(decimal='^') == expected # same for a multi-index - self.assertEqual( - df.set_index(['a', 'b']).to_csv(decimal="^"), expected) + assert df.set_index(['a', 'b']).to_csv(decimal="^") == expected def test_to_csv_float_format(self): # testing if float_format is taken into account for the index # GH 11553 df = pd.DataFrame({'a': [0, 1], 'b': [2.2, 3.3], 'c': 1}) expected = 'a,b,c\n0,2.20,1\n1,3.30,1\n' - self.assertEqual( - df.set_index('a').to_csv(float_format='%.2f'), expected) + assert df.set_index('a').to_csv(float_format='%.2f') == expected # same for a multi-index - self.assertEqual( - df.set_index(['a', 'b']).to_csv(float_format='%.2f'), expected) + assert df.set_index(['a', 'b']).to_csv( + float_format='%.2f') == expected def test_to_csv_na_rep(self): # testing if NaN values are correctly represented in the index # GH 11553 df = DataFrame({'a': [0, np.NaN], 'b': [0, 1], 'c': [2, 3]}) expected = "a,b,c\n0.0,0,2\n_,1,3\n" - self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) - self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) + assert df.set_index('a').to_csv(na_rep='_') == expected + assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected # now with an index containing only NaNs df = DataFrame({'a': np.NaN, 'b': [0, 1], 'c': [2, 3]}) expected = "a,b,c\n_,0,2\n_,1,3\n" - self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) - self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) + assert df.set_index('a').to_csv(na_rep='_') == expected + assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected # check if na_rep parameter does not break anything when no NaN df = DataFrame({'a': 0, 'b': [0, 1], 'c': [2, 3]}) expected = "a,b,c\n0,0,2\n0,1,3\n" - self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) - self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) + assert df.set_index('a').to_csv(na_rep='_') == expected + assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected def test_to_csv_date_format(self): # GH 10209 @@ -157,26 +152,23 @@ def test_to_csv_date_format(self): '2013-01-01 00:00:01\n2,2013-01-01 00:00:02' '\n3,2013-01-01 00:00:03\n4,' '2013-01-01 00:00:04\n') - self.assertEqual(df_sec.to_csv(), expected_default_sec) + assert df_sec.to_csv() == expected_default_sec expected_ymdhms_day = (',A\n0,2013-01-01 00:00:00\n1,' '2013-01-02 00:00:00\n2,2013-01-03 00:00:00' '\n3,2013-01-04 00:00:00\n4,' '2013-01-05 00:00:00\n') - self.assertEqual( - df_day.to_csv( - date_format='%Y-%m-%d %H:%M:%S'), expected_ymdhms_day) + assert (df_day.to_csv(date_format='%Y-%m-%d %H:%M:%S') == + expected_ymdhms_day) expected_ymd_sec = (',A\n0,2013-01-01\n1,2013-01-01\n2,' '2013-01-01\n3,2013-01-01\n4,2013-01-01\n') - self.assertEqual( - df_sec.to_csv(date_format='%Y-%m-%d'), expected_ymd_sec) + assert df_sec.to_csv(date_format='%Y-%m-%d') == expected_ymd_sec expected_default_day = (',A\n0,2013-01-01\n1,2013-01-02\n2,' '2013-01-03\n3,2013-01-04\n4,2013-01-05\n') - self.assertEqual(df_day.to_csv(), expected_default_day) - self.assertEqual( - df_day.to_csv(date_format='%Y-%m-%d'), expected_default_day) + assert df_day.to_csv() == expected_default_day + assert 
df_day.to_csv(date_format='%Y-%m-%d') == expected_default_day # testing if date_format parameter is taken into account for # multi-indexed dataframes (GH 7791) @@ -184,33 +176,33 @@ def test_to_csv_date_format(self): df_sec['C'] = 1 expected_ymd_sec = 'A,B,C\n2013-01-01,0,1\n' df_sec_grouped = df_sec.groupby([pd.Grouper(key='A', freq='1h'), 'B']) - self.assertEqual(df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d'), - expected_ymd_sec) + assert (df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d') == + expected_ymd_sec) def test_to_csv_multi_index(self): # see gh-6618 df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]])) exp = ",1\n,2\n0,1\n" - self.assertEqual(df.to_csv(), exp) + assert df.to_csv() == exp exp = "1\n2\n1\n" - self.assertEqual(df.to_csv(index=False), exp) + assert df.to_csv(index=False) == exp df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]), index=pd.MultiIndex.from_arrays([[1], [2]])) exp = ",,1\n,,2\n1,2,1\n" - self.assertEqual(df.to_csv(), exp) + assert df.to_csv() == exp exp = "1\n2\n1\n" - self.assertEqual(df.to_csv(index=False), exp) + assert df.to_csv(index=False) == exp df = DataFrame( [1], columns=pd.MultiIndex.from_arrays([['foo'], ['bar']])) exp = ",foo\n,bar\n0,1\n" - self.assertEqual(df.to_csv(), exp) + assert df.to_csv() == exp exp = "foo\nbar\n1\n" - self.assertEqual(df.to_csv(index=False), exp) + assert df.to_csv(index=False) == exp diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index fd9ae0851635a..4a4546dd807f1 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -50,10 +50,10 @@ def test_to_html_with_empty_string_label(self): def test_to_html_unicode(self): df = DataFrame({u('\u03c3'): np.arange(10.)}) expected = u'

<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>\u03c3</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>3.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>5.0</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>6.0</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>7.0</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>8.0</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>9.0</td>\n    </tr>\n  </tbody>\n</table>' # noqa - self.assertEqual(df.to_html(), expected) + assert df.to_html() == expected df = DataFrame({'A': [u('\u03c3')]}) expected = u'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>A</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>\u03c3</td>\n    </tr>\n  </tbody>\n</table>
' # noqa - self.assertEqual(df.to_html(), expected) + assert df.to_html() == expected def test_to_html_decimal(self): # GH 12031 @@ -81,7 +81,7 @@ def test_to_html_decimal(self): ' \n' ' \n' '') - self.assertEqual(result, expected) + assert result == expected def test_to_html_escaped(self): a = 'str """ - self.assertEqual(xp, rs) + assert xp == rs def test_to_html_escape_disabled(self): a = 'str """ - self.assertEqual(xp, rs) + assert xp == rs def test_to_html_multiindex_index_false(self): # issue 8452 @@ -189,11 +189,11 @@ def test_to_html_multiindex_index_false(self): """ - self.assertEqual(result, expected) + assert result == expected df.index = Index(df.index.values, name='idx') result = df.to_html(index=False) - self.assertEqual(result, expected) + assert result == expected def test_to_html_multiindex_sparsify_false_multi_sparse(self): with option_context('display.multi_sparse', False): @@ -247,7 +247,7 @@ def test_to_html_multiindex_sparsify_false_multi_sparse(self): """ - self.assertEqual(result, expected) + assert result == expected df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=index[::2], index=index) @@ -303,7 +303,7 @@ def test_to_html_multiindex_sparsify_false_multi_sparse(self): """ - self.assertEqual(result, expected) + assert result == expected def test_to_html_multiindex_sparsify(self): index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], @@ -353,7 +353,7 @@ def test_to_html_multiindex_sparsify(self): """ - self.assertEqual(result, expected) + assert result == expected df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=index[::2], index=index) @@ -407,7 +407,7 @@ def test_to_html_multiindex_sparsify(self): """ - self.assertEqual(result, expected) + assert result == expected def test_to_html_multiindex_odd_even_truncate(self): # GH 14882 - Issue on truncation with odd length DataFrame @@ -692,7 +692,7 @@ def test_to_html_multiindex_odd_even_truncate(self): """ - self.assertEqual(result, expected) + assert result == expected # Test that ... 
appears in a middle level result = df.to_html(max_rows=56) @@ -955,7 +955,7 @@ def test_to_html_multiindex_odd_even_truncate(self): """ - self.assertEqual(result, expected) + assert result == expected def test_to_html_index_formatter(self): df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=['foo', None], @@ -996,7 +996,7 @@ def test_to_html_index_formatter(self): """ - self.assertEqual(result, expected) + assert result == expected def test_to_html_datetime64_monthformatter(self): months = [datetime(2016, 1, 1), datetime(2016, 2, 2)] @@ -1024,7 +1024,7 @@ def format_func(x): """ - self.assertEqual(result, expected) + assert result == expected def test_to_html_datetime64_hourformatter(self): @@ -1053,7 +1053,7 @@ def format_func(x): """ - self.assertEqual(result, expected) + assert result == expected def test_to_html_regression_GH6098(self): df = DataFrame({ @@ -1164,7 +1164,7 @@ def test_to_html_truncate(self): '''.format(div_style) if compat.PY2: expected = expected.decode('utf-8') - self.assertEqual(result, expected) + assert result == expected def test_to_html_truncate_multi_index(self): pytest.skip("unreliable on travis") @@ -1281,7 +1281,7 @@ def test_to_html_truncate_multi_index(self): '''.format(div_style) if compat.PY2: expected = expected.decode('utf-8') - self.assertEqual(result, expected) + assert result == expected def test_to_html_truncate_multi_index_sparse_off(self): pytest.skip("unreliable on travis") @@ -1392,7 +1392,7 @@ def test_to_html_truncate_multi_index_sparse_off(self): '''.format(div_style) if compat.PY2: expected = expected.decode('utf-8') - self.assertEqual(result, expected) + assert result == expected def test_to_html_border(self): df = DataFrame({'A': [1, 2]}) @@ -1424,7 +1424,7 @@ def test_to_html(self): buf = StringIO() retval = biggie.to_html(buf=buf) assert retval is None - self.assertEqual(buf.getvalue(), s) + assert buf.getvalue() == s assert isinstance(s, compat.string_types) @@ -1450,13 +1450,13 @@ def test_to_html_filename(self): with open(path, 'r') as f: s = biggie.to_html() s2 = f.read() - self.assertEqual(s, s2) + assert s == s2 frame = DataFrame(index=np.arange(200)) with tm.ensure_clean('test.html') as path: frame.to_html(path) with open(path, 'r') as f: - self.assertEqual(frame.to_html(), f.read()) + assert frame.to_html() == f.read() def test_to_html_with_no_bold(self): x = DataFrame({'x': np.random.randn(5)}) @@ -1507,7 +1507,7 @@ def test_to_html_multiindex(self): ' \n' '') - self.assertEqual(result, expected) + assert result == expected columns = MultiIndex.from_tuples(list(zip( range(4), np.mod( @@ -1550,7 +1550,7 @@ def test_to_html_multiindex(self): ' \n' '') - self.assertEqual(result, expected) + assert result == expected def test_to_html_justify(self): df = DataFrame({'A': [6, 30000, 2], @@ -1588,7 +1588,7 @@ def test_to_html_justify(self): ' \n' ' \n' '') - self.assertEqual(result, expected) + assert result == expected result = df.to_html(justify='right') expected = ('\n' @@ -1621,7 +1621,7 @@ def test_to_html_justify(self): ' \n' ' \n' '
') - self.assertEqual(result, expected) + assert result == expected def test_to_html_index(self): index = ['foo', 'bar', 'baz'] @@ -1836,10 +1836,10 @@ def test_to_html_with_classes(self): """).strip() - self.assertEqual(result, expected) + assert result == expected result = df.to_html(classes=["sortable", "draggable"]) - self.assertEqual(result, expected) + assert result == expected def test_to_html_no_index_max_rows(self): # GH https://github.com/pandas-dev/pandas/issues/14998 @@ -1858,7 +1858,7 @@ def test_to_html_no_index_max_rows(self): """) - self.assertEqual(result, expected) + assert result == expected def test_to_html_notebook_has_style(self): df = pd.DataFrame({"A": [1, 2, 3]}) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 4ec13fa667452..0f77a886dd302 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -39,7 +39,7 @@ def test_build_table_schema(self): ], 'primaryKey': ['idx'] } - self.assertEqual(result, expected) + assert result == expected result = build_table_schema(self.df) assert "pandas_version" in result @@ -49,7 +49,7 @@ def test_series(self): expected = {'fields': [{'name': 'index', 'type': 'integer'}, {'name': 'foo', 'type': 'integer'}], 'primaryKey': ['index']} - self.assertEqual(result, expected) + assert result == expected result = build_table_schema(s) assert 'pandas_version' in result @@ -58,7 +58,7 @@ def tets_series_unnamed(self): expected = {'fields': [{'name': 'index', 'type': 'integer'}, {'name': 'values', 'type': 'integer'}], 'primaryKey': ['index']} - self.assertEqual(result, expected) + assert result == expected def test_multiindex(self): df = self.df.copy() @@ -76,13 +76,13 @@ def test_multiindex(self): ], 'primaryKey': ['level_0', 'level_1'] } - self.assertEqual(result, expected) + assert result == expected df.index.names = ['idx0', None] expected['fields'][0]['name'] = 'idx0' expected['primaryKey'] = ['idx0', 'level_1'] result = build_table_schema(df, version=False) - self.assertEqual(result, expected) + assert result == expected class TestTableSchemaType(tm.TestCase): @@ -91,23 +91,22 @@ def test_as_json_table_type_int_data(self): int_data = [1, 2, 3] int_types = [np.int, np.int16, np.int32, np.int64] for t in int_types: - self.assertEqual(as_json_table_type(np.array(int_data, dtype=t)), - 'integer') + assert as_json_table_type(np.array( + int_data, dtype=t)) == 'integer' def test_as_json_table_type_float_data(self): float_data = [1., 2., 3.] 
float_types = [np.float, np.float16, np.float32, np.float64] for t in float_types: - self.assertEqual(as_json_table_type(np.array(float_data, - dtype=t)), - 'number') + assert as_json_table_type(np.array( + float_data, dtype=t)) == 'number' def test_as_json_table_type_bool_data(self): bool_data = [True, False] bool_types = [bool, np.bool] for t in bool_types: - self.assertEqual(as_json_table_type(np.array(bool_data, dtype=t)), - 'boolean') + assert as_json_table_type(np.array( + bool_data, dtype=t)) == 'boolean' def test_as_json_table_type_date_data(self): date_data = [pd.to_datetime(['2016']), @@ -116,20 +115,19 @@ def test_as_json_table_type_date_data(self): pd.Series(pd.to_datetime(['2016'], utc=True)), pd.period_range('2016', freq='A', periods=3)] for arr in date_data: - self.assertEqual(as_json_table_type(arr), 'datetime') + assert as_json_table_type(arr) == 'datetime' def test_as_json_table_type_string_data(self): strings = [pd.Series(['a', 'b']), pd.Index(['a', 'b'])] for t in strings: - self.assertEqual(as_json_table_type(t), 'string') + assert as_json_table_type(t) == 'string' def test_as_json_table_type_categorical_data(self): - self.assertEqual(as_json_table_type(pd.Categorical(['a'])), 'any') - self.assertEqual(as_json_table_type(pd.Categorical([1])), 'any') - self.assertEqual(as_json_table_type( - pd.Series(pd.Categorical([1]))), 'any') - self.assertEqual(as_json_table_type(pd.CategoricalIndex([1])), 'any') - self.assertEqual(as_json_table_type(pd.Categorical([1])), 'any') + assert as_json_table_type(pd.Categorical(['a'])) == 'any' + assert as_json_table_type(pd.Categorical([1])) == 'any' + assert as_json_table_type(pd.Series(pd.Categorical([1]))) == 'any' + assert as_json_table_type(pd.CategoricalIndex([1])) == 'any' + assert as_json_table_type(pd.Categorical([1])) == 'any' # ------ # dtypes @@ -137,38 +135,38 @@ def test_as_json_table_type_categorical_data(self): def test_as_json_table_type_int_dtypes(self): integers = [np.int, np.int16, np.int32, np.int64] for t in integers: - self.assertEqual(as_json_table_type(t), 'integer') + assert as_json_table_type(t) == 'integer' def test_as_json_table_type_float_dtypes(self): floats = [np.float, np.float16, np.float32, np.float64] for t in floats: - self.assertEqual(as_json_table_type(t), 'number') + assert as_json_table_type(t) == 'number' def test_as_json_table_type_bool_dtypes(self): bools = [bool, np.bool] for t in bools: - self.assertEqual(as_json_table_type(t), 'boolean') + assert as_json_table_type(t) == 'boolean' def test_as_json_table_type_date_dtypes(self): # TODO: datedate.date? datetime.time? 
dates = [np.datetime64, np.dtype("= 0) frame.to_excel(path, @@ -1274,7 +1274,7 @@ def test_roundtrip_indexlabels(self): index_col=0, ).astype(np.int64) frame.index.names = ['test'] - self.assertEqual(frame.index.names, recons.index.names) + assert frame.index.names == recons.index.names frame = (DataFrame(np.random.randn(10, 2)) >= 0) frame.to_excel(path, @@ -1316,7 +1316,7 @@ def test_excel_roundtrip_indexname(self): index_col=0) tm.assert_frame_equal(result, df) - self.assertEqual(result.index.name, 'foo') + assert result.index.name == 'foo' def test_excel_roundtrip_datetime(self): _skip_if_no_xlrd() @@ -1463,7 +1463,7 @@ def test_to_excel_multiindex_dates(self): index_col=[0, 1]) tm.assert_frame_equal(tsframe, recons) - self.assertEqual(recons.index.names, ('time', 'foo')) + assert recons.index.names == ('time', 'foo') def test_to_excel_multiindex_no_write_index(self): _skip_if_no_xlrd() @@ -1577,21 +1577,20 @@ def test_to_excel_unicode_filename(self): # wbk = xlrd.open_workbook(filename, # formatting_info=True) - # self.assertEqual(["test1"], wbk.sheet_names()) + # assert ["test1"] == wbk.sheet_names() # ws = wbk.sheet_by_name('test1') - # self.assertEqual([(0, 1, 5, 7), (0, 1, 3, 5), (0, 1, 1, 3)], - # ws.merged_cells) + # assert [(0, 1, 5, 7), (0, 1, 3, 5), (0, 1, 1, 3)] == ws.merged_cells # for i in range(0, 2): # for j in range(0, 7): # xfx = ws.cell_xf_index(0, 0) # cell_xf = wbk.xf_list[xfx] # font = wbk.font_list - # self.assertEqual(1, font[cell_xf.font_index].bold) - # self.assertEqual(1, cell_xf.border.top_line_style) - # self.assertEqual(1, cell_xf.border.right_line_style) - # self.assertEqual(1, cell_xf.border.bottom_line_style) - # self.assertEqual(1, cell_xf.border.left_line_style) - # self.assertEqual(2, cell_xf.alignment.hor_align) + # assert 1 == font[cell_xf.font_index].bold + # assert 1 == cell_xf.border.top_line_style + # assert 1 == cell_xf.border.right_line_style + # assert 1 == cell_xf.border.bottom_line_style + # assert 1 == cell_xf.border.left_line_style + # assert 2 == cell_xf.alignment.hor_align # os.remove(filename) # def test_to_excel_header_styling_xlsx(self): # import StringIO @@ -1623,7 +1622,7 @@ def test_to_excel_unicode_filename(self): # filename = '__tmp_to_excel_header_styling_xlsx__.xlsx' # pdf.to_excel(filename, 'test1') # wbk = openpyxl.load_workbook(filename) - # self.assertEqual(["test1"], wbk.get_sheet_names()) + # assert ["test1"] == wbk.get_sheet_names() # ws = wbk.get_sheet_by_name('test1') # xlsaddrs = ["%s2" % chr(i) for i in range(ord('A'), ord('H'))] # xlsaddrs += ["A%s" % i for i in range(1, 6)] @@ -1631,16 +1630,16 @@ def test_to_excel_unicode_filename(self): # for xlsaddr in xlsaddrs: # cell = ws.cell(xlsaddr) # assert cell.style.font.bold - # self.assertEqual(openpyxl.style.Border.BORDER_THIN, - # cell.style.borders.top.border_style) - # self.assertEqual(openpyxl.style.Border.BORDER_THIN, - # cell.style.borders.right.border_style) - # self.assertEqual(openpyxl.style.Border.BORDER_THIN, - # cell.style.borders.bottom.border_style) - # self.assertEqual(openpyxl.style.Border.BORDER_THIN, - # cell.style.borders.left.border_style) - # self.assertEqual(openpyxl.style.Alignment.HORIZONTAL_CENTER, - # cell.style.alignment.horizontal) + # assert (openpyxl.style.Border.BORDER_THIN == + # cell.style.borders.top.border_style) + # assert (openpyxl.style.Border.BORDER_THIN == + # cell.style.borders.right.border_style) + # assert (openpyxl.style.Border.BORDER_THIN == + # cell.style.borders.bottom.border_style) + # assert 
(openpyxl.style.Border.BORDER_THIN == + # cell.style.borders.left.border_style) + # assert (openpyxl.style.Alignment.HORIZONTAL_CENTER == + # cell.style.alignment.horizontal) # mergedcells_addrs = ["C1", "E1", "G1"] # for maddr in mergedcells_addrs: # assert ws.cell(maddr).merged @@ -1681,10 +1680,10 @@ def roundtrip(df, header=True, parser_hdr=0, index=True): res = roundtrip(df, use_headers) if use_headers: - self.assertEqual(res.shape, (nrows, ncols + i)) + assert res.shape == (nrows, ncols + i) else: # first row taken as columns - self.assertEqual(res.shape, (nrows - 1, ncols + i)) + assert res.shape == (nrows - 1, ncols + i) # no nans for r in range(len(res.index)): @@ -1692,11 +1691,11 @@ def roundtrip(df, header=True, parser_hdr=0, index=True): assert res.iloc[r, c] is not np.nan res = roundtrip(DataFrame([0])) - self.assertEqual(res.shape, (1, 1)) + assert res.shape == (1, 1) assert res.iloc[0, 0] is not np.nan res = roundtrip(DataFrame([0]), False, None) - self.assertEqual(res.shape, (1, 2)) + assert res.shape == (1, 2) assert res.iloc[0, 0] is not np.nan def test_excel_010_hemstring_raises_NotImplementedError(self): @@ -1909,18 +1908,18 @@ def test_to_excel_styleconverter(self): xlsx_style = _Openpyxl1Writer._convert_to_style(hstyle) assert xlsx_style.font.bold - self.assertEqual(openpyxl.style.Border.BORDER_THIN, - xlsx_style.borders.top.border_style) - self.assertEqual(openpyxl.style.Border.BORDER_THIN, - xlsx_style.borders.right.border_style) - self.assertEqual(openpyxl.style.Border.BORDER_THIN, - xlsx_style.borders.bottom.border_style) - self.assertEqual(openpyxl.style.Border.BORDER_THIN, - xlsx_style.borders.left.border_style) - self.assertEqual(openpyxl.style.Alignment.HORIZONTAL_CENTER, - xlsx_style.alignment.horizontal) - self.assertEqual(openpyxl.style.Alignment.VERTICAL_TOP, - xlsx_style.alignment.vertical) + assert (openpyxl.style.Border.BORDER_THIN == + xlsx_style.borders.top.border_style) + assert (openpyxl.style.Border.BORDER_THIN == + xlsx_style.borders.right.border_style) + assert (openpyxl.style.Border.BORDER_THIN == + xlsx_style.borders.bottom.border_style) + assert (openpyxl.style.Border.BORDER_THIN == + xlsx_style.borders.left.border_style) + assert (openpyxl.style.Alignment.HORIZONTAL_CENTER == + xlsx_style.alignment.horizontal) + assert (openpyxl.style.Alignment.VERTICAL_TOP == + xlsx_style.alignment.vertical) def skip_openpyxl_gt21(cls): @@ -1999,12 +1998,12 @@ def test_to_excel_styleconverter(self): protection = styles.Protection(locked=True, hidden=False) kw = _Openpyxl20Writer._convert_to_style_kwargs(hstyle) - self.assertEqual(kw['font'], font) - self.assertEqual(kw['border'], border) - self.assertEqual(kw['alignment'], alignment) - self.assertEqual(kw['fill'], fill) - self.assertEqual(kw['number_format'], number_format) - self.assertEqual(kw['protection'], protection) + assert kw['font'] == font + assert kw['border'] == border + assert kw['alignment'] == alignment + assert kw['fill'] == fill + assert kw['number_format'] == number_format + assert kw['protection'] == protection def test_write_cells_merge_styled(self): from pandas.io.formats.excel import ExcelCell @@ -2036,8 +2035,8 @@ def test_write_cells_merge_styled(self): wks = writer.sheets[sheet_name] xcell_b1 = wks['B1'] xcell_a2 = wks['A2'] - self.assertEqual(xcell_b1.style, openpyxl_sty_merged) - self.assertEqual(xcell_a2.style, openpyxl_sty_merged) + assert xcell_b1.style == openpyxl_sty_merged + assert xcell_a2.style == openpyxl_sty_merged def skip_openpyxl_lt22(cls): @@ -2109,12 +2108,12 @@ def 
test_to_excel_styleconverter(self): protection = styles.Protection(locked=True, hidden=False) kw = _Openpyxl22Writer._convert_to_style_kwargs(hstyle) - self.assertEqual(kw['font'], font) - self.assertEqual(kw['border'], border) - self.assertEqual(kw['alignment'], alignment) - self.assertEqual(kw['fill'], fill) - self.assertEqual(kw['number_format'], number_format) - self.assertEqual(kw['protection'], protection) + assert kw['font'] == font + assert kw['border'] == border + assert kw['alignment'] == alignment + assert kw['fill'] == fill + assert kw['number_format'] == number_format + assert kw['protection'] == protection def test_write_cells_merge_styled(self): if not openpyxl_compat.is_compat(major_ver=2): @@ -2148,8 +2147,8 @@ def test_write_cells_merge_styled(self): wks = writer.sheets[sheet_name] xcell_b1 = wks['B1'] xcell_a2 = wks['A2'] - self.assertEqual(xcell_b1.font, openpyxl_sty_merged) - self.assertEqual(xcell_a2.font, openpyxl_sty_merged) + assert xcell_b1.font == openpyxl_sty_merged + assert xcell_a2.font == openpyxl_sty_merged class XlwtTests(ExcelWriterBase, tm.TestCase): @@ -2201,12 +2200,12 @@ def test_to_excel_styleconverter(self): xls_style = _XlwtWriter._convert_to_style(hstyle) assert xls_style.font.bold - self.assertEqual(xlwt.Borders.THIN, xls_style.borders.top) - self.assertEqual(xlwt.Borders.THIN, xls_style.borders.right) - self.assertEqual(xlwt.Borders.THIN, xls_style.borders.bottom) - self.assertEqual(xlwt.Borders.THIN, xls_style.borders.left) - self.assertEqual(xlwt.Alignment.HORZ_CENTER, xls_style.alignment.horz) - self.assertEqual(xlwt.Alignment.VERT_TOP, xls_style.alignment.vert) + assert xlwt.Borders.THIN == xls_style.borders.top + assert xlwt.Borders.THIN == xls_style.borders.right + assert xlwt.Borders.THIN == xls_style.borders.bottom + assert xlwt.Borders.THIN == xls_style.borders.left + assert xlwt.Alignment.HORZ_CENTER == xls_style.alignment.horz + assert xlwt.Alignment.VERT_TOP == xls_style.alignment.vert class XlsxWriterTests(ExcelWriterBase, tm.TestCase): @@ -2259,7 +2258,7 @@ def test_column_format(self): except: read_num_format = cell.style.number_format._format_code - self.assertEqual(read_num_format, num_format) + assert read_num_format == num_format class OpenpyxlTests_NoMerge(ExcelWriterBase, tm.TestCase): diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 13529e7b54714..138def3ea1ac9 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -133,4 +133,4 @@ def test_roundtrip(self): .format(destination_table), project_id=_get_project_id(), private_key=_get_private_key_path()) - self.assertEqual(result['num_rows'][0], test_size) + assert result['num_rows'][0] == test_size diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index db6ab236ee793..0a79173df731c 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -144,16 +144,16 @@ def test_spam_no_types(self): df2 = self.read_html(self.spam_data, 'Unit') assert_framelist_equal(df1, df2) - self.assertEqual(df1[0].iloc[0, 0], 'Proximates') - self.assertEqual(df1[0].columns[0], 'Nutrient') + assert df1[0].iloc[0, 0] == 'Proximates' + assert df1[0].columns[0] == 'Nutrient' def test_spam_with_types(self): df1 = self.read_html(self.spam_data, '.*Water.*') df2 = self.read_html(self.spam_data, 'Unit') assert_framelist_equal(df1, df2) - self.assertEqual(df1[0].iloc[0, 0], 'Proximates') - self.assertEqual(df1[0].columns[0], 'Nutrient') + assert df1[0].iloc[0, 0] == 'Proximates' + assert df1[0].columns[0] == 
'Nutrient' def test_spam_no_match(self): dfs = self.read_html(self.spam_data) @@ -167,7 +167,7 @@ def test_banklist_no_match(self): def test_spam_header(self): df = self.read_html(self.spam_data, '.*Water.*', header=1)[0] - self.assertEqual(df.columns[0], 'Proximates') + assert df.columns[0] == 'Proximates' assert not df.empty def test_skiprows_int(self): @@ -288,7 +288,7 @@ def test_invalid_url(self): self.read_html('http://www.a23950sdfa908sd.com', match='.*Water.*') except ValueError as e: - self.assertEqual(str(e), 'No tables found') + assert str(e) == 'No tables found' @tm.slow def test_file_url(self): @@ -368,7 +368,7 @@ def test_python_docs_table(self): url = 'https://docs.python.org/2/' dfs = self.read_html(url, match='Python') zz = [df.iloc[0, 0][0:4] for df in dfs] - self.assertEqual(sorted(zz), sorted(['Repo', 'What'])) + assert sorted(zz) == sorted(['Repo', 'What']) @tm.slow def test_thousands_macau_stats(self): @@ -518,7 +518,7 @@ def test_nyse_wsj_commas_table(self): columns = Index(['Issue(Roll over for charts and headlines)', 'Volume', 'Price', 'Chg', '% Chg']) nrows = 100 - self.assertEqual(df.shape[0], nrows) + assert df.shape[0] == nrows tm.assert_index_equal(df.columns, columns) @tm.slow @@ -536,7 +536,7 @@ def try_remove_ws(x): ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'), converters={'Updated Date': Timestamp, 'Closing Date': Timestamp}) - self.assertEqual(df.shape, ground_truth.shape) + assert df.shape == ground_truth.shape old = ['First Vietnamese American BankIn Vietnamese', 'Westernbank Puerto RicoEn Espanol', 'R-G Premier Bank of Puerto RicoEn Espanol', @@ -663,7 +663,7 @@ def test_wikipedia_states_table(self): assert os.path.isfile(data), '%r is not a file' % data assert os.path.getsize(data), '%r is an empty file' % data result = self.read_html(data, 'Arizona', header=1)[0] - self.assertEqual(result['sq mi'].dtype, np.dtype('float64')) + assert result['sq mi'].dtype == np.dtype('float64') def test_decimal_rows(self): diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index ae1cadcd41496..451cce125e228 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -217,9 +217,10 @@ def test_dict_float(self): def test_dict_complex(self): x = {'foo': 1.0 + 1.0j, 'bar': 2.0 + 2.0j} x_rec = self.encode_decode(x) - self.assertEqual(x, x_rec) + tm.assert_dict_equal(x, x_rec) + for key in x: - self.assertEqual(type(x[key]), type(x_rec[key])) + tm.assert_class_equal(x[key], x_rec[key], obj="complex value") def test_dict_numpy_float(self): x = {'foo': np.float32(1.0), 'bar': np.float32(2.0)} @@ -230,9 +231,10 @@ def test_dict_numpy_complex(self): x = {'foo': np.complex128(1.0 + 1.0j), 'bar': np.complex128(2.0 + 2.0j)} x_rec = self.encode_decode(x) - self.assertEqual(x, x_rec) + tm.assert_dict_equal(x, x_rec) + for key in x: - self.assertEqual(type(x[key]), type(x_rec[key])) + tm.assert_class_equal(x[key], x_rec[key], obj="numpy complex128") def test_numpy_array_float(self): @@ -268,7 +270,7 @@ def test_timestamp(self): '20130101'), Timestamp('20130101', tz='US/Eastern'), Timestamp('201301010501')]: i_rec = self.encode_decode(i) - self.assertEqual(i, i_rec) + assert i == i_rec def test_nat(self): nat_rec = self.encode_decode(NaT) @@ -286,7 +288,7 @@ def test_datetimes(self): datetime.date(2013, 1, 1), np.datetime64(datetime.datetime(2013, 1, 5, 2, 15))]: i_rec = self.encode_decode(i) - self.assertEqual(i, i_rec) + assert i == i_rec def test_timedeltas(self): @@ -294,7 +296,7 @@ def 
test_timedeltas(self): datetime.timedelta(days=1, seconds=10), np.timedelta64(1000000)]: i_rec = self.encode_decode(i) - self.assertEqual(i, i_rec) + assert i == i_rec class TestIndex(TestPackers): @@ -668,16 +670,14 @@ def decompress(ob): for w in ws: # check the messages from our warnings - self.assertEqual( - str(w.message), - 'copying data after decompressing; this may mean that' - ' decompress is caching its result', - ) + assert str(w.message) == ('copying data after decompressing; ' + 'this may mean that decompress is ' + 'caching its result') for buf, control_buf in zip(not_garbage, control): # make sure none of our mutations above affected the # original buffers - self.assertEqual(buf, control_buf) + assert buf == control_buf def test_compression_warns_when_decompress_caches_zlib(self): if not _ZLIB_INSTALLED: @@ -710,7 +710,7 @@ def _test_small_strings_no_warn(self, compress): # we compare the ord of bytes b'a' with unicode u'a' because the should # always be the same (unless we were able to mutate the shared # character singleton in which case ord(b'a') == ord(b'b'). - self.assertEqual(ord(b'a'), ord(u'a')) + assert ord(b'a') == ord(u'a') tm.assert_numpy_array_equal( char_unpacked, np.array([ord(b'b')], dtype='uint8'), @@ -801,7 +801,7 @@ def test_default_encoding(self): for frame in compat.itervalues(self.frame): result = frame.to_msgpack() expected = frame.to_msgpack(encoding='utf8') - self.assertEqual(result, expected) + assert result == expected result = self.encode_decode(frame) assert_frame_equal(result, frame) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index ae1b4137c354f..a268fa96175cf 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -165,8 +165,8 @@ def test_factory_fun(self): with catch_warnings(record=True): with get_store(path) as tbl: - self.assertEqual(len(tbl), 1) - self.assertEqual(type(tbl['a']), DataFrame) + assert len(tbl) == 1 + assert type(tbl['a']) == DataFrame finally: safe_remove(self.path) @@ -185,8 +185,8 @@ def test_context(self): tbl['a'] = tm.makeDataFrame() with HDFStore(path) as tbl: - self.assertEqual(len(tbl), 1) - self.assertEqual(type(tbl['a']), DataFrame) + assert len(tbl) == 1 + assert type(tbl['a']) == DataFrame finally: safe_remove(path) @@ -374,7 +374,7 @@ def test_keys(self): with catch_warnings(record=True): store['d'] = tm.makePanel() store['foo/bar'] = tm.makePanel() - self.assertEqual(len(store), 5) + assert len(store) == 5 expected = set(['/a', '/b', '/c', '/d', '/foo/bar']) assert set(store.keys()) == expected assert set(store) == expected @@ -461,9 +461,9 @@ def test_versioning(self): _maybe_remove(store, 'df1') store.append('df1', df[:10]) store.append('df1', df[10:]) - self.assertEqual(store.root.a._v_attrs.pandas_version, '0.15.2') - self.assertEqual(store.root.b._v_attrs.pandas_version, '0.15.2') - self.assertEqual(store.root.df1._v_attrs.pandas_version, '0.15.2') + assert store.root.a._v_attrs.pandas_version == '0.15.2' + assert store.root.b._v_attrs.pandas_version == '0.15.2' + assert store.root.df1._v_attrs.pandas_version == '0.15.2' # write a file and wipe its versioning _maybe_remove(store, 'df2') @@ -488,7 +488,7 @@ def check(mode): else: store = HDFStore(path, mode=mode) - self.assertEqual(store._handle.mode, mode) + assert store._handle.mode == mode store.close() with ensure_clean_path(self.path) as path: @@ -501,7 +501,7 @@ def f(): pytest.raises(IOError, f) else: with HDFStore(path, mode=mode) as store: - self.assertEqual(store._handle.mode, 
mode) + assert store._handle.mode == mode with ensure_clean_path(self.path) as path: @@ -550,7 +550,7 @@ def test_reopen_handle(self): # truncation ok here store.open('w') assert store.is_open - self.assertEqual(len(store), 0) + assert len(store) == 0 store.close() assert not store.is_open @@ -560,24 +560,24 @@ def test_reopen_handle(self): # reopen as read store.open('r') assert store.is_open - self.assertEqual(len(store), 1) - self.assertEqual(store._mode, 'r') + assert len(store) == 1 + assert store._mode == 'r' store.close() assert not store.is_open # reopen as append store.open('a') assert store.is_open - self.assertEqual(len(store), 1) - self.assertEqual(store._mode, 'a') + assert len(store) == 1 + assert store._mode == 'a' store.close() assert not store.is_open # reopen as append (again) store.open('a') assert store.is_open - self.assertEqual(len(store), 1) - self.assertEqual(store._mode, 'a') + assert len(store) == 1 + assert store._mode == 'a' store.close() assert not store.is_open @@ -889,7 +889,7 @@ def test_append_series(self): store.append('ns', ns) result = store['ns'] tm.assert_series_equal(result, ns) - self.assertEqual(result.name, ns.name) + assert result.name == ns.name # select on the values expected = ns[ns > 60] @@ -1300,8 +1300,8 @@ def test_append_with_strings(self): dict([(x, "%s_extra" % x) for x in wp.minor_axis]), axis=2) def check_col(key, name, size): - self.assertEqual(getattr(store.get_storer( - key).table.description, name).itemsize, size) + assert getattr(store.get_storer(key) + .table.description, name).itemsize == size store.append('s1', wp, min_itemsize=20) store.append('s1', wp2) @@ -1395,8 +1395,8 @@ def check_col(key, name, size): with ensure_clean_store(self.path) as store: def check_col(key, name, size): - self.assertEqual(getattr(store.get_storer( - key).table.description, name).itemsize, size) + assert getattr(store.get_storer(key) + .table.description, name).itemsize == size df = DataFrame(dict(A='foo', B='bar'), index=range(10)) @@ -1404,13 +1404,13 @@ def check_col(key, name, size): # a min_itemsize that creates a data_column _maybe_remove(store, 'df') store.append('df', df, min_itemsize={'A': 200}) check_col('df', 'A', 200) - self.assertEqual(store.get_storer('df').data_columns, ['A']) + assert store.get_storer('df').data_columns == ['A'] # a min_itemsize that creates a data_column2 _maybe_remove(store, 'df') store.append('df', df, data_columns=['B'], min_itemsize={'A': 200}) check_col('df', 'A', 200) - self.assertEqual(store.get_storer('df').data_columns, ['B', 'A']) + assert store.get_storer('df').data_columns == ['B', 'A'] # a min_itemsize that creates a data_column2 _maybe_remove(store, 'df') @@ -1418,7 +1418,7 @@ def check_col(key, name, size): 'B'], min_itemsize={'values': 200}) check_col('df', 'B', 200) check_col('df', 'values_block_0', 200) - self.assertEqual(store.get_storer('df').data_columns, ['B']) + assert store.get_storer('df').data_columns == ['B'] # infer the .typ on subsequent appends _maybe_remove(store, 'df') @@ -1492,8 +1492,8 @@ def test_append_with_data_columns(self): # using min_itemsize and a data column def check_col(key, name, size): - self.assertEqual(getattr(store.get_storer( - key).table.description, name).itemsize, size) + assert getattr(store.get_storer(key) + .table.description, name).itemsize == size with ensure_clean_store(self.path) as store: _maybe_remove(store, 'df') @@ -1985,7 +1985,7 @@ def test_append_raise(self): # list in column df = tm.makeDataFrame() df['invalid'] = [['a']] * len(df) - self.assertEqual(df.dtypes['invalid'], np.object_) + assert
df.dtypes['invalid'] == np.object_ pytest.raises(TypeError, store.append, 'df', df) # multiple invalid columns @@ -1999,7 +1999,7 @@ def test_append_raise(self): s = s.astype(object) s[0:5] = np.nan df['invalid'] = s - self.assertEqual(df.dtypes['invalid'], np.object_) + assert df.dtypes['invalid'] == np.object_ pytest.raises(TypeError, store.append, 'df', df) # directy ndarray @@ -2227,11 +2227,11 @@ def test_remove(self): store['a'] = ts store['b'] = df _maybe_remove(store, 'a') - self.assertEqual(len(store), 1) + assert len(store) == 1 tm.assert_frame_equal(df, store['b']) _maybe_remove(store, 'b') - self.assertEqual(len(store), 0) + assert len(store) == 0 # nonexistence pytest.raises(KeyError, store.remove, 'a_nonexistent_store') @@ -2241,19 +2241,19 @@ def test_remove(self): store['b/foo'] = df _maybe_remove(store, 'foo') _maybe_remove(store, 'b/foo') - self.assertEqual(len(store), 1) + assert len(store) == 1 store['a'] = ts store['b/foo'] = df _maybe_remove(store, 'b') - self.assertEqual(len(store), 1) + assert len(store) == 1 # __delitem__ store['a'] = ts store['b'] = df del store['a'] del store['b'] - self.assertEqual(len(store), 0) + assert len(store) == 0 def test_remove_where(self): @@ -3281,14 +3281,14 @@ def test_select_with_many_inputs(self): result = store.select('df', 'B=selector') expected = df[df.B.isin(selector)] tm.assert_frame_equal(expected, result) - self.assertEqual(len(result), 100) + assert len(result) == 100 # big selector along the index selector = Index(df.ts[0:100].values) result = store.select('df', 'ts=selector') expected = df[df.ts.isin(selector.values)] tm.assert_frame_equal(expected, result) - self.assertEqual(len(result), 100) + assert len(result) == 100 def test_select_iterator(self): @@ -3306,7 +3306,7 @@ def test_select_iterator(self): tm.assert_frame_equal(expected, result) results = [s for s in store.select('df', chunksize=100)] - self.assertEqual(len(results), 5) + assert len(results) == 5 result = concat(results) tm.assert_frame_equal(expected, result) @@ -3331,7 +3331,7 @@ def test_select_iterator(self): results = [s for s in read_hdf(path, 'df', chunksize=100)] result = concat(results) - self.assertEqual(len(results), 5) + assert len(results) == 5 tm.assert_frame_equal(result, df) tm.assert_frame_equal(result, read_hdf(path, 'df')) @@ -3484,7 +3484,7 @@ def test_select_iterator_non_complete_8014(self): where = "index > '%s'" % end_dt results = [s for s in store.select( 'df', where=where, chunksize=chunksize)] - self.assertEqual(0, len(results)) + assert 0 == len(results) def test_select_iterator_many_empty_frames(self): @@ -3563,8 +3563,8 @@ def test_retain_index_attributes(self): for attr in ['freq', 'tz', 'name']: for idx in ['index', 'columns']: - self.assertEqual(getattr(getattr(df, idx), attr, None), - getattr(getattr(result, idx), attr, None)) + assert (getattr(getattr(df, idx), attr, None) == + getattr(getattr(result, idx), attr, None)) # try to append a table with a different frequency with catch_warnings(record=True): @@ -3610,7 +3610,7 @@ def test_retain_index_attributes2(self): df = DataFrame(dict(A=Series(lrange(3), index=idx))) df.to_hdf(path, 'data', mode='w', append=True) - self.assertEqual(read_hdf(path, 'data').index.name, 'foo') + assert read_hdf(path, 'data').index.name == 'foo' with catch_warnings(record=True): @@ -3655,7 +3655,7 @@ def test_frame_select(self): date = df.index[len(df) // 2] crit1 = Term('index>=date') - self.assertEqual(crit1.env.scope['date'], date) + assert crit1.env.scope['date'] == date crit2 = 
("columns=['A', 'D']") crit3 = ('columns=A') @@ -4481,7 +4481,7 @@ def do_copy(f=None, new_f=None, keys=None, # check keys if keys is None: keys = store.keys() - self.assertEqual(set(keys), set(tstore.keys())) + assert set(keys) == set(tstore.keys()) # check indicies & nrows for k in tstore.keys(): @@ -4489,7 +4489,7 @@ def do_copy(f=None, new_f=None, keys=None, new_t = tstore.get_storer(k) orig_t = store.get_storer(k) - self.assertEqual(orig_t.nrows, new_t.nrows) + assert orig_t.nrows == new_t.nrows # check propindixes if propindexes: @@ -4554,7 +4554,7 @@ def test_store_datetime_fractional_secs(self): dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456) series = Series([0], [dt]) store['a'] = series - self.assertEqual(store['a'].index[0], dt) + assert store['a'].index[0] == dt def test_tseries_indices_series(self): @@ -4564,18 +4564,18 @@ def test_tseries_indices_series(self): store['a'] = ser result = store['a'] - assert_series_equal(result, ser) - self.assertEqual(type(result.index), type(ser.index)) - self.assertEqual(result.index.freq, ser.index.freq) + tm.assert_series_equal(result, ser) + assert result.index.freq == ser.index.freq + tm.assert_class_equal(result.index, ser.index, obj="series index") idx = tm.makePeriodIndex(10) ser = Series(np.random.randn(len(idx)), idx) store['a'] = ser result = store['a'] - assert_series_equal(result, ser) - self.assertEqual(type(result.index), type(ser.index)) - self.assertEqual(result.index.freq, ser.index.freq) + tm.assert_series_equal(result, ser) + assert result.index.freq == ser.index.freq + tm.assert_class_equal(result.index, ser.index, obj="series index") def test_tseries_indices_frame(self): @@ -4586,8 +4586,9 @@ def test_tseries_indices_frame(self): result = store['a'] assert_frame_equal(result, df) - self.assertEqual(type(result.index), type(df.index)) - self.assertEqual(result.index.freq, df.index.freq) + assert result.index.freq == df.index.freq + tm.assert_class_equal(result.index, df.index, + obj="dataframe index") idx = tm.makePeriodIndex(10) df = DataFrame(np.random.randn(len(idx), 3), idx) @@ -4595,8 +4596,9 @@ def test_tseries_indices_frame(self): result = store['a'] assert_frame_equal(result, df) - self.assertEqual(type(result.index), type(df.index)) - self.assertEqual(result.index.freq, df.index.freq) + assert result.index.freq == df.index.freq + tm.assert_class_equal(result.index, df.index, + obj="dataframe index") def test_unicode_index(self): @@ -5394,7 +5396,7 @@ def test_tseries_select_index_column(self): with ensure_clean_store(self.path) as store: store.append('frame', frame) result = store.select_column('frame', 'index') - self.assertEqual(rng.tz, DatetimeIndex(result.values).tz) + assert rng.tz == DatetimeIndex(result.values).tz # check utc rng = date_range('1/1/2000', '1/30/2000', tz='UTC') @@ -5403,7 +5405,7 @@ def test_tseries_select_index_column(self): with ensure_clean_store(self.path) as store: store.append('frame', frame) result = store.select_column('frame', 'index') - self.assertEqual(rng.tz, result.dt.tz) + assert rng.tz == result.dt.tz # double check non-utc rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern') @@ -5412,7 +5414,7 @@ def test_tseries_select_index_column(self): with ensure_clean_store(self.path) as store: store.append('frame', frame) result = store.select_column('frame', 'index') - self.assertEqual(rng.tz, result.dt.tz) + assert rng.tz == result.dt.tz def test_timezones_fixed(self): with ensure_clean_store(self.path) as store: @@ -5443,7 +5445,7 @@ def test_fixed_offset_tz(self): 
store['frame'] = frame recons = store['frame'] tm.assert_index_equal(recons.index, rng) - self.assertEqual(rng.tz, recons.index.tz) + assert rng.tz == recons.index.tz def test_store_timezone(self): # GH2852 diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index fd883c9c0ff00..52883a41b08c2 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -405,9 +405,7 @@ def _to_sql_replace(self): num_entries = len(self.test_frame1) num_rows = self._count_rows('test_frame1') - self.assertEqual( - num_rows, num_entries, "not the same number of rows as entries") - + assert num_rows == num_entries self.drop_table('test_frame1') def _to_sql_append(self): @@ -425,9 +423,7 @@ def _to_sql_append(self): num_entries = 2 * len(self.test_frame1) num_rows = self._count_rows('test_frame1') - self.assertEqual( - num_rows, num_entries, "not the same number of rows as entries") - + assert num_rows == num_entries self.drop_table('test_frame1') def _roundtrip(self): @@ -454,7 +450,7 @@ def _to_sql_save_index(self): columns=['A', 'B', 'C'], index=['A']) self.pandasSQL.to_sql(df, 'test_to_sql_saves_index') ix_cols = self._get_index_columns('test_to_sql_saves_index') - self.assertEqual(ix_cols, [['A', ], ]) + assert ix_cols == [['A', ], ] def _transaction_test(self): self.pandasSQL.execute("CREATE TABLE test_trans (A INT, B TEXT)") @@ -470,13 +466,13 @@ def _transaction_test(self): # ignore raised exception pass res = self.pandasSQL.read_query('SELECT * FROM test_trans') - self.assertEqual(len(res), 0) + assert len(res) == 0 # Make sure when transaction is committed, rows do get inserted with self.pandasSQL.run_transaction() as trans: trans.execute(ins_sql) res2 = self.pandasSQL.read_query('SELECT * FROM test_trans') - self.assertEqual(len(res2), 1) + assert len(res2) == 1 # ----------------------------------------------------------------------------- @@ -544,8 +540,7 @@ def test_to_sql_replace(self): num_entries = len(self.test_frame1) num_rows = self._count_rows('test_frame3') - self.assertEqual( - num_rows, num_entries, "not the same number of rows as entries") + assert num_rows == num_entries def test_to_sql_append(self): sql.to_sql(self.test_frame1, 'test_frame4', @@ -559,8 +554,7 @@ def test_to_sql_append(self): num_entries = 2 * len(self.test_frame1) num_rows = self._count_rows('test_frame4') - self.assertEqual( - num_rows, num_entries, "not the same number of rows as entries") + assert num_rows == num_entries def test_to_sql_type_mapping(self): sql.to_sql(self.test_frame3, 'test_frame5', self.conn, index=False) @@ -663,44 +657,39 @@ def test_to_sql_index_label(self): # no index name, defaults to 'index' sql.to_sql(temp_frame, 'test_index_label', self.conn) frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[0], 'index') + assert frame.columns[0] == 'index' # specifying index_label sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace', index_label='other_label') frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[0], 'other_label', - "Specified index_label not written to database") + assert frame.columns[0] == "other_label" # using the index name temp_frame.index.name = 'index_name' sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace') frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[0], 'index_name', - "Index name not written to database") + assert frame.columns[0] == 
"index_name" # has index name, but specifying index_label sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace', index_label='other_label') frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[0], 'other_label', - "Specified index_label not written to database") + assert frame.columns[0] == "other_label" # index name is integer temp_frame.index.name = 0 sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace') frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[0], '0', - "Integer index label not written to database") + assert frame.columns[0] == "0" temp_frame.index.name = None sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace', index_label=0) frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[0], '0', - "Integer index label not written to database") + assert frame.columns[0] == "0" def test_to_sql_index_label_multiindex(self): temp_frame = DataFrame({'col1': range(4)}, @@ -710,30 +699,27 @@ def test_to_sql_index_label_multiindex(self): # no index name, defaults to 'level_0' and 'level_1' sql.to_sql(temp_frame, 'test_index_label', self.conn) frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[0], 'level_0') - self.assertEqual(frame.columns[1], 'level_1') + assert frame.columns[0] == 'level_0' + assert frame.columns[1] == 'level_1' # specifying index_label sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace', index_label=['A', 'B']) frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[:2].tolist(), ['A', 'B'], - "Specified index_labels not written to database") + assert frame.columns[:2].tolist() == ['A', 'B'] # using the index name temp_frame.index.names = ['A', 'B'] sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace') frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[:2].tolist(), ['A', 'B'], - "Index names not written to database") + assert frame.columns[:2].tolist() == ['A', 'B'] # has index name, but specifying index_label sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace', index_label=['C', 'D']) frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[:2].tolist(), ['C', 'D'], - "Specified index_labels not written to database") + assert frame.columns[:2].tolist() == ['C', 'D'] # wrong length of index_label pytest.raises(ValueError, sql.to_sql, temp_frame, @@ -793,7 +779,7 @@ def test_chunksize_read(self): for chunk in sql.read_sql_query("select * from test_chunksize", self.conn, chunksize=5): res2 = concat([res2, chunk], ignore_index=True) - self.assertEqual(len(chunk), sizes[i]) + assert len(chunk) == sizes[i] i += 1 tm.assert_frame_equal(res1, res2) @@ -807,7 +793,7 @@ def test_chunksize_read(self): for chunk in sql.read_sql_table("test_chunksize", self.conn, chunksize=5): res3 = concat([res3, chunk], ignore_index=True) - self.assertEqual(len(chunk), sizes[i]) + assert len(chunk) == sizes[i] i += 1 tm.assert_frame_equal(res1, res3) @@ -856,29 +842,24 @@ def test_read_table_columns(self): cols = ['A', 'B'] result = sql.read_sql_table('test_frame', self.conn, columns=cols) - self.assertEqual(result.columns.tolist(), cols, - "Columns not correctly selected") + assert result.columns.tolist() == cols 
def test_read_table_index_col(self): # test columns argument in read_table sql.to_sql(self.test_frame1, 'test_frame', self.conn) result = sql.read_sql_table('test_frame', self.conn, index_col="index") - self.assertEqual(result.index.names, ["index"], - "index_col not correctly set") + assert result.index.names == ["index"] result = sql.read_sql_table( 'test_frame', self.conn, index_col=["A", "B"]) - self.assertEqual(result.index.names, ["A", "B"], - "index_col not correctly set") + assert result.index.names == ["A", "B"] result = sql.read_sql_table('test_frame', self.conn, index_col=["A", "B"], columns=["C", "D"]) - self.assertEqual(result.index.names, ["A", "B"], - "index_col not correctly set") - self.assertEqual(result.columns.tolist(), ["C", "D"], - "columns not set correctly whith index_col") + assert result.index.names == ["A", "B"] + assert result.columns.tolist() == ["C", "D"] def test_read_sql_delegate(self): iris_frame1 = sql.read_sql_query( @@ -905,10 +886,11 @@ def test_not_reflect_all_tables(self): sql.read_sql_table('other_table', self.conn) sql.read_sql_query('SELECT * FROM other_table', self.conn) # Verify some things - self.assertEqual(len(w), 0, "Warning triggered for other table") + assert len(w) == 0 def test_warning_case_insensitive_table_name(self): - # see GH7815. + # see gh-7815 + # # We can't test that this warning is triggered, a the database # configuration would have to be altered. But here we test that # the warning is certainly NOT triggered in a normal case. @@ -918,8 +900,7 @@ def test_warning_case_insensitive_table_name(self): # This should not trigger a Warning self.test_frame1.to_sql('CaseSensitive', self.conn) # Verify some things - self.assertEqual( - len(w), 0, "Warning triggered for writing a table") + assert len(w) == 0 def _get_index_columns(self, tbl_name): from sqlalchemy.engine import reflection @@ -981,7 +962,7 @@ def test_query_by_text_obj(self): iris_df = sql.read_sql(name_text, self.conn, params={ 'name': 'Iris-versicolor'}) all_names = set(iris_df['Name']) - self.assertEqual(all_names, set(['Iris-versicolor'])) + assert all_names == set(['Iris-versicolor']) def test_query_by_select_obj(self): # WIP : GH10846 @@ -992,7 +973,7 @@ def test_query_by_select_obj(self): iris_df = sql.read_sql(name_select, self.conn, params={'name': 'Iris-setosa'}) all_names = set(iris_df['Name']) - self.assertEqual(all_names, set(['Iris-setosa'])) + assert all_names == set(['Iris-setosa']) class _EngineToConnMixin(object): @@ -1094,8 +1075,7 @@ def test_sqlite_type_mapping(self): db = sql.SQLiteDatabase(self.conn) table = sql.SQLiteTable("test_type", db, frame=df) schema = table.sql_schema() - self.assertEqual(self._get_sqlite_column_type(schema, 'time'), - "TIMESTAMP") + assert self._get_sqlite_column_type(schema, 'time') == "TIMESTAMP" # ----------------------------------------------------------------------------- @@ -1264,24 +1244,22 @@ def check(col): # "2000-01-01 00:00:00-08:00" should convert to # "2000-01-01 08:00:00" - self.assertEqual(col[0], Timestamp('2000-01-01 08:00:00')) + assert col[0] == Timestamp('2000-01-01 08:00:00') # "2000-06-01 00:00:00-07:00" should convert to # "2000-06-01 07:00:00" - self.assertEqual(col[1], Timestamp('2000-06-01 07:00:00')) + assert col[1] == Timestamp('2000-06-01 07:00:00') elif is_datetime64tz_dtype(col.dtype): assert str(col.dt.tz) == 'UTC' # "2000-01-01 00:00:00-08:00" should convert to # "2000-01-01 08:00:00" - self.assertEqual(col[0], Timestamp( - '2000-01-01 08:00:00', tz='UTC')) + assert col[0] == 
Timestamp('2000-01-01 08:00:00', tz='UTC') # "2000-06-01 00:00:00-07:00" should convert to # "2000-06-01 07:00:00" - self.assertEqual(col[1], Timestamp( - '2000-06-01 07:00:00', tz='UTC')) + assert col[1] == Timestamp('2000-06-01 07:00:00', tz='UTC') else: raise AssertionError("DateCol loaded with incorrect type " @@ -1525,7 +1503,7 @@ def test_dtype(self): meta.reflect() sqltype = meta.tables['dtype_test3'].columns['B'].type assert isinstance(sqltype, sqlalchemy.String) - self.assertEqual(sqltype.length, 10) + assert sqltype.length == 10 # single dtype df.to_sql('single_dtype_test', self.conn, dtype=sqlalchemy.TEXT) @@ -1576,15 +1554,14 @@ def test_double_precision(self): res = sql.read_sql_table('test_dtypes', self.conn) # check precision of float64 - self.assertEqual(np.round(df['f64'].iloc[0], 14), - np.round(res['f64'].iloc[0], 14)) + assert (np.round(df['f64'].iloc[0], 14) == + np.round(res['f64'].iloc[0], 14)) # check sql types meta = sqlalchemy.schema.MetaData(bind=self.conn) meta.reflect() col_dict = meta.tables['test_dtypes'].columns - self.assertEqual(str(col_dict['f32'].type), - str(col_dict['f64_as_f32'].type)) + assert str(col_dict['f32'].type) == str(col_dict['f64_as_f32'].type) assert isinstance(col_dict['f32'].type, sqltypes.Float) assert isinstance(col_dict['f64'].type, sqltypes.Float) assert isinstance(col_dict['i32'].type, sqltypes.Integer) @@ -1690,7 +1667,7 @@ def test_bigint_warning(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") sql.read_sql_table('test_bigintwarning', self.conn) - self.assertEqual(len(w), 0, "Warning triggered for other table") + assert len(w) == 0 class _TestMySQLAlchemy(object): @@ -2002,20 +1979,20 @@ def test_dtype(self): df.to_sql('dtype_test2', self.conn, dtype={'B': 'STRING'}) # sqlite stores Boolean values as INTEGER - self.assertEqual(self._get_sqlite_column_type( - 'dtype_test', 'B'), 'INTEGER') + assert self._get_sqlite_column_type( + 'dtype_test', 'B') == 'INTEGER' - self.assertEqual(self._get_sqlite_column_type( - 'dtype_test2', 'B'), 'STRING') + assert self._get_sqlite_column_type( + 'dtype_test2', 'B') == 'STRING' pytest.raises(ValueError, df.to_sql, 'error', self.conn, dtype={'B': bool}) # single dtype df.to_sql('single_dtype_test', self.conn, dtype='STRING') - self.assertEqual( - self._get_sqlite_column_type('single_dtype_test', 'A'), 'STRING') - self.assertEqual( - self._get_sqlite_column_type('single_dtype_test', 'B'), 'STRING') + assert self._get_sqlite_column_type( + 'single_dtype_test', 'A') == 'STRING' + assert self._get_sqlite_column_type( + 'single_dtype_test', 'B') == 'STRING' def test_notnull_dtype(self): if self.flavor == 'mysql': @@ -2031,11 +2008,10 @@ def test_notnull_dtype(self): tbl = 'notnull_dtype_test' df.to_sql(tbl, self.conn) - self.assertEqual(self._get_sqlite_column_type(tbl, 'Bool'), 'INTEGER') - self.assertEqual(self._get_sqlite_column_type( - tbl, 'Date'), 'TIMESTAMP') - self.assertEqual(self._get_sqlite_column_type(tbl, 'Int'), 'INTEGER') - self.assertEqual(self._get_sqlite_column_type(tbl, 'Float'), 'REAL') + assert self._get_sqlite_column_type(tbl, 'Bool') == 'INTEGER' + assert self._get_sqlite_column_type(tbl, 'Date') == 'TIMESTAMP' + assert self._get_sqlite_column_type(tbl, 'Int') == 'INTEGER' + assert self._get_sqlite_column_type(tbl, 'Float') == 'REAL' def test_illegal_names(self): # For sqlite, these should work fine @@ -2251,7 +2227,7 @@ def test_onecolumn_of_integer(self): the_sum = sum([my_c0[0] for my_c0 in con_x.execute("select * from mono_df")]) # it 
should not fail, and gives 3 ( Issue #3628 ) - self.assertEqual(the_sum, 3) + assert the_sum == 3 result = sql.read_sql("select * from mono_df", con_x) tm.assert_frame_equal(result, mono_df) @@ -2292,23 +2268,21 @@ def clean_up(test_table_to_drop): # test if_exists='replace' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, if_exists='replace', index=False) - self.assertEqual(tquery(sql_select, con=self.conn), - [(1, 'A'), (2, 'B')]) + assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')] sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name, if_exists='replace', index=False) - self.assertEqual(tquery(sql_select, con=self.conn), - [(3, 'C'), (4, 'D'), (5, 'E')]) + assert (tquery(sql_select, con=self.conn) == + [(3, 'C'), (4, 'D'), (5, 'E')]) clean_up(table_name) # test if_exists='append' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, if_exists='fail', index=False) - self.assertEqual(tquery(sql_select, con=self.conn), - [(1, 'A'), (2, 'B')]) + assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')] sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name, if_exists='append', index=False) - self.assertEqual(tquery(sql_select, con=self.conn), - [(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')]) + assert (tquery(sql_select, con=self.conn) == + [(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')]) clean_up(table_name) @@ -2610,21 +2584,19 @@ def clean_up(test_table_to_drop): # test if_exists='replace' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, if_exists='replace', index=False) - self.assertEqual(tquery(sql_select, con=self.conn), - [(1, 'A'), (2, 'B')]) + assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')] sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name, if_exists='replace', index=False) - self.assertEqual(tquery(sql_select, con=self.conn), - [(3, 'C'), (4, 'D'), (5, 'E')]) + assert (tquery(sql_select, con=self.conn) == + [(3, 'C'), (4, 'D'), (5, 'E')]) clean_up(table_name) # test if_exists='append' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, if_exists='fail', index=False) - self.assertEqual(tquery(sql_select, con=self.conn), - [(1, 'A'), (2, 'B')]) + assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')] sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name, if_exists='append', index=False) - self.assertEqual(tquery(sql_select, con=self.conn), - [(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')]) + assert (tquery(sql_select, con=self.conn) == + [(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')]) clean_up(table_name) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 72023c77e7c88..945f0b009a9da 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -181,7 +181,7 @@ def test_read_dta2(self): w = [x for x in w if x.category is UserWarning] # should get warning for each call to read_dta - self.assertEqual(len(w), 3) + assert len(w) == 3 # buggy test because of the NaT comparison on certain platforms # Format 113 test fails since it does not support tc and tC formats @@ -283,7 +283,7 @@ def test_read_dta18(self): u'Floats': u'float data'} tm.assert_dict_equal(vl, vl_expected) - self.assertEqual(rdr.data_label, u'This is a Ünicode data label') + assert rdr.data_label == u'This is a Ünicode data label' def test_read_write_dta5(self): original = DataFrame([(np.nan, np.nan, np.nan, np.nan, np.nan)], @@ -351,11 +351,11 @@ def test_encoding(self): if compat.PY3: expected = raw.kreis1849[0] 
- self.assertEqual(result, expected) + assert result == expected assert isinstance(result, compat.string_types) else: expected = raw.kreis1849.str.decode("latin-1")[0] - self.assertEqual(result, expected) + assert result == expected assert isinstance(result, unicode) # noqa with tm.ensure_clean() as path: @@ -377,7 +377,7 @@ def test_read_write_dta11(self): with warnings.catch_warnings(record=True) as w: original.to_stata(path, None) # should get a warning for that format. - self.assertEqual(len(w), 1) + assert len(w) == 1 written_and_read_again = self.read_dta(path) tm.assert_frame_equal( @@ -405,7 +405,7 @@ def test_read_write_dta12(self): with warnings.catch_warnings(record=True) as w: original.to_stata(path, None) # should get a warning for that format. - self.assertEqual(len(w), 1) + assert len(w) == 1 written_and_read_again = self.read_dta(path) tm.assert_frame_equal( @@ -904,7 +904,7 @@ def test_categorical_warnings_and_errors(self): with warnings.catch_warnings(record=True) as w: original.to_stata(path) # should get a warning for mixed content - self.assertEqual(len(w), 1) + assert len(w) == 1 def test_categorical_with_stata_missing_values(self): values = [['a' + str(i)] for i in range(120)] @@ -986,10 +986,10 @@ def test_categorical_ordering(self): for col in parsed_115: if not is_categorical_dtype(parsed_115[col]): continue - self.assertEqual(True, parsed_115[col].cat.ordered) - self.assertEqual(True, parsed_117[col].cat.ordered) - self.assertEqual(False, parsed_115_unordered[col].cat.ordered) - self.assertEqual(False, parsed_117_unordered[col].cat.ordered) + assert parsed_115[col].cat.ordered + assert parsed_117[col].cat.ordered + assert not parsed_115_unordered[col].cat.ordered + assert not parsed_117_unordered[col].cat.ordered def test_read_chunks_117(self): files_117 = [self.dta1_117, self.dta2_117, self.dta3_117, diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 64bcb55cb4e6a..7d0c39dae6e4b 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -149,7 +149,7 @@ def check_line(xpl, rsl): rsdata = rsl.get_xydata() tm.assert_almost_equal(xpdata, rsdata) - self.assertEqual(len(xp_lines), len(rs_lines)) + assert len(xp_lines) == len(rs_lines) [check_line(xpl, rsl) for xpl, rsl in zip(xp_lines, rs_lines)] tm.close() @@ -170,7 +170,7 @@ def _check_visible(self, collections, visible=True): collections = [collections] for patch in collections: - self.assertEqual(patch.get_visible(), visible) + assert patch.get_visible() == visible def _get_colors_mapped(self, series, colors): unique = series.unique() @@ -208,7 +208,7 @@ def _check_colors(self, collections, linecolors=None, facecolors=None, linecolors = self._get_colors_mapped(mapping, linecolors) linecolors = linecolors[:len(collections)] - self.assertEqual(len(collections), len(linecolors)) + assert len(collections) == len(linecolors) for patch, color in zip(collections, linecolors): if isinstance(patch, Line2D): result = patch.get_color() @@ -220,7 +220,7 @@ def _check_colors(self, collections, linecolors=None, facecolors=None, result = patch.get_edgecolor() expected = conv.to_rgba(color) - self.assertEqual(result, expected) + assert result == expected if facecolors is not None: @@ -228,7 +228,7 @@ def _check_colors(self, collections, linecolors=None, facecolors=None, facecolors = self._get_colors_mapped(mapping, facecolors) facecolors = facecolors[:len(collections)] - self.assertEqual(len(collections), len(facecolors)) + assert len(collections) == len(facecolors) 
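# A minimal sketch of the normalization the loop below relies on: both the
# drawn color and the expected color are reduced to RGBA tuples before
# comparison ('red' and '#0000ff' are arbitrary example inputs):
#
#   from matplotlib.colors import ColorConverter
#   ColorConverter().to_rgba('red')      # -> (1.0, 0.0, 0.0, 1.0)
#   ColorConverter().to_rgba('#0000ff')  # -> (0.0, 0.0, 1.0, 1.0)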
for patch, color in zip(collections, facecolors): if isinstance(patch, Collection): # returned as list of np.array @@ -240,7 +240,7 @@ def _check_colors(self, collections, linecolors=None, facecolors=None, result = tuple(result) expected = conv.to_rgba(color) - self.assertEqual(result, expected) + assert result == expected def _check_text_labels(self, texts, expected): """ @@ -254,12 +254,12 @@ def _check_text_labels(self, texts, expected): expected text label, or its list """ if not is_list_like(texts): - self.assertEqual(texts.get_text(), expected) + assert texts.get_text() == expected else: labels = [t.get_text() for t in texts] - self.assertEqual(len(labels), len(expected)) + assert len(labels) == len(expected) for l, e in zip(labels, expected): - self.assertEqual(l, e) + assert l == e def _check_ticks_props(self, axes, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None): @@ -325,8 +325,8 @@ def _check_ax_scales(self, axes, xaxis='linear', yaxis='linear'): """ axes = self._flatten_visible(axes) for ax in axes: - self.assertEqual(ax.xaxis.get_scale(), xaxis) - self.assertEqual(ax.yaxis.get_scale(), yaxis) + assert ax.xaxis.get_scale() == xaxis + assert ax.yaxis.get_scale() == yaxis def _check_axes_shape(self, axes, axes_num=None, layout=None, figsize=None): @@ -349,14 +349,14 @@ def _check_axes_shape(self, axes, axes_num=None, layout=None, visible_axes = self._flatten_visible(axes) if axes_num is not None: - self.assertEqual(len(visible_axes), axes_num) + assert len(visible_axes) == axes_num for ax in visible_axes: # check something drawn on visible axes assert len(ax.get_children()) > 0 if layout is not None: result = self._get_axes_layout(_flatten(axes)) - self.assertEqual(result, layout) + assert result == layout tm.assert_numpy_array_equal( visible_axes[0].figure.get_size_inches(), @@ -409,8 +409,8 @@ def _check_has_errorbars(self, axes, xerr=0, yerr=0): xerr_count += 1 if has_yerr: yerr_count += 1 - self.assertEqual(xerr, xerr_count) - self.assertEqual(yerr, yerr_count) + assert xerr == xerr_count + assert yerr == yerr_count def _check_box_return_type(self, returned, return_type, expected_keys=None, check_ax_title=True): @@ -450,23 +450,23 @@ def _check_box_return_type(self, returned, return_type, expected_keys=None, assert isinstance(returned, Series) - self.assertEqual(sorted(returned.keys()), sorted(expected_keys)) + assert sorted(returned.keys()) == sorted(expected_keys) for key, value in iteritems(returned): assert isinstance(value, types[return_type]) # check returned dict has correct mapping if return_type == 'axes': if check_ax_title: - self.assertEqual(value.get_title(), key) + assert value.get_title() == key elif return_type == 'both': if check_ax_title: - self.assertEqual(value.ax.get_title(), key) + assert value.ax.get_title() == key assert isinstance(value.ax, Axes) assert isinstance(value.lines, dict) elif return_type == 'dict': line = value['medians'][0] axes = line.axes if self.mpl_ge_1_5_0 else line.get_axes() if check_ax_title: - self.assertEqual(axes.get_title(), key) + assert axes.get_title() == key else: raise AssertionError diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index fe6d5e5cf148f..1f70d408767f3 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -90,7 +90,7 @@ def test_boxplot_legacy(self): fig, ax = self.plt.subplots() d = df.boxplot(ax=ax, return_type='dict') lines = list(itertools.chain.from_iterable(d.values())) - 
self.assertEqual(len(ax.get_lines()), len(lines)) + assert len(ax.get_lines()) == len(lines) @slow def test_boxplot_return_type_none(self): @@ -138,7 +138,7 @@ def _check_ax_limits(col, ax): height_ax, weight_ax = df.boxplot(['height', 'weight'], by='category') _check_ax_limits(df['height'], height_ax) _check_ax_limits(df['weight'], weight_ax) - self.assertEqual(weight_ax._sharey, height_ax) + assert weight_ax._sharey == height_ax # Two rows, one partial p = df.boxplot(['height', 'weight', 'age'], by='category') @@ -148,8 +148,8 @@ def _check_ax_limits(col, ax): _check_ax_limits(df['height'], height_ax) _check_ax_limits(df['weight'], weight_ax) _check_ax_limits(df['age'], age_ax) - self.assertEqual(weight_ax._sharey, height_ax) - self.assertEqual(age_ax._sharey, height_ax) + assert weight_ax._sharey == height_ax + assert age_ax._sharey == height_ax assert dummy_ax._sharey is None @slow @@ -209,13 +209,13 @@ def test_grouped_plot_fignums(self): gb = df.groupby('gender') res = gb.plot() - self.assertEqual(len(self.plt.get_fignums()), 2) - self.assertEqual(len(res), 2) + assert len(self.plt.get_fignums()) == 2 + assert len(res) == 2 tm.close() res = gb.boxplot(return_type='axes') - self.assertEqual(len(self.plt.get_fignums()), 1) - self.assertEqual(len(res), 2) + assert len(self.plt.get_fignums()) == 1 + assert len(res) == 2 tm.close() # now works with GH 5610 as gender is excluded diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 30eb3ef24fe30..e23bc2ef6c563 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -29,35 +29,35 @@ def test_convert_accepts_unicode(self): def test_conversion(self): rs = self.dtc.convert(['2012-1-1'], None, None)[0] xp = datetime(2012, 1, 1).toordinal() - self.assertEqual(rs, xp) + assert rs == xp rs = self.dtc.convert('2012-1-1', None, None) - self.assertEqual(rs, xp) + assert rs == xp rs = self.dtc.convert(date(2012, 1, 1), None, None) - self.assertEqual(rs, xp) + assert rs == xp rs = self.dtc.convert(datetime(2012, 1, 1).toordinal(), None, None) - self.assertEqual(rs, xp) + assert rs == xp rs = self.dtc.convert('2012-1-1', None, None) - self.assertEqual(rs, xp) + assert rs == xp rs = self.dtc.convert(Timestamp('2012-1-1'), None, None) - self.assertEqual(rs, xp) + assert rs == xp # also testing datetime64 dtype (GH8614) rs = self.dtc.convert(np_datetime64_compat('2012-01-01'), None, None) - self.assertEqual(rs, xp) + assert rs == xp rs = self.dtc.convert(np_datetime64_compat( '2012-01-01 00:00:00+0000'), None, None) - self.assertEqual(rs, xp) + assert rs == xp rs = self.dtc.convert(np.array([ np_datetime64_compat('2012-01-01 00:00:00+0000'), np_datetime64_compat('2012-01-02 00:00:00+0000')]), None, None) - self.assertEqual(rs[0], xp) + assert rs[0] == xp # we have a tz-aware date (constructed to that when we turn to utc it # is the same as our sample) @@ -66,17 +66,17 @@ def test_conversion(self): .tz_convert('US/Eastern') ) rs = self.dtc.convert(ts, None, None) - self.assertEqual(rs, xp) + assert rs == xp rs = self.dtc.convert(ts.to_pydatetime(), None, None) - self.assertEqual(rs, xp) + assert rs == xp rs = self.dtc.convert(Index([ts - Day(1), ts]), None, None) - self.assertEqual(rs[1], xp) + assert rs[1] == xp rs = self.dtc.convert(Index([ts - Day(1), ts]).to_pydatetime(), None, None) - self.assertEqual(rs[1], xp) + assert rs[1] == xp def test_conversion_float(self): decimals = 9 @@ -101,7 +101,7 @@ def test_conversion_outofbounds_datetime(self): 
tm.assert_numpy_array_equal(rs, xp) rs = self.dtc.convert(values[0], None, None) xp = converter.dates.date2num(values[0]) - self.assertEqual(rs, xp) + assert rs == xp values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)] rs = self.dtc.convert(values, None, None) @@ -109,7 +109,7 @@ def test_conversion_outofbounds_datetime(self): tm.assert_numpy_array_equal(rs, xp) rs = self.dtc.convert(values[0], None, None) xp = converter.dates.date2num(values[0]) - self.assertEqual(rs, xp) + assert rs == xp def test_time_formatter(self): self.tc(90000) @@ -165,44 +165,44 @@ def test_convert_accepts_unicode(self): def test_conversion(self): rs = self.pc.convert(['2012-1-1'], None, self.axis)[0] xp = Period('2012-1-1').ordinal - self.assertEqual(rs, xp) + assert rs == xp rs = self.pc.convert('2012-1-1', None, self.axis) - self.assertEqual(rs, xp) + assert rs == xp rs = self.pc.convert([date(2012, 1, 1)], None, self.axis)[0] - self.assertEqual(rs, xp) + assert rs == xp rs = self.pc.convert(date(2012, 1, 1), None, self.axis) - self.assertEqual(rs, xp) + assert rs == xp rs = self.pc.convert([Timestamp('2012-1-1')], None, self.axis)[0] - self.assertEqual(rs, xp) + assert rs == xp rs = self.pc.convert(Timestamp('2012-1-1'), None, self.axis) - self.assertEqual(rs, xp) + assert rs == xp # FIXME # rs = self.pc.convert( # np_datetime64_compat('2012-01-01'), None, self.axis) - # self.assertEqual(rs, xp) + # assert rs == xp # # rs = self.pc.convert( # np_datetime64_compat('2012-01-01 00:00:00+0000'), # None, self.axis) - # self.assertEqual(rs, xp) + # assert rs == xp # # rs = self.pc.convert(np.array([ # np_datetime64_compat('2012-01-01 00:00:00+0000'), # np_datetime64_compat('2012-01-02 00:00:00+0000')]), # None, self.axis) - # self.assertEqual(rs[0], xp) + # assert rs[0] == xp def test_integer_passthrough(self): # GH9012 rs = self.pc.convert([0, 1], None, self.axis) xp = [0, 1] - self.assertEqual(rs, xp) + assert rs == xp def test_convert_nested(self): data = ['2012-1-1', '2012-1-2'] diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 30d67630afa41..ae8faa031174e 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -58,7 +58,7 @@ def test_fontsize_set_correctly(self): df = DataFrame(np.random.randn(10, 9), index=range(10)) ax = df.plot(fontsize=2) for label in (ax.get_xticklabels() + ax.get_yticklabels()): - self.assertEqual(label.get_fontsize(), 2) + assert label.get_fontsize() == 2 @slow def test_frame_inferred(self): @@ -95,7 +95,7 @@ def test_nonnumeric_exclude(self): df = DataFrame({'A': ["x", "y", "z"], 'B': [1, 2, 3]}, idx) ax = df.plot() # it works - self.assertEqual(len(ax.get_lines()), 1) # B was plotted + assert len(ax.get_lines()) == 1 # B was plotted plt.close(plt.gcf()) pytest.raises(TypeError, df['A'].plot) @@ -124,7 +124,7 @@ def test_tsplot(self): ax = ts.plot(style='k') color = (0., 0., 0., 1) if self.mpl_ge_2_0_0 else (0., 0., 0.) 
- self.assertEqual(color, ax.get_lines()[0].get_color()) + assert color == ax.get_lines()[0].get_color() def test_both_style_and_color(self): import matplotlib.pyplot as plt # noqa @@ -146,11 +146,11 @@ def test_high_freq(self): def test_get_datevalue(self): from pandas.plotting._converter import get_datevalue assert get_datevalue(None, 'D') is None - self.assertEqual(get_datevalue(1987, 'A'), 1987) - self.assertEqual(get_datevalue(Period(1987, 'A'), 'M'), - Period('1987-12', 'M').ordinal) - self.assertEqual(get_datevalue('1/1/1987', 'D'), - Period('1987-1-1', 'D').ordinal) + assert get_datevalue(1987, 'A') == 1987 + assert (get_datevalue(Period(1987, 'A'), 'M') == + Period('1987-12', 'M').ordinal) + assert (get_datevalue('1/1/1987', 'D') == + Period('1987-1-1', 'D').ordinal) @slow def test_ts_plot_format_coord(self): @@ -159,8 +159,7 @@ def check_format_of_first_point(ax, expected_string): first_x = first_line.get_xdata()[0].ordinal first_y = first_line.get_ydata()[0] try: - self.assertEqual(expected_string, - ax.format_coord(first_x, first_y)) + assert expected_string == ax.format_coord(first_x, first_y) except (ValueError): pytest.skip("skipping test because issue forming " "test comparison GH7664") @@ -261,7 +260,7 @@ def test_uhf(self): xp = conv._from_ordinal(loc).strftime('%H:%M:%S.%f') rs = str(label.get_text()) if len(rs): - self.assertEqual(xp, rs) + assert xp == rs @slow def test_irreg_hf(self): @@ -308,10 +307,9 @@ def test_business_freq(self): import matplotlib.pyplot as plt # noqa bts = tm.makePeriodSeries() ax = bts.plot() - self.assertEqual(ax.get_lines()[0].get_xydata()[0, 0], - bts.index[0].ordinal) + assert ax.get_lines()[0].get_xydata()[0, 0] == bts.index[0].ordinal idx = ax.get_lines()[0].get_xdata() - self.assertEqual(PeriodIndex(data=idx).freqstr, 'B') + assert PeriodIndex(data=idx).freqstr == 'B' @slow def test_business_freq_convert(self): @@ -321,10 +319,9 @@ def test_business_freq_convert(self): tm.N = n ts = bts.to_period('M') ax = bts.plot() - self.assertEqual(ax.get_lines()[0].get_xydata()[0, 0], - ts.index[0].ordinal) + assert ax.get_lines()[0].get_xydata()[0, 0] == ts.index[0].ordinal idx = ax.get_lines()[0].get_xdata() - self.assertEqual(PeriodIndex(data=idx).freqstr, 'M') + assert PeriodIndex(data=idx).freqstr == 'M' def test_nonzero_base(self): # GH2571 @@ -350,8 +347,8 @@ def _test(ax): ax.set_xlim(xlim[0] - 5, xlim[1] + 10) ax.get_figure().canvas.draw() result = ax.get_xlim() - self.assertEqual(result[0], xlim[0] - 5) - self.assertEqual(result[1], xlim[1] + 10) + assert result[0] == xlim[0] - 5 + assert result[1] == xlim[1] + 10 # string expected = (Period('1/1/2000', ax.freq), @@ -359,8 +356,8 @@ def _test(ax): ax.set_xlim('1/1/2000', '4/1/2000') ax.get_figure().canvas.draw() result = ax.get_xlim() - self.assertEqual(int(result[0]), expected[0].ordinal) - self.assertEqual(int(result[1]), expected[1].ordinal) + assert int(result[0]) == expected[0].ordinal + assert int(result[1]) == expected[1].ordinal # datetim expected = (Period('1/1/2000', ax.freq), @@ -368,8 +365,8 @@ def _test(ax): ax.set_xlim(datetime(2000, 1, 1), datetime(2000, 4, 1)) ax.get_figure().canvas.draw() result = ax.get_xlim() - self.assertEqual(int(result[0]), expected[0].ordinal) - self.assertEqual(int(result[1]), expected[1].ordinal) + assert int(result[0]) == expected[0].ordinal + assert int(result[1]) == expected[1].ordinal fig = ax.get_figure() plt.close(fig) @@ -390,12 +387,12 @@ def _test(ax): def test_get_finder(self): import pandas.plotting._converter as conv - 
self.assertEqual(conv.get_finder('B'), conv._daily_finder) - self.assertEqual(conv.get_finder('D'), conv._daily_finder) - self.assertEqual(conv.get_finder('M'), conv._monthly_finder) - self.assertEqual(conv.get_finder('Q'), conv._quarterly_finder) - self.assertEqual(conv.get_finder('A'), conv._annual_finder) - self.assertEqual(conv.get_finder('W'), conv._daily_finder) + assert conv.get_finder('B') == conv._daily_finder + assert conv.get_finder('D') == conv._daily_finder + assert conv.get_finder('M') == conv._monthly_finder + assert conv.get_finder('Q') == conv._quarterly_finder + assert conv.get_finder('A') == conv._annual_finder + assert conv.get_finder('W') == conv._daily_finder @slow def test_finder_daily(self): @@ -408,11 +405,11 @@ def test_finder_daily(self): ax = ser.plot() xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] - self.assertEqual(xp, rs) + assert xp == rs vmin, vmax = ax.get_xlim() ax.set_xlim(vmin + 0.9, vmax) rs = xaxis.get_majorticklocs()[0] - self.assertEqual(xp, rs) + assert xp == rs plt.close(ax.get_figure()) @slow @@ -426,11 +423,11 @@ def test_finder_quarterly(self): ax = ser.plot() xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] - self.assertEqual(rs, xp) + assert rs == xp (vmin, vmax) = ax.get_xlim() ax.set_xlim(vmin + 0.9, vmax) rs = xaxis.get_majorticklocs()[0] - self.assertEqual(xp, rs) + assert xp == rs plt.close(ax.get_figure()) @slow @@ -444,11 +441,11 @@ def test_finder_monthly(self): ax = ser.plot() xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] - self.assertEqual(rs, xp) + assert rs == xp vmin, vmax = ax.get_xlim() ax.set_xlim(vmin + 0.9, vmax) rs = xaxis.get_majorticklocs()[0] - self.assertEqual(xp, rs) + assert xp == rs plt.close(ax.get_figure()) def test_finder_monthly_long(self): @@ -458,7 +455,7 @@ def test_finder_monthly_long(self): xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] xp = Period('1989Q1', 'M').ordinal - self.assertEqual(rs, xp) + assert rs == xp @slow def test_finder_annual(self): @@ -470,7 +467,7 @@ def test_finder_annual(self): ax = ser.plot() xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] - self.assertEqual(rs, Period(xp[i], freq='A').ordinal) + assert rs == Period(xp[i], freq='A').ordinal plt.close(ax.get_figure()) @slow @@ -482,7 +479,7 @@ def test_finder_minutely(self): xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] xp = Period('1/1/1999', freq='Min').ordinal - self.assertEqual(rs, xp) + assert rs == xp def test_finder_hourly(self): nhours = 23 @@ -492,7 +489,7 @@ def test_finder_hourly(self): xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] xp = Period('1/1/1999', freq='H').ordinal - self.assertEqual(rs, xp) + assert rs == xp @slow def test_gaps(self): @@ -503,7 +500,7 @@ def test_gaps(self): ax = ts.plot() lines = ax.get_lines() tm._skip_if_mpl_1_5() - self.assertEqual(len(lines), 1) + assert len(lines) == 1 l = lines[0] data = l.get_xydata() assert isinstance(data, np.ma.core.MaskedArray) @@ -517,7 +514,7 @@ def test_gaps(self): ts[2:5] = np.nan ax = ts.plot() lines = ax.get_lines() - self.assertEqual(len(lines), 1) + assert len(lines) == 1 l = lines[0] data = l.get_xydata() assert isinstance(data, np.ma.core.MaskedArray) @@ -531,7 +528,7 @@ def test_gaps(self): ser[2:5] = np.nan ax = ser.plot() lines = ax.get_lines() - self.assertEqual(len(lines), 1) + assert len(lines) == 1 l = lines[0] data = l.get_xydata() assert isinstance(data, np.ma.core.MaskedArray) @@ -548,8 +545,8 @@ def test_gap_upsample(self): s = Series(np.random.randn(len(idxh)), idxh) 
s.plot(secondary_y=True) lines = ax.get_lines() - self.assertEqual(len(lines), 1) - self.assertEqual(len(ax.right_ax.get_lines()), 1) + assert len(lines) == 1 + assert len(ax.right_ax.get_lines()) == 1 l = lines[0] data = l.get_xydata() @@ -573,13 +570,13 @@ def test_secondary_y(self): l = ax.get_lines()[0] xp = Series(l.get_ydata(), l.get_xdata()) assert_series_equal(ser, xp) - self.assertEqual(ax.get_yaxis().get_ticks_position(), 'right') + assert ax.get_yaxis().get_ticks_position() == 'right' assert not axes[0].get_yaxis().get_visible() plt.close(fig) ax2 = ser2.plot() - self.assertEqual(ax2.get_yaxis().get_ticks_position(), - self.default_tick_position) + assert (ax2.get_yaxis().get_ticks_position() == + self.default_tick_position) plt.close(ax2.get_figure()) ax = ser2.plot() @@ -604,13 +601,13 @@ def test_secondary_y_ts(self): l = ax.get_lines()[0] xp = Series(l.get_ydata(), l.get_xdata()).to_timestamp() assert_series_equal(ser, xp) - self.assertEqual(ax.get_yaxis().get_ticks_position(), 'right') + assert ax.get_yaxis().get_ticks_position() == 'right' assert not axes[0].get_yaxis().get_visible() plt.close(fig) ax2 = ser2.plot() - self.assertEqual(ax2.get_yaxis().get_ticks_position(), - self.default_tick_position) + assert (ax2.get_yaxis().get_ticks_position() == + self.default_tick_position) plt.close(ax2.get_figure()) ax = ser2.plot() @@ -629,7 +626,7 @@ def test_secondary_kde(self): assert not hasattr(ax, 'right_ax') fig = ax.get_figure() axes = fig.get_axes() - self.assertEqual(axes[1].get_yaxis().get_ticks_position(), 'right') + assert axes[1].get_yaxis().get_ticks_position() == 'right' @slow def test_secondary_bar(self): @@ -637,25 +634,25 @@ def test_secondary_bar(self): ax = ser.plot(secondary_y=True, kind='bar') fig = ax.get_figure() axes = fig.get_axes() - self.assertEqual(axes[1].get_yaxis().get_ticks_position(), 'right') + assert axes[1].get_yaxis().get_ticks_position() == 'right' @slow def test_secondary_frame(self): df = DataFrame(np.random.randn(5, 3), columns=['a', 'b', 'c']) axes = df.plot(secondary_y=['a', 'c'], subplots=True) - self.assertEqual(axes[0].get_yaxis().get_ticks_position(), 'right') - self.assertEqual(axes[1].get_yaxis().get_ticks_position(), - self.default_tick_position) - self.assertEqual(axes[2].get_yaxis().get_ticks_position(), 'right') + assert axes[0].get_yaxis().get_ticks_position() == 'right' + assert (axes[1].get_yaxis().get_ticks_position() == + self.default_tick_position) + assert axes[2].get_yaxis().get_ticks_position() == 'right' @slow def test_secondary_bar_frame(self): df = DataFrame(np.random.randn(5, 3), columns=['a', 'b', 'c']) axes = df.plot(kind='bar', secondary_y=['a', 'c'], subplots=True) - self.assertEqual(axes[0].get_yaxis().get_ticks_position(), 'right') - self.assertEqual(axes[1].get_yaxis().get_ticks_position(), - self.default_tick_position) - self.assertEqual(axes[2].get_yaxis().get_ticks_position(), 'right') + assert axes[0].get_yaxis().get_ticks_position() == 'right' + assert (axes[1].get_yaxis().get_ticks_position() == + self.default_tick_position) + assert axes[2].get_yaxis().get_ticks_position() == 'right' def test_mixed_freq_regular_first(self): import matplotlib.pyplot as plt # noqa @@ -673,8 +670,8 @@ def test_mixed_freq_regular_first(self): assert idx2.equals(s2.index.to_period('B')) left, right = ax2.get_xlim() pidx = s1.index.to_period() - self.assertEqual(left, pidx[0].ordinal) - self.assertEqual(right, pidx[-1].ordinal) + assert left == pidx[0].ordinal + assert right == pidx[-1].ordinal @slow def 
test_mixed_freq_irregular_first(self): @@ -704,8 +701,8 @@ def test_mixed_freq_regular_first_df(self): assert idx2.equals(s2.index.to_period('B')) left, right = ax2.get_xlim() pidx = s1.index.to_period() - self.assertEqual(left, pidx[0].ordinal) - self.assertEqual(right, pidx[-1].ordinal) + assert left == pidx[0].ordinal + assert right == pidx[-1].ordinal @slow def test_mixed_freq_irregular_first_df(self): @@ -730,7 +727,7 @@ def test_mixed_freq_hf_first(self): high.plot() ax = low.plot() for l in ax.get_lines(): - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, 'D') + assert PeriodIndex(data=l.get_xdata()).freq == 'D' @slow def test_mixed_freq_alignment(self): @@ -743,8 +740,7 @@ def test_mixed_freq_alignment(self): ax = ts.plot() ts2.plot(style='r') - self.assertEqual(ax.lines[0].get_xdata()[0], - ax.lines[1].get_xdata()[0]) + assert ax.lines[0].get_xdata()[0] == ax.lines[1].get_xdata()[0] @slow def test_mixed_freq_lf_first(self): @@ -757,9 +753,9 @@ def test_mixed_freq_lf_first(self): low.plot(legend=True) ax = high.plot(legend=True) for l in ax.get_lines(): - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, 'D') + assert PeriodIndex(data=l.get_xdata()).freq == 'D' leg = ax.get_legend() - self.assertEqual(len(leg.texts), 2) + assert len(leg.texts) == 2 plt.close(ax.get_figure()) idxh = date_range('1/1/1999', periods=240, freq='T') @@ -769,7 +765,7 @@ def test_mixed_freq_lf_first(self): low.plot() ax = high.plot() for l in ax.get_lines(): - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, 'T') + assert PeriodIndex(data=l.get_xdata()).freq == 'T' def test_mixed_freq_irreg_period(self): ts = tm.makeTimeSeries() @@ -791,10 +787,10 @@ def test_mixed_freq_shared_ax(self): s1.plot(ax=ax1) s2.plot(ax=ax2) - self.assertEqual(ax1.freq, 'M') - self.assertEqual(ax2.freq, 'M') - self.assertEqual(ax1.lines[0].get_xydata()[0, 0], - ax2.lines[0].get_xydata()[0, 0]) + assert ax1.freq == 'M' + assert ax2.freq == 'M' + assert (ax1.lines[0].get_xydata()[0, 0] == + ax2.lines[0].get_xydata()[0, 0]) # using twinx fig, ax1 = self.plt.subplots() @@ -802,8 +798,8 @@ def test_mixed_freq_shared_ax(self): s1.plot(ax=ax1) s2.plot(ax=ax2) - self.assertEqual(ax1.lines[0].get_xydata()[0, 0], - ax2.lines[0].get_xydata()[0, 0]) + assert (ax1.lines[0].get_xydata()[0, 0] == + ax2.lines[0].get_xydata()[0, 0]) # TODO (GH14330, GH14322) # plotting the irregular first does not yet work @@ -811,8 +807,8 @@ def test_mixed_freq_shared_ax(self): # ax2 = ax1.twinx() # s2.plot(ax=ax1) # s1.plot(ax=ax2) - # self.assertEqual(ax1.lines[0].get_xydata()[0, 0], - # ax2.lines[0].get_xydata()[0, 0]) + # assert (ax1.lines[0].get_xydata()[0, 0] == + # ax2.lines[0].get_xydata()[0, 0]) @slow def test_to_weekly_resampling(self): @@ -823,7 +819,7 @@ def test_to_weekly_resampling(self): high.plot() ax = low.plot() for l in ax.get_lines(): - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, idxh.freq) + assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq # tsplot from pandas.tseries.plotting import tsplot @@ -890,7 +886,7 @@ def test_from_resampling_area_line_mixed(self): expected_y = np.zeros(len(expected_x), dtype=np.float64) for i in range(3): l = ax.lines[i] - self.assertEqual(PeriodIndex(l.get_xdata()).freq, idxh.freq) + assert PeriodIndex(l.get_xdata()).freq == idxh.freq tm.assert_numpy_array_equal(l.get_xdata(orig=False), expected_x) # check stacked values are correct @@ -951,17 +947,17 @@ def test_mixed_freq_second_millisecond(self): # high to low high.plot() ax = low.plot() - 
self.assertEqual(len(ax.get_lines()), 2) + assert len(ax.get_lines()) == 2 for l in ax.get_lines(): - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, 'L') + assert PeriodIndex(data=l.get_xdata()).freq == 'L' tm.close() # low to high low.plot() ax = high.plot() - self.assertEqual(len(ax.get_lines()), 2) + assert len(ax.get_lines()) == 2 for l in ax.get_lines(): - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, 'L') + assert PeriodIndex(data=l.get_xdata()).freq == 'L' @slow def test_irreg_dtypes(self): @@ -995,7 +991,7 @@ def test_time(self): xp = l.get_text() if len(xp) > 0: rs = time(h, m, s).strftime('%H:%M:%S') - self.assertEqual(xp, rs) + assert xp == rs # change xlim ax.set_xlim('1:30', '5:00') @@ -1009,7 +1005,7 @@ def test_time(self): xp = l.get_text() if len(xp) > 0: rs = time(h, m, s).strftime('%H:%M:%S') - self.assertEqual(xp, rs) + assert xp == rs @slow def test_time_musec(self): @@ -1035,7 +1031,7 @@ def test_time_musec(self): xp = l.get_text() if len(xp) > 0: rs = time(h, m, s).strftime('%H:%M:%S.%f') - self.assertEqual(xp, rs) + assert xp == rs @slow def test_secondary_upsample(self): @@ -1046,11 +1042,11 @@ def test_secondary_upsample(self): low.plot() ax = high.plot(secondary_y=True) for l in ax.get_lines(): - self.assertEqual(PeriodIndex(l.get_xdata()).freq, 'D') + assert PeriodIndex(l.get_xdata()).freq == 'D' assert hasattr(ax, 'left_ax') assert not hasattr(ax, 'right_ax') for l in ax.left_ax.get_lines(): - self.assertEqual(PeriodIndex(l.get_xdata()).freq, 'D') + assert PeriodIndex(l.get_xdata()).freq == 'D' @slow def test_secondary_legend(self): @@ -1063,54 +1059,54 @@ def test_secondary_legend(self): df = tm.makeTimeDataFrame() ax = df.plot(secondary_y=['A', 'B']) leg = ax.get_legend() - self.assertEqual(len(leg.get_lines()), 4) - self.assertEqual(leg.get_texts()[0].get_text(), 'A (right)') - self.assertEqual(leg.get_texts()[1].get_text(), 'B (right)') - self.assertEqual(leg.get_texts()[2].get_text(), 'C') - self.assertEqual(leg.get_texts()[3].get_text(), 'D') + assert len(leg.get_lines()) == 4 + assert leg.get_texts()[0].get_text() == 'A (right)' + assert leg.get_texts()[1].get_text() == 'B (right)' + assert leg.get_texts()[2].get_text() == 'C' + assert leg.get_texts()[3].get_text() == 'D' assert ax.right_ax.get_legend() is None colors = set() for line in leg.get_lines(): colors.add(line.get_color()) # TODO: color cycle problems - self.assertEqual(len(colors), 4) + assert len(colors) == 4 plt.clf() ax = fig.add_subplot(211) ax = df.plot(secondary_y=['A', 'C'], mark_right=False) leg = ax.get_legend() - self.assertEqual(len(leg.get_lines()), 4) - self.assertEqual(leg.get_texts()[0].get_text(), 'A') - self.assertEqual(leg.get_texts()[1].get_text(), 'B') - self.assertEqual(leg.get_texts()[2].get_text(), 'C') - self.assertEqual(leg.get_texts()[3].get_text(), 'D') + assert len(leg.get_lines()) == 4 + assert leg.get_texts()[0].get_text() == 'A' + assert leg.get_texts()[1].get_text() == 'B' + assert leg.get_texts()[2].get_text() == 'C' + assert leg.get_texts()[3].get_text() == 'D' plt.clf() ax = df.plot(kind='bar', secondary_y=['A']) leg = ax.get_legend() - self.assertEqual(leg.get_texts()[0].get_text(), 'A (right)') - self.assertEqual(leg.get_texts()[1].get_text(), 'B') + assert leg.get_texts()[0].get_text() == 'A (right)' + assert leg.get_texts()[1].get_text() == 'B' plt.clf() ax = df.plot(kind='bar', secondary_y=['A'], mark_right=False) leg = ax.get_legend() - self.assertEqual(leg.get_texts()[0].get_text(), 'A') - self.assertEqual(leg.get_texts()[1].get_text(), 
'B') + assert leg.get_texts()[0].get_text() == 'A' + assert leg.get_texts()[1].get_text() == 'B' plt.clf() ax = fig.add_subplot(211) df = tm.makeTimeDataFrame() ax = df.plot(secondary_y=['C', 'D']) leg = ax.get_legend() - self.assertEqual(len(leg.get_lines()), 4) + assert len(leg.get_lines()) == 4 assert ax.right_ax.get_legend() is None colors = set() for line in leg.get_lines(): colors.add(line.get_color()) # TODO: color cycle problems - self.assertEqual(len(colors), 4) + assert len(colors) == 4 # non-ts df = tm.makeDataFrame() @@ -1118,27 +1114,27 @@ def test_secondary_legend(self): ax = fig.add_subplot(211) ax = df.plot(secondary_y=['A', 'B']) leg = ax.get_legend() - self.assertEqual(len(leg.get_lines()), 4) + assert len(leg.get_lines()) == 4 assert ax.right_ax.get_legend() is None colors = set() for line in leg.get_lines(): colors.add(line.get_color()) # TODO: color cycle problems - self.assertEqual(len(colors), 4) + assert len(colors) == 4 plt.clf() ax = fig.add_subplot(211) ax = df.plot(secondary_y=['C', 'D']) leg = ax.get_legend() - self.assertEqual(len(leg.get_lines()), 4) + assert len(leg.get_lines()) == 4 assert ax.right_ax.get_legend() is None colors = set() for line in leg.get_lines(): colors.add(line.get_color()) # TODO: color cycle problems - self.assertEqual(len(colors), 4) + assert len(colors) == 4 def test_format_date_axis(self): rng = date_range('1/1/2012', periods=12, freq='M') @@ -1147,7 +1143,7 @@ def test_format_date_axis(self): xaxis = ax.get_xaxis() for l in xaxis.get_ticklabels(): if len(l.get_text()) > 0: - self.assertEqual(l.get_rotation(), 30) + assert l.get_rotation() == 30 @slow def test_ax_plot(self): @@ -1195,8 +1191,8 @@ def test_irregular_ts_shared_ax_xlim(self): # check that axis limits are correct left, right = ax.get_xlim() - self.assertEqual(left, ts_irregular.index.min().toordinal()) - self.assertEqual(right, ts_irregular.index.max().toordinal()) + assert left == ts_irregular.index.min().toordinal() + assert right == ts_irregular.index.max().toordinal() @slow def test_secondary_y_non_ts_xlim(self): @@ -1211,7 +1207,7 @@ def test_secondary_y_non_ts_xlim(self): s2.plot(secondary_y=True, ax=ax) left_after, right_after = ax.get_xlim() - self.assertEqual(left_before, left_after) + assert left_before == left_after assert right_before < right_after @slow @@ -1227,7 +1223,7 @@ def test_secondary_y_regular_ts_xlim(self): s2.plot(secondary_y=True, ax=ax) left_after, right_after = ax.get_xlim() - self.assertEqual(left_before, left_after) + assert left_before == left_after assert right_before < right_after @slow @@ -1242,8 +1238,8 @@ def test_secondary_y_mixed_freq_ts_xlim(self): left_after, right_after = ax.get_xlim() # a downsample should not have changed either limit - self.assertEqual(left_before, left_after) - self.assertEqual(right_before, right_after) + assert left_before == left_after + assert right_before == right_after @slow def test_secondary_y_irregular_ts_xlim(self): @@ -1258,8 +1254,8 @@ def test_secondary_y_irregular_ts_xlim(self): ts_irregular[:5].plot(ax=ax) left, right = ax.get_xlim() - self.assertEqual(left, ts_irregular.index.min().toordinal()) - self.assertEqual(right, ts_irregular.index.max().toordinal()) + assert left == ts_irregular.index.min().toordinal() + assert right == ts_irregular.index.max().toordinal() def test_plot_outofbounds_datetime(self): # 2579 - checking this does not raise @@ -1283,9 +1279,9 @@ def test_format_timedelta_ticks_narrow(self): fig = ax.get_figure() fig.canvas.draw() labels = ax.get_xticklabels() - 
self.assertEqual(len(labels), len(expected_labels)) + assert len(labels) == len(expected_labels) for l, l_expected in zip(labels, expected_labels): - self.assertEqual(l.get_text(), l_expected) + assert l.get_text() == l_expected def test_format_timedelta_ticks_wide(self): if is_platform_mac(): @@ -1309,9 +1305,9 @@ def test_format_timedelta_ticks_wide(self): fig = ax.get_figure() fig.canvas.draw() labels = ax.get_xticklabels() - self.assertEqual(len(labels), len(expected_labels)) + assert len(labels) == len(expected_labels) for l, l_expected in zip(labels, expected_labels): - self.assertEqual(l.get_text(), l_expected) + assert l.get_text() == l_expected def test_timedelta_plot(self): # test issue #8711 diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index c550504063b3e..7297e3548b956 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -134,7 +134,7 @@ def test_plot(self): # passed ax should be used: fig, ax = self.plt.subplots() axes = df.plot.bar(subplots=True, ax=ax) - self.assertEqual(len(axes), 1) + assert len(axes) == 1 if self.mpl_ge_1_5_0: result = ax.axes else: @@ -164,10 +164,10 @@ def test_color_and_style_arguments(self): ax = df.plot(color=['red', 'black'], style=['-', '--']) # check that the linestyles are correctly set: linestyle = [line.get_linestyle() for line in ax.lines] - self.assertEqual(linestyle, ['-', '--']) + assert linestyle == ['-', '--'] # check that the colors are correctly set: color = [line.get_color() for line in ax.lines] - self.assertEqual(color, ['red', 'black']) + assert color == ['red', 'black'] # passing both 'color' and 'style' arguments should not be allowed # if there is a color symbol in the style strings: with pytest.raises(ValueError): @@ -176,7 +176,7 @@ def test_color_and_style_arguments(self): def test_nonnumeric_exclude(self): df = DataFrame({'A': ["x", "y", "z"], 'B': [1, 2, 3]}) ax = df.plot() - self.assertEqual(len(ax.get_lines()), 1) # B was plotted + assert len(ax.get_lines()) == 1 # B was plotted @slow def test_implicit_label(self): @@ -190,7 +190,7 @@ def test_donot_overwrite_index_name(self): df = DataFrame(randn(2, 2), columns=['a', 'b']) df.index.name = 'NAME' df.plot(y='b', label='LABEL') - self.assertEqual(df.index.name, 'NAME') + assert df.index.name == 'NAME' @slow def test_plot_xy(self): @@ -303,7 +303,7 @@ def test_subplots(self): for kind in ['bar', 'barh', 'line', 'area']: axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - self.assertEqual(axes.shape, (3, )) + assert axes.shape == (3, ) for ax, column in zip(axes, df.columns): self._check_legend_labels(ax, @@ -379,43 +379,43 @@ def test_subplots_layout(self): axes = df.plot(subplots=True, layout=(2, 2)) self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - self.assertEqual(axes.shape, (2, 2)) + assert axes.shape == (2, 2) axes = df.plot(subplots=True, layout=(-1, 2)) self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - self.assertEqual(axes.shape, (2, 2)) + assert axes.shape == (2, 2) axes = df.plot(subplots=True, layout=(2, -1)) self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - self.assertEqual(axes.shape, (2, 2)) + assert axes.shape == (2, 2) axes = df.plot(subplots=True, layout=(1, 4)) self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - self.assertEqual(axes.shape, (1, 4)) + assert axes.shape == (1, 4) axes = df.plot(subplots=True, layout=(-1, 4)) self._check_axes_shape(axes, axes_num=3, layout=(1, 
4)) - self.assertEqual(axes.shape, (1, 4)) + assert axes.shape == (1, 4) axes = df.plot(subplots=True, layout=(4, -1)) self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) - self.assertEqual(axes.shape, (4, 1)) + assert axes.shape == (4, 1) with pytest.raises(ValueError): - axes = df.plot(subplots=True, layout=(1, 1)) + df.plot(subplots=True, layout=(1, 1)) with pytest.raises(ValueError): - axes = df.plot(subplots=True, layout=(-1, -1)) + df.plot(subplots=True, layout=(-1, -1)) # single column df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) axes = df.plot(subplots=True) self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - self.assertEqual(axes.shape, (1, )) + assert axes.shape == (1, ) axes = df.plot(subplots=True, layout=(3, 3)) self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) - self.assertEqual(axes.shape, (3, 3)) + assert axes.shape == (3, 3) @slow def test_subplots_warnings(self): @@ -442,13 +442,13 @@ def test_subplots_multiple_axes(self): returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - self.assertEqual(returned.shape, (3, )) + assert returned.shape == (3, ) assert returned[0].figure is fig # draw on second row returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - self.assertEqual(returned.shape, (3, )) + assert returned.shape == (3, ) assert returned[0].figure is fig self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) tm.close() @@ -471,17 +471,17 @@ def test_subplots_multiple_axes(self): returned = df.plot(subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False) self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - self.assertEqual(returned.shape, (4, )) + assert returned.shape == (4, ) returned = df.plot(subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False) self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - self.assertEqual(returned.shape, (4, )) + assert returned.shape == (4, ) returned = df.plot(subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False) self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - self.assertEqual(returned.shape, (4, )) + assert returned.shape == (4, ) # single column fig, axes = self.plt.subplots(1, 1) @@ -490,7 +490,7 @@ def test_subplots_multiple_axes(self): axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - self.assertEqual(axes.shape, (1, )) + assert axes.shape == (1, ) def test_subplots_ts_share_axes(self): # GH 3964 @@ -540,20 +540,20 @@ def test_subplots_dup_columns(self): axes = df.plot(subplots=True) for ax in axes: self._check_legend_labels(ax, labels=['a']) - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 tm.close() axes = df.plot(subplots=True, secondary_y='a') for ax in axes: # (right) is only attached when subplots=False self._check_legend_labels(ax, labels=['a']) - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 tm.close() ax = df.plot(secondary_y='a') self._check_legend_labels(ax, labels=['a (right)'] * 5) - self.assertEqual(len(ax.lines), 0) - self.assertEqual(len(ax.right_ax.lines), 5) + assert len(ax.lines) == 0 + assert len(ax.right_ax.lines) == 5 def test_negative_log(self): df = - DataFrame(rand(6, 4), @@ -651,14 +651,14 @@ def test_line_lim(self): ax = df.plot() xmin, xmax = ax.get_xlim() lines = ax.get_lines() - self.assertEqual(xmin, 
lines[0].get_data()[0][0]) - self.assertEqual(xmax, lines[0].get_data()[0][-1]) + assert xmin == lines[0].get_data()[0][0] + assert xmax == lines[0].get_data()[0][-1] ax = df.plot(secondary_y=True) xmin, xmax = ax.get_xlim() lines = ax.get_lines() - self.assertEqual(xmin, lines[0].get_data()[0][0]) - self.assertEqual(xmax, lines[0].get_data()[0][-1]) + assert xmin == lines[0].get_data()[0][0] + assert xmax == lines[0].get_data()[0][-1] axes = df.plot(secondary_y=True, subplots=True) self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) @@ -667,8 +667,8 @@ def test_line_lim(self): assert not hasattr(ax, 'right_ax') xmin, xmax = ax.get_xlim() lines = ax.get_lines() - self.assertEqual(xmin, lines[0].get_data()[0][0]) - self.assertEqual(xmax, lines[0].get_data()[0][-1]) + assert xmin == lines[0].get_data()[0][0] + assert xmax == lines[0].get_data()[0][-1] def test_area_lim(self): df = DataFrame(rand(6, 4), columns=['x', 'y', 'z', 'four']) @@ -679,13 +679,13 @@ def test_area_lim(self): xmin, xmax = ax.get_xlim() ymin, ymax = ax.get_ylim() lines = ax.get_lines() - self.assertEqual(xmin, lines[0].get_data()[0][0]) - self.assertEqual(xmax, lines[0].get_data()[0][-1]) - self.assertEqual(ymin, 0) + assert xmin == lines[0].get_data()[0][0] + assert xmax == lines[0].get_data()[0][-1] + assert ymin == 0 ax = _check_plot_works(neg_df.plot.area, stacked=stacked) ymin, ymax = ax.get_ylim() - self.assertEqual(ymax, 0) + assert ymax == 0 @slow def test_bar_colors(self): @@ -730,19 +730,19 @@ def test_bar_linewidth(self): # regular ax = df.plot.bar(linewidth=2) for r in ax.patches: - self.assertEqual(r.get_linewidth(), 2) + assert r.get_linewidth() == 2 # stacked ax = df.plot.bar(stacked=True, linewidth=2) for r in ax.patches: - self.assertEqual(r.get_linewidth(), 2) + assert r.get_linewidth() == 2 # subplots axes = df.plot.bar(linewidth=2, subplots=True) self._check_axes_shape(axes, axes_num=5, layout=(5, 1)) for ax in axes: for r in ax.patches: - self.assertEqual(r.get_linewidth(), 2) + assert r.get_linewidth() == 2 @slow def test_bar_barwidth(self): @@ -753,34 +753,34 @@ def test_bar_barwidth(self): # regular ax = df.plot.bar(width=width) for r in ax.patches: - self.assertEqual(r.get_width(), width / len(df.columns)) + assert r.get_width() == width / len(df.columns) # stacked ax = df.plot.bar(stacked=True, width=width) for r in ax.patches: - self.assertEqual(r.get_width(), width) + assert r.get_width() == width # horizontal regular ax = df.plot.barh(width=width) for r in ax.patches: - self.assertEqual(r.get_height(), width / len(df.columns)) + assert r.get_height() == width / len(df.columns) # horizontal stacked ax = df.plot.barh(stacked=True, width=width) for r in ax.patches: - self.assertEqual(r.get_height(), width) + assert r.get_height() == width # subplots axes = df.plot.bar(width=width, subplots=True) for ax in axes: for r in ax.patches: - self.assertEqual(r.get_width(), width) + assert r.get_width() == width # horizontal subplots axes = df.plot.barh(width=width, subplots=True) for ax in axes: for r in ax.patches: - self.assertEqual(r.get_height(), width) + assert r.get_height() == width @slow def test_bar_barwidth_position(self): @@ -807,10 +807,10 @@ def test_bar_barwidth_position_int(self): ax = df.plot.bar(stacked=True, width=w) ticks = ax.xaxis.get_ticklocs() tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) - self.assertEqual(ax.get_xlim(), (-0.75, 4.75)) + assert ax.get_xlim() == (-0.75, 4.75) # check left-edge of bars - self.assertEqual(ax.patches[0].get_x(), -0.5) - 
self.assertEqual(ax.patches[-1].get_x(), 3.5) + assert ax.patches[0].get_x() == -0.5 + assert ax.patches[-1].get_x() == 3.5 self._check_bar_alignment(df, kind='bar', stacked=True, width=1) self._check_bar_alignment(df, kind='barh', stacked=False, width=1) @@ -823,29 +823,29 @@ def test_bar_bottom_left(self): df = DataFrame(rand(5, 5)) ax = df.plot.bar(stacked=False, bottom=1) result = [p.get_y() for p in ax.patches] - self.assertEqual(result, [1] * 25) + assert result == [1] * 25 ax = df.plot.bar(stacked=True, bottom=[-1, -2, -3, -4, -5]) result = [p.get_y() for p in ax.patches[:5]] - self.assertEqual(result, [-1, -2, -3, -4, -5]) + assert result == [-1, -2, -3, -4, -5] ax = df.plot.barh(stacked=False, left=np.array([1, 1, 1, 1, 1])) result = [p.get_x() for p in ax.patches] - self.assertEqual(result, [1] * 25) + assert result == [1] * 25 ax = df.plot.barh(stacked=True, left=[1, 2, 3, 4, 5]) result = [p.get_x() for p in ax.patches[:5]] - self.assertEqual(result, [1, 2, 3, 4, 5]) + assert result == [1, 2, 3, 4, 5] axes = df.plot.bar(subplots=True, bottom=-1) for ax in axes: result = [p.get_y() for p in ax.patches] - self.assertEqual(result, [-1] * 5) + assert result == [-1] * 5 axes = df.plot.barh(subplots=True, left=np.array([1, 1, 1, 1, 1])) for ax in axes: result = [p.get_x() for p in ax.patches] - self.assertEqual(result, [1] * 5) + assert result == [1] * 5 @slow def test_bar_nan(self): @@ -855,15 +855,15 @@ def test_bar_nan(self): ax = df.plot.bar() expected = [10, 0, 20, 5, 10, 20, 1, 2, 3] result = [p.get_height() for p in ax.patches] - self.assertEqual(result, expected) + assert result == expected ax = df.plot.bar(stacked=True) result = [p.get_height() for p in ax.patches] - self.assertEqual(result, expected) + assert result == expected result = [p.get_y() for p in ax.patches] expected = [0.0, 0.0, 0.0, 10.0, 0.0, 20.0, 15.0, 10.0, 40.0] - self.assertEqual(result, expected) + assert result == expected @slow def test_bar_categorical(self): @@ -880,16 +880,16 @@ def test_bar_categorical(self): ax = df.plot.bar() ticks = ax.xaxis.get_ticklocs() tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5])) - self.assertEqual(ax.get_xlim(), (-0.5, 5.5)) + assert ax.get_xlim() == (-0.5, 5.5) # check left-edge of bars - self.assertEqual(ax.patches[0].get_x(), -0.25) - self.assertEqual(ax.patches[-1].get_x(), 5.15) + assert ax.patches[0].get_x() == -0.25 + assert ax.patches[-1].get_x() == 5.15 ax = df.plot.bar(stacked=True) tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5])) - self.assertEqual(ax.get_xlim(), (-0.5, 5.5)) - self.assertEqual(ax.patches[0].get_x(), -0.25) - self.assertEqual(ax.patches[-1].get_x(), 4.75) + assert ax.get_xlim() == (-0.5, 5.5) + assert ax.patches[0].get_x() == -0.25 + assert ax.patches[-1].get_x() == 4.75 @slow def test_plot_scatter(self): @@ -919,17 +919,17 @@ def test_plot_scatter_with_c(self): df.plot.scatter(x=0, y=1, c=2)] for ax in axes: # default to Greys - self.assertEqual(ax.collections[0].cmap.name, 'Greys') + assert ax.collections[0].cmap.name == 'Greys' if self.mpl_ge_1_3_1: # n.b. 
there appears to be no public method to get the colorbar # label - self.assertEqual(ax.collections[0].colorbar._label, 'z') + assert ax.collections[0].colorbar._label == 'z' cm = 'cubehelix' ax = df.plot.scatter(x='x', y='y', c='z', colormap=cm) - self.assertEqual(ax.collections[0].cmap.name, cm) + assert ax.collections[0].cmap.name == cm # verify turning off colorbar works ax = df.plot.scatter(x='x', y='y', c='z', colorbar=False) @@ -1167,7 +1167,7 @@ def test_boxplot(self): self._check_text_labels(ax.get_xticklabels(), labels) tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), np.arange(1, len(numeric_cols) + 1)) - self.assertEqual(len(ax.lines), self.bp_n_objects * len(numeric_cols)) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) # different warning on py3 if not PY3: @@ -1178,7 +1178,7 @@ def test_boxplot(self): self._check_ax_scales(axes, yaxis='log') for ax, label in zip(axes, labels): self._check_text_labels(ax.get_xticklabels(), [label]) - self.assertEqual(len(ax.lines), self.bp_n_objects) + assert len(ax.lines) == self.bp_n_objects axes = series.plot.box(rot=40) self._check_ticks_props(axes, xrot=40, yrot=0) @@ -1192,7 +1192,7 @@ def test_boxplot(self): labels = [pprint_thing(c) for c in numeric_cols] self._check_text_labels(ax.get_xticklabels(), labels) tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), positions) - self.assertEqual(len(ax.lines), self.bp_n_objects * len(numeric_cols)) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) @slow def test_boxplot_vertical(self): @@ -1204,7 +1204,7 @@ def test_boxplot_vertical(self): ax = df.plot.box(rot=50, fontsize=8, vert=False) self._check_ticks_props(ax, xrot=0, yrot=50, ylabelsize=8) self._check_text_labels(ax.get_yticklabels(), labels) - self.assertEqual(len(ax.lines), self.bp_n_objects * len(numeric_cols)) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) # _check_plot_works adds an ax so catch warning. 
see GH #13188 with tm.assert_produces_warning(UserWarning): @@ -1214,13 +1214,13 @@ def test_boxplot_vertical(self): self._check_ax_scales(axes, xaxis='log') for ax, label in zip(axes, labels): self._check_text_labels(ax.get_yticklabels(), [label]) - self.assertEqual(len(ax.lines), self.bp_n_objects) + assert len(ax.lines) == self.bp_n_objects positions = np.array([3, 2, 8]) ax = df.plot.box(positions=positions, vert=False) self._check_text_labels(ax.get_yticklabels(), labels) tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), positions) - self.assertEqual(len(ax.lines), self.bp_n_objects * len(numeric_cols)) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) @slow def test_boxplot_return_type(self): @@ -1563,16 +1563,16 @@ def test_style_by_column(self): fig.add_subplot(111) ax = df.plot(style=markers) for i, l in enumerate(ax.get_lines()[:len(markers)]): - self.assertEqual(l.get_marker(), markers[i]) + assert l.get_marker() == markers[i] @slow def test_line_label_none(self): s = Series([1, 2]) ax = s.plot() - self.assertEqual(ax.get_legend(), None) + assert ax.get_legend() is None ax = s.plot(legend=True) - self.assertEqual(ax.get_legend().get_texts()[0].get_text(), 'None') + assert ax.get_legend().get_texts()[0].get_text() == 'None' @slow @tm.capture_stdout @@ -1591,7 +1591,7 @@ def test_line_colors(self): lines2 = ax2.get_lines() for l1, l2 in zip(ax.get_lines(), lines2): - self.assertEqual(l1.get_color(), l2.get_color()) + assert l1.get_color() == l2.get_color() tm.close() @@ -1630,7 +1630,7 @@ def test_line_colors(self): def test_dont_modify_colors(self): colors = ['r', 'g', 'b'] pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) - self.assertEqual(len(colors), 3) + assert len(colors) == 3 @slow def test_line_colors_and_styles_subplots(self): @@ -1768,7 +1768,7 @@ def test_area_colors(self): linecolors = jet_colors self._check_colors(handles[:len(jet_colors)], linecolors=linecolors) for h in handles: - self.assertEqual(h.get_alpha(), 0.5) + assert h.get_alpha() == 0.5 @slow def test_hist_colors(self): @@ -2028,13 +2028,13 @@ def test_hexbin_basic(self): ax = df.plot.hexbin(x='A', y='B', gridsize=10) # TODO: need better way to test. This just does existence. 
- self.assertEqual(len(ax.collections), 1) + assert len(ax.collections) == 1 # GH 6951 axes = df.plot.hexbin(x='A', y='B', subplots=True) # hexbin should have 2 axes in the figure, 1 for plotting and another # is colorbar - self.assertEqual(len(axes[0].figure.axes), 2) + assert len(axes[0].figure.axes) == 2 # return value is single axes self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) @@ -2043,10 +2043,10 @@ def test_hexbin_with_c(self): df = self.hexbin_df ax = df.plot.hexbin(x='A', y='B', C='C') - self.assertEqual(len(ax.collections), 1) + assert len(ax.collections) == 1 ax = df.plot.hexbin(x='A', y='B', C='C', reduce_C_function=np.std) - self.assertEqual(len(ax.collections), 1) + assert len(ax.collections) == 1 @slow def test_hexbin_cmap(self): @@ -2054,11 +2054,11 @@ def test_hexbin_cmap(self): # Default to BuGn ax = df.plot.hexbin(x='A', y='B') - self.assertEqual(ax.collections[0].cmap.name, 'BuGn') + assert ax.collections[0].cmap.name == 'BuGn' cm = 'cubehelix' ax = df.plot.hexbin(x='A', y='B', colormap=cm) - self.assertEqual(ax.collections[0].cmap.name, cm) + assert ax.collections[0].cmap.name == cm @slow def test_no_color_bar(self): @@ -2072,7 +2072,7 @@ def test_allow_cmap(self): df = self.hexbin_df ax = df.plot.hexbin(x='A', y='B', cmap='YlGn') - self.assertEqual(ax.collections[0].cmap.name, 'YlGn') + assert ax.collections[0].cmap.name == 'YlGn' with pytest.raises(TypeError): df.plot.hexbin(x='A', y='B', cmap='YlGn', colormap='BuGn') @@ -2094,11 +2094,11 @@ def test_pie_df(self): with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(df.plot.pie, subplots=True) - self.assertEqual(len(axes), len(df.columns)) + assert len(axes) == len(df.columns) for ax in axes: self._check_text_labels(ax.texts, df.index) for ax, ylabel in zip(axes, df.columns): - self.assertEqual(ax.get_ylabel(), ylabel) + assert ax.get_ylabel() == ylabel labels = ['A', 'B', 'C', 'D', 'E'] color_args = ['r', 'g', 'b', 'c', 'm'] @@ -2106,7 +2106,7 @@ def test_pie_df(self): axes = _check_plot_works(df.plot.pie, subplots=True, labels=labels, colors=color_args) - self.assertEqual(len(axes), len(df.columns)) + assert len(axes) == len(df.columns) for ax in axes: self._check_text_labels(ax.texts, labels) @@ -2124,13 +2124,12 @@ def test_pie_df_nan(self): expected = list(base_expected) # force copy expected[i] = '' result = [x.get_text() for x in ax.texts] - self.assertEqual(result, expected) + assert result == expected # legend labels # NaN's not included in legend with subplots # see https://github.com/pandas-dev/pandas/issues/8390 - self.assertEqual([x.get_text() for x in - ax.get_legend().get_texts()], - base_expected[:i] + base_expected[i + 1:]) + assert ([x.get_text() for x in ax.get_legend().get_texts()] == + base_expected[:i] + base_expected[i + 1:]) @slow def test_errorbar_plot(self): @@ -2280,13 +2279,10 @@ def test_errorbar_asymmetrical(self): expected_0_0 = err[0, :, 0] * np.array([-1, 1]) tm.assert_almost_equal(yerr_0_0, expected_0_0) else: - self.assertEqual(ax.lines[7].get_ydata()[0], - data[0, 1] - err[1, 0, 0]) - self.assertEqual(ax.lines[8].get_ydata()[0], - data[0, 1] + err[1, 1, 0]) - - self.assertEqual(ax.lines[5].get_xdata()[0], -err[1, 0, 0] / 2) - self.assertEqual(ax.lines[6].get_xdata()[0], err[1, 1, 0] / 2) + assert ax.lines[7].get_ydata()[0] == data[0, 1] - err[1, 0, 0] + assert ax.lines[8].get_ydata()[0] == data[0, 1] + err[1, 1, 0] + assert ax.lines[5].get_xdata()[0] == -err[1, 0, 0] / 2 + assert ax.lines[6].get_xdata()[0] == err[1, 1, 0] / 2 with 
pytest.raises(ValueError): df.plot(yerr=err.T) @@ -2362,7 +2358,7 @@ def test_sharex_and_ax(self): def _check(axes): for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 self._check_visible(ax.get_yticklabels(), visible=True) for ax in [axes[0], axes[2]]: self._check_visible(ax.get_xticklabels(), visible=False) @@ -2392,7 +2388,7 @@ def _check(axes): gs.tight_layout(plt.gcf()) for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 self._check_visible(ax.get_yticklabels(), visible=True) self._check_visible(ax.get_xticklabels(), visible=True) self._check_visible(ax.get_xticklabels(minor=True), visible=True) @@ -2414,7 +2410,7 @@ def test_sharey_and_ax(self): def _check(axes): for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 self._check_visible(ax.get_xticklabels(), visible=True) self._check_visible( ax.get_xticklabels(minor=True), visible=True) @@ -2444,7 +2440,7 @@ def _check(axes): gs.tight_layout(plt.gcf()) for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 self._check_visible(ax.get_yticklabels(), visible=True) self._check_visible(ax.get_xticklabels(), visible=True) self._check_visible(ax.get_xticklabels(minor=True), visible=True) @@ -2494,7 +2490,7 @@ def test_df_subplots_patterns_minorticks(self): fig, axes = plt.subplots(2, 1, sharex=True) axes = df.plot(subplots=True, ax=axes) for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 self._check_visible(ax.get_yticklabels(), visible=True) # xaxis of 1st ax must be hidden self._check_visible(axes[0].get_xticklabels(), visible=False) @@ -2507,7 +2503,7 @@ def test_df_subplots_patterns_minorticks(self): with tm.assert_produces_warning(UserWarning): axes = df.plot(subplots=True, ax=axes, sharex=True) for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 self._check_visible(ax.get_yticklabels(), visible=True) # xaxis of 1st ax must be hidden self._check_visible(axes[0].get_xticklabels(), visible=False) @@ -2520,7 +2516,7 @@ def test_df_subplots_patterns_minorticks(self): fig, axes = plt.subplots(2, 1) axes = df.plot(subplots=True, ax=axes) for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 self._check_visible(ax.get_yticklabels(), visible=True) self._check_visible(ax.get_xticklabels(), visible=True) self._check_visible(ax.get_xticklabels(minor=True), visible=True) @@ -2554,9 +2550,9 @@ def _get_horizontal_grid(): for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]: ax1 = ts.plot(ax=ax1) - self.assertEqual(len(ax1.lines), 1) + assert len(ax1.lines) == 1 ax2 = df.plot(ax=ax2) - self.assertEqual(len(ax2.lines), 2) + assert len(ax2.lines) == 2 for ax in [ax1, ax2]: self._check_visible(ax.get_yticklabels(), visible=True) self._check_visible(ax.get_xticklabels(), visible=True) @@ -2567,8 +2563,8 @@ def _get_horizontal_grid(): # subplots=True for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]: axes = df.plot(subplots=True, ax=[ax1, ax2]) - self.assertEqual(len(ax1.lines), 1) - self.assertEqual(len(ax2.lines), 1) + assert len(ax1.lines) == 1 + assert len(ax2.lines) == 1 for ax in axes: self._check_visible(ax.get_yticklabels(), visible=True) self._check_visible(ax.get_xticklabels(), visible=True) @@ -2581,8 +2577,8 @@ def _get_horizontal_grid(): with tm.assert_produces_warning(UserWarning): axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True) - self.assertEqual(len(axes[0].lines), 1) - 
self.assertEqual(len(axes[1].lines), 1) + assert len(axes[0].lines) == 1 + assert len(axes[1].lines) == 1 for ax in [ax1, ax2]: # yaxis are visible because there is only one column self._check_visible(ax.get_yticklabels(), visible=True) @@ -2598,8 +2594,8 @@ def _get_horizontal_grid(): with tm.assert_produces_warning(UserWarning): axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True) - self.assertEqual(len(axes[0].lines), 1) - self.assertEqual(len(axes[1].lines), 1) + assert len(axes[0].lines) == 1 + assert len(axes[1].lines) == 1 self._check_visible(axes[0].get_yticklabels(), visible=True) # yaxis of axes1 (right) are hidden self._check_visible(axes[1].get_yticklabels(), visible=False) @@ -2624,7 +2620,7 @@ def _get_boxed_grid(): index=ts.index, columns=list('ABCD')) axes = df.plot(subplots=True, ax=axes) for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 # axis are visible because these are not shared self._check_visible(ax.get_yticklabels(), visible=True) self._check_visible(ax.get_xticklabels(), visible=True) @@ -2636,7 +2632,7 @@ def _get_boxed_grid(): with tm.assert_produces_warning(UserWarning): axes = df.plot(subplots=True, ax=axes, sharex=True, sharey=True) for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 for ax in [axes[0], axes[2]]: # left column self._check_visible(ax.get_yticklabels(), visible=True) for ax in [axes[1], axes[3]]: # right column @@ -2710,8 +2706,7 @@ def test_passed_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] colormap = mpl.colors.ListedColormap(color_tuples) barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) - self.assertEqual(color_tuples, [c.get_facecolor() - for c in barplot.patches]) + assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_rcParams_bar_colors(self): import matplotlib as mpl @@ -2723,8 +2718,7 @@ def test_rcParams_bar_colors(self): except (AttributeError, KeyError): # mpl 1.4 with mpl.rc_context(rc={'axes.color_cycle': color_tuples}): barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") - self.assertEqual(color_tuples, [c.get_facecolor() - for c in barplot.patches]) + assert color_tuples == [c.get_facecolor() for c in barplot.patches] def _generate_4_axes_via_gridspec(): diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index 93efb3f994c38..121f2f9b75698 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -68,7 +68,7 @@ def test_plot_kwargs(self): res = df.groupby('z').plot(kind='scatter', x='x', y='y') # check that a scatter plot is effectively plotted: the axes should # contain a PathCollection from the scatter plot (GH11805) - self.assertEqual(len(res['a'].collections), 1) + assert len(res['a'].collections) == 1 res = df.groupby('z').plot.scatter(x='x', y='y') - self.assertEqual(len(res['a'].collections), 1) + assert len(res['a'].collections) == 1 diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 7002321908ef0..39bab59242c22 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -54,7 +54,7 @@ def test_hist_legacy(self): def test_hist_bins_legacy(self): df = DataFrame(np.random.randn(10, 2)) ax = df.hist(bins=2)[0][0] - self.assertEqual(len(ax.patches), 2) + assert len(ax.patches) == 2 @slow def test_hist_layout(self): @@ -122,13 +122,13 @@ def test_hist_no_overlap(self): y.hist() fig = gcf() 
axes = fig.axes if self.mpl_ge_1_5_0 else fig.get_axes() - self.assertEqual(len(axes), 2) + assert len(axes) == 2 @slow def test_hist_by_no_extra_plots(self): df = self.hist_df axes = df.height.hist(by=df.gender) # noqa - self.assertEqual(len(self.plt.get_fignums()), 1) + assert len(self.plt.get_fignums()) == 1 @slow def test_plot_fails_when_ax_differs_from_figure(self): @@ -314,8 +314,8 @@ def test_grouped_hist_legacy2(self): 'gender': gender_int}) gb = df_int.groupby('gender') axes = gb.hist() - self.assertEqual(len(axes), 2) - self.assertEqual(len(self.plt.get_fignums()), 2) + assert len(axes) == 2 + assert len(self.plt.get_fignums()) == 2 tm.close() @slow diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 9b8569e8680e4..3a9cb309db707 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -309,7 +309,7 @@ def test_subplot_titles(self): # Case len(title) == len(df) plot = df.plot(subplots=True, title=title) - self.assertEqual([p.get_title() for p in plot], title) + assert [p.get_title() for p in plot] == title # Case len(title) > len(df) pytest.raises(ValueError, df.plot, subplots=True, @@ -325,4 +325,4 @@ def test_subplot_titles(self): plot = df.drop('SepalWidth', axis=1).plot(subplots=True, layout=(2, 2), title=title[:-1]) title_list = [ax.get_title() for sublist in plot for ax in sublist] - self.assertEqual(title_list, title[:3] + ['']) + assert title_list == title[:3] + [''] diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 8ae301a0b7b4c..d1325c7130d04 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -93,36 +93,36 @@ def test_dont_modify_rcParams(self): key = 'axes.color_cycle' colors = self.plt.rcParams[key] Series([1, 2, 3]).plot() - self.assertEqual(colors, self.plt.rcParams[key]) + assert colors == self.plt.rcParams[key] def test_ts_line_lim(self): ax = self.ts.plot() xmin, xmax = ax.get_xlim() lines = ax.get_lines() - self.assertEqual(xmin, lines[0].get_data(orig=False)[0][0]) - self.assertEqual(xmax, lines[0].get_data(orig=False)[0][-1]) + assert xmin == lines[0].get_data(orig=False)[0][0] + assert xmax == lines[0].get_data(orig=False)[0][-1] tm.close() ax = self.ts.plot(secondary_y=True) xmin, xmax = ax.get_xlim() lines = ax.get_lines() - self.assertEqual(xmin, lines[0].get_data(orig=False)[0][0]) - self.assertEqual(xmax, lines[0].get_data(orig=False)[0][-1]) + assert xmin == lines[0].get_data(orig=False)[0][0] + assert xmax == lines[0].get_data(orig=False)[0][-1] def test_ts_area_lim(self): ax = self.ts.plot.area(stacked=False) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] - self.assertEqual(xmin, line[0]) - self.assertEqual(xmax, line[-1]) + assert xmin == line[0] + assert xmax == line[-1] tm.close() # GH 7471 ax = self.ts.plot.area(stacked=False, x_compat=True) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] - self.assertEqual(xmin, line[0]) - self.assertEqual(xmax, line[-1]) + assert xmin == line[0] + assert xmax == line[-1] tm.close() tz_ts = self.ts.copy() @@ -130,15 +130,15 @@ def test_ts_area_lim(self): ax = tz_ts.plot.area(stacked=False, x_compat=True) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] - self.assertEqual(xmin, line[0]) - self.assertEqual(xmax, line[-1]) + assert xmin == line[0] + assert xmax == line[-1] tm.close() ax = tz_ts.plot.area(stacked=False, secondary_y=True) xmin, xmax = ax.get_xlim() line = 
ax.get_lines()[0].get_data(orig=False)[0] - self.assertEqual(xmin, line[0]) - self.assertEqual(xmax, line[-1]) + assert xmin == line[0] + assert xmax == line[-1] def test_label(self): s = Series([1, 2]) @@ -159,7 +159,7 @@ def test_label(self): self.plt.close() # Add label info, but don't draw ax = s.plot(legend=False, label='LABEL') - self.assertEqual(ax.get_legend(), None) # Hasn't been drawn + assert ax.get_legend() is None # Hasn't been drawn ax.legend() # draw it self._check_legend_labels(ax, labels=['LABEL']) @@ -190,10 +190,10 @@ def test_line_use_index_false(self): s.index.name = 'The Index' ax = s.plot(use_index=False) label = ax.get_xlabel() - self.assertEqual(label, '') + assert label == '' ax2 = s.plot.bar(use_index=False) label2 = ax2.get_xlabel() - self.assertEqual(label2, '') + assert label2 == '' @slow def test_bar_log(self): @@ -255,7 +255,7 @@ def test_irregular_datetime(self): ax = ser.plot() xp = datetime(1999, 1, 1).toordinal() ax.set_xlim('1/1/1999', '1/1/2001') - self.assertEqual(xp, ax.get_xlim()[0]) + assert xp == ax.get_xlim()[0] @slow def test_pie_series(self): @@ -265,7 +265,7 @@ def test_pie_series(self): index=['a', 'b', 'c', 'd', 'e'], name='YLABEL') ax = _check_plot_works(series.plot.pie) self._check_text_labels(ax.texts, series.index) - self.assertEqual(ax.get_ylabel(), 'YLABEL') + assert ax.get_ylabel() == 'YLABEL' # without wedge labels ax = _check_plot_works(series.plot.pie, labels=None) @@ -295,7 +295,7 @@ def test_pie_series(self): expected_texts = list(next(it) for it in itertools.cycle(iters)) self._check_text_labels(ax.texts, expected_texts) for t in ax.texts: - self.assertEqual(t.get_fontsize(), 7) + assert t.get_fontsize() == 7 # includes negative value with pytest.raises(ValueError): @@ -313,13 +313,13 @@ def test_pie_nan(self): ax = s.plot.pie(legend=True) expected = ['0', '', '2', '3'] result = [x.get_text() for x in ax.texts] - self.assertEqual(result, expected) + assert result == expected @slow def test_hist_df_kwargs(self): df = DataFrame(np.random.randn(10, 2)) ax = df.plot.hist(bins=5) - self.assertEqual(len(ax.patches), 10) + assert len(ax.patches) == 10 @slow def test_hist_df_with_nonnumerics(self): @@ -329,10 +329,10 @@ def test_hist_df_with_nonnumerics(self): np.random.randn(10, 4), columns=['A', 'B', 'C', 'D']) df['E'] = ['x', 'y'] * 5 ax = df.plot.hist(bins=5) - self.assertEqual(len(ax.patches), 20) + assert len(ax.patches) == 20 ax = df.plot.hist() # bins=10 - self.assertEqual(len(ax.patches), 40) + assert len(ax.patches) == 40 @slow def test_hist_legacy(self): @@ -364,7 +364,7 @@ def test_hist_legacy(self): def test_hist_bins_legacy(self): df = DataFrame(np.random.randn(10, 2)) ax = df.hist(bins=2)[0][0] - self.assertEqual(len(ax.patches), 2) + assert len(ax.patches) == 2 @slow def test_hist_layout(self): @@ -430,7 +430,7 @@ def test_hist_no_overlap(self): y.hist() fig = gcf() axes = fig.axes if self.mpl_ge_1_5_0 else fig.get_axes() - self.assertEqual(len(axes), 2) + assert len(axes) == 2 @slow def test_hist_secondary_legend(self): @@ -583,7 +583,7 @@ def test_kde_missing_vals(self): @slow def test_hist_kwargs(self): ax = self.ts.plot.hist(bins=5) - self.assertEqual(len(ax.patches), 5) + assert len(ax.patches) == 5 self._check_text_labels(ax.yaxis.get_label(), 'Frequency') tm.close() @@ -599,7 +599,7 @@ def test_hist_kwargs(self): def test_hist_kde_color(self): ax = self.ts.plot.hist(logy=True, bins=10, color='b') self._check_ax_scales(ax, yaxis='log') - self.assertEqual(len(ax.patches), 10) + assert len(ax.patches) == 10 
self._check_colors(ax.patches, facecolors=['b'] * 10) tm._skip_if_no_scipy() @@ -607,7 +607,7 @@ def test_hist_kde_color(self): ax = self.ts.plot.kde(logy=True, color='r') self._check_ax_scales(ax, yaxis='log') lines = ax.get_lines() - self.assertEqual(len(lines), 1) + assert len(lines) == 1 self._check_colors(lines, ['r']) @slow @@ -729,16 +729,16 @@ def test_standard_colors(self): for c in ['r', 'red', 'green', '#FF0000']: result = _get_standard_colors(1, color=c) - self.assertEqual(result, [c]) + assert result == [c] result = _get_standard_colors(1, color=[c]) - self.assertEqual(result, [c]) + assert result == [c] result = _get_standard_colors(3, color=c) - self.assertEqual(result, [c] * 3) + assert result == [c] * 3 result = _get_standard_colors(3, color=[c]) - self.assertEqual(result, [c] * 3) + assert result == [c] * 3 @slow def test_standard_colors_all(self): @@ -748,30 +748,30 @@ def test_standard_colors_all(self): # multiple colors like mediumaquamarine for c in colors.cnames: result = _get_standard_colors(num_colors=1, color=c) - self.assertEqual(result, [c]) + assert result == [c] result = _get_standard_colors(num_colors=1, color=[c]) - self.assertEqual(result, [c]) + assert result == [c] result = _get_standard_colors(num_colors=3, color=c) - self.assertEqual(result, [c] * 3) + assert result == [c] * 3 result = _get_standard_colors(num_colors=3, color=[c]) - self.assertEqual(result, [c] * 3) + assert result == [c] * 3 # single letter colors like k for c in colors.ColorConverter.colors: result = _get_standard_colors(num_colors=1, color=c) - self.assertEqual(result, [c]) + assert result == [c] result = _get_standard_colors(num_colors=1, color=[c]) - self.assertEqual(result, [c]) + assert result == [c] result = _get_standard_colors(num_colors=3, color=c) - self.assertEqual(result, [c] * 3) + assert result == [c] * 3 result = _get_standard_colors(num_colors=3, color=[c]) - self.assertEqual(result, [c] * 3) + assert result == [c] * 3 def test_series_plot_color_kwargs(self): # GH1890 diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 9854245cf1abd..2d4d0a09060de 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -65,14 +65,14 @@ def _check_expected_dtype(self, obj, label): """ if isinstance(obj, pd.Index): if label == 'bool': - self.assertEqual(obj.dtype, 'object') + assert obj.dtype == 'object' else: - self.assertEqual(obj.dtype, label) + assert obj.dtype == label elif isinstance(obj, pd.Series): if label.startswith('period'): - self.assertEqual(obj.dtype, 'object') + assert obj.dtype == 'object' else: - self.assertEqual(obj.dtype, label) + assert obj.dtype == label else: raise ValueError @@ -814,7 +814,7 @@ def test_append_preserve_index_name(self): df2 = df2.set_index(['A']) result = df1.append(df2) - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' def test_append_dtype_coerce(self): @@ -849,8 +849,8 @@ def test_append_missing_column_proper_upcast(self): dtype=bool)}) appended = df1.append(df2, ignore_index=True) - self.assertEqual(appended['A'].dtype, 'f8') - self.assertEqual(appended['B'].dtype, 'O') + assert appended['A'].dtype == 'f8' + assert appended['B'].dtype == 'O' class TestConcatenate(ConcatenateBase): @@ -934,7 +934,7 @@ def test_concat_keys_specific_levels(self): tm.assert_index_equal(result.columns.levels[0], Index(level, name='group_key')) - self.assertEqual(result.columns.names[0], 'group_key') + assert result.columns.names[0] == 'group_key' def 
test_concat_dataframe_keys_bug(self): t1 = DataFrame({ @@ -945,8 +945,7 @@ def test_concat_dataframe_keys_bug(self): # it works result = concat([t1, t2], axis=1, keys=['t1', 't2']) - self.assertEqual(list(result.columns), [('t1', 'value'), - ('t2', 'value')]) + assert list(result.columns) == [('t1', 'value'), ('t2', 'value')] def test_concat_series_partial_columns_names(self): # GH10698 @@ -1020,10 +1019,10 @@ def test_concat_multiindex_with_keys(self): columns=Index(['A', 'B', 'C'], name='exp')) result = concat([frame, frame], keys=[0, 1], names=['iteration']) - self.assertEqual(result.index.names, ('iteration',) + index.names) + assert result.index.names == ('iteration',) + index.names tm.assert_frame_equal(result.loc[0], frame) tm.assert_frame_equal(result.loc[1], frame) - self.assertEqual(result.index.nlevels, 3) + assert result.index.nlevels == 3 def test_concat_multiindex_with_tz(self): # GH 6606 @@ -1088,22 +1087,21 @@ def test_concat_keys_and_levels(self): names=names + [None]) expected.index = exp_index - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # no names - result = concat([df, df2, df, df2], keys=[('foo', 'one'), ('foo', 'two'), ('baz', 'one'), ('baz', 'two')], levels=levels) - self.assertEqual(result.index.names, (None,) * 3) + assert result.index.names == (None,) * 3 # no levels result = concat([df, df2, df, df2], keys=[('foo', 'one'), ('foo', 'two'), ('baz', 'one'), ('baz', 'two')], names=['first', 'second']) - self.assertEqual(result.index.names, ('first', 'second') + (None,)) + assert result.index.names == ('first', 'second') + (None,) tm.assert_index_equal(result.index.levels[0], Index(['baz', 'foo'], name='first')) @@ -1135,7 +1133,7 @@ def test_concat_rename_index(self): exp.index.set_names(names, inplace=True) tm.assert_frame_equal(result, exp) - self.assertEqual(result.index.names, exp.index.names) + assert result.index.names == exp.index.names def test_crossed_dtypes_weird_corner(self): columns = ['A', 'B', 'C', 'D'] @@ -1160,7 +1158,7 @@ def test_crossed_dtypes_weird_corner(self): df2 = DataFrame(np.random.randn(1, 4), index=['b']) result = concat( [df, df2], keys=['one', 'two'], names=['first', 'second']) - self.assertEqual(result.index.names, ('first', 'second')) + assert result.index.names == ('first', 'second') def test_dups_index(self): # GH 4771 @@ -1442,7 +1440,7 @@ def test_concat_series(self): result = concat(pieces) tm.assert_series_equal(result, ts) - self.assertEqual(result.name, ts.name) + assert result.name == ts.name result = concat(pieces, keys=[0, 1, 2]) expected = ts.copy() @@ -1549,7 +1547,7 @@ def test_concat_bug_1719(self): left = concat([ts1, ts2], join='outer', axis=1) right = concat([ts2, ts1], join='outer', axis=1) - self.assertEqual(len(left), len(right)) + assert len(left) == len(right) def test_concat_bug_2972(self): ts0 = Series(np.zeros(5)) @@ -1706,8 +1704,7 @@ def test_concat_tz_frame(self): assert_frame_equal(df2, df3) def test_concat_tz_series(self): - # GH 11755 - # tz and no tz + # gh-11755: tz and no tz x = Series(date_range('20151124 08:00', '20151124 09:00', freq='1h', tz='UTC')) @@ -1717,8 +1714,7 @@ def test_concat_tz_series(self): result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - # GH 11887 - # concat tz and object + # gh-11887: concat tz and object x = Series(date_range('20151124 08:00', '20151124 09:00', freq='1h', tz='UTC')) @@ -1728,10 +1724,8 @@ def test_concat_tz_series(self): result = concat([x, y], ignore_index=True) 
tm.assert_series_equal(result, expected) - # 12217 - # 12306 fixed I think - - # Concat'ing two UTC times + # see gh-12217 and gh-12306 + # Concatenating two UTC times first = pd.DataFrame([[datetime(2016, 1, 1)]]) first[0] = first[0].dt.tz_localize('UTC') @@ -1739,9 +1733,9 @@ def test_concat_tz_series(self): second[0] = second[0].dt.tz_localize('UTC') result = pd.concat([first, second]) - self.assertEqual(result[0].dtype, 'datetime64[ns, UTC]') + assert result[0].dtype == 'datetime64[ns, UTC]' - # Concat'ing two London times + # Concatenating two London times first = pd.DataFrame([[datetime(2016, 1, 1)]]) first[0] = first[0].dt.tz_localize('Europe/London') @@ -1749,9 +1743,9 @@ def test_concat_tz_series(self): second[0] = second[0].dt.tz_localize('Europe/London') result = pd.concat([first, second]) - self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]') + assert result[0].dtype == 'datetime64[ns, Europe/London]' - # Concat'ing 2+1 London times + # Concatenating 2+1 London times first = pd.DataFrame([[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]]) first[0] = first[0].dt.tz_localize('Europe/London') @@ -1759,7 +1753,7 @@ def test_concat_tz_series(self): second[0] = second[0].dt.tz_localize('Europe/London') result = pd.concat([first, second]) - self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]') + assert result[0].dtype == 'datetime64[ns, Europe/London]' # Concat'ing 1+2 London times first = pd.DataFrame([[datetime(2016, 1, 1)]]) @@ -1769,11 +1763,10 @@ def test_concat_tz_series(self): second[0] = second[0].dt.tz_localize('Europe/London') result = pd.concat([first, second]) - self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]') + assert result[0].dtype == 'datetime64[ns, Europe/London]' def test_concat_tz_series_with_datetimelike(self): - # GH 12620 - # tz and timedelta + # see gh-12620: tz and timedelta x = [pd.Timestamp('2011-01-01', tz='US/Eastern'), pd.Timestamp('2011-02-01', tz='US/Eastern')] y = [pd.Timedelta('1 day'), pd.Timedelta('2 day')] @@ -1786,16 +1779,18 @@ def test_concat_tz_series_with_datetimelike(self): tm.assert_series_equal(result, pd.Series(x + y, dtype='object')) def test_concat_tz_series_tzlocal(self): - # GH 13583 + # see gh-13583 tm._skip_if_no_dateutil() import dateutil + x = [pd.Timestamp('2011-01-01', tz=dateutil.tz.tzlocal()), pd.Timestamp('2011-02-01', tz=dateutil.tz.tzlocal())] y = [pd.Timestamp('2012-01-01', tz=dateutil.tz.tzlocal()), pd.Timestamp('2012-02-01', tz=dateutil.tz.tzlocal())] + result = concat([pd.Series(x), pd.Series(y)], ignore_index=True) tm.assert_series_equal(result, pd.Series(x + y)) - self.assertEqual(result.dtype, 'datetime64[ns, tzlocal()]') + assert result.dtype == 'datetime64[ns, tzlocal()]' def test_concat_period_series(self): x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) @@ -1803,7 +1798,7 @@ def test_concat_period_series(self): expected = Series([x[0], x[1], y[0], y[1]], dtype='object') result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'object') + assert result.dtype == 'object' # different freq x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) @@ -1811,14 +1806,14 @@ def test_concat_period_series(self): expected = Series([x[0], x[1], y[0], y[1]], dtype='object') result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'object') + assert result.dtype == 'object' x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) y 
= Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='M')) expected = Series([x[0], x[1], y[0], y[1]], dtype='object') result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'object') + assert result.dtype == 'object' # non-period x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) @@ -1826,14 +1821,14 @@ def test_concat_period_series(self): expected = Series([x[0], x[1], y[0], y[1]], dtype='object') result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'object') + assert result.dtype == 'object' x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) y = Series(['A', 'B']) expected = Series([x[0], x[1], y[0], y[1]], dtype='object') result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'object') + assert result.dtype == 'object' def test_concat_empty_series(self): # GH 11082 diff --git a/pandas/tests/reshape/test_hashing.py b/pandas/tests/reshape/test_hashing.py index f19f6b1374978..85807da33e38d 100644 --- a/pandas/tests/reshape/test_hashing.py +++ b/pandas/tests/reshape/test_hashing.py @@ -76,7 +76,7 @@ def test_hash_tuples(self): tm.assert_numpy_array_equal(result, expected) result = hash_tuples(tups[0]) - self.assertEqual(result, expected[0]) + assert result == expected[0] def test_hash_tuples_err(self): diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py index 1da187788e99d..cda343175fd0a 100644 --- a/pandas/tests/reshape/test_join.py +++ b/pandas/tests/reshape/test_join.py @@ -257,7 +257,7 @@ def test_join_with_len0(self): merged2 = self.target.join(self.source.reindex([]), on='C', how='inner') tm.assert_index_equal(merged2.columns, merged.columns) - self.assertEqual(len(merged2), 0) + assert len(merged2) == 0 def test_join_on_inner(self): df = DataFrame({'key': ['a', 'a', 'd', 'b', 'b', 'c']}) @@ -301,8 +301,8 @@ def test_join_index_mixed(self): df1 = DataFrame({'A': 1., 'B': 2, 'C': 'foo', 'D': True}, index=np.arange(10), columns=['A', 'B', 'C', 'D']) - self.assertEqual(df1['B'].dtype, np.int64) - self.assertEqual(df1['D'].dtype, np.bool_) + assert df1['B'].dtype == np.int64 + assert df1['D'].dtype == np.bool_ df2 = DataFrame({'A': 1., 'B': 2, 'C': 'foo', 'D': True}, index=np.arange(0, 10, 2), @@ -374,7 +374,7 @@ def test_join_multiindex(self): expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) expected.index.names = index1.names assert_frame_equal(joined, expected) - self.assertEqual(joined.index.names, index1.names) + assert joined.index.names == index1.names df1 = df1.sort_index(level=1) df2 = df2.sort_index(level=1) @@ -385,7 +385,7 @@ def test_join_multiindex(self): expected.index.names = index1.names assert_frame_equal(joined, expected) - self.assertEqual(joined.index.names, index1.names) + assert joined.index.names == index1.names def test_join_inner_multiindex(self): key1 = ['bar', 'bar', 'bar', 'foo', 'foo', 'baz', 'baz', 'qux', @@ -445,9 +445,9 @@ def test_join_float64_float32(self): a = DataFrame(randn(10, 2), columns=['a', 'b'], dtype=np.float64) b = DataFrame(randn(10, 1), columns=['c'], dtype=np.float32) joined = a.join(b) - self.assertEqual(joined.dtypes['a'], 'float64') - self.assertEqual(joined.dtypes['b'], 'float64') - self.assertEqual(joined.dtypes['c'], 'float32') + assert joined.dtypes['a'] == 'float64' + assert joined.dtypes['b'] == 'float64' + assert joined.dtypes['c'] == 'float32' a = 
np.random.randint(0, 5, 100).astype('int64') b = np.random.random(100).astype('float64') @@ -456,10 +456,10 @@ def test_join_float64_float32(self): xpdf = DataFrame({'a': a, 'b': b, 'c': c}) s = DataFrame(np.random.random(5).astype('float32'), columns=['md']) rs = df.merge(s, left_on='a', right_index=True) - self.assertEqual(rs.dtypes['a'], 'int64') - self.assertEqual(rs.dtypes['b'], 'float64') - self.assertEqual(rs.dtypes['c'], 'float32') - self.assertEqual(rs.dtypes['md'], 'float32') + assert rs.dtypes['a'] == 'int64' + assert rs.dtypes['b'] == 'float64' + assert rs.dtypes['c'] == 'float32' + assert rs.dtypes['md'] == 'float32' xp = xpdf.merge(s, left_on='a', right_index=True) assert_frame_equal(rs, xp) diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 86580e5a84d92..db0e4631381f1 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -127,7 +127,7 @@ def test_index_and_on_parameters_confusion(self): def test_merge_overlap(self): merged = merge(self.left, self.left, on='key') exp_len = (self.left['key'].value_counts() ** 2).sum() - self.assertEqual(len(merged), exp_len) + assert len(merged) == exp_len assert 'v1_x' in merged assert 'v1_y' in merged @@ -202,7 +202,7 @@ def test_merge_join_key_dtype_cast(self): df1 = DataFrame({'key': [1], 'v1': [10]}) df2 = DataFrame({'key': [2], 'v1': [20]}) df = merge(df1, df2, how='outer') - self.assertEqual(df['key'].dtype, 'int64') + assert df['key'].dtype == 'int64' df1 = DataFrame({'key': [True], 'v1': [1]}) df2 = DataFrame({'key': [False], 'v1': [0]}) @@ -210,14 +210,14 @@ def test_merge_join_key_dtype_cast(self): # GH13169 # this really should be bool - self.assertEqual(df['key'].dtype, 'object') + assert df['key'].dtype == 'object' df1 = DataFrame({'val': [1]}) df2 = DataFrame({'val': [2]}) lkey = np.array([1]) rkey = np.array([2]) df = merge(df1, df2, left_on=lkey, right_on=rkey, how='outer') - self.assertEqual(df['key_0'].dtype, 'int64') + assert df['key_0'].dtype == 'int64' def test_handle_join_key_pass_array(self): left = DataFrame({'key': [1, 1, 2, 2, 3], @@ -499,7 +499,7 @@ def test_other_datetime_unit(self): df2 = s.astype(dtype).to_frame('days') # coerces to datetime64[ns], thus should not be affected - self.assertEqual(df2['days'].dtype, 'datetime64[ns]') + assert df2['days'].dtype == 'datetime64[ns]' result = df1.merge(df2, left_on='entity_id', right_index=True) @@ -519,7 +519,7 @@ def test_other_timedelta_unit(self): 'timedelta64[ns]']: df2 = s.astype(dtype).to_frame('days') - self.assertEqual(df2['days'].dtype, dtype) + assert df2['days'].dtype == dtype result = df1.merge(df2, left_on='entity_id', right_index=True) @@ -582,8 +582,8 @@ def test_merge_on_datetime64tz(self): 'key': [1, 2, 3]}) result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) - self.assertEqual(result['value_x'].dtype, 'datetime64[ns, US/Eastern]') - self.assertEqual(result['value_y'].dtype, 'datetime64[ns, US/Eastern]') + assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]' + assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]' def test_merge_on_periods(self): left = pd.DataFrame({'key': pd.period_range('20151010', periods=2, @@ -614,8 +614,8 @@ def test_merge_on_periods(self): 'key': [1, 2, 3]}) result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) - self.assertEqual(result['value_x'].dtype, 'object') - self.assertEqual(result['value_y'].dtype, 'object') + assert result['value_x'].dtype == 
'object' + assert result['value_y'].dtype == 'object' def test_indicator(self): # PR #10054. xref #7412 and closes #8790. diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 3b3b4fe247b72..df679966e0002 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -45,14 +45,14 @@ def test_pivot_table(self): pivot_table(self.data, values='D', index=index) if len(index) > 1: - self.assertEqual(table.index.names, tuple(index)) + assert table.index.names == tuple(index) else: - self.assertEqual(table.index.name, index[0]) + assert table.index.name == index[0] if len(columns) > 1: - self.assertEqual(table.columns.names, columns) + assert table.columns.names == columns else: - self.assertEqual(table.columns.name, columns[0]) + assert table.columns.name == columns[0] expected = self.data.groupby( index + [columns])['D'].agg(np.mean).unstack() @@ -148,7 +148,7 @@ def test_pivot_dtypes(self): # can convert dtypes f = DataFrame({'a': ['cat', 'bat', 'cat', 'bat'], 'v': [ 1, 2, 3, 4], 'i': ['a', 'b', 'a', 'b']}) - self.assertEqual(f.dtypes['v'], 'int64') + assert f.dtypes['v'] == 'int64' z = pivot_table(f, values='v', index=['a'], columns=[ 'i'], fill_value=0, aggfunc=np.sum) @@ -159,7 +159,7 @@ def test_pivot_dtypes(self): # cannot convert dtypes f = DataFrame({'a': ['cat', 'bat', 'cat', 'bat'], 'v': [ 1.5, 2.5, 3.5, 4.5], 'i': ['a', 'b', 'a', 'b']}) - self.assertEqual(f.dtypes['v'], 'float64') + assert f.dtypes['v'] == 'float64' z = pivot_table(f, values='v', index=['a'], columns=[ 'i'], fill_value=0, aggfunc=np.mean) @@ -249,10 +249,10 @@ def test_pivot_index_with_nan(self): df.loc[1, 'b'] = df.loc[4, 'b'] = nan pv = df.pivot('a', 'b', 'c') - self.assertEqual(pv.notnull().values.sum(), len(df)) + assert pv.notnull().values.sum() == len(df) for _, row in df.iterrows(): - self.assertEqual(pv.loc[row['a'], row['b']], row['c']) + assert pv.loc[row['a'], row['b']] == row['c'] tm.assert_frame_equal(df.pivot('b', 'a', 'c'), pv.T) @@ -341,7 +341,7 @@ def _check_output(result, values_col, index=['A', 'B'], expected_col_margins = self.data.groupby(index)[values_col].mean() tm.assert_series_equal(col_margins, expected_col_margins, check_names=False) - self.assertEqual(col_margins.name, margins_col) + assert col_margins.name == margins_col result = result.sort_index() index_margins = result.loc[(margins_col, '')].iloc[:-1] @@ -349,11 +349,11 @@ def _check_output(result, values_col, index=['A', 'B'], expected_ix_margins = self.data.groupby(columns)[values_col].mean() tm.assert_series_equal(index_margins, expected_ix_margins, check_names=False) - self.assertEqual(index_margins.name, (margins_col, '')) + assert index_margins.name == (margins_col, '') grand_total_margins = result.loc[(margins_col, ''), margins_col] expected_total_margins = self.data[values_col].mean() - self.assertEqual(grand_total_margins, expected_total_margins) + assert grand_total_margins == expected_total_margins # column specified result = self.data.pivot_table(values='D', index=['A', 'B'], @@ -382,7 +382,7 @@ def _check_output(result, values_col, index=['A', 'B'], aggfunc=np.mean) for value_col in table.columns: totals = table.loc[('All', ''), value_col] - self.assertEqual(totals, self.data[value_col].mean()) + assert totals == self.data[value_col].mean() # no rows rtable = self.data.pivot_table(columns=['AA', 'BB'], margins=True, @@ -393,7 +393,7 @@ def _check_output(result, values_col, index=['A', 'B'], aggfunc='mean') for item in ['DD', 'EE', 'FF']: totals = 
table.loc[('All', ''), item] - self.assertEqual(totals, self.data[item].mean()) + assert totals == self.data[item].mean() # issue number #8349: pivot_table with margins and dictionary aggfunc data = [ @@ -528,21 +528,21 @@ def test_margins_no_values_no_cols(self): result = self.data[['A', 'B']].pivot_table( index=['A', 'B'], aggfunc=len, margins=True) result_list = result.tolist() - self.assertEqual(sum(result_list[:-1]), result_list[-1]) + assert sum(result_list[:-1]) == result_list[-1] def test_margins_no_values_two_rows(self): # Regression test on pivot table: no values passed but rows are a # multi-index result = self.data[['A', 'B', 'C']].pivot_table( index=['A', 'B'], columns='C', aggfunc=len, margins=True) - self.assertEqual(result.All.tolist(), [3.0, 1.0, 4.0, 3.0, 11.0]) + assert result.All.tolist() == [3.0, 1.0, 4.0, 3.0, 11.0] def test_margins_no_values_one_row_one_col(self): # Regression test on pivot table: no values passed but row and col # defined result = self.data[['A', 'B']].pivot_table( index='A', columns='B', aggfunc=len, margins=True) - self.assertEqual(result.All.tolist(), [4.0, 7.0, 11.0]) + assert result.All.tolist() == [4.0, 7.0, 11.0] def test_margins_no_values_two_row_two_cols(self): # Regression test on pivot table: no values passed but rows and cols @@ -551,10 +551,10 @@ def test_margins_no_values_two_row_two_cols(self): 'e', 'f', 'g', 'h', 'i', 'j', 'k'] result = self.data[['A', 'B', 'C', 'D']].pivot_table( index=['A', 'B'], columns=['C', 'D'], aggfunc=len, margins=True) - self.assertEqual(result.All.tolist(), [3.0, 1.0, 4.0, 3.0, 11.0]) + assert result.All.tolist() == [3.0, 1.0, 4.0, 3.0, 11.0] def test_pivot_table_with_margins_set_margin_name(self): - # GH 3335 + # see gh-3335 for margin_name in ['foo', 'one', 666, None, ['a', 'b']]: with pytest.raises(ValueError): # multi-index index @@ -1037,8 +1037,8 @@ def test_crosstab_ndarray(self): # assign arbitrary names result = crosstab(self.df['A'].values, self.df['C'].values) - self.assertEqual(result.index.name, 'row_0') - self.assertEqual(result.columns.name, 'col_0') + assert result.index.name == 'row_0' + assert result.columns.name == 'col_0' def test_crosstab_margins(self): a = np.random.randint(0, 7, size=100) @@ -1050,8 +1050,8 @@ def test_crosstab_margins(self): result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'), margins=True) - self.assertEqual(result.index.names, ('a',)) - self.assertEqual(result.columns.names, ['b', 'c']) + assert result.index.names == ('a',) + assert result.columns.names == ['b', 'c'] all_cols = result['All', ''] exp_cols = df.groupby(['a']).size().astype('i8') @@ -1420,7 +1420,7 @@ def test_daily(self): result = annual[i].dropna() tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) + assert result.name == i # check leap days leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)] @@ -1453,7 +1453,7 @@ def test_hourly(self): result = annual[i].dropna() tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) + assert result.name == i leaps = ts_hourly[(ts_hourly.index.month == 2) & ( ts_hourly.index.day == 29) & (ts_hourly.index.hour == 0)] @@ -1478,7 +1478,7 @@ def test_monthly(self): subset.index = [x.year for x in subset.index] result = annual[i].dropna() tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) + assert result.name == i def test_period_monthly(self): pass diff --git a/pandas/tests/reshape/test_reshape.py 
b/pandas/tests/reshape/test_reshape.py index 87f16cfaf31ec..87cd0637f1125 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -35,7 +35,7 @@ def setUp(self): def test_top_level_method(self): result = melt(self.df) - self.assertEqual(result.columns.tolist(), ['variable', 'value']) + assert result.columns.tolist() == ['variable', 'value'] def test_method_signatures(self): tm.assert_frame_equal(self.df.melt(), @@ -58,19 +58,17 @@ def test_method_signatures(self): def test_default_col_names(self): result = self.df.melt() - self.assertEqual(result.columns.tolist(), ['variable', 'value']) + assert result.columns.tolist() == ['variable', 'value'] result1 = self.df.melt(id_vars=['id1']) - self.assertEqual(result1.columns.tolist(), ['id1', 'variable', 'value' - ]) + assert result1.columns.tolist() == ['id1', 'variable', 'value'] result2 = self.df.melt(id_vars=['id1', 'id2']) - self.assertEqual(result2.columns.tolist(), ['id1', 'id2', 'variable', - 'value']) + assert result2.columns.tolist() == ['id1', 'id2', 'variable', 'value'] def test_value_vars(self): result3 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A') - self.assertEqual(len(result3), 10) + assert len(result3) == 10 result4 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B']) expected4 = DataFrame({'id1': self.df['id1'].tolist() * 2, @@ -122,19 +120,17 @@ def test_tuple_vars_fail_with_multiindex(self): def test_custom_var_name(self): result5 = self.df.melt(var_name=self.var_name) - self.assertEqual(result5.columns.tolist(), ['var', 'value']) + assert result5.columns.tolist() == ['var', 'value'] result6 = self.df.melt(id_vars=['id1'], var_name=self.var_name) - self.assertEqual(result6.columns.tolist(), ['id1', 'var', 'value']) + assert result6.columns.tolist() == ['id1', 'var', 'value'] result7 = self.df.melt(id_vars=['id1', 'id2'], var_name=self.var_name) - self.assertEqual(result7.columns.tolist(), ['id1', 'id2', 'var', - 'value']) + assert result7.columns.tolist() == ['id1', 'id2', 'var', 'value'] result8 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A', var_name=self.var_name) - self.assertEqual(result8.columns.tolist(), ['id1', 'id2', 'var', - 'value']) + assert result8.columns.tolist() == ['id1', 'id2', 'var', 'value'] result9 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'], var_name=self.var_name) @@ -148,20 +144,18 @@ def test_custom_var_name(self): def test_custom_value_name(self): result10 = self.df.melt(value_name=self.value_name) - self.assertEqual(result10.columns.tolist(), ['variable', 'val']) + assert result10.columns.tolist() == ['variable', 'val'] result11 = self.df.melt(id_vars=['id1'], value_name=self.value_name) - self.assertEqual(result11.columns.tolist(), ['id1', 'variable', 'val']) + assert result11.columns.tolist() == ['id1', 'variable', 'val'] result12 = self.df.melt(id_vars=['id1', 'id2'], value_name=self.value_name) - self.assertEqual(result12.columns.tolist(), ['id1', 'id2', 'variable', - 'val']) + assert result12.columns.tolist() == ['id1', 'id2', 'variable', 'val'] result13 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A', value_name=self.value_name) - self.assertEqual(result13.columns.tolist(), ['id1', 'id2', 'variable', - 'val']) + assert result13.columns.tolist() == ['id1', 'id2', 'variable', 'val'] result14 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'], value_name=self.value_name) @@ -178,23 +172,21 @@ def test_custom_var_and_value_name(self): result15 = self.df.melt(var_name=self.var_name, 
value_name=self.value_name) - self.assertEqual(result15.columns.tolist(), ['var', 'val']) + assert result15.columns.tolist() == ['var', 'val'] result16 = self.df.melt(id_vars=['id1'], var_name=self.var_name, value_name=self.value_name) - self.assertEqual(result16.columns.tolist(), ['id1', 'var', 'val']) + assert result16.columns.tolist() == ['id1', 'var', 'val'] result17 = self.df.melt(id_vars=['id1', 'id2'], var_name=self.var_name, value_name=self.value_name) - self.assertEqual(result17.columns.tolist(), ['id1', 'id2', 'var', 'val' - ]) + assert result17.columns.tolist() == ['id1', 'id2', 'var', 'val'] result18 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A', var_name=self.var_name, value_name=self.value_name) - self.assertEqual(result18.columns.tolist(), ['id1', 'id2', 'var', 'val' - ]) + assert result18.columns.tolist() == ['id1', 'id2', 'var', 'val'] result19 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'], var_name=self.var_name, @@ -211,17 +203,17 @@ def test_custom_var_and_value_name(self): df20 = self.df.copy() df20.columns.name = 'foo' result20 = df20.melt() - self.assertEqual(result20.columns.tolist(), ['foo', 'value']) + assert result20.columns.tolist() == ['foo', 'value'] def test_col_level(self): res1 = self.df1.melt(col_level=0) res2 = self.df1.melt(col_level='CAP') - self.assertEqual(res1.columns.tolist(), ['CAP', 'value']) - self.assertEqual(res2.columns.tolist(), ['CAP', 'value']) + assert res1.columns.tolist() == ['CAP', 'value'] + assert res2.columns.tolist() == ['CAP', 'value'] def test_multiindex(self): res = self.df1.melt() - self.assertEqual(res.columns.tolist(), ['CAP', 'low', 'value']) + assert res.columns.tolist() == ['CAP', 'low', 'value'] class TestGetDummies(tm.TestCase): @@ -298,13 +290,13 @@ def test_just_na(self): res_series_index = get_dummies(just_na_series_index, sparse=self.sparse) - self.assertEqual(res_list.empty, True) - self.assertEqual(res_series.empty, True) - self.assertEqual(res_series_index.empty, True) + assert res_list.empty + assert res_series.empty + assert res_series_index.empty - self.assertEqual(res_list.index.tolist(), [0]) - self.assertEqual(res_series.index.tolist(), [0]) - self.assertEqual(res_series_index.index.tolist(), ['A']) + assert res_list.index.tolist() == [0] + assert res_series.index.tolist() == [0] + assert res_series_index.index.tolist() == ['A'] def test_include_na(self): s = ['a', 'b', np.nan] @@ -784,7 +776,7 @@ def test_stubs(self): # TODO: unused? 
df_long = pd.wide_to_long(df, stubs, i='id', j='age') # noqa - self.assertEqual(stubs, ['inc', 'edu']) + assert stubs == ['inc', 'edu'] def test_separating_character(self): # GH14779 diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 923615c93d98b..2291030a2735c 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -122,7 +122,7 @@ def test_cut_pass_series_name_to_factor(self): s = Series(np.random.randn(100), name='foo') factor = cut(s, 4) - self.assertEqual(factor.name, 'foo') + assert factor.name == 'foo' def test_label_precision(self): arr = np.arange(0, 0.73, 0.01) @@ -158,16 +158,16 @@ def test_inf_handling(self): ex_uniques = IntervalIndex.from_breaks(bins) tm.assert_index_equal(result.categories, ex_uniques) - self.assertEqual(result[5], Interval(4, np.inf)) - self.assertEqual(result[0], Interval(-np.inf, 2)) - self.assertEqual(result_ser[5], Interval(4, np.inf)) - self.assertEqual(result_ser[0], Interval(-np.inf, 2)) + assert result[5] == Interval(4, np.inf) + assert result[0] == Interval(-np.inf, 2) + assert result_ser[5] == Interval(4, np.inf) + assert result_ser[0] == Interval(-np.inf, 2) def test_qcut(self): arr = np.random.randn(1000) - # we store the bins as Index that have been rounded - # to comparisions are a bit tricky + # We store the bins as Index that have been rounded, + # so comparisons are a bit tricky. labels, bins = qcut(arr, 4, retbins=True) ex_bins = quantile(arr, [0, .25, .5, .75, 1.]) result = labels.categories.left.values @@ -182,7 +182,7 @@ def test_qcut_bounds(self): arr = np.random.randn(1000) factor = qcut(arr, 10, labels=False) - self.assertEqual(len(np.unique(factor)), 10) + assert len(np.unique(factor)) == 10 def test_qcut_specify_quantiles(self): arr = np.random.randn(100) @@ -253,14 +253,14 @@ def test_round_frac(self): # #1979, negative numbers result = tmod._round_frac(-117.9998, precision=3) - self.assertEqual(result, -118) + assert result == -118 result = tmod._round_frac(117.9998, precision=3) - self.assertEqual(result, 118) + assert result == 118 result = tmod._round_frac(117.9998, precision=2) - self.assertEqual(result, 118) + assert result == 118 result = tmod._round_frac(0.000123456, precision=2) - self.assertEqual(result, 0.00012) + assert result == 0.00012 def test_qcut_binning_issues(self): # #1978, 1979 diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/test_interval.py index d77deabee58d4..079c41657bec6 100644 --- a/pandas/tests/scalar/test_interval.py +++ b/pandas/tests/scalar/test_interval.py @@ -10,20 +10,18 @@ def setUp(self): self.interval = Interval(0, 1) def test_properties(self): - self.assertEqual(self.interval.closed, 'right') - self.assertEqual(self.interval.left, 0) - self.assertEqual(self.interval.right, 1) - self.assertEqual(self.interval.mid, 0.5) + assert self.interval.closed == 'right' + assert self.interval.left == 0 + assert self.interval.right == 1 + assert self.interval.mid == 0.5 def test_repr(self): - self.assertEqual(repr(self.interval), - "Interval(0, 1, closed='right')") - self.assertEqual(str(self.interval), "(0, 1]") + assert repr(self.interval) == "Interval(0, 1, closed='right')" + assert str(self.interval) == "(0, 1]" interval_left = Interval(0, 1, closed='left') - self.assertEqual(repr(interval_left), - "Interval(0, 1, closed='left')") - self.assertEqual(str(interval_left), "[0, 1)") + assert repr(interval_left) == "Interval(0, 1, closed='left')" + assert str(interval_left) == "[0, 1)" def test_contains(self):
assert 0.5 in self.interval @@ -41,9 +39,9 @@ def test_contains(self): assert 1 not in interval def test_equal(self): - self.assertEqual(Interval(0, 1), Interval(0, 1, closed='right')) - self.assertNotEqual(Interval(0, 1), Interval(0, 1, closed='left')) - self.assertNotEqual(Interval(0, 1), 0) + assert Interval(0, 1) == Interval(0, 1, closed='right') + assert Interval(0, 1) != Interval(0, 1, closed='left') + assert Interval(0, 1) != 0 def test_comparison(self): with tm.assert_raises_regex(TypeError, 'unorderable types'): @@ -63,15 +61,15 @@ def test_hash(self): def test_math_add(self): expected = Interval(1, 2) actual = self.interval + 1 - self.assertEqual(expected, actual) + assert expected == actual expected = Interval(1, 2) actual = 1 + self.interval - self.assertEqual(expected, actual) + assert expected == actual actual = self.interval actual += 1 - self.assertEqual(expected, actual) + assert expected == actual with pytest.raises(TypeError): self.interval + Interval(1, 2) @@ -82,11 +80,11 @@ def test_math_add(self): def test_math_sub(self): expected = Interval(-1, 0) actual = self.interval - 1 - self.assertEqual(expected, actual) + assert expected == actual actual = self.interval actual -= 1 - self.assertEqual(expected, actual) + assert expected == actual with pytest.raises(TypeError): self.interval - Interval(1, 2) @@ -97,15 +95,15 @@ def test_math_sub(self): def test_math_mult(self): expected = Interval(0, 2) actual = self.interval * 2 - self.assertEqual(expected, actual) + assert expected == actual expected = Interval(0, 2) actual = 2 * self.interval - self.assertEqual(expected, actual) + assert expected == actual actual = self.interval actual *= 2 - self.assertEqual(expected, actual) + assert expected == actual with pytest.raises(TypeError): self.interval * Interval(1, 2) @@ -116,11 +114,11 @@ def test_math_mult(self): def test_math_div(self): expected = Interval(0, 0.5) actual = self.interval / 2.0 - self.assertEqual(expected, actual) + assert expected == actual actual = self.interval actual /= 2.0 - self.assertEqual(expected, actual) + assert expected == actual with pytest.raises(TypeError): self.interval / Interval(1, 2) diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index fc0921451c133..00a1fa1b507b6 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -35,18 +35,18 @@ def test_is_leap_year(self): def test_quarterly_negative_ordinals(self): p = Period(ordinal=-1, freq='Q-DEC') - self.assertEqual(p.year, 1969) - self.assertEqual(p.quarter, 4) + assert p.year == 1969 + assert p.quarter == 4 assert isinstance(p, Period) p = Period(ordinal=-2, freq='Q-DEC') - self.assertEqual(p.year, 1969) - self.assertEqual(p.quarter, 3) + assert p.year == 1969 + assert p.quarter == 3 assert isinstance(p, Period) p = Period(ordinal=-2, freq='M') - self.assertEqual(p.year, 1969) - self.assertEqual(p.month, 11) + assert p.year == 1969 + assert p.month == 11 assert isinstance(p, Period) def test_period_cons_quarterly(self): @@ -57,11 +57,11 @@ def test_period_cons_quarterly(self): assert '1989Q3' in str(exp) stamp = exp.to_timestamp('D', how='end') p = Period(stamp, freq=freq) - self.assertEqual(p, exp) + assert p == exp stamp = exp.to_timestamp('3D', how='end') p = Period(stamp, freq=freq) - self.assertEqual(p, exp) + assert p == exp def test_period_cons_annual(self): # bugs in scikits.timeseries @@ -70,7 +70,7 @@ def test_period_cons_annual(self): exp = Period('1989', freq=freq) stamp = exp.to_timestamp('D', how='end') + 
timedelta(days=30) p = Period(stamp, freq=freq) - self.assertEqual(p, exp + 1) + assert p == exp + 1 assert isinstance(p, Period) def test_period_cons_weekly(self): @@ -81,13 +81,13 @@ def test_period_cons_weekly(self): result = Period(daystr, freq=freq) expected = Period(daystr, freq='D').asfreq(freq) - self.assertEqual(result, expected) + assert result == expected assert isinstance(result, Period) def test_period_from_ordinal(self): p = pd.Period('2011-01', freq='M') res = pd.Period._from_ordinal(p.ordinal, freq='M') - self.assertEqual(p, res) + assert p == res assert isinstance(res, Period) def test_period_cons_nat(self): @@ -115,23 +115,23 @@ def test_period_cons_nat(self): def test_period_cons_mult(self): p1 = Period('2011-01', freq='3M') p2 = Period('2011-01', freq='M') - self.assertEqual(p1.ordinal, p2.ordinal) + assert p1.ordinal == p2.ordinal - self.assertEqual(p1.freq, offsets.MonthEnd(3)) - self.assertEqual(p1.freqstr, '3M') + assert p1.freq == offsets.MonthEnd(3) + assert p1.freqstr == '3M' - self.assertEqual(p2.freq, offsets.MonthEnd()) - self.assertEqual(p2.freqstr, 'M') + assert p2.freq == offsets.MonthEnd() + assert p2.freqstr == 'M' result = p1 + 1 - self.assertEqual(result.ordinal, (p2 + 3).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '3M') + assert result.ordinal == (p2 + 3).ordinal + assert result.freq == p1.freq + assert result.freqstr == '3M' result = p1 - 1 - self.assertEqual(result.ordinal, (p2 - 3).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '3M') + assert result.ordinal == (p2 - 3).ordinal + assert result.freq == p1.freq + assert result.freqstr == '3M' msg = ('Frequency must be positive, because it' ' represents span: -3M') @@ -151,37 +151,37 @@ def test_period_cons_combined(self): Period(ordinal=1, freq='H'))] for p1, p2, p3 in p: - self.assertEqual(p1.ordinal, p3.ordinal) - self.assertEqual(p2.ordinal, p3.ordinal) + assert p1.ordinal == p3.ordinal + assert p2.ordinal == p3.ordinal - self.assertEqual(p1.freq, offsets.Hour(25)) - self.assertEqual(p1.freqstr, '25H') + assert p1.freq == offsets.Hour(25) + assert p1.freqstr == '25H' - self.assertEqual(p2.freq, offsets.Hour(25)) - self.assertEqual(p2.freqstr, '25H') + assert p2.freq == offsets.Hour(25) + assert p2.freqstr == '25H' - self.assertEqual(p3.freq, offsets.Hour()) - self.assertEqual(p3.freqstr, 'H') + assert p3.freq == offsets.Hour() + assert p3.freqstr == 'H' result = p1 + 1 - self.assertEqual(result.ordinal, (p3 + 25).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '25H') + assert result.ordinal == (p3 + 25).ordinal + assert result.freq == p1.freq + assert result.freqstr == '25H' result = p2 + 1 - self.assertEqual(result.ordinal, (p3 + 25).ordinal) - self.assertEqual(result.freq, p2.freq) - self.assertEqual(result.freqstr, '25H') + assert result.ordinal == (p3 + 25).ordinal + assert result.freq == p2.freq + assert result.freqstr == '25H' result = p1 - 1 - self.assertEqual(result.ordinal, (p3 - 25).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '25H') + assert result.ordinal == (p3 - 25).ordinal + assert result.freq == p1.freq + assert result.freqstr == '25H' result = p2 - 1 - self.assertEqual(result.ordinal, (p3 - 25).ordinal) - self.assertEqual(result.freq, p2.freq) - self.assertEqual(result.freqstr, '25H') + assert result.ordinal == (p3 - 25).ordinal + assert result.freq == p2.freq + assert result.freqstr == '25H' msg = ('Frequency must be positive, 
because it' ' represents span: -25H') @@ -217,33 +217,33 @@ def test_timestamp_tz_arg(self): exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) exp_zone = pytz.timezone(case).normalize(p) - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz p = Period('1/1/2005', freq='3H').to_timestamp(tz=case) exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) exp_zone = pytz.timezone(case).normalize(p) - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz p = Period('1/1/2005', freq='A').to_timestamp(freq='A', tz=case) exp = Timestamp('31/12/2005', tz='UTC').tz_convert(case) exp_zone = pytz.timezone(case).normalize(p) - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz p = Period('1/1/2005', freq='A').to_timestamp(freq='3H', tz=case) exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) exp_zone = pytz.timezone(case).normalize(p) - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz def test_timestamp_tz_arg_dateutil(self): from pandas._libs.tslib import _dateutil_gettz as gettz @@ -253,86 +253,86 @@ def test_timestamp_tz_arg_dateutil(self): p = Period('1/1/2005', freq='M').to_timestamp( tz=maybe_get_tz(case)) exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - self.assertEqual(p, exp) - self.assertEqual(p.tz, gettz(case.split('/', 1)[1])) - self.assertEqual(p.tz, exp.tz) + assert p == exp + assert p.tz == gettz(case.split('/', 1)[1]) + assert p.tz == exp.tz p = Period('1/1/2005', freq='M').to_timestamp(freq='3H', tz=maybe_get_tz(case)) exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - self.assertEqual(p, exp) - self.assertEqual(p.tz, gettz(case.split('/', 1)[1])) - self.assertEqual(p.tz, exp.tz) + assert p == exp + assert p.tz == gettz(case.split('/', 1)[1]) + assert p.tz == exp.tz def test_timestamp_tz_arg_dateutil_from_string(self): from pandas._libs.tslib import _dateutil_gettz as gettz p = Period('1/1/2005', freq='M').to_timestamp(tz='dateutil/Europe/Brussels') - self.assertEqual(p.tz, gettz('Europe/Brussels')) + assert p.tz == gettz('Europe/Brussels') def test_timestamp_mult(self): p = pd.Period('2011-01', freq='M') - self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) - self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-01-31')) + assert p.to_timestamp(how='S') == pd.Timestamp('2011-01-01') + assert p.to_timestamp(how='E') == pd.Timestamp('2011-01-31') p = pd.Period('2011-01', freq='3M') - self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) - self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-03-31')) + assert p.to_timestamp(how='S') == pd.Timestamp('2011-01-01') + assert p.to_timestamp(how='E') == pd.Timestamp('2011-03-31') def test_construction(self): i1 = Period('1/1/2005', freq='M') i2 = Period('Jan 2005') - self.assertEqual(i1, i2) + assert i1 == i2 i1 = Period('2005', freq='A') i2 = Period('2005') i3 = Period('2005', freq='a') - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) + assert i1 == i2 + assert i1 == i3 i4 = Period('2005', freq='M') i5 = Period('2005', freq='m') pytest.raises(ValueError, i1.__ne__, i4) - 
self.assertEqual(i4, i5) + assert i4 == i5 i1 = Period.now('Q') i2 = Period(datetime.now(), freq='Q') i3 = Period.now('q') - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) + assert i1 == i2 + assert i1 == i3 i1 = Period('1982', freq='min') i2 = Period('1982', freq='MIN') - self.assertEqual(i1, i2) + assert i1 == i2 i2 = Period('1982', freq=('Min', 1)) - self.assertEqual(i1, i2) + assert i1 == i2 i1 = Period(year=2005, month=3, day=1, freq='D') i2 = Period('3/1/2005', freq='D') - self.assertEqual(i1, i2) + assert i1 == i2 i3 = Period(year=2005, month=3, day=1, freq='d') - self.assertEqual(i1, i3) + assert i1 == i3 i1 = Period('2007-01-01 09:00:00.001') expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') - self.assertEqual(i1, expected) + assert i1 == expected expected = Period(np_datetime64_compat( '2007-01-01 09:00:00.001Z'), freq='L') - self.assertEqual(i1, expected) + assert i1 == expected i1 = Period('2007-01-01 09:00:00.00101') expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') - self.assertEqual(i1, expected) + assert i1 == expected expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), freq='U') - self.assertEqual(i1, expected) + assert i1 == expected pytest.raises(ValueError, Period, ordinal=200701) @@ -343,157 +343,155 @@ def test_construction_bday(self): # Biz day construction, roll forward if non-weekday i1 = Period('3/10/12', freq='B') i2 = Period('3/10/12', freq='D') - self.assertEqual(i1, i2.asfreq('B')) + assert i1 == i2.asfreq('B') i2 = Period('3/11/12', freq='D') - self.assertEqual(i1, i2.asfreq('B')) + assert i1 == i2.asfreq('B') i2 = Period('3/12/12', freq='D') - self.assertEqual(i1, i2.asfreq('B')) + assert i1 == i2.asfreq('B') i3 = Period('3/10/12', freq='b') - self.assertEqual(i1, i3) + assert i1 == i3 i1 = Period(year=2012, month=3, day=10, freq='B') i2 = Period('3/12/12', freq='B') - self.assertEqual(i1, i2) + assert i1 == i2 def test_construction_quarter(self): i1 = Period(year=2005, quarter=1, freq='Q') i2 = Period('1/1/2005', freq='Q') - self.assertEqual(i1, i2) + assert i1 == i2 i1 = Period(year=2005, quarter=3, freq='Q') i2 = Period('9/1/2005', freq='Q') - self.assertEqual(i1, i2) + assert i1 == i2 i1 = Period('2005Q1') i2 = Period(year=2005, quarter=1, freq='Q') i3 = Period('2005q1') - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) + assert i1 == i2 + assert i1 == i3 i1 = Period('05Q1') - self.assertEqual(i1, i2) + assert i1 == i2 lower = Period('05q1') - self.assertEqual(i1, lower) + assert i1 == lower i1 = Period('1Q2005') - self.assertEqual(i1, i2) + assert i1 == i2 lower = Period('1q2005') - self.assertEqual(i1, lower) + assert i1 == lower i1 = Period('1Q05') - self.assertEqual(i1, i2) + assert i1 == i2 lower = Period('1q05') - self.assertEqual(i1, lower) + assert i1 == lower i1 = Period('4Q1984') - self.assertEqual(i1.year, 1984) + assert i1.year == 1984 lower = Period('4q1984') - self.assertEqual(i1, lower) + assert i1 == lower def test_construction_month(self): expected = Period('2007-01', freq='M') i1 = Period('200701', freq='M') - self.assertEqual(i1, expected) + assert i1 == expected i1 = Period('200701', freq='M') - self.assertEqual(i1, expected) + assert i1 == expected i1 = Period(200701, freq='M') - self.assertEqual(i1, expected) + assert i1 == expected i1 = Period(ordinal=200701, freq='M') - self.assertEqual(i1.year, 18695) + assert i1.year == 18695 i1 = Period(datetime(2007, 1, 1), freq='M') i2 = Period('200701', freq='M') - self.assertEqual(i1, i2) + assert i1 == i2 i1 = Period(date(2007, 1, 
1), freq='M') i2 = Period(datetime(2007, 1, 1), freq='M') i3 = Period(np.datetime64('2007-01-01'), freq='M') i4 = Period(np_datetime64_compat('2007-01-01 00:00:00Z'), freq='M') i5 = Period(np_datetime64_compat('2007-01-01 00:00:00.000Z'), freq='M') - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - self.assertEqual(i1, i4) - self.assertEqual(i1, i5) + assert i1 == i2 + assert i1 == i3 + assert i1 == i4 + assert i1 == i5 def test_period_constructor_offsets(self): - self.assertEqual(Period('1/1/2005', freq=offsets.MonthEnd()), - Period('1/1/2005', freq='M')) - self.assertEqual(Period('2005', freq=offsets.YearEnd()), - Period('2005', freq='A')) - self.assertEqual(Period('2005', freq=offsets.MonthEnd()), - Period('2005', freq='M')) - self.assertEqual(Period('3/10/12', freq=offsets.BusinessDay()), - Period('3/10/12', freq='B')) - self.assertEqual(Period('3/10/12', freq=offsets.Day()), - Period('3/10/12', freq='D')) - - self.assertEqual(Period(year=2005, quarter=1, - freq=offsets.QuarterEnd(startingMonth=12)), - Period(year=2005, quarter=1, freq='Q')) - self.assertEqual(Period(year=2005, quarter=2, - freq=offsets.QuarterEnd(startingMonth=12)), - Period(year=2005, quarter=2, freq='Q')) - - self.assertEqual(Period(year=2005, month=3, day=1, freq=offsets.Day()), - Period(year=2005, month=3, day=1, freq='D')) - self.assertEqual(Period(year=2012, month=3, day=10, - freq=offsets.BDay()), - Period(year=2012, month=3, day=10, freq='B')) + assert (Period('1/1/2005', freq=offsets.MonthEnd()) == + Period('1/1/2005', freq='M')) + assert (Period('2005', freq=offsets.YearEnd()) == + Period('2005', freq='A')) + assert (Period('2005', freq=offsets.MonthEnd()) == + Period('2005', freq='M')) + assert (Period('3/10/12', freq=offsets.BusinessDay()) == + Period('3/10/12', freq='B')) + assert (Period('3/10/12', freq=offsets.Day()) == + Period('3/10/12', freq='D')) + + assert (Period(year=2005, quarter=1, + freq=offsets.QuarterEnd(startingMonth=12)) == + Period(year=2005, quarter=1, freq='Q')) + assert (Period(year=2005, quarter=2, + freq=offsets.QuarterEnd(startingMonth=12)) == + Period(year=2005, quarter=2, freq='Q')) + + assert (Period(year=2005, month=3, day=1, freq=offsets.Day()) == + Period(year=2005, month=3, day=1, freq='D')) + assert (Period(year=2012, month=3, day=10, freq=offsets.BDay()) == + Period(year=2012, month=3, day=10, freq='B')) expected = Period('2005-03-01', freq='3D') - self.assertEqual(Period(year=2005, month=3, day=1, - freq=offsets.Day(3)), expected) - self.assertEqual(Period(year=2005, month=3, day=1, freq='3D'), - expected) + assert (Period(year=2005, month=3, day=1, + freq=offsets.Day(3)) == expected) + assert Period(year=2005, month=3, day=1, freq='3D') == expected - self.assertEqual(Period(year=2012, month=3, day=10, - freq=offsets.BDay(3)), - Period(year=2012, month=3, day=10, freq='3B')) + assert (Period(year=2012, month=3, day=10, + freq=offsets.BDay(3)) == + Period(year=2012, month=3, day=10, freq='3B')) - self.assertEqual(Period(200701, freq=offsets.MonthEnd()), - Period(200701, freq='M')) + assert (Period(200701, freq=offsets.MonthEnd()) == + Period(200701, freq='M')) i1 = Period(ordinal=200701, freq=offsets.MonthEnd()) i2 = Period(ordinal=200701, freq='M') - self.assertEqual(i1, i2) - self.assertEqual(i1.year, 18695) - self.assertEqual(i2.year, 18695) + assert i1 == i2 + assert i1.year == 18695 + assert i2.year == 18695 i1 = Period(datetime(2007, 1, 1), freq='M') i2 = Period('200701', freq='M') - self.assertEqual(i1, i2) + assert i1 == i2 i1 = Period(date(2007, 1, 1), 
freq='M') i2 = Period(datetime(2007, 1, 1), freq='M') i3 = Period(np.datetime64('2007-01-01'), freq='M') i4 = Period(np_datetime64_compat('2007-01-01 00:00:00Z'), freq='M') i5 = Period(np_datetime64_compat('2007-01-01 00:00:00.000Z'), freq='M') - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - self.assertEqual(i1, i4) - self.assertEqual(i1, i5) + assert i1 == i2 + assert i1 == i3 + assert i1 == i4 + assert i1 == i5 i1 = Period('2007-01-01 09:00:00.001') expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') - self.assertEqual(i1, expected) + assert i1 == expected expected = Period(np_datetime64_compat( '2007-01-01 09:00:00.001Z'), freq='L') - self.assertEqual(i1, expected) + assert i1 == expected i1 = Period('2007-01-01 09:00:00.00101') expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') - self.assertEqual(i1, expected) + assert i1 == expected expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), freq='U') - self.assertEqual(i1, expected) + assert i1 == expected pytest.raises(ValueError, Period, ordinal=200701) @@ -501,8 +499,8 @@ def test_period_constructor_offsets(self): def test_freq_str(self): i1 = Period('1982', freq='Min') - self.assertEqual(i1.freq, offsets.Minute()) - self.assertEqual(i1.freqstr, 'T') + assert i1.freq == offsets.Minute() + assert i1.freqstr == 'T' def test_period_deprecated_freq(self): cases = {"M": ["MTH", "MONTH", "MONTHLY", "Mth", "month", "monthly"], @@ -530,17 +528,17 @@ def test_period_deprecated_freq(self): assert isinstance(p2, Period) def test_hash(self): - self.assertEqual(hash(Period('2011-01', freq='M')), - hash(Period('2011-01', freq='M'))) + assert (hash(Period('2011-01', freq='M')) == + hash(Period('2011-01', freq='M'))) - self.assertNotEqual(hash(Period('2011-01-01', freq='D')), - hash(Period('2011-01', freq='M'))) + assert (hash(Period('2011-01-01', freq='D')) != + hash(Period('2011-01', freq='M'))) - self.assertNotEqual(hash(Period('2011-01', freq='3M')), - hash(Period('2011-01', freq='2M'))) + assert (hash(Period('2011-01', freq='3M')) != + hash(Period('2011-01', freq='2M'))) - self.assertNotEqual(hash(Period('2011-01', freq='M')), - hash(Period('2011-02', freq='M'))) + assert (hash(Period('2011-01', freq='M')) != + hash(Period('2011-02', freq='M'))) def test_repr(self): p = Period('Jan-2000') @@ -556,23 +554,23 @@ def test_repr_nat(self): def test_millisecond_repr(self): p = Period('2000-01-01 12:15:02.123') - self.assertEqual("Period('2000-01-01 12:15:02.123', 'L')", repr(p)) + assert repr(p) == "Period('2000-01-01 12:15:02.123', 'L')" def test_microsecond_repr(self): p = Period('2000-01-01 12:15:02.123567') - self.assertEqual("Period('2000-01-01 12:15:02.123567', 'U')", repr(p)) + assert repr(p) == "Period('2000-01-01 12:15:02.123567', 'U')" def test_strftime(self): p = Period('2000-1-1 12:34:12', freq='S') res = p.strftime('%Y-%m-%d %H:%M:%S') - self.assertEqual(res, '2000-01-01 12:34:12') + assert res == '2000-01-01 12:34:12' assert isinstance(res, text_type) # GH3363 def test_sub_delta(self): left, right = Period('2011', freq='A'), Period('2007', freq='A') result = left - right - self.assertEqual(result, 4) + assert result == 4 with pytest.raises(period.IncompatibleFrequency): left - Period('2007-01', freq='M') @@ -582,15 +580,15 @@ def test_to_timestamp(self): start_ts = p.to_timestamp(how='S') aliases = ['s', 'StarT', 'BEGIn'] for a in aliases: - self.assertEqual(start_ts, p.to_timestamp('D', how=a)) + assert start_ts == p.to_timestamp('D', how=a) # freq with mult should not affect to the 
result - self.assertEqual(start_ts, p.to_timestamp('3D', how=a)) + assert start_ts == p.to_timestamp('3D', how=a) end_ts = p.to_timestamp(how='E') aliases = ['e', 'end', 'FINIsH'] for a in aliases: - self.assertEqual(end_ts, p.to_timestamp('D', how=a)) - self.assertEqual(end_ts, p.to_timestamp('3D', how=a)) + assert end_ts == p.to_timestamp('D', how=a) + assert end_ts == p.to_timestamp('3D', how=a) from_lst = ['A', 'Q', 'M', 'W', 'B', 'D', 'H', 'Min', 'S'] @@ -600,11 +598,11 @@ def _ex(p): for i, fcode in enumerate(from_lst): p = Period('1982', freq=fcode) result = p.to_timestamp().to_period(fcode) - self.assertEqual(result, p) + assert result == p - self.assertEqual(p.start_time, p.to_timestamp(how='S')) + assert p.start_time == p.to_timestamp(how='S') - self.assertEqual(p.end_time, _ex(p)) + assert p.end_time == _ex(p) # Frequency other than daily @@ -612,42 +610,40 @@ def _ex(p): result = p.to_timestamp('H', how='end') expected = datetime(1985, 12, 31, 23) - self.assertEqual(result, expected) + assert result == expected result = p.to_timestamp('3H', how='end') - self.assertEqual(result, expected) + assert result == expected result = p.to_timestamp('T', how='end') expected = datetime(1985, 12, 31, 23, 59) - self.assertEqual(result, expected) + assert result == expected result = p.to_timestamp('2T', how='end') - self.assertEqual(result, expected) + assert result == expected result = p.to_timestamp(how='end') expected = datetime(1985, 12, 31) - self.assertEqual(result, expected) + assert result == expected expected = datetime(1985, 1, 1) result = p.to_timestamp('H', how='start') - self.assertEqual(result, expected) + assert result == expected result = p.to_timestamp('T', how='start') - self.assertEqual(result, expected) + assert result == expected result = p.to_timestamp('S', how='start') - self.assertEqual(result, expected) + assert result == expected result = p.to_timestamp('3H', how='start') - self.assertEqual(result, expected) + assert result == expected result = p.to_timestamp('5S', how='start') - self.assertEqual(result, expected) + assert result == expected def test_start_time(self): freq_lst = ['A', 'Q', 'M', 'D', 'H', 'T', 'S'] xp = datetime(2012, 1, 1) for f in freq_lst: p = Period('2012', freq=f) - self.assertEqual(p.start_time, xp) - self.assertEqual(Period('2012', freq='B').start_time, - datetime(2012, 1, 2)) - self.assertEqual(Period('2012', freq='W').start_time, - datetime(2011, 12, 26)) + assert p.start_time == xp + assert Period('2012', freq='B').start_time == datetime(2012, 1, 2) + assert Period('2012', freq='W').start_time == datetime(2011, 12, 26) def test_end_time(self): p = Period('2012', freq='A') @@ -656,44 +652,44 @@ def _ex(*args): return Timestamp(Timestamp(datetime(*args)).value - 1) xp = _ex(2013, 1, 1) - self.assertEqual(xp, p.end_time) + assert xp == p.end_time p = Period('2012', freq='Q') xp = _ex(2012, 4, 1) - self.assertEqual(xp, p.end_time) + assert xp == p.end_time p = Period('2012', freq='M') xp = _ex(2012, 2, 1) - self.assertEqual(xp, p.end_time) + assert xp == p.end_time p = Period('2012', freq='D') xp = _ex(2012, 1, 2) - self.assertEqual(xp, p.end_time) + assert xp == p.end_time p = Period('2012', freq='H') xp = _ex(2012, 1, 1, 1) - self.assertEqual(xp, p.end_time) + assert xp == p.end_time p = Period('2012', freq='B') xp = _ex(2012, 1, 3) - self.assertEqual(xp, p.end_time) + assert xp == p.end_time p = Period('2012', freq='W') xp = _ex(2012, 1, 2) - self.assertEqual(xp, p.end_time) + assert xp == p.end_time # Test for GH 11738 p = Period('2012', 
freq='15D') xp = _ex(2012, 1, 16) - self.assertEqual(xp, p.end_time) + assert xp == p.end_time p = Period('2012', freq='1D1H') xp = _ex(2012, 1, 2, 1) - self.assertEqual(xp, p.end_time) + assert xp == p.end_time p = Period('2012', freq='1H1D') xp = _ex(2012, 1, 2, 1) - self.assertEqual(xp, p.end_time) + assert xp == p.end_time def test_anchor_week_end_time(self): def _ex(*args): @@ -701,12 +697,12 @@ def _ex(*args): p = Period('2013-1-1', 'W-SAT') xp = _ex(2013, 1, 6) - self.assertEqual(p.end_time, xp) + assert p.end_time == xp def test_properties_annually(self): # Test properties on Periods with annually frequency. a_date = Period(freq='A', year=2007) - self.assertEqual(a_date.year, 2007) + assert a_date.year == 2007 def test_properties_quarterly(self): # Test properties on Periods with daily frequency. @@ -716,50 +712,50 @@ def test_properties_quarterly(self): # for x in range(3): for qd in (qedec_date, qejan_date, qejun_date): - self.assertEqual((qd + x).qyear, 2007) - self.assertEqual((qd + x).quarter, x + 1) + assert (qd + x).qyear == 2007 + assert (qd + x).quarter == x + 1 def test_properties_monthly(self): # Test properties on Periods with daily frequency. m_date = Period(freq='M', year=2007, month=1) for x in range(11): m_ival_x = m_date + x - self.assertEqual(m_ival_x.year, 2007) + assert m_ival_x.year == 2007 if 1 <= x + 1 <= 3: - self.assertEqual(m_ival_x.quarter, 1) + assert m_ival_x.quarter == 1 elif 4 <= x + 1 <= 6: - self.assertEqual(m_ival_x.quarter, 2) + assert m_ival_x.quarter == 2 elif 7 <= x + 1 <= 9: - self.assertEqual(m_ival_x.quarter, 3) + assert m_ival_x.quarter == 3 elif 10 <= x + 1 <= 12: - self.assertEqual(m_ival_x.quarter, 4) - self.assertEqual(m_ival_x.month, x + 1) + assert m_ival_x.quarter == 4 + assert m_ival_x.month == x + 1 def test_properties_weekly(self): # Test properties on Periods with daily frequency. w_date = Period(freq='W', year=2007, month=1, day=7) # - self.assertEqual(w_date.year, 2007) - self.assertEqual(w_date.quarter, 1) - self.assertEqual(w_date.month, 1) - self.assertEqual(w_date.week, 1) - self.assertEqual((w_date - 1).week, 52) - self.assertEqual(w_date.days_in_month, 31) - self.assertEqual(Period(freq='W', year=2012, - month=2, day=1).days_in_month, 29) + assert w_date.year == 2007 + assert w_date.quarter == 1 + assert w_date.month == 1 + assert w_date.week == 1 + assert (w_date - 1).week == 52 + assert w_date.days_in_month == 31 + assert Period(freq='W', year=2012, + month=2, day=1).days_in_month == 29 def test_properties_weekly_legacy(self): # Test properties on Periods with daily frequency. w_date = Period(freq='W', year=2007, month=1, day=7) - self.assertEqual(w_date.year, 2007) - self.assertEqual(w_date.quarter, 1) - self.assertEqual(w_date.month, 1) - self.assertEqual(w_date.week, 1) - self.assertEqual((w_date - 1).week, 52) - self.assertEqual(w_date.days_in_month, 31) + assert w_date.year == 2007 + assert w_date.quarter == 1 + assert w_date.month == 1 + assert w_date.week == 1 + assert (w_date - 1).week == 52 + assert w_date.days_in_month == 31 exp = Period(freq='W', year=2012, month=2, day=1) - self.assertEqual(exp.days_in_month, 29) + assert exp.days_in_month == 29 msg = pd.tseries.frequencies._INVALID_FREQ_ERROR with tm.assert_raises_regex(ValueError, msg): @@ -769,27 +765,27 @@ def test_properties_daily(self): # Test properties on Periods with daily frequency. 
b_date = Period(freq='B', year=2007, month=1, day=1) # - self.assertEqual(b_date.year, 2007) - self.assertEqual(b_date.quarter, 1) - self.assertEqual(b_date.month, 1) - self.assertEqual(b_date.day, 1) - self.assertEqual(b_date.weekday, 0) - self.assertEqual(b_date.dayofyear, 1) - self.assertEqual(b_date.days_in_month, 31) - self.assertEqual(Period(freq='B', year=2012, - month=2, day=1).days_in_month, 29) - # + assert b_date.year == 2007 + assert b_date.quarter == 1 + assert b_date.month == 1 + assert b_date.day == 1 + assert b_date.weekday == 0 + assert b_date.dayofyear == 1 + assert b_date.days_in_month == 31 + assert Period(freq='B', year=2012, + month=2, day=1).days_in_month == 29 + d_date = Period(freq='D', year=2007, month=1, day=1) - # - self.assertEqual(d_date.year, 2007) - self.assertEqual(d_date.quarter, 1) - self.assertEqual(d_date.month, 1) - self.assertEqual(d_date.day, 1) - self.assertEqual(d_date.weekday, 0) - self.assertEqual(d_date.dayofyear, 1) - self.assertEqual(d_date.days_in_month, 31) - self.assertEqual(Period(freq='D', year=2012, month=2, - day=1).days_in_month, 29) + + assert d_date.year == 2007 + assert d_date.quarter == 1 + assert d_date.month == 1 + assert d_date.day == 1 + assert d_date.weekday == 0 + assert d_date.dayofyear == 1 + assert d_date.days_in_month == 31 + assert Period(freq='D', year=2012, month=2, + day=1).days_in_month == 29 def test_properties_hourly(self): # Test properties on Periods with hourly frequency. @@ -797,50 +793,50 @@ def test_properties_hourly(self): h_date2 = Period(freq='2H', year=2007, month=1, day=1, hour=0) for h_date in [h_date1, h_date2]: - self.assertEqual(h_date.year, 2007) - self.assertEqual(h_date.quarter, 1) - self.assertEqual(h_date.month, 1) - self.assertEqual(h_date.day, 1) - self.assertEqual(h_date.weekday, 0) - self.assertEqual(h_date.dayofyear, 1) - self.assertEqual(h_date.hour, 0) - self.assertEqual(h_date.days_in_month, 31) - self.assertEqual(Period(freq='H', year=2012, month=2, day=1, - hour=0).days_in_month, 29) + assert h_date.year == 2007 + assert h_date.quarter == 1 + assert h_date.month == 1 + assert h_date.day == 1 + assert h_date.weekday == 0 + assert h_date.dayofyear == 1 + assert h_date.hour == 0 + assert h_date.days_in_month == 31 + assert Period(freq='H', year=2012, month=2, day=1, + hour=0).days_in_month == 29 def test_properties_minutely(self): # Test properties on Periods with minutely frequency. t_date = Period(freq='Min', year=2007, month=1, day=1, hour=0, minute=0) # - self.assertEqual(t_date.quarter, 1) - self.assertEqual(t_date.month, 1) - self.assertEqual(t_date.day, 1) - self.assertEqual(t_date.weekday, 0) - self.assertEqual(t_date.dayofyear, 1) - self.assertEqual(t_date.hour, 0) - self.assertEqual(t_date.minute, 0) - self.assertEqual(t_date.days_in_month, 31) - self.assertEqual(Period(freq='D', year=2012, month=2, day=1, hour=0, - minute=0).days_in_month, 29) + assert t_date.quarter == 1 + assert t_date.month == 1 + assert t_date.day == 1 + assert t_date.weekday == 0 + assert t_date.dayofyear == 1 + assert t_date.hour == 0 + assert t_date.minute == 0 + assert t_date.days_in_month == 31 + assert Period(freq='D', year=2012, month=2, day=1, hour=0, + minute=0).days_in_month == 29 def test_properties_secondly(self): # Test properties on Periods with secondly frequency. 
s_date = Period(freq='Min', year=2007, month=1, day=1, hour=0, minute=0, second=0) # - self.assertEqual(s_date.year, 2007) - self.assertEqual(s_date.quarter, 1) - self.assertEqual(s_date.month, 1) - self.assertEqual(s_date.day, 1) - self.assertEqual(s_date.weekday, 0) - self.assertEqual(s_date.dayofyear, 1) - self.assertEqual(s_date.hour, 0) - self.assertEqual(s_date.minute, 0) - self.assertEqual(s_date.second, 0) - self.assertEqual(s_date.days_in_month, 31) - self.assertEqual(Period(freq='Min', year=2012, month=2, day=1, hour=0, - minute=0, second=0).days_in_month, 29) + assert s_date.year == 2007 + assert s_date.quarter == 1 + assert s_date.month == 1 + assert s_date.day == 1 + assert s_date.weekday == 0 + assert s_date.dayofyear == 1 + assert s_date.hour == 0 + assert s_date.minute == 0 + assert s_date.second == 0 + assert s_date.days_in_month == 31 + assert Period(freq='Min', year=2012, month=2, day=1, hour=0, + minute=0, second=0).days_in_month == 29 def test_pnow(self): @@ -851,7 +847,7 @@ def test_pnow(self): def test_constructor_corner(self): expected = Period('2007-01', freq='2M') - self.assertEqual(Period(year=2007, month=1, freq='2M'), expected) + assert Period(year=2007, month=1, freq='2M') == expected pytest.raises(ValueError, Period, datetime.now()) pytest.raises(ValueError, Period, datetime.now().date()) @@ -865,29 +861,29 @@ def test_constructor_corner(self): result = Period(p, freq='A') exp = Period('2007', freq='A') - self.assertEqual(result, exp) + assert result == exp def test_constructor_infer_freq(self): p = Period('2007-01-01') - self.assertEqual(p.freq, 'D') + assert p.freq == 'D' p = Period('2007-01-01 07') - self.assertEqual(p.freq, 'H') + assert p.freq == 'H' p = Period('2007-01-01 07:10') - self.assertEqual(p.freq, 'T') + assert p.freq == 'T' p = Period('2007-01-01 07:10:15') - self.assertEqual(p.freq, 'S') + assert p.freq == 'S' p = Period('2007-01-01 07:10:15.123') - self.assertEqual(p.freq, 'L') + assert p.freq == 'L' p = Period('2007-01-01 07:10:15.123000') - self.assertEqual(p.freq, 'L') + assert p.freq == 'L' p = Period('2007-01-01 07:10:15.123400') - self.assertEqual(p.freq, 'U') + assert p.freq == 'U' def test_badinput(self): pytest.raises(ValueError, Period, '-2000', 'A') @@ -897,22 +893,22 @@ def test_badinput(self): def test_multiples(self): result1 = Period('1989', freq='2A') result2 = Period('1989', freq='A') - self.assertEqual(result1.ordinal, result2.ordinal) - self.assertEqual(result1.freqstr, '2A-DEC') - self.assertEqual(result2.freqstr, 'A-DEC') - self.assertEqual(result1.freq, offsets.YearEnd(2)) - self.assertEqual(result2.freq, offsets.YearEnd()) + assert result1.ordinal == result2.ordinal + assert result1.freqstr == '2A-DEC' + assert result2.freqstr == 'A-DEC' + assert result1.freq == offsets.YearEnd(2) + assert result2.freq == offsets.YearEnd() - self.assertEqual((result1 + 1).ordinal, result1.ordinal + 2) - self.assertEqual((1 + result1).ordinal, result1.ordinal + 2) - self.assertEqual((result1 - 1).ordinal, result2.ordinal - 2) - self.assertEqual((-1 + result1).ordinal, result2.ordinal - 2) + assert (result1 + 1).ordinal == result1.ordinal + 2 + assert (1 + result1).ordinal == result1.ordinal + 2 + assert (result1 - 1).ordinal == result2.ordinal - 2 + assert (-1 + result1).ordinal == result2.ordinal - 2 def test_round_trip(self): p = Period('2000Q1') new_p = tm.round_trip_pickle(p) - self.assertEqual(new_p, p) + assert new_p == p class TestPeriodField(tm.TestCase): @@ -935,7 +931,7 @@ def setUp(self): self.day = Period('2012-01-01', 'D') 
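# --- editorial note, not part of the original patch ------------------------
# Every hunk in this series applies the same mechanical rewrite:
# ``self.assertEqual(a, b)`` becomes a bare ``assert a == b``. Nothing is
# lost under pytest, which rewrites ``assert`` statements at import time and
# reports both operands when a comparison fails. A minimal runnable sketch of
# the pattern (the test name is illustrative, not taken from pandas):

import pandas as pd

def test_assert_rewrite_sketch():
    p1 = pd.Period('2011-01-01', freq='D')
    p2 = pd.Period('2011-01-01', freq='D')
    # old unittest style:  self.assertEqual(p1, p2)
    # new pytest style:
    assert p1 == p2
    # negated assertions follow the same pattern; compare the
    # test_period_nat_comp hunk just below:
    p_nat = pd.Period('NaT', freq='D')
    assert not p_nat == p1
    assert p_nat != p1
# ----------------------------------------------------------------------------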
def test_equal(self): - self.assertEqual(self.january1, self.january2) + assert self.january1 == self.january2 def test_equal_Raises_Value(self): with pytest.raises(period.IncompatibleFrequency): @@ -991,7 +987,7 @@ def test_smaller_Raises_Type(self): def test_sort(self): periods = [self.march, self.january1, self.february] correctPeriods = [self.january1, self.february, self.march] - self.assertEqual(sorted(periods), correctPeriods) + assert sorted(periods) == correctPeriods def test_period_nat_comp(self): p_nat = Period('NaT', freq='D') @@ -1002,12 +998,12 @@ def test_period_nat_comp(self): # confirm Period('NaT') work identical with Timestamp('NaT') for left, right in [(p_nat, p), (p, p_nat), (p_nat, p_nat), (nat, t), (t, nat), (nat, nat)]: - self.assertEqual(left < right, False) - self.assertEqual(left > right, False) - self.assertEqual(left == right, False) - self.assertEqual(left != right, True) - self.assertEqual(left <= right, False) - self.assertEqual(left >= right, False) + assert not left < right + assert not left > right + assert not left == right + assert left != right + assert not left <= right + assert not left >= right class TestMethods(tm.TestCase): @@ -1015,8 +1011,8 @@ class TestMethods(tm.TestCase): def test_add(self): dt1 = Period(freq='D', year=2008, month=1, day=1) dt2 = Period(freq='D', year=2008, month=1, day=2) - self.assertEqual(dt1 + 1, dt2) - self.assertEqual(1 + dt1, dt2) + assert dt1 + 1 == dt2 + assert 1 + dt1 == dt2 def test_add_pdnat(self): p = pd.Period('2011-01', freq='M') @@ -1046,8 +1042,8 @@ def test_sub(self): dt1 = Period('2011-01-01', freq='D') dt2 = Period('2011-01-15', freq='D') - self.assertEqual(dt1 - dt2, -14) - self.assertEqual(dt2 - dt1, 14) + assert dt1 - dt2 == -14 + assert dt2 - dt1 == 14 msg = r"Input has different freq=M from Period\(freq=D\)" with tm.assert_raises_regex(period.IncompatibleFrequency, msg): @@ -1058,8 +1054,8 @@ def test_add_offset(self): for freq in ['A', '2A', '3A']: p = Period('2011', freq=freq) exp = Period('2013', freq=freq) - self.assertEqual(p + offsets.YearEnd(2), exp) - self.assertEqual(offsets.YearEnd(2) + p, exp) + assert p + offsets.YearEnd(2) == exp + assert offsets.YearEnd(2) + p == exp for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), @@ -1077,12 +1073,12 @@ def test_add_offset(self): for freq in ['M', '2M', '3M']: p = Period('2011-03', freq=freq) exp = Period('2011-05', freq=freq) - self.assertEqual(p + offsets.MonthEnd(2), exp) - self.assertEqual(offsets.MonthEnd(2) + p, exp) + assert p + offsets.MonthEnd(2) == exp + assert offsets.MonthEnd(2) + p == exp exp = Period('2012-03', freq=freq) - self.assertEqual(p + offsets.MonthEnd(12), exp) - self.assertEqual(offsets.MonthEnd(12) + p, exp) + assert p + offsets.MonthEnd(12) == exp + assert offsets.MonthEnd(12) + p == exp for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), @@ -1102,30 +1098,30 @@ def test_add_offset(self): p = Period('2011-04-01', freq=freq) exp = Period('2011-04-06', freq=freq) - self.assertEqual(p + offsets.Day(5), exp) - self.assertEqual(offsets.Day(5) + p, exp) + assert p + offsets.Day(5) == exp + assert offsets.Day(5) + p == exp exp = Period('2011-04-02', freq=freq) - self.assertEqual(p + offsets.Hour(24), exp) - self.assertEqual(offsets.Hour(24) + p, exp) + assert p + offsets.Hour(24) == exp + assert offsets.Hour(24) + p == exp exp = Period('2011-04-03', freq=freq) - self.assertEqual(p + np.timedelta64(2, 'D'), exp) + assert p + 
np.timedelta64(2, 'D') == exp with pytest.raises(TypeError): np.timedelta64(2, 'D') + p exp = Period('2011-04-02', freq=freq) - self.assertEqual(p + np.timedelta64(3600 * 24, 's'), exp) + assert p + np.timedelta64(3600 * 24, 's') == exp with pytest.raises(TypeError): np.timedelta64(3600 * 24, 's') + p exp = Period('2011-03-30', freq=freq) - self.assertEqual(p + timedelta(-2), exp) - self.assertEqual(timedelta(-2) + p, exp) + assert p + timedelta(-2) == exp + assert timedelta(-2) + p == exp exp = Period('2011-04-03', freq=freq) - self.assertEqual(p + timedelta(hours=48), exp) - self.assertEqual(timedelta(hours=48) + p, exp) + assert p + timedelta(hours=48) == exp + assert timedelta(hours=48) + p == exp for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(4, 'h'), @@ -1144,30 +1140,30 @@ def test_add_offset(self): p = Period('2011-04-01 09:00', freq=freq) exp = Period('2011-04-03 09:00', freq=freq) - self.assertEqual(p + offsets.Day(2), exp) - self.assertEqual(offsets.Day(2) + p, exp) + assert p + offsets.Day(2) == exp + assert offsets.Day(2) + p == exp exp = Period('2011-04-01 12:00', freq=freq) - self.assertEqual(p + offsets.Hour(3), exp) - self.assertEqual(offsets.Hour(3) + p, exp) + assert p + offsets.Hour(3) == exp + assert offsets.Hour(3) + p == exp exp = Period('2011-04-01 12:00', freq=freq) - self.assertEqual(p + np.timedelta64(3, 'h'), exp) + assert p + np.timedelta64(3, 'h') == exp with pytest.raises(TypeError): np.timedelta64(3, 'h') + p exp = Period('2011-04-01 10:00', freq=freq) - self.assertEqual(p + np.timedelta64(3600, 's'), exp) + assert p + np.timedelta64(3600, 's') == exp with pytest.raises(TypeError): np.timedelta64(3600, 's') + p exp = Period('2011-04-01 11:00', freq=freq) - self.assertEqual(p + timedelta(minutes=120), exp) - self.assertEqual(timedelta(minutes=120) + p, exp) + assert p + timedelta(minutes=120) == exp + assert timedelta(minutes=120) + p == exp exp = Period('2011-04-05 12:00', freq=freq) - self.assertEqual(p + timedelta(days=4, minutes=180), exp) - self.assertEqual(timedelta(days=4, minutes=180) + p, exp) + assert p + timedelta(days=4, minutes=180) == exp + assert timedelta(days=4, minutes=180) + p == exp for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(3200, 's'), @@ -1283,7 +1279,7 @@ def test_sub_offset(self): # freq is DateOffset for freq in ['A', '2A', '3A']: p = Period('2011', freq=freq) - self.assertEqual(p - offsets.YearEnd(2), Period('2009', freq=freq)) + assert p - offsets.YearEnd(2) == Period('2009', freq=freq) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), @@ -1293,10 +1289,8 @@ def test_sub_offset(self): for freq in ['M', '2M', '3M']: p = Period('2011-03', freq=freq) - self.assertEqual(p - offsets.MonthEnd(2), - Period('2011-01', freq=freq)) - self.assertEqual(p - offsets.MonthEnd(12), - Period('2010-03', freq=freq)) + assert p - offsets.MonthEnd(2) == Period('2011-01', freq=freq) + assert p - offsets.MonthEnd(12) == Period('2010-03', freq=freq) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), @@ -1307,18 +1301,14 @@ def test_sub_offset(self): # freq is Tick for freq in ['D', '2D', '3D']: p = Period('2011-04-01', freq=freq) - self.assertEqual(p - offsets.Day(5), - Period('2011-03-27', freq=freq)) - self.assertEqual(p - offsets.Hour(24), - Period('2011-03-31', freq=freq)) - self.assertEqual(p - np.timedelta64(2, 'D'), - Period('2011-03-30', freq=freq)) - 
self.assertEqual(p - np.timedelta64(3600 * 24, 's'), - Period('2011-03-31', freq=freq)) - self.assertEqual(p - timedelta(-2), - Period('2011-04-03', freq=freq)) - self.assertEqual(p - timedelta(hours=48), - Period('2011-03-30', freq=freq)) + assert p - offsets.Day(5) == Period('2011-03-27', freq=freq) + assert p - offsets.Hour(24) == Period('2011-03-31', freq=freq) + assert p - np.timedelta64(2, 'D') == Period( + '2011-03-30', freq=freq) + assert p - np.timedelta64(3600 * 24, 's') == Period( + '2011-03-31', freq=freq) + assert p - timedelta(-2) == Period('2011-04-03', freq=freq) + assert p - timedelta(hours=48) == Period('2011-03-30', freq=freq) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(4, 'h'), @@ -1328,18 +1318,16 @@ def test_sub_offset(self): for freq in ['H', '2H', '3H']: p = Period('2011-04-01 09:00', freq=freq) - self.assertEqual(p - offsets.Day(2), - Period('2011-03-30 09:00', freq=freq)) - self.assertEqual(p - offsets.Hour(3), - Period('2011-04-01 06:00', freq=freq)) - self.assertEqual(p - np.timedelta64(3, 'h'), - Period('2011-04-01 06:00', freq=freq)) - self.assertEqual(p - np.timedelta64(3600, 's'), - Period('2011-04-01 08:00', freq=freq)) - self.assertEqual(p - timedelta(minutes=120), - Period('2011-04-01 07:00', freq=freq)) - self.assertEqual(p - timedelta(days=4, minutes=180), - Period('2011-03-28 06:00', freq=freq)) + assert p - offsets.Day(2) == Period('2011-03-30 09:00', freq=freq) + assert p - offsets.Hour(3) == Period('2011-04-01 06:00', freq=freq) + assert p - np.timedelta64(3, 'h') == Period( + '2011-04-01 06:00', freq=freq) + assert p - np.timedelta64(3600, 's') == Period( + '2011-04-01 08:00', freq=freq) + assert p - timedelta(minutes=120) == Period( + '2011-04-01 07:00', freq=freq) + assert p - timedelta(days=4, minutes=180) == Period( + '2011-03-28 06:00', freq=freq) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(3200, 's'), @@ -1407,11 +1395,11 @@ def test_period_ops_offset(self): p = Period('2011-04-01', freq='D') result = p + offsets.Day() exp = pd.Period('2011-04-02', freq='D') - self.assertEqual(result, exp) + assert result == exp result = p - offsets.Day(2) exp = pd.Period('2011-03-30', freq='D') - self.assertEqual(result, exp) + assert result == exp msg = r"Input cannot be converted to Period\(freq=D\)" with tm.assert_raises_regex(period.IncompatibleFrequency, msg): diff --git a/pandas/tests/scalar/test_period_asfreq.py b/pandas/tests/scalar/test_period_asfreq.py index d31eeda5c8e3c..7011cfeef90ae 100644 --- a/pandas/tests/scalar/test_period_asfreq.py +++ b/pandas/tests/scalar/test_period_asfreq.py @@ -5,17 +5,17 @@ class TestFreqConversion(tm.TestCase): - "Test frequency conversion of date objects" + """Test frequency conversion of date objects""" def test_asfreq_corner(self): val = Period(freq='A', year=2007) result1 = val.asfreq('5t') result2 = val.asfreq('t') expected = Period('2007-12-31 23:59', freq='t') - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freqstr, '5T') - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freqstr, 'T') + assert result1.ordinal == expected.ordinal + assert result1.freqstr == '5T' + assert result2.ordinal == expected.ordinal + assert result2.freqstr == 'T' def test_conv_annual(self): # frequency conversion tests: from Annual Frequency @@ -55,35 +55,35 @@ def test_conv_annual(self): ival_ANOV_to_D_end = Period(freq='D', year=2007, month=11, day=30) ival_ANOV_to_D_start = 
Period(freq='D', year=2006, month=12, day=1) - self.assertEqual(ival_A.asfreq('Q', 'S'), ival_A_to_Q_start) - self.assertEqual(ival_A.asfreq('Q', 'e'), ival_A_to_Q_end) - self.assertEqual(ival_A.asfreq('M', 's'), ival_A_to_M_start) - self.assertEqual(ival_A.asfreq('M', 'E'), ival_A_to_M_end) - self.assertEqual(ival_A.asfreq('W', 'S'), ival_A_to_W_start) - self.assertEqual(ival_A.asfreq('W', 'E'), ival_A_to_W_end) - self.assertEqual(ival_A.asfreq('B', 'S'), ival_A_to_B_start) - self.assertEqual(ival_A.asfreq('B', 'E'), ival_A_to_B_end) - self.assertEqual(ival_A.asfreq('D', 'S'), ival_A_to_D_start) - self.assertEqual(ival_A.asfreq('D', 'E'), ival_A_to_D_end) - self.assertEqual(ival_A.asfreq('H', 'S'), ival_A_to_H_start) - self.assertEqual(ival_A.asfreq('H', 'E'), ival_A_to_H_end) - self.assertEqual(ival_A.asfreq('min', 'S'), ival_A_to_T_start) - self.assertEqual(ival_A.asfreq('min', 'E'), ival_A_to_T_end) - self.assertEqual(ival_A.asfreq('T', 'S'), ival_A_to_T_start) - self.assertEqual(ival_A.asfreq('T', 'E'), ival_A_to_T_end) - self.assertEqual(ival_A.asfreq('S', 'S'), ival_A_to_S_start) - self.assertEqual(ival_A.asfreq('S', 'E'), ival_A_to_S_end) - - self.assertEqual(ival_AJAN.asfreq('D', 'S'), ival_AJAN_to_D_start) - self.assertEqual(ival_AJAN.asfreq('D', 'E'), ival_AJAN_to_D_end) - - self.assertEqual(ival_AJUN.asfreq('D', 'S'), ival_AJUN_to_D_start) - self.assertEqual(ival_AJUN.asfreq('D', 'E'), ival_AJUN_to_D_end) - - self.assertEqual(ival_ANOV.asfreq('D', 'S'), ival_ANOV_to_D_start) - self.assertEqual(ival_ANOV.asfreq('D', 'E'), ival_ANOV_to_D_end) - - self.assertEqual(ival_A.asfreq('A'), ival_A) + assert ival_A.asfreq('Q', 'S') == ival_A_to_Q_start + assert ival_A.asfreq('Q', 'e') == ival_A_to_Q_end + assert ival_A.asfreq('M', 's') == ival_A_to_M_start + assert ival_A.asfreq('M', 'E') == ival_A_to_M_end + assert ival_A.asfreq('W', 'S') == ival_A_to_W_start + assert ival_A.asfreq('W', 'E') == ival_A_to_W_end + assert ival_A.asfreq('B', 'S') == ival_A_to_B_start + assert ival_A.asfreq('B', 'E') == ival_A_to_B_end + assert ival_A.asfreq('D', 'S') == ival_A_to_D_start + assert ival_A.asfreq('D', 'E') == ival_A_to_D_end + assert ival_A.asfreq('H', 'S') == ival_A_to_H_start + assert ival_A.asfreq('H', 'E') == ival_A_to_H_end + assert ival_A.asfreq('min', 'S') == ival_A_to_T_start + assert ival_A.asfreq('min', 'E') == ival_A_to_T_end + assert ival_A.asfreq('T', 'S') == ival_A_to_T_start + assert ival_A.asfreq('T', 'E') == ival_A_to_T_end + assert ival_A.asfreq('S', 'S') == ival_A_to_S_start + assert ival_A.asfreq('S', 'E') == ival_A_to_S_end + + assert ival_AJAN.asfreq('D', 'S') == ival_AJAN_to_D_start + assert ival_AJAN.asfreq('D', 'E') == ival_AJAN_to_D_end + + assert ival_AJUN.asfreq('D', 'S') == ival_AJUN_to_D_start + assert ival_AJUN.asfreq('D', 'E') == ival_AJUN_to_D_end + + assert ival_ANOV.asfreq('D', 'S') == ival_ANOV_to_D_start + assert ival_ANOV.asfreq('D', 'E') == ival_ANOV_to_D_end + + assert ival_A.asfreq('A') == ival_A def test_conv_quarterly(self): # frequency conversion tests: from Quarterly Frequency @@ -120,30 +120,30 @@ def test_conv_quarterly(self): ival_QEJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) ival_QEJUN_to_D_end = Period(freq='D', year=2006, month=9, day=30) - self.assertEqual(ival_Q.asfreq('A'), ival_Q_to_A) - self.assertEqual(ival_Q_end_of_year.asfreq('A'), ival_Q_to_A) - - self.assertEqual(ival_Q.asfreq('M', 'S'), ival_Q_to_M_start) - self.assertEqual(ival_Q.asfreq('M', 'E'), ival_Q_to_M_end) - self.assertEqual(ival_Q.asfreq('W', 'S'), 
ival_Q_to_W_start) - self.assertEqual(ival_Q.asfreq('W', 'E'), ival_Q_to_W_end) - self.assertEqual(ival_Q.asfreq('B', 'S'), ival_Q_to_B_start) - self.assertEqual(ival_Q.asfreq('B', 'E'), ival_Q_to_B_end) - self.assertEqual(ival_Q.asfreq('D', 'S'), ival_Q_to_D_start) - self.assertEqual(ival_Q.asfreq('D', 'E'), ival_Q_to_D_end) - self.assertEqual(ival_Q.asfreq('H', 'S'), ival_Q_to_H_start) - self.assertEqual(ival_Q.asfreq('H', 'E'), ival_Q_to_H_end) - self.assertEqual(ival_Q.asfreq('Min', 'S'), ival_Q_to_T_start) - self.assertEqual(ival_Q.asfreq('Min', 'E'), ival_Q_to_T_end) - self.assertEqual(ival_Q.asfreq('S', 'S'), ival_Q_to_S_start) - self.assertEqual(ival_Q.asfreq('S', 'E'), ival_Q_to_S_end) - - self.assertEqual(ival_QEJAN.asfreq('D', 'S'), ival_QEJAN_to_D_start) - self.assertEqual(ival_QEJAN.asfreq('D', 'E'), ival_QEJAN_to_D_end) - self.assertEqual(ival_QEJUN.asfreq('D', 'S'), ival_QEJUN_to_D_start) - self.assertEqual(ival_QEJUN.asfreq('D', 'E'), ival_QEJUN_to_D_end) - - self.assertEqual(ival_Q.asfreq('Q'), ival_Q) + assert ival_Q.asfreq('A') == ival_Q_to_A + assert ival_Q_end_of_year.asfreq('A') == ival_Q_to_A + + assert ival_Q.asfreq('M', 'S') == ival_Q_to_M_start + assert ival_Q.asfreq('M', 'E') == ival_Q_to_M_end + assert ival_Q.asfreq('W', 'S') == ival_Q_to_W_start + assert ival_Q.asfreq('W', 'E') == ival_Q_to_W_end + assert ival_Q.asfreq('B', 'S') == ival_Q_to_B_start + assert ival_Q.asfreq('B', 'E') == ival_Q_to_B_end + assert ival_Q.asfreq('D', 'S') == ival_Q_to_D_start + assert ival_Q.asfreq('D', 'E') == ival_Q_to_D_end + assert ival_Q.asfreq('H', 'S') == ival_Q_to_H_start + assert ival_Q.asfreq('H', 'E') == ival_Q_to_H_end + assert ival_Q.asfreq('Min', 'S') == ival_Q_to_T_start + assert ival_Q.asfreq('Min', 'E') == ival_Q_to_T_end + assert ival_Q.asfreq('S', 'S') == ival_Q_to_S_start + assert ival_Q.asfreq('S', 'E') == ival_Q_to_S_end + + assert ival_QEJAN.asfreq('D', 'S') == ival_QEJAN_to_D_start + assert ival_QEJAN.asfreq('D', 'E') == ival_QEJAN_to_D_end + assert ival_QEJUN.asfreq('D', 'S') == ival_QEJUN_to_D_start + assert ival_QEJUN.asfreq('D', 'E') == ival_QEJUN_to_D_end + + assert ival_Q.asfreq('Q') == ival_Q def test_conv_monthly(self): # frequency conversion tests: from Monthly Frequency @@ -170,25 +170,25 @@ def test_conv_monthly(self): ival_M_to_S_end = Period(freq='S', year=2007, month=1, day=31, hour=23, minute=59, second=59) - self.assertEqual(ival_M.asfreq('A'), ival_M_to_A) - self.assertEqual(ival_M_end_of_year.asfreq('A'), ival_M_to_A) - self.assertEqual(ival_M.asfreq('Q'), ival_M_to_Q) - self.assertEqual(ival_M_end_of_quarter.asfreq('Q'), ival_M_to_Q) - - self.assertEqual(ival_M.asfreq('W', 'S'), ival_M_to_W_start) - self.assertEqual(ival_M.asfreq('W', 'E'), ival_M_to_W_end) - self.assertEqual(ival_M.asfreq('B', 'S'), ival_M_to_B_start) - self.assertEqual(ival_M.asfreq('B', 'E'), ival_M_to_B_end) - self.assertEqual(ival_M.asfreq('D', 'S'), ival_M_to_D_start) - self.assertEqual(ival_M.asfreq('D', 'E'), ival_M_to_D_end) - self.assertEqual(ival_M.asfreq('H', 'S'), ival_M_to_H_start) - self.assertEqual(ival_M.asfreq('H', 'E'), ival_M_to_H_end) - self.assertEqual(ival_M.asfreq('Min', 'S'), ival_M_to_T_start) - self.assertEqual(ival_M.asfreq('Min', 'E'), ival_M_to_T_end) - self.assertEqual(ival_M.asfreq('S', 'S'), ival_M_to_S_start) - self.assertEqual(ival_M.asfreq('S', 'E'), ival_M_to_S_end) - - self.assertEqual(ival_M.asfreq('M'), ival_M) + assert ival_M.asfreq('A') == ival_M_to_A + assert ival_M_end_of_year.asfreq('A') == ival_M_to_A + assert ival_M.asfreq('Q') 
== ival_M_to_Q + assert ival_M_end_of_quarter.asfreq('Q') == ival_M_to_Q + + assert ival_M.asfreq('W', 'S') == ival_M_to_W_start + assert ival_M.asfreq('W', 'E') == ival_M_to_W_end + assert ival_M.asfreq('B', 'S') == ival_M_to_B_start + assert ival_M.asfreq('B', 'E') == ival_M_to_B_end + assert ival_M.asfreq('D', 'S') == ival_M_to_D_start + assert ival_M.asfreq('D', 'E') == ival_M_to_D_end + assert ival_M.asfreq('H', 'S') == ival_M_to_H_start + assert ival_M.asfreq('H', 'E') == ival_M_to_H_end + assert ival_M.asfreq('Min', 'S') == ival_M_to_T_start + assert ival_M.asfreq('Min', 'E') == ival_M_to_T_end + assert ival_M.asfreq('S', 'S') == ival_M_to_S_start + assert ival_M.asfreq('S', 'E') == ival_M_to_S_end + + assert ival_M.asfreq('M') == ival_M def test_conv_weekly(self): # frequency conversion tests: from Weekly Frequency @@ -254,45 +254,44 @@ def test_conv_weekly(self): ival_W_to_S_end = Period(freq='S', year=2007, month=1, day=7, hour=23, minute=59, second=59) - self.assertEqual(ival_W.asfreq('A'), ival_W_to_A) - self.assertEqual(ival_W_end_of_year.asfreq('A'), - ival_W_to_A_end_of_year) - self.assertEqual(ival_W.asfreq('Q'), ival_W_to_Q) - self.assertEqual(ival_W_end_of_quarter.asfreq('Q'), - ival_W_to_Q_end_of_quarter) - self.assertEqual(ival_W.asfreq('M'), ival_W_to_M) - self.assertEqual(ival_W_end_of_month.asfreq('M'), - ival_W_to_M_end_of_month) - - self.assertEqual(ival_W.asfreq('B', 'S'), ival_W_to_B_start) - self.assertEqual(ival_W.asfreq('B', 'E'), ival_W_to_B_end) - - self.assertEqual(ival_W.asfreq('D', 'S'), ival_W_to_D_start) - self.assertEqual(ival_W.asfreq('D', 'E'), ival_W_to_D_end) - - self.assertEqual(ival_WSUN.asfreq('D', 'S'), ival_WSUN_to_D_start) - self.assertEqual(ival_WSUN.asfreq('D', 'E'), ival_WSUN_to_D_end) - self.assertEqual(ival_WSAT.asfreq('D', 'S'), ival_WSAT_to_D_start) - self.assertEqual(ival_WSAT.asfreq('D', 'E'), ival_WSAT_to_D_end) - self.assertEqual(ival_WFRI.asfreq('D', 'S'), ival_WFRI_to_D_start) - self.assertEqual(ival_WFRI.asfreq('D', 'E'), ival_WFRI_to_D_end) - self.assertEqual(ival_WTHU.asfreq('D', 'S'), ival_WTHU_to_D_start) - self.assertEqual(ival_WTHU.asfreq('D', 'E'), ival_WTHU_to_D_end) - self.assertEqual(ival_WWED.asfreq('D', 'S'), ival_WWED_to_D_start) - self.assertEqual(ival_WWED.asfreq('D', 'E'), ival_WWED_to_D_end) - self.assertEqual(ival_WTUE.asfreq('D', 'S'), ival_WTUE_to_D_start) - self.assertEqual(ival_WTUE.asfreq('D', 'E'), ival_WTUE_to_D_end) - self.assertEqual(ival_WMON.asfreq('D', 'S'), ival_WMON_to_D_start) - self.assertEqual(ival_WMON.asfreq('D', 'E'), ival_WMON_to_D_end) - - self.assertEqual(ival_W.asfreq('H', 'S'), ival_W_to_H_start) - self.assertEqual(ival_W.asfreq('H', 'E'), ival_W_to_H_end) - self.assertEqual(ival_W.asfreq('Min', 'S'), ival_W_to_T_start) - self.assertEqual(ival_W.asfreq('Min', 'E'), ival_W_to_T_end) - self.assertEqual(ival_W.asfreq('S', 'S'), ival_W_to_S_start) - self.assertEqual(ival_W.asfreq('S', 'E'), ival_W_to_S_end) - - self.assertEqual(ival_W.asfreq('W'), ival_W) + assert ival_W.asfreq('A') == ival_W_to_A + assert ival_W_end_of_year.asfreq('A') == ival_W_to_A_end_of_year + + assert ival_W.asfreq('Q') == ival_W_to_Q + assert ival_W_end_of_quarter.asfreq('Q') == ival_W_to_Q_end_of_quarter + + assert ival_W.asfreq('M') == ival_W_to_M + assert ival_W_end_of_month.asfreq('M') == ival_W_to_M_end_of_month + + assert ival_W.asfreq('B', 'S') == ival_W_to_B_start + assert ival_W.asfreq('B', 'E') == ival_W_to_B_end + + assert ival_W.asfreq('D', 'S') == ival_W_to_D_start + assert ival_W.asfreq('D', 'E') == 
ival_W_to_D_end + + assert ival_WSUN.asfreq('D', 'S') == ival_WSUN_to_D_start + assert ival_WSUN.asfreq('D', 'E') == ival_WSUN_to_D_end + assert ival_WSAT.asfreq('D', 'S') == ival_WSAT_to_D_start + assert ival_WSAT.asfreq('D', 'E') == ival_WSAT_to_D_end + assert ival_WFRI.asfreq('D', 'S') == ival_WFRI_to_D_start + assert ival_WFRI.asfreq('D', 'E') == ival_WFRI_to_D_end + assert ival_WTHU.asfreq('D', 'S') == ival_WTHU_to_D_start + assert ival_WTHU.asfreq('D', 'E') == ival_WTHU_to_D_end + assert ival_WWED.asfreq('D', 'S') == ival_WWED_to_D_start + assert ival_WWED.asfreq('D', 'E') == ival_WWED_to_D_end + assert ival_WTUE.asfreq('D', 'S') == ival_WTUE_to_D_start + assert ival_WTUE.asfreq('D', 'E') == ival_WTUE_to_D_end + assert ival_WMON.asfreq('D', 'S') == ival_WMON_to_D_start + assert ival_WMON.asfreq('D', 'E') == ival_WMON_to_D_end + + assert ival_W.asfreq('H', 'S') == ival_W_to_H_start + assert ival_W.asfreq('H', 'E') == ival_W_to_H_end + assert ival_W.asfreq('Min', 'S') == ival_W_to_T_start + assert ival_W.asfreq('Min', 'E') == ival_W_to_T_end + assert ival_W.asfreq('S', 'S') == ival_W_to_S_start + assert ival_W.asfreq('S', 'E') == ival_W_to_S_end + + assert ival_W.asfreq('W') == ival_W msg = pd.tseries.frequencies._INVALID_FREQ_ERROR with tm.assert_raises_regex(ValueError, msg): @@ -342,25 +341,25 @@ def test_conv_business(self): ival_B_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, minute=59, second=59) - self.assertEqual(ival_B.asfreq('A'), ival_B_to_A) - self.assertEqual(ival_B_end_of_year.asfreq('A'), ival_B_to_A) - self.assertEqual(ival_B.asfreq('Q'), ival_B_to_Q) - self.assertEqual(ival_B_end_of_quarter.asfreq('Q'), ival_B_to_Q) - self.assertEqual(ival_B.asfreq('M'), ival_B_to_M) - self.assertEqual(ival_B_end_of_month.asfreq('M'), ival_B_to_M) - self.assertEqual(ival_B.asfreq('W'), ival_B_to_W) - self.assertEqual(ival_B_end_of_week.asfreq('W'), ival_B_to_W) + assert ival_B.asfreq('A') == ival_B_to_A + assert ival_B_end_of_year.asfreq('A') == ival_B_to_A + assert ival_B.asfreq('Q') == ival_B_to_Q + assert ival_B_end_of_quarter.asfreq('Q') == ival_B_to_Q + assert ival_B.asfreq('M') == ival_B_to_M + assert ival_B_end_of_month.asfreq('M') == ival_B_to_M + assert ival_B.asfreq('W') == ival_B_to_W + assert ival_B_end_of_week.asfreq('W') == ival_B_to_W - self.assertEqual(ival_B.asfreq('D'), ival_B_to_D) + assert ival_B.asfreq('D') == ival_B_to_D - self.assertEqual(ival_B.asfreq('H', 'S'), ival_B_to_H_start) - self.assertEqual(ival_B.asfreq('H', 'E'), ival_B_to_H_end) - self.assertEqual(ival_B.asfreq('Min', 'S'), ival_B_to_T_start) - self.assertEqual(ival_B.asfreq('Min', 'E'), ival_B_to_T_end) - self.assertEqual(ival_B.asfreq('S', 'S'), ival_B_to_S_start) - self.assertEqual(ival_B.asfreq('S', 'E'), ival_B_to_S_end) + assert ival_B.asfreq('H', 'S') == ival_B_to_H_start + assert ival_B.asfreq('H', 'E') == ival_B_to_H_end + assert ival_B.asfreq('Min', 'S') == ival_B_to_T_start + assert ival_B.asfreq('Min', 'E') == ival_B_to_T_end + assert ival_B.asfreq('S', 'S') == ival_B_to_S_start + assert ival_B.asfreq('S', 'E') == ival_B_to_S_end - self.assertEqual(ival_B.asfreq('B'), ival_B) + assert ival_B.asfreq('B') == ival_B def test_conv_daily(self): # frequency conversion tests: from Business Frequency" @@ -405,39 +404,36 @@ def test_conv_daily(self): ival_D_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, minute=59, second=59) - self.assertEqual(ival_D.asfreq('A'), ival_D_to_A) - - self.assertEqual(ival_D_end_of_quarter.asfreq('A-JAN'), - ival_Deoq_to_AJAN) - 
self.assertEqual(ival_D_end_of_quarter.asfreq('A-JUN'), - ival_Deoq_to_AJUN) - self.assertEqual(ival_D_end_of_quarter.asfreq('A-DEC'), - ival_Deoq_to_ADEC) - - self.assertEqual(ival_D_end_of_year.asfreq('A'), ival_D_to_A) - self.assertEqual(ival_D_end_of_quarter.asfreq('Q'), ival_D_to_QEDEC) - self.assertEqual(ival_D.asfreq("Q-JAN"), ival_D_to_QEJAN) - self.assertEqual(ival_D.asfreq("Q-JUN"), ival_D_to_QEJUN) - self.assertEqual(ival_D.asfreq("Q-DEC"), ival_D_to_QEDEC) - self.assertEqual(ival_D.asfreq('M'), ival_D_to_M) - self.assertEqual(ival_D_end_of_month.asfreq('M'), ival_D_to_M) - self.assertEqual(ival_D.asfreq('W'), ival_D_to_W) - self.assertEqual(ival_D_end_of_week.asfreq('W'), ival_D_to_W) - - self.assertEqual(ival_D_friday.asfreq('B'), ival_B_friday) - self.assertEqual(ival_D_saturday.asfreq('B', 'S'), ival_B_friday) - self.assertEqual(ival_D_saturday.asfreq('B', 'E'), ival_B_monday) - self.assertEqual(ival_D_sunday.asfreq('B', 'S'), ival_B_friday) - self.assertEqual(ival_D_sunday.asfreq('B', 'E'), ival_B_monday) - - self.assertEqual(ival_D.asfreq('H', 'S'), ival_D_to_H_start) - self.assertEqual(ival_D.asfreq('H', 'E'), ival_D_to_H_end) - self.assertEqual(ival_D.asfreq('Min', 'S'), ival_D_to_T_start) - self.assertEqual(ival_D.asfreq('Min', 'E'), ival_D_to_T_end) - self.assertEqual(ival_D.asfreq('S', 'S'), ival_D_to_S_start) - self.assertEqual(ival_D.asfreq('S', 'E'), ival_D_to_S_end) - - self.assertEqual(ival_D.asfreq('D'), ival_D) + assert ival_D.asfreq('A') == ival_D_to_A + + assert ival_D_end_of_quarter.asfreq('A-JAN') == ival_Deoq_to_AJAN + assert ival_D_end_of_quarter.asfreq('A-JUN') == ival_Deoq_to_AJUN + assert ival_D_end_of_quarter.asfreq('A-DEC') == ival_Deoq_to_ADEC + + assert ival_D_end_of_year.asfreq('A') == ival_D_to_A + assert ival_D_end_of_quarter.asfreq('Q') == ival_D_to_QEDEC + assert ival_D.asfreq("Q-JAN") == ival_D_to_QEJAN + assert ival_D.asfreq("Q-JUN") == ival_D_to_QEJUN + assert ival_D.asfreq("Q-DEC") == ival_D_to_QEDEC + assert ival_D.asfreq('M') == ival_D_to_M + assert ival_D_end_of_month.asfreq('M') == ival_D_to_M + assert ival_D.asfreq('W') == ival_D_to_W + assert ival_D_end_of_week.asfreq('W') == ival_D_to_W + + assert ival_D_friday.asfreq('B') == ival_B_friday + assert ival_D_saturday.asfreq('B', 'S') == ival_B_friday + assert ival_D_saturday.asfreq('B', 'E') == ival_B_monday + assert ival_D_sunday.asfreq('B', 'S') == ival_B_friday + assert ival_D_sunday.asfreq('B', 'E') == ival_B_monday + + assert ival_D.asfreq('H', 'S') == ival_D_to_H_start + assert ival_D.asfreq('H', 'E') == ival_D_to_H_end + assert ival_D.asfreq('Min', 'S') == ival_D_to_T_start + assert ival_D.asfreq('Min', 'E') == ival_D_to_T_end + assert ival_D.asfreq('S', 'S') == ival_D_to_S_start + assert ival_D.asfreq('S', 'E') == ival_D_to_S_end + + assert ival_D.asfreq('D') == ival_D def test_conv_hourly(self): # frequency conversion tests: from Hourly Frequency" @@ -472,25 +468,25 @@ def test_conv_hourly(self): ival_H_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, minute=59, second=59) - self.assertEqual(ival_H.asfreq('A'), ival_H_to_A) - self.assertEqual(ival_H_end_of_year.asfreq('A'), ival_H_to_A) - self.assertEqual(ival_H.asfreq('Q'), ival_H_to_Q) - self.assertEqual(ival_H_end_of_quarter.asfreq('Q'), ival_H_to_Q) - self.assertEqual(ival_H.asfreq('M'), ival_H_to_M) - self.assertEqual(ival_H_end_of_month.asfreq('M'), ival_H_to_M) - self.assertEqual(ival_H.asfreq('W'), ival_H_to_W) - self.assertEqual(ival_H_end_of_week.asfreq('W'), ival_H_to_W) - 
self.assertEqual(ival_H.asfreq('D'), ival_H_to_D) - self.assertEqual(ival_H_end_of_day.asfreq('D'), ival_H_to_D) - self.assertEqual(ival_H.asfreq('B'), ival_H_to_B) - self.assertEqual(ival_H_end_of_bus.asfreq('B'), ival_H_to_B) - - self.assertEqual(ival_H.asfreq('Min', 'S'), ival_H_to_T_start) - self.assertEqual(ival_H.asfreq('Min', 'E'), ival_H_to_T_end) - self.assertEqual(ival_H.asfreq('S', 'S'), ival_H_to_S_start) - self.assertEqual(ival_H.asfreq('S', 'E'), ival_H_to_S_end) - - self.assertEqual(ival_H.asfreq('H'), ival_H) + assert ival_H.asfreq('A') == ival_H_to_A + assert ival_H_end_of_year.asfreq('A') == ival_H_to_A + assert ival_H.asfreq('Q') == ival_H_to_Q + assert ival_H_end_of_quarter.asfreq('Q') == ival_H_to_Q + assert ival_H.asfreq('M') == ival_H_to_M + assert ival_H_end_of_month.asfreq('M') == ival_H_to_M + assert ival_H.asfreq('W') == ival_H_to_W + assert ival_H_end_of_week.asfreq('W') == ival_H_to_W + assert ival_H.asfreq('D') == ival_H_to_D + assert ival_H_end_of_day.asfreq('D') == ival_H_to_D + assert ival_H.asfreq('B') == ival_H_to_B + assert ival_H_end_of_bus.asfreq('B') == ival_H_to_B + + assert ival_H.asfreq('Min', 'S') == ival_H_to_T_start + assert ival_H.asfreq('Min', 'E') == ival_H_to_T_end + assert ival_H.asfreq('S', 'S') == ival_H_to_S_start + assert ival_H.asfreq('S', 'E') == ival_H_to_S_end + + assert ival_H.asfreq('H') == ival_H def test_conv_minutely(self): # frequency conversion tests: from Minutely Frequency" @@ -525,25 +521,25 @@ def test_conv_minutely(self): ival_T_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, minute=0, second=59) - self.assertEqual(ival_T.asfreq('A'), ival_T_to_A) - self.assertEqual(ival_T_end_of_year.asfreq('A'), ival_T_to_A) - self.assertEqual(ival_T.asfreq('Q'), ival_T_to_Q) - self.assertEqual(ival_T_end_of_quarter.asfreq('Q'), ival_T_to_Q) - self.assertEqual(ival_T.asfreq('M'), ival_T_to_M) - self.assertEqual(ival_T_end_of_month.asfreq('M'), ival_T_to_M) - self.assertEqual(ival_T.asfreq('W'), ival_T_to_W) - self.assertEqual(ival_T_end_of_week.asfreq('W'), ival_T_to_W) - self.assertEqual(ival_T.asfreq('D'), ival_T_to_D) - self.assertEqual(ival_T_end_of_day.asfreq('D'), ival_T_to_D) - self.assertEqual(ival_T.asfreq('B'), ival_T_to_B) - self.assertEqual(ival_T_end_of_bus.asfreq('B'), ival_T_to_B) - self.assertEqual(ival_T.asfreq('H'), ival_T_to_H) - self.assertEqual(ival_T_end_of_hour.asfreq('H'), ival_T_to_H) - - self.assertEqual(ival_T.asfreq('S', 'S'), ival_T_to_S_start) - self.assertEqual(ival_T.asfreq('S', 'E'), ival_T_to_S_end) - - self.assertEqual(ival_T.asfreq('Min'), ival_T) + assert ival_T.asfreq('A') == ival_T_to_A + assert ival_T_end_of_year.asfreq('A') == ival_T_to_A + assert ival_T.asfreq('Q') == ival_T_to_Q + assert ival_T_end_of_quarter.asfreq('Q') == ival_T_to_Q + assert ival_T.asfreq('M') == ival_T_to_M + assert ival_T_end_of_month.asfreq('M') == ival_T_to_M + assert ival_T.asfreq('W') == ival_T_to_W + assert ival_T_end_of_week.asfreq('W') == ival_T_to_W + assert ival_T.asfreq('D') == ival_T_to_D + assert ival_T_end_of_day.asfreq('D') == ival_T_to_D + assert ival_T.asfreq('B') == ival_T_to_B + assert ival_T_end_of_bus.asfreq('B') == ival_T_to_B + assert ival_T.asfreq('H') == ival_T_to_H + assert ival_T_end_of_hour.asfreq('H') == ival_T_to_H + + assert ival_T.asfreq('S', 'S') == ival_T_to_S_start + assert ival_T.asfreq('S', 'E') == ival_T_to_S_end + + assert ival_T.asfreq('Min') == ival_T def test_conv_secondly(self): # frequency conversion tests: from Secondly Frequency" @@ -577,24 +573,24 @@ def 
test_conv_secondly(self): ival_S_to_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, minute=0) - self.assertEqual(ival_S.asfreq('A'), ival_S_to_A) - self.assertEqual(ival_S_end_of_year.asfreq('A'), ival_S_to_A) - self.assertEqual(ival_S.asfreq('Q'), ival_S_to_Q) - self.assertEqual(ival_S_end_of_quarter.asfreq('Q'), ival_S_to_Q) - self.assertEqual(ival_S.asfreq('M'), ival_S_to_M) - self.assertEqual(ival_S_end_of_month.asfreq('M'), ival_S_to_M) - self.assertEqual(ival_S.asfreq('W'), ival_S_to_W) - self.assertEqual(ival_S_end_of_week.asfreq('W'), ival_S_to_W) - self.assertEqual(ival_S.asfreq('D'), ival_S_to_D) - self.assertEqual(ival_S_end_of_day.asfreq('D'), ival_S_to_D) - self.assertEqual(ival_S.asfreq('B'), ival_S_to_B) - self.assertEqual(ival_S_end_of_bus.asfreq('B'), ival_S_to_B) - self.assertEqual(ival_S.asfreq('H'), ival_S_to_H) - self.assertEqual(ival_S_end_of_hour.asfreq('H'), ival_S_to_H) - self.assertEqual(ival_S.asfreq('Min'), ival_S_to_T) - self.assertEqual(ival_S_end_of_minute.asfreq('Min'), ival_S_to_T) - - self.assertEqual(ival_S.asfreq('S'), ival_S) + assert ival_S.asfreq('A') == ival_S_to_A + assert ival_S_end_of_year.asfreq('A') == ival_S_to_A + assert ival_S.asfreq('Q') == ival_S_to_Q + assert ival_S_end_of_quarter.asfreq('Q') == ival_S_to_Q + assert ival_S.asfreq('M') == ival_S_to_M + assert ival_S_end_of_month.asfreq('M') == ival_S_to_M + assert ival_S.asfreq('W') == ival_S_to_W + assert ival_S_end_of_week.asfreq('W') == ival_S_to_W + assert ival_S.asfreq('D') == ival_S_to_D + assert ival_S_end_of_day.asfreq('D') == ival_S_to_D + assert ival_S.asfreq('B') == ival_S_to_B + assert ival_S_end_of_bus.asfreq('B') == ival_S_to_B + assert ival_S.asfreq('H') == ival_S_to_H + assert ival_S_end_of_hour.asfreq('H') == ival_S_to_H + assert ival_S.asfreq('Min') == ival_S_to_T + assert ival_S_end_of_minute.asfreq('Min') == ival_S_to_T + + assert ival_S.asfreq('S') == ival_S def test_asfreq_mult(self): # normal freq to mult freq @@ -604,17 +600,17 @@ def test_asfreq_mult(self): result = p.asfreq(freq) expected = Period('2007', freq='3A') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq # ordinal will not change for freq in ['3A', offsets.YearEnd(3)]: result = p.asfreq(freq, how='S') expected = Period('2007', freq='3A') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq # mult freq to normal freq p = Period(freq='3A', year=2007) @@ -623,49 +619,49 @@ def test_asfreq_mult(self): result = p.asfreq(freq) expected = Period('2009', freq='A') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq # ordinal will not change for freq in ['A', offsets.YearEnd()]: result = p.asfreq(freq, how='S') expected = Period('2007', freq='A') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq p = Period(freq='A', year=2007) for freq 
in ['2M', offsets.MonthEnd(2)]: result = p.asfreq(freq) expected = Period('2007-12', freq='2M') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq for freq in ['2M', offsets.MonthEnd(2)]: result = p.asfreq(freq, how='S') expected = Period('2007-01', freq='2M') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq p = Period(freq='3A', year=2007) for freq in ['2M', offsets.MonthEnd(2)]: result = p.asfreq(freq) expected = Period('2009-12', freq='2M') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq for freq in ['2M', offsets.MonthEnd(2)]: result = p.asfreq(freq, how='S') expected = Period('2007-01', freq='2M') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq def test_asfreq_combined(self): # normal freq to combined freq @@ -675,9 +671,9 @@ def test_asfreq_combined(self): expected = Period('2007', freq='25H') for freq, how in zip(['1D1H', '1H1D'], ['E', 'S']): result = p.asfreq(freq, how=how) - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq # combined freq to normal freq p1 = Period(freq='1D1H', year=2007) @@ -687,29 +683,28 @@ def test_asfreq_combined(self): result1 = p1.asfreq('H') result2 = p2.asfreq('H') expected = Period('2007-01-02', freq='H') - self.assertEqual(result1, expected) - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freq, expected.freq) - self.assertEqual(result2, expected) - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freq, expected.freq) + assert result1 == expected + assert result1.ordinal == expected.ordinal + assert result1.freq == expected.freq + assert result2 == expected + assert result2.ordinal == expected.ordinal + assert result2.freq == expected.freq # ordinal will not change result1 = p1.asfreq('H', how='S') result2 = p2.asfreq('H', how='S') expected = Period('2007-01-01', freq='H') - self.assertEqual(result1, expected) - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freq, expected.freq) - self.assertEqual(result2, expected) - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freq, expected.freq) + assert result1 == expected + assert result1.ordinal == expected.ordinal + assert result1.freq == expected.freq + assert result2 == expected + assert result2.ordinal == expected.ordinal + assert result2.freq == expected.freq def test_asfreq_MS(self): initial = Period("2013") - self.assertEqual(initial.asfreq(freq="M", how="S"), - Period('2013-01', 'M')) + assert initial.asfreq(freq="M", how="S") == Period('2013-01', 'M') msg = pd.tseries.frequencies._INVALID_FREQ_ERROR with 
tm.assert_raises_regex(ValueError, msg): diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index 9efd180afc2da..faddbcc84109f 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -21,22 +21,20 @@ def setUp(self): def test_construction(self): expected = np.timedelta64(10, 'D').astype('m8[ns]').view('i8') - self.assertEqual(Timedelta(10, unit='d').value, expected) - self.assertEqual(Timedelta(10.0, unit='d').value, expected) - self.assertEqual(Timedelta('10 days').value, expected) - self.assertEqual(Timedelta(days=10).value, expected) - self.assertEqual(Timedelta(days=10.0).value, expected) + assert Timedelta(10, unit='d').value == expected + assert Timedelta(10.0, unit='d').value == expected + assert Timedelta('10 days').value == expected + assert Timedelta(days=10).value == expected + assert Timedelta(days=10.0).value == expected expected += np.timedelta64(10, 's').astype('m8[ns]').view('i8') - self.assertEqual(Timedelta('10 days 00:00:10').value, expected) - self.assertEqual(Timedelta(days=10, seconds=10).value, expected) - self.assertEqual( - Timedelta(days=10, milliseconds=10 * 1000).value, expected) - self.assertEqual( - Timedelta(days=10, microseconds=10 * 1000 * 1000).value, expected) - - # test construction with np dtypes - # GH 8757 + assert Timedelta('10 days 00:00:10').value == expected + assert Timedelta(days=10, seconds=10).value == expected + assert Timedelta(days=10, milliseconds=10 * 1000).value == expected + assert (Timedelta(days=10, microseconds=10 * 1000 * 1000) + .value == expected) + + # gh-8757: test construction with np dtypes timedelta_kwargs = {'days': 'D', 'seconds': 's', 'microseconds': 'us', @@ -48,70 +46,64 @@ def test_construction(self): np.float16] for npdtype in npdtypes: for pykwarg, npkwarg in timedelta_kwargs.items(): - expected = np.timedelta64(1, - npkwarg).astype('m8[ns]').view('i8') - self.assertEqual( - Timedelta(**{pykwarg: npdtype(1)}).value, expected) + expected = np.timedelta64(1, npkwarg).astype( + 'm8[ns]').view('i8') + assert Timedelta(**{pykwarg: npdtype(1)}).value == expected # rounding cases - self.assertEqual(Timedelta(82739999850000).value, 82739999850000) + assert Timedelta(82739999850000).value == 82739999850000 assert ('0 days 22:58:59.999850' in str(Timedelta(82739999850000))) - self.assertEqual(Timedelta(123072001000000).value, 123072001000000) + assert Timedelta(123072001000000).value == 123072001000000 assert ('1 days 10:11:12.001' in str(Timedelta(123072001000000))) # string conversion with/without leading zero # GH 9570 - self.assertEqual(Timedelta('0:00:00'), timedelta(hours=0)) - self.assertEqual(Timedelta('00:00:00'), timedelta(hours=0)) - self.assertEqual(Timedelta('-1:00:00'), -timedelta(hours=1)) - self.assertEqual(Timedelta('-01:00:00'), -timedelta(hours=1)) + assert Timedelta('0:00:00') == timedelta(hours=0) + assert Timedelta('00:00:00') == timedelta(hours=0) + assert Timedelta('-1:00:00') == -timedelta(hours=1) + assert Timedelta('-01:00:00') == -timedelta(hours=1) # more strings & abbrevs # GH 8190 - self.assertEqual(Timedelta('1 h'), timedelta(hours=1)) - self.assertEqual(Timedelta('1 hour'), timedelta(hours=1)) - self.assertEqual(Timedelta('1 hr'), timedelta(hours=1)) - self.assertEqual(Timedelta('1 hours'), timedelta(hours=1)) - self.assertEqual(Timedelta('-1 hours'), -timedelta(hours=1)) - self.assertEqual(Timedelta('1 m'), timedelta(minutes=1)) - self.assertEqual(Timedelta('1.5 m'), timedelta(seconds=90)) - 
self.assertEqual(Timedelta('1 minute'), timedelta(minutes=1)) - self.assertEqual(Timedelta('1 minutes'), timedelta(minutes=1)) - self.assertEqual(Timedelta('1 s'), timedelta(seconds=1)) - self.assertEqual(Timedelta('1 second'), timedelta(seconds=1)) - self.assertEqual(Timedelta('1 seconds'), timedelta(seconds=1)) - self.assertEqual(Timedelta('1 ms'), timedelta(milliseconds=1)) - self.assertEqual(Timedelta('1 milli'), timedelta(milliseconds=1)) - self.assertEqual(Timedelta('1 millisecond'), timedelta(milliseconds=1)) - self.assertEqual(Timedelta('1 us'), timedelta(microseconds=1)) - self.assertEqual(Timedelta('1 micros'), timedelta(microseconds=1)) - self.assertEqual(Timedelta('1 microsecond'), timedelta(microseconds=1)) - self.assertEqual(Timedelta('1.5 microsecond'), - Timedelta('00:00:00.000001500')) - self.assertEqual(Timedelta('1 ns'), Timedelta('00:00:00.000000001')) - self.assertEqual(Timedelta('1 nano'), Timedelta('00:00:00.000000001')) - self.assertEqual(Timedelta('1 nanosecond'), - Timedelta('00:00:00.000000001')) + assert Timedelta('1 h') == timedelta(hours=1) + assert Timedelta('1 hour') == timedelta(hours=1) + assert Timedelta('1 hr') == timedelta(hours=1) + assert Timedelta('1 hours') == timedelta(hours=1) + assert Timedelta('-1 hours') == -timedelta(hours=1) + assert Timedelta('1 m') == timedelta(minutes=1) + assert Timedelta('1.5 m') == timedelta(seconds=90) + assert Timedelta('1 minute') == timedelta(minutes=1) + assert Timedelta('1 minutes') == timedelta(minutes=1) + assert Timedelta('1 s') == timedelta(seconds=1) + assert Timedelta('1 second') == timedelta(seconds=1) + assert Timedelta('1 seconds') == timedelta(seconds=1) + assert Timedelta('1 ms') == timedelta(milliseconds=1) + assert Timedelta('1 milli') == timedelta(milliseconds=1) + assert Timedelta('1 millisecond') == timedelta(milliseconds=1) + assert Timedelta('1 us') == timedelta(microseconds=1) + assert Timedelta('1 micros') == timedelta(microseconds=1) + assert Timedelta('1 microsecond') == timedelta(microseconds=1) + assert Timedelta('1.5 microsecond') == Timedelta('00:00:00.000001500') + assert Timedelta('1 ns') == Timedelta('00:00:00.000000001') + assert Timedelta('1 nano') == Timedelta('00:00:00.000000001') + assert Timedelta('1 nanosecond') == Timedelta('00:00:00.000000001') # combos - self.assertEqual(Timedelta('10 days 1 hour'), - timedelta(days=10, hours=1)) - self.assertEqual(Timedelta('10 days 1 h'), timedelta(days=10, hours=1)) - self.assertEqual(Timedelta('10 days 1 h 1m 1s'), timedelta( - days=10, hours=1, minutes=1, seconds=1)) - self.assertEqual(Timedelta('-10 days 1 h 1m 1s'), - - timedelta(days=10, hours=1, minutes=1, seconds=1)) - self.assertEqual(Timedelta('-10 days 1 h 1m 1s'), - - timedelta(days=10, hours=1, minutes=1, seconds=1)) - self.assertEqual(Timedelta('-10 days 1 h 1m 1s 3us'), - - timedelta(days=10, hours=1, minutes=1, - seconds=1, microseconds=3)) - self.assertEqual(Timedelta('-10 days 1 h 1.5m 1s 3us'), - - timedelta(days=10, hours=1, minutes=1, - seconds=31, microseconds=3)) - - # currently invalid as it has a - on the hhmmdd part (only allowed on - # the days) + assert Timedelta('10 days 1 hour') == timedelta(days=10, hours=1) + assert Timedelta('10 days 1 h') == timedelta(days=10, hours=1) + assert Timedelta('10 days 1 h 1m 1s') == timedelta( + days=10, hours=1, minutes=1, seconds=1) + assert Timedelta('-10 days 1 h 1m 1s') == -timedelta( + days=10, hours=1, minutes=1, seconds=1) + assert Timedelta('-10 days 1 h 1m 1s') == -timedelta( + days=10, hours=1, minutes=1, seconds=1) + 
assert Timedelta('-10 days 1 h 1m 1s 3us') == -timedelta(
+            days=10, hours=1, minutes=1, seconds=1, microseconds=3)
+        assert Timedelta('-10 days 1 h 1.5m 1s 3us') == -timedelta(
+            days=10, hours=1, minutes=1, seconds=31, microseconds=3)
+
+        # Currently invalid as it has a - on the hh:mm:ss part
+        # (only allowed on the days)
         pytest.raises(ValueError,
                       lambda:
                       Timedelta('-10 days -1 h 1.5m 1s 3us'))

@@ -139,34 +131,33 @@ def test_construction(self):
                   '1ns', '-23:59:59.999999999']:
             td = Timedelta(v)
-            self.assertEqual(Timedelta(td.value), td)
+            assert Timedelta(td.value) == td

             # str does not normally display nanos
             if not td.nanoseconds:
-                self.assertEqual(Timedelta(str(td)), td)
-                self.assertEqual(Timedelta(td._repr_base(format='all')), td)
+                assert Timedelta(str(td)) == td
+                assert Timedelta(td._repr_base(format='all')) == td

         # floats
         expected = np.timedelta64(
             10, 's').astype('m8[ns]').view('i8') + np.timedelta64(
                 500, 'ms').astype('m8[ns]').view('i8')
-        self.assertEqual(Timedelta(10.5, unit='s').value, expected)
+        assert Timedelta(10.5, unit='s').value == expected

         # offset
-        self.assertEqual(to_timedelta(pd.offsets.Hour(2)),
-                         Timedelta('0 days, 02:00:00'))
-        self.assertEqual(Timedelta(pd.offsets.Hour(2)),
-                         Timedelta('0 days, 02:00:00'))
-        self.assertEqual(Timedelta(pd.offsets.Second(2)),
-                         Timedelta('0 days, 00:00:02'))
-
-        # unicode
-        # GH 11995
+        assert (to_timedelta(pd.offsets.Hour(2)) ==
+                Timedelta('0 days, 02:00:00'))
+        assert (Timedelta(pd.offsets.Hour(2)) ==
+                Timedelta('0 days, 02:00:00'))
+        assert (Timedelta(pd.offsets.Second(2)) ==
+                Timedelta('0 days, 00:00:02'))
+
+        # gh-11995: unicode
         expected = Timedelta('1H')
         result = pd.Timedelta(u'1H')
-        self.assertEqual(result, expected)
-        self.assertEqual(to_timedelta(pd.offsets.Hour(2)),
-                         Timedelta(u'0 days, 02:00:00'))
+        assert result == expected
+        assert (to_timedelta(pd.offsets.Hour(2)) ==
+                Timedelta(u'0 days, 02:00:00'))

         pytest.raises(ValueError, lambda: Timedelta(u'foo bar'))

@@ -176,7 +167,7 @@ def test_overflow_on_construction(self):
         pytest.raises(OverflowError, pd.Timedelta, value)

     def test_total_seconds_scalar(self):
-        # GH 10939
+        # see gh-10939
        rng = Timedelta('1 days, 10:11:12.100123456')
        expt = 1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9
        tm.assert_almost_equal(rng.total_seconds(), expt)

@@ -186,14 +177,14 @@ def test_total_seconds_scalar(self):
     def test_repr(self):
-        self.assertEqual(repr(Timedelta(10, unit='d')),
-                         "Timedelta('10 days 00:00:00')")
-        self.assertEqual(repr(Timedelta(10, unit='s')),
-                         "Timedelta('0 days 00:00:10')")
-        self.assertEqual(repr(Timedelta(10, unit='ms')),
-                         "Timedelta('0 days 00:00:00.010000')")
-        self.assertEqual(repr(Timedelta(-10, unit='ms')),
-                         "Timedelta('-1 days +23:59:59.990000')")
+        assert (repr(Timedelta(10, unit='d')) ==
+                "Timedelta('10 days 00:00:00')")
+        assert (repr(Timedelta(10, unit='s')) ==
+                "Timedelta('0 days 00:00:10')")
+        assert (repr(Timedelta(10, unit='ms')) ==
+                "Timedelta('0 days 00:00:00.010000')")
+        assert (repr(Timedelta(-10, unit='ms')) ==
+                "Timedelta('-1 days +23:59:59.990000')")

     def test_conversion(self):

@@ -201,14 +192,16 @@ def test_conversion(self):
                      Timedelta('1 days, 10:11:12.012345')]:
             pydt = td.to_pytimedelta()
             assert td == Timedelta(pydt)
-            self.assertEqual(td, pydt)
+            assert td == pydt
             assert (isinstance(pydt, timedelta) and not isinstance(
                 pydt, Timedelta))

-            self.assertEqual(td, np.timedelta64(td.value, 'ns'))
+            assert td == np.timedelta64(td.value, 'ns')
             td64 = td.to_timedelta64()
-            self.assertEqual(td64, np.timedelta64(td.value, 'ns'))
-            self.assertEqual(td, td64)
+
+            assert td64 == np.timedelta64(td.value, 'ns')
+            assert td == td64
+
             assert isinstance(td64, np.timedelta64)

         # this is NOT equal and cannot be roundtripped (because of the nanos)

@@ -220,20 +213,20 @@ def test_freq_conversion(self):
         # truediv
         td = Timedelta('1 days 2 hours 3 ns')
         result = td / np.timedelta64(1, 'D')
-        self.assertEqual(result, td.value / float(86400 * 1e9))
+        assert result == td.value / float(86400 * 1e9)
         result = td / np.timedelta64(1, 's')
-        self.assertEqual(result, td.value / float(1e9))
+        assert result == td.value / float(1e9)
         result = td / np.timedelta64(1, 'ns')
-        self.assertEqual(result, td.value)
+        assert result == td.value

         # floordiv
         td = Timedelta('1 days 2 hours 3 ns')
         result = td // np.timedelta64(1, 'D')
-        self.assertEqual(result, 1)
+        assert result == 1
         result = td // np.timedelta64(1, 's')
-        self.assertEqual(result, 93600)
+        assert result == 93600
         result = td // np.timedelta64(1, 'ns')
-        self.assertEqual(result, td.value)
+        assert result == td.value

     def test_fields(self):
         def check(value):

@@ -242,10 +235,10 @@ def check(value):
         # compat to datetime.timedelta
         rng = to_timedelta('1 days, 10:11:12')
-        self.assertEqual(rng.days, 1)
-        self.assertEqual(rng.seconds, 10 * 3600 + 11 * 60 + 12)
-        self.assertEqual(rng.microseconds, 0)
-        self.assertEqual(rng.nanoseconds, 0)
+        assert rng.days == 1
+        assert rng.seconds == 10 * 3600 + 11 * 60 + 12
+        assert rng.microseconds == 0
+        assert rng.nanoseconds == 0

         pytest.raises(AttributeError, lambda: rng.hours)
         pytest.raises(AttributeError, lambda: rng.minutes)

@@ -258,30 +251,30 @@ def check(value):
         check(rng.nanoseconds)

         td = Timedelta('-1 days, 10:11:12')
-        self.assertEqual(abs(td), Timedelta('13:48:48'))
+        assert abs(td) == Timedelta('13:48:48')
         assert str(td) == "-1 days +10:11:12"
-        self.assertEqual(-td, Timedelta('0 days 13:48:48'))
-        self.assertEqual(-Timedelta('-1 days, 10:11:12').value, 49728000000000)
-        self.assertEqual(Timedelta('-1 days, 10:11:12').value, -49728000000000)
+        assert -td == Timedelta('0 days 13:48:48')
+        assert -Timedelta('-1 days, 10:11:12').value == 49728000000000
+        assert Timedelta('-1 days, 10:11:12').value == -49728000000000

         rng = to_timedelta('-1 days, 10:11:12.100123456')
-
self.assertEqual(rng.days, -1) - self.assertEqual(rng.seconds, 10 * 3600 + 11 * 60 + 12) - self.assertEqual(rng.microseconds, 100 * 1000 + 123) - self.assertEqual(rng.nanoseconds, 456) + assert rng.days == -1 + assert rng.seconds == 10 * 3600 + 11 * 60 + 12 + assert rng.microseconds == 100 * 1000 + 123 + assert rng.nanoseconds == 456 pytest.raises(AttributeError, lambda: rng.hours) pytest.raises(AttributeError, lambda: rng.minutes) pytest.raises(AttributeError, lambda: rng.milliseconds) # components tup = pd.to_timedelta(-1, 'us').components - self.assertEqual(tup.days, -1) - self.assertEqual(tup.hours, 23) - self.assertEqual(tup.minutes, 59) - self.assertEqual(tup.seconds, 59) - self.assertEqual(tup.milliseconds, 999) - self.assertEqual(tup.microseconds, 999) - self.assertEqual(tup.nanoseconds, 0) + assert tup.days == -1 + assert tup.hours == 23 + assert tup.minutes == 59 + assert tup.seconds == 59 + assert tup.milliseconds == 999 + assert tup.microseconds == 999 + assert tup.nanoseconds == 0 # GH 10050 check(tup.days) @@ -293,19 +286,17 @@ def check(value): check(tup.nanoseconds) tup = Timedelta('-1 days 1 us').components - self.assertEqual(tup.days, -2) - self.assertEqual(tup.hours, 23) - self.assertEqual(tup.minutes, 59) - self.assertEqual(tup.seconds, 59) - self.assertEqual(tup.milliseconds, 999) - self.assertEqual(tup.microseconds, 999) - self.assertEqual(tup.nanoseconds, 0) + assert tup.days == -2 + assert tup.hours == 23 + assert tup.minutes == 59 + assert tup.seconds == 59 + assert tup.milliseconds == 999 + assert tup.microseconds == 999 + assert tup.nanoseconds == 0 def test_nat_converters(self): - self.assertEqual(to_timedelta( - 'nat', box=False).astype('int64'), iNaT) - self.assertEqual(to_timedelta( - 'nan', box=False).astype('int64'), iNaT) + assert to_timedelta('nat', box=False).astype('int64') == iNaT + assert to_timedelta('nan', box=False).astype('int64') == iNaT def testit(unit, transform): @@ -319,7 +310,7 @@ def testit(unit, transform): result = to_timedelta(2, unit=unit) expected = Timedelta(np.timedelta64(2, transform(unit)).astype( 'timedelta64[ns]')) - self.assertEqual(result, expected) + assert result == expected # validate all units # GH 6855 @@ -340,27 +331,22 @@ def testit(unit, transform): testit('L', lambda x: 'ms') def test_numeric_conversions(self): - self.assertEqual(ct(0), np.timedelta64(0, 'ns')) - self.assertEqual(ct(10), np.timedelta64(10, 'ns')) - self.assertEqual(ct(10, unit='ns'), np.timedelta64( - 10, 'ns').astype('m8[ns]')) - - self.assertEqual(ct(10, unit='us'), np.timedelta64( - 10, 'us').astype('m8[ns]')) - self.assertEqual(ct(10, unit='ms'), np.timedelta64( - 10, 'ms').astype('m8[ns]')) - self.assertEqual(ct(10, unit='s'), np.timedelta64( - 10, 's').astype('m8[ns]')) - self.assertEqual(ct(10, unit='d'), np.timedelta64( - 10, 'D').astype('m8[ns]')) + assert ct(0) == np.timedelta64(0, 'ns') + assert ct(10) == np.timedelta64(10, 'ns') + assert ct(10, unit='ns') == np.timedelta64(10, 'ns').astype('m8[ns]') + + assert ct(10, unit='us') == np.timedelta64(10, 'us').astype('m8[ns]') + assert ct(10, unit='ms') == np.timedelta64(10, 'ms').astype('m8[ns]') + assert ct(10, unit='s') == np.timedelta64(10, 's').astype('m8[ns]') + assert ct(10, unit='d') == np.timedelta64(10, 'D').astype('m8[ns]') def test_timedelta_conversions(self): - self.assertEqual(ct(timedelta(seconds=1)), - np.timedelta64(1, 's').astype('m8[ns]')) - self.assertEqual(ct(timedelta(microseconds=1)), - np.timedelta64(1, 'us').astype('m8[ns]')) - self.assertEqual(ct(timedelta(days=1)), - 
np.timedelta64(1, 'D').astype('m8[ns]')) + assert (ct(timedelta(seconds=1)) == + np.timedelta64(1, 's').astype('m8[ns]')) + assert (ct(timedelta(microseconds=1)) == + np.timedelta64(1, 'us').astype('m8[ns]')) + assert (ct(timedelta(days=1)) == + np.timedelta64(1, 'D').astype('m8[ns]')) def test_round(self): @@ -387,9 +373,9 @@ def test_round(self): ('d', Timedelta('1 days'), Timedelta('-1 days'))]: r1 = t1.round(freq) - self.assertEqual(r1, s1) + assert r1 == s1 r2 = t2.round(freq) - self.assertEqual(r2, s2) + assert r2 == s2 # invalid for freq in ['Y', 'M', 'foobar']: @@ -465,43 +451,43 @@ def test_short_format_converters(self): def conv(v): return v.astype('m8[ns]') - self.assertEqual(ct('10'), np.timedelta64(10, 'ns')) - self.assertEqual(ct('10ns'), np.timedelta64(10, 'ns')) - self.assertEqual(ct('100'), np.timedelta64(100, 'ns')) - self.assertEqual(ct('100ns'), np.timedelta64(100, 'ns')) - - self.assertEqual(ct('1000'), np.timedelta64(1000, 'ns')) - self.assertEqual(ct('1000ns'), np.timedelta64(1000, 'ns')) - self.assertEqual(ct('1000NS'), np.timedelta64(1000, 'ns')) - - self.assertEqual(ct('10us'), np.timedelta64(10000, 'ns')) - self.assertEqual(ct('100us'), np.timedelta64(100000, 'ns')) - self.assertEqual(ct('1000us'), np.timedelta64(1000000, 'ns')) - self.assertEqual(ct('1000Us'), np.timedelta64(1000000, 'ns')) - self.assertEqual(ct('1000uS'), np.timedelta64(1000000, 'ns')) - - self.assertEqual(ct('1ms'), np.timedelta64(1000000, 'ns')) - self.assertEqual(ct('10ms'), np.timedelta64(10000000, 'ns')) - self.assertEqual(ct('100ms'), np.timedelta64(100000000, 'ns')) - self.assertEqual(ct('1000ms'), np.timedelta64(1000000000, 'ns')) - - self.assertEqual(ct('-1s'), -np.timedelta64(1000000000, 'ns')) - self.assertEqual(ct('1s'), np.timedelta64(1000000000, 'ns')) - self.assertEqual(ct('10s'), np.timedelta64(10000000000, 'ns')) - self.assertEqual(ct('100s'), np.timedelta64(100000000000, 'ns')) - self.assertEqual(ct('1000s'), np.timedelta64(1000000000000, 'ns')) - - self.assertEqual(ct('1d'), conv(np.timedelta64(1, 'D'))) - self.assertEqual(ct('-1d'), -conv(np.timedelta64(1, 'D'))) - self.assertEqual(ct('1D'), conv(np.timedelta64(1, 'D'))) - self.assertEqual(ct('10D'), conv(np.timedelta64(10, 'D'))) - self.assertEqual(ct('100D'), conv(np.timedelta64(100, 'D'))) - self.assertEqual(ct('1000D'), conv(np.timedelta64(1000, 'D'))) - self.assertEqual(ct('10000D'), conv(np.timedelta64(10000, 'D'))) + assert ct('10') == np.timedelta64(10, 'ns') + assert ct('10ns') == np.timedelta64(10, 'ns') + assert ct('100') == np.timedelta64(100, 'ns') + assert ct('100ns') == np.timedelta64(100, 'ns') + + assert ct('1000') == np.timedelta64(1000, 'ns') + assert ct('1000ns') == np.timedelta64(1000, 'ns') + assert ct('1000NS') == np.timedelta64(1000, 'ns') + + assert ct('10us') == np.timedelta64(10000, 'ns') + assert ct('100us') == np.timedelta64(100000, 'ns') + assert ct('1000us') == np.timedelta64(1000000, 'ns') + assert ct('1000Us') == np.timedelta64(1000000, 'ns') + assert ct('1000uS') == np.timedelta64(1000000, 'ns') + + assert ct('1ms') == np.timedelta64(1000000, 'ns') + assert ct('10ms') == np.timedelta64(10000000, 'ns') + assert ct('100ms') == np.timedelta64(100000000, 'ns') + assert ct('1000ms') == np.timedelta64(1000000000, 'ns') + + assert ct('-1s') == -np.timedelta64(1000000000, 'ns') + assert ct('1s') == np.timedelta64(1000000000, 'ns') + assert ct('10s') == np.timedelta64(10000000000, 'ns') + assert ct('100s') == np.timedelta64(100000000000, 'ns') + assert ct('1000s') == np.timedelta64(1000000000000, 
'ns') + + assert ct('1d') == conv(np.timedelta64(1, 'D')) + assert ct('-1d') == -conv(np.timedelta64(1, 'D')) + assert ct('1D') == conv(np.timedelta64(1, 'D')) + assert ct('10D') == conv(np.timedelta64(10, 'D')) + assert ct('100D') == conv(np.timedelta64(100, 'D')) + assert ct('1000D') == conv(np.timedelta64(1000, 'D')) + assert ct('10000D') == conv(np.timedelta64(10000, 'D')) # space - self.assertEqual(ct(' 10000D '), conv(np.timedelta64(10000, 'D'))) - self.assertEqual(ct(' - 10000D '), -conv(np.timedelta64(10000, 'D'))) + assert ct(' 10000D ') == conv(np.timedelta64(10000, 'D')) + assert ct(' - 10000D ') == -conv(np.timedelta64(10000, 'D')) # invalid pytest.raises(ValueError, ct, '1foo') @@ -513,24 +499,22 @@ def conv(v): d1 = np.timedelta64(1, 'D') - self.assertEqual(ct('1days'), conv(d1)) - self.assertEqual(ct('1days,'), conv(d1)) - self.assertEqual(ct('- 1days,'), -conv(d1)) - - self.assertEqual(ct('00:00:01'), conv(np.timedelta64(1, 's'))) - self.assertEqual(ct('06:00:01'), conv( - np.timedelta64(6 * 3600 + 1, 's'))) - self.assertEqual(ct('06:00:01.0'), conv( - np.timedelta64(6 * 3600 + 1, 's'))) - self.assertEqual(ct('06:00:01.01'), conv( - np.timedelta64(1000 * (6 * 3600 + 1) + 10, 'ms'))) - - self.assertEqual(ct('- 1days, 00:00:01'), - conv(-d1 + np.timedelta64(1, 's'))) - self.assertEqual(ct('1days, 06:00:01'), conv( - d1 + np.timedelta64(6 * 3600 + 1, 's'))) - self.assertEqual(ct('1days, 06:00:01.01'), conv( - d1 + np.timedelta64(1000 * (6 * 3600 + 1) + 10, 'ms'))) + assert ct('1days') == conv(d1) + assert ct('1days,') == conv(d1) + assert ct('- 1days,') == -conv(d1) + + assert ct('00:00:01') == conv(np.timedelta64(1, 's')) + assert ct('06:00:01') == conv(np.timedelta64(6 * 3600 + 1, 's')) + assert ct('06:00:01.0') == conv(np.timedelta64(6 * 3600 + 1, 's')) + assert ct('06:00:01.01') == conv(np.timedelta64( + 1000 * (6 * 3600 + 1) + 10, 'ms')) + + assert (ct('- 1days, 00:00:01') == + conv(-d1 + np.timedelta64(1, 's'))) + assert (ct('1days, 06:00:01') == + conv(d1 + np.timedelta64(6 * 3600 + 1, 's'))) + assert (ct('1days, 06:00:01.01') == + conv(d1 + np.timedelta64(1000 * (6 * 3600 + 1) + 10, 'ms'))) # invalid pytest.raises(ValueError, ct, '- 1days, 00') @@ -560,16 +544,16 @@ def test_pickle(self): v = Timedelta('1 days 10:11:12.0123456') v_p = tm.round_trip_pickle(v) - self.assertEqual(v, v_p) + assert v == v_p def test_timedelta_hash_equality(self): # GH 11129 v = Timedelta(1, 'D') td = timedelta(days=1) - self.assertEqual(hash(v), hash(td)) + assert hash(v) == hash(td) d = {td: 2} - self.assertEqual(d[v], 2) + assert d[v] == 2 tds = timedelta_range('1 second', periods=20) assert all(hash(td) == hash(td.to_pytimedelta()) for td in tds) @@ -662,34 +646,34 @@ def test_isoformat(self): milliseconds=10, microseconds=10, nanoseconds=12) expected = 'P6DT0H50M3.010010012S' result = td.isoformat() - self.assertEqual(result, expected) + assert result == expected td = Timedelta(days=4, hours=12, minutes=30, seconds=5) result = td.isoformat() expected = 'P4DT12H30M5S' - self.assertEqual(result, expected) + assert result == expected td = Timedelta(nanoseconds=123) result = td.isoformat() expected = 'P0DT0H0M0.000000123S' - self.assertEqual(result, expected) + assert result == expected # trim nano td = Timedelta(microseconds=10) result = td.isoformat() expected = 'P0DT0H0M0.00001S' - self.assertEqual(result, expected) + assert result == expected # trim micro td = Timedelta(milliseconds=1) result = td.isoformat() expected = 'P0DT0H0M0.001S' - self.assertEqual(result, expected) + assert 
result == expected # don't strip every 0 result = Timedelta(minutes=1).isoformat() expected = 'P0DT0H1M0S' - self.assertEqual(result, expected) + assert result == expected def test_ops_error_str(self): # GH 13624 diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 72b1e4d450b84..8a28a9a4bedd0 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -31,8 +31,8 @@ def test_constructor(self): # confirm base representation is correct import calendar - self.assertEqual(calendar.timegm(base_dt.timetuple()) * 1000000000, - base_expected) + assert (calendar.timegm(base_dt.timetuple()) * 1000000000 == + base_expected) tests = [(base_str, base_dt, base_expected), ('2014-07-01 10:00', datetime(2014, 7, 1, 10), @@ -56,32 +56,32 @@ def test_constructor(self): for date_str, date, expected in tests: for result in [Timestamp(date_str), Timestamp(date)]: # only with timestring - self.assertEqual(result.value, expected) - self.assertEqual(tslib.pydt_to_i8(result), expected) + assert result.value == expected + assert tslib.pydt_to_i8(result) == expected # re-creation shouldn't affect to internal value result = Timestamp(result) - self.assertEqual(result.value, expected) - self.assertEqual(tslib.pydt_to_i8(result), expected) + assert result.value == expected + assert tslib.pydt_to_i8(result) == expected # with timezone for tz, offset in timezones: for result in [Timestamp(date_str, tz=tz), Timestamp(date, tz=tz)]: expected_tz = expected - offset * 3600 * 1000000000 - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) + assert result.value == expected_tz + assert tslib.pydt_to_i8(result) == expected_tz # should preserve tz result = Timestamp(result) - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) + assert result.value == expected_tz + assert tslib.pydt_to_i8(result) == expected_tz # should convert to UTC result = Timestamp(result, tz='UTC') expected_utc = expected - offset * 3600 * 1000000000 - self.assertEqual(result.value, expected_utc) - self.assertEqual(tslib.pydt_to_i8(result), expected_utc) + assert result.value == expected_utc + assert tslib.pydt_to_i8(result) == expected_utc def test_constructor_with_stringoffset(self): # GH 7833 @@ -91,8 +91,8 @@ def test_constructor_with_stringoffset(self): # confirm base representation is correct import calendar - self.assertEqual(calendar.timegm(base_dt.timetuple()) * 1000000000, - base_expected) + assert (calendar.timegm(base_dt.timetuple()) * 1000000000 == + base_expected) tests = [(base_str, base_expected), ('2014-07-01 12:00:00+02:00', @@ -112,64 +112,64 @@ def test_constructor_with_stringoffset(self): for date_str, expected in tests: for result in [Timestamp(date_str)]: # only with timestring - self.assertEqual(result.value, expected) - self.assertEqual(tslib.pydt_to_i8(result), expected) + assert result.value == expected + assert tslib.pydt_to_i8(result) == expected # re-creation shouldn't affect to internal value result = Timestamp(result) - self.assertEqual(result.value, expected) - self.assertEqual(tslib.pydt_to_i8(result), expected) + assert result.value == expected + assert tslib.pydt_to_i8(result) == expected # with timezone for tz, offset in timezones: result = Timestamp(date_str, tz=tz) expected_tz = expected - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) + assert result.value == expected_tz + assert 
tslib.pydt_to_i8(result) == expected_tz # should preserve tz result = Timestamp(result) - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) + assert result.value == expected_tz + assert tslib.pydt_to_i8(result) == expected_tz # should convert to UTC result = Timestamp(result, tz='UTC') expected_utc = expected - self.assertEqual(result.value, expected_utc) - self.assertEqual(tslib.pydt_to_i8(result), expected_utc) + assert result.value == expected_utc + assert tslib.pydt_to_i8(result) == expected_utc # This should be 2013-11-01 05:00 in UTC # converted to Chicago tz result = Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago') - self.assertEqual(result.value, Timestamp('2013-11-01 05:00').value) + assert result.value == Timestamp('2013-11-01 05:00').value expected = "Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago')" # noqa - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) + assert repr(result) == expected + assert result == eval(repr(result)) # This should be 2013-11-01 05:00 in UTC # converted to Tokyo tz (+09:00) result = Timestamp('2013-11-01 00:00:00-0500', tz='Asia/Tokyo') - self.assertEqual(result.value, Timestamp('2013-11-01 05:00').value) + assert result.value == Timestamp('2013-11-01 05:00').value expected = "Timestamp('2013-11-01 14:00:00+0900', tz='Asia/Tokyo')" - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) + assert repr(result) == expected + assert result == eval(repr(result)) # GH11708 # This should be 2015-11-18 10:00 in UTC # converted to Asia/Katmandu result = Timestamp("2015-11-18 15:45:00+05:45", tz="Asia/Katmandu") - self.assertEqual(result.value, Timestamp("2015-11-18 10:00").value) + assert result.value == Timestamp("2015-11-18 10:00").value expected = "Timestamp('2015-11-18 15:45:00+0545', tz='Asia/Katmandu')" - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) + assert repr(result) == expected + assert result == eval(repr(result)) # This should be 2015-11-18 10:00 in UTC # converted to Asia/Kolkata result = Timestamp("2015-11-18 15:30:00+05:30", tz="Asia/Kolkata") - self.assertEqual(result.value, Timestamp("2015-11-18 10:00").value) + assert result.value == Timestamp("2015-11-18 10:00").value expected = "Timestamp('2015-11-18 15:30:00+0530', tz='Asia/Kolkata')" - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) + assert repr(result) == expected + assert result == eval(repr(result)) def test_constructor_invalid(self): with tm.assert_raises_regex(TypeError, 'Cannot convert input'): @@ -178,7 +178,7 @@ def test_constructor_invalid(self): Timestamp(Period('1000-01-01')) def test_constructor_positional(self): - # GH 10758 + # see gh-10758 with pytest.raises(TypeError): Timestamp(2000, 1) with pytest.raises(ValueError): @@ -190,14 +190,11 @@ def test_constructor_positional(self): with pytest.raises(ValueError): Timestamp(2000, 1, 32) - # GH 11630 - self.assertEqual( - repr(Timestamp(2015, 11, 12)), - repr(Timestamp('20151112'))) - - self.assertEqual( - repr(Timestamp(2015, 11, 12, 1, 2, 3, 999999)), - repr(Timestamp('2015-11-12 01:02:03.999999'))) + # see gh-11630 + assert (repr(Timestamp(2015, 11, 12)) == + repr(Timestamp('20151112'))) + assert (repr(Timestamp(2015, 11, 12, 1, 2, 3, 999999)) == + repr(Timestamp('2015-11-12 01:02:03.999999'))) def test_constructor_keyword(self): # GH 10758 @@ -212,37 +209,35 @@ def test_constructor_keyword(self): 
with pytest.raises(ValueError): Timestamp(year=2000, month=1, day=32) - self.assertEqual( - repr(Timestamp(year=2015, month=11, day=12)), - repr(Timestamp('20151112'))) + assert (repr(Timestamp(year=2015, month=11, day=12)) == + repr(Timestamp('20151112'))) - self.assertEqual( - repr(Timestamp(year=2015, month=11, day=12, - hour=1, minute=2, second=3, microsecond=999999)), - repr(Timestamp('2015-11-12 01:02:03.999999'))) + assert (repr(Timestamp(year=2015, month=11, day=12, hour=1, minute=2, + second=3, microsecond=999999)) == + repr(Timestamp('2015-11-12 01:02:03.999999'))) def test_constructor_fromordinal(self): base = datetime(2000, 1, 1) ts = Timestamp.fromordinal(base.toordinal(), freq='D') - self.assertEqual(base, ts) - self.assertEqual(ts.freq, 'D') - self.assertEqual(base.toordinal(), ts.toordinal()) + assert base == ts + assert ts.freq == 'D' + assert base.toordinal() == ts.toordinal() ts = Timestamp.fromordinal(base.toordinal(), tz='US/Eastern') - self.assertEqual(Timestamp('2000-01-01', tz='US/Eastern'), ts) - self.assertEqual(base.toordinal(), ts.toordinal()) + assert Timestamp('2000-01-01', tz='US/Eastern') == ts + assert base.toordinal() == ts.toordinal() def test_constructor_offset_depr(self): - # GH 12160 + # see gh-12160 with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): ts = Timestamp('2011-01-01', offset='D') - self.assertEqual(ts.freq, 'D') + assert ts.freq == 'D' with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertEqual(ts.offset, 'D') + assert ts.offset == 'D' msg = "Can only specify freq or offset, not both" with tm.assert_raises_regex(TypeError, msg): @@ -255,9 +250,9 @@ def test_constructor_offset_depr_fromordinal(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): ts = Timestamp.fromordinal(base.toordinal(), offset='D') - self.assertEqual(Timestamp('2000-01-01'), ts) - self.assertEqual(ts.freq, 'D') - self.assertEqual(base.toordinal(), ts.toordinal()) + assert Timestamp('2000-01-01') == ts + assert ts.freq == 'D' + assert base.toordinal() == ts.toordinal() msg = "Can only specify freq or offset, not both" with tm.assert_raises_regex(TypeError, msg): @@ -269,14 +264,14 @@ def test_conversion(self): result = ts.to_pydatetime() expected = datetime(2000, 1, 1) - self.assertEqual(result, expected) - self.assertEqual(type(result), type(expected)) + assert result == expected + assert type(result) == type(expected) result = ts.to_datetime64() expected = np.datetime64(ts.value, 'ns') - self.assertEqual(result, expected) - self.assertEqual(type(result), type(expected)) - self.assertEqual(result.dtype, expected.dtype) + assert result == expected + assert type(result) == type(expected) + assert result.dtype == expected.dtype def test_repr(self): tm._skip_if_no_pytz() @@ -365,20 +360,20 @@ def test_tz(self): t = '2014-02-01 09:00' ts = Timestamp(t) local = ts.tz_localize('Asia/Tokyo') - self.assertEqual(local.hour, 9) - self.assertEqual(local, Timestamp(t, tz='Asia/Tokyo')) + assert local.hour == 9 + assert local == Timestamp(t, tz='Asia/Tokyo') conv = local.tz_convert('US/Eastern') - self.assertEqual(conv, Timestamp('2014-01-31 19:00', tz='US/Eastern')) - self.assertEqual(conv.hour, 19) + assert conv == Timestamp('2014-01-31 19:00', tz='US/Eastern') + assert conv.hour == 19 # preserves nanosecond ts = Timestamp(t) + offsets.Nano(5) local = ts.tz_localize('Asia/Tokyo') - self.assertEqual(local.hour, 9) - self.assertEqual(local.nanosecond, 5) + assert local.hour == 9 + assert local.nanosecond == 5 
conv = local.tz_convert('US/Eastern') - self.assertEqual(conv.nanosecond, 5) - self.assertEqual(conv.hour, 19) + assert conv.nanosecond == 5 + assert conv.hour == 19 def test_tz_localize_ambiguous(self): @@ -387,8 +382,8 @@ def test_tz_localize_ambiguous(self): ts_no_dst = ts.tz_localize('US/Eastern', ambiguous=False) rng = date_range('2014-11-02', periods=3, freq='H', tz='US/Eastern') - self.assertEqual(rng[1], ts_dst) - self.assertEqual(rng[2], ts_no_dst) + assert rng[1] == ts_dst + assert rng[2] == ts_no_dst pytest.raises(ValueError, ts.tz_localize, 'US/Eastern', ambiguous='infer') @@ -431,13 +426,13 @@ def test_tz_localize_roundtrip(self): '2014-11-01 17:00', '2014-11-05 00:00']: ts = Timestamp(t) localized = ts.tz_localize(tz) - self.assertEqual(localized, Timestamp(t, tz=tz)) + assert localized == Timestamp(t, tz=tz) with pytest.raises(TypeError): localized.tz_localize(tz) reset = localized.tz_localize(None) - self.assertEqual(reset, ts) + assert reset == ts assert reset.tzinfo is None def test_tz_convert_roundtrip(self): @@ -448,10 +443,9 @@ def test_tz_convert_roundtrip(self): converted = ts.tz_convert(tz) reset = converted.tz_convert(None) - self.assertEqual(reset, Timestamp(t)) + assert reset == Timestamp(t) assert reset.tzinfo is None - self.assertEqual(reset, - converted.tz_convert('UTC').tz_localize(None)) + assert reset == converted.tz_convert('UTC').tz_localize(None) def test_barely_oob_dts(self): one_us = np.timedelta64(1).astype('timedelta64[us]') @@ -472,8 +466,7 @@ def test_barely_oob_dts(self): pytest.raises(ValueError, Timestamp, max_ts_us + one_us) def test_utc_z_designator(self): - self.assertEqual(get_timezone( - Timestamp('2014-11-02 01:00Z').tzinfo), 'UTC') + assert get_timezone(Timestamp('2014-11-02 01:00Z').tzinfo) == 'UTC' def test_now(self): # #9000 @@ -513,18 +506,20 @@ def test_today(self): def test_asm8(self): np.random.seed(7960929) - ns = [Timestamp.min.value, Timestamp.max.value, 1000, ] + ns = [Timestamp.min.value, Timestamp.max.value, 1000] + for n in ns: - self.assertEqual(Timestamp(n).asm8.view('i8'), - np.datetime64(n, 'ns').view('i8'), n) - self.assertEqual(Timestamp('nat').asm8.view('i8'), - np.datetime64('nat', 'ns').view('i8')) + assert (Timestamp(n).asm8.view('i8') == + np.datetime64(n, 'ns').view('i8') == n) + + assert (Timestamp('nat').asm8.view('i8') == + np.datetime64('nat', 'ns').view('i8')) def test_fields(self): def check(value, equal): # that we are int/long like assert isinstance(value, (int, compat.long)) - self.assertEqual(value, equal) + assert value == equal # GH 10050 ts = Timestamp('2015-05-10 09:06:03.000100001') @@ -587,7 +582,7 @@ def test_pprint(self): {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, {'w': {'a': Timestamp('2011-01-01 00:00:00')}}], 'foo': 1}""" - self.assertEqual(result, expected) + assert result == expected def to_datetime_depr(self): # see gh-8254 @@ -597,7 +592,7 @@ def to_datetime_depr(self): check_stacklevel=False): expected = datetime(2011, 1, 1) result = ts.to_datetime() - self.assertEqual(result, expected) + assert result == expected def to_pydatetime_nonzero_nano(self): ts = Timestamp('2011-01-01 9:00:00.123456789') @@ -607,7 +602,7 @@ def to_pydatetime_nonzero_nano(self): check_stacklevel=False): expected = datetime(2011, 1, 1, 9, 0, 0, 123456) result = ts.to_pydatetime() - self.assertEqual(result, expected) + assert result == expected def test_round(self): @@ -615,27 +610,27 @@ def test_round(self): dt = Timestamp('20130101 09:10:11') result = dt.round('D') expected = Timestamp('20130101') - 
self.assertEqual(result, expected) + assert result == expected dt = Timestamp('20130101 19:10:11') result = dt.round('D') expected = Timestamp('20130102') - self.assertEqual(result, expected) + assert result == expected dt = Timestamp('20130201 12:00:00') result = dt.round('D') expected = Timestamp('20130202') - self.assertEqual(result, expected) + assert result == expected dt = Timestamp('20130104 12:00:00') result = dt.round('D') expected = Timestamp('20130105') - self.assertEqual(result, expected) + assert result == expected dt = Timestamp('20130104 12:32:00') result = dt.round('30Min') expected = Timestamp('20130104 12:30:00') - self.assertEqual(result, expected) + assert result == expected dti = date_range('20130101 09:10:11', periods=5) result = dti.round('D') @@ -646,23 +641,23 @@ def test_round(self): dt = Timestamp('20130101 09:10:11') result = dt.floor('D') expected = Timestamp('20130101') - self.assertEqual(result, expected) + assert result == expected # ceil dt = Timestamp('20130101 09:10:11') result = dt.ceil('D') expected = Timestamp('20130102') - self.assertEqual(result, expected) + assert result == expected # round with tz dt = Timestamp('20130101 09:10:11', tz='US/Eastern') result = dt.round('D') expected = Timestamp('20130101', tz='US/Eastern') - self.assertEqual(result, expected) + assert result == expected dt = Timestamp('20130101 09:10:11', tz='US/Eastern') result = dt.round('s') - self.assertEqual(result, dt) + assert result == dt dti = date_range('20130101 09:10:11', periods=5).tz_localize('UTC').tz_convert('US/Eastern') @@ -680,19 +675,19 @@ def test_round(self): # GH 14440 & 15578 result = Timestamp('2016-10-17 12:00:00.0015').round('ms') expected = Timestamp('2016-10-17 12:00:00.002000') - self.assertEqual(result, expected) + assert result == expected result = Timestamp('2016-10-17 12:00:00.00149').round('ms') expected = Timestamp('2016-10-17 12:00:00.001000') - self.assertEqual(result, expected) + assert result == expected ts = Timestamp('2016-10-17 12:00:00.0015') for freq in ['us', 'ns']: - self.assertEqual(ts, ts.round(freq)) + assert ts == ts.round(freq) result = Timestamp('2016-10-17 12:00:00.001501031').round('10ns') expected = Timestamp('2016-10-17 12:00:00.001501030') - self.assertEqual(result, expected) + assert result == expected with tm.assert_produces_warning(): Timestamp('2016-10-17 12:00:00.001501031').round('1010ns') @@ -702,7 +697,7 @@ def test_round_misc(self): def _check_round(freq, expected): result = stamp.round(freq=freq) - self.assertEqual(result, expected) + assert result == expected for freq, expected in [('D', Timestamp('2000-01-05 00:00:00')), ('H', Timestamp('2000-01-05 05:00:00')), @@ -718,8 +713,8 @@ def test_class_ops_pytz(self): from pytz import timezone def compare(x, y): - self.assertEqual(int(Timestamp(x).value / 1e9), - int(Timestamp(y).value / 1e9)) + assert (int(Timestamp(x).value / 1e9) == + int(Timestamp(y).value / 1e9)) compare(Timestamp.now(), datetime.now()) compare(Timestamp.now('UTC'), datetime.now(timezone('UTC'))) @@ -741,8 +736,8 @@ def test_class_ops_dateutil(self): from dateutil.tz import tzutc def compare(x, y): - self.assertEqual(int(np.round(Timestamp(x).value / 1e9)), - int(np.round(Timestamp(y).value / 1e9))) + assert (int(np.round(Timestamp(x).value / 1e9)) == + int(np.round(Timestamp(y).value / 1e9))) compare(Timestamp.now(), datetime.now()) compare(Timestamp.now('UTC'), datetime.now(tzutc())) @@ -762,37 +757,37 @@ def compare(x, y): def test_basics_nanos(self): val = 
np.int64(946684800000000000).view('M8[ns]') stamp = Timestamp(val.view('i8') + 500) - self.assertEqual(stamp.year, 2000) - self.assertEqual(stamp.month, 1) - self.assertEqual(stamp.microsecond, 0) - self.assertEqual(stamp.nanosecond, 500) + assert stamp.year == 2000 + assert stamp.month == 1 + assert stamp.microsecond == 0 + assert stamp.nanosecond == 500 # GH 14415 val = np.iinfo(np.int64).min + 80000000000000 stamp = Timestamp(val) - self.assertEqual(stamp.year, 1677) - self.assertEqual(stamp.month, 9) - self.assertEqual(stamp.day, 21) - self.assertEqual(stamp.microsecond, 145224) - self.assertEqual(stamp.nanosecond, 192) + assert stamp.year == 1677 + assert stamp.month == 9 + assert stamp.day == 21 + assert stamp.microsecond == 145224 + assert stamp.nanosecond == 192 def test_unit(self): def check(val, unit=None, h=1, s=1, us=0): stamp = Timestamp(val, unit=unit) - self.assertEqual(stamp.year, 2000) - self.assertEqual(stamp.month, 1) - self.assertEqual(stamp.day, 1) - self.assertEqual(stamp.hour, h) + assert stamp.year == 2000 + assert stamp.month == 1 + assert stamp.day == 1 + assert stamp.hour == h if unit != 'D': - self.assertEqual(stamp.minute, 1) - self.assertEqual(stamp.second, s) - self.assertEqual(stamp.microsecond, us) + assert stamp.minute == 1 + assert stamp.second == s + assert stamp.microsecond == us else: - self.assertEqual(stamp.minute, 0) - self.assertEqual(stamp.second, 0) - self.assertEqual(stamp.microsecond, 0) - self.assertEqual(stamp.nanosecond, 0) + assert stamp.minute == 0 + assert stamp.second == 0 + assert stamp.microsecond == 0 + assert stamp.nanosecond == 0 ts = Timestamp('20000101 01:01:01') val = ts.value @@ -835,25 +830,25 @@ def test_roundtrip(self): base = Timestamp('20140101 00:00:00') result = Timestamp(base.value + Timedelta('5ms').value) - self.assertEqual(result, Timestamp(str(base) + ".005000")) - self.assertEqual(result.microsecond, 5000) + assert result == Timestamp(str(base) + ".005000") + assert result.microsecond == 5000 result = Timestamp(base.value + Timedelta('5us').value) - self.assertEqual(result, Timestamp(str(base) + ".000005")) - self.assertEqual(result.microsecond, 5) + assert result == Timestamp(str(base) + ".000005") + assert result.microsecond == 5 result = Timestamp(base.value + Timedelta('5ns').value) - self.assertEqual(result, Timestamp(str(base) + ".000000005")) - self.assertEqual(result.nanosecond, 5) - self.assertEqual(result.microsecond, 0) + assert result == Timestamp(str(base) + ".000000005") + assert result.nanosecond == 5 + assert result.microsecond == 0 result = Timestamp(base.value + Timedelta('6ms 5us').value) - self.assertEqual(result, Timestamp(str(base) + ".006005")) - self.assertEqual(result.microsecond, 5 + 6 * 1000) + assert result == Timestamp(str(base) + ".006005") + assert result.microsecond == 5 + 6 * 1000 result = Timestamp(base.value + Timedelta('200ms 5us').value) - self.assertEqual(result, Timestamp(str(base) + ".200005")) - self.assertEqual(result.microsecond, 5 + 200 * 1000) + assert result == Timestamp(str(base) + ".200005") + assert result.microsecond == 5 + 200 * 1000 def test_comparison(self): # 5-18-2012 00:00:00.000 @@ -861,7 +856,7 @@ def test_comparison(self): val = Timestamp(stamp) - self.assertEqual(val, val) + assert val == val assert not val != val assert not val < val assert val <= val @@ -869,7 +864,7 @@ def test_comparison(self): assert val >= val other = datetime(2012, 5, 18) - self.assertEqual(val, other) + assert val == other assert not val != other assert not val < other assert val <= 
other @@ -986,26 +981,26 @@ def test_cant_compare_tz_naive_w_aware_dateutil(self): def test_delta_preserve_nanos(self): val = Timestamp(long(1337299200000000123)) result = val + timedelta(1) - self.assertEqual(result.nanosecond, val.nanosecond) + assert result.nanosecond == val.nanosecond def test_frequency_misc(self): - self.assertEqual(frequencies.get_freq_group('T'), - frequencies.FreqGroup.FR_MIN) + assert (frequencies.get_freq_group('T') == + frequencies.FreqGroup.FR_MIN) code, stride = frequencies.get_freq_code(offsets.Hour()) - self.assertEqual(code, frequencies.FreqGroup.FR_HR) + assert code == frequencies.FreqGroup.FR_HR code, stride = frequencies.get_freq_code((5, 'T')) - self.assertEqual(code, frequencies.FreqGroup.FR_MIN) - self.assertEqual(stride, 5) + assert code == frequencies.FreqGroup.FR_MIN + assert stride == 5 offset = offsets.Hour() result = frequencies.to_offset(offset) - self.assertEqual(result, offset) + assert result == offset result = frequencies.to_offset((5, 'T')) expected = offsets.Minute(5) - self.assertEqual(result, expected) + assert result == expected pytest.raises(ValueError, frequencies.get_freq_code, (5, 'baz')) @@ -1015,12 +1010,12 @@ def test_frequency_misc(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = frequencies.get_standard_freq(offsets.Hour()) - self.assertEqual(result, 'H') + assert result == 'H' def test_hash_equivalent(self): d = {datetime(2011, 1, 1): 5} stamp = Timestamp(datetime(2011, 1, 1)) - self.assertEqual(d[stamp], 5) + assert d[stamp] == 5 def test_timestamp_compare_scalars(self): # case where ndim == 0 @@ -1041,11 +1036,11 @@ def test_timestamp_compare_scalars(self): expected = left_f(lhs, rhs) result = right_f(rhs, lhs) - self.assertEqual(result, expected) + assert result == expected expected = left_f(rhs, nat) result = right_f(nat, rhs) - self.assertEqual(result, expected) + assert result == expected def test_timestamp_compare_series(self): # make sure we can compare Timestamps on the right AND left hand side @@ -1108,7 +1103,7 @@ def assert_ns_timedelta(self, modified_timestamp, expected_value): value = self.timestamp.value modified_value = modified_timestamp.value - self.assertEqual(modified_value - value, expected_value) + assert modified_value - value == expected_value def test_timedelta_ns_arithmetic(self): self.assert_ns_timedelta(self.timestamp + np.timedelta64(-123, 'ns'), @@ -1131,68 +1126,68 @@ def test_nanosecond_string_parsing(self): # GH 7878 expected_repr = '2013-05-01 07:15:45.123456789' expected_value = 1367392545123456789 - self.assertEqual(ts.value, expected_value) + assert ts.value == expected_value assert expected_repr in repr(ts) ts = Timestamp('2013-05-01 07:15:45.123456789+09:00', tz='Asia/Tokyo') - self.assertEqual(ts.value, expected_value - 9 * 3600 * 1000000000) + assert ts.value == expected_value - 9 * 3600 * 1000000000 assert expected_repr in repr(ts) ts = Timestamp('2013-05-01 07:15:45.123456789', tz='UTC') - self.assertEqual(ts.value, expected_value) + assert ts.value == expected_value assert expected_repr in repr(ts) ts = Timestamp('2013-05-01 07:15:45.123456789', tz='US/Eastern') - self.assertEqual(ts.value, expected_value + 4 * 3600 * 1000000000) + assert ts.value == expected_value + 4 * 3600 * 1000000000 assert expected_repr in repr(ts) # GH 10041 ts = Timestamp('20130501T071545.123456789') - self.assertEqual(ts.value, expected_value) + assert ts.value == expected_value assert expected_repr in repr(ts) def test_nanosecond_timestamp(self): # GH 7610 expected = 
1293840000000000005 t = Timestamp('2011-01-01') + offsets.Nano(5) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 5) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000005')" + assert t.value == expected + assert t.nanosecond == 5 t = Timestamp(t) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 5) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000005')" + assert t.value == expected + assert t.nanosecond == 5 t = Timestamp(np_datetime64_compat('2011-01-01 00:00:00.000000005Z')) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 5) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000005')" + assert t.value == expected + assert t.nanosecond == 5 expected = 1293840000000000010 t = t + offsets.Nano(5) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 10) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000010')" + assert t.value == expected + assert t.nanosecond == 10 t = Timestamp(t) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 10) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000010')" + assert t.value == expected + assert t.nanosecond == 10 t = Timestamp(np_datetime64_compat('2011-01-01 00:00:00.000000010Z')) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 10) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000010')" + assert t.value == expected + assert t.nanosecond == 10 class TestTimestampOps(tm.TestCase): def test_timestamp_and_datetime(self): - self.assertEqual((Timestamp(datetime( - 2013, 10, 13)) - datetime(2013, 10, 12)).days, 1) - self.assertEqual((datetime(2013, 10, 12) - - Timestamp(datetime(2013, 10, 13))).days, -1) + assert ((Timestamp(datetime(2013, 10, 13)) - + datetime(2013, 10, 12)).days == 1) + assert ((datetime(2013, 10, 12) - + Timestamp(datetime(2013, 10, 13))).days == -1) def test_timestamp_and_series(self): timestamp_series = Series(date_range('2014-03-17', periods=2, freq='D', @@ -1213,42 +1208,36 @@ def test_addition_subtraction_types(self): timestamp_instance = date_range(datetime_instance, periods=1, freq='D')[0] - self.assertEqual(type(timestamp_instance + 1), Timestamp) - self.assertEqual(type(timestamp_instance - 1), Timestamp) + assert type(timestamp_instance + 1) == Timestamp + assert type(timestamp_instance - 1) == Timestamp # Timestamp + datetime not supported, though subtraction is supported # and yields timedelta more tests in tseries/base/tests/test_base.py - self.assertEqual( - type(timestamp_instance - datetime_instance), Timedelta) - self.assertEqual( - type(timestamp_instance + timedelta_instance), Timestamp) - self.assertEqual( - type(timestamp_instance - timedelta_instance), Timestamp) + assert type(timestamp_instance - datetime_instance) == Timedelta + assert type(timestamp_instance + timedelta_instance) == Timestamp + assert type(timestamp_instance - timedelta_instance) == Timestamp # Timestamp +/- datetime64 not supported, so not tested (could possibly # assert error raised?) 
timedelta64_instance = np.timedelta64(1, 'D') - self.assertEqual( - type(timestamp_instance + timedelta64_instance), Timestamp) - self.assertEqual( - type(timestamp_instance - timedelta64_instance), Timestamp) + assert type(timestamp_instance + timedelta64_instance) == Timestamp + assert type(timestamp_instance - timedelta64_instance) == Timestamp def test_addition_subtraction_preserve_frequency(self): timestamp_instance = date_range('2014-03-05', periods=1, freq='D')[0] timedelta_instance = timedelta(days=1) original_freq = timestamp_instance.freq - self.assertEqual((timestamp_instance + 1).freq, original_freq) - self.assertEqual((timestamp_instance - 1).freq, original_freq) - self.assertEqual( - (timestamp_instance + timedelta_instance).freq, original_freq) - self.assertEqual( - (timestamp_instance - timedelta_instance).freq, original_freq) + + assert (timestamp_instance + 1).freq == original_freq + assert (timestamp_instance - 1).freq == original_freq + assert (timestamp_instance + timedelta_instance).freq == original_freq + assert (timestamp_instance - timedelta_instance).freq == original_freq timedelta64_instance = np.timedelta64(1, 'D') - self.assertEqual( - (timestamp_instance + timedelta64_instance).freq, original_freq) - self.assertEqual( - (timestamp_instance - timedelta64_instance).freq, original_freq) + assert (timestamp_instance + + timedelta64_instance).freq == original_freq + assert (timestamp_instance - + timedelta64_instance).freq == original_freq def test_resolution(self): @@ -1264,30 +1253,30 @@ def test_resolution(self): idx = date_range(start='2013-04-01', periods=30, freq=freq, tz=tz) result = period.resolution(idx.asi8, idx.tz) - self.assertEqual(result, expected) + assert result == expected class TestTimestampToJulianDate(tm.TestCase): def test_compare_1700(self): r = Timestamp('1700-06-23').to_julian_date() - self.assertEqual(r, 2342145.5) + assert r == 2342145.5 def test_compare_2000(self): r = Timestamp('2000-04-12').to_julian_date() - self.assertEqual(r, 2451646.5) + assert r == 2451646.5 def test_compare_2100(self): r = Timestamp('2100-08-12').to_julian_date() - self.assertEqual(r, 2488292.5) + assert r == 2488292.5 def test_compare_hour01(self): r = Timestamp('2000-08-12T01:00:00').to_julian_date() - self.assertEqual(r, 2451768.5416666666666666) + assert r == 2451768.5416666666666666 def test_compare_hour13(self): r = Timestamp('2000-08-12T13:00:00').to_julian_date() - self.assertEqual(r, 2451769.0416666666666666) + assert r == 2451769.0416666666666666 class TestTimeSeries(tm.TestCase): @@ -1298,8 +1287,8 @@ def test_timestamp_to_datetime(self): stamp = rng[0] dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo def test_timestamp_to_datetime_dateutil(self): tm._skip_if_no_pytz() @@ -1307,8 +1296,8 @@ def test_timestamp_to_datetime_dateutil(self): stamp = rng[0] dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo def test_timestamp_to_datetime_explicit_pytz(self): tm._skip_if_no_pytz() @@ -1318,8 +1307,8 @@ def test_timestamp_to_datetime_explicit_pytz(self): stamp = rng[0] dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo def test_timestamp_to_datetime_explicit_dateutil(self): 
tm._skip_if_windows_python_3() @@ -1329,8 +1318,8 @@ def test_timestamp_to_datetime_explicit_dateutil(self): stamp = rng[0] dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo def test_timestamp_fields(self): # extra fields from DatetimeIndex like quarter and week @@ -1343,16 +1332,16 @@ def test_timestamp_fields(self): for f in fields: expected = getattr(idx, f)[-1] result = getattr(Timestamp(idx[-1]), f) - self.assertEqual(result, expected) + assert result == expected - self.assertEqual(idx.freq, Timestamp(idx[-1], idx.freq).freq) - self.assertEqual(idx.freqstr, Timestamp(idx[-1], idx.freq).freqstr) + assert idx.freq == Timestamp(idx[-1], idx.freq).freq + assert idx.freqstr == Timestamp(idx[-1], idx.freq).freqstr def test_timestamp_date_out_of_range(self): pytest.raises(ValueError, Timestamp, '1676-01-01') pytest.raises(ValueError, Timestamp, '2263-01-01') - # 1475 + # see gh-1475 pytest.raises(ValueError, DatetimeIndex, ['1400-01-01']) pytest.raises(ValueError, DatetimeIndex, [datetime(1400, 1, 1)]) @@ -1371,13 +1360,13 @@ def test_timestamp_from_ordinal(self): # GH 3042 dt = datetime(2011, 4, 16, 0, 0) ts = Timestamp.fromordinal(dt.toordinal()) - self.assertEqual(ts.to_pydatetime(), dt) + assert ts.to_pydatetime() == dt # with a tzinfo stamp = Timestamp('2011-4-16', tz='US/Eastern') dt_tz = stamp.to_pydatetime() ts = Timestamp.fromordinal(dt_tz.toordinal(), tz='US/Eastern') - self.assertEqual(ts.to_pydatetime(), dt_tz) + assert ts.to_pydatetime() == dt_tz def test_timestamp_compare_with_early_datetime(self): # e.g. datetime.min @@ -1461,9 +1450,9 @@ def test_dti_slicing(self): v2 = dti2[1] v3 = dti2[2] - self.assertEqual(v1, Timestamp('2/28/2005')) - self.assertEqual(v2, Timestamp('4/30/2005')) - self.assertEqual(v3, Timestamp('6/30/2005')) + assert v1 == Timestamp('2/28/2005') + assert v2 == Timestamp('4/30/2005') + assert v3 == Timestamp('6/30/2005') # don't carry freq through irregular slicing assert dti2.freq is None @@ -1473,27 +1462,27 @@ def test_woy_boundary(self): d = datetime(2013, 12, 31) result = Timestamp(d).week expected = 1 # ISO standard - self.assertEqual(result, expected) + assert result == expected d = datetime(2008, 12, 28) result = Timestamp(d).week expected = 52 # ISO standard - self.assertEqual(result, expected) + assert result == expected d = datetime(2009, 12, 31) result = Timestamp(d).week expected = 53 # ISO standard - self.assertEqual(result, expected) + assert result == expected d = datetime(2010, 1, 1) result = Timestamp(d).week expected = 53 # ISO standard - self.assertEqual(result, expected) + assert result == expected d = datetime(2010, 1, 3) result = Timestamp(d).week expected = 53 # ISO standard - self.assertEqual(result, expected) + assert result == expected result = np.array([Timestamp(datetime(*args)).week for args in [(2000, 1, 1), (2000, 1, 2), ( @@ -1516,12 +1505,10 @@ def test_to_datetime_bijective(self): # by going from nanoseconds to microseconds. 
exp_warning = None if Timestamp.max.nanosecond == 0 else UserWarning with tm.assert_produces_warning(exp_warning, check_stacklevel=False): - self.assertEqual( - Timestamp(Timestamp.max.to_pydatetime()).value / 1000, - Timestamp.max.value / 1000) + assert (Timestamp(Timestamp.max.to_pydatetime()).value / 1000 == + Timestamp.max.value / 1000) exp_warning = None if Timestamp.min.nanosecond == 0 else UserWarning with tm.assert_produces_warning(exp_warning, check_stacklevel=False): - self.assertEqual( - Timestamp(Timestamp.min.to_pydatetime()).value / 1000, - Timestamp.min.value / 1000) + assert (Timestamp(Timestamp.min.to_pydatetime()).value / 1000 == + Timestamp.min.value / 1000) diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index e0964fea95cc9..33a4cdb6e26c4 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -38,7 +38,7 @@ def test_setindex(self): def test_rename(self): renamer = lambda x: x.strftime('%Y%m%d') renamed = self.ts.rename(renamer) - self.assertEqual(renamed.index[0], renamer(self.ts.index[0])) + assert renamed.index[0] == renamer(self.ts.index[0]) # dict rename_dict = dict(zip(self.ts.index, renamed.index)) @@ -55,7 +55,7 @@ def test_rename(self): index=Index(['a', 'b', 'c', 'd'], name='name'), dtype='int64') renamed = renamer.rename({}) - self.assertEqual(renamed.index.name, renamer.index.name) + assert renamed.index.name == renamer.index.name def test_rename_by_series(self): s = Series(range(5), name='foo') @@ -68,7 +68,7 @@ def test_rename_set_name(self): s = Series(range(4), index=list('abcd')) for name in ['foo', 123, 123., datetime(2001, 11, 11), ('foo',)]: result = s.rename(name) - self.assertEqual(result.name, name) + assert result.name == name tm.assert_numpy_array_equal(result.index.values, s.index.values) assert s.name is None @@ -76,7 +76,7 @@ def test_rename_set_name_inplace(self): s = Series(range(3), index=list('abc')) for name in ['foo', 123, 123., datetime(2001, 11, 11), ('foo',)]: s.rename(name, inplace=True) - self.assertEqual(s.name, name) + assert s.name == name exp = np.array(['a', 'b', 'c'], dtype=np.object_) tm.assert_numpy_array_equal(s.index.values, exp) @@ -86,14 +86,14 @@ def test_set_name_attribute(self): s2 = Series([1, 2, 3], name='bar') for name in [7, 7., 'name', datetime(2001, 1, 1), (1,), u"\u05D0"]: s.name = name - self.assertEqual(s.name, name) + assert s.name == name s2.name = name - self.assertEqual(s2.name, name) + assert s2.name == name def test_set_name(self): s = Series([1, 2, 3]) s2 = s._set_name('foo') - self.assertEqual(s2.name, 'foo') + assert s2.name == 'foo' assert s.name is None assert s is not s2 @@ -102,7 +102,7 @@ def test_rename_inplace(self): expected = renamer(self.ts.index[0]) self.ts.rename(renamer, inplace=True) - self.assertEqual(self.ts.index[0], expected) + assert self.ts.index[0] == expected def test_set_index_makes_timeseries(self): idx = tm.makeDateIndex(10) @@ -135,7 +135,7 @@ def test_reset_index(self): [0, 1, 0, 1, 0, 1]]) s = Series(np.random.randn(6), index=index) rs = s.reset_index(level=1) - self.assertEqual(len(rs.columns), 2) + assert len(rs.columns) == 2 rs = s.reset_index(level=[0, 2], drop=True) tm.assert_index_equal(rs.index, Index(index.get_level_values(1))) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 233d71cb1d8a5..73515c47388ea 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -32,14 +32,14 @@ class 
TestSeriesAnalytics(TestData, tm.TestCase): def test_sum_zero(self): arr = np.array([]) - self.assertEqual(nanops.nansum(arr), 0) + assert nanops.nansum(arr) == 0 arr = np.empty((10, 0)) assert (nanops.nansum(arr, axis=1) == 0).all() # GH #844 s = Series([], index=[]) - self.assertEqual(s.sum(), 0) + assert s.sum() == 0 df = DataFrame(np.empty((10, 0))) assert (df.sum(1) == 0).all() @@ -58,19 +58,19 @@ def test_overflow(self): # no bottleneck result = s.sum(skipna=False) - self.assertEqual(int(result), v.sum(dtype='int64')) + assert int(result) == v.sum(dtype='int64') result = s.min(skipna=False) - self.assertEqual(int(result), 0) + assert int(result) == 0 result = s.max(skipna=False) - self.assertEqual(int(result), v[-1]) + assert int(result) == v[-1] # use bottleneck if available result = s.sum() - self.assertEqual(int(result), v.sum(dtype='int64')) + assert int(result) == v.sum(dtype='int64') result = s.min() - self.assertEqual(int(result), 0) + assert int(result) == 0 result = s.max() - self.assertEqual(int(result), v[-1]) + assert int(result) == v[-1] for dtype in ['float32', 'float64']: v = np.arange(5000000, dtype=dtype) @@ -78,7 +78,7 @@ def test_overflow(self): # no bottleneck result = s.sum(skipna=False) - self.assertEqual(result, v.sum(dtype=dtype)) + assert result == v.sum(dtype=dtype) result = s.min(skipna=False) assert np.allclose(float(result), 0.0) result = s.max(skipna=False) @@ -86,7 +86,7 @@ def test_overflow(self): # use bottleneck if available result = s.sum() - self.assertEqual(result, v.sum(dtype=dtype)) + assert result == v.sum(dtype=dtype) result = s.min() assert np.allclose(float(result), 0.0) result = s.max() @@ -284,7 +284,7 @@ def test_skew(self): assert np.isnan(s.skew()) assert np.isnan(df.skew()).all() else: - self.assertEqual(0, s.skew()) + assert 0 == s.skew() assert (df.skew() == 0).all() def test_kurt(self): @@ -310,7 +310,7 @@ def test_kurt(self): assert np.isnan(s.kurt()) assert np.isnan(df.kurt()).all() else: - self.assertEqual(0, s.kurt()) + assert 0 == s.kurt() assert (df.kurt() == 0).all() def test_describe(self): @@ -341,9 +341,9 @@ def test_argsort(self): # GH 2967 (introduced bug in 0.11-dev I think) s = Series([Timestamp('201301%02d' % (i + 1)) for i in range(5)]) - self.assertEqual(s.dtype, 'datetime64[ns]') + assert s.dtype == 'datetime64[ns]' shifted = s.shift(-1) - self.assertEqual(shifted.dtype, 'datetime64[ns]') + assert shifted.dtype == 'datetime64[ns]' assert isnull(shifted[4]) result = s.argsort() @@ -520,7 +520,7 @@ def testit(): assert nanops._USE_BOTTLENECK import bottleneck as bn # noqa assert bn.__version__ >= LooseVersion('1.0') - self.assertEqual(f(allna), 0.0) + assert f(allna) == 0.0 except: assert np.isnan(f(allna)) @@ -539,7 +539,7 @@ def testit(): s = Series(bdate_range('1/1/2000', periods=10)) res = f(s) exp = alternate(s) - self.assertEqual(res, exp) + assert res == exp # check on string data if name not in ['sum', 'min', 'max']: @@ -609,7 +609,7 @@ def test_round(self): expected = Series(np.round(self.ts.values, 2), index=self.ts.index, name='ts') assert_series_equal(result, expected) - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name def test_numpy_round(self): # See gh-12600 @@ -651,7 +651,7 @@ def test_all_any(self): # Alternative types, with implicit 'object' dtype. s = Series(['abc', True]) - self.assertEqual('abc', s.any()) # 'abc' || True => 'abc' + assert 'abc' == s.any() # 'abc' || True => 'abc' def test_all_any_params(self): # Check skipna, with implicit 'object' dtype. 
@@ -719,7 +719,7 @@ def test_ops_consistency_on_empty(self): # float result = Series(dtype=float).sum() - self.assertEqual(result, 0) + assert result == 0 result = Series(dtype=float).mean() assert isnull(result) @@ -729,7 +729,7 @@ def test_ops_consistency_on_empty(self): # timedelta64[ns] result = Series(dtype='m8[ns]').sum() - self.assertEqual(result, Timedelta(0)) + assert result == Timedelta(0) result = Series(dtype='m8[ns]').mean() assert result is pd.NaT @@ -827,11 +827,11 @@ def test_cov(self): assert isnull(ts1.cov(ts2, min_periods=12)) def test_count(self): - self.assertEqual(self.ts.count(), len(self.ts)) + assert self.ts.count() == len(self.ts) self.ts[::2] = np.NaN - self.assertEqual(self.ts.count(), np.isfinite(self.ts).sum()) + assert self.ts.count() == np.isfinite(self.ts).sum() mi = MultiIndex.from_arrays([list('aabbcc'), [1, 2, 2, nan, 1, 2]]) ts = Series(np.arange(len(mi)), index=mi) @@ -876,7 +876,7 @@ def test_value_counts_nunique(self): series[20:500] = np.nan series[10:20] = 5000 result = series.nunique() - self.assertEqual(result, 11) + assert result == 11 def test_unique(self): @@ -884,18 +884,18 @@ def test_unique(self): s = Series([1.2345] * 100) s[::2] = np.nan result = s.unique() - self.assertEqual(len(result), 2) + assert len(result) == 2 s = Series([1.2345] * 100, dtype='f4') s[::2] = np.nan result = s.unique() - self.assertEqual(len(result), 2) + assert len(result) == 2 # NAs in object arrays #714 s = Series(['foo'] * 100, dtype='O') s[::2] = np.nan result = s.unique() - self.assertEqual(len(result), 2) + assert len(result) == 2 # decision about None s = Series([1, 2, 3, None, None, None], dtype=object) @@ -953,11 +953,11 @@ def test_drop_duplicates(self): def test_clip(self): val = self.ts.median() - self.assertEqual(self.ts.clip_lower(val).min(), val) - self.assertEqual(self.ts.clip_upper(val).max(), val) + assert self.ts.clip_lower(val).min() == val + assert self.ts.clip_upper(val).max() == val - self.assertEqual(self.ts.clip(lower=val).min(), val) - self.assertEqual(self.ts.clip(upper=val).max(), val) + assert self.ts.clip(lower=val).min() == val + assert self.ts.clip(upper=val).max() == val result = self.ts.clip(-0.5, 0.5) expected = np.clip(self.ts, -0.5, 0.5) @@ -974,10 +974,10 @@ def test_clip_types_and_nulls(self): thresh = s[2] l = s.clip_lower(thresh) u = s.clip_upper(thresh) - self.assertEqual(l[notnull(l)].min(), thresh) - self.assertEqual(u[notnull(u)].max(), thresh) - self.assertEqual(list(isnull(s)), list(isnull(l))) - self.assertEqual(list(isnull(s)), list(isnull(u))) + assert l[notnull(l)].min() == thresh + assert u[notnull(u)].max() == thresh + assert list(isnull(s)) == list(isnull(l)) + assert list(isnull(s)) == list(isnull(u)) def test_clip_against_series(self): # GH #6966 @@ -1109,20 +1109,20 @@ def test_timedelta64_analytics(self): Timestamp('20120101') result = td.idxmin() - self.assertEqual(result, 0) + assert result == 0 result = td.idxmax() - self.assertEqual(result, 2) + assert result == 2 # GH 2982 # with NaT td[0] = np.nan result = td.idxmin() - self.assertEqual(result, 1) + assert result == 1 result = td.idxmax() - self.assertEqual(result, 2) + assert result == 2 # abs s1 = Series(date_range('20120101', periods=3)) @@ -1139,11 +1139,11 @@ def test_timedelta64_analytics(self): # max/min result = td.max() expected = Timedelta('2 days') - self.assertEqual(result, expected) + assert result == expected result = td.min() expected = Timedelta('1 days') - self.assertEqual(result, expected) + assert result == expected def 
test_idxmin(self): # test idxmin @@ -1153,14 +1153,14 @@ def test_idxmin(self): self.series[5:15] = np.NaN # skipna or no - self.assertEqual(self.series[self.series.idxmin()], self.series.min()) + assert self.series[self.series.idxmin()] == self.series.min() assert isnull(self.series.idxmin(skipna=False)) # no NaNs nona = self.series.dropna() - self.assertEqual(nona[nona.idxmin()], nona.min()) - self.assertEqual(nona.index.values.tolist().index(nona.idxmin()), - nona.values.argmin()) + assert nona[nona.idxmin()] == nona.min() + assert (nona.index.values.tolist().index(nona.idxmin()) == + nona.values.argmin()) # all NaNs allna = self.series * nan @@ -1170,17 +1170,17 @@ def test_idxmin(self): from pandas import date_range s = Series(date_range('20130102', periods=6)) result = s.idxmin() - self.assertEqual(result, 0) + assert result == 0 s[0] = np.nan result = s.idxmin() - self.assertEqual(result, 1) + assert result == 1 def test_numpy_argmin(self): # argmin is aliased to idxmin data = np.random.randint(0, 11, size=10) result = np.argmin(Series(data)) - self.assertEqual(result, np.argmin(data)) + assert result == np.argmin(data) if not _np_version_under1p10: msg = "the 'out' parameter is not supported" @@ -1195,14 +1195,14 @@ def test_idxmax(self): self.series[5:15] = np.NaN # skipna or no - self.assertEqual(self.series[self.series.idxmax()], self.series.max()) + assert self.series[self.series.idxmax()] == self.series.max() assert isnull(self.series.idxmax(skipna=False)) # no NaNs nona = self.series.dropna() - self.assertEqual(nona[nona.idxmax()], nona.max()) - self.assertEqual(nona.index.values.tolist().index(nona.idxmax()), - nona.values.argmax()) + assert nona[nona.idxmax()] == nona.max() + assert (nona.index.values.tolist().index(nona.idxmax()) == + nona.values.argmax()) # all NaNs allna = self.series * nan @@ -1211,32 +1211,32 @@ def test_idxmax(self): from pandas import date_range s = Series(date_range('20130102', periods=6)) result = s.idxmax() - self.assertEqual(result, 5) + assert result == 5 s[5] = np.nan result = s.idxmax() - self.assertEqual(result, 4) + assert result == 4 # Float64Index # GH 5914 s = pd.Series([1, 2, 3], [1.1, 2.1, 3.1]) result = s.idxmax() - self.assertEqual(result, 3.1) + assert result == 3.1 result = s.idxmin() - self.assertEqual(result, 1.1) + assert result == 1.1 s = pd.Series(s.index, s.index) result = s.idxmax() - self.assertEqual(result, 3.1) + assert result == 3.1 result = s.idxmin() - self.assertEqual(result, 1.1) + assert result == 1.1 def test_numpy_argmax(self): # argmax is aliased to idxmax data = np.random.randint(0, 11, size=10) result = np.argmax(Series(data)) - self.assertEqual(result, np.argmax(data)) + assert result == np.argmax(data) if not _np_version_under1p10: msg = "the 'out' parameter is not supported" @@ -1247,11 +1247,11 @@ def test_ptp(self): N = 1000 arr = np.random.randn(N) ser = Series(arr) - self.assertEqual(np.ptp(ser), np.ptp(arr)) + assert np.ptp(ser) == np.ptp(arr) # GH11163 s = Series([3, 5, np.nan, -3, 10]) - self.assertEqual(s.ptp(), 13) + assert s.ptp() == 13 assert pd.isnull(s.ptp(skipna=False)) mi = pd.MultiIndex.from_product([['a', 'b'], [1, 2, 3]]) @@ -1326,7 +1326,7 @@ def test_searchsorted_numeric_dtypes_scalar(self): s = Series([1, 2, 90, 1000, 3e9]) r = s.searchsorted(30) e = 2 - self.assertEqual(r, e) + assert r == e r = s.searchsorted([30]) e = np.array([2], dtype=np.intp) @@ -1343,7 +1343,7 @@ def test_search_sorted_datetime64_scalar(self): v = pd.Timestamp('20120102') r = s.searchsorted(v) e = 1 - 
self.assertEqual(r, e) + assert r == e def test_search_sorted_datetime64_list(self): s = Series(pd.date_range('20120101', periods=10, freq='2D')) @@ -1417,7 +1417,7 @@ def test_apply_categorical(self): result = s.apply(lambda x: 'A') exp = pd.Series(['A'] * 7, name='XX', index=list('abcdefg')) tm.assert_series_equal(result, exp) - self.assertEqual(result.dtype, np.object) + assert result.dtype == np.object def test_shift_int(self): ts = self.ts.astype(int) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 7d331f0643b18..5bb463c7a2ebe 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -23,11 +23,11 @@ class SharedWithSparse(object): def test_scalarop_preserve_name(self): result = self.ts * 2 - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name def test_copy_name(self): result = self.ts.copy() - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name def test_copy_index_name_checking(self): # don't want to be able to modify the index stored elsewhere after @@ -44,17 +44,17 @@ def test_copy_index_name_checking(self): def test_append_preserve_name(self): result = self.ts[:5].append(self.ts[5:]) - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name def test_binop_maybe_preserve_name(self): # names match, preserve result = self.ts * self.ts - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name result = self.ts.mul(self.ts) - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name result = self.ts * self.ts[:-2] - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name # names don't match, don't preserve cp = self.ts.copy() @@ -70,7 +70,7 @@ def test_binop_maybe_preserve_name(self): # names match, preserve s = self.ts.copy() result = getattr(s, op)(s) - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name # names don't match, don't preserve cp = self.ts.copy() @@ -80,17 +80,17 @@ def test_binop_maybe_preserve_name(self): def test_combine_first_name(self): result = self.ts.combine_first(self.ts[:5]) - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name def test_getitem_preserve_name(self): result = self.ts[self.ts > 0] - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name result = self.ts[[0, 2, 4]] - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name result = self.ts[5:10] - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name def test_pickle(self): unp_series = self._pickle_roundtrip(self.series) @@ -107,15 +107,15 @@ def _pickle_roundtrip(self, obj): def test_argsort_preserve_name(self): result = self.ts.argsort() - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name def test_sort_index_name(self): result = self.ts.sort_index(ascending=False) - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name def test_to_sparse_pass_name(self): result = self.ts.to_sparse() - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name class TestSeriesMisc(TestData, SharedWithSparse, tm.TestCase): @@ -158,46 +158,47 @@ def test_contains(self): def test_iter(self): for i, val in enumerate(self.series): - self.assertEqual(val, self.series[i]) + assert val == self.series[i] for i, val in enumerate(self.ts): - self.assertEqual(val, self.ts[i]) + assert val == 
self.ts[i] def test_iter_box(self): vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'datetime64[ns]') + assert s.dtype == 'datetime64[ns]' for res, exp in zip(s, vals): assert isinstance(res, pd.Timestamp) - self.assertEqual(res, exp) assert res.tz is None + assert res == exp vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'), pd.Timestamp('2011-01-02', tz='US/Eastern')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'datetime64[ns, US/Eastern]') + + assert s.dtype == 'datetime64[ns, US/Eastern]' for res, exp in zip(s, vals): assert isinstance(res, pd.Timestamp) - self.assertEqual(res, exp) - self.assertEqual(res.tz, exp.tz) + assert res.tz == exp.tz + assert res == exp # timedelta vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'timedelta64[ns]') + assert s.dtype == 'timedelta64[ns]' for res, exp in zip(s, vals): assert isinstance(res, pd.Timedelta) - self.assertEqual(res, exp) + assert res == exp # period (object dtype, not boxed) vals = [pd.Period('2011-01-01', freq='M'), pd.Period('2011-01-02', freq='M')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'object') + assert s.dtype == 'object' for res, exp in zip(s, vals): assert isinstance(res, pd.Period) - self.assertEqual(res, exp) - self.assertEqual(res.freq, 'M') + assert res.freq == 'M' + assert res == exp def test_keys(self): # HACK: By doing this in two stages, we avoid 2to3 wrapping the call @@ -210,10 +211,10 @@ def test_values(self): def test_iteritems(self): for idx, val in compat.iteritems(self.series): - self.assertEqual(val, self.series[idx]) + assert val == self.series[idx] for idx, val in compat.iteritems(self.ts): - self.assertEqual(val, self.ts[idx]) + assert val == self.ts[idx] # assert is lazy (generators don't define reverse, lists do) assert not hasattr(self.series.iteritems(), 'reverse') @@ -274,9 +275,9 @@ def test_copy(self): def test_axis_alias(self): s = Series([1, 2, np.nan]) assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index')) - self.assertEqual(s.dropna().sum('rows'), 3) - self.assertEqual(s._get_axis_number('rows'), 0) - self.assertEqual(s._get_axis_name('rows'), 'index') + assert s.dropna().sum('rows') == 3 + assert s._get_axis_number('rows') == 0 + assert s._get_axis_name('rows') == 'index' def test_numpy_unique(self): # it works!
@@ -293,19 +294,19 @@ def f(x): result = tsdf.apply(f) expected = tsdf.max() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # .item() s = Series([1]) result = s.item() - self.assertEqual(result, 1) - self.assertEqual(s.item(), s.iloc[0]) + assert result == 1 + assert s.item() == s.iloc[0] # using an ndarray like function s = Series(np.random.randn(10)) - result = np.ones_like(s) + result = Series(np.ones_like(s)) expected = Series(1, index=range(10), dtype='float64') - # assert_series_equal(result,expected) + tm.assert_series_equal(result, expected) # ravel s = Series(np.random.randn(10)) @@ -315,21 +316,21 @@ def f(x): # GH 6658 s = Series([0, 1., -1], index=list('abc')) result = np.compress(s > 0, s) - assert_series_equal(result, Series([1.], index=['b'])) + tm.assert_series_equal(result, Series([1.], index=['b'])) result = np.compress(s < -1, s) # result empty Index(dtype=object) the same as the original exp = Series([], dtype='float64', index=Index([], dtype='object')) - assert_series_equal(result, exp) + tm.assert_series_equal(result, exp) s = Series([0, 1., -1], index=[.1, .2, .3]) result = np.compress(s > 0, s) - assert_series_equal(result, Series([1.], index=[.2])) + tm.assert_series_equal(result, Series([1.], index=[.2])) result = np.compress(s < -1, s) # result empty Float64Index the same as the original exp = Series([], dtype='float64', index=Index([], dtype='float64')) - assert_series_equal(result, exp) + tm.assert_series_equal(result, exp) def test_str_attribute(self): # GH9068 diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index c764d7b856bb8..089a2c36a5574 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -61,27 +61,27 @@ def test_apply_dont_convert_dtype(self): f = lambda x: x if x > 0 else np.nan result = s.apply(f, convert_dtype=False) - self.assertEqual(result.dtype, object) + assert result.dtype == object def test_with_string_args(self): for arg in ['sum', 'mean', 'min', 'max', 'std']: result = self.ts.apply(arg) expected = getattr(self.ts, arg)() - self.assertEqual(result, expected) + assert result == expected def test_apply_args(self): s = Series(['foo,bar']) result = s.apply(str.split, args=(',', )) - self.assertEqual(result[0], ['foo', 'bar']) + assert result[0] == ['foo', 'bar'] assert isinstance(result[0], list) def test_apply_box(self): # ufunc will not be boxed.
Same test cases as the test_map_box vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'datetime64[ns]') + assert s.dtype == 'datetime64[ns]' # boxed value must be Timestamp instance res = s.apply(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__, x.day, x.tz)) @@ -91,7 +91,7 @@ def test_apply_box(self): vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'), pd.Timestamp('2011-01-02', tz='US/Eastern')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'datetime64[ns, US/Eastern]') + assert s.dtype == 'datetime64[ns, US/Eastern]' res = s.apply(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__, x.day, x.tz)) exp = pd.Series(['Timestamp_1_US/Eastern', 'Timestamp_2_US/Eastern']) @@ -100,7 +100,7 @@ def test_apply_box(self): # timedelta vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'timedelta64[ns]') + assert s.dtype == 'timedelta64[ns]' res = s.apply(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.days)) exp = pd.Series(['Timedelta_1', 'Timedelta_2']) tm.assert_series_equal(res, exp) @@ -109,7 +109,7 @@ def test_apply_box(self): vals = [pd.Period('2011-01-01', freq='M'), pd.Period('2011-01-02', freq='M')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'object') + assert s.dtype == 'object' res = s.apply(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.freqstr)) exp = pd.Series(['Period_M', 'Period_M']) @@ -318,13 +318,13 @@ def test_map(self): merged = target.map(source) for k, v in compat.iteritems(merged): - self.assertEqual(v, source[target[k]]) + assert v == source[target[k]] # input could be a dict merged = target.map(source.to_dict()) for k, v in compat.iteritems(merged): - self.assertEqual(v, source[target[k]]) + assert v == source[target[k]] # function result = self.ts.map(lambda x: x * 2) @@ -372,11 +372,11 @@ def test_map_int(self): left = Series({'a': 1., 'b': 2., 'c': 3., 'd': 4}) right = Series({1: 11, 2: 22, 3: 33}) - self.assertEqual(left.dtype, np.float_) + assert left.dtype == np.float_ assert issubclass(right.dtype.type, np.integer) merged = left.map(right) - self.assertEqual(merged.dtype, np.float_) + assert merged.dtype == np.float_ assert isnull(merged['d']) assert not isnull(merged['c']) @@ -389,7 +389,7 @@ def test_map_decimal(self): from decimal import Decimal result = self.series.map(lambda x: Decimal(str(x))) - self.assertEqual(result.dtype, np.object_) + assert result.dtype == np.object_ assert isinstance(result[0], Decimal) def test_map_na_exclusion(self): @@ -457,7 +457,7 @@ class DictWithoutMissing(dict): def test_map_box(self): vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'datetime64[ns]') + assert s.dtype == 'datetime64[ns]' # boxed value must be Timestamp instance res = s.map(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__, x.day, x.tz)) @@ -467,7 +467,7 @@ def test_map_box(self): vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'), pd.Timestamp('2011-01-02', tz='US/Eastern')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'datetime64[ns, US/Eastern]') + assert s.dtype == 'datetime64[ns, US/Eastern]' res = s.map(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__, x.day, x.tz)) exp = pd.Series(['Timestamp_1_US/Eastern', 'Timestamp_2_US/Eastern']) @@ -476,7 +476,7 @@ def test_map_box(self): # timedelta vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'timedelta64[ns]') + assert s.dtype == 'timedelta64[ns]' res = 
s.map(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.days)) exp = pd.Series(['Timedelta_1', 'Timedelta_2']) tm.assert_series_equal(res, exp) @@ -485,7 +485,7 @@ def test_map_box(self): vals = [pd.Period('2011-01-01', freq='M'), pd.Period('2011-01-02', freq='M')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'object') + assert s.dtype == 'object' res = s.map(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.freqstr)) exp = pd.Series(['Period_M', 'Period_M']) @@ -506,7 +506,7 @@ def test_map_categorical(self): result = s.map(lambda x: 'A') exp = pd.Series(['A'] * 7, name='XX', index=list('abcdefg')) tm.assert_series_equal(result, exp) - self.assertEqual(result.dtype, np.object) + assert result.dtype == np.object with pytest.raises(NotImplementedError): s.map(lambda x: x, na_action='ignore') diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/test_asof.py index 80556a5e5ffdb..a839d571c116c 100644 --- a/pandas/tests/series/test_asof.py +++ b/pandas/tests/series/test_asof.py @@ -37,7 +37,7 @@ def test_basic(self): assert (rs == ts[lb]).all() val = result[result.index[result.index >= ub][0]] - self.assertEqual(ts[ub], val) + assert ts[ub] == val def test_scalar(self): @@ -50,16 +50,16 @@ def test_scalar(self): val1 = ts.asof(ts.index[7]) val2 = ts.asof(ts.index[19]) - self.assertEqual(val1, ts[4]) - self.assertEqual(val2, ts[14]) + assert val1 == ts[4] + assert val2 == ts[14] # accepts strings val1 = ts.asof(str(ts.index[7])) - self.assertEqual(val1, ts[4]) + assert val1 == ts[4] # in there result = ts.asof(ts.index[3]) - self.assertEqual(result, ts[3]) + assert result == ts[3] # no as of value d = ts.index[0] - offsets.BDay() @@ -118,15 +118,15 @@ def test_periodindex(self): val1 = ts.asof(ts.index[7]) val2 = ts.asof(ts.index[19]) - self.assertEqual(val1, ts[4]) - self.assertEqual(val2, ts[14]) + assert val1 == ts[4] + assert val2 == ts[14] # accepts strings val1 = ts.asof(str(ts.index[7])) - self.assertEqual(val1, ts[4]) + assert val1 == ts[4] # in there - self.assertEqual(ts.asof(ts.index[3]), ts[3]) + assert ts.asof(ts.index[3]) == ts[3] # no as of value d = ts.index[0].to_timestamp() - offsets.BDay() diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 6042a8c0a2e9d..1291449ae7ce9 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -24,9 +24,9 @@ def test_append(self): appendedSeries = self.series.append(self.objSeries) for idx, value in compat.iteritems(appendedSeries): if idx in self.series.index: - self.assertEqual(value, self.series[idx]) + assert value == self.series[idx] elif idx in self.objSeries.index: - self.assertEqual(value, self.objSeries[idx]) + assert value == self.objSeries[idx] else: self.fail("orphaned index!") @@ -117,9 +117,9 @@ def test_concat_empty_series_dtypes_roundtrips(self): 'M8[ns]']) for dtype in dtypes: - self.assertEqual(pd.concat([Series(dtype=dtype)]).dtype, dtype) - self.assertEqual(pd.concat([Series(dtype=dtype), - Series(dtype=dtype)]).dtype, dtype) + assert pd.concat([Series(dtype=dtype)]).dtype == dtype + assert pd.concat([Series(dtype=dtype), + Series(dtype=dtype)]).dtype == dtype def int_result_type(dtype, dtype2): typs = set([dtype.kind, dtype2.kind]) @@ -155,55 +155,52 @@ def get_result_type(dtype, dtype2): expected = get_result_type(dtype, dtype2) result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2) ]).dtype - self.assertEqual(result.kind, expected) + assert result.kind == expected def 
test_concat_empty_series_dtypes(self): - # bools - self.assertEqual(pd.concat([Series(dtype=np.bool_), - Series(dtype=np.int32)]).dtype, np.int32) - self.assertEqual(pd.concat([Series(dtype=np.bool_), - Series(dtype=np.float32)]).dtype, - np.object_) - - # datetimelike - self.assertEqual(pd.concat([Series(dtype='m8[ns]'), - Series(dtype=np.bool)]).dtype, np.object_) - self.assertEqual(pd.concat([Series(dtype='m8[ns]'), - Series(dtype=np.int64)]).dtype, np.object_) - self.assertEqual(pd.concat([Series(dtype='M8[ns]'), - Series(dtype=np.bool)]).dtype, np.object_) - self.assertEqual(pd.concat([Series(dtype='M8[ns]'), - Series(dtype=np.int64)]).dtype, np.object_) - self.assertEqual(pd.concat([Series(dtype='M8[ns]'), - Series(dtype=np.bool_), - Series(dtype=np.int64)]).dtype, np.object_) + # booleans + assert pd.concat([Series(dtype=np.bool_), + Series(dtype=np.int32)]).dtype == np.int32 + assert pd.concat([Series(dtype=np.bool_), + Series(dtype=np.float32)]).dtype == np.object_ + + # datetime-like + assert pd.concat([Series(dtype='m8[ns]'), + Series(dtype=np.bool)]).dtype == np.object_ + assert pd.concat([Series(dtype='m8[ns]'), + Series(dtype=np.int64)]).dtype == np.object_ + assert pd.concat([Series(dtype='M8[ns]'), + Series(dtype=np.bool)]).dtype == np.object_ + assert pd.concat([Series(dtype='M8[ns]'), + Series(dtype=np.int64)]).dtype == np.object_ + assert pd.concat([Series(dtype='M8[ns]'), + Series(dtype=np.bool_), + Series(dtype=np.int64)]).dtype == np.object_ # categorical - self.assertEqual(pd.concat([Series(dtype='category'), - Series(dtype='category')]).dtype, - 'category') - self.assertEqual(pd.concat([Series(dtype='category'), - Series(dtype='float64')]).dtype, - 'float64') - self.assertEqual(pd.concat([Series(dtype='category'), - Series(dtype='object')]).dtype, 'object') + assert pd.concat([Series(dtype='category'), + Series(dtype='category')]).dtype == 'category' + assert pd.concat([Series(dtype='category'), + Series(dtype='float64')]).dtype == 'float64' + assert pd.concat([Series(dtype='category'), + Series(dtype='object')]).dtype == 'object' # sparse result = pd.concat([Series(dtype='float64').to_sparse(), Series( dtype='float64').to_sparse()]) - self.assertEqual(result.dtype, np.float64) - self.assertEqual(result.ftype, 'float64:sparse') + assert result.dtype == np.float64 + assert result.ftype == 'float64:sparse' result = pd.concat([Series(dtype='float64').to_sparse(), Series( dtype='float64')]) - self.assertEqual(result.dtype, np.float64) - self.assertEqual(result.ftype, 'float64:sparse') + assert result.dtype == np.float64 + assert result.ftype == 'float64:sparse' result = pd.concat([Series(dtype='float64').to_sparse(), Series( dtype='object')]) - self.assertEqual(result.dtype, np.object_) - self.assertEqual(result.ftype, 'object:dense') + assert result.dtype == np.object_ + assert result.ftype == 'object:dense' def test_combine_first_dt64(self): from pandas.core.tools.datetimes import to_datetime @@ -245,7 +242,7 @@ def test_append_concat(self): rng2 = rng.copy() rng1.name = 'foo' rng2.name = 'bar' - self.assertEqual(rng1.append(rng1).name, 'foo') + assert rng1.append(rng1).name == 'foo' assert rng1.append(rng2).name is None def test_append_concat_tz(self): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 966861fe3c1e4..a0a68a332f735 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -58,11 +58,11 @@ def test_constructor(self): assert tm.equalContents(derived.index, 
self.ts.index) # Ensure new index is not created - self.assertEqual(id(self.ts.index), id(derived.index)) + assert id(self.ts.index) == id(derived.index) # Mixed type Series mixed = Series(['hello', np.NaN], index=[0, 1]) - self.assertEqual(mixed.dtype, np.object_) + assert mixed.dtype == np.object_ assert mixed[1] is np.NaN assert not self.empty.index.is_all_dates @@ -73,7 +73,7 @@ def test_constructor(self): mixed.name = 'Series' rs = Series(mixed).name xp = 'Series' - self.assertEqual(rs, xp) + assert rs == xp # raise on MultiIndex GH4187 m = MultiIndex.from_arrays([[1, 2], [3, 4]]) @@ -248,10 +248,10 @@ def test_constructor_corner(self): def test_constructor_sanitize(self): s = Series(np.array([1., 1., 8.]), dtype='i8') - self.assertEqual(s.dtype, np.dtype('i8')) + assert s.dtype == np.dtype('i8') s = Series(np.array([1., 1., np.nan]), copy=True, dtype='i8') - self.assertEqual(s.dtype, np.dtype('f8')) + assert s.dtype == np.dtype('f8') def test_constructor_copy(self): # GH15125 @@ -266,15 +266,15 @@ def test_constructor_copy(self): # changes to origin of copy does not affect the copy x[0] = 2. assert not x.equals(y) - self.assertEqual(x[0], 2.) - self.assertEqual(y[0], 1.) + assert x[0] == 2. + assert y[0] == 1. def test_constructor_pass_none(self): s = Series(None, index=lrange(5)) - self.assertEqual(s.dtype, np.float64) + assert s.dtype == np.float64 s = Series(None, index=lrange(5), dtype=object) - self.assertEqual(s.dtype, np.object_) + assert s.dtype == np.object_ # GH 7431 # inference on the index @@ -285,12 +285,12 @@ def test_constructor_pass_none(self): def test_constructor_pass_nan_nat(self): # GH 13467 exp = Series([np.nan, np.nan], dtype=np.float64) - self.assertEqual(exp.dtype, np.float64) + assert exp.dtype == np.float64 tm.assert_series_equal(Series([np.nan, np.nan]), exp) tm.assert_series_equal(Series(np.array([np.nan, np.nan])), exp) exp = Series([pd.NaT, pd.NaT]) - self.assertEqual(exp.dtype, 'datetime64[ns]') + assert exp.dtype == 'datetime64[ns]' tm.assert_series_equal(Series([pd.NaT, pd.NaT]), exp) tm.assert_series_equal(Series(np.array([pd.NaT, pd.NaT])), exp) @@ -310,7 +310,7 @@ def test_constructor_dtype_nocast(self): s2 = Series(s, dtype=np.int64) s2[1] = 5 - self.assertEqual(s[1], 5) + assert s[1] == 5 def test_constructor_datelike_coercion(self): @@ -318,8 +318,8 @@ def test_constructor_datelike_coercion(self): # incorrectly infering on dateimelike looking when object dtype is # specified s = Series([Timestamp('20130101'), 'NOV'], dtype=object) - self.assertEqual(s.iloc[0], Timestamp('20130101')) - self.assertEqual(s.iloc[1], 'NOV') + assert s.iloc[0] == Timestamp('20130101') + assert s.iloc[1] == 'NOV' assert s.dtype == object # the dtype was being reset on the slicing and re-inferred to datetime @@ -361,11 +361,11 @@ def test_constructor_dtype_datetime64(self): s = Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype='M8[ns]') assert isnull(s[1]) - self.assertEqual(s.dtype, 'M8[ns]') + assert s.dtype == 'M8[ns]' s = Series([datetime(2001, 1, 2, 0, 0), nan], dtype='M8[ns]') assert isnull(s[1]) - self.assertEqual(s.dtype, 'M8[ns]') + assert s.dtype == 'M8[ns]' # GH3416 dates = [ @@ -375,10 +375,10 @@ def test_constructor_dtype_datetime64(self): ] s = Series(dates) - self.assertEqual(s.dtype, 'M8[ns]') + assert s.dtype == 'M8[ns]' s.iloc[0] = np.nan - self.assertEqual(s.dtype, 'M8[ns]') + assert s.dtype == 'M8[ns]' # invalid astypes for t in ['s', 'D', 'us', 'ms']: @@ -392,15 +392,15 @@ def test_constructor_dtype_datetime64(self): # invalid dates can be help as 
object result = Series([datetime(2, 1, 1)]) - self.assertEqual(result[0], datetime(2, 1, 1, 0, 0)) + assert result[0] == datetime(2, 1, 1, 0, 0) result = Series([datetime(3000, 1, 1)]) - self.assertEqual(result[0], datetime(3000, 1, 1, 0, 0)) + assert result[0] == datetime(3000, 1, 1, 0, 0) # don't mix types result = Series([Timestamp('20130101'), 1], index=['a', 'b']) - self.assertEqual(result['a'], Timestamp('20130101')) - self.assertEqual(result['b'], 1) + assert result['a'] == Timestamp('20130101') + assert result['b'] == 1 # GH6529 # coerce datetime64 non-ns properly @@ -426,17 +426,17 @@ def test_constructor_dtype_datetime64(self): dtype=object) series1 = Series(dates2, dates) tm.assert_numpy_array_equal(series1.values, dates2) - self.assertEqual(series1.dtype, object) + assert series1.dtype == object # these will correctly infer a datetime s = Series([None, pd.NaT, '2013-08-05 15:30:00.000001']) - self.assertEqual(s.dtype, 'datetime64[ns]') + assert s.dtype == 'datetime64[ns]' s = Series([np.nan, pd.NaT, '2013-08-05 15:30:00.000001']) - self.assertEqual(s.dtype, 'datetime64[ns]') + assert s.dtype == 'datetime64[ns]' s = Series([pd.NaT, None, '2013-08-05 15:30:00.000001']) - self.assertEqual(s.dtype, 'datetime64[ns]') + assert s.dtype == 'datetime64[ns]' s = Series([pd.NaT, np.nan, '2013-08-05 15:30:00.000001']) - self.assertEqual(s.dtype, 'datetime64[ns]') + assert s.dtype == 'datetime64[ns]' # tz-aware (UTC and other tz's) # GH 8411 @@ -488,11 +488,11 @@ def test_constructor_with_datetime_tz(self): # indexing result = s.iloc[0] - self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500', - tz='US/Eastern', freq='D')) + assert result == Timestamp('2013-01-01 00:00:00-0500', + tz='US/Eastern', freq='D') result = s[0] - self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500', - tz='US/Eastern', freq='D')) + assert result == Timestamp('2013-01-01 00:00:00-0500', + tz='US/Eastern', freq='D') result = s[Series([True, True, False], index=s.index)] assert_series_equal(result, s[0:2]) @@ -589,7 +589,7 @@ def test_constructor_periodindex(self): expected = Series(pi.asobject) assert_series_equal(s, expected) - self.assertEqual(s.dtype, 'object') + assert s.dtype == 'object' def test_constructor_dict(self): d = {'a': 0., 'b': 1., 'c': 2.} @@ -693,12 +693,12 @@ class A(OrderedDict): def test_constructor_list_of_tuples(self): data = [(1, 1), (2, 2), (2, 3)] s = Series(data) - self.assertEqual(list(s), data) + assert list(s) == data def test_constructor_tuple_of_tuples(self): data = ((1, 1), (2, 2), (2, 3)) s = Series(data) - self.assertEqual(tuple(s), data) + assert tuple(s) == data def test_constructor_set(self): values = set([1, 2, 3, 4, 5]) @@ -714,80 +714,80 @@ def test_fromDict(self): data = {'a': 0, 'b': '1', 'c': '2', 'd': datetime.now()} series = Series(data) - self.assertEqual(series.dtype, np.object_) + assert series.dtype == np.object_ data = {'a': 0, 'b': '1', 'c': '2', 'd': '3'} series = Series(data) - self.assertEqual(series.dtype, np.object_) + assert series.dtype == np.object_ data = {'a': '0', 'b': '1'} series = Series(data, dtype=float) - self.assertEqual(series.dtype, np.float64) + assert series.dtype == np.float64 def test_fromValue(self): nans = Series(np.NaN, index=self.ts.index) - self.assertEqual(nans.dtype, np.float_) - self.assertEqual(len(nans), len(self.ts)) + assert nans.dtype == np.float_ + assert len(nans) == len(self.ts) strings = Series('foo', index=self.ts.index) - self.assertEqual(strings.dtype, np.object_) - self.assertEqual(len(strings), 
len(self.ts)) + assert strings.dtype == np.object_ + assert len(strings) == len(self.ts) d = datetime.now() dates = Series(d, index=self.ts.index) - self.assertEqual(dates.dtype, 'M8[ns]') - self.assertEqual(len(dates), len(self.ts)) + assert dates.dtype == 'M8[ns]' + assert len(dates) == len(self.ts) # GH12336 # Test construction of categorical series from value categorical = Series(0, index=self.ts.index, dtype="category") expected = Series(0, index=self.ts.index).astype("category") - self.assertEqual(categorical.dtype, 'category') - self.assertEqual(len(categorical), len(self.ts)) + assert categorical.dtype == 'category' + assert len(categorical) == len(self.ts) tm.assert_series_equal(categorical, expected) def test_constructor_dtype_timedelta64(self): # basic td = Series([timedelta(days=i) for i in range(3)]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' td = Series([timedelta(days=1)]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' td = Series([timedelta(days=1), timedelta(days=2), np.timedelta64( 1, 's')]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' # mixed with NaT td = Series([timedelta(days=1), NaT], dtype='m8[ns]') - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' td = Series([timedelta(days=1), np.nan], dtype='m8[ns]') - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' td = Series([np.timedelta64(300000000), pd.NaT], dtype='m8[ns]') - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' # improved inference # GH5689 td = Series([np.timedelta64(300000000), NaT]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' # because iNaT is int, not coerced to timedelta td = Series([np.timedelta64(300000000), iNaT]) - self.assertEqual(td.dtype, 'object') + assert td.dtype == 'object' td = Series([np.timedelta64(300000000), np.nan]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' td = Series([pd.NaT, np.timedelta64(300000000)]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' td = Series([np.timedelta64(1, 's')]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' # these are frequency conversion astypes # for t in ['s', 'D', 'us', 'ms']: @@ -807,17 +807,17 @@ def f(): # leave as object here td = Series([timedelta(days=i) for i in range(3)] + ['foo']) - self.assertEqual(td.dtype, 'object') + assert td.dtype == 'object' # these will correctly infer a timedelta s = Series([None, pd.NaT, '1 Day']) - self.assertEqual(s.dtype, 'timedelta64[ns]') + assert s.dtype == 'timedelta64[ns]' s = Series([np.nan, pd.NaT, '1 Day']) - self.assertEqual(s.dtype, 'timedelta64[ns]') + assert s.dtype == 'timedelta64[ns]' s = Series([pd.NaT, None, '1 Day']) - self.assertEqual(s.dtype, 'timedelta64[ns]') + assert s.dtype == 'timedelta64[ns]' s = Series([pd.NaT, np.nan, '1 Day']) - self.assertEqual(s.dtype, 'timedelta64[ns]') + assert s.dtype == 'timedelta64[ns]' def test_NaT_scalar(self): series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') @@ -838,7 +838,7 @@ def test_constructor_name_hashable(self): for n in [777, 777., 'name', datetime(2001, 11, 11), (1, ), u"\u05D0"]: for data in [[1, 2, 3], np.ones(3), {'a': 0, 'b': 1}]: s = Series(data, name=n) - self.assertEqual(s.name, n) + assert s.name == n def test_constructor_name_unhashable(self): 
for n in [['name_list'], np.ones(2), {1: 2}]: @@ -847,7 +847,7 @@ def test_constructor_name_unhashable(self): def test_auto_conversion(self): series = Series(list(date_range('1/1/2000', periods=10))) - self.assertEqual(series.dtype, 'M8[ns]') + assert series.dtype == 'M8[ns]' def test_constructor_cant_cast_datetime64(self): msg = "Cannot cast datetime64 to " diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 13fa3bc782f89..50914eef1abc8 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -50,7 +50,7 @@ def compare(s, name): a = getattr(s.dt, prop) b = get_expected(s, prop) if not (is_list_like(a) and is_list_like(b)): - self.assertEqual(a, b) + assert a == b else: tm.assert_series_equal(a, b) @@ -79,10 +79,9 @@ def compare(s, name): tm.assert_series_equal(result, expected) tz_result = result.dt.tz - self.assertEqual(str(tz_result), 'US/Eastern') + assert str(tz_result) == 'US/Eastern' freq_result = s.dt.freq - self.assertEqual(freq_result, DatetimeIndex(s.values, - freq='infer').freq) + assert freq_result == DatetimeIndex(s.values, freq='infer').freq # let's localize, then convert result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern') @@ -149,12 +148,11 @@ def compare(s, name): tm.assert_series_equal(result, expected) tz_result = result.dt.tz - self.assertEqual(str(tz_result), 'CET') + assert str(tz_result) == 'CET' freq_result = s.dt.freq - self.assertEqual(freq_result, DatetimeIndex(s.values, - freq='infer').freq) + assert freq_result == DatetimeIndex(s.values, freq='infer').freq - # timedeltaindex + # timedelta index cases = [Series(timedelta_range('1 day', periods=5), index=list('abcde'), name='xxx'), Series(timedelta_range('1 day 01:23:45', periods=5, @@ -183,8 +181,7 @@ def compare(s, name): assert result.dtype == 'float64' freq_result = s.dt.freq - self.assertEqual(freq_result, TimedeltaIndex(s.values, - freq='infer').freq) + assert freq_result == TimedeltaIndex(s.values, freq='infer').freq # both index = date_range('20130101', periods=3, freq='D') @@ -218,7 +215,7 @@ def compare(s, name): getattr(s.dt, prop) freq_result = s.dt.freq - self.assertEqual(freq_result, PeriodIndex(s.values).freq) + assert freq_result == PeriodIndex(s.values).freq # test limited display api def get_dir(s): @@ -387,7 +384,7 @@ def test_sub_of_datetime_from_TimeSeries(self): b = datetime(1993, 6, 22, 13, 30) a = Series([a]) result = to_timedelta(np.abs(a - b)) - self.assertEqual(result.dtype, 'timedelta64[ns]') + assert result.dtype == 'timedelta64[ns]' def test_between(self): s = Series(bdate_range('1/1/2000', periods=20).asobject) diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 954e80facf848..9f5d80411ed17 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -41,7 +41,7 @@ def test_get(self): result = s.get(25, 0) expected = 0 - self.assertEqual(result, expected) + assert result == expected s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56, 45, 51, 39, 55, 43, 54, 52, 51, 54]), @@ -54,21 +54,21 @@ def test_get(self): result = s.get(25, 0) expected = 43 - self.assertEqual(result, expected) + assert result == expected # GH 7407 # with a boolean accessor df = pd.DataFrame({'i': [0] * 3, 'b': [False] * 3}) vc = df.i.value_counts() result = vc.get(99, default='Missing') - self.assertEqual(result, 'Missing') + assert result == 'Missing' vc = df.b.value_counts() result = vc.get(False, 
default='Missing') - self.assertEqual(result, 3) + assert result == 3 result = vc.get(True, default='Missing') - self.assertEqual(result, 'Missing') + assert result == 'Missing' def test_delitem(self): @@ -137,7 +137,7 @@ def test_pop(self): k = df.iloc[4] result = k.pop('B') - self.assertEqual(result, 4) + assert result == 4 expected = Series([0, 0], index=['A', 'C'], name=4) assert_series_equal(k, expected) @@ -146,15 +146,14 @@ def test_getitem_get(self): idx1 = self.series.index[5] idx2 = self.objSeries.index[5] - self.assertEqual(self.series[idx1], self.series.get(idx1)) - self.assertEqual(self.objSeries[idx2], self.objSeries.get(idx2)) + assert self.series[idx1] == self.series.get(idx1) + assert self.objSeries[idx2] == self.objSeries.get(idx2) - self.assertEqual(self.series[idx1], self.series[5]) - self.assertEqual(self.objSeries[idx2], self.objSeries[5]) + assert self.series[idx1] == self.series[5] + assert self.objSeries[idx2] == self.objSeries[5] - self.assertEqual( - self.series.get(-1), self.series.get(self.series.index[-1])) - self.assertEqual(self.series[5], self.series.get(self.series.index[5])) + assert self.series.get(-1) == self.series.get(self.series.index[-1]) + assert self.series[5] == self.series.get(self.series.index[5]) # missing d = self.ts.index[0] - BDay() @@ -191,7 +190,7 @@ def test_iloc(self): def test_iloc_nonunique(self): s = Series([0, 1, 2], index=[0, 1, 0]) - self.assertEqual(s.iloc[2], 2) + assert s.iloc[2] == 2 def test_getitem_regression(self): s = Series(lrange(5), index=lrange(5)) @@ -218,15 +217,15 @@ def test_getitem_setitem_slice_bug(self): def test_getitem_int64(self): idx = np.int64(5) - self.assertEqual(self.ts[idx], self.ts[5]) + assert self.ts[idx] == self.ts[5] def test_getitem_fancy(self): slice1 = self.series[[1, 2, 3]] slice2 = self.objSeries[[1, 2, 3]] - self.assertEqual(self.series.index[2], slice1.index[1]) - self.assertEqual(self.objSeries.index[2], slice2.index[1]) - self.assertEqual(self.series[2], slice1[1]) - self.assertEqual(self.objSeries[2], slice2[1]) + assert self.series.index[2] == slice1.index[1] + assert self.objSeries.index[2] == slice2.index[1] + assert self.series[2] == slice1[1] + assert self.objSeries[2] == slice2[1] def test_getitem_boolean(self): s = self.series @@ -242,8 +241,8 @@ def test_getitem_boolean_empty(self): s = Series([], dtype=np.int64) s.index.name = 'index_name' s = s[s.isnull()] - self.assertEqual(s.index.name, 'index_name') - self.assertEqual(s.dtype, np.int64) + assert s.index.name == 'index_name' + assert s.dtype == np.int64 # GH5877 # indexing with empty series @@ -421,7 +420,7 @@ def test_getitem_setitem_datetimeindex(self): result = ts["1990-01-01 04:00:00"] expected = ts[4] - self.assertEqual(result, expected) + assert result == expected result = ts.copy() result["1990-01-01 04:00:00"] = 0 @@ -446,7 +445,7 @@ def test_getitem_setitem_datetimeindex(self): # repeat all the above with naive datetimes result = ts[datetime(1990, 1, 1, 4)] expected = ts[4] - self.assertEqual(result, expected) + assert result == expected result = ts.copy() result[datetime(1990, 1, 1, 4)] = 0 @@ -470,7 +469,7 @@ def test_getitem_setitem_datetimeindex(self): result = ts[ts.index[4]] expected = ts[4] - self.assertEqual(result, expected) + assert result == expected result = ts[ts.index[4:8]] expected = ts[4:8] @@ -500,7 +499,7 @@ def test_getitem_setitem_periodindex(self): result = ts["1990-01-01 04"] expected = ts[4] - self.assertEqual(result, expected) + assert result == expected result = ts.copy() result["1990-01-01 
04"] = 0 @@ -525,7 +524,7 @@ def test_getitem_setitem_periodindex(self): # GH 2782 result = ts[ts.index[4]] expected = ts[4] - self.assertEqual(result, expected) + assert result == expected result = ts[ts.index[4:8]] expected = ts[4:8] @@ -557,7 +556,7 @@ def test_getitem_setitem_integers(self): # caused bug without test s = Series([1, 2, 3], ['a', 'b', 'c']) - self.assertEqual(s.iloc[0], s['a']) + assert s.iloc[0] == s['a'] s.iloc[0] = 5 self.assertAlmostEqual(s['a'], 5) @@ -573,7 +572,7 @@ def test_getitem_ambiguous_keyerror(self): def test_getitem_unordered_dup(self): obj = Series(lrange(5), index=['c', 'a', 'a', 'b', 'b']) assert is_scalar(obj['c']) - self.assertEqual(obj['c'], 0) + assert obj['c'] == 0 def test_getitem_dups_with_missing(self): @@ -600,7 +599,7 @@ def test_getitem_callable(self): # GH 12533 s = pd.Series(4, index=list('ABCD')) result = s[lambda x: 'A'] - self.assertEqual(result, s.loc['A']) + assert result == s.loc['A'] result = s[lambda x: ['A', 'B']] tm.assert_series_equal(result, s.loc[['A', 'B']]) @@ -687,14 +686,14 @@ def f(): def test_slice_floats2(self): s = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float)) - self.assertEqual(len(s.loc[12.0:]), 8) - self.assertEqual(len(s.loc[12.5:]), 7) + assert len(s.loc[12.0:]) == 8 + assert len(s.loc[12.5:]) == 7 i = np.arange(10, 20, dtype=float) i[2] = 12.2 s.index = i - self.assertEqual(len(s.loc[12.0:]), 8) - self.assertEqual(len(s.loc[12.5:]), 7) + assert len(s.loc[12.0:]) == 8 + assert len(s.loc[12.5:]) == 7 def test_slice_float64(self): @@ -787,23 +786,23 @@ def test_set_value(self): idx = self.ts.index[10] res = self.ts.set_value(idx, 0) assert res is self.ts - self.assertEqual(self.ts[idx], 0) + assert self.ts[idx] == 0 # equiv s = self.series.copy() res = s.set_value('foobar', 0) assert res is s - self.assertEqual(res.index[-1], 'foobar') - self.assertEqual(res['foobar'], 0) + assert res.index[-1] == 'foobar' + assert res['foobar'] == 0 s = self.series.copy() s.loc['foobar'] = 0 - self.assertEqual(s.index[-1], 'foobar') - self.assertEqual(s['foobar'], 0) + assert s.index[-1] == 'foobar' + assert s['foobar'] == 0 def test_setslice(self): sl = self.ts[5:20] - self.assertEqual(len(sl), len(sl.index)) + assert len(sl) == len(sl.index) assert sl.index.is_unique def test_basic_getitem_setitem_corner(self): @@ -853,11 +852,11 @@ def test_basic_getitem_with_labels(self): index=['a', 'b', 'c']) expected = Timestamp('2011-01-01', tz='US/Eastern') result = s.loc['a'] - self.assertEqual(result, expected) + assert result == expected result = s.iloc[0] - self.assertEqual(result, expected) + assert result == expected result = s['a'] - self.assertEqual(result, expected) + assert result == expected def test_basic_setitem_with_labels(self): indices = self.ts.index[[5, 10, 15]] @@ -904,17 +903,17 @@ def test_basic_setitem_with_labels(self): expected = Timestamp('2011-01-03', tz='US/Eastern') s2.loc['a'] = expected result = s2.loc['a'] - self.assertEqual(result, expected) + assert result == expected s2 = s.copy() s2.iloc[0] = expected result = s2.iloc[0] - self.assertEqual(result, expected) + assert result == expected s2 = s.copy() s2['a'] = expected result = s2['a'] - self.assertEqual(result, expected) + assert result == expected def test_loc_getitem(self): inds = self.series.index[[3, 4, 7]] @@ -932,8 +931,8 @@ def test_loc_getitem(self): assert_series_equal(self.series.loc[mask], self.series[mask]) # ask for index value - self.assertEqual(self.ts.loc[d1], self.ts[d1]) - self.assertEqual(self.ts.loc[d2], self.ts[d2]) 
+ assert self.ts.loc[d1] == self.ts[d1] + assert self.ts.loc[d2] == self.ts[d2] def test_loc_getitem_not_monotonic(self): d1, d2 = self.ts.index[[5, 15]] @@ -977,7 +976,7 @@ def test_setitem_with_tz(self): for tz in ['US/Eastern', 'UTC', 'Asia/Tokyo']: orig = pd.Series(pd.date_range('2016-01-01', freq='H', periods=3, tz=tz)) - self.assertEqual(orig.dtype, 'datetime64[ns, {0}]'.format(tz)) + assert orig.dtype == 'datetime64[ns, {0}]'.format(tz) # scalar s = orig.copy() @@ -998,7 +997,7 @@ def test_setitem_with_tz(self): # vector vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz), pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2]) - self.assertEqual(vals.dtype, 'datetime64[ns, {0}]'.format(tz)) + assert vals.dtype == 'datetime64[ns, {0}]'.format(tz) s[[1, 2]] = vals exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz), @@ -1019,7 +1018,7 @@ def test_setitem_with_tz_dst(self): tz = 'US/Eastern' orig = pd.Series(pd.date_range('2016-11-06', freq='H', periods=3, tz=tz)) - self.assertEqual(orig.dtype, 'datetime64[ns, {0}]'.format(tz)) + assert orig.dtype == 'datetime64[ns, {0}]'.format(tz) # scalar s = orig.copy() @@ -1040,7 +1039,7 @@ def test_setitem_with_tz_dst(self): # vector vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz), pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2]) - self.assertEqual(vals.dtype, 'datetime64[ns, {0}]'.format(tz)) + assert vals.dtype == 'datetime64[ns, {0}]'.format(tz) s[[1, 2]] = vals exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz), @@ -1107,7 +1106,7 @@ def test_where(self): s[mask] = lrange(2, 7) expected = Series(lrange(2, 7) + lrange(5, 10), dtype=dtype) assert_series_equal(s, expected) - self.assertEqual(s.dtype, expected.dtype) + assert s.dtype == expected.dtype # these are allowed operations, but are upcasted for dtype in [np.int64, np.float64]: @@ -1117,7 +1116,7 @@ def test_where(self): s[mask] = values expected = Series(values + lrange(5, 10), dtype='float64') assert_series_equal(s, expected) - self.assertEqual(s.dtype, expected.dtype) + assert s.dtype == expected.dtype # GH 9731 s = Series(np.arange(10), dtype='int64') @@ -1141,7 +1140,7 @@ def test_where(self): s[mask] = lrange(2, 7) expected = Series(lrange(2, 7) + lrange(5, 10), dtype='int64') assert_series_equal(s, expected) - self.assertEqual(s.dtype, expected.dtype) + assert s.dtype == expected.dtype s = Series(np.arange(10), dtype='int64') mask = s > 5 @@ -1506,8 +1505,8 @@ def test_ix_setitem(self): # set index value self.series.loc[d1] = 4 self.series.loc[d2] = 6 - self.assertEqual(self.series[d1], 4) - self.assertEqual(self.series[d2], 6) + assert self.series[d1] == 4 + assert self.series[d2] == 6 def test_where_numeric_with_string(self): # GH 9280 @@ -1639,7 +1638,7 @@ def test_datetime_indexing(self): pytest.raises(KeyError, s.__getitem__, stamp) s[stamp] = 0 - self.assertEqual(s[stamp], 0) + assert s[stamp] == 0 # not monotonic s = Series(len(index), index=index) @@ -1647,7 +1646,7 @@ def test_datetime_indexing(self): pytest.raises(KeyError, s.__getitem__, stamp) s[stamp] = 0 - self.assertEqual(s[stamp], 0) + assert s[stamp] == 0 def test_timedelta_assignment(self): # GH 8209 @@ -1702,7 +1701,7 @@ def test_underlying_data_conversion(self): df_tmp = df.iloc[ck] # noqa df["bb"].iloc[0] = .15 - self.assertEqual(df['bb'].iloc[0], 0.15) + assert df['bb'].iloc[0] == 0.15 pd.set_option('chained_assignment', 'raise') # GH 3217 @@ -1788,10 +1787,10 @@ def _check_align(a, b, how='left', fill=None): assert_series_equal(aa, ea) assert_series_equal(ab, eb) - self.assertEqual(aa.name, 'ts') - 
self.assertEqual(ea.name, 'ts') - self.assertEqual(ab.name, 'ts') - self.assertEqual(eb.name, 'ts') + assert aa.name == 'ts' + assert ea.name == 'ts' + assert ab.name == 'ts' + assert eb.name == 'ts' for kind in JOIN_TYPES: _check_align(self.ts[2:], self.ts[:-5], how=kind) @@ -1932,13 +1931,13 @@ def test_reindex(self): subSeries = self.series.reindex(subIndex) for idx, val in compat.iteritems(subSeries): - self.assertEqual(val, self.series[idx]) + assert val == self.series[idx] subIndex2 = self.ts.index[10:20] subTS = self.ts.reindex(subIndex2) for idx, val in compat.iteritems(subTS): - self.assertEqual(val, self.ts[idx]) + assert val == self.ts[idx] stuffSeries = self.ts.reindex(subIndex) assert np.isnan(stuffSeries).all() @@ -1947,7 +1946,7 @@ def test_reindex(self): nonContigIndex = self.ts.index[::2] subNonContig = self.ts.reindex(nonContigIndex) for idx, val in compat.iteritems(subNonContig): - self.assertEqual(val, self.ts[idx]) + assert val == self.ts[idx] # return a copy the same index here result = self.ts.reindex() @@ -2070,11 +2069,11 @@ def test_reindex_int(self): reindexed_int = int_ts.reindex(self.ts.index) # if NaNs introduced - self.assertEqual(reindexed_int.dtype, np.float_) + assert reindexed_int.dtype == np.float_ # NO NaNs introduced reindexed_int = int_ts.reindex(int_ts.index[::2]) - self.assertEqual(reindexed_int.dtype, np.int_) + assert reindexed_int.dtype == np.int_ def test_reindex_bool(self): @@ -2086,11 +2085,11 @@ def test_reindex_bool(self): reindexed_bool = bool_ts.reindex(self.ts.index) # if NaNs introduced - self.assertEqual(reindexed_bool.dtype, np.object_) + assert reindexed_bool.dtype == np.object_ # NO NaNs introduced reindexed_bool = bool_ts.reindex(bool_ts.index[::2]) - self.assertEqual(reindexed_bool.dtype, np.bool_) + assert reindexed_bool.dtype == np.bool_ def test_reindex_bool_pad(self): # fail @@ -2224,8 +2223,8 @@ def test_multilevel_preserve_name(self): result = s['foo'] result2 = s.loc['foo'] - self.assertEqual(result.name, s.name) - self.assertEqual(result2.name, s.name) + assert result.name == s.name + assert result2.name == s.name def test_setitem_scalar_into_readonly_backing_data(self): # GH14359: test that you cannot mutate a read only buffer @@ -2238,12 +2237,7 @@ def test_setitem_scalar_into_readonly_backing_data(self): with pytest.raises(ValueError): series[n] = 1 - self.assertEqual( - array[n], - 0, - msg='even though the ValueError was raised, the underlying' - ' array was still mutated!', - ) + assert array[n] == 0 def test_setitem_slice_into_readonly_backing_data(self): # GH14359: test that you cannot mutate a read only buffer @@ -2280,9 +2274,9 @@ def test_index_unique(self): uniques = self.dups.index.unique() expected = DatetimeIndex([datetime(2000, 1, 2), datetime(2000, 1, 3), datetime(2000, 1, 4), datetime(2000, 1, 5)]) - self.assertEqual(uniques.dtype, 'M8[ns]') # sanity + assert uniques.dtype == 'M8[ns]' # sanity tm.assert_index_equal(uniques, expected) - self.assertEqual(self.dups.index.nunique(), 4) + assert self.dups.index.nunique() == 4 # #2563 assert isinstance(uniques, DatetimeIndex) @@ -2293,22 +2287,22 @@ def test_index_unique(self): expected = DatetimeIndex(expected, name='foo') expected = expected.tz_localize('US/Eastern') assert result.tz is not None - self.assertEqual(result.name, 'foo') + assert result.name == 'foo' tm.assert_index_equal(result, expected) # NaT, note this is excluded arr = [1370745748 + t for t in range(20)] + [tslib.iNaT] idx = DatetimeIndex(arr * 3) tm.assert_index_equal(idx.unique(), 
DatetimeIndex(arr)) - self.assertEqual(idx.nunique(), 20) - self.assertEqual(idx.nunique(dropna=False), 21) + assert idx.nunique() == 20 + assert idx.nunique(dropna=False) == 21 arr = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20)] + [NaT] idx = DatetimeIndex(arr * 3) tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) - self.assertEqual(idx.nunique(), 20) - self.assertEqual(idx.nunique(dropna=False), 21) + assert idx.nunique() == 20 + assert idx.nunique(dropna=False) == 21 def test_index_dupes_contains(self): d = datetime(2011, 12, 5, 20, 30) @@ -2339,7 +2333,7 @@ def test_duplicate_dates_indexing(self): # new index ts[datetime(2000, 1, 6)] = 0 - self.assertEqual(ts[datetime(2000, 1, 6)], 0) + assert ts[datetime(2000, 1, 6)] == 0 def test_range_slice(self): idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000', @@ -2516,11 +2510,11 @@ def test_fancy_getitem(self): s = Series(np.arange(len(dti)), index=dti) - self.assertEqual(s[48], 48) - self.assertEqual(s['1/2/2009'], 48) - self.assertEqual(s['2009-1-2'], 48) - self.assertEqual(s[datetime(2009, 1, 2)], 48) - self.assertEqual(s[lib.Timestamp(datetime(2009, 1, 2))], 48) + assert s[48] == 48 + assert s['1/2/2009'] == 48 + assert s['2009-1-2'] == 48 + assert s[datetime(2009, 1, 2)] == 48 + assert s[lib.Timestamp(datetime(2009, 1, 2))] == 48 pytest.raises(KeyError, s.__getitem__, '2009-1-3') assert_series_equal(s['3/6/2009':'2009-06-05'], @@ -2532,9 +2526,9 @@ def test_fancy_setitem(self): s = Series(np.arange(len(dti)), index=dti) s[48] = -1 - self.assertEqual(s[48], -1) + assert s[48] == -1 s['1/2/2009'] = -2 - self.assertEqual(s[48], -2) + assert s[48] == -2 s['1/2/2009':'2009-06-05'] = -3 assert (s[48:54] == -3).all() @@ -2557,7 +2551,7 @@ def test_dti_reset_index_round_trip(self): dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D') d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti) d2 = d1.reset_index() - self.assertEqual(d2.dtypes[0], np.dtype('M8[ns]')) + assert d2.dtypes[0] == np.dtype('M8[ns]') d3 = d2.set_index('index') assert_frame_equal(d1, d3, check_names=False) @@ -2566,8 +2560,8 @@ def test_dti_reset_index_round_trip(self): df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value']) df = df.set_index('Date') - self.assertEqual(df.index[0], stamp) - self.assertEqual(df.reset_index()['Date'][0], stamp) + assert df.index[0] == stamp + assert df.reset_index()['Date'][0] == stamp def test_series_set_value(self): # #1561 @@ -2584,7 +2578,7 @@ def test_series_set_value(self): # s = Series(index[:1], index[:1]) # s2 = s.set_value(dates[1], index[1]) - # self.assertEqual(s2.values.dtype, 'M8[ns]') + # assert s2.values.dtype == 'M8[ns]' @slow def test_slice_locs_indexerror(self): @@ -2669,9 +2663,9 @@ def test_nat_operations(self): # GH 8617 s = Series([0, pd.NaT], dtype='m8[ns]') exp = s[0] - self.assertEqual(s.median(), exp) - self.assertEqual(s.min(), exp) - self.assertEqual(s.max(), exp) + assert s.median() == exp + assert s.min() == exp + assert s.max() == exp def test_round_nat(self): # GH14940 diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index 19170c82953ad..31492a4ab214a 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -116,7 +116,7 @@ def test_convert_objects(self): # r = s.copy() # r[0] = np.nan # result = r.convert_objects(convert_dates=True,convert_numeric=False) - # self.assertEqual(result.dtype, 'M8[ns]') + # assert result.dtype == 'M8[ns]' # dateutil parses some 
single letters into today's value as a date for x in 'abcdefghijklmnopqrstuvwxyz': @@ -282,7 +282,7 @@ def test_convert(self): # r = s.copy() # r[0] = np.nan # result = r._convert(convert_dates=True,convert_numeric=False) - # self.assertEqual(result.dtype, 'M8[ns]') + # assert result.dtype == 'M8[ns]' # dateutil parses some single letters into today's value as a date expected = Series([lib.NaT]) diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 7a9d0390a2cfa..24bb3bbc7fc16 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -135,12 +135,12 @@ def test_timeseries_periodindex(self): prng = period_range('1/1/2011', '1/1/2012', freq='M') ts = Series(np.random.randn(len(prng)), prng) new_ts = tm.round_trip_pickle(ts) - self.assertEqual(new_ts.index.freq, 'M') + assert new_ts.index.freq == 'M' def test_pickle_preserve_name(self): for n in [777, 777., 'name', datetime(2001, 11, 11), (1, 2)]: unpickled = self._pickle_roundtrip_name(tm.makeTimeSeries(name=n)) - self.assertEqual(unpickled.name, n) + assert unpickled.name == n def _pickle_roundtrip_name(self, obj): @@ -178,7 +178,7 @@ def test_tolist(self): # datetime64 s = Series(self.ts.index) rs = s.tolist() - self.assertEqual(self.ts.index[0], rs[0]) + assert self.ts.index[0] == rs[0] def test_tolist_np_int(self): # GH10904 diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 251954b5da05e..9937f6a34172e 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -190,7 +190,7 @@ def test_datetime64_tz_fillna(self): idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT, '2011-01-03 10:00', pd.NaT], tz=tz) s = pd.Series(idx) - self.assertEqual(s.dtype, 'datetime64[ns, {0}]'.format(tz)) + assert s.dtype == 'datetime64[ns, {0}]'.format(tz) tm.assert_series_equal(pd.isnull(s), null_loc) result = s.fillna(pd.Timestamp('2011-01-02 10:00')) @@ -485,19 +485,19 @@ def test_timedelta64_nan(self): td1 = td.copy() td1[0] = np.nan assert isnull(td1[0]) - self.assertEqual(td1[0].value, iNaT) + assert td1[0].value == iNaT td1[0] = td[0] assert not isnull(td1[0]) td1[1] = iNaT assert isnull(td1[1]) - self.assertEqual(td1[1].value, iNaT) + assert td1[1].value == iNaT td1[1] = td[1] assert not isnull(td1[1]) td1[2] = NaT assert isnull(td1[2]) - self.assertEqual(td1[2].value, iNaT) + assert td1[2].value == iNaT td1[2] = td[2] assert not isnull(td1[2]) @@ -505,7 +505,7 @@ def test_timedelta64_nan(self): # this doesn't work, not sure numpy even supports it # result = td[(td>np.timedelta64(timedelta(days=3))) & # td= 0 tm.assert_almost_equal(out[0, 0], 0.0) @@ -1033,7 +1033,7 @@ def test_group_var_large_inputs(self): self.algo(out, counts, values, labels) - self.assertEqual(counts[0], 10 ** 6) + assert counts[0] == 10 ** 6 tm.assert_almost_equal(out[0, 0], 1.0 / 12, check_less_precise=True) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index cbcc4dc84e6d0..ed0d61cdbbaf9 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -83,7 +83,7 @@ def test_slicing_maintains_type(self): def check_result(self, result, expected, klass=None): klass = klass or self.klass assert isinstance(result, klass) - self.assertEqual(result, expected) + assert result == expected class TestPandasDelegate(tm.TestCase): @@ -219,7 +219,7 @@ def check_ops_properties(self, props, filter=None, ignore_failures=False): np.ndarray): tm.assert_numpy_array_equal(result, expected) else: - self.assertEqual(result, expected) + assert 
result == expected # freq raises AttributeError on an Int64Index because its not # defined we mostly care about Series here anyhow @@ -337,12 +337,12 @@ def test_ops(self): expected = pd.Period(ordinal=getattr(o._values, op)(), freq=o.freq) try: - self.assertEqual(result, expected) + assert result == expected except TypeError: # comparing tz-aware series with np.array results in # TypeError expected = expected.astype('M8[ns]').astype('int64') - self.assertEqual(result.value, expected) + assert result.value == expected def test_nanops(self): # GH 7261 @@ -350,7 +350,7 @@ for klass in [Index, Series]: obj = klass([np.nan, 2.0]) - self.assertEqual(getattr(obj, op)(), 2.0) + assert getattr(obj, op)() == 2.0 obj = klass([np.nan]) assert pd.isnull(getattr(obj, op)()) @@ -360,33 +360,33 @@ obj = klass([pd.NaT, datetime(2011, 11, 1)]) # check DatetimeIndex monotonic path - self.assertEqual(getattr(obj, op)(), datetime(2011, 11, 1)) + assert getattr(obj, op)() == datetime(2011, 11, 1) obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT]) # check DatetimeIndex non-monotonic path - self.assertEqual(getattr(obj, op)(), datetime(2011, 11, 1)) + assert getattr(obj, op)() == datetime(2011, 11, 1) # argmin/max obj = Index(np.arange(5, dtype='int64')) - self.assertEqual(obj.argmin(), 0) - self.assertEqual(obj.argmax(), 4) + assert obj.argmin() == 0 + assert obj.argmax() == 4 obj = Index([np.nan, 1, np.nan, 2]) - self.assertEqual(obj.argmin(), 1) - self.assertEqual(obj.argmax(), 3) + assert obj.argmin() == 1 + assert obj.argmax() == 3 obj = Index([np.nan]) - self.assertEqual(obj.argmin(), -1) - self.assertEqual(obj.argmax(), -1) + assert obj.argmin() == -1 + assert obj.argmax() == -1 obj = Index([pd.NaT, datetime(2011, 11, 1), datetime(2011, 11, 2), pd.NaT]) - self.assertEqual(obj.argmin(), 1) - self.assertEqual(obj.argmax(), 2) + assert obj.argmin() == 1 + assert obj.argmax() == 2 obj = Index([pd.NaT]) - self.assertEqual(obj.argmin(), -1) - self.assertEqual(obj.argmax(), -1) + assert obj.argmin() == -1 + assert obj.argmax() == -1 def test_value_counts_unique_nunique(self): for orig in self.objs: @@ -414,7 +414,7 @@ o = klass(rep, index=idx, name='a') # check values has the same dtype as the original - self.assertEqual(o.dtype, orig.dtype) + assert o.dtype == orig.dtype expected_s = Series(range(10, 0, -1), index=expected_index, dtype='int64', name='a') @@ -422,7 +422,7 @@ result = o.value_counts() tm.assert_series_equal(result, expected_s) assert result.index.name is None - self.assertEqual(result.name, 'a') + assert result.name == 'a' result = o.unique() if isinstance(o, Index): @@ -430,7 +430,7 @@ tm.assert_index_equal(result, orig) elif is_datetimetz(o): # datetimetz Series returns array of Timestamp - self.assertEqual(result[0], orig[0]) + assert result[0] == orig[0] for r in result: assert isinstance(r, pd.Timestamp) tm.assert_numpy_array_equal(result, @@ -438,7 +438,7 @@ else: tm.assert_numpy_array_equal(result, orig.values) - self.assertEqual(o.nunique(), len(np.unique(o.values))) + assert o.nunique() == len(np.unique(o.values)) def test_value_counts_unique_nunique_null(self): @@ -469,7 +469,7 @@ values[0:2] = null_obj # check values has the same dtype as the original - self.assertEqual(values.dtype, o.dtype) + assert values.dtype ==
o.dtype # create repeated values, 'n'th element is repeated by n+1 # times @@ -490,7 +490,7 @@ def test_value_counts_unique_nunique_null(self): o.name = 'a' # check values has the same dtype as the original - self.assertEqual(o.dtype, orig.dtype) + assert o.dtype == orig.dtype # check values correctly have NaN nanloc = np.zeros(len(o), dtype=np.bool) nanloc[:3] = True @@ -510,11 +510,11 @@ def test_value_counts_unique_nunique_null(self): result_s_na = o.value_counts(dropna=False) tm.assert_series_equal(result_s_na, expected_s_na) assert result_s_na.index.name is None - self.assertEqual(result_s_na.name, 'a') + assert result_s_na.name == 'a' result_s = o.value_counts() tm.assert_series_equal(o.value_counts(), expected_s) assert result_s.index.name is None - self.assertEqual(result_s.name, 'a') + assert result_s.name == 'a' result = o.unique() if isinstance(o, Index): @@ -529,10 +529,10 @@ def test_value_counts_unique_nunique_null(self): tm.assert_numpy_array_equal(result[1:], values[2:]) assert pd.isnull(result[0]) - self.assertEqual(result.dtype, orig.dtype) + assert result.dtype == orig.dtype - self.assertEqual(o.nunique(), 8) - self.assertEqual(o.nunique(dropna=False), 9) + assert o.nunique() == 8 + assert o.nunique(dropna=False) == 9 def test_value_counts_inferred(self): klasses = [Index, Series] @@ -549,7 +549,7 @@ def test_value_counts_inferred(self): exp = np.unique(np.array(s_values, dtype=np.object_)) tm.assert_numpy_array_equal(s.unique(), exp) - self.assertEqual(s.nunique(), 4) + assert s.nunique() == 4 # don't sort, have to sort after the fact as not sorting is # platform-dep hist = s.value_counts(sort=False).sort_values() @@ -666,14 +666,14 @@ def test_value_counts_datetime64(self): else: tm.assert_numpy_array_equal(s.unique(), expected) - self.assertEqual(s.nunique(), 3) + assert s.nunique() == 3 # with NaT s = df['dt'].copy() s = klass([v for v in s.values] + [pd.NaT]) result = s.value_counts() - self.assertEqual(result.index.dtype, 'datetime64[ns]') + assert result.index.dtype == 'datetime64[ns]' tm.assert_series_equal(result, expected_s) result = s.value_counts(dropna=False) @@ -681,7 +681,7 @@ def test_value_counts_datetime64(self): tm.assert_series_equal(result, expected_s) unique = s.unique() - self.assertEqual(unique.dtype, 'datetime64[ns]') + assert unique.dtype == 'datetime64[ns]' # numpy_array_equal cannot compare pd.NaT if isinstance(s, Index): @@ -691,8 +691,8 @@ def test_value_counts_datetime64(self): tm.assert_numpy_array_equal(unique[:3], expected) assert pd.isnull(unique[3]) - self.assertEqual(s.nunique(), 3) - self.assertEqual(s.nunique(dropna=False), 4) + assert s.nunique() == 3 + assert s.nunique(dropna=False) == 4 # timedelta64[ns] td = df.dt - df.dt + timedelta(1) @@ -931,7 +931,7 @@ def test_fillna(self): o = klass(values) # check values has the same dtype as the original - self.assertEqual(o.dtype, orig.dtype) + assert o.dtype == orig.dtype result = o.fillna(fill_value) if isinstance(o, Index): @@ -951,14 +951,12 @@ def test_memory_usage(self): # if there are objects, only deep will pick them up assert res_deep > res else: - self.assertEqual(res, res_deep) + assert res == res_deep if isinstance(o, Series): - self.assertEqual( - (o.memory_usage(index=False) + - o.index.memory_usage()), - o.memory_usage(index=True) - ) + assert ((o.memory_usage(index=False) + + o.index.memory_usage()) == + o.memory_usage(index=True)) # sys.getsizeof will call the .memory_usage with # deep=True, and add on some GC overhead diff --git a/pandas/tests/test_categorical.py 
b/pandas/tests/test_categorical.py index 708ca92c30cac..515ca8d9cedc5 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -35,8 +35,8 @@ def setUp(self): ordered=True) def test_getitem(self): - self.assertEqual(self.factor[0], 'a') - self.assertEqual(self.factor[-1], 'c') + assert self.factor[0] == 'a' + assert self.factor[-1] == 'c' subf = self.factor[[0, 1, 2]] tm.assert_numpy_array_equal(subf._codes, @@ -82,9 +82,9 @@ def test_setitem(self): # int/positional c = self.factor.copy() c[0] = 'b' - self.assertEqual(c[0], 'b') + assert c[0] == 'b' c[-1] = 'a' - self.assertEqual(c[-1], 'a') + assert c[-1] == 'a' # boolean c = self.factor.copy() @@ -110,7 +110,7 @@ def test_setitem_listlike(self): # we are asserting the code result here # which maps to the -1000 category result = c.codes[np.array([100000]).astype(np.int64)] - self.assertEqual(result, np.array([5], dtype='int8')) + tm.assert_numpy_array_equal(result, np.array([5], dtype='int8')) def test_constructor_unsortable(self): @@ -665,7 +665,7 @@ def test_print(self): "Categories (3, object): [a < b < c]"] expected = "\n".join(expected) actual = repr(self.factor) - self.assertEqual(actual, expected) + assert actual == expected def test_big_print(self): factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ['a', 'b', 'c'], @@ -676,24 +676,24 @@ def test_big_print(self): actual = repr(factor) - self.assertEqual(actual, expected) + assert actual == expected def test_empty_print(self): factor = Categorical([], ["a", "b", "c"]) expected = ("[], Categories (3, object): [a, b, c]") # hack because array_repr changed in numpy > 1.6.x actual = repr(factor) - self.assertEqual(actual, expected) + assert actual == expected - self.assertEqual(expected, actual) + assert expected == actual factor = Categorical([], ["a", "b", "c"], ordered=True) expected = ("[], Categories (3, object): [a < b < c]") actual = repr(factor) - self.assertEqual(expected, actual) + assert expected == actual factor = Categorical([], []) expected = ("[], Categories (0, object): []") - self.assertEqual(expected, repr(factor)) + assert expected == repr(factor) def test_print_none_width(self): # GH10087 @@ -702,7 +702,7 @@ def test_print_none_width(self): "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") with option_context("display.width", None): - self.assertEqual(exp, repr(a)) + assert exp == repr(a) def test_unicode_print(self): if PY3: @@ -716,7 +716,7 @@ def test_unicode_print(self): Length: 60 Categories (3, object): [aaaaa, bb, cccc]""" - self.assertEqual(_rep(c), expected) + assert _rep(c) == expected c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) expected = u"""\ @@ -724,7 +724,7 @@ def test_unicode_print(self): Length: 60 Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa - self.assertEqual(_rep(c), expected) + assert _rep(c) == expected # unicode option should not affect to Categorical, as it doesn't care # the repr width @@ -735,7 +735,7 @@ def test_unicode_print(self): Length: 60 Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa - self.assertEqual(_rep(c), expected) + assert _rep(c) == expected def test_periodindex(self): idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02', @@ -1080,7 +1080,7 @@ def test_remove_unused_categories(self): tm.assert_index_equal(out.categories, Index(['B', 'D', 'F'])) exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8) tm.assert_numpy_array_equal(out.codes, exp_codes) - self.assertEqual(out.get_values().tolist(), val) + assert out.get_values().tolist() == val 
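# One pitfall in this kind of mechanical rewrite, worth keeping in mind for
# hunks like the test_nanops one in test_base.py above: `assert x, y` is the
# two-argument assert statement, so y becomes the failure *message* and no
# comparison is performed at all. A small illustrative sketch (hypothetical,
# not from the patch):
from datetime import datetime

lhs = datetime(2011, 11, 2)
# always passes: the right-hand datetime is only the assert message
assert lhs, datetime(2011, 11, 1)
# the intended comparison form (this one would raise AssertionError here)
# assert lhs == datetime(2011, 11, 1)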
alpha = list('abcdefghijklmnopqrstuvwxyz') val = np.random.choice(alpha[::2], 10000).astype('object') @@ -1088,7 +1088,7 @@ def test_remove_unused_categories(self): cat = pd.Categorical(values=val, categories=alpha) out = cat.remove_unused_categories() - self.assertEqual(out.get_values().tolist(), val.tolist()) + assert out.get_values().tolist() == val.tolist() def test_nan_handling(self): @@ -1156,37 +1156,37 @@ def test_min_max(self): cat = Categorical(["a", "b", "c", "d"], ordered=True) _min = cat.min() _max = cat.max() - self.assertEqual(_min, "a") - self.assertEqual(_max, "d") + assert _min == "a" + assert _max == "d" cat = Categorical(["a", "b", "c", "d"], categories=['d', 'c', 'b', 'a'], ordered=True) _min = cat.min() _max = cat.max() - self.assertEqual(_min, "d") - self.assertEqual(_max, "a") + assert _min == "d" + assert _max == "a" cat = Categorical([np.nan, "b", "c", np.nan], categories=['d', 'c', 'b', 'a'], ordered=True) _min = cat.min() _max = cat.max() assert np.isnan(_min) - self.assertEqual(_max, "b") + assert _max == "b" _min = cat.min(numeric_only=True) - self.assertEqual(_min, "c") + assert _min == "c" _max = cat.max(numeric_only=True) - self.assertEqual(_max, "b") + assert _max == "b" cat = Categorical([np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True) _min = cat.min() _max = cat.max() assert np.isnan(_min) - self.assertEqual(_max, 1) + assert _max == 1 _min = cat.min(numeric_only=True) - self.assertEqual(_min, 2) + assert _min == 2 _max = cat.max(numeric_only=True) - self.assertEqual(_max, 1) + assert _max == 1 def test_unique(self): # categories are reordered based on value when ordered=False @@ -1391,7 +1391,7 @@ def test_sort_values_na_position(self): def test_slicing_directly(self): cat = Categorical(["a", "b", "c", "d", "a", "b", "c"]) sliced = cat[3] - self.assertEqual(sliced, "d") + assert sliced == "d" sliced = cat[3:5] expected = Categorical(["d", "a"], categories=['a', 'b', 'c', 'd']) tm.assert_numpy_array_equal(sliced._codes, expected._codes) @@ -1427,7 +1427,7 @@ def test_shift(self): def test_nbytes(self): cat = pd.Categorical([1, 2, 3]) exp = cat._codes.nbytes + cat._categories.values.nbytes - self.assertEqual(cat.nbytes, exp) + assert cat.nbytes == exp def test_memory_usage(self): cat = pd.Categorical([1, 2, 3]) @@ -1661,8 +1661,8 @@ def test_basic(self): # test basic creation / coercion of categoricals s = Series(self.factor, name='A') - self.assertEqual(s.dtype, 'category') - self.assertEqual(len(s), len(self.factor)) + assert s.dtype == 'category' + assert len(s) == len(self.factor) str(s.values) str(s) @@ -1672,14 +1672,14 @@ def test_basic(self): tm.assert_series_equal(result, s) result = df.iloc[:, 0] tm.assert_series_equal(result, s) - self.assertEqual(len(df), len(self.factor)) + assert len(df) == len(self.factor) str(df.values) str(df) df = DataFrame({'A': s}) result = df['A'] tm.assert_series_equal(result, s) - self.assertEqual(len(df), len(self.factor)) + assert len(df) == len(self.factor) str(df.values) str(df) @@ -1689,8 +1689,8 @@ def test_basic(self): result2 = df['B'] tm.assert_series_equal(result1, s) tm.assert_series_equal(result2, s, check_names=False) - self.assertEqual(result2.name, 'B') - self.assertEqual(len(df), len(self.factor)) + assert result2.name == 'B' + assert len(df) == len(self.factor) str(df.values) str(df) @@ -1703,13 +1703,13 @@ def test_basic(self): expected = x.iloc[0].person_name result = x.person_name.iloc[0] - self.assertEqual(result, expected) + assert result == expected result = x.person_name[0] - 
self.assertEqual(result, expected) + assert result == expected result = x.person_name.loc[0] - self.assertEqual(result, expected) + assert result == expected def test_creation_astype(self): l = ["a", "b", "c", "a"] @@ -1976,11 +1976,11 @@ def test_series_delegations(self): exp_codes = Series([0, 1, 2, 0], dtype='int8') tm.assert_series_equal(s.cat.codes, exp_codes) - self.assertEqual(s.cat.ordered, True) + assert s.cat.ordered s = s.cat.as_unordered() - self.assertEqual(s.cat.ordered, False) + assert not s.cat.ordered s.cat.as_ordered(inplace=True) - self.assertEqual(s.cat.ordered, True) + assert s.cat.ordered # reorder s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) @@ -2058,7 +2058,7 @@ def test_describe(self): # Categoricals should not show up together with numerical columns result = self.cat.describe() - self.assertEqual(len(result.columns), 1) + assert len(result.columns) == 1 # In a frame, describe() for the cat should be the same as for string # arrays (count, unique, top, freq) @@ -2081,75 +2081,75 @@ def test_repr(self): exp = u("0 1\n1 2\n2 3\n3 4\n" + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") - self.assertEqual(exp, a.__unicode__()) + assert exp == a.__unicode__() a = pd.Series(pd.Categorical(["a", "b"] * 25)) exp = u("0 a\n1 b\n" + " ..\n" + "48 a\n49 b\n" + "Length: 50, dtype: category\nCategories (2, object): [a, b]") with option_context("display.max_rows", 5): - self.assertEqual(exp, repr(a)) + assert exp == repr(a) levs = list("abcdefghijklmnopqrstuvwxyz") a = pd.Series(pd.Categorical( ["a", "b"], categories=levs, ordered=True)) exp = u("0 a\n1 b\n" + "dtype: category\n" "Categories (26, object): [a < b < c < d ... w < x < y < z]") - self.assertEqual(exp, a.__unicode__()) + assert exp == a.__unicode__() def test_categorical_repr(self): c = pd.Categorical([1, 2, 3]) exp = """[1, 2, 3] Categories (3, int64): [1, 2, 3]""" - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) exp = """[1, 2, 3, 1, 2, 3] Categories (3, int64): [1, 2, 3]""" - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical([1, 2, 3, 4, 5] * 10) exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] Length: 50 Categories (5, int64): [1, 2, 3, 4, 5]""" - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical(np.arange(20)) exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] Length: 20 Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]""" - self.assertEqual(repr(c), exp) + assert repr(c) == exp def test_categorical_repr_ordered(self): c = pd.Categorical([1, 2, 3], ordered=True) exp = """[1, 2, 3] Categories (3, int64): [1 < 2 < 3]""" - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True) exp = """[1, 2, 3, 1, 2, 3] Categories (3, int64): [1 < 2 < 3]""" - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical([1, 2, 3, 4, 5] * 10, ordered=True) exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] Length: 50 Categories (5, int64): [1 < 2 < 3 < 4 < 5]""" - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical(np.arange(20), ordered=True) exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] Length: 20 Categories (20, int64): [0 < 1 < 2 < 3 ... 
16 < 17 < 18 < 19]""" - self.assertEqual(repr(c), exp) + assert repr(c) == exp def test_categorical_repr_datetime(self): idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) @@ -2164,7 +2164,7 @@ def test_categorical_repr_datetime(self): "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" " 2011-01-01 12:00:00, " "2011-01-01 13:00:00]""") - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx) exp = ( @@ -2177,7 +2177,7 @@ def test_categorical_repr_datetime(self): " 2011-01-01 12:00:00, " "2011-01-01 13:00:00]") - self.assertEqual(repr(c), exp) + assert repr(c) == exp idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') @@ -2193,7 +2193,7 @@ def test_categorical_repr_datetime(self): " " "2011-01-01 13:00:00-05:00]") - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx) exp = ( @@ -2209,7 +2209,7 @@ def test_categorical_repr_datetime(self): " " "2011-01-01 13:00:00-05:00]") - self.assertEqual(repr(c), exp) + assert repr(c) == exp def test_categorical_repr_datetime_ordered(self): idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) @@ -2218,14 +2218,14 @@ def test_categorical_repr_datetime_ordered(self): Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') @@ -2235,7 +2235,7 @@ def test_categorical_repr_datetime_ordered(self): 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < 2011-01-01 13:00:00-05:00]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] @@ -2243,7 +2243,7 @@ def test_categorical_repr_datetime_ordered(self): 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < 2011-01-01 13:00:00-05:00]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp def test_categorical_repr_period(self): idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) @@ -2252,27 +2252,27 @@ def test_categorical_repr_period(self): Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]""" # 
noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp idx = pd.period_range('2011-01', freq='M', periods=5) c = pd.Categorical(idx) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp def test_categorical_repr_period_ordered(self): idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) @@ -2281,27 +2281,27 @@ def test_categorical_repr_period_ordered(self): Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < 2011-01-01 13:00]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < 2011-01-01 13:00]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp idx = pd.period_range('2011-01', freq='M', periods=5) c = pd.Categorical(idx, ordered=True) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp def test_categorical_repr_timedelta(self): idx = pd.timedelta_range('1 days', periods=5) @@ -2309,13 +2309,13 @@ def test_categorical_repr_timedelta(self): exp = """[1 days, 2 days, 3 days, 4 days, 5 days] Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx) exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp idx = pd.timedelta_range('1 hours', periods=20) c = pd.Categorical(idx) @@ -2325,7 +2325,7 @@ def test_categorical_repr_timedelta(self): 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx) exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] @@ -2334,7 +2334,7 @@ def test_categorical_repr_timedelta(self): 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp def test_categorical_repr_timedelta_ordered(self): idx = pd.timedelta_range('1 days', periods=5) @@ -2342,13 +2342,13 @@ def 
test_categorical_repr_timedelta_ordered(self): exp = """[1 days, 2 days, 3 days, 4 days, 5 days] Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp idx = pd.timedelta_range('1 hours', periods=20) c = pd.Categorical(idx, ordered=True) @@ -2358,7 +2358,7 @@ def test_categorical_repr_timedelta_ordered(self): 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < 18 days 01:00:00 < 19 days 01:00:00]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] @@ -2367,7 +2367,7 @@ def test_categorical_repr_timedelta_ordered(self): 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < 18 days 01:00:00 < 19 days 01:00:00]""" # noqa - self.assertEqual(repr(c), exp) + assert repr(c) == exp def test_categorical_series_repr(self): s = pd.Series(pd.Categorical([1, 2, 3])) @@ -2377,7 +2377,7 @@ def test_categorical_series_repr(self): dtype: category Categories (3, int64): [1, 2, 3]""" - self.assertEqual(repr(s), exp) + assert repr(s) == exp s = pd.Series(pd.Categorical(np.arange(10))) exp = """0 0 @@ -2393,7 +2393,7 @@ def test_categorical_series_repr(self): dtype: category Categories (10, int64): [0, 1, 2, 3, ..., 6, 7, 8, 9]""" - self.assertEqual(repr(s), exp) + assert repr(s) == exp def test_categorical_series_repr_ordered(self): s = pd.Series(pd.Categorical([1, 2, 3], ordered=True)) @@ -2403,7 +2403,7 @@ def test_categorical_series_repr_ordered(self): dtype: category Categories (3, int64): [1 < 2 < 3]""" - self.assertEqual(repr(s), exp) + assert repr(s) == exp s = pd.Series(pd.Categorical(np.arange(10), ordered=True)) exp = """0 0 @@ -2419,7 +2419,7 @@ def test_categorical_series_repr_ordered(self): dtype: category Categories (10, int64): [0 < 1 < 2 < 3 ... 
6 < 7 < 8 < 9]""" - self.assertEqual(repr(s), exp) + assert repr(s) == exp def test_categorical_series_repr_datetime(self): idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) @@ -2433,7 +2433,7 @@ def test_categorical_series_repr_datetime(self): Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" # noqa - self.assertEqual(repr(s), exp) + assert repr(s) == exp idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') @@ -2448,7 +2448,7 @@ def test_categorical_series_repr_datetime(self): 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]""" # noqa - self.assertEqual(repr(s), exp) + assert repr(s) == exp def test_categorical_series_repr_datetime_ordered(self): idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) @@ -2462,7 +2462,7 @@ def test_categorical_series_repr_datetime_ordered(self): Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa - self.assertEqual(repr(s), exp) + assert repr(s) == exp idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') @@ -2477,7 +2477,7 @@ def test_categorical_series_repr_datetime_ordered(self): 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < 2011-01-01 13:00:00-05:00]""" # noqa - self.assertEqual(repr(s), exp) + assert repr(s) == exp def test_categorical_series_repr_period(self): idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) @@ -2491,7 +2491,7 @@ def test_categorical_series_repr_period(self): Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]""" # noqa - self.assertEqual(repr(s), exp) + assert repr(s) == exp idx = pd.period_range('2011-01', freq='M', periods=5) s = pd.Series(pd.Categorical(idx)) @@ -2503,7 +2503,7 @@ def test_categorical_series_repr_period(self): dtype: category Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" - self.assertEqual(repr(s), exp) + assert repr(s) == exp def test_categorical_series_repr_period_ordered(self): idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) @@ -2517,7 +2517,7 @@ def test_categorical_series_repr_period_ordered(self): Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < 2011-01-01 13:00]""" # noqa - self.assertEqual(repr(s), exp) + assert repr(s) == exp idx = pd.period_range('2011-01', freq='M', periods=5) s = pd.Series(pd.Categorical(idx, ordered=True)) @@ -2529,7 +2529,7 @@ def test_categorical_series_repr_period_ordered(self): dtype: category Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" - self.assertEqual(repr(s), exp) + assert repr(s) == exp def test_categorical_series_repr_timedelta(self): idx = pd.timedelta_range('1 days', periods=5) @@ -2542,7 +2542,7 @@ def test_categorical_series_repr_timedelta(self): dtype: category Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" - self.assertEqual(repr(s), exp) + assert repr(s) == exp idx = pd.timedelta_range('1 hours', periods=10) s = pd.Series(pd.Categorical(idx)) @@ -2561,7 +2561,7 @@ def test_categorical_series_repr_timedelta(self): 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, 8 days 01:00:00, 9 days 01:00:00]""" # noqa - self.assertEqual(repr(s), exp) + assert repr(s) == exp def test_categorical_series_repr_timedelta_ordered(self): idx = 
pd.timedelta_range('1 days', periods=5) @@ -2574,7 +2574,7 @@ def test_categorical_series_repr_timedelta_ordered(self): dtype: category Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa - self.assertEqual(repr(s), exp) + assert repr(s) == exp idx = pd.timedelta_range('1 hours', periods=10) s = pd.Series(pd.Categorical(idx, ordered=True)) @@ -2593,25 +2593,25 @@ def test_categorical_series_repr_timedelta_ordered(self): 3 days 01:00:00 ... 6 days 01:00:00 < 7 days 01:00:00 < 8 days 01:00:00 < 9 days 01:00:00]""" # noqa - self.assertEqual(repr(s), exp) + assert repr(s) == exp def test_categorical_index_repr(self): idx = pd.CategoricalIndex(pd.Categorical([1, 2, 3])) exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(idx), exp) + assert repr(idx) == exp i = pd.CategoricalIndex(pd.Categorical(np.arange(10))) exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp def test_categorical_index_repr_ordered(self): i = pd.CategoricalIndex(pd.Categorical([1, 2, 3], ordered=True)) exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp i = pd.CategoricalIndex(pd.Categorical(np.arange(10), ordered=True)) exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=True, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp def test_categorical_index_repr_datetime(self): idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) @@ -2621,7 +2621,7 @@ def test_categorical_index_repr_datetime(self): '2011-01-01 13:00:00'], categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') @@ -2631,7 +2631,7 @@ def test_categorical_index_repr_datetime(self): '2011-01-01 13:00:00-05:00'], categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp def test_categorical_index_repr_datetime_ordered(self): idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) @@ -2641,7 +2641,7 @@ def test_categorical_index_repr_datetime_ordered(self): '2011-01-01 13:00:00'], categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') @@ -2651,7 +2651,7 @@ def test_categorical_index_repr_datetime_ordered(self): '2011-01-01 13:00:00-05:00'], categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp i = pd.CategoricalIndex(pd.Categorical(idx.append(idx), ordered=True)) exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', @@ -2661,24 
+2661,24 @@ def test_categorical_index_repr_datetime_ordered(self): '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp def test_categorical_index_repr_period(self): # test all length idx = pd.period_range('2011-01-01 09:00', freq='H', periods=1) i = pd.CategoricalIndex(pd.Categorical(idx)) exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp idx = pd.period_range('2011-01-01 09:00', freq='H', periods=2) i = pd.CategoricalIndex(pd.Categorical(idx)) exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp idx = pd.period_range('2011-01-01 09:00', freq='H', periods=3) i = pd.CategoricalIndex(pd.Categorical(idx)) exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) i = pd.CategoricalIndex(pd.Categorical(idx)) @@ -2686,7 +2686,7 @@ def test_categorical_index_repr_period(self): '2011-01-01 12:00', '2011-01-01 13:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp i = pd.CategoricalIndex(pd.Categorical(idx.append(idx))) exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', @@ -2695,12 +2695,12 @@ def test_categorical_index_repr_period(self): '2011-01-01 13:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp idx = pd.period_range('2011-01', freq='M', periods=5) i = pd.CategoricalIndex(pd.Categorical(idx)) exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp def test_categorical_index_repr_period_ordered(self): idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) @@ -2709,18 +2709,18 @@ def test_categorical_index_repr_period_ordered(self): '2011-01-01 12:00', '2011-01-01 13:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp idx = pd.period_range('2011-01', freq='M', periods=5) i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp def test_categorical_index_repr_timedelta(self): idx = pd.timedelta_range('1 days', periods=5) i = pd.CategoricalIndex(pd.Categorical(idx)) exp = 
"""CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp idx = pd.timedelta_range('1 hours', periods=10) i = pd.CategoricalIndex(pd.Categorical(idx)) @@ -2730,13 +2730,13 @@ def test_categorical_index_repr_timedelta(self): '9 days 01:00:00'], categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp def test_categorical_index_repr_timedelta_ordered(self): idx = pd.timedelta_range('1 days', periods=5) i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp idx = pd.timedelta_range('1 hours', periods=10) i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) @@ -2746,7 +2746,7 @@ def test_categorical_index_repr_timedelta_ordered(self): '9 days 01:00:00'], categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" # noqa - self.assertEqual(repr(i), exp) + assert repr(i) == exp def test_categorical_frame(self): # normal DataFrame @@ -2762,7 +2762,7 @@ def test_categorical_frame(self): 4 2011-01-01 13:00:00-05:00 2011-05""" df = pd.DataFrame({'dt': pd.Categorical(dt), 'p': pd.Categorical(p)}) - self.assertEqual(repr(df), exp) + assert repr(df) == exp def test_info(self): @@ -2800,15 +2800,15 @@ def test_min_max(self): cat = Series(Categorical(["a", "b", "c", "d"], ordered=True)) _min = cat.min() _max = cat.max() - self.assertEqual(_min, "a") - self.assertEqual(_max, "d") + assert _min == "a" + assert _max == "d" cat = Series(Categorical(["a", "b", "c", "d"], categories=[ 'd', 'c', 'b', 'a'], ordered=True)) _min = cat.min() _max = cat.max() - self.assertEqual(_min, "d") - self.assertEqual(_max, "a") + assert _min == "d" + assert _max == "a" cat = Series(Categorical( [np.nan, "b", "c", np.nan], categories=['d', 'c', 'b', 'a' @@ -2816,14 +2816,14 @@ def test_min_max(self): _min = cat.min() _max = cat.max() assert np.isnan(_min) - self.assertEqual(_max, "b") + assert _max == "b" cat = Series(Categorical( [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True)) _min = cat.min() _max = cat.max() assert np.isnan(_min) - self.assertEqual(_max, 1) + assert _max == 1 def test_mode(self): s = Series(Categorical([1, 1, 2, 4, 5, 5, 5], @@ -3050,7 +3050,7 @@ def test_count(self): s = Series(Categorical([np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True)) result = s.count() - self.assertEqual(result, 2) + assert result == 2 def test_sort_values(self): @@ -3099,13 +3099,13 @@ def test_sort_values(self): res = df.sort_values(by=["string"], ascending=False) exp = np.array(["d", "c", "b", "a"], dtype=np.object_) tm.assert_numpy_array_equal(res["sort"].values.__array__(), exp) - self.assertEqual(res["sort"].dtype, "category") + assert res["sort"].dtype == "category" res = df.sort_values(by=["sort"], ascending=False) exp = df.sort_values(by=["string"], ascending=True) 
tm.assert_series_equal(res["values"], exp["values"]) - self.assertEqual(res["sort"].dtype, "category") - self.assertEqual(res["unsort"].dtype, "category") + assert res["sort"].dtype == "category" + assert res["unsort"].dtype == "category" # unordered cat, but we allow this df.sort_values(by=["unsort"], ascending=False) @@ -3201,7 +3201,7 @@ def test_slicing_and_getting_ops(self): # single value res_val = df.iloc[2, 0] - self.assertEqual(res_val, exp_val) + assert res_val == exp_val # loc # frame @@ -3221,7 +3221,7 @@ def test_slicing_and_getting_ops(self): # single value res_val = df.loc["j", "cats"] - self.assertEqual(res_val, exp_val) + assert res_val == exp_val # ix # frame @@ -3242,15 +3242,15 @@ def test_slicing_and_getting_ops(self): # single value res_val = df.loc["j", df.columns[0]] - self.assertEqual(res_val, exp_val) + assert res_val == exp_val # iat res_val = df.iat[2, 0] - self.assertEqual(res_val, exp_val) + assert res_val == exp_val # at res_val = df.at["j", "cats"] - self.assertEqual(res_val, exp_val) + assert res_val == exp_val # fancy indexing exp_fancy = df.iloc[[2]] @@ -3262,7 +3262,7 @@ def test_slicing_and_getting_ops(self): # get_value res_val = df.get_value("j", "cats") - self.assertEqual(res_val, exp_val) + assert res_val == exp_val # i : int, slice, or sequence of integers res_row = df.iloc[2] diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py index ad5418f4a4a29..ba055b105dc41 100644 --- a/pandas/tests/test_config.py +++ b/pandas/tests/test_config.py @@ -111,9 +111,9 @@ def test_case_insensitive(self): self.cf.register_option('KanBAN', 1, 'doc') assert 'doc' in self.cf.describe_option('kanbaN', _print_desc=False) - self.assertEqual(self.cf.get_option('kanBaN'), 1) + assert self.cf.get_option('kanBaN') == 1 self.cf.set_option('KanBan', 2) - self.assertEqual(self.cf.get_option('kAnBaN'), 2) + assert self.cf.get_option('kAnBaN') == 2 # gets of non-existent keys fail pytest.raises(KeyError, self.cf.get_option, 'no_such_option') @@ -127,8 +127,8 @@ def test_get_option(self): self.cf.register_option('b.b', None, 'doc2') # gets of existing keys succeed - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b.c'), 'hullo') + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'hullo' assert self.cf.get_option('b.b') is None # gets of non-existent keys fail @@ -139,17 +139,17 @@ def test_set_option(self): self.cf.register_option('b.c', 'hullo', 'doc2') self.cf.register_option('b.b', None, 'doc2') - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b.c'), 'hullo') + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'hullo' assert self.cf.get_option('b.b') is None self.cf.set_option('a', 2) self.cf.set_option('b.c', 'wurld') self.cf.set_option('b.b', 1.1) - self.assertEqual(self.cf.get_option('a'), 2) - self.assertEqual(self.cf.get_option('b.c'), 'wurld') - self.assertEqual(self.cf.get_option('b.b'), 1.1) + assert self.cf.get_option('a') == 2 + assert self.cf.get_option('b.c') == 'wurld' + assert self.cf.get_option('b.b') == 1.1 pytest.raises(KeyError, self.cf.set_option, 'no.such.key', None) @@ -167,15 +167,15 @@ def test_set_option_multiple(self): self.cf.register_option('b.c', 'hullo', 'doc2') self.cf.register_option('b.b', None, 'doc2') - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b.c'), 'hullo') + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'hullo' assert 
self.cf.get_option('b.b') is None self.cf.set_option('a', '2', 'b.c', None, 'b.b', 10.0) - self.assertEqual(self.cf.get_option('a'), '2') + assert self.cf.get_option('a') == '2' assert self.cf.get_option('b.c') is None - self.assertEqual(self.cf.get_option('b.b'), 10.0) + assert self.cf.get_option('b.b') == 10.0 def test_validation(self): self.cf.register_option('a', 1, 'doc', validator=self.cf.is_int) @@ -203,36 +203,36 @@ def test_reset_option(self): self.cf.register_option('a', 1, 'doc', validator=self.cf.is_int) self.cf.register_option('b.c', 'hullo', 'doc2', validator=self.cf.is_str) - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b.c'), 'hullo') + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'hullo' self.cf.set_option('a', 2) self.cf.set_option('b.c', 'wurld') - self.assertEqual(self.cf.get_option('a'), 2) - self.assertEqual(self.cf.get_option('b.c'), 'wurld') + assert self.cf.get_option('a') == 2 + assert self.cf.get_option('b.c') == 'wurld' self.cf.reset_option('a') - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b.c'), 'wurld') + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'wurld' self.cf.reset_option('b.c') - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b.c'), 'hullo') + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'hullo' def test_reset_option_all(self): self.cf.register_option('a', 1, 'doc', validator=self.cf.is_int) self.cf.register_option('b.c', 'hullo', 'doc2', validator=self.cf.is_str) - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b.c'), 'hullo') + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'hullo' self.cf.set_option('a', 2) self.cf.set_option('b.c', 'wurld') - self.assertEqual(self.cf.get_option('a'), 2) - self.assertEqual(self.cf.get_option('b.c'), 'wurld') + assert self.cf.get_option('a') == 2 + assert self.cf.get_option('b.c') == 'wurld' self.cf.reset_option("all") - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b.c'), 'hullo') + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'hullo' def test_deprecate_option(self): # we can deprecate non-existent options @@ -248,7 +248,7 @@ def test_deprecate_option(self): else: self.fail("Nonexistent option didn't raise KeyError") - self.assertEqual(len(w), 1) # should have raised one warning + assert len(w) == 1 # should have raised one warning assert 'deprecated' in str(w[-1]) # we get the default message self.cf.register_option('a', 1, 'doc', validator=self.cf.is_int) @@ -260,7 +260,7 @@ def test_deprecate_option(self): warnings.simplefilter('always') self.cf.get_option('a') - self.assertEqual(len(w), 1) # should have raised one warning + assert len(w) == 1 # should have raised one warning assert 'eprecated' in str(w[-1]) # we get the default message assert 'nifty_ver' in str(w[-1]) # with the removal_ver quoted @@ -272,51 +272,51 @@ def test_deprecate_option(self): warnings.simplefilter('always') self.cf.get_option('b.c') - self.assertEqual(len(w), 1) # should have raised one warning + assert len(w) == 1 # should have raised one warning assert 'zounds!' 
in str(w[-1]) # we get the custom message # test rerouting keys self.cf.register_option('d.a', 'foo', 'doc2') self.cf.register_option('d.dep', 'bar', 'doc2') - self.assertEqual(self.cf.get_option('d.a'), 'foo') - self.assertEqual(self.cf.get_option('d.dep'), 'bar') + assert self.cf.get_option('d.a') == 'foo' + assert self.cf.get_option('d.dep') == 'bar' self.cf.deprecate_option('d.dep', rkey='d.a') # reroute d.dep to d.a with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') - self.assertEqual(self.cf.get_option('d.dep'), 'foo') + assert self.cf.get_option('d.dep') == 'foo' - self.assertEqual(len(w), 1) # should have raised one warning + assert len(w) == 1 # should have raised one warning assert 'eprecated' in str(w[-1]) # we get the custom message with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') self.cf.set_option('d.dep', 'baz') # should overwrite "d.a" - self.assertEqual(len(w), 1) # should have raised one warning + assert len(w) == 1 # should have raised one warning assert 'eprecated' in str(w[-1]) # we get the custom message with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') - self.assertEqual(self.cf.get_option('d.dep'), 'baz') + assert self.cf.get_option('d.dep') == 'baz' - self.assertEqual(len(w), 1) # should have raised one warning + assert len(w) == 1 # should have raised one warning assert 'eprecated' in str(w[-1]) # we get the custom message def test_config_prefix(self): with self.cf.config_prefix("base"): self.cf.register_option('a', 1, "doc1") self.cf.register_option('b', 2, "doc2") - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b'), 2) + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b') == 2 self.cf.set_option('a', 3) self.cf.set_option('b', 4) - self.assertEqual(self.cf.get_option('a'), 3) - self.assertEqual(self.cf.get_option('b'), 4) + assert self.cf.get_option('a') == 3 + assert self.cf.get_option('b') == 4 - self.assertEqual(self.cf.get_option('base.a'), 3) - self.assertEqual(self.cf.get_option('base.b'), 4) + assert self.cf.get_option('base.a') == 3 + assert self.cf.get_option('base.b') == 4 assert 'doc1' in self.cf.describe_option('base.a', _print_desc=False) assert 'doc2' in self.cf.describe_option('base.b', _print_desc=False) @@ -324,8 +324,8 @@ def test_config_prefix(self): self.cf.reset_option('base.b') with self.cf.config_prefix("base"): - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b'), 2) + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b') == 2 def test_callback(self): k = [None] @@ -340,21 +340,21 @@ def callback(key): del k[-1], v[-1] self.cf.set_option("d.a", "fooz") - self.assertEqual(k[-1], "d.a") - self.assertEqual(v[-1], "fooz") + assert k[-1] == "d.a" + assert v[-1] == "fooz" del k[-1], v[-1] self.cf.set_option("d.b", "boo") - self.assertEqual(k[-1], "d.b") - self.assertEqual(v[-1], "boo") + assert k[-1] == "d.b" + assert v[-1] == "boo" del k[-1], v[-1] self.cf.reset_option("d.b") - self.assertEqual(k[-1], "d.b") + assert k[-1] == "d.b" def test_set_ContextManager(self): def eq(val): - self.assertEqual(self.cf.get_option("a"), val) + assert self.cf.get_option("a") == val self.cf.register_option('a', 0) eq(0) @@ -384,22 +384,22 @@ def f3(key): self.cf.register_option('c', 0, cb=f3) options = self.cf.options - self.assertEqual(options.a, 0) + assert options.a == 0 with self.cf.option_context("a", 15): - self.assertEqual(options.a, 15) + assert options.a == 
15 options.a = 500 - self.assertEqual(self.cf.get_option("a"), 500) + assert self.cf.get_option("a") == 500 self.cf.reset_option("a") - self.assertEqual(options.a, self.cf.get_option("a", 0)) + assert options.a == self.cf.get_option("a", 0) pytest.raises(KeyError, f) pytest.raises(KeyError, f2) # make sure callback kicks when using this form of setting options.c = 1 - self.assertEqual(len(holder), 1) + assert len(holder) == 1 def test_option_context_scope(self): # Ensure that creating a context does not affect the existing @@ -414,11 +414,11 @@ def test_option_context_scope(self): # Ensure creating contexts didn't affect the current context. ctx = self.cf.option_context(option_name, context_value) - self.assertEqual(self.cf.get_option(option_name), original_value) + assert self.cf.get_option(option_name) == original_value # Ensure the correct value is available inside the context. with ctx: - self.assertEqual(self.cf.get_option(option_name), context_value) + assert self.cf.get_option(option_name) == context_value # Ensure the current context is reset - self.assertEqual(self.cf.get_option(option_name), original_value) + assert self.cf.get_option(option_name) == original_value diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index e4ed194b75bcd..5b2057f830102 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -259,7 +259,7 @@ def test_series_getitem(self): result = s[2000, 3, 10] expected = s[49] - self.assertEqual(result, expected) + assert result == expected # fancy expected = s.reindex(s.index[49:51]) @@ -404,9 +404,9 @@ def test_frame_setitem_multi_column(self): sliced_b1 = df['B', '1'] tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False) tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False) - self.assertEqual(sliced_a1.name, ('A', '1')) - self.assertEqual(sliced_a2.name, ('A', '2')) - self.assertEqual(sliced_b1.name, ('B', '1')) + assert sliced_a1.name == ('A', '1') + assert sliced_a2.name == ('A', '2') + assert sliced_b1.name == ('B', '1') def test_getitem_tuple_plus_slice(self): # GH #671 @@ -557,7 +557,7 @@ def test_xs_level0(self): result = df.xs('a', level=0) expected = df.xs('a') - self.assertEqual(len(result), 2) + assert len(result) == 2 tm.assert_frame_equal(result, expected) def test_xs_level_series(self): @@ -667,19 +667,19 @@ def test_setitem_change_dtype(self): def test_frame_setitem_ix(self): self.frame.loc[('bar', 'two'), 'B'] = 5 - self.assertEqual(self.frame.loc[('bar', 'two'), 'B'], 5) + assert self.frame.loc[('bar', 'two'), 'B'] == 5 # with integer labels df = self.frame.copy() df.columns = lrange(3) df.loc[('bar', 'two'), 1] = 7 - self.assertEqual(df.loc[('bar', 'two'), 1], 7) + assert df.loc[('bar', 'two'), 1] == 7 with catch_warnings(record=True): df = self.frame.copy() df.columns = lrange(3) df.ix[('bar', 'two'), 1] = 7 - self.assertEqual(df.loc[('bar', 'two'), 1], 7) + assert df.loc[('bar', 'two'), 1] == 7 def test_fancy_slice_partial(self): result = self.frame.loc['bar':'baz'] @@ -724,12 +724,11 @@ def test_delevel_infer_dtype(self): def test_reset_index_with_drop(self): deleveled = self.ymd.reset_index(drop=True) - self.assertEqual(len(deleveled.columns), len(self.ymd.columns)) + assert len(deleveled.columns) == len(self.ymd.columns) deleveled = self.series.reset_index() assert isinstance(deleveled, DataFrame) - self.assertEqual(len(deleveled.columns), - len(self.series.index.levels) + 1) + assert len(deleveled.columns) == len(self.series.index.levels) + 1 deleveled = 
self.series.reset_index(drop=True) assert isinstance(deleveled, Series) @@ -942,7 +941,7 @@ def test_stack_mixed_dtype(self): result = df['foo'].stack().sort_index() tm.assert_series_equal(stacked['foo'], result, check_names=False) assert result.name is None - self.assertEqual(stacked['bar'].dtype, np.float_) + assert stacked['bar'].dtype == np.float_ def test_unstack_bug(self): df = DataFrame({'state': ['naive', 'naive', 'naive', 'activ', 'activ', @@ -961,11 +960,11 @@ def test_unstack_bug(self): def test_stack_unstack_preserve_names(self): unstacked = self.frame.unstack() - self.assertEqual(unstacked.index.name, 'first') - self.assertEqual(unstacked.columns.names, ['exp', 'second']) + assert unstacked.index.name == 'first' + assert unstacked.columns.names == ['exp', 'second'] restacked = unstacked.stack() - self.assertEqual(restacked.index.names, self.frame.index.names) + assert restacked.index.names == self.frame.index.names def test_unstack_level_name(self): result = self.frame.unstack('second') @@ -986,7 +985,7 @@ def test_stack_unstack_multiple(self): unstacked = self.ymd.unstack(['year', 'month']) expected = self.ymd.unstack('year').unstack('month') tm.assert_frame_equal(unstacked, expected) - self.assertEqual(unstacked.columns.names, expected.columns.names) + assert unstacked.columns.names == expected.columns.names # series s = self.ymd['A'] @@ -998,7 +997,7 @@ def test_stack_unstack_multiple(self): restacked = restacked.sort_index(level=0) tm.assert_frame_equal(restacked, self.ymd) - self.assertEqual(restacked.index.names, self.ymd.index.names) + assert restacked.index.names == self.ymd.index.names # GH #451 unstacked = self.ymd.unstack([1, 2]) @@ -1191,7 +1190,7 @@ def test_unstack_unobserved_keys(self): df = DataFrame(np.random.randn(4, 2), index=index) result = df.unstack() - self.assertEqual(len(result.columns), 4) + assert len(result.columns) == 4 recons = result.stack() tm.assert_frame_equal(recons, df) @@ -1351,12 +1350,12 @@ def test_count(self): result = series.count(level='b') expect = self.series.count(level=1) tm.assert_series_equal(result, expect, check_names=False) - self.assertEqual(result.index.name, 'b') + assert result.index.name == 'b' result = series.count(level='a') expect = self.series.count(level=0) tm.assert_series_equal(result, expect, check_names=False) - self.assertEqual(result.index.name, 'a') + assert result.index.name == 'a' pytest.raises(KeyError, series.count, 'x') pytest.raises(KeyError, frame.count, level='x') @@ -1465,7 +1464,7 @@ def test_groupby_multilevel(self): # TODO groupby with level_values drops names tm.assert_frame_equal(result, expected, check_names=False) - self.assertEqual(result.index.names, self.ymd.index.names[:2]) + assert result.index.names == self.ymd.index.names[:2] result2 = self.ymd.groupby(level=self.ymd.index.names[:2]).mean() tm.assert_frame_equal(result, result2) @@ -1483,13 +1482,13 @@ def test_multilevel_consolidate(self): def test_ix_preserve_names(self): result = self.ymd.loc[2000] result2 = self.ymd['A'].loc[2000] - self.assertEqual(result.index.names, self.ymd.index.names[1:]) - self.assertEqual(result2.index.names, self.ymd.index.names[1:]) + assert result.index.names == self.ymd.index.names[1:] + assert result2.index.names == self.ymd.index.names[1:] result = self.ymd.loc[2000, 2] result2 = self.ymd['A'].loc[2000, 2] - self.assertEqual(result.index.name, self.ymd.index.names[2]) - self.assertEqual(result2.index.name, self.ymd.index.names[2]) + assert result.index.name == self.ymd.index.names[2] + assert 
result2.index.name == self.ymd.index.names[2] def test_partial_set(self): # GH #397 @@ -1509,7 +1508,7 @@ def test_partial_set(self): # this works...for now df['A'].iloc[14] = 5 - self.assertEqual(df['A'][14], 5) + assert df['A'][14] == 5 def test_unstack_preserve_types(self): # GH #403 @@ -1517,9 +1516,9 @@ def test_unstack_preserve_types(self): self.ymd['F'] = 2 unstacked = self.ymd.unstack('month') - self.assertEqual(unstacked['A', 1].dtype, np.float64) - self.assertEqual(unstacked['E', 1].dtype, np.object_) - self.assertEqual(unstacked['F', 1].dtype, np.float64) + assert unstacked['A', 1].dtype == np.float64 + assert unstacked['E', 1].dtype == np.object_ + assert unstacked['F', 1].dtype == np.float64 def test_unstack_group_index_overflow(self): labels = np.tile(np.arange(500), 2) @@ -1530,7 +1529,7 @@ def test_unstack_group_index_overflow(self): s = Series(np.arange(1000), index=index) result = s.unstack() - self.assertEqual(result.shape, (500, 2)) + assert result.shape == (500, 2) # test roundtrip stacked = result.stack() @@ -1542,7 +1541,7 @@ def test_unstack_group_index_overflow(self): s = Series(np.arange(1000), index=index) result = s.unstack(0) - self.assertEqual(result.shape, (500, 2)) + assert result.shape == (500, 2) # put it in middle index = MultiIndex(levels=[level] * 4 + [[0, 1]] + [level] * 4, @@ -1551,7 +1550,7 @@ def test_unstack_group_index_overflow(self): s = Series(np.arange(1000), index=index) result = s.unstack(4) - self.assertEqual(result.shape, (500, 2)) + assert result.shape == (500, 2) def test_getitem_lowerdim_corner(self): pytest.raises(KeyError, self.frame.loc.__getitem__, @@ -1559,7 +1558,7 @@ def test_getitem_lowerdim_corner(self): # in theory should be inserting in a sorted space???? self.frame.loc[('bar', 'three'), 'B'] = 0 - self.assertEqual(self.frame.sort_index().loc[('bar', 'three'), 'B'], 0) + assert self.frame.sort_index().loc[('bar', 'three'), 'B'] == 0 # --------------------------------------------------------------------- # AMBIGUOUS CASES! @@ -1659,12 +1658,12 @@ def test_mixed_depth_get(self): result = df['a'] expected = df['a', '', ''] tm.assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, 'a') + assert result.name == 'a' result = df['routine1', 'result1'] expected = df['routine1', 'result1', ''] tm.assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, ('routine1', 'result1')) + assert result.name == ('routine1', 'result1') def test_mixed_depth_insert(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], @@ -1747,7 +1746,7 @@ def test_mixed_depth_pop(self): expected = df2.pop(('a', '', '')) tm.assert_series_equal(expected, result, check_names=False) tm.assert_frame_equal(df1, df2) - self.assertEqual(result.name, 'a') + assert result.name == 'a' expected = df1['top'] df1 = df1.drop(['top'], axis=1) @@ -1845,7 +1844,7 @@ def test_drop_preserve_names(self): df = DataFrame(np.random.randn(6, 3), index=index) result = df.drop([(0, 2)]) - self.assertEqual(result.index.names, ('one', 'two')) + assert result.index.names == ('one', 'two') def test_unicode_repr_issues(self): levels = [Index([u('a/\u03c3'), u('b/\u03c3'), u('c/\u03c3')]), @@ -1944,9 +1943,9 @@ def test_indexing_over_hashtable_size_cutoff(self): MultiIndex.from_arrays((["a"] * n, np.arange(n)))) # hai it works! 
- self.assertEqual(s[("a", 5)], 5) - self.assertEqual(s[("a", 6)], 6) - self.assertEqual(s[("a", 7)], 7) + assert s[("a", 5)] == 5 + assert s[("a", 6)] == 6 + assert s[("a", 7)] == 7 _index._SIZE_CUTOFF = old_cutoff @@ -1998,7 +1997,7 @@ def test_duplicate_groupby_issues(self): s = Series(dt, index=idx) result = s.groupby(s.index).first() - self.assertEqual(len(result), 3) + assert len(result) == 3 def test_duplicate_mi(self): # GH 4516 @@ -2353,7 +2352,7 @@ class TestSorted(Base, tm.TestCase): def test_sort_index_preserve_levels(self): result = self.frame.sort_index() - self.assertEqual(result.index.names, self.frame.index.names) + assert result.index.names == self.frame.index.names def test_sorting_repr_8017(self): @@ -2375,7 +2374,7 @@ def test_sorting_repr_8017(self): # check that the repr is good # make sure that we have a correct sparsified repr # e.g. only 1 header of read - self.assertEqual(str(df2).splitlines()[0].split(), ['red']) + assert str(df2).splitlines()[0].split() == ['red'] # GH 8017 # sorting fails after columns added @@ -2406,7 +2405,7 @@ def test_sort_index_level(self): a_sorted = self.frame['A'].sort_index(level=0) # preserve names - self.assertEqual(a_sorted.index.names, self.frame.index.names) + assert a_sorted.index.names == self.frame.index.names # inplace rs = self.frame.copy() @@ -2469,7 +2468,7 @@ def test_is_lexsorted(self): index = MultiIndex(levels=levels, labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) assert not index.is_lexsorted() - self.assertEqual(index.lexsort_depth, 0) + assert index.lexsort_depth == 0 def test_getitem_multilevel_index_tuple_not_sorted(self): index_columns = list("abc") diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 92d7f29366c69..35d0198ae06a9 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -346,8 +346,8 @@ def test_nanmean_overflow(self): s = Series(a, index=range(500), dtype=np.int64) result = s.mean() np_result = s.values.mean() - self.assertEqual(result, a) - self.assertEqual(result, np_result) + assert result == a + assert result == np_result assert result.dtype == np.float64 def test_returned_dtype(self): @@ -746,12 +746,13 @@ class TestEnsureNumeric(tm.TestCase): def test_numeric_values(self): # Test integer - self.assertEqual(nanops._ensure_numeric(1), 1, 'Failed for int') + assert nanops._ensure_numeric(1) == 1 + # Test float - self.assertEqual(nanops._ensure_numeric(1.1), 1.1, 'Failed for float') + assert nanops._ensure_numeric(1.1) == 1.1 + # Test complex - self.assertEqual(nanops._ensure_numeric(1 + 2j), 1 + 2j, - 'Failed for complex') + assert nanops._ensure_numeric(1 + 2j) == 1 + 2j def test_ndarray(self): # Test numeric ndarray @@ -887,7 +888,7 @@ def test_nanstd_roundoff(self): data = Series(766897346 * np.ones(10)) for ddof in range(3): result = data.std(ddof=ddof) - self.assertEqual(result, 0.0) + assert result == 0.0 @property def prng(self): @@ -908,7 +909,7 @@ def test_constant_series(self): for val in [3075.2, 3075.3, 3075.5]: data = val * np.ones(300) skew = nanops.nanskew(data) - self.assertEqual(skew, 0.0) + assert skew == 0.0 def test_all_finite(self): alpha, beta = 0.3, 0.1 @@ -958,7 +959,7 @@ def test_constant_series(self): for val in [3075.2, 3075.3, 3075.5]: data = val * np.ones(300) kurt = nanops.nankurt(data) - self.assertEqual(kurt, 0.0) + assert kurt == 0.0 def test_all_finite(self): alpha, beta = 0.3, 0.1 diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index c9894ad9a9acf..a692f6b26c61e 100644 --- 
a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -222,9 +222,9 @@ def test_set_axis(self): assert self.panel.minor_axis is new_minor def test_get_axis_number(self): - self.assertEqual(self.panel._get_axis_number('items'), 0) - self.assertEqual(self.panel._get_axis_number('major'), 1) - self.assertEqual(self.panel._get_axis_number('minor'), 2) + assert self.panel._get_axis_number('items') == 0 + assert self.panel._get_axis_number('major') == 1 + assert self.panel._get_axis_number('minor') == 2 with tm.assert_raises_regex(ValueError, "No axis named foo"): self.panel._get_axis_number('foo') @@ -233,9 +233,9 @@ def test_get_axis_number(self): self.panel.__ge__(self.panel, axis='foo') def test_get_axis_name(self): - self.assertEqual(self.panel._get_axis_name(0), 'items') - self.assertEqual(self.panel._get_axis_name(1), 'major_axis') - self.assertEqual(self.panel._get_axis_name(2), 'minor_axis') + assert self.panel._get_axis_name(0) == 'items' + assert self.panel._get_axis_name(1) == 'major_axis' + assert self.panel._get_axis_name(2) == 'minor_axis' def test_get_plane_axes(self): # what to do here? @@ -303,8 +303,7 @@ def test_iteritems(self): for k, v in self.panel.iteritems(): pass - self.assertEqual(len(list(self.panel.iteritems())), - len(self.panel.items)) + assert len(list(self.panel.iteritems())) == len(self.panel.items) def test_combineFrame(self): with catch_warnings(record=True): @@ -432,8 +431,8 @@ def test_abs(self): expected = np.abs(s) assert_series_equal(result, expected) assert_series_equal(result2, expected) - self.assertEqual(result.name, 'A') - self.assertEqual(result2.name, 'A') + assert result.name == 'A' + assert result2.name == 'A' class CheckIndexing(object): @@ -497,16 +496,16 @@ def test_setitem(self): # scalar self.panel['ItemG'] = 1 self.panel['ItemE'] = True - self.assertEqual(self.panel['ItemG'].values.dtype, np.int64) - self.assertEqual(self.panel['ItemE'].values.dtype, np.bool_) + assert self.panel['ItemG'].values.dtype == np.int64 + assert self.panel['ItemE'].values.dtype == np.bool_ # object dtype self.panel['ItemQ'] = 'foo' - self.assertEqual(self.panel['ItemQ'].values.dtype, np.object_) + assert self.panel['ItemQ'].values.dtype == np.object_ # boolean dtype self.panel['ItemP'] = self.panel['ItemA'] > 0 - self.assertEqual(self.panel['ItemP'].values.dtype, np.bool_) + assert self.panel['ItemP'].values.dtype == np.bool_ pytest.raises(TypeError, self.panel.__setitem__, 'foo', self.panel.loc[['ItemP']]) @@ -560,7 +559,7 @@ def test_major_xs(self): result = xs['ItemA'] assert_series_equal(result, ref.xs(idx), check_names=False) - self.assertEqual(result.name, 'ItemA') + assert result.name == 'ItemA' # not contained idx = self.panel.major_axis[0] - BDay() @@ -570,8 +569,8 @@ def test_major_xs_mixed(self): with catch_warnings(record=True): self.panel['ItemD'] = 'foo' xs = self.panel.major_xs(self.panel.major_axis[0]) - self.assertEqual(xs['ItemA'].dtype, np.float64) - self.assertEqual(xs['ItemD'].dtype, np.object_) + assert xs['ItemA'].dtype == np.float64 + assert xs['ItemD'].dtype == np.object_ def test_minor_xs(self): with catch_warnings(record=True): @@ -590,8 +589,8 @@ def test_minor_xs_mixed(self): self.panel['ItemD'] = 'foo' xs = self.panel.minor_xs('D') - self.assertEqual(xs['ItemA'].dtype, np.float64) - self.assertEqual(xs['ItemD'].dtype, np.object_) + assert xs['ItemA'].dtype == np.float64 + assert xs['ItemD'].dtype == np.object_ def test_xs(self): with catch_warnings(record=True): @@ -985,16 +984,16 @@ def test_constructor_cast(self): def 
test_constructor_empty_panel(self): with catch_warnings(record=True): empty = Panel() - self.assertEqual(len(empty.items), 0) - self.assertEqual(len(empty.major_axis), 0) - self.assertEqual(len(empty.minor_axis), 0) + assert len(empty.items) == 0 + assert len(empty.major_axis) == 0 + assert len(empty.minor_axis) == 0 def test_constructor_observe_dtype(self): with catch_warnings(record=True): # GH #411 panel = Panel(items=lrange(3), major_axis=lrange(3), minor_axis=lrange(3), dtype='O') - self.assertEqual(panel.values.dtype, np.object_) + assert panel.values.dtype == np.object_ def test_constructor_dtypes(self): with catch_warnings(record=True): @@ -1002,7 +1001,7 @@ def test_constructor_dtypes(self): def _check_dtype(panel, dtype): for i in panel.items: - self.assertEqual(panel[i].values.dtype.name, dtype) + assert panel[i].values.dtype.name == dtype # only nan holding types allowed here for dtype in ['float64', 'float32', 'object']: @@ -1173,8 +1172,8 @@ def test_from_dict_mixed_orient(self): panel = Panel.from_dict(data, orient='minor') - self.assertEqual(panel['foo'].values.dtype, np.object_) - self.assertEqual(panel['A'].values.dtype, np.float64) + assert panel['foo'].values.dtype == np.object_ + assert panel['A'].values.dtype == np.float64 def test_constructor_error_msgs(self): with catch_warnings(record=True): @@ -1709,7 +1708,7 @@ def test_to_frame(self): assert_panel_equal(unfiltered.to_panel(), self.panel) # names - self.assertEqual(unfiltered.index.names, ('major', 'minor')) + assert unfiltered.index.names == ('major', 'minor') # unsorted, round trip df = self.panel.to_frame(filter_observations=False) @@ -1726,8 +1725,8 @@ def test_to_frame(self): df.columns.name = 'baz' rdf = df.to_panel().to_frame() - self.assertEqual(rdf.index.names, df.index.names) - self.assertEqual(rdf.columns.names, df.columns.names) + assert rdf.index.names == df.index.names + assert rdf.columns.names == df.columns.names def test_to_frame_mixed(self): with catch_warnings(record=True): @@ -1737,7 +1736,7 @@ def test_to_frame_mixed(self): lp = panel.to_frame() wp = lp.to_panel() - self.assertEqual(wp['bool'].values.dtype, np.bool_) + assert wp['bool'].values.dtype == np.bool_ # Previously, this was mutating the underlying # index and changing its name assert_frame_equal(wp['bool'], panel['bool'], check_names=False) @@ -2591,18 +2590,16 @@ def test_axis_dummies(self): from pandas.core.reshape.reshape import make_axis_dummies minor_dummies = make_axis_dummies(self.panel, 'minor').astype(np.uint8) - self.assertEqual(len(minor_dummies.columns), - len(self.panel.index.levels[1])) + assert len(minor_dummies.columns) == len(self.panel.index.levels[1]) major_dummies = make_axis_dummies(self.panel, 'major').astype(np.uint8) - self.assertEqual(len(major_dummies.columns), - len(self.panel.index.levels[0])) + assert len(major_dummies.columns) == len(self.panel.index.levels[0]) mapping = {'A': 'one', 'B': 'one', 'C': 'two', 'D': 'two'} transformed = make_axis_dummies(self.panel, 'minor', transform=mapping.get).astype(np.uint8) - self.assertEqual(len(transformed.columns), 2) + assert len(transformed.columns) == 2 tm.assert_index_equal(transformed.columns, Index(['one', 'two'])) # TODO: test correctness @@ -2638,12 +2635,12 @@ def test_count(self): major_count = self.panel.count(level=0)['ItemA'] labels = index.labels[0] for i, idx in enumerate(index.levels[0]): - self.assertEqual(major_count[i], (labels == i).sum()) + assert major_count[i] == (labels == i).sum() minor_count = self.panel.count(level=1)['ItemA'] labels = 
index.labels[1] for i, idx in enumerate(index.levels[1]): - self.assertEqual(minor_count[i], (labels == i).sum()) + assert minor_count[i] == (labels == i).sum() def test_join(self): with catch_warnings(record=True): @@ -2652,7 +2649,7 @@ def test_join(self): joined = lp1.join(lp2) - self.assertEqual(len(joined.columns), 3) + assert len(joined.columns) == 3 pytest.raises(Exception, lp1.join, self.panel.filter(['ItemB', 'ItemC'])) @@ -2665,11 +2662,11 @@ def test_pivot(self): np.array(['a', 'b', 'c', 'd', 'e']), np.array([1, 2, 3, 5, 4.])) df = pivot(one, two, three) - self.assertEqual(df['a'][1], 1) - self.assertEqual(df['b'][2], 2) - self.assertEqual(df['c'][3], 3) - self.assertEqual(df['d'][4], 5) - self.assertEqual(df['e'][5], 4) + assert df['a'][1] == 1 + assert df['b'][2] == 2 + assert df['c'][3] == 3 + assert df['d'][4] == 5 + assert df['e'][5] == 4 assert_frame_equal(df, _slow_pivot(one, two, three)) # weird overlap, TODO: test? diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 05ce239b9c5a3..f2a1414957d44 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -194,16 +194,16 @@ def test_set_axis(self): assert self.panel4d.minor_axis is new_minor def test_get_axis_number(self): - self.assertEqual(self.panel4d._get_axis_number('labels'), 0) - self.assertEqual(self.panel4d._get_axis_number('items'), 1) - self.assertEqual(self.panel4d._get_axis_number('major'), 2) - self.assertEqual(self.panel4d._get_axis_number('minor'), 3) + assert self.panel4d._get_axis_number('labels') == 0 + assert self.panel4d._get_axis_number('items') == 1 + assert self.panel4d._get_axis_number('major') == 2 + assert self.panel4d._get_axis_number('minor') == 3 def test_get_axis_name(self): - self.assertEqual(self.panel4d._get_axis_name(0), 'labels') - self.assertEqual(self.panel4d._get_axis_name(1), 'items') - self.assertEqual(self.panel4d._get_axis_name(2), 'major_axis') - self.assertEqual(self.panel4d._get_axis_name(3), 'minor_axis') + assert self.panel4d._get_axis_name(0) == 'labels' + assert self.panel4d._get_axis_name(1) == 'items' + assert self.panel4d._get_axis_name(2) == 'major_axis' + assert self.panel4d._get_axis_name(3) == 'minor_axis' def test_arith(self): with catch_warnings(record=True): @@ -234,8 +234,8 @@ def test_keys(self): def test_iteritems(self): """Test panel4d.iteritems()""" - self.assertEqual(len(list(self.panel4d.iteritems())), - len(self.panel4d.labels)) + assert (len(list(self.panel4d.iteritems())) == + len(self.panel4d.labels)) def test_combinePanel4d(self): with catch_warnings(record=True): @@ -374,16 +374,16 @@ def test_setitem(self): # scalar self.panel4d['lG'] = 1 self.panel4d['lE'] = True - self.assertEqual(self.panel4d['lG'].values.dtype, np.int64) - self.assertEqual(self.panel4d['lE'].values.dtype, np.bool_) + assert self.panel4d['lG'].values.dtype == np.int64 + assert self.panel4d['lE'].values.dtype == np.bool_ # object dtype self.panel4d['lQ'] = 'foo' - self.assertEqual(self.panel4d['lQ'].values.dtype, np.object_) + assert self.panel4d['lQ'].values.dtype == np.object_ # boolean dtype self.panel4d['lP'] = self.panel4d['l1'] > 0 - self.assertEqual(self.panel4d['lP'].values.dtype, np.bool_) + assert self.panel4d['lP'].values.dtype == np.bool_ def test_setitem_by_indexer(self): @@ -484,8 +484,8 @@ def test_major_xs_mixed(self): self.panel4d['l4'] = 'foo' with catch_warnings(record=True): xs = self.panel4d.major_xs(self.panel4d.major_axis[0]) - self.assertEqual(xs['l1']['A'].dtype, np.float64) - 
self.assertEqual(xs['l4']['A'].dtype, np.object_) + assert xs['l1']['A'].dtype == np.float64 + assert xs['l4']['A'].dtype == np.object_ def test_minor_xs(self): ref = self.panel4d['l1']['ItemA'] @@ -504,8 +504,8 @@ def test_minor_xs_mixed(self): with catch_warnings(record=True): xs = self.panel4d.minor_xs('D') - self.assertEqual(xs['l1'].T['ItemA'].dtype, np.float64) - self.assertEqual(xs['l4'].T['ItemA'].dtype, np.object_) + assert xs['l1'].T['ItemA'].dtype == np.float64 + assert xs['l4'].T['ItemA'].dtype == np.object_ def test_xs(self): l1 = self.panel4d.xs('l1', axis=0) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 37e22f101612b..276e9a12c1993 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -71,12 +71,12 @@ def test_api(self): r = self.series.resample('H') result = r.mean() assert isinstance(result, Series) - self.assertEqual(len(result), 217) + assert len(result) == 217 r = self.series.to_frame().resample('H') result = r.mean() assert isinstance(result, DataFrame) - self.assertEqual(len(result), 217) + assert len(result) == 217 def test_api_changes_v018(self): @@ -186,13 +186,13 @@ def f(): check_stacklevel=False): result = self.series.resample('H')[0] expected = self.series.resample('H').mean()[0] - self.assertEqual(result, expected) + assert result == expected with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = self.series.resample('H')['2005-01-09 23:00:00'] expected = self.series.resample('H').mean()['2005-01-09 23:00:00'] - self.assertEqual(result, expected) + assert result == expected def test_groupby_resample_api(self): @@ -254,7 +254,7 @@ def test_getitem(self): tm.assert_index_equal(r._selected_obj.columns, self.frame.columns) r = self.frame.resample('H')['B'] - self.assertEqual(r._selected_obj.name, self.frame.columns[1]) + assert r._selected_obj.name == self.frame.columns[1] # technically this is allowed r = self.frame.resample('H')['A', 'B'] @@ -771,7 +771,7 @@ def test_resample_empty_series(self): expected = s.copy() expected.index = s.index._shallow_copy(freq=freq) assert_index_equal(result.index, expected.index) - self.assertEqual(result.index.freq, expected.index.freq) + assert result.index.freq == expected.index.freq assert_series_equal(result, expected, check_dtype=False) def test_resample_empty_dataframe(self): @@ -788,7 +788,7 @@ def test_resample_empty_dataframe(self): expected = f.copy() expected.index = f.index._shallow_copy(freq=freq) assert_index_equal(result.index, expected.index) - self.assertEqual(result.index.freq, expected.index.freq) + assert result.index.freq == expected.index.freq assert_frame_equal(result, expected, check_dtype=False) # test size for GH13212 (currently stays as df) @@ -884,7 +884,7 @@ def test_custom_grouper(self): for f in funcs: g._cython_agg_general(f) - self.assertEqual(g.ngroups, 2593) + assert g.ngroups == 2593 assert notnull(g.mean()).all() # construct expected val @@ -901,8 +901,8 @@ def test_custom_grouper(self): index=dti, dtype='float64') r = df.groupby(b).agg(np.sum) - self.assertEqual(len(r.columns), 10) - self.assertEqual(len(r.index), 2593) + assert len(r.columns) == 10 + assert len(r.index) == 2593 def test_resample_basic(self): rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', @@ -914,7 +914,7 @@ def test_resample_basic(self): expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()], index=exp_idx) assert_series_equal(result, expected) - self.assertEqual(result.index.name, 'index') + 
assert result.index.name == 'index' result = s.resample('5min', closed='left', label='right').mean() @@ -958,7 +958,7 @@ def _ohlc(group): '5min', closed='right', label='right'), arg)() expected = s.groupby(grouplist).agg(func) - self.assertEqual(result.index.name, 'index') + assert result.index.name == 'index' if arg == 'ohlc': expected = DataFrame(expected.values.tolist()) expected.columns = ['open', 'high', 'low', 'close'] @@ -1116,51 +1116,51 @@ def test_resample_basic_from_daily(self): # to weekly result = s.resample('w-sun').last() - self.assertEqual(len(result), 3) + assert len(result) == 3 assert (result.index.dayofweek == [6, 6, 6]).all() - self.assertEqual(result.iloc[0], s['1/2/2005']) - self.assertEqual(result.iloc[1], s['1/9/2005']) - self.assertEqual(result.iloc[2], s.iloc[-1]) + assert result.iloc[0] == s['1/2/2005'] + assert result.iloc[1] == s['1/9/2005'] + assert result.iloc[2] == s.iloc[-1] result = s.resample('W-MON').last() - self.assertEqual(len(result), 2) + assert len(result) == 2 assert (result.index.dayofweek == [0, 0]).all() - self.assertEqual(result.iloc[0], s['1/3/2005']) - self.assertEqual(result.iloc[1], s['1/10/2005']) + assert result.iloc[0] == s['1/3/2005'] + assert result.iloc[1] == s['1/10/2005'] result = s.resample('W-TUE').last() - self.assertEqual(len(result), 2) + assert len(result) == 2 assert (result.index.dayofweek == [1, 1]).all() - self.assertEqual(result.iloc[0], s['1/4/2005']) - self.assertEqual(result.iloc[1], s['1/10/2005']) + assert result.iloc[0] == s['1/4/2005'] + assert result.iloc[1] == s['1/10/2005'] result = s.resample('W-WED').last() - self.assertEqual(len(result), 2) + assert len(result) == 2 assert (result.index.dayofweek == [2, 2]).all() - self.assertEqual(result.iloc[0], s['1/5/2005']) - self.assertEqual(result.iloc[1], s['1/10/2005']) + assert result.iloc[0] == s['1/5/2005'] + assert result.iloc[1] == s['1/10/2005'] result = s.resample('W-THU').last() - self.assertEqual(len(result), 2) + assert len(result) == 2 assert (result.index.dayofweek == [3, 3]).all() - self.assertEqual(result.iloc[0], s['1/6/2005']) - self.assertEqual(result.iloc[1], s['1/10/2005']) + assert result.iloc[0] == s['1/6/2005'] + assert result.iloc[1] == s['1/10/2005'] result = s.resample('W-FRI').last() - self.assertEqual(len(result), 2) + assert len(result) == 2 assert (result.index.dayofweek == [4, 4]).all() - self.assertEqual(result.iloc[0], s['1/7/2005']) - self.assertEqual(result.iloc[1], s['1/10/2005']) + assert result.iloc[0] == s['1/7/2005'] + assert result.iloc[1] == s['1/10/2005'] # to biz day result = s.resample('B').last() - self.assertEqual(len(result), 7) + assert len(result) == 7 assert (result.index.dayofweek == [4, 0, 1, 2, 3, 4, 0]).all() - self.assertEqual(result.iloc[0], s['1/2/2005']) - self.assertEqual(result.iloc[1], s['1/3/2005']) - self.assertEqual(result.iloc[5], s['1/9/2005']) - self.assertEqual(result.index.name, 'index') + assert result.iloc[0] == s['1/2/2005'] + assert result.iloc[1] == s['1/3/2005'] + assert result.iloc[5] == s['1/9/2005'] + assert result.index.name == 'index' def test_resample_upsampling_picked_but_not_correct(self): @@ -1169,7 +1169,7 @@ def test_resample_upsampling_picked_but_not_correct(self): series = Series(1, index=dates) result = series.resample('D').mean() - self.assertEqual(result.index[0], dates[0]) + assert result.index[0] == dates[0] # GH 5955 # incorrect deciding to upsample when the axis frequency matches the @@ -1230,7 +1230,7 @@ def test_resample_loffset(self): loffset=Minute(1)).mean() 
assert_series_equal(result, expected) - self.assertEqual(result.index.freq, Minute(5)) + assert result.index.freq == Minute(5) # from daily dti = DatetimeIndex(start=datetime(2005, 1, 1), @@ -1240,7 +1240,7 @@ def test_resample_loffset(self): # to weekly result = ser.resample('w-sun').last() expected = ser.resample('w-sun', loffset=-bday).last() - self.assertEqual(result.index[0] - bday, expected.index[0]) + assert result.index[0] - bday == expected.index[0] def test_resample_loffset_count(self): # GH 12725 @@ -1273,11 +1273,11 @@ def test_resample_upsample(self): # to minutely, by padding result = s.resample('Min').pad() - self.assertEqual(len(result), 12961) - self.assertEqual(result[0], s[0]) - self.assertEqual(result[-1], s[-1]) + assert len(result) == 12961 + assert result[0] == s[0] + assert result[-1] == s[-1] - self.assertEqual(result.index.name, 'index') + assert result.index.name == 'index' def test_resample_how_method(self): # GH9915 @@ -1320,20 +1320,20 @@ def test_resample_ohlc(self): expect = s.groupby(grouper).agg(lambda x: x[-1]) result = s.resample('5Min').ohlc() - self.assertEqual(len(result), len(expect)) - self.assertEqual(len(result.columns), 4) + assert len(result) == len(expect) + assert len(result.columns) == 4 xs = result.iloc[-2] - self.assertEqual(xs['open'], s[-6]) - self.assertEqual(xs['high'], s[-6:-1].max()) - self.assertEqual(xs['low'], s[-6:-1].min()) - self.assertEqual(xs['close'], s[-2]) + assert xs['open'] == s[-6] + assert xs['high'] == s[-6:-1].max() + assert xs['low'] == s[-6:-1].min() + assert xs['close'] == s[-2] xs = result.iloc[0] - self.assertEqual(xs['open'], s[0]) - self.assertEqual(xs['high'], s[:5].max()) - self.assertEqual(xs['low'], s[:5].min()) - self.assertEqual(xs['close'], s[4]) + assert xs['open'] == s[0] + assert xs['high'] == s[:5].max() + assert xs['low'] == s[:5].min() + assert xs['close'] == s[4] def test_resample_ohlc_result(self): @@ -1410,9 +1410,9 @@ def test_resample_reresample(self): s = Series(np.random.rand(len(dti)), dti) bs = s.resample('B', closed='right', label='right').mean() result = bs.resample('8H').mean() - self.assertEqual(len(result), 22) + assert len(result) == 22 assert isinstance(result.index.freq, offsets.DateOffset) - self.assertEqual(result.index.freq, offsets.Hour(8)) + assert result.index.freq == offsets.Hour(8) def test_resample_timestamp_to_period(self): ts = _simple_ts('1/1/1990', '1/1/2000') @@ -1465,7 +1465,7 @@ def test_downsample_non_unique(self): result = ts.resample('M').mean() expected = ts.groupby(lambda x: x.month).mean() - self.assertEqual(len(result), 2) + assert len(result) == 2 assert_almost_equal(result[0], expected[1]) assert_almost_equal(result[1], expected[2]) @@ -1665,10 +1665,10 @@ def test_resample_dtype_preservation(self): ).set_index('date') result = df.resample('1D').ffill() - self.assertEqual(result.val.dtype, np.int32) + assert result.val.dtype == np.int32 result = df.groupby('group').resample('1D').ffill() - self.assertEqual(result.val.dtype, np.int32) + assert result.val.dtype == np.int32 def test_weekly_resample_buglet(self): # #1327 @@ -1742,7 +1742,7 @@ def test_resample_anchored_intraday(self): ts = _simple_ts('2012-04-29 23:00', '2012-04-30 5:00', freq='h') resampled = ts.resample('M').mean() - self.assertEqual(len(resampled), 1) + assert len(resampled) == 1 def test_resample_anchored_monthstart(self): ts = _simple_ts('1/1/2000', '12/31/2002') @@ -1768,13 +1768,11 @@ def test_resample_anchored_multiday(self): # Ensure left closing works result = 
s.resample('2200L').mean() - self.assertEqual(result.index[-1], - pd.Timestamp('2014-10-15 23:00:02.000')) + assert result.index[-1] == pd.Timestamp('2014-10-15 23:00:02.000') # Ensure right closing works result = s.resample('2200L', label='right').mean() - self.assertEqual(result.index[-1], - pd.Timestamp('2014-10-15 23:00:04.200')) + assert result.index[-1] == pd.Timestamp('2014-10-15 23:00:04.200') def test_corner_cases(self): # miscellaneous test coverage @@ -1789,13 +1787,13 @@ def test_corner_cases(self): len0pts = _simple_pts('2007-01', '2010-05', freq='M')[:0] # it works result = len0pts.resample('A-DEC').mean() - self.assertEqual(len(result), 0) + assert len(result) == 0 # resample to periods ts = _simple_ts('2000-04-28', '2000-04-30 11:00', freq='h') result = ts.resample('M', kind='period').mean() - self.assertEqual(len(result), 1) - self.assertEqual(result.index[0], Period('2000-04', freq='M')) + assert len(result) == 1 + assert result.index[0] == Period('2000-04', freq='M') def test_anchored_lowercase_buglet(self): dates = date_range('4/16/2012 20:00', periods=50000, freq='s') @@ -1941,7 +1939,7 @@ def test_resample_nunique(self): g = df.groupby(pd.Grouper(freq='D')) expected = df.groupby(pd.TimeGrouper('D')).ID.apply(lambda x: x.nunique()) - self.assertEqual(expected.name, 'ID') + assert expected.name == 'ID' for t in [r, g]: result = r.ID.nunique() @@ -2691,8 +2689,8 @@ def test_resample_bms_2752(self): foo = pd.Series(index=pd.bdate_range('20000101', '20000201')) res1 = foo.resample("BMS").mean() res2 = foo.resample("BMS").mean().resample("B").mean() - self.assertEqual(res1.index[0], Timestamp('20000103')) - self.assertEqual(res1.index[0], res2.index[0]) + assert res1.index[0] == Timestamp('20000103') + assert res1.index[0] == res2.index[0] # def test_monthly_convention_span(self): # rng = period_range('2000-01', periods=3, freq='M') @@ -2969,11 +2967,11 @@ def test_consistency_with_window(self): df = self.frame expected = pd.Int64Index([1, 2, 3], name='A') result = df.groupby('A').resample('2s').mean() - self.assertEqual(result.index.nlevels, 2) + assert result.index.nlevels == 2 tm.assert_index_equal(result.index.levels[0], expected) result = df.groupby('A').rolling(20).mean() - self.assertEqual(result.index.nlevels, 2) + assert result.index.nlevels == 2 tm.assert_index_equal(result.index.levels[0], expected) def test_median_duplicate_columns(self): @@ -3219,7 +3217,7 @@ def test_aggregate_with_nat(self): dt_result = getattr(dt_grouped, func)() assert_series_equal(expected, dt_result) # GH 9925 - self.assertEqual(dt_result.index.name, 'key') + assert dt_result.index.name == 'key' # if NaT is included, 'var', 'std', 'mean', 'first','last' # and 'nth' doesn't work yet diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 5b9797ce76a45..412a88e13bb23 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -54,7 +54,7 @@ def test_iter(self): # desired behavior is to iterate until everything would be nan on the # next iter so make sure the last element of the iterator was 'l' in # this case since 'wikitravel' is the longest string - self.assertEqual(s.dropna().values.item(), 'l') + assert s.dropna().values.item() == 'l' def test_iter_empty(self): ds = Series([], dtype=object) @@ -66,8 +66,8 @@ def test_iter_empty(self): # nothing to iterate over so nothing defined values should remain # unchanged - self.assertEqual(i, 100) - self.assertEqual(s, 1) + assert i == 100 + assert s == 1 def test_iter_single_element(self): ds = 
Series(['a'])

@@ -87,8 +87,8 @@ def test_iter_object_try_string(self):
         for i, s in enumerate(ds.str):
             pass

-        self.assertEqual(i, 100)
-        self.assertEqual(s, 'h')
+        assert i == 100
+        assert s == 'h'

     def test_cat(self):
         one = np.array(['a', 'a', 'b', 'b', 'c', NA], dtype=np.object_)
@@ -97,23 +97,23 @@ def test_cat(self):

         # single array
         result = strings.str_cat(one)
         exp = 'aabbc'
-        self.assertEqual(result, exp)
+        assert result == exp

         result = strings.str_cat(one, na_rep='NA')
         exp = 'aabbcNA'
-        self.assertEqual(result, exp)
+        assert result == exp

         result = strings.str_cat(one, na_rep='-')
         exp = 'aabbc-'
-        self.assertEqual(result, exp)
+        assert result == exp

         result = strings.str_cat(one, sep='_', na_rep='NA')
         exp = 'a_a_b_b_c_NA'
-        self.assertEqual(result, exp)
+        assert result == exp

         result = strings.str_cat(two, sep='-')
         exp = 'a-b-d-foo'
-        self.assertEqual(result, exp)
+        assert result == exp

         # Multiple arrays
         result = strings.str_cat(one, [two], na_rep='NA')
@@ -177,7 +177,7 @@ def test_contains(self):
         values = ['foo', 'xyz', 'fooommm__foo', 'mmm_']
         result = strings.str_contains(values, pat)
         expected = np.array([False, False, True, True])
-        self.assertEqual(result.dtype, np.bool_)
+        assert result.dtype == np.bool_
         tm.assert_numpy_array_equal(result, expected)

         # case insensitive using regex
@@ -220,13 +220,13 @@ def test_contains(self):
                          dtype=np.object_)
         result = strings.str_contains(values, pat)
         expected = np.array([False, False, True, True])
-        self.assertEqual(result.dtype, np.bool_)
+        assert result.dtype == np.bool_
         tm.assert_numpy_array_equal(result, expected)

         # na
         values = Series(['om', 'foo', np.nan])
         res = values.str.contains('foo', na="foo")
-        self.assertEqual(res.loc[2], "foo")
+        assert res.loc[2] == "foo"

     def test_startswith(self):
         values = Series(['om', NA, 'foo_nom', 'nom', 'bar_foo', NA, 'foo'])
@@ -381,13 +381,11 @@ def test_swapcase(self):

     def test_casemethods(self):
         values = ['aaa', 'bbb', 'CCC', 'Dddd', 'eEEE']
         s = Series(values)
-        self.assertEqual(s.str.lower().tolist(), [v.lower() for v in values])
-        self.assertEqual(s.str.upper().tolist(), [v.upper() for v in values])
-        self.assertEqual(s.str.title().tolist(), [v.title() for v in values])
-        self.assertEqual(s.str.capitalize().tolist(), [
-            v.capitalize() for v in values])
-        self.assertEqual(s.str.swapcase().tolist(), [
-            v.swapcase() for v in values])
+        assert s.str.lower().tolist() == [v.lower() for v in values]
+        assert s.str.upper().tolist() == [v.upper() for v in values]
+        assert s.str.title().tolist() == [v.title() for v in values]
+        assert s.str.capitalize().tolist() == [v.capitalize() for v in values]
+        assert s.str.swapcase().tolist() == [v.swapcase() for v in values]

     def test_replace(self):
         values = Series(['fooBAD__barBAD', NA])
@@ -668,7 +666,7 @@ def test_extract_expand_False(self):

         # single group renames series/index properly
         s_or_idx = klass(['A1', 'A2'])
         result = s_or_idx.str.extract(r'(?P<uno>A)\d', expand=False)
-        self.assertEqual(result.name, 'uno')
+        assert result.name == 'uno'

         exp = klass(['A', 'A'], name='uno')
         if klass == Series:
@@ -772,7 +770,7 @@ def check_index(index):
         r = s.str.extract(r'(?P<sue>[a-z])', expand=False)
         e = Series(['a', 'b', 'c'], name='sue')
         tm.assert_series_equal(r, e)
-        self.assertEqual(r.name, e.name)
+        assert r.name == e.name

     def test_extract_expand_True(self):
         # Contains tests like those in test_match and some others.
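The two extract hunks just above rely on a pandas behavior worth spelling out: with expand=False and a single named group, `Series.str.extract` returns a Series whose name is taken from the regex group name. A minimal standalone sketch of that behavior, reusing the values the test itself asserts:

    import pandas as pd

    s = pd.Series(['A1', 'A2'])
    extracted = s.str.extract(r'(?P<uno>A)\d', expand=False)

    # The captured text becomes the values, and the group name 'uno'
    # becomes the name of the returned Series.
    assert extracted.name == 'uno'
    assert extracted.tolist() == ['A', 'A']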
@@ -1220,7 +1218,7 @@ def test_empty_str_methods(self): # (extract) on empty series tm.assert_series_equal(empty_str, empty.str.cat(empty)) - self.assertEqual('', empty.str.cat()) + assert '' == empty.str.cat() tm.assert_series_equal(empty_str, empty.str.title()) tm.assert_series_equal(empty_int, empty.str.count('a')) tm.assert_series_equal(empty_bool, empty.str.contains('a')) @@ -1322,20 +1320,13 @@ def test_ismethods(self): tm.assert_series_equal(str_s.str.isupper(), Series(upper_e)) tm.assert_series_equal(str_s.str.istitle(), Series(title_e)) - self.assertEqual(str_s.str.isalnum().tolist(), [v.isalnum() - for v in values]) - self.assertEqual(str_s.str.isalpha().tolist(), [v.isalpha() - for v in values]) - self.assertEqual(str_s.str.isdigit().tolist(), [v.isdigit() - for v in values]) - self.assertEqual(str_s.str.isspace().tolist(), [v.isspace() - for v in values]) - self.assertEqual(str_s.str.islower().tolist(), [v.islower() - for v in values]) - self.assertEqual(str_s.str.isupper().tolist(), [v.isupper() - for v in values]) - self.assertEqual(str_s.str.istitle().tolist(), [v.istitle() - for v in values]) + assert str_s.str.isalnum().tolist() == [v.isalnum() for v in values] + assert str_s.str.isalpha().tolist() == [v.isalpha() for v in values] + assert str_s.str.isdigit().tolist() == [v.isdigit() for v in values] + assert str_s.str.isspace().tolist() == [v.isspace() for v in values] + assert str_s.str.islower().tolist() == [v.islower() for v in values] + assert str_s.str.isupper().tolist() == [v.isupper() for v in values] + assert str_s.str.istitle().tolist() == [v.istitle() for v in values] def test_isnumeric(self): # 0x00bc: ¼ VULGAR FRACTION ONE QUARTER @@ -1350,10 +1341,8 @@ def test_isnumeric(self): tm.assert_series_equal(s.str.isdecimal(), Series(decimal_e)) unicodes = [u'A', u'3', u'¼', u'★', u'፸', u'3', u'four'] - self.assertEqual(s.str.isnumeric().tolist(), [ - v.isnumeric() for v in unicodes]) - self.assertEqual(s.str.isdecimal().tolist(), [ - v.isdecimal() for v in unicodes]) + assert s.str.isnumeric().tolist() == [v.isnumeric() for v in unicodes] + assert s.str.isdecimal().tolist() == [v.isdecimal() for v in unicodes] values = ['A', np.nan, u'¼', u'★', np.nan, u'3', 'four'] s = Series(values) @@ -1962,9 +1951,9 @@ def test_split_noargs(self): s = Series(['Wes McKinney', 'Travis Oliphant']) result = s.str.split() expected = ['Travis', 'Oliphant'] - self.assertEqual(result[1], expected) + assert result[1] == expected result = s.str.rsplit() - self.assertEqual(result[1], expected) + assert result[1] == expected def test_split_maxsplit(self): # re.split 0, str.split -1 @@ -2027,14 +2016,14 @@ def test_split_to_multiindex_expand(self): result = idx.str.split('_', expand=True) exp = idx tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 1) + assert result.nlevels == 1 idx = Index(['some_equal_splits', 'with_no_nans']) result = idx.str.split('_', expand=True) exp = MultiIndex.from_tuples([('some', 'equal', 'splits'), ( 'with', 'no', 'nans')]) tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 3) + assert result.nlevels == 3 idx = Index(['some_unequal_splits', 'one_of_these_things_is_not']) result = idx.str.split('_', expand=True) @@ -2042,7 +2031,7 @@ def test_split_to_multiindex_expand(self): ), ('one', 'of', 'these', 'things', 'is', 'not')]) tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 6) + assert result.nlevels == 6 with tm.assert_raises_regex(ValueError, "expand must be"): idx.str.split('_', expand="not_a_boolean") 
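Every hunk in this patch applies the same mechanical rewrite: unittest-style `self.assertEqual(a, b)` becomes a bare `assert a == b`, which pytest's assertion rewriting unpacks to report both operands on failure, so no unittest helper method is needed. A minimal sketch of the idiom; the sample Series echoes the split test above, but the snippet itself is illustrative and not part of the patch:

    import pandas as pd

    s = pd.Series(['some_equal_splits', 'with_no_nans'])
    result = s.str.split('_', expand=True)

    # Old idiom, removed throughout this patch:
    #     self.assertEqual(result.shape, (2, 3))
    # New idiom: on failure, pytest prints both sides of the comparison.
    assert result.shape == (2, 3)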
@@ -2081,21 +2070,21 @@ def test_rsplit_to_multiindex_expand(self): result = idx.str.rsplit('_', expand=True) exp = idx tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 1) + assert result.nlevels == 1 idx = Index(['some_equal_splits', 'with_no_nans']) result = idx.str.rsplit('_', expand=True) exp = MultiIndex.from_tuples([('some', 'equal', 'splits'), ( 'with', 'no', 'nans')]) tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 3) + assert result.nlevels == 3 idx = Index(['some_equal_splits', 'with_no_nans']) result = idx.str.rsplit('_', expand=True, n=1) exp = MultiIndex.from_tuples([('some_equal', 'splits'), ('with_no', 'nans')]) tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 2) + assert result.nlevels == 2 def test_split_with_name(self): # GH 12617 @@ -2184,9 +2173,9 @@ def test_partition_series(self): # compare to standard lib values = Series(['A_B_C', 'B_C_D', 'E_F_G', 'EFGHEF']) result = values.str.partition('_', expand=False).tolist() - self.assertEqual(result, [v.partition('_') for v in values]) + assert result == [v.partition('_') for v in values] result = values.str.rpartition('_', expand=False).tolist() - self.assertEqual(result, [v.rpartition('_') for v in values]) + assert result == [v.rpartition('_') for v in values] def test_partition_index(self): values = Index(['a_b_c', 'c_d_e', 'f_g_h']) @@ -2195,25 +2184,25 @@ def test_partition_index(self): exp = Index(np.array([('a', '_', 'b_c'), ('c', '_', 'd_e'), ('f', '_', 'g_h')])) tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 1) + assert result.nlevels == 1 result = values.str.rpartition('_', expand=False) exp = Index(np.array([('a_b', '_', 'c'), ('c_d', '_', 'e'), ( 'f_g', '_', 'h')])) tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 1) + assert result.nlevels == 1 result = values.str.partition('_') exp = Index([('a', '_', 'b_c'), ('c', '_', 'd_e'), ('f', '_', 'g_h')]) tm.assert_index_equal(result, exp) assert isinstance(result, MultiIndex) - self.assertEqual(result.nlevels, 3) + assert result.nlevels == 3 result = values.str.rpartition('_') exp = Index([('a_b', '_', 'c'), ('c_d', '_', 'e'), ('f_g', '_', 'h')]) tm.assert_index_equal(result, exp) assert isinstance(result, MultiIndex) - self.assertEqual(result.nlevels, 3) + assert result.nlevels == 3 def test_partition_to_dataframe(self): values = Series(['a_b_c', 'c_d_e', NA, 'f_g_h']) @@ -2604,20 +2593,20 @@ def test_match_findall_flags(self): pat = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})' result = data.str.extract(pat, flags=re.IGNORECASE, expand=True) - self.assertEqual(result.iloc[0].tolist(), ['dave', 'google', 'com']) + assert result.iloc[0].tolist() == ['dave', 'google', 'com'] result = data.str.match(pat, flags=re.IGNORECASE) - self.assertEqual(result[0], True) + assert result[0] result = data.str.findall(pat, flags=re.IGNORECASE) - self.assertEqual(result[0][0], ('dave', 'google', 'com')) + assert result[0][0] == ('dave', 'google', 'com') result = data.str.count(pat, flags=re.IGNORECASE) - self.assertEqual(result[0], 1) + assert result[0] == 1 with tm.assert_produces_warning(UserWarning): result = data.str.contains(pat, flags=re.IGNORECASE) - self.assertEqual(result[0], True) + assert result[0] def test_encode_decode(self): base = Series([u('a'), u('b'), u('a\xe4')]) @@ -2685,11 +2674,11 @@ def test_cat_on_filtered_index(self): str_month = df.month.astype('str') str_both = str_year.str.cat(str_month, sep=' ') - self.assertEqual(str_both.loc[1], '2011 2') + 
assert str_both.loc[1] == '2011 2' str_multiple = str_year.str.cat([str_month, str_month], sep=' ') - self.assertEqual(str_multiple.loc[1], '2011 2 2') + assert str_multiple.loc[1] == '2011 2 2' def test_str_cat_raises_intuitive_error(self): # https://github.com/pandas-dev/pandas/issues/11334 @@ -2721,13 +2710,13 @@ def test_index_str_accessor_visibility(self): idx = Index(values) assert isinstance(Series(values).str, StringMethods) assert isinstance(idx.str, StringMethods) - self.assertEqual(idx.inferred_type, tp) + assert idx.inferred_type == tp for values, tp in cases: idx = Index(values) assert isinstance(Series(values).str, StringMethods) assert isinstance(idx.str, StringMethods) - self.assertEqual(idx.inferred_type, tp) + assert idx.inferred_type == tp cases = [([1, np.nan], 'floating'), ([datetime(2011, 1, 1)], 'datetime64'), @@ -2739,11 +2728,11 @@ def test_index_str_accessor_visibility(self): Series(values).str with tm.assert_raises_regex(AttributeError, message): idx.str - self.assertEqual(idx.inferred_type, tp) + assert idx.inferred_type == tp # MultiIndex has mixed dtype, but not allow to use accessor idx = MultiIndex.from_tuples([('a', 'b'), ('a', 'b')]) - self.assertEqual(idx.inferred_type, 'mixed') + assert idx.inferred_type == 'mixed' message = 'Can only use .str accessor with Index, not MultiIndex' with tm.assert_raises_regex(AttributeError, message): idx.str diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index 9fb61998f6c54..617d268be8f67 100644 --- a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -353,7 +353,7 @@ def test_1d_bool(self): tm.assert_numpy_array_equal(result, expected) result = algos.take_1d(arr, [0, 2, -1]) - self.assertEqual(result.dtype, np.object_) + assert result.dtype == np.object_ def test_2d_bool(self): arr = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 1]], dtype=bool) @@ -367,7 +367,7 @@ def test_2d_bool(self): tm.assert_numpy_array_equal(result, expected) result = algos.take_nd(arr, [0, 2, -1]) - self.assertEqual(result.dtype, np.object_) + assert result.dtype == np.object_ def test_2d_float32(self): arr = np.random.randn(4, 3).astype(np.float32) diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 80db5eb49c127..2c0cd55205a5a 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -726,8 +726,8 @@ def test_RNGContext(self): with RNGContext(0): with RNGContext(1): - self.assertEqual(np.random.randn(), expected1) - self.assertEqual(np.random.randn(), expected0) + assert np.random.randn() == expected1 + assert np.random.randn() == expected0 class TestLocale(tm.TestCase): diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py index 6581e7688a32f..80eb5bb9dfe16 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/test_util.py @@ -7,6 +7,7 @@ from collections import OrderedDict import pytest +from pandas.compat import intern from pandas.util._move import move_into_mutable_buffer, BadMove, stolenbuf from pandas.util.decorators import deprecate_kwarg from pandas.util.validators import (validate_args, validate_kwargs, @@ -50,19 +51,19 @@ def test_dict_deprecate_kwarg(self): x = 'yes' with tm.assert_produces_warning(FutureWarning): result = self.f2(old=x) - self.assertEqual(result, True) + assert result def test_missing_deprecate_kwarg(self): x = 'bogus' with tm.assert_produces_warning(FutureWarning): result = self.f2(old=x) - self.assertEqual(result, 'bogus') + assert result == 'bogus' def test_callable_deprecate_kwarg(self): x = 5 with 
tm.assert_produces_warning(FutureWarning): result = self.f3(old=x) - self.assertEqual(result, x + 1) + assert result == x + 1 with pytest.raises(TypeError): self.f3(old='hello') @@ -358,7 +359,7 @@ def test_exactly_one_ref(self): as_stolen_buf = move_into_mutable_buffer(b[:-3]) # materialize as bytearray to show that it is mutable - self.assertEqual(bytearray(as_stolen_buf), b'test') + assert bytearray(as_stolen_buf) == b'test' @pytest.mark.skipif( sys.version_info[0] > 2, @@ -393,12 +394,7 @@ def ref_capture(ob): # be the same instance. move_into_mutable_buffer(ref_capture(intern(make_string()))) # noqa - self.assertEqual( - refcount[0], - 1, - msg='The BadMove was probably raised for refcount reasons instead' - ' of interning reasons', - ) + assert refcount[0] == 1 def test_numpy_errstate_is_default(): @@ -468,7 +464,7 @@ def test_set_locale(self): new_lang, new_enc = normalized_locale.split('.') new_enc = codecs.lookup(enc).name normalized_locale = new_lang, new_enc - self.assertEqual(normalized_locale, new_locale) + assert normalized_locale == new_locale current_locale = locale.getlocale() - self.assertEqual(current_locale, CURRENT_LOCALE) + assert current_locale == CURRENT_LOCALE diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 7979e7d77a49d..55be6302036f1 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -57,7 +57,7 @@ def test_getitem(self): tm.assert_index_equal(r._selected_obj.columns, self.frame.columns) r = self.frame.rolling(window=5)[1] - self.assertEqual(r._selected_obj.name, self.frame.columns[1]) + assert r._selected_obj.name == self.frame.columns[1] # technically this is allowed r = self.frame.rolling(window=5)[1, 3] @@ -281,8 +281,8 @@ def test_preserve_metadata(self): s2 = s.rolling(30).sum() s3 = s.rolling(20).sum() - self.assertEqual(s2.name, 'foo') - self.assertEqual(s3.name, 'foo') + assert s2.name == 'foo' + assert s3.name == 'foo' def test_how_compat(self): # in prior versions, we would allow how to be used in the resample @@ -859,14 +859,14 @@ def test_cmov_window_corner(self): vals = np.array([]) with catch_warnings(record=True): rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - self.assertEqual(len(rs), 0) + assert len(rs) == 0 # shorter than window vals = np.random.randn(5) with catch_warnings(record=True): rs = mom.rolling_window(vals, 10, 'boxcar') assert np.isnan(rs).all() - self.assertEqual(len(rs), 5) + assert len(rs) == 5 def test_cmov_window_frame(self): # Gh 8238 @@ -1382,7 +1382,7 @@ def get_result(obj, window, min_periods=None, freq=None, center=False): frame_result = get_result(self.frame, window=50) assert isinstance(series_result, Series) - self.assertEqual(type(frame_result), DataFrame) + assert type(frame_result) == DataFrame # check time_rule works if has_time_rule: @@ -1689,14 +1689,14 @@ def _check_ew_ndarray(self, func, preserve_nan=False, name=None): # pass in ints result2 = func(np.arange(50), span=10) - self.assertEqual(result2.dtype, np.float_) + assert result2.dtype == np.float_ def _check_ew_structures(self, func, name): series_result = getattr(self.series.ewm(com=10), name)() assert isinstance(series_result, Series) frame_result = getattr(self.frame.ewm(com=10), name)() - self.assertEqual(type(frame_result), DataFrame) + assert type(frame_result) == DataFrame class TestPairwise(object): @@ -2911,7 +2911,7 @@ def _check_expanding_structures(self, func): series_result = func(self.series) assert isinstance(series_result, Series) frame_result = func(self.frame) - 
self.assertEqual(type(frame_result), DataFrame) + assert type(frame_result) == DataFrame def _check_expanding(self, func, static_comp, has_min_periods=True, has_time_rule=True, preserve_nan=True): @@ -3031,10 +3031,10 @@ def test_rolling_min_max_numeric_types(self): # correctness result = (DataFrame(np.arange(20, dtype=data_type)) .rolling(window=5).max()) - self.assertEqual(result.dtypes[0], np.dtype("f8")) + assert result.dtypes[0] == np.dtype("f8") result = (DataFrame(np.arange(20, dtype=data_type)) .rolling(window=5).min()) - self.assertEqual(result.dtypes[0], np.dtype("f8")) + assert result.dtypes[0] == np.dtype("f8") class TestGrouperGrouping(tm.TestCase): diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py index 45b736102aa3d..b298df4f4b5d8 100644 --- a/pandas/tests/tools/test_numeric.py +++ b/pandas/tests/tools/test_numeric.py @@ -156,16 +156,16 @@ def test_type_check(self): to_numeric(df, errors=errors) def test_scalar(self): - self.assertEqual(pd.to_numeric(1), 1) - self.assertEqual(pd.to_numeric(1.1), 1.1) + assert pd.to_numeric(1) == 1 + assert pd.to_numeric(1.1) == 1.1 - self.assertEqual(pd.to_numeric('1'), 1) - self.assertEqual(pd.to_numeric('1.1'), 1.1) + assert pd.to_numeric('1') == 1 + assert pd.to_numeric('1.1') == 1.1 with pytest.raises(ValueError): to_numeric('XX', errors='raise') - self.assertEqual(to_numeric('XX', errors='ignore'), 'XX') + assert to_numeric('XX', errors='ignore') == 'XX' assert np.isnan(to_numeric('XX', errors='coerce')) def test_numeric_dtypes(self): diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index 894269aaf451a..a78150e9cf728 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -345,97 +345,92 @@ def _assert_depr(freq, expected, aliases): class TestFrequencyCode(tm.TestCase): def test_freq_code(self): - self.assertEqual(frequencies.get_freq('A'), 1000) - self.assertEqual(frequencies.get_freq('3A'), 1000) - self.assertEqual(frequencies.get_freq('-1A'), 1000) + assert frequencies.get_freq('A') == 1000 + assert frequencies.get_freq('3A') == 1000 + assert frequencies.get_freq('-1A') == 1000 - self.assertEqual(frequencies.get_freq('W'), 4000) - self.assertEqual(frequencies.get_freq('W-MON'), 4001) - self.assertEqual(frequencies.get_freq('W-FRI'), 4005) + assert frequencies.get_freq('W') == 4000 + assert frequencies.get_freq('W-MON') == 4001 + assert frequencies.get_freq('W-FRI') == 4005 for freqstr, code in compat.iteritems(frequencies._period_code_map): result = frequencies.get_freq(freqstr) - self.assertEqual(result, code) + assert result == code result = frequencies.get_freq_group(freqstr) - self.assertEqual(result, code // 1000 * 1000) + assert result == code // 1000 * 1000 result = frequencies.get_freq_group(code) - self.assertEqual(result, code // 1000 * 1000) + assert result == code // 1000 * 1000 def test_freq_group(self): - self.assertEqual(frequencies.get_freq_group('A'), 1000) - self.assertEqual(frequencies.get_freq_group('3A'), 1000) - self.assertEqual(frequencies.get_freq_group('-1A'), 1000) - self.assertEqual(frequencies.get_freq_group('A-JAN'), 1000) - self.assertEqual(frequencies.get_freq_group('A-MAY'), 1000) - self.assertEqual(frequencies.get_freq_group(offsets.YearEnd()), 1000) - self.assertEqual(frequencies.get_freq_group( - offsets.YearEnd(month=1)), 1000) - self.assertEqual(frequencies.get_freq_group( - offsets.YearEnd(month=5)), 1000) - - self.assertEqual(frequencies.get_freq_group('W'), 4000) - 
self.assertEqual(frequencies.get_freq_group('W-MON'), 4000) - self.assertEqual(frequencies.get_freq_group('W-FRI'), 4000) - self.assertEqual(frequencies.get_freq_group(offsets.Week()), 4000) - self.assertEqual(frequencies.get_freq_group( - offsets.Week(weekday=1)), 4000) - self.assertEqual(frequencies.get_freq_group( - offsets.Week(weekday=5)), 4000) + assert frequencies.get_freq_group('A') == 1000 + assert frequencies.get_freq_group('3A') == 1000 + assert frequencies.get_freq_group('-1A') == 1000 + assert frequencies.get_freq_group('A-JAN') == 1000 + assert frequencies.get_freq_group('A-MAY') == 1000 + assert frequencies.get_freq_group(offsets.YearEnd()) == 1000 + assert frequencies.get_freq_group(offsets.YearEnd(month=1)) == 1000 + assert frequencies.get_freq_group(offsets.YearEnd(month=5)) == 1000 + + assert frequencies.get_freq_group('W') == 4000 + assert frequencies.get_freq_group('W-MON') == 4000 + assert frequencies.get_freq_group('W-FRI') == 4000 + assert frequencies.get_freq_group(offsets.Week()) == 4000 + assert frequencies.get_freq_group(offsets.Week(weekday=1)) == 4000 + assert frequencies.get_freq_group(offsets.Week(weekday=5)) == 4000 def test_get_to_timestamp_base(self): tsb = frequencies.get_to_timestamp_base - self.assertEqual(tsb(frequencies.get_freq_code('D')[0]), - frequencies.get_freq_code('D')[0]) - self.assertEqual(tsb(frequencies.get_freq_code('W')[0]), - frequencies.get_freq_code('D')[0]) - self.assertEqual(tsb(frequencies.get_freq_code('M')[0]), - frequencies.get_freq_code('D')[0]) + assert (tsb(frequencies.get_freq_code('D')[0]) == + frequencies.get_freq_code('D')[0]) + assert (tsb(frequencies.get_freq_code('W')[0]) == + frequencies.get_freq_code('D')[0]) + assert (tsb(frequencies.get_freq_code('M')[0]) == + frequencies.get_freq_code('D')[0]) - self.assertEqual(tsb(frequencies.get_freq_code('S')[0]), - frequencies.get_freq_code('S')[0]) - self.assertEqual(tsb(frequencies.get_freq_code('T')[0]), - frequencies.get_freq_code('S')[0]) - self.assertEqual(tsb(frequencies.get_freq_code('H')[0]), - frequencies.get_freq_code('S')[0]) + assert (tsb(frequencies.get_freq_code('S')[0]) == + frequencies.get_freq_code('S')[0]) + assert (tsb(frequencies.get_freq_code('T')[0]) == + frequencies.get_freq_code('S')[0]) + assert (tsb(frequencies.get_freq_code('H')[0]) == + frequencies.get_freq_code('S')[0]) def test_freq_to_reso(self): Reso = frequencies.Resolution - self.assertEqual(Reso.get_str_from_freq('A'), 'year') - self.assertEqual(Reso.get_str_from_freq('Q'), 'quarter') - self.assertEqual(Reso.get_str_from_freq('M'), 'month') - self.assertEqual(Reso.get_str_from_freq('D'), 'day') - self.assertEqual(Reso.get_str_from_freq('H'), 'hour') - self.assertEqual(Reso.get_str_from_freq('T'), 'minute') - self.assertEqual(Reso.get_str_from_freq('S'), 'second') - self.assertEqual(Reso.get_str_from_freq('L'), 'millisecond') - self.assertEqual(Reso.get_str_from_freq('U'), 'microsecond') - self.assertEqual(Reso.get_str_from_freq('N'), 'nanosecond') + assert Reso.get_str_from_freq('A') == 'year' + assert Reso.get_str_from_freq('Q') == 'quarter' + assert Reso.get_str_from_freq('M') == 'month' + assert Reso.get_str_from_freq('D') == 'day' + assert Reso.get_str_from_freq('H') == 'hour' + assert Reso.get_str_from_freq('T') == 'minute' + assert Reso.get_str_from_freq('S') == 'second' + assert Reso.get_str_from_freq('L') == 'millisecond' + assert Reso.get_str_from_freq('U') == 'microsecond' + assert Reso.get_str_from_freq('N') == 'nanosecond' for freq in ['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 
'U', 'N']: # check roundtrip result = Reso.get_freq(Reso.get_str_from_freq(freq)) - self.assertEqual(freq, result) + assert freq == result for freq in ['D', 'H', 'T', 'S', 'L', 'U']: result = Reso.get_freq(Reso.get_str(Reso.get_reso_from_freq(freq))) - self.assertEqual(freq, result) + assert freq == result def test_resolution_bumping(self): - # GH 14378 + # see gh-14378 Reso = frequencies.Resolution - self.assertEqual(Reso.get_stride_from_decimal(1.5, 'T'), (90, 'S')) - self.assertEqual(Reso.get_stride_from_decimal(62.4, 'T'), (3744, 'S')) - self.assertEqual(Reso.get_stride_from_decimal(1.04, 'H'), (3744, 'S')) - self.assertEqual(Reso.get_stride_from_decimal(1, 'D'), (1, 'D')) - self.assertEqual(Reso.get_stride_from_decimal(0.342931, 'H'), - (1234551600, 'U')) - self.assertEqual(Reso.get_stride_from_decimal(1.2345, 'D'), - (106660800, 'L')) + assert Reso.get_stride_from_decimal(1.5, 'T') == (90, 'S') + assert Reso.get_stride_from_decimal(62.4, 'T') == (3744, 'S') + assert Reso.get_stride_from_decimal(1.04, 'H') == (3744, 'S') + assert Reso.get_stride_from_decimal(1, 'D') == (1, 'D') + assert (Reso.get_stride_from_decimal(0.342931, 'H') == + (1234551600, 'U')) + assert Reso.get_stride_from_decimal(1.2345, 'D') == (106660800, 'L') with pytest.raises(ValueError): Reso.get_stride_from_decimal(0.5, 'N') @@ -445,54 +440,54 @@ def test_resolution_bumping(self): Reso.get_stride_from_decimal(0.3429324798798269273987982, 'H') def test_get_freq_code(self): - # freqstr - self.assertEqual(frequencies.get_freq_code('A'), - (frequencies.get_freq('A'), 1)) - self.assertEqual(frequencies.get_freq_code('3D'), - (frequencies.get_freq('D'), 3)) - self.assertEqual(frequencies.get_freq_code('-2M'), - (frequencies.get_freq('M'), -2)) + # frequency str + assert (frequencies.get_freq_code('A') == + (frequencies.get_freq('A'), 1)) + assert (frequencies.get_freq_code('3D') == + (frequencies.get_freq('D'), 3)) + assert (frequencies.get_freq_code('-2M') == + (frequencies.get_freq('M'), -2)) # tuple - self.assertEqual(frequencies.get_freq_code(('D', 1)), - (frequencies.get_freq('D'), 1)) - self.assertEqual(frequencies.get_freq_code(('A', 3)), - (frequencies.get_freq('A'), 3)) - self.assertEqual(frequencies.get_freq_code(('M', -2)), - (frequencies.get_freq('M'), -2)) + assert (frequencies.get_freq_code(('D', 1)) == + (frequencies.get_freq('D'), 1)) + assert (frequencies.get_freq_code(('A', 3)) == + (frequencies.get_freq('A'), 3)) + assert (frequencies.get_freq_code(('M', -2)) == + (frequencies.get_freq('M'), -2)) + # numeric tuple - self.assertEqual(frequencies.get_freq_code((1000, 1)), (1000, 1)) + assert frequencies.get_freq_code((1000, 1)) == (1000, 1) # offsets - self.assertEqual(frequencies.get_freq_code(offsets.Day()), - (frequencies.get_freq('D'), 1)) - self.assertEqual(frequencies.get_freq_code(offsets.Day(3)), - (frequencies.get_freq('D'), 3)) - self.assertEqual(frequencies.get_freq_code(offsets.Day(-2)), - (frequencies.get_freq('D'), -2)) - - self.assertEqual(frequencies.get_freq_code(offsets.MonthEnd()), - (frequencies.get_freq('M'), 1)) - self.assertEqual(frequencies.get_freq_code(offsets.MonthEnd(3)), - (frequencies.get_freq('M'), 3)) - self.assertEqual(frequencies.get_freq_code(offsets.MonthEnd(-2)), - (frequencies.get_freq('M'), -2)) - - self.assertEqual(frequencies.get_freq_code(offsets.Week()), - (frequencies.get_freq('W'), 1)) - self.assertEqual(frequencies.get_freq_code(offsets.Week(3)), - (frequencies.get_freq('W'), 3)) - self.assertEqual(frequencies.get_freq_code(offsets.Week(-2)), - 
(frequencies.get_freq('W'), -2)) - - # monday is weekday=0 - self.assertEqual(frequencies.get_freq_code(offsets.Week(weekday=1)), - (frequencies.get_freq('W-TUE'), 1)) - self.assertEqual(frequencies.get_freq_code(offsets.Week(3, weekday=0)), - (frequencies.get_freq('W-MON'), 3)) - self.assertEqual( - frequencies.get_freq_code(offsets.Week(-2, weekday=4)), - (frequencies.get_freq('W-FRI'), -2)) + assert (frequencies.get_freq_code(offsets.Day()) == + (frequencies.get_freq('D'), 1)) + assert (frequencies.get_freq_code(offsets.Day(3)) == + (frequencies.get_freq('D'), 3)) + assert (frequencies.get_freq_code(offsets.Day(-2)) == + (frequencies.get_freq('D'), -2)) + + assert (frequencies.get_freq_code(offsets.MonthEnd()) == + (frequencies.get_freq('M'), 1)) + assert (frequencies.get_freq_code(offsets.MonthEnd(3)) == + (frequencies.get_freq('M'), 3)) + assert (frequencies.get_freq_code(offsets.MonthEnd(-2)) == + (frequencies.get_freq('M'), -2)) + + assert (frequencies.get_freq_code(offsets.Week()) == + (frequencies.get_freq('W'), 1)) + assert (frequencies.get_freq_code(offsets.Week(3)) == + (frequencies.get_freq('W'), 3)) + assert (frequencies.get_freq_code(offsets.Week(-2)) == + (frequencies.get_freq('W'), -2)) + + # Monday is weekday=0 + assert (frequencies.get_freq_code(offsets.Week(weekday=1)) == + (frequencies.get_freq('W-TUE'), 1)) + assert (frequencies.get_freq_code(offsets.Week(3, weekday=0)) == + (frequencies.get_freq('W-MON'), 3)) + assert (frequencies.get_freq_code(offsets.Week(-2, weekday=4)) == + (frequencies.get_freq('W-FRI'), -2)) _dti = DatetimeIndex @@ -510,18 +505,18 @@ def test_raise_if_too_few(self): def test_business_daily(self): index = _dti(['12/31/1998', '1/3/1999', '1/4/1999']) - self.assertEqual(frequencies.infer_freq(index), 'B') + assert frequencies.infer_freq(index) == 'B' def test_day(self): self._check_tick(timedelta(1), 'D') def test_day_corner(self): index = _dti(['1/1/2000', '1/2/2000', '1/3/2000']) - self.assertEqual(frequencies.infer_freq(index), 'D') + assert frequencies.infer_freq(index) == 'D' def test_non_datetimeindex(self): dates = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000']) - self.assertEqual(frequencies.infer_freq(dates), 'D') + assert frequencies.infer_freq(dates) == 'D' def test_hour(self): self._check_tick(timedelta(hours=1), 'H') @@ -550,7 +545,7 @@ def _check_tick(self, base_delta, code): exp_freq = '%d%s' % (i, code) else: exp_freq = code - self.assertEqual(frequencies.infer_freq(index), exp_freq) + assert frequencies.infer_freq(index) == exp_freq index = _dti([b + base_delta * 7] + [b + base_delta * j for j in range( 3)]) @@ -595,7 +590,7 @@ def test_monthly(self): def test_monthly_ambiguous(self): rng = _dti(['1/31/2000', '2/29/2000', '3/31/2000']) - self.assertEqual(rng.inferred_freq, 'M') + assert rng.inferred_freq == 'M' def test_business_monthly(self): self._check_generated_range('1/1/2000', 'BM') @@ -617,7 +612,7 @@ def test_business_annual(self): def test_annual_ambiguous(self): rng = _dti(['1/31/2000', '1/31/2001', '1/31/2002']) - self.assertEqual(rng.inferred_freq, 'A-JAN') + assert rng.inferred_freq == 'A-JAN' def _check_generated_range(self, start, freq): freq = freq.upper() @@ -625,7 +620,7 @@ def _check_generated_range(self, start, freq): gen = date_range(start, periods=7, freq=freq) index = _dti(gen.values) if not freq.startswith('Q-'): - self.assertEqual(frequencies.infer_freq(index), gen.freqstr) + assert frequencies.infer_freq(index) == gen.freqstr else: inf_freq = frequencies.infer_freq(index) is_dec_range = inf_freq == 
'Q-DEC' and gen.freqstr in ( @@ -640,7 +635,7 @@ def _check_generated_range(self, start, freq): index = _dti(gen.values) if not freq.startswith('Q-'): - self.assertEqual(frequencies.infer_freq(index), gen.freqstr) + assert frequencies.infer_freq(index) == gen.freqstr else: inf_freq = frequencies.infer_freq(index) is_dec_range = inf_freq == 'Q-DEC' and gen.freqstr in ( @@ -655,15 +650,15 @@ def _check_generated_range(self, start, freq): def test_infer_freq(self): rng = period_range('1959Q2', '2009Q3', freq='Q') rng = Index(rng.to_timestamp('D', how='e').asobject) - self.assertEqual(rng.inferred_freq, 'Q-DEC') + assert rng.inferred_freq == 'Q-DEC' rng = period_range('1959Q2', '2009Q3', freq='Q-NOV') rng = Index(rng.to_timestamp('D', how='e').asobject) - self.assertEqual(rng.inferred_freq, 'Q-NOV') + assert rng.inferred_freq == 'Q-NOV' rng = period_range('1959Q2', '2009Q3', freq='Q-OCT') rng = Index(rng.to_timestamp('D', how='e').asobject) - self.assertEqual(rng.inferred_freq, 'Q-OCT') + assert rng.inferred_freq == 'Q-OCT' def test_infer_freq_tz(self): @@ -683,7 +678,7 @@ def test_infer_freq_tz(self): 'US/Pacific', 'US/Eastern']: for expected, dates in compat.iteritems(freqs): idx = DatetimeIndex(dates, tz=tz) - self.assertEqual(idx.inferred_freq, expected) + assert idx.inferred_freq == expected def test_infer_freq_tz_transition(self): # Tests for #8772 @@ -699,7 +694,7 @@ def test_infer_freq_tz_transition(self): for freq in freqs: idx = date_range(date_pair[0], date_pair[ 1], freq=freq, tz=tz) - self.assertEqual(idx.inferred_freq, freq) + assert idx.inferred_freq == freq index = date_range("2013-11-03", periods=5, freq="3H").tz_localize("America/Chicago") @@ -711,21 +706,21 @@ def test_infer_freq_businesshour(self): ['2014-07-01 09:00', '2014-07-01 10:00', '2014-07-01 11:00', '2014-07-01 12:00', '2014-07-01 13:00', '2014-07-01 14:00']) # hourly freq in a day must result in 'H' - self.assertEqual(idx.inferred_freq, 'H') + assert idx.inferred_freq == 'H' idx = DatetimeIndex( ['2014-07-01 09:00', '2014-07-01 10:00', '2014-07-01 11:00', '2014-07-01 12:00', '2014-07-01 13:00', '2014-07-01 14:00', '2014-07-01 15:00', '2014-07-01 16:00', '2014-07-02 09:00', '2014-07-02 10:00', '2014-07-02 11:00']) - self.assertEqual(idx.inferred_freq, 'BH') + assert idx.inferred_freq == 'BH' idx = DatetimeIndex( ['2014-07-04 09:00', '2014-07-04 10:00', '2014-07-04 11:00', '2014-07-04 12:00', '2014-07-04 13:00', '2014-07-04 14:00', '2014-07-04 15:00', '2014-07-04 16:00', '2014-07-07 09:00', '2014-07-07 10:00', '2014-07-07 11:00']) - self.assertEqual(idx.inferred_freq, 'BH') + assert idx.inferred_freq == 'BH' idx = DatetimeIndex( ['2014-07-04 09:00', '2014-07-04 10:00', '2014-07-04 11:00', @@ -736,12 +731,12 @@ def test_infer_freq_businesshour(self): '2014-07-07 16:00', '2014-07-08 09:00', '2014-07-08 10:00', '2014-07-08 11:00', '2014-07-08 12:00', '2014-07-08 13:00', '2014-07-08 14:00', '2014-07-08 15:00', '2014-07-08 16:00']) - self.assertEqual(idx.inferred_freq, 'BH') + assert idx.inferred_freq == 'BH' def test_not_monotonic(self): rng = _dti(['1/31/2000', '1/31/2001', '1/31/2002']) rng = rng[::-1] - self.assertEqual(rng.inferred_freq, '-1A-JAN') + assert rng.inferred_freq == '-1A-JAN' def test_non_datetimeindex2(self): rng = _dti(['1/31/2000', '1/31/2001', '1/31/2002']) @@ -749,7 +744,7 @@ def test_non_datetimeindex2(self): vals = rng.to_pydatetime() result = frequencies.infer_freq(vals) - self.assertEqual(result, rng.inferred_freq) + assert result == rng.inferred_freq def test_invalid_index_types(self): @@ 
-771,7 +766,7 @@ def test_string_datetimelike_compat(self): '2004-04']) result = frequencies.infer_freq(Index(['2004-01', '2004-02', '2004-03', '2004-04'])) - self.assertEqual(result, expected) + assert result == expected def test_series(self): diff --git a/pandas/tests/tseries/test_holiday.py b/pandas/tests/tseries/test_holiday.py index c87f580582335..109adaaa7e0b0 100644 --- a/pandas/tests/tseries/test_holiday.py +++ b/pandas/tests/tseries/test_holiday.py @@ -49,9 +49,9 @@ def test_calendar(self): Timestamp(self.start_date), Timestamp(self.end_date)) - self.assertEqual(list(holidays.to_pydatetime()), self.holiday_list) - self.assertEqual(list(holidays_1.to_pydatetime()), self.holiday_list) - self.assertEqual(list(holidays_2.to_pydatetime()), self.holiday_list) + assert list(holidays.to_pydatetime()) == self.holiday_list + assert list(holidays_1.to_pydatetime()) == self.holiday_list + assert list(holidays_2.to_pydatetime()) == self.holiday_list def test_calendar_caching(self): # Test for issue #9552 @@ -82,8 +82,7 @@ def test_calendar_observance_dates(self): def test_rule_from_name(self): USFedCal = get_calendar('USFederalHolidayCalendar') - self.assertEqual(USFedCal.rule_from_name( - 'Thanksgiving'), USThanksgivingDay) + assert USFedCal.rule_from_name('Thanksgiving') == USThanksgivingDay class TestHoliday(tm.TestCase): @@ -93,17 +92,12 @@ def setUp(self): self.end_date = datetime(2020, 12, 31) def check_results(self, holiday, start, end, expected): - self.assertEqual(list(holiday.dates(start, end)), expected) + assert list(holiday.dates(start, end)) == expected + # Verify that timezone info is preserved. - self.assertEqual( - list( - holiday.dates( - utc.localize(Timestamp(start)), - utc.localize(Timestamp(end)), - ) - ), - [utc.localize(dt) for dt in expected], - ) + assert (list(holiday.dates(utc.localize(Timestamp(start)), + utc.localize(Timestamp(end)))) == + [utc.localize(dt) for dt in expected]) def test_usmemorialday(self): self.check_results(holiday=USMemorialDay, @@ -234,7 +228,7 @@ def test_holidays_within_dates(self): for rule, dates in compat.iteritems(holidays): empty_dates = rule.dates(start_date, end_date) - self.assertEqual(empty_dates.tolist(), []) + assert empty_dates.tolist() == [] if isinstance(dates, tuple): dates = [dates] @@ -266,17 +260,15 @@ def test_special_holidays(self): end_date=datetime(2012, 12, 31), offset=DateOffset(weekday=MO(1))) - self.assertEqual(base_date, - holiday_1.dates(self.start_date, self.end_date)) - self.assertEqual(base_date, - holiday_2.dates(self.start_date, self.end_date)) + assert base_date == holiday_1.dates(self.start_date, self.end_date) + assert base_date == holiday_2.dates(self.start_date, self.end_date) def test_get_calendar(self): class TestCalendar(AbstractHolidayCalendar): rules = [] calendar = get_calendar('TestCalendar') - self.assertEqual(TestCalendar, calendar.__class__) + assert TestCalendar == calendar.__class__ def test_factory(self): class_1 = HolidayCalendarFactory('MemorialDay', @@ -287,9 +279,9 @@ def test_factory(self): USThanksgivingDay) class_3 = HolidayCalendarFactory('Combined', class_1, class_2) - self.assertEqual(len(class_1.rules), 1) - self.assertEqual(len(class_2.rules), 1) - self.assertEqual(len(class_3.rules), 2) + assert len(class_1.rules) == 1 + assert len(class_2.rules) == 1 + assert len(class_3.rules) == 2 class TestObservanceRules(tm.TestCase): @@ -304,64 +296,65 @@ def setUp(self): self.tu = datetime(2014, 4, 15) def test_next_monday(self): - self.assertEqual(next_monday(self.sa), self.mo) - 
self.assertEqual(next_monday(self.su), self.mo) + assert next_monday(self.sa) == self.mo + assert next_monday(self.su) == self.mo def test_next_monday_or_tuesday(self): - self.assertEqual(next_monday_or_tuesday(self.sa), self.mo) - self.assertEqual(next_monday_or_tuesday(self.su), self.tu) - self.assertEqual(next_monday_or_tuesday(self.mo), self.tu) + assert next_monday_or_tuesday(self.sa) == self.mo + assert next_monday_or_tuesday(self.su) == self.tu + assert next_monday_or_tuesday(self.mo) == self.tu def test_previous_friday(self): - self.assertEqual(previous_friday(self.sa), self.fr) - self.assertEqual(previous_friday(self.su), self.fr) + assert previous_friday(self.sa) == self.fr + assert previous_friday(self.su) == self.fr def test_sunday_to_monday(self): - self.assertEqual(sunday_to_monday(self.su), self.mo) + assert sunday_to_monday(self.su) == self.mo def test_nearest_workday(self): - self.assertEqual(nearest_workday(self.sa), self.fr) - self.assertEqual(nearest_workday(self.su), self.mo) - self.assertEqual(nearest_workday(self.mo), self.mo) + assert nearest_workday(self.sa) == self.fr + assert nearest_workday(self.su) == self.mo + assert nearest_workday(self.mo) == self.mo def test_weekend_to_monday(self): - self.assertEqual(weekend_to_monday(self.sa), self.mo) - self.assertEqual(weekend_to_monday(self.su), self.mo) - self.assertEqual(weekend_to_monday(self.mo), self.mo) + assert weekend_to_monday(self.sa) == self.mo + assert weekend_to_monday(self.su) == self.mo + assert weekend_to_monday(self.mo) == self.mo def test_next_workday(self): - self.assertEqual(next_workday(self.sa), self.mo) - self.assertEqual(next_workday(self.su), self.mo) - self.assertEqual(next_workday(self.mo), self.tu) + assert next_workday(self.sa) == self.mo + assert next_workday(self.su) == self.mo + assert next_workday(self.mo) == self.tu def test_previous_workday(self): - self.assertEqual(previous_workday(self.sa), self.fr) - self.assertEqual(previous_workday(self.su), self.fr) - self.assertEqual(previous_workday(self.tu), self.mo) + assert previous_workday(self.sa) == self.fr + assert previous_workday(self.su) == self.fr + assert previous_workday(self.tu) == self.mo def test_before_nearest_workday(self): - self.assertEqual(before_nearest_workday(self.sa), self.th) - self.assertEqual(before_nearest_workday(self.su), self.fr) - self.assertEqual(before_nearest_workday(self.tu), self.mo) + assert before_nearest_workday(self.sa) == self.th + assert before_nearest_workday(self.su) == self.fr + assert before_nearest_workday(self.tu) == self.mo def test_after_nearest_workday(self): - self.assertEqual(after_nearest_workday(self.sa), self.mo) - self.assertEqual(after_nearest_workday(self.su), self.tu) - self.assertEqual(after_nearest_workday(self.fr), self.mo) + assert after_nearest_workday(self.sa) == self.mo + assert after_nearest_workday(self.su) == self.tu + assert after_nearest_workday(self.fr) == self.mo class TestFederalHolidayCalendar(tm.TestCase): - # Test for issue 10278 def test_no_mlk_before_1984(self): + # see gh-10278 class MLKCalendar(AbstractHolidayCalendar): rules = [USMartinLutherKingJr] holidays = MLKCalendar().holidays(start='1984', end='1988').to_pydatetime().tolist() + # Testing to make sure holiday is not incorrectly observed before 1986 - self.assertEqual(holidays, [datetime(1986, 1, 20, 0, 0), datetime( - 1987, 1, 19, 0, 0)]) + assert holidays == [datetime(1986, 1, 20, 0, 0), + datetime(1987, 1, 19, 0, 0)] def test_memorial_day(self): class MemorialDay(AbstractHolidayCalendar): @@ -369,23 
+362,23 @@ class MemorialDay(AbstractHolidayCalendar):

         holidays = MemorialDay().holidays(start='1971',
                                           end='1980').to_pydatetime().tolist()
-        # Fixes 5/31 error and checked manually against wikipedia
-        self.assertEqual(holidays, [datetime(1971, 5, 31, 0, 0),
-                                    datetime(1972, 5, 29, 0, 0),
-                                    datetime(1973, 5, 28, 0, 0),
-                                    datetime(1974, 5, 27, 0,
-                                             0), datetime(1975, 5, 26, 0, 0),
-                                    datetime(1976, 5, 31, 0,
-                                             0), datetime(1977, 5, 30, 0, 0),
-                                    datetime(1978, 5, 29, 0,
-                                             0), datetime(1979, 5, 28, 0, 0)])
+        # Fixes 5/31 error and checked manually against Wikipedia
+        assert holidays == [datetime(1971, 5, 31, 0, 0),
+                            datetime(1972, 5, 29, 0, 0),
+                            datetime(1973, 5, 28, 0, 0),
+                            datetime(1974, 5, 27, 0, 0),
+                            datetime(1975, 5, 26, 0, 0),
+                            datetime(1976, 5, 31, 0, 0),
+                            datetime(1977, 5, 30, 0, 0),
+                            datetime(1978, 5, 29, 0, 0),
+                            datetime(1979, 5, 28, 0, 0)]


-class TestHolidayConflictingArguments(tm.TestCase):
-    # GH 10217

+class TestHolidayConflictingArguments(tm.TestCase):

     def test_both_offset_observance_raises(self):
+        # see gh-10217
         with pytest.raises(NotImplementedError):
             Holiday("Cyber Monday", month=11, day=1,
                     offset=[DateOffset(weekday=SA(4))],
diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py
index 08f17fc358a47..ce4208a8cea69 100644
--- a/pandas/tests/tseries/test_offsets.py
+++ b/pandas/tests/tseries/test_offsets.py
@@ -155,7 +155,7 @@ def test_apply_out_of_range(self):
                 t = Timestamp('20080101', tz=tz)
                 result = t + offset
                 assert isinstance(result, datetime)
-                self.assertEqual(t.tzinfo, result.tzinfo)
+                assert t.tzinfo == result.tzinfo

             except (tslib.OutOfBoundsDatetime):
                 raise
@@ -230,13 +230,13 @@ def test_return_type(self):
     def test_offset_n(self):
         for offset_klass in self.offset_types:
             offset = self._get_offset(offset_klass)
-            self.assertEqual(offset.n, 1)
+            assert offset.n == 1

             neg_offset = offset * -1
-            self.assertEqual(neg_offset.n, -1)
+            assert neg_offset.n == -1

             mul_offset = offset * 3
-            self.assertEqual(mul_offset.n, 3)
+            assert mul_offset.n == 3

     def test_offset_freqstr(self):
         for offset_klass in self.offset_types:
@@ -247,7 +247,7 @@ def test_offset_freqstr(self):
                                "<DateOffset: kwds={'days': 1}>",
                                'LWOM-SAT', ):
                 code = get_offset(freqstr)
-                self.assertEqual(offset.rule_code, code)
+                assert offset.rule_code == code

     def _check_offsetfunc_works(self, offset, funcname, dt, expected,
                                 normalize=False):
@@ -256,11 +256,11 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected,
         result = func(dt)
         assert isinstance(result, Timestamp)
-        self.assertEqual(result, expected)
+        assert result == expected

         result = func(Timestamp(dt))
         assert isinstance(result, Timestamp)
-        self.assertEqual(result, expected)
+        assert result == expected

         # see gh-14101
         exp_warning = None
@@ -277,9 +277,9 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected,
             result = func(ts)
             assert isinstance(result, Timestamp)
             if normalize is False:
-                self.assertEqual(result, expected + Nano(5))
+                assert result == expected + Nano(5)
             else:
-                self.assertEqual(result, expected)
+                assert result == expected

         if isinstance(dt, np.datetime64):
             # test tz when input is datetime or Timestamp
@@ -295,11 +295,11 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected,

             result = func(dt_tz)
             assert isinstance(result, Timestamp)
-            self.assertEqual(result, expected_localize)
+            assert result == expected_localize

             result = func(Timestamp(dt, tz=tz))
             assert isinstance(result, Timestamp)
-            self.assertEqual(result, expected_localize)
+            assert result == expected_localize

             # see gh-14101
             exp_warning = None
@@ -316,9 +316,9 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected,
             result = func(ts)
             assert isinstance(result, Timestamp)
             if normalize is False:
-                self.assertEqual(result, expected_localize + Nano(5))
+                assert result == expected_localize + Nano(5)
             else:
-                self.assertEqual(result, expected_localize)
+                assert result == expected_localize

     def test_apply(self):
         sdt = datetime(2011, 1, 1, 9, 0)
@@ -466,14 +466,14 @@ def test_add(self):
             result_ts = Timestamp(dt) + offset_s
             for result in [result_dt, result_ts]:
                 assert isinstance(result, Timestamp)
-                self.assertEqual(result, expected)
+                assert result == expected

             tm._skip_if_no_pytz()
             for tz in self.timezones:
                 expected_localize = expected.tz_localize(tz)
                 result = Timestamp(dt, tz=tz) + offset_s
                 assert isinstance(result, Timestamp)
-                self.assertEqual(result, expected_localize)
+                assert result == expected_localize

         # normalize=True
         offset_s = self._get_offset(offset, normalize=True)
@@ -483,13 +483,13 @@
             result_ts = Timestamp(dt) + offset_s
             for result in [result_dt, result_ts]:
                 assert isinstance(result, Timestamp)
-                self.assertEqual(result, expected)
+                assert result == expected

             for tz in self.timezones:
                 expected_localize = expected.tz_localize(tz)
                 result = Timestamp(dt, tz=tz) + offset_s
                 assert isinstance(result, Timestamp)
-                self.assertEqual(result, expected_localize)
+                assert result == expected_localize

     def test_pickle_v0_15_2(self):
         offsets = {'DateOffset': DateOffset(years=1),
@@ -558,10 +558,10 @@ def test_different_normalize_equals(self):
         offset = BDay()
         offset2 = BDay()
         offset2.normalize = True
-        self.assertEqual(offset, offset2)
+        assert offset == offset2

     def test_repr(self):
-        self.assertEqual(repr(self.offset), '<BusinessDay>')
+        assert repr(self.offset) == '<BusinessDay>'
         assert repr(self.offset2) == '<2 * BusinessDays>'

         expected = '<BusinessDay: offset=datetime.timedelta(1)>'
@@ -573,49 +573,49 @@ def test_with_offset(self):
         assert (self.d + offset) == datetime(2008, 1, 2, 2)

     def testEQ(self):
-        self.assertEqual(self.offset2, self.offset2)
+        assert self.offset2 == self.offset2

     def test_mul(self):
         pass

     def test_hash(self):
-        self.assertEqual(hash(self.offset2), hash(self.offset2))
+        assert hash(self.offset2) == hash(self.offset2)

     def testCall(self):
-        self.assertEqual(self.offset2(self.d), datetime(2008, 1, 3))
+        assert self.offset2(self.d) == datetime(2008, 1, 3)

     def testRAdd(self):
-        self.assertEqual(self.d + self.offset2, self.offset2 + self.d)
+        assert self.d + self.offset2 == self.offset2 + self.d

     def testSub(self):
         off = self.offset2
         pytest.raises(Exception, off.__sub__, self.d)
-        self.assertEqual(2 * off - off, off)
+        assert 2 * off - off == off

-        self.assertEqual(self.d - self.offset2, self.d + BDay(-2))
+        assert self.d - self.offset2 == self.d + BDay(-2)

     def testRSub(self):
-        self.assertEqual(self.d - self.offset2, (-self.offset2).apply(self.d))
+        assert self.d - self.offset2 == (-self.offset2).apply(self.d)

     def testMult1(self):
-        self.assertEqual(self.d + 10 * self.offset, self.d + BDay(10))
+        assert self.d + 10 * self.offset == self.d + BDay(10)

     def testMult2(self):
-        self.assertEqual(self.d + (-5 * BDay(-10)), self.d + BDay(50))
+        assert self.d + (-5 * BDay(-10)) == self.d + BDay(50)

     def testRollback1(self):
-        self.assertEqual(BDay(10).rollback(self.d), self.d)
+        assert BDay(10).rollback(self.d) == self.d

     def testRollback2(self):
-        self.assertEqual(
-            BDay(10).rollback(datetime(2008, 1, 5)), datetime(2008, 1, 4))
+        assert (BDay(10).rollback(datetime(2008, 1, 5)) ==
+                datetime(2008, 1, 4))

     def testRollforward1(self):
-        self.assertEqual(BDay(10).rollforward(self.d), self.d)
+        assert BDay(10).rollforward(self.d) == self.d

     def testRollforward2(self):
-        self.assertEqual(
-            BDay(10).rollforward(datetime(2008, 1, 5)), datetime(2008, 1, 7))
+        assert (BDay(10).rollforward(datetime(2008, 1, 5)) ==
+                datetime(2008, 1, 7))

     def test_roll_date_object(self):
         offset = BDay()
@@ -623,17 +623,17 @@ def test_roll_date_object(self):

         dt = date(2012, 9, 15)

         result = offset.rollback(dt)
-        self.assertEqual(result, datetime(2012, 9, 14))
+        assert result == datetime(2012, 9, 14)

         result = offset.rollforward(dt)
-        self.assertEqual(result, datetime(2012, 9, 17))
+        assert result == datetime(2012, 9, 17)

         offset = offsets.Day()
         result = offset.rollback(dt)
-        self.assertEqual(result, datetime(2012, 9, 15))
+        assert result == datetime(2012, 9, 15)

         result = offset.rollforward(dt)
-        self.assertEqual(result, datetime(2012, 9, 15))
+        assert result == datetime(2012, 9, 15)

     def test_onOffset(self):
         tests = [(BDay(), datetime(2008, 1, 1), True),
@@ -691,25 +691,25 @@ def test_apply_large_n(self):
         dt = datetime(2012, 10, 23)

         result = dt + BDay(10)
-        self.assertEqual(result, datetime(2012, 11, 6))
+        assert result == datetime(2012, 11, 6)

         result = dt + BDay(100) - BDay(100)
-        self.assertEqual(result, dt)
+        assert result == dt

         off = BDay() * 6
         rs = datetime(2012, 1, 1) - off
         xp = datetime(2011, 12, 23)
-        self.assertEqual(rs, xp)
+        assert rs == xp

         st = datetime(2011, 12, 18)
         rs = st + off
         xp = datetime(2011, 12, 26)
-        self.assertEqual(rs, xp)
+        assert rs == xp

         off = BDay() * 10
         rs = datetime(2014, 1, 5) + off  # see #5890
         xp = datetime(2014, 1, 17)
-        self.assertEqual(rs, xp)
+        assert rs == xp

     def test_apply_corner(self):
         pytest.raises(TypeError, BDay().apply, BMonthEnd())
@@ -753,34 +753,30 @@ def test_different_normalize_equals(self):
         offset = self._offset()
         offset2 = self._offset()
         offset2.normalize = True
-        self.assertEqual(offset, offset2)
+        assert offset == offset2

     def test_repr(self):
-        self.assertEqual(repr(self.offset1), '<BusinessHour: BH=09:00-17:00>')
-        self.assertEqual(repr(self.offset2),
-                         '<3 * BusinessHours: BH=09:00-17:00>')
-        self.assertEqual(repr(self.offset3),
-                         '<-1 * BusinessHour: BH=09:00-17:00>')
-        self.assertEqual(repr(self.offset4),
-                         '<-4 * BusinessHours: BH=09:00-17:00>')
-
-        self.assertEqual(repr(self.offset5), '<BusinessHour: BH=11:00-14:30>')
-        self.assertEqual(repr(self.offset6), '<BusinessHour: BH=20:00-05:00>')
-        self.assertEqual(repr(self.offset7),
-                         '<-2 * BusinessHours: BH=21:30-06:30>')
+        assert repr(self.offset1) == '<BusinessHour: BH=09:00-17:00>'
+        assert repr(self.offset2) == '<3 * BusinessHours: BH=09:00-17:00>'
+        assert repr(self.offset3) == '<-1 * BusinessHour: BH=09:00-17:00>'
+        assert repr(self.offset4) == '<-4 * BusinessHours: BH=09:00-17:00>'
+
+        assert repr(self.offset5) == '<BusinessHour: BH=11:00-14:30>'
+        assert repr(self.offset6) == '<BusinessHour: BH=20:00-05:00>'
+        assert repr(self.offset7) == '<-2 * BusinessHours: BH=21:30-06:30>'

     def test_with_offset(self):
         expected = Timestamp('2014-07-01 13:00')

-        self.assertEqual(self.d + BusinessHour() * 3, expected)
-        self.assertEqual(self.d + BusinessHour(n=3), expected)
+        assert self.d + BusinessHour() * 3 == expected
+        assert self.d + BusinessHour(n=3) == expected

     def testEQ(self):
         for offset in [self.offset1, self.offset2, self.offset3,
                        self.offset4]:
-            self.assertEqual(offset, offset)
+            assert offset == offset

         self.assertNotEqual(BusinessHour(), BusinessHour(-1))
-        self.assertEqual(BusinessHour(start='09:00'), BusinessHour())
+        assert BusinessHour(start='09:00') == BusinessHour()
         self.assertNotEqual(BusinessHour(start='09:00'),
                             BusinessHour(start='09:01'))
         self.assertNotEqual(BusinessHour(start='09:00', end='17:00'),
@@
@@ -788,90 +784,83 @@ def testEQ(self):

     def test_hash(self):
         for offset in [self.offset1, self.offset2, self.offset3,
                        self.offset4]:
-            self.assertEqual(hash(offset), hash(offset))
+            assert hash(offset) == hash(offset)

     def testCall(self):
-        self.assertEqual(self.offset1(self.d), datetime(2014, 7, 1, 11))
-        self.assertEqual(self.offset2(self.d), datetime(2014, 7, 1, 13))
-        self.assertEqual(self.offset3(self.d), datetime(2014, 6, 30, 17))
-        self.assertEqual(self.offset4(self.d), datetime(2014, 6, 30, 14))
+        assert self.offset1(self.d) == datetime(2014, 7, 1, 11)
+        assert self.offset2(self.d) == datetime(2014, 7, 1, 13)
+        assert self.offset3(self.d) == datetime(2014, 6, 30, 17)
+        assert self.offset4(self.d) == datetime(2014, 6, 30, 14)

     def testRAdd(self):
-        self.assertEqual(self.d + self.offset2, self.offset2 + self.d)
+        assert self.d + self.offset2 == self.offset2 + self.d

     def testSub(self):
         off = self.offset2
         pytest.raises(Exception, off.__sub__, self.d)
-        self.assertEqual(2 * off - off, off)
+        assert 2 * off - off == off

-        self.assertEqual(self.d - self.offset2, self.d + self._offset(-3))
+        assert self.d - self.offset2 == self.d + self._offset(-3)

     def testRSub(self):
-        self.assertEqual(self.d - self.offset2, (-self.offset2).apply(self.d))
+        assert self.d - self.offset2 == (-self.offset2).apply(self.d)

     def testMult1(self):
-        self.assertEqual(self.d + 5 * self.offset1, self.d + self._offset(5))
+        assert self.d + 5 * self.offset1 == self.d + self._offset(5)

     def testMult2(self):
-        self.assertEqual(self.d + (-3 * self._offset(-2)),
-                         self.d + self._offset(6))
+        assert self.d + (-3 * self._offset(-2)) == self.d + self._offset(6)

     def testRollback1(self):
-        self.assertEqual(self.offset1.rollback(self.d), self.d)
-        self.assertEqual(self.offset2.rollback(self.d), self.d)
-        self.assertEqual(self.offset3.rollback(self.d), self.d)
-        self.assertEqual(self.offset4.rollback(self.d), self.d)
-        self.assertEqual(self.offset5.rollback(self.d),
-                         datetime(2014, 6, 30, 14, 30))
-        self.assertEqual(self.offset6.rollback(
-            self.d), datetime(2014, 7, 1, 5, 0))
-        self.assertEqual(self.offset7.rollback(
-            self.d), datetime(2014, 7, 1, 6, 30))
+        assert self.offset1.rollback(self.d) == self.d
+        assert self.offset2.rollback(self.d) == self.d
+        assert self.offset3.rollback(self.d) == self.d
+        assert self.offset4.rollback(self.d) == self.d
+        assert self.offset5.rollback(self.d) == datetime(2014, 6, 30, 14, 30)
+        assert self.offset6.rollback(self.d) == datetime(2014, 7, 1, 5, 0)
+        assert self.offset7.rollback(self.d) == datetime(2014, 7, 1, 6, 30)

         d = datetime(2014, 7, 1, 0)
-        self.assertEqual(self.offset1.rollback(d), datetime(2014, 6, 30, 17))
-        self.assertEqual(self.offset2.rollback(d), datetime(2014, 6, 30, 17))
-        self.assertEqual(self.offset3.rollback(d), datetime(2014, 6, 30, 17))
-        self.assertEqual(self.offset4.rollback(d), datetime(2014, 6, 30, 17))
-        self.assertEqual(self.offset5.rollback(
-            d), datetime(2014, 6, 30, 14, 30))
-        self.assertEqual(self.offset6.rollback(d), d)
-        self.assertEqual(self.offset7.rollback(d), d)
+        assert self.offset1.rollback(d) == datetime(2014, 6, 30, 17)
+        assert self.offset2.rollback(d) == datetime(2014, 6, 30, 17)
+        assert self.offset3.rollback(d) == datetime(2014, 6, 30, 17)
+        assert self.offset4.rollback(d) == datetime(2014, 6, 30, 17)
+        assert self.offset5.rollback(d) == datetime(2014, 6, 30, 14, 30)
+        assert self.offset6.rollback(d) == d
+        assert self.offset7.rollback(d) == d

-        self.assertEqual(self._offset(5).rollback(self.d), self.d)
+        assert self._offset(5).rollback(self.d) == self.d

     def testRollback2(self):
-        self.assertEqual(self._offset(-3)
-                         .rollback(datetime(2014, 7, 5, 15, 0)),
-                         datetime(2014, 7, 4, 17, 0))
+        assert (self._offset(-3).rollback(datetime(2014, 7, 5, 15, 0)) ==
+                datetime(2014, 7, 4, 17, 0))

     def testRollforward1(self):
-        self.assertEqual(self.offset1.rollforward(self.d), self.d)
-        self.assertEqual(self.offset2.rollforward(self.d), self.d)
-        self.assertEqual(self.offset3.rollforward(self.d), self.d)
-        self.assertEqual(self.offset4.rollforward(self.d), self.d)
-        self.assertEqual(self.offset5.rollforward(
-            self.d), datetime(2014, 7, 1, 11, 0))
-        self.assertEqual(self.offset6.rollforward(
-            self.d), datetime(2014, 7, 1, 20, 0))
-        self.assertEqual(self.offset7.rollforward(
-            self.d), datetime(2014, 7, 1, 21, 30))
+        assert self.offset1.rollforward(self.d) == self.d
+        assert self.offset2.rollforward(self.d) == self.d
+        assert self.offset3.rollforward(self.d) == self.d
+        assert self.offset4.rollforward(self.d) == self.d
+        assert (self.offset5.rollforward(self.d) ==
+                datetime(2014, 7, 1, 11, 0))
+        assert (self.offset6.rollforward(self.d) ==
+                datetime(2014, 7, 1, 20, 0))
+        assert (self.offset7.rollforward(self.d) ==
+                datetime(2014, 7, 1, 21, 30))

         d = datetime(2014, 7, 1, 0)
-        self.assertEqual(self.offset1.rollforward(d), datetime(2014, 7, 1, 9))
-        self.assertEqual(self.offset2.rollforward(d), datetime(2014, 7, 1, 9))
-        self.assertEqual(self.offset3.rollforward(d), datetime(2014, 7, 1, 9))
-        self.assertEqual(self.offset4.rollforward(d), datetime(2014, 7, 1, 9))
-        self.assertEqual(self.offset5.rollforward(d), datetime(2014, 7, 1, 11))
-        self.assertEqual(self.offset6.rollforward(d), d)
-        self.assertEqual(self.offset7.rollforward(d), d)
+        assert self.offset1.rollforward(d) == datetime(2014, 7, 1, 9)
+        assert self.offset2.rollforward(d) == datetime(2014, 7, 1, 9)
+        assert self.offset3.rollforward(d) == datetime(2014, 7, 1, 9)
+        assert self.offset4.rollforward(d) == datetime(2014, 7, 1, 9)
+        assert self.offset5.rollforward(d) == datetime(2014, 7, 1, 11)
+        assert self.offset6.rollforward(d) == d
+        assert self.offset7.rollforward(d) == d

-        self.assertEqual(self._offset(5).rollforward(self.d), self.d)
+        assert self._offset(5).rollforward(self.d) == self.d

     def testRollforward2(self):
-        self.assertEqual(self._offset(-3)
-                         .rollforward(datetime(2014, 7, 5, 16, 0)),
-                         datetime(2014, 7, 7, 9))
+        assert (self._offset(-3).rollforward(datetime(2014, 7, 5, 16, 0)) ==
+                datetime(2014, 7, 7, 9))

     def test_roll_date_object(self):
         offset = BusinessHour()
@@ -879,10 +868,10 @@ def test_roll_date_object(self):
         dt = datetime(2014, 7, 6, 15, 0)

         result = offset.rollback(dt)
-        self.assertEqual(result, datetime(2014, 7, 4, 17))
+        assert result == datetime(2014, 7, 4, 17)

         result = offset.rollforward(dt)
-        self.assertEqual(result, datetime(2014, 7, 7, 9))
+        assert result == datetime(2014, 7, 7, 9)

     def test_normalize(self):
         tests = []
@@ -924,7 +913,7 @@ def test_normalize(self):

         for offset, cases in tests:
             for dt, expected in compat.iteritems(cases):
-                self.assertEqual(offset.apply(dt), expected)
+                assert offset.apply(dt) == expected

     def test_onOffset(self):
         tests = []
@@ -963,7 +952,7 @@ def test_onOffset(self):

         for offset, cases in tests:
             for dt, expected in compat.iteritems(cases):
-                self.assertEqual(offset.onOffset(dt), expected)
+                assert offset.onOffset(dt) == expected

     def test_opening_time(self):
         tests = []
@@ -1127,8 +1116,8 @@ def test_opening_time(self):
         for _offsets, cases in tests:
             for offset in _offsets:
                 for dt, (exp_next, exp_prev) in compat.iteritems(cases):
-                    self.assertEqual(offset._next_opening_time(dt), exp_next)
-                    self.assertEqual(offset._prev_opening_time(dt), exp_prev)
+                    assert offset._next_opening_time(dt) == exp_next
+                    assert offset._prev_opening_time(dt) == exp_prev

     def test_apply(self):
         tests = []
@@ -1457,93 +1446,89 @@ def test_different_normalize_equals(self):
         offset = self._offset()
         offset2 = self._offset()
         offset2.normalize = True
-        self.assertEqual(offset, offset2)
+        assert offset == offset2

     def test_repr(self):
-        self.assertEqual(repr(self.offset1),
-                         '<CustomBusinessHour: CBH=09:00-17:00>')
-        self.assertEqual(repr(self.offset2),
-                         '<CustomBusinessHour: CBH=09:00-17:00>')
+        assert repr(self.offset1) == '<CustomBusinessHour: CBH=09:00-17:00>'
+        assert repr(self.offset2) == '<CustomBusinessHour: CBH=09:00-17:00>'

     def test_with_offset(self):
         expected = Timestamp('2014-07-01 13:00')
-        self.assertEqual(self.d + CustomBusinessHour() * 3, expected)
-        self.assertEqual(self.d + CustomBusinessHour(n=3), expected)
+        assert self.d + CustomBusinessHour() * 3 == expected
+        assert self.d + CustomBusinessHour(n=3) == expected

     def testEQ(self):
         for offset in [self.offset1, self.offset2]:
-            self.assertEqual(offset, offset)
+            assert offset == offset

-        self.assertNotEqual(CustomBusinessHour(), CustomBusinessHour(-1))
-        self.assertEqual(CustomBusinessHour(start='09:00'),
-                         CustomBusinessHour())
-        self.assertNotEqual(CustomBusinessHour(start='09:00'),
-                            CustomBusinessHour(start='09:01'))
-        self.assertNotEqual(CustomBusinessHour(start='09:00', end='17:00'),
-                            CustomBusinessHour(start='17:00', end='09:01'))
+        assert CustomBusinessHour() != CustomBusinessHour(-1)
+        assert (CustomBusinessHour(start='09:00') ==
+                CustomBusinessHour())
+        assert (CustomBusinessHour(start='09:00') !=
+                CustomBusinessHour(start='09:01'))
+        assert (CustomBusinessHour(start='09:00', end='17:00') !=
+                CustomBusinessHour(start='17:00', end='09:01'))

-        self.assertNotEqual(CustomBusinessHour(weekmask='Tue Wed Thu Fri'),
-                            CustomBusinessHour(weekmask='Mon Tue Wed Thu Fri'))
-        self.assertNotEqual(CustomBusinessHour(holidays=['2014-06-27']),
-                            CustomBusinessHour(holidays=['2014-06-28']))
+        assert (CustomBusinessHour(weekmask='Tue Wed Thu Fri') !=
+                CustomBusinessHour(weekmask='Mon Tue Wed Thu Fri'))
+        assert (CustomBusinessHour(holidays=['2014-06-27']) !=
+                CustomBusinessHour(holidays=['2014-06-28']))

     def test_hash(self):
-        self.assertEqual(hash(self.offset1), hash(self.offset1))
-        self.assertEqual(hash(self.offset2), hash(self.offset2))
+        assert hash(self.offset1) == hash(self.offset1)
+        assert hash(self.offset2) == hash(self.offset2)

     def testCall(self):
-        self.assertEqual(self.offset1(self.d), datetime(2014, 7, 1, 11))
-        self.assertEqual(self.offset2(self.d), datetime(2014, 7, 1, 11))
+        assert self.offset1(self.d) == datetime(2014, 7, 1, 11)
+        assert self.offset2(self.d) == datetime(2014, 7, 1, 11)

     def testRAdd(self):
-        self.assertEqual(self.d + self.offset2, self.offset2 + self.d)
+        assert self.d + self.offset2 == self.offset2 + self.d

     def testSub(self):
         off = self.offset2
         pytest.raises(Exception, off.__sub__, self.d)
-        self.assertEqual(2 * off - off, off)
+        assert 2 * off - off == off

-        self.assertEqual(self.d - self.offset2, self.d - (2 * off - off))
+        assert self.d - self.offset2 == self.d - (2 * off - off)

     def testRSub(self):
-        self.assertEqual(self.d - self.offset2, (-self.offset2).apply(self.d))
+        assert self.d - self.offset2 == (-self.offset2).apply(self.d)

     def testMult1(self):
-        self.assertEqual(self.d + 5 * self.offset1, self.d + self._offset(5))
+        assert self.d + 5 * self.offset1 == self.d + self._offset(5)

     def testMult2(self):
-        self.assertEqual(self.d + (-3 * self._offset(-2)),
-                         self.d + self._offset(6))
+        assert self.d + (-3 * self._offset(-2)) == self.d + self._offset(6)

     def testRollback1(self):
-        self.assertEqual(self.offset1.rollback(self.d), self.d)
-        self.assertEqual(self.offset2.rollback(self.d), self.d)
+        assert self.offset1.rollback(self.d) == self.d
+        assert self.offset2.rollback(self.d) == self.d

         d = datetime(2014, 7, 1, 0)
+
         # 2014/07/01 is Tuesday, 06/30 is Monday(holiday)
-        self.assertEqual(self.offset1.rollback(d), datetime(2014, 6, 27, 17))
+        assert self.offset1.rollback(d) == datetime(2014, 6, 27, 17)

         # 2014/6/30 and 2014/6/27 are holidays
-        self.assertEqual(self.offset2.rollback(d), datetime(2014, 6, 26, 17))
+        assert self.offset2.rollback(d) == datetime(2014, 6, 26, 17)

     def testRollback2(self):
-        self.assertEqual(self._offset(-3)
-                         .rollback(datetime(2014, 7, 5, 15, 0)),
-                         datetime(2014, 7, 4, 17, 0))
+        assert (self._offset(-3).rollback(datetime(2014, 7, 5, 15, 0)) ==
+                datetime(2014, 7, 4, 17, 0))

     def testRollforward1(self):
-        self.assertEqual(self.offset1.rollforward(self.d), self.d)
-        self.assertEqual(self.offset2.rollforward(self.d), self.d)
+        assert self.offset1.rollforward(self.d) == self.d
+        assert self.offset2.rollforward(self.d) == self.d

         d = datetime(2014, 7, 1, 0)
-        self.assertEqual(self.offset1.rollforward(d), datetime(2014, 7, 1, 9))
-        self.assertEqual(self.offset2.rollforward(d), datetime(2014, 7, 1, 9))
+        assert self.offset1.rollforward(d) == datetime(2014, 7, 1, 9)
+        assert self.offset2.rollforward(d) == datetime(2014, 7, 1, 9)

     def testRollforward2(self):
-        self.assertEqual(self._offset(-3)
-                         .rollforward(datetime(2014, 7, 5, 16, 0)),
-                         datetime(2014, 7, 7, 9))
+        assert (self._offset(-3).rollforward(datetime(2014, 7, 5, 16, 0)) ==
+                datetime(2014, 7, 7, 9))

     def test_roll_date_object(self):
         offset = BusinessHour()
@@ -1551,10 +1536,10 @@ def test_roll_date_object(self):
         dt = datetime(2014, 7, 6, 15, 0)

         result = offset.rollback(dt)
-        self.assertEqual(result, datetime(2014, 7, 4, 17))
+        assert result == datetime(2014, 7, 4, 17)

         result = offset.rollforward(dt)
-        self.assertEqual(result, datetime(2014, 7, 7, 9))
+        assert result == datetime(2014, 7, 7, 9)

     def test_normalize(self):
         tests = []
@@ -1598,7 +1583,7 @@ def test_normalize(self):

         for offset, cases in tests:
             for dt, expected in compat.iteritems(cases):
-                self.assertEqual(offset.apply(dt), expected)
+                assert offset.apply(dt) == expected

     def test_onOffset(self):
         tests = []
@@ -1614,7 +1599,7 @@ def test_onOffset(self):

         for offset, cases in tests:
             for dt, expected in compat.iteritems(cases):
-                self.assertEqual(offset.onOffset(dt), expected)
+                assert offset.onOffset(dt) == expected

     def test_apply(self):
         tests = []
@@ -1702,7 +1687,7 @@ def test_different_normalize_equals(self):
         offset = CDay()
         offset2 = CDay()
         offset2.normalize = True
-        self.assertEqual(offset, offset2)
+        assert offset == offset2

     def test_repr(self):
         assert repr(self.offset) == '<CustomBusinessDay>'
@@ -1717,50 +1702,50 @@ def test_with_offset(self):
         assert (self.d + offset) == datetime(2008, 1, 2, 2)

     def testEQ(self):
-        self.assertEqual(self.offset2, self.offset2)
+        assert self.offset2 == self.offset2

     def test_mul(self):
         pass

     def test_hash(self):
-        self.assertEqual(hash(self.offset2), hash(self.offset2))
+        assert hash(self.offset2) == hash(self.offset2)

     def testCall(self):
-        self.assertEqual(self.offset2(self.d), datetime(2008, 1, 3))
-        self.assertEqual(self.offset2(self.nd), datetime(2008, 1, 3))
+        assert self.offset2(self.d) == datetime(2008, 1, 3)
+        assert self.offset2(self.nd) == datetime(2008, 1, 3)

     def testRAdd(self):
-        self.assertEqual(self.d + self.offset2, self.offset2 + self.d)
+        assert self.d + self.offset2 == self.offset2 + self.d

     def testSub(self):
         off = self.offset2
         pytest.raises(Exception, off.__sub__, self.d)
-        self.assertEqual(2 * off - off, off)
+        assert 2 * off - off == off

-        self.assertEqual(self.d - self.offset2, self.d + CDay(-2))
+        assert self.d - self.offset2 == self.d + CDay(-2)

     def testRSub(self):
-        self.assertEqual(self.d - self.offset2, (-self.offset2).apply(self.d))
+        assert self.d - self.offset2 == (-self.offset2).apply(self.d)

     def testMult1(self):
-        self.assertEqual(self.d + 10 * self.offset, self.d + CDay(10))
+        assert self.d + 10 * self.offset == self.d + CDay(10)

     def testMult2(self):
-        self.assertEqual(self.d + (-5 * CDay(-10)), self.d + CDay(50))
+        assert self.d + (-5 * CDay(-10)) == self.d + CDay(50)

     def testRollback1(self):
-        self.assertEqual(CDay(10).rollback(self.d), self.d)
+        assert CDay(10).rollback(self.d) == self.d

     def testRollback2(self):
-        self.assertEqual(
-            CDay(10).rollback(datetime(2008, 1, 5)), datetime(2008, 1, 4))
+        assert (CDay(10).rollback(datetime(2008, 1, 5)) ==
+                datetime(2008, 1, 4))

     def testRollforward1(self):
-        self.assertEqual(CDay(10).rollforward(self.d), self.d)
+        assert CDay(10).rollforward(self.d) == self.d

     def testRollforward2(self):
-        self.assertEqual(
-            CDay(10).rollforward(datetime(2008, 1, 5)), datetime(2008, 1, 7))
+        assert (CDay(10).rollforward(datetime(2008, 1, 5)) ==
+                datetime(2008, 1, 7))

     def test_roll_date_object(self):
         offset = CDay()
@@ -1768,17 +1753,17 @@ def test_roll_date_object(self):
         dt = date(2012, 9, 15)

         result = offset.rollback(dt)
-        self.assertEqual(result, datetime(2012, 9, 14))
+        assert result == datetime(2012, 9, 14)

         result = offset.rollforward(dt)
-        self.assertEqual(result, datetime(2012, 9, 17))
+        assert result == datetime(2012, 9, 17)

         offset = offsets.Day()
         result = offset.rollback(dt)
-        self.assertEqual(result, datetime(2012, 9, 15))
+        assert result == datetime(2012, 9, 15)

         result = offset.rollforward(dt)
-        self.assertEqual(result, datetime(2012, 9, 15))
+        assert result == datetime(2012, 9, 15)

     def test_onOffset(self):
         tests = [(CDay(), datetime(2008, 1, 1), True),
@@ -1837,20 +1822,20 @@ def test_apply_large_n(self):
         dt = datetime(2012, 10, 23)

         result = dt + CDay(10)
-        self.assertEqual(result, datetime(2012, 11, 6))
+        assert result == datetime(2012, 11, 6)

         result = dt + CDay(100) - CDay(100)
-        self.assertEqual(result, dt)
+        assert result == dt

         off = CDay() * 6
         rs = datetime(2012, 1, 1) - off
         xp = datetime(2011, 12, 23)
-        self.assertEqual(rs, xp)
+        assert rs == xp

         st = datetime(2011, 12, 18)
         rs = st + off
         xp = datetime(2011, 12, 26)
-        self.assertEqual(rs, xp)
+        assert rs == xp

     def test_apply_corner(self):
         pytest.raises(Exception, CDay().apply, BMonthEnd())
@@ -1870,7 +1855,7 @@ def test_holidays(self):
             dt = datetime(year, 4, 30)
             xp = datetime(year, 5, 2)
             rs = dt + tday
-            self.assertEqual(rs, xp)
+            assert rs == xp

     def test_weekmask(self):
         weekmask_saudi = 'Sat Sun Mon Tue Wed'  # Thu-Fri Weekend
@@ -1883,13 +1868,13 @@ def test_weekmask(self):
         xp_saudi = datetime(2013, 5, 4)
         xp_uae = datetime(2013, 5, 2)
         xp_egypt = datetime(2013, 5, 2)
-        self.assertEqual(xp_saudi, dt + bday_saudi)
-        self.assertEqual(xp_uae, dt + bday_uae)
-        self.assertEqual(xp_egypt, dt + bday_egypt)
+        assert xp_saudi == dt + bday_saudi
+        assert xp_uae == dt + bday_uae
+        assert xp_egypt == dt + bday_egypt
         xp2 = datetime(2013, 5, 5)
-        self.assertEqual(xp2, dt + 2 * bday_saudi)
-        self.assertEqual(xp2, dt + 2 * bday_uae)
-        self.assertEqual(xp2, dt + 2 * bday_egypt)
+        assert xp2 == dt + 2 * bday_saudi
+        assert xp2 == dt + 2 * bday_uae
+        assert xp2 == dt + 2 * bday_egypt

     def test_weekmask_and_holidays(self):
         weekmask_egypt = 'Sun Mon Tue Wed Thu'  # Fri-Sat Weekend
@@ -1898,7 +1883,7 @@ def test_weekmask_and_holidays(self):
         bday_egypt = CDay(holidays=holidays, weekmask=weekmask_egypt)
         dt = datetime(2013, 4, 30)
         xp_egypt = datetime(2013, 5, 5)
-        self.assertEqual(xp_egypt, dt + 2 * bday_egypt)
+        assert xp_egypt == dt + 2 * bday_egypt

     def test_calendar(self):
         calendar = USFederalHolidayCalendar()
@@ -1908,7 +1893,7 @@ def test_calendar(self):
     def test_roundtrip_pickle(self):
         def _check_roundtrip(obj):
             unpickled = tm.round_trip_pickle(obj)
-            self.assertEqual(unpickled, obj)
+            assert unpickled == obj

         _check_roundtrip(self.offset)
         _check_roundtrip(self.offset2)
@@ -1921,7 +1906,7 @@ def test_pickle_compat_0_14_1(self):
         cday0_14_1 = read_pickle(os.path.join(pth, 'cday-0.14.1.pickle'))
         cday = CDay(holidays=hdays)
-        self.assertEqual(cday, cday0_14_1)
+        assert cday == cday0_14_1


 class CustomBusinessMonthBase(object):
@@ -1933,33 +1918,32 @@ def setUp(self):
         self.offset2 = self._object(2)

     def testEQ(self):
-        self.assertEqual(self.offset2, self.offset2)
+        assert self.offset2 == self.offset2

     def test_mul(self):
         pass

     def test_hash(self):
-        self.assertEqual(hash(self.offset2), hash(self.offset2))
+        assert hash(self.offset2) == hash(self.offset2)

     def testRAdd(self):
-        self.assertEqual(self.d + self.offset2, self.offset2 + self.d)
+        assert self.d + self.offset2 == self.offset2 + self.d

     def testSub(self):
         off = self.offset2
         pytest.raises(Exception, off.__sub__, self.d)
-        self.assertEqual(2 * off - off, off)
+        assert 2 * off - off == off

-        self.assertEqual(self.d - self.offset2, self.d + self._object(-2))
+        assert self.d - self.offset2 == self.d + self._object(-2)

     def testRSub(self):
-        self.assertEqual(self.d - self.offset2, (-self.offset2).apply(self.d))
+        assert self.d - self.offset2 == (-self.offset2).apply(self.d)

     def testMult1(self):
-        self.assertEqual(self.d + 10 * self.offset, self.d + self._object(10))
+        assert self.d + 10 * self.offset == self.d + self._object(10)

     def testMult2(self):
-        self.assertEqual(self.d + (-5 * self._object(-10)),
-                         self.d + self._object(50))
+        assert self.d + (-5 * self._object(-10)) == self.d + self._object(50)

     def test_offsets_compare_equal(self):
         offset1 = self._object()
@@ -1969,7 +1953,7 @@ def test_offsets_compare_equal(self):
     def test_roundtrip_pickle(self):
         def _check_roundtrip(obj):
             unpickled = tm.round_trip_pickle(obj)
-            self.assertEqual(unpickled, obj)
+            assert unpickled == obj

         _check_roundtrip(self._object())
         _check_roundtrip(self._object(2))
@@ -1984,26 +1968,24 @@ def test_different_normalize_equals(self):
         offset = CBMonthEnd()
         offset2 = CBMonthEnd()
         offset2.normalize = True
-        self.assertEqual(offset, offset2)
+        assert offset == offset2

     def test_repr(self):
         assert repr(self.offset) == '<CustomBusinessMonthEnd>'
         assert repr(self.offset2) == '<2 * CustomBusinessMonthEnds>'

     def testCall(self):
-        self.assertEqual(self.offset2(self.d), datetime(2008, 2, 29))
+        assert self.offset2(self.d) == datetime(2008, 2, 29)

     def testRollback1(self):
-        self.assertEqual(
-            CDay(10).rollback(datetime(2007, 12, 31)), datetime(2007, 12, 31))
+        assert (CDay(10).rollback(datetime(2007, 12, 31)) ==
+                datetime(2007, 12, 31))

     def testRollback2(self):
-        self.assertEqual(CBMonthEnd(10).rollback(self.d),
-                         datetime(2007, 12, 31))
+        assert CBMonthEnd(10).rollback(self.d) == datetime(2007, 12, 31)

     def testRollforward1(self):
-        self.assertEqual(CBMonthEnd(10).rollforward(
-            self.d), datetime(2008, 1, 31))
+        assert CBMonthEnd(10).rollforward(self.d) == datetime(2008, 1, 31)

     def test_roll_date_object(self):
         offset = CBMonthEnd()
@@ -2011,17 +1993,17 @@ def test_roll_date_object(self):
         dt = date(2012, 9, 15)

         result = offset.rollback(dt)
-        self.assertEqual(result, datetime(2012, 8, 31))
+        assert result == datetime(2012, 8, 31)

         result = offset.rollforward(dt)
-        self.assertEqual(result, datetime(2012, 9, 28))
+        assert result == datetime(2012, 9, 28)

         offset = offsets.Day()
         result = offset.rollback(dt)
-        self.assertEqual(result, datetime(2012, 9, 15))
+        assert result == datetime(2012, 9, 15)

         result = offset.rollforward(dt)
-        self.assertEqual(result, datetime(2012, 9, 15))
+        assert result == datetime(2012, 9, 15)

     def test_onOffset(self):
         tests = [(CBMonthEnd(), datetime(2008, 1, 31), True),
@@ -2059,20 +2041,20 @@ def test_apply_large_n(self):
         dt = datetime(2012, 10, 23)

         result = dt + CBMonthEnd(10)
-        self.assertEqual(result, datetime(2013, 7, 31))
+        assert result == datetime(2013, 7, 31)

         result = dt + CDay(100) - CDay(100)
-        self.assertEqual(result, dt)
+        assert result == dt

         off = CBMonthEnd() * 6
         rs = datetime(2012, 1, 1) - off
         xp = datetime(2011, 7, 29)
-        self.assertEqual(rs, xp)
+        assert rs == xp

         st = datetime(2011, 12, 18)
         rs = st + off
         xp = datetime(2012, 5, 31)
-        self.assertEqual(rs, xp)
+        assert rs == xp

     def test_holidays(self):
         # Define a TradingDay offset
@@ -2080,17 +2062,16 @@ def test_holidays(self):
                     np.datetime64('2012-02-29')]
         bm_offset = CBMonthEnd(holidays=holidays)
         dt = datetime(2012, 1, 1)
-        self.assertEqual(dt + bm_offset, datetime(2012, 1, 30))
-        self.assertEqual(dt + 2 * bm_offset, datetime(2012, 2, 27))
+        assert dt + bm_offset == datetime(2012, 1, 30)
+        assert dt + 2 * bm_offset == datetime(2012, 2, 27)

     def test_datetimeindex(self):
         from pandas.tseries.holiday import USFederalHolidayCalendar
         hcal = USFederalHolidayCalendar()
         freq = CBMonthEnd(calendar=hcal)

-        self.assertEqual(DatetimeIndex(start='20120101', end='20130101',
-                                       freq=freq).tolist()[0],
-                         datetime(2012, 1, 31))
+        assert (DatetimeIndex(start='20120101', end='20130101',
+                              freq=freq).tolist()[0] == datetime(2012, 1, 31))


 class TestCustomBusinessMonthBegin(CustomBusinessMonthBase, Base):
@@ -2101,26 +2082,24 @@ def test_different_normalize_equals(self):
         offset = CBMonthBegin()
         offset2 = CBMonthBegin()
         offset2.normalize = True
-        self.assertEqual(offset, offset2)
+        assert offset == offset2

     def test_repr(self):
         assert repr(self.offset) == '<CustomBusinessMonthBegin>'
         assert repr(self.offset2) == '<2 * CustomBusinessMonthBegins>'

     def testCall(self):
-        self.assertEqual(self.offset2(self.d), datetime(2008, 3, 3))
+        assert self.offset2(self.d) == datetime(2008, 3, 3)

     def testRollback1(self):
-        self.assertEqual(
-            CDay(10).rollback(datetime(2007, 12, 31)), datetime(2007, 12, 31))
+        assert (CDay(10).rollback(datetime(2007, 12, 31)) ==
+                datetime(2007, 12, 31))

     def testRollback2(self):
-        self.assertEqual(CBMonthBegin(10).rollback(self.d),
-                         datetime(2008, 1, 1))
+        assert CBMonthBegin(10).rollback(self.d) == datetime(2008, 1, 1)

     def testRollforward1(self):
-        self.assertEqual(CBMonthBegin(10).rollforward(
-            self.d), datetime(2008, 1, 1))
+        assert CBMonthBegin(10).rollforward(self.d) == datetime(2008, 1, 1)

     def test_roll_date_object(self):
         offset = CBMonthBegin()
@@ -2128,17 +2107,17 @@ def test_roll_date_object(self):
         dt = date(2012, 9, 15)

         result = offset.rollback(dt)
-        self.assertEqual(result, datetime(2012, 9, 3))
+        assert result == datetime(2012, 9, 3)

         result = offset.rollforward(dt)
-        self.assertEqual(result, datetime(2012, 10, 1))
+        assert result == datetime(2012, 10, 1)

         offset = offsets.Day()
         result = offset.rollback(dt)
-        self.assertEqual(result, datetime(2012, 9, 15))
+        assert result == datetime(2012, 9, 15)

         result = offset.rollforward(dt)
-        self.assertEqual(result, datetime(2012, 9, 15))
+        assert result == datetime(2012, 9, 15)

     def test_onOffset(self):
         tests = [(CBMonthBegin(), datetime(2008, 1, 1), True),
@@ -2175,20 +2154,21 @@ def test_apply_large_n(self):
         dt = datetime(2012, 10, 23)

         result = dt + CBMonthBegin(10)
-        self.assertEqual(result, datetime(2013, 8, 1))
+        assert result == datetime(2013, 8, 1)

         result = dt + CDay(100) - CDay(100)
-        self.assertEqual(result, dt)
+        assert result == dt

         off = CBMonthBegin() * 6
         rs = datetime(2012, 1, 1) - off
         xp = datetime(2011, 7, 1)
-        self.assertEqual(rs, xp)
+        assert rs == xp

         st = datetime(2011, 12, 18)
         rs = st + off
+
         xp = datetime(2012, 6, 1)
-        self.assertEqual(rs, xp)
+        assert rs == xp

     def test_holidays(self):
         # Define a TradingDay offset
@@ -2196,15 +2176,15 @@ def test_holidays(self):
                     np.datetime64('2012-03-01')]
         bm_offset = CBMonthBegin(holidays=holidays)
         dt = datetime(2012, 1, 1)
-        self.assertEqual(dt + bm_offset, datetime(2012, 1, 2))
-        self.assertEqual(dt + 2 * bm_offset, datetime(2012, 2, 3))
+
+        assert dt + bm_offset == datetime(2012, 1, 2)
+        assert dt + 2 * bm_offset == datetime(2012, 2, 3)

     def test_datetimeindex(self):
         hcal = USFederalHolidayCalendar()
         cbmb = CBMonthBegin(calendar=hcal)
-        self.assertEqual(DatetimeIndex(start='20120101', end='20130101',
-                                       freq=cbmb).tolist()[0],
-                         datetime(2012, 1, 3))
+        assert (DatetimeIndex(start='20120101', end='20130101',
+                              freq=cbmb).tolist()[0] == datetime(2012, 1, 3))


 def assertOnOffset(offset, date, expected):
@@ -2218,10 +2198,9 @@ class TestWeek(Base):
     _offset = Week

     def test_repr(self):
-        self.assertEqual(repr(Week(weekday=0)), "<Week: weekday=0>")
-        self.assertEqual(repr(Week(n=-1, weekday=0)), "<-1 * Week: weekday=0>")
-        self.assertEqual(repr(Week(n=-2, weekday=0)),
-                         "<-2 * Weeks: weekday=0>")
+        assert repr(Week(weekday=0)) == "<Week: weekday=0>"
+        assert repr(Week(n=-1, weekday=0)) == "<-1 * Week: weekday=0>"
+        assert repr(Week(n=-2, weekday=0)) == "<-2 * Weeks: weekday=0>"

     def test_corner(self):
         pytest.raises(ValueError, Week, weekday=7)
@@ -2303,8 +2282,8 @@ def test_constructor(self):
                           n=1, week=0, weekday=7)

     def test_repr(self):
-        self.assertEqual(repr(WeekOfMonth(weekday=1, week=2)),
-                         "<WeekOfMonth: week=2, weekday=1>")
+        assert (repr(WeekOfMonth(weekday=1, week=2)) ==
+                "<WeekOfMonth: week=2, weekday=1>")

     def test_offset(self):
         date1 = datetime(2011, 1, 4)  # 1st Tuesday of Month
@@ -2354,9 +2333,10 @@ def test_offset(self):
         # try subtracting
         result = datetime(2011, 2, 1) - WeekOfMonth(week=1, weekday=2)
-        self.assertEqual(result, datetime(2011, 1, 12))
+        assert result == datetime(2011, 1, 12)
+
         result = datetime(2011, 2, 3) - WeekOfMonth(week=0, weekday=2)
-        self.assertEqual(result, datetime(2011, 2, 2))
+        assert result == datetime(2011, 2, 2)

     def test_onOffset(self):
         test_cases = [
@@ -2370,7 +2350,7 @@ def test_onOffset(self):

         for week, weekday, dt, expected in test_cases:
             offset = WeekOfMonth(week=week, weekday=weekday)
-            self.assertEqual(offset.onOffset(dt), expected)
+            assert offset.onOffset(dt) == expected


 class TestLastWeekOfMonth(Base):
@@ -2392,13 +2372,13 @@ def test_offset(self):
         offset_sat = LastWeekOfMonth(n=1, weekday=5)

         one_day_before = (last_sat + timedelta(days=-1))
-        self.assertEqual(one_day_before + offset_sat, last_sat)
+        assert one_day_before + offset_sat == last_sat

         one_day_after = (last_sat + timedelta(days=+1))
-        self.assertEqual(one_day_after + offset_sat, next_sat)
+        assert one_day_after + offset_sat == next_sat

         # Test On that day
-        self.assertEqual(last_sat + offset_sat, next_sat)
+        assert last_sat + offset_sat == next_sat

         # Thursday

@@ -2407,23 +2387,22 @@ def test_offset(self):
         next_thurs = datetime(2013, 2, 28)

         one_day_before = last_thurs + timedelta(days=-1)
-        self.assertEqual(one_day_before + offset_thur, last_thurs)
+        assert one_day_before + offset_thur == last_thurs

         one_day_after = last_thurs + timedelta(days=+1)
-        self.assertEqual(one_day_after + offset_thur, next_thurs)
+        assert one_day_after + offset_thur == next_thurs

         # Test on that day
-        self.assertEqual(last_thurs + offset_thur, next_thurs)
+        assert last_thurs + offset_thur == next_thurs

         three_before = last_thurs + timedelta(days=-3)
-        self.assertEqual(three_before + offset_thur, last_thurs)
+        assert three_before + offset_thur == last_thurs

         two_after = last_thurs + timedelta(days=+2)
-        self.assertEqual(two_after + offset_thur, next_thurs)
+        assert two_after + offset_thur == next_thurs

         offset_sunday = LastWeekOfMonth(n=1, weekday=WeekDay.SUN)
-        self.assertEqual(datetime(2013, 7, 31) +
-                         offset_sunday, datetime(2013, 8, 25))
+        assert datetime(2013, 7, 31) + offset_sunday == datetime(2013, 8, 25)

     def test_onOffset(self):
         test_cases = [
@@ -2445,7 +2424,7 @@ def test_onOffset(self):

         for weekday, dt, expected in test_cases:
             offset = LastWeekOfMonth(weekday=weekday)
-            self.assertEqual(offset.onOffset(dt), expected, msg=date)
+            assert offset.onOffset(dt) == expected


 class TestBMonthBegin(Base):
@@ -2556,7 +2535,7 @@ def test_normalize(self):

         result = dt + BMonthEnd(normalize=True)
         expected = dt.replace(hour=0) + BMonthEnd()
-        self.assertEqual(result, expected)
+        assert result == expected

     def test_onOffset(self):

@@ -2655,23 +2634,22 @@ def test_offset(self):
         for base, expected in compat.iteritems(cases):
             assertEq(offset, base, expected)

-    # def test_day_of_month(self):
-    #     dt = datetime(2007, 1, 1)

-    #     offset = MonthEnd(day=20)
+    def test_day_of_month(self):
+        dt = datetime(2007, 1, 1)
+        offset = MonthEnd(day=20)

-    #     result = dt + offset
-    #     self.assertEqual(result, datetime(2007, 1, 20))
+        result = dt + offset
+        assert result == Timestamp(2007, 1, 31)

-    #     result = result + offset
-    #     self.assertEqual(result, datetime(2007, 2, 20))
+        result = result + offset
+        assert result == Timestamp(2007, 2, 28)

     def test_normalize(self):
         dt = datetime(2007, 1, 1, 3)

         result = dt + MonthEnd(normalize=True)
         expected = dt.replace(hour=0) + MonthEnd()
-        self.assertEqual(result, expected)
+        assert result == expected

     def test_onOffset(self):

@@ -3033,12 +3011,12 @@ class TestBQuarterBegin(Base):
     _offset = BQuarterBegin

     def test_repr(self):
-        self.assertEqual(repr(BQuarterBegin()),
-                         "<BusinessQuarterBegin: startingMonth=3>")
-        self.assertEqual(repr(BQuarterBegin(startingMonth=3)),
-                         "<BusinessQuarterBegin: startingMonth=3>")
-        self.assertEqual(repr(BQuarterBegin(startingMonth=1)),
-                         "<BusinessQuarterBegin: startingMonth=1>")
+        assert (repr(BQuarterBegin()) ==
+                "<BusinessQuarterBegin: startingMonth=3>")
+        assert (repr(BQuarterBegin(startingMonth=3)) ==
+                "<BusinessQuarterBegin: startingMonth=3>")
+        assert (repr(BQuarterBegin(startingMonth=1)) ==
+                "<BusinessQuarterBegin: startingMonth=1>")

     def test_isAnchored(self):
         assert BQuarterBegin(startingMonth=1).isAnchored()
@@ -3120,19 +3098,19 @@ def test_offset(self):

         # corner
         offset = BQuarterBegin(n=-1, startingMonth=1)
-        self.assertEqual(datetime(2007, 4, 3) + offset, datetime(2007, 4, 2))
+        assert datetime(2007, 4, 3) + offset == datetime(2007, 4, 2)


 class TestBQuarterEnd(Base):
     _offset = BQuarterEnd

     def test_repr(self):
-        self.assertEqual(repr(BQuarterEnd()),
-                         "<BusinessQuarterEnd: startingMonth=3>")
-        self.assertEqual(repr(BQuarterEnd(startingMonth=3)),
-                         "<BusinessQuarterEnd: startingMonth=3>")
-        self.assertEqual(repr(BQuarterEnd(startingMonth=1)),
-                         "<BusinessQuarterEnd: startingMonth=1>")
+        assert (repr(BQuarterEnd()) ==
+                "<BusinessQuarterEnd: startingMonth=3>")
+        assert (repr(BQuarterEnd(startingMonth=3)) ==
+                "<BusinessQuarterEnd: startingMonth=3>")
+        assert (repr(BQuarterEnd(startingMonth=1)) ==
+                "<BusinessQuarterEnd: startingMonth=1>")

     def test_isAnchored(self):
         assert BQuarterEnd(startingMonth=1).isAnchored()
@@ -3197,7 +3175,7 @@ def test_offset(self):

         # corner
         offset = BQuarterEnd(n=-1, startingMonth=1)
-        self.assertEqual(datetime(2010, 1, 31) + offset, datetime(2010, 1, 29))
+        assert datetime(2010, 1, 31) + offset == datetime(2010, 1, 29)

     def test_onOffset(self):

@@ -3334,58 +3312,52 @@ def test_apply(self):
             current = data[0]
             for datum in data[1:]:
                 current = current + offset
-                self.assertEqual(current, datum)
+                assert current == datum


 class TestFY5253NearestEndMonth(Base):

     def test_get_target_month_end(self):
-        self.assertEqual(makeFY5253NearestEndMonth(startingMonth=8,
-                                                   weekday=WeekDay.SAT)
-                         .get_target_month_end(
-            datetime(2013, 1, 1)), datetime(2013, 8, 31))
-        self.assertEqual(makeFY5253NearestEndMonth(startingMonth=12,
-                                                   weekday=WeekDay.SAT)
-                         .get_target_month_end(datetime(2013, 1, 1)),
-                         datetime(2013, 12, 31))
-        self.assertEqual(makeFY5253NearestEndMonth(startingMonth=2,
-                                                   weekday=WeekDay.SAT)
-                         .get_target_month_end(datetime(2013, 1, 1)),
-                         datetime(2013, 2, 28))
+        assert (makeFY5253NearestEndMonth(
+            startingMonth=8, weekday=WeekDay.SAT).get_target_month_end(
+                datetime(2013, 1, 1)) == datetime(2013, 8, 31))
+        assert (makeFY5253NearestEndMonth(
+            startingMonth=12, weekday=WeekDay.SAT).get_target_month_end(
+                datetime(2013, 1, 1)) == datetime(2013, 12, 31))
+        assert (makeFY5253NearestEndMonth(
+            startingMonth=2, weekday=WeekDay.SAT).get_target_month_end(
+                datetime(2013, 1, 1)) == datetime(2013, 2, 28))

     def test_get_year_end(self):
-        self.assertEqual(makeFY5253NearestEndMonth(startingMonth=8,
-                                                   weekday=WeekDay.SAT)
-                         .get_year_end(datetime(2013, 1, 1)),
-                         datetime(2013, 8, 31))
-        self.assertEqual(makeFY5253NearestEndMonth(startingMonth=8,
-                                                   weekday=WeekDay.SUN)
-                         .get_year_end(datetime(2013, 1, 1)),
-                         datetime(2013, 9, 1))
-        self.assertEqual(makeFY5253NearestEndMonth(startingMonth=8,
-                                                   weekday=WeekDay.FRI)
-                         .get_year_end(datetime(2013, 1, 1)),
-                         datetime(2013, 8, 30))
+        assert (makeFY5253NearestEndMonth(
+            startingMonth=8, weekday=WeekDay.SAT).get_year_end(
+                datetime(2013, 1, 1)) == datetime(2013, 8, 31))
+        assert (makeFY5253NearestEndMonth(
+            startingMonth=8, weekday=WeekDay.SUN).get_year_end(
+                datetime(2013, 1, 1)) == datetime(2013, 9, 1))
+        assert (makeFY5253NearestEndMonth(
+            startingMonth=8, weekday=WeekDay.FRI).get_year_end(
+                datetime(2013, 1, 1)) == datetime(2013, 8, 30))

         offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12,
                           variation="nearest")
-        self.assertEqual(offset_n.get_year_end(
-            datetime(2012, 1, 1)), datetime(2013, 1, 1))
-        self.assertEqual(offset_n.get_year_end(
-            datetime(2012, 1, 10)), datetime(2013, 1, 1))
-
-        self.assertEqual(offset_n.get_year_end(
-            datetime(2013, 1, 1)), datetime(2013, 12, 31))
-        self.assertEqual(offset_n.get_year_end(
-            datetime(2013, 1, 2)), datetime(2013, 12, 31))
-        self.assertEqual(offset_n.get_year_end(
-            datetime(2013, 1, 3)), datetime(2013, 12, 31))
-        self.assertEqual(offset_n.get_year_end(
-            datetime(2013, 1, 10)), datetime(2013, 12, 31))
+        assert (offset_n.get_year_end(datetime(2012, 1, 1)) ==
+                datetime(2013, 1, 1))
+        assert (offset_n.get_year_end(datetime(2012, 1, 10)) ==
+                datetime(2013, 1, 1))
+
+        assert (offset_n.get_year_end(datetime(2013, 1, 1)) ==
+                datetime(2013, 12, 31))
+        assert (offset_n.get_year_end(datetime(2013, 1, 2)) ==
+                datetime(2013, 12, 31))
+        assert (offset_n.get_year_end(datetime(2013, 1, 3)) ==
+                datetime(2013, 12, 31))
+        assert (offset_n.get_year_end(datetime(2013, 1, 10)) ==
+                datetime(2013, 12, 31))

         JNJ = FY5253(n=1, startingMonth=12, weekday=6, variation="nearest")
-        self.assertEqual(JNJ.get_year_end(
-            datetime(2006, 1, 1)), datetime(2006, 12, 31))
+        assert (JNJ.get_year_end(datetime(2006, 1, 1)) ==
+                datetime(2006, 12, 31))

     def test_onOffset(self):
         offset_lom_aug_sat = makeFY5253NearestEndMonth(1, startingMonth=8,
@@ -3500,7 +3472,7 @@ def test_apply(self):
             current = data[0]
             for datum in data[1:]:
                 current = current + offset
-                self.assertEqual(current, datum)
+                assert current == datum


 class TestFY5253LastOfMonthQuarter(Base):
@@ -3517,26 +3489,18 @@ def test_isAnchored(self):
             qtr_with_extra_week=4).isAnchored()

     def test_equality(self):
-        self.assertEqual(makeFY5253LastOfMonthQuarter(startingMonth=1,
-                                                      weekday=WeekDay.SAT,
-                                                      qtr_with_extra_week=4),
-                         makeFY5253LastOfMonthQuarter(startingMonth=1,
-                                                      weekday=WeekDay.SAT,
-                                                      qtr_with_extra_week=4))
-        self.assertNotEqual(
-            makeFY5253LastOfMonthQuarter(
-                startingMonth=1, weekday=WeekDay.SAT,
-                qtr_with_extra_week=4),
-            makeFY5253LastOfMonthQuarter(
-                startingMonth=1, weekday=WeekDay.SUN,
-                qtr_with_extra_week=4))
-        self.assertNotEqual(
-            makeFY5253LastOfMonthQuarter(
-                startingMonth=1, weekday=WeekDay.SAT,
-                qtr_with_extra_week=4),
-            makeFY5253LastOfMonthQuarter(
-                startingMonth=2, weekday=WeekDay.SAT,
-                qtr_with_extra_week=4))
+        assert (makeFY5253LastOfMonthQuarter(
+            startingMonth=1, weekday=WeekDay.SAT,
+            qtr_with_extra_week=4) == makeFY5253LastOfMonthQuarter(
+                startingMonth=1, weekday=WeekDay.SAT, qtr_with_extra_week=4))
+        assert (makeFY5253LastOfMonthQuarter(
+            startingMonth=1, weekday=WeekDay.SAT,
+            qtr_with_extra_week=4) != makeFY5253LastOfMonthQuarter(
+                startingMonth=1, weekday=WeekDay.SUN, qtr_with_extra_week=4))
+        assert (makeFY5253LastOfMonthQuarter(
+            startingMonth=1, weekday=WeekDay.SAT,
+            qtr_with_extra_week=4) != makeFY5253LastOfMonthQuarter(
+                startingMonth=2, weekday=WeekDay.SAT, qtr_with_extra_week=4))

     def test_offset(self):
         offset = makeFY5253LastOfMonthQuarter(1, startingMonth=9,
@@ -3705,12 +3669,9 @@ def test_get_weeks(self):
                                                  weekday=WeekDay.SAT,
                                                  qtr_with_extra_week=4)

-        self.assertEqual(sat_dec_1.get_weeks(
-            datetime(2011, 4, 2)), [14, 13, 13, 13])
-        self.assertEqual(sat_dec_4.get_weeks(
-            datetime(2011, 4, 2)), [13, 13, 13, 14])
-        self.assertEqual(sat_dec_1.get_weeks(
-            datetime(2010, 12, 25)), [13, 13, 13, 13])
+        assert sat_dec_1.get_weeks(datetime(2011, 4, 2)) == [14, 13, 13, 13]
+        assert sat_dec_4.get_weeks(datetime(2011, 4, 2)) == [13, 13, 13, 14]
+        assert sat_dec_1.get_weeks(datetime(2010, 12, 25)) == [13, 13, 13, 13]


 class TestFY5253NearestEndMonthQuarter(Base):
@@ -3802,12 +3763,12 @@ def test_offset(self):
 class TestQuarterBegin(Base):

     def test_repr(self):
-        self.assertEqual(repr(QuarterBegin()),
-                         "<QuarterBegin: startingMonth=3>")
-        self.assertEqual(repr(QuarterBegin(startingMonth=3)),
-                         "<QuarterBegin: startingMonth=3>")
-        self.assertEqual(repr(QuarterBegin(startingMonth=1)),
-                         "<QuarterBegin: startingMonth=1>")
+        assert (repr(QuarterBegin()) ==
+                "<QuarterBegin: startingMonth=3>")
+        assert (repr(QuarterBegin(startingMonth=3)) ==
+                "<QuarterBegin: startingMonth=3>")
+        assert (repr(QuarterBegin(startingMonth=1)) ==
+                "<QuarterBegin: startingMonth=1>")

     def test_isAnchored(self):
         assert QuarterBegin(startingMonth=1).isAnchored()
@@ -3874,18 +3835,19 @@ def test_offset(self):

         # corner
         offset = QuarterBegin(n=-1, startingMonth=1)
-        self.assertEqual(datetime(2010, 2, 1) + offset, datetime(2010, 1, 1))
+        assert datetime(2010, 2, 1) + offset == datetime(2010, 1, 1)


 class TestQuarterEnd(Base):
     _offset = QuarterEnd

     def test_repr(self):
-        self.assertEqual(repr(QuarterEnd()), "<QuarterEnd: startingMonth=3>")
-        self.assertEqual(repr(QuarterEnd(startingMonth=3)),
-                         "<QuarterEnd: startingMonth=3>")
-        self.assertEqual(repr(QuarterEnd(startingMonth=1)),
-                         "<QuarterEnd: startingMonth=1>")
+        assert (repr(QuarterEnd()) ==
+                "<QuarterEnd: startingMonth=3>")
+        assert (repr(QuarterEnd(startingMonth=3)) ==
+                "<QuarterEnd: startingMonth=3>")
+        assert (repr(QuarterEnd(startingMonth=1)) ==
+                "<QuarterEnd: startingMonth=1>")

     def test_isAnchored(self):
         assert QuarterEnd(startingMonth=1).isAnchored()
@@ -3951,7 +3913,7 @@ def test_offset(self):

         # corner
         offset = QuarterEnd(n=-1, startingMonth=1)
-        self.assertEqual(datetime(2010, 2, 1) + offset, datetime(2010, 1, 31))
+        assert datetime(2010, 2, 1) + offset == datetime(2010, 1, 31)

     def test_onOffset(self):

@@ -4173,14 +4135,14 @@ def test_offset(self):

         for offset, cases in tests:
             for base, expected in compat.iteritems(cases):
-                self.assertEqual(base + offset, expected)
+                assert base + offset == expected

     def test_roll(self):
         offset = BYearEnd(month=6)

         date = datetime(2009, 11, 30)
-        self.assertEqual(offset.rollforward(date), datetime(2010, 6, 30))
-        self.assertEqual(offset.rollback(date), datetime(2009, 6, 30))
+        assert offset.rollforward(date) == datetime(2010, 6, 30)
+        assert offset.rollback(date) == datetime(2009, 6, 30)

     def test_onOffset(self):

@@ -4389,7 +4351,7 @@ def test_ticks(self):
             offset = kls(3)
             result = offset + Timedelta(hours=2)
             assert isinstance(result, Timedelta)
-            self.assertEqual(result, expected)
+            assert result == expected

     def test_Hour(self):
         assertEq(Hour(), datetime(2010, 1, 1), datetime(2010, 1, 1, 1))
@@ -4397,8 +4359,8 @@ def test_Hour(self):
         assertEq(2 * Hour(), datetime(2010, 1, 1), datetime(2010, 1, 1, 2))
         assertEq(-1 * Hour(), datetime(2010, 1, 1, 1), datetime(2010, 1, 1))

-        self.assertEqual(Hour(3) + Hour(2), Hour(5))
-        self.assertEqual(Hour(3) - Hour(2), Hour())
+        assert Hour(3) + Hour(2) == Hour(5)
+        assert Hour(3) - Hour(2) == Hour()

         self.assertNotEqual(Hour(4), Hour(1))

@@ -4410,8 +4372,8 @@ def test_Minute(self):
         assertEq(-1 * Minute(), datetime(2010, 1, 1, 0, 1),
                  datetime(2010, 1, 1))

-        self.assertEqual(Minute(3) + Minute(2), Minute(5))
-        self.assertEqual(Minute(3) - Minute(2), Minute())
+        assert Minute(3) + Minute(2) == Minute(5)
+        assert Minute(3) - Minute(2) == Minute()
         self.assertNotEqual(Minute(5), Minute())

     def test_Second(self):
@@ -4423,8 +4385,8 @@ def test_Second(self):
         assertEq(-1 * Second(), datetime(2010, 1, 1, 0, 0, 1),
                  datetime(2010, 1, 1))

-        self.assertEqual(Second(3) + Second(2), Second(5))
-        self.assertEqual(Second(3) - Second(2), Second())
+        assert Second(3) + Second(2) == Second(5)
+        assert Second(3) - Second(2) == Second()

     def test_Millisecond(self):
         assertEq(Milli(), datetime(2010, 1, 1),
@@ -4438,8 +4400,8 @@ def test_Millisecond(self):
         assertEq(-1 * Milli(), datetime(2010, 1, 1, 0, 0, 0, 1000),
                  datetime(2010, 1, 1))

-        self.assertEqual(Milli(3) + Milli(2), Milli(5))
-        self.assertEqual(Milli(3) - Milli(2), Milli())
+        assert Milli(3) + Milli(2) == Milli(5)
+        assert Milli(3) - Milli(2) == Milli()

     def test_MillisecondTimestampArithmetic(self):
         assertEq(Milli(), Timestamp('2010-01-01'),
@@ -4457,18 +4419,18 @@ def test_Microsecond(self):
         assertEq(-1 * Micro(), datetime(2010, 1, 1, 0, 0, 0, 1),
                  datetime(2010, 1, 1))

-        self.assertEqual(Micro(3) + Micro(2), Micro(5))
-        self.assertEqual(Micro(3) - Micro(2), Micro())
+        assert Micro(3) + Micro(2) == Micro(5)
+        assert Micro(3) - Micro(2) == Micro()

     def test_NanosecondGeneric(self):
         timestamp = Timestamp(datetime(2010, 1, 1))
-        self.assertEqual(timestamp.nanosecond, 0)
+        assert timestamp.nanosecond == 0

         result = timestamp + Nano(10)
-        self.assertEqual(result.nanosecond, 10)
+        assert result.nanosecond == 10
         reverse_result = Nano(10) + timestamp
-        self.assertEqual(reverse_result.nanosecond, 10)
+        assert reverse_result.nanosecond == 10

     def test_Nanosecond(self):
         timestamp = Timestamp(datetime(2010, 1, 1))
@@ -4477,29 +4439,29 @@ def test_Nanosecond(self):
         assertEq(2 * Nano(), timestamp, timestamp + np.timedelta64(2, 'ns'))
         assertEq(-1 * Nano(), timestamp + np.timedelta64(1, 'ns'), timestamp)

-        self.assertEqual(Nano(3) + Nano(2), Nano(5))
-        self.assertEqual(Nano(3) - Nano(2), Nano())
+        assert Nano(3) + Nano(2) == Nano(5)
+        assert Nano(3) - Nano(2) == Nano()

         # GH9284
-        self.assertEqual(Nano(1) + Nano(10), Nano(11))
-        self.assertEqual(Nano(5) + Micro(1), Nano(1005))
-        self.assertEqual(Micro(5) + Nano(1), Nano(5001))
+        assert Nano(1) + Nano(10) == Nano(11)
+        assert Nano(5) + Micro(1) == Nano(1005)
+        assert Micro(5) + Nano(1) == Nano(5001)

     def test_tick_zero(self):
         for t1 in self.ticks:
             for t2 in self.ticks:
-                self.assertEqual(t1(0), t2(0))
-                self.assertEqual(t1(0) + t2(0), t1(0))
+                assert t1(0) == t2(0)
+                assert t1(0) + t2(0) == t1(0)

                 if t1 is not Nano:
-                    self.assertEqual(t1(2) + t2(0), t1(2))
+                    assert t1(2) + t2(0) == t1(2)

             if t1 is Nano:
-                self.assertEqual(t1(2) + Nano(0), t1(2))
+                assert t1(2) + Nano(0) == t1(2)

     def test_tick_equalities(self):
         for t in self.ticks:
-            self.assertEqual(t(3), t(3))
-            self.assertEqual(t(), t(1))
+            assert t(3) == t(3)
+            assert t() == t(1)

             # not equals
             self.assertNotEqual(t(3), t(2))
@@ -4507,10 +4469,10 @@ def test_tick_equalities(self):

     def test_tick_operators(self):
         for t in self.ticks:
-            self.assertEqual(t(3) + t(2), t(5))
-            self.assertEqual(t(3) - t(2), t(1))
-            self.assertEqual(t(800) + t(300), t(1100))
-            self.assertEqual(t(1000) - t(5), t(995))
+            assert t(3) + t(2) == t(5)
+            assert t(3) - t(2) == t(1)
+            assert t(800) + t(300) == t(1100)
+            assert t(1000) - t(5) == t(995)

     def test_tick_offset(self):
         for t in self.ticks:
@@ -4533,25 +4495,22 @@ def test_compare_ticks(self):
 class TestOffsetNames(tm.TestCase):

     def test_get_offset_name(self):
-        self.assertEqual(BDay().freqstr, 'B')
-        self.assertEqual(BDay(2).freqstr, '2B')
-        self.assertEqual(BMonthEnd().freqstr, 'BM')
-        self.assertEqual(Week(weekday=0).freqstr, 'W-MON')
-        self.assertEqual(Week(weekday=1).freqstr, 'W-TUE')
-        self.assertEqual(Week(weekday=2).freqstr, 'W-WED')
-        self.assertEqual(Week(weekday=3).freqstr, 'W-THU')
-        self.assertEqual(Week(weekday=4).freqstr, 'W-FRI')
-
-        self.assertEqual(LastWeekOfMonth(
-            weekday=WeekDay.SUN).freqstr, "LWOM-SUN")
-        self.assertEqual(
-            makeFY5253LastOfMonthQuarter(weekday=1, startingMonth=3,
                                         qtr_with_extra_week=4).freqstr,
-            "REQ-L-MAR-TUE-4")
-        self.assertEqual(
-            makeFY5253NearestEndMonthQuarter(weekday=1, startingMonth=3,
                                             qtr_with_extra_week=3).freqstr,
-            "REQ-N-MAR-TUE-3")
+        assert BDay().freqstr == 'B'
+        assert BDay(2).freqstr == '2B'
+        assert BMonthEnd().freqstr == 'BM'
+        assert Week(weekday=0).freqstr == 'W-MON'
+        assert Week(weekday=1).freqstr == 'W-TUE'
+        assert Week(weekday=2).freqstr == 'W-WED'
+        assert Week(weekday=3).freqstr == 'W-THU'
+        assert Week(weekday=4).freqstr == 'W-FRI'
+
+        assert LastWeekOfMonth(weekday=WeekDay.SUN).freqstr == "LWOM-SUN"
+        assert (makeFY5253LastOfMonthQuarter(
+            weekday=1, startingMonth=3,
+            qtr_with_extra_week=4).freqstr == "REQ-L-MAR-TUE-4")
+        assert (makeFY5253NearestEndMonthQuarter(
+            weekday=1, startingMonth=3,
+            qtr_with_extra_week=3).freqstr == "REQ-N-MAR-TUE-3")


 def test_get_offset():
@@ -4594,9 +4553,9 @@ class TestParseTimeString(tm.TestCase):

     def test_parse_time_string(self):
         (date, parsed, reso) = parse_time_string('4Q1984')
         (date_lower, parsed_lower, reso_lower) = parse_time_string('4q1984')
-        self.assertEqual(date, date_lower)
-        self.assertEqual(parsed, parsed_lower)
-        self.assertEqual(reso, reso_lower)
+        assert date == date_lower
+        assert parsed == parsed_lower
+        assert reso == reso_lower

     def test_parse_time_quarter_w_dash(self):
         # https://github.com/pandas-dev/pandas/issue/9688
@@ -4606,9 +4565,9 @@ def test_parse_time_quarter_w_dash(self):
             (date_dash, parsed_dash, reso_dash) = parse_time_string(dashed)
             (date, parsed, reso) = parse_time_string(normal)

-            self.assertEqual(date_dash, date)
-            self.assertEqual(parsed_dash, parsed)
-            self.assertEqual(reso_dash, reso)
+            assert date_dash == date
+            assert parsed_dash == parsed
+            assert reso_dash == reso

         pytest.raises(DateParseError, parse_time_string, "-2Q1992")
         pytest.raises(DateParseError, parse_time_string, "2-Q1992")
@@ -4661,22 +4620,22 @@ def test_alias_equality(self):
         for k, v in compat.iteritems(_offset_map):
             if v is None:
                 continue
-            self.assertEqual(k, v.copy())
+            assert k == v.copy()

     def test_rule_code(self):
         lst = ['M', 'MS', 'BM', 'BMS', 'D', 'B', 'H', 'T', 'S', 'L', 'U']
         for k in lst:
-            self.assertEqual(k, get_offset(k).rule_code)
+            assert k == get_offset(k).rule_code
             # should be cached - this is kind of an internals test...
             assert k in _offset_map
-            self.assertEqual(k, (get_offset(k) * 3).rule_code)
+            assert k == (get_offset(k) * 3).rule_code

         suffix_lst = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
         base = 'W'
         for v in suffix_lst:
             alias = '-'.join([base, v])
-            self.assertEqual(alias, get_offset(alias).rule_code)
-            self.assertEqual(alias, (get_offset(alias) * 5).rule_code)
+            assert alias == get_offset(alias).rule_code
+            assert alias == (get_offset(alias) * 5).rule_code

         suffix_lst = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG',
                       'SEP', 'OCT', 'NOV', 'DEC']
@@ -4684,15 +4643,15 @@ def test_rule_code(self):
         for base in base_lst:
             for v in suffix_lst:
                 alias = '-'.join([base, v])
-                self.assertEqual(alias, get_offset(alias).rule_code)
-                self.assertEqual(alias, (get_offset(alias) * 5).rule_code)
+                assert alias == get_offset(alias).rule_code
+                assert alias == (get_offset(alias) * 5).rule_code

         lst = ['M', 'D', 'B', 'H', 'T', 'S', 'L', 'U']
         for k in lst:
             code, stride = get_freq_code('3' + k)
             assert isinstance(code, int)
-            self.assertEqual(stride, 3)
-            self.assertEqual(k, _get_freq_str(code))
+            assert stride == 3
+            assert k == _get_freq_str(code)


 def test_apply_ticks():
@@ -4804,7 +4763,7 @@ def test_str_for_named_is_name(self):
         _offset_map.clear()
         for name in names:
             offset = get_offset(name)
-            self.assertEqual(offset.freqstr, name)
+            assert offset.freqstr == name


 def get_utc_offset_hours(ts):
@@ -4949,4 +4908,4 @@ def test_all_offset_classes(self):
         for offset, test_values in iteritems(tests):
             first = Timestamp(test_values[0], tz='US/Eastern') + offset()
             second = Timestamp(test_values[1], tz='US/Eastern')
-            self.assertEqual(first, second, msg=str(offset))
+            assert first == second
diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py
index 2c3aa03e85904..8b6774885c8b7 100644
--- a/pandas/tests/tseries/test_timezones.py
+++ b/pandas/tests/tseries/test_timezones.py
@@ -89,7 +89,7 @@ def test_utc_to_local_no_modify_explicit(self):
         # Values are unmodified
         tm.assert_numpy_array_equal(rng.asi8, rng_eastern.asi8)

-        self.assertEqual(rng_eastern.tz, self.tz('US/Eastern'))
+        assert rng_eastern.tz == self.tz('US/Eastern')

     def test_localize_utc_conversion(self):
         # Localizing to time zone should:
@@ -129,16 +129,16 @@ def test_timestamp_tz_localize(self):

         result = stamp.tz_localize(self.tzstr('US/Eastern'))
         expected = Timestamp('3/11/2012 04:00', tz=self.tzstr('US/Eastern'))
-        self.assertEqual(result.hour, expected.hour)
-        self.assertEqual(result, expected)
+        assert result.hour == expected.hour
+        assert result == expected

     def test_timestamp_tz_localize_explicit(self):
         stamp = Timestamp('3/11/2012 04:00')

         result = stamp.tz_localize(self.tz('US/Eastern'))
         expected = Timestamp('3/11/2012 04:00', tz=self.tz('US/Eastern'))
-        self.assertEqual(result.hour, expected.hour)
-        self.assertEqual(result, expected)
+        assert result.hour == expected.hour
+        assert result == expected

     def test_timestamp_constructed_by_date_and_tz(self):
         # Fix Issue 2993, Timestamp cannot be constructed by datetime.date
@@ -147,8 +147,8 @@ def test_timestamp_constructed_by_date_and_tz(self):
         result = Timestamp(date(2012, 3, 11), tz=self.tzstr('US/Eastern'))

         expected = Timestamp('3/11/2012', tz=self.tzstr('US/Eastern'))
-        self.assertEqual(result.hour, expected.hour)
-        self.assertEqual(result, expected)
+        assert result.hour == expected.hour
+        assert result == expected

     def test_timestamp_constructed_by_date_and_tz_explicit(self):
         # Fix Issue 2993, Timestamp cannot be constructed by datetime.date
@@ -157,8 +157,8 @@ def test_timestamp_constructed_by_date_and_tz_explicit(self):
         result = Timestamp(date(2012, 3, 11), tz=self.tz('US/Eastern'))

         expected = Timestamp('3/11/2012', tz=self.tz('US/Eastern'))
-        self.assertEqual(result.hour, expected.hour)
-        self.assertEqual(result, expected)
+        assert result.hour == expected.hour
+        assert result == expected

     def test_timestamp_constructor_near_dst_boundary(self):
         # GH 11481 & 15777
@@ -212,7 +212,7 @@ def test_timestamp_to_datetime_tzoffset(self):
         tzinfo = tzoffset(None, 7200)
         expected = Timestamp('3/11/2012 04:00', tz=tzinfo)
         result = Timestamp(expected.to_pydatetime())
-        self.assertEqual(expected, result)
+        assert expected == result

     def test_timedelta_push_over_dst_boundary(self):
         # #1389
@@ -225,7 +225,7 @@ def test_timedelta_push_over_dst_boundary(self):

         # spring forward, + "7" hours
         expected = Timestamp('3/11/2012 05:00', tz=self.tzstr('US/Eastern'))
-        self.assertEqual(result, expected)
+        assert result == expected

     def test_timedelta_push_over_dst_boundary_explicit(self):
         # #1389
@@ -238,7 +238,7 @@ def test_timedelta_push_over_dst_boundary_explicit(self):

         # spring forward, + "7" hours
         expected = Timestamp('3/11/2012 05:00', tz=self.tz('US/Eastern'))
-        self.assertEqual(result, expected)
+        assert result == expected

     def test_tz_localize_dti(self):
         dti = DatetimeIndex(start='1/1/2005', end='1/1/2005 0:00:30.256',
@@ -278,31 +278,31 @@ def test_astimezone(self):
         utc = Timestamp('3/11/2012 22:00', tz='UTC')
         expected = utc.tz_convert(self.tzstr('US/Eastern'))
         result = utc.astimezone(self.tzstr('US/Eastern'))
-        self.assertEqual(expected, result)
+        assert expected == result
         assert isinstance(result, Timestamp)

     def test_create_with_tz(self):
         stamp = Timestamp('3/11/2012 05:00', tz=self.tzstr('US/Eastern'))
-        self.assertEqual(stamp.hour, 5)
+        assert stamp.hour == 5

         rng = date_range('3/11/2012 04:00', periods=10, freq='H',
                          tz=self.tzstr('US/Eastern'))
-        self.assertEqual(stamp, rng[1])
+        assert stamp == rng[1]

         utc_stamp = Timestamp('3/11/2012 05:00', tz='utc')
         assert utc_stamp.tzinfo is pytz.utc
-        self.assertEqual(utc_stamp.hour, 5)
+        assert utc_stamp.hour == 5

         stamp = Timestamp('3/11/2012 05:00').tz_localize('utc')
-        self.assertEqual(utc_stamp.hour, 5)
+        assert utc_stamp.hour == 5

     def test_create_with_fixed_tz(self):
         off = FixedOffset(420, '+07:00')
         start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off)
         end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off)
         rng = date_range(start=start, end=end)
-        self.assertEqual(off, rng.tz)
+        assert off == rng.tz

         rng2 = date_range(start, periods=len(rng), tz=off)
         tm.assert_index_equal(rng, rng2)
@@ -316,10 +316,10 @@ def test_create_with_fixedoffset_noname(self):
         start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off)
         end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off)
         rng = date_range(start=start, end=end)
-        self.assertEqual(off, rng.tz)
+        assert off == rng.tz

         idx = Index([start, end])
-        self.assertEqual(off, idx.tz)
+        assert off == idx.tz

     def test_date_range_localize(self):
         rng = date_range('3/11/2012 03:00', periods=15, freq='H',
@@ -335,9 +335,9 @@ def test_date_range_localize(self):

         val = rng[0]
         exp = Timestamp('3/11/2012 03:00', tz='US/Eastern')
-        self.assertEqual(val.hour, 3)
-        self.assertEqual(exp.hour, 3)
-        self.assertEqual(val, exp)  # same UTC value
+        assert val.hour == 3
+        assert exp.hour == 3
+        assert val == exp  # same UTC value
         tm.assert_index_equal(rng[:2], rng2)

         # Right before the DST transition
@@ -347,15 +347,15 @@ def test_date_range_localize(self):
                           tz='US/Eastern')
         tm.assert_index_equal(rng, rng2)
         exp = Timestamp('3/11/2012 00:00', tz='US/Eastern')
-        self.assertEqual(exp.hour, 0)
-        self.assertEqual(rng[0], exp)
+        assert exp.hour == 0
+        assert rng[0] == exp
         exp = Timestamp('3/11/2012 01:00', tz='US/Eastern')
-        self.assertEqual(exp.hour, 1)
-        self.assertEqual(rng[1], exp)
+        assert exp.hour == 1
+        assert rng[1] == exp

         rng = date_range('3/11/2012 00:00', periods=10, freq='H',
                          tz='US/Eastern')
-        self.assertEqual(rng[2].hour, 3)
+        assert rng[2].hour == 3

     def test_utc_box_timestamp_and_localize(self):
         rng = date_range('3/11/2012', '3/12/2012', freq='H', tz='utc')
@@ -365,8 +365,8 @@ def test_utc_box_timestamp_and_localize(self):
         expected = rng[-1].astimezone(tz)

         stamp = rng_eastern[-1]
-        self.assertEqual(stamp, expected)
-        self.assertEqual(stamp.tzinfo, expected.tzinfo)
+        assert stamp == expected
+        assert stamp.tzinfo == expected.tzinfo

         # right tzinfo
         rng = date_range('3/13/2012', '3/14/2012', freq='H', tz='utc')
@@ -383,7 +383,7 @@ def test_timestamp_tz_convert(self):

         conv = idx[0].tz_convert(self.tzstr('US/Pacific'))
         expected = idx.tz_convert(self.tzstr('US/Pacific'))[0]
-        self.assertEqual(conv, expected)
+        assert conv == expected

     def test_pass_dates_localize_to_utc(self):
         strdates = ['1/1/2012', '3/1/2012', '4/1/2012']
@@ -393,7 +393,7 @@ def test_pass_dates_localize_to_utc(self):

         fromdates = DatetimeIndex(strdates, tz=self.tzstr('US/Eastern'))

-        self.assertEqual(conv.tz, fromdates.tz)
+        assert conv.tz == fromdates.tz
         tm.assert_numpy_array_equal(conv.values, fromdates.values)

     def test_field_access_localize(self):
@@ -560,12 +560,12 @@ def f():
         times = date_range("2013-10-26 23:00", "2013-10-27 01:00", freq="H",
                            tz=tz, ambiguous='infer')
-        self.assertEqual(times[0], Timestamp('2013-10-26 23:00', tz=tz,
-                                             freq="H"))
+        assert times[0] == Timestamp('2013-10-26 23:00', tz=tz, freq="H")
+
         if dateutil.__version__ != LooseVersion('2.6.0'):
-            # GH 14621
-            self.assertEqual(times[-1], Timestamp('2013-10-27 01:00:00+0000',
-                                                  tz=tz, freq="H"))
+            # see gh-14621
+            assert times[-1] == Timestamp('2013-10-27 01:00:00+0000',
+                                          tz=tz, freq="H")

     def test_ambiguous_nat(self):
         tz = self.tz('US/Eastern')
@@ -595,10 +595,10 @@ def f():
         pytest.raises(pytz.AmbiguousTimeError, f)

         result = t.tz_localize('US/Central', ambiguous=True)
-        self.assertEqual(result, expected0)
+        assert result == expected0

         result = t.tz_localize('US/Central', ambiguous=False)
-        self.assertEqual(result, expected1)
+        assert result == expected1

         s = Series([t])
         expected0 = Series([expected0])
@@ -674,8 +674,8 @@ def test_take_dont_lose_meta(self):
         rng = date_range('1/1/2000', periods=20, tz=self.tzstr('US/Eastern'))

         result = rng.take(lrange(5))
-        self.assertEqual(result.tz, rng.tz)
-        self.assertEqual(result.freq, rng.freq)
+        assert result.tz == rng.tz
+        assert result.freq == rng.freq

     def test_index_with_timezone_repr(self):
         rng = date_range('4/13/2010', '5/6/2010')
@@ -694,14 +694,14 @@ def test_index_astype_asobject_tzinfos(self):
         objs = rng.asobject
         for i, x in enumerate(objs):
             exval = rng[i]
-            self.assertEqual(x, exval)
-            self.assertEqual(x.tzinfo, exval.tzinfo)
+            assert x == exval
+            assert x.tzinfo == exval.tzinfo

         objs = rng.astype(object)
         for i, x in enumerate(objs):
             exval = rng[i]
-            self.assertEqual(x, exval)
-            self.assertEqual(x.tzinfo, exval.tzinfo)
+            assert x == exval
+            assert x.tzinfo == exval.tzinfo

     def test_localized_at_time_between_time(self):
         from datetime import time
@@ -736,7 +736,7 @@ def test_fixed_offset(self):
                  datetime(2000, 1, 2, tzinfo=fixed_off),
                  datetime(2000, 1, 3, tzinfo=fixed_off)]
         result = to_datetime(dates)
-        self.assertEqual(result.tz, fixed_off)
+        assert result.tz == fixed_off

     def test_fixedtz_topydatetime(self):
         dates = np.array([datetime(2000, 1, 1, tzinfo=fixed_off),
@@ -796,7 +796,7 @@ def test_frame_no_datetime64_dtype(self):
         dr_tz = dr.tz_localize(self.tzstr('US/Eastern'))
         e = DataFrame({'A': 'foo', 'B': dr_tz}, index=dr)
         tz_expected = DatetimeTZDtype('ns', dr_tz.tzinfo)
-        self.assertEqual(e['B'].dtype, tz_expected)
+        assert e['B'].dtype == tz_expected

         # GH 2810 (with timezones)
         datetimes_naive = [ts.to_pydatetime() for ts in dr]
@@ -830,7 +830,7 @@ def test_shift_localized(self):
         dr_tz = dr.tz_localize(self.tzstr('US/Eastern'))

         result = dr_tz.shift(1, '10T')
-        self.assertEqual(result.tz, dr_tz.tz)
+        assert result.tz == dr_tz.tz

     def test_tz_aware_asfreq(self):
         dr = date_range('2011-12-01', '2012-07-20', freq='D',
@@ -870,8 +870,8 @@ def test_convert_datetime_list(self):
                          tz=self.tzstr('US/Eastern'), name='foo')
         dr2 = DatetimeIndex(list(dr), name='foo')
         tm.assert_index_equal(dr, dr2)
-        self.assertEqual(dr.tz, dr2.tz)
-        self.assertEqual(dr2.name, 'foo')
+        assert dr.tz == dr2.tz
+        assert dr2.name == 'foo'

     def test_frame_from_records_utc(self):
         rec = {'datum': 1.5,
@@ -886,7 +886,7 @@ def test_frame_reset_index(self):
         roundtripped = df.reset_index().set_index('index')
         xp = df.index.tz
         rs = roundtripped.index.tz
-        self.assertEqual(xp, rs)
+        assert xp == rs

     def test_dateutil_tzoffset_support(self):
         from dateutil.tz import tzoffset
@@ -896,7 +896,7 @@ def test_dateutil_tzoffset_support(self):
                  datetime(2012, 5, 11, 12, tzinfo=tzinfo)]
         series = Series(data=values, index=index)

-        self.assertEqual(series.index.tz, tzinfo)
+        assert series.index.tz == tzinfo

         # it works! #2443
         repr(series.index[0])
@@ -909,7 +909,7 @@ def test_getitem_pydatetime_tz(self):
                            tz=self.tzstr('Europe/Berlin'))
         time_datetime = self.localize(
             self.tz('Europe/Berlin'), datetime(2012, 12, 24, 17, 0))
-        self.assertEqual(ts[time_pandas], ts[time_datetime])
+        assert ts[time_pandas] == ts[time_datetime]

     def test_index_drop_dont_lose_tz(self):
         # #2621
@@ -977,12 +977,12 @@ def test_utc_with_system_utc(self):
         # from system utc to real utc
         ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC'))
         # check that the time hasn't changed.
-        self.assertEqual(ts, ts.tz_convert(dateutil.tz.tzutc()))
+        assert ts == ts.tz_convert(dateutil.tz.tzutc())

         # from system utc to real utc
         ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC'))
         # check that the time hasn't changed.
-        self.assertEqual(ts, ts.tz_convert(dateutil.tz.tzutc()))
+        assert ts == ts.tz_convert(dateutil.tz.tzutc())

     def test_tz_convert_hour_overflow_dst(self):
         # Regression test for:
@@ -1140,16 +1140,16 @@ def test_tslib_tz_convert_dst(self):
     def test_tzlocal(self):
         # GH 13583
         ts = Timestamp('2011-01-01', tz=dateutil.tz.tzlocal())
-        self.assertEqual(ts.tz, dateutil.tz.tzlocal())
+        assert ts.tz == dateutil.tz.tzlocal()
         assert "tz='tzlocal()')" in repr(ts)

         tz = tslib.maybe_get_tz('tzlocal()')
-        self.assertEqual(tz, dateutil.tz.tzlocal())
+        assert tz == dateutil.tz.tzlocal()

         # get offset using normal datetime for test
         offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1))
         offset = offset.total_seconds() * 1000000000
-        self.assertEqual(ts.value + offset, Timestamp('2011-01-01').value)
+        assert ts.value + offset == Timestamp('2011-01-01').value

     def test_tz_localize_tzlocal(self):
         # GH 13583
@@ -1208,26 +1208,26 @@ def test_replace(self):
         dt = Timestamp('2016-01-01 09:00:00')
         result = dt.replace(hour=0)
         expected = Timestamp('2016-01-01 00:00:00')
-        self.assertEqual(result, expected)
+        assert result == expected

         for tz in self.timezones:
             dt = Timestamp('2016-01-01 09:00:00', tz=tz)
             result = dt.replace(hour=0)
             expected = Timestamp('2016-01-01 00:00:00', tz=tz)
-            self.assertEqual(result, expected)
+            assert result == expected

         # we preserve nanoseconds
         dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
         result = dt.replace(hour=0)
         expected = Timestamp('2016-01-01 00:00:00.000000123', tz=tz)
-        self.assertEqual(result, expected)
+        assert result == expected

         # test all
         dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
         result = dt.replace(year=2015, month=2, day=2, hour=0, minute=5,
                             second=5, microsecond=5, nanosecond=5)
         expected = Timestamp('2015-02-02 00:05:05.000005005', tz=tz)
-        self.assertEqual(result, expected)
+        assert result == expected

         # error
         def f():
@@ -1240,7 +1240,7 @@ def f():

         # assert conversion to naive is the same as replacing tzinfo with None
         dt = Timestamp('2013-11-03 01:59:59.999999-0400', tz='US/Eastern')
-        self.assertEqual(dt.tz_localize(None), dt.replace(tzinfo=None))
+        assert dt.tz_localize(None) == dt.replace(tzinfo=None)

     def test_ambiguous_compat(self):
         # validate that pytz and dateutil are compat for dst
@@ -1254,31 +1254,31 @@ def test_ambiguous_compat(self):
                        .tz_localize(pytz_zone, ambiguous=0))
         result_dateutil = (Timestamp('2013-10-27 01:00:00')
                            .tz_localize(dateutil_zone, ambiguous=0))
-        self.assertEqual(result_pytz.value, result_dateutil.value)
-        self.assertEqual(result_pytz.value, 1382835600000000000)
+        assert result_pytz.value == result_dateutil.value
+        assert result_pytz.value == 1382835600000000000

         # dateutil 2.6 buggy w.r.t. ambiguous=0
         if dateutil.__version__ != LooseVersion('2.6.0'):
-            # GH 14621
-            # https://github.com/dateutil/dateutil/issues/321
-            self.assertEqual(result_pytz.to_pydatetime().tzname(),
-                             result_dateutil.to_pydatetime().tzname())
-            self.assertEqual(str(result_pytz), str(result_dateutil))
+            # see gh-14621
+            # see https://github.com/dateutil/dateutil/issues/321
+            assert (result_pytz.to_pydatetime().tzname() ==
+                    result_dateutil.to_pydatetime().tzname())
+            assert str(result_pytz) == str(result_dateutil)

         # 1 hour difference
         result_pytz = (Timestamp('2013-10-27 01:00:00')
                        .tz_localize(pytz_zone, ambiguous=1))
         result_dateutil = (Timestamp('2013-10-27 01:00:00')
                            .tz_localize(dateutil_zone, ambiguous=1))
-        self.assertEqual(result_pytz.value, result_dateutil.value)
-        self.assertEqual(result_pytz.value, 1382832000000000000)
+        assert result_pytz.value == result_dateutil.value
+        assert result_pytz.value == 1382832000000000000

         # dateutil < 2.6 is buggy w.r.t. ambiguous timezones
         if dateutil.__version__ > LooseVersion('2.5.3'):
-            # GH 14621
-            self.assertEqual(str(result_pytz), str(result_dateutil))
-            self.assertEqual(result_pytz.to_pydatetime().tzname(),
-                             result_dateutil.to_pydatetime().tzname())
+            # see gh-14621
+            assert str(result_pytz) == str(result_dateutil)
+            assert (result_pytz.to_pydatetime().tzname() ==
+                    result_dateutil.to_pydatetime().tzname())

     def test_index_equals_with_tz(self):
         left = date_range('1/1/2011', periods=100, freq='H', tz='utc')
@@ -1319,17 +1319,17 @@ def test_series_frame_tz_localize(self):

         ts = Series(1, index=rng)
         result = ts.tz_localize('utc')
-        self.assertEqual(result.index.tz.zone, 'UTC')
+        assert result.index.tz.zone == 'UTC'

         df = DataFrame({'a': 1}, index=rng)
         result = df.tz_localize('utc')
         expected = DataFrame({'a': 1}, rng.tz_localize('UTC'))
-        self.assertEqual(result.index.tz.zone, 'UTC')
+        assert result.index.tz.zone == 'UTC'
         assert_frame_equal(result, expected)

         df = df.T
         result = df.tz_localize('utc', axis=1)
-        self.assertEqual(result.columns.tz.zone, 'UTC')
+        assert result.columns.tz.zone == 'UTC'
         assert_frame_equal(result, expected.T)

         # Can't localize if already tz-aware
@@ -1343,17 +1343,17 @@ def test_series_frame_tz_convert(self):

         ts = Series(1, index=rng)
         result = ts.tz_convert('Europe/Berlin')
-        self.assertEqual(result.index.tz.zone, 'Europe/Berlin')
+        assert result.index.tz.zone == 'Europe/Berlin'

         df = DataFrame({'a': 1}, index=rng)
         result = df.tz_convert('Europe/Berlin')
         expected = DataFrame({'a': 1}, rng.tz_convert('Europe/Berlin'))
-        self.assertEqual(result.index.tz.zone, 'Europe/Berlin')
+        assert result.index.tz.zone == 'Europe/Berlin'
         assert_frame_equal(result, expected)

         df = df.T
         result = df.tz_convert('Europe/Berlin', axis=1)
-        self.assertEqual(result.columns.tz.zone, 'Europe/Berlin')
+        assert result.columns.tz.zone == 'Europe/Berlin'
         assert_frame_equal(result, expected.T)

         # can't convert tz-naive
@@ -1398,11 +1398,11 @@ def test_join_utc_convert(self):
         for how in ['inner', 'outer', 'left', 'right']:
             result = left.join(left[:-5], how=how)
             assert isinstance(result, DatetimeIndex)
-            self.assertEqual(result.tz, left.tz)
+            assert result.tz == left.tz

             result = left.join(right[:-5], how=how)
             assert isinstance(result, DatetimeIndex)
-            self.assertEqual(result.tz.zone, 'UTC')
+            assert result.tz.zone == 'UTC'

     def test_join_aware(self):
         rng = date_range('1/1/2011', periods=10, freq='H')
@@ -1443,30 +1443,30 @@ def test_align_aware(self):
         df1 = DataFrame(np.random.randn(len(idx1), 3), idx1)
         df2 = DataFrame(np.random.randn(len(idx2), 3), idx2)
         new1, new2 
= df1.align(df2) - self.assertEqual(df1.index.tz, new1.index.tz) - self.assertEqual(df2.index.tz, new2.index.tz) + assert df1.index.tz == new1.index.tz + assert df2.index.tz == new2.index.tz # # different timezones convert to UTC # frame df1_central = df1.tz_convert('US/Central') new1, new2 = df1.align(df1_central) - self.assertEqual(new1.index.tz, pytz.UTC) - self.assertEqual(new2.index.tz, pytz.UTC) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC # series new1, new2 = df1[0].align(df1_central[0]) - self.assertEqual(new1.index.tz, pytz.UTC) - self.assertEqual(new2.index.tz, pytz.UTC) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC # combination new1, new2 = df1.align(df1_central[0], axis=0) - self.assertEqual(new1.index.tz, pytz.UTC) - self.assertEqual(new2.index.tz, pytz.UTC) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC df1[0].align(df1_central, axis=0) - self.assertEqual(new1.index.tz, pytz.UTC) - self.assertEqual(new2.index.tz, pytz.UTC) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC def test_append_aware(self): rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', @@ -1481,7 +1481,7 @@ def test_append_aware(self): tz='US/Eastern') exp = Series([1, 2], index=exp_index) assert_series_equal(ts_result, exp) - self.assertEqual(ts_result.index.tz, rng1.tz) + assert ts_result.index.tz == rng1.tz rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', tz='UTC') rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', tz='UTC') @@ -1494,7 +1494,7 @@ def test_append_aware(self): exp = Series([1, 2], index=exp_index) assert_series_equal(ts_result, exp) utc = rng1.tz - self.assertEqual(utc, ts_result.index.tz) + assert utc == ts_result.index.tz # GH 7795 # different tz coerces to object dtype, not UTC @@ -1525,7 +1525,7 @@ def test_append_dst(self): tz='US/Eastern') exp = Series([1, 2, 3, 10, 11, 12], index=exp_index) assert_series_equal(ts_result, exp) - self.assertEqual(ts_result.index.tz, rng1.tz) + assert ts_result.index.tz == rng1.tz def test_append_aware_naive(self): rng1 = date_range('1/1/2011 01:00', periods=1, freq='H') @@ -1584,7 +1584,7 @@ def test_arith_utc_convert(self): uts2 = ts2.tz_convert('utc') expected = uts1 + uts2 - self.assertEqual(result.index.tz, pytz.UTC) + assert result.index.tz == pytz.UTC assert_series_equal(result, expected) def test_intersection(self): @@ -1593,9 +1593,9 @@ def test_intersection(self): left = rng[10:90][::-1] right = rng[20:80][::-1] - self.assertEqual(left.tz, rng.tz) + assert left.tz == rng.tz result = left.intersection(right) - self.assertEqual(result.tz, left.tz) + assert result.tz == left.tz def test_timestamp_equality_different_timezones(self): utc_range = date_range('1/1/2000', periods=20, tz='UTC') @@ -1603,9 +1603,9 @@ def test_timestamp_equality_different_timezones(self): berlin_range = utc_range.tz_convert('Europe/Berlin') for a, b, c in zip(utc_range, eastern_range, berlin_range): - self.assertEqual(a, b) - self.assertEqual(b, c) - self.assertEqual(a, c) + assert a == b + assert b == c + assert a == c assert (utc_range == eastern_range).all() assert (utc_range == berlin_range).all() @@ -1670,7 +1670,7 @@ def test_normalize_tz_local(self): def test_tzaware_offset(self): dates = date_range('2012-11-01', periods=3, tz='US/Pacific') offset = dates + offsets.Hour(5) - self.assertEqual(dates[0] + offsets.Hour(5), offset[0]) + assert dates[0] + offsets.Hour(5) == offset[0] # GH 6818 for tz in ['UTC', 'US/Pacific', 'Asia/Tokyo']: From 
de8734474daddf772d97c66a9ef759e23a3d362f Mon Sep 17 00:00:00 2001 From: dwkenefick Date: Sun, 30 Apr 2017 07:09:45 -0400 Subject: [PATCH 485/933] DOC: pandas read_* example #15904 (#16166) * DOC: pandas read_* wildcard #15904 Added example in cookbook about reading multiple files into a dataframe. --- doc/source/cookbook.rst | 38 +++++++++++++++++++++++++++++++++++--- doc/source/io.rst | 8 ++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 8fa1283ffc924..8466b3d3c3297 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -910,9 +910,6 @@ The :ref:`CSV ` docs `appending to a csv `__ -`how to read in multiple files, appending to create a single dataframe -`__ - `Reading a csv chunk-by-chunk `__ @@ -943,6 +940,41 @@ using that handle to read. `Write a multi-row index CSV without writing duplicates `__ +.. _cookbook.csv.multiple_files: + +Reading multiple files to create a single DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The best way to combine multiple files into a single DataFrame is to read the individual frames one by one, put all +of the individual frames into a list, and then combine the frames in the list using :func:`pd.concat`: + +.. ipython:: python + + for i in range(3): + data = pd.DataFrame(np.random.randn(10, 4)) + data.to_csv('file_{}.csv'.format(i)) + + files = ['file_0.csv', 'file_1.csv', 'file_2.csv'] + result = pd.concat([pd.read_csv(f) for f in files], ignore_index=True) + +You can use the same approach to read all files matching a pattern. Here is an example using ``glob``: + +.. ipython:: python + + import glob + files = glob.glob('file_*.csv') + result = pd.concat([pd.read_csv(f) for f in files], ignore_index=True) + +Finally, this strategy will work with the other ``pd.read_*(...)`` functions described in the :ref:`io docs`. + +.. ipython:: python + :suppress: + for i in range(3): + os.remove('file_{}.csv'.format(i)) + +Parsing date components in multi-columns +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + Parsing date components in multi-columns is faster with a format .. code-block:: python diff --git a/doc/source/io.rst b/doc/source/io.rst index 2b3d2895333d3..9692766505d7a 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1439,6 +1439,14 @@ class of the csv module. For this, you have to specify ``sep=None``. print(open('tmp2.sv').read()) pd.read_csv('tmp2.sv', sep=None, engine='python') +.. _io.multiple_files: + +Reading multiple files to create a single DataFrame +''''''''''''''''''''''''''''''''''''''''''''''''''' + +It's best to use :func:`~pandas.concat` to combine multiple files. +See the :ref:`cookbook` for an example. + .. 
_io.chunking: Iterating through files chunk by chunk From 668f91a86568af8f13b19b2b76ca5f9a373b2a21 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 1 May 2017 04:34:19 -0700 Subject: [PATCH 486/933] TST: DatetimeIndex and its Timestamp elements returning same .weekofyear with tz (#16181) --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/tests/indexes/datetimes/test_misc.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index aded04e82ee7e..6e4756c3c5245 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1580,7 +1580,7 @@ Conversion - Bug in ``Timestamp.replace`` now raises ``TypeError`` when incorrect argument names are given; previously this raised ``ValueError`` (:issue:`15240`) - Bug in ``Timestamp.replace`` with compat for passing long integers (:issue:`15030`) -- Bug in ``Timestamp`` returning UTC based time/date attributes when a timezone was provided (:issue:`13303`) +- Bug in ``Timestamp`` returning UTC based time/date attributes when a timezone was provided (:issue:`13303`, :issue:`6538`) - Bug in ``Timestamp`` incorrectly localizing timezones during construction (:issue:`11481`, :issue:`15777`) - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) - Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index ae5d29ca426b4..d9a61776a0d1c 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -334,6 +334,14 @@ def test_datetimeindex_accessors(self): for ts, value in tests: assert ts == value + # GH 6538: Check that DatetimeIndex and its Timestamp elements + # return the same weekofyear accessor close to new year w/ tz + dates = ["2013/12/29", "2013/12/30", "2013/12/31"] + dates = DatetimeIndex(dates, tz="Europe/Brussels") + expected = [52, 1, 1] + assert dates.weekofyear.tolist() == expected + assert [d.weekofyear for d in dates] == expected + def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) From 6d0118e3573bde1e009949eda30d1844fb344789 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 1 May 2017 07:40:42 -0400 Subject: [PATCH 487/933] MAINT: Remove self.assertNotEqual from testing (#16176) --- pandas/tests/frame/test_api.py | 2 +- pandas/tests/frame/test_indexing.py | 2 +- pandas/tests/frame/test_sorting.py | 2 +- pandas/tests/groupby/test_groupby.py | 2 +- pandas/tests/indexes/test_base.py | 2 +- pandas/tests/indexes/test_numeric.py | 2 +- pandas/tests/io/formats/test_format.py | 10 +++++----- pandas/tests/io/formats/test_style.py | 4 ++-- pandas/tests/io/test_common.py | 4 ++-- pandas/tests/scalar/test_period.py | 4 ++-- pandas/tests/scalar/test_timedelta.py | 2 +- pandas/tests/scalar/test_timestamp.py | 10 +++++----- pandas/tests/series/test_missing.py | 6 +++--- pandas/tests/test_expressions.py | 4 ++-- pandas/tests/test_panel.py | 2 +- pandas/tests/test_panel4d.py | 2 +- pandas/tests/tseries/test_offsets.py | 19 +++++++++---------- pandas/tests/tseries/test_timezones.py | 3 +-- 18 files changed, 40 insertions(+), 42 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index d2a1e32f015b2..208c7b5ace50e 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -247,7 +247,7 @@ def test_deepcopy(self): series = 
cp['A'] series[:] = 10 for idx, value in compat.iteritems(series): - self.assertNotEqual(self.frame['A'][idx], value) + assert self.frame['A'][idx] != value # --------------------------------------------------------------------- # Transposing diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index cd1529d04c991..75d4263cbe68f 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1890,7 +1890,7 @@ def test_nested_exception(self): try: repr(df) except Exception as e: - self.assertNotEqual(type(e), UnboundLocalError) + assert type(e) != UnboundLocalError def test_reindex_methods(self): df = pd.DataFrame({'x': list(range(5))}) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index bdb5fd0e8354c..457ea32ec56f7 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -361,7 +361,7 @@ def test_sort_index_inplace(self): df.sort_index(inplace=True) expected = frame assert_frame_equal(df, expected) - self.assertNotEqual(a_id, id(df['A'])) + assert a_id != id(df['A']) df = unordered.copy() df.sort_index(ascending=False, inplace=True) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 09643e918af31..8d86d40c379bf 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1032,7 +1032,7 @@ def test_frame_set_name_single(self): assert result.index.name == 'A' result = self.df.groupby('A', as_index=False).mean() - self.assertNotEqual(result.index.name, 'A') + assert result.index.name != 'A' result = grouped.agg(np.mean) assert result.index.name == 'A' diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 23c72e511d2b3..10958681af450 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -81,7 +81,7 @@ def test_constructor(self): assert index.name == 'name' tm.assert_numpy_array_equal(arr, index.values) arr[0] = "SOMEBIGLONGSTRING" - self.assertNotEqual(index[0], "SOMEBIGLONGSTRING") + assert index[0] != "SOMEBIGLONGSTRING" # what to do here? # arr = np.array(5.) 
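The hunks in this patch all apply the same mechanical rewrite: each ``self.assertNotEqual(a, b)`` helper call becomes a bare ``assert a != b``, leaving the failure diagnostics to pytest's assertion rewriting, which reports both operands when a plain assert fails. A minimal standalone sketch of the pattern, modelled on the ``test_base.py`` hunk above (the literal values here are illustrative, not taken verbatim from the pandas suite):

import numpy as np
import pandas as pd

def test_index_copies_input_array():
    # Index(arr) copies the input ndarray, so mutating the source
    # array afterwards must not be visible through the Index.
    arr = np.array(['a', 'b', 'c'], dtype=object)
    index = pd.Index(arr, name='name')
    arr[0] = 'SOMEBIGLONGSTRING'
    # unittest style (old): self.assertNotEqual(index[0], 'SOMEBIGLONGSTRING')
    # pytest style (new): a plain assert carries the same meaning
    assert index[0] != 'SOMEBIGLONGSTRING'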
diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 19bca875e650d..428c261df5654 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -653,7 +653,7 @@ def test_constructor(self): # this should not change index arr[0] = val - self.assertNotEqual(new_index[0], val) + assert new_index[0] != val # interpret list-like expected = Int64Index([5, 0]) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index dee645e9d70ec..ac00e441047dd 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -918,7 +918,7 @@ def test_wide_repr(self): assert "10 rows x %d columns" % (max_cols - 1) in rep_str set_option('display.expand_frame_repr', True) wide_repr = repr(df) - self.assertNotEqual(rep_str, wide_repr) + assert rep_str != wide_repr with option_context('display.width', 120): wider_repr = repr(df) @@ -944,7 +944,7 @@ def test_wide_repr_named(self): rep_str = repr(df) set_option('display.expand_frame_repr', True) wide_repr = repr(df) - self.assertNotEqual(rep_str, wide_repr) + assert rep_str != wide_repr with option_context('display.width', 150): wider_repr = repr(df) @@ -966,7 +966,7 @@ def test_wide_repr_multiindex(self): rep_str = repr(df) set_option('display.expand_frame_repr', True) wide_repr = repr(df) - self.assertNotEqual(rep_str, wide_repr) + assert rep_str != wide_repr with option_context('display.width', 150): wider_repr = repr(df) @@ -990,7 +990,7 @@ def test_wide_repr_multiindex_cols(self): rep_str = repr(df) set_option('display.expand_frame_repr', True) wide_repr = repr(df) - self.assertNotEqual(rep_str, wide_repr) + assert rep_str != wide_repr with option_context('display.width', 150): wider_repr = repr(df) @@ -1006,7 +1006,7 @@ def test_wide_repr_unicode(self): rep_str = repr(df) set_option('display.expand_frame_repr', True) wide_repr = repr(df) - self.assertNotEqual(rep_str, wide_repr) + assert rep_str != wide_repr with option_context('display.width', 150): wider_repr = repr(df) diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 371cc2b61634a..f421c0f8e6d69 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -85,9 +85,9 @@ def test_deepcopy(self): self.styler._update_ctx(self.attrs) self.styler.highlight_max() - self.assertNotEqual(self.styler.ctx, s2.ctx) + assert self.styler.ctx != s2.ctx assert s2._todo == [] - self.assertNotEqual(self.styler._todo, s2._todo) + assert self.styler._todo != s2._todo def test_clear(self): s = self.df.style.highlight_max()._compute() diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 804d76c3c9eca..c427fab4103e0 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -38,7 +38,7 @@ def test_expand_user(self): filename = '~/sometest' expanded_name = common._expand_user(filename) - self.assertNotEqual(expanded_name, filename) + assert expanded_name != filename assert isabs(expanded_name) assert os.path.expanduser(filename) == expanded_name @@ -68,7 +68,7 @@ def test_stringify_path_localpath(self): def test_get_filepath_or_buffer_with_path(self): filename = '~/sometest' filepath_or_buffer, _, _ = common.get_filepath_or_buffer(filename) - self.assertNotEqual(filepath_or_buffer, filename) + assert filepath_or_buffer != filename assert isabs(filepath_or_buffer) assert os.path.expanduser(filename) == filepath_or_buffer diff --git 
a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index 00a1fa1b507b6..2e60cfdb7a4f2 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -938,8 +938,8 @@ def test_equal_Raises_Value(self): self.january1 == self.day def test_notEqual(self): - self.assertNotEqual(self.january1, 1) - self.assertNotEqual(self.january1, self.february) + assert self.january1 != 1 + assert self.january1 != self.february def test_greater(self): assert self.february > self.january1 diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index faddbcc84109f..5659bc26fc1cc 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -560,7 +560,7 @@ def test_timedelta_hash_equality(self): # python timedeltas drop ns resolution ns_td = Timedelta(1, 'ns') - self.assertNotEqual(hash(ns_td), hash(ns_td.to_pytimedelta())) + assert hash(ns_td) != hash(ns_td.to_pytimedelta()) def test_implementation_limits(self): min_td = Timedelta(Timedelta.min) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 8a28a9a4bedd0..04b33bbc6c3bf 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -873,8 +873,8 @@ def test_comparison(self): other = Timestamp(stamp + 100) - self.assertNotEqual(val, other) - self.assertNotEqual(val, other) + assert val != other + assert val != other assert val < other assert val <= other assert other > val @@ -1375,9 +1375,9 @@ def test_timestamp_compare_with_early_datetime(self): assert not stamp == datetime.min assert not stamp == datetime(1600, 1, 1) assert not stamp == datetime(2700, 1, 1) - self.assertNotEqual(stamp, datetime.min) - self.assertNotEqual(stamp, datetime(1600, 1, 1)) - self.assertNotEqual(stamp, datetime(2700, 1, 1)) + assert stamp != datetime.min + assert stamp != datetime(1600, 1, 1) + assert stamp != datetime(2700, 1, 1) assert stamp > datetime(1600, 1, 1) assert stamp >= datetime(1600, 1, 1) assert stamp < datetime(2700, 1, 1) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 9937f6a34172e..0eaab2e588cc2 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1078,8 +1078,8 @@ def test_spline_extrapolate(self): def test_spline_smooth(self): tm._skip_if_no_scipy() s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7]) - self.assertNotEqual(s.interpolate(method='spline', order=3, s=0)[5], - s.interpolate(method='spline', order=3)[5]) + assert (s.interpolate(method='spline', order=3, s=0)[5] != + s.interpolate(method='spline', order=3)[5]) def test_spline_interpolation(self): tm._skip_if_no_scipy() @@ -1090,8 +1090,8 @@ def test_spline_interpolation(self): expected1 = s.interpolate(method='spline', order=1) assert_series_equal(result1, expected1) - # GH #10633 def test_spline_error(self): + # see gh-10633 tm._skip_if_no_scipy() s = pd.Series(np.arange(10) ** 2) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index ae505a66ad75a..8ef29097b66e8 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -293,7 +293,7 @@ def testit(): if op is not None: result = expr._can_use_numexpr(op, op_str, f, f, 'evaluate') - self.assertNotEqual(result, f._is_mixed_type) + assert result != f._is_mixed_type result = expr.evaluate(op, op_str, f, f, use_numexpr=True) @@ -336,7 +336,7 @@ def testit(): result = expr._can_use_numexpr(op, op_str, f11, f12, 
'evaluate') - self.assertNotEqual(result, f11._is_mixed_type) + assert result != f11._is_mixed_type result = expr.evaluate(op, op_str, f11, f12, use_numexpr=True) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index a692f6b26c61e..b9cceab4d65f4 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1636,7 +1636,7 @@ def test_swapaxes(self): # this works, but return a copy result = self.panel.swapaxes('items', 'items') assert_panel_equal(self.panel, result) - self.assertNotEqual(id(self.panel), id(result)) + assert id(self.panel) != id(result) def test_transpose(self): with catch_warnings(record=True): diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index f2a1414957d44..041e36848e1d8 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -888,7 +888,7 @@ def test_swapaxes(self): # this works, but return a copy result = self.panel4d.swapaxes('items', 'items') assert_panel4d_equal(self.panel4d, result) - self.assertNotEqual(id(self.panel4d), id(result)) + assert id(self.panel4d) != id(result) def test_update(self): diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index ce4208a8cea69..79190aa98f8d9 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -541,7 +541,7 @@ def test_eq(self): offset1 = DateOffset(days=1) offset2 = DateOffset(days=365) - self.assertNotEqual(offset1, offset2) + assert offset1 != offset2 class TestBusinessDay(Base): @@ -775,12 +775,11 @@ def testEQ(self): for offset in [self.offset1, self.offset2, self.offset3, self.offset4]: assert offset == offset - self.assertNotEqual(BusinessHour(), BusinessHour(-1)) + assert BusinessHour() != BusinessHour(-1) assert BusinessHour(start='09:00') == BusinessHour() - self.assertNotEqual(BusinessHour(start='09:00'), - BusinessHour(start='09:01')) - self.assertNotEqual(BusinessHour(start='09:00', end='17:00'), - BusinessHour(start='17:00', end='09:01')) + assert BusinessHour(start='09:00') != BusinessHour(start='09:01') + assert (BusinessHour(start='09:00', end='17:00') != + BusinessHour(start='17:00', end='09:01')) def test_hash(self): for offset in [self.offset1, self.offset2, self.offset3, self.offset4]: @@ -4362,7 +4361,7 @@ def test_Hour(self): assert Hour(3) + Hour(2) == Hour(5) assert Hour(3) - Hour(2) == Hour() - self.assertNotEqual(Hour(4), Hour(1)) + assert Hour(4) != Hour(1) def test_Minute(self): assertEq(Minute(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 1)) @@ -4374,7 +4373,7 @@ def test_Minute(self): assert Minute(3) + Minute(2) == Minute(5) assert Minute(3) - Minute(2) == Minute() - self.assertNotEqual(Minute(5), Minute()) + assert Minute(5) != Minute() def test_Second(self): assertEq(Second(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 1)) @@ -4464,8 +4463,8 @@ def test_tick_equalities(self): assert t() == t(1) # not equals - self.assertNotEqual(t(3), t(2)) - self.assertNotEqual(t(3), t(-3)) + assert t(3) != t(2) + assert t(3) != t(-3) def test_tick_operators(self): for t in self.ticks: diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 8b6774885c8b7..0c8aaf77aec12 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -1191,8 +1191,7 @@ def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self): if tz_d is None: # skip timezones that dateutil doesn't know about. 
continue - self.assertNotEqual(tslib._p_tz_cache_key( - tz_p), tslib._p_tz_cache_key(tz_d)) + assert tslib._p_tz_cache_key(tz_p) != tslib._p_tz_cache_key(tz_d) class TestTimeZones(tm.TestCase): From d647045d6b25e9549bbccb4c725030b82858c58b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 1 May 2017 11:20:33 -0500 Subject: [PATCH 488/933] BUG: Restore return value in notebook reprs (#16171) * BUG: Restore return value in notebook reprs Monkey patches the _ipython_display_ method onto NDFrame, so that notebook cells have a real return value. Setting the display.html.table_schema will monkey patch the method on, and remove it when unset. closes https://github.com/ipython/ipython/issues/10491 * Define in generic.py * PEP8 --- pandas/core/config_init.py | 17 ++++++- pandas/core/generic.py | 57 +++++++++++++----------- pandas/tests/io/formats/test_printing.py | 27 +++++++++++ 3 files changed, 75 insertions(+), 26 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 70ebb170cb763..d77d17aa4d00e 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -340,6 +340,21 @@ def mpl_style_cb(key): return val +def table_schema_cb(key): + # Having _ipython_display_ defined messes with the return value + # from cells, so the Out[x] dictionary breaks. + # Currently table schema is the only thing using it, so we'll + # monkey patch `_ipython_display_` onto NDFrame when config option + # is set + # see https://github.com/pandas-dev/pandas/issues/16168 + from pandas.core.generic import NDFrame, _ipython_display_ + + if cf.get_option(key): + NDFrame._ipython_display_ = _ipython_display_ + elif getattr(NDFrame, '_ipython_display_', None): + del NDFrame._ipython_display_ + + with cf.config_prefix('display'): cf.register_option('precision', 6, pc_precision_doc, validator=is_int) cf.register_option('float_format', None, float_format_doc, @@ -407,7 +422,7 @@ def mpl_style_cb(key): cf.register_option('latex.multirow', False, pc_latex_multirow, validator=is_bool) cf.register_option('html.table_schema', False, pc_table_schema_doc, - validator=is_bool) + validator=is_bool, cb=table_schema_cb) cf.deprecate_option('display.line_width', diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 70862015dff5b..9318a9f5ef27c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -130,31 +130,6 @@ def __init__(self, data, axes=None, copy=False, dtype=None, object.__setattr__(self, '_data', data) object.__setattr__(self, '_item_cache', {}) - def _ipython_display_(self): - try: - from IPython.display import display - except ImportError: - return None - - # Series doesn't define _repr_html_ or _repr_latex_ - latex = self._repr_latex_() if hasattr(self, '_repr_latex_') else None - html = self._repr_html_() if hasattr(self, '_repr_html_') else None - try: - table_schema = self._repr_table_schema_() - except Exception as e: - warnings.warn("Cannot create table schema representation. " - "{}".format(e), UnserializableWarning) - table_schema = None - # We need the inital newline since we aren't going through the - # usual __repr__. 
See - # https://github.com/pandas-dev/pandas/pull/14904#issuecomment-277829277 - text = "\n" + repr(self) - - reprs = {"text/plain": text, "text/html": html, "text/latex": latex, - "application/vnd.dataresource+json": table_schema} - reprs = {k: v for k, v in reprs.items() if v} - display(reprs, raw=True) - def _repr_table_schema_(self): """ Not a real Jupyter special repr method, but we use the same @@ -6283,6 +6258,38 @@ def logical_func(self, axis=None, bool_only=None, skipna=None, level=None, return set_function_name(logical_func, name, cls) +def _ipython_display_(self): + # Having _ipython_display_ defined messes with the return value + # from cells, so the Out[x] dictionary breaks. + # Currently table schema is the only thing using it, so we'll + # monkey patch `_ipython_display_` onto NDFrame when config option + # is set + # see https://github.com/pandas-dev/pandas/issues/16168 + try: + from IPython.display import display + except ImportError: + return None + + # Series doesn't define _repr_html_ or _repr_latex_ + latex = self._repr_latex_() if hasattr(self, '_repr_latex_') else None + html = self._repr_html_() if hasattr(self, '_repr_html_') else None + try: + table_schema = self._repr_table_schema_() + except Exception as e: + warnings.warn("Cannot create table schema representation. " + "{}".format(e), UnserializableWarning) + table_schema = None + # We need the inital newline since we aren't going through the + # usual __repr__. See + # https://github.com/pandas-dev/pandas/pull/14904#issuecomment-277829277 + text = "\n" + repr(self) + + reprs = {"text/plain": text, "text/html": html, "text/latex": latex, + "application/vnd.dataresource+json": table_schema} + reprs = {k: v for k, v in reprs.items() if v} + display(reprs, raw=True) + + # install the indexes for _name, _indexer in indexing.get_indexers_list(): NDFrame._create_indexer(_name, _indexer) diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 7725b2063c7b6..b8d6e9578339f 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -203,6 +203,33 @@ def test_config_default_off(self): assert result is None + def test_config_monkeypatches(self): + # GH 10491 + df = pd.DataFrame({"A": [1, 2]}) + assert not hasattr(df, '_ipython_display_') + assert not hasattr(df['A'], '_ipython_display_') + + with pd.option_context('display.html.table_schema', True): + assert hasattr(df, '_ipython_display_') + # smoke test that it works + df._ipython_display_() + assert hasattr(df['A'], '_ipython_display_') + df['A']._ipython_display_() + + assert not hasattr(df, '_ipython_display_') + assert not hasattr(df['A'], '_ipython_display_') + # re-unsetting is OK + assert not hasattr(df, '_ipython_display_') + assert not hasattr(df['A'], '_ipython_display_') + + # able to re-set + with pd.option_context('display.html.table_schema', True): + assert hasattr(df, '_ipython_display_') + # smoke test that it works + df._ipython_display_() + assert hasattr(df['A'], '_ipython_display_') + df['A']._ipython_display_() + # TODO: fix this broken test From 096e2158f0101ecc34223e63ef276acb51d9ccca Mon Sep 17 00:00:00 2001 From: dr-leo Date: Mon, 1 May 2017 22:10:11 +0200 Subject: [PATCH 489/933] DOC: Update docs.ecosystem.api.pandasdmx (#16172) * Update docs.ecosystem.api.pandasdmx * Remove links to data providers * Removed references to data providers --- doc/source/ecosystem.rst | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff 
--git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index ee0ea60c6f220..31849fc142aea 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -173,13 +173,15 @@ This package requires valid credentials for this API (non free). `pandaSDMX `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -pandaSDMX is an extensible library to retrieve and acquire statistical data +pandaSDMX is a library to retrieve and acquire statistical data and metadata disseminated in -`SDMX `_ 2.1. This standard is currently supported by -the European statistics office (Eurostat) -and the European Central Bank (ECB). Datasets may be returned as pandas Series -or multi-indexed DataFrames. - +`SDMX `_ 2.1, an ISO-standard +widely used by institutions such as statistics offices, central banks, +and international organisations. pandaSDMX can expose datasets and related +structural metadata including dataflows, code-lists, +and datastructure definitions as pandas Series +or multi-indexed DataFrames. + `fredapi `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fredapi is a Python interface to the `Federal Reserve Economic Data (FRED) `__ From 85c4879095353e04574a9809336b973047b7174f Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 1 May 2017 20:01:58 -0400 Subject: [PATCH 490/933] MAINT: Remove self.assertAlmostEqual from testing (#16183) --- .../indexing/test_chaining_and_caching.py | 2 +- pandas/tests/io/json/test_ujson.py | 87 +++++++++++-------- pandas/tests/plotting/common.py | 8 +- pandas/tests/plotting/test_frame.py | 12 +-- pandas/tests/plotting/test_hist_method.py | 4 +- pandas/tests/plotting/test_series.py | 8 +- pandas/tests/series/test_analytics.py | 22 ++--- pandas/tests/series/test_indexing.py | 2 +- pandas/tests/test_nanops.py | 4 +- pandas/tests/test_window.py | 2 +- pandas/tests/tseries/test_timezones.py | 2 +- pandas/util/testing.py | 3 +- 12 files changed, 85 insertions(+), 71 deletions(-) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index c39876a8c6e44..c1f5d2941106d 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -32,7 +32,7 @@ def test_slice_consolidate_invalidate_item_cache(self): # Assignment to wrong series df['bb'].iloc[0] = 0.17 df._clear_item_cache() - self.assertAlmostEqual(df['bb'][0], 0.17) + tm.assert_almost_equal(df['bb'][0], 0.17) def test_setitem_cache_updating(self): # GH 5424 diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index b132322952024..b749cd150d445 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -738,37 +738,37 @@ def test_numericIntExp(self): def test_numericIntFrcExp(self): input = "1.337E40" output = ujson.decode(input) - self.assertAlmostEqual(output, json.loads(input)) + tm.assert_almost_equal(output, json.loads(input)) def test_decodeNumericIntExpEPLUS(self): input = "1337E+9" output = ujson.decode(input) - self.assertAlmostEqual(output, json.loads(input)) + tm.assert_almost_equal(output, json.loads(input)) def test_decodeNumericIntExpePLUS(self): input = "1.337e+40" output = ujson.decode(input) - self.assertAlmostEqual(output, json.loads(input)) + tm.assert_almost_equal(output, json.loads(input)) def test_decodeNumericIntExpE(self): input = "1337E40" output = ujson.decode(input) - self.assertAlmostEqual(output, json.loads(input)) + tm.assert_almost_equal(output, json.loads(input)) def test_decodeNumericIntExpe(self): 
input = "1337e40" output = ujson.decode(input) - self.assertAlmostEqual(output, json.loads(input)) + tm.assert_almost_equal(output, json.loads(input)) def test_decodeNumericIntExpEMinus(self): input = "1.337E-4" output = ujson.decode(input) - self.assertAlmostEqual(output, json.loads(input)) + tm.assert_almost_equal(output, json.loads(input)) def test_decodeNumericIntExpeMinus(self): input = "1.337e-4" output = ujson.decode(input) - self.assertAlmostEqual(output, json.loads(input)) + tm.assert_almost_equal(output, json.loads(input)) def test_dumpToFile(self): f = StringIO() @@ -1583,36 +1583,49 @@ def test_decodeArrayFaultyUnicode(self): def test_decodeFloatingPointAdditionalTests(self): places = 15 - self.assertAlmostEqual(-1.1234567893, - ujson.loads("-1.1234567893"), places=places) - self.assertAlmostEqual(-1.234567893, - ujson.loads("-1.234567893"), places=places) - self.assertAlmostEqual(-1.34567893, - ujson.loads("-1.34567893"), places=places) - self.assertAlmostEqual(-1.4567893, - ujson.loads("-1.4567893"), places=places) - self.assertAlmostEqual(-1.567893, - ujson.loads("-1.567893"), places=places) - self.assertAlmostEqual(-1.67893, - ujson.loads("-1.67893"), places=places) - self.assertAlmostEqual(-1.7893, ujson.loads("-1.7893"), places=places) - self.assertAlmostEqual(-1.893, ujson.loads("-1.893"), places=places) - self.assertAlmostEqual(-1.3, ujson.loads("-1.3"), places=places) - - self.assertAlmostEqual(1.1234567893, ujson.loads( - "1.1234567893"), places=places) - self.assertAlmostEqual(1.234567893, ujson.loads( - "1.234567893"), places=places) - self.assertAlmostEqual( - 1.34567893, ujson.loads("1.34567893"), places=places) - self.assertAlmostEqual( - 1.4567893, ujson.loads("1.4567893"), places=places) - self.assertAlmostEqual( - 1.567893, ujson.loads("1.567893"), places=places) - self.assertAlmostEqual(1.67893, ujson.loads("1.67893"), places=places) - self.assertAlmostEqual(1.7893, ujson.loads("1.7893"), places=places) - self.assertAlmostEqual(1.893, ujson.loads("1.893"), places=places) - self.assertAlmostEqual(1.3, ujson.loads("1.3"), places=places) + tm.assert_almost_equal(-1.1234567893, + ujson.loads("-1.1234567893"), + check_less_precise=places) + tm.assert_almost_equal(-1.234567893, + ujson.loads("-1.234567893"), + check_less_precise=places) + tm.assert_almost_equal(-1.34567893, + ujson.loads("-1.34567893"), + check_less_precise=places) + tm.assert_almost_equal(-1.4567893, + ujson.loads("-1.4567893"), + check_less_precise=places) + tm.assert_almost_equal(-1.567893, + ujson.loads("-1.567893"), + check_less_precise=places) + tm.assert_almost_equal(-1.67893, + ujson.loads("-1.67893"), + check_less_precise=places) + tm.assert_almost_equal(-1.7893, ujson.loads("-1.7893"), + check_less_precise=places) + tm.assert_almost_equal(-1.893, ujson.loads("-1.893"), + check_less_precise=places) + tm.assert_almost_equal(-1.3, ujson.loads("-1.3"), + check_less_precise=places) + + tm.assert_almost_equal(1.1234567893, ujson.loads( + "1.1234567893"), check_less_precise=places) + tm.assert_almost_equal(1.234567893, ujson.loads( + "1.234567893"), check_less_precise=places) + tm.assert_almost_equal( + 1.34567893, ujson.loads("1.34567893"), check_less_precise=places) + tm.assert_almost_equal( + 1.4567893, ujson.loads("1.4567893"), check_less_precise=places) + tm.assert_almost_equal( + 1.567893, ujson.loads("1.567893"), check_less_precise=places) + tm.assert_almost_equal(1.67893, ujson.loads("1.67893"), + check_less_precise=places) + tm.assert_almost_equal(1.7893, ujson.loads("1.7893"), + 
check_less_precise=places) + tm.assert_almost_equal(1.893, ujson.loads("1.893"), + check_less_precise=places) + tm.assert_almost_equal(1.3, ujson.loads("1.3"), + check_less_precise=places) def test_encodeBigSet(self): s = set() diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 7d0c39dae6e4b..2c0ac974e9e43 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -292,10 +292,10 @@ def _check_ticks_props(self, axes, xlabelsize=None, xrot=None, for label in labels: if xlabelsize is not None: - self.assertAlmostEqual(label.get_fontsize(), + tm.assert_almost_equal(label.get_fontsize(), xlabelsize) if xrot is not None: - self.assertAlmostEqual(label.get_rotation(), xrot) + tm.assert_almost_equal(label.get_rotation(), xrot) if ylabelsize or yrot: if isinstance(ax.yaxis.get_minor_formatter(), NullFormatter): @@ -306,10 +306,10 @@ def _check_ticks_props(self, axes, xlabelsize=None, xrot=None, for label in labels: if ylabelsize is not None: - self.assertAlmostEqual(label.get_fontsize(), + tm.assert_almost_equal(label.get_fontsize(), ylabelsize) if yrot is not None: - self.assertAlmostEqual(label.get_rotation(), yrot) + tm.assert_almost_equal(label.get_rotation(), yrot) def _check_ax_scales(self, axes, xaxis='linear', yaxis='linear'): """ diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 7297e3548b956..03bc477d6f852 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -1036,8 +1036,8 @@ def _check_bar_alignment(self, df, kind='bar', stacked=False, # GH 7498 # compare margins between lim and bar edges - self.assertAlmostEqual(ax_min, min_edge - 0.25) - self.assertAlmostEqual(ax_max, max_edge + 0.25) + tm.assert_almost_equal(ax_min, min_edge - 0.25) + tm.assert_almost_equal(ax_max, max_edge + 0.25) p = ax.patches[0] if kind == 'bar' and (stacked is True or subplots is True): @@ -1061,10 +1061,10 @@ def _check_bar_alignment(self, df, kind='bar', stacked=False, if align == 'center': # Check whether the bar locates on center - self.assertAlmostEqual(axis.get_ticklocs()[0], center) + tm.assert_almost_equal(axis.get_ticklocs()[0], center) elif align == 'edge': # Check whether the bar's edge starts from the tick - self.assertAlmostEqual(axis.get_ticklocs()[0], edge) + tm.assert_almost_equal(axis.get_ticklocs()[0], edge) else: raise ValueError @@ -1314,13 +1314,13 @@ def test_hist_df(self): ax = series.plot.hist(normed=True, cumulative=True, bins=4) # height of last bin (index 5) must be 1.0 rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] - self.assertAlmostEqual(rects[-1].get_height(), 1.0) + tm.assert_almost_equal(rects[-1].get_height(), 1.0) tm.close() ax = series.plot.hist(cumulative=True, bins=4) rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] - self.assertAlmostEqual(rects[-2].get_height(), 100.0) + tm.assert_almost_equal(rects[-2].get_height(), 100.0) tm.close() # if horizontal, yticklabels are rotated diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 39bab59242c22..b75fcd4d8b680 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -196,7 +196,7 @@ def test_hist_df_legacy(self): ax = ser.hist(normed=True, cumulative=True, bins=4) # height of last bin (index 5) must be 1.0 rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] - self.assertAlmostEqual(rects[-1].get_height(), 1.0) + 
tm.assert_almost_equal(rects[-1].get_height(), 1.0) tm.close() ax = ser.hist(log=True) @@ -286,7 +286,7 @@ def test_grouped_hist_legacy(self): for ax in axes.ravel(): rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] height = rects[-1].get_height() - self.assertAlmostEqual(height, 1.0) + tm.assert_almost_equal(height, 1.0) self._check_ticks_props(axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index d1325c7130d04..91a27142069c7 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -222,15 +222,15 @@ def test_bar_log(self): ymin = 0.0007943282347242822 if self.mpl_ge_2_0_0 else 0.001 ymax = 0.12589254117941673 if self.mpl_ge_2_0_0 else .10000000000000001 res = ax.get_ylim() - self.assertAlmostEqual(res[0], ymin) - self.assertAlmostEqual(res[1], ymax) + tm.assert_almost_equal(res[0], ymin) + tm.assert_almost_equal(res[1], ymax) tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) tm.close() ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind='barh') res = ax.get_xlim() - self.assertAlmostEqual(res[0], ymin) - self.assertAlmostEqual(res[1], ymax) + tm.assert_almost_equal(res[0], ymin) + tm.assert_almost_equal(res[1], ymax) tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), expected) @slow diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 73515c47388ea..71131452393a7 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -123,7 +123,7 @@ def test_median(self): # test with integers, test failure int_ts = Series(np.ones(10, dtype=int), index=lrange(10)) - self.assertAlmostEqual(np.median(int_ts), int_ts.median()) + tm.assert_almost_equal(np.median(int_ts), int_ts.median()) def test_mode(self): # No mode should be found. 
@@ -298,7 +298,7 @@ def test_kurt(self): labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]]) s = Series(np.random.randn(6), index=index) - self.assertAlmostEqual(s.kurt(), s.kurt(level=0)['bar']) + tm.assert_almost_equal(s.kurt(), s.kurt(level=0)['bar']) # test corner cases, kurt() returns NaN unless there's at least 4 # values @@ -743,10 +743,10 @@ def test_corr(self): import scipy.stats as stats # full overlap - self.assertAlmostEqual(self.ts.corr(self.ts), 1) + tm.assert_almost_equal(self.ts.corr(self.ts), 1) # partial overlap - self.assertAlmostEqual(self.ts[:15].corr(self.ts[5:]), 1) + tm.assert_almost_equal(self.ts[:15].corr(self.ts[5:]), 1) assert isnull(self.ts[:15].corr(self.ts[5:], min_periods=12)) @@ -766,7 +766,7 @@ def test_corr(self): B = tm.makeTimeSeries() result = A.corr(B) expected, _ = stats.pearsonr(A, B) - self.assertAlmostEqual(result, expected) + tm.assert_almost_equal(result, expected) def test_corr_rank(self): tm._skip_if_no_scipy() @@ -780,11 +780,11 @@ def test_corr_rank(self): A[-5:] = A[:5] result = A.corr(B, method='kendall') expected = stats.kendalltau(A, B)[0] - self.assertAlmostEqual(result, expected) + tm.assert_almost_equal(result, expected) result = A.corr(B, method='spearman') expected = stats.spearmanr(A, B)[0] - self.assertAlmostEqual(result, expected) + tm.assert_almost_equal(result, expected) # these methods got rewritten in 0.8 if scipy.__version__ < LooseVersion('0.9'): @@ -800,15 +800,15 @@ def test_corr_rank(self): 1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375]) kexp = 0.4319297 sexp = 0.5853767 - self.assertAlmostEqual(A.corr(B, method='kendall'), kexp) - self.assertAlmostEqual(A.corr(B, method='spearman'), sexp) + tm.assert_almost_equal(A.corr(B, method='kendall'), kexp) + tm.assert_almost_equal(A.corr(B, method='spearman'), sexp) def test_cov(self): # full overlap - self.assertAlmostEqual(self.ts.cov(self.ts), self.ts.std() ** 2) + tm.assert_almost_equal(self.ts.cov(self.ts), self.ts.std() ** 2) # partial overlap - self.assertAlmostEqual(self.ts[:15].cov(self.ts[5:]), + tm.assert_almost_equal(self.ts[:15].cov(self.ts[5:]), self.ts[5:15].std() ** 2) # No overlap diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 9f5d80411ed17..394ae88983faa 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -558,7 +558,7 @@ def test_getitem_setitem_integers(self): assert s.iloc[0] == s['a'] s.iloc[0] = 5 - self.assertAlmostEqual(s['a'], 5) + tm.assert_almost_equal(s['a'], 5) def test_getitem_box_float64(self): value = self.ts[5] diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 35d0198ae06a9..2aa3638b18e9b 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -922,7 +922,7 @@ def test_all_finite(self): def test_ground_truth(self): skew = nanops.nanskew(self.samples) - self.assertAlmostEqual(skew, self.actual_skew) + tm.assert_almost_equal(skew, self.actual_skew) def test_axis(self): samples = np.vstack([self.samples, @@ -972,7 +972,7 @@ def test_all_finite(self): def test_ground_truth(self): kurt = nanops.nankurt(self.samples) - self.assertAlmostEqual(kurt, self.actual_kurt) + tm.assert_almost_equal(kurt, self.actual_kurt) def test_axis(self): samples = np.vstack([self.samples, diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 55be6302036f1..d3e427dfb4c7b 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1406,7 +1406,7 @@ def 
get_result(obj, window, min_periods=None, freq=None, center=False): trunc_series = self.series[::2].truncate(prev_date, last_date) trunc_frame = self.frame[::2].truncate(prev_date, last_date) - self.assertAlmostEqual(series_result[-1], + tm.assert_almost_equal(series_result[-1], static_comp(trunc_series)) tm.assert_series_equal(frame_result.xs(last_date), diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 0c8aaf77aec12..10776381974de 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -729,7 +729,7 @@ def test_string_index_alias_tz_aware(self): ts = Series(np.random.randn(len(rng)), index=rng) result = ts['1/3/2000'] - self.assertAlmostEqual(result, ts[2]) + tm.assert_almost_equal(result, ts[2]) def test_fixed_offset(self): dates = [datetime(2000, 1, 1, tzinfo=fixed_off), diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 3f07937a6e552..d0c56e9974a3f 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -140,7 +140,8 @@ def round_trip_pickle(obj, path=None): def assert_almost_equal(left, right, check_exact=False, check_dtype='equiv', check_less_precise=False, **kwargs): - """Check that left and right Index are equal. + """ + Check that the left and right objects are approximately equal. Parameters ---------- From 86df2c100a4c6021a00c2cda95ead6c5428e2795 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 1 May 2017 21:22:24 -0400 Subject: [PATCH 491/933] DEPR: deprecate pandas.api.types.is_sequence (#16189) xref #16042 --- doc/source/api.rst | 7 +++---- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/dtypes/api.py | 5 ++--- pandas/tests/api/test_types.py | 6 +++--- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 7102258318b5b..491bec3c83f61 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1969,15 +1969,14 @@ Dtype introspection Iterable introspection +.. autosummary:: + :toctree: generated/ + api.types.is_dict_like api.types.is_file_like api.types.is_list_like api.types.is_named_tuple api.types.is_iterator - api.types.is_sequence - -.. autosummary:: - :toctree: generated/ Scalar introspection diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 6e4756c3c5245..cdad8094e8dd6 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1521,7 +1521,7 @@ Other Deprecations * ``pd.match()``, is removed. * ``pd.groupby()``, replaced by using the ``.groupby()`` method directly on a ``Series/DataFrame`` * ``pd.get_store()``, replaced by a direct call to ``pd.HDFStore(...)`` -- ``is_any_int_dtype`` and ``is_floating_dtype`` are deprecated from ``pandas.api.types`` (:issue:`16042`) +- ``is_any_int_dtype``, ``is_floating_dtype``, and ``is_sequence`` are deprecated from ``pandas.api.types`` (:issue:`16042`) .. 
_whatsnew_0200.prior_deprecations: diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py index 242c62125664c..a2180ecc4632f 100644 --- a/pandas/core/dtypes/api.py +++ b/pandas/core/dtypes/api.py @@ -57,14 +57,13 @@ is_file_like, is_list_like, is_hashable, - is_named_tuple, - is_sequence) + is_named_tuple) # deprecated m = sys.modules['pandas.core.dtypes.api'] -for t in ['is_any_int_dtype', 'is_floating_dtype']: +for t in ['is_any_int_dtype', 'is_floating_dtype', 'is_sequence']: def outer(t=t): diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index b9198c42e2eff..834857b87960c 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -31,9 +31,9 @@ class TestTypes(Base, tm.TestCase): 'is_re', 'is_re_compilable', 'is_dict_like', 'is_iterator', 'is_file_like', 'is_list_like', 'is_hashable', - 'is_named_tuple', 'is_sequence', + 'is_named_tuple', 'pandas_dtype', 'union_categoricals', 'infer_dtype'] - deprecated = ['is_any_int_dtype', 'is_floating_dtype'] + deprecated = ['is_any_int_dtype', 'is_floating_dtype', 'is_sequence'] dtypes = ['CategoricalDtype', 'DatetimeTZDtype', 'PeriodDtype', 'IntervalDtype'] @@ -90,7 +90,7 @@ def test_removed_from_core_common(self): def test_deprecated_from_api_types(self): - for t in ['is_any_int_dtype', 'is_floating_dtype']: + for t in self.deprecated: with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): getattr(types, t)(1) From 217864e3902592b9723a1070f1bbbb203a1759f4 Mon Sep 17 00:00:00 2001 From: Julien Marrec Date: Tue, 2 May 2017 03:46:44 +0200 Subject: [PATCH 492/933] ENH: Added more options for formats.style.bar (#14757) * ENH: Added more options for formats.style.bar You can now have the bar be centered on zero or midpoint value (in addition to the already existing way of having the min value at the left side of the cell) Fixed line too long `git diff upstream/master | flake8 --diff now passes` Change the tests to match new float formats. Added documentation on the new df.style.bar options for align and Colors in the documentation. Fix versionadded Check for bad align value and raise. 
Wrote a test for it too. Added a simple example before the parametric one. Added a whatsnew note. Replaced 'self.assertEqual(left, right)' by 'assert left == right' like @TomAugspurger asked. Rebased.

* small doc fixes

--- doc/source/style.ipynb | 223 +++++++++++++------------- doc/source/whatsnew/v0.20.0.txt | 2 + pandas/io/formats/style.py | 158 ++++++++++++++++-- pandas/tests/io/formats/test_style.py | 116 +++++++++++++- 4 files changed, 377 insertions(+), 122 deletions(-) diff --git a/doc/source/style.ipynb b/doc/source/style.ipynb index 2cacbb19d81bb..427b18b988aef 100644 --- a/doc/source/style.ipynb +++ b/doc/source/style.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "collapsed": true + }, "source": [ "# Styling\n", "\n", @@ -87,9 +89,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style" @@ -107,9 +107,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.highlight_null().render().split('\\n')[:10]" @@ -160,9 +158,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "s = df.style.applymap(color_negative_red)\n", @@ -208,9 +204,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.apply(highlight_max)" @@ -234,9 +228,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.\\\n", @@ -290,9 +282,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.apply(highlight_max, color='darkorange', axis=None)" @@ -340,9 +330,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.apply(highlight_max, subset=['B', 'C', 'D'])" @@ -358,9 +346,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.applymap(color_negative_red,\n", @@ -393,9 +379,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.format(\"{:.2%}\")" @@ -411,9 +395,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.format({'B': \"{:0<4.0f}\", 'D': '{:+.2f}'})" @@ -429,9 +411,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.format({\"B\": lambda x: \"±{:.2f}\".format(abs(x))})" @@ -454,9 +434,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.highlight_null(null_color='red')" @@ -472,9 +450,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import seaborn as sns\n", @@ -495,9 +471,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# Uses the full color range\n", @@ -507,9 +481,7 @@ { "cell_type": 
"code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# Compress the color range\n", @@ -523,67 +495,128 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "You can include \"bar charts\" in your DataFrame." + "There's also `.highlight_min` and `.highlight_max`." ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ - "df.style.bar(subset=['A', 'B'], color='#d65f5f')" + "df.style.highlight_max(axis=0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "There's also `.highlight_min` and `.highlight_max`." + "Use `Styler.set_properties` when the style doesn't actually depend on the values." ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ - "df.style.highlight_max(axis=0)" + "df.style.set_properties(**{'background-color': 'black',\n", + " 'color': 'lawngreen',\n", + " 'border-color': 'white'})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bar charts" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can include \"bar charts\" in your DataFrame." ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ - "df.style.highlight_min(axis=0)" + "df.style.bar(subset=['A', 'B'], color='#d65f5f')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Use `Styler.set_properties` when the style doesn't actually depend on the values." + "New in version 0.20.0 is the ability to customize further the bar chart: You can now have the `df.style.bar` be centered on zero or midpoint value (in addition to the already existing way of having the min value at the left side of the cell), and you can pass a list of `[color_negative, color_positive]`.\n", + "\n", + "Here's how you can change the above with the new `align='mid'` option:" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ - "df.style.set_properties(**{'background-color': 'black',\n", - " 'color': 'lawngreen',\n", - " 'border-color': 'white'})" + "df.style.bar(subset=['A', 'B'], align='mid', color=['#d65f5f', '#5fba7d'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following example aims to give a highlight of the behavior of the new align options:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from IPython.display import HTML\n", + "\n", + "# Test series\n", + "test1 = pd.Series([-100,-60,-30,-20], name='All Negative')\n", + "test2 = pd.Series([10,20,50,100], name='All Positive')\n", + "test3 = pd.Series([-10,-5,0,90], name='Both Pos and Neg')\n", + "\n", + "head = \"\"\"\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\"\"\"\n", + "\n", + "aligns = ['left','zero','mid']\n", + "for align in aligns:\n", + " row = \"\".format(align)\n", + " for serie in [test1,test2,test3]:\n", + " s = serie.copy()\n", + " s.name=''\n", + " row += \"\".format(s.to_frame().style.bar(align=align, \n", + " color=['#d65f5f', '#5fba7d'], \n", + " width=100).render()) #testn['width']\n", + " row += ''\n", + " head += row\n", + " \n", + "head+= \"\"\"\n", + "\n", + "
[HTML table markup stripped during extraction: the header cells read "Align", "All Negative", "All Positive", "Both Neg and Pos", and the row templates were "<tr><th>{}</th>" and "<td>{}</td>"]
\"\"\"\n", + " \n", + "\n", + "HTML(head)" ] }, { @@ -603,9 +636,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df2 = -df\n", @@ -616,9 +647,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "style2 = df2.style\n", @@ -671,9 +700,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "with pd.option_context('display.precision', 2):\n", @@ -693,9 +720,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style\\\n", @@ -728,9 +753,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.set_caption('Colormaps, with a caption.')\\\n", @@ -756,9 +779,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from IPython.display import HTML\n", @@ -854,9 +875,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from IPython.html import widgets\n", @@ -892,9 +911,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "np.random.seed(25)\n", @@ -993,9 +1010,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "%mkdir templates" @@ -1012,9 +1027,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "%%file templates/myhtml.tpl\n", @@ -1065,9 +1078,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "MyStyler(df)" @@ -1083,9 +1094,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "HTML(MyStyler(df).render(table_title=\"Extending Example\"))" @@ -1101,9 +1110,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "EasyStyler = Styler.from_custom_template(\"templates\", \"myhtml.tpl\")\n", @@ -1120,9 +1127,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "with open(\"template_structure.html\") as f:\n", diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index cdad8094e8dd6..3332bfcd65d50 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -524,6 +524,8 @@ Other Enhancements - ``parallel_coordinates()`` has gained a ``sort_labels`` keyword arg that sorts class labels and the colours assigned to them (:issue:`15908`) - Options added to allow one to turn on/off using ``bottleneck`` and ``numexpr``, see :ref:`here ` (:issue:`16157`) +- ``DataFrame.style.bar()`` now accepts two more options to further customize the bar chart. Bar alignment is set with ``align='left'|'mid'|'zero'``, the default is "left", which is backward compatible; You can now pass a list of ``color=[color_negative, color_positive]``. (:issue:`14757`) + .. 
_ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 83062e7d764cd..f1ff2966dca48 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -23,6 +23,7 @@ import numpy as np import pandas as pd +from pandas.api.types import is_list_like from pandas.compat import range from pandas.core.config import get_option from pandas.core.generic import _shared_docs @@ -868,30 +869,141 @@ def set_properties(self, subset=None, **kwargs): return self.applymap(f, subset=subset) @staticmethod - def _bar(s, color, width): + def _bar_left(s, color, width, base): + """ + The minimum value is aligned at the left of the cell. + Parameters + ---------- + color: 2-tuple/list, of [``color_negative``, ``color_positive``] + width: float + A number between 0 and 100. The largest value will cover ``width`` + percent of the cell's width + base: str + The base css format of the cell, e.g.: + ``base = 'width: 10em; height: 80%;'`` + Returns + ------- + css : list of str + """ normed = width * (s - s.min()) / (s.max() - s.min()) - - base = 'width: 10em; height: 80%;' - attrs = (base + 'background: linear-gradient(90deg,{c} {w}%, ' + zero_normed = width * (0 - s.min()) / (s.max() - s.min()) + attrs = (base + 'background: linear-gradient(90deg,{c} {w:.1f}%, ' 'transparent 0%)') - return [attrs.format(c=color, w=x) if x != 0 else base for x in normed] + return [base if x == 0 else attrs.format(c=color[0], w=x) + if x < zero_normed + else attrs.format(c=color[1], w=x) if x >= zero_normed + else base for x in normed] + + @staticmethod + def _bar_center_zero(s, color, width, base): + """ + Creates a bar chart where the zero is centered in the cell + Parameters + ---------- + color: 2-tuple/list, of [``color_negative``, ``color_positive``] + width: float + A number between 0 and 100. The largest value will cover ``width`` + percent of the cell's width + base: str + The base css format of the cell, e.g.: + ``base = 'width: 10em; height: 80%;'`` + Returns + ------- + css : list of str + """ + + # Either the min or the max should reach the edge + # (50%, centered on zero) + m = max(abs(s.min()), abs(s.max())) + + normed = s * 50 * width / (100.0 * m) + + attrs_neg = (base + 'background: linear-gradient(90deg, transparent 0%' + ', transparent {w:.1f}%, {c} {w:.1f}%, ' + '{c} 50%, transparent 50%)') + + attrs_pos = (base + 'background: linear-gradient(90deg, transparent 0%' + ', transparent 50%, {c} 50%, {c} {w:.1f}%, ' + 'transparent {w:.1f}%)') + + return [attrs_pos.format(c=color[1], w=(50 + x)) if x >= 0 + else attrs_neg.format(c=color[0], w=(50 + x)) + for x in normed] + + @staticmethod + def _bar_center_mid(s, color, width, base): + """ + Creates a bar chart where the midpoint is centered in the cell + Parameters + ---------- + color: 2-tuple/list, of [``color_negative``, ``color_positive``] + width: float + A number between 0 and 100. 
The largest value will cover ``width`` + percent of the cell's width + base: str + The base css format of the cell, e.g.: + ``base = 'width: 10em; height: 80%;'`` + Returns + ------- + css : list of str + """ + + if s.min() >= 0: + # In this case, we place the zero at the left, and the max() should + # be at width + zero = 0.0 + slope = width / s.max() + elif s.max() <= 0: + # In this case, we place the zero at the right, and the min() + # should be at 100-width + zero = 100.0 + slope = width / -s.min() + else: + slope = width / (s.max() - s.min()) + zero = (100.0 + width) / 2.0 - slope * s.max() + + normed = zero + slope * s + + attrs_neg = (base + 'background: linear-gradient(90deg, transparent 0%' + ', transparent {w:.1f}%, {c} {w:.1f}%, ' + '{c} {zero:.1f}%, transparent {zero:.1f}%)') + + attrs_pos = (base + 'background: linear-gradient(90deg, transparent 0%' + ', transparent {zero:.1f}%, {c} {zero:.1f}%, ' + '{c} {w:.1f}%, transparent {w:.1f}%)') + + return [attrs_pos.format(c=color[1], zero=zero, w=x) if x > zero + else attrs_neg.format(c=color[0], zero=zero, w=x) + for x in normed] + + def bar(self, subset=None, axis=0, color='#d65f5f', width=100, + align='left'): """ Color the background ``color`` proportional to the values in each column. Excludes non-numeric data by default. - .. versionadded:: 0.17.1 - Parameters ---------- subset: IndexSlice, default None a valid slice for ``data`` to limit the style application to axis: int - color: str + color: str or 2-tuple/list + If a str is passed, the color is the same for both + negative and positive numbers. If 2-tuple/list is used, the + first element is the color_negative and the second is the + color_positive (eg: ['#d65f5f', '#5fba7d']) width: float A number between 0 and 100. The largest value will cover ``width`` percent of the cell's width + align : {'left', 'zero', 'mid'}, default 'left' + - 'left' : the min value starts at the left of the cell + - 'zero' : a value of zero is located at the center of the cell + - 'mid' : the center of the cell is at (max+min)/2, or + if values are all negative (positive) the zero is aligned + at the right (left) of the cell + + .. 
versionadded:: 0.20.0 Returns ------- @@ -899,8 +1011,32 @@ def bar(self, subset=None, axis=0, color='#d65f5f', width=100): """ subset = _maybe_numeric_slice(self.data, subset) subset = _non_reducing_slice(subset) - self.apply(self._bar, subset=subset, axis=axis, color=color, - width=width) + + base = 'width: 10em; height: 80%;' + + if not(is_list_like(color)): + color = [color, color] + elif len(color) == 1: + color = [color[0], color[0]] + elif len(color) > 2: + msg = ("Must pass `color` as string or a list-like" + " of length 2: [`color_negative`, `color_positive`]\n" + "(eg: color=['#d65f5f', '#5fba7d'])") + raise ValueError(msg) + + if align == 'left': + self.apply(self._bar_left, subset=subset, axis=axis, color=color, + width=width, base=base) + elif align == 'zero': + self.apply(self._bar_center_zero, subset=subset, axis=axis, + color=color, width=width, base=base) + elif align == 'mid': + self.apply(self._bar_center_mid, subset=subset, axis=axis, + color=color, width=width, base=base) + else: + msg = ("`align` must be one of {'left', 'zero', 'mid'}") + raise ValueError(msg) + return self def highlight_max(self, subset=None, color='yellow', axis=0): diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index f421c0f8e6d69..9219ac1c9c26b 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -266,7 +266,7 @@ def test_empty(self): {'props': [['', '']], 'selector': 'row1_col0'}] assert result == expected - def test_bar(self): + def test_bar_align_left(self): df = pd.DataFrame({'A': [0, 1, 2]}) result = df.style.bar()._compute().ctx expected = { @@ -299,7 +299,7 @@ def test_bar(self): result = df.style.bar(color='red', width=50)._compute().ctx assert result == expected - def test_bar_0points(self): + def test_bar_align_left_0points(self): df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) result = df.style.bar()._compute().ctx expected = {(0, 0): ['width: 10em', ' height: 80%'], @@ -349,6 +349,118 @@ def test_bar_0points(self): ', transparent 0%)']} assert result == expected + def test_bar_align_mid_pos_and_neg(self): + df = pd.DataFrame({'A': [-10, 0, 20, 90]}) + + result = df.style.bar(align='mid', color=[ + '#d65f5f', '#5fba7d'])._compute().ctx + + expected = {(0, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 0.0%, #d65f5f 0.0%, ' + '#d65f5f 10.0%, transparent 10.0%)'], + (1, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 10.0%, ' + '#d65f5f 10.0%, #d65f5f 10.0%, ' + 'transparent 10.0%)'], + (2, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 10.0%, #5fba7d 10.0%' + ', #5fba7d 30.0%, transparent 30.0%)'], + (3, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 10.0%, ' + '#5fba7d 10.0%, #5fba7d 100.0%, ' + 'transparent 100.0%)']} + + self.assertEqual(result, expected) + + def test_bar_align_mid_all_pos(self): + df = pd.DataFrame({'A': [10, 20, 50, 100]}) + + result = df.style.bar(align='mid', color=[ + '#d65f5f', '#5fba7d'])._compute().ctx + + expected = {(0, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 0.0%, #5fba7d 0.0%, ' + '#5fba7d 10.0%, transparent 10.0%)'], + (1, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 0.0%, #5fba7d 0.0%, ' + '#5fba7d 
20.0%, transparent 20.0%)'], + (2, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 0.0%, #5fba7d 0.0%, ' + '#5fba7d 50.0%, transparent 50.0%)'], + (3, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 0.0%, #5fba7d 0.0%, ' + '#5fba7d 100.0%, transparent 100.0%)']} + + self.assertEqual(result, expected) + + def test_bar_align_mid_all_neg(self): + df = pd.DataFrame({'A': [-100, -60, -30, -20]}) + + result = df.style.bar(align='mid', color=[ + '#d65f5f', '#5fba7d'])._compute().ctx + + expected = {(0, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 0.0%, ' + '#d65f5f 0.0%, #d65f5f 100.0%, ' + 'transparent 100.0%)'], + (1, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 40.0%, ' + '#d65f5f 40.0%, #d65f5f 100.0%, ' + 'transparent 100.0%)'], + (2, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 70.0%, ' + '#d65f5f 70.0%, #d65f5f 100.0%, ' + 'transparent 100.0%)'], + (3, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 80.0%, ' + '#d65f5f 80.0%, #d65f5f 100.0%, ' + 'transparent 100.0%)']} + assert result == expected + + def test_bar_align_zero_pos_and_neg(self): + # See https://github.com/pandas-dev/pandas/pull/14757 + df = pd.DataFrame({'A': [-10, 0, 20, 90]}) + + result = df.style.bar(align='zero', color=[ + '#d65f5f', '#5fba7d'], width=90)._compute().ctx + + expected = {(0, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 45.0%, ' + '#d65f5f 45.0%, #d65f5f 50%, ' + 'transparent 50%)'], + (1, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 50%, ' + '#5fba7d 50%, #5fba7d 50.0%, ' + 'transparent 50.0%)'], + (2, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 50%, #5fba7d 50%, ' + '#5fba7d 60.0%, transparent 60.0%)'], + (3, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg, ' + 'transparent 0%, transparent 50%, #5fba7d 50%, ' + '#5fba7d 95.0%, transparent 95.0%)']} + assert result == expected + + def test_bar_bad_align_raises(self): + df = pd.DataFrame({'A': [-100, -60, -30, -20]}) + with pytest.raises(ValueError): + df.style.bar(align='poorly', color=['#d65f5f', '#5fba7d']) + def test_highlight_null(self, null_color='red'): df = pd.DataFrame({'A': [0, np.nan]}) result = df.style.highlight_null()._compute().ctx From e19a0ffeb5d64dbfb05262435f27c80f10bc582d Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 2 May 2017 06:24:16 -0400 Subject: [PATCH 493/933] MAINT: Remove vestigial self.assert* (#16190) Remove all remaining self.assert* method calls originating from unittest. Any that are left are calls to methods directly defined in the test class or a higher derived pandas test class. 
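For reference, the conversion pattern applied throughout these modules (an illustrative sketch drawn from the hunks below, not an exhaustive list):

    # before: unittest-style assertion methods inherited from TestCase
    self.assertEqual(result, expected)
    self.assertGreater(nanops.nanskew(right_tailed), 0)
    self.assertAlmostEqual(series_result[-1], static_comp(trunc_series))

    # after: plain asserts plus the pandas.util.testing (tm) helpers
    assert result == expected
    assert nanops.nanskew(right_tailed) > 0
    tm.assert_almost_equal(series_result[-1], static_comp(trunc_series))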
--- pandas/tests/frame/test_repr_info.py | 10 +- pandas/tests/io/formats/test_style.py | 4 +- pandas/tests/test_nanops.py | 8 +- pandas/tests/test_panel4d.py | 134 ++++++++++++-------------- 4 files changed, 71 insertions(+), 85 deletions(-) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 74301b918bd02..0300c53e086cd 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -331,13 +331,13 @@ def test_info_memory_usage(self): res = buf.getvalue().splitlines() assert re.match(r"memory usage: [^+]+$", res[-1]) - self.assertGreater(df_with_object_index.memory_usage(index=True, - deep=True).sum(), - df_with_object_index.memory_usage(index=True).sum()) + assert (df_with_object_index.memory_usage( + index=True, deep=True).sum() > df_with_object_index.memory_usage( + index=True).sum()) df_object = pd.DataFrame({'a': ['a']}) - self.assertGreater(df_object.memory_usage(deep=True).sum(), - df_object.memory_usage().sum()) + assert (df_object.memory_usage(deep=True).sum() > + df_object.memory_usage().sum()) # Test a DataFrame with duplicate columns dtypes = ['int64', 'int64', 'int64', 'float64'] diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 9219ac1c9c26b..1cd338479bd0c 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -374,7 +374,7 @@ def test_bar_align_mid_pos_and_neg(self): '#5fba7d 10.0%, #5fba7d 100.0%, ' 'transparent 100.0%)']} - self.assertEqual(result, expected) + assert result == expected def test_bar_align_mid_all_pos(self): df = pd.DataFrame({'A': [10, 20, 50, 100]}) @@ -399,7 +399,7 @@ def test_bar_align_mid_all_pos(self): 'transparent 0%, transparent 0.0%, #5fba7d 0.0%, ' '#5fba7d 100.0%, transparent 100.0%)']} - self.assertEqual(result, expected) + assert result == expected def test_bar_align_mid_all_neg(self): df = pd.DataFrame({'A': [-100, -60, -30, -20]}) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 2aa3638b18e9b..efa647fd91a0d 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -914,11 +914,11 @@ def test_constant_series(self): def test_all_finite(self): alpha, beta = 0.3, 0.1 left_tailed = self.prng.beta(alpha, beta, size=100) - self.assertLess(nanops.nanskew(left_tailed), 0) + assert nanops.nanskew(left_tailed) < 0 alpha, beta = 0.1, 0.3 right_tailed = self.prng.beta(alpha, beta, size=100) - self.assertGreater(nanops.nanskew(right_tailed), 0) + assert nanops.nanskew(right_tailed) > 0 def test_ground_truth(self): skew = nanops.nanskew(self.samples) @@ -964,11 +964,11 @@ def test_constant_series(self): def test_all_finite(self): alpha, beta = 0.3, 0.1 left_tailed = self.prng.beta(alpha, beta, size=100) - self.assertLess(nanops.nankurt(left_tailed), 0) + assert nanops.nankurt(left_tailed) < 0 alpha, beta = 0.1, 0.3 right_tailed = self.prng.beta(alpha, beta, size=100) - self.assertGreater(nanops.nankurt(right_tailed), 0) + assert nanops.nankurt(right_tailed) > 0 def test_ground_truth(self): kurt = nanops.nankurt(self.samples) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 041e36848e1d8..1b611309aece0 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -13,10 +13,7 @@ from pandas.core.series import remove_na from pandas.tseries.offsets import BDay -from pandas.util.testing import (assert_panel_equal, - assert_panel4d_equal, - assert_frame_equal, - assert_series_equal, +from 
pandas.util.testing import (assert_frame_equal, assert_series_equal, assert_almost_equal) import pandas.util.testing as tm @@ -133,7 +130,7 @@ def wrapper(x): for i in range(obj.ndim): result = f(axis=i, skipna=False) expected = obj.apply(wrapper, axis=i) - assert_panel_equal(result, expected) + tm.assert_panel_equal(result, expected) else: skipna_wrapper = alternative wrapper = alternative @@ -143,26 +140,18 @@ def wrapper(x): result = f(axis=i) if not tm._incompat_bottleneck_version(name): expected = obj.apply(skipna_wrapper, axis=i) - assert_panel_equal(result, expected) + tm.assert_panel_equal(result, expected) pytest.raises(Exception, f, axis=obj.ndim) class SafeForSparse(object): - @classmethod - def assert_panel_equal(cls, x, y): - assert_panel_equal(x, y) - - @classmethod - def assert_panel4d_equal(cls, x, y): - assert_panel4d_equal(x, y) - def test_get_axis(self): - assert(self.panel4d._get_axis(0) is self.panel4d.labels) - assert(self.panel4d._get_axis(1) is self.panel4d.items) - assert(self.panel4d._get_axis(2) is self.panel4d.major_axis) - assert(self.panel4d._get_axis(3) is self.panel4d.minor_axis) + assert self.panel4d._get_axis(0) is self.panel4d.labels + assert self.panel4d._get_axis(1) is self.panel4d.items + assert self.panel4d._get_axis(2) is self.panel4d.major_axis + assert self.panel4d._get_axis(3) is self.panel4d.minor_axis def test_set_axis(self): with catch_warnings(record=True): @@ -226,7 +215,7 @@ def test_arith(self): @staticmethod def _test_op(panel4d, op): result = op(panel4d, 1) - assert_panel_equal(result['l1'], op(panel4d['l1'], 1)) + tm.assert_panel_equal(result['l1'], op(panel4d['l1'], 1)) def test_keys(self): tm.equalContents(list(self.panel4d.keys()), self.panel4d.labels) @@ -240,11 +229,11 @@ def test_iteritems(self): def test_combinePanel4d(self): with catch_warnings(record=True): result = self.panel4d.add(self.panel4d) - self.assert_panel4d_equal(result, self.panel4d * 2) + tm.assert_panel4d_equal(result, self.panel4d * 2) def test_neg(self): with catch_warnings(record=True): - self.assert_panel4d_equal(-self.panel4d, self.panel4d * -1) + tm.assert_panel4d_equal(-self.panel4d, self.panel4d * -1) def test_select(self): with catch_warnings(record=True): @@ -254,28 +243,28 @@ def test_select(self): # select labels result = p.select(lambda x: x in ('l1', 'l3'), axis='labels') expected = p.reindex(labels=['l1', 'l3']) - self.assert_panel4d_equal(result, expected) + tm.assert_panel4d_equal(result, expected) # select items result = p.select(lambda x: x in ('ItemA', 'ItemC'), axis='items') expected = p.reindex(items=['ItemA', 'ItemC']) - self.assert_panel4d_equal(result, expected) + tm.assert_panel4d_equal(result, expected) # select major_axis result = p.select(lambda x: x >= datetime(2000, 1, 15), axis='major') new_major = p.major_axis[p.major_axis >= datetime(2000, 1, 15)] expected = p.reindex(major=new_major) - self.assert_panel4d_equal(result, expected) + tm.assert_panel4d_equal(result, expected) # select minor_axis result = p.select(lambda x: x in ('D', 'A'), axis=3) expected = p.reindex(minor=['A', 'D']) - self.assert_panel4d_equal(result, expected) + tm.assert_panel4d_equal(result, expected) # corner case, empty thing result = p.select(lambda x: x in ('foo',), axis='items') - self.assert_panel4d_equal(result, p.reindex(items=[])) + tm.assert_panel4d_equal(result, p.reindex(items=[])) def test_get_value(self): @@ -291,12 +280,12 @@ def test_abs(self): with catch_warnings(record=True): result = self.panel4d.abs() expected = np.abs(self.panel4d) - 
self.assert_panel4d_equal(result, expected) + tm.assert_panel4d_equal(result, expected) p = self.panel4d['l1'] result = p.abs() expected = np.abs(p) - assert_panel_equal(result, expected) + tm.assert_panel_equal(result, expected) df = p['ItemA'] result = df.abs() @@ -314,7 +303,7 @@ def test_delitem_and_pop(self): with catch_warnings(record=True): expected = self.panel4d['l2'] result = self.panel4d.pop('l2') - assert_panel_equal(expected, result) + tm.assert_panel_equal(expected, result) assert 'l2' not in self.panel4d.labels del self.panel4d['l3'] @@ -367,9 +356,9 @@ def test_setitem(self): p2 = self.panel4d['l4'] - assert_panel_equal(p, p2.reindex(items=p.items, - major_axis=p.major_axis, - minor_axis=p.minor_axis)) + tm.assert_panel_equal(p, p2.reindex(items=p.items, + major_axis=p.major_axis, + minor_axis=p.minor_axis)) # scalar self.panel4d['lG'] = 1 @@ -534,34 +523,34 @@ def test_getitem_fancy_labels(self): cols = ['D', 'C', 'F'] # all 4 specified - assert_panel4d_equal(panel4d.loc[labels, items, dates, cols], - panel4d.reindex(labels=labels, items=items, - major=dates, minor=cols)) + tm.assert_panel4d_equal(panel4d.loc[labels, items, dates, cols], + panel4d.reindex(labels=labels, items=items, + major=dates, minor=cols)) # 3 specified - assert_panel4d_equal(panel4d.loc[:, items, dates, cols], - panel4d.reindex(items=items, major=dates, - minor=cols)) + tm.assert_panel4d_equal(panel4d.loc[:, items, dates, cols], + panel4d.reindex(items=items, major=dates, + minor=cols)) # 2 specified - assert_panel4d_equal(panel4d.loc[:, :, dates, cols], - panel4d.reindex(major=dates, minor=cols)) + tm.assert_panel4d_equal(panel4d.loc[:, :, dates, cols], + panel4d.reindex(major=dates, minor=cols)) - assert_panel4d_equal(panel4d.loc[:, items, :, cols], - panel4d.reindex(items=items, minor=cols)) + tm.assert_panel4d_equal(panel4d.loc[:, items, :, cols], + panel4d.reindex(items=items, minor=cols)) - assert_panel4d_equal(panel4d.loc[:, items, dates, :], - panel4d.reindex(items=items, major=dates)) + tm.assert_panel4d_equal(panel4d.loc[:, items, dates, :], + panel4d.reindex(items=items, major=dates)) # only 1 - assert_panel4d_equal(panel4d.loc[:, items, :, :], - panel4d.reindex(items=items)) + tm.assert_panel4d_equal(panel4d.loc[:, items, :, :], + panel4d.reindex(items=items)) - assert_panel4d_equal(panel4d.loc[:, :, dates, :], - panel4d.reindex(major=dates)) + tm.assert_panel4d_equal(panel4d.loc[:, :, dates, :], + panel4d.reindex(major=dates)) - assert_panel4d_equal(panel4d.loc[:, :, :, cols], - panel4d.reindex(minor=cols)) + tm.assert_panel4d_equal(panel4d.loc[:, :, :, cols], + panel4d.reindex(minor=cols)) def test_getitem_fancy_slice(self): pass @@ -607,10 +596,6 @@ def test_set_value(self): class TestPanel4d(tm.TestCase, CheckIndexing, SafeForSparse, SafeForLongAndSparse): - @classmethod - def assert_panel4d_equal(cls, x, y): - assert_panel4d_equal(x, y) - def setUp(self): with catch_warnings(record=True): self.panel4d = tm.makePanel4D(nper=8) @@ -697,10 +682,10 @@ def test_ctor_dict(self): d = {'A': l1, 'B': l2.loc[['ItemB'], :, :]} panel4d = Panel4D(d) - assert_panel_equal(panel4d['A'], self.panel4d['l1']) - assert_frame_equal(panel4d.loc['B', 'ItemB', :, :], - self.panel4d.loc['l2', ['ItemB'], - :, :]['ItemB']) + tm.assert_panel_equal(panel4d['A'], self.panel4d['l1']) + tm.assert_frame_equal(panel4d.loc['B', 'ItemB', :, :], + self.panel4d.loc['l2', ['ItemB'], + :, :]['ItemB']) def test_constructor_dict_mixed(self): with catch_warnings(record=True): @@ -715,12 +700,12 @@ def 
test_constructor_dict_mixed(self): items=self.panel4d.items, major_axis=self.panel4d.major_axis, minor_axis=self.panel4d.minor_axis) - assert_panel4d_equal(result, self.panel4d) + tm.assert_panel4d_equal(result, self.panel4d) data['l2'] = self.panel4d['l2'] result = Panel4D(data) - assert_panel4d_equal(result, self.panel4d) + tm.assert_panel4d_equal(result, self.panel4d) # corner, blow up data['l2'] = data['l2']['ItemB'] @@ -741,19 +726,19 @@ def test_constructor_resize(self): major_axis=major, minor_axis=minor) expected = self.panel4d.reindex( labels=labels, items=items, major=major, minor=minor) - assert_panel4d_equal(result, expected) + tm.assert_panel4d_equal(result, expected) result = Panel4D(data, items=items, major_axis=major) expected = self.panel4d.reindex(items=items, major=major) - assert_panel4d_equal(result, expected) + tm.assert_panel4d_equal(result, expected) result = Panel4D(data, items=items) expected = self.panel4d.reindex(items=items) - assert_panel4d_equal(result, expected) + tm.assert_panel4d_equal(result, expected) result = Panel4D(data, minor_axis=minor) expected = self.panel4d.reindex(minor=minor) - assert_panel4d_equal(result, expected) + tm.assert_panel4d_equal(result, expected) def test_conform(self): with catch_warnings(record=True): @@ -773,7 +758,7 @@ def test_reindex(self): # labels result = self.panel4d.reindex(labels=['l1', 'l2']) - assert_panel_equal(result['l2'], ref) + tm.assert_panel_equal(result['l2'], ref) # items result = self.panel4d.reindex(items=['ItemA', 'ItemB']) @@ -802,7 +787,7 @@ def test_reindex(self): # don't necessarily copy result = self.panel4d.reindex() - assert_panel4d_equal(result, self.panel4d) + tm.assert_panel4d_equal(result, self.panel4d) assert result is not self.panel4d # with filling @@ -812,13 +797,14 @@ def test_reindex(self): larger = smaller.reindex(major=self.panel4d.major_axis, method='pad') - assert_panel_equal(larger.loc[:, :, self.panel4d.major_axis[1], :], - smaller.loc[:, :, smaller_major[0], :]) + tm.assert_panel_equal(larger.loc[:, :, + self.panel4d.major_axis[1], :], + smaller.loc[:, :, smaller_major[0], :]) # don't necessarily copy result = self.panel4d.reindex( major=self.panel4d.major_axis, copy=False) - assert_panel4d_equal(result, self.panel4d) + tm.assert_panel4d_equal(result, self.panel4d) assert result is self.panel4d def test_not_hashable(self): @@ -835,7 +821,7 @@ def test_reindex_like(self): major=self.panel4d.major_axis[:-1], minor=self.panel4d.minor_axis[:-1]) smaller_like = self.panel4d.reindex_like(smaller) - assert_panel4d_equal(smaller, smaller_like) + tm.assert_panel4d_equal(smaller, smaller_like) def test_sort_index(self): with catch_warnings(record=True): @@ -852,7 +838,7 @@ def test_sort_index(self): random_order = self.panel4d.reindex(labels=rlabels) sorted_panel4d = random_order.sort_index(axis=0) - assert_panel4d_equal(sorted_panel4d, self.panel4d) + tm.assert_panel4d_equal(sorted_panel4d, self.panel4d) def test_fillna(self): @@ -887,7 +873,7 @@ def test_swapaxes(self): # this works, but return a copy result = self.panel4d.swapaxes('items', 'items') - assert_panel4d_equal(self.panel4d, result) + tm.assert_panel4d_equal(self.panel4d, result) assert id(self.panel4d) != id(result) def test_update(self): @@ -916,7 +902,7 @@ def test_update(self): [1.5, np.nan, 3.], [1.5, np.nan, 3.]]]]) - assert_panel4d_equal(p4d, expected) + tm.assert_panel4d_equal(p4d, expected) def test_dtypes(self): @@ -952,4 +938,4 @@ def test_rename(self): assert (self.panel4d['l1'].values == 3).all() def 
test_get_attr(self): - assert_panel_equal(self.panel4d['l1'], self.panel4d.l1) + tm.assert_panel_equal(self.panel4d['l1'], self.panel4d.l1) From 49ca1efba1a297091a9f61453becf0bc0c54ec10 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 2 May 2017 12:26:19 +0200 Subject: [PATCH 494/933] TST: test reset_index with tuple index name and col_level!=0 (#16195) --- pandas/tests/test_multilevel.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 5b2057f830102..f4cb07625faf2 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2286,6 +2286,11 @@ def test_reset_index_multiindex_columns(self): "incomplete column name \('C', 'c'\)")): df2.rename_axis([('C', 'c')]).reset_index(col_fill=None) + # with col_level != 0 + result = df2.rename_axis([('c', 'ii')]).reset_index(col_level=1, + col_fill='C') + tm.assert_frame_equal(result, expected) + def test_set_index_period(self): # GH 6631 df = DataFrame(np.random.random(6)) From 24719672685027710cf432023a7fbc3222b0b05c Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Tue, 2 May 2017 13:28:22 +0300 Subject: [PATCH 495/933] COMPAT: PySlice_GetIndicesEx is a macro on PyPy (#16194) --- pandas/_libs/src/compat_helper.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/compat_helper.h b/pandas/_libs/src/compat_helper.h index 8f86bb3f8e62f..bdff61d7d4150 100644 --- a/pandas/_libs/src/compat_helper.h +++ b/pandas/_libs/src/compat_helper.h @@ -26,8 +26,10 @@ the macro, which restores compat. https://bugs.python.org/issue29943 */ -#if PY_VERSION_HEX < 0x03070000 && defined(PySlice_GetIndicesEx) - #undef PySlice_GetIndicesEx +#ifndef PYPY_VERSION +# if PY_VERSION_HEX < 0x03070000 && defined(PySlice_GetIndicesEx) +# undef PySlice_GetIndicesEx +# endif #endif PANDAS_INLINE int slice_get_indices(PyObject *s, From 20fda2223d5121be3f8204702b5ce1e6037e5b18 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 2 May 2017 07:26:28 -0400 Subject: [PATCH 496/933] DOC: update the .agg doc-string with examples (#16188) * DOC: update the .agg doc-string with examples * various updates --- doc/source/whatsnew/v0.20.0.txt | 6 +- pandas/core/base.py | 47 ++------ pandas/core/frame.py | 41 ++++++- pandas/core/generic.py | 43 +++++-- pandas/core/groupby.py | 152 +++++++++++++++--------- pandas/core/resample.py | 109 ++++++++---------- pandas/core/series.py | 27 ++++- pandas/core/window.py | 197 +++++++++++++++++++++++++++++--- 8 files changed, 443 insertions(+), 179 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 3332bfcd65d50..4882acbe820ea 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -9,7 +9,7 @@ users upgrade to this version. Highlights include: -- new ``.agg()`` API for Series/DataFrame similar to the groupby-rolling-resample API's, see :ref:`here ` +- New ``.agg()`` API for Series/DataFrame similar to the groupby-rolling-resample API's, see :ref:`here ` - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. - The ``.ix`` indexer has been deprecated, see :ref:`here ` - ``Panel`` has been deprecated, see :ref:`here ` @@ -45,8 +45,8 @@ New features ^^^^^^^^^^^ Series & DataFrame have been enhanced to support the aggregation API. This is an already familiar API that -is supported for groupby, window operations, and resampling. 
This allows one to express, possibly multiple, -aggregation operations in a single concise way by using :meth:`~DataFrame.agg`, +is supported for groupby, window operations, and resampling. This allows one to express aggregation operations +in a single concise way by using :meth:`~DataFrame.agg`, and :meth:`~DataFrame.transform`. The full documentation is :ref:`here ` (:issue:`1623`). Here is a sample diff --git a/pandas/core/base.py b/pandas/core/base.py index 87c649c5fbd79..fd0846b0ad33c 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -370,42 +370,6 @@ def _gotitem(self, key, ndim, subset=None): """ raise AbstractMethodError(self) - _agg_doc = """Aggregate using input function or dict of {column -> -function} - -Parameters ----------- -arg : function or dict - Function to use for aggregating groups. If a function, must either - work when passed a DataFrame or when passed to DataFrame.apply. If - passed a dict, the keys must be DataFrame column names. - - Accepted Combinations are: - - string cythonized function name - - function - - list of functions - - dict of columns -> functions - - nested dict of names -> dicts of functions - -Notes ------ -Numpy functions mean/median/prod/sum/std/var are special cased so the -default behavior is applying the function along axis=0 -(e.g., np.mean(arr_2d, axis=0)) as opposed to -mimicking the default Numpy behavior (e.g., np.mean(arr_2d)). - -Returns -------- -aggregated : DataFrame -""" - - _see_also_template = """ -See also --------- -pandas.Series.%(name)s -pandas.DataFrame.%(name)s -""" - def aggregate(self, func, *args, **kwargs): raise AbstractMethodError(self) @@ -1150,30 +1114,39 @@ def factorize(self, sort=False, na_sentinel=-1): Examples -------- + >>> x = pd.Series([1, 2, 3]) >>> x 0 1 1 2 2 3 dtype: int64 + >>> x.searchsorted(4) array([3]) + >>> x.searchsorted([0, 4]) array([0, 3]) + >>> x.searchsorted([1, 3], side='left') array([0, 2]) + >>> x.searchsorted([1, 3], side='right') array([1, 3]) - >>> + >>> x = pd.Categorical(['apple', 'bread', 'bread', 'cheese', 'milk' ]) [apple, bread, bread, cheese, milk] Categories (4, object): [apple < bread < cheese < milk] + >>> x.searchsorted('bread') array([1]) # Note: an array, not a scalar + >>> x.searchsorted(['bread']) array([1]) + >>> x.searchsorted(['bread', 'eggs']) array([1, 4]) + >>> x.searchsorted(['bread', 'eggs'], side='right') array([3, 4]) # eggs before milk """) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9a62259202653..67966374fcf9a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -18,6 +18,7 @@ import sys import types import warnings +from textwrap import dedent from numpy import nan as NA import numpy as np @@ -4200,7 +4201,43 @@ def _gotitem(self, key, ndim, subset=None): # TODO: _shallow_copy(subset)? return self[key] - @Appender(_shared_docs['aggregate'] % _shared_doc_kwargs) + _agg_doc = dedent(""" + Examples + -------- + + >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], + ... 
index=pd.date_range('1/1/2000', periods=10)) + >>> df.iloc[3:7] = np.nan + + Aggregate these functions across all columns + + >>> df.agg(['sum', 'min']) + A B C + sum -0.182253 -0.614014 -2.909534 + min -1.916563 -1.460076 -1.568297 + + Different aggregations per column + + >>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']}) + A B + max NaN 1.514318 + min -1.916563 -1.460076 + sum -0.182253 NaN + + See also + -------- + pandas.DataFrame.apply + pandas.DataFrame.transform + pandas.DataFrame.groupby.aggregate + pandas.DataFrame.resample.aggregate + pandas.DataFrame.rolling.aggregate + + """) + + @Appender(_agg_doc) + @Appender(_shared_docs['aggregate'] % dict( + versionadded='.. versionadded:: 0.20.0', + **_shared_doc_kwargs)) def aggregate(self, func, axis=0, *args, **kwargs): axis = self._get_axis_number(axis) @@ -4272,7 +4309,7 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, See also -------- DataFrame.applymap: For elementwise operations - DataFrame.agg: only perform aggregating type operations + DataFrame.aggregate: only perform aggregating type operations DataFrame.transform: only perform transforming type operations Returns diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9318a9f5ef27c..48ee1842dc4a0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2854,19 +2854,19 @@ def pipe(self, func, *args, **kwargs): return func(self, *args, **kwargs) _shared_docs['aggregate'] = (""" - Aggregate using input function or dict of {column -> - function} + Aggregate using callable, string, dict, or list of string/callables - .. versionadded:: 0.20.0 + %(versionadded)s Parameters ---------- func : callable, string, dictionary, or list of string/callables Function to use for aggregating the data. If a function, must either - work when passed a %(klass)s or when passed to %(klass)s.apply. If - passed a dict, the keys must be DataFrame column names. + work when passed a %(klass)s or when passed to %(klass)s.apply. For + a DataFrame, can pass a dict, if the keys are DataFrame column names. Accepted Combinations are: + - string function name - function - list of functions @@ -2879,12 +2879,11 @@ def pipe(self, func, *args, **kwargs): (e.g., np.mean(arr_2d, axis=0)) as opposed to mimicking the default Numpy behavior (e.g., np.mean(arr_2d)). + agg is an alias for aggregate. Use it. + Returns ------- aggregated : %(klass)s - - See also - -------- """) _shared_docs['transform'] = (""" @@ -2899,18 +2898,40 @@ def pipe(self, func, *args, **kwargs): To apply to column Accepted Combinations are: + - string function name - function - list of functions - dict of column names -> functions (or list of functions) + Returns + ------- + transformed : %(klass)s + Examples -------- + >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], + ... 
index=pd.date_range('1/1/2000', periods=10)) + >>> df.iloc[3:7] = np.nan + >>> df.transform(lambda x: (x - x.mean()) / x.std()) + A B C + 2000-01-01 0.579457 1.236184 0.123424 + 2000-01-02 0.370357 -0.605875 -1.231325 + 2000-01-03 1.455756 -0.277446 0.288967 + 2000-01-04 NaN NaN NaN + 2000-01-05 NaN NaN NaN + 2000-01-06 NaN NaN NaN + 2000-01-07 NaN NaN NaN + 2000-01-08 -0.498658 1.274522 1.642524 + 2000-01-09 -0.540524 -1.012676 -0.828968 + 2000-01-10 -1.366388 -0.614710 0.005378 + + See also + -------- + pandas.%(klass)s.aggregate + pandas.%(klass)s.apply - Returns - ------- - transformed : %(klass)s """) # ---------------------------------------------------------------------- diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 1f715c685c27e..479d2f7d26eb6 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -5,6 +5,7 @@ import collections import warnings import copy +from textwrap import dedent from pandas.compat import ( zip, range, lzip, @@ -46,7 +47,7 @@ CategoricalIndex, _ensure_index) from pandas.core.categorical import Categorical from pandas.core.frame import DataFrame -from pandas.core.generic import NDFrame +from pandas.core.generic import NDFrame, _shared_docs from pandas.core.series import Series from pandas.core.panel import Panel @@ -2749,57 +2750,47 @@ def _selection_name(self): else: return self._selection - def aggregate(self, func_or_funcs, *args, **kwargs): - """ - Apply aggregation function or functions to groups, yielding most likely - Series but in some cases DataFrame depending on the output of the - aggregation function + _agg_doc = dedent(""" + Examples + -------- - Parameters - ---------- - func_or_funcs : function or list / dict of functions - List/dict of functions will produce DataFrame with column names - determined by the function names themselves (list) or the keys in - the dict + >>> s = Series([1, 2, 3, 4]) - Notes - ----- - agg is an alias for aggregate. Use it. + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 - Examples - -------- - >>> series - bar 1.0 - baz 2.0 - qot 3.0 - qux 4.0 - - >>> mapper = lambda x: x[0] # first letter - >>> grouped = series.groupby(mapper) - - >>> grouped.aggregate(np.sum) - b 3.0 - q 7.0 - - >>> grouped.aggregate([np.sum, np.mean, np.std]) - mean std sum - b 1.5 0.5 3 - q 3.5 0.5 7 - - >>> grouped.agg({'result' : lambda x: x.mean() / x.std(), - ... 'total' : np.sum}) - result total - b 2.121 3 - q 4.95 7 + >>> s.groupby([1, 1, 2, 2]).min() + 1 1 + 2 3 + dtype: int64 - See also - -------- - apply, transform + >>> s.groupby([1, 1, 2, 2]).agg('min') + 1 1 + 2 3 + dtype: int64 - Returns - ------- - Series or DataFrame - """ + >>> s.groupby([1, 1, 2, 2]).agg(['min', 'max']) + min max + 1 1 2 + 2 3 4 + + See also + -------- + pandas.Series.groupby.apply + pandas.Series.groupby.transform + pandas.Series.aggregate + + """) + + @Appender(_agg_doc) + @Appender(_shared_docs['aggregate'] % dict( + klass='Series', + versionadded='')) def aggregate(self, func_or_funcs, *args, **kwargs): _level = kwargs.pop('_level', None) if isinstance(func_or_funcs, compat.string_types): return getattr(self, func_or_funcs)(*args, **kwargs) @@ -3905,9 +3896,67 @@ class DataFrameGroupBy(NDFrameGroupBy): _block_agg_axis = 1 - @Substitution(name='groupby') - @Appender(SelectionMixin._see_also_template) - @Appender(SelectionMixin._agg_doc) + _agg_doc = dedent(""" + Examples + -------- + + >>> df = pd.DataFrame({'A': [1, 1, 2, 2], + ... 'B': [1, 2, 3, 4], + ... 
'C': np.random.randn(4)}) + + >>> df + A B C + 0 1 1 0.362838 + 1 1 2 0.227877 + 2 2 3 1.267767 + 3 2 4 -0.562860 + + The aggregation is for each column. + + >>> df.groupby('A').agg('min') + B C + A + 1 1 0.227877 + 2 3 -0.562860 + + Multiple aggregations + + >>> df.groupby('A').agg(['min', 'max']) + B C + min max min max + A + 1 1 2 0.227877 0.362838 + 2 3 4 -0.562860 1.267767 + + Select a column for aggregation + + >>> df.groupby('A').B.agg(['min', 'max']) + min max + A + 1 1 2 + 2 3 4 + + Different aggregations per column + + >>> df.groupby('A').agg({'B': ['min', 'max'], 'C': 'sum'}) + B C + min max sum + A + 1 1 2 0.590716 + 2 3 4 0.704907 + + See also + -------- + pandas.DataFrame.groupby.apply + pandas.DataFrame.groupby.transform + pandas.DataFrame.aggregate + + """) + + @Appender(_agg_doc) + @Appender(_shared_docs['aggregate'] % dict( + klass='DataFrame', + versionadded='')) def aggregate(self, arg, *args, **kwargs): return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs) @@ -4166,9 +4215,6 @@ def groupby_series(obj, col=None): class PanelGroupBy(NDFrameGroupBy): - @Substitution(name='groupby') - @Appender(SelectionMixin._see_also_template) - @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): return super(PanelGroupBy, self).aggregate(arg, *args, **kwargs) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 1685a5d75245d..cbb2f6a93c2fd 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2,6 +2,7 @@ import numpy as np import warnings import copy +from textwrap import dedent import pandas as pd from pandas.core.base import AbstractMethodError, GroupByMixin @@ -254,66 +255,56 @@ def plot(self, *args, **kwargs): # have the warnings shown here and just have this work return self._deprecated('plot').plot(*args, **kwargs) + _agg_doc = dedent(""" + + Examples + -------- + >>> s = Series([1,2,3,4,5], + index=pd.date_range('20130101', + periods=5,freq='s')) + 2013-01-01 00:00:00 1 + 2013-01-01 00:00:01 2 + 2013-01-01 00:00:02 3 + 2013-01-01 00:00:03 4 + 2013-01-01 00:00:04 5 + Freq: S, dtype: int64 + + >>> r = s.resample('2s') + DatetimeIndexResampler [freq=<2 * Seconds>, axis=0, closed=left, + label=left, convention=start, base=0] + + >>> r.agg(np.sum) + 2013-01-01 00:00:00 3 + 2013-01-01 00:00:02 7 + 2013-01-01 00:00:04 5 + Freq: 2S, dtype: int64 + + >>> r.agg(['sum','mean','max']) + sum mean max + 2013-01-01 00:00:00 3 1.5 2 + 2013-01-01 00:00:02 7 3.5 4 + 2013-01-01 00:00:04 5 5.0 5 + + >>> r.agg({'result' : lambda x: x.mean() / x.std(), + 'total' : np.sum}) + total result + 2013-01-01 00:00:00 3 2.121320 + 2013-01-01 00:00:02 7 4.949747 + 2013-01-01 00:00:04 5 NaN + + See also + -------- + pandas.DataFrame.groupby.aggregate + pandas.DataFrame.resample.transform + pandas.DataFrame.aggregate + + """) + + @Appender(_agg_doc) + @Appender(_shared_docs['aggregate'] % dict( + klass='DataFrame', + versionadded='')) def aggregate(self, arg, *args, **kwargs): - """ - Apply aggregation function or functions to resampled groups, yielding - most likely Series but in some cases DataFrame depending on the output - of the aggregation function - - Parameters - ---------- - func_or_funcs : function or list / dict of functions - List/dict of functions will produce DataFrame with column names - determined by the function names themselves (list) or the keys in - the dict - - Notes - ----- - agg is an alias for aggregate. Use it. 
- - Examples - -------- - >>> s = Series([1,2,3,4,5], - index=pd.date_range('20130101', - periods=5,freq='s')) - 2013-01-01 00:00:00 1 - 2013-01-01 00:00:01 2 - 2013-01-01 00:00:02 3 - 2013-01-01 00:00:03 4 - 2013-01-01 00:00:04 5 - Freq: S, dtype: int64 - - >>> r = s.resample('2s') - DatetimeIndexResampler [freq=<2 * Seconds>, axis=0, closed=left, - label=left, convention=start, base=0] - - >>> r.agg(np.sum) - 2013-01-01 00:00:00 3 - 2013-01-01 00:00:02 7 - 2013-01-01 00:00:04 5 - Freq: 2S, dtype: int64 - - >>> r.agg(['sum','mean','max']) - sum mean max - 2013-01-01 00:00:00 3 1.5 2 - 2013-01-01 00:00:02 7 3.5 4 - 2013-01-01 00:00:04 5 5.0 5 - - >>> r.agg({'result' : lambda x: x.mean() / x.std(), - 'total' : np.sum}) - total result - 2013-01-01 00:00:00 3 2.121320 - 2013-01-01 00:00:02 7 4.949747 - 2013-01-01 00:00:04 5 NaN - - See also - -------- - transform - - Returns - ------- - Series or DataFrame - """ self._set_binner() result, how = self._aggregate(arg, *args, **kwargs) diff --git a/pandas/core/series.py b/pandas/core/series.py index f03091d7e6a66..e5f1d91eedfec 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -8,6 +8,7 @@ import types import warnings +from textwrap import dedent from numpy import nan, ndarray import numpy as np @@ -2174,7 +2175,31 @@ def _gotitem(self, key, ndim, subset=None): """ return self - @Appender(generic._shared_docs['aggregate'] % _shared_doc_kwargs) + _agg_doc = dedent(""" + Examples + -------- + + >>> s = Series(np.random.randn(10)) + + >>> s.agg('min') + -1.3018049988556679 + + >>> s.agg(['min', 'max']) + min -1.301805 + max 1.127688 + dtype: float64 + + See also + -------- + pandas.Series.apply + pandas.Series.transform + + """) + + @Appender(_agg_doc) + @Appender(generic._shared_docs['aggregate'] % dict( + versionadded='.. 
versionadded:: 0.20.0', + **_shared_doc_kwargs)) def aggregate(self, func, axis=0, *args, **kwargs): axis = self._get_axis_number(axis) result, how = self._aggregate(func, *args, **kwargs) diff --git a/pandas/core/window.py b/pandas/core/window.py index 6fdc05a13b773..6d8f12e982f12 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -39,10 +39,11 @@ from pandas.compat.numpy import function as nv from pandas.util.decorators import (Substitution, Appender, cache_readonly) +from pandas.core.generic import _shared_docs from textwrap import dedent -_shared_docs = dict() +_shared_docs = dict(**_shared_docs) _doc_template = """ Returns @@ -611,9 +612,48 @@ def f(arg, *args, **kwargs): return self._wrap_results(results, blocks, obj) - @Substitution(name='rolling') - @Appender(SelectionMixin._see_also_template) - @Appender(SelectionMixin._agg_doc) + _agg_doc = dedent(""" + Examples + -------- + + >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C']) + >>> df + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.004295 0.905829 -0.954544 + 2 0.735167 -0.165272 -1.619346 + 3 -0.702657 -1.340923 -0.706334 + 4 -0.246845 0.211596 -0.901819 + 5 2.463718 3.157577 -1.380906 + 6 -1.142255 2.340594 -0.039875 + 7 1.396598 -1.647453 1.677227 + 8 -0.543425 1.761277 -0.220481 + 9 -0.640505 0.289374 -1.550670 + + >>> df.rolling(3, win_type='boxcar').agg('mean') + A B C + 0 NaN NaN NaN + 1 NaN NaN NaN + 2 -0.885035 0.212600 -0.711689 + 3 -0.323928 -0.200122 -1.093408 + 4 -0.071445 -0.431533 -1.075833 + 5 0.504739 0.676083 -0.996353 + 6 0.358206 1.903256 -0.774200 + 7 0.906020 1.283573 0.085482 + 8 -0.096361 0.818139 0.472290 + 9 0.070889 0.134399 -0.031308 + + See also + -------- + pandas.DataFrame.rolling.aggregate + pandas.DataFrame.aggregate + + """) + + @Appender(_agg_doc) + @Appender(_shared_docs['aggregate'] % dict( + versionadded='', + klass='Series/DataFrame')) def aggregate(self, arg, *args, **kwargs): result, how = self._aggregate(arg, *args, **kwargs) if result is None: @@ -1081,9 +1121,62 @@ def _validate_freq(self): "compat with a datetimelike " "index".format(self.window)) - @Substitution(name='rolling') - @Appender(SelectionMixin._see_also_template) - @Appender(SelectionMixin._agg_doc) + _agg_doc = dedent(""" + Examples + -------- + + >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C']) + >>> df + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.004295 0.905829 -0.954544 + 2 0.735167 -0.165272 -1.619346 + 3 -0.702657 -1.340923 -0.706334 + 4 -0.246845 0.211596 -0.901819 + 5 2.463718 3.157577 -1.380906 + 6 -1.142255 2.340594 -0.039875 + 7 1.396598 -1.647453 1.677227 + 8 -0.543425 1.761277 -0.220481 + 9 -0.640505 0.289374 -1.550670 + + >>> df.rolling(3).sum() + A B C + 0 NaN NaN NaN + 1 NaN NaN NaN + 2 -2.655105 0.637799 -2.135068 + 3 -0.971785 -0.600366 -3.280224 + 4 -0.214334 -1.294599 -3.227500 + 5 1.514216 2.028250 -2.989060 + 6 1.074618 5.709767 -2.322600 + 7 2.718061 3.850718 0.256446 + 8 -0.289082 2.454418 1.416871 + 9 0.212668 0.403198 -0.093924 + + + >>> df.rolling(3).agg({'A':'sum', 'B':'min'}) + A B + 0 NaN NaN + 1 NaN NaN + 2 -2.655105 -0.165272 + 3 -0.971785 -1.340923 + 4 -0.214334 -1.340923 + 5 1.514216 -1.340923 + 6 1.074618 0.211596 + 7 2.718061 -1.647453 + 8 -0.289082 -1.647453 + 9 0.212668 -1.647453 + + See also + -------- + pandas.Series.rolling + pandas.DataFrame.rolling + + """) + + @Appender(_agg_doc) + @Appender(_shared_docs['aggregate'] % dict( + versionadded='', + klass='Series/DataFrame')) def aggregate(self, arg, *args, **kwargs): 
return super(Rolling, self).aggregate(arg, *args, **kwargs) @@ -1288,9 +1381,49 @@ def _get_window(self, other=None): return (max((len(obj) + len(obj)), self.min_periods) if self.min_periods else (len(obj) + len(obj))) - @Substitution(name='expanding') - @Appender(SelectionMixin._see_also_template) - @Appender(SelectionMixin._agg_doc) + _agg_doc = dedent(""" + Examples + -------- + + >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C']) + >>> df + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.004295 0.905829 -0.954544 + 2 0.735167 -0.165272 -1.619346 + 3 -0.702657 -1.340923 -0.706334 + 4 -0.246845 0.211596 -0.901819 + 5 2.463718 3.157577 -1.380906 + 6 -1.142255 2.340594 -0.039875 + 7 1.396598 -1.647453 1.677227 + 8 -0.543425 1.761277 -0.220481 + 9 -0.640505 0.289374 -1.550670 + + >>> df.ewm(alpha=0.5).mean() + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.464856 0.569633 -0.490089 + 2 -0.207700 0.149687 -1.135379 + 3 -0.471677 -0.645305 -0.906555 + 4 -0.355635 -0.203033 -0.904111 + 5 1.076417 1.503943 -1.146293 + 6 -0.041654 1.925562 -0.588728 + 7 0.680292 0.132049 0.548693 + 8 0.067236 0.948257 0.163353 + 9 -0.286980 0.618493 -0.694496 + + See also + -------- + pandas.DataFrame.expanding.aggregate + pandas.DataFrame.rolling.aggregate + pandas.DataFrame.aggregate + + """) + + @Appender(_agg_doc) + @Appender(_shared_docs['aggregate'] % dict( + versionadded='', + klass='Series/DataFrame')) def aggregate(self, arg, *args, **kwargs): return super(Expanding, self).aggregate(arg, *args, **kwargs) @@ -1534,9 +1667,47 @@ def __init__(self, obj, com=None, span=None, halflife=None, alpha=None, def _constructor(self): return EWM - @Substitution(name='ewm') - @Appender(SelectionMixin._see_also_template) - @Appender(SelectionMixin._agg_doc) + _agg_doc = dedent(""" + Examples + -------- + + >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C']) + >>> df + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.004295 0.905829 -0.954544 + 2 0.735167 -0.165272 -1.619346 + 3 -0.702657 -1.340923 -0.706334 + 4 -0.246845 0.211596 -0.901819 + 5 2.463718 3.157577 -1.380906 + 6 -1.142255 2.340594 -0.039875 + 7 1.396598 -1.647453 1.677227 + 8 -0.543425 1.761277 -0.220481 + 9 -0.640505 0.289374 -1.550670 + + >>> df.ewm(alpha=0.5).mean() + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.464856 0.569633 -0.490089 + 2 -0.207700 0.149687 -1.135379 + 3 -0.471677 -0.645305 -0.906555 + 4 -0.355635 -0.203033 -0.904111 + 5 1.076417 1.503943 -1.146293 + 6 -0.041654 1.925562 -0.588728 + 7 0.680292 0.132049 0.548693 + 8 0.067236 0.948257 0.163353 + 9 -0.286980 0.618493 -0.694496 + + See also + -------- + pandas.DataFrame.rolling.aggregate + + """) + + @Appender(_agg_doc) + @Appender(_shared_docs['aggregate'] % dict( + versionadded='', + klass='Series/DataFrame')) def aggregate(self, arg, *args, **kwargs): return super(EWM, self).aggregate(arg, *args, **kwargs) From ef0ad362f842dfd0b8676da5db14ba5f2fecd53c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 May 2017 12:52:46 -0500 Subject: [PATCH 497/933] DOC: Add redirect for moved classes (#16200) * DOC: Add redirect for moved classes The new redirects in this commit are for Resampler and Styler Refactor how we do redirects. Moved all the logic into the config file, where you state the methods / classes to be redirected. Removed all the logic from the template, and just look up in the new html_context variable. 
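Roughly, the resulting mechanism (a simplified sketch, not the exact code in the
diff below): Sphinx injects every key of ``html_context`` into the Jinja
namespace of each template, so the redirect stub can resolve its target page
with a plain dict lookup built once in ``conf.py``:

    # simplified sketch of the conf.py side; see the full diff below
    moved_api_pages = [
        ('pandas.io.parsers.read_csv', 'pandas.read_csv'),  # (from_old, to_new)
    ]
    # render one redirect stub per moved page
    html_additional_pages = {
        'generated/' + old: 'api_redirect.html' for old, new in moved_api_pages
    }
    # the template reads redirects[pagename] out of html_context
    html_context = {
        'redirects': {old: new for old, new in moved_api_pages}
    }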
Closes https://github.com/pandas-dev/pandas/issues/16186

* Only redirect for used dunder methods
---
 doc/_templates/api_redirect.html | 13 ++----
 doc/source/conf.py               | 76 ++++++++++++++++++++++++++------
 2 files changed, 67 insertions(+), 22 deletions(-)

diff --git a/doc/_templates/api_redirect.html b/doc/_templates/api_redirect.html
index 24bdd8363830f..c04a8b58ce544 100644
--- a/doc/_templates/api_redirect.html
+++ b/doc/_templates/api_redirect.html
@@ -1,15 +1,10 @@
-{% set pgn = pagename.split('.') -%}
-{% if pgn[-2][0].isupper() -%}
-    {% set redirect = ["pandas", pgn[-2], pgn[-1], 'html']|join('.') -%}
-{% else -%}
-    {% set redirect = ["pandas", pgn[-1], 'html']|join('.') -%}
-{% endif -%}
+{% set redirect = redirects[pagename.split("/")[-1]] %}
 <html>
 <head>
-<meta http-equiv="refresh" content="0;URL={{ redirect }}"/>
+<meta http-equiv="refresh" content="0;URL={{ redirect }}.html"/>
 <title>This API page has moved</title>
 </head>
 <body>
-<p>This API page has moved <a href="{{ redirect }}">here</a>.</p>
+<p>This API page has moved <a href="{{ redirect }}.html">here</a>.</p>
 </body>
-</html>
\ No newline at end of file
+</html>
diff --git a/doc/source/conf.py b/doc/source/conf.py
index a2a6dca57c34c..556e5f0227471 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -14,6 +14,7 @@
 import os
 import re
 import inspect
+import importlib
 from pandas.compat import u, PY3

 # https://github.com/sphinx-doc/sphinx/pull/2325/files
@@ -226,20 +227,69 @@
 # Additional templates that should be rendered to pages, maps page names to
 # template names.

-# Add redirect for previously existing API pages (which are now included in
-# the API pages as top-level functions) based on a template (GH9911)
+# Add redirect for previously existing API pages
+# each item is like `(from_old, to_new)`
+# To redirect a class and all its methods, see below
+# https://github.com/pandas-dev/pandas/issues/16186
+
 moved_api_pages = [
-    'pandas.core.common.isnull', 'pandas.core.common.notnull', 'pandas.core.reshape.get_dummies',
-    'pandas.tools.merge.concat', 'pandas.tools.merge.merge', 'pandas.tools.pivot.pivot_table',
-    'pandas.tseries.tools.to_datetime', 'pandas.io.clipboard.read_clipboard', 'pandas.io.excel.ExcelFile.parse',
-    'pandas.io.excel.read_excel', 'pandas.io.html.read_html', 'pandas.io.json.read_json',
-    'pandas.io.parsers.read_csv', 'pandas.io.parsers.read_fwf', 'pandas.io.parsers.read_table',
-    'pandas.io.pickle.read_pickle', 'pandas.io.pytables.HDFStore.append', 'pandas.io.pytables.HDFStore.get',
-    'pandas.io.pytables.HDFStore.put', 'pandas.io.pytables.HDFStore.select', 'pandas.io.pytables.read_hdf',
-    'pandas.io.sql.read_sql', 'pandas.io.sql.read_frame', 'pandas.io.sql.write_frame',
-    'pandas.io.stata.read_stata']
-
-html_additional_pages = {'generated/' + page: 'api_redirect.html' for page in moved_api_pages}
+    ('pandas.core.common.isnull', 'pandas.isnull'),
+    ('pandas.core.common.notnull', 'pandas.notnull'),
+    ('pandas.core.reshape.get_dummies', 'pandas.get_dummies'),
+    ('pandas.tools.merge.concat', 'pandas.concat'),
+    ('pandas.tools.merge.merge', 'pandas.merge'),
+    ('pandas.tools.pivot.pivot_table', 'pandas.pivot_table'),
+    ('pandas.tseries.tools.to_datetime', 'pandas.to_datetime'),
+    ('pandas.io.clipboard.read_clipboard', 'pandas.read_clipboard'),
+    ('pandas.io.excel.ExcelFile.parse', 'pandas.ExcelFile.parse'),
+    ('pandas.io.excel.read_excel', 'pandas.read_excel'),
+    ('pandas.io.html.read_html', 'pandas.read_html'),
+    ('pandas.io.json.read_json', 'pandas.read_json'),
+    ('pandas.io.parsers.read_csv', 'pandas.read_csv'),
+    ('pandas.io.parsers.read_fwf', 'pandas.read_fwf'),
+    ('pandas.io.parsers.read_table', 'pandas.read_table'),
+    ('pandas.io.pickle.read_pickle', 'pandas.read_pickle'),
+    ('pandas.io.pytables.HDFStore.append', 'pandas.HDFStore.append'),
+    ('pandas.io.pytables.HDFStore.get', 'pandas.HDFStore.get'),
+    ('pandas.io.pytables.HDFStore.put', 'pandas.HDFStore.put'),
+    ('pandas.io.pytables.HDFStore.select', 'pandas.HDFStore.select'),
+    ('pandas.io.pytables.read_hdf', 'pandas.read_hdf'),
+    ('pandas.io.sql.read_sql', 'pandas.read_sql'),
+    ('pandas.io.sql.read_frame', 'pandas.read_frame'),
+    ('pandas.io.sql.write_frame', 'pandas.write_frame'),
+    ('pandas.io.stata.read_stata', 'pandas.read_stata'),
+]
+
+# Again, tuples of (from_old, to_new)
+moved_classes = [
+    ('pandas.tseries.resample.Resampler', 'pandas.core.resample.Resampler'),
+    ('pandas.formats.style.Styler', 'pandas.io.formats.style.Styler'),
+]
+
+for old, new in moved_classes:
+    # the class itself...
+    moved_api_pages.append((old, new))
+
+    mod, classname = new.rsplit('.', 1)
+    klass = getattr(importlib.import_module(mod), classname)
+    methods = [x for x in dir(klass)
+               if not x.startswith('_') or x in ('__iter__', '__array__')]
+
+    for method in methods:
+        # ... and each of its public methods
+        moved_api_pages.append(
+            ("{old}.{method}".format(old=old, method=method),
+             "{new}.{method}".format(new=new, method=method))
+        )
+
+html_additional_pages = {
+    'generated/' + page[0]: 'api_redirect.html'
+    for page in moved_api_pages
+}
+
+html_context = {
+    'redirects': {old: new for old, new in moved_api_pages}
+}

 # If false, no module index is generated.
 html_use_modindex = True
From 39cc1d0685481c77115f061d856cc60c1e59c8c2 Mon Sep 17 00:00:00 2001
From: Min RK
Date: Tue, 2 May 2017 21:17:09 +0200
Subject: [PATCH 498/933] REF: register custom DisplayFormatter for table schema (#16198)

* register custom DisplayFormatter for table schema

instead of using `_ipython_display_` for custom mime-types

* remove unused UnserializableWarning

* PEP8 fixes
---
 pandas/core/config_init.py               | 39 +++++++++++----
 pandas/core/generic.py                   | 33 -------------
 pandas/errors/__init__.py                |  6 ---
 pandas/tests/io/formats/test_printing.py | 63 +++++++++---------------
 4 files changed, 53 insertions(+), 88 deletions(-)

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index d77d17aa4d00e..81fb8090a7afe 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -9,6 +9,7 @@
 module is imported, register them here rather than in the module.
 """
+import sys
 import warnings

 import pandas.core.config as cf
@@ -341,18 +342,36 @@ def mpl_style_cb(key):


 def table_schema_cb(key):
-    # Having _ipython_display_ defined messes with the return value
-    # from cells, so the Out[x] dictionary breaks.
- # Currently table schema is the only thing using it, so we'll - # monkey patch `_ipython_display_` onto NDFrame when config option - # is set - # see https://github.com/pandas-dev/pandas/issues/16168 - from pandas.core.generic import NDFrame, _ipython_display_ + # first, check if we are in IPython + if 'IPython' not in sys.modules: + # definitely not in IPython + return + from IPython import get_ipython + ip = get_ipython() + if ip is None: + # still not in IPython + return + + formatters = ip.display_formatter.formatters + + mimetype = "application/vnd.dataresource+json" if cf.get_option(key): - NDFrame._ipython_display_ = _ipython_display_ - elif getattr(NDFrame, '_ipython_display_', None): - del NDFrame._ipython_display_ + if mimetype not in formatters: + # define tableschema formatter + from IPython.core.formatters import BaseFormatter + + class TableSchemaFormatter(BaseFormatter): + print_method = '_repr_table_schema_' + _return_type = (dict,) + # register it: + formatters[mimetype] = TableSchemaFormatter() + # enable it if it's been disabled: + formatters[mimetype].enabled = True + else: + # unregister tableschema mime-type + if mimetype in formatters: + formatters[mimetype].enabled = False with cf.config_prefix('display'): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 48ee1842dc4a0..b3498583f6e14 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -43,7 +43,6 @@ import pandas.core.algorithms as algos import pandas.core.common as com import pandas.core.missing as missing -from pandas.errors import UnserializableWarning from pandas.io.formats.printing import pprint_thing from pandas.io.formats.format import format_percentiles from pandas.tseries.frequencies import to_offset @@ -6279,38 +6278,6 @@ def logical_func(self, axis=None, bool_only=None, skipna=None, level=None, return set_function_name(logical_func, name, cls) -def _ipython_display_(self): - # Having _ipython_display_ defined messes with the return value - # from cells, so the Out[x] dictionary breaks. - # Currently table schema is the only thing using it, so we'll - # monkey patch `_ipython_display_` onto NDFrame when config option - # is set - # see https://github.com/pandas-dev/pandas/issues/16168 - try: - from IPython.display import display - except ImportError: - return None - - # Series doesn't define _repr_html_ or _repr_latex_ - latex = self._repr_latex_() if hasattr(self, '_repr_latex_') else None - html = self._repr_html_() if hasattr(self, '_repr_html_') else None - try: - table_schema = self._repr_table_schema_() - except Exception as e: - warnings.warn("Cannot create table schema representation. " - "{}".format(e), UnserializableWarning) - table_schema = None - # We need the inital newline since we aren't going through the - # usual __repr__. 
See - # https://github.com/pandas-dev/pandas/pull/14904#issuecomment-277829277 - text = "\n" + repr(self) - - reprs = {"text/plain": text, "text/html": html, "text/latex": latex, - "application/vnd.dataresource+json": table_schema} - reprs = {k: v for k, v in reprs.items() if v} - display(reprs, raw=True) - - # install the indexes for _name, _indexer in indexing.get_indexers_list(): NDFrame._create_indexer(_name, _indexer) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 9b6c9c5be319c..805e689dca840 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -57,9 +57,3 @@ class ParserWarning(Warning): """ -class UnserializableWarning(Warning): - """ - Warning that is raised when a DataFrame cannot be serialized. - - .. versionadded:: 0.20.0 - """ diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index b8d6e9578339f..3acd5c7a5e8c5 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -5,7 +5,6 @@ import pandas as pd from pandas import compat -from pandas.errors import UnserializableWarning import pandas.io.formats.printing as printing import pandas.io.formats.format as fmt import pandas.util.testing as tm @@ -137,8 +136,11 @@ def setUpClass(cls): except ImportError: pytest.skip("Mock is not installed") cls.mock = mock + from IPython.core.interactiveshell import InteractiveShell + cls.display_formatter = InteractiveShell.instance().display_formatter def test_publishes(self): + df = pd.DataFrame({"A": [1, 2]}) objects = [df['A'], df, df] # dataframe / series expected_keys = [ @@ -146,29 +148,20 @@ def test_publishes(self): {'text/plain', 'text/html', 'application/vnd.dataresource+json'}, ] - make_patch = self.mock.patch('IPython.display.display') opt = pd.option_context('display.html.table_schema', True) for obj, expected in zip(objects, expected_keys): - with opt, make_patch as mock_display: - handle = obj._ipython_display_() - assert mock_display.call_count == 1 - assert handle is None - args, kwargs = mock_display.call_args - arg, = args # just one argument - - assert kwargs == {"raw": True} - assert set(arg.keys()) == expected + with opt: + formatted = self.display_formatter.format(obj) + assert set(formatted[0].keys()) == expected with_latex = pd.option_context('display.latex.repr', True) - with opt, with_latex, make_patch as mock_display: - handle = obj._ipython_display_() - args, kwargs = mock_display.call_args - arg, = args + with opt, with_latex: + formatted = self.display_formatter.format(obj) expected = {'text/plain', 'text/html', 'text/latex', 'application/vnd.dataresource+json'} - assert set(arg.keys()) == expected + assert set(formatted[0].keys()) == expected def test_publishes_not_implemented(self): # column MultiIndex @@ -176,18 +169,13 @@ def test_publishes_not_implemented(self): midx = pd.MultiIndex.from_product([['A', 'B'], ['a', 'b', 'c']]) df = pd.DataFrame(np.random.randn(5, len(midx)), columns=midx) - make_patch = self.mock.patch('IPython.display.display') opt = pd.option_context('display.html.table_schema', True) - with opt, make_patch as mock_display: - with pytest.warns(UnserializableWarning) as record: - df._ipython_display_() - args, _ = mock_display.call_args - arg, = args # just one argument + + with opt: + formatted = self.display_formatter.format(df) expected = {'text/plain', 'text/html'} - assert set(arg.keys()) == expected - assert "orient='table' is not supported for MultiIndex" in ( - record[-1].message.args[0]) + 
assert set(formatted[0].keys()) == expected def test_config_on(self): df = pd.DataFrame({"A": [1, 2]}) @@ -209,26 +197,23 @@ def test_config_monkeypatches(self): assert not hasattr(df, '_ipython_display_') assert not hasattr(df['A'], '_ipython_display_') + formatters = self.display_formatter.formatters + mimetype = 'application/vnd.dataresource+json' + with pd.option_context('display.html.table_schema', True): - assert hasattr(df, '_ipython_display_') - # smoke test that it works - df._ipython_display_() - assert hasattr(df['A'], '_ipython_display_') - df['A']._ipython_display_() + assert 'application/vnd.dataresource+json' in formatters + assert formatters[mimetype].enabled - assert not hasattr(df, '_ipython_display_') - assert not hasattr(df['A'], '_ipython_display_') - # re-unsetting is OK - assert not hasattr(df, '_ipython_display_') - assert not hasattr(df['A'], '_ipython_display_') + # still there, just disabled + assert 'application/vnd.dataresource+json' in formatters + assert not formatters[mimetype].enabled # able to re-set with pd.option_context('display.html.table_schema', True): - assert hasattr(df, '_ipython_display_') + assert 'application/vnd.dataresource+json' in formatters + assert formatters[mimetype].enabled # smoke test that it works - df._ipython_display_() - assert hasattr(df['A'], '_ipython_display_') - df['A']._ipython_display_() + self.display_formatter.format(cf) # TODO: fix this broken test From 154a6478eec50459ae46d28e4393b345c9ab9cd1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 2 May 2017 20:54:58 -0400 Subject: [PATCH 499/933] COMPAT: ensure proper extension dtype's don't pickle the cache (#16207) xref #16201 --- pandas/core/dtypes/dtypes.py | 28 +++++++- pandas/tests/dtypes/test_dtypes.py | 109 +++++++++++++++++++++++++---- 2 files changed, 119 insertions(+), 18 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 59c23addd418e..561f1951a4151 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -24,6 +24,7 @@ class ExtensionDtype(object): isbuiltin = 0 isnative = 0 _metadata = [] + _cache = {} def __unicode__(self): return self.name @@ -71,6 +72,15 @@ def __eq__(self, other): def __ne__(self, other): return not self.__eq__(other) + def __getstate__(self): + # pickle support; we don't want to pickle the cache + return {k: getattr(self, k, None) for k in self._metadata} + + @classmethod + def reset_cache(cls): + """ clear the cache """ + cls._cache = {} + @classmethod def is_dtype(cls, dtype): """ Return a boolean if the passed type is an actual dtype that @@ -110,6 +120,7 @@ class CategoricalDtype(ExtensionDtype): kind = 'O' str = '|O08' base = np.dtype('O') + _metadata = [] _cache = {} def __new__(cls): @@ -408,9 +419,15 @@ def __new__(cls, subtype=None): if isinstance(subtype, IntervalDtype): return subtype - elif subtype is None or (isinstance(subtype, compat.string_types) and - subtype == 'interval'): - subtype = None + elif subtype is None: + # we are called as an empty constructor + # generally for pickle compat + u = object.__new__(cls) + u.subtype = None + return u + elif (isinstance(subtype, compat.string_types) and + subtype == 'interval'): + subtype = '' else: if isinstance(subtype, compat.string_types): m = cls._match.search(subtype) @@ -423,6 +440,11 @@ def __new__(cls, subtype=None): except TypeError: raise ValueError("could not construct IntervalDtype") + if subtype is None: + u = object.__new__(cls) + u.subtype = None + return u + try: return cls._cache[str(subtype)] 
except KeyError: diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index da3120145fe38..fb20571213c15 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -23,6 +23,9 @@ class Base(object): + def setup_method(self, method): + self.dtype = self.create() + def test_hash(self): hash(self.dtype) @@ -37,14 +40,38 @@ def test_numpy_informed(self): assert not np.str_ == self.dtype def test_pickle(self): + # make sure our cache is NOT pickled + + # clear the cache + type(self.dtype).reset_cache() + assert not len(self.dtype._cache) + + # force back to the cache result = tm.round_trip_pickle(self.dtype) + assert not len(self.dtype._cache) assert result == self.dtype -class TestCategoricalDtype(Base, tm.TestCase): +class TestCategoricalDtype(Base): + + def create(self): + return CategoricalDtype() + + def test_pickle(self): + # make sure our cache is NOT pickled + + # clear the cache + type(self.dtype).reset_cache() + assert not len(self.dtype._cache) - def setUp(self): - self.dtype = CategoricalDtype() + # force back to the cache + result = tm.round_trip_pickle(self.dtype) + + # we are a singular object so we are added + # back to the cache upon unpickling + # this is to ensure object identity + assert len(self.dtype._cache) == 1 + assert result == self.dtype def test_hash_vs_equality(self): # make sure that we satisfy is semantics @@ -93,10 +120,10 @@ def test_basic(self): assert not is_categorical(1.0) -class TestDatetimeTZDtype(Base, tm.TestCase): +class TestDatetimeTZDtype(Base): - def setUp(self): - self.dtype = DatetimeTZDtype('ns', 'US/Eastern') + def create(self): + return DatetimeTZDtype('ns', 'US/Eastern') def test_hash_vs_equality(self): # make sure that we satisfy is semantics @@ -209,10 +236,24 @@ def test_empty(self): str(dt) -class TestPeriodDtype(Base, tm.TestCase): +class TestPeriodDtype(Base): - def setUp(self): - self.dtype = PeriodDtype('D') + def create(self): + return PeriodDtype('D') + + def test_hash_vs_equality(self): + # make sure that we satisfy is semantics + dtype = self.dtype + dtype2 = PeriodDtype('D') + dtype3 = PeriodDtype(dtype2) + assert dtype == dtype2 + assert dtype2 == dtype + assert dtype3 == dtype + assert dtype is dtype2 + assert dtype2 is dtype + assert dtype3 is dtype + assert hash(dtype) == hash(dtype2) + assert hash(dtype) == hash(dtype3) def test_construction(self): with pytest.raises(ValueError): @@ -338,11 +379,37 @@ def test_not_string(self): assert not is_string_dtype(PeriodDtype('D')) -class TestIntervalDtype(Base, tm.TestCase): +class TestIntervalDtype(Base): + + def create(self): + return IntervalDtype('int64') + + def test_hash_vs_equality(self): + # make sure that we satisfy is semantics + dtype = self.dtype + dtype2 = IntervalDtype('int64') + dtype3 = IntervalDtype(dtype2) + assert dtype == dtype2 + assert dtype2 == dtype + assert dtype3 == dtype + assert dtype is dtype2 + assert dtype2 is dtype + assert dtype3 is dtype + assert hash(dtype) == hash(dtype2) + assert hash(dtype) == hash(dtype3) - # TODO: placeholder - def setUp(self): - self.dtype = IntervalDtype('int64') + dtype1 = IntervalDtype('interval') + dtype2 = IntervalDtype(dtype1) + dtype3 = IntervalDtype('interval') + assert dtype2 == dtype1 + assert dtype2 == dtype2 + assert dtype2 == dtype3 + assert dtype2 is dtype1 + assert dtype2 is dtype2 + assert dtype2 is dtype3 + assert hash(dtype2) == hash(dtype1) + assert hash(dtype2) == hash(dtype2) + assert hash(dtype2) == hash(dtype3) def test_construction(self): 
with pytest.raises(ValueError): @@ -356,9 +423,9 @@ def test_construction(self): def test_construction_generic(self): # generic i = IntervalDtype('interval') - assert i.subtype is None + assert i.subtype == '' assert is_interval_dtype(i) - assert str(i) == 'interval' + assert str(i) == 'interval[]' i = IntervalDtype() assert i.subtype is None @@ -445,3 +512,15 @@ def test_basic_dtype(self): assert not is_interval_dtype(np.object_) assert not is_interval_dtype(np.int64) assert not is_interval_dtype(np.float64) + + def test_caching(self): + IntervalDtype.reset_cache() + dtype = IntervalDtype("int64") + assert len(IntervalDtype._cache) == 1 + + IntervalDtype("interval") + assert len(IntervalDtype._cache) == 2 + + IntervalDtype.reset_cache() + tm.round_trip_pickle(dtype) + assert len(IntervalDtype._cache) == 0 From ae70ecee9caa7173634756aa2daae41dde0fa16a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 May 2017 20:22:49 -0500 Subject: [PATCH 500/933] BUG: Fixed renaming of falsey names in build_table_schema (#16205) Closes https://github.com/pandas-dev/pandas/issues/16203 --- pandas/io/json/table_schema.py | 6 +++++- pandas/tests/io/json/test_json_table_schema.py | 8 ++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/io/json/table_schema.py b/pandas/io/json/table_schema.py index d8ef3afc9591f..c3865afa9c0c0 100644 --- a/pandas/io/json/table_schema.py +++ b/pandas/io/json/table_schema.py @@ -76,7 +76,11 @@ def set_default_names(data): def make_field(arr, dtype=None): dtype = dtype or arr.dtype - field = {'name': arr.name or 'values', + if arr.name is None: + name = 'values' + else: + name = arr.name + field = {'name': name, 'type': as_json_table_type(dtype)} if is_categorical_dtype(arr): diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 0f77a886dd302..c3a976973bb29 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -461,3 +461,11 @@ def test_overlapping_names(self): data.to_json(orient='table') assert 'Overlapping' in str(excinfo.value) + + def test_mi_falsey_name(self): + # GH 16203 + df = pd.DataFrame(np.random.randn(4, 4), + index=pd.MultiIndex.from_product([('A', 'B'), + ('a', 'b')])) + result = [x['name'] for x in build_table_schema(df)['fields']] + assert result == ['level_0', 'level_1', 0, 1, 2, 3] From f1549668c84b481a67547bb46261d1394c8dd3e7 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 3 May 2017 06:30:08 -0400 Subject: [PATCH 501/933] DOC: add whatsnew for v0.20.1 --- doc/source/whatsnew/v0.20.1.txt | 85 +++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 doc/source/whatsnew/v0.20.1.txt diff --git a/doc/source/whatsnew/v0.20.1.txt b/doc/source/whatsnew/v0.20.1.txt new file mode 100644 index 0000000000000..504f8004bc8a6 --- /dev/null +++ b/doc/source/whatsnew/v0.20.1.txt @@ -0,0 +1,85 @@ +.. _whatsnew_0201: + +v0.20.1 (???) +------------- + +This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes, +bug fixes and performance improvements. +We recommend that all users upgrade to this version. + +Highlights include: + +.. contents:: What's new in v0.20.1 + :local: + :backlinks: none + + +.. _whatsnew_0201.enhancements: + +Enhancements +~~~~~~~~~~~~ + + + +.. _whatsnew_0201.performance: + +Performance Improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. 
_whatsnew_0201.bug_fixes: + +Bug Fixes +~~~~~~~~~ + +Conversion +^^^^^^^^^^ + + + + +Indexing +^^^^^^^^ + + + + +I/O +^^^ + + + + +Plotting +^^^^^^^^ + + + + +Groupby/Resample/Rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + + + + +Sparse +^^^^^^ + + + + +Reshaping +^^^^^^^^^ + + + + +Numeric +^^^^^^^ + + + + +Other +^^^^^ From 9051f0d8674c4e112f8f63bc186e96aa2bff22d6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 3 May 2017 13:16:45 -0500 Subject: [PATCH 502/933] DOC: Remove various warnings from doc build (#16206) * DOC: start fixing warnings * DOC: Remove various warnings from doc build * fixup advanced --- doc/source/basics.rst | 2 ++ doc/source/cookbook.rst | 3 ++- doc/source/whatsnew/v0.19.0.txt | 2 +- doc/source/whatsnew/v0.20.0.txt | 35 ++++++++++++++++++--------------- doc/source/whatsnew/v0.8.0.txt | 2 -- pandas/core/indexes/interval.py | 2 +- pandas/io/formats/style.py | 2 ++ 7 files changed, 27 insertions(+), 21 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 7a056203ed447..134cc5106015b 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1004,6 +1004,7 @@ Transform the entire frame. ``.transform()`` allows input functions as: a numpy function name or a user defined function. .. ipython:: python + :okwarning: tsdf.transform(np.abs) tsdf.transform('abs') @@ -1055,6 +1056,7 @@ Passing a dict of lists will generate a multi-indexed DataFrame with these selective transforms. .. ipython:: python + :okwarning: tsdf.transform({'A': np.abs, 'B': [lambda x: x+1, 'sqrt']}) diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 8466b3d3c3297..62aa487069132 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -968,7 +968,8 @@ You can use the same approach to read all files matching a pattern. Here is an Finally, this strategy will work with the other ``pd.read_*(...)`` functions described in the :ref:`io docs`. .. ipython:: python - :supress: + :suppress: + for i in range(3): os.remove('file_{}.csv'.format(i)) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 0c9bb029b9b68..bc5e278df743f 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -479,7 +479,7 @@ Other enhancements df.resample('M', on='date').sum() df.resample('M', level='d').sum() -- The ``.get_credentials()`` method of ``GbqConnector`` can now first try to fetch `the application default credentials `__. See the :ref:`docs ` for more details (:issue:`13577`). +- The ``.get_credentials()`` method of ``GbqConnector`` can now first try to fetch `the application default credentials `__. See the docs for more details (:issue:`13577`). - The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behavior remains to raising a ``NonExistentTimeError`` (:issue:`13057`) - ``.to_hdf/read_hdf()`` now accept path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path (:issue:`11773`) - The ``pd.read_csv()`` with ``engine='python'`` has gained support for the diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4882acbe820ea..230c7c0b90ac0 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -83,6 +83,7 @@ of all unique functions. Those that are not noted for a particular column will b The API also supports a ``.transform()`` function to provide for broadcasting results. .. 
ipython:: python + :okwarning: df.transform(['abs', lambda x: x - x.min()]) @@ -373,26 +374,28 @@ Experimental support has been added to export ``DataFrame.style`` formats to Exc For example, after running the following, ``styled.xlsx`` renders as below: .. ipython:: python + :okwarning: - np.random.seed(24) - df = pd.DataFrame({'A': np.linspace(1, 10, 10)}) - df = pd.concat([df, pd.DataFrame(np.random.RandomState(24).randn(10, 4), - columns=list('BCDE'))], - axis=1) - df.iloc[0, 2] = np.nan - df - styled = df.style.\ - applymap(lambda val: 'color: %s' % 'red' if val < 0 else 'black').\ - apply(lambda s: ['background-color: yellow' if v else '' - for v in s == s.max()]) - styled.to_excel('styled.xlsx', engine='openpyxl') + np.random.seed(24) + df = pd.DataFrame({'A': np.linspace(1, 10, 10)}) + df = pd.concat([df, pd.DataFrame(np.random.RandomState(24).randn(10, 4), + columns=list('BCDE'))], + axis=1) + df.iloc[0, 2] = np.nan + df + styled = df.style.\ + applymap(lambda val: 'color: %s' % 'red' if val < 0 else 'black').\ + apply(lambda s: ['background-color: yellow' if v else '' + for v in s == s.max()]) + styled.to_excel('styled.xlsx', engine='openpyxl') .. image:: _static/style-excel.png .. ipython:: python - :suppress: - import os - os.remove('styled.xlsx') + :suppress: + + import os + os.remove('styled.xlsx') See the :ref:`Style documentation ` for more detail. @@ -490,7 +493,7 @@ Other Enhancements - ``Series.interpolate()`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) - Addition of a ``level`` keyword to ``DataFrame/Series.rename`` to rename labels in the specified level of a MultiIndex (:issue:`4160`). -- ``DataFrame.reset_index()`` will now interpret a tuple ``index.name`` as a key spanning across levels of ``columns``, if this is a ``MultiIndex`` (:issues:`16164`) +- ``DataFrame.reset_index()`` will now interpret a tuple ``index.name`` as a key spanning across levels of ``columns``, if this is a ``MultiIndex`` (:issue:`16164`) - ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs ` (:issue:`15136`) - ``.select_dtypes()`` now allows the string ``datetimetz`` to generically select datetimes with tz (:issue:`14910`) - The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements diff --git a/doc/source/whatsnew/v0.8.0.txt b/doc/source/whatsnew/v0.8.0.txt index 4136c108fba57..b9cece752981e 100644 --- a/doc/source/whatsnew/v0.8.0.txt +++ b/doc/source/whatsnew/v0.8.0.txt @@ -168,7 +168,6 @@ New plotting methods fx['FR'].plot(style='g') - @savefig whatsnew_secondary_y.png fx['IT'].plot(style='k--', secondary_y=True) Vytautas Jancauskas, the 2012 GSOC participant, has added many new plot @@ -180,7 +179,6 @@ types. For example, ``'kde'`` is a new option: np.random.randn(1000) * 0.5 + 3))) plt.figure() s.hist(normed=True, alpha=0.2) - @savefig whatsnew_kde.png s.plot(kind='kde') See :ref:`the plotting page ` for much more. diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index f14e7bf6bd183..ccd0d8bee4abc 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -99,7 +99,7 @@ class IntervalIndex(IntervalMixin, Index): .. versionadded:: 0.20.0 - Properties + Attributes ---------- left, right : array-like (1-dimensional) Left and right bounds for each interval. 
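
For reference, the two attributes named in the docstring above behave as in
this minimal sketch (illustrative only, assuming the ``IntervalIndex`` API
introduced in 0.20.0):

    # illustrative sketch, not part of this patch
    import pandas as pd

    idx = pd.IntervalIndex.from_breaks([0, 1, 2, 3])
    idx.left   # left bound of each interval:  0, 1, 2
    idx.right  # right bound of each interval: 1, 2, 3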
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
index f1ff2966dca48..71c61998be092 100644
--- a/pandas/io/formats/style.py
+++ b/pandas/io/formats/style.py
@@ -982,7 +982,9 @@ def bar(self, subset=None, axis=0, color='#d65f5f', width=100,
         """
         Color the background ``color`` proportional to the values
         in each column. Excludes non-numeric data by default.
+
         .. versionadded:: 0.17.1
+
         Parameters
         ----------
         subset: IndexSlice, default None
From 67e7efccf88ca0532f219d5151156cd19e63df04 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Wed, 3 May 2017 13:23:28 -0500
Subject: [PATCH 503/933] API Change repr name for table schema (#16204)

* API Change repr name for table schema

Not API breaking, since pandas 0.20.0 hasn't been released yet.

* REF: Move Formatter to printing

* pep8
---
 pandas/core/config_init.py               | 33 ++----------------------
 pandas/core/generic.py                   |  2 +-
 pandas/io/formats/printing.py            | 32 +++++++++++++++++++++++
 pandas/tests/io/formats/test_printing.py | 10 +++----
 4 files changed, 38 insertions(+), 39 deletions(-)

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 81fb8090a7afe..7e6ffaaffb72b 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -9,7 +9,6 @@
 module is imported, register them here rather than in the module.
 """
-import sys
 import warnings

 import pandas.core.config as cf
@@ -342,36 +341,8 @@ def mpl_style_cb(key):


 def table_schema_cb(key):
-    # first, check if we are in IPython
-    if 'IPython' not in sys.modules:
-        # definitely not in IPython
-        return
-    from IPython import get_ipython
-    ip = get_ipython()
-    if ip is None:
-        # still not in IPython
-        return
-
-    formatters = ip.display_formatter.formatters
-
-    mimetype = "application/vnd.dataresource+json"
-
-    if cf.get_option(key):
-        if mimetype not in formatters:
-            # define tableschema formatter
-            from IPython.core.formatters import BaseFormatter
-
-            class TableSchemaFormatter(BaseFormatter):
-                print_method = '_repr_table_schema_'
-                _return_type = (dict,)
-            # register it:
-            formatters[mimetype] = TableSchemaFormatter()
-            # enable it if it's been disabled:
-            formatters[mimetype].enabled = True
-        else:
-            # unregister tableschema mime-type
-            if mimetype in formatters:
-                formatters[mimetype].enabled = False
+    from pandas.io.formats.printing import _enable_data_resource_formatter
+    _enable_data_resource_formatter(cf.get_option(key))


 with cf.config_prefix('display'):
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index b3498583f6e14..2bc64795b5f20 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -129,7 +129,7 @@ def __init__(self, data, axes=None, copy=False, dtype=None,
         object.__setattr__(self, '_data', data)
         object.__setattr__(self, '_item_cache', {})

-    def _repr_table_schema_(self):
+    def _repr_data_resource_(self):
         """
         Not a real Jupyter special repr method, but we use the same
         naming convention.
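
The renamed hook is only consulted once the table-schema option is switched
on; roughly (a sketch, not part of this patch):

    # sketch, not part of this patch; assumes a Jupyter/IPython session
    # for the mime-type rendering to actually take effect
    import pandas as pd

    pd.set_option('display.html.table_schema', True)
    df = pd.DataFrame({'A': [1, 2]})
    payload = df._repr_data_resource_()  # a dict; the registered formatter
    # publishes it under the application/vnd.dataresource+json mime type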
diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 5ea47df2c817f..cbad603630bd3 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -2,6 +2,7 @@ printing tools """ +import sys from pandas.core.dtypes.inference import is_sequence from pandas import compat from pandas.compat import u @@ -233,3 +234,34 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): def pprint_thing_encoded(object, encoding='utf-8', errors='replace', **kwds): value = pprint_thing(object) # get unicode representation of object return value.encode(encoding, errors, **kwds) + + +def _enable_data_resource_formatter(enable): + if 'IPython' not in sys.modules: + # definitely not in IPython + return + from IPython import get_ipython + ip = get_ipython() + if ip is None: + # still not in IPython + return + + formatters = ip.display_formatter.formatters + mimetype = "application/vnd.dataresource+json" + + if enable: + if mimetype not in formatters: + # define tableschema formatter + from IPython.core.formatters import BaseFormatter + + class TableSchemaFormatter(BaseFormatter): + print_method = '_repr_data_resource_' + _return_type = (dict,) + # register it: + formatters[mimetype] = TableSchemaFormatter() + # enable it if it's been disabled: + formatters[mimetype].enabled = True + else: + # unregister tableschema mime-type + if mimetype in formatters: + formatters[mimetype].enabled = False diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 3acd5c7a5e8c5..44fbd5a958d8c 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -180,23 +180,19 @@ def test_publishes_not_implemented(self): def test_config_on(self): df = pd.DataFrame({"A": [1, 2]}) with pd.option_context("display.html.table_schema", True): - result = df._repr_table_schema_() + result = df._repr_data_resource_() assert result is not None def test_config_default_off(self): df = pd.DataFrame({"A": [1, 2]}) with pd.option_context("display.html.table_schema", False): - result = df._repr_table_schema_() + result = df._repr_data_resource_() assert result is None - def test_config_monkeypatches(self): + def test_enable_data_resource_formatter(self): # GH 10491 - df = pd.DataFrame({"A": [1, 2]}) - assert not hasattr(df, '_ipython_display_') - assert not hasattr(df['A'], '_ipython_display_') - formatters = self.display_formatter.formatters mimetype = 'application/vnd.dataresource+json' From 02eafaf2b8e761779e31e185719c31b0778b5317 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 3 May 2017 21:18:41 +0200 Subject: [PATCH 504/933] DEPR: correct deprecation message for datetools (#16202) --- pandas/util/depr_module.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/util/depr_module.py b/pandas/util/depr_module.py index b438c91d980af..9c648b76fdad1 100644 --- a/pandas/util/depr_module.py +++ b/pandas/util/depr_module.py @@ -83,8 +83,7 @@ def __getattr__(self, name): FutureWarning, stacklevel=2) else: if deprmodto is None: - deprmodto = "{modname}.{name}".format( - modname=obj.__module__, name=name) + deprmodto = obj.__module__ # The object is actually located in another module. warnings.warn( "{deprmod}.{name} is deprecated. 
Please use " From 1002cc339d81591b280ec3ec3c8ac58ce157e3b6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 3 May 2017 18:52:38 -0400 Subject: [PATCH 505/933] DOC: add whatsnew for 0.21.0 --- doc/source/whatsnew/v0.21.0.txt | 113 ++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 doc/source/whatsnew/v0.21.0.txt diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt new file mode 100644 index 0000000000000..36dffc3d3378b --- /dev/null +++ b/doc/source/whatsnew/v0.21.0.txt @@ -0,0 +1,113 @@ +.. _whatsnew_0210: + +v0.21.0 (???) +------------- + +This is a major release from 0.20.x and includes a number of API changes, deprecations, new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +Highlights include: + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +.. contents:: What's new in v0.21.0 + :local: + :backlinks: none + +.. _whatsnew_0210.enhancements: + +New features +~~~~~~~~~~~~ + + + +.. _whatsnew_0210.enhancements.other: + +Other Enhancements +^^^^^^^^^^^^^^^^^^ + + + +.. _whatsnew_0210.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. _whatsnew_0210.api: + +Other API Changes +^^^^^^^^^^^^^^^^^ + + + +.. _whatsnew_0210.deprecations: + +Deprecations +~~~~~~~~~~~~ + + + +.. _whatsnew_0210.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. _whatsnew_0210.performance: + +Performance Improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. _whatsnew_0210.bug_fixes: + +Bug Fixes +~~~~~~~~~ + +Conversion +^^^^^^^^^^ + + + +Indexing +^^^^^^^^ + + + +I/O +^^^ + + + +Plotting +^^^^^^^^ + + + +Groupby/Resample/Rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + + + +Sparse +^^^^^^ + + + +Reshaping +^^^^^^^^^ + + + +Numeric +^^^^^^^ + + + +Other +^^^^^ From 4a748ccf6086a07e5ce18066099c835b2beef31f Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 3 May 2017 22:38:12 -0400 Subject: [PATCH 506/933] MAINT: Complete Conversion to Pytest Idiom (#16201) * MAINT: Convert test setup/teardown to pytest idiom * tm.TestCase now just inherits from object * setUpClass renamed to setup_class * tearDownClass renamed to teardown_class * setUp renamed to setup_method * tearDown renamed to teardown_method * MAINT: Remove unittest.TestCase from testing * DOC: Update documentation for TestCase usage tm.TestCase no longer follows the nosetest idiom, so it is here to stay, so update the documentation to say that we are using it still. Closes gh-15990. * TST: Patch Circle matplotlib failure The tm.mplskip decorator was breaking on Circle, so this commit removes the decorator and replaces it with direct function calls to check for matplotlib. 
* TST: Replace yield-based tests in test_query_eval --- doc/source/contributing.rst | 8 - pandas/tests/computation/test_eval.py | 62 ++++---- pandas/tests/frame/test_asof.py | 2 +- pandas/tests/frame/test_indexing.py | 4 +- pandas/tests/frame/test_period.py | 2 +- pandas/tests/frame/test_query_eval.py | 148 ++++++------------ pandas/tests/frame/test_validate.py | 4 +- pandas/tests/groupby/common.py | 2 +- pandas/tests/groupby/test_aggregate.py | 2 +- pandas/tests/groupby/test_bin_groupby.py | 2 +- pandas/tests/groupby/test_filters.py | 2 +- pandas/tests/indexes/datetimes/test_astype.py | 2 +- .../indexes/datetimes/test_date_range.py | 4 +- .../indexes/datetimes/test_datetimelike.py | 2 +- pandas/tests/indexes/datetimes/test_ops.py | 8 +- pandas/tests/indexes/datetimes/test_setops.py | 4 +- pandas/tests/indexes/period/test_asfreq.py | 2 +- .../tests/indexes/period/test_construction.py | 4 +- pandas/tests/indexes/period/test_indexing.py | 2 +- pandas/tests/indexes/period/test_ops.py | 6 +- .../indexes/period/test_partial_slicing.py | 2 +- pandas/tests/indexes/period/test_period.py | 2 +- pandas/tests/indexes/period/test_setops.py | 2 +- pandas/tests/indexes/period/test_tools.py | 2 +- pandas/tests/indexes/test_base.py | 4 +- pandas/tests/indexes/test_category.py | 2 +- pandas/tests/indexes/test_frozen.py | 4 +- pandas/tests/indexes/test_interval.py | 4 +- pandas/tests/indexes/test_multi.py | 2 +- pandas/tests/indexes/test_numeric.py | 6 +- pandas/tests/indexes/test_range.py | 2 +- .../tests/indexes/timedeltas/test_astype.py | 2 +- pandas/tests/indexes/timedeltas/test_ops.py | 4 +- .../indexes/timedeltas/test_timedelta.py | 2 +- pandas/tests/indexing/common.py | 2 +- pandas/tests/indexing/test_categorical.py | 2 +- pandas/tests/indexing/test_coercion.py | 2 +- pandas/tests/indexing/test_interval.py | 2 +- pandas/tests/io/formats/test_format.py | 6 +- pandas/tests/io/formats/test_printing.py | 2 +- pandas/tests/io/formats/test_style.py | 9 +- .../tests/io/json/test_json_table_schema.py | 4 +- pandas/tests/io/json/test_pandas.py | 4 +- pandas/tests/io/json/test_ujson.py | 8 +- pandas/tests/io/parser/test_network.py | 2 +- pandas/tests/io/parser/test_parsers.py | 2 +- pandas/tests/io/parser/test_textreader.py | 2 +- pandas/tests/io/sas/test_sas7bdat.py | 2 +- pandas/tests/io/sas/test_xport.py | 2 +- pandas/tests/io/test_clipboard.py | 8 +- pandas/tests/io/test_common.py | 2 +- pandas/tests/io/test_excel.py | 20 +-- pandas/tests/io/test_gbq.py | 4 +- pandas/tests/io/test_html.py | 16 +- pandas/tests/io/test_packers.py | 28 ++-- pandas/tests/io/test_pytables.py | 12 +- pandas/tests/io/test_sql.py | 52 +++--- pandas/tests/io/test_stata.py | 2 +- pandas/tests/plotting/common.py | 8 +- pandas/tests/plotting/test_boxplot_method.py | 4 +- pandas/tests/plotting/test_converter.py | 4 +- pandas/tests/plotting/test_datetimelike.py | 9 +- pandas/tests/plotting/test_deprecated.py | 3 +- pandas/tests/plotting/test_frame.py | 7 +- pandas/tests/plotting/test_groupby.py | 3 +- pandas/tests/plotting/test_hist_method.py | 10 +- pandas/tests/plotting/test_misc.py | 8 +- pandas/tests/plotting/test_series.py | 7 +- pandas/tests/reshape/test_concat.py | 4 +- pandas/tests/reshape/test_hashing.py | 2 +- pandas/tests/reshape/test_join.py | 2 +- pandas/tests/reshape/test_merge.py | 4 +- pandas/tests/reshape/test_merge_asof.py | 2 +- pandas/tests/reshape/test_merge_ordered.py | 2 +- pandas/tests/reshape/test_pivot.py | 4 +- pandas/tests/reshape/test_reshape.py | 4 +- pandas/tests/scalar/test_interval.py | 2 +- 
pandas/tests/scalar/test_period.py | 2 +- pandas/tests/scalar/test_timedelta.py | 2 +- pandas/tests/scalar/test_timestamp.py | 2 +- pandas/tests/series/test_indexing.py | 6 +- pandas/tests/series/test_period.py | 2 +- pandas/tests/sparse/test_array.py | 2 +- pandas/tests/sparse/test_combine_concat.py | 2 +- pandas/tests/sparse/test_frame.py | 4 +- pandas/tests/sparse/test_groupby.py | 2 +- pandas/tests/sparse/test_indexing.py | 6 +- pandas/tests/sparse/test_list.py | 5 +- pandas/tests/sparse/test_pivot.py | 2 +- pandas/tests/sparse/test_series.py | 8 +- pandas/tests/test_algos.py | 2 +- pandas/tests/test_base.py | 8 +- pandas/tests/test_categorical.py | 4 +- pandas/tests/test_config.py | 9 +- pandas/tests/test_expressions.py | 4 +- pandas/tests/test_internals.py | 2 +- pandas/tests/test_multilevel.py | 2 +- pandas/tests/test_nanops.py | 10 +- pandas/tests/test_panel.py | 4 +- pandas/tests/test_panel4d.py | 6 +- pandas/tests/test_panelnd.py | 2 +- pandas/tests/test_resample.py | 8 +- pandas/tests/test_testing.py | 7 +- pandas/tests/test_util.py | 10 +- pandas/tests/test_window.py | 24 +-- pandas/tests/tseries/test_holiday.py | 6 +- pandas/tests/tseries/test_offsets.py | 18 +-- pandas/tests/tseries/test_timezones.py | 6 +- pandas/util/testing.py | 41 ++--- 109 files changed, 374 insertions(+), 440 deletions(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 08e28582e7469..26a2f56f3c1a1 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -632,14 +632,6 @@ framework that will facilitate testing and developing. Thus, instead of writing def test_really_cool_feature(): .... -Sometimes, it does make sense to bundle test functions together into a single class, either because the test file is testing multiple functions from a single module, and -using test classes allows for better organization. However, instead of inheriting from ``tm.TestCase``, we should just inherit from ``object``: - -.. code-block:: python - - class TestReallyCoolFeature(object): - .... 
- Using ``pytest`` ~~~~~~~~~~~~~~~~ diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index f8f84985142a8..5086b803419c6 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -98,8 +98,8 @@ def _is_py3_complex_incompat(result, expected): class TestEvalNumexprPandas(tm.TestCase): @classmethod - def setUpClass(cls): - super(TestEvalNumexprPandas, cls).setUpClass() + def setup_class(cls): + super(TestEvalNumexprPandas, cls).setup_class() tm.skip_if_no_ne() import numexpr as ne cls.ne = ne @@ -107,8 +107,8 @@ def setUpClass(cls): cls.parser = 'pandas' @classmethod - def tearDownClass(cls): - super(TestEvalNumexprPandas, cls).tearDownClass() + def teardown_class(cls): + super(TestEvalNumexprPandas, cls).teardown_class() del cls.engine, cls.parser if hasattr(cls, 'ne'): del cls.ne @@ -137,12 +137,12 @@ def setup_ops(self): self.arith_ops = _good_arith_ops self.unary_ops = '-', '~', 'not ' - def setUp(self): + def setup_method(self, method): self.setup_ops() self.setup_data() self.current_engines = filter(lambda x: x != self.engine, _engines) - def tearDown(self): + def teardown_method(self, method): del self.lhses, self.rhses, self.scalar_rhses, self.scalar_lhses del self.pandas_rhses, self.pandas_lhses, self.current_engines @@ -723,8 +723,8 @@ def test_float_truncation(self): class TestEvalNumexprPython(TestEvalNumexprPandas): @classmethod - def setUpClass(cls): - super(TestEvalNumexprPython, cls).setUpClass() + def setup_class(cls): + super(TestEvalNumexprPython, cls).setup_class() tm.skip_if_no_ne() import numexpr as ne cls.ne = ne @@ -750,8 +750,8 @@ def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): class TestEvalPythonPython(TestEvalNumexprPython): @classmethod - def setUpClass(cls): - super(TestEvalPythonPython, cls).setUpClass() + def setup_class(cls): + super(TestEvalPythonPython, cls).setup_class() cls.engine = 'python' cls.parser = 'python' @@ -780,8 +780,8 @@ def check_alignment(self, result, nlhs, ghs, op): class TestEvalPythonPandas(TestEvalPythonPython): @classmethod - def setUpClass(cls): - super(TestEvalPythonPandas, cls).setUpClass() + def setup_class(cls): + super(TestEvalPythonPandas, cls).setup_class() cls.engine = 'python' cls.parser = 'pandas' @@ -1070,16 +1070,16 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): class TestOperationsNumExprPandas(tm.TestCase): @classmethod - def setUpClass(cls): - super(TestOperationsNumExprPandas, cls).setUpClass() + def setup_class(cls): + super(TestOperationsNumExprPandas, cls).setup_class() tm.skip_if_no_ne() cls.engine = 'numexpr' cls.parser = 'pandas' cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms @classmethod - def tearDownClass(cls): - super(TestOperationsNumExprPandas, cls).tearDownClass() + def teardown_class(cls): + super(TestOperationsNumExprPandas, cls).teardown_class() del cls.engine, cls.parser def eval(self, *args, **kwargs): @@ -1492,8 +1492,8 @@ def test_simple_in_ops(self): class TestOperationsNumExprPython(TestOperationsNumExprPandas): @classmethod - def setUpClass(cls): - super(TestOperationsNumExprPython, cls).setUpClass() + def setup_class(cls): + super(TestOperationsNumExprPython, cls).setup_class() cls.engine = 'numexpr' cls.parser = 'python' tm.skip_if_no_ne(cls.engine) @@ -1566,8 +1566,8 @@ def test_simple_bool_ops(self): class TestOperationsPythonPython(TestOperationsNumExprPython): @classmethod - def setUpClass(cls): - super(TestOperationsPythonPython, cls).setUpClass() + def 
setup_class(cls): + super(TestOperationsPythonPython, cls).setup_class() cls.engine = cls.parser = 'python' cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms cls.arith_ops = filter(lambda x: x not in ('in', 'not in'), @@ -1577,8 +1577,8 @@ def setUpClass(cls): class TestOperationsPythonPandas(TestOperationsNumExprPandas): @classmethod - def setUpClass(cls): - super(TestOperationsPythonPandas, cls).setUpClass() + def setup_class(cls): + super(TestOperationsPythonPandas, cls).setup_class() cls.engine = 'python' cls.parser = 'pandas' cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms @@ -1587,8 +1587,8 @@ def setUpClass(cls): class TestMathPythonPython(tm.TestCase): @classmethod - def setUpClass(cls): - super(TestMathPythonPython, cls).setUpClass() + def setup_class(cls): + super(TestMathPythonPython, cls).setup_class() tm.skip_if_no_ne() cls.engine = 'python' cls.parser = 'pandas' @@ -1596,7 +1596,7 @@ def setUpClass(cls): cls.binary_fns = _binary_math_ops @classmethod - def tearDownClass(cls): + def teardown_class(cls): del cls.engine, cls.parser def eval(self, *args, **kwargs): @@ -1694,8 +1694,8 @@ def test_keyword_arg(self): class TestMathPythonPandas(TestMathPythonPython): @classmethod - def setUpClass(cls): - super(TestMathPythonPandas, cls).setUpClass() + def setup_class(cls): + super(TestMathPythonPandas, cls).setup_class() cls.engine = 'python' cls.parser = 'pandas' @@ -1703,8 +1703,8 @@ def setUpClass(cls): class TestMathNumExprPandas(TestMathPythonPython): @classmethod - def setUpClass(cls): - super(TestMathNumExprPandas, cls).setUpClass() + def setup_class(cls): + super(TestMathNumExprPandas, cls).setup_class() cls.engine = 'numexpr' cls.parser = 'pandas' @@ -1712,8 +1712,8 @@ def setUpClass(cls): class TestMathNumExprPython(TestMathPythonPython): @classmethod - def setUpClass(cls): - super(TestMathNumExprPython, cls).setUpClass() + def setup_class(cls): + super(TestMathNumExprPython, cls).setup_class() cls.engine = 'numexpr' cls.parser = 'python' diff --git a/pandas/tests/frame/test_asof.py b/pandas/tests/frame/test_asof.py index ba3e239756f51..4207238f0cd4f 100644 --- a/pandas/tests/frame/test_asof.py +++ b/pandas/tests/frame/test_asof.py @@ -10,7 +10,7 @@ class TestFrameAsof(TestData, tm.TestCase): - def setUp(self): + def setup_method(self, method): self.N = N = 50 self.rng = date_range('1/1/1990', periods=N, freq='53s') self.df = DataFrame({'A': np.arange(N), 'B': np.arange(N)}, diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 75d4263cbe68f..42eb7148d616e 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2914,7 +2914,7 @@ def test_type_error_multiindex(self): class TestDataFrameIndexingDatetimeWithTZ(tm.TestCase, TestData): - def setUp(self): + def setup_method(self, method): self.idx = Index(date_range('20130101', periods=3, tz='US/Eastern'), name='foo') self.dr = date_range('20130110', periods=3) @@ -2972,7 +2972,7 @@ def test_transpose(self): class TestDataFrameIndexingUInt64(tm.TestCase, TestData): - def setUp(self): + def setup_method(self, method): self.ir = Index(np.arange(3), dtype=np.uint64) self.idx = Index([2**63, 2**63 + 5, 2**63 + 10], name='foo') diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py index 826ece2ed2c9b..49de3b8e8cd9b 100644 --- a/pandas/tests/frame/test_period.py +++ b/pandas/tests/frame/test_period.py @@ -14,7 +14,7 @@ def _permute(obj): class TestPeriodIndex(tm.TestCase): - def setUp(self): + def 
setup_method(self, method): pass def test_as_frame_columns(self): diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 80db2c50c3eb6..6a06e3f4872ce 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -4,7 +4,6 @@ import operator import pytest -from itertools import product from pandas.compat import (zip, range, lrange, StringIO) from pandas import DataFrame, Series, Index, MultiIndex, date_range @@ -27,6 +26,16 @@ ENGINES = 'python', 'numexpr' +@pytest.fixture(params=PARSERS, ids=lambda x: x) +def parser(request): + return request.param + + +@pytest.fixture(params=ENGINES, ids=lambda x: x) +def engine(request): + return request.param + + def skip_if_no_pandas_parser(parser): if parser != 'pandas': pytest.skip("cannot evaluate with parser {0!r}".format(parser)) @@ -41,7 +50,7 @@ def skip_if_no_ne(engine='numexpr'): class TestCompat(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.df = DataFrame({'A': [1, 2, 3]}) self.expected1 = self.df[self.df.A > 0] self.expected2 = self.df.A + 1 @@ -165,8 +174,9 @@ def test_eval_resolvers_as_list(self): class TestDataFrameQueryWithMultiIndex(tm.TestCase): - def check_query_with_named_multiindex(self, parser, engine): + def test_query_with_named_multiindex(self, parser, engine): tm.skip_if_no_ne(engine) + skip_if_no_pandas_parser(parser) a = np.random.choice(['red', 'green'], size=10) b = np.random.choice(['eggs', 'ham'], size=10) index = MultiIndex.from_arrays([a, b], names=['color', 'food']) @@ -214,12 +224,9 @@ def check_query_with_named_multiindex(self, parser, engine): assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) - def test_query_with_named_multiindex(self): - for parser, engine in product(['pandas'], ENGINES): - yield self.check_query_with_named_multiindex, parser, engine - - def check_query_with_unnamed_multiindex(self, parser, engine): + def test_query_with_unnamed_multiindex(self, parser, engine): tm.skip_if_no_ne(engine) + skip_if_no_pandas_parser(parser) a = np.random.choice(['red', 'green'], size=10) b = np.random.choice(['eggs', 'ham'], size=10) index = MultiIndex.from_arrays([a, b]) @@ -308,12 +315,9 @@ def check_query_with_unnamed_multiindex(self, parser, engine): assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) - def test_query_with_unnamed_multiindex(self): - for parser, engine in product(['pandas'], ENGINES): - yield self.check_query_with_unnamed_multiindex, parser, engine - - def check_query_with_partially_named_multiindex(self, parser, engine): + def test_query_with_partially_named_multiindex(self, parser, engine): tm.skip_if_no_ne(engine) + skip_if_no_pandas_parser(parser) a = np.random.choice(['red', 'green'], size=10) b = np.arange(10) index = MultiIndex.from_arrays([a, b]) @@ -341,17 +345,7 @@ def check_query_with_partially_named_multiindex(self, parser, engine): exp = df[ind != "red"] assert_frame_equal(res, exp) - def test_query_with_partially_named_multiindex(self): - for parser, engine in product(['pandas'], ENGINES): - yield (self.check_query_with_partially_named_multiindex, - parser, engine) - def test_query_multiindex_get_index_resolvers(self): - for parser, engine in product(['pandas'], ENGINES): - yield (self.check_query_multiindex_get_index_resolvers, parser, - engine) - - def check_query_multiindex_get_index_resolvers(self, parser, engine): df = mkdf(10, 3, r_idx_nlevels=2, r_idx_names=['spam', 'eggs']) resolvers = df._get_index_resolvers() @@ -375,22 +369,14 @@ def to_series(mi, 
level): else: raise AssertionError("object must be a Series or Index") - def test_raise_on_panel_with_multiindex(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_raise_on_panel_with_multiindex, parser, engine - - def check_raise_on_panel_with_multiindex(self, parser, engine): + def test_raise_on_panel_with_multiindex(self, parser, engine): tm.skip_if_no_ne() p = tm.makePanel(7) p.items = tm.makeCustomIndex(len(p.items), nlevels=2) with pytest.raises(NotImplementedError): pd.eval('p + 1', parser=parser, engine=engine) - def test_raise_on_panel4d_with_multiindex(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_raise_on_panel4d_with_multiindex, parser, engine - - def check_raise_on_panel4d_with_multiindex(self, parser, engine): + def test_raise_on_panel4d_with_multiindex(self, parser, engine): tm.skip_if_no_ne() p4d = tm.makePanel4D(7) p4d.items = tm.makeCustomIndex(len(p4d.items), nlevels=2) @@ -401,15 +387,15 @@ def check_raise_on_panel4d_with_multiindex(self, parser, engine): class TestDataFrameQueryNumExprPandas(tm.TestCase): @classmethod - def setUpClass(cls): - super(TestDataFrameQueryNumExprPandas, cls).setUpClass() + def setup_class(cls): + super(TestDataFrameQueryNumExprPandas, cls).setup_class() cls.engine = 'numexpr' cls.parser = 'pandas' tm.skip_if_no_ne(cls.engine) @classmethod - def tearDownClass(cls): - super(TestDataFrameQueryNumExprPandas, cls).tearDownClass() + def teardown_class(cls): + super(TestDataFrameQueryNumExprPandas, cls).teardown_class() del cls.engine, cls.parser def test_date_query_with_attribute_access(self): @@ -733,8 +719,8 @@ def test_inf(self): class TestDataFrameQueryNumExprPython(TestDataFrameQueryNumExprPandas): @classmethod - def setUpClass(cls): - super(TestDataFrameQueryNumExprPython, cls).setUpClass() + def setup_class(cls): + super(TestDataFrameQueryNumExprPython, cls).setup_class() cls.engine = 'numexpr' cls.parser = 'python' tm.skip_if_no_ne(cls.engine) @@ -834,8 +820,8 @@ def test_nested_scope(self): class TestDataFrameQueryPythonPandas(TestDataFrameQueryNumExprPandas): @classmethod - def setUpClass(cls): - super(TestDataFrameQueryPythonPandas, cls).setUpClass() + def setup_class(cls): + super(TestDataFrameQueryPythonPandas, cls).setup_class() cls.engine = 'python' cls.parser = 'pandas' cls.frame = TestData().frame @@ -855,8 +841,8 @@ def test_query_builtin(self): class TestDataFrameQueryPythonPython(TestDataFrameQueryNumExprPython): @classmethod - def setUpClass(cls): - super(TestDataFrameQueryPythonPython, cls).setUpClass() + def setup_class(cls): + super(TestDataFrameQueryPythonPython, cls).setup_class() cls.engine = cls.parser = 'python' cls.frame = TestData().frame @@ -874,7 +860,7 @@ def test_query_builtin(self): class TestDataFrameQueryStrings(tm.TestCase): - def check_str_query_method(self, parser, engine): + def test_str_query_method(self, parser, engine): tm.skip_if_no_ne(engine) df = DataFrame(randn(10, 1), columns=['b']) df['strings'] = Series(list('aabbccddee')) @@ -911,15 +897,7 @@ def check_str_query_method(self, parser, engine): assert_frame_equal(res, expect) assert_frame_equal(res, df[~df.strings.isin(['a'])]) - def test_str_query_method(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_str_query_method, parser, engine - - def test_str_list_query_method(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_str_list_query_method, parser, engine - - def check_str_list_query_method(self, parser, engine): + def 
test_str_list_query_method(self, parser, engine): tm.skip_if_no_ne(engine) df = DataFrame(randn(10, 1), columns=['b']) df['strings'] = Series(list('aabbccddee')) @@ -958,7 +936,7 @@ def check_str_list_query_method(self, parser, engine): parser=parser) assert_frame_equal(res, expect) - def check_query_with_string_columns(self, parser, engine): + def test_query_with_string_columns(self, parser, engine): tm.skip_if_no_ne(engine) df = DataFrame({'a': list('aaaabbbbcccc'), 'b': list('aabbccddeeff'), @@ -979,11 +957,7 @@ def check_query_with_string_columns(self, parser, engine): with pytest.raises(NotImplementedError): df.query('a in b and c < d', parser=parser, engine=engine) - def test_query_with_string_columns(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_query_with_string_columns, parser, engine - - def check_object_array_eq_ne(self, parser, engine): + def test_object_array_eq_ne(self, parser, engine): tm.skip_if_no_ne(engine) df = DataFrame({'a': list('aaaabbbbcccc'), 'b': list('aabbccddeeff'), @@ -997,11 +971,7 @@ def check_object_array_eq_ne(self, parser, engine): exp = df[df.a != df.b] assert_frame_equal(res, exp) - def test_object_array_eq_ne(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_object_array_eq_ne, parser, engine - - def check_query_with_nested_strings(self, parser, engine): + def test_query_with_nested_strings(self, parser, engine): tm.skip_if_no_ne(engine) skip_if_no_pandas_parser(parser) raw = """id event timestamp @@ -1025,11 +995,7 @@ def check_query_with_nested_strings(self, parser, engine): engine=engine) assert_frame_equal(expected, res) - def test_query_with_nested_string(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_query_with_nested_strings, parser, engine - - def check_query_with_nested_special_character(self, parser, engine): + def test_query_with_nested_special_character(self, parser, engine): skip_if_no_pandas_parser(parser) tm.skip_if_no_ne(engine) df = DataFrame({'a': ['a', 'b', 'test & test'], @@ -1038,12 +1004,7 @@ def check_query_with_nested_special_character(self, parser, engine): expec = df[df.a == 'test & test'] assert_frame_equal(res, expec) - def test_query_with_nested_special_character(self): - for parser, engine in product(PARSERS, ENGINES): - yield (self.check_query_with_nested_special_character, - parser, engine) - - def check_query_lex_compare_strings(self, parser, engine): + def test_query_lex_compare_strings(self, parser, engine): tm.skip_if_no_ne(engine=engine) import operator as opr @@ -1058,11 +1019,7 @@ def check_query_lex_compare_strings(self, parser, engine): expected = df[func(df.X, 'd')] assert_frame_equal(res, expected) - def test_query_lex_compare_strings(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_query_lex_compare_strings, parser, engine - - def check_query_single_element_booleans(self, parser, engine): + def test_query_single_element_booleans(self, parser, engine): tm.skip_if_no_ne(engine) columns = 'bid', 'bidsize', 'ask', 'asksize' data = np.random.randint(2, size=(1, len(columns))).astype(bool) @@ -1071,12 +1028,9 @@ def check_query_single_element_booleans(self, parser, engine): expected = df[df.bid & df.ask] assert_frame_equal(res, expected) - def test_query_single_element_booleans(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_query_single_element_booleans, parser, engine - - def check_query_string_scalar_variable(self, parser, engine): + def 
test_query_string_scalar_variable(self, parser, engine): tm.skip_if_no_ne(engine) + skip_if_no_pandas_parser(parser) df = pd.DataFrame({'Symbol': ['BUD US', 'BUD US', 'IBM US', 'IBM US'], 'Price': [109.70, 109.72, 183.30, 183.35]}) e = df[df.Symbol == 'BUD US'] @@ -1084,24 +1038,20 @@ def check_query_string_scalar_variable(self, parser, engine): r = df.query('Symbol == @symb', parser=parser, engine=engine) assert_frame_equal(e, r) - def test_query_string_scalar_variable(self): - for parser, engine in product(['pandas'], ENGINES): - yield self.check_query_string_scalar_variable, parser, engine - class TestDataFrameEvalNumExprPandas(tm.TestCase): @classmethod - def setUpClass(cls): - super(TestDataFrameEvalNumExprPandas, cls).setUpClass() + def setup_class(cls): + super(TestDataFrameEvalNumExprPandas, cls).setup_class() cls.engine = 'numexpr' cls.parser = 'pandas' tm.skip_if_no_ne() - def setUp(self): + def setup_method(self, method): self.frame = DataFrame(randn(10, 3), columns=list('abc')) - def tearDown(self): + def teardown_method(self, method): del self.frame def test_simple_expr(self): @@ -1129,8 +1079,8 @@ def test_invalid_type_for_operator_raises(self): class TestDataFrameEvalNumExprPython(TestDataFrameEvalNumExprPandas): @classmethod - def setUpClass(cls): - super(TestDataFrameEvalNumExprPython, cls).setUpClass() + def setup_class(cls): + super(TestDataFrameEvalNumExprPython, cls).setup_class() cls.engine = 'numexpr' cls.parser = 'python' tm.skip_if_no_ne(cls.engine) @@ -1139,8 +1089,8 @@ def setUpClass(cls): class TestDataFrameEvalPythonPandas(TestDataFrameEvalNumExprPandas): @classmethod - def setUpClass(cls): - super(TestDataFrameEvalPythonPandas, cls).setUpClass() + def setup_class(cls): + super(TestDataFrameEvalPythonPandas, cls).setup_class() cls.engine = 'python' cls.parser = 'pandas' @@ -1148,6 +1098,6 @@ def setUpClass(cls): class TestDataFrameEvalPythonPython(TestDataFrameEvalNumExprPython): @classmethod - def setUpClass(cls): - super(TestDataFrameEvalPythonPython, cls).tearDownClass() + def setup_class(cls): + super(TestDataFrameEvalPythonPython, cls).setup_class() cls.engine = cls.parser = 'python' diff --git a/pandas/tests/frame/test_validate.py b/pandas/tests/frame/test_validate.py index 4c4abb7e58e75..343853b3fcfa0 100644 --- a/pandas/tests/frame/test_validate.py +++ b/pandas/tests/frame/test_validate.py @@ -1,10 +1,10 @@ -from unittest import TestCase from pandas.core.frame import DataFrame +import pandas.util.testing as tm import pytest -class TestDataFrameValidate(TestCase): +class TestDataFrameValidate(tm.TestCase): """Tests for error handling related to data types of method arguments.""" df = DataFrame({'a': [1, 2], 'b': [3, 4]}) diff --git a/pandas/tests/groupby/common.py b/pandas/tests/groupby/common.py index f3dccf473f53a..3e99e8211b4f8 100644 --- a/pandas/tests/groupby/common.py +++ b/pandas/tests/groupby/common.py @@ -28,7 +28,7 @@ def df(): class MixIn(object): - def setUp(self): + def setup_method(self, method): self.ts = tm.makeTimeSeries() self.seriesd = tm.getSeriesData() diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 310a5aca77b77..769e4d14d354b 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -27,7 +27,7 @@ class TestGroupByAggregate(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.ts = tm.makeTimeSeries() self.seriesd = tm.getSeriesData() diff --git a/pandas/tests/groupby/test_bin_groupby.py 
b/pandas/tests/groupby/test_bin_groupby.py index 320acacff483c..bdac535b3d2e2 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -48,7 +48,7 @@ def test_series_bin_grouper(): class TestBinGroupers(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.obj = np.random.randn(10, 1) self.labels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2], dtype=np.int64) self.bins = np.array([3, 6], dtype=np.int64) diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 2cfbe0ab68c8e..b05b938fd8205 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -25,7 +25,7 @@ class TestGroupByFilter(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.ts = tm.makeTimeSeries() self.seriesd = tm.getSeriesData() diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 1c8189d0c75ac..185787d75f6e1 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -187,7 +187,7 @@ def _check_rng(rng): class TestToPeriod(tm.TestCase): - def setUp(self): + def setup_method(self, method): data = [Timestamp('2007-01-01 10:11:12.123456Z'), Timestamp('2007-01-01 10:11:13.789123Z')] self.index = DatetimeIndex(data) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index a9fdd40406770..67d6b0f314ecb 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -198,7 +198,7 @@ def test_precision_finer_than_offset(self): class TestBusinessDateRange(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.rng = bdate_range(START, END) def test_constructor(self): @@ -483,7 +483,7 @@ def test_freq_divides_end_in_nanos(self): class TestCustomDateRange(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.rng = cdate_range(START, END) def test_constructor(self): diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index 0eb565bf0ec55..2e184b1aa4e51 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -11,7 +11,7 @@ class TestDatetimeIndex(DatetimeLike, tm.TestCase): _holder = DatetimeIndex - def setUp(self): + def setup_method(self, method): self.indices = dict(index=tm.makeDateIndex(10)) self.setup_indices() diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index e25e3d448190e..75c6626b47401 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -23,8 +23,8 @@ class TestDatetimeIndexOps(Ops): tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/Asia/Singapore', 'dateutil/US/Pacific'] - def setUp(self): - super(TestDatetimeIndexOps, self).setUp() + def setup_method(self, method): + super(TestDatetimeIndexOps, self).setup_method(method) mask = lambda x: (isinstance(x, DatetimeIndex) or isinstance(x, PeriodIndex)) self.is_valid_objs = [o for o in self.objs if mask(o)] @@ -1109,7 +1109,7 @@ def test_shift_months(years, months): class TestBusinessDatetimeIndex(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.rng = bdate_range(START, END) def test_comparison(self): @@ -1209,7 +1209,7 @@ def test_identical(self): class TestCustomDatetimeIndex(tm.TestCase): - 
def setUp(self): + def setup_method(self, method): self.rng = cdate_range(START, END) def test_comparison(self): diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index b25fdaf6be3b0..fb4b6e9d226f8 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -201,7 +201,7 @@ def test_join_nonunique(self): class TestBusinessDatetimeIndex(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.rng = bdate_range(START, END) def test_union(self): @@ -345,7 +345,7 @@ def test_month_range_union_tz_dateutil(self): class TestCustomDatetimeIndex(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.rng = cdate_range(START, END) def test_union(self): diff --git a/pandas/tests/indexes/period/test_asfreq.py b/pandas/tests/indexes/period/test_asfreq.py index f9effd3d1aea6..b97be3f61a2dd 100644 --- a/pandas/tests/indexes/period/test_asfreq.py +++ b/pandas/tests/indexes/period/test_asfreq.py @@ -8,7 +8,7 @@ class TestPeriodIndex(tm.TestCase): - def setUp(self): + def setup_method(self, method): pass def test_asfreq(self): diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index a95ad808cadce..b0db27b5f2cea 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -11,7 +11,7 @@ class TestPeriodIndex(tm.TestCase): - def setUp(self): + def setup_method(self, method): pass def test_construction_base_constructor(self): @@ -475,7 +475,7 @@ def test_map_with_string_constructor(self): class TestSeriesPeriod(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.series = Series(period_range('2000-01-01', periods=10, freq='D')) def test_constructor_cant_cast_period(self): diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index ebbe05d51598c..36db56b751633 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -13,7 +13,7 @@ class TestGetItem(tm.TestCase): - def setUp(self): + def setup_method(self, method): pass def test_getitem(self): diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index fb688bda58ae8..583848f75c6b4 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -15,8 +15,8 @@ class TestPeriodIndexOps(Ops): - def setUp(self): - super(TestPeriodIndexOps, self).setUp() + def setup_method(self, method): + super(TestPeriodIndexOps, self).setup_method(method) mask = lambda x: (isinstance(x, DatetimeIndex) or isinstance(x, PeriodIndex)) self.is_valid_objs = [o for o in self.objs if mask(o)] @@ -1137,7 +1137,7 @@ def test_pi_comp_period_nat(self): class TestSeriesPeriod(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.series = Series(period_range('2000-01-01', periods=10, freq='D')) def test_ops_series_timedelta(self): diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index 04b4e6795e770..88a9ff5752322 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -10,7 +10,7 @@ class TestPeriodIndex(tm.TestCase): - def setUp(self): + def setup_method(self, method): pass def test_slice_with_negative_step(self): diff --git a/pandas/tests/indexes/period/test_period.py 
b/pandas/tests/indexes/period/test_period.py index 6ec567509cd76..11ec3bc215cf8 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -17,7 +17,7 @@ class TestPeriodIndex(DatetimeLike, tm.TestCase): _holder = PeriodIndex _multiprocess_can_split_ = True - def setUp(self): + def setup_method(self, method): self.indices = dict(index=tm.makePeriodIndex(10)) self.setup_indices() diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index 025ee7e732a7c..7041724faeb89 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -14,7 +14,7 @@ def _permute(obj): class TestPeriodIndex(tm.TestCase): - def setUp(self): + def setup_method(self, method): pass def test_joins(self): diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 9e5994dd54f50..bd80c2c4f341e 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -152,7 +152,7 @@ def test_period_ordinal_business_day(self): class TestPeriodIndex(tm.TestCase): - def setUp(self): + def setup_method(self, method): pass def test_tolist(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 10958681af450..ce3f4b5d68d89 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -32,7 +32,7 @@ class TestIndex(Base, tm.TestCase): _holder = Index - def setUp(self): + def setup_method(self, method): self.indices = dict(unicodeIndex=tm.makeUnicodeIndex(100), strIndex=tm.makeStringIndex(100), dateIndex=tm.makeDateIndex(100), @@ -1808,7 +1808,7 @@ class TestMixedIntIndex(Base, tm.TestCase): _holder = Index - def setUp(self): + def setup_method(self, method): self.indices = dict(mixedIndex=Index([0, 'a', 1, 'b', 2, 'c'])) self.setup_indices() diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 6a2eea0b84b72..94349b4860698 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -22,7 +22,7 @@ class TestCategoricalIndex(Base, tm.TestCase): _holder = CategoricalIndex - def setUp(self): + def setup_method(self, method): self.indices = dict(catIndex=tm.makeCategoricalIndex(100)) self.setup_indices() diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py index ed2e3d94aa4a4..ae4a130c24310 100644 --- a/pandas/tests/indexes/test_frozen.py +++ b/pandas/tests/indexes/test_frozen.py @@ -9,7 +9,7 @@ class TestFrozenList(CheckImmutable, CheckStringMixin, tm.TestCase): mutable_methods = ('extend', 'pop', 'remove', 'insert') unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"]) - def setUp(self): + def setup_method(self, method): self.lst = [1, 2, 3, 4, 5] self.container = FrozenList(self.lst) self.klass = FrozenList @@ -35,7 +35,7 @@ class TestFrozenNDArray(CheckImmutable, CheckStringMixin, tm.TestCase): mutable_methods = ('put', 'itemset', 'fill') unicode_container = FrozenNDArray([u("\u05d0"), u("\u05d1"), "c"]) - def setUp(self): + def setup_method(self, method): self.lst = [3, 5, 7, -2] self.container = FrozenNDArray(self.lst) self.klass = FrozenNDArray diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 00897f290f292..90e5b1b6c9788 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -15,7 +15,7 @@ class TestIntervalIndex(Base, tm.TestCase): 
_holder = IntervalIndex - def setUp(self): + def setup_method(self, method): self.index = IntervalIndex.from_arrays([0, 1], [1, 2]) self.index_with_nan = IntervalIndex.from_tuples( [(0, 1), np.nan, (1, 2)]) @@ -721,7 +721,7 @@ def f(): class TestIntervalTree(tm.TestCase): - def setUp(self): + def setup_method(self, method): gentree = lambda dtype: IntervalTree(np.arange(5, dtype=dtype), np.arange(5, dtype=dtype) + 2) self.tree = gentree('int64') diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index a840711e37fb0..d2024340c522e 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -31,7 +31,7 @@ class TestMultiIndex(Base, tm.TestCase): _holder = MultiIndex _compat_props = ['shape', 'ndim', 'size', 'itemsize'] - def setUp(self): + def setup_method(self, method): major_axis = Index(['foo', 'bar', 'baz', 'qux']) minor_axis = Index(['one', 'two']) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 428c261df5654..e82b1c5e74543 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -179,7 +179,7 @@ def test_modulo(self): class TestFloat64Index(Numeric, tm.TestCase): _holder = Float64Index - def setUp(self): + def setup_method(self, method): self.indices = dict(mixed=Float64Index([1.5, 2, 3, 4, 5]), float=Float64Index(np.arange(5) * 2.5)) self.setup_indices() @@ -625,7 +625,7 @@ class TestInt64Index(NumericInt, tm.TestCase): _dtype = 'int64' _holder = Int64Index - def setUp(self): + def setup_method(self, method): self.indices = dict(index=Int64Index(np.arange(0, 20, 2))) self.setup_indices() @@ -920,7 +920,7 @@ class TestUInt64Index(NumericInt, tm.TestCase): _dtype = 'uint64' _holder = UInt64Index - def setUp(self): + def setup_method(self, method): self.indices = dict(index=UInt64Index([2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25])) self.setup_indices() diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 0379718b004e1..cc3a76aa7cac1 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -24,7 +24,7 @@ class TestRangeIndex(Numeric, tm.TestCase): _holder = RangeIndex _compat_props = ['shape', 'ndim', 'size', 'itemsize'] - def setUp(self): + def setup_method(self, method): self.indices = dict(index=RangeIndex(0, 20, 2, name='foo')) self.setup_indices() diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index 6e82f165e4909..b9720f4a300d1 100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -14,7 +14,7 @@ class TestTimedeltaIndex(DatetimeLike, tm.TestCase): _holder = TimedeltaIndex _multiprocess_can_split_ = True - def setUp(self): + def setup_method(self, method): self.indices = dict(index=tm.makeTimedeltaIndex(10)) self.setup_indices() diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 474dd283530c5..12d29dc00e273 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -16,8 +16,8 @@ class TestTimedeltaIndexOps(Ops): - def setUp(self): - super(TestTimedeltaIndexOps, self).setUp() + def setup_method(self, method): + super(TestTimedeltaIndexOps, self).setup_method(method) mask = lambda x: isinstance(x, TimedeltaIndex) self.is_valid_objs = [o for o in self.objs if mask(o)] self.not_valid_objs = [] diff --git 
a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index d1379973dfec5..933674c425cd8 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -20,7 +20,7 @@ class TestTimedeltaIndex(DatetimeLike, tm.TestCase): _holder = TimedeltaIndex _multiprocess_can_split_ = True - def setUp(self): + def setup_method(self, method): self.indices = dict(index=tm.makeTimedeltaIndex(10)) self.setup_indices() diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index bd5b7f45a6f4c..259a8aea94df0 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -31,7 +31,7 @@ class Base(object): _typs = set(['ints', 'uints', 'labels', 'mixed', 'ts', 'floats', 'empty', 'ts_rev']) - def setUp(self): + def setup_method(self, method): self.series_ints = Series(np.random.rand(4), index=lrange(0, 8, 2)) self.frame_ints = DataFrame(np.random.randn(4, 4), diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index f9fcef16c12d4..6d2723ae0ff01 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -12,7 +12,7 @@ class TestCategoricalIndex(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.df = DataFrame({'A': np.arange(6, dtype='int64'), 'B': Series(list('aabbca')).astype( diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 56bc8c1d72bb8..8e81a3bd1df7a 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -1146,7 +1146,7 @@ class TestReplaceSeriesCoercion(CoercionBase, tm.TestCase): klasses = ['series'] method = 'replace' - def setUp(self): + def setup_method(self, method): self.rep = {} self.rep['object'] = ['a', 'b'] self.rep['int64'] = [4, 5] diff --git a/pandas/tests/indexing/test_interval.py b/pandas/tests/indexing/test_interval.py index bccc21ed6c086..b8d8739af1d15 100644 --- a/pandas/tests/indexing/test_interval.py +++ b/pandas/tests/indexing/test_interval.py @@ -8,7 +8,7 @@ class TestIntervalIndex(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) def test_loc_with_scalar(self): diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index ac00e441047dd..3cea731cfd440 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -107,14 +107,14 @@ def has_expanded_repr(df): class TestDataFrameFormatting(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.warn_filters = warnings.filters warnings.filterwarnings('ignore', category=FutureWarning, module=".*format") self.frame = _frame.copy() - def tearDown(self): + def teardown_method(self, method): warnings.filters = self.warn_filters def test_repr_embedded_ndarray(self): @@ -1606,7 +1606,7 @@ def gen_series_formatting(): class TestSeriesFormatting(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.ts = tm.makeTimeSeries() def test_repr_unicode(self): diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 44fbd5a958d8c..05b697ffbb756 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -126,7 +126,7 @@ def test_ambiguous_width(self): class TestTableSchemaRepr(tm.TestCase): @classmethod - def 
setUpClass(cls): + def setup_class(cls): pytest.importorskip('IPython') try: import mock diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 1cd338479bd0c..687e78e64a3e7 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -5,16 +5,15 @@ import numpy as np import pandas as pd from pandas import DataFrame -from pandas.util.testing import TestCase import pandas.util.testing as tm jinja2 = pytest.importorskip('jinja2') from pandas.io.formats.style import Styler, _get_level_lengths # noqa -class TestStyler(TestCase): +class TestStyler(tm.TestCase): - def setUp(self): + def setup_method(self, method): np.random.seed(24) self.s = DataFrame({'A': np.random.permutation(range(6))}) self.df = DataFrame({'A': [0, 1], 'B': np.random.randn(2)}) @@ -813,10 +812,10 @@ def test_mi_sparse_column_names(self): assert head == expected -@tm.mplskip -class TestStylerMatplotlibDep(TestCase): +class TestStylerMatplotlibDep(tm.TestCase): def test_background_gradient(self): + tm._skip_if_no_mpl() df = pd.DataFrame([[1, 2], [2, 4]], columns=['A', 'B']) for c_map in [None, 'YlOrRd']: diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index c3a976973bb29..1e667245809ec 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -19,7 +19,7 @@ class TestBuildSchema(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.df = DataFrame( {'A': [1, 2, 3, 4], 'B': ['a', 'b', 'c', 'c'], @@ -171,7 +171,7 @@ def test_as_json_table_type_categorical_dtypes(self): class TestTableOrient(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.df = DataFrame( {'A': [1, 2, 3, 4], 'B': ['a', 'b', 'c', 'c'], diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 2e92910f82b74..0cf9000fcffb2 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -37,7 +37,7 @@ class TestPandasContainer(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.dirpath = tm.get_data_path() self.ts = tm.makeTimeSeries() @@ -59,7 +59,7 @@ def setUp(self): self.mixed_frame = _mixed_frame.copy() self.categorical = _cat_frame.copy() - def tearDown(self): + def teardown_method(self, method): del self.dirpath del self.ts diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index b749cd150d445..a23ae225c19b0 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- -from unittest import TestCase - try: import json except ImportError: @@ -27,7 +25,7 @@ else partial(json.dumps, encoding="utf-8")) -class UltraJSONTests(TestCase): +class UltraJSONTests(tm.TestCase): @pytest.mark.skipif(compat.is_platform_32bit(), reason="not compliant on 32-bit, xref #15865") @@ -948,7 +946,7 @@ def my_obj_handler(obj): ujson.decode(ujson.encode(l, default_handler=str))) -class NumpyJSONTests(TestCase): +class NumpyJSONTests(tm.TestCase): def testBool(self): b = np.bool(True) @@ -1224,7 +1222,7 @@ def testArrayNumpyLabelled(self): assert (np.array(['a', 'b']) == output[2]).all() -class PandasJSONTests(TestCase): +class PandasJSONTests(tm.TestCase): def testDataFrame(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[ diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index cabee76dd6dfc..26b5c4788d53a 100644 --- 
a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -49,7 +49,7 @@ def check_compressed_urls(salaries_table, compression, extension, mode, class TestS3(tm.TestCase): - def setUp(self): + def setup_method(self, method): try: import s3fs # noqa except ImportError: diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py index 2ae557a7d57db..cced8299691df 100644 --- a/pandas/tests/io/parser/test_parsers.py +++ b/pandas/tests/io/parser/test_parsers.py @@ -42,7 +42,7 @@ def read_table(self, *args, **kwargs): def float_precision_choices(self): raise AbstractMethodError(self) - def setUp(self): + def setup_method(self, method): self.dirpath = tm.get_data_path() self.csv1 = os.path.join(self.dirpath, 'test1.csv') self.csv2 = os.path.join(self.dirpath, 'test2.csv') diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index d8ae66a2b275c..f09d8c8e778d5 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -28,7 +28,7 @@ class TestTextReader(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.dirpath = tm.get_data_path() self.csv1 = os.path.join(self.dirpath, 'test1.csv') self.csv2 = os.path.join(self.dirpath, 'test2.csv') diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index afd40e7017cff..cb28ab6c6c345 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -8,7 +8,7 @@ class TestSAS7BDAT(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.dirpath = tm.get_data_path() self.data = [] self.test_ix = [list(range(1, 16)), [16]] diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index 2ed7ebbbfce32..17b286a4915ce 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -18,7 +18,7 @@ def numeric_as_float(data): class TestXport(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.dirpath = tm.get_data_path() self.file01 = os.path.join(self.dirpath, "DEMO_G.xpt") self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt") diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 756dd0db8c3b7..e9ffb2dca7ae5 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -26,8 +26,8 @@ class TestClipboard(tm.TestCase): @classmethod - def setUpClass(cls): - super(TestClipboard, cls).setUpClass() + def setup_class(cls): + super(TestClipboard, cls).setup_class() cls.data = {} cls.data['string'] = mkdf(5, 3, c_idx_type='s', r_idx_type='i', c_idx_names=[None], r_idx_names=[None]) @@ -62,8 +62,8 @@ def setUpClass(cls): cls.data_types = list(cls.data.keys()) @classmethod - def tearDownClass(cls): - super(TestClipboard, cls).tearDownClass() + def teardown_class(cls): + super(TestClipboard, cls).teardown_class() del cls.data_types, cls.data def check_round_trip_frame(self, data_type, excel=None, sep=None, diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index c427fab4103e0..1837e5381a07e 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -92,7 +92,7 @@ def test_iterator(self): class TestMMapWrapper(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.mmap_file = os.path.join(tm.get_data_path(), 'test_mmap.csv') diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 
d733f26b2c04d..919c521f22f60 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -84,7 +84,7 @@ def _skip_if_no_s3fs(): class SharedItems(object): - def setUp(self): + def setup_method(self, method): self.dirpath = tm.get_data_path() self.frame = _frame.copy() self.frame2 = _frame2.copy() @@ -161,9 +161,9 @@ class ReadingTestsBase(SharedItems): # 3. Add a property engine_name, which is the name of the reader class. # For the reader this is not used for anything at the moment. - def setUp(self): + def setup_method(self, method): self.check_skip() - super(ReadingTestsBase, self).setUp() + super(ReadingTestsBase, self).setup_method(method) def test_parse_cols_int(self): @@ -1019,14 +1019,14 @@ class ExcelWriterBase(SharedItems): # Test with MultiIndex and Hierarchical Rows as merged cells. merge_cells = True - def setUp(self): + def setup_method(self, method): self.check_skip() - super(ExcelWriterBase, self).setUp() + super(ExcelWriterBase, self).setup_method(method) self.option_name = 'io.excel.%s.writer' % self.ext.strip('.') self.prev_engine = get_option(self.option_name) set_option(self.option_name, self.engine_name) - def tearDown(self): + def teardown_method(self, method): set_option(self.option_name, self.prev_engine) def test_excel_sheet_by_name_raise(self): @@ -1926,7 +1926,7 @@ def skip_openpyxl_gt21(cls): """Skip a TestCase instance if openpyxl >= 2.2""" @classmethod - def setUpClass(cls): + def setup_class(cls): _skip_if_no_openpyxl() import openpyxl ver = openpyxl.__version__ @@ -1934,7 +1934,7 @@ def setUpClass(cls): LooseVersion(ver) < LooseVersion('2.2.0'))): pytest.skip("openpyxl %s >= 2.2" % str(ver)) - cls.setUpClass = setUpClass + cls.setup_class = setup_class return cls @@ -2043,14 +2043,14 @@ def skip_openpyxl_lt22(cls): """Skip a TestCase instance if openpyxl < 2.2""" @classmethod - def setUpClass(cls): + def setup_class(cls): _skip_if_no_openpyxl() import openpyxl ver = openpyxl.__version__ if LooseVersion(ver) < LooseVersion('2.2.0'): pytest.skip("openpyxl %s < 2.2" % str(ver)) - cls.setUpClass = setUpClass + cls.setup_class = setup_class return cls diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 138def3ea1ac9..47fc495201754 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -97,7 +97,7 @@ def make_mixed_dataframe_v2(test_size): class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): @classmethod - def setUpClass(cls): + def setup_class(cls): # - GLOBAL CLASS FIXTURES - # put here any instruction you want to execute only *ONCE* *BEFORE* # executing *ALL* tests described below. @@ -111,7 +111,7 @@ def setUpClass(cls): ).create(DATASET_ID + "1") @classmethod - def tearDownClass(cls): + def teardown_class(cls): # - GLOBAL CLASS FIXTURES - # put here any instruction you want to execute only *ONCE* *AFTER* # executing all tests. 
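[Editor's note -- illustrative aside, not part of the patch] Every hunk in this series applies the same mechanical recipe: the unittest-style hooks setUp/tearDown/setUpClass/tearDownClass are renamed to setup_method/teardown_method/setup_class/teardown_class, which pytest's class-based collection calls by name without any unittest.TestCase machinery, and the removed yield-based check_* generators become parametrized fixtures (see the parser/engine fixtures added to test_query_eval.py above). A minimal sketch of the target idiom follows; the class, fixture, and attribute names here are invented for illustration only.

import pytest


@pytest.fixture(params=['python', 'pandas'])
def parser(request):
    # A parametrized fixture: every test that requests it runs once per
    # value, replacing the old "for parser in ...: yield self.check_*"
    # generator pattern that pytest no longer collects.
    return request.param


class TestExample(object):

    @classmethod
    def setup_class(cls):
        # Runs once before any test in the class (was setUpClass).
        cls.engine = 'python'

    def setup_method(self, method):
        # Runs before each test method; `method` is the test function
        # about to run (was setUp).
        self.data = list(range(3))

    def teardown_method(self, method):
        # Runs after each test method (was tearDown).
        del self.data

    def test_parser_values(self, parser):
        assert parser in ('python', 'pandas')

Running "pytest -q" collects test_parser_values twice, once per fixture param. Because pytest discovers these hooks purely by name, hunks like the ones above for test_validate.py and test_ujson.py can also drop "from unittest import TestCase" in favor of the project's own tm.TestCase shim.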
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 0a79173df731c..6b1215e443b47 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -99,8 +99,8 @@ class TestReadHtml(tm.TestCase, ReadHtmlMixin): banklist_data = os.path.join(DATA_PATH, 'banklist.html') @classmethod - def setUpClass(cls): - super(TestReadHtml, cls).setUpClass() + def setup_class(cls): + super(TestReadHtml, cls).setup_class() _skip_if_none_of(('bs4', 'html5lib')) def test_to_html_compat(self): @@ -783,8 +783,8 @@ class TestReadHtmlEncoding(tm.TestCase): flavor = 'bs4' @classmethod - def setUpClass(cls): - super(TestReadHtmlEncoding, cls).setUpClass() + def setup_class(cls): + super(TestReadHtmlEncoding, cls).setup_class() _skip_if_none_of((cls.flavor, 'html5lib')) def read_html(self, *args, **kwargs): @@ -825,8 +825,8 @@ class TestReadHtmlEncodingLxml(TestReadHtmlEncoding): flavor = 'lxml' @classmethod - def setUpClass(cls): - super(TestReadHtmlEncodingLxml, cls).setUpClass() + def setup_class(cls): + super(TestReadHtmlEncodingLxml, cls).setup_class() _skip_if_no(cls.flavor) @@ -834,8 +834,8 @@ class TestReadHtmlLxml(tm.TestCase, ReadHtmlMixin): flavor = 'lxml' @classmethod - def setUpClass(cls): - super(TestReadHtmlLxml, cls).setUpClass() + def setup_class(cls): + super(TestReadHtmlLxml, cls).setup_class() _skip_if_no('lxml') def test_data_fail(self): diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 451cce125e228..96abf3415fff8 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -92,10 +92,10 @@ def check_arbitrary(a, b): class TestPackers(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.path = '__%s__.msg' % tm.rands(10) - def tearDown(self): + def teardown_method(self, method): pass def encode_decode(self, x, compress=None, **kwargs): @@ -301,8 +301,8 @@ def test_timedeltas(self): class TestIndex(TestPackers): - def setUp(self): - super(TestIndex, self).setUp() + def setup_method(self, method): + super(TestIndex, self).setup_method(method) self.d = { 'string': tm.makeStringIndex(100), @@ -364,8 +364,8 @@ def categorical_index(self): class TestSeries(TestPackers): - def setUp(self): - super(TestSeries, self).setUp() + def setup_method(self, method): + super(TestSeries, self).setup_method(method) self.d = {} @@ -412,8 +412,8 @@ def test_basic(self): class TestCategorical(TestPackers): - def setUp(self): - super(TestCategorical, self).setUp() + def setup_method(self, method): + super(TestCategorical, self).setup_method(method) self.d = {} @@ -435,8 +435,8 @@ def test_basic(self): class TestNDFrame(TestPackers): - def setUp(self): - super(TestNDFrame, self).setUp() + def setup_method(self, method): + super(TestNDFrame, self).setup_method(method) data = { 'A': [0., 1., 2., 3., np.nan], @@ -579,7 +579,7 @@ class TestCompression(TestPackers): """See https://github.com/pandas-dev/pandas/pull/9783 """ - def setUp(self): + def setup_method(self, method): try: from sqlalchemy import create_engine self._create_sql_engine = create_engine @@ -588,7 +588,7 @@ def setUp(self): else: self._SQLALCHEMY_INSTALLED = True - super(TestCompression, self).setUp() + super(TestCompression, self).setup_method(method) data = { 'A': np.arange(1000, dtype=np.float64), 'B': np.arange(1000, dtype=np.int32), @@ -773,8 +773,8 @@ def test_readonly_axis_zlib_to_sql(self): class TestEncoding(TestPackers): - def setUp(self): - super(TestEncoding, self).setUp() + def setup_method(self, method): + 
super(TestEncoding, self).setup_method(method) data = { 'A': [compat.u('\u2019')] * 1000, 'B': np.arange(1000, dtype=np.int32), diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index a268fa96175cf..9e7196593650a 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -124,23 +124,23 @@ def _maybe_remove(store, key): class Base(tm.TestCase): @classmethod - def setUpClass(cls): - super(Base, cls).setUpClass() + def setup_class(cls): + super(Base, cls).setup_class() # Pytables 3.0.0 deprecates lots of things tm.reset_testing_mode() @classmethod - def tearDownClass(cls): - super(Base, cls).tearDownClass() + def teardown_class(cls): + super(Base, cls).teardown_class() # Pytables 3.0.0 deprecates lots of things tm.set_testing_mode() - def setUp(self): + def setup_method(self, method): self.path = 'tmp.__%s__.h5' % tm.rands(10) - def tearDown(self): + def teardown_method(self, method): pass diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 52883a41b08c2..21de0cd371a37 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -20,7 +20,6 @@ from __future__ import print_function from warnings import catch_warnings import pytest -import unittest import sqlite3 import csv import os @@ -179,7 +178,7 @@ class MixInBase(object): - def tearDown(self): + def teardown_method(self, method): for tbl in self._get_all_tables(): self.drop_table(tbl) self._close_conn() @@ -498,7 +497,7 @@ class _TestSQLApi(PandasSQLTest): flavor = 'sqlite' mode = None - def setUp(self): + def setup_method(self, method): self.conn = self.connect() self._load_iris_data() self._load_iris_view() @@ -819,7 +818,7 @@ def test_unicode_column_name(self): @pytest.mark.single -class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi, unittest.TestCase): +class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi, tm.TestCase): """ Test the public API as it would be used directly @@ -981,8 +980,8 @@ class _EngineToConnMixin(object): A mixin that causes setup_connect to create a conn rather than an engine. 
""" - def setUp(self): - super(_EngineToConnMixin, self).setUp() + def setup_method(self, method): + super(_EngineToConnMixin, self).setup_method(method) engine = self.conn conn = engine.connect() self.__tx = conn.begin() @@ -990,21 +989,21 @@ def setUp(self): self.__engine = engine self.conn = conn - def tearDown(self): + def teardown_method(self, method): self.__tx.rollback() self.conn.close() self.conn = self.__engine self.pandasSQL = sql.SQLDatabase(self.__engine) - super(_EngineToConnMixin, self).tearDown() + super(_EngineToConnMixin, self).teardown_method(method) @pytest.mark.single -class TestSQLApiConn(_EngineToConnMixin, TestSQLApi, unittest.TestCase): +class TestSQLApiConn(_EngineToConnMixin, TestSQLApi, tm.TestCase): pass @pytest.mark.single -class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi, unittest.TestCase): +class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi, tm.TestCase): """ Test the public sqlite connection fallback API @@ -1093,7 +1092,7 @@ class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): flavor = None @classmethod - def setUpClass(cls): + def setup_class(cls): cls.setup_import() cls.setup_driver() @@ -1105,7 +1104,7 @@ def setUpClass(cls): msg = "{0} - can't connect to {1} server".format(cls, cls.flavor) pytest.skip(msg) - def setUp(self): + def setup_method(self, method): self.setup_connect() self._load_iris_data() @@ -1822,37 +1821,37 @@ def test_schema_support(self): @pytest.mark.single -class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy, unittest.TestCase): +class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy, tm.TestCase): pass @pytest.mark.single class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn, - unittest.TestCase): + tm.TestCase): pass @pytest.mark.single class TestPostgreSQLAlchemy(_TestPostgreSQLAlchemy, _TestSQLAlchemy, - unittest.TestCase): + tm.TestCase): pass @pytest.mark.single class TestPostgreSQLAlchemyConn(_TestPostgreSQLAlchemy, _TestSQLAlchemyConn, - unittest.TestCase): + tm.TestCase): pass @pytest.mark.single class TestSQLiteAlchemy(_TestSQLiteAlchemy, _TestSQLAlchemy, - unittest.TestCase): + tm.TestCase): pass @pytest.mark.single class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn, - unittest.TestCase): + tm.TestCase): pass @@ -1860,7 +1859,7 @@ class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn, # -- Test Sqlite / MySQL fallback @pytest.mark.single -class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest, unittest.TestCase): +class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest, tm.TestCase): """ Test the fallback mode against an in-memory sqlite database. 
@@ -1871,7 +1870,7 @@ class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest, unittest.TestCase): def connect(cls): return sqlite3.connect(':memory:') - def setUp(self): + def setup_method(self, method): self.conn = self.connect() self.pandasSQL = sql.SQLiteDatabase(self.conn) @@ -2086,7 +2085,8 @@ def _skip_if_no_pymysql(): @pytest.mark.single class TestXSQLite(SQLiteMixIn, tm.TestCase): - def setUp(self): + def setup_method(self, method): + self.method = method self.conn = sqlite3.connect(':memory:') def test_basic(self): @@ -2186,7 +2186,7 @@ def test_execute_closed_connection(self): tquery("select * from test", con=self.conn) # Initialize connection again (needed for tearDown) - self.setUp() + self.setup_method(self.method) def test_na_roundtrip(self): pass @@ -2317,7 +2317,7 @@ def test_deprecated_flavor(self): class TestXMySQL(MySQLMixIn, tm.TestCase): @classmethod - def setUpClass(cls): + def setup_class(cls): _skip_if_no_pymysql() # test connection @@ -2345,7 +2345,7 @@ def setUpClass(cls): "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") - def setUp(self): + def setup_method(self, method): _skip_if_no_pymysql() import pymysql try: @@ -2371,6 +2371,8 @@ def setUp(self): "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") + self.method = method + def test_basic(self): _skip_if_no_pymysql() frame = tm.makeTimeDataFrame() @@ -2498,7 +2500,7 @@ def test_execute_closed_connection(self): tquery("select * from test", con=self.conn) # Initialize connection again (needed for tearDown) - self.setUp() + self.setup_method(self.method) def test_na_roundtrip(self): _skip_if_no_pymysql() diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 945f0b009a9da..7867e6866876a 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -25,7 +25,7 @@ class TestStata(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.dirpath = tm.get_data_path() self.dta1_114 = os.path.join(self.dirpath, 'stata1_114.dta') self.dta1_117 = os.path.join(self.dirpath, 'stata1_117.dta') diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 2c0ac974e9e43..9a24e4ae2dad0 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -19,11 +19,12 @@ import pandas.plotting as plotting from pandas.plotting._tools import _flatten - """ This is a common base class used for various plotting tests """ +tm._skip_module_if_no_mpl() + def _skip_if_no_scipy_gaussian_kde(): try: @@ -41,10 +42,9 @@ def _ok_for_gaussian_kde(kind): return True -@tm.mplskip class TestPlotBase(tm.TestCase): - def setUp(self): + def setup_method(self, method): import matplotlib as mpl mpl.rcdefaults() @@ -95,7 +95,7 @@ def setUp(self): "C": np.arange(20) + np.random.uniform( size=20)}) - def tearDown(self): + def teardown_method(self, method): tm.close() @cache_readonly diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 1f70d408767f3..1e06c13980657 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -21,6 +21,8 @@ """ Test cases for .boxplot method """ +tm._skip_module_if_no_mpl() + def _skip_if_mpl_14_or_dev_boxplot(): # GH 8382 @@ -31,7 +33,6 @@ def _skip_if_mpl_14_or_dev_boxplot(): pytest.skip("Matplotlib Regression in 1.4 and current dev.") -@tm.mplskip class TestDataFramePlots(TestPlotBase): @slow @@ -165,7 +166,6 @@ def 
test_fontsize(self): xlabelsize=16, ylabelsize=16) -@tm.mplskip class TestDataFrameGroupByPlots(TestPlotBase): @slow diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index e23bc2ef6c563..21d8d1f0ab555 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -17,7 +17,7 @@ def test_timtetonum_accepts_unicode(): class TestDateTimeConverter(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.dtc = converter.DatetimeConverter() self.tc = converter.TimeFormatter(None) @@ -148,7 +148,7 @@ def test_convert_nested(self): class TestPeriodConverter(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.pc = converter.PeriodConverter() class Axis(object): diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index ae8faa031174e..ed198de11bac1 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -20,12 +20,13 @@ from pandas.tests.plotting.common import (TestPlotBase, _skip_if_no_scipy_gaussian_kde) +tm._skip_module_if_no_mpl() + -@tm.mplskip class TestTSPlot(TestPlotBase): - def setUp(self): - TestPlotBase.setUp(self) + def setup_method(self, method): + TestPlotBase.setup_method(self, method) freq = ['S', 'T', 'H', 'D', 'W', 'M', 'Q', 'A'] idx = [period_range('12/31/1999', freq=x, periods=100) for x in freq] @@ -41,7 +42,7 @@ def setUp(self): columns=['A', 'B', 'C']) for x in idx] - def tearDown(self): + def teardown_method(self, method): tm.close() @slow diff --git a/pandas/tests/plotting/test_deprecated.py b/pandas/tests/plotting/test_deprecated.py index d7eaa69460a3a..48030df48deca 100644 --- a/pandas/tests/plotting/test_deprecated.py +++ b/pandas/tests/plotting/test_deprecated.py @@ -18,8 +18,9 @@ pandas.tools.plotting """ +tm._skip_module_if_no_mpl() + -@tm.mplskip class TestDeprecatedNameSpace(TestPlotBase): @slow diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 03bc477d6f852..4a4a71d7ea639 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -27,12 +27,13 @@ _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) +tm._skip_module_if_no_mpl() + -@tm.mplskip class TestDataFramePlots(TestPlotBase): - def setUp(self): - TestPlotBase.setUp(self) + def setup_method(self, method): + TestPlotBase.setup_method(self, method) import matplotlib as mpl mpl.rcdefaults() diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index 121f2f9b75698..8dcf73bce03c0 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -10,8 +10,9 @@ from pandas.tests.plotting.common import TestPlotBase +tm._skip_module_if_no_mpl() + -@tm.mplskip class TestDataFrameGroupByPlots(TestPlotBase): def test_series_groupby_plotting_nominally_works(self): diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index b75fcd4d8b680..c3e32f52e0474 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -15,11 +15,13 @@ from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) -@tm.mplskip +tm._skip_module_if_no_mpl() + + class TestSeriesPlots(TestPlotBase): - def setUp(self): - TestPlotBase.setUp(self) + def setup_method(self, method): + TestPlotBase.setup_method(self, method) import matplotlib as mpl mpl.rcdefaults() @@ -140,7 +142,6 @@ def 
test_plot_fails_when_ax_differs_from_figure(self): self.ts.hist(ax=ax1, figure=fig2) -@tm.mplskip class TestDataFramePlots(TestPlotBase): @slow @@ -251,7 +252,6 @@ def test_tight_layout(self): tm.close() -@tm.mplskip class TestDataFrameGroupByPlots(TestPlotBase): @slow diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 3a9cb309db707..9eace32aa19a3 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -17,12 +17,13 @@ from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _ok_for_gaussian_kde) +tm._skip_module_if_no_mpl() + -@tm.mplskip class TestSeriesPlots(TestPlotBase): - def setUp(self): - TestPlotBase.setUp(self) + def setup_method(self, method): + TestPlotBase.setup_method(self, method) import matplotlib as mpl mpl.rcdefaults() @@ -50,7 +51,6 @@ def test_bootstrap_plot(self): _check_plot_works(bootstrap_plot, series=self.ts, size=10) -@tm.mplskip class TestDataFramePlots(TestPlotBase): @slow diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 91a27142069c7..448661c7af0e9 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -22,12 +22,13 @@ _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) +tm._skip_module_if_no_mpl() + -@tm.mplskip class TestSeriesPlots(TestPlotBase): - def setUp(self): - TestPlotBase.setUp(self) + def setup_method(self, method): + TestPlotBase.setup_method(self, method) import matplotlib as mpl mpl.rcdefaults() diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 2d4d0a09060de..1842af465ca89 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -19,7 +19,7 @@ class ConcatenateBase(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.frame = DataFrame(tm.getSeriesData()) self.mixed_frame = self.frame.copy() self.mixed_frame['foo'] = 'bar' @@ -31,7 +31,7 @@ class TestConcatAppendCommon(ConcatenateBase): Test common dtype coercion rules between concat and append. 
""" - def setUp(self): + def setup_method(self, method): dt_data = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), diff --git a/pandas/tests/reshape/test_hashing.py b/pandas/tests/reshape/test_hashing.py index 85807da33e38d..622768353dd50 100644 --- a/pandas/tests/reshape/test_hashing.py +++ b/pandas/tests/reshape/test_hashing.py @@ -11,7 +11,7 @@ class TestHashing(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.df = DataFrame( {'i32': np.array([1, 2, 3] * 3, dtype='int32'), 'f32': np.array([None, 2.5, 3.5] * 3, dtype='float32'), diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py index cda343175fd0a..3a6985fd4a373 100644 --- a/pandas/tests/reshape/test_join.py +++ b/pandas/tests/reshape/test_join.py @@ -21,7 +21,7 @@ class TestJoin(tm.TestCase): - def setUp(self): + def setup_method(self, method): # aggregate multiple columns self.df = DataFrame({'key1': get_test_data(), 'key2': get_test_data(), diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index db0e4631381f1..e36b7ecbc3c7b 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -35,7 +35,7 @@ def get_test_data(ngroups=NGROUPS, n=N): class TestMerge(tm.TestCase): - def setUp(self): + def setup_method(self, method): # aggregate multiple columns self.df = DataFrame({'key1': get_test_data(), 'key2': get_test_data(), @@ -739,7 +739,7 @@ def _check_merge(x, y): class TestMergeMulti(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], diff --git a/pandas/tests/reshape/test_merge_asof.py b/pandas/tests/reshape/test_merge_asof.py index 7934b8abf85a8..7e33449c92665 100644 --- a/pandas/tests/reshape/test_merge_asof.py +++ b/pandas/tests/reshape/test_merge_asof.py @@ -23,7 +23,7 @@ def read_data(self, name, dedupe=False): x.time = to_datetime(x.time) return x - def setUp(self): + def setup_method(self, method): self.trades = self.read_data('trades.csv') self.quotes = self.read_data('quotes.csv', dedupe=True) diff --git a/pandas/tests/reshape/test_merge_ordered.py b/pandas/tests/reshape/test_merge_ordered.py index 1f1eee0e9980b..375e2e13847e8 100644 --- a/pandas/tests/reshape/test_merge_ordered.py +++ b/pandas/tests/reshape/test_merge_ordered.py @@ -8,7 +8,7 @@ class TestOrderedMerge(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.left = DataFrame({'key': ['a', 'c', 'e'], 'lvalue': [1, 2., 3]}) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index df679966e0002..905cd27ca4c58 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -17,7 +17,7 @@ class TestPivotTable(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', 'foo', 'foo', 'foo'], @@ -984,7 +984,7 @@ def test_pivot_table_not_series(self): class TestCrosstab(tm.TestCase): - def setUp(self): + def setup_method(self, method): df = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', 'foo', 'foo', 'foo'], diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 87cd0637f1125..de2fe444bc4ea 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -19,7 +19,7 @@ class TestMelt(tm.TestCase): - def setUp(self): + def setup_method(self, 
method): self.df = tm.makeTimeDataFrame()[:10] self.df['id1'] = (self.df['A'] > 0).astype(np.int64) self.df['id2'] = (self.df['B'] > 0).astype(np.int64) @@ -220,7 +220,7 @@ class TestGetDummies(tm.TestCase): sparse = False - def setUp(self): + def setup_method(self, method): self.df = DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'b', 'c'], 'C': [1, 2, 3]}) diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/test_interval.py index 079c41657bec6..fab6f170bec60 100644 --- a/pandas/tests/scalar/test_interval.py +++ b/pandas/tests/scalar/test_interval.py @@ -6,7 +6,7 @@ class TestInterval(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.interval = Interval(0, 1) def test_properties(self): diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index 2e60cfdb7a4f2..8c89fa60b12d6 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -923,7 +923,7 @@ def test_get_period_field_array_raises_on_out_of_range(self): class TestComparisons(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.january1 = Period('2000-01', 'M') self.january2 = Period('2000-01', 'M') self.february = Period('2000-02', 'M') diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index 5659bc26fc1cc..82d6f6e8c84e5 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -15,7 +15,7 @@ class TestTimedeltas(tm.TestCase): _multiprocess_can_split_ = True - def setUp(self): + def setup_method(self, method): pass def test_construction(self): diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 04b33bbc6c3bf..64f68112f4b81 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1096,7 +1096,7 @@ def test_is_leap_year(self): class TestTimestampNsOperations(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.timestamp = Timestamp(datetime.utcnow()) def assert_ns_timedelta(self, modified_timestamp, expected_value): diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 394ae88983faa..8eae59a473995 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -2254,7 +2254,7 @@ def test_setitem_slice_into_readonly_backing_data(self): class TestTimeSeriesDuplicates(tm.TestCase): - def setUp(self): + def setup_method(self, method): dates = [datetime(2000, 1, 2), datetime(2000, 1, 2), datetime(2000, 1, 2), datetime(2000, 1, 3), datetime(2000, 1, 3), datetime(2000, 1, 3), @@ -2499,7 +2499,7 @@ class TestDatetimeIndexing(tm.TestCase): Also test support for datetime64[ns] in Series / DataFrame """ - def setUp(self): + def setup_method(self, method): dti = DatetimeIndex(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq='Min') self.series = Series(np.random.rand(len(dti)), dti) @@ -2640,7 +2640,7 @@ def test_frame_datetime64_duplicated(self): class TestNatIndexing(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.series = Series(date_range('1/1/2000', periods=10)) # --------------------------------------------------------------------- diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index 5ea27d605c28a..792d5b9e5c383 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -12,7 +12,7 @@ def _permute(obj): class TestSeriesPeriod(tm.TestCase): - def setUp(self): + def 
setup_method(self, method): self.series = Series(period_range('2000-01-01', periods=10, freq='D')) def test_auto_conversion(self): diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py index 9a2c958a252af..c205a1efbeeb1 100644 --- a/pandas/tests/sparse/test_array.py +++ b/pandas/tests/sparse/test_array.py @@ -17,7 +17,7 @@ class TestSparseArray(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.arr_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6]) self.arr = SparseArray(self.arr_data) self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) diff --git a/pandas/tests/sparse/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py index 57b4065744e32..ab56a83c90530 100644 --- a/pandas/tests/sparse/test_combine_concat.py +++ b/pandas/tests/sparse/test_combine_concat.py @@ -124,7 +124,7 @@ def test_concat_sparse_dense(self): class TestSparseDataFrameConcat(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.dense1 = pd.DataFrame({'A': [0., 1., 2., np.nan], 'B': [0., 0., 0., 0.], diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index f2dd2aa79cc6a..762bfba85dd0a 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -29,7 +29,7 @@ class TestSparseDataFrame(tm.TestCase, SharedWithSparse): klass = SparseDataFrame - def setUp(self): + def setup_method(self, method): self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], 'C': np.arange(10, dtype=np.float64), @@ -1275,7 +1275,7 @@ def test_comparison_op_scalar(self): class TestSparseDataFrameAnalytics(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], 'C': np.arange(10, dtype=float), diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py index 23bea94a2aef8..501e40c6ebffd 100644 --- a/pandas/tests/sparse/test_groupby.py +++ b/pandas/tests/sparse/test_groupby.py @@ -6,7 +6,7 @@ class TestSparseGroupBy(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], 'B': ['one', 'one', 'two', 'three', diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py index 0fc2211bbeeae..bb449c05729d4 100644 --- a/pandas/tests/sparse/test_indexing.py +++ b/pandas/tests/sparse/test_indexing.py @@ -8,7 +8,7 @@ class TestSparseSeriesIndexing(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.orig = pd.Series([1, np.nan, np.nan, 3, np.nan]) self.sparse = self.orig.to_sparse() @@ -446,7 +446,7 @@ def tests_indexing_with_sparse(self): class TestSparseSeriesMultiIndexing(TestSparseSeriesIndexing): - def setUp(self): + def setup_method(self, method): # Mi with duplicated values idx = pd.MultiIndex.from_tuples([('A', 0), ('A', 1), ('B', 0), ('C', 0), ('C', 1)]) @@ -954,7 +954,7 @@ def test_reindex_fill_value(self): class TestMultitype(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.cols = ['string', 'int', 'float', 'object'] self.string_series = pd.SparseSeries(['a', 'b', 'c']) diff --git a/pandas/tests/sparse/test_list.py b/pandas/tests/sparse/test_list.py index 941e07a5582b0..3eab34661ae2b 100644 --- a/pandas/tests/sparse/test_list.py +++ b/pandas/tests/sparse/test_list.py @@ -1,5 +1,4 @@ from pandas.compat import range -import unittest from numpy 
import nan import numpy as np @@ -8,9 +7,9 @@ import pandas.util.testing as tm -class TestSparseList(unittest.TestCase): +class TestSparseList(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.na_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6]) self.zero_data = np.array([0, 0, 1, 2, 3, 0, 4, 5, 0, 6]) diff --git a/pandas/tests/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py index 4ff9f20093c67..57c47b4e68811 100644 --- a/pandas/tests/sparse/test_pivot.py +++ b/pandas/tests/sparse/test_pivot.py @@ -5,7 +5,7 @@ class TestPivotTable(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], 'B': ['one', 'one', 'two', 'three', diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index 0f04e1a06900d..b756b63523798 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -58,7 +58,7 @@ def _test_data2_zero(): class TestSparseSeries(tm.TestCase, SharedWithSparse): - def setUp(self): + def setup_method(self, method): arr, index = _test_data1() date_index = bdate_range('1/1/2011', periods=len(index)) @@ -936,7 +936,7 @@ def test_combine_first(self): class TestSparseHandlingMultiIndexes(tm.TestCase): - def setUp(self): + def setup_method(self, method): miindex = pd.MultiIndex.from_product( [["x", "y"], ["10", "20"]], names=['row-foo', 'row-bar']) micol = pd.MultiIndex.from_product( @@ -963,7 +963,7 @@ def test_round_trip_preserve_multiindex_names(self): class TestSparseSeriesScipyInteraction(tm.TestCase): # Issue 8048: add SparseSeries coo methods - def setUp(self): + def setup_method(self, method): tm._skip_if_no_scipy() import scipy.sparse # SparseSeries inputs used in tests, the tests rely on the order @@ -1312,7 +1312,7 @@ def _dense_series_compare(s, f): class TestSparseSeriesAnalytics(tm.TestCase): - def setUp(self): + def setup_method(self, method): arr, index = _test_data1() self.bseries = SparseSeries(arr, index=index, kind='block', name='bseries') diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 86d9ab3643cc9..dda95426d8011 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -282,7 +282,7 @@ def test_complex_sorting(self): # gh 12666 - check no segfault # Test not valid numpy versions older than 1.11 if pd._np_version_under1p11: - self.skipTest("Test valid only for numpy 1.11+") + pytest.skip("Test valid only for numpy 1.11+") x17 = np.array([complex(i) for i in range(17)], dtype=object) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index ed0d61cdbbaf9..dcc685ceef28e 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -109,7 +109,7 @@ class Delegate(PandasDelegate): def __init__(self, obj): self.obj = obj - def setUp(self): + def setup_method(self, method): pass def test_invalida_delgation(self): @@ -162,7 +162,7 @@ def _allow_na_ops(self, obj): return False return True - def setUp(self): + def setup_method(self, method): self.bool_index = tm.makeBoolIndex(10, name='a') self.int_index = tm.makeIntIndex(10, name='a') self.float_index = tm.makeFloatIndex(10, name='a') @@ -259,8 +259,8 @@ def test_binary_ops_docs(self): class TestIndexOps(Ops): - def setUp(self): - super(TestIndexOps, self).setUp() + def setup_method(self, method): + super(TestIndexOps, self).setup_method(method) self.is_valid_objs = [o for o in self.objs if o._allow_index_ops] self.not_valid_objs = [o for o in self.objs if not 
o._allow_index_ops] diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 515ca8d9cedc5..2a53cf15278e0 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -30,7 +30,7 @@ class TestCategorical(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], ordered=True) @@ -1602,7 +1602,7 @@ def test_validate_inplace(self): class TestCategoricalAsBlock(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']) df = DataFrame({'value': np.random.randint(0, 10000, 100)}) diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py index ba055b105dc41..79475b297f83c 100644 --- a/pandas/tests/test_config.py +++ b/pandas/tests/test_config.py @@ -1,12 +1,13 @@ # -*- coding: utf-8 -*- import pytest +import pandas.util.testing as tm import pandas as pd -import unittest + import warnings -class TestConfig(unittest.TestCase): +class TestConfig(tm.TestCase): def __init__(self, *args): super(TestConfig, self).__init__(*args) @@ -17,14 +18,14 @@ def __init__(self, *args): self.do = deepcopy(getattr(self.cf, '_deprecated_options')) self.ro = deepcopy(getattr(self.cf, '_registered_options')) - def setUp(self): + def setup_method(self, method): setattr(self.cf, '_global_config', {}) setattr( self.cf, 'options', self.cf.DictWrapper(self.cf._global_config)) setattr(self.cf, '_deprecated_options', {}) setattr(self.cf, '_registered_options', {}) - def tearDown(self): + def teardown_method(self, method): setattr(self.cf, '_global_config', self.gc) setattr(self.cf, '_deprecated_options', self.do) setattr(self.cf, '_registered_options', self.ro) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 8ef29097b66e8..79b057c0548a9 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -58,7 +58,7 @@ @pytest.mark.skipif(not expr._USE_NUMEXPR, reason='not using numexpr') class TestExpressions(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.frame = _frame.copy() self.frame2 = _frame2.copy() @@ -67,7 +67,7 @@ def setUp(self): self.integer = _integer.copy() self._MIN_ELEMENTS = expr._MIN_ELEMENTS - def tearDown(self): + def teardown_method(self, method): expr._MIN_ELEMENTS = self._MIN_ELEMENTS def run_arithmetic(self, df, other, assert_func, check_dtype=False, diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 61b4369d21ab4..0f2a3ce1d1e94 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -194,7 +194,7 @@ def create_mgr(descr, item_shape=None): class TestBlock(tm.TestCase): - def setUp(self): + def setup_method(self, method): # self.fblock = get_float_ex() # a,c,e # self.cblock = get_complex_ex() # # self.oblock = get_obj_ex() diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index f4cb07625faf2..bfab10b7e63e7 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -22,7 +22,7 @@ class Base(object): - def setUp(self): + def setup_method(self, method): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index efa647fd91a0d..c5ecd75290fc6 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -18,7 +18,7 @@ class TestnanopsDataFrame(tm.TestCase): - def setUp(self): + 
def setup_method(self, method): np.random.seed(11235) nanops._USE_BOTTLENECK = False @@ -118,7 +118,7 @@ def setUp(self): self.arr_float_nan_inf_1d = self.arr_float_nan_inf[:, 0, 0] self.arr_nan_nan_inf_1d = self.arr_nan_nan_inf[:, 0, 0] - def tearDown(self): + def teardown_method(self, method): nanops._USE_BOTTLENECK = use_bn def check_results(self, targ, res, axis, check_dtype=True): @@ -786,7 +786,7 @@ class TestNanvarFixedValues(tm.TestCase): # xref GH10242 - def setUp(self): + def setup_method(self, method): # Samples from a normal distribution. self.variance = variance = 3.0 self.samples = self.prng.normal(scale=variance ** 0.5, size=100000) @@ -899,7 +899,7 @@ class TestNanskewFixedValues(tm.TestCase): # xref GH 11974 - def setUp(self): + def setup_method(self, method): # Test data + skewness value (computed with scipy.stats.skew) self.samples = np.sin(np.linspace(0, 1, 200)) self.actual_skew = -0.1875895205961754 @@ -949,7 +949,7 @@ class TestNankurtFixedValues(tm.TestCase): # xref GH 11974 - def setUp(self): + def setup_method(self, method): # Test data + kurtosis value (computed with scipy.stats.kurtosis) self.samples = np.sin(np.linspace(0, 1, 200)) self.actual_kurt = -1.2058303433799713 diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index b9cceab4d65f4..44e1db494c041 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -908,7 +908,7 @@ class TestPanel(tm.TestCase, PanelTests, CheckIndexing, SafeForLongAndSparse, def assert_panel_equal(cls, x, y): assert_panel_equal(x, y) - def setUp(self): + def setup_method(self, method): self.panel = make_test_panel() self.panel.major_axis.name = None self.panel.minor_axis.name = None @@ -2435,7 +2435,7 @@ class TestLongPanel(tm.TestCase): LongPanel no longer exists, but... 
""" - def setUp(self): + def setup_method(self, method): panel = make_test_panel() self.panel = panel.to_frame() self.unfiltered_panel = panel.to_frame(filter_observations=False) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 1b611309aece0..7d966422a7d79 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -596,7 +596,7 @@ def test_set_value(self): class TestPanel4d(tm.TestCase, CheckIndexing, SafeForSparse, SafeForLongAndSparse): - def setUp(self): + def setup_method(self, method): with catch_warnings(record=True): self.panel4d = tm.makePanel4D(nper=8) add_nans(self.panel4d) @@ -685,7 +685,7 @@ def test_ctor_dict(self): tm.assert_panel_equal(panel4d['A'], self.panel4d['l1']) tm.assert_frame_equal(panel4d.loc['B', 'ItemB', :, :], self.panel4d.loc['l2', ['ItemB'], - :, :]['ItemB']) + :, :]['ItemB']) def test_constructor_dict_mixed(self): with catch_warnings(record=True): @@ -798,7 +798,7 @@ def test_reindex(self): method='pad') tm.assert_panel_equal(larger.loc[:, :, - self.panel4d.major_axis[1], :], + self.panel4d.major_axis[1], :], smaller.loc[:, :, smaller_major[0], :]) # don't necessarily copy diff --git a/pandas/tests/test_panelnd.py b/pandas/tests/test_panelnd.py index 33c37e9c8feb2..7861b98b0ddd9 100644 --- a/pandas/tests/test_panelnd.py +++ b/pandas/tests/test_panelnd.py @@ -11,7 +11,7 @@ class TestPanelnd(tm.TestCase): - def setUp(self): + def setup_method(self, method): pass def test_4d_construction(self): diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 276e9a12c1993..c6719790c9e35 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -52,7 +52,7 @@ def _simple_pts(start, end, freq='D'): class TestResampleAPI(tm.TestCase): - def setUp(self): + def setup_method(self, method): dti = DatetimeIndex(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq='Min') @@ -850,7 +850,7 @@ def test_resample_loffset_arg_type(self): class TestDatetimeIndex(Base, tm.TestCase): _index_factory = lambda x: date_range - def setUp(self): + def setup_method(self, method): dti = DatetimeIndex(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq='Min') @@ -2796,7 +2796,7 @@ def test_asfreq_bug(self): class TestResamplerGrouper(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.frame = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8, 'B': np.arange(40)}, index=date_range('1/1/2000', @@ -2991,7 +2991,7 @@ def test_median_duplicate_columns(self): class TestTimeGrouper(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.ts = Series(np.random.randn(1000), index=date_range('1/1/2000', periods=1000)) diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 2c0cd55205a5a..2e84638533820 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- import pandas as pd -import unittest import pytest import numpy as np import sys @@ -340,7 +339,7 @@ def test_assert_almost_equal_iterable_message(self): assert_almost_equal([1, 2], [1, 3]) -class TestAssertIndexEqual(unittest.TestCase): +class TestAssertIndexEqual(tm.TestCase): def test_index_equal_message(self): @@ -680,7 +679,7 @@ def test_frame_equal_message(self): by_blocks=True) -class TestAssertCategoricalEqual(unittest.TestCase): +class TestAssertCategoricalEqual(tm.TestCase): def test_categorical_equal_message(self): @@ -718,7 +717,7 @@ def test_categorical_equal_message(self): tm.assert_categorical_equal(a, b) -class 
TestRNGContext(unittest.TestCase): +class TestRNGContext(tm.TestCase): def test_RNGContext(self): expected0 = 1.764052345967664 diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py index 80eb5bb9dfe16..e9e04f76704f2 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/test_util.py @@ -22,7 +22,7 @@ class TestDecorators(tm.TestCase): - def setUp(self): + def setup_method(self, method): @deprecate_kwarg('old', 'new') def _f1(new=False): return new @@ -410,8 +410,8 @@ def test_numpy_errstate_is_default(): class TestLocaleUtils(tm.TestCase): @classmethod - def setUpClass(cls): - super(TestLocaleUtils, cls).setUpClass() + def setup_class(cls): + super(TestLocaleUtils, cls).setup_class() cls.locales = tm.get_locales() if not cls.locales: @@ -420,8 +420,8 @@ def setUpClass(cls): tm._skip_if_windows() @classmethod - def tearDownClass(cls): - super(TestLocaleUtils, cls).tearDownClass() + def teardown_class(cls): + super(TestLocaleUtils, cls).teardown_class() del cls.locales def test_get_locales(self): diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index d3e427dfb4c7b..5436f3c342019 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -48,7 +48,7 @@ def _create_data(self): class TestApi(Base): - def setUp(self): + def setup_method(self, method): self._create_data() def test_getitem(self): @@ -315,7 +315,7 @@ def test_how_compat(self): class TestWindow(Base): - def setUp(self): + def setup_method(self, method): self._create_data() def test_constructor(self): @@ -360,7 +360,7 @@ def test_numpy_compat(self): class TestRolling(Base): - def setUp(self): + def setup_method(self, method): self._create_data() def test_doc_string(self): @@ -444,7 +444,7 @@ def test_closed(self): class TestExpanding(Base): - def setUp(self): + def setup_method(self, method): self._create_data() def test_doc_string(self): @@ -486,7 +486,7 @@ def test_numpy_compat(self): class TestEWM(Base): - def setUp(self): + def setup_method(self, method): self._create_data() def test_doc_string(self): @@ -549,7 +549,7 @@ def test_numpy_compat(self): class TestDeprecations(Base): """ test that we are catching deprecation warnings """ - def setUp(self): + def setup_method(self, method): self._create_data() def test_deprecations(self): @@ -559,11 +559,11 @@ def test_deprecations(self): mom.rolling_mean(Series(np.ones(10)), 3, center=True, axis=0) -# GH #12373 : rolling functions error on float32 data +# gh-12373 : rolling functions error on float32 data # make sure rolling functions works for different dtypes # # NOTE that these are yielded tests and so _create_data is -# explicity called, nor do these inherit from unittest.TestCase +# explicitly called, nor do these inherit from tm.TestCase # # further note that we are only checking rolling for fully dtype # compliance (though both expanding and ewm inherit) @@ -775,7 +775,7 @@ def _create_data(self): class TestMoments(Base): - def setUp(self): + def setup_method(self, method): self._create_data() def test_centered_axis_validation(self): @@ -1958,7 +1958,7 @@ def _create_data(self): super(TestMomentsConsistency, self)._create_data() self.data = _consistency_data - def setUp(self): + def setup_method(self, method): self._create_data() def _test_moments_consistency(self, min_periods, count, mean, mock_mean, @@ -3039,7 +3039,7 @@ def test_rolling_min_max_numeric_types(self): class TestGrouperGrouping(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.series = Series(np.arange(10)) self.frame = 
DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8, 'B': np.arange(40)}) @@ -3187,7 +3187,7 @@ class TestRollingTS(tm.TestCase): # rolling time-series friendly # xref GH13327 - def setUp(self): + def setup_method(self, method): self.regular = DataFrame({'A': pd.date_range('20130101', periods=5, diff --git a/pandas/tests/tseries/test_holiday.py b/pandas/tests/tseries/test_holiday.py index 109adaaa7e0b0..8ea4140bb85a7 100644 --- a/pandas/tests/tseries/test_holiday.py +++ b/pandas/tests/tseries/test_holiday.py @@ -21,7 +21,7 @@ class TestCalendar(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.holiday_list = [ datetime(2012, 1, 2), datetime(2012, 1, 16), @@ -87,7 +87,7 @@ def test_rule_from_name(self): class TestHoliday(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.start_date = datetime(2011, 1, 1) self.end_date = datetime(2020, 12, 31) @@ -286,7 +286,7 @@ def test_factory(self): class TestObservanceRules(tm.TestCase): - def setUp(self): + def setup_method(self, method): self.we = datetime(2014, 4, 9) self.th = datetime(2014, 4, 10) self.fr = datetime(2014, 4, 11) diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index 79190aa98f8d9..b6cd5e7958342 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -167,7 +167,7 @@ def test_apply_out_of_range(self): class TestCommon(Base): - def setUp(self): + def setup_method(self, method): # exected value created by Base._get_offset # are applied to 2011/01/01 09:00 (Saturday) # used for .apply and .rollforward @@ -507,7 +507,7 @@ def test_pickle_v0_15_2(self): class TestDateOffset(Base): - def setUp(self): + def setup_method(self, method): self.d = Timestamp(datetime(2008, 1, 2)) _offset_map.clear() @@ -547,7 +547,7 @@ def test_eq(self): class TestBusinessDay(Base): _offset = BDay - def setUp(self): + def setup_method(self, method): self.d = datetime(2008, 1, 1) self.offset = BDay() @@ -724,7 +724,7 @@ def test_offsets_compare_equal(self): class TestBusinessHour(Base): _offset = BusinessHour - def setUp(self): + def setup_method(self, method): self.d = datetime(2014, 7, 1, 10, 00) self.offset1 = BusinessHour() @@ -1418,7 +1418,7 @@ def test_datetimeindex(self): class TestCustomBusinessHour(Base): _offset = CustomBusinessHour - def setUp(self): + def setup_method(self, method): # 2014 Calendar to check custom holidays # Sun Mon Tue Wed Thu Fri Sat # 6/22 23 24 25 26 27 28 @@ -1674,7 +1674,7 @@ def test_apply_nanoseconds(self): class TestCustomBusinessDay(Base): _offset = CDay - def setUp(self): + def setup_method(self, method): self.d = datetime(2008, 1, 1) self.nd = np_datetime64_compat('2008-01-01 00:00:00Z') @@ -1910,7 +1910,7 @@ def test_pickle_compat_0_14_1(self): class CustomBusinessMonthBase(object): - def setUp(self): + def setup_method(self, method): self.d = datetime(2008, 1, 1) self.offset = self._object() @@ -4612,7 +4612,7 @@ def test_quarterly_dont_normalize(): class TestOffsetAliases(tm.TestCase): - def setUp(self): + def setup_method(self, method): _offset_map.clear() def test_alias_equality(self): @@ -4696,7 +4696,7 @@ class TestCaching(tm.TestCase): # as of GH 6479 (in 0.14.0), offset caching is turned off # as of v0.12.0 only BusinessMonth/Quarter were actually caching - def setUp(self): + def setup_method(self, method): _daterange_cache.clear() _offset_map.clear() diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 10776381974de..74220aa5cd183 100644 --- 
a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -52,7 +52,7 @@ def dst(self, dt): class TestTimeZoneSupportPytz(tm.TestCase): - def setUp(self): + def setup_method(self, method): tm._skip_if_no_pytz() def tz(self, tz): @@ -944,7 +944,7 @@ def test_datetimeindex_tz_nat(self): class TestTimeZoneSupportDateutil(TestTimeZoneSupportPytz): - def setUp(self): + def setup_method(self, method): tm._skip_if_no_dateutil() def tz(self, tz): @@ -1197,7 +1197,7 @@ def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self): class TestTimeZones(tm.TestCase): timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific'] - def setUp(self): + def setup_method(self, method): tm._skip_if_no_pytz() def test_replace(self): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index d0c56e9974a3f..354e11ce0133a 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -10,7 +10,6 @@ import os import subprocess import locale -import unittest import traceback from datetime import datetime @@ -86,22 +85,17 @@ def reset_testing_mode(): set_testing_mode() -class TestCase(unittest.TestCase): +class TestCase(object): """ - The test case class that we originally used when using the - nosetests framework. Under the new pytest framework, we are - moving away from this class. - - Do not create new test classes derived from this one. Rather, - they should inherit from object directly. + Base class for all test case classes. """ @classmethod - def setUpClass(cls): + def setup_class(cls): pd.set_option('chained_assignment', 'raise') @classmethod - def tearDownClass(cls): + def teardown_class(cls): pass @@ -295,36 +289,31 @@ def _skip_if_32bit(): pytest.skip("skipping for 32 bit") -def mplskip(cls): - """Skip a TestCase instance if matplotlib isn't installed""" - - @classmethod - def setUpClass(cls): - try: - import matplotlib as mpl - mpl.use("Agg", warn=False) - except ImportError: - import pytest - pytest.skip("matplotlib not installed") +def _skip_module_if_no_mpl(): + import pytest - cls.setUpClass = setUpClass - return cls + mpl = pytest.importorskip("matplotlib") + mpl.use("Agg", warn=False) def _skip_if_no_mpl(): try: - import matplotlib # noqa + import matplotlib as mpl + mpl.use("Agg", warn=False) except ImportError: import pytest pytest.skip("matplotlib not installed") def _skip_if_mpl_1_5(): - import matplotlib - v = matplotlib.__version__ + import matplotlib as mpl + + v = mpl.__version__ if v > LooseVersion('1.4.3') or v[0] == '0': import pytest pytest.skip("matplotlib 1.5") + else: + mpl.use("Agg", warn=False) def _skip_if_no_scipy(): From 1f5ecc9a7008ea2a8af6e136a5bd6b410625623d Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 4 May 2017 07:01:21 -0400 Subject: [PATCH 507/933] MAINT: Remove tm.TestCase from testing (#16225) --- doc/source/contributing.rst | 4 +-- pandas/conftest.py | 7 +++++ pandas/tests/api/test_api.py | 18 +++++------ pandas/tests/api/test_types.py | 2 +- pandas/tests/computation/test_eval.py | 13 +++----- pandas/tests/dtypes/test_cast.py | 8 ++--- pandas/tests/dtypes/test_common.py | 2 +- pandas/tests/dtypes/test_concat.py | 3 +- pandas/tests/dtypes/test_generic.py | 3 +- pandas/tests/dtypes/test_inference.py | 8 ++--- pandas/tests/dtypes/test_io.py | 2 +- pandas/tests/dtypes/test_missing.py | 2 +- pandas/tests/frame/test_alter_axes.py | 4 +-- pandas/tests/frame/test_analytics.py | 2 +- pandas/tests/frame/test_api.py | 2 +- pandas/tests/frame/test_apply.py | 4 +-- pandas/tests/frame/test_asof.py | 2 +- 
.../tests/frame/test_axis_select_reindex.py | 2 +- pandas/tests/frame/test_block_internals.py | 2 +- pandas/tests/frame/test_combine_concat.py | 4 +-- pandas/tests/frame/test_constructors.py | 4 +-- pandas/tests/frame/test_convert_to.py | 2 +- pandas/tests/frame/test_dtypes.py | 4 +-- pandas/tests/frame/test_indexing.py | 6 ++-- pandas/tests/frame/test_missing.py | 4 +-- pandas/tests/frame/test_mutate_columns.py | 2 +- pandas/tests/frame/test_nonunique_indexes.py | 2 +- pandas/tests/frame/test_operators.py | 2 +- pandas/tests/frame/test_period.py | 2 +- pandas/tests/frame/test_quantile.py | 2 +- pandas/tests/frame/test_query_eval.py | 16 ++++------ pandas/tests/frame/test_rank.py | 2 +- pandas/tests/frame/test_replace.py | 2 +- pandas/tests/frame/test_repr_info.py | 2 +- pandas/tests/frame/test_reshape.py | 2 +- pandas/tests/frame/test_sorting.py | 4 +-- pandas/tests/frame/test_subclass.py | 2 +- pandas/tests/frame/test_timeseries.py | 2 +- pandas/tests/frame/test_to_csv.py | 2 +- pandas/tests/frame/test_validate.py | 3 +- pandas/tests/groupby/test_aggregate.py | 2 +- pandas/tests/groupby/test_bin_groupby.py | 6 ++-- pandas/tests/groupby/test_categorical.py | 2 +- pandas/tests/groupby/test_filters.py | 2 +- pandas/tests/groupby/test_groupby.py | 2 +- pandas/tests/groupby/test_nth.py | 3 +- pandas/tests/groupby/test_timegrouper.py | 2 +- pandas/tests/groupby/test_transform.py | 2 +- pandas/tests/indexes/datetimes/test_astype.py | 4 +-- .../indexes/datetimes/test_construction.py | 4 +-- .../indexes/datetimes/test_date_range.py | 8 ++--- .../tests/indexes/datetimes/test_datetime.py | 2 +- .../indexes/datetimes/test_datetimelike.py | 2 +- .../tests/indexes/datetimes/test_indexing.py | 2 +- pandas/tests/indexes/datetimes/test_misc.py | 6 ++-- .../tests/indexes/datetimes/test_missing.py | 2 +- pandas/tests/indexes/datetimes/test_ops.py | 6 ++-- .../indexes/datetimes/test_partial_slicing.py | 2 +- pandas/tests/indexes/datetimes/test_setops.py | 6 ++-- pandas/tests/indexes/datetimes/test_tools.py | 18 +++++------ pandas/tests/indexes/period/test_asfreq.py | 2 +- .../tests/indexes/period/test_construction.py | 4 +-- pandas/tests/indexes/period/test_indexing.py | 4 +-- pandas/tests/indexes/period/test_ops.py | 8 ++--- .../indexes/period/test_partial_slicing.py | 2 +- pandas/tests/indexes/period/test_period.py | 2 +- pandas/tests/indexes/period/test_setops.py | 2 +- pandas/tests/indexes/period/test_tools.py | 6 ++-- pandas/tests/indexes/test_base.py | 4 +-- pandas/tests/indexes/test_category.py | 2 +- pandas/tests/indexes/test_frozen.py | 4 +-- pandas/tests/indexes/test_interval.py | 6 ++-- pandas/tests/indexes/test_multi.py | 2 +- pandas/tests/indexes/test_numeric.py | 6 ++-- pandas/tests/indexes/test_range.py | 2 +- .../tests/indexes/timedeltas/test_astype.py | 2 +- .../indexes/timedeltas/test_construction.py | 2 +- .../tests/indexes/timedeltas/test_indexing.py | 2 +- pandas/tests/indexes/timedeltas/test_ops.py | 4 +-- .../timedeltas/test_partial_slicing.py | 2 +- .../tests/indexes/timedeltas/test_setops.py | 2 +- .../indexes/timedeltas/test_timedelta.py | 6 ++-- .../timedeltas/test_timedelta_range.py | 2 +- pandas/tests/indexes/timedeltas/test_tools.py | 2 +- pandas/tests/indexing/test_callable.py | 2 +- pandas/tests/indexing/test_categorical.py | 2 +- .../indexing/test_chaining_and_caching.py | 4 +-- pandas/tests/indexing/test_coercion.py | 10 +++--- pandas/tests/indexing/test_datetime.py | 2 +- pandas/tests/indexing/test_floats.py | 2 +- pandas/tests/indexing/test_iloc.py | 2 +- 
pandas/tests/indexing/test_indexing.py | 8 ++--- pandas/tests/indexing/test_indexing_slow.py | 2 +- pandas/tests/indexing/test_interval.py | 2 +- pandas/tests/indexing/test_ix.py | 2 +- pandas/tests/indexing/test_loc.py | 2 +- pandas/tests/indexing/test_multiindex.py | 6 ++-- pandas/tests/indexing/test_panel.py | 2 +- pandas/tests/indexing/test_partial.py | 2 +- pandas/tests/indexing/test_scalar.py | 2 +- pandas/tests/indexing/test_timedelta.py | 2 +- .../tests/io/formats/test_eng_formatting.py | 2 +- pandas/tests/io/formats/test_format.py | 20 ++++++------ pandas/tests/io/formats/test_printing.py | 5 ++- pandas/tests/io/formats/test_style.py | 4 +-- pandas/tests/io/formats/test_to_csv.py | 2 +- pandas/tests/io/formats/test_to_html.py | 2 +- .../tests/io/json/test_json_table_schema.py | 7 ++--- pandas/tests/io/json/test_normalize.py | 2 +- pandas/tests/io/json/test_pandas.py | 2 +- pandas/tests/io/json/test_ujson.py | 6 ++-- pandas/tests/io/msgpack/test_limits.py | 4 +-- pandas/tests/io/msgpack/test_unpack.py | 3 +- pandas/tests/io/parser/test_network.py | 2 +- pandas/tests/io/parser/test_parsers.py | 6 ++-- pandas/tests/io/parser/test_read_fwf.py | 2 +- pandas/tests/io/parser/test_textreader.py | 2 +- pandas/tests/io/parser/test_unsupported.py | 4 +-- pandas/tests/io/sas/test_sas.py | 3 +- pandas/tests/io/sas/test_sas7bdat.py | 2 +- pandas/tests/io/sas/test_xport.py | 2 +- pandas/tests/io/test_clipboard.py | 4 +-- pandas/tests/io/test_common.py | 4 +-- pandas/tests/io/test_excel.py | 28 ++++++++--------- pandas/tests/io/test_gbq.py | 3 +- pandas/tests/io/test_html.py | 9 ++---- pandas/tests/io/test_packers.py | 2 +- pandas/tests/io/test_pytables.py | 8 ++--- pandas/tests/io/test_s3.py | 4 +-- pandas/tests/io/test_sql.py | 31 ++++++++----------- pandas/tests/io/test_stata.py | 2 +- pandas/tests/plotting/common.py | 2 +- pandas/tests/plotting/test_converter.py | 4 +-- pandas/tests/reshape/test_concat.py | 2 +- pandas/tests/reshape/test_hashing.py | 2 +- pandas/tests/reshape/test_join.py | 2 +- pandas/tests/reshape/test_merge.py | 4 +-- pandas/tests/reshape/test_merge_asof.py | 2 +- pandas/tests/reshape/test_merge_ordered.py | 2 +- pandas/tests/reshape/test_pivot.py | 6 ++-- pandas/tests/reshape/test_reshape.py | 10 +++--- pandas/tests/reshape/test_tile.py | 2 +- .../tests/reshape/test_union_categoricals.py | 2 +- pandas/tests/reshape/test_util.py | 2 +- pandas/tests/scalar/test_interval.py | 2 +- pandas/tests/scalar/test_period.py | 8 ++--- pandas/tests/scalar/test_period_asfreq.py | 2 +- pandas/tests/scalar/test_timedelta.py | 2 +- pandas/tests/scalar/test_timestamp.py | 12 +++---- pandas/tests/series/test_alter_axes.py | 2 +- pandas/tests/series/test_analytics.py | 2 +- pandas/tests/series/test_api.py | 2 +- pandas/tests/series/test_apply.py | 6 ++-- pandas/tests/series/test_asof.py | 2 +- pandas/tests/series/test_combine_concat.py | 4 +-- pandas/tests/series/test_constructors.py | 2 +- pandas/tests/series/test_datetime_values.py | 2 +- pandas/tests/series/test_indexing.py | 8 ++--- pandas/tests/series/test_internals.py | 2 +- pandas/tests/series/test_io.py | 6 ++-- pandas/tests/series/test_missing.py | 4 +-- pandas/tests/series/test_operators.py | 2 +- pandas/tests/series/test_period.py | 2 +- pandas/tests/series/test_quantile.py | 2 +- pandas/tests/series/test_rank.py | 2 +- pandas/tests/series/test_replace.py | 2 +- pandas/tests/series/test_repr.py | 2 +- pandas/tests/series/test_sorting.py | 2 +- pandas/tests/series/test_subclass.py | 4 +-- pandas/tests/series/test_timeseries.py | 2 
+- pandas/tests/sparse/test_arithmetics.py | 2 +- pandas/tests/sparse/test_array.py | 4 +-- pandas/tests/sparse/test_combine_concat.py | 4 +-- pandas/tests/sparse/test_format.py | 4 +-- pandas/tests/sparse/test_frame.py | 6 ++-- pandas/tests/sparse/test_groupby.py | 2 +- pandas/tests/sparse/test_indexing.py | 6 ++-- pandas/tests/sparse/test_libsparse.py | 12 +++---- pandas/tests/sparse/test_list.py | 2 +- pandas/tests/sparse/test_pivot.py | 2 +- pandas/tests/sparse/test_series.py | 8 ++--- pandas/tests/test_algos.py | 26 ++++++++-------- pandas/tests/test_base.py | 6 ++-- pandas/tests/test_categorical.py | 6 ++-- pandas/tests/test_compat.py | 3 +- pandas/tests/test_config.py | 3 +- pandas/tests/test_expressions.py | 2 +- pandas/tests/test_internals.py | 6 ++-- pandas/tests/test_join.py | 2 +- pandas/tests/test_lib.py | 6 ++-- pandas/tests/test_multilevel.py | 4 +-- pandas/tests/test_nanops.py | 10 +++--- pandas/tests/test_panel.py | 4 +-- pandas/tests/test_panel4d.py | 2 +- pandas/tests/test_panelnd.py | 2 +- pandas/tests/test_resample.py | 12 +++---- pandas/tests/test_sorting.py | 4 +-- pandas/tests/test_strings.py | 2 +- pandas/tests/test_take.py | 2 +- pandas/tests/test_testing.py | 18 +++++------ pandas/tests/test_util.py | 14 ++++----- pandas/tests/test_window.py | 10 +++--- pandas/tests/tools/test_numeric.py | 2 +- pandas/tests/tseries/test_frequencies.py | 6 ++-- pandas/tests/tseries/test_holiday.py | 10 +++--- pandas/tests/tseries/test_offsets.py | 16 +++++----- pandas/tests/tseries/test_timezones.py | 8 ++--- pandas/util/testing.py | 14 --------- 208 files changed, 446 insertions(+), 491 deletions(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 26a2f56f3c1a1..aacfe25b91564 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -617,11 +617,11 @@ the expected correct result:: Transitioning to ``pytest`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*pandas* existing test structure is *mostly* classed based, meaning that you will typically find tests wrapped in a class, inheriting from ``tm.TestCase``. +*pandas* existing test structure is *mostly* class based, meaning that you will typically find tests wrapped in a class. .. code-block:: python - class TestReallyCoolFeature(tm.TestCase): + class TestReallyCoolFeature(object): .... 
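For a sense of how such a class runs under plain pytest once the ``unittest`` hooks are renamed, here is a minimal, self-contained sketch; the class name, data, and assertion are illustrative and not taken from the pandas suite:

    import pandas as pd
    import pandas.util.testing as tm

    class TestExampleFeature(object):

        def setup_method(self, method):
            # pytest calls this before each test method; ``method`` is the
            # test function about to run
            self.df = pd.DataFrame({'A': [1, 2, 3]})

        def teardown_method(self, method):
            # pytest calls this after each test method
            del self.df

        def test_column_roundtrip(self):
            tm.assert_series_equal(self.df['A'],
                                   pd.Series([1, 2, 3], name='A'))
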
Going forward, we are moving to a more *functional* style using the `pytest `__ framework, which offers a richer testing diff --git a/pandas/conftest.py b/pandas/conftest.py index caced6a0c568e..1149fae3fc0b0 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -25,6 +25,13 @@ def pytest_runtest_setup(item): pytest.skip("skipping due to --skip-network") +# Configurations for all tests and all test modules + +@pytest.fixture(autouse=True) +def configure_tests(): + pandas.set_option('chained_assignment', 'raise') + + # For running doctests: make np and pd names available @pytest.fixture(autouse=True) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 4678db4a52c5a..b1652cf6eb6db 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -23,7 +23,7 @@ def check(self, namespace, expected, ignored=None): tm.assert_almost_equal(result, expected) -class TestPDApi(Base, tm.TestCase): +class TestPDApi(Base): # these are optionally imported based on testing # & need to be ignored @@ -117,7 +117,7 @@ def test_api(self): self.ignored) -class TestApi(Base, tm.TestCase): +class TestApi(Base): allowed = ['types'] @@ -137,7 +137,7 @@ def test_testing(self): self.check(testing, self.funcs) -class TestDatetoolsDeprecation(tm.TestCase): +class TestDatetoolsDeprecation(object): def test_deprecation_access_func(self): with tm.assert_produces_warning(FutureWarning, @@ -150,7 +150,7 @@ def test_deprecation_access_obj(self): pd.datetools.monthEnd -class TestTopLevelDeprecations(tm.TestCase): +class TestTopLevelDeprecations(object): # top-level API deprecations # GH 13790 @@ -191,35 +191,35 @@ def test_get_store(self): s.close() -class TestJson(tm.TestCase): +class TestJson(object): def test_deprecation_access_func(self): with catch_warnings(record=True): pd.json.dumps([]) -class TestParser(tm.TestCase): +class TestParser(object): def test_deprecation_access_func(self): with catch_warnings(record=True): pd.parser.na_values -class TestLib(tm.TestCase): +class TestLib(object): def test_deprecation_access_func(self): with catch_warnings(record=True): pd.lib.infer_dtype('foo') -class TestTSLib(tm.TestCase): +class TestTSLib(object): def test_deprecation_access_func(self): with catch_warnings(record=True): pd.tslib.Timestamp('20160101') -class TestTypes(tm.TestCase): +class TestTypes(object): def test_deprecation_access_func(self): with tm.assert_produces_warning( diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 834857b87960c..1cbcf3f9109a4 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -13,7 +13,7 @@ from .test_api import Base -class TestTypes(Base, tm.TestCase): +class TestTypes(Base): allowed = ['is_bool', 'is_bool_dtype', 'is_categorical', 'is_categorical_dtype', 'is_complex', diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 5086b803419c6..89ab4531877a4 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -95,11 +95,10 @@ def _is_py3_complex_incompat(result, expected): _good_arith_ops = com.difference(_arith_ops_syms, _special_case_arith_ops_syms) -class TestEvalNumexprPandas(tm.TestCase): +class TestEvalNumexprPandas(object): @classmethod def setup_class(cls): - super(TestEvalNumexprPandas, cls).setup_class() tm.skip_if_no_ne() import numexpr as ne cls.ne = ne @@ -108,7 +107,6 @@ def setup_class(cls): @classmethod def teardown_class(cls): - super(TestEvalNumexprPandas, cls).teardown_class() del 
cls.engine, cls.parser if hasattr(cls, 'ne'): del cls.ne @@ -1067,11 +1065,10 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): # ------------------------------------ # Slightly more complex ops -class TestOperationsNumExprPandas(tm.TestCase): +class TestOperationsNumExprPandas(object): @classmethod def setup_class(cls): - super(TestOperationsNumExprPandas, cls).setup_class() tm.skip_if_no_ne() cls.engine = 'numexpr' cls.parser = 'pandas' @@ -1079,7 +1076,6 @@ def setup_class(cls): @classmethod def teardown_class(cls): - super(TestOperationsNumExprPandas, cls).teardown_class() del cls.engine, cls.parser def eval(self, *args, **kwargs): @@ -1584,11 +1580,10 @@ def setup_class(cls): cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms -class TestMathPythonPython(tm.TestCase): +class TestMathPythonPython(object): @classmethod def setup_class(cls): - super(TestMathPythonPython, cls).setup_class() tm.skip_if_no_ne() cls.engine = 'python' cls.parser = 'pandas' @@ -1873,7 +1868,7 @@ def test_negate_lt_eq_le(engine, parser): tm.assert_frame_equal(result, expected) -class TestValidate(tm.TestCase): +class TestValidate(object): def test_validate_bool_args(self): invalid_values = [1, "True", [1, 2, 3], 5.0] diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index cbf049b95b6ef..e92724a5d9cd4 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -26,7 +26,7 @@ from pandas.util import testing as tm -class TestMaybeDowncast(tm.TestCase): +class TestMaybeDowncast(object): def test_downcast_conv(self): # test downcasting @@ -156,7 +156,7 @@ def test_infer_dtype_from_array(self, arr, expected): assert dtype == expected -class TestMaybe(tm.TestCase): +class TestMaybe(object): def test_maybe_convert_string_to_array(self): result = maybe_convert_string_to_object('x') @@ -214,7 +214,7 @@ def test_maybe_convert_scalar(self): assert result == Timedelta('1 day 1 min').value -class TestConvert(tm.TestCase): +class TestConvert(object): def test_maybe_convert_objects_copy(self): values = np.array([1, 2]) @@ -233,7 +233,7 @@ def test_maybe_convert_objects_copy(self): assert values is not out -class TestCommonTypes(tm.TestCase): +class TestCommonTypes(object): def test_numpy_dtypes(self): # (source_types, destination_type) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 0472f0599cd9b..68518e235d417 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -12,7 +12,7 @@ import pandas.util.testing as tm -class TestPandasDtype(tm.TestCase): +class TestPandasDtype(object): # Passing invalid dtype, both as a string or object, must raise TypeError # Per issue GH15520 diff --git a/pandas/tests/dtypes/test_concat.py b/pandas/tests/dtypes/test_concat.py index c0be0dc38d27f..ca579e2dc9390 100644 --- a/pandas/tests/dtypes/test_concat.py +++ b/pandas/tests/dtypes/test_concat.py @@ -2,10 +2,9 @@ import pandas as pd import pandas.core.dtypes.concat as _concat -import pandas.util.testing as tm -class TestConcatCompat(tm.TestCase): +class TestConcatCompat(object): def check_concat(self, to_concat, exp): for klass in [pd.Index, pd.Series]: diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index e9af53aaa1e1a..653d7d3082c08 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -3,11 +3,10 @@ from warnings import catch_warnings import numpy as np import pandas as pd -import pandas.util.testing 
as tm from pandas.core.dtypes import generic as gt -class TestABCClasses(tm.TestCase): +class TestABCClasses(object): tuples = [[1, 2, 2], ['red', 'blue', 'red']] multi_index = pd.MultiIndex.from_arrays(tuples, names=('number', 'color')) datetime_index = pd.to_datetime(['2000/1/1', '2010/1/1']) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index ec02a5a200308..3790ebe0d3e7c 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -226,7 +226,7 @@ def test_is_recompilable(): assert not inference.is_re_compilable(f) -class TestInference(tm.TestCase): +class TestInference(object): def test_infer_dtype_bytes(self): compare = 'string' if PY2 else 'bytes' @@ -405,7 +405,7 @@ def test_mixed_dtypes_remain_object_array(self): tm.assert_numpy_array_equal(result, array) -class TestTypeInference(tm.TestCase): +class TestTypeInference(object): def test_length_zero(self): result = lib.infer_dtype(np.array([], dtype='i4')) @@ -774,7 +774,7 @@ def test_categorical(self): assert result == 'categorical' -class TestNumberScalar(tm.TestCase): +class TestNumberScalar(object): def test_is_number(self): @@ -917,7 +917,7 @@ def test_is_timedelta(self): assert not is_timedelta64_ns_dtype(tdi.astype('timedelta64[h]')) -class Testisscalar(tm.TestCase): +class Testisscalar(object): def test_isscalar_builtin_scalars(self): assert is_scalar(None) diff --git a/pandas/tests/dtypes/test_io.py b/pandas/tests/dtypes/test_io.py index 443c0c5410e61..58a1c3540cd03 100644 --- a/pandas/tests/dtypes/test_io.py +++ b/pandas/tests/dtypes/test_io.py @@ -7,7 +7,7 @@ from pandas.compat import long, u -class TestParseSQL(tm.TestCase): +class TestParseSQL(object): def test_convert_sql_column_floats(self): arr = np.array([1.5, None, 3, 4.2], dtype=object) diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 78396a8d89d91..90993890b7553 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -45,7 +45,7 @@ def test_notnull(): assert (isinstance(isnull(s), Series)) -class TestIsNull(tm.TestCase): +class TestIsNull(object): def test_0d_array(self): assert isnull(np.array(np.nan)) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 34ab0b72f9b9a..e6313dfc602a8 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -25,7 +25,7 @@ from pandas.tests.frame.common import TestData -class TestDataFrameAlterAxes(tm.TestCase, TestData): +class TestDataFrameAlterAxes(TestData): def test_set_index(self): idx = Index(np.arange(len(self.mixed_frame))) @@ -806,7 +806,7 @@ def test_set_index_preserve_categorical_dtype(self): tm.assert_frame_equal(result, df) -class TestIntervalIndex(tm.TestCase): +class TestIntervalIndex(object): def test_setitem(self): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 89ee096b4434e..be89b27912d1c 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -24,7 +24,7 @@ from pandas.tests.frame.common import TestData -class TestDataFrameAnalytics(tm.TestCase, TestData): +class TestDataFrameAnalytics(TestData): # ---------------------------------------------------------------------= # Correlation and covariance diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 208c7b5ace50e..f63918c97c614 100644 --- a/pandas/tests/frame/test_api.py +++ 
b/pandas/tests/frame/test_api.py @@ -69,7 +69,7 @@ def test_add_prefix_suffix(self): tm.assert_index_equal(with_suffix.columns, expected) -class TestDataFrameMisc(tm.TestCase, SharedWithSparse, TestData): +class TestDataFrameMisc(SharedWithSparse, TestData): klass = DataFrame diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 5febe8c62abe8..aa7c7a7120c1b 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -19,7 +19,7 @@ from pandas.tests.frame.common import TestData -class TestDataFrameApply(tm.TestCase, TestData): +class TestDataFrameApply(TestData): def test_apply(self): with np.errstate(all='ignore'): @@ -482,7 +482,7 @@ def zip_frames(*frames): return pd.concat(zipped, axis=1) -class TestDataFrameAggregate(tm.TestCase, TestData): +class TestDataFrameAggregate(TestData): _multiprocess_can_split_ = True diff --git a/pandas/tests/frame/test_asof.py b/pandas/tests/frame/test_asof.py index 4207238f0cd4f..d4e3d541937dc 100644 --- a/pandas/tests/frame/test_asof.py +++ b/pandas/tests/frame/test_asof.py @@ -9,7 +9,7 @@ from .common import TestData -class TestFrameAsof(TestData, tm.TestCase): +class TestFrameAsof(TestData): def setup_method(self, method): self.N = N = 50 self.rng = date_range('1/1/1990', periods=N, freq='53s') diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index a563b678a3786..a6326083c1bee 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -22,7 +22,7 @@ from pandas.tests.frame.common import TestData -class TestDataFrameSelectReindex(tm.TestCase, TestData): +class TestDataFrameSelectReindex(TestData): # These are specific reindex-based tests; other indexing tests should go in # test_indexing diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 44dc6df756f3d..c1a5b437be5d0 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -28,7 +28,7 @@ # structure -class TestDataFrameBlockInternals(tm.TestCase, TestData): +class TestDataFrameBlockInternals(TestData): def test_cast_internals(self): casted = DataFrame(self.frame._data, dtype=int) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 44f17faabe20d..688cacdee263e 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -18,7 +18,7 @@ from pandas.util.testing import assert_frame_equal, assert_series_equal -class TestDataFrameConcatCommon(tm.TestCase, TestData): +class TestDataFrameConcatCommon(TestData): def test_concat_multiple_frames_dtypes(self): @@ -441,7 +441,7 @@ def test_concat_numerical_names(self): tm.assert_frame_equal(result, expected) -class TestDataFrameCombineFirst(tm.TestCase, TestData): +class TestDataFrameCombineFirst(TestData): def test_combine_first_mixed(self): a = Series(['a', 'b'], index=lrange(2)) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 5b00ddc51da46..8459900ea1059 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -32,7 +32,7 @@ 'int32', 'int64'] -class TestDataFrameConstructors(tm.TestCase, TestData): +class TestDataFrameConstructors(TestData): def test_constructor(self): df = DataFrame() @@ -1903,7 +1903,7 @@ def test_to_frame_with_falsey_names(self): tm.assert_series_equal(result, expected) 
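Dropping ``tm.TestCase`` from bases like the ones below is safe because pytest collects any class whose name starts with ``Test``, and the one behavior the old base still carried — setting pandas' ``chained_assignment`` option to ``'raise'`` in ``setup_class`` — is now supplied by the autouse ``configure_tests`` fixture added to ``pandas/conftest.py`` earlier in this patch. A minimal sketch of that mechanism, with an illustrative test class:

    import pandas
    import pytest

    @pytest.fixture(autouse=True)
    def configure_tests():
        # runs before every test in the suite without being requested by name
        pandas.set_option('chained_assignment', 'raise')

    class TestCollectedWithoutBase(object):
        # pytest collects this class by its Test* name alone
        def test_option_is_set(self):
            assert pandas.get_option('chained_assignment') == 'raise'
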
-class TestDataFrameConstructorWithDatetimeTZ(tm.TestCase, TestData): +class TestDataFrameConstructorWithDatetimeTZ(TestData): def test_from_dict(self): diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 353b4b873332e..e0cdca7904db7 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -11,7 +11,7 @@ from pandas.tests.frame.common import TestData -class TestDataFrameConvertTo(tm.TestCase, TestData): +class TestDataFrameConvertTo(TestData): def test_to_dict(self): test_data = { diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 2d39db16dbd8d..b99a6fabfa42b 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -19,7 +19,7 @@ import pandas as pd -class TestDataFrameDataTypes(tm.TestCase, TestData): +class TestDataFrameDataTypes(TestData): def test_concat_empty_dataframe_dtypes(self): df = DataFrame(columns=list("abc")) @@ -542,7 +542,7 @@ def test_arg_for_errors_in_astype(self): df.astype(np.int8, errors='ignore') -class TestDataFrameDatetimeWithTZ(tm.TestCase, TestData): +class TestDataFrameDatetimeWithTZ(TestData): def test_interleave(self): diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 42eb7148d616e..f0503b60eeefa 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -36,7 +36,7 @@ from pandas.tests.frame.common import TestData -class TestDataFrameIndexing(tm.TestCase, TestData): +class TestDataFrameIndexing(TestData): def test_getitem(self): # Slicing @@ -2912,7 +2912,7 @@ def test_type_error_multiindex(self): assert_series_equal(result, expected) -class TestDataFrameIndexingDatetimeWithTZ(tm.TestCase, TestData): +class TestDataFrameIndexingDatetimeWithTZ(TestData): def setup_method(self, method): self.idx = Index(date_range('20130101', periods=3, tz='US/Eastern'), @@ -2970,7 +2970,7 @@ def test_transpose(self): assert_frame_equal(result, expected) -class TestDataFrameIndexingUInt64(tm.TestCase, TestData): +class TestDataFrameIndexingUInt64(TestData): def setup_method(self, method): self.ir = Index(np.arange(3), dtype=np.uint64) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index ffba141ddc15d..77f0357685cab 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -34,7 +34,7 @@ def _skip_if_no_pchip(): pytest.skip('scipy.interpolate.pchip missing') -class TestDataFrameMissingData(tm.TestCase, TestData): +class TestDataFrameMissingData(TestData): def test_dropEmptyRows(self): N = len(self.frame.index) @@ -519,7 +519,7 @@ def test_fill_value_when_combine_const(self): assert_frame_equal(res, exp) -class TestDataFrameInterpolate(tm.TestCase, TestData): +class TestDataFrameInterpolate(TestData): def test_interp_basic(self): df = DataFrame({'A': [1, 2, np.nan, 4], diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index ac76970aaa901..4462260a290d9 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -17,7 +17,7 @@ # Column add, remove, delete. 
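With ``tm.TestCase`` gone, classes such as the one below keep only the plain ``TestData`` mixin for their shared frames. A rough sketch of the mixin pattern in general — not the actual ``pandas/tests/frame/common.py`` implementation, and with illustrative names:

    import numpy as np
    import pandas.util.testing as tm

    class ExampleData(object):
        # plain mixin: each inheriting test class gets a fresh frame per test
        def setup_method(self, method):
            self.frame = tm.makeDataFrame()

    class TestExampleMutate(ExampleData):
        def test_add_column(self):
            self.frame['new'] = np.arange(len(self.frame))
            assert 'new' in self.frame.columns
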
-class TestDataFrameMutateColumns(tm.TestCase, TestData): +class TestDataFrameMutateColumns(TestData): def test_assign(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 4bc0176b570e3..4f77ba0ae1f5a 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -16,7 +16,7 @@ from pandas.tests.frame.common import TestData -class TestDataFrameNonuniqueIndexes(tm.TestCase, TestData): +class TestDataFrameNonuniqueIndexes(TestData): def test_column_dups_operations(self): diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 9083b7952909e..8ec6c6e6263d8 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -28,7 +28,7 @@ _check_mixed_int) -class TestDataFrameOperators(tm.TestCase, TestData): +class TestDataFrameOperators(TestData): def test_operators(self): garbage = random.random(4) diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py index 49de3b8e8cd9b..482210966fe6b 100644 --- a/pandas/tests/frame/test_period.py +++ b/pandas/tests/frame/test_period.py @@ -12,7 +12,7 @@ def _permute(obj): return obj.take(np.random.permutation(len(obj))) -class TestPeriodIndex(tm.TestCase): +class TestPeriodIndex(object): def setup_method(self, method): pass diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 33f72cde1b9a3..2482e493dbefd 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -17,7 +17,7 @@ from pandas.tests.frame.common import TestData -class TestDataFrameQuantile(tm.TestCase, TestData): +class TestDataFrameQuantile(TestData): def test_quantile(self): from numpy import percentile diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 6a06e3f4872ce..f0f1a2df27e93 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -48,7 +48,7 @@ def skip_if_no_ne(engine='numexpr'): "installed") -class TestCompat(tm.TestCase): +class TestCompat(object): def setup_method(self, method): self.df = DataFrame({'A': [1, 2, 3]}) @@ -96,7 +96,7 @@ def test_query_numexpr(self): lambda: df.eval('A+1', engine='numexpr')) -class TestDataFrameEval(tm.TestCase, TestData): +class TestDataFrameEval(TestData): def test_ops(self): @@ -172,7 +172,7 @@ def test_eval_resolvers_as_list(self): dict1['a'] + dict2['b']) -class TestDataFrameQueryWithMultiIndex(tm.TestCase): +class TestDataFrameQueryWithMultiIndex(object): def test_query_with_named_multiindex(self, parser, engine): tm.skip_if_no_ne(engine) @@ -384,18 +384,16 @@ def test_raise_on_panel4d_with_multiindex(self, parser, engine): pd.eval('p4d + 1', parser=parser, engine=engine) -class TestDataFrameQueryNumExprPandas(tm.TestCase): +class TestDataFrameQueryNumExprPandas(object): @classmethod def setup_class(cls): - super(TestDataFrameQueryNumExprPandas, cls).setup_class() cls.engine = 'numexpr' cls.parser = 'pandas' tm.skip_if_no_ne(cls.engine) @classmethod def teardown_class(cls): - super(TestDataFrameQueryNumExprPandas, cls).teardown_class() del cls.engine, cls.parser def test_date_query_with_attribute_access(self): @@ -858,7 +856,7 @@ def test_query_builtin(self): assert_frame_equal(expected, result) -class TestDataFrameQueryStrings(tm.TestCase): +class TestDataFrameQueryStrings(object): def test_str_query_method(self, parser, engine): 
tm.skip_if_no_ne(engine) @@ -1039,11 +1037,10 @@ def test_query_string_scalar_variable(self, parser, engine): assert_frame_equal(e, r) -class TestDataFrameEvalNumExprPandas(tm.TestCase): +class TestDataFrameEvalNumExprPandas(object): @classmethod def setup_class(cls): - super(TestDataFrameEvalNumExprPandas, cls).setup_class() cls.engine = 'numexpr' cls.parser = 'pandas' tm.skip_if_no_ne() @@ -1099,5 +1096,4 @@ class TestDataFrameEvalPythonPython(TestDataFrameEvalNumExprPython): @classmethod def setup_class(cls): - super(TestDataFrameEvalPythonPython, cls).teardown_class() cls.engine = cls.parser = 'python' diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py index b115218d76958..acf887d047c9e 100644 --- a/pandas/tests/frame/test_rank.py +++ b/pandas/tests/frame/test_rank.py @@ -12,7 +12,7 @@ from pandas.tests.frame.common import TestData -class TestRank(tm.TestCase, TestData): +class TestRank(TestData): s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]) df = DataFrame({'A': s, 'B': s}) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index 3f160012cb446..fbc4accd0e41e 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -23,7 +23,7 @@ from pandas.tests.frame.common import TestData -class TestDataFrameReplace(tm.TestCase, TestData): +class TestDataFrameReplace(TestData): def test_replace_inplace(self): self.tsframe['A'][:5] = nan diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 0300c53e086cd..cc37f8cc3cb02 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -23,7 +23,7 @@ # structure -class TestDataFrameReprInfoEtc(tm.TestCase, TestData): +class TestDataFrameReprInfoEtc(TestData): def test_repr_empty(self): # empty diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 79ee76ee362c3..fdb0119d8ae60 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -24,7 +24,7 @@ from pandas.tests.frame.common import TestData -class TestDataFrameReshape(tm.TestCase, TestData): +class TestDataFrameReshape(TestData): def test_pivot(self): data = { diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 457ea32ec56f7..98f7f82c0ace7 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -18,7 +18,7 @@ from pandas.tests.frame.common import TestData -class TestDataFrameSorting(tm.TestCase, TestData): +class TestDataFrameSorting(TestData): def test_sort(self): frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], @@ -315,7 +315,7 @@ def test_sort_nat_values_in_int_column(self): assert_frame_equal(df_sorted, df_reversed) -class TestDataFrameSortIndexKinds(tm.TestCase, TestData): +class TestDataFrameSortIndexKinds(TestData): def test_sort_index_multicolumn(self): A = np.arange(5).repeat(20) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 40a8ece852623..52c591e4dcbb0 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -12,7 +12,7 @@ from pandas.tests.frame.common import TestData -class TestDataFrameSubclassing(tm.TestCase, TestData): +class TestDataFrameSubclassing(TestData): def test_frame_subclassing_and_slicing(self): # Subclass frame and ensure it returns the right class on slicing it diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 
f52f4697b1b08..143a7ea8f6fb2 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -24,7 +24,7 @@ from pandas.tests.frame.common import TestData -class TestDataFrameTimeSeriesMethods(tm.TestCase, TestData): +class TestDataFrameTimeSeriesMethods(TestData): def test_diff(self): the_diff = self.tsframe.diff(1) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 3e38f2a71d99d..69bd2b008416f 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -29,7 +29,7 @@ 'int32', 'int64'] -class TestDataFrameToCSV(tm.TestCase, TestData): +class TestDataFrameToCSV(TestData): def test_to_csv_from_csv1(self): diff --git a/pandas/tests/frame/test_validate.py b/pandas/tests/frame/test_validate.py index 343853b3fcfa0..d6065e6042908 100644 --- a/pandas/tests/frame/test_validate.py +++ b/pandas/tests/frame/test_validate.py @@ -1,10 +1,9 @@ from pandas.core.frame import DataFrame -import pandas.util.testing as tm import pytest -class TestDataFrameValidate(tm.TestCase): +class TestDataFrameValidate(object): """Tests for error handling related to data types of method arguments.""" df = DataFrame({'a': [1, 2], 'b': [3, 4]}) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 769e4d14d354b..d7b46e6748b99 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -25,7 +25,7 @@ import pandas.util.testing as tm -class TestGroupByAggregate(tm.TestCase): +class TestGroupByAggregate(object): def setup_method(self, method): self.ts = tm.makeTimeSeries() diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index bdac535b3d2e2..f527c732fb76b 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -46,7 +46,7 @@ def test_series_bin_grouper(): assert_almost_equal(counts, exp_counts) -class TestBinGroupers(tm.TestCase): +class TestBinGroupers(object): def setup_method(self, method): self.obj = np.random.randn(10, 1) @@ -117,11 +117,11 @@ def _ohlc(group): _check('float64') -class TestMoments(tm.TestCase): +class TestMoments(object): pass -class TestReducer(tm.TestCase): +class TestReducer(object): def test_int_index(self): from pandas.core.series import Series diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 9d2134927389d..fdc03acd3e931 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -15,7 +15,7 @@ from .common import MixIn -class TestGroupByCategorical(MixIn, tm.TestCase): +class TestGroupByCategorical(MixIn): def test_level_groupby_get_group(self): # GH15155 diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index b05b938fd8205..cac6b46af8f87 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -23,7 +23,7 @@ import pandas as pd -class TestGroupByFilter(tm.TestCase): +class TestGroupByFilter(object): def setup_method(self, method): self.ts = tm.makeTimeSeries() diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 8d86d40c379bf..88afa51e46b6c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -28,7 +28,7 @@ from .common import MixIn -class TestGroupBy(MixIn, tm.TestCase): +class TestGroupBy(MixIn): def test_basic(self): def checkit(dtype): diff --git 
a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 0b6aeaf155f86..7912b4bf3bdf6 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -2,13 +2,12 @@ import pandas as pd from pandas import DataFrame, MultiIndex, Index, Series, isnull from pandas.compat import lrange -from pandas.util import testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal from .common import MixIn -class TestNth(MixIn, tm.TestCase): +class TestNth(MixIn): def test_first_last_nth(self): # tests for first / last / nth diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 42caecbdb700e..2196318d1920e 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -14,7 +14,7 @@ from pandas.util.testing import assert_frame_equal, assert_series_equal -class TestGroupBy(tm.TestCase): +class TestGroupBy(object): def test_groupby_with_timegrouper(self): # GH 4161 diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 0b81235ef2117..40434ff510421 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -17,7 +17,7 @@ from pandas.core.config import option_context -class TestGroupBy(MixIn, tm.TestCase): +class TestGroupBy(MixIn): def test_transform(self): data = Series(np.arange(9) // 3, index=np.arange(9)) diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 185787d75f6e1..0f7acf1febae8 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -9,7 +9,7 @@ Int64Index, Period) -class TestDatetimeIndex(tm.TestCase): +class TestDatetimeIndex(object): def test_astype(self): # GH 13149, GH 13209 @@ -185,7 +185,7 @@ def _check_rng(rng): _check_rng(rng_utc) -class TestToPeriod(tm.TestCase): +class TestToPeriod(object): def setup_method(self, method): data = [Timestamp('2007-01-01 10:11:12.123456Z'), diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 9af4136afd025..fcfc56ea823da 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -12,7 +12,7 @@ to_datetime) -class TestDatetimeIndex(tm.TestCase): +class TestDatetimeIndex(object): def test_construction_caching(self): @@ -446,7 +446,7 @@ def test_000constructor_resolution(self): assert idx.nanosecond[0] == t1.nanosecond -class TestTimeSeries(tm.TestCase): +class TestTimeSeries(object): def test_dti_constructor_preserve_dti_freq(self): rng = date_range('1/1/2000', '1/2/2000', freq='5min') diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 67d6b0f314ecb..0586ea9c4db2b 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -26,7 +26,7 @@ def eq_gen_range(kwargs, expected): assert (np.array_equal(list(rng), expected)) -class TestDateRanges(TestData, tm.TestCase): +class TestDateRanges(TestData): def test_date_range_gen_error(self): rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min') @@ -147,7 +147,7 @@ def test_catch_infinite_loop(self): datetime(2011, 11, 12), freq=offset) -class TestGenRangeGeneration(tm.TestCase): +class TestGenRangeGeneration(object): def test_generate(self): rng1 = list(generate_range(START, END, 
offset=BDay())) @@ -196,7 +196,7 @@ def test_precision_finer_than_offset(self): tm.assert_index_equal(result2, expected2) -class TestBusinessDateRange(tm.TestCase): +class TestBusinessDateRange(object): def setup_method(self, method): self.rng = bdate_range(START, END) @@ -482,7 +482,7 @@ def test_freq_divides_end_in_nanos(self): tm.assert_index_equal(result_2, expected_2) -class TestCustomDateRange(tm.TestCase): +class TestCustomDateRange(object): def setup_method(self, method): self.rng = cdate_range(START, END) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 7b22d1615fbeb..96c8da546ff9d 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -15,7 +15,7 @@ randn = np.random.randn -class TestDatetimeIndex(tm.TestCase): +class TestDatetimeIndex(object): def test_get_loc(self): idx = pd.date_range('2000-01-01', periods=3) diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index 2e184b1aa4e51..3b970ee382521 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -8,7 +8,7 @@ from ..datetimelike import DatetimeLike -class TestDatetimeIndex(DatetimeLike, tm.TestCase): +class TestDatetimeIndex(DatetimeLike): _holder = DatetimeIndex def setup_method(self, method): diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 92134a296b08f..a9ea028c9d0f7 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -7,7 +7,7 @@ from pandas import notnull, Index, DatetimeIndex, datetime, date_range -class TestDatetimeIndex(tm.TestCase): +class TestDatetimeIndex(object): def test_where_other(self): diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index d9a61776a0d1c..951aa2c520d0f 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -7,7 +7,7 @@ Float64Index, date_range, Timestamp) -class TestDateTimeIndexToJulianDate(tm.TestCase): +class TestDateTimeIndexToJulianDate(object): def test_1700(self): r1 = Float64Index([2345897.5, 2345898.5, 2345899.5, 2345900.5, @@ -53,7 +53,7 @@ def test_second(self): tm.assert_index_equal(r1, r2) -class TestTimeSeries(tm.TestCase): +class TestTimeSeries(object): def test_pass_datetimeindex_to_index(self): # Bugs in #1396 @@ -170,7 +170,7 @@ def test_normalize(self): assert not rng.is_normalized -class TestDatetime64(tm.TestCase): +class TestDatetime64(object): def test_datetimeindex_accessors(self): diff --git a/pandas/tests/indexes/datetimes/test_missing.py b/pandas/tests/indexes/datetimes/test_missing.py index 0c356e3251e2f..adc0b7b3d81e8 100644 --- a/pandas/tests/indexes/datetimes/test_missing.py +++ b/pandas/tests/indexes/datetimes/test_missing.py @@ -2,7 +2,7 @@ import pandas.util.testing as tm -class TestDatetimeIndex(tm.TestCase): +class TestDatetimeIndex(object): def test_fillna_datetime64(self): # GH 11343 diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 75c6626b47401..80e93a1f76a66 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -931,7 +931,7 @@ def test_equals(self): assert not idx.equals(pd.Series(idx3)) -class TestDateTimeIndexToJulianDate(tm.TestCase): 
+class TestDateTimeIndexToJulianDate(object): def test_1700(self): r1 = Float64Index([2345897.5, 2345898.5, 2345899.5, 2345900.5, @@ -1107,7 +1107,7 @@ def test_shift_months(years, months): tm.assert_index_equal(actual, expected) -class TestBusinessDatetimeIndex(tm.TestCase): +class TestBusinessDatetimeIndex(object): def setup_method(self, method): self.rng = bdate_range(START, END) @@ -1207,7 +1207,7 @@ def test_identical(self): assert not t1.identical(t2v) -class TestCustomDatetimeIndex(tm.TestCase): +class TestCustomDatetimeIndex(object): def setup_method(self, method): self.rng = cdate_range(START, END) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index b3661ae0e7a97..e7d03aa193cbd 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -11,7 +11,7 @@ from pandas.util import testing as tm -class TestSlicing(tm.TestCase): +class TestSlicing(object): def test_slice_year(self): dti = DatetimeIndex(freq='B', start=datetime(2005, 1, 1), periods=500) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index fb4b6e9d226f8..f3af7dd30c27f 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -12,7 +12,7 @@ START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) -class TestDatetimeIndex(tm.TestCase): +class TestDatetimeIndex(object): def test_union(self): i1 = Int64Index(np.arange(0, 20, 2)) @@ -199,7 +199,7 @@ def test_join_nonunique(self): assert rs.is_monotonic -class TestBusinessDatetimeIndex(tm.TestCase): +class TestBusinessDatetimeIndex(object): def setup_method(self, method): self.rng = bdate_range(START, END) @@ -343,7 +343,7 @@ def test_month_range_union_tz_dateutil(self): early_dr.union(late_dr) -class TestCustomDatetimeIndex(tm.TestCase): +class TestCustomDatetimeIndex(object): def setup_method(self, method): self.rng = cdate_range(START, END) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 3c7f2e424f779..648df01be5289 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -22,7 +22,7 @@ compat) -class TimeConversionFormats(tm.TestCase): +class TimeConversionFormats(object): def test_to_datetime_format(self): values = ['1/1/2000', '1/2/2000', '1/3/2000'] @@ -170,7 +170,7 @@ def test_to_datetime_format_weeks(self): assert to_datetime(s, format=format) == dt -class TestToDatetime(tm.TestCase): +class TestToDatetime(object): def test_to_datetime_dt64s(self): in_bound_dts = [ @@ -335,7 +335,7 @@ def test_datetime_invalid_datatype(self): pd.to_datetime(pd.to_datetime) -class ToDatetimeUnit(tm.TestCase): +class ToDatetimeUnit(object): def test_unit(self): # GH 11758 @@ -595,7 +595,7 @@ def test_dataframe_dtypes(self): to_datetime(df) -class ToDatetimeMisc(tm.TestCase): +class ToDatetimeMisc(object): def test_index_to_datetime(self): idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) @@ -829,7 +829,7 @@ def test_dayfirst(self): tm.assert_index_equal(expected, idx6) -class TestGuessDatetimeFormat(tm.TestCase): +class TestGuessDatetimeFormat(object): def test_guess_datetime_format_with_parseable_formats(self): tm._skip_if_not_us_locale() @@ -914,7 +914,7 @@ def test_guess_datetime_format_for_array(self): assert format_for_string_of_nans is None -class TestToDatetimeInferFormat(tm.TestCase): +class 
TestToDatetimeInferFormat(object): def test_to_datetime_infer_datetime_format_consistent_format(self): s = pd.Series(pd.date_range('20000101', periods=50, freq='H')) @@ -974,7 +974,7 @@ def test_to_datetime_iso8601_noleading_0s(self): tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d'), expected) -class TestDaysInMonth(tm.TestCase): +class TestDaysInMonth(object): # tests for issue #10154 def test_day_not_in_month_coerce(self): @@ -1006,7 +1006,7 @@ def test_day_not_in_month_ignore(self): format="%Y-%m-%d") == '2015-04-31' -class TestDatetimeParsingWrappers(tm.TestCase): +class TestDatetimeParsingWrappers(object): def test_does_not_convert_mixed_integer(self): bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') @@ -1362,7 +1362,7 @@ def test_parsers_iso8601(self): raise Exception(date_str) -class TestArrayToDatetime(tm.TestCase): +class TestArrayToDatetime(object): def test_try_parse_dates(self): from dateutil.parser import parse diff --git a/pandas/tests/indexes/period/test_asfreq.py b/pandas/tests/indexes/period/test_asfreq.py index b97be3f61a2dd..c8724b2a3bc91 100644 --- a/pandas/tests/indexes/period/test_asfreq.py +++ b/pandas/tests/indexes/period/test_asfreq.py @@ -6,7 +6,7 @@ from pandas import PeriodIndex, Series, DataFrame -class TestPeriodIndex(tm.TestCase): +class TestPeriodIndex(object): def setup_method(self, method): pass diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index b0db27b5f2cea..6a188c0987f91 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -9,7 +9,7 @@ Series, Index) -class TestPeriodIndex(tm.TestCase): +class TestPeriodIndex(object): def setup_method(self, method): pass @@ -473,7 +473,7 @@ def test_map_with_string_constructor(self): tm.assert_index_equal(res, expected) -class TestSeriesPeriod(tm.TestCase): +class TestSeriesPeriod(object): def setup_method(self, method): self.series = Series(period_range('2000-01-01', periods=10, freq='D')) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 36db56b751633..d4dac1cf88fff 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -11,7 +11,7 @@ period_range, Period, _np_version_under1p9) -class TestGetItem(tm.TestCase): +class TestGetItem(object): def setup_method(self, method): pass @@ -200,7 +200,7 @@ def test_getitem_day(self): s[v] -class TestIndexing(tm.TestCase): +class TestIndexing(object): def test_get_loc_msg(self): idx = period_range('2000-1-1', freq='A', periods=10) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 583848f75c6b4..7acc335c31be4 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -851,7 +851,7 @@ def test_equals(self): assert not idx.equals(pd.Series(idx3)) -class TestPeriodIndexSeriesMethods(tm.TestCase): +class TestPeriodIndexSeriesMethods(object): """ Test PeriodIndex and Period Series Ops consistency """ def _check(self, values, func, expected): @@ -1135,7 +1135,7 @@ def test_pi_comp_period_nat(self): self._check(idx, f, exp) -class TestSeriesPeriod(tm.TestCase): +class TestSeriesPeriod(object): def setup_method(self, method): self.series = Series(period_range('2000-01-01', periods=10, freq='D')) @@ -1175,7 +1175,7 @@ def test_ops_series_period(self): tm.assert_series_equal(s - s2, -exp) -class TestFramePeriod(tm.TestCase): +class 
TestFramePeriod(object): def test_ops_frame_period(self): # GH 13043 @@ -1206,7 +1206,7 @@ def test_ops_frame_period(self): tm.assert_frame_equal(df - df2, -exp) -class TestPeriodIndexComparisons(tm.TestCase): +class TestPeriodIndexComparisons(object): def test_pi_pi_comp(self): diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index 88a9ff5752322..6d142722c315a 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -8,7 +8,7 @@ DataFrame, _np_version_under1p12, Period) -class TestPeriodIndex(tm.TestCase): +class TestPeriodIndex(object): def setup_method(self, method): pass diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 11ec3bc215cf8..6f73e7c15e4d9 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -13,7 +13,7 @@ from ..datetimelike import DatetimeLike -class TestPeriodIndex(DatetimeLike, tm.TestCase): +class TestPeriodIndex(DatetimeLike): _holder = PeriodIndex _multiprocess_can_split_ = True diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index 7041724faeb89..1ac05f9fa94b7 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -12,7 +12,7 @@ def _permute(obj): return obj.take(np.random.permutation(len(obj))) -class TestPeriodIndex(tm.TestCase): +class TestPeriodIndex(object): def setup_method(self, method): pass diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index bd80c2c4f341e..074678164e6f9 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -11,7 +11,7 @@ date_range, to_datetime, period_range) -class TestPeriodRepresentation(tm.TestCase): +class TestPeriodRepresentation(object): """ Wish to match NumPy units """ @@ -73,7 +73,7 @@ def test_negone_ordinals(self): repr(period) -class TestTslib(tm.TestCase): +class TestTslib(object): def test_intraday_conversion_factors(self): assert period_asfreq(1, get_freq('D'), get_freq('H'), False) == 24 assert period_asfreq(1, get_freq('D'), get_freq('T'), False) == 1440 @@ -150,7 +150,7 @@ def test_period_ordinal_business_day(self): 0, 0, 0, 0, get_freq('B')) == 11418 -class TestPeriodIndex(tm.TestCase): +class TestPeriodIndex(object): def setup_method(self, method): pass diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index ce3f4b5d68d89..6a2087b37631e 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -29,7 +29,7 @@ from pandas._libs.lib import Timestamp -class TestIndex(Base, tm.TestCase): +class TestIndex(Base): _holder = Index def setup_method(self, method): @@ -1801,7 +1801,7 @@ def test_string_index_repr(self): assert coerce(idx) == expected -class TestMixedIntIndex(Base, tm.TestCase): +class TestMixedIntIndex(Base): # Mostly the tests from common.py for which the results differ # in py2 and py3 because ints and strings are uncomparable in py3 # (GH 13514) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 94349b4860698..4e4f9b29f9a4c 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -19,7 +19,7 @@ unicode = lambda x: x -class TestCategoricalIndex(Base, tm.TestCase): +class TestCategoricalIndex(Base): _holder = 
CategoricalIndex def setup_method(self, method): diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py index ae4a130c24310..ca9841112b1d5 100644 --- a/pandas/tests/indexes/test_frozen.py +++ b/pandas/tests/indexes/test_frozen.py @@ -5,7 +5,7 @@ from pandas.compat import u -class TestFrozenList(CheckImmutable, CheckStringMixin, tm.TestCase): +class TestFrozenList(CheckImmutable, CheckStringMixin): mutable_methods = ('extend', 'pop', 'remove', 'insert') unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"]) @@ -31,7 +31,7 @@ def test_inplace(self): self.check_result(r, self.lst) -class TestFrozenNDArray(CheckImmutable, CheckStringMixin, tm.TestCase): +class TestFrozenNDArray(CheckImmutable, CheckStringMixin): mutable_methods = ('put', 'itemset', 'fill') unicode_container = FrozenNDArray([u("\u05d0"), u("\u05d1"), "c"]) diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 90e5b1b6c9788..33745017fe3d6 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -12,7 +12,7 @@ import pandas as pd -class TestIntervalIndex(Base, tm.TestCase): +class TestIntervalIndex(Base): _holder = IntervalIndex def setup_method(self, method): @@ -682,7 +682,7 @@ def f(): pytest.raises(ValueError, f) -class TestIntervalRange(tm.TestCase): +class TestIntervalRange(object): def test_construction(self): result = interval_range(0, 5, name='foo', closed='both') @@ -720,7 +720,7 @@ def f(): pytest.raises(ValueError, f) -class TestIntervalTree(tm.TestCase): +class TestIntervalTree(object): def setup_method(self, method): gentree = lambda dtype: IntervalTree(np.arange(5, dtype=dtype), np.arange(5, dtype=dtype) + 2) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index d2024340c522e..402dba0ba08b8 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -27,7 +27,7 @@ from .common import Base -class TestMultiIndex(Base, tm.TestCase): +class TestMultiIndex(Base): _holder = MultiIndex _compat_props = ['shape', 'ndim', 'size', 'itemsize'] diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index e82b1c5e74543..3d06f1672ae32 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -176,7 +176,7 @@ def test_modulo(self): tm.assert_index_equal(index % 2, expected) -class TestFloat64Index(Numeric, tm.TestCase): +class TestFloat64Index(Numeric): _holder = Float64Index def setup_method(self, method): @@ -621,7 +621,7 @@ def test_ufunc_coercions(self): tm.assert_index_equal(result, exp) -class TestInt64Index(NumericInt, tm.TestCase): +class TestInt64Index(NumericInt): _dtype = 'int64' _holder = Int64Index @@ -915,7 +915,7 @@ def test_join_outer(self): tm.assert_numpy_array_equal(ridx, eridx) -class TestUInt64Index(NumericInt, tm.TestCase): +class TestUInt64Index(NumericInt): _dtype = 'uint64' _holder = UInt64Index diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index cc3a76aa7cac1..18539989084e9 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -20,7 +20,7 @@ from .test_numeric import Numeric -class TestRangeIndex(Numeric, tm.TestCase): +class TestRangeIndex(Numeric): _holder = RangeIndex _compat_props = ['shape', 'ndim', 'size', 'itemsize'] diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index b9720f4a300d1..586b96f980f8f 
100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -10,7 +10,7 @@ from ..datetimelike import DatetimeLike -class TestTimedeltaIndex(DatetimeLike, tm.TestCase): +class TestTimedeltaIndex(DatetimeLike): _holder = TimedeltaIndex _multiprocess_can_split_ = True diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index bdaa62c5ce221..dd25e2cca2e55 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -8,7 +8,7 @@ from pandas import TimedeltaIndex, timedelta_range, to_timedelta -class TestTimedeltaIndex(tm.TestCase): +class TestTimedeltaIndex(object): _multiprocess_can_split_ = True def test_construction_base_constructor(self): diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index 6ffe3516c4a94..844033cc19eed 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -6,7 +6,7 @@ from pandas import TimedeltaIndex, timedelta_range, compat, Index, Timedelta -class TestTimedeltaIndex(tm.TestCase): +class TestTimedeltaIndex(object): _multiprocess_can_split_ = True def test_insert(self): diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 12d29dc00e273..9a9912d4f0ab1 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -861,7 +861,7 @@ def test_equals(self): assert not idx.equals(pd.Series(idx2)) -class TestTimedeltas(tm.TestCase): +class TestTimedeltas(object): _multiprocess_can_split_ = True def test_ops(self): @@ -1209,7 +1209,7 @@ def test_compare_timedelta_ndarray(self): tm.assert_numpy_array_equal(result, expected) -class TestSlicing(tm.TestCase): +class TestSlicing(object): def test_tdi_ops_attributes(self): rng = timedelta_range('2 days', periods=5, freq='2D', name='x') diff --git a/pandas/tests/indexes/timedeltas/test_partial_slicing.py b/pandas/tests/indexes/timedeltas/test_partial_slicing.py index 5e6e1440a7c04..8e5eae2a7a3ef 100644 --- a/pandas/tests/indexes/timedeltas/test_partial_slicing.py +++ b/pandas/tests/indexes/timedeltas/test_partial_slicing.py @@ -8,7 +8,7 @@ from pandas.util.testing import assert_series_equal -class TestSlicing(tm.TestCase): +class TestSlicing(object): def test_partial_slice(self): rng = timedelta_range('1 day 10:11:12', freq='h', periods=500) diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index 8779f6d49cdd5..22546d25273a7 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -5,7 +5,7 @@ from pandas import TimedeltaIndex, timedelta_range, Int64Index -class TestTimedeltaIndex(tm.TestCase): +class TestTimedeltaIndex(object): _multiprocess_can_split_ = True def test_union(self): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 933674c425cd8..79fe0a864f246 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -16,7 +16,7 @@ randn = np.random.randn -class TestTimedeltaIndex(DatetimeLike, tm.TestCase): +class TestTimedeltaIndex(DatetimeLike): _holder = TimedeltaIndex _multiprocess_can_split_ = True @@ -563,7 +563,7 @@ def test_freq_conversion(self): 
assert_index_equal(result, expected) -class TestSlicing(tm.TestCase): +class TestSlicing(object): def test_timedelta(self): # this is valid too @@ -589,7 +589,7 @@ def test_timedelta(self): tm.assert_index_equal(result2, result3) -class TestTimeSeries(tm.TestCase): +class TestTimeSeries(object): _multiprocess_can_split_ = True def test_series_box_timedelta(self): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 55f16c10e9945..4732a0ce110de 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -7,7 +7,7 @@ from pandas.util.testing import assert_frame_equal -class TestTimedeltas(tm.TestCase): +class TestTimedeltas(object): _multiprocess_can_split_ = True def test_timedelta_range(self): diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index faee627488dc0..a991b7bbe140a 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -11,7 +11,7 @@ from pandas._libs.tslib import iNaT -class TestTimedeltas(tm.TestCase): +class TestTimedeltas(object): _multiprocess_can_split_ = True def test_to_timedelta(self): diff --git a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py index 727c87ac90872..95b406517be62 100644 --- a/pandas/tests/indexing/test_callable.py +++ b/pandas/tests/indexing/test_callable.py @@ -6,7 +6,7 @@ import pandas.util.testing as tm -class TestIndexingCallable(tm.TestCase): +class TestIndexingCallable(object): def test_frame_loc_ix_callable(self): # GH 11485 diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 6d2723ae0ff01..6874fedaa705f 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -10,7 +10,7 @@ from pandas.util import testing as tm -class TestCategoricalIndex(tm.TestCase): +class TestCategoricalIndex(object): def setup_method(self, method): diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index c1f5d2941106d..27a889e58e55e 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -10,7 +10,7 @@ from pandas.util import testing as tm -class TestCaching(tm.TestCase): +class TestCaching(object): def test_slice_consolidate_invalidate_item_cache(self): @@ -90,7 +90,7 @@ def test_setitem_cache_updating(self): tm.assert_series_equal(out['A'], expected['A']) -class TestChaining(tm.TestCase): +class TestChaining(object): def test_setitem_chained_setfault(self): diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 8e81a3bd1df7a..25cc810299678 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -44,7 +44,7 @@ def test_has_comprehensive_tests(self): raise AssertionError(msg.format(type(self), method_name)) -class TestSetitemCoercion(CoercionBase, tm.TestCase): +class TestSetitemCoercion(CoercionBase): method = 'setitem' @@ -330,7 +330,7 @@ def test_setitem_index_period(self): pass -class TestInsertIndexCoercion(CoercionBase, tm.TestCase): +class TestInsertIndexCoercion(CoercionBase): klasses = ['index'] method = 'insert' @@ -514,7 +514,7 @@ def test_insert_index_period(self): self._assert_insert_conversion(obj, 'x', exp, np.object) -class 
TestWhereCoercion(CoercionBase, tm.TestCase): +class TestWhereCoercion(CoercionBase): method = 'where' @@ -852,7 +852,7 @@ def test_where_index_period(self): pass -class TestFillnaSeriesCoercion(CoercionBase, tm.TestCase): +class TestFillnaSeriesCoercion(CoercionBase): # not indexing, but place here for consisntency @@ -1139,7 +1139,7 @@ def test_fillna_index_period(self): pass -class TestReplaceSeriesCoercion(CoercionBase, tm.TestCase): +class TestReplaceSeriesCoercion(CoercionBase): # not indexing, but place here for consisntency diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 3089bc1dbddea..da8a896cb6f4a 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -6,7 +6,7 @@ from pandas.util import testing as tm -class TestDatetimeIndex(tm.TestCase): +class TestDatetimeIndex(object): def test_indexing_with_datetime_tz(self): diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 1701dd9f6ba90..00a2b8166ceed 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -9,7 +9,7 @@ import pandas.util.testing as tm -class TestFloatIndexers(tm.TestCase): +class TestFloatIndexers(object): def check(self, result, original, indexer, getitem): """ diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 3e625fa483f7b..af4b9e1f0cc25 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -12,7 +12,7 @@ from pandas.tests.indexing.common import Base -class TestiLoc(Base, tm.TestCase): +class TestiLoc(Base): def test_iloc_exceeds_bounds(self): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 0759dc2333ad5..9fa677eb624ae 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -26,7 +26,7 @@ # Indexing test cases -class TestFancy(Base, tm.TestCase): +class TestFancy(Base): """ pure get/set item & fancy indexing """ def test_setitem_ndarray_1d(self): @@ -599,7 +599,7 @@ def test_index_type_coercion(self): assert s2.index.is_object() -class TestMisc(Base, tm.TestCase): +class TestMisc(Base): def test_indexer_caching(self): # GH5727 @@ -800,7 +800,7 @@ def test_maybe_numeric_slice(self): assert result == expected -class TestSeriesNoneCoercion(tm.TestCase): +class TestSeriesNoneCoercion(object): EXPECTED_RESULTS = [ # For numeric series, we should coerce to NaN. ([1, 2, 3], [np.nan, 2, 3]), @@ -847,7 +847,7 @@ def test_coercion_with_loc_and_series(self): tm.assert_series_equal(start_series, expected_series) -class TestDataframeNoneCoercion(tm.TestCase): +class TestDataframeNoneCoercion(object): EXPECTED_SINGLE_ROW_RESULTS = [ # For numeric series, we should coerce to NaN. 
([1, 2, 3], [np.nan, 2, 3]), diff --git a/pandas/tests/indexing/test_indexing_slow.py b/pandas/tests/indexing/test_indexing_slow.py index 21cdbb17f52ce..08d390a6a213e 100644 --- a/pandas/tests/indexing/test_indexing_slow.py +++ b/pandas/tests/indexing/test_indexing_slow.py @@ -8,7 +8,7 @@ import pandas.util.testing as tm -class TestIndexingSlow(tm.TestCase): +class TestIndexingSlow(object): @tm.slow def test_multiindex_get_loc(self): # GH7724, GH2646 diff --git a/pandas/tests/indexing/test_interval.py b/pandas/tests/indexing/test_interval.py index b8d8739af1d15..2552fc066cc87 100644 --- a/pandas/tests/indexing/test_interval.py +++ b/pandas/tests/indexing/test_interval.py @@ -6,7 +6,7 @@ import pandas.util.testing as tm -class TestIntervalIndex(tm.TestCase): +class TestIntervalIndex(object): def setup_method(self, method): self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py index 8290bc80edac1..dc9a591ee3101 100644 --- a/pandas/tests/indexing/test_ix.py +++ b/pandas/tests/indexing/test_ix.py @@ -14,7 +14,7 @@ from pandas.errors import PerformanceWarning -class TestIX(tm.TestCase): +class TestIX(object): def test_ix_deprecation(self): # GH 15114 diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 410d01431ef5a..fe2318be72eda 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -14,7 +14,7 @@ from pandas.tests.indexing.common import Base -class TestLoc(Base, tm.TestCase): +class TestLoc(Base): def test_loc_getitem_dups(self): # GH 5678 diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index b8c34f9f28d83..483c39ed8694e 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -9,7 +9,7 @@ from pandas.tests.indexing.common import _mklbl -class TestMultiIndexBasic(tm.TestCase): +class TestMultiIndexBasic(object): def test_iloc_getitem_multiindex2(self): # TODO(wesm): fix this @@ -698,7 +698,7 @@ def test_multiindex_slice_first_level(self): tm.assert_frame_equal(result, expected) -class TestMultiIndexSlicers(tm.TestCase): +class TestMultiIndexSlicers(object): def test_per_axis_per_level_getitem(self): @@ -1188,7 +1188,7 @@ def f(): tm.assert_frame_equal(df, expected) -class TestMultiIndexPanel(tm.TestCase): +class TestMultiIndexPanel(object): def test_iloc_getitem_panel_multiindex(self): diff --git a/pandas/tests/indexing/test_panel.py b/pandas/tests/indexing/test_panel.py index b704e15b81502..2d4ffd6a4e783 100644 --- a/pandas/tests/indexing/test_panel.py +++ b/pandas/tests/indexing/test_panel.py @@ -6,7 +6,7 @@ from pandas import Panel, date_range, DataFrame -class TestPanel(tm.TestCase): +class TestPanel(object): def test_iloc_getitem_panel(self): diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 20cec2a3aa7db..93a85e247a787 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -14,7 +14,7 @@ from pandas.util import testing as tm -class TestPartialSetting(tm.TestCase): +class TestPartialSetting(object): def test_partial_setting(self): diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index fb40c539e16ba..5dd1714b903eb 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -10,7 +10,7 @@ from pandas.tests.indexing.common import Base -class TestScalar(Base, 
tm.TestCase): +class TestScalar(Base): def test_at_and_iat_get(self): def _check(f, func, values=False): diff --git a/pandas/tests/indexing/test_timedelta.py b/pandas/tests/indexing/test_timedelta.py index 5f0088382ce57..cf8cc6c2d345d 100644 --- a/pandas/tests/indexing/test_timedelta.py +++ b/pandas/tests/indexing/test_timedelta.py @@ -2,7 +2,7 @@ from pandas.util import testing as tm -class TestTimedeltaIndexing(tm.TestCase): +class TestTimedeltaIndexing(object): def test_boolean_indexing(self): # GH 14946 diff --git a/pandas/tests/io/formats/test_eng_formatting.py b/pandas/tests/io/formats/test_eng_formatting.py index e064d1200d672..9d5773283176c 100644 --- a/pandas/tests/io/formats/test_eng_formatting.py +++ b/pandas/tests/io/formats/test_eng_formatting.py @@ -6,7 +6,7 @@ from pandas.util import testing as tm -class TestEngFormatter(tm.TestCase): +class TestEngFormatter(object): def test_eng_float_formatter(self): df = DataFrame({'A': [1.41, 141., 14100, 1410000.]}) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 3cea731cfd440..e99c70952e5b3 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -105,7 +105,7 @@ def has_expanded_repr(df): return False -class TestDataFrameFormatting(tm.TestCase): +class TestDataFrameFormatting(object): def setup_method(self, method): self.warn_filters = warnings.filters @@ -1604,7 +1604,7 @@ def gen_series_formatting(): return test_sers -class TestSeriesFormatting(tm.TestCase): +class TestSeriesFormatting(object): def setup_method(self, method): self.ts = tm.makeTimeSeries() @@ -2152,7 +2152,7 @@ def _three_digit_exp(): return '%.4g' % 1.7e8 == '1.7e+008' -class TestFloatArrayFormatter(tm.TestCase): +class TestFloatArrayFormatter(object): def test_misc(self): obj = fmt.FloatArrayFormatter(np.array([], dtype=np.float64)) @@ -2238,7 +2238,7 @@ def test_too_long(self): assert str(df) == ' x\n0 1.2346e+04\n1 2.0000e+06' -class TestRepr_timedelta64(tm.TestCase): +class TestRepr_timedelta64(object): def test_none(self): delta_1d = pd.to_timedelta(1, unit='D') @@ -2311,7 +2311,7 @@ def test_all(self): assert drepr(delta_1ns) == "0 days 00:00:00.000000001" -class TestTimedelta64Formatter(tm.TestCase): +class TestTimedelta64Formatter(object): def test_days(self): x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='D') @@ -2357,7 +2357,7 @@ def test_zero(self): assert result[0].strip() == "'0 days'" -class TestDatetime64Formatter(tm.TestCase): +class TestDatetime64Formatter(object): def test_mixed(self): x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), pd.NaT]) @@ -2438,7 +2438,7 @@ def format_func(x): assert result == ['10:10', '12:12'] -class TestNaTFormatting(tm.TestCase): +class TestNaTFormatting(object): def test_repr(self): assert repr(pd.NaT) == "NaT" @@ -2447,7 +2447,7 @@ def test_str(self): assert str(pd.NaT) == "NaT" -class TestDatetimeIndexFormat(tm.TestCase): +class TestDatetimeIndexFormat(object): def test_datetime(self): formatted = pd.to_datetime([datetime(2003, 1, 1, 12), pd.NaT]).format() @@ -2474,7 +2474,7 @@ def test_date_explict_date_format(self): assert formatted[1] == "UT" -class TestDatetimeIndexUnicode(tm.TestCase): +class TestDatetimeIndexUnicode(object): def test_dates(self): text = str(pd.to_datetime([datetime(2013, 1, 1), datetime(2014, 1, 1) @@ -2489,7 +2489,7 @@ def test_mixed(self): assert "'2014-01-01 00:00:00']" in text -class TestStringRepTimestamp(tm.TestCase): +class TestStringRepTimestamp(object): def test_no_tz(self): 
dt_date = datetime(2013, 1, 2) diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 05b697ffbb756..aae3ba31648ff 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -7,7 +7,6 @@ from pandas import compat import pandas.io.formats.printing as printing import pandas.io.formats.format as fmt -import pandas.util.testing as tm import pandas.core.config as cf @@ -35,7 +34,7 @@ def test_repr_binary_type(): assert res == b -class TestFormattBase(tm.TestCase): +class TestFormattBase(object): def test_adjoin(self): data = [['a', 'b', 'c'], ['dd', 'ee', 'ff'], ['ggg', 'hhh', 'iii']] @@ -123,7 +122,7 @@ def test_ambiguous_width(self): assert adjoined == expected -class TestTableSchemaRepr(tm.TestCase): +class TestTableSchemaRepr(object): @classmethod def setup_class(cls): diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 687e78e64a3e7..ee7356f12f498 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -11,7 +11,7 @@ from pandas.io.formats.style import Styler, _get_level_lengths # noqa -class TestStyler(tm.TestCase): +class TestStyler(object): def setup_method(self, method): np.random.seed(24) @@ -812,7 +812,7 @@ def test_mi_sparse_column_names(self): assert head == expected -class TestStylerMatplotlibDep(tm.TestCase): +class TestStylerMatplotlibDep(object): def test_background_gradient(self): tm._skip_if_no_mpl() diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 552fb77bb54cc..1073fbcef5aec 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -4,7 +4,7 @@ from pandas.util import testing as tm -class TestToCSV(tm.TestCase): +class TestToCSV(object): def test_to_csv_quotechar(self): df = DataFrame({'col': [1, 2]}) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 4a4546dd807f1..cde920b1511d2 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -22,7 +22,7 @@ pass -class TestToHTML(tm.TestCase): +class TestToHTML(object): def test_to_html_with_col_space(self): def check_with_width(df, col_space): diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 1e667245809ec..e447a74b2b462 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -9,7 +9,6 @@ from pandas import DataFrame from pandas.core.dtypes.dtypes import ( PeriodDtype, CategoricalDtype, DatetimeTZDtype) -import pandas.util.testing as tm from pandas.io.json.table_schema import ( as_json_table_type, build_table_schema, @@ -17,7 +16,7 @@ set_default_names) -class TestBuildSchema(tm.TestCase): +class TestBuildSchema(object): def setup_method(self, method): self.df = DataFrame( @@ -85,7 +84,7 @@ def test_multiindex(self): assert result == expected -class TestTableSchemaType(tm.TestCase): +class TestTableSchemaType(object): def test_as_json_table_type_int_data(self): int_data = [1, 2, 3] @@ -169,7 +168,7 @@ def test_as_json_table_type_categorical_dtypes(self): assert as_json_table_type(CategoricalDtype()) == 'any' -class TestTableOrient(tm.TestCase): +class TestTableOrient(object): def setup_method(self, method): self.df = DataFrame( diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 
d24250f534521..49b765b18d623 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -212,7 +212,7 @@ def test_non_ascii_key(self):
         tm.assert_frame_equal(result, expected)


-class TestNestedToRecord(tm.TestCase):
+class TestNestedToRecord(object):

     def test_flat_stays_flat(self):
         recs = [dict(flat1=1, flat2=2),
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 0cf9000fcffb2..671d4248818e4 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -35,7 +35,7 @@
 _mixed_frame = _frame.copy()


-class TestPandasContainer(tm.TestCase):
+class TestPandasContainer(object):

     def setup_method(self, method):
         self.dirpath = tm.get_data_path()
diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py
index a23ae225c19b0..10f99c4fcd0a8 100644
--- a/pandas/tests/io/json/test_ujson.py
+++ b/pandas/tests/io/json/test_ujson.py
@@ -25,7 +25,7 @@
             else partial(json.dumps, encoding="utf-8"))


-class UltraJSONTests(tm.TestCase):
+class UltraJSONTests(object):

     @pytest.mark.skipif(compat.is_platform_32bit(),
                         reason="not compliant on 32-bit, xref #15865")
@@ -946,7 +946,7 @@ def my_obj_handler(obj):
             ujson.decode(ujson.encode(l, default_handler=str)))


-class NumpyJSONTests(tm.TestCase):
+class NumpyJSONTests(object):

     def testBool(self):
         b = np.bool(True)
@@ -1222,7 +1222,7 @@ def testArrayNumpyLabelled(self):
         assert (np.array(['a', 'b']) == output[2]).all()


-class PandasJSONTests(tm.TestCase):
+class PandasJSONTests(object):

     def testDataFrame(self):
         df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[
diff --git a/pandas/tests/io/msgpack/test_limits.py b/pandas/tests/io/msgpack/test_limits.py
index e906d14a2b5a8..07044dbb7e5de 100644
--- a/pandas/tests/io/msgpack/test_limits.py
+++ b/pandas/tests/io/msgpack/test_limits.py
@@ -4,12 +4,10 @@

 import pytest

-import pandas.util.testing as tm
-
 from pandas.io.msgpack import packb, unpackb, Packer, Unpacker, ExtType


-class TestLimits(tm.TestCase):
+class TestLimits(object):

     def test_integer(self):
         x = -(2 ** 63)
diff --git a/pandas/tests/io/msgpack/test_unpack.py b/pandas/tests/io/msgpack/test_unpack.py
index 158094d111b54..c056f8d800e11 100644
--- a/pandas/tests/io/msgpack/test_unpack.py
+++ b/pandas/tests/io/msgpack/test_unpack.py
@@ -1,11 +1,10 @@
 from io import BytesIO
 import sys
 from pandas.io.msgpack import Unpacker, packb, OutOfData, ExtType
-import pandas.util.testing as tm
 import pytest


-class TestUnpack(tm.TestCase):
+class TestUnpack(object):

     def test_unpack_array_header_from_file(self):
         f = BytesIO(packb([1, 2, 3, 4]))
diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
index 26b5c4788d53a..e12945a6a3102 100644
--- a/pandas/tests/io/parser/test_network.py
+++ b/pandas/tests/io/parser/test_network.py
@@ -47,7 +47,7 @@ def check_compressed_urls(salaries_table, compression, extension, mode,
     tm.assert_frame_equal(url_table, salaries_table)


-class TestS3(tm.TestCase):
+class TestS3(object):

     def setup_method(self, method):
         try:
diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py
index cced8299691df..8d59e3acb3230 100644
--- a/pandas/tests/io/parser/test_parsers.py
+++ b/pandas/tests/io/parser/test_parsers.py
@@ -50,7 +50,7 @@ def setup_method(self, method):
         self.csv_shiftjs = os.path.join(self.dirpath, 'sauron.SHIFT_JIS.csv')


-class TestCParserHighMemory(BaseParser, CParserTests, tm.TestCase):
+class TestCParserHighMemory(BaseParser, CParserTests):
     engine = 'c'
     low_memory = False
     float_precision_choices = [None, 'high', 'round_trip']
@@ -68,7 +68,7 @@ def read_table(self, *args, **kwds):
         return read_table(*args, **kwds)


-class TestCParserLowMemory(BaseParser, CParserTests, tm.TestCase):
+class TestCParserLowMemory(BaseParser, CParserTests):
     engine = 'c'
     low_memory = True
     float_precision_choices = [None, 'high', 'round_trip']
@@ -86,7 +86,7 @@ def read_table(self, *args, **kwds):
         return read_table(*args, **kwds)


-class TestPythonParser(BaseParser, PythonParserTests, tm.TestCase):
+class TestPythonParser(BaseParser, PythonParserTests):
     engine = 'python'
     float_precision_choices = [None]
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
index 90231e01d0173..0bfeb5215f370 100644
--- a/pandas/tests/io/parser/test_read_fwf.py
+++ b/pandas/tests/io/parser/test_read_fwf.py
@@ -19,7 +19,7 @@
 from pandas.io.parsers import read_csv, read_fwf, EmptyDataError


-class TestFwfParsing(tm.TestCase):
+class TestFwfParsing(object):

     def test_fwf(self):
         data_expected = """\
diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py
index f09d8c8e778d5..7cd02a07bbd4c 100644
--- a/pandas/tests/io/parser/test_textreader.py
+++ b/pandas/tests/io/parser/test_textreader.py
@@ -26,7 +26,7 @@
 import pandas.io.libparsers as parser


-class TestTextReader(tm.TestCase):
+class TestTextReader(object):

     def setup_method(self, method):
         self.dirpath = tm.get_data_path()
diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py
index 6c2d883aeb16b..3f62ff44531fb 100644
--- a/pandas/tests/io/parser/test_unsupported.py
+++ b/pandas/tests/io/parser/test_unsupported.py
@@ -17,7 +17,7 @@
 from pandas.io.parsers import read_csv, read_table


-class TestUnsupportedFeatures(tm.TestCase):
+class TestUnsupportedFeatures(object):

     def test_mangle_dupe_cols_false(self):
         # see gh-12935
@@ -102,7 +102,7 @@ def test_python_engine(self):
             read_csv(StringIO(data), engine=engine, **kwargs)


-class TestDeprecatedFeatures(tm.TestCase):
+class TestDeprecatedFeatures(object):

     def test_deprecated_args(self):
         data = '1,2,3'
diff --git a/pandas/tests/io/sas/test_sas.py b/pandas/tests/io/sas/test_sas.py
index 461c0fe1fd848..617df99b99f0b 100644
--- a/pandas/tests/io/sas/test_sas.py
+++ b/pandas/tests/io/sas/test_sas.py
@@ -1,11 +1,10 @@
 import pytest
-import pandas.util.testing as tm

 from pandas.compat import StringIO
 from pandas import read_sas


-class TestSas(tm.TestCase):
+class TestSas(object):

     def test_sas_buffer_format(self):

diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py
index cb28ab6c6c345..a5157744038f4 100644
--- a/pandas/tests/io/sas/test_sas7bdat.py
+++ b/pandas/tests/io/sas/test_sas7bdat.py
@@ -6,7 +6,7 @@
 import numpy as np


-class TestSAS7BDAT(tm.TestCase):
+class TestSAS7BDAT(object):

     def setup_method(self, method):
         self.dirpath = tm.get_data_path()
diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py
index 17b286a4915ce..de31c3e36a8d5 100644
--- a/pandas/tests/io/sas/test_xport.py
+++ b/pandas/tests/io/sas/test_xport.py
@@ -16,7 +16,7 @@ def numeric_as_float(data):
         data[v] = data[v].astype(np.float64)


-class TestXport(tm.TestCase):
+class TestXport(object):

     def setup_method(self, method):
         self.dirpath = tm.get_data_path()
diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py
index e9ffb2dca7ae5..406045a69beca 100644
--- a/pandas/tests/io/test_clipboard.py
+++ b/pandas/tests/io/test_clipboard.py
@@ -23,11 +23,10 @@
 @pytest.mark.single
 @pytest.mark.skipif(not _DEPS_INSTALLED,
                     reason="clipboard primitives not installed")
-class TestClipboard(tm.TestCase):
+class TestClipboard(object):

     @classmethod
     def setup_class(cls):
-        super(TestClipboard, cls).setup_class()
         cls.data = {}
         cls.data['string'] = mkdf(5, 3, c_idx_type='s', r_idx_type='i',
                                   c_idx_names=[None], r_idx_names=[None])
@@ -63,7 +62,6 @@ def setup_class(cls):

     @classmethod
     def teardown_class(cls):
-        super(TestClipboard, cls).teardown_class()
         del cls.data_types, cls.data

     def check_round_trip_frame(self, data_type, excel=None, sep=None,
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index 1837e5381a07e..a1a95e09915f1 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -24,7 +24,7 @@
     pass


-class TestCommonIOCapabilities(tm.TestCase):
+class TestCommonIOCapabilities(object):
     data1 = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
@@ -90,7 +90,7 @@ def test_iterator(self):
         tm.assert_frame_equal(concat(it), expected.iloc[1:])


-class TestMMapWrapper(tm.TestCase):
+class TestMMapWrapper(object):

     def setup_method(self, method):
         self.mmap_file = os.path.join(tm.get_data_path(),
diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
index 919c521f22f60..c70b5937fea3f 100644
--- a/pandas/tests/io/test_excel.py
+++ b/pandas/tests/io/test_excel.py
@@ -989,19 +989,19 @@ def test_read_excel_squeeze(self):
         tm.assert_series_equal(actual, expected)


-class XlsReaderTests(XlrdTests, tm.TestCase):
+class XlsReaderTests(XlrdTests):
     ext = '.xls'
     engine_name = 'xlrd'
     check_skip = staticmethod(_skip_if_no_xlrd)


-class XlsxReaderTests(XlrdTests, tm.TestCase):
+class XlsxReaderTests(XlrdTests):
     ext = '.xlsx'
     engine_name = 'xlrd'
     check_skip = staticmethod(_skip_if_no_xlrd)


-class XlsmReaderTests(XlrdTests, tm.TestCase):
+class XlsmReaderTests(XlrdTests):
     ext = '.xlsm'
     engine_name = 'xlrd'
     check_skip = staticmethod(_skip_if_no_xlrd)
@@ -1887,7 +1887,7 @@ def versioned_raise_on_incompat_version(cls):


 @raise_on_incompat_version(1)
-class OpenpyxlTests(ExcelWriterBase, tm.TestCase):
+class OpenpyxlTests(ExcelWriterBase):
     ext = '.xlsx'
     engine_name = 'openpyxl1'
     check_skip = staticmethod(lambda *args, **kwargs: None)
@@ -1923,7 +1923,7 @@ def test_to_excel_styleconverter(self):


 def skip_openpyxl_gt21(cls):
-    """Skip a TestCase instance if openpyxl >= 2.2"""
+    """Skip test case if openpyxl >= 2.2"""

     @classmethod
     def setup_class(cls):
@@ -1940,7 +1940,7 @@ def setup_class(cls):

 @raise_on_incompat_version(2)
 @skip_openpyxl_gt21
-class Openpyxl20Tests(ExcelWriterBase, tm.TestCase):
+class Openpyxl20Tests(ExcelWriterBase):
     ext = '.xlsx'
     engine_name = 'openpyxl20'
     check_skip = staticmethod(lambda *args, **kwargs: None)
@@ -2040,7 +2040,7 @@ def test_write_cells_merge_styled(self):


 def skip_openpyxl_lt22(cls):
-    """Skip a TestCase instance if openpyxl < 2.2"""
+    """Skip test case if openpyxl < 2.2"""

     @classmethod
     def setup_class(cls):
@@ -2056,7 +2056,7 @@ def setup_class(cls):

 @raise_on_incompat_version(2)
 @skip_openpyxl_lt22
-class Openpyxl22Tests(ExcelWriterBase, tm.TestCase):
+class Openpyxl22Tests(ExcelWriterBase):
     ext = '.xlsx'
     engine_name = 'openpyxl22'
     check_skip = staticmethod(lambda *args, **kwargs: None)
@@ -2151,7 +2151,7 @@ def test_write_cells_merge_styled(self):
         assert xcell_a2.font == openpyxl_sty_merged


-class XlwtTests(ExcelWriterBase, tm.TestCase):
+class XlwtTests(ExcelWriterBase):
     ext = '.xls'
     engine_name = 'xlwt'
     check_skip = staticmethod(_skip_if_no_xlwt)
@@ -2208,7 +2208,7 @@ def test_to_excel_styleconverter(self):
         assert xlwt.Alignment.VERT_TOP == xls_style.alignment.vert


-class XlsxWriterTests(ExcelWriterBase, tm.TestCase):
+class XlsxWriterTests(ExcelWriterBase):
     ext = '.xlsx'
     engine_name = 'xlsxwriter'
     check_skip = staticmethod(_skip_if_no_xlsxwriter)
@@ -2261,7 +2261,7 @@ def test_column_format(self):
         assert read_num_format == num_format


-class OpenpyxlTests_NoMerge(ExcelWriterBase, tm.TestCase):
+class OpenpyxlTests_NoMerge(ExcelWriterBase):
     ext = '.xlsx'
     engine_name = 'openpyxl'
     check_skip = staticmethod(_skip_if_no_openpyxl)
@@ -2270,7 +2270,7 @@ class OpenpyxlTests_NoMerge(ExcelWriterBase, tm.TestCase):
     merge_cells = False


-class XlwtTests_NoMerge(ExcelWriterBase, tm.TestCase):
+class XlwtTests_NoMerge(ExcelWriterBase):
     ext = '.xls'
     engine_name = 'xlwt'
     check_skip = staticmethod(_skip_if_no_xlwt)
@@ -2279,7 +2279,7 @@ class XlwtTests_NoMerge(ExcelWriterBase, tm.TestCase):
     merge_cells = False


-class XlsxWriterTests_NoMerge(ExcelWriterBase, tm.TestCase):
+class XlsxWriterTests_NoMerge(ExcelWriterBase):
     ext = '.xlsx'
     engine_name = 'xlsxwriter'
     check_skip = staticmethod(_skip_if_no_xlsxwriter)
@@ -2288,7 +2288,7 @@ class XlsxWriterTests_NoMerge(ExcelWriterBase, tm.TestCase):
     merge_cells = False


-class ExcelWriterEngineTests(tm.TestCase):
+class ExcelWriterEngineTests(object):

     def test_ExcelWriter_dispatch(self):
         with tm.assert_raises_regex(ValueError, 'No engine'):
diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py
index 47fc495201754..58a84ad4d47f8 100644
--- a/pandas/tests/io/test_gbq.py
+++ b/pandas/tests/io/test_gbq.py
@@ -10,7 +10,6 @@

 from pandas import compat, DataFrame
 from pandas.compat import range
-import pandas.util.testing as tm

 pandas_gbq = pytest.importorskip('pandas_gbq')

@@ -94,7 +93,7 @@ def make_mixed_dataframe_v2(test_size):


 @pytest.mark.single
-class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase):
+class TestToGBQIntegrationWithServiceAccountKeyPath(object):

     @classmethod
     def setup_class(cls):
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index 6b1215e443b47..fa83c43ba8dd4 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -93,14 +93,13 @@ def read_html(self, *args, **kwargs):
         return read_html(*args, **kwargs)


-class TestReadHtml(tm.TestCase, ReadHtmlMixin):
+class TestReadHtml(ReadHtmlMixin):
     flavor = 'bs4'
     spam_data = os.path.join(DATA_PATH, 'spam.html')
     banklist_data = os.path.join(DATA_PATH, 'banklist.html')

     @classmethod
     def setup_class(cls):
-        super(TestReadHtml, cls).setup_class()
         _skip_if_none_of(('bs4', 'html5lib'))

     def test_to_html_compat(self):
@@ -778,13 +777,12 @@ def _lang_enc(filename):
     return os.path.splitext(os.path.basename(filename))[0].split('_')


-class TestReadHtmlEncoding(tm.TestCase):
+class TestReadHtmlEncoding(object):
     files = glob.glob(os.path.join(DATA_PATH, 'html_encoding', '*.html'))
     flavor = 'bs4'

     @classmethod
     def setup_class(cls):
-        super(TestReadHtmlEncoding, cls).setup_class()
         _skip_if_none_of((cls.flavor, 'html5lib'))

     def read_html(self, *args, **kwargs):
@@ -830,12 +828,11 @@ def setup_class(cls):
         _skip_if_no(cls.flavor)


-class TestReadHtmlLxml(tm.TestCase, ReadHtmlMixin):
+class TestReadHtmlLxml(ReadHtmlMixin):
     flavor = 'lxml'

     @classmethod
     def setup_class(cls):
-        super(TestReadHtmlLxml, cls).setup_class()
         _skip_if_no('lxml')

     def test_data_fail(self):
diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index 96abf3415fff8..4b1145129c364 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -90,7 +90,7 @@ def check_arbitrary(a, b):
         assert(a == b)


-class TestPackers(tm.TestCase):
+class TestPackers(object):

     def setup_method(self, method):
         self.path = '__%s__.msg' % tm.rands(10)
diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py
index 9e7196593650a..ee44fea55e51a 100644
--- a/pandas/tests/io/test_pytables.py
+++ b/pandas/tests/io/test_pytables.py
@@ -121,18 +121,16 @@ def _maybe_remove(store, key):
         pass


-class Base(tm.TestCase):
+class Base(object):

     @classmethod
     def setup_class(cls):
-        super(Base, cls).setup_class()

         # Pytables 3.0.0 deprecates lots of things
         tm.reset_testing_mode()

     @classmethod
     def teardown_class(cls):
-        super(Base, cls).teardown_class()

         # Pytables 3.0.0 deprecates lots of things
         tm.set_testing_mode()
@@ -145,7 +143,7 @@ def teardown_method(self, method):


 @pytest.mark.single
-class TestHDFStore(Base, tm.TestCase):
+class TestHDFStore(Base):

     def test_factory_fun(self):
         path = create_tempfile(self.path)
@@ -5228,7 +5226,7 @@ def test_complex_append(self):
         assert_frame_equal(pd.concat([df, df], 0), result)


-class TestTimezones(Base, tm.TestCase):
+class TestTimezones(Base):

     def _compare_with_tz(self, a, b):
         tm.assert_frame_equal(a, b)
diff --git a/pandas/tests/io/test_s3.py b/pandas/tests/io/test_s3.py
index 36a0304bddfaf..8c2a32af33765 100644
--- a/pandas/tests/io/test_s3.py
+++ b/pandas/tests/io/test_s3.py
@@ -1,9 +1,7 @@
-from pandas.util import testing as tm
-
 from pandas.io.common import _is_s3_url


-class TestS3URL(tm.TestCase):
+class TestS3URL(object):

     def test_is_s3_url(self):
         assert _is_s3_url("s3://pandas/somethingelse.com")
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 21de0cd371a37..7b3717281bf89 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -818,7 +818,7 @@ def test_unicode_column_name(self):


 @pytest.mark.single
-class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi, tm.TestCase):
+class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi):
     """
     Test the public API as it would be used directly

@@ -998,12 +998,12 @@ def teardown_method(self, method):


 @pytest.mark.single
-class TestSQLApiConn(_EngineToConnMixin, TestSQLApi, tm.TestCase):
+class TestSQLApiConn(_EngineToConnMixin, TestSQLApi):
     pass


 @pytest.mark.single
-class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi, tm.TestCase):
+class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi):
     """
     Test the public sqlite connection fallback API

@@ -1821,37 +1821,32 @@ def test_schema_support(self):


 @pytest.mark.single
-class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy, tm.TestCase):
+class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy):
     pass


 @pytest.mark.single
-class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn,
-                           tm.TestCase):
+class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn):
     pass


 @pytest.mark.single
-class TestPostgreSQLAlchemy(_TestPostgreSQLAlchemy, _TestSQLAlchemy,
-                            tm.TestCase):
+class TestPostgreSQLAlchemy(_TestPostgreSQLAlchemy, _TestSQLAlchemy):
     pass


 @pytest.mark.single
-class TestPostgreSQLAlchemyConn(_TestPostgreSQLAlchemy, _TestSQLAlchemyConn,
-                                tm.TestCase):
+class TestPostgreSQLAlchemyConn(_TestPostgreSQLAlchemy, _TestSQLAlchemyConn):
     pass


 @pytest.mark.single
-class TestSQLiteAlchemy(_TestSQLiteAlchemy, _TestSQLAlchemy,
-                        tm.TestCase):
+class TestSQLiteAlchemy(_TestSQLiteAlchemy, _TestSQLAlchemy):
     pass


 @pytest.mark.single
-class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn,
-                            tm.TestCase):
+class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn):
     pass

@@ -1859,7 +1854,7 @@ class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn,

 # -- Test Sqlite / MySQL fallback

 @pytest.mark.single
-class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest, tm.TestCase):
+class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest):
     """
     Test the fallback mode against an in-memory sqlite database.

@@ -2083,7 +2078,7 @@ def _skip_if_no_pymysql():


 @pytest.mark.single
-class TestXSQLite(SQLiteMixIn, tm.TestCase):
+class TestXSQLite(SQLiteMixIn):

     def setup_method(self, method):
         self.method = method
@@ -2287,7 +2282,7 @@ def clean_up(test_table_to_drop):


 @pytest.mark.single
-class TestSQLFlavorDeprecation(tm.TestCase):
+class TestSQLFlavorDeprecation(object):
     """
     gh-13611: test that the 'flavor' parameter is
     appropriately deprecated by checking the
@@ -2314,7 +2309,7 @@ def test_deprecated_flavor(self):

 @pytest.mark.single
 @pytest.mark.skip(reason="gh-13611: there is no support for MySQL "
                   "if SQLAlchemy is not installed")
-class TestXMySQL(MySQLMixIn, tm.TestCase):
+class TestXMySQL(MySQLMixIn):

     @classmethod
     def setup_class(cls):
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 7867e6866876a..4c92c19c51e7a 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -23,7 +23,7 @@
 from pandas.core.dtypes.common import is_categorical_dtype


-class TestStata(tm.TestCase):
+class TestStata(object):

     def setup_method(self, method):
         self.dirpath = tm.get_data_path()
diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py
index 9a24e4ae2dad0..ac490a00bf684 100644
--- a/pandas/tests/plotting/common.py
+++ b/pandas/tests/plotting/common.py
@@ -42,7 +42,7 @@ def _ok_for_gaussian_kde(kind):
     return True


-class TestPlotBase(tm.TestCase):
+class TestPlotBase(object):

     def setup_method(self, method):
diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py
index 21d8d1f0ab555..e1f64bed5598d 100644
--- a/pandas/tests/plotting/test_converter.py
+++ b/pandas/tests/plotting/test_converter.py
@@ -15,7 +15,7 @@ def test_timtetonum_accepts_unicode():
     assert (converter.time2num("00:01") == converter.time2num(u("00:01")))


-class TestDateTimeConverter(tm.TestCase):
+class TestDateTimeConverter(object):

     def setup_method(self, method):
         self.dtc = converter.DatetimeConverter()
@@ -146,7 +146,7 @@ def test_convert_nested(self):
         assert result == expected


-class TestPeriodConverter(tm.TestCase):
+class TestPeriodConverter(object):

     def setup_method(self, method):
         self.pc = converter.PeriodConverter()
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 1842af465ca89..4dfa2904313ce 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -17,7 +17,7 @@
 import pytest


-class ConcatenateBase(tm.TestCase):
+class ConcatenateBase(object):

     def setup_method(self, method):
         self.frame = DataFrame(tm.getSeriesData())
diff --git a/pandas/tests/reshape/test_hashing.py b/pandas/tests/reshape/test_hashing.py
index 622768353dd50..5f2c67ee300b5 100644
--- a/pandas/tests/reshape/test_hashing.py
+++ b/pandas/tests/reshape/test_hashing.py
@@ -9,7 +9,7 @@
 import pandas.util.testing as tm


-class TestHashing(tm.TestCase):
+class TestHashing(object):

     def setup_method(self, method):
         self.df = DataFrame(
diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py
index 3a6985fd4a373..e25661fb65271 100644
--- a/pandas/tests/reshape/test_join.py
+++ b/pandas/tests/reshape/test_join.py
@@ -19,7 +19,7 @@
 a_ = np.array


-class TestJoin(tm.TestCase):
+class TestJoin(object):

     def setup_method(self, method):
         # aggregate multiple columns
diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py
index e36b7ecbc3c7b..d3257243d7a2c 100644
--- a/pandas/tests/reshape/test_merge.py
+++ b/pandas/tests/reshape/test_merge.py
@@ -33,7 +33,7 @@ def get_test_data(ngroups=NGROUPS, n=N):
     return arr


-class TestMerge(tm.TestCase):
+class TestMerge(object):

     def setup_method(self, method):
         # aggregate multiple columns
@@ -737,7 +737,7 @@ def _check_merge(x, y):
         assert_frame_equal(result, expected, check_names=False)


-class TestMergeMulti(tm.TestCase):
+class TestMergeMulti(object):

     def setup_method(self, method):
         self.index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
diff --git a/pandas/tests/reshape/test_merge_asof.py b/pandas/tests/reshape/test_merge_asof.py
index 7e33449c92665..78bfa2ff8597c 100644
--- a/pandas/tests/reshape/test_merge_asof.py
+++ b/pandas/tests/reshape/test_merge_asof.py
@@ -11,7 +11,7 @@
 from pandas.util.testing import assert_frame_equal


-class TestAsOfMerge(tm.TestCase):
+class TestAsOfMerge(object):

     def read_data(self, name, dedupe=False):
         path = os.path.join(tm.get_data_path(), name)
diff --git a/pandas/tests/reshape/test_merge_ordered.py b/pandas/tests/reshape/test_merge_ordered.py
index 375e2e13847e8..9469e98f336fd 100644
--- a/pandas/tests/reshape/test_merge_ordered.py
+++ b/pandas/tests/reshape/test_merge_ordered.py
@@ -6,7 +6,7 @@
 from numpy import nan


-class TestOrderedMerge(tm.TestCase):
+class TestOrderedMerge(object):

     def setup_method(self, method):
         self.left = DataFrame({'key': ['a', 'c', 'e'],
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 905cd27ca4c58..270a93e4ae382 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -15,7 +15,7 @@
 from pandas.tseries.util import pivot_annual, isleapyear


-class TestPivotTable(tm.TestCase):
+class TestPivotTable(object):

     def setup_method(self, method):
         self.data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo',
@@ -982,7 +982,7 @@ def test_pivot_table_not_series(self):
         tm.assert_frame_equal(result, expected)


-class TestCrosstab(tm.TestCase):
+class TestCrosstab(object):

     def setup_method(self, method):
         df = DataFrame({'A': ['foo', 'foo', 'foo', 'foo',
@@ -1397,7 +1397,7 @@ def test_crosstab_with_numpy_size(self):
         tm.assert_frame_equal(result, expected)


-class TestPivotAnnual(tm.TestCase):
+class TestPivotAnnual(object):
     """
     New pandas of scikits.timeseries pivot_annual
     """
diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py
index de2fe444bc4ea..79626d89026a7 100644
--- a/pandas/tests/reshape/test_reshape.py
+++ b/pandas/tests/reshape/test_reshape.py
@@ -17,7 +17,7 @@
 from pandas.compat import range, u


-class TestMelt(tm.TestCase):
+class TestMelt(object):

     def setup_method(self, method):
         self.df = tm.makeTimeDataFrame()[:10]
@@ -216,7 +216,7 @@ def test_multiindex(self):
         assert res.columns.tolist() == ['CAP', 'low', 'value']


-class TestGetDummies(tm.TestCase):
+class TestGetDummies(object):

     sparse = False

@@ -644,7 +644,7 @@ class TestGetDummiesSparse(TestGetDummies):
     sparse = True


-class TestMakeAxisDummies(tm.TestCase):
+class TestMakeAxisDummies(object):

     def test_preserve_categorical_dtype(self):
         # GH13854
@@ -665,7 +665,7 @@ def test_preserve_categorical_dtype(self):
         tm.assert_frame_equal(result, expected)


-class TestLreshape(tm.TestCase):
+class TestLreshape(object):

     def test_pairs(self):
         data = {'birthdt': ['08jan2009', '20dec2008', '30dec2008', '21dec2008',
@@ -737,7 +737,7 @@ def test_pairs(self):
         pytest.raises(ValueError, lreshape, df, spec)


-class TestWideToLong(tm.TestCase):
+class TestWideToLong(object):

     def test_simple(self):
         np.random.seed(123)
diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py
index 2291030a2735c..8602b33856fea 100644
--- a/pandas/tests/reshape/test_tile.py
+++ b/pandas/tests/reshape/test_tile.py
@@ -14,7 +14,7 @@
 import pandas.core.reshape.tile as tmod


-class TestCut(tm.TestCase):
+class TestCut(object):

     def test_simple(self):
         data = np.ones(5, dtype='int64')
diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py
index 5cc476718add2..fe8d54005ba9b 100644
--- a/pandas/tests/reshape/test_union_categoricals.py
+++ b/pandas/tests/reshape/test_union_categoricals.py
@@ -7,7 +7,7 @@
 from pandas.util import testing as tm


-class TestUnionCategoricals(tm.TestCase):
+class TestUnionCategoricals(object):

     def test_union_categorical(self):
         # GH 13361
diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py
index a7fbe8d305011..e4a9591b95c26 100644
--- a/pandas/tests/reshape/test_util.py
+++ b/pandas/tests/reshape/test_util.py
@@ -5,7 +5,7 @@
 from pandas.core.reshape.util import cartesian_product


-class TestCartesianProduct(tm.TestCase):
+class TestCartesianProduct(object):

     def test_simple(self):
         x, y = list('ABC'), [1, 22]
diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/test_interval.py
index fab6f170bec60..e06f7cb34eb52 100644
--- a/pandas/tests/scalar/test_interval.py
+++ b/pandas/tests/scalar/test_interval.py
@@ -5,7 +5,7 @@
 import pandas.util.testing as tm


-class TestInterval(tm.TestCase):
+class TestInterval(object):

     def setup_method(self, method):
         self.interval = Interval(0, 1)
diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py
index 8c89fa60b12d6..54366dc9b1c3f 100644
--- a/pandas/tests/scalar/test_period.py
+++ b/pandas/tests/scalar/test_period.py
@@ -14,7 +14,7 @@
 from pandas.tseries.frequencies import DAYS, MONTHS


-class TestPeriodProperties(tm.TestCase):
+class TestPeriodProperties(object):
    "Test properties such as year, month, weekday, etc...."

    def test_is_leap_year(self):
@@ -911,7 +911,7 @@ def test_round_trip(self):
         assert new_p == p


-class TestPeriodField(tm.TestCase):
+class TestPeriodField(object):

     def test_get_period_field_raises_on_out_of_range(self):
         pytest.raises(ValueError, libperiod.get_period_field, -1, 0, 0)
@@ -921,7 +921,7 @@ def test_get_period_field_array_raises_on_out_of_range(self):
                       np.empty(1), 0)


-class TestComparisons(tm.TestCase):
+class TestComparisons(object):

     def setup_method(self, method):
         self.january1 = Period('2000-01', 'M')
@@ -1006,7 +1006,7 @@ def test_period_nat_comp(self):
             assert not left >= right


-class TestMethods(tm.TestCase):
+class TestMethods(object):

     def test_add(self):
         dt1 = Period(freq='D', year=2008, month=1, day=1)
diff --git a/pandas/tests/scalar/test_period_asfreq.py b/pandas/tests/scalar/test_period_asfreq.py
index 7011cfeef90ae..32cea60c333b7 100644
--- a/pandas/tests/scalar/test_period_asfreq.py
+++ b/pandas/tests/scalar/test_period_asfreq.py
@@ -4,7 +4,7 @@
 from pandas.tseries.frequencies import _period_code_map


-class TestFreqConversion(tm.TestCase):
+class TestFreqConversion(object):
     """Test frequency conversion of date objects"""

     def test_asfreq_corner(self):
diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py
index 82d6f6e8c84e5..ecc44204924d3 100644
--- a/pandas/tests/scalar/test_timedelta.py
+++ b/pandas/tests/scalar/test_timedelta.py
@@ -12,7 +12,7 @@
 from pandas._libs.tslib import iNaT, NaTType


-class TestTimedeltas(tm.TestCase):
+class TestTimedeltas(object):
     _multiprocess_can_split_ = True

     def setup_method(self, method):
diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py
index 64f68112f4b81..5caa0252b69b8 100644
--- a/pandas/tests/scalar/test_timestamp.py
+++ b/pandas/tests/scalar/test_timestamp.py
@@ -22,7 +22,7 @@
                                 RESO_MS, RESO_SEC)


-class TestTimestamp(tm.TestCase):
+class TestTimestamp(object):

     def test_constructor(self):
         base_str = '2014-07-01 09:00'
@@ -1094,7 +1094,7 @@ def test_is_leap_year(self):
         assert not dt.is_leap_year


-class TestTimestampNsOperations(tm.TestCase):
+class TestTimestampNsOperations(object):

     def setup_method(self, method):
         self.timestamp = Timestamp(datetime.utcnow())
@@ -1181,7 +1181,7 @@ def test_nanosecond_timestamp(self):
         assert t.nanosecond == 10


-class TestTimestampOps(tm.TestCase):
+class TestTimestampOps(object):

     def test_timestamp_and_datetime(self):
         assert ((Timestamp(datetime(2013, 10, 13)) -
@@ -1256,7 +1256,7 @@ def test_resolution(self):
             assert result == expected


-class TestTimestampToJulianDate(tm.TestCase):
+class TestTimestampToJulianDate(object):

     def test_compare_1700(self):
         r = Timestamp('1700-06-23').to_julian_date()
@@ -1279,7 +1279,7 @@ def test_compare_hour13(self):
         assert r == 2451769.0416666666666666


-class TestTimeSeries(tm.TestCase):
+class TestTimeSeries(object):

     def test_timestamp_to_datetime(self):
         tm._skip_if_no_pytz()
@@ -1490,7 +1490,7 @@ def test_woy_boundary(self):
         assert (result == [52, 52, 53, 53]).all()


-class TestTsUtil(tm.TestCase):
+class TestTsUtil(object):

     def test_min_valid(self):
         # Ensure that Timestamp.min is a valid Timestamp
diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py
index 33a4cdb6e26c4..150767ee9e2b2 100644
--- a/pandas/tests/series/test_alter_axes.py
+++ b/pandas/tests/series/test_alter_axes.py
@@ -18,7 +18,7 @@
 from .common import TestData


-class TestSeriesAlterAxes(TestData, tm.TestCase):
+class TestSeriesAlterAxes(TestData):

     def test_setindex(self):
         # wrong type
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index 71131452393a7..257f992f57f6d 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -28,7 +28,7 @@
 from .common import TestData


-class TestSeriesAnalytics(TestData, tm.TestCase):
+class TestSeriesAnalytics(TestData):

     def test_sum_zero(self):
         arr = np.array([])
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
index 5bb463c7a2ebe..1eb2b98a7d7cc 100644
--- a/pandas/tests/series/test_api.py
+++ b/pandas/tests/series/test_api.py
@@ -118,7 +118,7 @@ def test_to_sparse_pass_name(self):
         assert result.name == self.ts.name


-class TestSeriesMisc(TestData, SharedWithSparse, tm.TestCase):
+class TestSeriesMisc(TestData, SharedWithSparse):

     def test_tab_completion(self):
         # GH 9910
diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py
index 089a2c36a5574..c273d3161fff5 100644
--- a/pandas/tests/series/test_apply.py
+++ b/pandas/tests/series/test_apply.py
@@ -17,7 +17,7 @@
 from .common import TestData


-class TestSeriesApply(TestData, tm.TestCase):
+class TestSeriesApply(TestData):

     def test_apply(self):
         with np.errstate(all='ignore'):
@@ -151,7 +151,7 @@ def test_apply_dict_depr(self):
             tsdf.A.agg({'foo': ['sum', 'mean']})


-class TestSeriesAggregate(TestData, tm.TestCase):
+class TestSeriesAggregate(TestData):

     _multiprocess_can_split_ = True

@@ -307,7 +307,7 @@ def test_reduce(self):
         assert_series_equal(result, expected)


-class TestSeriesMap(TestData, tm.TestCase):
+class TestSeriesMap(TestData):

     def test_map(self):
         index, data = tm.getMixedTypeDict()
diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/test_asof.py
index a839d571c116c..1f62d618b20e1 100644
--- a/pandas/tests/series/test_asof.py
+++ b/pandas/tests/series/test_asof.py
@@ -11,7 +11,7 @@
 from .common import TestData


-class TestSeriesAsof(TestData, tm.TestCase):
+class TestSeriesAsof(TestData):

     def test_basic(self):
diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py
index 1291449ae7ce9..bb998b7fa55dd 100644
--- a/pandas/tests/series/test_combine_concat.py
+++ b/pandas/tests/series/test_combine_concat.py
@@ -18,7 +18,7 @@
 from .common import TestData


-class TestSeriesCombine(TestData, tm.TestCase):
+class TestSeriesCombine(TestData):

     def test_append(self):
         appendedSeries = self.series.append(self.objSeries)
@@ -217,7 +217,7 @@ def test_combine_first_dt64(self):
         assert_series_equal(rs, xp)


-class TestTimeseries(tm.TestCase):
+class TestTimeseries(object):

     def test_append_concat(self):
         rng = date_range('5/8/2012 1:45', periods=10, freq='5T')
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index a0a68a332f735..d591aa4f567a9 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -28,7 +28,7 @@
 from .common import TestData


-class TestSeriesConstructors(TestData, tm.TestCase):
+class TestSeriesConstructors(TestData):

     def test_invalid_dtype(self):
         # GH15520
diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py
index 50914eef1abc8..e1fc9af0cca89 100644
--- a/pandas/tests/series/test_datetime_values.py
+++ b/pandas/tests/series/test_datetime_values.py
@@ -20,7 +20,7 @@
 from .common import TestData


-class TestSeriesDatetimeValues(TestData, tm.TestCase):
+class TestSeriesDatetimeValues(TestData):

     def test_dt_namespace_accessor(self):
diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py
index 8eae59a473995..7f876357ad3ab 100644
--- a/pandas/tests/series/test_indexing.py
+++ b/pandas/tests/series/test_indexing.py
@@ -31,7 +31,7 @@
 JOIN_TYPES = ['inner', 'outer', 'left', 'right']


-class TestSeriesIndexing(TestData, tm.TestCase):
+class TestSeriesIndexing(TestData):

     def test_get(self):
@@ -2252,7 +2252,7 @@ def test_setitem_slice_into_readonly_backing_data(self):
     assert not array.any()


-class TestTimeSeriesDuplicates(tm.TestCase):
+class TestTimeSeriesDuplicates(object):

     def setup_method(self, method):
         dates = [datetime(2000, 1, 2), datetime(2000, 1, 2),
@@ -2494,7 +2494,7 @@ def test_indexing(self):
         pytest.raises(KeyError, df.__getitem__, df.index[2], )


-class TestDatetimeIndexing(tm.TestCase):
+class TestDatetimeIndexing(object):
     """
     Also test support for datetime64[ns] in Series / DataFrame
     """
@@ -2638,7 +2638,7 @@ def test_frame_datetime64_duplicated(self):
         assert (-result).all()


-class TestNatIndexing(tm.TestCase):
+class TestNatIndexing(object):

     def setup_method(self, method):
         self.series = Series(date_range('1/1/2000', periods=10))
diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py
index 31492a4ab214a..79e23459ac992 100644
--- a/pandas/tests/series/test_internals.py
+++ b/pandas/tests/series/test_internals.py
@@ -16,7 +16,7 @@
 import pandas.util.testing as tm


-class TestSeriesInternals(tm.TestCase):
+class TestSeriesInternals(object):

     def test_convert_objects(self):
diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py
index 24bb3bbc7fc16..d1c9e5a6d16cf 100644
--- a/pandas/tests/series/test_io.py
+++ b/pandas/tests/series/test_io.py
@@ -16,7 +16,7 @@
 from .common import TestData


-class TestSeriesToCSV(TestData, tm.TestCase):
+class TestSeriesToCSV(TestData):

     def test_from_csv(self):
@@ -108,7 +108,7 @@ def test_to_csv_path_is_none(self):
         assert isinstance(csv_str, str)


-class TestSeriesIO(TestData, tm.TestCase):
+class TestSeriesIO(TestData):

     def test_to_frame(self):
         self.ts.name = None
@@ -168,7 +168,7 @@ class SubclassedFrame(DataFrame):
         assert_frame_equal(result, expected)


-class TestSeriesToList(TestData, tm.TestCase):
+class TestSeriesToList(TestData):

     def test_tolist(self):
         rs = self.ts.tolist()
diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py
index 0eaab2e588cc2..c52c41877d5c0 100644
--- a/pandas/tests/series/test_missing.py
+++ b/pandas/tests/series/test_missing.py
@@ -48,7 +48,7 @@ def _simple_ts(start, end, freq='D'):
     return Series(np.random.randn(len(rng)), index=rng)


-class TestSeriesMissingData(TestData, tm.TestCase):
+class TestSeriesMissingData(TestData):

     def test_timedelta_fillna(self):
         # GH 3371
@@ -700,7 +700,7 @@ def test_series_pad_backfill_limit(self):
         assert_series_equal(result, expected)


-class TestSeriesInterpolateData(TestData, tm.TestCase):
+class TestSeriesInterpolateData(TestData):

     def test_interpolate(self):
         ts = Series(np.arange(len(self.ts), dtype=float), self.ts.index)
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index 7c7b98961d960..db0d06aa35a2a 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -28,7 +28,7 @@
 from .common import TestData


-class TestSeriesOperators(TestData, tm.TestCase):
+class TestSeriesOperators(TestData):

     def test_series_comparison_scalars(self):
         series = Series(date_range('1/1/2000', periods=10))
diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py
index 792d5b9e5c383..6e8ee38d366e2 100644
--- a/pandas/tests/series/test_period.py
+++ b/pandas/tests/series/test_period.py
@@ -10,7 +10,7 @@ def _permute(obj):
     return obj.take(np.random.permutation(len(obj)))


-class TestSeriesPeriod(tm.TestCase):
+class TestSeriesPeriod(object):

     def setup_method(self, method):
         self.series = Series(period_range('2000-01-01', periods=10, freq='D'))
diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py
index 6d2cdd046ea7f..2d02260ac7303 100644
--- a/pandas/tests/series/test_quantile.py
+++ b/pandas/tests/series/test_quantile.py
@@ -13,7 +13,7 @@
 from .common import TestData


-class TestSeriesQuantile(TestData, tm.TestCase):
+class TestSeriesQuantile(TestData):

     def test_quantile(self):
diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py
index 1a1829eb5829f..ff489eb7f15b1 100644
--- a/pandas/tests/series/test_rank.py
+++ b/pandas/tests/series/test_rank.py
@@ -15,7 +15,7 @@
 from pandas.tests.series.common import TestData


-class TestSeriesRank(tm.TestCase, TestData):
+class TestSeriesRank(TestData):
     s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3])

     results = {
diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py
index 19a99c8351db8..35d13a62ca083 100644
--- a/pandas/tests/series/test_replace.py
+++ b/pandas/tests/series/test_replace.py
@@ -11,7 +11,7 @@
 from .common import TestData


-class TestSeriesReplace(TestData, tm.TestCase):
+class TestSeriesReplace(TestData):
     def test_replace(self):
         N = 100
         ser = pd.Series(np.random.randn(N))
diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py
index 8c1d74c5c2c23..3af61b0a902d3 100644
--- a/pandas/tests/series/test_repr.py
+++ b/pandas/tests/series/test_repr.py
@@ -18,7 +18,7 @@
 from .common import TestData


-class TestSeriesRepr(TestData, tm.TestCase):
+class TestSeriesRepr(TestData):

     def test_multilevel_name_print(self):
         index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py
index 791a7d5db9a26..40b0280de3719 100644
--- a/pandas/tests/series/test_sorting.py
+++ b/pandas/tests/series/test_sorting.py
@@ -13,7 +13,7 @@
 from .common import TestData


-class TestSeriesSorting(TestData, tm.TestCase):
+class TestSeriesSorting(TestData):

     def test_sortlevel_deprecated(self):
         ts = self.ts.copy()
diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py
index fe8a5e7658d9c..37c8d7343f7f1 100644
--- a/pandas/tests/series/test_subclass.py
+++ b/pandas/tests/series/test_subclass.py
@@ -6,7 +6,7 @@
 import pandas.util.testing as tm


-class TestSeriesSubclassing(tm.TestCase):
+class TestSeriesSubclassing(object):

     def test_indexing_sliced(self):
         s = tm.SubclassedSeries([1, 2, 3, 4], index=list('abcd'))
@@ -33,7 +33,7 @@ def test_to_frame(self):
         assert isinstance(res, tm.SubclassedDataFrame)


-class TestSparseSeriesSubclassing(tm.TestCase):
+class TestSparseSeriesSubclassing(object):

     def test_subclass_sparse_slice(self):
         # int64
diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py
index 78e5d87636532..d5517bdcceac7 100644
--- a/pandas/tests/series/test_timeseries.py
+++ b/pandas/tests/series/test_timeseries.py
@@ -33,7 +33,7 @@ def assert_range_equal(left, right):
     assert (left.tz == right.tz)


-class TestTimeSeries(TestData, tm.TestCase):
+class TestTimeSeries(TestData):

     def test_shift(self):
         shifted = self.ts.shift(1)
diff --git a/pandas/tests/sparse/test_arithmetics.py b/pandas/tests/sparse/test_arithmetics.py
index 468d856ca68ce..f023cd0003910 100644
--- a/pandas/tests/sparse/test_arithmetics.py
+++ b/pandas/tests/sparse/test_arithmetics.py
@@ -3,7 +3,7 @@
 import pandas.util.testing as tm


-class TestSparseArrayArithmetics(tm.TestCase):
+class TestSparseArrayArithmetics(object):

     _base = np.array
     _klass = pd.SparseArray
diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py
index c205a1efbeeb1..ab7340c89f016 100644
--- a/pandas/tests/sparse/test_array.py
+++ b/pandas/tests/sparse/test_array.py
@@ -15,7 +15,7 @@
 import pandas.util.testing as tm


-class TestSparseArray(tm.TestCase):
+class TestSparseArray(object):

     def setup_method(self, method):
         self.arr_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6])
@@ -656,7 +656,7 @@ def test_fillna_overlap(self):
         tm.assert_sp_array_equal(res, exp)


-class TestSparseArrayAnalytics(tm.TestCase):
+class TestSparseArrayAnalytics(object):

     def test_sum(self):
         data = np.arange(10).astype(float)
diff --git a/pandas/tests/sparse/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py
index ab56a83c90530..15639fbe156c6 100644
--- a/pandas/tests/sparse/test_combine_concat.py
+++ b/pandas/tests/sparse/test_combine_concat.py
@@ -5,7 +5,7 @@
 import pandas.util.testing as tm


-class TestSparseSeriesConcat(tm.TestCase):
+class TestSparseSeriesConcat(object):

     def test_concat(self):
         val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
@@ -122,7 +122,7 @@ def test_concat_sparse_dense(self):
         tm.assert_sp_series_equal(res, exp)


-class TestSparseDataFrameConcat(tm.TestCase):
+class TestSparseDataFrameConcat(object):

     def setup_method(self, method):
diff --git a/pandas/tests/sparse/test_format.py b/pandas/tests/sparse/test_format.py
index 74be14ff5cf15..d983bd209085a 100644
--- a/pandas/tests/sparse/test_format.py
+++ b/pandas/tests/sparse/test_format.py
@@ -13,7 +13,7 @@
 use_32bit_repr = is_platform_windows() or is_platform_32bit()


-class TestSparseSeriesFormatting(tm.TestCase):
+class TestSparseSeriesFormatting(object):

     @property
     def dtype_format_for_platform(self):
@@ -105,7 +105,7 @@ def test_sparse_int(self):
         assert result == exp


-class TestSparseDataFrameFormatting(tm.TestCase):
+class TestSparseDataFrameFormatting(object):

     def test_sparse_frame(self):
         # GH 13110
diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py
index 762bfba85dd0a..4a4a596e3bed4 100644
--- a/pandas/tests/sparse/test_frame.py
+++ b/pandas/tests/sparse/test_frame.py
@@ -26,7 +26,7 @@
 from pandas.tests.frame.test_api import SharedWithSparse


-class TestSparseDataFrame(tm.TestCase, SharedWithSparse):
+class TestSparseDataFrame(SharedWithSparse):
     klass = SparseDataFrame

     def setup_method(self, method):
@@ -1245,7 +1245,7 @@ def test_from_to_scipy_object(spmatrix, fill_value):
     assert sdf.to_coo().dtype == res_dtype


-class TestSparseDataFrameArithmetic(tm.TestCase):
+class TestSparseDataFrameArithmetic(object):

     def test_numeric_op_scalar(self):
         df = pd.DataFrame({'A': [nan, nan, 0, 1, ],
@@ -1274,7 +1274,7 @@ def test_comparison_op_scalar(self):
         tm.assert_frame_equal(res.to_dense(), df != 0)


-class TestSparseDataFrameAnalytics(tm.TestCase):
+class TestSparseDataFrameAnalytics(object):
     def setup_method(self, method):
         self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
                      'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py
index 501e40c6ebffd..c9049ed9743dd 100644
--- a/pandas/tests/sparse/test_groupby.py
+++ b/pandas/tests/sparse/test_groupby.py
@@ -4,7 +4,7 @@
 import pandas.util.testing as tm


-class TestSparseGroupBy(tm.TestCase):
+class TestSparseGroupBy(object):

     def setup_method(self, method):
         self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py
index bb449c05729d4..382cff4b9d0ac 100644
--- a/pandas/tests/sparse/test_indexing.py
+++ b/pandas/tests/sparse/test_indexing.py
@@ -6,7 +6,7 @@
 import pandas.util.testing as tm


-class TestSparseSeriesIndexing(tm.TestCase):
+class TestSparseSeriesIndexing(object):

     def setup_method(self, method):
         self.orig = pd.Series([1, np.nan, np.nan, 3, np.nan])
@@ -589,7 +589,7 @@ def test_reindex(self):
         assert sparse is not res


-class TestSparseDataFrameIndexing(tm.TestCase):
+class TestSparseDataFrameIndexing(object):

     def test_getitem(self):
         orig = pd.DataFrame([[1, np.nan, np.nan],
@@ -952,7 +952,7 @@ def test_reindex_fill_value(self):
         tm.assert_sp_frame_equal(res, exp)


-class TestMultitype(tm.TestCase):
+class TestMultitype(object):

     def setup_method(self, method):
         self.cols = ['string', 'int', 'float', 'object']
diff --git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py
index c7207870b22b9..c41025582c651 100644
--- a/pandas/tests/sparse/test_libsparse.py
+++ b/pandas/tests/sparse/test_libsparse.py
@@ -42,7 +42,7 @@ def _check_case_dict(case):
     _check_case([], [], [], [], [], [])


-class TestSparseIndexUnion(tm.TestCase):
+class TestSparseIndexUnion(object):

     def test_index_make_union(self):
         def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
@@ -188,7 +188,7 @@ def test_intindex_make_union(self):
             a.make_union(b)


-class TestSparseIndexIntersect(tm.TestCase):
+class TestSparseIndexIntersect(object):

     def test_intersect(self):
         def _check_correct(a, b, expected):
@@ -239,7 +239,7 @@ def test_intersect_identical(self):
             assert case.intersect(case).equals(case)


-class TestSparseIndexCommon(tm.TestCase):
+class TestSparseIndexCommon(object):

     def test_int_internal(self):
         idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer')
@@ -387,7 +387,7 @@ def _check(index):
     # corner cases


-class TestBlockIndex(tm.TestCase):
+class TestBlockIndex(object):

     def test_block_internal(self):
         idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='block')
@@ -472,7 +472,7 @@ def test_to_block_index(self):
         assert index.to_block_index() is index


-class TestIntIndex(tm.TestCase):
+class TestIntIndex(object):

     def test_check_integrity(self):
@@ -557,7 +557,7 @@ def test_to_int_index(self):
         assert index.to_int_index() is index


-class TestSparseOperators(tm.TestCase):
+class TestSparseOperators(object):

     def _op_tests(self, sparse_op, python_op):
         def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
diff --git a/pandas/tests/sparse/test_list.py b/pandas/tests/sparse/test_list.py
index 3eab34661ae2b..6c721ca813a21 100644
--- a/pandas/tests/sparse/test_list.py
+++ b/pandas/tests/sparse/test_list.py
@@ -7,7 +7,7 @@
 import pandas.util.testing as tm


-class TestSparseList(tm.TestCase):
+class TestSparseList(object):

     def setup_method(self, method):
         self.na_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6])
diff --git a/pandas/tests/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py
index 57c47b4e68811..e7eba63e4e0b3 100644
--- a/pandas/tests/sparse/test_pivot.py
+++ b/pandas/tests/sparse/test_pivot.py
@@ -3,7 +3,7 @@
 import pandas.util.testing as tm


-class TestPivotTable(tm.TestCase):
+class TestPivotTable(object):

     def setup_method(self, method):
         self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py
index b756b63523798..344bca54b180b 100644
--- a/pandas/tests/sparse/test_series.py
+++ b/pandas/tests/sparse/test_series.py
@@ -56,7 +56,7 @@ def _test_data2_zero():
     return arr, index


-class TestSparseSeries(tm.TestCase, SharedWithSparse):
+class TestSparseSeries(SharedWithSparse):

     def setup_method(self, method):
         arr, index = _test_data1()
@@ -934,7 +934,7 @@ def test_combine_first(self):
         tm.assert_sp_series_equal(result, expected)


-class TestSparseHandlingMultiIndexes(tm.TestCase):
+class TestSparseHandlingMultiIndexes(object):

     def setup_method(self, method):
         miindex = pd.MultiIndex.from_product(
@@ -960,7 +960,7 @@ def test_round_trip_preserve_multiindex_names(self):
                                   check_names=True)


-class TestSparseSeriesScipyInteraction(tm.TestCase):
+class TestSparseSeriesScipyInteraction(object):
     # Issue 8048: add SparseSeries coo methods

     def setup_method(self, method):
@@ -1310,7 +1310,7 @@ def _dense_series_compare(s, f):
     tm.assert_series_equal(result.to_dense(), dense_result)


-class TestSparseSeriesAnalytics(tm.TestCase):
+class TestSparseSeriesAnalytics(object):

     def setup_method(self, method):
         arr, index = _test_data1()
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index dda95426d8011..093730fb2478b 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -23,7 +23,7 @@
 from pandas.util.testing import assert_almost_equal


-class TestMatch(tm.TestCase):
+class TestMatch(object):

     def test_ints(self):
         values = np.array([0, 2, 1])
@@ -59,7 +59,7 @@ def test_strings(self):
         tm.assert_series_equal(result, expected)


-class TestSafeSort(tm.TestCase):
+class TestSafeSort(object):

     def test_basic_sort(self):
         values = [3, 1, 2, 0, 4]
@@ -146,7 +146,7 @@ def test_exceptions(self):
             algos.safe_sort(values=[0, 1, 2, 1], labels=[0, 1])


-class TestFactorize(tm.TestCase):
+class TestFactorize(object):

     def test_basic(self):
@@ -306,7 +306,7 @@ def test_uint64_factorize(self):
         tm.assert_numpy_array_equal(uniques, exp_uniques)


-class TestUnique(tm.TestCase):
+class TestUnique(object):

     def test_ints(self):
         arr = np.random.randint(0, 100, size=50)
@@ -503,7 +503,7 @@ def test_order_of_appearance(self):
         tm.assert_categorical_equal(result, expected)


-class TestIsin(tm.TestCase):
+class TestIsin(object):

     def test_invalid(self):
@@ -587,7 +587,7 @@ def test_large(self):
         tm.assert_numpy_array_equal(result, expected)


-class TestValueCounts(tm.TestCase):
+class TestValueCounts(object):

     def test_value_counts(self):
         np.random.seed(1234)
@@ -779,7 +779,7 @@ def test_value_counts_uint64(self):
         tm.assert_series_equal(result, expected)


-class TestDuplicated(tm.TestCase):
+class TestDuplicated(object):

     def test_duplicated_with_nas(self):
         keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object)
@@ -1014,7 +1014,7 @@ def test_group_var_constant(self):
         tm.assert_almost_equal(out[0, 0], 0.0)


-class TestGroupVarFloat64(tm.TestCase, GroupVarTestMixin):
+class TestGroupVarFloat64(GroupVarTestMixin):
     __test__ = True

     algo = libgroupby.group_var_float64
@@ -1037,7 +1037,7 @@ def test_group_var_large_inputs(self):
         tm.assert_almost_equal(out[0, 0], 1.0 / 12, check_less_precise=True)


-class TestGroupVarFloat32(tm.TestCase, GroupVarTestMixin):
+class TestGroupVarFloat32(GroupVarTestMixin):
     __test__ = True

     algo = libgroupby.group_var_float32
@@ -1045,7 +1045,7 @@ class TestGroupVarFloat32(tm.TestCase, GroupVarTestMixin):
     rtol = 1e-2


-class TestHashTable(tm.TestCase):
+class TestHashTable(object):

     def test_lookup_nan(self):
         xs = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3])
@@ -1116,7 +1116,7 @@ def test_unique_label_indices():
                                 check_dtype=False)


-class TestRank(tm.TestCase):
+class TestRank(object):

     def test_scipy_compat(self):
         tm._skip_if_no_scipy()
@@ -1184,7 +1184,7 @@ def test_arrmap():
     assert (result.dtype == np.bool_)


-class TestTseriesUtil(tm.TestCase):
+class TestTseriesUtil(object):

     def test_combineFunc(self):
         pass
@@ -1378,7 +1378,7 @@ def test_int64_add_overflow():
                           b_mask=np.array([False, True]))


-class TestMode(tm.TestCase):
+class TestMode(object):

     def test_no_mode(self):
         exp = Series([], dtype=np.float64)
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index dcc685ceef28e..85976b9fabd66 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -86,7 +86,7 @@ def check_result(self, result, expected, klass=None):
         assert result == expected


-class TestPandasDelegate(tm.TestCase):
+class TestPandasDelegate(object):

     class Delegator(object):
         _properties = ['foo']
@@ -152,7 +152,7 @@ def test_memory_usage(self):
             sys.getsizeof(delegate)


-class Ops(tm.TestCase):
+class Ops(object):

     def _allow_na_ops(self, obj):
         """Whether to skip test cases including NaN"""
@@ -1008,7 +1008,7 @@ def test_numpy_transpose(self):
                                np.transpose, obj, axes=1)


-class TestNoNewAttributesMixin(tm.TestCase):
+class TestNoNewAttributesMixin(object):

     def test_mixin(self):
         class T(NoNewAttributesMixin):
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 2a53cf15278e0..03adf17f50300 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -28,7 +28,7 @@
 from pandas.core.config import option_context


-class TestCategorical(tm.TestCase):
+class TestCategorical(object):

     def setup_method(self, method):
         self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
@@ -1600,7 +1600,7 @@ def test_validate_inplace(self):
                 cat.sort_values(inplace=value)


-class TestCategoricalAsBlock(tm.TestCase):
+class TestCategoricalAsBlock(object):

     def setup_method(self, method):
         self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
@@ -4411,7 +4411,7 @@ def test_concat_categorical(self):
         tm.assert_frame_equal(res, exp)


-class TestCategoricalSubclassing(tm.TestCase):
+class TestCategoricalSubclassing(object):

     def test_constructor(self):
         sc = tm.SubclassedCategorical(['a', 'b', 'c'])
diff --git a/pandas/tests/test_compat.py b/pandas/tests/test_compat.py
index 5c56142687b5c..ff9d09c033164 100644
--- a/pandas/tests/test_compat.py
+++ b/pandas/tests/test_compat.py
@@ -6,10 +6,9 @@
 from pandas.compat import (range, zip, map, filter, lrange, lzip, lmap,
                            lfilter, builtins, iterkeys, itervalues, iteritems,
                            next)
-import pandas.util.testing as tm


-class TestBuiltinIterators(tm.TestCase):
+class TestBuiltinIterators(object):

     @classmethod
     def check_result(cls, actual, expected, lengths):
diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py
index 79475b297f83c..f014b16976d39 100644
--- a/pandas/tests/test_config.py
+++ b/pandas/tests/test_config.py
@@ -1,13 +1,12 @@
 # -*- coding: utf-8 -*-
 import pytest

-import pandas.util.testing as tm
 import pandas as pd

 import warnings


-class TestConfig(tm.TestCase):
+class TestConfig(object):

     def __init__(self, *args):
         super(TestConfig, self).__init__(*args)
diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py
index 79b057c0548a9..fae7bfa513dcd 100644
--- a/pandas/tests/test_expressions.py
+++ b/pandas/tests/test_expressions.py
@@ -56,7 +56,7 @@

 @pytest.mark.skipif(not expr._USE_NUMEXPR,
                     reason='not using numexpr')
-class TestExpressions(tm.TestCase):
+class TestExpressions(object):

     def setup_method(self, method):
diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py
index 0f2a3ce1d1e94..0900d21b250ed 100644
--- a/pandas/tests/test_internals.py
+++ b/pandas/tests/test_internals.py
@@ -192,7 +192,7 @@ def create_mgr(descr, item_shape=None):
         [mgr_items] + [np.arange(n) for n in item_shape])


-class TestBlock(tm.TestCase):
+class TestBlock(object):

     def setup_method(self, method):
         # self.fblock = get_float_ex()  # a,c,e
@@ -309,7 +309,7 @@ def test_split_block_at(self):
         # assert len(bs), 0)


-class TestDatetimeBlock(tm.TestCase):
+class TestDatetimeBlock(object):

     def test_try_coerce_arg(self):
         block = create_block('datetime', [0])
@@ -1072,7 +1072,7 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer,
 #                                reindex_indexer(new_labels, indexer, axis)


-class TestBlockPlacement(tm.TestCase):
+class TestBlockPlacement(object):

     def test_slice_len(self):
         assert len(BlockPlacement(slice(0, 4))) == 4
diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py
index e9e7ffba7fe54..3fc13d23b53f7 100644
--- a/pandas/tests/test_join.py
+++ b/pandas/tests/test_join.py
@@ -8,7 +8,7 @@
 from pandas.util.testing import assert_almost_equal


-class TestIndexer(tm.TestCase):
+class TestIndexer(object):

     def test_outer_join_indexer(self):
         typemap = [('int32', _join.outer_join_indexer_int32),
diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py
index 0ac05bae624e5..df97095035952 100644
--- a/pandas/tests/test_lib.py
+++ b/pandas/tests/test_lib.py
@@ -8,7 +8,7 @@
 import pandas.util.testing as tm


-class TestMisc(tm.TestCase):
+class TestMisc(object):

     def test_max_len_string_array(self):
@@ -41,7 +41,7 @@ def test_fast_unique_multiple_list_gen_sort(self):
         tm.assert_numpy_array_equal(np.array(out), expected)


-class TestIndexing(tm.TestCase):
+class TestIndexing(object):

     def test_maybe_indices_to_slice_left_edge(self):
         target = np.arange(100)
@@ -201,7 +201,7 @@ def test_get_reverse_indexer(self):
         assert np.array_equal(result, expected)


-class TestNullObj(tm.TestCase):
+class TestNullObj(object):

     _1d_methods = ['isnullobj', 'isnullobj_old']
     _2d_methods = ['isnullobj2d', 'isnullobj2d_old']
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index bfab10b7e63e7..ab28b8b43f359 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -56,7 +56,7 @@ def setup_method(self, method):
         self.ymd.index.set_names(['year', 'month', 'day'], inplace=True)


-class TestMultiLevel(Base, tm.TestCase):
+class TestMultiLevel(Base):

     def test_append(self):
         a, b = self.frame[:5], self.frame[5:]
@@ -2352,7 +2352,7 @@ def test_iloc_mi(self):
         tm.assert_frame_equal(result, expected)


-class TestSorted(Base, tm.TestCase):
+class TestSorted(Base):
     """ everthing you wanted to test about sorting """

     def test_sort_index_preserve_levels(self):
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
index c5ecd75290fc6..6798e64b01d7e 100644
--- a/pandas/tests/test_nanops.py
+++ b/pandas/tests/test_nanops.py
@@ -16,7 +16,7 @@
 use_bn = nanops._USE_BOTTLENECK


-class TestnanopsDataFrame(tm.TestCase):
+class TestnanopsDataFrame(object):

     def setup_method(self, method):
         np.random.seed(11235)
@@ -742,7 +742,7 @@ def test__bn_ok_dtype(self):
         assert not nanops._bn_ok_dtype(self.arr_obj.dtype, 'test')


-class TestEnsureNumeric(tm.TestCase):
+class TestEnsureNumeric(object):

     def test_numeric_values(self):
         # Test integer
@@ -782,7 +782,7 @@ def test_non_convertable_values(self):
         pytest.raises(TypeError, lambda: nanops._ensure_numeric([]))


-class TestNanvarFixedValues(tm.TestCase):
+class TestNanvarFixedValues(object):

     # xref GH10242

@@ -895,7 +895,7 @@ def prng(self):
         return np.random.RandomState(1234)


-class TestNanskewFixedValues(tm.TestCase):
+class TestNanskewFixedValues(object):

     # xref GH 11974

@@ -945,7 +945,7 @@ def prng(self):
         return np.random.RandomState(1234)


-class TestNankurtFixedValues(tm.TestCase):
+class TestNankurtFixedValues(object):

     # xref GH 11974

diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index 44e1db494c041..3243b69a25acd 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -901,7 +901,7 @@ def test_set_value(self):
             self.panel.set_value('a')


-class TestPanel(tm.TestCase, PanelTests, CheckIndexing, SafeForLongAndSparse,
+class TestPanel(PanelTests, CheckIndexing, SafeForLongAndSparse,
                 SafeForSparse):

     @classmethod
@@ -2430,7 +2430,7 @@ def test_all_any_unhandled(self):
         pytest.raises(NotImplementedError, self.panel.any, bool_only=True)


-class TestLongPanel(tm.TestCase):
+class TestLongPanel(object):
     """
     LongPanel no longer exists, but...
     """
diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py
index 7d966422a7d79..96f02d63712fc 100644
--- a/pandas/tests/test_panel4d.py
+++ b/pandas/tests/test_panel4d.py
@@ -593,7 +593,7 @@ def test_set_value(self):
         assert is_float_dtype(res3['l4'].values)


-class TestPanel4d(tm.TestCase, CheckIndexing, SafeForSparse,
+class TestPanel4d(CheckIndexing, SafeForSparse,
                   SafeForLongAndSparse):

     def setup_method(self, method):
diff --git a/pandas/tests/test_panelnd.py b/pandas/tests/test_panelnd.py
index 7861b98b0ddd9..c473e3c09cc74 100644
--- a/pandas/tests/test_panelnd.py
+++ b/pandas/tests/test_panelnd.py
@@ -9,7 +9,7 @@
 import pandas.util.testing as tm


-class TestPanelnd(tm.TestCase):
+class TestPanelnd(object):

     def setup_method(self, method):
         pass
diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py
index c6719790c9e35..9734431c8b012 100644
--- a/pandas/tests/test_resample.py
+++ b/pandas/tests/test_resample.py
@@ -50,7 +50,7 @@ def _simple_pts(start, end, freq='D'):
     return Series(np.random.randn(len(rng)), index=rng)


-class TestResampleAPI(tm.TestCase):
+class TestResampleAPI(object):

     def setup_method(self, method):
         dti = DatetimeIndex(start=datetime(2005, 1, 1),
@@ -847,7 +847,7 @@ def test_resample_loffset_arg_type(self):
             assert_frame_equal(result_how, expected)


-class TestDatetimeIndex(Base, tm.TestCase):
+class TestDatetimeIndex(Base):
     _index_factory = lambda x: date_range

     def setup_method(self, method):
@@ -2165,7 +2165,7 @@ def test_resample_datetime_values(self):
         tm.assert_series_equal(res, exp)


-class TestPeriodIndex(Base, tm.TestCase):
+class TestPeriodIndex(Base):
     _index_factory = lambda x: period_range

     def create_series(self):
@@ -2773,7 +2773,7 @@ def test_evenly_divisible_with_no_extra_bins(self):
         assert_frame_equal(result, expected)


-class TestTimedeltaIndex(Base, tm.TestCase):
+class TestTimedeltaIndex(Base):
     _index_factory = lambda x: timedelta_range

     def create_series(self):
@@ -2794,7 +2794,7 @@ def test_asfreq_bug(self):
         assert_frame_equal(result, expected)


-class TestResamplerGrouper(tm.TestCase):
+class TestResamplerGrouper(object):

     def setup_method(self, method):
         self.frame = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8,
@@ -2989,7 +2989,7 @@ def test_median_duplicate_columns(self):
         assert_frame_equal(result, expected)


-class TestTimeGrouper(tm.TestCase):
+class TestTimeGrouper(object):

     def setup_method(self, method):
         self.ts = Series(np.random.randn(1000),
diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py
index c40cbcfdec883..e09270bcadf27 100644
--- a/pandas/tests/test_sorting.py
+++ b/pandas/tests/test_sorting.py
@@ -16,7 +16,7 @@
                                 lexsort_indexer)


-class TestSorting(tm.TestCase):
+class TestSorting(object):

     @pytest.mark.slow
     def test_int64_overflow(self):
@@ -191,7 +191,7 @@ def test_nargsort(self):
         tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False)


-class TestMerge(tm.TestCase):
+class TestMerge(object):

     @pytest.mark.slow
     def test_int64_overflow_issues(self):
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index 412a88e13bb23..f28a5926087ac 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -19,7 +19,7 @@
 import pandas.core.strings as strings


-class TestStringMethods(tm.TestCase):
+class TestStringMethods(object):

     def test_api(self):
diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py
index 617d268be8f67..7b97b0e975df3 100644
--- a/pandas/tests/test_take.py
+++ b/pandas/tests/test_take.py
@@ -9,7 +9,7 @@
 from pandas._libs.tslib import iNaT


-class TestTake(tm.TestCase):
+class TestTake(object):
     # standard incompatible fill error
     fill_error = re.compile("Incompatible type for fill_value")

diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py
index 2e84638533820..fe7c3b99987f5 100644
--- a/pandas/tests/test_testing.py
+++ b/pandas/tests/test_testing.py
@@ -12,7 +12,7 @@
 from pandas.compat import is_platform_windows


-class TestAssertAlmostEqual(tm.TestCase):
+class TestAssertAlmostEqual(object):

     def _assert_almost_equal_both(self, a, b, **kwargs):
         assert_almost_equal(a, b, **kwargs)
@@ -139,7 +139,7 @@ def test_assert_almost_equal_object(self):
         self._assert_almost_equal_both(a, b)


-class TestUtilTesting(tm.TestCase):
+class TestUtilTesting(object):

     def test_raise_with_traceback(self):
         with tm.assert_raises_regex(LookupError, "error_text"):
@@ -157,7 +157,7 @@ def test_raise_with_traceback(self):
             raise_with_traceback(e, traceback)


-class TestAssertNumpyArrayEqual(tm.TestCase):
+class TestAssertNumpyArrayEqual(object):

     def test_numpy_array_equal_message(self):
@@ -339,7 +339,7 @@ def test_assert_almost_equal_iterable_message(self):
             assert_almost_equal([1, 2], [1, 3])


-class TestAssertIndexEqual(tm.TestCase):
+class TestAssertIndexEqual(object):

     def test_index_equal_message(self):
@@ -486,7 +486,7 @@ def test_index_equal_metadata_message(self):
             assert_index_equal(idx1, idx2)


-class TestAssertSeriesEqual(tm.TestCase):
+class TestAssertSeriesEqual(object):

     def _assert_equal(self, x, y, **kwargs):
         assert_series_equal(x, y, **kwargs)
@@ -580,7 +580,7 @@ def test_series_equal_message(self):
                                 check_less_precise=True)


-class TestAssertFrameEqual(tm.TestCase):
+class TestAssertFrameEqual(object):

     def _assert_equal(self, x, y, **kwargs):
         assert_frame_equal(x, y, **kwargs)
@@ -679,7 +679,7 @@ def test_frame_equal_message(self):
                                by_blocks=True)


-class TestAssertCategoricalEqual(tm.TestCase):
+class TestAssertCategoricalEqual(object):

     def test_categorical_equal_message(self):
@@ -717,7 +717,7 @@ def test_categorical_equal_message(self):
             tm.assert_categorical_equal(a, b)


-class TestRNGContext(tm.TestCase):
+class TestRNGContext(object):

     def test_RNGContext(self):
         expected0 = 1.764052345967664
@@ -729,7 +729,7 @@ def test_RNGContext(self):
             assert np.random.randn() == expected0


-class TestLocale(tm.TestCase):
+class TestLocale(object):

     def test_locale(self):
         if sys.platform == 'win32':
diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py
index e9e04f76704f2..2d9ab78ceeb8a 100644
--- a/pandas/tests/test_util.py
+++ b/pandas/tests/test_util.py
@@ -20,7 +20,7 @@
 LOCALE_OVERRIDE = os.environ.get('LOCALE_OVERRIDE', None)


-class TestDecorators(tm.TestCase):
+class TestDecorators(object):

     def setup_method(self, method):
         @deprecate_kwarg('old', 'new')
@@ -89,7 +89,7 @@ def test_rands_array():
     assert(len(arr[1, 1]) == 7)


-class TestValidateArgs(tm.TestCase):
+class TestValidateArgs(object):
     fname = 'func'

     def test_bad_min_fname_arg_count(self):
@@ -159,7 +159,7 @@ def test_validation(self):
         validate_args(self.fname, (1, None), 2, compat_args)


-class TestValidateKwargs(tm.TestCase):
+class TestValidateKwargs(object):
     fname = 'func'

     def test_bad_kwarg(self):
@@ -225,7 +225,7 @@ def test_validate_bool_kwarg(self):
             assert validate_bool_kwarg(value, name) == value


-class TestValidateKwargsAndArgs(tm.TestCase):
+class TestValidateKwargsAndArgs(object):
     fname = 'func'

     def test_invalid_total_length_max_length_one(self):
@@ -322,7 +322,7 @@ def test_validation(self):
                              compat_args)


-class TestMove(tm.TestCase):
+class TestMove(object):

     def test_cannot_create_instance_of_stolenbuffer(self):
         """Stolen buffers need to be created through the smart constructor
@@ -407,11 +407,10 @@ def test_numpy_errstate_is_default():
     assert np.geterr() == expected


-class TestLocaleUtils(tm.TestCase):
+class TestLocaleUtils(object):

     @classmethod
     def setup_class(cls):
-        super(TestLocaleUtils, cls).setup_class()
         cls.locales = tm.get_locales()

         if not cls.locales:
@@ -421,7 +420,6 @@ def setup_class(cls):

     @classmethod
     def teardown_class(cls):
-        super(TestLocaleUtils, cls).teardown_class()
         del cls.locales

     def test_get_locales(self):
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index 5436f3c342019..634cd5fe2586b 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -30,7 +30,7 @@ def assert_equal(left, right):
         tm.assert_frame_equal(left, right)


-class Base(tm.TestCase):
+class Base(object):

     _nan_locs = np.arange(20, 40)
     _inf_locs = np.array([])
@@ -562,8 +562,8 @@ def test_deprecations(self):
 # gh-12373 : rolling functions error on float32 data
 # make sure rolling functions works for different dtypes
 #
-# NOTE that these are yielded tests and so _create_data is
-# explicity called, nor do these inherit from tm.TestCase
+# NOTE that these are yielded tests and so _create_data
+# is explicitly called.
 #
 # further note that we are only checking rolling for fully dtype
 # compliance (though both expanding and ewm inherit)
@@ -3037,7 +3037,7 @@ def test_rolling_min_max_numeric_types(self):
         assert result.dtypes[0] == np.dtype("f8")


-class TestGrouperGrouping(tm.TestCase):
+class TestGrouperGrouping(object):

     def setup_method(self, method):
         self.series = Series(np.arange(10))
@@ -3182,7 +3182,7 @@ def test_expanding_apply(self):
         tm.assert_frame_equal(result, expected)


-class TestRollingTS(tm.TestCase):
+class TestRollingTS(object):

     # rolling time-series friendly
     # xref GH13327
diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py
index b298df4f4b5d8..f82ad97d7b70f 100644
--- a/pandas/tests/tools/test_numeric.py
+++ b/pandas/tests/tools/test_numeric.py
@@ -9,7 +9,7 @@
 from numpy import iinfo


-class TestToNumeric(tm.TestCase):
+class TestToNumeric(object):

     def test_series(self):
         s = pd.Series(['1', '-3.14', '7'])
diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py
index a78150e9cf728..2edca1bd4676b 100644
--- a/pandas/tests/tseries/test_frequencies.py
+++ b/pandas/tests/tseries/test_frequencies.py
@@ -19,7 +19,7 @@
 from pandas import Timedelta


-class TestToOffset(tm.TestCase):
+class TestToOffset(object):

     def test_to_offset_multiple(self):
         freqstr = '2h30min'
@@ -342,7 +342,7 @@ def _assert_depr(freq, expected, aliases):
     assert (frequencies._period_str_to_code('NS') == 12000)


-class TestFrequencyCode(tm.TestCase):
+class TestFrequencyCode(object):

     def test_freq_code(self):
         assert frequencies.get_freq('A') == 1000
@@ -493,7 +493,7 @@ def test_get_freq_code(self):
 _dti = DatetimeIndex


-class TestFrequencyInference(tm.TestCase):
+class TestFrequencyInference(object):

     def test_raise_if_period_index(self):
         index = PeriodIndex(start="1/1/1990", periods=20, freq="M")
diff --git a/pandas/tests/tseries/test_holiday.py b/pandas/tests/tseries/test_holiday.py
index 8ea4140bb85a7..59a2a225ab5f8 100644
--- a/pandas/tests/tseries/test_holiday.py
+++ b/pandas/tests/tseries/test_holiday.py
@@ -19,7 +19,7 @@
 from pytz import utc


-class TestCalendar(tm.TestCase):
+class TestCalendar(object):

     def setup_method(self, method):
         self.holiday_list = [
@@ -85,7 +85,7 @@ def test_rule_from_name(self):
     assert USFedCal.rule_from_name('Thanksgiving') == USThanksgivingDay


-class TestHoliday(tm.TestCase):
+class TestHoliday(object):

     def setup_method(self, method):
         self.start_date = datetime(2011, 1, 1)
@@ -284,7 +284,7 @@ def test_factory(self):
     assert len(class_3.rules) == 2


-class TestObservanceRules(tm.TestCase):
+class TestObservanceRules(object):

     def setup_method(self, method):
         self.we = datetime(2014, 4, 9)
@@ -342,7 +342,7 @@ def test_after_nearest_workday(self):
         assert after_nearest_workday(self.fr) == self.mo


-class TestFederalHolidayCalendar(tm.TestCase):
+class TestFederalHolidayCalendar(object):

     def test_no_mlk_before_1984(self):
         # see gh-10278
@@ -375,7 +375,7 @@ class MemorialDay(AbstractHolidayCalendar):
                 datetime(1979, 5, 28, 0, 0)]


-class TestHolidayConflictingArguments(tm.TestCase):
+class TestHolidayConflictingArguments(object):

     def test_both_offset_observance_raises(self):
         # see gh-10217
diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py
index b6cd5e7958342..09de064c15183 100644
--- a/pandas/tests/tseries/test_offsets.py
+++ b/pandas/tests/tseries/test_offsets.py
@@ -97,7 +97,7 @@ def test_to_m8():
 #####


-class Base(tm.TestCase):
+class Base(object):
     _offset = None
     _offset_types =
[getattr(offsets, o) for o in offsets.__all__] @@ -4334,7 +4334,7 @@ def test_Easter(): assertEq(-Easter(2), datetime(2010, 4, 4), datetime(2008, 3, 23)) -class TestTicks(tm.TestCase): +class TestTicks(object): ticks = [Hour, Minute, Second, Milli, Micro, Nano] @@ -4491,7 +4491,7 @@ def test_compare_ticks(self): assert kls(3) != kls(4) -class TestOffsetNames(tm.TestCase): +class TestOffsetNames(object): def test_get_offset_name(self): assert BDay().freqstr == 'B' @@ -4547,7 +4547,7 @@ def test_get_offset_legacy(): get_offset(name) -class TestParseTimeString(tm.TestCase): +class TestParseTimeString(object): def test_parse_time_string(self): (date, parsed, reso) = parse_time_string('4Q1984') @@ -4610,7 +4610,7 @@ def test_quarterly_dont_normalize(): assert (result.time() == date.time()) -class TestOffsetAliases(tm.TestCase): +class TestOffsetAliases(object): def setup_method(self, method): _offset_map.clear() @@ -4691,7 +4691,7 @@ def get_all_subclasses(cls): return ret -class TestCaching(tm.TestCase): +class TestCaching(object): # as of GH 6479 (in 0.14.0), offset caching is turned off # as of v0.12.0 only BusinessMonth/Quarter were actually caching @@ -4746,7 +4746,7 @@ def test_week_of_month_index_creation(self): assert inst2 not in _daterange_cache -class TestReprNames(tm.TestCase): +class TestReprNames(object): def test_str_for_named_is_name(self): # look at all the amazing combinations! @@ -4771,7 +4771,7 @@ def get_utc_offset_hours(ts): return (o.days * 24 * 3600 + o.seconds) / 3600.0 -class TestDST(tm.TestCase): +class TestDST(object): """ test DateOffset additions over Daylight Savings Time """ diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 74220aa5cd183..97c54922d36e9 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -50,7 +50,7 @@ def dst(self, dt): fixed_off_no_name = FixedOffset(-330, None) -class TestTimeZoneSupportPytz(tm.TestCase): +class TestTimeZoneSupportPytz(object): def setup_method(self, method): tm._skip_if_no_pytz() @@ -1178,7 +1178,7 @@ def test_tz_convert_tzlocal(self): tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) -class TestTimeZoneCacheKey(tm.TestCase): +class TestTimeZoneCacheKey(object): def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self): tzs = pytz.common_timezones @@ -1194,7 +1194,7 @@ def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self): assert tslib._p_tz_cache_key(tz_p) != tslib._p_tz_cache_key(tz_d) -class TestTimeZones(tm.TestCase): +class TestTimeZones(object): timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific'] def setup_method(self, method): @@ -1719,7 +1719,7 @@ def test_nat(self): tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) -class TestTslib(tm.TestCase): +class TestTslib(object): def test_tslib_tz_convert(self): def compare_utc_to_local(tz_didx, utc_didx): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 354e11ce0133a..0d70d51032b3d 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -85,20 +85,6 @@ def reset_testing_mode(): set_testing_mode() -class TestCase(object): - """ - Base class for all test case classes. - """ - - @classmethod - def setup_class(cls): - pd.set_option('chained_assignment', 'raise') - - @classmethod - def teardown_class(cls): - pass - - def reset_display_options(): """ Reset the display options for printing and representing objects. 
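The patch above is a mechanical sweep: every test class stops inheriting from pandas.util.testing.TestCase and becomes a plain class, which pytest still collects by its Test* name and drives through the xunit-style setup_method/setup_class hooks. The deleted TestCase base did only one thing, setting the chained_assignment option to 'raise' once per class, so a suite that relied on that behavior now has to opt in explicitly. A minimal sketch of the resulting idiom follows; the TestExample class and its contents are illustrative only and are not taken from the patch:

import pandas as pd


class TestExample(object):
    # A plain class: pytest collects it by name, no unittest base required.

    @classmethod
    def setup_class(cls):
        # This is all the removed tm.TestCase base class did; suites that
        # still want chained-assignment errors must now set it themselves.
        pd.set_option('chained_assignment', 'raise')

    def setup_method(self, method):
        # Called by pytest before each test method; replaces unittest's setUp.
        self.series = pd.Series([1, 2, 3])

    def test_sum(self):
        assert self.series.sum() == 6

Because the hooks are resolved by name rather than inheritance, mixin hierarchies such as the Base classes edited above keep working unchanged once tm.TestCase is dropped from their bases.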
From e99f56e43ab65710a34440067efe372d5f74280b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 4 May 2017 10:27:36 -0400 Subject: [PATCH 508/933] CLN: make submodules of pandas.util private (#16223) * CLN: make submodules of pandas.util private xref #13634 CLN: move pandas.util.* validators, depr_module, decorators, print_versions to _ leading * CLN: move remaining extensions to _libs * pandas.tools.hashing FutureWarning -> DeprecationWarning --- asv_bench/benchmarks/algorithms.py | 2 +- asv_bench/benchmarks/attrs_caching.py | 6 ++- doc/source/merging.rst | 2 +- doc/source/whatsnew/v0.20.0.txt | 21 +++++---- pandas/__init__.py | 6 +-- pandas/{util => _libs}/hashing.pyx | 0 pandas/{io => _libs}/parsers.pyx | 0 pandas/{core/sparse => _libs}/sparse.pyx | 0 .../sparse => _libs}/sparse_op_helper.pxi.in | 0 pandas/_libs/src/ujson/python/ujson.c | 6 +-- pandas/{util => _libs}/testing.pyx | 0 pandas/{core => _libs}/window.pyx | 0 pandas/compat/numpy/function.py | 4 +- pandas/compat/pickle_compat.py | 2 +- pandas/core/api.py | 2 +- pandas/core/base.py | 6 +-- pandas/core/categorical.py | 8 ++-- pandas/core/computation/eval.py | 2 +- pandas/core/frame.py | 4 +- pandas/core/generic.py | 6 +-- pandas/core/groupby.py | 6 +-- pandas/core/indexes/base.py | 4 +- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/datetimes.py | 4 +- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/multi.py | 8 ++-- pandas/core/indexes/numeric.py | 2 +- pandas/core/indexes/period.py | 4 +- pandas/core/indexes/range.py | 2 +- pandas/core/indexes/timedeltas.py | 2 +- pandas/core/internals.py | 4 +- pandas/core/ops.py | 2 +- pandas/core/panel.py | 2 +- pandas/core/resample.py | 2 +- pandas/core/reshape/merge.py | 2 +- pandas/core/reshape/reshape.py | 4 +- pandas/core/series.py | 6 +-- pandas/core/sparse/array.py | 6 +-- pandas/core/sparse/frame.py | 4 +- pandas/core/sparse/list.py | 4 +- pandas/core/sparse/series.py | 6 +-- pandas/core/strings.py | 2 +- .../importing.py => core/util/__init__.py} | 0 pandas/{ => core}/util/hashing.py | 18 +++++--- pandas/core/window.py | 6 +-- pandas/io/api.py | 2 +- pandas/{util => io}/clipboard/__init__.py | 0 pandas/io/{ => clipboard}/clipboard.py | 4 +- pandas/{util => io}/clipboard/clipboards.py | 0 pandas/{util => io}/clipboard/exceptions.py | 0 pandas/{util => io}/clipboard/windows.py | 0 pandas/io/excel.py | 8 ++-- pandas/io/formats/console.py | 2 +- pandas/io/formats/format.py | 2 +- pandas/io/formats/style.py | 2 +- pandas/{util => io/formats}/terminal.py | 0 pandas/io/json/json.py | 6 +-- pandas/io/parsers.py | 8 ++-- pandas/io/sas/sas7bdat.py | 2 +- pandas/io/sas/sas_xport.py | 2 +- pandas/io/stata.py | 2 +- pandas/json.py | 2 +- pandas/parser.py | 2 +- pandas/plotting/_core.py | 4 +- pandas/plotting/_misc.py | 2 +- pandas/stats/moments.py | 2 +- pandas/tests/dtypes/test_io.py | 2 +- pandas/tests/frame/common.py | 2 +- pandas/tests/io/formats/test_format.py | 2 +- pandas/tests/io/json/test_ujson.py | 2 +- pandas/tests/io/parser/test_textreader.py | 4 +- pandas/tests/io/test_clipboard.py | 6 +-- pandas/tests/io/test_html.py | 2 +- pandas/tests/plotting/common.py | 2 +- pandas/tests/series/common.py | 2 +- pandas/tests/sparse/test_array.py | 2 +- pandas/tests/sparse/test_frame.py | 2 +- pandas/tests/sparse/test_libsparse.py | 2 +- pandas/tests/sparse/test_series.py | 2 +- pandas/tests/util/__init__.py | 0 .../tests/{reshape => util}/test_hashing.py | 18 +++++++- pandas/tests/{ => util}/test_testing.py | 0 pandas/tests/{ 
=> util}/test_util.py | 8 ++-- pandas/tools/hashing.py | 18 ++++++++ pandas/tseries/frequencies.py | 2 +- pandas/util/__init__.py | 2 + pandas/util/{decorators.py => _decorators.py} | 0 .../util/{depr_module.py => _depr_module.py} | 0 pandas/util/{doctools.py => _doctools.py} | 0 .../{print_versions.py => _print_versions.py} | 0 pandas/util/{validators.py => _validators.py} | 0 pandas/util/testing.py | 26 +++++------ setup.py | 46 ++++++++++--------- 94 files changed, 217 insertions(+), 170 deletions(-) rename pandas/{util => _libs}/hashing.pyx (100%) rename pandas/{io => _libs}/parsers.pyx (100%) rename pandas/{core/sparse => _libs}/sparse.pyx (100%) rename pandas/{core/sparse => _libs}/sparse_op_helper.pxi.in (100%) rename pandas/{util => _libs}/testing.pyx (100%) rename pandas/{core => _libs}/window.pyx (100%) rename pandas/{util/importing.py => core/util/__init__.py} (100%) rename pandas/{ => core}/util/hashing.py (94%) rename pandas/{util => io}/clipboard/__init__.py (100%) rename pandas/io/{ => clipboard}/clipboard.py (97%) rename pandas/{util => io}/clipboard/clipboards.py (100%) rename pandas/{util => io}/clipboard/exceptions.py (100%) rename pandas/{util => io}/clipboard/windows.py (100%) rename pandas/{util => io/formats}/terminal.py (100%) create mode 100644 pandas/tests/util/__init__.py rename pandas/tests/{reshape => util}/test_hashing.py (94%) rename pandas/tests/{ => util}/test_testing.py (100%) rename pandas/tests/{ => util}/test_util.py (98%) create mode 100644 pandas/tools/hashing.py rename pandas/util/{decorators.py => _decorators.py} (100%) rename pandas/util/{depr_module.py => _depr_module.py} (100%) rename pandas/util/{doctools.py => _doctools.py} (100%) rename pandas/util/{print_versions.py => _print_versions.py} (100%) rename pandas/util/{validators.py => _validators.py} (100%) diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index d79051ed2d66c..40cfec1bcd4c7 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -5,7 +5,7 @@ import pandas as pd from pandas.util import testing as tm -for imp in ['pandas.util.hashing', 'pandas.tools.hashing']: +for imp in ['pandas.util', 'pandas.tools.hashing']: try: hashing = import_module(imp) break diff --git a/asv_bench/benchmarks/attrs_caching.py b/asv_bench/benchmarks/attrs_caching.py index 9210f1f2878d4..b7610037bed4d 100644 --- a/asv_bench/benchmarks/attrs_caching.py +++ b/asv_bench/benchmarks/attrs_caching.py @@ -1,5 +1,9 @@ from .pandas_vb_common import * -from pandas.util.decorators import cache_readonly + +try: + from pandas.util import cache_readonly +except ImportError: + from pandas.util.decorators import cache_readonly class DataFrameAttributes(object): diff --git a/doc/source/merging.rst b/doc/source/merging.rst index fb020727d077e..170dde87c8363 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -13,7 +13,7 @@ import matplotlib.pyplot as plt plt.close('all') - import pandas.util.doctools as doctools + import pandas.util._doctools as doctools p = doctools.TablePlotter() diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 230c7c0b90ac0..bfd8031b4c305 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1230,19 +1230,19 @@ If indicated, a deprecation warning will be issued if you reference theses modul "pandas.algos", "pandas._libs.algos", "" "pandas.hashtable", "pandas._libs.hashtable", "" "pandas.indexes", "pandas.core.indexes", "" - "pandas.json", 
"pandas.io.json.libjson", "X" - "pandas.parser", "pandas.io.libparsers", "X" + "pandas.json", "pandas._libs.json", "X" + "pandas.parser", "pandas._libs.parsers", "X" "pandas.formats", "pandas.io.formats", "" "pandas.sparse", "pandas.core.sparse", "" "pandas.tools", "pandas.core.reshape", "" "pandas.types", "pandas.core.dtypes", "" - "pandas.io.sas.saslib", "pandas.io.sas.libsas", "" + "pandas.io.sas.saslib", "pandas.io.sas._sas", "" "pandas._join", "pandas._libs.join", "" - "pandas._hash", "pandas.util.libhashing", "" + "pandas._hash", "pandas._libs.hashing", "" "pandas._period", "pandas._libs.period", "" - "pandas._sparse", "pandas.core.sparse.libsparse", "" - "pandas._testing", "pandas.util.libtesting", "" - "pandas._window", "pandas.core.libwindow", "" + "pandas._sparse", "pandas._libs.sparse", "" + "pandas._testing", "pandas._libs.testing", "" + "pandas._window", "pandas._libs.window", "" Some new subpackages are created with public functionality that is not directly @@ -1254,6 +1254,8 @@ these are now the public subpackages. - The function :func:`~pandas.api.types.union_categoricals` is now importable from ``pandas.api.types``, formerly from ``pandas.types.concat`` (:issue:`15998`) - The type import ``pandas.tslib.NaTType`` is deprecated and can be replaced by using ``type(pandas.NaT)`` (:issue:`16146`) +- The public functions in ``pandas.tools.hashing`` deprecated from that locations, but are now importable from ``pandas.util`` (:issue:`16223`) +- The modules in ``pandas.util``: ``decorators``, ``print_versions``, ``doctools``, `validators``, ``depr_module`` are now private (:issue:`16223`) .. _whatsnew_0200.privacy.errors: @@ -1278,7 +1280,7 @@ The following are now part of this API: 'UnsupportedFunctionCall'] -.. _whatsnew_0200.privay.testing: +.. _whatsnew_0200.privacy.testing: ``pandas.testing`` ^^^^^^^^^^^^^^^^^^ @@ -1292,14 +1294,13 @@ The following testing functions are now part of this API: - :func:`testing.assert_index_equal` -.. _whatsnew_0200.privay.plotting: +.. _whatsnew_0200.privacy.plotting: ``pandas.plotting`` ^^^^^^^^^^^^^^^^^^^ A new public ``pandas.plotting`` module has been added that holds plotting functionality that was previously in either ``pandas.tools.plotting`` or in the top-level namespace. See the :ref:`deprecations sections ` for more details. - .. 
_whatsnew_0200.privacy.development: Other Development Changes diff --git a/pandas/__init__.py b/pandas/__init__.py index 20c7e0d9d5993..48ac9d173559d 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -50,17 +50,17 @@ import pandas.tools.plotting plot_params = pandas.plotting._style._Options(deprecated=True) # do not import deprecate to top namespace -scatter_matrix = pandas.util.decorators.deprecate( +scatter_matrix = pandas.util._decorators.deprecate( 'pandas.scatter_matrix', pandas.plotting.scatter_matrix, 'pandas.plotting.scatter_matrix') -from pandas.util.print_versions import show_versions +from pandas.util._print_versions import show_versions from pandas.io.api import * from pandas.util._tester import test import pandas.testing # extension module deprecations -from pandas.util.depr_module import _DeprecatedModule +from pandas.util._depr_module import _DeprecatedModule json = _DeprecatedModule(deprmod='pandas.json', moved={'dumps': 'pandas.io.json.dumps', diff --git a/pandas/util/hashing.pyx b/pandas/_libs/hashing.pyx similarity index 100% rename from pandas/util/hashing.pyx rename to pandas/_libs/hashing.pyx diff --git a/pandas/io/parsers.pyx b/pandas/_libs/parsers.pyx similarity index 100% rename from pandas/io/parsers.pyx rename to pandas/_libs/parsers.pyx diff --git a/pandas/core/sparse/sparse.pyx b/pandas/_libs/sparse.pyx similarity index 100% rename from pandas/core/sparse/sparse.pyx rename to pandas/_libs/sparse.pyx diff --git a/pandas/core/sparse/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in similarity index 100% rename from pandas/core/sparse/sparse_op_helper.pxi.in rename to pandas/_libs/sparse_op_helper.pxi.in diff --git a/pandas/_libs/src/ujson/python/ujson.c b/pandas/_libs/src/ujson/python/ujson.c index ec6720f16bc77..a0c2146c30eed 100644 --- a/pandas/_libs/src/ujson/python/ujson.c +++ b/pandas/_libs/src/ujson/python/ujson.c @@ -90,14 +90,14 @@ static struct PyModuleDef moduledef = { NULL /* m_free */ }; -#define PYMODINITFUNC PyMODINIT_FUNC PyInit_libjson(void) +#define PYMODINITFUNC PyMODINIT_FUNC PyInit_json(void) #define PYMODULE_CREATE() PyModule_Create(&moduledef) #define MODINITERROR return NULL #else -#define PYMODINITFUNC PyMODINIT_FUNC initlibjson(void) -#define PYMODULE_CREATE() Py_InitModule("libjson", ujsonMethods) +#define PYMODINITFUNC PyMODINIT_FUNC initjson(void) +#define PYMODULE_CREATE() Py_InitModule("json", ujsonMethods) #define MODINITERROR return #endif diff --git a/pandas/util/testing.pyx b/pandas/_libs/testing.pyx similarity index 100% rename from pandas/util/testing.pyx rename to pandas/_libs/testing.pyx diff --git a/pandas/core/window.pyx b/pandas/_libs/window.pyx similarity index 100% rename from pandas/core/window.pyx rename to pandas/_libs/window.pyx diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index d707ac66c4eab..a324bf94171ce 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -19,8 +19,8 @@ """ from numpy import ndarray -from pandas.util.validators import (validate_args, validate_kwargs, - validate_args_and_kwargs) +from pandas.util._validators import (validate_args, validate_kwargs, + validate_args_and_kwargs) from pandas.errors import UnsupportedFunctionCall from pandas.core.dtypes.common import is_integer, is_bool from pandas.compat import OrderedDict diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 6df365a1cd898..b875bbb0d63c0 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py 
@@ -71,7 +71,7 @@ def load_reduce(self): # 12588, extensions moving ('pandas._sparse', 'BlockIndex'): - ('pandas.core.sparse.libsparse', 'BlockIndex'), + ('pandas._libs.sparse', 'BlockIndex'), ('pandas.tslib', 'Timestamp'): ('pandas._libs.tslib', 'Timestamp'), ('pandas.tslib', '__nat_unpickle'): diff --git a/pandas/core/api.py b/pandas/core/api.py index 3e84720c32a1c..265fb4004d997 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -35,7 +35,7 @@ from pandas.core.resample import TimeGrouper # see gh-14094. -from pandas.util.depr_module import _DeprecatedModule +from pandas.util._depr_module import _DeprecatedModule _removals = ['day', 'bday', 'businessDay', 'cday', 'customBusinessDay', 'customBusinessMonthEnd', 'customBusinessMonthBegin', diff --git a/pandas/core/base.py b/pandas/core/base.py index fd0846b0ad33c..a3ef24c80f883 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -9,14 +9,14 @@ from pandas.core.dtypes.missing import isnull from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass from pandas.core.dtypes.common import is_object_dtype, is_list_like, is_scalar -from pandas.util.validators import validate_bool_kwarg +from pandas.util._validators import validate_bool_kwarg from pandas.core import common as com import pandas.core.nanops as nanops import pandas._libs.lib as lib from pandas.compat.numpy import function as nv -from pandas.util.decorators import (Appender, cache_readonly, - deprecate_kwarg, Substitution) +from pandas.util._decorators import (Appender, cache_readonly, + deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError _shared_docs = dict() diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index a3667e9322959..7eb86232cbb07 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -34,11 +34,11 @@ import pandas.core.common as com from pandas.core.missing import interpolate_2d from pandas.compat.numpy import function as nv -from pandas.util.decorators import (Appender, cache_readonly, - deprecate_kwarg, Substitution) +from pandas.util._decorators import (Appender, cache_readonly, + deprecate_kwarg, Substitution) -from pandas.util.terminal import get_terminal_size -from pandas.util.validators import validate_bool_kwarg +from pandas.io.formats.terminal import get_terminal_size +from pandas.util._validators import validate_bool_kwarg from pandas.core.config import get_option diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 15e13025a7c53..22e376306280a 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -11,7 +11,7 @@ from pandas.core.computation.scope import _ensure_scope from pandas.compat import string_types from pandas.core.computation.engines import _engines -from pandas.util.validators import validate_bool_kwarg +from pandas.util._validators import validate_bool_kwarg def _check_engine(engine): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 67966374fcf9a..e6ea58e7e05be 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -80,8 +80,8 @@ OrderedDict, raise_with_traceback) from pandas import compat from pandas.compat.numpy import function as nv -from pandas.util.decorators import Appender, Substitution -from pandas.util.validators import validate_bool_kwarg +from pandas.util._decorators import Appender, Substitution +from pandas.util._validators import validate_bool_kwarg from pandas.core.indexes.period import PeriodIndex from pandas.core.indexes.datetimes import 
DatetimeIndex diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2bc64795b5f20..27a489293db8f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -51,8 +51,8 @@ from pandas.compat import (map, zip, lzip, lrange, string_types, isidentifier, set_function_name) import pandas.core.nanops as nanops -from pandas.util.decorators import Appender, Substitution, deprecate_kwarg -from pandas.util.validators import validate_bool_kwarg +from pandas.util._decorators import Appender, Substitution, deprecate_kwarg +from pandas.util._validators import validate_bool_kwarg from pandas.core import config # goal is to be able to define the docs close to function, while still being @@ -1382,7 +1382,7 @@ def to_clipboard(self, excel=None, sep=None, **kwargs): - Windows: none - OS X: none """ - from pandas.io import clipboard + from pandas.io.clipboard import clipboard clipboard.to_clipboard(self, excel=excel, sep=sep, **kwargs) def to_xarray(self): diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 479d2f7d26eb6..91b55c414b507 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -54,10 +54,10 @@ from pandas.core.sorting import (get_group_index_sorter, get_group_index, compress_group_index, get_flattened_iterator, decons_obs_group_ids, get_indexer_dict) -from pandas.util.decorators import (cache_readonly, Substitution, - Appender, make_signature) +from pandas.util._decorators import (cache_readonly, Substitution, + Appender, make_signature) from pandas.io.formats.printing import pprint_thing -from pandas.util.validators import validate_kwargs +from pandas.util._validators import validate_kwargs import pandas.core.algorithms as algorithms import pandas.core.common as com diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4345c74664bf5..82f3bf3b15462 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -41,8 +41,8 @@ from pandas.core.base import PandasObject, IndexOpsMixin import pandas.core.base as base -from pandas.util.decorators import (Appender, Substitution, cache_readonly, - deprecate, deprecate_kwarg) +from pandas.util._decorators import (Appender, Substitution, cache_readonly, + deprecate, deprecate_kwarg) from pandas.core.indexes.frozen import FrozenList import pandas.core.common as com import pandas.core.dtypes.concat as _concat diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 760db4ba20675..395513d7b9b81 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -16,7 +16,7 @@ from pandas.core.algorithms import take_1d -from pandas.util.decorators import Appender, cache_readonly +from pandas.util._decorators import Appender, cache_readonly from pandas.core.config import get_option from pandas.core.indexes.base import Index, _index_shared_docs import pandas.core.base as base diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 387209ceb038f..cd8559bcca03c 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -28,7 +28,7 @@ from pandas._libs.period import Period from pandas.core.indexes.base import Index, _index_shared_docs -from pandas.util.decorators import Appender, cache_readonly +from pandas.util._decorators import Appender, cache_readonly import pandas.core.dtypes.concat as _concat import pandas.tseries.frequencies as frequencies diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index b0264759f2f8d..ec678b1577d81 
100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -41,8 +41,8 @@ from pandas.core.tools.datetimes import ( parse_time_string, normalize_date, to_time) from pandas.core.tools.timedeltas import to_timedelta -from pandas.util.decorators import (Appender, cache_readonly, - deprecate_kwarg, Substitution) +from pandas.util._decorators import (Appender, cache_readonly, + deprecate_kwarg, Substitution) import pandas.core.common as com import pandas.tseries.offsets as offsets import pandas.core.tools.datetimes as tools diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index ccd0d8bee4abc..039346cba56c8 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -28,7 +28,7 @@ from pandas.core.indexes.multi import MultiIndex from pandas.compat.numpy import function as nv from pandas.core import common as com -from pandas.util.decorators import cache_readonly, Appender +from pandas.util._decorators import cache_readonly, Appender from pandas.core.config import get_option import pandas.core.indexes.base as ibase diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index c760d2943b823..7ef037d8f3536 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -26,8 +26,8 @@ is_null_slice) import pandas.core.base as base -from pandas.util.decorators import (Appender, cache_readonly, - deprecate, deprecate_kwarg) +from pandas.util._decorators import (Appender, cache_readonly, + deprecate, deprecate_kwarg) import pandas.core.common as com import pandas.core.missing as missing import pandas.core.algorithms as algos @@ -718,7 +718,7 @@ def _inferred_type_levels(self): @cache_readonly def _hashed_values(self): """ return a uint64 ndarray of my hashed values """ - from pandas.util.hashing import hash_tuples + from pandas.core.util.hashing import hash_tuples return hash_tuples(self) def _hashed_indexing_key(self, key): @@ -740,7 +740,7 @@ def _hashed_indexing_key(self, key): we need to stringify if we have mixed levels """ - from pandas.util.hashing import hash_tuples + from pandas.core.util.hashing import hash_tuples if not isinstance(key, tuple): return hash_tuples(key) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 21ba2a386d96a..bdae0ac7ac5e9 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -11,7 +11,7 @@ from pandas.core import algorithms from pandas.core.indexes.base import ( Index, InvalidIndexError, _index_shared_docs) -from pandas.util.decorators import Appender, cache_readonly +from pandas.util._decorators import Appender, cache_readonly import pandas.core.indexes.base as ibase diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 378661a49e20d..15fd9b7dc2b6a 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -40,8 +40,8 @@ from pandas.core.indexes.base import _index_shared_docs, _ensure_index from pandas import compat -from pandas.util.decorators import (Appender, Substitution, cache_readonly, - deprecate_kwarg) +from pandas.util._decorators import (Appender, Substitution, cache_readonly, + deprecate_kwarg) from pandas.compat import zip, u import pandas.core.indexes.base as ibase diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index acd040693af2e..b7a8e0b54a128 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -13,7 +13,7 @@ from pandas.compat import lrange, range from pandas.compat.numpy 
import function as nv from pandas.core.indexes.base import Index, _index_shared_docs -from pandas.util.decorators import Appender, cache_readonly +from pandas.util._decorators import Appender, cache_readonly import pandas.core.indexes.base as ibase from pandas.core.indexes.numeric import Int64Index diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 1081787b2c0b0..ab94a5bffb4f9 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -27,7 +27,7 @@ from pandas.core.indexes.base import _index_shared_docs import pandas.core.common as com import pandas.core.dtypes.concat as _concat -from pandas.util.decorators import Appender, Substitution, deprecate_kwarg +from pandas.util._decorators import Appender, Substitution, deprecate_kwarg from pandas.core.indexes.datetimelike import TimelikeOps, DatetimeIndexOpsMixin from pandas.core.tools.timedeltas import ( to_timedelta, _coerce_scalar_to_timedelta_type) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 840206977cf30..15851a17274ca 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -64,8 +64,8 @@ from pandas._libs.lib import BlockPlacement import pandas.core.computation.expressions as expressions -from pandas.util.decorators import cache_readonly -from pandas.util.validators import validate_bool_kwarg +from pandas.util._decorators import cache_readonly +from pandas.util._validators import validate_bool_kwarg from pandas import compat, _np_version_under1p9 from pandas.compat import range, map, zip, u diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 41a17a0957cbf..e7cfbdb0fc9c6 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -15,7 +15,7 @@ tslib as libts, algos as libalgos, iNaT) from pandas import compat -from pandas.util.decorators import Appender +from pandas.util._decorators import Appender import pandas.core.computation.expressions as expressions from pandas.compat import bind_method diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 39d2ebdeec3ac..d1f5b4587059c 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -34,7 +34,7 @@ from pandas.core.ops import _op_descriptions from pandas.core.series import Series from pandas.core.reshape.util import cartesian_product -from pandas.util.decorators import (deprecate, Appender) +from pandas.util._decorators import (deprecate, Appender) _shared_doc_kwargs = dict( axes='items, major_axis, minor_axis', diff --git a/pandas/core/resample.py b/pandas/core/resample.py index cbb2f6a93c2fd..631b91c3aad11 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -25,7 +25,7 @@ from pandas._libs.lib import Timestamp from pandas._libs.period import IncompatibleFrequency -from pandas.util.decorators import Appender +from pandas.util._decorators import Appender from pandas.core.generic import _shared_docs _shared_docs_kwargs = dict() diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 1ca3786ecc174..c55f4b5bf935f 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -34,7 +34,7 @@ from pandas.core.dtypes.missing import na_value_for_dtype from pandas.core.internals import (items_overlap_with_suffix, concatenate_block_managers) -from pandas.util.decorators import Appender, Substitution +from pandas.util._decorators import Appender, Substitution from pandas.core.sorting import is_int64_overflow_possible import pandas.core.algorithms as algos diff --git a/pandas/core/reshape/reshape.py 
b/pandas/core/reshape/reshape.py index a3cf80d758b7b..779002b300cc7 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -20,7 +20,7 @@ from pandas.core.sparse.api import SparseDataFrame, SparseSeries from pandas.core.sparse.array import SparseArray -from pandas.core.sparse.libsparse import IntIndex +from pandas._libs.sparse import IntIndex from pandas.core.categorical import Categorical, _factorize_from_iterable from pandas.core.sorting import (get_group_index, get_compressed_ids, @@ -30,7 +30,7 @@ from pandas._libs import algos as _algos, reshape as _reshape from pandas.core.frame import _shared_docs -from pandas.util.decorators import Appender +from pandas.util._decorators import Appender from pandas.core.index import MultiIndex, _get_na_value diff --git a/pandas/core/series.py b/pandas/core/series.py index e5f1d91eedfec..6ec163bbaa73d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -60,7 +60,7 @@ from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexes.period import PeriodIndex from pandas import compat -from pandas.util.terminal import get_terminal_size +from pandas.io.formats.terminal import get_terminal_size from pandas.compat import zip, u, OrderedDict, StringIO from pandas.compat.numpy import function as nv @@ -70,8 +70,8 @@ import pandas.core.common as com import pandas.core.nanops as nanops import pandas.io.formats.format as fmt -from pandas.util.decorators import Appender, deprecate_kwarg, Substitution -from pandas.util.validators import validate_bool_kwarg +from pandas.util._decorators import Appender, deprecate_kwarg, Substitution +from pandas.util._validators import validate_bool_kwarg from pandas._libs import index as libindex, tslib as libts, lib, iNaT from pandas.core.config import get_option diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index ef3600266c037..8ac9d3916573e 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -29,13 +29,13 @@ astype_nansafe, find_common_type) from pandas.core.dtypes.missing import isnull, notnull, na_value_for_dtype -from pandas.core.sparse import libsparse as splib -from pandas.core.sparse.libsparse import SparseIndex, BlockIndex, IntIndex +import pandas._libs.sparse as splib +from pandas._libs.sparse import SparseIndex, BlockIndex, IntIndex from pandas._libs import index as libindex import pandas.core.algorithms as algos import pandas.core.ops as ops import pandas.io.formats.printing as printing -from pandas.util.decorators import Appender +from pandas.util._decorators import Appender from pandas.core.indexes.base import _index_shared_docs diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 05c97fac4b53a..3c8f6e8c6257d 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -25,8 +25,8 @@ create_block_manager_from_arrays) import pandas.core.generic as generic from pandas.core.sparse.series import SparseSeries, SparseArray -from pandas.core.sparse.libsparse import BlockIndex, get_blocks -from pandas.util.decorators import Appender +from pandas._libs.sparse import BlockIndex, get_blocks +from pandas.util._decorators import Appender import pandas.core.ops as ops diff --git a/pandas/core/sparse/list.py b/pandas/core/sparse/list.py index e69ad6d0ab7ad..e2a8c6a29cc23 100644 --- a/pandas/core/sparse/list.py +++ b/pandas/core/sparse/list.py @@ -5,8 +5,8 @@ from pandas.core.dtypes.common import is_scalar from pandas.core.sparse.array import SparseArray -from 
pandas.util.validators import validate_bool_kwarg -from pandas.core.sparse import libsparse as splib +from pandas.util._validators import validate_bool_kwarg +import pandas._libs.sparse as splib class SparseList(PandasObject): diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index a77bce8f06783..9dd061e26ba06 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -21,13 +21,13 @@ import pandas.core.common as com import pandas.core.ops as ops import pandas._libs.index as _index -from pandas.util.decorators import Appender +from pandas.util._decorators import Appender from pandas.core.sparse.array import ( make_sparse, _sparse_array_op, SparseArray, _make_index) -from pandas.core.sparse.libsparse import BlockIndex, IntIndex -import pandas.core.sparse.libsparse as splib +from pandas._libs.sparse import BlockIndex, IntIndex +import pandas._libs.sparse as splib from pandas.core.sparse.scipy_sparse import ( _sparse_series_to_coo, diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 5082ac7f80fbf..c57d7a9362490 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -17,7 +17,7 @@ from pandas.core.algorithms import take_1d import pandas.compat as compat from pandas.core.base import AccessorProperty, NoNewAttributesMixin -from pandas.util.decorators import Appender +from pandas.util._decorators import Appender import re import pandas._libs.lib as lib import warnings diff --git a/pandas/util/importing.py b/pandas/core/util/__init__.py similarity index 100% rename from pandas/util/importing.py rename to pandas/core/util/__init__.py diff --git a/pandas/util/hashing.py b/pandas/core/util/hashing.py similarity index 94% rename from pandas/util/hashing.py rename to pandas/core/util/hashing.py index 3046c62a03f48..6a5343e8a8e25 100644 --- a/pandas/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -4,10 +4,10 @@ import itertools import numpy as np -from pandas import Series, factorize, Categorical, Index, MultiIndex -from pandas.util import libhashing as _hash +from pandas._libs import hashing from pandas._libs.lib import is_bool_array from pandas.core.dtypes.generic import ( + ABCMultiIndex, ABCIndexClass, ABCSeries, ABCDataFrame) @@ -73,10 +73,11 @@ def hash_pandas_object(obj, index=True, encoding='utf8', hash_key=None, Series of uint64, same length as the object """ + from pandas import Series if hash_key is None: hash_key = _default_hash_key - if isinstance(obj, MultiIndex): + if isinstance(obj, ABCMultiIndex): return Series(hash_tuples(obj, encoding, hash_key), dtype='uint64', copy=False) @@ -143,7 +144,9 @@ def hash_tuples(vals, encoding='utf8', hash_key=None): elif not is_list_like(vals): raise TypeError("must be convertible to a list-of-tuples") - if not isinstance(vals, MultiIndex): + from pandas import Categorical, MultiIndex + + if not isinstance(vals, ABCMultiIndex): vals = MultiIndex.from_tuples(vals) # create a list-of-Categoricals @@ -257,17 +260,18 @@ def hash_array(vals, encoding='utf8', hash_key=None, categorize=True): # then hash and rename categories. We allow skipping the categorization # when the values are known/likely to be unique. 
if categorize: + from pandas import factorize, Categorical, Index codes, categories = factorize(vals, sort=False) cat = Categorical(codes, Index(categories), ordered=False, fastpath=True) return _hash_categorical(cat, encoding, hash_key) try: - vals = _hash.hash_object_array(vals, hash_key, encoding) + vals = hashing.hash_object_array(vals, hash_key, encoding) except TypeError: # we have mixed types - vals = _hash.hash_object_array(vals.astype(str).astype(object), - hash_key, encoding) + vals = hashing.hash_object_array(vals.astype(str).astype(object), + hash_key, encoding) # Then, redistribute these 64-bit ints within the space of 64-bit ints vals ^= vals >> 30 diff --git a/pandas/core/window.py b/pandas/core/window.py index 6d8f12e982f12..df8e0c05009f4 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -33,12 +33,12 @@ from pandas.core.base import (PandasObject, SelectionMixin, GroupByMixin) import pandas.core.common as com -import pandas.core.libwindow as _window +import pandas._libs.window as _window from pandas.tseries.offsets import DateOffset from pandas import compat from pandas.compat.numpy import function as nv -from pandas.util.decorators import (Substitution, Appender, - cache_readonly) +from pandas.util._decorators import (Substitution, Appender, + cache_readonly) from pandas.core.generic import _shared_docs from textwrap import dedent diff --git a/pandas/io/api.py b/pandas/io/api.py index e312e7bc2f300..7f0d3c3631f63 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -5,7 +5,7 @@ # flake8: noqa from pandas.io.parsers import read_csv, read_table, read_fwf -from pandas.io.clipboard import read_clipboard +from pandas.io.clipboard.clipboard import read_clipboard from pandas.io.excel import ExcelFile, ExcelWriter, read_excel from pandas.io.pytables import HDFStore, get_store, read_hdf from pandas.io.json import read_json diff --git a/pandas/util/clipboard/__init__.py b/pandas/io/clipboard/__init__.py similarity index 100% rename from pandas/util/clipboard/__init__.py rename to pandas/io/clipboard/__init__.py diff --git a/pandas/io/clipboard.py b/pandas/io/clipboard/clipboard.py similarity index 97% rename from pandas/io/clipboard.py rename to pandas/io/clipboard/clipboard.py index 3c7ac528d83fd..6252a02b0d63d 100644 --- a/pandas/io/clipboard.py +++ b/pandas/io/clipboard/clipboard.py @@ -26,7 +26,7 @@ def read_clipboard(sep='\s+', **kwargs): # pragma: no cover raise NotImplementedError( 'reading from clipboard only supports utf-8 encoding') - from pandas.util.clipboard import clipboard_get + from pandas.io.clipboard import clipboard_get from pandas.io.parsers import read_table text = clipboard_get() @@ -92,7 +92,7 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover if encoding is not None and encoding.lower().replace('-', '') != 'utf8': raise ValueError('clipboard only supports utf-8 encoding') - from pandas.util.clipboard import clipboard_set + from pandas.io.clipboard import clipboard_set if excel is None: excel = True diff --git a/pandas/util/clipboard/clipboards.py b/pandas/io/clipboard/clipboards.py similarity index 100% rename from pandas/util/clipboard/clipboards.py rename to pandas/io/clipboard/clipboards.py diff --git a/pandas/util/clipboard/exceptions.py b/pandas/io/clipboard/exceptions.py similarity index 100% rename from pandas/util/clipboard/exceptions.py rename to pandas/io/clipboard/exceptions.py diff --git a/pandas/util/clipboard/windows.py b/pandas/io/clipboard/windows.py similarity index 100% rename from 
pandas/util/clipboard/windows.py rename to pandas/io/clipboard/windows.py diff --git a/pandas/io/excel.py b/pandas/io/excel.py index fbb10ebdfc56d..9b0f49ccc45b1 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -20,7 +20,7 @@ from pandas.io.common import (_is_url, _urlopen, _validate_header_arg, get_filepath_or_buffer, _NA_VALUES) from pandas.core.indexes.period import Period -from pandas.io.json import libjson +import pandas._libs.json as json from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, string_types, OrderedDict) from pandas.core import config @@ -29,7 +29,7 @@ import pandas.compat.openpyxl_compat as openpyxl_compat from warnings import warn from distutils.version import LooseVersion -from pandas.util.decorators import Appender +from pandas.util._decorators import Appender from textwrap import fill __all__ = ["read_excel", "ExcelWriter", "ExcelFile"] @@ -1447,7 +1447,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, elif isinstance(cell.val, date): num_format_str = self.date_format - stylekey = libjson.dumps(cell.style) + stylekey = json.dumps(cell.style) if num_format_str: stylekey += num_format_str @@ -1575,7 +1575,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, elif isinstance(cell.val, date): num_format_str = self.date_format - stylekey = libjson.dumps(cell.style) + stylekey = json.dumps(cell.style) if num_format_str: stylekey += num_format_str diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index 0e46b0073a53d..ab75e3fa253ce 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -4,7 +4,7 @@ import sys import locale -from pandas.util.terminal import get_terminal_size +from pandas.io.formats.terminal import get_terminal_size # ----------------------------------------------------------------------------- # Global formatting options diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 43b0b5fbeee90..65098bb2aa404 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -30,7 +30,7 @@ from pandas import compat from pandas.compat import (StringIO, lzip, range, map, zip, u, OrderedDict, unichr) -from pandas.util.terminal import get_terminal_size +from pandas.io.formats.terminal import get_terminal_size from pandas.core.config import get_option, set_option from pandas.io.common import _get_handle, UnicodeWriter, _expand_user from pandas.io.formats.printing import adjoin, justify, pprint_thing diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 71c61998be092..eac82ddde2318 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -29,7 +29,7 @@ from pandas.core.generic import _shared_docs import pandas.core.common as com from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice -from pandas.util.decorators import Appender +from pandas.util._decorators import Appender try: import matplotlib.pyplot as plt from matplotlib import colors diff --git a/pandas/util/terminal.py b/pandas/io/formats/terminal.py similarity index 100% rename from pandas/util/terminal.py rename to pandas/io/formats/terminal.py diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 28ea8298cee9e..b2fe074732cbb 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -2,7 +2,7 @@ import os import numpy as np -from pandas.io.json import libjson +import pandas._libs.json as json from pandas._libs.tslib import iNaT from pandas.compat import StringIO, long, u 
from pandas import compat, isnull @@ -14,8 +14,8 @@ from .table_schema import build_table_schema from pandas.core.dtypes.common import is_period_dtype -loads = libjson.loads -dumps = libjson.dumps +loads = json.loads +dumps = json.dumps TABLE_SCHEMA_VERSION = '0.20.0' diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 95b1394c88ac2..ce8643504932f 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -37,10 +37,10 @@ _NA_VALUES, _infer_compression) from pandas.core.tools import datetimes as tools -from pandas.util.decorators import Appender +from pandas.util._decorators import Appender import pandas._libs.lib as lib -import pandas.io.libparsers as libparsers +import pandas._libs.parsers as parsers # BOM character (byte order mark) @@ -1460,7 +1460,7 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, if issubclass(cvals.dtype.type, np.integer) and self.compact_ints: cvals = lib.downcast_int64( - cvals, libparsers.na_values, + cvals, parsers.na_values, self.use_unsigned) result[c] = cvals @@ -1579,7 +1579,7 @@ def __init__(self, src, **kwds): # #2442 kwds['allow_leading_cols'] = self.index_col is not False - self._reader = libparsers.TextReader(src, **kwds) + self._reader = parsers.TextReader(src, **kwds) # XXX self.usecols, self.usecols_dtype = _validate_usecols_arg( diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index d33cee2c5a1bc..20b0cf85e95b7 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -20,7 +20,7 @@ import numpy as np import struct import pandas.io.sas.sas_constants as const -from pandas.io.sas.libsas import Parser +from pandas.io.sas._sas import Parser class _subheader_pointer(object): diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 76fc55154bc49..a43a5988a2ade 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -14,7 +14,7 @@ from pandas import compat import struct import numpy as np -from pandas.util.decorators import Appender +from pandas.util._decorators import Appender import warnings _correct_line1 = ("HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!" diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 691582629251a..55cac83804cd9 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -27,7 +27,7 @@ from pandas import compat, to_timedelta, to_datetime, isnull, DatetimeIndex from pandas.compat import lrange, lmap, lzip, text_type, string_types, range, \ zip, BytesIO -from pandas.util.decorators import Appender +from pandas.util._decorators import Appender import pandas as pd from pandas.io.common import get_filepath_or_buffer, BaseIterator diff --git a/pandas/json.py b/pandas/json.py index 5b1e395fa4b74..0b87aa22394b9 100644 --- a/pandas/json.py +++ b/pandas/json.py @@ -4,4 +4,4 @@ warnings.warn("The pandas.json module is deprecated and will be " "removed in a future version. Please import from " "the pandas.io.json instead", FutureWarning, stacklevel=2) -from pandas.io.json.libjson import dumps, loads +from pandas._libs.json import dumps, loads diff --git a/pandas/parser.py b/pandas/parser.py index af203c3df8cc9..c0c3bf3179a2d 100644 --- a/pandas/parser.py +++ b/pandas/parser.py @@ -4,5 +4,5 @@ warnings.warn("The pandas.parser module is deprecated and will be " "removed in a future version. 
Please import from " "the pandas.io.parser instead", FutureWarning, stacklevel=2) -from pandas.io.libparsers import na_values +from pandas._libs.parsers import na_values from pandas.io.common import CParserError diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index c3476d1443fc3..e88979b14c8af 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -9,7 +9,7 @@ import numpy as np -from pandas.util.decorators import cache_readonly +from pandas.util._decorators import cache_readonly from pandas.core.base import PandasObject from pandas.core.dtypes.common import ( is_list_like, @@ -25,7 +25,7 @@ from pandas.compat import range, lrange, map, zip, string_types import pandas.compat as compat from pandas.io.formats.printing import pprint_thing -from pandas.util.decorators import Appender +from pandas.util._decorators import Appender from pandas.plotting._compat import (_mpl_ge_1_3_1, _mpl_ge_1_5_0) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 93eceba9a3f02..20ada033c0f58 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -4,7 +4,7 @@ import numpy as np -from pandas.util.decorators import deprecate_kwarg +from pandas.util._decorators import deprecate_kwarg from pandas.core.dtypes.missing import notnull from pandas.compat import range, lrange, lmap, zip from pandas.io.formats.printing import pprint_thing diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index f98ffa26e0c2b..f6c3a08c6721a 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -8,7 +8,7 @@ import numpy as np from pandas.core.dtypes.common import is_scalar from pandas.core.api import DataFrame, Series -from pandas.util.decorators import Substitution, Appender +from pandas.util._decorators import Substitution, Appender __all__ = ['rolling_count', 'rolling_max', 'rolling_min', 'rolling_sum', 'rolling_mean', 'rolling_std', 'rolling_cov', diff --git a/pandas/tests/dtypes/test_io.py b/pandas/tests/dtypes/test_io.py index 58a1c3540cd03..ae92e9ecca681 100644 --- a/pandas/tests/dtypes/test_io.py +++ b/pandas/tests/dtypes/test_io.py @@ -73,7 +73,7 @@ def test_convert_sql_column_decimals(self): tm.assert_numpy_array_equal(result, expected) def test_convert_downcast_int64(self): - from pandas.io.libparsers import na_values + from pandas._libs.parsers import na_values arr = np.array([1, 2, 7, 8, 10], dtype=np.int64) expected = np.array([1, 2, 7, 8, 10], dtype=np.int8) diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index b9cd764c8704c..b475d25eb5dac 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -1,7 +1,7 @@ import numpy as np from pandas import compat -from pandas.util.decorators import cache_readonly +from pandas.util._decorators import cache_readonly import pandas.util.testing as tm import pandas as pd diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index e99c70952e5b3..3f08013e05ac8 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -29,7 +29,7 @@ import pandas.io.formats.printing as printing import pandas.util.testing as tm -from pandas.util.terminal import get_terminal_size +from pandas.io.formats.terminal import get_terminal_size from pandas.core.config import (set_option, get_option, option_context, reset_option) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 10f99c4fcd0a8..86b0e5a0c6a2d 100644 --- a/pandas/tests/io/json/test_ujson.py +++ 
b/pandas/tests/io/json/test_ujson.py @@ -13,7 +13,7 @@ import decimal from functools import partial from pandas.compat import range, zip, StringIO, u -import pandas.io.json.libjson as ujson +import pandas._libs.json as ujson import pandas.compat as compat import numpy as np diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index 7cd02a07bbd4c..c9088d2ecc5e7 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -22,8 +22,8 @@ import pandas.util.testing as tm -from pandas.io.libparsers import TextReader -import pandas.io.libparsers as parser +from pandas._libs.parsers import TextReader +import pandas._libs.parsers as parser class TestTextReader(object): diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 406045a69beca..940a331a9de84 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import numpy as np from numpy.random import randint +from textwrap import dedent import pytest import pandas as pd @@ -10,7 +11,8 @@ from pandas import get_option from pandas.util import testing as tm from pandas.util.testing import makeCustomDataframe as mkdf -from pandas.util.clipboard.exceptions import PyperclipException +from pandas.io.clipboard.exceptions import PyperclipException +from pandas.io.clipboard import clipboard_set try: @@ -89,8 +91,6 @@ def test_round_trip_frame(self): self.check_round_trip_frame(dt) def test_read_clipboard_infer_excel(self): - from textwrap import dedent - from pandas.util.clipboard import clipboard_set text = dedent(""" John James Charlie Mingus diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index fa83c43ba8dd4..6da77bf423609 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -23,7 +23,7 @@ is_platform_windows) from pandas.io.common import URLError, urlopen, file_path_to_url from pandas.io.html import read_html -from pandas.io.libparsers import ParserError +from pandas._libs.parsers import ParserError import pandas.util.testing as tm from pandas.util.testing import makeCustomDataframe as mkdf, network diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index ac490a00bf684..1dbba676e4bc5 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -7,7 +7,7 @@ from pandas import DataFrame, Series from pandas.compat import zip, iteritems -from pandas.util.decorators import cache_readonly +from pandas.util._decorators import cache_readonly from pandas.core.dtypes.api import is_list_like import pandas.util.testing as tm from pandas.util.testing import (ensure_clean, diff --git a/pandas/tests/series/common.py b/pandas/tests/series/common.py index 613961e1c670f..0c25dcb29c3b2 100644 --- a/pandas/tests/series/common.py +++ b/pandas/tests/series/common.py @@ -1,4 +1,4 @@ -from pandas.util.decorators import cache_readonly +from pandas.util._decorators import cache_readonly import pandas.util.testing as tm import pandas as pd diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py index ab7340c89f016..4ce03f72dbba6 100644 --- a/pandas/tests/sparse/test_array.py +++ b/pandas/tests/sparse/test_array.py @@ -10,7 +10,7 @@ from pandas import _np_version_under1p8 from pandas.core.sparse.api import SparseArray, SparseSeries -from pandas.core.sparse.libsparse import IntIndex +from pandas._libs.sparse import IntIndex from pandas.util.testing import 
assert_almost_equal import pandas.util.testing as tm diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 4a4a596e3bed4..0312b76ec30a5 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -21,7 +21,7 @@ from pandas import compat from pandas.core.sparse import frame as spf -from pandas.core.sparse.libsparse import BlockIndex, IntIndex +from pandas._libs.sparse import BlockIndex, IntIndex from pandas.core.sparse.api import SparseSeries, SparseDataFrame, SparseArray from pandas.tests.frame.test_api import SharedWithSparse diff --git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py index c41025582c651..4842ebdd103c4 100644 --- a/pandas/tests/sparse/test_libsparse.py +++ b/pandas/tests/sparse/test_libsparse.py @@ -8,7 +8,7 @@ from pandas import compat from pandas.core.sparse.array import IntIndex, BlockIndex, _make_index -import pandas.core.sparse.libsparse as splib +import pandas._libs.sparse as splib TEST_LENGTH = 20 diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index 344bca54b180b..b524d6bfab418 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -17,7 +17,7 @@ import pandas.core.sparse.frame as spf -from pandas.core.sparse.libsparse import BlockIndex, IntIndex +from pandas._libs.sparse import BlockIndex, IntIndex from pandas.core.sparse.api import SparseSeries from pandas.tests.series.test_api import SharedWithSparse diff --git a/pandas/tests/util/__init__.py b/pandas/tests/util/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/reshape/test_hashing.py b/pandas/tests/util/test_hashing.py similarity index 94% rename from pandas/tests/reshape/test_hashing.py rename to pandas/tests/util/test_hashing.py index 5f2c67ee300b5..e1e6e43529a7d 100644 --- a/pandas/tests/reshape/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -5,7 +5,8 @@ import pandas as pd from pandas import DataFrame, Series, Index, MultiIndex -from pandas.util.hashing import hash_array, hash_tuples, hash_pandas_object +from pandas.util import hash_array, hash_pandas_object +from pandas.core.util.hashing import hash_tuples import pandas.util.testing as tm @@ -267,3 +268,18 @@ def test_hash_collisions(self): result = hash_array(np.asarray(L, dtype=object), 'utf8') tm.assert_numpy_array_equal( result, np.concatenate([expected1, expected2], axis=0)) + + +def test_deprecation(): + + with tm.assert_produces_warning(DeprecationWarning, + check_stacklevel=False): + from pandas.tools.hashing import hash_pandas_object + obj = Series(list('abc')) + hash_pandas_object(obj, hash_key='9876543210123456') + + with tm.assert_produces_warning(DeprecationWarning, + check_stacklevel=False): + from pandas.tools.hashing import hash_array + obj = np.array([1, 2, 3]) + hash_array(obj, hash_key='9876543210123456') diff --git a/pandas/tests/test_testing.py b/pandas/tests/util/test_testing.py similarity index 100% rename from pandas/tests/test_testing.py rename to pandas/tests/util/test_testing.py diff --git a/pandas/tests/test_util.py b/pandas/tests/util/test_util.py similarity index 98% rename from pandas/tests/test_util.py rename to pandas/tests/util/test_util.py index 2d9ab78ceeb8a..532d596220501 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/util/test_util.py @@ -9,10 +9,10 @@ import pytest from pandas.compat import intern from pandas.util._move import move_into_mutable_buffer, BadMove, stolenbuf -from 
pandas.util.decorators import deprecate_kwarg -from pandas.util.validators import (validate_args, validate_kwargs, - validate_args_and_kwargs, - validate_bool_kwarg) +from pandas.util._decorators import deprecate_kwarg +from pandas.util._validators import (validate_args, validate_kwargs, + validate_args_and_kwargs, + validate_bool_kwarg) import pandas.util.testing as tm diff --git a/pandas/tools/hashing.py b/pandas/tools/hashing.py new file mode 100644 index 0000000000000..ba38710b607af --- /dev/null +++ b/pandas/tools/hashing.py @@ -0,0 +1,18 @@ +import warnings +import sys + +m = sys.modules['pandas.tools.hashing'] +for t in ['hash_pandas_object', 'hash_array']: + + def outer(t=t): + + def wrapper(*args, **kwargs): + from pandas import util + warnings.warn("pandas.tools.hashing is deprecated and will be " + "removed in a future version, import " + "from pandas.util", + DeprecationWarning, stacklevel=3) + return getattr(util, t)(*args, **kwargs) + return wrapper + + setattr(m, t, outer(t)) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 06d70f1456518..dddf835424f67 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -16,7 +16,7 @@ import pandas.core.algorithms as algos from pandas.core.algorithms import unique from pandas.tseries.offsets import DateOffset -from pandas.util.decorators import cache_readonly, deprecate_kwarg +from pandas.util._decorators import cache_readonly, deprecate_kwarg import pandas.tseries.offsets as offsets from pandas._libs import lib, tslib diff --git a/pandas/util/__init__.py b/pandas/util/__init__.py index e69de29bb2d1d..e86af930fef7c 100644 --- a/pandas/util/__init__.py +++ b/pandas/util/__init__.py @@ -0,0 +1,2 @@ +from pandas.core.util.hashing import hash_pandas_object, hash_array # noqa +from pandas.util._decorators import Appender, Substitution, cache_readonly # noqa diff --git a/pandas/util/decorators.py b/pandas/util/_decorators.py similarity index 100% rename from pandas/util/decorators.py rename to pandas/util/_decorators.py diff --git a/pandas/util/depr_module.py b/pandas/util/_depr_module.py similarity index 100% rename from pandas/util/depr_module.py rename to pandas/util/_depr_module.py diff --git a/pandas/util/doctools.py b/pandas/util/_doctools.py similarity index 100% rename from pandas/util/doctools.py rename to pandas/util/_doctools.py diff --git a/pandas/util/print_versions.py b/pandas/util/_print_versions.py similarity index 100% rename from pandas/util/print_versions.py rename to pandas/util/_print_versions.py diff --git a/pandas/util/validators.py b/pandas/util/_validators.py similarity index 100% rename from pandas/util/validators.py rename to pandas/util/_validators.py diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 0d70d51032b3d..f6b572cdf7179 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -48,7 +48,7 @@ Index, MultiIndex, Series, DataFrame, Panel, Panel4D) -from pandas.util import libtesting +from pandas._libs import testing as _testing from pandas.io.common import urlopen try: import pytest @@ -170,7 +170,7 @@ def assert_almost_equal(left, right, check_exact=False, else: obj = 'Input' assert_class_equal(left, right, obj=obj) - return libtesting.assert_almost_equal( + return _testing.assert_almost_equal( left, right, check_dtype=check_dtype, check_less_precise=check_less_precise, @@ -206,7 +206,7 @@ def _check_isinstance(left, right, cls): def assert_dict_equal(left, right, compare_keys=True): _check_isinstance(left, right, dict) - 
return libtesting.assert_dict_equal(left, right, compare_keys=compare_keys) + return _testing.assert_dict_equal(left, right, compare_keys=compare_keys) def randbool(size=(), p=0.5): @@ -923,10 +923,10 @@ def _get_ilevel_values(index, level): .format(obj, np.round(diff, 5)) raise_assert_detail(obj, msg, left, right) else: - libtesting.assert_almost_equal(left.values, right.values, - check_less_precise=check_less_precise, - check_dtype=exact, - obj=obj, lobj=left, robj=right) + _testing.assert_almost_equal(left.values, right.values, + check_less_precise=check_less_precise, + check_dtype=exact, + obj=obj, lobj=left, robj=right) # metadata comparison if check_names: @@ -1259,10 +1259,10 @@ def assert_series_equal(left, right, check_dtype=True, assert_index_equal(l, r, obj='{0}.index'.format(obj)) else: - libtesting.assert_almost_equal(left.get_values(), right.get_values(), - check_less_precise=check_less_precise, - check_dtype=check_dtype, - obj='{0}'.format(obj)) + _testing.assert_almost_equal(left.get_values(), right.get_values(), + check_less_precise=check_less_precise, + check_dtype=check_dtype, + obj='{0}'.format(obj)) # metadata comparison if check_names: @@ -1476,8 +1476,8 @@ def assert_sp_array_equal(left, right, check_dtype=True): check_dtype=check_dtype) # SparseIndex comparison - assert isinstance(left.sp_index, pd.core.sparse.libsparse.SparseIndex) - assert isinstance(right.sp_index, pd.core.sparse.libsparse.SparseIndex) + assert isinstance(left.sp_index, pd._libs.sparse.SparseIndex) + assert isinstance(right.sp_index, pd._libs.sparse.SparseIndex) if not left.sp_index.equals(right.sp_index): raise_assert_detail('SparseArray.index', 'index are not equal', diff --git a/setup.py b/setup.py index 6f3ddbe2ad9d0..806047a344281 100755 --- a/setup.py +++ b/setup.py @@ -116,9 +116,9 @@ def is_platform_mac(): 'join': ['_libs/join_helper.pxi.in', '_libs/join_func_helper.pxi.in'], 'reshape': ['_libs/reshape_helper.pxi.in'], 'hashtable': ['_libs/hashtable_class_helper.pxi.in', - '_libs/hashtable_func_helper.pxi.in'], + '_libs/hashtable_func_helper.pxi.in'], 'index': ['_libs/index_class_helper.pxi.in'], - 'sparse': ['core/sparse/sparse_op_helper.pxi.in'], + 'sparse': ['_libs/sparse_op_helper.pxi.in'], 'interval': ['_libs/intervaltree.pxi.in'] } @@ -337,11 +337,11 @@ class CheckSDist(sdist_class): 'pandas/_libs/algos.pyx', 'pandas/_libs/join.pyx', 'pandas/_libs/interval.pyx', - 'pandas/core/window.pyx', - 'pandas/core/sparse/sparse.pyx', - 'pandas/util/testing.pyx', - 'pandas/tools/hash.pyx', - 'pandas/io/parsers.pyx', + 'pandas/_libs/hashing.pyx', + 'pandas/_libs/testing.pyx', + 'pandas/_libs/window.pyx', + 'pandas/_libs/sparse.pyx', + 'pandas/_libs/parsers.pyx', 'pandas/io/sas/sas.pyx'] def initialize_options(self): @@ -513,24 +513,24 @@ def pxd(name): '_libs.interval': {'pyxfile': '_libs/interval', 'pxdfiles': ['_libs/hashtable'], 'depends': _pxi_dep['interval']}, - 'core.libwindow': {'pyxfile': 'core/window', - 'pxdfiles': ['_libs/src/skiplist', '_libs/src/util'], - 'depends': ['pandas/_libs/src/skiplist.pyx', - 'pandas/_libs/src/skiplist.h']}, - 'io.libparsers': {'pyxfile': 'io/parsers', + '_libs.window': {'pyxfile': '_libs/window', + 'pxdfiles': ['_libs/src/skiplist', '_libs/src/util'], + 'depends': ['pandas/_libs/src/skiplist.pyx', + 'pandas/_libs/src/skiplist.h']}, + '_libs.parsers': {'pyxfile': '_libs/parsers', 'depends': ['pandas/_libs/src/parser/tokenizer.h', 'pandas/_libs/src/parser/io.h', 'pandas/_libs/src/numpy_helper.h'], 'sources': ['pandas/_libs/src/parser/tokenizer.c', 
'pandas/_libs/src/parser/io.c']}, - 'core.sparse.libsparse': {'pyxfile': 'core/sparse/sparse', - 'depends': (['pandas/core/sparse/sparse.pyx'] + - _pxi_dep['sparse'])}, - 'util.libtesting': {'pyxfile': 'util/testing', - 'depends': ['pandas/util/testing.pyx']}, - 'util.libhashing': {'pyxfile': 'util/hashing', - 'depends': ['pandas/util/hashing.pyx']}, - 'io.sas.libsas': {'pyxfile': 'io/sas/sas'}, + '_libs.sparse': {'pyxfile': '_libs/sparse', + 'depends': (['pandas/core/sparse/sparse.pyx'] + + _pxi_dep['sparse'])}, + '_libs.testing': {'pyxfile': '_libs/testing', + 'depends': ['pandas/_libs/testing.pyx']}, + '_libs.hashing': {'pyxfile': '_libs/hashing', + 'depends': ['pandas/_libs/hashing.pyx']}, + 'io.sas._sas': {'pyxfile': 'io/sas/sas'}, } extensions = [] @@ -596,7 +596,7 @@ def pxd(name): root, _ = os.path.splitext(ext.sources[0]) ext.sources[0] = root + suffix -ujson_ext = Extension('pandas.io.json.libjson', +ujson_ext = Extension('pandas._libs.json', depends=['pandas/_libs/src/ujson/lib/ultrajson.h', 'pandas/_libs/src/datetime_helper.h', 'pandas/_libs/src/numpy_helper.h'], @@ -645,6 +645,7 @@ def pxd(name): 'pandas.core.reshape', 'pandas.core.sparse', 'pandas.core.tools', + 'pandas.core.util', 'pandas.computation', 'pandas.errors', 'pandas.io', @@ -652,6 +653,7 @@ def pxd(name): 'pandas.io.sas', 'pandas.io.msgpack', 'pandas.io.formats', + 'pandas.io.clipboard', 'pandas._libs', 'pandas.plotting', 'pandas.stats', @@ -679,9 +681,9 @@ def pxd(name): 'pandas.tests.tseries', 'pandas.tests.plotting', 'pandas.tests.tools', + 'pandas.tests.util', 'pandas.tools', 'pandas.tseries', - 'pandas.util.clipboard' ], package_data={'pandas.tests': ['data/*.csv'], 'pandas.tests.indexes': ['data/*.pickle'], From 2002da33b0a755fcf7ef64b2c87ca4252f0e7df0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 May 2017 09:54:28 -0500 Subject: [PATCH 509/933] RLS: v0.20.0rc2 From 844373002983ef5386731fbd5d85fea9c5dfe6d5 Mon Sep 17 00:00:00 2001 From: Christoph Gohlke Date: Thu, 4 May 2017 13:58:25 -0700 Subject: [PATCH 510/933] PKG: Fix ModuleNotFoundError: No module named 'pandas.formats' (#16239) --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 806047a344281..d101358fb63dd 100755 --- a/setup.py +++ b/setup.py @@ -648,6 +648,7 @@ def pxd(name): 'pandas.core.util', 'pandas.computation', 'pandas.errors', + 'pandas.formats', 'pandas.io', 'pandas.io.json', 'pandas.io.sas', From 8ebd65b8a8c8e049666af38cc4ce0f01c28c15cc Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 4 May 2017 23:15:55 +0200 Subject: [PATCH 511/933] DOC: don't include all methods/attributes of IntervalIndex (#16221) * DOC: don't include all methods/attributes of IntervalIndex * Change docs for CategoricalIndex and IntervalIndex No autosummary with methods for them. Added to our hacks list in our numpydoc * Don't exclude Interval --- doc/source/api.rst | 3 ++- doc/sphinxext/numpydoc/numpydoc.py | 4 +++- pandas/core/indexes/category.py | 3 +++ pandas/core/indexes/interval.py | 4 ++++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 491bec3c83f61..c652573bc6677 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -618,7 +618,6 @@ strings and apply several methods to it. These can be accessed like Series.cat Series.dt Index.str - CategoricalIndex.str MultiIndex.str DatetimeIndex.str TimedeltaIndex.str @@ -1404,6 +1403,7 @@ CategoricalIndex .. 
autosummary:: :toctree: generated/ + :template: autosummary/class_without_autosummary.rst CategoricalIndex @@ -1432,6 +1432,7 @@ IntervalIndex .. autosummary:: :toctree: generated/ + :template: autosummary/class_without_autosummary.rst IntervalIndex diff --git a/doc/sphinxext/numpydoc/numpydoc.py b/doc/sphinxext/numpydoc/numpydoc.py index 0cccf72de3745..710c3cc9842c4 100755 --- a/doc/sphinxext/numpydoc/numpydoc.py +++ b/doc/sphinxext/numpydoc/numpydoc.py @@ -43,7 +43,9 @@ def mangle_docstrings(app, what, name, obj, options, lines, ) # PANDAS HACK (to remove the list of methods/attributes for Categorical) - if what == "class" and name.endswith(".Categorical"): + if what == "class" and (name.endswith(".Categorical") or + name.endswith("CategoricalIndex") or + name.endswith("IntervalIndex")): cfg['class_members_list'] = False if what == 'module': diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 395513d7b9b81..d9e0c218bfafc 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -47,6 +47,9 @@ class CategoricalIndex(Index, base.PandasDelegate): name : object Name to be stored in the index + See Also + -------- + Categorical, Index """ _typ = 'categoricalindex' diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 039346cba56c8..8363cead01e56 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -110,6 +110,10 @@ class IntervalIndex(IntervalMixin, Index): Name to be stored in the index. copy : boolean, default False Copy the meta-data + + See Also + -------- + Index """ _typ = 'intervalindex' _comparables = ['name'] From a54efdd3f8ff4c9d9248c71e87679af29c856806 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 4 May 2017 17:29:15 -0400 Subject: [PATCH 512/933] TST: Remove __init__ statements in testing (#16238) Closes gh-16235. 
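Background for the diff that follows: pytest does not collect test classes that define their own __init__, so per-class state has to live in the xunit-style setup_class/setup_method hooks instead. A minimal sketch of the target pattern, using illustrative class and attribute names that are not taken from the patch:

    class TestExample(object):

        @classmethod
        def setup_class(cls):
            # Runs once for the whole class; replaces the state a
            # constructor would otherwise have set.
            cls.defaults = {'key': 'value'}

        def setup_method(self, method):
            # Runs before every test; hands each test a fresh working copy.
            self.state = dict(self.defaults)

        def test_reads_state(self):
            assert self.state['key'] == 'value'

Run under pytest, setup_class fires once and setup_method before each test, which is exactly the split the commit relies on.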
--- pandas/tests/indexes/test_multi.py | 2 +- pandas/tests/test_config.py | 24 +++++++++++++++--------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 402dba0ba08b8..1fe4d85815c4b 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -486,7 +486,7 @@ def test_copy_names(self): def test_names(self): - # names are assigned in __init__ + # names are assigned in setup names = self.index_names level_names = [level.name for level in self.index.levels] assert names == level_names diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py index f014b16976d39..8d6f36ac6a798 100644 --- a/pandas/tests/test_config.py +++ b/pandas/tests/test_config.py @@ -8,22 +8,28 @@ class TestConfig(object): - def __init__(self, *args): - super(TestConfig, self).__init__(*args) - + @classmethod + def setup_class(cls): from copy import deepcopy - self.cf = pd.core.config - self.gc = deepcopy(getattr(self.cf, '_global_config')) - self.do = deepcopy(getattr(self.cf, '_deprecated_options')) - self.ro = deepcopy(getattr(self.cf, '_registered_options')) + + cls.cf = pd.core.config + cls.gc = deepcopy(getattr(cls.cf, '_global_config')) + cls.do = deepcopy(getattr(cls.cf, '_deprecated_options')) + cls.ro = deepcopy(getattr(cls.cf, '_registered_options')) def setup_method(self, method): setattr(self.cf, '_global_config', {}) - setattr( - self.cf, 'options', self.cf.DictWrapper(self.cf._global_config)) + setattr(self.cf, 'options', self.cf.DictWrapper( + self.cf._global_config)) setattr(self.cf, '_deprecated_options', {}) setattr(self.cf, '_registered_options', {}) + # Our test fixture in conftest.py sets "chained_assignment" + # to "raise" only after all test methods have been setup. + # However, after this setup, there is no longer any + # "chained_assignment" option, so re-register it. + self.cf.register_option('chained_assignment', 'raise') + def teardown_method(self, method): setattr(self.cf, '_global_config', self.gc) setattr(self.cf, '_deprecated_options', self.do) From 1a0c878f733c5fc2b37e6f6ab655e943685036f9 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 4 May 2017 17:37:14 -0400 Subject: [PATCH 513/933] DOC, TST: Document and Test Functions in dtypes/common.py (#16237) * DOC: Document pandas.core.dtypes.common Closes gh-15895. * TST: Add tests for pandas.core.dtypes.common The testing of this module was especially lacking with the exception of is_dtype_equal and pandas_dtype. --- pandas/core/dtypes/common.py | 1209 ++++++++++++++++++++++++++-- pandas/tests/dtypes/test_common.py | 459 ++++++++++- 2 files changed, 1584 insertions(+), 84 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index ba822071a3b72..6c2bbe330eeee 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -37,7 +37,7 @@ def _ensure_float(arr): Parameters ---------- - arr : ndarray, Series + arr : array-like The array whose data type we want to enforce as float. Returns @@ -82,46 +82,243 @@ def _ensure_categorical(arr): def is_object_dtype(arr_or_dtype): + """ + Check whether an array-like or dtype is of the object dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype to check. + + Returns + ------- + boolean : Whether or not the array-like or dtype is of the object dtype. 
+ + Examples + -------- + >>> is_object_dtype(object) + True + >>> is_object_dtype(int) + False + >>> is_object_dtype(np.array([], dtype=object)) + True + >>> is_object_dtype(np.array([], dtype=int)) + False + >>> is_object_dtype([1, 2, 3]) + False + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) return issubclass(tipo, np.object_) -def is_sparse(array): - """ return if we are a sparse array """ - return isinstance(array, (ABCSparseArray, ABCSparseSeries)) +def is_sparse(arr): + """ + Check whether an array-like is a pandas sparse array. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is a pandas sparse array. + + Examples + -------- + >>> is_sparse(np.array([1, 2, 3])) + False + >>> is_sparse(pd.SparseArray([1, 2, 3])) + True + >>> is_sparse(pd.SparseSeries([1, 2, 3])) + True + + This function checks only for pandas sparse array instances, so + sparse arrays from other libraries will return False. + + >>> from scipy.sparse import bsr_matrix + >>> is_sparse(bsr_matrix([1, 2, 3])) + False + """ + + return isinstance(arr, (ABCSparseArray, ABCSparseSeries)) + + +def is_scipy_sparse(arr): + """ + Check whether an array-like is a scipy.sparse.spmatrix instance. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is a + scipy.sparse.spmatrix instance. + + Notes + ----- + If scipy is not installed, this function will always return False. + Examples + -------- + >>> from scipy.sparse import bsr_matrix + >>> is_scipy_sparse(bsr_matrix([1, 2, 3])) + True + >>> is_scipy_sparse(pd.SparseArray([1, 2, 3])) + False + >>> is_scipy_sparse(pd.SparseSeries([1, 2, 3])) + False + """ -def is_scipy_sparse(array): - """ return if we are a scipy.sparse.spmatrix """ global _is_scipy_sparse + if _is_scipy_sparse is None: try: from scipy.sparse import issparse as _is_scipy_sparse except ImportError: _is_scipy_sparse = lambda _: False - return _is_scipy_sparse(array) + return _is_scipy_sparse(arr) -def is_categorical(array): - """ return if we are a categorical possibility """ - return isinstance(array, ABCCategorical) or is_categorical_dtype(array) +def is_categorical(arr): + """ + Check whether an array-like is a Categorical instance. -def is_datetimetz(array): - """ return if we are a datetime with tz array """ - return ((isinstance(array, ABCDatetimeIndex) and - getattr(array, 'tz', None) is not None) or - is_datetime64tz_dtype(array)) + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is of a Categorical instance. + + Examples + -------- + >>> is_categorical([1, 2, 3]) + False + + Categoricals and Series Categoricals will return True. + + >>> cat = pd.Categorical([1, 2, 3]) + >>> is_categorical(cat) + True + >>> is_categorical(pd.Series(cat)) + True + """ + + return isinstance(arr, ABCCategorical) or is_categorical_dtype(arr) + + +def is_datetimetz(arr): + """ + Check whether an array-like is a datetime array-like with a timezone + component in its dtype. + + Parameters + ---------- + arr : array-like + The array-like to check. + Returns + ------- + boolean : Whether or not the array-like is a datetime array-like with + a timezone component in its dtype. 
+ + Examples + -------- + >>> is_datetimetz([1, 2, 3]) + False + + Although the following examples are both DatetimeIndex objects, + the first one returns False because it has no timezone component + unlike the second one, which returns True. + + >>> is_datetimetz(pd.DatetimeIndex([1, 2, 3])) + False + >>> is_datetimetz(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + + The object need not be a DatetimeIndex object. It just needs to have + a dtype which has a timezone component. + + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_datetimetz(s) + True + """ + + # TODO: do we need this function? + # It seems like a repeat of is_datetime64tz_dtype. + + return ((isinstance(arr, ABCDatetimeIndex) and + getattr(arr, 'tz', None) is not None) or + is_datetime64tz_dtype(arr)) + + +def is_period(arr): + """ + Check whether an array-like is a periodical index. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is a periodical index. + + Examples + -------- + >>> is_period([1, 2, 3]) + False + >>> is_period(pd.Index([1, 2, 3])) + False + >>> is_period(pd.PeriodIndex(["2017-01-01"], freq="D")) + True + """ -def is_period(array): - """ return if we are a period array """ - return isinstance(array, ABCPeriodIndex) or is_period_arraylike(array) + # TODO: do we need this function? + # It seems like a repeat of is_period_arraylike. + return isinstance(arr, ABCPeriodIndex) or is_period_arraylike(arr) def is_datetime64_dtype(arr_or_dtype): + """ + Check whether an array-like or dtype is of the datetime64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype to check. + + Returns + ------- + boolean : Whether or not the array-like or dtype is of + the datetime64 dtype. + + Examples + -------- + >>> is_datetime64_dtype(object) + False + >>> is_datetime64_dtype(np.datetime64) + True + >>> is_datetime64_dtype(np.array([], dtype=int)) + False + >>> is_datetime64_dtype(np.array([], dtype=np.datetime64)) + True + >>> is_datetime64_dtype([1, 2, 3]) + False + """ + if arr_or_dtype is None: return False try: @@ -132,12 +329,69 @@ def is_datetime64_dtype(arr_or_dtype): def is_datetime64tz_dtype(arr_or_dtype): + """ + Check whether an array-like or dtype is of a DatetimeTZDtype dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype to check. + + Returns + ------- + boolean : Whether or not the array-like or dtype is of + a DatetimeTZDtype dtype. + + Examples + -------- + >>> is_datetime64tz_dtype(object) + False + >>> is_datetime64tz_dtype([1, 2, 3]) + False + >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) # tz-naive + False + >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_datetime64tz_dtype(dtype) + True + >>> is_datetime64tz_dtype(s) + True + """ + if arr_or_dtype is None: return False return DatetimeTZDtype.is_dtype(arr_or_dtype) def is_timedelta64_dtype(arr_or_dtype): + """ + Check whether an array-like or dtype is of the timedelta64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype to check. + + Returns + ------- + boolean : Whether or not the array-like or dtype is + of the timedelta64 dtype. 
+ + Examples + -------- + >>> is_timedelta64_dtype(object) + False + >>> is_timedelta64_dtype(np.timedelta64) + True + >>> is_timedelta64_dtype([1, 2, 3]) + False + >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) + True + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -145,18 +399,102 @@ def is_timedelta64_dtype(arr_or_dtype): def is_period_dtype(arr_or_dtype): + """ + Check whether an array-like or dtype is of the Period dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype to check. + + Returns + ------- + boolean : Whether or not the array-like or dtype is of the Period dtype. + + Examples + -------- + >>> is_period_dtype(object) + False + >>> is_period_dtype(PeriodDtype(freq="D")) + True + >>> is_period_dtype([1, 2, 3]) + False + >>> is_period_dtype(pd.Period("2017-01-01")) + False + >>> is_period_dtype(pd.PeriodIndex([], freq="A")) + True + """ + + # TODO: Consider making Period an instance of PeriodDtype if arr_or_dtype is None: return False return PeriodDtype.is_dtype(arr_or_dtype) def is_interval_dtype(arr_or_dtype): + """ + Check whether an array-like or dtype is of the Interval dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype to check. + + Returns + ------- + boolean : Whether or not the array-like or dtype is + of the Interval dtype. + + Examples + -------- + >>> is_interval_dtype(object) + False + >>> is_interval_dtype(IntervalDtype()) + True + >>> is_interval_dtype([1, 2, 3]) + False + >>> + >>> interval = pd.Interval(1, 2, closed="right") + >>> is_interval_dtype(interval) + False + >>> is_interval_dtype(pd.IntervalIndex([interval])) + True + """ + + # TODO: Consider making Interval an instance of IntervalDtype if arr_or_dtype is None: return False return IntervalDtype.is_dtype(arr_or_dtype) def is_categorical_dtype(arr_or_dtype): + """ + Check whether an array-like or dtype is of the Categorical dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype to check. + + Returns + ------- + boolean : Whether or not the array-like or dtype is + of the Categorical dtype. + + Examples + -------- + >>> is_categorical_dtype(object) + False + >>> is_categorical_dtype(CategoricalDtype()) + True + >>> is_categorical_dtype([1, 2, 3]) + False + >>> is_categorical_dtype(pd.Categorical([1, 2, 3])) + True + >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) + True + """ + if arr_or_dtype is None: return False return CategoricalDtype.is_dtype(arr_or_dtype) @@ -168,7 +506,7 @@ def is_string_dtype(arr_or_dtype): Parameters ---------- - arr_or_dtype : ndarray, dtype, type + arr_or_dtype : array-like The array or dtype to check. Returns @@ -186,7 +524,7 @@ def is_string_dtype(arr_or_dtype): >>> >>> is_string_dtype(np.array(['a', 'b'])) True - >>> is_string_dtype(np.array([1, 2])) + >>> is_string_dtype(pd.Series([1, 2])) False """ @@ -202,7 +540,29 @@ def is_string_dtype(arr_or_dtype): def is_period_arraylike(arr): - """ return if we are period arraylike / PeriodIndex """ + """ + Check whether an array-like is a periodical array-like or PeriodIndex. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is a periodical + array-like or PeriodIndex instance. 
+ + Examples + -------- + >>> is_period_arraylike([1, 2, 3]) + False + >>> is_period_arraylike(pd.Index([1, 2, 3])) + False + >>> is_period_arraylike(pd.PeriodIndex(["2017-01-01"], freq="D")) + True + """ + if isinstance(arr, ABCPeriodIndex): return True elif isinstance(arr, (np.ndarray, ABCSeries)): @@ -211,7 +571,29 @@ def is_period_arraylike(arr): def is_datetime_arraylike(arr): - """ return if we are datetime arraylike / DatetimeIndex """ + """ + Check whether an array-like is a datetime array-like or DatetimeIndex. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is a datetime + array-like or DatetimeIndex. + + Examples + -------- + >>> is_datetime_arraylike([1, 2, 3]) + False + >>> is_datetime_arraylike(pd.Index([1, 2, 3])) + False + >>> is_datetime_arraylike(pd.DatetimeIndex([1, 2, 3])) + True + """ + if isinstance(arr, ABCDatetimeIndex): return True elif isinstance(arr, (np.ndarray, ABCSeries)): @@ -220,6 +602,44 @@ def is_datetime_arraylike(arr): def is_datetimelike(arr): + """ + Check whether an array-like is a datetime-like array-like. + + Acceptable datetime-like objects are (but not limited to) datetime + indices, periodic indices, and timedelta indices. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is a datetime-like array-like. + + Examples + -------- + >>> is_datetimelike([1, 2, 3]) + False + >>> is_datetimelike(pd.Index([1, 2, 3])) + False + >>> is_datetimelike(pd.DatetimeIndex([1, 2, 3])) + True + >>> is_datetimelike(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + >>> is_datetimelike(pd.PeriodIndex([], freq="A")) + True + >>> is_datetimelike(np.array([], dtype=np.datetime64)) + True + >>> is_datetimelike(pd.Series([], dtype="timedelta64[ns]")) + True + >>> + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_datetimelike(s) + True + """ + return (is_datetime64_dtype(arr) or is_datetime64tz_dtype(arr) or is_timedelta64_dtype(arr) or isinstance(arr, ABCPeriodIndex) or @@ -227,7 +647,32 @@ def is_datetimelike(arr): def is_dtype_equal(source, target): - """ return a boolean if the dtypes are equal """ + """ + Check if two dtypes are equal. + + Parameters + ---------- + source : The first dtype to compare + target : The second dtype to compare + + Returns + ---------- + boolean : Whether or not the two dtypes are equal. + + Examples + -------- + >>> is_dtype_equal(int, float) + False + >>> is_dtype_equal("int", int) + True + >>> is_dtype_equal(object, "category") + False + >>> is_dtype_equal(CategoricalDtype(), "category") + True + >>> is_dtype_equal(DatetimeTZDtype(), "datetime64") + False + """ + try: source = _get_dtype(source) target = _get_dtype(target) @@ -240,6 +685,47 @@ def is_dtype_equal(source, target): def is_any_int_dtype(arr_or_dtype): + """ + DEPRECATED: This function will be removed in a future version. + + Check whether the provided array or dtype is of an integer dtype. + + In this function, timedelta64 instances are also considered "any-integer" + type objects and will return True. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of an integer dtype. 
+ + Examples + -------- + >>> is_any_int_dtype(str) + False + >>> is_any_int_dtype(int) + True + >>> is_any_int_dtype(float) + False + >>> is_any_int_dtype(np.uint64) + True + >>> is_any_int_dtype(np.datetime64) + False + >>> is_any_int_dtype(np.timedelta64) + True + >>> is_any_int_dtype(np.array(['a', 'b'])) + False + >>> is_any_int_dtype(pd.Series([1, 2])) + True + >>> is_any_int_dtype(np.array([], dtype=np.timedelta64)) + True + >>> is_any_int_dtype(pd.Index([1, 2.])) # float + False + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -247,6 +733,45 @@ def is_integer_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of an integer dtype. + + Unlike in `is_any_int_dtype`, timedelta64 instances will return False. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of an integer dtype + and not an instance of timedelta64. + + Examples + -------- + >>> is_integer_dtype(str) + False + >>> is_integer_dtype(int) + True + >>> is_integer_dtype(float) + False + >>> is_integer_dtype(np.uint64) + True + >>> is_integer_dtype(np.datetime64) + False + >>> is_integer_dtype(np.timedelta64) + False + >>> is_integer_dtype(np.array(['a', 'b'])) + False + >>> is_integer_dtype(pd.Series([1, 2])) + True + >>> is_integer_dtype(np.array([], dtype=np.timedelta64)) + False + >>> is_integer_dtype(pd.Index([1, 2.])) # float + False + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -255,6 +780,47 @@ def is_signed_integer_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of a signed integer dtype. + + Unlike in `is_any_int_dtype`, timedelta64 instances will return False. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of a signed integer dtype + and not an instance of timedelta64. + + Examples + -------- + >>> is_signed_integer_dtype(str) + False + >>> is_signed_integer_dtype(int) + True + >>> is_signed_integer_dtype(float) + False + >>> is_signed_integer_dtype(np.uint64) # unsigned + False + >>> is_signed_integer_dtype(np.datetime64) + False + >>> is_signed_integer_dtype(np.timedelta64) + False + >>> is_signed_integer_dtype(np.array(['a', 'b'])) + False + >>> is_signed_integer_dtype(pd.Series([1, 2])) + True + >>> is_signed_integer_dtype(np.array([], dtype=np.timedelta64)) + False + >>> is_signed_integer_dtype(pd.Index([1, 2.])) # float + False + >>> is_signed_integer_dtype(np.array([1, 2], dtype=np.uint32)) # unsigned + False + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -263,6 +829,39 @@ def is_unsigned_integer_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of an unsigned integer dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of an + unsigned integer dtype.
+ + Examples + -------- + >>> is_unsigned_integer_dtype(str) + False + >>> is_unsigned_integer_dtype(int) # signed + False + >>> is_unsigned_integer_dtype(float) + False + >>> is_unsigned_integer_dtype(np.uint64) + True + >>> is_unsigned_integer_dtype(np.array(['a', 'b'])) + False + >>> is_unsigned_integer_dtype(pd.Series([1, 2])) # signed + False + >>> is_unsigned_integer_dtype(pd.Index([1, 2.])) # float + False + >>> is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32)) + True + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -271,6 +870,46 @@ def is_unsigned_integer_dtype(arr_or_dtype): def is_int64_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of the int64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of the int64 dtype. + + Notes + ----- + Depending on system architecture, the return value of `is_int64_dtype( + int)` will be True if the OS uses 64-bit integers and False if the OS + uses 32-bit integers. + + Examples + -------- + >>> is_int64_dtype(str) + False + >>> is_int64_dtype(np.int32) + False + >>> is_int64_dtype(np.int64) + True + >>> is_int64_dtype(float) + False + >>> is_int64_dtype(np.uint64) # unsigned + False + >>> is_int64_dtype(np.array(['a', 'b'])) + False + >>> is_int64_dtype(np.array([1, 2], dtype=np.int64)) + True + >>> is_int64_dtype(pd.Index([1, 2.])) # float + False + >>> is_int64_dtype(np.array([1, 2], dtype=np.uint32)) # unsigned + False + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -278,6 +917,46 @@ def is_int64_dtype(arr_or_dtype): def is_int_or_datetime_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of an + integer, timedelta64, or datetime64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of an + integer, timedelta64, or datetime64 dtype. + + Examples + -------- + >>> is_int_or_datetime_dtype(str) + False + >>> is_int_or_datetime_dtype(int) + True + >>> is_int_or_datetime_dtype(float) + False + >>> is_int_or_datetime_dtype(np.uint64) + True + >>> is_int_or_datetime_dtype(np.datetime64) + True + >>> is_int_or_datetime_dtype(np.timedelta64) + True + >>> is_int_or_datetime_dtype(np.array(['a', 'b'])) + False + >>> is_int_or_datetime_dtype(pd.Series([1, 2])) + True + >>> is_int_or_datetime_dtype(np.array([], dtype=np.timedelta64)) + True + >>> is_int_or_datetime_dtype(np.array([], dtype=np.datetime64)) + True + >>> is_int_or_datetime_dtype(pd.Index([1, 2.])) # float + False + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -285,7 +964,40 @@ def is_int_or_datetime_dtype(arr_or_dtype): issubclass(tipo, (np.datetime64, np.timedelta64))) -def is_datetime64_any_dtype(arr_or_dtype): +def is_datetime64_any_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of the datetime64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of the datetime64 dtype. 
+ + Examples + -------- + >>> is_datetime64_any_dtype(str) + False + >>> is_datetime64_any_dtype(int) + False + >>> is_datetime64_any_dtype(np.datetime64) # can be tz-naive + True + >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) + True + >>> is_datetime64_any_dtype(np.array(['a', 'b'])) + False + >>> is_datetime64_any_dtype(np.array([1, 2])) + False + >>> is_datetime64_any_dtype(np.array([], dtype=np.datetime64)) + True + >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], + dtype=np.datetime64)) + True + """ + if arr_or_dtype is None: return False return (is_datetime64_dtype(arr_or_dtype) or @@ -293,6 +1005,42 @@ def is_datetime64_any_dtype(arr_or_dtype): def is_datetime64_ns_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of the datetime64[ns] dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of the datetime64[ns] dtype. + + Examples + -------- + >>> is_datetime64_ns_dtype(str) + False + >>> is_datetime64_ns_dtype(int) + False + >>> is_datetime64_ns_dtype(np.datetime64) # no unit + False + >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) + True + >>> is_datetime64_ns_dtype(np.array(['a', 'b'])) + False + >>> is_datetime64_ns_dtype(np.array([1, 2])) + False + >>> is_datetime64_ns_dtype(np.array([], dtype=np.datetime64)) # no unit + False + >>> is_datetime64_ns_dtype(np.array([], + dtype="datetime64[ps]")) # wrong unit + False + >>> is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], + dtype=np.datetime64)) # has 'ns' unit + True + """ + if arr_or_dtype is None: return False try: @@ -314,21 +1062,20 @@ def is_timedelta64_ns_dtype(arr_or_dtype): Parameters ---------- - arr_or_dtype : ndarray, dtype, type + arr_or_dtype : array-like The array or dtype to check. Returns ------- - boolean : Whether or not the array or dtype - is of the timedelta64[ns] dtype. + boolean : Whether or not the array or dtype is of the + timedelta64[ns] dtype. Examples -------- - >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]') + >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]')) True - >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]') # Wrong frequency + >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]')) # Wrong frequency False - >>> >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]')) True >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) @@ -345,6 +1092,40 @@ def is_timedelta64_ns_dtype(arr_or_dtype): def is_datetime_or_timedelta_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of + a timedelta64 or datetime64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of a + timedelta64, or datetime64 dtype. 
+ + Examples + -------- + >>> is_datetime_or_timedelta_dtype(str) + False + >>> is_datetime_or_timedelta_dtype(int) + False + >>> is_datetime_or_timedelta_dtype(np.datetime64) + True + >>> is_datetime_or_timedelta_dtype(np.timedelta64) + True + >>> is_datetime_or_timedelta_dtype(np.array(['a', 'b'])) + False + >>> is_datetime_or_timedelta_dtype(pd.Series([1, 2])) + False + >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64)) + True + >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) + True + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -378,11 +1159,45 @@ def _is_unorderable_exception(e): def is_numeric_v_string_like(a, b): """ - numpy doesn't like to compare numeric arrays vs scalar string-likes + Check if we are comparing a string-like object to a numeric ndarray. + + NumPy doesn't like to compare such objects, especially numeric arrays + and scalar string-likes. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. - return a boolean result if this is the case for a,b or b,a + Returns + ------- + boolean : Whether we are comparing a string-like + object to a numeric array. + Examples + -------- + >>> is_numeric_v_string_like(1, 1) + False + >>> is_numeric_v_string_like("foo", "foo") + False + >>> is_numeric_v_string_like(1, "foo") # non-array numeric + False + >>> is_numeric_v_string_like(np.array([1]), "foo") + True + >>> is_numeric_v_string_like("foo", np.array([1])) # symmetric check + True + >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + True + >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + True + >>> is_numeric_v_string_like(np.array([1]), np.array([2])) + False + >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + False """ + is_a_array = isinstance(a, np.ndarray) is_b_array = isinstance(b, np.ndarray) @@ -401,13 +1216,56 @@ def is_numeric_v_string_like(a, b): def is_datetimelike_v_numeric(a, b): - # return if we have an i8 convertible and numeric comparison + """ + Check if we are comparing a datetime-like object to a numeric object. + + By "numeric," we mean an object that is either of an int or float dtype. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean : Whether we are comparing a datetime-like + to a numeric object. + + Examples + -------- + >>> dt = np.datetime64(pd.datetime(2017, 1, 1)) + >>> + >>> is_datetimelike_v_numeric(1, 1) + False + >>> is_datetimelike_v_numeric(dt, dt) + False + >>> is_datetimelike_v_numeric(1, dt) + True + >>> is_datetimelike_v_numeric(dt, 1) # symmetric check + True + >>> is_datetimelike_v_numeric(np.array([dt]), 1) + True + >>> is_datetimelike_v_numeric(np.array([1]), dt) + True + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + True + >>> is_datetimelike_v_numeric(np.array([1]), np.array([2])) + False + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + False + """ + if not hasattr(a, 'dtype'): a = np.asarray(a) if not hasattr(b, 'dtype'): b = np.asarray(b) def is_numeric(x): + """ + Check if an object has a numeric dtype (i.e. integer or float).
+ """ return is_integer_dtype(x) or is_float_dtype(x) is_datetimelike = needs_i8_conversion @@ -416,24 +1274,92 @@ def is_numeric(x): def is_datetimelike_v_object(a, b): - # return if we have an i8 convertible and object comparsion + """ + Check if we are comparing a datetime-like object to an object instance. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean : Whether we are comparing a datetime-like + to an object instance. + + Examples + -------- + >>> obj = object() + >>> dt = np.datetime64(pd.datetime(2017, 1, 1)) + >>> + >>> is_datetimelike_v_object(obj, obj) + False + >>> is_datetimelike_v_object(dt, dt) + False + >>> is_datetimelike_v_object(obj, dt) + True + >>> is_datetimelike_v_object(dt, obj) # symmetric check + True + >>> is_datetimelike_v_object(np.array([dt]), obj) + True + >>> is_datetimelike_v_object(np.array([obj]), dt) + True + >>> is_datetimelike_v_object(np.array([dt]), np.array([obj])) + True + >>> is_datetimelike_v_object(np.array([obj]), np.array([obj])) + False + >>> is_datetimelike_v_object(np.array([dt]), np.array([1])) + False + >>> is_datetimelike_v_object(np.array([dt]), np.array([dt])) + False + """ + + if not hasattr(a, 'dtype'): a = np.asarray(a) if not hasattr(b, 'dtype'): b = np.asarray(b) - def f(x): - return is_object_dtype(x) - - def is_object(x): - return is_integer_dtype(x) or is_float_dtype(x) is_datetimelike = needs_i8_conversion - return ((is_datetimelike(a) and is_object(b)) or - (is_datetimelike(b) and is_object(a))) + return ((is_datetimelike(a) and is_object_dtype(b)) or + (is_datetimelike(b) and is_object_dtype(a))) def needs_i8_conversion(arr_or_dtype): + """ + Check whether the array or dtype should be converted to int64. + + An array-like or dtype "needs" such a conversion if the array-like + or dtype is of a datetime-like dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype should be converted to int64. + + Examples + -------- + >>> needs_i8_conversion(str) + False + >>> needs_i8_conversion(np.int64) + False + >>> needs_i8_conversion(np.datetime64) + True + >>> needs_i8_conversion(np.array(['a', 'b'])) + False + >>> needs_i8_conversion(pd.Series([1, 2])) + False + >>> needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) + True + >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + """ + + if arr_or_dtype is None: return False return (is_datetime_or_timedelta_dtype(arr_or_dtype) or @@ -442,6 +1368,42 @@ def needs_i8_conversion(arr_or_dtype): def is_numeric_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of a numeric dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of a numeric dtype.
+ + Examples + -------- + >>> is_numeric_dtype(str) + False + >>> is_numeric_dtype(int) + True + >>> is_numeric_dtype(float) + True + >>> is_numeric_dtype(np.uint64) + True + >>> is_numeric_dtype(np.datetime64) + False + >>> is_numeric_dtype(np.timedelta64) + False + >>> is_numeric_dtype(np.array(['a', 'b'])) + False + >>> is_numeric_dtype(pd.Series([1, 2])) + True + >>> is_numeric_dtype(pd.Index([1, 2.])) + True + >>> is_numeric_dtype(np.array([], dtype=np.timedelta64)) + False + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -458,7 +1420,7 @@ def is_string_like_dtype(arr_or_dtype): Parameters ---------- - arr_or_dtype : ndarray, dtype, type + arr_or_dtype : array-like The array or dtype to check. Returns @@ -471,10 +1433,9 @@ def is_string_like_dtype(arr_or_dtype): True >>> is_string_like_dtype(object) False - >>> >>> is_string_like_dtype(np.array(['a', 'b'])) True - >>> is_string_like_dtype(np.array([1, 2])) + >>> is_string_like_dtype(pd.Series([1, 2])) False """ @@ -488,6 +1449,34 @@ def is_string_like_dtype(arr_or_dtype): def is_float_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of a float dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of a float dtype. + + Examples + -------- + >>> is_float_dtype(str) + False + >>> is_float_dtype(int) + False + >>> is_float_dtype(float) + True + >>> is_float_dtype(np.array(['a', 'b'])) + False + >>> is_float_dtype(pd.Series([1, 2])) + False + >>> is_float_dtype(pd.Index([1, 2.])) + True + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -495,6 +1484,16 @@ def is_float_dtype(arr_or_dtype): def is_floating_dtype(arr_or_dtype): + """ + DEPRECATED: This function will be removed in a future version. + + Check whether the provided array or dtype is an instance of + numpy's float dtype. + + Unlike, `is_float_dtype`, this check is a lot stricter, as it requires + `isinstance` of `np.floating` and not `issubclass`. + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -502,6 +1501,36 @@ def is_floating_dtype(arr_or_dtype): def is_bool_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of a boolean dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of a boolean dtype. + + Examples + -------- + >>> is_bool_dtype(str) + False + >>> is_bool_dtype(int) + False + >>> is_bool_dtype(bool) + True + >>> is_bool_dtype(np.bool) + True + >>> is_bool_dtype(np.array(['a', 'b'])) + False + >>> is_bool_dtype(pd.Series([1, 2])) + False + >>> is_bool_dtype(np.array([True, False])) + True + """ + if arr_or_dtype is None: return False try: @@ -512,21 +1541,94 @@ def is_bool_dtype(arr_or_dtype): return issubclass(tipo, np.bool_) -def is_extension_type(value): +def is_extension_type(arr): """ - if we are a klass that is preserved by the internals - these are internal klasses that we represent (and don't use a np.array) + Check whether an array-like is of a pandas extension class instance. + + Extension classes include categoricals, pandas sparse objects (i.e. + classes represented within the pandas library and not ones external + to it like scipy sparse matrices), and datetime-like arrays. + + Parameters + ---------- + arr : array-like + The array-like to check. 
+ + Returns + ------- + boolean : Whether or not the array-like is of a pandas + extension class instance. + + Examples + -------- + >>> is_extension_type([1, 2, 3]) + False + >>> is_extension_type(np.array([1, 2, 3])) + False + >>> + >>> cat = pd.Categorical([1, 2, 3]) + >>> + >>> is_extension_type(cat) + True + >>> is_extension_type(pd.Series(cat)) + True + >>> is_extension_type(pd.SparseArray([1, 2, 3])) + True + >>> is_extension_type(pd.SparseSeries([1, 2, 3])) + True + >>> + >>> from scipy.sparse import bsr_matrix + >>> is_extension_type(bsr_matrix([1, 2, 3])) + False + >>> is_extension_type(pd.DatetimeIndex([1, 2, 3])) + False + >>> is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + >>> + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_extension_type(s) + True """ - if is_categorical(value): + + if is_categorical(arr): return True - elif is_sparse(value): + elif is_sparse(arr): return True - elif is_datetimetz(value): + elif is_datetimetz(arr): return True return False def is_complex_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of a complex dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of a complex dtype. + + Examples + -------- + >>> is_complex_dtype(str) + False + >>> is_complex_dtype(int) + False + >>> is_complex_dtype(np.complex) + True + >>> is_complex_dtype(np.array(['a', 'b'])) + False + >>> is_complex_dtype(pd.Series([1, 2])) + False + >>> is_complex_dtype(np.array([1 + 1j, 5])) + True + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -570,7 +1672,7 @@ def _get_dtype(arr_or_dtype): Parameters ---------- - arr_or_dtype : ndarray, Series, dtype, type + arr_or_dtype : array-like The array-like or dtype object whose dtype we want to extract. Returns @@ -619,7 +1721,7 @@ def _get_dtype_type(arr_or_dtype): Parameters ---------- - arr_or_dtype : ndarray, Series, dtype, type + arr_or_dtype : array-like The array-like or dtype object whose type we want to extract.
Returns @@ -754,6 +1856,7 @@ def pandas_dtype(dtype): ------- np.dtype or a pandas dtype """ + if isinstance(dtype, DatetimeTZDtype): return dtype elif isinstance(dtype, PeriodDtype): diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 68518e235d417..5b74397b1e770 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -4,11 +4,10 @@ import numpy as np import pandas as pd -from pandas.core.dtypes.dtypes import ( - DatetimeTZDtype, PeriodDtype, CategoricalDtype) -from pandas.core.dtypes.common import ( - pandas_dtype, is_dtype_equal) +from pandas.core.dtypes.dtypes import (DatetimeTZDtype, PeriodDtype, + CategoricalDtype, IntervalDtype) +import pandas.core.dtypes.common as com import pandas.util.testing as tm @@ -21,49 +20,49 @@ def test_invalid_dtype_error(self): invalid_list = [pd.Timestamp, 'pd.Timestamp', list] for dtype in invalid_list: with tm.assert_raises_regex(TypeError, msg): - pandas_dtype(dtype) + com.pandas_dtype(dtype) valid_list = [object, 'float64', np.object_, np.dtype('object'), 'O', np.float64, float, np.dtype('float64')] for dtype in valid_list: - pandas_dtype(dtype) + com.pandas_dtype(dtype) def test_numpy_dtype(self): for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']: - assert pandas_dtype(dtype) == np.dtype(dtype) + assert com.pandas_dtype(dtype) == np.dtype(dtype) def test_numpy_string_dtype(self): # do not parse freq-like string as period dtype - assert pandas_dtype('U') == np.dtype('U') - assert pandas_dtype('S') == np.dtype('S') + assert com.pandas_dtype('U') == np.dtype('U') + assert com.pandas_dtype('S') == np.dtype('S') def test_datetimetz_dtype(self): for dtype in ['datetime64[ns, US/Eastern]', 'datetime64[ns, Asia/Tokyo]', 'datetime64[ns, UTC]']: - assert pandas_dtype(dtype) is DatetimeTZDtype(dtype) - assert pandas_dtype(dtype) == DatetimeTZDtype(dtype) - assert pandas_dtype(dtype) == dtype + assert com.pandas_dtype(dtype) is DatetimeTZDtype(dtype) + assert com.pandas_dtype(dtype) == DatetimeTZDtype(dtype) + assert com.pandas_dtype(dtype) == dtype def test_categorical_dtype(self): - assert pandas_dtype('category') == CategoricalDtype() + assert com.pandas_dtype('category') == CategoricalDtype() def test_period_dtype(self): for dtype in ['period[D]', 'period[3M]', 'period[U]', 'Period[D]', 'Period[3M]', 'Period[U]']: - assert pandas_dtype(dtype) is PeriodDtype(dtype) - assert pandas_dtype(dtype) == PeriodDtype(dtype) - assert pandas_dtype(dtype) == dtype + assert com.pandas_dtype(dtype) is PeriodDtype(dtype) + assert com.pandas_dtype(dtype) == PeriodDtype(dtype) + assert com.pandas_dtype(dtype) == dtype -dtypes = dict(datetime_tz=pandas_dtype('datetime64[ns, US/Eastern]'), - datetime=pandas_dtype('datetime64[ns]'), - timedelta=pandas_dtype('timedelta64[ns]'), +dtypes = dict(datetime_tz=com.pandas_dtype('datetime64[ns, US/Eastern]'), + datetime=com.pandas_dtype('datetime64[ns]'), + timedelta=com.pandas_dtype('timedelta64[ns]'), period=PeriodDtype('D'), integer=np.dtype(np.int64), float=np.dtype(np.float64), object=np.dtype(np.object), - category=pandas_dtype('category')) + category=com.pandas_dtype('category')) @pytest.mark.parametrize('name1,dtype1', @@ -75,31 +74,30 @@ def test_period_dtype(self): def test_dtype_equal(name1, dtype1, name2, dtype2): # match equal to self, but not equal to other - assert is_dtype_equal(dtype1, dtype1) + assert com.is_dtype_equal(dtype1, dtype1) if name1 != name2: - assert not is_dtype_equal(dtype1, dtype2) + assert not 
com.is_dtype_equal(dtype1, dtype2) def test_dtype_equal_strict(): # we are strict on kind equality for dtype in [np.int8, np.int16, np.int32]: - assert not is_dtype_equal(np.int64, dtype) + assert not com.is_dtype_equal(np.int64, dtype) for dtype in [np.float32]: - assert not is_dtype_equal(np.float64, dtype) + assert not com.is_dtype_equal(np.float64, dtype) # strict w.r.t. PeriodDtype - assert not is_dtype_equal(PeriodDtype('D'), - PeriodDtype('2D')) + assert not com.is_dtype_equal(PeriodDtype('D'), PeriodDtype('2D')) # strict w.r.t. datetime64 - assert not is_dtype_equal( - pandas_dtype('datetime64[ns, US/Eastern]'), - pandas_dtype('datetime64[ns, CET]')) + assert not com.is_dtype_equal( + com.pandas_dtype('datetime64[ns, US/Eastern]'), + com.pandas_dtype('datetime64[ns, CET]')) # see gh-15941: no exception should be raised - assert not is_dtype_equal(None, None) + assert not com.is_dtype_equal(None, None) def get_is_dtype_funcs(): @@ -108,7 +106,6 @@ def get_is_dtype_funcs(): begin with 'is_' and end with 'dtype' """ - import pandas.core.dtypes.common as com fnames = [f for f in dir(com) if (f.startswith('is_') and f.endswith('dtype'))] @@ -124,3 +121,403 @@ def test_get_dtype_error_catch(func): # No exception should be raised. assert not func(None) + + +def test_is_object(): + assert com.is_object_dtype(object) + assert com.is_object_dtype(np.array([], dtype=object)) + + assert not com.is_object_dtype(int) + assert not com.is_object_dtype(np.array([], dtype=int)) + assert not com.is_object_dtype([1, 2, 3]) + + +def test_is_sparse(): + assert com.is_sparse(pd.SparseArray([1, 2, 3])) + assert com.is_sparse(pd.SparseSeries([1, 2, 3])) + + assert not com.is_sparse(np.array([1, 2, 3])) + + # This test will only skip if the previous assertions + # pass AND scipy is not installed. 
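# A hedged aside (illustrative sketch, not part of the commit being quoted):
# the split encoded by these assertions is that ``is_sparse`` recognizes only
# pandas' own sparse containers, while SciPy matrices are deliberately excluded
# and handled by ``is_scipy_sparse`` below. On a 0.20-era install, roughly:
#
#     import numpy as np
#     import pandas as pd
#     import pandas.core.dtypes.common as com
#     com.is_sparse(pd.SparseArray([1, 2, 3]))   # True: pandas sparse container
#     com.is_sparse(np.array([1, 2, 3]))         # False: dense ndarray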
+ sparse = pytest.importorskip("scipy.sparse") + assert not com.is_sparse(sparse.bsr_matrix([1, 2, 3])) + + +def test_is_scipy_sparse(): + tm._skip_if_no_scipy() + + from scipy.sparse import bsr_matrix + assert com.is_scipy_sparse(bsr_matrix([1, 2, 3])) + + assert not com.is_scipy_sparse(pd.SparseArray([1, 2, 3])) + assert not com.is_scipy_sparse(pd.SparseSeries([1, 2, 3])) + + +def test_is_categorical(): + cat = pd.Categorical([1, 2, 3]) + assert com.is_categorical(cat) + assert com.is_categorical(pd.Series(cat)) + + assert not com.is_categorical([1, 2, 3]) + + +def test_is_datetimetz(): + assert not com.is_datetimetz([1, 2, 3]) + assert not com.is_datetimetz(pd.DatetimeIndex([1, 2, 3])) + + assert com.is_datetimetz(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + + dtype = DatetimeTZDtype("ns", tz="US/Eastern") + s = pd.Series([], dtype=dtype) + assert com.is_datetimetz(s) + + +def test_is_period(): + assert not com.is_period([1, 2, 3]) + assert not com.is_period(pd.Index([1, 2, 3])) + assert com.is_period(pd.PeriodIndex(["2017-01-01"], freq="D")) + + +def test_is_datetime64_dtype(): + assert not com.is_datetime64_dtype(object) + assert not com.is_datetime64_dtype([1, 2, 3]) + assert not com.is_datetime64_dtype(np.array([], dtype=int)) + + assert com.is_datetime64_dtype(np.datetime64) + assert com.is_datetime64_dtype(np.array([], dtype=np.datetime64)) + + +def test_is_datetime64tz_dtype(): + assert not com.is_datetime64tz_dtype(object) + assert not com.is_datetime64tz_dtype([1, 2, 3]) + assert not com.is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) + assert com.is_datetime64tz_dtype(pd.DatetimeIndex( + [1, 2, 3], tz="US/Eastern")) + + +def test_is_timedelta64_dtype(): + assert not com.is_timedelta64_dtype(object) + assert not com.is_timedelta64_dtype([1, 2, 3]) + + assert com.is_timedelta64_dtype(np.timedelta64) + assert com.is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) + + +def test_is_period_dtype(): + assert not com.is_period_dtype(object) + assert not com.is_period_dtype([1, 2, 3]) + assert not com.is_period_dtype(pd.Period("2017-01-01")) + + assert com.is_period_dtype(PeriodDtype(freq="D")) + assert com.is_period_dtype(pd.PeriodIndex([], freq="A")) + + +def test_is_interval_dtype(): + assert not com.is_interval_dtype(object) + assert not com.is_interval_dtype([1, 2, 3]) + + assert com.is_interval_dtype(IntervalDtype()) + + interval = pd.Interval(1, 2, closed="right") + assert not com.is_interval_dtype(interval) + assert com.is_interval_dtype(pd.IntervalIndex([interval])) + + +def test_is_categorical_dtype(): + assert not com.is_categorical_dtype(object) + assert not com.is_categorical_dtype([1, 2, 3]) + + assert com.is_categorical_dtype(CategoricalDtype()) + assert com.is_categorical_dtype(pd.Categorical([1, 2, 3])) + assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) + + +def test_is_string_dtype(): + assert not com.is_string_dtype(int) + assert not com.is_string_dtype(pd.Series([1, 2])) + + assert com.is_string_dtype(str) + assert com.is_string_dtype(object) + assert com.is_string_dtype(np.array(['a', 'b'])) + + +def test_is_period_arraylike(): + assert not com.is_period_arraylike([1, 2, 3]) + assert not com.is_period_arraylike(pd.Index([1, 2, 3])) + assert com.is_period_arraylike(pd.PeriodIndex(["2017-01-01"], freq="D")) + + +def test_is_datetime_arraylike(): + assert not com.is_datetime_arraylike([1, 2, 3]) + assert not com.is_datetime_arraylike(pd.Index([1, 2, 3])) + assert com.is_datetime_arraylike(pd.DatetimeIndex([1, 2, 3])) + + +def 
test_is_datetimelike(): + assert not com.is_datetimelike([1, 2, 3]) + assert not com.is_datetimelike(pd.Index([1, 2, 3])) + + assert com.is_datetimelike(pd.DatetimeIndex([1, 2, 3])) + assert com.is_datetimelike(pd.PeriodIndex([], freq="A")) + assert com.is_datetimelike(np.array([], dtype=np.datetime64)) + assert com.is_datetimelike(pd.Series([], dtype="timedelta64[ns]")) + assert com.is_datetimelike(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + + dtype = DatetimeTZDtype("ns", tz="US/Eastern") + s = pd.Series([], dtype=dtype) + assert com.is_datetimelike(s) + + +def test_is_integer_dtype(): + assert not com.is_integer_dtype(str) + assert not com.is_integer_dtype(float) + assert not com.is_integer_dtype(np.datetime64) + assert not com.is_integer_dtype(np.timedelta64) + assert not com.is_integer_dtype(pd.Index([1, 2.])) + assert not com.is_integer_dtype(np.array(['a', 'b'])) + assert not com.is_integer_dtype(np.array([], dtype=np.timedelta64)) + + assert com.is_integer_dtype(int) + assert com.is_integer_dtype(np.uint64) + assert com.is_integer_dtype(pd.Series([1, 2])) + + +def test_is_signed_integer_dtype(): + assert not com.is_signed_integer_dtype(str) + assert not com.is_signed_integer_dtype(float) + assert not com.is_signed_integer_dtype(np.uint64) + assert not com.is_signed_integer_dtype(np.datetime64) + assert not com.is_signed_integer_dtype(np.timedelta64) + assert not com.is_signed_integer_dtype(pd.Index([1, 2.])) + assert not com.is_signed_integer_dtype(np.array(['a', 'b'])) + assert not com.is_signed_integer_dtype(np.array([1, 2], dtype=np.uint32)) + assert not com.is_signed_integer_dtype(np.array([], dtype=np.timedelta64)) + + assert com.is_signed_integer_dtype(int) + assert com.is_signed_integer_dtype(pd.Series([1, 2])) + + +def test_is_unsigned_integer_dtype(): + assert not com.is_unsigned_integer_dtype(str) + assert not com.is_unsigned_integer_dtype(int) + assert not com.is_unsigned_integer_dtype(float) + assert not com.is_unsigned_integer_dtype(pd.Series([1, 2])) + assert not com.is_unsigned_integer_dtype(pd.Index([1, 2.])) + assert not com.is_unsigned_integer_dtype(np.array(['a', 'b'])) + + assert com.is_unsigned_integer_dtype(np.uint64) + assert com.is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32)) + + +def test_is_int64_dtype(): + assert not com.is_int64_dtype(str) + assert not com.is_int64_dtype(float) + assert not com.is_int64_dtype(np.int32) + assert not com.is_int64_dtype(np.uint64) + assert not com.is_int64_dtype(pd.Index([1, 2.])) + assert not com.is_int64_dtype(np.array(['a', 'b'])) + assert not com.is_int64_dtype(np.array([1, 2], dtype=np.uint32)) + + assert com.is_int64_dtype(np.int64) + assert com.is_int64_dtype(np.array([1, 2], dtype=np.int64)) + + +def test_is_int_or_datetime_dtype(): + assert not com.is_int_or_datetime_dtype(str) + assert not com.is_int_or_datetime_dtype(float) + assert not com.is_int_or_datetime_dtype(pd.Index([1, 2.])) + assert not com.is_int_or_datetime_dtype(np.array(['a', 'b'])) + + assert com.is_int_or_datetime_dtype(int) + assert com.is_int_or_datetime_dtype(np.uint64) + assert com.is_int_or_datetime_dtype(np.datetime64) + assert com.is_int_or_datetime_dtype(np.timedelta64) + assert com.is_int_or_datetime_dtype(pd.Series([1, 2])) + assert com.is_int_or_datetime_dtype(np.array([], dtype=np.datetime64)) + assert com.is_int_or_datetime_dtype(np.array([], dtype=np.timedelta64)) + + +def test_is_datetime64_any_dtype(): + assert not com.is_datetime64_any_dtype(int) + assert not com.is_datetime64_any_dtype(str) + assert not 
com.is_datetime64_any_dtype(np.array([1, 2])) + assert not com.is_datetime64_any_dtype(np.array(['a', 'b'])) + + assert com.is_datetime64_any_dtype(np.datetime64) + assert com.is_datetime64_any_dtype(np.array([], dtype=np.datetime64)) + assert com.is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) + assert com.is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], + dtype=np.datetime64)) + + +def test_is_datetime64_ns_dtype(): + assert not com.is_datetime64_ns_dtype(int) + assert not com.is_datetime64_ns_dtype(str) + assert not com.is_datetime64_ns_dtype(np.datetime64) + assert not com.is_datetime64_ns_dtype(np.array([1, 2])) + assert not com.is_datetime64_ns_dtype(np.array(['a', 'b'])) + assert not com.is_datetime64_ns_dtype(np.array([], dtype=np.datetime64)) + + # This datetime array has the wrong unit (ps instead of ns) + assert not com.is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]")) + + assert com.is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) + assert com.is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], + dtype=np.datetime64)) + + +def test_is_timedelta64_ns_dtype(): + assert not com.is_timedelta64_ns_dtype(np.dtype('m8[ps]')) + assert not com.is_timedelta64_ns_dtype( + np.array([1, 2], dtype=np.timedelta64)) + + assert com.is_timedelta64_ns_dtype(np.dtype('m8[ns]')) + assert com.is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]')) + + +def test_is_datetime_or_timedelta_dtype(): + assert not com.is_datetime_or_timedelta_dtype(int) + assert not com.is_datetime_or_timedelta_dtype(str) + assert not com.is_datetime_or_timedelta_dtype(pd.Series([1, 2])) + assert not com.is_datetime_or_timedelta_dtype(np.array(['a', 'b'])) + + assert com.is_datetime_or_timedelta_dtype(np.datetime64) + assert com.is_datetime_or_timedelta_dtype(np.timedelta64) + assert com.is_datetime_or_timedelta_dtype( + np.array([], dtype=np.timedelta64)) + assert com.is_datetime_or_timedelta_dtype( + np.array([], dtype=np.datetime64)) + + +def test_is_numeric_v_string_like(): + assert not com.is_numeric_v_string_like(1, 1) + assert not com.is_numeric_v_string_like(1, "foo") + assert not com.is_numeric_v_string_like("foo", "foo") + assert not com.is_numeric_v_string_like(np.array([1]), np.array([2])) + assert not com.is_numeric_v_string_like( + np.array(["foo"]), np.array(["foo"])) + + assert com.is_numeric_v_string_like(np.array([1]), "foo") + assert com.is_numeric_v_string_like("foo", np.array([1])) + assert com.is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + + +def test_is_datetimelike_v_numeric(): + dt = np.datetime64(pd.datetime(2017, 1, 1)) + + assert not com.is_datetimelike_v_numeric(1, 1) + assert not com.is_datetimelike_v_numeric(dt, dt) + assert not com.is_datetimelike_v_numeric(np.array([1]), np.array([2])) + assert not com.is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + + assert com.is_datetimelike_v_numeric(1, dt) + assert com.is_datetimelike_v_numeric(1, dt) + assert com.is_datetimelike_v_numeric(np.array([dt]), 1) + assert com.is_datetimelike_v_numeric(np.array([1]), dt) + assert com.is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + + +def test_is_datetimelike_v_object(): + obj = object() + dt = np.datetime64(pd.datetime(2017, 1, 1)) + + assert not com.is_datetimelike_v_object(dt, dt) + assert not com.is_datetimelike_v_object(obj, obj) + assert not com.is_datetimelike_v_object(np.array([dt]), np.array([1])) + assert not 
com.is_datetimelike_v_object(np.array([dt]), np.array([dt])) + assert not com.is_datetimelike_v_object(np.array([obj]), np.array([obj])) + + assert com.is_datetimelike_v_object(dt, obj) + assert com.is_datetimelike_v_object(obj, dt) + assert com.is_datetimelike_v_object(np.array([dt]), obj) + assert com.is_datetimelike_v_object(np.array([obj]), dt) + assert com.is_datetimelike_v_object(np.array([dt]), np.array([obj])) + + +def test_needs_i8_conversion(): + assert not com.needs_i8_conversion(str) + assert not com.needs_i8_conversion(np.int64) + assert not com.needs_i8_conversion(pd.Series([1, 2])) + assert not com.needs_i8_conversion(np.array(['a', 'b'])) + + assert com.needs_i8_conversion(np.datetime64) + assert com.needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) + assert com.needs_i8_conversion(pd.DatetimeIndex( + [1, 2, 3], tz="US/Eastern")) + + +def test_is_numeric_dtype(): + assert not com.is_numeric_dtype(str) + assert not com.is_numeric_dtype(np.datetime64) + assert not com.is_numeric_dtype(np.timedelta64) + assert not com.is_numeric_dtype(np.array(['a', 'b'])) + assert not com.is_numeric_dtype(np.array([], dtype=np.timedelta64)) + + assert com.is_numeric_dtype(int) + assert com.is_numeric_dtype(float) + assert com.is_numeric_dtype(np.uint64) + assert com.is_numeric_dtype(pd.Series([1, 2])) + assert com.is_numeric_dtype(pd.Index([1, 2.])) + + +def test_is_string_like_dtype(): + assert not com.is_string_like_dtype(object) + assert not com.is_string_like_dtype(pd.Series([1, 2])) + + assert com.is_string_like_dtype(str) + assert com.is_string_like_dtype(np.array(['a', 'b'])) + + +def test_is_float_dtype(): + assert not com.is_float_dtype(str) + assert not com.is_float_dtype(int) + assert not com.is_float_dtype(pd.Series([1, 2])) + assert not com.is_float_dtype(np.array(['a', 'b'])) + + assert com.is_float_dtype(float) + assert com.is_float_dtype(pd.Index([1, 2.])) + + +def test_is_bool_dtype(): + assert not com.is_bool_dtype(int) + assert not com.is_bool_dtype(str) + assert not com.is_bool_dtype(pd.Series([1, 2])) + assert not com.is_bool_dtype(np.array(['a', 'b'])) + + assert com.is_bool_dtype(bool) + assert com.is_bool_dtype(np.bool) + assert com.is_bool_dtype(np.array([True, False])) + + +def test_is_extension_type(): + assert not com.is_extension_type([1, 2, 3]) + assert not com.is_extension_type(np.array([1, 2, 3])) + assert not com.is_extension_type(pd.DatetimeIndex([1, 2, 3])) + + cat = pd.Categorical([1, 2, 3]) + assert com.is_extension_type(cat) + assert com.is_extension_type(pd.Series(cat)) + assert com.is_extension_type(pd.SparseArray([1, 2, 3])) + assert com.is_extension_type(pd.SparseSeries([1, 2, 3])) + assert com.is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + + dtype = DatetimeTZDtype("ns", tz="US/Eastern") + s = pd.Series([], dtype=dtype) + assert com.is_extension_type(s) + + # This test will only skip if the previous assertions + # pass AND scipy is not installed. 
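# A hedged illustration (a sketch under the same 0.20-era APIs as this module,
# not a quote from the commit): the rule exercised above is that values backed
# by a plain ndarray are *not* extension types, while pandas-internal storage
# (categorical, sparse, tz-aware datetime) is. For example:
#
#     cat = pd.Categorical([1, 2, 3])
#     com.is_extension_type(pd.Series(cat))                 # True
#     com.is_extension_type(pd.Series(cat).astype(object))  # False: ndarray-backed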
+ sparse = pytest.importorskip("scipy.sparse") + assert not com.is_extension_type(sparse.bsr_matrix([1, 2, 3])) + + +def test_is_complex_dtype(): + assert not com.is_complex_dtype(int) + assert not com.is_complex_dtype(str) + assert not com.is_complex_dtype(pd.Series([1, 2])) + assert not com.is_complex_dtype(np.array(['a', 'b'])) + + assert com.is_complex_dtype(np.complex) + assert com.is_complex_dtype(np.array([1 + 1j, 5])) From 5dd3b43cd87d25f37025e524be1225db51d4b8ea Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 4 May 2017 19:26:08 -0400 Subject: [PATCH 514/933] TST: xfail some bottleneck on windows (#16240) * CI: add bottleneck for 3.6 on windows * TST: xfail some bottleneck tests on windows xref https://github.com/pandas-dev/pandas/issues/16049#issuecomment-299298192 --- ci/requirements-3.6_WIN.run | 1 + pandas/tests/series/test_analytics.py | 39 ++++++++++++++++++++------- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/ci/requirements-3.6_WIN.run b/ci/requirements-3.6_WIN.run index 840d2867e9297..899bfbc6b6b23 100644 --- a/ci/requirements-3.6_WIN.run +++ b/ci/requirements-3.6_WIN.run @@ -1,6 +1,7 @@ python-dateutil pytz numpy=1.12* +bottleneck openpyxl xlsxwriter xlrd diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 257f992f57f6d..ec6a118ec3639 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -19,7 +19,7 @@ import pandas.core.nanops as nanops -from pandas.compat import lrange, range +from pandas.compat import lrange, range, is_platform_windows from pandas import compat from pandas.util.testing import (assert_series_equal, assert_almost_equal, assert_frame_equal, assert_index_equal) @@ -28,6 +28,10 @@ from .common import TestData +skip_if_bottleneck_on_windows = (is_platform_windows() and + nanops._USE_BOTTLENECK) + + class TestSeriesAnalytics(TestData): def test_sum_zero(self): @@ -64,14 +68,6 @@ def test_overflow(self): result = s.max(skipna=False) assert int(result) == v[-1] - # use bottleneck if available - result = s.sum() - assert int(result) == v.sum(dtype='int64') - result = s.min() - assert int(result) == 0 - result = s.max() - assert int(result) == v[-1] - for dtype in ['float32', 'float64']: v = np.arange(5000000, dtype=dtype) s = Series(v) @@ -84,6 +80,28 @@ def test_overflow(self): result = s.max(skipna=False) assert np.allclose(float(result), v[-1]) + @pytest.mark.xfail( + skip_if_bottleneck_on_windows, + reason="buggy bottleneck with sum overflow on windows") + def test_overflow_with_bottleneck(self): + # GH 6915 + # overflowing on the smaller int dtypes + for dtype in ['int32', 'int64']: + v = np.arange(5000000, dtype=dtype) + s = Series(v) + + # use bottleneck if available + result = s.sum() + assert int(result) == v.sum(dtype='int64') + result = s.min() + assert int(result) == 0 + result = s.max() + assert int(result) == v[-1] + + for dtype in ['float32', 'float64']: + v = np.arange(5000000, dtype=dtype) + s = Series(v) + # use bottleneck if available result = s.sum() assert result == v.sum(dtype=dtype) @@ -92,6 +110,9 @@ def test_overflow(self): result = s.max() assert np.allclose(float(result), v[-1]) + @pytest.mark.xfail( + skip_if_bottleneck_on_windows, + reason="buggy bottleneck with sum overflow on windows") def test_sum(self): self._check_stat_op('sum', np.sum, check_allna=True) From 2aa6e7917000baffb6a14a89d9433bf8409edf5f Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 4 May 2017 19:31:48 -0400 Subject: [PATCH 515/933] TST: Test 
CategoricalIndex in test_is_categorical (#16243) Follow-up to gh-16237. --- pandas/core/dtypes/common.py | 4 +++- pandas/tests/dtypes/test_common.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 6c2bbe330eeee..bfec1ec3ebe8c 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -205,13 +205,15 @@ def is_categorical(arr): >>> is_categorical([1, 2, 3]) False - Categoricals and Series Categoricals will return True. + Categoricals, Series Categoricals, and CategoricalIndex will return True. >>> cat = pd.Categorical([1, 2, 3]) >>> is_categorical(cat) True >>> is_categorical(pd.Series(cat)) True + >>> is_categorical(pd.CategoricalIndex([1, 2, 3])) + True """ return isinstance(arr, ABCCategorical) or is_categorical_dtype(arr) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 5b74397b1e770..4633dde5ed537 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -158,6 +158,7 @@ def test_is_categorical(): cat = pd.Categorical([1, 2, 3]) assert com.is_categorical(cat) assert com.is_categorical(pd.Series(cat)) + assert com.is_categorical(pd.CategoricalIndex([1, 2, 3])) assert not com.is_categorical([1, 2, 3]) From 50f8b9f3389e8f8ac7fc7bdf0e146af0a4322037 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 May 2017 21:23:34 -0500 Subject: [PATCH 516/933] DOC: Whatsnew cleanup (#16245) * DOC: Whatsnew cleanup * DOC: remove hash_tuples reference --- doc/source/whatsnew/v0.20.0.txt | 79 ++++++++++++++++----------------- 1 file changed, 38 insertions(+), 41 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index bfd8031b4c305..61042071a52ec 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1,7 +1,7 @@ .. _whatsnew_0200: -v0.20.0 (May 12, 2017) ------------------------- +v0.20.0 (May 4, 2017) +--------------------- This is a major release from 0.19.2 and includes a number of API changes, deprecations, new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all @@ -17,8 +17,8 @@ Highlights include: - Improved user API when accessing levels in ``.groupby()``, see :ref:`here ` - Improved support for ``UInt64`` dtypes, see :ref:`here ` - A new orient for JSON serialization, ``orient='table'``, that uses the :ref:`Table Schema spec ` -- Experimental support for exporting ``DataFrame.style`` formats to Excel , see :ref:`here ` -- Window Binary Corr/Cov operations now return a MultiIndexed ``DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, see :ref:`here ` +- Experimental support for exporting ``DataFrame.style`` formats to Excel, see :ref:`here ` +- Window binary corr/cov operations now return a MultiIndexed ``DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, see :ref:`here ` - Support for S3 handling now uses ``s3fs``, see :ref:`here ` - Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here ` - Switched the test framework to use `pytest `__ (:issue:`13097`) @@ -44,10 +44,10 @@ New features ``agg`` API ^^^^^^^^^^^ -Series & DataFrame have been enhanced to support the aggregation API. This is an already familiar API that -is supported for groupby, window operations, and resampling. This allows one to express aggregation operations -in a single concise way by using :meth:`~DataFrame.agg`, -and :meth:`~DataFrame.transform`. 
The full documentation is :ref:`here ` (:issue:`1623`). +Series & DataFrame have been enhanced to support the aggregation API. This is a familiar API +from groupby, window operations, and resampling. This allows aggregation operations in a concise +by using :meth:`~DataFrame.agg`, and :meth:`~DataFrame.transform`. The full documentation +is :ref:`here ` (:issue:`1623`). Here is a sample @@ -66,28 +66,28 @@ Using a single function is equivalent to ``.apply``. df.agg('sum') -Multiple functions in lists. +Multiple aggregations with a list of functions. .. ipython:: python df.agg(['sum', 'min']) -Using a dict provides the ability to have selective aggregation per column. -You will get a matrix-like output of all of the aggregators. The output will consist -of all unique functions. Those that are not noted for a particular column will be ``NaN``: +Using a dict provides the ability to apply specific aggregations per column. +You will get a matrix-like output of all of the aggregators. The output has one column +per unique function. Those functions applied to a particular column will be ``NaN``: .. ipython:: python df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']}) -The API also supports a ``.transform()`` function to provide for broadcasting results. +The API also supports a ``.transform()`` function for broadcasting results. .. ipython:: python :okwarning: df.transform(['abs', lambda x: x - x.min()]) -When presented with mixed dtypes that cannot aggregate, ``.agg()`` will only take the valid +When presented with mixed dtypes that cannot be aggregated, ``.agg()`` will only take the valid aggregations. This is similar to how groupby ``.agg()`` works. (:issue:`15015`) .. ipython:: python @@ -107,7 +107,7 @@ aggregations. This is similar to how groupby ``.agg()`` works. (:issue:`15015`) ``dtype`` keyword for data IO ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``dtype`` keyword argument in the :func:`read_csv` function for specifying the types of parsed columns is now supported with the ``'python'`` engine (:issue:`14295`). See the :ref:`io docs ` for more information. +The ``'python'`` engine for :func:`read_csv` now accepts the ``dtype`` keyword argument for specifying the types of specific columns (:issue:`14295`). See the :ref:`io docs ` for more information. .. ipython:: python :suppress: from pandas.compat import StringIO @@ -156,7 +156,7 @@ Commonly called 'unix epoch' or POSIX time. This was the previous default, so th Groupby Enhancements ^^^^^^^^^^^^^^^^^^^^ -Strings passed to ``DataFrame.groupby()`` as the ``by`` parameter may now reference either column names or index level names (:issue:`5677`) +Strings passed to ``DataFrame.groupby()`` as the ``by`` parameter may now reference either column names or index level names. .. ipython:: python @@ -172,6 +172,9 @@ Strings passed to ``DataFrame.groupby()`` as the ``by`` parameter may now refere df.groupby(['second', 'A']).sum() +Previously, only column names could be referenced. (:issue:`5677`) + .. _whatsnew_0200.enhancements.compressed_urls: Better support for compressed URLs in ``read_csv`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -181,8 +184,8 @@ The compression code was refactored (:issue:`12688`). As a result, reading dataframes from URLs in :func:`read_csv` or :func:`read_table` now supports additional compression methods: ``xz``, ``bz2``, and ``zip`` (:issue:`14570`). Previously, only ``gzip`` compression was supported. By default, compression of -URLs and paths are now both inferred using their file extensions. Additionally, -support for bz2 compression in the python 2 c-engine improved (:issue:`14874`). 
+URLs and paths are now inferred using their file extensions. Additionally, +support for bz2 compression in the python 2 C-engine improved (:issue:`14874`). .. ipython:: python @@ -203,7 +206,7 @@ Pickle file I/O now supports compression :func:`read_pickle`, :meth:`DataFame.to_pickle` and :meth:`Series.to_pickle` can now read from and write to compressed pickle files. Compression methods can be an explicit parameter or be inferred from the file extension. -See :ref:`the docs here ` +See :ref:`the docs here. ` .. ipython:: python @@ -432,7 +435,7 @@ New behavior: c c.categories -Furthermore, this allows one to bin *other* data with these same bins, with ``NaN`` represents a missing +Furthermore, this allows one to bin *other* data with these same bins, with ``NaN`` representing a missing value similar to other dtypes. .. ipython:: python @@ -465,7 +468,7 @@ Selecting via a scalar value that is contained *in* the intervals. Other Enhancements ^^^^^^^^^^^^^^^^^^ -- ``DataFrame.rolling()`` now accepts the parameter ``closed='right'|'left'|'both'|'neither'`` to choose the rolling window endpoint closedness. See the :ref:`documentation ` (:issue:`13965`) +- ``DataFrame.rolling()`` now accepts the parameter ``closed='right'|'left'|'both'|'neither'`` to choose the rolling window-endpoint closedness. See the :ref:`documentation ` (:issue:`13965`) - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. - ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) - ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) @@ -473,11 +476,9 @@ Other Enhancements - ``DataFrame`` has gained a ``nunique()`` method to count the distinct values over an axis (:issue:`14336`). - ``DataFrame`` has gained a ``melt()`` method, equivalent to ``pd.melt()``, for unpivoting from a wide to long format (:issue:`12640`). - ``DataFrame.groupby()`` has gained a ``.nunique()`` method to count the distinct values for all columns within each group (:issue:`14336`, :issue:`15197`). - - ``pd.read_excel()`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) - Multiple offset aliases with decimal points are now supported (e.g. ``0.5min`` is parsed as ``30s``) (:issue:`8419`) - ``.isnull()`` and ``.notnull()`` have been added to ``Index`` object to make them more consistent with the ``Series`` API (:issue:`15300`) - - New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack of sorting or an incorrect key. See :ref:`here ` @@ -497,20 +498,19 @@ Other Enhancements - ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs ` (:issue:`15136`) - ``.select_dtypes()`` now allows the string ``datetimetz`` to generically select datetimes with tz (:issue:`14910`) - The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements - - ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`) - ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`). - ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`). 
-- ``pandas.util.hashing`` has gained a ``hash_tuples`` routine, and ``hash_pandas_object`` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`) +- :func:`pandas.util.hash_pandas_object` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`) - ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) -- ``pd.read_html()`` will parse multiple header rows, creating a multiindex header. (:issue:`13434`). +- ``pd.read_html()`` will parse multiple header rows, creating a MultiIndex header. (:issue:`13434`). - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) -- ``pd.io.api.Styler`` template now has blocks for easier extension, :ref:`see the example notebook ` (:issue:`15649`) +- :class:`pandas.io.formats.style.Styler`` template now has blocks for easier extension, :ref:`see the example notebook ` (:issue:`15649`) +- :meth:`pandas.io.formats.style.Styler.render` now accepts ``**kwargs`` to allow user-defined variables in the template (:issue:`15649`) - ``pd.io.api.Styler.render`` now accepts ``**kwargs`` to allow user-defined variables in the template (:issue:`15649`) -- Compatability with Jupyter notebook 5.0; MultiIndex column labels are left-aligned and MultiIndex row-labels are top-aligned (:issue:`15379`) - -- ``TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) +- Compatibility with Jupyter notebook 5.0; MultiIndex column labels are left-aligned and MultiIndex row-labels are top-aligned (:issue:`15379`) +- ``TimedeltaIndex`` now has a custom date-tick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.api.types.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. - ``DataFrame.to_latex()`` and ``DataFrame.to_string()`` now allow optional header aliases. (:issue:`15536`) - Re-enable the ``parse_dates`` keyword of ``pd.read_excel()`` to parse string columns as dates (:issue:`14326`) @@ -524,9 +524,8 @@ Other Enhancements - ``pd.read_csv()`` now supports the ``error_bad_lines`` and ``warn_bad_lines`` arguments for the Python parser (:issue:`15925`) - The ``display.show_dimensions`` option can now also be used to specify whether the length of a ``Series`` should be shown in its repr (:issue:`7117`). -- ``parallel_coordinates()`` has gained a ``sort_labels`` keyword arg that sorts class labels and the colours assigned to them (:issue:`15908`) +- ``parallel_coordinates()`` has gained a ``sort_labels`` keyword argument that sorts class labels and the colors assigned to them (:issue:`15908`) - Options added to allow one to turn on/off using ``bottleneck`` and ``numexpr``, see :ref:`here ` (:issue:`16157`) - - ``DataFrame.style.bar()`` now accepts two more options to further customize the bar chart. Bar alignment is set with ``align='left'|'mid'|'zero'``, the default is "left", which is backward compatible; You can now pass a list of ``color=[color_negative, color_positive]``. (:issue:`14757`) @@ -653,7 +652,7 @@ Accessing datetime fields of Index now return Index The datetime-related attributes (see :ref:`here ` for an overview) of ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex`` previously returned numpy arrays. 
They will now return a new ``Index`` object, except -in the case of a boolean field, where the result will stil be a boolean ndarray. (:issue:`15022`) +in the case of a boolean field, where the result will still be a boolean ndarray. (:issue:`15022`) Previous behaviour: .. code-block:: ipython @@ -682,7 +681,7 @@ pd.unique will now be consistent with extension types ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In prior versions, using ``Series.unique()`` and :func:`unique` on ``Categorical`` and tz-aware -datatypes would yield different return types. These are now made consistent. (:issue:`15903`) +data-types would yield different return types. These are now made consistent. (:issue:`15903`) - Datetime tz-aware .. code-block:: ipython In [1]: pd.Series(pd.Categorical(list('baabc'))).unique() Out[1]: [b, a, c] Categories (3, object): [b, a, c] In [2]: pd.unique(pd.Series(pd.Categorical(list('baabc')))) Out[2]: array(['b', 'a', 'c'], dtype=object) New Behavior: .. ipython:: python # returns a Categorical pd.Series(pd.Categorical(list('baabc'))).unique() pd.unique(pd.Series(pd.Categorical(list('baabc'))).unique()) @@ -1044,7 +1043,7 @@ HDFStore where string comparison ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In previous versions most types could be compared to string column in a ``HDFStore`` -usually resulting in an invalid comparsion, returning an empty result frame. These comparisions will now raise a +usually resulting in an invalid comparison, returning an empty result frame. These comparisons will now raise a ``TypeError`` (:issue:`15492`) .. ipython:: python @@ -1085,8 +1084,8 @@ Index.intersection and inner join now preserve the order of the left Index ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :meth:`Index.intersection` now preserves the order of the calling ``Index`` (left) -instead of the other ``Index`` (right) (:issue:`15582`). This affects the inner -joins, :meth:`DataFrame.join` and :func:`merge`, and the ``.align`` methods. +instead of the other ``Index`` (right) (:issue:`15582`). This affects inner +joins, :meth:`DataFrame.join` and :func:`merge`, and the ``.align`` method. - ``Index.intersection`` @@ -1141,7 +1140,7 @@ Pivot Table always returns a DataFrame ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The documentation for :meth:`pivot_table` states that a ``DataFrame`` is *always* returned. Here a bug -is fixed that allowed this to return a ``Series`` under a narrow circumstance. (:issue:`4386`) +is fixed that allowed this to return a ``Series`` under certain circumstances. (:issue:`4386`) .. ipython:: python @@ -1199,7 +1198,6 @@ Other API Changes - ``NaT`` will now return ``NaT`` for ``tz_localize`` and ``tz_convert`` methods (:issue:`15830`) - ``DataFrame`` and ``Panel`` constructors with invalid input will now raise ``ValueError`` rather than ``PandasError``, if called with scalar inputs and not axes (:issue:`15541`) - - ``DataFrame`` and ``Panel`` constructors with invalid input will now raise ``ValueError`` rather than ``pandas.core.common.PandasError``, if called with scalar inputs and not axes; The exception ``PandasError`` is removed as well. (:issue:`15541`) - The exception ``pandas.core.common.AmbiguousIndexError`` is removed as it is not referenced (:issue:`15541`) @@ -1324,7 +1322,6 @@ Deprecations Deprecate ``.ix`` ^^^^^^^^^^^^^^^^^ The ``.ix`` indexer is deprecated, in favor of the more strict ``.iloc`` and ``.loc`` indexers. ``.ix`` offers a lot of magic on the inference of what the user wants to do. To wit, ``.ix`` can decide to index *positionally* OR via *labels*, depending on the data type of the index. This has caused quite a bit of user confusion over the years. The full indexing documentation are :ref:`here `. 
(:issue:`14218`) - The recommended methods of indexing are: - ``.loc`` if you want to *label* index @@ -1720,7 +1717,7 @@ Reshaping - Bug in ``DataFrame.pivot_table()`` where ``dropna=True`` would not drop all-NaN columns when the columns was a ``category`` dtype (:issue:`15193`) - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`) -- Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) +- Bug in ``pd.concat()`` in which concatenating with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) - Bug with ``sort=True`` in ``DataFrame.join`` and ``pd.merge`` when joining on indexes (:issue:`15582`) - Bug in ``DataFrame.nsmallest`` and ``DataFrame.nlargest`` where identical values resulted in duplicated rows (:issue:`15297`) From a31c96d34d00dc757908b564dc93991e867d83e2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 May 2017 21:25:09 -0500 Subject: [PATCH 517/933] RLS: v0.20.0 From ced39cb48a5f52d13f9d24bf2574adcc6b741947 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 5 May 2017 09:06:37 -0500 Subject: [PATCH 518/933] DOC: Updated release notes for 0.20.1 (#16251) * DOC: Updated release notes for 0.20.1 - Moved whatsnew for 0.20.1 -> 0.20.2 (unreleased) - Moved whatsnew for 0.20.0 -> 0.20.1 (releasing soon) - Added release note for https://github.com/pandas-dev/pandas/pull/16250 * DOC: Move 0.20.1 to a note * DOC: Simplify excel style example --- doc/source/whatsnew/v0.20.0.txt | 10 +++++++--- doc/source/whatsnew/{v0.20.1.txt => v0.20.2.txt} | 0 2 files changed, 7 insertions(+), 3 deletions(-) rename doc/source/whatsnew/{v0.20.1.txt => v0.20.2.txt} (100%) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 61042071a52ec..b0aac2aee4238 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1,6 +1,6 @@ .. _whatsnew_0200: -v0.20.0 (May 4, 2017) +v0.20.1 (May 5, 2017) --------------------- This is a major release from 0.19.2 and includes a number of API changes, deprecations, new features, @@ -30,6 +30,11 @@ Highlights include: Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. +.. note:: + + This is a combined release for 0.20.0 and 0.20.1. + Version 0.20.1 contains one additional change for backwards-compatibility with downstream projects using pandas' ``utils`` routines. (:issue:`16250`) + .. contents:: What's new in v0.20.0 :local: :backlinks: none @@ -388,8 +393,7 @@ For example, after running the following, ``styled.xlsx`` renders as below: df styled = df.style.\ applymap(lambda val: 'color: %s' % 'red' if val < 0 else 'black').\ - apply(lambda s: ['background-color: yellow' if v else '' - for v in s == s.max()]) + highlight_max() styled.to_excel('styled.xlsx', engine='openpyxl') .. 
image:: _static/style-excel.png diff --git a/doc/source/whatsnew/v0.20.1.txt b/doc/source/whatsnew/v0.20.2.txt similarity index 100% rename from doc/source/whatsnew/v0.20.1.txt rename to doc/source/whatsnew/v0.20.2.txt From 4caa69596087afb1e2579d802f0e3697633d21ff Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 5 May 2017 16:35:49 +0200 Subject: [PATCH 519/933] DOC: add read_gbq as top-level in api.rst (#16256) --- doc/source/api.rst | 4 ---- doc/source/conf.py | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index c652573bc6677..cb5136df1ff8b 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -128,7 +128,6 @@ SQL Google BigQuery ~~~~~~~~~~~~~~~ -.. currentmodule:: pandas.io.gbq .. autosummary:: :toctree: generated/ @@ -136,9 +135,6 @@ Google BigQuery read_gbq -.. currentmodule:: pandas - - STATA ~~~~~ diff --git a/doc/source/conf.py b/doc/source/conf.py index 556e5f0227471..394fa44c30573 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -243,6 +243,7 @@ ('pandas.io.clipboard.read_clipboard', 'pandas.read_clipboard'), ('pandas.io.excel.ExcelFile.parse', 'pandas.ExcelFile.parse'), ('pandas.io.excel.read_excel', 'pandas.read_excel'), + ('pandas.io.gbq.read_gbq', 'pandas.read_gbq'), ('pandas.io.html.read_html', 'pandas.read_html'), ('pandas.io.json.read_json', 'pandas.read_json'), ('pandas.io.parsers.read_csv', 'pandas.read_csv'), From 9f33f3c7b68d65963f19ed893907068eda7815d9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 5 May 2017 17:21:31 +0200 Subject: [PATCH 520/933] DOC: some reviewing of the 0.20 whatsnew file (#16254) --- doc/source/whatsnew/v0.20.0.txt | 114 ++++++++++++++------------------ pandas/core/indexes/interval.py | 3 + 2 files changed, 51 insertions(+), 66 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index b0aac2aee4238..a0bf2f9b3758a 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -14,14 +14,13 @@ Highlights include: - The ``.ix`` indexer has been deprecated, see :ref:`here ` - ``Panel`` has been deprecated, see :ref:`here ` - Addition of an ``IntervalIndex`` and ``Interval`` scalar type, see :ref:`here ` -- Improved user API when accessing levels in ``.groupby()``, see :ref:`here ` +- Improved user API when grouping by index levels in ``.groupby()``, see :ref:`here ` - Improved support for ``UInt64`` dtypes, see :ref:`here ` -- A new orient for JSON serialization, ``orient='table'``, that uses the :ref:`Table Schema spec ` -- Experimental support for exporting ``DataFrame.style`` formats to Excel, see :ref:`here ` +- A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec and that gives the possibility for a more interactive repr in the Jupyter Notebook, see :ref:`here ` +- Experimental support for exporting styled DataFrames (``DataFrame.style``) to Excel, see :ref:`here ` - Window binary corr/cov operations now return a MultiIndexed ``DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, see :ref:`here ` - Support for S3 handling now uses ``s3fs``, see :ref:`here ` - Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here ` -- Switched the test framework to use `pytest `__ (:issue:`13097`) .. warning:: @@ -46,12 +45,12 @@ New features .. 
_whatsnew_0200.enhancements.agg: -``agg`` API -^^^^^^^^^^^ +``agg`` API for DataFrame/Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Series & DataFrame have been enhanced to support the aggregation API. This is a familiar API -from groupby, window operations, and resampling. This allows aggregation operations in a concise -by using :meth:`~DataFrame.agg`, and :meth:`~DataFrame.transform`. The full documentation +from groupby, window operations, and resampling. This allows aggregation operations in a concise way +by using :meth:`~DataFrame.agg` and :meth:`~DataFrame.transform`. The full documentation is :ref:`here ` (:issue:`1623`). Here is a sample @@ -112,22 +111,14 @@ aggregations. This is similar to how groupby ``.agg()`` works. (:issue:`15015`) ``dtype`` keyword for data IO ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``'python'`` engine for :func:`read_csv` now accepts the ``dtype`` keyword argument for specifying the types of specific columns (:issue:`14295`). See the :ref:`io docs ` for more information. +The ``'python'`` engine for :func:`read_csv`, as well as the :func:`read_fwf` function for parsing +fixed-width text files and :func:`read_excel` for parsing Excel files, now accept the ``dtype`` keyword argument for specifying the types of specific columns (:issue:`14295`). See the :ref:`io docs ` for more information. .. ipython:: python :suppress: from pandas.compat import StringIO -.. ipython:: python - - data = "a,b\n1,2\n3,4" - pd.read_csv(StringIO(data), engine='python').dtypes - pd.read_csv(StringIO(data), engine='python', dtype={'a':'float64', 'b':'object'}).dtypes - -The ``dtype`` keyword argument is also now supported in the :func:`read_fwf` function for parsing -fixed-width text files, and :func:`read_excel` for parsing Excel files. - .. ipython:: python data = "a b\n1 2\n3 4" df = pd.read_fwf(StringIO(data)).dtypes @@ -140,16 +131,16 @@ fixed-width text files, and :func:`read_excel` for parsing Excel files. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :func:`to_datetime` has gained a new parameter, ``origin``, to define a reference date -from where to compute the resulting ``DatetimeIndex`` when ``unit`` is specified. (:issue:`11276`, :issue:`11745`) +from where to compute the resulting timestamps when parsing numerical values with a specific ``unit`` specified. (:issue:`11276`, :issue:`11745`) -Start with 1960-01-01 as the starting date +For example, with 1960-01-01 as the starting date: .. ipython:: python pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1960-01-01')) -The default is set at ``origin='unix'``, which defaults to ``1970-01-01 00:00:00``. -Commonly called 'unix epoch' or POSIX time. This was the previous default, so this is a backward compatible change. +The default is set at ``origin='unix'``, which defaults to ``1970-01-01 00:00:00``, which is +commonly called 'unix epoch' or POSIX time. This was the previous default, so this is a backward compatible change. .. ipython:: python @@ -161,7 +152,7 @@ Groupby Enhancements ^^^^^^^^^^^^^^^^^^^^ -Strings passed to ``DataFrame.groupby()`` as the ``by`` parameter may now reference either column names or index level names (:issue:`5677`) +Strings passed to ``DataFrame.groupby()`` as the ``by`` parameter may now reference either column names or index level names. Previously, only column names could be referenced. This allows grouping by a column and an index level at the same time. (:issue:`5677`) .. 
ipython:: python @@ -177,8 +168,6 @@ Strings passed to ``DataFrame.groupby()`` as the ``by`` parameter may now refere df.groupby(['second', 'A']).sum() -Previously, only column names could be referenced. (:issue:`5677`) - .. _whatsnew_0200.enhancements.compressed_urls: @@ -208,7 +197,7 @@ support for bz2 compression in the python 2 C-engine improved (:issue:`14874`). Pickle file I/O now supports compression ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:func:`read_pickle`, :meth:`DataFame.to_pickle` and :meth:`Series.to_pickle` +:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` can now read from and write to compressed pickle files. Compression methods can be an explicit parameter or be inferred from the file extension. See :ref:`the docs here. ` @@ -226,33 +215,24 @@ Using an explicit compression type df.to_pickle("data.pkl.compress", compression="gzip") rt = pd.read_pickle("data.pkl.compress", compression="gzip") - rt - -Inferring compression type from the extension - -.. ipython:: python + rt.head() - df.to_pickle("data.pkl.xz", compression="infer") - rt = pd.read_pickle("data.pkl.xz", compression="infer") - rt - -The default is to ``infer``: +The default is to infer the compression type from the extension (``compression='infer'``): .. ipython:: python df.to_pickle("data.pkl.gz") rt = pd.read_pickle("data.pkl.gz") - rt + rt.head() df["A"].to_pickle("s1.pkl.bz2") rt = pd.read_pickle("s1.pkl.bz2") - rt + rt.head() .. ipython:: python :suppress: import os os.remove("data.pkl.compress") - os.remove("data.pkl.xz") os.remove("data.pkl.gz") os.remove("s1.pkl.bz2") @@ -298,7 +278,7 @@ In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueErr ordered=True)}) df -Previous Behavior: +**Previous Behavior**: .. code-block:: ipython @@ -306,7 +286,7 @@ Previous Behavior: --------------------------------------------------------------------------- ValueError: items in new_categories are not the same as in old categories -New Behavior: +**New Behavior**: .. ipython:: python @@ -332,7 +312,7 @@ the data. df.to_json(orient='table') -See :ref:`IO: Table Schema for more`. +See :ref:`IO: Table Schema for more information `. Additionally, the repr for ``DataFrame`` and ``Series`` can now publish this JSON Table schema representation of the Series or DataFrame if you are @@ -415,6 +395,11 @@ pandas has gained an ``IntervalIndex`` with its own dtype, ``interval`` as well notation, specifically as a return type for the categories in :func:`cut` and :func:`qcut`. The ``IntervalIndex`` allows some unique indexing, see the :ref:`docs `. (:issue:`7640`, :issue:`8625`) +.. warning:: + + These indexing behaviors of the IntervalIndex are provisional and may change in a future version of pandas. Feedback on usage is welcome. + + Previous behavior: The returned categories were strings, representing Intervals @@ -477,9 +462,8 @@ Other Enhancements - ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) - ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) - ``Series.sort_index`` accepts parameters ``kind`` and ``na_position`` (:issue:`13589`, :issue:`14444`) -- ``DataFrame`` has gained a ``nunique()`` method to count the distinct values over an axis (:issue:`14336`). +- ``DataFrame`` and ``DataFrame.groupby()`` have gained a ``nunique()`` method to count the distinct values over an axis (:issue:`14336`, :issue:`15197`). 
- ``DataFrame`` has gained a ``melt()`` method, equivalent to ``pd.melt()``, for unpivoting from a wide to long format (:issue:`12640`). -- ``DataFrame.groupby()`` has gained a ``.nunique()`` method to count the distinct values for all columns within each group (:issue:`14336`, :issue:`15197`). - ``pd.read_excel()`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) - Multiple offset aliases with decimal points are now supported (e.g. ``0.5min`` is parsed as ``30s``) (:issue:`8419`) - ``.isnull()`` and ``.notnull()`` have been added to ``Index`` object to make them more consistent with the ``Series`` API (:issue:`15300`) @@ -510,9 +494,8 @@ Other Enhancements - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) - ``pd.read_html()`` will parse multiple header rows, creating a MutliIndex header. (:issue:`13434`). - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) -- :class:`pandas.io.formats.style.Styler`` template now has blocks for easier extension, :ref:`see the example notebook ` (:issue:`15649`) -- :meth:`pandas.io.formats.style.Styler.render` now accepts ``**kwargs`` to allow user-defined variables in the template (:issue:`15649`) -- ``pd.io.api.Styler.render`` now accepts ``**kwargs`` to allow user-defined variables in the template (:issue:`15649`) +- :class:`pandas.io.formats.style.Styler` template now has blocks for easier extension, :ref:`see the example notebook ` (:issue:`15649`) +- :meth:`Styler.render() ` now accepts ``**kwargs`` to allow user-defined variables in the template (:issue:`15649`) - Compatibility with Jupyter notebook 5.0; MultiIndex column labels are left-aligned and MultiIndex row-labels are top-aligned (:issue:`15379`) - ``TimedeltaIndex`` now has a custom date-tick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.api.types.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. @@ -523,7 +506,7 @@ Other Enhancements - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts ``str`` to separate joined fields; the default is ".", which is backward compatible. (:issue:`14883`) -- :meth:`~MultiIndex.remove_unused_levels` has been added to facilitate :ref:`removing unused levels `. (:issue:`15694`) +- :meth:`MultiIndex.remove_unused_levels` has been added to facilitate :ref:`removing unused levels `. (:issue:`15694`) - ``pd.read_csv()`` will now raise a ``ParserError`` error whenever any parsing error occurs (:issue:`15913`, :issue:`15925`) - ``pd.read_csv()`` now supports the ``error_bad_lines`` and ``warn_bad_lines`` arguments for the Python parser (:issue:`15925`) - The ``display.show_dimensions`` option can now also be used to specify @@ -546,7 +529,7 @@ Backwards incompatible API changes Possible incompatibility for HDF5 formats created with pandas < 0.13.0 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -``pd.TimeSeries`` was deprecated officially in 0.17.0, though has only been an alias since 0.13.0. 
It has +``pd.TimeSeries`` was deprecated officially in 0.17.0, though has already been an alias since 0.13.0. It has been dropped in favor of ``pd.Series``. (:issue:`15098`). This *may* cause HDF5 files that were created in prior versions to become unreadable if ``pd.TimeSeries`` @@ -684,7 +667,7 @@ ndarray, you can always convert explicitly using ``np.asarray(idx.hour)``. pd.unique will now be consistent with extension types ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In prior versions, using ``Series.unique()`` and :func:`unique` on ``Categorical`` and tz-aware +In prior versions, using :meth:`Series.unique` and :func:`pandas.unique` on ``Categorical`` and tz-aware data-types would yield different return types. These are now made consistent. (:issue:`15903`) - Datetime tz-aware @@ -733,12 +716,12 @@ data-types would yield different return types. These are now made consistent. (: .. code-block:: ipython - In [1]: pd.Series(pd.Categorical(list('baabc'))).unique() + In [1]: pd.Series(list('baabc'), dtype='category').unique() Out[1]: [b, a, c] Categories (3, object): [b, a, c] - In [2]: pd.unique(pd.Series(pd.Categorical(list('baabc')))) + In [2]: pd.unique(pd.Series(list('baabc'), dtype='category')) Out[2]: array(['b', 'a', 'c'], dtype=object) New Behavior: @@ -746,8 +729,8 @@ data-types would yield different return types. These are now made consistent. (: .. ipython:: python # returns a Categorical - pd.Series(pd.Categorical(list('baabc'))).unique() - pd.unique(pd.Series(pd.Categorical(list('baabc'))).unique()) + pd.Series(list('baabc'), dtype='category').unique() + pd.unique(pd.Series(list('baabc'), dtype='category')) .. _whatsnew_0200.api_breaking.s3: @@ -808,8 +791,6 @@ Now the smallest acceptable dtype will be used (:issue:`13247`) df1 = pd.DataFrame(np.array([1.0], dtype=np.float32, ndmin=2)) df1.dtypes -.. ipython:: python - df2 = pd.DataFrame(np.array([np.nan], dtype=np.float32, ndmin=2)) df2.dtypes @@ -817,7 +798,7 @@ Previous Behavior: .. code-block:: ipython - In [7]: pd.concat([df1,df2]).dtypes + In [7]: pd.concat([df1, df2]).dtypes Out[7]: 0 float64 dtype: object @@ -826,7 +807,7 @@ New Behavior: .. ipython:: python - pd.concat([df1,df2]).dtypes + pd.concat([df1, df2]).dtypes .. _whatsnew_0200.api_breaking.gbq: @@ -1016,7 +997,7 @@ See the section on :ref:`Windowed Binary Operations ` for periods=100, freq='D', name='foo')) df.tail() -Old Behavior: +Previous Behavior: .. code-block:: ipython @@ -1232,12 +1213,12 @@ If indicated, a deprecation warning will be issued if you reference theses modul "pandas.algos", "pandas._libs.algos", "" "pandas.hashtable", "pandas._libs.hashtable", "" "pandas.indexes", "pandas.core.indexes", "" - "pandas.json", "pandas._libs.json", "X" + "pandas.json", "pandas._libs.json / pandas.io.json", "X" "pandas.parser", "pandas._libs.parsers", "X" "pandas.formats", "pandas.io.formats", "" "pandas.sparse", "pandas.core.sparse", "" - "pandas.tools", "pandas.core.reshape", "" - "pandas.types", "pandas.core.dtypes", "" + "pandas.tools", "pandas.core.reshape", "X" + "pandas.types", "pandas.core.dtypes", "X" "pandas.io.sas.saslib", "pandas.io.sas._sas", "" "pandas._join", "pandas._libs.join", "" "pandas._hash", "pandas._libs.hashing", "" @@ -1253,11 +1234,12 @@ exposed in the top-level namespace: ``pandas.errors``, ``pandas.plotting`` and certain functions in the ``pandas.io`` and ``pandas.tseries`` submodules, these are now the public subpackages. 
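For orientation, a hedged sketch of what user code looks like against the reorganized namespace (the exact import statements below are inferred from the table and notes in this section, not spelled out verbatim in the patch):

.. code-block:: python

   from pandas.api.types import union_categoricals  # public type utilities
   from pandas.errors import ParserError            # public error classes
   from pandas.util import hash_pandas_object       # public hashing helper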
+Further changes:
- The function :func:`~pandas.api.types.union_categoricals` is now importable from ``pandas.api.types``, formerly from ``pandas.types.concat`` (:issue:`15998`)
- The type import ``pandas.tslib.NaTType`` is deprecated and can be replaced by using ``type(pandas.NaT)`` (:issue:`16146`)
- The public functions in ``pandas.tools.hashing`` are deprecated from that location, but are now importable from ``pandas.util`` (:issue:`16223`)
-- The modules in ``pandas.util``: ``decorators``, ``print_versions``, ``doctools``, `validators``, ``depr_module`` are now private (:issue:`16223`)
+- The modules in ``pandas.util``: ``decorators``, ``print_versions``, ``doctools``, ``validators``, ``depr_module`` are now private. Only the functions exposed in ``pandas.util`` itself are public (:issue:`16223`)

.. _whatsnew_0200.privacy.errors:

@@ -1324,7 +1306,7 @@ Deprecations
 Deprecate ``.ix``
 ^^^^^^^^^^^^^^^^^

-The ``.ix`` indexer is deprecated, in favor of the more strict ``.iloc`` and ``.loc`` indexers. ``.ix`` offers a lot of magic on the inference of what the user wants to do. To wit, ``.ix`` can decide to index *positionally* OR via *labels*, depending on the data type of the index. This has caused quite a bit of user confusion over the years. The full indexing documentation are :ref:`here `. (:issue:`14218`)
+The ``.ix`` indexer is deprecated, in favor of the more strict ``.iloc`` and ``.loc`` indexers. ``.ix`` offers a lot of magic on the inference of what the user wants to do. To wit, ``.ix`` can decide to index *positionally* OR via *labels*, depending on the data type of the index. This has caused quite a bit of user confusion over the years. The full indexing documentation is :ref:`here `. (:issue:`14218`)

 The recommended methods of indexing are:

@@ -1372,7 +1354,7 @@ Deprecate Panel

 ``Panel`` is deprecated and will be removed in a future version. The recommended way to represent 3-D data is
 with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` or
 with the `xarray package `__. Pandas
-provides a :meth:`~Panel.to_xarray` method to automate this conversion. See the documentation :ref:`Deprecate Panel `. (:issue:`13563`).
+provides a :meth:`~Panel.to_xarray` method to automate this conversion. For more details see :ref:`Deprecate Panel ` documentation. (:issue:`13563`).

 .. ipython:: python
    :okwarning:

@@ -1420,7 +1402,7 @@ This is an illustrative example:

 Here is a typical useful syntax for computing different aggregations for different columns. This is
 a natural and useful syntax. We aggregate from the dict-to-list by taking the specified
-columns and applying the list of functions. This returns a ``MultiIndex`` for the columns.
+columns and applying the list of functions. This returns a ``MultiIndex`` for the columns (this is *not* deprecated).

 .. ipython:: python

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index 8363cead01e56..b1523cd6c0d0c 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -99,6 +99,9 @@ class IntervalIndex(IntervalMixin, Index):

     .. versionadded:: 0.20.0

+    Warning: the indexing behaviors are provisional and may change in
+    a future version of pandas.
+ Attributes ---------- left, right : array-like (1-dimensional) From 1d176187826fbb8ae0c5730db9cae9fbb11bc14f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 5 May 2017 12:54:48 -0400 Subject: [PATCH 521/933] DEPR: add shims for util + TST: test that we work in downstream packages (#16250) --- ci/install_release_build.sh | 10 ++++ ci/install_travis.sh | 26 ++++----- ci/requirements-2.7_BUILD_TEST.pip | 7 +++ ci/requirements-2.7_BUILD_TEST.sh | 7 +++ pandas/tests/test_downstream.py | 85 ++++++++++++++++++++++++++++++ pandas/types/common.py | 8 +++ pandas/util/decorators.py | 8 +++ pandas/util/hashing.py | 18 +++++++ scripts/build_dist.sh | 4 +- 9 files changed, 159 insertions(+), 14 deletions(-) create mode 100644 ci/install_release_build.sh create mode 100644 ci/requirements-2.7_BUILD_TEST.pip create mode 100755 ci/requirements-2.7_BUILD_TEST.sh create mode 100644 pandas/tests/test_downstream.py create mode 100644 pandas/types/common.py create mode 100644 pandas/util/decorators.py create mode 100644 pandas/util/hashing.py diff --git a/ci/install_release_build.sh b/ci/install_release_build.sh new file mode 100644 index 0000000000000..f8373176643fa --- /dev/null +++ b/ci/install_release_build.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# this requires cython to be installed + +# this builds the release cleanly +rm -rf dist +git clean -xfd +python setup.py clean +python setup.py cython +python setup.py sdist --formats=gztar diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 09668cbccc9d2..601edded29f5a 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -123,12 +123,9 @@ if [ "$BUILD_TEST" ]; then # build & install testing echo ["Starting installation test."] - rm -rf dist - python setup.py clean - python setup.py build_ext --inplace - python setup.py sdist --formats=gztar - conda uninstall cython - pip install dist/*tar.gz || exit 1 + bash ci/install_release_build.sh + conda uninstall -y cython + time pip install dist/*tar.gz || exit 1 else @@ -162,14 +159,13 @@ if [ -e ${REQ} ]; then time bash $REQ || exit 1 fi -# finish install if we are not doing a build-testk -if [ -z "$BUILD_TEST" ]; then +# remove any installed pandas package +# w/o removing anything else +echo +echo "[removing installed pandas]" +conda remove pandas --force - # remove any installed pandas package - # w/o removing anything else - echo - echo "[removing installed pandas]" - conda remove pandas --force +if [ -z "$BUILD_TEST" ]; then # install our pandas echo @@ -178,6 +174,10 @@ if [ -z "$BUILD_TEST" ]; then fi +echo +echo "[show pandas]" +conda list pandas + echo echo "[done]" exit 0 diff --git a/ci/requirements-2.7_BUILD_TEST.pip b/ci/requirements-2.7_BUILD_TEST.pip new file mode 100644 index 0000000000000..a0fc77c40bc00 --- /dev/null +++ b/ci/requirements-2.7_BUILD_TEST.pip @@ -0,0 +1,7 @@ +xarray +geopandas +seaborn +pandas_gbq +pandas_datareader +statsmodels +scikit-learn diff --git a/ci/requirements-2.7_BUILD_TEST.sh b/ci/requirements-2.7_BUILD_TEST.sh new file mode 100755 index 0000000000000..78941fd0944e5 --- /dev/null +++ b/ci/requirements-2.7_BUILD_TEST.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +source activate pandas + +echo "install 27 BUILD_TEST" + +conda install -n pandas -c conda-forge pyarrow dask diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py new file mode 100644 index 0000000000000..2baedb82aa2a7 --- /dev/null +++ b/pandas/tests/test_downstream.py @@ -0,0 +1,85 @@ +""" +Testing that we work in the downstream packages +""" +import pytest +import numpy as np 
# noqa +from pandas import DataFrame +from pandas.util import testing as tm + + +@pytest.fixture +def df(): + return DataFrame({'A': [1, 2, 3]}) + + +def test_dask(df): + + toolz = pytest.importorskip('toolz') # noqa + dask = pytest.importorskip('dask') # noqa + + import dask.dataframe as dd + + ddf = dd.from_pandas(df, npartitions=3) + assert ddf.A is not None + assert ddf.compute() is not None + + +def test_xarray(df): + + xarray = pytest.importorskip('xarray') # noqa + + assert df.to_xarray() is not None + + +def test_statsmodels(): + + statsmodels = pytest.importorskip('statsmodels') # noqa + import statsmodels.api as sm + import statsmodels.formula.api as smf + df = sm.datasets.get_rdataset("Guerry", "HistData").data + smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=df).fit() + + +def test_scikit_learn(df): + + sklearn = pytest.importorskip('sklearn') # noqa + from sklearn import svm, datasets + + digits = datasets.load_digits() + clf = svm.SVC(gamma=0.001, C=100.) + clf.fit(digits.data[:-1], digits.target[:-1]) + clf.predict(digits.data[-1:]) + + +def test_seaborn(): + + seaborn = pytest.importorskip('seaborn') + tips = seaborn.load_dataset("tips") + seaborn.stripplot(x="day", y="total_bill", data=tips) + + +def test_pandas_gbq(df): + + pandas_gbq = pytest.importorskip('pandas-gbq') # noqa + + +@tm.network +def test_pandas_datareader(): + + pandas_datareader = pytest.importorskip('pandas-datareader') # noqa + pandas_datareader.get_data_yahoo('AAPL') + + +def test_geopandas(): + + geopandas = pytest.importorskip('geopandas') # noqa + fp = geopandas.datasets.get_path('naturalearth_lowres') + assert geopandas.read_file(fp) is not None + + +def test_pyarrow(df): + + pyarrow = pytest.importorskip('pyarrow') # noqa + table = pyarrow.Table.from_pandas(df) + result = table.to_pandas() + tm.assert_frame_equal(result, df) diff --git a/pandas/types/common.py b/pandas/types/common.py new file mode 100644 index 0000000000000..a125c27d04596 --- /dev/null +++ b/pandas/types/common.py @@ -0,0 +1,8 @@ +import warnings + +warnings.warn("pandas.types.common is deprecated and will be " + "removed in a future version, import " + "from pandas.api.types", + DeprecationWarning, stacklevel=3) + +from pandas.core.dtypes.common import * # noqa diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py new file mode 100644 index 0000000000000..54bb834e829f3 --- /dev/null +++ b/pandas/util/decorators.py @@ -0,0 +1,8 @@ +import warnings + +warnings.warn("pandas.util.decorators is deprecated and will be " + "removed in a future version, import " + "from pandas.util", + DeprecationWarning, stacklevel=3) + +from pandas.util._decorators import * # noqa diff --git a/pandas/util/hashing.py b/pandas/util/hashing.py new file mode 100644 index 0000000000000..f97a7ac507407 --- /dev/null +++ b/pandas/util/hashing.py @@ -0,0 +1,18 @@ +import warnings +import sys + +m = sys.modules['pandas.util.hashing'] +for t in ['hash_pandas_object', 'hash_array']: + + def outer(t=t): + + def wrapper(*args, **kwargs): + from pandas import util + warnings.warn("pandas.util.hashing is deprecated and will be " + "removed in a future version, import " + "from pandas.util", + DeprecationWarning, stacklevel=3) + return getattr(util, t)(*args, **kwargs) + return wrapper + + setattr(m, t, outer(t)) diff --git a/scripts/build_dist.sh b/scripts/build_dist.sh index c9c36c18bed9c..d6a7d0ba67239 100755 --- a/scripts/build_dist.sh +++ b/scripts/build_dist.sh @@ -10,8 +10,10 @@ read -p "Ok to continue (y/n)? 
" answer case ${answer:0:1} in y|Y ) echo "Building distribution" + rm -rf dist + git clean -xfd python setup.py clean - python setup.py build_ext --inplace + python setup.py cython python setup.py sdist --formats=gztar ;; * ) From f947cfcc2d907688edcd7016157728a89578d339 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 5 May 2017 18:57:45 +0200 Subject: [PATCH 522/933] DOC: update release notes (#16259) --- doc/source/release.rst | 242 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 242 insertions(+) diff --git a/doc/source/release.rst b/doc/source/release.rst index f89fec9fb86e6..2587962299569 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -37,6 +37,248 @@ analysis / manipulation tool available in any language. * Binary installers on PyPI: http://pypi.python.org/pypi/pandas * Documentation: http://pandas.pydata.org +pandas 0.20.0 / 0.20.1 +---------------------- + +**Release date:** May 5, 2017 + + +This is a major release from 0.19.2 and includes a number of API changes, deprecations, new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +Highlights include: + +- New ``.agg()`` API for Series/DataFrame similar to the groupby-rolling-resample API's, see :ref:`here ` +- Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. +- The ``.ix`` indexer has been deprecated, see :ref:`here ` +- ``Panel`` has been deprecated, see :ref:`here ` +- Addition of an ``IntervalIndex`` and ``Interval`` scalar type, see :ref:`here ` +- Improved user API when grouping by index levels in ``.groupby()``, see :ref:`here ` +- Improved support for ``UInt64`` dtypes, see :ref:`here ` +- A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec and that gives the possibility for a more interactive repr in the Jupyter Notebook, see :ref:`here ` +- Experimental support for exporting styled DataFrames (``DataFrame.style``) to Excel, see :ref:`here ` +- Window binary corr/cov operations now return a MultiIndexed ``DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, see :ref:`here ` +- Support for S3 handling now uses ``s3fs``, see :ref:`here ` +- Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here ` + +See the :ref:`v0.20.1 Whatsnew ` overview for an extensive list +of all enhancements and bugs that have been fixed in 0.20.1. + + +.. note:: + + This is a combined release for 0.20.0 and and 0.20.1. + Version 0.20.1 contains one additional change for backwards-compatibility with downstream projects using pandas' ``utils`` routines. (:issue:`16250`) + +Thanks +~~~~~~ + +- abaldenko +- Adam J. Stewart +- Adrian +- adrian-stepien +- Ajay Saxena +- Akash Tandon +- Albert Villanova del Moral +- Aleksey Bilogur +- alexandercbooth +- Alexis Mignon +- Amol Kahat +- Andreas Winkler +- Andrew Kittredge +- Anthonios Partheniou +- Arco Bast +- Ashish Singal +- atbd +- bastewart +- Baurzhan Muftakhidinov +- Ben Kandel +- Ben Thayer +- Ben Welsh +- Bill Chambers +- bmagnusson +- Brandon M. Burroughs +- Brian +- Brian McFee +- carlosdanielcsantos +- Carlos Souza +- chaimdemulder +- Chris +- chris-b1 +- Chris Ham +- Christopher C. 
Aycock +- Christoph Gohlke +- Christoph Paulik +- Chris Warth +- Clemens Brunner +- DaanVanHauwermeiren +- Daniel Himmelstein +- Dave Willmer +- David Cook +- David Gwynne +- David Hoffman +- David Krych +- dickreuter +- Diego Fernandez +- Dimitris Spathis +- discort +- Dmitry L +- Dody Suria Wijaya +- Dominik Stanczak +- Dr-Irv +- Dr. Irv +- dr-leo +- D.S. McNeil +- dubourg +- dwkenefick +- Elliott Sales de Andrade +- Ennemoser Christoph +- Francesc Alted +- Fumito Hamamura +- funnycrab +- gfyoung +- Giacomo Ferroni +- goldenbull +- Graham R. Jeffries +- Greg Williams +- Guilherme Beltramini +- Guilherme Samora +- Hao Wu +- Harshit Patni +- hesham.shabana@hotmail.com +- Ilya V. Schurov +- Iván Vallés Pérez +- Jackie Leng +- Jaehoon Hwang +- James Draper +- James Goppert +- James McBride +- James Santucci +- Jan Schulz +- Jeff Carey +- Jeff Reback +- JennaVergeynst +- Jim +- Jim Crist +- Joe Jevnik +- Joel Nothman +- John +- John Tucker +- John W. O'Brien +- John Zwinck +- jojomdt +- Jonathan de Bruin +- Jonathan Whitmore +- Jon Mease +- Jon M. Mease +- Joost Kranendonk +- Joris Van den Bossche +- Joshua Bradt +- Julian Santander +- Julien Marrec +- Jun Kim +- Justin Solinsky +- Kacawi +- Kamal Kamalaldin +- Kerby Shedden +- Kernc +- Keshav Ramaswamy +- Kevin Sheppard +- Kyle Kelley +- Larry Ren +- Leon Yin +- linebp +- Line Pedersen +- Lorenzo Cestaro +- Luca Scarabello +- Lukasz +- Mahmoud Lababidi +- manu +- manuels +- Mark Mandel +- Matthew Brett +- Matthew Roeschke +- mattip +- Matti Picus +- Matt Roeschke +- maxalbert +- Maximilian Roos +- mcocdawc +- Michael Charlton +- Michael Felt +- Michael Lamparski +- Michiel Stock +- Mikolaj Chwalisz +- Min RK +- Miroslav Šedivý +- Mykola Golubyev +- Nate Yoder +- Nathalie Rud +- Nicholas Ver Halen +- Nick Chmura +- Nolan Nichols +- nuffe +- Pankaj Pandey +- paul-mannino +- Pawel Kordek +- pbreach +- Pete Huang +- Peter +- Peter Csizsek +- Petio Petrov +- Phil Ruffwind +- Pietro Battiston +- Piotr Chromiec +- Prasanjit Prakash +- Robert Bradshaw +- Rob Forgione +- Robin +- Rodolfo Fernandez +- Roger Thomas +- Rouz Azari +- Sahil Dua +- sakkemo +- Sam Foo +- Sami Salonen +- Sarah Bird +- Sarma Tangirala +- scls19fr +- Scott Sanderson +- Sebastian Bank +- Sebastian Gsänger +- Sébastien de Menten +- Shawn Heide +- Shyam Saladi +- sinhrks +- Sinhrks +- Stephen Rauch +- stijnvanhoey +- Tara Adiseshan +- themrmax +- the-nose-knows +- Thiago Serafim +- Thoralf Gutierrez +- Thrasibule +- Tobias Gustafsson +- Tom Augspurger +- tomrod +- Tong Shen +- Tong SHEN +- TrigonaMinima +- tzinckgraf +- Uwe +- wandersoncferreira +- watercrossing +- wcwagner +- Wes Turner +- Wiktor Tomczak +- WillAyd +- xgdgsc +- Yaroslav Halchenko +- Yimeng Zhang +- yui-knk + pandas 0.19.2 ------------- From e346c663cf76186c22f4d3b703461b1b60db280f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 5 May 2017 11:59:41 -0500 Subject: [PATCH 523/933] RLS: v0.20.1 From e5134306bd47db9f6d0f125d2cafd0b8a789e065 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 5 May 2017 14:27:57 -0500 Subject: [PATCH 524/933] DEV: Start 0.21.0 development cycle A tagged, empty commit to ensure that the version reported for master is always ahead of the 0.20.x backports branch. 
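The scheme that tagged commit protects: the version reported from a checkout is derived from ``git describe``, i.e. the nearest tag plus the distance to ``HEAD``, so tagging master immediately keeps its reported version sorting ahead of anything on the 0.20.x branch. A minimal sketch of that derivation follows; it only runs inside a tagged git checkout, and the helper name ``describe_version`` and the exact PEP 440 formatting are illustrative, not pandas' actual versioneer-generated ``_version.py``.

import subprocess

def describe_version():
    # "git describe --tags --long" yields e.g. "v0.21.0.dev-5-gabc1234":
    # nearest tag, commits since that tag, and the current short SHA.
    out = subprocess.check_output(
        ["git", "describe", "--tags", "--long"]).decode().strip()
    tag, distance, sha = out.rsplit("-", 2)
    # Because master carries its own tag, this base version always
    # compares ahead of releases cut from the 0.20.x branch.
    return "{0}+{1}.{2}".format(tag.lstrip("v"), distance, sha)

print(describe_version())  # e.g. "0.21.0.dev+5.gabc1234"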
From ba60321d00d686a3137ffe26f919d64e84d43aa0 Mon Sep 17 00:00:00 2001 From: Andreas Winkler Date: Sat, 6 May 2017 12:09:00 +0200 Subject: [PATCH 525/933] DOC: tm.assert_almost_equal() check_exact param defaults to False (#16265) --- pandas/util/testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index f6b572cdf7179..4b610c505c574 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -127,7 +127,7 @@ def assert_almost_equal(left, right, check_exact=False, ---------- left : object right : object - check_exact : bool, default True + check_exact : bool, default False Whether to compare number exactly. check_dtype: bool, default True check dtype if both a and b are the same type From 8809b04ffe55013b7e6fd2eab4156b6c3665c9fe Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Sat, 6 May 2017 23:50:14 +0200 Subject: [PATCH 526/933] BUG: support for "level=" when reset_index() is called with a single level Index (#16266) --- doc/source/whatsnew/v0.20.2.txt | 2 +- pandas/core/frame.py | 18 ++++++------ pandas/tests/frame/test_alter_axes.py | 37 ++++++++++++++++++++++++ pandas/tests/series/test_alter_axes.py | 39 ++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 504f8004bc8a6..f14a08876b6e8 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -42,7 +42,7 @@ Conversion Indexing ^^^^^^^^ - +- Bug in ``DataFrame.reset_index(level=)`` with single level index (:issue:`16263`) I/O diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e6ea58e7e05be..8d437102e4d18 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3012,12 +3012,12 @@ def _maybe_casted_values(index, labels=None): return values new_index = _default_index(len(new_obj)) - if isinstance(self.index, MultiIndex): - if level is not None: - if not isinstance(level, (tuple, list)): - level = [level] - level = [self.index._get_level_number(lev) for lev in level] - if len(level) < len(self.index.levels): + if level is not None: + if not isinstance(level, (tuple, list)): + level = [level] + level = [self.index._get_level_number(lev) for lev in level] + if isinstance(self.index, MultiIndex): + if len(level) < self.index.nlevels: new_index = self.index.droplevel(level) if not drop: @@ -3033,6 +3033,8 @@ def _maybe_casted_values(index, labels=None): multi_col = isinstance(self.columns, MultiIndex) for i, (lev, lab) in reversed(list(enumerate(to_insert))): + if not (level is None or i in level): + continue name = names[i] if multi_col: col_name = (list(name) if isinstance(name, tuple) @@ -3049,11 +3051,9 @@ def _maybe_casted_values(index, labels=None): missing = self.columns.nlevels - len(name_lst) name_lst += [col_fill] * missing name = tuple(name_lst) - # to ndarray and maybe infer different dtype level_values = _maybe_casted_values(lev, lab) - if level is None or i in level: - new_obj.insert(0, name, level_values) + new_obj.insert(0, name, level_values) new_obj.index = new_index if not inplace: diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index e6313dfc602a8..fbfbcc14e9150 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -641,6 +641,43 @@ def test_reset_index(self): xp = xp.set_index(['B'], append=True) assert_frame_equal(rs, xp, check_names=False) + def test_reset_index_level(self): + df = 
pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], + columns=['A', 'B', 'C', 'D']) + + for levels in ['A', 'B'], [0, 1]: + # With MultiIndex + result = df.set_index(['A', 'B']).reset_index(level=levels[0]) + tm.assert_frame_equal(result, df.set_index('B')) + + result = df.set_index(['A', 'B']).reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df.set_index('B')) + + result = df.set_index(['A', 'B']).reset_index(level=levels) + tm.assert_frame_equal(result, df) + + result = df.set_index(['A', 'B']).reset_index(level=levels, + drop=True) + tm.assert_frame_equal(result, df[['C', 'D']]) + + # With single-level Index (GH 16263) + result = df.set_index('A').reset_index(level=levels[0]) + tm.assert_frame_equal(result, df) + + result = df.set_index('A').reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df) + + result = df.set_index(['A']).reset_index(level=levels[0], + drop=True) + tm.assert_frame_equal(result, df[['B', 'C', 'D']]) + + # Missing levels - for both MultiIndex and single-level Index: + for idx_lev in ['A', 'B'], ['A']: + with tm.assert_raises_regex(KeyError, 'Level E '): + df.set_index(idx_lev).reset_index(level=['A', 'E']) + with tm.assert_raises_regex(IndexError, 'Too many levels'): + df.set_index(idx_lev).reset_index(level=[0, 1, 2]) + def test_reset_index_right_dtype(self): time = np.arange(0.0, 10, np.sqrt(2) / 2) s1 = Series((9.81 * time ** 2) / 2, diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 150767ee9e2b2..98ae749aaa10e 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -141,6 +141,45 @@ def test_reset_index(self): tm.assert_index_equal(rs.index, Index(index.get_level_values(1))) assert isinstance(rs, Series) + def test_reset_index_level(self): + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], + columns=['A', 'B', 'C']) + + for levels in ['A', 'B'], [0, 1]: + # With MultiIndex + s = df.set_index(['A', 'B'])['C'] + + result = s.reset_index(level=levels[0]) + tm.assert_frame_equal(result, df.set_index('B')) + + result = s.reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df.set_index('B')) + + result = s.reset_index(level=levels) + tm.assert_frame_equal(result, df) + + result = df.set_index(['A', 'B']).reset_index(level=levels, + drop=True) + tm.assert_frame_equal(result, df[['C']]) + + with tm.assert_raises_regex(KeyError, 'Level E '): + s.reset_index(level=['A', 'E']) + + # With single-level Index + s = df.set_index('A')['B'] + + result = s.reset_index(level=levels[0]) + tm.assert_frame_equal(result, df[['A', 'B']]) + + result = s.reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df[['A', 'B']]) + + result = s.reset_index(level=levels[0], drop=True) + tm.assert_series_equal(result, df['B']) + + with tm.assert_raises_regex(IndexError, 'Too many levels'): + s.reset_index(level=[0, 1, 2]) + def test_reset_index_range(self): # GH 12071 s = pd.Series(range(2), name='A', dtype='int64') From b9798161b37109020048c101753fd904536970ad Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 6 May 2017 18:05:06 -0400 Subject: [PATCH 527/933] TST: release testing of downstream packages (#16261) --- .travis.yml | 2 +- ci/install_travis.sh | 27 +++++++----- ci/script_multi.sh | 20 ++++++--- ci/script_single.sh | 4 +- pandas/tests/test_downstream.py | 44 ++++++++++++++----- scripts/build_dist.sh | 6 +-- .../build_dist_for_release.sh | 2 +- 7 files changed, 67 insertions(+), 38 deletions(-) rename ci/install_release_build.sh => scripts/build_dist_for_release.sh 
(69%) diff --git a/.travis.yml b/.travis.yml index e5e05ed26da56..f0ece15de65db 100644 --- a/.travis.yml +++ b/.travis.yml @@ -123,7 +123,7 @@ after_success: after_script: - echo "after_script start" - - source activate pandas && python -c "import pandas; pandas.show_versions();" + - source activate pandas && cd /tmp && python -c "import pandas; pandas.show_versions();" - if [ -e /tmp/single.xml ]; then ci/print_skipped.py /tmp/single.xml; fi diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 601edded29f5a..8cf6f2ce636da 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -119,15 +119,7 @@ if [ "$COVERAGE" ]; then fi echo -if [ "$BUILD_TEST" ]; then - - # build & install testing - echo ["Starting installation test."] - bash ci/install_release_build.sh - conda uninstall -y cython - time pip install dist/*tar.gz || exit 1 - -else +if [ -z "$BUILD_TEST" ]; then # build but don't install echo "[build em]" @@ -163,9 +155,22 @@ fi # w/o removing anything else echo echo "[removing installed pandas]" -conda remove pandas --force +conda remove pandas -y --force -if [ -z "$BUILD_TEST" ]; then +if [ "$BUILD_TEST" ]; then + + # remove any installation + pip uninstall -y pandas + conda list pandas + pip list --format columns |grep pandas + + # build & install testing + echo ["building release"] + bash scripts/build_dist_for_release.sh + conda uninstall -y cython + time pip install dist/*tar.gz || exit 1 + +else # install our pandas echo diff --git a/ci/script_multi.sh b/ci/script_multi.sh index 663d2feb5be23..daa929e177666 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -19,20 +19,26 @@ export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 429496 echo PYTHONHASHSEED=$PYTHONHASHSEED if [ "$BUILD_TEST" ]; then - echo "build-test" + echo "[build-test]" + + echo "[env]" + pip list --format columns |grep pandas + + echo "[running]" cd /tmp - pwd - conda list pandas - echo "running" - python -c "import pandas; pandas.test(['-n 2'])" + unset PYTHONPATH + python -c "import pandas; pandas.test(['-n 2', '--skip-slow', '--skip-network', '-r xX'])" + elif [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" + elif [ "$COVERAGE" ]; then echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + else - echo pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas - pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest + echo pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest fi RET="$?" 
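The ``cd /tmp`` and ``unset PYTHONPATH`` lines above matter because a source checkout earlier on ``sys.path`` silently shadows the freshly installed sdist, so the build test would exercise the wrong code. A minimal sketch of that probe, assuming a POSIX ``/tmp``; the ``python -c`` one-liner is illustrative rather than copied from these CI scripts.

import os
import subprocess
import sys

# Drop PYTHONPATH, mirroring `unset PYTHONPATH` in the script above.
env = {k: v for k, v in os.environ.items() if k != "PYTHONPATH"}

# Probe from a neutral directory: pandas.__file__ should point into
# site-packages, not into the repository checkout being tested.
subprocess.check_call(
    [sys.executable, "-c", "import pandas; print(pandas.__file__)"],
    cwd="/tmp", env=env)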
diff --git a/ci/script_single.sh b/ci/script_single.sh index db637679f0e0f..245b4e6152c4d 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -20,8 +20,8 @@ elif [ "$COVERAGE" ]; then echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas else - echo pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas - pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest + echo pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas + pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest fi RET="$?" diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 2baedb82aa2a7..12976272cb8b1 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -4,7 +4,28 @@ import pytest import numpy as np # noqa from pandas import DataFrame +from pandas.compat import PY36 from pandas.util import testing as tm +import importlib + + +def import_module(name): + # we *only* want to skip if the module is truly not available + # and NOT just an actual import error because of pandas changes + + if PY36: + try: + return importlib.import_module(name) + except ModuleNotFoundError: # noqa + pytest.skip("skipping as {} not available".format(name)) + + else: + try: + return importlib.import_module(name) + except ImportError as e: + if "No module named" in str(e) and name in str(e): + pytest.skip("skipping as {} not available".format(name)) + raise @pytest.fixture @@ -14,8 +35,8 @@ def df(): def test_dask(df): - toolz = pytest.importorskip('toolz') # noqa - dask = pytest.importorskip('dask') # noqa + toolz = import_module('toolz') # noqa + dask = import_module('dask') # noqa import dask.dataframe as dd @@ -26,14 +47,14 @@ def test_dask(df): def test_xarray(df): - xarray = pytest.importorskip('xarray') # noqa + xarray = import_module('xarray') # noqa assert df.to_xarray() is not None def test_statsmodels(): - statsmodels = pytest.importorskip('statsmodels') # noqa + statsmodels = import_module('statsmodels') # noqa import statsmodels.api as sm import statsmodels.formula.api as smf df = sm.datasets.get_rdataset("Guerry", "HistData").data @@ -42,7 +63,7 @@ def test_statsmodels(): def test_scikit_learn(df): - sklearn = pytest.importorskip('sklearn') # noqa + sklearn = import_module('sklearn') # noqa from sklearn import svm, datasets digits = datasets.load_digits() @@ -53,33 +74,34 @@ def test_scikit_learn(df): def test_seaborn(): - seaborn = pytest.importorskip('seaborn') + seaborn = import_module('seaborn') tips = seaborn.load_dataset("tips") seaborn.stripplot(x="day", y="total_bill", data=tips) def test_pandas_gbq(df): - pandas_gbq = pytest.importorskip('pandas-gbq') # noqa + pandas_gbq = import_module('pandas_gbq') # noqa -@tm.network +@pytest.mark.xfail(reason=("pandas_datareader<=0.3.0 " + "broken w.r.t. 
pandas >= 0.20.0")) def test_pandas_datareader(): - pandas_datareader = pytest.importorskip('pandas-datareader') # noqa + pandas_datareader = import_module('pandas_datareader') # noqa pandas_datareader.get_data_yahoo('AAPL') def test_geopandas(): - geopandas = pytest.importorskip('geopandas') # noqa + geopandas = import_module('geopandas') # noqa fp = geopandas.datasets.get_path('naturalearth_lowres') assert geopandas.read_file(fp) is not None def test_pyarrow(df): - pyarrow = pytest.importorskip('pyarrow') # noqa + pyarrow = import_module('pyarrow') # noqa table = pyarrow.Table.from_pandas(df) result = table.to_pandas() tm.assert_frame_equal(result, df) diff --git a/scripts/build_dist.sh b/scripts/build_dist.sh index d6a7d0ba67239..c3f849ce7a6eb 100755 --- a/scripts/build_dist.sh +++ b/scripts/build_dist.sh @@ -10,11 +10,7 @@ read -p "Ok to continue (y/n)? " answer case ${answer:0:1} in y|Y ) echo "Building distribution" - rm -rf dist - git clean -xfd - python setup.py clean - python setup.py cython - python setup.py sdist --formats=gztar + ./build_dist_for_release.sh ;; * ) echo "Not building distribution" diff --git a/ci/install_release_build.sh b/scripts/build_dist_for_release.sh similarity index 69% rename from ci/install_release_build.sh rename to scripts/build_dist_for_release.sh index f8373176643fa..e77974ae08b0c 100644 --- a/ci/install_release_build.sh +++ b/scripts/build_dist_for_release.sh @@ -2,7 +2,7 @@ # this requires cython to be installed -# this builds the release cleanly +# this builds the release cleanly & is building on the current checkout rm -rf dist git clean -xfd python setup.py clean From b8f6556cad57e60a4a522ff6574003b40c06f688 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 6 May 2017 18:20:12 -0400 Subject: [PATCH 528/933] DOC: add whatsnew 0.20.2 to display (#16273) --- doc/source/whatsnew.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst index d6fb1c6a8f9cc..ffaeeb78c2799 100644 --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -18,6 +18,8 @@ What's New These are new features and improvements of note in each release. +.. include:: whatsnew/v0.20.2.txt + .. include:: whatsnew/v0.20.0.txt .. include:: whatsnew/v0.19.2.txt From 82b9a172e871fbf6448511bc1d3ee546688b9539 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 6 May 2017 18:38:10 -0400 Subject: [PATCH 529/933] DOC: add 0.21.0 to whatsnew --- doc/source/whatsnew.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst index ffaeeb78c2799..b1f9990a3e6af 100644 --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -18,6 +18,8 @@ What's New These are new features and improvements of note in each release. +.. include:: whatsnew/v0.21.0.txt + .. include:: whatsnew/v0.20.2.txt .. 
include:: whatsnew/v0.20.0.txt From 1fdcb3a7e05bba36c9b5781e8f1d996293f6127c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 6 May 2017 19:22:03 -0400 Subject: [PATCH 530/933] TST: remove xfailing css tests (#16272) --- pandas/tests/io/formats/test_css.py | 76 +----------------------- pandas/tests/io/formats/test_to_excel.py | 17 +----- 2 files changed, 2 insertions(+), 91 deletions(-) diff --git a/pandas/tests/io/formats/test_css.py b/pandas/tests/io/formats/test_css.py index 44f95266b6c78..c07856dc63602 100644 --- a/pandas/tests/io/formats/test_css.py +++ b/pandas/tests/io/formats/test_css.py @@ -29,32 +29,6 @@ def test_css_parse_normalisation(name, norm, abnorm): assert_same_resolution(norm, abnorm) -@pytest.mark.xfail(reason='CSS comments not yet stripped') -def test_css_parse_comments(): - assert_same_resolution('hello: world', - 'hello/* foo */:/* bar \n */ world /*;not:here*/') - - -@pytest.mark.xfail(reason='''we don't need to handle specificity - markers like !important, but we should - ignore them in the future''') -def test_css_parse_specificity(): - assert_same_resolution('font-weight: bold', 'font-weight: bold !important') - - -@pytest.mark.xfail(reason='Splitting CSS declarations not yet sensitive to ' - '; in CSS strings') -def test_css_parse_strings(): - # semicolons in strings - with tm.assert_produces_warning(CSSWarning): - assert_resolves( - 'background-image: url(\'http://blah.com/foo?a;b=c\')', - {'background-image': 'url(\'http://blah.com/foo?a;b=c\')'}) - assert_resolves( - 'background-image: url("http://blah.com/foo?a;b=c")', - {'background-image': 'url("http://blah.com/foo?a;b=c")'}) - - @pytest.mark.parametrize( 'invalid_css,remainder', [ # No colon @@ -62,15 +36,7 @@ def test_css_parse_strings(): ('border-style: solid; hello-world', 'border-style: solid'), ('border-style: solid; hello-world; font-weight: bold', 'border-style: solid; font-weight: bold'), - # Unclosed string - pytest.mark.xfail(('background-image: "abc', ''), - reason='Unclosed CSS strings not detected'), - pytest.mark.xfail(('font-family: "abc', ''), - reason='Unclosed CSS strings not detected'), - pytest.mark.xfail(('background-image: \'abc', ''), - reason='Unclosed CSS strings not detected'), - pytest.mark.xfail(('font-family: \'abc', ''), - reason='Unclosed CSS strings not detected'), + # Unclosed string fail # Invalid size ('font-size: blah', 'font-size: 1em'), ('font-size: 1a2b', 'font-size: 1em'), @@ -124,46 +90,6 @@ def test_css_side_shorthands(shorthand, expansions): {}) -@pytest.mark.xfail(reason='CSS font shorthand not yet handled') -@pytest.mark.parametrize('css,props', [ - ('font: italic bold 12pt helvetica,sans-serif', - {'font-family': 'helvetica,sans-serif', - 'font-style': 'italic', - 'font-weight': 'bold', - 'font-size': '12pt'}), - ('font: bold italic 12pt helvetica,sans-serif', - {'font-family': 'helvetica,sans-serif', - 'font-style': 'italic', - 'font-weight': 'bold', - 'font-size': '12pt'}), -]) -def test_css_font_shorthand(css, props): - assert_resolves(css, props) - - -@pytest.mark.xfail(reason='CSS background shorthand not yet handled') -@pytest.mark.parametrize('css,props', [ - ('background: blue', {'background-color': 'blue'}), - ('background: fixed blue', - {'background-color': 'blue', 'background-attachment': 'fixed'}), -]) -def test_css_background_shorthand(css, props): - assert_resolves(css, props) - - -@pytest.mark.xfail(reason='CSS border shorthand not yet handled') -@pytest.mark.parametrize('style,equiv', [ - ('border: 1px solid red', - 'border-width: 1px; 
border-style: solid; border-color: red'), - ('border: solid red 1px', - 'border-width: 1px; border-style: solid; border-color: red'), - ('border: red solid', - 'border-style: solid; border-color: red'), -]) -def test_css_border_shorthand(style, equiv): - assert_same_resolution(style, equiv) - - @pytest.mark.parametrize('style,inherited,equiv', [ ('margin: 1px; margin: 2px', '', 'margin: 2px'), diff --git a/pandas/tests/io/formats/test_to_excel.py b/pandas/tests/io/formats/test_to_excel.py index fff5299921270..cdff3b8a5cca8 100644 --- a/pandas/tests/io/formats/test_to_excel.py +++ b/pandas/tests/io/formats/test_to_excel.py @@ -73,14 +73,7 @@ ('text-shadow: 0px -0em 2px #CCC', {'font': {'shadow': True}}), ('text-shadow: 0px -0em 2px', {'font': {'shadow': True}}), ('text-shadow: 0px -2em', {'font': {'shadow': True}}), - pytest.mark.xfail(('text-shadow: #CCC 3px 3px 3px', - {'font': {'shadow': True}}), - reason='text-shadow with color preceding width not yet ' - 'identified as shadow'), - pytest.mark.xfail(('text-shadow: #999 0px 0px 0px', - {'font': {'shadow': False}}), - reason='text-shadow with color preceding zero width not ' - 'yet identified as non-shadow'), + # FILL # - color, fillType ('background-color: red', {'fill': {'fgColor': 'FF0000', @@ -209,11 +202,3 @@ def test_css_to_excel_multiple(): def test_css_to_excel_inherited(css, inherited, expected): convert = CSSToExcelConverter(inherited) assert expected == convert(css) - - -@pytest.mark.xfail(reason='We are not currently warning for all unconverted ' - 'CSS, but possibly should') -def test_css_to_excel_warns_when_not_supported(): - convert = CSSToExcelConverter() - with pytest.warns(UserWarning): - convert('background: red') From ea56550ded81ff20e6ac77548958231a895264f3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 6 May 2017 19:39:28 -0400 Subject: [PATCH 531/933] DOC: change 0.20.1 whatsnew text -> 0.20.2 (#16274) --- doc/source/whatsnew/v0.20.2.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index f14a08876b6e8..e0a8065d9a507 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -1,6 +1,6 @@ -.. _whatsnew_0201: +.. _whatsnew_0202: -v0.20.1 (???) +v0.20.2 (???) ------------- This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes, @@ -9,26 +9,26 @@ We recommend that all users upgrade to this version. Highlights include: -.. contents:: What's new in v0.20.1 +.. contents:: What's new in v0.20.2 :local: :backlinks: none -.. _whatsnew_0201.enhancements: +.. _whatsnew_0202.enhancements: Enhancements ~~~~~~~~~~~~ -.. _whatsnew_0201.performance: +.. _whatsnew_0202.performance: Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -.. _whatsnew_0201.bug_fixes: +.. _whatsnew_0202.bug_fixes: Bug Fixes ~~~~~~~~~ From d50f981d768c3f9b870b8eb0be8f50783467e3be Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 7 May 2017 17:40:58 -0400 Subject: [PATCH 532/933] MAINT: Clean up pandas/util/testing.py (#16271) Transform testing methods to use more pytest idiom. Also, assume that dateutil and pytz are installed for testing because there are core dependencies for pandas. 
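A hedged illustration of the pytest idiom this cleanup moves toward (both test bodies below are invented for illustration): core dependencies such as ``pytz`` are imported unconditionally instead of going through helpers like ``tm._skip_if_no_pytz()``, while genuinely optional packages still go through ``pytest.importorskip``.

import pytest
import pytz  # core dependency: imported at module level, no skip helper

def test_tz_localize_utc():
    # pytz is assumed present, so no guard is needed before using it.
    import pandas as pd
    ts = pd.Timestamp("2017-05-05").tz_localize(pytz.utc)
    assert ts.tz is not None

def test_needs_xarray():
    # Optional dependency: skip this test cleanly when it is absent.
    xarray = pytest.importorskip("xarray")
    assert xarray.__version__ is not None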
--- pandas/tests/groupby/test_timegrouper.py | 7 +- pandas/tests/indexes/datetimes/test_astype.py | 22 ++---- .../indexes/datetimes/test_construction.py | 8 +-- .../indexes/datetimes/test_date_range.py | 13 ++-- .../tests/indexes/datetimes/test_datetime.py | 7 +- .../tests/indexes/datetimes/test_indexing.py | 6 +- pandas/tests/indexes/datetimes/test_ops.py | 10 +-- pandas/tests/indexes/datetimes/test_setops.py | 3 +- pandas/tests/indexes/datetimes/test_tools.py | 17 ++--- pandas/tests/io/formats/test_format.py | 14 ++-- pandas/tests/io/json/test_ujson.py | 10 ++- pandas/tests/io/test_pytables.py | 1 - pandas/tests/plotting/common.py | 2 +- pandas/tests/plotting/test_boxplot_method.py | 2 +- pandas/tests/plotting/test_datetimelike.py | 2 +- pandas/tests/plotting/test_deprecated.py | 2 +- pandas/tests/plotting/test_frame.py | 2 +- pandas/tests/plotting/test_groupby.py | 2 +- pandas/tests/plotting/test_hist_method.py | 2 +- pandas/tests/plotting/test_misc.py | 2 +- pandas/tests/plotting/test_series.py | 2 +- pandas/tests/reshape/test_concat.py | 4 +- pandas/tests/scalar/test_period.py | 3 +- pandas/tests/scalar/test_timestamp.py | 52 ++++---------- pandas/tests/series/test_combine_concat.py | 10 +-- pandas/tests/series/test_indexing.py | 3 - pandas/tests/series/test_operators.py | 5 +- pandas/tests/test_multilevel.py | 6 +- pandas/tests/test_resample.py | 12 ++-- pandas/tests/tseries/test_offsets.py | 8 +-- pandas/tests/tseries/test_timezones.py | 42 +++--------- pandas/util/testing.py | 68 ++++--------------- 32 files changed, 90 insertions(+), 259 deletions(-) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 2196318d1920e..70b6b1e439691 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -1,6 +1,7 @@ """ test with the TimeGrouper / grouping with datetimes """ import pytest +import pytz from datetime import datetime import numpy as np @@ -569,10 +570,8 @@ def test_groupby_with_timezone_selection(self): tm.assert_series_equal(df1, df2) def test_timezone_info(self): - # GH 11682 - # Timezone info lost when broadcasting scalar datetime to DataFrame - tm._skip_if_no_pytz() - import pytz + # see gh-11682: Timezone info lost when broadcasting + # scalar datetime to DataFrame df = pd.DataFrame({'a': [1], 'b': [datetime.now(pytz.utc)]}) assert df['b'][0].tzinfo == pytz.utc diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 0f7acf1febae8..46be24b90faae 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -1,8 +1,12 @@ import pytest +import pytz +import dateutil import numpy as np from datetime import datetime +from dateutil.tz import tzlocal + import pandas as pd import pandas.util.testing as tm from pandas import (DatetimeIndex, date_range, Series, NaT, Index, Timestamp, @@ -124,8 +128,6 @@ def test_astype_raises(self): pytest.raises(ValueError, idx.astype, 'datetime64[D]') def test_index_convert_to_datetime_array(self): - tm._skip_if_no_pytz() - def _check_rng(rng): converted = rng.to_pydatetime() assert isinstance(converted, np.ndarray) @@ -143,9 +145,6 @@ def _check_rng(rng): _check_rng(rng_utc) def test_index_convert_to_datetime_array_explicit_pytz(self): - tm._skip_if_no_pytz() - import pytz - def _check_rng(rng): converted = rng.to_pydatetime() assert isinstance(converted, np.ndarray) @@ -164,9 +163,6 @@ def _check_rng(rng): _check_rng(rng_utc) def 
test_index_convert_to_datetime_array_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - def _check_rng(rng): converted = rng.to_pydatetime() assert isinstance(converted, np.ndarray) @@ -209,8 +205,6 @@ def test_to_period_microsecond(self): assert period[1] == Period('2007-01-01 10:11:13.789123Z', 'U') def test_to_period_tz_pytz(self): - tm._skip_if_no_pytz() - from dateutil.tz import tzlocal from pytz import utc as UTC xp = date_range('1/1/2000', '4/1/2000').to_period() @@ -240,10 +234,6 @@ def test_to_period_tz_pytz(self): tm.assert_index_equal(ts.to_period(), xp) def test_to_period_tz_explicit_pytz(self): - tm._skip_if_no_pytz() - import pytz - from dateutil.tz import tzlocal - xp = date_range('1/1/2000', '4/1/2000').to_period() ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern')) @@ -271,10 +261,6 @@ def test_to_period_tz_explicit_pytz(self): tm.assert_index_equal(ts.to_period(), xp) def test_to_period_tz_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - from dateutil.tz import tzlocal - xp = date_range('1/1/2000', '4/1/2000').to_period() ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern') diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index fcfc56ea823da..cf896b06130a2 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -1,5 +1,6 @@ import pytest +import pytz import numpy as np from datetime import timedelta @@ -350,10 +351,7 @@ def test_constructor_coverage(self): pytest.raises(ValueError, DatetimeIndex, periods=10, freq='D') def test_constructor_datetime64_tzformat(self): - # GH 6572 - tm._skip_if_no_pytz() - import pytz - # ISO 8601 format results in pytz.FixedOffset + # see gh-6572: ISO 8601 format results in pytz.FixedOffset for freq in ['AS', 'W-SUN']: idx = date_range('2013-01-01T00:00:00-05:00', '2016-01-01T23:59:59-05:00', freq=freq) @@ -376,8 +374,6 @@ def test_constructor_datetime64_tzformat(self): tz='Asia/Tokyo') tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) - tm._skip_if_no_dateutil() - # Non ISO 8601 format results in dateutil.tz.tzoffset for freq in ['AS', 'W-SUN']: idx = date_range('2013/1/1 0:00:00-5:00', '2016/1/1 23:59:59-5:00', diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 0586ea9c4db2b..62686b356dc30 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -6,6 +6,7 @@ import pytest import numpy as np +from pytz import timezone from datetime import datetime, timedelta, time import pandas as pd @@ -299,10 +300,7 @@ def test_range_bug(self): tm.assert_index_equal(result, DatetimeIndex(exp_values)) def test_range_tz_pytz(self): - # GH 2906 - tm._skip_if_no_pytz() - from pytz import timezone - + # see gh-2906 tz = timezone('US/Eastern') start = tz.localize(datetime(2011, 1, 1)) end = tz.localize(datetime(2011, 1, 3)) @@ -323,9 +321,6 @@ def test_range_tz_pytz(self): assert dr[2] == end def test_range_tz_dst_straddle_pytz(self): - - tm._skip_if_no_pytz() - from pytz import timezone tz = timezone('US/Eastern') dates = [(tz.localize(datetime(2014, 3, 6)), tz.localize(datetime(2014, 3, 12))), @@ -349,8 +344,8 @@ def test_range_tz_dst_straddle_pytz(self): assert np.all(dr.hour == 0) def test_range_tz_dateutil(self): - # GH 2906 - tm._skip_if_no_dateutil() + # see gh-2906 + # Use maybe_get_tz to fix filename in tz under 
dateutil. from pandas._libs.tslib import maybe_get_tz tz = lambda x: maybe_get_tz('dateutil/' + x) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 96c8da546ff9d..6cba7e17abf8e 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -3,6 +3,7 @@ import numpy as np from datetime import date, timedelta, time +import dateutil import pandas as pd import pandas.util.testing as tm from pandas.compat import lrange @@ -363,11 +364,7 @@ def test_map(self): tm.assert_index_equal(result, exp) def test_iteration_preserves_tz(self): - - tm._skip_if_no_dateutil() - - # GH 8890 - import dateutil + # see gh-8890 index = date_range("2012-01-01", periods=3, freq='H', tz='US/Eastern') for i, ts in enumerate(index): diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index a9ea028c9d0f7..4ef5cc5499f4d 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -1,5 +1,6 @@ import pytest +import pytz import numpy as np import pandas as pd import pandas.util.testing as tm @@ -97,10 +98,7 @@ def test_insert(self): assert result.name == expected.name assert result.freq is None - # GH 7299 - tm._skip_if_no_pytz() - import pytz - + # see gh-7299 idx = date_range('1/1/2000', periods=3, freq='D', tz='Asia/Tokyo', name='idx') with pytest.raises(ValueError): diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 80e93a1f76a66..f33cdf8800791 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -1,4 +1,6 @@ +import pytz import pytest +import dateutil import warnings import numpy as np from datetime import timedelta @@ -1177,13 +1179,9 @@ def test_summary(self): self.rng[2:2].summary() def test_summary_pytz(self): - tm._skip_if_no_pytz() - import pytz bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() def test_summary_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() def test_equals(self): @@ -1279,13 +1277,9 @@ def test_summary(self): self.rng[2:2].summary() def test_summary_pytz(self): - tm._skip_if_no_pytz() - import pytz cdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() def test_summary_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() def test_equals(self): diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index f3af7dd30c27f..f43c010f59b9e 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -306,7 +306,6 @@ def test_intersection_bug(self): tm.assert_index_equal(result, b) def test_month_range_union_tz_pytz(self): - tm._skip_if_no_pytz() from pytz import timezone tz = timezone('US/Eastern') @@ -325,7 +324,7 @@ def test_month_range_union_tz_pytz(self): def test_month_range_union_tz_dateutil(self): tm._skip_if_windows_python_3() - tm._skip_if_no_dateutil() + from pandas._libs.tslib import _dateutil_gettz as timezone tz = timezone('US/Eastern') diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 648df01be5289..61f7ac8abaf09 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ 
b/pandas/tests/indexes/datetimes/test_tools.py @@ -1,10 +1,13 @@ """ test to_datetime """ import sys +import pytz import pytest import locale import calendar +import dateutil import numpy as np +from dateutil.parser import parse from datetime import datetime, date, time from distutils.version import LooseVersion @@ -244,11 +247,7 @@ def test_to_datetime_tz(self): pytest.raises(ValueError, lambda: pd.to_datetime(arr)) def test_to_datetime_tz_pytz(self): - - # xref 8260 - tm._skip_if_no_pytz() - import pytz - + # see gh-8260 us_eastern = pytz.timezone('US/Eastern') arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, hour=3, minute=0)), @@ -1124,8 +1123,6 @@ def test_parsers_quarter_invalid(self): pytest.raises(ValueError, tools.parse_time_string, case) def test_parsers_dayfirst_yearfirst(self): - tm._skip_if_no_dateutil() - # OK # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2012-10-11 00:00:00 @@ -1166,7 +1163,6 @@ def test_parsers_dayfirst_yearfirst(self): # 2.5.2 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 # 2.5.3 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 - import dateutil is_lt_253 = dateutil.__version__ < LooseVersion('2.5.3') # str : dayfirst, yearfirst, expected @@ -1187,7 +1183,6 @@ def test_parsers_dayfirst_yearfirst(self): (True, True, datetime(2020, 12, 21))]} - from dateutil.parser import parse for date_str, values in compat.iteritems(cases): for dayfirst, yearfirst, expected in values: @@ -1221,9 +1216,6 @@ def test_parsers_dayfirst_yearfirst(self): assert result4 == expected def test_parsers_timestring(self): - tm._skip_if_no_dateutil() - from dateutil.parser import parse - # must be the same as dateutil result cases = {'10:15': (parse('10:15'), datetime(1, 1, 1, 10, 15)), '9:05': (parse('9:05'), datetime(1, 1, 1, 9, 5))} @@ -1365,7 +1357,6 @@ def test_parsers_iso8601(self): class TestArrayToDatetime(object): def test_try_parse_dates(self): - from dateutil.parser import parse arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object) result = lib.try_parse_dates(arr, dayfirst=True) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 3f08013e05ac8..4431108a55963 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -7,6 +7,8 @@ from __future__ import print_function import re +import pytz +import dateutil import itertools from operator import methodcaller import os @@ -1548,17 +1550,15 @@ def get_ipython(): def test_pprint_pathological_object(self): """ - if the test fails, the stack will overflow and nose crash, - but it won't hang. + If the test fails, it at least won't hang. 
""" class A: - def __getitem__(self, key): return 3 # obviously simplified df = DataFrame([A()]) - repr(df) # just don't dine + repr(df) # just don't die def test_float_trim_zeros(self): vals = [2.08430917305e+10, 3.52205017305e+10, 2.30674817305e+10, @@ -2508,10 +2508,6 @@ def test_no_tz(self): assert str(ts_nanos_micros) == "1970-01-01 00:00:00.000001200" def test_tz_pytz(self): - tm._skip_if_no_pytz() - - import pytz - dt_date = datetime(2013, 1, 2, tzinfo=pytz.utc) assert str(dt_date) == str(Timestamp(dt_date)) @@ -2522,8 +2518,6 @@ def test_tz_pytz(self): assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us)) def test_tz_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil utc = dateutil.tz.tzutc() dt_date = datetime(2013, 1, 2, tzinfo=utc) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 86b0e5a0c6a2d..662f06dbb725e 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -5,12 +5,14 @@ except ImportError: import simplejson as json import math +import pytz import pytest import time import datetime import calendar import re import decimal +import dateutil from functools import partial from pandas.compat import range, zip, StringIO, u import pandas._libs.json as ujson @@ -396,18 +398,14 @@ def test_encodeTimeConversion(self): assert expected == output def test_encodeTimeConversion_pytz(self): - # GH11473 to_json segfaults with timezone-aware datetimes - tm._skip_if_no_pytz() - import pytz + # see gh-11473: to_json segfaults with timezone-aware datetimes test = datetime.time(10, 12, 15, 343243, pytz.utc) output = ujson.encode(test) expected = '"%s"' % test.isoformat() assert expected == output def test_encodeTimeConversion_dateutil(self): - # GH11473 to_json segfaults with timezone-aware datetimes - tm._skip_if_no_dateutil() - import dateutil + # see gh-11473: to_json segfaults with timezone-aware datetimes test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc()) output = ujson.encode(test) expected = '"%s"' % test.isoformat() diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index ee44fea55e51a..873bb20b3bba9 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5243,7 +5243,6 @@ def _compare_with_tz(self, a, b): def test_append_with_timezones_dateutil(self): from datetime import timedelta - tm._skip_if_no_dateutil() # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows # filename issues. 
diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 1dbba676e4bc5..3ab443b223f20 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -23,7 +23,7 @@ This is a common base class used for various plotting tests """ -tm._skip_module_if_no_mpl() +tm._skip_if_no_mpl() def _skip_if_no_scipy_gaussian_kde(): diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 1e06c13980657..a4c70f7945347 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -21,7 +21,7 @@ """ Test cases for .boxplot method """ -tm._skip_module_if_no_mpl() +tm._skip_if_no_mpl() def _skip_if_mpl_14_or_dev_boxplot(): diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index ed198de11bac1..3e7e789fa7de7 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -20,7 +20,7 @@ from pandas.tests.plotting.common import (TestPlotBase, _skip_if_no_scipy_gaussian_kde) -tm._skip_module_if_no_mpl() +tm._skip_if_no_mpl() class TestTSPlot(TestPlotBase): diff --git a/pandas/tests/plotting/test_deprecated.py b/pandas/tests/plotting/test_deprecated.py index 48030df48deca..ca03bcb060e25 100644 --- a/pandas/tests/plotting/test_deprecated.py +++ b/pandas/tests/plotting/test_deprecated.py @@ -18,7 +18,7 @@ pandas.tools.plotting """ -tm._skip_module_if_no_mpl() +tm._skip_if_no_mpl() class TestDeprecatedNameSpace(TestPlotBase): diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 4a4a71d7ea639..9abbb348fbfa8 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -27,7 +27,7 @@ _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) -tm._skip_module_if_no_mpl() +tm._skip_if_no_mpl() class TestDataFramePlots(TestPlotBase): diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index 8dcf73bce03c0..de48b58133e9a 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -10,7 +10,7 @@ from pandas.tests.plotting.common import TestPlotBase -tm._skip_module_if_no_mpl() +tm._skip_if_no_mpl() class TestDataFrameGroupByPlots(TestPlotBase): diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index c3e32f52e0474..17a75e5cb287c 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -15,7 +15,7 @@ from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) -tm._skip_module_if_no_mpl() +tm._skip_if_no_mpl() class TestSeriesPlots(TestPlotBase): diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 9eace32aa19a3..d93ad90a36a9c 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -17,7 +17,7 @@ from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _ok_for_gaussian_kde) -tm._skip_module_if_no_mpl() +tm._skip_if_no_mpl() class TestSeriesPlots(TestPlotBase): diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 448661c7af0e9..340a98484480f 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -22,7 +22,7 @@ _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) -tm._skip_module_if_no_mpl() +tm._skip_if_no_mpl() class TestSeriesPlots(TestPlotBase): diff --git 
a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 4dfa2904313ce..7486c32f57fdb 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1,5 +1,6 @@ from warnings import catch_warnings +import dateutil import numpy as np from numpy.random import randn @@ -1780,9 +1781,6 @@ def test_concat_tz_series_with_datetimelike(self): def test_concat_tz_series_tzlocal(self): # see gh-13583 - tm._skip_if_no_dateutil() - import dateutil - x = [pd.Timestamp('2011-01-01', tz=dateutil.tz.tzlocal()), pd.Timestamp('2011-02-01', tz=dateutil.tz.tzlocal())] y = [pd.Timestamp('2012-01-01', tz=dateutil.tz.tzlocal()), diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index 54366dc9b1c3f..931d6b2b8f1f0 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -1,5 +1,6 @@ import pytest +import pytz import numpy as np from datetime import datetime, date, timedelta @@ -210,8 +211,6 @@ def test_period_cons_combined(self): Period('2011-01', freq='1D1W') def test_timestamp_tz_arg(self): - tm._skip_if_no_pytz() - import pytz for case in ['Europe/Brussels', 'Asia/Tokyo', 'US/Pacific']: p = Period('1/1/2005', freq='M').to_timestamp(tz=case) exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 5caa0252b69b8..7cd1a7db0f9fe 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1,12 +1,18 @@ """ test the scalar Timestamp """ import sys +import pytz import pytest +import dateutil import operator import calendar import numpy as np + +from dateutil.tz import tzutc +from pytz import timezone, utc from datetime import datetime, timedelta from distutils.version import LooseVersion +from pytz.exceptions import AmbiguousTimeError, NonExistentTimeError import pandas.util.testing as tm from pandas.tseries import offsets, frequencies @@ -44,10 +50,6 @@ def test_constructor(self): Timestamp('2014-07-01 09:00:00.000000005'), base_expected + 5)] - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() - import pytz - import dateutil timezones = [(None, 0), ('UTC', 0), (pytz.utc, 0), ('Asia/Tokyo', 9), ('US/Eastern', -4), ('dateutil/US/Pacific', -7), (pytz.FixedOffset(-180), -3), @@ -100,10 +102,6 @@ def test_constructor_with_stringoffset(self): ('2014-07-01 11:00:00.000008000+02:00', base_expected + 8000), ('2014-07-01 11:00:00.000000005+02:00', base_expected + 5)] - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() - import pytz - import dateutil timezones = [(None, 0), ('UTC', 0), (pytz.utc, 0), ('Asia/Tokyo', 9), ('US/Eastern', -4), ('dateutil/US/Pacific', -7), (pytz.FixedOffset(-180), -3), @@ -274,14 +272,10 @@ def test_conversion(self): assert result.dtype == expected.dtype def test_repr(self): - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() - dates = ['2014-03-07', '2014-01-01 09:00', '2014-01-01 00:00:00.000000001'] # dateutil zone change (only matters for repr) - import dateutil if (dateutil.__version__ >= LooseVersion('2.3') and (dateutil.__version__ <= LooseVersion('2.4.0') or dateutil.__version__ >= LooseVersion('2.6.0'))): @@ -330,8 +324,6 @@ def test_repr(self): # This can cause the tz field to be populated, but it's redundant to # include this information in the date-string. 
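The `LooseVersion` gate in the repr test above exists because dateutil changed how it names zones across releases, and the expected repr string changes with it. Isolating the condition (the same logic as in the hunk, written out for reference):

    import dateutil
    from distutils.version import LooseVersion

    v = LooseVersion(dateutil.__version__)
    # 2.3 <= v <= 2.4.0 or v >= 2.6.0 reports the short zone name
    # ('US/Pacific'); other releases report the full path to the zone file.
    short_zone_name = (v >= LooseVersion('2.3') and
                       (v <= LooseVersion('2.4.0') or
                        v >= LooseVersion('2.6.0')))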
- tm._skip_if_no_pytz() - import pytz # noqa date_with_utc_offset = Timestamp('2014-03-13 00:00:00-0400', tz=None) assert '2014-03-13 00:00:00-0400' in repr(date_with_utc_offset) assert 'tzoffset' not in repr(date_with_utc_offset) @@ -399,8 +391,7 @@ def test_tz_localize_ambiguous(self): Timestamp('2011-01-01').tz_convert('Asia/Tokyo') def test_tz_localize_nonexistent(self): - # See issue 13057 - from pytz.exceptions import NonExistentTimeError + # see gh-13057 times = ['2015-03-08 02:00', '2015-03-08 02:30', '2015-03-29 02:00', '2015-03-29 02:30'] timezones = ['US/Eastern', 'US/Pacific', @@ -414,8 +405,7 @@ def test_tz_localize_nonexistent(self): assert ts.tz_localize(tz, errors='coerce') is NaT def test_tz_localize_errors_ambiguous(self): - # See issue 13057 - from pytz.exceptions import AmbiguousTimeError + # see gh-13057 ts = Timestamp('2015-11-1 01:00') pytest.raises(AmbiguousTimeError, ts.tz_localize, 'US/Pacific', errors='coerce') @@ -709,9 +699,6 @@ def _check_round(freq, expected): stamp.round('foo') def test_class_ops_pytz(self): - tm._skip_if_no_pytz() - from pytz import timezone - def compare(x, y): assert (int(Timestamp(x).value / 1e9) == int(Timestamp(y).value / 1e9)) @@ -732,9 +719,6 @@ def compare(x, y): datetime.combine(date_component, time_component)) def test_class_ops_dateutil(self): - tm._skip_if_no_dateutil() - from dateutil.tz import tzutc - def compare(x, y): assert (int(np.round(Timestamp(x).value / 1e9)) == int(np.round(Timestamp(y).value / 1e9))) @@ -910,8 +894,7 @@ def test_compare_invalid(self): tm.assert_series_equal(a / b, 1 / (b / a)) def test_cant_compare_tz_naive_w_aware(self): - tm._skip_if_no_pytz() - # #1404 + # see gh-1404 a = Timestamp('3/12/2012') b = Timestamp('3/12/2012', tz='utc') @@ -932,9 +915,7 @@ def test_cant_compare_tz_naive_w_aware(self): assert not a.to_pydatetime() == b def test_cant_compare_tz_naive_w_aware_explicit_pytz(self): - tm._skip_if_no_pytz() - from pytz import utc - # #1404 + # see gh-1404 a = Timestamp('3/12/2012') b = Timestamp('3/12/2012', tz=utc) @@ -955,12 +936,9 @@ def test_cant_compare_tz_naive_w_aware_explicit_pytz(self): assert not a.to_pydatetime() == b def test_cant_compare_tz_naive_w_aware_dateutil(self): - tm._skip_if_no_dateutil() - from dateutil.tz import tzutc - utc = tzutc() - # #1404 + # see gh-1404 a = Timestamp('3/12/2012') - b = Timestamp('3/12/2012', tz=utc) + b = Timestamp('3/12/2012', tz=tzutc()) pytest.raises(Exception, a.__eq__, b) pytest.raises(Exception, a.__ne__, b) @@ -1282,7 +1260,6 @@ def test_compare_hour13(self): class TestTimeSeries(object): def test_timestamp_to_datetime(self): - tm._skip_if_no_pytz() rng = date_range('20090415', '20090519', tz='US/Eastern') stamp = rng[0] @@ -1291,7 +1268,6 @@ def test_timestamp_to_datetime(self): assert stamp.tzinfo == dtval.tzinfo def test_timestamp_to_datetime_dateutil(self): - tm._skip_if_no_pytz() rng = date_range('20090415', '20090519', tz='dateutil/US/Eastern') stamp = rng[0] @@ -1300,8 +1276,6 @@ def test_timestamp_to_datetime_dateutil(self): assert stamp.tzinfo == dtval.tzinfo def test_timestamp_to_datetime_explicit_pytz(self): - tm._skip_if_no_pytz() - import pytz rng = date_range('20090415', '20090519', tz=pytz.timezone('US/Eastern')) @@ -1312,7 +1286,7 @@ def test_timestamp_to_datetime_explicit_pytz(self): def test_timestamp_to_datetime_explicit_dateutil(self): tm._skip_if_windows_python_3() - tm._skip_if_no_dateutil() + from pandas._libs.tslib import _dateutil_gettz as gettz rng = date_range('20090415', '20090519', tz=gettz('US/Eastern')) diff 
--git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index bb998b7fa55dd..71ac00975af03 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -246,9 +246,7 @@ def test_append_concat(self): assert rng1.append(rng2).name is None def test_append_concat_tz(self): - # GH 2938 - tm._skip_if_no_pytz() - + # see gh-2938 rng = date_range('5/8/2012 1:45', periods=10, freq='5T', tz='US/Eastern') rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', @@ -269,8 +267,7 @@ def test_append_concat_tz(self): tm.assert_index_equal(appended, rng3) def test_append_concat_tz_explicit_pytz(self): - # GH 2938 - tm._skip_if_no_pytz() + # see gh-2938 from pytz import timezone as timezone rng = date_range('5/8/2012 1:45', periods=10, freq='5T', @@ -293,8 +290,7 @@ def test_append_concat_tz_explicit_pytz(self): tm.assert_index_equal(appended, rng3) def test_append_concat_tz_dateutil(self): - # GH 2938 - tm._skip_if_no_dateutil() + # see gh-2938 rng = date_range('5/8/2012 1:45', periods=10, freq='5T', tz='dateutil/US/Eastern') rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 7f876357ad3ab..6ded4d593a571 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -338,9 +338,7 @@ def test_getitem_setitem_slice_integers(self): assert not (s[4:] == 0).any() def test_getitem_setitem_datetime_tz_pytz(self): - tm._skip_if_no_pytz() from pytz import timezone as tz - from pandas import date_range N = 50 @@ -374,7 +372,6 @@ def test_getitem_setitem_datetime_tz_pytz(self): assert_series_equal(result, ts) def test_getitem_setitem_datetime_tz_dateutil(self): - tm._skip_if_no_dateutil() from dateutil.tz import tzutc from pandas._libs.tslib import _dateutil_gettz as gettz diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index db0d06aa35a2a..2e400812e0331 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -2,6 +2,7 @@ # pylint: disable-msg=E1101,W0612 import pytest +import pytz from collections import Iterable from datetime import datetime, timedelta @@ -725,9 +726,7 @@ def run_ops(ops, get_ser, test_ser): pytest.raises(TypeError, lambda: td2 - dt2) def test_sub_datetime_compat(self): - # GH 14088 - tm._skip_if_no_pytz() - import pytz + # see gh-14088 s = Series([datetime(2016, 8, 23, 12, tzinfo=pytz.utc), pd.NaT]) dt = datetime(2016, 8, 22, 12, tzinfo=pytz.utc) exp = Series([Timedelta('1 days'), pd.NaT]) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index ab28b8b43f359..9d80190ae2813 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -4,6 +4,7 @@ import datetime import itertools import pytest +import pytz from numpy.random import randn import numpy as np @@ -68,8 +69,6 @@ def test_append(self): tm.assert_series_equal(result, self.frame['A']) def test_append_index(self): - tm._skip_if_no_pytz() - idx1 = Index([1.1, 1.2, 1.3]) idx2 = pd.date_range('2011-01-01', freq='D', periods=3, tz='Asia/Tokyo') @@ -80,8 +79,7 @@ def test_append_index(self): result = idx1.append(midx_lv2) - # GH 7112 - import pytz + # see gh-7112 tz = pytz.timezone('Asia/Tokyo') expected_tuples = [(1.1, tz.localize(datetime.datetime(2011, 1, 1))), (1.2, tz.localize(datetime.datetime(2011, 1, 2))), diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py 
index 9734431c8b012..37e2fd0e9b188 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -4,7 +4,9 @@ from datetime import datetime, timedelta from functools import partial +import pytz import pytest +import dateutil import numpy as np import pandas as pd @@ -2424,10 +2426,7 @@ def test_resample_incompat_freq(self): start='2000', periods=3, freq='M')).resample('W').mean() def test_with_local_timezone_pytz(self): - # GH5430 - tm._skip_if_no_pytz() - import pytz - + # see gh-5430 local_timezone = pytz.timezone('America/Los_Angeles') start = datetime(year=2013, month=11, day=1, hour=0, minute=0, @@ -2450,10 +2449,7 @@ def test_with_local_timezone_pytz(self): assert_series_equal(result, expected) def test_with_local_timezone_dateutil(self): - # GH5430 - tm._skip_if_no_dateutil() - import dateutil - + # see gh-5430 local_timezone = 'dateutil/America/Los_Angeles' start = datetime(year=2013, month=11, day=1, hour=0, minute=0, diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index 09de064c15183..47b15a2b66fc4 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -148,8 +148,6 @@ def test_apply_out_of_range(self): assert isinstance(result, datetime) assert result.tzinfo is None - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() # Check tz is preserved for tz in self.timezones: t = Timestamp('20080101', tz=tz) @@ -157,7 +155,7 @@ def test_apply_out_of_range(self): assert isinstance(result, datetime) assert t.tzinfo == result.tzinfo - except (tslib.OutOfBoundsDatetime): + except tslib.OutOfBoundsDatetime: raise except (ValueError, KeyError) as e: pytest.skip( @@ -285,9 +283,6 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected, # test tz when input is datetime or Timestamp return - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() - for tz in self.timezones: expected_localize = expected.tz_localize(tz) tz_obj = tslib.maybe_get_tz(tz) @@ -468,7 +463,6 @@ def test_add(self): assert isinstance(result, Timestamp) assert result == expected - tm._skip_if_no_pytz() for tz in self.timezones: expected_localize = expected.tz_localize(tz) result = Timestamp(dt, tz=tz) + offset_s diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 97c54922d36e9..de6978d52968b 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -1,10 +1,15 @@ # pylint: disable-msg=E1101,W0612 import pytest + import pytz +import dateutil import numpy as np + +from dateutil.parser import parse +from pytz import NonExistentTimeError from distutils.version import LooseVersion +from dateutil.tz import tzlocal, tzoffset from datetime import datetime, timedelta, tzinfo, date -from pytz import NonExistentTimeError import pandas.util.testing as tm import pandas.core.tools.datetimes as tools @@ -18,16 +23,6 @@ from pandas.util.testing import (assert_frame_equal, assert_series_equal, set_timezone) -try: - import pytz # noqa -except ImportError: - pass - -try: - import dateutil -except ImportError: - pass - class FixedOffset(tzinfo): """Fixed offset in minutes east from UTC.""" @@ -52,9 +47,6 @@ def dst(self, dt): class TestTimeZoneSupportPytz(object): - def setup_method(self, method): - tm._skip_if_no_pytz() - def tz(self, tz): # Construct a timezone object from a string. Overridden in subclass to # parameterize tests. 
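The `tz()` hook whose setup guard is being trimmed here is how these timezone suites get parameterized: a pytz-flavored base class builds zones one way, and a dateutil subclass further down overrides only the constructors while inheriting every test. Schematically (simplified, hypothetical class names):

    import pytz
    import dateutil.tz

    class TZSupportPytzBase(object):
        def tz(self, name):             # e.g. self.tz('US/Eastern')
            return pytz.timezone(name)

    class TZSupportDateutil(TZSupportPytzBase):
        def tz(self, name):             # same tests, dateutil zones
            return dateutil.tz.gettz(name)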
@@ -207,8 +199,6 @@ def test_timestamp_constructor_near_dst_boundary(self): assert result == expected def test_timestamp_to_datetime_tzoffset(self): - # tzoffset - from dateutil.tz import tzoffset tzinfo = tzoffset(None, 7200) expected = Timestamp('3/11/2012 04:00', tz=tzinfo) result = Timestamp(expected.to_pydatetime()) @@ -294,7 +284,7 @@ def test_create_with_tz(self): assert utc_stamp.tzinfo is pytz.utc assert utc_stamp.hour == 5 - stamp = Timestamp('3/11/2012 05:00').tz_localize('utc') + utc_stamp = Timestamp('3/11/2012 05:00').tz_localize('utc') assert utc_stamp.hour == 5 def test_create_with_fixed_tz(self): @@ -670,7 +660,6 @@ def test_tz_string(self): tm.assert_index_equal(result, expected) def test_take_dont_lose_meta(self): - tm._skip_if_no_pytz() rng = date_range('1/1/2000', periods=20, tz=self.tzstr('US/Eastern')) result = rng.take(lrange(5)) @@ -765,15 +754,12 @@ def test_convert_tz_aware_datetime_datetime(self): assert converted.tz is pytz.utc def test_to_datetime_utc(self): - from dateutil.parser import parse arr = np.array([parse('2012-06-13T01:39:00Z')], dtype=object) result = to_datetime(arr, utc=True) assert result.tz is pytz.utc def test_to_datetime_tzlocal(self): - from dateutil.parser import parse - from dateutil.tz import tzlocal dt = parse('2012-06-13T01:39:00Z') dt = dt.replace(tzinfo=tzlocal()) @@ -889,7 +875,6 @@ def test_frame_reset_index(self): assert xp == rs def test_dateutil_tzoffset_support(self): - from dateutil.tz import tzoffset values = [188.5, 328.25] tzinfo = tzoffset(None, 7200) index = [datetime(2012, 5, 11, 11, tzinfo=tzinfo), @@ -944,9 +929,6 @@ def test_datetimeindex_tz_nat(self): class TestTimeZoneSupportDateutil(TestTimeZoneSupportPytz): - def setup_method(self, method): - tm._skip_if_no_dateutil() - def tz(self, tz): """ Construct a dateutil timezone. 
@@ -1197,9 +1179,6 @@ def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self): class TestTimeZones(object): timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific'] - def setup_method(self, method): - tm._skip_if_no_pytz() - def test_replace(self): # GH 14621 # GH 7825 @@ -1244,8 +1223,6 @@ def f(): def test_ambiguous_compat(self): # validate that pytz and dateutil are compat for dst # when the transition happens - tm._skip_if_no_dateutil() - tm._skip_if_no_pytz() pytz_zone = 'Europe/London' dateutil_zone = 'dateutil/Europe/London' @@ -1637,7 +1614,6 @@ def test_normalize_tz(self): assert result.is_normalized assert not rng.is_normalized - from dateutil.tz import tzlocal rng = date_range('1/1/2000 9:30', periods=10, freq='D', tz=tzlocal()) result = rng.normalize() expected = date_range('1/1/2000', periods=10, freq='D', tz=tzlocal()) @@ -1647,9 +1623,7 @@ def test_normalize_tz(self): assert not rng.is_normalized def test_normalize_tz_local(self): - # GH 13459 - from dateutil.tz import tzlocal - + # see gh-13459 timezones = ['US/Pacific', 'US/Eastern', 'UTC', 'Asia/Kolkata', 'Asia/Shanghai', 'Australia/Canberra'] diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 4b610c505c574..5f01f42eb0c69 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -270,27 +270,18 @@ def close(fignum=None): def _skip_if_32bit(): - import pytest if is_platform_32bit(): + import pytest pytest.skip("skipping for 32 bit") -def _skip_module_if_no_mpl(): +def _skip_if_no_mpl(): import pytest mpl = pytest.importorskip("matplotlib") mpl.use("Agg", warn=False) -def _skip_if_no_mpl(): - try: - import matplotlib as mpl - mpl.use("Agg", warn=False) - except ImportError: - import pytest - pytest.skip("matplotlib not installed") - - def _skip_if_mpl_1_5(): import matplotlib as mpl @@ -303,21 +294,11 @@ def _skip_if_mpl_1_5(): def _skip_if_no_scipy(): - try: - import scipy.stats # noqa - except ImportError: - import pytest - pytest.skip("no scipy.stats module") - try: - import scipy.interpolate # noqa - except ImportError: - import pytest - pytest.skip('scipy.interpolate missing') - try: - import scipy.sparse # noqa - except ImportError: - import pytest - pytest.skip('scipy.sparse missing') + import pytest + + pytest.importorskip("scipy.stats") + pytest.importorskip("scipy.sparse") + pytest.importorskip("scipy.interpolate") def _check_if_lzma(): @@ -333,34 +314,16 @@ def _skip_if_no_lzma(): def _skip_if_no_xarray(): - try: - import xarray - except ImportError: - import pytest - pytest.skip("xarray not installed") + import pytest + xarray = pytest.importorskip("xarray") v = xarray.__version__ + if v < LooseVersion('0.7.0'): import pytest pytest.skip("xarray not version is too low: {0}".format(v)) -def _skip_if_no_pytz(): - try: - import pytz # noqa - except ImportError: - import pytest - pytest.skip("pytz not installed") - - -def _skip_if_no_dateutil(): - try: - import dateutil # noqa - except ImportError: - import pytest - pytest.skip("dateutil not installed") - - def _skip_if_windows_python_3(): if PY3 and is_platform_windows(): import pytest @@ -441,16 +404,13 @@ def _skip_if_no_mock(): try: from unittest import mock # noqa except ImportError: - import nose - raise nose.SkipTest("mock is not installed") + import pytest + raise pytest.skip("mock is not installed") def _skip_if_no_ipython(): - try: - import IPython # noqa - except ImportError: - import nose - raise nose.SkipTest("IPython not installed") + import pytest + pytest.importorskip("IPython") # 
----------------------------------------------------------------------------- # locale utilities From 9e57d915744623bef31170629bd1996eda3352cf Mon Sep 17 00:00:00 2001 From: SimonBaron Date: Mon, 8 May 2017 11:56:57 +0100 Subject: [PATCH 533/933] DOC: fixed broken link GH16279 (#16281) --- doc/source/style.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/style.ipynb b/doc/source/style.ipynb index 427b18b988aef..4eeda491426b1 100644 --- a/doc/source/style.ipynb +++ b/doc/source/style.ipynb @@ -12,7 +12,7 @@ "\n", "*Provisional: This is a new feature and still under development. We'll be adding features and possibly making breaking changes in future releases. We'd love to hear your feedback.*\n", "\n", - "This document is written as a Jupyter Notebook, and can be viewed or downloaded [here](http://nbviewer.ipython.org/github/pandas-dev/pandas/blob/master/doc/source/html-styling.ipynb).\n", + "This document is written as a Jupyter Notebook, and can be viewed or downloaded [here](http://nbviewer.ipython.org/github/pandas-dev/pandas/blob/master/doc/source/style.ipynb).\n", "\n", "You can apply **conditional formatting**, the visual styling of a DataFrame\n", "depending on the data within, by using the ``DataFrame.style`` property.\n", From 80abd97eb812a7bd87fa62909fdc041024567ffe Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Mon, 8 May 2017 12:39:16 -0700 Subject: [PATCH 534/933] TST: Correct test name for MLK holiday. (#16287) --- pandas/tests/tseries/test_holiday.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tseries/test_holiday.py b/pandas/tests/tseries/test_holiday.py index 59a2a225ab5f8..3ea7e5b8620f2 100644 --- a/pandas/tests/tseries/test_holiday.py +++ b/pandas/tests/tseries/test_holiday.py @@ -344,7 +344,7 @@ def test_after_nearest_workday(self): class TestFederalHolidayCalendar(object): - def test_no_mlk_before_1984(self): + def test_no_mlk_before_1986(self): # see gh-10278 class MLKCalendar(AbstractHolidayCalendar): rules = [USMartinLutherKingJr] From 4bed864a24901d9c2baab5e17c57c956a188602f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 8 May 2017 21:10:41 -0400 Subject: [PATCH 535/933] BLD: depending on non-existant file in sparse (#16293) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d101358fb63dd..9a04bb6994869 100755 --- a/setup.py +++ b/setup.py @@ -524,7 +524,7 @@ def pxd(name): 'sources': ['pandas/_libs/src/parser/tokenizer.c', 'pandas/_libs/src/parser/io.c']}, '_libs.sparse': {'pyxfile': '_libs/sparse', - 'depends': (['pandas/core/sparse/sparse.pyx'] + + 'depends': (['pandas/_libs/sparse.pyx'] + _pxi_dep['sparse'])}, '_libs.testing': {'pyxfile': '_libs/testing', 'depends': ['pandas/_libs/testing.pyx']}, From 1e59b4cca76e32b0bbe9cfdc4b574795467523ac Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 8 May 2017 22:09:31 -0400 Subject: [PATCH 536/933] COMPAT: don't force clipboard routines to be imported in main pandas started (#16294) closes #16288 --- doc/source/whatsnew/v0.20.2.txt | 2 +- pandas/core/generic.py | 4 ++-- pandas/io/api.py | 2 +- pandas/io/{clipboard/clipboard.py => clipboards.py} | 0 4 files changed, 4 insertions(+), 4 deletions(-) rename pandas/io/{clipboard/clipboard.py => clipboards.py} (100%) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index e0a8065d9a507..d89422631ed04 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -48,7 +48,7 @@ Indexing 
I/O ^^^ - +- Bug that would force importing of the clipboard routines unecessarily, potentially causing an import error on startup (:issue:`16288`) Plotting diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 27a489293db8f..b72f83ce723cc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1382,8 +1382,8 @@ def to_clipboard(self, excel=None, sep=None, **kwargs): - Windows: none - OS X: none """ - from pandas.io.clipboard import clipboard - clipboard.to_clipboard(self, excel=excel, sep=sep, **kwargs) + from pandas.io import clipboards + clipboards.to_clipboard(self, excel=excel, sep=sep, **kwargs) def to_xarray(self): """ diff --git a/pandas/io/api.py b/pandas/io/api.py index 7f0d3c3631f63..a4a25b78942db 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -5,7 +5,7 @@ # flake8: noqa from pandas.io.parsers import read_csv, read_table, read_fwf -from pandas.io.clipboard.clipboard import read_clipboard +from pandas.io.clipboards import read_clipboard from pandas.io.excel import ExcelFile, ExcelWriter, read_excel from pandas.io.pytables import HDFStore, get_store, read_hdf from pandas.io.json import read_json diff --git a/pandas/io/clipboard/clipboard.py b/pandas/io/clipboards.py similarity index 100% rename from pandas/io/clipboard/clipboard.py rename to pandas/io/clipboards.py From 0091810baf28c7872f8204755fb55363642cfcda Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 9 May 2017 06:09:02 -0400 Subject: [PATCH 537/933] BLD: run only multi on 2.7-build_test build (#16296) --- ci/script_multi.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/script_multi.sh b/ci/script_multi.sh index daa929e177666..d79fc43fbe175 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -27,7 +27,7 @@ if [ "$BUILD_TEST" ]; then echo "[running]" cd /tmp unset PYTHONPATH - python -c "import pandas; pandas.test(['-n 2', '--skip-slow', '--skip-network', '-r xX'])" + python -c 'import pandas; pandas.test(["-n 2", "--skip-slow", "--skip-network", "-r xX", "-m not single"])' elif [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" From ce4eef3750052cec62ca0fe6536521dec523cd64 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 9 May 2017 06:10:24 -0400 Subject: [PATCH 538/933] PERF: fix clean_index_list perf (#16295) closes #16285 --- asv_bench/benchmarks/indexing.py | 3 +++ doc/source/whatsnew/v0.20.2.txt | 1 + pandas/_libs/lib.pyx | 22 +++++++++++++--------- pandas/core/indexes/base.py | 2 +- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 8947a0fdd796c..31af56b3715a5 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -19,6 +19,9 @@ def time_getitem_list_like(self): def time_getitem_array(self): self.s[np.arange(10000)] + def time_getitem_lists(self): + self.s[np.arange(10000).tolist()] + def time_iloc_array(self): self.s.iloc[np.arange(10000)] diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index d89422631ed04..2a7b37c95230c 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -26,6 +26,7 @@ Enhancements Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- Performance regression fix when indexing with a list-like (:issue:`16285`) .. 
_whatsnew_0202.bug_fixes: diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 31402c38c770d..f6e574b66a828 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -950,7 +950,6 @@ def clean_index_list(list obj): Utility used in pandas.core.index._ensure_index """ cdef: - ndarray[object] converted Py_ssize_t i, n = len(obj) object v bint all_arrays = 1 @@ -964,15 +963,20 @@ def clean_index_list(list obj): if all_arrays: return obj, all_arrays - converted = np.empty(n, dtype=object) - for i in range(n): - v = obj[i] - if PyList_Check(v) or np.PyArray_Check(v) or hasattr(v, '_data'): - converted[i] = tuple(v) - else: - converted[i] = v + # don't force numpy coerce with nan's + inferred = infer_dtype(obj) + if inferred in ['string', 'bytes', 'unicode', + 'mixed', 'mixed-integer']: + return np.asarray(obj, dtype=object), 0 + elif inferred in ['integer']: + + # TODO: we infer an integer but it *could* be a unint64 + try: + return np.asarray(obj, dtype='int64'), 0 + except OverflowError: + return np.asarray(obj, dtype='object'), 0 - return maybe_convert_objects(converted), 0 + return np.asarray(obj), 0 ctypedef fused pandas_string: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 82f3bf3b15462..9b29f1b04ff73 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3960,7 +3960,7 @@ def _ensure_index(index_like, copy=False): if isinstance(index_like, list): if type(index_like) != list: index_like = list(index_like) - # 2200 ? + converted, all_arrays = lib.clean_index_list(index_like) if len(converted) > 0 and all_arrays: From 81aa70c2a598f9c12d8cb56a4d758b7f213770fe Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 9 May 2017 15:55:19 -0400 Subject: [PATCH 539/933] BUG: Don't segfault to_numeric when input is empty (#16305) Closes gh-16302. 
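Before the guard added below, `maybe_convert_numeric` dereferenced `values[0]` unconditionally, so empty input crashed the interpreter outright. The behavior the fix pins down, reconstructed from the new test:

    import pandas as pd

    s = pd.Series([], dtype=object)
    pd.to_numeric(s)                   # now an empty int64 Series, no crash
    pd.to_numeric(s, errors='coerce',
                  downcast='integer')  # an empty int8 Series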
--- doc/source/whatsnew/v0.20.2.txt | 4 ++-- pandas/_libs/src/inference.pyx | 5 +++++ pandas/tests/tools/test_numeric.py | 15 +++++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 2a7b37c95230c..bca92137891a0 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -37,7 +37,7 @@ Bug Fixes Conversion ^^^^^^^^^^ - +- Bug in ``pd.to_numeric()`` in which empty data inputs were causing Python to crash (:issue:`16302`) Indexing @@ -49,7 +49,7 @@ Indexing I/O ^^^ -- Bug that would force importing of the clipboard routines unecessarily, potentially causing an import error on startup (:issue:`16288`) +- Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`) Plotting diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index d87a0641291b1..ddd38979e326c 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -947,8 +947,13 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, ------- numeric_array : array of converted object values to numerical ones """ + + if len(values) == 0: + return np.array([], dtype='i8') + # fastpath for ints - try to convert all based on first value cdef object val = values[0] + if util.is_integer_object(val): try: maybe_ints = values.astype('i8') diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py index f82ad97d7b70f..664a97640387e 100644 --- a/pandas/tests/tools/test_numeric.py +++ b/pandas/tests/tools/test_numeric.py @@ -11,6 +11,21 @@ class TestToNumeric(object): + def test_empty(self): + # see gh-16302 + s = pd.Series([], dtype=object) + + res = to_numeric(s) + expected = pd.Series([], dtype=np.int64) + + tm.assert_series_equal(res, expected) + + # Original issue example + res = to_numeric(s, errors='coerce', downcast='integer') + expected = pd.Series([], dtype=np.int8) + + tm.assert_series_equal(res, expected) + def test_series(self): s = pd.Series(['1', '-3.14', '7']) res = to_numeric(s) From 09d1c97f5030dbb7779d30ce0c9622321fd43304 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Wed, 10 May 2017 21:05:44 +1000 Subject: [PATCH 540/933] Add tests for Excel styling that were missing (#16313) --- pandas/tests/io/formats/test_to_excel.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/io/formats/test_to_excel.py b/pandas/tests/io/formats/test_to_excel.py index cdff3b8a5cca8..26a9bb018f30a 100644 --- a/pandas/tests/io/formats/test_to_excel.py +++ b/pandas/tests/io/formats/test_to_excel.py @@ -32,6 +32,14 @@ ('font-family: roman, fantasy', {'font': {'name': 'roman', 'family': 5}}), # - size ('font-size: 1em', {'font': {'size': 12}}), + ('font-size: xx-small', {'font': {'size': 6}}), + ('font-size: x-small', {'font': {'size': 7.5}}), + ('font-size: small', {'font': {'size': 9.6}}), + ('font-size: medium', {'font': {'size': 12}}), + ('font-size: large', {'font': {'size': 13.5}}), + ('font-size: x-large', {'font': {'size': 18}}), + ('font-size: xx-large', {'font': {'size': 24}}), + ('font-size: 50%', {'font': {'size': 6}}), # - bold ('font-weight: 100', {'font': {'bold': False}}), ('font-weight: 200', {'font': {'bold': False}}), @@ -45,6 +53,8 @@ ('font-weight: 800', {'font': {'bold': True}}), ('font-weight: 900', {'font': {'bold': True}}), # - italic + ('font-style: italic', {'font': {'italic': True}}), + ('font-style: oblique', {'font': 
{'italic': True}}), # - underline ('text-decoration: underline', {'font': {'underline': 'single'}}), From 0607e03f71b52e131947d73880380796f51d72fb Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 10 May 2017 08:36:21 -0400 Subject: [PATCH 541/933] TST: not printing skips (#16318) --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f0ece15de65db..b7c18d2850a15 100644 --- a/.travis.yml +++ b/.travis.yml @@ -123,7 +123,7 @@ after_success: after_script: - echo "after_script start" - - source activate pandas && cd /tmp && python -c "import pandas; pandas.show_versions();" + - source activate pandas && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd - if [ -e /tmp/single.xml ]; then ci/print_skipped.py /tmp/single.xml; fi From b1ff2914120867df9f459756f1209603c6bedf4f Mon Sep 17 00:00:00 2001 From: DSM Date: Thu, 11 May 2017 07:25:14 -0400 Subject: [PATCH 542/933] BUG: Preserve data order when stacking unsorted levels (#16323) (#16325) --- doc/source/whatsnew/v0.20.2.txt | 2 +- pandas/core/reshape/reshape.py | 2 +- pandas/tests/test_multilevel.py | 31 +++++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index bca92137891a0..983f3edfa2f46 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -73,7 +73,7 @@ Sparse Reshaping ^^^^^^^^^ - +- Bug in ``DataFrame.stack`` with unsorted levels in MultiIndex columns (:issue:`16323`) Numeric diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 779002b300cc7..b0ed6d4c4b84d 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -689,7 +689,7 @@ def _convert_level_number(level_num, columns): new_labels = [np.arange(N).repeat(levsize)] new_names = [this.index.name] # something better? 
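The one-line change that follows is the entire fix: the level values appended to the new index must be the `level_vals` actually used when the columns were reordered, not the original `frame.columns.levels[level_num]`, otherwise data and labels drift apart whenever a level is unsorted. A small illustration adapted from the new test:

    import pandas as pd

    columns = pd.MultiIndex(levels=[['B', 'A'], ['B', 'A']],
                            labels=[[0, 0, 1, 1], [0, 1, 0, 1]])
    df = pd.DataFrame(columns=columns, data=[range(4)])
    stacked = df.stack(0)
    # Every cell must satisfy
    #     df.loc[row, col] == stacked.loc[(row, col[0]), col[1]]
    # which previously failed for MultiIndexes with unsorted levels.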
- new_levels.append(frame.columns.levels[level_num]) + new_levels.append(level_vals) new_labels.append(np.tile(level_labels, N)) new_names.append(frame.columns.names[level_num]) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 9d80190ae2813..c8c210c42eac2 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1193,6 +1193,37 @@ def test_unstack_unobserved_keys(self): recons = result.stack() tm.assert_frame_equal(recons, df) + def test_stack_order_with_unsorted_levels(self): + # GH 16323 + + def manual_compare_stacked(df, df_stacked, lev0, lev1): + assert all(df.loc[row, col] == + df_stacked.loc[(row, col[lev0]), col[lev1]] + for row in df.index for col in df.columns) + + # deep check for 1-row case + for width in [2, 3]: + levels_poss = itertools.product( + itertools.permutations([0, 1, 2], width), + repeat=2) + + for levels in levels_poss: + columns = MultiIndex(levels=levels, + labels=[[0, 0, 1, 1], + [0, 1, 0, 1]]) + df = DataFrame(columns=columns, data=[range(4)]) + for stack_lev in range(2): + df_stacked = df.stack(stack_lev) + manual_compare_stacked(df, df_stacked, + stack_lev, 1 - stack_lev) + + # check multi-row case + mi = MultiIndex(levels=[["A", "C", "B"], ["B", "A", "C"]], + labels=[np.repeat(range(3), 3), np.tile(range(3), 3)]) + df = DataFrame(columns=mi, index=range(5), + data=np.arange(5 * len(mi)).reshape(5, -1)) + manual_compare_stacked(df, df.stack(0), 0, 1) + def test_groupby_corner(self): midx = MultiIndex(levels=[['foo'], ['bar'], ['baz']], labels=[[0], [0], [0]], From fdc21858042cc738eeb07da97ed01157012b2421 Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Thu, 11 May 2017 14:39:06 +0300 Subject: [PATCH 543/933] =?UTF-8?q?COMPAT/TEST=20test,=20fix=20for=20unsaf?= =?UTF-8?q?e=20Vector.resize(),=20which=20allows=20refche=E2=80=A6=20(#162?= =?UTF-8?q?58)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * COMPAT/TEST test, fix for unsafe Vector.resize(), which allows refcheck=False * COMPAT/TEST improve error msg, document test as per review * COMPAT/TEST unify interfaces as per review --- pandas/_libs/hashtable.pxd | 1 + pandas/_libs/hashtable.pyx | 13 ++++++ pandas/_libs/hashtable_class_helper.pxi.in | 46 ++++++++++++++++++---- pandas/tests/test_algos.py | 44 +++++++++++++-------- 4 files changed, 80 insertions(+), 24 deletions(-) diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd index 9b352ae1c003b..3366751af144d 100644 --- a/pandas/_libs/hashtable.pxd +++ b/pandas/_libs/hashtable.pxd @@ -52,6 +52,7 @@ cdef struct Int64VectorData: cdef class Int64Vector: cdef Int64VectorData *data cdef ndarray ao + cdef bint external_view_exists cdef resize(self) cpdef to_array(self) diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index c8aedcef77502..101e2c031f26e 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -64,6 +64,10 @@ cdef class Factorizer: >>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20) array([ 0, 1, 20]) """ + if self.uniques.external_view_exists: + uniques = ObjectVector() + uniques.extend(self.uniques.to_array()) + self.uniques = uniques labels = self.table.get_labels(values, self.uniques, self.count, na_sentinel, check_null) mask = (labels == na_sentinel) @@ -99,6 +103,15 @@ cdef class Int64Factorizer: def factorize(self, int64_t[:] values, sort=False, na_sentinel=-1, check_null=True): + """ + Factorize values with nans replaced by na_sentinel + >>> 
factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20) + array([ 0, 1, 20]) + """ + if self.uniques.external_view_exists: + uniques = Int64Vector() + uniques.extend(self.uniques.to_array()) + self.uniques = uniques labels = self.table.get_labels(values, self.uniques, self.count, na_sentinel, check_null) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 3ce82dace40a9..b80a592669eca 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -71,6 +71,7 @@ cdef class {{name}}Vector: {{if dtype != 'int64'}} cdef: + bint external_view_exists {{name}}VectorData *data ndarray ao {{endif}} @@ -80,6 +81,7 @@ cdef class {{name}}Vector: sizeof({{name}}VectorData)) if not self.data: raise MemoryError() + self.external_view_exists = False self.data.n = 0 self.data.m = _INIT_VEC_CAP self.ao = np.empty(self.data.m, dtype={{idtype}}) @@ -87,7 +89,7 @@ cdef class {{name}}Vector: cdef resize(self): self.data.m = max(self.data.m * 4, _INIT_VEC_CAP) - self.ao.resize(self.data.m) + self.ao.resize(self.data.m, refcheck=False) self.data.data = <{{arg}}*> self.ao.data def __dealloc__(self): @@ -99,13 +101,20 @@ cdef class {{name}}Vector: return self.data.n cpdef to_array(self): - self.ao.resize(self.data.n) - self.data.m = self.data.n + if self.data.m != self.data.n: + if self.external_view_exists: + # should never happen + raise ValueError("should have raised on append()") + self.ao.resize(self.data.n, refcheck=False) + self.data.m = self.data.n + self.external_view_exists = True return self.ao cdef inline void append(self, {{arg}} x): if needs_resize(self.data): + if self.external_view_exists: + raise ValueError("external reference but Vector.resize() needed") self.resize() append_data_{{dtype}}(self.data, x) @@ -120,15 +129,19 @@ cdef class StringVector: cdef: StringVectorData *data + bint external_view_exists def __cinit__(self): self.data = PyMem_Malloc( sizeof(StringVectorData)) if not self.data: raise MemoryError() + self.external_view_exists = False self.data.n = 0 self.data.m = _INIT_VEC_CAP self.data.data = malloc(self.data.m * sizeof(char *)) + if not self.data.data: + raise MemoryError() cdef resize(self): cdef: @@ -138,9 +151,10 @@ cdef class StringVector: m = self.data.m self.data.m = max(self.data.m * 4, _INIT_VEC_CAP) - # TODO: can resize? 
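For context on why these vectors now carry an `external_view_exists` flag and resize with `refcheck=False`: `ndarray.resize` normally refuses to reallocate an array that anything else references, and once `to_array()` has handed the buffer out, reallocating it would silently invalidate the caller's array. A minimal sketch of the underlying numpy behavior (the second resize is deliberately unsafe; `v` must not be touched afterwards):

    import numpy as np

    ao = np.empty(4, dtype=np.int64)
    v = ao[:2]                     # an external view into ao's buffer
    # ao.resize(8)                 # would raise: refcheck sees the view
    ao.resize(8, refcheck=False)   # reallocates anyway; `v` now dangles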
orig_data = self.data.data self.data.data = malloc(self.data.m * sizeof(char *)) + if not self.data.data: + raise MemoryError() for i in range(m): self.data.data[i] = orig_data[i] @@ -164,6 +178,7 @@ cdef class StringVector: for i in range(self.data.n): val = self.data.data[i] ao[i] = val + self.external_view_exists = True self.data.m = self.data.n return ao @@ -174,6 +189,9 @@ cdef class StringVector: append_data_string(self.data, x) + cdef extend(self, ndarray[:] x): + for i in range(len(x)): + self.append(x[i]) cdef class ObjectVector: @@ -181,8 +199,10 @@ cdef class ObjectVector: PyObject **data size_t n, m ndarray ao + bint external_view_exists def __cinit__(self): + self.external_view_exists = False self.n = 0 self.m = _INIT_VEC_CAP self.ao = np.empty(_INIT_VEC_CAP, dtype=object) @@ -193,8 +213,10 @@ cdef class ObjectVector: cdef inline append(self, object o): if self.n == self.m: + if self.external_view_exists: + raise ValueError("external reference but Vector.resize() needed") self.m = max(self.m * 2, _INIT_VEC_CAP) - self.ao.resize(self.m) + self.ao.resize(self.m, refcheck=False) self.data = self.ao.data Py_INCREF(o) @@ -202,10 +224,17 @@ cdef class ObjectVector: self.n += 1 def to_array(self): - self.ao.resize(self.n) - self.m = self.n + if self.m != self.n: + if self.external_view_exists: + raise ValueError("should have raised on append()") + self.ao.resize(self.n, refcheck=False) + self.m = self.n + self.external_view_exists = True return self.ao + cdef extend(self, ndarray[:] x): + for i in range(len(x)): + self.append(x[i]) #---------------------------------------------------------------------- # HashTable @@ -362,6 +391,9 @@ cdef class {{name}}HashTable(HashTable): if needs_resize(ud): with gil: + if uniques.external_view_exists: + raise ValueError("external reference to uniques held, " + "but Vector.resize() needed") uniques.resize() append_data_{{dtype}}(ud, val) labels[i] = count diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 093730fb2478b..351e646cbb0b2 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -14,7 +14,7 @@ from pandas import compat from pandas._libs import (groupby as libgroupby, algos as libalgos, - hashtable) + hashtable as ht) from pandas._libs.hashtable import unique_label_indices from pandas.compat import lrange, range import pandas.core.algorithms as algos @@ -259,7 +259,7 @@ def test_factorize_nan(self): # rizer.factorize should not raise an exception if na_sentinel indexes # outside of reverse_indexer key = np.array([1, 2, 1, np.nan], dtype='O') - rizer = hashtable.Factorizer(len(key)) + rizer = ht.Factorizer(len(key)) for na_sentinel in (-1, 20): ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel) expected = np.array([0, 1, 0, na_sentinel], dtype='int32') @@ -1049,14 +1049,14 @@ class TestHashTable(object): def test_lookup_nan(self): xs = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3]) - m = hashtable.Float64HashTable() + m = ht.Float64HashTable() m.map_locations(xs) tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.int64)) def test_lookup_overflow(self): xs = np.array([1, 2, 2**63], dtype=np.uint64) - m = hashtable.UInt64HashTable() + m = ht.UInt64HashTable() m.map_locations(xs) tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.int64)) @@ -1070,25 +1070,35 @@ def test_vector_resize(self): # Test for memory errors after internal vector # reallocations (pull request #7157) - def _test_vector_resize(htable, uniques, dtype, nvals): + def 
_test_vector_resize(htable, uniques, dtype, nvals, safely_resizes): vals = np.array(np.random.randn(1000), dtype=dtype) - # get_labels appends to the vector + # get_labels may append to uniques htable.get_labels(vals[:nvals], uniques, 0, -1) - # to_array resizes the vector - uniques.to_array() - htable.get_labels(vals, uniques, 0, -1) + # to_array() set an external_view_exists flag on uniques. + tmp = uniques.to_array() + oldshape = tmp.shape + # subsequent get_labels() calls can no longer append to it + # (for all but StringHashTables + ObjectVector) + if safely_resizes: + htable.get_labels(vals, uniques, 0, -1) + else: + with pytest.raises(ValueError) as excinfo: + htable.get_labels(vals, uniques, 0, -1) + assert str(excinfo.value).startswith('external reference') + uniques.to_array() # should not raise here + assert tmp.shape == oldshape test_cases = [ - (hashtable.PyObjectHashTable, hashtable.ObjectVector, 'object'), - (hashtable.StringHashTable, hashtable.ObjectVector, 'object'), - (hashtable.Float64HashTable, hashtable.Float64Vector, 'float64'), - (hashtable.Int64HashTable, hashtable.Int64Vector, 'int64'), - (hashtable.UInt64HashTable, hashtable.UInt64Vector, 'uint64')] + (ht.PyObjectHashTable, ht.ObjectVector, 'object', False), + (ht.StringHashTable, ht.ObjectVector, 'object', True), + (ht.Float64HashTable, ht.Float64Vector, 'float64', False), + (ht.Int64HashTable, ht.Int64Vector, 'int64', False), + (ht.UInt64HashTable, ht.UInt64Vector, 'uint64', False)] - for (tbl, vect, dtype) in test_cases: + for (tbl, vect, dtype, safely_resizes) in test_cases: # resizing to empty is a special case - _test_vector_resize(tbl(), vect(), dtype, 0) - _test_vector_resize(tbl(), vect(), dtype, 10) + _test_vector_resize(tbl(), vect(), dtype, 0, safely_resizes) + _test_vector_resize(tbl(), vect(), dtype, 10, safely_resizes) def test_quantile(): From 1bee0357a97c2c3d79adcd5f120773d7627baca0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 11 May 2017 11:07:28 -0500 Subject: [PATCH 544/933] BUG: Accept list-like color with single col in plot (#16233) Closes #3486 --- doc/source/whatsnew/v0.20.2.txt | 2 ++ pandas/plotting/_core.py | 3 ++- pandas/tests/plotting/test_frame.py | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 983f3edfa2f46..cbfebee2ceba2 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -55,6 +55,8 @@ I/O Plotting ^^^^^^^^ +- Bug in ``DataFrame.plot`` with a single column and a list-like ``color`` (:issue:`3486`) + diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index e88979b14c8af..c0f9f62106330 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -180,7 +180,8 @@ def _validate_color_args(self): colors = self.kwds.pop('colors') self.kwds['color'] = colors - if ('color' in self.kwds and self.nseries == 1): + if ('color' in self.kwds and self.nseries == 1 and + not is_list_like(self.kwds['color'])): # support series.plot(color='green') self.kwds['color'] = [self.kwds['color']] diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 9abbb348fbfa8..e40ec5a1faea8 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -153,6 +153,11 @@ def test_mpl2_color_cycle_str(self): else: pytest.skip("not supported in matplotlib < 2.0.0") + def test_color_single_series_list(self): + # GH 3486 + df = DataFrame({"A": [1, 2, 3]}) + _check_plot_works(df.plot, 
color=['red']) + def test_color_empty_string(self): df = DataFrame(randn(10, 2)) with pytest.raises(ValueError): From 379fa8743f43470df5ae3d6246eb916b863e3487 Mon Sep 17 00:00:00 2001 From: Keith Webber Date: Thu, 11 May 2017 15:24:46 -0400 Subject: [PATCH 545/933] DOC: Correctly redirect to SetupTools documentations (#16333) --- doc/source/install.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/install.rst b/doc/source/install.rst index 578caae605471..48d51e1200447 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -202,7 +202,7 @@ installed), make sure you have `pytest Dependencies ------------ -* `setuptools `__ +* `setuptools `__ * `NumPy `__: 1.7.1 or higher * `python-dateutil `__: 1.5 or higher * `pytz `__: Needed for time zone support From 541e8e83b47c68afcf0034f7094d97c3645ca48b Mon Sep 17 00:00:00 2001 From: linebp Date: Fri, 12 May 2017 00:55:09 +0200 Subject: [PATCH 546/933] Unblock supported compression libs in pytables (#16196) --- doc/source/whatsnew/v0.20.2.txt | 2 +- pandas/core/generic.py | 17 +++++++++------ pandas/io/pytables.py | 24 +++++++++++++-------- pandas/tests/io/test_pytables.py | 37 ++++++++++++++++++++++++++++++-- 4 files changed, 62 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index cbfebee2ceba2..5f0fa65a75ee9 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -19,7 +19,7 @@ Highlights include: Enhancements ~~~~~~~~~~~~ - +- Unblocked access to additional compression types supported in pytables: 'blosc:blosclz, 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`) .. _whatsnew_0202.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b72f83ce723cc..777cfcae7a326 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1266,12 +1266,17 @@ def to_hdf(self, path_or_buf, key, **kwargs): `__. Applicable only to format='table'. - complevel : int, 1-9, default 0 - If a complib is specified compression will be applied - where possible - complib : {'zlib', 'bzip2', 'lzo', 'blosc', None}, default None - If complevel is > 0 apply compression to objects written - in the store wherever possible + complevel : int, 0-9, default 0 + Specifies a compression level for data. + A value of 0 disables compression. + complib : {'zlib', 'lzo', 'bzip2', 'blosc', None}, default None + Specifies the compression library to be used. + As of v0.20.2 these additional compressors for Blosc are supported + (default if no compressor specified: 'blosc:blosclz'): + {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', + 'blosc:zlib', 'blosc:zstd'}. + Specifying a compression library which is not available issues + a ValueError. fletcher32 : bool, default False If applying compression use the fletcher32 checksum dropna : boolean, default False. diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 17bedd016f617..f017421c1f83a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -402,12 +402,17 @@ class HDFStore(StringMixin): and if the file does not exist it is created. ``'r+'`` It is similar to ``'a'``, but the file must already exist. 
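With the hard-coded whitelist gone (replaced below by a check against `tables.filters.all_complibs`), anything the local PyTables build supports can be requested directly. A usage sketch, assuming a build that ships the extended Blosc codecs:

    import pandas as pd

    df = pd.DataFrame({'a': range(10)})
    df.to_hdf('data.h5', 'df', complib='blosc:lz4', complevel=9)
    pd.read_hdf('data.h5', 'df')

    # An unavailable library now fails loudly instead of being rejected
    # up front:
    # df.to_hdf('data.h5', 'df', complib='foolib')   # ValueError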
- complevel : int, 1-9, default 0 - If a complib is specified compression will be applied - where possible - complib : {'zlib', 'bzip2', 'lzo', 'blosc', None}, default None - If complevel is > 0 apply compression to objects written - in the store wherever possible + complevel : int, 0-9, default 0 + Specifies a compression level for data. + A value of 0 disables compression. + complib : {'zlib', 'lzo', 'bzip2', 'blosc', None}, default None + Specifies the compression library to be used. + As of v0.20.2 these additional compressors for Blosc are supported + (default if no compressor specified: 'blosc:blosclz'): + {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', + 'blosc:zlib', 'blosc:zstd'}. + Specifying a compression library which is not available issues + a ValueError. fletcher32 : bool, default False If applying compression use the fletcher32 checksum @@ -430,9 +435,10 @@ def __init__(self, path, mode=None, complevel=None, complib=None, raise ImportError('HDFStore requires PyTables, "{ex}" problem ' 'importing'.format(ex=str(ex))) - if complib not in (None, 'blosc', 'bzip2', 'lzo', 'zlib'): - raise ValueError("complib only supports 'blosc', 'bzip2', lzo' " - "or 'zlib' compression.") + if complib is not None and complib not in tables.filters.all_complibs: + raise ValueError( + "complib only supports {libs} compression.".format( + libs=tables.filters.all_complibs)) self._path = path if mode is None: diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 873bb20b3bba9..abfd88a6f13e1 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -734,6 +734,39 @@ def test_put_compression_blosc(self): store.put('c', df, format='table', complib='blosc') tm.assert_frame_equal(store['c'], df) + def test_complibs(self): + # GH14478 + df = tm.makeDataFrame() + + # Building list of all complibs and complevels tuples + all_complibs = tables.filters.all_complibs + # Remove lzo if its not available on this platform + if not tables.which_lib_version('lzo'): + all_complibs.remove('lzo') + all_levels = range(0, 10) + all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels] + + for (lib, lvl) in all_tests: + with ensure_clean_path(self.path) as tmpfile: + gname = 'foo' + + # Write and read file to see if data is consistent + df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl) + result = pd.read_hdf(tmpfile, gname) + tm.assert_frame_equal(result, df) + + # Open file and check metadata + # for correct amount of compression + h5table = tables.open_file(tmpfile, mode='r') + for node in h5table.walk_nodes(where='/' + gname, + classname='Leaf'): + assert node.filters.complevel == lvl + if lvl == 0: + assert node.filters.complib is None + else: + assert node.filters.complib == lib + h5table.close() + def test_put_integer(self): # non-date, non-string index df = DataFrame(np.random.randn(50, 100)) @@ -4939,8 +4972,8 @@ def test_invalid_complib(self): index=list('abcd'), columns=list('ABCDE')) with ensure_clean_path(self.path) as path: - pytest.raises(ValueError, df.to_hdf, path, - 'df', complib='blosc:zlib') + with pytest.raises(ValueError): + df.to_hdf(path, 'df', complib='foolib') # GH10443 def test_read_nokey(self): From 1c0b63281db0486aa8182d550e9bceb641e5f9a4 Mon Sep 17 00:00:00 2001 From: keitakurita Date: Fri, 12 May 2017 08:00:04 +0900 Subject: [PATCH 547/933] BUG: incorrect handling of scipy.sparse.dok formats (#16197) (#16191) --- doc/source/whatsnew/v0.20.2.txt | 3 +-- pandas/core/sparse/frame.py | 2 +- 
pandas/tests/sparse/test_frame.py | 28 +++++++++++++++++++++++++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 5f0fa65a75ee9..eea7ed3470115 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -69,8 +69,7 @@ Groupby/Resample/Rolling Sparse ^^^^^^ - - +- Bug in construction of SparseDataFrame from ``scipy.sparse.dok_matrix`` (:issue:`16179`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 3c8f6e8c6257d..461dd50c5da6e 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -190,8 +190,8 @@ def _init_spmatrix(self, data, index, columns, dtype=None, values = Series(data.data, index=data.row, copy=False) for col, rowvals in values.groupby(data.col): # get_blocks expects int32 row indices in sorted order + rowvals = rowvals.sort_index() rows = rowvals.index.values.astype(np.int32) - rows.sort() blocs, blens = get_blocks(rows) sdict[columns[col]] = SparseSeries( diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 0312b76ec30a5..654d12b782f37 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -1146,8 +1146,8 @@ def test_isnotnull(self): tm.assert_frame_equal(res.to_dense(), exp) -@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811 -@pytest.mark.parametrize('columns', [None, list('cd')]) +@pytest.mark.parametrize('index', [None, list('abc')]) # noqa: F811 +@pytest.mark.parametrize('columns', [None, list('def')]) @pytest.mark.parametrize('fill_value', [None, 0, np.nan]) @pytest.mark.parametrize('dtype', [bool, int, float, np.uint16]) def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): @@ -1156,7 +1156,9 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): # Make one ndarray and from it one sparse matrix, both to be used for # constructing frames and comparing results - arr = np.eye(2, dtype=dtype) + arr = np.eye(3, dtype=dtype) + # GH 16179 + arr[0, 1] = dtype(2) try: spm = spmatrix(arr) assert spm.dtype == arr.dtype @@ -1245,6 +1247,26 @@ def test_from_to_scipy_object(spmatrix, fill_value): assert sdf.to_coo().dtype == res_dtype +def test_from_scipy_correct_ordering(spmatrix): + # GH 16179 + tm.skip_if_no_package('scipy') + + arr = np.arange(1, 5).reshape(2, 2) + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = pd.SparseDataFrame(spm) + expected = pd.SparseDataFrame(arr) + tm.assert_sp_frame_equal(sdf, expected) + tm.assert_frame_equal(sdf.to_dense(), expected.to_dense()) + + class TestSparseDataFrameArithmetic(object): def test_numeric_op_scalar(self): From 94ef7b6a2e1e9fd266bab6f22f8573d421d1745f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 11 May 2017 19:14:20 -0400 Subject: [PATCH 548/933] PERF: improved performance of small multiindexes (#16324) closes #16319 --- asv_bench/benchmarks/indexing.py | 20 +++++++++++++++---- doc/source/whatsnew/v0.20.2.txt | 2 +- pandas/_libs/index.pyx | 33 +++++++++++++++++++++++++++++++- pandas/core/dtypes/dtypes.py | 6 +++--- pandas/core/indexes/multi.py | 12 ++++++++++-- pandas/core/util/hashing.py | 18 +++++++---------- 6 files changed, 69 insertions(+), 22 deletions(-) diff --git 
a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 31af56b3715a5..e1676715853a4 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -193,9 +193,15 @@ def setup(self): np.arange(1000)], names=['one', 'two']) import string - self.mistring = MultiIndex.from_product( - [np.arange(1000), - np.arange(20), list(string.ascii_letters)], + + self.mi_large = MultiIndex.from_product( + [np.arange(1000), np.arange(20), list(string.ascii_letters)], + names=['one', 'two', 'three']) + self.mi_med = MultiIndex.from_product( + [np.arange(1000), np.arange(10), list('A')], + names=['one', 'two', 'three']) + self.mi_small = MultiIndex.from_product( + [np.arange(100), list('A'), list('A')], names=['one', 'two', 'three']) def time_series_xs_mi_ix(self): @@ -218,8 +224,14 @@ def time_multiindex_get_indexer(self): (0, 16), (0, 17), (0, 18), (0, 19)], dtype=object)) + def time_multiindex_large_get_loc(self): + self.mi_large.get_loc((999, 19, 'Z')) + + def time_multiindex_med_get_loc(self): + self.mi_med.get_loc((999, 9, 'A')) + def time_multiindex_string_get_loc(self): - self.mistring.get_loc((999, 19, 'Z')) + self.mi_small.get_loc((99, 'A', 'A')) def time_is_monotonic(self): self.miint.is_monotonic diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index eea7ed3470115..783985bc0f2bb 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -27,7 +27,7 @@ Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Performance regression fix when indexing with a list-like (:issue:`16285`) - +- Performance regression fix for small MultiIndexes (:issuse:`16319`) .. _whatsnew_0202.bug_fixes: diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index c7a537acf5d6f..21680fb0b3921 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -553,7 +553,34 @@ cdef inline bint _is_utc(object tz): return tz is UTC or isinstance(tz, _du_utc) -cdef class MultiIndexEngine(IndexEngine): +cdef class MultiIndexObjectEngine(ObjectEngine): + """ + provide the same interface as the MultiIndexEngine + but use the IndexEngine for computation + + This provides good performance with samller MI's + """ + def get_indexer(self, values): + # convert a MI to an ndarray + if hasattr(values, 'values'): + values = values.values + return super(MultiIndexObjectEngine, self).get_indexer(values) + + cpdef get_loc(self, object val): + + # convert a MI to an ndarray + if hasattr(val, 'values'): + val = val.values + return super(MultiIndexObjectEngine, self).get_loc(val) + + +cdef class MultiIndexHashEngine(ObjectEngine): + """ + Use a hashing based MultiIndex impl + but use the IndexEngine for computation + + This provides good performance with larger MI's + """ def _call_monotonic(self, object mi): # defer these back to the mi iteself @@ -584,6 +611,10 @@ cdef class MultiIndexEngine(IndexEngine): except TypeError: raise KeyError(val) + def get_indexer(self, values): + self._ensure_mapping_populated() + return self.mapping.lookup(values) + cdef _make_hash_table(self, n): return _hash.MultiIndexHashTable(n) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 561f1951a4151..dc2c56ea476f9 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -88,12 +88,12 @@ def is_dtype(cls, dtype): """ if hasattr(dtype, 'dtype'): dtype = dtype.dtype - if isinstance(dtype, cls): - return True - elif isinstance(dtype, np.dtype): + if isinstance(dtype, np.dtype): return False elif dtype is None: 
            return False
+        elif isinstance(dtype, cls):
+            return True
         try:
             return cls.construct_from_string(dtype) is not None
         except:
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 7ef037d8f3536..3db5633ec30bd 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -75,7 +75,6 @@ class MultiIndex(Index):
     _levels = FrozenList()
     _labels = FrozenList()
     _comparables = ['names']
-    _engine_type = libindex.MultiIndexEngine
     rename = Index.set_names

     def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
@@ -629,7 +628,16 @@ def _get_level_number(self, level):

     @cache_readonly
     def _engine(self):
+
+        # choose our engine based on our size
+        # the hashing based MultiIndexHashEngine for larger
+        # sizes, and the MultiIndexObjectEngine for smaller
+        # xref: https://github.com/pandas-dev/pandas/pull/16324
-        return self._engine_type(lambda: self, len(self))
+        l = len(self)
+        if l > 10000:
+            return libindex.MultiIndexHashEngine(lambda: self, l)
+
+        return libindex.MultiIndexObjectEngine(lambda: self.values, l)

     @property
     def values(self):
diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py
index 6a5343e8a8e25..f0829adc94500 100644
--- a/pandas/core/util/hashing.py
+++ b/pandas/core/util/hashing.py
@@ -5,16 +5,13 @@
 import numpy as np
 from pandas._libs import hashing
-from pandas._libs.lib import is_bool_array
 from pandas.core.dtypes.generic import (
     ABCMultiIndex,
     ABCIndexClass,
     ABCSeries,
     ABCDataFrame)

 from pandas.core.dtypes.common import (
-    is_categorical_dtype, is_numeric_dtype,
-    is_datetime64_dtype, is_timedelta64_dtype,
-    is_list_like)
+    is_categorical_dtype, is_list_like)

 # 16 byte long hashing key
 _default_hash_key = '0123456789123456'
@@ -136,7 +133,6 @@ def hash_tuples(vals, encoding='utf8', hash_key=None):
     -------
     ndarray of hashed values array
     """
-    is_tuple = False
     if isinstance(vals, tuple):
         vals = [vals]

@@ -231,6 +227,7 @@ def hash_array(vals, encoding='utf8', hash_key=None, categorize=True):

     if not hasattr(vals, 'dtype'):
         raise TypeError("must pass a ndarray-like")
+    dtype = vals.dtype

     if hash_key is None:
         hash_key = _default_hash_key
@@ -238,22 +235,21 @@ def hash_array(vals, encoding='utf8', hash_key=None, categorize=True):
     # For categoricals, we hash the categories, then remap the codes to the
     # hash values. (This check is above the complex check so that we don't ask
     # numpy if categorical is a subdtype of complex, as it will choke.
-    if is_categorical_dtype(vals.dtype):
+    if is_categorical_dtype(dtype):
         return _hash_categorical(vals, encoding, hash_key)

     # we'll be working with everything as 64-bit values, so handle this
     # 128-bit value early
-    if np.issubdtype(vals.dtype, np.complex128):
+    elif np.issubdtype(dtype, np.complex128):
         return hash_array(vals.real) + 23 * hash_array(vals.imag)

     # First, turn whatever array this is into unsigned 64-bit ints, if we can
     # manage it.
-    if is_bool_array(vals):
+    elif issubclass(dtype.type, np.bool_):
         vals = vals.astype('u8')
-    elif (is_datetime64_dtype(vals) or
-          is_timedelta64_dtype(vals)):
+    elif issubclass(dtype.type, (np.datetime64, np.timedelta64)):
         vals = vals.view('i8').astype('u8', copy=False)
-    elif (is_numeric_dtype(vals) and vals.dtype.itemsize <= 8):
+    elif issubclass(dtype.type, np.number) and dtype.itemsize <= 8:
         vals = vals.view('u{}'.format(vals.dtype.itemsize)).astype('u8')
     else:
         # With repeated values, it's MUCH faster to categorize object dtypes,
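
[Editorial aside between patches: a rough usage sketch of the engine split above, not part of the patch itself. The 10000-element cutoff is the one hard-coded in ``MultiIndex._engine``; the ``small``/``large`` names are illustrative only.]

    import numpy as np
    from pandas import MultiIndex

    small = MultiIndex.from_product([np.arange(100), list('AB')])      # 200 entries
    large = MultiIndex.from_product([np.arange(1000), np.arange(20)])  # 20000 entries

    small.get_loc((99, 'B'))   # len <= 10000 -> MultiIndexObjectEngine
    large.get_loc((999, 19))   # len > 10000  -> MultiIndexHashEngine
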
From 50e95e01066c9dc60897f2f97cf9006275b3b6b9 Mon Sep 17 00:00:00 2001
From: Pietro Battiston
Date: Fri, 12 May 2017 13:45:05 +0200
Subject: [PATCH 549/933] ENH: Make RangeIndex.append() return RangeIndex when
 possible (#16213)

closes #16212
---
 doc/source/whatsnew/v0.21.0.txt    |  1 +
 pandas/core/indexes/range.py       | 57 ++++++++++++++++++++++++++++++
 pandas/tests/indexes/test_range.py | 36 +++++++++++++++++++
 3 files changed, 94 insertions(+)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 36dffc3d3378b..3df0a21facb02 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -26,6 +26,7 @@ New features

 Other Enhancements
 ^^^^^^^^^^^^^^^^^^
+- ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`)

diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
index b7a8e0b54a128..5071b50bbebdf 100644
--- a/pandas/core/indexes/range.py
+++ b/pandas/core/indexes/range.py
@@ -443,6 +443,63 @@ def join(self, other, how='left', level=None, return_indexers=False,
         return super(RangeIndex, self).join(other, how, level,
                                             return_indexers, sort)

+    def append(self, other):
+        """
+        Append a collection of Index options together
+
+        Parameters
+        ----------
+        other : Index or list/tuple of indices
+
+        Returns
+        -------
+        appended : RangeIndex if all indexes are consecutive RangeIndexes,
+            otherwise Int64Index or Index
+        """
+
+        to_concat = [self]
+
+        if isinstance(other, (list, tuple)):
+            to_concat = to_concat + list(other)
+        else:
+            to_concat.append(other)
+
+        if not all([isinstance(i, RangeIndex) for i in to_concat]):
+            return super(RangeIndex, self).append(other)
+
+        start = step = next = None
+
+        for obj in to_concat:
+            if not len(obj):
+                continue
+
+            if start is None:
+                # This is set by the first non-empty index
+                start = obj._start
+                if step is None and len(obj) > 1:
+                    step = obj._step
+            elif step is None:
+                # First non-empty index had only one element
+                if obj._start == start:
+                    return super(RangeIndex, self).append(other)
+                step = obj._start - start
+
+            non_consecutive = ((step != obj._step and len(obj) > 1) or
+                               (next is not None and obj._start != next))
+            if non_consecutive:
+                return super(RangeIndex, self).append(other)
+
+            if step is not None:
+                next = obj[-1] + step
+
+        if start is None:
+            start = obj._start
+            step = obj._step
+        stop = obj._stop if next is None else next
+        names = set([obj.name for obj in to_concat])
+        name = None if len(names) > 1 else self.name
+        return RangeIndex(start, stop, step, name=name)
+
     def __len__(self):
         """
         return the length of the RangeIndex
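
[Editorial aside: to make the consecutive/non-consecutive distinction concrete before the tests below, a small illustrative sketch that matches the ``test_append`` cases which follow.]

    import pandas as pd

    # consecutive ranges stay a RangeIndex
    pd.RangeIndex(1, 3).append(pd.RangeIndex(3, 7))
    # -> RangeIndex(start=1, stop=7, step=1)

    # a gap between the pieces falls back to Int64Index
    pd.RangeIndex(-4, -2).append(pd.RangeIndex(3, 5))
    # -> Int64Index([-4, -3, 3, 4], dtype='int64')
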
diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py
index 18539989084e9..d140a2503984e 100644
--- a/pandas/tests/indexes/test_range.py
+++ b/pandas/tests/indexes/test_range.py
@@ -941,3 +941,39 @@ def test_where_array_like(self):
         for klass in klasses:
             result = i.where(klass(cond))
             tm.assert_index_equal(result, expected)
+
+    def test_append(self):
+        # GH16212
+        RI = RangeIndex
+        I64 = Int64Index
+        F64 = Float64Index
+        OI = Index
+        cases = [([RI(1, 12, 5)], RI(1, 12, 5)),
+                 ([RI(0, 6, 4)], RI(0, 6, 4)),
+                 ([RI(1, 3), RI(3, 7)], RI(1, 7)),
+                 ([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)),
+                 ([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)),
+                 ([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)),
+                 ([RI(-4, -8), RI(-8, -12)], RI(-8, -12)),
+                 ([RI(-4, -8), RI(3, -4)], RI(3, -8)),
+                 ([RI(-4, -8), RI(3, 5)], RI(3, 5)),
+                 ([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])),
+                 ([RI(-2,), RI(3, 5)], RI(3, 5)),
+                 ([RI(2,), RI(2)], I64([0, 1, 0, 1])),
+                 ([RI(2,), RI(2, 5), RI(5, 8, 4)], RI(0, 6)),
+                 ([RI(2,), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])),
+                 ([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)),
+                 ([RI(3,), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])),
+                 ([RI(3,), F64([-1, 3.1, 15.])], F64([0, 1, 2, -1, 3.1, 15.])),
+                 ([RI(3,), OI(['a', None, 14])], OI([0, 1, 2, 'a', None, 14])),
+                 ([RI(3, 1), OI(['a', None, 14])], OI(['a', None, 14]))
+                 ]
+
+        for indices, expected in cases:
+            result = indices[0].append(indices[1:])
+            tm.assert_index_equal(result, expected, exact=True)
+
+            if len(indices) == 2:
+                # Append single item rather than list
+                result2 = indices[0].append(indices[1])
+                tm.assert_index_equal(result2, expected, exact=True)

From 4cd84582d5ad0fdac5085b12a1affeb6300ba3a3 Mon Sep 17 00:00:00 2001
From: chris-b1
Date: Fri, 12 May 2017 08:29:59 -0500
Subject: [PATCH 550/933] BUG: pathlib.Path in io (#16292)

* BUG: pathlib.Path in io

* CLN: factor out pathlib roundtrip

* add localpath tests for other io

* fixup

* xfail SAS; type in parser

* missing import

* xfail for #14704

* fix to_csv

* lint

* lint cleanup

* add feather (xfail)
---
 doc/source/whatsnew/v0.20.2.txt      |  2 +
 pandas/io/common.py                  |  3 ++
 pandas/io/formats/format.py          |  5 ++-
 pandas/tests/io/parser/common.py     | 13 +++++++
 pandas/tests/io/sas/test_sas7bdat.py | 24 ++++++++++++
 pandas/tests/io/test_excel.py        | 10 +++++
 pandas/tests/io/test_feather.py      | 13 +++++++
 pandas/tests/io/test_packers.py      | 12 ++++++
 pandas/tests/io/test_pickle.py       | 12 ++++++
 pandas/tests/io/test_pytables.py     | 43 +++++++++++++++++++++
 pandas/tests/io/test_stata.py        | 12 ++++++
 pandas/util/testing.py               | 57 ++++++++++++++++++++++++++++
 12 files changed, 204 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index 783985bc0f2bb..03579dab0d6a3 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -34,6 +34,8 @@ Performance Improvements
 Bug Fixes
 ~~~~~~~~~

+- Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`)
+

 Conversion
 ^^^^^^^^^^

diff --git a/pandas/io/common.py b/pandas/io/common.py
index 28f90972f95de..14ac4d366fcef 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -314,6 +314,9 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,

     handles = list()
     f = path_or_buf
+
+    # Convert pathlib.Path/py.path.local or string
+    path_or_buf = _stringify_path(path_or_buf)
     is_path = isinstance(path_or_buf, compat.string_types)

     if compression:
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index 65098bb2aa404..183d8d9d87d0b 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -32,7 +32,8 @@
     OrderedDict, unichr)
 from pandas.io.formats.terminal import get_terminal_size
 from pandas.core.config import get_option, set_option
-from pandas.io.common import _get_handle, UnicodeWriter, _expand_user
+from pandas.io.common import (_get_handle, UnicodeWriter, _expand_user,
+
_stringify_path) from pandas.io.formats.printing import adjoin, justify, pprint_thing from pandas.io.formats.common import get_level_lengths import pandas.core.common as com @@ -1475,7 +1476,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', if path_or_buf is None: path_or_buf = StringIO() - self.path_or_buf = _expand_user(path_or_buf) + self.path_or_buf = _expand_user(_stringify_path(path_or_buf)) self.sep = sep self.na_rep = na_rep self.float_format = float_format diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index bcce0c6d020ae..31d815a4bca97 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -679,6 +679,19 @@ def test_file(self): tm.assert_frame_equal(url_table, local_table) + def test_path_pathlib(self): + df = tm.makeDataFrame() + result = tm.round_trip_pathlib(df.to_csv, + lambda p: self.read_csv(p, index_col=0)) + tm.assert_frame_equal(df, result) + + def test_path_localpath(self): + df = tm.makeDataFrame() + result = tm.round_trip_localpath( + df.to_csv, + lambda p: self.read_csv(p, index_col=0)) + tm.assert_frame_equal(df, result) + def test_nonexistent_path(self): # gh-2428: pls no segfault # gh-14086: raise more helpful FileNotFoundError diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index a5157744038f4..7070c3c7c9382 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -3,6 +3,7 @@ import pandas.util.testing as tm import os import io +import pytest import numpy as np @@ -65,6 +66,29 @@ def test_from_iterator(self): tm.assert_frame_equal(df, df0.iloc[2:5, :]) rdr.close() + @pytest.mark.xfail(reason="read_sas currently doesn't work with pathlib") + def test_path_pathlib(self): + tm._skip_if_no_pathlib() + from pathlib import Path + for j in 0, 1: + df0 = self.data[j] + for k in self.test_ix[j]: + fname = Path(os.path.join(self.dirpath, "test%d.sas7bdat" % k)) + df = pd.read_sas(fname, encoding='utf-8') + tm.assert_frame_equal(df, df0) + + @pytest.mark.xfail(reason="read_sas currently doesn't work with localpath") + def test_path_localpath(self): + tm._skip_if_no_localpath() + from py.path import local as LocalPath + for j in 0, 1: + df0 = self.data[j] + for k in self.test_ix[j]: + fname = LocalPath(os.path.join(self.dirpath, + "test%d.sas7bdat" % k)) + df = pd.read_sas(fname, encoding='utf-8') + tm.assert_frame_equal(df, df0) + def test_iterator_loop(self): # github #13654 for j in 0, 1: diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index c70b5937fea3f..b4a5b24616728 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1858,6 +1858,16 @@ def test_freeze_panes(self): result = read_excel(path) tm.assert_frame_equal(expected, result) + def test_path_pathlib(self): + df = tm.makeDataFrame() + result = tm.round_trip_pathlib(df.to_excel, pd.read_excel) + tm.assert_frame_equal(df, result) + + def test_path_localpath(self): + df = tm.makeDataFrame() + result = tm.round_trip_localpath(df.to_excel, pd.read_excel) + tm.assert_frame_equal(df, result) + def raise_wrapper(major_ver): def versioned_raise_wrapper(orig_method): diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 232bb126d9d67..e3190efecba30 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -9,6 +9,7 @@ from feather import FeatherError from pandas.util.testing import assert_frame_equal, ensure_clean +import pandas.util.testing as tm 
@pytest.mark.single
@@ -114,3 +115,15 @@ def test_write_with_index(self):
         df.index = [0, 1, 2]
         df.columns = pd.MultiIndex.from_tuples([('a', 1), ('a', 2),
                                                 ('b', 1)])
         self.check_error_on_write(df, ValueError)
+
+    @pytest.mark.xfail(reason="feather currently doesn't work with pathlib")
+    def test_path_pathlib(self):
+        df = tm.makeDataFrame().reset_index()
+        result = tm.round_trip_pathlib(df.to_feather, pd.read_feather)
+        tm.assert_frame_equal(df, result)
+
+    @pytest.mark.xfail(reason="feather currently doesn't work with localpath")
+    def test_path_localpath(self):
+        df = tm.makeDataFrame().reset_index()
+        result = tm.round_trip_localpath(df.to_feather, pd.read_feather)
+        tm.assert_frame_equal(df, result)
diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index 4b1145129c364..fd42becca3ac3 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -134,6 +134,18 @@ def test_string_io(self):
             result = read_msgpack(p)
             tm.assert_frame_equal(result, df)

+    @pytest.mark.xfail(reason="msgpack currently doesn't work with pathlib")
+    def test_path_pathlib(self):
+        df = tm.makeDataFrame()
+        result = tm.round_trip_pathlib(df.to_msgpack, read_msgpack)
+        tm.assert_frame_equal(df, result)
+
+    @pytest.mark.xfail(reason="msgpack currently doesn't work with localpath")
+    def test_path_localpath(self):
+        df = tm.makeDataFrame()
+        result = tm.round_trip_localpath(df.to_msgpack, read_msgpack)
+        tm.assert_frame_equal(df, result)
+
     def test_iterator_with_string_io(self):

         dfs = [DataFrame(np.random.randn(10, 2)) for i in range(5)]
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index 875b5bd3055b9..429ec5ba1c474 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -299,6 +299,18 @@ def test_pickle_v0_15_2():
     tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path))


+def test_pickle_path_pathlib():
+    df = tm.makeDataFrame()
+    result = tm.round_trip_pathlib(df.to_pickle, pd.read_pickle)
+    tm.assert_frame_equal(df, result)
+
+
+def test_pickle_path_localpath():
+    df = tm.makeDataFrame()
+    result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle)
+    tm.assert_frame_equal(df, result)
+
+
 # ---------------------
 # test pickle compression
 # ---------------------
diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py
index abfd88a6f13e1..c9d2da67b8ee3 100644
--- a/pandas/tests/io/test_pytables.py
+++ b/pandas/tests/io/test_pytables.py
@@ -4282,6 +4282,49 @@ def test_select_filter_corner(self):
             result = store.select('frame', [crit])
             tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]])

+    def test_path_pathlib(self):
+        df = tm.makeDataFrame()
+
+        result = tm.round_trip_pathlib(
+            lambda p: df.to_hdf(p, 'df'),
+            lambda p: pd.read_hdf(p, 'df'))
+        tm.assert_frame_equal(df, result)
+
+    @pytest.mark.xfail(reason="pathlib currently doesn't work with HDFStore")
+    def test_path_pathlib_hdfstore(self):
+        df = tm.makeDataFrame()
+
+        def writer(path):
+            with pd.HDFStore(path) as store:
+                df.to_hdf(store, 'df')
+
+        def reader(path):
+            with pd.HDFStore(path) as store:
+                return pd.read_hdf(store, 'df')
+        result = tm.round_trip_pathlib(writer, reader)
+        tm.assert_frame_equal(df, result)
+
+    def test_path_localpath(self):
+        df = tm.makeDataFrame()
+        result = tm.round_trip_localpath(
+            lambda p: df.to_hdf(p, 'df'),
+            lambda p: pd.read_hdf(p, 'df'))
+        tm.assert_frame_equal(df, result)
+
+    @pytest.mark.xfail(reason="localpath currently doesn't work with HDFStore")
+    def 
test_path_localpath_hdfstore(self):
+        df = tm.makeDataFrame()
+
+        def writer(path):
+            with pd.HDFStore(path) as store:
+                df.to_hdf(store, 'df')
+
+        def reader(path):
+            with pd.HDFStore(path) as store:
+                return pd.read_hdf(store, 'df')
+        result = tm.round_trip_localpath(writer, reader)
+        tm.assert_frame_equal(df, result)
+
     def _check_roundtrip(self, obj, comparator, compression=False, **kwargs):

         options = {}
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 4c92c19c51e7a..4ec990116bb62 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -1283,3 +1283,15 @@ def test_invalid_encoding(self):
         with pytest.raises(ValueError):
             with tm.ensure_clean() as path:
                 original.to_stata(path, encoding='utf-8')
+
+    @pytest.mark.xfail(reason="stata currently doesn't work with pathlib")
+    def test_path_pathlib(self):
+        df = tm.makeDataFrame()
+        result = tm.round_trip_pathlib(df.to_stata, read_stata)
+        tm.assert_frame_equal(df, result)
+
+    @pytest.mark.xfail(reason="stata currently doesn't work with localpath")
+    def test_path_localpath(self):
+        df = tm.makeDataFrame()
+        result = tm.round_trip_localpath(df.to_stata, read_stata)
+        tm.assert_frame_equal(df, result)
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 5f01f42eb0c69..81d452318d0b4 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -117,6 +117,63 @@ def round_trip_pickle(obj, path=None):
     return pd.read_pickle(path)


+def round_trip_pathlib(writer, reader, path=None):
+    """
+    Write an object to file specified by a pathlib.Path and read it back
+
+    Parameters
+    ----------
+    writer : callable bound to pandas object
+        IO writing function (e.g. DataFrame.to_csv )
+    reader : callable
+        IO reading function (e.g. pd.read_csv )
+    path : str, default None
+        The path where the object is written and then read.
+
+    Returns
+    -------
+    round_trip_object : pandas object
+        The original object that was serialized and then re-read.
+    """
+
+    import pytest
+    Path = pytest.importorskip('pathlib').Path
+    if path is None:
+        path = '___pathlib___'
+    with ensure_clean(path) as path:
+        writer(Path(path))
+        obj = reader(Path(path))
+    return obj
+
+
+def round_trip_localpath(writer, reader, path=None):
+    """
+    Write an object to file specified by a py.path LocalPath and read it back
+
+    Parameters
+    ----------
+    writer : callable bound to pandas object
+        IO writing function (e.g. DataFrame.to_csv )
+    reader : callable
+        IO reading function (e.g. pd.read_csv )
+    path : str, default None
+        The path where the object is written and then read.
+
+    Returns
+    -------
+    round_trip_object : pandas object
+        The original object that was serialized and then re-read.
+    """
+    import pytest
+    LocalPath = pytest.importorskip('py.path').local
+    if path is None:
+        path = '___localpath___'
+    with ensure_clean(path) as path:
+        writer(LocalPath(path))
+        obj = reader(LocalPath(path))
+    return obj
+
+
 def assert_almost_equal(left, right, check_exact=False,
                         check_dtype='equiv', check_less_precise=False,
                         **kwargs):
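
[Editorial aside between patches: a usage sketch of the new round-trip helpers; it simply mirrors the pickle tests added above.]

    import pandas as pd
    import pandas.util.testing as tm

    df = tm.makeDataFrame()
    # write via a pathlib.Path, read it back, and check the round trip
    result = tm.round_trip_pathlib(df.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(df, result)
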
+ """ + import pytest + LocalPath = pytest.importorskip('py.path').local + if path is None: + path = '___localpath___' + with ensure_clean(path) as path: + writer(LocalPath(path)) + obj = reader(LocalPath(path)) + return obj + + def assert_almost_equal(left, right, check_exact=False, check_dtype='equiv', check_less_precise=False, **kwargs): From 4bdbcb638340a0529c2ddc7d3a85d0209004763f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 12 May 2017 10:32:10 -0500 Subject: [PATCH 551/933] DOC: Handle missing nbconvert in html build (#16330) Closes https://github.com/pandas-dev/pandas/issues/16329 --- doc/make.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/doc/make.py b/doc/make.py index e70655c3e2f92..316ce58812f82 100755 --- a/doc/make.py +++ b/doc/make.py @@ -115,15 +115,28 @@ def maybe_exclude_notebooks(): notebooks = [os.path.join(base, 'source', nb) for nb in ['style.ipynb']] contents = {} - try: - import nbconvert - nbconvert.utils.pandoc.get_pandoc_version() - except (ImportError, nbconvert.utils.pandoc.PandocMissing): - print("Warning: Pandoc is not installed. Skipping Notebooks.") + + def _remove_notebooks(): for nb in notebooks: with open(nb, 'rt') as f: contents[nb] = f.read() os.remove(nb) + + # Skip notebook conversion if + # 1. nbconvert isn't installed, or + # 2. nbconvert is installed, but pandoc isn't + try: + import nbconvert + except ImportError: + print("Warning: nbconvert not installed. Skipping notebooks.") + _remove_notebooks() + else: + try: + nbconvert.utils.pandoc.get_pandoc_version() + except nbconvert.utils.pandoc.PandocMissing: + print("Warning: Pandoc is not installed. Skipping notebooks.") + _remove_notebooks() + yield for nb, content in contents.items(): with open(nb, 'wt') as f: From 9da7798fc758d26014c2ffec2c11709bd94daed8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 13 May 2017 14:10:52 +0200 Subject: [PATCH 552/933] DOC: add dataframe construction in merge_asof example (#16348) --- pandas/core/reshape/merge.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index c55f4b5bf935f..7bf25e37340c4 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -347,12 +347,15 @@ def merge_asof(left, right, on=None, Examples -------- + >>> left = pd.DataFrame({'a': [1, 5, 10], 'left_val': ['a', 'b', 'c']}) >>> left a left_val 0 1 a 1 5 b 2 10 c + >>> right = pd.DataFrame({'a': [1, 2, 3, 6, 7], + ... 'right_val': [1, 2, 3, 6, 7]}) >>> right a right_val 0 1 1 @@ -387,12 +390,15 @@ def merge_asof(left, right, on=None, We can use indexed DataFrames as well. + >>> left = pd.DataFrame({'left_val': ['a', 'b', 'c']}, index=[1, 5, 10]) >>> left left_val 1 a 5 b 10 c + >>> right = pd.DataFrame({'right_val': [1, 2, 3, 6, 7]}, + ... 
index=[1, 2, 3, 6, 7]) >>> right right_val 1 1 From 71bb0f74586ac61160f5bf905d0930943d2c93c6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 13 May 2017 12:51:34 -0400 Subject: [PATCH 553/933] DOC: redo dev docs access token --- .travis.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index b7c18d2850a15..f7412f4e4222c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,8 +17,10 @@ cache: env: global: - # pandas-docs-travis GH - - secure: "YvvTc+FrSYHgdxqoxn9s8VOaCWjvZzlkaf6k55kkmQqCYR9dPiLMsot1F96/N7o3YlD1s0znPQCak93Du8HHi/8809zAXloTaMSZrWz4R4qn96xlZFRE88O/w/Z1t3VVYpKX3MHlCggBc8MtXrqmvWKJMAqXyysZ4TTzoiJDPvE=" + # create a github personal access token: + # echo PANDAS_GH_TOKEN=personal_access_token | travis encrypt --repo pandas-docs/pandas-docs-travis + # pandas-docs/pandas-docs-travis GH + - secure: PfEfa2O785XHMnYAPCi46c4m62EO6j7e7qPE3rOEu8xCjtIq+I2HMykWzU/B2oJi4RDimCQSs+Q2b3PX3It4E8/BWh/15/pdM5X5SqdElOJ4dMfnqfyN+Cmes7UdZHmys27kTZmgrGQU2uCtE2DzJdwVgYtyDb6tcFAJbAKnom8= git: # for cloning From 30d866ea1504c430087d37872ee17169dc3474b1 Mon Sep 17 00:00:00 2001 From: pandas-docs-bot Date: Sat, 13 May 2017 16:54:07 -0400 Subject: [PATCH 554/933] DOC: force fetch on build docs --- ci/build_docs.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 1356d097025c9..25b927119a32a 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -44,6 +44,11 @@ if [ "$DOC" ]; then # create the repo git init + + git remote remove origin + git remote add origin "https://${PANDAS_GH_TOKEN}@github.com/pandas-docs/pandas-docs-travis.git" + git fetch origin + touch README git add README git commit -m "Initial commit" --allow-empty @@ -52,8 +57,7 @@ if [ "$DOC" ]; then touch .nojekyll git add --all . git commit -m "Version" --allow-empty - git remote remove origin - git remote add origin "https://${PANDAS_GH_TOKEN}@github.com/pandas-docs/pandas-docs-travis.git" + git push origin gh-pages -f fi From 0ea0f25bfb920cd0f58872dc95913a7aa9c0c3da Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 13 May 2017 17:27:59 -0400 Subject: [PATCH 555/933] DOC: remove credential helper --- ci/build_docs.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 25b927119a32a..f9f6744ab3a82 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -40,15 +40,10 @@ if [ "$DOC" ]; then cd /tmp/doc/build/html git config --global user.email "pandas-docs-bot@localhost.foo" git config --global user.name "pandas-docs-bot" - git config --global credential.helper cache # create the repo git init - git remote remove origin - git remote add origin "https://${PANDAS_GH_TOKEN}@github.com/pandas-docs/pandas-docs-travis.git" - git fetch origin - touch README git add README git commit -m "Initial commit" --allow-empty @@ -58,6 +53,11 @@ if [ "$DOC" ]; then git add --all . git commit -m "Version" --allow-empty + git remote remove origin + git remote add origin "https://${PANDAS_GH_TOKEN}@github.com/pandas-docs/pandas-docs-travis.git" + git fetch origin + git remote -v + git push origin gh-pages -f fi From 2ba7f5fbcb748ce0c95f7af6cfceb6ebf2b1bcf9 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 15 May 2017 20:02:12 -0400 Subject: [PATCH 556/933] DEPS: Drop Python 3.4 support (#16303) * DEPS: Drop Python 3.4 support Closes gh-15251. 
* TST: Patch locale failure on Circle --- ci/requirements-3.4.build | 4 ---- ci/requirements-3.4.pip | 2 -- ci/requirements-3.4.run | 18 --------------- ci/requirements-3.4_SLOW.run | 20 ----------------- ci/requirements-3.4_SLOW.sh | 7 ------ ...OW.build => requirements-3.6_LOCALE.build} | 4 ++-- ci/requirements-3.6_LOCALE.pip | 0 ci/requirements-3.6_LOCALE.run | 22 +++++++++++++++++++ ci/requirements-3.6_LOCALE_SLOW.build | 6 +++++ ci/requirements-3.6_LOCALE_SLOW.pip | 0 ci/requirements-3.6_LOCALE_SLOW.run | 22 +++++++++++++++++++ circle.yml | 4 ++-- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/compat/__init__.py | 4 ++-- pandas/tests/util/test_util.py | 19 ++++++++-------- setup.py | 1 - tox.ini | 10 ++++----- 17 files changed, 72 insertions(+), 73 deletions(-) delete mode 100644 ci/requirements-3.4.build delete mode 100644 ci/requirements-3.4.pip delete mode 100644 ci/requirements-3.4.run delete mode 100644 ci/requirements-3.4_SLOW.run delete mode 100644 ci/requirements-3.4_SLOW.sh rename ci/{requirements-3.4_SLOW.build => requirements-3.6_LOCALE.build} (58%) create mode 100644 ci/requirements-3.6_LOCALE.pip create mode 100644 ci/requirements-3.6_LOCALE.run create mode 100644 ci/requirements-3.6_LOCALE_SLOW.build create mode 100644 ci/requirements-3.6_LOCALE_SLOW.pip create mode 100644 ci/requirements-3.6_LOCALE_SLOW.run diff --git a/ci/requirements-3.4.build b/ci/requirements-3.4.build deleted file mode 100644 index e8a957f70d40e..0000000000000 --- a/ci/requirements-3.4.build +++ /dev/null @@ -1,4 +0,0 @@ -python=3.4* -numpy=1.8.1 -cython=0.24.1 -libgfortran=1.0 diff --git a/ci/requirements-3.4.pip b/ci/requirements-3.4.pip deleted file mode 100644 index 4e5fe52d56cf1..0000000000000 --- a/ci/requirements-3.4.pip +++ /dev/null @@ -1,2 +0,0 @@ -python-dateutil==2.2 -blosc diff --git a/ci/requirements-3.4.run b/ci/requirements-3.4.run deleted file mode 100644 index 3e12adae7dd9f..0000000000000 --- a/ci/requirements-3.4.run +++ /dev/null @@ -1,18 +0,0 @@ -pytz=2015.7 -numpy=1.8.1 -openpyxl -xlsxwriter -xlrd -xlwt -html5lib -patsy -beautiful-soup -scipy -numexpr -pytables -lxml -sqlalchemy -bottleneck -pymysql=0.6.3 -psycopg2 -jinja2=2.8 diff --git a/ci/requirements-3.4_SLOW.run b/ci/requirements-3.4_SLOW.run deleted file mode 100644 index 90156f62c6e71..0000000000000 --- a/ci/requirements-3.4_SLOW.run +++ /dev/null @@ -1,20 +0,0 @@ -python-dateutil -pytz -numpy=1.10* -openpyxl -xlsxwriter -xlrd -xlwt -html5lib -patsy -beautiful-soup -scipy -numexpr=2.4.6 -pytables -matplotlib -lxml -sqlalchemy -bottleneck -pymysql -psycopg2 -jinja2=2.8 diff --git a/ci/requirements-3.4_SLOW.sh b/ci/requirements-3.4_SLOW.sh deleted file mode 100644 index 24f1e042ed69e..0000000000000 --- a/ci/requirements-3.4_SLOW.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -source activate pandas - -echo "install 34_slow" - -conda install -n pandas -c conda-forge matplotlib diff --git a/ci/requirements-3.4_SLOW.build b/ci/requirements-3.6_LOCALE.build similarity index 58% rename from ci/requirements-3.4_SLOW.build rename to ci/requirements-3.6_LOCALE.build index 88212053af472..1c4b46aea3865 100644 --- a/ci/requirements-3.4_SLOW.build +++ b/ci/requirements-3.6_LOCALE.build @@ -1,6 +1,6 @@ -python=3.4* +python=3.6* python-dateutil pytz nomkl -numpy=1.10* +numpy cython diff --git a/ci/requirements-3.6_LOCALE.pip b/ci/requirements-3.6_LOCALE.pip new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/ci/requirements-3.6_LOCALE.run b/ci/requirements-3.6_LOCALE.run new file mode 100644 index 
0000000000000..ae456f4f9f38a --- /dev/null +++ b/ci/requirements-3.6_LOCALE.run @@ -0,0 +1,22 @@ +python-dateutil +pytz +numpy +scipy +openpyxl +xlsxwriter +xlrd +xlwt +numexpr +pytables +matplotlib +lxml +html5lib +jinja2 +sqlalchemy +pymysql +# feather-format (not available on defaults ATM) +# psycopg2 (not avail on defaults ATM) +beautifulsoup4 +s3fs +xarray +ipython diff --git a/ci/requirements-3.6_LOCALE_SLOW.build b/ci/requirements-3.6_LOCALE_SLOW.build new file mode 100644 index 0000000000000..1c4b46aea3865 --- /dev/null +++ b/ci/requirements-3.6_LOCALE_SLOW.build @@ -0,0 +1,6 @@ +python=3.6* +python-dateutil +pytz +nomkl +numpy +cython diff --git a/ci/requirements-3.6_LOCALE_SLOW.pip b/ci/requirements-3.6_LOCALE_SLOW.pip new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/ci/requirements-3.6_LOCALE_SLOW.run b/ci/requirements-3.6_LOCALE_SLOW.run new file mode 100644 index 0000000000000..28131031f0bbd --- /dev/null +++ b/ci/requirements-3.6_LOCALE_SLOW.run @@ -0,0 +1,22 @@ +python-dateutil +pytz +numpy +scipy +openpyxl +xlsxwriter +xlrd +xlwt +numexpr +pytables +matplotlib +lxml +html5lib +jinja2 +sqlalchemy +pymysql +# feather-format (not available on defaults ATM) +# psycopg2 (not available on defaults ATM) +beautifulsoup4 +s3fs +xarray +ipython diff --git a/circle.yml b/circle.yml index fa2da0680f388..9d49145af54e3 100644 --- a/circle.yml +++ b/circle.yml @@ -23,9 +23,9 @@ dependencies: 0) sudo apt-get install language-pack-it && ./ci/install_circle.sh JOB="2.7_COMPAT" LOCALE_OVERRIDE="it_IT.UTF-8" ;; 1) - sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh JOB="3.4_SLOW" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; + sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh JOB="3.6_LOCALE" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; 2) - sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh JOB="3.4" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; + sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh JOB="3.6_LOCALE_SLOW" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; 3) ./ci/install_circle.sh JOB="3.5_ASCII" LOCALE_OVERRIDE="C" ;; esac diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 3df0a21facb02..f392687a0a3fd 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -35,7 +35,7 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - +- Support has been dropped for Python 3.4 (:issue:`15251`) .. _whatsnew_0210.api: diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 2fe6359fd1ea6..9eacb9acef2c9 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -7,7 +7,7 @@ Key items to import for 2/3 compatible code: * iterators: range(), map(), zip(), filter(), reduce() * lists: lrange(), lmap(), lzip(), lfilter() -* unicode: u() [u"" is a syntax error in Python 3.0-3.2] +* unicode: u() [no unicode builtin in Python 3] * longs: long (int in Python 3) * callable * iterable method compatibility: iteritems, iterkeys, itervalues @@ -110,7 +110,7 @@ def signature(f): unichr = chr # This was introduced in Python 3.3, but we don't support - # Python 3.x < 3.4, so checking PY3 is safe. + # Python 3.x < 3.5, so checking PY3 is safe. 
FileNotFoundError = FileNotFoundError # list-producing versions of the major Python iterating functions diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py index 532d596220501..abd82cfa89f94 100644 --- a/pandas/tests/util/test_util.py +++ b/pandas/tests/util/test_util.py @@ -16,9 +16,6 @@ import pandas.util.testing as tm -CURRENT_LOCALE = locale.getlocale() -LOCALE_OVERRIDE = os.environ.get('LOCALE_OVERRIDE', None) - class TestDecorators(object): @@ -412,6 +409,7 @@ class TestLocaleUtils(object): @classmethod def setup_class(cls): cls.locales = tm.get_locales() + cls.current_locale = locale.getlocale() if not cls.locales: pytest.skip("No locales found") @@ -421,6 +419,7 @@ def setup_class(cls): @classmethod def teardown_class(cls): del cls.locales + del cls.current_locale def test_get_locales(self): # all systems should have at least a single locale @@ -438,17 +437,19 @@ def test_set_locale(self): pytest.skip("Only a single locale found, no point in " "trying to test setting another locale") - if all(x is None for x in CURRENT_LOCALE): + if all(x is None for x in self.current_locale): # Not sure why, but on some travis runs with pytest, # getlocale() returned (None, None). - pytest.skip("CURRENT_LOCALE is not set.") + pytest.skip("Current locale is not set.") + + locale_override = os.environ.get('LOCALE_OVERRIDE', None) - if LOCALE_OVERRIDE is None: + if locale_override is None: lang, enc = 'it_CH', 'UTF-8' - elif LOCALE_OVERRIDE == 'C': + elif locale_override == 'C': lang, enc = 'en_US', 'ascii' else: - lang, enc = LOCALE_OVERRIDE.split('.') + lang, enc = locale_override.split('.') enc = codecs.lookup(enc).name new_locale = lang, enc @@ -465,4 +466,4 @@ def test_set_locale(self): assert normalized_locale == new_locale current_locale = locale.getlocale() - assert current_locale == CURRENT_LOCALE + assert current_locale == self.current_locale diff --git a/setup.py b/setup.py index 9a04bb6994869..ff537d5868db6 100755 --- a/setup.py +++ b/setup.py @@ -246,7 +246,6 @@ def build_extensions(self): 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Cython', diff --git a/tox.ini b/tox.ini index 85c5d90fde7fb..45ad7fc451e76 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ # and then run "tox" from this directory. [tox] -envlist = py27, py34, py35 +envlist = py27, py35, py36 [testenv] deps = @@ -49,14 +49,14 @@ deps = bigquery {[testenv]deps} -[testenv:py34] +[testenv:py35] deps = - numpy==1.8.0 + numpy==1.10.0 {[testenv]deps} -[testenv:py35] +[testenv:py36] deps = - numpy==1.10.0 + numpy {[testenv]deps} [testenv:openpyxl1] From 46dc536f554aa0dc2b6a925356c7f6b8dc88f34d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 15 May 2017 20:07:22 -0400 Subject: [PATCH 557/933] DOC: add developer section to the docs --- doc/source/developer.rst | 18 ++++++++++++++++++ doc/source/index.rst.template | 1 + 2 files changed, 19 insertions(+) create mode 100644 doc/source/developer.rst diff --git a/doc/source/developer.rst b/doc/source/developer.rst new file mode 100644 index 0000000000000..7633b4732479c --- /dev/null +++ b/doc/source/developer.rst @@ -0,0 +1,18 @@ +.. _developer: + +.. currentmodule:: pandas + +.. 
ipython:: python + :suppress: + + import numpy as np + np.random.seed(123456) + np.set_printoptions(precision=4, suppress=True) + import pandas as pd + pd.options.display.max_rows = 15 + +********* +Developer +********* + +This section will focus on downstream applications of pandas. diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template index 0bfb2b635f53a..f5c65e175b0db 100644 --- a/doc/source/index.rst.template +++ b/doc/source/index.rst.template @@ -151,6 +151,7 @@ See the package overview for more detail about what's in the library. api {% endif -%} {%if not single -%} + developer internals release {% endif -%} From d92f06a06fe45f99cbc8a813f4128447f9466aa5 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 15 May 2017 21:33:23 -0400 Subject: [PATCH 558/933] DOC: try with new secure token --- .travis.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index f7412f4e4222c..96976e47428d7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,10 +17,14 @@ cache: env: global: - # create a github personal access token: - # echo PANDAS_GH_TOKEN=personal_access_token | travis encrypt --repo pandas-docs/pandas-docs-travis - # pandas-docs/pandas-docs-travis GH - - secure: PfEfa2O785XHMnYAPCi46c4m62EO6j7e7qPE3rOEu8xCjtIq+I2HMykWzU/B2oJi4RDimCQSs+Q2b3PX3It4E8/BWh/15/pdM5X5SqdElOJ4dMfnqfyN+Cmes7UdZHmys27kTZmgrGQU2uCtE2DzJdwVgYtyDb6tcFAJbAKnom8= + # pandas-docs/pandas-docs-travis GH # + # + # create a github personal access token + # cd pandas-docs/pandas-docs-travis + # travis encrypt + # PANDAS_GH_TOKEN=personal_access_token + - secure: "WjlBp350uCfrOBNlXrBMkryT20XLRuyCMhLS2c+y0KByeuLrXLg3qWeXXK3OLO8IiNTMI9GvbdC9L5MyMl8CmZQSJxjqWXCzX6057ZjcfJo+wyM96CEx949UEnVbr5bbEIid/fo3PGCcoxoWym75+opAzs2Giih0gjS33BmyslA=" + git: # for cloning From 48730d69e0df5fbad40b873895dd8c837c11eb9a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 16 May 2017 06:18:37 -0400 Subject: [PATCH 559/933] DOC: try new token for docs --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 96976e47428d7..f742d91708947 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,8 +23,7 @@ env: # cd pandas-docs/pandas-docs-travis # travis encrypt # PANDAS_GH_TOKEN=personal_access_token - - secure: "WjlBp350uCfrOBNlXrBMkryT20XLRuyCMhLS2c+y0KByeuLrXLg3qWeXXK3OLO8IiNTMI9GvbdC9L5MyMl8CmZQSJxjqWXCzX6057ZjcfJo+wyM96CEx949UEnVbr5bbEIid/fo3PGCcoxoWym75+opAzs2Giih0gjS33BmyslA=" - + secure: "Vldn32VpfPbnLbTaJ8z1zkHRe3vHjgSwSg3hFTRhQeFQlWK7SDXjWwbNK+X9OF+ju4Cfczn2m+gssOGgOPq3HRNYih5p7pKVk2WII7V1mlcXuXGkVDws340aA9KoUbq+A2bnNcovBQjX3FRz+gofnQXOHVD3l/Eh7nEmXdgP+Ls=" git: # for cloning From 42e2a87f2a8848795238de1259a3daa5612e393d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 16 May 2017 07:02:40 -0400 Subject: [PATCH 560/933] PERF: improved clip performance (#16364) closes #15400 --- asv_bench/benchmarks/series_methods.py | 11 +++++++++ doc/source/whatsnew/v0.20.2.txt | 3 ++- pandas/core/generic.py | 33 +++++++++++++++++++++++++- pandas/tests/series/test_analytics.py | 1 + 4 files changed, 46 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index c66654ee1e006..3c0e2869357ae 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -111,6 +111,7 @@ def setup(self): def time_series_dropna_int64(self): self.s.dropna() + class series_dropna_datetime(object): goal_time = 0.2 @@ -120,3 +121,13 @@ def 
setup(self):
+        self.s = pd.Series(np.random.randn(50))
+
+    def time_series_clip(self):
+        self.s.clip(0, 1)
diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index 783985bc0f2bb..10a6b4354290d 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -19,7 +19,7 @@ Highlights include:
 Enhancements
 ~~~~~~~~~~~~

-- Unblocked access to additional compression types supported in pytables: 'blosc:blosclz, 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`)
+- Unblocked access to additional compression types supported in pytables: 'blosc:blosclz, 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`)

 .. _whatsnew_0202.performance:

@@ -28,6 +28,7 @@ Performance Improvements

 - Performance regression fix when indexing with a list-like (:issue:`16285`)
 - Performance regression fix for small MultiIndexes (:issue:`16319`)
+- Improved performance of ``.clip()`` with scalar arguments (:issue:`15400`)

 .. _whatsnew_0202.bug_fixes:

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 777cfcae7a326..3e1c5c3f354fd 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -14,6 +14,7 @@
     _ensure_int64,
     needs_i8_conversion,
     is_scalar,
+    is_number,
     is_integer,
     is_bool,
     is_bool_dtype,
    is_numeric_dtype,
@@ -4104,6 +4105,22 @@ def isnull(self):
     def notnull(self):
         return notnull(self).__finalize__(self)

+    def _clip_with_scalar(self, lower, upper):
+
+        if ((lower is not None and np.any(isnull(lower))) or
+                (upper is not None and np.any(isnull(upper)))):
+            raise ValueError("Cannot use an NA value as a clip threshold")
+
+        result = self.values
+        mask = isnull(result)
+        if upper is not None:
+            result = np.where(result >= upper, upper, result)
+        if lower is not None:
+            result = np.where(result <= lower, lower, result)
+        result[mask] = np.nan
+        return self._constructor(
+            result, **self._construct_axes_dict()).__finalize__(self)
+
     def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
         """
         Trim values at input threshold(s).
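
[Editorial aside between hunks: a quick illustration of the scalar fast path added in ``_clip_with_scalar`` above; a sketch, not part of the patch. Note that NaN values are preserved.]

    import numpy as np
    import pandas as pd

    s = pd.Series([-0.3, np.nan, 0.4, 1.2])
    s.clip(0, 1)   # scalar bounds -> one np.where pass per bound
    # 0    0.0
    # 1    NaN
    # 2    0.4
    # 3    1.0
    # dtype: float64
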
@@ -4122,12 +4139,13 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): Examples -------- >>> df - 0 1 + 0 1 0 0.335232 -1.256177 1 -1.367855 0.746646 2 0.027753 -1.176076 3 0.230930 -0.679613 4 1.261967 0.570967 + >>> df.clip(-1.0, 0.5) 0 1 0 0.335232 -1.000000 @@ -4135,6 +4153,7 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): 2 0.027753 -1.000000 3 0.230930 -0.679613 4 0.500000 0.500000 + >>> t 0 -0.3 1 -0.2 @@ -4142,6 +4161,7 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): 3 0.0 4 0.1 dtype: float64 + >>> df.clip(t, t + 1, axis=0) 0 1 0 0.335232 -0.300000 @@ -4160,6 +4180,11 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): if is_scalar(lower) and is_scalar(upper): lower, upper = min(lower, upper), max(lower, upper) + # fast-path for scalars + if ((lower is None or (is_scalar(lower) and is_number(lower))) and + (upper is None or (is_scalar(upper) and is_number(upper)))): + return self._clip_with_scalar(lower, upper) + result = self if lower is not None: result = result.clip_lower(lower, axis) @@ -4189,6 +4214,9 @@ def clip_upper(self, threshold, axis=None): if np.any(isnull(threshold)): raise ValueError("Cannot use an NA value as a clip threshold") + if is_scalar(threshold) and is_number(threshold): + return self._clip_with_scalar(None, threshold) + subset = self.le(threshold, axis=axis) | isnull(self) return self.where(subset, threshold, axis=axis) @@ -4213,6 +4241,9 @@ def clip_lower(self, threshold, axis=None): if np.any(isnull(threshold)): raise ValueError("Cannot use an NA value as a clip threshold") + if is_scalar(threshold) and is_number(threshold): + return self._clip_with_scalar(threshold, None) + subset = self.ge(threshold, axis=axis) | isnull(self) return self.where(subset, threshold, axis=axis) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index ec6a118ec3639..18c6c9a6dd021 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1011,6 +1011,7 @@ def test_clip_against_series(self): lower = Series([1.0, 2.0, 3.0]) upper = Series([1.5, 2.5, 3.5]) + assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5])) assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5])) From f040ed227883a244ff2a7037dc453895047c8d5f Mon Sep 17 00:00:00 2001 From: dwkenefick Date: Tue, 16 May 2017 16:06:18 +0000 Subject: [PATCH 561/933] ENH: Provide dict object for to_dict() #16122 (#16220) * ENH: Provide dict object for to_dict() #16122 * ENH: Provide dict object for to_dict() #16122 * ENH: Provide dict object for to_dict() #16122 * ENH: Provide dict object for to_dict() #16122 * ENH: Provide dict object for to_dict() #16122 * ENH: Provide dict object for to_dict() #16122 * ENH: Provide dict object for to_dict() #16122 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/common.py | 38 ++++++++ pandas/core/frame.py | 84 +++++++++++++--- pandas/core/series.py | 37 +++++-- pandas/tests/frame/test_convert_to.py | 133 +++++++++++++++----------- pandas/tests/series/test_io.py | 18 +++- pandas/tests/test_common.py | 25 +++++ 7 files changed, 258 insertions(+), 78 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f392687a0a3fd..1a500bdc65ce3 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -26,6 +26,7 @@ New features Other Enhancements ^^^^^^^^^^^^^^^^^^ +- ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword 
which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`)
 - ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`)

diff --git a/pandas/core/common.py b/pandas/core/common.py
index 39a5da0aa6912..0dc6a7a1e9c7b 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -6,6 +6,8 @@
 import warnings
 from datetime import datetime, timedelta
 from functools import partial
+import inspect
+import collections

 import numpy as np
 from pandas._libs import lib, tslib
@@ -479,6 +481,42 @@ def _dict_compat(d):
                 for key, value in iteritems(d))


+def standardize_mapping(into):
+    """
+    Helper function to standardize a supplied mapping.
+
+    .. versionadded:: 0.21.0
+
+    Parameters
+    ----------
+    into : instance or subclass of collections.Mapping
+        Must be a class, an initialized collections.defaultdict,
+        or an instance of a collections.Mapping subclass.
+
+    Returns
+    -------
+    mapping : a collections.Mapping subclass or other constructor
+        a callable object that can accept an iterator to create
+        the desired Mapping.
+
+    See Also
+    --------
+    DataFrame.to_dict
+    Series.to_dict
+    """
+    if not inspect.isclass(into):
+        if isinstance(into, collections.defaultdict):
+            return partial(
+                collections.defaultdict, into.default_factory)
+        into = type(into)
+    if not issubclass(into, collections.Mapping):
+        raise TypeError('unsupported type: {}'.format(into))
+    elif into == collections.defaultdict:
+        raise TypeError(
+            'to_dict() only accepts initialized defaultdicts')
+    return into
+
+
 def sentinel_factory():
     class Sentinel(object):
         pass
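
[Editorial aside between file diffs: a short sketch of the helper's contract as added above; illustrative only, not part of the patch.]

    import collections
    from pandas.core.common import standardize_mapping

    standardize_mapping(collections.OrderedDict)        # a class passes through
    standardize_mapping({})                             # an instance maps to its class: dict
    standardize_mapping(collections.defaultdict(list))  # -> partial(defaultdict, list)

    # rejected inputs:
    # standardize_mapping(collections.defaultdict)  -> TypeError (uninitialized)
    # standardize_mapping(list)                     -> TypeError (not a Mapping)
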
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 8d437102e4d18..3b0cc5619a1cd 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -63,7 +63,8 @@
                                 _default_index,
                                 _values_from_object,
                                 _maybe_box_datetimelike,
-                                _dict_compat)
+                                _dict_compat,
+                                standardize_mapping)
 from pandas.core.generic import NDFrame, _shared_docs
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
@@ -860,7 +861,7 @@ def from_dict(cls, data, orient='columns', dtype=None):

         return cls(data, index=index, columns=columns, dtype=dtype)

-    def to_dict(self, orient='dict'):
+    def to_dict(self, orient='dict', into=dict):
         """Convert DataFrame to dictionary.

         Parameters
@@ -882,32 +883,85 @@ def to_dict(self, orient='dict'):

             Abbreviations are allowed. `s` indicates `series` and
             `sp` indicates `split`.
+        into : class, default dict
+            The collections.Mapping subclass used for all Mappings
+            in the return value.  Can be the actual class or an empty
+            instance of the mapping type you want.  If you want a
+            collections.defaultdict, you must pass it initialized.
+
+            .. versionadded:: 0.21.0
+
         Returns
         -------
-        result : dict like {column -> {index -> value}}
+        result : collections.Mapping like {column -> {index -> value}}
+
+        Examples
+        --------
+        >>> df = pd.DataFrame(
+        ...     {'col1': [1, 2], 'col2': [0.5, 0.75]}, index=['a', 'b'])
+        >>> df
+           col1  col2
+        a     1  0.50
+        b     2  0.75
+        >>> df.to_dict()
+        {'col1': {'a': 1, 'b': 2}, 'col2': {'a': 0.5, 'b': 0.75}}
+
+        You can specify the return orientation.
+
+        >>> df.to_dict('series')
+        {'col1': a    1
+        b    2
+        Name: col1, dtype: int64, 'col2': a    0.50
+        b    0.75
+        Name: col2, dtype: float64}
+        >>> df.to_dict('split')
+        {'columns': ['col1', 'col2'],
+        'data': [[1.0, 0.5], [2.0, 0.75]],
+        'index': ['a', 'b']}
+        >>> df.to_dict('records')
+        [{'col1': 1.0, 'col2': 0.5}, {'col1': 2.0, 'col2': 0.75}]
+        >>> df.to_dict('index')
+        {'a': {'col1': 1.0, 'col2': 0.5}, 'b': {'col1': 2.0, 'col2': 0.75}}
+
+        You can also specify the mapping type.
+
+        >>> from collections import OrderedDict, defaultdict
+        >>> df.to_dict(into=OrderedDict)
+        OrderedDict([('col1', OrderedDict([('a', 1), ('b', 2)])),
+                     ('col2', OrderedDict([('a', 0.5), ('b', 0.75)]))])

+        If you want a `defaultdict`, you need to initialize it:
+
+        >>> dd = defaultdict(list)
+        >>> df.to_dict('records', into=dd)
+        [defaultdict(<class 'list'>, {'col2': 0.5, 'col1': 1.0}),
+         defaultdict(<class 'list'>, {'col2': 0.75, 'col1': 2.0})]
         """
         if not self.columns.is_unique:
             warnings.warn("DataFrame columns are not unique, some "
                           "columns will be omitted.", UserWarning)
+        # GH16122
+        into_c = standardize_mapping(into)
         if orient.lower().startswith('d'):
-            return dict((k, v.to_dict()) for k, v in compat.iteritems(self))
+            return into_c(
+                (k, v.to_dict(into)) for k, v in compat.iteritems(self))
         elif orient.lower().startswith('l'):
-            return dict((k, v.tolist()) for k, v in compat.iteritems(self))
+            return into_c((k, v.tolist()) for k, v in compat.iteritems(self))
         elif orient.lower().startswith('sp'):
-            return {'index': self.index.tolist(),
-                    'columns': self.columns.tolist(),
-                    'data': lib.map_infer(self.values.ravel(),
-                                          _maybe_box_datetimelike)
-                    .reshape(self.values.shape).tolist()}
+            return into_c((('index', self.index.tolist()),
+                           ('columns', self.columns.tolist()),
+                           ('data', lib.map_infer(self.values.ravel(),
+                                                  _maybe_box_datetimelike)
+                            .reshape(self.values.shape).tolist())))
         elif orient.lower().startswith('s'):
-            return dict((k, _maybe_box_datetimelike(v))
-                        for k, v in compat.iteritems(self))
+            return into_c((k, _maybe_box_datetimelike(v))
+                          for k, v in compat.iteritems(self))
        elif orient.lower().startswith('r'):
-            return [dict((k, _maybe_box_datetimelike(v))
-                         for k, v in zip(self.columns, row))
+            return [into_c((k, _maybe_box_datetimelike(v))
+                           for k, v in zip(self.columns, row))
                    for row in self.values]
         elif orient.lower().startswith('i'):
-            return dict((k, v.to_dict()) for k, v in self.iterrows())
+            return into_c((k, v.to_dict(into)) for k, v in self.iterrows())
         else:
             raise ValueError("orient '%s' not understood" % orient)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 6ec163bbaa73d..129f291e5f843 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -46,7 +46,8 @@
                                 _maybe_match_name,
                                 SettingWithCopyError,
                                 _maybe_box_datetimelike,
-                                _dict_compat)
+                                _dict_compat,
+                                standardize_mapping)
 from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
                                Float64Index, _ensure_index)
 from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
@@ -1074,15 +1075,39 @@ def tolist(self):
         """ Convert Series to a nested list """
         return list(self.asobject)

-    def to_dict(self):
+    def to_dict(self, into=dict):
         """
-        Convert Series to {label -> value} dict
+        Convert Series to {label -> value} dict or dict-like object.
+
+        Parameters
+        ----------
+        into : class, default dict
+            The collections.Mapping subclass to use as the return
+            object. Can be the actual class or an empty
+            instance of the mapping type you want.  If you want a
+            collections.defaultdict, you must pass it initialized.
+
+            .. 
versionadded:: 0.21.0

         Returns
         -------
-        value_dict : dict
-        """
-        return dict(compat.iteritems(self))
+        value_dict : collections.Mapping
+
+        Examples
+        --------
+        >>> s = pd.Series([1, 2, 3, 4])
+        >>> s.to_dict()
+        {0: 1, 1: 2, 2: 3, 3: 4}
+        >>> from collections import OrderedDict, defaultdict
+        >>> s.to_dict(OrderedDict)
+        OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+        >>> dd = defaultdict(list)
+        >>> s.to_dict(dd)
+        defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
+        """
+        # GH16122
+        into_c = standardize_mapping(into)
+        return into_c(compat.iteritems(self))

     def to_frame(self, name=None):
         """
diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py
index e0cdca7904db7..34dd138ee1c80 100644
--- a/pandas/tests/frame/test_convert_to.py
+++ b/pandas/tests/frame/test_convert_to.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-

 import pytest
+import collections
 import numpy as np

 from pandas import compat
@@ -13,50 +14,6 @@

 class TestDataFrameConvertTo(TestData):

-    def test_to_dict(self):
-        test_data = {
-            'A': {'1': 1, '2': 2},
-            'B': {'1': '1', '2': '2', '3': '3'},
-        }
-        recons_data = DataFrame(test_data).to_dict()
-
-        for k, v in compat.iteritems(test_data):
-            for k2, v2 in compat.iteritems(v):
-                assert v2 == recons_data[k][k2]
-
-        recons_data = DataFrame(test_data).to_dict("l")
-
-        for k, v in compat.iteritems(test_data):
-            for k2, v2 in compat.iteritems(v):
-                assert v2 == recons_data[k][int(k2) - 1]
-
-        recons_data = DataFrame(test_data).to_dict("s")
-
-        for k, v in compat.iteritems(test_data):
-            for k2, v2 in compat.iteritems(v):
-                assert v2 == recons_data[k][k2]
-
-        recons_data = DataFrame(test_data).to_dict("sp")
-        expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
-                          'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
-        tm.assert_dict_equal(recons_data, expected_split)
-
-        recons_data = DataFrame(test_data).to_dict("r")
-        expected_records = [{'A': 1.0, 'B': '1'},
-                            {'A': 2.0, 'B': '2'},
-                            {'A': np.nan, 'B': '3'}]
-        assert isinstance(recons_data, list)
-        assert len(recons_data) == 3
-        for l, r in zip(recons_data, expected_records):
-            tm.assert_dict_equal(l, r)
-
-        # GH10844
-        recons_data = DataFrame(test_data).to_dict("i")
-
-        for k, v in compat.iteritems(test_data):
-            for k2, v2 in compat.iteritems(v):
-                assert v2 == recons_data[k2][k]
-
     def test_to_dict_timestamp(self):

         # GH11247
@@ -190,17 +147,85 @@ def test_to_records_with_unicode_column_names(self):
         )
         tm.assert_almost_equal(result, expected)

+    @pytest.mark.parametrize('mapping', [
+        dict,
+        collections.defaultdict(list),
+        collections.OrderedDict])
+    def test_to_dict(self, mapping):
+        test_data = {
+            'A': {'1': 1, '2': 2},
+            'B': {'1': '1', '2': '2', '3': '3'},
+        }
+
+        # GH16122
+        recons_data = DataFrame(test_data).to_dict(into=mapping)
+
+        for k, v in compat.iteritems(test_data):
+            for k2, v2 in compat.iteritems(v):
+                assert (v2 == recons_data[k][k2])
+
+        recons_data = DataFrame(test_data).to_dict("l", mapping)
+
+        for k, v in compat.iteritems(test_data):
+            for k2, v2 in compat.iteritems(v):
+                assert (v2 == recons_data[k][int(k2) - 1])
+
+        recons_data = DataFrame(test_data).to_dict("s", mapping)
+
+        for k, v in compat.iteritems(test_data):
+            for k2, v2 in compat.iteritems(v):
+                assert (v2 == recons_data[k][k2])
+
+        recons_data = DataFrame(test_data).to_dict("sp", mapping)
+        expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
+                          'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
+        tm.assert_dict_equal(recons_data, expected_split)
+
+        recons_data = DataFrame(test_data).to_dict("r", 
mapping) + expected_records = [{'A': 1.0, 'B': '1'}, + {'A': 2.0, 'B': '2'}, + {'A': np.nan, 'B': '3'}] + assert isinstance(recons_data, list) + assert (len(recons_data) == 3) + for l, r in zip(recons_data, expected_records): + tm.assert_dict_equal(l, r) + + # GH10844 + recons_data = DataFrame(test_data).to_dict("i") + + for k, v in compat.iteritems(test_data): + for k2, v2 in compat.iteritems(v): + assert (v2 == recons_data[k2][k]) + + df = DataFrame(test_data) + df['duped'] = df[df.columns[0]] + recons_data = df.to_dict("i") + comp_data = test_data.copy() + comp_data['duped'] = comp_data[df.columns[0]] + for k, v in compat.iteritems(comp_data): + for k2, v2 in compat.iteritems(v): + assert (v2 == recons_data[k2][k]) + + @pytest.mark.parametrize('mapping', [ + list, + collections.defaultdict, + []]) + def test_to_dict_errors(self, mapping): + # GH16122 + df = DataFrame(np.random.randn(3, 3)) + with pytest.raises(TypeError): + df.to_dict(into=mapping) -@pytest.mark.parametrize('tz', ['UTC', 'GMT', 'US/Eastern']) -def test_to_records_datetimeindex_with_tz(tz): - # GH13937 - dr = date_range('2016-01-01', periods=10, - freq='S', tz=tz) + @pytest.mark.parametrize('tz', ['UTC', 'GMT', 'US/Eastern']) + def test_to_records_datetimeindex_with_tz(self, tz): + # GH13937 + dr = date_range('2016-01-01', periods=10, + freq='S', tz=tz) - df = DataFrame({'datetime': dr}, index=dr) + df = DataFrame({'datetime': dr}, index=dr) - expected = df.to_records() - result = df.tz_convert("UTC").to_records() + expected = df.to_records() + result = df.tz_convert("UTC").to_records() - # both converted to UTC, so they are equal - tm.assert_numpy_array_equal(result, expected) + # both converted to UTC, so they are equal + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index d1c9e5a6d16cf..503185de427f1 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -2,6 +2,8 @@ # pylint: disable-msg=E1101,W0612 from datetime import datetime +import collections +import pytest import numpy as np import pandas as pd @@ -126,9 +128,6 @@ def test_to_frame(self): dict(testdifferent=self.ts.values), index=self.ts.index) assert_frame_equal(rs, xp) - def test_to_dict(self): - tm.assert_series_equal(Series(self.ts.to_dict(), name='ts'), self.ts) - def test_timeseries_periodindex(self): # GH2891 from pandas import period_range @@ -167,6 +166,19 @@ class SubclassedFrame(DataFrame): expected = SubclassedFrame({'X': [1, 2, 3]}) assert_frame_equal(result, expected) + @pytest.mark.parametrize('mapping', ( + dict, + collections.defaultdict(list), + collections.OrderedDict)) + def test_to_dict(self, mapping): + # GH16122 + ts = TestData().ts + tm.assert_series_equal( + Series(ts.to_dict(mapping), name='ts'), ts) + from_method = Series(ts.to_dict(collections.Counter)) + from_constructor = Series(collections.Counter(ts.iteritems())) + tm.assert_series_equal(from_method, from_constructor) + class TestSeriesToList(TestData): diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index d7dbaccb87ee8..4893f99f7cf0f 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- import pytest +import collections +from functools import partial import numpy as np @@ -195,3 +197,26 @@ def test_dict_compat(): assert (com._dict_compat(data_datetime64) == expected) assert (com._dict_compat(expected) == expected) assert (com._dict_compat(data_unchanged) == data_unchanged) + + +def 
test_standardize_mapping(): + # No uninitialized defaultdicts + with pytest.raises(TypeError): + com.standardize_mapping(collections.defaultdict) + + # No non-mapping subtypes, instance + with pytest.raises(TypeError): + com.standardize_mapping([]) + + # No non-mapping subtypes, class + with pytest.raises(TypeError): + com.standardize_mapping(list) + + fill = {'bad': 'data'} + assert (com.standardize_mapping(fill) == dict) + + # Convert instance to type + assert (com.standardize_mapping({}) == dict) + + dd = collections.defaultdict(list) + assert isinstance(com.standardize_mapping(dd), partial) From 9d3bef8cf537d9bfe29e00a908165fbd4a5904b4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 16 May 2017 12:33:58 -0500 Subject: [PATCH 562/933] BUG/API: Categorical constructor scalar categories (#16340) * BUG: Categorical constructor scalar categories Categorical constructor no longer accepts scalars for categories. Closes #16022 --- doc/source/whatsnew/v0.21.0.txt | 5 +++++ pandas/core/categorical.py | 3 +++ pandas/tests/test_categorical.py | 11 +++++------ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 1a500bdc65ce3..90f38620fdfcf 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -37,6 +37,8 @@ Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Support has been dropped for Python 3.4 (:issue:`15251`) +- The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`) + .. _whatsnew_0210.api: @@ -110,6 +112,9 @@ Numeric ^^^^^^^ +Categorical +^^^^^^^^^^^ + Other ^^^^^ diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 7eb86232cbb07..edbb07b7069e9 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -533,6 +533,9 @@ def _validate_categories(cls, categories, fastpath=False): if not isinstance(categories, ABCIndexClass): dtype = None if not hasattr(categories, "dtype"): + if not is_list_like(categories): + raise TypeError("`categories` must be list-like. " + "Got {} instead".format(repr(categories))) categories = _convert_to_list_like(categories) # On categories with NaNs, int values would be converted to # float. Use "object" dtype to prevent this. 
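In practice the new guard surfaces immediately at construction time. A minimal sketch of the behavior this hunk enforces (output written by hand rather than captured from a session; the message text follows the ``format`` call in ``_validate_categories`` above):

    >>> import pandas as pd
    >>> pd.Categorical(['a', 'b'], categories='a')
    Traceback (most recent call last):
      ...
    TypeError: `categories` must be list-like. Got 'a' instead
    >>> pd.Categorical(['a', 'b'], categories=['a', 'b'])  # list-likes are unchanged
    [a, b]
    Categories (2, object): [a, b]

A string is rejected here because ``is_list_like`` treats strings as scalars, which is exactly the scalar-``categories`` case reported in #16022.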
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 03adf17f50300..57676be68bedf 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -256,12 +256,6 @@ def f():
         assert len(cat.codes) == 1
         assert cat.codes[0] == 0

-        cat = pd.Categorical([1], categories=1)
-        assert len(cat.categories) == 1
-        assert cat.categories[0] == 1
-        assert len(cat.codes) == 1
-        assert cat.codes[0] == 0
-
         # Catch old style constructor useage: two arrays, codes + categories
         # We can only catch two cases:
         # - when the first is an integer dtype and the second is not
@@ -285,6 +279,11 @@ def f():
             c = Categorical(np.array([], dtype='int64'),  # noqa
                             categories=[3, 2, 1], ordered=True)

+    def test_constructor_not_sequence(self):
+        # https://github.com/pandas-dev/pandas/issues/16022
+        with pytest.raises(TypeError):
+            Categorical(['a', 'b'], categories='a')
+
     def test_constructor_with_null(self):

         # Cannot have NaN in categories

From 6a5e6f12b263834ac133025474a6460c169dad72 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Tue, 16 May 2017 18:21:02 -0400
Subject: [PATCH 563/933] MAINT: Add .iml to .gitignore (#16368)

[ci skip]
---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 495429fcde429..ff0a6aef47163 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@
 *$
 *.bak
 *flymake*
+*.iml
 *.kdev4
 *.log
 *.swp

From 623b3e7930441171972e4add0126182e76138215 Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Tue, 16 May 2017 18:26:12 -0400
Subject: [PATCH 564/933] ENH: Draft metadata specification doc for Apache
 Parquet (#16315)

* Draft metadata specification doc for Apache Parquet
* Tweaks, add pandas version
* Relax metadata key
* Be explicit that the metadata is file-level
* Don't hard code version
* Code reviews
* Move Parquet metadata to developer.rst, account for code reviews
* Code review comments
* Review comments
* Fix typo
---
 doc/source/developer.rst | 117 +++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)

diff --git a/doc/source/developer.rst b/doc/source/developer.rst
index 7633b4732479c..78c12b7e23b37 100644
--- a/doc/source/developer.rst
+++ b/doc/source/developer.rst
@@ -16,3 +16,120 @@ Developer
 *********

 This section will focus on downstream applications of pandas.
+
+.. _apache.parquet:
+
+Storing pandas DataFrame objects in Apache Parquet format
+---------------------------------------------------------
+
+The `Apache Parquet <https://github.com/apache/parquet-format>`__ format
+provides key-value metadata at the file and column level, stored in the footer
+of the Parquet file:
+
+.. code-block:: shell
+
+    5: optional list<KeyValue> key_value_metadata
+
+where ``KeyValue`` is
+
+.. code-block:: shell
+
+    struct KeyValue {
+      1: required string key
+      2: optional string value
+    }
+
+So that a ``pandas.DataFrame`` can be faithfully reconstructed, we store a
+``pandas`` metadata key in the ``FileMetaData`` with the value stored as:
+
+.. code-block:: text
+
+    {'index_columns': ['__index_level_0__', '__index_level_1__', ...],
+     'columns': [<c0>, <c1>, ...],
+     'pandas_version': $VERSION}
+
+Here, ``<c0>`` and so forth are dictionaries containing the metadata for each
+column. This has JSON form:
+
+.. code-block:: text
+
+    {'name': column_name,
+     'pandas_type': pandas_type,
+     'numpy_type': numpy_type,
+     'metadata': type_metadata}
+
+``pandas_type`` is the logical type of the column, and is one of:
+
+* Boolean: ``'bool'``
+* Integers: ``'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'``
+* Floats: ``'float16', 'float32', 'float64'``
+* Date and Time Types: ``'datetime', 'datetimetz'``, ``'timedelta'``
+* String: ``'unicode', 'bytes'``
+* Categorical: ``'categorical'``
+* Other Python objects: ``'object'``
+
+The ``numpy_type`` is the physical storage type of the column, which is the
+result of ``str(dtype)`` for the underlying NumPy array that holds the data. So
+for ``datetimetz`` this is ``datetime64[ns]`` and for categorical, it may be
+any of the supported integer categorical types.
+
+The ``type_metadata`` is ``None`` except for:
+
+* ``datetimetz``: ``{'timezone': zone, 'unit': 'ns'}``, e.g. ``{'timezone':
+  'America/New_York', 'unit': 'ns'}``. The ``'unit'`` is optional, and if
+  omitted it is assumed to be nanoseconds.
+* ``categorical``: ``{'num_categories': K, 'ordered': is_ordered, 'type': $TYPE}``
+
+  * Here ``'type'`` is optional, and can be a nested pandas type specification
+    here (but not categorical)
+
+* ``unicode``: ``{'encoding': encoding}``
+
+  * The encoding is optional, and if not present is UTF-8
+
+* ``object``: ``{'encoding': encoding}``. Objects can be serialized and stored
+  in ``BYTE_ARRAY`` Parquet columns. The encoding can be one of:
+
+  * ``'pickle'``
+  * ``'msgpack'``
+  * ``'bson'``
+  * ``'json'``
+
+* ``timedelta``: ``{'unit': 'ns'}``. The ``'unit'`` is optional, and if omitted
+  it is assumed to be nanoseconds. This metadata is optional altogether
+
+For types other than these, the ``'metadata'`` key can be
+omitted. Implementations can assume ``None`` if the key is not present.
+
+As an example of fully-formed metadata:
+
+.. code-block:: text
+
+    {'index_columns': ['__index_level_0__'],
+     'columns': [
+         {'name': 'c0',
+          'pandas_type': 'int8',
+          'numpy_type': 'int8',
+          'metadata': None},
+         {'name': 'c1',
+          'pandas_type': 'bytes',
+          'numpy_type': 'object',
+          'metadata': None},
+         {'name': 'c2',
+          'pandas_type': 'categorical',
+          'numpy_type': 'int16',
+          'metadata': {'num_categories': 1000, 'ordered': False}},
+         {'name': 'c3',
+          'pandas_type': 'datetimetz',
+          'numpy_type': 'datetime64[ns]',
+          'metadata': {'timezone': 'America/Los_Angeles'}},
+         {'name': 'c4',
+          'pandas_type': 'object',
+          'numpy_type': 'object',
+          'metadata': {'encoding': 'pickle'}},
+         {'name': '__index_level_0__',
+          'pandas_type': 'int64',
+          'numpy_type': 'int64',
+          'metadata': None}
+     ],
+     'pandas_version': '0.20.0'}

From 9c8337a08e94fa13ff082bdbb01e865ab124fd9c Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Tue, 16 May 2017 17:37:14 -0500
Subject: [PATCH 565/933] TST: Add test for clip-na (#16369)

Additional test cases for
https://github.com/pandas-dev/pandas/pull/16364
when upper and / or lower is nan.
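For reference, a minimal sketch of what these tests pin down. The tests only match the "Cannot use an NA" prefix of the error, so the full message shown here is an assumption rather than the exact text:

    >>> import numpy as np
    >>> import pandas as pd
    >>> df = pd.DataFrame({'A': [1., 2., 3.]})
    >>> df.clip(lower=np.nan)
    Traceback (most recent call last):
      ...
    ValueError: Cannot use an NA value as a clip threshold
    >>> df.clip(lower=1.5)  # non-NA thresholds behave as before
         A
    0  1.5
    1  2.0
    2  3.0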
--- pandas/tests/frame/test_analytics.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index be89b27912d1c..1b6471fcef565 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1863,6 +1863,23 @@ def test_clip_against_frame(self): tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask]) tm.assert_frame_equal(clipped_df[mask], df[mask]) + def test_clip_na(self): + msg = "Cannot use an NA" + with tm.assert_raises_regex(ValueError, msg): + self.frame.clip(lower=np.nan) + + with tm.assert_raises_regex(ValueError, msg): + self.frame.clip(lower=[np.nan]) + + with tm.assert_raises_regex(ValueError, msg): + self.frame.clip(upper=np.nan) + + with tm.assert_raises_regex(ValueError, msg): + self.frame.clip(upper=[np.nan]) + + with tm.assert_raises_regex(ValueError, msg): + self.frame.clip(lower=np.nan, upper=np.nan) + # Matrix-like def test_dot(self): From 88711470268cc3ee32e0b707cb61b77efbceaebd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 16 May 2017 20:40:03 -0400 Subject: [PATCH 566/933] DOC: new oauth token --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f742d91708947..0156f17aa32a5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,7 +23,7 @@ env: # cd pandas-docs/pandas-docs-travis # travis encrypt # PANDAS_GH_TOKEN=personal_access_token - secure: "Vldn32VpfPbnLbTaJ8z1zkHRe3vHjgSwSg3hFTRhQeFQlWK7SDXjWwbNK+X9OF+ju4Cfczn2m+gssOGgOPq3HRNYih5p7pKVk2WII7V1mlcXuXGkVDws340aA9KoUbq+A2bnNcovBQjX3FRz+gofnQXOHVD3l/Eh7nEmXdgP+Ls=" + secure: "S49Tn5dzBRu6QaQcSV8MoCeX9rn7l8xuHFJbFsT9jPm1l0YPb94S8iDk0Isw71SqvHBgh+j2cms9jgYn2N3VCArh5MpA0oKwTKRZEX3iLQv248dCY2C6LdzAKLA+8m2naDGcfc0qMLeNieCGZICccs0EKIGDt8m7VQBMqeT0YU0=" git: # for cloning From e97865e5a60099b785daf58f6be085ef6d906427 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 16 May 2017 20:53:28 -0400 Subject: [PATCH 567/933] TST: followup to #16364, catch errstate warnings (#16373) --- pandas/core/generic.py | 14 +++++++++----- pandas/tests/frame/test_analytics.py | 11 +++++++++++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3e1c5c3f354fd..c33b30c78d812 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4113,11 +4113,15 @@ def _clip_with_scalar(self, lower, upper): result = self.values mask = isnull(result) - if upper is not None: - result = np.where(result >= upper, upper, result) - if lower is not None: - result = np.where(result <= lower, lower, result) - result[mask] = np.nan + + with np.errstate(all='ignore'): + if upper is not None: + result = np.where(result >= upper, upper, result) + if lower is not None: + result = np.where(result <= lower, lower, result) + if np.any(mask): + result[mask] = np.nan + return self._constructor( result, **self._construct_axes_dict()).__finalize__(self) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 1b6471fcef565..fa9823bf000a2 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1824,6 +1824,17 @@ def test_dataframe_clip(self): assert (clipped_df.values[ub_mask] == ub).all() assert (clipped_df.values[mask] == df.values[mask]).all() + @pytest.mark.xfail(reason=("clip on mixed integer or floats " + "with integer clippers coerces to float")) + def test_clip_mixed_numeric(self): + + df = DataFrame({'A': [1, 2, 3], + 'B': [1., 
np.nan, 3.]}) + result = df.clip(1, 2) + expected = DataFrame({'A': [1, 2, 2], + 'B': [1., np.nan, 2.]}) + tm.assert_frame_equal(result, expected, check_like=True) + def test_clip_against_series(self): # GH #6966 From a3021eaed89e0198547c0a1583f03d0963267536 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 16 May 2017 22:14:45 -0400 Subject: [PATCH 568/933] TST: remove pandas-datareader xfail as 0.4.0 works (#16374) --- pandas/tests/test_downstream.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 12976272cb8b1..27e3c29a70a9f 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -84,12 +84,10 @@ def test_pandas_gbq(df): pandas_gbq = import_module('pandas_gbq') # noqa -@pytest.mark.xfail(reason=("pandas_datareader<=0.3.0 " - "broken w.r.t. pandas >= 0.20.0")) def test_pandas_datareader(): pandas_datareader = import_module('pandas_datareader') # noqa - pandas_datareader.get_data_yahoo('AAPL') + pandas_datareader.get_data_google('AAPL') def test_geopandas(): From 34ebad832d6709ecd479c4db4705a9f81da015b3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 17 May 2017 09:36:51 +0200 Subject: [PATCH 569/933] PERF: improve MultiIndex get_loc performance (#16346) * PERF: improve hash collision check for single MI labels * PERF: specialized hash function for single tuples --- asv_bench/benchmarks/indexing.py | 12 +++++ doc/source/whatsnew/v0.20.2.txt | 3 +- pandas/_libs/hashtable.pxd | 2 + pandas/_libs/hashtable_class_helper.pxi.in | 19 +++++++- pandas/core/indexes/multi.py | 4 +- pandas/core/util/hashing.py | 56 +++++++++++++++++++++- pandas/tests/util/test_hashing.py | 24 +++++++++- 7 files changed, 113 insertions(+), 7 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index e1676715853a4..6a2c9d48c4a28 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -227,12 +227,24 @@ def time_multiindex_get_indexer(self): def time_multiindex_large_get_loc(self): self.mi_large.get_loc((999, 19, 'Z')) + def time_multiindex_large_get_loc_warm(self): + for _ in range(1000): + self.mi_large.get_loc((999, 19, 'Z')) + def time_multiindex_med_get_loc(self): self.mi_med.get_loc((999, 9, 'A')) + def time_multiindex_med_get_loc_warm(self): + for _ in range(1000): + self.mi_med.get_loc((999, 9, 'A')) + def time_multiindex_string_get_loc(self): self.mi_small.get_loc((99, 'A', 'A')) + def time_multiindex_small_get_loc_warm(self): + for _ in range(1000): + self.mi_small.get_loc((99, 'A', 'A')) + def time_is_monotonic(self): self.miint.is_monotonic diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 10a6b4354290d..7773f5abfb0ba 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -27,9 +27,10 @@ Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Performance regression fix when indexing with a list-like (:issue:`16285`) -- Performance regression fix for small MultiIndexes (:issuse:`16319`) +- Performance regression fix for MultiIndexes (:issue:`16319`, :issue:`16346`) - Improved performance of ``.clip()`` with scalar arguments (:issue:`15400`) + .. 
_whatsnew_0202.bug_fixes: Bug Fixes diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd index 3366751af144d..014da22df3382 100644 --- a/pandas/_libs/hashtable.pxd +++ b/pandas/_libs/hashtable.pxd @@ -38,6 +38,8 @@ cdef class MultiIndexHashTable(HashTable): cpdef get_item(self, object val) cpdef set_item(self, object key, Py_ssize_t val) + cdef inline void _check_for_collision(self, Py_ssize_t loc, object label) + cdef class StringHashTable(HashTable): cdef kh_str_t *table diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index b80a592669eca..3ef52c5c59c9d 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -4,6 +4,9 @@ Template for each `dtype` helper function for hashtable WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ +from lib cimport is_null_datetimelike + + #---------------------------------------------------------------------- # VectorData #---------------------------------------------------------------------- @@ -921,6 +924,19 @@ cdef class MultiIndexHashTable(HashTable): "hash collision\nlocs:\n{}\n" "result:\n{}\nmi:\n{}".format(alocs, result, mi)) + cdef inline void _check_for_collision(self, Py_ssize_t loc, object label): + # validate that the loc maps to the actual value + # version of _check_for_collisions above for single label (tuple) + + result = self.mi[loc] + + if not all(l == r or (is_null_datetimelike(l) + and is_null_datetimelike(r)) + for l, r in zip(result, label)): + raise AssertionError( + "hash collision\nloc:\n{}\n" + "result:\n{}\nmi:\n{}".format(loc, result, label)) + def __contains__(self, object key): try: self.get_item(key) @@ -939,8 +955,7 @@ cdef class MultiIndexHashTable(HashTable): k = kh_get_uint64(self.table, value) if k != self.table.n_buckets: loc = self.table.vals[k] - locs = np.array([loc], dtype=np.int64) - self._check_for_collisions(locs, key) + self._check_for_collision(loc, key) return loc else: raise KeyError(key) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 3db5633ec30bd..569e16f2141ae 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -748,7 +748,7 @@ def _hashed_indexing_key(self, key): we need to stringify if we have mixed levels """ - from pandas.core.util.hashing import hash_tuples + from pandas.core.util.hashing import hash_tuples, hash_tuple if not isinstance(key, tuple): return hash_tuples(key) @@ -762,7 +762,7 @@ def f(k, stringify): return k key = tuple([f(k, stringify) for k, stringify in zip(key, self._have_mixed_levels)]) - return hash_tuples(key) + return hash_tuple(key) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index f0829adc94500..e41ffae9d03c2 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -4,7 +4,7 @@ import itertools import numpy as np -from pandas._libs import hashing +from pandas._libs import hashing, tslib from pandas.core.dtypes.generic import ( ABCMultiIndex, ABCIndexClass, @@ -12,6 +12,9 @@ ABCDataFrame) from pandas.core.dtypes.common import ( is_categorical_dtype, is_list_like) +from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.cast import infer_dtype_from_scalar + # 16 byte long hashing key _default_hash_key = '0123456789123456' @@ -164,6 +167,29 @@ def hash_tuples(vals, encoding='utf8', hash_key=None): return h +def 
hash_tuple(val, encoding='utf8', hash_key=None): + """ + Hash a single tuple efficiently + + Parameters + ---------- + val : single tuple + encoding : string, default 'utf8' + hash_key : string key to encode, default to _default_hash_key + + Returns + ------- + hash + + """ + hashes = (_hash_scalar(v, encoding=encoding, hash_key=hash_key) + for v in val) + + h = _combine_hash_arrays(hashes, len(val))[0] + + return h + + def _hash_categorical(c, encoding, hash_key): """ Hash a Categorical by hashing its categories, and then mapping the codes @@ -276,3 +302,31 @@ def hash_array(vals, encoding='utf8', hash_key=None, categorize=True): vals *= np.uint64(0x94d049bb133111eb) vals ^= vals >> 31 return vals + + +def _hash_scalar(val, encoding='utf8', hash_key=None): + """ + Hash scalar value + + Returns + ------- + 1d uint64 numpy array of hash value, of length 1 + """ + + if isnull(val): + # this is to be consistent with the _hash_categorical implementation + return np.array([np.iinfo(np.uint64).max], dtype='u8') + + if getattr(val, 'tzinfo', None) is not None: + # for tz-aware datetimes, we need the underlying naive UTC value and + # not the tz aware object or pd extension type (as + # infer_dtype_from_scalar would do) + if not isinstance(val, tslib.Timestamp): + val = tslib.Timestamp(val) + val = val.tz_convert(None) + + dtype, val = infer_dtype_from_scalar(val) + vals = np.array([val], dtype=dtype) + + return hash_array(vals, hash_key=hash_key, encoding=encoding, + categorize=False) diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index e1e6e43529a7d..289592939e3da 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -1,4 +1,5 @@ import pytest +import datetime from warnings import catch_warnings import numpy as np @@ -6,7 +7,7 @@ from pandas import DataFrame, Series, Index, MultiIndex from pandas.util import hash_array, hash_pandas_object -from pandas.core.util.hashing import hash_tuples +from pandas.core.util.hashing import hash_tuples, hash_tuple, _hash_scalar import pandas.util.testing as tm @@ -79,6 +80,27 @@ def test_hash_tuples(self): result = hash_tuples(tups[0]) assert result == expected[0] + def test_hash_tuple(self): + # test equivalence between hash_tuples and hash_tuple + for tup in [(1, 'one'), (1, np.nan), (1.0, pd.NaT, 'A'), + ('A', pd.Timestamp("2012-01-01"))]: + result = hash_tuple(tup) + expected = hash_tuples([tup])[0] + assert result == expected + + def test_hash_scalar(self): + for val in [1, 1.4, 'A', b'A', u'A', pd.Timestamp("2012-01-01"), + pd.Timestamp("2012-01-01", tz='Europe/Brussels'), + datetime.datetime(2012, 1, 1), + pd.Timestamp("2012-01-01", tz='EST').to_pydatetime(), + pd.Timedelta('1 days'), datetime.timedelta(1), + pd.Period('2012-01-01', freq='D'), pd.Interval(0, 1), + np.nan, pd.NaT, None]: + result = _hash_scalar(val) + expected = hash_array(np.array([val], dtype=object), + categorize=True) + assert result[0] == expected[0] + def test_hash_tuples_err(self): for val in [5, 'foo', pd.Timestamp('20130101')]: From 539de79692704c735a38975988ffe7293f6c2583 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Schiratti Date: Thu, 18 May 2017 06:28:17 -0400 Subject: [PATCH 570/933] ENH: Adding 'protocol' parameter to 'to_pickle'. This PR aims at adding an optional `protocol` parameter to the function `to_pickle`. Closes #14488. If needed, I can update the corresponding test (`pandas/tests/io/test_pickle.py`). 
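A quick sketch of the intended usage, based on the parameter described in this PR; the file name is illustrative only:

    >>> import pandas as pd
    >>> df = pd.DataFrame({'A': [1, 2, 3]})
    >>> df.to_pickle('frame.pkl', protocol=2)   # highest protocol Python 2 understands
    >>> df.to_pickle('frame.pkl', protocol=-1)  # negative values mean HIGHEST_PROTOCOL
    >>> pd.read_pickle('frame.pkl').equals(df)
    True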
Author: Jean-Baptiste Schiratti

Closes #16252 from jbschiratti/pickle_protocol and squashes the following commits:

8eb660d [Jean-Baptiste Schiratti] Minor change on whatsnew.
20a854d [Jean-Baptiste Schiratti] Added ref for protocol parameter + edited whatsnew.
14bc485 [Jean-Baptiste Schiratti] Fix : removed unused import.
7631146 [Jean-Baptiste Schiratti] Fix : added issue number.
460ca0c [Jean-Baptiste Schiratti] Shortened paragraph addded in 'whatsnew'.
352220b [Jean-Baptiste Schiratti] Fix : Fixed error message in 'test_read_bad_versions'.
9c9d38f [Jean-Baptiste Schiratti] Added enhancement to 'whatsnew' file.
35f8d18 [Jean-Baptiste Schiratti] Added tests for new 'protocol' parameter in 'to_pickle'.
4bf0386 [Jean-Baptiste Schiratti] Added docstring for negative protocol parameter.
04bc5c2 [Jean-Baptiste Schiratti] Added 'versionadded' tag, improved docstring + fixed import.
66a35e8 [Jean-Baptiste Schiratti] Added 'protocol' parameter to 'to_pickle'.
---
 doc/source/whatsnew/v0.21.0.txt |  3 ++-
 pandas/core/generic.py          | 20 +++++++++++++++---
 pandas/io/pickle.py             | 19 +++++++++++++++--
 pandas/tests/io/test_pickle.py  | 36 +++++++++++++++++++++++++++++++++
 4 files changed, 72 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 90f38620fdfcf..351aa9ebbdc32 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -29,7 +29,8 @@ Other Enhancements
 - ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`)
 - ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`)
-
+- :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default,
+  this parameter is set to `HIGHEST_PROTOCOL <https://docs.python.org/3/library/pickle.html#data-stream-format>`__

 .. _whatsnew_0210.api_breaking:

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index c33b30c78d812..2e7d8693d48dd 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -50,7 +50,7 @@
 from pandas import compat
 from pandas.compat.numpy import function as nv
 from pandas.compat import (map, zip, lzip, lrange, string_types,
-                           isidentifier, set_function_name)
+                           isidentifier, set_function_name, cPickle as pkl)
 import pandas.core.nanops as nanops
 from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
 from pandas.util._validators import validate_bool_kwarg
@@ -1350,7 +1350,8 @@ def to_sql(self, name, con, flavor=None, schema=None, if_exists='fail',
                    if_exists=if_exists, index=index, index_label=index_label,
                    chunksize=chunksize, dtype=dtype)

-    def to_pickle(self, path, compression='infer'):
+    def to_pickle(self, path, compression='infer',
+                  protocol=pkl.HIGHEST_PROTOCOL):
         """
         Pickle (serialize) object to input file path.

@@ -1362,9 +1363,22 @@ def to_pickle(self, path, compression='infer'):
             a string representing the compression to use in the output file

             .. versionadded:: 0.20.0
+        protocol : int
+            Int which indicates which protocol should be used by the pickler,
+            default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
+            values for this parameter depend on the version of Python. For
+            Python 2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a
+            valid value. For Python >= 3.4, 4 is a valid value. A negative value
+            for the protocol parameter is equivalent to setting its value to
+            HIGHEST_PROTOCOL.
+
+            .. [1] https://docs.python.org/3/library/pickle.html
+            .. versionadded:: 0.21.0
+
         """
         from pandas.io.pickle import to_pickle
-        return to_pickle(self, path, compression=compression)
+        return to_pickle(self, path, compression=compression,
+                         protocol=protocol)

     def to_clipboard(self, excel=None, sep=None, **kwargs):
         """
diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py
index 0f91c407766fb..6f4c714931fc8 100644
--- a/pandas/io/pickle.py
+++ b/pandas/io/pickle.py
@@ -7,7 +7,7 @@
 from pandas.io.common import _get_handle, _infer_compression


-def to_pickle(obj, path, compression='infer'):
+def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):
     """
     Pickle (serialize) object to input file path

@@ -20,13 +20,28 @@ def to_pickle(obj, path, compression='infer'):
         a string representing the compression to use in the output file

         .. versionadded:: 0.20.0
+    protocol : int
+        Int which indicates which protocol should be used by the pickler,
+        default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
+        values for this parameter depend on the version of Python. For Python
+        2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value.
+        For Python >= 3.4, 4 is a valid value. A negative value for the
+        protocol parameter is equivalent to setting its value to
+        HIGHEST_PROTOCOL.
+
+    .. [1] https://docs.python.org/3/library/pickle.html
+    .. versionadded:: 0.21.0
+
+
     """
     inferred_compression = _infer_compression(path, compression)
     f, fh = _get_handle(path, 'wb',
                         compression=inferred_compression,
                         is_text=False)
+    if protocol < 0:
+        protocol = pkl.HIGHEST_PROTOCOL
     try:
-        pkl.dump(obj, f, protocol=pkl.HIGHEST_PROTOCOL)
+        pkl.dump(obj, f, protocol=protocol)
     finally:
         for _f in fh:
             _f.close()
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index 429ec5ba1c474..d56b36779efe7 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -25,6 +25,7 @@
 import pandas.util.testing as tm
 from pandas.tseries.offsets import Day, MonthEnd
 import shutil
+import sys


 @pytest.fixture(scope='module')
@@ -501,3 +502,38 @@ def test_read_infer(self, ext, get_random_path):

         df2 = pd.read_pickle(p2)

         tm.assert_frame_equal(df, df2)
+
+
+# ---------------------
+# test pickle protocol
+# ---------------------
+
+class TestProtocol(object):
+
+    @pytest.mark.parametrize('protocol', [-1, 0, 1, 2])
+    def test_read(self, protocol, get_random_path):
+        with tm.ensure_clean(get_random_path) as path:
+            df = tm.makeDataFrame()
+            df.to_pickle(path, protocol=protocol)
+            df2 = pd.read_pickle(path)
+            tm.assert_frame_equal(df, df2)
+
+    @pytest.mark.parametrize('protocol', [3, 4])
+    @pytest.mark.skipif(sys.version_info[:2] >= (3, 4),
+                        reason="Testing invalid parameters for "
+                               "Python 2.x and 3.y (y < 4).")
+    def test_read_bad_versions(self, protocol, get_random_path):
+        # For Python 2.x (respectively 3.y with y < 4), [expected]
+        # HIGHEST_PROTOCOL should be 2 (respectively 3). Hence, the protocol
+        # parameter should not exceed 2 (respectively 3).
+ if sys.version_info[:2] < (3, 0): + expect_hp = 2 + else: + expect_hp = 3 + with tm.assert_raises_regex(ValueError, + "pickle protocol %d asked for; the highest" + " available protocol is %d" % (protocol, + expect_hp)): + with tm.ensure_clean(get_random_path) as path: + df = tm.makeDataFrame() + df.to_pickle(path, protocol=protocol) From 91e9e52e625512ac6f84e51bad9f928b72a0b6ba Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 May 2017 09:05:54 -0500 Subject: [PATCH 571/933] BUG: Categorical comparison with unordered (#16339) Fixes categorical comparison operations improperly considering ordering when two unordered categoricals are compared. Closes #16014 --- doc/source/categorical.rst | 8 +++++++ doc/source/whatsnew/v0.20.2.txt | 3 +++ pandas/core/categorical.py | 28 ++++++++++++++++++------ pandas/tests/test_categorical.py | 37 ++++++++++++++++++++++++++++++++ 4 files changed, 69 insertions(+), 7 deletions(-) diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index a508e84465107..ef558381c5e6f 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -453,6 +453,14 @@ the original values: np.asarray(cat) > base +When you compare two unordered categoricals with the same categories, the order is not considered: + +.. ipython:: python + + c1 = pd.Categorical(['a', 'b'], categories=['a', 'b'], ordered=False) + c2 = pd.Categorical(['a', 'b'], categories=['b', 'a'], ordered=False) + c1 == c2 + Operations ---------- diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 7773f5abfb0ba..be4cf85606935 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -85,7 +85,10 @@ Numeric ^^^^^^^ +Categorical +^^^^^^^^^^^ +- Fixed comparison operations considering the order of the categories when both categoricals are unordered (:issue:`16014`) Other ^^^^^ diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index edbb07b7069e9..5b663f1d85ee7 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -55,17 +55,31 @@ def f(self, other): "equality or not") if isinstance(other, Categorical): # Two Categoricals can only be be compared if the categories are - # the same - if ((len(self.categories) != len(other.categories)) or - not ((self.categories == other.categories).all())): - raise TypeError("Categoricals can only be compared if " - "'categories' are the same") + # the same (maybe up to ordering, depending on ordered) + + msg = ("Categoricals can only be compared if " + "'categories' are the same.") + if len(self.categories) != len(other.categories): + raise TypeError(msg + " Categories are different lengths") + elif (self.ordered and not (self.categories == + other.categories).all()): + raise TypeError(msg) + elif not set(self.categories) == set(other.categories): + raise TypeError(msg) + if not (self.ordered == other.ordered): raise TypeError("Categoricals can only be compared if " "'ordered' is the same") - na_mask = (self._codes == -1) | (other._codes == -1) + if not self.ordered and not self.categories.equals( + other.categories): + # both unordered and different order + other_codes = _get_codes_for_values(other, self.categories) + else: + other_codes = other._codes + + na_mask = (self._codes == -1) | (other_codes == -1) f = getattr(self._codes, op) - ret = f(other._codes) + ret = f(other_codes) if na_mask.any(): # In other series, the leads to False, so do that here too ret[na_mask] = False diff --git a/pandas/tests/test_categorical.py 
b/pandas/tests/test_categorical.py index 57676be68bedf..f48eea23220b8 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -3821,6 +3821,43 @@ def test_cat_equality(self): pytest.raises(TypeError, lambda: a > b) pytest.raises(TypeError, lambda: b > a) + @pytest.mark.parametrize('ctor', [ + lambda *args, **kwargs: Categorical(*args, **kwargs), + lambda *args, **kwargs: Series(Categorical(*args, **kwargs)), + ]) + def test_unordered_different_order_equal(self, ctor): + # https://github.com/pandas-dev/pandas/issues/16014 + c1 = ctor(['a', 'b'], categories=['a', 'b'], ordered=False) + c2 = ctor(['a', 'b'], categories=['b', 'a'], ordered=False) + assert (c1 == c2).all() + + c1 = ctor(['a', 'b'], categories=['a', 'b'], ordered=False) + c2 = ctor(['b', 'a'], categories=['b', 'a'], ordered=False) + assert (c1 != c2).all() + + c1 = ctor(['a', 'a'], categories=['a', 'b'], ordered=False) + c2 = ctor(['b', 'b'], categories=['b', 'a'], ordered=False) + assert (c1 != c2).all() + + c1 = ctor(['a', 'a'], categories=['a', 'b'], ordered=False) + c2 = ctor(['a', 'b'], categories=['b', 'a'], ordered=False) + result = c1 == c2 + tm.assert_numpy_array_equal(np.array(result), np.array([True, False])) + + def test_unordered_different_categories_raises(self): + c1 = Categorical(['a', 'b'], categories=['a', 'b'], ordered=False) + c2 = Categorical(['a', 'c'], categories=['c', 'a'], ordered=False) + with tm.assert_raises_regex(TypeError, + "Categoricals can only be compared"): + c1 == c2 + + def test_compare_different_lengths(self): + c1 = Categorical([], categories=['a', 'b']) + c2 = Categorical([], categories=['a']) + msg = "Categories are different lengths" + with tm.assert_raises_regex(TypeError, msg): + c1 == c2 + def test_concat_append(self): cat = pd.Categorical(["a", "b"], categories=["a", "b"]) vals = [1, 2] From bd300e4ed90d6d719325cefe89f24f27c1b35d94 Mon Sep 17 00:00:00 2001 From: Pierre Haessig Date: Thu, 18 May 2017 22:22:30 +0200 Subject: [PATCH 572/933] DOC: minor enhancement of DataFrame.insert docstring (#16341) --- pandas/core/frame.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3b0cc5619a1cd..3d3d56c1e0331 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2559,15 +2559,17 @@ def insert(self, loc, column, value, allow_duplicates=False): """ Insert column into DataFrame at specified location. - If `allow_duplicates` is False, raises Exception if column - is already contained in the DataFrame. + Raises a ValueError if `column` is already contained in the DataFrame, + unless `allow_duplicates` is set to True. Parameters ---------- loc : int - Must have 0 <= loc <= len(columns) - column : object - value : scalar, Series, or array-like + Insertion index. 
Must satisfy 0 <= loc <= len(columns)
+        column : string, number, or hashable object
+            label of the inserted column
+        value : int, Series, or array-like
+
+        allow_duplicates : bool, optional
         """
         self._ensure_valid_index(value)
         value = self._sanitize_column(column, value, broadcast=False)

From 16c247bcfb214d296ddf192520db51cb310268e8 Mon Sep 17 00:00:00 2001
From: David Read
Date: Thu, 18 May 2017 21:43:25 +0100
Subject: [PATCH 573/933] Fixes truncated error message "C extension:
 umpy.core.multiarray failed to import" (#16366)

---
 pandas/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/__init__.py b/pandas/__init__.py
index 48ac9d173559d..8d9b75ccd6c2c 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -28,7 +28,7 @@
                       tslib as _tslib)
 except ImportError as e:  # pragma: no cover
     # hack but overkill to use re
-    module = str(e).lstrip('cannot import name ')
+    module = str(e).replace('cannot import name ', '')
     raise ImportError("C extension: {0} not built. If you want to import "
                       "pandas from the source directory, you may need to run "
                       "'python setup.py build_ext --inplace --force' to build "

From 7ea5f49fa238ef27da93b5a6c393b5fc3770eb26 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Thu, 18 May 2017 16:06:27 -0500
Subject: [PATCH 574/933] ENH: Support fspath protocol (#16301)

* ENH: Support fspath protocol

Ensures that most of pandas readers and writers will honor the
fspath protocol, if an object defines it.

TST: remove old xfails

* API: Raise AttributeError on closed HDFStore

Previously, we called _check_if_open, which would raise a
ClosedFileError whenever the desired attribute wasn't found. This
prevented the check required for PEP519 to work properly, since
hasattr shouldn't raise that error.

* ENH: add __fspath__ to pandas own file-like objects

- HDFStore
- ExcelFile
---
 doc/source/whatsnew/v0.21.0.txt             |   6 ++
 pandas/io/common.py                         |  25 ++++--
 pandas/io/excel.py                          |  24 ++++--
 pandas/io/feather_format.py                 |   3 +
 pandas/io/formats/excel.py                  |  11 ++-
 pandas/io/formats/format.py                 |   5 +-
 pandas/io/json/json.py                      |   4 +-
 pandas/io/packers.py                        |   3 +-
 pandas/io/pickle.py                         |   5 +-
 pandas/io/pytables.py                       |  31 ++++----
 pandas/io/sas/sasreader.py                  |   2 +
 pandas/io/stata.py                          |   6 +-
 pandas/tests/io/data/feather-0_3_1.feather  | Bin 0 -> 672 bytes
 pandas/tests/io/data/fixed_width_format.txt |   3 +
 pandas/tests/io/msgpack/data/frame.mp       | Bin 0 -> 309 bytes
 pandas/tests/io/sas/test_sas7bdat.py        |   3 -
 pandas/tests/io/test_common.py              |  82 ++++++++++++++++++++
 pandas/tests/io/test_excel.py               |  21 +++++
 pandas/tests/io/test_feather.py             |   2 -
 pandas/tests/io/test_packers.py             |   2 -
 pandas/tests/io/test_pytables.py            |  23 ++++--
 pandas/tests/io/test_stata.py               |  10 ++-
 setup.py                                    |   1 +
 23 files changed, 211 insertions(+), 61 deletions(-)
 create mode 100644 pandas/tests/io/data/feather-0_3_1.feather
 create mode 100644 pandas/tests/io/data/fixed_width_format.txt
 create mode 100644 pandas/tests/io/msgpack/data/frame.mp

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 351aa9ebbdc32..0a3a440ced54f 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -20,6 +20,10 @@ Check the :ref:`API Changes <whatsnew_0210.api_breaking>` and :ref:`deprecations
 New features
 ~~~~~~~~~~~~

+- Support for `PEP 519 -- Adding a file system path protocol
+  <https://www.python.org/dev/peps/pep-0519/>`_ on most readers and writers (:issue:`13823`)
+- Added ``__fspath__`` method to :class:`pandas.HDFStore`, :class:`pandas.ExcelFile`,
+  and :class:`pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`)
.. _whatsnew_0210.enhancements.other: @@ -40,6 +44,8 @@ Backwards incompatible API changes - Support has been dropped for Python 3.4 (:issue:`15251`) - The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`) +- Accessing a non-existent attribute on a closed :class:`HDFStore` will now + raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`) .. _whatsnew_0210.api: diff --git a/pandas/io/common.py b/pandas/io/common.py index 14ac4d366fcef..f4e12ea3fb173 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -146,8 +146,7 @@ def _validate_header_arg(header): def _stringify_path(filepath_or_buffer): - """Return the argument coerced to a string if it was a pathlib.Path - or a py.path.local + """Attempt to convert a path-like object to a string. Parameters ---------- @@ -155,8 +154,21 @@ def _stringify_path(filepath_or_buffer): Returns ------- - str_filepath_or_buffer : a the string version of the input path + str_filepath_or_buffer : maybe a string version of the object + + Notes + ----- + Objects supporting the fspath protocol (python 3.6+) are coerced + according to its __fspath__ method. + + For backwards compatibility with older pythons, pathlib.Path and + py.path objects are specially coerced. + + Any other object is passed through unchanged, which includes bytes, + strings, buffers, or anything else that's not even path-like. """ + if hasattr(filepath_or_buffer, '__fspath__'): + return filepath_or_buffer.__fspath__() if _PATHLIB_INSTALLED and isinstance(filepath_or_buffer, pathlib.Path): return text_type(filepath_or_buffer) if _PY_PATH_INSTALLED and isinstance(filepath_or_buffer, LocalPath): @@ -180,10 +192,10 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, ------- a filepath_or_buffer, the encoding, the compression """ + filepath_or_buffer = _stringify_path(filepath_or_buffer) if _is_url(filepath_or_buffer): - url = str(filepath_or_buffer) - req = _urlopen(url) + req = _urlopen(filepath_or_buffer) content_encoding = req.headers.get('Content-Encoding', None) if content_encoding == 'gzip': # Override compression based on Content-Encoding header @@ -197,9 +209,6 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, encoding=encoding, compression=compression) - # Convert pathlib.Path/py.path.local or string - filepath_or_buffer = _stringify_path(filepath_or_buffer) - if isinstance(filepath_or_buffer, (compat.string_types, compat.binary_type, mmap.mmap)): diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 9b0f49ccc45b1..fba3d7559aeaf 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -18,7 +18,8 @@ from pandas.io.parsers import TextParser from pandas.errors import EmptyDataError from pandas.io.common import (_is_url, _urlopen, _validate_header_arg, - get_filepath_or_buffer, _NA_VALUES) + get_filepath_or_buffer, _NA_VALUES, + _stringify_path) from pandas.core.indexes.period import Period import pandas._libs.json as json from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, @@ -233,7 +234,10 @@ def __init__(self, io, **kwds): raise ImportError("pandas requires xlrd >= 0.9.0 for excel " "support, current version " + xlrd.__VERSION__) + # could be a str, ExcelFile, Book, etc. 
self.io = io + # Always a string + self._io = _stringify_path(io) engine = kwds.pop('engine', None) @@ -242,10 +246,10 @@ def __init__(self, io, **kwds): # If io is a url, want to keep the data as bytes so can't pass # to get_filepath_or_buffer() - if _is_url(io): - io = _urlopen(io) - elif not isinstance(io, (ExcelFile, xlrd.Book)): - io, _, _ = get_filepath_or_buffer(io) + if _is_url(self._io): + io = _urlopen(self._io) + elif not isinstance(self.io, (ExcelFile, xlrd.Book)): + io, _, _ = get_filepath_or_buffer(self._io) if engine == 'xlrd' and isinstance(io, xlrd.Book): self.book = io @@ -253,12 +257,15 @@ def __init__(self, io, **kwds): # N.B. xlrd.Book has a read attribute too data = io.read() self.book = xlrd.open_workbook(file_contents=data) - elif isinstance(io, compat.string_types): - self.book = xlrd.open_workbook(io) + elif isinstance(self._io, compat.string_types): + self.book = xlrd.open_workbook(self._io) else: raise ValueError('Must explicitly set engine if not passing in' ' buffer or path for io.') + def __fspath__(self): + return self._io + def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0, names=None, index_col=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, @@ -754,6 +761,9 @@ def __init__(self, path, engine=None, else: self.datetime_format = datetime_format + def __fspath__(self): + return _stringify_path(self.path) + def _get_sheet_name(self, sheet_name): if sheet_name is None: sheet_name = self.cur_sheet diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index de6d04c105376..8bdb23fc1ae6a 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -3,6 +3,7 @@ from distutils.version import LooseVersion from pandas import DataFrame, RangeIndex, Int64Index from pandas.compat import range +from pandas.io.common import _stringify_path def _try_import(): @@ -43,6 +44,7 @@ def to_feather(df, path): path : string File path """ + path = _stringify_path(path) if not isinstance(df, DataFrame): raise ValueError("feather only support IO with DataFrames") @@ -99,4 +101,5 @@ def read_feather(path): """ feather = _try_import() + path = _stringify_path(path) return feather.read_dataframe(path) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 80587f9a752c7..80c3880d39dfd 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -7,7 +7,7 @@ import numpy as np -from pandas.compat import reduce, string_types +from pandas.compat import reduce from pandas.io.formats.css import CSSResolver, CSSWarning from pandas.io.formats.printing import pprint_thing from pandas.core.dtypes.common import is_float @@ -617,9 +617,12 @@ def write(self, writer, sheet_name='Sheet1', startrow=0, and ``io.excel.xlsm.writer``. 
""" from pandas.io.excel import ExcelWriter - need_save = False - if isinstance(writer, string_types): - writer = ExcelWriter(writer, engine=engine) + from pandas.io.common import _stringify_path + + if isinstance(writer, ExcelWriter): + need_save = False + else: + writer = ExcelWriter(_stringify_path(writer), engine=engine) need_save = True formatted_cells = self.get_formatted_cells() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 183d8d9d87d0b..054db769c56dd 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -369,7 +369,10 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, index_names=True, line_width=None, max_rows=None, max_cols=None, show_dimensions=False, decimal='.', **kwds): self.frame = frame - self.buf = _expand_user(buf) if buf is not None else StringIO() + if buf is not None: + self.buf = _expand_user(_stringify_path(buf)) + else: + self.buf = StringIO() self.show_index_names = index_names if sparsify is None: diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index b2fe074732cbb..31907ad586817 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -7,7 +7,8 @@ from pandas.compat import StringIO, long, u from pandas import compat, isnull from pandas import Series, DataFrame, to_datetime, MultiIndex -from pandas.io.common import get_filepath_or_buffer, _get_handle +from pandas.io.common import (get_filepath_or_buffer, _get_handle, + _stringify_path) from pandas.core.common import AbstractMethodError from pandas.io.formats.printing import pprint_thing from .normalize import _convert_to_line_delimits @@ -25,6 +26,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', double_precision=10, force_ascii=True, date_unit='ms', default_handler=None, lines=False): + path_or_buf = _stringify_path(path_or_buf) if lines and orient != 'records': raise ValueError( "'lines' keyword only valid when 'orient' is records") diff --git a/pandas/io/packers.py b/pandas/io/packers.py index a4b454eda7472..a2fc4db23700c 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -61,7 +61,7 @@ from pandas.core.sparse.array import BlockIndex, IntIndex from pandas.core.generic import NDFrame from pandas.errors import PerformanceWarning -from pandas.io.common import get_filepath_or_buffer +from pandas.io.common import get_filepath_or_buffer, _stringify_path from pandas.core.internals import BlockManager, make_block, _safe_reshape import pandas.core.internals as internals @@ -149,6 +149,7 @@ def writer(fh): for a in args: fh.write(pack(a, **kwargs)) + path_or_buf = _stringify_path(path_or_buf) if isinstance(path_or_buf, compat.string_types): with open(path_or_buf, mode) as fh: writer(fh) diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 6f4c714931fc8..6f345092c514d 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -4,7 +4,7 @@ from numpy.lib.format import read_array, write_array from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc, PY3 from pandas.core.dtypes.common import is_datetime64_dtype, _NS_DTYPE -from pandas.io.common import _get_handle, _infer_compression +from pandas.io.common import _get_handle, _infer_compression, _stringify_path def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL): @@ -34,6 +34,7 @@ def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL): """ + path = _stringify_path(path) inferred_compression = _infer_compression(path, compression) f, fh = _get_handle(path, 'wb', 
compression=inferred_compression, @@ -71,7 +72,7 @@ def read_pickle(path, compression='infer'): ------- unpickled : type of object stored in file """ - + path = _stringify_path(path) inferred_compression = _infer_compression(path, compression) def read_wrapper(func): diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f017421c1f83a..6665ccf8ce4c5 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -309,9 +309,17 @@ def read_hdf(path_or_buf, key=None, **kwargs): if 'where' in kwargs: kwargs['where'] = _ensure_term(kwargs['where'], scope_level=1) - path_or_buf = _stringify_path(path_or_buf) - if isinstance(path_or_buf, string_types): + if isinstance(path_or_buf, HDFStore): + if not path_or_buf.is_open: + raise IOError('The HDFStore must be open for reading.') + store = path_or_buf + auto_close = False + else: + path_or_buf = _stringify_path(path_or_buf) + if not isinstance(path_or_buf, string_types): + raise NotImplementedError('Support for generic buffers has not ' + 'been implemented.') try: exists = os.path.exists(path_or_buf) @@ -323,22 +331,11 @@ def read_hdf(path_or_buf, key=None, **kwargs): raise compat.FileNotFoundError( 'File %s does not exist' % path_or_buf) + store = HDFStore(path_or_buf, **kwargs) # can't auto open/close if we are using an iterator # so delegate to the iterator - store = HDFStore(path_or_buf, **kwargs) auto_close = True - elif isinstance(path_or_buf, HDFStore): - if not path_or_buf.is_open: - raise IOError('The HDFStore must be open for reading.') - - store = path_or_buf - auto_close = False - - else: - raise NotImplementedError('Support for generic buffers has not been ' - 'implemented.') - try: if key is None: groups = store.groups() @@ -440,7 +437,7 @@ def __init__(self, path, mode=None, complevel=None, complib=None, "complib only supports {libs} compression.".format( libs=tables.filters.all_complibs)) - self._path = path + self._path = _stringify_path(path) if mode is None: mode = 'a' self._mode = mode @@ -451,6 +448,9 @@ def __init__(self, path, mode=None, complevel=None, complib=None, self._filters = None self.open(mode=mode, **kwargs) + def __fspath__(self): + return self._path + @property def root(self): """ return the root node """ @@ -472,7 +472,6 @@ def __delitem__(self, key): def __getattr__(self, name): """ allow attribute access to get stores """ - self._check_if_open() try: return self.get(name) except: diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 3e4d9c9024dbd..b8a0bf5733158 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -2,6 +2,7 @@ Read SAS sas7bdat or xport files. 
""" from pandas import compat +from pandas.io.common import _stringify_path def read_sas(filepath_or_buffer, format=None, index=None, encoding=None, @@ -34,6 +35,7 @@ def read_sas(filepath_or_buffer, format=None, index=None, encoding=None, buffer_error_msg = ("If this is a buffer object rather " "than a string name, you must specify " "a format string") + filepath_or_buffer = _stringify_path(filepath_or_buffer) if not isinstance(filepath_or_buffer, compat.string_types): raise ValueError(buffer_error_msg) try: diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 55cac83804cd9..e03e87f09173e 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -30,7 +30,8 @@ from pandas.util._decorators import Appender import pandas as pd -from pandas.io.common import get_filepath_or_buffer, BaseIterator +from pandas.io.common import (get_filepath_or_buffer, BaseIterator, + _stringify_path) from pandas._libs.lib import max_len_string_array, infer_dtype from pandas._libs.tslib import NaT, Timestamp @@ -976,6 +977,7 @@ def __init__(self, path_or_buf, convert_dates=True, self._lines_read = 0 self._native_byteorder = _set_endianness(sys.byteorder) + path_or_buf = _stringify_path(path_or_buf) if isinstance(path_or_buf, str): path_or_buf, encoding, _ = get_filepath_or_buffer( path_or_buf, encoding=self._default_encoding @@ -1930,7 +1932,7 @@ def __init__(self, fname, data, convert_dates=None, write_index=True, if byteorder is None: byteorder = sys.byteorder self._byteorder = _set_endianness(byteorder) - self._fname = fname + self._fname = _stringify_path(fname) self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8} def _write(self, to_write): diff --git a/pandas/tests/io/data/feather-0_3_1.feather b/pandas/tests/io/data/feather-0_3_1.feather new file mode 100644 index 0000000000000000000000000000000000000000..5a2c7b3dcc684b3676a94287702ea01aefa057af GIT binary patch literal 672 zcmZvaF;2r!42GRH6%j;Hh9YHPs1O4KL)q8}5_IMO2&n~CAXRkf$jr*b0gw=fKs^*E zAbkIOF+@;D_Otzd|8|^AF3ye(Nn|8HPIP-QYtvcl*z0)x_%bv*zkYri?8fZpX?t6H zxz_sISSiy5(TliUEtb!#> zF9YhAU;SJ@TFyNZ>fxdp=8NTY#aTB^!?9gsBtEC}yvO@q#oZA)Lc3hT-$In}#Q8{= z_3vU`Iu7A~UhS%)Ca0=oYemE*>a##cXt$5{aKpsvqrCR3eU%_L>}5wh5`Q0$OWWUs wPyBryFa9V6J>vJ&m(`r=t6g;GlRsN7H9B~0T=zM4NF3)??l1odi3$JaHx?%;)c^nh literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/fixed_width_format.txt b/pandas/tests/io/data/fixed_width_format.txt new file mode 100644 index 0000000000000..bb487d8de7ef9 --- /dev/null +++ b/pandas/tests/io/data/fixed_width_format.txt @@ -0,0 +1,3 @@ +A B C +1 2 3 +4 5 6 diff --git a/pandas/tests/io/msgpack/data/frame.mp b/pandas/tests/io/msgpack/data/frame.mp new file mode 100644 index 0000000000000000000000000000000000000000..21e20d262b26c1a4835bdb4c00109a371e7e46f1 GIT binary patch literal 309 zcmYk2O%8%E5Jo9yGVV3T#H|+~Bc1pIl%`V+>`BSsB9Di(QO7A<_Z!tkRhFlHfXnkW7Y@kqOFZ^iNWKsK=wu;N|_+U!*!fyhnO_?A!CdXU{q@e~9VdSucd1w*T|Td;pDWe-{7% literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 7070c3c7c9382..730bf94cb2987 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -3,7 +3,6 @@ import pandas.util.testing as tm import os import io -import pytest import numpy as np @@ -66,7 +65,6 @@ def test_from_iterator(self): tm.assert_frame_equal(df, df0.iloc[2:5, :]) rdr.close() - @pytest.mark.xfail(reason="read_sas currently doesn't work with pathlib") def test_path_pathlib(self): tm._skip_if_no_pathlib() from pathlib import Path @@ -77,7 +75,6 @@ def 
test_path_pathlib(self): df = pd.read_sas(fname, encoding='utf-8') tm.assert_frame_equal(df, df0) - @pytest.mark.xfail(reason="read_sas currently doesn't work with localpath") def test_path_localpath(self): tm._skip_if_no_localpath() from py.path import local as LocalPath diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index a1a95e09915f1..b7d158dd75960 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -6,6 +6,7 @@ import os from os.path import isabs +import pandas as pd import pandas.util.testing as tm from pandas.io import common @@ -24,6 +25,18 @@ pass +class CustomFSPath(object): + """For testing fspath on unknown objects""" + def __init__(self, path): + self.path = path + + def __fspath__(self): + return self.path + + +HERE = os.path.dirname(__file__) + + class TestCommonIOCapabilities(object): data1 = """index,A,B,C,D foo,2,3,4,5 @@ -65,6 +78,11 @@ def test_stringify_path_localpath(self): lpath = LocalPath(path) assert common._stringify_path(lpath) == abs_path + def test_stringify_path_fspath(self): + p = CustomFSPath('foo/bar.csv') + result = common._stringify_path(p) + assert result == 'foo/bar.csv' + def test_get_filepath_or_buffer_with_path(self): filename = '~/sometest' filepath_or_buffer, _, _ = common.get_filepath_or_buffer(filename) @@ -89,6 +107,70 @@ def test_iterator(self): tm.assert_frame_equal(first, expected.iloc[[0]]) tm.assert_frame_equal(concat(it), expected.iloc[1:]) + @pytest.mark.parametrize('reader, module, path', [ + (pd.read_csv, 'os', os.path.join(HERE, 'data', 'iris.csv')), + (pd.read_table, 'os', os.path.join(HERE, 'data', 'iris.csv')), + (pd.read_fwf, 'os', os.path.join(HERE, 'data', + 'fixed_width_format.txt')), + (pd.read_excel, 'xlrd', os.path.join(HERE, 'data', 'test1.xlsx')), + (pd.read_feather, 'feather', os.path.join(HERE, 'data', + 'feather-0_3_1.feather')), + (pd.read_hdf, 'tables', os.path.join(HERE, 'data', 'legacy_hdf', + 'datetimetz_object.h5')), + (pd.read_stata, 'os', os.path.join(HERE, 'data', 'stata10_115.dta')), + (pd.read_sas, 'os', os.path.join(HERE, 'sas', 'data', + 'test1.sas7bdat')), + (pd.read_json, 'os', os.path.join(HERE, 'json', 'data', + 'tsframe_v012.json')), + (pd.read_msgpack, 'os', os.path.join(HERE, 'msgpack', 'data', + 'frame.mp')), + (pd.read_pickle, 'os', os.path.join(HERE, 'data', + 'categorical_0_14_1.pickle')), + ]) + def test_read_fspath_all(self, reader, module, path): + pytest.importorskip(module) + + mypath = CustomFSPath(path) + result = reader(mypath) + expected = reader(path) + if path.endswith('.pickle'): + # categorical + tm.assert_categorical_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize('writer_name, writer_kwargs, module', [ + ('to_csv', {}, 'os'), + ('to_excel', {'engine': 'xlwt'}, 'xlwt'), + ('to_feather', {}, 'feather'), + ('to_hdf', {'key': 'bar', 'mode': 'w'}, 'tables'), + ('to_html', {}, 'os'), + ('to_json', {}, 'os'), + ('to_latex', {}, 'os'), + ('to_msgpack', {}, 'os'), + ('to_pickle', {}, 'os'), + ('to_stata', {}, 'os'), + ]) + def test_write_fspath_all(self, writer_name, writer_kwargs, module): + p1 = tm.ensure_clean('string') + p2 = tm.ensure_clean('fspath') + df = pd.DataFrame({"A": [1, 2]}) + + with p1 as string, p2 as fspath: + pytest.importorskip(module) + mypath = CustomFSPath(fspath) + writer = getattr(df, writer_name) + + writer(string, **writer_kwargs) + with open(string, 'rb') as f: + expected = f.read() + + writer(mypath, **writer_kwargs) + with open(fspath, 'rb') as 
f: + result = f.read() + + assert result == expected + class TestMMapWrapper(object): diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index b4a5b24616728..bbf4f1107ac9e 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -2499,3 +2499,24 @@ def custom_converter(css): n_cells += 1 assert n_cells == (10 + 1) * (3 + 1) + + +class TestFSPath(object): + + @pytest.mark.skipif(sys.version_info < (3, 6), reason='requires fspath') + def test_excelfile_fspath(self): + _skip_if_no_openpyxl() + with tm.ensure_clean('foo.xlsx') as path: + df = DataFrame({"A": [1, 2]}) + df.to_excel(path) + xl = ExcelFile(path) + result = os.fspath(xl) + assert result == path + + @pytest.mark.skipif(sys.version_info < (3, 6), reason='requires fspath') + # @pytest.mark.xfail + def test_excelwriter_fspath(self): + _skip_if_no_openpyxl() + with tm.ensure_clean('foo.xlsx') as path: + writer = ExcelWriter(path) + assert os.fspath(writer) == str(path) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index e3190efecba30..948ab736af6c6 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -116,13 +116,11 @@ def test_write_with_index(self): df.columns = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)]), self.check_error_on_write(df, ValueError) - @pytest.mark.xfail(reason="feather currently doesn't work with pathlib") def test_path_pathlib(self): df = tm.makeDataFrame().reset_index() result = tm.round_trip_pathlib(df.to_feather, pd.read_feather) tm.assert_frame_equal(df, result) - @pytest.mark.xfail(reason="feather currently doesn't work with localpath") def test_path_localpath(self): df = tm.makeDataFrame().reset_index() result = tm.round_trip_localpath(df.to_feather, pd.read_feather) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index fd42becca3ac3..a28adcf1ee771 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -134,13 +134,11 @@ def test_string_io(self): result = read_msgpack(p) tm.assert_frame_equal(result, df) - @pytest.mark.xfail(reason="msgpack currently doesn't work with pathlib") def test_path_pathlib(self): df = tm.makeDataFrame() result = tm.round_trip_pathlib(df.to_msgpack, read_msgpack) tm.assert_frame_equal(df, result) - @pytest.mark.xfail(reason="msgpack currently doesn't work with localpath") def test_path_localpath(self): df = tm.makeDataFrame() result = tm.round_trip_localpath(df.to_msgpack, read_msgpack) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index c9d2da67b8ee3..17f524cc279c0 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -16,7 +16,7 @@ date_range, timedelta_range, Index, DatetimeIndex, isnull) -from pandas.compat import is_platform_windows, PY3, PY35 +from pandas.compat import is_platform_windows, PY3, PY35, BytesIO from pandas.io.formats.printing import pprint_thing tables = pytest.importorskip('tables') @@ -4290,7 +4290,6 @@ def test_path_pathlib(self): lambda p: pd.read_hdf(p, 'df')) tm.assert_frame_equal(df, result) - @pytest.mark.xfail(reason='pathlib currently doesnt work with HDFStore') def test_path_pathlib_hdfstore(self): df = tm.makeDataFrame() @@ -4300,7 +4299,8 @@ def writer(path): def reader(path): with pd.HDFStore(path) as store: - pd.read_hdf(store, 'df') + return pd.read_hdf(store, 'df') + result = tm.round_trip_pathlib(writer, reader) tm.assert_frame_equal(df, result) @@ -4311,7 +4311,6 @@ def 
test_pickle_path_localpath(self): lambda p: pd.read_hdf(p, 'df')) tm.assert_frame_equal(df, result) - @pytest.mark.xfail(reason='localpath currently doesnt work with HDFStore') def test_path_localpath_hdfstore(self): df = tm.makeDataFrame() @@ -4321,7 +4320,8 @@ def writer(path): def reader(path): with pd.HDFStore(path) as store: - pd.read_hdf(store, 'df') + return pd.read_hdf(store, 'df') + result = tm.round_trip_localpath(writer, reader) tm.assert_frame_equal(df, result) @@ -4453,7 +4453,7 @@ def f(): pytest.raises(ClosedFileError, lambda: 'df' in store) pytest.raises(ClosedFileError, lambda: len(store)) pytest.raises(ClosedFileError, lambda: store['df']) - pytest.raises(ClosedFileError, lambda: store.df) + pytest.raises(AttributeError, lambda: store.df) pytest.raises(ClosedFileError, store.select, 'df') pytest.raises(ClosedFileError, store.get, 'df') pytest.raises(ClosedFileError, store.append, 'df2', df) @@ -5007,8 +5007,9 @@ def test_read_hdf_errors(self): store = HDFStore(path, mode='r') store.close() pytest.raises(IOError, read_hdf, store, 'df') - with open(path, mode='r') as store: - pytest.raises(NotImplementedError, read_hdf, store, 'df') + + def test_read_hdf_generic_buffer_errors(self): + pytest.raises(NotImplementedError, read_hdf, BytesIO(b''), 'df') def test_invalid_complib(self): df = DataFrame(np.random.rand(4, 5), @@ -5154,6 +5155,12 @@ def test_query_compare_column_type(self): expected = df.loc[[], :] tm.assert_frame_equal(expected, result) + @pytest.mark.skipif(sys.version_info < (3, 6), reason="Need python 3.6") + def test_fspath(self): + with tm.ensure_clean('foo.h5') as path: + with pd.HDFStore(path) as store: + assert os.fspath(store) == str(path) + class TestHDFComplexValues(Base): # GH10447 diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 4ec990116bb62..b9c6736563160 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1284,14 +1284,16 @@ def test_invalid_encoding(self): with tm.ensure_clean() as path: original.to_stata(path, encoding='utf-8') - @pytest.mark.xfail(reason="stata currently doesn't work with pathlib") def test_path_pathlib(self): df = tm.makeDataFrame() - result = tm.round_trip_pathlib(df.to_stata, read_stata) + df.index.name = 'index' + reader = lambda x: read_stata(x).set_index('index') + result = tm.round_trip_pathlib(df.to_stata, reader) tm.assert_frame_equal(df, result) - @pytest.mark.xfail(reason="stata currently doesn't work with localpath") def test_pickle_path_localpath(self): df = tm.makeDataFrame() - result = tm.round_trip_localpath(df.to_stata, read_stata) + df.index.name = 'index' + reader = lambda x: read_stata(x).set_index('index') + result = tm.round_trip_localpath(df.to_stata, reader) tm.assert_frame_equal(df, result) diff --git a/setup.py b/setup.py index ff537d5868db6..82d5f407228a9 100755 --- a/setup.py +++ b/setup.py @@ -709,6 +709,7 @@ def pxd(name): 'data/html_encoding/*.html', 'json/data/*.json'], 'pandas.tests.io.formats': ['data/*.csv'], + 'pandas.tests.io.msgpack': ['data/*.mp'], 'pandas.tests.reshape': ['data/*.csv'], 'pandas.tests.tseries': ['data/*.pickle'], 'pandas.io.formats': ['templates/*.tpl'] From ebcbd4f3d96d90521289001b0301c78cc2002c70 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 18 May 2017 18:00:11 -0400 Subject: [PATCH 575/933] TST: remove some Panel/Panel4D deprecation warnings from tests (#16387) --- pandas/util/testing.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/util/testing.py 
b/pandas/util/testing.py index 81d452318d0b4..f987045c27d5f 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1802,22 +1802,24 @@ def makePeriodFrame(nper=None): def makePanel(nper=None): - cols = ['Item' + c for c in string.ascii_uppercase[:K - 1]] - data = dict((c, makeTimeDataFrame(nper)) for c in cols) - return Panel.fromDict(data) + with warnings.catch_warnings(record=True): + cols = ['Item' + c for c in string.ascii_uppercase[:K - 1]] + data = dict((c, makeTimeDataFrame(nper)) for c in cols) + return Panel.fromDict(data) def makePeriodPanel(nper=None): - cols = ['Item' + c for c in string.ascii_uppercase[:K - 1]] - data = dict((c, makePeriodFrame(nper)) for c in cols) - return Panel.fromDict(data) + with warnings.catch_warnings(record=True): + cols = ['Item' + c for c in string.ascii_uppercase[:K - 1]] + data = dict((c, makePeriodFrame(nper)) for c in cols) + return Panel.fromDict(data) def makePanel4D(nper=None): with warnings.catch_warnings(record=True): d = dict(l1=makePanel(nper), l2=makePanel(nper), l3=makePanel(nper)) - return Panel4D(d) + return Panel4D(d) def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, From 6dcf2ed2706b00020f6a3be5530ff8dc121ba989 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 18 May 2017 18:04:49 -0400 Subject: [PATCH 576/933] TST: xfail gbq tests as having issues --- pandas/tests/io/test_gbq.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 58a84ad4d47f8..8f20fb2e75c8a 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -92,6 +92,7 @@ def make_mixed_dataframe_v2(test_size): index=range(test_size)) +@pytest.mark.xfail(reason="gbq having issues") @pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyPath(object): From 22ae055df54831d7b2e7463b6ef70f54b3735fd0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 May 2017 17:39:21 -0500 Subject: [PATCH 577/933] DOC: Try with new token (#16389) --- .travis.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0156f17aa32a5..8386dce478eb3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,10 +20,10 @@ env: # pandas-docs/pandas-docs-travis GH # # # create a github personal access token - # cd pandas-docs/pandas-docs-travis - # travis encrypt - # PANDAS_GH_TOKEN=personal_access_token - secure: "S49Tn5dzBRu6QaQcSV8MoCeX9rn7l8xuHFJbFsT9jPm1l0YPb94S8iDk0Isw71SqvHBgh+j2cms9jgYn2N3VCArh5MpA0oKwTKRZEX3iLQv248dCY2C6LdzAKLA+8m2naDGcfc0qMLeNieCGZICccs0EKIGDt8m7VQBMqeT0YU0=" + # cd pandas-dev/pandas + # travis encrypt PANDAS_GH_TOKEN=personal_access_token + # correct the repo to be pandas-dev/pandas, not your fork + secure: "EkWLZhbrp/mXJOx38CHjs7BnjXafsqHtwxPQrqWy457VDFWhIY1DMnIR/lOWG+a20Qv52sCsFtiZEmMfUjf0pLGXOqurdxbYBGJ7/ikFLk9yV2rDwiArUlVM9bWFnFxHvdz9zewBH55WurrY4ShZWyV+x2dWjjceWG5VpWeI6sA=" git: # for cloning From 0d431312f64e9d8e127eae3003fce7b8a8663850 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 May 2017 20:37:20 -0500 Subject: [PATCH 578/933] DOC: push docs to new repo --- ci/build_docs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index f9f6744ab3a82..26917b8f9b792 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -54,7 +54,7 @@ if [ "$DOC" ]; then git commit -m "Version" --allow-empty git remote remove origin - git remote add origin "https://${PANDAS_GH_TOKEN}@github.com/pandas-docs/pandas-docs-travis.git" + git 
remote add origin "https://${PANDAS_GH_TOKEN}@github.com/pandas-dev/pandas-docs-travis.git"
     git fetch origin
     git remote -v

From 0f55de1b334131dd0577b91eb061dcb8bc65ef78 Mon Sep 17 00:00:00 2001
From: Tom Augspurger 
Date: Fri, 19 May 2017 13:02:48 -0500
Subject: [PATCH 579/933] DOC: Fix doc environment variable

Needs a hyphen
---
 .travis.yml | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 8386dce478eb3..8b6700e11d2c5 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,14 +16,10 @@ cache:
 
 env:
   global:
-
-    # pandas-docs/pandas-docs-travis GH #
-    #
-    # create a github personal access token
-    # cd pandas-dev/pandas
-    # travis encrypt PANDAS_GH_TOKEN=personal_access_token
-    # correct the repo to be pandas-dev/pandas, not your fork
-    secure: "EkWLZhbrp/mXJOx38CHjs7BnjXafsqHtwxPQrqWy457VDFWhIY1DMnIR/lOWG+a20Qv52sCsFtiZEmMfUjf0pLGXOqurdxbYBGJ7/ikFLk9yV2rDwiArUlVM9bWFnFxHvdz9zewBH55WurrY4ShZWyV+x2dWjjceWG5VpWeI6sA="
+    # create a github personal access token
+    # cd pandas-dev/pandas
+    # travis encrypt 'PANDAS_GH_TOKEN=personal_access_token' -r pandas-dev/pandas
+    - secure: "EkWLZhbrp/mXJOx38CHjs7BnjXafsqHtwxPQrqWy457VDFWhIY1DMnIR/lOWG+a20Qv52sCsFtiZEmMfUjf0pLGXOqurdxbYBGJ7/ikFLk9yV2rDwiArUlVM9bWFnFxHvdz9zewBH55WurrY4ShZWyV+x2dWjjceWG5VpWeI6sA="
 
 git:
   # for cloning

From a6fcec6c1a8ef5fe1079cfc95e44bea9d76aa51a Mon Sep 17 00:00:00 2001
From: tsdlovell 
Date: Sat, 20 May 2017 17:59:18 -0400
Subject: [PATCH 580/933] BUG: fixes #12405 by eliding values indexed by NaT
 in MPLPlot._get_xticks (#14540)

TST: add test for fix of #12405

DOC: update whatsnew/v0.20.2.txt
---
 doc/source/whatsnew/v0.20.2.txt            |  1 +
 pandas/plotting/_core.py                   |  2 ++
 pandas/tests/plotting/test_datetimelike.py | 16 +++++++++++++++-
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index be4cf85606935..4ec9daff4c0fc 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -60,6 +60,7 @@ Plotting
 ^^^^^^^^
 
 - Bug in ``DataFrame.plot`` with a single column and a list-like ``color`` (:issue:`3486`)
+- Bug in ``plot`` where ``NaT`` in ``DatetimeIndex`` results in ``Timestamp.min`` (:issue:`12405`)
 
 
 
diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index c0f9f62106330..49076ffb469cb 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -11,6 +11,7 @@
 from pandas.util._decorators import cache_readonly
 
 from pandas.core.base import PandasObject
+from pandas.core.dtypes.missing import notnull
 from pandas.core.dtypes.common import (
     is_list_like,
     is_integer,
@@ -538,6 +539,7 @@ def _get_xticks(self, convert_period=False):
             """
             x = index._mpl_repr()
         elif is_datetype:
+            self.data = self.data[notnull(self.data.index)]
             self.data = self.data.sort_index()
             x = self.data.index._mpl_repr()
         else:
diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py
index 3e7e789fa7de7..92537059218d6 100644
--- a/pandas/tests/plotting/test_datetimelike.py
+++ b/pandas/tests/plotting/test_datetimelike.py
@@ -6,7 +6,7 @@
 from pandas.compat import lrange, zip
 
 import numpy as np
-from pandas import Index, Series, DataFrame
+from pandas import Index, Series, DataFrame, NaT
 from pandas.compat import is_platform_mac
 from pandas.core.indexes.datetimes import date_range, bdate_range
 from pandas.core.indexes.timedeltas import timedelta_range
@@ -811,6 +811,20 @@ def test_mixed_freq_shared_ax(self):
         # assert (ax1.lines[0].get_xydata()[0, 0] ==
         #         ax2.lines[0].get_xydata()[0, 0])
 
+    def test_nat_handling(self):
+
+        fig = self.plt.gcf()
+        # self.plt.clf()
+        ax = fig.add_subplot(111)
+
+        dti = DatetimeIndex(['2015-01-01', NaT, '2015-01-03'])
+        s = Series(range(len(dti)), dti)
+        s.plot(ax=ax)
+        xdata = ax.get_lines()[0].get_xdata()
+        # plot x data is bounded by index values
+        assert s.index.min() <= Series(xdata).min()
+        assert Series(xdata).max() <= s.index.max()
+
     @slow
     def test_to_weekly_resampling(self):
         idxh = date_range('1/1/1999', periods=52, freq='W')
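The change above filters out points whose index is NaT before matplotlib sees
them, rather than letting NaT be coerced to Timestamp.min. A minimal
standalone sketch of that filtering step (an illustrative example, not part of
the patch; pd.notnull is the same check the patch imports from
pandas.core.dtypes.missing):

    import pandas as pd
    from pandas import DatetimeIndex, NaT, Series

    dti = DatetimeIndex(['2015-01-01', NaT, '2015-01-03'])
    s = Series(range(len(dti)), index=dti)

    # drop rows whose index is NaT, as _get_xticks now does before plotting
    filtered = s[pd.notnull(s.index)]
    assert len(filtered) == 2
    assert not filtered.index.hasnans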
From d071c6538e67672da0cc32bb6a7f9c1baa3dc1b7 Mon Sep 17 00:00:00 2001
From: Sam Foo 
Date: Mon, 22 May 2017 01:24:01 -0700
Subject: [PATCH 581/933] DOC: Added examples for union_categoricals (#16397)

closes #16390
---
 pandas/core/dtypes/concat.py | 68 ++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index ddff78c9d511f..292d5f608d4cb 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -242,6 +242,74 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False):
       - sort_categories=True and Categoricals are ordered
     ValueError
         Empty list of categoricals passed
+
+    Notes
+    -----
+
+    To learn more about categories, see `link
+    <http://pandas.pydata.org/pandas-docs/stable/categorical.html#unioning>`__
+
+    Examples
+    --------
+
+    >>> from pandas.api.types import union_categoricals
+
+    If you want to combine categoricals that do not necessarily have
+    the same categories, `union_categoricals` will combine a list-like
+    of categoricals. The new categories will be the union of the
+    categories being combined.
+
+    >>> a = pd.Categorical(["b", "c"])
+    >>> b = pd.Categorical(["a", "b"])
+    >>> union_categoricals([a, b])
+    [b, c, a, b]
+    Categories (3, object): [b, c, a]
+
+    By default, the resulting categories will be ordered as they appear
+    in the `categories` of the data. If you want the categories to be
+    lexsorted, use `sort_categories=True` argument.
+
+    >>> union_categoricals([a, b], sort_categories=True)
+    [b, c, a, b]
+    Categories (3, object): [a, b, c]
+
+    `union_categoricals` also works with the case of combining two
+    categoricals of the same categories and order information (e.g. what
+    you could also `append` for).
+
+    >>> a = pd.Categorical(["a", "b"], ordered=True)
+    >>> b = pd.Categorical(["a", "b", "a"], ordered=True)
+    >>> union_categoricals([a, b])
+    [a, b, a, b, a]
+    Categories (2, object): [a < b]
+
+    Raises `TypeError` because the categories are ordered and not identical.
+
+    >>> a = pd.Categorical(["a", "b"], ordered=True)
+    >>> b = pd.Categorical(["a", "b", "c"], ordered=True)
+    >>> union_categoricals([a, b])
+    TypeError: to union ordered Categoricals, all categories must be the same
+
+    New in version 0.20.0
+
+    Ordered categoricals with different categories or orderings can be
+    combined by using the `ignore_order=True` argument.
+
+    >>> a = pd.Categorical(["a", "b", "c"], ordered=True)
+    >>> b = pd.Categorical(["c", "b", "a"], ordered=True)
+    >>> union_categoricals([a, b], ignore_order=True)
+    [a, b, c, c, b, a]
+    Categories (3, object): [a, b, c]
+
+    `union_categoricals` also works with a `CategoricalIndex`, or `Series`
+    containing categorical data, but note that the resulting array will
+    always be a plain `Categorical`
+
+    >>> a = pd.Series(["b", "c"], dtype='category')
+    >>> b = pd.Series(["a", "b"], dtype='category')
+    >>> union_categoricals([a, b])
+    [b, c, a, b]
+    Categories (3, object): [b, c, a]
     """
     from pandas import Index, Categorical, CategoricalIndex, Series
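The docstring examples above all return a bare Categorical; re-wrapping the
result in a Series is left to the caller. A short sketch of that step (an
illustrative example, not part of the patch):

    import pandas as pd
    from pandas.api.types import union_categoricals

    a = pd.Series(["b", "c"], dtype='category')
    b = pd.Series(["a", "b"], dtype='category')

    # union_categoricals returns a Categorical; wrapping it in a Series
    # gives a categorical Series whose categories are the union [b, c, a]
    combined = pd.Series(union_categoricals([a, b]))
    assert combined.dtype == 'category'
    assert list(combined) == ['b', 'c', 'a', 'b']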
From b2979834766d4543029113bf2e568a0782590e51 Mon Sep 17 00:00:00 2001
From: Nick Eubank 
Date: Mon, 22 May 2017 01:39:20 -0700
Subject: [PATCH 582/933] ENH: add validate argument to merge (#16275)

---
 doc/source/merging.rst             |  51 +++++++++++-
 doc/source/whatsnew/v0.21.0.txt    |   7 +-
 pandas/core/frame.py               |  18 ++++-
 pandas/core/reshape/merge.py       |  59 +++++++++++++-
 pandas/tests/reshape/test_merge.py | 124 +++++++++++++++++++++++++++++
 5 files changed, 247 insertions(+), 12 deletions(-)

diff --git a/doc/source/merging.rst b/doc/source/merging.rst
index 170dde87c8363..d956f1ca54e6b 100644
--- a/doc/source/merging.rst
+++ b/doc/source/merging.rst
@@ -513,7 +513,8 @@ standard database join operations between DataFrame objects:
 
     pd.merge(left, right, how='inner', on=None, left_on=None, right_on=None,
              left_index=False, right_index=False, sort=True,
-             suffixes=('_x', '_y'), copy=True, indicator=False)
+             suffixes=('_x', '_y'), copy=True, indicator=False,
+             validate=None)
 
 - ``left``: A DataFrame object
 - ``right``: Another DataFrame object
@@ -551,6 +552,20 @@ standard database join operations between DataFrame objects:
 
   .. versionadded:: 0.17.0
 
+- ``validate`` : string, default None.
+  If specified, checks if merge is of specified type.
+
+  * "one_to_one" or "1:1": checks if merge keys are unique in both
+    left and right datasets.
+  * "one_to_many" or "1:m": checks if merge keys are unique in left
+    dataset.
+  * "many_to_one" or "m:1": checks if merge keys are unique in right
+    dataset.
+  * "many_to_many" or "m:m": allowed, but does not result in checks.
+
+  .. versionadded:: 0.21.0
+
+
 The return type will be the same as ``left``. If ``left`` is a ``DataFrame``
 and ``right`` is a subclass of DataFrame, the return type will still be
 ``DataFrame``.
@@ -711,10 +726,40 @@ Here is another example with duplicate join keys in DataFrames:
          labels=['left', 'right'], vertical=False);
    plt.close('all');
 
+
 .. warning::
 
-   Joining / merging on duplicate keys can cause a returned frame that is the multiplication of the row dimensions,
-   may result in memory overflow. It is the user' s responsibility to manage duplicate values in keys before joining large DataFrames.
+   Joining / merging on duplicate keys can cause a returned frame that is the multiplication of the row dimensions, which may result in memory overflow. It is the user's responsibility to manage duplicate values in keys before joining large DataFrames.
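As a concrete illustration of the warning above (a minimal sketch with
hypothetical frames, not part of the patch), merging on a key that is
duplicated on both sides produces the cross product of the matching rows:

    import pandas as pd

    left = pd.DataFrame({'B': [2, 2], 'x': [1, 2]})
    right = pd.DataFrame({'B': [2, 2, 2], 'y': [1, 2, 3]})

    # every left row with B == 2 pairs with every right row with B == 2,
    # so 2 matching rows on the left * 3 on the right -> 6 result rows
    result = pd.merge(left, right, on='B')
    assert len(result) == 6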
+ +In the following example, there are duplicate values of ``B`` in the right DataFrame. As this is not a one-to-one merge -- as specified in the ``validate`` argument -- an exception will be raised. + + +.. ipython:: python + + left = pd.DataFrame({'A' : [1,2], 'B' : [1, 2]}) + right = pd.DataFrame({'A' : [4,5,6], 'B': [2, 2, 2]}) + +.. code-block:: ipython + + In [53]: result = pd.merge(left, right, on='B', how='outer', validate="one_to_one") + ... + MergeError: Merge keys are not unique in right dataset; not a one-to-one merge + +If the user is aware of the duplicates in the right `DataFrame` but wants to ensure there are no duplicates in the left DataFrame, one can use the `validate='one_to_many'` argument instead, which will not raise an exception. + +.. ipython:: python + + pd.merge(left, right, on='B', how='outer', validate="one_to_many") + .. _merging.indicator: diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 0a3a440ced54f..3734dc15be2e9 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -25,16 +25,15 @@ New features - Added `__fspath__` method to :class`:pandas.HDFStore`, :class:`pandas.ExcelFile`, and :class:`pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`) - .. _whatsnew_0210.enhancements.other: Other Enhancements ^^^^^^^^^^^^^^^^^^ + +- The ``validate`` argument for :func:`merge` function now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception will be raised. For more, see :ref:`here ` (:issue:`16270`) - ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`) - ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`) - -- :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default, -this parameter is set to `HIGHEST_PROTOCOL `__ +- :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__ .. _whatsnew_0210.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3d3d56c1e0331..78a369761afc1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -175,6 +175,19 @@ .. versionadded:: 0.17.0 +validate : string, default None + If specified, checks if merge is of specified type. + + * "one_to_one" or "1:1": check if merge keys are unique in both + left and right datasets. + * "one_to_many" or "1:m": check if merge keys are unique in left + dataset. + * "many_to_one" or "m:1": check if merge keys are unique in right + dataset. + * "many_to_may" or "m:m": allowed, but does not result in checks. + + .. 
+
 Examples
 --------
 
@@ -4868,12 +4881,13 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
 
     @Appender(_merge_doc, indents=2)
     def merge(self, right, how='inner', on=None, left_on=None,
               right_on=None, left_index=False, right_index=False, sort=False,
-              suffixes=('_x', '_y'), copy=True, indicator=False):
+              suffixes=('_x', '_y'), copy=True, indicator=False,
+              validate=None):
         from pandas.core.reshape.merge import merge
         return merge(self, right, how=how, on=on, left_on=left_on,
                      right_on=right_on, left_index=left_index,
                      right_index=right_index, sort=sort, suffixes=suffixes,
-                     copy=copy, indicator=indicator)
+                     copy=copy, indicator=indicator, validate=validate)
 
     def round(self, decimals=0, *args, **kwargs):
         """
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 7bf25e37340c4..b5c483a52f14f 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -46,11 +46,13 @@
 @Appender(_merge_doc, indents=0)
 def merge(left, right, how='inner', on=None, left_on=None, right_on=None,
           left_index=False, right_index=False, sort=False,
-          suffixes=('_x', '_y'), copy=True, indicator=False):
+          suffixes=('_x', '_y'), copy=True, indicator=False,
+          validate=None):
     op = _MergeOperation(left, right, how=how, on=on, left_on=left_on,
                          right_on=right_on, left_index=left_index,
                          right_index=right_index, sort=sort, suffixes=suffixes,
-                         copy=copy, indicator=indicator)
+                         copy=copy, indicator=indicator,
+                         validate=validate)
     return op.get_result()
 
 
@@ -341,6 +343,7 @@ def merge_asof(left, right, on=None,
 
     .. versionadded:: 0.20.0
 
+
 Returns
 -------
 merged : DataFrame
@@ -504,7 +507,8 @@ class _MergeOperation(object):
     def __init__(self, left, right, how='inner', on=None,
                  left_on=None, right_on=None, axis=1,
                  left_index=False, right_index=False, sort=True,
-                 suffixes=('_x', '_y'), copy=True, indicator=False):
+                 suffixes=('_x', '_y'), copy=True, indicator=False,
+                 validate=None):
         self.left = self.orig_left = left
         self.right = self.orig_right = right
         self.how = how
@@ -567,6 +571,12 @@ def __init__(self, left, right, how='inner', on=None,
         # to avoid incompat dtypes
         self._maybe_coerce_merge_keys()
 
+        # If argument passed to validate,
+        # check if columns specified as unique
+        # are in fact unique.
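+        # (For example, validate='1:1' requires the merge keys to be
+        # unique in both frames, while 'm:1' only requires uniqueness
+        # on the right; see _validate below.)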
+        if validate is not None:
+            self._validate(validate)
+
     def get_result(self):
         if self.indicator:
             self.left, self.right = self._indicator_pre_merge(
@@ -958,6 +968,49 @@ def _validate_specification(self):
             if len(self.right_on) != len(self.left_on):
                 raise ValueError("len(right_on) must equal len(left_on)")
 
+    def _validate(self, validate):
+
+        # Check uniqueness of each
+        if self.left_index:
+            left_unique = self.orig_left.index.is_unique
+        else:
+            left_unique = MultiIndex.from_arrays(self.left_join_keys
+                                                 ).is_unique
+
+        if self.right_index:
+            right_unique = self.orig_right.index.is_unique
+        else:
+            right_unique = MultiIndex.from_arrays(self.right_join_keys
+                                                  ).is_unique
+
+        # Check data integrity
+        if validate in ["one_to_one", "1:1"]:
+            if not left_unique and not right_unique:
+                raise ValueError("Merge keys are not unique in either left"
+                                 " or right dataset; not a one-to-one merge")
+            elif not left_unique:
+                raise ValueError("Merge keys are not unique in left dataset;"
+                                 " not a one-to-one merge")
+            elif not right_unique:
+                raise ValueError("Merge keys are not unique in right dataset;"
+                                 " not a one-to-one merge")
+
+        elif validate in ["one_to_many", "1:m"]:
+            if not left_unique:
+                raise ValueError("Merge keys are not unique in left dataset;"
+                                 "not a one-to-many merge")
+
+        elif validate in ["many_to_one", "m:1"]:
+            if not right_unique:
+                raise ValueError("Merge keys are not unique in right dataset;"
+                                 " not a many-to-one merge")
+
+        elif validate in ['many_to_many', 'm:m']:
+            pass
+
+        else:
+            raise ValueError("Not a valid argument for validate")
+
 
 def _get_join_indexers(left_keys, right_keys, sort=False,
                        how='inner', **kwargs):
diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py
index d3257243d7a2c..16c58354ad5c9 100644
--- a/pandas/tests/reshape/test_merge.py
+++ b/pandas/tests/reshape/test_merge.py
@@ -724,6 +724,130 @@ def test_indicator(self):
                            how='outer', indicator=True)
         assert_frame_equal(test5, hand_coded_result)
 
+    def test_validation(self):
+        left = DataFrame({'a': ['a', 'b', 'c', 'd'],
+                          'b': ['cat', 'dog', 'weasel', 'horse']},
+                         index=range(4))
+
+        right = DataFrame({'a': ['a', 'b', 'c', 'd', 'e'],
+                           'c': ['meow', 'bark', 'um... weasel noise?',
+                                 'nay', 'chirp']},
+                          index=range(5))
+
+        # Make sure no side effects.
+        left_copy = left.copy()
+        right_copy = right.copy()
+
+        result = merge(left, right, left_index=True, right_index=True,
+                       validate='1:1')
+        assert_frame_equal(left, left_copy)
+        assert_frame_equal(right, right_copy)
+
+        # make sure merge still correct
+        expected = DataFrame({'a_x': ['a', 'b', 'c', 'd'],
+                              'b': ['cat', 'dog', 'weasel', 'horse'],
+                              'a_y': ['a', 'b', 'c', 'd'],
+                              'c': ['meow', 'bark', 'um... weasel noise?',
+                                    'nay']},
+                             index=range(4),
+                             columns=['a_x', 'b', 'a_y', 'c'])
+
+        result = merge(left, right, left_index=True, right_index=True,
+                       validate='one_to_one')
+        assert_frame_equal(result, expected)
+
+        expected_2 = DataFrame({'a': ['a', 'b', 'c', 'd'],
+                                'b': ['cat', 'dog', 'weasel', 'horse'],
+                                'c': ['meow', 'bark', 'um... weasel noise?',
+                                      'nay']},
+                               index=range(4))
+
+        result = merge(left, right, on='a', validate='1:1')
+        assert_frame_equal(left, left_copy)
+        assert_frame_equal(right, right_copy)
+        assert_frame_equal(result, expected_2)
+
+        result = merge(left, right, on='a', validate='one_to_one')
+        assert_frame_equal(result, expected_2)
+
+        # One index, one column
+        expected_3 = DataFrame({'b': ['cat', 'dog', 'weasel', 'horse'],
+                                'a': ['a', 'b', 'c', 'd'],
+                                'c': ['meow', 'bark', 'um...
weasel noise?', + 'nay']}, + columns=['b', 'a', 'c'], + index=range(4)) + + left_index_reset = left.set_index('a') + result = merge(left_index_reset, right, left_index=True, + right_on='a', validate='one_to_one') + assert_frame_equal(result, expected_3) + + # Dups on right + right_w_dups = right.append(pd.DataFrame({'a': ['e'], 'c': ['moo']}, + index=[4])) + merge(left, right_w_dups, left_index=True, right_index=True, + validate='one_to_many') + + with pytest.raises(ValueError): + merge(left, right_w_dups, left_index=True, right_index=True, + validate='one_to_one') + + with pytest.raises(ValueError): + merge(left, right_w_dups, on='a', validate='one_to_one') + + # Dups on left + left_w_dups = left.append(pd.DataFrame({'a': ['a'], 'c': ['cow']}, + index=[3])) + merge(left_w_dups, right, left_index=True, right_index=True, + validate='many_to_one') + + with pytest.raises(ValueError): + merge(left_w_dups, right, left_index=True, right_index=True, + validate='one_to_one') + + with pytest.raises(ValueError): + merge(left_w_dups, right, on='a', validate='one_to_one') + + # Dups on both + merge(left_w_dups, right_w_dups, on='a', validate='many_to_many') + + with pytest.raises(ValueError): + merge(left_w_dups, right_w_dups, left_index=True, + right_index=True, validate='many_to_one') + + with pytest.raises(ValueError): + merge(left_w_dups, right_w_dups, on='a', + validate='one_to_many') + + # Check invalid arguments + with pytest.raises(ValueError): + merge(left, right, on='a', validate='jibberish') + + # Two column merge, dups in both, but jointly no dups. + left = DataFrame({'a': ['a', 'a', 'b', 'b'], + 'b': [0, 1, 0, 1], + 'c': ['cat', 'dog', 'weasel', 'horse']}, + index=range(4)) + + right = DataFrame({'a': ['a', 'a', 'b'], + 'b': [0, 1, 0], + 'd': ['meow', 'bark', 'um... weasel noise?']}, + index=range(3)) + + expected_multi = DataFrame({'a': ['a', 'a', 'b'], + 'b': [0, 1, 0], + 'c': ['cat', 'dog', 'weasel'], + 'd': ['meow', 'bark', + 'um... weasel noise?']}, + index=range(3)) + + with pytest.raises(ValueError): + merge(left, right, on='a', validate='1:1') + + result = merge(left, right, on=['a', 'b'], validate='1:1') + assert_frame_equal(result, expected_multi) + def _check_merge(x, y): for how in ['inner', 'left', 'outer']: From ee54722fbf43fdfdafb25282ff84f88fd7426334 Mon Sep 17 00:00:00 2001 From: Chankey Pathak Date: Mon, 22 May 2017 14:41:42 +0530 Subject: [PATCH 583/933] DOC: update tutorials.rst (#16411) --- doc/source/tutorials.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/tutorials.rst b/doc/source/tutorials.rst index 2489b787560d0..9a97294d4e6d6 100644 --- a/doc/source/tutorials.rst +++ b/doc/source/tutorials.rst @@ -178,3 +178,4 @@ Various Tutorials - `Pandas and Python: Top 10, by Manish Amde `_ - `Pandas Tutorial, by Mikhail Semeniuk `_ - `Pandas DataFrames Tutorial, by Karlijn Willems `_ +- `A concise tutorial with real life examples `_ From 5fe042f507c2c5a9bd4f104111e37a21cfa6365b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 22 May 2017 15:02:08 +0200 Subject: [PATCH 584/933] DOC: add google analytics to the documentation (#16412) --- doc/source/themes/nature_with_gtoc/layout.html | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/source/themes/nature_with_gtoc/layout.html b/doc/source/themes/nature_with_gtoc/layout.html index ddf1e861f5f81..a2106605c5562 100644 --- a/doc/source/themes/nature_with_gtoc/layout.html +++ b/doc/source/themes/nature_with_gtoc/layout.html @@ -94,4 +94,15 @@

{{ _('Search') }}

}); }); + {% endblock %} \ No newline at end of file From d5a681bfa2de24b4a1449956c84393a413909738 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 22 May 2017 09:10:27 -0400 Subject: [PATCH 585/933] PERF: don't materialize arrays on checking in groupby (#16413) --- asv_bench/benchmarks/groupby.py | 9 +++++++++ doc/source/whatsnew/v0.20.2.txt | 2 +- pandas/core/indexes/base.py | 1 - 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index c0c3a42cc4464..13b5cd2b06032 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -368,6 +368,11 @@ def setup(self): self.dates = (np.datetime64('now') + self.offsets) self.df = DataFrame({'key1': np.random.randint(0, 500, size=self.n), 'key2': np.random.randint(0, 100, size=self.n), 'value1': np.random.randn(self.n), 'value2': np.random.randn(self.n), 'value3': np.random.randn(self.n), 'dates': self.dates, }) + N = 1000000 + self.draws = pd.Series(np.random.randn(N)) + labels = pd.Series(['foo', 'bar', 'baz', 'qux'] * (N // 4)) + self.cats = labels.astype('category') + def time_groupby_multi_size(self): self.df.groupby(['key1', 'key2']).size() @@ -377,6 +382,10 @@ def time_groupby_dt_size(self): def time_groupby_dt_timegrouper_size(self): self.df.groupby(TimeGrouper(key='dates', freq='M')).size() + def time_groupby_size(self): + self.draws.groupby(self.cats).size() + + #---------------------------------------------------------------------- # groupby with a variable value for ngroups diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 4ec9daff4c0fc..e0857019d2fd4 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -29,7 +29,7 @@ Performance Improvements - Performance regression fix when indexing with a list-like (:issue:`16285`) - Performance regression fix for MultiIndexes (:issue:`16319`, :issue:`16346`) - Improved performance of ``.clip()`` with scalar arguments (:issue:`15400`) - +- Improved performance of groupby with categorical groupers (:issue:`16413`) .. _whatsnew_0202.bug_fixes: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9b29f1b04ff73..2af4f112ca941 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2388,7 +2388,6 @@ def get_loc(self, key, method=None, tolerance=None): if tolerance is not None: raise ValueError('tolerance argument only valid if using pad, ' 'backfill or nearest lookups') - key = _values_from_object(key) try: return self._engine.get_loc(key) except KeyError: From 1f2085e9c1f74e22421772e6e84d619ae6dadf6e Mon Sep 17 00:00:00 2001 From: andymaheshw Date: Mon, 22 May 2017 10:47:49 -0700 Subject: [PATCH 586/933] Option Page Typos (#16421) * capitalization update * update * undoing capitalization --- doc/source/options.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/source/options.rst b/doc/source/options.rst index 5f6bf2fbb9662..6ff5b76014c95 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -28,7 +28,7 @@ You can get/set options directly as attributes of the top-level ``options`` attr pd.options.display.max_rows = 999 pd.options.display.max_rows -There is also an API composed of 5 relevant functions, available directly from the ``pandas`` +The API is composed of 5 relevant functions, available directly from the ``pandas`` namespace: - :func:`~pandas.get_option` / :func:`~pandas.set_option` - get/set the value of a single option. 
@@ -40,7 +40,7 @@ namespace: **Note:** developers can check out pandas/core/config.py for more info. All of the functions above accept a regexp pattern (``re.search`` style) as an argument, -and so passing in a substring will work - as long as it is unambiguous : +and so passing in a substring will work - as long as it is unambiguous: .. ipython:: python @@ -241,7 +241,7 @@ suggestion. df ``display.chop_threshold`` sets at what level pandas rounds to zero when -it displays a Series of DataFrame. Note, this does not effect the +it displays a Series of DataFrame. Note, this does not effect the precision at which the number is stored. .. ipython:: python @@ -420,15 +420,15 @@ mode.chained_assignment warn Raise an exception, warn, or no action if trying to use chained assignment, The default is warn mode.sim_interactive False Whether to simulate interactive mode - for purposes of testing + for purposes of testing. mode.use_inf_as_null False True means treat None, NaN, -INF, INF as null (old way), False means None and NaN are null, but INF, -INF are not null (new way). compute.use_bottleneck True Use the bottleneck library to accelerate - computation if it is installed + computation if it is installed. compute.use_numexpr True Use the numexpr library to accelerate - computation if it is installed + computation if it is installed. =================================== ============ ================================== From 49ec31bbaeca81a6f58fc1be26fe80f3ac188cdd Mon Sep 17 00:00:00 2001 From: Adam Smith Date: Mon, 22 May 2017 10:49:49 -0700 Subject: [PATCH 587/933] SAS DOC update - add documentation for sas_read(format='sas7bdat') (#16420) * updating comparison_with_sas.rst to include documentation for reading sas7bdat format * DOC: update docs for read_sas(format='sas7bdat') #12700 Issue #12700: DOC: update comparison_with_sas docs to include new read_sas(format='sas7bdat') --- doc/source/comparison_with_sas.rst | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/doc/source/comparison_with_sas.rst b/doc/source/comparison_with_sas.rst index 7ec91d251f15d..875358521173a 100644 --- a/doc/source/comparison_with_sas.rst +++ b/doc/source/comparison_with_sas.rst @@ -577,9 +577,8 @@ Data Interop ~~~~~~~~~~~~ pandas provides a :func:`read_sas` method that can read SAS data saved in -the XPORT format. The ability to read SAS's binary format is planned for a -future release. - +the XPORT or SAS7BDAT binary format. + .. code-block:: none libname xportout xport 'transport-file.xpt'; @@ -591,6 +590,15 @@ future release. .. code-block:: python df = pd.read_sas('transport-file.xpt') + df = pd.read_sas('binary-file.sas7bdat') + +You can also specify the file format directly. By default, pandas will try +to infer the file format based on its extension. + +.. code-block:: python + + df = pd.read_sas('transport-file.xpt', format='xport') + df = pd.read_sas('binary-file.sas7bdat', format='sas7bdat') XPORT is a relatively limited format and the parsing of it is not as optimized as some of the other pandas readers. An alternative way @@ -605,3 +613,4 @@ to interop data between SAS and pandas is to serialize to csv. 
In [9]: %time df = pd.read_csv('big.csv') Wall time: 4.86 s + From e053ee301d82a44ddc86dc7e164fea2d5c5178f8 Mon Sep 17 00:00:00 2001 From: jaredsnyder Date: Tue, 23 May 2017 02:51:06 -0500 Subject: [PATCH 588/933] BUG: fix isin with Series of tuples values (#16394) (#16434) * Swiched out "values = np.array(list(values), dtype='object')" for "values = lib.list_to_object_array(list(values))" in the isin() method found in core/algorithms.py Added test for comparing to a list of tuples --- doc/source/whatsnew/v0.20.2.txt | 2 +- pandas/core/algorithms.py | 2 +- pandas/tests/frame/test_analytics.py | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index e0857019d2fd4..57625b725ddba 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -80,7 +80,7 @@ Reshaping ^^^^^^^^^ - Bug in ``DataFrame.stack`` with unsorted levels in MultiIndex columns (:issue:`16323`) - +- Bug in ``Series.isin(..)`` with a list of tuples (:issue:`16394`) Numeric ^^^^^^^ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index a745ec616eda8..77d79c9585e57 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -388,7 +388,7 @@ def isin(comps, values): "[{0}]".format(type(values).__name__)) if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): - values = np.array(list(values), dtype='object') + values = lib.list_to_object_array(list(values)) comps, dtype, _ = _ensure_data(comps) values, _, _ = _ensure_data(values, dtype=dtype) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index fa9823bf000a2..da96fce36f3c9 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1201,6 +1201,14 @@ def test_isin_df(self): expected['B'] = False tm.assert_frame_equal(result, expected) + def test_isin_tuples(self): + # GH16394 + df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']}) + df['C'] = list(zip(df['A'], df['B'])) + result = df['C'].isin([(1, 'a')]) + tm.assert_series_equal(result, + Series([True, False, False], name="C")) + def test_isin_df_dupe_values(self): df1 = DataFrame({'A': [1, 2, 3, 4], 'B': [2, np.nan, 4, 4]}) # just cols duped From e905f9ef7e28116e723b412593a4571aa78d187e Mon Sep 17 00:00:00 2001 From: Steven Cutting Date: Tue, 23 May 2017 01:16:59 -0700 Subject: [PATCH 589/933] DOC: add README improvements. (#16435) --- README.md | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index e05f1405419fc..01182d7b7b4b5 100644 --- a/README.md +++ b/README.md @@ -204,13 +204,13 @@ python setup.py build --compiler=mingw32 python setup.py install ``` -See http://pandas.pydata.org/ for more information. +See **[the docs](http://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source)** for more information. ## License BSD ## Documentation -The official documentation is hosted on PyData.org: http://pandas.pydata.org/ +The official documentation is hosted on PyData.org: http://pandas.pydata.org/pandas-docs/stable/ The Sphinx documentation should provide a good starting point for learning how to use the library. Expect the docs to continue to expand as time goes on. @@ -219,10 +219,21 @@ to use the library. Expect the docs to continue to expand as time goes on. Work on ``pandas`` started at AQR (a quantitative hedge fund) in 2008 and has been under active development since then. 
+## Getting Help + +For usage questions, the best place to go to is [StackOverflow](https://stackoverflow.com/questions/tagged/pandas). +Further, general questions and discussions can also take place on the [pydata mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata). + ## Discussion and Development -Since pandas development is related to a number of other scientific -Python projects, questions are welcome on the scipy-user mailing -list. Specialized discussions or design issues should take place on -the PyData mailing list / Google group: +Most development discussion is taking place on github in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions. + +## Contributing to pandas +All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome. + +A detailed overview on how to contribute can be found in the **[contributing guide.](http://pandas.pydata.org/pandas-docs/stable/contributing.html)** + +If you are simply looking to start working with the pandas codebase, navigate to the [GitHub “issues” tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [Difficulty Novice](https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+is%3Aissue+label%3A%22Difficulty+Novice%22) where you could start out. + +Or maybe through using pandas you have an idea of your own or are looking for something in the documentation and thinking ‘this can be improved’...you can do something about it! -https://groups.google.com/forum/#!forum/pydata +Feel free to ask questions on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas). From a985dda2c0ef61378bf14b62985eb73d4199e0f2 Mon Sep 17 00:00:00 2001 From: Nick Eubank Date: Tue, 23 May 2017 02:10:11 -0700 Subject: [PATCH 590/933] change merge validate errors to MergeError from ValueError (#16436) * change merge validate errors to MergeError from ValueError * move MergeError to pandas/errors --- doc/source/whatsnew/v0.21.0.txt | 3 ++- pandas/core/reshape/merge.py | 15 ++++++--------- pandas/errors/__init__.py | 6 ++++++ pandas/tests/reshape/test_merge.py | 14 +++++++------- pandas/tests/test_errors.py | 2 +- 5 files changed, 22 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 3734dc15be2e9..d01b097ee1529 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -30,7 +30,7 @@ New features Other Enhancements ^^^^^^^^^^^^^^^^^^ -- The ``validate`` argument for :func:`merge` function now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception will be raised. For more, see :ref:`here ` (:issue:`16270`) +- The ``validate`` argument for :func:`merge` function now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception of type ``MergeError`` will be raised. 
For more, see :ref:`here ` (:issue:`16270`) - ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`) - ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`) - :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__ @@ -51,6 +51,7 @@ Backwards incompatible API changes Other API Changes ^^^^^^^^^^^^^^^^^ +- Moved definition of ``MergeError`` to the ``pandas.errors`` module. .. _whatsnew_0210.deprecations: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index b5c483a52f14f..ffe0cac33ec8f 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -40,6 +40,7 @@ import pandas.core.algorithms as algos import pandas.core.common as com from pandas._libs import hashtable as libhashtable, join as libjoin, lib +from pandas.errors import MergeError @Substitution('\nleft : DataFrame') @@ -60,10 +61,6 @@ def merge(left, right, how='inner', on=None, left_on=None, right_on=None, merge.__doc__ = _merge_doc % '\nleft : DataFrame' -class MergeError(ValueError): - pass - - def _groupby_and_merge(by, on, left, right, _merge_pieces, check_duplicates=True): """ @@ -986,23 +983,23 @@ def _validate(self, validate): # Check data integrity if validate in ["one_to_one", "1:1"]: if not left_unique and not right_unique: - raise ValueError("Merge keys are not unique in either left" + raise MergeError("Merge keys are not unique in either left" " or right dataset; not a one-to-one merge") elif not left_unique: - raise ValueError("Merge keys are not unique in left dataset;" + raise MergeError("Merge keys are not unique in left dataset;" " not a one-to-one merge") elif not right_unique: - raise ValueError("Merge keys are not unique in right dataset;" + raise MergeError("Merge keys are not unique in right dataset;" " not a one-to-one merge") elif validate in ["one_to_many", "1:m"]: if not left_unique: - raise ValueError("Merge keys are not unique in left dataset;" + raise MergeError("Merge keys are not unique in left dataset;" "not a one-to-many merge") elif validate in ["many_to_one", "m:1"]: if not right_unique: - raise ValueError("Merge keys are not unique in right dataset;" + raise MergeError("Merge keys are not unique in right dataset;" " not a many-to-one merge") elif validate in ['many_to_many', 'm:m']: diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 805e689dca840..6304f3a527f2c 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -57,3 +57,9 @@ class ParserWarning(Warning): """ +class MergeError(ValueError): + """ + Error raised when problems arise during merging due to problems + with input data. Subclass of `ValueError`. 
+ + """ diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 16c58354ad5c9..bacb605199e4a 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -789,11 +789,11 @@ def test_validation(self): merge(left, right_w_dups, left_index=True, right_index=True, validate='one_to_many') - with pytest.raises(ValueError): + with pytest.raises(MergeError): merge(left, right_w_dups, left_index=True, right_index=True, validate='one_to_one') - with pytest.raises(ValueError): + with pytest.raises(MergeError): merge(left, right_w_dups, on='a', validate='one_to_one') # Dups on left @@ -802,21 +802,21 @@ def test_validation(self): merge(left_w_dups, right, left_index=True, right_index=True, validate='many_to_one') - with pytest.raises(ValueError): + with pytest.raises(MergeError): merge(left_w_dups, right, left_index=True, right_index=True, validate='one_to_one') - with pytest.raises(ValueError): + with pytest.raises(MergeError): merge(left_w_dups, right, on='a', validate='one_to_one') # Dups on both merge(left_w_dups, right_w_dups, on='a', validate='many_to_many') - with pytest.raises(ValueError): + with pytest.raises(MergeError): merge(left_w_dups, right_w_dups, left_index=True, right_index=True, validate='many_to_one') - with pytest.raises(ValueError): + with pytest.raises(MergeError): merge(left_w_dups, right_w_dups, on='a', validate='one_to_many') @@ -842,7 +842,7 @@ def test_validation(self): 'um... weasel noise?']}, index=range(3)) - with pytest.raises(ValueError): + with pytest.raises(MergeError): merge(left, right, on='a', validate='1:1') result = merge(left, right, on=['a', 'b'], validate='1:1') diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index 4a0850734e134..babf88ef1df8d 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -10,7 +10,7 @@ "exc", ['UnsupportedFunctionCall', 'UnsortedIndexError', 'OutOfBoundsDatetime', 'ParserError', 'PerformanceWarning', 'DtypeWarning', - 'EmptyDataError', 'ParserWarning']) + 'EmptyDataError', 'ParserWarning', 'MergeError']) def test_exception_importable(exc): from pandas import errors e = getattr(errors, exc) From c53d00f1487716cf462ffa1965f6ec676a6c2593 Mon Sep 17 00:00:00 2001 From: Margaret Sy Date: Tue, 23 May 2017 03:56:30 -0700 Subject: [PATCH 591/933] ENH: Add the decimal.Decimal type to infer_dtypes (#15690) (#16426) closes #15690 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/_libs/src/inference.pyx | 8 +++++++- pandas/tests/dtypes/test_inference.py | 11 +++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d01b097ee1529..5f58c710606af 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -34,6 +34,7 @@ Other Enhancements - ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`) - ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`) - :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__ +- :func:`api.types.infer_dtype` now infers decimals. (:issue: `15690`) .. 
_whatsnew_0210.api_breaking: diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index ddd38979e326c..38e95fe6ee652 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -243,6 +243,7 @@ def infer_dtype(object value): - integer - mixed-integer - mixed-integer-float + - decimal - complex - categorical - boolean @@ -286,6 +287,9 @@ def infer_dtype(object value): >>> infer_dtype(['a', 1]) 'mixed-integer' + >>> infer_dtype([Decimal(1), Decimal(2.0)]) + 'decimal' + >>> infer_dtype([True, False]) 'boolean' @@ -308,7 +312,6 @@ def infer_dtype(object value): 'categorical' """ - cdef: Py_ssize_t i, n object val @@ -407,6 +410,9 @@ def infer_dtype(object value): if is_time_array(values): return 'time' + elif is_decimal(val): + return 'decimal' + elif util.is_float_object(val): if is_float_array(values): return 'floating' diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 3790ebe0d3e7c..b88481abcb2ec 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -9,6 +9,7 @@ import collections import re from datetime import datetime, date, timedelta, time +from decimal import Decimal import numpy as np import pytz import pytest @@ -462,6 +463,16 @@ def test_floats(self): result = lib.infer_dtype(arr) assert result == 'floating' + def test_decimals(self): + # GH15690 + arr = np.array([Decimal(1), Decimal(2), Decimal(3)]) + result = lib.infer_dtype(arr) + assert result == 'decimal' + + arr = np.array([1.0, 2.0, Decimal(3)]) + result = lib.infer_dtype(arr) + assert result == 'mixed' + def test_string(self): pass From 6614e266bca32771c761e904367eff10dd4c8979 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 23 May 2017 07:07:19 -0400 Subject: [PATCH 592/933] DOC: remove windows build from source instructions (#16451) --- README.md | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/README.md b/README.md index 01182d7b7b4b5..dc74828ba9863 100644 --- a/README.md +++ b/README.md @@ -197,14 +197,7 @@ mode](https://pip.pypa.io/en/latest/reference/pip_install.html#editable-installs pip install -e . ``` -On Windows, you will need to install MinGW and execute: - -```sh -python setup.py build --compiler=mingw32 -python setup.py install -``` - -See **[the docs](http://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source)** for more information. +See the full instructions for [installing from source](http://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source). 
## License BSD From a8a497f0d8c1acd472d57dfb48832292fb3f8c2e Mon Sep 17 00:00:00 2001 From: WBare Date: Tue, 23 May 2017 11:59:16 -0400 Subject: [PATCH 593/933] BUG: Interpolate limit=n GH16282 (#16429) * BUG: Interpolate limit=n GH16282 * Fix: comment line over the 80 char limit * Test: Added small test for code coverage * DOC: Moved whats new comment from 0.21.0 to 0.20.2 * Update v0.21.0.txt Removed extraneous newline --- doc/source/whatsnew/v0.20.2.txt | 2 +- pandas/core/missing.py | 60 ++++++++++++++++------------- pandas/tests/series/test_missing.py | 18 +++++++++ pandas/tests/test_common.py | 1 + 4 files changed, 53 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 57625b725ddba..d5fd879d3f9bf 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -84,7 +84,7 @@ Reshaping Numeric ^^^^^^^ - +- Bug in .interpolate(), where limit_direction was not respected when limit=None (default) was passed (:issue:16282) Categorical ^^^^^^^^^^^ diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 3010348423340..51778684d68f5 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -160,35 +160,41 @@ def _interp_limit(invalid, fw_limit, bw_limit): start_nans = set(range(ys.first_valid_index())) end_nans = set(range(1 + ys.last_valid_index(), len(valid))) - # This is a list of the indexes in the series whose yvalue is currently - # NaN, but whose interpolated yvalue will be overwritten with NaN after - # computing the interpolation. For each index in this list, one of these - # conditions is true of the corresponding NaN in the yvalues: + # violate_limit is a list of the indexes in the series whose yvalue is + # currently NaN, and should still be NaN after the interpolation. + # Specifically: # - # a) It is one of a chain of NaNs at the beginning of the series, and - # either limit is not specified or limit_direction is 'forward'. - # b) It is one of a chain of NaNs at the end of the series, and limit is - # specified and limit_direction is 'backward' or 'both'. - # c) Limit is nonzero and it is further than limit from the nearest non-NaN - # value (with respect to the limit_direction setting). + # If limit_direction='forward' or None then the list will contain NaNs at + # the beginning of the series, and NaNs that are more than 'limit' away + # from the prior non-NaN. # - # The default behavior is to fill forward with no limit, ignoring NaNs at - # the beginning (see issues #9218 and #10420) - violate_limit = sorted(start_nans) - - if limit is not None: - if not is_integer(limit): - raise ValueError('Limit must be an integer') - if limit < 1: - raise ValueError('Limit must be greater than 0') - if limit_direction == 'forward': - violate_limit = sorted(start_nans | set(_interp_limit(invalid, - limit, 0))) - if limit_direction == 'backward': - violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0, - limit))) - if limit_direction == 'both': - violate_limit = sorted(_interp_limit(invalid, limit, limit)) + # If limit_direction='backward' then the list will contain NaNs at + # the end of the series, and NaNs that are more than 'limit' away + # from the subsequent non-NaN. + # + # If limit_direction='both' then the list will contain NaNs that + # are more than 'limit' away from any non-NaN. 
+ # + # If limit=None, then use default behavior of filling an unlimited number + # of NaNs in the direction specified by limit_direction + + # default limit is unlimited GH #16282 + if limit is None: + limit = len(xvalues) + elif not is_integer(limit): + raise ValueError('Limit must be an integer') + elif limit < 1: + raise ValueError('Limit must be greater than 0') + + # each possible limit_direction + if limit_direction == 'forward': + violate_limit = sorted(start_nans | + set(_interp_limit(invalid, limit, 0))) + elif limit_direction == 'backward': + violate_limit = sorted(end_nans | + set(_interp_limit(invalid, 0, limit))) + elif limit_direction == 'both': + violate_limit = sorted(_interp_limit(invalid, limit, limit)) xvalues = getattr(xvalues, 'values', xvalues) yvalues = getattr(yvalues, 'values', yvalues) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index c52c41877d5c0..8e73c17684a16 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -931,6 +931,24 @@ def test_interp_limit_forward(self): limit_direction='FORWARD') assert_series_equal(result, expected) + def test_interp_unlimited(self): + # these test are for issue #16282 default Limit=None is unlimited + s = Series([np.nan, 1., 3., np.nan, np.nan, np.nan, 11., np.nan]) + expected = Series([1., 1., 3., 5., 7., 9., 11., 11.]) + result = s.interpolate(method='linear', + limit_direction='both') + assert_series_equal(result, expected) + + expected = Series([np.nan, 1., 3., 5., 7., 9., 11., 11.]) + result = s.interpolate(method='linear', + limit_direction='forward') + assert_series_equal(result, expected) + + expected = Series([1., 1., 3., 5., 7., 9., 11., np.nan]) + result = s.interpolate(method='linear', + limit_direction='backward') + assert_series_equal(result, expected) + def test_interp_limit_bad_direction(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 4893f99f7cf0f..57479be4d989f 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -18,6 +18,7 @@ def test_mut_exclusive(): com._mut_exclusive(a=1, b=2) assert com._mut_exclusive(a=1, b=None) == 1 assert com._mut_exclusive(major=None, major_axis=None) is None + assert com._mut_exclusive(a=None, b=2) == 2 def test_get_callable_name(): From d9a63d07e12a8cab2821814d449ddb66cedf90bb Mon Sep 17 00:00:00 2001 From: JimStearns206 Date: Tue, 23 May 2017 09:52:44 -0700 Subject: [PATCH 594/933] BUG: Render empty DataFrame as empty HTML table w/o raising IndexError. (#16441) * BUG: Render empty DataFrame as empty HTML table w/o raising IndexError. * TST: Test rendering of 2 empty-ish DataFrames (#15953) DataFrame with an index but no column, and one with a column but no index. Add entry to whatsnew. 
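As a sketch of the fixed behavior, mirroring the new ``test_render_empty_dfs`` test in this patch, each of these calls should now return an HTML string instead of raising ``IndexError`` (``Styler`` needs jinja2 installed):

```python
import pandas as pd
from pandas.io.formats.style import Styler

Styler(pd.DataFrame()).render()             # fully empty frame
pd.DataFrame(columns=['a']).style.render()  # a column but no rows
pd.DataFrame(index=['a']).style.render()    # rows but no columns
```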
--- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/io/formats/style.py | 37 ++++++++++++++------------- pandas/tests/io/formats/test_style.py | 10 ++++++++ 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index d5fd879d3f9bf..b4675e03c9996 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -54,6 +54,7 @@ I/O ^^^ - Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`) +- Bug that raised IndexError HTML-rendering an empty DataFrame (:issue:`15953`) Plotting diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index eac82ddde2318..3d7e0fcdc69b3 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -238,24 +238,25 @@ def format_attr(pair): "class": " ".join(cs), "is_visible": True}) - for c, value in enumerate(clabels[r]): - cs = [COL_HEADING_CLASS, "level%s" % r, "col%s" % c] - cs.extend(cell_context.get( - "col_headings", {}).get(r, {}).get(c, [])) - es = { - "type": "th", - "value": value, - "display_value": value, - "class": " ".join(cs), - "is_visible": _is_visible(c, r, col_lengths), - } - colspan = col_lengths.get((r, c), 0) - if colspan > 1: - es["attributes"] = [ - format_attr({"key": "colspan", "value": colspan}) - ] - row_es.append(es) - head.append(row_es) + if clabels: + for c, value in enumerate(clabels[r]): + cs = [COL_HEADING_CLASS, "level%s" % r, "col%s" % c] + cs.extend(cell_context.get( + "col_headings", {}).get(r, {}).get(c, [])) + es = { + "type": "th", + "value": value, + "display_value": value, + "class": " ".join(cs), + "is_visible": _is_visible(c, r, col_lengths), + } + colspan = col_lengths.get((r, c), 0) + if colspan > 1: + es["attributes"] = [ + format_attr({"key": "colspan", "value": colspan}) + ] + row_es.append(es) + head.append(row_es) if self.data.index.names and not all(x is None for x in self.data.index.names): diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index ee7356f12f498..9911888f758fb 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -103,6 +103,16 @@ def test_render(self): s.render() # it worked? + def test_render_empty_dfs(self): + empty_df = DataFrame() + es = Styler(empty_df) + es.render() + # An index but no columns + DataFrame(columns=['a']).style.render() + # A column but no index + DataFrame(index=['a']).style.render() + # No IndexError raised? + def test_render_double(self): df = pd.DataFrame({"A": [0, 1]}) style = lambda x: pd.Series(["color: red; border: 1px", From 92372c7632a92d7bd980534b5a78590b2d9453ad Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 May 2017 19:33:18 +0200 Subject: [PATCH 595/933] DOC: update make.py script (#16456) --- doc/make.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/doc/make.py b/doc/make.py index 316ce58812f82..acef563f301e4 100755 --- a/doc/make.py +++ b/doc/make.py @@ -34,39 +34,52 @@ SPHINX_BUILD = 'sphinxbuild' -def upload_dev(user='pandas'): +def _process_user(user): + if user is None or user is False: + user = '' + else: + user = user + '@' + return user + + +def upload_dev(user=None): 'push a copy to the pydata dev directory' - if os.system('cd build/html; rsync -avz . {0}@pandas.pydata.org' + user = _process_user(user) + if os.system('cd build/html; rsync -avz . 
{0}pandas.pydata.org' ':/usr/share/nginx/pandas/pandas-docs/dev/ -essh'.format(user)): raise SystemExit('Upload to Pydata Dev failed') -def upload_dev_pdf(user='pandas'): +def upload_dev_pdf(user=None): 'push a copy to the pydata dev directory' - if os.system('cd build/latex; scp pandas.pdf {0}@pandas.pydata.org' + user = _process_user(user) + if os.system('cd build/latex; scp pandas.pdf {0}pandas.pydata.org' ':/usr/share/nginx/pandas/pandas-docs/dev/'.format(user)): raise SystemExit('PDF upload to Pydata Dev failed') -def upload_stable(user='pandas'): +def upload_stable(user=None): 'push a copy to the pydata stable directory' - if os.system('cd build/html; rsync -avz . {0}@pandas.pydata.org' + user = _process_user(user) + if os.system('cd build/html; rsync -avz . {0}pandas.pydata.org' ':/usr/share/nginx/pandas/pandas-docs/stable/ -essh'.format(user)): raise SystemExit('Upload to stable failed') -def upload_stable_pdf(user='pandas'): +def upload_stable_pdf(user=None): 'push a copy to the pydata dev directory' - if os.system('cd build/latex; scp pandas.pdf {0}@pandas.pydata.org' + user = _process_user(user) + if os.system('cd build/latex; scp pandas.pdf {0}pandas.pydata.org' ':/usr/share/nginx/pandas/pandas-docs/stable/'.format(user)): raise SystemExit('PDF upload to stable failed') -def upload_prev(ver, doc_root='./', user='pandas'): +def upload_prev(ver, doc_root='./', user=None): 'push a copy of older release to appropriate version directory' + user = _process_user(user) local_dir = doc_root + 'build/html' remote_dir = '/usr/share/nginx/pandas/pandas-docs/version/%s/' % ver - cmd = 'cd %s; rsync -avz . %s@pandas.pydata.org:%s -essh' + cmd = 'cd %s; rsync -avz . %spandas.pydata.org:%s -essh' cmd = cmd % (local_dir, user, remote_dir) print(cmd) if os.system(cmd): @@ -74,7 +87,7 @@ def upload_prev(ver, doc_root='./', user='pandas'): 'Upload to %s from %s failed' % (remote_dir, local_dir)) local_dir = doc_root + 'build/latex' - pdf_cmd = 'cd %s; scp pandas.pdf %s@pandas.pydata.org:%s' + pdf_cmd = 'cd %s; scp pandas.pdf %spandas.pydata.org:%s' pdf_cmd = pdf_cmd % (local_dir, user, remote_dir) if os.system(pdf_cmd): raise SystemExit('Upload PDF to %s from %s failed' % (ver, doc_root)) From c93309827c3733e7884154a69c4d88b47ca1573e Mon Sep 17 00:00:00 2001 From: abarber4gh Date: Tue, 23 May 2017 12:36:27 -0700 Subject: [PATCH 596/933] DEPR: Change read_excel sheetname to sheet_name (#16442) * GH10559: Minor improvement: Change to_excel sheet name modify io/excel.py and relevant docs (io.rst) to use sheet_name for read_excel but allow sheetname to still be used for backwards compatibility. add test_excel to verify that sheet_name and sheetname args produce the same result. * GH10559: Minor improvement: Change to_excel sheet name added @deprecate_kwarg to read_excel as arg changes from sheetname to sheet_name. moved test comments into function, add assert_produces_warning. * GH10559: Minor improvement: Change to_excel sheet name remove manual arg change, use @deprecate_kwarg to read_excel as arg changes from sheetname to sheet_name. * GH10559: Minor improvement: Change to_excel sheet name shorten lines under 79 char. * GH10559: Minor improvement: Change to_excel sheet name update whats new 0.21.0 Deprecations section noting sheetname deprecated in favor of sheet_name. add sheetname deprecation in read_excel() docstring. 
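A short sketch of the deprecation; the workbook path and sheet name below are placeholders, not files shipped with this patch:

```python
import pandas as pd

# New keyword, consistent with DataFrame.to_excel()
df1 = pd.read_excel('path_to_file.xls', sheet_name='Sheet1')

# The old keyword still works, but @deprecate_kwarg now emits a
# FutureWarning and forwards the value to sheet_name
df2 = pd.read_excel('path_to_file.xls', sheetname='Sheet1')
```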
--- doc/source/io.rst | 16 ++++++++-------- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/io/excel.py | 19 ++++++++++++------- pandas/tests/io/test_excel.py | 12 ++++++++++++ 4 files changed, 33 insertions(+), 16 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 9692766505d7a..bca23dd18a0e3 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2561,12 +2561,12 @@ Reading Excel Files ''''''''''''''''''' In the most basic use-case, ``read_excel`` takes a path to an Excel -file, and the ``sheetname`` indicating which sheet to parse. +file, and the ``sheet_name`` indicating which sheet to parse. .. code-block:: python # Returns a DataFrame - read_excel('path_to_file.xls', sheetname='Sheet1') + read_excel('path_to_file.xls', sheet_name='Sheet1') .. _io.excel.excelfile_class: @@ -2634,12 +2634,12 @@ of sheet names can simply be passed to ``read_excel`` with no loss in performanc Specifying Sheets +++++++++++++++++ -.. note :: The second argument is ``sheetname``, not to be confused with ``ExcelFile.sheet_names`` +.. note :: The second argument is ``sheet_name``, not to be confused with ``ExcelFile.sheet_names`` .. note :: An ExcelFile's attribute ``sheet_names`` provides access to a list of sheets. -- The arguments ``sheetname`` allows specifying the sheet or sheets to read. -- The default value for ``sheetname`` is 0, indicating to read the first sheet +- The arguments ``sheet_name`` allows specifying the sheet or sheets to read. +- The default value for ``sheet_name`` is 0, indicating to read the first sheet - Pass a string to refer to the name of a particular sheet in the workbook. - Pass an integer to refer to the index of a sheet. Indices follow Python convention, beginning at 0. @@ -2670,18 +2670,18 @@ Using None to get all sheets: .. code-block:: python # Returns a dictionary of DataFrames - read_excel('path_to_file.xls',sheetname=None) + read_excel('path_to_file.xls',sheet_name=None) Using a list to get multiple sheets: .. code-block:: python # Returns the 1st and 4th sheet, as a dictionary of DataFrames. - read_excel('path_to_file.xls',sheetname=['Sheet1',3]) + read_excel('path_to_file.xls',sheet_name=['Sheet1',3]) .. versionadded:: 0.16 -``read_excel`` can read more than one sheet, by setting ``sheetname`` to either +``read_excel`` can read more than one sheet, by setting ``sheet_name`` to either a list of sheet names, a list of sheet positions, or ``None`` to read all sheets. .. versionadded:: 0.13 diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5f58c710606af..4cb55ec6b117b 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -59,7 +59,7 @@ Other API Changes Deprecations ~~~~~~~~~~~~ - +- :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with to_excel() (:issue:`10559`). .. _whatsnew_0210.prior_deprecations: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index fba3d7559aeaf..81a36b21b3617 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -30,7 +30,7 @@ import pandas.compat.openpyxl_compat as openpyxl_compat from warnings import warn from distutils.version import LooseVersion -from pandas.util._decorators import Appender +from pandas.util._decorators import Appender, deprecate_kwarg from textwrap import fill __all__ = ["read_excel", "ExcelWriter", "ExcelFile"] @@ -48,7 +48,7 @@ The string could be a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is expected. 
For instance, a local file could be file://localhost/path/to/workbook.xlsx -sheetname : string, int, mixed list of strings/ints, or None, default 0 +sheet_name : string, int, mixed list of strings/ints, or None, default 0 Strings are used for sheet names, Integers are used in zero-indexed sheet positions. @@ -69,6 +69,10 @@ * [0,1,"Sheet5"] -> 1st, 2nd & 5th sheet as a dictionary of DataFrames * None -> All sheets as a dictionary of DataFrames +sheetname : string, int, mixed list of strings/ints, or None, default 0 + .. deprecated:: 0.21.0 + Use `sheet_name` instead + header : int, list of ints, default 0 Row (0-indexed) to use for the column labels of the parsed DataFrame. If a list of integers is passed those row positions will @@ -144,7 +148,7 @@ Returns ------- parsed : DataFrame or Dict of DataFrames - DataFrame from the passed in Excel file. See notes in sheetname + DataFrame from the passed in Excel file. See notes in sheet_name argument for more information on when a Dict of Dataframes is returned. """ @@ -189,8 +193,9 @@ def get_writer(engine_name): raise ValueError("No Excel writer '%s'" % engine_name) +@deprecate_kwarg('sheetname', 'sheet_name') @Appender(_read_excel_doc) -def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0, +def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0, index_col=None, names=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, has_index_names=None, converters=None, @@ -201,7 +206,7 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0, io = ExcelFile(io, engine=engine) return io._parse_excel( - sheetname=sheetname, header=header, skiprows=skiprows, names=names, + sheetname=sheet_name, header=header, skiprows=skiprows, names=names, index_col=index_col, parse_cols=parse_cols, parse_dates=parse_dates, date_parser=date_parser, na_values=na_values, thousands=thousands, convert_float=convert_float, has_index_names=has_index_names, @@ -266,7 +271,7 @@ def __init__(self, io, **kwds): def __fspath__(self): return self._io - def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0, + def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0, names=None, index_col=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, has_index_names=None, @@ -279,7 +284,7 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0, docstring for more info on accepted parameters """ - return self._parse_excel(sheetname=sheetname, header=header, + return self._parse_excel(sheetname=sheet_name, header=header, skiprows=skiprows, names=names, index_col=index_col, has_index_names=has_index_names, diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index bbf4f1107ac9e..0a79d4e8fd81b 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -544,6 +544,18 @@ def test_date_conversion_overflow(self): result = self.get_exceldf('testdateoverflow') tm.assert_frame_equal(result, expected) + def test_sheet_name_and_sheetname(self): + # GH10559: Minor improvement: Change "sheet_name" to "sheetname" + # GH10969: DOC: Consistent var names (sheetname vs sheet_name) + # GH12604: CLN GH10559 Rename sheetname variable to sheet_name + dfref = self.get_csv_refdf('test1') + df1 = self.get_exceldf('test1', sheet_name='Sheet1') # doc + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + df2 = self.get_exceldf('test1', 
sheetname='Sheet2') # bkwrd compat + + tm.assert_frame_equal(df1, dfref, check_names=False) + tm.assert_frame_equal(df2, dfref, check_names=False) + class XlrdTests(ReadingTestsBase): """ From 04356a83c0dc8a749c84e168535e6673f2548ec6 Mon Sep 17 00:00:00 2001 From: Erik Fredriksen Date: Tue, 23 May 2017 21:52:22 +0200 Subject: [PATCH 597/933] BUG: wide_to_long should check for unique id vars (#16382) (#16403) * BUG: wide_to_long should check for unique id vars (#16382) * Fix uncaught lint error * Add whatsnew note (bug fix) --- doc/source/whatsnew/v0.20.2.txt | 2 ++ pandas/core/reshape/reshape.py | 3 +++ pandas/tests/reshape/test_reshape.py | 11 +++++++++++ 3 files changed, 16 insertions(+) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index b4675e03c9996..e24b0c229c46c 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -81,8 +81,10 @@ Reshaping ^^^^^^^^^ - Bug in ``DataFrame.stack`` with unsorted levels in MultiIndex columns (:issue:`16323`) +- Bug in ``pd.wide_to_long()`` where no error was raised when ``i`` was not a unique identifier (:issue:`16382`) - Bug in ``Series.isin(..)`` with a list of tuples (:issue:`16394`) + Numeric ^^^^^^^ - Bug in .interpolate(), where limit_direction was not respected when limit=None (default) was passed (:issue:16282) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index b0ed6d4c4b84d..f944dfe22361a 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -1046,6 +1046,9 @@ def melt_stub(df, stub, i, j, value_vars, sep): else: i = list(i) + if df[i].duplicated().any(): + raise ValueError("the id variables need to uniquely identify each row") + value_vars = list(map(lambda stub: get_var_names(df, stub, sep, suffix), stubnames)) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 79626d89026a7..d47a95924bd10 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -976,3 +976,14 @@ def test_multiple_id_columns(self): exp_frame = exp_frame.set_index(['famid', 'birth', 'age'])[['ht']] long_frame = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age') tm.assert_frame_equal(long_frame, exp_frame) + + def test_non_unique_idvars(self): + # GH16382 + # Raise an error message if non unique id vars (i) are passed + df = pd.DataFrame({ + 'A_A1': [1, 2, 3, 4, 5], + 'B_B1': [1, 2, 3, 4, 5], + 'x': [1, 1, 1, 1, 1] + }) + with pytest.raises(ValueError): + wide_to_long(df, ['A_A', 'B_B'], i='x', j='colname') From 044feb537ec7e127822a62a7cb90e97d61ff5a56 Mon Sep 17 00:00:00 2001 From: Hugues Valois Date: Tue, 23 May 2017 18:24:41 -0700 Subject: [PATCH 598/933] BUG: Don't ignore figsize in df.boxplot (#16445) * Propagate the figsize via the rcParams, since matplotlib doesn't allow passing it as a parameter to gca(). * Update what's new for v0.21.0 and use rc_context() to temporarily change rcParams. * Move bug fix from 0.21.0 whatsnew to 0.20.2. * Allow passing in an rc to _gca() instead of just figsize, and added a test for boxplot figsize. * Fix style violations. 
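A minimal sketch of the fix, adapted from the new ``test_figsize`` test (requires matplotlib):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.rand(10, 5), columns=list('ABCDE'))

# figsize is now routed to gca() via rcParams, so a non-grouped
# boxplot really comes out at 12x8 inches
ax = df.boxplot(return_type='axes', figsize=(12, 8))
assert ax.figure.bbox_inches.width == 12
assert ax.figure.bbox_inches.height == 8
```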
--- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/plotting/_core.py | 14 +++++--------- pandas/tests/plotting/test_boxplot_method.py | 8 ++++++++ 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index e24b0c229c46c..6d6a148ed025f 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -62,6 +62,7 @@ Plotting - Bug in ``DataFrame.plot`` with a single column and a list-like ``color`` (:issue:`3486`) - Bug in ``plot`` where ``NaT`` in ``DatetimeIndex`` results in ``Timestamp.min`` (:issue: `12405`) +- Bug in ``DataFrame.boxplot`` where ``figsize`` keyword was not respected for non-grouped boxplots (:issue:`11959`) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 49076ffb469cb..9169eb86895fb 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -49,9 +49,10 @@ def _get_standard_kind(kind): return {'density': 'kde'}.get(kind, kind) -def _gca(): +def _gca(rc=None): import matplotlib.pyplot as plt - return plt.gca() + with plt.rc_context(rc): + return plt.gca() def _gcf(): @@ -1871,12 +1872,6 @@ def plot_series(data, kind='line', ax=None, # Series unique **kwds): import matplotlib.pyplot as plt - """ - If no axes is specified, check whether there are existing figures - If there is no existing figures, _gca() will - create a figure with the default figsize, causing the figsize=parameter to - be ignored. - """ if ax is None and len(plt.get_fignums()) > 0: ax = _gca() ax = MPLPlot._get_ax_layer(ax) @@ -2006,7 +2001,8 @@ def plot_group(keys, values, ax): "'by' is None") if ax is None: - ax = _gca() + rc = {'figure.figsize': figsize} if figsize is not None else {} + ax = _gca(rc) data = data._get_numeric_data() if columns is None: columns = data.columns diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index a4c70f7945347..ce8fb7a57c912 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -160,6 +160,14 @@ def test_boxplot_empty_column(self): df.loc[:, 0] = np.nan _check_plot_works(df.boxplot, return_type='axes') + @slow + def test_figsize(self): + df = DataFrame(np.random.rand(10, 5), + columns=['A', 'B', 'C', 'D', 'E']) + result = df.boxplot(return_type='axes', figsize=(12, 8)) + assert result.figure.bbox_inches.width == 12 + assert result.figure.bbox_inches.height == 8 + def test_fontsize(self): df = DataFrame({"a": [1, 2, 3, 4, 5, 6]}) self._check_ticks_props(df.boxplot("a", fontsize=16), From 692a5b94a038cc54fc7e18b77225efcecaccacab Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 24 May 2017 06:35:20 -0400 Subject: [PATCH 599/933] COMPAT: feather-format 0.4.0 compat (#16475) --- ci/requirements-3.5_OSX.sh | 2 +- pandas/tests/io/test_feather.py | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/ci/requirements-3.5_OSX.sh b/ci/requirements-3.5_OSX.sh index cfbd2882a8a2d..39ea1a0cf67bf 100644 --- a/ci/requirements-3.5_OSX.sh +++ b/ci/requirements-3.5_OSX.sh @@ -4,4 +4,4 @@ source activate pandas echo "install 35_OSX" -conda install -n pandas -c conda-forge feather-format +conda install -n pandas -c conda-forge feather-format==0.3.1 diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 948ab736af6c6..42ad9d3e0d8fe 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -10,6 +10,10 @@ from feather import FeatherError from pandas.util.testing 
import assert_frame_equal, ensure_clean import pandas.util.testing as tm +from distutils.version import LooseVersion + + +fv = LooseVersion(feather.__version__) @pytest.mark.single @@ -57,6 +61,7 @@ def test_basic(self): assert df.dttz.dtype.tz.zone == 'US/Eastern' self.check_round_trip(df) + @pytest.mark.skipif(fv >= '0.4.0', reason='fixed in 0.4.0') def test_strided_data_issues(self): # strided data issuehttps://github.com/wesm/feather/issues/97 @@ -76,12 +81,10 @@ def test_stringify_columns(self): df = pd.DataFrame(np.arange(12).reshape(4, 3)).copy() self.check_error_on_write(df, ValueError) + @pytest.mark.skipif(fv >= '0.4.0', reason='fixed in 0.4.0') def test_unsupported(self): - # period - df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) - self.check_error_on_write(df, ValueError) - + # timedelta df = pd.DataFrame({'a': pd.timedelta_range('1 day', periods=3)}) self.check_error_on_write(df, FeatherError) @@ -89,6 +92,12 @@ def test_unsupported(self): df = pd.DataFrame({'a': ['a', 1, 2.0]}) self.check_error_on_write(df, ValueError) + def test_unsupported_other(self): + + # period + df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) + self.check_error_on_write(df, ValueError) + def test_write_with_index(self): df = pd.DataFrame({'A': [1, 2, 3]}) From 6cbd5589fd005f987381bacbaa79e96950cd5dcb Mon Sep 17 00:00:00 2001 From: David Cook Date: Wed, 24 May 2017 06:38:03 -0400 Subject: [PATCH 600/933] Fix test assertions (#16470) --- pandas/tests/groupby/test_nth.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 2 +- pandas/tests/indexes/period/test_construction.py | 6 +++--- pandas/tests/indexes/test_range.py | 14 +++++++------- pandas/tests/plotting/test_datetimelike.py | 6 +++--- pandas/tests/series/test_timeseries.py | 2 +- pandas/tests/test_lib.py | 6 +++--- pandas/tests/test_strings.py | 8 ++++---- 8 files changed, 23 insertions(+), 23 deletions(-) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 7912b4bf3bdf6..47e6e7839422a 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -153,8 +153,8 @@ def test_nth(self): expected = s.groupby(g).first() expected2 = s.groupby(g).apply(lambda x: x.iloc[0]) assert_series_equal(expected2, expected, check_names=False) - assert expected.name, 0 assert expected.name == 1 + assert expected2.name == 1 # validate first v = s[g == 1].iloc[0] diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 61f7ac8abaf09..a47db755b44af 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -625,7 +625,7 @@ def test_to_datetime_iso8601(self): def test_to_datetime_default(self): rs = to_datetime('2001') xp = datetime(2001, 1, 1) - assert rs, xp + assert rs == xp # dayfirst is essentially broken diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 6a188c0987f91..e5b889e100307 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -135,15 +135,15 @@ def test_constructor_fromarraylike(self): result = PeriodIndex(idx, freq=offsets.MonthEnd()) tm.assert_index_equal(result, idx) - assert result.freq, 'M' + assert result.freq == 'M' result = PeriodIndex(idx, freq='2M') tm.assert_index_equal(result, idx.asfreq('2M')) - assert result.freq, '2M' + assert result.freq == '2M' result = PeriodIndex(idx, 
freq=offsets.MonthEnd(2)) tm.assert_index_equal(result, idx.asfreq('2M')) - assert result.freq, '2M' + assert result.freq == '2M' result = PeriodIndex(idx, freq='D') exp = idx.asfreq('D', 'e') diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index d140a2503984e..c7af0954cf483 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -172,16 +172,16 @@ def test_constructor_name(self): copy = RangeIndex(orig) copy.name = 'copy' - assert orig.name, 'original' - assert copy.name, 'copy' + assert orig.name == 'original' + assert copy.name == 'copy' new = Index(copy) - assert new.name, 'copy' + assert new.name == 'copy' new.name = 'new' - assert orig.name, 'original' - assert new.name, 'copy' - assert new.name, 'new' + assert orig.name == 'original' + assert copy.name == 'copy' + assert new.name == 'new' def test_numeric_compat2(self): # validate that we are handling the RangeIndex overrides to numeric ops @@ -273,7 +273,7 @@ def test_repr(self): expected = "RangeIndex(start=0, stop=5, step=1, name='Foo')" else: expected = "RangeIndex(start=0, stop=5, step=1, name=u'Foo')" - assert result, expected + assert result == expected result = eval(result) tm.assert_index_equal(result, i, exact=True) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 92537059218d6..cff83c2ff0a3a 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -843,7 +843,7 @@ def test_to_weekly_resampling(self): tsplot(high, plt.Axes.plot) lines = tsplot(low, plt.Axes.plot) for l in lines: - assert PeriodIndex(data=l.get_xdata()).freq, idxh.freq + assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq @slow def test_from_weekly_resampling(self): @@ -858,7 +858,7 @@ def test_from_weekly_resampling(self): expected_l = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, 1549, 1553, 1558, 1562], dtype=np.float64) for l in ax.get_lines(): - assert PeriodIndex(data=l.get_xdata()).freq, idxh.freq + assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq xdata = l.get_xdata(orig=False) if len(xdata) == 12: # idxl lines tm.assert_numpy_array_equal(xdata, expected_l) @@ -873,7 +873,7 @@ def test_from_weekly_resampling(self): tsplot(low, plt.Axes.plot) lines = tsplot(high, plt.Axes.plot) for l in lines: - assert PeriodIndex(data=l.get_xdata()).freq, idxh.freq + assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq xdata = l.get_xdata(orig=False) if len(xdata) == 12: # idxl lines tm.assert_numpy_array_equal(xdata, expected_l) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index d5517bdcceac7..6018260708335 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -916,7 +916,7 @@ def test_from_M8_structured(self): assert df['Forecasting'][0] == dates[0][1] s = Series(arr['Date']) - assert s[0], Timestamp + assert isinstance(s[0], Timestamp) assert s[0] == dates[0][0] s = Series.from_array(arr['Date'], Index([0])) diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index df97095035952..6be687e26e985 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -13,15 +13,15 @@ class TestMisc(object): def test_max_len_string_array(self): arr = a = np.array(['foo', 'b', np.nan], dtype='object') - assert lib.max_len_string_array(arr), 3 + assert lib.max_len_string_array(arr) == 3 # unicode arr = a.astype('U').astype(object) - assert 
lib.max_len_string_array(arr), 3 + assert lib.max_len_string_array(arr) == 3 # bytes for python3 arr = a.astype('S').astype(object) - assert lib.max_len_string_array(arr), 3 + assert lib.max_len_string_array(arr) == 3 # raises pytest.raises(TypeError, diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f28a5926087ac..bb31fb9260160 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -2102,12 +2102,12 @@ def test_split_with_name(self): idx = Index(['a,b', 'c,d'], name='xxx') res = idx.str.split(',') exp = Index([['a', 'b'], ['c', 'd']], name='xxx') - assert res.nlevels, 1 + assert res.nlevels == 1 tm.assert_index_equal(res, exp) res = idx.str.split(',', expand=True) exp = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')]) - assert res.nlevels, 2 + assert res.nlevels == 2 tm.assert_index_equal(res, exp) def test_partition_series(self): @@ -2247,13 +2247,13 @@ def test_partition_with_name(self): idx = Index(['a,b', 'c,d'], name='xxx') res = idx.str.partition(',') exp = MultiIndex.from_tuples([('a', ',', 'b'), ('c', ',', 'd')]) - assert res.nlevels, 3 + assert res.nlevels == 3 tm.assert_index_equal(res, exp) # should preserve name res = idx.str.partition(',', expand=False) exp = Index(np.array([('a', ',', 'b'), ('c', ',', 'd')]), name='xxx') - assert res.nlevels, 1 + assert res.nlevels == 1 tm.assert_index_equal(res, exp) def test_pipe_failures(self): From b0038ac72721058a3ae71f1dbcaa24d2f10f23c0 Mon Sep 17 00:00:00 2001 From: Hugues Valois Date: Wed, 24 May 2017 05:11:57 -0700 Subject: [PATCH 601/933] BUG: strange timeseries plot behavior (#16461) validation tests, closes #6608. --- pandas/tests/plotting/test_datetimelike.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index cff83c2ff0a3a..0e15aaa2555f4 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -1349,6 +1349,22 @@ def test_hist(self): w2 = np.arange(0, 1, .1)[::-1] self.plt.hist([x, x], weights=[w1, w2]) + @slow + def test_overlapping_datetime(self): + # GB 6608 + s1 = Series([1, 2, 3], index=[datetime(1995, 12, 31), + datetime(2000, 12, 31), + datetime(2005, 12, 31)]) + s2 = Series([1, 2, 3], index=[datetime(1997, 12, 31), + datetime(2003, 12, 31), + datetime(2008, 12, 31)]) + + # plot first series, then add the second series to those axes, + # then try adding the first series again + ax = s1.plot() + s2.plot(ax=ax) + s1.plot(ax=ax) + def _check_plot_works(f, freq=None, series=None, *args, **kwargs): import matplotlib.pyplot as plt From 7271f50c234faeaaac177a4e1829b7401ea252ee Mon Sep 17 00:00:00 2001 From: mjlove12 Date: Wed, 24 May 2017 09:28:04 -0400 Subject: [PATCH 602/933] ENH: Improve error message for read_csv header argument containing non int types. GH16338. (#16351) Adds error "header must be integer or list of integers" when the header argument is a list, tuple or numpy array containing non-integers. Initially intended to read_csv, but applies to other functions with similar header arguments. GH16338 refers to a case in which the user mixes up the "names" and "header" arguments. 
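A sketch of the new validation (the two-row CSV is inlined purely for illustration):

```python
import pandas as pd
from pandas.compat import StringIO

data = "1,2\n3,4"
for bad_header in (['a', 'b'], 'string_header'):
    try:
        pd.read_csv(StringIO(data), header=bad_header)
    except ValueError as err:
        # "header must be integer or list of integers"
        print(err)
```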
Revising PR16351 based on feedback Revising PR16351 lint issues Adding release note in whatsnew v0.21.0 for PR16351 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/parsers.py | 6 ++++++ pandas/tests/io/parser/header.py | 9 +++++++++ 3 files changed, 16 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4cb55ec6b117b..be05bbb6bf09c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -94,6 +94,7 @@ Indexing I/O ^^^ +- Bug in ``pd.read_csv()`` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) Plotting diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index ce8643504932f..e287d92f67ef6 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1164,6 +1164,8 @@ def __init__(self, kwds): # validate header options for mi self.header = kwds.get('header') if isinstance(self.header, (list, tuple, np.ndarray)): + if not all(map(is_integer, self.header)): + raise ValueError("header must be integer or list of integers") if kwds.get('as_recarray'): raise ValueError("cannot specify as_recarray when " "specifying a multi-index header") @@ -1184,6 +1186,10 @@ def __init__(self, kwds): raise ValueError("index_col must only contain row numbers " "when specifying a multi-index header") + # GH 16338 + elif self.header is not None and not is_integer(self.header): + raise ValueError("header must be integer or list of integers") + self._name_processed = False self._first_chunk = True diff --git a/pandas/tests/io/parser/header.py b/pandas/tests/io/parser/header.py index 1e5fb42b4c1d4..4935fd2cd910a 100644 --- a/pandas/tests/io/parser/header.py +++ b/pandas/tests/io/parser/header.py @@ -277,3 +277,12 @@ def test_no_header(self): tm.assert_index_equal(df.columns, Index(lrange(5))) tm.assert_index_equal(df2.columns, Index(names)) + + def test_non_int_header(self): + # GH 16338 + msg = 'header must be integer or list of integers' + data = """1,2\n3,4""" + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(data), sep=',', header=['a', 'b']) + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(data), sep=',', header='string_header') From 97ad3fb9c87226ad983267e2891dbbf68432b8ea Mon Sep 17 00:00:00 2001 From: Pankaj Pandey Date: Wed, 24 May 2017 17:36:05 +0000 Subject: [PATCH 603/933] BUG: Fix warning with c engine when skipping lines with comment (#16455) * Fix correct warning with c engine when skipping lines Fixed bug where c engine would not print warnings for lines it skipped in case the skipped line had an inline comment. Also, its accounting of number of fields in such lines would be off by one. * Use `tm.capture_stderr` to capture stderr * Add bug fix note in `whatsnew/v0.20.3.txt` * Move test to CParserTests The behavior is only applicable on the `c` engine. 
* Update whatsnew bug entry as per review

---
 doc/source/whatsnew/v0.20.2.txt | 1 +
 pandas/_libs/src/parser/tokenizer.c | 3 +++
 pandas/tests/io/parser/c_parser_only.py | 29 +++++++++++++++++++++++++
 3 files changed, 33 insertions(+)

diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index 6d6a148ed025f..6c9728191f5b6 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -53,6 +53,7 @@ Indexing

 I/O
 ^^^

+- Bug in pd.read_csv() when comment is passed in space deliminted text files (:issue:`16472`)
 - Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`)
 - Bug that raised IndexError HTML-rendering an empty DataFrame (:issue:`15953`)

diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
index 6b0775e54da0c..be23ebb023383 100644
--- a/pandas/_libs/src/parser/tokenizer.c
+++ b/pandas/_libs/src/parser/tokenizer.c
@@ -832,6 +832,9 @@ int tokenize_bytes(parser_t *self, size_t line_limit, int start_lines) {
             } else if (IS_CARRIAGE(c)) {
                 self->state = EAT_CRNL;
                 break;
+            } else if (IS_COMMENT_CHAR(c)) {
+                self->state = EAT_COMMENT;
+                break;
             } else if (!IS_WHITESPACE(c)) {
                 self->state = START_FIELD;
                 // fall through to subsequent state

diff --git a/pandas/tests/io/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py
index 3e7a648474bc3..56ac10404b7b2 100644
--- a/pandas/tests/io/parser/c_parser_only.py
+++ b/pandas/tests/io/parser/c_parser_only.py
@@ -7,6 +7,8 @@ further arguments when parsing.
 """

+import sys
+
 import pytest

 import numpy as np
@@ -417,3 +419,30 @@ def test_data_after_quote(self):
         expected = DataFrame({'a': ['1', 'ba']})

         tm.assert_frame_equal(result, expected)
+
+    @tm.capture_stderr
+    def test_comment_whitespace_delimited(self):
+        test_input = """\
+1 2
+2 2 3
+3 2 3 # 3 fields
+4 2 3# 3 fields
+5 2 # 2 fields
+6 2# 2 fields
+7 # 1 field, NaN
+8# 1 field, NaN
+9 2 3 # skipped line
+# comment"""
+        df = self.read_csv(StringIO(test_input), comment='#', header=None,
+                           delimiter='\\s+', skiprows=0,
+                           error_bad_lines=False)
+        error = sys.stderr.getvalue()
+        # skipped lines 2, 3, 4, 9
+        for line_num in (2, 3, 4, 9):
+            assert 'Skipping line {}'.format(line_num) in error, error
+        expected = DataFrame([[1, 2],
+                              [5, 2],
+                              [6, 2],
+                              [7, np.nan],
+                              [8, np.nan]])
+        tm.assert_frame_equal(df, expected)

From 0b6d483fef6ec0d12605a582284af6eed36eb682 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Wed, 24 May 2017 18:48:11 -0400
Subject: [PATCH 604/933] ENH: add nthreads option to feather-format IO
 (#16476)

ENH: add nthreads option to feather-format IO

---
 doc/source/whatsnew/v0.21.0.txt | 1 +
 pandas/io/feather_format.py | 13 +++++++++++--
 pandas/tests/io/test_feather.py | 10 ++++++++--
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index be05bbb6bf09c..f1289c490e4fb 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -35,6 +35,7 @@ Other Enhancements
 - ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`)
 - :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__
 - :func:`api.types.infer_dtype` now infers decimals. (:issue: `15690`)
+- :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`)

 .. 
_whatsnew_0210.api_breaking: diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 8bdb23fc1ae6a..86d58caa5e816 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -43,6 +43,7 @@ def to_feather(df, path): df : DataFrame path : string File path + """ path = _stringify_path(path) if not isinstance(df, DataFrame): @@ -83,7 +84,7 @@ def to_feather(df, path): feather.write_dataframe(df, path) -def read_feather(path): +def read_feather(path, nthreads=1): """ Load a feather-format object from the file path @@ -93,6 +94,10 @@ def read_feather(path): ---------- path : string File path + nthreads : int, default 1 + Number of CPU threads to use when reading to pandas.DataFrame + + .. versionadded 0.21.0 Returns ------- @@ -102,4 +107,8 @@ def read_feather(path): feather = _try_import() path = _stringify_path(path) - return feather.read_dataframe(path) + + if feather.__version__ < LooseVersion('0.4.0'): + return feather.read_dataframe(path) + + return feather.read_dataframe(path, nthreads=nthreads) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 42ad9d3e0d8fe..dadfe7ca87e48 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -27,11 +27,11 @@ def check_error_on_write(self, df, exc): with ensure_clean() as path: to_feather(df, path) - def check_round_trip(self, df): + def check_round_trip(self, df, **kwargs): with ensure_clean() as path: to_feather(df, path) - result = read_feather(path) + result = read_feather(path, **kwargs) assert_frame_equal(result, df) def test_error(self): @@ -98,6 +98,12 @@ def test_unsupported_other(self): df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) self.check_error_on_write(df, ValueError) + @pytest.mark.skipif(fv < '0.4.0', reason='new in 0.4.0') + def test_rw_nthreads(self): + + df = pd.DataFrame({'A': np.arange(100000)}) + self.check_round_trip(df, nthreads=2) + def test_write_with_index(self): df = pd.DataFrame({'A': [1, 2, 3]}) From 58775f700876ada59f6b6f797f48573344bbf0ff Mon Sep 17 00:00:00 2001 From: Vincent La Date: Wed, 24 May 2017 16:03:45 -0700 Subject: [PATCH 605/933] Adding filter to api.rst (#16484) --- doc/source/api.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/api.rst b/doc/source/api.rst index cb5136df1ff8b..83abd84d36eb4 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1738,6 +1738,7 @@ application to columns of a specific data type. 
DataFrameGroupBy.diff DataFrameGroupBy.ffill DataFrameGroupBy.fillna + DataFrameGroupBy.filter DataFrameGroupBy.hist DataFrameGroupBy.idxmax DataFrameGroupBy.idxmin From 85080aaf332711dbaebf4b4b266df053ccc6b52c Mon Sep 17 00:00:00 2001 From: Patrick Luo Date: Wed, 24 May 2017 19:15:32 -0400 Subject: [PATCH 606/933] BUG: handle nan values in DataFrame.update when overwrite=False (#15593) (#16430) --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/core/frame.py | 8 ++++---- pandas/tests/frame/test_combine_concat.py | 22 ++++++++++++++++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 6c9728191f5b6..9f713ef1ccfb8 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -37,6 +37,7 @@ Bug Fixes ~~~~~~~~~ - Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`) +- Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 78a369761afc1..beea0e9d3d8ad 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3921,13 +3921,13 @@ def update(self, other, join='left', overwrite=True, filter_func=None, if overwrite: mask = isnull(that) - - # don't overwrite columns unecessarily - if mask.all(): - continue else: mask = notnull(this) + # don't overwrite columns unecessarily + if mask.all(): + continue + self[col] = expressions.where(mask, this, that, raise_on_error=True) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 688cacdee263e..f32efccf85fc6 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -763,3 +763,25 @@ def test_concat_datetime_datetime64_frame(self): # it works! 
pd.concat([df1, df2_obj]) + + +class TestDataFrameUpdate(TestData): + + def test_update_nan(self): + # #15593 #15617 + # test 1 + df1 = DataFrame({'A': [1.0, 2, 3], 'B': date_range('2000', periods=3)}) + df2 = DataFrame({'A': [None, 2, 3]}) + expected = df1.copy() + df1.update(df2, overwrite=False) + + tm.assert_frame_equal(df1, expected) + + # test 2 + df1 = DataFrame({'A': [1.0, None, 3], 'B': date_range('2000', periods=3)}) + df2 = DataFrame({'A': [None, 2, 3]}) + expected = DataFrame({'A': [1.0, 2, 3], 'B': date_range('2000', periods=3)}) + df1.update(df2, overwrite=False) + + tm.assert_frame_equal(df1, expected) + From 05d0667169e4b770cfad94f4a19c1d6ae9a98536 Mon Sep 17 00:00:00 2001 From: RobinFiveWords Date: Wed, 24 May 2017 19:19:06 -0400 Subject: [PATCH 607/933] BUG: reshape fix for maybe_infer_to_datetimelike() closes #16362 Author: RobinFiveWords Closes #16395 from RobinFiveWords/cast-infer-datetime-reshape-fix and squashes the following commits: 7ad1e7d [RobinFiveWords] redid lost changes to cast.py and test_cast.py afa2eeb [RobinFiveWords] added whatsnew0.20.2 entry 7a35624 [RobinFiveWords] removed whatsnew entry again 2ec60a6 [RobinFiveWords] added back whatsnew change --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/core/dtypes/cast.py | 2 +- pandas/tests/dtypes/test_cast.py | 13 ++++++++++++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 9f713ef1ccfb8..b8810f5ee314c 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -86,6 +86,7 @@ Reshaping - Bug in ``DataFrame.stack`` with unsorted levels in MultiIndex columns (:issue:`16323`) - Bug in ``pd.wide_to_long()`` where no error was raised when ``i`` was not a unique identifier (:issue:`16382`) - Bug in ``Series.isin(..)`` with a list of tuples (:issue:`16394`) +- Bug in construction of a ``DataFrame`` with mixed dtypes including an all-NaT column. 
(:issue:`16395`) Numeric diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 19d3792f73de7..fd61813a57c98 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -837,7 +837,7 @@ def try_timedelta(v): try: return to_timedelta(v)._values.reshape(shape) except: - return v + return v.reshape(shape) inferred_type = lib.infer_datetimelike_array(_ensure_object(v)) diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index e92724a5d9cd4..767e99d98cf29 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -9,7 +9,7 @@ from datetime import datetime, timedelta, date import numpy as np -from pandas import Timedelta, Timestamp, DatetimeIndex +from pandas import Timedelta, Timestamp, DatetimeIndex, DataFrame, NaT from pandas.core.dtypes.cast import ( maybe_downcast_to_dtype, @@ -213,6 +213,17 @@ def test_maybe_convert_scalar(self): result = maybe_convert_scalar(Timedelta('1 day 1 min')) assert result == Timedelta('1 day 1 min').value + def test_maybe_infer_to_datetimelike(self): + # GH16362 + # pandas=0.20.1 raises IndexError: tuple index out of range + result = DataFrame(np.array([[NaT, 'a', 'b', 0], + [NaT, 'b', 'c', 1]])) + assert result.size == 8 + # this construction was fine + result = DataFrame(np.array([[NaT, 'a', 0], + [NaT, 'b', 1]])) + assert result.size == 6 + class TestConvert(object): From b0a51df89e40691608bb8d9aa80f2d7e4861b9e1 Mon Sep 17 00:00:00 2001 From: lloydkirk Date: Wed, 24 May 2017 23:24:14 +0000 Subject: [PATCH 608/933] Error with .drop([]) on non-unique index (#16428) --- doc/source/whatsnew/v0.20.2.txt | 2 ++ pandas/core/generic.py | 3 ++- pandas/tests/frame/test_axis_select_reindex.py | 6 ++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index b8810f5ee314c..cce30b60dac55 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -100,3 +100,5 @@ Categorical Other ^^^^^ + +- Bug in ``pd.drop([])`` for DataFrame with non-unique indices (:issue:`16270`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2e7d8693d48dd..b19a0751ea2e4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -12,6 +12,7 @@ from pandas._libs import tslib, lib from pandas.core.dtypes.common import ( _ensure_int64, + _ensure_object, needs_i8_conversion, is_scalar, is_number, @@ -2076,7 +2077,7 @@ def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'): result = dropped else: - labels = com._index_labels_to_array(labels) + labels = _ensure_object(com._index_labels_to_array(labels)) if level is not None: if not isinstance(axis, MultiIndex): raise AssertionError('axis must be a MultiIndex') diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index a6326083c1bee..87d942101f5f1 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -61,6 +61,11 @@ def test_drop_names(self): expected = Index(['e', 'f'], name='second') tm.assert_index_equal(dropped.columns, expected) + # GH 16398 + dropped = df.drop([], errors='ignore') + expected = Index(['a', 'b', 'c'], name='first') + tm.assert_index_equal(dropped.index, expected) + def test_drop_col_still_multiindex(self): arrays = [['a', 'b', 'c', 'top'], ['', '', '', 'OD'], @@ -100,6 +105,7 @@ def test_drop(self): columns=['a', 'a', 'b']) assert_frame_equal(nu_df.drop('a', axis=1), 
nu_df[['b']]) assert_frame_equal(nu_df.drop('b', axis='columns'), nu_df['a']) + assert_frame_equal(nu_df.drop([]), nu_df) # GH 16398 nu_df = nu_df.set_index(pd.Index(['X', 'Y', 'X'])) nu_df.columns = list('abc') From 96f3e7ceb1363a475f0843045ca282df45e631dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrew=20=E4=BA=AE?= Date: Wed, 24 May 2017 19:09:25 -0700 Subject: [PATCH 609/933] BUG: Silence numpy warnings when broadcasting comparison ops (GH16378, GH16306) (#16433) TST: test for fix of GH16378, GH16306 --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/core/ops.py | 3 ++- pandas/tests/frame/test_analytics.py | 13 +++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index cce30b60dac55..86e7812765b40 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -43,6 +43,7 @@ Conversion ^^^^^^^^^^ - Bug in ``pd.to_numeric()`` in which empty data inputs were causing Python to crash (:issue:`16302`) +- Silence numpy warnings when broadcasting DataFrame to Series with comparison ops (:issue:`16378`, :issue:`16306`) Indexing diff --git a/pandas/core/ops.py b/pandas/core/ops.py index e7cfbdb0fc9c6..55473ec8d7cad 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1250,7 +1250,8 @@ def _flex_comp_method_FRAME(op, name, str_rep=None, default_axis='columns', masker=False): def na_op(x, y): try: - result = op(x, y) + with np.errstate(invalid='ignore'): + result = op(x, y) except TypeError: xrav = x.ravel() result = np.empty(x.size, dtype=bool) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index da96fce36f3c9..73c6c0e7279a6 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -2081,3 +2081,16 @@ def test_n_duplicate_index(self, df_duplicates, n, order): result = df.nlargest(n, order) expected = df.sort_values(order, ascending=False).head(n) tm.assert_frame_equal(result, expected) + + def test_series_broadcasting(self): + # smoke test for numpy warnings + # GH 16378, GH 16306 + df = DataFrame([1.0, 1.0, 1.0]) + df_nan = DataFrame({'A': [np.nan, 2.0, np.nan]}) + s = Series([1, 1, 1]) + s_nan = Series([np.nan, np.nan, 1]) + + with tm.assert_produces_warning(None): + df_nan.clip_lower(s, axis=0) + for op in ['lt', 'le', 'gt', 'ge', 'eq', 'ne']: + getattr(df, op)(s_nan, axis=0) From cfcf07bc0b30044038ab1d1aa28416a8416f2b0b Mon Sep 17 00:00:00 2001 From: Paula Date: Wed, 24 May 2017 19:19:33 -0700 Subject: [PATCH 610/933] DOC: Added set_index examples (#16467) --- pandas/core/frame.py | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index beea0e9d3d8ad..48cb1ca2ebd2a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2947,9 +2947,44 @@ def set_index(self, keys, drop=True, append=False, inplace=False, Examples -------- - >>> indexed_df = df.set_index(['A', 'B']) - >>> indexed_df2 = df.set_index(['A', [0, 1, 2, 0, 1, 2]]) - >>> indexed_df3 = df.set_index([[0, 1, 2, 0, 1, 2]]) + >>> df = pd.DataFrame({'month': [1, 4, 7, 10], + ... 'year': [2012, 2014, 2013, 2014], + ... 
'sale':[55, 40, 84, 31]}) + month sale year + 0 1 55 2012 + 1 4 40 2014 + 2 7 84 2013 + 3 10 31 2014 + + Set the index to become the 'month' column: + + >>> df.set_index('month') + sale year + month + 1 55 2012 + 4 40 2014 + 7 84 2013 + 10 31 2014 + + Create a multi-index using columns 'year' and 'month': + + >>> df.set_index(['year', 'month']) + sale + year month + 2012 1 55 + 2014 4 40 + 2013 7 84 + 2014 10 31 + + Create a multi-index using a set of values and a column: + + >>> df.set_index([[1, 2, 3, 4], 'year']) + month sale + year + 1 2012 1 55 + 2 2014 4 40 + 3 2013 7 84 + 4 2014 10 31 Returns ------- From d7962c50df5edcee19d6fce80a030eef9a3c7ae4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 24 May 2017 22:24:05 -0500 Subject: [PATCH 611/933] CLN: Small linting failures (#16491) --- pandas/tests/frame/test_combine_concat.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index f32efccf85fc6..e82faaeef2986 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -778,10 +778,11 @@ def test_update_nan(self): tm.assert_frame_equal(df1, expected) # test 2 - df1 = DataFrame({'A': [1.0, None, 3], 'B': date_range('2000', periods=3)}) + df1 = DataFrame({'A': [1.0, None, 3], + 'B': date_range('2000', periods=3)}) df2 = DataFrame({'A': [None, 2, 3]}) - expected = DataFrame({'A': [1.0, 2, 3], 'B': date_range('2000', periods=3)}) + expected = DataFrame({'A': [1.0, 2, 3], + 'B': date_range('2000', periods=3)}) df1.update(df2, overwrite=False) tm.assert_frame_equal(df1, expected) - From 348afebfdd97d3e630b628c9085dd971e4336a5c Mon Sep 17 00:00:00 2001 From: guygoldberg Date: Thu, 25 May 2017 13:21:48 +0300 Subject: [PATCH 612/933] ENH: Support inplace clip (#15388) (#16462) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/generic.py | 51 ++++++++++++++++++++-------- pandas/tests/frame/test_analytics.py | 33 +++++++++++++++--- 3 files changed, 67 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f1289c490e4fb..b4ca3f011a81d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -36,6 +36,7 @@ Other Enhancements - :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__ - :func:`api.types.infer_dtype` now infers decimals. (:issue: `15690`) - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) +- :func:`DataFrame.clip()` and :func: `Series.cip()` have gained an inplace argument. (:issue: `15388`) .. 
_whatsnew_0210.api_breaking: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b19a0751ea2e4..f8da6851d18bc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4120,8 +4120,7 @@ def isnull(self): def notnull(self): return notnull(self).__finalize__(self) - def _clip_with_scalar(self, lower, upper): - + def _clip_with_scalar(self, lower, upper, inplace=False): if ((lower is not None and np.any(isnull(lower))) or (upper is not None and np.any(isnull(upper)))): raise ValueError("Cannot use an NA value as a clip threshold") @@ -4137,10 +4136,16 @@ def _clip_with_scalar(self, lower, upper): if np.any(mask): result[mask] = np.nan - return self._constructor( - result, **self._construct_axes_dict()).__finalize__(self) + axes_dict = self._construct_axes_dict() + result = self._constructor(result, **axes_dict).__finalize__(self) + + if inplace: + self._update_inplace(result) + else: + return result - def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): + def clip(self, lower=None, upper=None, axis=None, inplace=False, + *args, **kwargs): """ Trim values at input threshold(s). @@ -4150,6 +4155,9 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): upper : float or array_like, default None axis : int or string axis name, optional Align object with lower and upper along the given axis. + inplace : boolean, default False + Whether to perform the operation in place on the data + .. versionadded:: 0.21.0 Returns ------- @@ -4192,6 +4200,8 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): if isinstance(self, ABCPanel): raise NotImplementedError("clip is not supported yet for panels") + inplace = validate_bool_kwarg(inplace, 'inplace') + axis = nv.validate_clip_with_axis(axis, args, kwargs) # GH 2747 (arguments were reversed) @@ -4202,17 +4212,20 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): # fast-path for scalars if ((lower is None or (is_scalar(lower) and is_number(lower))) and (upper is None or (is_scalar(upper) and is_number(upper)))): - return self._clip_with_scalar(lower, upper) + return self._clip_with_scalar(lower, upper, inplace=inplace) result = self if lower is not None: - result = result.clip_lower(lower, axis) + result = result.clip_lower(lower, axis, inplace=inplace) if upper is not None: - result = result.clip_upper(upper, axis) + if inplace: + result = self + + result = result.clip_upper(upper, axis, inplace=inplace) return result - def clip_upper(self, threshold, axis=None): + def clip_upper(self, threshold, axis=None, inplace=False): """ Return copy of input with values above given value(s) truncated. @@ -4221,6 +4234,9 @@ def clip_upper(self, threshold, axis=None): threshold : float or array_like axis : int or string axis name, optional Align object with threshold along the given axis. + inplace : boolean, default False + Whether to perform the operation in place on the data + .. 
versionadded:: 0.21.0 See Also -------- @@ -4234,12 +4250,14 @@ def clip_upper(self, threshold, axis=None): raise ValueError("Cannot use an NA value as a clip threshold") if is_scalar(threshold) and is_number(threshold): - return self._clip_with_scalar(None, threshold) + return self._clip_with_scalar(None, threshold, inplace=inplace) + + inplace = validate_bool_kwarg(inplace, 'inplace') subset = self.le(threshold, axis=axis) | isnull(self) - return self.where(subset, threshold, axis=axis) + return self.where(subset, threshold, axis=axis, inplace=inplace) - def clip_lower(self, threshold, axis=None): + def clip_lower(self, threshold, axis=None, inplace=False): """ Return copy of the input with values below given value(s) truncated. @@ -4248,6 +4266,9 @@ def clip_lower(self, threshold, axis=None): threshold : float or array_like axis : int or string axis name, optional Align object with threshold along the given axis. + inplace : boolean, default False + Whether to perform the operation in place on the data + .. versionadded:: 0.21.0 See Also -------- @@ -4261,10 +4282,12 @@ def clip_lower(self, threshold, axis=None): raise ValueError("Cannot use an NA value as a clip threshold") if is_scalar(threshold) and is_number(threshold): - return self._clip_with_scalar(threshold, None) + return self._clip_with_scalar(threshold, None, inplace=inplace) + + inplace = validate_bool_kwarg(inplace, 'inplace') subset = self.ge(threshold, axis=axis) | isnull(self) - return self.where(subset, threshold, axis=axis) + return self.where(subset, threshold, axis=axis, inplace=inplace) def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, **kwargs): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 73c6c0e7279a6..943a93b27a78a 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1807,6 +1807,7 @@ def test_built_in_round(self): def test_clip(self): median = self.frame.median().median() + original = self.frame.copy() capped = self.frame.clip_upper(median) assert not (capped.values > median).any() @@ -1817,6 +1818,25 @@ def test_clip(self): double = self.frame.clip(upper=median, lower=median) assert not (double.values != median).any() + # Verify that self.frame was not changed inplace + assert (self.frame.values == original.values).all() + + def test_inplace_clip(self): + # GH #15388 + median = self.frame.median().median() + frame_copy = self.frame.copy() + + frame_copy.clip_upper(median, inplace=True) + assert not (frame_copy.values > median).any() + frame_copy = self.frame.copy() + + frame_copy.clip_lower(median, inplace=True) + assert not (frame_copy.values < median).any() + frame_copy = self.frame.copy() + + frame_copy.clip(upper=median, lower=median, inplace=True) + assert not (frame_copy.values != median).any() + def test_dataframe_clip(self): # GH #2747 df = DataFrame(np.random.randn(1000, 2)) @@ -1843,18 +1863,23 @@ def test_clip_mixed_numeric(self): 'B': [1., np.nan, 2.]}) tm.assert_frame_equal(result, expected, check_like=True) - def test_clip_against_series(self): + @pytest.mark.parametrize("inplace", [True, False]) + def test_clip_against_series(self, inplace): # GH #6966 df = DataFrame(np.random.randn(1000, 2)) lb = Series(np.random.randn(1000)) ub = lb + 1 - clipped_df = df.clip(lb, ub, axis=0) + original = df.copy() + clipped_df = df.clip(lb, ub, axis=0, inplace=inplace) + + if inplace: + clipped_df = df for i in range(2): - lb_mask = df.iloc[:, i] <= lb - ub_mask = 
df.iloc[:, i] >= ub + lb_mask = original.iloc[:, i] <= lb + ub_mask = original.iloc[:, i] >= ub mask = ~lb_mask & ~ub_mask result = clipped_df.loc[lb_mask, i] From e41fe7f52a7ae6be962e683f40500624b2ba2cf6 Mon Sep 17 00:00:00 2001 From: chernrick Date: Thu, 25 May 2017 03:23:19 -0700 Subject: [PATCH 613/933] 15819 rolling window on empty df (#16431) --- doc/source/whatsnew/v0.20.2.txt | 3 ++- pandas/core/window.py | 2 +- pandas/tests/test_window.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 86e7812765b40..1f71710d19e44 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -39,6 +39,7 @@ Bug Fixes - Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`) - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`) + Conversion ^^^^^^^^^^ @@ -73,7 +74,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - +- Bug creating datetime rolling window on an empty DataFrame (:issue:`15819`) Sparse diff --git a/pandas/core/window.py b/pandas/core/window.py index df8e0c05009f4..cf1bad706ae1d 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1074,7 +1074,7 @@ def validate(self): super(Rolling, self).validate() # we allow rolling on a datetimelike index - if (self.is_datetimelike and + if ((self.obj.empty or self.is_datetimelike) and isinstance(self.window, (compat.string_types, DateOffset, timedelta))): diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 634cd5fe2586b..6a640d62108b3 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -441,6 +441,20 @@ def test_closed(self): with pytest.raises(ValueError): df.rolling(window=3, closed='neither') + @pytest.mark.parametrize('roller', ['1s', 1]) + def tests_empty_df_rolling(self, roller): + # GH 15819 Verifies that datetime and integer rolling windows can be + # applied to empty DataFrames + expected = DataFrame() + result = DataFrame().rolling(roller).sum() + tm.assert_frame_equal(result, expected) + + # Verifies that datetime and integer rolling windows can be applied to + # empty DataFrames with datetime index + expected = DataFrame(index=pd.DatetimeIndex([])) + result = DataFrame(index=pd.DatetimeIndex([])).rolling(roller).sum() + tm.assert_frame_equal(result, expected) + class TestExpanding(Base): @@ -483,6 +497,24 @@ def test_numpy_compat(self): tm.assert_raises_regex(UnsupportedFunctionCall, msg, getattr(e, func), dtype=np.float64) + @pytest.mark.parametrize( + 'expander', + [1, pytest.mark.xfail( + reason='GH 16425 expanding with offset not supported')('1s')]) + def tests_empty_df_expanding(self, expander): + # GH 15819 Verifies that datetime and integer expanding windows can be + # applied to empty DataFrames + expected = DataFrame() + result = DataFrame().expanding(expander).sum() + tm.assert_frame_equal(result, expected) + + # Verifies that datetime and integer expanding windows can be applied + # to empty DataFrames with datetime index + expected = DataFrame(index=pd.DatetimeIndex([])) + result = DataFrame( + index=pd.DatetimeIndex([])).expanding(expander).sum() + tm.assert_frame_equal(result, expected) + class TestEWM(Base): From e81f3cc30725443a1d36ec27279f3fec4eed60ec Mon Sep 17 00:00:00 2001 From: Vincent La Date: Thu, 25 May 2017 10:44:37 -0700 Subject: [PATCH 614/933] DOC: Adding docstring examples (#16437) * Adding examples to fillna 
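
A minimal usage sketch of the rolling fix in PATCH 613 above, assuming a
pandas build that includes #16431:

    import pandas as pd

    # Offset-based and integer windows both work on empty frames now,
    # returning an empty result instead of raising.
    pd.DataFrame().rolling(2).sum()
    pd.DataFrame(index=pd.DatetimeIndex([])).rolling('2s').sum()
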
* putting better spacing * removing unnecessary values variable * Adding an example for method ffill * adding examples for drop method * Adding examples for sort_values * Fixing linter error moving fillna to generic and fixing a typo --- pandas/core/generic.py | 143 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f8da6851d18bc..da79912f7bd9e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2056,6 +2056,37 @@ def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'): Returns ------- dropped : type of caller + + Examples + -------- + >>> df = pd.DataFrame([[1, 2, 3, 4], + ... [5, 6, 7, 8], + ... [9, 1, 2, 3], + ... [4, 5, 6, 7] + ... ], + ... columns=list('ABCD')) + >>> df + A B C D + 0 1 2 3 4 + 1 5 6 7 8 + 2 9 1 2 3 + 3 4 5 6 7 + + Drop a row by index + + >>> df.drop([0, 1]) + A B C D + 2 9 1 2 3 + 3 4 5 6 7 + + Drop columns + + >>> df.drop(['A', 'B'], axis=1) + C D + 0 3 4 + 1 7 8 + 2 2 3 + 3 6 7 """ inplace = validate_bool_kwarg(inplace, 'inplace') axis = self._get_axis_number(axis) @@ -2169,6 +2200,66 @@ def add_suffix(self, suffix): Returns ------- sorted_obj : %(klass)s + + Examples + -------- + >>> df = pd.DataFrame({ + ... 'col1' : ['A', 'A', 'B', np.nan, 'D', 'C'], + ... 'col2' : [2, 1, 9, 8, 7, 4], + ... 'col3': [0, 1, 9, 4, 2, 3], + ... }) + >>> df + col1 col2 col3 + 0 A 2 0 + 1 A 1 1 + 2 B 9 9 + 3 NaN 8 4 + 4 D 7 2 + 5 C 4 3 + + Sort by col1 + + >>> df.sort_values(by=['col1']) + col1 col2 col3 + 0 A 2 0 + 1 A 1 1 + 2 B 9 9 + 5 C 4 3 + 4 D 7 2 + 3 NaN 8 4 + + Sort by multiple columns + + >>> df.sort_values(by=['col1', 'col2']) + col1 col2 col3 + 1 A 1 1 + 0 A 2 0 + 2 B 9 9 + 5 C 4 3 + 4 D 7 2 + 3 NaN 8 4 + + Sort Descending + + >>> df.sort_values(by='col1', ascending=False) + col1 col2 col3 + 4 D 7 2 + 5 C 4 3 + 2 B 9 9 + 0 A 2 0 + 1 A 1 1 + 3 NaN 8 4 + + Putting NAs first + + >>> df.sort_values(by='col1', ascending=False, na_position='first') + col1 col2 col3 + 3 NaN 8 4 + 4 D 7 2 + 5 C 4 3 + 2 B 9 9 + 0 A 2 0 + 1 A 1 1 """ def sort_values(self, by, axis=0, ascending=True, inplace=False, @@ -3469,6 +3560,58 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, Returns ------- filled : %(klass)s + + Examples + -------- + >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0], + ... [3, 4, np.nan, 1], + ... [np.nan, np.nan, np.nan, 5], + ... [np.nan, 3, np.nan, 4]], + ... columns=list('ABCD')) + >>> df + A B C D + 0 NaN 2.0 NaN 0 + 1 3.0 4.0 NaN 1 + 2 NaN NaN NaN 5 + 3 NaN 3.0 NaN 4 + + Replace all NaN elements with 0s. + + >>> df.fillna(0) + A B C D + 0 0.0 2.0 0.0 0 + 1 3.0 4.0 0.0 1 + 2 0.0 0.0 0.0 5 + 3 0.0 3.0 0.0 4 + + We can also propagate non-null values forward or backward. + + >>> df.fillna(method='ffill') + A B C D + 0 NaN 2.0 NaN 0 + 1 3.0 4.0 NaN 1 + 2 3.0 4.0 NaN 5 + 3 3.0 3.0 NaN 4 + + Replace all NaN elements in column 'A', 'B', 'C', and 'D', with 0, 1, + 2, and 3 respectively. + + >>> values = {'A': 0, 'B': 1, 'C': 2, 'D': 3} + >>> df.fillna(value=values) + A B C D + 0 0.0 2.0 2.0 0 + 1 3.0 4.0 2.0 1 + 2 0.0 1.0 2.0 5 + 3 0.0 3.0 2.0 4 + + Only replace the first NaN element. 
+ + >>> df.fillna(value=values, limit=1) + A B C D + 0 0.0 2.0 2.0 0 + 1 3.0 4.0 NaN 1 + 2 NaN 1.0 NaN 5 + 3 NaN 3.0 NaN 4 """) @Appender(_shared_docs['fillna'] % _shared_doc_kwargs) From 6a6227d19b6aa522d6ae5362da0589a45913d7bd Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Fri, 26 May 2017 07:47:44 -0400 Subject: [PATCH 615/933] ENH: Add to_latex() method to Series (#16180) (#16465) * ENH: Add to_latex() method to Series (#16180) This changeset adds _repr_latex_ to the Series class and moves the to_latex() method from the DataFrame class to the NDFrame class. * Add Series to_latex test * Move _repr_latex_ to NDFrame Streamline things a bit by moving _repr_latex_ methods out of the Series and DataFrame classes * DOC: Added versionchanged --- doc/source/api.rst | 1 + doc/source/whatsnew/v0.20.2.txt | 1 + pandas/core/frame.py | 98 --------------------- pandas/core/generic.py | 106 ++++++++++++++++++++++- pandas/tests/io/formats/test_to_latex.py | 17 +++- pandas/tests/series/test_repr.py | 20 ++++- 6 files changed, 142 insertions(+), 101 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 83abd84d36eb4..888bb6d67e94b 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -724,6 +724,7 @@ Serialization / IO / Conversion Series.to_dense Series.to_string Series.to_clipboard + Series.to_latex Sparse ~~~~~~ diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 1f71710d19e44..5c9a4bf46758c 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -20,6 +20,7 @@ Enhancements ~~~~~~~~~~~~ - Unblocked access to additional compression types supported in pytables: 'blosc:blosclz, 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`) +- ``Series`` provides a ``to_latex`` method (:issue:`16180`) .. _whatsnew_0202.performance: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 48cb1ca2ebd2a..743d623ee5e44 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -634,16 +634,6 @@ def _repr_html_(self): else: return None - def _repr_latex_(self): - """ - Returns a LaTeX representation for a particular Dataframe. - Mainly for use with nbconvert (jupyter notebook conversion to pdf). - """ - if get_option('display.latex.repr'): - return self.to_latex() - else: - return None - @property def style(self): """ @@ -1663,94 +1653,6 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True, if buf is None: return formatter.buf.getvalue() - @Substitution(header='Write out column names. If a list of string is given, \ -it is assumed to be aliases for the column names.') - @Appender(fmt.common_docstring + fmt.return_docstring, indents=1) - def to_latex(self, buf=None, columns=None, col_space=None, header=True, - index=True, na_rep='NaN', formatters=None, float_format=None, - sparsify=None, index_names=True, bold_rows=True, - column_format=None, longtable=None, escape=None, - encoding=None, decimal='.', multicolumn=None, - multicolumn_format=None, multirow=None): - r""" - Render a DataFrame to a tabular environment table. You can splice - this into a LaTeX document. Requires \usepackage{booktabs}. - - `to_latex`-specific options: - - bold_rows : boolean, default True - Make the row labels bold in the output - column_format : str, default None - The columns format as specified in `LaTeX table format - `__ e.g 'rcl' for 3 - columns - longtable : boolean, default will be read from the pandas config module - Default: False. - Use a longtable environment instead of tabular. 
Requires adding - a \usepackage{longtable} to your LaTeX preamble. - escape : boolean, default will be read from the pandas config module - Default: True. - When set to False prevents from escaping latex special - characters in column names. - encoding : str, default None - A string representing the encoding to use in the output file, - defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. - decimal : string, default '.' - Character recognized as decimal separator, e.g. ',' in Europe. - - .. versionadded:: 0.18.0 - - multicolumn : boolean, default True - Use \multicolumn to enhance MultiIndex columns. - The default will be read from the config module. - - .. versionadded:: 0.20.0 - - multicolumn_format : str, default 'l' - The alignment for multicolumns, similar to `column_format` - The default will be read from the config module. - - .. versionadded:: 0.20.0 - - multirow : boolean, default False - Use \multirow to enhance MultiIndex rows. - Requires adding a \usepackage{multirow} to your LaTeX preamble. - Will print centered labels (instead of top-aligned) - across the contained rows, separating groups via clines. - The default will be read from the pandas config module. - - .. versionadded:: 0.20.0 - - """ - # Get defaults from the pandas config - if longtable is None: - longtable = get_option("display.latex.longtable") - if escape is None: - escape = get_option("display.latex.escape") - if multicolumn is None: - multicolumn = get_option("display.latex.multicolumn") - if multicolumn_format is None: - multicolumn_format = get_option("display.latex.multicolumn_format") - if multirow is None: - multirow = get_option("display.latex.multirow") - - formatter = fmt.DataFrameFormatter(self, buf=buf, columns=columns, - col_space=col_space, na_rep=na_rep, - header=header, index=index, - formatters=formatters, - float_format=float_format, - bold_rows=bold_rows, - sparsify=sparsify, - index_names=index_names, - escape=escape, decimal=decimal) - formatter.to_latex(column_format=column_format, longtable=longtable, - encoding=encoding, multicolumn=multicolumn, - multicolumn_format=multicolumn_format, - multirow=multirow) - - if buf is None: - return formatter.buf.getvalue() - def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None): """ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index da79912f7bd9e..e541f1532d0a0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -46,7 +46,7 @@ import pandas.core.common as com import pandas.core.missing as missing from pandas.io.formats.printing import pprint_thing -from pandas.io.formats.format import format_percentiles +from pandas.io.formats.format import format_percentiles, DataFrameFormatter from pandas.tseries.frequencies import to_offset from pandas import compat from pandas.compat.numpy import function as nv @@ -1051,6 +1051,16 @@ def __setstate__(self, state): # ---------------------------------------------------------------------- # IO + def _repr_latex_(self): + """ + Returns a LaTeX representation for a particular object. + Mainly for use with nbconvert (jupyter notebook conversion to pdf). + """ + if config.get_option('display.latex.repr'): + return self.to_latex() + else: + return None + # ---------------------------------------------------------------------- # I/O Methods @@ -1503,6 +1513,100 @@ def to_xarray(self): coords=coords, ) + _shared_docs['to_latex'] = """ + Render an object to a tabular environment table. You can splice + this into a LaTeX document. 
Requires \\usepackage{booktabs}. + + .. versionchanged:: 0.20.2 + Added to Series + + `to_latex`-specific options: + + bold_rows : boolean, default True + Make the row labels bold in the output + column_format : str, default None + The columns format as specified in `LaTeX table format + `__ e.g 'rcl' for 3 + columns + longtable : boolean, default will be read from the pandas config module + Default: False. + Use a longtable environment instead of tabular. Requires adding + a \\usepackage{longtable} to your LaTeX preamble. + escape : boolean, default will be read from the pandas config module + Default: True. + When set to False prevents from escaping latex special + characters in column names. + encoding : str, default None + A string representing the encoding to use in the output file, + defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. + decimal : string, default '.' + Character recognized as decimal separator, e.g. ',' in Europe. + + .. versionadded:: 0.18.0 + + multicolumn : boolean, default True + Use \multicolumn to enhance MultiIndex columns. + The default will be read from the config module. + + .. versionadded:: 0.20.0 + + multicolumn_format : str, default 'l' + The alignment for multicolumns, similar to `column_format` + The default will be read from the config module. + + .. versionadded:: 0.20.0 + + multirow : boolean, default False + Use \multirow to enhance MultiIndex rows. + Requires adding a \\usepackage{multirow} to your LaTeX preamble. + Will print centered labels (instead of top-aligned) + across the contained rows, separating groups via clines. + The default will be read from the pandas config module. + + .. versionadded:: 0.20.0 + """ + + @Substitution(header='Write out column names. If a list of string is given, \ +it is assumed to be aliases for the column names.') + @Appender(_shared_docs['to_latex'] % _shared_doc_kwargs) + def to_latex(self, buf=None, columns=None, col_space=None, header=True, + index=True, na_rep='NaN', formatters=None, float_format=None, + sparsify=None, index_names=True, bold_rows=True, + column_format=None, longtable=None, escape=None, + encoding=None, decimal='.', multicolumn=None, + multicolumn_format=None, multirow=None): + # Get defaults from the pandas config + if self.ndim == 1: + self = self.to_frame() + if longtable is None: + longtable = config.get_option("display.latex.longtable") + if escape is None: + escape = config.get_option("display.latex.escape") + if multicolumn is None: + multicolumn = config.get_option("display.latex.multicolumn") + if multicolumn_format is None: + multicolumn_format = config.get_option( + "display.latex.multicolumn_format") + if multirow is None: + multirow = config.get_option("display.latex.multirow") + + formatter = DataFrameFormatter(self, buf=buf, columns=columns, + col_space=col_space, na_rep=na_rep, + header=header, index=index, + formatters=formatters, + float_format=float_format, + bold_rows=bold_rows, + sparsify=sparsify, + index_names=index_names, + escape=escape, decimal=decimal) + formatter.to_latex(column_format=column_format, longtable=longtable, + encoding=encoding, multicolumn=multicolumn, + multicolumn_format=multicolumn_format, + multirow=multirow) + + if buf is None: + return formatter.buf.getvalue() + # ---------------------------------------------------------------------- # Fancy Indexing diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 2542deb0cedf1..4ee77abb32c26 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ 
b/pandas/tests/io/formats/test_to_latex.py @@ -3,7 +3,7 @@ import pytest import pandas as pd -from pandas import DataFrame, compat +from pandas import DataFrame, compat, Series from pandas.util import testing as tm from pandas.compat import u import codecs @@ -491,3 +491,18 @@ def test_to_latex_decimal(self, frame): """ assert withindex_result == withindex_expected + + def test_to_latex_series(self): + s = Series(['a', 'b', 'c']) + withindex_result = s.to_latex() + withindex_expected = r"""\begin{tabular}{ll} +\toprule +{} & 0 \\ +\midrule +0 & a \\ +1 & b \\ +2 & c \\ +\bottomrule +\end{tabular} +""" + assert withindex_result == withindex_expected diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 3af61b0a902d3..c22e2ca8e0dc8 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from pandas import (Index, Series, DataFrame, date_range) +from pandas import (Index, Series, DataFrame, date_range, option_context) from pandas.core.index import MultiIndex from pandas.compat import lrange, range, u @@ -180,3 +180,21 @@ def test_timeseries_repr_object_dtype(self): ts2 = ts.iloc[np.random.randint(0, len(ts) - 1, 400)] repr(ts2).splitlines()[-1] + + def test_latex_repr(self): + result = r"""\begin{tabular}{ll} +\toprule +{} & 0 \\ +\midrule +0 & $\alpha$ \\ +1 & b \\ +2 & c \\ +\bottomrule +\end{tabular} +""" + with option_context('display.latex.escape', False, + 'display.latex.repr', True): + s = Series([r'$\alpha$', 'b', 'c']) + assert result == s._repr_latex_() + + assert s._repr_latex_() is None From 3c9a74bc869decd7be5e180ed65bef694553e2fb Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 26 May 2017 07:32:52 -0500 Subject: [PATCH 616/933] COMPAT: Catch warnings on tab-complete in IPy 6 (#16414) Properties may run code with Jedi completion in IPython 6 Closes https://github.com/pandas-dev/pandas/issues/16409 --- doc/source/whatsnew/v0.20.2.txt | 4 ++++ pandas/conftest.py | 10 ++++++++++ pandas/core/categorical.py | 7 +++++++ pandas/core/resample.py | 6 ++++++ pandas/tests/test_categorical.py | 11 +++++++++++ pandas/tests/test_resample.py | 17 +++++++++++++++-- 6 files changed, 53 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 5c9a4bf46758c..13365401f1d1c 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -41,6 +41,10 @@ Bug Fixes - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`) + +- Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on Categoricals (:issue:`16409`) + + Conversion ^^^^^^^^^^ diff --git a/pandas/conftest.py b/pandas/conftest.py index 1149fae3fc0b0..8a3ffe22242ac 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -45,3 +45,13 @@ def spmatrix(request): tm._skip_if_no_scipy() from scipy import sparse return getattr(sparse, request.param + '_matrix') + + +@pytest.fixture +def ip(): + """An instance of IPython.InteractiveShell. + Will raise a skip if IPython is not installed. 
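
    A rough usage sketch, mirroring the tab-completion tests added below
    (``provisionalcompleter`` comes from ``IPython.core.completer``):

        def test_tab_complete(self, ip):
            from IPython.core.completer import provisionalcompleter
            ip.run_code("import pandas as pd; c = pd.Categorical([])")
            with provisionalcompleter('ignore'):
                list(ip.Completer.completions('c.', 1))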
+ """ + pytest.importorskip('IPython', minversion="6.0.0") + from IPython.core.interactiveshell import InteractiveShell + return InteractiveShell() diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 5b663f1d85ee7..f58eed74f760e 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -342,6 +342,13 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False): self._categories = categories self._codes = coerce_indexer_dtype(codes, categories) + def __dir__(self): + # Avoid IPython warnings for deprecated properties + # https://github.com/pandas-dev/pandas/issues/16409 + rv = set(dir(type(self))) + rv.discard("labels") + return sorted(rv) + @property def _constructor(self): return Categorical diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 631b91c3aad11..2bb825541e23b 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -184,6 +184,12 @@ def __getattr__(self, attr): matches_pattern = any(attr.startswith(x) for x in self._deprecated_valid_patterns) if not matches_pattern and attr not in self._deprecated_valids: + # avoid the warning, if it's just going to be an exception + # anyway. + if not hasattr(self.obj, attr): + raise AttributeError("'{}' has no attribute '{}'".format( + type(self.obj).__name__, attr + )) self = self._deprecated(attr) return object.__getattribute__(self, attr) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index f48eea23220b8..1ffe956b3a607 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -736,6 +736,17 @@ def test_unicode_print(self): assert _rep(c) == expected + def test_tab_complete_warning(self, ip): + # https://github.com/pandas-dev/pandas/issues/16409 + pytest.importorskip('IPython', minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + code = "import pandas as pd; c = pd.Categorical([])" + ip.run_code(code) + with tm.assert_produces_warning(None): + with provisionalcompleter('ignore'): + list(ip.Completer.completions('c.', 1)) + def test_periodindex(self): idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02', '2014-03', '2014-03'], freq='M') diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 37e2fd0e9b188..170cab4947a5a 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -3,6 +3,7 @@ from warnings import catch_warnings from datetime import datetime, timedelta from functools import partial +from textwrap import dedent import pytz import pytest @@ -284,8 +285,7 @@ def test_attribute_access(self): tm.assert_series_equal(r.A.sum(), r['A'].sum()) # getting - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - pytest.raises(AttributeError, lambda: r.F) + pytest.raises(AttributeError, lambda: r.F) # setting def f(): @@ -2816,6 +2816,19 @@ def test_back_compat_v180(self): expected = df.groupby('A').resample('4s').mean().ffill() assert_frame_equal(result, expected) + def test_tab_complete_ipython6_warning(self, ip): + from IPython.core.completer import provisionalcompleter + code = dedent("""\ + import pandas.util.testing as tm + s = tm.makeTimeSeries() + rs = s.resample("D") + """) + ip.run_code(code) + + with tm.assert_produces_warning(None): + with provisionalcompleter('ignore'): + list(ip.Completer.completions('rs.', 1)) + def test_deferred_with_groupby(self): # GH 12486 From 75c8698e32432d7dd075a93933749a26c53b3029 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 
26 May 2017 07:48:05 -0500 Subject: [PATCH 617/933] TST: Fix excel test discovery (#16478) * TST: Fix excel test discovery * BUG: Handle sheetname deprecation directly Since sheetname=None has a special meaning, we can't use the deprecate_kwargs decorator. We instead handle it in read_excel. * TST/BUG: Ensure pathlib roundtrip uses right params Reader / writer may depend on filename and engine. Set these on the reader and writer before round-tripping. --- pandas/io/excel.py | 13 +++++++-- pandas/tests/io/test_excel.py | 51 +++++++++++++++++++++-------------- 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 81a36b21b3617..aa08e5fd378f0 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -8,6 +8,7 @@ import os import abc +import warnings import numpy as np from pandas.core.dtypes.common import ( @@ -30,7 +31,7 @@ import pandas.compat.openpyxl_compat as openpyxl_compat from warnings import warn from distutils.version import LooseVersion -from pandas.util._decorators import Appender, deprecate_kwarg +from pandas.util._decorators import Appender from textwrap import fill __all__ = ["read_excel", "ExcelWriter", "ExcelFile"] @@ -193,7 +194,6 @@ def get_writer(engine_name): raise ValueError("No Excel writer '%s'" % engine_name) -@deprecate_kwarg('sheetname', 'sheet_name') @Appender(_read_excel_doc) def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0, index_col=None, names=None, parse_cols=None, parse_dates=False, @@ -202,6 +202,15 @@ def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0, dtype=None, true_values=None, false_values=None, engine=None, squeeze=False, **kwds): + # Can't use _deprecate_kwarg since sheetname=None has a special meaning + if is_integer(sheet_name) and sheet_name == 0 and 'sheetname' in kwds: + warnings.warn("The `sheetname` keyword is deprecated, use " + "`sheet_name` instead", FutureWarning, stacklevel=2) + sheet_name = kwds.pop("sheetname") + elif 'sheetname' in kwds: + raise TypeError("Cannot specify both `sheet_name` and `sheetname`. " + "Use just `sheet_name`") + if not isinstance(io, ExcelFile): io = ExcelFile(io, engine=engine) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 0a79d4e8fd81b..4441ed815370b 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -5,6 +5,7 @@ import sys import os from distutils.version import LooseVersion +from functools import partial import warnings from warnings import catch_warnings @@ -407,7 +408,7 @@ def test_reading_all_sheets(self): # Ensure a dict is returned. # See PR #9450 basename = 'test_multisheet' - dfs = self.get_exceldf(basename, sheetname=None) + dfs = self.get_exceldf(basename, sheet_name=None) # ensure this is not alphabetical to test order preservation expected_keys = ['Charlie', 'Alpha', 'Beta'] tm.assert_contains_all(expected_keys, dfs.keys()) @@ -424,7 +425,7 @@ def test_reading_multiple_specific_sheets(self): basename = 'test_multisheet' # Explicitly request duplicates. Only the set should be returned. expected_keys = [2, 'Charlie', 'Charlie'] - dfs = self.get_exceldf(basename, sheetname=expected_keys) + dfs = self.get_exceldf(basename, sheet_name=expected_keys) expected_keys = list(set(expected_keys)) tm.assert_contains_all(expected_keys, dfs.keys()) assert len(expected_keys) == len(dfs.keys()) @@ -434,7 +435,7 @@ def test_reading_all_sheets_with_blank(self): # In the case where some sheets are blank. 
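
A quick sketch of the ``read_excel`` deprecation handled above (the file
name is hypothetical; the behavior follows the code in this patch):

    import pandas as pd

    pd.read_excel('book.xlsx', sheet_name='Sheet1')  # preferred spelling
    pd.read_excel('book.xlsx', sheetname='Sheet1')   # still works, warns
    pd.read_excel('book.xlsx', sheet_name=None)      # dict of all sheets
    # passing sheet_name and sheetname together raises TypeError
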
# Issue #11711 basename = 'blank_with_header' - dfs = self.get_exceldf(basename, sheetname=None) + dfs = self.get_exceldf(basename, sheet_name=None) expected_keys = ['Sheet1', 'Sheet2', 'Sheet3'] tm.assert_contains_all(expected_keys, dfs.keys()) @@ -551,11 +552,15 @@ def test_sheet_name_and_sheetname(self): dfref = self.get_csv_refdf('test1') df1 = self.get_exceldf('test1', sheet_name='Sheet1') # doc with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - df2 = self.get_exceldf('test1', sheetname='Sheet2') # bkwrd compat + df2 = self.get_exceldf('test1', sheetname='Sheet1') # bkwrd compat tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) + def test_sheet_name_both_raises(self): + with tm.assert_raises_regex(TypeError, "Cannot specify both"): + self.get_exceldf('test1', sheetname='Sheet1', sheet_name='Sheet1') + class XlrdTests(ReadingTestsBase): """ @@ -589,7 +594,7 @@ def test_read_xlrd_Book(self): result = read_excel(xl, "SheetA") tm.assert_frame_equal(df, result) - result = read_excel(book, sheetname="SheetA", engine="xlrd") + result = read_excel(book, sheet_name="SheetA", engine="xlrd") tm.assert_frame_equal(df, result) @tm.network @@ -691,7 +696,7 @@ def tdf(sheetname): with ExcelWriter(pth) as ew: for sheetname, df in iteritems(dfs): df.to_excel(ew, sheetname) - dfs_returned = read_excel(pth, sheetname=sheets) + dfs_returned = read_excel(pth, sheet_name=sheets) for s in sheets: tm.assert_frame_equal(dfs[s], dfs_returned[s]) @@ -1001,19 +1006,19 @@ def test_read_excel_squeeze(self): tm.assert_series_equal(actual, expected) -class XlsReaderTests(XlrdTests): +class TestXlsReaderTests(XlrdTests): ext = '.xls' engine_name = 'xlrd' check_skip = staticmethod(_skip_if_no_xlrd) -class XlsxReaderTests(XlrdTests): +class TestXlsxReaderTests(XlrdTests): ext = '.xlsx' engine_name = 'xlrd' check_skip = staticmethod(_skip_if_no_xlrd) -class XlsmReaderTests(XlrdTests): +class TestXlsmReaderTests(XlrdTests): ext = '.xlsm' engine_name = 'xlrd' check_skip = staticmethod(_skip_if_no_xlrd) @@ -1872,12 +1877,18 @@ def test_freeze_panes(self): def test_path_pathlib(self): df = tm.makeDataFrame() - result = tm.round_trip_pathlib(df.to_excel, pd.read_excel) + writer = partial(df.to_excel, engine=self.engine_name) + reader = partial(pd.read_excel) + result = tm.round_trip_pathlib(writer, reader, + path="foo.{}".format(self.ext)) tm.assert_frame_equal(df, result) def test_path_localpath(self): df = tm.makeDataFrame() - result = tm.round_trip_localpath(df.to_excel, pd.read_excel) + writer = partial(df.to_excel, engine=self.engine_name) + reader = partial(pd.read_excel) + result = tm.round_trip_pathlib(writer, reader, + path="foo.{}".format(self.ext)) tm.assert_frame_equal(df, result) @@ -1909,7 +1920,7 @@ def versioned_raise_on_incompat_version(cls): @raise_on_incompat_version(1) -class OpenpyxlTests(ExcelWriterBase): +class TestOpenpyxlTests(ExcelWriterBase): ext = '.xlsx' engine_name = 'openpyxl1' check_skip = staticmethod(lambda *args, **kwargs: None) @@ -1962,7 +1973,7 @@ def setup_class(cls): @raise_on_incompat_version(2) @skip_openpyxl_gt21 -class Openpyxl20Tests(ExcelWriterBase): +class TestOpenpyxl20Tests(ExcelWriterBase): ext = '.xlsx' engine_name = 'openpyxl20' check_skip = staticmethod(lambda *args, **kwargs: None) @@ -2078,7 +2089,7 @@ def setup_class(cls): @raise_on_incompat_version(2) @skip_openpyxl_lt22 -class Openpyxl22Tests(ExcelWriterBase): +class TestOpenpyxl22Tests(ExcelWriterBase): ext = '.xlsx' engine_name = 
'openpyxl22' check_skip = staticmethod(lambda *args, **kwargs: None) @@ -2173,7 +2184,7 @@ def test_write_cells_merge_styled(self): assert xcell_a2.font == openpyxl_sty_merged -class XlwtTests(ExcelWriterBase): +class TestXlwtTests(ExcelWriterBase): ext = '.xls' engine_name = 'xlwt' check_skip = staticmethod(_skip_if_no_xlwt) @@ -2230,7 +2241,7 @@ def test_to_excel_styleconverter(self): assert xlwt.Alignment.VERT_TOP == xls_style.alignment.vert -class XlsxWriterTests(ExcelWriterBase): +class TestXlsxWriterTests(ExcelWriterBase): ext = '.xlsx' engine_name = 'xlsxwriter' check_skip = staticmethod(_skip_if_no_xlsxwriter) @@ -2283,7 +2294,7 @@ def test_column_format(self): assert read_num_format == num_format -class OpenpyxlTests_NoMerge(ExcelWriterBase): +class TestOpenpyxlTests_NoMerge(ExcelWriterBase): ext = '.xlsx' engine_name = 'openpyxl' check_skip = staticmethod(_skip_if_no_openpyxl) @@ -2292,7 +2303,7 @@ class OpenpyxlTests_NoMerge(ExcelWriterBase): merge_cells = False -class XlwtTests_NoMerge(ExcelWriterBase): +class TestXlwtTests_NoMerge(ExcelWriterBase): ext = '.xls' engine_name = 'xlwt' check_skip = staticmethod(_skip_if_no_xlwt) @@ -2301,7 +2312,7 @@ class XlwtTests_NoMerge(ExcelWriterBase): merge_cells = False -class XlsxWriterTests_NoMerge(ExcelWriterBase): +class TestXlsxWriterTests_NoMerge(ExcelWriterBase): ext = '.xlsx' engine_name = 'xlsxwriter' check_skip = staticmethod(_skip_if_no_xlsxwriter) @@ -2310,7 +2321,7 @@ class XlsxWriterTests_NoMerge(ExcelWriterBase): merge_cells = False -class ExcelWriterEngineTests(object): +class TestExcelWriterEngineTests(object): def test_ExcelWriter_dispatch(self): with tm.assert_raises_regex(ValueError, 'No engine'): From b0d9ee0b4c0911e089d4dd7c6a676557909fb7da Mon Sep 17 00:00:00 2001 From: Christoph Moehl Date: Fri, 26 May 2017 16:43:12 +0200 Subject: [PATCH 618/933] ENH: added margins_name parameter for crosstab (#16489) * ENH #15972 added margins_name parameter for crosstab * ENH 15972 minor changes as suggested by reviewers * ENH 15972 correction in whatsnew * ENH 15972 style changes in whatsnew --- doc/source/whatsnew/v0.20.0.txt | 1 - doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/reshape/pivot.py | 28 ++++++++++++++-------- pandas/tests/reshape/test_pivot.py | 37 ++++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a0bf2f9b3758a..9d475390175b2 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -515,7 +515,6 @@ Other Enhancements - Options added to allow one to turn on/off using ``bottleneck`` and ``numexpr``, see :ref:`here ` (:issue:`16157`) - ``DataFrame.style.bar()`` now accepts two more options to further customize the bar chart. Bar alignment is set with ``align='left'|'mid'|'zero'``, the default is "left", which is backward compatible; You can now pass a list of ``color=[color_negative, color_positive]``. (:issue:`14757`) - .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index b4ca3f011a81d..a6b6d704737bd 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -37,6 +37,7 @@ Other Enhancements - :func:`api.types.infer_dtype` now infers decimals. 
(:issue:`15690`)
 - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`)
 - :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument (:issue:`15388`)
+- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True`` (:issue:`15972`)

 .. _whatsnew_0210.api_breaking:

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index 74dbbfc00cb11..b562f8a32f5c9 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -388,7 +388,8 @@ def _convert_by(by):

 def crosstab(index, columns, values=None, rownames=None, colnames=None,
-             aggfunc=None, margins=False, dropna=True, normalize=False):
+             aggfunc=None, margins=False, margins_name='All', dropna=True,
+             normalize=False):
     """
     Compute a simple cross-tabulation of two (or more) factors. By default
     computes a frequency table of the factors unless an array of values and an
@@ -411,6 +412,12 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
         If passed, must match number of column arrays passed
     margins : boolean, default False
         Add row/column margins (subtotals)
+    margins_name : string, default 'All'
+        Name of the row / column that will contain the totals
+        when margins is True.
+
+        .. versionadded:: 0.21.0
+
     dropna : boolean, default True
         Do not include columns whose entries are all NaN
     normalize : boolean, {'all', 'index', 'columns'}, or {0,1}, default False
@@ -490,23 +497,26 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
         df = DataFrame(data)
         df['__dummy__'] = 0
         table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
-                               aggfunc=len, margins=margins, dropna=dropna)
+                               aggfunc=len, margins=margins,
+                               margins_name=margins_name, dropna=dropna)
         table = table.fillna(0).astype(np.int64)
     else:
         data['__dummy__'] = values
         df = DataFrame(data)
         table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
-                               aggfunc=aggfunc, margins=margins, dropna=dropna)
+                               aggfunc=aggfunc, margins=margins,
+                               margins_name=margins_name, dropna=dropna)

     # Post-process
     if normalize is not False:
-        table = _normalize(table, normalize=normalize, margins=margins)
+        table = _normalize(table, normalize=normalize, margins=margins,
+                           margins_name=margins_name)

     return table


-def _normalize(table, normalize, margins):
+def _normalize(table, normalize, margins, margins_name='All'):

     if not isinstance(normalize, bool) and not isinstance(normalize,
                                                           compat.string_types):
@@ -537,9 +547,9 @@ def _normalize(table, normalize, margins):

     elif margins is True:

-        column_margin = table.loc[:, 'All'].drop('All')
-        index_margin = table.loc['All', :].drop('All')
-        table = table.drop('All', axis=1).drop('All')
+        column_margin = table.loc[:, margins_name].drop(margins_name)
+        index_margin = table.loc[margins_name, :].drop(margins_name)
+        table = table.drop(margins_name, axis=1).drop(margins_name)
         # to keep index and columns names
         table_index_names = table.index.names
         table_columns_names = table.columns.names
@@ -561,7 +571,7 @@ def _normalize(table, normalize, margins):
     elif normalize == "all" or normalize is True:
         column_margin = column_margin / column_margin.sum()
         index_margin = index_margin / index_margin.sum()
-        index_margin.loc['All'] = 1
+        index_margin.loc[margins_name] = 1
         table = concat([table, column_margin], axis=1)
         table = table.append(index_margin)

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
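
A minimal sketch of the new keyword (made-up data; requires this patch):

    import numpy as np
    import pandas as pd

    a = np.array(['x', 'x', 'y', 'y'])
    b = np.array([1, 2, 1, 2])
    # The totals row/column is labelled 'TOTAL' instead of the default 'All'.
    pd.crosstab(a, b, margins=True, margins_name='TOTAL')
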
index 270a93e4ae382..fc5a2eb468d4f 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1071,6 +1071,43 @@ def test_crosstab_margins(self): exp_rows = exp_rows.fillna(0).astype(np.int64) tm.assert_series_equal(all_rows, exp_rows) + def test_crosstab_margins_set_margin_name(self): + # GH 15972 + a = np.random.randint(0, 7, size=100) + b = np.random.randint(0, 3, size=100) + c = np.random.randint(0, 5, size=100) + + df = DataFrame({'a': a, 'b': b, 'c': c}) + + result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'), + margins=True, margins_name='TOTAL') + + assert result.index.names == ('a',) + assert result.columns.names == ['b', 'c'] + + all_cols = result['TOTAL', ''] + exp_cols = df.groupby(['a']).size().astype('i8') + # to keep index.name + exp_margin = Series([len(df)], index=Index(['TOTAL'], name='a')) + exp_cols = exp_cols.append(exp_margin) + exp_cols.name = ('TOTAL', '') + + tm.assert_series_equal(all_cols, exp_cols) + + all_rows = result.loc['TOTAL'] + exp_rows = df.groupby(['b', 'c']).size().astype('i8') + exp_rows = exp_rows.append(Series([len(df)], index=[('TOTAL', '')])) + exp_rows.name = 'TOTAL' + + exp_rows = exp_rows.reindex(all_rows.index) + exp_rows = exp_rows.fillna(0).astype(np.int64) + tm.assert_series_equal(all_rows, exp_rows) + + for margins_name in [666, None, ['a', 'b']]: + with pytest.raises(ValueError): + crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'), + margins=True, margins_name=margins_name) + def test_crosstab_pass_values(self): a = np.random.randint(0, 7, size=100) b = np.random.randint(0, 3, size=100) From 66491574df3d223a15f2fd229793ccdbfd8a0fa3 Mon Sep 17 00:00:00 2001 From: Aaron Barber Date: Fri, 26 May 2017 12:11:55 -0700 Subject: [PATCH 619/933] TST: ujson tests are not being run (#16499) (#16500) closes #16499 --- pandas/tests/io/json/test_ujson.py | 44 +++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 662f06dbb725e..76fb6d442a25a 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -27,7 +27,7 @@ else partial(json.dumps, encoding="utf-8")) -class UltraJSONTests(object): +class TestUltraJSONTests(object): @pytest.mark.skipif(compat.is_platform_32bit(), reason="not compliant on 32-bit, xref #15865") @@ -944,19 +944,19 @@ def my_obj_handler(obj): ujson.decode(ujson.encode(l, default_handler=str))) -class NumpyJSONTests(object): +class TestNumpyJSONTests(object): - def testBool(self): + def test_Bool(self): b = np.bool(True) assert ujson.decode(ujson.encode(b)) == b - def testBoolArray(self): + def test_BoolArray(self): inpt = np.array([True, False, True, True, False, True, False, False], dtype=np.bool) outp = np.array(ujson.decode(ujson.encode(inpt)), dtype=np.bool) tm.assert_numpy_array_equal(inpt, outp) - def testInt(self): + def test_Int(self): num = np.int(2562010) assert np.int(ujson.decode(ujson.encode(num))) == num @@ -984,7 +984,7 @@ def testInt(self): num = np.uint64(2562010) assert np.uint64(ujson.decode(ujson.encode(num))) == num - def testIntArray(self): + def test_IntArray(self): arr = np.arange(100, dtype=np.int) dtypes = (np.int, np.int8, np.int16, np.int32, np.int64, np.uint, np.uint8, np.uint16, np.uint32, np.uint64) @@ -993,7 +993,7 @@ def testIntArray(self): outp = np.array(ujson.decode(ujson.encode(inpt)), dtype=dtype) tm.assert_numpy_array_equal(inpt, outp) - def testIntMax(self): + def test_IntMax(self): num = 
np.int(np.iinfo(np.int).max) assert np.int(ujson.decode(ujson.encode(num))) == num @@ -1023,7 +1023,7 @@ def testIntMax(self): num = np.uint64(np.iinfo(np.int64).max) assert np.uint64(ujson.decode(ujson.encode(num))) == num - def testFloat(self): + def test_Float(self): num = np.float(256.2013) assert np.float(ujson.decode(ujson.encode(num))) == num @@ -1033,7 +1033,7 @@ def testFloat(self): num = np.float64(256.2013) assert np.float64(ujson.decode(ujson.encode(num))) == num - def testFloatArray(self): + def test_FloatArray(self): arr = np.arange(12.5, 185.72, 1.7322, dtype=np.float) dtypes = (np.float, np.float32, np.float64) @@ -1043,7 +1043,7 @@ def testFloatArray(self): inpt, double_precision=15)), dtype=dtype) tm.assert_almost_equal(inpt, outp) - def testFloatMax(self): + def test_FloatMax(self): num = np.float(np.finfo(np.float).max / 10) tm.assert_almost_equal(np.float(ujson.decode( ujson.encode(num, double_precision=15))), num, 15) @@ -1056,7 +1056,7 @@ def testFloatMax(self): tm.assert_almost_equal(np.float64(ujson.decode( ujson.encode(num, double_precision=15))), num, 15) - def testArrays(self): + def test_Arrays(self): arr = np.arange(100) arr = arr.reshape((10, 10)) @@ -1097,13 +1097,13 @@ def testArrays(self): outp = ujson.decode(ujson.encode(arr), numpy=True, dtype=np.float32) tm.assert_almost_equal(arr, outp) - def testOdArray(self): + def test_OdArray(self): def will_raise(): ujson.encode(np.array(1)) pytest.raises(TypeError, will_raise) - def testArrayNumpyExcept(self): + def test_ArrayNumpyExcept(self): input = ujson.dumps([42, {}, 'a']) try: @@ -1186,7 +1186,7 @@ def testArrayNumpyExcept(self): except: assert False, "Wrong exception" - def testArrayNumpyLabelled(self): + def test_ArrayNumpyLabelled(self): input = {'a': []} output = ujson.loads(ujson.dumps(input), numpy=True, labelled=True) assert (np.empty((1, 0)) == output[0]).all() @@ -1220,9 +1220,9 @@ def testArrayNumpyLabelled(self): assert (np.array(['a', 'b']) == output[2]).all() -class PandasJSONTests(object): +class TestPandasJSONTests(object): - def testDataFrame(self): + def test_DataFrame(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[ 'a', 'b'], columns=['x', 'y', 'z']) @@ -1252,7 +1252,7 @@ def testDataFrame(self): tm.assert_index_equal(df.transpose().columns, outp.columns) tm.assert_index_equal(df.transpose().index, outp.index) - def testDataFrameNumpy(self): + def test_DataFrameNumpy(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[ 'a', 'b'], columns=['x', 'y', 'z']) @@ -1275,7 +1275,7 @@ def testDataFrameNumpy(self): tm.assert_index_equal(df.transpose().columns, outp.columns) tm.assert_index_equal(df.transpose().index, outp.index) - def testDataFrameNested(self): + def test_DataFrameNested(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[ 'a', 'b'], columns=['x', 'y', 'z']) @@ -1301,7 +1301,7 @@ def testDataFrameNested(self): 'df2': ujson.decode(ujson.encode(df, orient="split"))} assert ujson.decode(ujson.encode(nested, orient="split")) == exp - def testDataFrameNumpyLabelled(self): + def test_DataFrameNumpyLabelled(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[ 'a', 'b'], columns=['x', 'y', 'z']) @@ -1324,7 +1324,7 @@ def testDataFrameNumpyLabelled(self): tm.assert_index_equal(df.columns, outp.columns) tm.assert_index_equal(df.index, outp.index) - def testSeries(self): + def test_Series(self): s = Series([10, 20, 30, 40, 50, 60], name="series", index=[6, 7, 8, 9, 10, 15]).sort_values() @@ -1372,7 +1372,7 @@ def testSeries(self): s, orient="index"), numpy=True)).sort_values() 
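
A condensed sketch of the round-trip these Series tests exercise, assuming
the test module's ``import pandas._libs.json as ujson``:

    s = Series([10, 20, 30], index=[6, 7, 8], name="series")
    decoded = ujson.decode(ujson.encode(s, orient="split"))
    # orient="split" keeps name/index/data apart, so the Series rebuilds exactly
    rebuilt = Series(**decoded)
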
tm.assert_series_equal(outp, exp) - def testSeriesNested(self): + def test_SeriesNested(self): s = Series([10, 20, 30, 40, 50, 60], name="series", index=[6, 7, 8, 9, 10, 15]).sort_values() @@ -1398,7 +1398,7 @@ def testSeriesNested(self): 's2': ujson.decode(ujson.encode(s, orient="index"))} assert ujson.decode(ujson.encode(nested, orient="index")) == exp - def testIndex(self): + def test_Index(self): i = Index([23, 45, 18, 98, 43, 11], name="index") # column indexed From ef487d9e474e8052c0f7c6260de5802a950defad Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 26 May 2017 21:35:11 -0400 Subject: [PATCH 620/933] DOC: Remove preference for pytest paradigm in assert_raises_regex (#16518) --- pandas/util/testing.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index f987045c27d5f..17e09b38b20e0 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2424,15 +2424,8 @@ def assert_raises_regex(_exception, _regexp, _callable=None, Check that the specified Exception is raised and that the error message matches a given regular expression pattern. This may be a regular expression object or a string containing a regular expression suitable - for use by `re.search()`. - - This is a port of the `assertRaisesRegexp` function from unittest in - Python 2.7. However, with our migration to `pytest`, please refrain - from using this. Instead, use the following paradigm: - - with pytest.raises(_exception) as exc_info: - func(*args, **kwargs) - exc_info.matches(reg_exp) + for use by `re.search()`. This is a port of the `assertRaisesRegexp` + function from unittest in Python 2.7. Examples -------- From e60dc4cff2c6e8a2283fbb906faeb8cb01df37ff Mon Sep 17 00:00:00 2001 From: "John W. 
O'Brien" Date: Mon, 29 May 2017 12:00:42 -0400 Subject: [PATCH 621/933] TST: Specify HTML file encoding on PY3 (#16526) --- pandas/tests/io/test_html.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 6da77bf423609..1e1d653cf94d1 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -20,7 +20,7 @@ from pandas import (DataFrame, MultiIndex, read_csv, Timestamp, Index, date_range, Series) from pandas.compat import (map, zip, StringIO, string_types, BytesIO, - is_platform_windows) + is_platform_windows, PY3) from pandas.io.common import URLError, urlopen, file_path_to_url from pandas.io.html import read_html from pandas._libs.parsers import ParserError @@ -96,6 +96,9 @@ def read_html(self, *args, **kwargs): class TestReadHtml(ReadHtmlMixin): flavor = 'bs4' spam_data = os.path.join(DATA_PATH, 'spam.html') + spam_data_kwargs = {} + if PY3: + spam_data_kwargs['encoding'] = 'UTF-8' banklist_data = os.path.join(DATA_PATH, 'banklist.html') @classmethod @@ -247,10 +250,10 @@ def test_infer_types(self): assert_framelist_equal(df1, df2) def test_string_io(self): - with open(self.spam_data) as f: + with open(self.spam_data, **self.spam_data_kwargs) as f: data1 = StringIO(f.read()) - with open(self.spam_data) as f: + with open(self.spam_data, **self.spam_data_kwargs) as f: data2 = StringIO(f.read()) df1 = self.read_html(data1, '.*Water.*') @@ -258,7 +261,7 @@ def test_string_io(self): assert_framelist_equal(df1, df2) def test_string(self): - with open(self.spam_data) as f: + with open(self.spam_data, **self.spam_data_kwargs) as f: data = f.read() df1 = self.read_html(data, '.*Water.*') @@ -267,10 +270,10 @@ def test_string(self): assert_framelist_equal(df1, df2) def test_file_like(self): - with open(self.spam_data) as f: + with open(self.spam_data, **self.spam_data_kwargs) as f: df1 = self.read_html(f, '.*Water.*') - with open(self.spam_data) as f: + with open(self.spam_data, **self.spam_data_kwargs) as f: df2 = self.read_html(f, 'Unit') assert_framelist_equal(df1, df2) From 7efc4e8b99bbb6f85ce174f2c484090cb3bc8191 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 30 May 2017 17:52:55 -0500 Subject: [PATCH 622/933] BUG: Fixed tput output on windows (#16496) --- doc/source/whatsnew/v0.20.2.txt | 2 ++ pandas/io/formats/terminal.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 13365401f1d1c..7b7f9e8745809 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -37,6 +37,8 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Silenced a warning on some Windows environments about "tput: terminal attributes: No such device or address" when + detecting the terminal size. 
This fix only applies to python 3 (:issue:`16496`) - Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`) - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`) diff --git a/pandas/io/formats/terminal.py b/pandas/io/formats/terminal.py index dadd09ae74ea4..30bd1d16b538a 100644 --- a/pandas/io/formats/terminal.py +++ b/pandas/io/formats/terminal.py @@ -14,6 +14,8 @@ from __future__ import print_function import os +import sys +import shutil __all__ = ['get_terminal_size'] @@ -26,6 +28,10 @@ def get_terminal_size(): IPython zmq frontends, or IDLE do not run in a terminal, """ import platform + + if sys.version_info[0] >= 3: + return shutil.get_terminal_size() + current_os = platform.system() tuple_xy = None if current_os == 'Windows': From 4ca29f4f91030331271b31ceda0de41a696ac00a Mon Sep 17 00:00:00 2001 From: keitakurita Date: Wed, 31 May 2017 08:12:50 +0900 Subject: [PATCH 623/933] BUG: Incorrect handling of rolling.cov with offset window (#16244) --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/core/window.py | 9 ++++++++- pandas/tests/test_window.py | 23 +++++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 7b7f9e8745809..90146aa176b31 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -82,6 +82,7 @@ Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug creating datetime rolling window on an empty DataFrame (:issue:`15819`) +- Bug in ``rolling.cov()`` with offset window (:issue:`16058`) Sparse diff --git a/pandas/core/window.py b/pandas/core/window.py index cf1bad706ae1d..ba7e79944ab0e 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -81,6 +81,7 @@ def __init__(self, obj, window=None, min_periods=None, freq=None, self.freq = freq self.center = center self.win_type = win_type + self.win_freq = None self.axis = obj._get_axis_number(axis) if axis is not None else None self.validate() @@ -996,7 +997,12 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs): # only default unset pairwise = True if pairwise is None else pairwise other = self._shallow_copy(other) - window = self._get_window(other) + + # GH 16058: offset window + if self.is_freq_type: + window = self.win_freq + else: + window = self._get_window(other) def _get_cov(X, Y): # GH #12373 : rolling functions error on float32 data @@ -1088,6 +1094,7 @@ def validate(self): "based windows") # this will raise ValueError on non-fixed freqs + self.win_freq = self.window self.window = freq.nanos self.win_type = 'freq' diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 6a640d62108b3..cbb3c345a9353 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3833,3 +3833,26 @@ def test_non_monotonic(self): df2 = df.sort_values('B') result = df2.groupby('A').rolling('4s', on='B').C.mean() tm.assert_series_equal(result, expected) + + def test_rolling_cov_offset(self): + # GH16058 + + idx = pd.date_range('2017-01-01', periods=24, freq='1h') + ss = pd.Series(np.arange(len(idx)), index=idx) + + result = ss.rolling('2h').cov() + expected = pd.Series([np.nan] + [0.5 for _ in range(len(idx) - 1)], + index=idx) + tm.assert_series_equal(result, expected) + + expected2 = ss.rolling(2, min_periods=1).cov() + tm.assert_series_equal(result, expected2) + + result = ss.rolling('3h').cov() + expected = pd.Series([np.nan, 0.5] + + [1.0 for _ in range(len(idx) - 2)], + index=idx) 
+ tm.assert_series_equal(result, expected) + + expected2 = ss.rolling(3, min_periods=1).cov() + tm.assert_series_equal(result, expected2) From 92d07992e826808cd56f0bd8fec083b510ca402d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 30 May 2017 21:39:15 -0500 Subject: [PATCH 624/933] TST: Avoid global state in matplotlib tests (#16539) Replaces most uses of implicit global state from matplotlib in test_datetimelike.py. This was potentially causing random failures where a figure expected to be on a new, blank figure would instead plot on an existing axes (that's the guess at least). --- pandas/tests/plotting/test_datetimelike.py | 379 +++++++++++---------- pandas/tests/plotting/test_series.py | 159 ++++++--- 2 files changed, 301 insertions(+), 237 deletions(-) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 0e15aaa2555f4..0cff365be3ec8 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -55,16 +55,15 @@ def test_ts_plot_with_tz(self): def test_fontsize_set_correctly(self): # For issue #8765 - import matplotlib.pyplot as plt # noqa df = DataFrame(np.random.randn(10, 9), index=range(10)) - ax = df.plot(fontsize=2) + fig, ax = self.plt.subplots() + df.plot(fontsize=2, ax=ax) for label in (ax.get_xticklabels() + ax.get_yticklabels()): assert label.get_fontsize() == 2 @slow def test_frame_inferred(self): # inferred freq - import matplotlib.pyplot as plt # noqa idx = date_range('1/1/1987', freq='MS', periods=100) idx = DatetimeIndex(idx.values, freq=None) @@ -90,26 +89,24 @@ def test_is_error_nozeroindex(self): _check_plot_works(a.plot, yerr=a) def test_nonnumeric_exclude(self): - import matplotlib.pyplot as plt - idx = date_range('1/1/1987', freq='A', periods=3) df = DataFrame({'A': ["x", "y", "z"], 'B': [1, 2, 3]}, idx) - ax = df.plot() # it works + fig, ax = self.plt.subplots() + df.plot(ax=ax) # it works assert len(ax.get_lines()) == 1 # B was plotted - plt.close(plt.gcf()) + self.plt.close(fig) pytest.raises(TypeError, df['A'].plot) @slow def test_tsplot(self): from pandas.tseries.plotting import tsplot - import matplotlib.pyplot as plt - ax = plt.gca() + _, ax = self.plt.subplots() ts = tm.makeTimeSeries() - f = lambda *args, **kwds: tsplot(s, plt.Axes.plot, *args, **kwds) + f = lambda *args, **kwds: tsplot(s, self.plt.Axes.plot, *args, **kwds) for s in self.period_ser: _check_plot_works(f, s.index.freq, ax=ax, series=s) @@ -123,12 +120,12 @@ def test_tsplot(self): for s in self.datetime_ser: _check_plot_works(s.plot, ax=ax) - ax = ts.plot(style='k') + _, ax = self.plt.subplots() + ts.plot(style='k', ax=ax) color = (0., 0., 0., 1) if self.mpl_ge_2_0_0 else (0., 0., 0.) 
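The refactor applies one pattern throughout: pandas' ``.plot()`` falls back to ``matplotlib.pyplot.gca()`` when no ``ax`` is passed, so a figure leaked by an earlier test can silently receive the next test's artists. A rough standalone sketch of the two styles (a hypothetical snippet, not part of the patch; the ``Agg`` backend is an assumption to mimic a headless CI machine):

    import matplotlib
    matplotlib.use('Agg')  # assumption: headless backend, as on CI
    import matplotlib.pyplot as plt
    import pandas as pd

    s = pd.Series(range(3))

    # Implicit global state: draws onto whatever plt.gca() returns,
    # which may be an axes left over from earlier code.
    s.plot()

    # Explicit state: the caller owns the figure and axes and can
    # close them deterministically when done.
    fig, ax = plt.subplots()
    s.plot(ax=ax)
    plt.close(fig)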
assert color == ax.get_lines()[0].get_color() def test_both_style_and_color(self): - import matplotlib.pyplot as plt # noqa ts = tm.makeTimeSeries() pytest.raises(ValueError, ts.plot, style='b-', color='#000099') @@ -140,9 +137,10 @@ def test_both_style_and_color(self): def test_high_freq(self): freaks = ['ms', 'us'] for freq in freaks: + _, ax = self.plt.subplots() rng = date_range('1/1/2012', periods=100000, freq=freq) ser = Series(np.random.randn(len(rng)), rng) - _check_plot_works(ser.plot) + _check_plot_works(ser.plot, ax=ax) def test_get_datevalue(self): from pandas.plotting._converter import get_datevalue @@ -167,22 +165,25 @@ def check_format_of_first_point(ax, expected_string): annual = Series(1, index=date_range('2014-01-01', periods=3, freq='A-DEC')) - check_format_of_first_point(annual.plot(), 't = 2014 y = 1.000000') + _, ax = self.plt.subplots() + annual.plot(ax=ax) + check_format_of_first_point(ax, 't = 2014 y = 1.000000') # note this is added to the annual plot already in existence, and # changes its freq field daily = Series(1, index=date_range('2014-01-01', periods=3, freq='D')) - check_format_of_first_point(daily.plot(), + daily.plot(ax=ax) + check_format_of_first_point(ax, 't = 2014-01-01 y = 1.000000') tm.close() # tsplot - import matplotlib.pyplot as plt + _, ax = self.plt.subplots() from pandas.tseries.plotting import tsplot - tsplot(annual, plt.Axes.plot) - check_format_of_first_point(plt.gca(), 't = 2014 y = 1.000000') - tsplot(daily, plt.Axes.plot) - check_format_of_first_point(plt.gca(), 't = 2014-01-01 y = 1.000000') + tsplot(annual, self.plt.Axes.plot, ax=ax) + check_format_of_first_point(ax, 't = 2014 y = 1.000000') + tsplot(daily, self.plt.Axes.plot, ax=ax) + check_format_of_first_point(ax, 't = 2014-01-01 y = 1.000000') @slow def test_line_plot_period_series(self): @@ -215,14 +216,11 @@ def test_line_plot_inferred_freq(self): _check_plot_works(ser.plot) def test_fake_inferred_business(self): - import matplotlib.pyplot as plt - fig = plt.gcf() - plt.clf() - fig.add_subplot(111) + _, ax = self.plt.subplots() rng = date_range('2001-1-1', '2001-1-10') ts = Series(lrange(len(rng)), rng) ts = ts[:3].append(ts[5:]) - ax = ts.plot() + ts.plot(ax=ax) assert not hasattr(ax, 'freq') @slow @@ -244,15 +242,11 @@ def test_plot_multiple_inferred_freq(self): @slow def test_uhf(self): import pandas.plotting._converter as conv - import matplotlib.pyplot as plt - fig = plt.gcf() - plt.clf() - fig.add_subplot(111) - idx = date_range('2012-6-22 21:59:51.960928', freq='L', periods=500) df = DataFrame(np.random.randn(len(idx), 2), idx) - ax = df.plot() + _, ax = self.plt.subplots() + df.plot(ax=ax) axis = ax.get_xaxis() tlocs = axis.get_ticklocs() @@ -265,49 +259,40 @@ def test_uhf(self): @slow def test_irreg_hf(self): - import matplotlib.pyplot as plt - fig = plt.gcf() - plt.clf() - fig.add_subplot(111) - idx = date_range('2012-6-22 21:59:51', freq='S', periods=100) df = DataFrame(np.random.randn(len(idx), 2), idx) irreg = df.iloc[[0, 1, 3, 4]] - ax = irreg.plot() + _, ax = self.plt.subplots() + irreg.plot(ax=ax) diffs = Series(ax.get_lines()[0].get_xydata()[:, 0]).diff() sec = 1. 
/ 24 / 60 / 60 assert (np.fabs(diffs[1:] - [sec, sec * 2, sec]) < 1e-8).all() - plt.clf() - fig.add_subplot(111) + _, ax = self.plt.subplots() df2 = df.copy() df2.index = df.index.asobject - ax = df2.plot() + df2.plot(ax=ax) diffs = Series(ax.get_lines()[0].get_xydata()[:, 0]).diff() assert (np.fabs(diffs[1:] - sec) < 1e-8).all() def test_irregular_datetime64_repr_bug(self): - import matplotlib.pyplot as plt ser = tm.makeTimeSeries() ser = ser[[0, 1, 2, 7]] - fig = plt.gcf() - plt.clf() + _, ax = self.plt.subplots() - ax = fig.add_subplot(211) - - ret = ser.plot() + ret = ser.plot(ax=ax) assert ret is not None for rs, xp in zip(ax.get_lines()[0].get_xdata(), ser.index): assert rs == xp def test_business_freq(self): - import matplotlib.pyplot as plt # noqa bts = tm.makePeriodSeries() - ax = bts.plot() + _, ax = self.plt.subplots() + bts.plot(ax=ax) assert ax.get_lines()[0].get_xydata()[0, 0] == bts.index[0].ordinal idx = ax.get_lines()[0].get_xdata() assert PeriodIndex(data=idx).freqstr == 'B' @@ -319,7 +304,8 @@ def test_business_freq_convert(self): bts = tm.makeTimeSeries().asfreq('BM') tm.N = n ts = bts.to_period('M') - ax = bts.plot() + _, ax = self.plt.subplots() + bts.plot(ax=ax) assert ax.get_lines()[0].get_xydata()[0, 0] == ts.index[0].ordinal idx = ax.get_lines()[0].get_xdata() assert PeriodIndex(data=idx).freqstr == 'M' @@ -329,19 +315,20 @@ def test_nonzero_base(self): idx = (date_range('2012-12-20', periods=24, freq='H') + timedelta( minutes=30)) df = DataFrame(np.arange(24), index=idx) - ax = df.plot() + _, ax = self.plt.subplots() + df.plot(ax=ax) rs = ax.get_lines()[0].get_xdata() assert not Index(rs).is_normalized def test_dataframe(self): bts = DataFrame({'a': tm.makeTimeSeries()}) - ax = bts.plot() + _, ax = self.plt.subplots() + bts.plot(ax=ax) idx = ax.get_lines()[0].get_xdata() tm.assert_index_equal(bts.index.to_period(), PeriodIndex(idx)) @slow def test_axis_limits(self): - import matplotlib.pyplot as plt def _test(ax): xlim = ax.get_xlim() @@ -369,14 +356,16 @@ def _test(ax): assert int(result[0]) == expected[0].ordinal assert int(result[1]) == expected[1].ordinal fig = ax.get_figure() - plt.close(fig) + self.plt.close(fig) ser = tm.makeTimeSeries() - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) _test(ax) + _, ax = self.plt.subplots() df = DataFrame({'a': ser, 'b': ser + 1}) - ax = df.plot() + df.plot(ax=ax) _test(ax) df = DataFrame({'a': ser, 'b': ser + 1}) @@ -397,13 +386,13 @@ def test_get_finder(self): @slow def test_finder_daily(self): - import matplotlib.pyplot as plt xp = Period('1999-1-1', freq='B').ordinal day_lst = [10, 40, 252, 400, 950, 2750, 10000] for n in day_lst: rng = bdate_range('1999-1-1', periods=n) ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] assert xp == rs @@ -411,17 +400,17 @@ def test_finder_daily(self): ax.set_xlim(vmin + 0.9, vmax) rs = xaxis.get_majorticklocs()[0] assert xp == rs - plt.close(ax.get_figure()) + self.plt.close(ax.get_figure()) @slow def test_finder_quarterly(self): - import matplotlib.pyplot as plt xp = Period('1988Q1').ordinal yrs = [3.5, 11] for n in yrs: rng = period_range('1987Q2', periods=int(n * 4), freq='Q') ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] assert rs == xp @@ -429,17 +418,17 @@ def test_finder_quarterly(self): ax.set_xlim(vmin + 0.9, vmax) rs = 
xaxis.get_majorticklocs()[0] assert xp == rs - plt.close(ax.get_figure()) + self.plt.close(ax.get_figure()) @slow def test_finder_monthly(self): - import matplotlib.pyplot as plt xp = Period('Jan 1988').ordinal yrs = [1.15, 2.5, 4, 11] for n in yrs: rng = period_range('1987Q2', periods=int(n * 12), freq='M') ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] assert rs == xp @@ -447,12 +436,13 @@ def test_finder_monthly(self): ax.set_xlim(vmin + 0.9, vmax) rs = xaxis.get_majorticklocs()[0] assert xp == rs - plt.close(ax.get_figure()) + self.plt.close(ax.get_figure()) def test_finder_monthly_long(self): rng = period_range('1988Q1', periods=24 * 12, freq='M') ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] xp = Period('1989Q1', 'M').ordinal @@ -460,23 +450,24 @@ def test_finder_monthly_long(self): @slow def test_finder_annual(self): - import matplotlib.pyplot as plt xp = [1987, 1988, 1990, 1990, 1995, 2020, 2070, 2170] for i, nyears in enumerate([5, 10, 19, 49, 99, 199, 599, 1001]): rng = period_range('1987', periods=nyears, freq='A') ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] assert rs == Period(xp[i], freq='A').ordinal - plt.close(ax.get_figure()) + self.plt.close(ax.get_figure()) @slow def test_finder_minutely(self): nminutes = 50 * 24 * 60 rng = date_range('1/1/1999', freq='Min', periods=nminutes) ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] xp = Period('1/1/1999', freq='Min').ordinal @@ -486,7 +477,8 @@ def test_finder_hourly(self): nhours = 23 rng = date_range('1/1/1999', freq='H', periods=nhours) ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] xp = Period('1/1/1999', freq='H').ordinal @@ -494,11 +486,10 @@ def test_finder_hourly(self): @slow def test_gaps(self): - import matplotlib.pyplot as plt - ts = tm.makeTimeSeries() ts[5:25] = np.nan - ax = ts.plot() + _, ax = self.plt.subplots() + ts.plot(ax=ax) lines = ax.get_lines() tm._skip_if_mpl_1_5() assert len(lines) == 1 @@ -507,13 +498,14 @@ def test_gaps(self): assert isinstance(data, np.ma.core.MaskedArray) mask = data.mask assert mask[5:25, 1].all() - plt.close(ax.get_figure()) + self.plt.close(ax.get_figure()) # irregular ts = tm.makeTimeSeries() ts = ts[[0, 1, 2, 5, 7, 9, 12, 15, 20]] ts[2:5] = np.nan - ax = ts.plot() + _, ax = self.plt.subplots() + ax = ts.plot(ax=ax) lines = ax.get_lines() assert len(lines) == 1 l = lines[0] @@ -521,13 +513,14 @@ def test_gaps(self): assert isinstance(data, np.ma.core.MaskedArray) mask = data.mask assert mask[2:5, 1].all() - plt.close(ax.get_figure()) + self.plt.close(ax.get_figure()) # non-ts idx = [0, 1, 2, 5, 7, 9, 12, 15, 20] ser = Series(np.random.randn(len(idx)), idx) ser[2:5] = np.nan - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) lines = ax.get_lines() assert len(lines) == 1 l = lines[0] @@ -540,7 +533,8 @@ def test_gaps(self): def test_gap_upsample(self): low = tm.makeTimeSeries() low[5:25] = np.nan - ax = low.plot() + _, ax = self.plt.subplots() + low.plot(ax=ax) idxh = 
date_range(low.index[0], low.index[-1], freq='12h') s = Series(np.random.randn(len(idxh)), idxh) @@ -559,26 +553,25 @@ def test_gap_upsample(self): @slow def test_secondary_y(self): - import matplotlib.pyplot as plt - ser = Series(np.random.randn(10)) ser2 = Series(np.random.randn(10)) + fig, _ = self.plt.subplots() ax = ser.plot(secondary_y=True) assert hasattr(ax, 'left_ax') assert not hasattr(ax, 'right_ax') - fig = ax.get_figure() axes = fig.get_axes() l = ax.get_lines()[0] xp = Series(l.get_ydata(), l.get_xdata()) assert_series_equal(ser, xp) assert ax.get_yaxis().get_ticks_position() == 'right' assert not axes[0].get_yaxis().get_visible() - plt.close(fig) + self.plt.close(fig) - ax2 = ser2.plot() + _, ax2 = self.plt.subplots() + ser2.plot(ax=ax2) assert (ax2.get_yaxis().get_ticks_position() == self.default_tick_position) - plt.close(ax2.get_figure()) + self.plt.close(ax2.get_figure()) ax = ser2.plot() ax2 = ser.plot(secondary_y=True) @@ -590,26 +583,26 @@ def test_secondary_y(self): @slow def test_secondary_y_ts(self): - import matplotlib.pyplot as plt idx = date_range('1/1/2000', periods=10) ser = Series(np.random.randn(10), idx) ser2 = Series(np.random.randn(10), idx) + fig, _ = self.plt.subplots() ax = ser.plot(secondary_y=True) assert hasattr(ax, 'left_ax') assert not hasattr(ax, 'right_ax') - fig = ax.get_figure() axes = fig.get_axes() l = ax.get_lines()[0] xp = Series(l.get_ydata(), l.get_xdata()).to_timestamp() assert_series_equal(ser, xp) assert ax.get_yaxis().get_ticks_position() == 'right' assert not axes[0].get_yaxis().get_visible() - plt.close(fig) + self.plt.close(fig) - ax2 = ser2.plot() + _, ax2 = self.plt.subplots() + ser2.plot(ax=ax2) assert (ax2.get_yaxis().get_ticks_position() == self.default_tick_position) - plt.close(ax2.get_figure()) + self.plt.close(ax2.get_figure()) ax = ser2.plot() ax2 = ser.plot(secondary_y=True) @@ -620,20 +613,19 @@ def test_secondary_kde(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() - import matplotlib.pyplot as plt # noqa ser = Series(np.random.randn(10)) - ax = ser.plot(secondary_y=True, kind='density') + fig, ax = self.plt.subplots() + ax = ser.plot(secondary_y=True, kind='density', ax=ax) assert hasattr(ax, 'left_ax') assert not hasattr(ax, 'right_ax') - fig = ax.get_figure() axes = fig.get_axes() assert axes[1].get_yaxis().get_ticks_position() == 'right' @slow def test_secondary_bar(self): ser = Series(np.random.randn(10)) - ax = ser.plot(secondary_y=True, kind='bar') - fig = ax.get_figure() + fig, ax = self.plt.subplots() + ser.plot(secondary_y=True, kind='bar', ax=ax) axes = fig.get_axes() assert axes[1].get_yaxis().get_ticks_position() == 'right' @@ -656,7 +648,7 @@ def test_secondary_bar_frame(self): assert axes[2].get_yaxis().get_ticks_position() == 'right' def test_mixed_freq_regular_first(self): - import matplotlib.pyplot as plt # noqa + # TODO s1 = tm.makeTimeSeries() s2 = s1[[0, 5, 10, 11, 12, 13, 14, 15]] @@ -676,11 +668,11 @@ def test_mixed_freq_regular_first(self): @slow def test_mixed_freq_irregular_first(self): - import matplotlib.pyplot as plt # noqa s1 = tm.makeTimeSeries() s2 = s1[[0, 5, 10, 11, 12, 13, 14, 15]] - s2.plot(style='g') - ax = s1.plot() + _, ax = self.plt.subplots() + s2.plot(style='g', ax=ax) + s1.plot(ax=ax) assert not hasattr(ax, 'freq') lines = ax.get_lines() x1 = lines[0].get_xdata() @@ -690,10 +682,10 @@ def test_mixed_freq_irregular_first(self): def test_mixed_freq_regular_first_df(self): # GH 9852 - import matplotlib.pyplot as plt # noqa s1 = tm.makeTimeSeries().to_frame() s2 = 
s1.iloc[[0, 5, 10, 11, 12, 13, 14, 15], :] - ax = s1.plot() + _, ax = self.plt.subplots() + s1.plot(ax=ax) ax2 = s2.plot(style='g', ax=ax) lines = ax2.get_lines() idx1 = PeriodIndex(lines[0].get_xdata()) @@ -708,11 +700,11 @@ def test_mixed_freq_regular_first_df(self): @slow def test_mixed_freq_irregular_first_df(self): # GH 9852 - import matplotlib.pyplot as plt # noqa s1 = tm.makeTimeSeries().to_frame() s2 = s1.iloc[[0, 5, 10, 11, 12, 13, 14, 15], :] - ax = s2.plot(style='g') - ax = s1.plot(ax=ax) + _, ax = self.plt.subplots() + s2.plot(style='g', ax=ax) + s1.plot(ax=ax) assert not hasattr(ax, 'freq') lines = ax.get_lines() x1 = lines[0].get_xdata() @@ -725,8 +717,9 @@ def test_mixed_freq_hf_first(self): idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - high.plot() - ax = low.plot() + _, ax = self.plt.subplots() + high.plot(ax=ax) + low.plot(ax=ax) for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == 'D' @@ -738,33 +731,35 @@ def test_mixed_freq_alignment(self): ts = Series(ts_data, index=ts_ind) ts2 = ts.asfreq('T').interpolate() - ax = ts.plot() - ts2.plot(style='r') + _, ax = self.plt.subplots() + ax = ts.plot(ax=ax) + ts2.plot(style='r', ax=ax) assert ax.lines[0].get_xdata()[0] == ax.lines[1].get_xdata()[0] @slow def test_mixed_freq_lf_first(self): - import matplotlib.pyplot as plt idxh = date_range('1/1/1999', periods=365, freq='D') idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - low.plot(legend=True) - ax = high.plot(legend=True) + _, ax = self.plt.subplots() + low.plot(legend=True, ax=ax) + high.plot(legend=True, ax=ax) for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == 'D' leg = ax.get_legend() assert len(leg.texts) == 2 - plt.close(ax.get_figure()) + self.plt.close(ax.get_figure()) idxh = date_range('1/1/1999', periods=240, freq='T') idxl = date_range('1/1/1999', periods=4, freq='H') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - low.plot() - ax = high.plot() + _, ax = self.plt.subplots() + low.plot(ax=ax) + high.plot(ax=ax) for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == 'T' @@ -773,8 +768,9 @@ def test_mixed_freq_irreg_period(self): irreg = ts[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 18, 29]] rng = period_range('1/3/2000', periods=30, freq='B') ps = Series(np.random.randn(len(rng)), rng) - irreg.plot() - ps.plot() + _, ax = self.plt.subplots() + irreg.plot(ax=ax) + ps.plot(ax=ax) def test_mixed_freq_shared_ax(self): @@ -813,9 +809,7 @@ def test_mixed_freq_shared_ax(self): def test_nat_handling(self): - fig = self.plt.gcf() - # self.plt.clf() - ax = fig.add_subplot(111) + _, ax = self.plt.subplots() dti = DatetimeIndex(['2015-01-01', NaT, '2015-01-03']) s = Series(range(len(dti)), dti) @@ -831,17 +825,18 @@ def test_to_weekly_resampling(self): idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - high.plot() - ax = low.plot() + _, ax = self.plt.subplots() + high.plot(ax=ax) + low.plot(ax=ax) for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq # tsplot from pandas.tseries.plotting import tsplot - import matplotlib.pyplot as plt - tsplot(high, plt.Axes.plot) - lines = tsplot(low, plt.Axes.plot) + _, ax = self.plt.subplots() + tsplot(high, 
self.plt.Axes.plot, ax=ax) + lines = tsplot(low, self.plt.Axes.plot, ax=ax) for l in lines: assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq @@ -851,8 +846,9 @@ def test_from_weekly_resampling(self): idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - low.plot() - ax = high.plot() + _, ax = self.plt.subplots() + low.plot(ax=ax) + high.plot(ax=ax) expected_h = idxh.to_period().asi8.astype(np.float64) expected_l = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, @@ -868,10 +864,10 @@ def test_from_weekly_resampling(self): # tsplot from pandas.tseries.plotting import tsplot - import matplotlib.pyplot as plt - tsplot(low, plt.Axes.plot) - lines = tsplot(high, plt.Axes.plot) + _, ax = self.plt.subplots() + tsplot(low, self.plt.Axes.plot, ax=ax) + lines = tsplot(high, self.plt.Axes.plot, ax=ax) for l in lines: assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq xdata = l.get_xdata(orig=False) @@ -891,8 +887,9 @@ def test_from_resampling_area_line_mixed(self): # low to high for kind1, kind2 in [('line', 'area'), ('area', 'line')]: - ax = low.plot(kind=kind1, stacked=True) - ax = high.plot(kind=kind2, stacked=True, ax=ax) + _, ax = self.plt.subplots() + low.plot(kind=kind1, stacked=True, ax=ax) + high.plot(kind=kind2, stacked=True, ax=ax) # check low dataframe result expected_x = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, @@ -923,8 +920,9 @@ def test_from_resampling_area_line_mixed(self): # high to low for kind1, kind2 in [('line', 'area'), ('area', 'line')]: - ax = high.plot(kind=kind1, stacked=True) - ax = low.plot(kind=kind2, stacked=True, ax=ax) + _, ax = self.plt.subplots() + high.plot(kind=kind1, stacked=True, ax=ax) + low.plot(kind=kind2, stacked=True, ax=ax) # check high dataframe result expected_x = idxh.to_period().asi8.astype(np.float64) @@ -960,16 +958,18 @@ def test_mixed_freq_second_millisecond(self): high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) # high to low - high.plot() - ax = low.plot() + _, ax = self.plt.subplots() + high.plot(ax=ax) + low.plot(ax=ax) assert len(ax.get_lines()) == 2 for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == 'L' tm.close() # low to high - low.plot() - ax = high.plot() + _, ax = self.plt.subplots() + low.plot(ax=ax) + high.plot(ax=ax) assert len(ax.get_lines()) == 2 for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == 'L' @@ -985,7 +985,8 @@ def test_irreg_dtypes(self): idx = date_range('1/1/2000', periods=10) idx = idx[[0, 2, 5, 9]].asobject df = DataFrame(np.random.randn(len(idx), 3), idx) - _check_plot_works(df.plot) + _, ax = self.plt.subplots() + _check_plot_works(df.plot, ax=ax) @slow def test_time(self): @@ -995,7 +996,8 @@ def test_time(self): df = DataFrame({'a': np.random.randn(len(ts)), 'b': np.random.randn(len(ts))}, index=ts) - ax = df.plot() + _, ax = self.plt.subplots() + df.plot(ax=ax) # verify tick labels ticks = ax.get_xticks() @@ -1031,7 +1033,8 @@ def test_time_musec(self): df = DataFrame({'a': np.random.randn(len(ts)), 'b': np.random.randn(len(ts))}, index=ts) - ax = df.plot() + _, ax = self.plt.subplots() + ax = df.plot(ax=ax) # verify tick labels ticks = ax.get_xticks() @@ -1054,8 +1057,9 @@ def test_secondary_upsample(self): idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - low.plot() - ax = high.plot(secondary_y=True) + 
_, ax = self.plt.subplots() + low.plot(ax=ax) + ax = high.plot(secondary_y=True, ax=ax) for l in ax.get_lines(): assert PeriodIndex(l.get_xdata()).freq == 'D' assert hasattr(ax, 'left_ax') @@ -1065,14 +1069,12 @@ def test_secondary_upsample(self): @slow def test_secondary_legend(self): - import matplotlib.pyplot as plt - fig = plt.gcf() - plt.clf() + fig = self.plt.figure() ax = fig.add_subplot(211) # ts df = tm.makeTimeDataFrame() - ax = df.plot(secondary_y=['A', 'B']) + df.plot(secondary_y=['A', 'B'], ax=ax) leg = ax.get_legend() assert len(leg.get_lines()) == 4 assert leg.get_texts()[0].get_text() == 'A (right)' @@ -1086,33 +1088,37 @@ def test_secondary_legend(self): # TODO: color cycle problems assert len(colors) == 4 + self.plt.close(fig) - plt.clf() + fig = self.plt.figure() ax = fig.add_subplot(211) - ax = df.plot(secondary_y=['A', 'C'], mark_right=False) + df.plot(secondary_y=['A', 'C'], mark_right=False, ax=ax) leg = ax.get_legend() assert len(leg.get_lines()) == 4 assert leg.get_texts()[0].get_text() == 'A' assert leg.get_texts()[1].get_text() == 'B' assert leg.get_texts()[2].get_text() == 'C' assert leg.get_texts()[3].get_text() == 'D' + self.plt.close(fig) - plt.clf() - ax = df.plot(kind='bar', secondary_y=['A']) + fig, ax = self.plt.subplots() + df.plot(kind='bar', secondary_y=['A'], ax=ax) leg = ax.get_legend() assert leg.get_texts()[0].get_text() == 'A (right)' assert leg.get_texts()[1].get_text() == 'B' + self.plt.close(fig) - plt.clf() - ax = df.plot(kind='bar', secondary_y=['A'], mark_right=False) + fig, ax = self.plt.subplots() + df.plot(kind='bar', secondary_y=['A'], mark_right=False, ax=ax) leg = ax.get_legend() assert leg.get_texts()[0].get_text() == 'A' assert leg.get_texts()[1].get_text() == 'B' + self.plt.close(fig) - plt.clf() + fig = self.plt.figure() ax = fig.add_subplot(211) df = tm.makeTimeDataFrame() - ax = df.plot(secondary_y=['C', 'D']) + ax = df.plot(secondary_y=['C', 'D'], ax=ax) leg = ax.get_legend() assert len(leg.get_lines()) == 4 assert ax.right_ax.get_legend() is None @@ -1122,12 +1128,13 @@ def test_secondary_legend(self): # TODO: color cycle problems assert len(colors) == 4 + self.plt.close(fig) # non-ts df = tm.makeDataFrame() - plt.clf() + fig = self.plt.figure() ax = fig.add_subplot(211) - ax = df.plot(secondary_y=['A', 'B']) + ax = df.plot(secondary_y=['A', 'B'], ax=ax) leg = ax.get_legend() assert len(leg.get_lines()) == 4 assert ax.right_ax.get_legend() is None @@ -1137,10 +1144,11 @@ def test_secondary_legend(self): # TODO: color cycle problems assert len(colors) == 4 + self.plt.close() - plt.clf() + fig = self.plt.figure() ax = fig.add_subplot(211) - ax = df.plot(secondary_y=['C', 'D']) + ax = df.plot(secondary_y=['C', 'D'], ax=ax) leg = ax.get_legend() assert len(leg.get_lines()) == 4 assert ax.right_ax.get_legend() is None @@ -1154,7 +1162,8 @@ def test_secondary_legend(self): def test_format_date_axis(self): rng = date_range('1/1/2012', periods=12, freq='M') df = DataFrame(np.random.randn(len(rng), 3), rng) - ax = df.plot() + _, ax = self.plt.subplots() + ax = df.plot(ax=ax) xaxis = ax.get_xaxis() for l in xaxis.get_ticklabels(): if len(l.get_text()) > 0: @@ -1162,28 +1171,21 @@ def test_format_date_axis(self): @slow def test_ax_plot(self): - import matplotlib.pyplot as plt - x = DatetimeIndex(start='2012-01-02', periods=10, freq='D') y = lrange(len(x)) - fig = plt.figure() - ax = fig.add_subplot(111) + _, ax = self.plt.subplots() lines = ax.plot(x, y, label='Y') tm.assert_index_equal(DatetimeIndex(lines[0].get_xdata()), x) @slow def 
test_mpl_nopandas(self): - import matplotlib.pyplot as plt - dates = [date(2008, 12, 31), date(2009, 1, 31)] values1 = np.arange(10.0, 11.0, 0.5) values2 = np.arange(11.0, 12.0, 0.5) kw = dict(fmt='-', lw=4) - plt.close('all') - fig = plt.figure() - ax = fig.add_subplot(111) + _, ax = self.plt.subplots() ax.plot_date([x.toordinal() for x in dates], values1, **kw) ax.plot_date([x.toordinal() for x in dates], values2, **kw) @@ -1201,7 +1203,8 @@ def test_irregular_ts_shared_ax_xlim(self): ts_irregular = ts[[1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 17, 18]] # plot the left section of the irregular series, then the right section - ax = ts_irregular[:5].plot() + _, ax = self.plt.subplots() + ts_irregular[:5].plot(ax=ax) ts_irregular[5:].plot(ax=ax) # check that axis limits are correct @@ -1217,7 +1220,8 @@ def test_secondary_y_non_ts_xlim(self): s1 = Series(1, index=index_1) s2 = Series(2, index=index_2) - ax = s1.plot() + _, ax = self.plt.subplots() + s1.plot(ax=ax) left_before, right_before = ax.get_xlim() s2.plot(secondary_y=True, ax=ax) left_after, right_after = ax.get_xlim() @@ -1233,7 +1237,8 @@ def test_secondary_y_regular_ts_xlim(self): s1 = Series(1, index=index_1) s2 = Series(2, index=index_2) - ax = s1.plot() + _, ax = self.plt.subplots() + s1.plot(ax=ax) left_before, right_before = ax.get_xlim() s2.plot(secondary_y=True, ax=ax) left_after, right_after = ax.get_xlim() @@ -1247,7 +1252,8 @@ def test_secondary_y_mixed_freq_ts_xlim(self): rng = date_range('2000-01-01', periods=10000, freq='min') ts = Series(1, index=rng) - ax = ts.plot() + _, ax = self.plt.subplots() + ts.plot(ax=ax) left_before, right_before = ax.get_xlim() ts.resample('D').mean().plot(secondary_y=True, ax=ax) left_after, right_after = ax.get_xlim() @@ -1262,7 +1268,8 @@ def test_secondary_y_irregular_ts_xlim(self): ts = tm.makeTimeSeries()[:20] ts_irregular = ts[[1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 17, 18]] - ax = ts_irregular[:5].plot() + _, ax = self.plt.subplots() + ts_irregular[:5].plot(ax=ax) # plot higher-x values on secondary axis ts_irregular[5:].plot(secondary_y=True, ax=ax) # ensure secondary limits aren't overwritten by plot on primary @@ -1275,10 +1282,11 @@ def test_secondary_y_irregular_ts_xlim(self): def test_plot_outofbounds_datetime(self): # 2579 - checking this does not raise values = [date(1677, 1, 1), date(1677, 1, 2)] - self.plt.plot(values) + _, ax = self.plt.subplots() + ax.plot(values) values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)] - self.plt.plot(values) + ax.plot(values) def test_format_timedelta_ticks_narrow(self): if is_platform_mac(): @@ -1290,8 +1298,8 @@ def test_format_timedelta_ticks_narrow(self): rng = timedelta_range('0', periods=10, freq='ns') df = DataFrame(np.random.randn(len(rng), 3), rng) - ax = df.plot(fontsize=2) - fig = ax.get_figure() + fig, ax = self.plt.subplots() + df.plot(fontsize=2, ax=ax) fig.canvas.draw() labels = ax.get_xticklabels() assert len(labels) == len(expected_labels) @@ -1316,8 +1324,8 @@ def test_format_timedelta_ticks_wide(self): rng = timedelta_range('0', periods=10, freq='1 d') df = DataFrame(np.random.randn(len(rng), 3), rng) - ax = df.plot(fontsize=2) - fig = ax.get_figure() + fig, ax = self.plt.subplots() + ax = df.plot(fontsize=2, ax=ax) fig.canvas.draw() labels = ax.get_xticklabels() assert len(labels) == len(expected_labels) @@ -1327,19 +1335,22 @@ def test_format_timedelta_ticks_wide(self): def test_timedelta_plot(self): # test issue #8711 s = Series(range(5), timedelta_range('1day', periods=5)) - _check_plot_works(s.plot) + _, ax = 
self.plt.subplots() + _check_plot_works(s.plot, ax=ax) # test long period index = timedelta_range('1 day 2 hr 30 min 10 s', periods=10, freq='1 d') s = Series(np.random.randn(len(index)), index) - _check_plot_works(s.plot) + _, ax = self.plt.subplots() + _check_plot_works(s.plot, ax=ax) # test short period index = timedelta_range('1 day 2 hr 30 min 10 s', periods=10, freq='1 ns') s = Series(np.random.randn(len(index)), index) - _check_plot_works(s.plot) + _, ax = self.plt.subplots() + _check_plot_works(s.plot, ax=ax) def test_hist(self): # https://github.com/matplotlib/matplotlib/issues/8459 @@ -1347,7 +1358,8 @@ def test_hist(self): x = rng w1 = np.arange(0, 1, .1) w2 = np.arange(0, 1, .1)[::-1] - self.plt.hist([x, x], weights=[w1, w2]) + _, ax = self.plt.subplots() + ax.hist([x, x], weights=[w1, w2]) @slow def test_overlapping_datetime(self): @@ -1361,7 +1373,8 @@ def test_overlapping_datetime(self): # plot first series, then add the second series to those axes, # then try adding the first series again - ax = s1.plot() + _, ax = self.plt.subplots() + s1.plot(ax=ax) s2.plot(ax=ax) s1.plot(ax=ax) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 340a98484480f..7c66b5dafb9c7 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -82,7 +82,8 @@ def test_plot(self): @slow def test_plot_figsize_and_title(self): # figsize and title - ax = self.series.plot(title='Test', figsize=(16, 8)) + _, ax = self.plt.subplots() + ax = self.series.plot(title='Test', figsize=(16, 8), ax=ax) self._check_text_labels(ax.title, 'Test') self._check_axes_shape(ax, axes_num=1, layout=(1, 1), figsize=(16, 8)) @@ -93,25 +94,28 @@ def test_dont_modify_rcParams(self): else: key = 'axes.color_cycle' colors = self.plt.rcParams[key] - Series([1, 2, 3]).plot() + _, ax = self.plt.subplots() + Series([1, 2, 3]).plot(ax=ax) assert colors == self.plt.rcParams[key] def test_ts_line_lim(self): - ax = self.ts.plot() + fig, ax = self.plt.subplots() + ax = self.ts.plot(ax=ax) xmin, xmax = ax.get_xlim() lines = ax.get_lines() assert xmin == lines[0].get_data(orig=False)[0][0] assert xmax == lines[0].get_data(orig=False)[0][-1] tm.close() - ax = self.ts.plot(secondary_y=True) + ax = self.ts.plot(secondary_y=True, ax=ax) xmin, xmax = ax.get_xlim() lines = ax.get_lines() assert xmin == lines[0].get_data(orig=False)[0][0] assert xmax == lines[0].get_data(orig=False)[0][-1] def test_ts_area_lim(self): - ax = self.ts.plot.area(stacked=False) + _, ax = self.plt.subplots() + ax = self.ts.plot.area(stacked=False, ax=ax) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] assert xmin == line[0] @@ -119,7 +123,8 @@ def test_ts_area_lim(self): tm.close() # GH 7471 - ax = self.ts.plot.area(stacked=False, x_compat=True) + _, ax = self.plt.subplots() + ax = self.ts.plot.area(stacked=False, x_compat=True, ax=ax) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] assert xmin == line[0] @@ -128,14 +133,16 @@ def test_ts_area_lim(self): tz_ts = self.ts.copy() tz_ts.index = tz_ts.tz_localize('GMT').tz_convert('CET') - ax = tz_ts.plot.area(stacked=False, x_compat=True) + _, ax = self.plt.subplots() + ax = tz_ts.plot.area(stacked=False, x_compat=True, ax=ax) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] assert xmin == line[0] assert xmax == line[-1] tm.close() - ax = tz_ts.plot.area(stacked=False, secondary_y=True) + _, ax = self.plt.subplots() + ax = tz_ts.plot.area(stacked=False, 
secondary_y=True, ax=ax) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] assert xmin == line[0] @@ -143,23 +150,28 @@ def test_ts_area_lim(self): def test_label(self): s = Series([1, 2]) - ax = s.plot(label='LABEL', legend=True) + _, ax = self.plt.subplots() + ax = s.plot(label='LABEL', legend=True, ax=ax) self._check_legend_labels(ax, labels=['LABEL']) self.plt.close() - ax = s.plot(legend=True) + _, ax = self.plt.subplots() + ax = s.plot(legend=True, ax=ax) self._check_legend_labels(ax, labels=['None']) self.plt.close() # get name from index s.name = 'NAME' - ax = s.plot(legend=True) + _, ax = self.plt.subplots() + ax = s.plot(legend=True, ax=ax) self._check_legend_labels(ax, labels=['NAME']) self.plt.close() # override the default - ax = s.plot(legend=True, label='LABEL') + _, ax = self.plt.subplots() + ax = s.plot(legend=True, label='LABEL', ax=ax) self._check_legend_labels(ax, labels=['LABEL']) self.plt.close() # Add label info, but don't draw - ax = s.plot(legend=False, label='LABEL') + _, ax = self.plt.subplots() + ax = s.plot(legend=False, label='LABEL', ax=ax) assert ax.get_legend() is None # Hasn't been drawn ax.legend() # draw it self._check_legend_labels(ax, labels=['LABEL']) @@ -189,10 +201,12 @@ def test_line_area_nan_series(self): def test_line_use_index_false(self): s = Series([1, 2, 3], index=['a', 'b', 'c']) s.index.name = 'The Index' - ax = s.plot(use_index=False) + _, ax = self.plt.subplots() + ax = s.plot(use_index=False, ax=ax) label = ax.get_xlabel() assert label == '' - ax2 = s.plot.bar(use_index=False) + _, ax = self.plt.subplots() + ax2 = s.plot.bar(use_index=False, ax=ax) label2 = ax2.get_xlabel() assert label2 == '' @@ -203,11 +217,13 @@ def test_bar_log(self): if not self.mpl_le_1_2_1: expected = np.hstack((.1, expected, 1e4)) - ax = Series([200, 500]).plot.bar(log=True) + _, ax = self.plt.subplots() + ax = Series([200, 500]).plot.bar(log=True, ax=ax) tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) tm.close() - ax = Series([200, 500]).plot.barh(log=True) + _, ax = self.plt.subplots() + ax = Series([200, 500]).plot.barh(log=True, ax=ax) tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), expected) tm.close() @@ -219,7 +235,8 @@ def test_bar_log(self): if self.mpl_ge_2_0_0: expected = np.hstack((1.0e-05, expected)) - ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind='bar') + _, ax = self.plt.subplots() + ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind='bar', ax=ax) ymin = 0.0007943282347242822 if self.mpl_ge_2_0_0 else 0.001 ymax = 0.12589254117941673 if self.mpl_ge_2_0_0 else .10000000000000001 res = ax.get_ylim() @@ -228,7 +245,8 @@ def test_bar_log(self): tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) tm.close() - ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind='barh') + _, ax = self.plt.subplots() + ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind='barh', ax=ax) res = ax.get_xlim() tm.assert_almost_equal(res[0], ymin) tm.assert_almost_equal(res[1], ymax) @@ -237,23 +255,27 @@ def test_bar_log(self): @slow def test_bar_ignore_index(self): df = Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']) - ax = df.plot.bar(use_index=False) + _, ax = self.plt.subplots() + ax = df.plot.bar(use_index=False, ax=ax) self._check_text_labels(ax.get_xticklabels(), ['0', '1', '2', '3']) def test_rotation(self): df = DataFrame(randn(5, 5)) # Default rot 0 - axes = df.plot() + _, ax = self.plt.subplots() + axes = df.plot(ax=ax) self._check_ticks_props(axes, xrot=0) - axes = df.plot(rot=30) + _, ax =
self.plt.subplots() + axes = df.plot(rot=30, ax=ax) self._check_ticks_props(axes, xrot=30) def test_irregular_datetime(self): rng = date_range('1/1/2000', '3/1/2000') rng = rng[[0, 1, 2, 3, 5, 9, 10, 11, 12]] ser = Series(randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ax = ser.plot(ax=ax) xp = datetime(1999, 1, 1).toordinal() ax.set_xlim('1/1/1999', '1/1/2001') assert xp == ax.get_xlim()[0] @@ -311,7 +333,8 @@ def test_pie_series(self): def test_pie_nan(self): s = Series([1, np.nan, 1, 1]) - ax = s.plot.pie(legend=True) + _, ax = self.plt.subplots() + ax = s.plot.pie(legend=True, ax=ax) expected = ['0', '', '2', '3'] result = [x.get_text() for x in ax.texts] assert result == expected @@ -319,7 +342,8 @@ def test_pie_nan(self): @slow def test_hist_df_kwargs(self): df = DataFrame(np.random.randn(10, 2)) - ax = df.plot.hist(bins=5) + _, ax = self.plt.subplots() + ax = df.plot.hist(bins=5, ax=ax) assert len(ax.patches) == 10 @slow @@ -329,10 +353,12 @@ def test_hist_df_with_nonnumerics(self): df = DataFrame( np.random.randn(10, 4), columns=['A', 'B', 'C', 'D']) df['E'] = ['x', 'y'] * 5 - ax = df.plot.hist(bins=5) + _, ax = self.plt.subplots() + ax = df.plot.hist(bins=5, ax=ax) assert len(ax.patches) == 20 - ax = df.plot.hist() # bins=10 + _, ax = self.plt.subplots() + ax = df.plot.hist(ax=ax) # bins=10 assert len(ax.patches) == 40 @slow @@ -439,7 +465,8 @@ def test_hist_secondary_legend(self): df = DataFrame(np.random.randn(30, 4), columns=list('abcd')) # primary -> secondary - ax = df['a'].plot.hist(legend=True) + _, ax = self.plt.subplots() + ax = df['a'].plot.hist(legend=True, ax=ax) df['b'].plot.hist(ax=ax, legend=True, secondary_y=True) # both legends are drawn on left ax # left and right axis must be visible @@ -449,7 +476,8 @@ def test_hist_secondary_legend(self): tm.close() # secondary -> secondary - ax = df['a'].plot.hist(legend=True, secondary_y=True) + _, ax = self.plt.subplots() + ax = df['a'].plot.hist(legend=True, secondary_y=True, ax=ax) df['b'].plot.hist(ax=ax, legend=True, secondary_y=True) # both legends are drawn on left ax # left axis must be invisible, right axis must be visible @@ -460,7 +488,8 @@ def test_hist_secondary_legend(self): tm.close() # secondary -> primary - ax = df['a'].plot.hist(legend=True, secondary_y=True) + _, ax = self.plt.subplots() + ax = df['a'].plot.hist(legend=True, secondary_y=True, ax=ax) # right axes is returned df['b'].plot.hist(ax=ax, legend=True) # both legends are drawn on left ax @@ -477,8 +506,9 @@ def test_df_series_secondary_legend(self): s = Series(np.random.randn(30), name='x') # primary -> secondary (without passing ax) - ax = df.plot() - s.plot(legend=True, secondary_y=True) + _, ax = self.plt.subplots() + ax = df.plot(ax=ax) + s.plot(legend=True, secondary_y=True, ax=ax) # both legends are drawn on left ax # left and right axis must be visible self._check_legend_labels(ax, labels=['a', 'b', 'c', 'x (right)']) @@ -487,7 +517,8 @@ def test_df_series_secondary_legend(self): tm.close() # primary -> secondary (with passing ax) - ax = df.plot() + _, ax = self.plt.subplots() + ax = df.plot(ax=ax) s.plot(ax=ax, legend=True, secondary_y=True) # both legends are drawn on left ax # left and right axis must be visible @@ -497,8 +528,9 @@ def test_df_series_secondary_legend(self): tm.close() # secondary -> secondary (without passing ax) - ax = df.plot(secondary_y=True) - s.plot(legend=True, secondary_y=True) + _, ax = self.plt.subplots() + ax = df.plot(secondary_y=True, ax=ax) + s.plot(legend=True, secondary_y=True, ax=ax) # both legends are drawn on left ax
# left axis must be invisible and right axis must be visible expected = ['a (right)', 'b (right)', 'c (right)', 'x (right)'] @@ -508,7 +540,8 @@ def test_df_series_secondary_legend(self): tm.close() # secondary -> secondary (with passing ax) - ax = df.plot(secondary_y=True) + _, ax = self.plt.subplots() + ax = df.plot(secondary_y=True, ax=ax) s.plot(ax=ax, legend=True, secondary_y=True) # both legends are drawn on left ax # left axis must be invisible and right axis must be visible @@ -519,7 +552,8 @@ def test_df_series_secondary_legend(self): tm.close() # secondary -> secondary (with passing ax) - ax = df.plot(secondary_y=True, mark_right=False) + _, ax = self.plt.subplots() + ax = df.plot(secondary_y=True, mark_right=False, ax=ax) s.plot(ax=ax, legend=True, secondary_y=True) # both legends are drawn on left ax # left axis must be invisible and right axis must be visible @@ -533,11 +567,13 @@ def test_df_series_secondary_legend(self): def test_plot_fails_with_dupe_color_and_style(self): x = Series(randn(2)) with pytest.raises(ValueError): - x.plot(style='k--', color='k') + _, ax = self.plt.subplots() + x.plot(style='k--', color='k', ax=ax) @slow def test_hist_kde(self): - ax = self.ts.plot.hist(logy=True) + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(logy=True, ax=ax) self._check_ax_scales(ax, yaxis='log') xlabels = ax.get_xticklabels() # ticks are values, thus ticklabels are blank self._check_text_labels(xlabels, [''] * len(xlabels)) @@ -549,7 +585,8 @@ def test_hist_kde(self): _skip_if_no_scipy_gaussian_kde() _check_plot_works(self.ts.plot.kde) _check_plot_works(self.ts.plot.density) - ax = self.ts.plot.kde(logy=True) + _, ax = self.plt.subplots() + ax = self.ts.plot.kde(logy=True, ax=ax) self._check_ax_scales(ax, yaxis='log') xlabels = ax.get_xticklabels() self._check_text_labels(xlabels, [''] * len(xlabels)) @@ -565,8 +602,9 @@ def test_kde_kwargs(self): ind=linspace(-100, 100, 20)) _check_plot_works(self.ts.plot.density, bw_method=.5, ind=linspace(-100, 100, 20)) + _, ax = self.plt.subplots() ax = self.ts.plot.kde(logy=True, bw_method=.5, - ind=linspace(-100, 100, 20)) + ind=linspace(-100, 100, 20), ax=ax) self._check_ax_scales(ax, yaxis='log') self._check_text_labels(ax.yaxis.get_label(), 'Density') @@ -583,29 +621,34 @@ def test_kde_missing_vals(self): @slow def test_hist_kwargs(self): - ax = self.ts.plot.hist(bins=5) + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(bins=5, ax=ax) assert len(ax.patches) == 5 self._check_text_labels(ax.yaxis.get_label(), 'Frequency') tm.close() if self.mpl_ge_1_3_1: - ax = self.ts.plot.hist(orientation='horizontal') + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(orientation='horizontal', ax=ax) self._check_text_labels(ax.xaxis.get_label(), 'Frequency') tm.close() - ax = self.ts.plot.hist(align='left', stacked=True) + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(align='left', stacked=True, ax=ax) tm.close() @slow def test_hist_kde_color(self): - ax = self.ts.plot.hist(logy=True, bins=10, color='b') + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(logy=True, bins=10, color='b', ax=ax) self._check_ax_scales(ax, yaxis='log') assert len(ax.patches) == 10 self._check_colors(ax.patches, facecolors=['b'] * 10) tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() - ax = self.ts.plot.kde(logy=True, color='r') + _, ax = self.plt.subplots() + ax = self.ts.plot.kde(logy=True, color='r', ax=ax) self._check_ax_scales(ax, yaxis='log') lines = ax.get_lines() assert len(lines) == 1 @@ -613,7 +656,8 @@ def
test_boxplot_series(self): - ax = self.ts.plot.box(logy=True) + _, ax = self.plt.subplots() + ax = self.ts.plot.box(logy=True, ax=ax) self._check_ax_scales(ax, yaxis='log') xlabels = ax.get_xticklabels() self._check_text_labels(xlabels, [self.ts.name]) @@ -625,20 +669,22 @@ def test_kind_both_ways(self): s = Series(range(3)) kinds = (plotting._core._common_kinds + plotting._core._series_kinds) + _, ax = self.plt.subplots() for kind in kinds: if not _ok_for_gaussian_kde(kind): continue - s.plot(kind=kind) + s.plot(kind=kind, ax=ax) getattr(s.plot, kind)() @slow def test_invalid_plot_data(self): s = Series(list('abcd')) + _, ax = self.plt.subplots() for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with pytest.raises(TypeError): - s.plot(kind=kind) + s.plot(kind=kind, ax=ax) @slow def test_valid_object_plot(self): @@ -650,11 +696,12 @@ def test_valid_object_plot(self): def test_partially_invalid_plot_data(self): s = Series(['a', 'b', 1.0, 2]) + _, ax = self.plt.subplots() for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with pytest.raises(TypeError): - s.plot(kind=kind) + s.plot(kind=kind, ax=ax) def test_invalid_kind(self): s = Series([1, 2]) @@ -776,13 +823,15 @@ def test_standard_colors_all(self): def test_series_plot_color_kwargs(self): # GH1890 - ax = Series(np.arange(12) + 1).plot(color='green') + _, ax = self.plt.subplots() + ax = Series(np.arange(12) + 1).plot(color='green', ax=ax) self._check_colors(ax.get_lines(), linecolors=['green']) def test_time_series_plot_color_kwargs(self): # #1890 + _, ax = self.plt.subplots() ax = Series(np.arange(12) + 1, index=date_range( - '1/1/2000', periods=12)).plot(color='green') + '1/1/2000', periods=12)).plot(color='green', ax=ax) self._check_colors(ax.get_lines(), linecolors=['green']) def test_time_series_plot_color_with_empty_kwargs(self): @@ -797,14 +846,16 @@ def test_time_series_plot_color_with_empty_kwargs(self): ncolors = 3 + _, ax = self.plt.subplots() for i in range(ncolors): - ax = s.plot() + ax = s.plot(ax=ax) self._check_colors(ax.get_lines(), linecolors=def_colors[:ncolors]) def test_xticklabels(self): # GH11529 s = Series(np.arange(10), index=['P%02d' % i for i in range(10)]) - ax = s.plot(xticks=[0, 3, 5, 9]) + _, ax = self.plt.subplots() + ax = s.plot(xticks=[0, 3, 5, 9], ax=ax) exp = ['P%02d' % i for i in [0, 3, 5, 9]] self._check_text_labels(ax.get_xticklabels(), exp) From fbdae2dffb59f437cbb8ef10b8e49a1c224307dc Mon Sep 17 00:00:00 2001 From: Vincent La Date: Wed, 31 May 2017 01:54:26 -0700 Subject: [PATCH 625/933] DOC: Update to docstring of DataFrame(dtype) (#14764) (#16487) * Adding some more documentation on dataframe with regards to dtype * Making example for creating dataframe from np matrix easier --- pandas/core/frame.py | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 743d623ee5e44..907959c42323e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -241,17 +241,47 @@ class DataFrame(NDFrame): Column labels to use for resulting frame. Will default to np.arange(n) if no column labels are provided dtype : dtype, default None - Data type to force, otherwise infer + Data type to force. Only a single dtype is allowed. If None, infer copy : boolean, default False Copy data from inputs. 
Only affects DataFrame / 2d ndarray input Examples -------- - >>> d = {'col1': ts1, 'col2': ts2} - >>> df = DataFrame(data=d, index=index) - >>> df2 = DataFrame(np.random.randn(10, 5)) - >>> df3 = DataFrame(np.random.randn(10, 5), - ... columns=['a', 'b', 'c', 'd', 'e']) + Constructing DataFrame from a dictionary. + + >>> d = {'col1': [1, 2], 'col2': [3, 4]} + >>> df = pd.DataFrame(data=d) + >>> df + col1 col2 + 0 1 3 + 1 2 4 + + Notice that the inferred dtype is int64. + + >>> df.dtypes + col1 int64 + col2 int64 + dtype: object + + To enforce a single dtype: + + >>> df = pd.DataFrame(data=d, dtype=np.int8) + >>> df.dtypes + col1 int8 + col2 int8 + dtype: object + + Constructing DataFrame from numpy ndarray: + + >>> df2 = pd.DataFrame(np.random.randint(low=0, high=10, size=(5, 5)), + ... columns=['a', 'b', 'c', 'd', 'e']) + >>> df2 + a b c d e + 0 2 8 8 3 4 + 1 4 2 9 0 9 + 2 1 0 7 8 0 + 3 5 1 7 1 3 + 4 6 0 2 4 2 See also -------- From d4f80b0035fcd67c552ca384000ea92c438cc0ad Mon Sep 17 00:00:00 2001 From: Kassandra Keeton Date: Wed, 31 May 2017 04:37:49 -0500 Subject: [PATCH 626/933] DOC: correct docstring examples (#3439) (#16432) --- ci/build_docs.sh | 9 ++++ pandas/core/reshape/concat.py | 2 + pandas/core/reshape/pivot.py | 72 +++++++++++++++++------------- pandas/core/reshape/reshape.py | 81 ++++++++++++++++++---------------- pandas/core/reshape/tile.py | 24 +++++----- 5 files changed, 108 insertions(+), 80 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 26917b8f9b792..a038304fe0f7a 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -59,6 +59,15 @@ if [ "$DOC" ]; then git remote -v git push origin gh-pages -f + + echo "Running doctests" + cd "$TRAVIS_BUILD_DIR" + pytest --doctest-modules \ + pandas/core/reshape/concat.py \ + pandas/core/reshape/pivot.py \ + pandas/core/reshape/reshape.py \ + pandas/core/reshape/tile.py + fi exit 0 diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index af2eb734a02f6..96603b6adc3b0 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -197,6 +197,8 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 0 a 2 >>> pd.concat([df5, df6], verify_integrity=True) + Traceback (most recent call last): + ... ValueError: Indexes have overlapping values: ['a'] """ op = _Concatenator(objs, axis=axis, join_axes=join_axes, diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index b562f8a32f5c9..0581ec7484c49 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -50,26 +50,36 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', Examples -------- + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", + ... "one", "one", "two", "two"], + ... "C": ["small", "large", "large", "small", + ... "small", "large", "small", "small", + ... "large"], + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) >>> df - A B C D - 0 foo one small 1 - 1 foo one large 2 - 2 foo one large 2 - 3 foo two small 3 - 4 foo two small 3 - 5 bar one large 4 - 6 bar one small 5 - 7 bar two small 6 - 8 bar two large 7 + A B C D + 0 foo one small 1 + 1 foo one large 2 + 2 foo one large 2 + 3 foo two small 3 + 4 foo two small 3 + 5 bar one large 4 + 6 bar one small 5 + 7 bar two small 6 + 8 bar two large 7 >>> table = pivot_table(df, values='D', index=['A', 'B'], ... 
columns=['C'], aggfunc=np.sum) >>> table - small large - foo one 1 4 - two 6 NaN - bar one 5 4 - two 6 7 + ... # doctest: +NORMALIZE_WHITESPACE + C large small + A B + bar one 4.0 5.0 + two 7.0 6.0 + foo one 4.0 1.0 + two NaN 6.0 Returns ------- @@ -445,27 +455,27 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, Examples -------- - >>> a - array([foo, foo, foo, foo, bar, bar, - bar, bar, foo, foo, foo], dtype=object) - >>> b - array([one, one, one, two, one, one, - one, two, two, two, one], dtype=object) - >>> c - array([dull, dull, shiny, dull, dull, shiny, - shiny, dull, shiny, shiny, shiny], dtype=object) - - >>> crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) - b one two - c dull shiny dull shiny + >>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar", + ... "bar", "bar", "foo", "foo", "foo"], dtype=object) + >>> b = np.array(["one", "one", "one", "two", "one", "one", + ... "one", "two", "two", "two", "one"], dtype=object) + >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny", + ... "shiny", "dull", "shiny", "shiny", "shiny"], + ... dtype=object) + + >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) + ... # doctest: +NORMALIZE_WHITESPACE + b one two + c dull shiny dull shiny a - bar 1 2 1 0 - foo 2 2 1 2 + bar 1 2 1 0 + foo 2 2 1 2 >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) >>> crosstab(foo, bar) # 'c' and 'f' are not represented in the data, - # but they still will be counted in the output + ... # but they still will be counted in the output + ... # doctest: +SKIP col_0 d e f row_0 a 1 0 0 diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index f944dfe22361a..dcb83d225699d 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -48,23 +48,23 @@ class _Unstacker(object): >>> import pandas as pd >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), ... ('two', 'a'), ('two', 'b')]) - >>> s = pd.Series(np.arange(1.0, 5.0), index=index) + >>> s = pd.Series(np.arange(1, 5, dtype=np.int64), index=index) >>> s - one a 1 - b 2 - two a 3 - b 4 - dtype: float64 + one a 1 + b 2 + two a 3 + b 4 + dtype: int64 >>> s.unstack(level=-1) - a b + a b one 1 2 two 3 4 >>> s.unstack(level=0) one two - a 1 2 - b 3 4 + a 1 3 + b 2 4 Returns ------- @@ -789,18 +789,18 @@ def lreshape(data, groups, dropna=True, label=None): >>> import pandas as pd >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526], ... 'team': ['Red Sox', 'Yankees'], - ... 'year1': [2007, 2008], 'year2': [2008, 2008]}) + ... 'year1': [2007, 2007], 'year2': [2008, 2008]}) >>> data hr1 hr2 team year1 year2 0 514 545 Red Sox 2007 2008 1 573 526 Yankees 2007 2008 >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']}) - team hr year - 0 Red Sox 514 2007 - 1 Yankees 573 2007 - 2 Red Sox 545 2008 - 3 Yankees 526 2008 + team year hr + 0 Red Sox 2007 514 + 1 Yankees 2007 573 + 2 Red Sox 2008 545 + 3 Yankees 2008 526 Returns ------- @@ -905,11 +905,12 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): ... }) >>> df["id"] = df.index >>> df - A1970 A1980 B1970 B1980 X id + A1970 A1980 B1970 B1980 X id 0 a d 2.5 3.2 -1.085631 0 1 b e 1.2 1.3 0.997345 1 2 c f 0.7 0.1 0.282978 2 >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year") + ... 
# doctest: +NORMALIZE_WHITESPACE X A B id year 0 1970 -1.085631 a 2.5 @@ -940,6 +941,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): 8 3 3 2.1 2.9 >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age') >>> l + ... # doctest: +NORMALIZE_WHITESPACE ht famid birth age 1 1 1 2.8 @@ -979,41 +981,44 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): Less wieldy column names are also handled + >>> np.random.seed(0) >>> df = pd.DataFrame({'A(quarterly)-2010': np.random.rand(3), ... 'A(quarterly)-2011': np.random.rand(3), ... 'B(quarterly)-2010': np.random.rand(3), ... 'B(quarterly)-2011': np.random.rand(3), ... 'X' : np.random.randint(3, size=3)}) >>> df['id'] = df.index - >>> df - A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 B(quarterly)-2011 - 0 0.531828 0.724455 0.322959 0.293714 - 1 0.634401 0.611024 0.361789 0.630976 - 2 0.849432 0.722443 0.228263 0.092105 - \ + >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS + A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 ... + 0 0.548814 0.544883 0.437587 ... + 1 0.715189 0.423655 0.891773 ... + 2 0.602763 0.645894 0.963663 ... X id 0 0 0 1 1 1 - 2 2 2 - >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], - i='id', j='year', sep='-') - X A(quarterly) B(quarterly) + 2 1 2 + + >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id', + ... j='year', sep='-') + ... # doctest: +NORMALIZE_WHITESPACE + X A(quarterly) B(quarterly) id year - 0 2010 0 0.531828 0.322959 - 1 2010 2 0.634401 0.361789 - 2 2010 2 0.849432 0.228263 - 0 2011 0 0.724455 0.293714 - 1 2011 2 0.611024 0.630976 - 2 2011 2 0.722443 0.092105 + 0 2010 0 0.548814 0.437587 + 1 2010 1 0.715189 0.891773 + 2 2010 1 0.602763 0.963663 + 0 2011 0 0.544883 0.383442 + 1 2011 1 0.423655 0.791725 + 2 2011 1 0.645894 0.528895 If we have many columns, we could also use a regex to find our stubnames and pass that list on to wide_to_long - >>> stubnames = set([match[0] for match in - df.columns.str.findall('[A-B]\(.*\)').values - if match != [] ]) + >>> stubnames = sorted( + ... set([match[0] for match in df.columns.str.findall( + ... r'[A-B]\(.*\)').values if match != [] ]) + ... ) >>> list(stubnames) - ['B(quarterly)', 'A(quarterly)'] + ['A(quarterly)', 'B(quarterly)'] Notes ----- @@ -1133,7 +1138,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, 2 0 0 1 >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'], - 'C': [1, 2, 3]}) + ... 'C': [1, 2, 3]}) >>> pd.get_dummies(df, prefix=['col1', 'col2']) C col1_a col1_b col2_a col2_b col2_c @@ -1149,7 +1154,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, 3 1 0 0 4 1 0 0 - >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True)) + >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True) b c 0 0 0 1 1 0 diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 746742f47f2aa..866f229bec418 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -75,18 +75,18 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, Examples -------- >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3, retbins=True) - ([(0.191, 3.367], (0.191, 3.367], (0.191, 3.367], (3.367, 6.533], - (6.533, 9.7], (0.191, 3.367]] - Categories (3, object): [(0.191, 3.367] < (3.367, 6.533] < (6.533, 9.7]], - array([ 0.1905 , 3.36666667, 6.53333333, 9.7 ])) + ... # doctest: +ELLIPSIS + ([(0.19, 3.367], (0.19, 3.367], (0.19, 3.367], (3.367, 6.533], ... 
+ Categories (3, interval[float64]): [(0.19, 3.367] < (3.367, 6.533] ... - >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3, - labels=["good","medium","bad"]) + >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), + ... 3, labels=["good", "medium", "bad"]) + ... # doctest: +SKIP [good, good, good, medium, bad, good] Categories (3, object): [good < medium < bad] >>> pd.cut(np.ones(5), 4, labels=False) - array([1, 1, 1, 1, 1], dtype=int64) + array([1, 1, 1, 1, 1]) """ # NOTE: this binning code is changed a bit from histogram for var(x) == 0 @@ -182,15 +182,17 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'): Examples -------- >>> pd.qcut(range(5), 4) - [[0, 1], [0, 1], (1, 2], (2, 3], (3, 4]] - Categories (4, object): [[0, 1] < (1, 2] < (2, 3] < (3, 4]] + ... # doctest: +ELLIPSIS + [(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]] + Categories (4, interval[float64]): [(-0.001, 1.0] < (1.0, 2.0] ... - >>> pd.qcut(range(5), 3, labels=["good","medium","bad"]) + >>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"]) + ... # doctest: +SKIP [good, good, medium, bad, bad] Categories (3, object): [good < medium < bad] >>> pd.qcut(range(5), 4, labels=False) - array([0, 0, 1, 2, 3], dtype=int64) + array([0, 0, 1, 2, 3]) """ x_is_series, series_index, name, x = _preprocess_for_cut(x) From 9b0ea41e91a3db3bfb9cce746b7c381483be3555 Mon Sep 17 00:00:00 2001 From: Jeff Tratner Date: Wed, 31 May 2017 03:39:46 -0700 Subject: [PATCH 627/933] Fix unbound local with bad engine (#16511) --- doc/source/whatsnew/v0.20.2.txt | 3 +++ pandas/io/parsers.py | 4 ++++ pandas/tests/io/test_common.py | 7 +++++++ 3 files changed, 14 insertions(+) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 90146aa176b31..1517327ab7133 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -41,6 +41,9 @@ Bug Fixes detecting the terminal size. This fix only applies to python 3 (:issue:`16496`) - Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`) - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`) +- Passing an invalid engine to :func:`read_csv` now raises an informative + ``ValueError`` rather than ``UnboundLocalError``. 
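For illustration of the behaviour this patch adds, a minimal sketch (editorial, not part of the diff; assumes a pandas build with this fix applied):

    >>> import pandas as pd
    >>> from pandas.compat import StringIO
    >>> pd.read_csv(StringIO("a\n1"), engine="pyt")  # doctest: +SKIP
    Traceback (most recent call last):
        ...
    ValueError: Unknown engine: pyt (valid options are "c", "python", or "python-fwf")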
(:issue:`16511`)
+
+

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index e287d92f67ef6..12b606d969c7d 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -969,6 +969,10 @@ def _make_engine(self, engine='c'):
             klass = PythonParser
         elif engine == 'python-fwf':
             klass = FixedWidthFieldParser
+        else:
+            raise ValueError('Unknown engine: {engine} (valid options are'
+                             ' "c", "python", or' ' "python-fwf")'.format(
+                                 engine=engine))
         self._engine = klass(self.f, **self.options)

     def _failover_to_python(self):
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index b7d158dd75960..289f86eb2dc53 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -223,3 +223,10 @@ def test_next(self):
             assert next_line.strip() == line.strip()

         pytest.raises(StopIteration, next, wrapper)
+
+    def test_unknown_engine(self):
+        with tm.ensure_clean() as path:
+            df = tm.makeDataFrame()
+            df.to_csv(path)
+            with tm.assert_raises_regex(ValueError, 'Unknown engine'):
+                read_csv(path, engine='pyt')

From d31ffdb7f2899ca5f242c2f6c8fea843d6212a4d Mon Sep 17 00:00:00 2001
From: Christian Stade-Schuldt
Date: Wed, 31 May 2017 13:56:52 +0200
Subject: [PATCH 628/933] return empty MultiIndex for symmetrical difference
 on equal MultiIndexes (#16486)

---
 doc/source/whatsnew/v0.20.2.txt          |  1 +
 pandas/core/indexes/multi.py             |  6 ++++++
 pandas/tests/indexes/test_base.py        |  2 --
 pandas/tests/indexing/test_multiindex.py | 11 +++++++++++
 4 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index 1517327ab7133..38cf683208b3d 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -40,6 +40,7 @@ Bug Fixes
 - Silenced a warning on some Windows environments about "tput: terminal attributes: No such device or address" when
   detecting the terminal size. This fix only applies to python 3 (:issue:`16496`)
 - Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`)
+- Bug in ``Index.symmetric_difference()`` on two equal MultiIndexes, resulting in a ``TypeError`` (:issue:`13490`)
 - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`)
 - Passing an invalid engine to :func:`read_csv` now raises an informative
   ``ValueError`` rather than ``UnboundLocalError``. (:issue:`16511`)

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 569e16f2141ae..981a6a696a618 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -414,6 +414,12 @@ def view(self, cls=None):
         return result

     def _shallow_copy_with_infer(self, values=None, **kwargs):
+        # On equal MultiIndexes the difference is empty.
+ # Therefore, an empty MultiIndex is returned GH13490 + if len(values) == 0: + return MultiIndex(levels=[[] for _ in range(self.nlevels)], + labels=[[] for _ in range(self.nlevels)], + **kwargs) return self._shallow_copy(values, **kwargs) @Appender(_index_shared_docs['_shallow_copy']) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 6a2087b37631e..02561cba784b8 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -188,7 +188,6 @@ def test_constructor_ndarray_like(self): # it should be possible to convert any object that satisfies the numpy # ndarray interface directly into an Index class ArrayLike(object): - def __init__(self, array): self.array = array @@ -246,7 +245,6 @@ def test_index_ctor_infer_nan_nat(self): [np.timedelta64('nat'), np.nan], [pd.NaT, np.timedelta64('nat')], [np.timedelta64('nat'), pd.NaT]]: - tm.assert_index_equal(Index(data), exp) tm.assert_index_equal(Index(np.array(data, dtype=object)), exp) diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index 483c39ed8694e..fc6c627075c96 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -697,6 +697,17 @@ def test_multiindex_slice_first_level(self): index=range(30, 71)) tm.assert_frame_equal(result, expected) + def test_multiindex_symmetric_difference(self): + # GH 13490 + idx = MultiIndex.from_product([['a', 'b'], ['A', 'B']], + names=['a', 'b']) + result = idx ^ idx + assert result.names == idx.names + + idx2 = idx.copy().rename(['A', 'B']) + result = idx ^ idx2 + assert result.names == [None, None] + class TestMultiIndexSlicers(object): From 03d44f3dd0ffd55d7538b67466cf4d3899ceac27 Mon Sep 17 00:00:00 2001 From: JosephWagner Date: Wed, 31 May 2017 04:57:57 -0700 Subject: [PATCH 629/933] BUG: select_as_multiple doesn't respect start/stop kwargs GH16209 (#16317) --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/io/pytables.py | 7 ++++--- pandas/tests/io/test_pytables.py | 15 +++++++++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 38cf683208b3d..676da5c370041 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -71,6 +71,7 @@ I/O - Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`) - Bug that raised IndexError HTML-rendering an empty DataFrame (:issue:`15953`) +- Bug in ``HDFStore.select_as_multiple()`` where start/stop arguments were not respected (:issue:`16209`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 6665ccf8ce4c5..b838260d1f73c 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -831,8 +831,8 @@ def func(_start, _stop, _where): # retrieve the objs, _where is always passed as a set of # coordinates here - objs = [t.read(where=_where, columns=columns, **kwargs) - for t in tbls] + objs = [t.read(where=_where, columns=columns, start=_start, + stop=_stop, **kwargs) for t in tbls] # concat and return return concat(objs, axis=axis, @@ -1425,7 +1425,8 @@ def get_result(self, coordinates=False): # if specified read via coordinates (necessary for multiple selections if coordinates: - where = self.s.read_coordinates(where=self.where) + where = self.s.read_coordinates(where=self.where, start=self.start, + stop=self.stop) else: where = self.where diff --git a/pandas/tests/io/test_pytables.py 
b/pandas/tests/io/test_pytables.py index 17f524cc279c0..ae14f74ece31c 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4219,6 +4219,21 @@ def test_start_stop_table(self): expected = df.loc[30:40, ['A']] tm.assert_frame_equal(result, expected) + def test_start_stop_multiple(self): + + # GH 16209 + with ensure_clean_store(self.path) as store: + + df = DataFrame({"foo": [1, 2], "bar": [1, 2]}) + + store.append_to_multiple({'selector': ['foo'], 'data': None}, df, + selector='selector') + result = store.select_as_multiple(['selector', 'data'], + selector='selector', start=0, + stop=1) + expected = df.loc[[0], ['foo', 'bar']] + tm.assert_frame_equal(result, expected) + def test_start_stop_fixed(self): with ensure_clean_store(self.path) as store: From e437ad594048cc28873df13ccf50cd39a4e88dcb Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 31 May 2017 18:44:40 -0400 Subject: [PATCH 630/933] BUG: Bug in .resample() and .groupby() when aggregating on integers (#16549) closes #16361 --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/core/groupby.py | 10 ++++++---- pandas/tests/test_resample.py | 22 ++++++++++++++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 676da5c370041..9f88d629880ed 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -88,6 +88,7 @@ Groupby/Resample/Rolling - Bug creating datetime rolling window on an empty DataFrame (:issue:`15819`) - Bug in ``rolling.cov()`` with offset window (:issue:`16058`) +- Bug in ``.resample()`` and ``.groupby()`` when aggregating on integers (:issue:`16361`) Sparse diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 91b55c414b507..286677d613484 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -3337,13 +3337,15 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True): obj = self.obj[data.items[locs]] s = groupby(obj, self.grouper) result = s.aggregate(lambda x: alt(x, axis=self.axis)) - result = result._data.blocks[0] + newb = result._data.blocks[0] - # see if we can cast the block back to the original dtype - result = block._try_coerce_and_cast_result(result) + finally: + + # see if we can cast the block back to the original dtype + result = block._try_coerce_and_cast_result(result) + newb = block.make_block(result) new_items.append(locs) - newb = block.make_block_same_class(result) new_blocks.append(newb) if len(new_blocks) == 0: diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 170cab4947a5a..959e3d2f459ce 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -1672,6 +1672,28 @@ def test_resample_dtype_preservation(self): result = df.groupby('group').resample('1D').ffill() assert result.val.dtype == np.int32 + def test_resample_dtype_coerceion(self): + + pytest.importorskip('scipy') + + # GH 16361 + df = {"a": [1, 3, 1, 4]} + df = pd.DataFrame( + df, index=pd.date_range("2017-01-01", "2017-01-04")) + + expected = (df.astype("float64") + .resample("H") + .mean() + ["a"] + .interpolate("cubic") + ) + + result = df.resample("H")["a"].mean().interpolate("cubic") + tm.assert_series_equal(result, expected) + + result = df.resample("H").mean()["a"].interpolate("cubic") + tm.assert_series_equal(result, expected) + def test_weekly_resample_buglet(self): # #1327 rng = date_range('1/1/2000', freq='B', periods=20) From 58f44540e60e5203ecd45e12064ad672cb7c49eb Mon Sep 17 00:00:00 2001 From: 
Matti Picus Date: Thu, 1 Jun 2017 02:04:55 +0300 Subject: [PATCH 631/933] COMPAT: cython str-to-int can raise a ValueError on non-CPython (#16563) --- pandas/_libs/index.pyx | 4 ++-- pandas/core/frame.py | 2 +- pandas/core/indexes/base.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 21680fb0b3921..5e92c506b5d0c 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -152,7 +152,7 @@ cdef class IndexEngine: try: return self.mapping.get_item(val) - except TypeError: + except (TypeError, ValueError): raise KeyError(val) cdef inline _get_loc_duplicates(self, object val): @@ -470,7 +470,7 @@ cdef class DatetimeEngine(Int64Engine): try: val = _to_i8(val) return self.mapping.get_item(val) - except TypeError: + except (TypeError, ValueError): self._date_check_type(val) raise KeyError(val) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 907959c42323e..25c3c3fe4e48e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1918,7 +1918,7 @@ def get_value(self, index, col, takeable=False): try: return engine.get_value(series._values, index) - except TypeError: + except (TypeError, ValueError): # we cannot handle direct indexing # use positional diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2af4f112ca941..e1e08e008f782 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1590,7 +1590,7 @@ def __contains__(self, key): hash(key) try: return key in self._engine - except TypeError: + except (TypeError, ValueError): return False _index_shared_docs['contains'] = """ @@ -1610,7 +1610,7 @@ def contains(self, key): hash(key) try: return key in self._engine - except TypeError: + except (TypeError, ValueError): return False def __hash__(self): From ee8346d3a232bb9b9d35612b4c4d41eb80ab18c9 Mon Sep 17 00:00:00 2001 From: Giulio Pepe Date: Thu, 1 Jun 2017 00:10:24 +0100 Subject: [PATCH 632/933] CLN: raise correct error for Panel sort_values (#16532) --- pandas/core/generic.py | 9 +++++++-- pandas/tests/test_panel.py | 5 +++++ pandas/tests/test_panel4d.py | 5 +++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e541f1532d0a0..98999ec267c82 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2366,9 +2366,14 @@ def add_suffix(self, suffix): 1 A 1 1 """ - def sort_values(self, by, axis=0, ascending=True, inplace=False, + def sort_values(self, by=None, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last'): - raise AbstractMethodError(self) + """ + NOT IMPLEMENTED: do not call this method, as sorting values is not + supported for Panel objects and will raise an error. 
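An illustrative doctest of the resulting error (an editorial addition, not text from the patch):

    >>> import numpy as np
    >>> import pandas as pd
    >>> wp = pd.Panel(np.random.randn(2, 3, 4))
    >>> wp.sort_values()  # doctest: +SKIP
    Traceback (most recent call last):
        ...
    NotImplementedError: sort_values has not been implemented on Panel or Panel4D objects.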
+ """ + raise NotImplementedError("sort_values has not been implemented " + "on Panel or Panel4D objects.") _shared_docs['sort_index'] = """ Sort object by labels (along an axis) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 3243b69a25acd..e19e42e062932 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -2429,6 +2429,11 @@ def test_all_any_unhandled(self): pytest.raises(NotImplementedError, self.panel.all, bool_only=True) pytest.raises(NotImplementedError, self.panel.any, bool_only=True) + # GH issue 15960 + def test_sort_values(self): + pytest.raises(NotImplementedError, self.panel.sort_values) + pytest.raises(NotImplementedError, self.panel.sort_values, 'ItemA') + class TestLongPanel(object): """ diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 96f02d63712fc..e1995316e7b7c 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -939,3 +939,8 @@ def test_rename(self): def test_get_attr(self): tm.assert_panel_equal(self.panel4d['l1'], self.panel4d.l1) + + # GH issue 15960 + def test_sort_values(self): + pytest.raises(NotImplementedError, self.panel4d.sort_values) + pytest.raises(NotImplementedError, self.panel4d.sort_values, 'ItemA') From 9d7afa730f99770e905907b8128b99e97aa5fff5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 1 Jun 2017 05:30:57 -0500 Subject: [PATCH 633/933] BUG: Fixed pd.unique on array of tuples (#16543) --- doc/source/whatsnew/v0.20.2.txt | 3 +-- pandas/core/algorithms.py | 7 ++++++- pandas/tests/test_algos.py | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 9f88d629880ed..31df5899f0fc3 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -44,8 +44,7 @@ Bug Fixes - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`) - Passing an invalid engine to :func:`read_csv` now raises an informative ``ValueError`` rather than ``UnboundLocalError``. 
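Background for this ``unique`` fix, sketched informally (editorial aside, not from the patch): ``np.asarray`` flattens a list of same-length tuples into a 2-D array, so each tuple stopped being a single hashable value; building a 1-D object array preserves the tuples as scalars:

    >>> import numpy as np
    >>> np.asarray([(0, 0), (0, 1), (1, 0)]).shape
    (3, 2)
    >>> arr = np.empty(3, dtype=object)    # roughly what the fixed code builds
    >>> arr[:] = [(0, 0), (0, 1), (1, 0)]
    >>> arr.shape
    (3,)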
(:issue:`16511`) - - +- Bug in :func:`unique` on an array of tuples (:issue:`16519`) - Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on Categoricals (:issue:`16409`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 77d79c9585e57..d74c5e66ea1a9 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -163,7 +163,7 @@ def _ensure_arraylike(values): ABCIndexClass, ABCSeries)): inferred = lib.infer_dtype(values) if inferred in ['mixed', 'string', 'unicode']: - values = np.asarray(values, dtype=object) + values = lib.list_to_object_array(values) else: values = np.asarray(values) return values @@ -328,6 +328,11 @@ def unique(values): [b, a, c] Categories (3, object): [a < b < c] + An array of tuples + + >>> pd.unique([('a', 'b'), ('b', 'a'), ('a', 'c'), ('b', 'a')]) + array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object) + See Also -------- pandas.Index.unique diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 351e646cbb0b2..063dcea5c76d6 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -929,6 +929,22 @@ def test_unique_index(self): tm.assert_numpy_array_equal(case.duplicated(), np.array([False, False, False])) + @pytest.mark.parametrize('arr, unique', [ + ([(0, 0), (0, 1), (1, 0), (1, 1), (0, 0), (0, 1), (1, 0), (1, 1)], + [(0, 0), (0, 1), (1, 0), (1, 1)]), + ([('b', 'c'), ('a', 'b'), ('a', 'b'), ('b', 'c')], + [('b', 'c'), ('a', 'b')]), + ([('a', 1), ('b', 2), ('a', 3), ('a', 1)], + [('a', 1), ('b', 2), ('a', 3)]), + ]) + def test_unique_tuples(self, arr, unique): + # https://github.com/pandas-dev/pandas/issues/16519 + expected = np.empty(len(unique), dtype=object) + expected[:] = unique + + result = pd.unique(arr) + tm.assert_numpy_array_equal(result, expected) + class GroupVarTestMixin(object): From a67c7aa41e676e4cb228b9ba54279fb2b6af77ef Mon Sep 17 00:00:00 2001 From: Patrick O'Melveny Date: Thu, 1 Jun 2017 03:35:18 -0700 Subject: [PATCH 634/933] BUG: Allow non-callable attributes in aggregate function. Fixes GH16405 (#16458) --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/core/base.py | 12 +++++++-- pandas/tests/frame/test_apply.py | 45 +++++++++++++++++++++++++++++++ pandas/tests/series/test_apply.py | 16 +++++++++++ 4 files changed, 72 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 31df5899f0fc3..c8b6dfa134120 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -102,6 +102,7 @@ Reshaping - Bug in ``pd.wide_to_long()`` where no error was raised when ``i`` was not a unique identifier (:issue:`16382`) - Bug in ``Series.isin(..)`` with a list of tuples (:issue:`16394`) - Bug in construction of a ``DataFrame`` with mixed dtypes including an all-NaT column. 
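The ``agg`` change in this patch can be sketched as follows (illustrative only, assuming a pandas build with the change applied; the list-case output is abbreviated):

    >>> import pandas as pd
    >>> s = pd.Series([1, 2, None])
    >>> s.agg('size')       # 'size' is a property, not a method
    3
    >>> s.agg(['size', 'count'])  # doctest: +SKIP
    size     3.0
    count    2.0
    dtype: float64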
(:issue:`16395`) +- Bug in ``DataFrame.agg()`` and ``Series.agg()`` with aggregating on non-callable attributes (:issue:`16405`) Numeric diff --git a/pandas/core/base.py b/pandas/core/base.py index a3ef24c80f883..97c4c8626dcbb 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -378,7 +378,7 @@ def aggregate(self, func, *args, **kwargs): def _try_aggregate_string_function(self, arg, *args, **kwargs): """ if arg is a string, then try to operate on it: - - try to find a function on ourselves + - try to find a function (or attribute) on ourselves - try to find a numpy function - raise @@ -387,7 +387,15 @@ def _try_aggregate_string_function(self, arg, *args, **kwargs): f = getattr(self, arg, None) if f is not None: - return f(*args, **kwargs) + if callable(f): + return f(*args, **kwargs) + + # people may try to aggregate on a non-callable attribute + # but don't let them think they can pass args to it + assert len(args) == 0 + assert len([kwarg for kwarg in kwargs + if kwarg not in ['axis', '_level']]) == 0 + return f f = getattr(np, arg, None) if f is not None: diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index aa7c7a7120c1b..a6f39cabb60ed 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -635,3 +635,48 @@ def test_nuiscance_columns(self): expected = DataFrame([[6, 6., 'foobarbaz']], index=['sum'], columns=['A', 'B', 'C']) assert_frame_equal(result, expected) + + def test_non_callable_aggregates(self): + + # GH 16405 + # 'size' is a property of frame/series + # validate that this is working + df = DataFrame({'A': [None, 2, 3], + 'B': [1.0, np.nan, 3.0], + 'C': ['foo', None, 'bar']}) + + # Function aggregate + result = df.agg({'A': 'count'}) + expected = pd.Series({'A': 2}) + + assert_series_equal(result, expected) + + # Non-function aggregate + result = df.agg({'A': 'size'}) + expected = pd.Series({'A': 3}) + + assert_series_equal(result, expected) + + # Mix function and non-function aggs + result1 = df.agg(['count', 'size']) + result2 = df.agg({'A': ['count', 'size'], + 'B': ['count', 'size'], + 'C': ['count', 'size']}) + expected = pd.DataFrame({'A': {'count': 2, 'size': 3}, + 'B': {'count': 2, 'size': 3}, + 'C': {'count': 2, 'size': 3}}) + + assert_frame_equal(result1, result2, check_like=True) + assert_frame_equal(result2, expected, check_like=True) + + # Just functional string arg is same as calling df.arg() + result = df.agg('count') + expected = df.count() + + assert_series_equal(result, expected) + + # Just a string attribute arg same as calling df.arg + result = df.agg('size') + expected = df.size + + assert result == expected diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index c273d3161fff5..2c5f0d7772cc2 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -306,6 +306,22 @@ def test_reduce(self): name=self.series.name) assert_series_equal(result, expected) + def test_non_callable_aggregates(self): + # test agg using non-callable series attributes + s = Series([1, 2, None]) + + # Calling agg w/ just a string arg same as calling s.arg + result = s.agg('size') + expected = s.size + assert result == expected + + # test when mixed w/ callable reducers + result = s.agg(['size', 'count', 'mean']) + expected = Series(OrderedDict({'size': 3.0, + 'count': 2.0, + 'mean': 1.5})) + assert_series_equal(result[expected.index], expected) + class TestSeriesMap(TestData): From cab2b6b1a37f4fe84f121955ecfbf7aa8d126707 Mon Sep 17 00:00:00 2001 
From: Tom Augspurger Date: Thu, 1 Jun 2017 05:37:00 -0500 Subject: [PATCH 635/933] Strictly monotonic (#16555) --- doc/source/api.rst | 2 + doc/source/whatsnew/v0.20.2.txt | 3 +- pandas/core/indexes/base.py | 50 +++++++++++++++++++ pandas/core/indexes/datetimes.py | 2 +- .../tests/indexes/datetimes/test_datetime.py | 7 +++ pandas/tests/indexes/test_base.py | 6 ++- pandas/tests/indexes/test_multi.py | 26 ++++++++++ pandas/tests/indexes/test_numeric.py | 22 +++++++- pandas/tests/indexes/test_range.py | 10 ++++ 9 files changed, 124 insertions(+), 4 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 888bb6d67e94b..e210849d9a0ca 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1286,6 +1286,8 @@ Attributes Index.is_monotonic Index.is_monotonic_increasing Index.is_monotonic_decreasing + Index.is_strictly_monotonic_increasing + Index.is_strictly_monotonic_decreasing Index.is_unique Index.has_duplicates Index.dtype diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index c8b6dfa134120..e3328e2d01dc7 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -21,6 +21,7 @@ Enhancements - Unblocked access to additional compression types supported in pytables: 'blosc:blosclz, 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`) - ``Series`` provides a ``to_latex`` method (:issue:`16180`) +- Added :attr:`Index.is_strictly_monotonic_increasing` and :attr:`Index.is_strictly_monotonic_decreasing` properties (:issue:`16515`) .. _whatsnew_0202.performance: @@ -61,7 +62,7 @@ Indexing ^^^^^^^^ - Bug in ``DataFrame.reset_index(level=)`` with single level index (:issue:`16263`) - +- Bug in partial string indexing with a monotonic, but not strictly-monotonic, index incorrectly reversing the slice bounds (:issue:`16515`) I/O ^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e1e08e008f782..e8c2043138edb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1191,6 +1191,15 @@ def is_monotonic_increasing(self): """ return if the index is monotonic increasing (only equal or increasing) values. + + Examples + -------- + >>> Index([1, 2, 3]).is_monotonic_increasing + True + >>> Index([1, 2, 2]).is_monotonic_increasing + True + >>> Index([1, 3, 2]).is_monotonic_increasing + False """ return self._engine.is_monotonic_increasing @@ -1199,9 +1208,50 @@ def is_monotonic_decreasing(self): """ return if the index is monotonic decreasing (only equal or decreasing) values. 
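The ``is_strictly_monotonic_*`` properties added below draw a distinction worth previewing (editorial aside, not part of the diff): an index with repeated values is monotonic but not strictly so, which is exactly what the partial string indexing fix (:issue:`16515`) relies on:

    >>> import pandas as pd
    >>> idx = pd.Index([1, 2, 2])
    >>> idx.is_monotonic_increasing
    True
    >>> idx.is_strictly_monotonic_increasing
    False
    >>> dti = pd.DatetimeIndex(['2017', '2017'])
    >>> dti.is_monotonic_decreasing      # equal values still count as monotonic
    True
    >>> dti.is_strictly_monotonic_decreasing  # strictness is what slicing checks
    False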
+ + Examples + -------- + >>> Index([3, 2, 1]).is_monotonic_decreasing + True + >>> Index([3, 2, 2]).is_monotonic_decreasing + True + >>> Index([3, 1, 2]).is_monotonic_decreasing + False """ return self._engine.is_monotonic_decreasing + @property + def is_strictly_monotonic_increasing(self): + """return if the index is strictly monotonic increasing + (only increasing) values + + Examples + -------- + >>> Index([1, 2, 3]).is_strictly_monotonic_increasing + True + >>> Index([1, 2, 2]).is_strictly_monotonic_increasing + False + >>> Index([1, 3, 2]).is_strictly_monotonic_increasing + False + """ + return self.is_unique and self.is_monotonic_increasing + + @property + def is_strictly_monotonic_decreasing(self): + """return if the index is strictly monotonic decreasing + (only decreasing) values + + Examples + -------- + >>> Index([3, 2, 1]).is_strictly_monotonic_decreasing + True + >>> Index([3, 2, 2]).is_strictly_monotonic_decreasing + False + >>> Index([3, 1, 2]).is_strictly_monotonic_decreasing + False + """ + return self.is_unique and self.is_monotonic_decreasing + def is_lexsorted_for_tuple(self, tup): return True diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index ec678b1577d81..60560374cd420 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1472,7 +1472,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): # the bounds need swapped if index is reverse sorted and has a # length > 1 (is_monotonic_decreasing gives True for empty # and length 1 index) - if self.is_monotonic_decreasing and len(self) > 1: + if self.is_strictly_monotonic_decreasing and len(self) > 1: return upper if side == 'left' else lower return lower if side == 'left' else upper else: diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 6cba7e17abf8e..f99dcee9e5c8a 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -771,3 +771,10 @@ def test_slice_bounds_empty(self): left = empty_idx._maybe_cast_slice_bound('2015-01-02', 'left', 'loc') exp = Timestamp('2015-01-02 00:00:00') assert left == exp + + def test_slice_duplicate_monotonic(self): + # https://github.com/pandas-dev/pandas/issues/16515 + idx = pd.DatetimeIndex(['2017', '2017']) + result = idx._maybe_cast_slice_bound('2017-01-01', 'left', 'loc') + expected = Timestamp('2017-01-01') + assert result == expected diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 02561cba784b8..a6933316e4291 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1328,8 +1328,10 @@ def test_tuple_union_bug(self): def test_is_monotonic_incomparable(self): index = Index([5, datetime.now(), 7]) - assert not index.is_monotonic + assert not index.is_monotonic_increasing assert not index.is_monotonic_decreasing + assert not index.is_strictly_monotonic_increasing + assert not index.is_strictly_monotonic_decreasing def test_get_set_value(self): values = np.random.randn(100) @@ -2028,6 +2030,8 @@ def test_is_monotonic_na(self): for index in examples: assert not index.is_monotonic_increasing assert not index.is_monotonic_decreasing + assert not index.is_strictly_monotonic_increasing + assert not index.is_strictly_monotonic_decreasing def test_repr_summary(self): with cf.option_context('display.max_seq_items', 10): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 
1fe4d85815c4b..388a49d25cb82 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2373,22 +2373,30 @@ def test_is_monotonic(self): i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=['one', 'two']) assert i.is_monotonic + assert i.is_strictly_monotonic_increasing assert Index(i.values).is_monotonic + assert i.is_strictly_monotonic_increasing i = MultiIndex.from_product([np.arange(10, 0, -1), np.arange(10)], names=['one', 'two']) assert not i.is_monotonic + assert not i.is_strictly_monotonic_increasing assert not Index(i.values).is_monotonic + assert not Index(i.values).is_strictly_monotonic_increasing i = MultiIndex.from_product([np.arange(10), np.arange(10, 0, -1)], names=['one', 'two']) assert not i.is_monotonic + assert not i.is_strictly_monotonic_increasing assert not Index(i.values).is_monotonic + assert not Index(i.values).is_strictly_monotonic_increasing i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']]) assert not i.is_monotonic + assert not i.is_strictly_monotonic_increasing assert not Index(i.values).is_monotonic + assert not Index(i.values).is_strictly_monotonic_increasing # string ordering i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], @@ -2398,6 +2406,8 @@ def test_is_monotonic(self): names=['first', 'second']) assert not i.is_monotonic assert not Index(i.values).is_monotonic + assert not i.is_strictly_monotonic_increasing + assert not Index(i.values).is_strictly_monotonic_increasing i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['mom', 'next', 'zenith']], @@ -2406,6 +2416,8 @@ def test_is_monotonic(self): names=['first', 'second']) assert i.is_monotonic assert Index(i.values).is_monotonic + assert i.is_strictly_monotonic_increasing + assert Index(i.values).is_strictly_monotonic_increasing # mixed levels, hits the TypeError i = MultiIndex( @@ -2416,6 +2428,20 @@ def test_is_monotonic(self): names=['household_id', 'asset_id']) assert not i.is_monotonic + assert not i.is_strictly_monotonic_increasing + + def test_is_strictly_monotonic(self): + idx = pd.MultiIndex(levels=[['bar', 'baz'], ['mom', 'next']], + labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) + assert idx.is_monotonic_increasing + assert not idx.is_strictly_monotonic_increasing + + @pytest.mark.xfail(reason="buggy MultiIndex.is_monotonic_decresaing.") + def test_is_strictly_monotonic_decreasing(self): + idx = pd.MultiIndex(levels=[['baz', 'bar'], ['next', 'mom']], + labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) + assert idx.is_monotonic_decreasing + assert not idx.is_strictly_monotonic_decreasing def test_reconstruct_sort(self): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 3d06f1672ae32..77f34dbf210e0 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -465,16 +465,36 @@ def test_view(self): def test_is_monotonic(self): assert self.index.is_monotonic assert self.index.is_monotonic_increasing + assert self.index.is_strictly_monotonic_increasing assert not self.index.is_monotonic_decreasing + assert not self.index.is_strictly_monotonic_decreasing index = self._holder([4, 3, 2, 1]) assert not index.is_monotonic - assert index.is_monotonic_decreasing + assert not index.is_strictly_monotonic_increasing + assert index.is_strictly_monotonic_decreasing index = self._holder([1]) assert index.is_monotonic assert index.is_monotonic_increasing assert index.is_monotonic_decreasing + assert index.is_strictly_monotonic_increasing + assert index.is_strictly_monotonic_decreasing 
+ + def test_is_strictly_monotonic(self): + index = self._holder([1, 1, 2, 3]) + assert index.is_monotonic_increasing + assert not index.is_strictly_monotonic_increasing + + index = self._holder([3, 2, 1, 1]) + assert index.is_monotonic_decreasing + assert not index.is_strictly_monotonic_decreasing + + index = self._holder([1, 1]) + assert index.is_monotonic_increasing + assert index.is_monotonic_decreasing + assert not index.is_strictly_monotonic_increasing + assert not index.is_strictly_monotonic_decreasing def test_logical_compat(self): idx = self.create_index() diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index c7af0954cf483..db8180cb736c4 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -331,25 +331,35 @@ def test_is_monotonic(self): assert self.index.is_monotonic assert self.index.is_monotonic_increasing assert not self.index.is_monotonic_decreasing + assert self.index.is_strictly_monotonic_increasing + assert not self.index.is_strictly_monotonic_decreasing index = RangeIndex(4, 0, -1) assert not index.is_monotonic + assert not index.is_strictly_monotonic_increasing assert index.is_monotonic_decreasing + assert index.is_strictly_monotonic_decreasing index = RangeIndex(1, 2) assert index.is_monotonic assert index.is_monotonic_increasing assert index.is_monotonic_decreasing + assert index.is_strictly_monotonic_increasing + assert index.is_strictly_monotonic_decreasing index = RangeIndex(2, 1) assert index.is_monotonic assert index.is_monotonic_increasing assert index.is_monotonic_decreasing + assert index.is_strictly_monotonic_increasing + assert index.is_strictly_monotonic_decreasing index = RangeIndex(1, 1) assert index.is_monotonic assert index.is_monotonic_increasing assert index.is_monotonic_decreasing + assert index.is_strictly_monotonic_increasing + assert index.is_strictly_monotonic_decreasing def test_equals_range(self): equiv_pairs = [(RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)), From e0a127a82868e432e5a1ee067b39ef7142d73d66 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 1 Jun 2017 06:38:50 -0400 Subject: [PATCH 636/933] COMPAT: Consider Python 2.x tarfiles file-like (#16533) --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/core/dtypes/inference.py | 2 +- pandas/io/parsers.py | 29 ++++++++++++--- pandas/tests/dtypes/test_inference.py | 4 +- pandas/tests/io/parser/c_parser_only.py | 36 ++++++++++++++++++ pandas/tests/io/parser/data/tar_csv.tar | Bin 0 -> 10240 bytes pandas/tests/io/parser/data/tar_csv.tar.gz | Bin 0 -> 10240 bytes pandas/tests/io/parser/test_unsupported.py | 41 ++++++++++++++++----- setup.py | 2 + 9 files changed, 98 insertions(+), 17 deletions(-) create mode 100644 pandas/tests/io/parser/data/tar_csv.tar create mode 100644 pandas/tests/io/parser/data/tar_csv.tar.gz diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index e3328e2d01dc7..e309ac0a79e4b 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -70,6 +70,7 @@ I/O - Bug in pd.read_csv() when comment is passed in space deliminted text files (:issue:`16472`) - Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`) - Bug that raised IndexError HTML-rendering an empty DataFrame (:issue:`15953`) +- Bug in ``pd.read_csv()`` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`) - Bug in ``HDFStore.select_as_multiple()`` where start/stop 
arguments were not respected (:issue:`16209`) diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index a5316a83612cb..ff7e215951a1f 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -171,7 +171,7 @@ def is_file_like(obj): if not (hasattr(obj, 'read') or hasattr(obj, 'write')): return False - if not is_iterator(obj): + if not hasattr(obj, "__iter__"): return False return True diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 12b606d969c7d..aab70c8ce2cd4 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -13,7 +13,7 @@ import numpy as np from pandas import compat -from pandas.compat import (range, lrange, StringIO, lzip, +from pandas.compat import (range, lrange, PY3, StringIO, lzip, zip, string_types, map, u) from pandas.core.dtypes.common import ( is_integer, _ensure_object, @@ -31,10 +31,10 @@ from pandas.core.common import AbstractMethodError from pandas.io.date_converters import generic_parser from pandas.errors import ParserWarning, ParserError, EmptyDataError -from pandas.io.common import (get_filepath_or_buffer, _validate_header_arg, - _get_handle, UnicodeReader, UTF8Recoder, - BaseIterator, - _NA_VALUES, _infer_compression) +from pandas.io.common import (get_filepath_or_buffer, is_file_like, + _validate_header_arg, _get_handle, + UnicodeReader, UTF8Recoder, _NA_VALUES, + BaseIterator, _infer_compression) from pandas.core.tools import datetimes as tools from pandas.util._decorators import Appender @@ -755,7 +755,9 @@ def __init__(self, f, engine=None, **kwds): self.squeeze = options.pop('squeeze', False) # might mutate self.engine + self.engine = self._check_file_or_buffer(f, engine) self.options, self.engine = self._clean_options(options, engine) + if 'has_index_names' in kwds: self.options['has_index_names'] = kwds['has_index_names'] @@ -801,6 +803,23 @@ def _get_options_with_defaults(self, engine): return options + def _check_file_or_buffer(self, f, engine): + # see gh-16530 + if is_file_like(f): + next_attr = "__next__" if PY3 else "next" + + # The C engine doesn't need the file-like to have the "next" or + # "__next__" attribute. However, the Python engine explicitly calls + # "next(...)" when iterating through such an object, meaning it + # needs to have that attribute ("next" for Python 2.x, "__next__" + # for Python 3.x) + if engine != "c" and not hasattr(f, next_attr): + msg = ("The 'python' engine cannot iterate " + "through this file buffer.") + raise ValueError(msg) + + return engine + def _clean_options(self, options, engine): result = options.copy() diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index b88481abcb2ec..ec5fe45d7f610 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -120,9 +120,9 @@ class MockFile(object): m = MockFile() assert not is_file(m) + # gh-16530: Valid iterator just means we have the + # __iter__ attribute for our purposes. MockFile.__iter__ = lambda self: self - MockFile.__next__ = lambda self: 0 - MockFile.next = MockFile.__next__ # Valid write-only file m = MockFile() diff --git a/pandas/tests/io/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py index 56ac10404b7b2..48812c04e3b55 100644 --- a/pandas/tests/io/parser/c_parser_only.py +++ b/pandas/tests/io/parser/c_parser_only.py @@ -7,7 +7,9 @@ further arguments when parsing. 
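For context, a sketch of what the tarfile handling enables ("archive.tar" and "data.csv" are hypothetical names; this snippet is illustrative and not part of the patch):

    >>> import tarfile
    >>> import pandas as pd
    >>> with tarfile.open("archive.tar") as tar:  # doctest: +SKIP
    ...     fh = tar.extractfile("data.csv")
    ...     df = pd.read_csv(fh)  # accepted by the C engine after this change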
""" +import os import sys +import tarfile import pytest import numpy as np @@ -446,3 +448,37 @@ def test_comment_whitespace_delimited(self): [7, np.nan], [8, np.nan]]) tm.assert_frame_equal(df, expected) + + def test_file_like_no_next(self): + # gh-16530: the file-like need not have a "next" or "__next__" + # attribute despite having an "__iter__" attribute. + # + # NOTE: This is only true for the C engine, not Python engine. + class NoNextBuffer(StringIO): + def __next__(self): + raise AttributeError("No next method") + + next = __next__ + + data = "a\n1" + + expected = pd.DataFrame({"a": [1]}) + result = self.read_csv(NoNextBuffer(data)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tar_suffix", [".tar", ".tar.gz"]) + def test_read_tarfile(self, tar_suffix): + # see gh-16530 + # + # Unfortunately, Python's CSV library can't handle + # tarfile objects (expects string, not bytes when + # iterating through a file-like). + tar_path = os.path.join(self.dirpath, "tar_csv" + tar_suffix) + + tar = tarfile.open(tar_path, "r") + data_file = tar.extractfile("tar_data.csv") + + out = self.read_csv(data_file) + expected = pd.DataFrame({"a": [1]}) + tm.assert_frame_equal(out, expected) diff --git a/pandas/tests/io/parser/data/tar_csv.tar b/pandas/tests/io/parser/data/tar_csv.tar new file mode 100644 index 0000000000000000000000000000000000000000..d1819550e0a0064b4d9ad829f120e49760c3ffe2 GIT binary patch literal 10240 zcmeIuK?;O03_#JW1@F)k3{BNsM}nR}J9B-TY7p4K!(9_GO$s`)68z@>0YS zW+wk!;+jjzL^~}framQ!s=W;o;!FQIwf(Nymk>_1JC}X6!*X|eRCwcUqis`RFe4E_ x009ILKmY**5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q0*~0R#|0009IZ32f(E6h{C6 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/data/tar_csv.tar.gz b/pandas/tests/io/parser/data/tar_csv.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5a0f3e1b580535a3fbdc2ff943b79d8c585df9f GIT binary patch literal 10240 zcmeIu%?W@o41m#`1$XEK(ok*3k)RW3b$+WS^{9xKFPG3j^YgMz{b<>mVP55<@Fil5 zvgZ=_TuM;(Z{IR;yy82--BN0FV w0R#|0009ILKmY**5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q0*~0R&zNY&Ss^RsaA1 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 3f62ff44531fb..5d248f2fef59c 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -16,6 +16,13 @@ from pandas.errors import ParserError from pandas.io.parsers import read_csv, read_table +import pytest + + +@pytest.fixture(params=["python", "python-fwf"], ids=lambda val: val) +def python_engine(request): + return request.param + class TestUnsupportedFeatures(object): @@ -82,7 +89,7 @@ def test_c_engine(self): with tm.assert_raises_regex(ValueError, msg): read_csv(StringIO(data), lineterminator='~~') - def test_python_engine(self): + def test_python_engine(self, python_engine): from pandas.io.parsers import _python_unsupported as py_unsupported data = """1,2,3,, @@ -90,16 +97,32 @@ def test_python_engine(self): 1,2,3,4,5 1,2,,, 1,2,3,4,""" - engines = 'python', 'python-fwf' - for engine in engines: - for default in py_unsupported: - msg = ('The %r option is not supported ' - 'with the %r engine' % (default, engine)) + for default in py_unsupported: + msg = ('The %r option is not supported ' + 'with the %r engine' % (default, python_engine)) + + kwargs = {default: object()} + with tm.assert_raises_regex(ValueError, msg): + read_csv(StringIO(data), engine=python_engine, **kwargs) - kwargs = {default: object()} - with tm.assert_raises_regex(ValueError, msg): - 
read_csv(StringIO(data), engine=engine, **kwargs) + def test_python_engine_file_no_next(self, python_engine): + # see gh-16530 + class NoNextBuffer(object): + def __init__(self, csv_data): + self.data = csv_data + + def __iter__(self): + return self + + def read(self): + return self.data + + data = "a\n1" + msg = "The 'python' engine cannot iterate" + + with tm.assert_raises_regex(ValueError, msg): + read_csv(NoNextBuffer(data), engine=python_engine) class TestDeprecatedFeatures(object): diff --git a/setup.py b/setup.py index 82d5f407228a9..31a3cddc3f9fd 100755 --- a/setup.py +++ b/setup.py @@ -702,6 +702,8 @@ def pxd(name): 'parser/data/*.gz', 'parser/data/*.bz2', 'parser/data/*.txt', + 'parser/data/*.tar', + 'parser/data/*.tar.gz', 'sas/data/*.csv', 'sas/data/*.xpt', 'sas/data/*.sas7bdat', From e3ee1869ce955df5d3daa59d59e08749471e5be5 Mon Sep 17 00:00:00 2001 From: Christian Prinoth Date: Thu, 1 Jun 2017 06:50:27 -0400 Subject: [PATCH 637/933] BUG: Fixed to_html ignoring index_names parameter closes #16493 Author: Christian Prinoth Author: Tom Augspurger Author: Christian Prinoth Author: Jeff Reback This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #16495 from CRP/bugfix_16493 and squashes the following commits: 567ae69 [Jeff Reback] doc corrections 8429f9a [Tom Augspurger] Fixed lint error 469a0e6 [Christian Prinoth] BUG: fix for bug 16493 20d512f [Christian Prinoth] BUG: fix for bug 16493 6bef829 [Christian Prinoth] BUG: fix for bug 16493 426565e [Christian Prinoth] BUG: fix for bug 16493 a40820d [Christian Prinoth] BUG: fix for bug 16493 --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/io/formats/format.py | 4 +++- pandas/tests/io/formats/test_to_html.py | 7 +++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index e309ac0a79e4b..e918bc4fccfca 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -71,6 +71,7 @@ I/O - Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`) - Bug that raised IndexError HTML-rendering an empty DataFrame (:issue:`15953`) - Bug in ``pd.read_csv()`` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`) +- Bug where ``DataFrame.to_html()`` ignored the ``index_names`` parameter (:issue:`16493`) - Bug in ``HDFStore.select_as_multiple()`` where start/stop arguments were not respected (:issue:`16209`) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 054db769c56dd..3deaec2dfbbc5 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1292,7 +1292,9 @@ def _column_header(): self.write_tr(col_row, indent, self.indent_delta, header=True, align=align) - if self.fmt.has_index_names and self.fmt.index: + if all((self.fmt.has_index_names, + self.fmt.index, + self.fmt.show_index_names)): row = ([x if x is not None else '' for x in self.frame.index.names] + [''] * min(len(self.columns), self.max_cols)) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index cde920b1511d2..9f4e532ec2287 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1869,3 +1869,10 @@ def test_to_html_notebook_has_no_style(self): df = pd.DataFrame({"A": [1, 2, 3]}) result = df.to_html() assert "thead tr:only-child" not in result + + def test_to_html_with_index_names_false(self): + # 
gh-16493
+        df = pd.DataFrame({"A": [1, 2]}, index=pd.Index(['a', 'b'],
+                                                        name='myindexname'))
+        result = df.to_html(index_names=False)
+        assert 'myindexname' not in result

From d419be4333dcb8cf643bdd04c7c4e990feae49f9 Mon Sep 17 00:00:00 2001
From: economy
Date: Thu, 1 Jun 2017 06:56:20 -0400
Subject: [PATCH 638/933] BUG: fixed wrong order of ordered labels in pd.cut()

closes #16459

Author: economy

This patch had conflicts when merged, resolved by
Committer: Jeff Reback

Closes #16466 from economy/fix_cut and squashes the following commits:

29128b3 [economy] comments and whatsnew edits
3898b72 [economy] BUG: fixed wrong order of ordered labels in pd.cut()

---
 doc/source/whatsnew/v0.20.2.txt   | 4 +---
 pandas/core/reshape/tile.py       | 2 +-
 pandas/tests/reshape/test_tile.py | 8 ++++++++
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index e918bc4fccfca..379249b6e55d6 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -46,11 +46,9 @@ Bug Fixes
 - Passing an invalid engine to :func:`read_csv` now raises an informative
   ``ValueError`` rather than ``UnboundLocalError``. (:issue:`16511`)
 - Bug in :func:`unique` on an array of tuples (:issue:`16519`)
-
-
+- Bug in :func:`cut` when ``labels`` are set, resulting in incorrect label ordering (:issue:`16459`)
 - Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on Categoricals (:issue:`16409`)

-
 Conversion
 ^^^^^^^^^^

diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
index 866f229bec418..d8398023a5083 100644
--- a/pandas/core/reshape/tile.py
+++ b/pandas/core/reshape/tile.py
@@ -254,7 +254,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None,
             raise ValueError('Bin labels must be one fewer than '
                              'the number of bin edges')
         if not is_categorical_dtype(labels):
-            labels = Categorical(labels, ordered=True)
+            labels = Categorical(labels, categories=labels, ordered=True)

         np.putmask(ids, na_mask, 0)
         result = algos.take_nd(labels, ids - 1)

diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py
index 8602b33856fea..542af321632cf 100644
--- a/pandas/tests/reshape/test_tile.py
+++ b/pandas/tests/reshape/test_tile.py
@@ -211,6 +211,7 @@ def test_cut_pass_labels(self):

         result = cut(arr, bins, labels=labels)
         exp = Categorical(['Medium'] + 4 * ['Small'] + ['Medium', 'Large'],
+                          categories=labels,
                           ordered=True)
         tm.assert_categorical_equal(result, exp)

@@ -219,6 +220,13 @@ def test_cut_pass_labels(self):
         exp = Categorical.from_codes([1] + 4 * [0] + [1, 2], labels)
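        # Editorial sketch, not part of this test: with categories=labels,
        # the label order passed by the user is preserved, e.g.
        #     cut(np.arange(6), 3, labels=['good', 'medium', 'bad'])
        # now yields ordered categories [good < medium < bad] instead of the
        # lexically sorted [bad < good < medium].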
tm.assert_categorical_equal(result, exp) - # issue 16459 + # issue 16459 labels = ['Good', 'Medium', 'Bad'] result = cut(arr, 3, labels=labels) exp = cut(arr, 3, labels=Categorical(labels, categories=labels, From 7b106e44286187f6b8fed2d6124c8b3c33a922e9 Mon Sep 17 00:00:00 2001 From: Hugues Valois Date: Thu, 1 Jun 2017 12:31:52 -0700 Subject: [PATCH 640/933] TST: writing invalid table names to sqlite (#16464) * Add test for bug #13206. * Improve test by reading back the values from sql and comparing. Also fixes coding style violation. --- pandas/tests/io/test_sql.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 7b3717281bf89..a6ad44ba31422 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -816,6 +816,16 @@ def test_unicode_column_name(self): df = DataFrame([[1, 2], [3, 4]], columns=[u'\xe9', u'b']) df.to_sql('test_unicode', self.conn, index=False) + def test_escaped_table_name(self): + # GH 13206 + df = DataFrame({'A': [0, 1, 2], 'B': [0.2, np.nan, 5.6]}) + df.to_sql('d1187b08-4943-4c8d-a7f6', self.conn, index=False) + + res = sql.read_sql_query('SELECT * FROM `d1187b08-4943-4c8d-a7f6`', + self.conn) + + tm.assert_frame_equal(res, df) + @pytest.mark.single class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi): From a7760e3e5aa889574dc36c4870e026f3e13270aa Mon Sep 17 00:00:00 2001 From: "John W. O'Brien" Date: Thu, 1 Jun 2017 15:50:37 -0400 Subject: [PATCH 641/933] TST: Skip test_database_uri_string if pg8000 importable (#16528) --- pandas/tests/io/test_sql.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a6ad44ba31422..deeb8cba2b228 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -948,6 +948,13 @@ def test_database_uri_string(self): # using driver that will not be installed on Travis to trigger error # in sqlalchemy.create_engine -> test passing of this error to user + try: + # the rest of this test depends on pg8000's being absent + import pg8000 # noqa + pytest.skip("pg8000 is installed") + except ImportError: + pass + db_uri = "postgresql+pg8000://user:pass@host/dbname" with tm.assert_raises_regex(ImportError, "pg8000"): sql.read_sql("select * from table", db_uri) From 4ec98d8b10728af9695c7e35e04b7e3fd9c408d3 Mon Sep 17 00:00:00 2001 From: kiwirob Date: Thu, 1 Jun 2017 20:59:44 +0100 Subject: [PATCH 642/933] DOC: Remove incorrect elements of PeriodIndex docstring (#16553) * DOC: Remove incorrect elements of PeriodIndex docstring See #9056. * Removed trailing space --- pandas/core/indexes/period.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 15fd9b7dc2b6a..f8af6c8303d99 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -125,15 +125,7 @@ def _new_PeriodIndex(cls, **d): class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray holding ordinal values indicating regular periods in - time such as particular years, quarters, months, etc. A value of 1 is the - period containing the Gregorian proleptic datetime Jan 1, 0001 00:00:00. - This ordinal representation is from the scikits.timeseries project. - - For instance, - # construct period for day 1/1/1 and get the first second - i = Period(year=1,month=1,day=1,freq='D').asfreq('S', 'S') - i.ordinal - ===> 1 + time such as particular years, quarters, months, etc. 
Index keys are boxed to Period objects which carries the metadata (eg, frequency information). From a19f9faa28032bc1365bae7bdcf47f24406315ed Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 1 Jun 2017 17:09:52 -0500 Subject: [PATCH 643/933] TST: Make HDF5 fspath write test robust (#16575) The test_write_fspath_all test would fail on the HDF5 example occasionally (about 1/100 in my experience). Apparently you don't get an identical HDF5 every single time. This refactors that test out to its own where we write and read both versions, and compare equality that way. --- pandas/tests/io/test_common.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 289f86eb2dc53..b527e3c5dc254 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -143,7 +143,6 @@ def test_read_fspath_all(self, reader, module, path): ('to_csv', {}, 'os'), ('to_excel', {'engine': 'xlwt'}, 'xlwt'), ('to_feather', {}, 'feather'), - ('to_hdf', {'key': 'bar', 'mode': 'w'}, 'tables'), ('to_html', {}, 'os'), ('to_json', {}, 'os'), ('to_latex', {}, 'os'), @@ -171,6 +170,26 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module): assert result == expected + def test_write_fspath_hdf5(self): + # Same test as write_fspath_all, except HDF5 files aren't + # necessarily byte-for-byte identical for a given dataframe, so we'll + # have to read and compare equality + pytest.importorskip('tables') + + df = pd.DataFrame({"A": [1, 2]}) + p1 = tm.ensure_clean('string') + p2 = tm.ensure_clean('fspath') + + with p1 as string, p2 as fspath: + mypath = CustomFSPath(fspath) + df.to_hdf(mypath, key='bar') + df.to_hdf(string, key='bar') + + result = pd.read_hdf(fspath, key='bar') + expected = pd.read_hdf(string, key='bar') + + tm.assert_frame_equal(result, expected) + class TestMMapWrapper(object): From 72e0d1f73bc140f2491a360a9380f5f96411189a Mon Sep 17 00:00:00 2001 From: DSM Date: Thu, 1 Jun 2017 18:12:14 -0400 Subject: [PATCH 644/933] ENH: add .ngroup() method to groupby objects (#14026) (#14026) --- doc/source/api.rst | 1 + doc/source/groupby.rst | 63 +++++++- doc/source/reshaping.rst | 2 +- doc/source/whatsnew/v0.20.2.txt | 5 + pandas/core/groupby.py | 75 +++++++++- pandas/tests/groupby/test_counting.py | 197 +++++++++++++++++++++++++ pandas/tests/groupby/test_groupby.py | 54 ------- pandas/tests/groupby/test_whitelist.py | 4 +- 8 files changed, 338 insertions(+), 63 deletions(-) create mode 100644 pandas/tests/groupby/test_counting.py diff --git a/doc/source/api.rst b/doc/source/api.rst index e210849d9a0ca..cc8c5f6356e58 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1707,6 +1707,7 @@ Computations / Descriptive Stats GroupBy.mean GroupBy.median GroupBy.min + GroupBy.ngroup GroupBy.nth GroupBy.ohlc GroupBy.prod diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index cf4f1059ae17a..865f1ccae2c04 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -1122,12 +1122,36 @@ To see the order in which each row appears within its group, use the .. ipython:: python - df = pd.DataFrame(list('aaabba'), columns=['A']) - df + dfg = pd.DataFrame(list('aaabba'), columns=['A']) + dfg + + dfg.groupby('A').cumcount() + + dfg.groupby('A').cumcount(ascending=False) + +.. _groupby.ngroup: + +Enumerate groups +~~~~~~~~~~~~~~~~ + +.. 
versionadded:: 0.20.2
+
+To see the ordering of the groups (as opposed to the order of rows
+within a group given by ``cumcount``) you can use the ``ngroup``
+method.
+
+Note that the numbers given to the groups match the order in which the
+groups would be seen when iterating over the groupby object, not the
+order they are first observed.
+
+.. ipython:: python

-   df.groupby('A').cumcount()
+   dfg = pd.DataFrame(list('aaabba'), columns=['A'])
+   dfg

-   df.groupby('A').cumcount(ascending=False)  # kwarg only
+   dfg.groupby('A').ngroup()
+
+   dfg.groupby('A').ngroup(ascending=False)

 Plotting
 ~~~~~~~~
@@ -1176,14 +1200,41 @@ Regroup columns of a DataFrame according to their sum, and sum the aggregated on

   df
   df.groupby(df.sum(), axis=1).sum()

+.. _groupby.multicolumn_factorization:
+
+Multi-column factorization
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By using ``.ngroup()``, we can extract information about the groups in
+a way similar to :func:`factorize` (as described further in the
+:ref:`reshaping API <reshaping.factorize>`) but which applies
+naturally to multiple columns of mixed type and different
+sources. This can be useful as an intermediate categorical-like step
+in processing, when the relationships between the group rows are more
+important than their content, or as input to an algorithm which only
+accepts the integer encoding. (For more information about support in
+pandas for full categorical data, see the :ref:`Categorical
+introduction <categorical>` and the
+:ref:`API documentation <api.categorical>`.)
+
+.. ipython:: python
+
+   dfg = pd.DataFrame({"A": [1, 1, 2, 3, 2], "B": list("aaaba")})
+
+   dfg
+
+   dfg.groupby(["A", "B"]).ngroup()
+
+   dfg.groupby(["A", [0, 0, 0, 1, 1]]).ngroup()

 Groupby by Indexer to 'resample' data
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-Resampling produces new hypothetical samples(resamples) from already existing observed data or from a model that generates data. These new samples are similar to the pre-existing samples.
+Resampling produces new hypothetical samples (resamples) from already existing observed data or from a model that generates data. These new samples are similar to the pre-existing samples.

 In order for resampling to work on indices that are non-datetimelike, the following procedure can be utilized.

-In the following examples, **df.index // 5** returns a binary array which is used to determine what get's selected for the groupby operation.
+In the following examples, **df.index // 5** returns a binary array which is used to determine what gets selected for the groupby operation.

 .. note:: The below example shows how we can downsample by consolidation of samples into fewer samples. Here by using **df.index // 5**, we are aggregating the samples in bins. By applying **std()** function, we aggregate the information contained in many samples into a small subset of values which is their standard deviation thereby reducing the number of samples.

diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst
index b93749922c8ea..5f125e329f6f1 100644
--- a/doc/source/reshaping.rst
+++ b/doc/source/reshaping.rst
@@ -636,7 +636,7 @@ When a column contains only one level, it will be omitted in the result.

   pd.get_dummies(df, drop_first=True)

-
+.. _reshaping.factorize:

 Factorizing values
 ------------------
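As an illustration of what the documentation added above describes (group numbers assigned in iteration order versus row numbers within each group), here is a minimal standalone sketch; it is not part of the patch itself and assumes pandas >= 0.20.2, the first release with ``GroupBy.ngroup``:

    import pandas as pd

    # Same frame as in the docs above: two grouping columns of mixed type.
    dfg = pd.DataFrame({"A": [1, 1, 2, 3, 2], "B": list("aaaba")})

    # ngroup() numbers each *group* in the order the groupby would
    # iterate over them: (1,'a') -> 0, (2,'a') -> 1, (3,'b') -> 2.
    print(dfg.groupby(["A", "B"]).ngroup().tolist())    # [0, 0, 1, 2, 1]

    # cumcount() numbers the *rows inside* each group instead.
    print(dfg.groupby(["A", "B"]).cumcount().tolist())  # [0, 1, 0, 0, 1]

This is the factorize-like encoding the new docs refer to: equal rows receive equal integers, which makes the result usable as input to algorithms that only accept an integer encoding.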
diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index 379249b6e55d6..4028d594d954f 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -23,6 +23,11 @@ Enhancements
 - ``Series`` provides a ``to_latex`` method (:issue:`16180`)
 - Added :attr:`Index.is_strictly_monotonic_increasing` and :attr:`Index.is_strictly_monotonic_decreasing` properties (:issue:`16515`)

+- A new groupby method :meth:`~pandas.core.groupby.GroupBy.ngroup`,
+  parallel to the existing :meth:`~pandas.core.groupby.GroupBy.cumcount`,
+  has been added to return the group order (:issue:`11642`); see
+  :ref:`here <groupby.ngroup>`.
+
 .. _whatsnew_0202.performance:

 Performance Improvements
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 286677d613484..9d6d2297f6ea0 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -150,7 +150,7 @@
     'last', 'first',
     'head', 'tail', 'median',
     'mean', 'sum', 'min', 'max',
-    'cumcount',
+    'cumcount', 'ngroup',
     'resample',
     'rank', 'quantile',
     'fillna',
@@ -1437,6 +1437,75 @@ def nth(self, n, dropna=None):

         return result

+    @Substitution(name='groupby')
+    @Appender(_doc_template)
+    def ngroup(self, ascending=True):
+        """
+        Number each group from 0 to the number of groups - 1.
+
+        This is the enumerative complement of cumcount. Note that the
+        numbers given to the groups match the order in which the groups
+        would be seen when iterating over the groupby object, not the
+        order they are first observed.
+
+        .. versionadded:: 0.20.2
+
+        Parameters
+        ----------
+        ascending : bool, default True
+            If False, number in reverse, from number of group - 1 to 0.
+
+        Examples
+        --------
+
+        >>> df = pd.DataFrame({"A": list("aaabba")})
+        >>> df
+           A
+        0  a
+        1  a
+        2  a
+        3  b
+        4  b
+        5  a
+        >>> df.groupby('A').ngroup()
+        0    0
+        1    0
+        2    0
+        3    1
+        4    1
+        5    0
+        dtype: int64
+        >>> df.groupby('A').ngroup(ascending=False)
+        0    1
+        1    1
+        2    1
+        3    0
+        4    0
+        5    1
+        dtype: int64
+        >>> df.groupby(["A", [1,1,2,3,2,1]]).ngroup()
+        0    0
+        1    0
+        2    1
+        3    3
+        4    2
+        5    0
+        dtype: int64
+
+        See also
+        --------
+        .cumcount : Number the rows in each group.
+
+        """
+
+        self._set_group_selection()
+
+        index = self._selected_obj.index
+        result = Series(self.grouper.group_info[0], index)
+        if not ascending:
+            result = self.ngroups - 1 - result
+        return result
+
     @Substitution(name='groupby')
     @Appender(_doc_template)
     def cumcount(self, ascending=True):
@@ -1481,6 +1550,10 @@ def cumcount(self, ascending=True):
         4    0
         5    0
         dtype: int64
+
+        See also
+        --------
+        .ngroup : Number the groups themselves.
""" self._set_group_selection() diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py new file mode 100644 index 0000000000000..485241d593d4f --- /dev/null +++ b/pandas/tests/groupby/test_counting.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +from __future__ import print_function + +import numpy as np + +from pandas import (DataFrame, Series, MultiIndex) +from pandas.util.testing import assert_series_equal +from pandas.compat import (range, product as cart_product) + + +class TestCounting(object): + + def test_cumcount(self): + df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A']) + g = df.groupby('A') + sg = g.A + + expected = Series([0, 1, 2, 0, 3]) + + assert_series_equal(expected, g.cumcount()) + assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_empty(self): + ge = DataFrame().groupby(level=0) + se = Series().groupby(level=0) + + # edge case, as this is usually considered float + e = Series(dtype='int64') + + assert_series_equal(e, ge.cumcount()) + assert_series_equal(e, se.cumcount()) + + def test_cumcount_dupe_index(self): + df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], + index=[0] * 5) + g = df.groupby('A') + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=[0] * 5) + + assert_series_equal(expected, g.cumcount()) + assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_mi(self): + mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) + df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], + index=mi) + g = df.groupby('A') + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=mi) + + assert_series_equal(expected, g.cumcount()) + assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_groupby_not_col(self): + df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], + index=[0] * 5) + g = df.groupby([0, 0, 0, 1, 0]) + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=[0] * 5) + + assert_series_equal(expected, g.cumcount()) + assert_series_equal(expected, sg.cumcount()) + + def test_ngroup(self): + df = DataFrame({'A': list('aaaba')}) + g = df.groupby('A') + sg = g.A + + expected = Series([0, 0, 0, 1, 0]) + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_distinct(self): + df = DataFrame({'A': list('abcde')}) + g = df.groupby('A') + sg = g.A + + expected = Series(range(5), dtype='int64') + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_one_group(self): + df = DataFrame({'A': [0] * 5}) + g = df.groupby('A') + sg = g.A + + expected = Series([0] * 5) + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_empty(self): + ge = DataFrame().groupby(level=0) + se = Series().groupby(level=0) + + # edge case, as this is usually considered float + e = Series(dtype='int64') + + assert_series_equal(e, ge.ngroup()) + assert_series_equal(e, se.ngroup()) + + def test_ngroup_series_matches_frame(self): + df = DataFrame({'A': list('aaaba')}) + s = Series(list('aaaba')) + + assert_series_equal(df.groupby(s).ngroup(), + s.groupby(s).ngroup()) + + def test_ngroup_dupe_index(self): + df = DataFrame({'A': list('aaaba')}, index=[0] * 5) + g = df.groupby('A') + sg = g.A + + expected = Series([0, 0, 0, 1, 0], index=[0] * 5) + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_mi(self): + mi = MultiIndex.from_tuples([[0, 
1], [1, 2], [2, 2], [2, 2], [1, 0]]) + df = DataFrame({'A': list('aaaba')}, index=mi) + g = df.groupby('A') + sg = g.A + expected = Series([0, 0, 0, 1, 0], index=mi) + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_groupby_not_col(self): + df = DataFrame({'A': list('aaaba')}, index=[0] * 5) + g = df.groupby([0, 0, 0, 1, 0]) + sg = g.A + + expected = Series([0, 0, 0, 1, 0], index=[0] * 5) + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_descending(self): + df = DataFrame(['a', 'a', 'b', 'a', 'b'], columns=['A']) + g = df.groupby(['A']) + + ascending = Series([0, 0, 1, 0, 1]) + descending = Series([1, 1, 0, 1, 0]) + + assert_series_equal(descending, (g.ngroups - 1) - ascending) + assert_series_equal(ascending, g.ngroup(ascending=True)) + assert_series_equal(descending, g.ngroup(ascending=False)) + + def test_ngroup_matches_cumcount(self): + # verify one manually-worked out case works + df = DataFrame([['a', 'x'], ['a', 'y'], ['b', 'x'], + ['a', 'x'], ['b', 'y']], columns=['A', 'X']) + g = df.groupby(['A', 'X']) + g_ngroup = g.ngroup() + g_cumcount = g.cumcount() + expected_ngroup = Series([0, 1, 2, 0, 3]) + expected_cumcount = Series([0, 0, 0, 1, 0]) + + assert_series_equal(g_ngroup, expected_ngroup) + assert_series_equal(g_cumcount, expected_cumcount) + + def test_ngroup_cumcount_pair(self): + # brute force comparison for all small series + for p in cart_product(range(3), repeat=4): + df = DataFrame({'a': p}) + g = df.groupby(['a']) + + order = sorted(set(p)) + ngroupd = [order.index(val) for val in p] + cumcounted = [p[:i].count(val) for i, val in enumerate(p)] + + assert_series_equal(g.ngroup(), Series(ngroupd)) + assert_series_equal(g.cumcount(), Series(cumcounted)) + + def test_ngroup_respects_groupby_order(self): + np.random.seed(0) + df = DataFrame({'a': np.random.choice(list('abcdef'), 100)}) + for sort_flag in (False, True): + g = df.groupby(['a'], sort=sort_flag) + df['group_id'] = -1 + df['group_index'] = -1 + + for i, (_, group) in enumerate(g): + df.loc[group.index, 'group_id'] = i + for j, ind in enumerate(group.index): + df.loc[ind, 'group_index'] = j + + assert_series_equal(Series(df['group_id'].values), + g.ngroup()) + assert_series_equal(Series(df['group_index'].values), + g.cumcount()) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 88afa51e46b6c..19124a33bdbcb 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3399,60 +3399,6 @@ def test_groupby_with_small_elem(self): res = grouped.get_group((pd.Timestamp('2014-08-31'), 'start')) tm.assert_frame_equal(res, df.iloc[[2], :]) - def test_cumcount(self): - df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A']) - g = df.groupby('A') - sg = g.A - - expected = Series([0, 1, 2, 0, 3]) - - assert_series_equal(expected, g.cumcount()) - assert_series_equal(expected, sg.cumcount()) - - def test_cumcount_empty(self): - ge = DataFrame().groupby(level=0) - se = Series().groupby(level=0) - - # edge case, as this is usually considered float - e = Series(dtype='int64') - - assert_series_equal(e, ge.cumcount()) - assert_series_equal(e, se.cumcount()) - - def test_cumcount_dupe_index(self): - df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], - index=[0] * 5) - g = df.groupby('A') - sg = g.A - - expected = Series([0, 1, 2, 0, 3], index=[0] * 5) - - assert_series_equal(expected, g.cumcount()) - 
assert_series_equal(expected, sg.cumcount()) - - def test_cumcount_mi(self): - mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) - df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], - index=mi) - g = df.groupby('A') - sg = g.A - - expected = Series([0, 1, 2, 0, 3], index=mi) - - assert_series_equal(expected, g.cumcount()) - assert_series_equal(expected, sg.cumcount()) - - def test_cumcount_groupby_not_col(self): - df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], - index=[0] * 5) - g = df.groupby([0, 0, 0, 1, 0]) - sg = g.A - - expected = Series([0, 1, 2, 0, 3], index=[0] * 5) - - assert_series_equal(expected, g.cumcount()) - assert_series_equal(expected, sg.cumcount()) - def test_fill_constistency(self): # GH9221 diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index 5d131717f8345..2c8bf57f20fae 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -24,6 +24,7 @@ 'head', 'tail', 'cumcount', + 'ngroup', 'resample', 'rank', 'quantile', @@ -61,6 +62,7 @@ 'head', 'tail', 'cumcount', + 'ngroup', 'resample', 'rank', 'quantile', @@ -237,7 +239,7 @@ def test_tab_completion(mframe): 'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count', 'nunique', 'head', 'describe', 'cummax', 'quantile', 'rank', 'cumprod', 'tail', 'resample', 'cummin', 'fillna', - 'cumsum', 'cumcount', 'all', 'shift', 'skew', + 'cumsum', 'cumcount', 'ngroup', 'all', 'shift', 'skew', 'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', 'cov', 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin', 'ffill', 'bfill', 'pad', 'backfill', 'rolling', 'expanding']) From fc4408bbf2e731f459ea9e2b7ccfcb834315befb Mon Sep 17 00:00:00 2001 From: Oleg Shteynbuk Date: Thu, 1 Jun 2017 18:19:10 -0400 Subject: [PATCH 645/933] make null lowercase a missing value (#16534) --- doc/source/io.rst | 2 +- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/_libs/parsers.pyx | 2 +- pandas/io/common.py | 2 +- pandas/tests/io/parser/na_values.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index bca23dd18a0e3..82cb7abde4b38 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -227,7 +227,7 @@ na_values : scalar, str, list-like, or dict, default ``None`` Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: ``'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'NA', - '#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan', ''``. + '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''``. keep_default_na : boolean, default ``True`` If na_values are specified and keep_default_na is ``False`` the default NaN values are overridden, otherwise they're appended to. diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index a6b6d704737bd..41231a8813fa5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -38,7 +38,7 @@ Other Enhancements - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) - :func:`DataFrame.clip()` and :func: `Series.cip()` have gained an inplace argument. (:issue: `15388`) - :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when margins=True. 
(:issue:`15972`) - +- :func:`read_csv` has gained 'null' as an additional default missing value.(:issue:`16471`) .. _whatsnew_0210.api_breaking: Backwards incompatible API changes diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 2def4dc9dcf24..7a6f366d5b1a9 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -277,7 +277,7 @@ DEFAULT_CHUNKSIZE = 256 * 1024 # no longer excluding inf representations # '1.#INF','-1.#INF', '1.#INF000000', _NA_VALUES = [b'-1.#IND', b'1.#QNAN', b'1.#IND', b'-1.#QNAN', - b'#N/A N/A', b'NA', b'#NA', b'NULL', b'NaN', + b'#N/A N/A', b'NA', b'#NA', b'NULL', b'null', b'NaN', b'nan', b''] diff --git a/pandas/io/common.py b/pandas/io/common.py index f4e12ea3fb173..1c987f6a9dfc3 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -31,7 +31,7 @@ # '1.#INF','-1.#INF', '1.#INF000000', _NA_VALUES = set([ '-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', - 'N/A', 'NA', '#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan', '' + 'N/A', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '' ]) try: diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index 362837a46f838..6f72ed51d76c6 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -70,7 +70,7 @@ def test_non_string_na_values(self): def test_default_na_values(self): _NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', - '#N/A', 'N/A', 'NA', '#NA', 'NULL', 'NaN', + '#N/A', 'N/A', 'NA', '#NA', 'NULL', 'null', 'NaN', 'nan', '-NaN', '-nan', '#N/A N/A', '']) assert _NA_VALUES == parsers._NA_VALUES nv = len(_NA_VALUES) From db419bfad9afb363156f23acd0810f01e1b2c9a8 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 1 Jun 2017 18:24:19 -0400 Subject: [PATCH 646/933] MAINT: Drop has_index_names input from read_excel (#16522) --- doc/source/io.rst | 5 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/excel.py | 40 +++++++-------------- pandas/tests/io/test_excel.py | 63 ++++++++++++++++++++------------- 4 files changed, 52 insertions(+), 57 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 82cb7abde4b38..0c31bfe014a12 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2739,11 +2739,6 @@ should be passed to ``index_col`` and ``header`` import os os.remove('path_to_file.xlsx') -.. warning:: - - Excel files saved in version 0.16.2 or prior that had index names will still able to be read in, - but the ``has_index_names`` argument must specified to ``True``. - Parsing Specific Columns ++++++++++++++++++++++++ diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 41231a8813fa5..6dad03f086a6c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -70,6 +70,7 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- ``pd.read_excel()`` has dropped the ``has_index_names`` parameter (:issue:`10967`) .. _whatsnew_0210.performance: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index aa08e5fd378f0..a4d2fabf76a41 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -141,10 +141,6 @@ convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric data will be read in as floats: Excel stores all numbers as floats internally -has_index_names : boolean, default None - DEPRECATED: for version 0.17+ index names will be automatically - inferred based on index_col. To read Excel output from 0.16.2 and - prior that had saved index names, use True. 
Returns ------- @@ -198,8 +194,8 @@ def get_writer(engine_name): def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0, index_col=None, names=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, - convert_float=True, has_index_names=None, converters=None, - dtype=None, true_values=None, false_values=None, engine=None, + convert_float=True, converters=None, dtype=None, + true_values=None, false_values=None, engine=None, squeeze=False, **kwds): # Can't use _deprecate_kwarg since sheetname=None has a special meaning @@ -218,10 +214,9 @@ def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0, sheetname=sheet_name, header=header, skiprows=skiprows, names=names, index_col=index_col, parse_cols=parse_cols, parse_dates=parse_dates, date_parser=date_parser, na_values=na_values, thousands=thousands, - convert_float=convert_float, has_index_names=has_index_names, - skip_footer=skip_footer, converters=converters, dtype=dtype, - true_values=true_values, false_values=false_values, squeeze=squeeze, - **kwds) + convert_float=convert_float, skip_footer=skip_footer, + converters=converters, dtype=dtype, true_values=true_values, + false_values=false_values, squeeze=squeeze, **kwds) class ExcelFile(object): @@ -283,9 +278,8 @@ def __fspath__(self): def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0, names=None, index_col=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, - convert_float=True, has_index_names=None, - converters=None, true_values=None, false_values=None, - squeeze=False, **kwds): + convert_float=True, converters=None, true_values=None, + false_values=None, squeeze=False, **kwds): """ Parse specified sheet(s) into a DataFrame @@ -296,7 +290,6 @@ def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0, return self._parse_excel(sheetname=sheet_name, header=header, skiprows=skiprows, names=names, index_col=index_col, - has_index_names=has_index_names, parse_cols=parse_cols, parse_dates=parse_dates, date_parser=date_parser, na_values=na_values, @@ -343,23 +336,17 @@ def _excel2num(x): return i in parse_cols def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None, - skip_footer=0, index_col=None, has_index_names=None, - parse_cols=None, parse_dates=False, date_parser=None, - na_values=None, thousands=None, convert_float=True, - true_values=None, false_values=None, verbose=False, - dtype=None, squeeze=False, **kwds): + skip_footer=0, index_col=None, parse_cols=None, + parse_dates=False, date_parser=None, na_values=None, + thousands=None, convert_float=True, true_values=None, + false_values=None, verbose=False, dtype=None, + squeeze=False, **kwds): skipfooter = kwds.pop('skipfooter', None) if skipfooter is not None: skip_footer = skipfooter _validate_header_arg(header) - if has_index_names is not None: - warn("\nThe has_index_names argument is deprecated; index names " - "will be automatically inferred based on index_col.\n" - "This argmument is still necessary if reading Excel output " - "from 0.16.2 or prior with index names.", FutureWarning, - stacklevel=3) if 'chunksize' in kwds: raise NotImplementedError("chunksize keyword of read_excel " @@ -511,8 +498,7 @@ def _parse_cell(cell_contents, cell_typ): else: last = data[row][col] - if is_list_like(header) and len(header) > 1: - has_index_names = True + has_index_names = is_list_like(header) and len(header) > 1 # GH 12292 : error when read one empty column from excel file try: diff --git 
a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
index 4441ed815370b..abe3757ec64f3 100644
--- a/pandas/tests/io/test_excel.py
+++ b/pandas/tests/io/test_excel.py
@@ -881,8 +881,42 @@ def test_excel_multindex_roundtrip(self):
                 tm.assert_frame_equal(
                     df, act, check_names=check_names)

-    def test_excel_oldindex_format(self):
-        # GH 4679
+    def test_excel_old_index_format(self):
+        # see gh-4679
+        filename = 'test_index_name_pre17' + self.ext
+        in_file = os.path.join(self.dirpath, filename)
+
+        # We detect headers to determine if index names exist, so
+        # that "index" name in the "names" version of the data will
+        # now be interpreted as rows that include null data.
+        data = np.array([[None, None, None, None, None],
+                         ['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'],
+                         ['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'],
+                         ['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'],
+                         ['R3C0', 'R3C1', 'R3C2', 'R3C3', 'R3C4'],
+                         ['R4C0', 'R4C1', 'R4C2', 'R4C3', 'R4C4']])
+        columns = ['C_l0_g0', 'C_l0_g1', 'C_l0_g2', 'C_l0_g3', 'C_l0_g4']
+        mi = MultiIndex(levels=[['R0', 'R_l0_g0', 'R_l0_g1',
+                                 'R_l0_g2', 'R_l0_g3', 'R_l0_g4'],
+                                ['R1', 'R_l1_g0', 'R_l1_g1',
+                                 'R_l1_g2', 'R_l1_g3', 'R_l1_g4']],
+                        labels=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]],
+                        names=[None, None])
+        si = Index(['R0', 'R_l0_g0', 'R_l0_g1', 'R_l0_g2',
+                    'R_l0_g3', 'R_l0_g4'], name=None)
+
+        expected = pd.DataFrame(data, index=si, columns=columns)
+
+        actual = pd.read_excel(in_file, 'single_names')
+        tm.assert_frame_equal(actual, expected)
+
+        expected.index = mi
+
+        actual = pd.read_excel(in_file, 'multi_names')
+        tm.assert_frame_equal(actual, expected)
+
+        # The analogous versions of the "names" version data
+        # where there are explicitly no names for the indices.
         data = np.array([['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'],
                          ['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'],
                          ['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'],
@@ -894,40 +928,19 @@ def test_excel_oldindex_format(self):
                                  ['R_l1_g0', 'R_l1_g1', 'R_l1_g2',
                                   'R_l1_g3', 'R_l1_g4']],
                        labels=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]],
-                        names=['R0', 'R1'])
+                        names=[None, None])
         si = Index(['R_l0_g0', 'R_l0_g1', 'R_l0_g2',
-                    'R_l0_g3', 'R_l0_g4'], name='R0')
-
-        in_file = os.path.join(
-            self.dirpath, 'test_index_name_pre17' + self.ext)
+                    'R_l0_g3', 'R_l0_g4'], name=None)

         expected = pd.DataFrame(data, index=si, columns=columns)

-        with tm.assert_produces_warning(FutureWarning):
-            actual = pd.read_excel(
-                in_file, 'single_names', has_index_names=True)
-        tm.assert_frame_equal(actual, expected)
-
         expected.index.name = None
         actual = pd.read_excel(in_file, 'single_no_names')
         tm.assert_frame_equal(actual, expected)
-        with tm.assert_produces_warning(FutureWarning):
-            actual = pd.read_excel(
-                in_file, 'single_no_names', has_index_names=False)
-        tm.assert_frame_equal(actual, expected)

         expected.index = mi
-        with tm.assert_produces_warning(FutureWarning):
-            actual = pd.read_excel(
-                in_file, 'multi_names', has_index_names=True)
-        tm.assert_frame_equal(actual, expected)
-
         expected.index.names = [None, None]
         actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1])
         tm.assert_frame_equal(actual, expected, check_names=False)
-        with tm.assert_produces_warning(FutureWarning):
-            actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1],
-                                   has_index_names=False)
-        tm.assert_frame_equal(actual, expected, check_names=False)

     def test_read_excel_bool_header_arg(self):
         # GH 6114
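Since header rows are now used to infer index names, a round trip through Excel needs no flag at all. A minimal sketch of the resulting behaviour (not part of the patch; the file name is illustrative and an Excel engine such as openpyxl or xlwt must be installed):

    import pandas as pd

    # Write a frame whose index has a name; to_excel emits an extra
    # header row carrying that name.
    df = pd.DataFrame({'A': [1, 2]},
                      index=pd.Index(['x', 'y'], name='idx'))
    df.to_excel('roundtrip.xlsx')

    # On re-read, the name is inferred from the sheet layout itself,
    # with no has_index_names argument.
    result = pd.read_excel('roundtrip.xlsx', index_col=0)
    print(result.index.name)  # expected: 'idx'

From 8d092d97113c8944ac4570748da715265d682a3a Mon Sep 17 00:00:00 2001
From: Ryan Hendrickson
Date: Thu, 1 Jun 2017 20:23:10 -0400
Subject: [PATCH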
647/933] BUG: reimplement MultiIndex.remove_unused_levels (#16565) --- asv_bench/benchmarks/indexing.py | 9 ++++++++ doc/source/whatsnew/v0.20.2.txt | 2 ++ pandas/core/indexes/multi.py | 34 +++++++++++++----------------- pandas/tests/indexes/test_multi.py | 29 ++++++++++++++++++++++++- 4 files changed, 54 insertions(+), 20 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 6a2c9d48c4a28..d941ef20dc7ac 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -204,6 +204,12 @@ def setup(self): [np.arange(100), list('A'), list('A')], names=['one', 'two', 'three']) + rng = np.random.RandomState(4) + size = 1 << 16 + self.mi_unused_levels = pd.MultiIndex.from_arrays([ + rng.randint(0, 1 << 13, size), + rng.randint(0, 1 << 10, size)])[rng.rand(size) < 0.1] + def time_series_xs_mi_ix(self): self.s.ix[999] @@ -248,6 +254,9 @@ def time_multiindex_small_get_loc_warm(self): def time_is_monotonic(self): self.miint.is_monotonic + def time_remove_unused_levels(self): + self.mi_unused_levels.remove_unused_levels() + class IntervalIndexing(object): goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 4028d594d954f..87a790d43577f 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -37,6 +37,7 @@ Performance Improvements - Performance regression fix for MultiIndexes (:issue:`16319`, :issue:`16346`) - Improved performance of ``.clip()`` with scalar arguments (:issue:`15400`) - Improved performance of groupby with categorical groupers (:issue:`16413`) +- Improved performance of ``MultiIndex.remove_unused_levels()`` (:issue:`16556`) .. _whatsnew_0202.bug_fixes: @@ -66,6 +67,7 @@ Indexing - Bug in ``DataFrame.reset_index(level=)`` with single level index (:issue:`16263`) - Bug in partial string indexing with a monotonic, but not strictly-monotonic, index incorrectly reversing the slice bounds (:issue:`16515`) +- Bug in ``MultiIndex.remove_unused_levels()`` (:issue:`16556`) I/O ^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 981a6a696a618..f30da5b05f8ae 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1290,8 +1290,8 @@ def remove_unused_levels(self): new_levels = [] new_labels = [] - changed = np.ones(self.nlevels, dtype=bool) - for i, (lev, lab) in enumerate(zip(self.levels, self.labels)): + changed = False + for lev, lab in zip(self.levels, self.labels): uniques = algos.unique(lab) @@ -1299,33 +1299,29 @@ def remove_unused_levels(self): if len(uniques) == len(lev): new_levels.append(lev) new_labels.append(lab) - changed[i] = False continue - # set difference, then reverse sort - diff = Index(np.arange(len(lev))).difference(uniques) - unused = diff.sort_values(ascending=False) + changed = True + + # labels get mapped from uniques to 0:len(uniques) + label_mapping = np.zeros(len(lev)) + label_mapping[uniques] = np.arange(len(uniques)) + lab = label_mapping[lab] # new levels are simple lev = lev.take(uniques) - # new labels, we remove the unsued - # by decrementing the labels for that value - # prob a better way - for u in unused: - - lab = np.where(lab > u, lab - 1, lab) - new_levels.append(lev) new_labels.append(lab) - # nothing changed - if not changed.any(): - return self + result = self._shallow_copy() - return MultiIndex(new_levels, new_labels, - names=self.names, sortorder=self.sortorder, - verify_integrity=False) + if changed: + result._reset_identity() + 
result._set_levels(new_levels, validate=False) + result._set_labels(new_labels, validate=False) + + return result @property def nlevels(self): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 388a49d25cb82..242a9d63eac63 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2515,7 +2515,34 @@ def test_reconstruct_remove_unused(self): # idempotent result2 = result.remove_unused_levels() tm.assert_index_equal(result2, expected) - assert result2 is result + assert result2.is_(result) + + @pytest.mark.parametrize('first_type,second_type', [ + ('int64', 'int64'), + ('datetime64[D]', 'str')]) + def test_remove_unused_levels_large(self, first_type, second_type): + # GH16556 + + # because tests should be deterministic (and this test in particular + # checks that levels are removed, which is not the case for every + # random input): + rng = np.random.RandomState(4) # seed is arbitrary value that works + + size = 1 << 16 + df = DataFrame(dict( + first=rng.randint(0, 1 << 13, size).astype(first_type), + second=rng.randint(0, 1 << 10, size).astype(second_type), + third=rng.rand(size))) + df = df.groupby(['first', 'second']).sum() + df = df[df.third < 0.1] + + result = df.index.remove_unused_levels() + assert len(result.levels[0]) < len(df.index.levels[0]) + assert len(result.levels[1]) < len(df.index.levels[1]) + assert result.equals(df.index) + + expected = df.reset_index().set_index(['first', 'second']).index + tm.assert_index_equal(result, expected) def test_isin(self): values = [('foo', 2), ('bar', 3), ('quux', 4)] From 5f312daf1cf02c924119a57262d9d921120dc62a Mon Sep 17 00:00:00 2001 From: Chris Filo Gorgolewski Date: Thu, 1 Jun 2017 17:28:09 -0700 Subject: [PATCH 648/933] Adding 'n/a' to list of strings denoting missing values (#16079) --- doc/source/io.rst | 2 +- doc/source/whatsnew/v0.21.0.txt | 4 +++- pandas/_libs/parsers.pyx | 2 +- pandas/io/common.py | 2 +- pandas/tests/io/parser/na_values.py | 4 ++-- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 0c31bfe014a12..bd81b478b5326 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -226,7 +226,7 @@ NA and Missing Data Handling na_values : scalar, str, list-like, or dict, default ``None`` Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: - ``'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'NA', + ``'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''``. keep_default_na : boolean, default ``True`` If na_values are specified and keep_default_na is ``False`` the default NaN diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6dad03f086a6c..3dd8bb2ac2de5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -38,7 +38,7 @@ Other Enhancements - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) - :func:`DataFrame.clip()` and :func: `Series.cip()` have gained an inplace argument. (:issue: `15388`) - :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when margins=True. (:issue:`15972`) -- :func:`read_csv` has gained 'null' as an additional default missing value.(:issue:`16471`) + .. 
_whatsnew_0210.api_breaking:

@@ -49,6 +49,8 @@ Backwards incompatible API changes

 - Accessing a non-existent attribute on a closed :class:`HDFStore` will now raise
   an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`)
+- :func:`read_csv` now treats ``'null'`` strings as missing values by default (:issue:`16471`)
+- :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`)

 .. _whatsnew_0210.api:

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 7a6f366d5b1a9..2549c8545908d 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -277,7 +277,7 @@ DEFAULT_CHUNKSIZE = 256 * 1024
 # no longer excluding inf representations
 # '1.#INF','-1.#INF', '1.#INF000000',
 _NA_VALUES = [b'-1.#IND', b'1.#QNAN', b'1.#IND', b'-1.#QNAN',
-              b'#N/A N/A', b'NA', b'#NA', b'NULL', b'null', b'NaN',
+              b'#N/A N/A', b'n/a', b'NA', b'#NA', b'NULL', b'null', b'NaN',
               b'nan', b'']

diff --git a/pandas/io/common.py b/pandas/io/common.py
index 1c987f6a9dfc3..cbfc33dbebb81 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -31,7 +31,7 @@
 # '1.#INF','-1.#INF', '1.#INF000000',
 _NA_VALUES = set([
     '-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A',
-    'N/A', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''
+    'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''
 ])

 try:
diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py
index 6f72ed51d76c6..170f9d428c9cc 100644
--- a/pandas/tests/io/parser/na_values.py
+++ b/pandas/tests/io/parser/na_values.py
@@ -70,8 +70,8 @@ def test_non_string_na_values(self):

     def test_default_na_values(self):
         _NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN',
-                          '#N/A', 'N/A', 'NA', '#NA', 'NULL', 'null', 'NaN',
-                          'nan', '-NaN', '-nan', '#N/A N/A', ''])
+                          '#N/A', 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null',
+                          'NaN', 'nan', '-NaN', '-nan', '#N/A N/A', ''])
         assert _NA_VALUES == parsers._NA_VALUES
         nv = len(_NA_VALUES)
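The net effect of the two parser changes above is easy to demonstrate. A minimal standalone sketch (not part of the patch; behaviour as of the 0.21.0 default NA values):

    import pandas as pd
    from io import StringIO

    data = "a,b\n1,null\n2,n/a\n"

    # Both 'null' and 'n/a' are now in the default NA set, so they are
    # read back as NaN without passing na_values explicitly.
    print(pd.read_csv(StringIO(data)))

From 06f8347d8f4465f0da1c1b69fa3cb7e2e7e0c65b Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Fri, 2 Jun 2017 04:54:40 -0500
Subject: [PATCH 649/933] API: Make is_strictly_monotonic_* private (#16576)

---
 doc/source/advanced.rst              | 10 +++++++++
 doc/source/api.rst                   |  2 --
 doc/source/whatsnew/v0.20.2.txt      |  1 -
 pandas/core/indexes/base.py          | 16 +++++++-------
 pandas/core/indexes/datetimes.py     |  2 +-
 pandas/tests/indexes/test_base.py    |  8 +++----
 pandas/tests/indexes/test_multi.py   | 32 ++++++++++++++--------------
 pandas/tests/indexes/test_numeric.py | 20 ++++++++---------
 pandas/tests/indexes/test_range.py   | 20 ++++++++---------
 9 files changed, 59 insertions(+), 52 deletions(-)

diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
index ea00588ba156f..711c3e9a95d05 100644
--- a/doc/source/advanced.rst
+++ b/doc/source/advanced.rst
@@ -948,6 +948,16 @@ On the other hand, if the index is not monotonic, then both slice bounds must be
    In [11]: df.loc[2:3, :]
    KeyError: 'Cannot get right slice bound for non-unique label: 3'

+:meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` only check that
+an index is weakly monotonic. To check for strict monotonicity, you can combine one of those with
+:meth:`Index.is_unique`
+
+.. 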
ipython:: python + + weakly_monotonic = pd.Index(['a', 'b', 'c', 'c']) + weakly_monotonic + weakly_monotonic.is_monotonic_increasing + weakly_monotonic.is_monotonic_increasing & weakly_monotonic.is_unique Endpoints are inclusive ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/api.rst b/doc/source/api.rst index cc8c5f6356e58..cfdd305348d70 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1286,8 +1286,6 @@ Attributes Index.is_monotonic Index.is_monotonic_increasing Index.is_monotonic_decreasing - Index.is_strictly_monotonic_increasing - Index.is_strictly_monotonic_decreasing Index.is_unique Index.has_duplicates Index.dtype diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 87a790d43577f..d58a98703f22a 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -21,7 +21,6 @@ Enhancements - Unblocked access to additional compression types supported in pytables: 'blosc:blosclz, 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`) - ``Series`` provides a ``to_latex`` method (:issue:`16180`) -- Added :attr:`Index.is_strictly_monotonic_increasing` and :attr:`Index.is_strictly_monotonic_decreasing` properties (:issue:`16515`) - A new groupby method :meth:`~pandas.core.groupby.GroupBy.ngroup`, parallel to the existing :meth:`~pandas.core.groupby.GroupBy.cumcount`, diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e8c2043138edb..028464ad5cd89 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1221,33 +1221,33 @@ def is_monotonic_decreasing(self): return self._engine.is_monotonic_decreasing @property - def is_strictly_monotonic_increasing(self): + def _is_strictly_monotonic_increasing(self): """return if the index is strictly monotonic increasing (only increasing) values Examples -------- - >>> Index([1, 2, 3]).is_strictly_monotonic_increasing + >>> Index([1, 2, 3])._is_strictly_monotonic_increasing True - >>> Index([1, 2, 2]).is_strictly_monotonic_increasing + >>> Index([1, 2, 2])._is_strictly_monotonic_increasing False - >>> Index([1, 3, 2]).is_strictly_monotonic_increasing + >>> Index([1, 3, 2])._is_strictly_monotonic_increasing False """ return self.is_unique and self.is_monotonic_increasing @property - def is_strictly_monotonic_decreasing(self): + def _is_strictly_monotonic_decreasing(self): """return if the index is strictly monotonic decreasing (only decreasing) values Examples -------- - >>> Index([3, 2, 1]).is_strictly_monotonic_decreasing + >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing True - >>> Index([3, 2, 2]).is_strictly_monotonic_decreasing + >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing False - >>> Index([3, 1, 2]).is_strictly_monotonic_decreasing + >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing False """ return self.is_unique and self.is_monotonic_decreasing diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 60560374cd420..239894cff3874 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1472,7 +1472,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): # the bounds need swapped if index is reverse sorted and has a # length > 1 (is_monotonic_decreasing gives True for empty # and length 1 index) - if self.is_strictly_monotonic_decreasing and len(self) > 1: + if self._is_strictly_monotonic_decreasing and len(self) > 1: return upper if side == 'left' else lower return lower if side == 'left' else upper else: diff --git 
a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index a6933316e4291..d9f8e5e7f382b 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1330,8 +1330,8 @@ def test_is_monotonic_incomparable(self): index = Index([5, datetime.now(), 7]) assert not index.is_monotonic_increasing assert not index.is_monotonic_decreasing - assert not index.is_strictly_monotonic_increasing - assert not index.is_strictly_monotonic_decreasing + assert not index._is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_decreasing def test_get_set_value(self): values = np.random.randn(100) @@ -2030,8 +2030,8 @@ def test_is_monotonic_na(self): for index in examples: assert not index.is_monotonic_increasing assert not index.is_monotonic_decreasing - assert not index.is_strictly_monotonic_increasing - assert not index.is_strictly_monotonic_decreasing + assert not index._is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_decreasing def test_repr_summary(self): with cf.option_context('display.max_seq_items', 10): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 242a9d63eac63..ba917f33d8595 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2373,30 +2373,30 @@ def test_is_monotonic(self): i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=['one', 'two']) assert i.is_monotonic - assert i.is_strictly_monotonic_increasing + assert i._is_strictly_monotonic_increasing assert Index(i.values).is_monotonic - assert i.is_strictly_monotonic_increasing + assert i._is_strictly_monotonic_increasing i = MultiIndex.from_product([np.arange(10, 0, -1), np.arange(10)], names=['one', 'two']) assert not i.is_monotonic - assert not i.is_strictly_monotonic_increasing + assert not i._is_strictly_monotonic_increasing assert not Index(i.values).is_monotonic - assert not Index(i.values).is_strictly_monotonic_increasing + assert not Index(i.values)._is_strictly_monotonic_increasing i = MultiIndex.from_product([np.arange(10), np.arange(10, 0, -1)], names=['one', 'two']) assert not i.is_monotonic - assert not i.is_strictly_monotonic_increasing + assert not i._is_strictly_monotonic_increasing assert not Index(i.values).is_monotonic - assert not Index(i.values).is_strictly_monotonic_increasing + assert not Index(i.values)._is_strictly_monotonic_increasing i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']]) assert not i.is_monotonic - assert not i.is_strictly_monotonic_increasing + assert not i._is_strictly_monotonic_increasing assert not Index(i.values).is_monotonic - assert not Index(i.values).is_strictly_monotonic_increasing + assert not Index(i.values)._is_strictly_monotonic_increasing # string ordering i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], @@ -2406,8 +2406,8 @@ def test_is_monotonic(self): names=['first', 'second']) assert not i.is_monotonic assert not Index(i.values).is_monotonic - assert not i.is_strictly_monotonic_increasing - assert not Index(i.values).is_strictly_monotonic_increasing + assert not i._is_strictly_monotonic_increasing + assert not Index(i.values)._is_strictly_monotonic_increasing i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['mom', 'next', 'zenith']], @@ -2416,8 +2416,8 @@ def test_is_monotonic(self): names=['first', 'second']) assert i.is_monotonic assert Index(i.values).is_monotonic - assert i.is_strictly_monotonic_increasing - assert Index(i.values).is_strictly_monotonic_increasing + assert 
i._is_strictly_monotonic_increasing + assert Index(i.values)._is_strictly_monotonic_increasing # mixed levels, hits the TypeError i = MultiIndex( @@ -2428,20 +2428,20 @@ def test_is_monotonic(self): names=['household_id', 'asset_id']) assert not i.is_monotonic - assert not i.is_strictly_monotonic_increasing + assert not i._is_strictly_monotonic_increasing def test_is_strictly_monotonic(self): idx = pd.MultiIndex(levels=[['bar', 'baz'], ['mom', 'next']], labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) assert idx.is_monotonic_increasing - assert not idx.is_strictly_monotonic_increasing + assert not idx._is_strictly_monotonic_increasing @pytest.mark.xfail(reason="buggy MultiIndex.is_monotonic_decresaing.") - def test_is_strictly_monotonic_decreasing(self): + def test__is_strictly_monotonic_decreasing(self): idx = pd.MultiIndex(levels=[['baz', 'bar'], ['next', 'mom']], labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) assert idx.is_monotonic_decreasing - assert not idx.is_strictly_monotonic_decreasing + assert not idx._is_strictly_monotonic_decreasing def test_reconstruct_sort(self): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 77f34dbf210e0..29d4214fd549b 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -465,36 +465,36 @@ def test_view(self): def test_is_monotonic(self): assert self.index.is_monotonic assert self.index.is_monotonic_increasing - assert self.index.is_strictly_monotonic_increasing + assert self.index._is_strictly_monotonic_increasing assert not self.index.is_monotonic_decreasing - assert not self.index.is_strictly_monotonic_decreasing + assert not self.index._is_strictly_monotonic_decreasing index = self._holder([4, 3, 2, 1]) assert not index.is_monotonic - assert not index.is_strictly_monotonic_increasing - assert index.is_strictly_monotonic_decreasing + assert not index._is_strictly_monotonic_increasing + assert index._is_strictly_monotonic_decreasing index = self._holder([1]) assert index.is_monotonic assert index.is_monotonic_increasing assert index.is_monotonic_decreasing - assert index.is_strictly_monotonic_increasing - assert index.is_strictly_monotonic_decreasing + assert index._is_strictly_monotonic_increasing + assert index._is_strictly_monotonic_decreasing def test_is_strictly_monotonic(self): index = self._holder([1, 1, 2, 3]) assert index.is_monotonic_increasing - assert not index.is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_increasing index = self._holder([3, 2, 1, 1]) assert index.is_monotonic_decreasing - assert not index.is_strictly_monotonic_decreasing + assert not index._is_strictly_monotonic_decreasing index = self._holder([1, 1]) assert index.is_monotonic_increasing assert index.is_monotonic_decreasing - assert not index.is_strictly_monotonic_increasing - assert not index.is_strictly_monotonic_decreasing + assert not index._is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_decreasing def test_logical_compat(self): idx = self.create_index() diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index db8180cb736c4..0d88e88030604 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -331,35 +331,35 @@ def test_is_monotonic(self): assert self.index.is_monotonic assert self.index.is_monotonic_increasing assert not self.index.is_monotonic_decreasing - assert self.index.is_strictly_monotonic_increasing - assert not self.index.is_strictly_monotonic_decreasing + assert 
self.index._is_strictly_monotonic_increasing + assert not self.index._is_strictly_monotonic_decreasing index = RangeIndex(4, 0, -1) assert not index.is_monotonic - assert not index.is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_increasing assert index.is_monotonic_decreasing - assert index.is_strictly_monotonic_decreasing + assert index._is_strictly_monotonic_decreasing index = RangeIndex(1, 2) assert index.is_monotonic assert index.is_monotonic_increasing assert index.is_monotonic_decreasing - assert index.is_strictly_monotonic_increasing - assert index.is_strictly_monotonic_decreasing + assert index._is_strictly_monotonic_increasing + assert index._is_strictly_monotonic_decreasing index = RangeIndex(2, 1) assert index.is_monotonic assert index.is_monotonic_increasing assert index.is_monotonic_decreasing - assert index.is_strictly_monotonic_increasing - assert index.is_strictly_monotonic_decreasing + assert index._is_strictly_monotonic_increasing + assert index._is_strictly_monotonic_decreasing index = RangeIndex(1, 1) assert index.is_monotonic assert index.is_monotonic_increasing assert index.is_monotonic_decreasing - assert index.is_strictly_monotonic_increasing - assert index.is_strictly_monotonic_decreasing + assert index._is_strictly_monotonic_increasing + assert index._is_strictly_monotonic_decreasing def test_equals_range(self): equiv_pairs = [(RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)), From ff0d1f4d10c62f107f57692f64720b2c08696ef6 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 2 Jun 2017 14:26:44 +0200 Subject: [PATCH 650/933] DOC: change doc build to python 3.6 (#16545) * DOC: change doc build to python 3.6 * Remove pinning of pyqt to 4.x * Remove pinning of openpyxl * Add xsel to doc build for clipboard --- .travis.yml | 8 ++++++-- ...uirements-3.5_DOC.build => requirements-3.6_DOC.build} | 2 +- ci/{requirements-3.5_DOC.run => requirements-3.6_DOC.run} | 4 ++-- ci/{requirements-3.5_DOC.sh => requirements-3.6_DOC.sh} | 0 4 files changed, 9 insertions(+), 5 deletions(-) rename ci/{requirements-3.5_DOC.build => requirements-3.6_DOC.build} (73%) rename ci/{requirements-3.5_DOC.run => requirements-3.6_DOC.run} (87%) rename ci/{requirements-3.5_DOC.sh => requirements-3.6_DOC.sh} (100%) diff --git a/.travis.yml b/.travis.yml index 8b6700e11d2c5..5dc4256a268ad 100644 --- a/.travis.yml +++ b/.travis.yml @@ -74,7 +74,11 @@ matrix: # In allow_failures - os: linux env: - - JOB="3.5_DOC" DOC=true + - JOB="3.6_DOC" DOC=true + addons: + apt: + packages: + - xsel allow_failures: - os: linux env: @@ -87,7 +91,7 @@ matrix: - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" - os: linux env: - - JOB="3.5_DOC" DOC=true + - JOB="3.6_DOC" DOC=true before_install: - echo "before_install" diff --git a/ci/requirements-3.5_DOC.build b/ci/requirements-3.6_DOC.build similarity index 73% rename from ci/requirements-3.5_DOC.build rename to ci/requirements-3.6_DOC.build index 73aeb3192242f..bdcfe28105866 100644 --- a/ci/requirements-3.5_DOC.build +++ b/ci/requirements-3.6_DOC.build @@ -1,4 +1,4 @@ -python=3.5* +python=3.6* python-dateutil pytz numpy diff --git a/ci/requirements-3.5_DOC.run b/ci/requirements-3.6_DOC.run similarity index 87% rename from ci/requirements-3.5_DOC.run rename to ci/requirements-3.6_DOC.run index 9647ab53ab835..df8087f62ef16 100644 --- a/ci/requirements-3.5_DOC.run +++ b/ci/requirements-3.6_DOC.run @@ -12,7 +12,7 @@ lxml beautifulsoup4 html5lib pytables -openpyxl=1.8.5 +openpyxl xlrd xlwt 
xlsxwriter
@@ -21,4 +21,4 @@ numexpr
 bottleneck
 statsmodels
 xarray
-pyqt=4.11.4
+pyqt
diff --git a/ci/requirements-3.5_DOC.sh b/ci/requirements-3.6_DOC.sh
similarity index 100%
rename from ci/requirements-3.5_DOC.sh
rename to ci/requirements-3.6_DOC.sh
From 31e67d5d834bd1754e1ce5cdf6725c54bf06a01d Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Fri, 2 Jun 2017 18:30:10 -0400
Subject: [PATCH 651/933] DOC: whatsnew 0.20.2 edits (#16587)

---
 doc/source/whatsnew/v0.20.2.txt | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index d58a98703f22a..c9486954258c8 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -1,7 +1,7 @@
 .. _whatsnew_0202:

-v0.20.2 (???)
--------------
+v0.20.2 (June 3, 2017)
+----------------------

 This is a minor bug-fix release in the 0.20.x series and includes
 some small regression fixes, bug fixes and performance improvements.
@@ -46,19 +46,19 @@ Bug Fixes

 - Silenced a warning on some Windows environments about "tput: terminal attributes: No such device or address" when detecting the terminal size. This fix only applies to python 3 (:issue:`16496`)
 - Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`)
-- Bug in ``Index.symmetric_difference()`` on two equal MultiIndex's, results in a TypeError (:issue `13490`)
+- Bug in ``Index.symmetric_difference()`` on two equal MultiIndex's, results in a ``TypeError`` (:issue:`13490`)
 - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`)
 - Passing an invalid engine to :func:`read_csv` now raises an informative ``ValueError`` rather than ``UnboundLocalError``. (:issue:`16511`)
 - Bug in :func:`unique` on an array of tuples (:issue:`16519`)
-- Bug in :func:`cut`` when ``labels`` are set, resulting in incorrect label ordering (:issue:`16459`)
-- Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on Categoricals (:issue:`16409`)
+- Bug in :func:`cut` when ``labels`` are set, resulting in incorrect label ordering (:issue:`16459`)
+- Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on ``Categoricals`` (:issue:`16409`)

 Conversion
 ^^^^^^^^^^

-- Bug in ``pd.to_numeric()`` in which empty data inputs were causing Python to crash (:issue:`16302`)
-- Silence numpy warnings when broadcasting DataFrame to Series with comparison ops (:issue:`16378`, :issue:`16306`)
+- Bug in :func:`to_numeric` in which empty data inputs were causing a segfault of the interpreter (:issue:`16302`)
+- Silence numpy warnings when broadcasting ``DataFrame`` to ``Series`` with comparison ops (:issue:`16378`, :issue:`16306`)


 Indexing
 ^^^^^^^^

 - Bug in ``DataFrame.reset_index(level=)`` with single level index (:issue:`16263`)
 - Bug in partial string indexing with a monotonic, but not strictly-monotonic, index incorrectly reversing the slice bounds (:issue:`16515`)
-- Bug in ``MultiIndex.remove_unused_levels()`` (:issue:`16556`)
+- Bug in ``MultiIndex.remove_unused_levels()`` that would not return a ``MultiIndex`` equal to the original.
(:issue:`16556`) I/O ^^^ -- Bug in pd.read_csv() when comment is passed in space deliminted text files (:issue:`16472`) +- Bug in :func:`read_csv` when ``comment`` is passed in a space delimited text file (:issue:`16472`) - Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`) -- Bug that raised IndexError HTML-rendering an empty DataFrame (:issue:`15953`) -- Bug in ``pd.read_csv()`` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`) +- Bug that raised ``IndexError`` when HTML-rendering an empty ``DataFrame`` (:issue:`15953`) +- Bug in :func:`read_csv` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`) - Bug where ``DataFrame.to_html()`` ignored the ``index_names`` parameter (:issue:`16493`) - Bug in ``HDFStore.select_as_multiple()`` where start/stop arguments were not respected (:issue:`16209`) @@ -92,7 +92,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug creating datetime rolling window on an empty DataFrame (:issue:`15819`) +- Bug in creating a time-based rolling window on an empty ``DataFrame`` (:issue:`15819`) - Bug in ``rolling.cov()`` with offset window (:issue:`16058`) - Bug in ``.resample()`` and ``.groupby()`` when aggregating on integers (:issue:`16361`) @@ -100,12 +100,12 @@ Groupby/Resample/Rolling Sparse ^^^^^^ -- Bug in construction of SparseDataFrame from ``scipy.sparse.dok_matrix`` (:issue:`16179`) +- Bug in construction of ``SparseDataFrame`` from ``scipy.sparse.dok_matrix`` (:issue:`16179`) Reshaping ^^^^^^^^^ -- Bug in ``DataFrame.stack`` with unsorted levels in MultiIndex columns (:issue:`16323`) +- Bug in ``DataFrame.stack`` with unsorted levels in ``MultiIndex`` columns (:issue:`16323`) - Bug in ``pd.wide_to_long()`` where no error was raised when ``i`` was not a unique identifier (:issue:`16382`) - Bug in ``Series.isin(..)`` with a list of tuples (:issue:`16394`) - Bug in construction of a ``DataFrame`` with mixed dtypes including an all-NaT column. (:issue:`16395`) @@ -114,7 +114,7 @@ Reshaping Numeric ^^^^^^^ -- Bug in .interpolate(), where limit_direction was not respected when limit=None (default) was passed (:issue:16282) +- Bug in ``.interpolate()``, where ``limit_direction`` was not respected when ``limit=None`` (default) was passed (:issue:`16282`) Categorical ^^^^^^^^^^^ @@ -124,4 +124,4 @@ Categorical Other ^^^^^ -- Bug in ``pd.drop([])`` for DataFrame with non-unique indices (:issue:`16270`) +- Bug in ``DataFrame.drop()`` with an empty-list with non-unique indices (:issue:`16270`) From 9e620bc2898d4eb1d8aa7a3a6c4f2a90d272e78e Mon Sep 17 00:00:00 2001 From: Tong SHEN Date: Sun, 4 Jun 2017 08:52:50 +0800 Subject: [PATCH 652/933] DOC: Fix typo in timeseries.rst (#16590) --- doc/source/timeseries.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 71d85f9b3995b..1dd80aec4fd6c 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1922,7 +1922,7 @@ then you can use a ``PeriodIndex`` and/or ``Series`` of ``Periods`` to do comput span = pd.period_range('1215-01-01', '1381-01-01', freq='D') span -To convert from a ``int64`` based YYYYMMDD representation. +To convert from an ``int64`` based YYYYMMDD representation. .. 
ipython:: python From 473615e4e0e7f01410a0a2cc8ded10af5d393957 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 4 Jun 2017 05:39:31 -0500 Subject: [PATCH 653/933] PERF: vectorize _interp_limit (#16592) * PERF: vectorize _interp_limit * CLN: remove old implementation * fixup! CLN: remove old implementation --- pandas/core/missing.py | 77 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 10 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 51778684d68f5..5aabc9d8730dd 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -143,12 +143,6 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None, 'DatetimeIndex') method = 'values' - def _interp_limit(invalid, fw_limit, bw_limit): - "Get idx of values that won't be filled b/c they exceed the limits." - for x in np.where(invalid)[0]: - if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): - yield x - valid_limit_directions = ['forward', 'backward', 'both'] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: @@ -180,21 +174,29 @@ def _interp_limit(invalid, fw_limit, bw_limit): # default limit is unlimited GH #16282 if limit is None: - limit = len(xvalues) + # limit = len(xvalues) + pass elif not is_integer(limit): raise ValueError('Limit must be an integer') elif limit < 1: raise ValueError('Limit must be greater than 0') # each possible limit_direction - if limit_direction == 'forward': + # TODO: do we need sorted? + if limit_direction == 'forward' and limit is not None: violate_limit = sorted(start_nans | set(_interp_limit(invalid, limit, 0))) - elif limit_direction == 'backward': + elif limit_direction == 'forward': + violate_limit = sorted(start_nans) + elif limit_direction == 'backward' and limit is not None: violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0, limit))) - elif limit_direction == 'both': + elif limit_direction == 'backward': + violate_limit = sorted(end_nans) + elif limit_direction == 'both' and limit is not None: violate_limit = sorted(_interp_limit(invalid, limit, limit)) + else: + violate_limit = [] xvalues = getattr(xvalues, 'values', xvalues) yvalues = getattr(yvalues, 'values', yvalues) @@ -630,3 +632,58 @@ def fill_zeros(result, x, y, name, fill): result = result.reshape(shape) return result + + +def _interp_limit(invalid, fw_limit, bw_limit): + """Get idx of values that won't be filled b/c they exceed the limits. + + This is equivalent to the more readable, but slower + + .. code-block:: python + + for x in np.where(invalid)[0]: + if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): + yield x + """ + # handle forward first; the backward direction is the same except + # 1. operate on the reversed array + # 2. 
subtract the returned indices from N - 1 + N = len(invalid) + + def inner(invalid, limit): + limit = min(limit, N) + windowed = _rolling_window(invalid, limit + 1).all(1) + idx = (set(np.where(windowed)[0] + limit) | + set(np.where((~invalid[:limit + 1]).cumsum() == 0)[0])) + return idx + + if fw_limit == 0: + f_idx = set(np.where(invalid)[0]) + else: + f_idx = inner(invalid, fw_limit) + + if bw_limit == 0: + # then we don't even need to care about backwards, just use forwards + return f_idx + else: + b_idx = set(N - 1 - np.asarray(list(inner(invalid[::-1], bw_limit)))) + if fw_limit == 0: + return b_idx + return f_idx & b_idx + + +def _rolling_window(a, window): + """ + [True, True, False, True, False], 2 -> + + [ + [True, True], + [True, False], + [False, True], + [True, False], + ] + """ + # https://stackoverflow.com/a/6811241 + shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) + strides = a.strides + (a.strides[-1],) + return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) From ce3b0c30b6f9ca78c507e167258f7f17e1c86df8 Mon Sep 17 00:00:00 2001 From: Mahdi Ben Jelloul Date: Sun, 4 Jun 2017 12:42:47 +0200 Subject: [PATCH 654/933] DOC: Fix typo in merge doc for validate kwarg (#16595) --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 25c3c3fe4e48e..2b2e7be62427b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -184,7 +184,7 @@ dataset. * "many_to_one" or "m:1": check if merge keys are unique in right dataset. - * "many_to_may" or "m:m": allowed, but does not result in checks. + * "many_to_many" or "m:m": allowed, but does not result in checks. .. versionadded:: 0.21.0 From 18c316b6fba1e00ae60b571304ffd1d0a00fc9a7 Mon Sep 17 00:00:00 2001 From: "Mehmet Ali \"Mali\" Akmanalp" Date: Sun, 4 Jun 2017 06:44:25 -0400 Subject: [PATCH 655/933] BUG: convert numpy strings in index names in HDF #13492 (#16444) * BUG: Handle numpy strings in index names in HDF5 #13492 * REF: refactor to _ensure_str --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/io/pytables.py | 14 +++++++++++++- pandas/tests/io/test_pytables.py | 23 ++++++++++++++++++++++- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index c9486954258c8..362a80c10694a 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -76,6 +76,7 @@ I/O - Bug that raised ``IndexError`` when HTML-rendering an empty ``DataFrame`` (:issue:`15953`) - Bug in :func:`read_csv` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`) - Bug where ``DataFrame.to_html()`` ignored the ``index_names`` parameter (:issue:`16493`) +- Bug where ``pd.read_hdf()`` returns numpy strings for index names (:issue:`13492`) - Bug in ``HDFStore.select_as_multiple()`` where start/stop arguments were not respected (:issue:`16209`) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index b838260d1f73c..4a1b12414bcc5 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -73,6 +73,18 @@ def _ensure_encoding(encoding): return encoding +def _ensure_str(name): + """Ensure that an index / column name is a str (python 3) or + unicode (python 2); otherwise they may be np.string dtype. + Non-string dtypes are passed through unchanged.
+ + https://github.com/pandas-dev/pandas/issues/13492 + """ + if isinstance(name, compat.string_types): + name = compat.text_type(name) + return name + + Term = Expr @@ -2567,7 +2579,7 @@ def read_index_node(self, node, start=None, stop=None): name = None if 'name' in node._v_attrs: - name = node._v_attrs.name + name = _ensure_str(node._v_attrs.name) index_class = self._alias_to_class(getattr(node._v_attrs, 'index_class', '')) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index ae14f74ece31c..040345db83c2b 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -16,7 +16,7 @@ date_range, timedelta_range, Index, DatetimeIndex, isnull) -from pandas.compat import is_platform_windows, PY3, PY35, BytesIO +from pandas.compat import is_platform_windows, PY3, PY35, BytesIO, text_type from pandas.io.formats.printing import pprint_thing tables = pytest.importorskip('tables') @@ -2920,6 +2920,27 @@ def test_store_index_name_with_tz(self): recons = store['frame'] tm.assert_frame_equal(recons, df) + @pytest.mark.parametrize('table_format', ['table', 'fixed']) + def test_store_index_name_numpy_str(self, table_format): + # GH #13492 + idx = pd.Index(pd.to_datetime([datetime.date(2000, 1, 1), + datetime.date(2000, 1, 2)]), + name=u('cols\u05d2')) + idx1 = pd.Index(pd.to_datetime([datetime.date(2010, 1, 1), + datetime.date(2010, 1, 2)]), + name=u('rows\u05d0')) + df = pd.DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1) + + # This used to fail, returning numpy strings instead of python strings. + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df', format=table_format) + df2 = read_hdf(path, 'df') + + assert_frame_equal(df, df2, check_names=True) + + assert type(df2.index.name) == text_type + assert type(df2.columns.name) == text_type + def test_store_series_name(self): df = tm.makeDataFrame() series = df['A'] From 50a62c17c16d24b8a20be9ef281a86bf589144f2 Mon Sep 17 00:00:00 2001 From: bpraggastis Date: Sun, 4 Jun 2017 03:47:14 -0700 Subject: [PATCH 656/933] ERRR: Raise error in usecols when column doesn't exist but length matches (#16460) * gh-14671 Check if usecols with type string contains a subset of names, if not throws an error * tests added for gh-14671, expected behavior of simultaneous use of usecols and names unclear so these tests are commented out * Review comments --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/io/parsers.py | 6 ++++ pandas/tests/io/parser/usecols.py | 51 +++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 362a80c10694a..e1469cf15e20c 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -72,6 +72,7 @@ I/O ^^^ - Bug in :func:`read_csv` when ``comment`` is passed in a space delimited text file (:issue:`16472`) +- Bug in :func:`read_csv` not raising an exception with nonexistent columns in ``usecols`` when it had the correct length (:issue:`14671`) - Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`) - Bug that raised ``IndexError`` when HTML-rendering an empty ``DataFrame`` (:issue:`15953`) - Bug in :func:`read_csv` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index aab70c8ce2cd4..055d6d045d2f2 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ 
-1649,6 +1649,12 @@ def __init__(self, src, **kwds): if self.usecols: usecols = _evaluate_usecols(self.usecols, self.orig_names) + + # GH 14671 + if (self.usecols_dtype == 'string' and + not set(usecols).issubset(self.orig_names)): + raise ValueError("Usecols do not match names.") + if len(self.names) > len(usecols): self.names = [n for i, n in enumerate(self.names) if (i in usecols or n in usecols)] diff --git a/pandas/tests/io/parser/usecols.py b/pandas/tests/io/parser/usecols.py index 8761d1ccd3da4..f582e5037ca07 100644 --- a/pandas/tests/io/parser/usecols.py +++ b/pandas/tests/io/parser/usecols.py @@ -475,3 +475,54 @@ def test_uneven_length_cols(self): 'C': [3, 5, 4, 3, 3, 7]}) df = self.read_csv(StringIO(data), usecols=usecols) tm.assert_frame_equal(df, expected) + + def test_raise_on_usecols_names_mismatch(self): + # GH 14671 + data = 'a,b,c,d\n1,2,3,4\n5,6,7,8' + + if self.engine == 'c': + msg = 'Usecols do not match names' + else: + msg = 'is not in list' + + usecols = ['a', 'b', 'c', 'd'] + df = self.read_csv(StringIO(data), usecols=usecols) + expected = DataFrame({'a': [1, 5], 'b': [2, 6], 'c': [3, 7], + 'd': [4, 8]}) + tm.assert_frame_equal(df, expected) + + usecols = ['a', 'b', 'c', 'f'] + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(data), usecols=usecols) + + usecols = ['a', 'b', 'f'] + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(data), usecols=usecols) + + names = ['A', 'B', 'C', 'D'] + + df = self.read_csv(StringIO(data), header=0, names=names) + expected = DataFrame({'A': [1, 5], 'B': [2, 6], 'C': [3, 7], + 'D': [4, 8]}) + tm.assert_frame_equal(df, expected) + + # TODO: https://github.com/pandas-dev/pandas/issues/16469 + # usecols = ['A','C'] + # df = self.read_csv(StringIO(data), header=0, names=names, + # usecols=usecols) + # expected = DataFrame({'A': [1,5], 'C': [3,7]}) + # tm.assert_frame_equal(df, expected) + # + # usecols = [0,2] + # df = self.read_csv(StringIO(data), header=0, names=names, + # usecols=usecols) + # expected = DataFrame({'A': [1,5], 'C': [3,7]}) + # tm.assert_frame_equal(df, expected) + + usecols = ['A', 'B', 'C', 'f'] + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(data), header=0, names=names, + usecols=usecols) + usecols = ['A', 'B', 'f'] + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(data), names=names, usecols=usecols) From 91057f392bc36cea73de21818e3ce3bca9eb0220 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 4 Jun 2017 06:29:15 -0500 Subject: [PATCH 657/933] DOC: Whatsnew fixups (#16596) --- doc/source/whatsnew/v0.20.2.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index e1469cf15e20c..31125db0f34d4 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -1,14 +1,12 @@ .. _whatsnew_0202: -v0.20.2 (June 3, 2017) +v0.20.2 (June 4, 2017) ---------------------- This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes, bug fixes and performance improvements. We recommend that all users upgrade to this version. -Highlights include: - .. 
contents:: What's new in v0.20.2 :local: :backlinks: none From bf9997520443e0a5f505f11e11f46290ea95b3a3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 4 Jun 2017 15:52:53 -0500 Subject: [PATCH 658/933] DOC: Update release.rst --- doc/source/release.rst | 50 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/doc/source/release.rst b/doc/source/release.rst index 2587962299569..bf272e243e0dd 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -37,6 +37,56 @@ analysis / manipulation tool available in any language. * Binary installers on PyPI: http://pypi.python.org/pypi/pandas * Documentation: http://pandas.pydata.org +pandas 0.20.2 +------------- + +**Release date:** June 4, 2017 + +This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes, +bug fixes and performance improvements. +We recommend that all users upgrade to this version. + +See the :ref:`v0.20.2 Whatsnew <whatsnew_0202>` overview for an extensive list +of all enhancements and bugs that have been fixed in 0.20.2. + +Thanks +~~~~~~ + +- Aaron Barber +- Andrew 亮 +- Becky Sweger +- Christian Prinoth +- Christian Stade-Schuldt +- DSM +- Erik Fredriksen +- Hugues Valois +- Jeff Reback +- Jeff Tratner +- JimStearns206 +- John W. O'Brien +- Joris Van den Bossche +- JosephWagner +- Keith Webber +- Mehmet Ali "Mali" Akmanalp +- Pankaj Pandey +- Patrick Luo +- Patrick O'Melveny +- Pietro Battiston +- RobinFiveWords +- Ryan Hendrickson +- SimonBaron +- Tom Augspurger +- WBare +- bpraggastis +- chernrick +- chris-b1 +- economy +- gfyoung +- jaredsnyder +- keitakurita +- linebp +- lloydkirk + pandas 0.20.0 / 0.20.1 ---------------------- From 697d0269aa6903be75db97afb45f9976abd2406d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 6 Jun 2017 09:10:00 -0400 Subject: [PATCH 659/933] BUG: pickle compat with UTC tz's (#16611) closes #16608 --- doc/source/whatsnew.rst | 2 + doc/source/whatsnew/v0.20.3.txt | 89 ++++++++++++++++++ pandas/compat/pickle_compat.py | 2 +- .../0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle | Bin 125349 -> 126076 bytes .../tests/io/generate_legacy_storage_files.py | 8 +- 5 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 doc/source/whatsnew/v0.20.3.txt mode change 100644 => 100755 pandas/tests/io/generate_legacy_storage_files.py diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst index b1f9990a3e6af..3385bafc26467 100644 --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -20,6 +20,8 @@ These are new features and improvements of note in each release. .. include:: whatsnew/v0.21.0.txt +.. include:: whatsnew/v0.20.3.txt + .. include:: whatsnew/v0.20.2.txt .. include:: whatsnew/v0.20.0.txt diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt new file mode 100644 index 0000000000000..2032209c4aa23 --- /dev/null +++ b/doc/source/whatsnew/v0.20.3.txt @@ -0,0 +1,89 @@ +.. _whatsnew_0203: + +v0.20.3 (June ??, 2017) +----------------------- + +This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes, +bug fixes and performance improvements. +We recommend that all users upgrade to this version. + +.. contents:: What's new in v0.20.3 + :local: + :backlinks: none + + +.. _whatsnew_0203.enhancements: + +Enhancements +~~~~~~~~~~~~ + + + + + + +.. _whatsnew_0203.performance: + +Performance Improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + + + + + + +..
_whatsnew_0203.bug_fixes: + +Bug Fixes +~~~~~~~~~ + + + + +Conversion +^^^^^^^^^^ + +- Bug in pickle compat prior to the v0.20.x series, when ``UTC`` is a timezone in a Series/DataFrame/Index (:issue:`16608`) + +Indexing +^^^^^^^^ + + + +I/O +^^^ + + + +Plotting +^^^^^^^^ + + + + +Groupby/Resample/Rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + + + +Sparse +^^^^^^ + + + + +Reshaping +^^^^^^^^^ + + + +Numeric +^^^^^^^ + + +Categorical +^^^^^^^^^^^ + + +Other +^^^^^ diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index b875bbb0d63c0..f6223c48994ae 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -15,7 +15,7 @@ def load_reduce(self): args = stack.pop() func = stack[-1] - if type(args[0]) is type: + if len(args) and type(args[0]) is type: n = args[0].__name__ # noqa try: diff --git a/pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle b/pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle index 6bb02672a4151c8d6536127fc94e68634e56c86d..75ea95ff402c4e9f0c93ef80e6c04baf7f0a70d7 100644 GIT binary patch delta 785 zcmah{Ur19?7(d_H*{*vCuc`ZU4d>jp=7gkK*7 zbDG^U-F;$y>Z#7`OHC!lFJ2x?bta}}FQG0)G3l&>LHyKm2&K{<2m(&4u44d}5@v*~-n>`aa6m6)H86ooF zVb!;w`sAa^6^4sZJ6&ezZnRTmc*NGKJ@d?hu)~3h&Tvw4im%F~UCw)k!CEKIx7g_m zhHs^A{)k?!>R=kg?-f8|#x+T*FBr{JN*5($dQ{1oxGv}2cOSQwJd6v2UYq9ii9mOF zlKQYw`aRAXhq)qci@B7x&5WCV2?|=?KSE(r>uCQ-K}TyKR-G*qc9Z7tT%cFnhd1Ir5Br*S!Bu>mgir7f1XJYfo`F!-#$;UsyRIra-ib&&MK&_? z50Ey|LqR-dsn|ji(@_VVv6Q|{#)AEBJ3P3Sk8b6oaC?MG!q$Z~2*zPK delta 603 zcmex!gMH~`cGd=#sXDVZvTjk{JVC9Bk-eCOfg!7eWpbgEq(BakonaX}rFM!&hD}yA z%VYy#$;rVQ;!GJVlP^k{Y+j(TpHW~6ilXH#lk>HtL??g*-Lj0C7#K>EvJNl<%>?RM zsA$IqHotiDBP~rvrec=KilU|<%3oPha6i-_4TyDTSSEiE6=%FOd7`p3Q%(3CbHOv;eYN}1eiS%$2bv1GHW6%(W26R3}%!OzYH3VtS8 zw#kZe;!MVDll>LN89kd9S#Mut%{ViW=^)#7+qsMeevC@f^IkLVg-6Zw;5Uq#0GDgF A(*OVf diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py old mode 100644 new mode 100755 index 22c62b738e6a2..996965999724e --- a/pandas/tests/io/generate_legacy_storage_files.py +++ b/pandas/tests/io/generate_legacy_storage_files.py @@ -1,3 +1,5 @@ +#!/usr/env/bin python + """ self-contained to write legacy storage (pickle/msgpack) files """ from __future__ import print_function from warnings import catch_warnings @@ -125,7 +127,11 @@ def create_data(): mixed_dup=mixed_dup_df, dt_mixed_tzs=DataFrame({ u'A': Timestamp('20130102', tz='US/Eastern'), - u'B': Timestamp('20130603', tz='CET')}, index=range(5)) + u'B': Timestamp('20130603', tz='CET')}, index=range(5)), + dt_mixed2_tzs=DataFrame({ + u'A': Timestamp('20130102', tz='US/Eastern'), + u'B': Timestamp('20130603', tz='CET'), + u'C': Timestamp('20130603', tz='UTC')}, index=range(5)) ) with catch_warnings(record=True): From 10c17d4896799cf1e8f313025d1071e5abbb65f9 Mon Sep 17 00:00:00 2001 From: Jean Helie Date: Wed, 7 Jun 2017 01:41:31 +0100 Subject: [PATCH 660/933] Fix some lgtm alerts (#16613) --- pandas/core/dtypes/cast.py | 2 +- pandas/core/generic.py | 8 ++++---- pandas/core/indexes/interval.py | 4 ++-- pandas/core/internals.py | 3 --- pandas/core/sparse/array.py | 2 +- pandas/io/parsers.py | 2 +- pandas/tseries/offsets.py | 1 - 7 files changed, 9 insertions(+), 13 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index fd61813a57c98..16b0a5c8a74ca 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -668,7 +668,7 @@ def maybe_convert_objects(values, convert_dates=True, convert_numeric=True, if 
convert_timedeltas == 'coerce': from pandas.core.tools.timedeltas import to_timedelta - new_values = to_timedelta(values, coerce=True) + new_values = to_timedelta(values, errors='coerce') # if we are all nans then leave me alone if not isnull(new_values).all(): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 98999ec267c82..accb7d0db1d2c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4285,7 +4285,7 @@ def asof(self, where, subset=None): raise ValueError("subset is not valid for Series") elif self.ndim > 2: raise NotImplementedError("asof is not implemented " - "for {type}".format(type(self))) + "for {type}".format(type=type(self))) else: if subset is None: subset = self.columns @@ -4980,7 +4980,7 @@ def last(self, offset): offset = to_offset(offset) - start_date = start = self.index[-1] - offset + start_date = self.index[-1] - offset start = self.index.searchsorted(start_date, side='right') return self.iloc[start:] @@ -5303,8 +5303,8 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, # slice me out of the other else: - raise NotImplemented("cannot align with a higher dimensional " - "NDFrame") + raise NotImplementedError("cannot align with a higher " + "dimensional NDFrame") elif is_list_like(other): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index b1523cd6c0d0c..e6b2bc0953680 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1053,11 +1053,11 @@ def interval_range(start=None, end=None, freq=None, periods=None, if periods is None or end is None: raise ValueError("must specify 2 of start, end, periods") start = end - periods * freq - elif end is None: + if end is None: if periods is None or start is None: raise ValueError("must specify 2 of start, end, periods") end = start + periods * freq - elif periods is None: + if periods is None: if start is None or end is None: raise ValueError("must specify 2 of start, end, periods") pass diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 15851a17274ca..58690ad632152 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -4645,7 +4645,6 @@ def _block2d_to_blocknd(values, placement, shape, labels, ref_items): pvalues = np.empty(panel_shape, dtype=dtype) pvalues.fill(fill_value) - values = values for i in range(len(placement)): pvalues[i].flat[mask] = values[:, i] @@ -5154,8 +5153,6 @@ def dtype(self): return _get_dtype(maybe_promote(self.block.dtype, self.block.fill_value)[0]) - return self._dtype - @cache_readonly def is_null(self): if self.block is None: diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 8ac9d3916573e..c75de01b98e4e 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -125,7 +125,7 @@ def _sparse_array_op(left, right, op, name, series=False): name = name[1:] if name in ('and', 'or') and dtype == 'bool': - opname = 'sparse_{name}_uint8'.format(name=name, dtype=dtype) + opname = 'sparse_{name}_uint8'.format(name=name) # to make template simple, cast here left_sp_values = left.sp_values.view(np.uint8) right_sp_values = right.sp_values.view(np.uint8) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 055d6d045d2f2..c2d5a629b03a3 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2211,7 +2211,7 @@ def _exclude_implicit_index(self, alldata): def get_chunk(self, size=None): if size is None: size = self.chunksize - return self.read(nrows=size) + return self.read(rows=size) def 
_convert_data(self, data): # apply converters diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index f9f4adc1b2c81..2a120a0696836 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1596,7 +1596,6 @@ def apply(self, other): if otherDay != self.weekday: other = other + timedelta((self.weekday - otherDay) % 7) k = k - 1 - other = other for i in range(k): other = other + self._inc else: From dfebd8a56cdcfa6d3d1217a3a22e882b5f1591e2 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 8 Jun 2017 06:47:18 -0400 Subject: [PATCH 661/933] BLD: fix numpy on 3.6 build as 1.13 was released but no deps are built for it (#16633) --- ci/requirements-3.6.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-3.6.build b/ci/requirements-3.6.build index 1c4b46aea3865..8d09e0ee93070 100644 --- a/ci/requirements-3.6.build +++ b/ci/requirements-3.6.build @@ -2,5 +2,5 @@ python=3.6* python-dateutil pytz nomkl -numpy +numpy=1.12* cython From 2b44868a78e375c8ce953e08682c8a851ec7fa83 Mon Sep 17 00:00:00 2001 From: DSM Date: Thu, 8 Jun 2017 06:47:32 -0400 Subject: [PATCH 662/933] BUG: Fix Series.get failure on missing NaN (#8569) (#16619) --- doc/source/whatsnew/v0.20.3.txt | 2 +- pandas/core/indexes/numeric.py | 2 ++ pandas/tests/indexes/test_multi.py | 8 ++++++++ pandas/tests/indexes/test_numeric.py | 8 ++++++++ pandas/tests/series/test_indexing.py | 15 +++++++++++++++ 5 files changed, 34 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 2032209c4aa23..049737f948e17 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -48,7 +48,7 @@ Conversion Indexing ^^^^^^^^ - +- Bug in ``Float64Index`` causing an empty array instead of None to be returned from ``.get(np.nan)`` on a Series whose index did not contain any NaNs (:issue:`8569`) I/O ^^^ diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index bdae0ac7ac5e9..72d521cbe2d60 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -369,6 +369,8 @@ def get_loc(self, key, method=None, tolerance=None): except (ValueError, IndexError): # should only need to catch ValueError here but on numpy # 1.7 .item() can raise IndexError when NaNs are present + if not len(nan_idxs): + raise KeyError(key) return nan_idxs except (TypeError, NotImplementedError): pass diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index ba917f33d8595..7d2e6f495311f 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1172,6 +1172,14 @@ def test_get_loc_level(self): assert result == expected assert new_index.equals(index.droplevel(0)) + def test_get_loc_missing_nan(self): + # GH 8569 + idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) + assert isinstance(idx.get_loc(1), slice) + pytest.raises(KeyError, idx.get_loc, 3) + pytest.raises(KeyError, idx.get_loc, np.nan) + pytest.raises(KeyError, idx.get_loc, [np.nan]) + def test_slice_locs(self): df = tm.makeTimeDataFrame() stacked = df.stack() diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 29d4214fd549b..62ac337d02727 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -371,6 +371,14 @@ def test_get_loc_na(self): assert idx.get_loc(1) == 1 pytest.raises(KeyError, idx.slice_locs, np.nan) + def test_get_loc_missing_nan(self): + # GH 8569 + idx = Float64Index([1, 2]) + 
assert idx.get_loc(1) == 0 + pytest.raises(KeyError, idx.get_loc, 3) + pytest.raises(KeyError, idx.get_loc, np.nan) + pytest.raises(KeyError, idx.get_loc, [np.nan]) + def test_contains_nans(self): i = Float64Index([1.0, 2.0, np.nan]) assert np.nan in i diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 6ded4d593a571..7774d10c5eaf8 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -70,6 +70,21 @@ def test_get(self): result = vc.get(True, default='Missing') assert result == 'Missing' + def test_get_nan(self): + # GH 8569 + s = pd.Float64Index(range(10)).to_series() + assert s.get(np.nan) is None + assert s.get(np.nan, default='Missing') == 'Missing' + + # ensure that fixing the above hasn't broken get + # with multiple elements + idx = [20, 30] + assert_series_equal(s.get(idx), + Series([np.nan] * 2, index=idx)) + idx = [np.nan, np.nan] + assert_series_equal(s.get(idx), + Series([np.nan] * 2, index=idx)) + def test_delitem(self): # GH 5542 From 722b38656a53427d663540b63229005a399e33d0 Mon Sep 17 00:00:00 2001 From: DSM Date: Thu, 8 Jun 2017 06:48:00 -0400 Subject: [PATCH 663/933] TST: NaN in MultiIndex should not become a string (#7031) (#16625) --- pandas/tests/indexes/test_multi.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 7d2e6f495311f..3f6fd8c8aa827 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2866,3 +2866,24 @@ def test_tuples_with_name_string(self): pd.Index(li, name='abc') with pytest.raises(ValueError): pd.Index(li, name='a') + + def test_nan_stays_float(self): + + # GH 7031 + idx0 = pd.MultiIndex(levels=[["A", "B"], []], + labels=[[1, 0], [-1, -1]], + names=[0, 1]) + idx1 = pd.MultiIndex(levels=[["C"], ["D"]], + labels=[[0], [0]], + names=[0, 1]) + idxm = idx0.join(idx1, how='outer') + assert pd.isnull(idx0.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isnull(idxm.get_level_values(1)[:-1]).all() + + df0 = pd.DataFrame([[1, 2]], index=idx0) + df1 = pd.DataFrame([[3, 4]], index=idx1) + dfm = df0 - df1 + assert pd.isnull(df0.index.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isnull(dfm.index.get_level_values(1)[:-1]).all() From 73930c58e8eac4031608bb8c4bf624d77e1d1dcb Mon Sep 17 00:00:00 2001 From: DSM Date: Thu, 8 Jun 2017 06:48:22 -0400 Subject: [PATCH 664/933] TST: verify we can add and subtract from indices (#8142) (#16629) --- pandas/tests/indexes/test_base.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d9f8e5e7f382b..18dbe6624008a 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1800,6 +1800,25 @@ def test_string_index_repr(self): assert coerce(idx) == expected + @pytest.mark.parametrize('dtype', [np.int64, np.float64]) + @pytest.mark.parametrize('delta', [1, 0, -1]) + def test_addsub_arithmetic(self, dtype, delta): + # GH 8142 + delta = dtype(delta) + idx = pd.Index([10, 11, 12], dtype=dtype) + result = idx + delta + expected = pd.Index(idx.values + delta, dtype=dtype) + tm.assert_index_equal(result, expected) + + # this subtraction used to fail + result = idx - delta + expected = pd.Index(idx.values - delta, dtype=dtype) + tm.assert_index_equal(result, expected) + + tm.assert_index_equal(idx + idx, 2 * idx) + tm.assert_index_equal(idx - 
idx, 0 * idx) + assert not (idx - idx).empty + class TestMixedIntIndex(Base): # Mostly the tests from common.py for which the results differ From 9fdea65e5675f724efbb6afd68b5862128aa388d Mon Sep 17 00:00:00 2001 From: Pradyumna Reddy Chinthala Date: Fri, 9 Jun 2017 21:21:11 +0530 Subject: [PATCH 665/933] BUG: conversion of Series to Categorical (#16557) fix #16524 --- doc/source/whatsnew/v0.20.3.txt | 1 + pandas/core/internals.py | 2 +- pandas/tests/series/test_dtypes.py | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 049737f948e17..52f7701724f18 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -44,6 +44,7 @@ Conversion ^^^^^^^^^^ - Bug in pickle compat prior to the v0.20.x series, when ``UTC`` is a timezone in a Series/DataFrame/Index (:issue:`16608`) +- Bug in Series construction when passing a Series with ``dtype='category'`` (:issue:`16524`). Indexing ^^^^^^^^ diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 58690ad632152..f2a7ac76481d4 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -471,7 +471,7 @@ def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs): **kwargs) def _astype(self, dtype, copy=False, errors='raise', values=None, - klass=None, mgr=None, **kwargs): + klass=None, mgr=None, raise_on_error=False, **kwargs): """ Coerce to the new type (if copy=True, return a new copy) raise on an except if raise == True diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index e084fa58d6c51..9ab02a8c2aad7 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -248,3 +248,12 @@ def test_intercept_astype_object(self): result = df.values.squeeze() assert (result[:, 0] == expected.values).all() + + def test_series_to_categorical(self): + # see gh-16524: test conversion of Series to Categorical + series = Series(['a', 'b', 'c']) + + result = Series(series, dtype='category') + expected = Series(['a', 'b', 'c'], dtype='category') + + tm.assert_series_equal(result, expected) From 789f7bbb52f279bd1ed53abf1a9580f682c2d6b9 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 9 Jun 2017 18:45:29 -0400 Subject: [PATCH 666/933] BLD: fix numpy on 2.7 build as 1.13 was released but no deps are built for it (#16633) (#16650) --- ci/requirements-2.7.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-2.7.build b/ci/requirements-2.7.build index 415df13179fcf..a7b950e615464 100644 --- a/ci/requirements-2.7.build +++ b/ci/requirements-2.7.build @@ -2,5 +2,5 @@ python=2.7* python-dateutil=2.4.1 pytz=2013b nomkl -numpy +numpy=1.12* cython=0.23 From 5aba6659e422e985683cfb46c07c3364a02b6e5b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 9 Jun 2017 15:46:33 -0700 Subject: [PATCH 667/933] CLN: make license file machine readable (#16649) Splits extra information about the license and copyright holders to AUTHORS.md. 
--- AUTHORS.md | 57 ++++++++++++++++++++++++++++ LICENSE | 106 ++++++++++++----------------------------------------- 2 files changed, 81 insertions(+), 82 deletions(-) create mode 100644 AUTHORS.md diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 0000000000000..dcaaea101f4c8 --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,57 @@ +About the Copyright Holders +=========================== + +* Copyright (c) 2008-2011 AQR Capital Management, LLC + + AQR Capital Management began pandas development in 2008. Development was + led by Wes McKinney. AQR released the source under this license in 2009. +* Copyright (c) 2011-2012, Lambda Foundry, Inc. + + Wes is now an employee of Lambda Foundry, and remains the pandas project + lead. +* Copyright (c) 2011-2012, PyData Development Team + + The PyData Development Team is the collection of developers of the PyData + project. This includes all of the PyData sub-projects, including pandas. The + core team that coordinates development on GitHub can be found here: + http://github.com/pydata. + +Full credits for pandas contributors can be found in the documentation. + +Our Copyright Policy +==================== + +PyData uses a shared copyright model. Each contributor maintains copyright +over their contributions to PyData. However, it is important to note that +these contributions are typically only changes to the repositories. Thus, +the PyData source code, in its entirety, is not the copyright of any single +person or institution. Instead, it is the collective copyright of the +entire PyData Development Team. If individual contributors want to maintain +a record of what changes/contributions they have specific copyright on, +they should indicate their copyright in the commit message of the change +when they commit the change to one of the PyData repositories. + +With this in mind, the following banner should be used in any source code +file to indicate the copyright and license terms: + +``` +#----------------------------------------------------------------------------- +# Copyright (c) 2012, PyData Development Team +# All rights reserved. +# +# Distributed under the terms of the BSD Simplified License. +# +# The full license is in the LICENSE file, distributed with this software. +#----------------------------------------------------------------------------- +``` + +Other licenses can be found in the LICENSES directory. + +License +======= + +pandas is distributed under a 3-clause ("Simplified" or "New") BSD +license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have +BSD-compatible licenses, are included. Their licenses follow the pandas +license. + diff --git a/LICENSE b/LICENSE index c9b8834e8774b..924de26253bf4 100644 --- a/LICENSE +++ b/LICENSE @@ -1,87 +1,29 @@ -======= -License -======= +BSD 3-Clause License -pandas is distributed under a 3-clause ("Simplified" or "New") BSD -license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have -BSD-compatible licenses, are included. Their licenses follow the pandas -license. - -pandas license -============== - -Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team -All rights reserved. - -Copyright (c) 2008-2011 AQR Capital Management, LLC +Copyright (c) 2008-2012, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team All rights reserved. 
Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of the copyright holder nor the names of any - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -About the Copyright Holders -=========================== - -AQR Capital Management began pandas development in 2008. Development was -led by Wes McKinney. AQR released the source under this license in 2009. -Wes is now an employee of Lambda Foundry, and remains the pandas project -lead. - -The PyData Development Team is the collection of developers of the PyData -project. This includes all of the PyData sub-projects, including pandas. The -core team that coordinates development on GitHub can be found here: -http://github.com/pydata. - -Full credits for pandas contributors can be found in the documentation. - -Our Copyright Policy -==================== - -PyData uses a shared copyright model. 
Each contributor maintains copyright -over their contributions to PyData. However, it is important to note that -these contributions are typically only changes to the repositories. Thus, -the PyData source code, in its entirety, is not the copyright of any single -person or institution. Instead, it is the collective copyright of the -entire PyData Development Team. If individual contributors want to maintain -a record of what changes/contributions they have specific copyright on, -they should indicate their copyright in the commit message of the change -when they commit the change to one of the PyData repositories. - -With this in mind, the following banner should be used in any source code -file to indicate the copyright and license terms: - -#----------------------------------------------------------------------------- -# Copyright (c) 2012, PyData Development Team -# All rights reserved. -# -# Distributed under the terms of the BSD Simplified License. -# -# The full license is in the LICENSE file, distributed with this software. -#----------------------------------------------------------------------------- - -Other licenses can be found in the LICENSES directory. \ No newline at end of file From ec6bf6deaf502ac05a7120df13bd9b13cb3083f6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 9 Jun 2017 21:28:41 -0400 Subject: [PATCH 668/933] fix pytest-xidst version as 1.17 appears buggy (#16652) --- ci/install_travis.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 8cf6f2ce636da..f4e6c979f28a4 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -107,7 +107,7 @@ if [ -e ${REQ} ]; then fi time conda install -n pandas pytest -time pip install pytest-xdist +time pip install pytest-xdist==1.16.0 if [ "$LINT" ]; then conda install flake8 From dc716b061d9a0bc6a59f4e02d72b9952cce28927 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 9 Jun 2017 22:09:24 -0400 Subject: [PATCH 669/933] COMPAT: numpy 1.13 test compat (#16654) * COMPAT: numpy 1.13 test compat * CI: fix doc build to 1.12 --- ci/requirements-3.6_DOC.build | 2 +- pandas/compat/numpy/__init__.py | 3 +++ pandas/tests/test_expressions.py | 6 +++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ci/requirements-3.6_DOC.build b/ci/requirements-3.6_DOC.build index bdcfe28105866..37faaa7e4db88 100644 --- a/ci/requirements-3.6_DOC.build +++ b/ci/requirements-3.6_DOC.build @@ -1,5 +1,5 @@ python=3.6* python-dateutil pytz -numpy +numpy=1.12* cython diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 4a9a2647ece0f..2c5a18973afa8 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -15,6 +15,7 @@ _np_version_under1p11 = _nlv < '1.11' _np_version_under1p12 = _nlv < '1.12' _np_version_under1p13 = _nlv < '1.13' +_np_version_under1p14 = _nlv < '1.14' if _nlv < '1.7.0': raise ImportError('this version of pandas is incompatible with ' @@ -74,4 +75,6 @@ def np_array_datetime64_compat(arr, *args, **kwargs): '_np_version_under1p10', '_np_version_under1p11', '_np_version_under1p12', + '_np_version_under1p13', + '_np_version_under1p14' ] diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index fae7bfa513dcd..08c3a25e66b0e 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -13,7 +13,7 @@ from pandas.core.api import DataFrame, Panel from pandas.core.computation import expressions as expr -from pandas import compat, 
_np_version_under1p11 +from pandas import compat, _np_version_under1p11, _np_version_under1p13 from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_panel4d_equal, slow) @@ -420,6 +420,10 @@ def test_bool_ops_warn_on_arithmetic(self): f = getattr(operator, name) fe = getattr(operator, sub_funcs[subs[op]]) + # >= 1.13.0 these are now TypeErrors + if op == '-' and not _np_version_under1p13: + continue + with tm.use_numexpr(True, min_elements=5): with tm.assert_produces_warning(check_stacklevel=False): r = f(df, df) From ceaf85233d434a226b23f891465b4abfdc602e46 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 10 Jun 2017 11:09:30 -0400 Subject: [PATCH 670/933] Revert "fix pytest-xidst version as 1.17 appears buggy (#16652)" (#16657) This reverts commit ec6bf6deaf502ac05a7120df13bd9b13cb3083f6. 1.17.1 released that fixes --- ci/install_travis.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index f4e6c979f28a4..8cf6f2ce636da 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -107,7 +107,7 @@ if [ -e ${REQ} ]; then fi time conda install -n pandas pytest -time pip install pytest-xdist==1.16.0 +time pip install pytest-xdist if [ "$LINT" ]; then conda install flake8 From 4dcb9988d5e4a1bfea7acc2131d76e24fcc94278 Mon Sep 17 00:00:00 2001 From: Milo Date: Sun, 11 Jun 2017 17:48:30 +0200 Subject: [PATCH 671/933] DOC: Fix typo in shared doc for align method (#16665) --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index accb7d0db1d2c..98977bf8a595f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5045,7 +5045,7 @@ def ranker(data): return ranker(data) _shared_docs['align'] = (""" - Align two object on their axes with the + Align two objects on their axes with the specified join method for each axis Index Parameters From 18a428da81f7fe0f41f8ec78f03a385b731bb6a3 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 11 Jun 2017 10:49:08 -0500 Subject: [PATCH 672/933] MAINT: Give nice error message if xarray import fails (#16664) --- pandas/core/generic.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 98977bf8a595f..8b186bab29d5e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1501,7 +1501,17 @@ def to_xarray(self): ----- See the `xarray docs <http://xarray.pydata.org/>`__ """ - import xarray + + try: + import xarray + except ImportError: + # Give a nice error message + raise ImportError("the xarray library is not installed\n" + "you can install via conda\n" + "conda install xarray\n" + "or via pip\n" + "pip install xarray\n") + if self.ndim == 1: return xarray.DataArray.from_series(self) elif self.ndim == 2: From 5a6f50d039e0b01dd835ee5401282ab58b22c534 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 11 Jun 2017 16:08:56 -0700 Subject: [PATCH 673/933] TST: Add test for groupby mean of ints (#16670) Closes gh-11199.
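For reference, a minimal sketch of the behavior this test pins down (it reuses the toy frame from the test below and is not part of the patch itself): a groupby mean over integer columns must come back as float64 rather than being truncated to int64.

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3, 4, 5],
                       "b": [1, 2, 2, 4, 5]})

    # the two rows with b == 2 average to 2.5, so the "a" column of the
    # result has to be float64, not an int64 column rounded back down
    print(df.groupby("b")[["a"]].mean())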
--- pandas/tests/groupby/test_aggregate.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index d7b46e6748b99..0de263589c6d6 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -234,6 +234,27 @@ def test_agg_grouping_is_list_tuple(self): expected = grouped.mean() tm.assert_frame_equal(result, expected) + def test_aggregate_float64_no_int64(self): + # see gh-11199 + df = DataFrame({"a": [1, 2, 3, 4, 5], + "b": [1, 2, 2, 4, 5], + "c": [1, 2, 3, 4, 5]}) + + expected = DataFrame({"a": [1, 2.5, 4, 5]}, + index=[1, 2, 4, 5]) + expected.index.name = "b" + + result = df.groupby("b")[["a"]].mean() + tm.assert_frame_equal(result, expected) + + expected = DataFrame({"a": [1, 2.5, 4, 5], + "c": [1, 2.5, 4, 5]}, + index=[1, 2, 4, 5]) + expected.index.name = "b" + + result = df.groupby("b")[["a", "c"]].mean() + tm.assert_frame_equal(result, expected) + def test_aggregate_api_consistency(self): # GH 9052 # make sure that the aggregates via dict From d915c7ec5ded5cae5292d8df1feb135432648718 Mon Sep 17 00:00:00 2001 From: chris-b1 Date: Sun, 11 Jun 2017 18:19:39 -0500 Subject: [PATCH 674/933] DOC: improve some Excel docstring (#16658) --- doc/source/io.rst | 10 ++++++++++ pandas/io/excel.py | 11 +++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index bd81b478b5326..7ea476514e88d 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -712,6 +712,16 @@ index column inference and discard the last column, pass ``index_col=False``: pd.read_csv(StringIO(data)) pd.read_csv(StringIO(data), index_col=False) +If a subset of data is being parsed using the ``usecols`` option, the +``index_col`` specification is based on that subset, not the original data. + +.. ipython:: python + + data = 'a,b,c\n4,apple,bat,\n8,orange,cow,' + print(data) + pd.read_csv(StringIO(data), usecols=['b', 'c']) + pd.read_csv(StringIO(data), usecols=['b', 'c'], index_col=0) + .. _io.parse_dates: Date Handling diff --git a/pandas/io/excel.py b/pandas/io/excel.py index a4d2fabf76a41..e3c9ae3f164cb 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -85,7 +85,9 @@ index_col : int, list of ints, default None Column (0-indexed) to use as the row labels of the DataFrame. Pass None if there is no such column. If a list is passed, - those columns will be combined into a ``MultiIndex`` + those columns will be combined into a ``MultiIndex``. If a + subset of data is selected with ``parse_cols``, index_col + is based on the subset. names : array-like, default None List of column names to use. If file contains no header row, then you should explicitly pass header=None @@ -96,7 +98,7 @@ content. dtype : Type name or dict of column -> type, default None Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32} - Use `str` or `object` to preserve and not interpret dtype. + Use `object` to preserve data as stored in Excel and not interpret dtype. If converters are specified, they will be applied INSTEAD of dtype conversion. @@ -116,8 +118,9 @@ * If None then parse all columns, * If int then indicates last column to be parsed * If list of ints then indicates list of column numbers to be parsed - * If string then indicates comma separated list of column names and - column ranges (e.g. "A:E" or "A,C,E:F") + * If string then indicates comma separated list of Excel column letters and + column ranges (e.g. "A:E" or "A,C,E:F"). 
Ranges are inclusive of + both sides. squeeze : boolean, default False If the parsed data only contains one column then return a Series na_values : scalar, str, list-like, or dict, default None From c550372910435bcfa8ce35d134c0a4ba761fc084 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 11 Jun 2017 16:42:43 -0700 Subject: [PATCH 675/933] BUG: Revert gh-16039 (#16663) gh-16039 created a bug in which files containing byte-like data could break, as EOF characters mid-field (despite being quoted) would cause premature line breaks. Given that this PR was a performance patch, this commit can be safely reverted. Closes gh-16559. --- doc/source/whatsnew/v0.20.3.txt | 1 + pandas/_libs/src/parser/io.c | 142 +++++++++++++------------------ pandas/_libs/src/parser/io.h | 28 ++++-- pandas/io/parsers.py | 3 +- pandas/tests/io/parser/common.py | 15 ++++ 5 files changed, 98 insertions(+), 91 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 52f7701724f18..f21230693686e 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -54,6 +54,7 @@ Indexing I/O ^^^ +- Bug in ``pd.read_csv()`` in which files containing EOF characters mid-field could fail with the C engine on Windows (:issue:`16039`, :issue:`16559`) Plotting diff --git a/pandas/_libs/src/parser/io.c b/pandas/_libs/src/parser/io.c index dee7d9d9281c4..4381ef19e991b 100644 --- a/pandas/_libs/src/parser/io.c +++ b/pandas/_libs/src/parser/io.c @@ -9,40 +9,33 @@ The full license is in the LICENSE file, distributed with this software. #include "io.h" -#include -#include -#include - /* On-disk FILE, uncompressed */ void *new_file_source(char *fname, size_t buffer_size) { file_source *fs = (file_source *)malloc(sizeof(file_source)); - if (fs == NULL) { + fs->fp = fopen(fname, "rb"); + + if (fs->fp == NULL) { + free(fs); return NULL; } + setbuf(fs->fp, NULL); - fs->fd = open(fname, O_RDONLY); - if (fs->fd == -1) { - goto err_free; - } + fs->initial_file_pos = ftell(fs->fp); // Only allocate this heap memory if we are not memory-mapping the file fs->buffer = (char *)malloc((buffer_size + 1) * sizeof(char)); if (fs->buffer == NULL) { - goto err_free; + return NULL; } - memset(fs->buffer, '\0', buffer_size + 1); - fs->size = buffer_size; + memset(fs->buffer, 0, buffer_size + 1); + fs->buffer[buffer_size] = '\0'; return (void *)fs; - -err_free: - free(fs); - return NULL; } void *new_rd_source(PyObject *obj) { @@ -63,12 +56,12 @@ void *new_rd_source(PyObject *obj) { */ -int del_file_source(void *ptr) { - file_source *fs = ptr; +int del_file_source(void *fs) { if (fs == NULL) return 0; - free(fs->buffer); - close(fs->fd); + /* allocated on the heap */ + free(FS(fs)->buffer); + fclose(FS(fs)->fp); free(fs); return 0; @@ -90,31 +83,17 @@ int del_rd_source(void *rds) { void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read, int *status) { - file_source *fs = FS(source); - ssize_t rv; + file_source *src = FS(source); - if (nbytes > fs->size) { - nbytes = fs->size; - } + *bytes_read = fread((void *)src->buffer, sizeof(char), nbytes, src->fp); - rv = read(fs->fd, fs->buffer, nbytes); - switch (rv) { - case -1: - *status = CALLING_READ_FAILED; - *bytes_read = 0; - return NULL; - case 0: + if (*bytes_read == 0) { *status = REACHED_EOF; - *bytes_read = 0; - return NULL; - default: + } else { *status = 0; - *bytes_read = rv; - fs->buffer[rv] = '\0'; - break; } - return (void *)fs->buffer; + return (void *)src->buffer; } void *buffer_rd_bytes(void *source, size_t nbytes, size_t 
*bytes_read, @@ -173,58 +152,52 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, #ifdef HAVE_MMAP #include +#include void *new_mmap(char *fname) { + struct stat buf; + int fd; memory_map *mm; - struct stat stat; - size_t filesize; + off_t filesize; mm = (memory_map *)malloc(sizeof(memory_map)); + mm->fp = fopen(fname, "rb"); + + fd = fileno(mm->fp); + if (fstat(fd, &buf) == -1) { + fprintf(stderr, "new_file_buffer: fstat() failed. errno =%d\n", errno); + return NULL; + } + filesize = buf.st_size; /* XXX This might be 32 bits. */ + if (mm == NULL) { + /* XXX Eventually remove this print statement. */ fprintf(stderr, "new_file_buffer: malloc() failed.\n"); - return (NULL); - } - mm->fd = open(fname, O_RDONLY); - if (mm->fd == -1) { - fprintf(stderr, "new_file_buffer: open(%s) failed. errno =%d\n", - fname, errno); - goto err_free; + return NULL; } + mm->size = (off_t)filesize; + mm->line_number = 0; - if (fstat(mm->fd, &stat) == -1) { - fprintf(stderr, "new_file_buffer: fstat() failed. errno =%d\n", - errno); - goto err_close; - } - filesize = stat.st_size; /* XXX This might be 32 bits. */ + mm->fileno = fd; + mm->position = ftell(mm->fp); + mm->last_pos = (off_t)filesize; - mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, mm->fd, 0); - if (mm->memmap == MAP_FAILED) { + mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0); + if (mm->memmap == NULL) { /* XXX Eventually remove this print statement. */ fprintf(stderr, "new_file_buffer: mmap() failed.\n"); - goto err_close; + free(mm); + mm = NULL; } - mm->size = (off_t)filesize; - mm->position = 0; - - return mm; - -err_close: - close(mm->fd); -err_free: - free(mm); - return NULL; + return (void *)mm; } -int del_mmap(void *ptr) { - memory_map *mm = ptr; - - if (mm == NULL) return 0; +int del_mmap(void *src) { + munmap(MM(src)->memmap, MM(src)->size); - munmap(mm->memmap, mm->size); - close(mm->fd); - free(mm); + fclose(MM(src)->fp); + free(src); return 0; } @@ -232,27 +205,28 @@ int del_mmap(void *ptr) { void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read, int *status) { void *retval; - memory_map *src = source; - size_t remaining = src->size - src->position; + memory_map *src = MM(source); - if (remaining == 0) { + if (src->position == src->last_pos) { *bytes_read = 0; *status = REACHED_EOF; return NULL; } - if (nbytes > remaining) { - nbytes = remaining; - } - retval = src->memmap + src->position; - /* advance position in mmap data structure */ - src->position += nbytes; + if (src->position + (off_t)nbytes > src->last_pos) { + // fewer than nbytes remaining + *bytes_read = src->last_pos - src->position; + } else { + *bytes_read = nbytes; + } - *bytes_read = nbytes; *status = 0; + /* advance position in mmap data structure */ + src->position += *bytes_read; + return retval; } diff --git a/pandas/_libs/src/parser/io.h b/pandas/_libs/src/parser/io.h index d22e8ddaea88d..77121e9a169c1 100644 --- a/pandas/_libs/src/parser/io.h +++ b/pandas/_libs/src/parser/io.h @@ -15,10 +15,19 @@ The full license is in the LICENSE file, distributed with this software. typedef struct _file_source { /* The file being read. */ - int fd; + FILE *fp; char *buffer; - size_t size; + + /* file position when the file_buffer was created. */ + off_t initial_file_pos; + + /* Offset in the file of the data currently in the buffer. */ + off_t buffer_file_pos; + + /* Actual number of bytes in the current buffer. (Can be less than + * buffer_size.) 
*/ + off_t last_pos; } file_source; #define FS(source) ((file_source *)source) @@ -28,13 +37,20 @@ typedef struct _file_source { #endif typedef struct _memory_map { - int fd; + FILE *fp; /* Size of the file, in bytes. */ - char *memmap; - size_t size; + off_t size; - size_t position; + /* file position when the file_buffer was created. */ + off_t initial_file_pos; + + int line_number; + + int fileno; + off_t position; + off_t last_pos; + char *memmap; } memory_map; #define MM(src) ((memory_map *)src) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index c2d5a629b03a3..9ec3f79e1ae70 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1985,7 +1985,8 @@ def __init__(self, f, **kwds): self.comment = kwds['comment'] self._comment_lines = [] - f, handles = _get_handle(f, 'r', encoding=self.encoding, + mode = 'r' if PY3 else 'rb' + f, handles = _get_handle(f, mode, encoding=self.encoding, compression=self.compression, memory_map=self.memory_map) self.handles.extend(handles) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 31d815a4bca97..4b4f44b44c163 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -1662,6 +1662,21 @@ def test_internal_eof_byte(self): result = self.read_csv(StringIO(data)) tm.assert_frame_equal(result, expected) + def test_internal_eof_byte_to_file(self): + # see gh-16559 + data = b'c1,c2\r\n"test \x1a test", test\r\n' + expected = pd.DataFrame([["test \x1a test", " test"]], + columns=["c1", "c2"]) + + path = '__%s__.csv' % tm.rands(10) + + with tm.ensure_clean(path) as path: + with open(path, "wb") as f: + f.write(data) + + result = self.read_csv(path) + tm.assert_frame_equal(result, expected) + def test_file_handles(self): # GH 14418 - don't close user provided file handles From 0281886fbdf0d1837c2af08af15949c1d98bf612 Mon Sep 17 00:00:00 2001 From: Chris M Date: Sun, 11 Jun 2017 20:48:31 -0300 Subject: [PATCH 676/933] PERF: HDFStore __unicode__ method (#16666) * PERF: HDFStore has faster __unicode__, new info() method with old behavior. 
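A minimal sketch of the behavioural split this patch introduces (the store below is a hypothetical scratch file, and note that ``info()`` returns the detail string rather than printing it): ``repr``/``str`` of an ``HDFStore`` now show only the class and file path, while the detailed per-key listing that used to be the repr moves to the new ``info()`` method.

    import pandas as pd

    store = pd.HDFStore('example.h5')  # hypothetical scratch file
    store['s'] = pd.Series(range(3))

    print(repr(store))   # fast: just the class and the file path
    print(store.info())  # detailed per-key listing (the old repr output)

    store.close()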
--- asv_bench/benchmarks/hdfstore_bench.py | 9 ++++ doc/source/api.rst | 1 + doc/source/whatsnew/v0.21.0.txt | 2 + pandas/io/pytables.py | 60 +++++++++++++++----------- pandas/tests/io/test_pytables.py | 37 ++++++++-------- 5 files changed, 66 insertions(+), 43 deletions(-) diff --git a/asv_bench/benchmarks/hdfstore_bench.py b/asv_bench/benchmarks/hdfstore_bench.py index dc72f3d548aaf..7d490180e8af6 100644 --- a/asv_bench/benchmarks/hdfstore_bench.py +++ b/asv_bench/benchmarks/hdfstore_bench.py @@ -90,6 +90,15 @@ def time_query_store_table(self): stop = self.df2.index[15000] self.store.select('table', where="index > start and index < stop") + def time_store_repr(self): + repr(self.store) + + def time_store_str(self): + str(self.store) + + def time_store_info(self): + self.store.info() + class HDF5Panel(object): goal_time = 0.2 diff --git a/doc/source/api.rst b/doc/source/api.rst index cfdd305348d70..d6053791d6f4b 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -99,6 +99,7 @@ HDFStore: PyTables (HDF5) HDFStore.append HDFStore.get HDFStore.select + HDFStore.info Feather ~~~~~~~ diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 3dd8bb2ac2de5..36ca79e8b8714 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -52,6 +52,8 @@ Backwards incompatible API changes - :func:`read_csv` now treats ``'null'`` strings as missing values by default (:issue:`16471`) - :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`) +- :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`). + .. _whatsnew_0210.api: Other API Changes diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 4a1b12414bcc5..9539b73c754e1 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -506,32 +506,7 @@ def __len__(self): return len(self.groups()) def __unicode__(self): - output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) - if self.is_open: - lkeys = sorted(list(self.keys())) - if len(lkeys): - keys = [] - values = [] - - for k in lkeys: - try: - s = self.get_storer(k) - if s is not None: - keys.append(pprint_thing(s.pathname or k)) - values.append( - pprint_thing(s or 'invalid_HDFStore node')) - except Exception as detail: - keys.append(k) - values.append("[invalid_HDFStore node: %s]" - % pprint_thing(detail)) - - output += adjoin(12, keys, values) - else: - output += 'Empty' - else: - output += "File is CLOSED" - - return output + return '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) def __enter__(self): return self @@ -1173,6 +1148,39 @@ def copy(self, file, mode='w', propindexes=True, keys=None, complib=None, return new_store + def info(self): + """ + print detailed information on the store + + .. 
versionadded:: 0.21.0 + """ + output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) + if self.is_open: + lkeys = sorted(list(self.keys())) + if len(lkeys): + keys = [] + values = [] + + for k in lkeys: + try: + s = self.get_storer(k) + if s is not None: + keys.append(pprint_thing(s.pathname or k)) + values.append( + pprint_thing(s or 'invalid_HDFStore node')) + except Exception as detail: + keys.append(k) + values.append("[invalid_HDFStore node: %s]" + % pprint_thing(detail)) + + output += adjoin(12, keys, values) + else: + output += 'Empty' + else: + output += "File is CLOSED" + + return output + # private methods ###### def _check_if_open(self): if not self.is_open: diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 040345db83c2b..efec778e12b50 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -387,6 +387,7 @@ def test_repr(self): with ensure_clean_store(self.path) as store: repr(store) + store.info() store['a'] = tm.makeTimeSeries() store['b'] = tm.makeStringSeries() store['c'] = tm.makeDataFrame() @@ -418,8 +419,9 @@ def test_repr(self): # make a random group in hdf space store._handle.create_group(store._handle.root, 'bah') - repr(store) - str(store) + assert store.filename in repr(store) + assert store.filename in str(store) + store.info() # storers with ensure_clean_store(self.path) as store: @@ -4407,11 +4409,11 @@ def test_multiple_open_close(self): # single store = HDFStore(path) - assert 'CLOSED' not in str(store) + assert 'CLOSED' not in store.info() assert store.is_open store.close() - assert 'CLOSED' in str(store) + assert 'CLOSED' in store.info() assert not store.is_open with ensure_clean_path(self.path) as path: @@ -4432,20 +4434,20 @@ def f(): store1 = HDFStore(path) store2 = HDFStore(path) - assert 'CLOSED' not in str(store1) - assert 'CLOSED' not in str(store2) + assert 'CLOSED' not in store1.info() + assert 'CLOSED' not in store2.info() assert store1.is_open assert store2.is_open store1.close() - assert 'CLOSED' in str(store1) + assert 'CLOSED' in store1.info() assert not store1.is_open - assert 'CLOSED' not in str(store2) + assert 'CLOSED' not in store2.info() assert store2.is_open store2.close() - assert 'CLOSED' in str(store1) - assert 'CLOSED' in str(store2) + assert 'CLOSED' in store1.info() + assert 'CLOSED' in store2.info() assert not store1.is_open assert not store2.is_open @@ -4456,11 +4458,11 @@ def f(): store2 = HDFStore(path) store2.append('df2', df) store2.close() - assert 'CLOSED' in str(store2) + assert 'CLOSED' in store2.info() assert not store2.is_open store.close() - assert 'CLOSED' in str(store) + assert 'CLOSED' in store.info() assert not store.is_open # double closing @@ -4469,11 +4471,11 @@ def f(): store2 = HDFStore(path) store.close() - assert 'CLOSED' in str(store) + assert 'CLOSED' in store.info() assert not store.is_open store2.close() - assert 'CLOSED' in str(store2) + assert 'CLOSED' in store2.info() assert not store2.is_open # ops on a closed store @@ -4820,9 +4822,10 @@ def test_categorical(self): tm.assert_frame_equal(result, df2) # Make sure the metadata is OK - assert '/df2 ' in str(store) - assert '/df2/meta/values_block_0/meta' in str(store) - assert '/df2/meta/values_block_1/meta' in str(store) + info = store.info() + assert '/df2 ' in info + assert '/df2/meta/values_block_0/meta' in info + assert '/df2/meta/values_block_1/meta' in info # unordered s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[ From 
b72519e45bbfe7387e0c576c9315475aace69a2b Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 12 Jun 2017 05:46:52 -0500 Subject: [PATCH 677/933] TST: Add test for groupby sum of large ints (#16671) Closes gh-14758. --- pandas/tests/groupby/test_aggregate.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 0de263589c6d6..b578a6efb0034 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -865,3 +865,20 @@ def test_agg_timezone_round_trip(self): ts = df['B'].iloc[2] assert ts == grouped.last()['B'].iloc[0] assert ts == grouped.apply(lambda x: x.iloc[-1])[0] + + def test_sum_uint64_overflow(self): + # see gh-14758 + + # Convert to uint64 and don't overflow + df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], + dtype=object) + 9223372036854775807 + + index = pd.Index([9223372036854775808, 9223372036854775810, + 9223372036854775812], dtype=np.uint64) + expected = pd.DataFrame({1: [9223372036854775809, + 9223372036854775811, + 9223372036854775813]}, index=index) + + expected.index.name = 0 + result = df.groupby(0).sum() + tm.assert_frame_equal(result, expected) From 11d274f39b66bbeaf2ade65f7ea982f543df0faa Mon Sep 17 00:00:00 2001 From: Telt Date: Mon, 12 Jun 2017 14:55:22 -0700 Subject: [PATCH 678/933] BUG: Categorical scatter plot has KeyError #16199 (#16208) * BUG: Categorical scatter plot has KeyError #16199 Appropriately handles categorical data for dataframe scatter plots which currently raises KeyError for categorical data * Add to whatsnew --- doc/source/whatsnew/v0.20.3.txt | 1 + pandas/plotting/_core.py | 5 +++++ pandas/tests/plotting/test_frame.py | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index f21230693686e..5e9bf1588a7bd 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -36,6 +36,7 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Fixed issue with dataframe scatter plot for categorical data that reports incorrect column key not found when categorical data is used for plotting (:issue:`16199`) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 9169eb86895fb..391fa377f3c6f 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -778,6 +778,11 @@ def __init__(self, data, x, y, **kwargs): x = self.data.columns[x] if is_integer(y) and not self.data.columns.holds_integer(): y = self.data.columns[y] + if len(self.data[x]._get_numeric_data()) == 0: + raise ValueError(self._kind + ' requires x column to be numeric') + if len(self.data[y]._get_numeric_data()) == 0: + raise ValueError(self._kind + ' requires y column to be numeric') + self.x = x self.y = y diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index e40ec5a1faea8..ba674e10be384 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -915,6 +915,24 @@ def test_plot_scatter(self): axes = df.plot(x='x', y='y', kind='scatter', subplots=True) self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + @slow + def test_plot_scatter_with_categorical_data(self): + # GH 16199 + df = pd.DataFrame({'x': [1, 2, 3, 4], + 'y': pd.Categorical(['a', 'b', 'a', 'c'])}) + + with pytest.raises(ValueError) as ve: + df.plot(x='x', y='y', kind='scatter') + ve.match('requires y column to be numeric') + + with pytest.raises(ValueError) as ve: + df.plot(x='y', y='x', 
kind='scatter') + ve.match('requires x column to be numeric') + + with pytest.raises(ValueError) as ve: + df.plot(x='y', y='y', kind='scatter') + ve.match('requires x column to be numeric') + @slow def test_plot_scatter_with_c(self): df = DataFrame(randn(6, 4), From d298414d2724fed40254350117adcabc4e546e48 Mon Sep 17 00:00:00 2001 From: David Gwynne Date: Tue, 13 Jun 2017 20:20:47 +1000 Subject: [PATCH 679/933] Revert #16663, which was a revert of #16039 (#16675) * Revert "BUG: Revert gh-16039 (#16663)" This reverts commit c550372910435bcfa8ce35d134c0a4ba761fc084. * Always treat files as binary to cope with Windows and EOF. On Windows, EOF can appear "in band" if the file is considered text. When moving from fread() to read(), I lost the "b" part of the mode. At the time I believed this was a nop, since Unix doesn't treat files differently based on that flag. This adds O_BINARY to the flags to open to restore the behaviour lost when taking "b" away from fopen. If a platform doesn't provide O_BINARY, this defines it to 0 so it can still be used without effect later on in the code. * Don't leak the fd in new_file_source() if buffer allocation fails. * Reapply the test for EOF in the middle of a stream. Part of c550372910435bcfa8ce35d134c0a4ba761fc084 * Pass rb to _get_handle on Python 3, otherwise stick to r. Part of c550372910435bcfa8ce35d134c0a4ba761fc084 * Replace goto with inline unwinding of state. Requested by @jreback in #16675 feedback. * Describe the fixes to the read_csv() backend and issue numbers. Requested by @jreback in feedback on #16675 --- doc/source/whatsnew/v0.20.3.txt | 3 +- pandas/_libs/src/parser/io.c | 136 +++++++++++++++++++------------- pandas/_libs/src/parser/io.h | 28 ++----- 3 files changed, 89 insertions(+), 78 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 5e9bf1588a7bd..249e05623a27f 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -55,8 +55,7 @@ Indexing I/O ^^^ -- Bug in ``pd.read_csv()`` in which files containing EOF characters mid-field could fail with the C engine on Windows (:issue:`16039`, :issue:`16559`) - +- Bug in ``pd.read_csv()`` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`) Plotting ^^^^^^^^ diff --git a/pandas/_libs/src/parser/io.c b/pandas/_libs/src/parser/io.c index 4381ef19e991b..8300e889d4157 100644 --- a/pandas/_libs/src/parser/io.c +++ b/pandas/_libs/src/parser/io.c @@ -9,31 +9,41 @@ The full license is in the LICENSE file, distributed with this software.
#include "io.h" +#include +#include +#include + +#ifndef O_BINARY +#define O_BINARY 0 +#endif /* O_BINARY */ + /* On-disk FILE, uncompressed */ void *new_file_source(char *fname, size_t buffer_size) { file_source *fs = (file_source *)malloc(sizeof(file_source)); - fs->fp = fopen(fname, "rb"); + if (fs == NULL) { + return NULL; + } - if (fs->fp == NULL) { + fs->fd = open(fname, O_RDONLY | O_BINARY); + if (fs->fd == -1) { free(fs); return NULL; } - setbuf(fs->fp, NULL); - - fs->initial_file_pos = ftell(fs->fp); // Only allocate this heap memory if we are not memory-mapping the file fs->buffer = (char *)malloc((buffer_size + 1) * sizeof(char)); if (fs->buffer == NULL) { + close(fs->fd); + free(fs); return NULL; } - memset(fs->buffer, 0, buffer_size + 1); - fs->buffer[buffer_size] = '\0'; + memset(fs->buffer, '\0', buffer_size + 1); + fs->size = buffer_size; return (void *)fs; } @@ -56,12 +66,12 @@ void *new_rd_source(PyObject *obj) { */ -int del_file_source(void *fs) { +int del_file_source(void *ptr) { + file_source *fs = ptr; if (fs == NULL) return 0; - /* allocated on the heap */ - free(FS(fs)->buffer); - fclose(FS(fs)->fp); + free(fs->buffer); + close(fs->fd); free(fs); return 0; @@ -83,17 +93,31 @@ int del_rd_source(void *rds) { void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read, int *status) { - file_source *src = FS(source); + file_source *fs = FS(source); + ssize_t rv; - *bytes_read = fread((void *)src->buffer, sizeof(char), nbytes, src->fp); + if (nbytes > fs->size) { + nbytes = fs->size; + } - if (*bytes_read == 0) { + rv = read(fs->fd, fs->buffer, nbytes); + switch (rv) { + case -1: + *status = CALLING_READ_FAILED; + *bytes_read = 0; + return NULL; + case 0: *status = REACHED_EOF; - } else { + *bytes_read = 0; + return NULL; + default: *status = 0; + *bytes_read = rv; + fs->buffer[rv] = '\0'; + break; } - return (void *)src->buffer; + return (void *)fs->buffer; } void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, @@ -152,52 +176,57 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, #ifdef HAVE_MMAP #include -#include void *new_mmap(char *fname) { - struct stat buf; - int fd; memory_map *mm; - off_t filesize; + struct stat stat; + size_t filesize; mm = (memory_map *)malloc(sizeof(memory_map)); - mm->fp = fopen(fname, "rb"); - - fd = fileno(mm->fp); - if (fstat(fd, &buf) == -1) { - fprintf(stderr, "new_file_buffer: fstat() failed. errno =%d\n", errno); - return NULL; - } - filesize = buf.st_size; /* XXX This might be 32 bits. */ - if (mm == NULL) { - /* XXX Eventually remove this print statement. */ fprintf(stderr, "new_file_buffer: malloc() failed.\n"); + return (NULL); + } + mm->fd = open(fname, O_RDONLY | O_BINARY); + if (mm->fd == -1) { + fprintf(stderr, "new_file_buffer: open(%s) failed. errno =%d\n", + fname, errno); + free(mm); return NULL; } - mm->size = (off_t)filesize; - mm->line_number = 0; - mm->fileno = fd; - mm->position = ftell(mm->fp); - mm->last_pos = (off_t)filesize; + if (fstat(mm->fd, &stat) == -1) { + fprintf(stderr, "new_file_buffer: fstat() failed. errno =%d\n", + errno); + close(mm->fd); + free(mm); + return NULL; + } + filesize = stat.st_size; /* XXX This might be 32 bits. */ - mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0); - if (mm->memmap == NULL) { + mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, mm->fd, 0); + if (mm->memmap == MAP_FAILED) { /* XXX Eventually remove this print statement. 
*/ fprintf(stderr, "new_file_buffer: mmap() failed.\n"); + close(mm->fd); free(mm); - mm = NULL; + return NULL; } - return (void *)mm; + mm->size = (off_t)filesize; + mm->position = 0; + + return mm; } -int del_mmap(void *src) { - munmap(MM(src)->memmap, MM(src)->size); +int del_mmap(void *ptr) { + memory_map *mm = ptr; + + if (mm == NULL) return 0; - fclose(MM(src)->fp); - free(src); + munmap(mm->memmap, mm->size); + close(mm->fd); + free(mm); return 0; } @@ -205,27 +234,26 @@ int del_mmap(void *src) { void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read, int *status) { void *retval; - memory_map *src = MM(source); + memory_map *src = source; + size_t remaining = src->size - src->position; - if (src->position == src->last_pos) { + if (remaining == 0) { *bytes_read = 0; *status = REACHED_EOF; return NULL; } - retval = src->memmap + src->position; - - if (src->position + (off_t)nbytes > src->last_pos) { - // fewer than nbytes remaining - *bytes_read = src->last_pos - src->position; - } else { - *bytes_read = nbytes; + if (nbytes > remaining) { + nbytes = remaining; } - *status = 0; + retval = src->memmap + src->position; /* advance position in mmap data structure */ - src->position += *bytes_read; + src->position += nbytes; + + *bytes_read = nbytes; + *status = 0; return retval; } diff --git a/pandas/_libs/src/parser/io.h b/pandas/_libs/src/parser/io.h index 77121e9a169c1..d22e8ddaea88d 100644 --- a/pandas/_libs/src/parser/io.h +++ b/pandas/_libs/src/parser/io.h @@ -15,19 +15,10 @@ The full license is in the LICENSE file, distributed with this software. typedef struct _file_source { /* The file being read. */ - FILE *fp; + int fd; char *buffer; - - /* file position when the file_buffer was created. */ - off_t initial_file_pos; - - /* Offset in the file of the data currently in the buffer. */ - off_t buffer_file_pos; - - /* Actual number of bytes in the current buffer. (Can be less than - * buffer_size.) */ - off_t last_pos; + size_t size; } file_source; #define FS(source) ((file_source *)source) @@ -37,20 +28,13 @@ typedef struct _file_source { #endif typedef struct _memory_map { - FILE *fp; + int fd; /* Size of the file, in bytes. */ - off_t size; - - /* file position when the file_buffer was created. */ - off_t initial_file_pos; - - int line_number; - - int fileno; - off_t position; - off_t last_pos; char *memmap; + size_t size; + + size_t position; } memory_map; #define MM(src) ((memory_map *)src) From 344cec77f1904343f8d253d7cfd8d5fccddb08de Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 13 Jun 2017 07:52:34 -0400 Subject: [PATCH 680/933] Revert "BLD: fix numpy on 2.7 build as 1.13 was released but no deps are built for it (#16633) (#16650)" (#16688) This reverts commit 789f7bbb52f279bd1ed53abf1a9580f682c2d6b9. 
--- ci/requirements-2.7.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-2.7.build b/ci/requirements-2.7.build index a7b950e615464..415df13179fcf 100644 --- a/ci/requirements-2.7.build +++ b/ci/requirements-2.7.build @@ -2,5 +2,5 @@ python=2.7* python-dateutil=2.4.1 pytz=2013b nomkl -numpy=1.12* +numpy cython=0.23 From 466e4253ae466aaa422cc3f3b3a4143d1466158c Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Wed, 14 Jun 2017 01:07:51 +0200 Subject: [PATCH 681/933] Add inplace support for rename_axis (#16505) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/generic.py | 20 +++++++++++++------- pandas/tests/frame/test_alter_axes.py | 17 +++++++++++++++++ pandas/tests/series/test_alter_axes.py | 10 ++++++++++ 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 36ca79e8b8714..48d835272537f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -33,6 +33,7 @@ Other Enhancements - The ``validate`` argument for :func:`merge` function now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception of type ``MergeError`` will be raised. For more, see :ref:`here ` (:issue:`16270`) - ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`) - ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`) +- ``Series.rename_axis()`` and ``DataFrame.rename_axis()`` with ``inplace=True`` now return None while renaming the axis inplace. (:issue:`15704`) - :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__ - :func:`api.types.infer_dtype` now infers decimals. (:issue: `15690`) - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8b186bab29d5e..54ad86b07d4d4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -753,7 +753,7 @@ def rename_axis(self, mapper, axis=0, copy=True, inplace=False): Returns ------- - renamed : type of caller + renamed : type of caller or None if inplace=True See Also -------- @@ -784,16 +784,16 @@ def rename_axis(self, mapper, axis=0, copy=True, inplace=False): non_mapper = is_scalar(mapper) or (is_list_like(mapper) and not is_dict_like(mapper)) if non_mapper: - return self._set_axis_name(mapper, axis=axis) + return self._set_axis_name(mapper, axis=axis, inplace=inplace) else: axis = self._get_axis_name(axis) d = {'copy': copy, 'inplace': inplace} d[axis] = mapper return self.rename(**d) - def _set_axis_name(self, name, axis=0): + def _set_axis_name(self, name, axis=0, inplace=False): """ - Alter the name or names of the axis, returning self. + Alter the name or names of the axis. Parameters ---------- @@ -801,10 +801,14 @@ def _set_axis_name(self, name, axis=0): Name for the Index, or list of names for the MultiIndex axis : int or str 0 or 'index' for the index; 1 or 'columns' for the columns + inplace : bool + whether to modify `self` directly or return a copy + + .. 
versionadded:: 0.21.0 Returns ------- renamed : type of caller or None if inplace=True See Also -------- pandas.DataFrame.rename """ axis = self._get_axis_number(axis) idx = self._get_axis(axis).set_names(name) - renamed = self.copy(deep=True) + inplace = validate_bool_kwarg(inplace, 'inplace') + renamed = self if inplace else self.copy() renamed.set_axis(axis, idx) - return renamed + if not inplace: + return renamed # ---------------------------------------------------------------------- # Comparisons diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index fbfbcc14e9150..434c02b8eba2f 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -418,6 +418,23 @@ def test_rename(self): pd.Index(['bar', 'foo'], name='name')) assert renamed.index.name == renamer.index.name + def test_rename_axis_inplace(self): + # GH 15704 + frame = self.frame.copy() + expected = frame.rename_axis('foo') + result = frame.copy() + no_return = result.rename_axis('foo', inplace=True) + + assert no_return is None + assert_frame_equal(result, expected) + + expected = frame.rename_axis('bar', axis=1) + result = frame.copy() + no_return = result.rename_axis('bar', axis=1, inplace=True) + + assert no_return is None + assert_frame_equal(result, expected) + def test_rename_multiindex(self): tuples_index = [('foo1', 'bar1'), ('foo2', 'bar2')] diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 98ae749aaa10e..d93f0326fd3b1 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -224,3 +224,13 @@ def test_reorder_levels(self): result = s.reorder_levels(['L0', 'L0', 'L0']) assert_series_equal(result, expected) + + def test_rename_axis_inplace(self): + # GH 15704 + series = self.ts.copy() + expected = series.rename_axis('foo') + result = series.copy() + no_return = result.rename_axis('foo', inplace=True) + + assert no_return is None + assert_series_equal(result, expected) From 6401b825b5a4cbbae03acd9ca723a57bf8981de5 Mon Sep 17 00:00:00 2001 From: Chris Kerr Date: Tue, 13 Jun 2017 19:12:30 -0400 Subject: [PATCH 682/933] DOC: Update link in the deprecation message for .ix Author: Chris Kerr Closes #16691 from frexvahi/update-ix-deprecated-link and squashes the following commits: 1c44ba6 [Chris Kerr] Update link in the deprecation message for .ix --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a01e3dc46dfe9..50f2f9b52e111 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1288,7 +1288,7 @@ def __init__(self, obj, name): .iloc for positional indexing See the documentation here: -http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix""" +http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated""" # noqa warnings.warn(_ix_deprecation_warning, DeprecationWarning, stacklevel=3) From d02ef6f04466e4a74f67ad584cf38cdc6df56e42 Mon Sep 17 00:00:00 2001 From: Alex Rychyk Date: Wed, 14 Jun 2017 02:16:02 +0300 Subject: [PATCH 683/933] BUG: Inconsistent return type for downsampling on resample of empty DataFrame (#15093) closes #14692 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/resample.py | 13 +++++++++++-- pandas/tests/test_resample.py | 10 +++++++--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git
a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 48d835272537f..fd8a15eaf075f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -113,6 +113,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ +- Bug in ``DataFrame.resample().size()`` where an empty DataFrame did not return a Series (:issue:`14962`) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 2bb825541e23b..a8a48624fb885 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -17,6 +17,7 @@ from pandas.core.indexes.period import PeriodIndex, period_range import pandas.core.common as com import pandas.core.algorithms as algos +from pandas.core.dtypes.generic import ABCDataFrame import pandas.compat as compat from pandas.compat.numpy import function as nv @@ -549,6 +550,15 @@ def var(self, ddof=1, *args, **kwargs): nv.validate_resampler_func('var', args, kwargs) return self._downsample('var', ddof=ddof) + @Appender(GroupBy.size.__doc__) + def size(self): + # It's a special case as higher level does return + # a copy of 0-len objects. GH14962 + result = self._downsample('size') + if not len(self.ax) and isinstance(self._selected_obj, ABCDataFrame): + result = pd.Series([], index=result.index, dtype='int64') + return result + Resampler._deprecated_valids += dir(Resampler) @@ -563,8 +573,7 @@ def f(self, _method=method, *args, **kwargs): setattr(Resampler, method, f) # groupby & aggregate methods -for method in ['count', 'size']: - +for method in ['count']: def f(self, _method=method): return self._downsample(_method) f.__doc__ = getattr(GroupBy, method).__doc__ diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 959e3d2f459ce..15bbd7a9ef5e9 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -783,15 +783,19 @@ def test_resample_empty_dataframe(self): for freq in ['M', 'D', 'H']: # count retains dimensions too - methods = downsample_methods + ['count'] + methods = downsample_methods + upsample_methods for method in methods: result = getattr(f.resample(freq), method)() + if method != 'size': + expected = f.copy() + else: + # GH14962 + expected = Series([]) - expected = f.copy() expected.index = f.index._shallow_copy(freq=freq) assert_index_equal(result.index, expected.index) assert result.index.freq == expected.index.freq - assert_frame_equal(result, expected, check_dtype=False) + assert_almost_equal(result, expected, check_dtype=False) # test size for GH13212 (currently stays as df) From 3caf858b0ab0dd62126be7ca1021d71409b70d98 Mon Sep 17 00:00:00 2001 From: Oleg Shteynbuk Date: Tue, 13 Jun 2017 19:17:45 -0400 Subject: [PATCH 684/933] consolidated the duplicate definitions of NA values (in parsers & IO) (#16589) --- doc/source/io.rst | 15 ++++++++------- pandas/_libs/parsers.pyx | 15 ++++++++------- pandas/tests/io/parser/na_values.py | 4 ++-- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 7ea476514e88d..69377fad15270 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -225,9 +225,9 @@ NA and Missing Data Handling na_values : scalar, str, list-like, or dict, default ``None`` Additional strings to recognize as NA/NaN. If dict passed, specific per-column - NA values. By default the following values are interpreted as NaN: - ``'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'n/a', 'NA', - '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''``. + NA values. 
See :ref:`na values const <io.navaluesconst>` below + for a list of the values interpreted as NaN by default. + keep_default_na : boolean, default ``True`` If na_values are specified and keep_default_na is ``False`` the default NaN values are overridden, otherwise they're appended to. @@ -1030,10 +1030,11 @@ the corresponding equivalent values will also imply a missing value (in this cas ``[5.0,5]`` are recognized as ``NaN``. To completely override the default values that are recognized as missing, specify ``keep_default_na=False``. -The default ``NaN`` recognized values are ``['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A','N/A', 'NA', -'#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan']``. Although a 0-length string -``''`` is not included in the default ``NaN`` values list, it is still treated -as a missing value. + +.. _io.navaluesconst: + +The default ``NaN`` recognized values are ``['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', +'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '']``. .. code-block:: python diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 2549c8545908d..7375a2197c6b7 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -50,6 +50,8 @@ from pandas.core.algorithms import take_1d from pandas.core.dtypes.concat import union_categoricals from pandas import Index +import pandas.io.common as com + import time import os @@ -273,13 +275,6 @@ cdef extern from "parser/io.h": DEFAULT_CHUNKSIZE = 256 * 1024 -# common NA values -# no longer excluding inf representations -# '1.#INF','-1.#INF', '1.#INF000000', -_NA_VALUES = [b'-1.#IND', b'1.#QNAN', b'1.#IND', b'-1.#QNAN', - b'#N/A N/A', b'n/a', b'NA', b'#NA', b'NULL', b'null', b'NaN', - b'nan', b''] - cdef class TextReader: """ @@ -1380,6 +1375,12 @@ cdef asbytes(object o): return str(o) +# common NA values +# no longer excluding inf representations +# '1.#INF','-1.#INF', '1.#INF000000', +_NA_VALUES = _ensure_encoded(list(com._NA_VALUES)) + + def _is_file_like(obj): if PY3: import io diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index 170f9d428c9cc..c6d1cc79b82d7 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -8,7 +8,7 @@ import numpy as np from numpy import nan -import pandas.io.parsers as parsers +import pandas.io.common as com import pandas.util.testing as tm from pandas import DataFrame, Index, MultiIndex @@ -72,7 +72,7 @@ def test_default_na_values(self): _NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A', 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', 'nan', '-NaN', '-nan', '#N/A N/A', '']) - assert _NA_VALUES == parsers._NA_VALUES + assert _NA_VALUES == com._NA_VALUES nv = len(_NA_VALUES) def f(i, v): From 8800e8496c9aba0dea8b7b83b5543a536f8e3cfe Mon Sep 17 00:00:00 2001 From: linebp Date: Wed, 14 Jun 2017 01:20:59 +0200 Subject: [PATCH 685/933] GH15943 Fixed defaults for compression in HDF5 (#16355) --- doc/source/io.rst | 64 +++++++++++++++++++++++++------- doc/source/whatsnew/v0.21.0.txt | 3 +- pandas/core/generic.py | 4 +- pandas/io/pytables.py | 16 ++++---- pandas/tests/io/test_pytables.py | 53 ++++++++++++++++++++++++++ 5 files changed, 115 insertions(+), 25 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 69377fad15270..340eb9aa053aa 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4067,26 +4067,64 @@ Compression +++++++++++ ``PyTables`` allows the stored data to be compressed.
This applies to -all kinds of stores, not just tables. +all kinds of stores, not just tables. Two parameters are used to +control compression: ``complevel`` and ``complib``. + +``complevel`` specifies if and how hard data is to be compressed. + ``complevel=0`` and ``complevel=None`` disables + compression and ``0`_: The default compression library. A classic in terms of compression, achieves good compression rates but is somewhat slow. + - `lzo `_: Fast compression and decompression. + - `bzip2 `_: Good compression rates. + - `blosc `_: Fast compression and decompression. + + .. versionadded:: 0.20.2 + + Support for alternative blosc compressors: + + - `blosc:blosclz `_ This is the + default compressor for ``blosc`` + - `blosc:lz4 + `_: + A compact, very popular and fast compressor. + - `blosc:lz4hc + `_: + A tweaked version of LZ4, produces better + compression ratios at the expense of speed. + - `blosc:snappy `_: + A popular compressor used in many places. + - `blosc:zlib `_: A classic; + somewhat slower than the previous ones, but + achieving better compression ratios. + - `blosc:zstd `_: An + extremely well balanced codec; it provides the best + compression ratios among the others above, and at + reasonably fast speed. + + If ``complib`` is defined as something other than the + listed libraries a ``ValueError`` exception is issued. -- Pass ``complevel=int`` for a compression level (1-9, with 0 being no - compression, and the default) -- Pass ``complib=lib`` where lib is any of ``zlib, bzip2, lzo, blosc`` for - whichever compression library you prefer. +.. note:: -``HDFStore`` will use the file based compression scheme if no overriding -``complib`` or ``complevel`` options are provided. ``blosc`` offers very -fast compression, and is my most used. Note that ``lzo`` and ``bzip2`` -may not be installed (by Python) by default. + If the library specified with the ``complib`` option is missing on your platform, + compression defaults to ``zlib`` without further ado. -Compression for all objects within the file +Enable compression for all objects within the file: .. code-block:: python - store_compressed = pd.HDFStore('store_compressed.h5', complevel=9, complib='blosc') + store_compressed = pd.HDFStore('store_compressed.h5', complevel=9, complib='blosc:blosclz') -Or on-the-fly compression (this only applies to tables). You can turn -off file compression for a specific table by passing ``complevel=0`` +Or on-the-fly compression (this only applies to tables) in stores where compression is not enabled: .. code-block:: python diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index fd8a15eaf075f..79f2816f43a6f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -47,13 +47,12 @@ Backwards incompatible API changes - Support has been dropped for Python 3.4 (:issue:`15251`) - The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`) - - Accessing a non-existent attribute on a closed :class:`HDFStore` will now raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`) - :func:`read_csv` now treats ``'null'`` strings as missing values by default (:issue:`16471`) - :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`) - - :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`). 
+- Compression defaults in HDF stores now follow pytable standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`) .. _whatsnew_0210.api: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 54ad86b07d4d4..fdf5d01484b98 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1284,10 +1284,10 @@ def to_hdf(self, path_or_buf, key, **kwargs): `__. Applicable only to format='table'. - complevel : int, 0-9, default 0 + complevel : int, 0-9, default None Specifies a compression level for data. A value of 0 disables compression. - complib : {'zlib', 'lzo', 'bzip2', 'blosc', None}, default None + complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' Specifies the compression library to be used. As of v0.20.2 these additional compressors for Blosc are supported (default if no compressor specified: 'blosc:blosclz'): diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9539b73c754e1..f83380b18beb3 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -411,10 +411,10 @@ class HDFStore(StringMixin): and if the file does not exist it is created. ``'r+'`` It is similar to ``'a'``, but the file must already exist. - complevel : int, 0-9, default 0 + complevel : int, 0-9, default None Specifies a compression level for data. A value of 0 disables compression. - complib : {'zlib', 'lzo', 'bzip2', 'blosc', None}, default None + complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' Specifies the compression library to be used. As of v0.20.2 these additional compressors for Blosc are supported (default if no compressor specified: 'blosc:blosclz'): @@ -449,12 +449,15 @@ def __init__(self, path, mode=None, complevel=None, complib=None, "complib only supports {libs} compression.".format( libs=tables.filters.all_complibs)) + if complib is None and complevel is not None: + complib = tables.filters.default_complib + self._path = _stringify_path(path) if mode is None: mode = 'a' self._mode = mode self._handle = None - self._complevel = complevel + self._complevel = complevel if complevel else 0 self._complib = complib self._fletcher32 = fletcher32 self._filters = None @@ -566,11 +569,8 @@ def open(self, mode='a', **kwargs): if self.is_open: self.close() - if self._complib is not None: - if self._complevel is None: - self._complevel = 9 - self._filters = _tables().Filters(self._complevel, - self._complib, + if self._complevel and self._complevel > 0: + self._filters = _tables().Filters(self._complevel, self._complib, fletcher32=self._fletcher32) try: diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index efec778e12b50..86ff368e97b9e 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -736,6 +736,59 @@ def test_put_compression_blosc(self): store.put('c', df, format='table', complib='blosc') tm.assert_frame_equal(store['c'], df) + def test_complibs_default_settings(self): + # GH15943 + df = tm.makeDataFrame() + + # Set complevel and check if complib is automatically set to + # default value + with ensure_clean_path(self.path) as tmpfile: + df.to_hdf(tmpfile, 'df', complevel=9) + result = pd.read_hdf(tmpfile, 'df') + tm.assert_frame_equal(result, df) + + with tables.open_file(tmpfile, mode='r') as h5file: + for node in h5file.walk_nodes(where='/df', classname='Leaf'): + assert node.filters.complevel == 9 + assert node.filters.complib == 'zlib' + + # Set complib and check to see if compression is disabled + with 
ensure_clean_path(self.path) as tmpfile: + df.to_hdf(tmpfile, 'df', complib='zlib') + result = pd.read_hdf(tmpfile, 'df') + tm.assert_frame_equal(result, df) + + with tables.open_file(tmpfile, mode='r') as h5file: + for node in h5file.walk_nodes(where='/df', classname='Leaf'): + assert node.filters.complevel == 0 + assert node.filters.complib is None + + # Check if not setting complib or complevel results in no compression + with ensure_clean_path(self.path) as tmpfile: + df.to_hdf(tmpfile, 'df') + result = pd.read_hdf(tmpfile, 'df') + tm.assert_frame_equal(result, df) + + with tables.open_file(tmpfile, mode='r') as h5file: + for node in h5file.walk_nodes(where='/df', classname='Leaf'): + assert node.filters.complevel == 0 + assert node.filters.complib is None + + # Check if file-defaults can be overridden on a per table basis + with ensure_clean_path(self.path) as tmpfile: + store = pd.HDFStore(tmpfile) + store.append('dfc', df, complevel=9, complib='blosc') + store.append('df', df) + store.close() + + with tables.open_file(tmpfile, mode='r') as h5file: + for node in h5file.walk_nodes(where='/df', classname='Leaf'): + assert node.filters.complevel == 0 + assert node.filters.complib is None + for node in h5file.walk_nodes(where='/dfc', classname='Leaf'): + assert node.filters.complevel == 9 + assert node.filters.complib == 'blosc' + def test_complibs(self): # GH14478 df = tm.makeDataFrame() From 2e24a8f8886a1481af8324d44224cddb2ee1b53c Mon Sep 17 00:00:00 2001 From: Peter Yanovich Date: Wed, 14 Jun 2017 11:36:20 +0300 Subject: [PATCH 686/933] DOC: add header=None to read_excel docstring (#16689) --- pandas/io/excel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index e3c9ae3f164cb..5db4603c37be0 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -77,7 +77,7 @@ header : int, list of ints, default 0 Row (0-indexed) to use for the column labels of the parsed DataFrame. If a list of integers is passed those row positions will - be combined into a ``MultiIndex`` + be combined into a ``MultiIndex``. Use None if there is no header. 
skiprows : list-like Rows to skip at the beginning (0-indexed) skip_footer : int, default 0 From a43dcf998cb4a0cf401df323e5d5ca7ee2ac46f8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 14 Jun 2017 18:01:31 -0500 Subject: [PATCH 687/933] TST: Test against python-dateutil master (#16648) --- ci/requirements-3.6_NUMPY_DEV.build | 1 - ci/requirements-3.6_NUMPY_DEV.build.sh | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/requirements-3.6_NUMPY_DEV.build b/ci/requirements-3.6_NUMPY_DEV.build index 738366867a217..900c050f1cc9e 100644 --- a/ci/requirements-3.6_NUMPY_DEV.build +++ b/ci/requirements-3.6_NUMPY_DEV.build @@ -1,4 +1,3 @@ python=3.6* -python-dateutil pytz cython diff --git a/ci/requirements-3.6_NUMPY_DEV.build.sh b/ci/requirements-3.6_NUMPY_DEV.build.sh index 4af1307f26a18..90ed04f8f0c17 100644 --- a/ci/requirements-3.6_NUMPY_DEV.build.sh +++ b/ci/requirements-3.6_NUMPY_DEV.build.sh @@ -11,4 +11,7 @@ pip uninstall numpy -y PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com" pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS numpy scipy +# install dateutil from master +pip install -U git+git://github.com/dateutil/dateutil.git + true From b7e7fd3f17d4d2a2f87b9d169cf87143f04e5d33 Mon Sep 17 00:00:00 2001 From: Margaret Sy Date: Wed, 14 Jun 2017 16:03:27 -0700 Subject: [PATCH 688/933] BUG: .iloc[:] and .loc[:] return a copy of the original object #13873 (#16443) closes #13873 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexing.py | 10 +++++++--- pandas/tests/indexing/test_iloc.py | 18 ++++++++++++++++++ pandas/tests/indexing/test_loc.py | 25 +++++++++++++++++++++++++ 4 files changed, 51 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 79f2816f43a6f..d6b699abdba2d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -97,6 +97,7 @@ Conversion Indexing ^^^^^^^^ +- When called with a null slice (e.g. ``df.iloc[:]``), the``iloc`` and ``loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). I/O diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 50f2f9b52e111..ae0aaf98fdf02 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -988,6 +988,10 @@ def _getitem_lowerdim(self, tup): if len(new_key) == 1: new_key, = new_key + # Slices should return views, but calling iloc/loc with a null + # slice returns a new object. 
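+            # Returning ``section`` directly keeps the view semantics;
+            # recursing into iloc/loc with the null slice below would hand
+            # back a shallow copy instead (GH 13873).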
+ if is_null_slice(new_key): + return section # This is an elided recursive call to iloc/loc/etc' return getattr(section, self.name)[new_key] @@ -1250,7 +1254,7 @@ def _get_slice_axis(self, slice_obj, axis=0): obj = self.obj if not need_slice(slice_obj): - return obj + return obj.copy(deep=False) indexer = self._convert_slice_indexer(slice_obj, axis) if isinstance(indexer, slice): @@ -1349,7 +1353,7 @@ def _get_slice_axis(self, slice_obj, axis=0): """ this is pretty simple as we just have to deal with labels """ obj = self.obj if not need_slice(slice_obj): - return obj + return obj.copy(deep=False) labels = obj._get_axis(axis) indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, @@ -1690,7 +1694,7 @@ def _get_slice_axis(self, slice_obj, axis=0): obj = self.obj if not need_slice(slice_obj): - return obj + return obj.copy(deep=False) slice_obj = self._convert_slice_indexer(slice_obj, axis) if isinstance(slice_obj, slice): diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index af4b9e1f0cc25..769cf8ec395dd 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -591,3 +591,21 @@ def test_iloc_empty_list_indexer_is_ok(self): tm.assert_frame_equal(df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True) + + def test_identity_slice_returns_new_object(self): + # GH13873 + original_df = DataFrame({'a': [1, 2, 3]}) + sliced_df = original_df.iloc[:] + assert sliced_df is not original_df + + # should be a shallow copy + original_df['a'] = [4, 4, 4] + assert (sliced_df['a'] == 4).all() + + original_series = Series([1, 2, 3, 4, 5, 6]) + sliced_series = original_series.iloc[:] + assert sliced_series is not original_series + + # should also be a shallow copy + original_series[:3] = [7, 8, 9] + assert all(sliced_series[:3] == [7, 8, 9]) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index fe2318be72eda..3e863a59df67e 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -630,3 +630,28 @@ def test_loc_empty_list_indexer_is_ok(self): tm.assert_frame_equal(df.loc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True) + + def test_identity_slice_returns_new_object(self): + # GH13873 + original_df = DataFrame({'a': [1, 2, 3]}) + sliced_df = original_df.loc[:] + assert sliced_df is not original_df + assert original_df[:] is not original_df + + # should be a shallow copy + original_df['a'] = [4, 4, 4] + assert (sliced_df['a'] == 4).all() + + # These should not return copies + assert original_df is original_df.loc[:, :] + df = DataFrame(np.random.randn(10, 4)) + assert df[0] is df.loc[:, 0] + + # Same tests for Series + original_series = Series([1, 2, 3, 4, 5, 6]) + sliced_series = original_series.loc[:] + assert sliced_series is not original_series + assert original_series[:] is not original_series + + original_series[:3] = [7, 8, 9] + assert all(sliced_series[:3] == [7, 8, 9]) From 0d46203a8d92df47d4e92938c97440f71663aef4 Mon Sep 17 00:00:00 2001 From: DSM Date: Thu, 15 Jun 2017 06:10:12 -0400 Subject: [PATCH 689/933] TST: Add test of building frame from named Series and columns (#9232) (#16700) --- pandas/tests/frame/test_constructors.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 8459900ea1059..97cf3ce8a7216 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1108,6 
+1108,22 @@ def test_constructor_Series_named(self): expected = DataFrame({1: s1, 0: arr}, columns=[0, 1]) tm.assert_frame_equal(df, expected) + def test_constructor_Series_named_and_columns(self): + # GH 9232 validation + + s0 = Series(range(5), name=0) + s1 = Series(range(5), name=1) + + # matching name and column gives standard frame + tm.assert_frame_equal(pd.DataFrame(s0, columns=[0]), + s0.to_frame()) + tm.assert_frame_equal(pd.DataFrame(s1, columns=[1]), + s1.to_frame()) + + # non-matching produces empty frame + assert pd.DataFrame(s0, columns=[1]).empty + assert pd.DataFrame(s1, columns=[0]).empty + def test_constructor_Series_differently_indexed(self): # name s1 = Series([1, 2, 3], index=['a', 'b', 'c'], name='x') From 52eda7ba8780194735e25a43812316aa7a3ee6b3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 15 Jun 2017 12:13:14 +0200 Subject: [PATCH 690/933] DOC: fix wrongly placed versionadded (#16702) --- pandas/core/strings.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index c57d7a9362490..4814e78ee6546 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -611,11 +611,12 @@ def str_extract(arr, pat, flags=0, expand=None): flags : int, default 0 (no flags) re module flags, e.g. re.IGNORECASE - .. versionadded:: 0.18.0 expand : bool, default False * If True, return DataFrame. * If False, return Series/Index/DataFrame. + .. versionadded:: 0.18.0 + Returns ------- DataFrame with one row for each subject string, and one column for From 1eb22d518b6128160ef0661fa87eb162ddf2c2a9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 15 Jun 2017 14:29:49 +0200 Subject: [PATCH 691/933] DOC: pin sphinx to version 1.5 (#16704) --- ci/requirements-3.6_DOC.run | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-3.6_DOC.run b/ci/requirements-3.6_DOC.run index df8087f62ef16..f87760b507357 100644 --- a/ci/requirements-3.6_DOC.run +++ b/ci/requirements-3.6_DOC.run @@ -1,7 +1,7 @@ ipython ipykernel ipywidgets -sphinx +sphinx=1.5* nbconvert nbformat notebook From 3ff845b4e81d4dde403c29908f5a9bbfe4a87788 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 15 Jun 2017 10:45:30 -0400 Subject: [PATCH 692/933] CI: restore np 113 in ci builds (#16656) * Revert "BLD: fix numpy on 3.6 build as 1.13 was released but no deps are built for it (#16633)" This reverts commit dfebd8a56cdcfa6d3d1217a3a22e882b5f1591e2. 
closes #16634 --- ci/requirements-3.6.build | 2 +- ci/requirements-3.6_DOC.build | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/requirements-3.6.build b/ci/requirements-3.6.build index 8d09e0ee93070..1c4b46aea3865 100644 --- a/ci/requirements-3.6.build +++ b/ci/requirements-3.6.build @@ -2,5 +2,5 @@ python=3.6* python-dateutil pytz nomkl -numpy=1.12* +numpy cython diff --git a/ci/requirements-3.6_DOC.build b/ci/requirements-3.6_DOC.build index 37faaa7e4db88..bdcfe28105866 100644 --- a/ci/requirements-3.6_DOC.build +++ b/ci/requirements-3.6_DOC.build @@ -1,5 +1,5 @@ python=3.6* python-dateutil pytz -numpy=1.12* +numpy cython From 125c414389320dc67ecaffddc65878c01822064e Mon Sep 17 00:00:00 2001 From: kjford Date: Fri, 16 Jun 2017 05:46:18 -0700 Subject: [PATCH 693/933] BUG: Fix regression for RGB(A) color arguments (#16701) * Add test * Pass tuples that are RGB or RGBA like in list * Update what's new * change whatsnew to reflect regression fix * Add test for RGBA as well --- doc/source/whatsnew/v0.20.3.txt | 2 +- pandas/plotting/_core.py | 5 +++++ pandas/tests/plotting/test_frame.py | 6 ++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 249e05623a27f..acd19a8b8da10 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -59,7 +59,7 @@ I/O Plotting ^^^^^^^^ - +- Fix regression in series plotting that prevented RGB and RGBA tuples from being used as color arguments (:issue:`16233`) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 391fa377f3c6f..f8e83aea03594 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -187,6 +187,11 @@ def _validate_color_args(self): # support series.plot(color='green') self.kwds['color'] = [self.kwds['color']] + if ('color' in self.kwds and isinstance(self.kwds['color'], tuple) and + self.nseries == 1 and len(self.kwds['color']) in (3, 4)): + # support RGB and RGBA tuples in series plot + self.kwds['color'] = [self.kwds['color']] + if ('color' in self.kwds or 'colors' in self.kwds) and \ self.colormap is not None: warnings.warn("'color' and 'colormap' cannot be used " diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index ba674e10be384..fc9ef132b2754 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -158,6 +158,12 @@ def test_color_single_series_list(self): df = DataFrame({"A": [1, 2, 3]}) _check_plot_works(df.plot, color=['red']) + def test_rgb_tuple_color(self): + # GH 16695 + df = DataFrame({'x': [1, 2], 'y': [3, 4]}) + _check_plot_works(df.plot, x='x', y='y', color=(1, 0, 0)) + _check_plot_works(df.plot, x='x', y='y', color=(1, 0, 0, 0.5)) + def test_color_empty_string(self): df = DataFrame(randn(10, 2)) with pytest.raises(ValueError): From 09d8c22d9f56f4a067880a28fbb1235bcf0a1e49 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 19 Jun 2017 19:34:53 -0400 Subject: [PATCH 694/933] CI: pin jemalloc=4.4.0 (#16727) --- ci/requirements-2.7.sh | 2 +- ci/requirements-2.7_BUILD_TEST.sh | 2 +- ci/requirements-3.5.sh | 2 +- ci/requirements-3.6.run | 1 + ci/requirements-3.6_DOC.sh | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ci/requirements-2.7.sh b/ci/requirements-2.7.sh index 64d470e5c6e0e..87daf740e059e 100644 --- a/ci/requirements-2.7.sh +++ b/ci/requirements-2.7.sh @@ -4,4 +4,4 @@ source activate pandas echo "install 27" -conda install -n pandas -c conda-forge feather-format 
+conda install -n pandas -c conda-forge feather-format jemalloc=4.4.0 diff --git a/ci/requirements-2.7_BUILD_TEST.sh b/ci/requirements-2.7_BUILD_TEST.sh index 78941fd0944e5..b90ae3617ba2a 100755 --- a/ci/requirements-2.7_BUILD_TEST.sh +++ b/ci/requirements-2.7_BUILD_TEST.sh @@ -4,4 +4,4 @@ source activate pandas echo "install 27 BUILD_TEST" -conda install -n pandas -c conda-forge pyarrow dask +conda install -n pandas -c conda-forge pyarrow dask jemalloc=4.4.0 diff --git a/ci/requirements-3.5.sh b/ci/requirements-3.5.sh index d0f0b81802dc6..a2871f117be96 100644 --- a/ci/requirements-3.5.sh +++ b/ci/requirements-3.5.sh @@ -4,4 +4,4 @@ source activate pandas echo "install 35" -conda install -n pandas -c conda-forge feather-format +conda install -n pandas -c conda-forge feather-format jemalloc=4.4.0 diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 41c9680ce1b7e..06fae9a70ecf7 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -14,6 +14,7 @@ html5lib jinja2 sqlalchemy pymysql +jemalloc=4.4.0 feather-format # psycopg2 (not avail on defaults ATM) beautifulsoup4 diff --git a/ci/requirements-3.6_DOC.sh b/ci/requirements-3.6_DOC.sh index e43e483d77a73..5a62e75adf7ea 100644 --- a/ci/requirements-3.6_DOC.sh +++ b/ci/requirements-3.6_DOC.sh @@ -6,6 +6,6 @@ echo "[install DOC_BUILD deps]" pip install pandas-gbq -conda install -n pandas -c conda-forge feather-format nbsphinx pandoc +conda install -n pandas -c conda-forge feather-format nbsphinx pandoc jemalloc=4.4.0 conda install -n pandas -c r r rpy2 --yes From 8b5e3d65bbc52b155efdc1cfcf3dc10e50691742 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 19 Jun 2017 16:35:39 -0700 Subject: [PATCH 695/933] MAINT: Drop Categorical.order & sort (#16728) Deprecated back in 0.18.1 xref gh-12882 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/categorical.py | 31 ------------------------------- pandas/tests/test_categorical.py | 6 ------ 3 files changed, 1 insertion(+), 37 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d6b699abdba2d..45c92717b60f0 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -75,6 +75,7 @@ Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ``pd.read_excel()`` has dropped the ``has_index_names`` parameter (:issue:`10967`) +- ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) .. _whatsnew_0210.performance: diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index f58eed74f760e..ce70fb94b6cd4 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1447,37 +1447,6 @@ def _values_for_rank(self): ) return values - def order(self, inplace=False, ascending=True, na_position='last'): - """ - DEPRECATED: use :meth:`Categorical.sort_values`. That function - is entirely equivalent to this one. - - See Also - -------- - Categorical.sort_values - """ - warn("order is deprecated, use sort_values(...)", FutureWarning, - stacklevel=2) - return self.sort_values(inplace=inplace, ascending=ascending, - na_position=na_position) - - def sort(self, inplace=True, ascending=True, na_position='last', **kwargs): - """ - DEPRECATED: use :meth:`Categorical.sort_values`. That function - is just like this one, except that a new Categorical is returned - by default, so make sure to pass in 'inplace=True' to get - inplace sorting. 
- - See Also - -------- - Categorical.sort_values - """ - warn("sort is deprecated, use sort_values(...)", FutureWarning, - stacklevel=2) - nv.validate_sort(tuple(), kwargs) - return self.sort_values(inplace=inplace, ascending=ascending, - na_position=na_position) - def ravel(self, order='C'): """ Return a flattened (numpy) array. diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 1ffe956b3a607..92177ca07d835 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -3067,12 +3067,6 @@ def test_sort_values(self): c = Categorical(["a", "b", "b", "a"], ordered=False) cat = Series(c.copy()) - # 'order' was deprecated in gh-10726 - # 'sort' was deprecated in gh-12882 - for func in ('order', 'sort'): - with tm.assert_produces_warning(FutureWarning): - getattr(c, func)() - # sort in the categories order expected = Series( Categorical(["a", "a", "b", "b"], From 196eb8e5c05952574dcdd5d0fb4d0a73e4bd6e91 Mon Sep 17 00:00:00 2001 From: Chris Date: Tue, 20 Jun 2017 01:36:44 +0200 Subject: [PATCH 696/933] Fix reading Series with read_hdf (#16610) * Added test to reproduce issue #16583 * Fix #16583 by adding an explicit `mode` argument to `read_hdf` kwargs which are meant for the opening of the HDFStore should be filtered out before passing the remaining kwargs to the `select` function to load the data. * Noted fix for #16583 in WhatsNew --- doc/source/whatsnew/v0.20.3.txt | 1 + pandas/io/pytables.py | 16 +++++++++------- pandas/tests/io/test_pytables.py | 11 +++++++++++ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index acd19a8b8da10..265f0c8d0cf59 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -56,6 +56,7 @@ I/O ^^^ -- Bug in ``pd.read_csv()`` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue`16675`) +-- Bug in ``pd.read_hdf()`` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f83380b18beb3..02b6a772e86b9 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -282,7 +282,7 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, f(path_or_buf) -def read_hdf(path_or_buf, key=None, **kwargs): +def read_hdf(path_or_buf, key=None, mode='r', **kwargs): """ read from the store, close it if we opened it Retrieve pandas object stored in file, optionally based on where @@ -290,13 +290,16 @@ def read_hdf(path_or_buf, key=None, **kwargs): Parameters ---------- - path_or_buf : path (string), buffer, or path object (pathlib.Path or - py._path.local.LocalPath) to read from + path_or_buf : path (string), buffer or path object (pathlib.Path or + py._path.local.LocalPath) designating the file to open, or an + already opened pd.HDFStore object .. versionadded:: 0.19.0 support for pathlib, py.path. key : group identifier in the store. Can be omitted if the HDF file contains a single pandas object. + mode : string, {'r', 'r+', 'a'}, default 'r'. Mode to use when opening + the file. Ignored if path_or_buf is a pd.HDFStore. 
where : list of Term (or convertable) objects, optional start : optional, integer (defaults to None), row number to start selection @@ -313,10 +316,9 @@ def read_hdf(path_or_buf, key=None, **kwargs): """ - if kwargs.get('mode', 'a') not in ['r', 'r+', 'a']: + if mode not in ['r', 'r+', 'a']: raise ValueError('mode {0} is not allowed while performing a read. ' - 'Allowed modes are r, r+ and a.' - .format(kwargs.get('mode'))) + 'Allowed modes are r, r+ and a.'.format(mode)) # grab the scope if 'where' in kwargs: kwargs['where'] = _ensure_term(kwargs['where'], scope_level=1) @@ -343,7 +345,7 @@ def read_hdf(path_or_buf, key=None, **kwargs): raise compat.FileNotFoundError( 'File %s does not exist' % path_or_buf) - store = HDFStore(path_or_buf, **kwargs) + store = HDFStore(path_or_buf, mode=mode, **kwargs) # can't auto open/close if we are using an iterator # so delegate to the iterator auto_close = True diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 86ff368e97b9e..676c5a21c38ca 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5247,6 +5247,17 @@ def test_query_compare_column_type(self): expected = df.loc[[], :] tm.assert_frame_equal(expected, result) + @pytest.mark.parametrize('format', ['fixed', 'table']) + def test_read_hdf_series_mode_r(self, format): + # GH 16583 + # Tests that reading a Series saved to an HDF file + # still works if a mode='r' argument is supplied + series = tm.makeFloatSeries() + with ensure_clean_path(self.path) as path: + series.to_hdf(path, key='data', format=format) + result = pd.read_hdf(path, key='data', mode='r') + tm.assert_series_equal(result, series) + @pytest.mark.skipif(sys.version_info < (3, 6), reason="Need python 3.6") def test_fspath(self): with tm.ensure_clean('foo.h5') as path: From 520f87b95639e4fc0344a6c6b9851b5cc5a1376b Mon Sep 17 00:00:00 2001 From: WANG Aiyong Date: Tue, 20 Jun 2017 15:12:11 +0800 Subject: [PATCH 697/933] DOC: typo (#16733) --- doc/source/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 340eb9aa053aa..e7c1cc13d103d 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3407,7 +3407,7 @@ Fixed Format This was prior to 0.13.0 the ``Storer`` format. The examples above show storing using ``put``, which write the HDF5 to ``PyTables`` in a fixed array format, called -the ``fixed`` format. These types of stores are are **not** appendable once written (though you can simply +the ``fixed`` format. These types of stores are **not** appendable once written (though you can simply remove them and rewrite). Nor are they **queryable**; they must be retrieved in their entirety. They also do not support dataframes with non-unique column names. The ``fixed`` format stores offer very fast writing and slightly faster reading than ``table`` stores. 
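For reference, a minimal sketch of the round-trip restored by the ``read_hdf``
fix above (GH 16583). This is illustrative only: it assumes PyTables is
installed, and the file name is made up.

    import pandas as pd

    # With the explicit mode parameter, file-opening keywords no longer leak
    # into the kwargs forwarded to HDFStore.select, so reading a Series
    # stored in 'fixed' format with an explicit mode='r' works again.
    series = pd.Series([1.0, 2.0, 3.0])
    series.to_hdf("example.h5", key="data", format="fixed")

    result = pd.read_hdf("example.h5", key="data", mode="r")
    pd.testing.assert_series_equal(result, series)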
From c38f2822786a81bc0820a0469a3163193ef688c3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 21 Jun 2017 06:38:09 -0400 Subject: [PATCH 698/933] whatsnew v0.21.0.txt typos (#16742) --- doc/source/whatsnew/v0.21.0.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 45c92717b60f0..2daa03e6e97b1 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -22,8 +22,8 @@ New features - Support for `PEP 519 -- Adding a file system path protocol `_ on most readers and writers (:issue:`13823`) -- Added `__fspath__` method to :class`:pandas.HDFStore`, :class:`pandas.ExcelFile`, - and :class:`pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`) +- Added ``__fspath__`` method to :class:`~pandas.HDFStore`, :class:`~pandas.ExcelFile`, + and :class:`~pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`) .. _whatsnew_0210.enhancements.other: @@ -33,12 +33,12 @@ Other Enhancements - The ``validate`` argument for :func:`merge` function now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception of type ``MergeError`` will be raised. For more, see :ref:`here ` (:issue:`16270`) - ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`) - ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`) -- ``Series.rename_axis()`` and ``DataFrame.rename_axis()`` with ``inplace=True`` now return None while renaming the axis inplace. (:issue:`15704`) -- :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__ -- :func:`api.types.infer_dtype` now infers decimals. (:issue: `15690`) +- ``Series.rename_axis()`` and ``DataFrame.rename_axis()`` with ``inplace=True`` now return ``None`` while renaming the axis inplace. (:issue:`15704`) +- :func:`to_pickle` has gained a ``protocol`` parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__ +- :func:`api.types.infer_dtype` now infers decimals. (:issue:`15690`) - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) -- :func:`DataFrame.clip()` and :func: `Series.cip()` have gained an inplace argument. (:issue: `15388`) -- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when margins=True. (:issue:`15972`) +- :func:`DataFrame.clip()` and :func: `Series.clip()` have gained an inplace argument. (:issue:`15388`) +- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`) .. _whatsnew_0210.api_breaking: @@ -98,13 +98,13 @@ Conversion Indexing ^^^^^^^^ -- When called with a null slice (e.g. ``df.iloc[:]``), the``iloc`` and ``loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). +- When called with a null slice (e.g. ``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. 
Previously they returned the original object. (:issue:`13873`). I/O ^^^ -- Bug in ``pd.read_csv()`` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) +- Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) Plotting @@ -114,7 +114,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug in ``DataFrame.resample().size()`` where an empty DataFrame did not return a Series (:issue:`14962`) +- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) From 25e057654f9b9d4196ebf02961867ae26fb93547 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 21 Jun 2017 06:40:56 -0400 Subject: [PATCH 699/933] whatsnew v0.20.3 edits (#16743) --- doc/source/whatsnew/v0.20.3.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 265f0c8d0cf59..c730142450ea6 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -50,13 +50,13 @@ Conversion Indexing ^^^^^^^^ -- Bug in ``Float64Index`` causing an empty array instead of None to be returned from ``.get(np.nan)`` on a Series whose index did not contain any NaNs (:issue:`8569`) +- Bug in ``Float64Index`` causing an empty array instead of ``None`` to be returned from ``.get(np.nan)`` on a Series whose index did not contain any ``NaN`` s (:issue:`8569`) I/O ^^^ --- Bug in ``pd.read_csv()`` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue`16675`) --- Bug in ``pd.read_hdf()`` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`) +- Bug in :func:`read_csv`` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`) +- Bug in :func:`read_hdf`` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`) Plotting ^^^^^^^^ From 8a98f5ed541c87a9bf101c9331bd6cfa8f007cc9 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Wed, 21 Jun 2017 06:52:19 -0400 Subject: [PATCH 700/933] BUG: do not raise UnsortedIndexError if sorting is not required closes #16734 Author: Pietro Battiston This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #16736 from toobaz/index_what_you_can and squashes the following commits: f77e2b3 [Pietro Battiston] BUG: do not raise UnsortedIndexError if sorting is not required --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/common.py | 7 +++++++ pandas/core/indexes/multi.py | 21 ++++++++------------- pandas/tests/indexes/test_multi.py | 7 ++++++- pandas/tests/indexing/test_multiindex.py | 17 +++++++++++++---- 5 files changed, 35 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2daa03e6e97b1..046a6c885bd24 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -99,6 +99,7 @@ Indexing ^^^^^^^^ - When called with a null slice (e.g. ``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). 
+- When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`). I/O diff --git a/pandas/core/common.py b/pandas/core/common.py index 0dc6a7a1e9c7b..ed768a5743666 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -411,6 +411,13 @@ def is_null_slice(obj): obj.stop is None and obj.step is None) +def is_true_slices(l): + """ + Find non-trivial slices in "l": return a list of booleans with same length. + """ + return [isinstance(k, slice) and not is_null_slice(k) for k in l] + + def is_full_slice(obj, l): """ we have a full length slice """ return (isinstance(obj, slice) and obj.start == 0 and obj.stop == l and diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f30da5b05f8ae..1a762732b1213 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -23,7 +23,8 @@ from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.core.common import (_values_from_object, is_bool_indexer, - is_null_slice) + is_null_slice, + is_true_slices) import pandas.core.base as base from pandas.util._decorators import (Appender, cache_readonly, @@ -1035,12 +1036,6 @@ def is_lexsorted(self): """ return self.lexsort_depth == self.nlevels - def is_lexsorted_for_tuple(self, tup): - """ - Return True if we are correctly lexsorted given the passed tuple - """ - return len(tup) <= self.lexsort_depth - @cache_readonly def lexsort_depth(self): if self.sortorder is not None: @@ -2262,12 +2257,12 @@ def get_locs(self, tup): """ # must be lexsorted to at least as many levels - if not self.is_lexsorted_for_tuple(tup): - raise UnsortedIndexError('MultiIndex Slicing requires the index ' - 'to be fully lexsorted tuple len ({0}), ' - 'lexsort depth ({1})' - .format(len(tup), self.lexsort_depth)) - + true_slices = [i for (i, s) in enumerate(is_true_slices(tup)) if s] + if true_slices and true_slices[-1] >= self.lexsort_depth: + raise UnsortedIndexError('MultiIndex slicing requires the index ' + 'to be lexsorted: slicing on levels {0}, ' + 'lexsort depth {1}' + .format(true_slices, self.lexsort_depth)) # indexer # this is the list of all values that we want to select n = len(self) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 3f6fd8c8aa827..ef8806246c2c5 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2826,8 +2826,13 @@ def test_unsortedindex(self): df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi, columns=['one', 'two']) + # GH 16734: not sorted, but no real slicing + result = df.loc(axis=0)['z', 'a'] + expected = df.iloc[0] + tm.assert_series_equal(result, expected) + with pytest.raises(UnsortedIndexError): - df.loc(axis=0)['z', :] + df.loc(axis=0)['z', slice('a')] df.sort_index(inplace=True) assert len(df.loc(axis=0)['z', :]) == 2 diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index fc6c627075c96..c12bb8910ffc9 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -817,9 +817,13 @@ def f(): assert df.index.lexsort_depth == 0 with tm.assert_raises_regex( UnsortedIndexError, - 'MultiIndex Slicing requires the index to be fully ' - r'lexsorted tuple len \(2\), lexsort depth \(0\)'): - df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] + 'MultiIndex slicing requires the index to be ' + r'lexsorted: slicing on levels \[1\], lexsort depth 0'): + 
df.loc[(slice(None), slice('bar')), :] + + # GH 16734: not sorted, but no real slicing + result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] + tm.assert_frame_equal(result, df.iloc[[1, 3], :]) def test_multiindex_slicers_non_unique(self): @@ -1001,9 +1005,14 @@ def test_per_axis_per_level_doc_examples(self): # not sorted def f(): - df.loc['A1', (slice(None), 'foo')] + df.loc['A1', ('a', slice('foo'))] pytest.raises(UnsortedIndexError, f) + + # GH 16734: not sorted, but no real slicing + tm.assert_frame_equal(df.loc['A1', (slice(None), 'foo')], + df.loc['A1'].iloc[:, [0, 2]]) + df = df.sort_index(axis=1) # slicing From 18f7b1ccee1c723cac7f23a16099d08c17ed0a91 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 22 Jun 2017 09:17:11 -0400 Subject: [PATCH 701/933] DOC: whatsnew typos --- doc/source/whatsnew/v0.21.0.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 046a6c885bd24..9d330cf3fdf2d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -37,7 +37,7 @@ Other Enhancements - :func:`to_pickle` has gained a ``protocol`` parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__ - :func:`api.types.infer_dtype` now infers decimals. (:issue:`15690`) - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) -- :func:`DataFrame.clip()` and :func: `Series.clip()` have gained an inplace argument. (:issue:`15388`) +- :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`) - :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`) .. _whatsnew_0210.api_breaking: @@ -66,7 +66,7 @@ Other API Changes Deprecations ~~~~~~~~~~~~ -- :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with to_excel() (:issue:`10559`). +- :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). .. _whatsnew_0210.prior_deprecations: @@ -74,7 +74,7 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- ``pd.read_excel()`` has dropped the ``has_index_names`` parameter (:issue:`10967`) +- :func:`read_excel()` has dropped the ``has_index_names`` parameter (:issue:`10967`) - ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) From 9c9d5fb64de1dda51c6ed4874bcec83dbb9e35ae Mon Sep 17 00:00:00 2001 From: Hussain Tamboli Date: Fri, 23 Jun 2017 19:19:48 +0530 Subject: [PATCH 702/933] Test for #16726. unittest that ensures datetime is understood (#16744) * Test for #16726. 
unittest that ensures datetime is understood * Corrected the test as suggested by @TomAugspurger * Fixed flake8 errors and warnings --- pandas/tests/series/test_datetime_values.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index e1fc9af0cca89..e810eadd2dee9 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -409,3 +409,13 @@ def test_date_tz(self): date(2015, 11, 22)]) assert_series_equal(s.dt.date, expected) assert_series_equal(s.apply(lambda x: x.date()), expected) + + def test_datetime_understood(self): + # Ensures it doesn't fail to create the right series + # reported in issue#16726 + series = pd.Series(pd.date_range("2012-01-01", periods=3)) + offset = pd.offsets.DateOffset(days=6) + result = series - offset + expected = pd.Series(pd.to_datetime([ + '2011-12-26', '2011-12-27', '2011-12-28'])) + tm.assert_series_equal(result, expected) From 1265c27f4bbd06e1bb75f846139a164bdadd5b31 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 23 Jun 2017 22:59:30 +0200 Subject: [PATCH 703/933] DOC: some rst fixes (#16763) --- doc/source/groupby.rst | 4 ++-- pandas/core/generic.py | 2 +- pandas/core/groupby.py | 2 -- pandas/core/series.py | 6 ++++-- pandas/io/parsers.py | 23 ++++++++++++----------- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 865f1ccae2c04..61f43146aba85 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -1200,14 +1200,14 @@ Regroup columns of a DataFrame according to their sum, and sum the aggregated on df df.groupby(df.sum(), axis=1).sum() -.. _groupby.multicolumn_factorization +.. _groupby.multicolumn_factorization: Multi-column factorization ~~~~~~~~~~~~~~~~~~~~~~~~~~ By using ``.ngroup()``, we can extract information about the groups in a way similar to :func:`factorize` (as described further in the -:ref:`reshaping API `) but which applies +:ref:`reshaping API `) but which applies naturally to multiple columns of mixed type and different sources. This can be useful as an intermediate categorical-like step in processing, when the relationships between the group rows are more diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fdf5d01484b98..7b56c30fcc9f6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1292,7 +1292,7 @@ def to_hdf(self, path_or_buf, key, **kwargs): As of v0.20.2 these additional compressors for Blosc are supported (default if no compressor specified: 'blosc:blosclz'): {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', - 'blosc:zlib', 'blosc:zstd'}. + 'blosc:zlib', 'blosc:zstd'}. Specifying a compression library which is not available issues a ValueError. 
fletcher32 : bool, default False diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 9d6d2297f6ea0..c4b3e25acae7e 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1212,8 +1212,6 @@ def ohlc(self): lambda x: x._cython_agg_general('ohlc')) @Appender(DataFrame.describe.__doc__) - @Substitution(name='groupby') - @Appender(_doc_template) def describe(self, **kwargs): self._set_group_selection() result = self.apply(lambda x: x.describe(**kwargs)) diff --git a/pandas/core/series.py b/pandas/core/series.py index 129f291e5f843..74d4f3f955f26 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1847,7 +1847,8 @@ def argsort(self, axis=0, kind='quicksort', order=None): dtype='int64').__finalize__(self) def nlargest(self, n=5, keep='first'): - """Return the largest `n` elements. + """ + Return the largest `n` elements. Parameters ---------- @@ -1893,7 +1894,8 @@ def nlargest(self, n=5, keep='first'): return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest() def nsmallest(self, n=5, keep='first'): - """Return the smallest `n` elements. + """ + Return the smallest `n` elements. Parameters ---------- diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9ec3f79e1ae70..0350849037391 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -152,7 +152,7 @@ Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: '""" + fill("', '".join(sorted(_NA_VALUES)), - 70, subsequent_indent=" ") + """'`. + 70, subsequent_indent=" ") + """'. keep_default_na : bool, default True If na_values are specified and keep_default_na is False the default NaN values are overridden, otherwise they're appended to. @@ -181,22 +181,23 @@ Note: A fast-path exists for iso8601-formatted dates. infer_datetime_format : boolean, default False - If True and parse_dates is enabled, pandas will attempt to infer the format - of the datetime strings in the columns, and if it can be inferred, switch - to a faster method of parsing them. In some cases this can increase the - parsing speed by 5-10x. + If True and `parse_dates` is enabled, pandas will attempt to infer the + format of the datetime strings in the columns, and if it can be inferred, + switch to a faster method of parsing them. In some cases this can increase + the parsing speed by 5-10x. keep_date_col : boolean, default False - If True and parse_dates specifies combining multiple columns then + If True and `parse_dates` specifies combining multiple columns then keep the original columns. date_parser : function, default None Function to use for converting a sequence of string columns to an array of datetime instances. The default uses ``dateutil.parser.parser`` to do the - conversion. Pandas will try to call date_parser in three different ways, + conversion. Pandas will try to call `date_parser` in three different ways, advancing to the next if an exception occurs: 1) Pass one or more arrays - (as defined by parse_dates) as arguments; 2) concatenate (row-wise) the - string values from the columns defined by parse_dates into a single array - and pass that; and 3) call date_parser once for each row using one or more - strings (corresponding to the columns defined by parse_dates) as arguments. 
+ (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the + string values from the columns defined by `parse_dates` into a single array + and pass that; and 3) call `date_parser` once for each row using one or + more strings (corresponding to the columns defined by `parse_dates`) as + arguments. dayfirst : boolean, default False DD/MM format dates, international and European format iterator : boolean, default False From f6f5ce5f9ce2afc3b6f55a3228b93024b121b88f Mon Sep 17 00:00:00 2001 From: Sam Foo Date: Tue, 27 Jun 2017 03:23:40 -0400 Subject: [PATCH 704/933] DOC: Update Sphinx Deprecated Directive (#16512) --- doc/source/io.rst | 29 +++++++++++---- doc/sphinxext/numpydoc/README.rst | 2 +- pandas/core/categorical.py | 7 ++-- pandas/core/common.py | 7 ++-- pandas/core/generic.py | 16 ++++---- pandas/core/indexes/datetimes.py | 10 +++-- pandas/core/indexes/period.py | 3 +- pandas/core/panel4d.py | 8 ++-- pandas/core/panelnd.py | 8 ++-- pandas/core/series.py | 5 ++- pandas/core/sparse/array.py | 4 +- pandas/core/strings.py | 2 +- pandas/core/window.py | 61 ++++++++++++++++++------------- pandas/io/parsers.py | 17 +++++---- pandas/io/sql.py | 12 ++++-- pandas/io/stata.py | 8 ++-- 16 files changed, 122 insertions(+), 77 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index e7c1cc13d103d..74a604339cddb 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -137,8 +137,10 @@ usecols : array-like or callable, default ``None`` Using this parameter results in much faster parsing time and lower memory usage. as_recarray : boolean, default ``False`` - DEPRECATED: this argument will be removed in a future version. Please call - ``pd.read_csv(...).to_records()`` instead. + + .. deprecated:: 0.18.2 + + Please call ``pd.read_csv(...).to_records()`` instead. Return a NumPy recarray instead of a DataFrame after parsing the data. If set to ``True``, this option takes precedence over the ``squeeze`` parameter. @@ -191,7 +193,11 @@ skiprows : list-like or integer, default ``None`` skipfooter : int, default ``0`` Number of lines at bottom of file to skip (unsupported with engine='c'). skip_footer : int, default ``0`` - DEPRECATED: use the ``skipfooter`` parameter instead, as they are identical + + .. deprecated:: 0.19.0 + + Use the ``skipfooter`` parameter instead, as they are identical + nrows : int, default ``None`` Number of rows of file to read. Useful for reading pieces of large files. low_memory : boolean, default ``True`` @@ -202,16 +208,25 @@ low_memory : boolean, default ``True`` use the ``chunksize`` or ``iterator`` parameter to return the data in chunks. (Only valid with C parser) buffer_lines : int, default None - DEPRECATED: this argument will be removed in a future version because its - value is not respected by the parser + + .. deprecated:: 0.19.0 + + Argument removed because its value is not respected by the parser + compact_ints : boolean, default False - DEPRECATED: this argument will be removed in a future version + + .. deprecated:: 0.19.0 + + Argument moved to ``pd.to_numeric`` If ``compact_ints`` is ``True``, then for any column that is of integer dtype, the parser will attempt to cast it as the smallest integer ``dtype`` possible, either signed or unsigned depending on the specification from the ``use_unsigned`` parameter. use_unsigned : boolean, default False - DEPRECATED: this argument will be removed in a future version + + .. deprecated:: 0.18.2 + + Argument moved to ``pd.to_numeric`` If integer columns are being compacted (i.e. 
``compact_ints=True``), specify whether the column should be compacted to the smallest signed or unsigned integer dtype. diff --git a/doc/sphinxext/numpydoc/README.rst b/doc/sphinxext/numpydoc/README.rst index 89b9f2fd23e9b..f91811ef9add6 100755 --- a/doc/sphinxext/numpydoc/README.rst +++ b/doc/sphinxext/numpydoc/README.rst @@ -46,6 +46,6 @@ The following options can be set in conf.py: methods and attributes. If a table of contents is made, Sphinx expects each entry to have a separate page. -- numpydoc_edit_link: bool (DEPRECATED -- edit your HTML template instead) +- numpydoc_edit_link: bool (DEPRECATED -- edit your HTML template instead) Whether to insert an edit link after docstrings. diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index ce70fb94b6cd4..796b2696af9ce 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -398,8 +398,8 @@ def itemsize(self): def reshape(self, new_shape, *args, **kwargs): """ - DEPRECATED: calling this method will raise an error in a - future release. + .. deprecated:: 0.19.0 + Calling this method will raise an error in a future release. An ndarray-compatible method that returns `self` because `Categorical` instances cannot actually be reshaped. @@ -430,7 +430,8 @@ def base(self): @classmethod def from_array(cls, data, **kwargs): """ - DEPRECATED: Use ``Categorical`` instead. + .. deprecated:: 0.19.0 + Use ``Categorical`` instead. Make a Categorical type from a single array-like object. diff --git a/pandas/core/common.py b/pandas/core/common.py index ed768a5743666..3b09e68c6433a 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -555,7 +555,8 @@ def in_qtconsole(): """ check if we're inside an IPython qtconsole - DEPRECATED: This is no longer needed, or working, in IPython 3 and above. + .. deprecated:: 0.14.1 + This is no longer needed, or working, in IPython 3 and above. """ try: ip = get_ipython() # noqa @@ -573,8 +574,8 @@ def in_ipnb(): """ check if we're inside an IPython Notebook - DEPRECATED: This is no longer used in pandas, and won't work in IPython 3 - and above. + .. deprecated:: 0.14.1 + This is no longer needed, or working, in IPython 3 and above. """ try: ip = get_ipython() # noqa diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7b56c30fcc9f6..6069757efc429 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1338,9 +1338,9 @@ def to_sql(self, name, con, flavor=None, schema=None, if_exists='fail', Using SQLAlchemy makes it possible to use any DB supported by that library. If a DBAPI2 object, only sqlite3 is supported. flavor : 'sqlite', default None - DEPRECATED: this parameter will be removed in a future version, - as 'sqlite' is the only supported option if SQLAlchemy is not - installed. + .. deprecated:: 0.19.0 + 'sqlite' is the only supported option if SQLAlchemy is not + used. schema : string, default None Specify the schema (if database flavor supports this). If None, use default schema. @@ -3498,7 +3498,9 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): .. versionadded:: 0.20.0 - raise_on_error : DEPRECATED use ``errors`` instead + raise_on_error : raise on invalid input + .. deprecated:: 0.20.0 + Use ``errors`` instead kwargs : keyword arguments to pass on to the constructor Returns @@ -3602,7 +3604,6 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, convert_timedeltas=True, copy=True): """ Deprecated. 
- Attempt to infer better dtype for object columns Parameters @@ -5809,8 +5810,9 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, - 'NaT' will return NaT where there are ambiguous times - 'raise' will raise an AmbiguousTimeError if there are ambiguous times - infer_dst : boolean, default False (DEPRECATED) - Attempt to infer fall dst-transition hours based on order + infer_dst : boolean, default False + .. deprecated:: 0.15.0 + Attempt to infer fall dst-transition hours based on order Returns ------- diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 239894cff3874..d8aae2367976b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -197,8 +197,9 @@ class DatetimeIndex(DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin, times) - 'NaT' will return NaT where there are ambiguous times - 'raise' will raise an AmbiguousTimeError if there are ambiguous times - infer_dst : boolean, default False (DEPRECATED) - Attempt to infer fall dst-transition hours based on order + infer_dst : boolean, default False + .. deprecated:: 0.15.0 + Attempt to infer fall dst-transition hours based on order name : object Name to be stored in the index @@ -1818,8 +1819,9 @@ def tz_localize(self, tz, ambiguous='raise', errors='raise'): .. versionadded:: 0.19.0 - infer_dst : boolean, default False (DEPRECATED) - Attempt to infer fall dst-transition hours based on order + infer_dst : boolean, default False + .. deprecated:: 0.15.0 + Attempt to infer fall dst-transition hours based on order Returns ------- diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index f8af6c8303d99..9d1a49e13c804 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -558,7 +558,8 @@ def asfreq(self, freq=None, how='E'): def to_datetime(self, dayfirst=False): """ - DEPRECATED: use :meth:`to_timestamp` instead. + .. deprecated:: 0.19.0 + Use :meth:`to_timestamp` instead. Cast to DatetimeIndex. """ diff --git a/pandas/core/panel4d.py b/pandas/core/panel4d.py index f32de29c5c167..16e7d0dfcc336 100644 --- a/pandas/core/panel4d.py +++ b/pandas/core/panel4d.py @@ -19,10 +19,10 @@ having 4 named dimensions. It is intended as a test bed for more N-Dimensional named containers. - DEPRECATED. Panel4D is deprecated and will be removed in a future version. - The recommended way to represent these types of n-dimensional data are with - the `xarray package `__. - Pandas provides a `.to_xarray()` method to automate this conversion. + .. deprecated:: 0.19.0 + The recommended way to represent these types of n-dimensional data + are with the `xarray package `__. + Pandas provides a `.to_xarray()` method to automate this conversion. Parameters ---------- diff --git a/pandas/core/panelnd.py b/pandas/core/panelnd.py index 26ceeea654e4e..7a5cb63cd4f07 100644 --- a/pandas/core/panelnd.py +++ b/pandas/core/panelnd.py @@ -9,10 +9,10 @@ def create_nd_panel_factory(klass_name, orders, slices, slicer, aliases=None, stat_axis=2, info_axis=0, ns=None): """ manufacture a n-d class: - DEPRECATED. Panelnd is deprecated and will be removed in a future version. - The recommended way to represent these types of n-dimensional data are with - the `xarray package `__. - Pandas provides a `.to_xarray()` method to automate this conversion. + .. deprecated:: 0.19.0 + The recommended way to represent these types of n-dimensional data + are with the `xarray package `__. + Pandas provides a `.to_xarray()` method to automate this conversion. 
Parameters ---------- diff --git a/pandas/core/series.py b/pandas/core/series.py index 74d4f3f955f26..e1f668dd3afda 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -850,8 +850,9 @@ def repeat(self, repeats, *args, **kwargs): def reshape(self, *args, **kwargs): """ - DEPRECATED: calling this method will raise an error in a - future release. Please call ``.values.reshape(...)`` instead. + .. deprecated:: 0.19.0 + Calling this method will raise an error. Please call + ``.values.reshape(...)`` instead. return an ndarray with the values shape if the specified shape matches exactly the current shape, then diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index c75de01b98e4e..5c1cf8c773501 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -391,8 +391,8 @@ def to_dense(self, fill=None): Parameters ---------- fill: float, default None - DEPRECATED: this argument will be removed in a future version - because it is not respected by this function. + .. deprecated:: 0.20.0 + This argument is not respected by this function. Returns ------- diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 4814e78ee6546..cd7e313b13f1e 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -478,7 +478,7 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=None): flags : int, default 0 (no flags) re module flags, e.g. re.IGNORECASE na : default NaN, fill value for missing values. - as_indexer : DEPRECATED + as_indexer : DEPRECATED - Keyword is ignored. Returns ------- diff --git a/pandas/core/window.py b/pandas/core/window.py index ba7e79944ab0e..01b1bdc3e5054 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -342,21 +342,23 @@ def aggregate(self, arg, *args, **kwargs): Parameters ---------- - how : string, default None (DEPRECATED) - Method for down- or re-sampling""") + how : string, default None + .. deprecated:: 0.18.0 + Method for down- or re-sampling""") _shared_docs['mean'] = dedent(""" %(name)s mean Parameters ---------- - how : string, default None (DEPRECATED) - Method for down- or re-sampling""") + how : string, default None + .. deprecated:: 0.18.0 + Method for down- or re-sampling""") class Window(_Window): """ - Provides rolling window calculcations. + Provides rolling window calculations. .. versionadded:: 0.18.0 @@ -374,9 +376,10 @@ class Window(_Window): Minimum number of observations in window required to have a value (otherwise result is NA). For a window that is specified by an offset, this will default to 1. - freq : string or DateOffset object, optional (default None) (DEPRECATED) - Frequency to conform the data to before computing the statistic. - Specified as a frequency string or DateOffset object. + freq : string or DateOffset object, optional (default None) + .. deprecated:: 0.18.0 + Frequency to conform the data to before computing the statistic. + Specified as a frequency string or DateOffset object. center : boolean, default False Set the labels at the center of the window. win_type : string, default None @@ -571,8 +574,9 @@ def _apply_window(self, mean=True, how=None, **kwargs): ---------- mean : boolean, default True If True computes weighted mean, else weighted sum - how : string, default to None (DEPRECATED) - how to resample + how : string, default to None + .. 
deprecated:: 0.18.0 + how to resample Returns ------- @@ -736,8 +740,9 @@ def _apply(self, func, name=None, window=None, center=None, window : int/array, default to _get_window() center : boolean, default to self.center check_minp : function, default to _use_window - how : string, default to None (DEPRECATED) - how to resample + how : string, default to None + .. deprecated:: 0.18.0 + how to resample Returns ------- @@ -864,8 +869,9 @@ def sum(self, *args, **kwargs): Parameters ---------- - how : string, default 'max' (DEPRECATED) - Method for down- or re-sampling""") + how : string, default 'max' + .. deprecated:: 0.18.0 + Method for down- or re-sampling""") def max(self, how=None, *args, **kwargs): nv.validate_window_func('max', args, kwargs) @@ -878,8 +884,9 @@ def max(self, how=None, *args, **kwargs): Parameters ---------- - how : string, default 'min' (DEPRECATED) - Method for down- or re-sampling""") + how : string, default 'min' + .. deprecated:: 0.18.0 + Method for down- or re-sampling""") def min(self, how=None, *args, **kwargs): nv.validate_window_func('min', args, kwargs) @@ -896,8 +903,9 @@ def mean(self, *args, **kwargs): Parameters ---------- - how : string, default 'median' (DEPRECATED) - Method for down- or re-sampling""") + how : string, default 'median' + .. deprecated:: 0.18.0 + Method for down- or re-sampling""") def median(self, how=None, **kwargs): if self.freq is not None and how is None: @@ -1329,9 +1337,10 @@ class Expanding(_Rolling_and_Expanding): min_periods : int, default None Minimum number of observations in window required to have a value (otherwise result is NA). - freq : string or DateOffset object, optional (default None) (DEPRECATED) - Frequency to conform the data to before computing the statistic. - Specified as a frequency string or DateOffset object. + freq : string or DateOffset object, optional (default None) + .. deprecated:: 0.18.0 + Frequency to conform the data to before computing the statistic. + Specified as a frequency string or DateOffset object. center : boolean, default False Set the labels at the center of the window. axis : int or string, default 0 @@ -1593,8 +1602,9 @@ class EWM(_Rolling): min_periods : int, default 0 Minimum number of observations in window required to have a value (otherwise result is NA). - freq : None or string alias / date offset object, default=None (DEPRECATED) - Frequency to conform to before computing statistic + freq : None or string alias / date offset object, default=None + .. deprecated:: 0.18.0 + Frequency to conform to before computing statistic adjust : boolean, default True Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings (viewing EWMA as a moving average) @@ -1727,8 +1737,9 @@ def _apply(self, func, how=None, **kwargs): Parameters ---------- func : string/callable to apply - how : string, default to None (DEPRECATED) - how to resample + how : string, default to None + .. deprecated:: 0.18.0 + how to resample Returns ------- diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 0350849037391..343bc7a74fde8 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -105,8 +105,8 @@ ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster parsing time and lower memory usage. as_recarray : boolean, default False - DEPRECATED: this argument will be removed in a future version. Please call - `pd.read_csv(...).to_records()` instead. + .. deprecated:: 0.19.0 + Please call `pd.read_csv(...).to_records()` instead. 
Return a NumPy recarray instead of a DataFrame after parsing the data. If set to True, this option takes precedence over the `squeeze` parameter. @@ -145,7 +145,8 @@ skipfooter : int, default 0 Number of lines at bottom of file to skip (Unsupported with engine='c') skip_footer : int, default 0 - DEPRECATED: use the `skipfooter` parameter instead, as they are identical + .. deprecated:: 0.19.0 + Use the `skipfooter` parameter instead, as they are identical nrows : int, default None Number of rows of file to read. Useful for reading pieces of large files na_values : scalar, str, list-like, or dict, default None @@ -277,17 +278,19 @@ use the `chunksize` or `iterator` parameter to return the data in chunks. (Only valid with C parser) buffer_lines : int, default None - DEPRECATED: this argument will be removed in a future version because its - value is not respected by the parser + .. deprecated:: 0.19.0 + This argument is not respected by the parser compact_ints : boolean, default False - DEPRECATED: this argument will be removed in a future version + .. deprecated:: 0.19.0 + Argument moved to ``pd.to_numeric`` If compact_ints is True, then for any column that is of integer dtype, the parser will attempt to cast it as the smallest integer dtype possible, either signed or unsigned depending on the specification from the `use_unsigned` parameter. use_unsigned : boolean, default False - DEPRECATED: this argument will be removed in a future version + .. deprecated:: 0.19.0 + Argument moved to ``pd.to_numeric`` If integer columns are being compacted (i.e. `compact_ints=True`), specify whether the column should be compacted to the smallest signed or unsigned diff --git a/pandas/io/sql.py b/pandas/io/sql.py index ee992c6dd3439..0dbef66616e43 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -432,7 +432,9 @@ def to_sql(frame, name, con, flavor=None, schema=None, if_exists='fail', library. If a DBAPI2 object, only sqlite3 is supported. flavor : 'sqlite', default None - DEPRECATED: this parameter will be removed in a future version + .. deprecated:: 0.19.0 + 'sqlite' is the only supported option if SQLAlchemy is not + used. schema : string, default None Name of SQL schema in database to write to (if database flavor supports this). If None, use default schema (default). @@ -484,7 +486,9 @@ def has_table(table_name, con, flavor=None, schema=None): library. If a DBAPI2 object, only sqlite3 is supported. flavor : 'sqlite', default None - DEPRECATED: this parameter will be removed in a future version + .. deprecated:: 0.19.0 + 'sqlite' is the only supported option if SQLAlchemy is not + installed. schema : string, default None Name of SQL schema in database to write to (if database flavor supports this). If None, use default schema (default). @@ -1545,7 +1549,9 @@ def get_schema(frame, name, flavor=None, keys=None, con=None, dtype=None): library, default: None If a DBAPI2 object, only sqlite3 is supported. flavor : 'sqlite', default None - DEPRECATED: this parameter will be removed in a future version + .. deprecated:: 0.19.0 + 'sqlite' is the only supported option if SQLAlchemy is not + installed. dtype : dict of column name to SQL type, default None Optional specifying the datatype for columns. The SQL type should be a SQLAlchemy type, or a string for sqlite3 fallback connection. 
diff --git a/pandas/io/stata.py b/pandas/io/stata.py index e03e87f09173e..107dccfc8175c 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -110,9 +110,11 @@ _statafile_processing_params2, _chunksize_params, _iterator_params) -_data_method_doc = """Reads observations from Stata file, converting them into a dataframe +_data_method_doc = """\ +Reads observations from Stata file, converting them into a dataframe -This is a legacy method. Use `read` in new code. + .. deprecated:: + This is a legacy method. Use `read` in new code. Parameters ---------- @@ -1407,7 +1409,7 @@ def _read_strls(self): self.GSO[str(v_o)] = va # legacy - @Appender('DEPRECATED: ' + _data_method_doc) + @Appender(_data_method_doc) def data(self, **kwargs): import warnings From aade74a13bb8105328a0997eeb910a96080fac04 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 27 Jun 2017 08:22:09 -0700 Subject: [PATCH 705/933] MAINT: Drop Index.sym_diff (#16760) Deprecated in 0.18.1 xref gh-12591, gh-12594 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/base.py | 6 ++---- pandas/tests/indexes/common.py | 6 +----- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9d330cf3fdf2d..74763dbc1c71a 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -75,6 +75,7 @@ Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - :func:`read_excel()` has dropped the ``has_index_names`` parameter (:issue:`10967`) +- ``Index`` has dropped the ``.sym_diff()`` method in favor of ``.symmetric_difference()`` (:issue:`12591`) - ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 028464ad5cd89..695f9f119baa2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -41,8 +41,8 @@ from pandas.core.base import PandasObject, IndexOpsMixin import pandas.core.base as base -from pandas.util._decorators import (Appender, Substitution, cache_readonly, - deprecate, deprecate_kwarg) +from pandas.util._decorators import (Appender, Substitution, + cache_readonly, deprecate_kwarg) from pandas.core.indexes.frozen import FrozenList import pandas.core.common as com import pandas.core.dtypes.concat as _concat @@ -2376,8 +2376,6 @@ def symmetric_difference(self, other, result_name=None): attribs['freq'] = None return self._shallow_copy_with_infer(the_diff, **attribs) - sym_diff = deprecate('sym_diff', symmetric_difference) - def _get_unique_index(self, dropna=False): """ Returns an index containing unique values. 
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index bbde902fb87bf..a6177104d6273 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -656,11 +656,7 @@ def test_symmetric_difference(self): if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" with tm.assert_raises_regex(TypeError, msg): - result = first.symmetric_difference([1, 2, 3]) - - # 12591 deprecated - with tm.assert_produces_warning(FutureWarning): - first.sym_diff(second) + first.symmetric_difference([1, 2, 3]) def test_insert_base(self): From 65a0e642e1270791e6586c967758b362b865b6a8 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 27 Jun 2017 08:23:44 -0700 Subject: [PATCH 706/933] MAINT: Drop pd.options.display.mpl_style (#16761) Deprecated in 0.18.0 xref gh-12190 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/config_init.py | 32 ----------------------------- pandas/tests/plotting/test_frame.py | 16 --------------- 3 files changed, 1 insertion(+), 48 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 74763dbc1c71a..de2516d75040b 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -75,6 +75,7 @@ Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - :func:`read_excel()` has dropped the ``has_index_names`` parameter (:issue:`10967`) +- The ``pd.options.display.mpl_style`` configuration has been dropped (:issue:`12190`) - ``Index`` has dropped the ``.sym_diff()`` method in favor of ``.symmetric_difference()`` (:issue:`12591`) - ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 7e6ffaaffb72b..e70db1d13e376 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -9,8 +9,6 @@ module is imported, register them here rather then in the module. """ -import warnings - import pandas.core.config as cf from pandas.core.config import (is_int, is_bool, is_text, is_instance_factory, is_one_of_factory, get_default_val, @@ -313,33 +311,6 @@ def use_numexpr_cb(key): style_backup = dict() -def mpl_style_cb(key): - warnings.warn(pc_mpl_style_deprecation_warning, FutureWarning, - stacklevel=5) - - import sys - from pandas.plotting._style import mpl_stylesheet - global style_backup - - val = cf.get_option(key) - - if 'matplotlib' not in sys.modules.keys(): - if not val: # starting up, we get reset to None - return val - raise Exception("matplotlib has not been imported. 
aborting") - - import matplotlib.pyplot as plt - - if val == 'default': - style_backup = dict([(k, plt.rcParams[k]) for k in mpl_stylesheet]) - plt.rcParams.update(mpl_stylesheet) - elif not val: - if style_backup: - plt.rcParams.update(style_backup) - - return val - - def table_schema_cb(key): from pandas.io.formats.printing import _enable_data_resource_formatter _enable_data_resource_formatter(cf.get_option(key)) @@ -382,9 +353,6 @@ def table_schema_cb(key): validator=is_one_of_factory([True, False, 'truncate'])) cf.register_option('chop_threshold', None, pc_chop_threshold_doc) cf.register_option('max_seq_items', 100, pc_max_seq_items) - cf.register_option('mpl_style', None, pc_mpl_style_doc, - validator=is_one_of_factory([None, False, 'default']), - cb=mpl_style_cb) cf.register_option('height', 60, pc_height_doc, validator=is_instance_factory([type(None), int])) cf.register_option('width', 80, pc_width_doc, diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index fc9ef132b2754..352c03582db93 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -17,8 +17,6 @@ import pandas.util.testing as tm from pandas.util.testing import slow -from pandas.core.config import set_option - import numpy as np from numpy.random import rand, randn @@ -2682,20 +2680,6 @@ def test_df_grid_settings(self): DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4]}), plotting._core._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) - def test_option_mpl_style(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - set_option('display.mpl_style', 'default') - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - set_option('display.mpl_style', None) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - set_option('display.mpl_style', False) - - with pytest.raises(ValueError): - set_option('display.mpl_style', 'default2') - def test_invalid_colormap(self): df = DataFrame(randn(3, 2), columns=['A', 'B']) From 18c4f88526712c5ab97253030e1b0ad9c555c9ef Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 28 Jun 2017 07:09:04 +0200 Subject: [PATCH 707/933] DOC: remove section on Panel4D support in HDF io (#16783) --- doc/source/io.rst | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 74a604339cddb..e1e82f686f182 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4474,34 +4474,6 @@ Performance `Here `__ for more information and some solutions. -Experimental -'''''''''''' - -HDFStore supports ``Panel4D`` storage. - -.. ipython:: python - :okwarning: - - wp = pd.Panel(randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=pd.date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - p4d = pd.Panel4D({ 'l1' : wp }) - p4d - store.append('p4d', p4d) - store - -These, by default, index the three axes ``items, major_axis, -minor_axis``. On an ``AppendableTable`` it is possible to setup with the -first append a different indexing scheme, depending on how you want to -store your data. Pass the ``axes`` keyword with a list of dimensions -(currently must by exactly 1 less than the total dimensions of the -object). This cannot be changed after table creation. - -.. ipython:: python - :okwarning: - - store.append('p4d2', p4d, axes=['labels', 'major_axis', 'minor_axis']) - store.select('p4d2', where='labels=l1 and items=Item1 and minor_axis=A') .. 
ipython:: python
   :suppress:

From 664348c440ccb4dab3d4c420d6aaee7c688c9b0e Mon Sep 17 00:00:00 2001
From: topper-123
Date: Wed, 28 Jun 2017 06:29:18 +0100
Subject: [PATCH 708/933] DOC: add section on data validation and library engarde (#16758)

---
 doc/source/ecosystem.rst | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst
index 31849fc142aea..2348a3d10c54f 100644
--- a/doc/source/ecosystem.rst
+++ b/doc/source/ecosystem.rst
@@ -239,3 +239,14 @@ pandas own ``read_csv`` for CSV IO and leverages many existing packages such as
 PyTables, h5py, and pymongo to move data between non pandas formats. Its graph
 based approach is also extensible by end users for custom formats that may be
 too specific for the core of odo.
+
+.. _ecosystem.data_validation:
+
+Data validation
+---------------
+
+`Engarde `__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Engarde is a lightweight library used to explicitly state your assumptions about your datasets
+and check that they're *actually* true.

From 85740a5b6285e529931aab7a205468e57d023b7f Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Fri, 30 Jun 2017 11:53:12 -0500
Subject: [PATCH 709/933] TST: register slow marker (#16797)

* TST: register slow marker

* Update setup.cfg

---
 doc/source/whatsnew/v0.20.3.txt | 2 +-
 setup.cfg                       | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt
index c730142450ea6..17195715d0f95 100644
--- a/doc/source/whatsnew/v0.20.3.txt
+++ b/doc/source/whatsnew/v0.20.3.txt
@@ -37,7 +37,7 @@ Performance Improvements
 Bug Fixes
 ~~~~~~~~~
 - Fixed issue with dataframe scatter plot for categorical data that reports incorrect column key not found when categorical data is used for plotting (:issue:`16199`)
-
+- Fixed a pytest marker failing downstream packages' test suites (:issue:`16680`)

diff --git a/setup.cfg b/setup.cfg
index 8b32f0f62fe28..1f9bea6718a4d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -22,8 +22,7 @@ split_penalty_after_opening_bracket = 1000000
 split_penalty_logical_operator = 30

 [tool:pytest]
-# TODO: Change all yield-based (nose-style) fixutures to pytest fixtures
-# Silencing the warning until then
 testpaths = pandas
 markers = single: mark a test as single cpu only
+    slow: mark a test as slow
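A usage note for the marker registration above: once ``slow`` is declared
under ``[tool:pytest]``, tests can be marked and deselected in the standard
pytest way. A minimal sketch; the test body is illustrative:

    import pytest

    @pytest.mark.slow
    def test_expensive_path():
        # Deselect marked tests with: pytest -m "not slow"
        assert sum(range(100)) == 4950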
--- .github/PULL_REQUEST_TEMPLATE.md | 2 +- doc/source/contributing.rst | 6 +++++ doc/source/whatsnew/v0.20.3.txt | 1 + pandas/core/indexes/category.py | 3 +++ pandas/tests/test_join.py | 44 ++++++++++++++++++++++++++++++-- 5 files changed, 53 insertions(+), 3 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 9281c51059087..959858fb50f89 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,4 @@ - [ ] closes #xxxx - [ ] tests added / passed - - [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff`` + - [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff`` (On Windows, ``git diff upstream/master -u -- "*.py" | flake8 --diff`` might work as an alternative.) - [ ] whatsnew entry diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index aacfe25b91564..cd444f796fabb 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -525,6 +525,12 @@ run this slightly modified command:: git diff master --name-only -- '*.py' | grep 'pandas/' | xargs flake8 +Note that on Windows, ``grep``, ``xargs``, and other tools are likely +unavailable. However, this has been shown to work on smaller commits in the +standard Windows command line:: + + git diff master -u -- "*.py" | flake8 --diff + Backwards Compatibility ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 17195715d0f95..f8916f5464276 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -78,6 +78,7 @@ Sparse Reshaping ^^^^^^^^^ +- Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`). Numeric diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d9e0c218bfafc..d13636e8b43e2 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -560,6 +560,9 @@ def take(self, indices, axis=0, allow_fill=True, na_value=-1) return self._create_from_codes(taken) + def is_dtype_equal(self, other): + return self._data.is_dtype_equal(other) + take_nd = take def map(self, mapper): diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py index 3fc13d23b53f7..cde1cab37d09c 100644 --- a/pandas/tests/test_join.py +++ b/pandas/tests/test_join.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- import numpy as np -from pandas import Index +from pandas import Index, DataFrame, Categorical, merge from pandas._libs import join as _join import pandas.util.testing as tm -from pandas.util.testing import assert_almost_equal +from pandas.util.testing import assert_almost_equal, assert_frame_equal class TestIndexer(object): @@ -192,3 +192,43 @@ def test_inner_join_indexer2(): exp_ridx = np.array([0, 1, 2, 3], dtype=np.int64) assert_almost_equal(ridx, exp_ridx) + + +def test_merge_join_categorical_multiindex(): + # From issue 16627 + a = {'Cat1': Categorical(['a', 'b', 'a', 'c', 'a', 'b'], + ['a', 'b', 'c']), + 'Int1': [0, 1, 0, 1, 0, 0]} + a = DataFrame(a) + + b = {'Cat': Categorical(['a', 'b', 'c', 'a', 'b', 'c'], + ['a', 'b', 'c']), + 'Int': [0, 0, 0, 1, 1, 1], + 'Factor': [1.1, 1.2, 1.3, 1.4, 1.5, 1.6]} + b = DataFrame(b).set_index(['Cat', 'Int'])['Factor'] + + expected = merge(a, b.reset_index(), left_on=['Cat1', 'Int1'], + right_on=['Cat', 'Int'], how='left') + result = a.join(b, on=['Cat1', 'Int1']) + expected = expected.drop(['Cat', 'Int'], axis=1) + assert_frame_equal(expected, result) + + # Same test, but with ordered 
categorical + a = {'Cat1': Categorical(['a', 'b', 'a', 'c', 'a', 'b'], + ['b', 'a', 'c'], + ordered=True), + 'Int1': [0, 1, 0, 1, 0, 0]} + a = DataFrame(a) + + b = {'Cat': Categorical(['a', 'b', 'c', 'a', 'b', 'c'], + ['b', 'a', 'c'], + ordered=True), + 'Int': [0, 0, 0, 1, 1, 1], + 'Factor': [1.1, 1.2, 1.3, 1.4, 1.5, 1.6]} + b = DataFrame(b).set_index(['Cat', 'Int'])['Factor'] + + expected = merge(a, b.reset_index(), left_on=['Cat1', 'Int1'], + right_on=['Cat', 'Int'], how='left') + result = a.join(b, on=['Cat1', 'Int1']) + expected = expected.drop(['Cat', 'Int'], axis=1) + assert_frame_equal(expected, result) From 5e776fb6cf4e7b3ae0f36b480e1f4e5da154b313 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 30 Jun 2017 22:49:12 +0300 Subject: [PATCH 711/933] Bug in pd.merge() when merge/join with multiple categorical columns (#16786) closes #16767 --- doc/source/whatsnew/v0.20.3.txt | 5 +++-- pandas/core/reshape/merge.py | 9 +++++---- pandas/tests/reshape/test_merge.py | 23 +++++++++++++++++++++++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index f8916f5464276..0fac6367fd3a5 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -55,8 +55,8 @@ Indexing I/O ^^^ -- Bug in :func:`read_csv`` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`) -- Bug in :func:`read_hdf`` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`) +- Bug in :func:`read_csv` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`) +- Bug in :func:`read_hdf` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`) Plotting ^^^^^^^^ @@ -79,6 +79,7 @@ Reshaping ^^^^^^^^^ - Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`). 
+- Bug in :func:`merge` when merging/joining with multiple categorical columns (:issue:`16767`) Numeric diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index ffe0cac33ec8f..beebe06e7477e 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1440,13 +1440,14 @@ def _factorize_keys(lk, rk, sort=True): lk = lk.values rk = rk.values - # if we exactly match in categories, allow us to use codes + # if we exactly match in categories, allow us to factorize on codes if (is_categorical_dtype(lk) and is_categorical_dtype(rk) and lk.is_dtype_equal(rk)): - return lk.codes, rk.codes, len(lk.categories) - - if is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk): + klass = libhashtable.Int64Factorizer + lk = _ensure_int64(lk.codes) + rk = _ensure_int64(rk.codes) + elif is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk): klass = libhashtable.Int64Factorizer lk = _ensure_int64(com._values_from_object(lk)) rk = _ensure_int64(com._values_from_object(rk)) diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index bacb605199e4a..4ac376a9752cb 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -1480,6 +1480,29 @@ def test_dtype_on_merged_different(self, change, how, left, right): index=['X', 'Y', 'Z']) assert_series_equal(result, expected) + def test_self_join_multiple_categories(self): + # GH 16767 + # non-duplicates should work with multiple categories + m = 5 + df = pd.DataFrame({ + 'a': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] * m, + 'b': ['t', 'w', 'x', 'y', 'z'] * 2 * m, + 'c': [letter + for each in ['m', 'n', 'u', 'p', 'o'] + for letter in [each] * 2 * m], + 'd': [letter + for each in ['aa', 'bb', 'cc', 'dd', 'ee', + 'ff', 'gg', 'hh', 'ii', 'jj'] + for letter in [each] * m]}) + + # change them all to categorical variables + df = df.apply(lambda x: x.astype('category')) + + # self-join should equal ourselves + result = pd.merge(df, df, on=list(df.columns)) + + assert_frame_equal(result, df) + @pytest.fixture def left_df(): From 794e06032a34c62a2f8757e7d92820192301fa1e Mon Sep 17 00:00:00 2001 From: Forbidden Donut Date: Fri, 30 Jun 2017 13:05:54 -0700 Subject: [PATCH 712/933] BUG: Fix read of py3 PeriodIndex DataFrame HDF made in py2 (#16781) (#16790) In Python3, reading a DataFrame with a PeriodIndex from an HDF file created in Python2 would incorrectly return a DataFrame with an Int64Index. 
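A sketch of the affected round trip (a single-interpreter round trip cannot
reproduce the cross-version failure itself, so this only illustrates the
usage; the file name is arbitrary):

    import pandas as pd

    df = pd.DataFrame({'a': range(4)},
                      index=pd.period_range('2017-01', periods=4, freq='M'))
    df.to_hdf('periodindex.h5', 'df', mode='w', format='fixed')

    # Under Python 3, the ``index_class`` attribute stored by Python 2
    # decoded as bytes, so the index factory lookup missed and a plain
    # Int64Index came back; after this fix the PeriodIndex is restored.
    roundtripped = pd.read_hdf('periodindex.h5', 'df')
    assert isinstance(roundtripped.index, pd.PeriodIndex)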
--- doc/source/whatsnew/v0.20.3.txt | 1 + pandas/io/pytables.py | 4 ++-- ...periodindex_0.20.1_x86_64_darwin_2.7.13.h5 | Bin 0 -> 7312 bytes pandas/tests/io/test_pytables.py | 20 ++++++++++++++++++ 4 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 pandas/tests/io/data/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 0fac6367fd3a5..ebbdecb7dc0d0 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -38,6 +38,7 @@ Bug Fixes ~~~~~~~~~ - Fixed issue with dataframe scatter plot for categorical data that reports incorrect column key not found when categorical data is used for plotting (:issue:`16199`) - Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) +- Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 02b6a772e86b9..4e343556c083b 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2591,8 +2591,8 @@ def read_index_node(self, node, start=None, stop=None): if 'name' in node._v_attrs: name = _ensure_str(node._v_attrs.name) - index_class = self._alias_to_class(getattr(node._v_attrs, - 'index_class', '')) + index_class = self._alias_to_class(_ensure_decoded( + getattr(node._v_attrs, 'index_class', ''))) factory = self._get_index_factory(index_class) kwargs = {} diff --git a/pandas/tests/io/data/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 b/pandas/tests/io/data/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 new file mode 100644 index 0000000000000000000000000000000000000000..6fb92d3c564bdfc290313ab4ef0ca63e1574711e GIT binary patch literal 7312 zcmeGgL2}zhY-u|Qvr$r|Z91Jy558HKY9!Zb(^IU}a;+KH@<>X?KG8I__OO;nmP@5^ zUy$DVj2`=h9CPVR9*}l=%rQSmfnC5o?_HDe!S! zu^_6Z%gtMQe)-H|c+B&A)Nu$tBlzo_?KtA9z`0KeU#0aI=Plwj;Rn5TTL2}m&yc3Y zi>tQhGGYcVl?FE2t!;ea9eQiF(idcBtc-*@Y>yi}@ju~hve%+rS47RMi1zKm-=H(t z1OpZy!4GA#d_x3Hk^Y;_R=*E;q`v|`rvJ9reS|NX)8p!{c--#wJKZfqkFLY?&3Xey zNPpiz2Rseww|57vhnwxbx8Ci2*BW?b{LA$w@m0;5ML(^Q!#yW~|K_KE*+_EY?`H8? zb7?eC^iyk#lca-d*Y8KO6TJ+Q)3*S*!v<5&UpG;-8BcO z$XtcHV2RjI8Z_Q6?Bk53E;WkJ=jt7+28>1c@*sNW1PXbYHfAZ5HcShXF1<3BhUB?xJMeQ>=-yu-Zl6!U#bo#BR_@lO^c%m zJP+`39E)IjhWx#29~(09+5$L}T(i#H+?*kGVIAzuJGF`>VNbK859w`?=G7;Uc8m5> zcHXsmz1A-9%PKcM+4O?kbrI|gl|1!cF zP5gGZjKbpiJblic^T{v#?5of17XKjlCqavS^MGDp)k2pbVP{~bftdzo8klKdrh%CT z-YE^t!#xP*cE!9k&)cqIEZTYh%S|m(%rRqS7!1 zi>-(^X5KNCmMacY^#dZ1{&)DX-3LLO9D7jRNoayN(pfqQvN)AVQ`Q1o_0}^Dt14ZO z4-T}-w0t)P9k52!|56)QKy%3C5V_q&am- Date: Sat, 1 Jul 2017 04:11:19 +0800 Subject: [PATCH 713/933] BUG: Fix Series doesn't work in pd.astype(). Now treat Series as dict. (#16725) --- doc/source/whatsnew/v0.20.3.txt | 3 ++- pandas/core/generic.py | 6 +++--- pandas/tests/frame/test_dtypes.py | 31 ++++++++++++++++++++++-------- pandas/tests/series/test_dtypes.py | 21 +++++++++++++++----- 4 files changed, 44 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index ebbdecb7dc0d0..8997d68c65615 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -46,7 +46,8 @@ Conversion ^^^^^^^^^^ - Bug in pickle compat prior to the v0.20.x series, when ``UTC`` is a timezone in a Series/DataFrame/Index (:issue:`16608`) -- Bug in Series construction when passing a Series with ``dtype='category'`` (:issue:`16524`). 
+- Bug in ``Series`` construction when passing a ``Series`` with ``dtype='category'`` (:issue:`16524`). +- Bug in ``DataFrame.astype()`` when passing a ``Series`` as the ``dtype`` kwarg. (:issue:`16717`). Indexing ^^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6069757efc429..4d4297d41c2aa 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3507,12 +3507,12 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): ------- casted : type of caller """ - if isinstance(dtype, collections.Mapping): + if is_dict_like(dtype): if self.ndim == 1: # i.e. Series - if len(dtype) > 1 or list(dtype.keys())[0] != self.name: + if len(dtype) > 1 or self.name not in dtype: raise KeyError('Only the Series name can be used for ' 'the key in Series dtype mappings.') - new_type = list(dtype.values())[0] + new_type = dtype[self.name] return self.astype(new_type, copy, errors, **kwargs) elif self.ndim > 2: raise NotImplementedError( diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index b99a6fabfa42b..335b76ff2aade 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -442,8 +442,9 @@ def test_astype_str(self): expected = DataFrame(['1.12345678901']) assert_frame_equal(result, expected) - def test_astype_dict(self): - # GH7271 + @pytest.mark.parametrize("dtype_class", [dict, Series]) + def test_astype_dict_like(self, dtype_class): + # GH7271 & GH16717 a = Series(date_range('2010-01-04', periods=5)) b = Series(range(5)) c = Series([0.0, 0.2, 0.4, 0.6, 0.8]) @@ -452,7 +453,8 @@ def test_astype_dict(self): original = df.copy(deep=True) # change type of a subset of columns - result = df.astype({'b': 'str', 'd': 'float32'}) + dt1 = dtype_class({'b': 'str', 'd': 'float32'}) + result = df.astype(dt1) expected = DataFrame({ 'a': a, 'b': Series(['0', '1', '2', '3', '4']), @@ -461,7 +463,8 @@ def test_astype_dict(self): assert_frame_equal(result, expected) assert_frame_equal(df, original) - result = df.astype({'b': np.float32, 'c': 'float32', 'd': np.float64}) + dt2 = dtype_class({'b': np.float32, 'c': 'float32', 'd': np.float64}) + result = df.astype(dt2) expected = DataFrame({ 'a': a, 'b': Series([0.0, 1.0, 2.0, 3.0, 4.0], dtype='float32'), @@ -471,19 +474,31 @@ def test_astype_dict(self): assert_frame_equal(df, original) # change all columns - assert_frame_equal(df.astype({'a': str, 'b': str, 'c': str, 'd': str}), + dt3 = dtype_class({'a': str, 'b': str, 'c': str, 'd': str}) + assert_frame_equal(df.astype(dt3), df.astype(str)) assert_frame_equal(df, original) # error should be raised when using something other than column labels # in the keys of the dtype dict - pytest.raises(KeyError, df.astype, {'b': str, 2: str}) - pytest.raises(KeyError, df.astype, {'e': str}) + dt4 = dtype_class({'b': str, 2: str}) + dt5 = dtype_class({'e': str}) + pytest.raises(KeyError, df.astype, dt4) + pytest.raises(KeyError, df.astype, dt5) assert_frame_equal(df, original) # if the dtypes provided are the same as the original dtypes, the # resulting DataFrame should be the same as the original DataFrame - equiv = df.astype({col: df[col].dtype for col in df.columns}) + dt6 = dtype_class({col: df[col].dtype for col in df.columns}) + equiv = df.astype(dt6) + assert_frame_equal(df, equiv) + assert_frame_equal(df, original) + + # GH 16717 + # if dtypes provided is empty, the resulting DataFrame + # should be the same as the original DataFrame + dt7 = dtype_class({}) + result = df.astype(dt7) assert_frame_equal(df, equiv) 
assert_frame_equal(df, original) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 9ab02a8c2aad7..2ec579842e33f 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -152,24 +152,35 @@ def test_astype_unicode(self): reload(sys) # noqa sys.setdefaultencoding(former_encoding) - def test_astype_dict(self): + @pytest.mark.parametrize("dtype_class", [dict, Series]) + def test_astype_dict_like(self, dtype_class): # see gh-7271 s = Series(range(0, 10, 2), name='abc') - result = s.astype({'abc': str}) + dt1 = dtype_class({'abc': str}) + result = s.astype(dt1) expected = Series(['0', '2', '4', '6', '8'], name='abc') tm.assert_series_equal(result, expected) - result = s.astype({'abc': 'float64'}) + dt2 = dtype_class({'abc': 'float64'}) + result = s.astype(dt2) expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype='float64', name='abc') tm.assert_series_equal(result, expected) + dt3 = dtype_class({'abc': str, 'def': str}) with pytest.raises(KeyError): - s.astype({'abc': str, 'def': str}) + s.astype(dt3) + dt4 = dtype_class({0: str}) with pytest.raises(KeyError): - s.astype({0: str}) + s.astype(dt4) + + # GH16717 + # if dtypes provided is empty, it should error + dt5 = dtype_class({}) + with pytest.raises(KeyError): + s.astype(dt5) def test_astype_generic_timestamp_deprecated(self): # see gh-15524 From 06fc667f7a26f136b11f33a658124bf64cd57ab4 Mon Sep 17 00:00:00 2001 From: Florian Wilhelm Date: Fri, 30 Jun 2017 22:25:15 +0200 Subject: [PATCH 714/933] FIX: Allow aggregate to return dictionaries again #16741 (#16752) --- doc/source/whatsnew/v0.21.0.txt | 2 ++ pandas/_libs/src/reduce.pyx | 2 +- pandas/tests/groupby/test_aggregate.py | 10 ++++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index de2516d75040b..5515d093f39e4 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -92,6 +92,8 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Fixes regression in 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) + Conversion ^^^^^^^^^^ diff --git a/pandas/_libs/src/reduce.pyx b/pandas/_libs/src/reduce.pyx index 2bba07256305a..3ce94022e586b 100644 --- a/pandas/_libs/src/reduce.pyx +++ b/pandas/_libs/src/reduce.pyx @@ -419,7 +419,7 @@ cdef class SeriesGrouper: cdef inline _extract_result(object res): """ extract the result object, it might be a 0-dim ndarray or a len-1 0-dim, or a scalar """ - if hasattr(res, 'values'): + if hasattr(res, 'values') and isinstance(res.values, np.ndarray): res = res.values if not np.isscalar(res): if isinstance(res, np.ndarray): diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index b578a6efb0034..efc833575843c 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -611,6 +611,16 @@ def test_cython_agg_frame_columns(self): df.groupby(level=0, axis='columns').mean() df.groupby(level=0, axis='columns').mean() + def test_cython_agg_return_dict(self): + # GH 16741 + ts = self.df.groupby('A')['B'].agg( + lambda x: x.value_counts().to_dict()) + expected = Series([{'two': 1, 'one': 1, 'three': 1}, + {'two': 2, 'one': 2, 'three': 1}], + index=Index(['bar', 'foo'], name='A'), + name='B') + assert_series_equal(ts, expected) + def test_cython_fail_agg(self): dr = bdate_range('1/1/2000', periods=50) ts = Series(['A', 'B', 'C', 'D', 'E'] * 10, 
index=dr)

From 6b729ddd740b6d2efb739757180dbbbef9b092c7 Mon Sep 17 00:00:00 2001
From: Karel De Brabandere
Date: Fri, 30 Jun 2017 22:34:11 +0200
Subject: [PATCH 715/933] BUG: fix to_latex bold_rows option (#16708)

---
 doc/source/whatsnew/v0.20.3.txt          |  2 +-
 pandas/core/generic.py                   |  4 ++--
 pandas/io/formats/format.py              |  6 +++++
 pandas/tests/io/formats/test_to_latex.py | 30 ++++++++++++++++++++++++
 4 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt
index 8997d68c65615..b2d382a3202a5 100644
--- a/doc/source/whatsnew/v0.20.3.txt
+++ b/doc/source/whatsnew/v0.20.3.txt
@@ -41,7 +41,6 @@ Bug Fixes
 - Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`)

-
 Conversion
 ^^^^^^^^^^
@@ -59,6 +58,7 @@ I/O

 - Bug in :func:`read_csv` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`)
 - Bug in :func:`read_hdf` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`)
+- Bug in :func:`DataFrame.to_latex` where ``bold_rows`` was wrongly specified to be ``True`` by default, whereas in reality row labels remained non-bold whatever parameter was provided. (:issue:`16707`)

 Plotting
 ^^^^^^^^
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 4d4297d41c2aa..db19d9354ec4d 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1538,7 +1538,7 @@ def to_xarray(self):

         `to_latex`-specific options:

-        bold_rows : boolean, default True
+        bold_rows : boolean, default False
             Make the row labels bold in the output
         column_format : str, default None
             The columns format as specified in `LaTeX table format
@@ -1587,7 +1587,7 @@ def to_xarray(self):
     def to_latex(self, buf=None, columns=None, col_space=None, header=True,
                  index=True, na_rep='NaN', formatters=None, float_format=None,
-                 sparsify=None, index_names=True, bold_rows=True,
+                 sparsify=None, index_names=True, bold_rows=False,
                  column_format=None, longtable=None, escape=None,
                  encoding=None, decimal='.', multicolumn=None,
                  multicolumn_format=None, multirow=None):
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index 3deaec2dfbbc5..0627ca9179509 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -845,6 +845,7 @@ def __init__(self, formatter, column_format=None, longtable=False,
                  multicolumn=False, multicolumn_format=None, multirow=False):
         self.fmt = formatter
         self.frame = self.fmt.frame
+        self.bold_rows = self.fmt.kwds.get('bold_rows', False)
         self.column_format = column_format
         self.longtable = longtable
         self.multicolumn = multicolumn
@@ -943,6 +944,11 @@ def get_col_type(dtype):
                         if x else '{}') for x in row]
             else:
                 crow = [x if x else '{}' for x in row]
+            if self.bold_rows and self.fmt.index:
+                # bold row labels
+                crow = ['\\textbf{%s}' % x
+                        if j < ilevels and x.strip() not in ['', '{}'] else x
+                        for j, x in enumerate(crow)]
             if i < clevels and self.fmt.header and self.multicolumn:
                 # sum up columns to multicolumns
                 crow = self._format_multicolumn(crow, ilevels)
diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
index 4ee77abb32c26..aa86d1d9231fb 100644
--- a/pandas/tests/io/formats/test_to_latex.py
+++
b/pandas/tests/io/formats/test_to_latex.py @@ -506,3 +506,33 @@ def test_to_latex_series(self): \end{tabular} """ assert withindex_result == withindex_expected + + def test_to_latex_bold_rows(self): + # GH 16707 + df = pd.DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + observed = df.to_latex(bold_rows=True) + expected = r"""\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +\textbf{0} & 1 & b1 \\ +\textbf{1} & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + assert observed == expected + + def test_to_latex_no_bold_rows(self): + # GH 16707 + df = pd.DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + observed = df.to_latex(bold_rows=False) + expected = r"""\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + assert observed == expected From 7c27b9d99a945bd228c173eaac8042ca2f70bb72 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 30 Jun 2017 23:56:33 +0300 Subject: [PATCH 716/933] Revert "CI: pin jemalloc=4.4.0 (#16727)" (#16731) This reverts commit 09d8c22d9f56f4a067880a28fbb1235bcf0a1e49. --- ci/requirements-2.7.sh | 2 +- ci/requirements-2.7_BUILD_TEST.sh | 2 +- ci/requirements-3.5.sh | 2 +- ci/requirements-3.6.run | 1 - ci/requirements-3.6_DOC.sh | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/ci/requirements-2.7.sh b/ci/requirements-2.7.sh index 87daf740e059e..64d470e5c6e0e 100644 --- a/ci/requirements-2.7.sh +++ b/ci/requirements-2.7.sh @@ -4,4 +4,4 @@ source activate pandas echo "install 27" -conda install -n pandas -c conda-forge feather-format jemalloc=4.4.0 +conda install -n pandas -c conda-forge feather-format diff --git a/ci/requirements-2.7_BUILD_TEST.sh b/ci/requirements-2.7_BUILD_TEST.sh index b90ae3617ba2a..78941fd0944e5 100755 --- a/ci/requirements-2.7_BUILD_TEST.sh +++ b/ci/requirements-2.7_BUILD_TEST.sh @@ -4,4 +4,4 @@ source activate pandas echo "install 27 BUILD_TEST" -conda install -n pandas -c conda-forge pyarrow dask jemalloc=4.4.0 +conda install -n pandas -c conda-forge pyarrow dask diff --git a/ci/requirements-3.5.sh b/ci/requirements-3.5.sh index a2871f117be96..d0f0b81802dc6 100644 --- a/ci/requirements-3.5.sh +++ b/ci/requirements-3.5.sh @@ -4,4 +4,4 @@ source activate pandas echo "install 35" -conda install -n pandas -c conda-forge feather-format jemalloc=4.4.0 +conda install -n pandas -c conda-forge feather-format diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 06fae9a70ecf7..41c9680ce1b7e 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -14,7 +14,6 @@ html5lib jinja2 sqlalchemy pymysql -jemalloc=4.4.0 feather-format # psycopg2 (not avail on defaults ATM) beautifulsoup4 diff --git a/ci/requirements-3.6_DOC.sh b/ci/requirements-3.6_DOC.sh index 5a62e75adf7ea..e43e483d77a73 100644 --- a/ci/requirements-3.6_DOC.sh +++ b/ci/requirements-3.6_DOC.sh @@ -6,6 +6,6 @@ echo "[install DOC_BUILD deps]" pip install pandas-gbq -conda install -n pandas -c conda-forge feather-format nbsphinx pandoc jemalloc=4.4.0 +conda install -n pandas -c conda-forge feather-format nbsphinx pandoc conda install -n pandas -c r r rpy2 --yes From b2b5dc32e24cfa5ab1c37d09c4e505d4a82d171c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 1 Jul 2017 00:29:44 +0300 Subject: [PATCH 717/933] CI: use dist/trusty rather than os/linux (#16806) closes #16730 --- .travis.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5dc4256a268ad..897d31cf23a3b 100644 --- a/.travis.yml +++ b/.travis.yml @@ 
-35,44 +35,44 @@ matrix: language: generic env: - JOB="3.5_OSX" TEST_ARGS="--skip-slow --skip-network" - - os: linux + - dist: trusty env: - JOB="2.7_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8" addons: apt: packages: - language-pack-zh-hans - - os: linux + - dist: trusty env: - JOB="2.7" TEST_ARGS="--skip-slow" LINT=true addons: apt: packages: - python-gtk2 - - os: linux + - dist: trusty env: - JOB="3.5" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true addons: apt: packages: - xsel - - os: linux + - dist: trusty env: - JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true # In allow_failures - - os: linux + - dist: trusty env: - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" # In allow_failures - - os: linux + - dist: trusty env: - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true # In allow_failures - - os: linux + - dist: trusty env: - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" # In allow_failures - - os: linux + - dist: trusty env: - JOB="3.6_DOC" DOC=true addons: @@ -80,16 +80,16 @@ matrix: packages: - xsel allow_failures: - - os: linux + - dist: trusty env: - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" - - os: linux + - dist: trusty env: - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true - - os: linux + - dist: trusty env: - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" - - os: linux + - dist: trusty env: - JOB="3.6_DOC" DOC=true From 9462379038c69885cf74869fff3e97c1a6d70394 Mon Sep 17 00:00:00 2001 From: DSM Date: Sat, 1 Jul 2017 02:54:06 -0400 Subject: [PATCH 718/933] TST: Verify columns entirely below chop_threshold still print (#6839) (#16809) --- pandas/tests/io/formats/test_format.py | 27 ++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 4431108a55963..679d43ac492ca 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -209,6 +209,33 @@ def test_repr_chop_threshold(self): with option_context("display.chop_threshold", None): assert repr(df) == ' 0 1\n0 0.1 0.5\n1 0.5 -0.1' + def test_repr_chop_threshold_column_below(self): + # GH 6839: validation case + + df = pd.DataFrame([[10, 20, 30, 40], + [8e-10, -1e-11, 2e-9, -2e-11]]).T + + with option_context("display.chop_threshold", 0): + assert repr(df) == (' 0 1\n' + '0 10.0 8.000000e-10\n' + '1 20.0 -1.000000e-11\n' + '2 30.0 2.000000e-09\n' + '3 40.0 -2.000000e-11') + + with option_context("display.chop_threshold", 1e-8): + assert repr(df) == (' 0 1\n' + '0 10.0 0.000000e+00\n' + '1 20.0 0.000000e+00\n' + '2 30.0 0.000000e+00\n' + '3 40.0 0.000000e+00') + + with option_context("display.chop_threshold", 5e-11): + assert repr(df) == (' 0 1\n' + '0 10.0 8.000000e-10\n' + '1 20.0 0.000000e+00\n' + '2 30.0 2.000000e-09\n' + '3 40.0 0.000000e+00') + def test_repr_obeys_max_seq_limit(self): with option_context("display.max_seq_items", 2000): assert len(printing.pprint_thing(lrange(1000))) > 1000 From e5fd3e023c52c6756fa83604c5909cc102808fdc Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Sat, 1 Jul 2017 04:41:37 -0400 Subject: [PATCH 719/933] BUG: clip dataframe column-wise #15390 (#16504) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/generic.py | 53 +++++++++++++++++---------- pandas/tests/frame/test_analytics.py | 25 ++++++++++++- pandas/tests/series/test_analytics.py | 
12 ++++++
 4 files changed, 69 insertions(+), 22 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 5515d093f39e4..ce0d40d327c15 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -135,6 +135,7 @@ Reshaping

 Numeric
 ^^^^^^^
+- Bug in ``.clip()`` when ``axis=1`` and a list-like ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`)


 Categorical
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index db19d9354ec4d..7d1a8adf381fe 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -52,6 +52,7 @@
 from pandas.compat.numpy import function as nv
 from pandas.compat import (map, zip, lzip, lrange, string_types,
                            isidentifier, set_function_name, cPickle as pkl)
+from pandas.core.ops import _align_method_FRAME
 import pandas.core.nanops as nanops
 from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
 from pandas.util._validators import validate_bool_kwarg
@@ -4413,6 +4414,34 @@ def _clip_with_scalar(self, lower, upper, inplace=False):
         else:
             return result

+    def _clip_with_one_bound(self, threshold, method, axis, inplace):
+
+        inplace = validate_bool_kwarg(inplace, 'inplace')
+        if axis is not None:
+            axis = self._get_axis_number(axis)
+
+        if np.any(isnull(threshold)):
+            raise ValueError("Cannot use an NA value as a clip threshold")
+
+        # method is self.le for upper bound and self.ge for lower bound
+        if is_scalar(threshold) and is_number(threshold):
+            if method.__name__ == 'le':
+                return self._clip_with_scalar(None, threshold, inplace=inplace)
+            return self._clip_with_scalar(threshold, None, inplace=inplace)
+
+        subset = method(threshold, axis=axis) | isnull(self)
+
+        # GH #15390
+        # In order for the where method to work, the threshold must
+        # be transformed to an NDFrame from another array-like structure.
+ if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold): + if isinstance(self, ABCSeries): + threshold = pd.Series(threshold, index=self.index) + else: + threshold = _align_method_FRAME(self, np.asarray(threshold), + axis) + return self.where(subset, threshold, axis=axis, inplace=inplace) + def clip(self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs): """ @@ -4515,16 +4544,8 @@ def clip_upper(self, threshold, axis=None, inplace=False): ------- clipped : same type as input """ - if np.any(isnull(threshold)): - raise ValueError("Cannot use an NA value as a clip threshold") - - if is_scalar(threshold) and is_number(threshold): - return self._clip_with_scalar(None, threshold, inplace=inplace) - - inplace = validate_bool_kwarg(inplace, 'inplace') - - subset = self.le(threshold, axis=axis) | isnull(self) - return self.where(subset, threshold, axis=axis, inplace=inplace) + return self._clip_with_one_bound(threshold, method=self.le, + axis=axis, inplace=inplace) def clip_lower(self, threshold, axis=None, inplace=False): """ @@ -4547,16 +4568,8 @@ def clip_lower(self, threshold, axis=None, inplace=False): ------- clipped : same type as input """ - if np.any(isnull(threshold)): - raise ValueError("Cannot use an NA value as a clip threshold") - - if is_scalar(threshold) and is_number(threshold): - return self._clip_with_scalar(threshold, None, inplace=inplace) - - inplace = validate_bool_kwarg(inplace, 'inplace') - - subset = self.ge(threshold, axis=axis) | isnull(self) - return self.where(subset, threshold, axis=axis, inplace=inplace) + return self._clip_with_one_bound(threshold, method=self.ge, + axis=axis, inplace=inplace) def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, **kwargs): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 943a93b27a78a..b09325bfa2ddc 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1892,12 +1892,33 @@ def test_clip_against_series(self, inplace): tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i]) - def test_clip_against_frame(self): + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])]) + @pytest.mark.parametrize("axis,res", [ + (0, [[2., 2., 3.], [4., 5., 6.], [7., 7., 7.]]), + (1, [[2., 3., 4.], [4., 5., 6.], [5., 6., 7.]]) + ]) + def test_clip_against_list_like(self, inplace, lower, axis, res): + # GH #15390 + original = self.simple.copy(deep=True) + + result = original.clip(lower=lower, upper=[5, 6, 7], + axis=axis, inplace=inplace) + + expected = pd.DataFrame(res, + columns=original.columns, + index=original.index) + if inplace: + result = original + tm.assert_frame_equal(result, expected, check_exact=True) + + @pytest.mark.parametrize("axis", [0, 1, None]) + def test_clip_against_frame(self, axis): df = DataFrame(np.random.randn(1000, 2)) lb = DataFrame(np.random.randn(1000, 2)) ub = lb + 1 - clipped_df = df.clip(lb, ub) + clipped_df = df.clip(lb, ub, axis=axis) lb_mask = df <= lb ub_mask = df >= ub diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 18c6c9a6dd021..749af1c56a7f0 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1015,6 +1015,18 @@ def test_clip_against_series(self): assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5])) assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 
3.5])) + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])]) + def test_clip_against_list_like(self, inplace, upper): + # GH #15390 + original = pd.Series([5, 6, 7]) + result = original.clip(upper=upper, inplace=inplace) + expected = pd.Series([1, 2, 3]) + + if inplace: + result = original + tm.assert_series_equal(result, expected, check_exact=True) + def test_clip_with_datetimes(self): # GH 11838 From 329fdaae294ae3dc45d4d7301f0f1eec2d26cb4b Mon Sep 17 00:00:00 2001 From: DSM Date: Sun, 2 Jul 2017 13:38:58 -0400 Subject: [PATCH 720/933] TST: Verify that positional shifting works with duplicate columns (#9092) (#16810) --- pandas/tests/frame/test_timeseries.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 143a7ea8f6fb2..aaca8a60fe062 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -266,6 +266,28 @@ def test_shift_empty(self): assert_frame_equal(df, rs) + def test_shift_duplicate_columns(self): + # GH 9092; verify that position-based shifting works + # in the presence of duplicate columns + column_lists = [list(range(5)), [1] * 5, [1, 1, 2, 2, 1]] + data = np.random.randn(20, 5) + + shifted = [] + for columns in column_lists: + df = pd.DataFrame(data.copy(), columns=columns) + for s in range(5): + df.iloc[:, s] = df.iloc[:, s].shift(s + 1) + df.columns = range(5) + shifted.append(df) + + # sanity check the base case + nulls = shifted[0].isnull().sum() + assert_series_equal(nulls, Series(range(1, 6), dtype='int64')) + + # check all answers are the same + assert_frame_equal(shifted[0], shifted[1]) + assert_frame_equal(shifted[0], shifted[2]) + def test_tshift(self): # PeriodIndex ps = tm.makePeriodFrame() From 92e1cc829181f9a4cea47f6a81cec986e8ef2707 Mon Sep 17 00:00:00 2001 From: ri938 Date: Mon, 3 Jul 2017 13:39:55 +0100 Subject: [PATCH 721/933] BUG: render dataframe as html do not produce duplicate element id's (#16780) (#16801) * BUG: when rendering dataframe as html do not produce duplicate element id's #16780 * CLN: removing spaces in code causes pylint check to fail * DOC: moved whatsnew comment to 0.20.3 release from 0.21.0 --- doc/source/whatsnew/v0.20.3.txt | 1 + pandas/io/formats/style.py | 5 +++-- pandas/tests/io/formats/test_style.py | 14 +++++++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index b2d382a3202a5..f9bb198abf6b7 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -37,6 +37,7 @@ Performance Improvements Bug Fixes ~~~~~~~~~ - Fixed issue with dataframe scatter plot for categorical data that reports incorrect column key not found when categorical data is used for plotting (:issue:`16199`) +- Fixed issue with :meth:`DataFrame.style` where element id's were not unique (:issue:`16780`) - Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) - Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 3d7e0fcdc69b3..b08d3877f3b03 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -281,13 +281,14 @@ def format_attr(pair): for r, idx in enumerate(self.data.index): row_es = [] for c, value in 
enumerate(rlabels[r]): + rid = [ROW_HEADING_CLASS, "level%s" % c, "row%s" % r] es = { "type": "th", "is_visible": _is_visible(r, c, idx_lengths), "value": value, "display_value": value, - "class": " ".join([ROW_HEADING_CLASS, "level%s" % c, - "row%s" % r]), + "id": "_".join(rid[1:]), + "class": " ".join(rid) } rowspan = idx_lengths.get((c, r), 0) if rowspan > 1: diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 9911888f758fb..59d9f938734ab 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -1,5 +1,6 @@ import copy import textwrap +import re import pytest import numpy as np @@ -505,6 +506,14 @@ def test_uuid(self): assert result is styler assert result.uuid == 'aaa' + def test_unique_id(self): + # See https://github.com/pandas-dev/pandas/issues/16780 + df = pd.DataFrame({'a': [1, 3, 5, 6], 'b': [2, 4, 12, 21]}) + result = df.style.render(uuid='test') + assert 'test' in result + ids = re.findall('id="(.*?)"', result) + assert np.unique(ids).size == len(ids) + def test_table_styles(self): style = [{'selector': 'th', 'props': [('foo', 'bar')]}] styler = Styler(self.df, table_styles=style) @@ -719,12 +728,13 @@ def test_mi_sparse(self): df = pd.DataFrame({'A': [1, 2]}, index=pd.MultiIndex.from_arrays([['a', 'a'], [0, 1]])) + result = df.style._translate() body_0 = result['body'][0][0] expected_0 = { "value": "a", "display_value": "a", "is_visible": True, "type": "th", "attributes": ["rowspan=2"], - "class": "row_heading level0 row0", + "class": "row_heading level0 row0", "id": "level0_row0" } tm.assert_dict_equal(body_0, expected_0) @@ -732,6 +742,7 @@ def test_mi_sparse(self): expected_1 = { "value": 0, "display_value": 0, "is_visible": True, "type": "th", "class": "row_heading level1 row0", + "id": "level1_row0" } tm.assert_dict_equal(body_1, expected_1) @@ -739,6 +750,7 @@ def test_mi_sparse(self): expected_10 = { "value": 'a', "display_value": 'a', "is_visible": False, "type": "th", "class": "row_heading level0 row1", + "id": "level0_row1" } tm.assert_dict_equal(body_10, expected_10) From 9e55af2552664267ce27c7fc6c932acb0651c259 Mon Sep 17 00:00:00 2001 From: Tuan Date: Tue, 4 Jul 2017 03:23:45 +1000 Subject: [PATCH 722/933] fix BUG: ValueError when performing rolling covariance on multi indexed DataFrame (#16814) * fix multi index names * fix line length to pep8 * added what's new entry and reference issue number in test * Update test_multi.py * Update v0.20.3.txt --- doc/source/whatsnew/v0.20.3.txt | 2 +- pandas/core/window.py | 2 +- pandas/tests/indexes/test_multi.py | 9 +++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index f9bb198abf6b7..dcce427f8dd84 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -40,7 +40,7 @@ Bug Fixes - Fixed issue with :meth:`DataFrame.style` where element id's were not unique (:issue:`16780`) - Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) - Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) - +- Fixed bug where computing the rolling covariance of a MultiIndexed ``DataFrame`` improperly raised a ``ValueError`` (:issue:`16789`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/window.py b/pandas/core/window.py index 01b1bdc3e5054..1e16eff7d56cc 100644 --- a/pandas/core/window.py +++ 
b/pandas/core/window.py @@ -1948,7 +1948,7 @@ def dataframe_from_int_dict(data, frame_template): result.columns = Index(result.columns).set_names( arg2.columns.name) result.index = result.index.set_names( - [arg1.index.name, arg1.columns.name]) + arg1.index.names + arg1.columns.names) return result diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index ef8806246c2c5..03f90b25415bb 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -61,6 +61,15 @@ def f(): tm.assert_raises_regex(ValueError, 'The truth value of a', f) + def test_multi_index_names(self): + + # GH 16789 + cols = pd.MultiIndex.from_product([['A', 'B'], ['C', 'D', 'E']], + names=['1', '2']) + df = pd.DataFrame(np.ones((10, 6)), columns=cols) + rolling_result = df.rolling(3).cov() + assert rolling_result.index.names == [None, '1', '2'] + def test_labels_dtypes(self): # GH 8456 From 04de578b173c6901e75d24647c49d6a697ceec4b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 4 Jul 2017 16:34:12 -0400 Subject: [PATCH 723/933] BUG: rolling.cov with multi-index columns should presever the MI (#16825) xref #16814 --- doc/source/whatsnew/v0.20.3.txt | 2 +- pandas/core/indexes/multi.py | 3 +++ pandas/core/window.py | 9 +++++---- pandas/tests/indexes/test_multi.py | 9 --------- pandas/tests/test_window.py | 14 +++++++++++++- 5 files changed, 22 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index dcce427f8dd84..3d1bed2c9f1a9 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -40,7 +40,7 @@ Bug Fixes - Fixed issue with :meth:`DataFrame.style` where element id's were not unique (:issue:`16780`) - Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) - Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) -- Fixed bug where computing the rolling covariance of a MultiIndexed ``DataFrame`` improperly raised a ``ValueError`` (:issue:`16789`) +- Fixed a bug in failing to compute rolling computations of a column-MultiIndexed ``DataFrame`` (:issue:`16789`, :issue:`16825`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1a762732b1213..ee18263cca6ab 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1383,6 +1383,9 @@ def __getitem__(self, key): # cannot be sure whether the result will be sorted sortorder = None + if isinstance(key, Index): + key = np.asarray(key) + new_labels = [lab[key] for lab in self.labels] return MultiIndex(levels=self.levels, labels=new_labels, diff --git a/pandas/core/window.py b/pandas/core/window.py index 1e16eff7d56cc..02b508bb94e4c 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -836,7 +836,7 @@ def count(self): return self._wrap_results(results, blocks, obj) - _shared_docs['apply'] = dedent(""" + _shared_docs['apply'] = dedent(r""" %(name)s function apply Parameters @@ -1922,7 +1922,8 @@ def dataframe_from_int_dict(data, frame_template): # TODO: not the most efficient (perf-wise) # though not bad code-wise - from pandas import Panel, MultiIndex, Index + from pandas import Panel, MultiIndex + with warnings.catch_warnings(record=True): p = Panel.from_dict(results).swapaxes('items', 'major') if len(p.major_axis) > 0: @@ -1945,8 +1946,8 @@ def dataframe_from_int_dict(data, frame_template): # reset our index names to arg1 
names # reset our column names to arg2 names # careful not to mutate the original names - result.columns = Index(result.columns).set_names( - arg2.columns.name) + result.columns = result.columns.set_names( + arg2.columns.names) result.index = result.index.set_names( arg1.index.names + arg1.columns.names) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 03f90b25415bb..ef8806246c2c5 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -61,15 +61,6 @@ def f(): tm.assert_raises_regex(ValueError, 'The truth value of a', f) - def test_multi_index_names(self): - - # GH 16789 - cols = pd.MultiIndex.from_product([['A', 'B'], ['C', 'D', 'E']], - names=['1', '2']) - df = pd.DataFrame(np.ones((10, 6)), columns=cols) - rolling_result = df.rolling(3).cov() - assert rolling_result.index.names == [None, '1', '2'] - def test_labels_dtypes(self): # GH 8456 diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index cbb3c345a9353..9c3765ffdb716 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -455,6 +455,17 @@ def tests_empty_df_rolling(self, roller): result = DataFrame(index=pd.DatetimeIndex([])).rolling(roller).sum() tm.assert_frame_equal(result, expected) + def test_multi_index_names(self): + + # GH 16789, 16825 + cols = pd.MultiIndex.from_product([['A', 'B'], ['C', 'D', 'E']], + names=['1', '2']) + df = pd.DataFrame(np.ones((10, 6)), columns=cols) + result = df.rolling(3).cov() + + tm.assert_index_equal(result.columns, df.columns) + assert result.index.names == [None, '1', '2'] + class TestExpanding(Base): @@ -501,9 +512,10 @@ def test_numpy_compat(self): 'expander', [1, pytest.mark.xfail( reason='GH 16425 expanding with offset not supported')('1s')]) - def tests_empty_df_expanding(self, expander): + def test_empty_df_expanding(self, expander): # GH 15819 Verifies that datetime and integer expanding windows can be # applied to empty DataFrames + expected = DataFrame() result = DataFrame().expanding(expander).sum() tm.assert_frame_equal(result, expected) From 15db50bbdbce91608c9c4c0bc1398c41a619a9be Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 4 Jul 2017 16:35:36 -0400 Subject: [PATCH 724/933] use network decorator on additional tests (#16824) --- pandas/tests/io/parser/test_network.py | 1 + pandas/tests/test_downstream.py | 2 ++ setup.cfg | 1 + 3 files changed, 4 insertions(+) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index e12945a6a3102..cfa60248605ad 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -19,6 +19,7 @@ def salaries_table(): return read_table(path) +@pytest.mark.network @pytest.mark.parametrize( "compression,extension", [('gzip', '.gz'), ('bz2', '.bz2'), ('zip', '.zip'), diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 27e3c29a70a9f..61f0c992225c6 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -52,6 +52,7 @@ def test_xarray(df): assert df.to_xarray() is not None +@tm.network def test_statsmodels(): statsmodels = import_module('statsmodels') # noqa @@ -84,6 +85,7 @@ def test_pandas_gbq(df): pandas_gbq = import_module('pandas_gbq') # noqa +@tm.network def test_pandas_datareader(): pandas_datareader = import_module('pandas_datareader') # noqa diff --git a/setup.cfg b/setup.cfg index 1f9bea6718a4d..05d4c84ca56c4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -26,3 +26,4 @@ testpaths = pandas 
markers = single: mark a test as single cpu only slow: mark a test as slow + network: mark a test as network From cc5d20f50c059f02472b029f4455b2609620374e Mon Sep 17 00:00:00 2001 From: jdeschenes Date: Thu, 6 Jul 2017 08:23:38 -0400 Subject: [PATCH 725/933] BUG: TimedeltaIndex raising ValueError when slice indexing (#16637) (#16638) --- doc/source/whatsnew/v0.20.3.txt | 2 ++ pandas/core/dtypes/common.py | 5 ++++- pandas/core/indexes/timedeltas.py | 3 +-- pandas/tests/dtypes/test_common.py | 4 +++- pandas/tests/indexing/test_timedelta.py | 24 +++++++++++++++++++++++- 5 files changed, 33 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 3d1bed2c9f1a9..ce7e8be16d8e2 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -41,6 +41,8 @@ Bug Fixes - Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) - Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) - Fixed a bug in failing to compute rolling computations of a column-MultiIndexed ``DataFrame`` (:issue:`16789`, :issue:`16825`) +- Bug in a DataFrame/Series with a ``TimedeltaIndex`` when slice indexing (:issue:`16637`) + Conversion ^^^^^^^^^^ diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index bfec1ec3ebe8c..2eebf3704253e 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -396,7 +396,10 @@ def is_timedelta64_dtype(arr_or_dtype): if arr_or_dtype is None: return False - tipo = _get_dtype_type(arr_or_dtype) + try: + tipo = _get_dtype_type(arr_or_dtype) + except ValueError: + return False return issubclass(tipo, np.timedelta64) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index ab94a5bffb4f9..c025c74625972 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -680,8 +680,7 @@ def get_loc(self, key, method=None, tolerance=None): ------- loc : int """ - - if is_bool_indexer(key): + if is_bool_indexer(key) or is_timedelta64_dtype(key): raise TypeError if isnull(key): diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 4633dde5ed537..ba510e68f9a21 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -200,10 +200,12 @@ def test_is_datetime64tz_dtype(): def test_is_timedelta64_dtype(): assert not com.is_timedelta64_dtype(object) assert not com.is_timedelta64_dtype([1, 2, 3]) - + assert not com.is_timedelta64_dtype(np.array([], dtype=np.datetime64)) assert com.is_timedelta64_dtype(np.timedelta64) assert com.is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) + assert not com.is_timedelta64_dtype("0 days 00:00:00") + def test_is_period_dtype(): assert not com.is_period_dtype(object) diff --git a/pandas/tests/indexing/test_timedelta.py b/pandas/tests/indexing/test_timedelta.py index cf8cc6c2d345d..be3ea8f0c371d 100644 --- a/pandas/tests/indexing/test_timedelta.py +++ b/pandas/tests/indexing/test_timedelta.py @@ -1,3 +1,5 @@ +import pytest + import pandas as pd from pandas.util import testing as tm @@ -16,5 +18,25 @@ def test_boolean_indexing(self): result = df.assign(x=df.mask(cond, 10).astype('int64')) expected = pd.DataFrame(data, index=pd.to_timedelta(range(10), unit='s'), - columns=['x']) + columns=['x'], + dtype='int64') tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( + "indexer, 
expected", + [(0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + (slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]), + ([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9])]) + def test_list_like_indexing(self, indexer, expected): + # GH 16637 + df = pd.DataFrame({'x': range(10)}, dtype="int64") + df.index = pd.to_timedelta(range(10), unit='s') + + df.loc[df.index[indexer], 'x'] = 20 + + expected = pd.DataFrame(expected, + index=pd.to_timedelta(range(10), unit='s'), + columns=['x'], + dtype="int64") + + tm.assert_frame_equal(expected, df) From 7d0a98e9bfb6c59d5f1738927732ddb80013b581 Mon Sep 17 00:00:00 2001 From: ri938 Date: Thu, 6 Jul 2017 13:37:11 +0100 Subject: [PATCH 726/933] Bug issue 16819 Index.get_indexer_not_unique inconsistent return types vs get_indexer (#16826) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/groupby.py | 5 +++-- pandas/core/indexes/base.py | 7 +++---- pandas/tests/indexes/test_base.py | 11 +++++++++++ pandas/tests/indexes/test_category.py | 3 +-- 5 files changed, 19 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index ce0d40d327c15..4b97fb83cb13b 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -53,6 +53,7 @@ Backwards incompatible API changes - :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`) - :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`). - Compression defaults in HDF stores now follow pytable standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`) +- ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`) .. 
_whatsnew_0210.api: diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index c4b3e25acae7e..daf3381ae4e89 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -896,8 +896,9 @@ def reset_identity(values): # we can't reindex, so we resort to this # GH 14776 if isinstance(ax, MultiIndex) and not ax.is_unique: - result = result.take(result.index.get_indexer_for( - ax.values).unique(), axis=self.axis) + indexer = algorithms.unique1d( + result.index.get_indexer_for(ax.values)) + result = result.take(indexer, axis=self.axis) else: result = result.reindex_axis(ax, axis=self.axis) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 695f9f119baa2..8a4878d9cfbcf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2256,8 +2256,8 @@ def intersection(self, other): indexer = indexer.take((indexer != -1).nonzero()[0]) except: # duplicates - indexer = Index(other._values).get_indexer_non_unique( - self._values)[0].unique() + indexer = algos.unique1d( + Index(other._values).get_indexer_non_unique(self._values)[0]) indexer = indexer[indexer != -1] taken = other.take(indexer) @@ -2704,7 +2704,7 @@ def get_indexer_non_unique(self, target): tgt_values = target._values indexer, missing = self._engine.get_indexer_non_unique(tgt_values) - return Index(indexer), missing + return indexer, missing def get_indexer_for(self, target, **kwargs): """ @@ -2942,7 +2942,6 @@ def _reindex_non_unique(self, target): else: # need to retake to have the same size as the indexer - indexer = indexer.values indexer[~check] = 0 # reset the new indexer to account for the new size diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 18dbe6624008a..7a81a125467d5 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1131,6 +1131,17 @@ def test_get_indexer_strings(self): with pytest.raises(TypeError): idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2) + def test_get_indexer_consistency(self): + # See GH 16819 + for name, index in self.indices.items(): + indexer = index.get_indexer(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + + indexer, _ = index.get_indexer_non_unique(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + def test_get_loc(self): idx = pd.Index([0, 1, 2]) all_methods = [None, 'pad', 'backfill', 'nearest'] diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 4e4f9b29f9a4c..493274fff43e0 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -386,8 +386,7 @@ def test_reindexing(self): expected = oidx.get_indexer_non_unique(finder)[0] actual = ci.get_indexer(finder) - tm.assert_numpy_array_equal( - expected.values, actual, check_dtype=False) + tm.assert_numpy_array_equal(expected, actual) def test_reindex_dtype(self): c = CategoricalIndex(['a', 'b', 'c', 'a']) From 1c3752334dcec4666bb7e6d51fb718a7674849ed Mon Sep 17 00:00:00 2001 From: DSM Date: Thu, 6 Jul 2017 18:42:11 -0400 Subject: [PATCH 727/933] TST: Verify that float columns stay float after pivot (#7142) (#16815) --- pandas/tests/reshape/test_pivot.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index fc5a2eb468d4f..9881ab72f3ef5 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -167,6 +167,24 @@ def 
test_pivot_dtypes(self): expected = Series(dict(float64=2)) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('columns,values', + [('bool1', ['float1', 'float2']), + ('bool1', ['float1', 'float2', 'bool1']), + ('bool2', ['float1', 'float2', 'bool1'])]) + def test_pivot_preserve_dtypes(self, columns, values): + # GH 7142 regression test + v = np.arange(5, dtype=np.float64) + df = DataFrame({'float1': v, 'float2': v + 2.0, + 'bool1': v <= 2, 'bool2': v <= 3}) + + df_res = df.reset_index().pivot_table( + index='index', columns=columns, values=values) + + result = dict(df_res.dtypes) + expected = {col: np.dtype('O') if col[0].startswith('b') + else np.dtype('float64') for col in df_res} + assert result == expected + def test_pivot_no_values(self): # GH 14380 idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-01-02', From 8d197ba63128a4cdbbb8627e48e7f1b4f150330c Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 6 Jul 2017 15:46:00 -0700 Subject: [PATCH 728/933] BUG/MAINT: Change default of inplace to False in pd.eval (#16732) --- doc/source/whatsnew/v0.21.0.txt | 48 +++++++++++++++ pandas/core/computation/eval.py | 87 ++++++++++++++++++--------- pandas/core/frame.py | 13 ++-- pandas/tests/computation/test_eval.py | 50 ++++++++++++--- 4 files changed, 150 insertions(+), 48 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4b97fb83cb13b..95eab9e3b684f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -45,6 +45,52 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Improved error handling during item assignment in pd.eval +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. _whatsnew_0210.api_breaking.pandas_eval: + +:func:`eval` will now raise a ``ValueError`` when item assignment malfunctions, or +inplace operations are specified, but there is no item assignment in the expression (:issue:`16732`) + +.. ipython:: python + + arr = np.array([1, 2, 3]) + +Previously, if you attempted the following expression, you would get a not very helpful error message: + +.. code-block:: ipython + + In [3]: pd.eval("a = 1 + 2", target=arr, inplace=True) + ... + IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) + and integer or boolean arrays are valid indices + +This is a very long way of saying numpy arrays don't support string-item indexing. With this +change, the error message is now this: + +.. code-block:: python + + In [3]: pd.eval("a = 1 + 2", target=arr, inplace=True) + ... + ValueError: Cannot assign expression output to target + +It also used to be possible to evaluate expressions inplace, even if there was no item assignment: + +.. code-block:: ipython + + In [4]: pd.eval("1 + 2", target=arr, inplace=True) + Out[4]: 3 + +However, this input does not make much sense because the output is not being assigned to +the target. Now, a ``ValueError`` will be raised when such an input is passed in: + +.. code-block:: ipython + + In [4]: pd.eval("1 + 2", target=arr, inplace=True) + ... + ValueError: Cannot operate inplace if there is no assignment + - Support has been dropped for Python 3.4 (:issue:`15251`) - The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. 
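A short sketch of the assignment semantics described above, using a plain dict as the ``target``; the dict is only an illustration, since any object supporting string-keyed item assignment (plus ``.copy()`` for the non-inplace case) should behave the same way:

    import pandas as pd

    d = {}
    pd.eval("a = 1 + 2", target=d, inplace=True)   # mutates d, returns None
    assert d == {"a": 3}

    # inplace=False (the new default) leaves the target alone and
    # returns a modified copy instead
    out = pd.eval("b = 4 * 2", target={"a": 3}, inplace=False)
    assert out == {"a": 3, "b": 8}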
(:issue:`16022`)
 - Accessing a non-existent attribute on a closed :class:`HDFStore` will now
@@ -79,6 +125,7 @@ Removal of prior version deprecations/changes
 - The ``pd.options.display.mpl_style`` configuration has been dropped (:issue:`12190`)
 - ``Index`` has dropped the ``.sym_diff()`` method in favor of ``.symmetric_difference()`` (:issue:`12591`)
 - ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`)
+- :func:`eval` and :meth:`DataFrame.eval` have changed the default of ``inplace`` from ``None`` to ``False`` (:issue:`11149`)

 .. _whatsnew_0210.performance:

@@ -145,3 +192,4 @@ Categorical

 Other
 ^^^^^
+- Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`)

diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py
index 22e376306280a..ef15e886fd554 100644
--- a/pandas/core/computation/eval.py
+++ b/pandas/core/computation/eval.py
@@ -3,7 +3,6 @@
 """Top level ``eval`` module.
 """

-import warnings
 import tokenize
 from pandas.io.formats.printing import pprint_thing
 from pandas.core.computation import _NUMEXPR_INSTALLED
@@ -148,7 +147,7 @@ def _check_for_locals(expr, stack_level, parser):

 def eval(expr, parser='pandas', engine=None, truediv=True,
          local_dict=None, global_dict=None, resolvers=(), level=0,
-         target=None, inplace=None):
+         target=None, inplace=False):
     """Evaluate a Python expression as a string using various backends.

     The following arithmetic operations are supported: ``+``, ``-``, ``*``,
@@ -205,20 +204,40 @@ def eval(expr, parser='pandas', engine=None, truediv=True,
     level : int, optional
         The number of prior stack frames to traverse and add to the current
         scope. Most users will **not** need to change this parameter.
-    target : a target object for assignment, optional, default is None
-        essentially this is a passed in resolver
-    inplace : bool, default True
-        If expression mutates, whether to modify object inplace or return
-        copy with mutation.
-
-        WARNING: inplace=None currently falls back to to True, but
-        in a future version, will default to False. Use inplace=True
-        explicitly rather than relying on the default.
+    target : object, optional, default None
+        This is the target object for assignment. It is used when there is
+        variable assignment in the expression. If so, then `target` must
+        support item assignment with string keys, and if a copy is being
+        returned, it must also support `.copy()`.
+    inplace : bool, default False
+        If `target` is provided, and the expression mutates `target`, whether
+        to modify `target` inplace. Otherwise, return a copy of `target` with
+        the mutation.

     Returns
     -------
     ndarray, numeric scalar, DataFrame, Series

+    Raises
+    ------
+    ValueError
+        There are many instances where such an error can be raised:
+
+        - `target=None`, but the expression is multiline.
+        - The expression is multiline, but not all of them have item assignment.
+          An example of such an arrangement is this:
+
+          a = b + 1
+          a + 2
+
+          Here, there are expressions on different lines, making it multiline,
+          but the last line has no variable assigned to the output of `a + 2`.
+        - `inplace=True`, but the expression is missing item assignment.
+        - Item assignment is provided, but the `target` does not support
+          string item assignment.
+ - Item assignment is provided and `inplace=False`, but the `target` + does not support the `.copy()` method + Notes ----- The ``dtype`` of any objects involved in an arithmetic ``%`` operation are @@ -232,8 +251,9 @@ def eval(expr, parser='pandas', engine=None, truediv=True, pandas.DataFrame.query pandas.DataFrame.eval """ - inplace = validate_bool_kwarg(inplace, 'inplace') - first_expr = True + + inplace = validate_bool_kwarg(inplace, "inplace") + if isinstance(expr, string_types): _check_expression(expr) exprs = [e.strip() for e in expr.splitlines() if e.strip() != ''] @@ -245,7 +265,10 @@ def eval(expr, parser='pandas', engine=None, truediv=True, raise ValueError("multi-line expressions are only valid in the " "context of data, use DataFrame.eval") + ret = None first_expr = True + target_modified = False + for expr in exprs: expr = _convert_expression(expr) engine = _check_engine(engine) @@ -266,28 +289,33 @@ def eval(expr, parser='pandas', engine=None, truediv=True, eng_inst = eng(parsed_expr) ret = eng_inst.evaluate() - if parsed_expr.assigner is None and multi_line: - raise ValueError("Multi-line expressions are only valid" - " if all expressions contain an assignment") + if parsed_expr.assigner is None: + if multi_line: + raise ValueError("Multi-line expressions are only valid" + " if all expressions contain an assignment") + elif inplace: + raise ValueError("Cannot operate inplace " + "if there is no assignment") # assign if needed if env.target is not None and parsed_expr.assigner is not None: - if inplace is None: - warnings.warn( - "eval expressions containing an assignment currently" - "default to operating inplace.\nThis will change in " - "a future version of pandas, use inplace=True to " - "avoid this warning.", - FutureWarning, stacklevel=3) - inplace = True + target_modified = True # if returning a copy, copy only on the first assignment if not inplace and first_expr: - target = env.target.copy() + try: + target = env.target.copy() + except AttributeError: + raise ValueError("Cannot return a copy of the target") else: target = env.target - target[parsed_expr.assigner] = ret + # TypeError is most commonly raised (e.g. int, list), but you + # get IndexError if you try to do this assignment on np.ndarray. + try: + target[parsed_expr.assigner] = ret + except (TypeError, IndexError): + raise ValueError("Cannot assign expression output to target") if not resolvers: resolvers = ({parsed_expr.assigner: ret},) @@ -304,7 +332,6 @@ def eval(expr, parser='pandas', engine=None, truediv=True, ret = None first_expr = False - if not inplace and inplace is not None: - return target - - return ret + # We want to exclude `inplace=None` as being False. + if inplace is False: + return target if target_modified else ret diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2b2e7be62427b..80cdebc24c39d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2224,7 +2224,7 @@ def query(self, expr, inplace=False, **kwargs): else: return new_data - def eval(self, expr, inplace=None, **kwargs): + def eval(self, expr, inplace=False, **kwargs): """Evaluate an expression in the context of the calling DataFrame instance. @@ -2232,13 +2232,10 @@ def eval(self, expr, inplace=None, **kwargs): ---------- expr : string The expression string to evaluate. - inplace : bool - If the expression contains an assignment, whether to return a new - DataFrame or mutate the existing. - - WARNING: inplace=None currently falls back to to True, but - in a future version, will default to False. 
Use inplace=True - explicitly rather than relying on the default. + inplace : bool, default False + If the expression contains an assignment, whether to perform the + operation inplace and mutate the existing DataFrame. Otherwise, + a new DataFrame is returned. .. versionadded:: 0.18.0 diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 89ab4531877a4..589f612802fb9 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1311,14 +1311,6 @@ def assignment_not_inplace(self): expected['c'] = expected['a'] + expected['b'] tm.assert_frame_equal(df, expected) - # Default for inplace will change - with tm.assert_produces_warnings(FutureWarning): - df.eval('c = a + b') - - # but don't warn without assignment - with tm.assert_produces_warnings(None): - df.eval('a + b') - def test_multi_line_expression(self): # GH 11149 df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) @@ -1388,14 +1380,52 @@ def test_assignment_in_query(self): df.query('a = 1') assert_frame_equal(df, df_orig) - def query_inplace(self): - # GH 11149 + def test_query_inplace(self): + # see gh-11149 df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) expected = df.copy() expected = expected[expected['a'] == 2] df.query('a == 2', inplace=True) assert_frame_equal(expected, df) + df = {} + expected = {"a": 3} + + self.eval("a = 1 + 2", target=df, inplace=True) + tm.assert_dict_equal(df, expected) + + @pytest.mark.parametrize("invalid_target", [1, "cat", [1, 2], + np.array([]), (1, 3)]) + def test_cannot_item_assign(self, invalid_target): + msg = "Cannot assign expression output to target" + expression = "a = 1 + 2" + + with tm.assert_raises_regex(ValueError, msg): + self.eval(expression, target=invalid_target, inplace=True) + + if hasattr(invalid_target, "copy"): + with tm.assert_raises_regex(ValueError, msg): + self.eval(expression, target=invalid_target, inplace=False) + + @pytest.mark.parametrize("invalid_target", [1, "cat", (1, 3)]) + def test_cannot_copy_item(self, invalid_target): + msg = "Cannot return a copy of the target" + expression = "a = 1 + 2" + + with tm.assert_raises_regex(ValueError, msg): + self.eval(expression, target=invalid_target, inplace=False) + + @pytest.mark.parametrize("target", [1, "cat", [1, 2], + np.array([]), (1, 3), {1: 2}]) + def test_inplace_no_assignment(self, target): + expression = "1 + 2" + + assert self.eval(expression, target=target, inplace=False) == 3 + + msg = "Cannot operate inplace if there is no assignment" + with tm.assert_raises_regex(ValueError, msg): + self.eval(expression, target=target, inplace=True) + def test_basic_period_index_boolean_expression(self): df = mkdf(2, 2, data_gen_f=f, c_idx_type='p', r_idx_type='i') From 5cc1025a78ba316ea058ad6ea70a2104cc05345a Mon Sep 17 00:00:00 2001 From: chris-b1 Date: Fri, 7 Jul 2017 05:17:15 -0500 Subject: [PATCH 729/933] BUG: kind parameter on categorical argsort (#16834) --- doc/source/whatsnew/v0.20.3.txt | 1 + pandas/compat/numpy/function.py | 10 +++++++++- pandas/core/categorical.py | 4 ++-- pandas/core/sorting.py | 2 +- pandas/tests/frame/test_sorting.py | 9 +++++++++ pandas/tests/test_categorical.py | 5 ++--- 6 files changed, 24 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index ce7e8be16d8e2..77b3e3bd25740 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -94,6 +94,7 @@ Numeric Categorical ^^^^^^^^^^^ +- Bug in ``DataFrame.sort_values`` not respecting 
the ``kind`` with categorical data (:issue:`16793`) Other ^^^^^ diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index a324bf94171ce..ccbd3d9704e0c 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -107,6 +107,14 @@ def validate_argmax_with_skipna(skipna, args, kwargs): validate_argsort = CompatValidator(ARGSORT_DEFAULTS, fname='argsort', max_fname_arg_count=0, method='both') +# two different signatures of argsort, this second validation +# for when the `kind` param is supported +ARGSORT_DEFAULTS_KIND = OrderedDict() +ARGSORT_DEFAULTS_KIND['axis'] = -1 +ARGSORT_DEFAULTS_KIND['order'] = None +validate_argsort_kind = CompatValidator(ARGSORT_DEFAULTS_KIND, fname='argsort', + max_fname_arg_count=0, method='both') + def validate_argsort_with_ascending(ascending, args, kwargs): """ @@ -121,7 +129,7 @@ def validate_argsort_with_ascending(ascending, args, kwargs): args = (ascending,) + args ascending = True - validate_argsort(args, kwargs, max_fname_arg_count=1) + validate_argsort_kind(args, kwargs, max_fname_arg_count=3) return ascending diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 796b2696af9ce..afae11163b0dc 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1288,7 +1288,7 @@ def check_for_ordered(self, op): "you can use .as_ordered() to change the " "Categorical to an ordered one\n".format(op=op)) - def argsort(self, ascending=True, *args, **kwargs): + def argsort(self, ascending=True, kind='quicksort', *args, **kwargs): """ Returns the indices that would sort the Categorical instance if 'sort_values' was called. This function is implemented to provide @@ -1309,7 +1309,7 @@ def argsort(self, ascending=True, *args, **kwargs): numpy.ndarray.argsort """ ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) - result = np.argsort(self._codes.copy(), **kwargs) + result = np.argsort(self._codes.copy(), kind=kind, **kwargs) if not ascending: result = result[::-1] return result diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 69b427df981b7..10b80cbc3483d 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -233,7 +233,7 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): # specially handle Categorical if is_categorical_dtype(items): - return items.argsort(ascending=ascending) + return items.argsort(ascending=ascending, kind=kind) items = np.asanyarray(items) idx = np.arange(len(items)) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 98f7f82c0ace7..891c94b59074a 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -238,6 +238,15 @@ def test_stable_descending_multicolumn_sort(self): kind='mergesort') assert_frame_equal(sorted_df, expected) + def test_stable_categorial(self): + # GH 16793 + df = DataFrame({ + 'x': pd.Categorical(np.repeat([1, 2, 3, 4], 5), ordered=True) + }) + expected = df.copy() + sorted_df = df.sort_values('x', kind='mergesort') + assert_frame_equal(sorted_df, expected) + def test_sort_datetimes(self): # GH 3461, argsort / lexsort differences for a datetime column diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 92177ca07d835..667b26c24c662 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -585,9 +585,8 @@ def test_numpy_argsort(self): tm.assert_numpy_array_equal(np.argsort(c), expected, check_dtype=False) - msg = "the 'kind' parameter 
is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - c, kind='mergesort') + tm.assert_numpy_array_equal(np.argsort(c, kind='mergesort'), expected, + check_dtype=False) msg = "the 'axis' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.argsort, From f6edaeffb3de7f9787d049c67d90bf83442864bb Mon Sep 17 00:00:00 2001 From: manikbhandari Date: Fri, 7 Jul 2017 15:48:13 +0530 Subject: [PATCH 730/933] =?UTF-8?q?DOC:=20Updated=20cookbook=20to=20show?= =?UTF-8?q?=20usage=20of=20Grouper=20instead=20of=20TimeGrouper=E2=80=A6?= =?UTF-8?q?=20(#16794)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/source/cookbook.rst | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 62aa487069132..32e7a616fe856 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -776,11 +776,17 @@ Resampling The :ref:`Resample ` docs. -`TimeGrouping of values grouped across time -`__ +`Using Grouper instead of TimeGrouper for time grouping of values +`__ -`TimeGrouping #2 -`__ +`Time grouping with some missing values +`__ + +`Valid frequency arguments to Grouper +`__ + +`Grouping using a MultiIndex +`__ `Using TimeGrouper and another grouping to create subgroups, then apply a custom function `__ From 500cd0f1935d89c64d93f42cbf373a1209870600 Mon Sep 17 00:00:00 2001 From: Douglas Rudd Date: Fri, 7 Jul 2017 03:25:36 -0700 Subject: [PATCH 731/933] BUG: allow empty multiindex (fixes .isin regression, GH16777) (#16782) --- doc/source/whatsnew/v0.20.3.txt | 1 + pandas/core/indexes/multi.py | 14 ++++++++------ pandas/tests/indexes/test_multi.py | 7 +++++++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 77b3e3bd25740..3d6aba98d4d57 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -55,6 +55,7 @@ Indexing ^^^^^^^^ - Bug in ``Float64Index`` causing an empty array instead of ``None`` to be returned from ``.get(np.nan)`` on a Series whose index did not contain any ``NaN`` s (:issue:`8569`) +- Bug in ``MultiIndex.isin`` causing an error when passing an empty iterable (:issue:`16777`) I/O ^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ee18263cca6ab..81eac0ac0684f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1129,10 +1129,11 @@ def from_tuples(cls, tuples, sortorder=None, names=None): of iterables """ if len(tuples) == 0: - # I think this is right? Not quite sure... 
- raise TypeError('Cannot infer number of levels from empty list') - - if isinstance(tuples, (np.ndarray, Index)): + if names is None: + msg = 'Cannot infer number of levels from empty list' + raise TypeError(msg) + arrays = [[]] * len(names) + elif isinstance(tuples, (np.ndarray, Index)): if isinstance(tuples, Index): tuples = tuples._values @@ -2621,8 +2622,9 @@ def _wrap_joined_index(self, joined, other): @Appender(Index.isin.__doc__) def isin(self, values, level=None): if level is None: - return algos.isin(self.values, - MultiIndex.from_tuples(values).values) + values = MultiIndex.from_tuples(values, + names=self.names).values + return algos.isin(self.values, values) else: num = self._get_level_number(level) levs = self.levels[num] diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index ef8806246c2c5..719cd2f7e01a4 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1720,6 +1720,13 @@ def test_from_tuples(self): idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) assert len(idx) == 2 + def test_from_tuples_empty(self): + # GH 16777 + result = MultiIndex.from_tuples([], names=['a', 'b']) + expected = MultiIndex.from_arrays(arrays=[[], []], + names=['a', 'b']) + tm.assert_index_equal(result, expected) + def test_argsort(self): result = self.index.argsort() expected = self.index.values.argsort() From e9e434d9ab522496ab1a6c72dbdd057c9c9e5386 Mon Sep 17 00:00:00 2001 From: ian Date: Fri, 7 Jul 2017 08:16:55 -0400 Subject: [PATCH 732/933] BUG: fix missing sort keyword for PeriodIndex.join (#16586) --- doc/source/whatsnew/v0.20.3.txt | 2 +- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/period.py | 6 +++-- pandas/core/indexes/timedeltas.py | 6 +++-- pandas/tests/frame/test_join.py | 28 +++++++++++++++++++++- pandas/tests/indexes/common.py | 9 ++++++- pandas/tests/indexes/period/test_period.py | 6 +++++ 7 files changed, 51 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 3d6aba98d4d57..c11c74211030a 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -83,7 +83,7 @@ Sparse Reshaping ^^^^^^^^^ - +- ``PeriodIndex`` / ``TimedeltaIndex.join`` was missing the ``sort=`` kwarg (:issue:`16541`) - Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`). 
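The PATCH 731 change above can be sketched as follows (assuming a build containing that patch):

    import pandas as pd

    # an empty tuple list is now allowed when `names` is supplied
    empty = pd.MultiIndex.from_tuples([], names=['a', 'b'])
    assert len(empty) == 0

    # which in turn fixes .isin() with an empty iterable (GH 16777)
    idx = pd.MultiIndex.from_tuples([(1, 'x'), (2, 'y')], names=['a', 'b'])
    idx.isin([])            # array([False, False]) instead of raising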
- Bug in :func:`merge` when merging/joining with multiple categorical columns (:issue:`16767`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8a4878d9cfbcf..cefb080a3ee78 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3126,7 +3126,7 @@ def _join_non_unique(self, other, how='left', return_indexers=False): left_idx = _ensure_platform_int(left_idx) right_idx = _ensure_platform_int(right_idx) - join_index = self.values.take(left_idx) + join_index = np.asarray(self.values.take(left_idx)) mask = left_idx == -1 np.putmask(join_index, mask, other._values.take(right_idx)) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 9d1a49e13c804..0915462d4d421 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -912,14 +912,16 @@ def insert(self, loc, item): self[loc:].asi8)) return self._shallow_copy(idx) - def join(self, other, how='left', level=None, return_indexers=False): + def join(self, other, how='left', level=None, return_indexers=False, + sort=False): """ See Index.join """ self._assert_can_do_setop(other) result = Int64Index.join(self, other, how=how, level=level, - return_indexers=return_indexers) + return_indexers=return_indexers, + sort=sort) if return_indexers: result, lidx, ridx = result diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index c025c74625972..faec813df3993 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -516,7 +516,8 @@ def union(self, other): result.freq = to_offset(result.inferred_freq) return result - def join(self, other, how='left', level=None, return_indexers=False): + def join(self, other, how='left', level=None, return_indexers=False, + sort=False): """ See Index.join """ @@ -527,7 +528,8 @@ def join(self, other, how='left', level=None, return_indexers=False): pass return Index.join(self, other, how=how, level=level, - return_indexers=return_indexers) + return_indexers=return_indexers, + sort=sort) def _wrap_joined_index(self, joined, other): name = self.name if self.name == other.name else None diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index 21807cb42aa6e..afecba2026dd7 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -3,11 +3,19 @@ import pytest import numpy as np -from pandas import DataFrame, Index +from pandas import DataFrame, Index, PeriodIndex from pandas.tests.frame.common import TestData import pandas.util.testing as tm +@pytest.fixture +def frame_with_period_index(): + return DataFrame( + data=np.arange(20).reshape(4, 5), + columns=list('abcde'), + index=PeriodIndex(start='2000', freq='A', periods=4)) + + @pytest.fixture def frame(): return TestData().frame @@ -139,3 +147,21 @@ def test_join_overlap(frame): # column order not necessarily sorted tm.assert_frame_equal(joined, expected.loc[:, joined.columns]) + + +def test_join_period_index(frame_with_period_index): + other = frame_with_period_index.rename( + columns=lambda x: '{key}{key}'.format(key=x)) + + joined_values = np.concatenate( + [frame_with_period_index.values] * 2, axis=1) + + joined_cols = frame_with_period_index.columns.append(other.columns) + + joined = frame_with_period_index.join(other) + expected = DataFrame( + data=joined_values, + columns=joined_cols, + index=frame_with_period_index.index) + + tm.assert_frame_equal(joined, expected) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 
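A quick sketch of the PATCH 732 signature fix (the period ranges are illustrative):

    import pandas as pd

    left = pd.period_range('2000', periods=4, freq='A')
    right = pd.period_range('2002', periods=4, freq='A')

    # passing sort= used to raise TypeError because PeriodIndex.join did
    # not accept the keyword; it is now forwarded through to Index.join
    joined = left.join(right, how='outer', sort=True)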
a6177104d6273..e7ca435dca1c1 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -905,7 +905,7 @@ def test_fillna(self): def test_nulls(self): # this is really a smoke test for the methods - # as these are adequantely tested for function elsewhere + # as these are adequately tested for function elsewhere for name, index in self.indices.items(): if len(index) == 0: @@ -933,3 +933,10 @@ def test_empty(self): index = self.create_index() assert not index.empty assert index[:0].empty + + @pytest.mark.parametrize('how', ['outer', 'inner', 'left', 'right']) + def test_join_self_unique(self, how): + index = self.create_index() + if index.is_unique: + joined = index.join(index, how=how) + assert (index == joined).all() diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 6f73e7c15e4d9..291ca317f8fae 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -773,3 +773,9 @@ def test_map(self): result = index.map(lambda x: x.ordinal) exp = Index([x.ordinal for x in index]) tm.assert_index_equal(result, exp) + + @pytest.mark.parametrize('how', ['outer', 'inner', 'left', 'right']) + def test_join_self(self, how): + index = period_range('1/1/2000', periods=10) + joined = index.join(index, how=how) + assert index is joined From d8cd9ca5ac96b4edbb8f47c6b734c8f2513d5f01 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 7 Jul 2017 09:10:32 -0400 Subject: [PATCH 733/933] COMPAT: 32-bit compat for testing of indexers (#16849) xref #16826 --- pandas/core/indexes/category.py | 4 ++-- pandas/tests/indexes/common.py | 14 ++++++++++++++ pandas/tests/indexes/test_base.py | 11 ----------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d13636e8b43e2..c9e0e3b10875c 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -497,7 +497,6 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): codes = self.categories.get_indexer(target) indexer, _ = self._engine.get_indexer_non_unique(codes) - return _ensure_platform_int(indexer) @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs) @@ -508,7 +507,8 @@ def get_indexer_non_unique(self, target): target = target.categories codes = self.categories.get_indexer(target) - return self._engine.get_indexer_non_unique(codes) + indexer, missing = self._engine.get_indexer_non_unique(codes) + return _ensure_platform_int(indexer), missing @Appender(_index_shared_docs['_convert_scalar_indexer']) def _convert_scalar_indexer(self, key, kind=None): diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index e7ca435dca1c1..1513a1c690014 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -132,6 +132,20 @@ def test_reindex_base(self): with tm.assert_raises_regex(ValueError, 'Invalid fill method'): idx.get_indexer(idx, method='invalid') + def test_get_indexer_consistency(self): + # See GH 16819 + for name, index in self.indices.items(): + if isinstance(index, IntervalIndex): + continue + + indexer = index.get_indexer(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + + indexer, _ = index.get_indexer_non_unique(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + def test_ndarray_compat_properties(self): idx = self.create_index() assert idx.T.equals(idx) diff --git 
a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 7a81a125467d5..18dbe6624008a 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1131,17 +1131,6 @@ def test_get_indexer_strings(self): with pytest.raises(TypeError): idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2) - def test_get_indexer_consistency(self): - # See GH 16819 - for name, index in self.indices.items(): - indexer = index.get_indexer(index[0:2]) - assert isinstance(indexer, np.ndarray) - assert indexer.dtype == np.intp - - indexer, _ = index.get_indexer_non_unique(index[0:2]) - assert isinstance(indexer, np.ndarray) - assert indexer.dtype == np.intp - def test_get_loc(self): idx = pd.Index([0, 1, 2]) all_methods = [None, 'pad', 'backfill', 'nearest'] From e832ddfc8bb6362a465de18ccdd25a42585ba2bd Mon Sep 17 00:00:00 2001 From: Adam Gleave Date: Fri, 7 Jul 2017 14:11:46 +0100 Subject: [PATCH 734/933] BUG: fix infer frequency for business daily (#16683) --- asv_bench/benchmarks/timeseries.py | 16 ++++++++++++--- doc/source/whatsnew/v0.20.3.txt | 1 - doc/source/whatsnew/v0.21.0.txt | 2 +- .../indexes/timedeltas/test_timedelta.py | 20 +++++++++++++------ pandas/tests/tseries/test_frequencies.py | 7 ++++++- pandas/tseries/frequencies.py | 16 +++++++++++++-- 6 files changed, 48 insertions(+), 14 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index f5ea4d7875931..efe713639fec9 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -53,7 +53,11 @@ def setup(self): self.rng6 = date_range(start='1/1/1', periods=self.N, freq='B') self.rng7 = date_range(start='1/1/1700', freq='D', periods=100000) - self.a = self.rng7[:50000].append(self.rng7[50002:]) + self.no_freq = self.rng7[:50000].append(self.rng7[50002:]) + self.d_freq = self.rng7[:50000].append(self.rng7[50000:]) + + self.rng8 = date_range(start='1/1/1700', freq='B', periods=100000) + self.b_freq = self.rng8[:50000].append(self.rng8[50000:]) def time_add_timedelta(self): (self.rng + dt.timedelta(minutes=2)) @@ -94,8 +98,14 @@ def time_infer_dst(self): def time_timeseries_is_month_start(self): self.rng6.is_month_start - def time_infer_freq(self): - infer_freq(self.a) + def time_infer_freq_none(self): + infer_freq(self.no_freq) + + def time_infer_freq_daily(self): + infer_freq(self.d_freq) + + def time_infer_freq_business(self): + infer_freq(self.b_freq) class TimeDatetimeConverter(object): diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index c11c74211030a..644a3047ae7a9 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -74,7 +74,6 @@ Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Sparse ^^^^^^ diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 95eab9e3b684f..d5cc3d6ddca8e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -169,7 +169,7 @@ Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) - +- Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) Sparse ^^^^^^ diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 79fe0a864f246..08cf5108ffdb1 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ 
b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -564,15 +564,23 @@ def test_freq_conversion(self): class TestSlicing(object): + @pytest.mark.parametrize('freq', ['B', 'D']) + def test_timedelta(self, freq): + index = date_range('1/1/2000', periods=50, freq=freq) - def test_timedelta(self): - # this is valid too - index = date_range('1/1/2000', periods=50, freq='B') shifted = index + timedelta(1) back = shifted + timedelta(-1) - assert tm.equalContents(index, back) - assert shifted.freq == index.freq - assert shifted.freq == back.freq + tm.assert_index_equal(index, back) + + if freq == 'D': + expected = pd.tseries.offsets.Day(1) + assert index.freq == expected + assert shifted.freq == expected + assert back.freq == expected + else: # freq == 'B' + assert index.freq == pd.tseries.offsets.BusinessDay(1) + assert shifted.freq is None + assert back.freq == pd.tseries.offsets.BusinessDay(1) result = index - timedelta(1) expected = index + timedelta(-1) diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index 2edca1bd4676b..54d12317b0bf8 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -504,9 +504,14 @@ def test_raise_if_too_few(self): pytest.raises(ValueError, frequencies.infer_freq, index) def test_business_daily(self): - index = _dti(['12/31/1998', '1/3/1999', '1/4/1999']) + index = _dti(['01/01/1999', '1/4/1999', '1/5/1999']) assert frequencies.infer_freq(index) == 'B' + def test_business_daily_look_alike(self): + # GH 16624, do not infer 'B' when 'weekend' (2-day gap) in wrong place + index = _dti(['12/31/1998', '1/3/1999', '1/4/1999']) + assert frequencies.infer_freq(index) is None + def test_day(self): self._check_tick(timedelta(1), 'D') diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index dddf835424f67..8640f106a048a 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -975,8 +975,7 @@ def _infer_daily_rule(self): else: return _maybe_add_count('D', days) - # Business daily. Maybe - if self.day_deltas == [1, 3]: + if self._is_business_daily(): return 'B' wom_rule = self._get_wom_rule() @@ -1012,6 +1011,19 @@ def _get_monthly_rule(self): return {'cs': 'MS', 'bs': 'BMS', 'ce': 'M', 'be': 'BM'}.get(pos_check) + def _is_business_daily(self): + # quick check: cannot be business daily + if self.day_deltas != [1, 3]: + return False + + # probably business daily, but need to confirm + first_weekday = self.index[0].weekday() + shifts = np.diff(self.index.asi8) + shifts = np.floor_divide(shifts, _ONE_DAY) + weekdays = np.mod(first_weekday + np.cumsum(shifts), 7) + return np.all(((weekdays == 0) & (shifts == 3)) | + ((weekdays > 0) & (weekdays <= 4) & (shifts == 1))) + def _get_wom_rule(self): # wdiffs = unique(np.diff(self.index.week)) # We also need -47, -49, -48 to catch index spanning year boundary From 7be9db9c2ca46099723500af88850c2ef2eef0ce Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 7 Jul 2017 08:46:04 -0500 Subject: [PATCH 735/933] DOC: Whatsnew updates (#16853) [ci skip] --- doc/source/whatsnew/v0.20.3.txt | 68 +++++++-------------------------- 1 file changed, 14 insertions(+), 54 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 644a3047ae7a9..582f975f81a7a 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -1,100 +1,60 @@ .. 
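The PATCH 734 behavior is easiest to see with the dates from its own tests:

    import pandas as pd

    # Friday -> Monday -> Tuesday really is business daily
    pd.infer_freq(pd.DatetimeIndex(['1999-01-01', '1999-01-04',
                                    '1999-01-05']))      # 'B'

    # a weekend-sized gap in the wrong place no longer looks like 'B'
    pd.infer_freq(pd.DatetimeIndex(['1998-12-31', '1999-01-03',
                                    '1999-01-04']))      # None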
_whatsnew_0203: -v0.20.3 (June ??, 2017) +v0.20.3 (July 7, 2017) ----------------------- -This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes, -bug fixes and performance improvements. -We recommend that all users upgrade to this version. +This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes +and bug fixes. We recommend that all users upgrade to this version. .. contents:: What's new in v0.20.3 :local: :backlinks: none - -.. _whatsnew_0203.enhancements: - -Enhancements -~~~~~~~~~~~~ - - - - - - -.. _whatsnew_0203.performance: - -Performance Improvements -~~~~~~~~~~~~~~~~~~~~~~~~ - - - - - - .. _whatsnew_0203.bug_fixes: Bug Fixes ~~~~~~~~~ -- Fixed issue with dataframe scatter plot for categorical data that reports incorrect column key not found when categorical data is used for plotting (:issue:`16199`) -- Fixed issue with :meth:`DataFrame.style` where element id's were not unique (:issue:`16780`) -- Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) -- Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) -- Fixed a bug in failing to compute rolling computations of a column-MultiIndexed ``DataFrame`` (:issue:`16789`, :issue:`16825`) -- Bug in a DataFrame/Series with a ``TimedeltaIndex`` when slice indexing (:issue:`16637`) +- Fixed a bug in failing to compute rolling computations of a column-MultiIndexed ``DataFrame`` (:issue:`16789`, :issue:`16825`) +- Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) Conversion ^^^^^^^^^^ - Bug in pickle compat prior to the v0.20.x series, when ``UTC`` is a timezone in a Series/DataFrame/Index (:issue:`16608`) - Bug in ``Series`` construction when passing a ``Series`` with ``dtype='category'`` (:issue:`16524`). -- Bug in ``DataFrame.astype()`` when passing a ``Series`` as the ``dtype`` kwarg. (:issue:`16717`). +- Bug in :meth:`DataFrame.astype` when passing a ``Series`` as the ``dtype`` kwarg. (:issue:`16717`). Indexing ^^^^^^^^ - Bug in ``Float64Index`` causing an empty array instead of ``None`` to be returned from ``.get(np.nan)`` on a Series whose index did not contain any ``NaN`` s (:issue:`8569`) - Bug in ``MultiIndex.isin`` causing an error when passing an empty iterable (:issue:`16777`) +- Fixed a bug in a slicing DataFrame/Series that have a ``TimedeltaIndex`` (:issue:`16637`) I/O ^^^ - Bug in :func:`read_csv` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`) - Bug in :func:`read_hdf` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`) -- Bug in :func:`DataFrame.to_latex` where ``bold_rows`` was wrongly specified to be ``True`` by default, whereas in reality row labels remained non-bold whatever parameter provided. (:issue:`16707`) +- Bug in :meth:`DataFrame.to_latex` where ``bold_rows`` was wrongly specified to be ``True`` by default, whereas in reality row labels remained non-bold whatever parameter provided. 
(:issue:`16707`) +- Fixed an issue with :meth:`DataFrame.style` where generated element ids were not unique (:issue:`16780`) +- Fixed loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) Plotting ^^^^^^^^ -- Fix regression in series plotting that prevented RGB and RGBA tuples from being used as color arguments (:issue:`16233`) - - - -Groupby/Resample/Rolling -^^^^^^^^^^^^^^^^^^^^^^^^ - - -Sparse -^^^^^^ - - +- Fixed regression that prevented RGB and RGBA tuples from being used as color arguments (:issue:`16233`) +- Fixed an issue with :meth:`DataFrame.plot.scatter` that incorrectly raised a ``KeyError`` when categorical data is used for plotting (:issue:`16199`) Reshaping ^^^^^^^^^ + - ``PeriodIndex`` / ``TimedeltaIndex.join`` was missing the ``sort=`` kwarg (:issue:`16541`) - Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`). - Bug in :func:`merge` when merging/joining with multiple categorical columns (:issue:`16767`) - -Numeric -^^^^^^^ - - Categorical ^^^^^^^^^^^ -- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` with categorical data (:issue:`16793`) - -Other -^^^^^ +- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` parameter with categorical data (:issue:`16793`) From 18f929fa83d2d1f335f8ccf325c05a6ce314b94d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 7 Jul 2017 11:54:04 -0500 Subject: [PATCH 736/933] TST/PKG: Move test HDF5 file to legacy (#16856) It wasn't being picked up in our package data otherwise --- .../periodindex_0.20.1_x86_64_darwin_2.7.13.h5 | Bin pandas/tests/io/test_pytables.py | 3 ++- 2 files changed, 2 insertions(+), 1 deletion(-) rename pandas/tests/io/data/{ => legacy_hdf}/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 (100%) diff --git a/pandas/tests/io/data/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 b/pandas/tests/io/data/legacy_hdf/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 similarity index 100% rename from pandas/tests/io/data/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 rename to pandas/tests/io/data/legacy_hdf/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 69c92dd775b9a..c0d200560b477 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5279,7 +5279,8 @@ def test_read_py2_hdf_file_in_py3(self): ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')) with ensure_clean_store( - tm.get_data_path('periodindex_0.20.1_x86_64_darwin_2.7.13.h5'), + tm.get_data_path( + 'legacy_hdf/periodindex_0.20.1_x86_64_darwin_2.7.13.h5'), mode='r') as store: result = store['p'] assert_frame_equal(result, expected) From 9c44f9b2cad863bde17c7dd061d5b5b5ccbada21 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 8 Jul 2017 12:08:31 -0400 Subject: [PATCH 737/933] COMPAT: moar 32-bit compat for testing of indexers (#16861) xref #16826 --- pandas/core/indexes/base.py | 2 +- pandas/tests/indexes/test_category.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index cefb080a3ee78..e1053c1610175 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2704,7 +2704,7 @@ def get_indexer_non_unique(self, target): tgt_values = target._values indexer, missing = self._engine.get_indexer_non_unique(tgt_values) - return indexer, missing + return _ensure_platform_int(indexer), missing def get_indexer_for(self, target, 
**kwargs): """ diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 493274fff43e0..9dc2cfdecb98f 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -401,7 +401,7 @@ def test_reindex_dtype(self): exp = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']) tm.assert_index_equal(res, exp, exact=True) tm.assert_numpy_array_equal(indexer, - np.array([0, 3, 2], dtype=np.int64)) + np.array([0, 3, 2], dtype=np.intp)) c = CategoricalIndex(['a', 'b', 'c', 'a'], categories=['a', 'b', 'c', 'd']) @@ -409,7 +409,7 @@ def test_reindex_dtype(self): exp = Index(['a', 'a', 'c'], dtype='object') tm.assert_index_equal(res, exp, exact=True) tm.assert_numpy_array_equal(indexer, - np.array([0, 3, 2], dtype=np.int64)) + np.array([0, 3, 2], dtype=np.intp)) c = CategoricalIndex(['a', 'b', 'c', 'a'], categories=['a', 'b', 'c', 'd']) @@ -417,7 +417,7 @@ def test_reindex_dtype(self): exp = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']) tm.assert_index_equal(res, exp, exact=True) tm.assert_numpy_array_equal(indexer, - np.array([0, 3, 2], dtype=np.int64)) + np.array([0, 3, 2], dtype=np.intp)) def test_duplicates(self): From 3be2de63e4c6cfbd04671f86d07869dfc984e9ed Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 10 Jul 2017 03:12:50 -0700 Subject: [PATCH 738/933] MAINT: Drop the get_offset_name method (#16863) Deprecated since 0.18.0 xref gh-11834 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/tseries/frequencies.py | 14 -------------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d5cc3d6ddca8e..43bfebd0c2e59 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -126,6 +126,7 @@ Removal of prior version deprecations/changes - ``Index`` has dropped the ``.sym_diff()`` method in favor of ``.symmetric_difference()`` (:issue:`12591`) - ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) - :func:`eval` and :method:`DataFrame.eval` have changed the default of ``inplace`` from ``None`` to ``False`` (:issue:`11149`) +- The function ``get_offset_name`` has been dropped in favor of the ``.freqstr`` attribute for an offset (:issue:`11834`) .. _whatsnew_0210.performance: diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 8640f106a048a..c5f6c00a4005a 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -637,20 +637,6 @@ def get_offset(name): getOffset = get_offset -def get_offset_name(offset): - """ - Return rule name associated with a DateOffset object - - Examples - -------- - get_offset_name(BMonthEnd(1)) --> 'EOM' - """ - - msg = "get_offset_name(offset) is deprecated. 
Use offset.freqstr instead" - warnings.warn(msg, FutureWarning, stacklevel=2) - return offset.freqstr - - def get_standard_freq(freq): """ Return the standardized frequency string From a5477b760d939a1f62ab5d38c75bf9d802a2bcf3 Mon Sep 17 00:00:00 2001 From: Adrian Liaw Date: Mon, 10 Jul 2017 18:13:49 +0800 Subject: [PATCH 739/933] DOC: Fix missing parentheses in documentation (#16862) --- doc/source/groupby.rst | 2 +- doc/source/io.rst | 4 ++-- doc/source/whatsnew/v0.13.0.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 61f43146aba85..937d682d238b3 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -933,7 +933,7 @@ The dimension of the returned result can also change: d = pd.DataFrame({"a":["x", "y"], "b":[1,2]}) def identity(df): - print df + print(df) return df d.groupby("a").apply(identity) diff --git a/doc/source/io.rst b/doc/source/io.rst index e1e82f686f182..9bf84e5419ffa 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3194,7 +3194,7 @@ You can pass ``iterator=True`` to iterate over the unpacked results .. ipython:: python for o in pd.read_msgpack('foo.msg',iterator=True): - print o + print(o) You can pass ``append=True`` to the writer to append to an existing pack @@ -3912,7 +3912,7 @@ chunks. evens = [2,4,6,8,10] coordinates = store.select_as_coordinates('dfeq','number=evens') for c in chunks(coordinates, 2): - print store.select('dfeq',where=c) + print(store.select('dfeq',where=c)) Advanced Queries ++++++++++++++++ diff --git a/doc/source/whatsnew/v0.13.0.txt b/doc/source/whatsnew/v0.13.0.txt index 3347b05a5df37..f440be1ddd56e 100644 --- a/doc/source/whatsnew/v0.13.0.txt +++ b/doc/source/whatsnew/v0.13.0.txt @@ -790,7 +790,7 @@ Experimental .. ipython:: python for o in pd.read_msgpack('foo.msg',iterator=True): - print o + print(o) .. 
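For downstream code that still called the helper removed in PATCH 738, the migration is a one-liner; a hedged sketch (the offsets are chosen for illustration):

    import pandas as pd
    from pandas.tseries.offsets import BMonthEnd

    # get_offset_name(BMonthEnd(1)) is gone; read the attribute instead
    BMonthEnd(1).freqstr                                   # 'BM'
    pd.tseries.frequencies.to_offset('W-SUN').freqstr      # 'W-SUN'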
ipython:: python
   :suppress:

From a43c1576ce3d94bc82f7cdd63531280ced5a9fa0 Mon Sep 17 00:00:00 2001
From: Guillem Borrell
Date: Mon, 10 Jul 2017 12:15:08 +0200
Subject: [PATCH 740/933] BUG: rolling.quantile does not return an interpolated
 result (#16247)

---
 asv_bench/benchmarks/rolling.py | 185 ++++++++++++++++++++++++++++++++
 doc/source/whatsnew/v0.21.0.txt |   5 +-
 pandas/_libs/window.pyx         |  15 ++-
 pandas/core/window.py           |  11 +-
 pandas/tests/test_window.py     |  41 ++++++-
 5 files changed, 249 insertions(+), 8 deletions(-)
 create mode 100644 asv_bench/benchmarks/rolling.py

diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py
new file mode 100644
index 0000000000000..9da9d0b855323
--- /dev/null
+++ b/asv_bench/benchmarks/rolling.py
@@ -0,0 +1,185 @@
+from .pandas_vb_common import *
+import pandas as pd
+import numpy as np
+
+
+class DataframeRolling(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.N = 100000
+        self.Ns = 10000
+        self.df = pd.DataFrame({'a': np.random.random(self.N)})
+        self.dfs = pd.DataFrame({'a': np.random.random(self.Ns)})
+        self.wins = 10
+        self.winl = 1000
+
+    def time_rolling_quantile_0(self):
+        (self.df.rolling(self.wins).quantile(0.0))
+
+    def time_rolling_quantile_1(self):
+        (self.df.rolling(self.wins).quantile(1.0))
+
+    def time_rolling_quantile_median(self):
+        (self.df.rolling(self.wins).quantile(0.5))
+
+    def time_rolling_median(self):
+        (self.df.rolling(self.wins).median())
+
+    def time_rolling_mean(self):
+        (self.df.rolling(self.wins).mean())
+
+    def time_rolling_max(self):
+        (self.df.rolling(self.wins).max())
+
+    def time_rolling_min(self):
+        (self.df.rolling(self.wins).min())
+
+    def time_rolling_std(self):
+        (self.df.rolling(self.wins).std())
+
+    def time_rolling_count(self):
+        (self.df.rolling(self.wins).count())
+
+    def time_rolling_skew(self):
+        (self.df.rolling(self.wins).skew())
+
+    def time_rolling_kurt(self):
+        (self.df.rolling(self.wins).kurt())
+
+    def time_rolling_sum(self):
+        (self.df.rolling(self.wins).sum())
+
+    def time_rolling_corr(self):
+        (self.dfs.rolling(self.wins).corr())
+
+    def time_rolling_cov(self):
+        (self.dfs.rolling(self.wins).cov())
+
+    def time_rolling_quantile_0_l(self):
+        (self.df.rolling(self.winl).quantile(0.0))
+
+    def time_rolling_quantile_1_l(self):
+        (self.df.rolling(self.winl).quantile(1.0))
+
+    def time_rolling_quantile_median_l(self):
+        (self.df.rolling(self.winl).quantile(0.5))
+
+    def time_rolling_median_l(self):
+        (self.df.rolling(self.winl).median())
+
+    def time_rolling_mean_l(self):
+        (self.df.rolling(self.winl).mean())
+
+    def time_rolling_max_l(self):
+        (self.df.rolling(self.winl).max())
+
+    def time_rolling_min_l(self):
+        (self.df.rolling(self.winl).min())
+
+    def time_rolling_std_l(self):
+        (self.df.rolling(self.winl).std())
+
+    def time_rolling_count_l(self):
+        (self.df.rolling(self.winl).count())
+
+    def time_rolling_skew_l(self):
+        (self.df.rolling(self.winl).skew())
+
+    def time_rolling_kurt_l(self):
+        (self.df.rolling(self.winl).kurt())
+
+    def time_rolling_sum_l(self):
+        (self.df.rolling(self.winl).sum())
+
+
+class SeriesRolling(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.N = 100000
+        self.Ns = 10000
+        self.df = pd.DataFrame({'a': np.random.random(self.N)})
+        self.dfs = pd.DataFrame({'a': np.random.random(self.Ns)})
+        self.sr = self.df.a
+        self.srs = self.dfs.a
+        self.wins = 10
+        self.winl = 1000
+
+    def time_rolling_quantile_0(self):
+        (self.sr.rolling(self.wins).quantile(0.0))
+
+    def time_rolling_quantile_1(self):
+        (self.sr.rolling(self.wins).quantile(1.0))
+
+    def time_rolling_quantile_median(self):
+        (self.sr.rolling(self.wins).quantile(0.5))
+
+    def time_rolling_median(self):
+        (self.sr.rolling(self.wins).median())
+
+    def time_rolling_mean(self):
+        (self.sr.rolling(self.wins).mean())
+
+    def time_rolling_max(self):
+        (self.sr.rolling(self.wins).max())
+
+    def time_rolling_min(self):
+        (self.sr.rolling(self.wins).min())
+
+    def time_rolling_std(self):
+        (self.sr.rolling(self.wins).std())
+
+    def time_rolling_count(self):
+        (self.sr.rolling(self.wins).count())
+
+    def time_rolling_skew(self):
+        (self.sr.rolling(self.wins).skew())
+
+    def time_rolling_kurt(self):
+        (self.sr.rolling(self.wins).kurt())
+
+    def time_rolling_sum(self):
+        (self.sr.rolling(self.wins).sum())
+
+    def time_rolling_corr(self):
+        (self.srs.rolling(self.wins).corr())
+
+    def time_rolling_cov(self):
+        (self.srs.rolling(self.wins).cov())
+
+    def time_rolling_quantile_0_l(self):
+        (self.sr.rolling(self.winl).quantile(0.0))
+
+    def time_rolling_quantile_1_l(self):
+        (self.sr.rolling(self.winl).quantile(1.0))
+
+    def time_rolling_quantile_median_l(self):
+        (self.sr.rolling(self.winl).quantile(0.5))
+
+    def time_rolling_median_l(self):
+        (self.sr.rolling(self.winl).median())
+
+    def time_rolling_mean_l(self):
+        (self.sr.rolling(self.winl).mean())
+
+    def time_rolling_max_l(self):
+        (self.sr.rolling(self.winl).max())
+
+    def time_rolling_min_l(self):
+        (self.sr.rolling(self.winl).min())
+
+    def time_rolling_std_l(self):
+        (self.sr.rolling(self.winl).std())
+
+    def time_rolling_count_l(self):
+        (self.sr.rolling(self.winl).count())
+
+    def time_rolling_skew_l(self):
+        (self.sr.rolling(self.winl).skew())
+
+    def time_rolling_kurt_l(self):
+        (self.sr.rolling(self.winl).kurt())
+
+    def time_rolling_sum_l(self):
+        (self.sr.rolling(self.winl).sum())

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 43bfebd0c2e59..1edbf1638d233 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -168,9 +168,11 @@ Plotting
 Groupby/Resample/Rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
-- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`)
+- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`)
 - Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`)
+- Bug in ``.rolling.quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`)
+
 Sparse
 ^^^^^^
@@ -191,6 +193,7 @@ Categorical
 ^^^^^^^^^^^

+
 Other
 ^^^^^
 - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`)
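To see the interpolation fix from this patch in action, a minimal sketch; the numbers follow the new ``test_rolling_quantile_series`` test below and assume a build containing this patch:

    import pandas as pd

    s = pd.Series(range(100))

    s.quantile(0.1)                          # 9.9, linearly interpolated
    s.rolling(100).quantile(0.1).iloc[-1]    # now also 9.9; was 9.0 before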
diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx
index 3bb8abe26c781..2450eea5500cd 100644
--- a/pandas/_libs/window.pyx
+++ b/pandas/_libs/window.pyx
@@ -1348,8 +1348,9 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win,
         bint is_variable
         ndarray[int64_t] start, end
         ndarray[double_t] output
+        double vlow, vhigh

-    if quantile < 0.0 or quantile > 1.0:
+    if quantile <= 0.0 or quantile >= 1.0:
         raise ValueError("quantile value {0} not in [0, 1]".format(quantile))

     # we use the Fixed/Variable Indexer here as the
@@ -1391,7 +1392,17 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win,

             if nobs >= minp:
                 idx = int(quantile * (nobs - 1))
-                output[i] = skiplist.get(idx)
+
+                # Single value in skip list
+                if nobs == 1:
+                    output[i] = skiplist.get(0)
+
+                # Interpolated quantile
+                else:
+                    vlow = skiplist.get(idx)
+                    vhigh = skiplist.get(idx + 1)
+                    output[i] = (vlow + (vhigh - vlow) *
+                                 (quantile * (nobs - 1) - idx))
             else:
                 output[i] = NaN

diff --git a/pandas/core/window.py b/pandas/core/window.py
index 02b508bb94e4c..57611794c375f 100644
--- a/pandas/core/window.py
+++ b/pandas/core/window.py
@@ -975,8 +975,15 @@ def quantile(self, quantile, **kwargs):
         def f(arg, *args, **kwargs):
             minp = _use_window(self.min_periods, window)
-            return _window.roll_quantile(arg, window, minp, indexi,
-                                         self.closed, quantile)
+            if quantile == 1.0:
+                return _window.roll_max(arg, window, minp, indexi,
+                                        self.closed)
+            elif quantile == 0.0:
+                return _window.roll_min(arg, window, minp, indexi,
+                                        self.closed)
+            else:
+                return _window.roll_quantile(arg, window, minp, indexi,
+                                             self.closed, quantile)

         return self._apply(f, 'quantile', quantile=quantile,
                            **kwargs)

diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index 9c3765ffdb716..3ba5d2065cddf 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -1122,8 +1122,19 @@ def test_rolling_quantile(self):
         def scoreatpercentile(a, per):
             values = np.sort(a, axis=0)

-            idx = per / 1. * (values.shape[0] - 1)
-            return values[int(idx)]
+            idx = int(per / 1. * (values.shape[0] - 1))
+
+            if idx == values.shape[0] - 1:
+                retval = values[-1]
+
+            else:
+                qlow = float(idx) / float(values.shape[0] - 1)
+                qhig = float(idx + 1) / float(values.shape[0] - 1)
+                vlow = values[idx]
+                vhig = values[idx + 1]
+                retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow)
+
+            return retval

         for q in qs:
@@ -1138,6 +1149,30 @@ def alt(x):
             self._check_moment_func(f, alt, name='quantile', quantile=q)

+    def test_rolling_quantile_np_percentile(self):
+        # #9413: Tests that rolling window's quantile default behavior
+        # is analogous to Numpy's percentile
+        row = 10
+        col = 5
+        idx = pd.date_range(20100101, periods=row, freq='B')
+        df = pd.DataFrame(np.random.rand(row * col).reshape((row, -1)),
+                          index=idx)
+
+        df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0)
+        np_percentile = np.percentile(df, [25, 50, 75], axis=0)
+
+        tm.assert_almost_equal(df_quantile.values, np.array(np_percentile))
+
+    def test_rolling_quantile_series(self):
+        # #16211: Tests that rolling window's quantile default behavior
+        # is analogous to pd.Series' quantile
+        arr = np.arange(100)
+        s = pd.Series(arr)
+        q1 = s.quantile(0.1)
+        q2 = s.rolling(100).quantile(0.1).iloc[-1]
+
+        tm.assert_almost_equal(q1, q2)
+
     def test_rolling_quantile_param(self):
         ser = Series([0.0, .1, .5, .9, 1.0])

@@ -3558,7 +3593,7 @@ def test_ragged_quantile(self):
         result = df.rolling(window='2s', min_periods=1).quantile(0.5)
         expected = df.copy()
-        expected['B'] = [0.0, 1, 1.0, 3.0, 3.0]
+        expected['B'] = [0.0, 1, 1.5, 3.0, 3.5]
         tm.assert_frame_equal(result, expected)

     def test_ragged_std(self):

From 3e20eab7ad5639810b4824790cd559367b326b0b Mon Sep 17 00:00:00 2001
From: Keiron Pizzey
Date: Mon, 10 Jul 2017 11:36:01 +0100
Subject: [PATCH 741/933] ENH - Modify DataFrame.select_dtypes to accept scalar
 values (#16860)

---
 doc/source/basics.rst             |   4 -
 doc/source/style.ipynb            |   2 +-
 doc/source/whatsnew/v0.21.0.txt   |   1 +
 pandas/core/frame.py              |  26 +++---
 pandas/tests/frame/test_dtypes.py | 130 +++++++++++++++++++++++++-----
 5 files changed, 130 insertions(+), 33 deletions(-)

diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index 134cc5106015b..d8b1602fb104d 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -2229,7 +2229,3 @@ All numpy dtypes are subclasses of ``numpy.generic``:
From 3e20eab7ad5639810b4824790cd559367b326b0b Mon Sep 17 00:00:00 2001
From: Keiron Pizzey
Date: Mon, 10 Jul 2017 11:36:01 +0100
Subject: [PATCH 741/933] ENH - Modify DataFrame.select_dtypes to accept
 scalar values (#16860)

---
 doc/source/basics.rst             |   4 -
 doc/source/style.ipynb            |   2 +-
 doc/source/whatsnew/v0.21.0.txt   |   1 +
 pandas/core/frame.py              |  26 +++---
 pandas/tests/frame/test_dtypes.py | 130 +++++++++++++++++++++++++-----
 5 files changed, 130 insertions(+), 33 deletions(-)

diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index 134cc5106015b..d8b1602fb104d 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -2229,7 +2229,3 @@
 All numpy dtypes are subclasses of ``numpy.generic``:

 Pandas also defines the types ``category``, and ``datetime64[ns, tz]``, which
 are not integrated into the normal numpy hierarchy and won't show up with the
 above function.
-
-.. note::
-
-    The ``include`` and ``exclude`` parameters must be non-string sequences.
diff --git a/doc/source/style.ipynb b/doc/source/style.ipynb
index 4eeda491426b1..c250787785e14 100644
--- a/doc/source/style.ipynb
+++ b/doc/source/style.ipynb
@@ -935,7 +935,7 @@
    "\n",
    "*Experimental: This is a new feature and still under development. We'll be adding features and possibly making breaking changes in future releases. We'd love to hear your feedback.*\n",
    "\n",
-    "Some support is available for exporting styled `DataFrames` to Excel worksheets using the `OpenPyXL` engine. CSS2.2 properties handled include:\n",
+    "Some support is available for exporting styled `DataFrames` to Excel worksheets using the `OpenPyXL` engine. CSS2.2 properties handled include:\n",
    "\n",
    "- `background-color`\n",
    "- `border-style`, `border-width`, `border-color` and their {`top`, `right`, `bottom`, `left` variants}\n",
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 1edbf1638d233..8c71681582063 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -39,6 +39,7 @@ Other Enhancements
 - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`)
 - :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`)
 - :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`)
+- :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`)

.. _whatsnew_0210.api_breaking:

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 80cdebc24c39d..6559fc4c24ce2 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2285,9 +2285,9 @@ def select_dtypes(self, include=None, exclude=None):

         Parameters
         ----------
-        include, exclude : list-like
-            A list of dtypes or strings to be included/excluded. You must pass
-            in a non-empty sequence for at least one of these.
+        include, exclude : scalar or list-like
+            A selection of dtypes or strings to be included/excluded. At least
+            one of these parameters must be supplied.

         Raises
         ------
@@ -2295,8 +2295,6 @@ def select_dtypes(self, include=None, exclude=None):
             * If both of ``include`` and ``exclude`` are empty
             * If ``include`` and ``exclude`` have overlapping elements
             * If any kind of string dtype is passed in.
- TypeError - * If either of ``include`` or ``exclude`` is not a sequence Returns ------- @@ -2331,6 +2329,14 @@ def select_dtypes(self, include=None, exclude=None): 3 0.0764 False 2 4 -0.9703 True 1 5 -1.2094 False 2 + >>> df.select_dtypes(include='bool') + c + 0 True + 1 False + 2 True + 3 False + 4 True + 5 False >>> df.select_dtypes(include=['float64']) c 0 1 @@ -2348,10 +2354,12 @@ def select_dtypes(self, include=None, exclude=None): 4 True 5 False """ - include, exclude = include or (), exclude or () - if not (is_list_like(include) and is_list_like(exclude)): - raise TypeError('include and exclude must both be non-string' - ' sequences') + + if not is_list_like(include): + include = (include,) if include is not None else () + if not is_list_like(exclude): + exclude = (exclude,) if exclude is not None else () + selection = tuple(map(frozenset, (include, exclude))) if not any(selection): diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 335b76ff2aade..065580d56a683 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -104,7 +104,7 @@ def test_dtypes_are_correct_after_column_slice(self): ('b', np.float_), ('c', np.float_)]))) - def test_select_dtypes_include(self): + def test_select_dtypes_include_using_list_like(self): df = DataFrame({'a': list('abc'), 'b': list(range(1, 4)), 'c': np.arange(3, 6).astype('u1'), @@ -145,14 +145,10 @@ def test_select_dtypes_include(self): ei = df[['h', 'i']] assert_frame_equal(ri, ei) - ri = df.select_dtypes(include=['timedelta']) - ei = df[['k']] - assert_frame_equal(ri, ei) - pytest.raises(NotImplementedError, lambda: df.select_dtypes(include=['period'])) - def test_select_dtypes_exclude(self): + def test_select_dtypes_exclude_using_list_like(self): df = DataFrame({'a': list('abc'), 'b': list(range(1, 4)), 'c': np.arange(3, 6).astype('u1'), @@ -162,7 +158,7 @@ def test_select_dtypes_exclude(self): ee = df[['a', 'e']] assert_frame_equal(re, ee) - def test_select_dtypes_exclude_include(self): + def test_select_dtypes_exclude_include_using_list_like(self): df = DataFrame({'a': list('abc'), 'b': list(range(1, 4)), 'c': np.arange(3, 6).astype('u1'), @@ -181,6 +177,114 @@ def test_select_dtypes_exclude_include(self): e = df[['b', 'e']] assert_frame_equal(r, e) + def test_select_dtypes_include_using_scalars(self): + df = DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.Categorical(list('abc')), + 'g': pd.date_range('20130101', periods=3), + 'h': pd.date_range('20130101', periods=3, + tz='US/Eastern'), + 'i': pd.date_range('20130101', periods=3, + tz='CET'), + 'j': pd.period_range('2013-01', periods=3, + freq='M'), + 'k': pd.timedelta_range('1 day', periods=3)}) + + ri = df.select_dtypes(include=np.number) + ei = df[['b', 'c', 'd', 'k']] + assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include='datetime') + ei = df[['g']] + assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include='datetime64') + ei = df[['g']] + assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include='category') + ei = df[['f']] + assert_frame_equal(ri, ei) + + pytest.raises(NotImplementedError, + lambda: df.select_dtypes(include='period')) + + def test_select_dtypes_exclude_using_scalars(self): + df = DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': 
pd.Categorical(list('abc')), + 'g': pd.date_range('20130101', periods=3), + 'h': pd.date_range('20130101', periods=3, + tz='US/Eastern'), + 'i': pd.date_range('20130101', periods=3, + tz='CET'), + 'j': pd.period_range('2013-01', periods=3, + freq='M'), + 'k': pd.timedelta_range('1 day', periods=3)}) + + ri = df.select_dtypes(exclude=np.number) + ei = df[['a', 'e', 'f', 'g', 'h', 'i', 'j']] + assert_frame_equal(ri, ei) + + ri = df.select_dtypes(exclude='category') + ei = df[['a', 'b', 'c', 'd', 'e', 'g', 'h', 'i', 'j', 'k']] + assert_frame_equal(ri, ei) + + pytest.raises(NotImplementedError, + lambda: df.select_dtypes(exclude='period')) + + def test_select_dtypes_include_exclude_using_scalars(self): + df = DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.Categorical(list('abc')), + 'g': pd.date_range('20130101', periods=3), + 'h': pd.date_range('20130101', periods=3, + tz='US/Eastern'), + 'i': pd.date_range('20130101', periods=3, + tz='CET'), + 'j': pd.period_range('2013-01', periods=3, + freq='M'), + 'k': pd.timedelta_range('1 day', periods=3)}) + + ri = df.select_dtypes(include=np.number, exclude='floating') + ei = df[['b', 'c', 'k']] + assert_frame_equal(ri, ei) + + def test_select_dtypes_include_exclude_mixed_scalars_lists(self): + df = DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.Categorical(list('abc')), + 'g': pd.date_range('20130101', periods=3), + 'h': pd.date_range('20130101', periods=3, + tz='US/Eastern'), + 'i': pd.date_range('20130101', periods=3, + tz='CET'), + 'j': pd.period_range('2013-01', periods=3, + freq='M'), + 'k': pd.timedelta_range('1 day', periods=3)}) + + ri = df.select_dtypes(include=np.number, + exclude=['floating', 'timedelta']) + ei = df[['b', 'c']] + assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=[np.number, 'category'], + exclude='floating') + ei = df[['b', 'c', 'f', 'k']] + assert_frame_equal(ri, ei) + def test_select_dtypes_not_an_attr_but_still_valid_dtype(self): df = DataFrame({'a': list('abc'), 'b': list(range(1, 4)), @@ -205,18 +309,6 @@ def test_select_dtypes_empty(self): 'must be nonempty'): df.select_dtypes() - def test_select_dtypes_raises_on_string(self): - df = DataFrame({'a': list('abc'), 'b': list(range(1, 4))}) - with tm.assert_raises_regex(TypeError, 'include and exclude ' - '.+ non-'): - df.select_dtypes(include='object') - with tm.assert_raises_regex(TypeError, 'include and exclude ' - '.+ non-'): - df.select_dtypes(exclude='object') - with tm.assert_raises_regex(TypeError, 'include and exclude ' - '.+ non-'): - df.select_dtypes(include=int, exclude='object') - def test_select_dtypes_bad_datetime64(self): df = DataFrame({'a': list('abc'), 'b': list(range(1, 4)), From f4b12d8488434d5f9a45fba1cbe7ad5a77c776ff Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 10 Jul 2017 06:36:32 -0400 Subject: [PATCH 742/933] COMPAT: moar 32-bit compat for testing of indexers (#16869) xref #16826 --- pandas/tests/indexes/test_category.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 9dc2cfdecb98f..14f344acbefb2 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -393,7 +393,7 @@ def test_reindex_dtype(self): res, indexer = c.reindex(['a', 'c']) 
        tm.assert_index_equal(res, Index(['a', 'a', 'c']), exact=True)
         tm.assert_numpy_array_equal(indexer,
-                                    np.array([0, 3, 2], dtype=np.int64))
+                                    np.array([0, 3, 2], dtype=np.intp))

         c = CategoricalIndex(['a', 'b', 'c', 'a'])
         res, indexer = c.reindex(Categorical(['a', 'c']))

From 114feb9290c684b5e5b3a2456307f9116372e89f Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 11 Jul 2017 03:01:12 -0700
Subject: [PATCH 743/933] Confirm that select was *not* clearer in 0.12 (#16878)

---
 pandas/core/generic.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 7d1a8adf381fe..5722539b87aec 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2098,7 +2098,6 @@ def xs(self, key, axis=0, level=None, drop_level=True):

     _xs = xs

-    # TODO: Check if this was clearer in 0.12
     def select(self, crit, axis=0):
         """
         Return data corresponding to axis labels matching criteria

From 6a85e88bee498e7e218f0eeb766f15b9d78e9eaa Mon Sep 17 00:00:00 2001
From: topper-123
Date: Tue, 11 Jul 2017 11:08:57 +0100
Subject: [PATCH 744/933] Added tests for _get_dtype (#16845)

---
 pandas/tests/dtypes/test_common.py | 39 ++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
index ba510e68f9a21..c32e8590c5675 100644
--- a/pandas/tests/dtypes/test_common.py
+++ b/pandas/tests/dtypes/test_common.py
@@ -524,3 +524,42 @@ def test_is_complex_dtype():

     assert com.is_complex_dtype(np.complex)
     assert com.is_complex_dtype(np.array([1 + 1j, 5]))
+
+
+@pytest.mark.parametrize('input_param,result', [
+    (int, np.dtype(int)),
+    ('int32', np.dtype('int32')),
+    (float, np.dtype(float)),
+    ('float64', np.dtype('float64')),
+    (np.dtype('float64'), np.dtype('float64')),
+    pytest.mark.xfail((str, np.dtype('

Date: Tue, 11 Jul 2017 12:40:50 +0200
Subject: [PATCH 745/933] BUG: Series.isin fails on categoricals (#16858)

---
 doc/source/whatsnew/v0.21.0.txt |  2 +-
 pandas/core/algorithms.py       |  4 ++--
 pandas/tests/test_algos.py      | 10 ++++++++++
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 8c71681582063..015fdf1f45f47 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -192,7 +192,7 @@ Numeric

 Categorical
 ^^^^^^^^^^^
-
+- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`)

 Other

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index d74c5e66ea1a9..b490bf787a037 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -38,7 +38,6 @@
 # --------------- #
 # dtype access    #
 # --------------- #
-
 def _ensure_data(values, dtype=None):
     """
     routine to ensure that our data is of the correct
@@ -113,7 +112,8 @@ def _ensure_data(values, dtype=None):

         return values.asi8, dtype, 'int64'

-    elif is_categorical_dtype(values) or is_categorical_dtype(dtype):
+    elif (is_categorical_dtype(values) and
+          (is_categorical_dtype(dtype) or dtype is None)):
         values = getattr(values, 'values', values)
         values = values.codes
         dtype = 'category'
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 063dcea5c76d6..9504d2a9426f0 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -586,6 +586,16 @@ def test_large(self):
         expected[1] = True
         tm.assert_numpy_array_equal(result, expected)

+    def test_categorical_from_codes(self):
+        # GH 16639
+        vals = np.array([0, 1, 2, 0])
+        cats = ['a', 'b', 'c']
+        Sd = pd.Series(pd.Categorical(1).from_codes(vals, cats))
+        St = pd.Series(pd.Categorical(1).from_codes(np.array([0, 1]), cats))
+        expected = np.array([True, True, False, True])
+        result = algos.isin(Sd, St)
+        tm.assert_numpy_array_equal(expected, result)
+

 class TestValueCounts(object):
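Editor's note: a hedged usage sketch of the behavior the new test pins down.
The data mirrors test_categorical_from_codes above; the snippet is an
illustration written for this log, not code from the PR:

    import numpy as np
    import pandas as pd

    # Two categorical Series over the same categories; with this patch,
    # Series.isin compares them by value instead of failing.
    sd = pd.Series(pd.Categorical.from_codes(np.array([0, 1, 2, 0]),
                                             ['a', 'b', 'c']))
    st = pd.Series(pd.Categorical.from_codes(np.array([0, 1]),
                                             ['a', 'b', 'c']))

    print(sd.isin(st).tolist())  # expected with the fix: [True, True, False, True]

The key is the tightened condition in _ensure_data: values are only routed
through their categorical codes when the target dtype is also categorical (or
unspecified), so isin on a categorical Series no longer misfires.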
From 55af1ab626baf62dbbc00c2521c20be29b819a06 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Tue, 11 Jul 2017 12:39:39 -0400
Subject: [PATCH 746/933] COMPAT with dateutil 2.6.1, fixed ambiguous tz dst
 behavior (#16880)

---
 ci/requirements-3.5.run                |  1 -
 ci/requirements-3.5.sh                 |  4 ++++
 ci/requirements-3.6_NUMPY_DEV.run      |  1 -
 pandas/tests/tseries/test_offsets.py   |  5 ++++-
 pandas/tests/tseries/test_timezones.py | 21 +++++++++++++++++----
 5 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run
index 43e6814ed6c8e..52828b5220997 100644
--- a/ci/requirements-3.5.run
+++ b/ci/requirements-3.5.run
@@ -1,4 +1,3 @@
-python-dateutil
 pytz
 numpy=1.11.3
 openpyxl
diff --git a/ci/requirements-3.5.sh b/ci/requirements-3.5.sh
index d0f0b81802dc6..917439a8765a2 100644
--- a/ci/requirements-3.5.sh
+++ b/ci/requirements-3.5.sh
@@ -5,3 +5,7 @@ source activate pandas
 echo "install 35"

 conda install -n pandas -c conda-forge feather-format
+
+# pip install python-dateutil to get latest
+conda remove -n pandas python-dateutil --force
+pip install python-dateutil
diff --git a/ci/requirements-3.6_NUMPY_DEV.run b/ci/requirements-3.6_NUMPY_DEV.run
index 0aa987baefb1d..af44f198c687e 100644
--- a/ci/requirements-3.6_NUMPY_DEV.run
+++ b/ci/requirements-3.6_NUMPY_DEV.run
@@ -1,2 +1 @@
-python-dateutil
 pytz
diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py
index 47b15a2b66fc4..e03b3e0a85e5e 100644
--- a/pandas/tests/tseries/test_offsets.py
+++ b/pandas/tests/tseries/test_offsets.py
@@ -4844,7 +4844,7 @@ def test_fallback_plural(self):
             hrs_pre = utc_offsets['utc_offset_daylight']
             hrs_post = utc_offsets['utc_offset_standard']

-            if dateutil.__version__ != LooseVersion('2.6.0'):
+            if dateutil.__version__ < LooseVersion('2.6.0'):
                 # buggy ambiguous behavior in 2.6.0
                 # GH 14621
                 # https://github.com/dateutil/dateutil/issues/321
@@ -4852,6 +4852,9 @@
                     n=3, tstart=self._make_timestamp(self.ts_pre_fallback,
                                                      hrs_pre, tz),
                     expected_utc_offset=hrs_post)
+            elif dateutil.__version__ > LooseVersion('2.6.0'):
+                # fixed, but skip the test
+                continue

     def test_springforward_plural(self):
         # test moving from standard to daylight savings
diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py
index de6978d52968b..c034a9c60ef1b 100644
--- a/pandas/tests/tseries/test_timezones.py
+++ b/pandas/tests/tseries/test_timezones.py
@@ -552,8 +552,16 @@ def f():
                             tz=tz, ambiguous='infer')
         assert times[0] == Timestamp('2013-10-26 23:00', tz=tz, freq="H")

-        if dateutil.__version__ != LooseVersion('2.6.0'):
-            # see gh-14621
+        if str(tz).startswith('dateutil'):
+            if dateutil.__version__ < LooseVersion('2.6.0'):
+                # see gh-14621
+                assert times[-1] == Timestamp('2013-10-27 01:00:00+0000',
+                                              tz=tz, freq="H")
+            elif dateutil.__version__ > LooseVersion('2.6.0'):
+                # fixed ambiguous behavior
+                assert times[-1] == Timestamp('2013-10-27 01:00:00+0100',
+                                              tz=tz, freq="H")
+        else:
             assert times[-1] == Timestamp('2013-10-27 01:00:00+0000',
                                           tz=tz, freq="H")

@@ -1233,13 +1241,18 @@ def test_ambiguous_compat(self):
         assert result_pytz.value == result_dateutil.value
         assert result_pytz.value == 1382835600000000000

-        # dateutil 2.6 buggy w.r.t. ambiguous=0
-        if dateutil.__version__ != LooseVersion('2.6.0'):
+        if dateutil.__version__ < LooseVersion('2.6.0'):
+            # dateutil 2.6 buggy w.r.t. ambiguous=0
             # see gh-14621
             # see https://github.com/dateutil/dateutil/issues/321
             assert (result_pytz.to_pydatetime().tzname() ==
                     result_dateutil.to_pydatetime().tzname())
             assert str(result_pytz) == str(result_dateutil)
+        elif dateutil.__version__ > LooseVersion('2.6.0'):
+            # fixed ambiguous behavior
+            assert result_pytz.to_pydatetime().tzname() == 'GMT'
+            assert result_dateutil.to_pydatetime().tzname() == 'BST'
+            assert str(result_pytz) != str(result_dateutil)

         # 1 hour difference
         result_pytz = (Timestamp('2013-10-27 01:00:00')

From a9421af1aac906cc38d025ed5db4a2b55cb8b9bc Mon Sep 17 00:00:00 2001
From: Jean Helie
Date: Tue, 11 Jul 2017 17:40:20 +0100
Subject: [PATCH 747/933] fix wrongly named method (#16881)

---
 asv_bench/benchmarks/rolling.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py
index 9da9d0b855323..899349cd21f84 100644
--- a/asv_bench/benchmarks/rolling.py
+++ b/asv_bench/benchmarks/rolling.py
@@ -26,7 +26,7 @@ def time_rolling_quantile_median(self):
     def time_rolling_median(self):
         (self.df.rolling(self.wins).median())

-    def time_rolling_median(self):
+    def time_rolling_mean(self):
         (self.df.rolling(self.wins).mean())

     def time_rolling_max(self):
@@ -68,7 +68,7 @@ def time_rolling_quantile_median_l(self):
     def time_rolling_median_l(self):
         (self.df.rolling(self.winl).median())

-    def time_rolling_median_l(self):
+    def time_rolling_mean_l(self):
         (self.df.rolling(self.winl).mean())

     def time_rolling_max_l(self):
@@ -118,7 +118,7 @@ def time_rolling_quantile_median(self):
     def time_rolling_median(self):
         (self.sr.rolling(self.wins).median())

-    def time_rolling_median(self):
+    def time_rolling_mean(self):
         (self.sr.rolling(self.wins).mean())

     def time_rolling_max(self):
@@ -160,7 +160,7 @@ def time_rolling_quantile_median_l(self):
     def time_rolling_median_l(self):
         (self.sr.rolling(self.winl).median())

-    def time_rolling_median_l(self):
+    def time_rolling_mean_l(self):
         (self.sr.rolling(self.winl).mean())

     def time_rolling_max_l(self):
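Editor's note: the rename above matters because Python raises nothing for a
duplicated def in a class body; the later binding silently replaces the
earlier one, so asv never collected a mean benchmark at all. A minimal
standalone demonstration (names chosen to echo the benchmark file):

    class Bench(object):
        def time_rolling_median(self):
            return 'median'

        def time_rolling_median(self):  # same name: silently rebinds it
            return 'mean'               # body intended for time_rolling_mean

    b = Bench()
    print(b.time_rolling_median())                         # prints 'mean'
    print(sum(n.startswith('time_') for n in dir(Bench)))  # 1, not 2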
From 9d13227345882daaa90f03078c09a9b44a18ce72 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Wed, 12 Jul 2017 10:51:07 -0500
Subject: [PATCH 748/933] TST/PKG: Removed pandas.util.testing.slow definition
 (#16852)

---
 doc/source/whatsnew/v0.21.0.txt              |   1 +
 pandas/tests/computation/test_eval.py        |  16 +--
 pandas/tests/frame/test_repr_info.py         |   5 +-
 pandas/tests/frame/test_to_csv.py            |   8 +-
 pandas/tests/indexing/test_indexing_slow.py  |   7 +-
 pandas/tests/io/parser/common.py             |   2 +-
 pandas/tests/io/test_excel.py                |   2 +-
 pandas/tests/io/test_html.py                 |  36 ++---
 pandas/tests/plotting/test_boxplot_method.py |  23 ++--
 pandas/tests/plotting/test_datetimelike.py   | 102 +++++++-------
 pandas/tests/plotting/test_deprecated.py     |  10 +-
 pandas/tests/plotting/test_frame.py          | 137 +++++++++----------
 pandas/tests/plotting/test_hist_method.py    |  35 +++--
 pandas/tests/plotting/test_misc.py           |  17 ++-
 pandas/tests/plotting/test_series.py         |  59 ++++----
 pandas/tests/series/test_indexing.py         |   5 +-
 pandas/tests/test_expressions.py             |  10 +-
 pandas/tests/test_window.py                  |   6 +-
 pandas/util/testing.py                       |   7 -
 19 files changed, 239 insertions(+), 249 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 015fdf1f45f47..a5ee0e0ce2653 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -101,6 +101,7 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in
 - :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`).
 - Compression defaults in HDF stores now follow pytable standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`)
 - ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`)
+- Removed the ``@slow`` decorator from ``pandas.util.testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`)

 .. _whatsnew_0210.api:

diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index 589f612802fb9..7fc091ebb1892 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -30,7 +30,7 @@
 import pandas.util.testing as tm
 from pandas.util.testing import (assert_frame_equal, randbool,
                                  assert_numpy_array_equal, assert_series_equal,
-                                 assert_produces_warning, slow)
+                                 assert_produces_warning)
 from pandas.compat import PY3, reduce

 _series_frame_incompatible = _bool_ops_syms
@@ -144,7 +144,7 @@ def teardown_method(self, method):
         del self.lhses, self.rhses, self.scalar_rhses, self.scalar_lhses
         del self.pandas_rhses, self.pandas_lhses, self.current_engines

-    @slow
+    @pytest.mark.slow
     def test_complex_cmp_ops(self):
         cmp_ops = ('!=', '==', '<=', '>=', '<', '>')
         cmp2_ops = ('>', '<')
@@ -161,7 +161,7 @@ def test_simple_cmp_ops(self):
         for lhs, rhs, cmp_op in product(bool_lhses, bool_rhses, self.cmp_ops):
             self.check_simple_cmp_op(lhs, cmp_op, rhs)

-    @slow
+    @pytest.mark.slow
     def test_binary_arith_ops(self):
         for lhs, op, rhs in product(self.lhses, self.arith_ops, self.rhses):
             self.check_binary_arith_op(lhs, op, rhs)
@@ -181,17 +181,17 @@ def test_pow(self):
         for lhs, rhs in product(self.lhses, self.rhses):
             self.check_pow(lhs, '**', rhs)

-    @slow
+    @pytest.mark.slow
     def test_single_invert_op(self):
         for lhs, op, rhs in product(self.lhses, self.cmp_ops, self.rhses):
             self.check_single_invert_op(lhs, op, rhs)

-    @slow
+    @pytest.mark.slow
     def test_compound_invert_op(self):
         for lhs, op, rhs in product(self.lhses, self.cmp_ops, self.rhses):
             self.check_compound_invert_op(lhs, op, rhs)

-    @slow
+    @pytest.mark.slow
     def test_chained_cmp_op(self):
         mids = self.lhses
         cmp_ops = '<', '>'
@@ -870,7 +870,7 @@ def test_frame_comparison(self, engine, parser):
         res = pd.eval('df < df3', engine=engine, parser=parser)
         assert_frame_equal(res, df < df3)

-    @slow
+    @pytest.mark.slow
     def test_medium_complex_frame_alignment(self, engine, parser):
         args = product(self.lhs_index_types, self.index_types,
                        self.index_types, self.index_types)
@@ -974,7 +974,7 @@ def test_series_frame_commutativity(self, engine, parser):
                 if engine == 'numexpr':
                     assert_frame_equal(a, b)

-    @slow
+    @pytest.mark.slow
     def test_complex_series_frame_alignment(self, engine, parser):
         import random
         args = product(self.lhs_index_types, self.index_types,
diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py
index cc37f8cc3cb02..c317ad542659a 100644
--- a/pandas/tests/frame/test_repr_info.py
+++ b/pandas/tests/frame/test_repr_info.py
@@ -8,6 +8,7 @@

 from numpy import nan
 import numpy as np
+import pytest

 from pandas import (DataFrame, compat, option_context)
 from pandas.compat
import StringIO, lrange, u @@ -40,7 +41,7 @@ def test_repr_mixed(self): foo = repr(self.mixed_frame) # noqa self.mixed_frame.info(verbose=False, buf=buf) - @tm.slow + @pytest.mark.slow def test_repr_mixed_big(self): # big mixed biggie = DataFrame({'A': np.random.randn(200), @@ -87,7 +88,7 @@ def test_repr_dimensions(self): with option_context('display.show_dimensions', 'truncate'): assert "2 rows x 2 columns" not in repr(df) - @tm.slow + @pytest.mark.slow def test_repr_big(self): # big one biggie = DataFrame(np.zeros((200, 4)), columns=lrange(4), diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 69bd2b008416f..6a4b1686a31e2 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -17,7 +17,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, - ensure_clean, slow, + ensure_clean, makeCustomDataframe as mkdf) import pandas.util.testing as tm @@ -205,7 +205,7 @@ def _check_df(df, cols=None): cols = ['b', 'a'] _check_df(df, cols) - @slow + @pytest.mark.slow def test_to_csv_dtnat(self): # GH3437 from pandas import NaT @@ -236,7 +236,7 @@ def make_dtnat_arr(n, nnat=None): assert_frame_equal(df, recons, check_names=False, check_less_precise=True) - @slow + @pytest.mark.slow def test_to_csv_moar(self): def _do_test(df, r_dtype=None, c_dtype=None, @@ -728,7 +728,7 @@ def test_to_csv_chunking(self): rs = read_csv(filename, index_col=0) assert_frame_equal(rs, aa) - @slow + @pytest.mark.slow def test_to_csv_wide_frame_formatting(self): # Issue #8621 df = DataFrame(np.random.randn(1, 100010), columns=None, index=None) diff --git a/pandas/tests/indexing/test_indexing_slow.py b/pandas/tests/indexing/test_indexing_slow.py index 08d390a6a213e..1b3fb18d9ff1d 100644 --- a/pandas/tests/indexing/test_indexing_slow.py +++ b/pandas/tests/indexing/test_indexing_slow.py @@ -6,11 +6,12 @@ import pandas as pd from pandas.core.api import Series, DataFrame, MultiIndex import pandas.util.testing as tm +import pytest class TestIndexingSlow(object): - @tm.slow + @pytest.mark.slow def test_multiindex_get_loc(self): # GH7724, GH2646 with warnings.catch_warnings(record=True): @@ -80,7 +81,7 @@ def loop(mi, df, keys): assert not mi.index.lexsort_depth < i loop(mi, df, keys) - @tm.slow + @pytest.mark.slow def test_large_dataframe_indexing(self): # GH10692 result = DataFrame({'x': range(10 ** 6)}, dtype='int64') @@ -88,7 +89,7 @@ def test_large_dataframe_indexing(self): expected = DataFrame({'x': range(10 ** 6 + 1)}, dtype='int64') tm.assert_frame_equal(result, expected) - @tm.slow + @pytest.mark.slow def test_large_mi_dataframe_indexing(self): # GH10645 result = MultiIndex.from_arrays([range(10 ** 6), range(10 ** 6)]) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 4b4f44b44c163..584a6561b505b 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -664,7 +664,7 @@ def test_url(self): tm.assert_frame_equal(url_table, local_table) # TODO: ftp testing - @tm.slow + @pytest.mark.slow def test_file(self): dirpath = tm.get_data_path() localtable = os.path.join(dirpath, 'salaries.csv') diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index abe3757ec64f3..856e8d6466526 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -614,7 +614,7 @@ def test_read_from_s3_url(self): local_table = self.get_exceldf('test1') tm.assert_frame_equal(url_table, local_table) - @tm.slow + @pytest.mark.slow def 
test_read_from_file_url(self): # FILE diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 1e1d653cf94d1..4ef265dcd5113 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -130,7 +130,7 @@ def test_spam_url(self): assert_framelist_equal(df1, df2) - @tm.slow + @pytest.mark.slow def test_banklist(self): df1 = self.read_html(self.banklist_data, '.*Florida.*', attrs={'id': 'table'}) @@ -292,7 +292,7 @@ def test_invalid_url(self): except ValueError as e: assert str(e) == 'No tables found' - @tm.slow + @pytest.mark.slow def test_file_url(self): url = self.banklist_data dfs = self.read_html(file_path_to_url(url), 'First', @@ -301,7 +301,7 @@ def test_file_url(self): for df in dfs: assert isinstance(df, DataFrame) - @tm.slow + @pytest.mark.slow def test_invalid_table_attrs(self): url = self.banklist_data with tm.assert_raises_regex(ValueError, 'No tables found'): @@ -312,39 +312,39 @@ def _bank_data(self, *args, **kwargs): return self.read_html(self.banklist_data, 'Metcalf', attrs={'id': 'table'}, *args, **kwargs) - @tm.slow + @pytest.mark.slow def test_multiindex_header(self): df = self._bank_data(header=[0, 1])[0] assert isinstance(df.columns, MultiIndex) - @tm.slow + @pytest.mark.slow def test_multiindex_index(self): df = self._bank_data(index_col=[0, 1])[0] assert isinstance(df.index, MultiIndex) - @tm.slow + @pytest.mark.slow def test_multiindex_header_index(self): df = self._bank_data(header=[0, 1], index_col=[0, 1])[0] assert isinstance(df.columns, MultiIndex) assert isinstance(df.index, MultiIndex) - @tm.slow + @pytest.mark.slow def test_multiindex_header_skiprows_tuples(self): df = self._bank_data(header=[0, 1], skiprows=1, tupleize_cols=True)[0] assert isinstance(df.columns, Index) - @tm.slow + @pytest.mark.slow def test_multiindex_header_skiprows(self): df = self._bank_data(header=[0, 1], skiprows=1)[0] assert isinstance(df.columns, MultiIndex) - @tm.slow + @pytest.mark.slow def test_multiindex_header_index_skiprows(self): df = self._bank_data(header=[0, 1], index_col=[0, 1], skiprows=1)[0] assert isinstance(df.index, MultiIndex) assert isinstance(df.columns, MultiIndex) - @tm.slow + @pytest.mark.slow def test_regex_idempotency(self): url = self.banklist_data dfs = self.read_html(file_path_to_url(url), @@ -372,7 +372,7 @@ def test_python_docs_table(self): zz = [df.iloc[0, 0][0:4] for df in dfs] assert sorted(zz) == sorted(['Repo', 'What']) - @tm.slow + @pytest.mark.slow def test_thousands_macau_stats(self): all_non_nan_table_index = -2 macau_data = os.path.join(DATA_PATH, 'macau.html') @@ -382,7 +382,7 @@ def test_thousands_macau_stats(self): assert not any(s.isnull().any() for _, s in df.iteritems()) - @tm.slow + @pytest.mark.slow def test_thousands_macau_index_col(self): all_non_nan_table_index = -2 macau_data = os.path.join(DATA_PATH, 'macau.html') @@ -523,7 +523,7 @@ def test_nyse_wsj_commas_table(self): assert df.shape[0] == nrows tm.assert_index_equal(df.columns, columns) - @tm.slow + @pytest.mark.slow def test_banklist_header(self): from pandas.io.html import _remove_whitespace @@ -562,7 +562,7 @@ def try_remove_ws(x): coerce=True) tm.assert_frame_equal(converted, gtnew) - @tm.slow + @pytest.mark.slow def test_gold_canyon(self): gc = 'Gold Canyon' with open(self.banklist_data, 'r') as f: @@ -855,7 +855,7 @@ def test_works_on_valid_markup(self): assert isinstance(dfs, list) assert isinstance(dfs[0], DataFrame) - @tm.slow + @pytest.mark.slow def test_fallback_success(self): _skip_if_none_of(('bs4', 'html5lib')) banklist_data = 
os.path.join(DATA_PATH, 'banklist.html') @@ -898,7 +898,7 @@ def get_elements_from_file(url, element='table'): return soup.find_all(element) -@tm.slow +@pytest.mark.slow def test_bs4_finds_tables(): filepath = os.path.join(DATA_PATH, "spam.html") with warnings.catch_warnings(): @@ -913,13 +913,13 @@ def get_lxml_elements(url, element): return doc.xpath('.//{0}'.format(element)) -@tm.slow +@pytest.mark.slow def test_lxml_finds_tables(): filepath = os.path.join(DATA_PATH, "spam.html") assert get_lxml_elements(filepath, 'table') -@tm.slow +@pytest.mark.slow def test_lxml_finds_tbody(): filepath = os.path.join(DATA_PATH, "spam.html") assert get_lxml_elements(filepath, 'tbody') diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index ce8fb7a57c912..8fe119d28644c 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -8,7 +8,6 @@ from pandas import Series, DataFrame, MultiIndex from pandas.compat import range, lzip import pandas.util.testing as tm -from pandas.util.testing import slow import numpy as np from numpy import random @@ -35,7 +34,7 @@ def _skip_if_mpl_14_or_dev_boxplot(): class TestDataFramePlots(TestPlotBase): - @slow + @pytest.mark.slow def test_boxplot_legacy(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -93,13 +92,13 @@ def test_boxplot_legacy(self): lines = list(itertools.chain.from_iterable(d.values())) assert len(ax.get_lines()) == len(lines) - @slow + @pytest.mark.slow def test_boxplot_return_type_none(self): # GH 12216; return_type=None & by=None -> axes result = self.hist_df.boxplot() assert isinstance(result, self.plt.Axes) - @slow + @pytest.mark.slow def test_boxplot_return_type_legacy(self): # API change in https://github.com/pandas-dev/pandas/pull/7096 import matplotlib as mpl # noqa @@ -125,7 +124,7 @@ def test_boxplot_return_type_legacy(self): result = df.boxplot(return_type='both') self._check_box_return_type(result, 'both') - @slow + @pytest.mark.slow def test_boxplot_axis_limits(self): def _check_ax_limits(col, ax): @@ -153,14 +152,14 @@ def _check_ax_limits(col, ax): assert age_ax._sharey == height_ax assert dummy_ax._sharey is None - @slow + @pytest.mark.slow def test_boxplot_empty_column(self): _skip_if_mpl_14_or_dev_boxplot() df = DataFrame(np.random.randn(20, 4)) df.loc[:, 0] = np.nan _check_plot_works(df.boxplot, return_type='axes') - @slow + @pytest.mark.slow def test_figsize(self): df = DataFrame(np.random.rand(10, 5), columns=['A', 'B', 'C', 'D', 'E']) @@ -176,7 +175,7 @@ def test_fontsize(self): class TestDataFrameGroupByPlots(TestPlotBase): - @slow + @pytest.mark.slow def test_boxplot_legacy(self): grouped = self.hist_df.groupby(by='gender') with tm.assert_produces_warning(UserWarning): @@ -206,7 +205,7 @@ def test_boxplot_legacy(self): return_type='axes') self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - @slow + @pytest.mark.slow def test_grouped_plot_fignums(self): n = 10 weight = Series(np.random.normal(166, 20, size=n)) @@ -230,7 +229,7 @@ def test_grouped_plot_fignums(self): res = df.groupby('gender').hist() tm.close() - @slow + @pytest.mark.slow def test_grouped_box_return_type(self): df = self.hist_df @@ -267,7 +266,7 @@ def test_grouped_box_return_type(self): returned = df2.boxplot(by='category', return_type=t) self._check_box_return_type(returned, t, expected_keys=columns2) - @slow + @pytest.mark.slow def test_grouped_box_layout(self): df = self.hist_df @@ -341,7 +340,7 @@ def 
test_grouped_box_layout(self): return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 3)) - @slow + @pytest.mark.slow def test_grouped_box_multiple_axes(self): # GH 6970, GH 7069 df = self.hist_df diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 0cff365be3ec8..e9c7d806fd65d 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -14,7 +14,7 @@ from pandas.core.indexes.period import period_range, Period, PeriodIndex from pandas.core.resample import DatetimeIndex -from pandas.util.testing import assert_series_equal, ensure_clean, slow +from pandas.util.testing import assert_series_equal, ensure_clean import pandas.util.testing as tm from pandas.tests.plotting.common import (TestPlotBase, @@ -45,7 +45,7 @@ def setup_method(self, method): def teardown_method(self, method): tm.close() - @slow + @pytest.mark.slow def test_ts_plot_with_tz(self): # GH2877 index = date_range('1/1/2011', periods=2, freq='H', @@ -61,7 +61,7 @@ def test_fontsize_set_correctly(self): for label in (ax.get_xticklabels() + ax.get_yticklabels()): assert label.get_fontsize() == 2 - @slow + @pytest.mark.slow def test_frame_inferred(self): # inferred freq idx = date_range('1/1/1987', freq='MS', periods=100) @@ -99,7 +99,7 @@ def test_nonnumeric_exclude(self): pytest.raises(TypeError, df['A'].plot) - @slow + @pytest.mark.slow def test_tsplot(self): from pandas.tseries.plotting import tsplot @@ -133,7 +133,7 @@ def test_both_style_and_color(self): s = ts.reset_index(drop=True) pytest.raises(ValueError, s.plot, style='b-', color='#000099') - @slow + @pytest.mark.slow def test_high_freq(self): freaks = ['ms', 'us'] for freq in freaks: @@ -151,7 +151,7 @@ def test_get_datevalue(self): assert (get_datevalue('1/1/1987', 'D') == Period('1987-1-1', 'D').ordinal) - @slow + @pytest.mark.slow def test_ts_plot_format_coord(self): def check_format_of_first_point(ax, expected_string): first_line = ax.get_lines()[0] @@ -185,28 +185,28 @@ def check_format_of_first_point(ax, expected_string): tsplot(daily, self.plt.Axes.plot, ax=ax) check_format_of_first_point(ax, 't = 2014-01-01 y = 1.000000') - @slow + @pytest.mark.slow def test_line_plot_period_series(self): for s in self.period_ser: _check_plot_works(s.plot, s.index.freq) - @slow + @pytest.mark.slow def test_line_plot_datetime_series(self): for s in self.datetime_ser: _check_plot_works(s.plot, s.index.freq.rule_code) - @slow + @pytest.mark.slow def test_line_plot_period_frame(self): for df in self.period_df: _check_plot_works(df.plot, df.index.freq) - @slow + @pytest.mark.slow def test_line_plot_datetime_frame(self): for df in self.datetime_df: freq = df.index.to_period(df.index.freq.rule_code).freq _check_plot_works(df.plot, freq) - @slow + @pytest.mark.slow def test_line_plot_inferred_freq(self): for ser in self.datetime_ser: ser = Series(ser.values, Index(np.asarray(ser.index))) @@ -223,7 +223,7 @@ def test_fake_inferred_business(self): ts.plot(ax=ax) assert not hasattr(ax, 'freq') - @slow + @pytest.mark.slow def test_plot_offset_freq(self): ser = tm.makeTimeSeries() _check_plot_works(ser.plot) @@ -232,14 +232,14 @@ def test_plot_offset_freq(self): ser = Series(np.random.randn(len(dr)), dr) _check_plot_works(ser.plot) - @slow + @pytest.mark.slow def test_plot_multiple_inferred_freq(self): dr = Index([datetime(2000, 1, 1), datetime(2000, 1, 6), datetime( 2000, 1, 11)]) ser = Series(np.random.randn(len(dr)), dr) _check_plot_works(ser.plot) - @slow + 
@pytest.mark.slow def test_uhf(self): import pandas.plotting._converter as conv idx = date_range('2012-6-22 21:59:51.960928', freq='L', periods=500) @@ -257,7 +257,7 @@ def test_uhf(self): if len(rs): assert xp == rs - @slow + @pytest.mark.slow def test_irreg_hf(self): idx = date_range('2012-6-22 21:59:51', freq='S', periods=100) df = DataFrame(np.random.randn(len(idx), 2), idx) @@ -297,7 +297,7 @@ def test_business_freq(self): idx = ax.get_lines()[0].get_xdata() assert PeriodIndex(data=idx).freqstr == 'B' - @slow + @pytest.mark.slow def test_business_freq_convert(self): n = tm.N tm.N = 300 @@ -327,7 +327,7 @@ def test_dataframe(self): idx = ax.get_lines()[0].get_xdata() tm.assert_index_equal(bts.index.to_period(), PeriodIndex(idx)) - @slow + @pytest.mark.slow def test_axis_limits(self): def _test(ax): @@ -384,7 +384,7 @@ def test_get_finder(self): assert conv.get_finder('A') == conv._annual_finder assert conv.get_finder('W') == conv._daily_finder - @slow + @pytest.mark.slow def test_finder_daily(self): xp = Period('1999-1-1', freq='B').ordinal day_lst = [10, 40, 252, 400, 950, 2750, 10000] @@ -402,7 +402,7 @@ def test_finder_daily(self): assert xp == rs self.plt.close(ax.get_figure()) - @slow + @pytest.mark.slow def test_finder_quarterly(self): xp = Period('1988Q1').ordinal yrs = [3.5, 11] @@ -420,7 +420,7 @@ def test_finder_quarterly(self): assert xp == rs self.plt.close(ax.get_figure()) - @slow + @pytest.mark.slow def test_finder_monthly(self): xp = Period('Jan 1988').ordinal yrs = [1.15, 2.5, 4, 11] @@ -448,7 +448,7 @@ def test_finder_monthly_long(self): xp = Period('1989Q1', 'M').ordinal assert rs == xp - @slow + @pytest.mark.slow def test_finder_annual(self): xp = [1987, 1988, 1990, 1990, 1995, 2020, 2070, 2170] for i, nyears in enumerate([5, 10, 19, 49, 99, 199, 599, 1001]): @@ -461,7 +461,7 @@ def test_finder_annual(self): assert rs == Period(xp[i], freq='A').ordinal self.plt.close(ax.get_figure()) - @slow + @pytest.mark.slow def test_finder_minutely(self): nminutes = 50 * 24 * 60 rng = date_range('1/1/1999', freq='Min', periods=nminutes) @@ -484,7 +484,7 @@ def test_finder_hourly(self): xp = Period('1/1/1999', freq='H').ordinal assert rs == xp - @slow + @pytest.mark.slow def test_gaps(self): ts = tm.makeTimeSeries() ts[5:25] = np.nan @@ -529,7 +529,7 @@ def test_gaps(self): mask = data.mask assert mask[2:5, 1].all() - @slow + @pytest.mark.slow def test_gap_upsample(self): low = tm.makeTimeSeries() low[5:25] = np.nan @@ -551,7 +551,7 @@ def test_gap_upsample(self): mask = data.mask assert mask[5:25, 1].all() - @slow + @pytest.mark.slow def test_secondary_y(self): ser = Series(np.random.randn(10)) ser2 = Series(np.random.randn(10)) @@ -581,7 +581,7 @@ def test_secondary_y(self): assert hasattr(ax2, 'left_ax') assert not hasattr(ax2, 'right_ax') - @slow + @pytest.mark.slow def test_secondary_y_ts(self): idx = date_range('1/1/2000', periods=10) ser = Series(np.random.randn(10), idx) @@ -608,7 +608,7 @@ def test_secondary_y_ts(self): ax2 = ser.plot(secondary_y=True) assert ax.get_yaxis().get_visible() - @slow + @pytest.mark.slow def test_secondary_kde(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() @@ -621,7 +621,7 @@ def test_secondary_kde(self): axes = fig.get_axes() assert axes[1].get_yaxis().get_ticks_position() == 'right' - @slow + @pytest.mark.slow def test_secondary_bar(self): ser = Series(np.random.randn(10)) fig, ax = self.plt.subplots() @@ -629,7 +629,7 @@ def test_secondary_bar(self): axes = fig.get_axes() assert axes[1].get_yaxis().get_ticks_position() == 
'right' - @slow + @pytest.mark.slow def test_secondary_frame(self): df = DataFrame(np.random.randn(5, 3), columns=['a', 'b', 'c']) axes = df.plot(secondary_y=['a', 'c'], subplots=True) @@ -638,7 +638,7 @@ def test_secondary_frame(self): self.default_tick_position) assert axes[2].get_yaxis().get_ticks_position() == 'right' - @slow + @pytest.mark.slow def test_secondary_bar_frame(self): df = DataFrame(np.random.randn(5, 3), columns=['a', 'b', 'c']) axes = df.plot(kind='bar', secondary_y=['a', 'c'], subplots=True) @@ -666,7 +666,7 @@ def test_mixed_freq_regular_first(self): assert left == pidx[0].ordinal assert right == pidx[-1].ordinal - @slow + @pytest.mark.slow def test_mixed_freq_irregular_first(self): s1 = tm.makeTimeSeries() s2 = s1[[0, 5, 10, 11, 12, 13, 14, 15]] @@ -697,7 +697,7 @@ def test_mixed_freq_regular_first_df(self): assert left == pidx[0].ordinal assert right == pidx[-1].ordinal - @slow + @pytest.mark.slow def test_mixed_freq_irregular_first_df(self): # GH 9852 s1 = tm.makeTimeSeries().to_frame() @@ -723,7 +723,7 @@ def test_mixed_freq_hf_first(self): for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == 'D' - @slow + @pytest.mark.slow def test_mixed_freq_alignment(self): ts_ind = date_range('2012-01-01 13:00', '2012-01-02', freq='H') ts_data = np.random.randn(12) @@ -737,7 +737,7 @@ def test_mixed_freq_alignment(self): assert ax.lines[0].get_xdata()[0] == ax.lines[1].get_xdata()[0] - @slow + @pytest.mark.slow def test_mixed_freq_lf_first(self): idxh = date_range('1/1/1999', periods=365, freq='D') @@ -819,7 +819,7 @@ def test_nat_handling(self): assert s.index.min() <= Series(xdata).min() assert Series(xdata).max() <= s.index.max() - @slow + @pytest.mark.slow def test_to_weekly_resampling(self): idxh = date_range('1/1/1999', periods=52, freq='W') idxl = date_range('1/1/1999', periods=12, freq='M') @@ -840,7 +840,7 @@ def test_to_weekly_resampling(self): for l in lines: assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq - @slow + @pytest.mark.slow def test_from_weekly_resampling(self): idxh = date_range('1/1/1999', periods=52, freq='W') idxl = date_range('1/1/1999', periods=12, freq='M') @@ -876,7 +876,7 @@ def test_from_weekly_resampling(self): else: tm.assert_numpy_array_equal(xdata, expected_h) - @slow + @pytest.mark.slow def test_from_resampling_area_line_mixed(self): idxh = date_range('1/1/1999', periods=52, freq='W') idxl = date_range('1/1/1999', periods=12, freq='M') @@ -950,7 +950,7 @@ def test_from_resampling_area_line_mixed(self): tm.assert_numpy_array_equal(l.get_ydata(orig=False), expected_y) - @slow + @pytest.mark.slow def test_mixed_freq_second_millisecond(self): # GH 7772, GH 7760 idxh = date_range('2014-07-01 09:00', freq='S', periods=50) @@ -974,7 +974,7 @@ def test_mixed_freq_second_millisecond(self): for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == 'L' - @slow + @pytest.mark.slow def test_irreg_dtypes(self): # date idx = [date(2000, 1, 1), date(2000, 1, 5), date(2000, 1, 20)] @@ -988,7 +988,7 @@ def test_irreg_dtypes(self): _, ax = self.plt.subplots() _check_plot_works(df.plot, ax=ax) - @slow + @pytest.mark.slow def test_time(self): t = datetime(1, 1, 1, 3, 30, 0) deltas = np.random.randint(1, 20, 3).cumsum() @@ -1024,7 +1024,7 @@ def test_time(self): rs = time(h, m, s).strftime('%H:%M:%S') assert xp == rs - @slow + @pytest.mark.slow def test_time_musec(self): t = datetime(1, 1, 1, 3, 30, 0) deltas = np.random.randint(1, 20, 3).cumsum() @@ -1051,7 +1051,7 @@ def test_time_musec(self): rs = time(h, m, 
s).strftime('%H:%M:%S.%f') assert xp == rs - @slow + @pytest.mark.slow def test_secondary_upsample(self): idxh = date_range('1/1/1999', periods=365, freq='D') idxl = date_range('1/1/1999', periods=12, freq='M') @@ -1067,7 +1067,7 @@ def test_secondary_upsample(self): for l in ax.left_ax.get_lines(): assert PeriodIndex(l.get_xdata()).freq == 'D' - @slow + @pytest.mark.slow def test_secondary_legend(self): fig = self.plt.figure() ax = fig.add_subplot(211) @@ -1169,7 +1169,7 @@ def test_format_date_axis(self): if len(l.get_text()) > 0: assert l.get_rotation() == 30 - @slow + @pytest.mark.slow def test_ax_plot(self): x = DatetimeIndex(start='2012-01-02', periods=10, freq='D') y = lrange(len(x)) @@ -1177,7 +1177,7 @@ def test_ax_plot(self): lines = ax.plot(x, y, label='Y') tm.assert_index_equal(DatetimeIndex(lines[0].get_xdata()), x) - @slow + @pytest.mark.slow def test_mpl_nopandas(self): dates = [date(2008, 12, 31), date(2009, 1, 31)] values1 = np.arange(10.0, 11.0, 0.5) @@ -1196,7 +1196,7 @@ def test_mpl_nopandas(self): exp = np.array([x.toordinal() for x in dates], dtype=np.float64) tm.assert_numpy_array_equal(line2.get_xydata()[:, 0], exp) - @slow + @pytest.mark.slow def test_irregular_ts_shared_ax_xlim(self): # GH 2960 ts = tm.makeTimeSeries()[:20] @@ -1212,7 +1212,7 @@ def test_irregular_ts_shared_ax_xlim(self): assert left == ts_irregular.index.min().toordinal() assert right == ts_irregular.index.max().toordinal() - @slow + @pytest.mark.slow def test_secondary_y_non_ts_xlim(self): # GH 3490 - non-timeseries with secondary y index_1 = [1, 2, 3, 4] @@ -1229,7 +1229,7 @@ def test_secondary_y_non_ts_xlim(self): assert left_before == left_after assert right_before < right_after - @slow + @pytest.mark.slow def test_secondary_y_regular_ts_xlim(self): # GH 3490 - regular-timeseries with secondary y index_1 = date_range(start='2000-01-01', periods=4, freq='D') @@ -1246,7 +1246,7 @@ def test_secondary_y_regular_ts_xlim(self): assert left_before == left_after assert right_before < right_after - @slow + @pytest.mark.slow def test_secondary_y_mixed_freq_ts_xlim(self): # GH 3490 - mixed frequency timeseries with secondary y rng = date_range('2000-01-01', periods=10000, freq='min') @@ -1262,7 +1262,7 @@ def test_secondary_y_mixed_freq_ts_xlim(self): assert left_before == left_after assert right_before == right_after - @slow + @pytest.mark.slow def test_secondary_y_irregular_ts_xlim(self): # GH 3490 - irregular-timeseries with secondary y ts = tm.makeTimeSeries()[:20] @@ -1361,7 +1361,7 @@ def test_hist(self): _, ax = self.plt.subplots() ax.hist([x, x], weights=[w1, w2]) - @slow + @pytest.mark.slow def test_overlapping_datetime(self): # GB 6608 s1 = Series([1, 2, 3], index=[datetime(1995, 12, 31), diff --git a/pandas/tests/plotting/test_deprecated.py b/pandas/tests/plotting/test_deprecated.py index ca03bcb060e25..970de6ff881ab 100644 --- a/pandas/tests/plotting/test_deprecated.py +++ b/pandas/tests/plotting/test_deprecated.py @@ -4,7 +4,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas.util.testing import slow +import pytest from numpy.random import randn @@ -23,7 +23,7 @@ class TestDeprecatedNameSpace(TestPlotBase): - @slow + @pytest.mark.slow def test_scatter_plot_legacy(self): tm._skip_if_no_scipy() @@ -35,7 +35,7 @@ def test_scatter_plot_legacy(self): with tm.assert_produces_warning(FutureWarning): pd.scatter_matrix(df) - @slow + @pytest.mark.slow def test_boxplot_deprecated(self): df = pd.DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -46,13 +46,13 @@ def 
test_boxplot_deprecated(self): plotting.boxplot(df, column=['one', 'two'], by='indic') - @slow + @pytest.mark.slow def test_radviz_deprecated(self): df = self.iris with tm.assert_produces_warning(FutureWarning): plotting.radviz(frame=df, class_column='Name') - @slow + @pytest.mark.slow def test_plot_params(self): with tm.assert_produces_warning(FutureWarning): diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 352c03582db93..7878740f64e55 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -15,7 +15,6 @@ from pandas.compat import range, lrange, lmap, lzip, u, zip, PY3 from pandas.io.formats.printing import pprint_thing import pandas.util.testing as tm -from pandas.util.testing import slow import numpy as np from numpy.random import rand, randn @@ -41,7 +40,7 @@ def setup_method(self, method): "C": np.arange(20) + np.random.uniform( size=20)}) - @slow + @pytest.mark.slow def test_plot(self): df = self.tdf _check_plot_works(df.plot, grid=False) @@ -188,13 +187,13 @@ def test_nonnumeric_exclude(self): ax = df.plot() assert len(ax.get_lines()) == 1 # B was plotted - @slow + @pytest.mark.slow def test_implicit_label(self): df = DataFrame(randn(10, 3), columns=['a', 'b', 'c']) ax = df.plot(x='a', y='b') self._check_text_labels(ax.xaxis.get_label(), 'a') - @slow + @pytest.mark.slow def test_donot_overwrite_index_name(self): # GH 8494 df = DataFrame(randn(2, 2), columns=['a', 'b']) @@ -202,7 +201,7 @@ def test_donot_overwrite_index_name(self): df.plot(y='b', label='LABEL') assert df.index.name == 'NAME' - @slow + @pytest.mark.slow def test_plot_xy(self): # columns.inferred_type == 'string' df = self.tdf @@ -228,7 +227,7 @@ def test_plot_xy(self): # columns.inferred_type == 'mixed' # TODO add MultiIndex test - @slow + @pytest.mark.slow def test_logscales(self): df = DataFrame({'a': np.arange(100)}, index=np.arange(100)) ax = df.plot(logy=True) @@ -240,7 +239,7 @@ def test_logscales(self): ax = df.plot(loglog=True) self._check_ax_scales(ax, xaxis='log', yaxis='log') - @slow + @pytest.mark.slow def test_xcompat(self): import pandas as pd @@ -305,7 +304,7 @@ def test_unsorted_index(self): rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64, name='y') tm.assert_series_equal(rs, df.y) - @slow + @pytest.mark.slow def test_subplots(self): df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) @@ -345,7 +344,7 @@ def test_subplots(self): for ax in axes: assert ax.get_legend() is None - @slow + @pytest.mark.slow def test_subplots_timeseries(self): idx = date_range(start='2014-07-01', freq='M', periods=10) df = DataFrame(np.random.rand(10, 3), index=idx) @@ -381,7 +380,7 @@ def test_subplots_timeseries(self): self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) - @slow + @pytest.mark.slow def test_subplots_layout(self): # GH 6667 df = DataFrame(np.random.rand(10, 3), @@ -427,7 +426,7 @@ def test_subplots_layout(self): self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) assert axes.shape == (3, 3) - @slow + @pytest.mark.slow def test_subplots_warnings(self): # GH 9464 warnings.simplefilter('error') @@ -442,7 +441,7 @@ def test_subplots_warnings(self): self.fail(w) warnings.simplefilter('default') - @slow + @pytest.mark.slow def test_subplots_multiple_axes(self): # GH 5353, 6970, GH 7069 fig, axes = self.plt.subplots(2, 3) @@ -543,7 +542,7 @@ def test_subplots_sharex_axes_existing_axes(self): for ax in axes.ravel(): self._check_visible(ax.get_yticklabels(), visible=True) - @slow + 
@pytest.mark.slow def test_subplots_dup_columns(self): # GH 10962 df = DataFrame(np.random.rand(5, 5), columns=list('aaaaa')) @@ -697,7 +696,7 @@ def test_area_lim(self): ymin, ymax = ax.get_ylim() assert ymax == 0 - @slow + @pytest.mark.slow def test_bar_colors(self): import matplotlib.pyplot as plt default_colors = self._maybe_unpack_cycler(plt.rcParams) @@ -733,7 +732,7 @@ def test_bar_colors(self): self._check_colors(ax.patches[::5], facecolors=['green'] * 5) tm.close() - @slow + @pytest.mark.slow def test_bar_linewidth(self): df = DataFrame(randn(5, 5)) @@ -754,7 +753,7 @@ def test_bar_linewidth(self): for r in ax.patches: assert r.get_linewidth() == 2 - @slow + @pytest.mark.slow def test_bar_barwidth(self): df = DataFrame(randn(5, 5)) @@ -792,7 +791,7 @@ def test_bar_barwidth(self): for r in ax.patches: assert r.get_height() == width - @slow + @pytest.mark.slow def test_bar_barwidth_position(self): df = DataFrame(randn(5, 5)) self._check_bar_alignment(df, kind='bar', stacked=False, width=0.9, @@ -808,7 +807,7 @@ def test_bar_barwidth_position(self): self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9, position=0.2) - @slow + @pytest.mark.slow def test_bar_barwidth_position_int(self): # GH 12979 df = DataFrame(randn(5, 5)) @@ -828,7 +827,7 @@ def test_bar_barwidth_position_int(self): self._check_bar_alignment(df, kind='bar', subplots=True, width=1) self._check_bar_alignment(df, kind='barh', subplots=True, width=1) - @slow + @pytest.mark.slow def test_bar_bottom_left(self): df = DataFrame(rand(5, 5)) ax = df.plot.bar(stacked=False, bottom=1) @@ -857,7 +856,7 @@ def test_bar_bottom_left(self): result = [p.get_x() for p in ax.patches] assert result == [1] * 5 - @slow + @pytest.mark.slow def test_bar_nan(self): df = DataFrame({'A': [10, np.nan, 20], 'B': [5, 10, 20], @@ -875,7 +874,7 @@ def test_bar_nan(self): expected = [0.0, 0.0, 0.0, 10.0, 0.0, 20.0, 15.0, 10.0, 40.0] assert result == expected - @slow + @pytest.mark.slow def test_bar_categorical(self): # GH 13019 df1 = pd.DataFrame(np.random.randn(6, 5), @@ -901,7 +900,7 @@ def test_bar_categorical(self): assert ax.patches[0].get_x() == -0.25 assert ax.patches[-1].get_x() == 4.75 - @slow + @pytest.mark.slow def test_plot_scatter(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -919,7 +918,7 @@ def test_plot_scatter(self): axes = df.plot(x='x', y='y', kind='scatter', subplots=True) self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - @slow + @pytest.mark.slow def test_plot_scatter_with_categorical_data(self): # GH 16199 df = pd.DataFrame({'x': [1, 2, 3, 4], @@ -937,7 +936,7 @@ def test_plot_scatter_with_categorical_data(self): df.plot(x='y', y='y', kind='scatter') ve.match('requires x column to be numeric') - @slow + @pytest.mark.slow def test_plot_scatter_with_c(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -1007,7 +1006,7 @@ def test_scatter_colors(self): tm.assert_numpy_array_equal(ax.collections[0].get_facecolor()[0], np.array([1, 1, 1, 1], dtype=np.float64)) - @slow + @pytest.mark.slow def test_plot_bar(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -1098,7 +1097,7 @@ def _check_bar_alignment(self, df, kind='bar', stacked=False, return axes - @slow + @pytest.mark.slow def test_bar_stacked_center(self): # GH2157 df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) @@ -1107,7 +1106,7 @@ def test_bar_stacked_center(self): self._check_bar_alignment(df, kind='barh', stacked=True) self._check_bar_alignment(df, 
kind='barh', stacked=True, width=0.9) - @slow + @pytest.mark.slow def test_bar_center(self): df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) self._check_bar_alignment(df, kind='bar', stacked=False) @@ -1115,7 +1114,7 @@ def test_bar_center(self): self._check_bar_alignment(df, kind='barh', stacked=False) self._check_bar_alignment(df, kind='barh', stacked=False, width=0.9) - @slow + @pytest.mark.slow def test_bar_subplots_center(self): df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) self._check_bar_alignment(df, kind='bar', subplots=True) @@ -1123,7 +1122,7 @@ def test_bar_subplots_center(self): self._check_bar_alignment(df, kind='barh', subplots=True) self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9) - @slow + @pytest.mark.slow def test_bar_align_single_column(self): df = DataFrame(randn(5)) self._check_bar_alignment(df, kind='bar', stacked=False) @@ -1133,7 +1132,7 @@ def test_bar_align_single_column(self): self._check_bar_alignment(df, kind='bar', subplots=True) self._check_bar_alignment(df, kind='barh', subplots=True) - @slow + @pytest.mark.slow def test_bar_edge(self): df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) @@ -1158,7 +1157,7 @@ def test_bar_edge(self): self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9, align='edge') - @slow + @pytest.mark.slow def test_bar_log_no_subplots(self): # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 # regressions in 1.2.1 @@ -1172,7 +1171,7 @@ def test_bar_log_no_subplots(self): ax = df.plot.bar(grid=True, log=True) tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) - @slow + @pytest.mark.slow def test_bar_log_subplots(self): expected = np.array([1., 10., 100., 1000.]) if not self.mpl_le_1_2_1: @@ -1184,7 +1183,7 @@ def test_bar_log_subplots(self): tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) - @slow + @pytest.mark.slow def test_boxplot(self): df = self.hist_df series = df['height'] @@ -1222,7 +1221,7 @@ def test_boxplot(self): tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), positions) assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) - @slow + @pytest.mark.slow def test_boxplot_vertical(self): df = self.hist_df numeric_cols = df._get_numeric_data().columns @@ -1250,7 +1249,7 @@ def test_boxplot_vertical(self): tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), positions) assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) - @slow + @pytest.mark.slow def test_boxplot_return_type(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -1270,7 +1269,7 @@ def test_boxplot_return_type(self): result = df.plot.box(return_type='both') self._check_box_return_type(result, 'both') - @slow + @pytest.mark.slow def test_boxplot_subplots_return_type(self): df = self.hist_df @@ -1287,7 +1286,7 @@ def test_boxplot_subplots_return_type(self): expected_keys=['height', 'weight', 'category'], check_ax_title=False) - @slow + @pytest.mark.slow def test_kde_df(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() @@ -1308,7 +1307,7 @@ def test_kde_df(self): axes = df.plot(kind='kde', logy=True, subplots=True) self._check_ax_scales(axes, yaxis='log') - @slow + @pytest.mark.slow def test_kde_missing_vals(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() @@ -1316,7 +1315,7 @@ def test_kde_missing_vals(self): df.loc[0, 0] = np.nan _check_plot_works(df.plot, kind='kde') - @slow + @pytest.mark.slow def test_hist_df(self): 
from matplotlib.patches import Rectangle if self.mpl_le_1_2_1: @@ -1376,7 +1375,7 @@ def _check_box_coord(self, patches, expected_y=None, expected_h=None, tm.assert_numpy_array_equal(result_width, expected_w, check_dtype=False) - @slow + @pytest.mark.slow def test_hist_df_coord(self): normal_df = DataFrame({'A': np.repeat(np.array([1, 2, 3, 4, 5]), np.array([10, 9, 8, 7, 6])), @@ -1467,12 +1466,12 @@ def test_hist_df_coord(self): expected_x=np.array([0, 0, 0, 0, 0]), expected_w=np.array([6, 7, 8, 9, 10])) - @slow + @pytest.mark.slow def test_plot_int_columns(self): df = DataFrame(randn(100, 4)).cumsum() _check_plot_works(df.plot, legend=True) - @slow + @pytest.mark.slow def test_df_legend_labels(self): kinds = ['line', 'bar', 'barh', 'kde', 'area', 'hist'] df = DataFrame(rand(3, 3), columns=['a', 'b', 'c']) @@ -1565,7 +1564,7 @@ def test_legend_name(self): leg_title = ax.legend_.get_title() self._check_text_labels(leg_title, 'new') - @slow + @pytest.mark.slow def test_no_legend(self): kinds = ['line', 'bar', 'barh', 'kde', 'area', 'hist'] df = DataFrame(rand(3, 3), columns=['a', 'b', 'c']) @@ -1577,7 +1576,7 @@ def test_no_legend(self): ax = df.plot(kind=kind, legend=False) self._check_legend_labels(ax, visible=False) - @slow + @pytest.mark.slow def test_style_by_column(self): import matplotlib.pyplot as plt fig = plt.gcf() @@ -1593,7 +1592,7 @@ def test_style_by_column(self): for i, l in enumerate(ax.get_lines()[:len(markers)]): assert l.get_marker() == markers[i] - @slow + @pytest.mark.slow def test_line_label_none(self): s = Series([1, 2]) ax = s.plot() @@ -1602,7 +1601,7 @@ def test_line_label_none(self): ax = s.plot(legend=True) assert ax.get_legend().get_texts()[0].get_text() == 'None' - @slow + @pytest.mark.slow @tm.capture_stdout def test_line_colors(self): from matplotlib import cm @@ -1654,13 +1653,13 @@ def test_line_colors(self): # Forced show plot _check_plot_works(df.plot, color=custom_colors) - @slow + @pytest.mark.slow def test_dont_modify_colors(self): colors = ['r', 'g', 'b'] pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) assert len(colors) == 3 - @slow + @pytest.mark.slow def test_line_colors_and_styles_subplots(self): # GH 9894 from matplotlib import cm @@ -1738,7 +1737,7 @@ def test_line_colors_and_styles_subplots(self): self._check_colors(ax.get_lines(), linecolors=[c]) tm.close() - @slow + @pytest.mark.slow def test_area_colors(self): from matplotlib import cm from matplotlib.collections import PolyCollection @@ -1798,7 +1797,7 @@ def test_area_colors(self): for h in handles: assert h.get_alpha() == 0.5 - @slow + @pytest.mark.slow def test_hist_colors(self): default_colors = self._maybe_unpack_cycler(self.plt.rcParams) @@ -1832,7 +1831,7 @@ def test_hist_colors(self): self._check_colors(ax.patches[::10], facecolors=['green'] * 5) tm.close() - @slow + @pytest.mark.slow def test_kde_colors(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() @@ -1855,7 +1854,7 @@ def test_kde_colors(self): rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df))) self._check_colors(ax.get_lines(), linecolors=rgba_colors) - @slow + @pytest.mark.slow def test_kde_colors_and_styles_subplots(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() @@ -1914,7 +1913,7 @@ def test_kde_colors_and_styles_subplots(self): self._check_colors(ax.get_lines(), linecolors=[c]) tm.close() - @slow + @pytest.mark.slow def test_boxplot_colors(self): def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c='k', fliers_c=None): @@ -2025,7 +2024,7 @@ def 
test_all_invalid_plot_data(self): with pytest.raises(TypeError): df.plot(kind=kind) - @slow + @pytest.mark.slow def test_partially_invalid_plot_data(self): with tm.RNGContext(42): df = DataFrame(randn(10, 2), dtype=object) @@ -2050,7 +2049,7 @@ def test_invalid_kind(self): with pytest.raises(ValueError): df.plot(kind='aasdf') - @slow + @pytest.mark.slow def test_hexbin_basic(self): df = self.hexbin_df @@ -2066,7 +2065,7 @@ def test_hexbin_basic(self): # return value is single axes self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - @slow + @pytest.mark.slow def test_hexbin_with_c(self): df = self.hexbin_df @@ -2076,7 +2075,7 @@ def test_hexbin_with_c(self): ax = df.plot.hexbin(x='A', y='B', C='C', reduce_C_function=np.std) assert len(ax.collections) == 1 - @slow + @pytest.mark.slow def test_hexbin_cmap(self): df = self.hexbin_df @@ -2088,14 +2087,14 @@ def test_hexbin_cmap(self): ax = df.plot.hexbin(x='A', y='B', colormap=cm) assert ax.collections[0].cmap.name == cm - @slow + @pytest.mark.slow def test_no_color_bar(self): df = self.hexbin_df ax = df.plot.hexbin(x='A', y='B', colorbar=None) assert ax.collections[0].colorbar is None - @slow + @pytest.mark.slow def test_allow_cmap(self): df = self.hexbin_df @@ -2105,7 +2104,7 @@ def test_allow_cmap(self): with pytest.raises(TypeError): df.plot.hexbin(x='A', y='B', cmap='YlGn', colormap='BuGn') - @slow + @pytest.mark.slow def test_pie_df(self): df = DataFrame(np.random.rand(5, 3), columns=['X', 'Y', 'Z'], index=['a', 'b', 'c', 'd', 'e']) @@ -2159,7 +2158,7 @@ def test_pie_df_nan(self): assert ([x.get_text() for x in ax.get_legend().get_texts()] == base_expected[:i] + base_expected[i + 1:]) - @slow + @pytest.mark.slow def test_errorbar_plot(self): d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} df = DataFrame(d) @@ -2227,7 +2226,7 @@ def test_errorbar_plot(self): with pytest.raises((ValueError, TypeError)): df.plot(yerr=df_err) - @slow + @pytest.mark.slow def test_errorbar_with_integer_column_names(self): # test with integer column names df = DataFrame(np.random.randn(10, 2)) @@ -2237,7 +2236,7 @@ def test_errorbar_with_integer_column_names(self): ax = _check_plot_works(df.plot, y=0, yerr=1) self._check_has_errorbars(ax, xerr=0, yerr=1) - @slow + @pytest.mark.slow def test_errorbar_with_partial_columns(self): df = DataFrame(np.random.randn(10, 3)) df_err = DataFrame(np.random.randn(10, 2), columns=[0, 2]) @@ -2260,7 +2259,7 @@ def test_errorbar_with_partial_columns(self): ax = _check_plot_works(df.plot, yerr=err) self._check_has_errorbars(ax, xerr=0, yerr=1) - @slow + @pytest.mark.slow def test_errorbar_timeseries(self): d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} @@ -2370,7 +2369,7 @@ def _check_errorbar_color(containers, expected, has_err='has_xerr'): self._check_has_errorbars(ax, xerr=0, yerr=1) _check_errorbar_color(ax.containers, 'green', has_err='has_yerr') - @slow + @pytest.mark.slow def test_sharex_and_ax(self): # https://github.com/pandas-dev/pandas/issues/9737 using gridspec, # the axis in fig.get_axis() are sorted differently than pandas @@ -2422,7 +2421,7 @@ def _check(axes): self._check_visible(ax.get_xticklabels(minor=True), visible=True) tm.close() - @slow + @pytest.mark.slow def test_sharey_and_ax(self): # https://github.com/pandas-dev/pandas/issues/9737 using gridspec, # the axis in fig.get_axis() are sorted differently than pandas @@ -2505,7 +2504,7 @@ def test_memory_leak(self): # need to actually access something to get an error results[key].lines - @slow + @pytest.mark.slow def 
test_df_subplots_patterns_minorticks(self): # GH 10657 import matplotlib.pyplot as plt @@ -2550,7 +2549,7 @@ def test_df_subplots_patterns_minorticks(self): self._check_visible(ax.get_xticklabels(minor=True), visible=True) tm.close() - @slow + @pytest.mark.slow def test_df_gridspec_patterns(self): # GH 10819 import matplotlib.pyplot as plt @@ -2673,7 +2672,7 @@ def _get_boxed_grid(): self._check_visible(ax.get_xticklabels(minor=True), visible=True) tm.close() - @slow + @pytest.mark.slow def test_df_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings( diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 17a75e5cb287c..5f7b2dd2d6ca9 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -6,7 +6,6 @@ from pandas import Series, DataFrame import pandas.util.testing as tm -from pandas.util.testing import slow import numpy as np from numpy.random import randn @@ -28,7 +27,7 @@ def setup_method(self, method): self.ts = tm.makeTimeSeries() self.ts.name = 'ts' - @slow + @pytest.mark.slow def test_hist_legacy(self): _check_plot_works(self.ts.hist) _check_plot_works(self.ts.hist, grid=False) @@ -52,13 +51,13 @@ def test_hist_legacy(self): with pytest.raises(ValueError): self.ts.hist(by=self.ts.index, figure=fig) - @slow + @pytest.mark.slow def test_hist_bins_legacy(self): df = DataFrame(np.random.randn(10, 2)) ax = df.hist(bins=2)[0][0] assert len(ax.patches) == 2 - @slow + @pytest.mark.slow def test_hist_layout(self): df = self.hist_df with pytest.raises(ValueError): @@ -67,7 +66,7 @@ def test_hist_layout(self): with pytest.raises(ValueError): df.height.hist(layout=[1, 1]) - @slow + @pytest.mark.slow def test_hist_layout_with_by(self): df = self.hist_df @@ -113,7 +112,7 @@ def test_hist_layout_with_by(self): self._check_axes_shape( axes, axes_num=4, layout=(4, 2), figsize=(12, 7)) - @slow + @pytest.mark.slow def test_hist_no_overlap(self): from matplotlib.pyplot import subplot, gcf x = Series(randn(2)) @@ -126,13 +125,13 @@ def test_hist_no_overlap(self): axes = fig.axes if self.mpl_ge_1_5_0 else fig.get_axes() assert len(axes) == 2 - @slow + @pytest.mark.slow def test_hist_by_no_extra_plots(self): df = self.hist_df axes = df.height.hist(by=df.gender) # noqa assert len(self.plt.get_fignums()) == 1 - @slow + @pytest.mark.slow def test_plot_fails_when_ax_differs_from_figure(self): from pylab import figure fig1 = figure() @@ -144,7 +143,7 @@ def test_plot_fails_when_ax_differs_from_figure(self): class TestDataFramePlots(TestPlotBase): - @slow + @pytest.mark.slow def test_hist_df_legacy(self): from matplotlib.patches import Rectangle with tm.assert_produces_warning(UserWarning): @@ -210,7 +209,7 @@ def test_hist_df_legacy(self): with pytest.raises(AttributeError): ser.hist(foo='bar') - @slow + @pytest.mark.slow def test_hist_layout(self): df = DataFrame(randn(100, 3)) @@ -241,7 +240,7 @@ def test_hist_layout(self): with pytest.raises(ValueError): df.hist(layout=(-1, -1)) - @slow + @pytest.mark.slow # GH 9351 def test_tight_layout(self): if self.mpl_ge_2_0_1: @@ -254,7 +253,7 @@ def test_tight_layout(self): class TestDataFrameGroupByPlots(TestPlotBase): - @slow + @pytest.mark.slow def test_grouped_hist_legacy(self): from matplotlib.patches import Rectangle @@ -303,7 +302,7 @@ def test_grouped_hist_legacy(self): with tm.assert_produces_warning(FutureWarning): df.hist(by='C', figsize='default') - @slow + @pytest.mark.slow def 
test_grouped_hist_legacy2(self): n = 10 weight = Series(np.random.normal(166, 20, size=n)) @@ -318,7 +317,7 @@ def test_grouped_hist_legacy2(self): assert len(self.plt.get_fignums()) == 2 tm.close() - @slow + @pytest.mark.slow def test_grouped_hist_layout(self): df = self.hist_df pytest.raises(ValueError, df.hist, column='weight', by=df.gender, @@ -367,7 +366,7 @@ def test_grouped_hist_layout(self): axes = df.hist(column=['height', 'weight', 'category']) self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - @slow + @pytest.mark.slow def test_grouped_hist_multiple_axes(self): # GH 6970, GH 7069 df = self.hist_df @@ -387,7 +386,7 @@ def test_grouped_hist_multiple_axes(self): # pass different number of axes from required axes = df.hist(column='height', ax=axes) - @slow + @pytest.mark.slow def test_axis_share_x(self): df = self.hist_df # GH4089 @@ -401,7 +400,7 @@ def test_axis_share_x(self): assert not ax1._shared_y_axes.joined(ax1, ax2) assert not ax2._shared_y_axes.joined(ax1, ax2) - @slow + @pytest.mark.slow def test_axis_share_y(self): df = self.hist_df ax1, ax2 = df.hist(column='height', by=df.gender, sharey=True) @@ -414,7 +413,7 @@ def test_axis_share_y(self): assert not ax1._shared_x_axes.joined(ax1, ax2) assert not ax2._shared_x_axes.joined(ax1, ax2) - @slow + @pytest.mark.slow def test_axis_share_xy(self): df = self.hist_df ax1, ax2 = df.hist(column='height', by=df.gender, sharex=True, diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index d93ad90a36a9c..684a943fb5a69 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -7,7 +7,6 @@ from pandas import Series, DataFrame from pandas.compat import lmap import pandas.util.testing as tm -from pandas.util.testing import slow import numpy as np from numpy import random @@ -30,7 +29,7 @@ def setup_method(self, method): self.ts = tm.makeTimeSeries() self.ts.name = 'ts' - @slow + @pytest.mark.slow def test_autocorrelation_plot(self): from pandas.plotting import autocorrelation_plot _check_plot_works(autocorrelation_plot, series=self.ts) @@ -39,13 +38,13 @@ def test_autocorrelation_plot(self): ax = autocorrelation_plot(self.ts, label='Test') self._check_legend_labels(ax, labels=['Test']) - @slow + @pytest.mark.slow def test_lag_plot(self): from pandas.plotting import lag_plot _check_plot_works(lag_plot, series=self.ts) _check_plot_works(lag_plot, series=self.ts, lag=5) - @slow + @pytest.mark.slow def test_bootstrap_plot(self): from pandas.plotting import bootstrap_plot _check_plot_works(bootstrap_plot, series=self.ts, size=10) @@ -53,7 +52,7 @@ def test_bootstrap_plot(self): class TestDataFramePlots(TestPlotBase): - @slow + @pytest.mark.slow def test_scatter_plot_legacy(self): tm._skip_if_no_scipy() @@ -130,7 +129,7 @@ def test_scatter_matrix_axis(self): self._check_ticks_props( axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) - @slow + @pytest.mark.slow def test_andrews_curves(self): from pandas.plotting import andrews_curves from matplotlib import cm @@ -195,7 +194,7 @@ def test_andrews_curves(self): with tm.assert_produces_warning(FutureWarning): andrews_curves(data=df, class_column='Name') - @slow + @pytest.mark.slow def test_parallel_coordinates(self): from pandas.plotting import parallel_coordinates from matplotlib import cm @@ -263,7 +262,7 @@ def test_parallel_coordinates_with_sorted_labels(self): # lables and colors are ordered strictly increasing assert prev[1] < nxt[1] and prev[0] < nxt[0] - @slow + @pytest.mark.slow def test_radviz(self): from 
pandas.plotting import radviz from matplotlib import cm @@ -301,7 +300,7 @@ def test_radviz(self): handles, labels = ax.get_legend_handles_labels() self._check_colors(handles, facecolors=colors) - @slow + @pytest.mark.slow def test_subplot_titles(self): df = self.iris.drop('Name', axis=1).head() # Use the column names as the subplot titles diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 7c66b5dafb9c7..9c9011ba1ca7b 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -12,7 +12,6 @@ from pandas import Series, DataFrame, date_range from pandas.compat import range, lrange import pandas.util.testing as tm -from pandas.util.testing import slow import numpy as np from numpy.random import randn @@ -41,7 +40,7 @@ def setup_method(self, method): self.iseries = tm.makePeriodSeries() self.iseries.name = 'iseries' - @slow + @pytest.mark.slow def test_plot(self): _check_plot_works(self.ts.plot, label='foo') _check_plot_works(self.ts.plot, use_index=False) @@ -79,7 +78,7 @@ def test_plot(self): ax = _check_plot_works(self.ts.plot, subplots=True, layout=(1, -1)) self._check_axes_shape(ax, axes_num=1, layout=(1, 1)) - @slow + @pytest.mark.slow def test_plot_figsize_and_title(self): # figsize and title _, ax = self.plt.subplots() @@ -210,7 +209,7 @@ def test_line_use_index_false(self): label2 = ax2.get_xlabel() assert label2 == '' - @slow + @pytest.mark.slow def test_bar_log(self): expected = np.array([1., 10., 100., 1000.]) @@ -252,7 +251,7 @@ def test_bar_log(self): tm.assert_almost_equal(res[1], ymax) tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), expected) - @slow + @pytest.mark.slow def test_bar_ignore_index(self): df = Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']) _, ax = self.plt.subplots() @@ -280,7 +279,7 @@ def test_irregular_datetime(self): ax.set_xlim('1/1/1999', '1/1/2001') assert xp == ax.get_xlim()[0] - @slow + @pytest.mark.slow def test_pie_series(self): # if sum of values is less than 1.0, pie handle them as rate and draw # semicircle. 
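Every hunk in this patch is the same one-line swap: the ``@slow`` alias that this patch deletes from ``pandas.util.testing`` is replaced by pytest's own marker. As a sketch of how the new marker is consumed — the test name and body below are hypothetical, not part of the patch::

    import pytest

    @pytest.mark.slow
    def test_expensive_roundtrip():
        # The marker does not skip anything by itself: slow tests are
        # selected with ``pytest -m slow`` and skipped with
        # ``pytest -m "not slow"``.
        assert sum(range(10 ** 6)) == 499999500000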
@@ -339,14 +338,14 @@ def test_pie_nan(self): result = [x.get_text() for x in ax.texts] assert result == expected - @slow + @pytest.mark.slow def test_hist_df_kwargs(self): df = DataFrame(np.random.randn(10, 2)) _, ax = self.plt.subplots() ax = df.plot.hist(bins=5, ax=ax) assert len(ax.patches) == 10 - @slow + @pytest.mark.slow def test_hist_df_with_nonnumerics(self): # GH 9853 with tm.RNGContext(1): @@ -361,7 +360,7 @@ def test_hist_df_with_nonnumerics(self): ax = df.plot.hist(ax=ax) # bins=10 assert len(ax.patches) == 40 - @slow + @pytest.mark.slow def test_hist_legacy(self): _check_plot_works(self.ts.hist) _check_plot_works(self.ts.hist, grid=False) @@ -387,13 +386,13 @@ def test_hist_legacy(self): with pytest.raises(ValueError): self.ts.hist(by=self.ts.index, figure=fig) - @slow + @pytest.mark.slow def test_hist_bins_legacy(self): df = DataFrame(np.random.randn(10, 2)) ax = df.hist(bins=2)[0][0] assert len(ax.patches) == 2 - @slow + @pytest.mark.slow def test_hist_layout(self): df = self.hist_df with pytest.raises(ValueError): @@ -402,7 +401,7 @@ def test_hist_layout(self): with pytest.raises(ValueError): df.height.hist(layout=[1, 1]) - @slow + @pytest.mark.slow def test_hist_layout_with_by(self): df = self.hist_df @@ -446,7 +445,7 @@ def test_hist_layout_with_by(self): self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 7)) - @slow + @pytest.mark.slow def test_hist_no_overlap(self): from matplotlib.pyplot import subplot, gcf x = Series(randn(2)) @@ -459,7 +458,7 @@ def test_hist_no_overlap(self): axes = fig.axes if self.mpl_ge_1_5_0 else fig.get_axes() assert len(axes) == 2 - @slow + @pytest.mark.slow def test_hist_secondary_legend(self): # GH 9610 df = DataFrame(np.random.randn(30, 4), columns=list('abcd')) @@ -499,7 +498,7 @@ def test_hist_secondary_legend(self): assert ax.get_yaxis().get_visible() tm.close() - @slow + @pytest.mark.slow def test_df_series_secondary_legend(self): # GH 9779 df = DataFrame(np.random.randn(30, 3), columns=list('abc')) @@ -563,14 +562,14 @@ def test_df_series_secondary_legend(self): assert ax.get_yaxis().get_visible() tm.close() - @slow + @pytest.mark.slow def test_plot_fails_with_dupe_color_and_style(self): x = Series(randn(2)) with pytest.raises(ValueError): _, ax = self.plt.subplots() x.plot(style='k--', color='k', ax=ax) - @slow + @pytest.mark.slow def test_hist_kde(self): _, ax = self.plt.subplots() ax = self.ts.plot.hist(logy=True, ax=ax) @@ -593,7 +592,7 @@ def test_hist_kde(self): ylabels = ax.get_yticklabels() self._check_text_labels(ylabels, [''] * len(ylabels)) - @slow + @pytest.mark.slow def test_kde_kwargs(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() @@ -608,7 +607,7 @@ def test_kde_kwargs(self): self._check_ax_scales(ax, yaxis='log') self._check_text_labels(ax.yaxis.get_label(), 'Density') - @slow + @pytest.mark.slow def test_kde_missing_vals(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() @@ -619,7 +618,7 @@ def test_kde_missing_vals(self): # gh-14821: check if the values have any missing values assert any(~np.isnan(axes.lines[0].get_xdata())) - @slow + @pytest.mark.slow def test_hist_kwargs(self): _, ax = self.plt.subplots() ax = self.ts.plot.hist(bins=5, ax=ax) @@ -637,7 +636,7 @@ def test_hist_kwargs(self): ax = self.ts.plot.hist(align='left', stacked=True, ax=ax) tm.close() - @slow + @pytest.mark.slow def test_hist_kde_color(self): _, ax = self.plt.subplots() ax = self.ts.plot.hist(logy=True, bins=10, color='b', ax=ax) @@ -654,7 +653,7 @@ def test_hist_kde_color(self): assert len(lines) 
== 1 self._check_colors(lines, ['r']) - @slow + @pytest.mark.slow def test_boxplot_series(self): _, ax = self.plt.subplots() ax = self.ts.plot.box(logy=True, ax=ax) @@ -664,7 +663,7 @@ def test_boxplot_series(self): ylabels = ax.get_yticklabels() self._check_text_labels(ylabels, [''] * len(ylabels)) - @slow + @pytest.mark.slow def test_kind_both_ways(self): s = Series(range(3)) kinds = (plotting._core._common_kinds + @@ -676,7 +675,7 @@ def test_kind_both_ways(self): s.plot(kind=kind, ax=ax) getattr(s.plot, kind)() - @slow + @pytest.mark.slow def test_invalid_plot_data(self): s = Series(list('abcd')) _, ax = self.plt.subplots() @@ -686,7 +685,7 @@ def test_invalid_plot_data(self): with pytest.raises(TypeError): s.plot(kind=kind, ax=ax) - @slow + @pytest.mark.slow def test_valid_object_plot(self): s = Series(lrange(10), dtype=object) for kind in plotting._core._common_kinds: @@ -708,7 +707,7 @@ def test_invalid_kind(self): with pytest.raises(ValueError): s.plot(kind='aasdf') - @slow + @pytest.mark.slow def test_dup_datetime_index_plot(self): dr1 = date_range('1/1/2009', periods=4) dr2 = date_range('1/2/2009', periods=4) @@ -717,7 +716,7 @@ def test_dup_datetime_index_plot(self): s = Series(values, index=index) _check_plot_works(s.plot) - @slow + @pytest.mark.slow def test_errorbar_plot(self): s = Series(np.arange(10), name='x') @@ -764,14 +763,14 @@ def test_table(self): _check_plot_works(self.series.plot, table=True) _check_plot_works(self.series.plot, table=self.series) - @slow + @pytest.mark.slow def test_series_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings(Series([1, 2, 3]), plotting._core._series_kinds + plotting._core._common_kinds) - @slow + @pytest.mark.slow def test_standard_colors(self): from pandas.plotting._style import _get_standard_colors @@ -788,7 +787,7 @@ def test_standard_colors(self): result = _get_standard_colors(3, color=[c]) assert result == [c] * 3 - @slow + @pytest.mark.slow def test_standard_colors_all(self): import matplotlib.colors as colors from pandas.plotting._style import _get_standard_colors diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 7774d10c5eaf8..6d8a54b538237 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -20,8 +20,7 @@ from pandas.compat import lrange, range from pandas import compat -from pandas.util.testing import (slow, - assert_series_equal, +from pandas.util.testing import (assert_series_equal, assert_almost_equal, assert_frame_equal) import pandas.util.testing as tm @@ -2592,7 +2591,7 @@ def test_series_set_value(self): # s2 = s.set_value(dates[1], index[1]) # assert s2.values.dtype == 'M8[ns]' - @slow + @pytest.mark.slow def test_slice_locs_indexerror(self): times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) for i in range(100000)] diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 08c3a25e66b0e..2b972477ae999 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -16,7 +16,7 @@ from pandas import compat, _np_version_under1p11, _np_version_under1p13 from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, - assert_panel4d_equal, slow) + assert_panel4d_equal) from pandas.io.formats.printing import pprint_thing import pandas.util.testing as tm @@ -196,7 +196,7 @@ def test_integer_arithmetic_frame(self): def test_integer_arithmetic_series(self): 
self.run_series(self.integer.iloc[:, 0], self.integer.iloc[:, 0]) - @slow + @pytest.mark.slow def test_integer_panel(self): self.run_panel(_integer2_panel, np.random.randint(1, 100)) @@ -206,11 +206,11 @@ def test_float_arithemtic_frame(self): def test_float_arithmetic_series(self): self.run_series(self.frame2.iloc[:, 0], self.frame2.iloc[:, 0]) - @slow + @pytest.mark.slow def test_float_panel(self): self.run_panel(_frame2_panel, np.random.randn() + 0.1, binary_comp=0.8) - @slow + @pytest.mark.slow def test_panel4d(self): with catch_warnings(record=True): self.run_panel(tm.makePanel4D(), np.random.randn() + 0.5, @@ -226,7 +226,7 @@ def test_mixed_arithmetic_series(self): for col in self.mixed2.columns: self.run_series(self.mixed2[col], self.mixed2[col], binary_comp=4) - @slow + @pytest.mark.slow def test_mixed_panel(self): self.run_panel(_mixed2_panel, np.random.randint(1, 100), binary_comp=-2) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 3ba5d2065cddf..dd35e4375841e 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -2155,7 +2155,7 @@ def _non_null_values(x): assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) - @tm.slow + @pytest.mark.slow def test_ewm_consistency(self): def _weights(s, com, adjust, ignore_na): if isinstance(s, DataFrame): @@ -2254,7 +2254,7 @@ def _ewma(s, com, min_periods, adjust, ignore_na): _variance_debiasing_factors(x, com=com, adjust=adjust, ignore_na=ignore_na))) - @tm.slow + @pytest.mark.slow def test_expanding_consistency(self): # suppress warnings about empty slices, as we are deliberately testing @@ -2328,7 +2328,7 @@ def test_expanding_consistency(self): assert_equal(expanding_f_result, expanding_apply_f_result) - @tm.slow + @pytest.mark.slow def test_rolling_consistency(self): # suppress warnings about empty slices, as we are deliberately testing diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 17e09b38b20e0..d6ba9561340cc 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -50,13 +50,6 @@ from pandas._libs import testing as _testing from pandas.io.common import urlopen -try: - import pytest - slow = pytest.mark.slow -except ImportError: - # Should be ok to just ignore. If you actually need - # slow then you'll hit an import error long before getting here. - pass N = 30 From 63536f4a80a1f1f03732411d015910c55a1f9290 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 13 Jul 2017 12:15:26 -0700 Subject: [PATCH 749/933] MAINT: Remove unused mock import (#16908) We import it, set it as an attribute, and then don't use it. 
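For reference (not part of the patch): a test that genuinely needs mocking does not need this attribute either, since on Python 3 the library ships with the standard library. A minimal, self-contained sketch::

    from unittest import mock  # stdlib since Python 3.3

    client = mock.Mock()
    client.get.return_value = {'status': 'ok'}

    assert client.get('/health') == {'status': 'ok'}
    client.get.assert_called_once_with('/health')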
--- pandas/tests/io/formats/test_printing.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index aae3ba31648ff..ec34e7656e01f 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -127,14 +127,7 @@ class TestTableSchemaRepr(object): @classmethod def setup_class(cls): pytest.importorskip('IPython') - try: - import mock - except ImportError: - try: - from unittest import mock - except ImportError: - pytest.skip("Mock is not installed") - cls.mock = mock + from IPython.core.interactiveshell import InteractiveShell cls.display_formatter = InteractiveShell.instance().display_formatter From 25384ba459ba7de9fb9d36821f0a4ae239cc40b2 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Thu, 13 Jul 2017 21:35:48 +0100 Subject: [PATCH 750/933] Let _get_dtype accept Categoricals and CategoricalIndex (#16887) --- doc/source/whatsnew/v0.21.0.txt | 1 - pandas/core/dtypes/common.py | 4 +++- pandas/tests/dtypes/test_common.py | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index a5ee0e0ce2653..8ba57c0fa50be 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -149,7 +149,6 @@ Conversion ^^^^^^^^^^ - Indexing ^^^^^^^^ diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 2eebf3704253e..a386c04cc4fdd 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -11,7 +11,7 @@ ExtensionDtype) from .generic import (ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, - ABCSparseArray, ABCSparseSeries) + ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex) from .inference import is_string_like from .inference import * # noqa @@ -1713,6 +1713,8 @@ def _get_dtype(arr_or_dtype): return PeriodDtype.construct_from_string(arr_or_dtype) elif is_interval_dtype(arr_or_dtype): return IntervalDtype.construct_from_string(arr_or_dtype) + elif isinstance(arr_or_dtype, (ABCCategorical, ABCCategoricalIndex)): + return arr_or_dtype.dtype if hasattr(arr_or_dtype, 'dtype'): arr_or_dtype = arr_or_dtype.dtype diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index c32e8590c5675..7188e397c0617 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -532,16 +532,16 @@ def test_is_complex_dtype(): (float, np.dtype(float)), ('float64', np.dtype('float64')), (np.dtype('float64'), np.dtype('float64')), - pytest.mark.xfail((str, np.dtype(' Date: Thu, 13 Jul 2017 19:04:29 -0400 Subject: [PATCH 751/933] Fixes for #16896(TimedeltaIndex indexing regression for strings) (#16907) --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/dtypes/common.py | 4 +++- pandas/tests/dtypes/test_common.py | 9 +++++++-- pandas/tests/indexes/timedeltas/test_timedelta.py | 3 +++ pandas/tests/indexing/test_timedelta.py | 9 ++++++++- 5 files changed, 22 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 8ba57c0fa50be..039b24cc63217 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -154,7 +154,7 @@ Indexing - When called with a null slice (e.g. ``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). 
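Before the rest of this patch's diff, a minimal reproduction of the regression it fixes, modeled on the test it adds in ``pandas/tests/indexing/test_timedelta.py``::

    import pandas as pd

    df = pd.DataFrame({'x': range(3)},
                      index=pd.to_timedelta(range(3), unit='days'))

    # raised on pandas 0.20.3; returns the first row again once
    # is_timedelta64_dtype() stops matching plain strings like '0 days'
    df.loc['0 days']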
- When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`). - +- Fixes regression in 0.20.3 when indexing with a string on a ``TimedeltaIndex`` (:issue:`16896`). I/O ^^^ diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index a386c04cc4fdd..114900ce802be 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -392,13 +392,15 @@ def is_timedelta64_dtype(arr_or_dtype): False >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) True + >>> is_timedelta64_dtype('0 days') + False """ if arr_or_dtype is None: return False try: tipo = _get_dtype_type(arr_or_dtype) - except ValueError: + except: return False return issubclass(tipo, np.timedelta64) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 7188e397c0617..290cdd732b6d6 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -199,12 +199,17 @@ def test_is_datetime64tz_dtype(): def test_is_timedelta64_dtype(): assert not com.is_timedelta64_dtype(object) + assert not com.is_timedelta64_dtype(None) assert not com.is_timedelta64_dtype([1, 2, 3]) assert not com.is_timedelta64_dtype(np.array([], dtype=np.datetime64)) + assert not com.is_timedelta64_dtype('0 days') + assert not com.is_timedelta64_dtype("0 days 00:00:00") + assert not com.is_timedelta64_dtype(["0 days 00:00:00"]) + assert not com.is_timedelta64_dtype("NO DATE") + assert com.is_timedelta64_dtype(np.timedelta64) assert com.is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) - - assert not com.is_timedelta64_dtype("0 days 00:00:00") + assert com.is_timedelta64_dtype(pd.to_timedelta(['0 days', '1 days'])) def test_is_period_dtype(): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 08cf5108ffdb1..a4fc26382fb9b 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -66,6 +66,9 @@ def test_get_loc(self): for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: assert idx.get_loc('1 day 1 hour', method) == loc + # GH 16896 + assert idx.get_loc('0 days') == 0 + def test_get_loc_nat(self): tidx = TimedeltaIndex(['1 days 01:00:00', 'NaT', '2 days 01:00:00']) diff --git a/pandas/tests/indexing/test_timedelta.py b/pandas/tests/indexing/test_timedelta.py index be3ea8f0c371d..32609362e49af 100644 --- a/pandas/tests/indexing/test_timedelta.py +++ b/pandas/tests/indexing/test_timedelta.py @@ -5,7 +5,6 @@ class TestTimedeltaIndexing(object): - def test_boolean_indexing(self): # GH 14946 df = pd.DataFrame({'x': range(10)}) @@ -40,3 +39,11 @@ def test_list_like_indexing(self, indexer, expected): dtype="int64") tm.assert_frame_equal(expected, df) + + def test_string_indexing(self): + # GH 16896 + df = pd.DataFrame({'x': range(3)}, + index=pd.to_timedelta(range(3), unit='days')) + expected = df.iloc[0] + sliced = df.loc['0 days'] + tm.assert_series_equal(sliced, expected) From 6000c5b9624fdd8925099f215eba282bfbef87ce Mon Sep 17 00:00:00 2001 From: jdeschenes Date: Fri, 14 Jul 2017 10:13:53 -0400 Subject: [PATCH 752/933] Fix for #16909 (TimedeltaIndex.get_loc is not working on np.timedelta64 data type) (#16912) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/timedeltas.py | 4 ++-- pandas/tests/indexes/timedeltas/test_timedelta.py | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git
a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 039b24cc63217..2716d9b09eaa9 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -155,6 +155,7 @@ Indexing - When called with a null slice (e.g. ``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). - When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`). - Fixes regression in 0.20.3 when indexing with a string on a ``TimedeltaIndex`` (:issue:`16896`). +- Fixed ``TimedeltaIndex.get_loc`` handling of ``np.timedelta64`` inputs (:issue:`16909`). I/O ^^^ diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index faec813df3993..68713743d72ed 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -15,7 +15,7 @@ _ensure_int64) from pandas.core.dtypes.missing import isnull from pandas.core.dtypes.generic import ABCSeries -from pandas.core.common import _maybe_box, _values_from_object, is_bool_indexer +from pandas.core.common import _maybe_box, _values_from_object from pandas.core.indexes.base import Index from pandas.core.indexes.numeric import Int64Index @@ -682,7 +682,7 @@ def get_loc(self, key, method=None, tolerance=None): ------- loc : int """ - if is_bool_indexer(key) or is_timedelta64_dtype(key): + if is_list_like(key): raise TypeError if isnull(key): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index a4fc26382fb9b..59e4b1432b8bc 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -66,6 +66,9 @@ def test_get_loc(self): for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: assert idx.get_loc('1 day 1 hour', method) == loc + # GH 16909 + assert idx.get_loc(idx[1].to_timedelta64()) == 1 + # GH 16896 assert idx.get_loc('0 days') == 0 From a587d568d213c62307a72d98d6913239f55844e8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 14 Jul 2017 14:46:41 -0500 Subject: [PATCH 753/933] DOC: Recommend sphinx 1.5 for now (#16929) For the SciPy sprint tomorrow, until the cause of the doc-building slowdown is fully identified. 
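Looking back at the ``get_loc`` fix in patch 752 above: the guard that rejected any key of timedelta64 dtype — including scalars — now rejects only list-likes. A small sketch of the call that previously raised ``TypeError``, based on the test the patch adds::

    import pandas as pd

    idx = pd.to_timedelta(['1 days', '2 days', '3 days'])

    # a np.timedelta64 scalar is a valid single label; this returns 1
    idx.get_loc(idx[1].to_timedelta64())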
--- ci/requirements_all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements_all.txt b/ci/requirements_all.txt index e9f49ed879c86..de37ec4d20be4 100644 --- a/ci/requirements_all.txt +++ b/ci/requirements_all.txt @@ -2,7 +2,7 @@ pytest pytest-cov pytest-xdist flake8 -sphinx +sphinx=1.5* nbsphinx ipython python-dateutil From 6858d0f6caa60c98acc4b6c3eaa6cd0309aedca6 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 14 Jul 2017 22:20:28 +0100 Subject: [PATCH 754/933] BUG: Allow value labels to be read with iterator (#16926) Allow value labels to be read before the iterator has been used Fix issue where categorical data was incorrectly reformatted when write_index was False closes #16923 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/stata.py | 36 ++++++++++++++++++--------------- pandas/tests/io/test_stata.py | 18 ++++++++++++++--- 3 files changed, 36 insertions(+), 19 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2716d9b09eaa9..bd19d71182762 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -162,6 +162,7 @@ I/O - Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) +- Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) Plotting ^^^^^^^^ diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 107dccfc8175c..30991d8a24c63 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -997,6 +997,7 @@ def __init__(self, path_or_buf, convert_dates=True, self.path_or_buf = BytesIO(contents) self._read_header() + self._setup_dtype() def __enter__(self): """ enter context manager """ @@ -1299,6 +1300,23 @@ def _read_old_header(self, first_char): # necessary data to continue parsing self.data_location = self.path_or_buf.tell() + def _setup_dtype(self): + """Map between numpy and Stata dtypes""" + if self._dtype is not None: + return self._dtype + + dtype = [] # Convert struct data types to numpy data type + for i, typ in enumerate(self.typlist): + if typ in self.NUMPY_TYPE_MAP: + dtype.append(('s' + str(i), self.byteorder + + self.NUMPY_TYPE_MAP[typ])) + else: + dtype.append(('s' + str(i), 'S' + str(typ))) + dtype = np.dtype(dtype) + self._dtype = dtype + + return self._dtype + def _calcsize(self, fmt): return (type(fmt) is int and fmt or struct.calcsize(self.byteorder + fmt)) @@ -1472,22 +1490,10 @@ def read(self, nrows=None, convert_dates=None, if nrows is None: nrows = self.nobs - if (self.format_version >= 117) and (self._dtype is None): + if (self.format_version >= 117) and (not self._value_labels_read): self._can_read_value_labels = True self._read_strls() - # Setup the dtype.
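The hunk above (continuing below) is the heart of the fix: dtype construction moves out of ``read`` into the new ``_setup_dtype`` called from ``__init__``, so value labels are available before any rows have been fetched. A hedged usage sketch of what the patch enables — the file name is hypothetical::

    import pandas as pd

    df = pd.DataFrame({'A': pd.Categorical(['B', 'E', 'C', 'A', 'E'])})
    df.to_stata('labelled.dta')  # hypothetical path

    reader = pd.read_stata('labelled.dta', iterator=True)
    labels = reader.value_labels()  # no longer requires reading a chunk first
    chunk = reader.read(2)          # iteration still behaves as before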
- if self._dtype is None: - dtype = [] # Convert struct data types to numpy data type - for i, typ in enumerate(self.typlist): - if typ in self.NUMPY_TYPE_MAP: - dtype.append(('s' + str(i), self.byteorder + - self.NUMPY_TYPE_MAP[typ])) - else: - dtype.append(('s' + str(i), 'S' + str(typ))) - dtype = np.dtype(dtype) - self._dtype = dtype - # Read data dtype = self._dtype max_read_len = (self.nobs - self._lines_read) * dtype.itemsize @@ -1958,7 +1964,6 @@ def _prepare_categoricals(self, data): return data get_base_missing_value = StataMissingValue.get_base_missing_value - index = data.index data_formatted = [] for col, col_is_cat in zip(data, is_cat): if col_is_cat: @@ -1981,8 +1986,7 @@ def _prepare_categoricals(self, data): # Replace missing values with Stata missing value for type values[values == -1] = get_base_missing_value(dtype) - data_formatted.append((col, values, index)) - + data_formatted.append((col, values)) else: data_formatted.append((col, data[col])) return DataFrame.from_items(data_formatted) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index b9c6736563160..a414928d318c4 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -9,18 +9,18 @@ from datetime import datetime from distutils.version import LooseVersion -import pytest import numpy as np import pandas as pd import pandas.util.testing as tm +import pytest from pandas import compat +from pandas._libs.tslib import NaT from pandas.compat import iterkeys +from pandas.core.dtypes.common import is_categorical_dtype from pandas.core.frame import DataFrame, Series from pandas.io.parsers import read_csv from pandas.io.stata import (read_stata, StataReader, InvalidColumnName, PossiblePrecisionLoss, StataMissingValue) -from pandas._libs.tslib import NaT -from pandas.core.dtypes.common import is_categorical_dtype class TestStata(object): @@ -1297,3 +1297,15 @@ def test_pickle_path_localpath(self): reader = lambda x: read_stata(x).set_index('index') result = tm.round_trip_localpath(df.to_stata, reader) tm.assert_frame_equal(df, result) + + @pytest.mark.parametrize('write_index', [True, False]) + def test_value_labels_iterator(self, write_index): + # GH 16923 + d = {'A': ['B', 'E', 'C', 'A', 'E']} + df = pd.DataFrame(data=d) + df['A'] = df['A'].astype('category') + with tm.ensure_clean() as path: + df.to_stata(path, write_index=write_index) + dta_iter = pd.read_stata(path, iterator=True) + value_labels = dta_iter.value_labels() + assert value_labels == {'A': {0: 'A', 1: 'B', 2: 'C', 3: 'E'}} From ad24759871ea43131711cfce1e5fc69c06d82956 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 14 Jul 2017 21:16:00 -0700 Subject: [PATCH 755/933] DOC: Update flake8 command instructions (#16919) --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- doc/source/contributing.rst | 24 +++++++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 959858fb50f89..e8b6ee21ad104 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,4 @@ - [ ] closes #xxxx - [ ] tests added / passed - - [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff`` (On Windows, ``git diff upstream/master -u -- "*.py" | flake8 --diff`` might work as an alternative.) 
+ - [ ] passes ``git diff upstream/master -u -- "*.py" | flake8 --diff`` - [ ] whatsnew entry diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index cd444f796fabb..bfcf560565977 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -509,7 +509,7 @@ the `flake8 `_ tool and report any stylistic errors in your code. Therefore, it is helpful before submitting code to run the check yourself on the diff:: - git diff master --name-only -- '*.py' | flake8 --diff + git diff master -u -- "*.py" | flake8 --diff This command will catch any stylistic errors in your changes specifically, but be beware it may not catch all of them. For example, if you delete the only @@ -518,18 +518,28 @@ unused function. However, style-checking the diff will not catch this because the actual import is not part of the diff. Thus, for completeness, you should run this command, though it will take longer:: - git diff master --name-only -- '*.py' | grep 'pandas/' | xargs -r flake8 + git diff master --name-only -- "*.py" | grep "pandas/" | xargs -r flake8 Note that on OSX, the ``-r`` flag is not available, so you have to omit it and run this slightly modified command:: - git diff master --name-only -- '*.py' | grep 'pandas/' | xargs flake8 + git diff master --name-only -- "*.py" | grep "pandas/" | xargs flake8 -Note that on Windows, ``grep``, ``xargs``, and other tools are likely -unavailable. However, this has been shown to work on smaller commits in the -standard Windows command line:: +Note that on Windows, these commands are unfortunately not possible because +commands like ``grep`` and ``xargs`` are not available natively. To imitate the +behavior with the commands above, you should run:: - git diff master -u -- "*.py" | flake8 --diff + git diff master --name-only -- "*.py" + +This will list all of the Python files that have been modified. The only ones +that matter during linting are any whose directory filepath begins with "pandas." +For each filepath, copy and paste it after the ``flake8`` command as shown below: + + flake8 + +Alternatively, you can install the ``grep`` and ``xargs`` commands via the +`MinGW `__ toolchain, and it will allow you to run the +commands above. 
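As a further aside for platforms where the shell pipeline is awkward: flake8 3.x also exposes a legacy Python API, so the same check can be scripted. A hedged sketch — the file list and ignore codes here are illustrative only::

    from flake8.api import legacy as flake8

    style_guide = flake8.get_style_guide(ignore=['E731', 'E402'])
    report = style_guide.check_files(['pandas/core/frame.py'])
    print(report.total_errors)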
Backwards Compatibility ~~~~~~~~~~~~~~~~~~~~~~~ From 5f2b96bb637f6ddeec169c5ef8ad20013a03c853 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 15 Jul 2017 13:30:03 +0100 Subject: [PATCH 756/933] TST: Don't assert that a bug exists in numpy (#16940) Better to ignore the warning from the bug, rather than assert the bug is still there After this change, numpy/numpy#9412 _could_ be backported to fix the bug --- pandas/tests/test_algos.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 9504d2a9426f0..993dcc4f527b2 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -2,6 +2,7 @@ import numpy as np import pytest +import warnings from numpy.random import RandomState from numpy import nan @@ -127,7 +128,7 @@ def test_unsortable(self): arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object) if compat.PY2 and not pd._np_version_under1p10: # RuntimeWarning: tp_compare didn't return -1 or -2 for exception - with tm.assert_produces_warning(RuntimeWarning): + with warnings.catch_warnings(): pytest.raises(TypeError, algos.safe_sort, arr) else: pytest.raises(TypeError, algos.safe_sort, arr) From 6cee09ebfd2e8fb15f3e225bd9770852a6a533d1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Jul 2017 08:11:34 -0500 Subject: [PATCH 757/933] CI: add .pep8speakes.yml --- .pep8speakes.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .pep8speakes.yml diff --git a/.pep8speakes.yml b/.pep8speakes.yml new file mode 100644 index 0000000000000..299b76c8922cc --- /dev/null +++ b/.pep8speakes.yml @@ -0,0 +1,10 @@ +# File : .pep8speaks.yml + +scanner: + diff_only: True # If True, errors caused by only the patch are shown + +pycodestyle: + max-line-length: 79 + ignore: # Errors and warnings to ignore + - E731 + - E402 From 80e40f81d78ade9921607a092a00b83f9d34cfd3 Mon Sep 17 00:00:00 2001 From: faic Date: Sat, 15 Jul 2017 16:58:24 +0300 Subject: [PATCH 758/933] CLN16668: remove OrderedDefaultDict (#16939) --- pandas/compat/__init__.py | 25 ------------------------- pandas/core/panel.py | 6 ++++-- 2 files changed, 4 insertions(+), 27 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 9eacb9acef2c9..33b41d61aa978 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -21,7 +21,6 @@ given metaclass instead (and avoids intermediary class creation) Other items: -* OrderedDefaultDict * platform checker """ # pylint disable=W0611 @@ -373,30 +372,6 @@ def parse_date(timestr, *args, **kwargs): parse_date = _date_parser.parse -class OrderedDefaultdict(OrderedDict): - - def __init__(self, *args, **kwargs): - newdefault = None - newargs = () - if args: - newdefault = args[0] - if not (newdefault is None or callable(newdefault)): - raise TypeError('first argument must be callable or None') - newargs = args[1:] - self.default_factory = newdefault - super(self.__class__, self).__init__(*newargs, **kwargs) - - def __missing__(self, key): - if self.default_factory is None: - raise KeyError(key) - self[key] = value = self.default_factory() - return value - - def __reduce__(self): # optional, for pickle support - args = self.default_factory if self.default_factory else tuple() - return type(self), args, None, None, list(self.items()) - - # https://github.com/pandas-dev/pandas/pull/9123 def is_platform_little_endian(): """ am I little endian """ diff --git a/pandas/core/panel.py b/pandas/core/panel.py index d1f5b4587059c..69a8468552f54 100644 --- 
a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -19,7 +19,7 @@ import pandas.core.ops as ops import pandas.core.missing as missing from pandas import compat -from pandas.compat import (map, zip, range, u, OrderedDict, OrderedDefaultdict) +from pandas.compat import (map, zip, range, u, OrderedDict) from pandas.compat.numpy import function as nv from pandas.core.common import _try_sort, _default_index from pandas.core.frame import DataFrame @@ -260,9 +260,11 @@ def from_dict(cls, data, intersect=False, orient='items', dtype=None): ------- Panel """ + from collections import defaultdict + orient = orient.lower() if orient == 'minor': - new_data = OrderedDefaultdict(dict) + new_data = defaultdict(OrderedDict) for col, df in compat.iteritems(data): for item, s in compat.iteritems(df): new_data[item][col] = s From 61f0c5ce2eae8a548e4729ee5cc8a8633faa8316 Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Sat, 15 Jul 2017 10:34:04 -0500 Subject: [PATCH 759/933] Change "pls" to "please" in error message (#16947) --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6559fc4c24ce2..4d8b831b7d63f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3359,7 +3359,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, inplace = validate_bool_kwarg(inplace, 'inplace') # 10726 if by is not None: - warnings.warn("by argument to sort_index is deprecated, pls use " + warnings.warn("by argument to sort_index is deprecated, please use " ".sort_values(by=...)", FutureWarning, stacklevel=2) if level is not None: raise ValueError("unable to simultaneously sort by and level") From 0e47b280ae6159dbc8817f3c7bd3e296af480c5d Mon Sep 17 00:00:00 2001 From: Alex Lubbock Date: Sat, 15 Jul 2017 10:34:31 -0500 Subject: [PATCH 760/933] BUG: MultiIndex sort with ascending as list (#16937) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/multi.py | 3 ++- pandas/tests/test_multilevel.py | 23 +++++++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index bd19d71182762..6ddf6029b99bb 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -156,6 +156,7 @@ Indexing - When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`). - Fixes regression in 0.20.3 when indexing with a string on a ``TimedeltaIndex`` (:issue:`16896`). - Fixed ``TimedeltaIndex.get_loc`` handling of ``np.timedelta64`` inputs (:issue:`16909`). +- Fix :meth:`MultiIndex.sort_index` ordering when ``ascending`` argument is a list, but not all levels are specified, or are in a different order (:issue:`16934`). 
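To make the new bullet concrete, a small sketch modeled on the regression test this patch adds (``test_sort_ascending_list``)::

    import pandas as pd

    idx = pd.MultiIndex.from_tuples(
        [('bar', 'one', 4), ('bar', 'two', 3),
         ('foo', 'one', 2), ('foo', 'two', 1)],
        names=['first', 'second', 'third'])
    s = pd.Series(range(4), index=idx)

    # The per-level ascending flags now line up with the *requested*
    # levels ('third' descending, then 'first' ascending) instead of
    # being applied to the index's own level order.
    s.sort_index(level=['third', 'first'], ascending=[False, True])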
I/O ^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 81eac0ac0684f..ed7ca079a07b5 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1697,7 +1697,8 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): raise ValueError("level must have same length as ascending") from pandas.core.sorting import lexsort_indexer - indexer = lexsort_indexer(self.labels, orders=ascending) + indexer = lexsort_indexer([self.labels[lev] for lev in level], + orders=ascending) # level ordering else: diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index c8c210c42eac2..a56ff0fc2d158 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2781,3 +2781,26 @@ def test_sort_index_nan(self): result = s.sort_index(na_position='first') expected = s.iloc[[1, 2, 3, 0]] tm.assert_series_equal(result, expected) + + def test_sort_ascending_list(self): + # GH: 16934 + + # Set up a Series with a three level MultiIndex + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], + [4, 3, 2, 1, 4, 3, 2, 1]] + tuples = list(zip(*arrays)) + index = pd.MultiIndex.from_tuples(tuples, + names=['first', 'second', 'third']) + s = pd.Series(range(8), index=index) + + # Sort with boolean ascending + result = s.sort_index(level=['third', 'first'], ascending=False) + expected = s.iloc[[4, 0, 5, 1, 6, 2, 7, 3]] + tm.assert_series_equal(result, expected) + + # Sort with list of boolean ascending + result = s.sort_index(level=['third', 'first'], + ascending=[False, True]) + expected = s.iloc[[0, 4, 1, 5, 2, 6, 3, 7]] + tm.assert_series_equal(result, expected) From d7bf220c2daeaf86ba2e2026b4fe900d441720d8 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 15 Jul 2017 17:38:22 +0200 Subject: [PATCH 761/933] DOC: Improving docstring of pop method (#16416) (#16520) --- pandas/core/generic.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5722539b87aec..a4bb746722c1e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -547,6 +547,43 @@ def swapaxes(self, axis1, axis2, copy=True): def pop(self, item): """ Return item and drop from frame. Raise KeyError if not found. + + Parameters + ---------- + item : str + Column label to be popped + + Returns + ------- + popped : Series + + Examples + -------- + >>> df = pd.DataFrame([('falcon', 'bird', 389.0), + ... ('parrot', 'bird', 24.0), + ... ('lion', 'mammal', 80.5), + ... ('monkey', 'mammal', np.nan)], + ... 
columns=('name', 'class', 'max_speed')) + >>> df + name class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN + + >>> df.pop('class') + 0 bird + 1 bird + 2 mammal + 3 mammal + Name: class, dtype: object + + >>> df + name max_speed + 0 falcon 389.0 + 1 parrot 24.0 + 2 lion 80.5 + 3 monkey NaN """ result = self[item] del self[item] From 794fd789603e06e86456375f92489ae4de92a99a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Jul 2017 10:57:41 -0500 Subject: [PATCH 762/933] PEP8 --- pandas/core/frame.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4d8b831b7d63f..b5462bbe67647 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3359,8 +3359,9 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, inplace = validate_bool_kwarg(inplace, 'inplace') # 10726 if by is not None: - warnings.warn("by argument to sort_index is deprecated, please use " - ".sort_values(by=...)", FutureWarning, stacklevel=2) + warnings.warn("by argument to sort_index is deprecated, " + "please use .sort_values(by=...)", + FutureWarning, stacklevel=2) if level is not None: raise ValueError("unable to simultaneously sort by and level") return self.sort_values(by, axis=axis, ascending=ascending, From daf07a64d681e70eda6211b739919cae6345e86f Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Sat, 15 Jul 2017 11:01:38 -0500 Subject: [PATCH 763/933] WARN: add stacklevel to to_dict() UserWarning (#16927) (#16936) * ERR: add stacklevel to to_dict() UserWarning (#16927) * TST: Add warning testing to to_dict() * Fix warning assertion on to_dict() test * Add github issue to documentation on to_dict() warning test --- pandas/core/frame.py | 3 ++- pandas/tests/frame/test_convert_to.py | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b5462bbe67647..9920ddf854850 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -972,7 +972,8 @@ def to_dict(self, orient='dict', into=dict): """ if not self.columns.is_unique: warnings.warn("DataFrame columns are not unique, some " - "columns will be omitted.", UserWarning) + "columns will be omitted.", UserWarning, + stacklevel=2) # GH16122 into_c = standardize_mapping(into) if orient.lower().startswith('d'): diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 34dd138ee1c80..629c695b702fe 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -216,6 +216,13 @@ def test_to_dict_errors(self, mapping): with pytest.raises(TypeError): df.to_dict(into=mapping) + def test_to_dict_not_unique_warning(self): + # GH16927: When converting to a dict, if a column has a non-unique name + # it will be dropped, throwing a warning. 
+ df = DataFrame([[1, 2, 3]], columns=['a', 'a', 'b']) + with tm.assert_produces_warning(UserWarning): + df.to_dict() + @pytest.mark.parametrize('tz', ['UTC', 'GMT', 'US/Eastern']) def test_to_records_datetimeindex_with_tz(self, tz): # GH13937 From 4c498f8451fe4c491a6f38ed9e35da3d3ab6b9b8 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Jul 2017 11:26:10 -0500 Subject: [PATCH 764/933] CI: fix pep8speaks .yml file --- .pep8speakes.yml => .pep8speaks.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .pep8speakes.yml => .pep8speaks.yml (100%) diff --git a/.pep8speakes.yml b/.pep8speaks.yml similarity index 100% rename from .pep8speakes.yml rename to .pep8speaks.yml From 7500218947bffd4915832e9037d9f48991e53ca3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Jul 2017 12:33:04 -0500 Subject: [PATCH 765/933] DOC: whatsnew 0.21.0 edits --- doc/source/whatsnew/v0.21.0.txt | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6ddf6029b99bb..34095d55b8cc9 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -31,15 +31,15 @@ Other Enhancements ^^^^^^^^^^^^^^^^^^ - The ``validate`` argument for :func:`merge` function now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception of type ``MergeError`` will be raised. For more, see :ref:`here ` (:issue:`16270`) -- ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`) -- ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`) -- ``Series.rename_axis()`` and ``DataFrame.rename_axis()`` with ``inplace=True`` now return ``None`` while renaming the axis inplace. (:issue:`15704`) -- :func:`to_pickle` has gained a ``protocol`` parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__ +- :func:`Series.to_dict` and :func:`DataFrame.to_dict` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`) +- :func:`RangeIndex.append` now returns a ``RangeIndex`` object when possible (:issue:`16212`) +- :func:`Series.rename_axis` and :func:`DataFrame.rename_axis` with ``inplace=True`` now return ``None`` while renaming the axis inplace. (:issue:`15704`) +- :func:`Series.to_pickle` and :func:`DataFrame.to_pickle` have gained a ``protocol`` parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__ - :func:`api.types.infer_dtype` now infers decimals. (:issue:`15690`) - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) - :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`) - :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`) -- :func:`Dataframe.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`) +- :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`) .. 
_whatsnew_0210.api_breaking: @@ -92,9 +92,14 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in ... ValueError: Cannot operate inplace if there is no assignment +.. _whatsnew_0210.api: + +Other API Changes +^^^^^^^^^^^^^^^^^ + - Support has been dropped for Python 3.4 (:issue:`15251`) - The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`) -- Accessing a non-existent attribute on a closed :class:`HDFStore` will now +- Accessing a non-existent attribute on a closed :class:`~pandas.HDFStore` will now raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`) - :func:`read_csv` now treats ``'null'`` strings as missing values by default (:issue:`16471`) - :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`) @@ -102,12 +107,6 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in - Compression defaults in HDF stores now follow pytable standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`) - ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`) - Removed the ``@slow`` decorator from ``pandas.util.testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`) - -.. _whatsnew_0210.api: - -Other API Changes -^^^^^^^^^^^^^^^^^ - - Moved definition of ``MergeError`` to the ``pandas.errors`` module. @@ -127,7 +126,7 @@ Removal of prior version deprecations/changes - The ``pd.options.display.mpl_style`` configuration has been dropped (:issue:`12190`) - ``Index`` has dropped the ``.sym_diff()`` method in favor of ``.symmetric_difference()`` (:issue:`12591`) - ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) -- :func:`eval` and :method:`DataFrame.eval` have changed the default of ``inplace`` from ``None`` to ``False`` (:issue:`11149`) +- :func:`eval` and :func:`DataFrame.eval` have changed the default of ``inplace`` from ``None`` to ``False`` (:issue:`11149`) - The function ``get_offset_name`` has been dropped in favor of the ``.freqstr`` attribute for an offset (:issue:`11834`) From 3955261c04d5b838488a45fe7b186399bcdca137 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Jul 2017 13:07:35 -0500 Subject: [PATCH 766/933] CI: disable codecov reporting --- codecov.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/codecov.yml b/codecov.yml index b4552563deeaa..512bc2e82a736 100644 --- a/codecov.yml +++ b/codecov.yml @@ -5,7 +5,9 @@ coverage: status: project: default: + enabled: no target: '82' patch: default: + enabled: no target: '50' From 96168ef698ac8bbccba251258ee66958359b11bf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 15 Jul 2017 12:14:34 -0700 Subject: [PATCH 767/933] MAINT: Move series.remove_na to core.dtypes.missing.remove_na_arraylike Closes gh-16935 --- pandas/core/dtypes/missing.py | 7 +++++++ pandas/core/series.py | 11 ++--------- pandas/plotting/_core.py | 12 ++++++------ pandas/tests/test_panel.py | 4 ++-- pandas/tests/test_panel4d.py | 4 ++-- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index af3a873bc2866..9913923cb7807 100644 --- a/pandas/core/dtypes/missing.py +++ 
b/pandas/core/dtypes/missing.py @@ -394,3 +394,10 @@ def na_value_for_dtype(dtype): elif is_bool_dtype(dtype): return False return np.nan + + +def remove_na_arraylike(arr): + """ + Return array-like containing only true/non-NaN values, possibly empty. + """ + return arr[notnull(lib.values_from_object(arr))] diff --git a/pandas/core/series.py b/pandas/core/series.py index e1f668dd3afda..98b548f8ab3b5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -36,7 +36,7 @@ maybe_upcast, infer_dtype_from_scalar, maybe_convert_platform, maybe_cast_to_datetime, maybe_castable) -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isnull, notnull, remove_na_arraylike from pandas.core.common import (is_bool_indexer, _default_index, @@ -2749,7 +2749,7 @@ def dropna(self, axis=0, inplace=False, **kwargs): axis = self._get_axis_number(axis or 0) if self._can_hold_na: - result = remove_na(self) + result = remove_na_arraylike(self) if inplace: self._update_inplace(result) else: @@ -2888,13 +2888,6 @@ def _dir_additions(self): # Supplementary functions -def remove_na(series): - """ - Return series containing only true/non-NaN values, possibly empty. - """ - return series[notnull(_values_from_object(series))] - - def _sanitize_index(data, index, copy=False): """ sanitize an index type to return an ndarray of the underlying, pass thru a non-Index diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index f8e83aea03594..9cceebb5c4cdb 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -11,7 +11,7 @@ from pandas.util._decorators import cache_readonly from pandas.core.base import PandasObject -from pandas.core.dtypes.missing import notnull +from pandas.core.dtypes.missing import notnull, remove_na_arraylike from pandas.core.dtypes.common import ( is_list_like, is_integer, @@ -21,7 +21,7 @@ from pandas.core.common import AbstractMethodError, isnull, _try_sort from pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import Index, MultiIndex -from pandas.core.series import Series, remove_na +from pandas.core.series import Series from pandas.core.indexes.period import PeriodIndex from pandas.compat import range, lrange, map, zip, string_types import pandas.compat as compat @@ -1376,7 +1376,7 @@ def _plot(cls, ax, y, style=None, bw_method=None, ind=None, from scipy.stats import gaussian_kde from scipy import __version__ as spv - y = remove_na(y) + y = remove_na_arraylike(y) if LooseVersion(spv) >= '0.11.0': gkde = gaussian_kde(y, bw_method=bw_method) @@ -1495,13 +1495,13 @@ def _args_adjust(self): @classmethod def _plot(cls, ax, y, column_num=None, return_type='axes', **kwds): if y.ndim == 2: - y = [remove_na(v) for v in y] + y = [remove_na_arraylike(v) for v in y] # Boxplot fails with empty arrays, so need to add a NaN # if any cols are empty # GH 8181 y = [v if v.size > 0 else np.array([np.nan]) for v in y] else: - y = remove_na(y) + y = remove_na_arraylike(y) bp = ax.boxplot(y, **kwds) if return_type == 'dict': @@ -1969,7 +1969,7 @@ def maybe_color_bp(bp): def plot_group(keys, values, ax): keys = [pprint_thing(x) for x in keys] - values = [remove_na(v) for v in values] + values = [remove_na_arraylike(v) for v in values] bp = ax.boxplot(values, **kwds) if fontsize is not None: ax.tick_params(axis='both', labelsize=fontsize) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index e19e42e062932..445611c1696f5 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py 
@@ -10,11 +10,11 @@ import pandas as pd from pandas.core.dtypes.common import is_float_dtype +from pandas.core.dtypes.missing import remove_na_arraylike from pandas import (Series, DataFrame, Index, date_range, isnull, notnull, pivot, MultiIndex) from pandas.core.nanops import nanall, nanany from pandas.core.panel import Panel -from pandas.core.series import remove_na from pandas.io.formats.printing import pprint_thing from pandas import compat @@ -155,7 +155,7 @@ def _check_stat_op(self, name, alternative, obj=None, has_skipna=True): if has_skipna: def skipna_wrapper(x): - nona = remove_na(x) + nona = remove_na_arraylike(x) if len(nona) == 0: return np.nan return alternative(nona) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index e1995316e7b7c..18643aff15e9b 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -7,10 +7,10 @@ import numpy as np from pandas.core.dtypes.common import is_float_dtype +from pandas.core.dtypes.missing import remove_na_arraylike from pandas import Series, Index, isnull, notnull from pandas.core.panel import Panel from pandas.core.panel4d import Panel4D -from pandas.core.series import remove_na from pandas.tseries.offsets import BDay from pandas.util.testing import (assert_frame_equal, assert_series_equal, @@ -118,7 +118,7 @@ def _check_stat_op(self, name, alternative, obj=None, has_skipna=True): if has_skipna: def skipna_wrapper(x): - nona = remove_na(x) + nona = remove_na_arraylike(x) if len(nona) == 0: return np.nan return alternative(nona) From 2cd85ca748f62d7430b30e2d9ddd036e972cc64e Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Sat, 15 Jul 2017 16:28:23 -0500 Subject: [PATCH 768/933] Support non unique period indexes on join and merge operations (#16949) * Support non unique period indexes on join and merge operations * Add frame assertion on tests and release notes * Explicitly use dtype int64 on arange --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/indexes/base.py | 4 ++-- pandas/tests/reshape/test_join.py | 12 ++++++++++++ pandas/tests/reshape/test_merge.py | 12 ++++++++++++ 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 34095d55b8cc9..11d3e4cf964aa 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -184,7 +184,7 @@ Sparse Reshaping ^^^^^^^^^ - +- Joining/Merging with a non unique ``PeriodIndex`` raised a TypeError (:issue:`16871`) Numeric diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e1053c1610175..bbbc19b36964d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3119,14 +3119,14 @@ def _join_multi(self, other, how, return_indexers=True): def _join_non_unique(self, other, how='left', return_indexers=False): from pandas.core.reshape.merge import _get_join_indexers - left_idx, right_idx = _get_join_indexers([self.values], + left_idx, right_idx = _get_join_indexers([self._values], [other._values], how=how, sort=True) left_idx = _ensure_platform_int(left_idx) right_idx = _ensure_platform_int(right_idx) - join_index = np.asarray(self.values.take(left_idx)) + join_index = np.asarray(self._values.take(left_idx)) mask = left_idx == -1 np.putmask(join_index, mask, other._values.take(right_idx)) diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py index e25661fb65271..e4894307918c6 100644 --- a/pandas/tests/reshape/test_join.py +++ b/pandas/tests/reshape/test_join.py @@ -550,6 +550,18 @@ 
def test_join_mixed_non_unique_index(self): index=[1, 2, 2, 'a']) tm.assert_frame_equal(result, expected) + def test_join_non_unique_period_index(self): + # GH #16871 + index = pd.period_range('2016-01-01', periods=16, freq='M') + df = DataFrame([i for i in range(len(index))], + index=index, columns=['pnum']) + df2 = concat([df, df]) + result = df.join(df2, how='inner', rsuffix='_df2') + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=['pnum', 'pnum_df2'], index=df2.sort_index().index) + tm.assert_frame_equal(result, expected) + def test_mixed_type_join_with_suffix(self): # GH #916 df = DataFrame(np.random.randn(20, 6), diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 4ac376a9752cb..919675188576e 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -585,6 +585,18 @@ def test_merge_on_datetime64tz(self): assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]' assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]' + def test_merge_non_unique_period_index(self): + # GH #16871 + index = pd.period_range('2016-01-01', periods=16, freq='M') + df = DataFrame([i for i in range(len(index))], + index=index, columns=['pnum']) + df2 = concat([df, df]) + result = df.merge(df2, left_index=True, right_index=True, how='inner') + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=['pnum_x', 'pnum_y'], index=df2.sort_index().index) + tm.assert_frame_equal(result, expected) + def test_merge_on_periods(self): left = pd.DataFrame({'key': pd.period_range('20151010', periods=2, freq='D'), From 8e3d8315d63f61c1cc7a0ea9ad24cdd63b63f6b8 Mon Sep 17 00:00:00 2001 From: Morgan243 Date: Sat, 15 Jul 2017 19:13:49 -0400 Subject: [PATCH 769/933] BUG: Set secondary axis font size for `secondary_y` during plotting The parameter was not being respected for `secondary_y`. 
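As an illustration (not part of the patch), a minimal sketch of the fixed
behavior, assuming matplotlib is installed; the data and fontsize value are
hypothetical:

    import matplotlib
    matplotlib.use('Agg')  # non-interactive backend for the sketch
    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(10, 2), columns=['A', 'B'])
    # `fontsize` should now also reach the tick labels of the right-hand
    # axis that plotting creates for the `secondary_y` columns.
    ax = df.plot(secondary_y=['B'], fontsize=20)
    assert all(t.get_fontsize() == 20
               for t in ax.right_ax.get_yticklabels())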
Closes gh-12565 --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/plotting/_core.py | 9 +++++++++ pandas/tests/plotting/test_frame.py | 17 +++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 11d3e4cf964aa..df53c4a3d6caf 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -166,7 +166,7 @@ I/O Plotting ^^^^^^^^ - +- Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`) Groupby/Resample/Rolling diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 9cceebb5c4cdb..a623288efc1ae 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -379,6 +379,11 @@ def _post_plot_logic_common(self, ax, data): self._apply_axis_properties(ax.xaxis, rot=self.rot, fontsize=self.fontsize) self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize) + + if hasattr(ax, 'right_ax'): + self._apply_axis_properties(ax.right_ax.yaxis, + fontsize=self.fontsize) + elif self.orientation == 'horizontal': if self._need_to_set_index: yticklabels = [labels.get(y, '') for y in ax.get_yticks()] @@ -386,6 +391,10 @@ def _post_plot_logic_common(self, ax, data): self._apply_axis_properties(ax.yaxis, rot=self.rot, fontsize=self.fontsize) self._apply_axis_properties(ax.xaxis, fontsize=self.fontsize) + + if hasattr(ax, 'right_ax'): + self._apply_axis_properties(ax.right_ax.yaxis, + fontsize=self.fontsize) else: # pragma no cover raise ValueError diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 7878740f64e55..6d813ac76cc4e 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -2733,6 +2733,23 @@ def test_rcParams_bar_colors(self): barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] + @pytest.mark.parametrize('method', ['line', 'barh', 'bar']) + def test_secondary_axis_font_size(self, method): + # GH: 12565 + df = (pd.DataFrame(np.random.randn(15, 2), + columns=list('AB')) + .assign(C=lambda df: df.B.cumsum()) + .assign(D=lambda df: df.C * 1.1)) + + fontsize = 20 + sy = ['C', 'D'] + + kwargs = dict(secondary_y=sy, fontsize=fontsize, + mark_right=True) + ax = getattr(df.plot, method)(**kwargs) + self._check_ticks_props(axes=ax.right_ax, + ylabelsize=fontsize) + def _generate_4_axes_via_gridspec(): import matplotlib.pyplot as plt From 4f04d0be1fe22dabaff6c0eeb6162bffb763af46 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Jul 2017 16:04:03 -0500 Subject: [PATCH 770/933] DOC: more whatsnew fixes --- doc/source/whatsnew/v0.21.0.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index df53c4a3d6caf..a5d4259480ba8 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -46,11 +46,11 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _whatsnew_0210.api_breaking.pandas_eval: + Improved error handling during item assignment in pd.eval ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. _whatsnew_0210.api_breaking.pandas_eval: - :func:`eval` will now raise a ``ValueError`` when item assignment malfunctions, or inplace operations are specified, but there is no item assignment in the expression (:issue:`16732`) @@ -154,8 +154,8 @@ Indexing - When called with a null slice (e.g. 
``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). - When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`). - Fixes regression in 0.20.3 when indexing with a string on a ``TimedeltaIndex`` (:issue:`16896`). -- Fixed ``TimedeltaIndex.get_loc`` handling of ``np.timedelta64`` inputs (:issue:`16909`). -- Fix :meth:`MultiIndex.sort_index` ordering when ``ascending`` argument is a list, but not all levels are specified, or are in a different order (:issue:`16934`). +- Fixed :func:`TimedeltaIndex.get_loc` handling of ``np.timedelta64`` inputs (:issue:`16909`). +- Fix :func:`MultiIndex.sort_index` ordering when ``ascending`` argument is a list, but not all levels are specified, or are in a different order (:issue:`16934`). I/O ^^^ @@ -172,9 +172,9 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) -- Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) -- Bug in ``.rolling.quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) +- Bug in ``DataFrame.resample(...).size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) +- Bug in :func:`infer_freq` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) +- Bug in ``.rolling(...).quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) Sparse @@ -194,7 +194,7 @@ Numeric Categorical ^^^^^^^^^^^ -- Bug in ``:func:Series.isin()`` when called with a categorical (:issue`16639`) +- Bug in :func:`Series.isin` when called with a categorical (:issue`16639`) Other From 1212fe034b7302f40bf253aedd9e3989514eeb52 Mon Sep 17 00:00:00 2001 From: aernlund Date: Sat, 15 Jul 2017 18:43:02 -0500 Subject: [PATCH 771/933] DOC: Reset index examples closes #16416 Author: aernlund Closes #16967 from aernlund/reset_index_docs and squashes the following commits: 3c6a4b6 [aernlund] DOC: added examples to reset_index 4838155 [aernlund] DOC: added examples to reset_index 2a51e2b [aernlund] DOC: added examples to reset_index --- pandas/core/frame.py | 32 ++++++++++++++++++++++++++++++++ pandas/core/series.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9920ddf854850..9a79ca1d4eab1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3020,6 +3020,38 @@ def reset_index(self, level=None, drop=False, inplace=False, col_level=0, Returns ------- resetted : DataFrame + + Examples + -------- + >>> df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, + ... index=pd.Index(['a', 'b', 'c', 'd'], + ... name='idx')) + >>> df.reset_index() + idx a b + 0 a 1 5 + 1 b 2 6 + 2 c 3 7 + 3 d 4 8 + + >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz', 'foo', + ... 'foo', 'qux', 'qux']), + ... np.array(['one', 'two', 'one', 'two', 'one', 'two', + ... 'one', 'two'])] + >>> df2 = pd.DataFrame( + ... np.random.randn(8, 4), + ... index=pd.MultiIndex.from_arrays(arrays, + ... 
names=['a', 'b'])) + >>> df2.reset_index(level='a') + a 0 1 2 3 + b + one bar -1.099413 0.291838 0.598198 0.162181 + two bar -0.312184 -0.119904 0.250360 0.364378 + one baz 0.713596 -0.490636 0.074967 -0.297857 + two baz 0.998397 0.524499 -2.228976 0.901155 + one foo 0.923204 0.920695 1.264488 1.476921 + two foo -1.566922 0.783278 -0.073656 0.266027 + one qux -0.230470 0.109800 -1.383409 0.048421 + two qux -0.865993 -0.865984 0.705367 -0.170446 """ inplace = validate_bool_kwarg(inplace, 'inplace') if inplace: diff --git a/pandas/core/series.py b/pandas/core/series.py index 98b548f8ab3b5..4d5b718ce0ae9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -948,6 +948,37 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): Returns ---------- resetted : DataFrame, or Series if drop == True + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4], index=pd.Index(['a', 'b', 'c', 'd'], + ... name = 'idx')) + >>> s.reset_index() + index 0 + 0 0 1 + 1 1 2 + 2 2 3 + 3 3 4 + + >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz', 'foo', + ... 'foo', 'qux', 'qux']), + ... np.array(['one', 'two', 'one', 'two', 'one', 'two', + ... 'one', 'two'])] + >>> s2 = pd.Series( + ... np.random.randn(8), + ... index=pd.MultiIndex.from_arrays(arrays, + ... names=['a', 'b'])) + >>> s2.reset_index(level='a') + a 0 + b + one bar -0.286320 + two bar -0.587934 + one baz 0.710491 + two baz -1.429006 + one foo 0.790700 + two foo 0.824863 + one qux -0.718963 + two qux -0.055028 """ inplace = validate_bool_kwarg(inplace, 'inplace') if drop: From 3524edb82e7945998876591813b7e77fe620ce36 Mon Sep 17 00:00:00 2001 From: Daniel Chen Date: Sat, 15 Jul 2017 18:49:08 -0500 Subject: [PATCH 772/933] channel from pandas to conda-forge (#16966) --- doc/source/contributing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index bfcf560565977..b44d0f36b86a1 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -171,7 +171,7 @@ other dependencies, you can install them as follows:: To install *all* pandas dependencies you can do the following:: - conda install -n pandas_dev -c pandas --file ci/requirements_all.txt + conda install -n pandas_dev -c conda-forge --file ci/requirements_all.txt To work in this environment, Windows users should ``activate`` it as follows:: From 53ae390f442e745503745e5fa8ed7b06b72fd102 Mon Sep 17 00:00:00 2001 From: Prasanjit Prakash Date: Sun, 16 Jul 2017 06:42:57 +0530 Subject: [PATCH 773/933] BUG: coercing of bools in groupby transform (#16895) --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/dtypes/cast.py | 7 ++++--- pandas/tests/dtypes/test_cast.py | 8 +++++++- pandas/tests/groupby/test_transform.py | 13 +++++++++++++ 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index a5d4259480ba8..762107a261090 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -175,7 +175,7 @@ Groupby/Resample/Rolling - Bug in ``DataFrame.resample(...).size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) - Bug in :func:`infer_freq` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) - Bug in ``.rolling(...).quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) - +- Bug in ``groupby.transform()`` 
that would coerce boolean dtypes back to float (:issue:`16875`) Sparse ^^^^^^ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 16b0a5c8a74ca..6532e17695c86 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -110,9 +110,7 @@ def trans(x): # noqa np.prod(result.shape)): return result - if issubclass(dtype.type, np.floating): - return result.astype(dtype) - elif is_bool_dtype(dtype) or is_integer_dtype(dtype): + if is_bool_dtype(dtype) or is_integer_dtype(dtype): # if we don't have any elements, just astype it if not np.prod(result.shape): @@ -144,6 +142,9 @@ def trans(x): # noqa # hit here if (new_result == result).all(): return new_result + elif (issubclass(dtype.type, np.floating) and + not is_bool_dtype(result.dtype)): + return result.astype(dtype) # a datetimelike # GH12821, iNaT is casted to float diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index 767e99d98cf29..6e07487b3e04f 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -9,7 +9,7 @@ from datetime import datetime, timedelta, date import numpy as np -from pandas import Timedelta, Timestamp, DatetimeIndex, DataFrame, NaT +from pandas import Timedelta, Timestamp, DatetimeIndex, DataFrame, NaT, Series from pandas.core.dtypes.cast import ( maybe_downcast_to_dtype, @@ -45,6 +45,12 @@ def test_downcast_conv(self): expected = np.array([8, 8, 8, 8, 9]) assert (np.array_equal(result, expected)) + # GH16875 coercing of bools + ser = Series([True, True, False]) + result = maybe_downcast_to_dtype(ser, np.dtype(np.float64)) + expected = ser + tm.assert_series_equal(result, expected) + # conversions expected = np.array([1, 2]) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 40434ff510421..98839a17d6e0c 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -195,6 +195,19 @@ def test_transform_bug(self): expected = Series(np.arange(5, 0, step=-1), name='B') assert_series_equal(result, expected) + def test_transform_numeric_to_boolean(self): + # GH 16875 + # inconsistency in transforming boolean values + expected = pd.Series([True, True], name='A') + + df = pd.DataFrame({'A': [1.1, 2.2], 'B': [1, 2]}) + result = df.groupby('B').A.transform(lambda x: True) + assert_series_equal(result, expected) + + df = pd.DataFrame({'A': [1, 2], 'B': [1, 2]}) + result = df.groupby('B').A.transform(lambda x: True) + assert_series_equal(result, expected) + def test_transform_datetime_to_timedelta(self): # GH 15429 # transforming a datetime to timedelta From 01a8be3578e9d0b2a66b8318c5477e3e6cfb75f2 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 15 Jul 2017 21:20:55 -0400 Subject: [PATCH 774/933] DOC: misspelling in DatetimeIndex.indexer_between_time [CI skip] (#16963) --- pandas/core/indexes/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index d8aae2367976b..e6bc1790f2992 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1882,7 +1882,7 @@ def indexer_between_time(self, start_time, end_time, include_start=True, Select values between particular times of day (e.g., 9:00-9:30AM). Return values of the index between two times. If start_time or - end_time are strings then tseres.tools.to_time is used to convert to + end_time are strings then tseries.tools.to_time is used to convert to a time object. 
Parameters From 148e038bfaf2a3893b52e28b6469cf5984eec794 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Jul 2017 20:42:56 -0500 Subject: [PATCH 775/933] CLN: some residual code removed, xref to #16761 (#16974) --- pandas/core/config_init.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index e70db1d13e376..04563907582ee 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -255,18 +255,6 @@ def use_numexpr_cb(key): df.info() (the behaviour in earlier versions of pandas). """ -pc_mpl_style_doc = """ -: bool - Setting this to 'default' will modify the rcParams used by matplotlib - to give plots a more pleasing visual style by default. - Setting this to None/False restores the values to their initial value. -""" - -pc_mpl_style_deprecation_warning = """ -mpl_style had been deprecated and will be removed in a future version. -Use `matplotlib.pyplot.style.use` instead. -""" - pc_memory_usage_doc = """ : bool, string or None This specifies if the memory usage of a DataFrame should be displayed when From 9c096d29a1e9a68b8151de4896b0d9684383821a Mon Sep 17 00:00:00 2001 From: Iva Miholic Date: Sun, 16 Jul 2017 09:04:35 +0100 Subject: [PATCH 776/933] ENH: Create a 'Y' alias for date_range yearly frequency Closes gh-9313 --- pandas/tests/tseries/test_frequencies.py | 41 ++++++++++++++++-------- pandas/tseries/frequencies.py | 22 +++++++++++++ 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index 54d12317b0bf8..4bcd0b49db7e0 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -248,9 +248,10 @@ def test_anchored_shortcuts(self): # ensure invalid cases fail as expected invalid_anchors = ['SM-0', 'SM-28', 'SM-29', - 'SM-FOO', 'BSM', 'SM--1' + 'SM-FOO', 'BSM', 'SM--1', 'SMS-1', 'SMS-28', 'SMS-30', - 'SMS-BAR', 'BSMS', 'SMS--2'] + 'SMS-BAR', 'SMS-BYR' 'BSMS', + 'SMS--2'] for invalid_anchor in invalid_anchors: with tm.assert_raises_regex(ValueError, 'Invalid frequency: '): @@ -292,11 +293,15 @@ def test_get_rule_month(): result = frequencies._get_rule_month('A-DEC') assert (result == 'DEC') + result = frequencies._get_rule_month('Y-DEC') + assert (result == 'DEC') result = frequencies._get_rule_month(offsets.YearEnd()) assert (result == 'DEC') result = frequencies._get_rule_month('A-MAY') assert (result == 'MAY') + result = frequencies._get_rule_month('Y-MAY') + assert (result == 'MAY') result = frequencies._get_rule_month(offsets.YearEnd(month=5)) assert (result == 'MAY') @@ -305,6 +310,10 @@ def test_period_str_to_code(): assert (frequencies._period_str_to_code('A') == 1000) assert (frequencies._period_str_to_code('A-DEC') == 1000) assert (frequencies._period_str_to_code('A-JAN') == 1001) + assert (frequencies._period_str_to_code('Y') == 1000) + assert (frequencies._period_str_to_code('Y-DEC') == 1000) + assert (frequencies._period_str_to_code('Y-JAN') == 1001) + assert (frequencies._period_str_to_code('Q') == 2000) assert (frequencies._period_str_to_code('Q-DEC') == 2000) assert (frequencies._period_str_to_code('Q-FEB') == 2002) @@ -349,6 +358,10 @@ def test_freq_code(self): assert frequencies.get_freq('3A') == 1000 assert frequencies.get_freq('-1A') == 1000 + assert frequencies.get_freq('Y') == 1000 + assert frequencies.get_freq('3Y') == 1000 + assert frequencies.get_freq('-1Y') == 1000 + assert frequencies.get_freq('W') == 4000 assert 
frequencies.get_freq('W-MON') == 4001 assert frequencies.get_freq('W-FRI') == 4005 @@ -369,6 +382,13 @@ def test_freq_group(self): assert frequencies.get_freq_group('-1A') == 1000 assert frequencies.get_freq_group('A-JAN') == 1000 assert frequencies.get_freq_group('A-MAY') == 1000 + + assert frequencies.get_freq_group('Y') == 1000 + assert frequencies.get_freq_group('3Y') == 1000 + assert frequencies.get_freq_group('-1Y') == 1000 + assert frequencies.get_freq_group('Y-JAN') == 1000 + assert frequencies.get_freq_group('Y-MAY') == 1000 + assert frequencies.get_freq_group(offsets.YearEnd()) == 1000 assert frequencies.get_freq_group(offsets.YearEnd(month=1)) == 1000 assert frequencies.get_freq_group(offsets.YearEnd(month=5)) == 1000 @@ -790,12 +810,6 @@ def test_series(self): for freq in [None, 'L']: s = Series(period_range('2013', periods=10, freq=freq)) pytest.raises(TypeError, lambda: frequencies.infer_freq(s)) - for freq in ['Y']: - - msg = frequencies._INVALID_FREQ_ERROR - with tm.assert_raises_regex(ValueError, msg): - s = Series(period_range('2013', periods=10, freq=freq)) - pytest.raises(TypeError, lambda: frequencies.infer_freq(s)) # DateTimeIndex for freq in ['M', 'L', 'S']: @@ -812,11 +826,12 @@ def test_legacy_offset_warnings(self): 'W@FRI', 'W@SAT', 'W@SUN', 'Q@JAN', 'Q@FEB', 'Q@MAR', 'A@JAN', 'A@FEB', 'A@MAR', 'A@APR', 'A@MAY', 'A@JUN', 'A@JUL', 'A@AUG', 'A@SEP', 'A@OCT', 'A@NOV', 'A@DEC', - 'WOM@1MON', 'WOM@2MON', 'WOM@3MON', 'WOM@4MON', - 'WOM@1TUE', 'WOM@2TUE', 'WOM@3TUE', 'WOM@4TUE', - 'WOM@1WED', 'WOM@2WED', 'WOM@3WED', 'WOM@4WED', - 'WOM@1THU', 'WOM@2THU', 'WOM@3THU', 'WOM@4THU' - 'WOM@1FRI', 'WOM@2FRI', 'WOM@3FRI', 'WOM@4FRI'] + 'Y@JAN', 'WOM@1MON', 'WOM@2MON', 'WOM@3MON', + 'WOM@4MON', 'WOM@1TUE', 'WOM@2TUE', 'WOM@3TUE', + 'WOM@4TUE', 'WOM@1WED', 'WOM@2WED', 'WOM@3WED', + 'WOM@4WED', 'WOM@1THU', 'WOM@2THU', 'WOM@3THU', + 'WOM@4THU', 'WOM@1FRI', 'WOM@2FRI', 'WOM@3FRI', + 'WOM@4FRI'] msg = frequencies._INVALID_FREQ_ERROR for freq in freqs: diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index c5f6c00a4005a..5c3c90520d1c3 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -422,6 +422,27 @@ def get_period_alias(offset_str): return _offset_to_period_map.get(offset_str, None) +_pure_alias = { + # 'A' is equivalent to 'Y'. + 'Y': 'A', + 'YS': 'AS', + 'BY': 'BA', + 'BYS': 'BAS', + 'Y-DEC': 'A-DEC', + 'Y-JAN': 'A-JAN', + 'Y-FEB': 'A-FEB', + 'Y-MAR': 'A-MAR', + 'Y-APR': 'A-APR', + 'Y-MAY': 'A-MAY', + 'Y-JUN': 'A-JUN', + 'Y-JUL': 'A-JUL', + 'Y-AUG': 'A-AUG', + 'Y-SEP': 'A-SEP', + 'Y-OCT': 'A-OCT', + 'Y-NOV': 'A-NOV', +} + + _lite_rule_alias = { 'W': 'W-SUN', 'Q': 'Q-DEC', @@ -718,6 +739,7 @@ def get_standard_freq(freq): def _period_str_to_code(freqstr): + freqstr = _pure_alias.get(freqstr, freqstr) freqstr = _lite_rule_alias.get(freqstr, freqstr) if freqstr not in _dont_uppercase: From 7ffe7fc21f3dc4ca444de9c83dbf61313b6986e2 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 16 Jul 2017 02:57:14 -0700 Subject: [PATCH 777/933] Revert "ENH: Create a 'Y' alias for date_range yearly frequency" (#16976) This reverts commit 9c096d29a1e9a68b8151de4896b0d9684383821a, as it was prematurely made. 
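For reference, the reverted commit normalized 'Y'-style yearly aliases onto
the canonical 'A' frequency codes before lookup. A standalone sketch of that
mapping idea, written in plain Python and independent of pandas internals:

    # Yearly 'Y' spellings map onto the equivalent annual 'A' codes;
    # anything else passes through unchanged.
    months = ('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
              'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC')
    pure_alias = {'Y': 'A', 'YS': 'AS', 'BY': 'BA', 'BYS': 'BAS'}
    pure_alias.update({'Y-' + m: 'A-' + m for m in months})

    def normalize(freqstr):
        return pure_alias.get(freqstr, freqstr)

    assert normalize('Y-DEC') == 'A-DEC'
    assert normalize('Q-DEC') == 'Q-DEC'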
--- pandas/tests/tseries/test_frequencies.py | 41 ++++++++---------------- pandas/tseries/frequencies.py | 22 ------------- 2 files changed, 13 insertions(+), 50 deletions(-) diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index 4bcd0b49db7e0..54d12317b0bf8 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -248,10 +248,9 @@ def test_anchored_shortcuts(self): # ensure invalid cases fail as expected invalid_anchors = ['SM-0', 'SM-28', 'SM-29', - 'SM-FOO', 'BSM', 'SM--1', + 'SM-FOO', 'BSM', 'SM--1' 'SMS-1', 'SMS-28', 'SMS-30', - 'SMS-BAR', 'SMS-BYR' 'BSMS', - 'SMS--2'] + 'SMS-BAR', 'BSMS', 'SMS--2'] for invalid_anchor in invalid_anchors: with tm.assert_raises_regex(ValueError, 'Invalid frequency: '): @@ -293,15 +292,11 @@ def test_get_rule_month(): result = frequencies._get_rule_month('A-DEC') assert (result == 'DEC') - result = frequencies._get_rule_month('Y-DEC') - assert (result == 'DEC') result = frequencies._get_rule_month(offsets.YearEnd()) assert (result == 'DEC') result = frequencies._get_rule_month('A-MAY') assert (result == 'MAY') - result = frequencies._get_rule_month('Y-MAY') - assert (result == 'MAY') result = frequencies._get_rule_month(offsets.YearEnd(month=5)) assert (result == 'MAY') @@ -310,10 +305,6 @@ def test_period_str_to_code(): assert (frequencies._period_str_to_code('A') == 1000) assert (frequencies._period_str_to_code('A-DEC') == 1000) assert (frequencies._period_str_to_code('A-JAN') == 1001) - assert (frequencies._period_str_to_code('Y') == 1000) - assert (frequencies._period_str_to_code('Y-DEC') == 1000) - assert (frequencies._period_str_to_code('Y-JAN') == 1001) - assert (frequencies._period_str_to_code('Q') == 2000) assert (frequencies._period_str_to_code('Q-DEC') == 2000) assert (frequencies._period_str_to_code('Q-FEB') == 2002) @@ -358,10 +349,6 @@ def test_freq_code(self): assert frequencies.get_freq('3A') == 1000 assert frequencies.get_freq('-1A') == 1000 - assert frequencies.get_freq('Y') == 1000 - assert frequencies.get_freq('3Y') == 1000 - assert frequencies.get_freq('-1Y') == 1000 - assert frequencies.get_freq('W') == 4000 assert frequencies.get_freq('W-MON') == 4001 assert frequencies.get_freq('W-FRI') == 4005 @@ -382,13 +369,6 @@ def test_freq_group(self): assert frequencies.get_freq_group('-1A') == 1000 assert frequencies.get_freq_group('A-JAN') == 1000 assert frequencies.get_freq_group('A-MAY') == 1000 - - assert frequencies.get_freq_group('Y') == 1000 - assert frequencies.get_freq_group('3Y') == 1000 - assert frequencies.get_freq_group('-1Y') == 1000 - assert frequencies.get_freq_group('Y-JAN') == 1000 - assert frequencies.get_freq_group('Y-MAY') == 1000 - assert frequencies.get_freq_group(offsets.YearEnd()) == 1000 assert frequencies.get_freq_group(offsets.YearEnd(month=1)) == 1000 assert frequencies.get_freq_group(offsets.YearEnd(month=5)) == 1000 @@ -810,6 +790,12 @@ def test_series(self): for freq in [None, 'L']: s = Series(period_range('2013', periods=10, freq=freq)) pytest.raises(TypeError, lambda: frequencies.infer_freq(s)) + for freq in ['Y']: + + msg = frequencies._INVALID_FREQ_ERROR + with tm.assert_raises_regex(ValueError, msg): + s = Series(period_range('2013', periods=10, freq=freq)) + pytest.raises(TypeError, lambda: frequencies.infer_freq(s)) # DateTimeIndex for freq in ['M', 'L', 'S']: @@ -826,12 +812,11 @@ def test_legacy_offset_warnings(self): 'W@FRI', 'W@SAT', 'W@SUN', 'Q@JAN', 'Q@FEB', 'Q@MAR', 'A@JAN', 'A@FEB', 'A@MAR', 'A@APR', 
'A@MAY', 'A@JUN', 'A@JUL', 'A@AUG', 'A@SEP', 'A@OCT', 'A@NOV', 'A@DEC', - 'Y@JAN', 'WOM@1MON', 'WOM@2MON', 'WOM@3MON', - 'WOM@4MON', 'WOM@1TUE', 'WOM@2TUE', 'WOM@3TUE', - 'WOM@4TUE', 'WOM@1WED', 'WOM@2WED', 'WOM@3WED', - 'WOM@4WED', 'WOM@1THU', 'WOM@2THU', 'WOM@3THU', - 'WOM@4THU', 'WOM@1FRI', 'WOM@2FRI', 'WOM@3FRI', - 'WOM@4FRI'] + 'WOM@1MON', 'WOM@2MON', 'WOM@3MON', 'WOM@4MON', + 'WOM@1TUE', 'WOM@2TUE', 'WOM@3TUE', 'WOM@4TUE', + 'WOM@1WED', 'WOM@2WED', 'WOM@3WED', 'WOM@4WED', + 'WOM@1THU', 'WOM@2THU', 'WOM@3THU', 'WOM@4THU' + 'WOM@1FRI', 'WOM@2FRI', 'WOM@3FRI', 'WOM@4FRI'] msg = frequencies._INVALID_FREQ_ERROR for freq in freqs: diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 5c3c90520d1c3..c5f6c00a4005a 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -422,27 +422,6 @@ def get_period_alias(offset_str): return _offset_to_period_map.get(offset_str, None) -_pure_alias = { - # 'A' is equivalent to 'Y'. - 'Y': 'A', - 'YS': 'AS', - 'BY': 'BA', - 'BYS': 'BAS', - 'Y-DEC': 'A-DEC', - 'Y-JAN': 'A-JAN', - 'Y-FEB': 'A-FEB', - 'Y-MAR': 'A-MAR', - 'Y-APR': 'A-APR', - 'Y-MAY': 'A-MAY', - 'Y-JUN': 'A-JUN', - 'Y-JUL': 'A-JUL', - 'Y-AUG': 'A-AUG', - 'Y-SEP': 'A-SEP', - 'Y-OCT': 'A-OCT', - 'Y-NOV': 'A-NOV', -} - - _lite_rule_alias = { 'W': 'W-SUN', 'Q': 'Q-DEC', @@ -739,7 +718,6 @@ def get_standard_freq(freq): def _period_str_to_code(freqstr): - freqstr = _pure_alias.get(freqstr, freqstr) freqstr = _lite_rule_alias.get(freqstr, freqstr) if freqstr not in _dont_uppercase: From 1d1c03ef807b5ea3cd589b60ea578c88a0c1227c Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Sun, 16 Jul 2017 17:23:30 +0200 Subject: [PATCH 778/933] DOC: behavior when slicing with missing bounds (#16932) closes #16917 --- doc/source/indexing.rst | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index f988fb7cd6806..1659d57b33b84 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -78,8 +78,10 @@ of multi-axis indexing. *label* of the index. This use is **not** an integer position along the index) - A list or array of labels ``['a', 'b', 'c']`` - - A slice object with labels ``'a':'f'``, (note that contrary to usual python - slices, **both** the start and the stop are included!) + - A slice object with labels ``'a':'f'`` (note that contrary to usual python + slices, **both** the start and the stop are included, when present in the + index! - also see :ref:`Slicing with labels + `) - A boolean array - A ``callable`` function with one argument (the calling Series, DataFrame or Panel) and that returns valid output for indexing (one of the above) @@ -330,13 +332,16 @@ Selection By Label dfl.loc['20130102':'20130104'] pandas provides a suite of methods in order to have **purely label based indexing**. This is a strict inclusion based protocol. -**At least 1** of the labels for which you ask, must be in the index or a ``KeyError`` will be raised! When slicing, the start bound is *included*, **AND** the stop bound is *included*. Integers are valid labels, but they refer to the label **and not the position**. +**At least 1** of the labels for which you ask, must be in the index or a ``KeyError`` will be raised! When slicing, both the start bound **AND** the stop bound are *included*, if present in the index. Integers are valid labels, but they refer to the label **and not the position**. The ``.loc`` attribute is the primary access method. 
The following are valid inputs: - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is interpreted as a *label* of the index. This use is **not** an integer position along the index) - A list or array of labels ``['a', 'b', 'c']`` -- A slice object with labels ``'a':'f'`` (note that contrary to usual python slices, **both** the start and the stop are included!) +- A slice object with labels ``'a':'f'`` (note that contrary to usual python + slices, **both** the start and the stop are included, when present in the + index! - also See :ref:`Slicing with labels + `) - A boolean array - A ``callable``, see :ref:`Selection By Callable ` @@ -390,6 +395,34 @@ For getting a value explicitly (equiv to deprecated ``df.get_value('a','A')``) # this is also equivalent to ``df1.at['a','A']`` df1.loc['a', 'A'] +.. _indexing.slicing_with_labels: + +Slicing with labels +~~~~~~~~~~~~~~~~~~~ + +When using ``.loc`` with slices, if both the start and the stop labels are +present in the index, then elements *located* between the two (including them) +are returned: + +.. ipython:: python + + s = pd.Series(list('abcde'), index=[0,3,2,5,4]) + s.loc[3:5] + +If at least one of the two is absent, but the index is sorted, and can be +compared against start and stop labels, then slicing will still work as +expected, by selecting labels which *rank* between the two: + +.. ipython:: python + + s.sort_index() + s.sort_index().loc[1:6] + +However, if at least one of the two is absent *and* the index is not sorted, an +error will be raised (since doing otherwise would be computationally expensive, +as well as potentially ambiguous for mixed type indexes). For instance, in the +above example, ``s.loc[1:6]`` would raise ``KeyError``. + .. _indexing.integer: Selection By Position From 745c01265e31afb9048fe461dfd8c88ad2606702 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 16 Jul 2017 08:31:12 -0700 Subject: [PATCH 779/933] TST: Add test for sub-char in read_csv (#16977) Closes gh-16893. 
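For context, 0x1A is the SUB control character that historically marked
end-of-file in DOS text files; the parser must read it as ordinary data
instead of truncating the input. A minimal sketch of the behavior under
test, using an in-memory buffer rather than the checked-in data file:

    import pandas as pd
    from pandas.compat import StringIO

    # SUB ('\x1a') sits inside a quoted header field and must survive
    # parsing as a plain character.
    data = 'a,"\x1ab",c\n1,2,3'
    df = pd.read_csv(StringIO(data))
    assert list(df.columns) == ['a', '\x1ab', 'c']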
--- pandas/tests/io/parser/common.py | 10 ++++++++++ pandas/tests/io/parser/data/sub_char.csv | 2 ++ 2 files changed, 12 insertions(+) create mode 100644 pandas/tests/io/parser/data/sub_char.csv diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 584a6561b505b..4d1f9936af983 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -1677,6 +1677,16 @@ def test_internal_eof_byte_to_file(self): result = self.read_csv(path) tm.assert_frame_equal(result, expected) + def test_sub_character(self): + # see gh-16893 + dirpath = tm.get_data_path() + filename = os.path.join(dirpath, "sub_char.csv") + + expected = DataFrame([[1, 2, 3]], columns=["a", "\x1ab", "c"]) + result = self.read_csv(filename) + + tm.assert_frame_equal(result, expected) + def test_file_handles(self): # GH 14418 - don't close user provided file handles diff --git a/pandas/tests/io/parser/data/sub_char.csv b/pandas/tests/io/parser/data/sub_char.csv new file mode 100644 index 0000000000000..ff1fa777832c7 --- /dev/null +++ b/pandas/tests/io/parser/data/sub_char.csv @@ -0,0 +1,2 @@ +a,"b",c +1,2,3 \ No newline at end of file From cbd0354d024d6d45c67fceab69f908eb51339f70 Mon Sep 17 00:00:00 2001 From: rdk1024 Date: Sun, 16 Jul 2017 05:32:45 -1000 Subject: [PATCH 780/933] DEPR: deprecate html.border option (#16970) --- doc/source/options.rst | 2 +- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/config_init.py | 22 ++++++++++++++++------ pandas/io/formats/format.py | 2 +- pandas/tests/io/formats/test_to_html.py | 7 ++++++- 5 files changed, 25 insertions(+), 9 deletions(-) diff --git a/doc/source/options.rst b/doc/source/options.rst index 6ff5b76014c95..f373705a96f48 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -400,7 +400,7 @@ display.width 80 Width of the display in charact display.html.table_schema False Whether to publish a Table Schema representation for frontends that support it. -html.border 1 A ``border=value`` attribute is +display.html.border 1 A ``border=value`` attribute is inserted in the ```` tag for the DataFrame HTML repr. io.excel.xls.writer xlwt The default Excel writer engine for diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 762107a261090..7c52cf6f450b2 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -116,6 +116,7 @@ Deprecations ~~~~~~~~~~~~ - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). +- ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). .. _whatsnew_0210.prior_deprecations: diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 04563907582ee..ae3001564a62f 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -202,6 +202,17 @@ def use_numexpr_cb(key): (default: False) """ +pc_html_border_doc = """ +: int + A ``border=value`` attribute is inserted in the ``
`` tag + for the DataFrame HTML repr. +""" + +pc_html_border_deprecation_warning = """\ +html.border has been deprecated, use display.html.border instead +(currently both are identical) +""" + pc_line_width_deprecation_warning = """\ line_width has been deprecated, use display.width instead (currently both are identical) @@ -369,6 +380,8 @@ def table_schema_cb(key): validator=is_bool) cf.register_option('html.table_schema', False, pc_table_schema_doc, validator=is_bool, cb=table_schema_cb) + cf.register_option('html.border', 1, pc_html_border_doc, + validator=is_int) cf.deprecate_option('display.line_width', @@ -378,16 +391,13 @@ def table_schema_cb(key): cf.deprecate_option('display.height', msg=pc_height_deprecation_warning, rkey='display.max_rows') -pc_html_border_doc = """ -: int - A ``border=value`` attribute is inserted in the ``
`` tag - for the DataFrame HTML repr. -""" - with cf.config_prefix('html'): cf.register_option('border', 1, pc_html_border_doc, validator=is_int) +cf.deprecate_option('html.border', msg=pc_html_border_deprecation_warning, + rkey='display.html.border') + tc_sim_interactive_doc = """ : boolean diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 0627ca9179509..23eb3bb05fd0a 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1064,7 +1064,7 @@ def __init__(self, formatter, classes=None, max_rows=None, max_cols=None, self.max_cols < len(self.fmt.columns)) self.notebook = notebook if border is None: - border = get_option('html.border') + border = get_option('display.html.border') self.border = border def write(self, s, indent=0): diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 9f4e532ec2287..1e174c34221d5 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1401,7 +1401,7 @@ def test_to_html_border(self): def test_to_html_border_option(self): df = DataFrame({'A': [1, 2]}) - with pd.option_context('html.border', 0): + with pd.option_context('display.html.border', 0): result = df.to_html() assert 'border="0"' in result assert 'border="0"' in df._repr_html_() @@ -1411,6 +1411,11 @@ def test_to_html_border_zero(self): result = df.to_html(border=0) assert 'border="0"' in result + def test_display_option_warning(self): + with tm.assert_produces_warning(DeprecationWarning, + check_stacklevel=False): + pd.options.html.border + def test_to_html(self): # big mixed biggie = DataFrame({'A': np.random.randn(200), From 692b5eeeff9b8e8c750f3e64db0c39dc149a73e8 Mon Sep 17 00:00:00 2001 From: fding253 Date: Sun, 16 Jul 2017 10:55:33 -0500 Subject: [PATCH 781/933] DOC: document convention argument for resample() (#16965) * DOC: document convention argument for resample() --- pandas/core/generic.py | 43 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a4bb746722c1e..e4e2e0093b1a6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4826,6 +4826,8 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, label : {'right', 'left'} Which bin edge label to label bucket with convention : {'start', 'end', 's', 'e'} + For PeriodIndex only, controls whether to use the start or end of + `rule` loffset : timedelta Adjust the resampled time labels base : int, default 0 @@ -4946,6 +4948,47 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, 2000-01-01 00:06:00 26 Freq: 3T, dtype: int64 + For a Series with a PeriodIndex, the keyword `convention` can be + used to control whether to use the start or end of `rule`. + + >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01', + freq='A', + periods=2)) + >>> s + 2012 1 + 2013 2 + Freq: A-DEC, dtype: int64 + + Resample by month using 'start' `convention`. Values are assigned to + the first month of the period. + + >>> s.resample('M', convention='start').asfreq().head() + 2012-01 1.0 + 2012-02 NaN + 2012-03 NaN + 2012-04 NaN + 2012-05 NaN + Freq: M, dtype: float64 + + Resample by month using 'end' `convention`. Values are assigned to + the last month of the period. 
+ + >>> s.resample('M', convention='end').asfreq() + 2012-12 1.0 + 2013-01 NaN + 2013-02 NaN + 2013-03 NaN + 2013-04 NaN + 2013-05 NaN + 2013-06 NaN + 2013-07 NaN + 2013-08 NaN + 2013-09 NaN + 2013-10 NaN + 2013-11 NaN + 2013-12 2.0 + Freq: M, dtype: float64 + For DataFrame objects, the keyword ``on`` can be used to specify the column instead of the index for resampling. From ea487fc9b197285f25b066450c46fc456db09e2a Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 16 Jul 2017 23:19:27 -0700 Subject: [PATCH 782/933] DOC: Clarify 'it' in aggregate doc (#16989) Closes gh-16988. --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e4e2e0093b1a6..f12592feaa4c3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3152,7 +3152,7 @@ def pipe(self, func, *args, **kwargs): (e.g., np.mean(arr_2d, axis=0)) as opposed to mimicking the default Numpy behavior (e.g., np.mean(arr_2d)). - agg is an alias for aggregate. Use it. + `agg` is an alias for `aggregate`. Use the alias. Returns ------- From ec927a47e472eebb5ba7086dcc15f3dda1c832cd Mon Sep 17 00:00:00 2001 From: cclauss Date: Mon, 17 Jul 2017 14:59:14 +0200 Subject: [PATCH 783/933] CLN/COMPAT: for various py2/py3 in doc/bench scripts (#16984) --- asv_bench/vbench_to_asv.py | 6 +- bench/alignment.py | 22 -- bench/bench_dense_to_sparse.py | 14 - bench/bench_get_put_value.py | 56 ---- bench/bench_groupby.py | 66 ----- bench/bench_join_panel.py | 85 ------ bench/bench_khash_dict.py | 89 ------ bench/bench_merge.R | 161 ---------- bench/bench_merge.py | 105 ------- bench/bench_merge_sqlite.py | 87 ------ bench/bench_pivot.R | 27 -- bench/bench_pivot.py | 16 - bench/bench_take_indexing.py | 55 ---- bench/bench_unique.py | 278 ------------------ bench/bench_with_subset.R | 53 ---- bench/bench_with_subset.py | 116 -------- bench/better_unique.py | 80 ----- bench/duplicated.R | 22 -- bench/io_roundtrip.py | 116 -------- bench/larry.py | 0 bench/serialize.py | 89 ------ bench/test.py | 70 ----- bench/zoo_bench.R | 71 ----- bench/zoo_bench.py | 36 --- doc/source/conf.py | 5 + .../ipython_sphinxext/ipython_directive.py | 4 +- scripts/find_commits_touching_func.py | 10 +- scripts/windows_builder/build_27-32.bat | 25 -- scripts/windows_builder/build_27-64.bat | 25 -- scripts/windows_builder/build_34-32.bat | 27 -- scripts/windows_builder/build_34-64.bat | 27 -- scripts/windows_builder/check_and_build.bat | 2 - scripts/windows_builder/check_and_build.py | 194 ------------ scripts/windows_builder/readme.txt | 17 -- 34 files changed, 14 insertions(+), 2042 deletions(-) delete mode 100644 bench/alignment.py delete mode 100644 bench/bench_dense_to_sparse.py delete mode 100644 bench/bench_get_put_value.py delete mode 100644 bench/bench_groupby.py delete mode 100644 bench/bench_join_panel.py delete mode 100644 bench/bench_khash_dict.py delete mode 100644 bench/bench_merge.R delete mode 100644 bench/bench_merge.py delete mode 100644 bench/bench_merge_sqlite.py delete mode 100644 bench/bench_pivot.R delete mode 100644 bench/bench_pivot.py delete mode 100644 bench/bench_take_indexing.py delete mode 100644 bench/bench_unique.py delete mode 100644 bench/bench_with_subset.R delete mode 100644 bench/bench_with_subset.py delete mode 100644 bench/better_unique.py delete mode 100644 bench/duplicated.R delete mode 100644 bench/io_roundtrip.py delete mode 100644 bench/larry.py delete mode 100644 bench/serialize.py delete mode 100644 bench/test.py delete mode 100644 
bench/zoo_bench.R delete mode 100644 bench/zoo_bench.py delete mode 100644 scripts/windows_builder/build_27-32.bat delete mode 100644 scripts/windows_builder/build_27-64.bat delete mode 100644 scripts/windows_builder/build_34-32.bat delete mode 100644 scripts/windows_builder/build_34-64.bat delete mode 100644 scripts/windows_builder/check_and_build.bat delete mode 100644 scripts/windows_builder/check_and_build.py delete mode 100644 scripts/windows_builder/readme.txt diff --git a/asv_bench/vbench_to_asv.py b/asv_bench/vbench_to_asv.py index c3041ec2b1ba1..2a4ce5d183ea2 100644 --- a/asv_bench/vbench_to_asv.py +++ b/asv_bench/vbench_to_asv.py @@ -114,7 +114,7 @@ def translate_module(target_module): l_vars = {} exec('import ' + target_module) in g_vars - print target_module + print(target_module) module = eval(target_module, g_vars) benchmarks = [] @@ -157,7 +157,7 @@ def translate_module(target_module): mod = os.path.basename(module) if mod in ['make.py', 'measure_memory_consumption.py', 'perf_HEAD.py', 'run_suite.py', 'test_perf.py', 'generate_rst_files.py', 'test.py', 'suite.py']: continue - print - print mod + print('') + print(mod) translate_module(mod.replace('.py', '')) diff --git a/bench/alignment.py b/bench/alignment.py deleted file mode 100644 index bc3134f597ee0..0000000000000 --- a/bench/alignment.py +++ /dev/null @@ -1,22 +0,0 @@ -# Setup -from pandas.compat import range, lrange -import numpy as np -import pandas -import la -N = 1000 -K = 50 -arr1 = np.random.randn(N, K) -arr2 = np.random.randn(N, K) -idx1 = lrange(N) -idx2 = lrange(K) - -# pandas -dma1 = pandas.DataFrame(arr1, idx1, idx2) -dma2 = pandas.DataFrame(arr2, idx1[::-1], idx2[::-1]) - -# larry -lar1 = la.larry(arr1, [idx1, idx2]) -lar2 = la.larry(arr2, [idx1[::-1], idx2[::-1]]) - -for i in range(100): - result = lar1 + lar2 diff --git a/bench/bench_dense_to_sparse.py b/bench/bench_dense_to_sparse.py deleted file mode 100644 index e1dcd3456e88d..0000000000000 --- a/bench/bench_dense_to_sparse.py +++ /dev/null @@ -1,14 +0,0 @@ -from pandas import * - -K = 100 -N = 100000 -rng = DatetimeIndex('1/1/2000', periods=N, offset=datetools.Minute()) - -rng2 = np.asarray(rng).astype('M8[us]').astype('i8') - -series = {} -for i in range(1, K + 1): - data = np.random.randn(N)[:-i] - this_rng = rng2[:-i] - data[100:] = np.nan - series[i] = SparseSeries(data, index=this_rng) diff --git a/bench/bench_get_put_value.py b/bench/bench_get_put_value.py deleted file mode 100644 index 427e0b1b10a22..0000000000000 --- a/bench/bench_get_put_value.py +++ /dev/null @@ -1,56 +0,0 @@ -from pandas import * -from pandas.util.testing import rands -from pandas.compat import range - -N = 1000 -K = 50 - - -def _random_index(howmany): - return Index([rands(10) for _ in range(howmany)]) - -df = DataFrame(np.random.randn(N, K), index=_random_index(N), - columns=_random_index(K)) - - -def get1(): - for col in df.columns: - for row in df.index: - _ = df[col][row] - - -def get2(): - for col in df.columns: - for row in df.index: - _ = df.get_value(row, col) - - -def put1(): - for col in df.columns: - for row in df.index: - df[col][row] = 0 - - -def put2(): - for col in df.columns: - for row in df.index: - df.set_value(row, col, 0) - - -def resize1(): - buf = DataFrame() - for col in df.columns: - for row in df.index: - buf = buf.set_value(row, col, 5.) - return buf - - -def resize2(): - from collections import defaultdict - - buf = defaultdict(dict) - for col in df.columns: - for row in df.index: - buf[col][row] = 5. 
- - return DataFrame(buf) diff --git a/bench/bench_groupby.py b/bench/bench_groupby.py deleted file mode 100644 index d7a2853e1e7b2..0000000000000 --- a/bench/bench_groupby.py +++ /dev/null @@ -1,66 +0,0 @@ -from pandas import * -from pandas.util.testing import rands -from pandas.compat import range - -import string -import random - -k = 20000 -n = 10 - -foo = np.tile(np.array([rands(10) for _ in range(k)], dtype='O'), n) -foo2 = list(foo) -random.shuffle(foo) -random.shuffle(foo2) - -df = DataFrame({'A': foo, - 'B': foo2, - 'C': np.random.randn(n * k)}) - -import pandas._sandbox as sbx - - -def f(): - table = sbx.StringHashTable(len(df)) - ret = table.factorize(df['A']) - return ret - - -def g(): - table = sbx.PyObjectHashTable(len(df)) - ret = table.factorize(df['A']) - return ret - -ret = f() - -""" -import pandas._tseries as lib - -f = np.std - - -grouped = df.groupby(['A', 'B']) - -label_list = [ping.labels for ping in grouped.groupings] -shape = [len(ping.ids) for ping in grouped.groupings] - -from pandas.core.groupby import get_group_index - - -group_index = get_group_index(label_list, shape, - sort=True, xnull=True).astype('i4') - -ngroups = np.prod(shape) - -indexer = lib.groupsort_indexer(group_index, ngroups) - -values = df['C'].values.take(indexer) -group_index = group_index.take(indexer) - -f = lambda x: x.std(ddof=1) - -grouper = lib.Grouper(df['C'], np.ndarray.std, group_index, ngroups) -result = grouper.get_result() - -expected = grouped.std() -""" diff --git a/bench/bench_join_panel.py b/bench/bench_join_panel.py deleted file mode 100644 index f3c3f8ba15f70..0000000000000 --- a/bench/bench_join_panel.py +++ /dev/null @@ -1,85 +0,0 @@ -# reasonably efficient - - -def create_panels_append(cls, panels): - """ return an append list of panels """ - panels = [a for a in panels if a is not None] - # corner cases - if len(panels) == 0: - return None - elif len(panels) == 1: - return panels[0] - elif len(panels) == 2 and panels[0] == panels[1]: - return panels[0] - # import pdb; pdb.set_trace() - # create a joint index for the axis - - def joint_index_for_axis(panels, axis): - s = set() - for p in panels: - s.update(list(getattr(p, axis))) - return sorted(list(s)) - - def reindex_on_axis(panels, axis, axis_reindex): - new_axis = joint_index_for_axis(panels, axis) - new_panels = [p.reindex(**{axis_reindex: new_axis, - 'copy': False}) for p in panels] - return new_panels, new_axis - # create the joint major index, dont' reindex the sub-panels - we are - # appending - major = joint_index_for_axis(panels, 'major_axis') - # reindex on minor axis - panels, minor = reindex_on_axis(panels, 'minor_axis', 'minor') - # reindex on items - panels, items = reindex_on_axis(panels, 'items', 'items') - # concatenate values - try: - values = np.concatenate([p.values for p in panels], axis=1) - except Exception as detail: - raise Exception("cannot append values that dont' match dimensions! 
-> [%s] %s" - % (','.join(["%s" % p for p in panels]), str(detail))) - # pm('append - create_panel') - p = Panel(values, items=items, major_axis=major, - minor_axis=minor) - # pm('append - done') - return p - - -# does the job but inefficient (better to handle like you read a table in -# pytables...e.g create a LongPanel then convert to Wide) -def create_panels_join(cls, panels): - """ given an array of panels's, create a single panel """ - panels = [a for a in panels if a is not None] - # corner cases - if len(panels) == 0: - return None - elif len(panels) == 1: - return panels[0] - elif len(panels) == 2 and panels[0] == panels[1]: - return panels[0] - d = dict() - minor, major, items = set(), set(), set() - for panel in panels: - items.update(panel.items) - major.update(panel.major_axis) - minor.update(panel.minor_axis) - values = panel.values - for item, item_index in panel.items.indexMap.items(): - for minor_i, minor_index in panel.minor_axis.indexMap.items(): - for major_i, major_index in panel.major_axis.indexMap.items(): - try: - d[(minor_i, major_i, item)] = values[item_index, major_index, minor_index] - except: - pass - # stack the values - minor = sorted(list(minor)) - major = sorted(list(major)) - items = sorted(list(items)) - # create the 3d stack (items x columns x indicies) - data = np.dstack([np.asarray([np.asarray([d.get((minor_i, major_i, item), np.nan) - for item in items]) - for major_i in major]).transpose() - for minor_i in minor]) - # construct the panel - return Panel(data, items, major, minor) -add_class_method(Panel, create_panels_join, 'join_many') diff --git a/bench/bench_khash_dict.py b/bench/bench_khash_dict.py deleted file mode 100644 index 054fc36131b65..0000000000000 --- a/bench/bench_khash_dict.py +++ /dev/null @@ -1,89 +0,0 @@ -""" -Some comparisons of khash.h to Python dict -""" -from __future__ import print_function - -import numpy as np -import os - -from vbench.api import Benchmark -from pandas.util.testing import rands -from pandas.compat import range -import pandas._tseries as lib -import pandas._sandbox as sbx -import time - -import psutil - -pid = os.getpid() -proc = psutil.Process(pid) - - -def object_test_data(n): - pass - - -def string_test_data(n): - return np.array([rands(10) for _ in range(n)], dtype='O') - - -def int_test_data(n): - return np.arange(n, dtype='i8') - -N = 1000000 - -#---------------------------------------------------------------------- -# Benchmark 1: map_locations - - -def map_locations_python_object(): - arr = string_test_data(N) - return _timeit(lambda: lib.map_indices_object(arr)) - - -def map_locations_khash_object(): - arr = string_test_data(N) - - def f(): - table = sbx.PyObjectHashTable(len(arr)) - table.map_locations(arr) - return _timeit(f) - - -def _timeit(f, iterations=10): - start = time.time() - for _ in range(iterations): - foo = f() - elapsed = time.time() - start - return elapsed - -#---------------------------------------------------------------------- -# Benchmark 2: lookup_locations - - -def lookup_python(values): - table = lib.map_indices_object(values) - return _timeit(lambda: lib.merge_indexer_object(values, table)) - - -def lookup_khash(values): - table = sbx.PyObjectHashTable(len(values)) - table.map_locations(values) - locs = table.lookup_locations(values) - # elapsed = _timeit(lambda: table.lookup_locations2(values)) - return table - - -def leak(values): - for _ in range(100): - print(proc.get_memory_info()) - table = lookup_khash(values) - # table.destroy() - -arr = string_test_data(N) - 
-#---------------------------------------------------------------------- -# Benchmark 3: unique - -#---------------------------------------------------------------------- -# Benchmark 4: factorize diff --git a/bench/bench_merge.R b/bench/bench_merge.R deleted file mode 100644 index 3ed4618494857..0000000000000 --- a/bench/bench_merge.R +++ /dev/null @@ -1,161 +0,0 @@ -library(plyr) -library(data.table) -N <- 10000 -indices = rep(NA, N) -indices2 = rep(NA, N) -for (i in 1:N) { - indices[i] <- paste(sample(letters, 10), collapse="") - indices2[i] <- paste(sample(letters, 10), collapse="") -} -left <- data.frame(key=rep(indices[1:8000], 10), - key2=rep(indices2[1:8000], 10), - value=rnorm(80000)) -right <- data.frame(key=indices[2001:10000], - key2=indices2[2001:10000], - value2=rnorm(8000)) - -right2 <- data.frame(key=rep(right$key, 2), - key2=rep(right$key2, 2), - value2=rnorm(16000)) - -left.dt <- data.table(left, key=c("key", "key2")) -right.dt <- data.table(right, key=c("key", "key2")) -right2.dt <- data.table(right2, key=c("key", "key2")) - -# left.dt2 <- data.table(left) -# right.dt2 <- data.table(right) - -## left <- data.frame(key=rep(indices[1:1000], 10), -## key2=rep(indices2[1:1000], 10), -## value=rnorm(100000)) -## right <- data.frame(key=indices[1:1000], -## key2=indices2[1:1000], -## value2=rnorm(10000)) - -timeit <- function(func, niter=10) { - timing = rep(NA, niter) - for (i in 1:niter) { - gc() - timing[i] <- system.time(func())[3] - } - mean(timing) -} - -left.join <- function(sort=FALSE) { - result <- base::merge(left, right, all.x=TRUE, sort=sort) -} - -right.join <- function(sort=FALSE) { - result <- base::merge(left, right, all.y=TRUE, sort=sort) -} - -outer.join <- function(sort=FALSE) { - result <- base::merge(left, right, all=TRUE, sort=sort) -} - -inner.join <- function(sort=FALSE) { - result <- base::merge(left, right, all=FALSE, sort=sort) -} - -left.join.dt <- function(sort=FALSE) { - result <- right.dt[left.dt] -} - -right.join.dt <- function(sort=FALSE) { - result <- left.dt[right.dt] -} - -outer.join.dt <- function(sort=FALSE) { - result <- merge(left.dt, right.dt, all=TRUE, sort=sort) -} - -inner.join.dt <- function(sort=FALSE) { - result <- merge(left.dt, right.dt, all=FALSE, sort=sort) -} - -plyr.join <- function(type) { - result <- plyr::join(left, right, by=c("key", "key2"), - type=type, match="first") -} - -sort.options <- c(FALSE, TRUE) - -# many-to-one - -results <- matrix(nrow=4, ncol=3) -colnames(results) <- c("base::merge", "plyr", "data.table") -rownames(results) <- c("inner", "outer", "left", "right") - -base.functions <- c(inner.join, outer.join, left.join, right.join) -plyr.functions <- c(function() plyr.join("inner"), - function() plyr.join("full"), - function() plyr.join("left"), - function() plyr.join("right")) -dt.functions <- c(inner.join.dt, outer.join.dt, left.join.dt, right.join.dt) -for (i in 1:4) { - base.func <- base.functions[[i]] - plyr.func <- plyr.functions[[i]] - dt.func <- dt.functions[[i]] - results[i, 1] <- timeit(base.func) - results[i, 2] <- timeit(plyr.func) - results[i, 3] <- timeit(dt.func) -} - - -# many-to-many - -left.join <- function(sort=FALSE) { - result <- base::merge(left, right2, all.x=TRUE, sort=sort) -} - -right.join <- function(sort=FALSE) { - result <- base::merge(left, right2, all.y=TRUE, sort=sort) -} - -outer.join <- function(sort=FALSE) { - result <- base::merge(left, right2, all=TRUE, sort=sort) -} - -inner.join <- function(sort=FALSE) { - result <- base::merge(left, right2, all=FALSE, sort=sort) -} - 
-left.join.dt <- function(sort=FALSE) { - result <- right2.dt[left.dt] -} - -right.join.dt <- function(sort=FALSE) { - result <- left.dt[right2.dt] -} - -outer.join.dt <- function(sort=FALSE) { - result <- merge(left.dt, right2.dt, all=TRUE, sort=sort) -} - -inner.join.dt <- function(sort=FALSE) { - result <- merge(left.dt, right2.dt, all=FALSE, sort=sort) -} - -sort.options <- c(FALSE, TRUE) - -# many-to-one - -results <- matrix(nrow=4, ncol=3) -colnames(results) <- c("base::merge", "plyr", "data.table") -rownames(results) <- c("inner", "outer", "left", "right") - -base.functions <- c(inner.join, outer.join, left.join, right.join) -plyr.functions <- c(function() plyr.join("inner"), - function() plyr.join("full"), - function() plyr.join("left"), - function() plyr.join("right")) -dt.functions <- c(inner.join.dt, outer.join.dt, left.join.dt, right.join.dt) -for (i in 1:4) { - base.func <- base.functions[[i]] - plyr.func <- plyr.functions[[i]] - dt.func <- dt.functions[[i]] - results[i, 1] <- timeit(base.func) - results[i, 2] <- timeit(plyr.func) - results[i, 3] <- timeit(dt.func) -} - diff --git a/bench/bench_merge.py b/bench/bench_merge.py deleted file mode 100644 index 330dba7b9af69..0000000000000 --- a/bench/bench_merge.py +++ /dev/null @@ -1,105 +0,0 @@ -import random -import gc -import time -from pandas import * -from pandas.compat import range, lrange, StringIO -from pandas.util.testing import rands - -N = 10000 -ngroups = 10 - - -def get_test_data(ngroups=100, n=N): - unique_groups = lrange(ngroups) - arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object) - - if len(arr) < n: - arr = np.asarray(list(arr) + unique_groups[:n - len(arr)], - dtype=object) - - random.shuffle(arr) - return arr - -# aggregate multiple columns -# df = DataFrame({'key1' : get_test_data(ngroups=ngroups), -# 'key2' : get_test_data(ngroups=ngroups), -# 'data1' : np.random.randn(N), -# 'data2' : np.random.randn(N)}) - -# df2 = DataFrame({'key1' : get_test_data(ngroups=ngroups, n=N//10), -# 'key2' : get_test_data(ngroups=ngroups//2, n=N//10), -# 'value' : np.random.randn(N // 10)}) -# result = merge.merge(df, df2, on='key2') - -N = 10000 - -indices = np.array([rands(10) for _ in range(N)], dtype='O') -indices2 = np.array([rands(10) for _ in range(N)], dtype='O') -key = np.tile(indices[:8000], 10) -key2 = np.tile(indices2[:8000], 10) - -left = DataFrame({'key': key, 'key2': key2, - 'value': np.random.randn(80000)}) -right = DataFrame({'key': indices[2000:], 'key2': indices2[2000:], - 'value2': np.random.randn(8000)}) - -right2 = right.append(right, ignore_index=True) - - -join_methods = ['inner', 'outer', 'left', 'right'] -results = DataFrame(index=join_methods, columns=[False, True]) -niter = 10 -for sort in [False, True]: - for join_method in join_methods: - f = lambda: merge(left, right, how=join_method, sort=sort) - gc.disable() - start = time.time() - for _ in range(niter): - f() - elapsed = (time.time() - start) / niter - gc.enable() - results[sort][join_method] = elapsed -# results.columns = ['pandas'] -results.columns = ['dont_sort', 'sort'] - - -# R results -# many to one -r_results = read_table(StringIO(""" base::merge plyr data.table -inner 0.2475 0.1183 0.1100 -outer 0.4213 0.1916 0.2090 -left 0.2998 0.1188 0.0572 -right 0.3102 0.0536 0.0376 -"""), sep='\s+') - -presults = results[['dont_sort']].rename(columns={'dont_sort': 'pandas'}) -all_results = presults.join(r_results) - -all_results = all_results.div(all_results['pandas'], axis=0) - -all_results = all_results.ix[:, ['pandas', 
'data.table', 'plyr', - 'base::merge']] - -sort_results = DataFrame.from_items([('pandas', results['sort']), - ('R', r_results['base::merge'])]) -sort_results['Ratio'] = sort_results['R'] / sort_results['pandas'] - - -nosort_results = DataFrame.from_items([('pandas', results['dont_sort']), - ('R', r_results['base::merge'])]) -nosort_results['Ratio'] = nosort_results['R'] / nosort_results['pandas'] - -# many to many - -# many to one -r_results = read_table(StringIO("""base::merge plyr data.table -inner 0.4610 0.1276 0.1269 -outer 0.9195 0.1881 0.2725 -left 0.6559 0.1257 0.0678 -right 0.6425 0.0522 0.0428 -"""), sep='\s+') - -all_results = presults.join(r_results) -all_results = all_results.div(all_results['pandas'], axis=0) -all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr', - 'base::merge']] diff --git a/bench/bench_merge_sqlite.py b/bench/bench_merge_sqlite.py deleted file mode 100644 index 3ad4b810119c3..0000000000000 --- a/bench/bench_merge_sqlite.py +++ /dev/null @@ -1,87 +0,0 @@ -import numpy as np -from collections import defaultdict -import gc -import time -from pandas import DataFrame -from pandas.util.testing import rands -from pandas.compat import range, zip -import random - -N = 10000 - -indices = np.array([rands(10) for _ in range(N)], dtype='O') -indices2 = np.array([rands(10) for _ in range(N)], dtype='O') -key = np.tile(indices[:8000], 10) -key2 = np.tile(indices2[:8000], 10) - -left = DataFrame({'key': key, 'key2': key2, - 'value': np.random.randn(80000)}) -right = DataFrame({'key': indices[2000:], 'key2': indices2[2000:], - 'value2': np.random.randn(8000)}) - -# right2 = right.append(right, ignore_index=True) -# right = right2 - -# random.shuffle(key2) -# indices2 = indices.copy() -# random.shuffle(indices2) - -# Prepare Database -import sqlite3 -create_sql_indexes = True - -conn = sqlite3.connect(':memory:') -conn.execute( - 'create table left( key varchar(10), key2 varchar(10), value int);') -conn.execute( - 'create table right( key varchar(10), key2 varchar(10), value2 int);') -conn.executemany('insert into left values (?, ?, ?)', - zip(key, key2, left['value'])) -conn.executemany('insert into right values (?, ?, ?)', - zip(right['key'], right['key2'], right['value2'])) - -# Create Indices -if create_sql_indexes: - conn.execute('create index left_ix on left(key, key2)') - conn.execute('create index right_ix on right(key, key2)') - - -join_methods = ['inner', 'left outer', 'left'] # others not supported -sql_results = DataFrame(index=join_methods, columns=[False]) -niter = 5 -for sort in [False]: - for join_method in join_methods: - sql = """CREATE TABLE test as select * - from left - %s join right - on left.key=right.key - and left.key2 = right.key2;""" % join_method - sql = """select * - from left - %s join right - on left.key=right.key - and left.key2 = right.key2;""" % join_method - - if sort: - sql = '%s order by key, key2' % sql - f = lambda: list(conn.execute(sql)) # list fetches results - g = lambda: conn.execute(sql) # list fetches results - gc.disable() - start = time.time() - # for _ in range(niter): - g() - elapsed = (time.time() - start) / niter - gc.enable() - - cur = conn.execute("DROP TABLE test") - conn.commit() - - sql_results[sort][join_method] = elapsed - sql_results.columns = ['sqlite3'] # ['dont_sort', 'sort'] - sql_results.index = ['inner', 'outer', 'left'] - - sql = """select * - from left - inner join right - on left.key=right.key - and left.key2 = right.key2;""" diff --git a/bench/bench_pivot.R b/bench/bench_pivot.R deleted file 
mode 100644 index 06dc6a105bc43..0000000000000 --- a/bench/bench_pivot.R +++ /dev/null @@ -1,27 +0,0 @@ -library(reshape2) - - -n <- 100000 -a.size <- 5 -b.size <- 5 - -data <- data.frame(a=sample(letters[1:a.size], n, replace=T), - b=sample(letters[1:b.size], n, replace=T), - c=rnorm(n), - d=rnorm(n)) - -timings <- numeric() - -# acast(melt(data, id=c("a", "b")), a ~ b, mean) -# acast(melt(data, id=c("a", "b")), a + b ~ variable, mean) - -for (i in 1:10) { - gc() - tim <- system.time(acast(melt(data, id=c("a", "b")), a ~ b, mean, - subset=.(variable=="c"))) - timings[i] = tim[3] -} - -mean(timings) - -acast(melt(data, id=c("a", "b")), a ~ b, mean, subset=.(variable="c")) diff --git a/bench/bench_pivot.py b/bench/bench_pivot.py deleted file mode 100644 index 007bd0aaebc2f..0000000000000 --- a/bench/bench_pivot.py +++ /dev/null @@ -1,16 +0,0 @@ -from pandas import * -import string - - -n = 100000 -asize = 5 -bsize = 5 - -letters = np.asarray(list(string.letters), dtype=object) - -data = DataFrame(dict(foo=letters[:asize][np.random.randint(0, asize, n)], - bar=letters[:bsize][np.random.randint(0, bsize, n)], - baz=np.random.randn(n), - qux=np.random.randn(n))) - -table = pivot_table(data, xby=['foo', 'bar']) diff --git a/bench/bench_take_indexing.py b/bench/bench_take_indexing.py deleted file mode 100644 index 5fb584bcfe45f..0000000000000 --- a/bench/bench_take_indexing.py +++ /dev/null @@ -1,55 +0,0 @@ -from __future__ import print_function -import numpy as np - -from pandas import * -import pandas._tseries as lib - -from pandas import DataFrame -import timeit -from pandas.compat import zip - -setup = """ -from pandas import Series -import pandas._tseries as lib -import random -import numpy as np - -import random -n = %d -k = %d -arr = np.random.randn(n, k) -indexer = np.arange(n, dtype=np.int32) -indexer = indexer[::-1] -""" - -sizes = [100, 1000, 10000, 100000] -iters = [1000, 1000, 100, 1] - -fancy_2d = [] -take_2d = [] -cython_2d = [] - -n = 1000 - - -def _timeit(stmt, size, k=5, iters=1000): - timer = timeit.Timer(stmt=stmt, setup=setup % (sz, k)) - return timer.timeit(n) / n - -for sz, its in zip(sizes, iters): - print(sz) - fancy_2d.append(_timeit('arr[indexer]', sz, iters=its)) - take_2d.append(_timeit('arr.take(indexer, axis=0)', sz, iters=its)) - cython_2d.append(_timeit('lib.take_axis0(arr, indexer)', sz, iters=its)) - -df = DataFrame({'fancy': fancy_2d, - 'take': take_2d, - 'cython': cython_2d}) - -print(df) - -from pandas.rpy.common import r -r('mat <- matrix(rnorm(50000), nrow=10000, ncol=5)') -r('set.seed(12345') -r('indexer <- sample(1:10000)') -r('mat[indexer,]') diff --git a/bench/bench_unique.py b/bench/bench_unique.py deleted file mode 100644 index 87bd2f2df586c..0000000000000 --- a/bench/bench_unique.py +++ /dev/null @@ -1,278 +0,0 @@ -from __future__ import print_function -from pandas import * -from pandas.util.testing import rands -from pandas.compat import range, zip -import pandas._tseries as lib -import numpy as np -import matplotlib.pyplot as plt - -N = 50000 -K = 10000 - -groups = np.array([rands(10) for _ in range(K)], dtype='O') -groups2 = np.array([rands(10) for _ in range(K)], dtype='O') - -labels = np.tile(groups, N // K) -labels2 = np.tile(groups2, N // K) -data = np.random.randn(N) - - -def timeit(f, niter): - import gc - import time - gc.disable() - start = time.time() - for _ in range(niter): - f() - elapsed = (time.time() - start) / niter - gc.enable() - return elapsed - - -def algo1(): - unique_labels = np.unique(labels) - result = 
np.empty(len(unique_labels)) - for i, label in enumerate(unique_labels): - result[i] = data[labels == label].sum() - - -def algo2(): - unique_labels = np.unique(labels) - indices = lib.groupby_indices(labels) - result = np.empty(len(unique_labels)) - - for i, label in enumerate(unique_labels): - result[i] = data.take(indices[label]).sum() - - -def algo3_nosort(): - rizer = lib.DictFactorizer() - labs, counts = rizer.factorize(labels, sort=False) - k = len(rizer.uniques) - out = np.empty(k) - lib.group_add(out, counts, data, labs) - - -def algo3_sort(): - rizer = lib.DictFactorizer() - labs, counts = rizer.factorize(labels, sort=True) - k = len(rizer.uniques) - out = np.empty(k) - lib.group_add(out, counts, data, labs) - -import numpy as np -import random - - -# dict to hold results -counts = {} - -# a hack to generate random key, value pairs. -# 5k keys, 100k values -x = np.tile(np.arange(5000, dtype='O'), 20) -random.shuffle(x) -xarr = x -x = [int(y) for y in x] -data = np.random.uniform(0, 1, 100000) - - -def f(): - # groupby sum - for k, v in zip(x, data): - try: - counts[k] += v - except KeyError: - counts[k] = v - - -def f2(): - rizer = lib.DictFactorizer() - labs, counts = rizer.factorize(xarr, sort=False) - k = len(rizer.uniques) - out = np.empty(k) - lib.group_add(out, counts, data, labs) - - -def algo4(): - rizer = lib.DictFactorizer() - labs1, _ = rizer.factorize(labels, sort=False) - k1 = len(rizer.uniques) - - rizer = lib.DictFactorizer() - labs2, _ = rizer.factorize(labels2, sort=False) - k2 = len(rizer.uniques) - - group_id = labs1 * k2 + labs2 - max_group = k1 * k2 - - if max_group > 1e6: - rizer = lib.Int64Factorizer(len(group_id)) - group_id, _ = rizer.factorize(group_id.astype('i8'), sort=True) - max_group = len(rizer.uniques) - - out = np.empty(max_group) - counts = np.zeros(max_group, dtype='i4') - lib.group_add(out, counts, data, group_id) - -# cumtime percall filename:lineno(function) -# 0.592 0.592 :1() - # 0.584 0.006 groupby_ex.py:37(algo3_nosort) - # 0.535 0.005 {method 'factorize' of DictFactorizer' objects} - # 0.047 0.000 {pandas._tseries.group_add} - # 0.002 0.000 numeric.py:65(zeros_like) - # 0.001 0.000 {method 'fill' of 'numpy.ndarray' objects} - # 0.000 0.000 {numpy.core.multiarray.empty_like} - # 0.000 0.000 {numpy.core.multiarray.empty} - -# UNIQUE timings - -# N = 10000000 -# K = 500000 - -# groups = np.array([rands(10) for _ in range(K)], dtype='O') - -# labels = np.tile(groups, N // K) -data = np.random.randn(N) - -data = np.random.randn(N) - -Ks = [100, 1000, 5000, 10000, 25000, 50000, 100000] - -# Ks = [500000, 1000000, 2500000, 5000000, 10000000] - -import psutil -import os -import gc - -pid = os.getpid() -proc = psutil.Process(pid) - - -def dict_unique(values, expected_K, sort=False, memory=False): - if memory: - gc.collect() - before_mem = proc.get_memory_info().rss - - rizer = lib.DictFactorizer() - result = rizer.unique_int64(values) - - if memory: - result = proc.get_memory_info().rss - before_mem - return result - - if sort: - result.sort() - assert(len(result) == expected_K) - return result - - -def khash_unique(values, expected_K, size_hint=False, sort=False, - memory=False): - if memory: - gc.collect() - before_mem = proc.get_memory_info().rss - - if size_hint: - rizer = lib.Factorizer(len(values)) - else: - rizer = lib.Factorizer(100) - - result = [] - result = rizer.unique(values) - - if memory: - result = proc.get_memory_info().rss - before_mem - return result - - if sort: - result.sort() - assert(len(result) == expected_K) - - -def 
khash_unique_str(values, expected_K, size_hint=False, sort=False, - memory=False): - if memory: - gc.collect() - before_mem = proc.get_memory_info().rss - - if size_hint: - rizer = lib.StringHashTable(len(values)) - else: - rizer = lib.StringHashTable(100) - - result = [] - result = rizer.unique(values) - - if memory: - result = proc.get_memory_info().rss - before_mem - return result - - if sort: - result.sort() - assert(len(result) == expected_K) - - -def khash_unique_int64(values, expected_K, size_hint=False, sort=False): - if size_hint: - rizer = lib.Int64HashTable(len(values)) - else: - rizer = lib.Int64HashTable(100) - - result = [] - result = rizer.unique(values) - - if sort: - result.sort() - assert(len(result) == expected_K) - - -def hash_bench(): - numpy = [] - dict_based = [] - dict_based_sort = [] - khash_hint = [] - khash_nohint = [] - for K in Ks: - print(K) - # groups = np.array([rands(10) for _ in range(K)]) - # labels = np.tile(groups, N // K).astype('O') - - groups = np.random.randint(0, long(100000000000), size=K) - labels = np.tile(groups, N // K) - dict_based.append(timeit(lambda: dict_unique(labels, K), 20)) - khash_nohint.append(timeit(lambda: khash_unique_int64(labels, K), 20)) - khash_hint.append(timeit(lambda: khash_unique_int64(labels, K, - size_hint=True), 20)) - - # memory, hard to get - # dict_based.append(np.mean([dict_unique(labels, K, memory=True) - # for _ in range(10)])) - # khash_nohint.append(np.mean([khash_unique(labels, K, memory=True) - # for _ in range(10)])) - # khash_hint.append(np.mean([khash_unique(labels, K, size_hint=True, memory=True) - # for _ in range(10)])) - - # dict_based_sort.append(timeit(lambda: dict_unique(labels, K, - # sort=True), 10)) - # numpy.append(timeit(lambda: np.unique(labels), 10)) - - # unique_timings = DataFrame({'numpy.unique' : numpy, - # 'dict, no sort' : dict_based, - # 'dict, sort' : dict_based_sort}, - # columns=['dict, no sort', - # 'dict, sort', 'numpy.unique'], - # index=Ks) - - unique_timings = DataFrame({'dict': dict_based, - 'khash, preallocate': khash_hint, - 'khash': khash_nohint}, - columns=['khash, preallocate', 'khash', 'dict'], - index=Ks) - - unique_timings.plot(kind='bar', legend=False) - plt.legend(loc='best') - plt.title('Unique on 100,000 values, int64') - plt.xlabel('Number of unique labels') - plt.ylabel('Mean execution time') - - plt.show() diff --git a/bench/bench_with_subset.R b/bench/bench_with_subset.R deleted file mode 100644 index 69d0f7a9eec63..0000000000000 --- a/bench/bench_with_subset.R +++ /dev/null @@ -1,53 +0,0 @@ -library(microbenchmark) -library(data.table) - - -data.frame.subset.bench <- function (n=1e7, times=30) { - df <- data.frame(a=rnorm(n), b=rnorm(n), c=rnorm(n)) - print(microbenchmark(subset(df, a <= b & b <= (c ^ 2 + b ^ 2 - a) & b > c), - times=times)) -} - - -# data.table allows something very similar to query with an expression -# but we have chained comparisons AND we're faster BOO YAH! 
-data.table.subset.expression.bench <- function (n=1e7, times=30) { - dt <- data.table(a=rnorm(n), b=rnorm(n), c=rnorm(n)) - print(microbenchmark(dt[, a <= b & b <= (c ^ 2 + b ^ 2 - a) & b > c], - times=times)) -} - - -# compare against subset with data.table for good measure -data.table.subset.bench <- function (n=1e7, times=30) { - dt <- data.table(a=rnorm(n), b=rnorm(n), c=rnorm(n)) - print(microbenchmark(subset(dt, a <= b & b <= (c ^ 2 + b ^ 2 - a) & b > c), - times=times)) -} - - -data.frame.with.bench <- function (n=1e7, times=30) { - df <- data.frame(a=rnorm(n), b=rnorm(n), c=rnorm(n)) - - print(microbenchmark(with(df, a + b * (c ^ 2 + b ^ 2 - a) / (a * c) ^ 3), - times=times)) -} - - -data.table.with.bench <- function (n=1e7, times=30) { - dt <- data.table(a=rnorm(n), b=rnorm(n), c=rnorm(n)) - print(microbenchmark(with(dt, a + b * (c ^ 2 + b ^ 2 - a) / (a * c) ^ 3), - times=times)) -} - - -bench <- function () { - data.frame.subset.bench() - data.table.subset.expression.bench() - data.table.subset.bench() - data.frame.with.bench() - data.table.with.bench() -} - - -bench() diff --git a/bench/bench_with_subset.py b/bench/bench_with_subset.py deleted file mode 100644 index 017401df3f7f3..0000000000000 --- a/bench/bench_with_subset.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python - -""" -Microbenchmarks for comparison with R's "with" and "subset" functions -""" - -from __future__ import print_function -import numpy as np -from numpy import array -from timeit import repeat as timeit -from pandas.compat import range, zip -from pandas import DataFrame - - -setup_common = """from pandas import DataFrame -from numpy.random import randn -df = DataFrame(randn(%d, 3), columns=list('abc')) -%s""" - - -setup_with = "s = 'a + b * (c ** 2 + b ** 2 - a) / (a * c) ** 3'" - - -def bench_with(n, times=10, repeat=3, engine='numexpr'): - return np.array(timeit('df.eval(s, engine=%r)' % engine, - setup=setup_common % (n, setup_with), - repeat=repeat, number=times)) / times - - -setup_subset = "s = 'a <= b <= c ** 2 + b ** 2 - a and b > c'" - - -def bench_subset(n, times=10, repeat=3, engine='numexpr'): - return np.array(timeit('df.query(s, engine=%r)' % engine, - setup=setup_common % (n, setup_subset), - repeat=repeat, number=times)) / times - - -def bench(mn=1, mx=7, num=100, engines=('python', 'numexpr'), verbose=False): - r = np.logspace(mn, mx, num=num).round().astype(int) - - ev = DataFrame(np.empty((num, len(engines))), columns=engines) - qu = ev.copy(deep=True) - - ev['size'] = qu['size'] = r - - for engine in engines: - for i, n in enumerate(r): - if verbose: - print('engine: %r, i == %d' % (engine, i)) - ev.loc[i, engine] = bench_with(n, times=1, repeat=1, engine=engine) - qu.loc[i, engine] = bench_subset(n, times=1, repeat=1, - engine=engine) - - return ev, qu - - -def plot_perf(df, engines, title, filename=None): - from matplotlib.pyplot import figure, rc - - try: - from mpltools import style - except ImportError: - pass - else: - style.use('ggplot') - - rc('text', usetex=True) - - fig = figure(figsize=(4, 3), dpi=100) - ax = fig.add_subplot(111) - - for engine in engines: - ax.plot(df.size, df[engine], label=engine, lw=2) - - ax.set_xlabel('Number of Rows') - ax.set_ylabel('Time (s)') - ax.set_title(title) - ax.legend(loc='best') - ax.tick_params(top=False, right=False) - - fig.tight_layout() - - if filename is not None: - fig.savefig(filename) - - -if __name__ == '__main__': - import os - import pandas as pd - - pandas_dir = 
os.path.dirname(os.path.abspath(os.path.dirname(__file__))) - static_path = os.path.join(pandas_dir, 'doc', 'source', '_static') - - join = lambda p: os.path.join(static_path, p) - - fn = join('eval-query-perf-data.h5') - - engines = 'python', 'numexpr' - - if not os.path.exists(fn): - ev, qu = bench(verbose=True) - ev.to_hdf(fn, 'eval') - qu.to_hdf(fn, 'query') - else: - ev = pd.read_hdf(fn, 'eval') - qu = pd.read_hdf(fn, 'query') - - plot_perf(ev, engines, 'DataFrame.eval()', filename=join('eval-perf.png')) - plot_perf(qu, engines, 'DataFrame.query()', - filename=join('query-perf.png')) - - plot_perf(ev[ev.size <= 50000], engines, 'DataFrame.eval()', - filename=join('eval-perf-small.png')) - plot_perf(qu[qu.size <= 500000], engines, 'DataFrame.query()', - filename=join('query-perf-small.png')) diff --git a/bench/better_unique.py b/bench/better_unique.py deleted file mode 100644 index e03a4f433ce66..0000000000000 --- a/bench/better_unique.py +++ /dev/null @@ -1,80 +0,0 @@ -from __future__ import print_function -from pandas import DataFrame -from pandas.compat import range, zip -import timeit - -setup = """ -from pandas import Series -import pandas._tseries as _tseries -from pandas.compat import range -import random -import numpy as np - -def better_unique(values): - uniques = _tseries.fast_unique(values) - id_map = _tseries.map_indices_buf(uniques) - labels = _tseries.get_unique_labels(values, id_map) - return uniques, labels - -tot = 100000 - -def get_test_data(ngroups=100, n=tot): - unique_groups = range(ngroups) - random.shuffle(unique_groups) - arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object) - - if len(arr) < n: - arr = np.asarray(list(arr) + unique_groups[:n - len(arr)], - dtype=object) - - return arr - -arr = get_test_data(ngroups=%d) -""" - -group_sizes = [10, 100, 1000, 10000, - 20000, 30000, 40000, - 50000, 60000, 70000, - 80000, 90000, 100000] - -numbers = [100, 100, 50] + [10] * 10 - -numpy = [] -wes = [] - -for sz, n in zip(group_sizes, numbers): - # wes_timer = timeit.Timer(stmt='better_unique(arr)', - # setup=setup % sz) - wes_timer = timeit.Timer(stmt='_tseries.fast_unique(arr)', - setup=setup % sz) - - numpy_timer = timeit.Timer(stmt='np.unique(arr)', - setup=setup % sz) - - print(n) - numpy_result = numpy_timer.timeit(number=n) / n - wes_result = wes_timer.timeit(number=n) / n - - print('Groups: %d, NumPy: %s, Wes: %s' % (sz, numpy_result, wes_result)) - - wes.append(wes_result) - numpy.append(numpy_result) - -result = DataFrame({'wes': wes, 'numpy': numpy}, index=group_sizes) - - -def make_plot(numpy, wes): - pass - -# def get_test_data(ngroups=100, n=100000): -# unique_groups = range(ngroups) -# random.shuffle(unique_groups) -# arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object) - -# if len(arr) < n: -# arr = np.asarray(list(arr) + unique_groups[:n - len(arr)], -# dtype=object) - -# return arr - -# arr = get_test_data(ngroups=1000) diff --git a/bench/duplicated.R b/bench/duplicated.R deleted file mode 100644 index eb2376df2932a..0000000000000 --- a/bench/duplicated.R +++ /dev/null @@ -1,22 +0,0 @@ -N <- 100000 - -k1 = rep(NA, N) -k2 = rep(NA, N) -for (i in 1:N){ - k1[i] <- paste(sample(letters, 1), collapse="") - k2[i] <- paste(sample(letters, 1), collapse="") -} -df <- data.frame(a=k1, b=k2, c=rep(1:100, N / 100)) -df2 <- data.frame(a=k1, b=k2) - -timings <- numeric() -timings2 <- numeric() -for (i in 1:50) { - gc() - timings[i] = system.time(deduped <- df[!duplicated(df),])[3] - gc() - timings2[i] = system.time(deduped <- 
df[!duplicated(df[,c("a", "b")]),])[3] -} - -mean(timings) -mean(timings2) diff --git a/bench/io_roundtrip.py b/bench/io_roundtrip.py deleted file mode 100644 index d87da0ec6321a..0000000000000 --- a/bench/io_roundtrip.py +++ /dev/null @@ -1,116 +0,0 @@ -from __future__ import print_function -import time -import os -import numpy as np - -import la -import pandas -from pandas.compat import range -from pandas import datetools, DatetimeIndex - - -def timeit(f, iterations): - start = time.clock() - - for i in range(iterations): - f() - - return time.clock() - start - - -def rountrip_archive(N, K=50, iterations=10): - # Create data - arr = np.random.randn(N, K) - # lar = la.larry(arr) - dma = pandas.DataFrame(arr, - DatetimeIndex('1/1/2000', periods=N, - offset=datetools.Minute())) - dma[201] = 'bar' - - # filenames - filename_numpy = '/Users/wesm/tmp/numpy.npz' - filename_larry = '/Users/wesm/tmp/archive.hdf5' - filename_pandas = '/Users/wesm/tmp/pandas_tmp' - - # Delete old files - try: - os.unlink(filename_numpy) - except: - pass - try: - os.unlink(filename_larry) - except: - pass - - try: - os.unlink(filename_pandas) - except: - pass - - # Time a round trip save and load - # numpy_f = lambda: numpy_roundtrip(filename_numpy, arr, arr) - # numpy_time = timeit(numpy_f, iterations) / iterations - - # larry_f = lambda: larry_roundtrip(filename_larry, lar, lar) - # larry_time = timeit(larry_f, iterations) / iterations - - pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) - pandas_time = timeit(pandas_f, iterations) / iterations - print('pandas (HDF5) %7.4f seconds' % pandas_time) - - pickle_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) - pickle_time = timeit(pickle_f, iterations) / iterations - print('pandas (pickle) %7.4f seconds' % pickle_time) - - # print('Numpy (npz) %7.4f seconds' % numpy_time) - # print('larry (HDF5) %7.4f seconds' % larry_time) - - # Delete old files - try: - os.unlink(filename_numpy) - except: - pass - try: - os.unlink(filename_larry) - except: - pass - - try: - os.unlink(filename_pandas) - except: - pass - - -def numpy_roundtrip(filename, arr1, arr2): - np.savez(filename, arr1=arr1, arr2=arr2) - npz = np.load(filename) - arr1 = npz['arr1'] - arr2 = npz['arr2'] - - -def larry_roundtrip(filename, lar1, lar2): - io = la.IO(filename) - io['lar1'] = lar1 - io['lar2'] = lar2 - lar1 = io['lar1'] - lar2 = io['lar2'] - - -def pandas_roundtrip(filename, dma1, dma2): - # What's the best way to code this? 
- from pandas.io.pytables import HDFStore - store = HDFStore(filename) - store['dma1'] = dma1 - store['dma2'] = dma2 - dma1 = store['dma1'] - dma2 = store['dma2'] - - -def pandas_roundtrip_pickle(filename, dma1, dma2): - dma1.save(filename) - dma1 = pandas.DataFrame.load(filename) - dma2.save(filename) - dma2 = pandas.DataFrame.load(filename) - -if __name__ == '__main__': - rountrip_archive(10000, K=200) diff --git a/bench/larry.py b/bench/larry.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/bench/serialize.py b/bench/serialize.py deleted file mode 100644 index b0edd6a5752d2..0000000000000 --- a/bench/serialize.py +++ /dev/null @@ -1,89 +0,0 @@ -from __future__ import print_function -from pandas.compat import range, lrange -import time -import os -import numpy as np - -import la -import pandas - - -def timeit(f, iterations): - start = time.clock() - - for i in range(iterations): - f() - - return time.clock() - start - - -def roundtrip_archive(N, iterations=10): - - # Create data - arr = np.random.randn(N, N) - lar = la.larry(arr) - dma = pandas.DataFrame(arr, lrange(N), lrange(N)) - - # filenames - filename_numpy = '/Users/wesm/tmp/numpy.npz' - filename_larry = '/Users/wesm/tmp/archive.hdf5' - filename_pandas = '/Users/wesm/tmp/pandas_tmp' - - # Delete old files - try: - os.unlink(filename_numpy) - except: - pass - try: - os.unlink(filename_larry) - except: - pass - try: - os.unlink(filename_pandas) - except: - pass - - # Time a round trip save and load - numpy_f = lambda: numpy_roundtrip(filename_numpy, arr, arr) - numpy_time = timeit(numpy_f, iterations) / iterations - - larry_f = lambda: larry_roundtrip(filename_larry, lar, lar) - larry_time = timeit(larry_f, iterations) / iterations - - pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) - pandas_time = timeit(pandas_f, iterations) / iterations - - print('Numpy (npz) %7.4f seconds' % numpy_time) - print('larry (HDF5) %7.4f seconds' % larry_time) - print('pandas (HDF5) %7.4f seconds' % pandas_time) - - -def numpy_roundtrip(filename, arr1, arr2): - np.savez(filename, arr1=arr1, arr2=arr2) - npz = np.load(filename) - arr1 = npz['arr1'] - arr2 = npz['arr2'] - - -def larry_roundtrip(filename, lar1, lar2): - io = la.IO(filename) - io['lar1'] = lar1 - io['lar2'] = lar2 - lar1 = io['lar1'] - lar2 = io['lar2'] - - -def pandas_roundtrip(filename, dma1, dma2): - from pandas.io.pytables import HDFStore - store = HDFStore(filename) - store['dma1'] = dma1 - store['dma2'] = dma2 - dma1 = store['dma1'] - dma2 = store['dma2'] - - -def pandas_roundtrip_pickle(filename, dma1, dma2): - dma1.save(filename) - dma1 = pandas.DataFrame.load(filename) - dma2.save(filename) - dma2 = pandas.DataFrame.load(filename) diff --git a/bench/test.py b/bench/test.py deleted file mode 100644 index 2339deab313a1..0000000000000 --- a/bench/test.py +++ /dev/null @@ -1,70 +0,0 @@ -import numpy as np -import itertools -import collections -import scipy.ndimage as ndi -from pandas.compat import zip, range - -N = 10000 - -lat = np.random.randint(0, 360, N) -lon = np.random.randint(0, 360, N) -data = np.random.randn(N) - - -def groupby1(lat, lon, data): - indexer = np.lexsort((lon, lat)) - lat = lat.take(indexer) - lon = lon.take(indexer) - sorted_data = data.take(indexer) - - keys = 1000. 
* lat + lon - unique_keys = np.unique(keys) - bounds = keys.searchsorted(unique_keys) - - result = group_agg(sorted_data, bounds, lambda x: x.mean()) - - decoder = keys.searchsorted(unique_keys) - - return dict(zip(zip(lat.take(decoder), lon.take(decoder)), result)) - - -def group_mean(lat, lon, data): - indexer = np.lexsort((lon, lat)) - lat = lat.take(indexer) - lon = lon.take(indexer) - sorted_data = data.take(indexer) - - keys = 1000 * lat + lon - unique_keys = np.unique(keys) - - result = ndi.mean(sorted_data, labels=keys, index=unique_keys) - decoder = keys.searchsorted(unique_keys) - - return dict(zip(zip(lat.take(decoder), lon.take(decoder)), result)) - - -def group_mean_naive(lat, lon, data): - grouped = collections.defaultdict(list) - for lt, ln, da in zip(lat, lon, data): - grouped[(lt, ln)].append(da) - - averaged = dict((ltln, np.mean(da)) for ltln, da in grouped.items()) - - return averaged - - -def group_agg(values, bounds, f): - N = len(values) - result = np.empty(len(bounds), dtype=float) - for i, left_bound in enumerate(bounds): - if i == len(bounds) - 1: - right_bound = N - else: - right_bound = bounds[i + 1] - - result[i] = f(values[left_bound: right_bound]) - - return result - -# for i in range(10): -# groupby1(lat, lon, data) diff --git a/bench/zoo_bench.R b/bench/zoo_bench.R deleted file mode 100644 index 294d55f51a9ab..0000000000000 --- a/bench/zoo_bench.R +++ /dev/null @@ -1,71 +0,0 @@ -library(zoo) -library(xts) -library(fts) -library(tseries) -library(its) -library(xtable) - -## indices = rep(NA, 100000) -## for (i in 1:100000) -## indices[i] <- paste(sample(letters, 10), collapse="") - - - -## x <- zoo(rnorm(100000), indices) -## y <- zoo(rnorm(90000), indices[sample(1:100000, 90000)]) - -## indices <- as.POSIXct(1:100000) - -indices <- as.POSIXct(Sys.Date()) + seq(1, 100000000, 100) - -sz <- 500000 - -## x <- xts(rnorm(sz), sample(indices, sz)) -## y <- xts(rnorm(sz), sample(indices, sz)) - -zoo.bench <- function(){ - x <- zoo(rnorm(sz), sample(indices, sz)) - y <- zoo(rnorm(sz), sample(indices, sz)) - timeit(function() {x + y}) -} - -xts.bench <- function(){ - x <- xts(rnorm(sz), sample(indices, sz)) - y <- xts(rnorm(sz), sample(indices, sz)) - timeit(function() {x + y}) -} - -fts.bench <- function(){ - x <- fts(rnorm(sz), sort(sample(indices, sz))) - y <- fts(rnorm(sz), sort(sample(indices, sz)) - timeit(function() {x + y}) -} - -its.bench <- function(){ - x <- its(rnorm(sz), sort(sample(indices, sz))) - y <- its(rnorm(sz), sort(sample(indices, sz))) - timeit(function() {x + y}) -} - -irts.bench <- function(){ - x <- irts(sort(sample(indices, sz)), rnorm(sz)) - y <- irts(sort(sample(indices, sz)), rnorm(sz)) - timeit(function() {x + y}) -} - -timeit <- function(f){ - timings <- numeric() - for (i in 1:10) { - gc() - timings[i] = system.time(f())[3] - } - mean(timings) -} - -bench <- function(){ - results <- c(xts.bench(), fts.bench(), its.bench(), zoo.bench()) - names <- c("xts", "fts", "its", "zoo") - data.frame(results, names) -} - -result <- bench() diff --git a/bench/zoo_bench.py b/bench/zoo_bench.py deleted file mode 100644 index 74cb1952a5a2a..0000000000000 --- a/bench/zoo_bench.py +++ /dev/null @@ -1,36 +0,0 @@ -from pandas import * -from pandas.util.testing import rands - -n = 1000000 -# indices = Index([rands(10) for _ in xrange(n)]) - - -def sample(values, k): - sampler = np.random.permutation(len(values)) - return values.take(sampler[:k]) -sz = 500000 -rng = np.arange(0, 10000000000000, 10000000) -stamps = np.datetime64(datetime.now()).view('i8') + 
rng -idx1 = np.sort(sample(stamps, sz)) -idx2 = np.sort(sample(stamps, sz)) -ts1 = Series(np.random.randn(sz), idx1) -ts2 = Series(np.random.randn(sz), idx2) - - -# subsample_size = 90000 - -# x = Series(np.random.randn(100000), indices) -# y = Series(np.random.randn(subsample_size), -# index=sample(indices, subsample_size)) - - -# lx = larry(np.random.randn(100000), [list(indices)]) -# ly = larry(np.random.randn(subsample_size), [list(y.index)]) - -# Benchmark 1: Two 1-million length time series (int64-based index) with -# randomly chosen timestamps - -# Benchmark 2: Join two 5-variate time series DataFrames (outer and inner join) - -# df1 = DataFrame(np.random.randn(1000000, 5), idx1, columns=range(5)) -# df2 = DataFrame(np.random.randn(1000000, 5), idx2, columns=range(5, 10)) diff --git a/doc/source/conf.py b/doc/source/conf.py index 394fa44c30573..cb3063d59beae 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -17,6 +17,11 @@ import importlib from pandas.compat import u, PY3 +try: + raw_input # Python 2 +except NameError: + raw_input = input # Python 3 + # https://github.com/sphinx-doc/sphinx/pull/2325/files # Workaround for sphinx-build recursion limit overflow: # pickle.dump(doctree, f, pickle.HIGHEST_PROTOCOL) diff --git a/doc/sphinxext/ipython_sphinxext/ipython_directive.py b/doc/sphinxext/ipython_sphinxext/ipython_directive.py index 49fbacba99592..922767a8e2d46 100644 --- a/doc/sphinxext/ipython_sphinxext/ipython_directive.py +++ b/doc/sphinxext/ipython_sphinxext/ipython_directive.py @@ -111,7 +111,7 @@ import sys import tempfile import ast -from pandas.compat import zip, range, map, lmap, u, cStringIO as StringIO +from pandas.compat import zip, range, map, lmap, u, text_type, cStringIO as StringIO import warnings # To keep compatibility with various python versions @@ -138,10 +138,8 @@ if PY3: from io import StringIO - text_type = str else: from StringIO import StringIO - text_type = unicode #----------------------------------------------------------------------------- # Globals diff --git a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py index 099761f38bb44..74ea120bf0b64 100755 --- a/scripts/find_commits_touching_func.py +++ b/scripts/find_commits_touching_func.py @@ -4,7 +4,7 @@ # copryright 2013, y-p @ github from __future__ import print_function -from pandas.compat import range, lrange, map +from pandas.compat import range, lrange, map, string_types, text_type """Search the git history for all commits touching a named method @@ -94,7 +94,7 @@ def get_hits(defname,files=()): def get_commit_info(c,fmt,sep='\t'): r=sh.git('log', "--format={}".format(fmt), '{}^..{}'.format(c,c),"-n","1",_tty_out=False) - return compat.text_type(r).split(sep) + return text_type(r).split(sep) def get_commit_vitals(c,hlen=HASH_LEN): h,s,d= get_commit_info(c,'%H\t%s\t%ci',"\t") @@ -183,11 +183,11 @@ def main(): !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
""") return - if isinstance(args.file_masks,compat.string_types): + if isinstance(args.file_masks, string_types): args.file_masks = args.file_masks.split(',') - if isinstance(args.path_masks,compat.string_types): + if isinstance(args.path_masks, string_types): args.path_masks = args.path_masks.split(',') - if isinstance(args.dir_masks,compat.string_types): + if isinstance(args.dir_masks, string_types): args.dir_masks = args.dir_masks.split(',') logger.setLevel(getattr(logging,args.debug_level)) diff --git a/scripts/windows_builder/build_27-32.bat b/scripts/windows_builder/build_27-32.bat deleted file mode 100644 index 37eb4d436d567..0000000000000 --- a/scripts/windows_builder/build_27-32.bat +++ /dev/null @@ -1,25 +0,0 @@ -@echo off -echo "starting 27-32" - -setlocal EnableDelayedExpansion -set MSSdk=1 -CALL "C:\Program Files\Microsoft SDKs\Windows\v7.0\Bin\SetEnv.cmd" /x86 /release -set DISTUTILS_USE_SDK=1 - -title 27-32 build -echo "building" -cd "c:\users\Jeff Reback\documents\github\pandas" -C:\python27-32\python.exe setup.py build > build.27-32.log 2>&1 - -title "installing" -C:\python27-32\python.exe setup.py bdist --formats=wininst > install.27-32.log 2>&1 - -echo "testing" -C:\python27-32\scripts\nosetests -A "not slow" build\lib.win32-2.7\pandas > test.27-32.log 2>&1 - -echo "versions" -cd build\lib.win32-2.7 -C:\python27-32\python.exe ../../ci/print_versions.py > ../../versions.27-32.log 2>&1 - -exit - diff --git a/scripts/windows_builder/build_27-64.bat b/scripts/windows_builder/build_27-64.bat deleted file mode 100644 index e76e25d0ef39c..0000000000000 --- a/scripts/windows_builder/build_27-64.bat +++ /dev/null @@ -1,25 +0,0 @@ -@echo off -echo "starting 27-64" - -setlocal EnableDelayedExpansion -set MSSdk=1 -CALL "C:\Program Files\Microsoft SDKs\Windows\v7.0\Bin\SetEnv.cmd" /x64 /release -set DISTUTILS_USE_SDK=1 - -title 27-64 build -echo "building" -cd "c:\users\Jeff Reback\documents\github\pandas" -C:\python27-64\python.exe setup.py build > build.27-64.log 2>&1 - -echo "installing" -C:\python27-64\python.exe setup.py bdist --formats=wininst > install.27-64.log 2>&1 - -echo "testing" -C:\python27-64\scripts\nosetests -A "not slow" build\lib.win-amd64-2.7\pandas > test.27-64.log 2>&1 - -echo "versions" -cd build\lib.win-amd64-2.7 -C:\python27-64\python.exe ../../ci/print_versions.py > ../../versions.27-64.log 2>&1 - -exit - diff --git a/scripts/windows_builder/build_34-32.bat b/scripts/windows_builder/build_34-32.bat deleted file mode 100644 index 8e060e000bc8f..0000000000000 --- a/scripts/windows_builder/build_34-32.bat +++ /dev/null @@ -1,27 +0,0 @@ -@echo off -echo "starting 34-32" - -setlocal EnableDelayedExpansion -set MSSdk=1 -CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x86 /release -set DISTUTILS_USE_SDK=1 - -title 34-32 build -echo "building" -cd "c:\users\Jeff Reback\documents\github\pandas" -C:\python34-32\python.exe setup.py build > build.34-32.log 2>&1 - -echo "installing" -C:\python34-32\python.exe setup.py bdist --formats=wininst > install.34-32.log 2>&1 - -echo "testing" -C:\python34-32\scripts\nosetests -A "not slow" build\lib.win32-3.4\pandas > test.34-32.log 2>&1 - -echo "versions" -cd build\lib.win32-3.4 -C:\python34-32\python.exe ../../ci/print_versions.py > ../../versions.34-32.log 2>&1 - -exit - - - diff --git a/scripts/windows_builder/build_34-64.bat b/scripts/windows_builder/build_34-64.bat deleted file mode 100644 index 3a8512b730346..0000000000000 --- a/scripts/windows_builder/build_34-64.bat +++ /dev/null @@ -1,27 +0,0 @@ 
-@echo off -echo "starting 34-64" - -setlocal EnableDelayedExpansion -set MSSdk=1 -CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 /release -set DISTUTILS_USE_SDK=1 - -title 34-64 build -echo "building" -cd "c:\users\Jeff Reback\documents\github\pandas" -C:\python34-64\python.exe setup.py build > build.34-64.log 2>&1 - -echo "installing" -C:\python34-64\python.exe setup.py bdist --formats=wininst > install.34-64.log 2>&1 - -echo "testing" -C:\python34-64\scripts\nosetests -A "not slow" build\lib.win-amd64-3.4\pandas > test.34-64.log 2>&1 - -echo "versions" -cd build\lib.win-amd64-3.4 -C:\python34-64\python.exe ../../ci/print_versions.py > ../../versions.34-64.log 2>&1 - -exit - - - diff --git a/scripts/windows_builder/check_and_build.bat b/scripts/windows_builder/check_and_build.bat deleted file mode 100644 index 32be1bde1f7f3..0000000000000 --- a/scripts/windows_builder/check_and_build.bat +++ /dev/null @@ -1,2 +0,0 @@ -set PYTHONPATH=c:/python27-64/lib -c:/python27-64/python.exe c:/Builds/check_and_build.py %1 %2 %3 %4 %4 %6 %7 %8 %9 diff --git a/scripts/windows_builder/check_and_build.py b/scripts/windows_builder/check_and_build.py deleted file mode 100644 index 2eb32fb4265d9..0000000000000 --- a/scripts/windows_builder/check_and_build.py +++ /dev/null @@ -1,194 +0,0 @@ -import datetime -import git -import logging -import os, re, time -import subprocess -import argparse -import pysftp - -# parse the args -parser = argparse.ArgumentParser(description='build, test, and install updated versions of master pandas') -parser.add_argument('-b', '--build', - help='run just this build', - dest='build') -parser.add_argument('-u', '--update', - help='get a git update', - dest='update', - action='store_true', - default=False) -parser.add_argument('-t', '--test', - help='run the tests', - dest='test', - action='store_true', - default=False) -parser.add_argument('-c', '--compare', - help='show the last tests compare', - dest='compare', - action='store_true', - default=False) -parser.add_argument('-v', '--version', - help='show the last versions', - dest='version', - action='store_true', - default=False) -parser.add_argument('-i', '--install', - help='run the install', - dest='install', - action='store_true', - default=False) -parser.add_argument('--dry', - help='dry run', - dest='dry', - action='store_true', - default=False) - -args = parser.parse_args() -dry_run = args.dry - -builds = ['27-32','27-64','34-32','34-64'] -base_dir = "C:\Users\Jeff Reback\Documents\GitHub\pandas" -remote_host='pandas.pydata.org' -username='pandas' -password=############ - -# drop python from our environment to avoid -# passing this onto sub-processes -env = os.environ -del env['PYTHONPATH'] - -# the stdout logger -fmt = '%(asctime)s: %(message)s' -logger = logging.getLogger('check_and_build') -logger.setLevel(logging.DEBUG) -stream_handler = logging.StreamHandler() -stream_handler.setFormatter(logging.Formatter(fmt)) -logger.addHandler(stream_handler) - -def run_all(test=False,compare=False,install=False,version=False,build=None): - # run everything - - for b in builds: - if build is not None and build != b: - continue - if test: - do_rebuild(b) - if compare or test: - try: - do_compare(b) - except (Exception) as e: - logger.info("ERROR COMPARE {0} : {1}".format(b,e)) - if version: - try: - do_version(b) - except (Exception) as e: - logger.info("ERROR VERSION {0} : {1}".format(b,e)) - - if install: - run_install() - -def do_rebuild(build): - # trigger the rebuild - - cmd = 
"c:/Builds/build_{0}.bat".format(build) - logger.info("rebuild : {0}".format(cmd)) - p = subprocess.Popen("start /wait /min {0}".format(cmd),env=env,shell=True,close_fds=True) - ret = p.wait() - -def do_compare(build): - # print the test outputs - - f = os.path.join(base_dir,"test.{0}.log".format(build)) - with open(f,'r') as fh: - for l in fh: - l = l.rstrip() - if l.startswith('ERROR:'): - logger.info("{0} : {1}".format(build,l)) - if l.startswith('Ran') or l.startswith('OK') or l.startswith('FAIL'): - logger.info("{0} : {1}".format(build,l)) - -def do_version(build): - # print the version strings - - f = os.path.join(base_dir,"versions.{0}.log".format(build)) - with open(f,'r') as fh: - for l in fh: - l = l.rstrip() - logger.info("{0} : {1}".format(build,l)) - -def do_update(is_verbose=True): - # update git; return True if the commit has changed - - repo = git.Repo(base_dir) - master = repo.heads.master - origin = repo.remotes.origin - start_commit = master.commit - - if is_verbose: - logger.info("current commit : {0}".format(start_commit)) - - try: - origin.update() - except (Exception) as e: - logger.info("update exception : {0}".format(e)) - try: - origin.pull() - except (Exception) as e: - logger.info("pull exception : {0}".format(e)) - - result = start_commit != master.commit - if result: - if is_verbose: - logger.info("commits changed : {0} -> {1}".format(start_commit,master.commit)) - return result - -def run_install(): - # send the installation binaries - - repo = git.Repo(base_dir) - master = repo.heads.master - commit = master.commit - short_hash = str(commit)[:7] - - logger.info("sending files : {0}".format(commit)) - d = os.path.join(base_dir,"dist") - files = [ f for f in os.listdir(d) if re.search(short_hash,f) ] - srv = pysftp.Connection(host=remote_host,username=username,password=password) - srv.chdir("www/pandas-build/dev") - - # get current files - remote_files = set(srv.listdir(path='.')) - - for f in files: - if f not in remote_files: - logger.info("sending: {0}".format(f)) - local = os.path.join(d,f) - srv.put(localpath=local) - - srv.close() - logger.info("sending files: done") - -# just perform the action -if args.update or args.test or args.compare or args.install or args.version: - if args.update: - do_update() - run_all(test=args.test,compare=args.compare,install=args.install,version=args.version,build=args.build) - exit(0) - -# file logging -file_handler = logging.FileHandler("C:\Builds\logs\check_and_build.log") -file_handler.setFormatter(logging.Formatter(fmt)) -logger.addHandler(file_handler) - -logger.info("start") - -# main loop -while(True): - - if do_update(): - run_all(test=True,install=False) - - time.sleep(60*60) - -logger.info("exit") -file_handler.close() - diff --git a/scripts/windows_builder/readme.txt b/scripts/windows_builder/readme.txt deleted file mode 100644 index 789e2a9ee0c63..0000000000000 --- a/scripts/windows_builder/readme.txt +++ /dev/null @@ -1,17 +0,0 @@ -This is a collection of windows batch scripts (and a python script) -to rebuild the binaries, test, and upload the binaries for public distribution -upon a commit on github. 
-
-Obviously requires that these be setup on windows
-Requires an install of Windows SDK 3.5 and 4.0
-Full python installs for each version with the deps
-
-Currently supporting
-
-27-32,27-64,34-32,34-64
-
-Note that 34 use the 4.0 SDK, while the other suse 3.5 SDK
-
-I installed these scripts in C:\Builds
-
-Installed libaries in C:\Installs
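The cleanup patch above applies one small pattern throughout the doc and bench scripts to make them run under both Python 2 and 3: import the Python 3 behaviour where possible, and shim names that were renamed. A condensed sketch of that pattern, distilled from the conf.py and vbench_to_asv.py hunks above (the prompt string and module name below are illustrative, not code from the patch):

    from __future__ import print_function  # py2: print becomes a function

    try:
        raw_input              # defined on Python 2
    except NameError:
        raw_input = input      # Python 3 renamed it; input() returns str

    name = raw_input('target module: ')
    print('translating', name)
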
# add in any other columns we want to have (completeness) - nan_vec = np.empty(len(index)) - nan_vec.fill(nan) - for c in columns: - if c not in sdict: - sdict[c] = sp_maker(nan_vec) + nan_arr = sp_maker(np.full(len(index), np.nan)) + sdict.update((c, nan_arr) for c in columns if c not in sdict) return to_manager(sdict, columns, index) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index d47a95924bd10..632d3b4ad2e7a 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -643,6 +643,10 @@ def test_dataframe_dummies_preserve_categorical_dtype(self): class TestGetDummiesSparse(TestGetDummies): sparse = True + @pytest.mark.xfail(reason='nan in index is problematic (GH 16894)') + def test_include_na(self): + super(TestGetDummiesSparse, self).test_include_na() + class TestMakeAxisDummies(object): diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 654d12b782f37..a5d514644a8f1 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -1095,6 +1095,8 @@ def test_as_blocks(self): assert list(df_blocks.keys()) == ['float64'] tm.assert_frame_equal(df_blocks['float64'], df) + @pytest.mark.xfail(reason='nan column names in _init_dict problematic ' + '(GH 16894)') def test_nan_columnname(self): # GH 8822 nan_colname = DataFrame(Series(1.0, index=[0]), columns=[nan]) From dc54b6bbfd1da0947f3b66d4919e4b80e3207bce Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 17 Jul 2017 16:18:55 -0700 Subject: [PATCH 785/933] MAINT: Drop line_width and height from options (#16993) Deprecated since 0.11 and 0.12 respectively. --- doc/source/options.rst | 2 -- doc/source/whatsnew/v0.21.0.txt | 2 ++ pandas/core/config_init.py | 16 ---------------- pandas/io/formats/console.py | 4 ++-- pandas/tests/io/formats/test_format.py | 2 +- 5 files changed, 5 insertions(+), 21 deletions(-) diff --git a/doc/source/options.rst b/doc/source/options.rst index f373705a96f48..c585da64efece 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -304,7 +304,6 @@ display.float_format None The callable should accept a fl This is used in some places like SeriesFormatter. See core.format.EngFormatter for an example. -display.height 60 Deprecated. Use `display.max_rows` instead. display.large_repr truncate For DataFrames exceeding max_rows/max_cols, the repr (and HTML repr) can show a truncated table (the default from 0.13), @@ -323,7 +322,6 @@ display.latex.multicolumn_format 'l' Alignment of multicolumn labels display.latex.multirow False Combines rows when using a MultiIndex. Centered instead of top-aligned, separated by clines. -display.line_width 80 Deprecated. Use `display.width` instead. 
display.max_columns 20 max_rows and max_columns are used in __repr__() methods to decide if to_string() or info() is used to diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 935e9d740b91c..c63d4575bac43 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -124,6 +124,8 @@ Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - :func:`read_excel()` has dropped the ``has_index_names`` parameter (:issue:`10967`) +- The ``pd.options.display.height`` configuration has been dropped (:issue:`3663`) +- The ``pd.options.display.line_width`` configuration has been dropped (:issue:`2881`) - The ``pd.options.display.mpl_style`` configuration has been dropped (:issue:`12190`) - ``Index`` has dropped the ``.sym_diff()`` method in favor of ``.symmetric_difference()`` (:issue:`12591`) - ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index ae3001564a62f..06ce811703a8c 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -213,14 +213,6 @@ def use_numexpr_cb(key): (currently both are identical) """ -pc_line_width_deprecation_warning = """\ -line_width has been deprecated, use display.width instead (currently both are -identical) -""" - -pc_height_deprecation_warning = """\ -height has been deprecated. -""" pc_width_doc = """ : int @@ -383,14 +375,6 @@ def table_schema_cb(key): cf.register_option('html.border', 1, pc_html_border_doc, validator=is_int) - -cf.deprecate_option('display.line_width', - msg=pc_line_width_deprecation_warning, - rkey='display.width') - -cf.deprecate_option('display.height', msg=pc_height_deprecation_warning, - rkey='display.max_rows') - with cf.config_prefix('html'): cf.register_option('border', 1, pc_html_border_doc, validator=is_int) diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index ab75e3fa253ce..bdff59939a4de 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -53,7 +53,7 @@ def get_console_size(): display_width = get_option('display.width') # deprecated. - display_height = get_option('display.height', silent=True) + display_height = get_option('display.max_rows') # Consider # interactive shell terminal, can detect term size @@ -71,7 +71,7 @@ def get_console_size(): # match default for width,height in config_init from pandas.core.config import get_default_val terminal_width = get_default_val('display.width') - terminal_height = get_default_val('display.height') + terminal_height = get_default_val('display.max_rows') else: # pure terminal terminal_width, terminal_height = get_terminal_size() diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 679d43ac492ca..e1499565ce4a6 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -302,7 +302,7 @@ def test_repr_non_interactive(self): df = DataFrame('hello', lrange(1000), lrange(5)) with option_context('mode.sim_interactive', False, 'display.width', 0, - 'display.height', 0, 'display.max_rows', 5000): + 'display.max_rows', 5000): assert not has_truncated_repr(df) assert not has_expanded_repr(df) From 81f8acef11e8d1e2f0ea78a7b57ee04bef1f6038 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 17 Jul 2017 16:29:57 -0700 Subject: [PATCH 786/933] COMPAT: Add back remove_na for seaborn (#16992) Closes gh-16971. 
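As an illustrative sketch (not part of the patch), this is roughly what the restored shim gives downstream callers such as seaborn; it assumes a pandas build with this change applied:

    import warnings
    import pandas as pd
    from pandas.core.series import remove_na  # deprecated shim restored by this patch

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        cleaned = remove_na(pd.Series([1.0, None, 3.0]))  # drops the missing value

    assert issubclass(w[-1].category, FutureWarning)  # the shim warns on every use
    assert cleaned.tolist() == [1.0, 3.0]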
--- pandas/core/series.py | 12 +++++++++++- pandas/tests/series/test_missing.py | 6 ++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4d5b718ce0ae9..219eca4277f32 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -37,7 +37,6 @@ maybe_convert_platform, maybe_cast_to_datetime, maybe_castable) from pandas.core.dtypes.missing import isnull, notnull, remove_na_arraylike - from pandas.core.common import (is_bool_indexer, _default_index, _asarray_tuplesafe, @@ -88,6 +87,17 @@ versionadded_to_excel='\n .. versionadded:: 0.20.0\n') +# see gh-16971 +def remove_na(arr): + """ + DEPRECATED : this function will be removed in a future version. + """ + + warnings.warn("remove_na is deprecated and is a private " + "function. Do not use.", FutureWarning, stacklevel=2) + return remove_na_arraylike(arr) + + def _coerce_method(converter): """ install the scalar coercion methods """ diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 8e73c17684a16..b5948e75aa73e 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -15,6 +15,7 @@ MultiIndex, Index, Timestamp, NaT, IntervalIndex) from pandas.compat import range from pandas._libs.tslib import iNaT +from pandas.core.series import remove_na from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm @@ -50,6 +51,11 @@ def _simple_ts(start, end, freq='D'): class TestSeriesMissingData(TestData): + def test_remove_na_deprecation(self): + # see gh-16971 + with tm.assert_produces_warning(FutureWarning): + remove_na(Series([])) + def test_timedelta_fillna(self): # GH 3371 s = Series([Timestamp('20130101'), Timestamp('20130101'), Timestamp( From 7b9a57fc99fcd63c55b041ea7c76f5c390c12aa0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 17 Jul 2017 21:31:42 -0400 Subject: [PATCH 787/933] COMPAT: np.full not available in all versions, xref #16773 (#17000) --- pandas/core/sparse/frame.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index e157ae16e71f9..5fe96d70fc16f 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -163,7 +163,9 @@ def _init_dict(self, data, index, columns, dtype=None): # TODO: figure out how to handle this case, all nan's? # add in any other columns we want to have (completeness) - nan_arr = sp_maker(np.full(len(index), np.nan)) + nan_arr = np.empty(len(index), dtype='float64') + nan_arr.fill(np.nan) + nan_arr = sp_maker(nan_arr) sdict.update((c, nan_arr) for c in columns if c not in sdict) return to_manager(sdict, columns, index) From fcb0263762a31724ba6db39bf1564569dda068a0 Mon Sep 17 00:00:00 2001 From: Lucas Kushner Date: Tue, 18 Jul 2017 00:01:26 -0500 Subject: [PATCH 788/933] DOC, TST: Clarify whitespace behavior in read_fwf documentation (#16950) Closes gh-16772 --- doc/source/io.rst | 6 ++++- pandas/io/parsers.py | 13 ++++++----- pandas/tests/io/parser/test_read_fwf.py | 29 +++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 9bf84e5419ffa..495d4e9c3a5a3 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1258,7 +1258,8 @@ Files with Fixed Width Columns While ``read_csv`` reads delimited data, the :func:`read_fwf` function works with data files that have known and fixed column widths. 
The function parameters -to ``read_fwf`` are largely the same as `read_csv` with two extra parameters: +to ``read_fwf`` are largely the same as `read_csv` with two extra parameters, and +a different usage of the ``delimiter`` parameter: - ``colspecs``: A list of pairs (tuples) giving the extents of the fixed-width fields of each line as half-open intervals (i.e., [from, to[ ). @@ -1267,6 +1268,9 @@ to ``read_fwf`` are largely the same as `read_csv` with two extra parameters: behaviour, if not specified, is to infer. - ``widths``: A list of field widths which can be used instead of 'colspecs' if the intervals are contiguous. + - ``delimiter``: Characters to consider as filler characters in the fixed-width file. + Can be used to specify the filler character of the fields + if it is not spaces (e.g., '~'). .. ipython:: python :suppress: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 343bc7a74fde8..1e7d9d420b35d 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -63,8 +63,6 @@ file. For file URLs, a host is expected. For instance, a local file could be file ://localhost/path/to/table.csv %s -delimiter : str, default ``None`` - Alternative argument name for sep. delim_whitespace : boolean, default False Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be used as the sep. Equivalent to setting ``sep='\s+'``. If this option @@ -316,7 +314,9 @@ be used automatically. In addition, separators longer than 1 character and different from ``'\s+'`` will be interpreted as regular expressions and will also force the use of the Python parsing engine. Note that regex - delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``""" + delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'`` +delimiter : str, default ``None`` + Alternative argument name for sep.""" _read_csv_doc = """ Read CSV (comma-separated) file into DataFrame @@ -341,15 +341,16 @@ widths : list of ints. optional A list of field widths which can be used instead of 'colspecs' if the intervals are contiguous. +delimiter : str, default ``'\t' + ' '`` + Characters to consider as filler characters in the fixed-width file. + Can be used to specify the filler character of the fields + if it is not spaces (e.g., '~'). """ _read_fwf_doc = """ Read a table of fixed-width formatted lines into DataFrame %s - -Also, 'delimiter' is used to specify the filler character of the -fields if it is not spaces (e.g., '~'). 
""" % (_parser_params % (_fwf_widths, '')) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 0bfeb5215f370..ec1d1a2a51cdc 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -405,3 +405,32 @@ def test_skiprows_inference_empty(self): with pytest.raises(EmptyDataError): read_fwf(StringIO(test), skiprows=3) + + def test_whitespace_preservation(self): + # Addresses Issue #16772 + data_expected = """ + a ,bbb + cc,dd """ + expected = read_csv(StringIO(data_expected), header=None) + + test_data = """ + a bbb + ccdd """ + result = read_fwf(StringIO(test_data), widths=[3, 3], + header=None, skiprows=[0], delimiter="\n\t") + + tm.assert_frame_equal(result, expected) + + def test_default_delimiter(self): + data_expected = """ +a,bbb +cc,dd""" + expected = read_csv(StringIO(data_expected), header=None) + + test_data = """ +a \tbbb +cc\tdd """ + result = read_fwf(StringIO(test_data), widths=[3, 3], + header=None, skiprows=[0]) + + tm.assert_frame_equal(result, expected) From 9e7666dae3b3b10d987ce154a51c78bcee6e0728 Mon Sep 17 00:00:00 2001 From: chris-b1 Date: Tue, 18 Jul 2017 06:26:44 -0500 Subject: [PATCH 789/933] API: add infer_objects for soft conversions (#16915) * API: add infer_objects for soft conversions * doc fixups * fixups * doc --- doc/source/api.rst | 2 + doc/source/basics.rst | 23 ++++++++- doc/source/whatsnew/v0.21.0.txt | 32 +++++++++++++ pandas/core/generic.py | 56 ++++++++++++++++++++-- pandas/tests/frame/test_block_internals.py | 26 ++++++++++ pandas/tests/series/test_dtypes.py | 18 +++++++ 6 files changed, 153 insertions(+), 4 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index d6053791d6f4b..77d095a965221 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -270,6 +270,7 @@ Conversion :toctree: generated/ Series.astype + Series.infer_objects Series.copy Series.isnull Series.notnull @@ -777,6 +778,7 @@ Conversion DataFrame.astype DataFrame.convert_objects + DataFrame.infer_objects DataFrame.copy DataFrame.isnull DataFrame.notnull diff --git a/doc/source/basics.rst b/doc/source/basics.rst index d8b1602fb104d..4211b15203721 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -2024,7 +2024,28 @@ object conversion ~~~~~~~~~~~~~~~~~ pandas offers various functions to try to force conversion of types from the ``object`` dtype to other types. -The following functions are available for one dimensional object arrays or scalars: +In cases where the data is already of the correct type, but stored in an ``object`` array, the +:meth:`~DataFrame.infer_objects` and :meth:`~Series.infer_objects` can be used to soft convert +to the correct type. + + .. ipython:: python + + df = pd.DataFrame([[1, 2], + ['a', 'b'], + [datetime.datetime(2016, 3, 2), datetime.datetime(2016, 3, 2)]]) + df = df.T + df + df.dtypes + +Because the data transposed the original inference stored all columns as object, which +``infer_objects`` will correct. + + .. 
ipython:: python + + df.infer_objects().dtypes + +The following functions are available for one dimensional object arrays or scalars to perform +hard conversion of objects to a specified type: - :meth:`~pandas.to_numeric` (conversion to numeric dtypes) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index c63d4575bac43..cba3691b25ab1 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -25,6 +25,38 @@ New features - Added ``__fspath__`` method to :class:`~pandas.HDFStore`, :class:`~pandas.ExcelFile`, and :class:`~pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`) + +.. _whatsnew_0210.enhancements.infer_objects: + +``infer_objects`` type conversion +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The `:meth:`~DataFrame.infer_objects` and :meth:`~Series.infer_objects` +methods have been added to perform dtype inference on object columns, replacing +some of the functionality of the deprecated ``convert_objects`` +method. See the documentation :ref:`here ` +for more details. (:issue:`11221`) + +This function only performs soft conversions on object columns, converting Python objects +to native types, but not any coercive conversions. For example: + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3], + 'B': np.array([1, 2, 3], dtype='object'), + 'C': ['1', '2', '3']}) + df.dtypes + df.infer_objects().dtype + +Note that column ``'C'`` was not converted - only scalar numeric types +will be inferred to a new type. Other types of conversion should be accomplished +using :func:`to_numeric` function (or :func:`to_datetime`, :func:`to_timedelta`). +.. ipython:: python + + df = df.infer_objects() + df['C'] = pd.to_numeric(df['C'], errors='coerce') + df.dtypes + .. _whatsnew_0210.enhancements.other: Other Enhancements diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f12592feaa4c3..c95129bdaa005 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3671,9 +3671,12 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, converted : same as input object """ from warnings import warn - warn("convert_objects is deprecated. Use the data-type specific " - "converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.", - FutureWarning, stacklevel=2) + msg = ("convert_objects is deprecated. To re-infer data dtypes for " + "object columns, use {klass}.infer_objects()\nFor all " + "other conversions use the data-type specific converters " + "pd.to_datetime, pd.to_timedelta and pd.to_numeric." + ).format(klass=self.__class__.__name__) + warn(msg, FutureWarning, stacklevel=2) return self._constructor( self._data.convert(convert_dates=convert_dates, @@ -3681,6 +3684,53 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, convert_timedeltas=convert_timedeltas, copy=copy)).__finalize__(self) + def infer_objects(self): + """ + Attempt to infer better dtypes for object columns. + + Attempts soft conversion of object-dtyped + columns, leaving non-object and unconvertible + columns unchanged. The inference rules are the + same as during normal Series/DataFrame construction. + + .. versionadded:: 0.20.0 + + See Also + -------- + pandas.to_datetime : Convert argument to datetime. + pandas.to_timedelta : Convert argument to timedelta. 
+        pandas.to_numeric : Convert argument to numeric type.
+
+        Returns
+        -------
+        converted : same type as input object
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({"A": ["a", 1, 2, 3]})
+        >>> df = df.iloc[1:]
+        >>> df
+           A
+        1  1
+        2  2
+        3  3
+
+        >>> df.dtypes
+        A    object
+        dtype: object
+
+        >>> df.infer_objects().dtypes
+        A    int64
+        dtype: object
+        """
+        # numeric=False necessary to only soft convert;
+        # python objects will still be converted to
+        # native numpy numeric types
+        return self._constructor(
+            self._data.convert(datetime=True, numeric=False,
+                               timedelta=True, coerce=False,
+                               copy=True)).__finalize__(self)
+
     # ----------------------------------------------------------------------
     # Filling NA's

diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
index c1a5b437be5d0..f66070fd66813 100644
--- a/pandas/tests/frame/test_block_internals.py
+++ b/pandas/tests/frame/test_block_internals.py
@@ -495,6 +495,32 @@ def test_convert_objects_no_conversion(self):
         mixed2 = mixed1._convert(datetime=True)
         assert_frame_equal(mixed1, mixed2)

+    def test_infer_objects(self):
+        # GH 11221
+        df = DataFrame({'a': ['a', 1, 2, 3],
+                        'b': ['b', 2.0, 3.0, 4.1],
+                        'c': ['c', datetime(2016, 1, 1),
+                              datetime(2016, 1, 2),
+                              datetime(2016, 1, 3)],
+                        'd': [1, 2, 3, 'd']},
+                       columns=['a', 'b', 'c', 'd'])
+        df = df.iloc[1:].infer_objects()
+
+        assert df['a'].dtype == 'int64'
+        assert df['b'].dtype == 'float64'
+        assert df['c'].dtype == 'M8[ns]'
+        assert df['d'].dtype == 'object'
+
+        expected = DataFrame({'a': [1, 2, 3],
+                              'b': [2.0, 3.0, 4.1],
+                              'c': [datetime(2016, 1, 1),
+                                    datetime(2016, 1, 2),
+                                    datetime(2016, 1, 3)],
+                              'd': [2, 3, 'd']},
+                             columns=['a', 'b', 'c', 'd'])
+        # reconstruct frame to verify inference is same
+        tm.assert_frame_equal(df.reset_index(drop=True), expected)
+
     def test_stale_cached_series_bug_473(self):

         # this is chained, but ok
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
index 2ec579842e33f..c214280ee8386 100644
--- a/pandas/tests/series/test_dtypes.py
+++ b/pandas/tests/series/test_dtypes.py
@@ -268,3 +268,21 @@ def test_series_to_categorical(self):
         expected = Series(['a', 'b', 'c'], dtype='category')

         tm.assert_series_equal(result, expected)
+
+    def test_infer_objects_series(self):
+        # GH 11221
+        actual = Series(np.array([1, 2, 3], dtype='O')).infer_objects()
+        expected = Series([1, 2, 3])
+        tm.assert_series_equal(actual, expected)
+
+        actual = Series(np.array([1, 2, 3, None], dtype='O')).infer_objects()
+        expected = Series([1., 2., 3., np.nan])
+        tm.assert_series_equal(actual, expected)
+
+        # only soft conversions, unconvertible pass thru unchanged
+        actual = (Series(np.array([1, 2, 3, None, 'a'], dtype='O'))
+                  .infer_objects())
+        expected = Series([1, 2, 3, None, 'a'])
+
+        assert actual.dtype == 'object'
+        tm.assert_series_equal(actual, expected)

From 6a5e56dc9402136e74e8c818a6947fd495bcd3b2 Mon Sep 17 00:00:00 2001
From: Jon Crall
Date: Tue, 18 Jul 2017 11:58:55 -0400
Subject: [PATCH 790/933] BUG: np.inf now causes Index to upcast from int to float (#16996)

Closes gh-16957.
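For illustration (not part of the patch), with this fix applied a ``np.inf`` label upcasts an integer index to float instead of raising; a minimal sketch mirroring the new tests:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(columns=[0])
    df.loc[1] = 1
    df.loc[2] = 2
    df.loc[np.inf] = 3  # previously this lookup path could raise OverflowError

    # the index is upcast to float so it can hold inf
    print(df.index)           # Float64Index([1.0, 2.0, inf], dtype='float64')
    print(df.loc[np.inf, 0])  # 3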
--- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/base.py | 6 +-- pandas/tests/indexing/test_indexing.py | 56 ++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index cba3691b25ab1..2259eb7d89534 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -179,6 +179,7 @@ Bug Fixes ~~~~~~~~~ - Fixes regression in 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) +- Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index bbbc19b36964d..5d50f961927c7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -666,7 +666,7 @@ def _try_convert_to_int_index(cls, data, copy, name): res = data.astype('u8', copy=False) if (res == data).all(): return UInt64Index(res, copy=copy, name=name) - except (TypeError, ValueError): + except (OverflowError, TypeError, ValueError): pass raise ValueError @@ -1640,7 +1640,7 @@ def __contains__(self, key): hash(key) try: return key in self._engine - except (TypeError, ValueError): + except (OverflowError, TypeError, ValueError): return False _index_shared_docs['contains'] = """ @@ -3365,7 +3365,7 @@ def _maybe_cast_indexer(self, key): ckey = int(key) if ckey == key: key = ckey - except (ValueError, TypeError): + except (OverflowError, ValueError, TypeError): pass return key diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 9fa677eb624ae..98f5d5eb140df 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -63,6 +63,34 @@ def f(): pytest.raises(ValueError, f) + def test_inf_upcast(self): + # GH 16957 + # We should be able to use np.inf as a key + # np.inf should cause an index to convert to float + + # Test with np.inf in rows + df = pd.DataFrame(columns=[0]) + df.loc[1] = 1 + df.loc[2] = 2 + df.loc[np.inf] = 3 + + # make sure we can look up the value + assert df.loc[np.inf, 0] == 3 + + result = df.index + expected = pd.Float64Index([1, 2, np.inf]) + tm.assert_index_equal(result, expected) + + # Test with np.inf in columns + df = pd.DataFrame() + df.loc[0, 0] = 1 + df.loc[1, 1] = 2 + df.loc[0, np.inf] = 3 + + result = df.columns + expected = pd.Float64Index([0, 1, np.inf]) + tm.assert_index_equal(result, expected) + def test_setitem_dtype_upcast(self): # GH3216 @@ -542,6 +570,34 @@ def test_astype_assignment_with_dups(self): # result = df.get_dtype_counts().sort_index() # expected = Series({'float64': 2, 'object': 1}).sort_index() + @pytest.mark.parametrize("index,val", [ + (pd.Index([0, 1, 2]), 2), + (pd.Index([0, 1, '2']), '2'), + (pd.Index([0, 1, 2, np.inf, 4]), 4), + (pd.Index([0, 1, 2, np.nan, 4]), 4), + (pd.Index([0, 1, 2, np.inf]), np.inf), + (pd.Index([0, 1, 2, np.nan]), np.nan), + ]) + def test_index_contains(self, index, val): + assert val in index + + @pytest.mark.parametrize("index,val", [ + (pd.Index([0, 1, 2]), '2'), + (pd.Index([0, 1, '2']), 2), + (pd.Index([0, 1, 2, np.inf]), 4), + (pd.Index([0, 1, 2, np.nan]), 4), + (pd.Index([0, 1, 2, np.inf]), np.nan), + (pd.Index([0, 1, 2, np.nan]), np.inf), + # Checking if np.inf in Int64Index should not cause an OverflowError + # Related to GH 16957 + (pd.Int64Index([0, 1, 2]), np.inf), + (pd.Int64Index([0, 1, 2]), np.nan), + (pd.UInt64Index([0, 1, 
2]), np.inf), + (pd.UInt64Index([0, 1, 2]), np.nan), + ]) + def test_index_not_contains(self, index, val): + assert val not in index + def test_index_type_coercion(self): with catch_warnings(record=True): From 34210ac4d8c61ec4d695baba24d84bd7a1826af4 Mon Sep 17 00:00:00 2001 From: parchd-1 Date: Tue, 18 Jul 2017 18:08:03 +0200 Subject: [PATCH 791/933] DOC: Make highlight functions match documentation (#16999) Closes gh-16998. --- pandas/io/formats/style.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index b08d3877f3b03..d88a230b42403 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1054,9 +1054,9 @@ def highlight_max(self, subset=None, color='yellow', axis=0): subset: IndexSlice, default None a valid slice for ``data`` to limit the style application to color: str, default 'yellow' - axis: int, str, or None; default None - 0 or 'index' for columnwise, 1 or 'columns' for rowwise - or ``None`` for tablewise (the default) + axis: int, str, or None; default 0 + 0 or 'index' for columnwise (default), 1 or 'columns' for rowwise, + or ``None`` for tablewise Returns ------- @@ -1076,9 +1076,9 @@ def highlight_min(self, subset=None, color='yellow', axis=0): subset: IndexSlice, default None a valid slice for ``data`` to limit the style application to color: str, default 'yellow' - axis: int, str, or None; default None - 0 or 'index' for columnwise, 1 or 'columns' for rowwise - or ``None`` for tablewise (the default) + axis: int, str, or None; default 0 + 0 or 'index' for columnwise (default), 1 or 'columns' for rowwise, + or ``None`` for tablewise Returns ------- From a1dfb037de79e6982a0e7ccf883e5af11e9cc843 Mon Sep 17 00:00:00 2001 From: Morgan Stuart Date: Tue, 18 Jul 2017 19:31:51 -0400 Subject: [PATCH 792/933] BUG: Large object array isin closes #16012 Author: Morgan Stuart Closes #16969 from Morgan243/large_array_isin and squashes the following commits: 31cb4b3 [Morgan Stuart] Removed unneeded details from whatsnew description 4b59745 [Morgan Stuart] Linting errors; additional test clarification 186607b [Morgan Stuart] BUG #16012 - fix isin for large object arrays --- doc/source/whatsnew/v0.21.0.txt | 5 +++-- pandas/core/algorithms.py | 5 ++++- pandas/tests/series/test_analytics.py | 12 ++++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2259eb7d89534..c1133aee3b4a2 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -178,8 +178,6 @@ Performance Improvements Bug Fixes ~~~~~~~~~ -- Fixes regression in 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) -- Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`) Conversion ^^^^^^^^^^ @@ -193,6 +191,7 @@ Indexing - Fixes regression in 0.20.3 when indexing with a string on a ``TimedeltaIndex`` (:issue:`16896`). - Fixed :func:`TimedeltaIndex.get_loc` handling of ``np.timedelta64`` inputs (:issue:`16909`). - Fix :func:`MultiIndex.sort_index` ordering when ``ascending`` argument is a list, but not all levels are specified, or are in a different order (:issue:`16934`). 
+- Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`) I/O ^^^ @@ -222,6 +221,8 @@ Sparse Reshaping ^^^^^^^^^ - Joining/Merging with a non unique ``PeriodIndex`` raised a TypeError (:issue:`16871`) +- Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`) +- Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) Numeric diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b490bf787a037..4ee2c54000fb6 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -402,7 +402,10 @@ def isin(comps, values): # work-around for numpy < 1.8 and comparisions on py3 # faster for larger cases to use np.in1d f = lambda x, y: htable.ismember_object(x, values) - if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000: + # GH16012 + # Ensure np.in1d doesn't get object types or it *may* throw an exception + if ((_np_version_under1p8 and compat.PY3) or len(comps) > 1000000 and + not is_object_dtype(comps)): f = lambda x, y: np.in1d(x, y) elif is_integer_dtype(comps): try: diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 749af1c56a7f0..ab75dbf1b51cc 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1092,6 +1092,18 @@ def test_isin(self): expected = Series([True, False, True, False, False, False, True, True]) assert_series_equal(result, expected) + # GH: 16012 + # This specific issue has to have a series over 1e6 in len, but the + # comparison array (in_list) must be large enough so that numpy doesn't + # do a manual masking trick that will avoid this issue altogether + s = Series(list('abcdefghijk' * 10 ** 5)) + # If numpy doesn't do the manual comparison/mask, these + # unorderable mixed types are what cause the exception in numpy + in_list = [-1, 'a', 'b', 'G', 'Y', 'Z', 'E', + 'K', 'E', 'S', 'I', 'R', 'R'] * 6 + + assert s.isin(in_list).sum() == 200000 + def test_isin_with_string_scalar(self): # GH4763 s = Series(['A', 'B', 'C', 'a', 'B', 'B', 'A', 'C']) From 01d7be51132b31771ff5b5c7a9c333557a902e8e Mon Sep 17 00:00:00 2001 From: ri938 Date: Tue, 18 Jul 2017 19:49:47 -0400 Subject: [PATCH 793/933] BUG: reindex would throw when a categorical index was empty #16770 closes #16770 Author: ri938 Author: Jeff Reback Author: Tuan Author: Forbidden Donut This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #16820 from ri938/bug_issue16770 and squashes the following commits: 0e2d315 [ri938] Merge branch 'master' into bug_issue16770 9802288 [ri938] Update v0.20.3.txt 1f2865e [ri938] Update v0.20.3.txt 83fd749 [ri938] Update v0.20.3.txt eab3192 [ri938] Merge branch 'master' into bug_issue16770 7acc09f [ri938] Minor correction to previous submit 6e8f1b3 [ri938] Minor corrections to previous submit (#16820) 9ed80f0 [ri938] Bring documentation into line with master branch. 
26e1a60 [ri938] Move documentation of change to the next major release 0.21.0
59b17cd [Jeff Reback] BUG: rolling.cov with multi-index columns should presever the MI (#16825)
5362447 [Tuan] fix BUG: ValueError when performing rolling covariance on multi indexed DataFrame (#16814)
800b40d [ri938] BUG: render dataframe as html do not produce duplicate element id's (#16780) (#16801)
a725fbf [Forbidden Donut] BUG: Fix read of py3 PeriodIndex DataFrame HDF made in py2 (#16781) (#16790)
8f8e3d6 [ri938] TST: register slow marker (#16797)
0645868 [ri938] Add backticks in documentation
0a20024 [ri938] Minor correction to previous submit
69454ec [ri938] Minor corrections to previous submit (#16820)
3092bbc [ri938] BUG: reindex would throw when a categorical index was empty #16770
---
 doc/source/whatsnew/v0.21.0.txt       | 1 +
 pandas/core/indexes/category.py       | 7 +++++--
 pandas/tests/indexes/test_category.py | 8 ++++++++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index c1133aee3b4a2..b02613b12ba38 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -192,6 +192,7 @@ Indexing
 - Fixed :func:`TimedeltaIndex.get_loc` handling of ``np.timedelta64`` inputs (:issue:`16909`).
 - Fix :func:`MultiIndex.sort_index` ordering when ``ascending`` argument is a list, but not all levels are specified, or are in a different order (:issue:`16934`).
 - Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`)
+- Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`)

 I/O
 ^^^
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index c9e0e3b10875c..e8427f847dd2d 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -419,7 +419,11 @@ def reindex(self, target, method=None, level=None, limit=None,
             raise ValueError("cannot reindex with a non-unique indexer")

         indexer, missing = self.get_indexer_non_unique(np.array(target))
-        new_target = self.take(indexer)
+
+        if len(self.codes):
+            new_target = self.take(indexer)
+        else:
+            new_target = target

         # filling in missing if needed
         if len(missing):
@@ -430,7 +434,6 @@ def reindex(self, target, method=None, level=None, limit=None,
                 result = Index(np.array(self), name=self.name)
                 new_target, indexer, _ = result._reindex_non_unique(
                     np.array(target))
-
             else:

                 codes = new_target.codes.copy()
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index 14f344acbefb2..e8d780e041316 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -419,6 +419,14 @@ def test_reindex_dtype(self):
         tm.assert_numpy_array_equal(indexer,
                                     np.array([0, 3, 2], dtype=np.intp))

+    def test_reindex_empty_index(self):
+        # See GH16770
+        c = CategoricalIndex([])
+        res, indexer = c.reindex(['a', 'b'])
+        tm.assert_index_equal(res, Index(['a', 'b']), exact=True)
+        tm.assert_numpy_array_equal(indexer,
+                                    np.array([-1, -1], dtype=np.intp))
+
     def test_duplicates(self):

         idx = CategoricalIndex([0, 0, 0], name='foo')

From e5de21a991408b3d3783489989201826af8ada67 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Tue, 18 Jul 2017 17:45:01 -0700
Subject: [PATCH 794/933] BUG: Don't error with empty Series for .isin (#17006)

An empty Series initializes to float64, even when the data type is
object for .isin, leading to an error during the membership check.

Closes gh-16991.
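A minimal sketch of the fixed behavior (not part of the patch; it assumes the fix is applied): every flavor of empty container now yields an all-False mask instead of raising:

    import numpy as np
    import pandas as pd

    s = pd.Series(["a", "b"])  # object dtype
    for empty in ([], pd.Series(), np.array([])):
        # an empty Series previously defaulted to float64 and the object
        # values errored during the membership check; now this is just False
        assert s.isin(empty).tolist() == [False, False]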
--- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/algorithms.py | 2 ++ pandas/tests/frame/test_analytics.py | 9 ++++++--- pandas/tests/indexes/test_base.py | 9 +++++++++ pandas/tests/series/test_analytics.py | 9 +++++++++ pandas/tests/test_algos.py | 9 +++++++++ 6 files changed, 36 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index b02613b12ba38..4801e5c5300e7 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -239,3 +239,4 @@ Categorical Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) +- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4ee2c54000fb6..79beb95d93ea1 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -65,6 +65,8 @@ def _ensure_data(values, dtype=None): # we check some simple dtypes first try: + if is_object_dtype(dtype): + return _ensure_object(np.asarray(values)), 'object', 'object' if is_bool_dtype(values) or is_bool_dtype(dtype): # we are actually coercing to uint64 # until our algos suppport uint8 directly (see TODO) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index b09325bfa2ddc..da1c68005b9b2 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1151,10 +1151,13 @@ def test_isin(self): expected = DataFrame([df.loc[s].isin(other) for s in df.index]) tm.assert_frame_equal(result, expected) - def test_isin_empty(self): + @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) + def test_isin_empty(self, empty): + # see gh-16991 df = DataFrame({'A': ['a', 'b', 'c'], 'B': ['a', 'e', 'f']}) - result = df.isin([]) - expected = pd.DataFrame(False, df.index, df.columns) + expected = DataFrame(False, df.index, df.columns) + + result = df.isin(empty) tm.assert_frame_equal(result, expected) def test_isin_dict(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 18dbe6624008a..692cdd4957947 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1407,6 +1407,15 @@ def check_idx(idx): # Float64Index overrides isin, so must be checked separately check_idx(Float64Index([1.0, 2.0, 3.0, 4.0])) + @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) + def test_isin_empty(self, empty): + # see gh-16991 + idx = Index(["a", "b"]) + expected = np.array([False, False]) + + result = idx.isin(empty) + tm.assert_numpy_array_equal(expected, result) + def test_boolean_cmp(self): values = [1, 2, 3, 4] diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index ab75dbf1b51cc..7aab7df7169d4 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1147,6 +1147,15 @@ def test_isin_with_i8(self): result = s.isin(s[0:2]) assert_series_equal(result, expected) + @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) + def test_isin_empty(self, empty): + # see gh-16991 + s = Series(["a", "b"]) + expected = Series([False, False]) + + result = s.isin(empty) + tm.assert_series_equal(expected, result) + def test_timedelta64_analytics(self): from pandas import date_range diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 993dcc4f527b2..4588bf17fdbeb 100644 --- a/pandas/tests/test_algos.py +++ 
b/pandas/tests/test_algos.py
@@ -597,6 +597,15 @@ def test_categorical_from_codes(self):
         result = algos.isin(Sd, St)
         tm.assert_numpy_array_equal(expected, result)

+    @pytest.mark.parametrize("empty", [[], pd.Series(), np.array([])])
+    def test_empty(self, empty):
+        # see gh-16991
+        vals = pd.Index(["a", "b"])
+        expected = np.array([False, False])
+
+        result = algos.isin(vals, empty)
+        tm.assert_numpy_array_equal(expected, result)
+

 class TestValueCounts(object):


From aead041fece0ef17a81218585329109e17b5deb9 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Tue, 18 Jul 2017 19:51:57 -0700
Subject: [PATCH 795/933] ENH: Use 'Y' as an alias for end of year (#16978)

Closes gh-9313

Redo of gh-16958
---
 doc/source/timeseries.rst                |  4 +-
 doc/source/whatsnew/v0.21.0.txt          |  2 +
 .../indexes/datetimes/test_date_range.py  | 25 +++++++++++
 pandas/tests/tseries/test_frequencies.py  | 41 +++++++++++++------
 pandas/tseries/frequencies.py             | 24 +++++++++--
 5 files changed, 78 insertions(+), 18 deletions(-)

diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
index 1dd80aec4fd6c..8f02a86adbd48 100644
--- a/doc/source/timeseries.rst
+++ b/doc/source/timeseries.rst
@@ -1092,9 +1092,9 @@ frequencies. We will refer to these aliases as *offset aliases*
     "BQ", "business quarter end frequency"
     "QS", "quarter start frequency"
     "BQS", "business quarter start frequency"
-    "A", "year end frequency"
+    "A, Y", "year end frequency"
     "BA", "business year end frequency"
-    "AS", "year start frequency"
+    "AS, YS", "year start frequency"
     "BAS", "business year start frequency"
     "BH", "business hour frequency"
     "H", "hourly frequency"
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 4801e5c5300e7..9a6016c82e794 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -72,6 +72,8 @@ Other Enhancements
 - :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`)
 - :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`)
 - :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`)
+- :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`)
+- :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`)

 .. _whatsnew_0210.api_breaking:

diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py
index 62686b356dc30..da4ca83c10dda 100644
--- a/pandas/tests/indexes/datetimes/test_date_range.py
+++ b/pandas/tests/indexes/datetimes/test_date_range.py
@@ -33,6 +33,31 @@ def test_date_range_gen_error(self):
         rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min')
         assert len(rng) == 4

+    @pytest.mark.parametrize("freq", ["AS", "YS"])
+    def test_begin_year_alias(self, freq):
+        # see gh-9313
+        rng = date_range("1/1/2013", "7/1/2017", freq=freq)
+        exp = pd.DatetimeIndex(["2013-01-01", "2014-01-01",
+                                "2015-01-01", "2016-01-01",
+                                "2017-01-01"], freq=freq)
+        tm.assert_index_equal(rng, exp)
+
+    @pytest.mark.parametrize("freq", ["A", "Y"])
+    def test_end_year_alias(self, freq):
+        # see gh-9313
+        rng = date_range("1/1/2013", "7/1/2017", freq=freq)
+        exp = pd.DatetimeIndex(["2013-12-31", "2014-12-31",
+                                "2015-12-31", "2016-12-31"], freq=freq)
+        tm.assert_index_equal(rng, exp)
+
+    @pytest.mark.parametrize("freq", ["BA", "BY"])
+    def test_business_end_year_alias(self, freq):
+        # see gh-9313
+        rng = date_range("1/1/2013", "7/1/2017", freq=freq)
+        exp = pd.DatetimeIndex(["2013-12-31", "2014-12-31",
+                                "2015-12-31", "2016-12-30"], freq=freq)
+        tm.assert_index_equal(rng, exp)
+
     def test_date_range_negative_freq(self):
         # GH 11018
         rng = date_range('2011-12-31', freq='-2A', periods=3)
diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py
index 54d12317b0bf8..4bcd0b49db7e0 100644
--- a/pandas/tests/tseries/test_frequencies.py
+++ b/pandas/tests/tseries/test_frequencies.py
@@ -248,9 +248,10 @@ def test_anchored_shortcuts(self):

         # ensure invalid cases fail as expected
         invalid_anchors = ['SM-0', 'SM-28', 'SM-29',
-                           'SM-FOO', 'BSM', 'SM--1'
+                           'SM-FOO', 'BSM', 'SM--1',
                            'SMS-1', 'SMS-28', 'SMS-30',
-                           'SMS-BAR', 'BSMS', 'SMS--2']
+                           'SMS-BAR', 'SMS-BYR', 'BSMS',
+                           'SMS--2']
         for invalid_anchor in invalid_anchors:
             with tm.assert_raises_regex(ValueError,
                                         'Invalid frequency: '):
@@ -292,11 +293,15 @@ def test_get_rule_month():
     result = frequencies._get_rule_month('A-DEC')
     assert (result == 'DEC')
+    result = frequencies._get_rule_month('Y-DEC')
+    assert (result == 'DEC')

     result = frequencies._get_rule_month(offsets.YearEnd())
     assert (result == 'DEC')

     result = frequencies._get_rule_month('A-MAY')
     assert (result == 'MAY')
+    result = frequencies._get_rule_month('Y-MAY')
+    assert (result == 'MAY')

     result = frequencies._get_rule_month(offsets.YearEnd(month=5))
     assert (result == 'MAY')
@@ -305,6 +310,10 @@ def test_period_str_to_code():
     assert (frequencies._period_str_to_code('A') == 1000)
     assert (frequencies._period_str_to_code('A-DEC') == 1000)
     assert (frequencies._period_str_to_code('A-JAN') == 1001)
+    assert (frequencies._period_str_to_code('Y') == 1000)
+    assert (frequencies._period_str_to_code('Y-DEC') == 1000)
+    assert (frequencies._period_str_to_code('Y-JAN') == 1001)
+
     assert (frequencies._period_str_to_code('Q') == 2000)
     assert (frequencies._period_str_to_code('Q-DEC') == 2000)
     assert (frequencies._period_str_to_code('Q-FEB') == 2002)
@@ -349,6 +358,10 @@ def test_freq_code(self):
         assert frequencies.get_freq('3A') == 1000
         assert frequencies.get_freq('-1A') == 1000

+        assert frequencies.get_freq('Y') == 1000
+        assert frequencies.get_freq('3Y') == 1000
+        assert frequencies.get_freq('-1Y') == 1000
+
         assert frequencies.get_freq('W') == 4000
         assert frequencies.get_freq('W-MON') == 4001
         assert frequencies.get_freq('W-FRI') == 4005
_whatsnew_0210.api_breaking: diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 62686b356dc30..da4ca83c10dda 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -33,6 +33,31 @@ def test_date_range_gen_error(self): rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min') assert len(rng) == 4 + @pytest.mark.parametrize("freq", ["AS", "YS"]) + def test_begin_year_alias(self, freq): + # see gh-9313 + rng = date_range("1/1/2013", "7/1/2017", freq=freq) + exp = pd.DatetimeIndex(["2013-01-01", "2014-01-01", + "2015-01-01", "2016-01-01", + "2017-01-01"], freq=freq) + tm.assert_index_equal(rng, exp) + + @pytest.mark.parametrize("freq", ["A", "Y"]) + def test_end_year_alias(self, freq): + # see gh-9313 + rng = date_range("1/1/2013", "7/1/2017", freq=freq) + exp = pd.DatetimeIndex(["2013-12-31", "2014-12-31", + "2015-12-31", "2016-12-31"], freq=freq) + tm.assert_index_equal(rng, exp) + + @pytest.mark.parametrize("freq", ["BA", "BY"]) + def test_business_end_year_alias(self, freq): + # see gh-9313 + rng = date_range("1/1/2013", "7/1/2017", freq=freq) + exp = pd.DatetimeIndex(["2013-12-31", "2014-12-31", + "2015-12-31", "2016-12-30"], freq=freq) + tm.assert_index_equal(rng, exp) + def test_date_range_negative_freq(self): # GH 11018 rng = date_range('2011-12-31', freq='-2A', periods=3) diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index 54d12317b0bf8..4bcd0b49db7e0 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -248,9 +248,10 @@ def test_anchored_shortcuts(self): # ensure invalid cases fail as expected invalid_anchors = ['SM-0', 'SM-28', 'SM-29', - 'SM-FOO', 'BSM', 'SM--1' + 'SM-FOO', 'BSM', 'SM--1', 'SMS-1', 'SMS-28', 'SMS-30', - 'SMS-BAR', 'BSMS', 'SMS--2'] + 'SMS-BAR', 'SMS-BYR' 'BSMS', + 'SMS--2'] for invalid_anchor in invalid_anchors: with tm.assert_raises_regex(ValueError, 'Invalid frequency: '): @@ -292,11 +293,15 @@ def test_get_rule_month(): result = frequencies._get_rule_month('A-DEC') assert (result == 'DEC') + result = frequencies._get_rule_month('Y-DEC') + assert (result == 'DEC') result = frequencies._get_rule_month(offsets.YearEnd()) assert (result == 'DEC') result = frequencies._get_rule_month('A-MAY') assert (result == 'MAY') + result = frequencies._get_rule_month('Y-MAY') + assert (result == 'MAY') result = frequencies._get_rule_month(offsets.YearEnd(month=5)) assert (result == 'MAY') @@ -305,6 +310,10 @@ def test_period_str_to_code(): assert (frequencies._period_str_to_code('A') == 1000) assert (frequencies._period_str_to_code('A-DEC') == 1000) assert (frequencies._period_str_to_code('A-JAN') == 1001) + assert (frequencies._period_str_to_code('Y') == 1000) + assert (frequencies._period_str_to_code('Y-DEC') == 1000) + assert (frequencies._period_str_to_code('Y-JAN') == 1001) + assert (frequencies._period_str_to_code('Q') == 2000) assert (frequencies._period_str_to_code('Q-DEC') == 2000) assert (frequencies._period_str_to_code('Q-FEB') == 2002) @@ -349,6 +358,10 @@ def test_freq_code(self): assert frequencies.get_freq('3A') == 1000 assert frequencies.get_freq('-1A') == 1000 + assert frequencies.get_freq('Y') == 1000 + assert frequencies.get_freq('3Y') == 1000 + assert frequencies.get_freq('-1Y') == 1000 + assert frequencies.get_freq('W') == 4000 assert frequencies.get_freq('W-MON') == 4001 assert frequencies.get_freq('W-FRI') == 4005 
@@ -369,6 +382,13 @@ def test_freq_group(self): assert frequencies.get_freq_group('-1A') == 1000 assert frequencies.get_freq_group('A-JAN') == 1000 assert frequencies.get_freq_group('A-MAY') == 1000 + + assert frequencies.get_freq_group('Y') == 1000 + assert frequencies.get_freq_group('3Y') == 1000 + assert frequencies.get_freq_group('-1Y') == 1000 + assert frequencies.get_freq_group('Y-JAN') == 1000 + assert frequencies.get_freq_group('Y-MAY') == 1000 + assert frequencies.get_freq_group(offsets.YearEnd()) == 1000 assert frequencies.get_freq_group(offsets.YearEnd(month=1)) == 1000 assert frequencies.get_freq_group(offsets.YearEnd(month=5)) == 1000 @@ -790,12 +810,6 @@ def test_series(self): for freq in [None, 'L']: s = Series(period_range('2013', periods=10, freq=freq)) pytest.raises(TypeError, lambda: frequencies.infer_freq(s)) - for freq in ['Y']: - - msg = frequencies._INVALID_FREQ_ERROR - with tm.assert_raises_regex(ValueError, msg): - s = Series(period_range('2013', periods=10, freq=freq)) - pytest.raises(TypeError, lambda: frequencies.infer_freq(s)) # DateTimeIndex for freq in ['M', 'L', 'S']: @@ -812,11 +826,12 @@ def test_legacy_offset_warnings(self): 'W@FRI', 'W@SAT', 'W@SUN', 'Q@JAN', 'Q@FEB', 'Q@MAR', 'A@JAN', 'A@FEB', 'A@MAR', 'A@APR', 'A@MAY', 'A@JUN', 'A@JUL', 'A@AUG', 'A@SEP', 'A@OCT', 'A@NOV', 'A@DEC', - 'WOM@1MON', 'WOM@2MON', 'WOM@3MON', 'WOM@4MON', - 'WOM@1TUE', 'WOM@2TUE', 'WOM@3TUE', 'WOM@4TUE', - 'WOM@1WED', 'WOM@2WED', 'WOM@3WED', 'WOM@4WED', - 'WOM@1THU', 'WOM@2THU', 'WOM@3THU', 'WOM@4THU' - 'WOM@1FRI', 'WOM@2FRI', 'WOM@3FRI', 'WOM@4FRI'] + 'Y@JAN', 'WOM@1MON', 'WOM@2MON', 'WOM@3MON', + 'WOM@4MON', 'WOM@1TUE', 'WOM@2TUE', 'WOM@3TUE', + 'WOM@4TUE', 'WOM@1WED', 'WOM@2WED', 'WOM@3WED', + 'WOM@4WED', 'WOM@1THU', 'WOM@2THU', 'WOM@3THU', + 'WOM@4THU', 'WOM@1FRI', 'WOM@2FRI', 'WOM@3FRI', + 'WOM@4FRI'] msg = frequencies._INVALID_FREQ_ERROR for freq in freqs: diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index c5f6c00a4005a..aa33a3849acb3 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -399,10 +399,14 @@ def _get_freq_str(base, mult=1): 'Q': 'Q', 'A': 'A', 'W': 'W', - 'M': 'M' + 'M': 'M', + 'Y': 'A', + 'BY': 'A', + 'YS': 'A', + 'BYS': 'A', } -need_suffix = ['QS', 'BQ', 'BQS', 'AS', 'BA', 'BAS'] +need_suffix = ['QS', 'BQ', 'BQS', 'YS', 'AS', 'BY', 'BA', 'BYS', 'BAS'] for __prefix in need_suffix: for _m in tslib._MONTHS: _offset_to_period_map['%s-%s' % (__prefix, _m)] = \ @@ -427,9 +431,13 @@ def get_period_alias(offset_str): 'Q': 'Q-DEC', 'A': 'A-DEC', # YearEnd(month=12), + 'Y': 'A-DEC', 'AS': 'AS-JAN', # YearBegin(month=1), + 'YS': 'AS-JAN', 'BA': 'BA-DEC', # BYearEnd(month=12), + 'BY': 'BA-DEC', 'BAS': 'BAS-JAN', # BYearBegin(month=1), + 'BYS': 'BAS-JAN', 'Min': 'T', 'min': 'T', @@ -708,7 +716,17 @@ def get_standard_freq(freq): for _k, _v in compat.iteritems(_period_code_map): _reverse_period_code_map[_v] = _k -# Additional aliases +# Yearly aliases +year_aliases = {} + +for k, v in compat.iteritems(_period_code_map): + if k.startswith("A-"): + alias = "Y" + k[1:] + year_aliases[alias] = v + +_period_code_map.update(**year_aliases) +del year_aliases + _period_code_map.update({ "Q": 2000, # Quarterly - December year end (default quarterly) "A": 1000, # Annual From 1dc93b521f54b0259c77e8079b03c6fae791dd24 Mon Sep 17 00:00:00 2001 From: chris-b1 Date: Wed, 19 Jul 2017 04:51:47 -0500 Subject: [PATCH 796/933] DOC: infer_objects doc fixup (#17018) --- doc/source/basics.rst | 5 +++-- doc/source/whatsnew/v0.21.0.txt | 
9 +++++---- pandas/core/generic.py | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 4211b15203721..aae1fffb7a3b6 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -2025,11 +2025,12 @@ object conversion pandas offers various functions to try to force conversion of types from the ``object`` dtype to other types. In cases where the data is already of the correct type, but stored in an ``object`` array, the -:meth:`~DataFrame.infer_objects` and :meth:`~Series.infer_objects` can be used to soft convert +:meth:`DataFrame.infer_objects` and :meth:`Series.infer_objects` methods can be used to soft convert to the correct type. .. ipython:: python + import datetime df = pd.DataFrame([[1, 2], ['a', 'b'], [datetime.datetime(2016, 3, 2), datetime.datetime(2016, 3, 2)]]) @@ -2037,7 +2038,7 @@ to the correct type. df df.dtypes -Because the data transposed the original inference stored all columns as object, which +Because the data was transposed the original inference stored all columns as object, which ``infer_objects`` will correct. .. ipython:: python diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9a6016c82e794..e43a5f9856253 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -31,13 +31,13 @@ New features ``infer_objects`` type conversion ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The `:meth:`~DataFrame.infer_objects` and :meth:`~Series.infer_objects` +The :meth:`DataFrame.infer_objects` and :meth:`Series.infer_objects` methods have been added to perform dtype inference on object columns, replacing some of the functionality of the deprecated ``convert_objects`` method. See the documentation :ref:`here ` for more details. (:issue:`11221`) -This function only performs soft conversions on object columns, converting Python objects +This method only performs soft conversions on object columns, converting Python objects to native types, but not any coercive conversions. For example: .. ipython:: python @@ -46,11 +46,12 @@ to native types, but not any coercive conversions. For example: 'B': np.array([1, 2, 3], dtype='object'), 'C': ['1', '2', '3']}) df.dtypes - df.infer_objects().dtype + df.infer_objects().dtypes Note that column ``'C'`` was not converted - only scalar numeric types will be inferred to a new type. Other types of conversion should be accomplished -using :func:`to_numeric` function (or :func:`to_datetime`, :func:`to_timedelta`). +using the :func:`to_numeric` function (or :func:`to_datetime`, :func:`to_timedelta`). + .. ipython:: python df = df.infer_objects() diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c95129bdaa005..48006b11993c7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3693,7 +3693,7 @@ def infer_objects(self): columns unchanged. The inference rules are the same as during normal Series/DataFrame construction. - .. versionadded:: 0.20.0 + .. 
versionadded:: 0.21.0 See Also -------- From 47e909dc9d619e20b139c43236efde66b52f9d11 Mon Sep 17 00:00:00 2001 From: Eric Stein Date: Wed, 19 Jul 2017 06:17:56 -0400 Subject: [PATCH 797/933] Fixes SparseSeries initiated with dictionary raising AttributeError (#16960) --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/sparse/series.py | 5 ++--- pandas/tests/sparse/test_series.py | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index e43a5f9856253..5146bd35dff30 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -219,7 +219,7 @@ Groupby/Resample/Rolling Sparse ^^^^^^ - +- Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16777`) Reshaping diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 9dd061e26ba06..1bc9cf5379930 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -146,10 +146,9 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block', data = data._data elif isinstance(data, (Series, dict)): - if index is None: - index = data.index.view() + data = Series(data, index=index) + index = data.index.view() - data = Series(data) res = make_sparse(data, kind=kind, fill_value=fill_value) data, sparse_index, fill_value = res diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index b524d6bfab418..bb56f8a51897a 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -88,6 +88,24 @@ def setup_method(self, method): self.ziseries2 = SparseSeries(arr, index=index, kind='integer', fill_value=0) + def test_constructor_dict_input(self): + # gh-16905 + constructor_dict = {1: 1.} + index = [0, 1, 2] + + # Series with index passed in + series = pd.Series(constructor_dict) + expected = SparseSeries(series, index=index) + + result = SparseSeries(constructor_dict, index=index) + tm.assert_sp_series_equal(result, expected) + + # Series with index and dictionary with no index + expected = SparseSeries(series) + + result = SparseSeries(constructor_dict) + tm.assert_sp_series_equal(result, expected) + def test_constructor_dtype(self): arr = SparseSeries([np.nan, 1, 2, np.nan]) assert arr.dtype == np.float64 From 5a024494fef4f0afaffa85665370f884857e298d Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 19 Jul 2017 12:19:30 +0200 Subject: [PATCH 798/933] DOC: Improving docstring of reset_index method (#16416) (#16975) --- pandas/core/frame.py | 119 +++++++++++++++++++++++++++++++++---------- 1 file changed, 91 insertions(+), 28 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9a79ca1d4eab1..c18aaf25bfde5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3023,35 +3023,98 @@ def reset_index(self, level=None, drop=False, inplace=False, col_level=0, Examples -------- - >>> df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, - ... index=pd.Index(['a', 'b', 'c', 'd'], - ... name='idx')) + >>> df = pd.DataFrame([('bird', 389.0), + ... ('bird', 24.0), + ... ('mammal', 80.5), + ... ('mammal', np.nan)], + ... index=['falcon', 'parrot', 'lion', 'monkey'], + ... 
columns=('class', 'max_speed')) + >>> df + class max_speed + falcon bird 389.0 + parrot bird 24.0 + lion mammal 80.5 + monkey mammal NaN + + When we reset the index, the old index is added as a column, and a + new sequential index is used: + >>> df.reset_index() - idx a b - 0 a 1 5 - 1 b 2 6 - 2 c 3 7 - 3 d 4 8 - - >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz', 'foo', - ... 'foo', 'qux', 'qux']), - ... np.array(['one', 'two', 'one', 'two', 'one', 'two', - ... 'one', 'two'])] - >>> df2 = pd.DataFrame( - ... np.random.randn(8, 4), - ... index=pd.MultiIndex.from_arrays(arrays, - ... names=['a', 'b'])) - >>> df2.reset_index(level='a') - a 0 1 2 3 - b - one bar -1.099413 0.291838 0.598198 0.162181 - two bar -0.312184 -0.119904 0.250360 0.364378 - one baz 0.713596 -0.490636 0.074967 -0.297857 - two baz 0.998397 0.524499 -2.228976 0.901155 - one foo 0.923204 0.920695 1.264488 1.476921 - two foo -1.566922 0.783278 -0.073656 0.266027 - one qux -0.230470 0.109800 -1.383409 0.048421 - two qux -0.865993 -0.865984 0.705367 -0.170446 + index class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN + + We can use the `drop` parameter to avoid the old index being added as + a column: + + >>> df.reset_index(drop=True) + class max_speed + 0 bird 389.0 + 1 bird 24.0 + 2 mammal 80.5 + 3 mammal NaN + + You can also use `reset_index` with `MultiIndex`. + + >>> index = pd.MultiIndex.from_tuples([('bird', 'falcon'), + ... ('bird', 'parrot'), + ... ('mammal', 'lion'), + ... ('mammal', 'monkey')], + ... names=['class', 'name']) + >>> columns = pd.MultiIndex.from_tuples([('speed', 'max'), + ... ('speed', 'type')]) + >>> df = pd.DataFrame([(389.0, 'fly'), + ... ( 24.0, 'fly'), + ... ( 80.5, 'run'), + ... (np.nan, 'jump')], + ... index=index, + ... columns=columns) + >>> df + speed + max type + class name + bird falcon 389.0 fly + parrot 24.0 fly + mammal lion 80.5 run + monkey NaN jump + + If the index has multiple levels, we can reset a subset of them: + + >>> df.reset_index(level='class') + class speed + max type + name + falcon bird 389.0 fly + parrot bird 24.0 fly + lion mammal 80.5 run + monkey mammal NaN jump + + If we are not dropping the index, by default, it is placed in the top + level. We can place it in another level: + + >>> df.reset_index(level='class', col_level=1) + speed + class max type + name + falcon bird 389.0 fly + parrot bird 24.0 fly + lion mammal 80.5 run + monkey mammal NaN jump + + When the index is inserted under another level, we can specify under + which one with the parameter `col_fill`. 
If we specify a nonexistent
+        level, it is created:
+
+        >>> df.reset_index(level='class', col_level=1, col_fill='species')
+                      species  speed
+                        class    max type
+        name
+        falcon           bird  389.0  fly
+        parrot           bird   24.0  fly
+        lion           mammal   80.5  run
+        monkey         mammal    NaN jump
         """
         inplace = validate_bool_kwarg(inplace, 'inplace')
         if inplace:

From dd1852d59e51e28c2f9b589eefa1916ef5d9bdc9 Mon Sep 17 00:00:00 2001
From: Andrew
Date: Wed, 19 Jul 2017 06:22:52 -0400
Subject: [PATCH 799/933] DOC: add warning to append about inefficiency
 (#17017)

---
 pandas/core/frame.py  | 32 ++++++++++++++++++++++++++++++++
 pandas/core/series.py | 12 ++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index c18aaf25bfde5..e554e136cdb80 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4715,6 +4715,11 @@ def append(self, other, ignore_index=False, verify_integrity=False):
         the DataFrame's index, the order of the columns in the resulting
         DataFrame will be unchanged.

+        Iteratively appending rows to a DataFrame can be more computationally
+        intensive than a single concatenate. A better solution is to append
+        those rows to a list and then concatenate the list with the original
+        DataFrame all at once.
+
         See also
         --------
         pandas.concat : General function to concatenate DataFrame, Series
@@ -4745,6 +4750,33 @@ def append(self, other, ignore_index=False, verify_integrity=False):
         2  5  6
         3  7  8

+        The following, while not recommended methods for generating DataFrames,
+        show two ways to generate a DataFrame from multiple data sources.
+
+        Less efficient:
+
+        >>> df = pd.DataFrame(columns=['A'])
+        >>> for i in range(5):
+        ...     df = df.append({'A': i}, ignore_index=True)
+        >>> df
+           A
+        0  0
+        1  1
+        2  2
+        3  3
+        4  4
+
+        More efficient:
+
+        >>> pd.concat([pd.DataFrame([i], columns=['A']) for i in range(5)],
+        ...           ignore_index=True)
+           A
+        0  0
+        1  1
+        2  2
+        3  3
+        4  4
+
         """
         if isinstance(other, (Series, dict)):
             if isinstance(other, dict):
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 219eca4277f32..c7ead292c8b63 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1563,6 +1563,18 @@ def append(self, to_append, ignore_index=False, verify_integrity=False):
         verify_integrity : boolean, default False
             If True, raise Exception on creating index with duplicates

+        Notes
+        -----
+        Iteratively appending to a Series can be more computationally intensive
+        than a single concatenate. A better solution is to append values to a
+        list and then concatenate the list with the original Series all at
+        once.
+
+        See also
+        --------
+        pandas.concat : General function to concatenate DataFrame, Series
+            or Panel objects
+
         Returns
         -------
         appended : Series

From af5eafb5905ad9a5eaa43645716cefc684a20813 Mon Sep 17 00:00:00 2001
From: Utkarsh Upadhyay
Date: Wed, 19 Jul 2017 18:00:21 +0200
Subject: [PATCH 800/933] DOC: Remove redundant backtick (#17025)

---
 pandas/core/generic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 48006b11993c7..b2083a4454f84 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -3161,7 +3161,7 @@ def pipe(self, func, *args, **kwargs):

     _shared_docs['transform'] = ("""
     Call function producing a like-indexed %(klass)s
-    and return a %(klass)s with the transformed values`
+    and return a %(klass)s with the transformed values

     ..
versionadded:: 0.20.0 From 031d7a9fe24f2799454de34a1e595ae4fa6cfc9f Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 19 Jul 2017 09:33:43 -0700 Subject: [PATCH 801/933] DOC: Document business frequency aliases (#17028) Follow-up to gh-16978. --- doc/source/timeseries.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 8f02a86adbd48..ce4a920ad77b5 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1093,9 +1093,9 @@ frequencies. We will refer to these aliases as *offset aliases* "QS", "quarter start frequency" "BQS", "business quarter start frequency" "A, Y", "year end frequency" - "BA", "business year end frequency" + "BA, BY", "business year end frequency" "AS, YS", "year start frequency" - "BAS", "business year start frequency" + "BAS, BYS", "business year start frequency" "BH", "business hour frequency" "H", "hourly frequency" "T, min", "minutely frequency" From f511d8237322589c59eff8c16ffe00b8293ff4a1 Mon Sep 17 00:00:00 2001 From: Nick Garvey Date: Wed, 19 Jul 2017 23:48:22 -0700 Subject: [PATCH 802/933] DOC: Fix double back-tick in 'Reshaping by Melt' section (#17030) See current stable docs for the issue: https://pandas.pydata.org/pandas-docs/stable/reshaping.html#reshaping-by-melt The double ` is causing the entire paragraph to be fixed width until the next double `. This commit removes the extra "`" --- doc/source/reshaping.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 5f125e329f6f1..3dce73b302c7c 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -265,7 +265,7 @@ the right thing: Reshaping by Melt ----------------- -The top-level :func:``melt` and :func:`~DataFrame.melt` functions are useful to +The top-level :func:`melt` and :func:`~DataFrame.melt` functions are useful to massage a DataFrame into a format where one or more columns are identifier variables, while all other columns, considered measured variables, are "unpivoted" to the row axis, leaving just two non-identifier columns, "variable" and "value". 
The From 8d0c025a4584c0f2d412d060c30fe459dc90b53b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 20 Jul 2017 03:36:35 -0700 Subject: [PATCH 803/933] Define DataFrame plot methods in DataFrame (#17020) --- pandas/core/frame.py | 34 +++++++++------------------------- pandas/plotting/_core.py | 12 ++++++++++++ 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e554e136cdb80..4843f3389bf75 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5746,12 +5746,21 @@ def isin(self, values): values).reshape(self.shape), self.index, self.columns) + # ---------------------------------------------------------------------- + # Add plotting methods to DataFrame + plot = base.AccessorProperty(gfx.FramePlotMethods, gfx.FramePlotMethods) + hist = gfx.hist_frame + boxplot = gfx.boxplot_frame + DataFrame._setup_axes(['index', 'columns'], info_axis=1, stat_axis=0, axes_are_reversed=True, aliases={'rows': 0}) DataFrame._add_numeric_operations() DataFrame._add_series_or_dataframe_operations() +ops.add_flex_arithmetic_methods(DataFrame, **ops.frame_flex_funcs) +ops.add_special_arithmetic_methods(DataFrame, **ops.frame_special_funcs) + _EMPTY_SERIES = Series([]) @@ -6097,28 +6106,3 @@ def _from_nested_dict(data): def _put_str(s, space): return ('%s' % s)[:space].ljust(space) - - -# ---------------------------------------------------------------------- -# Add plotting methods to DataFrame -DataFrame.plot = base.AccessorProperty(gfx.FramePlotMethods, - gfx.FramePlotMethods) -DataFrame.hist = gfx.hist_frame - - -@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) -def boxplot(self, column=None, by=None, ax=None, fontsize=None, rot=0, - grid=True, figsize=None, layout=None, return_type=None, **kwds): - from pandas.plotting._core import boxplot - import matplotlib.pyplot as plt - ax = boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize, - grid=grid, rot=rot, figsize=figsize, layout=layout, - return_type=return_type, **kwds) - plt.draw_if_interactive() - return ax - - -DataFrame.boxplot = boxplot - -ops.add_flex_arithmetic_methods(DataFrame, **ops.frame_flex_funcs) -ops.add_special_arithmetic_methods(DataFrame, **ops.frame_special_funcs) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index a623288efc1ae..de96d17da2a9f 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2034,6 +2034,18 @@ def plot_group(keys, values, ax): return result +@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) +def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, + grid=True, figsize=None, layout=None, + return_type=None, **kwds): + import matplotlib.pyplot as plt + ax = boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize, + grid=grid, rot=rot, figsize=figsize, layout=layout, + return_type=return_type, **kwds) + plt.draw_if_interactive() + return ax + + def scatter_plot(data, x, y, by=None, ax=None, figsize=None, grid=False, **kwargs): """ From 8e582254e3bbdd717ec8193364420913a7fc786d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 20 Jul 2017 10:23:39 -0400 Subject: [PATCH 804/933] CLN: move safe_sort from core.algorithms to core.sorting (#17034) COMPAT: safe_sort will only coerce list-likes to object, not a numpy string type xref: https://github.com/pandas-dev/pandas/pull/17003#discussion_r128332208 --- pandas/core/algorithms.py | 100 +------------------------------- pandas/core/indexes/base.py | 5 +- pandas/core/reshape/merge.py | 3 +- pandas/core/sorting.py | 
108 ++++++++++++++++++++++++++++++++++- pandas/tests/test_algos.py | 88 ---------------------------- pandas/tests/test_sorting.py | 98 ++++++++++++++++++++++++++++++- 6 files changed, 210 insertions(+), 192 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 79beb95d93ea1..3ccd7216fa81a 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -30,7 +30,6 @@ from pandas.core.dtypes.missing import isnull from pandas.core import common as com -from pandas.compat import string_types from pandas._libs import algos, lib, hashtable as htable from pandas._libs.tslib import iNaT @@ -431,104 +430,6 @@ def isin(comps, values): return f(comps, values) -def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False): - """ - Sort ``values`` and reorder corresponding ``labels``. - ``values`` should be unique if ``labels`` is not None. - Safe for use with mixed types (int, str), orders ints before strs. - - .. versionadded:: 0.19.0 - - Parameters - ---------- - values : list-like - Sequence; must be unique if ``labels`` is not None. - labels : list_like - Indices to ``values``. All out of bound indices are treated as - "not found" and will be masked with ``na_sentinel``. - na_sentinel : int, default -1 - Value in ``labels`` to mark "not found". - Ignored when ``labels`` is None. - assume_unique : bool, default False - When True, ``values`` are assumed to be unique, which can speed up - the calculation. Ignored when ``labels`` is None. - - Returns - ------- - ordered : ndarray - Sorted ``values`` - new_labels : ndarray - Reordered ``labels``; returned when ``labels`` is not None. - - Raises - ------ - TypeError - * If ``values`` is not list-like or if ``labels`` is neither None - nor list-like - * If ``values`` cannot be sorted - ValueError - * If ``labels`` is not None and ``values`` contain duplicates. 
- """ - if not is_list_like(values): - raise TypeError("Only list-like objects are allowed to be passed to" - "safe_sort as values") - values = np.asarray(values) - - def sort_mixed(values): - # order ints before strings, safe in py3 - str_pos = np.array([isinstance(x, string_types) for x in values], - dtype=bool) - nums = np.sort(values[~str_pos]) - strs = np.sort(values[str_pos]) - return _ensure_object(np.concatenate([nums, strs])) - - sorter = None - if compat.PY3 and lib.infer_dtype(values) == 'mixed-integer': - # unorderable in py3 if mixed str/int - ordered = sort_mixed(values) - else: - try: - sorter = values.argsort() - ordered = values.take(sorter) - except TypeError: - # try this anyway - ordered = sort_mixed(values) - - # labels: - - if labels is None: - return ordered - - if not is_list_like(labels): - raise TypeError("Only list-like objects or None are allowed to be" - "passed to safe_sort as labels") - labels = _ensure_platform_int(np.asarray(labels)) - - from pandas import Index - if not assume_unique and not Index(values).is_unique: - raise ValueError("values should be unique if labels is not None") - - if sorter is None: - # mixed types - (hash_klass, _), values = _get_data_algo(values, _hashtables) - t = hash_klass(len(values)) - t.map_locations(values) - sorter = _ensure_platform_int(t.lookup(ordered)) - - reverse_indexer = np.empty(len(sorter), dtype=np.int_) - reverse_indexer.put(sorter, np.arange(len(sorter))) - - mask = (labels < -len(values)) | (labels >= len(values)) | \ - (labels == na_sentinel) - - # (Out of bound indices will be masked with `na_sentinel` next, so we may - # deal with them here without performance loss using `mode='wrap'`.) - new_labels = reverse_indexer.take(labels, mode='wrap') - np.putmask(new_labels, mask, na_sentinel) - - return ordered, _ensure_platform_int(new_labels) - - def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): """ Encode input values as an enumerated type or categorical variable @@ -568,6 +469,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): uniques = uniques.to_array() if sort and len(uniques) > 0: + from pandas.core.sorting import safe_sort uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel, assume_unique=True) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5d50f961927c7..c95a9598604ee 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -48,6 +48,7 @@ import pandas.core.dtypes.concat as _concat import pandas.core.missing as missing import pandas.core.algorithms as algos +import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing from pandas.core.ops import _comp_method_OBJECT_ARRAY from pandas.core.strings import StringAccessorMixin @@ -2306,7 +2307,7 @@ def difference(self, other): assume_unique=True) the_diff = this.values.take(label_diff) try: - the_diff = algos.safe_sort(the_diff) + the_diff = sorting.safe_sort(the_diff) except TypeError: pass @@ -2366,7 +2367,7 @@ def symmetric_difference(self, other, result_name=None): the_diff = _concat._concat_compat([left_diff, right_diff]) try: - the_diff = algos.safe_sort(the_diff) + the_diff = sorting.safe_sort(the_diff) except TypeError: pass diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index beebe06e7477e..8e4367a6784da 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -38,6 +38,7 @@ from pandas.core.sorting import is_int64_overflow_possible import 
pandas.core.algorithms as algos +import pandas.core.sorting as sorting import pandas.core.common as com from pandas._libs import hashtable as libhashtable, join as libjoin, lib from pandas.errors import MergeError @@ -1491,7 +1492,7 @@ def _sort_labels(uniques, left, right): l = len(left) labels = np.concatenate([left, right]) - _, new_labels = algos.safe_sort(uniques, labels, na_sentinel=-1) + _, new_labels = sorting.safe_sort(uniques, labels, na_sentinel=-1) new_labels = _ensure_int64(new_labels) new_left, new_right = new_labels[:l], new_labels[l:] diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 10b80cbc3483d..44a27bb5cbae1 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -1,12 +1,14 @@ """ miscellaneous sorting / groupby utilities """ import numpy as np -from pandas.compat import long +from pandas.compat import long, string_types, PY3 from pandas.core.categorical import Categorical from pandas.core.dtypes.common import ( _ensure_platform_int, _ensure_int64, + is_list_like, is_categorical_dtype) +from pandas.core.dtypes.cast import infer_dtype_from_array from pandas.core.dtypes.missing import isnull import pandas.core.algorithms as algorithms from pandas._libs import lib, algos, hashtable @@ -376,3 +378,107 @@ def _reorder_by_uniques(uniques, labels): uniques = algorithms.take_nd(uniques, sorter, allow_fill=False) return uniques, labels + + +def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False): + """ + Sort ``values`` and reorder corresponding ``labels``. + ``values`` should be unique if ``labels`` is not None. + Safe for use with mixed types (int, str), orders ints before strs. + + .. versionadded:: 0.19.0 + + Parameters + ---------- + values : list-like + Sequence; must be unique if ``labels`` is not None. + labels : list_like + Indices to ``values``. All out of bound indices are treated as + "not found" and will be masked with ``na_sentinel``. + na_sentinel : int, default -1 + Value in ``labels`` to mark "not found". + Ignored when ``labels`` is None. + assume_unique : bool, default False + When True, ``values`` are assumed to be unique, which can speed up + the calculation. Ignored when ``labels`` is None. + + Returns + ------- + ordered : ndarray + Sorted ``values`` + new_labels : ndarray + Reordered ``labels``; returned when ``labels`` is not None. + + Raises + ------ + TypeError + * If ``values`` is not list-like or if ``labels`` is neither None + nor list-like + * If ``values`` cannot be sorted + ValueError + * If ``labels`` is not None and ``values`` contain duplicates. 
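+
+    Examples
+    --------
+    >>> safe_sort([3, 1, 2, 0], labels=[1, 3, 2])
+    (array([0, 1, 2, 3]), array([1, 0, 2]))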
+ """ + if not is_list_like(values): + raise TypeError("Only list-like objects are allowed to be passed to" + "safe_sort as values") + + if not isinstance(values, np.ndarray): + + # don't convert to string types + dtype, _ = infer_dtype_from_array(values) + values = np.asarray(values, dtype=dtype) + + def sort_mixed(values): + # order ints before strings, safe in py3 + str_pos = np.array([isinstance(x, string_types) for x in values], + dtype=bool) + nums = np.sort(values[~str_pos]) + strs = np.sort(values[str_pos]) + return np.concatenate([nums, np.asarray(strs, dtype=object)]) + + sorter = None + if PY3 and lib.infer_dtype(values) == 'mixed-integer': + # unorderable in py3 if mixed str/int + ordered = sort_mixed(values) + else: + try: + sorter = values.argsort() + ordered = values.take(sorter) + except TypeError: + # try this anyway + ordered = sort_mixed(values) + + # labels: + + if labels is None: + return ordered + + if not is_list_like(labels): + raise TypeError("Only list-like objects or None are allowed to be" + "passed to safe_sort as labels") + labels = _ensure_platform_int(np.asarray(labels)) + + from pandas import Index + if not assume_unique and not Index(values).is_unique: + raise ValueError("values should be unique if labels is not None") + + if sorter is None: + # mixed types + (hash_klass, _), values = algorithms._get_data_algo( + values, algorithms._hashtables) + t = hash_klass(len(values)) + t.map_locations(values) + sorter = _ensure_platform_int(t.lookup(ordered)) + + reverse_indexer = np.empty(len(sorter), dtype=np.int_) + reverse_indexer.put(sorter, np.arange(len(sorter))) + + mask = (labels < -len(values)) | (labels >= len(values)) | \ + (labels == na_sentinel) + + # (Out of bound indices will be masked with `na_sentinel` next, so we may + # deal with them here without performance loss using `mode='wrap'`.) 
+ new_labels = reverse_indexer.take(labels, mode='wrap') + np.putmask(new_labels, mask, na_sentinel) + + return ordered, _ensure_platform_int(new_labels) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 4588bf17fdbeb..9e7b97f19e0c3 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -2,7 +2,6 @@ import numpy as np import pytest -import warnings from numpy.random import RandomState from numpy import nan @@ -60,93 +59,6 @@ def test_strings(self): tm.assert_series_equal(result, expected) -class TestSafeSort(object): - - def test_basic_sort(self): - values = [3, 1, 2, 0, 4] - result = algos.safe_sort(values) - expected = np.array([0, 1, 2, 3, 4]) - tm.assert_numpy_array_equal(result, expected) - - values = list("baaacb") - result = algos.safe_sort(values) - expected = np.array(list("aaabbc")) - tm.assert_numpy_array_equal(result, expected) - - values = [] - result = algos.safe_sort(values) - expected = np.array([]) - tm.assert_numpy_array_equal(result, expected) - - def test_labels(self): - values = [3, 1, 2, 0, 4] - expected = np.array([0, 1, 2, 3, 4]) - - labels = [0, 1, 1, 2, 3, 0, -1, 4] - result, result_labels = algos.safe_sort(values, labels) - expected_labels = np.array([3, 1, 1, 2, 0, 3, -1, 4], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) - tm.assert_numpy_array_equal(result_labels, expected_labels) - - # na_sentinel - labels = [0, 1, 1, 2, 3, 0, 99, 4] - result, result_labels = algos.safe_sort(values, labels, - na_sentinel=99) - expected_labels = np.array([3, 1, 1, 2, 0, 3, 99, 4], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) - tm.assert_numpy_array_equal(result_labels, expected_labels) - - # out of bound indices - labels = [0, 101, 102, 2, 3, 0, 99, 4] - result, result_labels = algos.safe_sort(values, labels) - expected_labels = np.array([3, -1, -1, 2, 0, 3, -1, 4], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) - tm.assert_numpy_array_equal(result_labels, expected_labels) - - labels = [] - result, result_labels = algos.safe_sort(values, labels) - expected_labels = np.array([], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) - tm.assert_numpy_array_equal(result_labels, expected_labels) - - def test_mixed_integer(self): - values = np.array(['b', 1, 0, 'a', 0, 'b'], dtype=object) - result = algos.safe_sort(values) - expected = np.array([0, 0, 1, 'a', 'b', 'b'], dtype=object) - tm.assert_numpy_array_equal(result, expected) - - values = np.array(['b', 1, 0, 'a'], dtype=object) - labels = [0, 1, 2, 3, 0, -1, 1] - result, result_labels = algos.safe_sort(values, labels) - expected = np.array([0, 1, 'a', 'b'], dtype=object) - expected_labels = np.array([3, 1, 0, 2, 3, -1, 1], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) - tm.assert_numpy_array_equal(result_labels, expected_labels) - - def test_unsortable(self): - # GH 13714 - arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object) - if compat.PY2 and not pd._np_version_under1p10: - # RuntimeWarning: tp_compare didn't return -1 or -2 for exception - with warnings.catch_warnings(): - pytest.raises(TypeError, algos.safe_sort, arr) - else: - pytest.raises(TypeError, algos.safe_sort, arr) - - def test_exceptions(self): - with tm.assert_raises_regex(TypeError, - "Only list-like objects are allowed"): - algos.safe_sort(values=1) - - with tm.assert_raises_regex(TypeError, - "Only list-like objects or None"): - algos.safe_sort(values=[0, 1, 2], labels=1) - - with tm.assert_raises_regex(ValueError, - "values should 
be unique"): - algos.safe_sort(values=[0, 1, 2, 1], labels=[0, 1]) - - class TestFactorize(object): def test_basic(self): diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index e09270bcadf27..f6973cccb82b0 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -1,6 +1,8 @@ import pytest from itertools import product from collections import defaultdict +import warnings +from datetime import datetime import numpy as np from numpy import nan @@ -13,7 +15,8 @@ decons_group_index, get_group_index, nargsort, - lexsort_indexer) + lexsort_indexer, + safe_sort) class TestSorting(object): @@ -340,3 +343,96 @@ def testit(label_list, shape): shape = (10000, 10000) label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] testit(label_list, shape) + + +class TestSafeSort(object): + + def test_basic_sort(self): + values = [3, 1, 2, 0, 4] + result = safe_sort(values) + expected = np.array([0, 1, 2, 3, 4]) + tm.assert_numpy_array_equal(result, expected) + + values = list("baaacb") + result = safe_sort(values) + expected = np.array(list("aaabbc"), dtype='object') + tm.assert_numpy_array_equal(result, expected) + + values = [] + result = safe_sort(values) + expected = np.array([]) + tm.assert_numpy_array_equal(result, expected) + + def test_labels(self): + values = [3, 1, 2, 0, 4] + expected = np.array([0, 1, 2, 3, 4]) + + labels = [0, 1, 1, 2, 3, 0, -1, 4] + result, result_labels = safe_sort(values, labels) + expected_labels = np.array([3, 1, 1, 2, 0, 3, -1, 4], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_labels, expected_labels) + + # na_sentinel + labels = [0, 1, 1, 2, 3, 0, 99, 4] + result, result_labels = safe_sort(values, labels, + na_sentinel=99) + expected_labels = np.array([3, 1, 1, 2, 0, 3, 99, 4], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_labels, expected_labels) + + # out of bound indices + labels = [0, 101, 102, 2, 3, 0, 99, 4] + result, result_labels = safe_sort(values, labels) + expected_labels = np.array([3, -1, -1, 2, 0, 3, -1, 4], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_labels, expected_labels) + + labels = [] + result, result_labels = safe_sort(values, labels) + expected_labels = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_labels, expected_labels) + + def test_mixed_integer(self): + values = np.array(['b', 1, 0, 'a', 0, 'b'], dtype=object) + result = safe_sort(values) + expected = np.array([0, 0, 1, 'a', 'b', 'b'], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + values = np.array(['b', 1, 0, 'a'], dtype=object) + labels = [0, 1, 2, 3, 0, -1, 1] + result, result_labels = safe_sort(values, labels) + expected = np.array([0, 1, 'a', 'b'], dtype=object) + expected_labels = np.array([3, 1, 0, 2, 3, -1, 1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_labels, expected_labels) + + def test_mixed_interger_from_list(self): + values = ['b', 1, 0, 'a', 0, 'b'] + result = safe_sort(values) + expected = np.array([0, 0, 1, 'a', 'b', 'b'], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_unsortable(self): + # GH 13714 + arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object) + if compat.PY2 and not pd._np_version_under1p10: + # RuntimeWarning: tp_compare didn't return -1 or -2 for exception + with 
warnings.catch_warnings(): + pytest.raises(TypeError, safe_sort, arr) + else: + pytest.raises(TypeError, safe_sort, arr) + + def test_exceptions(self): + with tm.assert_raises_regex(TypeError, + "Only list-like objects are allowed"): + safe_sort(values=1) + + with tm.assert_raises_regex(TypeError, + "Only list-like objects or None"): + safe_sort(values=[0, 1, 2], labels=1) + + with tm.assert_raises_regex(ValueError, + "values should be unique"): + safe_sort(values=[0, 1, 2, 1], labels=[0, 1]) From 7d9d6d3465d1c102c69d799161ebd9e28540acba Mon Sep 17 00:00:00 2001 From: Vivek <3vivekb@gmail.com> Date: Thu, 20 Jul 2017 15:04:06 -0700 Subject: [PATCH 805/933] DOC: Fixed Minor Typo (#17043) Cocumentation to Documentation --- doc/source/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 495d4e9c3a5a3..149c86aead135 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4892,7 +4892,7 @@ pandas integrates with this external package. if ``pandas-gbq`` is installed, yo use the pandas methods ``pd.read_gbq`` and ``DataFrame.to_gbq``, which will call the respective functions from ``pandas-gbq``. -Full cocumentation can be found `here `__ +Full documentation can be found `here `__ .. _io.stata: From a2d03d4a63147e2f56615852814de4d2f77c373c Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Fri, 21 Jul 2017 12:38:11 +0200 Subject: [PATCH 806/933] BUG: do not cast ints to floats if inputs o crosstab are not aligned (#17011) closes #17005 --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/reshape/pivot.py | 31 +++++++++++++++++++----------- pandas/tests/reshape/test_pivot.py | 16 +++++++++++++++ 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5146bd35dff30..9aead6ab2386c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -225,10 +225,10 @@ Sparse Reshaping ^^^^^^^^^ - Joining/Merging with a non unique ``PeriodIndex`` raised a TypeError (:issue:`16871`) +- Bug in :func:`crosstab` where non-aligned series of integers were casted to float (:issue:`17005`) - Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`) - Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) - Numeric ^^^^^^^ - Bug in ``.clip()`` with ``axis=1`` and a list-like for ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 0581ec7484c49..fbb7e6f970309 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -6,6 +6,7 @@ from pandas import Series, DataFrame, MultiIndex, Index from pandas.core.groupby import Grouper from pandas.core.reshape.util import cartesian_product +from pandas.core.index import _get_combined_index from pandas.compat import range, lrange, zip from pandas import compat import pandas.core.common as com @@ -493,6 +494,13 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, rownames = _get_names(index, rownames, prefix='row') colnames = _get_names(columns, colnames, prefix='col') + obs_idxes = [obj.index for objs in (index, columns) for obj in objs + if hasattr(obj, 'index')] + if obs_idxes: + common_idx = _get_combined_index(obs_idxes, intersect=True) + else: + common_idx = None + data = {} data.update(zip(rownames, index)) 
data.update(zip(colnames, columns)) @@ -503,20 +511,21 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, if values is not None and aggfunc is None: raise ValueError("values cannot be used without an aggfunc.") + df = DataFrame(data, index=common_idx) if values is None: - df = DataFrame(data) df['__dummy__'] = 0 - table = df.pivot_table('__dummy__', index=rownames, columns=colnames, - aggfunc=len, margins=margins, - margins_name=margins_name, dropna=dropna) - table = table.fillna(0).astype(np.int64) - + kwargs = {'aggfunc': len, 'fill_value': 0} else: - data['__dummy__'] = values - df = DataFrame(data) - table = df.pivot_table('__dummy__', index=rownames, columns=colnames, - aggfunc=aggfunc, margins=margins, - margins_name=margins_name, dropna=dropna) + df['__dummy__'] = values + kwargs = {'aggfunc': aggfunc} + + table = df.pivot_table('__dummy__', index=rownames, columns=colnames, + margins=margins, margins_name=margins_name, + dropna=dropna, **kwargs) + + # GH 17013: + if values is None and margins: + table = table.fillna(0).astype(np.int64) # Post-process if normalize is not False: diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 9881ab72f3ef5..ff9f35b0253b0 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1058,6 +1058,22 @@ def test_crosstab_ndarray(self): assert result.index.name == 'row_0' assert result.columns.name == 'col_0' + def test_crosstab_non_aligned(self): + # GH 17005 + a = pd.Series([0, 1, 1], index=['a', 'b', 'c']) + b = pd.Series([3, 4, 3, 4, 3], index=['a', 'b', 'c', 'd', 'f']) + c = np.array([3, 4, 3]) + + expected = pd.DataFrame([[1, 0], [1, 1]], + index=Index([0, 1], name='row_0'), + columns=Index([3, 4], name='col_0')) + + result = crosstab(a, b) + tm.assert_frame_equal(result, expected) + + result = crosstab(a, c) + tm.assert_frame_equal(result, expected) + def test_crosstab_margins(self): a = np.random.randint(0, 7, size=100) b = np.random.randint(0, 3, size=100) From 8f309db542c893b46e7cc6cff72638f68f5a855b Mon Sep 17 00:00:00 2001 From: Dave Willmer Date: Fri, 21 Jul 2017 06:50:29 -0400 Subject: [PATCH 807/933] BUG in merging categorical dates closes #16900 Author: Dave Willmer This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #16986 from dwillmer/cat_fix and squashes the following commits: 1ea1977 [Dave Willmer] Minor tweaks + comment 21a35a0 [Dave Willmer] Merge branch 'cat_fix' of https://github.com/dwillmer/pandas into cat_fix 04d5404 [Dave Willmer] Update tests 3cc5c24 [Dave Willmer] Merge branch 'master' into cat_fix 5e8e23b [Dave Willmer] Add whatsnew item b82d117 [Dave Willmer] Lint fixes a81933d [Dave Willmer] Remove unused import 218da66 [Dave Willmer] Generic solution to categorical problem 48e7163 [Dave Willmer] Test inner join 8843c10 [Dave Willmer] Fix TypeError when merging categorical dates --- doc/source/whatsnew/v0.21.0.txt | 3 ++- pandas/core/reshape/merge.py | 25 +++++++++++++++------ pandas/tests/reshape/test_merge.py | 36 +++++++++++++++++++++++++++++- 3 files changed, 55 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9aead6ab2386c..d544d813a7a7b 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -224,8 +224,9 @@ Sparse Reshaping ^^^^^^^^^ -- Joining/Merging with a non unique ``PeriodIndex`` raised a TypeError (:issue:`16871`) +- Joining/Merging with a non unique ``PeriodIndex`` raised a 
``TypeError`` (:issue:`16871`)
 - Bug in :func:`crosstab` where non-aligned series of integers were casted to float (:issue:`17005`)
+- Bug in merging with categorical dtypes with datetimelikes incorrectly raised a ``TypeError`` (:issue:`16900`)
 - Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`)
 - Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`)

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 8e4367a6784da..00828b8241f4c 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -878,7 +878,7 @@ def _get_merge_keys(self):
         return left_keys, right_keys, join_names

     def _maybe_coerce_merge_keys(self):
-        # we have valid mergee's but we may have to further
+        # we have valid mergees but we may have to further
         # coerce these if they are originally incompatible types
         #
         # for example if these are categorical, but are not dtype_equal
@@ -890,12 +890,16 @@ def _maybe_coerce_merge_keys(self):
             if (len(lk) and not len(rk)) or (not len(lk) and len(rk)):
                 continue

+            lk_is_cat = is_categorical_dtype(lk)
+            rk_is_cat = is_categorical_dtype(rk)
+
             # if either left or right is a categorical
             # then they must match exactly in categories & ordered
-            if is_categorical_dtype(lk) and is_categorical_dtype(rk):
+            if lk_is_cat and rk_is_cat:
                 if lk.is_dtype_equal(rk):
                     continue
-            elif is_categorical_dtype(lk) or is_categorical_dtype(rk):
+
+            elif lk_is_cat or rk_is_cat:
                 pass

             elif is_dtype_equal(lk.dtype, rk.dtype):
@@ -905,7 +909,7 @@ def _maybe_coerce_merge_keys(self):
             # kinds to proceed, eg. int64 and int8
             # further if we are object, but we infer to
             # the same, then proceed
-            if (is_numeric_dtype(lk) and is_numeric_dtype(rk)):
+            if is_numeric_dtype(lk) and is_numeric_dtype(rk):
                 if lk.dtype.kind == rk.dtype.kind:
                     continue
@@ -914,13 +918,20 @@ def _maybe_coerce_merge_keys(self):
                 continue

             # Houston, we have a problem!
-            # let's coerce to object
+            # let's coerce to object if the dtypes aren't
+            # categorical, otherwise coerce to the category
+            # dtype. If we coerced categories to object,
+            # then we would lose type information on some
+            # columns, and end up trying to merge
+            # incompatible dtypes. See GH 16900.
             if name in self.left.columns:
+                typ = lk.categories.dtype if lk_is_cat else object
                 self.left = self.left.assign(
-                    **{name: self.left[name].astype(object)})
+                    **{name: self.left[name].astype(typ)})
             if name in self.right.columns:
+                typ = rk.categories.dtype if rk_is_cat else object
                 self.right = self.right.assign(
-                    **{name: self.right[name].astype(object)})
+                    **{name: self.right[name].astype(typ)})

     def _validate_specification(self):
         # Hm, any way to make this logic less complicated??
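
(For readers skimming this patch: a minimal sketch of the failure it
addresses, distilled from the tests added below -- not part of the
patch itself. Before this change, merging on categorical columns of
dates raised a TypeError, because the keys were coerced to object and
the underlying type information was lost.)

    import pandas as pd
    from datetime import date

    df = pd.DataFrame([[date(2001, 1, 1), 1.1],
                       [date(2001, 1, 2), 1.3]],
                      columns=['date', 'num2'])
    df['date'] = df['date'].astype('category')

    df2 = pd.DataFrame([[date(2001, 1, 1), 1.3],
                        [date(2001, 1, 3), 1.4]],
                       columns=['date', 'num4'])
    df2['date'] = df2['date'].astype('category')

    # raised TypeError before this patch; now merges on the categories'
    # underlying dtype
    pd.merge(df, df2, how='outer', on=['date'])
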
diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 919675188576e..765e8e28b43fd 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -1,7 +1,7 @@ # pylint: disable=E1103 import pytest -from datetime import datetime +from datetime import datetime, date from numpy.random import randn from numpy import nan import numpy as np @@ -1515,6 +1515,40 @@ def test_self_join_multiple_categories(self): assert_frame_equal(result, df) + def test_dtype_on_categorical_dates(self): + # GH 16900 + # dates should not be coerced to ints + + df = pd.DataFrame( + [[date(2001, 1, 1), 1.1], + [date(2001, 1, 2), 1.3]], + columns=['date', 'num2'] + ) + df['date'] = df['date'].astype('category') + + df2 = pd.DataFrame( + [[date(2001, 1, 1), 1.3], + [date(2001, 1, 3), 1.4]], + columns=['date', 'num4'] + ) + df2['date'] = df2['date'].astype('category') + + expected_outer = pd.DataFrame([ + [pd.Timestamp('2001-01-01'), 1.1, 1.3], + [pd.Timestamp('2001-01-02'), 1.3, np.nan], + [pd.Timestamp('2001-01-03'), np.nan, 1.4]], + columns=['date', 'num2', 'num4'] + ) + result_outer = pd.merge(df, df2, how='outer', on=['date']) + assert_frame_equal(result_outer, expected_outer) + + expected_inner = pd.DataFrame( + [[pd.Timestamp('2001-01-01'), 1.1, 1.3]], + columns=['date', 'num2', 'num4'] + ) + result_inner = pd.merge(df, df2, how='inner', on=['date']) + assert_frame_equal(result_inner, expected_inner) + @pytest.fixture def left_df(): From 142b5b61a0433c256511649a993b0fe1b6f64524 Mon Sep 17 00:00:00 2001 From: hhuuggoo Date: Fri, 21 Jul 2017 07:00:59 -0400 Subject: [PATCH 808/933] BUG: __setitem__ with a tuple induces NaN with a tz-aware DatetimeIndex (#16889) (#16897) --- doc/source/whatsnew/v0.21.0.txt | 3 ++- pandas/core/indexing.py | 6 ++++-- pandas/tests/indexing/test_datetime.py | 27 ++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d544d813a7a7b..e9b00d34236e7 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -196,7 +196,8 @@ Indexing - Fix :func:`MultiIndex.sort_index` ordering when ``ascending`` argument is a list, but not all levels are specified, or are in a different order (:issue:`16934`). 
- Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`) - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) - +- Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`) + I/O ^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ae0aaf98fdf02..38cc5431a004f 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -760,10 +760,12 @@ def _align_frame(self, indexer, df): for i, ix in enumerate(indexer): ax = self.obj.axes[i] if is_sequence(ix) or isinstance(ix, slice): + if isinstance(ix, np.ndarray): + ix = ix.ravel() if idx is None: - idx = ax[ix].ravel() + idx = ax[ix] elif cols is None: - cols = ax[ix].ravel() + cols = ax[ix] else: break else: diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index da8a896cb6f4a..8e8fc835b11f7 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -8,6 +8,33 @@ class TestDatetimeIndex(object): + def test_setitem_with_datetime_tz(self): + # 16889 + # support .loc with alignment and tz-aware DatetimeIndex + mask = np.array([True, False, True, False]) + + idx = pd.date_range('20010101', periods=4, tz='UTC') + df = pd.DataFrame({'a': np.arange(4)}, index=idx).astype('float64') + + result = df.copy() + result.loc[mask, :] = df.loc[mask, :] + tm.assert_frame_equal(result, df) + + result = df.copy() + result.loc[mask] = df.loc[mask] + tm.assert_frame_equal(result, df) + + idx = pd.date_range('20010101', periods=4) + df = pd.DataFrame({'a': np.arange(4)}, index=idx).astype('float64') + + result = df.copy() + result.loc[mask, :] = df.loc[mask, :] + tm.assert_frame_equal(result, df) + + result = df.copy() + result.loc[mask] = df.loc[mask] + tm.assert_frame_equal(result, df) + def test_indexing_with_datetime_tz(self): # 8260 From 869be8d6981d364d01e4000845583f1035104f2f Mon Sep 17 00:00:00 2001 From: topper-123 Date: Fri, 21 Jul 2017 12:01:38 +0100 Subject: [PATCH 809/933] Added test for _get_dtype_type. 
(#16899)

---
 pandas/tests/dtypes/test_common.py | 37 ++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
index 290cdd732b6d6..b02691e957366 100644
--- a/pandas/tests/dtypes/test_common.py
+++ b/pandas/tests/dtypes/test_common.py
@@ -568,3 +568,40 @@ def test__get_dtype_fails(input_param):
     # python objects
     pytest.raises(TypeError, com._get_dtype, input_param)
+
+
+@pytest.mark.parametrize('input_param,result', [
+    (int, np.dtype(int).type),
+    ('int32', np.int32),
+    (float, np.dtype(float).type),
+    ('float64', np.float64),
+    (np.dtype('float64'), np.float64),
+    (str, np.dtype(str).type),
+    (pd.Series([1, 2], dtype=np.dtype('int16')), np.int16),
+    (pd.Series(['a', 'b']), np.object_),
+    (pd.Index([1, 2], dtype='int64'), np.int64),
+    (pd.Index(['a', 'b']), np.object_),
+    ('category', com.CategoricalDtypeType),
+    (pd.Categorical(['a', 'b']).dtype, com.CategoricalDtypeType),
+    (pd.Categorical(['a', 'b']), com.CategoricalDtypeType),
+    (pd.CategoricalIndex(['a', 'b']).dtype, com.CategoricalDtypeType),
+    (pd.CategoricalIndex(['a', 'b']), com.CategoricalDtypeType),
+    (pd.DatetimeIndex([1, 2]), np.datetime64),
+    (pd.DatetimeIndex([1, 2]).dtype, np.datetime64),
+    ('<M8[ns]', np.datetime64)])
+def test__get_dtype_type(input_param, result):
+    assert com._get_dtype_type(input_param) == result

From: Jeff Reback
Date: Fri, 21 Jul 2017 07:05:19 -0400
Subject: [PATCH 810/933] BUG/API: dtype inconsistencies in .where / .setitem
 / .putmask / .fillna (#16821)

* CLN/BUG: fix ndarray assignment may cause unexpected cast

supersedes #14145
closes #14001

* API: This fixes a number of inconsistencies and API issues
w.r.t. dtype conversions.

This is a reprise of #14145 & #16408.

This removes some code from the core structures & pushes it to internals,
where the primitives are made more consistent.

This should allow us to be a bit more consistent for pandas2 type things.

closes #16402
supersedes #14145
closes #14001

CLN: remove unneeded code in internals; use split_and_operate when possible

---
 doc/source/whatsnew/v0.21.0.txt        |  62 +++
 pandas/_libs/index.pyx                 |  26 +-
 pandas/_libs/tslib.pyx                 |   3 +-
 pandas/core/algorithms.py              |   6 +
 pandas/core/dtypes/cast.py             |  74 ++-
 pandas/core/dtypes/common.py           |  13 +-
 pandas/core/frame.py                   |  70 +--
 pandas/core/generic.py                 |  46 +-
 pandas/core/indexes/base.py            |  12 +-
 pandas/core/indexes/numeric.py         |  21 +-
 pandas/core/internals.py               | 698 +++++++++++++++----------
 pandas/core/ops.py                     |  18 +-
 pandas/core/panel.py                   |  21 +-
 pandas/core/sparse/frame.py            |  11 +-
 pandas/tests/dtypes/test_cast.py       | 109 +++-
 pandas/tests/dtypes/test_common.py     |   2 +
 pandas/tests/dtypes/test_convert.py    |   0
 pandas/tests/dtypes/test_missing.py    |  19 +
 pandas/tests/frame/test_indexing.py    |  30 +-
 pandas/tests/frame/test_operators.py   |  21 +-
 pandas/tests/indexing/test_coercion.py | 221 +++++---
 pandas/tests/indexing/test_datetime.py |  10 +-
 pandas/tests/indexing/test_indexing.py |   6 +
 pandas/tests/series/test_analytics.py  |   8 +-
 pandas/tests/series/test_indexing.py   |  19 +-
 pandas/tests/series/test_missing.py    |  48 +-
 26 files changed, 1022 insertions(+), 552 deletions(-)
 delete mode 100644 pandas/tests/dtypes/test_convert.py

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index e9b00d34236e7..91d3e9e7b935b 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -127,6 +127,65 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in
    ...
    ValueError: Cannot operate inplace if there is no assignment

+
+.. _whatsnew_0210.dtype_conversions:
+
+Dtype Conversions
+^^^^^^^^^^^^^^^^^
+
+- Previously assignments, ``.where()`` and ``.fillna()`` with a ``bool`` assignment, would coerce to
+  same type (e.g. int / float), or raise for datetimelikes. These will now preserve the bools with ``object`` dtypes. (:issue:`16821`).
+
+  .. ipython:: python
+
+     s = Series([1, 2, 3])
+
+  .. code-block:: python
+
+     In [5]: s[1] = True
+
+     In [6]: s
+     Out[6]:
+     0    1
+     1    1
+     2    3
+     dtype: int64
+
+  New Behavior
+
+  .. ipython:: python
+
+     s[1] = True
+     s
+
+- Previously an assignment to a datetimelike with a non-datetimelike would coerce the
+  non-datetime-like item being assigned (:issue:`14145`).
+
+  .. ipython:: python
+
+     s = pd.Series([pd.Timestamp('2011-01-01'), pd.Timestamp('2012-01-01')])
+
+  .. code-block:: python
+
+     In [1]: s[1] = 1
+
+     In [2]: s
+     Out[2]:
+     0   2011-01-01 00:00:00.000000000
+     1   1970-01-01 00:00:00.000000001
+     dtype: datetime64[ns]
+
+  These now coerce to ``object`` dtype.
+
+  .. ipython:: python
+
+     s[1] = 1
+     s
+
+- Additional bug fixes w.r.t. dtype conversions.
+
+  - Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`)
+  - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)
+
 .. _whatsnew_0210.api:

 Other API Changes
@@ -185,6 +244,9 @@ Bug Fixes

 Conversion
 ^^^^^^^^^^
+- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`)
+- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)
+
 Indexing
 ^^^^^^^^
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index 5e92c506b5d0c..273dc06886088 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -19,6 +19,7 @@ cimport tslib
 from hashtable cimport *
 from pandas._libs import tslib, algos, hashtable as _hash
 from pandas._libs.tslib import Timestamp, Timedelta
+from datetime import datetime, timedelta

 from datetime cimport (get_datetime64_value, _pydatetime_to_dts,
                        pandas_datetimestruct)
@@ -507,24 +508,37 @@ cdef class TimedeltaEngine(DatetimeEngine):
         return 'm8[ns]'

 cpdef convert_scalar(ndarray arr, object value):
+    # we don't turn integers
+    # into datetimes/timedeltas
+
+    # we don't turn bools into int/float/complex
+
     if arr.descr.type_num == NPY_DATETIME:
         if isinstance(value, np.ndarray):
             pass
-        elif isinstance(value, Timestamp):
-            return value.value
+        elif isinstance(value, datetime):
+            return Timestamp(value).value
         elif value is None or value != value:
             return iNaT
-        else:
+        elif util.is_string_object(value):
             return Timestamp(value).value
+        raise ValueError("cannot set a Timestamp with a non-timestamp")
+
     elif arr.descr.type_num == NPY_TIMEDELTA:
         if isinstance(value, np.ndarray):
             pass
-        elif isinstance(value, Timedelta):
-            return value.value
+        elif isinstance(value, timedelta):
+            return Timedelta(value).value
         elif value is None or value != value:
             return iNaT
-        else:
+        elif util.is_string_object(value):
             return Timedelta(value).value
+        raise ValueError("cannot set a Timedelta with a non-timedelta")
+
+    if (issubclass(arr.dtype.type, (np.integer, np.floating, np.complex)) and
+            not issubclass(arr.dtype.type, np.bool_)):
+        if util.is_bool_object(value):
+            raise ValueError('Cannot assign bool to float/integer series')

     if issubclass(arr.dtype.type, (np.integer, np.bool_)):
         if util.is_float_object(value) and value != value:
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index c471d46262484..44be9ba56b84a 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -14,6 +14,7 @@ cdef bint PY3 = (sys.version_info[0] >= 3)
 from cpython cimport (
     PyTypeObject,
     PyFloat_Check,
+    PyComplex_Check,
     PyLong_Check,
     PyObject_RichCompareBool,
     PyObject_RichCompare,
@@ -902,7 +903,7 @@ cdef inline bint _checknull_with_nat(object val):
 cdef inline bint _check_all_nulls(object val):
     """ utility to check if a value is any type of null """
     cdef bint res
-    if PyFloat_Check(val):
+    if PyFloat_Check(val) or PyComplex_Check(val):
         res = val != val
     elif val is NaT:
         res = 1
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 3ccd7216fa81a..133e9d7dca18f 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -150,6 +150,12 @@ def _reconstruct_data(values, dtype, original):
         pass
     elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype):
         values = Index(original)._shallow_copy(values, name=None)
+    elif is_bool_dtype(dtype):
+        values = values.astype(dtype)
+
+        # we only support object dtypes bool Index
+        if isinstance(original, Index):
+            values = values.astype(object)
     elif dtype is not None:
         values = values.astype(dtype)
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 6532e17695c86..22d98a89d68d6 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -273,7 +273,7 @@ def maybe_promote(dtype, fill_value=np.nan):
     else:
         if issubclass(dtype.type, np.datetime64):
             try:
-                fill_value = lib.Timestamp(fill_value).value
+                fill_value = tslib.Timestamp(fill_value).value
             except:
                 # the proper thing to do here would probably be to upcast
                 # to object (but numpy 1.6.1 doesn't do this properly)
@@ -334,6 +334,23 @@ def maybe_promote(dtype, fill_value=np.nan):
     return dtype, fill_value

+def infer_dtype_from(val, pandas_dtype=False):
+    """
+    interpret the dtype from a scalar or array. This is a convenience
+    routine to infer dtype from a scalar or an array
+
+    Parameters
+    ----------
+    pandas_dtype : bool, default False
+        whether to infer dtype including pandas extension types.
+        If False, a scalar/array belonging to pandas extension types is
+        inferred as object
+    """
+    if is_scalar(val):
+        return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype)
+    return infer_dtype_from_array(val, pandas_dtype=pandas_dtype)
+
+
 def infer_dtype_from_scalar(val, pandas_dtype=False):
     """
     interpret the dtype from a scalar
@@ -350,9 +367,9 @@ def infer_dtype_from_scalar(val, pandas_dtype=False):

     # a 1-element ndarray
     if isinstance(val, np.ndarray):
+        msg = "invalid ndarray passed to _infer_dtype_from_scalar"
         if val.ndim != 0:
-            raise ValueError(
-                "invalid ndarray passed to _infer_dtype_from_scalar")
+            raise ValueError(msg)

         dtype = val.dtype
         val = val.item()
@@ -409,24 +426,31 @@ def infer_dtype_from_scalar(val, pandas_dtype=False):
     return dtype, val

-def infer_dtype_from_array(arr):
+def infer_dtype_from_array(arr, pandas_dtype=False):
     """
     infer the dtype from a scalar or array

     Parameters
     ----------
     arr : scalar or array
+    pandas_dtype : bool, default False
+        whether to infer dtype including pandas extension types.
+        If False, an array belonging to pandas extension types
+        is inferred as object

     Returns
     -------
-    tuple (numpy-compat dtype, array)
+    tuple (numpy-compat/pandas-compat dtype, array)

     Notes
     -----
-    These infer to numpy dtypes exactly
-    with the exception that mixed / object dtypes
+    if pandas_dtype=False. these infer to numpy dtypes
+    exactly with the exception that mixed / object dtypes
     are not coerced by stringifying or conversion
+    if pandas_dtype=True. datetime64tz-aware/categorical
+    types will retain their character.
+
     Examples
     --------
     >>> np.asarray([1, '1'])
@@ -443,6 +467,12 @@ def infer_dtype_from_array(arr):
     if not is_list_like(arr):
         arr = [arr]

+    if pandas_dtype and is_extension_type(arr):
+        return arr.dtype, arr
+
+    elif isinstance(arr, ABCSeries):
+        return arr.dtype, np.asarray(arr)
+
     # don't force numpy coerce with nan's
     inferred = lib.infer_dtype(arr)
     if inferred in ['string', 'bytes', 'unicode',
@@ -553,7 +583,7 @@ def conv(r, dtype):
         if isnull(r):
             pass
         elif dtype == _NS_DTYPE:
-            r = lib.Timestamp(r)
+            r = tslib.Timestamp(r)
         elif dtype == _TD_DTYPE:
             r = _coerce_scalar_to_timedelta_type(r)
         elif dtype == np.bool_:
@@ -1027,3 +1057,31 @@ def find_common_type(types):
         return np.object

     return np.find_common_type(types, [])
+
+
+def cast_scalar_to_array(shape, value, dtype=None):
+    """
+    create np.ndarray of specified shape and dtype, filled with values
+
+    Parameters
+    ----------
+    shape : tuple
+    value : scalar value
+    dtype : np.dtype, optional
+        dtype to coerce
+
+    Returns
+    -------
+    ndarray of shape, filled with value, of specified / inferred dtype
+
+    """
+
+    if dtype is None:
+        dtype, fill_value = infer_dtype_from_scalar(value)
+    else:
+        fill_value = value
+
+    values = np.empty(shape, dtype=dtype)
+    values.fill(fill_value)
+
+    return values
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index 114900ce802be..37f99bd344e6c 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -11,7 +11,8 @@
     ExtensionDtype)
 from .generic import (ABCCategorical, ABCPeriodIndex,
                       ABCDatetimeIndex, ABCSeries,
-                      ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex)
+                      ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex,
+                      ABCIndexClass)
 from .inference import is_string_like
 from .inference import *  # noqa
@@ -1545,6 +1546,16 @@ def is_bool_dtype(arr_or_dtype):
     except ValueError:
         # this isn't even a dtype
         return False
+
+    if isinstance(arr_or_dtype, ABCIndexClass):
+
+        # TODO(jreback)
+        # we don't have a boolean Index class
+        # so its object, we need to infer to
+        # guess this
+        return (arr_or_dtype.is_object and
+                arr_or_dtype.inferred_type == 'boolean')
+
     return issubclass(tipo, np.bool_)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 4843f3389bf75..9514ab8f3b27f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -25,7 +25,8 @@
 import numpy.ma as ma

 from pandas.core.dtypes.cast import (
-    maybe_upcast, infer_dtype_from_scalar,
+    maybe_upcast,
+    cast_scalar_to_array,
     maybe_cast_to_datetime,
     maybe_infer_to_datetimelike,
     maybe_convert_platform,
@@ -59,6 +60,7 @@
     is_named_tuple)
 from pandas.core.dtypes.missing import isnull, notnull

+
 from pandas.core.common import (_try_sort,
                                 _default_index,
                                 _values_from_object,
@@ -385,15 +387,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                     raise_with_traceback(exc)

             if arr.ndim == 0 and index is not None and columns is not None:
-                if isinstance(data, compat.string_types) and dtype is None:
-                    dtype = np.object_
-                if dtype is None:
-                    dtype, data = infer_dtype_from_scalar(data)
-
-                values = np.empty((len(index), len(columns)), dtype=dtype)
-                values.fill(data)
-                mgr = self._init_ndarray(values, index, columns, dtype=dtype,
-                                         copy=False)
+                values = cast_scalar_to_array((len(index), len(columns)),
+                                              data, dtype=dtype)
+                mgr = self._init_ndarray(values, index,
columns, + dtype=values.dtype, copy=False) else: raise ValueError('DataFrame constructor not properly called!') @@ -507,7 +504,7 @@ def _get_axes(N, K, index=index, columns=columns): values = _prep_ndarray(values, copy=copy) if dtype is not None: - if values.dtype != dtype: + if not is_dtype_equal(values.dtype, dtype): try: values = values.astype(dtype) except Exception as orig: @@ -2689,9 +2686,8 @@ def reindexer(value): else: # upcast the scalar - dtype, value = infer_dtype_from_scalar(value) - value = np.repeat(value, len(self.index)).astype(dtype) - value = maybe_cast_to_datetime(value, dtype) + value = cast_scalar_to_array(len(self.index), value) + value = maybe_cast_to_datetime(value, value.dtype) # return internal types directly if is_extension_type(value): @@ -3676,7 +3672,8 @@ def reorder_levels(self, order, axis=0): # ---------------------------------------------------------------------- # Arithmetic / combination related - def _combine_frame(self, other, func, fill_value=None, level=None): + def _combine_frame(self, other, func, fill_value=None, level=None, + try_cast=True): this, other = self.align(other, join='outer', level=level, copy=False) new_index, new_columns = this.index, this.columns @@ -3729,19 +3726,23 @@ def f(i): copy=False) def _combine_series(self, other, func, fill_value=None, axis=None, - level=None): + level=None, try_cast=True): if axis is not None: axis = self._get_axis_name(axis) if axis == 'index': return self._combine_match_index(other, func, level=level, - fill_value=fill_value) + fill_value=fill_value, + try_cast=try_cast) else: return self._combine_match_columns(other, func, level=level, - fill_value=fill_value) + fill_value=fill_value, + try_cast=try_cast) return self._combine_series_infer(other, func, level=level, - fill_value=fill_value) + fill_value=fill_value, + try_cast=try_cast) - def _combine_series_infer(self, other, func, level=None, fill_value=None): + def _combine_series_infer(self, other, func, level=None, + fill_value=None, try_cast=True): if len(other) == 0: return self * NA @@ -3751,9 +3752,11 @@ def _combine_series_infer(self, other, func, level=None, fill_value=None): columns=self.columns) return self._combine_match_columns(other, func, level=level, - fill_value=fill_value) + fill_value=fill_value, + try_cast=try_cast) - def _combine_match_index(self, other, func, level=None, fill_value=None): + def _combine_match_index(self, other, func, level=None, + fill_value=None, try_cast=True): left, right = self.align(other, join='outer', axis=0, level=level, copy=False) if fill_value is not None: @@ -3763,7 +3766,8 @@ def _combine_match_index(self, other, func, level=None, fill_value=None): index=left.index, columns=self.columns, copy=False) - def _combine_match_columns(self, other, func, level=None, fill_value=None): + def _combine_match_columns(self, other, func, level=None, + fill_value=None, try_cast=True): left, right = self.align(other, join='outer', axis=1, level=level, copy=False) if fill_value is not None: @@ -3771,15 +3775,17 @@ def _combine_match_columns(self, other, func, level=None, fill_value=None): fill_value) new_data = left._data.eval(func=func, other=right, - axes=[left.columns, self.index]) + axes=[left.columns, self.index], + try_cast=try_cast) return self._constructor(new_data) - def _combine_const(self, other, func, raise_on_error=True): + def _combine_const(self, other, func, raise_on_error=True, try_cast=True): new_data = self._data.eval(func=func, other=other, - raise_on_error=raise_on_error) + 
raise_on_error=raise_on_error, + try_cast=try_cast) return self._constructor(new_data) - def _compare_frame_evaluate(self, other, func, str_rep): + def _compare_frame_evaluate(self, other, func, str_rep, try_cast=True): # unique if self.columns.is_unique: @@ -3803,16 +3809,18 @@ def _compare(a, b): result.columns = self.columns return result - def _compare_frame(self, other, func, str_rep): + def _compare_frame(self, other, func, str_rep, try_cast=True): if not self._indexed_same(other): raise ValueError('Can only compare identically-labeled ' 'DataFrame objects') - return self._compare_frame_evaluate(other, func, str_rep) + return self._compare_frame_evaluate(other, func, str_rep, + try_cast=try_cast) - def _flex_compare_frame(self, other, func, str_rep, level): + def _flex_compare_frame(self, other, func, str_rep, level, try_cast=True): if not self._indexed_same(other): self, other = self.align(other, 'outer', level=level, copy=False) - return self._compare_frame_evaluate(other, func, str_rep) + return self._compare_frame_evaluate(other, func, str_rep, + try_cast=try_cast) def combine(self, other, func, fill_value=None, overwrite=True): """ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b2083a4454f84..68416d85ca659 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -13,7 +13,6 @@ from pandas.core.dtypes.common import ( _ensure_int64, _ensure_object, - needs_i8_conversion, is_scalar, is_number, is_integer, is_bool, @@ -26,7 +25,8 @@ is_dict_like, is_re_compilable, pandas_dtype) -from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask +from pandas.core.dtypes.cast import ( + maybe_promote, maybe_upcast_putmask) from pandas.core.dtypes.missing import isnull, notnull from pandas.core.dtypes.generic import ABCSeries, ABCPanel @@ -5465,48 +5465,6 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, raise NotImplementedError("cannot align with a higher " "dimensional NDFrame") - elif is_list_like(other): - - if self.ndim == 1: - - # try to set the same dtype as ourselves - try: - new_other = np.array(other, dtype=self.dtype) - except ValueError: - new_other = np.array(other) - except TypeError: - new_other = other - - # we can end up comparing integers and m8[ns] - # which is a numpy no no - is_i8 = needs_i8_conversion(self.dtype) - if is_i8: - matches = False - else: - matches = (new_other == np.array(other)) - - if matches is False or not matches.all(): - - # coerce other to a common dtype if we can - if needs_i8_conversion(self.dtype): - try: - other = np.array(other, dtype=self.dtype) - except: - other = np.array(other) - else: - other = np.asarray(other) - other = np.asarray(other, - dtype=np.common_type(other, - new_other)) - - # we need to use the new dtype - try_quick = False - else: - other = new_other - else: - - other = np.array(other) - if isinstance(other, np.ndarray): if other.shape != self.shape: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c95a9598604ee..714b952217c9d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -26,6 +26,7 @@ is_object_dtype, is_categorical_dtype, is_interval_dtype, + is_bool, is_bool_dtype, is_signed_integer_dtype, is_unsigned_integer_dtype, @@ -611,9 +612,18 @@ def repeat(self, repeats, *args, **kwargs): def where(self, cond, other=None): if other is None: other = self._na_value - values = np.where(cond, self.values, other) dtype = self.dtype + values = self.values + + if is_bool(other) or is_bool_dtype(other): + + # 
bools force casting + values = values.astype(object) + dtype = None + + values = np.where(cond, values, other) + if self._is_numeric_dtype and np.any(isnull(values)): # We can't coerce to the numeric dtype of "self" (unless # it's float) if there are NaN values in our output. diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 72d521cbe2d60..142e0f36c66ec 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -2,9 +2,14 @@ from pandas._libs import (index as libindex, algos as libalgos, join as libjoin) from pandas.core.dtypes.common import ( - is_dtype_equal, pandas_dtype, - is_float_dtype, is_object_dtype, - is_integer_dtype, is_scalar) + is_dtype_equal, + pandas_dtype, + is_float_dtype, + is_object_dtype, + is_integer_dtype, + is_bool, + is_bool_dtype, + is_scalar) from pandas.core.common import _asarray_tuplesafe, _values_from_object from pandas import compat @@ -56,6 +61,16 @@ def _maybe_cast_slice_bound(self, label, side, kind): # we will try to coerce to integers return self._maybe_cast_indexer(label) + def _convert_for_op(self, value): + """ Convert value to be insertable to ndarray """ + + if is_bool(value) or is_bool_dtype(value): + # force conversion to object + # so we don't lose the bools + raise TypeError + + return value + def _convert_tolerance(self, tolerance): try: return float(tolerance) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f2a7ac76481d4..8f3667edf68e6 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1,4 +1,5 @@ import copy +from warnings import catch_warnings import itertools import re import operator @@ -22,6 +23,7 @@ is_categorical, is_categorical_dtype, is_integer_dtype, is_datetime64tz_dtype, + is_bool_dtype, is_object_dtype, is_datetimelike_v_numeric, is_float_dtype, is_numeric_dtype, @@ -33,21 +35,21 @@ _get_dtype) from pandas.core.dtypes.cast import ( maybe_downcast_to_dtype, - maybe_convert_string_to_object, maybe_upcast, - maybe_convert_scalar, maybe_promote, + maybe_promote, + infer_dtype_from, infer_dtype_from_scalar, soft_convert_objects, maybe_convert_objects, astype_nansafe, find_common_type) from pandas.core.dtypes.missing import ( - isnull, array_equivalent, + isnull, notnull, array_equivalent, _is_na_compat, is_null_datelike_scalar) import pandas.core.dtypes.concat as _concat -from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.generic import ABCSeries, ABCDatetimeIndex from pandas.core.common import is_null_slice import pandas.core.algorithms as algos @@ -169,11 +171,6 @@ def get_values(self, dtype=None): def to_dense(self): return self.values.view() - def to_object_block(self, mgr): - """ return myself as an object block """ - values = self.get_values(dtype=object) - return self.make_block(values, klass=ObjectBlock) - @property def _na_value(self): return np.nan @@ -374,7 +371,6 @@ def fillna(self, value, limit=None, inplace=False, downcast=None, else: return self.copy() - original_value = value mask = isnull(self.values) if limit is not None: if not is_integer(limit): @@ -388,7 +384,7 @@ def fillna(self, value, limit=None, inplace=False, downcast=None, # fillna, but if we cannot coerce, then try again as an ObjectBlock try: - values, _, value, _ = self._try_coerce_args(self.values, value) + values, _, _, _ = self._try_coerce_args(self.values, value) blocks = self.putmask(mask, value, inplace=inplace) blocks = [b.make_block(values=self._try_coerce_result(b.values)) for b in blocks] @@ -399,12 +395,82 @@ def 
fillna(self, value, limit=None, inplace=False, downcast=None, if not mask.any(): return self if inplace else self.copy() - # we cannot coerce the underlying object, so - # make an ObjectBlock - return self.to_object_block(mgr=mgr).fillna(original_value, - limit=limit, - inplace=inplace, - downcast=False) + # operate column-by-column + def f(m, v, i): + block = self.coerce_to_target_dtype(value) + + # slice out our block + if i is not None: + block = block.getitem_block(slice(i, i + 1)) + return block.fillna(value, + limit=limit, + inplace=inplace, + downcast=None) + + return self.split_and_operate(mask, f, inplace) + + def split_and_operate(self, mask, f, inplace): + """ + split the block per-column, and apply the callable f + per-column, return a new block for each. Handle + masking which will not change a block unless needed. + + Parameters + ---------- + mask : 2-d boolean mask + f : callable accepting (1d-mask, 1d values, indexer) + inplace : boolean + + Returns + ------- + list of blocks + """ + + if mask is None: + mask = np.ones(self.shape, dtype=bool) + new_values = self.values + + def make_a_block(nv, ref_loc): + if isinstance(nv, Block): + block = nv + elif isinstance(nv, list): + block = nv[0] + else: + # Put back the dimension that was taken from it and make + # a block out of the result. + try: + nv = _block_shape(nv, ndim=self.ndim) + except (AttributeError, NotImplementedError): + pass + block = self.make_block(values=nv, + placement=ref_loc, fastpath=True) + return block + + # ndim == 1 + if self.ndim == 1: + if mask.any(): + nv = f(mask, new_values, None) + else: + nv = new_values if inplace else new_values.copy() + block = make_a_block(nv, self.mgr_locs) + return [block] + + # ndim > 1 + new_blocks = [] + for i, ref_loc in enumerate(self.mgr_locs): + m = mask[i] + v = new_values[i] + + # need a new block + if m.any(): + nv = f(m, v, i) + else: + nv = v if inplace else v.copy() + + block = make_a_block(nv, [ref_loc]) + new_blocks.append(block) + + return new_blocks def _maybe_downcast(self, blocks, downcast=None): @@ -415,6 +481,8 @@ def _maybe_downcast(self, blocks, downcast=None): elif downcast is None and (self.is_timedelta or self.is_datetime): return blocks + if not isinstance(blocks, list): + blocks = [blocks] return _extend_blocks([b.downcast(downcast) for b in blocks]) def downcast(self, dtypes=None, mgr=None): @@ -444,27 +512,20 @@ def downcast(self, dtypes=None, mgr=None): raise ValueError("downcast must have a dictionary or 'infer' as " "its argument") - # item-by-item + # operate column-by-column # this is expensive as it splits the blocks items-by-item - blocks = [] - for i, rl in enumerate(self.mgr_locs): + def f(m, v, i): if dtypes == 'infer': dtype = 'infer' else: raise AssertionError("dtypes as dict is not supported yet") - # TODO: This either should be completed or removed - dtype = dtypes.get(item, self._downcast_dtype) # noqa - if dtype is None: - nv = _block_shape(values[i], ndim=self.ndim) - else: - nv = maybe_downcast_to_dtype(values[i], dtype) - nv = _block_shape(nv, ndim=self.ndim) + if dtype is not None: + v = maybe_downcast_to_dtype(v, dtype) + return v - blocks.append(self.make_block(nv, fastpath=True, placement=[rl])) - - return blocks + return self.split_and_operate(None, f, False) def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs): return self._astype(dtype, copy=copy, errors=errors, values=values, @@ -545,11 +606,14 @@ def convert(self, copy=True, **kwargs): return self.copy() if copy else self - def 
_can_hold_element(self, value): - raise NotImplementedError() - - def _try_cast(self, value): - raise NotImplementedError() + def _can_hold_element(self, element): + """ require the same dtype as ourselves """ + dtype = self.values.dtype.type + if is_list_like(element): + element = np.asarray(element) + tipo = element.dtype.type + return issubclass(tipo, dtype) + return isinstance(element, dtype) def _try_cast_result(self, result, dtype=None): """ try to cast the result to our original type, we may have @@ -584,12 +648,16 @@ def _try_cast_result(self, result, dtype=None): # may need to change the dtype here return maybe_downcast_to_dtype(result, dtype) - def _try_operate(self, values): - """ return a version to operate on as the input """ - return values - def _try_coerce_args(self, values, other): """ provide coercion to our input arguments """ + + if np.any(notnull(other)) and not self._can_hold_element(other): + # coercion issues + # let higher levels handle + raise TypeError("cannot convert {} to an {}".format( + type(other).__name__, + type(self).__name__.lower().replace('Block', ''))) + return values, False, other, False def _try_coerce_result(self, result): @@ -601,9 +669,6 @@ def _try_coerce_and_cast_result(self, result, dtype=None): result = self._try_cast_result(result, dtype=dtype) return result - def _try_fill(self, value): - return value - def to_native_types(self, slicer=None, na_rep='nan', quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ @@ -639,7 +704,7 @@ def replace(self, to_replace, value, inplace=False, filter=None, inplace = validate_bool_kwarg(inplace, 'inplace') original_to_replace = to_replace - mask = isnull(self.values) + # try to replace, if we raise an error, convert to ObjectBlock and # retry try: @@ -657,11 +722,9 @@ def replace(self, to_replace, value, inplace=False, filter=None, return blocks except (TypeError, ValueError): - # we can't process the value, but nothing to do - if not mask.any(): - return self if inplace else self.copy() - - return self.to_object_block(mgr=mgr).replace( + # try again with a compatible block + block = self.astype(object) + return block.replace( to_replace=original_to_replace, value=value, inplace=inplace, filter=filter, regex=regex, convert=convert) @@ -676,14 +739,48 @@ def setitem(self, indexer, value, mgr=None): indexer is a direct slice/positional indexer; value must be a compatible shape """ - # coerce None values, if appropriate if value is None: if self.is_numeric: value = np.nan - # coerce args - values, _, value, _ = self._try_coerce_args(self.values, value) + # coerce if block dtype can store value + values = self.values + try: + values, _, value, _ = self._try_coerce_args(values, value) + # can keep its own dtype + if hasattr(value, 'dtype') and is_dtype_equal(values.dtype, + value.dtype): + dtype = self.dtype + else: + dtype = 'infer' + + except (TypeError, ValueError): + # current dtype cannot store value, coerce to common dtype + find_dtype = False + + if hasattr(value, 'dtype'): + dtype = value.dtype + find_dtype = True + + elif is_scalar(value): + if isnull(value): + # NaN promotion is handled in latter path + dtype = False + else: + dtype, _ = infer_dtype_from_scalar(value, + pandas_dtype=True) + find_dtype = True + else: + dtype = 'infer' + + if find_dtype: + dtype = find_common_type([values.dtype, dtype]) + if not is_dtype_equal(self.dtype, dtype): + b = self.astype(dtype) + return b.setitem(indexer, value, mgr=mgr) + + # value must be storeable at this moment arr_value = 
np.array(value) # cast the values to a type that can hold nan (if necessary) @@ -713,87 +810,58 @@ def setitem(self, indexer, value, mgr=None): raise ValueError("cannot set using a slice indexer with a " "different length than the value") - try: - - def _is_scalar_indexer(indexer): - # return True if we are all scalar indexers + def _is_scalar_indexer(indexer): + # return True if we are all scalar indexers - if arr_value.ndim == 1: - if not isinstance(indexer, tuple): - indexer = tuple([indexer]) - return all([is_scalar(idx) for idx in indexer]) - return False - - def _is_empty_indexer(indexer): - # return a boolean if we have an empty indexer - - if arr_value.ndim == 1: - if not isinstance(indexer, tuple): - indexer = tuple([indexer]) + if arr_value.ndim == 1: + if not isinstance(indexer, tuple): + indexer = tuple([indexer]) return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer) - return False - - # empty indexers - # 8669 (empty) - if _is_empty_indexer(indexer): - pass - - # setting a single element for each dim and with a rhs that could - # be say a list - # GH 6043 - elif _is_scalar_indexer(indexer): - values[indexer] = value - - # if we are an exact match (ex-broadcasting), - # then use the resultant dtype - elif (len(arr_value.shape) and - arr_value.shape[0] == values.shape[0] and - np.prod(arr_value.shape) == np.prod(values.shape)): - values[indexer] = value - values = values.astype(arr_value.dtype) - - # set - else: - values[indexer] = value + return False - # coerce and try to infer the dtypes of the result - if hasattr(value, 'dtype') and is_dtype_equal(values.dtype, - value.dtype): - dtype = value.dtype - elif is_scalar(value): - dtype, _ = infer_dtype_from_scalar(value) - else: - dtype = 'infer' - values = self._try_coerce_and_cast_result(values, dtype) - block = self.make_block(transf(values), fastpath=True) + def _is_empty_indexer(indexer): + # return a boolean if we have an empty indexer - # may have to soft convert_objects here - if block.is_object and not self.is_object: - block = block.convert(numeric=False) + if is_list_like(indexer) and not len(indexer): + return True + if arr_value.ndim == 1: + if not isinstance(indexer, tuple): + indexer = tuple([indexer]) + return any(isinstance(idx, np.ndarray) and len(idx) == 0 + for idx in indexer) + return False - return block - except ValueError: - raise - except TypeError: + # empty indexers + # 8669 (empty) + if _is_empty_indexer(indexer): + pass - # cast to the passed dtype if possible - # otherwise raise the original error + # setting a single element for each dim and with a rhs that could + # be say a list + # GH 6043 + elif _is_scalar_indexer(indexer): + values[indexer] = value + + # if we are an exact match (ex-broadcasting), + # then use the resultant dtype + elif (len(arr_value.shape) and + arr_value.shape[0] == values.shape[0] and + np.prod(arr_value.shape) == np.prod(values.shape)): + values[indexer] = value try: - # e.g. 
we are uint32 and our value is uint64 - # this is for compat with older numpies - block = self.make_block(transf(values.astype(value.dtype))) - return block.setitem(indexer=indexer, value=value, mgr=mgr) - - except: + values = values.astype(arr_value.dtype) + except ValueError: pass - raise - - except Exception: - pass + # set + else: + values[indexer] = value - return [self] + # coerce and try to infer the dtypes of the result + values = self._try_coerce_and_cast_result(values, dtype) + block = self.make_block(transf(values), fastpath=True) + return block def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False, mgr=None): @@ -830,11 +898,11 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, new = self.fill_value if self._can_hold_element(new): + _, _, new, _ = self._try_coerce_args(new_values, new) + if transpose: new_values = new_values.T - new = self._try_cast(new) - # If the default repeat behavior in np.putmask would go in the # wrong direction, then explictly repeat and reshape new instead if getattr(new, 'ndim', 0) >= 1: @@ -843,6 +911,23 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, new, new_values.shape[-1]).reshape(self.shape) new = new.astype(new_values.dtype) + # we require exact matches between the len of the + # values we are setting (or is compat). np.putmask + # doesn't check this and will simply truncate / pad + # the output, but we want sane error messages + # + # TODO: this prob needs some better checking + # for 2D cases + if ((is_list_like(new) and + np.any(mask[mask]) and + getattr(new, 'ndim', 1) == 1)): + + if not (mask.shape[-1] == len(new) or + mask[mask].shape[-1] == len(new) or + len(new) == 1): + raise ValueError("cannot assign mismatch " + "length to masked array") + np.putmask(new_values, mask, new) # maybe upcast me @@ -860,41 +945,29 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, new_shape.insert(axis, 1) new = new.reshape(tuple(new_shape)) - # need to go column by column - new_blocks = [] - if self.ndim > 1: - for i, ref_loc in enumerate(self.mgr_locs): - m = mask[i] - v = new_values[i] - - # need a new block - if m.any(): - if isinstance(new, np.ndarray): - n = np.squeeze(new[i % new.shape[0]]) - else: - n = np.array(new) - - # type of the new block - dtype, _ = maybe_promote(n.dtype) + # operate column-by-column + def f(m, v, i): - # we need to explicitly astype here to make a copy - n = n.astype(dtype) + if i is None: + # ndim==1 case. + n = new + else: - nv = _putmask_smart(v, m, n) + if isinstance(new, np.ndarray): + n = np.squeeze(new[i % new.shape[0]]) else: - nv = v if inplace else v.copy() + n = np.array(new) - # Put back the dimension that was taken from it and make - # a block out of the result. 
- block = self.make_block(values=nv[np.newaxis], - placement=[ref_loc], fastpath=True) + # type of the new block + dtype, _ = maybe_promote(n.dtype) - new_blocks.append(block) + # we need to explicitly astype here to make a copy + n = n.astype(dtype) - else: - nv = _putmask_smart(new_values, mask, new) - new_blocks.append(self.make_block(values=nv, fastpath=True)) + nv = _putmask_smart(v, m, n) + return nv + new_blocks = self.split_and_operate(mask, f, inplace) return new_blocks if inplace: @@ -905,6 +978,67 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, return [self.make_block(new_values, fastpath=True)] + def coerce_to_target_dtype(self, other): + """ + coerce the current block to a dtype compat for other + we will return a block, possibly object, and not raise + + we can also safely try to coerce to the same dtype + and will receive the same block + """ + + # if we cannot then coerce to object + dtype, _ = infer_dtype_from(other, pandas_dtype=True) + + if is_dtype_equal(self.dtype, dtype): + return self + + if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype): + # we don't upcast to bool + return self.astype(object) + + elif ((self.is_float or self.is_complex) and + (is_integer_dtype(dtype) or is_float_dtype(dtype))): + # don't coerce float/complex to int + return self + + elif (self.is_datetime or + is_datetime64_dtype(dtype) or + is_datetime64tz_dtype(dtype)): + + # not a datetime + if not ((is_datetime64_dtype(dtype) or + is_datetime64tz_dtype(dtype)) and self.is_datetime): + return self.astype(object) + + # don't upcast timezone with different timezone or no timezone + mytz = getattr(self.dtype, 'tz', None) + othertz = getattr(dtype, 'tz', None) + + if str(mytz) != str(othertz): + return self.astype(object) + + raise AssertionError("possible recursion in " + "coerce_to_target_dtype: {} {}".format( + self, other)) + + elif (self.is_timedelta or is_timedelta64_dtype(dtype)): + + # not a timedelta + if not (is_timedelta64_dtype(dtype) and self.is_timedelta): + return self.astype(object) + + raise AssertionError("possible recursion in " + "coerce_to_target_dtype: {} {}".format( + self, other)) + + try: + return self.astype(dtype) + except (ValueError, TypeError): + pass + + return self.astype(object) + def interpolate(self, method='pad', axis=0, index=None, values=None, inplace=False, limit=None, limit_direction='forward', fill_value=None, coerce=False, downcast=None, mgr=None, @@ -972,7 +1106,6 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, values = self.values if inplace else self.values.copy() values, _, fill_value, _ = self._try_coerce_args(values, fill_value) - values = self._try_operate(values) values = missing.interpolate_2d(values, method=method, axis=axis, limit=limit, fill_value=fill_value, dtype=self.dtype) @@ -1111,6 +1244,7 @@ def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None): ------- a new block, the result of the func """ + orig_other = other values = self.values if hasattr(other, 'reindex_axis'): @@ -1135,8 +1269,14 @@ def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None): transf = (lambda x: x.T) if is_transposed else (lambda x: x) # coerce/transpose the args if needed - values, values_mask, other, other_mask = self._try_coerce_args( - transf(values), other) + try: + values, values_mask, other, other_mask = self._try_coerce_args( + transf(values), other) + except TypeError: + block = self.coerce_to_target_dtype(orig_other) + return block.eval(func, orig_other, + 
raise_on_error=raise_on_error, + try_cast=try_cast, mgr=mgr) # get the result, may need to transpose the other def get_result(other): @@ -1163,7 +1303,7 @@ def get_result(other): result = result.astype('float64', copy=False) result[other_mask.ravel()] = np.nan - return self._try_coerce_result(result) + return result # error handler if we have an issue operating with the function def handle_error(): @@ -1211,6 +1351,7 @@ def handle_error(): if try_cast: result = self._try_cast_result(result) + result = _block_shape(result, ndim=self.ndim) return [self.make_block(result, fastpath=True, )] def where(self, other, cond, align=True, raise_on_error=True, @@ -1233,8 +1374,8 @@ def where(self, other, cond, align=True, raise_on_error=True, ------- a new block(s), the result of the func """ - values = self.values + orig_other = other if transpose: values = values.T @@ -1254,9 +1395,6 @@ def where(self, other, cond, align=True, raise_on_error=True, raise ValueError("where must have a condition that is ndarray " "like") - other = maybe_convert_string_to_object(other) - other = maybe_convert_scalar(other) - # our where function def func(cond, values, other): if cond.ravel().all(): @@ -1264,6 +1402,7 @@ def func(cond, values, other): values, values_mask, other, other_mask = self._try_coerce_args( values, other) + try: return self._try_coerce_result(expressions.where( cond, values, other, raise_on_error=True)) @@ -1279,7 +1418,19 @@ def func(cond, values, other): # see if we can operate on the entire block, or need item-by-item # or if we are a single block (ndim == 1) - result = func(cond, values, other) + try: + result = func(cond, values, other) + except TypeError: + + # we cannot coerce, return a compat dtype + # we are explicity ignoring raise_on_error here + block = self.coerce_to_target_dtype(other) + blocks = block.where(orig_other, cond, align=align, + raise_on_error=raise_on_error, + try_cast=try_cast, axis=axis, + transpose=transpose) + return self._maybe_downcast(blocks, 'infer') + if self._can_hold_na or self.ndim == 1: if transpose: @@ -1543,6 +1694,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, new = new[mask] mask = _safe_reshape(mask, new_values.shape) + new_values[mask] = new new_values = self._try_coerce_result(new_values) return [self.make_block(values=new_values)] @@ -1578,20 +1730,14 @@ class FloatBlock(FloatOrComplexBlock): def _can_hold_element(self, element): if is_list_like(element): - element = np.array(element) + element = np.asarray(element) tipo = element.dtype.type return (issubclass(tipo, (np.floating, np.integer)) and not issubclass(tipo, (np.datetime64, np.timedelta64))) - return (isinstance(element, (float, int, np.float_, np.int_)) and + return (isinstance(element, (float, int, np.floating, np.int_)) and not isinstance(element, (bool, np.bool_, datetime, timedelta, np.datetime64, np.timedelta64))) - def _try_cast(self, element): - try: - return float(element) - except: # pragma: no cover - return element - def to_native_types(self, slicer=None, na_rep='', float_format=None, decimal='.', quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ @@ -1639,13 +1785,7 @@ def _can_hold_element(self, element): (np.floating, np.integer, np.complexfloating)) return (isinstance(element, (float, int, complex, np.float_, np.int_)) and - not isinstance(bool, np.bool_)) - - def _try_cast(self, element): - try: - return complex(element) - except: # pragma: no cover - return element + not isinstance(element, (bool, np.bool_))) def 
should_store(self, value): return issubclass(value.dtype.type, np.complexfloating) @@ -1661,15 +1801,10 @@ def _can_hold_element(self, element): element = np.array(element) tipo = element.dtype.type return (issubclass(tipo, np.integer) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) + not issubclass(tipo, (np.datetime64, np.timedelta64)) and + self.dtype.itemsize >= element.dtype.itemsize) return is_integer(element) - def _try_cast(self, element): - try: - return int(element) - except: # pragma: no cover - return element - def should_store(self, value): return is_integer_dtype(value) and value.dtype == self.dtype @@ -1684,10 +1819,6 @@ def _na_value(self): def fill_value(self): return tslib.iNaT - def _try_operate(self, values): - """ return a version to operate on """ - return values.view('i8') - def get_values(self, dtype=None): """ return object dtype as boxed values, such as Timestamps/Timedelta @@ -1708,11 +1839,18 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): def _box_func(self): return lambda x: tslib.Timedelta(x, unit='ns') + def _can_hold_element(self, element): + if is_list_like(element): + element = np.array(element) + tipo = element.dtype.type + return issubclass(tipo, np.timedelta64) + return isinstance(element, (timedelta, np.timedelta64)) + def fillna(self, value, **kwargs): # allow filling with integers to be # interpreted as seconds - if not isinstance(value, np.timedelta64) and is_integer(value): + if is_integer(value) and not isinstance(value, np.timedelta64): value = Timedelta(value, unit='s') return super(TimeDeltaBlock, self).fillna(value, **kwargs) @@ -1743,19 +1881,18 @@ def _try_coerce_args(self, values, other): elif isinstance(other, Timedelta): other_mask = isnull(other) other = other.value + elif isinstance(other, timedelta): + other = Timedelta(other).value elif isinstance(other, np.timedelta64): other_mask = isnull(other) other = Timedelta(other).value - elif isinstance(other, timedelta): - other = Timedelta(other).value - elif isinstance(other, np.ndarray): + elif hasattr(other, 'dtype') and is_timedelta64_dtype(other): other_mask = isnull(other) other = other.astype('i8', copy=False).view('i8') else: - # scalar - other = Timedelta(other) - other_mask = isnull(other) - other = other.value + # coercion issues + # let higher levels handle + raise TypeError return values, values_mask, other, other_mask @@ -1805,15 +1942,9 @@ class BoolBlock(NumericBlock): def _can_hold_element(self, element): if is_list_like(element): - element = np.array(element) - return issubclass(element.dtype.type, np.integer) - return isinstance(element, (int, bool)) - - def _try_cast(self, element): - try: - return bool(element) - except: # pragma: no cover - return element + element = np.asarray(element) + return issubclass(element.dtype.type, np.bool_) + return isinstance(element, (bool, np.bool_)) def should_store(self, value): return issubclass(value.dtype.type, np.bool_) @@ -1881,31 +2012,24 @@ def convert(self, *args, **kwargs): if key in kwargs: fn_kwargs[key] = kwargs[key] - # attempt to create new type blocks - blocks = [] - if by_item and not self._is_single_block: - - for i, rl in enumerate(self.mgr_locs): - values = self.iget(i) + # operate column-by-column + def f(m, v, i): + shape = v.shape + values = fn(v.ravel(), **fn_kwargs) + try: + values = values.reshape(shape) + values = _block_shape(values, ndim=self.ndim) + except (AttributeError, NotImplementedError): + pass - shape = values.shape - values = fn(values.ravel(), **fn_kwargs) - try: - values 
= values.reshape(shape) - values = _block_shape(values, ndim=self.ndim) - except (AttributeError, NotImplementedError): - pass - newb = make_block(values, ndim=self.ndim, placement=[rl]) - blocks.append(newb) + return values + if by_item and not self._is_single_block: + blocks = self.split_and_operate(None, f, False) else: - values = fn(self.values.ravel(), **fn_kwargs) - try: - values = values.reshape(self.values.shape) - except NotImplementedError: - pass - blocks.append(make_block(values, ndim=self.ndim, - placement=self.mgr_locs)) + values = f(None, self.values.ravel(), None) + blocks = [make_block(values, ndim=self.ndim, + placement=self.mgr_locs)] return blocks @@ -1949,8 +2073,14 @@ def _maybe_downcast(self, blocks, downcast=None): def _can_hold_element(self, element): return True - def _try_cast(self, element): - return element + def _try_coerce_args(self, values, other): + """ provide coercion to our input arguments """ + + if isinstance(other, ABCDatetimeIndex): + # to store DatetimeTZBlock as object + other = other.asobject.values + + return values, False, other, False def should_store(self, value): return not (issubclass(value.dtype.type, @@ -2249,12 +2379,6 @@ def _can_hold_element(self, element): return (is_integer(element) or isinstance(element, datetime) or isnull(element)) - def _try_cast(self, element): - try: - return int(element) - except: - return element - def _try_coerce_args(self, values, other): """ Coerce values and other to dtype 'i8'. NaN and NaT convert to @@ -2288,19 +2412,13 @@ def _try_coerce_args(self, values, other): "naive Block") other_mask = isnull(other) other = other.asm8.view('i8') - elif hasattr(other, 'dtype') and is_integer_dtype(other): - other = other.view('i8') + elif hasattr(other, 'dtype') and is_datetime64_dtype(other): + other_mask = isnull(other) + other = other.astype('i8', copy=False).view('i8') else: - try: - other = np.asarray(other) - other_mask = isnull(other) - - other = other.astype('i8', copy=False).view('i8') - except ValueError: - - # coercion issues - # let higher levels handle - raise TypeError + # coercion issues + # let higher levels handle + raise TypeError return values, values_mask, other, other_mask @@ -2400,21 +2518,6 @@ def get_values(self, dtype=None): self.values.ravel(), f).reshape(self.values.shape) return self.values - def to_object_block(self, mgr): - """ - return myself as an object block - - Since we keep the DTI as a 1-d object, this is different - depends on BlockManager's ndim - """ - values = self.get_values(dtype=object) - kwargs = {} - if mgr.ndim > 1: - values = _block_shape(values, ndim=mgr.ndim) - kwargs['ndim'] = mgr.ndim - kwargs['placement'] = [0] - return self.make_block(values, klass=ObjectBlock, **kwargs) - def _slice(self, slicer): """ return a slice of my values """ if isinstance(slicer, tuple): @@ -2466,6 +2569,8 @@ def _try_coerce_args(self, values, other): raise ValueError("incompatible or non tz-aware value") other_mask = isnull(other) other = other.value + else: + raise TypeError return values, values_mask, other, other_mask @@ -3246,16 +3351,6 @@ def comp(s): return isnull(values) return _maybe_compare(values, getattr(s, 'asm8', s), operator.eq) - def _cast_scalar(block, scalar): - dtype, val = infer_dtype_from_scalar(scalar, pandas_dtype=True) - if not is_dtype_equal(block.dtype, dtype): - dtype = find_common_type([block.dtype, dtype]) - block = block.astype(dtype) - # use original value - val = scalar - - return block, val - masks = [comp(s) for i, s in enumerate(src_list)] 
result_blocks = [] @@ -3278,8 +3373,8 @@ def _cast_scalar(block, scalar): # particular block m = masks[i][b.mgr_locs.indexer] if m.any(): - b, val = _cast_scalar(b, d) - new_rb.extend(b.putmask(m, val, inplace=True)) + b = b.coerce_to_target_dtype(d) + new_rb.extend(b.putmask(m, d, inplace=True)) else: new_rb.append(b) rb = new_rb @@ -4757,17 +4852,30 @@ def _transform_index(index, func, level=None): def _putmask_smart(v, m, n): """ - Return a new block, try to preserve dtype if possible. + Return a new ndarray, try to preserve dtype if possible. Parameters ---------- v : `values`, updated in-place (array like) m : `mask`, applies to both sides (array like) n : `new values` either scalar or an array like aligned with `values` + + Returns + ------- + values : ndarray with updated values + this *may* be a copy of the original + + See Also + -------- + ndarray.putmask """ + + # we cannot use np.asarray() here as we cannot have conversions + # that numpy does when numeric are mixed with strings + # n should be the length of the mask or a scalar here if not is_list_like(n): - n = np.array([n] * len(m)) + n = np.repeat(n, len(m)) elif isinstance(n, np.ndarray) and n.ndim == 0: # numpy scalar n = np.repeat(np.array(n, ndmin=1), len(m)) @@ -4781,10 +4889,21 @@ def _putmask_smart(v, m, n): if not _is_na_compat(v, nn[0]): raise ValueError - nn_at = nn.astype(v.dtype) + # we ignore ComplexWarning here + with catch_warnings(record=True): + nn_at = nn.astype(v.dtype) # avoid invalid dtype comparisons - if not is_numeric_v_string_like(nn, nn_at): + # between numbers & strings + + # only compare integers/floats + # don't compare integers to datetimelikes + if (not is_numeric_v_string_like(nn, nn_at) and + (is_float_dtype(nn.dtype) or + is_integer_dtype(nn.dtype) and + is_float_dtype(nn_at.dtype) or + is_integer_dtype(nn_at.dtype))): + comp = (nn == nn_at) if is_list_like(comp) and comp.all(): nv = v.copy() @@ -4793,21 +4912,28 @@ def _putmask_smart(v, m, n): except (ValueError, IndexError, TypeError): pass - # change the dtype + n = np.asarray(n) + + def _putmask_preserve(nv, n): + try: + nv[m] = n[m] + except (IndexError, ValueError): + nv[m] = n + return nv + + # preserves dtype if possible + if v.dtype.kind == n.dtype.kind: + return _putmask_preserve(v, n) + + # change the dtype if needed dtype, _ = maybe_promote(n.dtype) if is_extension_type(v.dtype) and is_object_dtype(dtype): - nv = v.get_values(dtype) + v = v.get_values(dtype) else: - nv = v.astype(dtype) + v = v.astype(dtype) - try: - nv[m] = n[m] - except ValueError: - idx, = np.where(np.squeeze(m)) - for mask_index, new_val in zip(idx, n[m]): - nv[mask_index] = new_val - return nv + return _putmask_preserve(v, n) def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 55473ec8d7cad..017afcd691194 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1278,12 +1278,14 @@ def f(self, other, axis=default_axis, level=None): other = _align_method_FRAME(self, other, axis) if isinstance(other, pd.DataFrame): # Another DataFrame - return self._flex_compare_frame(other, na_op, str_rep, level) + return self._flex_compare_frame(other, na_op, str_rep, level, + try_cast=False) elif isinstance(other, ABCSeries): - return self._combine_series(other, na_op, None, axis, level) + return self._combine_series(other, na_op, None, axis, level, + try_cast=False) else: - return self._combine_const(other, na_op) + return self._combine_const(other, na_op, try_cast=False) f.__name__ = name 
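For orientation, the user-visible contract that these try_cast=False comparison wrappers and the block-coercion changes above converge on is easiest to see end to end. A minimal sketch, assuming a pandas build with this patch series applied; the expected dtypes mirror the assertions in pandas/tests/indexing/test_coercion.py further below:

    import numpy as np
    import pandas as pd

    # Assigning a value the block dtype cannot hold no longer silently
    # casts the value: a bool set into an int64 Series now upcasts the
    # Series to object instead of coercing True -> 1
    # (compare test_setitem_series_int64 below).
    s = pd.Series([1, 2, 3, 4])
    s[1] = True
    assert s.dtype == np.object_

    # Series.where behaves the same way: an incompatible "other" coerces
    # the result to object rather than casting the replacement value
    # (compare _where_int64_common below).
    cond = pd.Series([True, False, True, False])
    res = pd.Series([1, 2, 3, 4]).where(cond, True)
    assert res.dtype == np.object_
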
@@ -1296,12 +1298,14 @@ def f(self, other): if isinstance(other, pd.DataFrame): # Another DataFrame return self._compare_frame(other, func, str_rep) elif isinstance(other, ABCSeries): - return self._combine_series_infer(other, func) + return self._combine_series_infer(other, func, try_cast=False) else: # straight boolean comparisions we want to allow all columns # (regardless of dtype to pass thru) See #4537 for discussion. - res = self._combine_const(other, func, raise_on_error=False) + res = self._combine_const(other, func, + raise_on_error=False, + try_cast=False) return res.fillna(True).astype(bool) f.__name__ = name @@ -1381,13 +1385,13 @@ def f(self, other, axis=None): axis = self._get_axis_number(axis) if isinstance(other, self._constructor): - return self._compare_constructor(other, na_op) + return self._compare_constructor(other, na_op, try_cast=False) elif isinstance(other, (self._constructor_sliced, pd.DataFrame, ABCSeries)): raise Exception("input needs alignment for this object [%s]" % self._constructor) else: - return self._combine_const(other, na_op) + return self._combine_const(other, na_op, try_cast=False) f.__name__ = name diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 69a8468552f54..609bf3186344a 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -8,6 +8,7 @@ import warnings from pandas.core.dtypes.cast import ( infer_dtype_from_scalar, + cast_scalar_to_array, maybe_cast_item) from pandas.core.dtypes.common import ( is_integer, is_list_like, @@ -178,11 +179,9 @@ def _init_data(self, data, copy, dtype, **kwargs): copy = False dtype = None elif is_scalar(data) and all(x is not None for x in passed_axes): - if dtype is None: - dtype, data = infer_dtype_from_scalar(data) - values = np.empty([len(x) for x in passed_axes], dtype=dtype) - values.fill(data) - mgr = self._init_matrix(values, passed_axes, dtype=dtype, + values = cast_scalar_to_array([len(x) for x in passed_axes], + data, dtype=dtype) + mgr = self._init_matrix(values, passed_axes, dtype=values.dtype, copy=False) copy = False else: # pragma: no cover @@ -327,7 +326,7 @@ def _init_matrix(self, data, axes, dtype=None, copy=False): # ---------------------------------------------------------------------- # Comparison methods - def _compare_constructor(self, other, func): + def _compare_constructor(self, other, func, try_cast=True): if not self._indexed_same(other): raise Exception('Can only compare identically-labeled ' 'same type objects') @@ -584,9 +583,7 @@ def __setitem__(self, key, value): shape[1:], tuple(map(int, value.shape)))) mat = np.asarray(value) elif is_scalar(value): - dtype, value = infer_dtype_from_scalar(value) - mat = np.empty(shape[1:], dtype=dtype) - mat.fill(value) + mat = cast_scalar_to_array(shape[1:], value) else: raise TypeError('Cannot set item of type: %s' % str(type(value))) @@ -719,13 +716,13 @@ def _combine(self, other, func, axis=0): "operation with %s" % (str(type(other)), str(type(self)))) - def _combine_const(self, other, func): + def _combine_const(self, other, func, try_cast=True): with np.errstate(all='ignore'): new_values = func(self.values, other) d = self._construct_axes_dict() return self._constructor(new_values, **d) - def _combine_frame(self, other, func, axis=0): + def _combine_frame(self, other, func, axis=0, try_cast=True): index, columns = self._get_plane_axes(axis) axis = self._get_axis_number(axis) @@ -744,7 +741,7 @@ def _combine_frame(self, other, func, axis=0): return self._constructor(new_values, self.items, self.major_axis, 
self.minor_axis) - def _combine_panel(self, other, func): + def _combine_panel(self, other, func, try_cast=True): items = self.items.union(other.items) major = self.major_axis.union(other.major_axis) minor = self.minor_axis.union(other.minor_axis) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 5fe96d70fc16f..462fb18618949 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -500,7 +500,8 @@ def xs(self, key, axis=0, copy=False): # ---------------------------------------------------------------------- # Arithmetic-related methods - def _combine_frame(self, other, func, fill_value=None, level=None): + def _combine_frame(self, other, func, fill_value=None, level=None, + try_cast=True): this, other = self.align(other, join='outer', level=level, copy=False) new_index, new_columns = this.index, this.columns @@ -543,7 +544,8 @@ def _combine_frame(self, other, func, fill_value=None, level=None): default_fill_value=new_fill_value ).__finalize__(self) - def _combine_match_index(self, other, func, level=None, fill_value=None): + def _combine_match_index(self, other, func, level=None, fill_value=None, + try_cast=True): new_data = {} if fill_value is not None: @@ -573,7 +575,8 @@ def _combine_match_index(self, other, func, level=None, fill_value=None): new_data, index=new_index, columns=self.columns, default_fill_value=fill_value).__finalize__(self) - def _combine_match_columns(self, other, func, level=None, fill_value=None): + def _combine_match_columns(self, other, func, level=None, fill_value=None, + try_cast=True): # patched version of DataFrame._combine_match_columns to account for # NumPy circumventing __rsub__ with float64 types, e.g.: 3.0 - series, # where 3.0 is numpy.float64 and series is a SparseSeries. Still @@ -599,7 +602,7 @@ def _combine_match_columns(self, other, func, level=None, fill_value=None): new_data, index=self.index, columns=union, default_fill_value=self.default_fill_value).__finalize__(self) - def _combine_const(self, other, func, raise_on_error=True): + def _combine_const(self, other, func, raise_on_error=True, try_cast=True): return self._apply_columns(lambda x: func(x, other)) def _reindex_index(self, index, method, copy, level, fill_value=np.nan, diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index 6e07487b3e04f..d9fb458c83529 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -9,11 +9,14 @@ from datetime import datetime, timedelta, date import numpy as np -from pandas import Timedelta, Timestamp, DatetimeIndex, DataFrame, NaT, Series +import pandas as pd +from pandas import (Timedelta, Timestamp, DatetimeIndex, + DataFrame, NaT, Period, Series) from pandas.core.dtypes.cast import ( maybe_downcast_to_dtype, maybe_convert_objects, + cast_scalar_to_array, infer_dtype_from_scalar, infer_dtype_from_array, maybe_convert_string_to_object, @@ -23,6 +26,8 @@ CategoricalDtype, DatetimeTZDtype, PeriodDtype) +from pandas.core.dtypes.common import ( + is_dtype_equal) from pandas.util import testing as tm @@ -96,8 +101,8 @@ def test_datetime_with_timezone(self): class TestInferDtype(object): - def test_infer_dtype_from_scalar(self): - # Test that _infer_dtype_from_scalar is returning correct dtype for int + def test_infer_dtype_from_scalar(self): + # Test that infer_dtype_from_scalar is returning correct dtype for int + # and float. 
for dtypec in [np.uint8, np.int8, np.uint16, np.int16, np.uint32, @@ -137,29 +142,93 @@ def test_infer_dtype_from_scalar(self): dtype, val = infer_dtype_from_scalar(data) assert dtype == 'm8[ns]' + for tz in ['UTC', 'US/Eastern', 'Asia/Tokyo']: + dt = Timestamp(1, tz=tz) + dtype, val = infer_dtype_from_scalar(dt, pandas_dtype=True) + assert dtype == 'datetime64[ns, {0}]'.format(tz) + assert val == dt.value + + dtype, val = infer_dtype_from_scalar(dt) + assert dtype == np.object_ + assert val == dt + + for freq in ['M', 'D']: + p = Period('2011-01-01', freq=freq) + dtype, val = infer_dtype_from_scalar(p, pandas_dtype=True) + assert dtype == 'period[{0}]'.format(freq) + assert val == p.ordinal + + dtype, val = infer_dtype_from_scalar(p) + assert dtype == np.object_ + assert val == p + + # misc for data in [date(2000, 1, 1), Timestamp(1, tz='US/Eastern'), 'foo']: + dtype, val = infer_dtype_from_scalar(data) assert dtype == np.object_ + def test_infer_dtype_from_scalar_errors(self): + with pytest.raises(ValueError): + infer_dtype_from_scalar(np.array([1])) + @pytest.mark.parametrize( - "arr, expected", - [('foo', np.object_), - (b'foo', np.object_), - (1, np.int_), - (1.5, np.float_), - ([1], np.int_), - (np.array([1]), np.int_), - ([np.nan, 1, ''], np.object_), - (np.array([[1.0, 2.0]]), np.float_), - (Timestamp('20160101'), np.object_), - (np.datetime64('2016-01-01'), np.dtype(' multiple Blocks) - df = pd.concat([DataFrame(np.random.randn(10, 2)), - DataFrame(np.random.randint(0, 10, size=(10, 2)))], - ignore_index=True, axis=1) + df = pd.concat([ + DataFrame(np.random.randn(10, 2)), + DataFrame(np.random.randint(0, 10, size=(10, 2)), dtype='int64')], + ignore_index=True, axis=1) mask = DataFrame(False, columns=df.columns, index=df.index) s1 = Series(1, index=df.columns) s2 = Series(2, index=df.index) result = df.where(mask, s1, axis='columns') expected = DataFrame(1.0, columns=df.columns, index=df.index) - expected[2] = expected[2].astype(int) - expected[3] = expected[3].astype(int) + expected[2] = expected[2].astype('int64') + expected[3] = expected[3].astype('int64') assert_frame_equal(result, expected) result = df.copy() @@ -2742,8 +2744,8 @@ def test_where_axis(self): result = df.where(mask, s2, axis='index') expected = DataFrame(2.0, columns=df.columns, index=df.index) - expected[2] = expected[2].astype(int) - expected[3] = expected[3].astype(int) + expected[2] = expected[2].astype('int64') + expected[3] = expected[3].astype('int64') assert_frame_equal(result, expected) result = df.copy() diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 8ec6c6e6263d8..438d7481ecc3e 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -188,6 +188,7 @@ def test_timestamp_compare(self): df.loc[np.random.rand(len(df)) > 0.5, 'dates2'] = pd.NaT ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} + for left, right in ops.items(): left_f = getattr(operator, left) right_f = getattr(operator, right) @@ -315,14 +316,12 @@ def _check_unary_op(op): # operator.neg is deprecated in numpy >= 1.9 _check_unary_op(operator.inv) - def test_logical_typeerror(self): - if not compat.PY3: - pytest.raises(TypeError, self.frame.__eq__, 'foo') - pytest.raises(TypeError, self.frame.__lt__, 'foo') - pytest.raises(TypeError, self.frame.__gt__, 'foo') - pytest.raises(TypeError, self.frame.__ne__, 'foo') - else: - pytest.skip('test_logical_typeerror not tested on PY3') + @pytest.mark.parametrize('op,res', [('__eq__', False), + 
('__ne__', True)]) + def test_logical_typeerror_with_non_valid(self, op, res): + # we are comparing floats vs a string + result = getattr(self.frame, op)('foo') + assert bool(result.all().all()) is res def test_logical_with_nas(self): d = DataFrame({'a': [np.nan, False], 'b': [True, True]}) @@ -832,9 +831,11 @@ def test_combineSeries(self): assert 'E' in larger_added assert np.isnan(larger_added['E']).all() - # vs mix (upcast) as needed + # no upcast needed added = self.mixed_float + series - _check_mixed_float(added, dtype='float64') + _check_mixed_float(added) + + # vs mix (upcast) as needed added = self.mixed_float + series.astype('float32') _check_mixed_float(added, dtype=dict(C=None)) added = self.mixed_float + series.astype('float16') diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 25cc810299678..752d2deb53304 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -101,9 +101,22 @@ def test_setitem_series_int64(self): exp = pd.Series([1, 1 + 1j, 3, 4]) self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.complex128) - # int + bool -> int - exp = pd.Series([1, 1, 3, 4]) - self._assert_setitem_series_conversion(obj, True, exp, np.int64) + # int + bool -> object + exp = pd.Series([1, True, 3, 4]) + self._assert_setitem_series_conversion(obj, True, exp, np.object) + + def test_setitem_series_int8(self): + # integer dtype coercion (no change) + obj = pd.Series([1, 2, 3, 4], dtype=np.int8) + assert obj.dtype == np.int8 + + exp = pd.Series([1, 1, 3, 4], dtype=np.int8) + self._assert_setitem_series_conversion(obj, np.int32(1), exp, np.int8) + + # BUG: it must be Series([1, 1, 3, 4], dtype=np.int16) + exp = pd.Series([1, 0, 3, 4], dtype=np.int8) + self._assert_setitem_series_conversion(obj, np.int16(2**9), exp, + np.int8) def test_setitem_series_float64(self): obj = pd.Series([1.1, 2.2, 3.3, 4.4]) @@ -122,9 +135,9 @@ def test_setitem_series_float64(self): self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.complex128) - # float + bool -> float - exp = pd.Series([1.1, 1.0, 3.3, 4.4]) - self._assert_setitem_series_conversion(obj, True, exp, np.float64) + # float + bool -> object + exp = pd.Series([1.1, True, 3.3, 4.4]) + self._assert_setitem_series_conversion(obj, True, exp, np.object) def test_setitem_series_complex128(self): obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j]) @@ -132,7 +145,7 @@ def test_setitem_series_complex128(self): # complex + int -> complex exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_setitem_series_conversion(obj, True, exp, np.complex128) + self._assert_setitem_series_conversion(obj, 1, exp, np.complex128) # complex + float -> complex exp = pd.Series([1 + 1j, 1.1, 3 + 3j, 4 + 4j]) @@ -142,9 +155,9 @@ def test_setitem_series_complex128(self): exp = pd.Series([1 + 1j, 1 + 1j, 3 + 3j, 4 + 4j]) self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.complex128) - # complex + bool -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_setitem_series_conversion(obj, True, exp, np.complex128) + # complex + bool -> object + exp = pd.Series([1 + 1j, True, 3 + 3j, 4 + 4j]) + self._assert_setitem_series_conversion(obj, True, exp, np.object) def test_setitem_series_bool(self): obj = pd.Series([True, False, True, False]) @@ -198,14 +211,18 @@ def test_setitem_series_datetime64(self): exp, 'datetime64[ns]') # datetime64 + int -> object - # ToDo: The result must be object exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp(1), + 1, 
pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-04')]) - self._assert_setitem_series_conversion(obj, 1, exp, 'datetime64[ns]') + self._assert_setitem_series_conversion(obj, 1, exp, 'object') - # ToDo: add more tests once the above issue has been fixed + # datetime64 + object -> object + exp = pd.Series([pd.Timestamp('2011-01-01'), + 'x', + pd.Timestamp('2011-01-03'), + pd.Timestamp('2011-01-04')]) + self._assert_setitem_series_conversion(obj, 'x', exp, np.object) def test_setitem_series_datetime64tz(self): tz = 'US/Eastern' @@ -224,19 +241,59 @@ def test_setitem_series_datetime64tz(self): self._assert_setitem_series_conversion(obj, value, exp, 'datetime64[ns, US/Eastern]') + # datetime64tz + datetime64tz (different tz) -> object + exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), + pd.Timestamp('2012-01-01', tz='US/Pacific'), + pd.Timestamp('2011-01-03', tz=tz), + pd.Timestamp('2011-01-04', tz=tz)]) + value = pd.Timestamp('2012-01-01', tz='US/Pacific') + self._assert_setitem_series_conversion(obj, value, exp, np.object) + + # datetime64tz + datetime64 -> object + exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), + pd.Timestamp('2012-01-01'), + pd.Timestamp('2011-01-03', tz=tz), + pd.Timestamp('2011-01-04', tz=tz)]) + value = pd.Timestamp('2012-01-01') + self._assert_setitem_series_conversion(obj, value, exp, np.object) + # datetime64 + int -> object - # ToDo: The result must be object exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp(1, tz=tz), + 1, pd.Timestamp('2011-01-03', tz=tz), pd.Timestamp('2011-01-04', tz=tz)]) - self._assert_setitem_series_conversion(obj, 1, exp, - 'datetime64[ns, US/Eastern]') + self._assert_setitem_series_conversion(obj, 1, exp, np.object) # ToDo: add more tests once the above issue has been fixed def test_setitem_series_timedelta64(self): - pass + obj = pd.Series([pd.Timedelta('1 day'), + pd.Timedelta('2 day'), + pd.Timedelta('3 day'), + pd.Timedelta('4 day')]) + assert obj.dtype == 'timedelta64[ns]' + + # timedelta64 + timedelta64 -> timedelta64 + exp = pd.Series([pd.Timedelta('1 day'), + pd.Timedelta('12 day'), + pd.Timedelta('3 day'), + pd.Timedelta('4 day')]) + self._assert_setitem_series_conversion(obj, pd.Timedelta('12 day'), + exp, 'timedelta64[ns]') + + # timedelta64 + int -> object + exp = pd.Series([pd.Timedelta('1 day'), + 1, + pd.Timedelta('3 day'), + pd.Timedelta('4 day')]) + self._assert_setitem_series_conversion(obj, 1, exp, np.object) + + # timedelta64 + object -> object + exp = pd.Series([pd.Timedelta('1 day'), + 'x', + pd.Timedelta('3 day'), + pd.Timedelta('4 day')]) + self._assert_setitem_series_conversion(obj, 'x', exp, np.object) def test_setitem_series_period(self): pass @@ -610,13 +667,13 @@ def _where_int64_common(self, klass): self._assert_where_conversion(obj, cond, values, exp, np.complex128) - # int + bool -> int - exp = klass([1, 1, 3, 1]) - self._assert_where_conversion(obj, cond, True, exp, np.int64) + # int + bool -> object + exp = klass([1, True, 3, True]) + self._assert_where_conversion(obj, cond, True, exp, np.object) values = klass([True, False, True, True]) - exp = klass([1, 0, 3, 1]) - self._assert_where_conversion(obj, cond, values, exp, np.int64) + exp = klass([1, False, 3, True]) + self._assert_where_conversion(obj, cond, values, exp, np.object) def test_where_series_int64(self): self._where_int64_common(pd.Series) @@ -656,13 +713,13 @@ def _where_float64_common(self, klass): self._assert_where_conversion(obj, cond, values, exp, np.complex128) - # float + bool -> float - exp = klass([1.1, 1.0, 3.3, 
1.0]) - self._assert_where_conversion(obj, cond, True, exp, np.float64) + # float + bool -> object + exp = klass([1.1, True, 3.3, True]) + self._assert_where_conversion(obj, cond, True, exp, np.object) values = klass([True, False, True, True]) - exp = klass([1.1, 0.0, 3.3, 1.0]) - self._assert_where_conversion(obj, cond, values, exp, np.float64) + exp = klass([1.1, False, 3.3, True]) + self._assert_where_conversion(obj, cond, values, exp, np.object) def test_where_series_float64(self): self._where_float64_common(pd.Series) @@ -699,45 +756,46 @@ def test_where_series_complex128(self): exp = pd.Series([1 + 1j, 6 + 6j, 3 + 3j, 8 + 8j]) self._assert_where_conversion(obj, cond, values, exp, np.complex128) - # complex + bool -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 1]) - self._assert_where_conversion(obj, cond, True, exp, np.complex128) + # complex + bool -> object + exp = pd.Series([1 + 1j, True, 3 + 3j, True]) + self._assert_where_conversion(obj, cond, True, exp, np.object) values = pd.Series([True, False, True, True]) - exp = pd.Series([1 + 1j, 0, 3 + 3j, 1]) - self._assert_where_conversion(obj, cond, values, exp, np.complex128) + exp = pd.Series([1 + 1j, False, 3 + 3j, True]) + self._assert_where_conversion(obj, cond, values, exp, np.object) def test_where_index_complex128(self): pass def test_where_series_bool(self): + obj = pd.Series([True, False, True, False]) assert obj.dtype == np.bool cond = pd.Series([True, False, True, False]) - # bool + int -> int - exp = pd.Series([1, 1, 1, 1]) - self._assert_where_conversion(obj, cond, 1, exp, np.int64) + # bool + int -> object + exp = pd.Series([True, 1, True, 1]) + self._assert_where_conversion(obj, cond, 1, exp, np.object) values = pd.Series([5, 6, 7, 8]) - exp = pd.Series([1, 6, 1, 8]) - self._assert_where_conversion(obj, cond, values, exp, np.int64) + exp = pd.Series([True, 6, True, 8]) + self._assert_where_conversion(obj, cond, values, exp, np.object) - # bool + float -> float - exp = pd.Series([1.0, 1.1, 1.0, 1.1]) - self._assert_where_conversion(obj, cond, 1.1, exp, np.float64) + # bool + float -> object + exp = pd.Series([True, 1.1, True, 1.1]) + self._assert_where_conversion(obj, cond, 1.1, exp, np.object) values = pd.Series([5.5, 6.6, 7.7, 8.8]) - exp = pd.Series([1.0, 6.6, 1.0, 8.8]) - self._assert_where_conversion(obj, cond, values, exp, np.float64) + exp = pd.Series([True, 6.6, True, 8.8]) + self._assert_where_conversion(obj, cond, values, exp, np.object) - # bool + complex -> complex - exp = pd.Series([1, 1 + 1j, 1, 1 + 1j]) - self._assert_where_conversion(obj, cond, 1 + 1j, exp, np.complex128) + # bool + complex -> object + exp = pd.Series([True, 1 + 1j, True, 1 + 1j]) + self._assert_where_conversion(obj, cond, 1 + 1j, exp, np.object) values = pd.Series([5 + 5j, 6 + 6j, 7 + 7j, 8 + 8j]) - exp = pd.Series([1, 6 + 6j, 1, 8 + 8j]) - self._assert_where_conversion(obj, cond, values, exp, np.complex128) + exp = pd.Series([True, 6 + 6j, True, 8 + 8j]) + self._assert_where_conversion(obj, cond, values, exp, np.object) # bool + bool -> bool exp = pd.Series([True, True, True, True]) @@ -776,10 +834,15 @@ def test_where_series_datetime64(self): pd.Timestamp('2012-01-04')]) self._assert_where_conversion(obj, cond, values, exp, 'datetime64[ns]') - # ToDo: coerce to object - msg = "cannot coerce a Timestamp with a tz on a naive Block" - with tm.assert_raises_regex(TypeError, msg): - obj.where(cond, pd.Timestamp('2012-01-01', tz='US/Eastern')) + # datetime64 + datetime64tz -> object + exp = pd.Series([pd.Timestamp('2011-01-01'), + 
pd.Timestamp('2012-01-01', tz='US/Eastern'), + pd.Timestamp('2011-01-03'), + pd.Timestamp('2012-01-01', tz='US/Eastern')]) + self._assert_where_conversion( + obj, cond, + pd.Timestamp('2012-01-01', tz='US/Eastern'), + exp, np.object) # ToDo: do not coerce to UTC, must be object values = pd.Series([pd.Timestamp('2012-01-01', tz='US/Eastern'), @@ -898,7 +961,7 @@ def test_fillna_series_int64(self): def test_fillna_index_int64(self): pass - def _fillna_float64_common(self, klass): + def _fillna_float64_common(self, klass, complex): obj = klass([1.1, np.nan, 3.3, 4.4]) assert obj.dtype == np.float64 @@ -910,26 +973,21 @@ def _fillna_float64_common(self, klass): exp = klass([1.1, 1.1, 3.3, 4.4]) self._assert_fillna_conversion(obj, 1.1, exp, np.float64) - if klass is pd.Series: - # float + complex -> complex - exp = klass([1.1, 1 + 1j, 3.3, 4.4]) - self._assert_fillna_conversion(obj, 1 + 1j, exp, np.complex128) - elif klass is pd.Index: - # float + complex -> object - exp = klass([1.1, 1 + 1j, 3.3, 4.4]) - self._assert_fillna_conversion(obj, 1 + 1j, exp, np.object) - else: - NotImplementedError + # float + complex -> we don't support a complex Index + # complex for Series, + # object for Index + exp = klass([1.1, 1 + 1j, 3.3, 4.4]) + self._assert_fillna_conversion(obj, 1 + 1j, exp, complex) - # float + bool -> float - exp = klass([1.1, 1.0, 3.3, 4.4]) - self._assert_fillna_conversion(obj, True, exp, np.float64) + # float + bool -> object + exp = klass([1.1, True, 3.3, 4.4]) + self._assert_fillna_conversion(obj, True, exp, np.object) def test_fillna_series_float64(self): - self._fillna_float64_common(pd.Series) + self._fillna_float64_common(pd.Series, complex=np.complex128) def test_fillna_index_float64(self): - self._fillna_float64_common(pd.Index) + self._fillna_float64_common(pd.Index, complex=np.object) def test_fillna_series_complex128(self): obj = pd.Series([1 + 1j, np.nan, 3 + 3j, 4 + 4j]) @@ -947,12 +1005,12 @@ def test_fillna_series_complex128(self): exp = pd.Series([1 + 1j, 1 + 1j, 3 + 3j, 4 + 4j]) self._assert_fillna_conversion(obj, 1 + 1j, exp, np.complex128) - # complex + bool -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_fillna_conversion(obj, True, exp, np.complex128) + # complex + bool -> object + exp = pd.Series([1 + 1j, True, 3 + 3j, 4 + 4j]) + self._assert_fillna_conversion(obj, True, exp, np.object) def test_fillna_index_complex128(self): - self._fillna_float64_common(pd.Index) + self._fillna_float64_common(pd.Index, complex=np.object) def test_fillna_series_bool(self): # bool can't hold NaN @@ -985,12 +1043,11 @@ def test_fillna_series_datetime64(self): self._assert_fillna_conversion(obj, value, exp, np.object) # datetime64 + int => object - # ToDo: must be coerced to object exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp(1), + 1, pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-04')]) - self._assert_fillna_conversion(obj, 1, exp, 'datetime64[ns]') + self._assert_fillna_conversion(obj, 1, exp, 'object') # datetime64 + object => object exp = pd.Series([pd.Timestamp('2011-01-01'), @@ -1033,14 +1090,12 @@ def test_fillna_series_datetime64tz(self): value = pd.Timestamp('2012-01-01', tz='Asia/Tokyo') self._assert_fillna_conversion(obj, value, exp, np.object) - # datetime64tz + int => datetime64tz - # ToDo: must be object + # datetime64tz + int => object exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp(1, tz=tz), + 1, pd.Timestamp('2011-01-03', tz=tz), pd.Timestamp('2011-01-04', tz=tz)]) - self._assert_fillna_conversion(obj, 
1, exp, - 'datetime64[ns, US/Eastern]') + self._assert_fillna_conversion(obj, 1, exp, np.object) # datetime64tz + object => object exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), @@ -1187,8 +1242,8 @@ def _assert_replace_conversion(self, from_key, to_key, how): (from_key == 'complex128' and to_key in ('int64', 'float64'))): - # buggy on 32-bit - if tm.is_platform_32bit(): + # buggy on 32-bit / windows + if compat.is_platform_32bit() or compat.is_platform_windows(): pytest.skip("32-bit platform buggy: {0} -> {1}".format (from_key, to_key)) diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 8e8fc835b11f7..ddac80fbc4693 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -1,5 +1,3 @@ -import pytest - import numpy as np import pandas as pd from pandas import date_range, Index, DataFrame, Series, Timestamp @@ -83,10 +81,12 @@ def test_indexing_with_datetime_tz(self): 'US/Pacific') # trying to set a single element on a part of a different timezone - def f(): - df.loc[df.new_col == 'new', 'time'] = v + # this converts to object + df2 = df.copy() + df2.loc[df2.new_col == 'new', 'time'] = v - pytest.raises(ValueError, f) + expected = Series([v[0], df.loc[1, 'time']], name='time') + tm.assert_series_equal(df2.time, expected) v = df.loc[df.new_col == 'new', 'time'] + pd.Timedelta('1s') df.loc[df.new_col == 'new', 'time'] = v diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 98f5d5eb140df..e5b70a9fadb8f 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -382,6 +382,12 @@ def test_multi_assign(self): tm.assert_frame_equal(df2, expected) # with an ndarray on rhs + # coerces to float64 because values has float64 dtype + # GH 14001 + expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'], + 'PF': [0, 0, 0, 0, 1, 1], + 'col1': [0., 1., 4., 6., 8., 10.], + 'col2': [12, 7, 16, np.nan, 20, 22]}) df2 = df.copy() df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 7aab7df7169d4..a736f3aa74558 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1033,11 +1033,11 @@ def test_clip_with_datetimes(self): # naive and tz-aware datetimes t = Timestamp('2015-12-01 09:30:30') - s = Series([Timestamp('2015-12-01 09:30:00'), Timestamp( - '2015-12-01 09:31:00')]) + s = Series([Timestamp('2015-12-01 09:30:00'), + Timestamp('2015-12-01 09:31:00')]) result = s.clip(upper=t) - expected = Series([Timestamp('2015-12-01 09:30:00'), Timestamp( - '2015-12-01 09:30:30')]) + expected = Series([Timestamp('2015-12-01 09:30:00'), + Timestamp('2015-12-01 09:30:30')]) assert_series_equal(result, expected) t = Timestamp('2015-12-01 09:30:30', tz='US/Eastern') diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 6d8a54b538237..23283733c492a 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1094,6 +1094,11 @@ def test_where(self): rs = s2.where(cond[:3], -s2) assert_series_equal(rs, expected) + def test_where_error(self): + + s = Series(np.random.randn(5)) + cond = s > 0 + pytest.raises(ValueError, s.where, 1) pytest.raises(ValueError, s.where, cond[:3].values, -s) @@ -1109,6 +1114,8 @@ def test_where(self): pytest.raises(ValueError, s.__setitem__, tuple([[[True, False]]]), []) + 
def test_where_unsafe(self): + # unsafe dtype changes for dtype in [np.int8, np.int16, np.int32, np.int64, np.float16, np.float32, np.float64]: @@ -1374,9 +1381,9 @@ def test_where_dups(self): expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2]) assert_series_equal(comb, expected) - def test_where_datetime(self): + def test_where_datetime_conversion(self): s = Series(date_range('20130102', periods=2)) - expected = Series([10, 10], dtype='datetime64[ns]') + expected = Series([10, 10]) mask = np.array([False, False]) rs = s.where(mask, [10, 10]) @@ -1392,7 +1399,7 @@ def test_where_datetime(self): assert_series_equal(rs, expected) rs = s.where(mask, [10.0, np.nan]) - expected = Series([10, None], dtype='datetime64[ns]') + expected = Series([10, None], dtype='object') assert_series_equal(rs, expected) # GH 15701 @@ -1403,9 +1410,9 @@ def test_where_datetime(self): expected = Series([pd.NaT, s[1]]) assert_series_equal(rs, expected) - def test_where_timedelta(self): + def test_where_timedelta_coerce(self): s = Series([1, 2], dtype='timedelta64[ns]') - expected = Series([10, 10], dtype='timedelta64[ns]') + expected = Series([10, 10]) mask = np.array([False, False]) rs = s.where(mask, [10, 10]) @@ -1421,7 +1428,7 @@ def test_where_timedelta(self): assert_series_equal(rs, expected) rs = s.where(mask, [10.0, np.nan]) - expected = Series([10, None], dtype='timedelta64[ns]') + expected = Series([10, None], dtype='object') assert_series_equal(rs, expected) def test_mask(self): diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index b5948e75aa73e..24dd90e40fa35 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -58,14 +58,14 @@ def test_remove_na_deprecation(self): def test_timedelta_fillna(self): # GH 3371 - s = Series([Timestamp('20130101'), Timestamp('20130101'), Timestamp( - '20130102'), Timestamp('20130103 9:01:01')]) + s = Series([Timestamp('20130101'), Timestamp('20130101'), + Timestamp('20130102'), Timestamp('20130103 9:01:01')]) td = s.diff() # reg fillna result = td.fillna(0) - expected = Series([timedelta(0), timedelta(0), timedelta(1), timedelta( - days=1, seconds=9 * 3600 + 60 + 1)]) + expected = Series([timedelta(0), timedelta(0), timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1)]) assert_series_equal(result, expected) # interprested as seconds @@ -75,8 +75,9 @@ def test_timedelta_fillna(self): assert_series_equal(result, expected) result = td.fillna(timedelta(days=1, seconds=1)) - expected = Series([timedelta(days=1, seconds=1), timedelta( - 0), timedelta(1), timedelta(days=1, seconds=9 * 3600 + 60 + 1)]) + expected = Series([timedelta(days=1, seconds=1), timedelta(0), + timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1)]) assert_series_equal(result, expected) result = td.fillna(np.timedelta64(int(1e9))) @@ -144,6 +145,7 @@ def test_datetime64_fillna(self): assert_series_equal(result, expected) def test_datetime64_tz_fillna(self): + for tz in ['US/Eastern', 'Asia/Tokyo']: # DatetimeBlock s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, @@ -278,6 +280,40 @@ def test_datetime64_tz_fillna(self): pd.Timestamp('2012-11-11 00:00:00+01:00')]) assert_series_equal(df.fillna(method='bfill'), exp) + def test_fillna_consistency(self): + # GH 16402 + # fillna with a tz aware to a tz-naive, should result in object + + s = Series([Timestamp('20130101'), pd.NaT]) + + result = s.fillna(Timestamp('20130101', tz='US/Eastern')) + expected = Series([Timestamp('20130101'), + 
Timestamp('2013-01-01', tz='US/Eastern')], + dtype='object') + assert_series_equal(result, expected) + + # where (we ignore the raise_on_error) + result = s.where([True, False], + Timestamp('20130101', tz='US/Eastern'), + raise_on_error=False) + assert_series_equal(result, expected) + + result = s.where([True, False], + Timestamp('20130101', tz='US/Eastern'), + raise_on_error=True) + assert_series_equal(result, expected) + + # with a non-datetime + result = s.fillna('foo') + expected = Series([Timestamp('20130101'), + 'foo']) + assert_series_equal(result, expected) + + # assignment + s2 = s.copy() + s2[1] = 'foo' + assert_series_equal(s2, expected) + def test_datetime64tz_fillna_round_issue(self): # GH 14872 From d884e51909a887119c0558146de31284a4278931 Mon Sep 17 00:00:00 2001 From: 3553x <3553x@tuta.io> Date: Sat, 22 Jul 2017 00:25:11 +0100 Subject: [PATCH 811/933] BUG: Improved thread safety for read_html() GH16928 (#16930) --- doc/source/whatsnew/v0.21.0.txt | 2 ++ pandas/io/html.py | 4 ++-- pandas/tests/io/test_html.py | 36 ++++++++++++++++++++++++++++++++- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 91d3e9e7b935b..83c6e0e206191 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -267,6 +267,8 @@ I/O - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) +- Bug in :func:`read_html` where the import check fails when run in multiple threads (:issue:`16928`) + Plotting ^^^^^^^^ - Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`) diff --git a/pandas/io/html.py b/pandas/io/html.py index 2613f26ae5f52..a4acb26af5259 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -37,8 +37,6 @@ def _importers(): if _IMPORTS: return - _IMPORTS = True - global _HAS_BS4, _HAS_LXML, _HAS_HTML5LIB try: @@ -59,6 +57,8 @@ def _importers(): except ImportError: pass + _IMPORTS = True + ############# # READ HTML # diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 4ef265dcd5113..0455ffb069322 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -3,13 +3,17 @@ import glob import os import re +import threading import warnings + +# imports needed for Python 3.x but will fail under Python 2.x try: - from importlib import import_module + from importlib import import_module, reload except ImportError: import_module = __import__ + from distutils.version import LooseVersion import pytest @@ -22,6 +26,7 @@ from pandas.compat import (map, zip, StringIO, string_types, BytesIO, is_platform_windows, PY3) from pandas.io.common import URLError, urlopen, file_path_to_url +import pandas.io.html from pandas.io.html import read_html from pandas._libs.parsers import ParserError @@ -931,3 +936,32 @@ def test_same_ordering(): dfs_lxml = read_html(filename, index_col=0, flavor=['lxml']) dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4']) assert_framelist_equal(dfs_lxml, dfs_bs4) + + +class ErrorThread(threading.Thread): + def run(self): + try: + super(ErrorThread, self).run() + except Exception as e: + self.err = e + else: + self.err = None + + +@pytest.mark.slow +def test_importcheck_thread_safety(): + # see gh-16928 + + # force import check by reinitialising global vars in html.py + reload(pandas.io.html) + + filename = os.path.join(DATA_PATH, 'valid_markup.html') + helper_thread1 = ErrorThread(target=read_html, args=(filename,)) + 
helper_thread2 = ErrorThread(target=read_html, args=(filename,)) + + helper_thread1.start() + helper_thread2.start() + + while helper_thread1.is_alive() or helper_thread2.is_alive(): + pass + assert None is helper_thread1.err is helper_thread2.err From 28622c5c120d73c2cb4d2292bc0837534b4e9dfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Novotn=C3=BD?= <30341479+HonzaOsobne@users.noreply.github.com> Date: Sat, 22 Jul 2017 03:21:50 +0200 Subject: [PATCH 812/933] Fixed 'add_methods' when the 'select' argument is specified. (#17045) --- pandas/core/ops.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 017afcd691194..bc201be26b756 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -149,13 +149,15 @@ def names(x): def add_methods(cls, new_methods, force, select, exclude): if select and exclude: raise TypeError("May only pass either select or exclude") - methods = new_methods + if select: select = set(select) methods = {} for key, method in new_methods.items(): if key in select: methods[key] = method + new_methods = methods + if exclude: for k in exclude: new_methods.pop(k, None) From c0a84b59f5b9638ddabcd49ad664fa5850f02135 Mon Sep 17 00:00:00 2001 From: ysau Date: Fri, 21 Jul 2017 21:58:41 -0700 Subject: [PATCH 813/933] TST: Fix error message check in np.argsort comparison (#17051) Closes gh-17046. --- pandas/tests/indexes/test_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 692cdd4957947..842e8fea0df9b 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1846,7 +1846,7 @@ def create_index(self): def test_argsort(self): idx = self.create_index() if PY36: - with tm.assert_raises_regex(TypeError, "'>' not supported"): + with tm.assert_raises_regex(TypeError, "'>|<' not supported"): result = idx.argsort() elif PY3: with tm.assert_raises_regex(TypeError, "unorderable types"): @@ -1859,7 +1859,7 @@ def test_numpy_argsort(self): idx = self.create_index() if PY36: - with tm.assert_raises_regex(TypeError, "'>' not supported"): + with tm.assert_raises_regex(TypeError, "'>|<' not supported"): result = np.argsort(idx) elif PY3: with tm.assert_raises_regex(TypeError, "unorderable types"):
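A note on the PATCH 813 fix above: from Python 3.6 onward, comparing unorderable types raises a ``TypeError`` that names the operator actually invoked, and ``np.argsort`` may end up calling either ``<`` or ``>`` depending on the sort path, hence the regex alternation ``'>|<'``. A minimal sketch of the message being matched (assuming Python >= 3.6; the mixed-type object array below is only a stand-in for the indexes the test constructs):

import numpy as np

# On Python >= 3.6 an unorderable comparison reports the operator used, e.g.
#   TypeError: '<' not supported between instances of 'str' and 'int'
# argsort on an object array goes through Python-level comparisons and may
# invoke either '<' or '>', so the test matches "'>|<' not supported".
mixed = np.array([1, 'a', 2.5], dtype=object)
try:
    np.argsort(mixed)
except TypeError as exc:
    print(exc)

From 09108fae0fc7d2234bad765634213007172d4407 Mon Sep 17 00:00:00 2001 From: kernc Date: Sat, 22 Jul 2017 20:56:27 +0200 Subject: [PATCH 814/933] TST: Move some Series ctor tests to SharedWithSparse (#17050) --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/tests/series/test_api.py | 82 ++++++++++++++++++++++++ pandas/tests/series/test_constructors.py | 65 +------------------ pandas/tests/sparse/test_frame.py | 6 +- pandas/tests/sparse/test_series.py | 21 ++++++ 5 files changed, 109 insertions(+), 67 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 83c6e0e206191..d20dce5cfb51f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -284,7 +284,7 @@ Groupby/Resample/Rolling Sparse ^^^^^^ -- Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16777`) +- Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16905`) Reshaping diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 1eb2b98a7d7cc..8e22dd38030ee 100644 --- a/pandas/tests/series/test_api.py +++ 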
b/pandas/tests/series/test_api.py @@ -1,5 +1,6 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +from collections import OrderedDict import pytest @@ -20,6 +21,15 @@ class SharedWithSparse(object): + """ + A collection of tests Series and SparseSeries can share. + + In generic tests on this class, use ``self._assert_series_equal()`` + which is implemented in sub-classes. + """ + def _assert_series_equal(self, left, right): + """Dispatch to series class dependent assertion""" + raise NotImplementedError def test_scalarop_preserve_name(self): result = self.ts * 2 @@ -117,9 +127,81 @@ def test_to_sparse_pass_name(self): result = self.ts.to_sparse() assert result.name == self.ts.name + def test_constructor_dict(self): + d = {'a': 0., 'b': 1., 'c': 2.} + result = self.series_klass(d) + expected = self.series_klass(d, index=sorted(d.keys())) + self._assert_series_equal(result, expected) + + result = self.series_klass(d, index=['b', 'c', 'd', 'a']) + expected = self.series_klass([1, 2, np.nan, 0], + index=['b', 'c', 'd', 'a']) + self._assert_series_equal(result, expected) + + def test_constructor_subclass_dict(self): + data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) + series = self.series_klass(data) + expected = self.series_klass(dict(compat.iteritems(data))) + self._assert_series_equal(series, expected) + + def test_constructor_ordereddict(self): + # GH3283 + data = OrderedDict( + ('col%s' % i, np.random.random()) for i in range(12)) + + series = self.series_klass(data) + expected = self.series_klass(list(data.values()), list(data.keys())) + self._assert_series_equal(series, expected) + + # Test with subclass + class A(OrderedDict): + pass + + series = self.series_klass(A(data)) + self._assert_series_equal(series, expected) + + def test_constructor_dict_multiindex(self): + d = {('a', 'a'): 0., ('b', 'a'): 1., ('b', 'c'): 2.} + _d = sorted(d.items()) + result = self.series_klass(d) + expected = self.series_klass( + [x[1] for x in _d], + index=pd.MultiIndex.from_tuples([x[0] for x in _d])) + self._assert_series_equal(result, expected) + + d['z'] = 111. 
+ _d.insert(0, ('z', d['z'])) + result = self.series_klass(d) + expected = self.series_klass([x[1] for x in _d], + index=pd.Index([x[0] for x in _d], + tupleize_cols=False)) + result = result.reindex(index=expected.index) + self._assert_series_equal(result, expected) + + def test_constructor_dict_timedelta_index(self): + # GH #12169 : Resample category data with timedelta index + # construct Series from dict as data and TimedeltaIndex as index + # will result NaN in result Series data + expected = self.series_klass( + data=['A', 'B', 'C'], + index=pd.to_timedelta([0, 10, 20], unit='s') + ) + + result = self.series_klass( + data={pd.to_timedelta(0, unit='s'): 'A', + pd.to_timedelta(10, unit='s'): 'B', + pd.to_timedelta(20, unit='s'): 'C'}, + index=pd.to_timedelta([0, 10, 20], unit='s') + ) + self._assert_series_equal(result, expected) + class TestSeriesMisc(TestData, SharedWithSparse): + series_klass = Series + # SharedWithSparse tests use generic, series_klass-agnostic assertion + _assert_series_equal = staticmethod(tm.assert_series_equal) + def test_tab_completion(self): # GH 9910 s = Series(list('abcd')) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index d591aa4f567a9..a916c42c007f9 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -20,8 +20,7 @@ from pandas._libs import lib from pandas._libs.tslib import iNaT -from pandas.compat import lrange, range, zip, OrderedDict, long -from pandas import compat +from pandas.compat import lrange, range, zip, long from pandas.util.testing import assert_series_equal import pandas.util.testing as tm @@ -605,48 +604,6 @@ def test_constructor_dict(self): expected.iloc[1] = 1 assert_series_equal(result, expected) - def test_constructor_dict_multiindex(self): - check = lambda result, expected: tm.assert_series_equal( - result, expected, check_dtype=True, check_series_type=True) - d = {('a', 'a'): 0., ('b', 'a'): 1., ('b', 'c'): 2.} - _d = sorted(d.items()) - ser = Series(d) - expected = Series([x[1] for x in _d], - index=MultiIndex.from_tuples([x[0] for x in _d])) - check(ser, expected) - - d['z'] = 111. 
- _d.insert(0, ('z', d['z'])) - ser = Series(d) - expected = Series([x[1] for x in _d], index=Index( - [x[0] for x in _d], tupleize_cols=False)) - ser = ser.reindex(index=expected.index) - check(ser, expected) - - def test_constructor_dict_timedelta_index(self): - # GH #12169 : Resample category data with timedelta index - # construct Series from dict as data and TimedeltaIndex as index - # will result NaN in result Series data - expected = Series( - data=['A', 'B', 'C'], - index=pd.to_timedelta([0, 10, 20], unit='s') - ) - - result = Series( - data={pd.to_timedelta(0, unit='s'): 'A', - pd.to_timedelta(10, unit='s'): 'B', - pd.to_timedelta(20, unit='s'): 'C'}, - index=pd.to_timedelta([0, 10, 20], unit='s') - ) - # this should work - assert_series_equal(result, expected) - - def test_constructor_subclass_dict(self): - data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) - series = Series(data) - refseries = Series(dict(compat.iteritems(data))) - assert_series_equal(refseries, series) - def test_constructor_dict_datetime64_index(self): # GH 9456 @@ -670,26 +627,6 @@ def create_data(constructor): assert_series_equal(result_datetime, expected) assert_series_equal(result_Timestamp, expected) - def test_orderedDict_ctor(self): - # GH3283 - import pandas - import random - data = OrderedDict([('col%s' % i, random.random()) for i in range(12)]) - s = pandas.Series(data) - assert all(s.values == list(data.values())) - - def test_orderedDict_subclass_ctor(self): - # GH3283 - import pandas - import random - - class A(OrderedDict): - pass - - data = A([('col%s' % i, random.random()) for i in range(12)]) - s = pandas.Series(data) - assert all(s.values == list(data.values())) - def test_constructor_list_of_tuples(self): data = [(1, 1), (2, 2), (2, 3)] s = Series(data) diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index a5d514644a8f1..336b8f30716cd 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -1002,12 +1002,14 @@ def _check(frame, orig): shifted = frame.shift(2, freq='B') exp = orig.shift(2, freq='B') - exp = exp.to_sparse(frame.default_fill_value) + exp = exp.to_sparse(frame.default_fill_value, + kind=frame.default_kind) tm.assert_frame_equal(shifted, exp) shifted = frame.shift(2, freq=BDay()) exp = orig.shift(2, freq=BDay()) - exp = exp.to_sparse(frame.default_fill_value) + exp = exp.to_sparse(frame.default_fill_value, + kind=frame.default_kind) tm.assert_frame_equal(shifted, exp) self._check_all(_check) diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index bb56f8a51897a..a7685abd5ba4d 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -1,6 +1,8 @@ # pylint: disable-msg=E1101,W0612 import operator +from datetime import datetime + import pytest from numpy import nan @@ -58,6 +60,10 @@ def _test_data2_zero(): class TestSparseSeries(SharedWithSparse): + series_klass = SparseSeries + # SharedWithSparse tests use generic, series_klass-agnostic assertion + _assert_series_equal = staticmethod(tm.assert_sp_series_equal) + def setup_method(self, method): arr, index = _test_data1() @@ -1379,3 +1385,18 @@ def test_numpy_func_call(self): for func in funcs: for series in ('bseries', 'zbseries'): getattr(np, func)(getattr(self, series)) + + +@pytest.mark.parametrize( + 'datetime_type', (np.datetime64, + pd.Timestamp, + lambda x: datetime.strptime(x, '%Y-%m-%d'))) +def test_constructor_dict_datetime64_index(datetime_type): + # GH 9456 + dates = 
['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15'] + values = [42544017.198965244, 1234565, 40512335.181958228, -1] + + result = SparseSeries(dict(zip(map(datetime_type, dates), values))) + expected = SparseSeries(values, map(pd.Timestamp, dates)) + + tm.assert_sp_series_equal(result, expected) From ee6412aee8bbf350aea89bbafbfdfb0f8d7620ed Mon Sep 17 00:00:00 2001 From: Kernc Date: Sat, 22 Jul 2017 14:59:37 -0400 Subject: [PATCH 815/933] BUG: Made SparseDataFrame.fillna() fill all NaNs A continuation of https://github.com/pandas-dev/pandas/pull/16178 closes #16112 closes #16178 Author: Kernc Author: keitakurita This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #16892 from kernc/sparse-fillna and squashes the following commits: c1cd33e [Kernc] fixup! BUG: Made SparseDataFrame.fillna() fill all NaNs 2974232 [Kernc] fixup! BUG: Made SparseDataFrame.fillna() fill all NaNs 4bc01a1 [keitakurita] BUG: Made SparseDataFrame.fillna() fill all NaNs --- doc/source/whatsnew/v0.21.0.txt | 4 +++- pandas/core/sparse/array.py | 13 +++++------- pandas/tests/sparse/test_frame.py | 35 +++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d20dce5cfb51f..a64f30b6e97a5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -259,7 +259,7 @@ Indexing - Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`) - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) - Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`) - + I/O ^^^ @@ -284,7 +284,9 @@ Groupby/Resample/Rolling Sparse ^^^^^^ + - Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16905`) +- Bug in :func:`SparseDataFrame.fillna` not filling all NaNs when frame was instantiated from SciPy sparse matrix (:issue:`16112`) Reshaping diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 5c1cf8c773501..42fc5189eebd8 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -595,14 +595,11 @@ def fillna(self, value, downcast=None): if issubclass(self.dtype.type, np.floating): value = float(value) - if self._null_fill_value: - return self._simple_new(self.sp_values, self.sp_index, - fill_value=value) - else: - new_values = self.sp_values.copy() - new_values[isnull(new_values)] = value - return self._simple_new(new_values, self.sp_index, - fill_value=self.fill_value) + new_values = np.where(isnull(self.sp_values), value, self.sp_values) + fill_value = value if self._null_fill_value else self.fill_value + + return self._simple_new(new_values, self.sp_index, + fill_value=fill_value) def sum(self, axis=0, *args, **kwargs): """ diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 336b8f30716cd..d9cb69d56528c 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -1271,6 +1271,41 @@ def test_from_scipy_correct_ordering(spmatrix): tm.assert_frame_equal(sdf.to_dense(), expected.to_dense()) +def test_from_scipy_fillna(spmatrix): + # GH 16112 + tm.skip_if_no_package('scipy') + + arr = np.eye(3) + arr[1:, 0] = np.nan + + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently 
supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = pd.SparseDataFrame(spm).fillna(-1.0) + + # Returning frame should fill all nan values with -1.0 + expected = pd.SparseDataFrame({ + 0: pd.SparseSeries([1., -1, -1]), + 1: pd.SparseSeries([np.nan, 1, np.nan]), + 2: pd.SparseSeries([np.nan, np.nan, 1]), + }, default_fill_value=-1) + + # fill_value is expected to be what .fillna() above was called with + # We don't use -1 as initial fill_value in expected SparseSeries + # construction because this way we obtain "compressed" SparseArrays, + # avoiding having to construct them ourselves + for col in expected: + expected[col].fill_value = -1 + + tm.assert_sp_frame_equal(sdf, expected) + + class TestSparseDataFrameArithmetic(object): def test_numeric_op_scalar(self): From 8d7d3fb545b4273cf9d1a61bf7ea3bfdde8a1199 Mon Sep 17 00:00:00 2001 From: Jeff Knupp Date: Sun, 23 Jul 2017 12:20:57 -0400 Subject: [PATCH 816/933] BUG: Use size_t to avoid array index overflow; add missing malloc of error_msg Fix a few locations where a parser's `error_msg` buffer is written to without having been previously allocated. This manifested as a double free when exception-handling code made use of the `error_msg`. Additionally, use `size_t/ssize_t` where array indices or lengths will be stored. Previously, int32_t was used and would overflow on columns with very large amounts of data (i.e. greater than INTMAX bytes). xref #14696 closes #16798 Author: Jeff Knupp Author: Jeff Knupp Closes #17040 from jeffknupp/16790-core-on-large-csv and squashes the following commits: 6a1ba23 [Jeff Knupp] Clear up prose a5d5677 [Jeff Knupp] Fix linting issues 4380c53 [Jeff Knupp] Fix linting issues 7b1cd8d [Jeff Knupp] Fix linting issues e3cb9c1 [Jeff Knupp] Add unit test plus '--high-memory' option, *off by default*. 2ab4971 [Jeff Knupp] Remove debugging code 2930eaa [Jeff Knupp] Fix line length to conform to linter rules e4dfd19 [Jeff Knupp] Revert printf format strings; fix more comment alignment 3171674 [Jeff Knupp] Fix some leftover size_t references 0985cf3 [Jeff Knupp] Remove debugging code; fix type cast 669d99b [Jeff Knupp] Fix linting errors re: line length 1f24847 [Jeff Knupp] Fix comment alignment; add whatsnew entry e04d12a [Jeff Knupp] Switch to use int64_t rather than size_t due to portability concerns. d5c75e8 [Jeff Knupp] BUG: Use size_t to avoid array index overflow; add missing malloc of error_msg --- doc/source/whatsnew/v0.21.0.txt | 3 +- pandas/_libs/parsers.pyx | 145 ++++++++++++++----------- pandas/_libs/src/parser/tokenizer.c | 111 ++++++++++--------- pandas/_libs/src/parser/tokenizer.h | 42 +++---- pandas/conftest.py | 9 +- pandas/tests/io/parser/test_parsers.py | 15 +++ setup.cfg | 1 + 7 files changed, 187 insertions(+), 139 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index a64f30b6e97a5..096040bb85a10 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -264,7 +264,8 @@ I/O ^^^ - Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) - +- Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696`, :issue:`16798`). +- Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`). 
- Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 7375a2197c6b7..c512a9fd39e95 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -121,30 +121,30 @@ cdef extern from "parser/tokenizer.h": io_callback cb_io io_cleanup cb_cleanup - int chunksize # Number of bytes to prepare for each chunk - char *data # pointer to data to be processed - int datalen # amount of data available - int datapos + int64_t chunksize # Number of bytes to prepare for each chunk + char *data # pointer to data to be processed + int64_t datalen # amount of data available + int64_t datapos # where to write out tokenized data char *stream - int stream_len - int stream_cap + int64_t stream_len + int64_t stream_cap # Store words in (potentially ragged) matrix for now, hmm char **words - int *word_starts # where we are in the stream - int words_len - int words_cap + int64_t *word_starts # where we are in the stream + int64_t words_len + int64_t words_cap - char *pword_start # pointer to stream start of current field - int word_start # position start of current field + char *pword_start # pointer to stream start of current field + int64_t word_start # position start of current field - int *line_start # position in words for start of line - int *line_fields # Number of fields in each line - int lines # Number of lines observed - int file_lines # Number of file lines observed (with bad/skipped) - int lines_cap # Vector capacity + int64_t *line_start # position in words for start of line + int64_t *line_fields # Number of fields in each line + int64_t lines # Number of lines observed + int64_t file_lines # Number of lines observed (with bad/skipped) + int64_t lines_cap # Vector capacity # Tokenizing stuff ParserState state @@ -177,14 +177,14 @@ cdef extern from "parser/tokenizer.h": # thousands separator (comma, period) char thousands - int header # Boolean: 1: has header, 0: no header - int header_start # header row start - int header_end # header row end + int header # Boolean: 1: has header, 0: no header + int64_t header_start # header row start + int64_t header_end # header row end void *skipset PyObject *skipfunc int64_t skip_first_N_rows - int skipfooter + int64_t skipfooter # pick one, depending on whether the converter requires GIL double (*double_converter_nogil)(const char *, char **, char, char, char, int) nogil @@ -195,12 +195,12 @@ cdef extern from "parser/tokenizer.h": char *warn_msg char *error_msg - int skip_empty_lines + int64_t skip_empty_lines ctypedef struct coliter_t: char **words - int *line_start - int col + int64_t *line_start + int64_t col ctypedef struct uint_state: int seen_sint @@ -210,7 +210,8 @@ cdef extern from "parser/tokenizer.h": void uint_state_init(uint_state *self) int uint64_conflict(uint_state *self) - void coliter_setup(coliter_t *it, parser_t *parser, int i, int start) nogil + void coliter_setup(coliter_t *it, parser_t *parser, + int64_t i, int64_t start) nogil void COLITER_NEXT(coliter_t, const char *) nogil parser_t* parser_new() @@ -289,14 +290,14 @@ cdef class TextReader: object true_values, false_values object handle bint na_filter, verbose, has_usecols, has_mi_columns - int parser_start + int64_t parser_start list clocks char *c_encoding kh_str_t *false_set kh_str_t *true_set cdef public: - int leading_cols, table_width, skipfooter, 
buffer_lines + int64_t leading_cols, table_width, skipfooter, buffer_lines object allow_leading_cols object delimiter, converters, delim_whitespace object na_values @@ -730,7 +731,8 @@ cdef class TextReader: Py_ssize_t i, start, field_count, passed_count, unnamed_count # noqa char *word object name - int status, hr, data_line + int status + int64_t hr, data_line char *errors = "strict" cdef StringPath path = _string_path(self.c_encoding) @@ -949,8 +951,8 @@ cdef class TextReader: cdef _read_rows(self, rows, bint trim): cdef: - int buffered_lines - int irows, footer = 0 + int64_t buffered_lines + int64_t irows, footer = 0 self._start_clock() @@ -1018,12 +1020,13 @@ cdef class TextReader: def _convert_column_data(self, rows=None, upcast_na=False, footer=0): cdef: - Py_ssize_t i, nused + int64_t i + int nused kh_str_t *na_hashset = NULL - int start, end + int64_t start, end object name, na_flist, col_dtype = None bint na_filter = 0 - Py_ssize_t num_cols + int64_t num_cols start = self.parser_start @@ -1195,7 +1198,7 @@ cdef class TextReader: return col_res, na_count cdef _convert_with_dtype(self, object dtype, Py_ssize_t i, - int start, int end, + int64_t start, int64_t end, bint na_filter, bint user_dtype, kh_str_t *na_hashset, @@ -1275,7 +1278,7 @@ cdef class TextReader: raise TypeError("the dtype %s is not " "supported for parsing" % dtype) - cdef _string_convert(self, Py_ssize_t i, int start, int end, + cdef _string_convert(self, Py_ssize_t i, int64_t start, int64_t end, bint na_filter, kh_str_t *na_hashset): cdef StringPath path = _string_path(self.c_encoding) @@ -1336,6 +1339,7 @@ cdef class TextReader: kh_destroy_str(table) cdef _get_column_name(self, Py_ssize_t i, Py_ssize_t nused): + cdef int64_t j if self.has_usecols and self.names is not None: if (not callable(self.usecols) and len(self.names) == len(self.usecols)): @@ -1427,8 +1431,8 @@ cdef inline StringPath _string_path(char *encoding): # ---------------------------------------------------------------------- # Type conversions / inference support code -cdef _string_box_factorize(parser_t *parser, int col, - int line_start, int line_end, +cdef _string_box_factorize(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, bint na_filter, kh_str_t *na_hashset): cdef: int error, na_count = 0 @@ -1480,8 +1484,8 @@ cdef _string_box_factorize(parser_t *parser, int col, return result, na_count -cdef _string_box_utf8(parser_t *parser, int col, - int line_start, int line_end, +cdef _string_box_utf8(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, bint na_filter, kh_str_t *na_hashset): cdef: int error, na_count = 0 @@ -1533,8 +1537,8 @@ cdef _string_box_utf8(parser_t *parser, int col, return result, na_count -cdef _string_box_decode(parser_t *parser, int col, - int line_start, int line_end, +cdef _string_box_decode(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, bint na_filter, kh_str_t *na_hashset, char *encoding): cdef: @@ -1592,8 +1596,8 @@ cdef _string_box_decode(parser_t *parser, int col, @cython.boundscheck(False) -cdef _categorical_convert(parser_t *parser, int col, - int line_start, int line_end, +cdef _categorical_convert(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, bint na_filter, kh_str_t *na_hashset, char *encoding): "Convert column data into codes, categories" @@ -1663,8 +1667,8 @@ cdef _categorical_convert(parser_t *parser, int col, kh_destroy_str(table) return np.asarray(codes), result, na_count -cdef _to_fw_string(parser_t *parser, int col, 
int line_start, - int line_end, size_t width): +cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start, + int64_t line_end, int64_t width): cdef: Py_ssize_t i coliter_t it @@ -1680,11 +1684,11 @@ cdef _to_fw_string(parser_t *parser, int col, int line_start, return result -cdef inline void _to_fw_string_nogil(parser_t *parser, int col, - int line_start, int line_end, +cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, size_t width, char *data) nogil: cdef: - Py_ssize_t i + int64_t i coliter_t it const char *word = NULL @@ -1699,7 +1703,8 @@ cdef char* cinf = b'inf' cdef char* cposinf = b'+inf' cdef char* cneginf = b'-inf' -cdef _try_double(parser_t *parser, int col, int line_start, int line_end, +cdef _try_double(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, bint na_filter, kh_str_t *na_hashset, object na_flist): cdef: int error, na_count = 0 @@ -1808,7 +1813,8 @@ cdef inline int _try_double_nogil(parser_t *parser, return 0 -cdef _try_uint64(parser_t *parser, int col, int line_start, int line_end, +cdef _try_uint64(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, bint na_filter, kh_str_t *na_hashset): cdef: int error @@ -1842,8 +1848,9 @@ cdef _try_uint64(parser_t *parser, int col, int line_start, int line_end, return result -cdef inline int _try_uint64_nogil(parser_t *parser, int col, int line_start, - int line_end, bint na_filter, +cdef inline int _try_uint64_nogil(parser_t *parser, int64_t col, + int64_t line_start, + int64_t line_end, bint na_filter, const kh_str_t *na_hashset, uint64_t *data, uint_state *state) nogil: cdef: @@ -1879,7 +1886,8 @@ cdef inline int _try_uint64_nogil(parser_t *parser, int col, int line_start, return 0 -cdef _try_int64(parser_t *parser, int col, int line_start, int line_end, +cdef _try_int64(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, bint na_filter, kh_str_t *na_hashset): cdef: int error, na_count = 0 @@ -1906,8 +1914,9 @@ cdef _try_int64(parser_t *parser, int col, int line_start, int line_end, return result, na_count -cdef inline int _try_int64_nogil(parser_t *parser, int col, int line_start, - int line_end, bint na_filter, +cdef inline int _try_int64_nogil(parser_t *parser, int64_t col, + int64_t line_start, + int64_t line_end, bint na_filter, const kh_str_t *na_hashset, int64_t NA, int64_t *data, int *na_count) nogil: cdef: @@ -1944,7 +1953,8 @@ cdef inline int _try_int64_nogil(parser_t *parser, int col, int line_start, return 0 -cdef _try_bool(parser_t *parser, int col, int line_start, int line_end, +cdef _try_bool(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, bint na_filter, kh_str_t *na_hashset): cdef: int na_count @@ -1966,8 +1976,9 @@ cdef _try_bool(parser_t *parser, int col, int line_start, int line_end, return None, None return result.view(np.bool_), na_count -cdef inline int _try_bool_nogil(parser_t *parser, int col, int line_start, - int line_end, bint na_filter, +cdef inline int _try_bool_nogil(parser_t *parser, int64_t col, + int64_t line_start, + int64_t line_end, bint na_filter, const kh_str_t *na_hashset, uint8_t NA, uint8_t *data, int *na_count) nogil: cdef: @@ -2006,7 +2017,8 @@ cdef inline int _try_bool_nogil(parser_t *parser, int col, int line_start, data += 1 return 0 -cdef _try_bool_flex(parser_t *parser, int col, int line_start, int line_end, +cdef _try_bool_flex(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, bint na_filter, const kh_str_t 
*na_hashset, const kh_str_t *true_hashset, const kh_str_t *false_hashset): @@ -2032,8 +2044,9 @@ cdef _try_bool_flex(parser_t *parser, int col, int line_start, int line_end, return None, None return result.view(np.bool_), na_count -cdef inline int _try_bool_flex_nogil(parser_t *parser, int col, int line_start, - int line_end, bint na_filter, +cdef inline int _try_bool_flex_nogil(parser_t *parser, int64_t col, + int64_t line_start, + int64_t line_end, bint na_filter, const kh_str_t *na_hashset, const kh_str_t *true_hashset, const kh_str_t *false_hashset, @@ -2251,8 +2264,8 @@ for k in list(na_values): na_values[np.dtype(k)] = na_values[k] -cdef _apply_converter(object f, parser_t *parser, int col, - int line_start, int line_end, +cdef _apply_converter(object f, parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, char* c_encoding): cdef: int error @@ -2296,7 +2309,7 @@ def _to_structured_array(dict columns, object names, object usecols): object name, fnames, field_type Py_ssize_t i, offset, nfields, length - int stride, elsize + int64_t stride, elsize char *buf if names is None: @@ -2344,10 +2357,10 @@ def _to_structured_array(dict columns, object names, object usecols): return recs -cdef _fill_structured_column(char *dst, char* src, int elsize, - int stride, int length, bint incref): +cdef _fill_structured_column(char *dst, char* src, int64_t elsize, + int64_t stride, int64_t length, bint incref): cdef: - Py_ssize_t i + int64_t i if incref: util.transfer_object_column(dst, src, stride, length) diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index be23ebb023383..ab92290f87719 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -69,9 +69,9 @@ static void free_if_not_null(void **ptr) { */ -static void *grow_buffer(void *buffer, int length, int *capacity, int space, - int elsize, int *error) { - int cap = *capacity; +static void *grow_buffer(void *buffer, size_t length, size_t *capacity, + size_t space, size_t elsize, int *error) { + size_t cap = *capacity; void *newbuffer = buffer; // Can we fit potentially nbytes tokens (+ null terminators) in the stream? @@ -169,7 +169,7 @@ int parser_cleanup(parser_t *self) { } int parser_init(parser_t *self) { - int sz; + size_t sz; /* Initialize data buffers @@ -196,14 +196,14 @@ int parser_init(parser_t *self) { sz = STREAM_INIT_SIZE / 10; sz = sz ? sz : 1; self->words = (char **)malloc(sz * sizeof(char *)); - self->word_starts = (int *)malloc(sz * sizeof(int)); + self->word_starts = (size_t *)malloc(sz * sizeof(size_t)); self->words_cap = sz; self->words_len = 0; // line pointers and metadata - self->line_start = (int *)malloc(sz * sizeof(int)); + self->line_start = (size_t *)malloc(sz * sizeof(size_t)); - self->line_fields = (int *)malloc(sz * sizeof(int)); + self->line_fields = (size_t *)malloc(sz * sizeof(size_t)); self->lines_cap = sz; self->lines = 0; @@ -247,7 +247,8 @@ void parser_del(parser_t *self) { } static int make_stream_space(parser_t *self, size_t nbytes) { - int i, status, cap; + size_t i, cap; + int status; void *orig_ptr, *newptr; // Can we fit potentially nbytes tokens (+ null terminators) in the stream? 
@@ -304,11 +305,11 @@ static int make_stream_space(parser_t *self, size_t nbytes) { "self->words_cap=%d\n", nbytes, self->words_cap)) newptr = safe_realloc((void *)self->word_starts, - sizeof(int) * self->words_cap); + sizeof(int64_t) * self->words_cap); if (newptr == NULL) { return PARSER_OUT_OF_MEMORY; } else { - self->word_starts = (int *)newptr; + self->word_starts = (int64_t *)newptr; } } @@ -317,8 +318,8 @@ static int make_stream_space(parser_t *self, size_t nbytes) { */ cap = self->lines_cap; self->line_start = - (int *)grow_buffer((void *)self->line_start, self->lines + 1, - &self->lines_cap, nbytes, sizeof(int), &status); + (int64_t *)grow_buffer((void *)self->line_start, self->lines + 1, + &self->lines_cap, nbytes, sizeof(int64_t), &status); TRACE(( "make_stream_space: grow_buffer(self->line_start, %zu, %zu, %zu, %d)\n", self->lines + 1, self->lines_cap, nbytes, status)) @@ -331,11 +332,11 @@ static int make_stream_space(parser_t *self, size_t nbytes) { TRACE(("make_stream_space: cap != self->lines_cap, nbytes = %d\n", nbytes)) newptr = safe_realloc((void *)self->line_fields, - sizeof(int) * self->lines_cap); + sizeof(int64_t) * self->lines_cap); if (newptr == NULL) { return PARSER_OUT_OF_MEMORY; } else { - self->line_fields = (int *)newptr; + self->line_fields = (int64_t *)newptr; } } @@ -350,7 +351,7 @@ static int push_char(parser_t *self, char c) { ("push_char: ERROR!!! self->stream_len(%d) >= " "self->stream_cap(%d)\n", self->stream_len, self->stream_cap)) - int bufsize = 100; + size_t bufsize = 100; self->error_msg = (char *)malloc(bufsize); snprintf(self->error_msg, bufsize, "Buffer overflow caught - possible malformed input file.\n"); @@ -367,7 +368,7 @@ int P_INLINE end_field(parser_t *self) { ("end_field: ERROR!!! self->words_len(%zu) >= " "self->words_cap(%zu)\n", self->words_len, self->words_cap)) - int bufsize = 100; + size_t bufsize = 100; self->error_msg = (char *)malloc(bufsize); snprintf(self->error_msg, bufsize, "Buffer overflow caught - possible malformed input file.\n"); @@ -399,8 +400,8 @@ int P_INLINE end_field(parser_t *self) { } static void append_warning(parser_t *self, const char *msg) { - int ex_length; - int length = strlen(msg); + size_t ex_length; + size_t length = strlen(msg); void *newptr; if (self->warn_msg == NULL) { @@ -420,12 +421,13 @@ static int end_line(parser_t *self) { char *msg; int fields; int ex_fields = self->expected_fields; - int bufsize = 100; // for error or warning messages + size_t bufsize = 100; // for error or warning messages fields = self->line_fields[self->lines]; TRACE(("end_line: Line end, nfields: %d\n", fields)); + TRACE(("end_line: lines: %d\n", self->lines)); if (self->lines > 0) { if (self->expected_fields >= 0) { ex_fields = self->expected_fields; @@ -433,6 +435,7 @@ static int end_line(parser_t *self) { ex_fields = self->line_fields[self->lines - 1]; } } + TRACE(("end_line: ex_fields: %d\n", ex_fields)); if (self->state == START_FIELD_IN_SKIP_LINE || self->state == IN_FIELD_IN_SKIP_LINE || @@ -450,7 +453,7 @@ static int end_line(parser_t *self) { return 0; } - if (!(self->lines <= self->header_end + 1) && + if (!(self->lines <= (int64_t) self->header_end + 1) && (self->expected_fields < 0 && fields > ex_fields) && !(self->usecols)) { // increment file line count self->file_lines++; @@ -485,10 +488,13 @@ static int end_line(parser_t *self) { } } else { // missing trailing delimiters - if ((self->lines >= self->header_end + 1) && fields < ex_fields) { + if ((self->lines >= (int64_t) self->header_end + 1) && + fields < 
ex_fields) { // might overrun the buffer when closing fields if (make_stream_space(self, ex_fields - fields) < 0) { - self->error_msg = "out of memory"; + size_t bufsize = 100; + self->error_msg = (char *)malloc(bufsize); + snprintf(self->error_msg, bufsize, "out of memory"); return -1; } @@ -507,7 +513,7 @@ static int end_line(parser_t *self) { TRACE(( "end_line: ERROR!!! self->lines(%zu) >= self->lines_cap(%zu)\n", self->lines, self->lines_cap)) - int bufsize = 100; + size_t bufsize = 100; self->error_msg = (char *)malloc(bufsize); snprintf(self->error_msg, bufsize, "Buffer overflow caught - " @@ -568,7 +574,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) { self->datalen = bytes_read; if (status != REACHED_EOF && self->data == NULL) { - int bufsize = 200; + size_t bufsize = 200; self->error_msg = (char *)malloc(bufsize); if (status == CALLING_READ_FAILED) { @@ -599,7 +605,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) { if (slen >= self->stream_cap) { \ TRACE(("PUSH_CHAR: ERROR!!! slen(%d) >= stream_cap(%d)\n", slen, \ self->stream_cap)) \ - int bufsize = 100; \ + size_t bufsize = 100; \ self->error_msg = (char *)malloc(bufsize); \ snprintf(self->error_msg, bufsize, \ "Buffer overflow caught - possible malformed input file.\n");\ @@ -626,7 +632,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) { stream = self->stream + self->stream_len; \ slen = self->stream_len; \ self->state = STATE; \ - if (line_limit > 0 && self->lines == start_lines + (int)line_limit) { \ + if (line_limit > 0 && self->lines == start_lines + (size_t)line_limit) { \ goto linelimit; \ } @@ -641,7 +647,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) { stream = self->stream + self->stream_len; \ slen = self->stream_len; \ self->state = STATE; \ - if (line_limit > 0 && self->lines == start_lines + (int)line_limit) { \ + if (line_limit > 0 && self->lines == start_lines + (size_t)line_limit) { \ goto linelimit; \ } @@ -712,15 +718,17 @@ int skip_this_line(parser_t *self, int64_t rownum) { } } -int tokenize_bytes(parser_t *self, size_t line_limit, int start_lines) { - int i, slen; +int tokenize_bytes(parser_t *self, size_t line_limit, int64_t start_lines) { + int64_t i, slen; int should_skip; char c; char *stream; char *buf = self->data + self->datapos; if (make_stream_space(self, self->datalen - self->datapos) < 0) { - self->error_msg = "out of memory"; + size_t bufsize = 100; + self->error_msg = (char *)malloc(bufsize); + snprintf(self->error_msg, bufsize, "out of memory"); return -1; } @@ -1025,7 +1033,7 @@ int tokenize_bytes(parser_t *self, size_t line_limit, int start_lines) { PUSH_CHAR(c); self->state = IN_FIELD; } else { - int bufsize = 100; + size_t bufsize = 100; self->error_msg = (char *)malloc(bufsize); snprintf(self->error_msg, bufsize, "delimiter expected after quote in quote"); @@ -1079,7 +1087,7 @@ int tokenize_bytes(parser_t *self, size_t line_limit, int start_lines) { --i; buf--; // let's try this character again (HACK!) 
         if (line_limit > 0 &&
-            self->lines == start_lines + (int)line_limit) {
+            self->lines == start_lines + line_limit) {
             goto linelimit;
         }
     }
@@ -1121,7 +1129,7 @@ int tokenize_bytes(parser_t *self, size_t line_limit, int start_lines) {
 }
 
 static int parser_handle_eof(parser_t *self) {
-    int bufsize = 100;
+    size_t bufsize = 100;
 
     TRACE(
         ("handling eof, datalen: %d, pstate: %d\n", self->datalen, self->state))
@@ -1165,9 +1173,9 @@ static int parser_handle_eof(parser_t *self) {
 }
 
 int parser_consume_rows(parser_t *self, size_t nrows) {
-    int i, offset, word_deletions, char_count;
+    size_t i, offset, word_deletions, char_count;
 
-    if ((int)nrows > self->lines) {
+    if (nrows > self->lines) {
         nrows = self->lines;
     }
 
@@ -1204,7 +1212,7 @@ int parser_consume_rows(parser_t *self, size_t nrows) {
     self->word_start -= char_count;
 
     /* move line metadata */
-    for (i = 0; i < self->lines - (int)nrows + 1; ++i) {
+    for (i = 0; i < self->lines - nrows + 1; ++i) {
         offset = i + nrows;
         self->line_start[i] = self->line_start[offset] - word_deletions;
         self->line_fields[i] = self->line_fields[offset];
@@ -1227,11 +1235,11 @@ int parser_trim_buffers(parser_t *self) {
     size_t new_cap;
     void *newptr;
 
-    int i;
+    int64_t i;
 
     /* trim words, word_starts */
     new_cap = _next_pow2(self->words_len) + 1;
-    if ((int)new_cap < self->words_cap) {
+    if (new_cap < self->words_cap) {
         TRACE(("parser_trim_buffers: new_cap < self->words_cap\n"));
         newptr = safe_realloc((void *)self->words, new_cap * sizeof(char *));
         if (newptr == NULL) {
             return PARSER_OUT_OF_MEMORY;
         } else {
             self->words = (char **)newptr;
         }
-        newptr = safe_realloc((void *)self->word_starts, new_cap * sizeof(int));
+        newptr = safe_realloc((void *)self->word_starts,
+                              new_cap * sizeof(int64_t));
         if (newptr == NULL) {
             return PARSER_OUT_OF_MEMORY;
         } else {
-            self->word_starts = (int *)newptr;
+            self->word_starts = (int64_t *)newptr;
             self->words_cap = new_cap;
         }
     }
@@ -1254,7 +1263,7 @@ int parser_trim_buffers(parser_t *self) {
         ("parser_trim_buffers: new_cap = %zu, stream_cap = %zu, lines_cap = "
          "%zu\n",
          new_cap, self->stream_cap, self->lines_cap));
-    if ((int)new_cap < self->stream_cap) {
+    if (new_cap < self->stream_cap) {
         TRACE(
             ("parser_trim_buffers: new_cap < self->stream_cap, calling "
              "safe_realloc\n"));
@@ -1282,19 +1291,21 @@ int parser_trim_buffers(parser_t *self) {
 
     /* trim line_start, line_fields */
     new_cap = _next_pow2(self->lines) + 1;
-    if ((int)new_cap < self->lines_cap) {
+    if (new_cap < self->lines_cap) {
         TRACE(("parser_trim_buffers: new_cap < self->lines_cap\n"));
-        newptr = safe_realloc((void *)self->line_start, new_cap * sizeof(int));
+        newptr = safe_realloc((void *)self->line_start,
+                              new_cap * sizeof(int64_t));
         if (newptr == NULL) {
             return PARSER_OUT_OF_MEMORY;
         } else {
-            self->line_start = (int *)newptr;
+            self->line_start = (int64_t *)newptr;
         }
-        newptr = safe_realloc((void *)self->line_fields, new_cap * sizeof(int));
+        newptr = safe_realloc((void *)self->line_fields,
+                              new_cap * sizeof(int64_t));
         if (newptr == NULL) {
             return PARSER_OUT_OF_MEMORY;
         } else {
-            self->line_fields = (int *)newptr;
+            self->line_fields = (int64_t *)newptr;
             self->lines_cap = new_cap;
         }
     }
@@ -1303,7 +1314,7 @@ int parser_trim_buffers(parser_t *self) {
 }
 
 void debug_print_parser(parser_t *self) {
-    int j, line;
+    int64_t j, line;
     char *token;
 
     for (line = 0; line < self->lines; ++line) {
@@ -1324,7 +1335,7 @@ void debug_print_parser(parser_t *self) {
 
 int _tokenize_helper(parser_t *self, size_t nrows, int all) {
     int status = 0;
-    int start_lines = self->lines;
+    int64_t start_lines = self->lines;
 
     if (self->state == FINISHED) {
         return 0;
@@ -1332,10 +1343,10 @@ int _tokenize_helper(parser_t *self, size_t nrows, int all) {
 
     TRACE((
         "_tokenize_helper: Asked to tokenize %d rows, datapos=%d, datalen=%d\n",
-        (int)nrows, self->datapos, self->datalen));
+        nrows, self->datapos, self->datalen));
 
     while (1) {
-        if (!all && self->lines - start_lines >= (int)nrows) break;
+        if (!all && self->lines - start_lines >= nrows) break;
 
         if (self->datapos == self->datalen) {
             status = parser_buffer_bytes(self, self->chunksize);
diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h
index b4344e8a6c070..9462608a26814 100644
--- a/pandas/_libs/src/parser/tokenizer.h
+++ b/pandas/_libs/src/parser/tokenizer.h
@@ -137,30 +137,30 @@ typedef struct parser_t {
     io_callback cb_io;
     io_cleanup cb_cleanup;
 
-    int chunksize;  // Number of bytes to prepare for each chunk
-    char *data;     // pointer to data to be processed
-    int datalen;    // amount of data available
-    int datapos;
+    int64_t chunksize;  // Number of bytes to prepare for each chunk
+    char *data;         // pointer to data to be processed
+    int64_t datalen;    // amount of data available
+    int64_t datapos;
 
     // where to write out tokenized data
     char *stream;
-    int stream_len;
-    int stream_cap;
+    int64_t stream_len;
+    int64_t stream_cap;
 
     // Store words in (potentially ragged) matrix for now, hmm
     char **words;
-    int *word_starts;  // where we are in the stream
-    int words_len;
-    int words_cap;
+    int64_t *word_starts;  // where we are in the stream
+    int64_t words_len;
+    int64_t words_cap;
 
-    char *pword_start;  // pointer to stream start of current field
-    int word_start;     // position start of current field
+    char *pword_start;   // pointer to stream start of current field
+    int64_t word_start;  // position start of current field
 
-    int *line_start;   // position in words for start of line
-    int *line_fields;  // Number of fields in each line
-    int lines;         // Number of (good) lines observed
-    int file_lines;    // Number of file lines observed (including bad or skipped)
-    int lines_cap;     // Vector capacity
+    int64_t *line_start;   // position in words for start of line
+    int64_t *line_fields;  // Number of fields in each line
+    int64_t lines;         // Number of (good) lines observed
+    int64_t file_lines;    // Number of lines (including bad or skipped)
+    int64_t lines_cap;     // Vector capacity
 
     // Tokenizing stuff
     ParserState state;
@@ -193,9 +193,9 @@ typedef struct parser_t {
     // thousands separator (comma, period)
     char thousands;
 
-    int header;        // Boolean: 1: has header, 0: no header
-    int header_start;  // header row start
-    int header_end;    // header row end
+    int header;            // Boolean: 1: has header, 0: no header
+    int64_t header_start;  // header row start
+    int64_t header_end;    // header row end
 
     void *skipset;
     PyObject *skipfunc;
@@ -216,7 +216,7 @@ typedef struct parser_t {
 
 typedef struct coliter_t {
     char **words;
-    int *line_start;
+    int64_t *line_start;
     int col;
 } coliter_t;
 
@@ -225,7 +225,7 @@ coliter_t *coliter_new(parser_t *self, int i);
 
 #define COLITER_NEXT(iter, word)                           \
     do {                                                   \
-        const int i = *iter.line_start++ + iter.col;       \
+        const int64_t i = *iter.line_start++ + iter.col;   \
         word = i < *iter.line_start ? iter.words[i] : "";  \
     } while (0)
diff --git a/pandas/conftest.py b/pandas/conftest.py
index 8a3ffe22242ac..bae45743bbcfb 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -9,7 +9,9 @@ def pytest_addoption(parser):
     parser.addoption("--skip-slow", action="store_true",
                      help="skip slow tests")
     parser.addoption("--skip-network", action="store_true",
-                     help="run network tests")
+                     help="skip network tests")
+    parser.addoption("--run-highmemory", action="store_true",
+                     help="run high memory tests")
     parser.addoption("--only-slow", action="store_true",
                      help="run only slow tests")
 
@@ -24,6 +26,11 @@ def pytest_runtest_setup(item):
     if 'network' in item.keywords and item.config.getoption("--skip-network"):
         pytest.skip("skipping due to --skip-network")
 
+    if 'high_memory' in item.keywords and not item.config.getoption(
+            "--run-highmemory"):
+        pytest.skip(
+            "skipping high memory test since --run-highmemory was not set")
+
 
 # Configurations for all tests and all test modules
diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py
index 8d59e3acb3230..f23bd24f5cbe3 100644
--- a/pandas/tests/io/parser/test_parsers.py
+++ b/pandas/tests/io/parser/test_parsers.py
@@ -1,6 +1,9 @@
 # -*- coding: utf-8 -*-
 
 import os
+from io import StringIO
+
+import pytest
 
 import pandas.util.testing as tm
 
@@ -25,6 +28,18 @@
 from .dtypes import DtypeTests
 
 
+@pytest.mark.high_memory
+def test_bytes_exceed_2gb():
+    """Read from a "CSV" that has a column larger than 2GB.
+
+    GH 16798
+    """
+    csv = StringIO('strings\n' + '\n'.join(
+        ['x' * (1 << 20) for _ in range(2100)]))
+    df = read_csv(csv, low_memory=False)
+    assert not df.empty
+
+
 class BaseParser(CommentTests, CompressionTests,
                  ConverterTests, DialectTests,
                  HeaderTests, IndexColTests,
diff --git a/setup.cfg b/setup.cfg
index 05d4c84ca56c4..0123078523b6f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -27,3 +27,4 @@ markers =
     single: mark a test as single cpu only
     slow: mark a test as slow
     network: mark a test as network
+    highmemory: mark a test as high-memory only

From a7eb1a775354b43a2bf82ed3a284c0538bc41583 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Sun, 23 Jul 2017 13:43:59 -0400
Subject: [PATCH 817/933] TST: remove some test warnings in parser tests
 (#17057)

TST: move highmemory test to proper location in c_parser_only

xref #16798
---
 pandas/conftest.py                      |  4 ++--
 pandas/tests/io/parser/c_parser_only.py | 26 +++++++++++++++++++------
 pandas/tests/io/parser/test_parsers.py  | 16 ----------------
 3 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index bae45743bbcfb..101af46a63db4 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -10,7 +10,7 @@ def pytest_addoption(parser):
                      help="skip slow tests")
     parser.addoption("--skip-network", action="store_true",
                      help="skip network tests")
-    parser.addoption("--run-highmemory", action="store_true",
+    parser.addoption("--run-high-memory", action="store_true",
                      help="run high memory tests")
     parser.addoption("--only-slow", action="store_true",
                      help="run only slow tests")
@@ -27,7 +27,7 @@ def pytest_runtest_setup(item):
         pytest.skip("skipping due to --skip-network")
 
     if 'high_memory' in item.keywords and not item.config.getoption(
-            "--run-highmemory"):
+            "--run-high-memory"):
         pytest.skip(
             "skipping high memory test since --run-highmemory was not set")
 
diff --git a/pandas/tests/io/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py
index 48812c04e3b55..c68b2bf064d97 100644
--- a/pandas/tests/io/parser/c_parser_only.py
+++ b/pandas/tests/io/parser/c_parser_only.py
@@ -476,9 +476,23 @@ def test_read_tarfile(self, tar_suffix):
         # iterating through a file-like).
         tar_path = os.path.join(self.dirpath, "tar_csv" + tar_suffix)
 
-        tar = tarfile.open(tar_path, "r")
-        data_file = tar.extractfile("tar_data.csv")
-
-        out = self.read_csv(data_file)
-        expected = pd.DataFrame({"a": [1]})
-        tm.assert_frame_equal(out, expected)
+        with tarfile.open(tar_path, "r") as tar:
+            data_file = tar.extractfile("tar_data.csv")
+
+            out = self.read_csv(data_file)
+            expected = pd.DataFrame({"a": [1]})
+            tm.assert_frame_equal(out, expected)
+
+    @pytest.mark.high_memory
+    def test_bytes_exceed_2gb(self):
+        """Read from a "CSV" that has a column larger than 2GB.
+
+        GH 16798
+        """
+        if self.low_memory:
+            pytest.skip("not a high_memory test")
+
+        csv = StringIO('strings\n' + '\n'.join(
+            ['x' * (1 << 20) for _ in range(2100)]))
+        df = self.read_csv(csv, low_memory=False)
+        assert not df.empty
diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py
index f23bd24f5cbe3..9bbc624dff90f 100644
--- a/pandas/tests/io/parser/test_parsers.py
+++ b/pandas/tests/io/parser/test_parsers.py
@@ -1,10 +1,6 @@
 # -*- coding: utf-8 -*-
 
 import os
-from io import StringIO
-
-import pytest
-
 import pandas.util.testing as tm
 
 from pandas import read_csv, read_table
@@ -28,18 +24,6 @@
 from .dtypes import DtypeTests
 
 
-@pytest.mark.high_memory
-def test_bytes_exceed_2gb():
-    """Read from a "CSV" that has a column larger than 2GB.
-
-    GH 16798
-    """
-    csv = StringIO('strings\n' + '\n'.join(
-        ['x' * (1 << 20) for _ in range(2100)]))
-    df = read_csv(csv, low_memory=False)
-    assert not df.empty
-
-
 class BaseParser(CommentTests, CompressionTests,
                  ConverterTests, DialectTests,
                  HeaderTests, IndexColTests,

From 4ce5340fec2ec075815e0aac3d4223c2598f1d84 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Sun, 23 Jul 2017 10:44:25 -0700
Subject: [PATCH 818/933] DOC: Add more examples for reset_index (#17055)

---
 pandas/core/frame.py | 64 +++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 27 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 9514ab8f3b27f..2ceb62dc7a349 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3060,7 +3060,7 @@ class max_speed
 ...                                    ('mammal', 'monkey')],
 ...                                   names=['class', 'name'])
 >>> columns = pd.MultiIndex.from_tuples([('speed', 'max'),
-...                                      ('speed', 'type')])
+...                                      ('species', 'type')])
 >>> df = pd.DataFrame([(389.0, 'fly'),
 ...                    ( 24.0, 'fly'),
 ...                    ( 80.5, 'run'),
 ...                    (np.nan, 'jump')],
 ...                   index=index,
 ...                   columns=columns)
 >>> df
-               speed
-                 max type
+               speed species
+                 max    type
 class  name
-bird   falcon  389.0  fly
-       parrot   24.0  fly
-mammal lion     80.5  run
-       monkey    NaN jump
+bird   falcon  389.0     fly
+       parrot   24.0     fly
+mammal lion     80.5     run
+       monkey    NaN    jump
 
 If the index has multiple levels, we can reset a subset of them:
 
 >>> df.reset_index(level='class')
-       class  speed
-                max type
+       class  speed species
+                max    type
 name
-falcon   bird  389.0  fly
-parrot   bird   24.0  fly
-lion   mammal   80.5  run
-monkey mammal    NaN jump
+falcon   bird  389.0     fly
+parrot   bird   24.0     fly
+lion   mammal   80.5     run
+monkey mammal    NaN    jump
 
 If we are not dropping the index, by default, it is placed in the top
 level. We can place it in another level:
 
 >>> df.reset_index(level='class', col_level=1)
-                speed
-          class   max type
+                speed species
+          class   max    type
 name
-falcon     bird 389.0  fly
-parrot     bird  24.0  fly
-lion     mammal  80.5  run
-monkey   mammal   NaN jump
+falcon     bird 389.0     fly
+parrot     bird  24.0     fly
+lion     mammal  80.5     run
+monkey   mammal   NaN    jump
 
 When the index is inserted under another level, we can specify under
-which one with the parameter `col_fill`. If we specify a nonexistent
-level, it is created:
+which one with the parameter `col_fill`:
 
 >>> df.reset_index(level='class', col_level=1, col_fill='species')
-              species  speed
-                class    max type
+              species  speed species
+                class    max    type
 name
-falcon           bird  389.0  fly
-parrot           bird   24.0  fly
-lion           mammal   80.5  run
-monkey         mammal    NaN jump
+falcon           bird  389.0     fly
+parrot           bird   24.0     fly
+lion           mammal   80.5     run
+monkey         mammal    NaN    jump
+
+If we specify a nonexistent level for `col_fill`, it is created:
+
+>>> df.reset_index(level='class', col_level=1, col_fill='genus')
+                genus  speed species
+                class    max    type
+name
+falcon           bird  389.0     fly
+parrot           bird   24.0     fly
+lion           mammal   80.5     run
+monkey         mammal    NaN    jump
 """
 inplace = validate_bool_kwarg(inplace, 'inplace')
 if inplace:

From 811bcbcf4daf6a1118eb898e4e55320c68ec5159 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Sun, 23 Jul 2017 11:37:54 -0700
Subject: [PATCH 819/933] MAINT: Add dash in high memory message

Follow-up to gh-17057.
---
 pandas/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index 101af46a63db4..763a41ee2e2aa 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -29,7 +29,7 @@ def pytest_runtest_setup(item):
     if 'high_memory' in item.keywords and not item.config.getoption(
             "--run-high-memory"):
         pytest.skip(
-            "skipping high memory test since --run-highmemory was not set")
+            "skipping high memory test since --run-high-memory was not set")
 
 
 # Configurations for all tests and all test modules

From e7c10bb8d715390c95925bf20c06e98a7eb1b234 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Sun, 23 Jul 2017 17:46:58 -0700
Subject: [PATCH 820/933] MAINT: kwards --> kwargs in parsers.pyx

---
 pandas/_libs/parsers.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index c512a9fd39e95..9866eff3e5f31 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -566,7 +566,7 @@ cdef class TextReader:
         while self.buffer_lines * 2 < heuristic:
             self.buffer_lines *= 2
 
-    def __init__(self, *args, **kwards):
+    def __init__(self, *args, **kwargs):
         pass
 
     def __dealloc__(self):
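Taken together, the parser patches above introduce an opt-in pytest marker for memory-hungry tests. The following is a minimal, self-contained sketch of how the marker and the command-line flag cooperate, using only the conftest.py hooks shown in those patches; the test module name and its 2GB allocation are illustrative, not part of the patches.

    # conftest.py
    import pytest

    def pytest_addoption(parser):
        parser.addoption("--run-high-memory", action="store_true",
                         help="run high memory tests")

    def pytest_runtest_setup(item):
        # Tests tagged @pytest.mark.high_memory are skipped unless the
        # opt-in flag was passed on the command line.
        if 'high_memory' in item.keywords and not item.config.getoption(
                "--run-high-memory"):
            pytest.skip("skipping high memory test since "
                        "--run-high-memory was not set")

    # test_highmem.py (hypothetical module)
    import pytest

    @pytest.mark.high_memory
    def test_needs_two_gigabytes():
        blob = 'x' * (1 << 31)  # ~2GB of data; gated behind the flag
        assert len(blob) == 2 ** 31

A plain `pytest` run skips the tagged test; `pytest --run-high-memory` executes it.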
From ef3aae5ca658ccc4dd21c18485762fda52cc3957 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Sun, 23 Jul 2017 22:07:24 -0700
Subject: [PATCH 821/933] CLN: Cleanup comments in before_install_travis.sh

envars.sh doesn't exist anymore. In fact, it's been gone for a while.
---
 ci/before_install_travis.sh | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/ci/before_install_travis.sh b/ci/before_install_travis.sh
index f90427f97d3b7..2d0b4da6120dc 100755
--- a/ci/before_install_travis.sh
+++ b/ci/before_install_travis.sh
@@ -1,15 +1,10 @@
 #!/bin/bash
 
-# If envars.sh determined we're running in an authorized fork
-# and the user opted in to the network cache,and that cached versions
-# are available on the cache server, download and deploy the cached
-# files to the local filesystem
-
 echo "inside $0"
 
-# overview
 if [ "${TRAVIS_OS_NAME}" == "linux" ]; then
     sh -e /etc/init.d/xvfb start
 fi
 
-true # never fail because bad things happened here
+# Never fail because bad things happened here.
+true

From c55dbf067d85c3a060a6ffeff2aad24991e95eae Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Sun, 23 Jul 2017 23:57:52 -0700
Subject: [PATCH 822/933] MAINT: Remove duplicate Series sort_index check

Duplicate boolean validation check for sort_index in
series/test_validate.py
---
 pandas/tests/series/test_validate.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pandas/tests/series/test_validate.py b/pandas/tests/series/test_validate.py
index 6327e265d8c1e..134fa0a38f618 100644
--- a/pandas/tests/series/test_validate.py
+++ b/pandas/tests/series/test_validate.py
@@ -23,9 +23,6 @@ def test_validate_bool_args(self):
         with pytest.raises(ValueError):
             self.s.sort_index(inplace=value)
 
-        with pytest.raises(ValueError):
-            self.s.sort_index(inplace=value)
-
         with pytest.raises(ValueError):
             self.s.rename(inplace=value)

From 1d0e6a151668ffde51270c03274b7e6f529a6132 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Mon, 24 Jul 2017 17:05:14 -0400
Subject: [PATCH 823/933] BLD: Pin pyarrow=0.4.1 (#17065)

Addresses gh-17064.

Also add some additional build information when calling
`pd.show_versions`
---
 ci/requirements-2.7.sh            | 2 +-
 ci/requirements-2.7_BUILD_TEST.sh | 2 +-
 ci/requirements-3.5.sh            | 2 +-
 ci/requirements-3.6.run           | 1 +
 ci/requirements-3.6_DOC.sh        | 2 +-
 ci/requirements-3.6_WIN.run       | 1 +
 pandas/util/_print_versions.py    | 3 ++-
 7 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/ci/requirements-2.7.sh b/ci/requirements-2.7.sh
index 64d470e5c6e0e..5b20617f55759 100644
--- a/ci/requirements-2.7.sh
+++ b/ci/requirements-2.7.sh
@@ -4,4 +4,4 @@ source activate pandas
 
 echo "install 27"
 
-conda install -n pandas -c conda-forge feather-format
+conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1
diff --git a/ci/requirements-2.7_BUILD_TEST.sh b/ci/requirements-2.7_BUILD_TEST.sh
index 78941fd0944e5..999651624be6b 100755
--- a/ci/requirements-2.7_BUILD_TEST.sh
+++ b/ci/requirements-2.7_BUILD_TEST.sh
@@ -4,4 +4,4 @@ source activate pandas
 
 echo "install 27 BUILD_TEST"
 
-conda install -n pandas -c conda-forge pyarrow dask
+conda install -n pandas -c conda-forge pyarrow=0.4.1 dask
diff --git a/ci/requirements-3.5.sh b/ci/requirements-3.5.sh
index 917439a8765a2..3b8fe793a413d 100644
--- a/ci/requirements-3.5.sh
+++ b/ci/requirements-3.5.sh
@@ -4,7 +4,7 @@ source activate pandas
 
 echo "install 35"
 
-conda install -n pandas -c conda-forge feather-format
+conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1
 
 # pip install python-dateutil to get latest
 conda remove -n pandas python-dateutil --force
diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run
index 41c9680ce1b7e..a4f5f073856c8 100644
--- a/ci/requirements-3.6.run
+++ b/ci/requirements-3.6.run
@@ -15,6 +15,7 @@ jinja2
 sqlalchemy
 pymysql
 feather-format
+pyarrow=0.4.1
 # psycopg2 (not avail on defaults ATM)
 beautifulsoup4
 s3fs
diff --git a/ci/requirements-3.6_DOC.sh b/ci/requirements-3.6_DOC.sh
index e43e483d77a73..58d968a92ee9d 100644
--- a/ci/requirements-3.6_DOC.sh
+++ b/ci/requirements-3.6_DOC.sh
@@ -6,6 +6,6 @@ echo "[install DOC_BUILD deps]"
 
 pip install pandas-gbq
 
-conda install -n pandas -c conda-forge feather-format nbsphinx pandoc
+conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 nbsphinx pandoc
 
 conda install -n pandas -c r r rpy2 --yes
diff --git a/ci/requirements-3.6_WIN.run b/ci/requirements-3.6_WIN.run
index 899bfbc6b6b23..cc9c07d8a2716 100644
--- a/ci/requirements-3.6_WIN.run
+++ b/ci/requirements-3.6_WIN.run
@@ -8,6 +8,7 @@ xlrd
 xlwt
 scipy
 feather-format
+pyarrow=0.4.1
 numexpr
 pytables
 matplotlib
diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py
index ca75d4d02e927..f3888c3bffca8 100644
--- a/pandas/util/_print_versions.py
+++ b/pandas/util/_print_versions.py
@@ -94,7 +94,8 @@ def show_versions(as_json=False):
         ("jinja2", lambda mod: mod.__version__),
         ("s3fs", lambda mod: mod.__version__),
         ("pandas_gbq", lambda mod: mod.__version__),
-        ("pandas_datareader", lambda mod: mod.__version__)
+        ("pandas_datareader", lambda mod: mod.__version__),
+        ("pyarrow", lambda mod: mod.__version__),
     ]
 
     deps_blob = list()

From 9e6bb42fa50df808bffd60a665bf921e49b87032 Mon Sep 17 00:00:00 2001
From: Pietro Battiston
Date: Mon, 24 Jul 2017 19:41:28 -0400
Subject: [PATCH 824/933] ENH: provide "inplace" argument to set_axis()

closes #14636

Author: Pietro Battiston

Closes #16994 from toobaz/set_axis_inplace and squashes the following
commits:

8fb9d0f [Pietro Battiston] REF: adapt NDFrame.set_axis() calls to new signature
409f502 [Pietro Battiston] ENH: provide "inplace" argument to set_axis(), change signature
---
 doc/source/api.rst                     |  1 +
 doc/source/whatsnew/v0.21.0.txt        |  2 +
 pandas/core/generic.py                 | 96 ++++++++++++++++++++++++--
 pandas/core/groupby.py                 |  5 +-
 pandas/core/reshape/pivot.py           |  5 +-
 pandas/tests/frame/test_alter_axes.py  | 59 ++++++++++++++++
 pandas/tests/series/test_alter_axes.py | 44 ++++++++++++
 7 files changed, 201 insertions(+), 11 deletions(-)

diff --git a/doc/source/api.rst b/doc/source/api.rst
index 77d095a965221..f22591dba3a38 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -410,6 +410,7 @@ Reindexing / Selection / Label manipulation
    Series.reset_index
    Series.sample
    Series.select
+   Series.set_axis
    Series.take
    Series.tail
    Series.truncate
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 096040bb85a10..e6764178d1f25 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -67,6 +67,7 @@ Other Enhancements
 - :func:`Series.to_dict` and :func:`DataFrame.to_dict` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`)
 - :func:`RangeIndex.append` now returns a ``RangeIndex`` object when possible (:issue:`16212`)
 - :func:`Series.rename_axis` and :func:`DataFrame.rename_axis` with ``inplace=True`` now return ``None`` while renaming the axis inplace. (:issue:`15704`)
+- :func:`Series.set_axis` and :func:`DataFrame.set_axis` now support the ``inplace`` parameter. (:issue:`14636`)
 - :func:`Series.to_pickle` and :func:`DataFrame.to_pickle` have gained a ``protocol`` parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__
 - :func:`api.types.infer_dtype` now infers decimals. (:issue:`15690`)
 - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`)
@@ -202,6 +203,7 @@ Other API Changes
 - ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`)
 - Removed the ``@slow`` decorator from ``pandas.util.testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`)
 - Moved definition of ``MergeError`` to the ``pandas.errors`` module.
+- The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is still supported and causes a ``FutureWarning`` to be emitted (:issue:`14636`)
 
 .. _whatsnew_0210.deprecations:
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 68416d85ca659..f3b7b31557216 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -466,9 +466,91 @@ def _expand_axes(self, key):
 
         return new_axes
 
-    def set_axis(self, axis, labels):
-        """ public verson of axis assignment """
-        setattr(self, self._get_axis_name(axis), labels)
+    _shared_docs['set_axis'] = """Assign desired index to given axis
+
+    Parameters
+    ----------
+    labels: list-like or Index
+        The values for the new index
+    axis : int or string, default 0
+    inplace : boolean, default None
+        Whether to set the axis on the caller rather than returning a new
+        %(klass)s instance.
+
+        WARNING: inplace=None currently falls back to True, but
+        in a future version, will default to False. Use inplace=True
+        explicitly rather than relying on the default.
+
+        .. versionadded:: 0.21.0
+        The signature was made consistent with the rest of the API.
+        Previously, the "axis" and "labels" arguments were respectively
+        the first and second positional arguments.
+
+    Returns
+    -------
+    renamed : %(klass)s or None
+        An object of same type as caller if inplace=False, None otherwise.
+
+    See Also
+    --------
+    pandas.NDFrame.rename
+
+    Examples
+    --------
+    >>> s = pd.Series([1, 2, 3])
+    >>> s
+    0    1
+    1    2
+    2    3
+    dtype: int64
+    >>> s.set_axis(['a', 'b', 'c'], axis=0, inplace=False)
+    a    1
+    b    2
+    c    3
+    dtype: int64
+    >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+    >>> df.set_axis(['a', 'b', 'c'], axis=0, inplace=False)
+       A  B
+    a  1  4
+    b  2  5
+    c  3  6
+    >>> df.set_axis(['I', 'II'], axis=1, inplace=False)
+       I  II
+    0  1   4
+    1  2   5
+    2  3   6
+    >>> df.set_axis(['i', 'ii'], axis=1, inplace=True)
+    >>> df
+       i  ii
+    0  1   4
+    1  2   5
+    2  3   6
+
+    """
+
+    @Appender(_shared_docs['set_axis'] % dict(klass='NDFrame'))
+    def set_axis(self, labels, axis=0, inplace=None):
+        if is_scalar(labels):
+            warnings.warn(
+                'set_axis now takes "labels" as first argument, and '
+                '"axis" as named parameter. The old form, with "axis" as '
+                'first parameter and "labels" as second, is still supported '
+                'but will be deprecated in a future version of pandas.',
+                FutureWarning, stacklevel=2)
+            labels, axis = axis, labels
+
+        if inplace is None:
+            warnings.warn(
+                'set_axis currently defaults to operating inplace.\nThis '
+                'will change in a future version of pandas, use '
+                'inplace=True to avoid this warning.',
+                FutureWarning, stacklevel=2)
+            inplace = True
+        if inplace:
+            setattr(self, self._get_axis_name(axis), labels)
+        else:
+            obj = self.copy()
+            obj.set_axis(labels, axis=axis, inplace=True)
+            return obj
 
     def _set_axis(self, axis, labels):
         self._data.set_axis(axis, labels)
@@ -875,7 +957,7 @@ def _set_axis_name(self, name, axis=0, inplace=False):
         inplace = validate_bool_kwarg(inplace, 'inplace')
         renamed = self if inplace else self.copy()
-        renamed.set_axis(axis, idx)
+        renamed.set_axis(idx, axis=axis, inplace=True)
         if not inplace:
             return renamed
 
@@ -5721,7 +5803,7 @@ def slice_shift(self, periods=1, axis=0):
 
         new_obj = self._slice(vslicer, axis=axis)
         shifted_axis = self._get_axis(axis)[islicer]
-        new_obj.set_axis(axis, shifted_axis)
+        new_obj.set_axis(shifted_axis, axis=axis, inplace=True)
 
         return new_obj.__finalize__(self)
 
@@ -5881,7 +5963,7 @@ def _tz_convert(ax, tz):
             ax = _tz_convert(ax, tz)
 
         result = self._constructor(self._data, copy=copy)
-        result.set_axis(axis, ax)
+        result.set_axis(ax, axis=axis, inplace=True)
         return result.__finalize__(self)
 
     @deprecate_kwarg(old_arg_name='infer_dst', new_arg_name='ambiguous',
@@ -5949,7 +6031,7 @@ def _tz_localize(ax, tz, ambiguous):
             ax = _tz_localize(ax, tz, ambiguous)
 
         result = self._constructor(self._data, copy=copy)
-        result.set_axis(axis, ax)
+        result.set_axis(ax, axis=axis, inplace=True)
         return result.__finalize__(self)
 
     # ----------------------------------------------------------------------
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index daf3381ae4e89..c8a7ee752d243 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -530,10 +530,11 @@ def _set_result_index_ordered(self, result):
         if not self.grouper.is_monotonic:
             index = Index(np.concatenate(
                 self._get_indices(self.grouper.result_index)))
-            result.set_axis(self.axis, index)
+            result.set_axis(index, axis=self.axis, inplace=True)
             result = result.sort_index(axis=self.axis)
 
-        result.set_axis(self.axis, self.obj._get_axis(self.axis))
+        result.set_axis(self.obj._get_axis(self.axis), axis=self.axis,
+                        inplace=True)
         return result
 
     def _dir_additions(self):
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index fbb7e6f970309..890555477425d 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -312,8 +312,9 @@ def _all_key(key):
 
             except TypeError:
 
                 # we cannot reshape, so coerce the axis
-                piece.set_axis(cat_axis, piece._get_axis(
-                    cat_axis)._to_safe_for_reshape())
+                piece.set_axis(piece._get_axis(
+                    cat_axis)._to_safe_for_reshape(),
+                    axis=cat_axis, inplace=True)
 
             piece[all_key] = margin[key]
 
             table_pieces.append(piece)
diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index 434c02b8eba2f..8bcc19e6d8ba4 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -908,3 +908,62 @@ def test_set_reset_index(self):
 
         df = df.set_index('B')
 
         df = df.reset_index()
+
+    def test_set_axis_inplace(self):
+        # GH14636
+        df = DataFrame({'A': [1.1, 2.2, 3.3],
+                        'B': [5.0, 6.1, 7.2],
+                        'C': [4.4, 5.5, 6.6]},
+                       index=[2010, 2011, 2012])
+
+        expected = {0: df.copy(),
+                    1: df.copy()}
+        expected[0].index = list('abc')
+        expected[1].columns = list('abc')
+        expected['index'] = expected[0]
+        expected['columns'] = expected[1]
+
+        for axis in expected:
+            # inplace=True
+            # The FutureWarning comes from the fact that we would like to have
+            # inplace default to False some day
+            for inplace, warn in (None, FutureWarning), (True, None):
+                kwargs = {'inplace': inplace}
+
+                result = df.copy()
+                with tm.assert_produces_warning(warn):
+                    result.set_axis(list('abc'), axis=axis, **kwargs)
+                tm.assert_frame_equal(result, expected[axis])
+
+            # inplace=False
+            result = df.set_axis(list('abc'), axis=axis, inplace=False)
+            tm.assert_frame_equal(expected[axis], result)
+
+        # omitting the "axis" parameter
+        with tm.assert_produces_warning(None):
+            result = df.set_axis(list('abc'), inplace=False)
+        tm.assert_frame_equal(result, expected[0])
+
+        # wrong values for the "axis" parameter
+        for axis in 3, 'foo':
+            with tm.assert_raises_regex(ValueError, 'No axis named'):
+                df.set_axis(list('abc'), axis=axis, inplace=False)
+
+    def test_set_axis_prior_to_deprecation_signature(self):
+        df = DataFrame({'A': [1.1, 2.2, 3.3],
+                        'B': [5.0, 6.1, 7.2],
+                        'C': [4.4, 5.5, 6.6]},
+                       index=[2010, 2011, 2012])
+
+        expected = {0: df.copy(),
+                    1: df.copy()}
+        expected[0].index = list('abc')
+        expected[1].columns = list('abc')
+        expected['index'] = expected[0]
+        expected['columns'] = expected[1]
+
+        # old signature
+        for axis in expected:
+            with tm.assert_produces_warning(FutureWarning):
+                result = df.set_axis(axis, list('abc'), inplace=False)
+            tm.assert_frame_equal(result, expected[axis])
diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py
index d93f0326fd3b1..f33e19c7f6223 100644
--- a/pandas/tests/series/test_alter_axes.py
+++ b/pandas/tests/series/test_alter_axes.py
@@ -234,3 +234,47 @@ def test_rename_axis_inplace(self):
         assert no_return is None
         assert_series_equal(result, expected)
+
+    def test_set_axis_inplace(self):
+        # GH14636
+
+        s = Series(np.arange(4), index=[1, 3, 5, 7], dtype='int64')
+
+        expected = s.copy()
+        expected.index = list('abcd')
+
+        for axis in 0, 'index':
+            # inplace=True
+            # The FutureWarning comes from the fact that we would like to have
+            # inplace default to False some day
+            for inplace, warn in (None, FutureWarning), (True, None):
+                result = s.copy()
+                kwargs = {'inplace': inplace}
+                with tm.assert_produces_warning(warn):
+                    result.set_axis(list('abcd'), axis=axis, **kwargs)
+                tm.assert_series_equal(result, expected)
+
+        # inplace=False
+        result = s.set_axis(list('abcd'), axis=0, inplace=False)
+        tm.assert_series_equal(expected, result)
+
+        # omitting the "axis" parameter
+        with tm.assert_produces_warning(None):
+            result = s.set_axis(list('abcd'), inplace=False)
+        tm.assert_series_equal(result, expected)
+
+        # wrong values for the "axis" parameter
+        for axis in 2, 'foo':
+            with tm.assert_raises_regex(ValueError, 'No axis named'):
+                s.set_axis(list('abcd'), axis=axis, inplace=False)
+
+    def test_set_axis_prior_to_deprecation_signature(self):
+        s = Series(np.arange(4), index=[1, 3, 5, 7], dtype='int64')
+
+        expected = s.copy()
+        expected.index = list('abcd')
+
+        for axis in 0, 'index':
+            with tm.assert_produces_warning(FutureWarning):
+                result = s.set_axis(0, list('abcd'), inplace=False)
+            tm.assert_series_equal(result, expected)

From 395f712133a4f6003ec8029458ade7ab423096d0 Mon Sep 17 00:00:00 2001
From: Jeff Knupp
Date: Mon, 24 Jul 2017 23:59:01 -0400
Subject: [PATCH 825/933] BUG: Fix parser field type compatibility
 on 32-bit systems. (#17071)

Closes gh-17063
---
 pandas/_libs/src/parser/tokenizer.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
index ab92290f87719..416bf039623d5 100644
--- a/pandas/_libs/src/parser/tokenizer.c
+++ b/pandas/_libs/src/parser/tokenizer.c
@@ -196,14 +196,14 @@ int parser_init(parser_t *self) {
     sz = STREAM_INIT_SIZE / 10;
     sz = sz ? sz : 1;
     self->words = (char **)malloc(sz * sizeof(char *));
-    self->word_starts = (size_t *)malloc(sz * sizeof(size_t));
+    self->word_starts = (int64_t *)malloc(sz * sizeof(int64_t));
     self->words_cap = sz;
     self->words_len = 0;
 
     // line pointers and metadata
-    self->line_start = (size_t *)malloc(sz * sizeof(size_t));
-    self->line_fields = (size_t *)malloc(sz * sizeof(size_t));
+    self->line_start = (int64_t *)malloc(sz * sizeof(int64_t));
+    self->line_fields = (int64_t *)malloc(sz * sizeof(int64_t));
     self->lines_cap = sz;
     self->lines = 0;
@@ -247,7 +247,7 @@ void parser_del(parser_t *self) {
 }
 
 static int make_stream_space(parser_t *self, size_t nbytes) {
-    size_t i, cap;
+    int64_t i, cap;
     int status;
     void *orig_ptr, *newptr;
@@ -419,7 +419,7 @@ static void append_warning(parser_t *self, const char *msg) {
 
 static int end_line(parser_t *self) {
     char *msg;
-    int fields;
+    int64_t fields;
     int ex_fields = self->expected_fields;
     size_t bufsize = 100;  // for error or warning messages
@@ -468,8 +468,8 @@ static int end_line(parser_t *self) {
         if (self->error_bad_lines) {
             self->error_msg = (char *)malloc(bufsize);
             snprintf(self->error_msg, bufsize,
-                     "Expected %d fields in line %d, saw %d\n",
-                     ex_fields, self->file_lines, fields);
+                     "Expected %d fields in line %lld, saw %lld\n",
+                     ex_fields, (long long)self->file_lines, (long long)fields);
 
             TRACE(("Error at line %d, %d fields\n", self->file_lines, fields));
@@ -480,8 +480,9 @@
                 // pass up error message
                 msg = (char *)malloc(bufsize);
                 snprintf(msg, bufsize,
-                         "Skipping line %d: expected %d fields, saw %d\n",
-                         self->file_lines, ex_fields, fields);
+                         "Skipping line %lld: expected %d fields, saw %lld\n",
+                         (long long)self->file_lines, ex_fields,
+                         (long long)fields);
                 append_warning(self, msg);
                 free(msg);
             }
@@ -632,7 +633,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
     stream = self->stream + self->stream_len;                                \
     slen = self->stream_len;                                                 \
     self->state = STATE;                                                     \
-    if (line_limit > 0 && self->lines == start_lines + (size_t)line_limit) { \
+    if (line_limit > 0 && self->lines == start_lines + (int64_t)line_limit) { \
        goto linelimit;                                                       \
    }
@@ -647,7 +648,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
     stream = self->stream + self->stream_len;                                \
     slen = self->stream_len;                                                 \
     self->state = STATE;                                                     \
-    if (line_limit > 0 && self->lines == start_lines + (size_t)line_limit) { \
+    if (line_limit > 0 && self->lines == start_lines + (int64_t)line_limit) { \
        goto linelimit;                                                       \
    }
@@ -1147,7 +1148,8 @@ static int parser_handle_eof(parser_t *self) {
         case IN_QUOTED_FIELD:
             self->error_msg = (char *)malloc(bufsize);
             snprintf(self->error_msg, bufsize,
-                     "EOF inside string starting at line %d", self->file_lines);
+                     "EOF inside string starting at line %lld",
+                     (long long)self->file_lines);
             return -1;
 
         case ESCAPED_CHAR:
@@ -1318,7 +1320,7 @@ void debug_print_parser(parser_t *self) {
     char *token;
 
     for (line = 0; line < self->lines; ++line) {
-        printf("(Parsed) Line %d: ", line);
+        printf("(Parsed) Line %lld: ", (long long)line);
 
         for (j = 0; j < self->line_fields[j]; ++j) {
             token = self->words[j + self->line_start[line]];

From 793020293ee1e5fa023f45c12943a4ac51cc23d0 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Tue, 25 Jul 2017 06:18:23 -0400
Subject: [PATCH 826/933] COMPAT: rename isnull -> isna, notnull -> notna
 (#16972)

closes #15001
---
 doc/source/10min.rst                          |   2 +-
 doc/source/api.rst                            |  18 +-
 doc/source/basics.rst                         |   6 +-
 doc/source/categorical.rst                    |   4 +-
 doc/source/comparison_with_sas.rst            |  11 +-
 doc/source/comparison_with_sql.rst            |   8 +-
 doc/source/conf.py                            |   4 +-
 doc/source/gotchas.rst                        |   2 +-
 doc/source/missing_data.rst                   |  26 +--
 doc/source/options.rst                        |   6 +-
 doc/source/whatsnew/v0.21.0.txt               |  16 +-
 pandas/_libs/algos_rank_helper.pxi.in         |   4 +-
 pandas/_libs/lib.pyx                          |   8 +-
 pandas/_libs/testing.pyx                      |   4 +-
 pandas/core/algorithms.py                     |  12 +-
 pandas/core/api.py                            |   2 +-
 pandas/core/base.py                           |   6 +-
 pandas/core/categorical.py                    |  64 +++----
 pandas/core/common.py                         |   4 +-
 pandas/core/config_init.py                    |  24 ++-
 pandas/core/dtypes/cast.py                    |  34 ++--
 pandas/core/dtypes/missing.py                 |  80 +++++----
 pandas/core/frame.py                          |  54 ++++--
 pandas/core/generic.py                        |  66 ++++---
 pandas/core/groupby.py                        |  32 ++--
 pandas/core/indexes/base.py                   |  40 +++--
 pandas/core/indexes/datetimelike.py           |   4 +-
 pandas/core/indexes/datetimes.py              |   4 +-
 pandas/core/indexes/interval.py               |  10 +-
 pandas/core/indexes/multi.py                  |   8 +-
 pandas/core/indexes/timedeltas.py             |  10 +-
 pandas/core/indexing.py                       |   6 +-
 pandas/core/internals.py                      |  76 ++++----
 pandas/core/missing.py                        |  18 +-
 pandas/core/nanops.py                         |  26 +--
 pandas/core/ops.py                            |  34 ++--
 pandas/core/panel.py                          |   6 +-
 pandas/core/reshape/pivot.py                  |   2 +-
 pandas/core/reshape/reshape.py                |   6 +-
 pandas/core/reshape/tile.py                   |   4 +-
 pandas/core/series.py                         |  45 +++--
 pandas/core/sorting.py                        |   4 +-
 pandas/core/sparse/array.py                   |  14 +-
 pandas/core/sparse/frame.py                   |  20 ++-
 pandas/core/sparse/series.py                  |  26 +--
 pandas/core/strings.py                        |  12 +-
 pandas/core/tools/datetimes.py                |   8 +-
 pandas/core/util/hashing.py                   |   6 +-
 pandas/core/window.py                         |   2 +-
 pandas/io/formats/format.py                   |  14 +-
 pandas/io/formats/style.py                    |   2 +-
 pandas/io/json/json.py                        |   4 +-
 pandas/io/parsers.py                          |   4 +-
 pandas/io/pytables.py                         |   6 +-
 pandas/io/sql.py                              |  16 +-
 pandas/io/stata.py                            |   4 +-
 pandas/plotting/_core.py                      |  12 +-
 pandas/plotting/_misc.py                      |   4 +-
 pandas/tests/api/test_api.py                  |   8 +-
 pandas/tests/dtypes/test_inference.py         |   5 +-
 pandas/tests/dtypes/test_missing.py           | 162 +++++++++---------
 pandas/tests/frame/test_analytics.py          |  20 +--
 pandas/tests/frame/test_apply.py              |   4 +-
 pandas/tests/frame/test_asof.py               |   4 +-
 .../tests/frame/test_axis_select_reindex.py   |   6 +-
 pandas/tests/frame/test_block_internals.py    |   6 +-
 pandas/tests/frame/test_constructors.py       |  14 +-
 pandas/tests/frame/test_dtypes.py             |   2 +-
 pandas/tests/frame/test_indexing.py           |  42 ++---
 pandas/tests/frame/test_operators.py          |   8 +-
 pandas/tests/frame/test_reshape.py            |   8 +-
 pandas/tests/frame/test_timeseries.py         |   2 +-
 pandas/tests/groupby/test_bin_groupby.py      |   4 +-
 pandas/tests/groupby/test_groupby.py          |   4 +-
 pandas/tests/groupby/test_nth.py              |   8 +-
 pandas/tests/groupby/test_timegrouper.py      |   2 +-
 pandas/tests/indexes/common.py                |  22 +--
 .../tests/indexes/datetimes/test_indexing.py  |  12 +-
 pandas/tests/indexes/datetimes/test_ops.py    |   6 +-
 pandas/tests/indexes/datetimes/test_tools.py  |  22 +--
 pandas/tests/indexes/period/test_period.py    |  12 +-
 pandas/tests/indexes/test_base.py             |   4 +-
 pandas/tests/indexes/test_category.py         |   6 +-
 pandas/tests/indexes/test_interval.py         |  22 +--
 pandas/tests/indexes/test_multi.py            |  14 +-
 pandas/tests/indexes/test_numeric.py          |  10 +-
 pandas/tests/indexes/test_range.py            |   4 +-
 pandas/tests/indexes/timedeltas/test_ops.py   |   6 +-
 pandas/tests/indexes/timedeltas/test_tools.py |   4 +-
 .../indexing/test_chaining_and_caching.py     |   2 +-
 pandas/tests/indexing/test_iloc.py            |   4 +-
 pandas/tests/indexing/test_indexing.py        |   2 +-
 pandas/tests/io/parser/common.py              |   4 +-
 pandas/tests/io/parser/converters.py          |   2 +-
 pandas/tests/io/parser/na_values.py           |   2 +-
 pandas/tests/io/parser/parse_dates.py         |   2 +-
 pandas/tests/io/test_html.py                  |   4 +-
 pandas/tests/io/test_pytables.py              |   4 +-
 pandas/tests/io/test_sql.py                   |  12 +-
 pandas/tests/reshape/test_concat.py           |   8 +-
 pandas/tests/reshape/test_join.py             |   8 +-
 pandas/tests/reshape/test_merge.py            |   8 +-
 pandas/tests/reshape/test_merge_ordered.py    |   2 +-
 pandas/tests/reshape/test_pivot.py            |   2 +-
 pandas/tests/reshape/test_tile.py             |  14 +-
 pandas/tests/scalar/test_nat.py               |   6 +-
 pandas/tests/scalar/test_timedelta.py         |   4 +-
 pandas/tests/series/test_analytics.py         |  54 +++---
 pandas/tests/series/test_apply.py             |   6 +-
 pandas/tests/series/test_asof.py              |  16 +-
 pandas/tests/series/test_constructors.py      |  16 +-
 pandas/tests/series/test_indexing.py          |  12 +-
 pandas/tests/series/test_missing.py           |  66 +++----
 pandas/tests/series/test_operators.py         |  12 +-
 pandas/tests/series/test_period.py            |  10 +-
 pandas/tests/series/test_quantile.py          |   4 +-
 pandas/tests/series/test_replace.py           |   8 +-
 pandas/tests/sparse/test_frame.py             |  12 +-
 pandas/tests/sparse/test_series.py            |  18 +-
 pandas/tests/test_algos.py                    |   6 +-
 pandas/tests/test_base.py                     |  12 +-
 pandas/tests/test_categorical.py              |  14 +-
 pandas/tests/test_lib.py                      |  12 +-
 pandas/tests/test_multilevel.py               |  16 +-
 pandas/tests/test_nanops.py                   |   4 +-
 pandas/tests/test_panel.py                    |  14 +-
 pandas/tests/test_panel4d.py                  |   6 +-
 pandas/tests/test_resample.py                 |  14 +-
 pandas/tests/test_sorting.py                  |   6 +-
 pandas/tests/test_strings.py                  |  10 +-
 pandas/tests/test_window.py                   |  28 +--
 pandas/tests/tseries/test_timezones.py        |   4 +-
 pandas/util/__init__.py                       |   2 +-
 pandas/util/_decorators.py                    |  22 ++-
 134 files changed, 1003 insertions(+), 898 deletions(-)

diff --git a/doc/source/10min.rst b/doc/source/10min.rst
index 8482eef552c17..def49a641a0ff 100644
--- a/doc/source/10min.rst
+++ b/doc/source/10min.rst
@@ -373,7 +373,7 @@ To get the boolean mask where values are ``nan``
 
 .. ipython:: python
 
-   pd.isnull(df1)
+   pd.isna(df1)
 
 
 Operations
diff --git a/doc/source/api.rst b/doc/source/api.rst
index f22591dba3a38..1a4ee68ef52c4 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -187,7 +187,9 @@ Top-level missing data
 .. autosummary::
    :toctree: generated/
 
+   isna
    isnull
+   notna
    notnull
 
 Top-level conversions
@@ -272,8 +274,8 @@ Conversion
    Series.astype
    Series.infer_objects
    Series.copy
-   Series.isnull
-   Series.notnull
+   Series.isna
+   Series.notna
 
 Indexing, iteration
 ~~~~~~~~~~~~~~~~~~~
@@ -781,8 +783,8 @@ Conversion
    DataFrame.convert_objects
    DataFrame.infer_objects
    DataFrame.copy
-   DataFrame.isnull
-   DataFrame.notnull
+   DataFrame.isna
+   DataFrame.notna
 
 Indexing, iteration
 ~~~~~~~~~~~~~~~~~~~
@@ -1099,8 +1101,8 @@ Conversion
    Panel.astype
    Panel.copy
-   Panel.isnull
-   Panel.notnull
+   Panel.isna
+   Panel.notna
 
 Getting and setting
 ~~~~~~~~~~~~~~~~~~~
@@ -1343,8 +1345,8 @@ Missing Values
    Index.fillna
    Index.dropna
-   Index.isnull
-   Index.notnull
+   Index.isna
+   Index.notna
 
 Conversion
 ~~~~~~~~~~
diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index aae1fffb7a3b6..c8138d795b836 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -444,7 +444,7 @@ So, for instance, to reproduce :meth:`~DataFrame.combine_first` as above:
 
 .. ipython:: python
 
-   combiner = lambda x, y: np.where(pd.isnull(x), y, x)
+   combiner = lambda x, y: np.where(pd.isna(x), y, x)
    df1.combine(df2, combiner)
 
 .. _basics.stats:
@@ -511,7 +511,7 @@ optional ``level`` parameter which applies only if the object has a
    :header: "Function", "Description"
    :widths: 20, 80
 
-   ``count``, Number of non-null observations
+   ``count``, Number of non-na observations
    ``sum``, Sum of values
    ``mean``, Mean of values
    ``mad``, Mean absolute deviation
@@ -541,7 +541,7 @@ will exclude NAs on Series input by default:
    np.mean(df['one'].values)
 
 ``Series`` also has a method :meth:`~Series.nunique` which will return the
-number of unique non-null values:
+number of unique non-na values:
diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst
index ef558381c5e6f..02d7920bc4a84 100644
--- a/doc/source/categorical.rst
+++ b/doc/source/categorical.rst
@@ -863,14 +863,14 @@ a code of ``-1``.
 
     s.cat.codes
 
-Methods for working with missing data, e.g. :meth:`~Series.isnull`, :meth:`~Series.fillna`,
+Methods for working with missing data, e.g. :meth:`~Series.isna`, :meth:`~Series.fillna`,
 :meth:`~Series.dropna`, all work normally:
 
 .. ipython:: python
 
     s = pd.Series(["a", "b", np.nan], dtype="category")
     s
-    pd.isnull(s)
+    pd.isna(s)
     s.fillna("a")
 
 Differences to R's `factor`
diff --git a/doc/source/comparison_with_sas.rst b/doc/source/comparison_with_sas.rst
index 875358521173a..33a347de0bf5b 100644
--- a/doc/source/comparison_with_sas.rst
+++ b/doc/source/comparison_with_sas.rst
@@ -444,13 +444,13 @@ For example, in SAS you could do this to filter missing values.
        if value_x ^= .;
    run;
 
-Which doesn't work in in pandas. Instead, the ``pd.isnull`` or ``pd.notnull`` functions
+Which doesn't work in pandas. Instead, the ``pd.isna`` or ``pd.notna`` functions
 should be used for comparisons.
 
 .. ipython:: python
 
-   outer_join[pd.isnull(outer_join['value_x'])]
-   outer_join[pd.notnull(outer_join['value_x'])]
+   outer_join[pd.isna(outer_join['value_x'])]
+   outer_join[pd.notna(outer_join['value_x'])]
 
 pandas also provides a variety of methods to work with missing data - some of
 which would be challenging to express in SAS. For example, there are methods to
@@ -570,7 +570,7 @@ machine's memory, but also that the operations on that data may be faster.
 
 If out of core processing is needed, one possibility is the
 `dask.dataframe `_
-library (currently in development) which
+library (currently in development) which
 provides a subset of pandas functionality for an on-disk ``DataFrame``
 
 Data Interop
@@ -578,7 +578,7 @@ Data Interop
 
 pandas provides a :func:`read_sas` method that can read SAS data saved in
 the XPORT or SAS7BDAT binary format.
-
+
 .. code-block:: none
 
     libname xportout xport 'transport-file.xpt';
@@ -613,4 +613,3 @@ to interop data between SAS and pandas is to serialize to csv.
 
     In [9]: %time df = pd.read_csv('big.csv')
     Wall time: 4.86 s
-
diff --git a/doc/source/comparison_with_sql.rst b/doc/source/comparison_with_sql.rst
index 7962e0e69faa1..2112c7de8c897 100644
--- a/doc/source/comparison_with_sql.rst
+++ b/doc/source/comparison_with_sql.rst
@@ -101,7 +101,7 @@ Just like SQL's OR and AND, multiple conditions can be passed to a DataFrame usi
    # tips by parties of at least 5 diners OR bill total was more than $45
    tips[(tips['size'] >= 5) | (tips['total_bill'] > 45)]
 
-NULL checking is done using the :meth:`~pandas.Series.notnull` and :meth:`~pandas.Series.isnull`
+NULL checking is done using the :meth:`~pandas.Series.notna` and :meth:`~pandas.Series.isna`
 methods.
 
 .. ipython:: python
@@ -121,9 +121,9 @@ where ``col2`` IS NULL with the following query:
 
 .. ipython:: python
 
-   frame[frame['col2'].isnull()]
+   frame[frame['col2'].isna()]
 
-Getting items where ``col1`` IS NOT NULL can be done with :meth:`~pandas.Series.notnull`.
+Getting items where ``col1`` IS NOT NULL can be done with :meth:`~pandas.Series.notna`.
 
 .. code-block:: sql
 
@@ -133,7 +133,7 @@ Getting items where ``col1`` IS NOT NULL can be done with :meth:`~pandas.Series.
 
 .. ipython:: python
 
-   frame[frame['col1'].notnull()]
+   frame[frame['col1'].notna()]
 
 
 GROUP BY
diff --git a/doc/source/conf.py b/doc/source/conf.py
index cb3063d59beae..6eb12324ee461 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -238,8 +238,8 @@
 # https://github.com/pandas-dev/pandas/issues/16186
 
 moved_api_pages = [
-    ('pandas.core.common.isnull', 'pandas.isnull'),
-    ('pandas.core.common.notnull', 'pandas.notnull'),
+    ('pandas.core.common.isnull', 'pandas.isna'),
+    ('pandas.core.common.notnull', 'pandas.notna'),
     ('pandas.core.reshape.get_dummies', 'pandas.get_dummies'),
     ('pandas.tools.merge.concat', 'pandas.concat'),
     ('pandas.tools.merge.merge', 'pandas.merge'),
diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst
index 11827fe2776cf..a3a90f514f142 100644
--- a/doc/source/gotchas.rst
+++ b/doc/source/gotchas.rst
@@ -202,7 +202,7 @@ For many reasons we chose the latter. After years of production use it has
 proven, at least in my opinion, to be the best decision given the state of
 affairs in NumPy and Python in general. The special value ``NaN``
 (Not-A-Number) is used everywhere as the ``NA`` value, and there are API
-functions ``isnull`` and ``notnull`` which can be used across the dtypes to
+functions ``isna`` and ``notna`` which can be used across the dtypes to
 detect NA values.
 
 However, it comes with it a couple of trade-offs which I most certainly have
diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst
index 37930775885e3..e40b7d460fef8 100644
--- a/doc/source/missing_data.rst
+++ b/doc/source/missing_data.rst
@@ -36,7 +36,7 @@ When / why does data become missing?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Some might quibble over our usage of *missing*. By "missing" we simply mean
-**null** or "not present for whatever reason". Many data sets simply arrive with
+**na** or "not present for whatever reason". Many data sets simply arrive with
 missing data, either because it exists and was not collected or it never
 existed. For example, in a collection of financial time series, some of the
 time series might start on different dates. Thus, values prior to the start date
@@ -63,27 +63,27 @@ to handling missing data. While ``NaN`` is the default missing value marker for
 reasons of computational speed and convenience, we need to be able to easily
 detect this value with data of different types: floating point, integer,
 boolean, and general object. In many cases, however, the Python ``None`` will
-arise and we wish to also consider that "missing" or "null".
+arise and we wish to also consider that "missing" or "na".
 
 .. note::
 
    Prior to version v0.10.0 ``inf`` and ``-inf`` were also
-   considered to be "null" in computations. This is no longer the case by
-   default; use the ``mode.use_inf_as_null`` option to recover it.
+   considered to be "na" in computations. This is no longer the case by
+   default; use the ``mode.use_inf_as_na`` option to recover it.
 
-.. _missing.isnull:
+.. _missing.isna:
 
 To make detecting missing values easier (and across different array dtypes),
-pandas provides the :func:`~pandas.core.common.isnull` and
-:func:`~pandas.core.common.notnull` functions, which are also methods on
+pandas provides the :func:`isna` and
+:func:`notna` functions, which are also methods on
 ``Series`` and ``DataFrame`` objects:
 
 .. ipython:: python
 
    df2['one']
-   pd.isnull(df2['one'])
-   df2['four'].notnull()
-   df2.isnull()
+   pd.isna(df2['one'])
+   df2['four'].notna()
+   df2.isna()
 
 .. warning::
 
@@ -206,7 +206,7 @@ with missing data.
 Filling missing values: fillna
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-The **fillna** function can "fill in" NA values with non-null data in a couple
+The **fillna** function can "fill in" NA values with non-na data in a couple
 of ways, which we illustrate:
 
 **Replace NA with a scalar value**
@@ -220,7 +220,7 @@ of ways, which we illustrate:
 **Fill gaps forward or backward**
 
 Using the same filling arguments as :ref:`reindexing `, we
-can propagate non-null values forward or backward:
+can propagate non-na values forward or backward:
 
 .. ipython:: python
 
@@ -288,7 +288,7 @@ a Series in this case.
 
 .. ipython:: python
 
-   dff.where(pd.notnull(dff), dff.mean(), axis='columns')
+   dff.where(pd.notna(dff), dff.mean(), axis='columns')
 
 .. _missing_data.dropna:
diff --git a/doc/source/options.rst b/doc/source/options.rst
index c585da64efece..83b08acac5720 100644
--- a/doc/source/options.rst
+++ b/doc/source/options.rst
@@ -419,10 +419,10 @@
 mode.chained_assignment      warn       Raise an exception, warn, or no
                                         assignment, The default is warn
 mode.sim_interactive         False      Whether to simulate interactive mode
                                         for purposes of testing.
-mode.use_inf_as_null         False      True means treat None, NaN, -INF,
-                                        INF as null (old way), False means
+mode.use_inf_as_na           False      True means treat None, NaN, -INF,
+                                        INF as NA (old way), False means
                                         None and NaN are null, but INF, -INF
-                                        are not null (new way).
+                                        are not NA (new way).
 compute.use_bottleneck       True       Use the bottleneck library to accelerate
                                         computation if it is installed.
 compute.use_numexpr          True       Use the numexpr library to accelerate
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index e6764178d1f25..5a5ea827e74ad 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -128,8 +128,6 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in
    ...
    ValueError: Cannot operate inplace if there is no assignment
 
-.. _whatsnew_0210.dtype_conversions:
-
 Dtype Conversions
 ^^^^^^^^^^^^^^^^^
 
@@ -187,6 +185,20 @@ Dtype Conversions
 - Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`)
 - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)
 
+.. _whatsnew_0210.api.na_changes:
+
+NA naming Changes
+^^^^^^^^^^^^^^^^^
+
+In order to promote more consistency among the pandas API, we have added additional top-level
+functions :func:`isna` and :func:`notna` that are aliases for :func:`isnull` and :func:`notnull`.
+The naming scheme is now more consistent with methods like ``.dropna()`` and ``.fillna()``. Furthermore,
+in all cases where ``.isnull()`` and ``.notnull()`` methods are defined, they have additional methods
+named ``.isna()`` and ``.notna()``; these are included for the classes ``Categorical``,
+``Index``, ``Series``, and ``DataFrame``. (:issue:`15001`).
+
+The configuration option ``mode.use_inf_as_null`` is deprecated, and ``mode.use_inf_as_na`` is added as a replacement.
+
 .. _whatsnew_0210.api:
 
 Other API Changes
diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in
index aafffbf60f638..0945aec638b1d 100644
--- a/pandas/_libs/algos_rank_helper.pxi.in
+++ b/pandas/_libs/algos_rank_helper.pxi.in
@@ -83,7 +83,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
         nan_value = {{neg_nan_value}}
 
     {{if dtype == 'object'}}
-    mask = lib.isnullobj(values)
+    mask = lib.isnaobj(values)
     {{elif dtype == 'float64'}}
     mask = np.isnan(values)
     {{elif dtype == 'int64'}}
@@ -259,7 +259,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',
         nan_value = {{neg_nan_value}}
 
     {{if dtype == 'object'}}
-    mask = lib.isnullobj2d(values)
+    mask = lib.isnaobj2d(values)
    {{elif dtype == 'float64'}}
     mask = np.isnan(values)
     {{elif dtype == 'int64'}}
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index f6e574b66a828..0458d4ae9f3de 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -286,7 +286,7 @@ def item_from_zerodim(object val):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def isnullobj(ndarray arr):
+def isnaobj(ndarray arr):
     cdef Py_ssize_t i, n
     cdef object val
     cdef ndarray[uint8_t] result
@@ -303,7 +303,7 @@
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def isnullobj_old(ndarray arr):
+def isnaobj_old(ndarray arr):
     cdef Py_ssize_t i, n
     cdef object val
     cdef ndarray[uint8_t] result
@@ -320,7 +320,7 @@
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def isnullobj2d(ndarray arr):
+def isnaobj2d(ndarray arr):
     cdef Py_ssize_t i, j, n, m
     cdef object val
     cdef ndarray[uint8_t, ndim=2] result
@@ -339,7 +339,7 @@
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def isnullobj2d_old(ndarray arr):
+def isnaobj2d_old(ndarray arr):
     cdef Py_ssize_t i, j, n, m
     cdef object val
     cdef ndarray[uint8_t, ndim=2] result
diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx
index 9495af87f5c31..ab7f3c3de2131 100644
--- a/pandas/_libs/testing.pyx
+++ b/pandas/_libs/testing.pyx
@@ -1,7 +1,7 @@
 import numpy as np
 
 from pandas import compat
-from pandas.core.dtypes.missing import isnull, array_equivalent
+from pandas.core.dtypes.missing import isna, array_equivalent
 from pandas.core.dtypes.common import is_dtype_equal
 
 cdef NUMERIC_TYPES = (
@@ -182,7 +182,7 @@ cpdef assert_almost_equal(a, b,
         if a == b:
             # object comparison
             return True
-        if isnull(a) and isnull(b):
+        if isna(a) and isna(b):
             # nan / None comparison
             return True
         if is_comparable_as_number(a) and is_comparable_as_number(b):
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 133e9d7dca18f..4ca658b35a276 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -27,7 +27,7 @@
     _ensure_float64, _ensure_uint64,
     _ensure_int64)
 from pandas.compat.numpy import _np_version_under1p10
-from pandas.core.dtypes.missing import isnull
+from pandas.core.dtypes.missing import isna
 
 from pandas.core import common as com
 from pandas._libs import algos, lib, hashtable as htable
@@ -427,7 +427,7 @@ def isin(comps, values):
         try:
             values = values.astype('float64', copy=False)
             comps = comps.astype('float64', copy=False)
-            checknull = isnull(values).any()
+            checknull = isna(values).any()
             f = lambda x, y: htable.ismember_float64(x, y, checknull)
         except (TypeError, ValueError):
             values = values.astype(object)
@@ -529,7 +529,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
 
             # count, remove nulls (from the index), and but the bins
             result = ii.value_counts(dropna=dropna)
-            result = result[result.index.notnull()]
+            result = result[result.index.notna()]
             result.index = result.index.astype('interval')
             result = result.sort_index()
@@ -597,9 +597,9 @@ def _value_counts_arraylike(values, dropna):
     f = getattr(htable, "value_count_{dtype}".format(dtype=ndtype))
     keys, counts = f(values, dropna)
 
-    mask = isnull(values)
+    mask = isna(values)
     if not dropna and mask.any():
-        if not isnull(keys).any():
+        if not isna(keys).any():
             keys = np.insert(keys, 0, np.NaN)
             counts = np.insert(counts, 0, mask.sum())
 
@@ -860,7 +860,7 @@ def quantile(x, q, interpolation_method='fraction'):
     """
     x = np.asarray(x)
-    mask = isnull(x)
+    mask = isna(x)
 
     x = x[~mask]
diff --git a/pandas/core/api.py b/pandas/core/api.py
index 265fb4004d997..086fedd7d7cf8 100644
--- a/pandas/core/api.py
+++ b/pandas/core/api.py
@@ -5,7 +5,7 @@
 import numpy as np
 
 from pandas.core.algorithms import factorize, unique, value_counts
-from pandas.core.dtypes.missing import isnull, notnull
+from pandas.core.dtypes.missing import isna, isnull, notna, notnull
 from pandas.core.categorical import Categorical
 from pandas.core.groupby import Grouper
 from pandas.io.formats.format import set_eng_float_format
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 97c4c8626dcbb..eb785b18bd02b 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -6,7 +6,7 @@
 from pandas.compat import builtins
 import numpy as np
 
-from pandas.core.dtypes.missing import isnull
+from pandas.core.dtypes.missing import isna
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass
 from pandas.core.dtypes.common import is_object_dtype, is_list_like, is_scalar
 from pandas.util._validators import validate_bool_kwarg
@@ -894,7 +894,7 @@ def argmin(self, axis=None):
     @cache_readonly
     def hasnans(self):
         """ return if I have any nans; enables various perf speedups """
-        return isnull(self).any()
+        return isna(self).any()
 
     def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
                 filter_type=None, **kwds):
@@ -990,7 +990,7 @@ def nunique(self, dropna=True):
         """
         uniqs = self.unique()
         n = len(uniqs)
-        if dropna and isnull(uniqs).any():
+        if dropna and isna(uniqs).any():
             n -= 1
         return n
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index afae11163b0dc..1392ad2f011db 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -10,7 +10,7 @@
 from pandas.core.dtypes.generic import (
     ABCSeries, ABCIndexClass, ABCCategoricalIndex)
-from pandas.core.dtypes.missing import isnull, notnull
+from pandas.core.dtypes.missing import isna, notna
 from pandas.core.dtypes.cast import (
     maybe_infer_to_datetimelike,
     coerce_indexer_dtype)
@@ -34,8 +34,8 @@
 import pandas.core.common as com
 from pandas.core.missing import interpolate_2d
 from pandas.compat.numpy import function as nv
-from pandas.util._decorators import (Appender, cache_readonly,
-                                     deprecate_kwarg, Substitution)
+from pandas.util._decorators import (
+    Appender, cache_readonly, deprecate_kwarg, Substitution)
 
 from pandas.io.formats.terminal import get_terminal_size
 from pandas.util._validators import validate_bool_kwarg
@@ -290,7 +290,7 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False):
             # On list with NaNs, int values will be converted to float. Use
             # "object" dtype to prevent this. In the end objects will be
             # casted to int/... in the category assignment step.
-            dtype = 'object' if isnull(values).any() else None
+            dtype = 'object' if isna(values).any() else None
             values = _sanitize_array(values, None, dtype=dtype)
 
         if categories is None:
@@ -561,9 +561,9 @@ def _validate_categories(cls, categories, fastpath=False):
                 categories = _convert_to_list_like(categories)
                 # On categories with NaNs, int values would be converted to
                 # float. Use "object" dtype to prevent this.
-                if isnull(categories).any():
+                if isna(categories).any():
                     without_na = np.array([x for x in categories
-                                           if notnull(x)])
+                                           if notna(x)])
                     with_na = np.array(categories)
                     if with_na.dtype != without_na.dtype:
                         dtype = "object"
@@ -941,9 +941,9 @@ def remove_categories(self, removals, inplace=False):
         new_categories = [c for c in self._categories if c not in removal_set]
 
         # GH 10156
-        if any(isnull(removals)):
-            not_included = [x for x in not_included if notnull(x)]
-            new_categories = [x for x in new_categories if notnull(x)]
+        if any(isna(removals)):
+            not_included = [x for x in not_included if notna(x)]
+            new_categories = [x for x in new_categories if notna(x)]
 
         if len(not_included) != 0:
             raise ValueError("removals must all be in old categories: %s" %
@@ -1153,7 +1153,7 @@ def searchsorted(self, value, side='left', sorter=None):
         return self.codes.searchsorted(values_as_codes, side=side,
                                        sorter=sorter)
 
-    def isnull(self):
+    def isna(self):
         """
         Detect missing values
 
@@ -1165,8 +1165,9 @@
 
         See also
         --------
-        isnull : pandas version
-        Categorical.notnull : boolean inverse of Categorical.isnull
+        isna : top-level isna
+        isnull : alias of isna
+        Categorical.notna : boolean inverse of Categorical.isna
 
         """
 
@@ -1175,14 +1176,15 @@
         # String/object and float categories can hold np.nan
         if self.categories.dtype.kind in ['S', 'O', 'f']:
             if np.nan in self.categories:
-                nan_pos = np.where(isnull(self.categories))[0]
+                nan_pos = np.where(isna(self.categories))[0]
                 # we only have one NA in categories
                 ret = np.logical_or(ret, self._codes == nan_pos)
         return ret
+    isnull = isna
 
-    def notnull(self):
+    def notna(self):
         """
-        Reverse of isnull
+        Inverse of isna
 
         Both missing values (-1 in .codes) and NA as a category are detected as
         null.
@@ -1193,11 +1195,13 @@
 
         See also
        --------
-        notnull : pandas version
-        Categorical.isnull : boolean inverse of Categorical.notnull
+        notna : top-level notna
+        notnull : alias of notna
+        Categorical.isna : boolean inverse of Categorical.notna
 
         """
-        return ~self.isnull()
+        return ~self.isna()
+    notnull = notna
 
     def put(self, *args, **kwargs):
         """
@@ -1217,8 +1221,8 @@ def dropna(self):
         -------
         valid : Categorical
         """
-        result = self[self.notnull()]
-        if isnull(result.categories).any():
+        result = self[self.notna()]
+        if isna(result.categories).any():
             result = result.remove_categories([np.nan])
         return result
 
@@ -1243,12 +1247,10 @@ def value_counts(self, dropna=True):
         """
         from numpy import bincount
-        from pandas.core.dtypes.missing import isnull
-        from pandas.core.series import Series
-        from pandas.core.index import CategoricalIndex
+        from pandas import isna, Series, CategoricalIndex
 
         obj = (self.remove_categories([np.nan]) if dropna and
-               isnull(self.categories).any() else self)
+               isna(self.categories).any() else self)
 
         code, cat = obj._codes, obj.categories
         ncat, mask = len(cat), 0 <= code
         ix, clean = np.arange(ncat), mask.all()
@@ -1520,7 +1522,7 @@ def fillna(self, value=None, method=None, limit=None):
         if self.categories.dtype.kind in ['S', 'O', 'f']:
             if np.nan in self.categories:
                 values = values.copy()
-                nan_pos = np.where(isnull(self.categories))[0]
+                nan_pos = np.where(isna(self.categories))[0]
                 # we only have one NA in categories
                 values[values == nan_pos] = -1
 
@@ -1534,13 +1536,13 @@
 
         else:
 
-            if not isnull(value) and value not in self.categories:
+            if not isna(value) and value not in self.categories:
                 raise ValueError("fill value must be in categories")
 
             mask = values == -1
             if mask.any():
                 values = values.copy()
-                if isnull(value):
+                if isna(value):
                     values[mask] = -1
                 else:
                     values[mask] = self.categories.get_loc(value)
@@ -1556,7 +1558,7 @@ def take_nd(self, indexer, allow_fill=True, fill_value=None):
 
         # filling must always be None/nan here
         # but is passed thru internally
-        assert isnull(fill_value)
+        assert isna(fill_value)
 
         codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1)
         result = self._constructor(codes, categories=self.categories,
@@ -1720,7 +1722,7 @@ def __setitem__(self, key, value):
 
         # no assignments of values not in categories, but it's always ok to set
         # something to np.nan
-        if len(to_add) and not isnull(to_add).all():
+        if len(to_add) and not isna(to_add).all():
             raise ValueError("Cannot setitem on a Categorical with a new "
                              "category, set the categories first")
 
@@ -1763,8 +1765,8 @@
         # https://github.com/pandas-dev/pandas/issues/7820
         # float categories do currently return -1 for np.nan, even if np.nan is
         # included in the index -> "repair" this here
-        if isnull(rvalue).any() and isnull(self.categories).any():
-            nan_pos = np.where(isnull(self.categories))[0]
+        if isna(rvalue).any() and isna(self.categories).any():
+            nan_pos = np.where(isna(self.categories))[0]
             lindexer[lindexer == -1] = nan_pos
 
         lindexer = self._maybe_coerce_indexer(lindexer)
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 3b09e68c6433a..44cb36b8a3207 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -18,7 +18,7 @@
 from pandas.core.dtypes.generic import ABCSeries
 from pandas.core.dtypes.common import _NS_DTYPE
 from pandas.core.dtypes.inference import _iterable_not_string
-from pandas.core.dtypes.missing import
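The effect of the rename in the patch above is easiest to see side by side. A minimal sketch, assuming pandas 0.21.0 or later (where the aliases introduced by this patch exist); the data is illustrative:

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, np.nan, 3.0])

    # The new spellings...
    na_mask = s.isna()
    valid_mask = s.notna()

    # ...are exact aliases of the old ones, so the results match.
    assert na_mask.equals(s.isnull())
    assert valid_mask.equals(s.notnull())

    # The top-level functions gain the same aliases.
    assert pd.isna(np.nan) and pd.notna(0)

Both spellings keep working; the patch only adds the isna/notna names for consistency with .dropna() and .fillna().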
autosummary:: :toctree: generated/ + isna isnull + notna notnull Top-level conversions @@ -272,8 +274,8 @@ Conversion Series.astype Series.infer_objects Series.copy - Series.isnull - Series.notnull + Series.isna + Series.notna Indexing, iteration ~~~~~~~~~~~~~~~~~~~ @@ -781,8 +783,8 @@ Conversion DataFrame.convert_objects DataFrame.infer_objects DataFrame.copy - DataFrame.isnull - DataFrame.notnull + DataFrame.isna + DataFrame.notna Indexing, iteration ~~~~~~~~~~~~~~~~~~~ @@ -1099,8 +1101,8 @@ Conversion Panel.astype Panel.copy - Panel.isnull - Panel.notnull + Panel.isna + Panel.notna Getting and setting ~~~~~~~~~~~~~~~~~~~ @@ -1343,8 +1345,8 @@ Missing Values Index.fillna Index.dropna - Index.isnull - Index.notnull + Index.isna + Index.notna Conversion ~~~~~~~~~~ diff --git a/doc/source/basics.rst b/doc/source/basics.rst index aae1fffb7a3b6..c8138d795b836 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -444,7 +444,7 @@ So, for instance, to reproduce :meth:`~DataFrame.combine_first` as above: .. ipython:: python - combiner = lambda x, y: np.where(pd.isnull(x), y, x) + combiner = lambda x, y: np.where(pd.isna(x), y, x) df1.combine(df2, combiner) .. _basics.stats: @@ -511,7 +511,7 @@ optional ``level`` parameter which applies only if the object has a :header: "Function", "Description" :widths: 20, 80 - ``count``, Number of non-null observations + ``count``, Number of non-na observations ``sum``, Sum of values ``mean``, Mean of values ``mad``, Mean absolute deviation @@ -541,7 +541,7 @@ will exclude NAs on Series input by default: np.mean(df['one'].values) ``Series`` also has a method :meth:`~Series.nunique` which will return the -number of unique non-null values: +number of unique non-na values: .. ipython:: python diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index ef558381c5e6f..02d7920bc4a84 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -863,14 +863,14 @@ a code of ``-1``. s.cat.codes -Methods for working with missing data, e.g. :meth:`~Series.isnull`, :meth:`~Series.fillna`, +Methods for working with missing data, e.g. :meth:`~Series.isna`, :meth:`~Series.fillna`, :meth:`~Series.dropna`, all work normally: .. ipython:: python s = pd.Series(["a", "b", np.nan], dtype="category") s - pd.isnull(s) + pd.isna(s) s.fillna("a") Differences to R's `factor` diff --git a/doc/source/comparison_with_sas.rst b/doc/source/comparison_with_sas.rst index 875358521173a..33a347de0bf5b 100644 --- a/doc/source/comparison_with_sas.rst +++ b/doc/source/comparison_with_sas.rst @@ -444,13 +444,13 @@ For example, in SAS you could do this to filter missing values. if value_x ^= .; run; -Which doesn't work in in pandas. Instead, the ``pd.isnull`` or ``pd.notnull`` functions +Which doesn't work in in pandas. Instead, the ``pd.isna`` or ``pd.notna`` functions should be used for comparisons. .. ipython:: python - outer_join[pd.isnull(outer_join['value_x'])] - outer_join[pd.notnull(outer_join['value_x'])] + outer_join[pd.isna(outer_join['value_x'])] + outer_join[pd.notna(outer_join['value_x'])] pandas also provides a variety of methods to work with missing data - some of which would be challenging to express in SAS. For example, there are methods to @@ -570,7 +570,7 @@ machine's memory, but also that the operations on that data may be faster. 
If out of core processing is needed, one possibility is the `dask.dataframe `_ -library (currently in development) which +library (currently in development) which provides a subset of pandas functionality for an on-disk ``DataFrame`` Data Interop @@ -578,7 +578,7 @@ Data Interop pandas provides a :func:`read_sas` method that can read SAS data saved in the XPORT or SAS7BDAT binary format. - + .. code-block:: none libname xportout xport 'transport-file.xpt'; @@ -613,4 +613,3 @@ to interop data between SAS and pandas is to serialize to csv. In [9]: %time df = pd.read_csv('big.csv') Wall time: 4.86 s - diff --git a/doc/source/comparison_with_sql.rst b/doc/source/comparison_with_sql.rst index 7962e0e69faa1..2112c7de8c897 100644 --- a/doc/source/comparison_with_sql.rst +++ b/doc/source/comparison_with_sql.rst @@ -101,7 +101,7 @@ Just like SQL's OR and AND, multiple conditions can be passed to a DataFrame usi # tips by parties of at least 5 diners OR bill total was more than $45 tips[(tips['size'] >= 5) | (tips['total_bill'] > 45)] -NULL checking is done using the :meth:`~pandas.Series.notnull` and :meth:`~pandas.Series.isnull` +NULL checking is done using the :meth:`~pandas.Series.notna` and :meth:`~pandas.Series.isna` methods. .. ipython:: python @@ -121,9 +121,9 @@ where ``col2`` IS NULL with the following query: .. ipython:: python - frame[frame['col2'].isnull()] + frame[frame['col2'].isna()] -Getting items where ``col1`` IS NOT NULL can be done with :meth:`~pandas.Series.notnull`. +Getting items where ``col1`` IS NOT NULL can be done with :meth:`~pandas.Series.notna`. .. code-block:: sql @@ -133,7 +133,7 @@ Getting items where ``col1`` IS NOT NULL can be done with :meth:`~pandas.Series. .. ipython:: python - frame[frame['col1'].notnull()] + frame[frame['col1'].notna()] GROUP BY diff --git a/doc/source/conf.py b/doc/source/conf.py index cb3063d59beae..6eb12324ee461 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -238,8 +238,8 @@ # https://github.com/pandas-dev/pandas/issues/16186 moved_api_pages = [ - ('pandas.core.common.isnull', 'pandas.isnull'), - ('pandas.core.common.notnull', 'pandas.notnull'), + ('pandas.core.common.isnull', 'pandas.isna'), + ('pandas.core.common.notnull', 'pandas.notna'), ('pandas.core.reshape.get_dummies', 'pandas.get_dummies'), ('pandas.tools.merge.concat', 'pandas.concat'), ('pandas.tools.merge.merge', 'pandas.merge'), diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index 11827fe2776cf..a3a90f514f142 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -202,7 +202,7 @@ For many reasons we chose the latter. After years of production use it has proven, at least in my opinion, to be the best decision given the state of affairs in NumPy and Python in general. The special value ``NaN`` (Not-A-Number) is used everywhere as the ``NA`` value, and there are API -functions ``isnull`` and ``notnull`` which can be used across the dtypes to +functions ``isna`` and ``notna`` which can be used across the dtypes to detect NA values. However, it comes with it a couple of trade-offs which I most certainly have diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 37930775885e3..e40b7d460fef8 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -36,7 +36,7 @@ When / why does data become missing? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Some might quibble over our usage of *missing*. By "missing" we simply mean -**null** or "not present for whatever reason". 
Many data sets simply arrive with +**na** or "not present for whatever reason". Many data sets simply arrive with missing data, either because it exists and was not collected or it never existed. For example, in a collection of financial time series, some of the time series might start on different dates. Thus, values prior to the start date @@ -63,27 +63,27 @@ to handling missing data. While ``NaN`` is the default missing value marker for reasons of computational speed and convenience, we need to be able to easily detect this value with data of different types: floating point, integer, boolean, and general object. In many cases, however, the Python ``None`` will -arise and we wish to also consider that "missing" or "null". +arise and we wish to also consider that "missing" or "na". .. note:: Prior to version v0.10.0 ``inf`` and ``-inf`` were also - considered to be "null" in computations. This is no longer the case by - default; use the ``mode.use_inf_as_null`` option to recover it. + considered to be "na" in computations. This is no longer the case by + default; use the ``mode.use_inf_as_na`` option to recover it. -.. _missing.isnull: +.. _missing.isna: To make detecting missing values easier (and across different array dtypes), -pandas provides the :func:`~pandas.core.common.isnull` and -:func:`~pandas.core.common.notnull` functions, which are also methods on +pandas provides the :func:`isna` and +:func:`notna` functions, which are also methods on ``Series`` and ``DataFrame`` objects: .. ipython:: python df2['one'] - pd.isnull(df2['one']) - df2['four'].notnull() - df2.isnull() + pd.isna(df2['one']) + df2['four'].notna() + df2.isna() .. warning:: @@ -206,7 +206,7 @@ with missing data. Filling missing values: fillna ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The **fillna** function can "fill in" NA values with non-null data in a couple +The **fillna** function can "fill in" NA values with non-na data in a couple of ways, which we illustrate: **Replace NA with a scalar value** @@ -220,7 +220,7 @@ of ways, which we illustrate: **Fill gaps forward or backward** Using the same filling arguments as :ref:`reindexing `, we -can propagate non-null values forward or backward: +can propagate non-na values forward or backward: .. ipython:: python @@ -288,7 +288,7 @@ a Series in this case. .. ipython:: python - dff.where(pd.notnull(dff), dff.mean(), axis='columns') + dff.where(pd.notna(dff), dff.mean(), axis='columns') .. _missing_data.dropna: diff --git a/doc/source/options.rst b/doc/source/options.rst index c585da64efece..83b08acac5720 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -419,10 +419,10 @@ mode.chained_assignment warn Raise an exception, warn, or no assignment, The default is warn mode.sim_interactive False Whether to simulate interactive mode for purposes of testing. -mode.use_inf_as_null False True means treat None, NaN, -INF, - INF as null (old way), False means +mode.use_inf_as_na False True means treat None, NaN, -INF, + INF as NA (old way), False means None and NaN are null, but INF, -INF - are not null (new way). + are not NA (new way). compute.use_bottleneck True Use the bottleneck library to accelerate computation if it is installed. compute.use_numexpr True Use the numexpr library to accelerate diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index e6764178d1f25..5a5ea827e74ad 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -128,8 +128,6 @@ the target. 
Now, a ``ValueError`` will be raised when such an input is passed in ... ValueError: Cannot operate inplace if there is no assignment -.. _whatsnew_0210.dtype_conversions: - Dtype Conversions ^^^^^^^^^^^^^^^^^ @@ -187,6 +185,20 @@ Dtype Conversions - Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`) - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) +.. _whatsnew_0210.api.na_changes: + +NA naming Changes +^^^^^^^^^^^^^^^^^ + +In orde to promote more consistency among the pandas API, we have added additional top-level +functions :func:`isna` and :func:`notna` that are aliases for :func:`isnull` and :func:`notnull`. +The naming scheme is now more consistent with methods like ``.dropna()`` and ``.fillna()``. Furthermore +in all cases where ``.isnull()`` and ``.notnull()`` methods are defined, these have additional methods +named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical``, +``Index``, ``Series``, and ``DataFrame``. (:issue:`15001`). + +The configuration option ``mode.use_inf_as_null``is deprecated, and ``mode.use_inf_as_na`` is added as a replacement. + .. _whatsnew_0210.api: Other API Changes diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index aafffbf60f638..0945aec638b1d 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -83,7 +83,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True, nan_value = {{neg_nan_value}} {{if dtype == 'object'}} - mask = lib.isnullobj(values) + mask = lib.isnaobj(values) {{elif dtype == 'float64'}} mask = np.isnan(values) {{elif dtype == 'int64'}} @@ -259,7 +259,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', nan_value = {{neg_nan_value}} {{if dtype == 'object'}} - mask = lib.isnullobj2d(values) + mask = lib.isnaobj2d(values) {{elif dtype == 'float64'}} mask = np.isnan(values) {{elif dtype == 'int64'}} diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f6e574b66a828..0458d4ae9f3de 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -286,7 +286,7 @@ def item_from_zerodim(object val): @cython.wraparound(False) @cython.boundscheck(False) -def isnullobj(ndarray arr): +def isnaobj(ndarray arr): cdef Py_ssize_t i, n cdef object val cdef ndarray[uint8_t] result @@ -303,7 +303,7 @@ def isnullobj(ndarray arr): @cython.wraparound(False) @cython.boundscheck(False) -def isnullobj_old(ndarray arr): +def isnaobj_old(ndarray arr): cdef Py_ssize_t i, n cdef object val cdef ndarray[uint8_t] result @@ -320,7 +320,7 @@ def isnullobj_old(ndarray arr): @cython.wraparound(False) @cython.boundscheck(False) -def isnullobj2d(ndarray arr): +def isnaobj2d(ndarray arr): cdef Py_ssize_t i, j, n, m cdef object val cdef ndarray[uint8_t, ndim=2] result @@ -339,7 +339,7 @@ def isnullobj2d(ndarray arr): @cython.wraparound(False) @cython.boundscheck(False) -def isnullobj2d_old(ndarray arr): +def isnaobj2d_old(ndarray arr): cdef Py_ssize_t i, j, n, m cdef object val cdef ndarray[uint8_t, ndim=2] result diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 9495af87f5c31..ab7f3c3de2131 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -1,7 +1,7 @@ import numpy as np from pandas import compat -from pandas.core.dtypes.missing import isnull, array_equivalent +from pandas.core.dtypes.missing import isna, array_equivalent 
from pandas.core.dtypes.common import is_dtype_equal cdef NUMERIC_TYPES = ( @@ -182,7 +182,7 @@ cpdef assert_almost_equal(a, b, if a == b: # object comparison return True - if isnull(a) and isnull(b): + if isna(a) and isna(b): # nan / None comparison return True if is_comparable_as_number(a) and is_comparable_as_number(b): diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 133e9d7dca18f..4ca658b35a276 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -27,7 +27,7 @@ _ensure_float64, _ensure_uint64, _ensure_int64) from pandas.compat.numpy import _np_version_under1p10 -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna from pandas.core import common as com from pandas._libs import algos, lib, hashtable as htable @@ -427,7 +427,7 @@ def isin(comps, values): try: values = values.astype('float64', copy=False) comps = comps.astype('float64', copy=False) - checknull = isnull(values).any() + checknull = isna(values).any() f = lambda x, y: htable.ismember_float64(x, y, checknull) except (TypeError, ValueError): values = values.astype(object) @@ -529,7 +529,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, # count, remove nulls (from the index), and but the bins result = ii.value_counts(dropna=dropna) - result = result[result.index.notnull()] + result = result[result.index.notna()] result.index = result.index.astype('interval') result = result.sort_index() @@ -597,9 +597,9 @@ def _value_counts_arraylike(values, dropna): f = getattr(htable, "value_count_{dtype}".format(dtype=ndtype)) keys, counts = f(values, dropna) - mask = isnull(values) + mask = isna(values) if not dropna and mask.any(): - if not isnull(keys).any(): + if not isna(keys).any(): keys = np.insert(keys, 0, np.NaN) counts = np.insert(counts, 0, mask.sum()) @@ -860,7 +860,7 @@ def quantile(x, q, interpolation_method='fraction'): """ x = np.asarray(x) - mask = isnull(x) + mask = isna(x) x = x[~mask] diff --git a/pandas/core/api.py b/pandas/core/api.py index 265fb4004d997..086fedd7d7cf8 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -5,7 +5,7 @@ import numpy as np from pandas.core.algorithms import factorize, unique, value_counts -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, isnull, notna, notnull from pandas.core.categorical import Categorical from pandas.core.groupby import Grouper from pandas.io.formats.format import set_eng_float_format diff --git a/pandas/core/base.py b/pandas/core/base.py index 97c4c8626dcbb..eb785b18bd02b 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -6,7 +6,7 @@ from pandas.compat import builtins import numpy as np -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass from pandas.core.dtypes.common import is_object_dtype, is_list_like, is_scalar from pandas.util._validators import validate_bool_kwarg @@ -894,7 +894,7 @@ def argmin(self, axis=None): @cache_readonly def hasnans(self): """ return if I have any nans; enables various perf speedups """ - return isnull(self).any() + return isna(self).any() def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): @@ -990,7 +990,7 @@ def nunique(self, dropna=True): """ uniqs = self.unique() n = len(uniqs) - if dropna and isnull(uniqs).any(): + if dropna and isna(uniqs).any(): n -= 1 return n diff --git a/pandas/core/categorical.py 
b/pandas/core/categorical.py index afae11163b0dc..1392ad2f011db 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -10,7 +10,7 @@ from pandas.core.dtypes.generic import ( ABCSeries, ABCIndexClass, ABCCategoricalIndex) -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.cast import ( maybe_infer_to_datetimelike, coerce_indexer_dtype) @@ -34,8 +34,8 @@ import pandas.core.common as com from pandas.core.missing import interpolate_2d from pandas.compat.numpy import function as nv -from pandas.util._decorators import (Appender, cache_readonly, - deprecate_kwarg, Substitution) +from pandas.util._decorators import ( + Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.io.formats.terminal import get_terminal_size from pandas.util._validators import validate_bool_kwarg @@ -290,7 +290,7 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False): # On list with NaNs, int values will be converted to float. Use # "object" dtype to prevent this. In the end objects will be # casted to int/... in the category assignment step. - dtype = 'object' if isnull(values).any() else None + dtype = 'object' if isna(values).any() else None values = _sanitize_array(values, None, dtype=dtype) if categories is None: @@ -561,9 +561,9 @@ def _validate_categories(cls, categories, fastpath=False): categories = _convert_to_list_like(categories) # On categories with NaNs, int values would be converted to # float. Use "object" dtype to prevent this. - if isnull(categories).any(): + if isna(categories).any(): without_na = np.array([x for x in categories - if notnull(x)]) + if notna(x)]) with_na = np.array(categories) if with_na.dtype != without_na.dtype: dtype = "object" @@ -941,9 +941,9 @@ def remove_categories(self, removals, inplace=False): new_categories = [c for c in self._categories if c not in removal_set] # GH 10156 - if any(isnull(removals)): - not_included = [x for x in not_included if notnull(x)] - new_categories = [x for x in new_categories if notnull(x)] + if any(isna(removals)): + not_included = [x for x in not_included if notna(x)] + new_categories = [x for x in new_categories if notna(x)] if len(not_included) != 0: raise ValueError("removals must all be in old categories: %s" % @@ -1153,7 +1153,7 @@ def searchsorted(self, value, side='left', sorter=None): return self.codes.searchsorted(values_as_codes, side=side, sorter=sorter) - def isnull(self): + def isna(self): """ Detect missing values @@ -1165,8 +1165,9 @@ def isnull(self): See also -------- - isnull : pandas version - Categorical.notnull : boolean inverse of Categorical.isnull + isna : top-level isna + isnull : alias of isna + Categorical.notna : boolean inverse of Categorical.isna """ @@ -1175,14 +1176,15 @@ def isnull(self): # String/object and float categories can hold np.nan if self.categories.dtype.kind in ['S', 'O', 'f']: if np.nan in self.categories: - nan_pos = np.where(isnull(self.categories))[0] + nan_pos = np.where(isna(self.categories))[0] # we only have one NA in categories ret = np.logical_or(ret, self._codes == nan_pos) return ret + isnull = isna - def notnull(self): + def notna(self): """ - Reverse of isnull + Inverse of isna Both missing values (-1 in .codes) and NA as a category are detected as null. 
@@ -1193,11 +1195,13 @@ def notnull(self): See also -------- - notnull : pandas version - Categorical.isnull : boolean inverse of Categorical.notnull + notna : top-level notna + notnull : alias of notna + Categorical.isna : boolean inverse of Categorical.notna """ - return ~self.isnull() + return ~self.isna() + notnull = notna def put(self, *args, **kwargs): """ @@ -1217,8 +1221,8 @@ def dropna(self): ------- valid : Categorical """ - result = self[self.notnull()] - if isnull(result.categories).any(): + result = self[self.notna()] + if isna(result.categories).any(): result = result.remove_categories([np.nan]) return result @@ -1243,12 +1247,10 @@ def value_counts(self, dropna=True): """ from numpy import bincount - from pandas.core.dtypes.missing import isnull - from pandas.core.series import Series - from pandas.core.index import CategoricalIndex + from pandas import isna, Series, CategoricalIndex obj = (self.remove_categories([np.nan]) if dropna and - isnull(self.categories).any() else self) + isna(self.categories).any() else self) code, cat = obj._codes, obj.categories ncat, mask = len(cat), 0 <= code ix, clean = np.arange(ncat), mask.all() @@ -1520,7 +1522,7 @@ def fillna(self, value=None, method=None, limit=None): if self.categories.dtype.kind in ['S', 'O', 'f']: if np.nan in self.categories: values = values.copy() - nan_pos = np.where(isnull(self.categories))[0] + nan_pos = np.where(isna(self.categories))[0] # we only have one NA in categories values[values == nan_pos] = -1 @@ -1534,13 +1536,13 @@ def fillna(self, value=None, method=None, limit=None): else: - if not isnull(value) and value not in self.categories: + if not isna(value) and value not in self.categories: raise ValueError("fill value must be in categories") mask = values == -1 if mask.any(): values = values.copy() - if isnull(value): + if isna(value): values[mask] = -1 else: values[mask] = self.categories.get_loc(value) @@ -1556,7 +1558,7 @@ def take_nd(self, indexer, allow_fill=True, fill_value=None): # filling must always be None/nan here # but is passed thru internally - assert isnull(fill_value) + assert isna(fill_value) codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1) result = self._constructor(codes, categories=self.categories, @@ -1720,7 +1722,7 @@ def __setitem__(self, key, value): # no assignments of values not in categories, but it's always ok to set # something to np.nan - if len(to_add) and not isnull(to_add).all(): + if len(to_add) and not isna(to_add).all(): raise ValueError("Cannot setitem on a Categorical with a new " "category, set the categories first") @@ -1763,8 +1765,8 @@ def __setitem__(self, key, value): # https://github.com/pandas-dev/pandas/issues/7820 # float categories do currently return -1 for np.nan, even if np.nan is # included in the index -> "repair" this here - if isnull(rvalue).any() and isnull(self.categories).any(): - nan_pos = np.where(isnull(self.categories))[0] + if isna(rvalue).any() and isna(self.categories).any(): + nan_pos = np.where(isna(self.categories))[0] lindexer[lindexer == -1] = nan_pos lindexer = self._maybe_coerce_indexer(lindexer) diff --git a/pandas/core/common.py b/pandas/core/common.py index 3b09e68c6433a..44cb36b8a3207 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -18,7 +18,7 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.common import _NS_DTYPE from pandas.core.dtypes.inference import _iterable_not_string -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import 
isna, isnull, notnull # noqa from pandas.api import types from pandas.core.dtypes import common @@ -187,7 +187,7 @@ def is_bool_indexer(key): key = np.asarray(_values_from_object(key)) if not lib.is_bool_array(key): - if isnull(key).any(): + if isna(key).any(): raise ValueError('cannot index with vector containing ' 'NA / NaN values') return False diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 06ce811703a8c..76e30a6fb9d52 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -392,9 +392,14 @@ def table_schema_cb(key): cf.register_option('sim_interactive', False, tc_sim_interactive_doc) use_inf_as_null_doc = """ +use_inf_as_null has been deprecated and will be removed in a future version. +Use `use_inf_as_na` instead. +""" + +use_inf_as_na_doc = """ : boolean - True means treat None, NaN, INF, -INF as null (old way), - False means None and NaN are null, but INF, -INF are not null + True means treat None, NaN, INF, -INF as na (old way), + False means None and NaN are na, but INF, -INF are not na (new way). """ @@ -402,14 +407,17 @@ def table_schema_cb(key): # or we'll hit circular deps. -def use_inf_as_null_cb(key): - from pandas.core.dtypes.missing import _use_inf_as_null - _use_inf_as_null(key) +def use_inf_as_na_cb(key): + from pandas.core.dtypes.missing import _use_inf_as_na + _use_inf_as_na(key) -with cf.config_prefix('mode'): - cf.register_option('use_inf_as_null', False, use_inf_as_null_doc, - cb=use_inf_as_null_cb) +cf.register_option('mode.use_inf_as_na', False, use_inf_as_na_doc, + cb=use_inf_as_na_cb) + +cf.deprecate_option('mode.use_inf_as_null', msg=use_inf_as_null_doc, + rkey='mode.use_inf_as_na') + # user warnings chained_assignment = """ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 22d98a89d68d6..723e4f70da4e9 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -27,7 +27,7 @@ from .dtypes import ExtensionDtype, DatetimeTZDtype, PeriodDtype from .generic import (ABCDatetimeIndex, ABCPeriodIndex, ABCSeries) -from .missing import isnull, notnull +from .missing import isna, notna from .inference import is_list_like _int8_max = np.iinfo(np.int8).max @@ -121,7 +121,7 @@ def trans(x): # noqa arr = np.array([r[0]]) # if we have any nulls, then we are done - if (isnull(arr).any() or + if (isna(arr).any() or not np.allclose(arr, trans(arr).astype(dtype), rtol=0)): return result @@ -131,7 +131,7 @@ def trans(x): # noqa return result if (issubclass(result.dtype.type, (np.object_, np.number)) and - notnull(result).all()): + notna(result).all()): new_result = trans(result).astype(dtype) try: if np.allclose(new_result, result, rtol=0): @@ -191,7 +191,7 @@ def maybe_upcast_putmask(result, mask, other): # integer or integer array -> date-like array if is_datetimelike(result.dtype): if is_scalar(other): - if isnull(other): + if isna(other): other = result.dtype.type('nat') elif is_integer(other): other = np.array(other, dtype=result.dtype) @@ -232,13 +232,13 @@ def changeit(): # and its nan and we are changing some values if (is_scalar(other) or (isinstance(other, np.ndarray) and other.ndim < 1)): - if isnull(other): + if isna(other): return changeit() # we have an ndarray and the masking has nans in it else: - if isnull(other[mask]).any(): + if isna(other[mask]).any(): return changeit() try: @@ -268,7 +268,7 @@ def maybe_promote(dtype, fill_value=np.nan): # for now: refuse to upcast datetime64 # (this is because datetime64 will not implicitly upconvert # to object correctly as of numpy
1.6.1) - if isnull(fill_value): + if isna(fill_value): fill_value = iNaT else: if issubclass(dtype.type, np.datetime64): @@ -287,7 +287,7 @@ def maybe_promote(dtype, fill_value=np.nan): else: fill_value = iNaT elif is_datetimetz(dtype): - if isnull(fill_value): + if isna(fill_value): fill_value = iNaT elif is_float(fill_value): if issubclass(dtype.type, np.bool_): @@ -580,7 +580,7 @@ def coerce_to_dtypes(result, dtypes): def conv(r, dtype): try: - if isnull(r): + if isna(r): pass elif dtype == _NS_DTYPE: r = tslib.Timestamp(r) @@ -635,7 +635,7 @@ def astype_nansafe(arr, dtype, copy=True): # allow frequency conversions if dtype.kind == 'm': - mask = isnull(arr) + mask = isna(arr) result = arr.astype(dtype).astype(np.float64) result[mask] = np.nan return result @@ -687,7 +687,7 @@ def maybe_convert_objects(values, convert_dates=True, convert_numeric=True, values, 'M8[ns]', errors='coerce') # if we are all nans then leave me alone - if not isnull(new_values).all(): + if not isna(new_values).all(): values = new_values else: @@ -702,7 +702,7 @@ def maybe_convert_objects(values, convert_dates=True, convert_numeric=True, new_values = to_timedelta(values, errors='coerce') # if we are all nans then leave me alone - if not isnull(new_values).all(): + if not isna(new_values).all(): values = new_values else: @@ -717,7 +717,7 @@ def maybe_convert_objects(values, convert_dates=True, convert_numeric=True, coerce_numeric=True) # if we are all nans then leave me alone - if not isnull(new_values).all(): + if not isna(new_values).all(): values = new_values except: @@ -779,7 +779,7 @@ def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) # If all NaNs, then do not-alter - values = converted if not isnull(converted).all() else values + values = converted if not isna(converted).all() else values values = values.copy() if copy else values except: pass @@ -881,7 +881,7 @@ def try_timedelta(v): elif inferred_type == 'nat': # if all NaT, return as datetime - if isnull(v).all(): + if isna(v).all(): value = try_datetime(v) else: @@ -932,7 +932,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): # our NaT doesn't support tz's # this will coerce to DatetimeIndex with # a matching dtype below - if is_scalar(value) and isnull(value): + if is_scalar(value) and isna(value): value = [value] elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): @@ -946,7 +946,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): "dtype [%s]" % dtype) if is_scalar(value): - if value == iNaT or isnull(value): + if value == iNaT or isna(value): value = iNaT else: value = np.array(value, copy=False) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 9913923cb7807..101612893cb02 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -23,7 +23,7 @@ from .inference import is_list_like -def isnull(obj): +def isna(obj): """Detect missing values (NaN in numeric arrays, None/NaN in object arrays) Parameters @@ -33,34 +33,38 @@ def isnull(obj): Returns ------- - isnulled : array-like of bool or bool + isna : array-like of bool or bool Array or bool indicating whether an object is null or if an array is given which of the element is null. 
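The ``config_init.py`` hunks above register the option under its new key, ``mode.use_inf_as_na``, and route the old ``mode.use_inf_as_null`` key through ``cf.deprecate_option``. Flipping the option swaps ``_isna`` between ``_isna_new`` and ``_isna_old``, so infinities start counting as missing. A short sketch of the intended behaviour, assuming a build with this patch series applied:

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, np.inf, np.nan])
    print(pd.isna(s).tolist())        # [False, False, True]

    with pd.option_context('mode.use_inf_as_na', True):
        # the callback rebinds _isna to _isna_old, which treats +/-inf as NA
        print(pd.isna(s).tolist())    # [False, True, True]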
See also -------- - pandas.notnull: boolean inverse of pandas.isnull + pandas.notna: boolean inverse of pandas.isna + pandas.isnull: alias of isna """ - return _isnull(obj) + return _isna(obj) -def _isnull_new(obj): +isnull = isna + + +def _isna_new(obj): if is_scalar(obj): return lib.checknull(obj) # hack (for now) because MI registers as ndarray elif isinstance(obj, ABCMultiIndex): - raise NotImplementedError("isnull is not defined for MultiIndex") + raise NotImplementedError("isna is not defined for MultiIndex") elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass)): - return _isnull_ndarraylike(obj) + return _isna_ndarraylike(obj) elif isinstance(obj, ABCGeneric): - return obj._constructor(obj._data.isnull(func=isnull)) + return obj._constructor(obj._data.isna(func=isna)) elif isinstance(obj, list) or hasattr(obj, '__array__'): - return _isnull_ndarraylike(np.asarray(obj)) + return _isna_ndarraylike(np.asarray(obj)) else: return obj is None -def _isnull_old(obj): +def _isna_old(obj): """Detect missing values. Treat None, NaN, INF, -INF as null. Parameters @@ -75,22 +79,22 @@ def _isnull_old(obj): return lib.checknull_old(obj) # hack (for now) because MI registers as ndarray elif isinstance(obj, ABCMultiIndex): - raise NotImplementedError("isnull is not defined for MultiIndex") + raise NotImplementedError("isna is not defined for MultiIndex") elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass)): - return _isnull_ndarraylike_old(obj) + return _isna_ndarraylike_old(obj) elif isinstance(obj, ABCGeneric): - return obj._constructor(obj._data.isnull(func=_isnull_old)) + return obj._constructor(obj._data.isna(func=_isna_old)) elif isinstance(obj, list) or hasattr(obj, '__array__'): - return _isnull_ndarraylike_old(np.asarray(obj)) + return _isna_ndarraylike_old(np.asarray(obj)) else: return obj is None -_isnull = _isnull_new +_isna = _isna_new -def _use_inf_as_null(key): - """Option change callback for null/inf behaviour +def _use_inf_as_na(key): + """Option change callback for na/inf behaviour Choose which replacement for numpy.isnan / -numpy.isfinite is used. Parameters @@ -111,12 +115,12 @@ def _use_inf_as_null(key): from pandas.core.config import get_option flag = get_option(key) if flag: - globals()['_isnull'] = _isnull_old + globals()['_isna'] = _isna_old else: - globals()['_isnull'] = _isnull_new + globals()['_isna'] = _isna_new -def _isnull_ndarraylike(obj): +def _isna_ndarraylike(obj): values = getattr(obj, 'values', obj) dtype = values.dtype @@ -126,10 +130,10 @@ def _isnull_ndarraylike(obj): from pandas import Categorical if not isinstance(values, Categorical): values = values.values - result = values.isnull() + result = values.isna() elif is_interval_dtype(values): from pandas import IntervalIndex - result = IntervalIndex(obj).isnull() + result = IntervalIndex(obj).isna() else: # Working around NumPy ticket 1542 @@ -139,7 +143,7 @@ def _isnull_ndarraylike(obj): result = np.zeros(values.shape, dtype=bool) else: result = np.empty(shape, dtype=bool) - vec = lib.isnullobj(values.ravel()) + vec = lib.isnaobj(values.ravel()) result[...] 
= vec.reshape(shape) elif needs_i8_conversion(obj): @@ -156,7 +160,7 @@ def _isnull_ndarraylike(obj): return result -def _isnull_ndarraylike_old(obj): +def _isna_ndarraylike_old(obj): values = getattr(obj, 'values', obj) dtype = values.dtype @@ -168,7 +172,7 @@ def _isnull_ndarraylike_old(obj): result = np.zeros(values.shape, dtype=bool) else: result = np.empty(shape, dtype=bool) - vec = lib.isnullobj_old(values.ravel()) + vec = lib.isnaobj_old(values.ravel()) result[:] = vec.reshape(shape) elif is_datetime64_dtype(dtype): @@ -185,7 +189,7 @@ def _isnull_ndarraylike_old(obj): return result -def notnull(obj): +def notna(obj): """Replacement for numpy.isfinite / -numpy.isnan which is suitable for use on object arrays. @@ -196,20 +200,24 @@ def notnull(obj): Returns ------- - isnulled : array-like of bool or bool + notna : array-like of bool or bool Array or bool indicating whether an object is *not* null or if an array is given which of the element is *not* null. See also -------- - pandas.isnull : boolean inverse of pandas.notnull + pandas.isna : boolean inverse of pandas.notna + pandas.notnull : alias of notna """ - res = isnull(obj) + res = isna(obj) if is_scalar(res): return not res return ~res +notnull = notna + + def is_null_datelike_scalar(other): """ test whether the object is a null datelike, e.g. Nat but guard against passing a non-scalar """ @@ -222,11 +230,11 @@ def is_null_datelike_scalar(other): return other.view('i8') == iNaT elif is_integer(other) and other == iNaT: return True - return isnull(other) + return isna(other) return False -def _is_na_compat(arr, fill_value=np.nan): +def _isna_compat(arr, fill_value=np.nan): """ Parameters ---------- @@ -238,7 +246,7 @@ def _is_na_compat(arr, fill_value=np.nan): True if we can fill using this fill_value """ dtype = arr.dtype - if isnull(fill_value): + if isna(fill_value): return not (is_bool_dtype(dtype) or is_integer_dtype(dtype)) return True @@ -286,7 +294,7 @@ def array_equivalent(left, right, strict_nan=False): if is_string_dtype(left) or is_string_dtype(right): if not strict_nan: - # isnull considers NaN and None to be equivalent. + # isna considers NaN and None to be equivalent. return lib.array_equivalent_object( _ensure_object(left.ravel()), _ensure_object(right.ravel())) @@ -305,7 +313,7 @@ def array_equivalent(left, right, strict_nan=False): # NaNs can occur in float and complex arrays. if is_float_dtype(left) or is_complex_dtype(left): - return ((left == right) | (isnull(left) & isnull(right))).all() + return ((left == right) | (isna(left) & isna(right))).all() # numpy will not allow this type of datetimelike vs integer comparison elif is_datetimelike_v_numeric(left, right): @@ -365,7 +373,7 @@ def _maybe_fill(arr, fill_value=np.nan): """ if we have a compatible fill_value and arr dtype, then fill """ - if _is_na_compat(arr, fill_value): + if _isna_compat(arr, fill_value): arr.fill(fill_value) return arr @@ -400,4 +408,4 @@ def remove_na_arraylike(arr): """ Return array-like containing only true/non-NaN values, possibly empty. """ - return arr[notnull(lib.values_from_object(arr))] + return arr[notna(lib.values_from_object(arr))]
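Because ``isnull = isna`` and ``notnull = notna`` are plain module-level assignments, the old and new entry points are literally the same function objects. A small sketch, again assuming the patched build:

    import numpy as np
    import pandas as pd

    print(pd.isna(None))                    # True
    print(pd.notna([1.0, np.nan, None]))    # [ True False False]

    # aliases, not copies
    assert pd.isnull is pd.isna
    assert pd.notnull is pd.notna

    # MultiIndex is still rejected, as the _isna_new dispatch above shows
    try:
        pd.isna(pd.MultiIndex.from_tuples([('a', 1)]))
    except NotImplementedError as exc:
        print(exc)                          # isna is not defined for MultiIndex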
""" - return arr[notnull(lib.values_from_object(arr))] + return arr[notna(lib.values_from_object(arr))] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2ceb62dc7a349..6c72fa648559a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -58,7 +58,7 @@ is_iterator, is_sequence, is_named_tuple) -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.common import (_try_sort, @@ -3205,6 +3205,22 @@ def _maybe_casted_values(index, labels=None): # ---------------------------------------------------------------------- # Reindex-based selection methods + @Appender(_shared_docs['isna'] % _shared_doc_kwargs) + def isna(self): + return super(DataFrame, self).isna() + + @Appender(_shared_docs['isna'] % _shared_doc_kwargs) + def isnull(self): + return super(DataFrame, self).isnull() + + @Appender(_shared_docs['isna'] % _shared_doc_kwargs) + def notna(self): + return super(DataFrame, self).notna() + + @Appender(_shared_docs['notna'] % _shared_doc_kwargs) + def notnull(self): + return super(DataFrame, self).notnull() + def dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False): """ @@ -3689,8 +3705,8 @@ def _combine_frame(self, other, func, fill_value=None, level=None, def _arith_op(left, right): if fill_value is not None: - left_mask = isnull(left) - right_mask = isnull(right) + left_mask = isna(left) + right_mask = isna(right) left = left.copy() right = right.copy() @@ -3874,8 +3890,8 @@ def combine(self, other, func, fill_value=None, overwrite=True): this_dtype = series.dtype other_dtype = otherSeries.dtype - this_mask = isnull(series) - other_mask = isnull(otherSeries) + this_mask = isna(series) + other_mask = isna(otherSeries) # don't overwrite columns unecessarily # DO propagate if this column is not in the intersection @@ -3954,11 +3970,11 @@ def combiner(x, y, needs_i8_conversion=False): x_values = x.values if hasattr(x, 'values') else x y_values = y.values if hasattr(y, 'values') else y if needs_i8_conversion: - mask = isnull(x) + mask = isna(x) x_values = x_values.view('i8') y_values = y_values.view('i8') else: - mask = isnull(x_values) + mask = isna(x_values) return expressions.where(mask, y_values, x_values, raise_on_error=True) @@ -3998,18 +4014,18 @@ def update(self, other, join='left', overwrite=True, filter_func=None, that = other[col].values if filter_func is not None: with np.errstate(all='ignore'): - mask = ~filter_func(this) | isnull(that) + mask = ~filter_func(this) | isna(that) else: if raise_conflict: - mask_this = notnull(that) - mask_that = notnull(this) + mask_this = notna(that) + mask_that = notna(this) if any(mask_this & mask_that): raise ValueError("Data overlaps.") if overwrite: - mask = isnull(that) + mask = isna(that) else: - mask = notnull(this) + mask = notna(this) # don't overwrite columns unecessarily if mask.all(): @@ -5181,7 +5197,7 @@ def cov(self, min_periods=None): idx = cols.copy() mat = numeric_df.values - if notnull(mat).all(): + if notna(mat).all(): if min_periods is not None and min_periods > len(mat): baseCov = np.empty((mat.shape[1], mat.shape[1])) baseCov.fill(np.nan) @@ -5281,9 +5297,9 @@ def count(self, axis=0, level=None, numeric_only=False): result = Series(0, index=frame._get_agg_axis(axis)) else: if frame._is_mixed_type: - result = notnull(frame).sum(axis=axis) + result = notna(frame).sum(axis=axis) else: - counts = notnull(frame.values).sum(axis=axis) + counts = notna(frame.values).sum(axis=axis) result = Series(counts, 
index=frame._get_agg_axis(axis)) return result.astype('int64') @@ -5302,12 +5318,12 @@ def _count_level(self, level, axis=0, numeric_only=False): self._get_axis_name(axis)) if frame._is_mixed_type: - # Since we have mixed types, calling notnull(frame.values) might + # Since we have mixed types, calling notna(frame.values) might # upcast everything to object - mask = notnull(frame).values + mask = notna(frame).values else: # But use the speedup when we have homogeneous dtypes - mask = notnull(frame.values) + mask = notna(frame.values) if axis == 1: # We're transposing the mask rather than frame to avoid potential @@ -5400,7 +5416,7 @@ def f(x): try: if filter_type is None or filter_type == 'numeric': result = result.astype(np.float64) - elif filter_type == 'bool' and notnull(result).all(): + elif filter_type == 'bool' and notna(result).all(): result = result.astype(np.bool_) except (ValueError, TypeError): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f3b7b31557216..abccd76b2fbcb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -25,9 +25,8 @@ is_dict_like, is_re_compilable, pandas_dtype) -from pandas.core.dtypes.cast import ( - maybe_promote, maybe_upcast_putmask) -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask +from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.generic import ABCSeries, ABCPanel from pandas.core.common import (_values_from_object, @@ -54,7 +53,8 @@ isidentifier, set_function_name, cPickle as pkl) from pandas.core.ops import _align_method_FRAME import pandas.core.nanops as nanops -from pandas.util._decorators import Appender, Substitution, deprecate_kwarg +from pandas.util._decorators import (Appender, Substitution, + deprecate_kwarg) from pandas.util._validators import validate_bool_kwarg from pandas.core import config @@ -4000,7 +4000,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, inplace=inplace, downcast=downcast) elif isinstance(value, DataFrame) and self.ndim == 2: - new_data = self.where(self.notnull(), value) + new_data = self.where(self.notna(), value) else: raise ValueError("invalid fill value with a %s" % type(value)) @@ -4398,7 +4398,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, else: index = _maybe_transposed_self._get_axis(alt_ax) - if pd.isnull(index).any(): + if isna(index).any(): raise NotImplementedError("Interpolation with NaNs in the index " "has not been implemented. Try filling " "those NaNs before interpolating.") @@ -4503,14 +4503,14 @@ def asof(self, where, subset=None): loc -= 1 values = self._values - while loc > 0 and isnull(values[loc]): + while loc > 0 and isna(values[loc]): loc -= 1 return values[loc] if not isinstance(where, Index): where = Index(where) if is_list else Index([where]) - nulls = self.isnull() if is_series else self[subset].isnull().any(1) + nulls = self.isna() if is_series else self[subset].isna().any(1) if nulls.all(): if is_series: return self._constructor(np.nan, index=where, name=self.name) @@ -4533,38 +4533,50 @@ def asof(self, where, subset=None): # ---------------------------------------------------------------------- # Action Methods - _shared_docs['isnull'] = """ - Return a boolean same-sized object indicating if the values are null. + _shared_docs['isna'] = """ + Return a boolean same-sized object indicating if the values are na. 
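With the four methods added above, ``DataFrame`` exposes both spellings of each predicate, all delegating to the shared ``NDFrame`` implementation defined by the ``_shared_docs`` templates in ``generic.py`` below. A quick sketch, assuming the patched build:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'a': [1.0, np.nan], 'b': [None, 'x']})

    assert df.isna().equals(df.isnull())      # alias pair
    assert df.notna().equals(df.notnull())    # alias pair
    assert df.notna().equals(~df.isna())      # boolean inverses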
See Also -------- - notnull : boolean inverse of isnull + %(klass)s.notna : boolean inverse of isna + %(klass)s.isnull : alias of isna + isna : top-level isna """ - @Appender(_shared_docs['isnull']) + @Appender(_shared_docs['isna'] % _shared_doc_kwargs) + def isna(self): + return isna(self).__finalize__(self) + + @Appender(_shared_docs['isna'] % _shared_doc_kwargs) def isnull(self): - return isnull(self).__finalize__(self) + return isna(self).__finalize__(self) - _shared_docs['isnotnull'] = """ + _shared_docs['notna'] = """ Return a boolean same-sized object indicating if the values are - not null. + not na. See Also -------- - isnull : boolean inverse of notnull + %(klass)s.isna : boolean inverse of notna + %(klass)s.notnull : alias of notna + notna : top-level notna """ - @Appender(_shared_docs['isnotnull']) + @Appender(_shared_docs['notna'] % _shared_doc_kwargs) + def notna(self): + return notna(self).__finalize__(self) + + @Appender(_shared_docs['notna'] % _shared_doc_kwargs) def notnull(self): - return notnull(self).__finalize__(self) + return notna(self).__finalize__(self) def _clip_with_scalar(self, lower, upper, inplace=False): - if ((lower is not None and np.any(isnull(lower))) or - (upper is not None and np.any(isnull(upper)))): + if ((lower is not None and np.any(isna(lower))) or + (upper is not None and np.any(isna(upper)))): raise ValueError("Cannot use an NA value as a clip threshold") result = self.values - mask = isnull(result) + mask = isna(result) with np.errstate(all='ignore'): if upper is not None: @@ -4588,7 +4600,7 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): if axis is not None: axis = self._get_axis_number(axis) - if np.any(isnull(threshold)): + if np.any(isna(threshold)): raise ValueError("Cannot use an NA value as a clip threshold") # method is self.le for upper bound and self.ge for lower bound @@ -4597,7 +4609,7 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): return self._clip_with_scalar(None, threshold, inplace=inplace) return self._clip_with_scalar(threshold, None, inplace=inplace) - subset = method(threshold, axis=axis) | isnull(self) + subset = method(threshold, axis=axis) | isna(self) # GH #15390 # In order for where method to work, the threshold must @@ -5472,7 +5484,7 @@ def _align_series(self, other, join='outer', axis=None, level=None, right = other.reindex(join_index, level=level) # fill - fill_na = notnull(fill_value) or (method is not None) + fill_na = notna(fill_value) or (method is not None) if fill_na: left = left.fillna(fill_value, method=method, limit=limit, axis=fill_axis) @@ -6405,7 +6417,7 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, rs = (data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1) if freq is None: - mask = isnull(_values_from_object(self)) + mask = isna(_values_from_object(self)) np.putmask(rs.values, mask, np.nan) return rs @@ -6767,10 +6779,10 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs): if (skipna and issubclass(y.dtype.type, (np.datetime64, np.timedelta64))): result = accum_func(y, axis) - mask = isnull(self) + mask = isna(self) np.putmask(result, mask, tslib.iNaT) elif skipna and not issubclass(y.dtype.type, (np.integer, np.bool_)): - mask = isnull(self) + mask = isna(self) np.putmask(y, mask, mask_a) result = accum_func(y, axis) np.putmask(result, mask, mask_b) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index c8a7ee752d243..a388892e925b6 100644 --- a/pandas/core/groupby.py +++ 
b/pandas/core/groupby.py @@ -36,7 +36,7 @@ _ensure_categorical, _ensure_float) from pandas.core.dtypes.cast import maybe_downcast_to_dtype -from pandas.core.dtypes.missing import isnull, notnull, _maybe_fill +from pandas.core.dtypes.missing import isna, notna, _maybe_fill from pandas.core.common import (_values_from_object, AbstractMethodError, _default_index) @@ -1168,7 +1168,7 @@ def first_compat(x, axis=0): def first(x): x = np.asarray(x) - x = x[notnull(x)] + x = x[notna(x)] if len(x) == 0: return np.nan return x[0] @@ -1183,7 +1183,7 @@ def last_compat(x, axis=0): def last(x): x = np.asarray(x) - x = x[notnull(x)] + x = x[notna(x)] if len(x) == 0: return np.nan return x[-1] @@ -2357,7 +2357,7 @@ def ngroups(self): @cache_readonly def result_index(self): - if len(self.binlabels) != 0 and isnull(self.binlabels[0]): + if len(self.binlabels) != 0 and isna(self.binlabels[0]): return self.binlabels[1:] return self.binlabels @@ -3114,13 +3114,13 @@ def filter(self, func, dropna=True, *args, **kwargs): # noqa wrapper = lambda x: func(x, *args, **kwargs) # Interpret np.nan as False. - def true_and_notnull(x, *args, **kwargs): + def true_and_notna(x, *args, **kwargs): b = wrapper(x, *args, **kwargs) - return b and notnull(b) + return b and notna(b) try: indices = [self._get_index(name) for name, group in self - if true_and_notnull(group)] + if true_and_notna(group)] except ValueError: raise TypeError("the filter must return a boolean result") except TypeError: @@ -3142,9 +3142,9 @@ def nunique(self, dropna=True): 'val.dtype must be object, got %s' % val.dtype val, _ = algorithms.factorize(val, sort=False) sorter = np.lexsort((val, ids)) - _isnull = lambda a: a == -1 + _isna = lambda a: a == -1 else: - _isnull = isnull + _isna = isna ids, val = ids[sorter], val[sorter] @@ -3154,7 +3154,7 @@ def nunique(self, dropna=True): inc = np.r_[1, val[1:] != val[:-1]] # 1st item of each group is a new unique observation - mask = _isnull(val) + mask = _isna(val) if dropna: inc[idx] = 1 inc[mask] = 0 @@ -3316,7 +3316,7 @@ def count(self): ids, _, ngroups = self.grouper.group_info val = self.obj.get_values() - mask = (ids != -1) & ~isnull(val) + mask = (ids != -1) & ~isna(val) ids = _ensure_platform_int(ids) out = np.bincount(ids[mask], minlength=ngroups or None) @@ -3870,7 +3870,7 @@ def _choose_path(self, fast_path, slow_path, group): if res.shape == res_fast.shape: res_r = res.values.ravel() res_fast_r = res_fast.values.ravel() - mask = notnull(res_r) + mask = notna(res_r) if (res_r[mask] == res_fast_r[mask]).all(): path = fast_path @@ -3950,8 +3950,8 @@ def filter(self, func, dropna=True, *args, **kwargs): # noqa pass # interpret the result of the filter - if is_bool(res) or (is_scalar(res) and isnull(res)): - if res and notnull(res): + if is_bool(res) or (is_scalar(res) and isna(res)): + if res and notna(res): indices.append(self._get_index(name)) else: # non scalars aren't allowed @@ -4204,13 +4204,13 @@ def _apply_to_column_groupbys(self, func): def count(self): """ Compute count of group, excluding missing values """ from functools import partial - from pandas.core.dtypes.missing import _isnull_ndarraylike as isnull + from pandas.core.dtypes.missing import _isna_ndarraylike as isna data, _ = self._get_data_to_aggregate() ids, _, ngroups = self.grouper.group_info mask = ids != -1 - val = ((mask & ~isnull(blk.get_values())) for blk in data.blocks) + val = ((mask & ~isna(blk.get_values())) for blk in data.blocks) loc = (blk.mgr_locs for blk in data.blocks) counter = partial(count_level_2d, labels=ids, 
max_bin=ngroups, axis=1) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 714b952217c9d..fd9abcfb726bf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -14,7 +14,7 @@ from pandas.core.dtypes.generic import ABCSeries, ABCMultiIndex, ABCPeriodIndex -from pandas.core.dtypes.missing import isnull, array_equivalent +from pandas.core.dtypes.missing import isna, array_equivalent from pandas.core.dtypes.common import ( _ensure_int64, _ensure_object, @@ -42,8 +42,8 @@ from pandas.core.base import PandasObject, IndexOpsMixin import pandas.core.base as base -from pandas.util._decorators import (Appender, Substitution, - cache_readonly, deprecate_kwarg) +from pandas.util._decorators import ( + Appender, Substitution, cache_readonly, deprecate_kwarg) from pandas.core.indexes.frozen import FrozenList import pandas.core.common as com import pandas.core.dtypes.concat as _concat @@ -216,7 +216,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if inferred == 'integer': data = np.array(data, copy=copy, dtype=dtype) elif inferred in ['floating', 'mixed-integer-float']: - if isnull(data).any(): + if isna(data).any(): raise ValueError('cannot convert float ' 'NaN to integer') @@ -624,7 +624,7 @@ def where(self, cond, other=None): values = np.where(cond, values, other) - if self._is_numeric_dtype and np.any(isnull(values)): + if self._is_numeric_dtype and np.any(isna(values)): # We can't coerce to the numeric dtype of "self" (unless # it's float) if there are NaN values in our output. dtype = None @@ -735,7 +735,7 @@ def _coerce_scalar_to_index(self, item): """ dtype = self.dtype - if self._is_numeric_dtype and isnull(item): + if self._is_numeric_dtype and isna(item): # We can't coerce to the numeric dtype of "self" (unless # it's float) if there are NaN values in our output. dtype = None @@ -1821,7 +1821,7 @@ def _assert_take_fillable(self, values, indices, allow_fill=True, def _isnan(self): """ return if each value is nan""" if self._can_hold_na: - return isnull(self) + return isna(self) else: # shouldn't reach to this condition by checking hasnans beforehand values = np.empty(len(self), dtype=np.bool_) @@ -1844,7 +1844,7 @@ def hasnans(self): else: return False - def isnull(self): + def isna(self): """ Detect missing values @@ -1852,29 +1852,33 @@ def isnull(self): Returns ------- - a boolean array of whether my values are null + a boolean array of whether my values are na See also -------- - pandas.isnull : pandas version + isnull : alias of isna + pandas.isna : top-level isna """ return self._isnan + isnull = isna - def notnull(self): + def notna(self): """ - Reverse of isnull + Inverse of isna .. 
versionadded:: 0.20.0 Returns ------- - a boolean array of whether my values are not null + a boolean array of whether my values are not na See also -------- - pandas.notnull : pandas version + notnull : alias of notna + pandas.notna : top-level notna """ - return ~self.isnull() + return ~self.isna() + notnull = notna def putmask(self, mask, value): """ @@ -1922,7 +1926,7 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs): for x in values] # could have nans - mask = isnull(values) + mask = isna(values) if mask.any(): result = np.array(result) result[mask] = na_rep @@ -1960,7 +1964,7 @@ def to_native_types(self, slicer=None, **kwargs): def _format_native_types(self, na_rep='', quoting=None, **kwargs): """ actually format my specific types """ - mask = isnull(self) + mask = isna(self) if not self.is_object() and not quoting: values = np.asarray(self).astype(str) else: @@ -2411,7 +2415,7 @@ def _get_unique_index(self, dropna=False): if dropna: try: if self.hasnans: - values = values[~isnull(values)] + values = values[~isna(values)] except NotImplementedError: pass diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index cd8559bcca03c..845c71b6c41d8 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -17,7 +17,7 @@ from pandas.core.dtypes.generic import ( ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass) -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna from pandas.core import common as com, algorithms from pandas.core.algorithms import checked_add_with_arr from pandas.core.common import AbstractMethodError @@ -857,7 +857,7 @@ def _append_same_dtype(self, to_concat, name): def _ensure_datetimelike_to_i8(other): """ helper for coercing an input scalar or array to i8 """ - if lib.isscalar(other) and isnull(other): + if lib.isscalar(other) and isna(other): other = iNaT elif isinstance(other, ABCIndexClass): # convert tz if needed diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e6bc1790f2992..5a04c550f4502 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -23,7 +23,7 @@ _ensure_int64) from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.dtypes import DatetimeTZDtype -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna import pandas.core.dtypes.concat as _concat from pandas.errors import PerformanceWarning @@ -109,7 +109,7 @@ def wrapper(self, other): isinstance(other, compat.string_types)): other = _to_m8(other, tz=self.tz) result = func(other) - if isnull(other): + if isna(other): result.fill(nat_result) else: if isinstance(other, list): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e6b2bc0953680..aa2ad21ae37fd 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -2,7 +2,7 @@ import numpy as np -from pandas.core.dtypes.missing import notnull, isnull +from pandas.core.dtypes.missing import notna, isna from pandas.core.dtypes.generic import ABCPeriodIndex from pandas.core.dtypes.dtypes import IntervalDtype from pandas.core.dtypes.common import ( @@ -222,8 +222,8 @@ def _validate(self): raise ValueError("invalid options for 'closed': %s" % self.closed) if len(self.left) != len(self.right): raise ValueError('left and right must have the same length') - left_mask = notnull(self.left) - right_mask = notnull(self.right) + left_mask = notna(self.left) + right_mask = 
notna(self.right) if not (left_mask == right_mask).all(): raise ValueError('missing values must be missing in the same ' 'location both left and right sides') @@ -240,7 +240,7 @@ def hasnans(self): def _isnan(self): """ return if each value is nan""" if self._mask is None: - self._mask = isnull(self.left) + self._mask = isna(self.left) return self._mask @cache_readonly @@ -415,7 +415,7 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): right = [] for d in data: - if isnull(d): + if isna(d): left.append(np.nan) right.append(np.nan) continue diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ed7ca079a07b5..420788f9008cd 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -19,7 +19,7 @@ is_iterator, is_list_like, is_scalar) -from pandas.core.dtypes.missing import isnull, array_equivalent +from pandas.core.dtypes.missing import isna, array_equivalent from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.core.common import (_values_from_object, is_bool_indexer, @@ -783,8 +783,8 @@ def duplicated(self, keep='first'): @Appender(ibase._index_shared_docs['fillna']) def fillna(self, value=None, downcast=None): - # isnull is not implemented for MultiIndex - raise NotImplementedError('isnull is not defined for MultiIndex') + # isna is not implemented for MultiIndex + raise NotImplementedError('isna is not defined for MultiIndex') @Appender(_index_shared_docs['dropna']) def dropna(self, how='any'): @@ -920,7 +920,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, else: # weird all NA case - formatted = [pprint_thing(na if isnull(x) else x, + formatted = [pprint_thing(na if isna(x) else x, escape_chars=('\t', '\r', '\n')) for x in algos.take_1d(lev._values, lab)] stringified_levels.append(formatted) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 68713743d72ed..2823951c0f348 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -13,7 +13,7 @@ is_timedelta64_dtype, is_timedelta64_ns_dtype, _ensure_int64) -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna from pandas.core.dtypes.generic import ABCSeries from pandas.core.common import _maybe_box, _values_from_object @@ -51,7 +51,7 @@ def wrapper(self, other): # failed to parse as timedelta raise TypeError(msg.format(type(other))) result = func(other) - if isnull(other): + if isna(other): result.fill(nat_result) else: if not is_list_like(other): @@ -331,7 +331,7 @@ def _evaluate_with_timedelta_like(self, other, op, opstr): if opstr in ['__div__', '__truediv__', '__floordiv__']: if _is_convertible_to_td(other): other = Timedelta(other) - if isnull(other): + if isna(other): raise NotImplementedError( "division by pd.NaT not implemented") @@ -430,7 +430,7 @@ def components(self): hasnans = self.hasnans if hasnans: def f(x): - if isnull(x): + if isna(x): return [np.nan] * len(columns) return x.components else: @@ -685,7 +685,7 @@ def get_loc(self, key, method=None, tolerance=None): if is_list_like(key): raise TypeError - if isnull(key): + if isna(key): key = NaT if tolerance is not None: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 38cc5431a004f..8f6b00fd204cc 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -15,7 +15,7 @@ is_sparse, _is_unorderable_exception, _ensure_platform_int) -from pandas.core.dtypes.missing import isnull, _infer_fill_value +from pandas.core.dtypes.missing 
import isna, _infer_fill_value from pandas.core.index import Index, MultiIndex @@ -1428,7 +1428,7 @@ def _has_valid_type(self, key, axis): else: def error(): - if isnull(key): + if isna(key): raise TypeError("cannot use label indexing with a null " "key") raise KeyError("the label [%s] is not in the [%s]" % @@ -1940,7 +1940,7 @@ def check_bool_indexer(ax, key): result = key if isinstance(key, ABCSeries) and not key.index.equals(ax): result = result.reindex(ax) - mask = isnull(result._values) + mask = isna(result._values) if mask.any(): raise IndexingError('Unalignable boolean Series provided as ' 'indexer (index of the boolean Series and of ' diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 8f3667edf68e6..25c367fcbd968 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -44,8 +44,8 @@ astype_nansafe, find_common_type) from pandas.core.dtypes.missing import ( - isnull, notnull, array_equivalent, - _is_na_compat, + isna, notna, array_equivalent, + _isna_compat, is_null_datelike_scalar) import pandas.core.dtypes.concat as _concat @@ -371,7 +371,7 @@ def fillna(self, value, limit=None, inplace=False, downcast=None, else: return self.copy() - mask = isnull(self.values) + mask = isna(self.values) if limit is not None: if not is_integer(limit): raise ValueError('Limit must be an integer') @@ -633,7 +633,7 @@ def _try_cast_result(self, result, dtype=None): dtype = dtype.type if issubclass(dtype, (np.bool_, np.object_)): if issubclass(dtype, np.bool_): - if isnull(result).all(): + if isna(result).all(): return result.astype(np.bool_) else: result = result.astype(np.object_) @@ -651,7 +651,7 @@ def _try_cast_result(self, result, dtype=None): def _try_coerce_args(self, values, other): """ provide coercion to our input arguments """ - if np.any(notnull(other)) and not self._can_hold_element(other): + if np.any(notna(other)) and not self._can_hold_element(other): # coercion issues # let higher levels handle raise TypeError("cannot convert {} to an {}".format( @@ -676,7 +676,7 @@ def to_native_types(self, slicer=None, na_rep='nan', quoting=None, values = self.values if slicer is not None: values = values[:, slicer] - mask = isnull(values) + mask = isna(values) if not self.is_object and not quoting: values = values.astype(str) @@ -764,7 +764,7 @@ def setitem(self, indexer, value, mgr=None): find_dtype = True elif is_scalar(value): - if isnull(value): + if isna(value): # NaN promotion is handled in latter path dtype = False else: @@ -894,7 +894,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, mask = mask.values # if we are passed a scalar None, convert it here - if not is_list_like(new) and isnull(new) and not self.is_object: + if not is_list_like(new) and isna(new) and not self.is_object: new = self.fill_value if self._can_hold_element(new): @@ -1504,7 +1504,7 @@ def _nanpercentile1D(values, mask, q, **kw): def _nanpercentile(values, q, axis, **kw): - mask = isnull(self.values) + mask = isna(self.values) if not is_scalar(mask) and mask.any(): if self.ndim == 1: return _nanpercentile1D(values, mask, q, **kw) @@ -1750,7 +1750,7 @@ def to_native_types(self, slicer=None, na_rep='', float_format=None, # output (important for appropriate 'quoting' behaviour), # so do not pass it through the FloatArrayFormatter if float_format is None and decimal == '.': - mask = isnull(values) + mask = isna(values) if not quoting: values = values.astype(str) @@ -1869,7 +1869,7 @@ def _try_coerce_args(self, values, other): base-type values, values mask, base-type 
other, other mask """ - values_mask = isnull(values) + values_mask = isna(values) values = values.view('i8') other_mask = False @@ -1879,15 +1879,15 @@ def _try_coerce_args(self, values, other): other = tslib.iNaT other_mask = True elif isinstance(other, Timedelta): - other_mask = isnull(other) + other_mask = isna(other) other = other.value elif isinstance(other, timedelta): other = Timedelta(other).value elif isinstance(other, np.timedelta64): - other_mask = isnull(other) + other_mask = isna(other) other = Timedelta(other).value elif hasattr(other, 'dtype') and is_timedelta64_dtype(other): - other_mask = isnull(other) + other_mask = isna(other) other = other.astype('i8', copy=False).view('i8') else: # coercion issues @@ -1899,7 +1899,7 @@ def _try_coerce_args(self, values, other): def _try_coerce_result(self, result): """ reverse of try_coerce_args / try_operate """ if isinstance(result, np.ndarray): - mask = isnull(result) + mask = isna(result) if result.dtype.kind in ['i', 'f', 'O']: result = result.astype('m8[ns]') result[mask] = tslib.iNaT @@ -1917,7 +1917,7 @@ def to_native_types(self, slicer=None, na_rep=None, quoting=None, values = self.values if slicer is not None: values = values[:, slicer] - mask = isnull(values) + mask = isna(values) rvalues = np.empty(values.shape, dtype=object) if na_rep is None: @@ -2178,7 +2178,7 @@ def _replace_single(self, to_replace, value, inplace=False, filter=None, # deal with replacing values with objects (strings) that match but # whose replacement is not a string (numeric, nan, object) - if isnull(value) or not isinstance(value, compat.string_types): + if isna(value) or not isinstance(value, compat.string_types): def re_replacer(s): try: @@ -2333,7 +2333,7 @@ def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs): if slicer is not None: # Categorical is always one dimension values = values[slicer] - mask = isnull(values) + mask = isna(values) values = np.array(values, dtype='object') values[mask] = na_rep @@ -2377,7 +2377,7 @@ def _can_hold_element(self, element): element = np.array(element) return element.dtype == _NS_DTYPE or element.dtype == np.int64 return (is_integer(element) or isinstance(element, datetime) or - isnull(element)) + isna(element)) def _try_coerce_args(self, values, other): """ @@ -2396,7 +2396,7 @@ def _try_coerce_args(self, values, other): base-type values, values mask, base-type other, other mask """ - values_mask = isnull(values) + values_mask = isna(values) values = values.view('i8') other_mask = False @@ -2410,10 +2410,10 @@ def _try_coerce_args(self, values, other): if getattr(other, 'tz') is not None: raise TypeError("cannot coerce a Timestamp with a tz on a " "naive Block") - other_mask = isnull(other) + other_mask = isna(other) other = other.asm8.view('i8') elif hasattr(other, 'dtype') and is_datetime64_dtype(other): - other_mask = isnull(other) + other_mask = isna(other) other = other.astype('i8', copy=False).view('i8') else: # coercion issues @@ -2540,26 +2540,26 @@ def _try_coerce_args(self, values, other): ------- base-type values, values mask, base-type other, other mask """ - values_mask = _block_shape(isnull(values), ndim=self.ndim) + values_mask = _block_shape(isna(values), ndim=self.ndim) # asi8 is a view, needs copy values = _block_shape(values.asi8, ndim=self.ndim) other_mask = False if isinstance(other, ABCSeries): other = self._holder(other) - other_mask = isnull(other) + other_mask = isna(other) if isinstance(other, bool): raise TypeError elif (is_null_datelike_scalar(other) or - 
(is_scalar(other) and isnull(other))): + (is_scalar(other) and isna(other))): other = tslib.iNaT other_mask = True elif isinstance(other, self._holder): if other.tz != self.values.tz: raise ValueError("incompatible or non tz-aware value") other = other.asi8 - other_mask = isnull(other) + other_mask = isna(other) elif isinstance(other, (np.datetime64, datetime, date)): other = lib.Timestamp(other) tz = getattr(other, 'tz', None) @@ -2567,7 +2567,7 @@ def _try_coerce_args(self, values, other): # test we can have an equal time zone if tz is None or str(tz) != str(self.values.tz): raise ValueError("incompatible or non tz-aware value") - other_mask = isnull(other) + other_mask = isna(other) other = other.value else: raise TypeError @@ -3292,7 +3292,7 @@ def reduction(self, f, axis=0, consolidate=True, transposed=False, placement=np.arange(len(values)))], axes[0]) - def isnull(self, **kwargs): + def isna(self, **kwargs): return self.apply('apply', **kwargs) def where(self, **kwargs): @@ -3347,8 +3347,8 @@ def replace_list(self, src_list, dest_list, inplace=False, regex=False, values = self.as_matrix() def comp(s): - if isnull(s): - return isnull(values) + if isna(s): + return isna(values) return _maybe_compare(values, getattr(s, 'asm8', s), operator.eq) masks = [comp(s) for i, s in enumerate(src_list)] @@ -3681,10 +3681,10 @@ def get(self, item, fastpath=True): """ if self.items.is_unique: - if not isnull(item): + if not isna(item): loc = self.items.get_loc(item) else: - indexer = np.arange(len(self.items))[isnull(self.items)] + indexer = np.arange(len(self.items))[isna(self.items)] # allow a single nan location indexer if not is_scalar(indexer): @@ -3696,7 +3696,7 @@ def get(self, item, fastpath=True): return self.iget(loc, fastpath=fastpath) else: - if isnull(item): + if isna(item): raise TypeError("cannot label index with a null key") indexer = self.items.get_indexer_for([item]) @@ -4886,7 +4886,7 @@ def _putmask_smart(v, m, n): # make sure that we have a nullable type # if we have nulls - if not _is_na_compat(v, nn[0]): + if not _isna_compat(v, nn[0]): raise ValueError # we ignore ComplexWarning here @@ -5010,7 +5010,7 @@ def get_empty_dtype_and_na(join_units): # Null blocks should not influence upcast class selection, unless there # are only null blocks, when same upcasting rules must be applied to # null upcast classes. 
- if unit.is_null: + if unit.is_na: null_upcast_classes[upcast_cls].append(dtype) else: upcast_classes[upcast_cls].append(dtype) @@ -5280,7 +5280,7 @@ def dtype(self): self.block.fill_value)[0]) @cache_readonly - def is_null(self): + def is_na(self): if self.block is None: return True @@ -5303,7 +5303,7 @@ def is_null(self): total_len = values_flat.shape[0] chunk_len = max(total_len // 40, 1000) for i in range(0, total_len, chunk_len): - if not isnull(values_flat[i:i + chunk_len]).all(): + if not isna(values_flat[i:i + chunk_len]).all(): return False return True @@ -5316,7 +5316,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): else: fill_value = upcasted_na - if self.is_null: + if self.is_na: if getattr(self.block, 'is_object', False): # we want to avoid filling with np.nan if we are # using None; we already know that we are all diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 5aabc9d8730dd..93281e20a2a96 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -20,7 +20,7 @@ _ensure_float64) from pandas.core.dtypes.cast import infer_dtype_from_array -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna def mask_missing(arr, values_to_mask): @@ -36,7 +36,7 @@ def mask_missing(arr, values_to_mask): except Exception: values_to_mask = np.array(values_to_mask, dtype=object) - na_mask = isnull(values_to_mask) + na_mask = isna(values_to_mask) nonna = values_to_mask[~na_mask] mask = None @@ -63,9 +63,9 @@ def mask_missing(arr, values_to_mask): if na_mask.any(): if mask is None: - mask = isnull(arr) + mask = isna(arr) else: - mask |= isnull(arr) + mask |= isna(arr) return mask @@ -122,7 +122,7 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None, """ # Treat the original, non-scipy methods first. 
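``interpolate_1d`` above starts from ``invalid = isna(yvalues)`` and only ever writes into those invalid slots. A sketch of the user-facing effect, assuming the patched build:

    import numpy as np
    import pandas as pd

    s = pd.Series([0.0, np.nan, np.nan, 3.0])
    # the two NaN positions form the invalid mask and get linearly filled
    print(s.interpolate().tolist())   # [0.0, 1.0, 2.0, 3.0]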
- invalid = isnull(yvalues) + invalid = isna(yvalues) valid = ~invalid if not valid.any(): @@ -479,7 +479,7 @@ def pad_1d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for pad_1d [%s]' % dtype.name) if mask is None: - mask = isnull(values) + mask = isna(values) mask = mask.view(np.uint8) _method(values, mask, limit=limit) return values @@ -503,7 +503,7 @@ def backfill_1d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for backfill_1d [%s]' % dtype.name) if mask is None: - mask = isnull(values) + mask = isna(values) mask = mask.view(np.uint8) _method(values, mask, limit=limit) @@ -528,7 +528,7 @@ def pad_2d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for pad_2d [%s]' % dtype.name) if mask is None: - mask = isnull(values) + mask = isna(values) mask = mask.view(np.uint8) if np.all(values.shape): @@ -557,7 +557,7 @@ def backfill_2d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name) if mask is None: - mask = isnull(values) + mask = isna(values) mask = mask.view(np.uint8) if np.all(values.shape): diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 1d64f87b15761..5bebb8eb65b23 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -16,7 +16,7 @@ is_datetime_or_timedelta_dtype, is_int_or_datetime_dtype, is_any_int_dtype) from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.config import get_option from pandas.core.common import _values_from_object @@ -195,7 +195,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, if isfinite: mask = _isfinite(values) else: - mask = isnull(values) + mask = isna(values) dtype = values.dtype dtype_ok = _na_ok_dtype(dtype) @@ -232,7 +232,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, def _isfinite(values): if is_datetime_or_timedelta_dtype(values): - return isnull(values) + return isna(values) if (is_complex_dtype(values) or is_float_dtype(values) or is_integer_dtype(values) or is_bool_dtype(values)): return ~np.isfinite(values) @@ -329,7 +329,7 @@ def nanmedian(values, axis=None, skipna=True): values, mask, dtype, dtype_max = _get_values(values, skipna) def get_median(x): - mask = notnull(x) + mask = notna(x) if not skipna and not mask.all(): return np.nan return algos.median(_values_from_object(x[mask])) @@ -395,7 +395,7 @@ def nanvar(values, axis=None, skipna=True, ddof=1): values = _values_from_object(values) dtype = values.dtype - mask = isnull(values) + mask = isna(values) if is_any_int_dtype(values): values = values.astype('f8') values[mask] = np.nan @@ -434,7 +434,7 @@ def nanvar(values, axis=None, skipna=True, ddof=1): def nansem(values, axis=None, skipna=True, ddof=1): var = nanvar(values, axis, skipna, ddof=ddof) - mask = isnull(values) + mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') count, _ = _get_counts_nanvar(mask, axis, ddof, values.dtype) @@ -503,7 +503,7 @@ def nanskew(values, axis=None, skipna=True): """ values = _values_from_object(values) - mask = isnull(values) + mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') count = _get_counts(mask, axis) @@ -558,7 +558,7 @@ def nankurt(values, axis=None, skipna=True): """ values = _values_from_object(values) - mask = isnull(values) + mask = isna(values) if not 
is_float_dtype(values.dtype): values = values.astype('f8') count = _get_counts(mask, axis) @@ -615,7 +615,7 @@ def nankurt(values, axis=None, skipna=True): @disallow('M8', 'm8') def nanprod(values, axis=None, skipna=True): - mask = isnull(values) + mask = isna(values) if skipna and not is_any_int_dtype(values): values = values.copy() values[mask] = 1 @@ -696,7 +696,7 @@ def nancorr(a, b, method='pearson', min_periods=None): if min_periods is None: min_periods = 1 - valid = notnull(a) & notnull(b) + valid = notna(a) & notna(b) if not valid.all(): a = a[valid] b = b[valid] @@ -740,7 +740,7 @@ def nancov(a, b, min_periods=None): if min_periods is None: min_periods = 1 - valid = notnull(a) & notnull(b) + valid = notna(a) & notna(b) if not valid.all(): a = a[valid] b = b[valid] @@ -778,8 +778,8 @@ def _ensure_numeric(x): def make_nancomp(op): def f(x, y): - xmask = isnull(x) - ymask = isnull(y) + xmask = isna(x) + ymask = isna(y) mask = xmask | ymask with np.errstate(all='ignore'): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index bc201be26b756..4e08e1483d617 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -23,7 +23,7 @@ from pandas.errors import PerformanceWarning from pandas.core.common import _values_from_object, _maybe_match_name -from pandas.core.dtypes.missing import notnull, isnull +from pandas.core.dtypes.missing import notna, isna from pandas.core.dtypes.common import ( needs_i8_conversion, is_datetimelike_v_numeric, @@ -465,7 +465,7 @@ def _convert_to_array(self, values, name=None, other=None): # we are in the wrong path if (supplied_dtype is None and other is not None and (other.dtype in ('timedelta64[ns]', 'datetime64[ns]')) and - isnull(values).all()): + isna(values).all()): values = np.empty(values.shape, dtype='timedelta64[ns]') values[:] = iNaT @@ -496,7 +496,7 @@ def _convert_to_array(self, values, name=None, other=None): raise TypeError("incompatible type for a datetime/timedelta " "operation [{0}]".format(name)) elif inferred_type == 'floating': - if (isnull(values).all() and + if (isna(values).all() and name in ('__add__', '__radd__', '__sub__', '__rsub__')): values = np.empty(values.shape, dtype=other.dtype) values[:] = iNaT @@ -512,7 +512,7 @@ def _convert_to_array(self, values, name=None, other=None): def _convert_for_datetime(self, lvalues, rvalues): from pandas.core.tools.timedeltas import to_timedelta - mask = isnull(lvalues) | isnull(rvalues) + mask = isna(lvalues) | isna(rvalues) # datetimes require views if self.is_datetime_lhs or self.is_datetime_rhs: @@ -662,11 +662,11 @@ def na_op(x, y): if isinstance(y, (np.ndarray, ABCSeries, pd.Index)): dtype = find_common_type([x.dtype, y.dtype]) result = np.empty(x.size, dtype=dtype) - mask = notnull(x) & notnull(y) + mask = notna(x) & notna(y) result[mask] = op(x[mask], _values_from_object(y[mask])) elif isinstance(x, np.ndarray): result = np.empty(len(x), dtype=x.dtype) - mask = notnull(x) + mask = notna(x) result[mask] = op(x[mask], y) else: raise TypeError("{typ} cannot perform the operation " @@ -776,7 +776,7 @@ def na_op(x, y): raise TypeError("invalid type comparison") # numpy does not like comparisons vs None - if is_scalar(y) and isnull(y): + if is_scalar(y) and isna(y): if name == '__ne__': return np.ones(len(x), dtype=bool) else: @@ -788,10 +788,10 @@ def na_op(x, y): (not is_scalar(y) and needs_i8_conversion(y))): if is_scalar(y): - mask = isnull(x) + mask = isna(x) y = libindex.convert_scalar(x, _values_from_object(y)) else: - mask = isnull(x) | isnull(y) + mask = isna(x) | isna(y) y = 
y.view('i8') x = x.view('i8') @@ -898,7 +898,7 @@ def na_op(x, y): try: # let null fall thru - if not isnull(y): + if not isna(y): y = bool(y) result = lib.scalar_binop(x, y, op) except: @@ -1182,7 +1182,7 @@ def na_op(x, y): dtype = np.find_common_type([x.dtype, y.dtype], []) result = np.empty(x.size, dtype=dtype) yrav = y.ravel() - mask = notnull(xrav) & notnull(yrav) + mask = notna(xrav) & notna(yrav) xrav = xrav[mask] # we may need to manually @@ -1197,7 +1197,7 @@ def na_op(x, y): result[mask] = op(xrav, yrav) elif hasattr(x, 'size'): result = np.empty(x.size, dtype=x.dtype) - mask = notnull(xrav) + mask = notna(xrav) xrav = xrav[mask] if np.prod(xrav.shape): with np.errstate(all='ignore'): @@ -1259,11 +1259,11 @@ def na_op(x, y): result = np.empty(x.size, dtype=bool) if isinstance(y, (np.ndarray, ABCSeries)): yrav = y.ravel() - mask = notnull(xrav) & notnull(yrav) + mask = notna(xrav) & notna(yrav) result[mask] = op(np.array(list(xrav[mask])), np.array(list(yrav[mask]))) else: - mask = notnull(xrav) + mask = notna(xrav) result[mask] = op(np.array(list(xrav[mask])), y) if op == operator.ne: # pragma: no cover @@ -1335,7 +1335,7 @@ def na_op(x, y): # TODO: might need to find_common_type here? result = np.empty(len(x), dtype=x.dtype) - mask = notnull(x) + mask = notna(x) result[mask] = op(x[mask], y) result, changed = maybe_upcast_putmask(result, ~mask, np.nan) @@ -1365,11 +1365,11 @@ def na_op(x, y): result = np.empty(x.size, dtype=bool) if isinstance(y, np.ndarray): yrav = y.ravel() - mask = notnull(xrav) & notnull(yrav) + mask = notna(xrav) & notna(yrav) result[mask] = op(np.array(list(xrav[mask])), np.array(list(yrav[mask]))) else: - mask = notnull(xrav) + mask = notna(xrav) result[mask] = op(np.array(list(xrav[mask])), y) if op == operator.ne: # pragma: no cover diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 609bf3186344a..e4515efe109c5 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -13,7 +13,7 @@ from pandas.core.dtypes.common import ( is_integer, is_list_like, is_string_like, is_scalar) -from pandas.core.dtypes.missing import notnull +from pandas.core.dtypes.missing import notna import pandas.core.computation.expressions as expressions import pandas.core.common as com @@ -685,7 +685,7 @@ def dropna(self, axis=0, how='any', inplace=False): axis = self._get_axis_number(axis) values = self.values - mask = notnull(values) + mask = notna(values) for ax in reversed(sorted(set(range(self._AXIS_LEN)) - set([axis]))): mask = mask.sum(ax) @@ -907,7 +907,7 @@ def to_frame(self, filter_observations=True): if filter_observations: # shaped like the return DataFrame - mask = notnull(self.values).all(axis=0) + mask = notna(self.values).all(axis=0) # size = mask.sum() selector = mask.ravel() else: diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 890555477425d..c2fb81178433e 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -175,7 +175,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', if margins: if dropna: - data = data[data.notnull().all(axis=1)] + data = data[data.notna().all(axis=1)] table = _add_margins(table, data, values, rows=index, cols=columns, aggfunc=aggfunc, margins_name=margins_name) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index dcb83d225699d..b7638471f2ad0 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -12,7 +12,7 @@ is_list_like, is_bool_dtype, needs_i8_conversion) from 
pandas.core.dtypes.cast import maybe_promote -from pandas.core.dtypes.missing import notnull +from pandas.core.dtypes.missing import notna import pandas.core.dtypes.concat as _concat from pandas.core.series import Series @@ -547,7 +547,7 @@ def factorize(index): new_values = frame.values.ravel() if dropna: - mask = notnull(new_values) + mask = notna(new_values) new_values = new_values[mask] new_index = new_index[mask] return Series(new_values, index=new_index) @@ -835,7 +835,7 @@ def lreshape(data, groups, dropna=True, label=None): if dropna: mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool) for c in pivot_cols: - mask &= notnull(mdata[c]) + mask &= notna(mdata[c]) if not mask.all(): mdata = dict((k, v[mask]) for k, v in compat.iteritems(mdata)) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index d8398023a5083..1cb39faa2e869 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -2,7 +2,7 @@ Quantilization functions and related stuff """ -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna from pandas.core.dtypes.common import ( is_integer, is_scalar, @@ -241,7 +241,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, if include_lowest: ids[x == bins[0]] = 1 - na_mask = isnull(x) | (ids == len(bins)) | (ids == 0) + na_mask = isna(x) | (ids == len(bins)) | (ids == 0) has_nas = na_mask.any() if labels is not False: diff --git a/pandas/core/series.py b/pandas/core/series.py index c7ead292c8b63..fb5819b2748a0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -36,7 +36,8 @@ maybe_upcast, infer_dtype_from_scalar, maybe_convert_platform, maybe_cast_to_datetime, maybe_castable) -from pandas.core.dtypes.missing import isnull, notnull, remove_na_arraylike +from pandas.core.dtypes.missing import isna, notna, remove_na_arraylike + from pandas.core.common import (is_bool_indexer, _default_index, _asarray_tuplesafe, @@ -745,7 +746,7 @@ def setitem(key, value): pass elif is_timedelta64_dtype(self.dtype): # reassign a null value to iNaT - if isnull(value): + if isna(value): value = iNaT try: @@ -1226,7 +1227,7 @@ def count(self, level=None): from pandas.core.index import _get_na_value if level is None: - return notnull(_values_from_object(self)).sum() + return notna(_values_from_object(self)).sum() if isinstance(level, compat.string_types): level = self.index._get_level_number(level) @@ -1239,7 +1240,7 @@ def count(self, level=None): lab[mask] = cnt = len(lev) lev = lev.insert(cnt, _get_na_value(lev.dtype.type)) - obs = lab[notnull(self.values)] + obs = lab[notna(self.values)] out = np.bincount(obs, minlength=len(lev) or None) return self._constructor(out, index=lev, dtype='int64').__finalize__(self) @@ -1665,8 +1666,8 @@ def _binop(self, other, func, level=None, fill_value=None): other_vals = other.values if fill_value is not None: - this_mask = isnull(this_vals) - other_mask = isnull(other_vals) + this_mask = isna(this_vals) + other_mask = isna(other_vals) this_vals = this_vals.copy() other_vals = other_vals.copy() @@ -1735,7 +1736,7 @@ def combine_first(self, other): other = other.reindex(new_index, copy=False) # TODO: do we need name? 
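
# [editor's note] A minimal, self-contained sketch (not the pandas source
# itself) of the convention this patch series establishes: isna/notna are
# the canonical names, and the legacy isnull/notnull spellings are kept as
# aliases, so both pairs below are interchangeable.
import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, 3.0])
assert (pd.isna(s) == pd.isnull(s)).all()    # elementwise NA mask
assert (pd.notna(s) == pd.notnull(s)).all()  # complement of that mask
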
name = _maybe_match_name(self, other) # noqa - rs_vals = com._where_compat(isnull(this), other._values, this._values) + rs_vals = com._where_compat(isna(this), other._values, this._values) return self._constructor(rs_vals, index=new_index).__finalize__(self) def update(self, other): @@ -1748,7 +1749,7 @@ def update(self, other): other : Series """ other = other.reindex_like(self) - mask = notnull(other) + mask = notna(other) self._data = self._data.putmask(mask=mask, new=other, inplace=True) self._maybe_update_cacher() @@ -1781,7 +1782,7 @@ def _try_kind_sort(arr): arr = self._values sortedIdx = np.empty(len(self), dtype=np.int32) - bad = isnull(arr) + bad = isna(arr) good = ~bad idx = _default_index(len(self)) @@ -1886,7 +1887,7 @@ def argsort(self, axis=0, kind='quicksort', order=None): numpy.ndarray.argsort """ values = self._values - mask = isnull(values) + mask = isna(values) if mask.any(): result = Series(-1, index=self.index, name=self.name, @@ -2215,7 +2216,7 @@ def map(self, arg, na_action=None): if na_action == 'ignore': def map_f(values, f): return lib.map_infer_mask(values, f, - isnull(values).view(np.uint8)) + isna(values).view(np.uint8)) else: map_f = lib.map_infer @@ -2783,6 +2784,22 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', merge_cells=merge_cells, encoding=encoding, inf_rep=inf_rep, verbose=verbose) + @Appender(generic._shared_docs['isna'] % _shared_doc_kwargs) + def isna(self): + return super(Series, self).isna() + + @Appender(generic._shared_docs['isna'] % _shared_doc_kwargs) + def isnull(self): + return super(Series, self).isnull() + + @Appender(generic._shared_docs['notna'] % _shared_doc_kwargs) + def notna(self): + return super(Series, self).notna() + + @Appender(generic._shared_docs['notna'] % _shared_doc_kwargs) + def notnull(self): + return super(Series, self).notnull() + def dropna(self, axis=0, inplace=False, **kwargs): """ Return Series without null values @@ -2824,7 +2841,7 @@ def first_valid_index(self): if len(self) == 0: return None - mask = isnull(self._values) + mask = isna(self._values) i = mask.argmin() if mask[i]: return None @@ -2838,7 +2855,7 @@ def last_valid_index(self): if len(self) == 0: return None - mask = isnull(self._values[::-1]) + mask = isna(self._values[::-1]) i = mask.argmin() if mask[i]: return None @@ -3010,7 +3027,7 @@ def _try_cast(arr, take_fast_path): # possibility of nan -> garbage if is_float_dtype(data.dtype) and is_integer_dtype(dtype): - if not isnull(data).any(): + if not isna(data).any(): subarr = _try_cast(data, True) elif copy: subarr = data.copy() diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 44a27bb5cbae1..12e8d8aba9177 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -9,7 +9,7 @@ is_list_like, is_categorical_dtype) from pandas.core.dtypes.cast import infer_dtype_from_array -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algorithms from pandas._libs import lib, algos, hashtable from pandas._libs.hashtable import unique_label_indices @@ -239,7 +239,7 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): items = np.asanyarray(items) idx = np.arange(len(items)) - mask = isnull(items) + mask = isna(items) non_nans = items[~mask] non_nan_idx = idx[~mask] nan_idx = np.nonzero(mask)[0] diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 42fc5189eebd8..4a12dd1af28c9 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ 
-27,7 +27,7 @@ from pandas.core.dtypes.cast import ( maybe_convert_platform, maybe_promote, astype_nansafe, find_common_type) -from pandas.core.dtypes.missing import isnull, notnull, na_value_for_dtype +from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype import pandas._libs.sparse as splib from pandas._libs.sparse import SparseIndex, BlockIndex, IntIndex @@ -579,12 +579,12 @@ def count(self): @property def _null_fill_value(self): - return isnull(self.fill_value) + return isna(self.fill_value) @property def _valid_sp_values(self): sp_vals = self.sp_values - mask = notnull(sp_vals) + mask = notna(sp_vals) return sp_vals[mask] @Appender(_index_shared_docs['fillna'] % _sparray_doc_kwargs) @@ -595,7 +595,7 @@ def fillna(self, value, downcast=None): if issubclass(self.dtype.type, np.floating): value = float(value) - new_values = np.where(isnull(self.sp_values), value, self.sp_values) + new_values = np.where(isna(self.sp_values), value, self.sp_values) fill_value = value if self._null_fill_value else self.fill_value return self._simple_new(new_values, self.sp_index, @@ -687,7 +687,7 @@ def value_counts(self, dropna=True): pass else: if self._null_fill_value: - mask = pd.isnull(keys) + mask = pd.isna(keys) else: mask = keys == self.fill_value @@ -767,8 +767,8 @@ def make_sparse(arr, kind='block', fill_value=None): if fill_value is None: fill_value = na_value_for_dtype(arr.dtype) - if isnull(fill_value): - mask = notnull(arr) + if isna(fill_value): + mask = notna(arr) else: # For str arrays in NumPy 1.12.0, operator!= below isn't # element-wise but just returns False if fill_value is not str, diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 462fb18618949..d8c0aa41edac1 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -10,7 +10,7 @@ from pandas import compat import numpy as np -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.cast import maybe_upcast, find_common_type from pandas.core.dtypes.common import _ensure_platform_int, is_scipy_sparse @@ -565,7 +565,7 @@ def _combine_match_index(self, other, func, level=None, fill_value=None, new_data[col] = func(series.values, other.values) # fill_value is a function of our operator - if isnull(other.fill_value) or isnull(self.default_fill_value): + if isna(other.fill_value) or isna(self.default_fill_value): fill_value = np.nan else: fill_value = func(np.float64(self.default_fill_value), @@ -651,7 +651,7 @@ def _reindex_columns(self, columns, method, copy, level, fill_value=None, if level is not None: raise TypeError('Reindex by level not supported for sparse') - if notnull(fill_value): + if notna(fill_value): raise NotImplementedError("'fill_value' argument is not supported") if limit: @@ -785,13 +785,15 @@ def cumsum(self, axis=0, *args, **kwargs): return self.apply(lambda x: x.cumsum(), axis=axis) - @Appender(generic._shared_docs['isnull']) - def isnull(self): - return self._apply_columns(lambda x: x.isnull()) + @Appender(generic._shared_docs['isna']) + def isna(self): + return self._apply_columns(lambda x: x.isna()) + isnull = isna - @Appender(generic._shared_docs['isnotnull']) - def isnotnull(self): - return self._apply_columns(lambda x: x.isnotnull()) + @Appender(generic._shared_docs['notna']) + def notna(self): + return self._apply_columns(lambda x: x.notna()) + notnull = notna def apply(self, func, axis=0, broadcast=False, reduce=False): """ diff --git a/pandas/core/sparse/series.py 
b/pandas/core/sparse/series.py index 1bc9cf5379930..62d20e73dbfcb 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -8,7 +8,7 @@ import numpy as np import warnings -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.common import is_scalar from pandas.core.common import _values_from_object, _maybe_match_name @@ -172,7 +172,7 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block', else: length = len(index) - if data == fill_value or (isnull(data) and isnull(fill_value)): + if data == fill_value or (isna(data) and isna(fill_value)): if kind == 'block': sparse_index = BlockIndex(length, [], []) else: @@ -641,19 +641,21 @@ def cumsum(self, axis=0, *args, **kwargs): new_array, index=self.index, sparse_index=new_array.sp_index).__finalize__(self) - @Appender(generic._shared_docs['isnull']) - def isnull(self): - arr = SparseArray(isnull(self.values.sp_values), + @Appender(generic._shared_docs['isna']) + def isna(self): + arr = SparseArray(isna(self.values.sp_values), sparse_index=self.values.sp_index, - fill_value=isnull(self.fill_value)) + fill_value=isna(self.fill_value)) return self._constructor(arr, index=self.index).__finalize__(self) + isnull = isna - @Appender(generic._shared_docs['isnotnull']) - def isnotnull(self): - arr = SparseArray(notnull(self.values.sp_values), + @Appender(generic._shared_docs['notna']) + def notna(self): + arr = SparseArray(notna(self.values.sp_values), sparse_index=self.values.sp_index, - fill_value=notnull(self.fill_value)) + fill_value=notna(self.fill_value)) return self._constructor(arr, index=self.index).__finalize__(self) + notnull = notna def dropna(self, axis=0, inplace=False, **kwargs): """ @@ -665,7 +667,7 @@ def dropna(self, axis=0, inplace=False, **kwargs): if inplace: raise NotImplementedError("Cannot perform inplace dropna" " operations on a SparseSeries") - if isnull(self.fill_value): + if isna(self.fill_value): return dense_valid else: dense_valid = dense_valid[dense_valid != self.fill_value] @@ -677,7 +679,7 @@ def shift(self, periods, freq=None, axis=0): return self.copy() # no special handling of fill values yet - if not isnull(self.fill_value): + if not isna(self.fill_value): shifted = self.to_dense().shift(periods, freq=freq, axis=axis) return shifted.to_sparse(fill_value=self.fill_value, diff --git a/pandas/core/strings.py b/pandas/core/strings.py index cd7e313b13f1e..30465561a911c 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2,7 +2,7 @@ from pandas.compat import zip from pandas.core.dtypes.generic import ABCSeries, ABCIndex -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.common import ( is_bool_dtype, is_categorical_dtype, @@ -101,7 +101,7 @@ def str_cat(arr, others=None, sep=None, na_rep=None): arrays = _get_array_list(arr, others) n = _length_check(arrays) - masks = np.array([isnull(x) for x in arrays]) + masks = np.array([isna(x) for x in arrays]) cats = None if na_rep is None: @@ -129,12 +129,12 @@ def str_cat(arr, others=None, sep=None, na_rep=None): return result else: arr = np.asarray(arr, dtype=object) - mask = isnull(arr) + mask = isna(arr) if na_rep is None and mask.any(): if sep == '': na_rep = '' else: - return sep.join(arr[notnull(arr)]) + return sep.join(arr[notna(arr)]) return sep.join(np.where(mask, na_rep, arr)) @@ -165,7 +165,7 @@ def _map(f, arr, na_mask=False, na_value=np.nan, 
dtype=object): if not isinstance(arr, np.ndarray): arr = np.asarray(arr, dtype=object) if na_mask: - mask = isnull(arr) + mask = isna(arr) try: convert = not all(mask) result = lib.map_infer_mask(arr, f, mask.view(np.uint8), convert) @@ -1391,7 +1391,7 @@ def __getitem__(self, key): def __iter__(self): i = 0 g = self.get(i) - while g.notnull().any(): + while g.notna().any(): yield g i += 1 g = self.get(i) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 9c02a6212c412..a1f323aff7c1a 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -18,7 +18,7 @@ from pandas.core.dtypes.generic import ( ABCIndexClass, ABCSeries, ABCDataFrame) -from pandas.core.dtypes.missing import notnull +from pandas.core.dtypes.missing import notna from pandas.core import algorithms import pandas.compat as compat @@ -176,7 +176,7 @@ def _guess_datetime_format(dt_str, dayfirst=False, def _guess_datetime_format_for_array(arr, **kwargs): # Try to guess the format based on the first non-NaN element - non_nan_elements = notnull(arr).nonzero()[0] + non_nan_elements = notna(arr).nonzero()[0] if len(non_nan_elements): return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs) @@ -665,7 +665,7 @@ def calc_with_mask(carg, mask): # a float with actual np.nan try: carg = arg.astype(np.float64) - return calc_with_mask(carg, notnull(carg)) + return calc_with_mask(carg, notna(carg)) except: pass @@ -744,7 +744,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): def _guess_time_format_for_array(arr): # Try to guess the format based on the first non-NaN element - non_nan_elements = notnull(arr).nonzero()[0] + non_nan_elements = notna(arr).nonzero()[0] if len(non_nan_elements): element = arr[non_nan_elements[0]] for time_format in _time_formats: diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index e41ffae9d03c2..07e993d7ef509 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -12,7 +12,7 @@ ABCDataFrame) from pandas.core.dtypes.common import ( is_categorical_dtype, is_list_like) -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna from pandas.core.dtypes.cast import infer_dtype_from_scalar @@ -215,7 +215,7 @@ def _hash_categorical(c, encoding, hash_key): # # TODO: GH 15362 - mask = c.isnull() + mask = c.isna() if len(hashed): result = hashed.take(c.codes) else: @@ -313,7 +313,7 @@ def _hash_scalar(val, encoding='utf8', hash_key=None): 1d uint64 numpy array of hash value, of length 1 """ - if isnull(val): + if isna(val): # this is to be consistent with the _hash_categorical implementation return np.array([np.iinfo(np.uint64).max], dtype='u8') diff --git a/pandas/core/window.py b/pandas/core/window.py index 57611794c375f..5866f1e8a76bd 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -828,7 +828,7 @@ def count(self): results = [] for b in blocks: - result = b.notnull().astype(int) + result = b.notna().astype(int) result = self._constructor(result, window=window, min_periods=0, center=self.center, closed=self.closed).sum() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 23eb3bb05fd0a..2b322431bd301 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -10,7 +10,7 @@ from textwrap import dedent -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.common import ( is_categorical_dtype, is_float_dtype, @@ -1562,7 
+1562,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', self.data_index = obj.index if (isinstance(self.data_index, (DatetimeIndex, PeriodIndex)) and date_format is not None): - self.data_index = Index([x.strftime(date_format) if notnull(x) else + self.data_index = Index([x.strftime(date_format) if notna(x) else '' for x in self.data_index]) self.nlevels = getattr(self.data_index, 'nlevels', 1) @@ -1816,7 +1816,7 @@ def _format(x): elif isinstance(vals, ABCSparseArray): vals = vals.values - is_float_type = lib.map_infer(vals, is_float) & notnull(vals) + is_float_type = lib.map_infer(vals, is_float) & notna(vals) leading_space = is_float_type.any() fmt_values = [] @@ -1862,10 +1862,10 @@ def _value_formatter(self, float_format=None, threshold=None): # because str(0.0) = '0.0' while '%g' % 0.0 = '0' if float_format: def base_formatter(v): - return (float_format % v) if notnull(v) else self.na_rep + return (float_format % v) if notna(v) else self.na_rep else: def base_formatter(v): - return str(v) if notnull(v) else self.na_rep + return str(v) if notna(v) else self.na_rep if self.decimal != '.': def decimal_formatter(v): @@ -1877,7 +1877,7 @@ def decimal_formatter(v): return decimal_formatter def formatter(value): - if notnull(value): + if notna(value): if abs(value) > threshold: return decimal_formatter(value) else: @@ -1907,7 +1907,7 @@ def format_values_with(float_format): # separate the wheat from the chaff values = self.values - mask = isnull(values) + mask = isna(values) if hasattr(values, 'to_dense'): # sparse numpy ndarray values = values.to_dense() values = np.array(values, dtype='object') diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index d88a230b42403..445fceb4b8146 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -771,7 +771,7 @@ def set_table_styles(self, table_styles): @staticmethod def _highlight_null(v, null_color): - return 'background-color: %s' % null_color if pd.isnull(v) else '' + return 'background-color: %s' % null_color if pd.isna(v) else '' def highlight_null(self, null_color='red'): """ diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 31907ad586817..a1d48719ba9c0 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -5,7 +5,7 @@ import pandas._libs.json as json from pandas._libs.tslib import iNaT from pandas.compat import StringIO, long, u -from pandas import compat, isnull +from pandas import compat, isna from pandas import Series, DataFrame, to_datetime, MultiIndex from pandas.io.common import (get_filepath_or_buffer, _get_handle, _stringify_path) @@ -535,7 +535,7 @@ def _try_convert_to_date(self, data): # ignore numbers that are out of range if issubclass(new_data.dtype.type, np.number): - in_range = (isnull(new_data.values) | (new_data > self.min_stamp) | + in_range = (isna(new_data.values) | (new_data > self.min_stamp) | (new_data.values == iNaT)) if not in_range.all(): return data, False diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 1e7d9d420b35d..9cf0a11a65270 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -21,7 +21,7 @@ is_float, is_dtype_equal, is_object_dtype, is_string_dtype, is_scalar, is_categorical_dtype) -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna from pandas.core.dtypes.cast import astype_nansafe from pandas.core.index import Index, MultiIndex, RangeIndex from pandas.core.series import Series @@ -1532,7 +1532,7 @@ def _infer_types(self, values, na_values, 
try_num_bool=True): if try_num_bool: try: result = lib.maybe_convert_numeric(values, na_values, False) - na_count = isnull(result).sum() + na_count = isna(result).sum() except Exception: result = values if values.dtype == np.object_: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 4e343556c083b..82c80a13372d7 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -25,7 +25,7 @@ import numpy as np from pandas import (Series, DataFrame, Panel, Panel4D, Index, - MultiIndex, Int64Index, isnull, concat, + MultiIndex, Int64Index, isna, concat, SparseSeries, SparseDataFrame, PeriodIndex, DatetimeIndex, TimedeltaIndex) from pandas.core import config @@ -2136,7 +2136,7 @@ def convert(self, values, nan_rep, encoding): # if we have stored a NaN in the categories # then strip it; in theory we could have BOTH # -1s in the codes and nulls :< - mask = isnull(categories) + mask = isna(categories) if mask.any(): categories = categories[~mask] codes[codes != -1] -= mask.astype(int).cumsum().values @@ -3941,7 +3941,7 @@ def write_data(self, chunksize, dropna=False): # figure the mask: only do if we can successfully process this # column, otherwise ignore the mask - mask = isnull(a.data).all(axis=0) + mask = isna(a.data).all(axis=0) if isinstance(mask, np.ndarray): masks.append(mask.astype('u1', copy=False)) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 0dbef66616e43..9aa47e5c69850 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -12,7 +12,7 @@ import numpy as np import pandas._libs.lib as lib -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.common import ( is_list_like, is_dict_like, @@ -632,7 +632,7 @@ def insert_data(self): # replace NaN with None if b._can_hold_na: - mask = isnull(d) + mask = isna(d) d[mask] = None for col_loc, col in zip(b.mgr_locs, d): @@ -845,7 +845,7 @@ def _harmonize_columns(self, parse_dates=None): except KeyError: pass # this column not in results - def _get_notnull_col_dtype(self, col): + def _get_notna_col_dtype(self, col): """ Infer datatype of the Series col. 
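
# [editor's note] Hedged illustration of the idea behind the renamed
# _get_notna_col_dtype helper above: for an object-dtype column, infer the
# underlying type from the non-NA values only. infer_non_na_dtype is a
# hypothetical name; pd.api.types.infer_dtype is the public inference API.
import pandas as pd

def infer_non_na_dtype(col):
    # Object columns with NAs: restrict inference to the valid values.
    if col.dtype == object:
        non_na = col[pd.notna(col)]
        if len(non_na):
            col = non_na
    return pd.api.types.infer_dtype(col)

print(infer_non_na_dtype(pd.Series(['x', None, 'y'])))  # -> 'string'
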
In case the dtype of col is 'object' and it contains NA values, this infers the datatype of the not-NA @@ -853,9 +853,9 @@ def _get_notnull_col_dtype(self, col): """ col_for_inference = col if col.dtype == 'object': - notnulldata = col[~isnull(col)] - if len(notnulldata): - col_for_inference = notnulldata + notnadata = col[~isna(col)] + if len(notnadata): + col_for_inference = notnadata return lib.infer_dtype(col_for_inference) @@ -865,7 +865,7 @@ def _sqlalchemy_type(self, col): if col.name in dtype: return self.dtype[col.name] - col_type = self._get_notnull_col_dtype(col) + col_type = self._get_notna_col_dtype(col) from sqlalchemy.types import (BigInteger, Integer, Float, Text, Boolean, @@ -1345,7 +1345,7 @@ def _sql_type_name(self, col): if col.name in dtype: return dtype[col.name] - col_type = self._get_notnull_col_dtype(col) + col_type = self._get_notna_col_dtype(col) if col_type == 'timedelta64': warnings.warn("the 'timedelta' type is not supported, and will be " "written as integer values (ns frequency) to the " diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 30991d8a24c63..253ed03c25db9 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -24,7 +24,7 @@ from pandas.core.frame import DataFrame from pandas.core.series import Series import datetime -from pandas import compat, to_timedelta, to_datetime, isnull, DatetimeIndex +from pandas import compat, to_timedelta, to_datetime, isna, DatetimeIndex from pandas.compat import lrange, lmap, lzip, text_type, string_types, range, \ zip, BytesIO from pandas.util._decorators import Appender @@ -402,7 +402,7 @@ def parse_dates_safe(dates, delta=False, year=False, days=False): return DataFrame(d, index=index) - bad_loc = isnull(dates) + bad_loc = isna(dates) index = dates.index if bad_loc.any(): dates = Series(dates) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index de96d17da2a9f..b8d7cebe8a274 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -11,14 +11,14 @@ from pandas.util._decorators import cache_readonly from pandas.core.base import PandasObject -from pandas.core.dtypes.missing import notnull, remove_na_arraylike +from pandas.core.dtypes.missing import isna, notna, remove_na_arraylike from pandas.core.dtypes.common import ( is_list_like, is_integer, is_number, is_hashable, is_iterator) -from pandas.core.common import AbstractMethodError, isnull, _try_sort +from pandas.core.common import AbstractMethodError, _try_sort from pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import Index, MultiIndex from pandas.core.series import Series @@ -554,7 +554,7 @@ def _get_xticks(self, convert_period=False): """ x = index._mpl_repr() elif is_datetype: - self.data = self.data[notnull(self.data.index)] + self.data = self.data[notna(self.data.index)] self.data = self.data.sort_index() x = self.data.index._mpl_repr() else: @@ -567,7 +567,7 @@ def _get_xticks(self, convert_period=False): @classmethod def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds): - mask = isnull(y) + mask = isna(y) if mask.any(): y = np.ma.array(y) y = np.ma.masked_where(mask, y) @@ -1290,7 +1290,7 @@ def _args_adjust(self): # create common bin edge values = (self.data._convert(datetime=True)._get_numeric_data()) values = np.ravel(values) - values = values[~isnull(values)] + values = values[~isna(values)] hist, self.bins = np.histogram( values, bins=self.bins, @@ -1305,7 +1305,7 @@ def _plot(cls, ax, y, style=None, bins=None, bottom=0, column_num=0, stacking_id=None, **kwds): if 
column_num == 0: cls._initialize_stacker(ax, stacking_id, len(bins) - 1) - y = y[~isnull(y)] + y = y[~isna(y)] base = np.zeros(len(bins) - 1) bottom = bottom + \ diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 20ada033c0f58..db2211fb55135 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -5,7 +5,7 @@ import numpy as np from pandas.util._decorators import deprecate_kwarg -from pandas.core.dtypes.missing import notnull +from pandas.core.dtypes.missing import notna from pandas.compat import range, lrange, lmap, zip from pandas.io.formats.printing import pprint_thing @@ -62,7 +62,7 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, # no gaps between subplots fig.subplots_adjust(wspace=0, hspace=0) - mask = notnull(df) + mask = notna(df) marker = _get_marker_compat(marker) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index b1652cf6eb6db..433ed7e517b1c 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -64,8 +64,8 @@ class TestPDApi(Base): funcs = ['bdate_range', 'concat', 'crosstab', 'cut', 'date_range', 'interval_range', 'eval', 'factorize', 'get_dummies', - 'infer_freq', 'isnull', 'lreshape', - 'melt', 'notnull', 'offsets', + 'infer_freq', 'isna', 'isnull', 'lreshape', + 'melt', 'notna', 'notnull', 'offsets', 'merge', 'merge_ordered', 'merge_asof', 'period_range', 'pivot', 'pivot_table', 'qcut', @@ -88,6 +88,9 @@ class TestPDApi(Base): funcs_to = ['to_datetime', 'to_msgpack', 'to_numeric', 'to_pickle', 'to_timedelta'] + # top-level to deprecate in the future + deprecated_funcs_in_future = [] + # these are already deprecated; awaiting removal deprecated_funcs = ['ewma', 'ewmcorr', 'ewmcov', 'ewmstd', 'ewmvar', 'ewmvol', 'expanding_apply', 'expanding_corr', @@ -113,6 +116,7 @@ def test_api(self): self.deprecated_classes_in_future + self.funcs + self.funcs_option + self.funcs_read + self.funcs_to + + self.deprecated_funcs_in_future + self.deprecated_funcs, self.ignored) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index ec5fe45d7f610..d26ea047bb41f 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -18,7 +18,7 @@ from pandas._libs import tslib, lib from pandas import (Series, Index, DataFrame, Timedelta, DatetimeIndex, TimedeltaIndex, Timestamp, - Panel, Period, Categorical) + Panel, Period, Categorical, isna) from pandas.compat import u, PY2, PY3, StringIO, lrange from pandas.core.dtypes import inference from pandas.core.dtypes.common import ( @@ -36,7 +36,6 @@ is_scipy_sparse, _ensure_int32, _ensure_categorical) -from pandas.core.dtypes.missing import isnull from pandas.util import testing as tm @@ -1014,7 +1013,7 @@ def test_nan_to_nat_conversions(): s = df['B'].copy() s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan) - assert (isnull(s[8])) + assert (isna(s[8])) # numpy < 1.7.0 is wrong from distutils.version import LooseVersion diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index ea4f5da04a271..d3c9ca51af18f 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -15,151 +15,153 @@ from pandas.core.dtypes.common import is_scalar from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import ( - array_equivalent, isnull, notnull, + array_equivalent, isna, notna, isnull, notnull, na_value_for_dtype) -def test_notnull(): - assert notnull(1.) 
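
# [editor's note] The test rewrite below relies on one pytest idiom worth
# naming: rather than duplicating a test body for notna and its notnull
# alias, pytest.mark.parametrize runs the same body against both callables.
# Stand-alone version of that pattern:
import numpy as np
import pytest
from pandas import notna, notnull

@pytest.mark.parametrize('f', [notna, notnull])
def test_scalar_notna(f):
    assert f(1.0)
    assert not f(None)
    assert not f(np.nan)
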
- assert not notnull(None) - assert not notnull(np.NaN) +@pytest.mark.parametrize('notna_f', [notna, notnull]) +def test_notna_notnull(notna_f): + assert notna_f(1.) + assert not notna_f(None) + assert not notna_f(np.NaN) - with cf.option_context("mode.use_inf_as_null", False): - assert notnull(np.inf) - assert notnull(-np.inf) + with cf.option_context("mode.use_inf_as_na", False): + assert notna_f(np.inf) + assert notna_f(-np.inf) arr = np.array([1.5, np.inf, 3.5, -np.inf]) - result = notnull(arr) + result = notna_f(arr) assert result.all() - with cf.option_context("mode.use_inf_as_null", True): - assert not notnull(np.inf) - assert not notnull(-np.inf) + with cf.option_context("mode.use_inf_as_na", True): + assert not notna_f(np.inf) + assert not notna_f(-np.inf) arr = np.array([1.5, np.inf, 3.5, -np.inf]) - result = notnull(arr) + result = notna_f(arr) assert result.sum() == 2 - with cf.option_context("mode.use_inf_as_null", False): + with cf.option_context("mode.use_inf_as_na", False): for s in [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries(), tm.makeTimeSeries(), tm.makePeriodSeries()]: - assert (isinstance(isnull(s), Series)) + assert (isinstance(notna_f(s), Series)) -class TestIsNull(object): +class TestIsNA(object): def test_0d_array(self): - assert isnull(np.array(np.nan)) - assert not isnull(np.array(0.0)) - assert not isnull(np.array(0)) + assert isna(np.array(np.nan)) + assert not isna(np.array(0.0)) + assert not isna(np.array(0)) # test object dtype - assert isnull(np.array(np.nan, dtype=object)) - assert not isnull(np.array(0.0, dtype=object)) - assert not isnull(np.array(0, dtype=object)) + assert isna(np.array(np.nan, dtype=object)) + assert not isna(np.array(0.0, dtype=object)) + assert not isna(np.array(0, dtype=object)) def test_empty_object(self): for shape in [(4, 0), (4,)]: arr = np.empty(shape=shape, dtype=object) - result = isnull(arr) + result = isna(arr) expected = np.ones(shape=shape, dtype=bool) tm.assert_numpy_array_equal(result, expected) - def test_isnull(self): - assert not isnull(1.) - assert isnull(None) - assert isnull(np.NaN) + @pytest.mark.parametrize('isna_f', [isna, isnull]) + def test_isna_isnull(self, isna_f): + assert not isna_f(1.) 
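
# [editor's note] What the renamed mode.use_inf_as_na option exercised in
# this test controls, as a stand-alone sketch (option spelling per this
# patch; the option remained available through pandas 2.x):
import numpy as np
import pandas as pd

arr = np.array([1.5, np.inf, 3.5, -np.inf])
with pd.option_context('mode.use_inf_as_na', False):
    assert pd.notna(arr).all()        # +/-inf are ordinary values
with pd.option_context('mode.use_inf_as_na', True):
    assert pd.notna(arr).sum() == 2   # +/-inf now count as missing
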
+ assert isna_f(None) + assert isna_f(np.NaN) assert float('nan') - assert not isnull(np.inf) - assert not isnull(-np.inf) + assert not isna_f(np.inf) + assert not isna_f(-np.inf) # series for s in [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries(), tm.makeTimeSeries(), tm.makePeriodSeries()]: - assert isinstance(isnull(s), Series) + assert isinstance(isna_f(s), Series) # frame for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(), tm.makeMixedDataFrame()]: - result = isnull(df) - expected = df.apply(isnull) + result = isna_f(df) + expected = df.apply(isna_f) tm.assert_frame_equal(result, expected) # panel with catch_warnings(record=True): for p in [tm.makePanel(), tm.makePeriodPanel(), tm.add_nans(tm.makePanel())]: - result = isnull(p) - expected = p.apply(isnull) + result = isna_f(p) + expected = p.apply(isna_f) tm.assert_panel_equal(result, expected) # panel 4d with catch_warnings(record=True): for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]: - result = isnull(p) - expected = p.apply(isnull) + result = isna_f(p) + expected = p.apply(isna_f) tm.assert_panel4d_equal(result, expected) - def test_isnull_lists(self): - result = isnull([[False]]) + def test_isna_lists(self): + result = isna([[False]]) exp = np.array([[False]]) tm.assert_numpy_array_equal(result, exp) - result = isnull([[1], [2]]) + result = isna([[1], [2]]) exp = np.array([[False], [False]]) tm.assert_numpy_array_equal(result, exp) # list of strings / unicode - result = isnull(['foo', 'bar']) + result = isna(['foo', 'bar']) exp = np.array([False, False]) tm.assert_numpy_array_equal(result, exp) - result = isnull([u('foo'), u('bar')]) + result = isna([u('foo'), u('bar')]) exp = np.array([False, False]) tm.assert_numpy_array_equal(result, exp) - def test_isnull_nat(self): - result = isnull([NaT]) + def test_isna_nat(self): + result = isna([NaT]) exp = np.array([True]) tm.assert_numpy_array_equal(result, exp) - result = isnull(np.array([NaT], dtype=object)) + result = isna(np.array([NaT], dtype=object)) exp = np.array([True]) tm.assert_numpy_array_equal(result, exp) - def test_isnull_numpy_nat(self): + def test_isna_numpy_nat(self): arr = np.array([NaT, np.datetime64('NaT'), np.timedelta64('NaT'), np.datetime64('NaT', 's')]) - result = isnull(arr) + result = isna(arr) expected = np.array([True] * 4) tm.assert_numpy_array_equal(result, expected) - def test_isnull_datetime(self): - assert not isnull(datetime.now()) - assert notnull(datetime.now()) + def test_isna_datetime(self): + assert not isna(datetime.now()) + assert notna(datetime.now()) idx = date_range('1/1/1990', periods=20) exp = np.ones(len(idx), dtype=bool) - tm.assert_numpy_array_equal(notnull(idx), exp) + tm.assert_numpy_array_equal(notna(idx), exp) idx = np.asarray(idx) idx[0] = iNaT idx = DatetimeIndex(idx) - mask = isnull(idx) + mask = isna(idx) assert mask[0] exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) tm.assert_numpy_array_equal(mask, exp) # GH 9129 pidx = idx.to_period(freq='M') - mask = isnull(pidx) + mask = isna(pidx) assert mask[0] exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) tm.assert_numpy_array_equal(mask, exp) - mask = isnull(pidx[1:]) + mask = isna(pidx[1:]) exp = np.zeros(len(mask), dtype=bool) tm.assert_numpy_array_equal(mask, exp) @@ -174,7 +176,7 @@ def test_isnull_datetime(self): (np.array([1, 1 + 0j, np.nan, 3]).astype(object), np.array([False, False, True, False]))]) def test_complex(self, value, expected): - result = isnull(value) + result = isna(value) if is_scalar(result): 
assert result is expected else: @@ -183,10 +185,10 @@ def test_complex(self, value, expected): def test_datetime_other_units(self): idx = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-02']) exp = np.array([False, True, False]) - tm.assert_numpy_array_equal(isnull(idx), exp) - tm.assert_numpy_array_equal(notnull(idx), ~exp) - tm.assert_numpy_array_equal(isnull(idx.values), exp) - tm.assert_numpy_array_equal(notnull(idx.values), ~exp) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) + tm.assert_numpy_array_equal(isna(idx.values), exp) + tm.assert_numpy_array_equal(notna(idx.values), ~exp) for dtype in ['datetime64[D]', 'datetime64[h]', 'datetime64[m]', 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', @@ -194,24 +196,24 @@ def test_datetime_other_units(self): values = idx.values.astype(dtype) exp = np.array([False, True, False]) - tm.assert_numpy_array_equal(isnull(values), exp) - tm.assert_numpy_array_equal(notnull(values), ~exp) + tm.assert_numpy_array_equal(isna(values), exp) + tm.assert_numpy_array_equal(notna(values), ~exp) exp = pd.Series([False, True, False]) s = pd.Series(values) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) s = pd.Series(values, dtype=object) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) def test_timedelta_other_units(self): idx = pd.TimedeltaIndex(['1 days', 'NaT', '2 days']) exp = np.array([False, True, False]) - tm.assert_numpy_array_equal(isnull(idx), exp) - tm.assert_numpy_array_equal(notnull(idx), ~exp) - tm.assert_numpy_array_equal(isnull(idx.values), exp) - tm.assert_numpy_array_equal(notnull(idx.values), ~exp) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) + tm.assert_numpy_array_equal(isna(idx.values), exp) + tm.assert_numpy_array_equal(notna(idx.values), ~exp) for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]', 'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]', @@ -219,30 +221,30 @@ def test_timedelta_other_units(self): values = idx.values.astype(dtype) exp = np.array([False, True, False]) - tm.assert_numpy_array_equal(isnull(values), exp) - tm.assert_numpy_array_equal(notnull(values), ~exp) + tm.assert_numpy_array_equal(isna(values), exp) + tm.assert_numpy_array_equal(notna(values), ~exp) exp = pd.Series([False, True, False]) s = pd.Series(values) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) s = pd.Series(values, dtype=object) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) def test_period(self): idx = pd.PeriodIndex(['2011-01', 'NaT', '2012-01'], freq='M') exp = np.array([False, True, False]) - tm.assert_numpy_array_equal(isnull(idx), exp) - tm.assert_numpy_array_equal(notnull(idx), ~exp) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) exp = pd.Series([False, True, False]) s = pd.Series(idx) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) s = pd.Series(idx, dtype=object) - tm.assert_series_equal(isnull(s), exp) - 
tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) def test_array_equivalent(): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index da1c68005b9b2..484a09f11b58a 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -13,7 +13,7 @@ import numpy as np from pandas.compat import lrange, product -from pandas import (compat, isnull, notnull, DataFrame, Series, +from pandas import (compat, isna, notna, DataFrame, Series, MultiIndex, date_range, Timestamp) import pandas as pd import pandas.core.nanops as nanops @@ -81,11 +81,11 @@ def test_corr_nooverlap(self): 'C': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]}) rs = df.corr(meth) - assert isnull(rs.loc['A', 'B']) - assert isnull(rs.loc['B', 'A']) + assert isna(rs.loc['A', 'B']) + assert isna(rs.loc['B', 'A']) assert rs.loc['A', 'A'] == 1 assert rs.loc['B', 'B'] == 1 - assert isnull(rs.loc['C', 'C']) + assert isna(rs.loc['C', 'C']) def test_corr_constant(self): tm._skip_if_no_scipy() @@ -96,7 +96,7 @@ def test_corr_constant(self): df = DataFrame({'A': [1, 1, 1, np.nan, np.nan, np.nan], 'B': [np.nan, np.nan, np.nan, 1, 1, 1]}) rs = df.corr(meth) - assert isnull(rs.values).all() + assert isna(rs.values).all() def test_corr_int(self): # dtypes other than float64 #1761 @@ -136,7 +136,7 @@ def test_cov(self): tm.assert_frame_equal(expected, result) result = self.frame.cov(min_periods=len(self.frame) + 1) - assert isnull(result.values).all() + assert isna(result.values).all() # with NAs frame = self.frame.copy() @@ -389,7 +389,7 @@ def test_reduce_mixed_frame(self): tm.assert_series_equal(test, df.T.sum(axis=1)) def test_count(self): - f = lambda s: notnull(s).sum() + f = lambda s: notna(s).sum() self._check_stat_op('count', f, has_skipna=False, has_numeric_only=True, @@ -477,7 +477,7 @@ def test_product(self): def test_median(self): def wrapper(x): - if isnull(x).any(): + if isna(x).any(): return np.nan return np.median(x) @@ -974,7 +974,7 @@ def test_stats_mixed_type(self): def test_median_corner(self): def wrapper(x): - if isnull(x).any(): + if isna(x).any(): return np.nan return np.median(x) @@ -998,7 +998,7 @@ def test_cumsum_corner(self): def test_sum_bools(self): df = DataFrame(index=lrange(1), columns=lrange(10)) - bools = isnull(df) + bools = isna(df) assert bools.sum(axis=1)[0] == 10 # Index of max / min diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index a6f39cabb60ed..ab2e810d77634 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -9,7 +9,7 @@ import warnings import numpy as np -from pandas import (notnull, DataFrame, Series, MultiIndex, date_range, +from pandas import (notna, DataFrame, Series, MultiIndex, date_range, Timestamp, compat) import pandas as pd from pandas.core.dtypes.dtypes import CategoricalDtype @@ -278,7 +278,7 @@ def transform(row): return row def transform2(row): - if (notnull(row['C']) and row['C'].startswith('shin') and + if (notna(row['C']) and row['C'].startswith('shin') and row['A'] == 'foo'): row['D'] = 7 return row diff --git a/pandas/tests/frame/test_asof.py b/pandas/tests/frame/test_asof.py index d4e3d541937dc..fea6a5370109e 100644 --- a/pandas/tests/frame/test_asof.py +++ b/pandas/tests/frame/test_asof.py @@ -23,13 +23,13 @@ def test_basic(self): freq='25s') result = df.asof(dates) - assert result.notnull().all(1).all() + assert result.notna().all(1).all() lb = df.index[14] ub = 
df.index[30] dates = list(dates) result = df.asof(dates) - assert result.notnull().all(1).all() + assert result.notna().all(1).all() mask = (result.index >= lb) & (result.index < ub) rs = result[mask] diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 87d942101f5f1..e76869bf6712b 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -11,7 +11,7 @@ from pandas.compat import lrange, lzip, u from pandas import (compat, DataFrame, Series, Index, MultiIndex, - date_range, isnull) + date_range, isna) import pandas as pd from pandas.util.testing import assert_frame_equal @@ -852,11 +852,11 @@ def test_reindex_boolean(self): reindexed = frame.reindex(np.arange(10)) assert reindexed.values.dtype == np.object_ - assert isnull(reindexed[0][1]) + assert isna(reindexed[0][1]) reindexed = frame.reindex(columns=lrange(3)) assert reindexed.values.dtype == np.object_ - assert isnull(reindexed[1]).all() + assert isna(reindexed[1]).all() def test_reindex_objects(self): reindexed = self.mixed_frame.reindex(columns=['foo', 'A', 'B']) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index f66070fd66813..afa3c4f25789a 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -533,7 +533,7 @@ def test_stale_cached_series_bug_473(self): repr(Y) result = Y.sum() # noqa exp = Y['g'].sum() # noqa - assert pd.isnull(Y['g']['c']) + assert pd.isna(Y['g']['c']) def test_get_X_columns(self): # numeric and object columns @@ -566,6 +566,6 @@ def test_strange_column_corruption_issue(self): myid = 100 - first = len(df.loc[pd.isnull(df[myid]), [myid]]) - second = len(df.loc[pd.isnull(df[myid]), [myid]]) + first = len(df.loc[pd.isna(df[myid]), [myid]]) + second = len(df.loc[pd.isna(df[myid]), [myid]]) assert first == second == 0 diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 97cf3ce8a7216..d942330ecd8a6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -17,7 +17,7 @@ from pandas.compat import (lmap, long, zip, range, lrange, lzip, OrderedDict, is_platform_little_endian) from pandas import compat -from pandas import (DataFrame, Index, Series, isnull, +from pandas import (DataFrame, Index, Series, isna, MultiIndex, Timedelta, Timestamp, date_range) import pandas as pd @@ -224,7 +224,7 @@ def test_constructor_dict(self): assert len(frame) == len(self.ts2) assert 'col1' not in frame - assert isnull(frame['col3']).all() + assert isna(frame['col3']).all() # Corner cases assert len(DataFrame({})) == 0 @@ -279,12 +279,12 @@ def test_constructor_multi_index(self): tuples = [(2, 3), (3, 3), (3, 3)] mi = MultiIndex.from_tuples(tuples) df = DataFrame(index=mi, columns=mi) - assert pd.isnull(df).values.ravel().all() + assert pd.isna(df).values.ravel().all() tuples = [(3, 3), (2, 3), (3, 3)] mi = MultiIndex.from_tuples(tuples) df = DataFrame(index=mi, columns=mi) - assert pd.isnull(df).values.ravel().all() + assert pd.isna(df).values.ravel().all() def test_constructor_error_msgs(self): msg = "Empty data passed with indices specified." 
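
# [editor's note] Recurring idiom in the constructor tests above: build a
# frame that should be entirely missing, then reduce over the boolean mask
# that isna returns. Self-contained example mirroring
# test_constructor_multi_index:
import pandas as pd

mi = pd.MultiIndex.from_tuples([(2, 3), (3, 3), (3, 3)])
df = pd.DataFrame(index=mi, columns=mi)   # no data -> every cell is NaN
assert pd.isna(df).values.ravel().all()
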
@@ -625,7 +625,7 @@ def test_constructor_maskedarray_nonfloat(self): assert len(frame.index) == 2 assert len(frame.columns) == 3 - assert isnull(frame).values.all() + assert isna(frame).values.all() # cast type frame = DataFrame(mat, columns=['A', 'B', 'C'], @@ -1496,7 +1496,7 @@ def check(df): df.iloc[:, i] # allow single nans to succeed - indexer = np.arange(len(df.columns))[isnull(df.columns)] + indexer = np.arange(len(df.columns))[isna(df.columns)] if len(indexer) == 1: tm.assert_series_equal(df.iloc[:, indexer[0]], @@ -1966,7 +1966,7 @@ def test_frame_datetime64_mixed_index_ctor_1681(self): # it works! d = DataFrame({'A': 'foo', 'B': ts}, index=dr) - assert d['B'].isnull().all() + assert d['B'].isna().all() def test_frame_timeseries_to_records(self): index = date_range('1/1/2000', periods=10) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 065580d56a683..5941b2ab7c2cb 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -382,7 +382,7 @@ def test_dtypes_gh8722(self): assert_series_equal(result, expected) # compat, GH 8722 - with option_context('use_inf_as_null', True): + with option_context('use_inf_as_na', True): df = DataFrame([[1]]) result = df.dtypes assert_series_equal(result, Series({0: np.dtype('int64')})) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index ff79bedbc60f6..dd2759cd3ef8e 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -15,7 +15,7 @@ import numpy as np import pandas.core.common as com -from pandas import (DataFrame, Index, Series, notnull, isnull, +from pandas import (DataFrame, Index, Series, notna, isna, MultiIndex, DatetimeIndex, Timestamp, date_range) import pandas as pd @@ -312,7 +312,7 @@ def test_getitem_boolean_casting(self): df = DataFrame(data=np.random.randn(100, 50)) df = df.where(df > 0) # create nans bools = df > 0 - mask = isnull(df) + mask = isna(df) expected = bools.astype(float).mask(mask) result = bools.mask(mask) assert_frame_equal(result, expected) @@ -395,7 +395,7 @@ def test_getitem_setitem_ix_negative_integers(self): df = DataFrame(np.random.randn(8, 4)) with catch_warnings(record=True): - assert isnull(df.ix[:, [-1]].values).all() + assert isna(df.ix[:, [-1]].values).all() # #1942 a = DataFrame(randn(20, 2), index=[chr(x + 65) for x in range(20)]) @@ -487,7 +487,7 @@ def test_setitem_always_copy(self): self.frame['E'] = s self.frame['E'][5:10] = nan - assert notnull(s[5:10]).all() + assert notna(s[5:10]).all() def test_setitem_boolean(self): df = self.frame.copy() @@ -705,7 +705,7 @@ def test_setitem_empty(self): 'c': ['111', '222', '333']}) result = df.copy() - result.loc[result.b.isnull(), 'a'] = result.a + result.loc[result.b.isna(), 'a'] = result.a assert_frame_equal(result, df) def test_setitem_empty_frame_with_boolean(self): @@ -795,7 +795,7 @@ def test_getitem_fancy_slice_integers_step(self): # this is OK result = df.iloc[:8:2] # noqa df.iloc[:8:2] = np.nan - assert isnull(df.iloc[:8:2]).values.all() + assert isna(df.iloc[:8:2]).values.all() def test_getitem_setitem_integer_slice_keyerrors(self): df = DataFrame(np.random.randn(10, 5), index=lrange(0, 20, 2)) @@ -1020,7 +1020,7 @@ def test_setitem_fancy_mixed_2d(self): assert (result.values == 5).all() self.mixed_frame.ix[5] = np.nan - assert isnull(self.mixed_frame.ix[5]).all() + assert isna(self.mixed_frame.ix[5]).all() self.mixed_frame.ix[5] = self.mixed_frame.ix[6] assert_series_equal(self.mixed_frame.ix[5], 
self.mixed_frame.ix[6], @@ -1492,15 +1492,15 @@ def test_setitem_single_column_mixed_datetime(self): # set an allowable datetime64 type df.loc['b', 'timestamp'] = iNaT - assert isnull(df.loc['b', 'timestamp']) + assert isna(df.loc['b', 'timestamp']) # allow this syntax df.loc['c', 'timestamp'] = nan - assert isnull(df.loc['c', 'timestamp']) + assert isna(df.loc['c', 'timestamp']) # allow this syntax df.loc['d', :] = nan - assert not isnull(df.loc['c', :]).all() + assert not isna(df.loc['c', :]).all() # as of GH 3216 this will now work! # try to set with a list like item @@ -1695,7 +1695,7 @@ def test_set_value_resize(self): res = self.frame.copy() res3 = res.set_value('foobar', 'baz', 5) assert is_float_dtype(res3['baz']) - assert isnull(res3['baz'].drop(['foobar'])).all() + assert isna(res3['baz'].drop(['foobar'])).all() pytest.raises(ValueError, res3.set_value, 'foobar', 'baz', 'sam') def test_set_value_with_index_dtype_change(self): @@ -1935,7 +1935,7 @@ def test_reindex_frame_add_nat(self): result = df.reindex(lrange(15)) assert np.issubdtype(result['B'].dtype, np.dtype('M8[ns]')) - mask = com.isnull(result)['B'] + mask = com.isna(result)['B'] assert mask[-5:].all() assert not mask[:-5].any() @@ -2590,7 +2590,7 @@ def test_where_bug(self): # GH7506 a = DataFrame({0: [1, 2], 1: [3, 4], 2: [5, 6]}) b = DataFrame({0: [np.nan, 8], 1: [9, np.nan], 2: [np.nan, np.nan]}) - do_not_replace = b.isnull() | (a > b) + do_not_replace = b.isna() | (a > b) expected = a.copy() expected[~do_not_replace] = b @@ -2600,7 +2600,7 @@ def test_where_bug(self): a = DataFrame({0: [4, 6], 1: [1, 0]}) b = DataFrame({0: [np.nan, 3], 1: [3, np.nan]}) - do_not_replace = b.isnull() | (a > b) + do_not_replace = b.isna() | (a > b) expected = a.copy() expected[~do_not_replace] = b @@ -2633,10 +2633,10 @@ def test_where_none(self): # GH 7656 df = DataFrame([{'A': 1, 'B': np.nan, 'C': 'Test'}, { 'A': np.nan, 'B': 'Test', 'C': np.nan}]) - expected = df.where(~isnull(df), None) + expected = df.where(~isna(df), None) with tm.assert_raises_regex(TypeError, 'boolean setting ' 'on mixed-type'): - df.where(~isnull(df), None, inplace=True) + df.where(~isna(df), None, inplace=True) def test_where_align(self): @@ -2650,10 +2650,10 @@ def create(): # series df = create() expected = df.fillna(df.mean()) - result = df.where(pd.notnull(df), df.mean(), axis='columns') + result = df.where(pd.notna(df), df.mean(), axis='columns') assert_frame_equal(result, expected) - df.where(pd.notnull(df), df.mean(), inplace=True, axis='columns') + df.where(pd.notna(df), df.mean(), inplace=True, axis='columns') assert_frame_equal(df, expected) df = create().fillna(0) @@ -2666,7 +2666,7 @@ def create(): # frame df = create() expected = df.fillna(1) - result = df.where(pd.notnull(df), DataFrame( + result = df.where(pd.notna(df), DataFrame( 1, index=df.index, columns=df.columns)) assert_frame_equal(result, expected) @@ -2948,7 +2948,7 @@ def test_setitem(self): df2.iloc[1, 1] = pd.NaT df2.iloc[1, 2] = pd.NaT result = df2['B'] - assert_series_equal(notnull(result), Series( + assert_series_equal(notna(result), Series( [True, False, True], name='B')) assert_series_equal(df2.dtypes, df.dtypes) @@ -3000,7 +3000,7 @@ def test_setitem(self): df2.iloc[1, 1] = pd.NaT df2.iloc[1, 2] = pd.NaT result = df2['B'] - assert_series_equal(notnull(result), Series( + assert_series_equal(notna(result), Series( [True, False, True], name='B')) assert_series_equal(df2.dtypes, Series([np.dtype('uint64'), np.dtype('O'), np.dtype('O')], diff --git 
a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 438d7481ecc3e..5052bef24e95a 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -137,12 +137,12 @@ def test_operators_none_as_na(self): filled = df.fillna(np.nan) result = op(df, 3) expected = op(filled, 3).astype(object) - expected[com.isnull(expected)] = None + expected[com.isna(expected)] = None assert_frame_equal(result, expected) result = op(df, df) expected = op(filled, filled).astype(object) - expected[com.isnull(expected)] = None + expected[com.isna(expected)] = None assert_frame_equal(result, expected) result = op(df, df.fillna(7)) @@ -1045,8 +1045,8 @@ def test_combine_generic(self): combined = df1.combine(df2, np.add) combined2 = df2.combine(df1, np.add) - assert combined['D'].isnull().all() - assert combined2['D'].isnull().all() + assert combined['D'].isna().all() + assert combined2['D'].isna().all() chunk = combined.loc[combined.index[:-5], ['A', 'B', 'C']] chunk2 = combined2.loc[combined2.index[:-5], ['A', 'B', 'C']] diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index fdb0119d8ae60..e2f362ebdc895 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -528,7 +528,7 @@ def test_unstack_nan_index(self): # GH7466 def verify(df): mk_list = lambda a: list(a) if isinstance(a, tuple) else [a] - rows, cols = df.notnull().values.nonzero() + rows, cols = df.notna().values.nonzero() for i, j in zip(rows, cols): left = sorted(df.iloc[i, j].split('.')) right = mk_list(df.index[i]) + mk_list(df.columns[j]) @@ -547,7 +547,7 @@ def verify(df): mi = df.set_index(list(idx)) for lev in range(2): udf = mi.unstack(level=lev) - assert udf.notnull().values.sum() == len(df) + assert udf.notna().values.sum() == len(df) verify(udf['jolie']) df = DataFrame({'1st': ['d'] * 3 + [nan] * 5 + ['a'] * 2 + @@ -565,7 +565,7 @@ def verify(df): mi = df.set_index(list(idx)) for lev in range(3): udf = mi.unstack(level=lev) - assert udf.notnull().values.sum() == 2 * len(df) + assert udf.notna().values.sum() == 2 * len(df) for col in ['4th', '5th']: verify(udf[col]) @@ -670,7 +670,7 @@ def verify(df): df.loc[1, '3rd'] = df.loc[4, '3rd'] = nan left = df.set_index(['1st', '2nd', '3rd']).unstack(['2nd', '3rd']) - assert left.notnull().values.sum() == 2 * len(df) + assert left.notna().values.sum() == 2 * len(df) for col in ['jim', 'joe']: for _, r in df.iterrows(): diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index aaca8a60fe062..19fbf854256c6 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -281,7 +281,7 @@ def test_shift_duplicate_columns(self): shifted.append(df) # sanity check the base case - nulls = shifted[0].isnull().sum() + nulls = shifted[0].isna().sum() assert_series_equal(nulls, Series(range(1, 6), dtype='int64')) # check all answers are the same diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index f527c732fb76b..8b95455b53d22 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -6,7 +6,7 @@ import numpy as np from pandas.core.dtypes.common import _ensure_int64 -from pandas import Index, isnull +from pandas import Index, isna from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm from pandas._libs import lib, groupby @@ -97,7 +97,7 @@ def _check(dtype): func(out, counts, obj[:, None], 
labels) def _ohlc(group): - if isnull(group).all(): + if isna(group).all(): return np.repeat(nan, 4) return [group[0], group.max(), group.min(), group[-1]] diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 19124a33bdbcb..0dea1e8447b2b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2562,7 +2562,7 @@ def test_cython_grouper_series_bug_noncontig(self): inds = np.tile(lrange(10), 10) result = obj.groupby(inds).agg(Series.median) - assert result.isnull().all() + assert result.isna().all() def test_series_grouper_noncontig_index(self): index = Index(tm.rands_array(10, 100)) @@ -3540,7 +3540,7 @@ def test_max_nan_bug(self): r = gb[['File']].max() e = gb['File'].max().to_frame() tm.assert_frame_equal(r, e) - assert not r['File'].isnull().any() + assert not r['File'].isna().any() def test_nlargest(self): a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 47e6e7839422a..28392537be3c6 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -from pandas import DataFrame, MultiIndex, Index, Series, isnull +from pandas import DataFrame, MultiIndex, Index, Series, isna from pandas.compat import lrange from pandas.util.testing import assert_frame_equal, assert_series_equal @@ -41,9 +41,9 @@ def test_first_last_nth(self): grouped['B'].nth(0) self.df.loc[self.df['A'] == 'foo', 'B'] = np.nan - assert isnull(grouped['B'].first()['foo']) - assert isnull(grouped['B'].last()['foo']) - assert isnull(grouped['B'].nth(0)['foo']) + assert isna(grouped['B'].first()['foo']) + assert isna(grouped['B'].last()['foo']) + assert isna(grouped['B'].nth(0)['foo']) # v0.14.0 whatsnew df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 70b6b1e439691..df0a93d783375 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -599,7 +599,7 @@ def test_first_last_max_min_on_time_data(self): 'td': [nan, td(days=1), td(days=2), td(days=3), nan]}) df_test.dt = pd.to_datetime(df_test.dt) df_test['group'] = 'A' - df_ref = df_test[df_test.dt.notnull()] + df_ref = df_test[df_test.dt.notna()] grouped_test = df_test.groupby('group') grouped_ref = df_ref.groupby('group') diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 1513a1c690014..1fdc08d68eb26 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -10,7 +10,7 @@ from pandas import (Series, Index, Float64Index, Int64Index, UInt64Index, RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, IntervalIndex, - notnull, isnull) + notna, isna) from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas.core.dtypes.common import needs_i8_conversion from pandas._libs.tslib import iNaT @@ -514,7 +514,7 @@ def test_numpy_repeat(self): def test_where(self): i = self.create_index() - result = i.where(notnull(i)) + result = i.where(notna(i)) expected = i tm.assert_index_equal(result, expected) @@ -884,7 +884,7 @@ def test_fillna(self): pass elif isinstance(index, MultiIndex): idx = index.copy() - msg = "isnull is not defined for MultiIndex" + msg = "isna is not defined for MultiIndex" with tm.assert_raises_regex(NotImplementedError, msg): idx.fillna(idx[0]) else: @@ -924,23 
+924,23 @@ def test_nulls(self): for name, index in self.indices.items(): if len(index) == 0: tm.assert_numpy_array_equal( - index.isnull(), np.array([], dtype=bool)) + index.isna(), np.array([], dtype=bool)) elif isinstance(index, MultiIndex): idx = index.copy() - msg = "isnull is not defined for MultiIndex" + msg = "isna is not defined for MultiIndex" with tm.assert_raises_regex(NotImplementedError, msg): - idx.isnull() + idx.isna() else: if not index.hasnans: tm.assert_numpy_array_equal( - index.isnull(), np.zeros(len(index), dtype=bool)) + index.isna(), np.zeros(len(index), dtype=bool)) tm.assert_numpy_array_equal( - index.notnull(), np.ones(len(index), dtype=bool)) + index.notna(), np.ones(len(index), dtype=bool)) else: - result = isnull(index) - tm.assert_numpy_array_equal(index.isnull(), result) - tm.assert_numpy_array_equal(index.notnull(), ~result) + result = isna(index) + tm.assert_numpy_array_equal(index.isna(), result) + tm.assert_numpy_array_equal(index.notna(), ~result) def test_empty(self): # GH 15270 diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 4ef5cc5499f4d..9416b08f9654a 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -5,7 +5,7 @@ import pandas as pd import pandas.util.testing as tm import pandas.compat as compat -from pandas import notnull, Index, DatetimeIndex, datetime, date_range +from pandas import notna, Index, DatetimeIndex, datetime, date_range class TestDatetimeIndex(object): @@ -16,29 +16,29 @@ def test_where_other(self): i = pd.date_range('20130101', periods=3, tz='US/Eastern') for arr in [np.nan, pd.NaT]: - result = i.where(notnull(i), other=np.nan) + result = i.where(notna(i), other=np.nan) expected = i tm.assert_index_equal(result, expected) i2 = i.copy() i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) - result = i.where(notnull(i2), i2) + result = i.where(notna(i2), i2) tm.assert_index_equal(result, i2) i2 = i.copy() i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) - result = i.where(notnull(i2), i2.values) + result = i.where(notna(i2), i2.values) tm.assert_index_equal(result, i2) def test_where_tz(self): i = pd.date_range('20130101', periods=3, tz='US/Eastern') - result = i.where(notnull(i)) + result = i.where(notna(i)) expected = i tm.assert_index_equal(result, expected) i2 = i.copy() i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) - result = i.where(notnull(i2)) + result = i.where(notna(i2)) expected = i2 tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index f33cdf8800791..86e65feec04f3 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -116,13 +116,13 @@ def test_minmax(self): for op in ['min', 'max']: # Return NaT obj = DatetimeIndex([]) - assert pd.isnull(getattr(obj, op)()) + assert pd.isna(getattr(obj, op)()) obj = DatetimeIndex([pd.NaT]) - assert pd.isnull(getattr(obj, op)()) + assert pd.isna(getattr(obj, op)()) obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT]) - assert pd.isnull(getattr(obj, op)()) + assert pd.isna(getattr(obj, op)()) def test_numpy_minmax(self): dr = pd.date_range(start='2016-01-15', end='2016-01-20') diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index a47db755b44af..7ff9c2b23cbfb 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -20,7 
+20,7 @@ from pandas.core.dtypes.common import is_datetime64_ns_dtype from pandas.util import testing as tm from pandas.util.testing import assert_series_equal, _skip_if_has_locale -from pandas import (isnull, to_datetime, Timestamp, Series, DataFrame, +from pandas import (isna, to_datetime, Timestamp, Series, DataFrame, Index, DatetimeIndex, NaT, date_range, bdate_range, compat) @@ -683,7 +683,7 @@ def test_to_datetime_types(self): assert result is NaT result = to_datetime(['', '']) - assert isnull(result).all() + assert isna(result).all() # ints result = Timestamp(0) @@ -751,7 +751,7 @@ def test_string_na_nat_conversion(self): expected = np.empty(4, dtype='M8[ns]') for i, val in enumerate(strings): - if isnull(val): + if isna(val): expected[i] = tslib.iNaT else: expected[i] = parse_date(val) @@ -787,7 +787,7 @@ def test_string_na_nat_conversion(self): expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) for i in range(5): x = series[i] - if isnull(x): + if isna(x): expected[i] = tslib.iNaT else: expected[i] = to_datetime(x) @@ -977,13 +977,13 @@ class TestDaysInMonth(object): # tests for issue #10154 def test_day_not_in_month_coerce(self): - assert isnull(to_datetime('2015-02-29', errors='coerce')) - assert isnull(to_datetime('2015-02-29', format="%Y-%m-%d", - errors='coerce')) - assert isnull(to_datetime('2015-02-32', format="%Y-%m-%d", - errors='coerce')) - assert isnull(to_datetime('2015-04-31', format="%Y-%m-%d", - errors='coerce')) + assert isna(to_datetime('2015-02-29', errors='coerce')) + assert isna(to_datetime('2015-02-29', format="%Y-%m-%d", + errors='coerce')) + assert isna(to_datetime('2015-02-32', format="%Y-%m-%d", + errors='coerce')) + assert isna(to_datetime('2015-04-31', format="%Y-%m-%d", + errors='coerce')) def test_day_not_in_month_raise(self): pytest.raises(ValueError, to_datetime, '2015-02-29', diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 291ca317f8fae..e24e2ad936e2c 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -6,7 +6,7 @@ import pandas as pd from pandas.util import testing as tm -from pandas import (PeriodIndex, period_range, notnull, DatetimeIndex, NaT, +from pandas import (PeriodIndex, period_range, notna, DatetimeIndex, NaT, Index, Period, Int64Index, Series, DataFrame, date_range, offsets, compat) @@ -92,13 +92,13 @@ def test_get_loc(self): def test_where(self): i = self.create_index() - result = i.where(notnull(i)) + result = i.where(notna(i)) expected = i tm.assert_index_equal(result, expected) i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), freq='D') - result = i.where(notnull(i2)) + result = i.where(notna(i2)) expected = i2 tm.assert_index_equal(result, expected) @@ -116,20 +116,20 @@ def test_where_other(self): i = self.create_index() for arr in [np.nan, pd.NaT]: - result = i.where(notnull(i), other=np.nan) + result = i.where(notna(i), other=np.nan) expected = i tm.assert_index_equal(result, expected) i2 = i.copy() i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), freq='D') - result = i.where(notnull(i2), i2) + result = i.where(notna(i2), i2) tm.assert_index_equal(result, i2) i2 = i.copy() i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), freq='D') - result = i.where(notnull(i2), i2.values) + result = i.where(notna(i2), i2.values) tm.assert_index_equal(result, i2) def test_get_indexer(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 842e8fea0df9b..ef36e4a91aa1c 
100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -16,7 +16,7 @@ from pandas import (period_range, date_range, Series, DataFrame, Float64Index, Int64Index, CategoricalIndex, DatetimeIndex, TimedeltaIndex, - PeriodIndex, isnull) + PeriodIndex, isna) from pandas.core.index import _get_combined_index from pandas.util.testing import assert_almost_equal from pandas.compat.numpy import np_datetime64_compat @@ -504,7 +504,7 @@ def test_is_(self): def test_asof(self): d = self.dateIndex[0] assert self.dateIndex.asof(d) == d - assert isnull(self.dateIndex.asof(d - timedelta(1))) + assert isna(self.dateIndex.asof(d - timedelta(1))) d = self.dateIndex[-1] assert self.dateIndex.asof(d + timedelta(1)) == d diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index e8d780e041316..a3d72fdb88239 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -10,7 +10,7 @@ import numpy as np -from pandas import Categorical, IntervalIndex, compat, notnull +from pandas import Categorical, IntervalIndex, compat, notna from pandas.util.testing import assert_almost_equal import pandas.core.config as cf import pandas as pd @@ -236,13 +236,13 @@ def f(x): def test_where(self): i = self.create_index() - result = i.where(notnull(i)) + result = i.where(notna(i)) expected = i tm.assert_index_equal(result, expected) i2 = pd.CategoricalIndex([np.nan, np.nan] + i[2:].tolist(), categories=i.categories) - result = i.where(notnull(i2)) + result = i.where(notna(i2)) expected = i2 tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 33745017fe3d6..fe86a2121761a 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -3,7 +3,7 @@ import pytest import numpy as np -from pandas import (Interval, IntervalIndex, Index, isnull, +from pandas import (Interval, IntervalIndex, Index, isna, interval_range, Timestamp, Timedelta, compat) from pandas._libs.interval import IntervalTree @@ -152,16 +152,16 @@ def test_properties(self): def test_with_nans(self): index = self.index assert not index.hasnans - tm.assert_numpy_array_equal(index.isnull(), + tm.assert_numpy_array_equal(index.isna(), np.array([False, False])) - tm.assert_numpy_array_equal(index.notnull(), + tm.assert_numpy_array_equal(index.notna(), np.array([True, True])) index = self.index_with_nan assert index.hasnans - tm.assert_numpy_array_equal(index.notnull(), + tm.assert_numpy_array_equal(index.notna(), np.array([True, False, True])) - tm.assert_numpy_array_equal(index.isnull(), + tm.assert_numpy_array_equal(index.isna(), np.array([False, True, False])) def test_copy(self): @@ -228,7 +228,7 @@ def test_astype(self): def test_where(self): expected = self.index - result = self.index.where(self.index.notnull()) + result = self.index.where(self.index.notna()) tm.assert_index_equal(result, expected) idx = IntervalIndex.from_breaks([1, 2]) @@ -311,7 +311,7 @@ def test_get_item(self): closed='right') assert i[0] == Interval(0.0, 1.0) assert i[1] == Interval(1.0, 2.0) - assert isnull(i[2]) + assert isna(i[2]) result = i[0:1] expected = IntervalIndex.from_arrays((0.,), (1.,), closed='right') @@ -620,7 +620,7 @@ def test_missing_values(self): with pytest.raises(ValueError): IntervalIndex.from_arrays([np.nan, 0, 1], np.array([0, 1, 2])) - tm.assert_numpy_array_equal(isnull(idx), + tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False])) 
def test_sort_values(self): @@ -631,15 +631,15 @@ def test_sort_values(self): # nan idx = self.index_with_nan - mask = idx.isnull() + mask = idx.isna() tm.assert_numpy_array_equal(mask, np.array([False, True, False])) result = idx.sort_values() - mask = result.isnull() + mask = result.isna() tm.assert_numpy_array_equal(mask, np.array([False, False, True])) result = idx.sort_values(ascending=False) - mask = result.isnull() + mask = result.isna() tm.assert_numpy_array_equal(mask, np.array([True, False, False])) def test_datetime(self): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 719cd2f7e01a4..da1b309f5a621 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2366,12 +2366,12 @@ def test_slice_keep_name(self): names=['x', 'y']) assert x[1:].names == x.names - def test_isnull_behavior(self): + def test_isna_behavior(self): # should not segfault GH5123 # NOTE: if MI representation changes, may make sense to allow - # isnull(MI) + # isna(MI) with pytest.raises(NotImplementedError): - pd.isnull(self.index) + pd.isna(self.index) def test_level_setting_resets_attributes(self): ind = MultiIndex.from_arrays([ @@ -2889,13 +2889,13 @@ def test_nan_stays_float(self): labels=[[0], [0]], names=[0, 1]) idxm = idx0.join(idx1, how='outer') - assert pd.isnull(idx0.get_level_values(1)).all() + assert pd.isna(idx0.get_level_values(1)).all() # the following failed in 0.14.1 - assert pd.isnull(idxm.get_level_values(1)[:-1]).all() + assert pd.isna(idxm.get_level_values(1)[:-1]).all() df0 = pd.DataFrame([[1, 2]], index=idx0) df1 = pd.DataFrame([[3, 4]], index=idx1) dfm = df0 - df1 - assert pd.isnull(df0.index.get_level_values(1)).all() + assert pd.isna(df0.index.get_level_values(1)).all() # the following failed in 0.14.1 - assert pd.isnull(dfm.index.get_level_values(1)[:-1]).all() + assert pd.isna(dfm.index.get_level_values(1)[:-1]).all() diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 62ac337d02727..1a0a38c173284 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -7,7 +7,7 @@ import numpy as np -from pandas import (date_range, notnull, Series, Index, Float64Index, +from pandas import (date_range, notna, Series, Index, Float64Index, Int64Index, UInt64Index, RangeIndex) import pandas.util.testing as tm @@ -228,11 +228,11 @@ def test_constructor(self): # nan handling result = Float64Index([np.nan, np.nan]) - assert pd.isnull(result.values).all() + assert pd.isna(result.values).all() result = Float64Index(np.array([np.nan])) - assert pd.isnull(result.values).all() + assert pd.isna(result.values).all() result = Index(np.array([np.nan])) - assert pd.isnull(result.values).all() + assert pd.isna(result.values).all() def test_constructor_invalid(self): @@ -717,7 +717,7 @@ def test_coerce_list(self): def test_where(self): i = self.create_index() - result = i.where(notnull(i)) + result = i.where(notna(i)) expected = i tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 0d88e88030604..566354da4870d 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -10,7 +10,7 @@ import numpy as np -from pandas import (notnull, Series, Index, Float64Index, +from pandas import (notna, Series, Index, Float64Index, Int64Index, RangeIndex) import pandas.util.testing as tm @@ -929,7 +929,7 @@ def test_len_specialised(self): def test_where(self): i = 
self.create_index() - result = i.where(notnull(i)) + result = i.where(notna(i)) expected = i tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 9a9912d4f0ab1..f4f669ee1d087 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -71,13 +71,13 @@ def test_minmax(self): for op in ['min', 'max']: # Return NaT obj = TimedeltaIndex([]) - assert pd.isnull(getattr(obj, op)()) + assert pd.isna(getattr(obj, op)()) obj = TimedeltaIndex([pd.NaT]) - assert pd.isnull(getattr(obj, op)()) + assert pd.isna(getattr(obj, op)()) obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) - assert pd.isnull(getattr(obj, op)()) + assert pd.isna(getattr(obj, op)()) def test_numpy_minmax(self): dr = pd.date_range(start='2016-01-15', end='2016-01-20') diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index a991b7bbe140a..1a4d1b1d7abaa 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -6,7 +6,7 @@ import pandas as pd import pandas.util.testing as tm from pandas.util.testing import assert_series_equal -from pandas import (Series, Timedelta, to_timedelta, isnull, +from pandas import (Series, Timedelta, to_timedelta, isna, TimedeltaIndex) from pandas._libs.tslib import iNaT @@ -31,7 +31,7 @@ def conv(v): assert result.astype('int64') == iNaT result = to_timedelta(['', '']) - assert isnull(result).all() + assert isna(result).all() # pass thru result = to_timedelta(np.array([np.timedelta64(1, 's')])) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 27a889e58e55e..25e572ee09a6b 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -321,7 +321,7 @@ def test_setting_with_copy_bug(self): df = pd.DataFrame({'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']}) - mask = pd.isnull(df.c) + mask = pd.isna(df.c) def f(): df[['c']][mask] = df[['b']][mask] diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 769cf8ec395dd..1ba9f3101e7b6 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -7,7 +7,7 @@ import pandas as pd from pandas.compat import lrange, lmap -from pandas import Series, DataFrame, date_range, concat, isnull +from pandas import Series, DataFrame, date_range, concat, isna from pandas.util import testing as tm from pandas.tests.indexing.common import Base @@ -191,7 +191,7 @@ def test_iloc_getitem_dups(self): # cross-sectional indexing result = df.iloc[0, 0] - assert isnull(result) + assert isna(result) result = df.iloc[0, :] expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'], diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index e5b70a9fadb8f..3ecd1f3029cad 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -363,7 +363,7 @@ def test_multi_assign(self): df.iloc[1, 0] = np.nan df2 = df.copy() - mask = ~df2.FC.isnull() + mask = ~df2.FC.isna() cols = ['col1', 'col2'] dft = df2 * 2 diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 4d1f9936af983..34ed8782b346c 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -704,7 +704,7 @@ def 
test_missing_trailing_delimiters(self): 1,3,3, 1,4,5""" result = self.read_csv(StringIO(data)) - assert result['D'].isnull()[1:].all() + assert result['D'].isna()[1:].all() def test_skipinitialspace(self): s = ('"09-Apr-2012", "01:10:18.300", 2456026.548822908, 12849, ' @@ -718,7 +718,7 @@ def test_skipinitialspace(self): # it's 33 columns result = self.read_csv(sfile, names=lrange(33), na_values=['-9999.0'], header=None, skipinitialspace=True) - assert pd.isnull(result.iloc[0, 29]) + assert pd.isna(result.iloc[0, 29]) def test_utf16_bom_skiprows(self): # #2298 diff --git a/pandas/tests/io/parser/converters.py b/pandas/tests/io/parser/converters.py index 8fde709e39cae..1176b1e84e29b 100644 --- a/pandas/tests/io/parser/converters.py +++ b/pandas/tests/io/parser/converters.py @@ -133,7 +133,7 @@ def convert_score(x): result = self.read_csv(fh, converters={'score': convert_score, 'days': convert_days}, na_values=['', None]) - assert pd.isnull(result['days'][1]) + assert pd.isna(result['days'][1]) fh = StringIO(data) result2 = self.read_csv(fh, converters={'score': convert_score, diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index c6d1cc79b82d7..7fbf174e19eee 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -249,7 +249,7 @@ def test_na_trailing_columns(self): result = self.read_csv(StringIO(data)) assert result['Date'][1] == '2012-05-12' - assert result['UnitPrice'].isnull().all() + assert result['UnitPrice'].isna().all() def test_na_values_scalar(self): # see gh-12224 diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index 4507db108b684..e1ae1b577ea29 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -461,7 +461,7 @@ def test_parse_dates_empty_string(self): data = "Date, test\n2012-01-01, 1\n,2" result = self.read_csv(StringIO(data), parse_dates=["Date"], na_filter=False) - assert result['Date'].isnull()[1] + assert result['Date'].isna()[1] def test_parse_dates_noconvert_thousands(self): # see gh-14066 diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 0455ffb069322..6fc080c8d9090 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -385,7 +385,7 @@ def test_thousands_macau_stats(self): attrs={'class': 'style1'}) df = dfs[all_non_nan_table_index] - assert not any(s.isnull().any() for _, s in df.iteritems()) + assert not any(s.isna().any() for _, s in df.iteritems()) @pytest.mark.slow def test_thousands_macau_index_col(self): @@ -394,7 +394,7 @@ def test_thousands_macau_index_col(self): dfs = self.read_html(macau_data, index_col=0, header=0) df = dfs[all_non_nan_table_index] - assert not any(s.isnull().any() for _, s in df.iteritems()) + assert not any(s.isna().any() for _, s in df.iteritems()) def test_empty_tables(self): """ diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index c0d200560b477..fc17b5f85b68c 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -14,7 +14,7 @@ from pandas import (Series, DataFrame, Panel, Panel4D, MultiIndex, Int64Index, RangeIndex, Categorical, bdate_range, date_range, timedelta_range, Index, DatetimeIndex, - isnull) + isna) from pandas.compat import is_platform_windows, PY3, PY35, BytesIO, text_type from pandas.io.formats.printing import pprint_thing @@ -3948,7 +3948,7 @@ def test_string_select(self): store.append('df2', df2, data_columns=['x']) result = 
store.select('df2', 'x!=none') - expected = df2[isnull(df2.x)] + expected = df2[isna(df2.x)] assert_frame_equal(result, expected) # int ==/!= diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index deeb8cba2b228..a7c42391effe6 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -33,7 +33,7 @@ from pandas.core.dtypes.common import ( is_object_dtype, is_datetime64_dtype, is_datetime64tz_dtype) -from pandas import DataFrame, Series, Index, MultiIndex, isnull, concat +from pandas import DataFrame, Series, Index, MultiIndex, isna, concat from pandas import date_range, to_datetime, to_timedelta, Timestamp import pandas.compat as compat from pandas.compat import range, lrange, string_types, PY36 @@ -1530,7 +1530,7 @@ def test_dtype(self): assert isinstance(sqltypea, sqlalchemy.TEXT) assert isinstance(sqltypeb, sqlalchemy.TEXT) - def test_notnull_dtype(self): + def test_notna_dtype(self): cols = {'Bool': Series([True, None]), 'Date': Series([datetime(2012, 5, 1), None]), 'Int': Series([1, None], dtype='object'), @@ -1538,7 +1538,7 @@ def test_notnull_dtype(self): } df = DataFrame(cols) - tbl = 'notnull_dtype_test' + tbl = 'notna_dtype_test' df.to_sql(tbl, self.conn) returned_df = sql.read_sql_table(tbl, self.conn) # noqa meta = sqlalchemy.schema.MetaData(bind=self.conn) @@ -2005,7 +2005,7 @@ def test_dtype(self): assert self._get_sqlite_column_type( 'single_dtype_test', 'B') == 'STRING' - def test_notnull_dtype(self): + def test_notna_dtype(self): if self.flavor == 'mysql': pytest.skip('Not applicable to MySQL legacy') @@ -2016,7 +2016,7 @@ def test_notnull_dtype(self): } df = DataFrame(cols) - tbl = 'notnull_dtype_test' + tbl = 'notna_dtype_test' df.to_sql(tbl, self.conn) assert self._get_sqlite_column_type(tbl, 'Bool') == 'INTEGER' @@ -2069,7 +2069,7 @@ def format_query(sql, *args): """ processed_args = [] for arg in args: - if isinstance(arg, float) and isnull(arg): + if isinstance(arg, float) and isna(arg): arg = None formatter = _formatters[type(arg)] diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 7486c32f57fdb..46fea86c45925 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -8,7 +8,7 @@ from pandas.compat import StringIO, iteritems import pandas as pd from pandas import (DataFrame, concat, - read_csv, isnull, Series, date_range, + read_csv, isna, Series, date_range, Index, Panel, MultiIndex, Timestamp, DatetimeIndex) from pandas.util import testing as tm @@ -789,8 +789,8 @@ def test_append_different_columns(self): b = df[5:].loc[:, ['strings', 'ints', 'floats']] appended = a.append(b) - assert isnull(appended['strings'][0:4]).all() - assert isnull(appended['bools'][5:]).all() + assert isna(appended['strings'][0:4]).all() + assert isna(appended['bools'][5:]).all() def test_append_many(self): chunks = [self.frame[:5], self.frame[5:10], @@ -804,7 +804,7 @@ def test_append_many(self): result = chunks[0].append(chunks[1:]) tm.assert_frame_equal(result.loc[:, self.frame.columns], self.frame) assert (result['foo'][15:] == 'bar').all() - assert result['foo'][:15].isnull().all() + assert result['foo'][:15].isna().all() def test_append_preserve_index_name(self): # #980 diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py index e4894307918c6..75c01fabea8f6 100644 --- a/pandas/tests/reshape/test_join.py +++ b/pandas/tests/reshape/test_join.py @@ -252,7 +252,7 @@ def test_join_with_len0(self): merged = 
self.target.join(self.source.reindex([]), on='C') for col in self.source: assert col in merged - assert merged[col].isnull().all() + assert merged[col].isna().all() merged2 = self.target.join(self.source.reindex([]), on='C', how='inner') @@ -266,7 +266,7 @@ def test_join_on_inner(self): joined = df.join(df2, on='key', how='inner') expected = df.join(df2, on='key') - expected = expected[expected['value'].notnull()] + expected = expected[expected['value'].notna()] tm.assert_series_equal(joined['key'], expected['key'], check_dtype=False) tm.assert_series_equal(joined['value'], expected['value'], @@ -734,7 +734,7 @@ def _check_join(left, right, result, join_col, how='left', # some smoke tests for c in join_col: - assert(result[c].notnull().all()) + assert(result[c].notna().all()) left_grouped = left.groupby(join_col) right_grouped = right.groupby(join_col) @@ -797,7 +797,7 @@ def _assert_all_na(join_chunk, source_columns, join_col): for c in source_columns: if c in join_col: continue - assert(join_chunk[c].isnull().all()) + assert(join_chunk[c].isna().all()) def _join_by_hand(a, b, how='left'): diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 765e8e28b43fd..338596d1523e4 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -229,8 +229,8 @@ def test_handle_join_key_pass_array(self): merged2 = merge(right, left, left_on=key, right_on='key', how='outer') assert_series_equal(merged['key'], merged2['key']) - assert merged['key'].notnull().all() - assert merged2['key'].notnull().all() + assert merged['key'].notna().all() + assert merged2['key'].notna().all() left = DataFrame({'value': lrange(5)}, columns=['value']) right = DataFrame({'rvalue': lrange(6)}) @@ -926,8 +926,8 @@ def run_asserts(left, right): res = left.join(right, on=icols, how='left', sort=sort) assert len(left) < len(res) + 1 - assert not res['4th'].isnull().any() - assert not res['5th'].isnull().any() + assert not res['4th'].isna().any() + assert not res['5th'].isna().any() tm.assert_series_equal( res['4th'], - res['5th'], check_names=False) diff --git a/pandas/tests/reshape/test_merge_ordered.py b/pandas/tests/reshape/test_merge_ordered.py index 9469e98f336fd..9b1806ee52c1d 100644 --- a/pandas/tests/reshape/test_merge_ordered.py +++ b/pandas/tests/reshape/test_merge_ordered.py @@ -57,7 +57,7 @@ def test_multigroup(self): assert_frame_equal(result, result2.loc[:, result.columns]) result = merge_ordered(left, self.right, on='key', left_by='group') - assert result['group'].notnull().all() + assert result['group'].notna().all() def test_merge_type(self): class NotADataFrame(DataFrame): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index ff9f35b0253b0..5e5852ac5381d 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -267,7 +267,7 @@ def test_pivot_index_with_nan(self): df.loc[1, 'b'] = df.loc[4, 'b'] = nan pv = df.pivot('a', 'b', 'c') - assert pv.notnull().values.sum() == len(df) + assert pv.notna().values.sum() == len(df) for _, row in df.iterrows(): assert pv.loc[row['a'], row['b']] == row['c'] diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 2523f8ab9f776..91000747b41bb 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -4,7 +4,7 @@ import numpy as np from pandas.compat import zip -from pandas import (Series, Index, isnull, +from pandas import (Series, Index, isna, to_datetime, DatetimeIndex, 
Timestamp, Interval, IntervalIndex, Categorical, cut, qcut, date_range) @@ -140,12 +140,12 @@ def test_na_handling(self): result_arr = np.asarray(result) - ex_arr = np.where(isnull(arr), np.nan, result_arr) + ex_arr = np.where(isna(arr), np.nan, result_arr) tm.assert_almost_equal(result_arr, ex_arr) result = cut(arr, 4, labels=False) - ex_result = np.where(isnull(arr), np.nan, result) + ex_result = np.where(isna(arr), np.nan, result) tm.assert_almost_equal(result, ex_result) def test_inf_handling(self): @@ -200,7 +200,7 @@ def test_cut_out_of_bounds(self): result = cut(arr, [-1, 0, 1]) - mask = isnull(result) + mask = isna(result) ex_mask = (arr < -1) | (arr > 1) tm.assert_numpy_array_equal(mask, ex_mask) @@ -244,7 +244,7 @@ def test_qcut_nas(self): arr[:20] = np.nan result = qcut(arr, 4) - assert isnull(result[:20]).all() + assert isna(result[:20]).all() def test_qcut_index(self): result = qcut([0, 2], 2) @@ -502,9 +502,9 @@ def f(): result = cut(date_range('20130102', periods=5), bins=date_range('20130101', periods=2)) - mask = result.categories.isnull() + mask = result.categories.isna() tm.assert_numpy_array_equal(mask, np.array([False])) - mask = result.isnull() + mask = result.isna() tm.assert_numpy_array_equal( mask, np.array([False, True, True, True, True])) diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 0695fe2243947..5f247cae1099b 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -6,7 +6,7 @@ import numpy as np from pandas import (NaT, Index, Timestamp, Timedelta, Period, DatetimeIndex, PeriodIndex, - TimedeltaIndex, Series, isnull) + TimedeltaIndex, Series, isna) from pandas.util import testing as tm from pandas._libs.tslib import iNaT @@ -95,7 +95,7 @@ def test_identity(klass): result = klass('NaT') assert result is NaT - assert isnull(klass('nat')) + assert isna(klass('nat')) @pytest.mark.parametrize('klass', [Timestamp, Timedelta, Period]) @@ -108,7 +108,7 @@ def test_equality(klass): klass('NAT').value == iNaT klass(None).value == iNaT klass(np.nan).value == iNaT - assert isnull(klass('nat')) + assert isna(klass('nat')) @pytest.mark.parametrize('klass', [Timestamp, Timedelta]) diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index ecc44204924d3..bc9a0388df9d9 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -638,8 +638,8 @@ def test_components(self): s[1] = np.nan result = s.dt.components - assert not result.iloc[0].isnull().all() - assert result.iloc[1].isnull().all() + assert not result.iloc[0].isna().all() + assert result.iloc[1].isna().all() def test_isoformat(self): td = Timedelta(days=6, minutes=50, seconds=3, diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index a736f3aa74558..44da0968d7024 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -10,7 +10,7 @@ import numpy as np import pandas as pd -from pandas import (Series, Categorical, DataFrame, isnull, notnull, +from pandas import (Series, Categorical, DataFrame, isna, notna, bdate_range, date_range, _np_version_under1p10) from pandas.core.index import MultiIndex from pandas.core.indexes.datetimes import Timestamp @@ -130,7 +130,7 @@ def test_sum_inf(self): arr = np.random.randn(100, 100).astype('f4') arr[:, 2] = np.inf - with cf.option_context("mode.use_inf_as_null", True): + with cf.option_context("mode.use_inf_as_na", True): assert_almost_equal(s.sum(), s2.sum()) 
res = nanops.nansum(arr, axis=1) @@ -269,10 +269,10 @@ def test_var_std(self): # 1 - element series with ddof=1 s = self.ts.iloc[[0]] result = s.var(ddof=1) - assert isnull(result) + assert isna(result) result = s.std(ddof=1) - assert isnull(result) + assert isna(result) def test_sem(self): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) @@ -286,7 +286,7 @@ def test_sem(self): # 1 - element series with ddof=1 s = self.ts.iloc[[0]] result = s.sem(ddof=1) - assert isnull(result) + assert isna(result) def test_skew(self): tm._skip_if_no_scipy() @@ -365,7 +365,7 @@ def test_argsort(self): assert s.dtype == 'datetime64[ns]' shifted = s.shift(-1) assert shifted.dtype == 'datetime64[ns]' - assert isnull(shifted[4]) + assert isna(shifted[4]) result = s.argsort() expected = Series(lrange(5), dtype='int64') @@ -524,8 +524,8 @@ def testit(): pytest.raises(TypeError, f, ds) # skipna or no - assert notnull(f(self.series)) - assert isnull(f(self.series, skipna=False)) + assert notna(f(self.series)) + assert isna(f(self.series, skipna=False)) # check the result is correct nona = self.series.dropna() @@ -743,10 +743,10 @@ def test_ops_consistency_on_empty(self): assert result == 0 result = Series(dtype=float).mean() - assert isnull(result) + assert isna(result) result = Series(dtype=float).median() - assert isnull(result) + assert isna(result) # timedelta64[ns] result = Series(dtype='m8[ns]').sum() @@ -769,11 +769,11 @@ def test_corr(self): # partial overlap tm.assert_almost_equal(self.ts[:15].corr(self.ts[5:]), 1) - assert isnull(self.ts[:15].corr(self.ts[5:], min_periods=12)) + assert isna(self.ts[:15].corr(self.ts[5:], min_periods=12)) ts1 = self.ts[:15].reindex(self.ts.index) ts2 = self.ts[5:].reindex(self.ts.index) - assert isnull(ts1.corr(ts2, min_periods=12)) + assert isna(ts1.corr(ts2, min_periods=12)) # No overlap assert np.isnan(self.ts[::2].corr(self.ts[1::2])) @@ -781,7 +781,7 @@ def test_corr(self): # all NA cp = self.ts[:10].copy() cp[:] = np.nan - assert isnull(cp.corr(cp)) + assert isna(cp.corr(cp)) A = tm.makeTimeSeries() B = tm.makeTimeSeries() @@ -838,14 +838,14 @@ def test_cov(self): # all NA cp = self.ts[:10].copy() cp[:] = np.nan - assert isnull(cp.cov(cp)) + assert isna(cp.cov(cp)) # min_periods - assert isnull(self.ts[:15].cov(self.ts[5:], min_periods=12)) + assert isna(self.ts[:15].cov(self.ts[5:], min_periods=12)) ts1 = self.ts[:15].reindex(self.ts.index) ts2 = self.ts[5:].reindex(self.ts.index) - assert isnull(ts1.cov(ts2, min_periods=12)) + assert isna(ts1.cov(ts2, min_periods=12)) def test_count(self): assert self.ts.count() == len(self.ts) @@ -995,10 +995,10 @@ def test_clip_types_and_nulls(self): thresh = s[2] l = s.clip_lower(thresh) u = s.clip_upper(thresh) - assert l[notnull(l)].min() == thresh - assert u[notnull(u)].max() == thresh - assert list(isnull(s)) == list(isnull(l)) - assert list(isnull(s)) == list(isnull(u)) + assert l[notna(l)].min() == thresh + assert u[notna(u)].max() == thresh + assert list(isna(s)) == list(isna(l)) + assert list(isna(s)) == list(isna(u)) def test_clip_against_series(self): # GH #6966 @@ -1202,14 +1202,14 @@ def test_timedelta64_analytics(self): def test_idxmin(self): # test idxmin - # _check_stat_op approach can not be used here because of isnull check. + # _check_stat_op approach can not be used here because of isna check. 
# add some NaNs self.series[5:15] = np.NaN # skipna or no assert self.series[self.series.idxmin()] == self.series.min() - assert isnull(self.series.idxmin(skipna=False)) + assert isna(self.series.idxmin(skipna=False)) # no NaNs nona = self.series.dropna() @@ -1219,7 +1219,7 @@ def test_idxmin(self): # all NaNs allna = self.series * nan - assert isnull(allna.idxmin()) + assert isna(allna.idxmin()) # datetime64[ns] from pandas import date_range @@ -1244,14 +1244,14 @@ def test_numpy_argmin(self): def test_idxmax(self): # test idxmax - # _check_stat_op approach can not be used here because of isnull check. + # _check_stat_op approach can not be used here because of isna check. # add some NaNs self.series[5:15] = np.NaN # skipna or no assert self.series[self.series.idxmax()] == self.series.max() - assert isnull(self.series.idxmax(skipna=False)) + assert isna(self.series.idxmax(skipna=False)) # no NaNs nona = self.series.dropna() @@ -1261,7 +1261,7 @@ def test_idxmax(self): # all NaNs allna = self.series * nan - assert isnull(allna.idxmax()) + assert isna(allna.idxmax()) from pandas import date_range s = Series(date_range('20130102', periods=6)) @@ -1307,7 +1307,7 @@ def test_ptp(self): # GH11163 s = Series([3, 5, np.nan, -3, 10]) assert s.ptp() == 13 - assert pd.isnull(s.ptp(skipna=False)) + assert pd.isna(s.ptp(skipna=False)) mi = pd.MultiIndex.from_product([['a', 'b'], [1, 2, 3]]) s = pd.Series([1, np.nan, 7, 3, 5, np.nan], index=mi) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 2c5f0d7772cc2..e3be5427588b3 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from pandas import (Index, Series, DataFrame, isnull) +from pandas import (Index, Series, DataFrame, isna) from pandas.compat import lrange from pandas import compat from pandas.util.testing import assert_series_equal, assert_frame_equal @@ -393,8 +393,8 @@ def test_map_int(self): merged = left.map(right) assert merged.dtype == np.float_ - assert isnull(merged['d']) - assert not isnull(merged['c']) + assert isna(merged['d']) + assert not isna(merged['c']) def test_map_type_inference(self): s = Series(lrange(3)) diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/test_asof.py index 1f62d618b20e1..3104d85601434 100644 --- a/pandas/tests/series/test_asof.py +++ b/pandas/tests/series/test_asof.py @@ -3,8 +3,8 @@ import pytest import numpy as np -from pandas import (offsets, Series, notnull, - isnull, date_range, Timestamp) +from pandas import (offsets, Series, notna, + isna, date_range, Timestamp) import pandas.util.testing as tm @@ -23,12 +23,12 @@ def test_basic(self): dates = date_range('1/1/1990', periods=N * 3, freq='25s') result = ts.asof(dates) - assert notnull(result).all() + assert notna(result).all() lb = ts.index[14] ub = ts.index[30] result = ts.asof(list(dates)) - assert notnull(result).all() + assert notna(result).all() lb = ts.index[14] ub = ts.index[30] @@ -98,12 +98,12 @@ def test_periodindex(self): dates = date_range('1/1/1990', periods=N * 3, freq='37min') result = ts.asof(dates) - assert notnull(result).all() + assert notna(result).all() lb = ts.index[14] ub = ts.index[30] result = ts.asof(list(dates)) - assert notnull(result).all() + assert notna(result).all() lb = ts.index[14] ub = ts.index[30] @@ -130,7 +130,7 @@ def test_periodindex(self): # no as of value d = ts.index[0].to_timestamp() - offsets.BDay() - assert isnull(ts.asof(d)) + assert isna(ts.asof(d)) def 
test_errors(self): @@ -170,7 +170,7 @@ def test_all_nans(self): # testing scalar input date = date_range('1/1/1990', periods=N * 3, freq='25s')[0] result = Series(np.nan, index=rng).asof(date) - assert isnull(result) + assert isna(result) # test name is propagated result = Series(np.nan, index=[1, 2, 3, 4], name='test').asof([4, 5]) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index a916c42c007f9..3b95c2803dd9e 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -13,7 +13,7 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, is_datetime64tz_dtype) -from pandas import (Index, Series, isnull, date_range, +from pandas import (Index, Series, isna, date_range, NaT, period_range, MultiIndex, IntervalIndex) from pandas.core.indexes.datetimes import Timestamp, DatetimeIndex @@ -348,22 +348,22 @@ def test_constructor_datetimes_with_nulls(self): def test_constructor_dtype_datetime64(self): s = Series(iNaT, dtype='M8[ns]', index=lrange(5)) - assert isnull(s).all() + assert isna(s).all() # in theory this should be all nulls, but since # we are not specifying a dtype is ambiguous s = Series(iNaT, index=lrange(5)) - assert not isnull(s).all() + assert not isna(s).all() s = Series(nan, dtype='M8[ns]', index=lrange(5)) - assert isnull(s).all() + assert isna(s).all() s = Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype='M8[ns]') - assert isnull(s[1]) + assert isna(s[1]) assert s.dtype == 'M8[ns]' s = Series([datetime(2001, 1, 2, 0, 0), nan], dtype='M8[ns]') - assert isnull(s[1]) + assert isna(s[1]) assert s.dtype == 'M8[ns]' # GH3416 @@ -760,10 +760,10 @@ def test_NaT_scalar(self): series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') val = series[3] - assert isnull(val) + assert isna(val) series[2] = val - assert isnull(series[2]) + assert isna(series[2]) def test_NaT_cast(self): # GH10747 diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 23283733c492a..45a92f6d6f50b 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -11,7 +11,7 @@ import pandas._libs.index as _index from pandas.core.dtypes.common import is_integer, is_scalar -from pandas import (Index, Series, DataFrame, isnull, +from pandas import (Index, Series, DataFrame, isna, date_range, NaT, MultiIndex, Timestamp, DatetimeIndex, Timedelta) from pandas.core.indexing import IndexingError @@ -254,7 +254,7 @@ def test_getitem_boolean(self): def test_getitem_boolean_empty(self): s = Series([], dtype=np.int64) s.index.name = 'index_name' - s = s[s.isnull()] + s = s[s.isna()] assert s.index.name == 'index_name' assert s.dtype == np.int64 @@ -1190,11 +1190,11 @@ def f(): s = Series(range(10)).astype(float) s[8] = None result = s[8] - assert isnull(result) + assert isna(result) s = Series(range(10)).astype(float) s[s > 8] = None - result = s[isnull(s)] + result = s[isna(s)] expected = Series(np.nan, index=[9]) assert_series_equal(result, expected) @@ -1988,7 +1988,7 @@ def test_reindex_series_add_nat(self): result = series.reindex(lrange(15)) assert np.issubdtype(result.dtype, np.dtype('M8[ns]')) - mask = result.isnull() + mask = result.isna() assert mask[-5:].all() assert not mask[:-5].any() @@ -2114,7 +2114,7 @@ def test_reindex_bool_pad(self): ts = self.ts[5:] bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index) filled_bool = bool_ts.reindex(self.ts.index, method='pad') - assert isnull(filled_bool[:5]).all() + assert 
isna(filled_bool[:5]).all() def test_reindex_like(self): other = self.ts[::2] diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 24dd90e40fa35..2d20ac9685914 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd -from pandas import (Series, DataFrame, isnull, date_range, +from pandas import (Series, DataFrame, isna, date_range, MultiIndex, Index, Timestamp, NaT, IntervalIndex) from pandas.compat import range from pandas._libs.tslib import iNaT @@ -159,7 +159,7 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-02 10:00')]) tm.assert_series_equal(expected, result) # check s is not changed - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz)) expected = Series([Timestamp('2011-01-01 10:00'), @@ -167,14 +167,14 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00'), Timestamp('2011-01-02 10:00', tz=tz)]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna('AAA') expected = Series([Timestamp('2011-01-01 10:00'), 'AAA', Timestamp('2011-01-03 10:00'), 'AAA'], dtype=object) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), 3: pd.Timestamp('2011-01-04 10:00')}) @@ -183,7 +183,7 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00'), Timestamp('2011-01-04 10:00')]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00'), 3: pd.Timestamp('2011-01-04 10:00')}) @@ -192,14 +192,14 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00'), Timestamp('2011-01-04 10:00')]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) # DatetimeBlockTZ idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT, '2011-01-03 10:00', pd.NaT], tz=tz) s = pd.Series(idx) assert s.dtype == 'datetime64[ns, {0}]'.format(tz) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna(pd.Timestamp('2011-01-02 10:00')) expected = Series([Timestamp('2011-01-01 10:00', tz=tz), @@ -207,7 +207,7 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2011-01-02 10:00')]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz)) idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00', @@ -215,7 +215,7 @@ def test_datetime64_tz_fillna(self): tz=tz) expected = Series(idx) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz).to_pydatetime()) @@ -224,14 +224,14 @@ def test_datetime64_tz_fillna(self): tz=tz) expected = Series(idx) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna('AAA') expected = Series([Timestamp('2011-01-01 
10:00', tz=tz), 'AAA', Timestamp('2011-01-03 10:00', tz=tz), 'AAA'], dtype=object) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), 3: pd.Timestamp('2011-01-04 10:00')}) @@ -240,7 +240,7 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2011-01-04 10:00')]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), 3: pd.Timestamp('2011-01-04 10:00', tz=tz)}) @@ -249,7 +249,7 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2011-01-04 10:00', tz=tz)]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) # filling with a naive/other zone, coerce to object result = s.fillna(Timestamp('20130101')) @@ -258,7 +258,7 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2013-01-01')]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna(Timestamp('20130101', tz='US/Pacific')) expected = Series([Timestamp('2011-01-01 10:00', tz=tz), @@ -266,7 +266,7 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2013-01-01', tz='US/Pacific')]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) # with timezone # GH 15855 @@ -400,10 +400,10 @@ def test_fillna_nat(self): assert_frame_equal(filled, expected) assert_frame_equal(filled2, expected) - def test_isnull_for_inf(self): + def test_isna_for_inf(self): s = Series(['a', np.inf, np.nan, 1.0]) - with pd.option_context('mode.use_inf_as_null', True): - r = s.isnull() + with pd.option_context('mode.use_inf_as_na', True): + r = s.isna() dr = s.dropna() e = Series([False, True, True, False]) de = Series(['a', 1.0], index=[0, 3]) @@ -526,28 +526,28 @@ def test_timedelta64_nan(self): # nan ops on timedeltas td1 = td.copy() td1[0] = np.nan - assert isnull(td1[0]) + assert isna(td1[0]) assert td1[0].value == iNaT td1[0] = td[0] - assert not isnull(td1[0]) + assert not isna(td1[0]) td1[1] = iNaT - assert isnull(td1[1]) + assert isna(td1[1]) assert td1[1].value == iNaT td1[1] = td[1] - assert not isnull(td1[1]) + assert not isna(td1[1]) td1[2] = NaT - assert isnull(td1[2]) + assert isna(td1[2]) assert td1[2].value == iNaT td1[2] = td[2] - assert not isnull(td1[2]) + assert not isna(td1[2]) # boolean setting # this doesn't work, not sure numpy even supports it # result = td[(td>np.timedelta64(timedelta(days=3))) & # td

Date: Tue, 25 Jul 2017 08:17:22 -0700
Subject: [PATCH 827/933] BUG: Thoroughly dedup columns in read_csv (#17060)

--- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/_libs/parsers.pyx | 13 +++++--- pandas/io/parsers.py | 10 ++++-- pandas/tests/io/parser/common.py | 19 ------------ pandas/tests/io/parser/mangle_dupes.py | 42 ++++++++++++++++++++++++++ pandas/tests/io/parser/test_parsers.py | 4 ++- 6 files changed, 62 insertions(+), 28 deletions(-) create mode 100644 pandas/tests/io/parser/mangle_dupes.py diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5a5ea827e74ad..f9385e2ddc994 100644 ---
a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -277,11 +277,11 @@ Indexing I/O ^^^ +- Bug in :func:`read_csv` in which columns were not being thoroughly de-duplicated (:issue:`17060`) - Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) - Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696`, :issue:`16798`). - Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`). - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) - - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) Plotting diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 9866eff3e5f31..543a943aea311 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -788,11 +788,14 @@ cdef class TextReader: unnamed_count += 1 count = counts.get(name, 0) - if (count > 0 and self.mangle_dupe_cols - and not self.has_mi_columns): - this_header.append('%s.%d' % (name, count)) - else: - this_header.append(name) + + if not self.has_mi_columns and self.mangle_dupe_cols: + while count > 0: + counts[name] = count + 1 + name = '%s.%d' % (name, count) + count = counts.get(name, 0) + + this_header.append(name) counts[name] = count + 1 if self.has_mi_columns: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9cf0a11a65270..ea0bb104338b6 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2331,10 +2331,16 @@ def _infer_columns(self): if not have_mi_columns and self.mangle_dupe_cols: counts = {} + for i, col in enumerate(this_columns): cur_count = counts.get(col, 0) - if cur_count > 0: - this_columns[i] = '%s.%d' % (col, cur_count) + + while cur_count > 0: + counts[col] = cur_count + 1 + col = "%s.%d" % (col, cur_count) + cur_count = counts.get(col, 0) + + this_columns[i] = col counts[col] = cur_count + 1 elif have_mi_columns: diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 34ed8782b346c..cfc4a1d7c55eb 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -224,25 +224,6 @@ def test_unnamed_columns(self): Index(['A', 'B', 'C', 'Unnamed: 3', 'Unnamed: 4'])) - def test_duplicate_columns(self): - # TODO: add test for condition 'mangle_dupe_cols=False' - # once it is actually supported (gh-12935) - data = """A,A,B,B,B -1,2,3,4,5 -6,7,8,9,10 -11,12,13,14,15 -""" - - for method in ('read_csv', 'read_table'): - - # check default behavior - df = getattr(self, method)(StringIO(data), sep=',') - assert list(df.columns) == ['A', 'A.1', 'B', 'B.1', 'B.2'] - - df = getattr(self, method)(StringIO(data), sep=',', - mangle_dupe_cols=True) - assert list(df.columns) == ['A', 'A.1', 'B', 'B.1', 'B.2'] - def test_csv_mixed_type(self): data = """A,B,C a,1,2 diff --git a/pandas/tests/io/parser/mangle_dupes.py b/pandas/tests/io/parser/mangle_dupes.py new file mode 100644 index 0000000000000..70ecfe51c0f09 --- /dev/null +++ b/pandas/tests/io/parser/mangle_dupes.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- + +""" +Tests that duplicate columns are handled appropriately when parsed by the +CSV engine.
In general, the expected result is that they are either thoroughly +de-duplicated (if mangling requested) or ignored otherwise. +""" + +from pandas.compat import StringIO + + +class DupeColumnTests(object): + def test_basic(self): + # TODO: add test for condition "mangle_dupe_cols=False" + # once it is actually supported (gh-12935) + data = "a,a,b,b,b\n1,2,3,4,5" + + for method in ("read_csv", "read_table"): + # Check default behavior. + expected = ["a", "a.1", "b", "b.1", "b.2"] + df = getattr(self, method)(StringIO(data), sep=",") + assert list(df.columns) == expected + + df = getattr(self, method)(StringIO(data), sep=",", + mangle_dupe_cols=True) + assert list(df.columns) == expected + + def test_thorough_mangle(self): + # see gh-17060 + data = "a,a,a.1\n1,2,3" + df = self.read_csv(StringIO(data), sep=",", mangle_dupe_cols=True) + assert list(df.columns) == ["a", "a.1", "a.1.1"] + + data = "a,a,a.1,a.1.1,a.1.1.1,a.1.1.1.1\n1,2,3,4,5,6" + df = self.read_csv(StringIO(data), sep=",", mangle_dupe_cols=True) + assert list(df.columns) == ["a", "a.1", "a.1.1", "a.1.1.1", + "a.1.1.1.1", "a.1.1.1.1.1"] + + data = "a,a,a.3,a.1,a.2,a,a\n1,2,3,4,5,6,7" + df = self.read_csv(StringIO(data), sep=",", mangle_dupe_cols=True) + assert list(df.columns) == ["a", "a.1", "a.3", "a.1.1", + "a.2", "a.2.1", "a.3.1"] diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py index 9bbc624dff90f..2fee2451c5e36 100644 --- a/pandas/tests/io/parser/test_parsers.py +++ b/pandas/tests/io/parser/test_parsers.py @@ -19,6 +19,7 @@ from .c_parser_only import CParserTests from .parse_dates import ParseDatesTests from .compression import CompressionTests +from .mangle_dupes import DupeColumnTests from .multithread import MultithreadTests from .python_parser_only import PythonParserTests from .dtypes import DtypeTests @@ -26,11 +27,12 @@ class BaseParser(CommentTests, CompressionTests, ConverterTests, DialectTests, + DtypeTests, DupeColumnTests, HeaderTests, IndexColTests, MultithreadTests, NAvaluesTests, ParseDatesTests, ParserTests, SkipRowsTests, UsecolsTests, - QuotingTests, DtypeTests): + QuotingTests): def read_csv(self, *args, **kwargs): raise NotImplementedError From 13b57cd6ace5b0288431ec13e54c1066e952adea Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Tue, 25 Jul 2017 13:27:45 -0400 Subject: [PATCH 828/933] ENH: Add skipna parameter to infer_dtype (#17066) Currently defaults to False for backwards compatibility. Will default to True in the future. Closes gh-17059. --- doc/source/whatsnew/v0.21.0.txt | 2 + pandas/_libs/src/inference.pyx | 548 +++++++++++++++----------- pandas/tests/dtypes/test_inference.py | 33 +- 3 files changed, 358 insertions(+), 225 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f9385e2ddc994..aed00ca578984 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -24,6 +24,8 @@ New features `_ on most readers and writers (:issue:`13823`) - Added ``__fspath__`` method to :class:`~pandas.HDFStore`, :class:`~pandas.ExcelFile`, and :class:`~pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`) +- Added ``skipna`` parameter to :func:`~pandas.api.types.infer_dtype` to + support type inference in the presence of missing values (:issue:`17059`). .. 
_whatsnew_0210.enhancements.infer_objects: diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 38e95fe6ee652..6b5a8f20f0067 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -1,6 +1,7 @@ import sys from decimal import Decimal cimport util +cimport cython from tslib import NaT, get_timezone from datetime import datetime, timedelta iNaT = util.get_nat() @@ -222,7 +223,7 @@ cdef _try_infer_map(v): return None -def infer_dtype(object value): +def infer_dtype(object value, bint skipna=False): """ Effeciently infer the type of a passed val, or list-like array of values. Return a string describing the type. @@ -230,6 +231,11 @@ def infer_dtype(object value): Parameters ---------- value : scalar, list, ndarray, or pandas type + skipna : bool, default False + Ignore NaN values when inferring the type. The default of ``False`` + will be deprecated in a later version of pandas. + + .. versionadded:: 0.21.0 Returns ------- @@ -272,6 +278,12 @@ def infer_dtype(object value): >>> infer_dtype(['foo', 'bar']) 'string' + >>> infer_dtype(['a', np.nan, 'b'], skipna=True) + 'string' + + >>> infer_dtype(['a', np.nan, 'b'], skipna=False) + 'mixed' + >>> infer_dtype([b'foo', b'bar']) 'bytes' @@ -310,13 +322,13 @@ def infer_dtype(object value): >>> infer_dtype(pd.Series(list('aabc')).astype('category')) 'categorical' - """ cdef: Py_ssize_t i, n object val ndarray values - bint seen_pdnat = False, seen_val = False + bint seen_pdnat = False + bint seen_val = False if isinstance(value, np.ndarray): values = value @@ -356,7 +368,7 @@ def infer_dtype(object value): values = values.ravel() # try to use a valid value - for i from 0 <= i < n: + for i in range(n): val = util.get_value_1d(values, i) # do not use is_nul_datetimelike to keep @@ -403,11 +415,11 @@ def infer_dtype(object value): return 'datetime' elif is_date(val): - if is_date_array(values): + if is_date_array(values, skipna=skipna): return 'date' elif is_time(val): - if is_time_array(values): + if is_time_array(values, skipna=skipna): return 'time' elif is_decimal(val): @@ -420,19 +432,19 @@ def infer_dtype(object value): return 'mixed-integer-float' elif util.is_bool_object(val): - if is_bool_array(values): + if is_bool_array(values, skipna=skipna): return 'boolean' elif PyString_Check(val): - if is_string_array(values): + if is_string_array(values, skipna=skipna): return 'string' elif PyUnicode_Check(val): - if is_unicode_array(values): + if is_unicode_array(values, skipna=skipna): return 'unicode' elif PyBytes_Check(val): - if is_bytes_array(values): + if is_bytes_array(values, skipna=skipna): return 'bytes' elif is_period(val): @@ -593,190 +605,284 @@ cdef inline bint is_timedelta(object o): return PyDelta_Check(o) or util.is_timedelta64_object(o) -cpdef bint is_bool_array(ndarray values): - cdef: - Py_ssize_t i, n = len(values) - ndarray[object] objbuf +cdef class Validator: - if issubclass(values.dtype.type, np.bool_): - return True - elif values.dtype == np.object_: - objbuf = values + cdef: + Py_ssize_t n + np.dtype dtype + bint skipna + + def __cinit__( + self, + Py_ssize_t n, + np.dtype dtype=np.dtype(np.object_), + bint skipna=False + ): + self.n = n + self.dtype = dtype + self.skipna = skipna + + cdef bint validate(self, object[:] values) except -1: + if not self.n: + return False - if n == 0: + if self.is_array_typed(): + return True + elif self.dtype.type_num == NPY_OBJECT: + if self.skipna: + return self._validate_skipna(values) + else: + return self._validate(values) + else: 
return False + @cython.wraparound(False) + @cython.boundscheck(False) + cdef bint _validate(self, object[:] values) except -1: + cdef: + Py_ssize_t i + Py_ssize_t n = self.n + for i in range(n): - if not util.is_bool_object(objbuf[i]): + if not self.is_valid(values[i]): return False - return True - else: + + return self.finalize_validate() + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef bint _validate_skipna(self, object[:] values) except -1: + cdef: + Py_ssize_t i + Py_ssize_t n = self.n + + for i in range(n): + if not self.is_valid_skipna(values[i]): + return False + + return self.finalize_validate_skipna() + + cdef bint is_valid(self, object value) except -1: + return self.is_value_typed(value) + + cdef bint is_valid_skipna(self, object value) except -1: + return self.is_valid(value) or self.is_valid_null(value) + + cdef bint is_value_typed(self, object value) except -1: + raise NotImplementedError( + '{} child class must define is_value_typed'.format( + type(self).__name__ + ) + ) + + cdef bint is_valid_null(self, object value) except -1: + return util._checknull(value) + + cdef bint is_array_typed(self) except -1: return False + cdef inline bint finalize_validate(self): + return True + + cdef bint finalize_validate_skipna(self): + # TODO(phillipc): Remove the existing validate methods and replace them + # with the skipna versions upon full deprecation of skipna=False + return True + + +cdef class BoolValidator(Validator): + + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_bool_object(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.bool_) + + +cpdef bint is_bool_array(ndarray values, bint skipna=False): + cdef: + BoolValidator validator = BoolValidator( + len(values), + values.dtype, + skipna=skipna + ) + return validator.validate(values) + + +cdef class IntegerValidator(Validator): + + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_integer_object(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.integer) + cpdef bint is_integer_array(ndarray values): cdef: - Py_ssize_t i, n = len(values) - ndarray[object] objbuf + IntegerValidator validator = IntegerValidator( + len(values), + values.dtype, + ) + return validator.validate(values) - if issubclass(values.dtype.type, np.integer): - return True - elif values.dtype == np.object_: - objbuf = values - if n == 0: - return False +cdef class IntegerFloatValidator(Validator): - for i in range(n): - if not util.is_integer_object(objbuf[i]): - return False - return True - else: - return False + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_integer_object(value) or util.is_float_object(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.integer) cpdef bint is_integer_float_array(ndarray values): cdef: - Py_ssize_t i, n = len(values) - ndarray[object] objbuf + IntegerFloatValidator validator = IntegerFloatValidator( + len(values), + values.dtype, + ) + return validator.validate(values) - if issubclass(values.dtype.type, np.integer): - return True - elif values.dtype == np.object_: - objbuf = values - if n == 0: - return False +cdef class FloatValidator(Validator): - for i in range(n): - if not (util.is_integer_object(objbuf[i]) or - util.is_float_object(objbuf[i])): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_float_object(value) - return False - 
return True - else: - return False + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.floating) cpdef bint is_float_array(ndarray values): - cdef: - Py_ssize_t i, n = len(values) - ndarray[object] objbuf + cdef FloatValidator validator = FloatValidator(len(values), values.dtype) + return validator.validate(values) - if issubclass(values.dtype.type, np.floating): - return True - elif values.dtype == np.object_: - objbuf = values - if n == 0: - return False +cdef class StringValidator(Validator): - for i in range(n): - if not util.is_float_object(objbuf[i]): - return False - return True - else: - return False + cdef inline bint is_value_typed(self, object value) except -1: + return PyString_Check(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.str_) -cpdef bint is_string_array(ndarray values): +cpdef bint is_string_array(ndarray values, bint skipna=False): cdef: - Py_ssize_t i, n = len(values) - ndarray[object] objbuf + StringValidator validator = StringValidator( + len(values), + values.dtype, + skipna=skipna, + ) + return validator.validate(values) - if ((PY2 and issubclass(values.dtype.type, np.string_)) or - not PY2 and issubclass(values.dtype.type, np.unicode_)): - return True - elif values.dtype == np.object_: - objbuf = values - if n == 0: - return False +cdef class UnicodeValidator(Validator): - for i in range(n): - if not PyString_Check(objbuf[i]): - return False - return True - else: - return False + cdef inline bint is_value_typed(self, object value) except -1: + return PyUnicode_Check(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.unicode_) -cpdef bint is_unicode_array(ndarray values): +cpdef bint is_unicode_array(ndarray values, bint skipna=False): cdef: - Py_ssize_t i, n = len(values) - ndarray[object] objbuf + UnicodeValidator validator = UnicodeValidator( + len(values), + values.dtype, + skipna=skipna, + ) + return validator.validate(values) - if issubclass(values.dtype.type, np.unicode_): - return True - elif values.dtype == np.object_: - objbuf = values - if n == 0: - return False +cdef class BytesValidator(Validator): - for i in range(n): - if not PyUnicode_Check(objbuf[i]): - return False - return True - else: - return False + cdef inline bint is_value_typed(self, object value) except -1: + return PyBytes_Check(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.bytes_) -cpdef bint is_bytes_array(ndarray values): +cpdef bint is_bytes_array(ndarray values, bint skipna=False): cdef: - Py_ssize_t i, n = len(values) - ndarray[object] objbuf + BytesValidator validator = BytesValidator( + len(values), + values.dtype, + skipna=skipna + ) + return validator.validate(values) - if issubclass(values.dtype.type, np.bytes_): - return True - elif values.dtype == np.object_: - objbuf = values - if n == 0: - return False +cdef class TemporalValidator(Validator): + + cdef Py_ssize_t generic_null_count + + def __cinit__( + self, + Py_ssize_t n, + np.dtype dtype=np.dtype(np.object_), + bint skipna=False + ): + self.n = n + self.dtype = dtype + self.skipna = skipna + self.generic_null_count = 0 + + cdef inline bint is_valid(self, object value) except -1: + return self.is_value_typed(value) or self.is_valid_null(value) + + cdef bint is_valid_null(self, object value) except -1: + raise NotImplementedError( + '{} child class must define is_valid_null'.format( + type(self).__name__ + ) + ) + + cdef inline 
bint is_valid_skipna(self, object value) except -1: + cdef: + bint is_typed_null = self.is_valid_null(value) + bint is_generic_null = util._checknull(value) + self.generic_null_count += is_typed_null and is_generic_null + return self.is_value_typed(value) or is_typed_null or is_generic_null + + cdef inline bint finalize_validate_skipna(self): + return self.generic_null_count != self.n - for i in range(n): - if not PyBytes_Check(objbuf[i]): - return False - return True - else: - return False + +cdef class DatetimeValidator(TemporalValidator): + + cdef bint is_value_typed(self, object value) except -1: + return is_datetime(value) + + cdef inline bint is_valid_null(self, object value) except -1: + return is_null_datetime64(value) cpdef bint is_datetime_array(ndarray[object] values): - cdef Py_ssize_t i, null_count = 0, n = len(values) - cdef object v - if n == 0: - return False + cdef: + DatetimeValidator validator = DatetimeValidator( + len(values), + skipna=True, + ) + return validator.validate(values) - # return False for all nulls - for i in range(n): - v = values[i] - if is_null_datetime64(v): - # we are a regular null - if util._checknull(v): - null_count += 1 - elif not is_datetime(v): - return False - return null_count != n +cdef class Datetime64Validator(DatetimeValidator): -cpdef bint is_datetime64_array(ndarray values): - cdef Py_ssize_t i, null_count = 0, n = len(values) - cdef object v - if n == 0: - return False + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_datetime64_object(value) - # return False for all nulls - for i in range(n): - v = values[i] - if is_null_datetime64(v): - # we are a regular null - if util._checknull(v): - null_count += 1 - elif not util.is_datetime64_object(v): - return False - return null_count != n + +cpdef bint is_datetime64_array(ndarray values): + cdef: + Datetime64Validator validator = Datetime64Validator( + len(values), + skipna=True, + ) + return validator.validate(values) cpdef bint is_datetime_with_singletz_array(ndarray[object] values): @@ -807,108 +913,104 @@ cpdef bint is_datetime_with_singletz_array(ndarray[object] values): return True +cdef class TimedeltaValidator(TemporalValidator): + + cdef bint is_value_typed(self, object value) except -1: + return PyDelta_Check(value) + + cdef inline bint is_valid_null(self, object value) except -1: + return is_null_timedelta64(value) + + cpdef bint is_timedelta_array(ndarray values): - cdef Py_ssize_t i, null_count = 0, n = len(values) - cdef object v - if n == 0: - return False - for i in range(n): - v = values[i] - if is_null_timedelta64(v): - # we are a regular null - if util._checknull(v): - null_count += 1 - elif not PyDelta_Check(v): - return False - return null_count != n + cdef: + TimedeltaValidator validator = TimedeltaValidator( + len(values), + skipna=True, + ) + return validator.validate(values) + + +cdef class Timedelta64Validator(TimedeltaValidator): + + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_timedelta64_object(value) cpdef bint is_timedelta64_array(ndarray values): - cdef Py_ssize_t i, null_count = 0, n = len(values) - cdef object v - if n == 0: - return False - for i in range(n): - v = values[i] - if is_null_timedelta64(v): - # we are a regular null - if util._checknull(v): - null_count += 1 - elif not util.is_timedelta64_object(v): - return False - return null_count != n + cdef: + Timedelta64Validator validator = Timedelta64Validator( + len(values), + skipna=True, + ) + return validator.validate(values) + + 
+cdef class AnyTimedeltaValidator(TimedeltaValidator): + + cdef inline bint is_value_typed(self, object value) except -1: + return is_timedelta(value) cpdef bint is_timedelta_or_timedelta64_array(ndarray values): """ infer with timedeltas and/or nat/none """ - cdef Py_ssize_t i, null_count = 0, n = len(values) - cdef object v - if n == 0: - return False - for i in range(n): - v = values[i] - if is_null_timedelta64(v): - # we are a regular null - if util._checknull(v): - null_count += 1 - elif not is_timedelta(v): - return False - return null_count != n + cdef: + AnyTimedeltaValidator validator = AnyTimedeltaValidator( + len(values), + skipna=True, + ) + return validator.validate(values) -cpdef bint is_date_array(ndarray[object] values): - cdef Py_ssize_t i, n = len(values) - if n == 0: - return False - for i in range(n): - if not is_date(values[i]): - return False - return True +cdef class DateValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return is_date(value) + + +cpdef bint is_date_array(ndarray[object] values, bint skipna=False): + cdef DateValidator validator = DateValidator(len(values), skipna=skipna) + return validator.validate(values) -cpdef bint is_time_array(ndarray[object] values): - cdef Py_ssize_t i, n = len(values) - if n == 0: - return False - for i in range(n): - if not is_time(values[i]): - return False - return True + +cdef class TimeValidator(Validator): + + cdef inline bint is_value_typed(self, object value) except -1: + return is_time(value) + + +cpdef bint is_time_array(ndarray[object] values, bint skipna=False): + cdef TimeValidator validator = TimeValidator(len(values), skipna=skipna) + return validator.validate(values) + + +cdef class PeriodValidator(TemporalValidator): + + cdef inline bint is_value_typed(self, object value) except -1: + return is_period(value) + + cdef inline bint is_valid_null(self, object value) except -1: + return is_null_period(value) cpdef bint is_period_array(ndarray[object] values): - cdef Py_ssize_t i, null_count = 0, n = len(values) - cdef object v - if n == 0: - return False + cdef PeriodValidator validator = PeriodValidator(len(values), skipna=True) + return validator.validate(values) - # return False for all nulls - for i in range(n): - v = values[i] - if is_null_period(v): - # we are a regular null - if util._checknull(v): - null_count += 1 - elif not is_period(v): - return False - return null_count != n + +cdef class IntervalValidator(Validator): + + cdef inline bint is_value_typed(self, object value) except -1: + return is_interval(value) cpdef bint is_interval_array(ndarray[object] values): cdef: - Py_ssize_t i, n = len(values), null_count = 0 - object v - - if n == 0: - return False - for i in range(n): - v = values[i] - if util._checknull(v): - null_count += 1 - continue - if not is_interval(v): - return False - return null_count != n + IntervalValidator validator = IntervalValidator( + len(values), + skipna=True, + ) + return validator.validate(values) cdef extern from "parse_helper.h": diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index d26ea047bb41f..dbde7ae5081d4 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -239,6 +239,9 @@ def test_infer_dtype_bytes(self): arr = arr.astype(object) assert lib.infer_dtype(arr) == compare + # object array of bytes with missing values + assert lib.infer_dtype([b'a', np.nan, b'c'], skipna=True) == compare + def test_isinf_scalar(self): # GH 11352 assert 
lib.isposinf_scalar(float('inf')) @@ -444,6 +447,10 @@ def test_bools(self): result = lib.infer_dtype(arr) assert result == 'boolean' + arr = np.array([True, np.nan, False], dtype='O') + result = lib.infer_dtype(arr, skipna=True) + assert result == 'boolean' + def test_floats(self): arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O') result = lib.infer_dtype(arr) @@ -472,11 +479,26 @@ def test_decimals(self): result = lib.infer_dtype(arr) assert result == 'mixed' + arr = np.array([Decimal(1), Decimal('NaN'), Decimal(3)]) + result = lib.infer_dtype(arr) + assert result == 'decimal' + + arr = np.array([Decimal(1), np.nan, Decimal(3)], dtype='O') + result = lib.infer_dtype(arr) + assert result == 'decimal' + def test_string(self): pass def test_unicode(self): - pass + arr = [u'a', np.nan, u'c'] + result = lib.infer_dtype(arr) + assert result == 'mixed' + + arr = [u'a', np.nan, u'c'] + result = lib.infer_dtype(arr, skipna=True) + expected = 'unicode' if PY2 else 'string' + assert result == expected def test_datetime(self): @@ -714,10 +736,17 @@ def test_is_datetimelike_array_all_nan_nat_like(self): def test_date(self): - dates = [date(2012, 1, x) for x in range(1, 20)] + dates = [date(2012, 1, day) for day in range(1, 20)] index = Index(dates) assert index.inferred_type == 'date' + dates = [date(2012, 1, day) for day in range(1, 20)] + [np.nan] + result = lib.infer_dtype(dates) + assert result == 'mixed' + + result = lib.infer_dtype(dates, skipna=True) + assert result == 'date' + def test_to_object_array_tuples(self): r = (5, 6) values = [r] From a7b4a9c7eed794872c5d7dcc558a10ff9f076682 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 25 Jul 2017 21:08:42 -0700 Subject: [PATCH 829/933] MAINT: Remove unused variable in test_scalar.py The "expected" variable is unused at the end of a test in indexing/test_scalar.py --- pandas/tests/indexing/test_scalar.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 5dd1714b903eb..7314ff6619049 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -122,7 +122,6 @@ def test_imethods_with_dups(self): tm.assert_series_equal(result, expected) result = df.iat[2, 0] - expected = 2 assert result == 2 def test_at_to_fail(self): From e2588d9a88805cc12754d7271356ddcc6ab22338 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 25 Jul 2017 22:54:35 -0700 Subject: [PATCH 830/933] TST: Add tests/indexing/ and reshape/ to setup.py (#17076) Looks like we just forgot about them. Oops. 
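An aside on PATCH 828 above: the new ``skipna`` flag can be exercised
directly. A minimal sketch, mirroring the examples added to the
``infer_dtype`` docstring:

    >>> import numpy as np
    >>> from pandas.api.types import infer_dtype
    >>> infer_dtype(['a', np.nan, 'b'])  # default skipna=False: NaN => 'mixed'
    'mixed'
    >>> infer_dtype(['a', np.nan, 'b'], skipna=True)
    'string'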
---
 setup.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/setup.py b/setup.py
index 31a3cddc3f9fd..d5791862cfb19 100755
--- a/setup.py
+++ b/setup.py
@@ -665,6 +665,7 @@ def pxd(name):
           'pandas.tests.computation',
           'pandas.tests.sparse',
           'pandas.tests.frame',
+          'pandas.tests.indexing',
           'pandas.tests.indexes',
           'pandas.tests.indexes.datetimes',
           'pandas.tests.indexes.timedeltas',
@@ -676,6 +677,7 @@ def pxd(name):
           'pandas.tests.io.msgpack',
           'pandas.tests.io.formats',
           'pandas.tests.groupby',
+          'pandas.tests.reshape',
           'pandas.tests.series',
           'pandas.tests.scalar',
           'pandas.tests.tseries',

From f9a552dc12262d1d208f9cdc2c5ffd1731f9c361 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Wed, 26 Jul 2017 06:29:23 -0400
Subject: [PATCH 831/933] CI: partially revert #17065, un-pin pyarrow on some
 builds

---
 ci/requirements-2.7_BUILD_TEST.sh | 2 +-
 ci/requirements-3.6.run           | 2 +-
 ci/requirements-3.6_DOC.sh        | 2 +-
 ci/requirements-3.6_WIN.run       | 2 +-
 pandas/util/_print_versions.py    | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/ci/requirements-2.7_BUILD_TEST.sh b/ci/requirements-2.7_BUILD_TEST.sh
index 999651624be6b..78941fd0944e5 100755
--- a/ci/requirements-2.7_BUILD_TEST.sh
+++ b/ci/requirements-2.7_BUILD_TEST.sh
@@ -4,4 +4,4 @@ source activate pandas

 echo "install 27 BUILD_TEST"

-conda install -n pandas -c conda-forge pyarrow=0.4.1 dask
+conda install -n pandas -c conda-forge pyarrow dask

diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run
index a4f5f073856c8..ef66ebeb336f3 100644
--- a/ci/requirements-3.6.run
+++ b/ci/requirements-3.6.run
@@ -15,7 +15,7 @@ jinja2
 sqlalchemy
 pymysql
 feather-format
-pyarrow=0.4.1
+pyarrow
 # psycopg2 (not avail on defaults ATM)
 beautifulsoup4
 s3fs

diff --git a/ci/requirements-3.6_DOC.sh b/ci/requirements-3.6_DOC.sh
index 58d968a92ee9d..8c10a794a13b9 100644
--- a/ci/requirements-3.6_DOC.sh
+++ b/ci/requirements-3.6_DOC.sh
@@ -6,6 +6,6 @@ echo "[install DOC_BUILD deps]"

 pip install pandas-gbq

-conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 nbsphinx pandoc
+conda install -n pandas -c conda-forge feather-format pyarrow nbsphinx pandoc

 conda install -n pandas -c r r rpy2 --yes

diff --git a/ci/requirements-3.6_WIN.run b/ci/requirements-3.6_WIN.run
index cc9c07d8a2716..22aae8944d731 100644
--- a/ci/requirements-3.6_WIN.run
+++ b/ci/requirements-3.6_WIN.run
@@ -8,7 +8,7 @@ xlrd
 xlwt
 scipy
 feather-format
-pyarrow=0.4.1
+pyarrow
 numexpr
 pytables
 matplotlib

diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py
index f3888c3bffca8..48b19b02e297e 100644
--- a/pandas/util/_print_versions.py
+++ b/pandas/util/_print_versions.py
@@ -69,6 +69,7 @@ def show_versions(as_json=False):
         ("Cython", lambda mod: mod.__version__),
         ("numpy", lambda mod: mod.version.version),
         ("scipy", lambda mod: mod.version.version),
+        ("pyarrow", lambda mod: mod.__version__),
         ("xarray", lambda mod: mod.__version__),
         ("IPython", lambda mod: mod.__version__),
         ("sphinx", lambda mod: mod.__version__),
@@ -95,7 +96,6 @@ def show_versions(as_json=False):
         ("s3fs", lambda mod: mod.__version__),
         ("pandas_gbq", lambda mod: mod.__version__),
         ("pandas_datareader", lambda mod: mod.__version__),
-        ("pyarrow", lambda mod: mod.__version__),
     ]

     deps_blob = list()

From 9de416aa1445deac056972be537846420cd0a7c6 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Wed, 26 Jul 2017 19:38:42 -0400
Subject: [PATCH 832/933] DOC: whatsnew typos

---
 doc/source/whatsnew/v0.21.0.txt | 70 ++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 36 deletions(-)
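With the pin removed and ``pyarrow`` moved up in PATCH 831 above, it is now
reported alongside the core dependencies. A hypothetical spot-check (output
elided; versions vary by environment):

    >>> import pandas as pd
    >>> pd.show_versions()  # doctest: +SKIP
    ...
    pyarrow: <installed version, if any>
    ...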
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index aed00ca578984..5a6a556c9886d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -134,72 +134,70 @@ Dtype Conversions ^^^^^^^^^^^^^^^^^ - Previously assignments, ``.where()`` and ``.fillna()`` with a ``bool`` assignment, would coerce to - same type (e.g. int / float), or raise for datetimelikes. These will now preseve the bools with ``object`` dtypes. (:issue:`16821`). + same the type (e.g. int / float), or raise for datetimelikes. These will now preseve the bools with ``object`` dtypes. (:issue:`16821`). - .. ipython:: python + .. ipython:: python - s = Series([1, 2, 3]) + s = Series([1, 2, 3]) - .. code-block:: python + .. code-block:: python - In [5]: s[1] = True + In [5]: s[1] = True - In [6]: s - Out[6]: - 0 1 - 1 1 - 2 3 - dtype: int64 + In [6]: s + Out[6]: + 0 1 + 1 1 + 2 3 + dtype: int64 - New Behavior + New Behavior - .. ipython:: python + .. ipython:: python - s[1] = True - s + s[1] = True + s -- Previously as assignment to a datetimelike with a non-datetimelike would coerce the +- Previously, as assignment to a datetimelike with a non-datetimelike would coerce the non-datetime-like item being assigned (:issue:`14145`). - .. ipython:: python + .. ipython:: python - s = pd.Series([pd.Timestamp('2011-01-01'), pd.Timestamp('2012-01-01')]) + s = pd.Series([pd.Timestamp('2011-01-01'), pd.Timestamp('2012-01-01')]) - .. code-block:: python + .. code-block:: python - In [1]: s[1] = 1 + In [1]: s[1] = 1 - In [2]: s - Out[2]: - 0 2011-01-01 00:00:00.000000000 - 1 1970-01-01 00:00:00.000000001 - dtype: datetime64[ns] + In [2]: s + Out[2]: + 0 2011-01-01 00:00:00.000000000 + 1 1970-01-01 00:00:00.000000001 + dtype: datetime64[ns] - These now coerce to ``object`` dtype. + These now coerce to ``object`` dtype. - .. ipython:: python + .. ipython:: python - s[1] = 1 - s + s[1] = 1 + s -- Additional bug fixes w.r.t. dtype conversions. - - - Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`) - - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) +- Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`) +- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) .. _whatsnew_0210.api.na_changes: NA naming Changes ^^^^^^^^^^^^^^^^^ -In orde to promote more consistency among the pandas API, we have added additional top-level +In order to promote more consistency among the pandas API, we have added additional top-level functions :func:`isna` and :func:`notna` that are aliases for :func:`isnull` and :func:`notnull`. The naming scheme is now more consistent with methods like ``.dropna()`` and ``.fillna()``. Furthermore in all cases where ``.isnull()`` and ``.notnull()`` methods are defined, these have additional methods named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical``, ``Index``, ``Series``, and ``DataFrame``. (:issue:`15001`). -The configuration option ``mode.use_inf_as_null``is deprecated, and ``mode.use_inf_as_na`` is added as a replacement. +The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement. .. 
_whatsnew_0210.api: @@ -281,7 +279,7 @@ I/O - Bug in :func:`read_csv` in which columns were not being thoroughly de-duplicated (:issue:`17060`) - Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) -- Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696, :issue:`16798`). +- Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696`, :issue:`16798`). - Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`). - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) From 5c185e07f0210c148ced62d4dda3275a1ded954d Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 26 Jul 2017 16:43:49 -0700 Subject: [PATCH 833/933] TST: Check more error messages in tests (#17075) --- pandas/tests/frame/test_validate.py | 51 +++++---- pandas/tests/indexing/test_interval.py | 4 +- pandas/tests/io/msgpack/test_except.py | 21 ++-- pandas/tests/io/msgpack/test_limits.py | 29 +++-- pandas/tests/io/msgpack/test_sequnpack.py | 28 +++-- pandas/tests/io/sas/test_sas.py | 12 ++- pandas/tests/scalar/test_interval.py | 122 ++++++++++++---------- pandas/tests/series/test_validate.py | 39 ++++--- 8 files changed, 167 insertions(+), 139 deletions(-) diff --git a/pandas/tests/frame/test_validate.py b/pandas/tests/frame/test_validate.py index d6065e6042908..2de0e866f6e70 100644 --- a/pandas/tests/frame/test_validate.py +++ b/pandas/tests/frame/test_validate.py @@ -1,34 +1,33 @@ from pandas.core.frame import DataFrame import pytest +import pandas.util.testing as tm -class TestDataFrameValidate(object): - """Tests for error handling related to data types of method arguments.""" - df = DataFrame({'a': [1, 2], 'b': [3, 4]}) - - def test_validate_bool_args(self): - # Tests for error handling related to boolean arguments. 
- invalid_values = [1, "True", [1, 2, 3], 5.0] - - for value in invalid_values: - with pytest.raises(ValueError): - self.df.query('a > b', inplace=value) - - with pytest.raises(ValueError): - self.df.eval('a + b', inplace=value) +@pytest.fixture +def dataframe(): + return DataFrame({'a': [1, 2], 'b': [3, 4]}) - with pytest.raises(ValueError): - self.df.set_index(keys=['a'], inplace=value) - with pytest.raises(ValueError): - self.df.reset_index(inplace=value) - - with pytest.raises(ValueError): - self.df.dropna(inplace=value) - - with pytest.raises(ValueError): - self.df.drop_duplicates(inplace=value) +class TestDataFrameValidate(object): + """Tests for error handling related to data types of method arguments.""" - with pytest.raises(ValueError): - self.df.sort_values(by=['a'], inplace=value) + @pytest.mark.parametrize("func", ["query", "eval", "set_index", + "reset_index", "dropna", + "drop_duplicates", "sort_values"]) + @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, dataframe, func, inplace): + msg = "For argument \"inplace\" expected type bool" + kwargs = dict(inplace=inplace) + + if func == "query": + kwargs["expr"] = "a > b" + elif func == "eval": + kwargs["expr"] = "a + b" + elif func == "set_index": + kwargs["keys"] = ["a"] + elif func == "sort_values": + kwargs["by"] = ["a"] + + with tm.assert_raises_regex(ValueError, msg): + getattr(dataframe, func)(**kwargs) diff --git a/pandas/tests/indexing/test_interval.py b/pandas/tests/indexing/test_interval.py index 2552fc066cc87..be6e5e1cffb2e 100644 --- a/pandas/tests/indexing/test_interval.py +++ b/pandas/tests/indexing/test_interval.py @@ -109,10 +109,10 @@ def test_with_slices(self): # slice of interval with pytest.raises(NotImplementedError): - result = s.loc[Interval(3, 6):] + s.loc[Interval(3, 6):] with pytest.raises(NotImplementedError): - result = s[Interval(3, 6):] + s[Interval(3, 6):] expected = s.iloc[3:5] result = s[[Interval(3, 6)]] diff --git a/pandas/tests/io/msgpack/test_except.py b/pandas/tests/io/msgpack/test_except.py index 6246e0777daee..5a803c5eba34b 100644 --- a/pandas/tests/io/msgpack/test_except.py +++ b/pandas/tests/io/msgpack/test_except.py @@ -1,9 +1,11 @@ # coding: utf-8 -import pytest - +from datetime import datetime from pandas.io.msgpack import packb, unpackb +import pytest +import pandas.util.testing as tm + class DummyException(Exception): pass @@ -12,12 +14,13 @@ class DummyException(Exception): class TestExceptions(object): def test_raise_on_find_unsupported_value(self): - import datetime - pytest.raises(TypeError, packb, datetime.datetime.now()) + msg = "can\'t serialize datetime" + with tm.assert_raises_regex(TypeError, msg): + packb(datetime.now()) def test_raise_from_object_hook(self): - def hook(obj): - raise DummyException + def hook(_): + raise DummyException() pytest.raises(DummyException, unpackb, packb({}), object_hook=hook) pytest.raises(DummyException, unpackb, packb({'fizz': 'buzz'}), @@ -30,5 +33,7 @@ def hook(obj): packb({'fizz': {'buzz': 'spam'}}), object_pairs_hook=hook) - def test_invalidvalue(self): - pytest.raises(ValueError, unpackb, b'\xd9\x97#DL_') + def test_invalid_value(self): + msg = "Unpack failed: error" + with tm.assert_raises_regex(ValueError, msg): + unpackb(b"\xd9\x97#DL_") diff --git a/pandas/tests/io/msgpack/test_limits.py b/pandas/tests/io/msgpack/test_limits.py index 07044dbb7e5de..e4abd4ddb8d13 100644 --- a/pandas/tests/io/msgpack/test_limits.py +++ b/pandas/tests/io/msgpack/test_limits.py @@ -1,10 +1,10 @@ # 
coding: utf-8 from __future__ import (absolute_import, division, print_function, unicode_literals) +from pandas.io.msgpack import packb, unpackb, Packer, Unpacker, ExtType import pytest - -from pandas.io.msgpack import packb, unpackb, Packer, Unpacker, ExtType +import pandas.util.testing as tm class TestLimits(object): @@ -39,7 +39,10 @@ def test_max_str_len(self): unpacker = Unpacker(max_str_len=2, encoding='utf-8') unpacker.feed(packed) - pytest.raises(ValueError, unpacker.unpack) + + msg = "3 exceeds max_str_len" + with tm.assert_raises_regex(ValueError, msg): + unpacker.unpack() def test_max_bin_len(self): d = b'x' * 3 @@ -51,7 +54,10 @@ def test_max_bin_len(self): unpacker = Unpacker(max_bin_len=2) unpacker.feed(packed) - pytest.raises(ValueError, unpacker.unpack) + + msg = "3 exceeds max_bin_len" + with tm.assert_raises_regex(ValueError, msg): + unpacker.unpack() def test_max_array_len(self): d = [1, 2, 3] @@ -63,7 +69,10 @@ def test_max_array_len(self): unpacker = Unpacker(max_array_len=2) unpacker.feed(packed) - pytest.raises(ValueError, unpacker.unpack) + + msg = "3 exceeds max_array_len" + with tm.assert_raises_regex(ValueError, msg): + unpacker.unpack() def test_max_map_len(self): d = {1: 2, 3: 4, 5: 6} @@ -75,7 +84,10 @@ def test_max_map_len(self): unpacker = Unpacker(max_map_len=2) unpacker.feed(packed) - pytest.raises(ValueError, unpacker.unpack) + + msg = "3 exceeds max_map_len" + with tm.assert_raises_regex(ValueError, msg): + unpacker.unpack() def test_max_ext_len(self): d = ExtType(42, b"abc") @@ -87,4 +99,7 @@ def test_max_ext_len(self): unpacker = Unpacker(max_ext_len=2) unpacker.feed(packed) - pytest.raises(ValueError, unpacker.unpack) + + msg = "4 exceeds max_ext_len" + with tm.assert_raises_regex(ValueError, msg): + unpacker.unpack() diff --git a/pandas/tests/io/msgpack/test_sequnpack.py b/pandas/tests/io/msgpack/test_sequnpack.py index 1178176c2c557..dc6fc5ef916b4 100644 --- a/pandas/tests/io/msgpack/test_sequnpack.py +++ b/pandas/tests/io/msgpack/test_sequnpack.py @@ -1,28 +1,26 @@ # coding: utf-8 -import pytest - from pandas import compat from pandas.io.msgpack import Unpacker, BufferFull from pandas.io.msgpack import OutOfData +import pytest +import pandas.util.testing as tm + class TestPack(object): - def test_partialdata(self): + def test_partial_data(self): unpacker = Unpacker() - unpacker.feed(b'\xa5') - pytest.raises(StopIteration, next, iter(unpacker)) - unpacker.feed(b'h') - pytest.raises(StopIteration, next, iter(unpacker)) - unpacker.feed(b'a') - pytest.raises(StopIteration, next, iter(unpacker)) - unpacker.feed(b'l') - pytest.raises(StopIteration, next, iter(unpacker)) - unpacker.feed(b'l') - pytest.raises(StopIteration, next, iter(unpacker)) - unpacker.feed(b'o') - assert next(iter(unpacker)) == b'hallo' + msg = "No more data to unpack" + + for data in [b"\xa5", b"h", b"a", b"l", b"l"]: + unpacker.feed(data) + with tm.assert_raises_regex(StopIteration, msg): + next(iter(unpacker)) + + unpacker.feed(b"o") + assert next(iter(unpacker)) == b"hallo" def test_foobar(self): unpacker = Unpacker(read_size=3, use_list=1) diff --git a/pandas/tests/io/sas/test_sas.py b/pandas/tests/io/sas/test_sas.py index 617df99b99f0b..b85f6b6bbd5ce 100644 --- a/pandas/tests/io/sas/test_sas.py +++ b/pandas/tests/io/sas/test_sas.py @@ -1,14 +1,16 @@ -import pytest - from pandas.compat import StringIO from pandas import read_sas +import pandas.util.testing as tm + class TestSas(object): def test_sas_buffer_format(self): - - # GH14947 + # see gh-14947 b = StringIO("") - with 
pytest.raises(ValueError): + + msg = ("If this is a buffer object rather than a string " + "name, you must specify a format string") + with tm.assert_raises_regex(ValueError, msg): read_sas(b) diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/test_interval.py index e06f7cb34eb52..d431db0b4ca4f 100644 --- a/pandas/tests/scalar/test_interval.py +++ b/pandas/tests/scalar/test_interval.py @@ -1,42 +1,49 @@ from __future__ import division -import pytest from pandas import Interval + +import pytest import pandas.util.testing as tm +@pytest.fixture +def interval(): + return Interval(0, 1) + + class TestInterval(object): - def setup_method(self, method): - self.interval = Interval(0, 1) - def test_properties(self): - assert self.interval.closed == 'right' - assert self.interval.left == 0 - assert self.interval.right == 1 - assert self.interval.mid == 0.5 + def test_properties(self, interval): + assert interval.closed == 'right' + assert interval.left == 0 + assert interval.right == 1 + assert interval.mid == 0.5 - def test_repr(self): - assert repr(self.interval) == "Interval(0, 1, closed='right')" - assert str(self.interval) == "(0, 1]" + def test_repr(self, interval): + assert repr(interval) == "Interval(0, 1, closed='right')" + assert str(interval) == "(0, 1]" interval_left = Interval(0, 1, closed='left') assert repr(interval_left) == "Interval(0, 1, closed='left')" assert str(interval_left) == "[0, 1)" - def test_contains(self): - assert 0.5 in self.interval - assert 1 in self.interval - assert 0 not in self.interval - pytest.raises(TypeError, lambda: self.interval in self.interval) - - interval = Interval(0, 1, closed='both') - assert 0 in interval + def test_contains(self, interval): + assert 0.5 in interval assert 1 in interval - - interval = Interval(0, 1, closed='neither') assert 0 not in interval - assert 0.5 in interval - assert 1 not in interval + + msg = "__contains__ not defined for two intervals" + with tm.assert_raises_regex(TypeError, msg): + interval in interval + + interval_both = Interval(0, 1, closed='both') + assert 0 in interval_both + assert 1 in interval_both + + interval_neither = Interval(0, 1, closed='neither') + assert 0 not in interval_neither + assert 0.5 in interval_neither + assert 1 not in interval_neither def test_equal(self): assert Interval(0, 1) == Interval(0, 1, closed='right') @@ -54,74 +61,79 @@ def test_comparison(self): assert Interval(0, 1) > Interval(-1, 2) assert Interval(0, 1) >= Interval(0, 1) - def test_hash(self): + def test_hash(self, interval): # should not raise - hash(self.interval) + hash(interval) - def test_math_add(self): + def test_math_add(self, interval): expected = Interval(1, 2) - actual = self.interval + 1 + actual = interval + 1 assert expected == actual expected = Interval(1, 2) - actual = 1 + self.interval + actual = 1 + interval assert expected == actual - actual = self.interval + actual = interval actual += 1 assert expected == actual - with pytest.raises(TypeError): - self.interval + Interval(1, 2) + msg = "unsupported operand type\(s\) for \+" + with tm.assert_raises_regex(TypeError, msg): + interval + Interval(1, 2) - with pytest.raises(TypeError): - self.interval + 'foo' + with tm.assert_raises_regex(TypeError, msg): + interval + 'foo' - def test_math_sub(self): + def test_math_sub(self, interval): expected = Interval(-1, 0) - actual = self.interval - 1 + actual = interval - 1 assert expected == actual - actual = self.interval + actual = interval actual -= 1 assert expected == actual - with 
pytest.raises(TypeError): - self.interval - Interval(1, 2) + msg = "unsupported operand type\(s\) for -" + with tm.assert_raises_regex(TypeError, msg): + interval - Interval(1, 2) - with pytest.raises(TypeError): - self.interval - 'foo' + with tm.assert_raises_regex(TypeError, msg): + interval - 'foo' - def test_math_mult(self): + def test_math_mult(self, interval): expected = Interval(0, 2) - actual = self.interval * 2 + actual = interval * 2 assert expected == actual expected = Interval(0, 2) - actual = 2 * self.interval + actual = 2 * interval assert expected == actual - actual = self.interval + actual = interval actual *= 2 assert expected == actual - with pytest.raises(TypeError): - self.interval * Interval(1, 2) + msg = "unsupported operand type\(s\) for \*" + with tm.assert_raises_regex(TypeError, msg): + interval * Interval(1, 2) - with pytest.raises(TypeError): - self.interval * 'foo' + msg = "can\'t multiply sequence by non-int" + with tm.assert_raises_regex(TypeError, msg): + interval * 'foo' - def test_math_div(self): + def test_math_div(self, interval): expected = Interval(0, 0.5) - actual = self.interval / 2.0 + actual = interval / 2.0 assert expected == actual - actual = self.interval + actual = interval actual /= 2.0 assert expected == actual - with pytest.raises(TypeError): - self.interval / Interval(1, 2) + msg = "unsupported operand type\(s\) for /" + with tm.assert_raises_regex(TypeError, msg): + interval / Interval(1, 2) - with pytest.raises(TypeError): - self.interval / 'foo' + with tm.assert_raises_regex(TypeError, msg): + interval / 'foo' diff --git a/pandas/tests/series/test_validate.py b/pandas/tests/series/test_validate.py index 134fa0a38f618..a0cde5f81d021 100644 --- a/pandas/tests/series/test_validate.py +++ b/pandas/tests/series/test_validate.py @@ -1,30 +1,27 @@ -import pytest from pandas.core.series import Series +import pytest +import pandas.util.testing as tm -class TestSeriesValidate(object): - """Tests for error handling related to data types of method arguments.""" - s = Series([1, 2, 3, 4, 5]) - - def test_validate_bool_args(self): - # Tests for error handling related to boolean arguments. 
- invalid_values = [1, "True", [1, 2, 3], 5.0] - for value in invalid_values: - with pytest.raises(ValueError): - self.s.reset_index(inplace=value) +@pytest.fixture +def series(): + return Series([1, 2, 3, 4, 5]) - with pytest.raises(ValueError): - self.s._set_name(name='hello', inplace=value) - with pytest.raises(ValueError): - self.s.sort_values(inplace=value) +class TestSeriesValidate(object): + """Tests for error handling related to data types of method arguments.""" - with pytest.raises(ValueError): - self.s.sort_index(inplace=value) + @pytest.mark.parametrize("func", ["reset_index", "_set_name", + "sort_values", "sort_index", + "rename", "dropna"]) + @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, series, func, inplace): + msg = "For argument \"inplace\" expected type bool" + kwargs = dict(inplace=inplace) - with pytest.raises(ValueError): - self.s.rename(inplace=value) + if func == "_set_name": + kwargs["name"] = "hello" - with pytest.raises(ValueError): - self.s.dropna(inplace=value) + with tm.assert_raises_regex(ValueError, msg): + getattr(series, func)(**kwargs) From e3b784068a654d13ede6dd4062c8a2b6c9b945c5 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Wed, 26 Jul 2017 19:49:19 -0400 Subject: [PATCH 834/933] BUG: Respect dtype when calling pivot_table with margins=True closes #17013 This fix actually exposed an occurrence of #17035 in an existing test (as well as in one I added). Author: Pietro Battiston Closes #17062 from toobaz/pivot_margin_int and squashes the following commits: 2737600 [Pietro Battiston] Removed now obsolete workaround 956c4f9 [Pietro Battiston] BUG: respect dtype when calling pivot_table with margins=True --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/reshape/pivot.py | 14 ++++---- pandas/tests/reshape/test_pivot.py | 58 ++++++++++++++++++++++++++---- 3 files changed, 59 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5a6a556c9886d..0025f8d098d81 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -311,6 +311,7 @@ Reshaping - Bug in merging with categorical dtypes with datetimelikes incorrectly raised a ``TypeError`` (:issue:`16900`) - Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`) - Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) +- Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`) Numeric ^^^^^^^ diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index c2fb81178433e..fe525eb0a3c87 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -178,7 +178,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', data = data[data.notna().all(axis=1)] table = _add_margins(table, data, values, rows=index, cols=columns, aggfunc=aggfunc, - margins_name=margins_name) + margins_name=margins_name, fill_value=fill_value) # discard the top level if values_passed and not values_multi and not table.empty and \ @@ -199,7 +199,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', def _add_margins(table, data, values, rows, cols, aggfunc, - margins_name='All'): + margins_name='All', fill_value=None): if not isinstance(margins_name, compat.string_types): raise ValueError('margins_name argument must be 
a string') @@ -240,8 +240,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, if not isinstance(marginal_result_set, tuple): return marginal_result_set result, margin_keys, row_margin = marginal_result_set - - row_margin = row_margin.reindex(result.columns) + row_margin = row_margin.reindex(result.columns, fill_value=fill_value) # populate grand margin for k in margin_keys: if isinstance(k, compat.string_types): @@ -253,6 +252,9 @@ def _add_margins(table, data, values, rows, cols, aggfunc, row_names = result.index.names try: + for dtype in set(result.dtypes): + cols = result.select_dtypes([dtype]).columns + margin_dummy[cols] = margin_dummy[cols].astype(dtype) result = result.append(margin_dummy) except TypeError: @@ -524,10 +526,6 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, margins=margins, margins_name=margins_name, dropna=dropna, **kwargs) - # GH 17013: - if values is None and margins: - table = table.fillna(0).astype(np.int64) - # Post-process if normalize is not False: table = _normalize(table, normalize=normalize, margins=margins, diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 5e5852ac5381d..ee6c32cd0a208 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -459,6 +459,41 @@ def _check_output(result, values_col, index=['A', 'B'], tm.assert_frame_equal(result['SALARY'], expected['SALARY']) + def test_margins_dtype(self): + # GH 17013 + + df = self.data.copy() + df[['D', 'E', 'F']] = np.arange(len(df) * 3).reshape(len(df), 3) + + mi_val = list(product(['bar', 'foo'], ['one', 'two'])) + [('All', '')] + mi = MultiIndex.from_tuples(mi_val, names=('A', 'B')) + expected = DataFrame({'dull': [12, 21, 3, 9, 45], + 'shiny': [33, 0, 36, 51, 120]}, + index=mi).rename_axis('C', axis=1) + expected['All'] = expected['dull'] + expected['shiny'] + + result = df.pivot_table(values='D', index=['A', 'B'], + columns='C', margins=True, + aggfunc=np.sum, fill_value=0) + + tm.assert_frame_equal(expected, result) + + @pytest.mark.xfail(reason='GH 17035 (len of floats is casted back to ' + 'floats)') + def test_margins_dtype_len(self): + mi_val = list(product(['bar', 'foo'], ['one', 'two'])) + [('All', '')] + mi = MultiIndex.from_tuples(mi_val, names=('A', 'B')) + expected = DataFrame({'dull': [1, 1, 2, 1, 5], + 'shiny': [2, 0, 2, 2, 6]}, + index=mi).rename_axis('C', axis=1) + expected['All'] = expected['dull'] + expected['shiny'] + + result = self.data.pivot_table(values='D', index=['A', 'B'], + columns='C', margins=True, + aggfunc=len, fill_value=0) + + tm.assert_frame_equal(expected, result) + def test_pivot_integer_columns(self): # caused by upstream bug in unstack @@ -894,6 +929,8 @@ def test_pivot_table_margins_name_with_aggfunc_list(self): expected = pd.DataFrame(table.values, index=ix, columns=cols) tm.assert_frame_equal(table, expected) + @pytest.mark.xfail(reason='GH 17035 (np.mean of ints is casted back to ' + 'ints)') def test_categorical_margins(self): # GH 10989 df = pd.DataFrame({'x': np.arange(8), @@ -904,14 +941,23 @@ def test_categorical_margins(self): expected.index = Index([0, 1, 'All'], name='y') expected.columns = Index([0, 1, 'All'], name='z') - data = df.copy() - table = data.pivot_table('x', 'y', 'z', margins=True) + table = df.pivot_table('x', 'y', 'z', margins=True) tm.assert_frame_equal(table, expected) - data = df.copy() - data.y = data.y.astype('category') - data.z = data.z.astype('category') - table = data.pivot_table('x', 'y', 'z', margins=True) + 
@pytest.mark.xfail(reason='GH 17035 (np.mean of ints is casted back to ' + 'ints)') + def test_categorical_margins_category(self): + df = pd.DataFrame({'x': np.arange(8), + 'y': np.arange(8) // 4, + 'z': np.arange(8) % 2}) + + expected = pd.DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) + expected.index = Index([0, 1, 'All'], name='y') + expected.columns = Index([0, 1, 'All'], name='z') + + df.y = df.y.astype('category') + df.z = df.z.astype('category') + table = df.pivot_table('x', 'y', 'z', margins=True) tm.assert_frame_equal(table, expected) def test_categorical_aggfunc(self): From 3ab56bde78e85939a8c5ba73f86ddb888483d395 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 26 Jul 2017 22:30:56 -0700 Subject: [PATCH 835/933] MAINT: Add missing space in parsers.pyx "2< heuristic" --> "2 < heuristic" --- pandas/_libs/parsers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 543a943aea311..29ff9c401df48 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -563,7 +563,7 @@ cdef class TextReader: # compute buffer_lines as function of table width heuristic = 2**20 // self.table_width self.buffer_lines = 1 - while self.buffer_lines * 2< heuristic: + while self.buffer_lines * 2 < heuristic: self.buffer_lines *= 2 def __init__(self, *args, **kwargs): From c6e5bf6bfdaad8945120bd54600b3eac79de26b2 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 26 Jul 2017 22:48:56 -0700 Subject: [PATCH 836/933] MAINT: Add missing paren around print statement Stray verbose print statement in parsers.pyx was bare without any parentheses. --- pandas/_libs/parsers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 29ff9c401df48..077c355e785a3 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1013,7 +1013,7 @@ cdef class TextReader: cdef _end_clock(self, what): if self.verbose: elapsed = time.time() - self.clocks.pop(-1) - print '%s took: %.2f ms' % (what, elapsed * 1000) + print('%s took: %.2f ms' % (what, elapsed * 1000)) def set_noconvert(self, i): self.noconvert.add(i) From 465c59f964c8d71d8bedd16fcaa00e4328177cb1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 27 Jul 2017 05:57:20 -0400 Subject: [PATCH 837/933] DOC: fix typos in missing.rst xref #16972 --- doc/source/missing_data.rst | 12 ++++++------ pandas/core/frame.py | 2 +- pandas/core/generic.py | 4 ++-- pandas/core/indexes/base.py | 4 ++-- pandas/core/series.py | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index e40b7d460fef8..5c10df25051a2 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -36,7 +36,7 @@ When / why does data become missing? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Some might quibble over our usage of *missing*. By "missing" we simply mean -**na** or "not present for whatever reason". Many data sets simply arrive with +**NA** or "not present for whatever reason". Many data sets simply arrive with missing data, either because it exists and was not collected or it never existed. For example, in a collection of financial time series, some of the time series might start on different dates. Thus, values prior to the start date @@ -63,12 +63,12 @@ to handling missing data. 
While ``NaN`` is the default missing value marker for reasons of computational speed and convenience, we need to be able to easily detect this value with data of different types: floating point, integer, boolean, and general object. In many cases, however, the Python ``None`` will -arise and we wish to also consider that "missing" or "na". +arise and we wish to also consider that "missing" or "not available" or "NA". .. note:: Prior to version v0.10.0 ``inf`` and ``-inf`` were also - considered to be "na" in computations. This is no longer the case by + considered to be "NA" in computations. This is no longer the case by default; use the ``mode.use_inf_as_na`` option to recover it. .. _missing.isna: @@ -206,7 +206,7 @@ with missing data. Filling missing values: fillna ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The **fillna** function can "fill in" NA values with non-na data in a couple +The **fillna** function can "fill in" NA values with non-NA data in a couple of ways, which we illustrate: **Replace NA with a scalar value** @@ -220,7 +220,7 @@ of ways, which we illustrate: **Fill gaps forward or backward** Using the same filling arguments as :ref:`reindexing `, we -can propagate non-na values forward or backward: +can propagate non-NA values forward or backward: .. ipython:: python @@ -540,7 +540,7 @@ String/Regular Expression Replacement `__ if this is unclear. -Replace the '.' with ``nan`` (str -> str) +Replace the '.' with ``NaN`` (str -> str) .. ipython:: python diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6c72fa648559a..e546e96f253c7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3213,7 +3213,7 @@ def isna(self): def isnull(self): return super(DataFrame, self).isnull() - @Appender(_shared_docs['isna'] % _shared_doc_kwargs) + @Appender(_shared_docs['notna'] % _shared_doc_kwargs) def notna(self): return super(DataFrame, self).notna() diff --git a/pandas/core/generic.py b/pandas/core/generic.py index abccd76b2fbcb..fbd26655798bd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4534,7 +4534,7 @@ def asof(self, where, subset=None): # Action Methods _shared_docs['isna'] = """ - Return a boolean same-sized object indicating if the values are na. + Return a boolean same-sized object indicating if the values are NA. See Also -------- @@ -4553,7 +4553,7 @@ def isnull(self): _shared_docs['notna'] = """ Return a boolean same-sized object indicating if the values are - not na. + not NA. 
See Also -------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index fd9abcfb726bf..411428e001c81 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1852,7 +1852,7 @@ def isna(self): Returns ------- - a boolean array of whether my values are na + a boolean array of whether my values are NA See also -------- @@ -1870,7 +1870,7 @@ def notna(self): Returns ------- - a boolean array of whether my values are not na + a boolean array of whether my values are not NA See also -------- diff --git a/pandas/core/series.py b/pandas/core/series.py index fb5819b2748a0..60d268c89a9d7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2792,7 +2792,7 @@ def isna(self): def isnull(self): return super(Series, self).isnull() - @Appender(generic._shared_docs['isna'] % _shared_doc_kwargs) + @Appender(generic._shared_docs['notna'] % _shared_doc_kwargs) def notna(self): return super(Series, self).notna() From b03f7e52e859c5d20141a47aa4d6880a321af84d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 29 Jul 2017 23:58:03 +0200 Subject: [PATCH 838/933] DOC: further clean-up null/na changes (#17113) --- doc/source/basics.rst | 4 +-- doc/source/io.rst | 5 ---- doc/source/missing_data.rst | 2 +- doc/source/whatsnew/v0.10.0.txt | 46 +++++++++++++++++++++++++++------ doc/source/whatsnew/v0.4.x.txt | 2 +- pandas/core/config_init.py | 4 +-- 6 files changed, 44 insertions(+), 19 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index c8138d795b836..fe20a7eb2b786 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -511,7 +511,7 @@ optional ``level`` parameter which applies only if the object has a :header: "Function", "Description" :widths: 20, 80 - ``count``, Number of non-na observations + ``count``, Number of non-NA observations ``sum``, Sum of values ``mean``, Mean of values ``mad``, Mean absolute deviation @@ -541,7 +541,7 @@ will exclude NAs on Series input by default: np.mean(df['one'].values) ``Series`` also has a method :meth:`~Series.nunique` which will return the -number of unique non-na values: +number of unique non-NA values: .. ipython:: python diff --git a/doc/source/io.rst b/doc/source/io.rst index 149c86aead135..bf68a0cae1d27 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -137,7 +137,6 @@ usecols : array-like or callable, default ``None`` Using this parameter results in much faster parsing time and lower memory usage. as_recarray : boolean, default ``False`` - .. deprecated:: 0.18.2 Please call ``pd.read_csv(...).to_records()`` instead. @@ -193,7 +192,6 @@ skiprows : list-like or integer, default ``None`` skipfooter : int, default ``0`` Number of lines at bottom of file to skip (unsupported with engine='c'). skip_footer : int, default ``0`` - .. deprecated:: 0.19.0 Use the ``skipfooter`` parameter instead, as they are identical @@ -208,13 +206,11 @@ low_memory : boolean, default ``True`` use the ``chunksize`` or ``iterator`` parameter to return the data in chunks. (Only valid with C parser) buffer_lines : int, default None - .. deprecated:: 0.19.0 Argument removed because its value is not respected by the parser compact_ints : boolean, default False - .. deprecated:: 0.19.0 Argument moved to ``pd.to_numeric`` @@ -223,7 +219,6 @@ compact_ints : boolean, default False parser will attempt to cast it as the smallest integer ``dtype`` possible, either signed or unsigned depending on the specification from the ``use_unsigned`` parameter. use_unsigned : boolean, default False - .. 
deprecated:: 0.18.2 Argument moved to ``pd.to_numeric`` diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 5c10df25051a2..d54288baa389b 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -36,7 +36,7 @@ When / why does data become missing? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Some might quibble over our usage of *missing*. By "missing" we simply mean -**NA** or "not present for whatever reason". Many data sets simply arrive with +**NA** ("not available") or "not present for whatever reason". Many data sets simply arrive with missing data, either because it exists and was not collected or it never existed. For example, in a collection of financial time series, some of the time series might start on different dates. Thus, values prior to the start date diff --git a/doc/source/whatsnew/v0.10.0.txt b/doc/source/whatsnew/v0.10.0.txt index cf5369466308c..f0db1d82252c1 100644 --- a/doc/source/whatsnew/v0.10.0.txt +++ b/doc/source/whatsnew/v0.10.0.txt @@ -128,15 +128,45 @@ labeled the aggregated group with the end of the interval: the next day). ``notnull``. That they ever were was a relic of early pandas. This behavior can be re-enabled globally by the ``mode.use_inf_as_null`` option: -.. ipython:: python +.. code-block:: ipython - s = pd.Series([1.5, np.inf, 3.4, -np.inf]) - pd.isnull(s) - s.fillna(0) - pd.set_option('use_inf_as_null', True) - pd.isnull(s) - s.fillna(0) - pd.reset_option('use_inf_as_null') + In [6]: s = pd.Series([1.5, np.inf, 3.4, -np.inf]) + + In [7]: pd.isnull(s) + Out[7]: + 0 False + 1 False + 2 False + 3 False + Length: 4, dtype: bool + + In [8]: s.fillna(0) + Out[8]: + 0 1.500000 + 1 inf + 2 3.400000 + 3 -inf + Length: 4, dtype: float64 + + In [9]: pd.set_option('use_inf_as_null', True) + + In [10]: pd.isnull(s) + Out[10]: + 0 False + 1 True + 2 False + 3 True + Length: 4, dtype: bool + + In [11]: s.fillna(0) + Out[11]: + 0 1.5 + 1 0.0 + 2 3.4 + 3 0.0 + Length: 4, dtype: float64 + + In [12]: pd.reset_option('use_inf_as_null') - Methods with the ``inplace`` option now all return ``None`` instead of the calling object. E.g. code written like ``df = df.fillna(0, inplace=True)`` diff --git a/doc/source/whatsnew/v0.4.x.txt b/doc/source/whatsnew/v0.4.x.txt index 237ea84425051..ed9352059a6dc 100644 --- a/doc/source/whatsnew/v0.4.x.txt +++ b/doc/source/whatsnew/v0.4.x.txt @@ -9,7 +9,7 @@ New Features - Added Python 3 support using 2to3 (:issue:`200`) - :ref:`Added ` ``name`` attribute to ``Series``, now prints as part of ``Series.__repr__`` -- :ref:`Added ` instance methods ``isnull`` and ``notnull`` to +- :ref:`Added ` instance methods ``isnull`` and ``notnull`` to Series (:issue:`209`, :issue:`203`) - :ref:`Added ` ``Series.align`` method for aligning two series with choice of join method (ENH56_) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 76e30a6fb9d52..3e753aacf7c71 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -398,8 +398,8 @@ def table_schema_cb(key): use_inf_as_na_doc = """ : boolean - True means treat None, NaN, INF, -INF as na (old way), - False means None and NaN are null, but INF, -INF are not na + True means treat None, NaN, INF, -INF as NA (old way), + False means None and NaN are null, but INF, -INF are not NA (new way). 
""" From 6b8e43688f3363ab19c8a3049e0ade70c13bd4b2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 30 Jul 2017 02:43:25 -0700 Subject: [PATCH 839/933] BUG: Allow pd.unique to accept tuple of strings (#17108) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/algorithms.py | 2 ++ pandas/tests/test_algos.py | 9 +++++++++ 3 files changed, 12 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 0025f8d098d81..2d55144848e42 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -327,3 +327,4 @@ Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) - Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) +- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4ca658b35a276..f2359f3ff1a9d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -170,6 +170,8 @@ def _ensure_arraylike(values): ABCIndexClass, ABCSeries)): inferred = lib.infer_dtype(values) if inferred in ['mixed', 'string', 'unicode']: + if isinstance(values, tuple): + values = list(values) values = lib.list_to_object_array(values) else: values = np.asarray(values) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 0e86ec123efea..b26089ea7a822 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -415,6 +415,15 @@ def test_order_of_appearance(self): expected = pd.Categorical(list('abc')) tm.assert_categorical_equal(result, expected) + @pytest.mark.parametrize("arg ,expected", [ + (('1', '1', '2'), np.array(['1', '2'], dtype=object)), + (('foo',), np.array(['foo'], dtype=object)) + ]) + def test_tuple_with_strings(self, arg, expected): + # see GH 17108 + result = pd.unique(arg) + tm.assert_numpy_array_equal(result, expected) + class TestIsin(object): From f2b0bdc9bc4e57e101e306db7555eb7db28172e9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 30 Jul 2017 15:54:02 -0700 Subject: [PATCH 840/933] BUG: Allow Series with same name with crosstab (#16028) Closes gh-13279 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/reshape/pivot.py | 13 +++++++++++-- pandas/tests/reshape/test_pivot.py | 10 ++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2d55144848e42..1931ffff4b217 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -312,6 +312,7 @@ Reshaping - Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`) - Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) - Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`) +- Bug in ``pd.crosstab()`` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`) Numeric ^^^^^^^ diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index fe525eb0a3c87..d4ea49c130add 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -148,8 +148,17 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', table = agged if table.index.nlevels > 1: - to_unstack = [agged.index.names[i] or i - for i in 
range(len(index), len(keys))] + # Related GH #17123 + # If index_names are integers, determine whether the integers refer + # to the level position or name. + index_names = agged.index.names[:len(index)] + to_unstack = [] + for i in range(len(index), len(keys)): + name = agged.index.names[i] + if name is None or name in index_names: + to_unstack.append(i) + else: + to_unstack.append(name) table = agged.unstack(to_unstack) if not dropna: diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index ee6c32cd0a208..879ac96680fbb 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1513,6 +1513,16 @@ def test_crosstab_with_numpy_size(self): columns=expected_column) tm.assert_frame_equal(result, expected) + def test_crosstab_dup_index_names(self): + # GH 13279 + s = pd.Series(range(3), name='foo') + result = pd.crosstab(s, s) + expected_index = pd.Index(range(3), name='foo') + expected = pd.DataFrame(np.eye(3, dtype=np.int64), + index=expected_index, + columns=expected_index) + tm.assert_frame_equal(result, expected) + class TestPivotAnnual(object): """ From 7358f096ef76207b05bcce0bd02f3a45246e8b09 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 1 Aug 2017 14:19:16 -0400 Subject: [PATCH 841/933] COMPAT: make sure use_inf_as_null is deprecated (#17126) closes #17115 --- pandas/core/config_init.py | 7 +++++-- pandas/tests/series/test_missing.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 3e753aacf7c71..875ab8249f953 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -412,8 +412,11 @@ def use_inf_as_na_cb(key): _use_inf_as_na(key) -cf.register_option('mode.use_inf_as_na', False, use_inf_as_na_doc, - cb=use_inf_as_na_cb) +with cf.config_prefix('mode'): + cf.register_option('use_inf_as_na', False, use_inf_as_na_doc, + cb=use_inf_as_na_cb) + cf.register_option('use_inf_as_null', False, use_inf_as_null_doc, + cb=use_inf_as_na_cb) cf.deprecate_option('mode.use_inf_as_null', msg=use_inf_as_null_doc, rkey='mode.use_inf_as_na') diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 2d20ac9685914..01bf7274fd384 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -410,6 +410,22 @@ def test_isna_for_inf(self): tm.assert_series_equal(r, e) tm.assert_series_equal(dr, de) + @tm.capture_stdout + def test_isnull_for_inf_deprecated(self): + # gh-17115 + s = Series(['a', np.inf, np.nan, 1.0]) + with tm.assert_produces_warning(DeprecationWarning, + check_stacklevel=False): + pd.set_option('mode.use_inf_as_null', True) + r = s.isna() + dr = s.dropna() + pd.reset_option('mode.use_inf_as_null') + + e = Series([False, True, True, False]) + de = Series(['a', 1.0], index=[0, 3]) + tm.assert_series_equal(r, e) + tm.assert_series_equal(dr, de) + def test_fillna(self): ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5)) From ab49d1fcda17cdb5571959a0d85d5ee872638b4c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 1 Aug 2017 16:09:22 -0400 Subject: [PATCH 842/933] CI: bump version of xlsxwriter to 0.5.2 (#17142) --- ci/install_travis.sh | 3 --- ci/requirements-2.7.pip | 2 ++ ci/requirements-2.7.run | 6 ++---- ci/requirements-2.7_COMPAT.pip | 2 ++ ci/requirements-2.7_COMPAT.run | 7 ++----- ci/requirements-2.7_LOCALE.pip | 2 ++ ci/requirements-2.7_LOCALE.run | 5 +---- ci/requirements-2.7_SLOW.run | 1 - ci/requirements-3.6.run | 2 +- 
ci/requirements-3.6_LOCALE.run | 2 +- ci/requirements-3.6_LOCALE_SLOW.run | 2 +- doc/source/install.rst | 3 ++- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/nanops.py | 19 +++++++++++++++++-- 14 files changed, 34 insertions(+), 23 deletions(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 8cf6f2ce636da..dcc1656ce3dd7 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -52,9 +52,6 @@ conda update -q conda echo echo "[add channels]" -# add the pandas channel to take priority -# to add extra packages -conda config --add channels pandas || exit 1 conda config --remove channels defaults || exit 1 conda config --add channels defaults || exit 1 diff --git a/ci/requirements-2.7.pip b/ci/requirements-2.7.pip index eb796368e7820..876d9e978fa84 100644 --- a/ci/requirements-2.7.pip +++ b/ci/requirements-2.7.pip @@ -1,5 +1,7 @@ blosc pandas-gbq +html5lib +beautifulsoup4 pathlib backports.lzma py diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run index 62e31e4ae24e3..7152cb2c8b605 100644 --- a/ci/requirements-2.7.run +++ b/ci/requirements-2.7.run @@ -10,13 +10,11 @@ xlrd=0.9.2 sqlalchemy=0.9.6 lxml=3.2.1 scipy -xlsxwriter=0.4.6 +xlsxwriter=0.5.2 s3fs bottleneck -psycopg2=2.5.2 +psycopg2 patsy pymysql=0.6.3 -html5lib=1.0b2 -beautiful-soup=4.2.1 jinja2=2.8 xarray=0.8.0 diff --git a/ci/requirements-2.7_COMPAT.pip b/ci/requirements-2.7_COMPAT.pip index 9533a630d06a4..13cd35a923124 100644 --- a/ci/requirements-2.7_COMPAT.pip +++ b/ci/requirements-2.7_COMPAT.pip @@ -1,2 +1,4 @@ +html5lib==1.0b2 +beautifulsoup4==4.2.0 openpyxl argparse diff --git a/ci/requirements-2.7_COMPAT.run b/ci/requirements-2.7_COMPAT.run index d27b6a72c2d15..b94f4ab7b27d1 100644 --- a/ci/requirements-2.7_COMPAT.run +++ b/ci/requirements-2.7_COMPAT.run @@ -4,13 +4,10 @@ pytz=2013b scipy=0.11.0 xlwt=0.7.5 xlrd=0.9.2 -bottleneck=0.8.0 numexpr=2.2.2 pytables=3.0.0 -html5lib=1.0b2 -beautiful-soup=4.2.0 -psycopg2=2.5.1 +psycopg2 pymysql=0.6.0 sqlalchemy=0.7.8 -xlsxwriter=0.4.6 +xlsxwriter=0.5.2 jinja2=2.8 diff --git a/ci/requirements-2.7_LOCALE.pip b/ci/requirements-2.7_LOCALE.pip index cf8e6b8b3d3a6..1b825bbf492ca 100644 --- a/ci/requirements-2.7_LOCALE.pip +++ b/ci/requirements-2.7_LOCALE.pip @@ -1 +1,3 @@ +html5lib==1.0b2 +beautifulsoup4==4.2.1 blosc diff --git a/ci/requirements-2.7_LOCALE.run b/ci/requirements-2.7_LOCALE.run index 5d7cc31b7d55e..8e360cf74b081 100644 --- a/ci/requirements-2.7_LOCALE.run +++ b/ci/requirements-2.7_LOCALE.run @@ -3,12 +3,9 @@ pytz=2013b numpy=1.8.2 xlwt=0.7.5 openpyxl=1.6.2 -xlsxwriter=0.4.6 +xlsxwriter=0.5.2 xlrd=0.9.2 -bottleneck=0.8.0 matplotlib=1.3.1 sqlalchemy=0.8.1 -html5lib=1.0b2 lxml=3.2.1 scipy -beautiful-soup=4.2.1 diff --git a/ci/requirements-2.7_SLOW.run b/ci/requirements-2.7_SLOW.run index c2d2a14285ad6..0a549554f5219 100644 --- a/ci/requirements-2.7_SLOW.run +++ b/ci/requirements-2.7_SLOW.run @@ -13,7 +13,6 @@ pytables sqlalchemy lxml s3fs -bottleneck psycopg2 pymysql html5lib diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index ef66ebeb336f3..00db27d3f2704 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -16,7 +16,7 @@ sqlalchemy pymysql feather-format pyarrow -# psycopg2 (not avail on defaults ATM) +psycopg2 beautifulsoup4 s3fs xarray diff --git a/ci/requirements-3.6_LOCALE.run b/ci/requirements-3.6_LOCALE.run index ae456f4f9f38a..ad54284c6f7e3 100644 --- a/ci/requirements-3.6_LOCALE.run +++ b/ci/requirements-3.6_LOCALE.run @@ -15,7 +15,7 @@ jinja2 sqlalchemy pymysql # feather-format (not available on defaults 
ATM) -# psycopg2 (not avail on defaults ATM) +psycopg2 beautifulsoup4 s3fs xarray diff --git a/ci/requirements-3.6_LOCALE_SLOW.run b/ci/requirements-3.6_LOCALE_SLOW.run index 28131031f0bbd..ad54284c6f7e3 100644 --- a/ci/requirements-3.6_LOCALE_SLOW.run +++ b/ci/requirements-3.6_LOCALE_SLOW.run @@ -15,7 +15,7 @@ jinja2 sqlalchemy pymysql # feather-format (not available on defaults ATM) -# psycopg2 (not available on defaults ATM) +psycopg2 beautifulsoup4 s3fs xarray diff --git a/doc/source/install.rst b/doc/source/install.rst index 48d51e1200447..c185a7cf4b875 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -217,7 +217,8 @@ Recommended Dependencies If installed, must be Version 2.4.6 or higher. * `bottleneck `__: for accelerating certain types of ``nan`` - evaluations. ``bottleneck`` uses specialized cython routines to achieve large speedups. + evaluations. ``bottleneck`` uses specialized cython routines to achieve large speedups. If installed, + must be Version 1.0.0 or higher. .. note:: diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 1931ffff4b217..589e88dc4aaf6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -205,6 +205,7 @@ Other API Changes ^^^^^^^^^^^^^^^^^ - Support has been dropped for Python 3.4 (:issue:`15251`) +- Support has been dropped for bottleneck < 1.0.0 (:issue:`15214`) - The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`) - Accessing a non-existent attribute on a closed :class:`~pandas.HDFStore` will now raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 5bebb8eb65b23..e2777cb56374e 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,6 +1,8 @@ import itertools import functools import operator +import warnings +from distutils.version import LooseVersion import numpy as np from pandas import compat @@ -20,11 +22,24 @@ from pandas.core.config import get_option from pandas.core.common import _values_from_object +_BOTTLENECK_INSTALLED = False +_MIN_BOTTLENECK_VERSION = '1.0.0' + try: import bottleneck as bn - _BOTTLENECK_INSTALLED = True + ver = bn.__version__ + _BOTTLENECK_INSTALLED = ver >= LooseVersion(_MIN_BOTTLENECK_VERSION) + + if not _BOTTLENECK_INSTALLED: + warnings.warn( + "The installed version of bottleneck {ver} is not supported " + "in pandas and will not be used\nThe minimum supported " + "version is {min_ver}\n".format( + ver=ver, min_ver=_MIN_BOTTLENECK_VERSION), UserWarning) + except ImportError: # pragma: no cover - _BOTTLENECK_INSTALLED = False + pass + _USE_BOTTLENECK = False From 6ac609d8b43769fd80866ebfbf0749e75dddcf04 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 1 Aug 2017 15:33:00 -0700 Subject: [PATCH 843/933] DOC: Clean up instructions in ISSUE_TEMPLATE (#17146) --- .github/ISSUE_TEMPLATE.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1f614b54b1f71..6ab03c9907475 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -8,11 +8,18 @@ [this should explain **why** the current behaviour is a problem and why the expected output is a better solution.] +**Note**: We receive a lot of issues on our GitHub tracker, so it is very possible that your issue has been posted before. +Please check first before submitting so that we do not have to handle and close duplicates!
+ +**Note**: Many problems can be resolved by simply upgrading `pandas` to the latest version. Before submitting, please check +if that solution works for you. If possible, you may want to check if `master` addresses this issue, but that is not necessary. + #### Expected Output #### Output of ``pd.show_versions()``
-# Paste the output here pd.show_versions() here + +[paste the output of ``pd.show_versions()`` below this line]
From 563fa082e32af200d98cfbc1dc30b7ea5247d5d2 Mon Sep 17 00:00:00 2001 From: Floris Kint Date: Tue, 1 Aug 2017 15:36:40 -0700 Subject: [PATCH 844/933] Add missing space to the NotImplementedError's message for compound dtypes (#17140) --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fbd26655798bd..ec44dce0da9bc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -152,7 +152,7 @@ def _validate_dtype(self, dtype): # a compound dtype if dtype.kind == 'V': raise NotImplementedError("compound dtypes are not implemented" - "in the {0} constructor" + " in the {0} constructor" .format(self.__class__.__name__)) return dtype From f394409b0053ffb8a24ffef34f1f758175a7ecf5 Mon Sep 17 00:00:00 2001 From: jebob Date: Tue, 1 Aug 2017 23:38:18 +0100 Subject: [PATCH 845/933] DOC: (de)type the return value of concat (#17079) (#17119) --- pandas/core/reshape/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 96603b6adc3b0..e199ec2710367 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -65,7 +65,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, Returns ------- - concatenated : type of objects + concatenated : object, type of objs Notes ----- From 611d29606263d12ecdcc38a2b8a790e99aa443d6 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 1 Aug 2017 15:44:09 -0700 Subject: [PATCH 846/933] BUG: Thoroughly dedup column names in read_csv (#17095) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/parsers.py | 18 +++++++++++------- pandas/tests/io/parser/mangle_dupes.py | 24 +++++++++++++++++++++++- 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 589e88dc4aaf6..9ec859d5e5b0f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -279,6 +279,7 @@ I/O ^^^ - Bug in :func:`read_csv` in which columns were not being thoroughly de-duplicated (:issue:`17060`) +- Bug in :func:`read_csv` in which specified column names were not being thoroughly de-duplicated (:issue:`17095`) - Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) - Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696`, :issue:`16798`). - Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`). diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index ea0bb104338b6..41b0cdd6dd250 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1318,14 +1318,18 @@ def _maybe_dedup_names(self, names): # would be nice! 
if self.mangle_dupe_cols: names = list(names) # so we can index - counts = {} + counts = defaultdict(int) for i, col in enumerate(names): - cur_count = counts.get(col, 0) + cur_count = counts[col] - if cur_count > 0: - names[i] = '%s.%d' % (col, cur_count) + while cur_count > 0: + counts[col] = cur_count + 1 + col = '%s.%d' % (col, cur_count) + cur_count = counts[col] + + names[i] = col counts[col] = cur_count + 1 return names @@ -2330,15 +2334,15 @@ def _infer_columns(self): this_columns.append(c) if not have_mi_columns and self.mangle_dupe_cols: - counts = {} + counts = defaultdict(int) for i, col in enumerate(this_columns): - cur_count = counts.get(col, 0) + cur_count = counts[col] while cur_count > 0: counts[col] = cur_count + 1 col = "%s.%d" % (col, cur_count) - cur_count = counts.get(col, 0) + cur_count = counts[col] this_columns[i] = col counts[col] = cur_count + 1 diff --git a/pandas/tests/io/parser/mangle_dupes.py b/pandas/tests/io/parser/mangle_dupes.py index 70ecfe51c0f09..e2efb1377f8b0 100644 --- a/pandas/tests/io/parser/mangle_dupes.py +++ b/pandas/tests/io/parser/mangle_dupes.py @@ -25,7 +25,7 @@ def test_basic(self): mangle_dupe_cols=True) assert list(df.columns) == expected - def test_thorough_mangle(self): + def test_thorough_mangle_columns(self): # see gh-17060 data = "a,a,a.1\n1,2,3" df = self.read_csv(StringIO(data), sep=",", mangle_dupe_cols=True) @@ -40,3 +40,25 @@ def test_thorough_mangle(self): df = self.read_csv(StringIO(data), sep=",", mangle_dupe_cols=True) assert list(df.columns) == ["a", "a.1", "a.3", "a.1.1", "a.2", "a.2.1", "a.3.1"] + + def test_thorough_mangle_names(self): + # see gh-17095 + data = "a,b,b\n1,2,3" + names = ["a.1", "a.1", "a.1.1"] + df = self.read_csv(StringIO(data), sep=",", names=names, + mangle_dupe_cols=True) + assert list(df.columns) == ["a.1", "a.1.1", "a.1.1.1"] + + data = "a,b,c,d,e,f\n1,2,3,4,5,6" + names = ["a", "a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1"] + df = self.read_csv(StringIO(data), sep=",", names=names, + mangle_dupe_cols=True) + assert list(df.columns) == ["a", "a.1", "a.1.1", "a.1.1.1", + "a.1.1.1.1", "a.1.1.1.1.1"] + + data = "a,b,c,d,e,f,g\n1,2,3,4,5,6,7" + names = ["a", "a", "a.3", "a.1", "a.2", "a", "a"] + df = self.read_csv(StringIO(data), sep=",", names=names, + mangle_dupe_cols=True) + assert list(df.columns) == ["a", "a.1", "a.3", "a.1.1", + "a.2", "a.2.1", "a.3.1"] From 3ed51c2b4b24c391587b78b9dd3faea8d09066e2 Mon Sep 17 00:00:00 2001 From: Alan Yee Date: Wed, 2 Aug 2017 02:46:59 -0700 Subject: [PATCH 847/933] DOC: Additions/updates to documentation (#17150) --- README.md | 58 +++++++++++++++++++++--------------------- doc/source/gotchas.rst | 16 ++++++------ 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index dc74828ba9863..ac043f5586498 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@
@@ -61,7 +61,7 @@ @@ -123,31 +123,31 @@ Here are just a few of the things that pandas does well: moving window linear regressions, date shifting and lagging, etc. - [missing-data]: http://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data - [insertion-deletion]: http://pandas.pydata.org/pandas-docs/stable/dsintro.html#column-selection-addition-deletion - [alignment]: http://pandas.pydata.org/pandas-docs/stable/dsintro.html?highlight=alignment#intro-to-data-structures - [groupby]: http://pandas.pydata.org/pandas-docs/stable/groupby.html#group-by-split-apply-combine - [conversion]: http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe - [slicing]: http://pandas.pydata.org/pandas-docs/stable/indexing.html#slicing-ranges - [fancy-indexing]: http://pandas.pydata.org/pandas-docs/stable/indexing.html#advanced-indexing-with-ix - [subsetting]: http://pandas.pydata.org/pandas-docs/stable/indexing.html#boolean-indexing - [merging]: http://pandas.pydata.org/pandas-docs/stable/merging.html#database-style-dataframe-joining-merging - [joining]: http://pandas.pydata.org/pandas-docs/stable/merging.html#joining-on-index - [reshape]: http://pandas.pydata.org/pandas-docs/stable/reshaping.html#reshaping-and-pivot-tables - [pivot-table]: http://pandas.pydata.org/pandas-docs/stable/reshaping.html#pivot-tables-and-cross-tabulations - [mi]: http://pandas.pydata.org/pandas-docs/stable/indexing.html#hierarchical-indexing-multiindex - [flat-files]: http://pandas.pydata.org/pandas-docs/stable/io.html#csv-text-files - [excel]: http://pandas.pydata.org/pandas-docs/stable/io.html#excel-files - [db]: http://pandas.pydata.org/pandas-docs/stable/io.html#sql-queries - [hdfstore]: http://pandas.pydata.org/pandas-docs/stable/io.html#hdf5-pytables - [timeseries]: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#time-series-date-functionality + [missing-data]: https://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data + [insertion-deletion]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html#column-selection-addition-deletion + [alignment]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html?highlight=alignment#intro-to-data-structures + [groupby]: https://pandas.pydata.org/pandas-docs/stable/groupby.html#group-by-split-apply-combine + [conversion]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe + [slicing]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#slicing-ranges + [fancy-indexing]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#advanced-indexing-with-ix + [subsetting]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#boolean-indexing + [merging]: https://pandas.pydata.org/pandas-docs/stable/merging.html#database-style-dataframe-joining-merging + [joining]: https://pandas.pydata.org/pandas-docs/stable/merging.html#joining-on-index + [reshape]: https://pandas.pydata.org/pandas-docs/stable/reshaping.html#reshaping-and-pivot-tables + [pivot-table]: https://pandas.pydata.org/pandas-docs/stable/reshaping.html#pivot-tables-and-cross-tabulations + [mi]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#hierarchical-indexing-multiindex + [flat-files]: https://pandas.pydata.org/pandas-docs/stable/io.html#csv-text-files + [excel]: https://pandas.pydata.org/pandas-docs/stable/io.html#excel-files + [db]: https://pandas.pydata.org/pandas-docs/stable/io.html#sql-queries + [hdfstore]: https://pandas.pydata.org/pandas-docs/stable/io.html#hdf5-pytables + [timeseries]: 
https://pandas.pydata.org/pandas-docs/stable/timeseries.html#time-series-date-functionality ## Where to get it The source code is currently hosted on GitHub at: -http://github.com/pandas-dev/pandas +https://github.com/pandas-dev/pandas Binary installers for the latest released version are available at the [Python -package index](http://pypi.python.org/pypi/pandas/) and on conda. +package index](https://pypi.python.org/pypi/pandas) and on conda. ```sh # conda @@ -161,11 +161,11 @@ pip install pandas ## Dependencies - [NumPy](http://www.numpy.org): 1.7.0 or higher -- [python-dateutil](http://labix.org/python-dateutil): 1.5 or higher -- [pytz](http://pytz.sourceforge.net) +- [python-dateutil](https://labix.org/python-dateutil): 1.5 or higher +- [pytz](https://pythonhosted.org/pytz) - Needed for time zone support with ``pandas.date_range`` -See the [full installation instructions](http://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) +See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) for recommended and optional dependencies. ## Installation from sources @@ -197,13 +197,13 @@ mode](https://pip.pypa.io/en/latest/reference/pip_install.html#editable-installs pip install -e . ``` -See the full instructions for [installing from source](http://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source). +See the full instructions for [installing from source](https://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source). ## License -BSD +[BSD 3](LICENSE) ## Documentation -The official documentation is hosted on PyData.org: http://pandas.pydata.org/pandas-docs/stable/ +The official documentation is hosted on PyData.org: https://pandas.pydata.org/pandas-docs/stable The Sphinx documentation should provide a good starting point for learning how to use the library. Expect the docs to continue to expand as time goes on. @@ -223,7 +223,7 @@ Most development discussion is taking place on github in this repo. Further, the ## Contributing to pandas All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome. -A detailed overview on how to contribute can be found in the **[contributing guide.](http://pandas.pydata.org/pandas-docs/stable/contributing.html)** +A detailed overview on how to contribute can be found in the **[contributing guide.](https://pandas.pydata.org/pandas-docs/stable/contributing.html)** If you are simply looking to start working with the pandas codebase, navigate to the [GitHub “issues” tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [Difficulty Novice](https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+is%3Aissue+label%3A%22Difficulty+Novice%22) where you could start out. diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index a3a90f514f142..a3062b4086673 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -144,7 +144,7 @@ To evaluate single-element pandas objects in a boolean context, use the method ` Bitwise boolean ~~~~~~~~~~~~~~~ -Bitwise boolean operators like ``==`` and ``!=`` will return a boolean ``Series``, +Bitwise boolean operators like ``==`` and ``!=`` return a boolean ``Series``, which is almost always what you want anyways. .. 
code-block:: python @@ -194,7 +194,7 @@ For lack of ``NA`` (missing) support from the ground up in NumPy and Python in general, we were given the difficult choice between either - A *masked array* solution: an array of data and an array of boolean values - indicating whether a value + indicating whether a value is there or is missing - Using a special sentinel value, bit pattern, or set of sentinel values to denote ``NA`` across the dtypes @@ -247,16 +247,16 @@ dtype in order to store the NAs. These are summarized by this table: ``integer``, cast to ``float64`` ``boolean``, cast to ``object`` -While this may seem like a heavy trade-off, I have found very few -cases where this is an issue in practice. Some explanation for the motivation -here in the next section. +While this may seem like a heavy trade-off, I have found very few cases where +this is an issue in practice i.e. storing values greater than 2**53. Some +explanation for the motivation is in the next section. Why not make NumPy like R? ~~~~~~~~~~~~~~~~~~~~~~~~~~ Many people have suggested that NumPy should simply emulate the ``NA`` support present in the more domain-specific statistical programming language `R -`__. Part of the reason is the NumPy type hierarchy: +`__. Part of the reason is the NumPy type hierarchy: .. csv-table:: :header: "Typeclass","Dtypes" @@ -305,7 +305,7 @@ the ``DataFrame.copy`` method. If you are doing a lot of copying of DataFrame objects shared among threads, we recommend holding locks inside the threads where the data copying occurs. -See `this link `__ +See `this link `__ for more information. @@ -332,5 +332,5 @@ using something similar to the following: s = pd.Series(newx) See `the NumPy documentation on byte order -`__ for more +`__ for more details. From f4330611ff5ac1cbb4a89c4a7dab3d0900f9e64a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Aug 2017 05:47:57 -0400 Subject: [PATCH 848/933] ENH: add to/from_parquet with pyarrow & fastparquet (#15838) --- ci/install_travis.sh | 1 + ci/requirements-2.7.sh | 2 +- ci/requirements-3.5.sh | 4 +- ci/requirements-3.5_OSX.sh | 2 +- ci/requirements-3.6.pip | 1 + ci/requirements-3.6.run | 2 + ci/requirements-3.6_DOC.sh | 2 +- ci/requirements-3.6_WIN.run | 2 + doc/source/install.rst | 1 + doc/source/io.rst | 82 ++++++- doc/source/options.rst | 3 + doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/config_init.py | 12 + pandas/core/frame.py | 24 ++ pandas/io/api.py | 1 + pandas/io/feather_format.py | 4 +- pandas/io/parquet.py | 194 +++++++++++++++++ pandas/tests/api/test_api.py | 2 +- pandas/tests/io/test_parquet.py | 374 ++++++++++++++++++++++++++++++++ pandas/util/_print_versions.py | 1 + 20 files changed, 703 insertions(+), 12 deletions(-) create mode 100644 pandas/io/parquet.py create mode 100644 pandas/tests/io/test_parquet.py diff --git a/ci/install_travis.sh b/ci/install_travis.sh index dcc1656ce3dd7..df6969c7cc659 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -153,6 +153,7 @@ fi echo echo "[removing installed pandas]" conda remove pandas -y --force +pip uninstall -y pandas if [ "$BUILD_TEST" ]; then diff --git a/ci/requirements-2.7.sh b/ci/requirements-2.7.sh index 5b20617f55759..e3bd5e46026c5 100644 --- a/ci/requirements-2.7.sh +++ b/ci/requirements-2.7.sh @@ -4,4 +4,4 @@ source activate pandas echo "install 27" -conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 +conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 fastparquet diff --git a/ci/requirements-3.5.sh b/ci/requirements-3.5.sh index 
3b8fe793a413d..33db9c28c78a9 100644 --- a/ci/requirements-3.5.sh +++ b/ci/requirements-3.5.sh @@ -4,8 +4,8 @@ source activate pandas echo "install 35" -conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 - # pip install python-dateutil to get latest conda remove -n pandas python-dateutil --force pip install python-dateutil + +conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 diff --git a/ci/requirements-3.5_OSX.sh b/ci/requirements-3.5_OSX.sh index 39ea1a0cf67bf..c2978b175968c 100644 --- a/ci/requirements-3.5_OSX.sh +++ b/ci/requirements-3.5_OSX.sh @@ -4,4 +4,4 @@ source activate pandas echo "install 35_OSX" -conda install -n pandas -c conda-forge feather-format==0.3.1 +conda install -n pandas -c conda-forge feather-format==0.3.1 fastparquet diff --git a/ci/requirements-3.6.pip b/ci/requirements-3.6.pip index e69de29bb2d1d..753a60d6c119a 100644 --- a/ci/requirements-3.6.pip +++ b/ci/requirements-3.6.pip @@ -0,0 +1 @@ +brotlipy diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 00db27d3f2704..822144a80bc9a 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -17,6 +17,8 @@ pymysql feather-format pyarrow psycopg2 +python-snappy +fastparquet beautifulsoup4 s3fs xarray diff --git a/ci/requirements-3.6_DOC.sh b/ci/requirements-3.6_DOC.sh index 8c10a794a13b9..aec0f62148622 100644 --- a/ci/requirements-3.6_DOC.sh +++ b/ci/requirements-3.6_DOC.sh @@ -6,6 +6,6 @@ echo "[install DOC_BUILD deps]" pip install pandas-gbq -conda install -n pandas -c conda-forge feather-format pyarrow nbsphinx pandoc +conda install -n pandas -c conda-forge feather-format pyarrow nbsphinx pandoc fastparquet conda install -n pandas -c r r rpy2 --yes diff --git a/ci/requirements-3.6_WIN.run b/ci/requirements-3.6_WIN.run index 22aae8944d731..226caa458f6ee 100644 --- a/ci/requirements-3.6_WIN.run +++ b/ci/requirements-3.6_WIN.run @@ -13,3 +13,5 @@ numexpr pytables matplotlib blosc +fastparquet +pyarrow diff --git a/doc/source/install.rst b/doc/source/install.rst index c185a7cf4b875..01a01b1b58b4c 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -237,6 +237,7 @@ Optional Dependencies * `xarray `__: pandas like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended. * `PyTables `__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended. * `Feather Format `__: necessary for feather-based storage, version 0.3.1 or higher. +* ``Apache Parquet Format``, either `pyarrow `__ (>= 0.4.1) or `fastparquet `__ (>= 0.0.6) for parquet-based storage. The `snappy `__ and `brotli `__ are available for compression support. * `SQLAlchemy `__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. Some common drivers are: * `psycopg2 `__: for PostgreSQL diff --git a/doc/source/io.rst b/doc/source/io.rst index bf68a0cae1d27..0b97264abfcd7 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -43,6 +43,7 @@ object. 
The corresponding ``writer`` functions are object methods that are acces binary;`MS Excel `__;:ref:`read_excel`;:ref:`to_excel` binary;`HDF5 Format `__;:ref:`read_hdf`;:ref:`to_hdf` binary;`Feather Format `__;:ref:`read_feather`;:ref:`to_feather` + binary;`Parquet Format `__;:ref:`read_parquet`;:ref:`to_parquet` binary;`Msgpack `__;:ref:`read_msgpack`;:ref:`to_msgpack` binary;`Stata `__;:ref:`read_stata`;:ref:`to_stata` binary;`SAS `__;:ref:`read_sas`; @@ -209,7 +210,7 @@ buffer_lines : int, default None .. deprecated:: 0.19.0 Argument removed because its value is not respected by the parser - + compact_ints : boolean, default False .. deprecated:: 0.19.0 @@ -4087,7 +4088,7 @@ control compression: ``complevel`` and ``complib``. ``complevel`` specifies if and how hard data is to be compressed. ``complevel=0`` and ``complevel=None`` disables compression and ``0`_: Fast compression and decompression. .. versionadded:: 0.20.2 - + Support for alternative blosc compressors: - + - `blosc:blosclz `_ This is the default compressor for ``blosc`` - `blosc:lz4 @@ -4545,6 +4546,79 @@ Read from a feather file. import os os.remove('example.feather') + +.. _io.parquet: + +Parquet +------- + +.. versionadded:: 0.21.0 + +`Parquet `__ + +.. note:: + + These engines are very similar and should read/write nearly identical parquet format files. + These libraries differ by having different underlying dependencies (``fastparquet`` by using ``numba``, while ``pyarrow`` uses a c-library). + +.. ipython:: python + + df = pd.DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.date_range('20130101', periods=3), + 'g': pd.date_range('20130101', periods=3, tz='US/Eastern'), + 'h': pd.date_range('20130101', periods=3, freq='ns')}) + + df + df.dtypes + +Write to a parquet file. + +.. ipython:: python + + df.to_parquet('example_pa.parquet', engine='pyarrow') + df.to_parquet('example_fp.parquet', engine='fastparquet') + +Read from a parquet file. + +.. ipython:: python + + result = pd.read_parquet('example_pa.parquet', engine='pyarrow') + result = pd.read_parquet('example_fp.parquet', engine='fastparquet') + + result.dtypes + +.. ipython:: python + :suppress: + + import os + os.remove('example_pa.parquet') + os.remove('example_fp.parquet') + .. _io.sql: SQL Queries diff --git a/doc/source/options.rst b/doc/source/options.rst index 83b08acac5720..51d02bc89692a 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -414,6 +414,9 @@ io.hdf.default_format None default format writing format, 'table' io.hdf.dropna_table True drop ALL nan rows when appending to a table +io.parquet.engine None The engine to use as a default for + parquet reading and writing. If None + then try 'pyarrow' and 'fastparquet' mode.chained_assignment warn Raise an exception, warn, or no action if trying to use chained assignment, The default is warn diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9ec859d5e5b0f..fad6647d4de8d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -78,6 +78,7 @@ Other Enhancements - :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. 
(:issue:`16855`) - :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`) - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`) +- Integration with Apache Parquet, including new top-level ``pd.read_parquet()`` and ``DataFrame.to_parquet()`` methods, see :ref:`here `. .. _whatsnew_0210.api_breaking: diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 875ab8249f953..ea5c213dbe057 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -465,3 +465,15 @@ def _register_xlsx(engine, other): except ImportError: # fallback _register_xlsx('openpyxl', 'xlsxwriter') + +# Set up the io.parquet specific configuration. +parquet_engine_doc = """ +: string + The default parquet reader/writer engine. Available options: + 'auto', 'pyarrow', 'fastparquet', the default is 'auto' +""" + +with cf.config_prefix('io.parquet'): + cf.register_option( + 'engine', 'auto', parquet_engine_doc, + validator=is_one_of_factory(['auto', 'pyarrow', 'fastparquet'])) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e546e96f253c7..9d63bd2e120aa 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1598,6 +1598,30 @@ def to_feather(self, fname): from pandas.io.feather_format import to_feather to_feather(self, fname) + def to_parquet(self, fname, engine='auto', compression='snappy', + **kwargs): + """ + Write a DataFrame to the binary parquet format. + + .. versionadded:: 0.21.0 + + Parameters + ---------- + fname : str + string file path + engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' + Parquet library to use. If 'auto', then the option + 'io.parquet.engine' is used; if that is also 'auto', the first + library to be installed is used. + compression : str, optional, default 'snappy' + compression method, includes {'gzip', 'snappy', 'brotli'} + kwargs + Additional keyword arguments passed to the engine + """ + from pandas.io.parquet import to_parquet + to_parquet(self, fname, engine, + compression=compression, **kwargs) + @Substitution(header='Write out column names.
If a list of string is given, \ it is assumed to be aliases for the column names') @Appender(fmt.docstring_to_string, indents=1) diff --git a/pandas/io/api.py b/pandas/io/api.py index a4a25b78942db..f542a8176dce7 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -13,6 +13,7 @@ from pandas.io.sql import read_sql, read_sql_table, read_sql_query from pandas.io.sas import read_sas from pandas.io.feather_format import read_feather +from pandas.io.parquet import read_parquet from pandas.io.stata import read_stata from pandas.io.pickle import read_pickle, to_pickle from pandas.io.packers import read_msgpack, to_msgpack diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 86d58caa5e816..87a4931421d7d 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -19,7 +19,7 @@ def _try_import(): "you can install via conda\n" "conda install feather-format -c conda-forge\n" "or via pip\n" - "pip install feather-format\n") + "pip install -U feather-format\n") try: feather.__version__ >= LooseVersion('0.3.1') @@ -29,7 +29,7 @@ def _try_import(): "you can install via conda\n" "conda install feather-format -c conda-forge" "or via pip\n" - "pip install feather-format\n") + "pip install -U feather-format\n") return feather diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py new file mode 100644 index 0000000000000..0a4426b55b323 --- /dev/null +++ b/pandas/io/parquet.py @@ -0,0 +1,194 @@ +""" parquet compat """ + +from warnings import catch_warnings +from distutils.version import LooseVersion +from pandas import DataFrame, RangeIndex, Int64Index, get_option +from pandas.compat import range +from pandas.io.common import get_filepath_or_buffer + + +def get_engine(engine): + """ return our implementation """ + + if engine == 'auto': + engine = get_option('io.parquet.engine') + + if engine == 'auto': + # try engines in this order + try: + return PyArrowImpl() + except ImportError: + pass + + try: + return FastParquetImpl() + except ImportError: + pass + + if engine not in ['pyarrow', 'fastparquet']: + raise ValueError("engine must be one of 'pyarrow', 'fastparquet'") + + if engine == 'pyarrow': + return PyArrowImpl() + elif engine == 'fastparquet': + return FastParquetImpl() + + +class PyArrowImpl(object): + + def __init__(self): + # since pandas is a dependency of pyarrow + # we need to import on first use + + try: + import pyarrow + import pyarrow.parquet + except ImportError: + raise ImportError("pyarrow is required for parquet support\n\n" + "you can install via conda\n" + "conda install pyarrow -c conda-forge\n" + "\nor via pip\n" + "pip install -U pyarrow\n") + + if LooseVersion(pyarrow.__version__) < '0.4.1': + raise ImportError("pyarrow >= 0.4.1 is required for parquet " + "support\n\n" + "you can install via conda\n" + "conda install pyarrow -c conda-forge\n" + "\nor via pip\n" + "pip install -U pyarrow\n") + + self.api = pyarrow + + def write(self, df, path, compression='snappy', **kwargs): + path, _, _ = get_filepath_or_buffer(path) + table = self.api.Table.from_pandas(df, timestamps_to_ms=True) + self.api.parquet.write_table( + table, path, compression=compression, **kwargs) + + def read(self, path): + path, _, _ = get_filepath_or_buffer(path) + return self.api.parquet.read_table(path).to_pandas() + + +class FastParquetImpl(object): + + def __init__(self): + # since pandas is a dependency of fastparquet + # we need to import on first use + + try: + import fastparquet + except ImportError: + raise ImportError("fastparquet is required for parquet "
"support\n\n" + "you can install via conda\n" + "conda install fastparquet -c conda-forge\n" + "\nor via pip\n" + "pip install -U fastparquet") + + if LooseVersion(fastparquet.__version__) < '0.1.0': + raise ImportError("fastparquet >= 0.1.0 is required for parquet " + "support\n\n" + "you can install via conda\n" + "conda install fastparquet -c conda-forge\n" + "\nor via pip\n" + "pip install -U fastparquet") + + self.api = fastparquet + + def write(self, df, path, compression='snappy', **kwargs): + # thriftpy/protocol/compact.py:339: + # DeprecationWarning: tostring() is deprecated. + # Use tobytes() instead. + path, _, _ = get_filepath_or_buffer(path) + with catch_warnings(record=True): + self.api.write(path, df, + compression=compression, **kwargs) + + def read(self, path): + path, _, _ = get_filepath_or_buffer(path) + return self.api.ParquetFile(path).to_pandas() + + +def to_parquet(df, path, engine='auto', compression='snappy', **kwargs): + """ + Write a DataFrame to the parquet format. + + Parameters + ---------- + df : DataFrame + path : string + File path + engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' + Parquet library to use. If 'auto', then the option + 'io.parquet.engine' is used; if that is also 'auto', the first + library to be installed is used. + compression : str, optional, default 'snappy' + compression method, includes {'gzip', 'snappy', 'brotli'} + kwargs + Additional keyword arguments passed to the engine + """ + + impl = get_engine(engine) + + if not isinstance(df, DataFrame): + raise ValueError("to_parquet only supports IO with DataFrames") + + valid_types = {'string', 'unicode'} + + # validate index + # -------------- + + # validate that we have only a default index + # raise on anything else as we don't serialize the index + + if not isinstance(df.index, Int64Index): + raise ValueError("parquet does not support serializing {} " + "for the index; you can .reset_index() " + "to make the index into column(s)".format( + type(df.index))) + + if not df.index.equals(RangeIndex.from_range(range(len(df)))): + raise ValueError("parquet does not support serializing a " + "non-default index for the index; you " + "can .reset_index() to make the index " + "into column(s)") + + if df.index.name is not None: + raise ValueError("parquet does not serialize index meta-data on a " + "default index") + + # validate columns + # ---------------- + + # must have value column names (strings only) + if df.columns.inferred_type not in valid_types: + raise ValueError("parquet must have string column names") + + return impl.write(df, path, compression=compression) + + +def read_parquet(path, engine='auto', **kwargs): + """ + Load a parquet object from the file path, returning a DataFrame. + + .. versionadded:: 0.21.0 + + Parameters + ---------- + path : string + File path + engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' + Parquet reader library to use. If 'auto', then the option + 'io.parquet.engine' is used; if that is also 'auto', the first + library to be installed is used.
+ kwargs are passed to the engine + + Returns + ------- + DataFrame + + """ + + impl = get_engine(engine) + return impl.read(path) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 433ed7e517b1c..09cccd54b74f8 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -82,7 +82,7 @@ class TestPDApi(Base): 'read_gbq', 'read_hdf', 'read_html', 'read_json', 'read_msgpack', 'read_pickle', 'read_sas', 'read_sql', 'read_sql_query', 'read_sql_table', 'read_stata', - 'read_table', 'read_feather'] + 'read_table', 'read_feather', 'read_parquet'] # top-level to_* funcs funcs_to = ['to_datetime', 'to_msgpack', diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py new file mode 100644 index 0000000000000..ff0935c7dcc6f --- /dev/null +++ b/pandas/tests/io/test_parquet.py @@ -0,0 +1,374 @@ +""" test parquet compat """ + +import pytest +import datetime +from warnings import catch_warnings + +import numpy as np +import pandas as pd +from pandas.compat import PY3, is_platform_windows +from pandas.io.parquet import (to_parquet, read_parquet, get_engine, + PyArrowImpl, FastParquetImpl) +from pandas.util import testing as tm + +try: + import pyarrow # noqa + _HAVE_PYARROW = True +except ImportError: + _HAVE_PYARROW = False + +try: + import fastparquet # noqa + _HAVE_FASTPARQUET = True +except ImportError: + _HAVE_FASTPARQUET = False + + +# setup engines & skips +@pytest.fixture(params=[ + pytest.mark.skipif(not _HAVE_FASTPARQUET, + reason='fastparquet is not installed')('fastparquet'), + pytest.mark.skipif(not _HAVE_PYARROW, + reason='pyarrow is not installed')('pyarrow')]) +def engine(request): + return request.param + + +@pytest.fixture +def pa(): + if not _HAVE_PYARROW: + pytest.skip("pyarrow is not installed") + if is_platform_windows(): + pytest.skip("pyarrow-parquet not building on windows") + return 'pyarrow' + + +@pytest.fixture +def fp(): + if not _HAVE_FASTPARQUET: + pytest.skip("fastparquet is not installed") + return 'fastparquet' + + +@pytest.fixture +def df_compat(): + return pd.DataFrame({'A': [1, 2, 3], 'B': 'foo'}) + + +@pytest.fixture +def df_cross_compat(): + df = pd.DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.date_range('20130101', periods=3), + 'g': pd.date_range('20130101', periods=3, + tz='US/Eastern'), + 'h': pd.date_range('20130101', periods=3, freq='ns')}) + return df + + +def test_invalid_engine(df_compat): + + with pytest.raises(ValueError): + df_compat.to_parquet('foo', 'bar') + + +def test_options_py(df_compat, pa): + # use the set option + + df = df_compat + with tm.ensure_clean() as path: + + with pd.option_context('io.parquet.engine', 'pyarrow'): + df.to_parquet(path) + + result = read_parquet(path, compression=None) + tm.assert_frame_equal(result, df) + + +def test_options_fp(df_compat, fp): + # use the set option + + df = df_compat + with tm.ensure_clean() as path: + + with pd.option_context('io.parquet.engine', 'fastparquet'): + df.to_parquet(path, compression=None) + + result = read_parquet(path, compression=None) + tm.assert_frame_equal(result, df) + + +def test_options_auto(df_compat, fp, pa): + + df = df_compat + with tm.ensure_clean() as path: + + with pd.option_context('io.parquet.engine', 'auto'): + df.to_parquet(path) + + result = read_parquet(path, compression=None) + tm.assert_frame_equal(result, df) + + +def test_options_get_engine(fp, pa): + assert 
isinstance(get_engine('pyarrow'), PyArrowImpl) + assert isinstance(get_engine('fastparquet'), FastParquetImpl) + + with pd.option_context('io.parquet.engine', 'pyarrow'): + assert isinstance(get_engine('auto'), PyArrowImpl) + assert isinstance(get_engine('pyarrow'), PyArrowImpl) + assert isinstance(get_engine('fastparquet'), FastParquetImpl) + + with pd.option_context('io.parquet.engine', 'fastparquet'): + assert isinstance(get_engine('auto'), FastParquetImpl) + assert isinstance(get_engine('pyarrow'), PyArrowImpl) + assert isinstance(get_engine('fastparquet'), FastParquetImpl) + + with pd.option_context('io.parquet.engine', 'auto'): + assert isinstance(get_engine('auto'), PyArrowImpl) + assert isinstance(get_engine('pyarrow'), PyArrowImpl) + assert isinstance(get_engine('fastparquet'), FastParquetImpl) + + +@pytest.mark.xfail(reason="fp does not ignore pa index __index_level_0__") +def test_cross_engine_pa_fp(df_cross_compat, pa, fp): + # cross-compat with differing reading/writing engines + + df = df_cross_compat + with tm.ensure_clean() as path: + df.to_parquet(path, engine=pa, compression=None) + + result = read_parquet(path, engine=fp, compression=None) + tm.assert_frame_equal(result, df) + + +@pytest.mark.xfail(reason="pyarrow reading fp in some cases") +def test_cross_engine_fp_pa(df_cross_compat, pa, fp): + # cross-compat with differing reading/writing engines + + df = df_cross_compat + with tm.ensure_clean() as path: + df.to_parquet(path, engine=fp, compression=None) + + result = read_parquet(path, engine=pa, compression=None) + tm.assert_frame_equal(result, df) + + +class Base(object): + + def check_error_on_write(self, df, engine, exc): + # check that we are raising the exception + # on writing + + with pytest.raises(exc): + with tm.ensure_clean() as path: + to_parquet(df, path, engine, compression=None) + + def check_round_trip(self, df, engine, expected=None, **kwargs): + + with tm.ensure_clean() as path: + df.to_parquet(path, engine, **kwargs) + result = read_parquet(path, engine) + + if expected is None: + expected = df + tm.assert_frame_equal(result, expected) + + # repeat + to_parquet(df, path, engine, **kwargs) + result = pd.read_parquet(path, engine) + + if expected is None: + expected = df + tm.assert_frame_equal(result, expected) + + +class TestBasic(Base): + + def test_error(self, engine): + + for obj in [pd.Series([1, 2, 3]), 1, 'foo', pd.Timestamp('20130101'), + np.array([1, 2, 3])]: + self.check_error_on_write(obj, engine, ValueError) + + def test_columns_dtypes(self, engine): + + df = pd.DataFrame({'string': list('abc'), + 'int': list(range(1, 4))}) + + # unicode + df.columns = [u'foo', u'bar'] + self.check_round_trip(df, engine, compression=None) + + def test_columns_dtypes_invalid(self, engine): + + df = pd.DataFrame({'string': list('abc'), + 'int': list(range(1, 4))}) + + # numeric + df.columns = [0, 1] + self.check_error_on_write(df, engine, ValueError) + + if PY3: + # bytes on PY3, on PY2 these are str + df.columns = [b'foo', b'bar'] + self.check_error_on_write(df, engine, ValueError) + + # python object + df.columns = [datetime.datetime(2011, 1, 1, 0, 0), + datetime.datetime(2011, 1, 1, 1, 1)] + self.check_error_on_write(df, engine, ValueError) + + def test_write_with_index(self, engine): + + df = pd.DataFrame({'A': [1, 2, 3]}) + self.check_round_trip(df, engine, compression=None) + + # non-default index + for index in [[2, 3, 4], + pd.date_range('20130101', periods=3), + list('abc'), + [1, 3, 4], + pd.MultiIndex.from_tuples([('a', 1), ('a', 2), + ('b', 
1)]), + ]: + + df.index = index + self.check_error_on_write(df, engine, ValueError) + + # index with meta-data + df.index = [0, 1, 2] + df.index.name = 'foo' + self.check_error_on_write(df, engine, ValueError) + + # column multi-index + df.index = [0, 1, 2] + df.columns = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)]), + self.check_error_on_write(df, engine, ValueError) + + @pytest.mark.parametrize('compression', [None, 'gzip', 'snappy', 'brotli']) + def test_compression(self, engine, compression): + + if compression == 'snappy': + pytest.importorskip('snappy') + + elif compression == 'brotli': + pytest.importorskip('brotli') + + df = pd.DataFrame({'A': [1, 2, 3]}) + self.check_round_trip(df, engine, compression=compression) + + +class TestParquetPyArrow(Base): + + def test_basic(self, pa): + + df = pd.DataFrame({'string': list('abc'), + 'string_with_nan': ['a', np.nan, 'c'], + 'string_with_none': ['a', None, 'c'], + 'bytes': [b'foo', b'bar', b'baz'], + 'unicode': [u'foo', u'bar', u'baz'], + 'int': list(range(1, 4)), + 'uint': np.arange(3, 6).astype('u1'), + 'float': np.arange(4.0, 7.0, dtype='float64'), + 'float_with_nan': [2., np.nan, 3.], + 'bool': [True, False, True], + 'bool_with_none': [True, None, True], + 'datetime_ns': pd.date_range('20130101', periods=3), + 'datetime_with_nat': [pd.Timestamp('20130101'), + pd.NaT, + pd.Timestamp('20130103')] + }) + + self.check_round_trip(df, pa) + + def test_duplicate_columns(self, pa): + + # not currently able to handle duplicate columns + df = pd.DataFrame(np.arange(12).reshape(4, 3), + columns=list('aaa')).copy() + self.check_error_on_write(df, pa, ValueError) + + def test_unsupported(self, pa): + + # period + df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) + self.check_error_on_write(df, pa, ValueError) + + # categorical + df = pd.DataFrame({'a': pd.Categorical(list('abc'))}) + self.check_error_on_write(df, pa, NotImplementedError) + + # timedelta + df = pd.DataFrame({'a': pd.timedelta_range('1 day', + periods=3)}) + self.check_error_on_write(df, pa, NotImplementedError) + + # mixed python objects + df = pd.DataFrame({'a': ['a', 1, 2.0]}) + self.check_error_on_write(df, pa, ValueError) + + +class TestParquetFastParquet(Base): + + def test_basic(self, fp): + + df = pd.DataFrame( + {'string': list('abc'), + 'string_with_nan': ['a', np.nan, 'c'], + 'string_with_none': ['a', None, 'c'], + 'bytes': [b'foo', b'bar', b'baz'], + 'unicode': [u'foo', u'bar', u'baz'], + 'int': list(range(1, 4)), + 'uint': np.arange(3, 6).astype('u1'), + 'float': np.arange(4.0, 7.0, dtype='float64'), + 'float_with_nan': [2., np.nan, 3.], + 'bool': [True, False, True], + 'datetime': pd.date_range('20130101', periods=3), + 'datetime_with_nat': [pd.Timestamp('20130101'), + pd.NaT, + pd.Timestamp('20130103')], + 'timedelta': pd.timedelta_range('1 day', periods=3), + }) + + self.check_round_trip(df, fp, compression=None) + + @pytest.mark.skip(reason="not supported") + def test_duplicate_columns(self, fp): + + # not currently able to handle duplicate columns + df = pd.DataFrame(np.arange(12).reshape(4, 3), + columns=list('aaa')).copy() + self.check_error_on_write(df, fp, ValueError) + + def test_bool_with_none(self, fp): + df = pd.DataFrame({'a': [True, None, False]}) + expected = pd.DataFrame({'a': [1.0, np.nan, 0.0]}, dtype='float16') + self.check_round_trip(df, fp, expected=expected, compression=None) + + def test_unsupported(self, fp): + + # period + df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) + 
self.check_error_on_write(df, fp, ValueError) + + # mixed + df = pd.DataFrame({'a': ['a', 1, 2.0]}) + self.check_error_on_write(df, fp, ValueError) + + def test_categorical(self, fp): + df = pd.DataFrame({'a': pd.Categorical(list('abc'))}) + self.check_round_trip(df, fp, compression=None) + + def test_datetime_tz(self, fp): + # doesn't preserve tz + df = pd.DataFrame({'a': pd.date_range('20130101', periods=3, + tz='US/Eastern')}) + + # warns on the coercion + with catch_warnings(record=True): + self.check_round_trip(df, fp, df.astype('datetime64[ns]'), + compression=None) diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py index 48b19b02e297e..9ecd4b10365c8 100644 --- a/pandas/util/_print_versions.py +++ b/pandas/util/_print_versions.py @@ -94,6 +94,7 @@ def show_versions(as_json=False): ("psycopg2", lambda mod: mod.__version__), ("jinja2", lambda mod: mod.__version__), ("s3fs", lambda mod: mod.__version__), + ("fastparquet", lambda mod: mod.__version__), ("pandas_gbq", lambda mod: mod.__version__), ("pandas_datareader", lambda mod: mod.__version__), ] From 8e6b09ff3a09de58e82da6dcabbfddba61a743d6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Aug 2017 07:22:17 -0400 Subject: [PATCH 849/933] DOC: doc typos, xref #15838 --- ci/requirements-3.6_DOC.run | 1 + doc/source/install.rst | 2 +- doc/source/io.rst | 4 ++-- doc/source/whatsnew/v0.21.0.txt | 4 +++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/ci/requirements-3.6_DOC.run b/ci/requirements-3.6_DOC.run index f87760b507357..6c45e3371e9cf 100644 --- a/ci/requirements-3.6_DOC.run +++ b/ci/requirements-3.6_DOC.run @@ -12,6 +12,7 @@ lxml beautifulsoup4 html5lib pytables +python-snappy openpyxl xlrd xlwt diff --git a/doc/source/install.rst b/doc/source/install.rst index 01a01b1b58b4c..99d299b75b59b 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -237,7 +237,7 @@ Optional Dependencies * `xarray `__: pandas-like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended. * `PyTables `__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended. * `Feather Format `__: necessary for feather-based storage, version 0.3.1 or higher. -* ``Apache Parquet Format``, either `pyarrow `__ (>= 0.4.1) or `fastparquet `__ (>= 0.0.6) for parquet-based storage. The `snappy `__ and `brotli `__ are available for compression support. +* `Apache Parquet `__, either `pyarrow `__ (>= 0.4.1) or `fastparquet `__ (>= 0.0.6) for parquet-based storage. The `snappy `__ and `brotli `__ libraries are available for compression support. * `SQLAlchemy `__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. Some common drivers are: * `psycopg2 `__: for PostgreSQL diff --git a/doc/source/io.rst b/doc/source/io.rst index 0b97264abfcd7..e6b51b7e2f45c 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4554,7 +4554,7 @@ Parquet .. versionadded:: 0.21.0 -`Parquet `__ provides a partitioned binary columnar serialization for data frames. It is designed to make reading and writing data frames efficient, and to make sharing data across data analysis languages easy. Parquet can use a variety of compression techniques to shrink the file size as much as possible while still maintaining good read performance. 
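As a concrete illustration of the API documented here, a minimal round trip looks like the following (a sketch only: the small frame and the file name are placeholders, and it assumes at least one of the ``pyarrow`` or ``fastparquet`` engines is installed):

.. code-block:: python

   import pandas as pd

   df = pd.DataFrame({'a': [1, 2, 3], 'b': ['x', 'y', 'z']})

   # engine='auto' (the default) tries pyarrow first and falls back to
   # fastparquet; compression=None sidesteps the optional snappy dependency
   df.to_parquet('example.parquet', compression=None)

   # read it back as a DataFrame
   result = pd.read_parquet('example.parquet')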
@@ -4575,7 +4575,7 @@ You can specify an ``engine`` to direct the serialization. This can be one of ``pyarrow`` or ``fastparquet``. If the engine is NOT specified, then the ``pd.options.io.parquet.engine`` option is checked; if this is also ``auto``, then ``pyarrow`` is tried, falling back to ``fastparquet``. -See the documentation for `pyarrow `__ +See the documentation for `pyarrow `__ and `fastparquet `__ .. note:: diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index fad6647d4de8d..72c09e71c98db 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -9,6 +9,8 @@ users upgrade to this version. Highlights include: +- Integration with `Apache Parquet `__, including a new top-level :func:`pd.read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. + Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. .. contents:: What's new in v0.21.0 @@ -78,7 +80,7 @@ Other Enhancements - :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`) - :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`) - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`) -- Integration with Apache Parquet, including a new top-level ``pd.read_parquet()`` and ``DataFrame.to_parquet()`` method, see :ref:`here `. +- Integration with `Apache Parquet `__, including a new top-level ``pd.read_parquet()`` and ``DataFrame.to_parquet()`` method, see :ref:`here `. .. _whatsnew_0210.api_breaking: From 3fadc62e75bb09b2f39ddd2169baa182fb2ea720 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Aug 2017 21:02:12 -0400 Subject: [PATCH 850/933] TST: test for categorical index monotonicity (#17152) * correctly determine bottleneck version * tests for categorical index monotonicity * fix Index.is_monotonic to point to Index.is_monotonic_increasing directly --- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/category.py | 9 ++++++++ pandas/core/nanops.py | 3 ++- pandas/tests/indexes/test_category.py | 32 +++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 411428e001c81..4aecc75d95971 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1195,7 +1195,7 @@ def _mpl_repr(self): @property def is_monotonic(self): """ alias for is_monotonic_increasing (deprecated) """ - return self._engine.is_monotonic_increasing + return self.is_monotonic_increasing @property def is_monotonic_increasing(self): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index e8427f847dd2d..ac4698b570d17 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -316,10 +316,19 @@ def _engine(self): # we are going to look things up with the codes themselves return self._engine_type(lambda: self.codes.astype('i8'), len(self)) + # introspection @cache_readonly def is_unique(self): return not self.duplicated().any() + @property + def is_monotonic_increasing(self): + return Index(self.codes).is_monotonic_increasing + + @property + def is_monotonic_decreasing(self): + return Index(self.codes).is_monotonic_decreasing + @Appender(base._shared_docs['unique'] % _index_doc_kwargs) def unique(self): result = base.IndexOpsMixin.unique(self) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index e2777cb56374e..2f4e437c0ae61 100644 --- 
a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -28,7 +28,8 @@ try: import bottleneck as bn ver = bn.__version__ - _BOTTLENCK_INSTALLED = ver >= LooseVersion(_MIN_BOTTLENECK_VERSION) + _BOTTLENECK_INSTALLED = (LooseVersion(ver) >= + LooseVersion(_MIN_BOTTLENECK_VERSION)) if not _BOTTLENECK_INSTALLED: warnings.warn( diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index a3d72fdb88239..64bd6df361aeb 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -427,6 +427,38 @@ def test_reindex_empty_index(self): tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp)) + def test_is_monotonic(self): + c = CategoricalIndex([1, 2, 3]) + assert c.is_monotonic_increasing + assert not c.is_monotonic_decreasing + + c = CategoricalIndex([1, 2, 3], ordered=True) + assert c.is_monotonic_increasing + assert not c.is_monotonic_decreasing + + c = CategoricalIndex([1, 2, 3], categories=[3, 2, 1]) + assert not c.is_monotonic_increasing + assert c.is_monotonic_decreasing + + c = CategoricalIndex([1, 3, 2], categories=[3, 2, 1]) + assert not c.is_monotonic_increasing + assert not c.is_monotonic_decreasing + + c = CategoricalIndex([1, 2, 3], categories=[3, 2, 1], ordered=True) + assert not c.is_monotonic_increasing + assert c.is_monotonic_decreasing + + # non lexsorted categories + categories = [9, 0, 1, 2, 3] + + c = CategoricalIndex([9, 0], categories=categories) + assert c.is_monotonic_increasing + assert not c.is_monotonic_decreasing + + c = CategoricalIndex([0, 1], categories=categories) + assert c.is_monotonic_increasing + assert not c.is_monotonic_decreasing + def test_duplicates(self): idx = CategoricalIndex([0, 0, 0], name='foo') From a4c0e72094622ef8b6b4d24c36e532467a00caf9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 2 Aug 2017 18:03:07 -0700 Subject: [PATCH 851/933] MAINT: Remove non-standard and inconsistently-used imports (#17085) --- pandas/core/frame.py | 35 +++++++++++++++++------------------ pandas/core/generic.py | 7 ++++--- pandas/core/indexing.py | 14 +++++++------- pandas/core/series.py | 8 +++----- 4 files changed, 31 insertions(+), 33 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9d63bd2e120aa..027a427555253 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -20,7 +20,6 @@ import warnings from textwrap import dedent -from numpy import nan as NA import numpy as np import numpy.ma as ma @@ -436,7 +435,7 @@ def _init_dict(self, data, index, columns, dtype=None): else: v = np.empty(len(index), dtype=dtype) - v.fill(NA) + v.fill(np.nan) else: v = data[k] data_names.append(k) @@ -1437,8 +1436,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, columns : sequence, optional Columns to write header : boolean or list of string, default True - Write out column names. If a list of string is given it is assumed - to be aliases for the column names + Write out the column names. If a list of strings is given it is + assumed to be aliases for the column names index : boolean, default True Write row names (index) index_label : string or sequence, or False, default None @@ -1622,8 +1621,9 @@ def to_parquet(self, fname, engine='auto', compression='snappy', to_parquet(self, fname, engine, compression=compression, **kwargs) - @Substitution(header='Write out column names. If a list of string is given, \ -it is assumed to be aliases for the column names') + @Substitution(header='Write out the column names. 
If a list of strings ' + 'is given, it is assumed to be aliases for the ' + 'column names') @Appender(fmt.docstring_to_string, indents=1) def to_string(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, @@ -2805,7 +2805,7 @@ def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, return frame - def _reindex_index(self, new_index, method, copy, level, fill_value=NA, + def _reindex_index(self, new_index, method, copy, level, fill_value=np.nan, limit=None, tolerance=None): new_index, indexer = self.index.reindex(new_index, method=method, level=level, limit=limit, @@ -2814,8 +2814,8 @@ def _reindex_index(self, new_index, method, copy, level, fill_value=NA, copy=copy, fill_value=fill_value, allow_dups=False) - def _reindex_columns(self, new_columns, method, copy, level, fill_value=NA, - limit=None, tolerance=None): + def _reindex_columns(self, new_columns, method, copy, level, + fill_value=np.nan, limit=None, tolerance=None): new_columns, indexer = self.columns.reindex(new_columns, method=method, level=level, limit=limit, tolerance=tolerance) @@ -3794,7 +3794,7 @@ def _combine_series(self, other, func, fill_value=None, axis=None, def _combine_series_infer(self, other, func, level=None, fill_value=None, try_cast=True): if len(other) == 0: - return self * NA + return self * np.nan if len(self) == 0: # Ambiguous case, use _series so works with DataFrame @@ -3948,7 +3948,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): if do_fill: arr = _ensure_float(arr) - arr[this_mask & other_mask] = NA + arr[this_mask & other_mask] = np.nan # try to downcast back to the original dtype if needs_i8_conversion_i: @@ -4567,7 +4567,7 @@ def _apply_empty_result(self, func, axis, reduce, *args, **kwds): pass if reduce: - return Series(NA, index=self._get_agg_axis(axis)) + return Series(np.nan, index=self._get_agg_axis(axis)) else: return self.copy() @@ -5185,7 +5185,7 @@ def corr(self, method='pearson', min_periods=1): valid = mask[i] & mask[j] if valid.sum() < min_periods: - c = NA + c = np.nan elif i == j: c = 1. 
elif not valid.all(): @@ -5509,7 +5509,7 @@ def idxmin(self, axis=0, skipna=True): axis = self._get_axis_number(axis) indices = nanops.nanargmin(self.values, axis=axis, skipna=skipna) index = self._get_axis(axis) - result = [index[i] if i >= 0 else NA for i in indices] + result = [index[i] if i >= 0 else np.nan for i in indices] return Series(result, index=self._get_agg_axis(axis)) def idxmax(self, axis=0, skipna=True): @@ -5540,7 +5540,7 @@ def idxmax(self, axis=0, skipna=True): axis = self._get_axis_number(axis) indices = nanops.nanargmax(self.values, axis=axis, skipna=skipna) index = self._get_axis(axis) - result = [index[i] if i >= 0 else NA for i in indices] + result = [index[i] if i >= 0 else np.nan for i in indices] return Series(result, index=self._get_agg_axis(axis)) def _get_agg_axis(self, axis_num): @@ -5778,9 +5778,8 @@ def isin(self, values): 2 True True """ if isinstance(values, dict): - from collections import defaultdict from pandas.core.reshape.concat import concat - values = defaultdict(list, values) + values = collections.defaultdict(list, values) return concat((self.iloc[:, [i]].isin(values[col]) for i, col in enumerate(self.columns)), axis=1) elif isinstance(values, Series): @@ -6143,7 +6142,7 @@ def _homogenize(data, index, dtype=None): v = _dict_compat(v) else: v = dict(v) - v = lib.fast_multiget(v, oindex.values, default=NA) + v = lib.fast_multiget(v, oindex.values, default=np.nan) v = _sanitize_array(v, index, dtype=dtype, copy=False, raise_cast_failure=False) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ec44dce0da9bc..442ec93d94023 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1207,7 +1207,7 @@ def _repr_latex_(self): columns : sequence, optional Columns to write header : boolean or list of string, default True - Write out column names. If a list of string is given it is + Write out the column names. If a list of strings is given it is assumed to be aliases for the column names index : boolean, default True Write row names (index) @@ -1702,8 +1702,9 @@ def to_xarray(self): .. versionadded:: 0.20.0 """ - @Substitution(header='Write out column names. If a list of string is given, \ -it is assumed to be aliases for the column names.') + @Substitution(header='Write out the column names. If a list of strings ' + 'is given, it is assumed to be aliases for the ' + 'column names.') @Appender(_shared_docs['to_latex'] % _shared_doc_kwargs) def to_latex(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 8f6b00fd204cc..109183827de4e 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1,5 +1,5 @@ # pylint: disable=W0223 - +import textwrap import warnings import numpy as np from pandas.compat import range, zip @@ -1288,13 +1288,13 @@ class _IXIndexer(_NDFrameIndexer): def __init__(self, obj, name): - _ix_deprecation_warning = """ -.ix is deprecated. Please use -.loc for label based indexing or -.iloc for positional indexing + _ix_deprecation_warning = textwrap.dedent(""" + .ix is deprecated. 
Please use + .loc for label based indexing or + .iloc for positional indexing -See the documentation here: -http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated""" # noqa + See the documentation here: + http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated""") # noqa warnings.warn(_ix_deprecation_warning, DeprecationWarning, stacklevel=3) diff --git a/pandas/core/series.py b/pandas/core/series.py index 60d268c89a9d7..996b483ff6092 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -10,7 +10,6 @@ import warnings from textwrap import dedent -from numpy import nan, ndarray import numpy as np import numpy.ma as ma @@ -210,13 +209,13 @@ def __init__(self, data=None, index=None, dtype=None, name=None, data = np.nan # GH #12169 elif isinstance(index, (PeriodIndex, TimedeltaIndex)): - data = ([data.get(i, nan) for i in index] + data = ([data.get(i, np.nan) for i in index] if data else np.nan) else: data = lib.fast_multiget(data, index.values, default=np.nan) except TypeError: - data = ([data.get(i, nan) for i in index] + data = ([data.get(i, np.nan) for i in index] if data else np.nan) elif isinstance(data, SingleBlockManager): @@ -1686,7 +1685,7 @@ def _binop(self, other, func, level=None, fill_value=None): result.name = None return result - def combine(self, other, func, fill_value=nan): + def combine(self, other, func, fill_value=np.nan): """ Perform elementwise binary operation on two Series using given function with optional fill value when an index is missing from one Series or @@ -2952,7 +2951,6 @@ def _dir_additions(self): Series._add_numeric_operations() Series._add_series_only_operations() Series._add_series_or_dataframe_operations() -_INDEX_TYPES = ndarray, Index, list, tuple # ----------------------------------------------------------------------------- # Supplementary functions From 90913306595c36facde65b6858ff94b6e6d51668 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Aug 2017 21:05:07 -0400 Subject: [PATCH 852/933] DOC: typos in whatsnew --- doc/source/whatsnew/v0.21.0.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 72c09e71c98db..2a2e08c2ccf5d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -9,7 +9,7 @@ users upgrade to this version. Highlights include: -- Integration with `Apache Parquet `__, including a new top-level :func:`pd.read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. +- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. @@ -80,7 +80,7 @@ Other Enhancements - :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`) - :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`) - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`) -- Integration with `Apache Parquet `__, including a new top-level :func:`pd.read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. +- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. .. 
_whatsnew_0210.api_breaking: From 55ae03986dab53f39c1df2b8a5e3532f89ad22be Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 3 Aug 2017 08:07:18 -0400 Subject: [PATCH 853/933] DOC: whatsnew 0.21.0 fixes --- doc/source/api.rst | 8 ++++++++ doc/source/whatsnew/v0.21.0.txt | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 1a4ee68ef52c4..12e6c7ad7f630 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -109,6 +109,14 @@ Feather read_feather +Parquet +~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + read_parquet + SAS ~~~ diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2a2e08c2ccf5d..f66e94ce6fa86 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -219,7 +219,7 @@ Other API Changes - ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`) - Removed the ``@slow`` decorator from ``pandas.util.testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`) - Moved definition of ``MergeError`` to the ``pandas.errors`` module. -- The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is still supported and causes a ``FutureWarning`` to be emitted (:issue:`14636`) +- The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is deprecated and will show a ``FutureWarning`` (:issue:`14636`) .. _whatsnew_0210.deprecations: @@ -317,7 +317,7 @@ Reshaping - Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`) - Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) - Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`) -- Bug in ``pd.crosstab()`` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`) +- Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`) Numeric ^^^^^^^ From 0c4bc059d7a99b66f0f5251d699a753c9fe81ced Mon Sep 17 00:00:00 2001 From: Carter Green Date: Thu, 3 Aug 2017 11:37:26 -0500 Subject: [PATCH 854/933] BUG: Fix CSV parsing of singleton list header (#17090) Closes gh-7757. --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/_libs/parsers.pyx | 21 ++++++++++++--------- pandas/io/parsers.py | 7 ++++--- pandas/tests/io/parser/header.py | 7 +++++++ 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f66e94ce6fa86..dc7e6661edaae 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -286,6 +286,7 @@ I/O - Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) - Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696`, :issue:`16798`). 
- Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`). +- Bug in :func:`read_csv` in which a single-element list ``header`` would return a ``DataFrame`` of all NaN values (:issue:`7757`) - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 077c355e785a3..ae420da2102b2 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -535,23 +535,26 @@ cdef class TextReader: self.parser_start = 0 self.header = [] else: - if isinstance(header, list) and len(header): - # need to artifically skip the final line - # which is still a header line - header = list(header) - header.append(header[-1] + 1) + if isinstance(header, list): + if len(header) > 1: + # need to artificially skip the final line + # which is still a header line + header = list(header) + header.append(header[-1] + 1) + self.parser.header_end = header[-1] + self.has_mi_columns = 1 + else: + self.parser.header_end = header[0] + self.parser_start = header[-1] + 1 self.parser.header_start = header[0] - self.parser.header_end = header[-1] self.parser.header = header[0] - self.parser_start = header[-1] + 1 - self.has_mi_columns = 1 self.header = header else: self.parser.header_start = header self.parser.header_end = header - self.parser.header = header self.parser_start = header + 1 + self.parser.header = header self.header = [ header ] self.names = names diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 41b0cdd6dd250..9c76d3126890c 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2283,10 +2283,11 @@ def _infer_columns(self): if self.header is not None: header = self.header - # we have a mi columns, so read an extra line if isinstance(header, (list, tuple, np.ndarray)): - have_mi_columns = True - header = list(header) + [header[-1] + 1] + have_mi_columns = len(header) > 1 + # we have a mi columns, so read an extra line + if have_mi_columns: + header = list(header) + [header[-1] + 1] else: have_mi_columns = False header = [header] diff --git a/pandas/tests/io/parser/header.py b/pandas/tests/io/parser/header.py index 4935fd2cd910a..50ae4dae541ac 100644 --- a/pandas/tests/io/parser/header.py +++ b/pandas/tests/io/parser/header.py @@ -286,3 +286,10 @@ def test_non_int_header(self): self.read_csv(StringIO(data), sep=',', header=['a', 'b']) with tm.assert_raises_regex(ValueError, msg): self.read_csv(StringIO(data), sep=',', header='string_header') + + def test_singleton_header(self): + # See GH #7757 + data = """a,b,c\n0,1,2\n1,2,3""" + df = self.read_csv(StringIO(data), header=[0]) + expected = DataFrame({"a": [0, 1], "b": [1, 2], "c": [2, 3]}) + tm.assert_frame_equal(df, expected) From 9b07ef4a5b656a1532512c270533053ee338e30d Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 3 Aug 2017 15:06:05 -0600 Subject: [PATCH 855/933] ENH: Support strings containing '%' in add_prefix/add_suffix (#17151) (#17162) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/internals.py | 5 +++-- pandas/tests/frame/test_api.py | 8 ++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index dc7e6661edaae..2f61b71d06019 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ 
b/doc/source/whatsnew/v0.21.0.txt @@ -81,6 +81,7 @@ Other Enhancements - :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`) - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`) - Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. +- :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) .. _whatsnew_0210.api_breaking: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 25c367fcbd968..37fc1c01061ec 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -5,6 +5,7 @@ import operator from datetime import datetime, timedelta, date from collections import defaultdict +from functools import partial import numpy as np @@ -2959,11 +2960,11 @@ def rename_axis(self, mapper, axis, copy=True, level=None): return obj def add_prefix(self, prefix): - f = (str(prefix) + '%s').__mod__ + f = partial('{prefix}{}'.format, prefix=prefix) return self.rename_axis(f, axis=0) def add_suffix(self, suffix): - f = ('%s' + str(suffix)).__mod__ + f = partial('{}{suffix}'.format, suffix=suffix) return self.rename_axis(f, axis=0) @property diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index f63918c97c614..8c4c13b66ffa9 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -68,6 +68,14 @@ def test_add_prefix_suffix(self): expected = pd.Index(['%s#foo' % c for c in self.frame.columns]) tm.assert_index_equal(with_suffix.columns, expected) + with_pct_prefix = self.frame.add_prefix('%') + expected = pd.Index(['%{}'.format(c) for c in self.frame.columns]) + tm.assert_index_equal(with_pct_prefix.columns, expected) + + with_pct_suffix = self.frame.add_suffix('%') + expected = pd.Index(['{}%'.format(c) for c in self.frame.columns]) + tm.assert_index_equal(with_pct_suffix.columns, expected) + class TestDataFrameMisc(SharedWithSparse, TestData): From 929c66fd74da221078a67ea7fd3dbcbe21d642e0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 4 Aug 2017 09:44:53 +0200 Subject: [PATCH 856/933] REF: repr - allow block to override values that get formatted (#17143) --- pandas/core/internals.py | 8 +++++ pandas/core/series.py | 6 ++++ pandas/io/formats/format.py | 6 ++-- pandas/tests/internals/__init__.py | 0 pandas/tests/internals/test_external_block.py | 29 +++++++++++++++++++ .../tests/{ => internals}/test_internals.py | 0 setup.py | 1 + 7 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 pandas/tests/internals/__init__.py create mode 100644 pandas/tests/internals/test_external_block.py rename pandas/tests/{ => internals}/test_internals.py (100%) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 37fc1c01061ec..0f85c4e046e5a 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -160,6 +160,10 @@ def internal_values(self, dtype=None): """ return self.values + def formatting_values(self): + """Return the internal values used by the DataFrame/SeriesFormatter""" + return self.internal_values() + def get_values(self, dtype=None): """ return an internal format, currently just the ndarray @@ -4317,6 +4321,10 @@ def external_values(self): def internal_values(self): return self._block.internal_values() + def formatting_values(self): + """Return the internal values used by the DataFrame/SeriesFormatter""" + return 
self._block.formatting_values() + def get_values(self): """ return a dense type view """ return np.array(self._block.to_dense(), copy=False) diff --git a/pandas/core/series.py b/pandas/core/series.py index 996b483ff6092..e42ba3908a29a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -397,6 +397,12 @@ def _values(self): """ return the internal repr of this data """ return self._data.internal_values() + def _formatting_values(self): + """Return the values that can be formatted (used by SeriesFormatter + and DataFrameFormatter) + """ + return self._data.formatting_values() + def get_values(self): """ same as values (but handles sparseness conversions); is a view """ return self._data.get_values() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 2b322431bd301..733fd3bd39b52 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -237,7 +237,8 @@ def _get_formatted_index(self): return fmt_index, have_header def _get_formatted_values(self): - return format_array(self.tr_series._values, None, + values_to_format = self.tr_series._formatting_values() + return format_array(values_to_format, None, float_format=self.float_format, na_rep=self.na_rep) def to_string(self): @@ -694,7 +695,8 @@ def to_latex(self, column_format=None, longtable=False, encoding=None, def _format_col(self, i): frame = self.tr_frame formatter = self._get_formatter(i) - return format_array(frame.iloc[:, i]._values, formatter, + values_to_format = frame.iloc[:, i]._formatting_values() + return format_array(values_to_format, formatter, float_format=self.float_format, na_rep=self.na_rep, space=self.col_space, decimal=self.decimal) diff --git a/pandas/tests/internals/__init__.py b/pandas/tests/internals/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/internals/test_external_block.py b/pandas/tests/internals/test_external_block.py new file mode 100644 index 0000000000000..cccde76c3e1d9 --- /dev/null +++ b/pandas/tests/internals/test_external_block.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# pylint: disable=W0102 + +import numpy as np + +import pandas as pd +from pandas.core.internals import Block, BlockManager, SingleBlockManager + + +class CustomBlock(Block): + + def formatting_values(self): + return np.array(["Val: {}".format(i) for i in self.values]) + + +def test_custom_repr(): + values = np.arange(3, dtype='int64') + + # series + block = CustomBlock(values, placement=slice(0, 3)) + + s = pd.Series(SingleBlockManager(block, pd.RangeIndex(3))) + assert repr(s) == '0 Val: 0\n1 Val: 1\n2 Val: 2\ndtype: int64' + + # dataframe + block = CustomBlock(values.reshape(1, -1), placement=slice(0, 1)) + blk_mgr = BlockManager([block], [['col'], range(3)]) + df = pd.DataFrame(blk_mgr) + assert repr(df) == ' col\n0 Val: 0\n1 Val: 1\n2 Val: 2' diff --git a/pandas/tests/test_internals.py b/pandas/tests/internals/test_internals.py similarity index 100% rename from pandas/tests/test_internals.py rename to pandas/tests/internals/test_internals.py diff --git a/setup.py b/setup.py index d5791862cfb19..a912b25328954 100755 --- a/setup.py +++ b/setup.py @@ -670,6 +670,7 @@ def pxd(name): 'pandas.tests.indexes.datetimes', 'pandas.tests.indexes.timedeltas', 'pandas.tests.indexes.period', + 'pandas.tests.internals', 'pandas.tests.io', 'pandas.tests.io.json', 'pandas.tests.io.parser', From dd776a9f88734ea0769ffdec83423b6ab0c7a59a Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 6 Aug 2017 22:49:26 -0700 Subject: [PATCH 857/933] MAINT: Drop 
unnecessary newlines in issue template --- .github/ISSUE_TEMPLATE.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 6ab03c9907475..237e61487d13a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -8,11 +8,9 @@ [this should explain **why** the current behaviour is a problem and why the expected output is a better solution.] -**Note**: We receive a lot of issues on our GitHub tracker, so it is very possible that your issue has been posted before. -Please check first before submitting so that we do not have to handle and close duplicates! +**Note**: We receive a lot of issues on our GitHub tracker, so it is very possible that your issue has been posted before. Please check first before submitting so that we do not have to handle and close duplicates! -**Note**: Many problems can be resolved by simply upgrading `pandas` to the latest version. Before submitting, please check -if that solution works for you. If possible, you may want to check if `master` addresses this issue, but that is not necessary. +**Note**: Many problems can be resolved by simply upgrading `pandas` to the latest version. Before submitting, please check if that solution works for you. If possible, you may want to check if `master` addresses this issue, but that is not necessary. #### Expected Output From cdabac1df8220b12d57db021b4b06d391459bed0 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 7 Aug 2017 06:44:00 -0400 Subject: [PATCH 858/933] remove direct import of nan Author: Brock Mendel Closes #17185 from jbrockmendel/dont_import_nan and squashes the following commits: ee260b86a [Brock Mendel] remove direct import of nan --- pandas/core/sparse/frame.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index d8c0aa41edac1..f30bd5c36a61b 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -5,7 +5,6 @@ from __future__ import division # pylint: disable=E1101,E1103,W0231,E0202 -from numpy import nan from pandas.compat import lmap from pandas import compat import numpy as np @@ -156,7 +155,7 @@ def _init_dict(self, data, index, columns, dtype=None): v = v.copy() else: if isinstance(v, dict): - v = [v.get(i, nan) for i in index] + v = [v.get(i, np.nan) for i in index] v = sp_maker(v) sdict[k] = v From 3ab7d5c0a9fe8c90a5ce2dc52cb77d219e1112da Mon Sep 17 00:00:00 2001 From: Jean Helie Date: Mon, 7 Aug 2017 11:46:06 +0100 Subject: [PATCH 859/933] use == to test String equality (#17171) --- pandas/io/parquet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 0a4426b55b323..09603fd6fdcce 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -10,10 +10,10 @@ def get_engine(engine): """ return our implementation """ - if engine is 'auto': + if engine == 'auto': engine = get_option('io.parquet.engine') - if engine is 'auto': + if engine == 'auto': # try engines in this order try: return PyArrowImpl() From 7cc0fac9a77547d2017e70807858ba0c5be5c4ff Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Mon, 7 Aug 2017 06:05:58 -0500 Subject: [PATCH 860/933] ENH: Add warning when setting into nonexistent attribute (#16951) closes #7175 closes #5904 --- doc/source/indexing.rst | 35 +++++++++++++++++++--- doc/source/whatsnew/v0.21.0.txt | 46 ++++++++++++++++++++++++++++- pandas/core/generic.py | 12 +++++++- pandas/tests/dtypes/test_generic.py | 38 
++++++++++++++++++++++++ pandas/tests/io/test_pytables.py | 4 +-- 5 files changed, 127 insertions(+), 8 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 1659d57b33b84..53a259ad6eb15 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -227,10 +227,6 @@ as an attribute: dfa.A panel.one -You can use attribute access to modify an existing element of a Series or column of a DataFrame, but be careful; -if you try to use attribute access to create a new column, it fails silently, creating a new attribute rather than a -new column. - .. ipython:: python sa.a = 5 @@ -267,6 +263,37 @@ You can also assign a ``dict`` to a row of a ``DataFrame``: x.iloc[1] = dict(x=9, y=99) x +You can use attribute access to modify an existing element of a Series or column of a DataFrame, but be careful; +if you try to use attribute access to create a new column, it creates a new attribute rather than a +new column. In 0.21.0 and later, this will raise a ``UserWarning``: + +.. code-block:: ipython + + In[1]: df = pd.DataFrame({'one': [1., 2., 3.]}) + In[2]: df.two = [4, 5, 6] + UserWarning: Pandas doesn't allow Series to be assigned into nonexistent columns - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute_access + In[3]: df + Out[3]: + one + 0 1.0 + 1 2.0 + 2 3.0 + +Similarly, it is possible to create a column with a name which collides with one of Pandas's +built-in methods or attributes, which can cause confusion later when attempting to access +that column as an attribute. This behavior now warns: + +.. code-block:: ipython + + In[4]: df['sum'] = [5., 7., 9.] + UserWarning: Column name 'sum' collides with a built-in method, which will cause unexpected attribute behavior + In[5]: df.sum + Out[5]: + + Slicing ranges -------------- diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2f61b71d06019..d9439e0d785f6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -29,7 +29,6 @@ New features - Added ``skipna`` parameter to :func:`~pandas.api.types.infer_dtype` to support type inference in the presence of missing values (:issue:`17059`). - .. _whatsnew_0210.enhancements.infer_objects: ``infer_objects`` type conversion @@ -62,6 +61,51 @@ using the :func:`to_numeric` function (or :func:`to_datetime`, :func:`to_timedel df['C'] = pd.to_numeric(df['C'], errors='coerce') df.dtypes +.. _whatsnew_0210.enhancements.attribute_access: + +Improved warnings when attempting to create columns +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +New users are often flummoxed by the relationship between column operations and attribute +access on ``DataFrame`` instances (:issue:`5904` & :issue:`7175`). Two specific instances +of this confusion include attempting to create a new column by setting into an attribute: + +.. code-block:: ipython + + In[1]: df = pd.DataFrame({'one': [1., 2., 3.]}) + In[2]: df.two = [4, 5, 6] + +This does not raise any obvious exceptions, but also does not create a new column: + +.. code-block:: ipython + + In[3]: df + Out[3]: + one + 0 1.0 + 1 2.0 + 2 3.0 + +The second source of confusion is creating a column whose name collides with a method or +attribute already in the instance namespace: + +.. code-block:: ipython + + In[4]: df['sum'] = [5., 7., 9.] + +This does not permit that column to be accessed as an attribute: + +.. code-block:: ipython + + In[5]: df.sum + Out[5]: + + +Both of these now raise a ``UserWarning`` about the potential for unexpected behavior. 
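+For contrast, the long-supported spelling, regular item access, keeps working with no warning. A minimal sketch:
+
+.. code-block:: ipython
+
+    In[6]: df['two'] = [4, 5, 6]
+
+    In[7]: df['two'].sum()
+    Out[7]: 15
+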
See :ref:`Attribute Access `. + .. _whatsnew_0210.enhancements.other: Other Enhancements diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 442ec93d94023..2d52eed81d22b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -27,7 +27,7 @@ pandas_dtype) from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask from pandas.core.dtypes.missing import isna, notna -from pandas.core.dtypes.generic import ABCSeries, ABCPanel +from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame from pandas.core.common import (_values_from_object, _maybe_box_datetimelike, @@ -1907,6 +1907,10 @@ def _slice(self, slobj, axis=0, kind=None): return result def _set_item(self, key, value): + if isinstance(key, str) and callable(getattr(self, key, None)): + warnings.warn("Column name '{key}' collides with a built-in " + "method, which will cause unexpected attribute " + "behavior".format(key=key), stacklevel=3) self._data.set(key, value) self._clear_item_cache() @@ -3357,6 +3361,12 @@ def __setattr__(self, name, value): else: object.__setattr__(self, name, value) except (AttributeError, TypeError): + if isinstance(self, ABCDataFrame) and (is_list_like(value)): + warnings.warn("Pandas doesn't allow Series to be assigned " + "into nonexistent columns - see " + "https://pandas.pydata.org/pandas-docs/" + "stable/indexing.html#attribute-access", + stacklevel=2) object.__setattr__(self, name, value) # ---------------------------------------------------------------------- diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 653d7d3082c08..ec850cc34e23b 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd from pandas.core.dtypes import generic as gt +from pandas.util import testing as tm class TestABCClasses(object): @@ -38,3 +39,40 @@ def test_abc_types(self): assert isinstance(self.sparse_array, gt.ABCSparseArray) assert isinstance(self.categorical, gt.ABCCategorical) assert isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod) + + +def test_setattr_warnings(): + # GH5904 - Suggestion: Warning for DataFrame colname-methodname clash + # GH7175 - GOTCHA: You can't use dot notation to add a column... 
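+    # A map of the assertions below: the first three catch_warnings blocks
+    # cover cases that must stay silent (adding a column via [], updating an
+    # existing column in place, and setting an ad-hoc attribute on a Series);
+    # the final two blocks assert the new UserWarning cases.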
+ d = {'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']), + 'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])} + df = pd.DataFrame(d) + + with catch_warnings(record=True) as w: + # successfully add new column + # this should not raise a warning + df['three'] = df.two + 1 + assert len(w) == 0 + assert df.three.sum() > df.two.sum() + + with catch_warnings(record=True) as w: + # successfully modify column in place + # this should not raise a warning + df.one += 1 + assert len(w) == 0 + assert df.one.iloc[0] == 2 + + with catch_warnings(record=True) as w: + # successfully add an attribute to a series + # this should not raise a warning + df.two.not_an_index = [1, 2] + assert len(w) == 0 + + with tm.assert_produces_warning(UserWarning): + # warn when setting column to nonexistent name + df.four = df.two + 2 + assert df.four.sum() > df.two.sum() + + with tm.assert_produces_warning(UserWarning): + # warn when column has same name as method + df['sum'] = df.two diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index fc17b5f85b68c..f33ba7627101e 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2011,7 +2011,7 @@ def check(obj, comparator): df['string'] = 'foo' df['float322'] = 1. df['float322'] = df['float322'].astype('float32') - df['bool'] = df['float322'] > 0 + df['boolean'] = df['float322'] > 0 df['time1'] = Timestamp('20130101') df['time2'] = Timestamp('20130102') check(df, tm.assert_frame_equal) @@ -2141,7 +2141,7 @@ def test_table_values_dtypes_roundtrip(self): df1['string'] = 'foo' df1['float322'] = 1. df1['float322'] = df1['float322'].astype('float32') - df1['bool'] = df1['float32'] > 0 + df1['boolean'] = df1['float32'] > 0 df1['time1'] = Timestamp('20130101') df1['time2'] = Timestamp('20130102') From cda091f85a31fd67b2b3957e77718373e92ff883 Mon Sep 17 00:00:00 2001 From: Nathan Ford Date: Mon, 7 Aug 2017 07:56:53 -0500 Subject: [PATCH 861/933] DOC: added string processing comparison with SAS (#16497) --- doc/source/comparison_with_sas.rst | 140 +++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/doc/source/comparison_with_sas.rst b/doc/source/comparison_with_sas.rst index 33a347de0bf5b..1f2424d8a22f3 100644 --- a/doc/source/comparison_with_sas.rst +++ b/doc/source/comparison_with_sas.rst @@ -357,6 +357,146 @@ takes a list of columns to sort by. tips = tips.sort_values(['sex', 'total_bill']) tips.head() + +String Processing +----------------- + +Length +~~~~~~ + +SAS determines the length of a character string with the +`LENGTHN `__ +and `LENGTHC `__ +functions. ``LENGTHN`` excludes trailing blanks and ``LENGTHC`` includes trailing blanks. + +.. code-block:: none + + data _null_; + set tips; + put(LENGTHN(time)); + put(LENGTHC(time)); + run; + +Python determines the length of a character string with the ``len`` function. +``len`` includes trailing blanks. Use ``len`` and ``rstrip`` to exclude +trailing blanks. + +.. ipython:: python + + tips['time'].str.len().head() + tips['time'].str.rstrip().str.len().head() + + +Find +~~~~ + +SAS determines the position of a character in a string with the +`FINDW `__ function. +``FINDW`` takes the string defined by the first argument and searches for the first position of the substring +you supply as the second argument. + +.. code-block:: none + + data _null_; + set tips; + put(FINDW(sex,'ale')); + run; + +Python determines the position of a character in a string with the +``find`` function. 
``find`` searches for the first position of the +substring. If the substring is found, the function returns its +position. Keep in mind that Python indexes are zero-based and +the function will return -1 if it fails to find the substring. + +.. ipython:: python + + tips['sex'].str.find("ale").head() + + +Substring +~~~~~~~~~ + +SAS extracts a substring from a string based on its position with the +`SUBSTR `__ function. + +.. code-block:: none + + data _null_; + set tips; + put(substr(sex,1,1)); + run; + +With pandas you can use ``[]`` notation to extract a substring +from a string by position locations. Keep in mind that Python +indexes are zero-based. + +.. ipython:: python + + tips['sex'].str[0:1].head() + + +Scan +~~~~ + +The SAS `SCAN `__ +function returns the nth word from a string. The first argument is the string you want to parse and the +second argument specifies which word you want to extract. + +.. code-block:: none + + data firstlast; + input String $60.; + First_Name = scan(string, 1); + Last_Name = scan(string, -1); + datalines2; + John Smith; + Jane Cook; + ;;; + run; + +Python extracts a substring from a string based on its text +by using regular expressions. There are much more powerful +approaches, but this just shows a simple approach. + +.. ipython:: python + + firstlast = pd.DataFrame({'String': ['John Smith', 'Jane Cook']}) + firstlast['First_Name'] = firstlast['String'].str.split(" ", expand=True)[0] + firstlast['Last_Name'] = firstlast['String'].str.rsplit(" ", expand=True)[0] + firstlast + + +Upcase, Lowcase, and Propcase +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The SAS `UPCASE `__ +`LOWCASE `__ and +`PROPCASE `__ +functions change the case of the argument. + +.. code-block:: none + + data firstlast; + input String $60.; + string_up = UPCASE(string); + string_low = LOWCASE(string); + string_prop = PROPCASE(string); + datalines2; + John Smith; + Jane Cook; + ;;; + run; + +The equivalent Python functions are ``upper``, ``lower``, and ``title``. + +.. 
ipython:: python + + firstlast = pd.DataFrame({'String': ['John Smith', 'Jane Cook']}) + firstlast['string_up'] = firstlast['String'].str.upper() + firstlast['string_low'] = firstlast['String'].str.lower() + firstlast['string_prop'] = firstlast['String'].str.title() + firstlast + Merging ------- From 65e04510b1576a0bfddd307f19154a44f0fb58d8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 7 Aug 2017 06:10:36 -0700 Subject: [PATCH 862/933] CLN: remove unused get methods in internals (#17169) * Remove unused get methods that would raise AttributeError if called * Remove unnecessary import --- pandas/core/internals.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 0f85c4e046e5a..b9fca1dfbb7a1 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -327,10 +327,6 @@ def reindex_axis(self, indexer, method=None, axis=1, fill_value=None, fill_value=fill_value, mask_info=mask_info) return self.make_block(new_values, fastpath=True) - def get(self, item): - loc = self.items.get_loc(item) - return self.values[loc] - def iget(self, i): return self.values[i] @@ -1662,13 +1658,6 @@ def set(self, locs, values, check=False): assert locs.tolist() == [0] self.values = values - def get(self, item): - if self.ndim == 1: - loc = self.items.get_loc(item) - return self.values[loc] - else: - return self.values - def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False, mgr=None): """ @@ -4730,8 +4719,6 @@ def _concat_indexes(indexes): def _block2d_to_blocknd(values, placement, shape, labels, ref_items): """ pivot to the labels shape """ - from pandas.core.internals import make_block - panel_shape = (len(placement),) + shape # TODO: lexsort depth needs to be 2!! From 62f464ff73c4ce3137c896b34613659cdf331075 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 7 Aug 2017 11:04:07 -0700 Subject: [PATCH 863/933] TST: Partial Boolean DataFrame Indexing (#17186) Closes gh-17170 --- pandas/tests/indexing/test_indexing.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 3ecd1f3029cad..f1f51f26df55c 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -861,6 +861,20 @@ def test_maybe_numeric_slice(self): expected = [1] assert result == expected + def test_partial_boolean_frame_indexing(self): + # GH 17170 + df = pd.DataFrame(np.arange(9.).reshape(3, 3), + index=list('abc'), + columns=list('ABC')) + index_df = pd.DataFrame(1, index=list('ab'), columns=list('AB')) + result = df[index_df.notnull()] + expected = pd.DataFrame(np.array([[0., 1., np.nan], + [3., 4., np.nan], + [np.nan] * 3]), + index=list('abc'), + columns=list('ABC')) + tm.assert_frame_equal(result, expected) + class TestSeriesNoneCoercion(object): EXPECTED_RESULTS = [ From 3c833db29b6f5977c78d1ade791a09a5b29cedb8 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 7 Aug 2017 15:18:33 -0700 Subject: [PATCH 864/933] CLN: Reformat docstring for IPython fixture --- pandas/conftest.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 763a41ee2e2aa..90e5ac864e96f 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -56,9 +56,12 @@ def spmatrix(request): @pytest.fixture def ip(): - """An instance of IPython.InteractiveShell. + """ + Get an instance of IPython.InteractiveShell. + Will raise a skip if IPython is not installed. 
""" + pytest.importorskip('IPython', minversion="6.0.0") from IPython.core.interactiveshell import InteractiveShell return InteractiveShell() From e5aad1a2e31ede967c09c2c19236bed701e3c97a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 8 Aug 2017 16:28:42 -0700 Subject: [PATCH 865/933] Define Series.plot and Series.hist in class definition (#17199) --- pandas/core/series.py | 27 +++++++++++++-------------- pandas/plotting/_converter.py | 4 ++-- pandas/plotting/_core.py | 10 +++++++--- pandas/plotting/_tools.py | 7 +++---- 4 files changed, 25 insertions(+), 23 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index e42ba3908a29a..61508c11cae4b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -76,6 +76,8 @@ from pandas._libs import index as libindex, tslib as libts, lib, iNaT from pandas.core.config import get_option +import pandas.plotting._core as gfx + __all__ = ['Series'] _shared_doc_kwargs = dict( @@ -2952,12 +2954,23 @@ def _dir_additions(self): pass return rv + # ---------------------------------------------------------------------- + # Add plotting methods to Series + plot = base.AccessorProperty(gfx.SeriesPlotMethods, + gfx.SeriesPlotMethods) + hist = gfx.hist_series + Series._setup_axes(['index'], info_axis=0, stat_axis=0, aliases={'rows': 0}) Series._add_numeric_operations() Series._add_series_only_operations() Series._add_series_or_dataframe_operations() +# Add arithmetic! +ops.add_flex_arithmetic_methods(Series, **ops.series_flex_funcs) +ops.add_special_arithmetic_methods(Series, **ops.series_special_funcs) + + # ----------------------------------------------------------------------------- # Supplementary functions @@ -3129,17 +3142,3 @@ def create_from_value(value, index, dtype): subarr = np.array(data, dtype=object, copy=copy) return subarr - - -# ---------------------------------------------------------------------- -# Add plotting methods to Series - -import pandas.plotting._core as _gfx # noqa - -Series.plot = base.AccessorProperty(_gfx.SeriesPlotMethods, - _gfx.SeriesPlotMethods) -Series.hist = _gfx.hist_series - -# Add arithmetic! -ops.add_flex_arithmetic_methods(Series, **ops.series_flex_funcs) -ops.add_special_arithmetic_methods(Series, **ops.series_special_funcs) diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index 97295dfa7baf1..47d15195315ba 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -18,6 +18,7 @@ is_period_arraylike, is_nested_list_like ) +from pandas.core.dtypes.generic import ABCSeries from pandas.compat import lrange import pandas.compat as compat @@ -25,7 +26,6 @@ import pandas.core.common as com from pandas.core.index import Index -from pandas.core.series import Series from pandas.core.indexes.datetimes import date_range import pandas.core.tools.datetimes as tools import pandas.tseries.frequencies as frequencies @@ -175,7 +175,7 @@ def _dt_to_float_ordinal(dt): preserving hours, minutes, seconds and microseconds. Return value is a :func:`float`. 
""" - if (isinstance(dt, (np.ndarray, Index, Series) + if (isinstance(dt, (np.ndarray, Index, ABCSeries) ) and is_datetime64_ns_dtype(dt)): base = dates.epoch2num(dt.asi8 / 1.0E9) else: diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index b8d7cebe8a274..e5b9497993172 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -18,10 +18,12 @@ is_number, is_hashable, is_iterator) +from pandas.core.dtypes.generic import ABCSeries + from pandas.core.common import AbstractMethodError, _try_sort from pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import Index, MultiIndex -from pandas.core.series import Series + from pandas.core.indexes.period import PeriodIndex from pandas.compat import range, lrange, map, zip, string_types import pandas.compat as compat @@ -334,7 +336,7 @@ def result(self): def _compute_plot_data(self): data = self.data - if isinstance(data, Series): + if isinstance(data, ABCSeries): label = self.label if label is None and data.name is None: label = 'None' @@ -1575,6 +1577,7 @@ def maybe_color_bp(self, bp): def _make_plot(self): if self.subplots: + from pandas.core.series import Series self._return_obj = Series() for i, (label, y) in enumerate(self._iter_data()): @@ -2338,6 +2341,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, figsize=figsize, layout=layout) axes = _flatten(axes) + from pandas.core.series import Series ret = Series() for (key, group), ax in zip(grouped, axes): d = group.boxplot(ax=ax, column=column, fontsize=fontsize, @@ -2409,7 +2413,6 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None, _axes = _flatten(axes) - result = Series() ax_values = [] for i, col in enumerate(columns): @@ -2422,6 +2425,7 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None, ax_values.append(re_plotf) ax.grid(grid) + from pandas.core.series import Series result = Series(ax_values, index=columns) # Return axes in multiplot case, maybe revisit later # 985 diff --git a/pandas/plotting/_tools.py b/pandas/plotting/_tools.py index 0c2314087525c..389e238ccb96e 100644 --- a/pandas/plotting/_tools.py +++ b/pandas/plotting/_tools.py @@ -8,8 +8,8 @@ import numpy as np from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.generic import ABCSeries from pandas.core.index import Index -from pandas.core.series import Series from pandas.compat import range @@ -25,8 +25,7 @@ def format_date_labels(ax, rot): pass -def table(ax, data, rowLabels=None, colLabels=None, - **kwargs): +def table(ax, data, rowLabels=None, colLabels=None, **kwargs): """ Helper function to convert DataFrame and Series to matplotlib.table @@ -45,7 +44,7 @@ def table(ax, data, rowLabels=None, colLabels=None, matplotlib table object """ from pandas import DataFrame - if isinstance(data, Series): + if isinstance(data, ABCSeries): data = DataFrame(data, columns=[data.name]) elif isinstance(data, DataFrame): pass From 5d8319e51909870f4694b26370eb03832f56e627 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Wed, 9 Aug 2017 01:30:08 +0200 Subject: [PATCH 866/933] BUG: support pandas objects in iloc with old numpy versions (#17194) closes #17193 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/internals.py | 3 +++ pandas/tests/indexing/test_iloc.py | 13 +++++++++++++ 3 files changed, 17 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d9439e0d785f6..c2eb371059955 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ 
b/doc/source/whatsnew/v0.21.0.txt
@@ -322,6 +322,7 @@ Indexing
 - Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`)
 - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`)
 - Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`)
+- Avoids ``IndexError`` when passing an Index or Series to ``.iloc`` with older numpy (:issue:`17193`)

 I/O
 ^^^

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index b9fca1dfbb7a1..b616270e47aa6 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -857,6 +857,9 @@ def _is_empty_indexer(indexer):

             # set
             else:
+                if _np_version_under1p9:
+                    # Work around GH 6168 to support old numpy
+                    indexer = getattr(indexer, 'values', indexer)
                 values[indexer] = value

         # coerce and try to infer the dtypes of the result
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index 1ba9f3101e7b6..31fee303a41e2 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -282,6 +282,19 @@ def test_iloc_setitem_list(self):
                              index=["A", "B", "C"], columns=["A", "B", "C"])
         tm.assert_frame_equal(df, expected)

+    def test_iloc_setitem_pandas_object(self):
+        # GH 17193, affecting old numpy (1.7 and 1.8)
+        s_orig = Series([0, 1, 2, 3])
+        expected = Series([0, -1, -2, 3])
+
+        s = s_orig.copy()
+        s.iloc[Series([1, 2])] = [-1, -2]
+        tm.assert_series_equal(s, expected)
+
+        s = s_orig.copy()
+        s.iloc[pd.Index([1, 2])] = [-1, -2]
+        tm.assert_series_equal(s, expected)
+
     def test_iloc_setitem_dups(self):

         # GH 6766

From 7bef6d873b8af5ee0d35ba4b42c8a4775a6b3f24 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 8 Aug 2017 16:48:59 -0700
Subject: [PATCH 867/933] Implement _make_accessor classmethod for
 PandasDelegate (#17166)

---
 pandas/core/base.py              | 11 +++++++++--
 pandas/core/categorical.py       |  7 +++++++
 pandas/core/indexes/accessors.py |  8 ++++++++
 pandas/core/series.py            | 23 +++--------------------
 pandas/core/strings.py           | 30 +++++++++++++++---------------
 5 files changed, 42 insertions(+), 37 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index eb785b18bd02b..8f21e3125a27e 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -165,6 +165,12 @@ def __setattr__(self, key, value):
 class PandasDelegate(PandasObject):
     """ an abstract base class for delegating methods/properties """

+    @classmethod
+    def _make_accessor(cls, data):
+        raise AbstractMethodError("_make_accessor should be implemented "
+                                  "by subclass and return an instance "
+                                  "of `cls`.")
+
     def _delegate_property_get(self, name, *args, **kwargs):
         raise TypeError("You cannot access the "
                         "property {name}".format(name=name))
@@ -231,9 +237,10 @@ class AccessorProperty(object):
    """Descriptor for implementing accessor properties like Series.str
    """

-    def __init__(self, accessor_cls, construct_accessor):
+    def __init__(self, accessor_cls, construct_accessor=None):
         self.accessor_cls = accessor_cls
-        self.construct_accessor = construct_accessor
+        self.construct_accessor = (construct_accessor or
+                                   accessor_cls._make_accessor)
         self.__doc__ = accessor_cls.__doc__

     def __get__(self, instance, owner=None):
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 1392ad2f011db..230361931125e 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -2061,6 +2061,13 @@ def _delegate_method(self, name, *args, **kwargs):
         if res is not None:
             return Series(res, index=self.index)

+    @classmethod
+    def 
_make_accessor(cls, data): + if not is_categorical_dtype(data.dtype): + raise AttributeError("Can only use .cat accessor with a " + "'category' dtype") + return CategoricalAccessor(data.values, data.index) + CategoricalAccessor._add_delegate_accessors(delegate=Categorical, accessors=["categories", diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index f1fb9a8ad93a7..ce3143b342cec 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -243,3 +243,11 @@ class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): # the Series.dt class property. For Series objects, .dt will always be one # of the more specific classes above. __doc__ = DatetimeProperties.__doc__ + + @classmethod + def _make_accessor(cls, data): + try: + return maybe_to_datetimelike(data) + except Exception: + raise AttributeError("Can only use .dt accessor with " + "datetimelike values") diff --git a/pandas/core/series.py b/pandas/core/series.py index 61508c11cae4b..c8282450b77a9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -54,8 +54,7 @@ from pandas.core.internals import SingleBlockManager from pandas.core.categorical import Categorical, CategoricalAccessor import pandas.core.strings as strings -from pandas.core.indexes.accessors import ( - maybe_to_datetimelike, CombinedDatetimelikeProperties) +from pandas.core.indexes.accessors import CombinedDatetimelikeProperties from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexes.period import PeriodIndex @@ -2919,27 +2918,11 @@ def to_period(self, freq=None, copy=True): # ------------------------------------------------------------------------- # Datetimelike delegation methods - - def _make_dt_accessor(self): - try: - return maybe_to_datetimelike(self) - except Exception: - raise AttributeError("Can only use .dt accessor with datetimelike " - "values") - - dt = base.AccessorProperty(CombinedDatetimelikeProperties, - _make_dt_accessor) + dt = base.AccessorProperty(CombinedDatetimelikeProperties) # ------------------------------------------------------------------------- # Categorical methods - - def _make_cat_accessor(self): - if not is_categorical_dtype(self.dtype): - raise AttributeError("Can only use .cat accessor with a " - "'category' dtype") - return CategoricalAccessor(self.values, self.index) - - cat = base.AccessorProperty(CategoricalAccessor, _make_cat_accessor) + cat = base.AccessorProperty(CategoricalAccessor) def _dir_deletions(self): return self._accessors diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 30465561a911c..0b1db0277eee3 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1890,18 +1890,14 @@ def rindex(self, sub, start=0, end=None): docstring=_shared_docs['ismethods'] % _shared_docs['isdecimal']) - -class StringAccessorMixin(object): - """ Mixin to add a `.str` acessor to the class.""" - - # string methods - def _make_str_accessor(self): + @classmethod + def _make_accessor(cls, data): from pandas.core.index import Index - if (isinstance(self, ABCSeries) and - not ((is_categorical_dtype(self.dtype) and - is_object_dtype(self.values.categories)) or - (is_object_dtype(self.dtype)))): + if (isinstance(data, ABCSeries) and + not ((is_categorical_dtype(data.dtype) and + is_object_dtype(data.values.categories)) or + (is_object_dtype(data.dtype)))): # it's neither a string series not a categorical series with # strings inside the categories. 
# this really should exclude all series with any non-string values
@@ -1910,23 +1906,27 @@ def _make_str_accessor(self):
             raise AttributeError("Can only use .str accessor with string "
                                  "values, which use np.object_ dtype in "
                                  "pandas")
-        elif isinstance(self, Index):
+        elif isinstance(data, Index):
             # can't use ABCIndex to exclude non-str
             # see scc/inferrence.pyx which can contain string values
             allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
-            if self.inferred_type not in allowed_types:
+            if data.inferred_type not in allowed_types:
                 message = ("Can only use .str accessor with string values "
                            "(i.e. inferred_type is 'string', 'unicode' or "
                            "'mixed')")
                 raise AttributeError(message)
-            if self.nlevels > 1:
+            if data.nlevels > 1:
                 message = ("Can only use .str accessor with Index, not "
                            "MultiIndex")
                 raise AttributeError(message)

-        return StringMethods(self)
+        return StringMethods(data)
+
+
+class StringAccessorMixin(object):
+    """ Mixin to add a `.str` accessor to the class."""

-    str = AccessorProperty(StringMethods, _make_str_accessor)
+    str = AccessorProperty(StringMethods)

     def _dir_additions(self):
         return set()

From 64129d11e5a4f668378a6c8ace6cad1abd864aa3 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Wed, 9 Aug 2017 03:27:34 -0700
Subject: [PATCH 868/933] Create ABCDateOffset (#17165)

---
 pandas/core/dtypes/generic.py       |  2 ++
 pandas/core/indexes/base.py         | 14 +++++++-------
 pandas/core/ops.py                  | 10 +++++++---
 pandas/core/tools/datetimes.py      |  5 ++---
 pandas/tests/dtypes/test_generic.py |  6 ++++++
 pandas/tseries/offsets.py           |  1 +
 6 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py
index 90608c18ae503..618bcf6495155 100644
--- a/pandas/core/dtypes/generic.py
+++ b/pandas/core/dtypes/generic.py
@@ -52,6 +52,8 @@ def _check(cls, inst):
 ABCCategorical = create_pandas_abc_type("ABCCategorical", "_typ",
                                         ("categorical"))
 ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period", ))
+ABCDateOffset = create_pandas_abc_type("ABCDateOffset", "_typ",
+                                       ("dateoffset",))


 class _ABCGeneric(type):
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 4aecc75d95971..de6221987a59a 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -13,7 +13,11 @@

 from pandas import compat

-from pandas.core.dtypes.generic import ABCSeries, ABCMultiIndex, ABCPeriodIndex
+from pandas.core.dtypes.generic import (
+    ABCSeries,
+    ABCMultiIndex,
+    ABCPeriodIndex,
+    ABCDateOffset)
 from pandas.core.dtypes.missing import isna, array_equivalent
 from pandas.core.dtypes.common import (
     _ensure_int64,
@@ -3814,8 +3818,6 @@ def _validate_for_numeric_binop(self, other, op, opstr):

         internal method called by ops
         """
-        from pandas.tseries.offsets import DateOffset
-
         # if we are an inheritor of numeric,
         # but not actually numeric (e.g. 
DatetimeIndex/PeriodInde) if not self._is_numeric_dtype: @@ -3843,7 +3845,7 @@ def _validate_for_numeric_binop(self, other, op, opstr): if other.dtype.kind not in ['f', 'i', 'u']: raise TypeError("cannot evaluate a numeric op " "with a non-numeric dtype") - elif isinstance(other, (DateOffset, np.timedelta64, + elif isinstance(other, (ABCDateOffset, np.timedelta64, Timedelta, datetime.timedelta)): # higher up to handle pass @@ -3862,12 +3864,10 @@ def _add_numeric_methods_binary(cls): def _make_evaluate_binop(op, opstr, reversed=False, constructor=Index): def _evaluate_numeric_binop(self, other): - - from pandas.tseries.offsets import DateOffset other = self._validate_for_numeric_binop(other, op, opstr) # handle time-based others - if isinstance(other, (DateOffset, np.timedelta64, + if isinstance(other, (ABCDateOffset, np.timedelta64, Timedelta, datetime.timedelta)): return self._evaluate_with_timedelta_like(other, op, opstr) elif isinstance(other, (Timestamp, np.datetime64)): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 4e08e1483d617..82101414e4aa6 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -35,7 +35,11 @@ is_scalar, _ensure_object) from pandas.core.dtypes.cast import maybe_upcast_putmask, find_common_type -from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCPeriodIndex +from pandas.core.dtypes.generic import ( + ABCSeries, + ABCIndex, + ABCPeriodIndex, + ABCDateOffset) # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory @@ -605,10 +609,10 @@ def f(x): def _is_offset(self, arr_or_obj): """ check if obj or all elements of list-like is DateOffset """ - if isinstance(arr_or_obj, pd.DateOffset): + if isinstance(arr_or_obj, ABCDateOffset): return True elif is_list_like(arr_or_obj) and len(arr_or_obj): - return all(isinstance(x, pd.DateOffset) for x in arr_or_obj) + return all(isinstance(x, ABCDateOffset) for x in arr_or_obj) return False diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index a1f323aff7c1a..eebf78d7619eb 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -17,7 +17,7 @@ is_numeric_dtype) from pandas.core.dtypes.generic import ( ABCIndexClass, ABCSeries, - ABCDataFrame) + ABCDataFrame, ABCDateOffset) from pandas.core.dtypes.missing import notna from pandas.core import algorithms @@ -720,8 +720,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): if not isinstance(arg, compat.string_types): return arg - from pandas.tseries.offsets import DateOffset - if isinstance(freq, DateOffset): + if isinstance(freq, ABCDateOffset): freq = freq.rule_code if dayfirst is None: diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index ec850cc34e23b..82444d6c94157 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -40,6 +40,12 @@ def test_abc_types(self): assert isinstance(self.categorical, gt.ABCCategorical) assert isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod) + assert isinstance(pd.DateOffset(), gt.ABCDateOffset) + assert isinstance(pd.Period('2012', freq='A-DEC').freq, + gt.ABCDateOffset) + assert not isinstance(pd.Period('2012', freq='A-DEC'), + gt.ABCDateOffset) + def test_setattr_warnings(): # GH5904 - Suggestion: Warning for DataFrame colname-methodname clash diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 2a120a0696836..56ef703e67ca0 100644 --- 
a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -184,6 +184,7 @@ def __add__(date): ) _use_relativedelta = False _adjust_dst = False + _typ = "dateoffset" # default for prior pickles normalize = False From 3e9e947b89d8edd7426bf8c748b1c6e3de5a7afb Mon Sep 17 00:00:00 2001 From: Alex Rychyk Date: Wed, 9 Aug 2017 13:37:20 +0300 Subject: [PATCH 869/933] BUG: resample and apply modify the index type for empty Series (#17149) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/resample.py | 7 ++++++- pandas/tests/test_resample.py | 18 ++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index c2eb371059955..cc9ab81ce0955 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -348,6 +348,7 @@ Groupby/Resample/Rolling - Bug in :func:`infer_freq` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) - Bug in ``.rolling(...).quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) - Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) +- Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`) Sparse ^^^^^^ diff --git a/pandas/core/resample.py b/pandas/core/resample.py index a8a48624fb885..96e7a6a3b3904 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -17,7 +17,7 @@ from pandas.core.indexes.period import PeriodIndex, period_range import pandas.core.common as com import pandas.core.algorithms as algos -from pandas.core.dtypes.generic import ABCDataFrame +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries import pandas.compat as compat from pandas.compat.numpy import function as nv @@ -439,6 +439,11 @@ def _wrap_result(self, result): if isinstance(result, com.ABCSeries) and self._selection is not None: result.name = self._selection + if isinstance(result, ABCSeries) and result.empty: + obj = self.obj + result.index = obj.index._shallow_copy(freq=to_offset(self.freq)) + result.name = getattr(obj, 'name', None) + return result def pad(self, limit=None): diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 08fa7992e8da1..d938d5bf9f3ab 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -852,6 +852,16 @@ def test_resample_loffset_arg_type(self): assert_frame_equal(result_agg, expected) assert_frame_equal(result_how, expected) + def test_apply_to_empty_series(self): + # GH 14313 + series = self.create_series()[:0] + + for freq in ['M', 'D', 'H']: + result = series.resample(freq).apply(lambda x: 1) + expected = series.resample(freq).apply(np.sum) + + assert_series_equal(result, expected, check_dtype=False) + class TestDatetimeIndex(Base): _index_factory = lambda x: date_range @@ -2794,6 +2804,14 @@ def test_evenly_divisible_with_no_extra_bins(self): result = df.resample('7D').sum() assert_frame_equal(result, expected) + def test_apply_to_empty_series(self): + # GH 14313 + series = self.create_series()[:0] + + for freq in ['M', 'D', 'H']: + with pytest.raises(TypeError): + series.resample(freq).apply(lambda x: 1) + class TestTimedeltaIndex(Base): _index_factory = lambda x: timedelta_range From 556effcba52f4712fed21b269e9782f1a309ea93 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Wed, 9 Aug 
2017 12:24:36 +0100 Subject: [PATCH 870/933] DOC: Updated NDFrame.astype docs (#17203) --- pandas/core/generic.py | 50 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2d52eed81d22b..bd3297f66a469 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3610,8 +3610,7 @@ def blocks(self): mapping={True: 'raise', False: 'ignore'}) def astype(self, dtype, copy=True, errors='raise', **kwargs): """ - Cast object to input numpy.dtype - Return a copy when copy = True (be really careful with this!) + Cast a pandas object to a specified dtype ``dtype``. Parameters ---------- @@ -3620,6 +3619,10 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): the same type. Alternatively, use {col: dtype, ...}, where col is a column label and dtype is a numpy.dtype or Python type to cast one or more of the DataFrame's columns to column-specific types. + copy : bool, default True. + Return a copy when ``copy=True`` (be very careful setting + ``copy=False`` as changes to values then may propagate to other + pandas objects). errors : {'raise', 'ignore'}, default 'raise'. Control raising of exceptions on invalid data for provided dtype. @@ -3636,6 +3639,49 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): Returns ------- casted : type of caller + + Examples + -------- + >>> ser = pd.Series([1, 2], dtype='int32') + >>> ser + 0 1 + 1 2 + dtype: int32 + >>> ser.astype('int64') + 0 1 + 1 2 + dtype: int64 + + Convert to categorical type: + + >>> ser.astype('category') + 0 1 + 1 2 + dtype: category + Categories (2, int64): [1, 2] + + Convert to ordered categorical type with custom ordering: + + >>> ser.astype('category', ordered=True, categories=[2, 1]) + 0 1 + 1 2 + dtype: category + Categories (2, int64): [2 < 1] + + Note that using ``copy=False`` and changing data on a new + pandas object may propagate changes: + + >>> s1 = pd.Series([1,2]) + >>> s2 = s1.astype('int', copy=False) + >>> s2[0] = 10 + >>> s1 # note that s1[0] has changed too + 0 10 + 1 2 + dtype: int64 + + See also + -------- + numpy.ndarray.astype : Cast a numpy array to a specified type. """ if is_dict_like(dtype): if self.ndim == 1: # i.e. Series From 073c14544436d95969258928e2554cb2fc093c99 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Wed, 9 Aug 2017 16:19:27 -0400 Subject: [PATCH 871/933] MAINT: Minor touch-ups to GitHub PULL_REQUEST_TEMPLATE (#17207) Remove leading space from task-list so that tasks aren't nested. 
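Before this change each template item carried a leading space
(` - [ ] closes #xxxx`), which GitHub Flavored Markdown can treat as a
list nested under the preceding block; the flush-left form
(`- [ ] closes #xxxx`) keeps the checkboxes as a single top-level task
list. The flake8 command is also normalized from double to single
backticks.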
--- .github/PULL_REQUEST_TEMPLATE.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e8b6ee21ad104..4e1e9ce017408 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,4 @@ - - [ ] closes #xxxx - - [ ] tests added / passed - - [ ] passes ``git diff upstream/master -u -- "*.py" | flake8 --diff`` - - [ ] whatsnew entry +- [ ] closes #xxxx +- [ ] tests added / passed +- [ ] passes `git diff upstream/master -u -- "*.py" | flake8 --diff` +- [ ] whatsnew entry From b82253590a66b4a35ed682bca244f668f16c3e0b Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 10 Aug 2017 04:26:58 -0600 Subject: [PATCH 872/933] CLN: replace %s syntax with .format in core.computation (#17209) --- pandas/core/computation/align.py | 11 +++-- pandas/core/computation/engines.py | 5 ++- pandas/core/computation/eval.py | 13 +++--- pandas/core/computation/expr.py | 45 ++++++++++--------- pandas/core/computation/expressions.py | 14 +++--- pandas/core/computation/pytables.py | 61 ++++++++++++++------------ pandas/core/computation/scope.py | 11 +++-- 7 files changed, 87 insertions(+), 73 deletions(-) diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index 1c75301082297..691eaebfd5fc1 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -98,12 +98,11 @@ def _align_core(terms): ordm = np.log10(max(1, abs(reindexer_size - term_axis_size))) if ordm >= 1 and reindexer_size >= 10000: - warnings.warn('Alignment difference on axis {0} is larger ' - 'than an order of magnitude on term {1!r}, ' - 'by more than {2:.4g}; performance may ' - 'suffer'.format(axis, terms[i].name, ordm), - category=PerformanceWarning, - stacklevel=6) + w = ('Alignment difference on axis {axis} is larger ' + 'than an order of magnitude on term {term!r}, by ' + 'more than {ordm:.4g}; performance may suffer' + ).format(axis=axis, term=terms[i].name, ordm=ordm) + warnings.warn(w, category=PerformanceWarning, stacklevel=6) if transpose: f = partial(ti.reindex, index=reindexer, copy=False) diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py index f45d0355e7442..155ff554cf99c 100644 --- a/pandas/core/computation/engines.py +++ b/pandas/core/computation/engines.py @@ -33,8 +33,9 @@ def _check_ne_builtin_clash(expr): if overlap: s = ', '.join(map(repr, overlap)) - raise NumExprClobberingError('Variables in expression "%s" ' - 'overlap with builtins: (%s)' % (expr, s)) + raise NumExprClobberingError('Variables in expression "{expr}" ' + 'overlap with builtins: ({s})' + .format(expr=expr, s=s)) class AbstractEngine(object): diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index ef15e886fd554..d391764794c1c 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -40,8 +40,9 @@ def _check_engine(engine): engine = 'python' if engine not in _engines: - raise KeyError('Invalid engine {0!r} passed, valid engines are' - ' {1}'.format(engine, list(_engines.keys()))) + valid = list(_engines.keys()) + raise KeyError('Invalid engine {engine!r} passed, valid engines are' + ' {valid}'.format(engine=engine, valid=valid)) # TODO: validate this in a more general way (thinking of future engines # that won't necessarily be import-able) @@ -69,8 +70,8 @@ def _check_parser(parser): * If an invalid parser is passed """ if parser not in _parsers: - raise KeyError('Invalid parser {0!r} passed, 
valid parsers are' - ' {1}'.format(parser, _parsers.keys())) + raise KeyError('Invalid parser {parser!r} passed, valid parsers are' + ' {valid}'.format(parser=parser, valid=_parsers.keys())) def _check_resolvers(resolvers): @@ -78,8 +79,8 @@ def _check_resolvers(resolvers): for resolver in resolvers: if not hasattr(resolver, '__getitem__'): name = type(resolver).__name__ - raise TypeError('Resolver of type %r does not implement ' - 'the __getitem__ method' % name) + raise TypeError('Resolver of type {name!r} does not implement ' + 'the __getitem__ method'.format(name=name)) def _check_expression(expr): diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index 73c27f4d772ca..ae956bce11329 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -189,8 +189,8 @@ def _filter_nodes(superclass, all_nodes=_all_nodes): # and we don't want `stmt` and friends in their so get only the class whose # names are capitalized _base_supported_nodes = (_all_node_names - _unsupported_nodes) | _hacked_nodes -_msg = 'cannot both support and not support {0}'.format(_unsupported_nodes & - _base_supported_nodes) +_msg = 'cannot both support and not support {intersection}'.format( + intersection=_unsupported_nodes & _base_supported_nodes) assert not _unsupported_nodes & _base_supported_nodes, _msg @@ -200,8 +200,8 @@ def _node_not_implemented(node_name, cls): """ def f(self, *args, **kwargs): - raise NotImplementedError("{0!r} nodes are not " - "implemented".format(node_name)) + raise NotImplementedError("{name!r} nodes are not " + "implemented".format(name=node_name)) return f @@ -217,7 +217,7 @@ def disallowed(cls): cls.unsupported_nodes = () for node in nodes: new_method = _node_not_implemented(node, cls) - name = 'visit_{0}'.format(node) + name = 'visit_{node}'.format(node=node) cls.unsupported_nodes += (name,) setattr(cls, name, new_method) return cls @@ -251,13 +251,14 @@ def add_ops(op_classes): """Decorator to add default implementation of ops.""" def f(cls): for op_attr_name, op_class in compat.iteritems(op_classes): - ops = getattr(cls, '{0}_ops'.format(op_attr_name)) - ops_map = getattr(cls, '{0}_op_nodes_map'.format(op_attr_name)) + ops = getattr(cls, '{name}_ops'.format(name=op_attr_name)) + ops_map = getattr(cls, '{name}_op_nodes_map'.format( + name=op_attr_name)) for op in ops: op_node = ops_map[op] if op_node is not None: made_op = _op_maker(op_class, op) - setattr(cls, 'visit_{0}'.format(op_node), made_op) + setattr(cls, 'visit_{node}'.format(node=op_node), made_op) return cls return f @@ -388,9 +389,10 @@ def _maybe_evaluate_binop(self, op, op_class, lhs, rhs, res = op(lhs, rhs) if res.has_invalid_return_type: - raise TypeError("unsupported operand type(s) for {0}:" - " '{1}' and '{2}'".format(res.op, lhs.type, - rhs.type)) + raise TypeError("unsupported operand type(s) for {op}:" + " '{lhs}' and '{rhs}'".format(op=res.op, + lhs=lhs.type, + rhs=rhs.type)) if self.engine != 'pytables': if (res.op in _cmp_ops_syms and @@ -527,7 +529,8 @@ def visit_Attribute(self, node, **kwargs): if isinstance(value, ast.Name) and value.id == attr: return resolved - raise ValueError("Invalid Attribute context {0}".format(ctx.__name__)) + raise ValueError("Invalid Attribute context {name}" + .format(name=ctx.__name__)) def visit_Call_35(self, node, side=None, **kwargs): """ in 3.5 the starargs attribute was changed to be more flexible, @@ -549,7 +552,8 @@ def visit_Call_35(self, node, side=None, **kwargs): raise if res is None: - raise ValueError("Invalid 
function call {0}".format(node.func.id)) + raise ValueError("Invalid function call {func}" + .format(func=node.func.id)) if hasattr(res, 'value'): res = res.value @@ -558,8 +562,8 @@ def visit_Call_35(self, node, side=None, **kwargs): new_args = [self.visit(arg) for arg in node.args] if node.keywords: - raise TypeError("Function \"{0}\" does not support keyword " - "arguments".format(res.name)) + raise TypeError("Function \"{name}\" does not support keyword " + "arguments".format(name=res.name)) return res(*new_args, **kwargs) @@ -570,7 +574,7 @@ def visit_Call_35(self, node, side=None, **kwargs): for key in node.keywords: if not isinstance(key, ast.keyword): raise ValueError("keyword error in function call " - "'{0}'".format(node.func.id)) + "'{func}'".format(func=node.func.id)) if key.arg: # TODO: bug? @@ -598,7 +602,8 @@ def visit_Call_legacy(self, node, side=None, **kwargs): raise if res is None: - raise ValueError("Invalid function call {0}".format(node.func.id)) + raise ValueError("Invalid function call {func}" + .format(func=node.func.id)) if hasattr(res, 'value'): res = res.value @@ -609,8 +614,8 @@ def visit_Call_legacy(self, node, side=None, **kwargs): args += self.visit(node.starargs) if node.keywords or node.kwargs: - raise TypeError("Function \"{0}\" does not support keyword " - "arguments".format(res.name)) + raise TypeError("Function \"{name}\" does not support keyword " + "arguments".format(name=res.name)) return res(*args, **kwargs) @@ -623,7 +628,7 @@ def visit_Call_legacy(self, node, side=None, **kwargs): for key in node.keywords: if not isinstance(key, ast.keyword): raise ValueError("keyword error in function call " - "'{0}'".format(node.func.id)) + "'{func}'".format(func=node.func.id)) keywords[key.arg] = self.visit(key.value).value if node.kwargs is not None: keywords.update(self.visit(node.kwargs).value) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 83d02af65cc85..af068bd1f32b3 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -103,7 +103,7 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, truediv=True, a_value = getattr(a, "values", a) b_value = getattr(b, "values", b) - result = ne.evaluate('a_value %s b_value' % op_str, + result = ne.evaluate('a_value {op} b_value'.format(op=op_str), local_dict={'a_value': a_value, 'b_value': b_value}, casting='safe', truediv=truediv, @@ -177,15 +177,15 @@ def _bool_arith_check(op_str, a, b, not_allowed=frozenset(('/', '//', '**')), if _has_bool_dtype(a) and _has_bool_dtype(b): if op_str in unsupported: - warnings.warn("evaluating in Python space because the %r operator" - " is not supported by numexpr for the bool " - "dtype, use %r instead" % (op_str, - unsupported[op_str])) + warnings.warn("evaluating in Python space because the {op!r} " + "operator is not supported by numexpr for " + "the bool dtype, use {alt_op!r} instead" + .format(op=op_str, alt_op=unsupported[op_str])) return False if op_str in not_allowed: - raise NotImplementedError("operator %r not implemented for bool " - "dtypes" % op_str) + raise NotImplementedError("operator {op!r} not implemented for " + "bool dtypes".format(op=op_str)) return True diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 5870090856ff9..4b3c608a88be8 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -41,7 +41,8 @@ def _resolve_name(self): # must be a queryables if self.side == 'left': if 
self.name not in self.env.queryables: - raise NameError('name {0!r} is not defined'.format(self.name)) + raise NameError('name {name!r} is not defined' + .format(name=self.name)) return self.name # resolve the rhs (and allow it to be None) @@ -161,7 +162,7 @@ def metadata(self): def generate(self, v): """ create and return the op string for this TermValue """ val = v.tostring(self.encoding) - return "(%s %s %s)" % (self.lhs, self.op, val) + return "({lhs} {op} {val})".format(lhs=self.lhs, op=self.op, val=val) def convert_value(self, v): """ convert the expression that is in the term to something that is @@ -215,9 +216,8 @@ def stringify(value): # string quoting return TermValue(v, stringify(v), u('string')) else: - raise TypeError(("Cannot compare {v} of type {typ}" - " to {kind} column").format(v=v, typ=type(v), - kind=kind)) + raise TypeError("Cannot compare {v} of type {typ} to {kind} column" + .format(v=v, typ=type(v), kind=kind)) def convert_values(self): pass @@ -226,8 +226,8 @@ def convert_values(self): class FilterBinOp(BinOp): def __unicode__(self): - return pprint_thing("[Filter : [{0}] -> " - "[{1}]".format(self.filter[0], self.filter[1])) + return pprint_thing("[Filter : [{lhs}] -> [{op}]" + .format(lhs=self.filter[0], op=self.filter[1])) def invert(self): """ invert the filter """ @@ -244,7 +244,8 @@ def format(self): def evaluate(self): if not self.is_valid: - raise ValueError("query term is not valid [%s]" % self) + raise ValueError("query term is not valid [{slf}]" + .format(slf=self)) rhs = self.conform(self.rhs) values = [TermValue(v, v, self.kind) for v in rhs] @@ -273,9 +274,8 @@ def evaluate(self): pd.Index([v.value for v in values])) else: - raise TypeError( - "passing a filterable condition to a non-table indexer [%s]" % - self) + raise TypeError("passing a filterable condition to a non-table " + "indexer [{slf}]".format(slf=self)) return self @@ -298,7 +298,8 @@ def evaluate(self): class ConditionBinOp(BinOp): def __unicode__(self): - return pprint_thing("[Condition : [{0}]]".format(self.condition)) + return pprint_thing("[Condition : [{cond}]]" + .format(cond=self.condition)) def invert(self): """ invert the condition """ @@ -315,7 +316,8 @@ def format(self): def evaluate(self): if not self.is_valid: - raise ValueError("query term is not valid [%s]" % self) + raise ValueError("query term is not valid [{slf}]" + .format(slf=self)) # convert values if we are in the table if not self.is_in_table: @@ -330,7 +332,7 @@ def evaluate(self): # too many values to create the expression? 
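        # (when the number of values is small enough, an inline
        # "(v1 | v2 | ...)" condition is generated from the individual
        # terms; otherwise the else branch below falls back to a filter
        # that is applied after the read)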
if len(values) <= self._max_selectors: vs = [self.generate(v) for v in values] - self.condition = "(%s)" % ' | '.join(vs) + self.condition = "({cond})".format(cond=' | '.join(vs)) # use a filter after reading else: @@ -344,10 +346,9 @@ def evaluate(self): class JointConditionBinOp(ConditionBinOp): def evaluate(self): - self.condition = "(%s %s %s)" % ( - self.lhs.condition, - self.op, - self.rhs.condition) + self.condition = "({lhs} {op} {rhs})".format(lhs=self.lhs.condition, + op=self.op, + rhs=self.rhs.condition) return self @@ -382,7 +383,8 @@ class ExprVisitor(BaseExprVisitor): def __init__(self, env, engine, parser, **kwargs): super(ExprVisitor, self).__init__(env, engine, parser) for bin_op in self.binary_ops: - setattr(self, 'visit_{0}'.format(self.binary_op_nodes_map[bin_op]), + bin_node = self.binary_op_nodes_map[bin_op] + setattr(self, 'visit_{node}'.format(node=bin_node), lambda node, bin_op=bin_op: partial(BinOp, bin_op, **kwargs)) @@ -415,8 +417,8 @@ def visit_Subscript(self, node, **kwargs): try: return self.const_type(value[slobj], self.env) except TypeError: - raise ValueError("cannot subscript {0!r} with " - "{1!r}".format(value, slobj)) + raise ValueError("cannot subscript {value!r} with " + "{slobj!r}".format(value=value, slobj=slobj)) def visit_Attribute(self, node, **kwargs): attr = node.attr @@ -441,7 +443,8 @@ def visit_Attribute(self, node, **kwargs): if isinstance(value, ast.Name) and value.id == attr: return resolved - raise ValueError("Invalid Attribute context {0}".format(ctx.__name__)) + raise ValueError("Invalid Attribute context {name}" + .format(name=ctx.__name__)) def translate_In(self, op): return ast.Eq() if isinstance(op, ast.In) else op @@ -529,7 +532,7 @@ def __init__(self, where, queryables=None, encoding=None, scope_level=0): else: w = _validate_where(w) where[idx] = w - where = ' & ' .join(["(%s)" % w for w in where]) # noqa + where = ' & '.join(map('({})'.format, com.flatten(where))) # noqa self.expr = where self.env = Scope(scope_level + 1, local_dict=local_dict) @@ -552,13 +555,15 @@ def evaluate(self): try: self.condition = self.terms.prune(ConditionBinOp) except AttributeError: - raise ValueError("cannot process expression [{0}], [{1}] is not a " - "valid condition".format(self.expr, self)) + raise ValueError("cannot process expression [{expr}], [{slf}] " + "is not a valid condition".format(expr=self.expr, + slf=self)) try: self.filter = self.terms.prune(FilterBinOp) except AttributeError: - raise ValueError("cannot process expression [{0}], [{1}] is not a " - "valid filter".format(self.expr, self)) + raise ValueError("cannot process expression [{expr}], [{slf}] " + "is not a valid filter".format(expr=self.expr, + slf=self)) return self.condition, self.filter @@ -578,7 +583,7 @@ def tostring(self, encoding): if self.kind == u'string': if encoding is not None: return self.converted - return '"%s"' % self.converted + return '"{converted}"'.format(converted=self.converted) elif self.kind == u'float': # python 2 str(float) is not always # round-trippable so use repr() diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 5a589473f64b7..6a298f5137eb1 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -137,8 +137,10 @@ def __init__(self, level, global_dict=None, local_dict=None, resolvers=(), def __unicode__(self): scope_keys = _get_pretty_string(list(self.scope.keys())) res_keys = _get_pretty_string(list(self.resolvers.keys())) - return '%s(scope=%s, resolvers=%s)' % 
(type(self).__name__, scope_keys, - res_keys) + unicode_str = '{name}(scope={scope_keys}, resolvers={res_keys})' + return unicode_str.format(name=type(self).__name__, + scope_keys=scope_keys, + res_keys=res_keys) @property def has_resolvers(self): @@ -269,8 +271,9 @@ def add_tmp(self, value): name : basestring The name of the temporary variable created. """ - name = '{0}_{1}_{2}'.format(type(value).__name__, self.ntemps, - _raw_hex_id(self)) + name = '{name}_{num}_{hex_id}'.format(name=type(value).__name__, + num=self.ntemps, + hex_id=_raw_hex_id(self)) # add to inner most scope assert name not in self.temps From a09db4b156cd9129fd38214e039097ae944c062c Mon Sep 17 00:00:00 2001 From: Chris Billington Date: Thu, 10 Aug 2017 06:36:50 -0400 Subject: [PATCH 873/933] Bugfix for multilevel columns with empty strings in Python 2 (#17099) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/frame.py | 12 ++++++++++-- pandas/tests/test_multilevel.py | 21 ++++++++++++++------- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index cc9ab81ce0955..70300a087895a 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -323,6 +323,7 @@ Indexing - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) - Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`) - Avoids ``IndexError`` when passing an Index or Series to ``.iloc`` with older numpy (:issue:`17193`) +- Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`) I/O ^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 027a427555253..94cce1b4d05b5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2134,10 +2134,18 @@ def _getitem_multilevel(self, key): result = self._constructor(new_values, index=self.index, columns=result_columns) result = result.__finalize__(self) + + # If there is only one column being returned, and its name is + # either an empty string, or a tuple with an empty string as its + # first element, then treat the empty string as a placeholder + # and return the column as if the user had provided that empty + # string in the key. If the result is a Series, exclude the + # implied empty string from its name. if len(result.columns) == 1: top = result.columns[0] - if ((type(top) == str and top == '') or - (type(top) == tuple and top[0] == '')): + if isinstance(top, tuple): + top = top[0] + if top == '': result = result[''] if isinstance(result, Series): result = self._constructor_sliced(result, diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 0b2dc9ba70f03..a765e2c4ca1bf 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1675,24 +1675,31 @@ def test_int_series_slicing(self): expected = self.ymd.reindex(s.index[5:]) tm.assert_frame_equal(result, expected) - def test_mixed_depth_get(self): + @pytest.mark.parametrize('unicode_strings', [True, False]) + def test_mixed_depth_get(self, unicode_strings): + # If unicode_strings is True, the column labels in dataframe + # construction will use unicode strings in Python 2 (pull request + # #17099). 
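+        # Under Python 2, u'' == '' is True but type(u'') is unicode
+        # rather than str, so the old type(top) == str check in
+        # DataFrame._getitem_multilevel missed unicode placeholders.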
+ arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] + if unicode_strings: + arrays = [[u(s) for s in arr] for arr in arrays] + tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(randn(4, 6), columns=index) + df = DataFrame(np.random.randn(4, 6), columns=index) result = df['a'] - expected = df['a', '', ''] - tm.assert_series_equal(result, expected, check_names=False) - assert result.name == 'a' + expected = df['a', '', ''].rename('a') + tm.assert_series_equal(result, expected) result = df['routine1', 'result1'] expected = df['routine1', 'result1', ''] - tm.assert_series_equal(result, expected, check_names=False) - assert result.name == ('routine1', 'result1') + expected = expected.rename(('routine1', 'result1')) + tm.assert_series_equal(result, expected) def test_mixed_depth_insert(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], From dbffba81914c922925e098411d0f773a759f7992 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 10 Aug 2017 12:37:39 +0200 Subject: [PATCH 874/933] CLN/ASV clean-up frame stat ops benchmarks (#17205) --- asv_bench/benchmarks/stat_ops.py | 100 +++++++------------------------ 1 file changed, 22 insertions(+), 78 deletions(-) diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 12fbb2478c2a5..1e1eb167b46bf 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -1,92 +1,36 @@ from .pandas_vb_common import * -class stat_ops_frame_mean_float_axis_0(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) - - def time_stat_ops_frame_mean_float_axis_0(self): - self.df.mean() - - -class stat_ops_frame_mean_float_axis_1(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) - - def time_stat_ops_frame_mean_float_axis_1(self): - self.df.mean(1) - - -class stat_ops_frame_mean_int_axis_0(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) - - def time_stat_ops_frame_mean_int_axis_0(self): - self.dfi.mean() - - -class stat_ops_frame_mean_int_axis_1(object): - goal_time = 0.2 +def _set_use_bottleneck_False(): + try: + pd.options.compute.use_bottleneck = False + except: + from pandas.core import nanops + nanops._USE_BOTTLENECK = False - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) - - def time_stat_ops_frame_mean_int_axis_1(self): - self.dfi.mean(1) - - -class stat_ops_frame_sum_float_axis_0(object): - goal_time = 0.2 - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) - - def time_stat_ops_frame_sum_float_axis_0(self): - self.df.sum() - - -class stat_ops_frame_sum_float_axis_1(object): +class FrameOps(object): goal_time = 0.2 - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) + param_names = ['op', 'use_bottleneck', 'dtype', 'axis'] + params = [['mean', 'sum', 'median'], + [True, False], + ['float', 'int'], + [0, 1]] - def 
time_stat_ops_frame_sum_float_axis_1(self):
-        self.df.sum(1)
+    def setup(self, op, use_bottleneck, dtype, axis):
+        if dtype == 'float':
+            self.df = DataFrame(np.random.randn(100000, 4))
+        elif dtype == 'int':
+            self.df = DataFrame(np.random.randint(1000, size=(100000, 4)))
+
+        if not use_bottleneck:
+            _set_use_bottleneck_False()

-class stat_ops_frame_sum_int_axis_0(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.df = DataFrame(np.random.randn(100000, 4))
-        self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape))
+        self.func = getattr(self.df, op)

-    def time_stat_ops_frame_sum_int_axis_0(self):
-        self.dfi.sum()
+    def time_op(self, op, use_bottleneck, dtype, axis):
+        self.func(axis=axis)

-class stat_ops_frame_sum_int_axis_1(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.df = DataFrame(np.random.randn(100000, 4))
-        self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape))
-
-    def time_stat_ops_frame_sum_int_axis_1(self):
-        self.dfi.sum(1)


 class stat_ops_level_frame_sum(object):

From f165b90ed27487287e1c8a0a6c4e66344b2c731d Mon Sep 17 00:00:00 2001
From: XF
Date: Thu, 10 Aug 2017 07:32:46 -0500
Subject: [PATCH 875/933] BUG: Rolling apply on DataFrame with Datetime index
 returns NaN (#17156)

---
 doc/source/whatsnew/v0.21.0.txt |  2 ++
 pandas/_libs/window.pyx         |  9 +++++----
 pandas/tests/test_window.py     | 20 ++++++++++++++++++++
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 70300a087895a..8b2c4d16f4e1a 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -350,6 +350,8 @@ Groupby/Resample/Rolling
 - Bug in :func:`infer_freq` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`)
 - Bug in ``.rolling(...).quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`)
 - Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`)
 - Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`)
+- Bug in ``.rolling(...).apply(...)`` on a ``DataFrame`` with a ``DatetimeIndex``, a timedelta-convertible ``window`` and ``min_periods >= 1`` (:issue:`15305`)
+

 Sparse
 ^^^^^^
diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx
index 2450eea5500cd..bdd371871b6e1 100644
--- a/pandas/_libs/window.pyx
+++ b/pandas/_libs/window.pyx
@@ -1428,15 +1428,16 @@ def roll_generic(ndarray[float64_t, cast=True] input,
     if n == 0:
         return input

+    counts = roll_sum(np.concatenate([np.isfinite(input).astype(float),
+                                      np.array([0.] * offset)]),
+                      win, minp, index, closed)[offset:]
+
     start, end, N, win, minp, is_variable = get_window_indexer(input, win,
                                                                minp, index,
                                                                closed,
                                                                floor=0)
-    output = np.empty(N, dtype=float)

-    counts = roll_sum(np.concatenate([np.isfinite(input).astype(float),
-                                      np.array([0.] 
* offset)]), - win, minp, index, closed)[offset:] + output = np.empty(N, dtype=float) if is_variable: diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 5ab33bd6cc5e1..d94e34c41786b 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -423,6 +423,26 @@ def test_constructor_with_timedelta_window(self): expected = df.rolling('3D').sum() tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + 'window', [timedelta(days=3), pd.Timedelta(days=3), '3D']) + def test_constructor_with_timedelta_window_and_minperiods(self, window): + # GH 15305 + n = 10 + df = pd.DataFrame({'value': np.arange(n)}, + index=pd.date_range('2017-08-08', + periods=n, + freq="D")) + expected = pd.DataFrame({'value': np.append([np.NaN, 1.], + np.arange(3., 27., 3))}, + index=pd.date_range('2017-08-08', + periods=n, + freq="D")) + result_roll_sum = df.rolling(window=window, min_periods=2).sum() + result_roll_generic = df.rolling(window=window, + min_periods=2).apply(sum) + tm.assert_frame_equal(result_roll_sum, expected) + tm.assert_frame_equal(result_roll_generic, expected) + def test_numpy_compat(self): # see gh-12811 r = rwindow.Rolling(Series([2, 4, 6]), window=2) From a2c454373b8cd2334b93a68d52104e9cbcdd3721 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 10 Aug 2017 16:43:37 -0400 Subject: [PATCH 876/933] CLN: Remove import exception handling (#17218) Imports should succeed on all versions of Python that pandas supports. --- pandas/tests/io/parser/compression.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py index 55c0506acb132..797c12139656d 100644 --- a/pandas/tests/io/parser/compression.py +++ b/pandas/tests/io/parser/compression.py @@ -13,10 +13,7 @@ class CompressionTests(object): def test_zip(self): - try: - import zipfile - except ImportError: - pytest.skip('need zipfile to run') + import zipfile with open(self.csv1, 'rb') as data_file: data = data_file.read() @@ -65,10 +62,7 @@ def test_zip(self): f, compression='zip') def test_gzip(self): - try: - import gzip - except ImportError: - pytest.skip('need gzip to run') + import gzip with open(self.csv1, 'rb') as data_file: data = data_file.read() @@ -94,10 +88,7 @@ def test_gzip(self): tm.assert_frame_equal(result, expected) def test_bz2(self): - try: - import bz2 - except ImportError: - pytest.skip('need bz2 to run') + import bz2 with open(self.csv1, 'rb') as data_file: data = data_file.read() From d59a7b5c4ab842399d79ffac120e9a46b4c0f8fa Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 10 Aug 2017 17:13:02 -0700 Subject: [PATCH 877/933] MAINT: Remove extra the's in deprecation messages (#17222) --- pandas/json.py | 2 +- pandas/parser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/json.py b/pandas/json.py index 0b87aa22394b9..16d6580c87951 100644 --- a/pandas/json.py +++ b/pandas/json.py @@ -3,5 +3,5 @@ import warnings warnings.warn("The pandas.json module is deprecated and will be " "removed in a future version. 
Please import from " - "the pandas.io.json instead", FutureWarning, stacklevel=2) + "pandas.io.json instead", FutureWarning, stacklevel=2) from pandas._libs.json import dumps, loads diff --git a/pandas/parser.py b/pandas/parser.py index c0c3bf3179a2d..f43a408c943d0 100644 --- a/pandas/parser.py +++ b/pandas/parser.py @@ -3,6 +3,6 @@ import warnings warnings.warn("The pandas.parser module is deprecated and will be " "removed in a future version. Please import from " - "the pandas.io.parser instead", FutureWarning, stacklevel=2) + "pandas.io.parser instead", FutureWarning, stacklevel=2) from pandas._libs.parsers import na_values from pandas.io.common import CParserError From 989babdec1f11edd208d94eaf5806f931ecb8fc9 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 10 Aug 2017 17:18:57 -0700 Subject: [PATCH 878/933] DOC: Patch docs in _decorators.py --- pandas/util/_decorators.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index e406698fafe63..f346995c0a1a4 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -9,8 +9,7 @@ def deprecate(name, alternative, alt_name=None, klass=None, stacklevel=2): """ - - Return a new function that emits a deprecation warning on use + Return a new function that emits a deprecation warning on use. Parameters ---------- @@ -22,8 +21,8 @@ def deprecate(name, alternative, alt_name=None, klass=None, Name to use in preference of alternative.__name__ klass : Warning, default FutureWarning stacklevel : int, default 2 - """ + alt_name = alt_name or alternative.__name__ klass = klass or FutureWarning @@ -35,7 +34,8 @@ def wrapper(*args, **kwargs): def deprecate_kwarg(old_arg_name, new_arg_name, mapping=None, stacklevel=2): - """Decorator to deprecate a keyword argument of a function + """ + Decorator to deprecate a keyword argument of a function. Parameters ---------- @@ -72,8 +72,8 @@ def deprecate_kwarg(old_arg_name, new_arg_name, mapping=None, stacklevel=2): FutureWarning: old='yes' is deprecated, use new=True instead warnings.warn(msg, FutureWarning) yes! - """ + if mapping is not None and not hasattr(mapping, 'get') and \ not callable(mapping): raise TypeError("mapping from old to new argument values " @@ -155,7 +155,12 @@ def __call__(self, func): return func def update(self, *args, **kwargs): - "Assume self.params is a dict and update it with supplied args" + """ + Update self.params with supplied args. + + If called, we assume self.params is a dict. + """ + self.params.update(*args, **kwargs) @classmethod @@ -215,16 +220,16 @@ def indent(text, indents=1): def make_signature(func): """ - Returns a string repr of the arg list of a func call, with any defaults + Returns a string repr of the arg list of a func call, with any defaults. Examples -------- - >>> def f(a,b,c=2) : >>> return a*b*c >>> print(_make_signature(f)) a,b,c=2 """ + spec = signature(func) if spec.defaults is None: n_wo_defaults = len(spec.args) @@ -244,8 +249,8 @@ def make_signature(func): class docstring_wrapper(object): """ - decorator to wrap a function, - provide a dynamically evaluated doc-string + Decorator to wrap a function and provide + a dynamically evaluated doc-string. 
Parameters ---------- From 236241465a0c10376d032da3c02a381f2b927246 Mon Sep 17 00:00:00 2001 From: jschendel Date: Fri, 11 Aug 2017 04:22:46 -0600 Subject: [PATCH 879/933] CLN: replace %s syntax with .format in pandas.util (#17224) --- pandas/util/_decorators.py | 26 +++-- pandas/util/_print_versions.py | 25 ++-- pandas/util/_validators.py | 6 +- pandas/util/testing.py | 206 ++++++++++++++++++--------------- 4 files changed, 141 insertions(+), 122 deletions(-) diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index f346995c0a1a4..bb7ffe45c689b 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -27,8 +27,9 @@ def deprecate(name, alternative, alt_name=None, klass=None, klass = klass or FutureWarning def wrapper(*args, **kwargs): - warnings.warn("%s is deprecated. Use %s instead" % (name, alt_name), - klass, stacklevel=stacklevel) + msg = "{name} is deprecated. Use {alt_name} instead".format( + name=name, alt_name=alt_name) + warnings.warn(msg, klass, stacklevel=stacklevel) return alternative(*args, **kwargs) return wrapper @@ -90,19 +91,24 @@ def wrapper(*args, **kwargs): old_arg_value) else: new_arg_value = mapping(old_arg_value) - msg = "the %s=%r keyword is deprecated, " \ - "use %s=%r instead" % \ - (old_arg_name, old_arg_value, - new_arg_name, new_arg_value) + msg = ("the {old_name}={old_val!r} keyword is deprecated, " + "use {new_name}={new_val!r} instead" + ).format(old_name=old_arg_name, + old_val=old_arg_value, + new_name=new_arg_name, + new_val=new_arg_value) else: new_arg_value = old_arg_value - msg = "the '%s' keyword is deprecated, " \ - "use '%s' instead" % (old_arg_name, new_arg_name) + msg = ("the '{old_name}' keyword is deprecated, " + "use '{new_name}' instead" + ).format(old_name=old_arg_name, + new_name=new_arg_name) warnings.warn(msg, FutureWarning, stacklevel=stacklevel) if kwargs.get(new_arg_name, None) is not None: - msg = ("Can only specify '%s' or '%s', not both" % - (old_arg_name, new_arg_name)) + msg = ("Can only specify '{old_name}' or '{new_name}', " + "not both").format(old_name=old_arg_name, + new_name=new_arg_name) raise TypeError(msg) else: kwargs[new_arg_name] = new_arg_value diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py index 9ecd4b10365c8..83c1433bf5c39 100644 --- a/pandas/util/_print_versions.py +++ b/pandas/util/_print_versions.py @@ -38,18 +38,17 @@ def get_sys_info(): (sysname, nodename, release, version, machine, processor) = platform.uname() blob.extend([ - ("python", "%d.%d.%d.%s.%s" % sys.version_info[:]), + ("python", '.'.join(map(str, sys.version_info))), ("python-bits", struct.calcsize("P") * 8), - ("OS", "%s" % (sysname)), - ("OS-release", "%s" % (release)), - # ("Version", "%s" % (version)), - ("machine", "%s" % (machine)), - ("processor", "%s" % (processor)), - ("byteorder", "%s" % sys.byteorder), - ("LC_ALL", "%s" % os.environ.get('LC_ALL', "None")), - ("LANG", "%s" % os.environ.get('LANG', "None")), - ("LOCALE", "%s.%s" % locale.getlocale()), - + ("OS", "{sysname}".format(sysname=sysname)), + ("OS-release", "{release}".format(release=release)), + # ("Version", "{version}".format(version=version)), + ("machine", "{machine}".format(machine=machine)), + ("processor", "{processor}".format(processor=processor)), + ("byteorder", "{byteorder}".format(byteorder=sys.byteorder)), + ("LC_ALL", "{lc}".format(lc=os.environ.get('LC_ALL', "None"))), + ("LANG", "{lang}".format(lang=os.environ.get('LANG', "None"))), + ("LOCALE", '.'.join(map(str, locale.getlocale()))), ]) except: 
pass @@ -131,11 +130,11 @@ def show_versions(as_json=False): print("------------------") for k, stat in sys_info: - print("%s: %s" % (k, stat)) + print("{k}: {stat}".format(k=k, stat=stat)) print("") for k, stat in deps_blob: - print("%s: %s" % (k, stat)) + print("{k}: {stat}".format(k=k, stat=stat)) def main(): diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 6b19904f4a665..2661e4a98aedf 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -220,7 +220,7 @@ def validate_args_and_kwargs(fname, args, kwargs, def validate_bool_kwarg(value, arg_name): """ Ensures that argument passed in arg_name is of type bool. """ if not (is_bool(value) or value is None): - raise ValueError('For argument "%s" expected type bool, ' - 'received type %s.' % - (arg_name, type(value).__name__)) + raise ValueError('For argument "{arg}" expected type bool, received ' + 'type {typ}.'.format(arg=arg_name, + typ=type(value).__name__)) return value diff --git a/pandas/util/testing.py b/pandas/util/testing.py index d6ba9561340cc..a000e189dfaa9 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -104,7 +104,7 @@ def round_trip_pickle(obj, path=None): """ if path is None: - path = u('__%s__.pickle' % rands(10)) + path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10))) with ensure_clean(path) as path: pd.to_pickle(obj, path) return pd.read_pickle(path) @@ -244,13 +244,15 @@ def _check_isinstance(left, right, cls): AssertionError : Either `left` or `right` is not an instance of `cls`. """ - err_msg = "{0} Expected type {1}, found {2} instead" + err_msg = "{name} Expected type {exp_type}, found {act_type} instead" cls_name = cls.__name__ if not isinstance(left, cls): - raise AssertionError(err_msg.format(cls_name, cls, type(left))) + raise AssertionError(err_msg.format(name=cls_name, exp_type=cls, + act_type=type(left))) if not isinstance(right, cls): - raise AssertionError(err_msg.format(cls_name, cls, type(right))) + raise AssertionError(err_msg.format(name=cls_name, exp_type=cls, + act_type=type(right))) def assert_dict_equal(left, right, compare_keys=True): @@ -371,7 +373,7 @@ def _skip_if_no_xarray(): if v < LooseVersion('0.7.0'): import pytest - pytest.skip("xarray not version is too low: {0}".format(v)) + pytest.skip("xarray version is too low: {version}".format(version=v)) def _skip_if_windows_python_3(): @@ -436,7 +438,7 @@ def _skip_if_has_locale(): lang, _ = locale.getlocale() if lang is not None: import pytest - pytest.skip("Specific locale is set {0}".format(lang)) + pytest.skip("Specific locale is set {lang}".format(lang=lang)) def _skip_if_not_us_locale(): @@ -444,7 +446,7 @@ def _skip_if_not_us_locale(): lang, _ = locale.getlocale() if lang != 'en_US': import pytest - pytest.skip("Specific locale is set {0}".format(lang)) + pytest.skip("Specific locale is set {lang}".format(lang=lang)) def _skip_if_no_mock(): @@ -505,8 +507,8 @@ def _default_locale_getter(): try: raw_locales = check_output(['locale -a'], shell=True) except subprocess.CalledProcessError as e: - raise type(e)("%s, the 'locale -a' command cannot be found on your " - "system" % e) + raise type(e)("{exception}, the 'locale -a' command cannot be found " + "on your system".format(exception=e)) return raw_locales @@ -563,7 +565,8 @@ def get_locales(prefix=None, normalize=True, if prefix is None: return _valid_locales(out_locales, normalize) - found = re.compile('%s.*' % prefix).findall('\n'.join(out_locales)) + found = re.compile('{prefix}.*'.format(prefix=prefix)) \ + 
.findall('\n'.join(out_locales)) return _valid_locales(found, normalize) @@ -818,13 +821,13 @@ def ensure_clean(filename=None, return_filelike=False): try: os.close(fd) except Exception as e: - print("Couldn't close file descriptor: %d (file: %s)" % - (fd, filename)) + print("Couldn't close file descriptor: {fdesc} (file: {fname})" + .format(fdesc=fd, fname=filename)) try: if os.path.exists(filename): os.remove(filename) except Exception as e: - print("Exception on removing file: %s" % e) + print("Exception on removing file: {error}".format(error=e)) def get_data_path(f=''): @@ -900,15 +903,17 @@ def _get_ilevel_values(index, level): # level comparison if left.nlevels != right.nlevels: - raise_assert_detail(obj, '{0} levels are different'.format(obj), - '{0}, {1}'.format(left.nlevels, left), - '{0}, {1}'.format(right.nlevels, right)) + msg1 = '{obj} levels are different'.format(obj=obj) + msg2 = '{nlevels}, {left}'.format(nlevels=left.nlevels, left=left) + msg3 = '{nlevels}, {right}'.format(nlevels=right.nlevels, right=right) + raise_assert_detail(obj, msg1, msg2, msg3) # length comparison if len(left) != len(right): - raise_assert_detail(obj, '{0} length are different'.format(obj), - '{0}, {1}'.format(len(left), left), - '{0}, {1}'.format(len(right), right)) + msg1 = '{obj} length are different'.format(obj=obj) + msg2 = '{length}, {left}'.format(length=len(left), left=left) + msg3 = '{length}, {right}'.format(length=len(right), right=right) + raise_assert_detail(obj, msg1, msg2, msg3) # MultiIndex special comparison for little-friendly error messages if left.nlevels > 1: @@ -917,7 +922,7 @@ def _get_ilevel_values(index, level): llevel = _get_ilevel_values(left, level) rlevel = _get_ilevel_values(right, level) - lobj = 'MultiIndex level [{0}]'.format(level) + lobj = 'MultiIndex level [{level}]'.format(level=level) assert_index_equal(llevel, rlevel, exact=exact, check_names=check_names, check_less_precise=check_less_precise, @@ -929,8 +934,8 @@ def _get_ilevel_values(index, level): if not left.equals(right): diff = np.sum((left.values != right.values) .astype(int)) * 100.0 / len(left) - msg = '{0} values are different ({1} %)'\ - .format(obj, np.round(diff, 5)) + msg = '{obj} values are different ({pct} %)'.format( + obj=obj, pct=np.round(diff, 5)) raise_assert_detail(obj, msg, left, right) else: _testing.assert_almost_equal(left.values, right.values, @@ -950,7 +955,7 @@ def _get_ilevel_values(index, level): if check_categorical: if is_categorical_dtype(left) or is_categorical_dtype(right): assert_categorical_equal(left.values, right.values, - obj='{0} category'.format(obj)) + obj='{obj} category'.format(obj=obj)) def assert_class_equal(left, right, exact=True, obj='Input'): @@ -971,12 +976,12 @@ def repr_class(x): # allow equivalence of Int64Index/RangeIndex types = set([type(left).__name__, type(right).__name__]) if len(types - set(['Int64Index', 'RangeIndex'])): - msg = '{0} classes are not equivalent'.format(obj) + msg = '{obj} classes are not equivalent'.format(obj=obj) raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) elif exact: if type(left) != type(right): - msg = '{0} classes are different'.format(obj) + msg = '{obj} classes are different'.format(obj=obj) raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) @@ -1016,23 +1021,22 @@ def assert_attr_equal(attr, left, right, obj='Attributes'): if result: return True else: - raise_assert_detail(obj, 'Attribute "{0}" are different'.format(attr), - left_attr, right_attr) + msg = 'Attribute "{attr}" are 
different'.format(attr=attr) + raise_assert_detail(obj, msg, left_attr, right_attr) def assert_is_valid_plot_return_object(objs): import matplotlib.pyplot as plt if isinstance(objs, (pd.Series, np.ndarray)): for el in objs.ravel(): - msg = ('one of \'objs\' is not a matplotlib Axes instance, ' - 'type encountered {0!r}') - assert isinstance(el, (plt.Axes, dict)), msg.format( - el.__class__.__name__) + msg = ('one of \'objs\' is not a matplotlib Axes instance, type ' + 'encountered {name!r}').format(name=el.__class__.__name__) + assert isinstance(el, (plt.Axes, dict)), msg else: assert isinstance(objs, (plt.Artist, tuple, dict)), \ ('objs is neither an ndarray of Artist instances nor a ' - 'single Artist instance, tuple, or dict, "objs" is a {0!r} ' - ''.format(objs.__class__.__name__)) + 'single Artist instance, tuple, or dict, "objs" is a {name!r}' + ).format(name=objs.__class__.__name__) def isiterable(obj): @@ -1069,17 +1073,17 @@ def assert_categorical_equal(left, right, check_dtype=True, if check_category_order: assert_index_equal(left.categories, right.categories, - obj='{0}.categories'.format(obj)) + obj='{obj}.categories'.format(obj=obj)) assert_numpy_array_equal(left.codes, right.codes, check_dtype=check_dtype, - obj='{0}.codes'.format(obj)) + obj='{obj}.codes'.format(obj=obj)) else: assert_index_equal(left.categories.sort_values(), right.categories.sort_values(), - obj='{0}.categories'.format(obj)) + obj='{obj}.categories'.format(obj=obj)) assert_index_equal(left.categories.take(left.codes), right.categories.take(right.codes), - obj='{0}.values'.format(obj)) + obj='{obj}.values'.format(obj=obj)) assert_attr_equal('ordered', left, right, obj=obj) @@ -1090,14 +1094,14 @@ def raise_assert_detail(obj, message, left, right, diff=None): if isinstance(right, np.ndarray): right = pprint_thing(right) - msg = """{0} are different + msg = """{obj} are different -{1} -[left]: {2} -[right]: {3}""".format(obj, message, left, right) +{message} +[left]: {left} +[right]: {right}""".format(obj=obj, message=message, left=left, right=right) if diff is not None: - msg = msg + "\n[diff]: {diff}".format(diff=diff) + msg += "\n[diff]: {diff}".format(diff=diff) raise AssertionError(msg) @@ -1138,18 +1142,20 @@ def _get_base(obj): if check_same == 'same': if left_base is not right_base: - msg = "%r is not %r" % (left_base, right_base) + msg = "{left!r} is not {right!r}".format( + left=left_base, right=right_base) raise AssertionError(msg) elif check_same == 'copy': if left_base is right_base: - msg = "%r is %r" % (left_base, right_base) + msg = "{left!r} is {right!r}".format( + left=left_base, right=right_base) raise AssertionError(msg) def _raise(left, right, err_msg): if err_msg is None: if left.shape != right.shape: - raise_assert_detail(obj, '{0} shapes are different' - .format(obj), left.shape, right.shape) + raise_assert_detail(obj, '{obj} shapes are different' + .format(obj=obj), left.shape, right.shape) diff = 0 for l, r in zip(left, right): @@ -1158,8 +1164,8 @@ def _raise(left, right, err_msg): diff += 1 diff = diff * 100.0 / left.size - msg = '{0} values are different ({1} %)'\ - .format(obj, np.round(diff, 5)) + msg = '{obj} values are different ({pct} %)'.format( + obj=obj, pct=np.round(diff, 5)) raise_assert_detail(obj, msg, left, right) raise AssertionError(err_msg) @@ -1226,9 +1232,9 @@ def assert_series_equal(left, right, check_dtype=True, # length comparison if len(left) != len(right): - raise_assert_detail(obj, 'Series length are different', - '{0}, {1}'.format(len(left), left.index), - 
'{0}, {1}'.format(len(right), right.index)) + msg1 = '{len}, {left}'.format(len=len(left), left=left.index) + msg2 = '{len}, {right}'.format(len=len(right), right=right.index) + raise_assert_detail(obj, 'Series length are different', msg1, msg2) # index comparison assert_index_equal(left.index, right.index, exact=check_index_type, @@ -1236,7 +1242,7 @@ def assert_series_equal(left, right, check_dtype=True, check_less_precise=check_less_precise, check_exact=check_exact, check_categorical=check_categorical, - obj='{0}.index'.format(obj)) + obj='{obj}.index'.format(obj=obj)) if check_dtype: assert_attr_equal('dtype', left, right) @@ -1244,7 +1250,7 @@ def assert_series_equal(left, right, check_dtype=True, if check_exact: assert_numpy_array_equal(left.get_values(), right.get_values(), check_dtype=check_dtype, - obj='{0}'.format(obj),) + obj='{obj}'.format(obj=obj),) elif check_datetimelike_compat: # we want to check only if we have compat dtypes # e.g. integer and M|m are NOT compat, but we can simply check @@ -1257,8 +1263,9 @@ def assert_series_equal(left, right, check_dtype=True, # datetimelike may have different objects (e.g. datetime.datetime # vs Timestamp) but will compare equal if not Index(left.values).equals(Index(right.values)): - msg = '[datetimelike_compat=True] {0} is not equal to {1}.' - raise AssertionError(msg.format(left.values, right.values)) + msg = ('[datetimelike_compat=True] {left} is not equal to ' + '{right}.').format(left=left.values, right=right.values) + raise AssertionError(msg) else: assert_numpy_array_equal(left.get_values(), right.get_values(), check_dtype=check_dtype) @@ -1266,13 +1273,13 @@ def assert_series_equal(left, right, check_dtype=True, # TODO: big hack here l = pd.IntervalIndex(left) r = pd.IntervalIndex(right) - assert_index_equal(l, r, obj='{0}.index'.format(obj)) + assert_index_equal(l, r, obj='{obj}.index'.format(obj=obj)) else: _testing.assert_almost_equal(left.get_values(), right.get_values(), check_less_precise=check_less_precise, check_dtype=check_dtype, - obj='{0}'.format(obj)) + obj='{obj}'.format(obj=obj)) # metadata comparison if check_names: @@ -1281,7 +1288,7 @@ def assert_series_equal(left, right, check_dtype=True, if check_categorical: if is_categorical_dtype(left) or is_categorical_dtype(right): assert_categorical_equal(left.values, right.values, - obj='{0} category'.format(obj)) + obj='{obj} category'.format(obj=obj)) # This could be refactored to use the NDFrame.equals method @@ -1348,8 +1355,8 @@ def assert_frame_equal(left, right, check_dtype=True, if left.shape != right.shape: raise_assert_detail(obj, 'DataFrame shape mismatch', - '({0}, {1})'.format(*left.shape), - '({0}, {1})'.format(*right.shape)) + '{shape!r}'.format(shape=left.shape), + '{shape!r}'.format(shape=right.shape)) if check_like: left, right = left.reindex_like(right), right @@ -1360,7 +1367,7 @@ def assert_frame_equal(left, right, check_dtype=True, check_less_precise=check_less_precise, check_exact=check_exact, check_categorical=check_categorical, - obj='{0}.index'.format(obj)) + obj='{obj}.index'.format(obj=obj)) # column comparison assert_index_equal(left.columns, right.columns, exact=check_column_type, @@ -1368,7 +1375,7 @@ def assert_frame_equal(left, right, check_dtype=True, check_less_precise=check_less_precise, check_exact=check_exact, check_categorical=check_categorical, - obj='{0}.columns'.format(obj)) + obj='{obj}.columns'.format(obj=obj)) # compare by blocks if by_blocks: @@ -1393,7 +1400,7 @@ def assert_frame_equal(left, right, check_dtype=True, 
check_exact=check_exact, check_names=check_names, check_datetimelike_compat=check_datetimelike_compat, check_categorical=check_categorical, - obj='DataFrame.iloc[:, {0}]'.format(i)) + obj='DataFrame.iloc[:, {idx}]'.format(idx=i)) def assert_panelnd_equal(left, right, @@ -1448,13 +1455,15 @@ def assert_panelnd_equal(left, right, # can potentially be slow for i, item in enumerate(left._get_axis(0)): - assert item in right, "non-matching item (right) '%s'" % item + msg = "non-matching item (right) '{item}'".format(item=item) + assert item in right, msg litem = left.iloc[i] ritem = right.iloc[i] assert_func(litem, ritem, check_less_precise=check_less_precise) for i, item in enumerate(right._get_axis(0)): - assert item in left, "non-matching item (left) '%s'" % item + msg = "non-matching item (left) '{item}'".format(item=item) + assert item in left, msg # TODO: strangely check_names fails in py3 ? @@ -1526,7 +1535,7 @@ def assert_sp_series_equal(left, right, check_dtype=True, exact_indices=True, assert_class_equal(left, right, obj=obj) assert_index_equal(left.index, right.index, - obj='{0}.index'.format(obj)) + obj='{obj}.index'.format(obj=obj)) assert_sp_array_equal(left.block.values, right.block.values) @@ -1563,9 +1572,9 @@ def assert_sp_frame_equal(left, right, check_dtype=True, exact_indices=True, assert_class_equal(left, right, obj=obj) assert_index_equal(left.index, right.index, - obj='{0}.index'.format(obj)) + obj='{obj}.index'.format(obj=obj)) assert_index_equal(left.columns, right.columns, - obj='{0}.columns'.format(obj)) + obj='{obj}.columns'.format(obj=obj)) for col, series in compat.iteritems(left): assert (col in right) @@ -1599,7 +1608,7 @@ def assert_sp_list_equal(left, right): def assert_contains_all(iterable, dic): for k in iterable: - assert k in dic, "Did not contain item: '%r'" % k + assert k in dic, "Did not contain item: '{key!r}'".format(key=k) def assert_copy(iter1, iter2, **eql_kwargs): @@ -1613,10 +1622,10 @@ def assert_copy(iter1, iter2, **eql_kwargs): """ for elem1, elem2 in zip(iter1, iter2): assert_almost_equal(elem1, elem2, **eql_kwargs) - assert elem1 is not elem2, ("Expected object %r and " - "object %r to be different " - "objects, were same." - % (type(elem1), type(elem2))) + msg = ("Expected object {obj1!r} and object {obj2!r} to be " + "different objects, but they were the same object." + ).format(obj1=type(elem1), obj2=type(elem2)) + assert elem1 is not elem2, msg def getCols(k): @@ -1870,8 +1879,9 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, idx.name = names[0] return idx elif idx_type is not None: - raise ValueError('"%s" is not a legal value for `idx_type`, use ' - '"i"/"f"/"s"/"u"/"dt/"p"/"td".' % idx_type) + raise ValueError('"{idx_type}" is not a legal value for `idx_type`, ' + 'use "i"/"f"/"s"/"u"/"dt/"p"/"td".' 
+ .format(idx_type=idx_type)) if len(ndupe_l) < nlevels: ndupe_l.extend([1] * (nlevels - len(ndupe_l))) @@ -1890,7 +1900,7 @@ def keyfunc(x): div_factor = nentries // ndupe_l[i] + 1 cnt = Counter() for j in range(div_factor): - label = prefix + '_l%d_g' % i + str(j) + label = '{prefix}_l{i}_g{j}'.format(prefix=prefix, i=i, j=j) cnt[label] = ndupe_l[i] # cute Counter trick result = list(sorted(cnt.elements(), key=keyfunc))[:nentries] @@ -1983,7 +1993,7 @@ def makeCustomDataframe(nrows, ncols, c_idx_names=True, r_idx_names=True, # by default, generate data based on location if data_gen_f is None: - data_gen_f = lambda r, c: "R%dC%d" % (r, c) + data_gen_f = lambda r, c: "R{rows}C{cols}".format(rows=r, cols=c) data = [[data_gen_f(r, c) for c in range(ncols)] for r in range(nrows)] @@ -2112,13 +2122,13 @@ def skip_if_no_package(pkg_name, min_version=None, max_version=None, import pytest if app: - msg = '%s requires %s' % (app, pkg_name) + msg = '{app} requires {pkg_name}'.format(app=app, pkg_name=pkg_name) else: - msg = 'module requires %s' % pkg_name + msg = 'module requires {pkg_name}'.format(pkg_name=pkg_name) if min_version: - msg += ' with version >= %s' % (min_version,) + msg += ' with version >= {min_version}'.format(min_version=min_version) if max_version: - msg += ' with version < %s' % (max_version,) + msg += ' with version < {max_version}'.format(max_version=max_version) try: mod = __import__(pkg_name) except ImportError: @@ -2126,7 +2136,8 @@ try: have_version = mod.__version__ except AttributeError: - pytest.skip('Cannot find version for %s' % pkg_name) + pytest.skip('Cannot find version for {pkg_name}' + .format(pkg_name=pkg_name)) if min_version and checker(have_version) < checker(min_version): pytest.skip(msg) if max_version and checker(have_version) >= checker(max_version): @@ -2332,7 +2343,7 @@ def wrapper(*args, **kwargs): if errno in skip_errnos: skip("Skipping test due to known errno" - " and error %s" % e) + " and error {error}".format(error=e)) try: e_str = traceback.format_exc(e) @@ -2341,7 +2352,7 @@ if any([m.lower() in e_str.lower() for m in _skip_on_messages]): skip("Skipping test because exception " - "message is known and error %s" % e) + "message is known and error {error}".format(error=e)) if not isinstance(e, error_classes): raise @@ -2350,7 +2361,7 @@ raise else: skip("Skipping test due to lack of connectivity" - " and error %s" % e) + " and error {error}".format(error=e)) return wrapper @@ -2488,7 +2499,7 @@ def __exit__(self, exc_type, exc_value, trace_back): if not exc_type: exp_name = getattr(expected, "__name__", str(expected)) - raise AssertionError("{0} not raised.".format(exp_name)) + raise AssertionError("{name} not raised.".format(name=exp_name)) return self.exception_matches(exc_type, exc_value, trace_back) @@ -2523,8 +2534,9 @@ def exception_matches(self, exc_type, exc_value, trace_back): val = str(exc_value) if not self.regexp.search(val): - e = AssertionError('"%s" does not match "%s"' % - (self.regexp.pattern, str(val))) + msg = '"{pat}" does not match "{val}"'.format( + pat=self.regexp.pattern, val=val) + e = AssertionError(msg) raise_with_traceback(e, trace_back) return True @@ -2591,18 +2603,20 @@ def assert_produces_warning(expected_warning=Warning, filter_level="always", from inspect import getframeinfo, stack caller = getframeinfo(stack()[2][0]) msg = ("Warning not set with correct stacklevel. 
" - "File where warning is raised: {0} != {1}. " - "Warning message: {2}".format( - actual_warning.filename, caller.filename, - actual_warning.message)) + "File where warning is raised: {actual} != " + "{caller}. Warning message: {message}" + ).format(actual=actual_warning.filename, + caller=caller.filename, + message=actual_warning.message) assert actual_warning.filename == caller.filename, msg else: extra_warnings.append(actual_warning.category.__name__) if expected_warning: - assert saw_warning, ("Did not see expected warning of class %r." - % expected_warning.__name__) - assert not extra_warnings, ("Caused unexpected warning(s): %r." - % extra_warnings) + msg = "Did not see expected warning of class {name!r}.".format( + name=expected_warning.__name__) + assert saw_warning, msg + assert not extra_warnings, ("Caused unexpected warning(s): {extra!r}." + ).format(extra=extra_warnings) class RNGContext(object): From d11fae62ae6a9a5e712d3165c721179c31d961bd Mon Sep 17 00:00:00 2001 From: topper-123 Date: Fri, 11 Aug 2017 11:23:33 +0100 Subject: [PATCH 880/933] Add 'See also' sections (#17223) --- pandas/core/generic.py | 3 +++ pandas/core/tools/datetimes.py | 4 ++++ pandas/core/tools/numeric.py | 7 +++++++ pandas/core/tools/timedeltas.py | 5 +++++ 4 files changed, 19 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bd3297f66a469..5a7f37bba91aa 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3681,6 +3681,9 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): See also -------- + pandas.to_datetime : Convert argument to datetime. + pandas.to_timedelta : Convert argument to timedelta. + pandas.to_numeric : Convert argument to a numeric type. numpy.ndarray.astype : Cast a numpy array to a specified type. """ if is_dict_like(dtype): diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index eebf78d7619eb..6ff4302937d07 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -335,6 +335,10 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, 1 1960-01-03 2 1960-01-04 + See also + -------- + pandas.DataFrame.astype : Cast argument to a specified dtype. + pandas.to_timedelta : Convert argument to timedelta. """ from pandas.core.indexes.datetimes import DatetimeIndex diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index eda88a2f7e474..c584e29f682dd 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -84,6 +84,13 @@ def to_numeric(arg, errors='raise', downcast=None): 2 2.0 3 -3.0 dtype: float64 + + See also + -------- + pandas.DataFrame.astype : Cast argument to a specified dtype. + pandas.to_datetime : Convert argument to datetime. + pandas.to_timedelta : Convert argument to timedelta. + numpy.ndarray.astype : Cast a numpy array to a specified type. """ if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'): raise ValueError('invalid downcasting method provided') diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index fe03f89fdb2c5..f2d99d26a87b8 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -61,6 +61,11 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): >>> pd.to_timedelta(np.arange(5), unit='d') TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None) + + See also + -------- + pandas.DataFrame.astype : Cast argument to a specified dtype. 
+ pandas.to_datetime : Convert argument to datetime. """ unit = _validate_timedelta_unit(unit) From 06850a148ad880eb2fd2564cc0ad7cae8606dd90 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 11 Aug 2017 03:36:13 -0700 Subject: [PATCH 881/933] move pivot_table doc-string to DataFrame (#17174) --- pandas/core/frame.py | 86 ++++++++++++++++++++++++++++ pandas/core/reshape/pivot.py | 107 +++++++---------------------------- 2 files changed, 107 insertions(+), 86 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 94cce1b4d05b5..2c82fe4c348d5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4154,6 +4154,92 @@ def pivot(self, index=None, columns=None, values=None): from pandas.core.reshape.reshape import pivot return pivot(self, index=index, columns=columns, values=values) + _shared_docs['pivot_table'] = """ + Create a spreadsheet-style pivot table as a DataFrame. The levels in + the pivot table will be stored in MultiIndex objects (hierarchical + indexes) on the index and columns of the result DataFrame + + Parameters + ----------%s + values : column to aggregate, optional + index : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table index. If an array is passed, + it is being used as the same manner as column values. + columns : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table column. If an array is passed, + it is being used as the same manner as column values. + aggfunc : function or list of functions, default numpy.mean + If list of functions passed, the resulting pivot table will have + hierarchical columns whose top level are the function names + (inferred from the function objects themselves) + fill_value : scalar, default None + Value to replace missing values with + margins : boolean, default False + Add all row / columns (e.g. for subtotal / grand totals) + dropna : boolean, default True + Do not include columns whose entries are all NaN + margins_name : string, default 'All' + Name of the row / column that will contain the totals + when margins is True. + + Examples + -------- + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", + ... "one", "one", "two", "two"], + ... "C": ["small", "large", "large", "small", + ... "small", "large", "small", "small", + ... "large"], + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) + >>> df + A B C D + 0 foo one small 1 + 1 foo one large 2 + 2 foo one large 2 + 3 foo two small 3 + 4 foo two small 3 + 5 bar one large 4 + 6 bar one small 5 + 7 bar two small 6 + 8 bar two large 7 + + >>> table = pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc=np.sum) + >>> table + ... 
# doctest: +NORMALIZE_WHITESPACE + C large small + A B + bar one 4.0 5.0 + two 7.0 6.0 + foo one 4.0 1.0 + two NaN 6.0 + + Returns + ------- + table : DataFrame + + See also + -------- + DataFrame.pivot : pivot without aggregation that can handle + non-numeric data + """ + + @Substitution('') + @Appender(_shared_docs['pivot_table']) + def pivot_table(self, values=None, index=None, columns=None, + aggfunc='mean', fill_value=None, margins=False, + dropna=True, margins_name='All'): + from pandas.core.reshape.pivot import pivot_table + return pivot_table(self, values=values, index=index, columns=columns, + aggfunc=aggfunc, fill_value=fill_value, + margins=margins, dropna=dropna, + margins_name=margins_name) + def stack(self, level=-1, dropna=True): """ Pivot a level of the (possibly hierarchical) column labels, returning a diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index d4ea49c130add..e61adf3aac30a 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -2,95 +2,30 @@ from pandas.core.dtypes.common import is_list_like, is_scalar +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries + from pandas.core.reshape.concat import concat -from pandas import Series, DataFrame, MultiIndex, Index +from pandas.core.series import Series from pandas.core.groupby import Grouper from pandas.core.reshape.util import cartesian_product -from pandas.core.index import _get_combined_index +from pandas.core.index import Index, _get_combined_index from pandas.compat import range, lrange, zip from pandas import compat import pandas.core.common as com +from pandas.util._decorators import Appender, Substitution + +from pandas.core.frame import _shared_docs +# Note: We need to make sure `frame` is imported before `pivot`, otherwise +# _shared_docs['pivot_table'] will not yet exist. TODO: Fix this dependency + import numpy as np +@Substitution('\ndata : DataFrame') +@Appender(_shared_docs['pivot_table'], indents=1) def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All'): - """ - Create a spreadsheet-style pivot table as a DataFrame. The levels in the - pivot table will be stored in MultiIndex objects (hierarchical indexes) on - the index and columns of the result DataFrame - - Parameters - ---------- - data : DataFrame - values : column to aggregate, optional - index : column, Grouper, array, or list of the previous - If an array is passed, it must be the same length as the data. The list - can contain any of the other types (except list). - Keys to group by on the pivot table index. If an array is passed, it - is being used as the same manner as column values. - columns : column, Grouper, array, or list of the previous - If an array is passed, it must be the same length as the data. The list - can contain any of the other types (except list). - Keys to group by on the pivot table column. If an array is passed, it - is being used as the same manner as column values. - aggfunc : function or list of functions, default numpy.mean - If list of functions passed, the resulting pivot table will have - hierarchical columns whose top level are the function names (inferred - from the function objects themselves) - fill_value : scalar, default None - Value to replace missing values with - margins : boolean, default False - Add all row / columns (e.g. 
for subtotal / grand totals) - dropna : boolean, default True - Do not include columns whose entries are all NaN - margins_name : string, default 'All' - Name of the row / column that will contain the totals - when margins is True. - - Examples - -------- - >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", - ... "bar", "bar", "bar", "bar"], - ... "B": ["one", "one", "one", "two", "two", - ... "one", "one", "two", "two"], - ... "C": ["small", "large", "large", "small", - ... "small", "large", "small", "small", - ... "large"], - ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) - >>> df - A B C D - 0 foo one small 1 - 1 foo one large 2 - 2 foo one large 2 - 3 foo two small 3 - 4 foo two small 3 - 5 bar one large 4 - 6 bar one small 5 - 7 bar two small 6 - 8 bar two large 7 - - >>> table = pivot_table(df, values='D', index=['A', 'B'], - ... columns=['C'], aggfunc=np.sum) - >>> table - ... # doctest: +NORMALIZE_WHITESPACE - C large small - A B - bar one 4.0 5.0 - two 7.0 6.0 - foo one 4.0 1.0 - two NaN 6.0 - - Returns - ------- - table : DataFrame - - See also - -------- - DataFrame.pivot : pivot without aggregation that can handle - non-numeric data - """ index = _convert_by(index) columns = _convert_by(columns) @@ -162,6 +97,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', table = agged.unstack(to_unstack) if not dropna: + from pandas import MultiIndex try: m = MultiIndex.from_arrays(cartesian_product(table.index.levels), names=table.index.names) @@ -176,7 +112,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', except AttributeError: pass # it's a single level or a series - if isinstance(table, DataFrame): + if isinstance(table, ABCDataFrame): table = table.sort_index(axis=1) if fill_value is not None: @@ -197,16 +133,13 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', if len(index) == 0 and len(columns) > 0: table = table.T - # GH 15193 Makse sure empty columns are removed if dropna=True - if isinstance(table, DataFrame) and dropna: + # GH 15193 Make sure empty columns are removed if dropna=True + if isinstance(table, ABCDataFrame) and dropna: table = table.dropna(how='all', axis=1) return table -DataFrame.pivot_table = pivot_table - - def _add_margins(table, data, values, rows, cols, aggfunc, margins_name='All', fill_value=None): if not isinstance(margins_name, compat.string_types): @@ -230,7 +163,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, else: key = margins_name - if not values and isinstance(table, Series): + if not values and isinstance(table, ABCSeries): # If there are no values and the table is a series, then there is only # one column in the data. Compute grand margin and return it. 
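# the grand total is appended under ``key`` (derived from margins_name, 'All' by default)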
return table.append(Series({key: grand_margin[margins_name]})) @@ -257,6 +190,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, else: row_margin[k] = grand_margin[k[0]] + from pandas import DataFrame margin_dummy = DataFrame(row_margin, columns=[key]).T row_names = result.index.names @@ -402,7 +336,7 @@ def _convert_by(by): if by is None: by = [] elif (is_scalar(by) or - isinstance(by, (np.ndarray, Index, Series, Grouper)) or + isinstance(by, (np.ndarray, Index, ABCSeries, Grouper)) or hasattr(by, '__call__')): by = [by] else: @@ -523,6 +457,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, if values is not None and aggfunc is None: raise ValueError("values cannot be used without an aggfunc.") + from pandas import DataFrame df = DataFrame(data, index=common_idx) if values is None: df['__dummy__'] = 0 @@ -620,7 +555,7 @@ def _get_names(arrs, names, prefix='row'): if names is None: names = [] for i, arr in enumerate(arrs): - if isinstance(arr, Series) and arr.name is not None: + if isinstance(arr, ABCSeries) and arr.name is not None: names.append(arr.name) else: names.append('%s_%d' % (prefix, i)) From a4a566531685eff6ea001bf6bd60f96791e8d076 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 12 Aug 2017 10:30:26 -0700 Subject: [PATCH 882/933] Remove import of pandas as pd in core.window (#17233) --- pandas/core/window.py | 55 ++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 5866f1e8a76bd..4bd959f52673c 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -17,7 +17,8 @@ ABCDataFrame, ABCDatetimeIndex, ABCTimedeltaIndex, - ABCPeriodIndex) + ABCPeriodIndex, + ABCDateOffset) from pandas.core.dtypes.common import ( is_integer, is_bool, @@ -28,13 +29,12 @@ is_list_like, _ensure_float64, is_scalar) -import pandas as pd from pandas.core.base import (PandasObject, SelectionMixin, GroupByMixin) import pandas.core.common as com import pandas._libs.window as _window -from pandas.tseries.offsets import DateOffset + from pandas import compat from pandas.compat.numpy import function as nv from pandas.util._decorators import (Substitution, Appender, @@ -254,7 +254,8 @@ def _wrap_result(self, result, block=None, obj=None): # coerce if necessary if block is not None: if is_timedelta64_dtype(block.values.dtype): - result = pd.to_timedelta( + from pandas import to_timedelta + result = to_timedelta( result.ravel(), unit='ns').values.reshape(result.shape) if result.ndim == 1: @@ -275,7 +276,7 @@ def _wrap_results(self, results, blocks, obj): obj : conformed data (may be resampled) """ - from pandas import Series + from pandas import Series, concat from pandas.core.index import _ensure_index final = [] @@ -290,8 +291,7 @@ def _wrap_results(self, results, blocks, obj): # we want to put it back into the results # in the same location columns = self._selected_obj.columns - if self.on is not None \ - and not self._on.equals(obj.index): + if self.on is not None and not self._on.equals(obj.index): name = self._on.name final.append(Series(self._on, index=obj.index, name=name)) @@ -309,8 +309,7 @@ def _wrap_results(self, results, blocks, obj): if not len(final): return obj.astype('float64') - return pd.concat(final, axis=1).reindex(columns=columns, - copy=False) + return concat(final, axis=1).reindex(columns=columns, copy=False) def _center_window(self, result, window): """ center the result in the window """ @@ -318,10 +317,9 @@ def _center_window(self, 
result, window): raise ValueError("Requested axis is larger then no. of argument " "dimensions") - from pandas import Series, DataFrame offset = _offset(window, True) if offset > 0: - if isinstance(result, (Series, DataFrame)): + if isinstance(result, (ABCSeries, ABCDataFrame)): result = result.slice_shift(-offset, axis=self.axis) else: lead_indexer = [slice(None)] * result.ndim @@ -1085,7 +1083,8 @@ def _on(self): return self.obj.index elif (isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns): - return pd.Index(self.obj[self.on]) + from pandas import Index + return Index(self.obj[self.on]) else: raise ValueError("invalid on specified as {0}, " "must be a column (if DataFrame) " @@ -1096,7 +1095,7 @@ def validate(self): # we allow rolling on a datetimelike index if ((self.obj.empty or self.is_datetimelike) and - isinstance(self.window, (compat.string_types, DateOffset, + isinstance(self.window, (compat.string_types, ABCDateOffset, timedelta))): self._validate_monotonic() @@ -1871,19 +1870,19 @@ def _cov(x, y): def _flex_binary_moment(arg1, arg2, f, pairwise=False): - from pandas import Series, DataFrame - if not (isinstance(arg1, (np.ndarray, Series, DataFrame)) and - isinstance(arg2, (np.ndarray, Series, DataFrame))): + if not (isinstance(arg1, (np.ndarray, ABCSeries, ABCDataFrame)) and + isinstance(arg2, (np.ndarray, ABCSeries, ABCDataFrame))): raise TypeError("arguments to moment function must be of type " "np.ndarray/Series/DataFrame") - if (isinstance(arg1, (np.ndarray, Series)) and - isinstance(arg2, (np.ndarray, Series))): + if (isinstance(arg1, (np.ndarray, ABCSeries)) and + isinstance(arg2, (np.ndarray, ABCSeries))): X, Y = _prep_binary(arg1, arg2) return f(X, Y) - elif isinstance(arg1, DataFrame): + elif isinstance(arg1, ABCDataFrame): + from pandas import DataFrame def dataframe_from_int_dict(data, frame_template): result = DataFrame(data, index=frame_template.index) @@ -1892,7 +1891,7 @@ def dataframe_from_int_dict(data, frame_template): return result results = {} - if isinstance(arg2, DataFrame): + if isinstance(arg2, ABCDataFrame): if pairwise is False: if arg1 is arg2: # special case in order to handle duplicate column names @@ -1929,7 +1928,7 @@ def dataframe_from_int_dict(data, frame_template): # TODO: not the most efficient (perf-wise) # though not bad code-wise - from pandas import Panel, MultiIndex + from pandas import Panel, MultiIndex, concat with warnings.catch_warnings(record=True): p = Panel.from_dict(results).swapaxes('items', 'major') @@ -1939,7 +1938,7 @@ def dataframe_from_int_dict(data, frame_template): p.minor_axis = arg2.columns[p.minor_axis] if len(p.items): - result = pd.concat( + result = concat( [p.iloc[i].T for i in range(len(p.items))], keys=p.items) else: @@ -2034,8 +2033,7 @@ def _zsqrt(x): result = np.sqrt(x) mask = x < 0 - from pandas import DataFrame - if isinstance(x, DataFrame): + if isinstance(x, ABCDataFrame): if mask.values.any(): result[mask] = 0 else: @@ -2060,8 +2058,7 @@ def _prep_binary(arg1, arg2): def rolling(obj, win_type=None, **kwds): - from pandas import Series, DataFrame - if not isinstance(obj, (Series, DataFrame)): + if not isinstance(obj, (ABCSeries, ABCDataFrame)): raise TypeError('invalid type: %s' % type(obj)) if win_type is not None: @@ -2074,8 +2071,7 @@ def rolling(obj, win_type=None, **kwds): def expanding(obj, **kwds): - from pandas import Series, DataFrame - if not isinstance(obj, (Series, DataFrame)): + if not isinstance(obj, (ABCSeries, ABCDataFrame)): raise TypeError('invalid type: %s' % type(obj)) 
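# the ABC* classes from pandas.core.dtypes.generic let this isinstance check run without importing the concrete Series/DataFrame types, which is the point of removing the pd import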
return Expanding(obj, **kwds) @@ -2085,8 +2081,7 @@ def expanding(obj, **kwds): def ewm(obj, **kwds): - from pandas import Series, DataFrame - if not isinstance(obj, (Series, DataFrame)): + if not isinstance(obj, (ABCSeries, ABCDataFrame)): raise TypeError('invalid type: %s' % type(obj)) return EWM(obj, **kwds) From 3ea2993a7dd3f477e2be6911d39d647b0e74d712 Mon Sep 17 00:00:00 2001 From: kernc Date: Sat, 12 Aug 2017 19:33:03 +0200 Subject: [PATCH 883/933] TST: Move more frame tests to SharedWithSparse (#17227) --- pandas/tests/frame/test_api.py | 132 +++++++++++++++++------------- pandas/tests/sparse/test_frame.py | 6 ++ 2 files changed, 79 insertions(+), 59 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 8c4c13b66ffa9..53a1b9525a0dd 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -28,6 +28,20 @@ class SharedWithSparse(object): + """ + A collection of tests DataFrame and SparseDataFrame can share. + + In generic tests on this class, use ``self._assert_frame_equal()`` and + ``self._assert_series_equal()`` which are implemented in sub-classes + and dispatch correctly. + """ + def _assert_frame_equal(self, left, right): + """Dispatch to frame class dependent assertion""" + raise NotImplementedError + + def _assert_series_equal(self, left, right): + """Dispatch to series class dependent assertion""" + raise NotImplementedError def test_copy_index_name_checking(self): # don't want to be able to modify the index stored elsewhere after @@ -76,11 +90,6 @@ def test_add_prefix_suffix(self): expected = pd.Index(['{}%'.format(c) for c in self.frame.columns]) tm.assert_index_equal(with_pct_suffix.columns, expected) - -class TestDataFrameMisc(SharedWithSparse, TestData): - - klass = DataFrame - def test_get_axis(self): f = self.frame assert f._get_axis_number(0) == 0 @@ -118,13 +127,13 @@ def test_column_contains_typeerror(self): pass def test_not_hashable(self): - df = pd.DataFrame([1]) + df = self.klass([1]) pytest.raises(TypeError, hash, df) pytest.raises(TypeError, hash, self.empty) def test_new_empty_index(self): - df1 = DataFrame(randn(0, 3)) - df2 = DataFrame(randn(0, 3)) + df1 = self.klass(randn(0, 3)) + df2 = self.klass(randn(0, 3)) df1.index.name = 'foo' assert df2.index.name is None @@ -135,7 +144,7 @@ def test_array_interface(self): assert result.index is self.frame.index assert result.columns is self.frame.columns - assert_frame_equal(result, self.frame.apply(np.sqrt)) + self._assert_frame_equal(result, self.frame.apply(np.sqrt)) def test_get_agg_axis(self): cols = self.frame._get_agg_axis(0) @@ -160,36 +169,36 @@ def test_nonzero(self): assert not df.empty def test_iteritems(self): - df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) + df = self.klass([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) for k, v in compat.iteritems(df): - assert type(v) == Series + assert type(v) == self.klass._constructor_sliced def test_iter(self): assert tm.equalContents(list(self.frame), self.frame.columns) def test_iterrows(self): - for i, (k, v) in enumerate(self.frame.iterrows()): - exp = self.frame.xs(self.frame.index[i]) - assert_series_equal(v, exp) + for k, v in self.frame.iterrows(): + exp = self.frame.loc[k] + self._assert_series_equal(v, exp) - for i, (k, v) in enumerate(self.mixed_frame.iterrows()): - exp = self.mixed_frame.xs(self.mixed_frame.index[i]) - assert_series_equal(v, exp) + for k, v in self.mixed_frame.iterrows(): + exp = self.mixed_frame.loc[k] + self._assert_series_equal(v, exp) def 
test_itertuples(self): for i, tup in enumerate(self.frame.itertuples()): - s = Series(tup[1:]) + s = self.klass._constructor_sliced(tup[1:]) s.name = tup[0] expected = self.frame.iloc[i, :].reset_index(drop=True) - assert_series_equal(s, expected) + self._assert_series_equal(s, expected) - df = DataFrame({'floats': np.random.randn(5), - 'ints': lrange(5)}, columns=['floats', 'ints']) + df = self.klass({'floats': np.random.randn(5), + 'ints': lrange(5)}, columns=['floats', 'ints']) for tup in df.itertuples(index=False): assert isinstance(tup[1], np.integer) - df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]}) + df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]}) dfaa = df[['a', 'a']] assert (list(dfaa.itertuples()) == @@ -237,7 +246,7 @@ def test_as_matrix(self): mat = self.mixed_frame.as_matrix(['foo', 'A']) assert mat[0, 0] == 'bar' - df = DataFrame({'real': [1, 2, 3], 'complex': [1j, 2j, 3j]}) + df = self.klass({'real': [1, 2, 3], 'complex': [1j, 2j, 3j]}) mat = df.as_matrix() assert mat[0, 0] == 1j @@ -246,20 +255,6 @@ def test_as_matrix(self): expected = self.frame.reindex(columns=['A', 'B']).values assert_almost_equal(mat, expected) - def test_values(self): - self.frame.values[:, 0] = 5. - assert (self.frame.values[:, 0] == 5).all() - - def test_deepcopy(self): - cp = deepcopy(self.frame) - series = cp['A'] - series[:] = 10 - for idx, value in compat.iteritems(series): - assert self.frame['A'][idx] != value - - # --------------------------------------------------------------------- - # Transposing - def test_transpose(self): frame = self.frame dft = frame.T @@ -272,23 +267,17 @@ def test_transpose(self): # mixed type index, data = tm.getMixedTypeDict() - mixed = DataFrame(data, index=index) + mixed = self.klass(data, index=index) mixed_T = mixed.T for col, s in compat.iteritems(mixed_T): assert s.dtype == np.object_ - def test_transpose_get_view(self): - dft = self.frame.T - dft.values[:, 5:10] = 5 - - assert (self.frame.values[5:10] == 5).all() - def test_swapaxes(self): - df = DataFrame(np.random.randn(10, 5)) - assert_frame_equal(df.T, df.swapaxes(0, 1)) - assert_frame_equal(df.T, df.swapaxes(1, 0)) - assert_frame_equal(df, df.swapaxes(0, 0)) + df = self.klass(np.random.randn(10, 5)) + self._assert_frame_equal(df.T, df.swapaxes(0, 1)) + self._assert_frame_equal(df.T, df.swapaxes(1, 0)) + self._assert_frame_equal(df, df.swapaxes(0, 0)) pytest.raises(ValueError, df.swapaxes, 2, 5) def test_axis_aliases(self): @@ -308,8 +297,8 @@ def test_more_asMatrix(self): assert values.shape[1] == len(self.mixed_frame.columns) def test_repr_with_mi_nat(self): - df = DataFrame({'X': [1, 2]}, - index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']]) + df = self.klass({'X': [1, 2]}, + index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']]) res = repr(df) exp = ' X\nNaT a 1\n2013-01-01 b 2' assert res == exp @@ -324,31 +313,56 @@ def test_series_put_names(self): assert v.name == k def test_empty_nonzero(self): - df = DataFrame([1, 2, 3]) + df = self.klass([1, 2, 3]) assert not df.empty - df = pd.DataFrame(index=[1], columns=[1]) + df = self.klass(index=[1], columns=[1]) assert not df.empty - df = DataFrame(index=['a', 'b'], columns=['c', 'd']).dropna() + df = self.klass(index=['a', 'b'], columns=['c', 'd']).dropna() assert df.empty assert df.T.empty - empty_frames = [pd.DataFrame(), - pd.DataFrame(index=[1]), - pd.DataFrame(columns=[1]), - pd.DataFrame({1: []})] + empty_frames = [self.klass(), + self.klass(index=[1]), + self.klass(columns=[1]), + self.klass({1: []})] for df in empty_frames: 
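# each frame constructed above holds no values, so both it and its transpose report empty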
assert df.empty assert df.T.empty def test_with_datetimelikes(self): - df = DataFrame({'A': date_range('20130101', periods=10), - 'B': timedelta_range('1 day', periods=10)}) + df = self.klass({'A': date_range('20130101', periods=10), + 'B': timedelta_range('1 day', periods=10)}) t = df.T result = t.get_dtype_counts() expected = Series({'object': 10}) tm.assert_series_equal(result, expected) + +class TestDataFrameMisc(SharedWithSparse, TestData): + + klass = DataFrame + # SharedWithSparse tests use generic, klass-agnostic assertion + _assert_frame_equal = staticmethod(assert_frame_equal) + _assert_series_equal = staticmethod(assert_series_equal) + + def test_values(self): + self.frame.values[:, 0] = 5. + assert (self.frame.values[:, 0] == 5).all() + + def test_deepcopy(self): + cp = deepcopy(self.frame) + series = cp['A'] + series[:] = 10 + for idx, value in compat.iteritems(series): + assert self.frame['A'][idx] != value + + def test_transpose_get_view(self): + dft = self.frame.T + dft.values[:, 5:10] = 5 + + assert (self.frame.values[5:10] == 5).all() + def test_inplace_return_self(self): # re #1893 diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index f0f8954e5785b..004af5066fe83 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -29,6 +29,10 @@ class TestSparseDataFrame(SharedWithSparse): klass = SparseDataFrame + # SharedWithSparse tests use generic, klass-agnostic assertion + _assert_frame_equal = staticmethod(tm.assert_sp_frame_equal) + _assert_series_equal = staticmethod(tm.assert_sp_series_equal) + def setup_method(self, method): self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], @@ -43,6 +47,8 @@ def setup_method(self, method): self.frame = SparseDataFrame(self.data, index=self.dates) self.iframe = SparseDataFrame(self.data, index=self.dates, default_kind='integer') + self.mixed_frame = self.frame.copy(False) + self.mixed_frame['foo'] = pd.SparseArray(['bar'] * len(self.dates)) values = self.frame.values.copy() values[np.isnan(values)] = 0 From 330b8c1c195174f729a1d2ee6f916ebd1579217e Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Sat, 12 Aug 2017 19:33:52 +0200 Subject: [PATCH 884/933] REF: _get_objs_combined_axis (#17217) --- pandas/core/frame.py | 6 ++---- pandas/core/indexes/api.py | 11 +++++++++++ pandas/core/panel.py | 7 +++---- pandas/core/reshape/concat.py | 19 ++++++++----------- pandas/core/reshape/pivot.py | 9 ++------- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2c82fe4c348d5..467ef52de234e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6126,12 +6126,10 @@ def _list_to_arrays(data, columns, coerce_float=False, dtype=None): def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): - from pandas.core.index import _get_combined_index + from pandas.core.index import _get_objs_combined_axis if columns is None: - columns = _get_combined_index([ - s.index for s in data if getattr(s, 'index', None) is not None - ]) + columns = _get_objs_combined_axis(data) indexer_cache = {} diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index d90c681abc03f..db73a6878258a 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -23,11 +23,22 @@ 'PeriodIndex', 'DatetimeIndex', '_new_Index', 'NaT', '_ensure_index', '_get_na_value', '_get_combined_index', + '_get_objs_combined_axis', '_get_distinct_indexes', 
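# helpers for combining axes across objects, used by the concat, panel and pivot code paths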
'_union_indexes', '_get_consensus_names', '_all_indexes_same'] +def _get_objs_combined_axis(objs, intersect=False, axis=0): + # Extract combined index: return intersection or union (depending on the + # value of "intersect") of indexes on given axis, or None if all objects + # lack indexes (e.g. they are numpy arrays) + obs_idxes = [obj._get_axis(axis) for obj in objs + if hasattr(obj, '_get_axis')] + if obs_idxes: + return _get_combined_index(obs_idxes, intersect=intersect) + + def _get_combined_index(indexes, intersect=False): # TODO: handle index names! indexes = _get_distinct_indexes(indexes) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index e4515efe109c5..a3e35492ad9af 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -26,7 +26,7 @@ from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, _ensure_index, - _get_combined_index) + _get_objs_combined_axis) from pandas.io.formats.printing import pprint_thing from pandas.core.indexing import maybe_droplevels from pandas.core.internals import (BlockManager, @@ -1448,7 +1448,6 @@ def _extract_axis(self, data, axis=0, intersect=False): index = Index([]) elif len(data) > 0: raw_lengths = [] - indexes = [] have_raw_arrays = False have_frames = False @@ -1456,13 +1455,13 @@ def _extract_axis(self, data, axis=0, intersect=False): for v in data.values(): if isinstance(v, self._constructor_sliced): have_frames = True - indexes.append(v._get_axis(axis)) elif v is not None: have_raw_arrays = True raw_lengths.append(v.shape[axis]) if have_frames: - index = _get_combined_index(indexes, intersect=intersect) + index = _get_objs_combined_axis(data.values(), axis=axis, + intersect=intersect) if have_raw_arrays: lengths = list(set(raw_lengths)) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index e199ec2710367..20d561738dc78 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -4,7 +4,7 @@ import numpy as np from pandas import compat, DataFrame, Series, Index, MultiIndex -from pandas.core.index import (_get_combined_index, +from pandas.core.index import (_get_objs_combined_axis, _ensure_index, _get_consensus_names, _all_indexes_same) from pandas.core.categorical import (_factorize_from_iterable, @@ -445,16 +445,13 @@ def _get_new_axes(self): return new_axes def _get_comb_axis(self, i): - if self._is_series: - all_indexes = [x.index for x in self.objs] - else: - try: - all_indexes = [x._data.axes[i] for x in self.objs] - except IndexError: - types = [type(x).__name__ for x in self.objs] - raise TypeError("Cannot concatenate list of %s" % types) - - return _get_combined_index(all_indexes, intersect=self.intersect) + data_axis = self.objs[0]._get_block_manager_axis(i) + try: + return _get_objs_combined_axis(self.objs, axis=data_axis, + intersect=self.intersect) + except IndexError: + types = [type(x).__name__ for x in self.objs] + raise TypeError("Cannot concatenate list of %s" % types) def _get_concat_axis(self): """ diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index e61adf3aac30a..e08c307bba818 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -8,7 +8,7 @@ from pandas.core.series import Series from pandas.core.groupby import Grouper from pandas.core.reshape.util import cartesian_product -from pandas.core.index import Index, _get_combined_index +from pandas.core.index import Index, _get_objs_combined_axis from pandas.compat import 
range, lrange, zip from pandas import compat import pandas.core.common as com @@ -440,12 +440,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, rownames = _get_names(index, rownames, prefix='row') colnames = _get_names(columns, colnames, prefix='col') - obs_idxes = [obj.index for objs in (index, columns) for obj in objs - if hasattr(obj, 'index')] - if obs_idxes: - common_idx = _get_combined_index(obs_idxes, intersect=True) - else: - common_idx = None + common_idx = _get_objs_combined_axis(index + columns, intersect=True) data = {} data.update(zip(rownames, index)) From 0fafd4f8f7967f83845a74905d7e3ed9432807b6 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 14 Aug 2017 06:31:41 -0400 Subject: [PATCH 885/933] ENH/PERF: Remove frequency inference from .dt accessor (#17210) * ENH/PERF: Remove frequency inference from .dt accessor * BENCH: Add DatetimeAccessor benchmark * DOC: Whatsnew --- asv_bench/benchmarks/timeseries.py | 14 ++++++++++++++ doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/accessors.py | 19 ++++++++++++------- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index efe713639fec9..b7151ad2eaa99 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -510,3 +510,17 @@ def time_begin_incr_rng(self): def time_begin_decr_rng(self): self.rng - self.semi_month_begin + + +class DatetimeAccessor(object): + def setup(self): + self.N = 100000 + self.series = pd.Series( + pd.date_range(start='1/1/2000', periods=self.N, freq='T') + ) + + def time_dt_accessor(self): + self.series.dt + + def time_dt_accessor_normalize(self): + self.series.dt.normalize() diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 8b2c4d16f4e1a..e21ee8d7d31f5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -296,6 +296,7 @@ Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Improved performance of instantiating :class:`SparseDataFrame` (:issue:`16773`) +- :attr:`Series.dt` no longer performs frequency inference, yielding a large speedup when accessing the attribute (:issue:`17210`) .. 
_whatsnew_0210.bug_fixes: diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index ce3143b342cec..88297ac70984d 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -61,23 +61,20 @@ def maybe_to_datetimelike(data, copy=False): data = orig.values.categories if is_datetime64_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), + return DatetimeProperties(DatetimeIndex(data, copy=copy), index, name=name, orig=orig) elif is_datetime64tz_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer', - ambiguous='infer'), + return DatetimeProperties(DatetimeIndex(data, copy=copy), index, data.name, orig=orig) elif is_timedelta64_dtype(data.dtype): - return TimedeltaProperties(TimedeltaIndex(data, copy=copy, - freq='infer'), index, + return TimedeltaProperties(TimedeltaIndex(data, copy=copy), index, name=name, orig=orig) else: if is_period_arraylike(data): return PeriodProperties(PeriodIndex(data, copy=copy), index, name=name, orig=orig) if is_datetime_arraylike(data): - return DatetimeProperties(DatetimeIndex(data, copy=copy, - freq='infer'), index, + return DatetimeProperties(DatetimeIndex(data, copy=copy), index, name=name, orig=orig) raise TypeError("cannot convert an object of type {0} to a " @@ -162,6 +159,10 @@ class DatetimeProperties(Properties): def to_pydatetime(self): return self.values.to_pydatetime() + @property + def freq(self): + return self.values.inferred_freq + DatetimeProperties._add_delegate_accessors( delegate=DatetimeIndex, @@ -202,6 +203,10 @@ def components(self): """ return self.values.components.set_index(self.index) + @property + def freq(self): + return self.values.inferred_freq + TimedeltaProperties._add_delegate_accessors( delegate=TimedeltaIndex, From 0f25426eac5c0097214df78b45125ac039c85770 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 14 Aug 2017 11:19:38 -0700 Subject: [PATCH 886/933] Fix apparent typo in tests (#17247) --- pandas/tests/indexes/datetimes/test_tools.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 7ff9c2b23cbfb..9764b65d330af 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1112,9 +1112,9 @@ def test_parsers(self): result3 = Timestamp('NaT') result4 = DatetimeIndex(['NaT'])[0] assert result1 is tslib.NaT - assert result1 is tslib.NaT - assert result1 is tslib.NaT - assert result1 is tslib.NaT + assert result2 is tslib.NaT + assert result3 is tslib.NaT + assert result4 is tslib.NaT def test_parsers_quarter_invalid(self): From 924b43359b2450cd1e6e364c468c30ee7694f0c1 Mon Sep 17 00:00:00 2001 From: mattip Date: Tue, 15 Aug 2017 06:29:29 -0400 Subject: [PATCH 887/933] COMPAT: avoid calling getsizeof() on PyPy closes #17228 Author: mattip Closes #17229 from mattip/getsizeof-unavailable and squashes the following commits: d2623e423 [mattip] COMPAT: avoid calling getsizeof() on PyPy --- doc/source/whatsnew/v0.21.0.txt | 3 +- pandas/compat/__init__.py | 2 + pandas/core/base.py | 6 +-- pandas/core/indexes/multi.py | 6 ++- pandas/core/indexes/range.py | 8 +++- pandas/tests/frame/test_repr_info.py | 68 ++++++++++++++++++++-------- pandas/tests/test_base.py | 4 +- pandas/tests/test_categorical.py | 11 +++-- pandas/util/testing.py | 1 - 9 files changed, 76 insertions(+), 33 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt 
b/doc/source/whatsnew/v0.21.0.txt index e21ee8d7d31f5..b8f142700b830 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -308,8 +308,9 @@ Bug Fixes Conversion ^^^^^^^^^^ -- Bug in assignment against datetime-like data with ``int`` may incorrectly converte to datetime-like (:issue:`14145`) +- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`) - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) +- Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, so an approximation is used instead (:issue:`17228`) Indexing diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 33b41d61aa978..b367fda002b74 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -31,6 +31,7 @@ from distutils.version import LooseVersion from itertools import product import sys +import platform import types from unicodedata import east_asian_width import struct @@ -41,6 +42,7 @@ PY3 = (sys.version_info[0] >= 3) PY35 = (sys.version_info >= (3, 5)) PY36 = (sys.version_info >= (3, 6)) +PYPY = (platform.python_implementation() == 'PyPy') try: import __builtin__ as builtins diff --git a/pandas/core/base.py b/pandas/core/base.py index 8f21e3125a27e..4ae4736035793 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -15,6 +15,7 @@ import pandas.core.nanops as nanops import pandas._libs.lib as lib from pandas.compat.numpy import function as nv +from pandas.compat import PYPY from pandas.util._decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError @@ -1061,7 +1062,7 @@ def memory_usage(self, deep=False): Notes ----- Memory usage does not include memory consumed by elements that - are not components of the array if deep=False + are not components of the array if deep=False or if used on PyPy See Also -------- @@ -1071,9 +1072,8 @@ def memory_usage(self, deep=False): return self.values.memory_usage(deep=deep) v = self.values.nbytes - if deep and is_object_dtype(self): + if deep and is_object_dtype(self) and not PYPY: v += lib.memory_usage_of_objects(self.values) - return v def factorize(self, sort=False, na_sentinel=-1): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 420788f9008cd..ea45b4700172f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -465,9 +465,13 @@ def _nbytes(self, deep=False): *this is in internal routine* """ + + # for implementations with no useful getsizeof (PyPy) + objsize = 24 + level_nbytes = sum((i.memory_usage(deep=deep) for i in self.levels)) label_nbytes = sum((i.nbytes for i in self.labels)) - names_nbytes = sum((getsizeof(i) for i in self.names)) + names_nbytes = sum((getsizeof(i, objsize) for i in self.names)) result = level_nbytes + label_nbytes + names_nbytes # include our engine hashtable diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 5071b50bbebdf..ac4cc6986cace 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -194,8 +194,12 @@ def _format_data(self): @cache_readonly def nbytes(self): - """ return the number of bytes in the underlying data """ - return sum([getsizeof(getattr(self, v)) for v in + """ + Return the number of bytes in the underlying data + On implementations where this is undetermined (PyPy) + assume 24 bytes for each value + """ + return 
sum([getsizeof(getattr(self, v), 24) for v in ['_start', '_stop', '_step']]) def memory_usage(self, deep=False): diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index c317ad542659a..37f8c0cc85b23 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -11,7 +11,7 @@ import pytest from pandas import (DataFrame, compat, option_context) -from pandas.compat import StringIO, lrange, u +from pandas.compat import StringIO, lrange, u, PYPY import pandas.io.formats.format as fmt import pandas as pd @@ -323,23 +323,6 @@ def test_info_memory_usage(self): # excluded column with object dtype, so estimate is accurate assert not re.match(r"memory usage: [^+]+\+", res[-1]) - df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo']) - df_with_object_index.info(buf=buf, memory_usage=True) - res = buf.getvalue().splitlines() - assert re.match(r"memory usage: [^+]+\+", res[-1]) - - df_with_object_index.info(buf=buf, memory_usage='deep') - res = buf.getvalue().splitlines() - assert re.match(r"memory usage: [^+]+$", res[-1]) - - assert (df_with_object_index.memory_usage( - index=True, deep=True).sum() > df_with_object_index.memory_usage( - index=True).sum()) - - df_object = pd.DataFrame({'a': ['a']}) - assert (df_object.memory_usage(deep=True).sum() > - df_object.memory_usage().sum()) - # Test a DataFrame with duplicate columns dtypes = ['int64', 'int64', 'int64', 'float64'] data = {} @@ -349,6 +332,15 @@ def test_info_memory_usage(self): df = DataFrame(data) df.columns = dtypes + df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo']) + df_with_object_index.info(buf=buf, memory_usage=True) + res = buf.getvalue().splitlines() + assert re.match(r"memory usage: [^+]+\+", res[-1]) + + df_with_object_index.info(buf=buf, memory_usage='deep') + res = buf.getvalue().splitlines() + assert re.match(r"memory usage: [^+]+$", res[-1]) + # Ensure df size is as expected # (cols * rows * bytes) + index size df_size = df.memory_usage().sum() @@ -377,9 +369,47 @@ def test_info_memory_usage(self): df.memory_usage(index=True) df.index.values.nbytes + mem = df.memory_usage(deep=True).sum() + assert mem > 0 + + @pytest.mark.skipif(PYPY, + reason="on PyPy deep=True doesn't change result") + def test_info_memory_usage_deep_not_pypy(self): + df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo']) + assert (df_with_object_index.memory_usage( + index=True, deep=True).sum() > + df_with_object_index.memory_usage( + index=True).sum()) + + df_object = pd.DataFrame({'a': ['a']}) + assert (df_object.memory_usage(deep=True).sum() > + df_object.memory_usage().sum()) + + @pytest.mark.skipif(not PYPY, + reason="on PyPy deep=True does not change result") + def test_info_memory_usage_deep_pypy(self): + df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo']) + assert (df_with_object_index.memory_usage( + index=True, deep=True).sum() == + df_with_object_index.memory_usage( + index=True).sum()) + + df_object = pd.DataFrame({'a': ['a']}) + assert (df_object.memory_usage(deep=True).sum() == + df_object.memory_usage().sum()) + + @pytest.mark.skipif(PYPY, reason="PyPy getsizeof() fails by design") + def test_usage_via_getsizeof(self): + df = DataFrame( + data=1, + index=pd.MultiIndex.from_product( + [['a'], range(1000)]), + columns=['A'] + ) + mem = df.memory_usage(deep=True).sum() # sys.getsizeof will call the .memory_usage with # deep=True, and add on some GC overhead - diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df) + diff = mem - 
sys.getsizeof(df) assert abs(diff) < 100 def test_info_memory_usage_qualified(self): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 9af4a9edeb8b1..9e92c7cf1a9b8 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -15,7 +15,7 @@ import pandas.util.testing as tm from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, Timedelta, IntervalIndex, Interval) -from pandas.compat import StringIO +from pandas.compat import StringIO, PYPY from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.base import PandasDelegate, NoNewAttributesMixin from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin @@ -144,6 +144,7 @@ def f(): pytest.raises(TypeError, f) + @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") def test_memory_usage(self): # Delegate does not implement memory_usage. # Check that we fall back to in-built `__sizeof__` @@ -941,6 +942,7 @@ def test_fillna(self): # check shallow_copied assert o is not result + @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") def test_memory_usage(self): for o in self.objs: res = o.memory_usage() diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index eecdd672095b0..a0b585a16ad9a 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -24,7 +24,7 @@ period_range, PeriodIndex, timedelta_range, TimedeltaIndex, NaT, Interval, IntervalIndex) -from pandas.compat import range, lrange, u, PY3 +from pandas.compat import range, lrange, u, PY3, PYPY from pandas.core.config import option_context @@ -1448,10 +1448,11 @@ def test_memory_usage(self): cat = pd.Categorical(['foo', 'foo', 'bar']) assert cat.memory_usage(deep=True) > cat.nbytes - # sys.getsizeof will call the .memory_usage with - # deep=True, and add on some GC overhead - diff = cat.memory_usage(deep=True) - sys.getsizeof(cat) - assert abs(diff) < 100 + if not PYPY: + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = cat.memory_usage(deep=True) - sys.getsizeof(cat) + assert abs(diff) < 100 def test_searchsorted(self): # https://github.com/pandas-dev/pandas/issues/8420 diff --git a/pandas/util/testing.py b/pandas/util/testing.py index a000e189dfaa9..5a17cb6d7dc47 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -56,7 +56,6 @@ K = 4 _RAISE_NETWORK_ERROR_DEFAULT = False - # set testing_mode _testing_mode_warnings = (DeprecationWarning, compat.ResourceWarning) From 133a2087d038da035a57ab90aad557a328b3d60b Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 15 Aug 2017 12:26:06 -0600 Subject: [PATCH 888/933] CLN: replace %s syntax with .format in pandas.core.reshape (#17252) Replaced %s syntax with .format in pandas.core.reshape. Additionally, made some of the existing positional .format code more explicit. 
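
For readers skimming the diff, a minimal standalone sketch of the conversion
this cleanup applies (illustrative only; the message text and variable names
below are invented, not taken from the patch):

    # Old positional %-interpolation versus the named str.format style
    # adopted by this cleanup; both render the same message.
    ndim, axis = 3, 'concat_axis'
    old = 'axis must be between 0 and %d, input was %s' % (ndim, axis)
    new = ('axis must be between 0 and {ndim}, input was {axis}'
           .format(ndim=ndim, axis=axis))
    assert old == new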
--- pandas/core/reshape/concat.py | 29 ++++++++++--------- pandas/core/reshape/merge.py | 53 ++++++++++++++++++---------------- pandas/core/reshape/pivot.py | 8 ++--- pandas/core/reshape/reshape.py | 31 +++++++++++--------- pandas/core/reshape/tile.py | 4 +-- 5 files changed, 67 insertions(+), 58 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 20d561738dc78..9e180c624963c 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -220,7 +220,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, if isinstance(objs, (NDFrame, compat.string_types)): raise TypeError('first argument must be an iterable of pandas ' 'objects, you passed an object of type ' - '"{0}"'.format(type(objs).__name__)) + '"{name}"'.format(name=type(objs).__name__)) if join == 'outer': self.intersect = False @@ -309,8 +309,8 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, self._is_series = isinstance(sample, Series) if not 0 <= axis <= sample.ndim: - raise AssertionError("axis must be between 0 and {0}, " - "input was {1}".format(sample.ndim, axis)) + raise AssertionError("axis must be between 0 and {ndim}, input was" + " {axis}".format(ndim=sample.ndim, axis=axis)) # if we have mixed ndims, then convert to highest ndim # creating column numbers as needed @@ -431,8 +431,8 @@ def _get_new_axes(self): new_axes[i] = self._get_comb_axis(i) else: if len(self.join_axes) != ndim - 1: - raise AssertionError("length of join_axes must not be " - "equal to {0}".format(ndim - 1)) + raise AssertionError("length of join_axes must not be equal " + "to {length}".format(length=ndim - 1)) # ufff... indices = compat.lrange(ndim) @@ -451,7 +451,8 @@ def _get_comb_axis(self, i): intersect=self.intersect) except IndexError: types = [type(x).__name__ for x in self.objs] - raise TypeError("Cannot concatenate list of %s" % types) + raise TypeError("Cannot concatenate list of {types}" + .format(types=types)) def _get_concat_axis(self): """ @@ -470,8 +471,8 @@ def _get_concat_axis(self): for i, x in enumerate(self.objs): if not isinstance(x, Series): raise TypeError("Cannot concatenate type 'Series' " - "with object of type " - "%r" % type(x).__name__) + "with object of type {type!r}" + .format(type=type(x).__name__)) if x.name is not None: names[i] = x.name has_names = True @@ -505,8 +506,8 @@ def _maybe_check_integrity(self, concat_index): if self.verify_integrity: if not concat_index.is_unique: overlap = concat_index.get_duplicates() - raise ValueError('Indexes have overlapping values: %s' - % str(overlap)) + raise ValueError('Indexes have overlapping values: ' + '{overlap!s}'.format(overlap=overlap)) def _concat_indexes(indexes): @@ -547,8 +548,8 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): try: i = level.get_loc(key) except KeyError: - raise ValueError('Key %s not in level %s' - % (str(key), str(level))) + raise ValueError('Key {key!s} not in level {level!s}' + .format(key=key, level=level)) to_concat.append(np.repeat(i, len(index))) label_list.append(np.concatenate(to_concat)) @@ -597,8 +598,8 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): mask = mapped == -1 if mask.any(): - raise ValueError('Values not found in passed level: %s' - % str(hlevel[mask])) + raise ValueError('Values not found in passed level: {hlevel!s}' + .format(hlevel=hlevel[mask])) new_labels.append(np.repeat(mapped, n)) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 
00828b8241f4c..947300a28e510 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -534,28 +534,27 @@ def __init__(self, left, right, how='inner', on=None, 'indicator option can only accept boolean or string arguments') if not isinstance(left, DataFrame): - raise ValueError( - 'can not merge DataFrame with instance of ' - 'type {0}'.format(type(left))) + raise ValueError('can not merge DataFrame with instance of ' + 'type {left}'.format(left=type(left))) if not isinstance(right, DataFrame): - raise ValueError( - 'can not merge DataFrame with instance of ' - 'type {0}'.format(type(right))) + raise ValueError('can not merge DataFrame with instance of ' + 'type {right}'.format(right=type(right))) if not is_bool(left_index): raise ValueError( 'left_index parameter must be of type bool, not ' - '{0}'.format(type(left_index))) + '{left_index}'.format(left_index=type(left_index))) if not is_bool(right_index): raise ValueError( 'right_index parameter must be of type bool, not ' - '{0}'.format(type(right_index))) + '{right_index}'.format(right_index=type(right_index))) # warn user when merging between different levels if left.columns.nlevels != right.columns.nlevels: msg = ('merging between different levels can give an unintended ' - 'result ({0} levels on the left, {1} on the right)') - msg = msg.format(left.columns.nlevels, right.columns.nlevels) + 'result ({left} levels on the left, {right} on the right)' + ).format(left=left.columns.nlevels, + right=right.columns.nlevels) warnings.warn(msg, UserWarning) self._validate_specification() @@ -613,7 +612,8 @@ def _indicator_pre_merge(self, left, right): for i in ['_left_indicator', '_right_indicator']: if i in columns: raise ValueError("Cannot use `indicator=True` option when " - "data contains a column named {}".format(i)) + "data contains a column named {name}" + .format(name=i)) if self.indicator_name in columns: raise ValueError( "Cannot use name of an existing column for indicator column") @@ -717,7 +717,7 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer): if name in result: result[name] = key_col else: - result.insert(i, name or 'key_%d' % i, key_col) + result.insert(i, name or 'key_{i}'.format(i=i), key_col) def _get_join_indexers(self): """ return the join indexers """ @@ -952,8 +952,8 @@ def _validate_specification(self): if len(common_cols) == 0: raise MergeError('No common columns to perform merge on') if not common_cols.is_unique: - raise MergeError("Data columns not unique: %s" - % repr(common_cols)) + raise MergeError("Data columns not unique: {common!r}" + .format(common=common_cols)) self.left_on = self.right_on = common_cols elif self.on is not None: if self.left_on is not None or self.right_on is not None: @@ -1119,12 +1119,14 @@ def get_result(self): def _asof_function(direction, on_type): - return getattr(libjoin, 'asof_join_%s_%s' % (direction, on_type), None) + name = 'asof_join_{dir}_{on}'.format(dir=direction, on=on_type) + return getattr(libjoin, name, None) def _asof_by_function(direction, on_type, by_type): - return getattr(libjoin, 'asof_join_%s_%s_by_%s' % - (direction, on_type, by_type), None) + name = 'asof_join_{dir}_{on}_by_{by}'.format( + dir=direction, on=on_type, by=by_type) + return getattr(libjoin, name, None) _type_casters = { @@ -1153,7 +1155,7 @@ def _get_cython_type(dtype): type_name = _get_dtype(dtype).name ctype = _cython_types.get(type_name, 'object') if ctype == 'error': - raise MergeError('unsupported type: ' + type_name) + raise MergeError('unsupported 
type: {type}'.format(type=type_name)) return ctype @@ -1235,7 +1237,8 @@ def _validate_specification(self): # check 'direction' is valid if self.direction not in ['backward', 'forward', 'nearest']: - raise MergeError('direction invalid: ' + self.direction) + raise MergeError('direction invalid: {direction}' + .format(direction=self.direction)) @property def _asof_key(self): @@ -1264,7 +1267,7 @@ def _get_merge_keys(self): lt = left_join_keys[-1] msg = "incompatible tolerance, must be compat " \ - "with type {0}".format(type(lt)) + "with type {lt}".format(lt=type(lt)) if is_datetime64_dtype(lt) or is_datetime64tz_dtype(lt): if not isinstance(self.tolerance, Timedelta): @@ -1283,8 +1286,8 @@ def _get_merge_keys(self): # validate allow_exact_matches if not is_bool(self.allow_exact_matches): - raise MergeError("allow_exact_matches must be boolean, " - "passed {0}".format(self.allow_exact_matches)) + msg = "allow_exact_matches must be boolean, passed {passed}" + raise MergeError(msg.format(passed=self.allow_exact_matches)) return left_join_keys, right_join_keys, join_names @@ -1306,11 +1309,11 @@ def flip(xs): tolerance = self.tolerance # we required sortedness in the join keys - msg = " keys must be sorted" + msg = "{side} keys must be sorted" if not Index(left_values).is_monotonic: - raise ValueError('left' + msg) + raise ValueError(msg.format(side='left')) if not Index(right_values).is_monotonic: - raise ValueError('right' + msg) + raise ValueError(msg.format(side='right')) # initial type conversion as needed if needs_i8_conversion(left_values): diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index e08c307bba818..f07123ca18489 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -145,10 +145,10 @@ def _add_margins(table, data, values, rows, cols, aggfunc, if not isinstance(margins_name, compat.string_types): raise ValueError('margins_name argument must be a string') - exception_msg = 'Conflicting name "{0}" in margins'.format(margins_name) + msg = 'Conflicting name "{name}" in margins'.format(name=margins_name) for level in table.index.names: if margins_name in table.index.get_level_values(level): - raise ValueError(exception_msg) + raise ValueError(msg) grand_margin = _compute_grand_margin(data, values, aggfunc, margins_name) @@ -156,7 +156,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, if hasattr(table, 'columns'): for level in table.columns.names[1:]: if margins_name in table.columns.get_level_values(level): - raise ValueError(exception_msg) + raise ValueError(msg) if len(rows) > 1: key = (margins_name,) + ('',) * (len(rows) - 1) @@ -553,7 +553,7 @@ def _get_names(arrs, names, prefix='row'): if isinstance(arr, ABCSeries) and arr.name is not None: names.append(arr.name) else: - names.append('%s_%d' % (prefix, i)) + names.append('{prefix}_{i}'.format(prefix=prefix, i=i)) else: if len(names) != len(arrs): raise AssertionError('arrays and names must have the same length') diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index b7638471f2ad0..455da9246783c 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -1,6 +1,6 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0703,W0622,W0613,W0201 -from pandas.compat import range, zip +from pandas.compat import range, text_type, zip from pandas import compat import itertools import re @@ -91,8 +91,8 @@ def __init__(self, values, index, level=-1, value_columns=None, if isinstance(self.index, MultiIndex): if 
index._reference_duplicate_name(level): - msg = ("Ambiguous reference to {0}. The index " - "names are not unique.".format(level)) + msg = ("Ambiguous reference to {level}. The index " + "names are not unique.".format(level=level)) raise ValueError(msg) self.level = self.index._get_level_number(level) @@ -229,7 +229,7 @@ def get_new_values(self): sorted_values = sorted_values.astype(name, copy=False) # fill in our values & mask - f = getattr(_reshape, "unstack_{}".format(name)) + f = getattr(_reshape, "unstack_{name}".format(name=name)) f(sorted_values, mask.view('u1'), stride, @@ -516,8 +516,8 @@ def factorize(index): N, K = frame.shape if isinstance(frame.columns, MultiIndex): if frame.columns._reference_duplicate_name(level): - msg = ("Ambiguous reference to {0}. The column " - "names are not unique.".format(level)) + msg = ("Ambiguous reference to {level}. The column " + "names are not unique.".format(level=level)) raise ValueError(msg) # Will also convert negative level numbers and check if out of bounds. @@ -747,7 +747,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, if len(frame.columns.names) == len(set(frame.columns.names)): var_name = frame.columns.names else: - var_name = ['variable_%s' % i + var_name = ['variable_{i}'.format(i=i) for i in range(len(frame.columns.names))] else: var_name = [frame.columns.name if frame.columns.name is not None @@ -1027,7 +1027,8 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): in a typicaly case. """ def get_var_names(df, stub, sep, suffix): - regex = "^{0}{1}{2}".format(re.escape(stub), re.escape(sep), suffix) + regex = "^{stub}{sep}{suffix}".format( + stub=re.escape(stub), sep=re.escape(sep), suffix=suffix) return df.filter(regex=regex).columns.tolist() def melt_stub(df, stub, i, j, value_vars, sep): @@ -1180,13 +1181,14 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, # validate prefixes and separator to avoid silently dropping cols def check_len(item, name): - length_msg = ("Length of '{0}' ({1}) did not match the length of " - "the columns being encoded ({2}).") + len_msg = ("Length of '{name}' ({len_item}) did not match the " + "length of the columns being encoded ({len_enc}).") if is_list_like(item): if not len(item) == len(columns_to_encode): - raise ValueError(length_msg.format(name, len(item), - len(columns_to_encode))) + len_msg = len_msg.format(name=name, len_item=len(item), + len_enc=len(columns_to_encode)) + raise ValueError(len_msg) check_len(prefix, 'prefix') check_len(prefix_sep, 'prefix_sep') @@ -1253,7 +1255,10 @@ def get_empty_Frame(data, sparse): number_of_cols = len(levels) if prefix is not None: - dummy_cols = ['%s%s%s' % (prefix, prefix_sep, v) for v in levels] + dummy_strs = [u'{prefix}{sep}{level}' if isinstance(v, text_type) + else '{prefix}{sep}{level}' for v in levels] + dummy_cols = [dummy_str.format(prefix=prefix, sep=prefix_sep, level=v) + for dummy_str, v in zip(dummy_strs, levels)] else: dummy_cols = levels diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 1cb39faa2e869..2f5538556fa6d 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -229,9 +229,9 @@ def _bins_to_cuts(x, bins, right=True, labels=None, unique_bins = algos.unique(bins) if len(unique_bins) < len(bins) and len(bins) != 2: if duplicates == 'raise': - raise ValueError("Bin edges must be unique: {}.\nYou " + raise ValueError("Bin edges must be unique: {bins!r}.\nYou " "can drop duplicate edges by setting " - "the 'duplicates' 
kwarg".format(repr(bins))) + "the 'duplicates' kwarg".format(bins=bins)) else: bins = unique_bins From a46e5beed5ee0c2395f11ab325eb1b71e6d23c60 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Tue, 15 Aug 2017 16:23:44 -0400 Subject: [PATCH 889/933] ENH: Infer compression from non-string paths (#17206) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/common.py | 14 ++++++------ pandas/io/parsers.py | 10 ++++----- pandas/io/pickle.py | 4 ++-- pandas/tests/io/test_common.py | 38 ++++++++++++++++++++++++--------- 5 files changed, 44 insertions(+), 23 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index b8f142700b830..4032a7d22d4a2 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -126,6 +126,7 @@ Other Enhancements - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`) - Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) +- `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). .. _whatsnew_0210.api_breaking: diff --git a/pandas/io/common.py b/pandas/io/common.py index cbfc33dbebb81..69a7e69ea724b 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -272,13 +272,15 @@ def _infer_compression(filepath_or_buffer, compression): if compression is None: return None - # Cannot infer compression of a buffer. Hence assume no compression. - is_path = isinstance(filepath_or_buffer, compat.string_types) - if compression == 'infer' and not is_path: - return None - - # Infer compression from the filename/URL extension + # Infer compression if compression == 'infer': + # Convert all path types (e.g. pathlib.Path) to strings + filepath_or_buffer = _stringify_path(filepath_or_buffer) + if not isinstance(filepath_or_buffer, compat.string_types): + # Cannot infer compression of a buffer, assume no compression + return None + + # Infer compression from the filename/URL extension for compression, extension in _compression_to_extension.items(): if filepath_or_buffer.endswith(extension): return compression diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9c76d3126890c..05a04f268f72b 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -208,11 +208,11 @@ `_ for more information on ``iterator`` and ``chunksize``. compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer' - For on-the-fly decompression of on-disk data. If 'infer', then use gzip, - bz2, zip or xz if filepath_or_buffer is a string ending in '.gz', '.bz2', - '.zip', or 'xz', respectively, and no decompression otherwise. If using - 'zip', the ZIP file must contain only one data file to be read in. - Set to None for no decompression. + For on-the-fly decompression of on-disk data. If 'infer' and + `filepath_or_buffer` is path-like, then detect compression from the + following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no + decompression). If using 'zip', the ZIP file must contain only one data + file to be read in. Set to None for no decompression. .. versionadded:: 0.18.1 support for 'zip' and 'xz' compression. 
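
As a usage sketch of what this change enables (illustrative; the file path
below is hypothetical and not part of the patch):

    # compression='infer' now also works for path-like objects such as
    # pathlib.Path, not only for plain string paths.
    from pathlib import Path
    import pandas as pd

    path = Path('data/example.csv.gz')            # hypothetical file
    df = pd.read_csv(path, compression='infer')   # inferred as 'gzip'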
diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 6f345092c514d..143b76575e36b 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -62,8 +62,8 @@ def read_pickle(path, compression='infer'): File path compression : {'infer', 'gzip', 'bz2', 'xz', 'zip', None}, default 'infer' For on-the-fly decompression of on-disk data. If 'infer', then use - gzip, bz2, xz or zip if path is a string ending in '.gz', '.bz2', 'xz', - or 'zip' respectively, and no decompression otherwise. + gzip, bz2, xz or zip if path ends in '.gz', '.bz2', '.xz', + or '.zip' respectively, and no decompression otherwise. Set to None for no decompression. .. versionadded:: 0.20.0 diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index b527e3c5dc254..30904593fedc4 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -14,16 +14,6 @@ from pandas import read_csv, concat -try: - from pathlib import Path -except ImportError: - pass - -try: - from py.path import local as LocalPath -except ImportError: - pass - class CustomFSPath(object): """For testing fspath on unknown objects""" @@ -34,6 +24,21 @@ def __fspath__(self): return self.path +# Functions that consume a string path and return a string or path-like object +path_types = [str, CustomFSPath] + +try: + from pathlib import Path + path_types.append(Path) +except ImportError: + pass + +try: + from py.path import local as LocalPath + path_types.append(LocalPath) +except ImportError: + pass + HERE = os.path.dirname(__file__) @@ -83,6 +88,19 @@ def test_stringify_path_fspath(self): result = common._stringify_path(p) assert result == 'foo/bar.csv' + @pytest.mark.parametrize('extension,expected', [ + ('', None), + ('.gz', 'gzip'), + ('.bz2', 'bz2'), + ('.zip', 'zip'), + ('.xz', 'xz'), + ]) + @pytest.mark.parametrize('path_type', path_types) + def test_infer_compression_from_path(self, extension, expected, path_type): + path = path_type('foo/bar.csv' + extension) + compression = common._infer_compression(path, compression='infer') + assert compression == expected + def test_get_filepath_or_buffer_with_path(self): filename = '~/sometest' filepath_or_buffer, _, _ = common.get_filepath_or_buffer(filename) From 47b397309e9601640170aedd6f70486a54d638fd Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 15 Aug 2017 14:42:39 -0600 Subject: [PATCH 890/933] Fix bugs in IntervalIndex.is_non_overlapping_monotonic (#17238) --- doc/source/whatsnew/v0.21.0.txt | 2 ++ pandas/core/indexes/interval.py | 13 +++++++-- pandas/tests/indexes/test_interval.py | 41 +++++++++++++++++++++++++-- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4032a7d22d4a2..06a724da05d74 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -312,6 +312,8 @@ Conversion - Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`) - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) - Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, so an approximation is used instead (:issue:`17228`) +- Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. 
(:issue:`17237`)
+- Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`)


 Indexing
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index aa2ad21ae37fd..e90378184e3f3 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -556,8 +556,17 @@ def is_non_overlapping_monotonic(self):
         # must be increasing  (e.g., [0, 1), [1, 2), [2, 3), ... )
         # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...)
         # we already require left <= right
-        return ((self.right[:-1] <= self.left[1:]).all() or
-                (self.left[:-1] >= self.right[1:]).all())
+
+        # strict inequality for closed == 'both'; equality implies overlapping
+        # at a point when both sides of intervals are included
+        if self.closed == 'both':
+            return bool((self.right[:-1] < self.left[1:]).all() or
+                        (self.left[:-1] > self.right[1:]).all())
+
+        # non-strict inequality when closed != 'both'; at least one side is
+        # not included in the intervals, so equality does not imply overlapping
+        return bool((self.right[:-1] <= self.left[1:]).all() or
+                    (self.left[:-1] >= self.right[1:]).all())

     @Appender(_index_shared_docs['_convert_scalar_indexer'])
     def _convert_scalar_indexer(self, key, kind=None):
diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py
index fe86a2121761a..18eefc3fbdca6 100644
--- a/pandas/tests/indexes/test_interval.py
+++ b/pandas/tests/indexes/test_interval.py
@@ -371,8 +371,9 @@ def slice_locs_cases(self, breaks):
         assert index.slice_locs(1, 1) == (1, 1)
         assert index.slice_locs(1, 2) == (1, 2)

-        index = IntervalIndex.from_breaks([0, 1, 2], closed='both')
-        assert index.slice_locs(1, 1) == (0, 2)
+        index = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)],
+                                          closed='both')
+        assert index.slice_locs(1, 1) == (0, 1)
         assert index.slice_locs(1, 2) == (0, 2)

     def test_slice_locs_int64(self):
@@ -681,6 +682,42 @@ def f():

         pytest.raises(ValueError, f)

+    def test_is_non_overlapping_monotonic(self):
+        # Should be True in all cases
+        tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
+        for closed in ('left', 'right', 'neither', 'both'):
+            idx = IntervalIndex.from_tuples(tpls, closed=closed)
+            assert idx.is_non_overlapping_monotonic is True
+
+            idx = IntervalIndex.from_tuples(reversed(tpls), closed=closed)
+            assert idx.is_non_overlapping_monotonic is True
+
+        # Should be False in all cases (overlapping)
+        tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
+        for closed in ('left', 'right', 'neither', 'both'):
+            idx = IntervalIndex.from_tuples(tpls, closed=closed)
+            assert idx.is_non_overlapping_monotonic is False
+
+            idx = IntervalIndex.from_tuples(reversed(tpls), closed=closed)
+            assert idx.is_non_overlapping_monotonic is False
+
+        # Should be False in all cases (non-monotonic)
+        tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
+        for closed in ('left', 'right', 'neither', 'both'):
+            idx = IntervalIndex.from_tuples(tpls, closed=closed)
+            assert idx.is_non_overlapping_monotonic is False
+
+            idx = IntervalIndex.from_tuples(reversed(tpls), closed=closed)
+            assert idx.is_non_overlapping_monotonic is False
+
+        # Should be False for closed='both', otherwise True (GH16560)
+        idx = IntervalIndex.from_breaks(range(4), closed='both')
+        assert idx.is_non_overlapping_monotonic is False
+
+        for closed in ('left', 'right', 'neither'):
+            idx = IntervalIndex.from_breaks(range(4), closed=closed)
+            assert idx.is_non_overlapping_monotonic is True
+

 class TestIntervalRange(object):


From 6fe68325de93a5f745ff49eac57589d33a1d53c1 Mon Sep 17 00:00:00 2001
From: Daniel Grady
Date: Tue, 15 Aug 2017 15:44:54 -0700
Subject: [PATCH 891/933] BUG: Fix behavior of argmax and argmin with inf
 (#16449)

Closes #13595
---
 doc/source/whatsnew/v0.21.0.txt       |  3 +-
 pandas/core/nanops.py                 |  8 ++---
 pandas/tests/groupby/test_groupby.py  |  2 +-
 pandas/tests/series/test_operators.py | 47 +++++++++++++++++++++++++++
 4 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 06a724da05d74..85685ed7b430d 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -266,7 +266,7 @@ Other API Changes
 - Removed the ``@slow`` decorator from ``pandas.util.testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`)
 - Moved definition of ``MergeError`` to the ``pandas.errors`` module.
 - The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is deprecated and will show a ``FutureWarning`` (:issue:`14636`)
-
+- :func:`Series.argmin` and :func:`Series.argmax` will now raise a ``TypeError`` when used with ``object`` dtypes, instead of a ``ValueError`` (:issue:`13595`)

 .. _whatsnew_0210.deprecations:
@@ -374,6 +374,7 @@ Reshaping
 - Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`)
 - Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`)
 - Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`)
+- :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`).

 Numeric
 ^^^^^^^
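
The behavior described in the whatsnew entries can be seen in a small sketch
(mirroring the new tests further down; illustrative, not part of the patch):

    # After the fix, -inf/+inf are treated as ordinary extreme values by
    # argmin/argmax rather than being masked out like missing data.
    import numpy as np
    import pandas as pd

    s = pd.Series([0, -np.inf, np.inf, np.nan])
    assert s.argmin() == 1   # position of -inf
    assert s.argmax() == 2   # position of +inf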
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 2f4e437c0ae61..b2bbf1c75b7ea 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -486,23 +486,23 @@ def reduction(values, axis=None, skipna=True):
 nanmax = _nanminmax('max', fill_value_typ='-inf')


+@disallow('O')
 def nanargmax(values, axis=None, skipna=True):
     """
     Returns -1 in the NA case
     """
-    values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf',
-                                         isfinite=True)
+    values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf')
     result = values.argmax(axis)
     result = _maybe_arg_null_out(result, axis, mask, skipna)
     return result


+@disallow('O')
 def nanargmin(values, axis=None, skipna=True):
     """
     Returns -1 in the NA case
     """
-    values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf',
-                                         isfinite=True)
+    values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf')
     result = values.argmin(axis)
     result = _maybe_arg_null_out(result, axis, mask, skipna)
     return result
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 0dea1e8447b2b..f9e1a0d2e744a 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2339,7 +2339,7 @@ def test_non_cython_api(self):
         assert_frame_equal(result, expected)

         # idxmax
-        expected = DataFrame([[0], [nan]], columns=['B'], index=[1, 3])
+        expected = DataFrame([[0.0], [nan]], columns=['B'], index=[1, 3])
         expected.index.name = 'A'
         result = g.idxmax()
         assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index 991c5ff625554..4888f8fe996b6 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -1857,3 +1857,50 @@ def test_op_duplicate_index(self):
         result = s1 + s2
         expected = pd.Series([11, 12, np.nan], index=[1, 1, 2])
         assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "test_input,error_type",
+        [
+            (pd.Series([]), ValueError),
+
+            # For strings, or any Series with dtype 'O'
+            (pd.Series(['foo', 'bar', 'baz']), TypeError),
+            (pd.Series([(1,), (2,)]), TypeError),
+
+            # For mixed data types
+            (
+                pd.Series(['foo', 'foo', 'bar', 'bar', None, np.nan, 'baz']),
+                TypeError
+            ),
+        ]
+    )
+    def test_assert_argminmax_raises(self, test_input, error_type):
+        """
+        Cases where ``Series.argmax`` and related should raise an exception
+        """
+        with pytest.raises(error_type):
+            test_input.argmin()
+        with pytest.raises(error_type):
+            test_input.argmin(skipna=False)
+        with pytest.raises(error_type):
+            test_input.argmax()
+        with pytest.raises(error_type):
+            test_input.argmax(skipna=False)
+
+    def test_argminmax_with_inf(self):
+        # For numeric data with NA and Inf (GH #13595)
+        s = pd.Series([0, -np.inf, np.inf, np.nan])
+
+        assert s.argmin() == 1
+        assert np.isnan(s.argmin(skipna=False))
+
+        assert s.argmax() == 2
+        assert np.isnan(s.argmax(skipna=False))
+
+        # Using old-style behavior that treats floating point nan, -inf, and
+        # +inf as missing
+        with pd.option_context('mode.use_inf_as_na', True):
+            assert s.argmin() == 0
+            assert np.isnan(s.argmin(skipna=False))
+            assert s.argmax() == 0
+            assert np.isnan(s.argmax(skipna=False))

From 57befd18cb8ea8d641ea88a5c8ef916a09a9a1aa Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Wed, 16 Aug 2017 13:52:29 -0700
Subject: [PATCH 892/933] CLN: Remove have_pytz (#17266)

Closes gh-17251
---
 pandas/_libs/index.pyx  | 10 +++-------
 pandas/_libs/period.pyx | 11 +++--------
 pandas/_libs/tslib.pyx  |
17 ++--------------- 3 files changed, 8 insertions(+), 30 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 273dc06886088..b71963fdef1c1 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -32,13 +32,9 @@ cdef extern from "datetime.h": cdef int64_t iNaT = util.get_nat() -try: - from dateutil.tz import tzutc as _du_utc - import pytz - UTC = pytz.utc - have_pytz = True -except ImportError: - have_pytz = False +from dateutil.tz import tzutc as _du_utc +import pytz +UTC = pytz.utc PyDateTime_IMPORT diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 1db31387de5a7..506fec28f5041 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -3,8 +3,7 @@ import operator from cpython cimport ( PyObject_RichCompareBool, - Py_EQ, Py_NE, -) + Py_EQ, Py_NE) from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA) @@ -24,14 +23,13 @@ cimport util, lib from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, - NaT, have_pytz, _get_utcoffset) + NaT, _get_utcoffset) from tslib cimport ( maybe_get_tz, _is_utc, _is_tzlocal, _get_dst_info, - _nat_scalar_rules, -) + _nat_scalar_rules) from pandas.tseries import offsets from pandas.core.tools.datetimes import parse_time_string @@ -610,9 +608,6 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, ndarray[int64_t] trans, deltas, pos pandas_datetimestruct dts - if not have_pytz: - raise Exception('Could not find pytz module') - if _is_utc(tz): for i in range(n): if stamps[i] == NPY_NAT: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 44be9ba56b84a..e1ba4169ed629 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -4080,12 +4080,8 @@ def i8_to_pydt(int64_t i8, object tzinfo = None): #---------------------------------------------------------------------- # time zone conversion helpers -try: - import pytz - UTC = pytz.utc - have_pytz = True -except: - have_pytz = False +import pytz +UTC = pytz.utc @cython.boundscheck(False) @@ -4112,9 +4108,6 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): int64_t v, offset, delta pandas_datetimestruct dts - if not have_pytz: - import pytz - if len(vals) == 0: return np.array([], dtype=np.int64) @@ -4229,9 +4222,6 @@ def tz_convert_single(int64_t val, object tz1, object tz2): int64_t v, offset, utc_date pandas_datetimestruct dts - if not have_pytz: - import pytz - if val == NPY_NAT: return val @@ -4444,9 +4434,6 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, assert is_coerce or is_raise - if not have_pytz: - raise Exception("Could not find pytz module") - if tz == UTC or tz is None: return vals From ecaac87c526f5642389dc36e6ee565fe8d21bfd7 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 17 Aug 2017 04:10:52 -0600 Subject: [PATCH 893/933] CLN: replace %s syntax with .format in core.dtypes and core.sparse (#17270) --- pandas/core/dtypes/cast.py | 20 ++++++++++-------- pandas/core/dtypes/common.py | 8 ++++---- pandas/core/sparse/array.py | 39 +++++++++++++++++++----------------- pandas/core/sparse/frame.py | 16 +++++++-------- pandas/core/sparse/list.py | 5 +++-- pandas/core/sparse/series.py | 6 ++++-- 6 files changed, 52 insertions(+), 42 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 723e4f70da4e9..c2cf6afc1a7b5 100644 --- a/pandas/core/dtypes/cast.py +++ 
b/pandas/core/dtypes/cast.py @@ -516,7 +516,8 @@ def maybe_cast_item(obj, item, dtype): if dtype in (np.object_, np.bool_): obj[item] = chunk.astype(np.object_) elif not issubclass(dtype, (np.integer, np.bool_)): # pragma: no cover - raise ValueError("Unexpected dtype encountered: %s" % dtype) + raise ValueError("Unexpected dtype encountered: {dtype}" + .format(dtype=dtype)) def invalidate_string_dtypes(dtype_set): @@ -620,8 +621,9 @@ def astype_nansafe(arr, dtype, copy=True): elif dtype == np.int64: return arr.view(dtype) elif dtype != _NS_DTYPE: - raise TypeError("cannot astype a datetimelike from [%s] to [%s]" % - (arr.dtype, dtype)) + raise TypeError("cannot astype a datetimelike from [{from_dtype}] " + "to [{to_dtype}]".format(from_dtype=arr.dtype, + to_dtype=dtype)) return arr.astype(_NS_DTYPE) elif is_timedelta64_dtype(arr): if dtype == np.int64: @@ -640,8 +642,9 @@ def astype_nansafe(arr, dtype, copy=True): result[mask] = np.nan return result - raise TypeError("cannot astype a timedelta from [%s] to [%s]" % - (arr.dtype, dtype)) + raise TypeError("cannot astype a timedelta from [{from_dtype}] " + "to [{to_dtype}]".format(from_dtype=arr.dtype, + to_dtype=dtype)) return arr.astype(_TD_DTYPE) elif (np.issubdtype(arr.dtype, np.floating) and @@ -926,7 +929,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): dtype = _NS_DTYPE else: raise TypeError("cannot convert datetimelike to " - "dtype [%s]" % dtype) + "dtype [{dtype}]".format(dtype=dtype)) elif is_datetime64tz: # our NaT doesn't support tz's @@ -943,7 +946,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): dtype = _TD_DTYPE else: raise TypeError("cannot convert timedeltalike to " - "dtype [%s]" % dtype) + "dtype [{dtype}]".format(dtype=dtype)) if is_scalar(value): if value == iNaT or isna(value): @@ -982,7 +985,8 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): return tslib.ints_to_pydatetime(ints) # we have a non-castable dtype that was passed - raise TypeError('Cannot cast datetime64 to %s' % dtype) + raise TypeError('Cannot cast datetime64 to {dtype}' + .format(dtype=dtype)) else: diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 37f99bd344e6c..c47e61dc446be 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1854,10 +1854,10 @@ def _validate_date_like_dtype(dtype): try: typ = np.datetime_data(dtype)[0] except ValueError as e: - raise TypeError('%s' % e) + raise TypeError('{error}'.format(error=e)) if typ != 'generic' and typ != 'ns': - raise ValueError('%r is too specific of a frequency, try passing %r' % - (dtype.name, dtype.type.__name__)) + msg = '{name!r} is too specific of a frequency, try passing {type!r}' + raise ValueError(msg.format(name=dtype.name, type=dtype.type.__name__)) _string_dtypes = frozenset(map(_get_dtype_from_object, (binary_type, @@ -1924,6 +1924,6 @@ def pandas_dtype(dtype): if dtype in [object, np.object_, 'object', 'O']: return npdtype elif npdtype.kind == 'O': - raise TypeError('dtype {0} not understood'.format(dtype)) + raise TypeError('dtype {dtype} not understood'.format(dtype=dtype)) return npdtype diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 4a12dd1af28c9..2f830a98db649 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -52,8 +52,8 @@ def _arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None, def wrapper(self, other): if isinstance(other, np.ndarray): if len(self) != len(other): - raise AssertionError("length mismatch: %d vs. 
%d" % - (len(self), len(other))) + raise AssertionError("length mismatch: {self} vs. {other}" + .format(self=len(self), other=len(other))) if not isinstance(other, ABCSparseArray): dtype = getattr(other, 'dtype', None) other = SparseArray(other, fill_value=self.fill_value, @@ -66,7 +66,8 @@ def wrapper(self, other): return _wrap_result(name, result, self.sp_index, fill) else: # pragma: no cover - raise TypeError('operation with %s not supported' % type(other)) + raise TypeError('operation with {other} not supported' + .format(other=type(other))) if name.startswith("__"): name = name[2:-2] @@ -218,9 +219,9 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer', else: values = _sanitize_values(data) if len(values) != sparse_index.npoints: - raise AssertionError("Non array-like type {0} must have" - " the same length as the" - " index".format(type(values))) + raise AssertionError("Non array-like type {type} must " + "have the same length as the index" + .format(type=type(values))) # Create array, do *not* copy data by default if copy: subarr = np.array(values, dtype=dtype, copy=True) @@ -330,9 +331,10 @@ def __len__(self): return 0 def __unicode__(self): - return '%s\nFill: %s\n%s' % (printing.pprint_thing(self), - printing.pprint_thing(self.fill_value), - printing.pprint_thing(self.sp_index)) + return '{self}\nFill: {fill}\n{index}'.format( + self=printing.pprint_thing(self), + fill=printing.pprint_thing(self.fill_value), + index=printing.pprint_thing(self.sp_index)) def disable(self, other): raise NotImplementedError('inplace binary ops not supported') @@ -377,8 +379,8 @@ def fill_value(self, value): if is_dtype_equal(self.dtype, new_dtype): self._fill_value = fill_value else: - msg = 'unable to set fill_value {0} to {1} dtype' - raise ValueError(msg.format(value, self.dtype)) + msg = 'unable to set fill_value {fill} to {dtype} dtype' + raise ValueError(msg.format(fill=value, dtype=self.dtype)) def get_values(self, fill=None): """ return a dense representation """ @@ -466,7 +468,8 @@ def take(self, indices, axis=0, allow_fill=True, nv.validate_take(tuple(), kwargs) if axis: - raise ValueError("axis must be 0, input was {0}".format(axis)) + raise ValueError("axis must be 0, input was {axis}" + .format(axis=axis)) if is_integer(indices): # return scalar @@ -482,12 +485,12 @@ def take(self, indices, axis=0, allow_fill=True, 'all indices must be >= -1') raise ValueError(msg) elif (n <= indices).any(): - msg = 'index is out of bounds for size {0}' - raise IndexError(msg.format(n)) + msg = 'index is out of bounds for size {size}'.format(size=n) + raise IndexError(msg) else: if ((indices < -n) | (n <= indices)).any(): - msg = 'index is out of bounds for size {0}' - raise IndexError(msg.format(n)) + msg = 'index is out of bounds for size {size}'.format(size=n) + raise IndexError(msg) indices = indices.astype(np.int32) if not (allow_fill and fill_value is not None): @@ -543,8 +546,8 @@ def astype(self, dtype=None, copy=True): else: fill_value = dtype.type(self.fill_value) except ValueError: - msg = 'unable to coerce current fill_value {0} to {1} dtype' - raise ValueError(msg.format(self.fill_value, dtype)) + msg = 'unable to coerce current fill_value {fill} to {dtype} dtype' + raise ValueError(msg.format(fill=self.fill_value, dtype=dtype)) return self._simple_new(sp_values, self.sp_index, fill_value=fill_value) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index f30bd5c36a61b..1e98e919baa33 100644 --- a/pandas/core/sparse/frame.py +++ 
b/pandas/core/sparse/frame.py @@ -214,11 +214,11 @@ def _prep_index(self, data, index, columns): columns = _default_index(K) if len(columns) != K: - raise ValueError('Column length mismatch: %d vs. %d' % - (len(columns), K)) + raise ValueError('Column length mismatch: {columns} vs. {K}' + .format(columns=len(columns), K=K)) if len(index) != N: - raise ValueError('Index length mismatch: %d vs. %d' % - (len(index), N)) + raise ValueError('Index length mismatch: {index} vs. {N}' + .format(index=len(index), N=N)) return index, columns def to_coo(self): @@ -725,17 +725,17 @@ def _maybe_rename_join(self, other, lsuffix, rsuffix): to_rename = self.columns.intersection(other.columns) if len(to_rename) > 0: if not lsuffix and not rsuffix: - raise ValueError('columns overlap but no suffix specified: %s' - % to_rename) + raise ValueError('columns overlap but no suffix specified: ' + '{to_rename}'.format(to_rename=to_rename)) def lrenamer(x): if x in to_rename: - return '%s%s' % (x, lsuffix) + return '{x}{lsuffix}'.format(x=x, lsuffix=lsuffix) return x def rrenamer(x): if x in to_rename: - return '%s%s' % (x, rsuffix) + return '{x}{rsuffix}'.format(x=x, rsuffix=rsuffix) return x this = self.rename(columns=lrenamer) diff --git a/pandas/core/sparse/list.py b/pandas/core/sparse/list.py index e2a8c6a29cc23..f3e64b7efc764 100644 --- a/pandas/core/sparse/list.py +++ b/pandas/core/sparse/list.py @@ -35,7 +35,8 @@ def __init__(self, data=None, fill_value=np.nan): def __unicode__(self): contents = '\n'.join(repr(c) for c in self._chunks) - return '%s\n%s' % (object.__repr__(self), pprint_thing(contents)) + return '{self}\n{contents}'.format(self=object.__repr__(self), + contents=pprint_thing(contents)) def __len__(self): return sum(len(c) for c in self._chunks) @@ -43,7 +44,7 @@ def __len__(self): def __getitem__(self, i): if i < 0: if i + len(self) < 0: # pragma: no cover - raise ValueError('%d out of range' % i) + raise ValueError('{index} out of range'.format(index=i)) i += len(self) passed = 0 diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 62d20e73dbfcb..99aec2dd11569 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -65,7 +65,8 @@ def wrapper(self, other): index=self.index, name=self.name) else: # pragma: no cover - raise TypeError('operation with %s not supported' % type(other)) + raise TypeError('operation with {other} not supported' + .format(other=type(other))) wrapper.__name__ = name if name.startswith("__"): @@ -295,7 +296,8 @@ def shape(self): def __unicode__(self): # currently, unicode is same as repr...fixes infinite loop series_rep = Series.__unicode__(self) - rep = '%s\n%s' % (series_rep, repr(self.sp_index)) + rep = '{series}\n{index!r}'.format(series=series_rep, + index=self.sp_index) return rep def __array_wrap__(self, result, context=None): From 95f4f7dc78ac21a132b86b01c31efc5b0fdbceab Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 17 Aug 2017 03:13:36 -0700 Subject: [PATCH 894/933] Replace imports of * with explicit imports (#17269) xref #17234 --- pandas/_libs/hashtable.pyx | 22 +++++++++++- pandas/_libs/index.pyx | 8 ++--- pandas/_libs/join_func_helper.pxi.in | 3 +- pandas/_libs/lib.pyx | 24 +++++++------ pandas/_libs/parsers.pyx | 11 ++++-- pandas/_libs/period.pyx | 12 ++++++- pandas/_libs/src/properties.pyx | 3 +- pandas/_libs/src/skiplist.pyx | 6 +--- pandas/_libs/tslib.pyx | 52 +++++++++++++++++++--------- pandas/_libs/window.pyx | 8 ++++- 10 files changed, 106 insertions(+), 43 deletions(-) diff --git 
a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index 101e2c031f26e..2462b7af7b0fe 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -2,7 +2,27 @@ from cpython cimport PyObject, Py_INCREF, PyList_Check, PyTuple_Check -from khash cimport * +from khash cimport ( + khiter_t, + + kh_str_t, kh_init_str, kh_put_str, kh_exist_str, + kh_get_str, kh_destroy_str, kh_resize_str, + + kh_put_strbox, kh_get_strbox, kh_init_strbox, + + kh_int64_t, kh_init_int64, kh_resize_int64, kh_destroy_int64, + kh_get_int64, kh_exist_int64, kh_put_int64, + + kh_float64_t, kh_exist_float64, kh_put_float64, kh_init_float64, + kh_get_float64, kh_destroy_float64, kh_resize_float64, + + kh_resize_uint64, kh_exist_uint64, kh_destroy_uint64, kh_put_uint64, + kh_get_uint64, kh_init_uint64, + + kh_destroy_pymap, kh_exist_pymap, kh_init_pymap, kh_get_pymap, + kh_put_pymap, kh_resize_pymap) + + from numpy cimport * from libc.stdlib cimport malloc, free diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index b71963fdef1c1..42ba0c1cadaec 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -1,8 +1,6 @@ # cython: profile=False -from numpy cimport ndarray - -from numpy cimport (float64_t, int32_t, int64_t, uint8_t, +from numpy cimport (ndarray, float64_t, int32_t, int64_t, uint8_t, uint64_t, NPY_DATETIME, NPY_TIMEDELTA) cimport cython @@ -16,7 +14,9 @@ cimport util import numpy as np cimport tslib -from hashtable cimport * + +from hashtable cimport HashTable + from pandas._libs import tslib, algos, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta from datetime import datetime, timedelta diff --git a/pandas/_libs/join_func_helper.pxi.in b/pandas/_libs/join_func_helper.pxi.in index 9cca9bba2a197..73d231b8588dc 100644 --- a/pandas/_libs/join_func_helper.pxi.in +++ b/pandas/_libs/join_func_helper.pxi.in @@ -9,6 +9,8 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in # asof_join_by #---------------------------------------------------------------------- +from hashtable cimport PyObjectHashTable, UInt64HashTable, Int64HashTable + {{py: # table_type, by_dtype @@ -23,7 +25,6 @@ on_dtypes = ['uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', }} -from hashtable cimport * {{for table_type, by_dtype in by_dtypes}} {{for on_dtype in on_dtypes}} diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 0458d4ae9f3de..53ca41e4b2489 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -10,21 +10,14 @@ from numpy cimport * np.import_array() -cdef extern from "numpy/arrayobject.h": - cdef enum NPY_TYPES: - NPY_intp "NPY_INTP" - from libc.stdlib cimport malloc, free -from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem, - PyDict_Contains, PyDict_Keys, - Py_INCREF, PyTuple_SET_ITEM, +from cpython cimport (Py_INCREF, PyTuple_SET_ITEM, PyList_Check, PyFloat_Check, PyString_Check, PyBytes_Check, - PyTuple_SetItem, + PyUnicode_Check, PyTuple_New, - PyObject_SetAttrString, PyObject_RichCompareBool, PyBytes_GET_SIZE, PyUnicode_GET_SIZE, @@ -55,7 +48,18 @@ cdef double NAN = nan from datetime import datetime as pydatetime # this is our tseries.pxd -from datetime cimport * +from datetime cimport ( + get_timedelta64_value, get_datetime64_value, + npy_timedelta, npy_datetime, + PyDateTime_Check, PyDate_Check, PyTime_Check, PyDelta_Check, + PyDateTime_GET_YEAR, + PyDateTime_GET_MONTH, + PyDateTime_GET_DAY, + PyDateTime_DATE_GET_HOUR, + PyDateTime_DATE_GET_MINUTE, + PyDateTime_DATE_GET_SECOND, + PyDateTime_IMPORT) + from tslib cimport 
(convert_to_tsobject, convert_to_timedelta64, _check_all_nulls) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index ae420da2102b2..3e8b5c4bd3feb 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -32,7 +32,7 @@ cdef extern from "stdlib.h": cimport cython cimport numpy as cnp -from numpy cimport ndarray, uint8_t, uint64_t +from numpy cimport ndarray, uint8_t, uint64_t, int64_t import numpy as np cimport util @@ -57,7 +57,14 @@ import os cnp.import_array() -from khash cimport * +from khash cimport ( + khiter_t, + kh_str_t, kh_init_str, kh_put_str, kh_exist_str, + kh_get_str, kh_destroy_str, + kh_float64_t, kh_get_float64, kh_destroy_float64, + kh_put_float64, kh_init_float64, + kh_strbox_t, kh_put_strbox, kh_get_strbox, kh_init_strbox, + kh_destroy_strbox) import sys diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 506fec28f5041..e017d863e1907 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -2,6 +2,7 @@ from datetime import datetime, date, timedelta import operator from cpython cimport ( + PyUnicode_Check, PyObject_RichCompareBool, Py_EQ, Py_NE) @@ -18,7 +19,16 @@ from pandas import compat from pandas.compat import PY2 cimport cython -from datetime cimport * + +from datetime cimport ( + is_leapyear, + PyDateTime_IMPORT, + pandas_datetimestruct, + pandas_datetimestruct_to_datetime, + pandas_datetime_to_datetimestruct, + PANDAS_FR_ns, + INT32_MIN) + cimport util, lib from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib diff --git a/pandas/_libs/src/properties.pyx b/pandas/_libs/src/properties.pyx index e619a3b6edd9a..4a3fd4b771a17 100644 --- a/pandas/_libs/src/properties.pyx +++ b/pandas/_libs/src/properties.pyx @@ -1,4 +1,5 @@ -from cpython cimport PyDict_Contains, PyDict_GetItem, PyDict_GetItem +from cpython cimport ( + PyDict_Contains, PyDict_GetItem, PyDict_GetItem, PyDict_SetItem) cdef class cache_readonly(object): diff --git a/pandas/_libs/src/skiplist.pyx b/pandas/_libs/src/skiplist.pyx index 3017931e25115..559b529822a69 100644 --- a/pandas/_libs/src/skiplist.pyx +++ b/pandas/_libs/src/skiplist.pyx @@ -6,10 +6,6 @@ # Cython version: Wes McKinney -cdef extern from "numpy/arrayobject.h": - - void import_array() - cdef extern from "math.h": double log(double x) @@ -25,7 +21,7 @@ import numpy as np from random import random # initialize numpy -import_array() +np.import_array() # TODO: optimize this, make less messy diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index e1ba4169ed629..32b8c92a50269 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -4,8 +4,8 @@ import warnings cimport numpy as np from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, + float64_t, NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA) -from datetime cimport get_datetime64_value, get_timedelta64_value import numpy as np import sys @@ -30,20 +30,47 @@ cdef extern from "datetime_helper.h": double total_seconds(object) # this is our datetime.pxd -from datetime cimport cmp_pandas_datetimestruct from libc.stdlib cimport free from util cimport (is_integer_object, is_float_object, is_datetime64_object, is_timedelta64_object, INT64_MAX) cimport util -from datetime cimport * -from khash cimport * -cimport cython - +# this is our datetime.pxd +from datetime cimport ( + pandas_datetimestruct, + pandas_datetime_to_datetimestruct, + pandas_datetimestruct_to_datetime, + cmp_pandas_datetimestruct, + days_per_month_table, + get_datetime64_value, + get_timedelta64_value, + 
get_datetime64_unit, + PANDAS_DATETIMEUNIT, + _string_to_dts, + _pydatetime_to_dts, + _date_to_datetime64, + npy_datetime, + is_leapyear, + dayofweek, + PANDAS_FR_ns, + PyDateTime_Check, PyDate_Check, + PyDateTime_IMPORT, + timedelta, datetime + ) + +# stdlib datetime imports from datetime import timedelta, datetime from datetime import time as datetime_time +from khash cimport ( + khiter_t, + kh_destroy_int64, kh_put_int64, + kh_init_int64, kh_int64_t, + kh_resize_int64, kh_get_int64) + +cimport cython + import re # dateutil compat @@ -81,15 +108,6 @@ PyDateTime_IMPORT cdef int64_t NPY_NAT = util.get_nat() iNaT = NPY_NAT -# < numpy 1.7 compat for NaT -compat_NaT = np.array([NPY_NAT]).astype('m8[ns]').item() - - -try: - basestring -except NameError: # py3 - basestring = str - cdef inline object create_timestamp_from_ts( int64_t value, pandas_datetimestruct dts, @@ -314,7 +332,7 @@ class Timestamp(_Timestamp): tz : string / timezone object, default None Timezone to localize to """ - if isinstance(tz, basestring): + if isinstance(tz, string_types): tz = maybe_get_tz(tz) return cls(datetime.now(tz)) @@ -615,7 +633,7 @@ class Timestamp(_Timestamp): if self.tzinfo is None: # tz naive, localize tz = maybe_get_tz(tz) - if not isinstance(ambiguous, basestring): + if not isinstance(ambiguous, string_types): ambiguous = [ambiguous] value = tz_localize_to_utc(np.array([self.value], dtype='i8'), tz, ambiguous=ambiguous, errors=errors)[0] diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index bdd371871b6e1..9fb3d0662eb4f 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -56,7 +56,13 @@ cdef inline int int_min(int a, int b): return a if a <= b else b from util cimport numeric -from skiplist cimport * +from skiplist cimport ( + skiplist_t, + skiplist_init, + skiplist_destroy, + skiplist_get, + skiplist_insert, + skiplist_remove) cdef extern from "../src/headers/math.h": double sqrt(double x) nogil From 0ee1675c51a276649e6e45af962c076c526d1c75 Mon Sep 17 00:00:00 2001 From: Sangwoong Yoon Date: Fri, 18 Aug 2017 07:39:37 +0900 Subject: [PATCH 895/933] TST: pytest deprecation warnings GH17197 (#17253) Test parameters with marks are updated according to the updated API of Pytest. 
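
For illustration, a minimal before/after sketch of the migration (the test
name and parameters below are hypothetical, not taken from this patch): marks
that used to be applied by calling the mark decorator on a parameter value
are now attached via ``pytest.param(..., marks=...)``.

    import pytest

    # Old style, deprecated as of pytest 3.2: call the mark on the value.
    # @pytest.mark.parametrize('unit', ['d', pytest.mark.xfail('s')])

    # New style, available since pytest 3.1: wrap the value in pytest.param
    # and pass the mark(s) through the ``marks`` keyword.
    @pytest.mark.parametrize('unit', [
        'd',
        pytest.param('s', marks=pytest.mark.xfail),
    ])
    def test_unit(unit):
        # The 's' case fails and is reported as xfailed; 'd' passes.
        assert unit == 'd'

The contributing guide change below adds a fuller example of the same pattern.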
https://docs.pytest.org/en/latest/changelog.html#pytest-3-2-0-2017-07-30
https://docs.pytest.org/en/latest/parametrize.html
---
 appveyor.yml                           |  2 +-
 ci/install_circle.sh                   |  2 +-
 ci/install_travis.sh                   |  2 +-
 ci/requirements_all.txt                |  2 +-
 ci/requirements_dev.txt                |  2 +-
 doc/source/contributing.rst            | 24 ++++++++++++++++++++----
 pandas/tests/computation/test_eval.py  | 15 ++++++++-------
 pandas/tests/io/parser/test_network.py |  6 ++++--
 pandas/tests/io/test_excel.py          |  6 ++++--
 pandas/tests/io/test_parquet.py        | 12 ++++++++----
 pandas/tests/test_window.py            |  5 +++--
 11 files changed, 52 insertions(+), 26 deletions(-)

diff --git a/appveyor.yml b/appveyor.yml
index 684b859c206b2..65e62f887554e 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -72,7 +72,7 @@ install:
   - cmd: conda info -a

   # create our env
-  - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest pytest-xdist
+  - cmd: conda create -n pandas python=%PYTHON_VERSION% cython "pytest>=3.1.0" pytest-xdist
   - cmd: activate pandas
   - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run
   - cmd: echo "installing requirements from %REQ%"
diff --git a/ci/install_circle.sh b/ci/install_circle.sh
index 00e14b10ebbd6..29ca69970104b 100755
--- a/ci/install_circle.sh
+++ b/ci/install_circle.sh
@@ -64,7 +64,7 @@ fi
 # create envbuild deps
 echo "[create env: ${REQ_BUILD}]"
 time conda create -n pandas -q --file=${REQ_BUILD} || exit 1
-time conda install -n pandas pytest || exit 1
+time conda install -n pandas "pytest>=3.1.0" || exit 1

 source activate pandas

diff --git a/ci/install_travis.sh b/ci/install_travis.sh
index df6969c7cc659..ad8f0bdd8a597 100755
--- a/ci/install_travis.sh
+++ b/ci/install_travis.sh
@@ -103,7 +103,7 @@ if [ -e ${REQ} ]; then
     time bash $REQ || exit 1
 fi

-time conda install -n pandas pytest
+time conda install -n pandas "pytest>=3.1.0"
 time pip install pytest-xdist

 if [ "$LINT" ]; then
diff --git a/ci/requirements_all.txt b/ci/requirements_all.txt
index de37ec4d20be4..b153b6989df86 100644
--- a/ci/requirements_all.txt
+++ b/ci/requirements_all.txt
@@ -1,4 +1,4 @@
-pytest
+pytest>=3.1.0
 pytest-cov
 pytest-xdist
 flake8
diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt
index 1e051802ec9f8..c7190c506ba18 100644
--- a/ci/requirements_dev.txt
+++ b/ci/requirements_dev.txt
@@ -2,6 +2,6 @@ python-dateutil
 pytz
 numpy
 cython
-pytest
+pytest>=3.1.0
 pytest-cov
 flake8
diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
index b44d0f36b86a1..e172d0d2a71a2 100644
--- a/doc/source/contributing.rst
+++ b/doc/source/contributing.rst
@@ -598,6 +598,10 @@ Like many packages, *pandas* uses `pytest
 extensions in `numpy.testing
 `_.

+.. note::
+
+   The earliest supported pytest version is 3.1.0.
+
 Writing tests
 ~~~~~~~~~~~~~

@@ -654,7 +658,9 @@ Using ``pytest``
 Here is an example of a self-contained set of tests that illustrate multiple features that we like to use.

 - functional style: tests are like ``test_*`` and *only* take arguments that are either fixtures or parameters
+- ``pytest.mark`` can be used to set metadata on test functions, e.g. ``skip`` or ``xfail``.
 - using ``parametrize``: allow testing of multiple cases
+- to set a mark on a parameter, ``pytest.param(..., marks=...)`` syntax should be used
 - ``fixture``, code for object construction, on a per-test basis
 - using bare ``assert`` for scalars and truth-testing
 - ``tm.assert_series_equal`` (and its counter part ``tm.assert_frame_equal``), for pandas object comparisons.
@@ -673,6 +679,13 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place def test_dtypes(dtype): assert str(np.dtype(dtype)) == dtype + @pytest.mark.parametrize('dtype', ['float32', + pytest.param('int16', marks=pytest.mark.skip), + pytest.param('int32', + marks=pytest.mark.xfail(reason='to show how it works'))]) + def test_mark(dtype): + assert str(np.dtype(dtype)) == 'float32' + @pytest.fixture def series(): return pd.Series([1, 2, 3]) @@ -695,13 +708,16 @@ A test run of this yields ((pandas) bash-3.2$ pytest test_cool_feature.py -v =========================== test session starts =========================== - platform darwin -- Python 3.5.2, pytest-3.0.5, py-1.4.31, pluggy-0.4.0 - collected 8 items + platform darwin -- Python 3.6.2, pytest-3.2.1, py-1.4.31, pluggy-0.4.0 + collected 11 items tester.py::test_dtypes[int8] PASSED tester.py::test_dtypes[int16] PASSED tester.py::test_dtypes[int32] PASSED tester.py::test_dtypes[int64] PASSED + tester.py::test_mark[float32] PASSED + tester.py::test_mark[int16] SKIPPED + tester.py::test_mark[int32] xfail tester.py::test_series[int8] PASSED tester.py::test_series[int16] PASSED tester.py::test_series[int32] PASSED @@ -714,8 +730,8 @@ Tests that we have ``parametrized`` are now accessible via the test name, for ex ((pandas) bash-3.2$ pytest test_cool_feature.py -v -k int8 =========================== test session starts =========================== - platform darwin -- Python 3.5.2, pytest-3.0.5, py-1.4.31, pluggy-0.4.0 - collected 8 items + platform darwin -- Python 3.6.2, pytest-3.2.1, py-1.4.31, pluggy-0.4.0 + collected 11 items test_cool_feature.py::test_dtypes[int8] PASSED test_cool_feature.py::test_series[int8] PASSED diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 7fc091ebb1892..d2874b1606e72 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -38,13 +38,14 @@ @pytest.fixture(params=( - pytest.mark.skipif(engine == 'numexpr' and not _USE_NUMEXPR, - reason='numexpr enabled->{enabled}, ' - 'installed->{installed}'.format( - enabled=_USE_NUMEXPR, - installed=_NUMEXPR_INSTALLED))(engine) - for engine in _engines # noqa -)) + pytest.param(engine, + marks=pytest.mark.skipif( + engine == 'numexpr' and not _USE_NUMEXPR, + reason='numexpr enabled->{enabled}, ' + 'installed->{installed}'.format( + enabled=_USE_NUMEXPR, + installed=_NUMEXPR_INSTALLED))) + for engine in _engines)) # noqa def engine(request): return request.param diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index cfa60248605ad..3344243f8137a 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -23,8 +23,10 @@ def salaries_table(): @pytest.mark.parametrize( "compression,extension", [('gzip', '.gz'), ('bz2', '.bz2'), ('zip', '.zip'), - pytest.mark.skipif(not tm._check_if_lzma(), - reason='need backports.lzma to run')(('xz', '.xz'))]) + pytest.param('xz', '.xz', + marks=pytest.mark.skipif(not tm._check_if_lzma(), + reason='need backports.lzma ' + 'to run'))]) @pytest.mark.parametrize('mode', ['explicit', 'infer']) @pytest.mark.parametrize('engine', ['python', 'c']) def test_compressed_urls(salaries_table, compression, extension, mode, engine): diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 856e8d6466526..92147b46097b8 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -2400,8 +2400,10 @@ def check_called(func): 
 @pytest.mark.parametrize('engine', [
-    pytest.mark.xfail('xlwt', reason='xlwt does not support '
-                      'openpyxl-compatible style dicts'),
+    pytest.param('xlwt',
+                 marks=pytest.mark.xfail(reason='xlwt does not support '
+                                                'openpyxl-compatible '
+                                                'style dicts')),
     'xlsxwriter',
     'openpyxl',
 ])
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index ff0935c7dcc6f..78c72e2a05566 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -26,10 +26,14 @@
 # setup engines & skips
 @pytest.fixture(params=[
-    pytest.mark.skipif(not _HAVE_FASTPARQUET,
-                       reason='fastparquet is not installed')('fastparquet'),
-    pytest.mark.skipif(not _HAVE_PYARROW,
-                       reason='pyarrow is not installed')('pyarrow')])
+    pytest.param('fastparquet',
+                 marks=pytest.mark.skipif(not _HAVE_FASTPARQUET,
+                                          reason='fastparquet is '
+                                                 'not installed')),
+    pytest.param('pyarrow',
+                 marks=pytest.mark.skipif(not _HAVE_PYARROW,
+                                          reason='pyarrow is '
+                                                 'not installed'))])
 def engine(request):
     return request.param
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index d94e34c41786b..21a9b05d48126 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -530,8 +530,9 @@ def test_numpy_compat(self):

     @pytest.mark.parametrize(
         'expander',
-        [1, pytest.mark.xfail(
-            reason='GH 16425 expanding with offset not supported')('1s')])
+        [1, pytest.param('1s', marks=pytest.mark.xfail(
+            reason='GH 16425 expanding with '
+                   'offset not supported'))])
     def test_empty_df_expanding(self, expander):
         # GH 15819 Verifies that datetime and integer expanding windows can be
         # applied to empty DataFrames

From 24b6349c013fb9e59ea7fa4b1d40088026c32d25 Mon Sep 17 00:00:00 2001
From: Makarov Andrey
Date: Fri, 18 Aug 2017 03:52:34 +0300
Subject: [PATCH 896/933] Handle more date/datetime/time formats (#15871)

---
 doc/source/whatsnew/v0.21.0.txt            |    1 +
 pandas/io/sas/sas7bdat.py                  |   16 +-
 pandas/io/sas/sas_constants.py             |   24 +
 pandas/tests/io/sas/data/datetime.csv      |    5 +
 pandas/tests/io/sas/data/datetime.sas7bdat |  Bin 0 -> 131072 bytes
 pandas/tests/io/sas/data/productsales.csv  | 2880 ++++++++++----------
 pandas/tests/io/sas/test_sas7bdat.py       |   15 +-
 7 files changed, 1494 insertions(+), 1447 deletions(-)
 create mode 100644 pandas/tests/io/sas/data/datetime.csv
 create mode 100644 pandas/tests/io/sas/data/datetime.sas7bdat

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 85685ed7b430d..4f55c6388c728 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -127,6 +127,7 @@ Other Enhancements
 - Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `.
 - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`)
 - `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`).
+- :func:`pd.read_sas()` now recognizes many more of the most frequently used date and datetime formats in SAS7BDAT files (:issue:`15871`).

 .. _whatsnew_0210.api_breaking:

diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index 20b0cf85e95b7..2b3a91e2062b1 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -44,8 +44,8 @@ class SAS7BDATReader(BaseIterator):
     index : column identifier, defaults to None
         Column to use as index.
convert_dates : boolean, defaults to True - Attempt to convert dates to Pandas datetime values. Note all - SAS date formats are supported. + Attempt to convert dates to Pandas datetime values. Note that + some rarely used SAS date formats may be unsupported. blank_missing : boolean, defaults to True Convert empty strings to missing values (SAS uses blanks to indicate missing character variables). @@ -655,9 +655,15 @@ def _chunk_to_dataframe(self): rslt[name] = self._byte_chunk[jb, :].view( dtype=self.byte_order + 'd') rslt[name] = np.asarray(rslt[name], dtype=np.float64) - if self.convert_dates and (self.column_formats[j] == "MMDDYY"): - epoch = pd.datetime(1960, 1, 1) - rslt[name] = epoch + pd.to_timedelta(rslt[name], unit='d') + if self.convert_dates: + unit = None + if self.column_formats[j] in const.sas_date_formats: + unit = 'd' + elif self.column_formats[j] in const.sas_datetime_formats: + unit = 's' + if unit: + rslt[name] = pd.to_datetime(rslt[name], unit=unit, + origin="1960-01-01") jb += 1 elif self.column_types[j] == b's': rslt[name] = self._string_chunk[js, :] diff --git a/pandas/io/sas/sas_constants.py b/pandas/io/sas/sas_constants.py index 65ae1e9102cb2..c4b3588164305 100644 --- a/pandas/io/sas/sas_constants.py +++ b/pandas/io/sas/sas_constants.py @@ -145,3 +145,27 @@ class index: b"\xFF\xFF\xFF\xFE": index.columnListIndex, b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": index.columnListIndex, b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": index.columnListIndex} + + +# List of frequently used SAS date and datetime formats +# http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm +# https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java +sas_date_formats = ("DATE", "DAY", "DDMMYY", "DOWNAME", "JULDAY", "JULIAN", + "MMDDYY", "MMYY", "MMYYC", "MMYYD", "MMYYP", "MMYYS", + "MMYYN", "MONNAME", "MONTH", "MONYY", "QTR", "QTRR", + "NENGO", "WEEKDATE", "WEEKDATX", "WEEKDAY", "WEEKV", + "WORDDATE", "WORDDATX", "YEAR", "YYMM", "YYMMC", "YYMMD", + "YYMMP", "YYMMS", "YYMMN", "YYMON", "YYMMDD", "YYQ", + "YYQC", "YYQD", "YYQP", "YYQS", "YYQN", "YYQR", "YYQRC", + "YYQRD", "YYQRP", "YYQRS", "YYQRN", + "YYMMDDP", "YYMMDDC", "E8601DA", "YYMMDDN", "MMDDYYC", + "MMDDYYS", "MMDDYYD", "YYMMDDS", "B8601DA", "DDMMYYN", + "YYMMDDD", "DDMMYYB", "DDMMYYP", "MMDDYYP", "YYMMDDB", + "MMDDYYN", "DDMMYYC", "DDMMYYD", "DDMMYYS", + "MINGUO") + +sas_datetime_formats = ("DATETIME", "DTWKDATX", + "B8601DN", "B8601DT", "B8601DX", "B8601DZ", "B8601LX", + "E8601DN", "E8601DT", "E8601DX", "E8601DZ", "E8601LX", + "DATEAMPM", "DTDATE", "DTMONYY", "DTMONYY", "DTWKDATX", + "DTYEAR", "TOD", "MDYAMPM") diff --git a/pandas/tests/io/sas/data/datetime.csv b/pandas/tests/io/sas/data/datetime.csv new file mode 100644 index 0000000000000..6126f6d04eaf0 --- /dev/null +++ b/pandas/tests/io/sas/data/datetime.csv @@ -0,0 +1,5 @@ +Date1,Date2,DateTime,DateTimeHi,Taiw +1677-09-22,1677-09-22,1677-09-21 00:12:44,1677-09-21 00:12:43.145226,1912-01-01 +1960-01-01,1960-01-01,1960-01-01 00:00:00,1960-01-01 00:00:00.000000,1960-01-01 +2016-02-29,2016-02-29,2016-02-29 23:59:59,2016-02-29 23:59:59.123456,2016-02-29 +2262-04-11,2262-04-11,2262-04-11 23:47:16,2262-04-11 23:47:16.854774,2262-04-11 diff --git a/pandas/tests/io/sas/data/datetime.sas7bdat b/pandas/tests/io/sas/data/datetime.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..6469dbf29f8eeeafb4703f01657eae4d0872ef10 GIT binary patch literal 131072 
zcmeIy!E0Pa7y$6uG_|QjNibjzl`cUf(556Ypaq4jB(VY0M!VHk1REh>Al?e~Ac){W zXbvg`59Z)O`~zAqf?_Du9z_sAk>Ei*cn)gQHtWpnd)dueN>l`e--MU%n|a^Ny!pMC zgwSeNetF~U<$?aMK6|$(?7ud)GI+4Be`qL-4^_IxySK~sRCbJyS7OQIA@uF3lqDx? zC#y5FPfbtN>eKbvx#{uh-)nJxEzNx3`He%LeED7N<#$ef^l6NFpTRFCTQO~2i9fA& z^vLMM=-AxYa}STd_{jV-Ct7#@T9)>Hdf@%z<<&X+^6JQX8Y`c%^S(X-0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1pY4qmsUsC%V%BwJ!W0~;hq?(TOs^dY2F!#d2dnl)6Eb@i}CrG zUrgh%KM?cBiu~%05ax^g?U+v#`3q$p`@?Pd#%56y$Jg5Om5#hvCWYKSICOn2CCykp zyz*YSa`D@Ta?Aw>zs~n_zKdUzJ>N{GvSwWSd}8VQ+Q&a%%ds^c?{922>vF7qc;Ra0 ziMVf$xvsdMa*Y510t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oU&Z0$Vp4jdF#%W71Q0a(pvXVtg&!wPUn> z&VjH$+#j!2JD2AycdXPt=e}5+u2wsptLN`9XJ0Jd`D{~ppUKeOS|(=g`~3Ml6M4?z z&hs=Hjp6Xa>uI5Cy!M8Dv2Jhb#&)Zbc1XQS*xI(b8Rtx;+eORI)?LZZSmu54UYQS; zc~2$pQI41M<8qDBvJ)OGc7L>p$BMI`4e3lFyt?Dmx8mHlw&%t(bhKxe7SGogURs`7 zIJY&~2+o12OcKNIsbHlAXwK3(?fORp@ZetmXsdOOCtG=6NUS)1;q`uf7s`MKHS zPo6r_TrUnaTLenal3e~GQSne?gV*8ZM~q4IU*?jLC&^^5=j literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/productsales.csv b/pandas/tests/io/sas/data/productsales.csv index fea9b68912297..1f6a4424e1a97 100644 --- a/pandas/tests/io/sas/data/productsales.csv +++ b/pandas/tests/io/sas/data/productsales.csv @@ -1,1441 +1,1441 @@ ACTUAL,PREDICT,COUNTRY,REGION,DIVISION,PRODTYPE,PRODUCT,QUARTER,YEAR,MONTH -925,850,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12054 -999,297,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12085 -608,846,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12113 -642,533,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12144 -656,646,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12174 -948,486,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12205 -612,717,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12235 -114,564,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12266 -685,230,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12297 -657,494,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12327 -608,903,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12358 -353,266,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12388 -107,190,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12419 -354,139,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12450 -101,217,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12478 -553,560,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12509 -877,148,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12539 -431,762,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12570 -511,457,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12600 -157,532,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12631 -520,629,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12662 -114,491,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12692 -277,0,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12723 -561,979,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12753 -220,585,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,12054 -444,267,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,12085 -178,487,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,12113 
-756,764,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,12144 -329,312,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,12174 -910,531,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,12205 -530,536,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,12235 -101,773,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,12266 -515,143,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,12297 -730,126,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,12327 -993,862,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,12358 -954,754,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,12388 -267,410,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,12419 -347,701,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,12450 -991,204,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,12478 -923,509,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,12509 -437,378,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,12539 -737,507,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,12570 -104,49,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,12600 -840,876,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,12631 -704,66,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,12662 -889,819,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,12692 -107,351,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,12723 -571,201,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,12753 -688,209,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,12054 -544,51,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,12085 -954,135,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,12113 -445,47,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,12144 -829,379,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,12174 -464,758,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,12205 -968,475,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,12235 -842,343,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,12266 -721,507,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,12297 -966,269,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,12327 -332,699,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,12358 -328,824,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,12388 -355,497,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,12419 -506,44,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,12450 -585,522,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,12478 -634,378,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,12509 -662,689,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,12539 -783,90,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,12570 -786,720,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,12600 -710,343,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,12631 -950,457,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,12662 -274,947,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,12692 -406,834,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,12723 -515,71,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,12753 -35,282,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12054 -995,538,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12085 -670,679,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12113 -406,601,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12144 -825,577,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12174 -467,908,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12205 -709,819,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12235 -522,687,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12266 -688,157,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12297 -956,111,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12327 -129,31,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12358 -687,790,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12388 -877,795,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12419 -845,379,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12450 -425,114,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12478 -899,475,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12509 -987,747,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12539 
-641,372,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12570 -448,415,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12600 -341,955,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12631 -137,356,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12662 -235,316,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12692 -482,351,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12723 -678,164,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12753 -240,386,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,12054 -605,113,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,12085 -274,68,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,12113 -422,885,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,12144 -763,575,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,12174 -561,743,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,12205 -339,816,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,12235 -877,203,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,12266 -192,581,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,12297 -604,815,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,12327 -55,333,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,12358 -87,40,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,12388 -942,672,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,12419 -912,23,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,12450 -768,948,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,12478 -951,291,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,12509 -768,839,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,12539 -978,864,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,12570 -20,337,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,12600 -298,95,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,12631 -193,535,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,12662 -336,191,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,12692 -617,412,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,12723 -709,711,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,12753 -5,425,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12054 -164,215,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12085 -422,948,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12113 -424,544,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12144 -854,764,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12174 -168,446,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12205 -8,957,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12235 -748,967,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12266 -682,11,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12297 -300,110,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12327 -672,263,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12358 -894,215,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12388 -944,965,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12419 -403,423,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12450 -596,753,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12478 -481,770,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12509 -503,263,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12539 -126,79,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12570 -721,441,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12600 -271,858,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12631 -721,667,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12662 -157,193,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12692 -991,394,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12723 -499,680,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12753 -284,414,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,12054 -705,770,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,12085 -737,679,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,12113 -745,7,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,12144 -633,713,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,12174 -983,851,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,12205 -591,944,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,12235 
-42,130,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,12266 -771,485,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,12297 -465,23,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,12327 -296,193,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,12358 -890,7,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,12388 -312,919,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,12419 -777,768,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,12450 -364,854,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,12478 -601,411,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,12509 -823,736,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,12539 -847,10,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,12570 -490,311,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,12600 -387,348,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,12631 -688,458,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,12662 -650,195,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,12692 -447,658,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,12723 -91,704,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,12753 -197,807,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,12054 -51,861,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,12085 -570,873,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,12113 -423,933,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,12144 -524,355,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,12174 -416,794,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,12205 -789,645,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,12235 -551,700,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,12266 -400,831,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,12297 -361,800,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,12327 -189,830,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,12358 -554,828,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,12388 -585,12,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,12419 -281,501,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,12450 -629,914,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,12478 -43,685,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,12509 -533,755,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,12539 -882,708,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,12570 -790,595,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,12600 -600,32,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,12631 -148,49,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,12662 -237,727,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,12692 -488,239,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,12723 -457,273,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,12753 -401,986,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12054 -181,544,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12085 -995,182,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12113 -120,197,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12144 -119,435,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12174 -319,974,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12205 -333,524,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12235 -923,688,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12266 -634,750,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12297 -493,155,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12327 -461,860,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12358 -304,102,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12388 -641,425,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12419 -992,224,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12450 -202,408,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12478 -770,524,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12509 -202,816,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12539 -14,515,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12570 -134,793,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12600 -977,460,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12631 -174,732,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12662 -429,435,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12692 
-514,38,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12723 -784,616,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12753 -973,225,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,12054 -511,402,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,12085 -30,697,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,12113 -895,567,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,12144 -557,231,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,12174 -282,372,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,12205 -909,15,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,12235 -276,866,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,12266 -234,452,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,12297 -479,663,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,12327 -782,982,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,12358 -755,813,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,12388 -689,523,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,12419 -496,871,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,12450 -24,511,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,12478 -379,819,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,12509 -441,525,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,12539 -49,13,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,12570 -243,694,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,12600 -295,782,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,12631 -395,839,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,12662 -929,461,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,12692 -997,303,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,12723 -889,421,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,12753 -72,421,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12054 -926,433,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12085 -850,394,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12113 -826,338,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12144 -651,764,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12174 -854,216,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12205 -899,96,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12235 -309,550,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12266 -943,636,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12297 -138,427,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12327 -99,652,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12358 -270,478,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12388 -862,18,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12419 -574,40,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12450 -359,453,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12478 -958,987,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12509 -791,26,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12539 -284,101,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12570 -190,969,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12600 -527,492,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12631 -112,263,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12662 -271,593,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12692 -643,923,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12723 -554,146,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12753 -211,305,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,12054 -368,318,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,12085 -778,417,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,12113 -808,623,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,12144 -46,761,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,12174 -466,272,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,12205 -18,988,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,12235 -87,821,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,12266 -765,962,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,12297 -62,615,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,12327 -13,523,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,12358 -775,806,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,12388 
-636,586,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,12419 -458,520,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,12450 -206,908,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,12478 -310,30,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,12509 -813,247,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,12539 -22,647,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,12570 -742,55,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,12600 -394,154,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,12631 -957,344,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,12662 -205,95,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,12692 -198,665,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,12723 -638,145,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,12753 -155,925,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,12054 -688,395,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,12085 -730,749,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,12113 -208,279,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,12144 -525,288,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,12174 -483,509,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,12205 -748,255,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,12235 -6,214,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,12266 -168,473,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,12297 -301,702,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,12327 -9,814,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,12358 -778,231,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,12388 -799,422,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,12419 -309,572,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,12450 -433,363,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,12478 -969,919,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,12509 -181,355,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,12539 -787,992,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,12570 -971,147,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,12600 -440,183,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,12631 -209,375,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,12662 -537,77,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,12692 -364,308,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,12723 -377,660,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,12753 -251,555,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12054 -607,455,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12085 -127,888,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12113 -513,652,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12144 -146,799,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12174 -917,249,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12205 -776,539,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12235 -330,198,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12266 -981,340,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12297 -862,152,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12327 -612,347,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12358 -607,565,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12388 -786,855,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12419 -160,87,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12450 -199,69,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12478 -972,807,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12509 -870,565,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12539 -494,798,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12570 -975,714,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12600 -760,17,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12631 -180,797,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12662 -256,422,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12692 -422,621,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12723 -859,661,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12753 -586,363,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,12054 -441,910,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,12085 
-597,998,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,12113 -717,95,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,12144 -713,731,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,12174 -591,718,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,12205 -492,467,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,12235 -170,126,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,12266 -684,127,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,12297 -981,746,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,12327 -966,878,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,12358 -439,27,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,12388 -151,569,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,12419 -602,812,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,12450 -187,603,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,12478 -415,506,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,12509 -61,185,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,12539 -839,692,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,12570 -596,565,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,12600 -751,512,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,12631 -460,86,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,12662 -922,399,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,12692 -153,672,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,12723 -928,801,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,12753 -951,730,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12054 -394,408,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12085 -615,982,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12113 -653,499,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12144 -180,307,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12174 -649,741,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12205 -921,640,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12235 -11,300,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12266 -696,929,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12297 -795,309,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12327 -550,340,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12358 -320,228,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12388 -845,1000,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12419 -245,21,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12450 -142,583,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12478 -717,506,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12509 -3,405,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12539 -790,556,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12570 -646,72,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12600 -230,103,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12631 -938,262,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12662 -629,102,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12692 -317,841,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12723 -812,159,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12753 -141,570,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,12054 -64,375,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,12085 -207,298,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,12113 -435,32,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,12144 -96,760,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,12174 -252,338,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,12205 -956,149,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,12235 -633,343,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,12266 -190,151,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,12297 -227,44,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,12327 -24,583,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,12358 -420,230,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,12388 -910,907,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,12419 -709,783,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,12450 -810,117,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,12478 -723,416,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,12509 
-911,318,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,12539 -230,888,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,12570 -448,60,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,12600 -945,596,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,12631 -508,576,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,12662 -262,576,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,12692 -441,280,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,12723 -15,219,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,12753 -795,133,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,12054 -301,273,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,12085 -304,86,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,12113 -49,400,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,12144 -576,364,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,12174 -669,63,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,12205 -325,929,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,12235 -272,344,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,12266 -80,768,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,12297 -46,668,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,12327 -223,407,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,12358 -774,536,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,12388 -784,657,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,12419 -92,215,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,12450 -67,966,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,12478 -747,674,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,12509 -686,574,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,12539 -93,266,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,12570 -192,680,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,12600 -51,362,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,12631 -498,412,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,12662 -546,431,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,12692 -485,94,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,12723 -925,345,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,12753 -292,445,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12054 -540,632,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12085 -21,855,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12113 -100,36,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12144 -49,250,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12174 -353,427,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12205 -911,367,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12235 -823,245,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12266 -278,893,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12297 -576,490,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12327 -655,88,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12358 -763,964,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12388 -88,62,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12419 -746,506,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12450 -927,680,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12478 -297,153,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12509 -291,403,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12539 -838,98,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12570 -112,376,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12600 -509,477,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12631 -472,50,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12662 -495,592,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12692 -1000,813,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12723 -241,740,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12753 -693,873,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,12054 -903,459,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,12085 -791,224,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,12113 -108,562,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,12144 -845,199,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,12174 -452,275,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,12205 -479,355,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,12235 
-410,947,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,12266 -379,454,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,12297 -740,450,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,12327 -471,575,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,12358 -325,6,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,12388 -455,847,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,12419 -563,338,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,12450 -879,517,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,12478 -312,630,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,12509 -587,381,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,12539 -628,864,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,12570 -486,416,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,12600 -811,852,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,12631 -990,815,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,12662 -35,23,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,12692 -764,527,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,12723 -619,693,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,12753 -996,977,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12054 -554,549,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12085 -540,951,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12113 -140,390,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12144 -554,204,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12174 -724,78,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12205 -693,613,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12235 -866,745,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12266 -833,56,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12297 -164,887,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12327 -753,651,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12358 -60,691,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12388 -688,767,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12419 -883,709,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12450 -109,417,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12478 -950,326,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12509 -438,599,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12539 -286,818,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12570 -342,13,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12600 -383,185,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12631 -80,140,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12662 -322,717,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12692 -749,852,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12723 -606,125,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12753 -641,325,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,12054 -494,648,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,12085 -428,365,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,12113 -936,120,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,12144 -597,347,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,12174 -728,638,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,12205 -933,732,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,12235 -663,465,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,12266 -394,262,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,12297 -334,947,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,12327 -114,694,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,12358 -89,482,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,12388 -874,600,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,12419 -674,94,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,12450 -347,323,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,12478 -105,49,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,12509 -286,70,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,12539 -669,844,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,12570 -786,773,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,12600 -104,68,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,12631 
-770,110,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,12662
-263,42,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,12692
-900,171,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,12723
-630,644,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,12753
-597,408,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,12054
-185,45,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,12085
-175,522,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,12113
-576,166,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,12144
-957,885,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,12174
-993,713,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,12205
-500,838,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,12235
-410,267,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,12266
-592,967,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,12297
-64,529,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,12327
-208,656,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,12358
-273,665,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,12388
-906,419,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,12419
-429,776,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,12450
-961,971,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,12478
-338,248,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,12509
-472,486,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,12539
-903,674,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,12570
-299,603,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,12600
-948,492,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,12631
-931,512,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,12662
-570,391,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,12692
-97,313,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,12723
-674,758,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,12753
-468,304,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12054
-430,846,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12085
-893,912,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12113
-519,810,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12144
-267,122,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12174
-908,102,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12205
-176,161,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12235
-673,450,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12266
-798,215,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12297
-291,765,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12327
-583,557,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12358
-442,739,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12388
-951,811,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12419
-430,780,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12450
-559,645,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12478
-726,365,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12509
-944,597,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12539
-497,126,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12570
-388,655,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12600
-81,604,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12631
-111,280,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12662
-288,115,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12692
-845,205,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12723
-745,672,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12753
-352,339,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,12054
-234,70,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,12085
-167,528,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,12113
-606,220,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,12144
-670,691,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,12174
-764,197,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,12205
-659,239,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,12235
-996,50,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,12266
-424,135,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,12297
-899,972,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,12327
-392,475,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,12358
-555,868,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,12388
-860,451,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,12419
-114,565,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,12450
-943,116,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,12478
-365,385,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,12509
-249,375,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,12539
-192,357,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,12570
-328,230,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,12600
-311,829,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,12631
-576,971,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,12662
-915,280,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,12692
-522,853,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,12723
-625,953,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,12753
-873,874,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12054
-498,578,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12085
-808,768,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12113
-742,178,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12144
-744,916,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12174
-30,917,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12205
-747,633,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12235
-672,107,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12266
-564,523,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12297
-785,924,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12327
-825,481,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12358
-243,240,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12388
-959,819,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12419
-123,602,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12450
-714,538,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12478
-252,632,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12509
-715,952,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12539
-670,480,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12570
-81,700,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12600
-653,726,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12631
-795,526,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12662
-182,410,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12692
-725,307,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12723
-101,73,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12753
-143,232,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,12054
-15,993,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,12085
-742,652,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,12113
-339,761,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,12144
-39,428,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,12174
-465,4,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,12205
-889,101,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,12235
-856,869,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,12266
-358,271,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,12297
-452,633,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,12327
-387,481,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,12358
-824,302,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,12388
-185,245,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,12419
-151,941,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,12450
-419,721,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,12478
-643,893,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,12509
-63,898,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,12539
-202,94,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,12570
-332,962,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,12600
-723,71,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,12631
-148,108,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,12662
-840,71,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,12692
-601,767,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,12723
-962,323,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,12753
-166,982,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,12054
-531,614,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,12085
-963,839,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,12113
-994,388,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,12144
-978,296,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,12174
-72,429,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,12205
-33,901,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,12235
-428,350,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,12266
-413,581,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,12297
-737,583,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,12327
-85,92,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,12358
-916,647,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,12388
-785,771,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,12419
-302,26,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,12450
-1000,598,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,12478
-458,715,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,12509
-896,74,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,12539
-615,580,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,12570
-174,848,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,12600
-651,118,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,12631
-784,54,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,12662
-121,929,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,12692
-341,393,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,12723
-615,820,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,12753
-697,336,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12054
-215,299,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12085
-197,747,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12113
-205,154,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12144
-256,486,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12174
-377,251,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12205
-577,225,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12235
-686,77,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12266
-332,74,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12297
-534,596,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12327
-485,493,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12358
-594,782,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12388
-413,487,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12419
-13,127,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12450
-483,538,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12478
-820,94,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12509
-745,252,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12539
-79,722,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12570
-36,536,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12600
-950,958,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12631
-74,466,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12662
-458,309,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12692
-609,680,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12723
-429,539,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12753
-956,511,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,12054
-205,505,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,12085
-629,720,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,12113
-277,823,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,12144
-266,21,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,12174
-872,142,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,12205
-435,95,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,12235
-988,398,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,12266
-953,328,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,12297
-556,151,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,12327
-211,978,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,12358
-389,918,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,12388
-351,542,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,12419
-14,96,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,12450
-181,496,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,12478
-452,77,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,12509
-511,236,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,12539
-193,913,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,12570
-797,49,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,12600
-988,967,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,12631
-487,502,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,12662
-941,790,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,12692
-577,121,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,12723
-456,55,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,12753
-982,739,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12054
-593,683,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12085
-702,610,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12113
-528,248,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12144
-873,530,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12174
-301,889,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12205
-769,245,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12235
-724,473,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12266
-466,938,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12297
-774,150,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12327
-111,772,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12358
-954,201,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12388
-780,945,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12419
-210,177,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12450
-93,378,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12478
-332,83,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12509
-186,803,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12539
-782,398,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12570
-41,215,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12600
-222,194,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12631
-992,287,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12662
-477,410,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12692
-948,50,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12723
-817,204,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12753
-597,239,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,12054
-649,637,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,12085
-3,938,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,12113
-731,788,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,12144
-181,399,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,12174
-468,576,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,12205
-891,187,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,12235
-226,703,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,12266
-28,455,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,12297
-609,244,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,12327
-224,868,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,12358
-230,353,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,12388
-216,101,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,12419
-282,924,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,12450
-501,144,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,12478
-320,0,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,12509
-720,910,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,12539
-464,259,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,12570
-363,107,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,12600
-49,63,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,12631
-223,270,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,12662
-452,554,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,12692
-210,154,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,12723
-444,205,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,12753
-222,441,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,12054
-678,183,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,12085
-25,459,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,12113
-57,810,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,12144
-981,268,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,12174
-740,916,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,12205
-408,742,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,12235
-966,522,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,12266
-107,299,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,12297
-488,677,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,12327
-759,709,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,12358
-504,310,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,12388
-99,160,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,12419
-503,698,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,12450
-724,540,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,12478
-309,901,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,12509
-625,34,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,12539
-294,536,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,12570
-890,780,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,12600
-501,716,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,12631
-34,532,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,12662
-203,871,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,12692
-140,199,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,12723
-845,845,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,12753
-774,591,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12054
-645,378,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12085
-986,942,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12113
-296,686,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12144
-936,720,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12174
-341,546,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12205
-32,845,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12235
-277,667,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12266
-548,627,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12297
-727,142,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12327
-812,655,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12358
-168,556,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12388
-150,459,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12419
-136,89,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12450
-695,726,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12478
-363,38,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12509
-853,60,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12539
-621,369,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12570
-764,381,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12600
-669,465,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12631
-772,981,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12662
-228,758,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12692
-261,31,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12723
-821,237,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12753
-100,285,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,12054
-465,94,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,12085
-350,561,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,12113
-991,143,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,12144
-910,95,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,12174
-206,341,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,12205
-263,388,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,12235
-374,272,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,12266
-875,890,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,12297
-810,734,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,12327
-398,364,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,12358
-565,619,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,12388
-417,517,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,12419
-291,781,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,12450
-251,327,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,12478
-449,48,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,12509
-774,809,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,12539
-386,73,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,12570
-22,936,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,12600
-940,400,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,12631
-132,736,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,12662
-103,211,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,12692
-152,271,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,12723
-952,855,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,12753
-872,923,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12054
-748,854,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12085
-749,769,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12113
-876,271,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12144
-860,383,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12174
-900,29,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12205
-705,185,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12235
-913,351,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12266
-315,560,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12297
-466,840,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12327
-233,517,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12358
-906,949,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12388
-148,633,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12419
-661,636,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12450
-847,138,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12478
-768,481,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12509
-866,408,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12539
-475,130,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12570
-112,813,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12600
-136,661,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12631
-763,311,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12662
-388,872,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12692
-996,643,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12723
-486,174,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12753
-494,528,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,12054
-771,124,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,12085
-49,126,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,12113
-322,440,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,12144
-878,881,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,12174
-827,292,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,12205
-852,873,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,12235
-716,357,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,12266
-81,247,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,12297
-916,18,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,12327
-673,395,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,12358
-242,620,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,12388
-914,946,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,12419
-902,72,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,12450
-707,691,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,12478
-223,95,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,12509
-619,878,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,12539
-254,757,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,12570
-688,898,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,12600
-477,172,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,12631
-280,419,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,12662
-546,849,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,12692
-630,807,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,12723
-455,599,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,12753
-505,59,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,12054
-823,790,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,12085
-891,574,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,12113
-840,96,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,12144
-436,376,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,12174
-168,352,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,12205
-177,741,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,12235
-727,12,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,12266
-278,157,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,12297
-443,10,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,12327
-905,544,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,12358
-881,817,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,12388
-507,754,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,12419
-363,425,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,12450
-603,492,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,12478
-473,485,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,12509
-128,369,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,12539
-105,560,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,12570
-325,651,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,12600
-711,326,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,12631
-983,180,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,12662
-241,935,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,12692
-71,403,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,12723
-395,345,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,12753
-168,278,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12054
-512,376,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12085
-291,104,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12113
-776,543,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12144
-271,798,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12174
-946,333,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12205
-195,833,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12235
-165,132,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12266
-238,629,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12297
-409,337,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12327
-720,300,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12358
-309,470,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12388
-812,875,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12419
-441,237,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12450
-500,272,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12478
-517,860,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12509
-924,415,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12539
-572,140,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12570
-768,367,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12600
-692,195,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12631
-28,245,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12662
-202,285,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12692
-76,98,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12723
-421,932,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12753
-636,898,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,12054
-52,330,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,12085
-184,603,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,12113
-739,280,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,12144
-841,507,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,12174
-65,202,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,12205
-623,513,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,12235
-517,132,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,12266
-636,21,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,12297
-845,657,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,12327
-232,195,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,12358
-26,323,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,12388
-680,299,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,12419
-364,811,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,12450
-572,739,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,12478
-145,889,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,12509
-644,189,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,12539
-87,698,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,12570
-620,646,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,12600
-535,562,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,12631
-661,753,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,12662
-884,425,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,12692
-689,693,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,12723
-646,941,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,12753
-4,975,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12054
-813,455,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12085
-773,260,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12113
-205,69,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12144
-657,147,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12174
-154,533,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12205
-747,881,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12235
-787,457,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12266
-867,441,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12297
-307,859,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12327
-571,177,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12358
-92,633,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12388
-269,382,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12419
-764,707,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12450
-662,566,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12478
-818,349,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12509
-617,128,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12539
-649,231,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12570
-895,258,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12600
-750,812,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12631
-738,362,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12662
-107,133,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12692
-278,60,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12723
-32,88,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12753
-129,378,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,12054
-187,569,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,12085
-670,186,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,12113
-678,875,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,12144
-423,636,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,12174
-389,360,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,12205
-257,677,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,12235
-780,708,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,12266
-159,158,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,12297
-97,384,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,12327
-479,927,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,12358
-9,134,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,12388
-614,273,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,12419
-261,27,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,12450
-115,209,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,12478
-358,470,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,12509
-133,219,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,12539
-891,907,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,12570
-702,778,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,12600
-58,998,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,12631
-606,194,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,12662
-668,933,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,12692
-813,708,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,12723
-450,949,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,12753
-956,579,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,12054
-276,131,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,12085
-889,689,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,12113
-708,908,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,12144
-14,524,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,12174
-904,336,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,12205
-272,916,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,12235
-257,236,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,12266
-343,965,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,12297
-80,350,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,12327
-530,599,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,12358
-340,901,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,12388
-595,935,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,12419
-47,667,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,12450
-279,104,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,12478
-293,803,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,12509
-162,64,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,12539
-935,825,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,12570
-689,839,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,12600
-484,184,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,12631
-230,348,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,12662
-164,904,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,12692
-401,219,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,12723
-607,381,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,12753
-229,524,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12054
-786,902,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12085
-92,212,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12113
-455,762,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12144
-409,182,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12174
-166,442,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12205
-277,919,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12235
-92,67,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12266
-631,741,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12297
-390,617,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12327
-403,214,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12358
-964,202,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12388
-223,788,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12419
-684,639,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12450
-645,336,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12478
-470,937,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12509
-424,399,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12539
-862,21,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12570
-736,125,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12600
-554,635,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12631
-790,229,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12662
-115,770,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12692
-853,622,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12723
-643,109,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12753
-794,975,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,12054
-892,820,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,12085
-728,123,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,12113
-744,135,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,12144
-678,535,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,12174
-768,971,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,12205
-234,166,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,12235
-333,814,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,12266
-968,557,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,12297
-119,820,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,12327
-469,486,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,12358
-261,429,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,12388
-984,65,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,12419
-845,977,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,12450
-374,410,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,12478
-687,150,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,12509
-157,630,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,12539
-49,488,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,12570
-817,112,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,12600
-223,598,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,12631
-433,705,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,12662
-41,226,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,12692
-396,979,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,12723
-131,19,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,12753
-521,204,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12054
-751,805,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12085
-45,549,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12113
-144,912,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12144
-119,427,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12174
-728,1,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12205
-120,540,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12235
-657,940,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12266
-409,644,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12297
-881,821,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12327
-113,560,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12358
-831,309,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12388
-129,1000,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12419
-76,945,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12450
-260,931,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12478
-882,504,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12509
-157,950,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12539
-443,278,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12570
-111,225,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12600
-497,6,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12631
-321,124,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12662
-194,206,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12692
-684,320,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12723
-634,270,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12753
-622,278,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,12054
-689,447,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,12085
-120,170,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,12113
-374,87,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,12144
-926,384,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,12174
-687,574,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,12205
-600,585,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,12235
-779,947,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,12266
-223,984,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,12297
-628,189,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,12327
-326,364,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,12358
-836,49,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,12388
-361,851,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,12419
-444,643,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,12450
-501,143,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,12478
-743,763,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,12509
-861,987,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,12539
-203,264,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,12570
-762,439,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,12600
-705,750,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,12631
-153,37,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,12662
-436,95,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,12692
-428,79,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,12723
-804,832,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,12753
-805,649,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,12054
-860,838,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,12085
-104,439,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,12113
-434,207,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,12144
-912,804,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,12174
-571,875,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,12205
-267,473,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,12235
-415,845,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,12266
-261,91,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,12297
-746,630,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,12327
-30,185,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,12358
-662,317,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,12388
-916,88,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,12419
-415,607,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,12450
-514,35,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,12478
-756,680,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,12509
-461,78,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,12539
-460,117,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,12570
-305,440,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,12600
-198,652,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,12631
-234,249,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,12662
-638,658,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,12692
-88,563,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,12723
-751,737,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,12753
-816,789,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12054
-437,988,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12085
-715,220,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12113
-780,946,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12144
-245,986,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12174
-201,129,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12205
-815,433,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12235
-865,492,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12266
-634,306,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12297
-901,154,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12327
-789,206,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12358
-882,81,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12388
-953,882,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12419
-862,848,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12450
-628,664,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12478
-765,389,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12509
-741,182,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12539
-61,505,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12570
-470,861,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12600
-869,263,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12631
-650,400,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12662
-750,556,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12692
-602,497,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12723
-54,181,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12753
-384,619,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,12054
-161,332,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,12085
-977,669,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,12113
-615,487,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,12144
-783,994,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,12174
-977,331,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,12205
-375,739,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,12235
-298,665,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,12266
-104,921,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,12297
-713,862,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,12327
-556,662,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,12358
-323,517,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,12388
-391,352,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,12419
-593,166,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,12450
-906,859,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,12478
-130,571,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,12509
-613,976,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,12539
-58,466,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,12570
-314,79,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,12600
-67,864,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,12631
-654,623,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,12662
-312,170,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,12692
-349,662,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,12723
-415,763,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,12753
-404,896,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12054
-22,973,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12085
-744,161,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12113
-804,934,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12144
-101,697,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12174
-293,116,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12205
-266,84,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12235
-372,604,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12266
-38,371,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12297
-385,783,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12327
-262,335,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12358
-961,321,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12388
-831,177,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12419
-579,371,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12450
-301,583,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12478
-693,364,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12509
-895,343,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12539
-320,854,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12570
-284,691,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12600
-362,387,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12631
-132,298,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12662
-42,635,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12692
-118,81,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12723
-42,375,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12753
-18,846,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,12054
-512,933,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,12085
-337,237,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,12113
-167,964,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,12144
-749,382,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,12174
-890,610,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,12205
-910,148,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,12235
-403,837,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,12266
-403,85,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,12297
-661,425,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,12327
-485,633,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,12358
-789,515,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,12388
-415,512,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,12419
-418,156,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,12450
-163,464,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,12478
-298,813,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,12509
-584,455,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,12539
-797,366,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,12570
-767,734,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,12600
-984,451,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,12631
-388,134,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,12662
-924,547,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,12692
-566,802,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,12723
-390,61,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,12753
-608,556,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,12054
-840,202,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,12085
-112,964,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,12113
-288,112,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,12144
-408,445,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,12174
-876,884,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,12205
-224,348,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,12235
-133,564,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,12266
-662,568,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,12297
-68,882,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,12327
-626,542,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,12358
-678,119,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,12388
-361,248,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,12419
-464,868,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,12450
-681,841,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,12478
-377,484,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,12509
-222,986,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,12539
-972,39,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,12570
-56,930,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,12600
-695,252,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,12631
-908,794,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,12662
-328,658,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,12692
-891,139,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,12723
-265,331,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,12753
-251,261,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12054
-783,122,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12085
-425,296,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12113
-859,391,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12144
-314,75,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12174
-153,731,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12205
-955,883,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12235
-654,707,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12266
-693,97,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12297
-757,390,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12327
-221,237,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12358
-942,496,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12388
-31,814,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12419
-540,765,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12450
-352,308,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12478
-904,327,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12509
-436,266,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12539
-281,699,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12570
-801,599,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12600
-273,950,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12631
-716,117,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12662
-902,632,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12692
-341,35,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12723
-155,562,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12753
-796,144,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,12054
-257,142,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,12085
-611,273,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,12113
-6,915,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,12144
-125,920,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,12174
-745,294,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,12205
-437,681,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,12235
-906,86,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,12266
-844,764,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,12297
-413,269,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,12327
-869,138,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,12358
-403,834,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,12388
-137,112,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,12419
-922,921,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,12450
-202,859,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,12478
-955,442,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,12509
-781,593,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,12539
-12,346,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,12570
-931,312,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,12600
-95,690,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,12631
-795,344,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,12662
-542,784,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,12692
-935,639,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,12723
-269,726,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,12753
-197,596,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12054
-828,263,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12085
-461,194,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12113
-35,895,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12144
-88,502,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12174
-832,342,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12205
-900,421,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12235
-368,901,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12266
-201,474,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12297
-758,571,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12327
-504,511,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12358
-864,379,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12388
-574,68,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12419
-61,210,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12450
-565,478,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12478
-475,296,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12509
-44,664,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12539
-145,880,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12570
-813,607,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12600
-703,97,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12631
-757,908,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12662
-96,152,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12692
-860,622,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12723
-750,309,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12753
-585,912,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,12054
-127,429,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,12085
-669,580,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,12113
-708,179,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,12144
-830,119,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,12174
-550,369,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,12205
-762,882,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,12235
-468,727,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,12266
-151,823,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,12297
-103,783,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,12327
-876,884,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,12358
-881,891,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,12388
-116,909,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,12419
-677,765,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,12450
-477,180,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,12478
-154,712,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,12509
-331,175,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,12539
-784,869,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,12570
-563,820,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,12600
-229,554,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,12631
-451,126,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,12662
-974,760,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,12692
-484,446,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,12723
-69,254,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,12753
-755,516,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,12054
-331,779,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,12085
-482,987,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,12113
-632,318,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,12144
-750,427,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,12174
-618,86,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,12205
-935,553,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,12235
-716,315,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,12266
-205,328,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,12297
-215,521,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,12327
-871,156,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,12358
-552,841,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,12388
-619,623,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,12419
-701,849,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,12450
-104,438,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,12478
-114,719,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,12509
-854,906,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,12539
-563,267,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,12570
-73,542,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,12600
-427,552,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,12631
-348,428,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,12662
-148,158,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,12692
-895,379,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,12723
-394,142,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,12753
-792,588,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12054
-175,506,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12085
-208,382,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12113
-354,132,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12144
-163,652,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12174
-336,723,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12205
-804,682,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12235
-863,382,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12266
-326,125,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12297
-568,321,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12327
-691,922,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12358
-152,884,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12388
-565,38,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12419
-38,194,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12450
-185,996,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12478
-318,532,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12509
-960,391,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12539
-122,104,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12570
-400,22,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12600
-301,650,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12631
-909,143,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12662
-433,999,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12692
-508,415,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12723
-648,350,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12753
-793,342,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,12054
-129,215,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,12085
-481,52,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,12113
-406,292,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,12144
-512,862,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,12174
-668,309,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,12205
-551,886,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,12235
-124,172,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,12266
-655,912,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,12297
-523,666,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,12327
-739,656,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,12358
-87,145,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,12388
-890,664,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,12419
-665,639,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,12450
-329,707,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,12478
-417,891,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,12509
-828,466,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,12539
-298,451,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,12570
-356,451,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,12600
-909,874,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,12631
-251,805,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,12662
-526,426,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,12692
-652,932,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,12723
-573,581,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,12753
+925,850,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01
+999,297,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01
+608,846,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01
+642,533,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01
+656,646,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01
+948,486,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01
+612,717,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01
+114,564,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01
+685,230,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01
+657,494,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01
+608,903,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01
+353,266,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01
+107,190,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01
+354,139,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01
+101,217,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01
+553,560,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01
+877,148,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01
+431,762,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01
+511,457,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01
+157,532,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01
+520,629,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01
+114,491,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01
+277,0,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01
+561,979,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01
+220,585,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01
+444,267,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01
+178,487,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01
+756,764,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01
+329,312,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01
+910,531,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01
+530,536,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01
+101,773,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01
+515,143,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01
+730,126,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01
+993,862,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01
+954,754,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01
+267,410,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01
+347,701,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01
+991,204,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01
+923,509,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01
+437,378,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01
+737,507,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01
+104,49,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01
+840,876,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01
+704,66,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01
+889,819,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01
+107,351,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01
+571,201,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01
+688,209,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01
+544,51,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01
+954,135,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01
+445,47,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01
+829,379,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01
+464,758,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01
+968,475,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01
+842,343,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01
+721,507,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01
+966,269,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01
+332,699,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01
+328,824,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01
+355,497,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01
+506,44,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01
+585,522,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01
+634,378,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01
+662,689,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01
+783,90,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01
+786,720,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01
+710,343,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01
+950,457,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01
+274,947,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01
+406,834,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01
+515,71,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01
+35,282,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01
+995,538,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01
+670,679,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01
+406,601,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01
+825,577,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01
+467,908,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01
+709,819,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01
+522,687,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01
+688,157,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01
+956,111,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01
+129,31,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01
+687,790,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01
+877,795,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01
+845,379,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01
+425,114,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01
+899,475,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01
+987,747,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01
+641,372,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01
+448,415,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01
+341,955,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01
+137,356,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01
+235,316,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01
+482,351,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01
+678,164,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01
+240,386,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01
+605,113,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01
+274,68,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01
+422,885,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01
+763,575,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01
+561,743,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01
+339,816,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01
+877,203,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01
+192,581,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01
+604,815,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01
+55,333,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01
+87,40,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01
+942,672,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01
+912,23,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01
+768,948,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01
+951,291,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01
+768,839,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01
+978,864,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01
+20,337,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01
+298,95,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01
+193,535,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01
+336,191,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01
+617,412,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01
+709,711,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01
+5,425,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01
+164,215,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01
+422,948,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01
+424,544,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01
+854,764,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01
+168,446,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01
+8,957,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01
+748,967,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01
+682,11,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01
+300,110,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01
+672,263,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01
+894,215,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01
+944,965,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01
+403,423,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01
+596,753,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01
+481,770,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01
+503,263,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01
+126,79,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01
+721,441,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01
+271,858,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01
+721,667,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01
+157,193,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01
+991,394,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01
+499,680,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01
+284,414,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01
+705,770,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01
+737,679,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01
+745,7,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01
+633,713,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01
+983,851,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01
+591,944,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01
+42,130,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01
+771,485,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01
+465,23,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01
+296,193,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01
+890,7,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01
+312,919,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01
+777,768,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01
+364,854,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01
+601,411,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01
+823,736,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01
+847,10,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01
+490,311,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01
+387,348,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01
+688,458,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01
+650,195,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01
+447,658,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01
+91,704,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01
+197,807,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01
+51,861,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01
+570,873,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01
+423,933,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01
+524,355,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01
+416,794,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01
+789,645,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01
+551,700,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01
+400,831,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01
+361,800,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01
+189,830,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01
+554,828,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01
+585,12,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01
+281,501,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01
+629,914,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01
+43,685,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01
+533,755,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01
+882,708,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01
+790,595,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01
+600,32,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01
+148,49,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01
+237,727,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01
+488,239,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01
+457,273,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01
+401,986,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01
+181,544,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01
+995,182,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01
+120,197,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01
+119,435,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01
+319,974,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01
+333,524,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01
+923,688,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01
+634,750,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01
+493,155,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01
+461,860,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01
+304,102,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01
+641,425,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01
+992,224,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01
+202,408,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01
+770,524,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01
+202,816,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01
+14,515,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01
+134,793,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01
+977,460,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01
+174,732,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01
+429,435,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01
+514,38,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01
+784,616,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01
+973,225,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01
+511,402,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01
+30,697,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01
+895,567,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01
+557,231,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01
+282,372,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01
+909,15,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01
+276,866,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01
+234,452,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01
+479,663,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01
+782,982,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01
+755,813,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01
+689,523,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01
+496,871,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01
+24,511,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01
+379,819,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01
+441,525,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01
+49,13,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01
+243,694,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01
+295,782,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01
+395,839,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01
+929,461,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01
+997,303,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01
+889,421,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01
+72,421,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01
+926,433,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01
+850,394,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01
+826,338,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01
+651,764,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01
+854,216,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01
+899,96,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01
+309,550,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01
+943,636,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01
+138,427,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01
+99,652,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01
+270,478,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01
+862,18,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01
+574,40,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01
+359,453,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01
+958,987,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01
+791,26,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01
+284,101,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01
+190,969,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01
+527,492,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01
+112,263,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01
+271,593,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01
+643,923,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01
+554,146,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01
+211,305,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01
+368,318,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01
+778,417,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01
+808,623,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01
+46,761,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01
+466,272,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01
+18,988,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01
+87,821,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01
+765,962,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01
+62,615,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01
+13,523,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01
+775,806,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01
+636,586,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01
+458,520,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01
+206,908,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01
+310,30,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01
+813,247,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01
+22,647,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01
+742,55,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01
+394,154,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01
+957,344,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01
+205,95,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01
+198,665,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01
+638,145,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01
+155,925,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01
+688,395,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01
+730,749,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01
+208,279,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01
+525,288,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01
+483,509,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01
+748,255,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01
+6,214,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01
+168,473,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01
+301,702,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01
+9,814,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01
+778,231,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01
+799,422,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01
+309,572,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01
+433,363,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01
+969,919,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01
+181,355,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01
+787,992,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01
+971,147,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01
+440,183,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01
+209,375,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01
+537,77,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01
+364,308,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01
+377,660,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01
+251,555,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01
+607,455,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01
+127,888,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01
+513,652,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01
+146,799,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01
+917,249,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01
+776,539,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01
+330,198,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01
+981,340,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01
+862,152,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01
+612,347,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01
+607,565,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01
+786,855,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01
+160,87,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01
+199,69,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01
+972,807,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01
+870,565,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01
+494,798,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01
+975,714,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01
+760,17,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01
+180,797,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01
+256,422,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01
+422,621,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01
+859,661,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01
+586,363,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01
+441,910,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01
+597,998,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01
+717,95,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01
+713,731,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01
+591,718,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01
+492,467,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01
+170,126,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01
+684,127,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01
+981,746,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01
+966,878,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01
+439,27,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01
+151,569,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01
+602,812,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +187,603,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +415,506,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +61,185,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +839,692,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +596,565,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +751,512,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +460,86,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +922,399,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +153,672,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +928,801,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +951,730,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +394,408,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +615,982,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +653,499,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +180,307,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +649,741,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +921,640,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +11,300,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +696,929,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +795,309,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +550,340,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +320,228,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +845,1000,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +245,21,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +142,583,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +717,506,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +3,405,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +790,556,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +646,72,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +230,103,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +938,262,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +629,102,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +317,841,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +812,159,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +141,570,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +64,375,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +207,298,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +435,32,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +96,760,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +252,338,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +956,149,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +633,343,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +190,151,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +227,44,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +24,583,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +420,230,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +910,907,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +709,783,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +810,117,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +723,416,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +911,318,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +230,888,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +448,60,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +945,596,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +508,576,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +262,576,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 
+441,280,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +15,219,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +795,133,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +301,273,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +304,86,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +49,400,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +576,364,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +669,63,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +325,929,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +272,344,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +80,768,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +46,668,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +223,407,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +774,536,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +784,657,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +92,215,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +67,966,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +747,674,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +686,574,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +93,266,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +192,680,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +51,362,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +498,412,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +546,431,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +485,94,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +925,345,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +292,445,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +540,632,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +21,855,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +100,36,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +49,250,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +353,427,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +911,367,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +823,245,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +278,893,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +576,490,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +655,88,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +763,964,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +88,62,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +746,506,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +927,680,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +297,153,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +291,403,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +838,98,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +112,376,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +509,477,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +472,50,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +495,592,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +1000,813,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +241,740,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +693,873,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +903,459,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +791,224,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +108,562,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +845,199,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +452,275,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +479,355,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +410,947,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 
+379,454,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +740,450,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +471,575,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +325,6,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +455,847,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +563,338,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +879,517,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +312,630,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +587,381,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +628,864,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +486,416,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +811,852,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +990,815,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +35,23,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +764,527,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +619,693,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +996,977,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +554,549,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +540,951,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +140,390,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +554,204,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +724,78,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +693,613,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +866,745,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +833,56,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +164,887,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +753,651,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +60,691,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +688,767,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +883,709,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +109,417,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +950,326,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +438,599,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +286,818,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +342,13,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +383,185,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +80,140,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +322,717,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +749,852,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +606,125,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +641,325,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +494,648,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +428,365,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +936,120,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +597,347,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +728,638,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +933,732,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +663,465,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +394,262,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +334,947,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +114,694,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +89,482,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +874,600,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +674,94,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +347,323,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +105,49,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 
+286,70,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +669,844,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +786,773,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +104,68,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +770,110,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +263,42,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +900,171,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +630,644,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +597,408,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +185,45,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +175,522,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +576,166,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +957,885,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +993,713,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +500,838,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +410,267,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +592,967,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +64,529,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +208,656,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +273,665,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +906,419,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +429,776,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +961,971,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +338,248,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +472,486,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +903,674,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +299,603,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +948,492,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +931,512,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +570,391,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +97,313,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +674,758,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +468,304,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +430,846,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +893,912,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +519,810,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +267,122,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +908,102,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +176,161,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +673,450,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +798,215,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +291,765,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +583,557,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +442,739,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +951,811,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +430,780,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +559,645,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +726,365,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +944,597,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +497,126,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +388,655,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +81,604,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +111,280,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +288,115,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +845,205,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +745,672,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 
+352,339,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +234,70,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +167,528,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +606,220,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +670,691,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +764,197,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +659,239,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +996,50,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +424,135,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +899,972,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +392,475,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +555,868,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +860,451,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +114,565,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +943,116,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +365,385,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +249,375,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +192,357,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +328,230,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +311,829,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +576,971,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +915,280,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +522,853,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +625,953,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +873,874,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +498,578,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +808,768,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +742,178,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +744,916,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +30,917,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +747,633,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +672,107,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +564,523,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +785,924,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +825,481,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +243,240,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +959,819,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +123,602,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +714,538,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +252,632,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +715,952,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +670,480,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +81,700,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +653,726,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +795,526,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +182,410,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +725,307,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +101,73,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +143,232,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +15,993,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +742,652,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +339,761,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +39,428,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +465,4,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +889,101,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +856,869,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 
+358,271,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +452,633,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +387,481,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +824,302,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +185,245,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +151,941,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +419,721,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +643,893,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +63,898,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +202,94,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +332,962,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +723,71,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +148,108,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +840,71,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +601,767,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +962,323,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +166,982,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +531,614,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +963,839,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +994,388,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +978,296,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +72,429,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +33,901,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +428,350,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +413,581,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +737,583,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +85,92,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +916,647,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +785,771,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +302,26,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +1000,598,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +458,715,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +896,74,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +615,580,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +174,848,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +651,118,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +784,54,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +121,929,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +341,393,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +615,820,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +697,336,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +215,299,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +197,747,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +205,154,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +256,486,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +377,251,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +577,225,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +686,77,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +332,74,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +534,596,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +485,493,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +594,782,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +413,487,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +13,127,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +483,538,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +820,94,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +745,252,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 
+79,722,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +36,536,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +950,958,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +74,466,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +458,309,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +609,680,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +429,539,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +956,511,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +205,505,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +629,720,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +277,823,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +266,21,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +872,142,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +435,95,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +988,398,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +953,328,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +556,151,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +211,978,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +389,918,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +351,542,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +14,96,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +181,496,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +452,77,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +511,236,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +193,913,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +797,49,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +988,967,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +487,502,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +941,790,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +577,121,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +456,55,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +982,739,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +593,683,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +702,610,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +528,248,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +873,530,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +301,889,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +769,245,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +724,473,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +466,938,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +774,150,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +111,772,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +954,201,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +780,945,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +210,177,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +93,378,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +332,83,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +186,803,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +782,398,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +41,215,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +222,194,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +992,287,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +477,410,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +948,50,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +817,204,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +597,239,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 
+649,637,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +3,938,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +731,788,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +181,399,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +468,576,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +891,187,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +226,703,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +28,455,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +609,244,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +224,868,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +230,353,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +216,101,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +282,924,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +501,144,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +320,0,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +720,910,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +464,259,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +363,107,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +49,63,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +223,270,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +452,554,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +210,154,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +444,205,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +222,441,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +678,183,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +25,459,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +57,810,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +981,268,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +740,916,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +408,742,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +966,522,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +107,299,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +488,677,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +759,709,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +504,310,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +99,160,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +503,698,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +724,540,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +309,901,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +625,34,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +294,536,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +890,780,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +501,716,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +34,532,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +203,871,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +140,199,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +845,845,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +774,591,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +645,378,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +986,942,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +296,686,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +936,720,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +341,546,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +32,845,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +277,667,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +548,627,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 
+727,142,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +812,655,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +168,556,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +150,459,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +136,89,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +695,726,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +363,38,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +853,60,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +621,369,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +764,381,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +669,465,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +772,981,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +228,758,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +261,31,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +821,237,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +100,285,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +465,94,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +350,561,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +991,143,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +910,95,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +206,341,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +263,388,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +374,272,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +875,890,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +810,734,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +398,364,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +565,619,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +417,517,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +291,781,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +251,327,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +449,48,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +774,809,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +386,73,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +22,936,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +940,400,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +132,736,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +103,211,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +152,271,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +952,855,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +872,923,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +748,854,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +749,769,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +876,271,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +860,383,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +900,29,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +705,185,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +913,351,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +315,560,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +466,840,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +233,517,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +906,949,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +148,633,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +661,636,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +847,138,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +768,481,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +866,408,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 
+475,130,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +112,813,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +136,661,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +763,311,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +388,872,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +996,643,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +486,174,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +494,528,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +771,124,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +49,126,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +322,440,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +878,881,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +827,292,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +852,873,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +716,357,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +81,247,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +916,18,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +673,395,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +242,620,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +914,946,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +902,72,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +707,691,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +223,95,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +619,878,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +254,757,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +688,898,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +477,172,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +280,419,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +546,849,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +630,807,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +455,599,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +505,59,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +823,790,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +891,574,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +840,96,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +436,376,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +168,352,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +177,741,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +727,12,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +278,157,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +443,10,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +905,544,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +881,817,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +507,754,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +363,425,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +603,492,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +473,485,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +128,369,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +105,560,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +325,651,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +711,326,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +983,180,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +241,935,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +71,403,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +395,345,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +168,278,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 
+512,376,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +291,104,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +776,543,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +271,798,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +946,333,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +195,833,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +165,132,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +238,629,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +409,337,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +720,300,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +309,470,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +812,875,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +441,237,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +500,272,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +517,860,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +924,415,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +572,140,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +768,367,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +692,195,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +28,245,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +202,285,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +76,98,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +421,932,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +636,898,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +52,330,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +184,603,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +739,280,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +841,507,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +65,202,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +623,513,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +517,132,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +636,21,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +845,657,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +232,195,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +26,323,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +680,299,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +364,811,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +572,739,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +145,889,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +644,189,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +87,698,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +620,646,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +535,562,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +661,753,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +884,425,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +689,693,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +646,941,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +4,975,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +813,455,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +773,260,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +205,69,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +657,147,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +154,533,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +747,881,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +787,457,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +867,441,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +307,859,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 
+571,177,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +92,633,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +269,382,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +764,707,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +662,566,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +818,349,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +617,128,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +649,231,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +895,258,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +750,812,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +738,362,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +107,133,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +278,60,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +32,88,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +129,378,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +187,569,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +670,186,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +678,875,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +423,636,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +389,360,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +257,677,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +780,708,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +159,158,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +97,384,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +479,927,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +9,134,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +614,273,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +261,27,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +115,209,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +358,470,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +133,219,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +891,907,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +702,778,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +58,998,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +606,194,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +668,933,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +813,708,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +450,949,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +956,579,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +276,131,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +889,689,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +708,908,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +14,524,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +904,336,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +272,916,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +257,236,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +343,965,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +80,350,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +530,599,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +340,901,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +595,935,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +47,667,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +279,104,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +293,803,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +162,64,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +935,825,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 
+689,839,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +484,184,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +230,348,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +164,904,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +401,219,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +607,381,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +229,524,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +786,902,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +92,212,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +455,762,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +409,182,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +166,442,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +277,919,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +92,67,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +631,741,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +390,617,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +403,214,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +964,202,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +223,788,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +684,639,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +645,336,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +470,937,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +424,399,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +862,21,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +736,125,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +554,635,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +790,229,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +115,770,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +853,622,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +643,109,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +794,975,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +892,820,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +728,123,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +744,135,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +678,535,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +768,971,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +234,166,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +333,814,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +968,557,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +119,820,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +469,486,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +261,429,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +984,65,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +845,977,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +374,410,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +687,150,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +157,630,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +49,488,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +817,112,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +223,598,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +433,705,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +41,226,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +396,979,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +131,19,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +521,204,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +751,805,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +45,549,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 
+144,912,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +119,427,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +728,1,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +120,540,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +657,940,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +409,644,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +881,821,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +113,560,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +831,309,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +129,1000,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +76,945,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +260,931,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +882,504,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +157,950,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +443,278,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +111,225,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +497,6,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +321,124,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +194,206,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +684,320,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +634,270,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +622,278,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +689,447,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +120,170,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +374,87,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +926,384,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +687,574,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +600,585,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +779,947,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +223,984,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +628,189,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +326,364,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +836,49,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +361,851,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +444,643,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +501,143,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +743,763,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +861,987,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +203,264,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +762,439,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +705,750,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +153,37,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +436,95,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +428,79,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +804,832,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +805,649,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +860,838,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +104,439,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +434,207,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +912,804,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +571,875,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +267,473,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +415,845,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +261,91,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +746,630,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +30,185,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +662,317,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 
+916,88,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +415,607,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +514,35,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +756,680,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +461,78,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +460,117,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +305,440,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +198,652,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +234,249,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +638,658,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +88,563,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +751,737,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +816,789,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +437,988,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +715,220,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +780,946,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +245,986,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +201,129,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +815,433,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +865,492,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +634,306,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +901,154,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +789,206,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +882,81,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +953,882,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +862,848,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +628,664,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +765,389,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +741,182,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +61,505,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +470,861,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +869,263,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +650,400,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +750,556,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +602,497,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +54,181,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +384,619,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +161,332,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +977,669,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +615,487,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +783,994,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +977,331,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +375,739,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +298,665,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +104,921,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +713,862,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +556,662,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +323,517,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +391,352,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +593,166,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +906,859,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +130,571,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +613,976,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +58,466,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +314,79,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +67,864,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +654,623,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +312,170,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 
+349,662,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +415,763,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +404,896,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +22,973,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +744,161,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +804,934,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +101,697,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +293,116,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +266,84,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +372,604,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +38,371,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +385,783,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +262,335,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +961,321,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +831,177,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +579,371,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +301,583,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +693,364,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +895,343,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +320,854,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +284,691,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +362,387,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +132,298,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +42,635,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +118,81,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +42,375,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +18,846,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +512,933,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +337,237,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +167,964,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +749,382,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +890,610,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +910,148,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +403,837,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +403,85,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +661,425,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +485,633,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +789,515,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +415,512,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +418,156,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +163,464,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +298,813,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +584,455,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +797,366,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +767,734,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +984,451,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +388,134,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +924,547,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +566,802,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +390,61,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +608,556,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +840,202,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +112,964,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +288,112,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +408,445,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +876,884,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 
+224,348,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +133,564,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +662,568,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +68,882,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +626,542,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +678,119,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +361,248,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +464,868,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +681,841,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +377,484,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +222,986,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +972,39,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +56,930,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +695,252,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +908,794,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +328,658,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +891,139,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +265,331,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +251,261,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +783,122,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +425,296,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +859,391,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +314,75,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +153,731,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +955,883,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +654,707,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +693,97,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +757,390,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +221,237,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +942,496,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +31,814,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +540,765,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +352,308,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +904,327,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +436,266,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +281,699,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +801,599,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +273,950,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +716,117,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +902,632,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +341,35,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +155,562,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +796,144,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +257,142,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +611,273,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +6,915,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +125,920,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +745,294,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +437,681,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +906,86,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +844,764,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +413,269,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +869,138,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +403,834,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +137,112,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +922,921,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +202,859,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 
+955,442,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +781,593,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +12,346,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +931,312,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +95,690,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +795,344,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +542,784,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +935,639,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +269,726,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +197,596,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +828,263,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +461,194,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +35,895,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +88,502,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +832,342,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +900,421,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +368,901,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +201,474,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +758,571,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +504,511,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +864,379,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +574,68,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +61,210,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +565,478,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +475,296,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +44,664,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +145,880,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +813,607,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +703,97,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +757,908,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +96,152,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +860,622,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +750,309,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +585,912,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +127,429,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +669,580,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +708,179,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +830,119,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +550,369,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +762,882,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +468,727,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +151,823,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +103,783,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +876,884,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +881,891,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +116,909,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +677,765,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +477,180,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +154,712,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +331,175,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +784,869,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +563,820,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +229,554,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +451,126,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +974,760,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +484,446,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +69,254,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 
+755,516,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +331,779,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +482,987,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +632,318,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +750,427,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +618,86,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +935,553,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +716,315,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +205,328,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +215,521,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +871,156,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +552,841,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +619,623,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +701,849,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +104,438,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +114,719,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +854,906,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +563,267,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +73,542,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +427,552,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +348,428,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +148,158,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +895,379,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +394,142,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +792,588,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +175,506,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +208,382,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +354,132,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +163,652,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +336,723,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +804,682,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +863,382,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +326,125,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +568,321,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +691,922,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +152,884,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +565,38,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +38,194,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +185,996,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +318,532,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +960,391,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +122,104,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +400,22,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +301,650,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +909,143,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +433,999,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +508,415,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +648,350,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +793,342,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +129,215,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +481,52,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +406,292,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +512,862,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +668,309,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +551,886,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +124,172,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +655,912,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +523,666,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 
+739,656,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +87,145,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +890,664,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +665,639,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +329,707,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +417,891,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +828,466,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +298,451,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +356,451,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +909,874,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +251,805,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +526,426,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +652,932,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +573,581,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 730bf94cb2987..c3fb85811ca2a 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -139,8 +139,8 @@ def test_productsales(): fname = os.path.join(dirpath, "productsales.sas7bdat") df = pd.read_sas(fname, encoding='utf-8') fname = os.path.join(dirpath, "productsales.csv") - df0 = pd.read_csv(fname) - vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR", "MONTH"] + df0 = pd.read_csv(fname, parse_dates=['MONTH']) + vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"] df0[vn] = df0[vn].astype(np.float64) tm.assert_frame_equal(df, df0) @@ -163,3 +163,14 @@ def test_airline(): df0 = pd.read_csv(fname) df0 = df0.astype(np.float64) tm.assert_frame_equal(df, df0, check_exact=False) + + +def test_date_time(): + # Support of different SAS date/datetime formats (PR #15871) + dirpath = tm.get_data_path() + fname = os.path.join(dirpath, "datetime.sas7bdat") + df = pd.read_sas(fname) + fname = os.path.join(dirpath, "datetime.csv") + df0 = pd.read_csv(fname, parse_dates=['Date1', 'Date2', 'DateTime', + 'DateTimeHi', 'Taiw']) + tm.assert_frame_equal(df, df0) From 34c4ffd7c454848311f71e6869b5cad4bc132449 Mon Sep 17 00:00:00 2001 From: zzgao Date: Fri, 18 Aug 2017 08:27:20 -0700 Subject: [PATCH 897/933] DOC: add example on json_normalize (#16438) --- doc/source/io.rst | 7 +++++++ pandas/io/json/normalize.py | 11 ++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index e6b51b7e2f45c..e338407361705 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2020,6 +2020,13 @@ into a flat table. .. ipython:: python from pandas.io.json import json_normalize + data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}}, + {'name': {'given': 'Mose', 'family': 'Regner'}}, + {'id': 2, 'name': 'Faye Raker'}] + json_normalize(data) + +.. ipython:: python + data = [{'state': 'Florida', 'shortname': 'FL', 'info': { diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index 401d8d9ead2b8..72776ed01de15 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -135,6 +135,16 @@ def json_normalize(data, record_path=None, meta=None, Examples -------- + >>> from pandas.io.json import json_normalize + >>> data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}}, + ... {'name': {'given': 'Mose', 'family': 'Regner'}}, + ... {'id': 2, 'name': 'Faye Raker'}] + >>> json_normalize(data) + id name name.family name.first name.given name.last + 0 1.0 NaN NaN Coleen NaN Volk + 1 NaN NaN Regner NaN Mose NaN + 2 2.0 Faye Raker NaN NaN NaN NaN + >>> data = [{'state': 'Florida', ... 
'shortname': 'FL', ... 'info': { @@ -150,7 +160,6 @@ def json_normalize(data, record_path=None, meta=None, ... }, ... 'counties': [{'name': 'Summit', 'population': 1234}, ... {'name': 'Cuyahoga', 'population': 1337}]}] - >>> from pandas.io.json import json_normalize >>> result = json_normalize(data, 'counties', ['state', 'shortname', ... ['info', 'governor']]) >>> result From 7818486859d1aba53ce359b93cfc772e688958e5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 19 Aug 2017 06:27:05 -0500 Subject: [PATCH 898/933] BUG: Have object dtype for empty Categorical.categories (#17249) * BUG: Have object dtype for empty Categorical ctor Previously we had a `Float64Index`, which is inconsistent with, e.g., the regular Index constructor. * TST: Update tests in multi for new return Previously these tests worked around the return type by wrapping list-likes in `np.array` and relying on that to cast to float. These workarounds are no longer necessary. * TST: Update union_categorical tests This relied on `NaN` being a float and empty being a float. Not a necessary test anymore. * TST: set object dtype --- doc/source/whatsnew/v0.21.0.txt | 3 +++ pandas/core/categorical.py | 5 ++++- pandas/tests/indexes/test_multi.py | 9 ++++----- pandas/tests/reshape/test_concat.py | 2 +- pandas/tests/reshape/test_union_categoricals.py | 12 +++--------- pandas/tests/test_categorical.py | 10 ++++++++++ 6 files changed, 25 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4f55c6388c728..6008ea5d4cbcd 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -385,6 +385,9 @@ Numeric Categorical ^^^^^^^^^^^ - Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`) +- Bug in the categorical constructor with empty values and categories causing + the ``.categories`` to be an empty ``Float64Index`` rather than an empty + ``Index`` with object dtype (:issue:`17248`) Other diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 230361931125e..1c2a29333001c 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -290,7 +290,10 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False): # On list with NaNs, int values will be converted to float. Use # "object" dtype to prevent this. In the end objects will be # casted to int/... in the category assignment step.
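# [Editor's sketch, not part of the patch: a hypothetical session showing
# the behaviour this hunk targets, per the commit message and the new
# test_constructor_empty test.]
#
#     >>> import pandas as pd
#     >>> pd.Categorical([]).categories
#     Index([], dtype='object')   # before the fix: Float64Index([], dtype='float64')
#     >>> pd.Index([])            # the plain Index constructor it now matches
#     Index([], dtype='object')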
- dtype = 'object' if isna(values).any() else None + if len(values) == 0 or isna(values).any(): + dtype = 'object' + else: + dtype = None values = _sanitize_array(values, None, dtype=dtype) if categories is None: diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index da1b309f5a621..c66775f4690cc 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -776,7 +776,7 @@ def test_from_arrays_empty(self): arrays = [[]] * N names = list('ABC')[:N] result = MultiIndex.from_arrays(arrays=arrays, names=names) - expected = MultiIndex(levels=[np.array([])] * N, labels=[[]] * N, + expected = MultiIndex(levels=[[]] * N, labels=[[]] * N, names=names) tm.assert_index_equal(result, expected) @@ -829,7 +829,7 @@ def test_from_product_empty(self): # 1 level result = MultiIndex.from_product([[]], names=['A']) - expected = pd.Float64Index([], name='A') + expected = pd.Index([], name='A') tm.assert_index_equal(result, expected) # 2 levels @@ -838,7 +838,7 @@ def test_from_product_empty(self): names = ['A', 'B'] for first, second in zip(l1, l2): result = MultiIndex.from_product([first, second], names=names) - expected = MultiIndex(levels=[np.array(first), np.array(second)], + expected = MultiIndex(levels=[first, second], labels=[[], []], names=names) tm.assert_index_equal(result, expected) @@ -847,8 +847,7 @@ def test_from_product_empty(self): for N in range(4): lvl2 = lrange(N) result = MultiIndex.from_product([[], lvl2, []], names=names) - expected = MultiIndex(levels=[np.array(A) - for A in [[], lvl2, []]], + expected = MultiIndex(levels=[[], lvl2, []], labels=[[], [], []], names=names) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 46fea86c45925..52cd18126859a 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -680,7 +680,7 @@ def test_concat_categorical_empty(self): tm.assert_series_equal(s1.append(s2, ignore_index=True), s2) s1 = pd.Series([], dtype='category') - s2 = pd.Series([]) + s2 = pd.Series([], dtype='object') # different dtype => not-category tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py index fe8d54005ba9b..eb80fb54b4016 100644 --- a/pandas/tests/reshape/test_union_categoricals.py +++ b/pandas/tests/reshape/test_union_categoricals.py @@ -107,17 +107,11 @@ def test_union_categoricals_empty(self): exp = Categorical([]) tm.assert_categorical_equal(res, exp) - res = union_categoricals([pd.Categorical([]), - pd.Categorical([1.0])]) - exp = Categorical([1.0]) + res = union_categoricals([Categorical([]), + Categorical(['1'])]) + exp = Categorical(['1']) tm.assert_categorical_equal(res, exp) - # to make dtype equal - nanc = pd.Categorical(np.array([np.nan], dtype=np.float64)) - res = union_categoricals([nanc, - pd.Categorical([])]) - tm.assert_categorical_equal(res, nanc) - def test_union_categorical_same_category(self): # check fastpath c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index a0b585a16ad9a..7bbe220378993 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -112,6 +112,16 @@ def test_setitem_listlike(self): result = c.codes[np.array([100000]).astype(np.int64)] tm.assert_numpy_array_equal(result, np.array([5], dtype='int8')) + def 
test_constructor_empty(self): + # GH 17248 + c = Categorical([]) + expected = Index([]) + tm.assert_index_equal(c.categories, expected) + + c = Categorical([], categories=[1, 2, 3]) + expected = pd.Int64Index([1, 2, 3]) + tm.assert_index_equal(c.categories, expected) + def test_constructor_unsortable(self): # it works! From 4e9c0d1f2156c656df5da4ac3f00190f0da5828b Mon Sep 17 00:00:00 2001 From: jschendel Date: Sat, 19 Aug 2017 10:51:05 -0600 Subject: [PATCH 899/933] CLN: replace %s syntax with .format in pandas.tseries (#17290) --- pandas/tseries/frequencies.py | 38 +++++----- pandas/tseries/holiday.py | 14 ++-- pandas/tseries/offsets.py | 137 +++++++++++++++++++--------------- 3 files changed, 105 insertions(+), 84 deletions(-) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index aa33a3849acb3..7f34bcaf52926 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -409,16 +409,17 @@ def _get_freq_str(base, mult=1): need_suffix = ['QS', 'BQ', 'BQS', 'YS', 'AS', 'BY', 'BA', 'BYS', 'BAS'] for __prefix in need_suffix: for _m in tslib._MONTHS: - _offset_to_period_map['%s-%s' % (__prefix, _m)] = \ - _offset_to_period_map[__prefix] + _alias = '{prefix}-{month}'.format(prefix=__prefix, month=_m) + _offset_to_period_map[_alias] = _offset_to_period_map[__prefix] for __prefix in ['A', 'Q']: for _m in tslib._MONTHS: - _alias = '%s-%s' % (__prefix, _m) + _alias = '{prefix}-{month}'.format(prefix=__prefix, month=_m) _offset_to_period_map[_alias] = _alias _days = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] for _d in _days: - _offset_to_period_map['W-%s' % _d] = 'W-%s' % _d + _alias = 'W-{day}'.format(day=_d) + _offset_to_period_map[_alias] = _alias def get_period_alias(offset_str): @@ -587,7 +588,7 @@ def _base_and_stride(freqstr): groups = opattern.match(freqstr) if not groups: - raise ValueError("Could not evaluate %s" % freqstr) + raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) stride = groups.group(1) @@ -775,8 +776,8 @@ def infer_freq(index, warn=True): if not (is_datetime64_dtype(values) or is_timedelta64_dtype(values) or values.dtype == object): - raise TypeError("cannot infer freq from a non-convertible " - "dtype on a Series of {0}".format(index.dtype)) + raise TypeError("cannot infer freq from a non-convertible dtype " + "on a Series of {dtype}".format(dtype=index.dtype)) index = values if is_period_arraylike(index): @@ -789,7 +790,7 @@ def infer_freq(index, warn=True): if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex): if isinstance(index, (pd.Int64Index, pd.Float64Index)): raise TypeError("cannot infer freq from a non-convertible index " - "type {0}".format(type(index))) + "type {type}".format(type=type(index))) index = index.values if not isinstance(index, pd.DatetimeIndex): @@ -956,15 +957,17 @@ def _infer_daily_rule(self): if annual_rule: nyears = self.ydiffs[0] month = _month_aliases[self.rep_stamp.month] - return _maybe_add_count('%s-%s' % (annual_rule, month), nyears) + alias = '{prefix}-{month}'.format(prefix=annual_rule, month=month) + return _maybe_add_count(alias, nyears) quarterly_rule = self._get_quarterly_rule() if quarterly_rule: nquarters = self.mdiffs[0] / 3 mod_dict = {0: 12, 2: 11, 1: 10} month = _month_aliases[mod_dict[self.rep_stamp.month % 3]] - return _maybe_add_count('%s-%s' % (quarterly_rule, month), - nquarters) + alias = '{prefix}-{month}'.format(prefix=quarterly_rule, + month=month) + return _maybe_add_count(alias, nquarters) monthly_rule = 
self._get_monthly_rule() if monthly_rule: @@ -974,8 +977,8 @@ def _infer_daily_rule(self): days = self.deltas[0] / _ONE_DAY if days % 7 == 0: # Weekly - alias = _weekday_rule_aliases[self.rep_stamp.weekday()] - return _maybe_add_count('W-%s' % alias, days / 7) + day = _weekday_rule_aliases[self.rep_stamp.weekday()] + return _maybe_add_count('W-{day}'.format(day=day), days / 7) else: return _maybe_add_count('D', days) @@ -1048,7 +1051,7 @@ def _get_wom_rule(self): week = week_of_months[0] + 1 wd = _weekday_rule_aliases[weekdays[0]] - return 'WOM-%d%s' % (week, wd) + return 'WOM-{week}{weekday}'.format(week=week, weekday=wd) class _TimedeltaFrequencyInferer(_FrequencyInferer): @@ -1058,15 +1061,16 @@ def _infer_daily_rule(self): days = self.deltas[0] / _ONE_DAY if days % 7 == 0: # Weekly - alias = _weekday_rule_aliases[self.rep_stamp.weekday()] - return _maybe_add_count('W-%s' % alias, days / 7) + wd = _weekday_rule_aliases[self.rep_stamp.weekday()] + alias = 'W-{weekday}'.format(weekday=wd) + return _maybe_add_count(alias, days / 7) else: return _maybe_add_count('D', days) def _maybe_add_count(base, count): if count != 1: - return '%d%s' % (count, base) + return '{count}{base}'.format(count=int(count), base=base) else: return base diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 9acb52ebe0e9f..d8bfa3013f8f7 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -174,16 +174,16 @@ class from pandas.tseries.offsets def __repr__(self): info = '' if self.year is not None: - info += 'year=%s, ' % self.year - info += 'month=%s, day=%s, ' % (self.month, self.day) + info += 'year={year}, '.format(year=self.year) + info += 'month={mon}, day={day}, '.format(mon=self.month, day=self.day) if self.offset is not None: - info += 'offset=%s' % self.offset + info += 'offset={offset}'.format(offset=self.offset) if self.observance is not None: - info += 'observance=%s' % self.observance + info += 'observance={obs}'.format(obs=self.observance) - repr = 'Holiday: %s (%s)' % (self.name, info) + repr = 'Holiday: {name} ({info})'.format(name=self.name, info=info) return repr def dates(self, start_date, end_date, return_name=False): @@ -374,8 +374,8 @@ def holidays(self, start=None, end=None, return_name=False): DatetimeIndex of holidays """ if self.rules is None: - raise Exception('Holiday Calendar %s does not have any ' - 'rules specified' % self.name) + raise Exception('Holiday Calendar {name} does not have any ' + 'rules specified'.format(name=self.name)) if start is None: start = AbstractHolidayCalendar.start_date diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 56ef703e67ca0..29cdda5548896 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -261,10 +261,10 @@ def apply_index(self, i): """ if not type(self) is DateOffset: - raise NotImplementedError("DateOffset subclass %s " + raise NotImplementedError("DateOffset subclass {name} " "does not have a vectorized " - "implementation" - % (self.__class__.__name__,)) + "implementation".format( + name=self.__class__.__name__)) relativedelta_fast = set(['years', 'months', 'weeks', 'days', 'hours', 'minutes', 'seconds', 'microseconds']) @@ -295,10 +295,10 @@ def apply_index(self, i): return i + (self._offset * self.n) else: # relativedelta with other keywords + kwd = set(self.kwds) - relativedelta_fast raise NotImplementedError("DateOffset with relativedelta " - "keyword(s) %s not able to be " - "applied vectorized" % - (set(self.kwds) - relativedelta_fast),) + "keyword(s) {kwd} 
not able to be " + "applied vectorized".format(kwd=kwd)) def isAnchored(self): return (self.n == 1) @@ -339,19 +339,20 @@ def __repr__(self): if attr not in exclude: attrs.append('='.join((attr, repr(getattr(self, attr))))) + plural = '' if abs(self.n) != 1: plural = 's' - else: - plural = '' - n_str = "" + n_str = '' if self.n != 1: - n_str = "%s * " % self.n + n_str = '{n} * '.format(n=self.n) - out = '<%s' % n_str + className + plural + attrs_str = '' if attrs: - out += ': ' + ', '.join(attrs) - out += '>' + attrs_str = ': ' + ', '.join(attrs) + + repr_content = ''.join([n_str, className, plural, attrs_str]) + out = '<{content}>'.format(content=repr_content) return out @property @@ -501,7 +502,7 @@ def freqstr(self): return repr(self) if self.n != 1: - fstr = '%d%s' % (self.n, code) + fstr = '{n}{code}'.format(n=self.n, code=code) else: fstr = code @@ -509,7 +510,7 @@ def freqstr(self): @property def nanos(self): - raise ValueError("{0} is a non-fixed frequency".format(self)) + raise ValueError("{name} is a non-fixed frequency".format(name=self)) class SingleConstructorOffset(DateOffset): @@ -518,7 +519,7 @@ class SingleConstructorOffset(DateOffset): def _from_name(cls, suffix=None): # default _from_name calls cls with no args if suffix: - raise ValueError("Bad freq suffix %s" % suffix) + raise ValueError("Bad freq suffix {suffix}".format(suffix=suffix)) return cls() @@ -531,21 +532,21 @@ class BusinessMixin(object): def __repr__(self): className = getattr(self, '_outputName', self.__class__.__name__) + plural = '' if abs(self.n) != 1: plural = 's' - else: - plural = '' - n_str = "" + n_str = '' if self.n != 1: - n_str = "%s * " % self.n + n_str = '{n} * '.format(n=self.n) - out = '<%s' % n_str + className + plural + self._repr_attrs() + '>' + repr_content = ''.join([n_str, className, plural, self._repr_attrs()]) + out = '<{content}>'.format(content=repr_content) return out def _repr_attrs(self): if self.offset: - attrs = ['offset=%s' % repr(self.offset)] + attrs = ['offset={offset!r}'.format(offset=self.offset)] else: attrs = None out = '' @@ -601,7 +602,7 @@ def freqstr(self): return repr(self) if self.n != 1: - fstr = '%d%s' % (self.n, code) + fstr = '{n}{code}'.format(n=self.n, code=code) else: fstr = code @@ -1109,7 +1110,8 @@ def name(self): if self.isAnchored: return self.rule_code else: - return "%s-%s" % (self.rule_code, _int_to_month[self.n]) + return "{code}-{month}".format(code=self.rule_code, + month=_int_to_month[self.n]) class MonthEnd(MonthOffset): @@ -1176,9 +1178,9 @@ def __init__(self, n=1, day_of_month=None, normalize=False, **kwds): else: self.day_of_month = int(day_of_month) if not self._min_day_of_month <= self.day_of_month <= 27: - raise ValueError('day_of_month must be ' - '{}<=day_of_month<=27, got {}'.format( - self._min_day_of_month, self.day_of_month)) + msg = 'day_of_month must be {min}<=day_of_month<=27, got {day}' + raise ValueError(msg.format(min=self._min_day_of_month, + day=self.day_of_month)) self.n = int(n) self.normalize = normalize self.kwds = kwds @@ -1190,7 +1192,7 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - suffix = '-{}'.format(self.day_of_month) + suffix = '-{day_of_month}'.format(day_of_month=self.day_of_month) return self._prefix + suffix @apply_wraps @@ -1576,8 +1578,8 @@ def __init__(self, n=1, normalize=False, **kwds): if self.weekday is not None: if self.weekday < 0 or self.weekday > 6: - raise ValueError('Day must be 0<=day<=6, got %d' % - self.weekday) + raise ValueError('Day must be 0<=day<=6, got {day}' + 
.format(day=self.weekday)) self._inc = timedelta(weeks=1) self.kwds = kwds @@ -1630,7 +1632,7 @@ def onOffset(self, dt): def rule_code(self): suffix = '' if self.weekday is not None: - suffix = '-%s' % (_int_to_weekday[self.weekday]) + suffix = '-{weekday}'.format(weekday=_int_to_weekday[self.weekday]) return self._prefix + suffix @classmethod @@ -1696,11 +1698,11 @@ def __init__(self, n=1, normalize=False, **kwds): raise ValueError('N cannot be 0') if self.weekday < 0 or self.weekday > 6: - raise ValueError('Day must be 0<=day<=6, got %d' % - self.weekday) + raise ValueError('Day must be 0<=day<=6, got {day}' + .format(day=self.weekday)) if self.week < 0 or self.week > 3: - raise ValueError('Week must be 0<=day<=3, got %d' % - self.week) + raise ValueError('Week must be 0<=week<=3, got {week}' + .format(week=self.week)) self.kwds = kwds @@ -1746,15 +1748,18 @@ def onOffset(self, dt): @property def rule_code(self): - return '%s-%d%s' % (self._prefix, self.week + 1, - _int_to_weekday.get(self.weekday, '')) + weekday = _int_to_weekday.get(self.weekday, '') + return '{prefix}-{week}{weekday}'.format(prefix=self._prefix, + week=self.week + 1, + weekday=weekday) _prefix = 'WOM' @classmethod def _from_name(cls, suffix=None): if not suffix: - raise ValueError("Prefix %r requires a suffix." % (cls._prefix)) + raise ValueError("Prefix {prefix!r} requires a suffix." + .format(prefix=cls._prefix)) # TODO: handle n here... # only one digit weeks (1 --> week 0, 2 --> week 1, etc.) week = int(suffix[0]) - 1 @@ -1789,8 +1794,8 @@ def __init__(self, n=1, normalize=False, **kwds): raise ValueError('N cannot be 0') if self.weekday < 0 or self.weekday > 6: - raise ValueError('Day must be 0<=day<=6, got %d' % - self.weekday) + raise ValueError('Day must be 0<=day<=6, got {day}' + .format(day=self.weekday)) self.kwds = kwds @@ -1829,14 +1834,17 @@ def onOffset(self, dt): @property def rule_code(self): - return '%s-%s' % (self._prefix, _int_to_weekday.get(self.weekday, '')) + weekday = _int_to_weekday.get(self.weekday, '') + return '{prefix}-{weekday}'.format(prefix=self._prefix, + weekday=weekday) _prefix = 'LWOM' @classmethod def _from_name(cls, suffix=None): if not suffix: - raise ValueError("Prefix %r requires a suffix." % (cls._prefix)) + raise ValueError("Prefix {prefix!r} requires a suffix." + .format(prefix=cls._prefix)) # TODO: handle n here... 
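# [Editor's sketch of the conversion this commit applies throughout
# pandas.tseries; the literal values are invented for illustration.]
# Named str.format fields replace positional %-interpolation and make the
# error messages self-documenting:
#
#     >>> 'Week must be 0<=week<=3, got %d' % 4                  # old style
#     'Week must be 0<=week<=3, got 4'
#     >>> 'Week must be 0<=week<=3, got {week}'.format(week=4)   # new style
#     'Week must be 0<=week<=3, got 4'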
weekday = _weekday_to_int[suffix] return cls(weekday=weekday) @@ -1876,7 +1884,8 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - return '%s-%s' % (self._prefix, _int_to_month[self.startingMonth]) + month = _int_to_month[self.startingMonth] + return '{prefix}-{month}'.format(prefix=self._prefix, month=month) class BQuarterEnd(QuarterOffset): @@ -2045,8 +2054,7 @@ def apply(self, other): @apply_index_wraps def apply_index(self, i): freq_month = 12 if self.startingMonth == 1 else self.startingMonth - 1 - # freq_month = self.startingMonth - freqstr = 'Q-%s' % (_int_to_month[freq_month],) + freqstr = 'Q-{month}'.format(month=_int_to_month[freq_month]) return self._beg_apply_index(i, freqstr) @@ -2071,7 +2079,8 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - return '%s-%s' % (self._prefix, _int_to_month[self.month]) + month = _int_to_month[self.month] + return '{prefix}-{month}'.format(prefix=self._prefix, month=month) class BYearEnd(YearOffset): @@ -2246,7 +2255,7 @@ def _rollf(date): @apply_index_wraps def apply_index(self, i): freq_month = 12 if self.month == 1 else self.month - 1 - freqstr = 'A-%s' % (_int_to_month[freq_month],) + freqstr = 'A-{month}'.format(month=_int_to_month[freq_month]) return self._beg_apply_index(i, freqstr) def onOffset(self, dt): @@ -2312,7 +2321,8 @@ def __init__(self, n=1, normalize=False, **kwds): raise ValueError('N cannot be 0') if self.variation not in ["nearest", "last"]: - raise ValueError('%s is not a valid variation' % self.variation) + raise ValueError('{variation} is not a valid variation' + .format(variation=self.variation)) if self.variation == "nearest": weekday_offset = weekday(self.weekday) @@ -2438,8 +2448,9 @@ def _get_year_end_last(self, dt): @property def rule_code(self): + prefix = self._get_prefix() suffix = self.get_rule_code_suffix() - return "%s-%s" % (self._get_prefix(), suffix) + return "{prefix}-{suffix}".format(prefix=prefix, suffix=suffix) def _get_prefix(self): return self._prefix @@ -2451,9 +2462,11 @@ def _get_suffix_prefix(self): return self._suffix_prefix_last def get_rule_code_suffix(self): - return '%s-%s-%s' % (self._get_suffix_prefix(), - _int_to_month[self.startingMonth], - _int_to_weekday[self.weekday]) + prefix = self._get_suffix_prefix() + month = _int_to_month[self.startingMonth] + weekday = _int_to_weekday[self.weekday] + return '{prefix}-{month}-{weekday}'.format(prefix=prefix, month=month, + weekday=weekday) @classmethod def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): @@ -2463,7 +2476,7 @@ def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): variation = "last" else: raise ValueError( - "Unable to parse varion_code: %s" % (varion_code,)) + "Unable to parse varion_code: {code}".format(code=varion_code)) startingMonth = _month_to_int[startingMonth_code] weekday = _weekday_to_int[weekday_code] @@ -2628,8 +2641,9 @@ def onOffset(self, dt): @property def rule_code(self): suffix = self._offset.get_rule_code_suffix() - return "%s-%s" % (self._prefix, - "%s-%d" % (suffix, self.qtr_with_extra_week)) + qtr = self.qtr_with_extra_week + return "{prefix}-{suffix}-{qtr}".format(prefix=self._prefix, + suffix=suffix, qtr=qtr) @classmethod def _from_name(cls, *args): @@ -2712,8 +2726,8 @@ def __add__(self, other): except ApplyTypeError: return NotImplemented except OverflowError: - raise OverflowError("the add operation between {} and {} " - "will overflow".format(self, other)) + raise OverflowError("the add operation between {self} and {other} " + 
"will overflow".format(self=self, other=other)) def __eq__(self, other): if isinstance(other, compat.string_types): @@ -2771,7 +2785,8 @@ def apply(self, other): elif isinstance(other, type(self)): return type(self)(self.n + other.n) - raise ApplyTypeError('Unhandled type: %s' % type(other).__name__) + raise ApplyTypeError('Unhandled type: {type_str}' + .format(type_str=type(other).__name__)) _prefix = 'undefined' @@ -2921,7 +2936,8 @@ def generate_range(start=None, end=None, periods=None, # faster than cur + offset next_date = offset.apply(cur) if next_date <= cur: - raise ValueError('Offset %s did not increment date' % offset) + raise ValueError('Offset {offset} did not increment date' + .format(offset=offset)) cur = next_date else: while cur >= end: @@ -2930,7 +2946,8 @@ def generate_range(start=None, end=None, periods=None, # faster than cur + offset next_date = offset.apply(cur) if next_date >= cur: - raise ValueError('Offset %s did not decrement date' % offset) + raise ValueError('Offset {offset} did not decrement date' + .format(offset=offset)) cur = next_date From ab32c0a3e2033456ede23dbfeffc6adc8c4ea190 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 19 Aug 2017 17:55:34 -0400 Subject: [PATCH 900/933] TST: parameterize consistency tests for rolling/expanding windows (#17292) --- pandas/tests/test_window.py | 403 ++++++++++++++++++------------------ 1 file changed, 203 insertions(+), 200 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 21a9b05d48126..1cc0ad8bb4041 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -2009,6 +2009,15 @@ def no_nans(x): _consistency_data = _create_consistency_data() +def _rolling_consistency_cases(): + for window in [1, 2, 3, 10, 20]: + for min_periods in set([0, 1, 2, 3, 4, window]): + if min_periods and (min_periods > window): + continue + for center in [False, True]: + yield window, min_periods, center + + class TestMomentsConsistency(Base): base_functions = [ (lambda v: Series(v).count(), None, 'count'), @@ -2177,7 +2186,11 @@ def _non_null_values(x): (mean_x * mean_y)) @pytest.mark.slow - def test_ewm_consistency(self): + @pytest.mark.parametrize( + 'min_periods, adjust, ignore_na', product([0, 1, 2, 3, 4], + [True, False], + [False, True])) + def test_ewm_consistency(self, min_periods, adjust, ignore_na): def _weights(s, com, adjust, ignore_na): if isinstance(s, DataFrame): if not len(s.columns): @@ -2231,52 +2244,51 @@ def _ewma(s, com, min_periods, adjust, ignore_na): return result com = 3. 
- for min_periods, adjust, ignore_na in product([0, 1, 2, 3, 4], - [True, False], - [False, True]): - # test consistency between different ewm* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: x.expanding().count(), - mean=lambda x: x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).mean(), - mock_mean=lambda x: _ewma(x, com=com, - min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na), - corr=lambda x, y: x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).corr(y), - var_unbiased=lambda x: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).var(bias=False)), - std_unbiased=lambda x: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .std(bias=False)), - cov_unbiased=lambda x, y: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .cov(y, bias=False)), - var_biased=lambda x: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .var(bias=True)), - std_biased=lambda x: x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).std(bias=True), - cov_biased=lambda x, y: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .cov(y, bias=True)), - var_debiasing_factors=lambda x: ( - _variance_debiasing_factors(x, com=com, adjust=adjust, - ignore_na=ignore_na))) + # test consistency between different ewm* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: x.expanding().count(), + mean=lambda x: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).mean(), + mock_mean=lambda x: _ewma(x, com=com, + min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na), + corr=lambda x, y: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).corr(y), + var_unbiased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).var(bias=False)), + std_unbiased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .std(bias=False)), + cov_unbiased=lambda x, y: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .cov(y, bias=False)), + var_biased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .var(bias=True)), + std_biased=lambda x: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).std(bias=True), + cov_biased=lambda x, y: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .cov(y, bias=True)), + var_debiasing_factors=lambda x: ( + _variance_debiasing_factors(x, com=com, adjust=adjust, + ignore_na=ignore_na))) @pytest.mark.slow - def test_expanding_consistency(self): + @pytest.mark.parametrize( + 'min_periods', [0, 1, 2, 3, 4]) + def test_expanding_consistency(self, min_periods): # suppress warnings about empty slices, as we are deliberately testing # with empty/0-length Series/DataFrames @@ -2285,72 +2297,72 @@ def test_expanding_consistency(self): message=".*(empty slice|0 for slice).*", category=RuntimeWarning) - for min_periods in [0, 1, 2, 3, 4]: - - # test consistency between different expanding_* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: x.expanding().count(), - mean=lambda x: x.expanding( - min_periods=min_periods).mean(), - mock_mean=lambda x: x.expanding( - 
min_periods=min_periods).sum() / x.expanding().count(), - corr=lambda x, y: x.expanding( - min_periods=min_periods).corr(y), - var_unbiased=lambda x: x.expanding( - min_periods=min_periods).var(), - std_unbiased=lambda x: x.expanding( - min_periods=min_periods).std(), - cov_unbiased=lambda x, y: x.expanding( - min_periods=min_periods).cov(y), - var_biased=lambda x: x.expanding( - min_periods=min_periods).var(ddof=0), - std_biased=lambda x: x.expanding( - min_periods=min_periods).std(ddof=0), - cov_biased=lambda x, y: x.expanding( - min_periods=min_periods).cov(y, ddof=0), - var_debiasing_factors=lambda x: ( - x.expanding().count() / - (x.expanding().count() - 1.) - .replace(0., np.nan))) - - # test consistency between expanding_xyz() and either (a) - # expanding_apply of Series.xyz(), or (b) expanding_apply of - # np.nanxyz() - for (x, is_constant, no_nans) in self.data: - functions = self.base_functions - - # GH 8269 - if no_nans: - functions = self.base_functions + self.no_nan_functions - for (f, require_min_periods, name) in functions: - expanding_f = getattr( - x.expanding(min_periods=min_periods), name) - - if (require_min_periods and - (min_periods is not None) and - (min_periods < require_min_periods)): - continue - - if name == 'count': - expanding_f_result = expanding_f() - expanding_apply_f_result = x.expanding( - min_periods=0).apply(func=f) + # test consistency between different expanding_* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: x.expanding().count(), + mean=lambda x: x.expanding( + min_periods=min_periods).mean(), + mock_mean=lambda x: x.expanding( + min_periods=min_periods).sum() / x.expanding().count(), + corr=lambda x, y: x.expanding( + min_periods=min_periods).corr(y), + var_unbiased=lambda x: x.expanding( + min_periods=min_periods).var(), + std_unbiased=lambda x: x.expanding( + min_periods=min_periods).std(), + cov_unbiased=lambda x, y: x.expanding( + min_periods=min_periods).cov(y), + var_biased=lambda x: x.expanding( + min_periods=min_periods).var(ddof=0), + std_biased=lambda x: x.expanding( + min_periods=min_periods).std(ddof=0), + cov_biased=lambda x, y: x.expanding( + min_periods=min_periods).cov(y, ddof=0), + var_debiasing_factors=lambda x: ( + x.expanding().count() / + (x.expanding().count() - 1.) 
+ .replace(0., np.nan))) + + # test consistency between expanding_xyz() and either (a) + # expanding_apply of Series.xyz(), or (b) expanding_apply of + # np.nanxyz() + for (x, is_constant, no_nans) in self.data: + functions = self.base_functions + + # GH 8269 + if no_nans: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + expanding_f = getattr( + x.expanding(min_periods=min_periods), name) + + if (require_min_periods and + (min_periods is not None) and + (min_periods < require_min_periods)): + continue + + if name == 'count': + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding( + min_periods=0).apply(func=f) + else: + if name in ['cov', 'corr']: + expanding_f_result = expanding_f( + pairwise=False) else: - if name in ['cov', 'corr']: - expanding_f_result = expanding_f( - pairwise=False) - else: - expanding_f_result = expanding_f() - expanding_apply_f_result = x.expanding( - min_periods=min_periods).apply(func=f) - - if not tm._incompat_bottleneck_version(name): - assert_equal(expanding_f_result, - expanding_apply_f_result) + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding( + min_periods=min_periods).apply(func=f) + + if not tm._incompat_bottleneck_version(name): + assert_equal(expanding_f_result, + expanding_apply_f_result) @pytest.mark.slow - def test_rolling_consistency(self): + @pytest.mark.parametrize( + 'window,min_periods,center', list(_rolling_consistency_cases())) + def test_rolling_consistency(self, window, min_periods, center): # suppress warnings about empty slices, as we are deliberately testing # with empty/0-length Series/DataFrames @@ -2359,100 +2371,91 @@ def test_rolling_consistency(self): message=".*(empty slice|0 for slice).*", category=RuntimeWarning) - def cases(): - for window in [1, 2, 3, 10, 20]: - for min_periods in set([0, 1, 2, 3, 4, window]): - if min_periods and (min_periods > window): - continue - for center in [False, True]: - yield window, min_periods, center - - for window, min_periods, center in cases(): - # test consistency between different rolling_* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: ( - x.rolling(window=window, center=center) - .count()), - mean=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).mean()), - mock_mean=lambda x: ( - x.rolling(window=window, - min_periods=min_periods, - center=center).sum() - .divide(x.rolling(window=window, - min_periods=min_periods, - center=center).count())), - corr=lambda x, y: ( - x.rolling(window=window, min_periods=min_periods, - center=center).corr(y)), - - var_unbiased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).var()), - - std_unbiased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).std()), - - cov_unbiased=lambda x, y: ( - x.rolling(window=window, min_periods=min_periods, - center=center).cov(y)), - - var_biased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).var(ddof=0)), - - std_biased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).std(ddof=0)), - - cov_biased=lambda x, y: ( - x.rolling(window=window, min_periods=min_periods, - center=center).cov(y, ddof=0)), - var_debiasing_factors=lambda x: ( - x.rolling(window=window, center=center).count() - .divide((x.rolling(window=window, center=center) - .count() - 1.) 
- .replace(0., np.nan)))) - - # test consistency between rolling_xyz() and either (a) - # rolling_apply of Series.xyz(), or (b) rolling_apply of - # np.nanxyz() - for (x, is_constant, no_nans) in self.data: - functions = self.base_functions - - # GH 8269 - if no_nans: - functions = self.base_functions + self.no_nan_functions - for (f, require_min_periods, name) in functions: - rolling_f = getattr( - x.rolling(window=window, center=center, - min_periods=min_periods), name) - - if require_min_periods and ( - min_periods is not None) and ( - min_periods < require_min_periods): - continue + # test consistency between different rolling_* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: ( + x.rolling(window=window, center=center) + .count()), + mean=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).mean()), + mock_mean=lambda x: ( + x.rolling(window=window, + min_periods=min_periods, + center=center).sum() + .divide(x.rolling(window=window, + min_periods=min_periods, + center=center).count())), + corr=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).corr(y)), - if name == 'count': - rolling_f_result = rolling_f() - rolling_apply_f_result = x.rolling( - window=window, min_periods=0, - center=center).apply(func=f) + var_unbiased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).var()), + + std_unbiased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).std()), + + cov_unbiased=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).cov(y)), + + var_biased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).var(ddof=0)), + + std_biased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).std(ddof=0)), + + cov_biased=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).cov(y, ddof=0)), + var_debiasing_factors=lambda x: ( + x.rolling(window=window, center=center).count() + .divide((x.rolling(window=window, center=center) + .count() - 1.) 
+ .replace(0., np.nan)))) + + # test consistency between rolling_xyz() and either (a) + # rolling_apply of Series.xyz(), or (b) rolling_apply of + # np.nanxyz() + for (x, is_constant, no_nans) in self.data: + functions = self.base_functions + + # GH 8269 + if no_nans: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + rolling_f = getattr( + x.rolling(window=window, center=center, + min_periods=min_periods), name) + + if require_min_periods and ( + min_periods is not None) and ( + min_periods < require_min_periods): + continue + + if name == 'count': + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling( + window=window, min_periods=0, + center=center).apply(func=f) + else: + if name in ['cov', 'corr']: + rolling_f_result = rolling_f( + pairwise=False) else: - rolling_f_result = rolling_f() - rolling_apply_f_result = x.rolling( - window=window, min_periods=min_periods, - center=center).apply(func=f) - if not tm._incompat_bottleneck_version(name): - assert_equal(rolling_f_result, - rolling_apply_f_result) + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling( + window=window, min_periods=min_periods, + center=center).apply(func=f) + if not tm._incompat_bottleneck_version(name): + assert_equal(rolling_f_result, + rolling_apply_f_result) # binary moments def test_rolling_cov(self): From 3b02e73b856a6f8d53382bf3908f04447bf90e03 Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Sat, 19 Aug 2017 17:59:19 -0400 Subject: [PATCH 901/933] FIX: define `DataFrame.items` for all versions of python (#17214) --- doc/source/whatsnew/v0.21.0.txt | 4 ++++ pandas/core/frame.py | 3 +-- pandas/core/series.py | 3 +-- pandas/tests/frame/test_api.py | 11 ++++++++++- pandas/tests/series/test_api.py | 10 ++++++++++ 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6008ea5d4cbcd..c5fe89282bf52 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -128,6 +128,10 @@ Other Enhancements - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) - `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). - :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`). +- :func:`DataFrame.items` and :func:`Series.items` are now present in both Python 2 and 3 and are lazy in all cases (:issue:`13918`, :issue:`17213`) + + + ..
_whatsnew_0210.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 467ef52de234e..b5b3df64d24c0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -802,8 +802,7 @@ def itertuples(self, index=True, name="Pandas"): # fallback to regular tuples return zip(*arrays) - if compat.PY3: # pragma: no cover - items = iteritems + items = iteritems def __len__(self): """Returns length of info axis, but here we use the index """ diff --git a/pandas/core/series.py b/pandas/core/series.py index c8282450b77a9..75dc3d6403650 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1110,8 +1110,7 @@ def iteritems(self): """ return zip(iter(self.index), iter(self)) - if compat.PY3: # pragma: no cover - items = iteritems + items = iteritems # ---------------------------------------------------------------------- # Misc public methods diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 53a1b9525a0dd..a62fcb506a34b 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -171,7 +171,16 @@ def test_nonzero(self): def test_iteritems(self): df = self.klass([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) for k, v in compat.iteritems(df): - assert type(v) == self.klass._constructor_sliced + assert isinstance(v, self.klass._constructor_sliced) + + def test_items(self): + # issue #17213, #13918 + cols = ['a', 'b', 'c'] + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols) + for c, (k, v) in zip(cols, df.items()): + assert c == k + assert isinstance(v, Series) + assert (df[k] == v).all() def test_iter(self): assert tm.equalContents(list(self.frame), self.frame.columns) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 8e22dd38030ee..b7fbe803f8d3b 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -301,6 +301,16 @@ def test_iteritems(self): # assert is lazy (genrators don't define reverse, lists do) assert not hasattr(self.series.iteritems(), 'reverse') + def test_items(self): + for idx, val in self.series.items(): + assert val == self.series[idx] + + for idx, val in self.ts.items(): + assert val == self.ts[idx] + + # assert is lazy (genrators don't define reverse, lists do) + assert not hasattr(self.series.items(), 'reverse') + def test_raise_on_info(self): s = Series(np.random.randn(10)) with pytest.raises(AttributeError): From 58d872903449b8a29237288ade6227cdb280fe18 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 20 Aug 2017 16:25:43 -0500 Subject: [PATCH 902/933] PERF: Update ASV publish config (#17293) Stricter cutoffs for considering regressions [ci skip] --- asv_bench/asv.conf.json | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 59c05400d06b0..ced4f2b12445f 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -117,8 +117,10 @@ // with results. If the commit is `null`, regression detection is // skipped for the matching benchmark. 
// - // "regressions_first_commits": { - // "some_benchmark": "352cdf", // Consider regressions only after this commit - // "another_benchmark": null, // Skip regression detection altogether - // } + "regressions_first_commits": { + "*": "v0.20.0" + }, + "regression_thresholds": { + "*": 0.05 + } } From e14431f897c7c0afd76d627ba933c07c277f8deb Mon Sep 17 00:00:00 2001 From: Yosuke Nakabayashi Date: Mon, 21 Aug 2017 09:50:44 +0200 Subject: [PATCH 903/933] DOC: Expand docstrings for head / tail methods (#16941) --- pandas/core/generic.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5a7f37bba91aa..d9d75c870b20c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2978,14 +2978,36 @@ def filter(self, items=None, like=None, regex=None, axis=None): def head(self, n=5): """ - Returns first n rows + Return the first n rows. + + Parameters + ---------- + n : int, default 5 + Number of rows to select. + + Returns + ------- + obj_head : type of caller + The first n rows of the caller object. """ + return self.iloc[:n] def tail(self, n=5): """ - Returns last n rows + Return the last n rows. + + Parameters + ---------- + n : int, default 5 + Number of rows to select. + + Returns + ------- + obj_tail : type of caller + The last n rows of the caller object. """ + if n == 0: return self.iloc[0:0] return self.iloc[-n:] From 8354a1dfa9073eab1b120d39be31103fc29394bb Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 21 Aug 2017 00:56:39 -0700 Subject: [PATCH 904/933] MAINT: Use set literal for unsupported + depr args Initializes unsupported and deprecated argument sets with set literals instead of the set constructor in pandas/io/parsers.py, as the former is slightly faster than the latter. --- pandas/io/parsers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 05a04f268f72b..a9821be3fa5e2 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -487,18 +487,18 @@ def _read(filepath_or_buffer, kwds): 'widths': None, } -_c_unsupported = set(['skipfooter']) -_python_unsupported = set([ +_c_unsupported = {'skipfooter'} +_python_unsupported = { 'low_memory', 'buffer_lines', 'float_precision', -]) -_deprecated_args = set([ +} +_deprecated_args = { 'as_recarray', 'buffer_lines', 'compact_ints', 'use_unsigned', -]) +} def _make_parser_function(name, sep=','): From 91245a758ee32658c66bdecd9556f7054cd99901 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 21 Aug 2017 01:14:50 -0700 Subject: [PATCH 905/933] DOC: Add proper docstring to maybe_convert_indices Patches several spelling errors and expands current doc to a proper doc-string. --- pandas/core/indexing.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 109183827de4e..929c2346ba5b0 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1985,9 +1985,31 @@ def get_indexer(_i, _idx): def maybe_convert_indices(indices, n): - """ if we have negative indicies, translate to postive here - if have indicies that are out-of-bounds, raise an IndexError """ + Attempt to convert indices into valid, positive indices. + + If we have negative indices, translate to positive here. + If we have indices that are out-of-bounds, raise an IndexError. + + Parameters + ---------- + indices : array-like + The array of indices that we are to convert. 
+ n : int + The number of elements in the array that we are indexing. + + Returns + ------- + valid_indices : array-like + An array-like of positive indices that correspond to the ones + that were passed in initially to this function. + + Raises + ------ + IndexError : one of the converted indices either exceeded the number + of elements (specified by `n`) OR was still negative. + """ + if isinstance(indices, list): indices = np.array(indices) if len(indices) == 0: From d0d28fec180ee61de17921fe5068ecde95adae8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?agust=C3=ADn=20m=C3=A9ndez?= Date: Mon, 21 Aug 2017 10:27:24 +0200 Subject: [PATCH 906/933] DOC: Improving docstring of take method (#16948) --- pandas/core/generic.py | 67 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d9d75c870b20c..c83b1073afc8e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2063,18 +2063,77 @@ def __delitem__(self, key): def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs): """ - Analogous to ndarray.take + Return the elements in the given *positional* indices along an axis. + + This means that we are not indexing according to actual values in + the index attribute of the object. We are indexing according to the + actual position of the element in the object. Parameters ---------- - indices : list / array of ints + indices : array-like + An array of ints indicating which positions to take. axis : int, default 0 - convert : translate neg to pos indices (default) - is_copy : mark the returned frame as a copy + The axis on which to select elements. "0" means that we are + selecting rows, "1" means that we are selecting columns, etc. + convert : bool, default True + Whether to convert negative indices to positive ones, just as with + indexing into Python lists. For example, if `-1` was passed in, + this index would be converted ``n - 1``. + is_copy : bool, default True + Whether to return a copy of the original object or not. + + Examples + -------- + >>> df = pd.DataFrame([('falcon', 'bird', 389.0), + ('parrot', 'bird', 24.0), + ('lion', 'mammal', 80.5), + ('monkey', 'mammal', np.nan)], + columns=('name', 'class', 'max_speed'), + index=[0, 2, 3, 1]) + >>> df + name class max_speed + 0 falcon bird 389.0 + 2 parrot bird 24.0 + 3 lion mammal 80.5 + 1 monkey mammal NaN + + Take elements at positions 0 and 3 along the axis 0 (default). + + Note how the actual indices selected (0 and 1) do not correspond to + our selected indices 0 and 3. That's because we are selecting the 0th + and 3rd rows, not rows whose indices equal 0 and 3. + + >>> df.take([0, 3]) + 0 falcon bird 389.0 + 1 monkey mammal NaN + + Take elements at indices 1 and 2 along the axis 1 (column selection). + + >>> df.take([1, 2], axis=1) + class max_speed + 0 bird 389.0 + 2 bird 24.0 + 3 mammal 80.5 + 1 mammal NaN + + We may take elements using negative integers for positive indices, + starting from the end of the object, just like with Python lists. + + >>> df.take([-1, -2]) + name class max_speed + 1 monkey mammal NaN + 3 lion mammal 80.5 Returns ------- taken : type of caller + An array-like containing the elements taken from the object. 
+ + See Also + -------- + numpy.ndarray.take + numpy.take """ nv.validate_take(tuple(), kwargs) self._consolidate_inplace() From 91c2f1f6acde8e5f571d12716e72327747183247 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 21 Aug 2017 14:39:50 -0500 Subject: [PATCH 907/933] BUG: Fixed regex in asv.conf.json (#17300) In https://github.com/pandas-dev/pandas/pull/17293 I messed up the syntax. I used a glob instead of a regex. According to the docs at http://asv.readthedocs.io/en/latest/asv.conf.json.html#regressions-thresholds we want to use a regex. I've actually manually tested this change and verified that it works. [ci skip] --- asv_bench/asv.conf.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index ced4f2b12445f..9c333f62810f4 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -118,9 +118,9 @@ // skipped for the matching benchmark. // "regressions_first_commits": { - "*": "v0.20.0" + ".*": "v0.20.0" }, "regression_thresholds": { - "*": 0.05 + ".*": 0.05 } } From eff1f889d26fb47467124b103cb70045f85fdf84 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 21 Aug 2017 16:49:17 -0700 Subject: [PATCH 908/933] Remove unnecessary usage of _TSObject (#17297) --- pandas/_libs/period.pyx | 20 -------------------- pandas/_libs/src/datetime.pxd | 32 -------------------------------- pandas/_libs/tslib.pyx | 35 ++++++----------------------------- 3 files changed, 6 insertions(+), 81 deletions(-) diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index e017d863e1907..6ba7ec0270f30 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -120,26 +120,6 @@ initialize_daytime_conversion_factor_matrix() # Period logic #---------------------------------------------------------------------- -cdef inline int64_t apply_mult(int64_t period_ord, int64_t mult): - """ - Get freq+multiple ordinal value from corresponding freq-only ordinal value. - For example, 5min ordinal will be 1/5th the 1min ordinal (rounding down to - integer). - """ - if mult == 1: - return period_ord - - return (period_ord - 1) // mult - -cdef inline int64_t remove_mult(int64_t period_ord_w_mult, int64_t mult): - """ - Get freq-only ordinal value from corresponding freq+multiple ordinal. 
- """ - if mult == 1: - return period_ord_w_mult - - return period_ord_w_mult * mult + 1; - @cython.wraparound(False) @cython.boundscheck(False) diff --git a/pandas/_libs/src/datetime.pxd b/pandas/_libs/src/datetime.pxd index 2267c8282ec14..23620e790c132 100644 --- a/pandas/_libs/src/datetime.pxd +++ b/pandas/_libs/src/datetime.pxd @@ -88,11 +88,6 @@ cdef extern from "datetime/np_datetime.h": int cmp_pandas_datetimestruct(pandas_datetimestruct *a, pandas_datetimestruct *b) - int convert_pydatetime_to_datetimestruct(PyObject *obj, - pandas_datetimestruct *out, - PANDAS_DATETIMEUNIT *out_bestunit, - int apply_tzinfo) - npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *d) nogil void pandas_datetime_to_datetimestruct(npy_datetime val, @@ -112,12 +107,6 @@ cdef extern from "datetime/np_datetime_strings.h": PANDAS_DATETIMEUNIT *out_bestunit, npy_bool *out_special) - int make_iso_8601_datetime(pandas_datetimestruct *dts, char *outstr, int outlen, - int local, PANDAS_DATETIMEUNIT base, int tzoffset, - NPY_CASTING casting) - - int get_datetime_iso_8601_strlen(int local, PANDAS_DATETIMEUNIT base) - # int parse_python_string(object obj, pandas_datetimestruct *out) except -1 @@ -152,16 +141,6 @@ cdef inline int _cstring_to_dts(char *val, int length, return result -cdef inline object _datetime64_to_datetime(int64_t val): - cdef pandas_datetimestruct dts - pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) - return _dts_to_pydatetime(&dts) - -cdef inline object _dts_to_pydatetime(pandas_datetimestruct *dts): - return PyDateTime_FromDateAndTime(dts.year, dts.month, - dts.day, dts.hour, - dts.min, dts.sec, dts.us) - cdef inline int64_t _pydatetime_to_dts(object val, pandas_datetimestruct *dts): dts.year = PyDateTime_GET_YEAR(val) dts.month = PyDateTime_GET_MONTH(val) @@ -173,17 +152,6 @@ cdef inline int64_t _pydatetime_to_dts(object val, pandas_datetimestruct *dts): dts.ps = dts.as = 0 return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) -cdef inline int64_t _dtlike_to_datetime64(object val, - pandas_datetimestruct *dts): - dts.year = val.year - dts.month = val.month - dts.day = val.day - dts.hour = val.hour - dts.min = val.minute - dts.sec = val.second - dts.us = val.microsecond - dts.ps = dts.as = 0 - return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) cdef inline int64_t _date_to_datetime64(object val, pandas_datetimestruct *dts): diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 32b8c92a50269..c4a38ec660a4c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -705,7 +705,6 @@ class Timestamp(_Timestamp): pandas_datetimestruct dts int64_t value object _tzinfo, result, k, v - _TSObject ts # set to naive if needed _tzinfo = self.tzinfo @@ -1009,10 +1008,6 @@ def unique_deltas(ndarray[int64_t] arr): return result -cdef inline bint _is_multiple(int64_t us, int64_t mult): - return us % mult == 0 - - cdef inline bint _cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: if op == Py_EQ: return lhs == rhs @@ -4694,7 +4689,6 @@ def get_date_field(ndarray[int64_t] dtindex, object field): field and return an array of these values. """ cdef: - _TSObject ts Py_ssize_t i, count = 0 ndarray[int32_t] out ndarray[int32_t, ndim=2] _month_offset @@ -4876,7 +4870,6 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, (defined by frequency). 
""" cdef: - _TSObject ts Py_ssize_t i int count = 0 bint is_business = 0 @@ -4925,9 +4918,8 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) dom = dts.day - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if (dom == 1 and dow < 5) or (dom <= 3 and dow == 0): out[i] = 1 @@ -4951,13 +4943,12 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) mo_off = _month_offset[isleap, dts.month - 1] dom = dts.day doy = mo_off + dom ldom = _month_offset[isleap, dts.month] - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if (ldom == doy and dow < 5) or ( dow == 4 and (ldom - doy <= 2)): @@ -4986,9 +4977,8 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) dom = dts.day - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if ((dts.month - start_month) % 3 == 0) and ( (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): @@ -5013,13 +5003,12 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) mo_off = _month_offset[isleap, dts.month - 1] dom = dts.day doy = mo_off + dom ldom = _month_offset[isleap, dts.month] - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if ((dts.month - end_month) % 3 == 0) and ( (ldom == doy and dow < 5) or ( @@ -5049,9 +5038,8 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) dom = dts.day - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if (dts.month == start_month) and ( (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): @@ -5076,12 +5064,11 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) dom = dts.day mo_off = _month_offset[isleap, dts.month - 1] doy = mo_off + dom - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) ldom = _month_offset[isleap, dts.month] if (dts.month == end_month) and ( @@ -5095,7 +5082,6 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) mo_off = _month_offset[isleap, dts.month - 1] dom = dts.day @@ -5117,7 +5103,6 @@ def get_date_name_field(ndarray[int64_t] dtindex, object field): name based on requested field (e.g. 
weekday_name) """ cdef: - _TSObject ts Py_ssize_t i, count = 0 ndarray[object] out pandas_datetimestruct dts @@ -5143,10 +5128,6 @@ def get_date_name_field(ndarray[int64_t] dtindex, object field): raise ValueError("Field %s not supported" % field) -cdef inline int m8_weekday(int64_t val): - ts = convert_to_tsobject(val, None, None, 0, 0) - return ts_dayofweek(ts) - cdef int64_t DAY_NS = 86400000000000LL @@ -5156,11 +5137,9 @@ def date_normalize(ndarray[int64_t] stamps, tz=None): cdef: Py_ssize_t i, n = len(stamps) pandas_datetimestruct dts - _TSObject tso ndarray[int64_t] result = np.empty(n, dtype=np.int64) if tz is not None: - tso = _TSObject() tz = maybe_get_tz(tz) result = _normalize_local(stamps, tz) else: @@ -5305,8 +5284,6 @@ def monthrange(int64_t year, int64_t month): return (dayofweek(year, month, 1), days) -cdef inline int64_t ts_dayofweek(_TSObject ts): - return dayofweek(ts.dts.year, ts.dts.month, ts.dts.day) cdef inline int days_in_month(pandas_datetimestruct dts) nogil: return days_per_month_table[is_leapyear(dts.year)][dts.month -1] From 910207ffe518413e84cfa95d772cb66d57a0d08e Mon Sep 17 00:00:00 2001 From: Michael Gasvoda Date: Mon, 21 Aug 2017 19:51:18 -0400 Subject: [PATCH 909/933] BUG: clip should handle null values closes #17276 Author: Michael Gasvoda Author: mgasvoda Closes #17288 from mgasvoda/master and squashes the following commits: a1dbdf293 [mgasvoda] Merge branch 'master' into master 9333952c2 [Michael Gasvoda] Checking output of tests 4e0464eaf [Michael Gasvoda] fixing whatsnew text c44204080 [Michael Gasvoda] formatting fixes 7e2367879 [Michael Gasvoda] formatting updates 781ea724a [Michael Gasvoda] whatsnew entry d9627fe4c [Michael Gasvoda] adding clip tests 9aa0159e9 [Michael Gasvoda] Treating na values as none for clips --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/generic.py | 12 ++++++++---- pandas/tests/frame/test_analytics.py | 26 ++++++++++---------------- pandas/tests/series/test_analytics.py | 11 +++++++++++ 4 files changed, 30 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index c5fe89282bf52..0d2c52c70b345 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -132,7 +132,6 @@ Other Enhancements - .. _whatsnew_0210.api_breaking: Backwards incompatible API changes @@ -384,6 +383,7 @@ Reshaping Numeric ^^^^^^^ - Bug in ``.clip()`` with ``axis=1`` and a list-like for ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`) +- :func:`Series.clip()` and :func:`DataFrame.clip()` now treat NA values for upper and lower arguments as ``None`` instead of raising ``ValueError`` (:issue:`17276`). 
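
The new ``clip`` behaviour described in the entry above can be made
concrete with a minimal sketch mirroring the tests added in this patch
(the series values are illustrative assumptions; requires pandas >= 0.21):

    import numpy as np
    import pandas as pd

    s = pd.Series([1, 2, 3])

    # A NaN threshold is now treated as "no bound" instead of raising
    assert s.clip(np.nan).tolist() == [1, 2, 3]
    assert s.clip(lower=np.nan, upper=np.nan).tolist() == [1, 2, 3]

    # A list-like threshold containing any NaN is likewise ignored
    assert s.clip(upper=[1, 1, np.nan]).tolist() == [1, 2, 3]
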
Categorical diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c83b1073afc8e..5c9e1f22ddd20 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4741,9 +4741,6 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): if axis is not None: axis = self._get_axis_number(axis) - if np.any(isna(threshold)): - raise ValueError("Cannot use an NA value as a clip threshold") - # method is self.le for upper bound and self.ge for lower bound if is_scalar(threshold) and is_number(threshold): if method.__name__ == 'le': @@ -4823,6 +4820,14 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, axis = nv.validate_clip_with_axis(axis, args, kwargs) + # GH 17276 + # numpy doesn't like NaN as a clip value + # so ignore + if np.any(pd.isnull(lower)): + lower = None + if np.any(pd.isnull(upper)): + upper = None + # GH 2747 (arguments were reversed) if lower is not None and upper is not None: if is_scalar(lower) and is_scalar(upper): @@ -4839,7 +4844,6 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, if upper is not None: if inplace: result = self - result = result.clip_upper(upper, axis, inplace=inplace) return result diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 484a09f11b58a..93514a8a42215 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1931,22 +1931,16 @@ def test_clip_against_frame(self, axis): tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask]) tm.assert_frame_equal(clipped_df[mask], df[mask]) - def test_clip_na(self): - msg = "Cannot use an NA" - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(lower=np.nan) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(lower=[np.nan]) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(upper=np.nan) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(upper=[np.nan]) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(lower=np.nan, upper=np.nan) + def test_clip_with_na_args(self): + """Should process np.nan argument as None """ + # GH # 17276 + tm.assert_frame_equal(self.frame.clip(np.nan), self.frame) + tm.assert_frame_equal(self.frame.clip(upper=[1, 2, np.nan]), + self.frame) + tm.assert_frame_equal(self.frame.clip(lower=[1, np.nan, 3]), + self.frame) + tm.assert_frame_equal(self.frame.clip(upper=np.nan, lower=np.nan), + self.frame) # Matrix-like diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 44da0968d7024..f1d044f7a1132 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1000,6 +1000,17 @@ def test_clip_types_and_nulls(self): assert list(isna(s)) == list(isna(l)) assert list(isna(s)) == list(isna(u)) + def test_clip_with_na_args(self): + """Should process np.nan argument as None """ + # GH # 17276 + s = Series([1, 2, 3]) + + assert_series_equal(s.clip(np.nan), Series([1, 2, 3])) + assert_series_equal(s.clip(upper=[1, 1, np.nan]), Series([1, 2, 3])) + assert_series_equal(s.clip(lower=[1, np.nan, 1]), Series([1, 2, 3])) + assert_series_equal(s.clip(upper=np.nan, lower=np.nan), + Series([1, 2, 3])) + def test_clip_against_series(self): # GH #6966 From a4c4edeb2a7e5c84b5a82a9743a12a4b66e7bcf1 Mon Sep 17 00:00:00 2001 From: ante328 Date: Tue, 22 Aug 2017 01:55:10 +0200 Subject: [PATCH 910/933] BUG: fillna returns frame when inplace=True if value is a dict (#16156) (#17279) --- doc/source/whatsnew/v0.21.0.txt | 2 +- 
pandas/core/generic.py | 3 ++- pandas/tests/frame/test_missing.py | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 0d2c52c70b345..dd06114f6abd3 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -318,7 +318,7 @@ Conversion - Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, so an approximation is used instead (:issue:`17228`) - Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. (:issue:`17237`) - Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) - +- Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) Indexing ^^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5c9e1f22ddd20..e84e4eac3f34d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4135,7 +4135,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, continue obj = result[k] obj.fillna(v, limit=limit, inplace=True, downcast=downcast) - return result + return result if not inplace else None + elif not is_list_like(value): new_data = self._data.fillna(value=value, limit=limit, inplace=inplace, diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 77f0357685cab..ebd15b3180a33 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -407,6 +407,9 @@ def test_fillna_inplace(self): df.fillna(value=0, inplace=True) tm.assert_frame_equal(df, expected) + expected = df.fillna(value={0: 0}, inplace=True) + assert expected is None + df[1][:4] = np.nan df[3][-4:] = np.nan expected = df.fillna(method='ffill') From 2f00159da32c85c3b30b433f78a43e47677711a3 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 22 Aug 2017 10:11:10 +0200 Subject: [PATCH 911/933] CLN: Index.append() refactoring (#16236) --- pandas/core/dtypes/concat.py | 48 ++++++++++++++++++++++- pandas/core/indexes/base.py | 11 +++--- pandas/core/indexes/category.py | 6 ++- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/interval.py | 4 +- pandas/core/indexes/range.py | 59 ++--------------------------- 6 files changed, 63 insertions(+), 67 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 292d5f608d4cb..0ce45eea119ed 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -19,7 +19,7 @@ _TD_DTYPE) from pandas.core.dtypes.generic import ( ABCDatetimeIndex, ABCTimedeltaIndex, - ABCPeriodIndex) + ABCPeriodIndex, ABCRangeIndex) def get_dtype_kinds(l): @@ -41,6 +41,8 @@ def get_dtype_kinds(l): typ = 'category' elif is_sparse(arr): typ = 'sparse' + elif isinstance(arr, ABCRangeIndex): + typ = 'range' elif is_datetimetz(arr): # if to_concat contains different tz, # the result must be object dtype @@ -559,3 +561,47 @@ def convert_sparse(x, axis): # coerce to object if needed result = result.astype('object') return result + + +def _concat_rangeindex_same_dtype(indexes): + """ + Concatenates multiple RangeIndex instances. All members of "indexes" must + be of type RangeIndex; result will be RangeIndex if possible, Int64Index + otherwise. 
E.g.: + indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) + indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) + """ + + start = step = next = None + + for obj in indexes: + if not len(obj): + continue + + if start is None: + # This is set by the first non-empty index + start = obj._start + if step is None and len(obj) > 1: + step = obj._step + elif step is None: + # First non-empty index had only one element + if obj._start == start: + return _concat_index_asobject(indexes) + step = obj._start - start + + non_consecutive = ((step != obj._step and len(obj) > 1) or + (next is not None and obj._start != next)) + if non_consecutive: + # Int64Index._append_same_dtype([ix.astype(int) for ix in indexes]) + # would be preferred... but it currently resorts to + # _concat_index_asobject anyway. + return _concat_index_asobject(indexes) + + if step is not None: + next = obj[-1] + step + + if start is None: + start = obj._start + step = obj._step + stop = obj._stop if next is None else next + return indexes[0].__class__(start, stop, step) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index de6221987a59a..a21e6df3ffc93 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1745,18 +1745,17 @@ def append(self, other): names = set([obj.name for obj in to_concat]) name = None if len(names) > 1 else self.name - if self.is_categorical(): - # if calling index is category, don't check dtype of others - from pandas.core.indexes.category import CategoricalIndex - return CategoricalIndex._append_same_dtype(self, to_concat, name) + return self._concat(to_concat, name) + + def _concat(self, to_concat, name): typs = _concat.get_dtype_kinds(to_concat) if len(typs) == 1: - return self._append_same_dtype(to_concat, name=name) + return self._concat_same_dtype(to_concat, name=name) return _concat._concat_index_asobject(to_concat, name=name) - def _append_same_dtype(self, to_concat, name): + def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class """ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index ac4698b570d17..f22407308e094 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -633,7 +633,11 @@ def insert(self, loc, item): codes = np.concatenate((codes[:loc], code, codes[loc:])) return self._create_from_codes(codes) - def _append_same_dtype(self, to_concat, name): + def _concat(self, to_concat, name): + # if calling index is category, don't check dtype of others + return CategoricalIndex._concat_same_dtype(self, to_concat, name) + + def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class ValueError if other is not in the categories diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 845c71b6c41d8..c3232627fce74 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -837,7 +837,7 @@ def summary(self, name=None): result = result.replace("'", "") return result - def _append_same_dtype(self, to_concat, name): + def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class """ diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e90378184e3f3..e0ed6c7ea35c0 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -876,7 +876,7 @@ def _as_like_interval_index(self, other, error_msg): raise ValueError(error_msg) return other 
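
The effect of ``_concat_rangeindex_same_dtype`` above is easiest to see
through the public ``Index.append`` API. A minimal sketch, assuming the
pandas of this era (where the integer fallback type is ``Int64Index``);
the index values come from the function's own docstring:

    import pandas as pd

    # Consecutive ranges collapse back into a RangeIndex
    idx = pd.RangeIndex(3).append(pd.RangeIndex(3, 6))
    print(type(idx).__name__, list(idx))  # RangeIndex [0, 1, 2, 3, 4, 5]

    # A gap between the ranges forces the Int64Index fallback
    idx = pd.RangeIndex(3).append(pd.RangeIndex(4, 6))
    print(type(idx).__name__, list(idx))  # Int64Index [0, 1, 2, 4, 5]
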
- def _append_same_dtype(self, to_concat, name): + def _concat_same_dtype(self, to_concat, name): """ assert that we all have the same .closed we allow a 0-len index here as well @@ -885,7 +885,7 @@ def _append_same_dtype(self, to_concat, name): msg = ('can only append two IntervalIndex objects ' 'that are closed on the same side') raise ValueError(msg) - return super(IntervalIndex, self)._append_same_dtype(to_concat, name) + return super(IntervalIndex, self)._concat_same_dtype(to_concat, name) @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index ac4cc6986cace..82412d3a7ef57 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -14,6 +14,7 @@ from pandas.compat.numpy import function as nv from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util._decorators import Appender, cache_readonly +import pandas.core.dtypes.concat as _concat import pandas.core.indexes.base as ibase from pandas.core.indexes.numeric import Int64Index @@ -447,62 +448,8 @@ def join(self, other, how='left', level=None, return_indexers=False, return super(RangeIndex, self).join(other, how, level, return_indexers, sort) - def append(self, other): - """ - Append a collection of Index options together - - Parameters - ---------- - other : Index or list/tuple of indices - - Returns - ------- - appended : RangeIndex if all indexes are consecutive RangeIndexes, - otherwise Int64Index or Index - """ - - to_concat = [self] - - if isinstance(other, (list, tuple)): - to_concat = to_concat + list(other) - else: - to_concat.append(other) - - if not all([isinstance(i, RangeIndex) for i in to_concat]): - return super(RangeIndex, self).append(other) - - start = step = next = None - - for obj in to_concat: - if not len(obj): - continue - - if start is None: - # This is set by the first non-empty index - start = obj._start - if step is None and len(obj) > 1: - step = obj._step - elif step is None: - # First non-empty index had only one element - if obj._start == start: - return super(RangeIndex, self).append(other) - step = obj._start - start - - non_consecutive = ((step != obj._step and len(obj) > 1) or - (next is not None and obj._start != next)) - if non_consecutive: - return super(RangeIndex, self).append(other) - - if step is not None: - next = obj[-1] + step - - if start is None: - start = obj._start - step = obj._step - stop = obj._stop if next is None else next - names = set([obj.name for obj in to_concat]) - name = None if len(names) > 1 else self.name - return RangeIndex(start, stop, step, name=name) + def _concat_same_dtype(self, indexes, name): + return _concat._concat_rangeindex_same_dtype(indexes).rename(name) def __len__(self): """ From 870b6a6d6415c76d051b287adcb180ac3020b6e8 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 22 Aug 2017 05:50:57 -0400 Subject: [PATCH 912/933] DEPS: set min versions (#17002) closes #15206, numpy >= 1.9 closes #15543, matplotlib >= 1.4.3 scipy >= 0.14.0 --- .travis.yml | 6 +- ci/install_travis.sh | 2 +- ci/requirements-2.7_COMPAT.build | 2 +- ci/requirements-2.7_COMPAT.run | 9 +- ci/requirements-2.7_LOCALE.build | 2 +- ci/requirements-2.7_LOCALE.run | 5 +- ci/requirements-2.7_SLOW.build | 2 +- ci/requirements-2.7_SLOW.run | 4 +- ci/script_multi.sh | 6 + ci/script_single.sh | 8 + doc/source/install.rst | 6 +- doc/source/whatsnew/v0.21.0.txt | 22 ++- pandas/_libs/sparse.pyx | 2 - pandas/compat/numpy/__init__.py 
| 14 +- pandas/core/algorithms.py | 7 +- pandas/core/generic.py | 5 +- pandas/core/groupby.py | 8 +- pandas/core/internals.py | 16 +- pandas/tests/frame/test_quantile.py | 42 ----- pandas/tests/frame/test_rank.py | 12 +- .../tests/indexes/datetimes/test_datetime.py | 8 +- pandas/tests/indexes/period/test_indexing.py | 34 ++-- .../indexes/timedeltas/test_timedelta.py | 8 +- pandas/tests/plotting/common.py | 3 +- pandas/tests/plotting/test_datetimelike.py | 2 + pandas/tests/plotting/test_frame.py | 163 ++++++++++-------- pandas/tests/plotting/test_misc.py | 45 +---- pandas/tests/plotting/test_series.py | 12 ++ pandas/tests/series/test_operators.py | 16 +- pandas/tests/series/test_quantile.py | 27 +-- pandas/tests/series/test_rank.py | 9 +- pandas/tests/sparse/test_array.py | 7 +- pandas/tests/test_nanops.py | 18 +- pandas/tests/test_resample.py | 2 +- pandas/tests/tools/test_numeric.py | 5 +- setup.py | 2 +- 36 files changed, 221 insertions(+), 320 deletions(-) diff --git a/.travis.yml b/.travis.yml index 897d31cf23a3b..034e2a32bb75c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,7 +37,7 @@ matrix: - JOB="3.5_OSX" TEST_ARGS="--skip-slow --skip-network" - dist: trusty env: - - JOB="2.7_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8" + - JOB="2.7_LOCALE" LOCALE_OVERRIDE="zh_CN.UTF-8" SLOW=true addons: apt: packages: @@ -62,7 +62,7 @@ matrix: # In allow_failures - dist: trusty env: - - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" + - JOB="2.7_SLOW" SLOW=true # In allow_failures - dist: trusty env: @@ -82,7 +82,7 @@ matrix: allow_failures: - dist: trusty env: - - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" + - JOB="2.7_SLOW" SLOW=true - dist: trusty env: - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true diff --git a/ci/install_travis.sh b/ci/install_travis.sh index ad8f0bdd8a597..d26689f2e6b4b 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -47,7 +47,7 @@ which conda echo echo "[update conda]" conda config --set ssl_verify false || exit 1 -conda config --set always_yes true --set changeps1 false || exit 1 +conda config --set quiet true --set always_yes true --set changeps1 false || exit 1 conda update -q conda echo diff --git a/ci/requirements-2.7_COMPAT.build b/ci/requirements-2.7_COMPAT.build index 0e1ccf9eac9bf..d9c932daa110b 100644 --- a/ci/requirements-2.7_COMPAT.build +++ b/ci/requirements-2.7_COMPAT.build @@ -1,5 +1,5 @@ python=2.7* -numpy=1.7.1 +numpy=1.9.2 cython=0.23 dateutil=1.5 pytz=2013b diff --git a/ci/requirements-2.7_COMPAT.run b/ci/requirements-2.7_COMPAT.run index b94f4ab7b27d1..39bf720140733 100644 --- a/ci/requirements-2.7_COMPAT.run +++ b/ci/requirements-2.7_COMPAT.run @@ -1,11 +1,12 @@ -numpy=1.7.1 +numpy=1.9.2 dateutil=1.5 pytz=2013b -scipy=0.11.0 +scipy=0.14.0 xlwt=0.7.5 xlrd=0.9.2 -numexpr=2.2.2 -pytables=3.0.0 +bottleneck=1.0.0 +numexpr=2.4.4 # we test that we correctly don't use an unsupported numexpr +pytables=3.2.2 psycopg2 pymysql=0.6.0 sqlalchemy=0.7.8 diff --git a/ci/requirements-2.7_LOCALE.build b/ci/requirements-2.7_LOCALE.build index 4a37ce8fbe161..96cb184ec2665 100644 --- a/ci/requirements-2.7_LOCALE.build +++ b/ci/requirements-2.7_LOCALE.build @@ -1,5 +1,5 @@ python=2.7* python-dateutil pytz=2013b -numpy=1.8.2 +numpy=1.9.2 cython=0.23 diff --git a/ci/requirements-2.7_LOCALE.run b/ci/requirements-2.7_LOCALE.run index 8e360cf74b081..00006106f7009 100644 --- a/ci/requirements-2.7_LOCALE.run +++ b/ci/requirements-2.7_LOCALE.run @@ -1,11 +1,12 @@ python-dateutil pytz=2013b -numpy=1.8.2 
+numpy=1.9.2 xlwt=0.7.5 openpyxl=1.6.2 xlsxwriter=0.5.2 xlrd=0.9.2 -matplotlib=1.3.1 +bottleneck=1.0.0 +matplotlib=1.4.3 sqlalchemy=0.8.1 lxml=3.2.1 scipy diff --git a/ci/requirements-2.7_SLOW.build b/ci/requirements-2.7_SLOW.build index 0f4a2c6792e6b..a665ab9edd585 100644 --- a/ci/requirements-2.7_SLOW.build +++ b/ci/requirements-2.7_SLOW.build @@ -1,5 +1,5 @@ python=2.7* python-dateutil pytz -numpy=1.8.2 +numpy=1.10* cython diff --git a/ci/requirements-2.7_SLOW.run b/ci/requirements-2.7_SLOW.run index 0a549554f5219..f7708283ad04a 100644 --- a/ci/requirements-2.7_SLOW.run +++ b/ci/requirements-2.7_SLOW.run @@ -1,7 +1,7 @@ python-dateutil pytz -numpy=1.8.2 -matplotlib=1.3.1 +numpy=1.10* +matplotlib=1.4.3 scipy patsy xlwt diff --git a/ci/script_multi.sh b/ci/script_multi.sh index d79fc43fbe175..ee9fbcaad5ef5 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -36,9 +36,15 @@ elif [ "$COVERAGE" ]; then echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas +elif [ "$SLOW" ]; then + TEST_ARGS="--only-slow --skip-network" + echo pytest -r xX -m "not single and slow" -v --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + pytest -r xX -m "not single and slow" -v --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + else echo pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest + fi RET="$?" diff --git a/ci/script_single.sh b/ci/script_single.sh index 245b4e6152c4d..375e9879e950f 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -12,16 +12,24 @@ if [ -n "$LOCALE_OVERRIDE" ]; then python -c "$pycmd" fi +if [ "$SLOW" ]; then + TEST_ARGS="--only-slow --skip-network" +fi + if [ "$BUILD_TEST" ]; then echo "We are not running pytest as this is a build test." + elif [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" + elif [ "$COVERAGE" ]; then echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + else echo pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest + fi RET="$?" diff --git a/doc/source/install.rst b/doc/source/install.rst index 99d299b75b59b..f92c43839ee31 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -203,7 +203,7 @@ Dependencies ------------ * `setuptools `__ -* `NumPy `__: 1.7.1 or higher +* `NumPy `__: 1.9.0 or higher * `python-dateutil `__: 1.5 or higher * `pytz `__: Needed for time zone support @@ -233,7 +233,7 @@ Optional Dependencies * `Cython `__: Only necessary to build development version. Version 0.23 or higher. -* `SciPy `__: miscellaneous statistical functions +* `SciPy `__: miscellaneous statistical functions, Version 0.14.0 or higher * `xarray `__: pandas like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended. * `PyTables `__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended. * `Feather Format `__: necessary for feather-based storage, version 0.3.1 or higher. 
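
The new floor is not only documented, it is enforced at import time by the
compat hunk a little further down. A standalone sketch of that gate (the
message text mirrors the patch; running it requires only numpy):

    from distutils.version import LooseVersion

    import numpy as np

    # Mirrors pandas/compat/numpy/__init__.py after this patch:
    # refuse to run against a numpy older than the new 1.9.0 floor.
    if LooseVersion(np.__version__) < '1.9':
        raise ImportError('this version of pandas is incompatible with '
                          'numpy < 1.9.0\n'
                          'your numpy version is {0}.\n'
                          'Please upgrade numpy to >= 1.9.0 to use '
                          'this pandas version'.format(np.__version__))
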
@@ -244,7 +244,7 @@ Optional Dependencies
 * `pymysql `__: for MySQL.
 * `SQLite `__: for SQLite, this is included in Python's standard library by default.
-* `matplotlib `__: for plotting
+* `matplotlib `__: for plotting, Version 1.4.3 or higher.
 * For Excel I/O:
 * `xlrd/xlwt `__: Excel reading (xlrd) and writing (xlwt)
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index dd06114f6abd3..148fd0a832402 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -137,6 +137,27 @@ Other Enhancements
 Backwards incompatible API changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. _whatsnew_0210.api_breaking.deps:
+
+Dependencies have increased minimum versions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We have updated our minimum supported versions of dependencies (:issue:`15206`, :issue:`15543`, :issue:`15214`).
+If installed, we now require:
+
+ +--------------+-----------------+----------+
+ | Package      | Minimum Version | Required |
+ +==============+=================+==========+
+ | Numpy        | 1.9.0           | X        |
+ +--------------+-----------------+----------+
+ | Matplotlib   | 1.4.3           |          |
+ +--------------+-----------------+----------+
+ | Scipy        | 0.14.0          |          |
+ +--------------+-----------------+----------+
+ | Bottleneck   | 1.0.0           |          |
+ +--------------+-----------------+----------+
+
 .. _whatsnew_0210.api_breaking.pandas_eval:
 Improved error handling during item assignment in pd.eval
@@ -258,7 +279,6 @@ Other API Changes
 ^^^^^^^^^^^^^^^^^
 - Support has been dropped for Python 3.4 (:issue:`15251`)
-- Support has been dropped for bottleneck < 1.0.0 (:issue:`15214`)
 - The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`)
 - Accessing a non-existent attribute on a closed :class:`~pandas.HDFStore` will now raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`)
diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx
index 0c2e056ead7fa..1cc7f5ace95ea 100644
--- a/pandas/_libs/sparse.pyx
+++ b/pandas/_libs/sparse.pyx
@@ -12,8 +12,6 @@ from distutils.version import LooseVersion
 # numpy versioning
 _np_version = np.version.short_version
-_np_version_under1p8 = LooseVersion(_np_version) < '1.8'
-_np_version_under1p9 = LooseVersion(_np_version) < '1.9'
 _np_version_under1p10 = LooseVersion(_np_version) < '1.10'
 _np_version_under1p11 = LooseVersion(_np_version) < '1.11'
diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py
index 2c5a18973afa8..5112957b49875 100644
--- a/pandas/compat/numpy/__init__.py
+++ b/pandas/compat/numpy/__init__.py
@@ -9,19 +9,18 @@
 # numpy versioning
 _np_version = np.__version__
 _nlv = LooseVersion(_np_version)
-_np_version_under1p8 = _nlv < '1.8'
-_np_version_under1p9 = _nlv < '1.9'
 _np_version_under1p10 = _nlv < '1.10'
 _np_version_under1p11 = _nlv < '1.11'
 _np_version_under1p12 = _nlv < '1.12'
 _np_version_under1p13 = _nlv < '1.13'
 _np_version_under1p14 = _nlv < '1.14'
+_np_version_under1p15 = _nlv < '1.15'
-if _nlv < '1.7.0':
+if _nlv < '1.9':
 raise ImportError('this version of pandas is incompatible with '
- 'numpy < 1.7.0\n'
+ 'numpy < 1.9.0\n'
 'your numpy version is {0}.\n'
- 'Please upgrade numpy to >= 1.7.0 to use '
+ 'Please upgrade numpy to >= 1.9.0 to use '
 'this pandas version'.format(_np_version))
@@ -70,11 +69,10 @@ def np_array_datetime64_compat(arr, *args, **kwargs):
 __all__ = ['np',
- '_np_version_under1p8',
- '_np_version_under1p9',
 '_np_version_under1p10',
 '_np_version_under1p11',
 '_np_version_under1p12',
'_np_version_under1p13', - '_np_version_under1p14' + '_np_version_under1p14', + '_np_version_under1p15' ] diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f2359f3ff1a9d..ffd03096e2a27 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -6,7 +6,6 @@ from warnings import warn, catch_warnings import numpy as np -from pandas import compat, _np_version_under1p8 from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.generic import ( ABCSeries, ABCIndex, @@ -407,14 +406,12 @@ def isin(comps, values): comps, dtype, _ = _ensure_data(comps) values, _, _ = _ensure_data(values, dtype=dtype) - # GH11232 - # work-around for numpy < 1.8 and comparisions on py3 # faster for larger cases to use np.in1d f = lambda x, y: htable.ismember_object(x, values) + # GH16012 # Ensure np.in1d doesn't get object types or it *may* throw an exception - if ((_np_version_under1p8 and compat.PY3) or len(comps) > 1000000 and - not is_object_dtype(comps)): + if len(comps) > 1000000 and not is_object_dtype(comps): f = lambda x, y: np.in1d(x, y) elif is_integer_dtype(comps): try: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e84e4eac3f34d..f8366c804e3e7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1827,11 +1827,8 @@ def _box_item_values(self, key, values): def _maybe_cache_changed(self, item, value): """The object has called back to us saying maybe it has changed. - - numpy < 1.8 has an issue with object arrays and aliasing - GH6026 """ - self._data.set(item, value, check=pd._np_version_under1p8) + self._data.set(item, value, check=False) @property def _is_cached(self): diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index a388892e925b6..aa7c4517c0a01 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -13,7 +13,7 @@ ) from pandas import compat -from pandas.compat.numpy import function as nv, _np_version_under1p8 +from pandas.compat.numpy import function as nv from pandas.compat import set_function_name from pandas.core.dtypes.common import ( @@ -3257,11 +3257,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, d = np.diff(np.r_[idx, len(ids)]) if dropna: m = ids[lab == -1] - if _np_version_under1p8: - mi, ml = algorithms.factorize(m) - d[ml] = d[ml] - np.bincount(mi) - else: - np.add.at(d, m, -1) + np.add.at(d, m, -1) acc = rep(d)[mask] else: acc = rep(d) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index b616270e47aa6..83b382ec0ed72 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -69,8 +69,7 @@ import pandas.core.computation.expressions as expressions from pandas.util._decorators import cache_readonly from pandas.util._validators import validate_bool_kwarg - -from pandas import compat, _np_version_under1p9 +from pandas import compat from pandas.compat import range, map, zip, u @@ -857,9 +856,6 @@ def _is_empty_indexer(indexer): # set else: - if _np_version_under1p9: - # Work around GH 6168 to support old numpy - indexer = getattr(indexer, 'values', indexer) values[indexer] = value # coerce and try to infer the dtypes of the result @@ -1482,15 +1478,7 @@ def quantile(self, qs, interpolation='linear', axis=0, mgr=None): tuple of (axis, block) """ - if _np_version_under1p9: - if interpolation != 'linear': - raise ValueError("Interpolation methods other than linear " - "are not supported in numpy < 1.9.") - - kw = {} - if not _np_version_under1p9: - kw.update({'interpolation': interpolation}) - + kw = {'interpolation': 
interpolation} values = self.get_values() values, _, _, _ = self._try_coerce_args(values, values) diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 2482e493dbefd..2f264874378bc 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -12,7 +12,6 @@ from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm -from pandas import _np_version_under1p9 from pandas.tests.frame.common import TestData @@ -103,9 +102,6 @@ def test_quantile_axis_parameter(self): def test_quantile_interpolation(self): # see gh-10174 - if _np_version_under1p9: - pytest.skip("Numpy version under 1.9") - from numpy import percentile # interpolation = linear (default case) @@ -166,44 +162,6 @@ def test_quantile_interpolation(self): index=[.25, .5], columns=['a', 'b', 'c']) assert_frame_equal(result, expected) - def test_quantile_interpolation_np_lt_1p9(self): - # see gh-10174 - if not _np_version_under1p9: - pytest.skip("Numpy version is greater than 1.9") - - from numpy import percentile - - # interpolation = linear (default case) - q = self.tsframe.quantile(0.1, axis=0, interpolation='linear') - assert q['A'] == percentile(self.tsframe['A'], 10) - q = self.intframe.quantile(0.1) - assert q['A'] == percentile(self.intframe['A'], 10) - - # test with and without interpolation keyword - q1 = self.intframe.quantile(0.1) - assert q1['A'] == np.percentile(self.intframe['A'], 10) - assert_series_equal(q, q1) - - # interpolation method other than default linear - msg = "Interpolation methods other than linear" - df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) - with tm.assert_raises_regex(ValueError, msg): - df.quantile(.5, axis=1, interpolation='nearest') - - with tm.assert_raises_regex(ValueError, msg): - df.quantile([.5, .75], axis=1, interpolation='lower') - - # test degenerate case - df = DataFrame({'x': [], 'y': []}) - with tm.assert_raises_regex(ValueError, msg): - q = df.quantile(0.1, axis=0, interpolation='higher') - - # multi - df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], - columns=['a', 'b', 'c']) - with tm.assert_raises_regex(ValueError, msg): - df.quantile([.25, .5], interpolation='midpoint') - def test_quantile_multi(self): df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=['a', 'b', 'c']) diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py index acf887d047c9e..58f4d9b770173 100644 --- a/pandas/tests/frame/test_rank.py +++ b/pandas/tests/frame/test_rank.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import pytest from datetime import timedelta, datetime from distutils.version import LooseVersion from numpy import nan @@ -26,8 +27,7 @@ class TestRank(TestData): } def test_rank(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata + rankdata = pytest.importorskip('scipy.stats.rankdata') self.frame['A'][::2] = np.nan self.frame['B'][::3] = np.nan @@ -120,8 +120,7 @@ def test_rank2(self): tm.assert_frame_equal(df.rank(), exp) def test_rank_na_option(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata + rankdata = pytest.importorskip('scipy.stats.rankdata') self.frame['A'][::2] = np.nan self.frame['B'][::3] = np.nan @@ -193,10 +192,9 @@ def test_rank_axis(self): tm.assert_frame_equal(df.rank(axis=1), df.rank(axis='columns')) def test_rank_methods_frame(self): - tm.skip_if_no_package('scipy', min_version='0.13', - app='scipy.stats.rankdata') + pytest.importorskip('scipy.stats.special') + rankdata = 
pytest.importorskip('scipy.stats.rankdata') import scipy - from scipy.stats import rankdata xs = np.random.randint(0, 21, (100, 26)) xs = (xs - 10.0) / 10.0 diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index f99dcee9e5c8a..47f53f53cfd02 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -9,7 +9,7 @@ from pandas.compat import lrange from pandas.compat.numpy import np_datetime64_compat from pandas import (DatetimeIndex, Index, date_range, Series, DataFrame, - Timestamp, datetime, offsets, _np_version_under1p8) + Timestamp, datetime, offsets) from pandas.util.testing import assert_series_equal, assert_almost_equal @@ -276,11 +276,7 @@ def test_comparisons_nat(self): np_datetime64_compat('2014-06-01 00:00Z'), np_datetime64_compat('2014-07-01 00:00Z')]) - if _np_version_under1p8: - # cannot test array because np.datetime('nat') returns today's date - cases = [(fidx1, fidx2), (didx1, didx2)] - else: - cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] + cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] # Check pd.NaT is handles as the same as np.nan with tm.assert_produces_warning(None): diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index d4dac1cf88fff..efc13a56cd77e 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -8,7 +8,7 @@ from pandas.compat import lrange from pandas._libs import tslib from pandas import (PeriodIndex, Series, DatetimeIndex, - period_range, Period, _np_version_under1p9) + period_range, Period) class TestGetItem(object): @@ -149,16 +149,12 @@ def test_getitem_seconds(self): values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', '2013/02/01 09:00'] for v in values: - if _np_version_under1p9: - with pytest.raises(ValueError): - idx[v] - else: - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - # with pytest.raises(IndexError): - # idx[v] - continue + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + # with pytest.raises(IndexError): + # idx[v] + continue s = Series(np.random.rand(len(idx)), index=idx) tm.assert_series_equal(s['2013/01/01 10:00'], s[3600:3660]) @@ -178,16 +174,12 @@ def test_getitem_day(self): '2013/02/01 09:00'] for v in values: - if _np_version_under1p9: - with pytest.raises(ValueError): - idx[v] - else: - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - # with pytest.raises(IndexError): - # idx[v] - continue + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + # with pytest.raises(IndexError): + # idx[v] + continue s = Series(np.random.rand(len(idx)), index=idx) tm.assert_series_equal(s['2013/01'], s[0:31]) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 59e4b1432b8bc..0b3bd0b03bccf 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -7,7 +7,7 @@ import pandas.util.testing as tm from pandas import (timedelta_range, date_range, Series, Timedelta, DatetimeIndex, TimedeltaIndex, Index, DataFrame, - Int64Index, _np_version_under1p8) + Int64Index) from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_index_equal) @@ -379,11 +379,7 @@ def 
test_comparisons_nat(self): np.timedelta64(1, 'D') + np.timedelta64(2, 's'), np.timedelta64(5, 'D') + np.timedelta64(3, 's')]) - if _np_version_under1p8: - # cannot test array because np.datetime('nat') returns today's date - cases = [(tdidx1, tdidx2)] - else: - cases = [(tdidx1, tdidx2), (tdidx1, tdarr)] + cases = [(tdidx1, tdidx2), (tdidx1, tdarr)] # Check pd.NaT is handles as the same as np.nan for idx1, idx2 in cases: diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 3ab443b223f20..dfab539e9474c 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -39,7 +39,8 @@ def _ok_for_gaussian_kde(kind): from scipy.stats import gaussian_kde # noqa except ImportError: return False - return True + + return plotting._compat._mpl_ge_1_5_0() class TestPlotBase(object): diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index e9c7d806fd65d..cff0c1c0b424e 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -610,6 +610,8 @@ def test_secondary_y_ts(self): @pytest.mark.slow def test_secondary_kde(self): + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 6d813ac76cc4e..67098529a0111 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -473,7 +473,6 @@ def test_subplots_multiple_axes(self): # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes fig, axes = self.plt.subplots(2, 2) with warnings.catch_warnings(): - warnings.simplefilter('ignore') df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) @@ -1290,6 +1289,9 @@ def test_boxplot_subplots_return_type(self): def test_kde_df(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + df = DataFrame(randn(100, 4)) ax = _check_plot_works(df.plot, kind='kde') expected = [pprint_thing(c) for c in df.columns] @@ -1311,6 +1313,9 @@ def test_kde_df(self): def test_kde_missing_vals(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + df = DataFrame(np.random.uniform(size=(100, 4))) df.loc[0, 0] = np.nan _check_plot_works(df.plot, kind='kde') @@ -1835,6 +1840,8 @@ def test_hist_colors(self): def test_kde_colors(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") from matplotlib import cm @@ -1858,6 +1865,8 @@ def test_kde_colors(self): def test_kde_colors_and_styles_subplots(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") from matplotlib import cm default_colors = self._maybe_unpack_cycler(self.plt.rcParams) @@ -2160,71 +2169,74 @@ def test_pie_df_nan(self): @pytest.mark.slow def test_errorbar_plot(self): - d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} - df = DataFrame(d) - d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} - df_err = DataFrame(d_err) - - # check line plots - ax = _check_plot_works(df.plot, yerr=df_err, logy=True) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err, loglog=True) 
- self._check_has_errorbars(ax, xerr=0, yerr=2) + with warnings.catch_warnings(): + d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} + df = DataFrame(d) + d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} + df_err = DataFrame(d_err) - kinds = ['line', 'bar', 'barh'] - for kind in kinds: - ax = _check_plot_works(df.plot, yerr=df_err['x'], kind=kind) + # check line plots + ax = _check_plot_works(df.plot, yerr=df_err, logy=True) self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=d_err, kind=kind) + ax = _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True) self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, - kind=kind) - self._check_has_errorbars(ax, xerr=2, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err['x'], xerr=df_err['x'], - kind=kind) - self._check_has_errorbars(ax, xerr=2, yerr=2) - ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) - self._check_has_errorbars(ax, xerr=2, yerr=2) - # _check_plot_works adds an ax so catch warning. see GH #13188 - with tm.assert_produces_warning(UserWarning): + ax = _check_plot_works(df.plot, yerr=df_err, loglog=True) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + kinds = ['line', 'bar', 'barh'] + for kind in kinds: + ax = _check_plot_works(df.plot, yerr=df_err['x'], kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(df.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, + kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + ax = _check_plot_works(df.plot, yerr=df_err['x'], + xerr=df_err['x'], + kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + + # _check_plot_works adds an ax so catch warning. 
see GH #13188 axes = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind) - self._check_has_errorbars(axes, xerr=1, yerr=1) - - ax = _check_plot_works((df + 1).plot, yerr=df_err, - xerr=df_err, kind='bar', log=True) - self._check_has_errorbars(ax, xerr=2, yerr=2) + self._check_has_errorbars(axes, xerr=1, yerr=1) - # yerr is raw error values - ax = _check_plot_works(df['y'].plot, yerr=np.ones(12) * 0.4) - self._check_has_errorbars(ax, xerr=0, yerr=1) - ax = _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4) - self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works((df + 1).plot, yerr=df_err, + xerr=df_err, kind='bar', log=True) + self._check_has_errorbars(ax, xerr=2, yerr=2) - # yerr is iterator - import itertools - ax = _check_plot_works(df.plot, yerr=itertools.repeat(0.1, len(df))) - self._check_has_errorbars(ax, xerr=0, yerr=2) + # yerr is raw error values + ax = _check_plot_works(df['y'].plot, yerr=np.ones(12) * 0.4) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4) + self._check_has_errorbars(ax, xerr=0, yerr=2) - # yerr is column name - for yerr in ['yerr', u('誤差')]: - s_df = df.copy() - s_df[yerr] = np.ones(12) * 0.2 - ax = _check_plot_works(s_df.plot, yerr=yerr) + # yerr is iterator + import itertools + ax = _check_plot_works(df.plot, + yerr=itertools.repeat(0.1, len(df))) self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(s_df.plot, y='y', x='x', yerr=yerr) - self._check_has_errorbars(ax, xerr=0, yerr=1) - with pytest.raises(ValueError): - df.plot(yerr=np.random.randn(11)) + # yerr is column name + for yerr in ['yerr', u('誤差')]: + s_df = df.copy() + s_df[yerr] = np.ones(12) * 0.2 + ax = _check_plot_works(s_df.plot, yerr=yerr) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(s_df.plot, y='y', x='x', yerr=yerr) + self._check_has_errorbars(ax, xerr=0, yerr=1) - df_err = DataFrame({'x': ['zzz'] * 12, 'y': ['zzz'] * 12}) - with pytest.raises((ValueError, TypeError)): - df.plot(yerr=df_err) + with pytest.raises(ValueError): + df.plot(yerr=np.random.randn(11)) + + df_err = DataFrame({'x': ['zzz'] * 12, 'y': ['zzz'] * 12}) + with pytest.raises((ValueError, TypeError)): + df.plot(yerr=df_err) @pytest.mark.slow def test_errorbar_with_integer_column_names(self): @@ -2262,33 +2274,34 @@ def test_errorbar_with_partial_columns(self): @pytest.mark.slow def test_errorbar_timeseries(self): - d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} - d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} + with warnings.catch_warnings(): + d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} + d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} - # check time-series plots - ix = date_range('1/1/2000', '1/1/2001', freq='M') - tdf = DataFrame(d, index=ix) - tdf_err = DataFrame(d_err, index=ix) + # check time-series plots + ix = date_range('1/1/2000', '1/1/2001', freq='M') + tdf = DataFrame(d, index=ix) + tdf_err = DataFrame(d_err, index=ix) - kinds = ['line', 'bar', 'barh'] - for kind in kinds: - ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(tdf.plot, yerr=d_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(tdf.plot, y='y', yerr=tdf_err['x'], - kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=1) - ax = _check_plot_works(tdf.plot, y='y', yerr='x', kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=1) - ax = 
_check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - # _check_plot_works adds an ax so catch warning. see GH #13188 - with tm.assert_produces_warning(UserWarning): + kinds = ['line', 'bar', 'barh'] + for kind in kinds: + ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(tdf.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(tdf.plot, y='y', yerr=tdf_err['x'], + kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(tdf.plot, y='y', yerr='x', kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + # _check_plot_works adds an ax so catch warning. see GH #13188 axes = _check_plot_works(tdf.plot, kind=kind, yerr=tdf_err, subplots=True) - self._check_has_errorbars(axes, xerr=0, yerr=1) + self._check_has_errorbars(axes, xerr=0, yerr=1) def test_errorbar_asymmetrical(self): diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 684a943fb5a69..c4795ea1e1eca 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -4,7 +4,7 @@ import pytest -from pandas import Series, DataFrame +from pandas import DataFrame from pandas.compat import lmap import pandas.util.testing as tm @@ -13,8 +13,7 @@ from numpy.random import randn import pandas.plotting as plotting -from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, - _ok_for_gaussian_kde) +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works tm._skip_if_no_mpl() @@ -52,46 +51,6 @@ def test_bootstrap_plot(self): class TestDataFramePlots(TestPlotBase): - @pytest.mark.slow - def test_scatter_plot_legacy(self): - tm._skip_if_no_scipy() - - df = DataFrame(randn(100, 2)) - - def scat(**kwds): - return plotting.scatter_matrix(df, **kwds) - - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat) - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, marker='+') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, vmin=0) - if _ok_for_gaussian_kde('kde'): - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, diagonal='kde') - if _ok_for_gaussian_kde('density'): - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, diagonal='density') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, diagonal='hist') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, range_padding=.1) - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, color='rgb') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, c='rgb') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, facecolor='rgb') - - def scat2(x, y, by=None, ax=None, figsize=None): - return plotting._core.scatter_plot(df, x, y, by, ax, figsize=None) - - _check_plot_works(scat2, x=0, y=1) - grouper = Series(np.repeat([1, 2, 3, 4, 5], 20), df.index) - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat2, x=0, y=1, by=grouper) - def test_scatter_matrix_axis(self): tm._skip_if_no_scipy() scatter_matrix = plotting.scatter_matrix diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 9c9011ba1ca7b..8164ad74a190a 100644 --- 
a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -571,6 +571,9 @@ def test_plot_fails_with_dupe_color_and_style(self): @pytest.mark.slow def test_hist_kde(self): + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + _, ax = self.plt.subplots() ax = self.ts.plot.hist(logy=True, ax=ax) self._check_ax_scales(ax, yaxis='log') @@ -596,6 +599,9 @@ def test_hist_kde(self): def test_kde_kwargs(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + from numpy import linspace _check_plot_works(self.ts.plot.kde, bw_method=.5, ind=linspace(-100, 100, 20)) @@ -611,6 +617,9 @@ def test_kde_kwargs(self): def test_kde_missing_vals(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + s = Series(np.random.uniform(size=50)) s[0] = np.nan axes = _check_plot_works(s.plot.kde) @@ -638,6 +647,9 @@ def test_hist_kwargs(self): @pytest.mark.slow def test_hist_kde_color(self): + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + _, ax = self.plt.subplots() ax = self.ts.plot.hist(logy=True, bins=10, color='b', ax=ax) self._check_ax_scales(ax, yaxis='log') diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 4888f8fe996b6..114a055de8195 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -14,8 +14,7 @@ import pandas as pd from pandas import (Index, Series, DataFrame, isna, bdate_range, - NaT, date_range, timedelta_range, - _np_version_under1p8) + NaT, date_range, timedelta_range) from pandas.core.indexes.datetimes import Timestamp from pandas.core.indexes.timedeltas import Timedelta import pandas.core.nanops as nanops @@ -687,14 +686,13 @@ def run_ops(ops, get_ser, test_ser): assert_series_equal(result, exp) # odd numpy behavior with scalar timedeltas - if not _np_version_under1p8: - result = td1[0] + dt1 - exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) - assert_series_equal(result, exp) + result = td1[0] + dt1 + exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) - result = td2[0] + dt2 - exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) - assert_series_equal(result, exp) + result = td2[0] + dt2 + exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) result = dt1 - td1[0] exp = (dt1.dt.tz_localize(None) - td1[0]).dt.tz_localize(tz) diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index 21379641a78d8..cf5e3fe4f29b0 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -1,11 +1,10 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 -import pytest import numpy as np import pandas as pd -from pandas import (Index, Series, _np_version_under1p9) +from pandas import Index, Series from pandas.core.indexes.datetimes import Timestamp from pandas.core.dtypes.common import is_integer import pandas.util.testing as tm @@ -68,8 +67,6 @@ def test_quantile_multi(self): [], dtype=float)) tm.assert_series_equal(result, expected) - @pytest.mark.skipif(_np_version_under1p9, - reason="Numpy version is under 1.9") def test_quantile_interpolation(self): # see gh-10174 @@ -82,8 +79,6 @@ def test_quantile_interpolation(self): # test with and without interpolation keyword assert q == q1 - @pytest.mark.skipif(_np_version_under1p9, - reason="Numpy 
version is under 1.9") def test_quantile_interpolation_dtype(self): # GH #10174 @@ -96,26 +91,6 @@ def test_quantile_interpolation_dtype(self): assert q == np.percentile(np.array([1, 3, 4]), 50) assert is_integer(q) - @pytest.mark.skipif(not _np_version_under1p9, - reason="Numpy version is greater 1.9") - def test_quantile_interpolation_np_lt_1p9(self): - # GH #10174 - - # interpolation = linear (default case) - q = self.ts.quantile(0.1, interpolation='linear') - assert q == np.percentile(self.ts.valid(), 10) - q1 = self.ts.quantile(0.1) - assert q1 == np.percentile(self.ts.valid(), 10) - - # interpolation other than linear - msg = "Interpolation methods other than " - with tm.assert_raises_regex(ValueError, msg): - self.ts.quantile(0.9, interpolation='nearest') - - # object dtype - with tm.assert_raises_regex(ValueError, msg): - Series(self.ts, dtype=object).quantile(0.7, interpolation='higher') - def test_quantile_nan(self): # GH 13098 diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py index ff489eb7f15b1..128a4cdd845e6 100644 --- a/pandas/tests/series/test_rank.py +++ b/pandas/tests/series/test_rank.py @@ -28,8 +28,8 @@ class TestSeriesRank(TestData): } def test_rank(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata + pytest.importorskip('scipy.stats.special') + rankdata = pytest.importorskip('scipy.stats.rankdata') self.ts[::2] = np.nan self.ts[:10][::3] = 4. @@ -246,10 +246,9 @@ def _check(s, expected, method='average'): _check(series, results[method], method=method) def test_rank_methods_series(self): - tm.skip_if_no_package('scipy', min_version='0.13', - app='scipy.stats.rankdata') + pytest.importorskip('scipy.stats.special') + rankdata = pytest.importorskip('scipy.stats.rankdata') import scipy - from scipy.stats import rankdata xs = np.random.randn(9) xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py index 4ce03f72dbba6..b0a9182a265fe 100644 --- a/pandas/tests/sparse/test_array.py +++ b/pandas/tests/sparse/test_array.py @@ -8,7 +8,6 @@ from numpy import nan import numpy as np -from pandas import _np_version_under1p8 from pandas.core.sparse.api import SparseArray, SparseSeries from pandas._libs.sparse import IntIndex from pandas.util.testing import assert_almost_equal @@ -150,10 +149,8 @@ def test_take(self): assert np.isnan(self.arr.take(0)) assert np.isscalar(self.arr.take(2)) - # np.take in < 1.8 doesn't support scalar indexing - if not _np_version_under1p8: - assert self.arr.take(2) == np.take(self.arr_data, 2) - assert self.arr.take(6) == np.take(self.arr_data, 6) + assert self.arr.take(2) == np.take(self.arr_data, 2) + assert self.arr.take(6) == np.take(self.arr_data, 6) exp = SparseArray(np.take(self.arr_data, [2, 3])) tm.assert_sp_array_equal(self.arr.take([2, 3]), exp) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 2a22fc9d32919..9305504f8d5e3 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from pandas import Series, isna, _np_version_under1p9 +from pandas import Series, isna from pandas.core.dtypes.common import is_integer_dtype import pandas.core.nanops as nanops import pandas.util.testing as tm @@ -340,15 +340,13 @@ def test_nanmean_overflow(self): # In the previous implementation mean can overflow for int dtypes, it # is now consistent with numpy - # numpy < 1.9.0 is not computing this correctly - if not 
_np_version_under1p9: - for a in [2 ** 55, -2 ** 55, 20150515061816532]: - s = Series(a, index=range(500), dtype=np.int64) - result = s.mean() - np_result = s.values.mean() - assert result == a - assert result == np_result - assert result.dtype == np.float64 + for a in [2 ** 55, -2 ** 55, 20150515061816532]: + s = Series(a, index=range(500), dtype=np.int64) + result = s.mean() + np_result = s.values.mean() + assert result == a + assert result == np_result + assert result.dtype == np.float64 def test_returned_dtype(self): diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index d938d5bf9f3ab..d42e37048d87f 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -1688,7 +1688,7 @@ def test_resample_dtype_preservation(self): def test_resample_dtype_coerceion(self): - pytest.importorskip('scipy') + pytest.importorskip('scipy.interpolate') # GH 16361 df = {"a": [1, 3, 1, 4]} diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py index 664a97640387e..1d13ba93ba759 100644 --- a/pandas/tests/tools/test_numeric.py +++ b/pandas/tests/tools/test_numeric.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd -from pandas import to_numeric, _np_version_under1p9 +from pandas import to_numeric from pandas.util import testing as tm from numpy import iinfo @@ -355,9 +355,6 @@ def test_downcast(self): def test_downcast_limits(self): # Test the limits of each downcast. Bug: #14401. - # Check to make sure numpy is new enough to run this test. - if _np_version_under1p9: - pytest.skip("Numpy version is under 1.9") i = 'integer' u = 'unsigned' diff --git a/setup.py b/setup.py index a912b25328954..04a5684c20fcd 100755 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ def is_platform_mac(): _have_setuptools = False setuptools_kwargs = {} -min_numpy_ver = '1.7.0' +min_numpy_ver = '1.9.0' if sys.version_info[0] >= 3: setuptools_kwargs = { From dfaf8c6918ff20ef781d3177f464a29e70ee5d65 Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 22 Aug 2017 07:57:53 -0600 Subject: [PATCH 913/933] CLN: replace %s syntax with .format in core.tools, algorithms.py, base.py (#17305) --- pandas/core/algorithms.py | 10 +++++----- pandas/core/base.py | 19 ++++++++++--------- pandas/core/tools/datetimes.py | 32 ++++++++++++++++++-------------- pandas/core/tools/timedeltas.py | 7 ++++--- 4 files changed, 37 insertions(+), 31 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index ffd03096e2a27..cccb094eaae7b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -393,12 +393,12 @@ def isin(comps, values): if not is_list_like(comps): raise TypeError("only list-like objects are allowed to be passed" - " to isin(), you passed a " - "[{0}]".format(type(comps).__name__)) + " to isin(), you passed a [{comps_type}]" + .format(comps_type=type(comps).__name__)) if not is_list_like(values): raise TypeError("only list-like objects are allowed to be passed" - " to isin(), you passed a " - "[{0}]".format(type(values).__name__)) + " to isin(), you passed a [{values_type}]" + .format(values_type=type(values).__name__)) if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): values = lib.list_to_object_array(list(values)) @@ -671,7 +671,7 @@ def mode(values): try: result = np.sort(result) except TypeError as e: - warn("Unable to sort modes: %s" % e) + warn("Unable to sort modes: {error}".format(error=e)) result = _reconstruct_data(result, original.dtype, original) return Series(result) diff --git a/pandas/core/base.py 
b/pandas/core/base.py index 4ae4736035793..a7c991dc8d257 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -342,24 +342,25 @@ def _obj_with_exclusions(self): def __getitem__(self, key): if self._selection is not None: - raise Exception('Column(s) %s already selected' % self._selection) + raise Exception('Column(s) {selection} already selected' + .format(selection=self._selection)) if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass, np.ndarray)): if len(self.obj.columns.intersection(key)) != len(key): bad_keys = list(set(key).difference(self.obj.columns)) - raise KeyError("Columns not found: %s" - % str(bad_keys)[1:-1]) + raise KeyError("Columns not found: {missing}" + .format(missing=str(bad_keys)[1:-1])) return self._gotitem(list(key), ndim=2) elif not getattr(self, 'as_index', False): if key not in self.obj.columns: - raise KeyError("Column not found: %s" % key) + raise KeyError("Column not found: {key}".format(key=key)) return self._gotitem(key, ndim=2) else: if key not in self.obj: - raise KeyError("Column not found: %s" % key) + raise KeyError("Column not found: {key}".format(key=key)) return self._gotitem(key, ndim=1) def _gotitem(self, key, ndim, subset=None): @@ -409,7 +410,7 @@ def _try_aggregate_string_function(self, arg, *args, **kwargs): if f is not None: return f(self, *args, **kwargs) - raise ValueError("{} is an unknown string function".format(arg)) + raise ValueError("{arg} is an unknown string function".format(arg=arg)) def _aggregate(self, arg, *args, **kwargs): """ @@ -484,9 +485,9 @@ def nested_renaming_depr(level=4): is_nested_renamer = True if k not in obj.columns: - raise SpecificationError('cannot perform renaming ' - 'for {0} with a nested ' - 'dictionary'.format(k)) + msg = ('cannot perform renaming for {key} with a ' + 'nested dictionary').format(key=k) + raise SpecificationError(msg) nested_renaming_depr(4 + (_level or 0)) elif isinstance(obj, ABCSeries): diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 6ff4302937d07..53f58660cabdb 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -46,7 +46,8 @@ def _infer(a, b): if b and b.tzinfo: if not (tslib.get_timezone(tz) == tslib.get_timezone(b.tzinfo)): raise AssertionError('Inputs must both have the same timezone,' - ' {0} != {1}'.format(tz, b.tzinfo)) + ' {timezone1} != {timezone2}' + .format(timezone1=tz, timezone2=b.tzinfo)) return tz tz = None @@ -491,10 +492,10 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): offset = tslib.Timestamp(origin) - tslib.Timestamp(0) except tslib.OutOfBoundsDatetime: raise tslib.OutOfBoundsDatetime( - "origin {} is Out of Bounds".format(origin)) + "origin {origin} is Out of Bounds".format(origin=origin)) except ValueError: - raise ValueError("origin {} cannot be converted " - "to a Timestamp".format(origin)) + raise ValueError("origin {origin} cannot be converted " + "to a Timestamp".format(origin=origin)) # convert the offset to the unit of the arg # this should be lossless in terms of precision @@ -590,16 +591,16 @@ def f(value): required = ['year', 'month', 'day'] req = sorted(list(set(required) - set(unit_rev.keys()))) if len(req): - raise ValueError("to assemble mappings requires at " - "least that [year, month, day] be specified: " - "[{0}] is missing".format(','.join(req))) + raise ValueError("to assemble mappings requires at least that " + "[year, month, day] be specified: [{required}] " + "is missing".format(required=','.join(req))) # keys we don't recognize excess = 
sorted(list(set(unit_rev.keys()) - set(_unit_map.values()))) if len(excess): raise ValueError("extra keys have been passed " "to the datetime assemblage: " - "[{0}]".format(','.join(excess))) + "[{excess}]".format(','.join(excess=excess))) def coerce(values): # we allow coercion to if errors allows @@ -617,7 +618,7 @@ def coerce(values): values = to_datetime(values, format='%Y%m%d', errors=errors) except (TypeError, ValueError) as e: raise ValueError("cannot assemble the " - "datetimes: {0}".format(e)) + "datetimes: {error}".format(error=e)) for u in ['h', 'm', 's', 'ms', 'us', 'ns']: value = unit_rev.get(u) @@ -627,8 +628,8 @@ def coerce(values): unit=u, errors=errors) except (TypeError, ValueError) as e: - raise ValueError("cannot assemble the datetimes " - "[{0}]: {1}".format(value, e)) + raise ValueError("cannot assemble the datetimes [{value}]: " + "{error}".format(value=value, error=e)) return values @@ -810,8 +811,10 @@ def _convert_listlike(arg, format): times.append(datetime.strptime(element, format).time()) except (ValueError, TypeError): if errors == 'raise': - raise ValueError("Cannot convert %s to a time with " - "given format %s" % (element, format)) + msg = ("Cannot convert {element} to a time with given " + "format {format}").format(element=element, + format=format) + raise ValueError(msg) elif errors == 'ignore': return arg else: @@ -876,6 +879,7 @@ def ole2datetime(oledt): # Excel has a bug where it thinks the date 2/29/1900 exists # we just reject any date before 3/1/1900. if val < 61: - raise ValueError("Value is outside of acceptable range: %s " % val) + msg = "Value is outside of acceptable range: {value}".format(value=val) + raise ValueError(msg) return OLE_TIME_ZERO + timedelta(days=val) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index f2d99d26a87b8..d5132826bb93f 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -129,7 +129,8 @@ def _validate_timedelta_unit(arg): except: if arg is None: return 'ns' - raise ValueError("invalid timedelta unit {0} provided".format(arg)) + raise ValueError("invalid timedelta unit {arg} provided" + .format(arg=arg)) def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'): @@ -161,8 +162,8 @@ def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None): if is_timedelta64_dtype(arg): value = arg.astype('timedelta64[ns]') elif is_integer_dtype(arg): - value = arg.astype('timedelta64[{0}]'.format( - unit)).astype('timedelta64[ns]', copy=False) + value = arg.astype('timedelta64[{unit}]'.format(unit=unit)).astype( + 'timedelta64[ns]', copy=False) else: try: value = tslib.array_to_timedelta64(_ensure_object(arg), From 2bec750b21b8715e3f55e71a6c69f2abef54d08b Mon Sep 17 00:00:00 2001 From: ante328 Date: Tue, 22 Aug 2017 16:31:14 +0200 Subject: [PATCH 914/933] BUG: Fix strange behaviour of Series.iloc on MultiIndex Series (#17148) (#17291) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexing.py | 3 ++- pandas/tests/indexing/test_iloc.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 148fd0a832402..f760d0b6359a2 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -353,6 +353,7 @@ Indexing - Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`) - Avoids ``IndexError`` when passing an Index or Series to ``.iloc`` with older numpy 
(:issue:`17193`) - Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`) +- Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) I/O ^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 929c2346ba5b0..6b9ad5cd2d93b 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -146,7 +146,8 @@ def _get_setitem_indexer(self, key): return self._convert_tuple(key, is_setter=True) axis = self.obj._get_axis(0) - if isinstance(axis, MultiIndex): + + if isinstance(axis, MultiIndex) and self.name != 'iloc': try: return axis.get_loc(key) except Exception: diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 31fee303a41e2..39569f0b0cb38 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -269,6 +269,35 @@ def test_iloc_setitem(self): expected = Series([0, 1, 0], index=[4, 5, 6]) tm.assert_series_equal(s, expected) + @pytest.mark.parametrize( + 'data, indexes, values, expected_k', [ + # test without indexer value in first level of MultiIndex + ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]), + # test like code sample 1 in the issue + ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], + [755, 1066]), + # test like code sample 2 in the issue + ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]), + # test like code sample 3 in the issue + ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], + [8, 15, 13]) + ]) + def test_iloc_setitem_int_multiindex_series( + self, data, indexes, values, expected_k): + # GH17148 + df = pd.DataFrame( + data=data, + columns=['i', 'j', 'k']) + df = df.set_index(['i', 'j']) + + series = df.k.copy() + for i, v in zip(indexes, values): + series.iloc[i] += v + + df['k'] = expected_k + expected = df.k + tm.assert_series_equal(series, expected) + def test_iloc_setitem_list(self): # setitem with an iloc list From 0cf2b146c526fe85e2df45b1c5e80da404c9d58f Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 23 Aug 2017 13:30:34 -0700 Subject: [PATCH 915/933] DOC: Add module doc-string to tseries/api.py --- pandas/tseries/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/api.py b/pandas/tseries/api.py index 71386c02547ba..2094791ecdc60 100644 --- a/pandas/tseries/api.py +++ b/pandas/tseries/api.py @@ -1,5 +1,5 @@ """ - +Timeseries API """ # flake8: noqa From 66ec5f3e616f6449ef2c88401042cf2a282234d7 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 23 Aug 2017 14:35:49 -0700 Subject: [PATCH 916/933] MAINT: Clean up docs in pandas/errors/__init__.py --- pandas/errors/__init__.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 6304f3a527f2c..42b3bdd4991a9 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -1,25 +1,28 @@ # flake8: noqa -""" expose public exceptions & warnings """ +""" +Expose public exceptions & warnings +""" from pandas._libs.tslib import OutOfBoundsDatetime class PerformanceWarning(Warning): """ - Warnings shown when there is a possible performance - impact. + Warning raised when there is a possible + performance impact. """ class UnsupportedFunctionCall(ValueError): """ - If attempting to call a numpy function on a pandas - object. For example using ``np.cumsum(groupby_object)``. 
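# An illustrative sketch (not from the patch) of the behaviour the docstring
# above describes, assuming a pandas with the pandas.errors module (>= 0.20):
# numpy forwards its own axis/dtype/out arguments, which groupby reductions
# reject with UnsupportedFunctionCall.
import numpy as np
import pandas as pd
from pandas.errors import UnsupportedFunctionCall

df = pd.DataFrame({'key': ['a', 'a', 'b'], 'val': [1, 2, 3]})
try:
    np.cumsum(df.groupby('key'))  # numpy function on a pandas object
except UnsupportedFunctionCall as err:
    print('UnsupportedFunctionCall:', err)  # expected path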
+ Exception raised when attempting to call a numpy function + on a pandas object, but that function is not supported by + the object e.g. ``np.cumsum(groupby_object)``. """ class UnsortedIndexError(KeyError): """ - Error raised when attempting to get a slice of a MultiIndex + Error raised when attempting to get a slice of a MultiIndex, and the index has not been lexsorted. Subclass of `KeyError`. .. versionadded:: 0.20.0 @@ -29,22 +32,22 @@ class UnsortedIndexError(KeyError): class ParserError(ValueError): """ - Exception that is thrown by an error is encountered in `pd.read_csv` + Exception that is raised by an error encountered in `pd.read_csv`. """ class DtypeWarning(Warning): """ - Warning that is raised for a dtype incompatiblity. This is + Warning that is raised for a dtype incompatiblity. This can happen whenever `pd.read_csv` encounters non- - uniform dtypes in a column(s) of a given CSV file + uniform dtypes in a column(s) of a given CSV file. """ class EmptyDataError(ValueError): """ Exception that is thrown in `pd.read_csv` (by both the C and - Python engines) when empty data or header is encountered + Python engines) when empty data or header is encountered. """ @@ -53,7 +56,7 @@ class ParserWarning(Warning): Warning that is raised in `pd.read_csv` whenever it is necessary to change parsers (generally from 'c' to 'python') contrary to the one specified by the user due to lack of support or functionality for - parsing particular attributes of a CSV file with the requsted engine + parsing particular attributes of a CSV file with the requsted engine. """ @@ -61,5 +64,4 @@ class MergeError(ValueError): """ Error raised when problems arise during merging due to problems with input data. Subclass of `ValueError`. - """ From d45e12b87ce867b2df3254c386c0f17f175efbf0 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 24 Aug 2017 03:50:18 -0600 Subject: [PATCH 917/933] CLN: replace %s syntax with .format in missing.py, nanops.py, ops.py (#17322) Replaced %s syntax with .format in missing.py, nanops.py, ops.py. Additionally, made some of the existing positional .format code more explicit. --- pandas/core/missing.py | 41 +++++++++++++-------- pandas/core/nanops.py | 8 ++-- pandas/core/ops.py | 84 +++++++++++++++++++++++------------------- 3 files changed, 76 insertions(+), 57 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 93281e20a2a96..8a6a870834c83 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -88,8 +88,8 @@ def clean_fill_method(method, allow_nearest=False): valid_methods.append('nearest') expecting = 'pad (ffill), backfill (bfill) or nearest' if method not in valid_methods: - msg = ('Invalid fill method. Expecting %s. Got %s' % - (expecting, method)) + msg = ('Invalid fill method. Expecting {expecting}. Got {method}' + .format(expecting=expecting, method=method)) raise ValueError(msg) return method @@ -104,8 +104,8 @@ def clean_interp_method(method, **kwargs): raise ValueError("You must specify the order of the spline or " "polynomial.") if method not in valid: - raise ValueError("method must be one of {0}." - "Got '{1}' instead.".format(valid, method)) + raise ValueError("method must be one of {valid}. 
Got '{method}' " + "instead.".format(valid=valid, method=method)) return method @@ -146,8 +146,10 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None, valid_limit_directions = ['forward', 'backward', 'both'] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: - raise ValueError('Invalid limit_direction: expecting one of %r, got ' - '%r.' % (valid_limit_directions, limit_direction)) + msg = ('Invalid limit_direction: expecting one of {valid!r}, ' + 'got {invalid!r}.') + raise ValueError(msg.format(valid=valid_limit_directions, + invalid=limit_direction)) from pandas import Series ys = Series(yvalues) @@ -248,7 +250,8 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, # TODO: Why is DatetimeIndex being imported here? from pandas import DatetimeIndex # noqa except ImportError: - raise ImportError('{0} interpolation requires Scipy'.format(method)) + raise ImportError('{method} interpolation requires SciPy' + .format(method=method)) new_x = np.asarray(new_x) @@ -466,7 +469,8 @@ def pad_1d(values, limit=None, mask=None, dtype=None): dtype = values.dtype _method = None if is_float_dtype(values): - _method = getattr(algos, 'pad_inplace_%s' % dtype.name, None) + name = 'pad_inplace_{name}'.format(name=dtype.name) + _method = getattr(algos, name, None) elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _pad_1d_datetime elif is_integer_dtype(values): @@ -476,7 +480,8 @@ def pad_1d(values, limit=None, mask=None, dtype=None): _method = algos.pad_inplace_object if _method is None: - raise ValueError('Invalid dtype for pad_1d [%s]' % dtype.name) + raise ValueError('Invalid dtype for pad_1d [{name}]' + .format(name=dtype.name)) if mask is None: mask = isna(values) @@ -490,7 +495,8 @@ def backfill_1d(values, limit=None, mask=None, dtype=None): dtype = values.dtype _method = None if is_float_dtype(values): - _method = getattr(algos, 'backfill_inplace_%s' % dtype.name, None) + name = 'backfill_inplace_{name}'.format(name=dtype.name) + _method = getattr(algos, name, None) elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _backfill_1d_datetime elif is_integer_dtype(values): @@ -500,7 +506,8 @@ def backfill_1d(values, limit=None, mask=None, dtype=None): _method = algos.backfill_inplace_object if _method is None: - raise ValueError('Invalid dtype for backfill_1d [%s]' % dtype.name) + raise ValueError('Invalid dtype for backfill_1d [{name}]' + .format(name=dtype.name)) if mask is None: mask = isna(values) @@ -515,7 +522,8 @@ def pad_2d(values, limit=None, mask=None, dtype=None): dtype = values.dtype _method = None if is_float_dtype(values): - _method = getattr(algos, 'pad_2d_inplace_%s' % dtype.name, None) + name = 'pad_2d_inplace_{name}'.format(name=dtype.name) + _method = getattr(algos, name, None) elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _pad_2d_datetime elif is_integer_dtype(values): @@ -525,7 +533,8 @@ def pad_2d(values, limit=None, mask=None, dtype=None): _method = algos.pad_2d_inplace_object if _method is None: - raise ValueError('Invalid dtype for pad_2d [%s]' % dtype.name) + raise ValueError('Invalid dtype for pad_2d [{name}]' + .format(name=dtype.name)) if mask is None: mask = isna(values) @@ -544,7 +553,8 @@ def backfill_2d(values, limit=None, mask=None, dtype=None): dtype = values.dtype _method = None if is_float_dtype(values): - _method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None) + name = 
'backfill_2d_inplace_{name}'.format(name=dtype.name) + _method = getattr(algos, name, None) elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _backfill_2d_datetime elif is_integer_dtype(values): @@ -554,7 +564,8 @@ def backfill_2d(values, limit=None, mask=None, dtype=None): _method = algos.backfill_2d_inplace_object if _method is None: - raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name) + raise ValueError('Invalid dtype for backfill_2d [{name}]' + .format(name=dtype.name)) if mask is None: mask = isna(values) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index b2bbf1c75b7ea..858aed7fd3e23 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -70,9 +70,8 @@ def __call__(self, f): def _f(*args, **kwargs): obj_iter = itertools.chain(args, compat.itervalues(kwargs)) if any(self.check(obj) for obj in obj_iter): - raise TypeError('reduction operation {0!r} not allowed for ' - 'this dtype'.format( - f.__name__.replace('nan', ''))) + msg = 'reduction operation {name!r} not allowed for this dtype' + raise TypeError(msg.format(name=f.__name__.replace('nan', ''))) try: with np.errstate(invalid='ignore'): return f(*args, **kwargs) @@ -786,7 +785,8 @@ def _ensure_numeric(x): try: x = complex(x) except Exception: - raise TypeError('Could not convert %s to numeric' % str(x)) + raise TypeError('Could not convert {value!s} to numeric' + .format(value=x)) return x # NA-friendly array comparisons diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 82101414e4aa6..221f6ff8b92c6 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -63,9 +63,9 @@ def _create_methods(arith_method, comp_method, bool_method, def names(x): if x[-1] == "_": - return "__%s_" % x + return "__{name}_".format(name=x) else: - return "__%s__" % x + return "__{name}__".format(name=x) else: names = lambda x: x @@ -388,8 +388,8 @@ def _validate(self, lvalues, rvalues, name): if name not in ('__div__', '__truediv__', '__mul__', '__rmul__'): raise TypeError("can only operate on a timedelta and an " "integer or a float for division and " - "multiplication, but the operator [%s] was" - "passed" % name) + "multiplication, but the operator [{name}] " + "was passed".format(name=name)) # 2 timedeltas elif ((self.is_timedelta_lhs and @@ -400,9 +400,9 @@ def _validate(self, lvalues, rvalues, name): if name not in ('__div__', '__rdiv__', '__truediv__', '__rtruediv__', '__add__', '__radd__', '__sub__', '__rsub__'): - raise TypeError("can only operate on a timedeltas for " - "addition, subtraction, and division, but the" - " operator [%s] was passed" % name) + raise TypeError("can only operate on a timedeltas for addition" + ", subtraction, and division, but the operator" + " [{name}] was passed".format(name=name)) # datetime and timedelta/DateOffset elif (self.is_datetime_lhs and @@ -411,23 +411,24 @@ def _validate(self, lvalues, rvalues, name): if name not in ('__add__', '__radd__', '__sub__'): raise TypeError("can only operate on a datetime with a rhs of " "a timedelta/DateOffset for addition and " - "subtraction, but the operator [%s] was " - "passed" % name) + "subtraction, but the operator [{name}] was " + "passed".format(name=name)) elif (self.is_datetime_rhs and (self.is_timedelta_lhs or self.is_offset_lhs)): if name not in ('__add__', '__radd__', '__rsub__'): raise TypeError("can only operate on a timedelta/DateOffset " "with a rhs of a datetime for addition, " - "but the operator [%s] was passed" % name) + "but the operator [{name}] was passed" + 
.format(name=name)) # 2 datetimes elif self.is_datetime_lhs and self.is_datetime_rhs: if name not in ('__sub__', '__rsub__'): raise TypeError("can only operate on a datetimes for" - " subtraction, but the operator [%s] was" - " passed" % name) + " subtraction, but the operator [{name}] was" + " passed".format(name=name)) # if tz's must be equal (same or None) if getattr(lvalues, 'tz', None) != getattr(rvalues, 'tz', None): @@ -439,8 +440,8 @@ def _validate(self, lvalues, rvalues, name): if name not in ('__add__', '__radd__'): raise TypeError("can only operate on a timedelta/DateOffset " - "and a datetime for addition, but the " - "operator [%s] was passed" % name) + "and a datetime for addition, but the operator" + " [{name}] was passed".format(name=name)) else: raise TypeError('cannot operate on a series without a rhs ' 'of a series/ndarray of type datetime64[ns] ' @@ -498,7 +499,7 @@ def _convert_to_array(self, values, name=None, other=None): values = values.to_timestamp().to_series() elif name not in ('__truediv__', '__div__', '__mul__', '__rmul__'): raise TypeError("incompatible type for a datetime/timedelta " - "operation [{0}]".format(name)) + "operation [{name}]".format(name=name)) elif inferred_type == 'floating': if (isna(values).all() and name in ('__add__', '__radd__', '__sub__', '__rsub__')): @@ -508,8 +509,9 @@ def _convert_to_array(self, values, name=None, other=None): elif self._is_offset(values): return values else: - raise TypeError("incompatible type [{0}] for a datetime/timedelta" - " operation".format(np.array(values).dtype)) + raise TypeError("incompatible type [{dtype}] for a " + "datetime/timedelta operation" + .format(dtype=np.array(values).dtype)) return values @@ -866,8 +868,8 @@ def wrapper(self, other, axis=None): with np.errstate(all='ignore'): res = na_op(values, other) if is_scalar(res): - raise TypeError('Could not compare %s type with Series' % - type(other)) + raise TypeError('Could not compare {typ} type with Series' + .format(typ=type(other))) # always return a full value series here res = _values_from_object(res) @@ -906,9 +908,10 @@ def na_op(x, y): y = bool(y) result = lib.scalar_binop(x, y, op) except: - raise TypeError("cannot compare a dtyped [{0}] array with " - "a scalar of type [{1}]".format( - x.dtype, type(y).__name__)) + msg = ("cannot compare a dtyped [{dtype}] array " + "with a scalar of type [{type}]" + ).format(dtype=x.dtype, type=type(y).__name__) + raise TypeError(msg) return result @@ -1140,14 +1143,17 @@ def _align_method_FRAME(left, right, axis): """ convert rhs to meet lhs dims if input is list, tuple or np.ndarray """ def to_series(right): - msg = 'Unable to coerce to Series, length must be {0}: given {1}' + msg = ('Unable to coerce to Series, length must be {req_len}: ' + 'given {given_len}') if axis is not None and left._get_axis_name(axis) == 'index': if len(left.index) != len(right): - raise ValueError(msg.format(len(left.index), len(right))) + raise ValueError(msg.format(req_len=len(left.index), + given_len=len(right))) right = left._constructor_sliced(right, index=left.index) else: if len(left.columns) != len(right): - raise ValueError(msg.format(len(left.columns), len(right))) + raise ValueError(msg.format(req_len=len(left.columns), + given_len=len(right))) right = left._constructor_sliced(right, index=left.columns) return right @@ -1161,15 +1167,16 @@ def to_series(right): elif right.ndim == 2: if left.shape != right.shape: - msg = ("Unable to coerce to DataFrame, " - "shape must be {0}: given {1}") - raise 
ValueError(msg.format(left.shape, right.shape)) + msg = ("Unable to coerce to DataFrame, shape " + "must be {req_shape}: given {given_shape}" + ).format(req_shape=left.shape, given_shape=right.shape) + raise ValueError(msg) right = left._constructor(right, index=left.index, columns=left.columns) else: - msg = 'Unable to coerce to Series/DataFrame, dim must be <= 2: {0}' - raise ValueError(msg.format(right.shape, )) + raise ValueError('Unable to coerce to Series/DataFrame, dim ' + 'must be <= 2: {dim}'.format(dim=right.shape)) return right @@ -1278,7 +1285,8 @@ def na_op(x, y): return result - @Appender('Wrapper for flexible comparison methods %s' % name) + @Appender('Wrapper for flexible comparison methods {name}' + .format(name=name)) def f(self, other, axis=default_axis, level=None): other = _align_method_FRAME(self, other, axis) @@ -1299,7 +1307,7 @@ def f(self, other, axis=default_axis, level=None): def _comp_method_FRAME(func, name, str_rep, masker=False): - @Appender('Wrapper for comparison method %s' % name) + @Appender('Wrapper for comparison method {name}'.format(name=name)) def f(self, other): if isinstance(other, pd.DataFrame): # Another DataFrame return self._compare_frame(other, func, str_rep) @@ -1349,9 +1357,9 @@ def na_op(x, y): # work only for scalars def f(self, other): if not is_scalar(other): - raise ValueError('Simple arithmetic with %s can only be ' - 'done with scalar values' % - self._constructor.__name__) + raise ValueError('Simple arithmetic with {name} can only be ' + 'done with scalar values' + .format(name=self._constructor.__name__)) return self._combine(other, op) @@ -1384,7 +1392,7 @@ def na_op(x, y): return result - @Appender('Wrapper for comparison method %s' % name) + @Appender('Wrapper for comparison method {name}'.format(name=name)) def f(self, other, axis=None): # Validate the axis parameter if axis is not None: @@ -1394,8 +1402,8 @@ def f(self, other, axis=None): return self._compare_constructor(other, na_op, try_cast=False) elif isinstance(other, (self._constructor_sliced, pd.DataFrame, ABCSeries)): - raise Exception("input needs alignment for this object [%s]" % - self._constructor) + raise Exception("input needs alignment for this object [{object}]" + .format(object=self._constructor)) else: return self._combine_const(other, na_op, try_cast=False) From 6993c1ba981554cdd8f45675db5807077a28e2c0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 24 Aug 2017 03:03:18 -0700 Subject: [PATCH 918/933] Make pd.Period immutable (#17239) --- doc/source/whatsnew/v0.21.0.txt | 2 ++ pandas/_libs/period.pyx | 17 +++++++++++------ pandas/tests/scalar/test_period.py | 11 +++++++++++ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f760d0b6359a2..604d275511fa0 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -291,6 +291,8 @@ Other API Changes - Moved definition of ``MergeError`` to the ``pandas.errors`` module. - The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. 
The old signature is deprecated and will show a ``FutureWarning`` (:issue:`14636`) - :func:`Series.argmin` and :func:`Series.argmax` will now raise a ``TypeError`` when used with ``object`` dtypes, instead of a ``ValueError`` (:issue:`13595`) +- :class:`Period` is now immutable, and will now raise an ``AttributeError`` when a user tries to assign a new value to the ``ordinal`` or ``freq`` attributes (:issue:`17116`). + .. _whatsnew_0210.deprecations: diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 6ba7ec0270f30..a1d04fea89151 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -29,7 +29,9 @@ from datetime cimport ( PANDAS_FR_ns, INT32_MIN) + cimport util, lib + from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, @@ -668,13 +670,17 @@ class IncompatibleFrequency(ValueError): cdef class _Period(object): - cdef public: + cdef readonly: int64_t ordinal object freq _comparables = ['name', 'freqstr'] _typ = 'period' + def __cinit__(self, ordinal, freq): + self.ordinal = ordinal + self.freq = freq + @classmethod def _maybe_convert_freq(cls, object freq): @@ -698,9 +704,8 @@ cdef class _Period(object): if ordinal == iNaT: return NaT else: - self = _Period.__new__(cls) - self.ordinal = ordinal - self.freq = cls._maybe_convert_freq(freq) + freq = cls._maybe_convert_freq(freq) + self = _Period.__new__(cls, ordinal, freq) return self def __richcmp__(self, other, op): @@ -752,7 +757,7 @@ cdef class _Period(object): def __add__(self, other): if isinstance(self, Period): if isinstance(other, (timedelta, np.timedelta64, - offsets.Tick, offsets.DateOffset, + offsets.DateOffset, Timedelta)): return self._add_delta(other) elif other is NaT: @@ -770,7 +775,7 @@ cdef class _Period(object): def __sub__(self, other): if isinstance(self, Period): if isinstance(other, (timedelta, np.timedelta64, - offsets.Tick, offsets.DateOffset, + offsets.DateOffset, Timedelta)): neg_other = -other return self + neg_other diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index 931d6b2b8f1f0..a167c9c738b0b 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -1406,3 +1406,14 @@ def test_period_ops_offset(self): with tm.assert_raises_regex(period.IncompatibleFrequency, msg): p - offsets.Hour(2) + + +def test_period_immutable(): + # see gh-17116 + per = pd.Period('2014Q1') + with pytest.raises(AttributeError): + per.ordinal = 14 + + freq = per.freq + with pytest.raises(AttributeError): + per.freq = 2 * freq From 62527c0f328caa4ae716328246df75a6f2b33028 Mon Sep 17 00:00:00 2001 From: P-Tillmann Date: Thu, 24 Aug 2017 12:38:27 +0200 Subject: [PATCH 919/933] Bug: groupby multiindex levels equals rows (#16859) closes #16843 --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/groupby.py | 9 +++++---- pandas/tests/groupby/test_groupby.py | 13 +++++++++++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 604d275511fa0..6317b4ae84565 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -383,7 +383,7 @@ Groupby/Resample/Rolling - Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) - Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`) - Bug in ``.rolling(...).apply(...)`` with 
a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1` (:issue:`15305`) - +- Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`) Sparse ^^^^^^ diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index aa7c4517c0a01..c23b00dc740a4 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2629,13 +2629,14 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, try: if isinstance(obj, DataFrame): - all_in_columns = all(g in obj.columns for g in keys) + all_in_columns_index = all(g in obj.columns or g in obj.index.names + for g in keys) else: - all_in_columns = False + all_in_columns_index = False except Exception: - all_in_columns = False + all_in_columns_index = False - if not any_callable and not all_in_columns and \ + if not any_callable and not all_in_columns_index and \ not any_arraylike and not any_groupers and \ match_axis_length and level is None: keys = [com._asarray_tuplesafe(keys)] diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index f9e1a0d2e744a..8957beacab376 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3891,6 +3891,19 @@ def predictions(tool): result = df2.groupby('Key').apply(predictions).p1 tm.assert_series_equal(expected, result) + def test_gb_key_len_equal_axis_len(self): + # GH16843 + # test ensures that index and column keys are recognized correctly + # when number of keys equals axis length of groupby + df = pd.DataFrame([['foo', 'bar', 'B', 1], + ['foo', 'bar', 'B', 2], + ['foo', 'baz', 'C', 3]], + columns=['first', 'second', 'third', 'one']) + df = df.set_index(['first', 'second']) + df = df.groupby(['first', 'second', 'third']).size() + assert df.loc[('foo', 'bar', 'B')] == 2 + assert df.loc[('foo', 'baz', 'C')] == 1 + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = lmap(tuple, df[keys].values) From 96f92eb1c696723b6465fdc273dc8406201c606a Mon Sep 17 00:00:00 2001 From: step4me Date: Thu, 24 Aug 2017 08:53:50 -0400 Subject: [PATCH 920/933] BUG: Cannot use tz-aware origin in to_datetime (#16842) closes #16842 Author: step4me Closes #17244 from step4me/step4me-feature and squashes the following commits: 09d051d48 [step4me] BUG: Cannot use tz-aware origin in to_datetime (#16842) --- doc/source/whatsnew/v0.21.0.txt | 5 +++-- pandas/core/tools/datetimes.py | 7 ++++++- pandas/tests/indexes/datetimes/test_tools.py | 6 ++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6317b4ae84565..fcadd26156b1d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -292,6 +292,7 @@ Other API Changes - The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is deprecated and will show a ``FutureWarning`` (:issue:`14636`) - :func:`Series.argmin` and :func:`Series.argmax` will now raise a ``TypeError`` when used with ``object`` dtypes, instead of a ``ValueError`` (:issue:`13595`) - :class:`Period` is now immutable, and will now raise an ``AttributeError`` when a user tries to assign a new value to the ``ordinal`` or ``freq`` attributes (:issue:`17116`). 
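# An illustrative sketch (not from the patch) of the Period immutability
# noted in the context line above, mirroring the new test_period_immutable
# test; assumes pandas >= 0.21.
import pandas as pd

per = pd.Period('2014Q1')
try:
    per.ordinal = 14          # previously reassignable; now readonly
except AttributeError as err:
    print('AttributeError:', err)
try:
    per.freq = 2 * per.freq   # freq is readonly as well
except AttributeError as err:
    print('AttributeError:', err)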
+- :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`) .. _whatsnew_0210.deprecations: @@ -356,6 +357,7 @@ Indexing - Avoids ``IndexError`` when passing an Index or Series to ``.iloc`` with older numpy (:issue:`17193`) - Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`) - Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) +- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) I/O ^^^ @@ -402,6 +404,7 @@ Reshaping - Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`) - Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`) - :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`). +- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) Numeric ^^^^^^^ @@ -420,5 +423,3 @@ Categorical Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) -- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) -- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 53f58660cabdb..c0f234a36803d 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -489,7 +489,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): # we are going to offset back to unix / epoch time try: - offset = tslib.Timestamp(origin) - tslib.Timestamp(0) + offset = tslib.Timestamp(origin) except tslib.OutOfBoundsDatetime: raise tslib.OutOfBoundsDatetime( "origin {origin} is Out of Bounds".format(origin=origin)) @@ -497,6 +497,11 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): raise ValueError("origin {origin} cannot be converted " "to a Timestamp".format(origin=origin)) + if offset.tz is not None: + raise ValueError( + "origin offset {} must be tz-naive".format(offset)) + offset -= tslib.Timestamp(0) + # convert the offset to the unit of the arg # this should be lossless in terms of precision offset = offset // tslib.Timedelta(1, unit=unit) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 9764b65d330af..50669ee357bbd 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1589,6 +1589,12 @@ def test_invalid_origins(self, origin, exc, units, units_from_epochs): pd.to_datetime(units_from_epochs, unit=units, origin=origin) + def test_invalid_origins_tzinfo(self): + # GH16842 + with pytest.raises(ValueError): + pd.to_datetime(1, unit='D', + origin=datetime(2000, 1, 1, tzinfo=pytz.utc)) + def test_processing_order(self): # make sure we handle out-of-bounds *before* # constructing the dates From 473a7f3c186f6b0bfd9d3ce413fb627cf7a8f111 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 25 Aug 2017 13:29:57 -0700 Subject: [PATCH 921/933] Replace usage of total_seconds compat func with timedelta method (#17289) --- 
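# An illustrative, stdlib-only sketch: the compat helper this commit removes
# reimplemented datetime.timedelta.total_seconds(). The manual formula below
# matches the C helper deleted from datetime_helper.h in the diff that
# follows.
from datetime import timedelta

td = timedelta(days=1, seconds=5, microseconds=250)
manual = (td.microseconds
          + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
assert manual == td.total_seconds()  # 86405.00025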
pandas/_libs/period.pyx | 7 ++--- pandas/_libs/src/datetime_helper.h | 36 ----------------------- pandas/_libs/src/ujson/python/objToJSON.c | 22 +++++++++++++- pandas/_libs/tslib.pyx | 28 +++++++----------- pandas/io/pytables.py | 2 +- pandas/tseries/offsets.py | 6 ++-- setup.py | 2 -- 7 files changed, 38 insertions(+), 65 deletions(-) delete mode 100644 pandas/_libs/src/datetime_helper.h diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index a1d04fea89151..816b7ebfff86d 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -10,9 +10,6 @@ from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA) import numpy as np -cdef extern from "datetime_helper.h": - double total_seconds(object) - from libc.stdlib cimport free from pandas import compat @@ -552,7 +549,7 @@ cdef _reso_local(ndarray[int64_t] stamps, object tz): &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 pandas_datetime_to_datetimestruct(stamps[i] + delta, PANDAS_FR_ns, &dts) curr_reso = _reso_stamp(&dts) @@ -619,7 +616,7 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 pandas_datetime_to_datetimestruct(stamps[i] + delta, PANDAS_FR_ns, &dts) result[i] = get_period_ordinal(dts.year, dts.month, dts.day, diff --git a/pandas/_libs/src/datetime_helper.h b/pandas/_libs/src/datetime_helper.h deleted file mode 100644 index 8023285f85b9b..0000000000000 --- a/pandas/_libs/src/datetime_helper.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. -*/ - -#ifndef PANDAS__LIBS_SRC_DATETIME_HELPER_H_ -#define PANDAS__LIBS_SRC_DATETIME_HELPER_H_ - -#include -#include "datetime.h" -#include "numpy/arrayobject.h" -#include "numpy/arrayscalars.h" - -npy_int64 get_long_attr(PyObject *o, const char *attr) { - npy_int64 long_val; - PyObject *value = PyObject_GetAttrString(o, attr); - long_val = (PyLong_Check(value) ? 
- PyLong_AsLongLong(value) : PyInt_AS_LONG(value)); - Py_DECREF(value); - return long_val; -} - -npy_float64 total_seconds(PyObject *td) { - // Python 2.6 compat - npy_int64 microseconds = get_long_attr(td, "microseconds"); - npy_int64 seconds = get_long_attr(td, "seconds"); - npy_int64 days = get_long_attr(td, "days"); - npy_int64 days_in_seconds = days * 24LL * 3600LL; - return (microseconds + (seconds + days_in_seconds) * 1000000.0) / 1000000.0; -} - -#endif // PANDAS__LIBS_SRC_DATETIME_HELPER_H_ diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index f2c0b18d35131..4beaa3fd449df 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -47,9 +47,9 @@ Numeric decoder derived from from TCL library #include // NOLINT(build/include_order) #include // NOLINT(build/include_order) #include // NOLINT(build/include_order) -#include // NOLINT(build/include_order) #include // NOLINT(build/include_order) #include // NOLINT(build/include_order) +#include "datetime.h" static PyObject *type_decimal; @@ -329,6 +329,26 @@ static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { return ret; } +npy_int64 get_long_attr(PyObject *o, const char *attr) { + npy_int64 long_val; + PyObject *value = PyObject_GetAttrString(o, attr); + long_val = (PyLong_Check(value) ? + PyLong_AsLongLong(value) : PyInt_AS_LONG(value)); + Py_DECREF(value); + return long_val; +} + +npy_float64 total_seconds(PyObject *td) { + // Python 2.6 compat + // TODO(anyone): remove this legacy workaround with a more + // direct td.total_seconds() + npy_int64 microseconds = get_long_attr(td, "microseconds"); + npy_int64 seconds = get_long_attr(td, "seconds"); + npy_int64 days = get_long_attr(td, "days"); + npy_int64 days_in_seconds = days * 24LL * 3600LL; + return (microseconds + (seconds + days_in_seconds) * 1000000.0) / 1000000.0; +} + static PyObject *get_item(PyObject *obj, Py_ssize_t i) { PyObject *tmp = PyInt_FromSsize_t(i); PyObject *ret; diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c4a38ec660a4c..b5aca2e3ec309 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -26,9 +26,6 @@ from cpython cimport ( cdef extern from "Python.h": cdef PyTypeObject *Py_TYPE(object) -cdef extern from "datetime_helper.h": - double total_seconds(object) - # this is our datetime.pxd from libc.stdlib cimport free @@ -1639,7 +1636,7 @@ cdef inline void _localize_tso(_TSObject obj, object tz): pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts) dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, obj.dts.min, obj.dts.sec, obj.dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 if obj.value != NPY_NAT: pandas_datetime_to_datetimestruct(obj.value + delta, PANDAS_FR_ns, &obj.dts) @@ -4136,7 +4133,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz1) - delta = (int(total_seconds(_get_utcoffset(tz1, dt))) + delta = (int(_get_utcoffset(tz1, dt).total_seconds()) * 1000000000) utc_dates[i] = v - delta else: @@ -4176,8 +4173,8 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz2) - delta = 
int(total_seconds( - _get_utcoffset(tz2, dt))) * 1000000000 + delta = (int(_get_utcoffset(tz2, dt).total_seconds()) + * 1000000000) result[i] = v + delta return result @@ -4243,7 +4240,7 @@ def tz_convert_single(int64_t val, object tz1, object tz2): pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz1) - delta = int(total_seconds(_get_utcoffset(tz1, dt))) * 1000000000 + delta = int(_get_utcoffset(tz1, dt).total_seconds()) * 1000000000 utc_date = val - delta elif _get_zone(tz1) != 'UTC': trans, deltas, typ = _get_dst_info(tz1) @@ -4261,7 +4258,7 @@ def tz_convert_single(int64_t val, object tz1, object tz2): pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz2) - delta = int(total_seconds(_get_utcoffset(tz2, dt))) * 1000000000 + delta = int(_get_utcoffset(tz2, dt).total_seconds()) * 1000000000 return utc_date + delta # Convert UTC to other timezone @@ -4333,7 +4330,7 @@ cdef object _get_dst_info(object tz): """ cache_key = _tz_cache_key(tz) if cache_key is None: - num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000 + num = int(_get_utcoffset(tz, None).total_seconds()) * 1000000000 return (np.array([NPY_NAT + 1], dtype=np.int64), np.array([num], dtype=np.int64), None) @@ -4380,7 +4377,7 @@ cdef object _get_dst_info(object tz): else: # static tzinfo trans = np.array([NPY_NAT + 1], dtype=np.int64) - num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000 + num = int(_get_utcoffset(tz, None).total_seconds()) * 1000000000 deltas = np.array([num], dtype=np.int64) typ = 'static' @@ -4403,9 +4400,6 @@ cdef object _get_utc_trans_times_from_dateutil_tz(object tz): return new_trans -def tot_seconds(td): - return total_seconds(td) - cpdef ndarray _unbox_utcoffsets(object transinfo): cdef: Py_ssize_t i, sz @@ -4415,7 +4409,7 @@ cpdef ndarray _unbox_utcoffsets(object transinfo): arr = np.empty(sz, dtype='i8') for i in range(sz): - arr[i] = int(total_seconds(transinfo[i][0])) * 1000000000 + arr[i] = int(transinfo[i][0].total_seconds()) * 1000000000 return arr @@ -4458,7 +4452,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 result[i] = v - delta return result @@ -5181,7 +5175,7 @@ cdef _normalize_local(ndarray[int64_t] stamps, object tz): pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 pandas_datetime_to_datetimestruct(stamps[i] + delta, PANDAS_FR_ns, &dts) result[i] = _normalized_stamp(&dts) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 82c80a13372d7..712e9e9903f0a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4381,7 +4381,7 @@ def _get_tz(tz): """ for a tz-aware type, return an encoded zone """ zone = tslib.get_timezone(tz) if zone is None: - zone = tslib.tot_seconds(tz.utcoffset()) + zone = tz.utcoffset().total_seconds() return zone diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 29cdda5548896..7ccecaa84e6d6 
100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -778,12 +778,12 @@ def _get_business_hours_by_sec(self): # create dummy datetime to calcurate businesshours in a day dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute) until = datetime(2014, 4, 1, self.end.hour, self.end.minute) - return tslib.tot_seconds(until - dtstart) + return (until - dtstart).total_seconds() else: self.daytime = False dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute) until = datetime(2014, 4, 2, self.end.hour, self.end.minute) - return tslib.tot_seconds(until - dtstart) + return (until - dtstart).total_seconds() @apply_wraps def rollback(self, dt): @@ -907,7 +907,7 @@ def _onOffset(self, dt, businesshours): op = self._prev_opening_time(dt) else: op = self._next_opening_time(dt) - span = tslib.tot_seconds(dt - op) + span = (dt - op).total_seconds() if span <= businesshours: return True else: diff --git a/setup.py b/setup.py index 04a5684c20fcd..444db5bc4d275 100755 --- a/setup.py +++ b/setup.py @@ -467,7 +467,6 @@ def pxd(name): tseries_depends = ['pandas/_libs/src/datetime/np_datetime.h', 'pandas/_libs/src/datetime/np_datetime_strings.h', - 'pandas/_libs/src/datetime_helper.h', 'pandas/_libs/src/period_helper.h', 'pandas/_libs/src/datetime.pxd'] @@ -597,7 +596,6 @@ def pxd(name): ujson_ext = Extension('pandas._libs.json', depends=['pandas/_libs/src/ujson/lib/ultrajson.h', - 'pandas/_libs/src/datetime_helper.h', 'pandas/_libs/src/numpy_helper.h'], sources=['pandas/_libs/src/ujson/python/ujson.c', 'pandas/_libs/src/ujson/python/objToJSON.c', From 376483e12e4a08140d594eab86bf22423684fbcb Mon Sep 17 00:00:00 2001 From: cbertinato Date: Mon, 28 Aug 2017 09:58:05 -0400 Subject: [PATCH 922/933] CLN: replace %s syntax with .format in core/indexing.py (#17357) Progress toward issue #16130. Converted old string formatting to new string formatting in core/indexing.py. 
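The mechanical pattern behind the conversion, shown on one of the messages this patch touches (a sketch, not part of the diff): named replacement fields keep the template readable without counting positional arguments.

    name = "iloc"

    # old style: positional interpolation
    old = "%s cannot enlarge its target object" % name

    # new style: named replacement field, as used throughout the patch
    new = "{name} cannot enlarge its target object".format(name=name)

    assert old == new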
--- pandas/core/indexing.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6b9ad5cd2d93b..b7a51afcedabf 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -188,8 +188,9 @@ def _has_valid_tuple(self, key): if i >= self.obj.ndim: raise IndexingError('Too many indexers') if not self._has_valid_type(k, i): - raise ValueError("Location based indexing can only have [%s] " - "types" % self._valid_types) + raise ValueError("Location based indexing can only have " + "[{types}] types" + .format(types=self._valid_types)) def _should_validate_iterable(self, axis=0): """ return a boolean whether this axes needs validation for a passed @@ -263,11 +264,11 @@ def _has_valid_positional_setitem_indexer(self, indexer): pass elif is_integer(i): if i >= len(ax): - raise IndexError("{0} cannot enlarge its target object" - .format(self.name)) + raise IndexError("{name} cannot enlarge its target " + "object".format(name=self.name)) elif isinstance(i, dict): - raise IndexError("{0} cannot enlarge its target object" - .format(self.name)) + raise IndexError("{name} cannot enlarge its target object" + .format(name=self.name)) return True @@ -1235,7 +1236,8 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): mask = check == -1 if mask.any(): - raise KeyError('%s not in index' % objarr[mask]) + raise KeyError('{mask} not in index' + .format(mask=objarr[mask])) return _values_from_object(indexer) @@ -1421,8 +1423,9 @@ def _has_valid_type(self, key, axis): if (not is_iterator(key) and len(key) and np.all(ax.get_indexer_for(key) < 0)): - raise KeyError("None of [%s] are in the [%s]" % - (key, self.obj._get_axis_name(axis))) + raise KeyError(u"None of [{key}] are in the [{axis}]" + .format(key=key, + axis=self.obj._get_axis_name(axis))) return True @@ -1432,8 +1435,9 @@ def error(): if isna(key): raise TypeError("cannot use label indexing with a null " "key") - raise KeyError("the label [%s] is not in the [%s]" % - (key, self.obj._get_axis_name(axis))) + raise KeyError(u"the label [{key}] is not in the [{axis}]" + .format(key=key, + axis=self.obj._get_axis_name(axis))) try: key = self._convert_scalar_indexer(key, axis) From 36dadd70376c6033037af281a4669a360fc71cfa Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 28 Aug 2017 07:05:29 -0700 Subject: [PATCH 923/933] DOC: Point to dev-docs in issue template (#17353) [ci skip] --- .github/ISSUE_TEMPLATE.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 237e61487d13a..e33835c462511 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -12,6 +12,12 @@ **Note**: Many problems can be resolved by simply upgrading `pandas` to the latest version. Before submitting, please check if that solution works for you. If possible, you may want to check if `master` addresses this issue, but that is not necessary. +For documentation-related issues, you can check the latest versions of the docs on `master` here: + +https://pandas-docs.github.io/pandas-docs-travis/ + +If the issue has not been resolved there, go ahead and file it in the issue tracker. 
+ #### Expected Output #### Output of ``pd.show_versions()`` From df2ebfc9fd424ec760bfd2879993e44aaf983d42 Mon Sep 17 00:00:00 2001 From: chris-b1 Date: Tue, 29 Aug 2017 05:06:29 -0500 Subject: [PATCH 924/933] CLN: remove total_seconds compat from json (#17341) --- pandas/_libs/src/ujson/python/objToJSON.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 4beaa3fd449df..1ee862b54cf0b 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -329,7 +329,7 @@ static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { return ret; } -npy_int64 get_long_attr(PyObject *o, const char *attr) { +static npy_int64 get_long_attr(PyObject *o, const char *attr) { npy_int64 long_val; PyObject *value = PyObject_GetAttrString(o, attr); long_val = (PyLong_Check(value) ? @@ -338,15 +338,12 @@ npy_int64 get_long_attr(PyObject *o, const char *attr) { return long_val; } -npy_float64 total_seconds(PyObject *td) { - // Python 2.6 compat - // TODO(anyone): remove this legacy workaround with a more - // direct td.total_seconds() - npy_int64 microseconds = get_long_attr(td, "microseconds"); - npy_int64 seconds = get_long_attr(td, "seconds"); - npy_int64 days = get_long_attr(td, "days"); - npy_int64 days_in_seconds = days * 24LL * 3600LL; - return (microseconds + (seconds + days_in_seconds) * 1000000.0) / 1000000.0; +static npy_float64 total_seconds(PyObject *td) { + npy_float64 double_val; + PyObject *value = PyObject_CallMethod(td, "total_seconds", NULL); + double_val = PyFloat_AS_DOUBLE(value); + Py_DECREF(value); + return double_val; } static PyObject *get_item(PyObject *obj, Py_ssize_t i) { From 6bab9d18bef3b7fccab2830d6dad78d0fb476ed8 Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 29 Aug 2017 04:10:15 -0600 Subject: [PATCH 925/933] CLN: Move test_intersect_str_dates (#17366) Moves test_intersect_str_dates from tests/indexes/test_range.py to tests/indexes/test_base.py. 
--- pandas/tests/indexes/test_base.py | 9 +++++++++ pandas/tests/indexes/test_range.py | 9 --------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index ef36e4a91aa1c..07e98c326bcaa 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -663,6 +663,15 @@ def test_intersection(self): intersect = first.intersection(second) assert intersect.name is None + def test_intersect_str_dates(self): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + i1 = Index(dt_dates, dtype=object) + i2 = Index(['aa'], dtype=object) + res = i2.intersection(i1) + + assert len(res) == 0 + def test_union(self): first = self.strIndex[5:20] second = self.strIndex[:10] diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 566354da4870d..5ecf467b57fc5 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -639,15 +639,6 @@ def test_intersection(self): expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) - def test_intersect_str_dates(self): - dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] - - i1 = Index(dt_dates, dtype=object) - i2 = Index(['aa'], dtype=object) - res = i2.intersection(i1) - - assert len(res) == 0 - def test_union_noncomparable(self): from datetime import datetime, timedelta # corner case, non-Int64Index From 9a1dfca9182c86c90fffa26579844244cfd7cd7a Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 29 Aug 2017 05:52:51 -0700 Subject: [PATCH 926/933] BUG: Respect dups in reindexing CategoricalIndex (#17355) When the indexer is identical to the elements. We should still return duplicates when the indexer contains duplicates. Closes gh-17323. 
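A sketch of the behavior change, adapted from the test added below (the exact values are illustrative): with a duplicated indexer, ``CategoricalIndex.get_indexer`` now agrees with what a plain object ``Index`` reports via ``get_indexer_non_unique``, instead of short-circuiting to ``np.arange``.

    import numpy as np
    import pandas as pd

    ci = pd.CategoricalIndex(list("aabbca"))
    oidx = pd.Index(np.array(ci))

    # The indexer equals the index's own values, but the index is not
    # unique, so the arange() fast path must not be taken.
    finder = list("aabbca")
    expected = oidx.get_indexer_non_unique(finder)[0]
    actual = ci.get_indexer(finder)
    np.testing.assert_array_equal(actual, expected)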
--- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/category.py | 2 +- pandas/tests/indexes/test_category.py | 22 +++++++++++++++++----- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index fcadd26156b1d..942e37a29f8d5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -358,6 +358,7 @@ Indexing - Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`) - Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) - Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) +- Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) I/O ^^^ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index f22407308e094..0681202289311 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -487,7 +487,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) target = ibase._ensure_index(target) - if self.equals(target): + if self.is_unique and self.equals(target): return np.arange(len(self), dtype='intp') if method == 'pad' or method == 'backfill': diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 64bd6df361aeb..05d31af57b36c 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -365,18 +365,18 @@ def test_astype(self): tm.assert_index_equal(result, expected) def test_reindex_base(self): - - # determined by cat ordering - idx = self.create_index() + # Determined by cat ordering. + idx = CategoricalIndex(list("cab"), categories=list("cab")) expected = np.arange(len(idx), dtype=np.intp) actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) - with tm.assert_raises_regex(ValueError, 'Invalid fill method'): - idx.get_indexer(idx, method='invalid') + with tm.assert_raises_regex(ValueError, "Invalid fill method"): + idx.get_indexer(idx, method="invalid") def test_reindexing(self): + np.random.seed(123456789) ci = self.create_index() oidx = Index(np.array(ci)) @@ -388,6 +388,18 @@ def test_reindexing(self): actual = ci.get_indexer(finder) tm.assert_numpy_array_equal(expected, actual) + # see gh-17323 + # + # Even when indexer is equal to the + # members in the index, we should + # respect duplicates instead of taking + # the fast-track path. 
+ for finder in [list("aabbca"), list("aababca")]: + expected = oidx.get_indexer_non_unique(finder)[0] + + actual = ci.get_indexer(finder) + tm.assert_numpy_array_equal(expected, actual) + def test_reindex_dtype(self): c = CategoricalIndex(['a', 'b', 'c', 'a']) res, indexer = c.reindex(['a', 'c']) From e8a1765edf91ec4d087b46b90d5e54530550029b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 29 Aug 2017 06:23:38 -0700 Subject: [PATCH 927/933] Unify Index._dir_* with Series implementation (#17117) --- pandas/core/accessor.py | 35 +++++++++++++++++++++++++++++++++++ pandas/core/base.py | 22 +++------------------- pandas/core/generic.py | 5 +++-- pandas/core/indexes/base.py | 9 +++++++-- pandas/core/series.py | 17 +++-------------- pandas/core/strings.py | 20 ++------------------ 6 files changed, 53 insertions(+), 55 deletions(-) create mode 100644 pandas/core/accessor.py diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py new file mode 100644 index 0000000000000..9f8556d1e6961 --- /dev/null +++ b/pandas/core/accessor.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +""" + +accessor.py contains base classes for implementing accessor properties +that can be mixed into or pinned onto other pandas classes. + +""" + + +class DirNamesMixin(object): + _accessors = frozenset([]) + + def _dir_deletions(self): + """ delete unwanted __dir__ for this object """ + return self._accessors + + def _dir_additions(self): + """ add addtional __dir__ for this object """ + rv = set() + for accessor in self._accessors: + try: + getattr(self, accessor) + rv.add(accessor) + except AttributeError: + pass + return rv + + def __dir__(self): + """ + Provide method name lookup and completion + Only provide 'public' methods + """ + rv = set(dir(type(self))) + rv = (rv - self._dir_deletions()) | self._dir_additions() + return sorted(rv) diff --git a/pandas/core/base.py b/pandas/core/base.py index a7c991dc8d257..d60a8515dc920 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -19,6 +19,7 @@ from pandas.util._decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError +from pandas.core.accessor import DirNamesMixin _shared_docs = dict() _indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='', @@ -73,7 +74,7 @@ def __repr__(self): return str(self) -class PandasObject(StringMixin): +class PandasObject(StringMixin, DirNamesMixin): """baseclass for various pandas objects""" @@ -92,23 +93,6 @@ def __unicode__(self): # Should be overwritten by base classes return object.__repr__(self) - def _dir_additions(self): - """ add addtional __dir__ for this object """ - return set() - - def _dir_deletions(self): - """ delete unwanted __dir__ for this object """ - return set() - - def __dir__(self): - """ - Provide method name lookup and completion - Only provide 'public' methods - """ - rv = set(dir(type(self))) - rv = (rv - self._dir_deletions()) | self._dir_additions() - return sorted(rv) - def _reset_cache(self, key=None): """ Reset cached properties. If ``key`` is passed, only clears that key. @@ -141,7 +125,7 @@ class NoNewAttributesMixin(object): Prevents additional attributes via xxx.attribute = "something" after a call to `self.__freeze()`. Mainly used to prevent the user from using - wrong attrirbutes on a accessor (`Series.cat/.str/.dt`). + wrong attributes on a accessor (`Series.cat/.str/.dt`). If you really want to add a new attribute at a later time, you need to use `object.__setattr__(self, key, value)`. 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f8366c804e3e7..cdb08d8887e05 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -192,8 +192,9 @@ def __unicode__(self): def _dir_additions(self): """ add the string-like attributes from the info_axis """ - return set([c for c in self._info_axis - if isinstance(c, string_types) and isidentifier(c)]) + additions = set([c for c in self._info_axis + if isinstance(c, string_types) and isidentifier(c)]) + return super(NDFrame, self)._dir_additions().union(additions) @property def _constructor_sliced(self): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a21e6df3ffc93..31cf1e48b8529 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -56,7 +56,7 @@ import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing from pandas.core.ops import _comp_method_OBJECT_ARRAY -from pandas.core.strings import StringAccessorMixin +from pandas.core import strings from pandas.core.config import get_option @@ -102,7 +102,7 @@ def _new_Index(cls, d): return cls.__new__(cls, **d) -class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): +class Index(IndexOpsMixin, PandasObject): """ Immutable ndarray implementing an ordered, sliceable set. The basic object storing axis labels for all pandas objects @@ -155,6 +155,11 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): _engine_type = libindex.ObjectEngine + _accessors = frozenset(['str']) + + # String Methods + str = base.AccessorProperty(strings.StringMethods) + def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): diff --git a/pandas/core/series.py b/pandas/core/series.py index 75dc3d6403650..6905fc1aced74 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -114,8 +114,7 @@ def wrapper(self): # Series class -class Series(base.IndexOpsMixin, strings.StringAccessorMixin, - generic.NDFrame,): +class Series(base.IndexOpsMixin, generic.NDFrame): """ One-dimensional ndarray with axis labels (including time series). 
@@ -2923,18 +2922,8 @@ def to_period(self, freq=None, copy=True): # Categorical methods cat = base.AccessorProperty(CategoricalAccessor) - def _dir_deletions(self): - return self._accessors - - def _dir_additions(self): - rv = set() - for accessor in self._accessors: - try: - getattr(self, accessor) - rv.add(accessor) - except AttributeError: - pass - return rv + # String Methods + str = base.AccessorProperty(strings.StringMethods) # ---------------------------------------------------------------------- # Add plotting methods to Series diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 0b1db0277eee3..2f95e510bba5e 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -16,7 +16,7 @@ from pandas.core.algorithms import take_1d import pandas.compat as compat -from pandas.core.base import AccessorProperty, NoNewAttributesMixin +from pandas.core.base import NoNewAttributesMixin from pandas.util._decorators import Appender import re import pandas._libs.lib as lib @@ -1920,20 +1920,4 @@ def _make_accessor(cls, data): message = ("Can only use .str accessor with Index, not " "MultiIndex") raise AttributeError(message) - return StringMethods(data) - - -class StringAccessorMixin(object): - """ Mixin to add a `.str` acessor to the class.""" - - str = AccessorProperty(StringMethods) - - def _dir_additions(self): - return set() - - def _dir_deletions(self): - try: - getattr(self, 'str') - except AttributeError: - return set(['str']) - return set() + return cls(data) From 0618f9950ad72f6f30283bbcf44fcdcf5918756d Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 29 Aug 2017 19:03:17 +0200 Subject: [PATCH 928/933] BUG: make order of index from pd.concat deterministic (#17364) closes #17344 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/common.py | 14 ++++++++++++++ pandas/core/indexes/api.py | 9 ++------- pandas/tests/reshape/test_concat.py | 13 ++++++++++++- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 942e37a29f8d5..a3673609147a6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -406,6 +406,7 @@ Reshaping - Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`) - :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`). - Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) +- Bug in :func:`concat` where order of result index was unpredictable if it contained non-comparable elements (:issue:`17344`) Numeric ^^^^^^^ diff --git a/pandas/core/common.py b/pandas/core/common.py index 44cb36b8a3207..515a401096120 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -629,3 +629,17 @@ def _random_state(state=None): else: raise ValueError("random_state must be an integer, a numpy " "RandomState, or None") + + +def _get_distinct_objs(objs): + """ + Return a list with distinct elements of "objs" (different ids). + Preserves order. 
+ """ + ids = set() + res = [] + for obj in objs: + if not id(obj) in ids: + ids.add(id(obj)) + res.append(obj) + return res diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index db73a6878258a..323d50166e7b6 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -23,8 +23,7 @@ 'PeriodIndex', 'DatetimeIndex', '_new_Index', 'NaT', '_ensure_index', '_get_na_value', '_get_combined_index', - '_get_objs_combined_axis', - '_get_distinct_indexes', '_union_indexes', + '_get_objs_combined_axis', '_union_indexes', '_get_consensus_names', '_all_indexes_same'] @@ -41,7 +40,7 @@ def _get_objs_combined_axis(objs, intersect=False, axis=0): def _get_combined_index(indexes, intersect=False): # TODO: handle index names! - indexes = _get_distinct_indexes(indexes) + indexes = com._get_distinct_objs(indexes) if len(indexes) == 0: return Index([]) if len(indexes) == 1: @@ -55,10 +54,6 @@ def _get_combined_index(indexes, intersect=False): return _ensure_index(union) -def _get_distinct_indexes(indexes): - return list(dict((id(x), x) for x in indexes).values()) - - def _union_indexes(indexes): if len(indexes) == 0: raise AssertionError('Must have at least 1 Index to union') diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 52cd18126859a..6e646f9b29442 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -5,7 +5,7 @@ from numpy.random import randn from datetime import datetime -from pandas.compat import StringIO, iteritems +from pandas.compat import StringIO, iteritems, PY2 import pandas as pd from pandas import (DataFrame, concat, read_csv, isna, Series, date_range, @@ -1944,6 +1944,17 @@ def test_concat_categoricalindex(self): index=exp_idx) tm.assert_frame_equal(result, exp) + def test_concat_order(self): + # GH 17344 + dfs = [pd.DataFrame(index=range(3), columns=['a', 1, None])] + dfs += [pd.DataFrame(index=range(3), columns=[None, 1, 'a']) + for i in range(100)] + result = pd.concat(dfs).columns + expected = dfs[0].columns + if PY2: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) @pytest.mark.parametrize('dt', np.sctypes['float']) From 0d676a3ccf1d7aa986416a7488b941496f936d98 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 29 Aug 2017 10:04:07 -0700 Subject: [PATCH 929/933] Fix typo that causes several NaT methods to have incorrect docstrings (#17327) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/_libs/tslib.pyx | 7 ++++--- pandas/tests/scalar/test_nat.py | 5 +++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index a3673609147a6..33b7e128ef8bf 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -425,3 +425,4 @@ Categorical Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) +- Several ``NaT`` method docstrings (e.g. 
:func:`NaT.ctime`) were incorrect (:issue:`17327`) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b5aca2e3ec309..5dd30072fb7aa 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # cython: profile=False import warnings @@ -3922,7 +3923,7 @@ for _method_name in _nat_methods: def f(*args, **kwargs): return NaT f.__name__ = func_name - f.__doc__ = _get_docstring(_method_name) + f.__doc__ = _get_docstring(func_name) return f setattr(NaTType, _method_name, _make_nat_func(_method_name)) @@ -3934,7 +3935,7 @@ for _method_name in _nan_methods: def f(*args, **kwargs): return np.nan f.__name__ = func_name - f.__doc__ = _get_docstring(_method_name) + f.__doc__ = _get_docstring(func_name) return f setattr(NaTType, _method_name, _make_nan_func(_method_name)) @@ -3952,7 +3953,7 @@ for _maybe_method_name in dir(NaTType): def f(*args, **kwargs): raise ValueError("NaTType does not support " + func_name) f.__name__ = func_name - f.__doc__ = _get_docstring(_method_name) + f.__doc__ = _get_docstring(func_name) return f setattr(NaTType, _maybe_method_name, diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 5f247cae1099b..6f852f2b394e1 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -247,3 +247,8 @@ def test_nat_arithmetic_index(): tm.assert_index_equal(right + left, exp) tm.assert_index_equal(left - right, exp) tm.assert_index_equal(right - left, exp) + + +def test_nat_pinned_docstrings(): + # GH17327 + assert NaT.ctime.__doc__ == datetime.ctime.__doc__ From b9d48e48904b0e607c4d18738df50dec744b745f Mon Sep 17 00:00:00 2001 From: cbertinato Date: Wed, 30 Aug 2017 06:19:44 -0400 Subject: [PATCH 930/933] CLN: replace %s syntax with .format in io/formats/format.py (#17358) Progress toward issue #16130. Converted old string formatting to new string formatting in io/formats/format.py. 
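Beyond the plain ``%s`` to ``.format`` swaps, this patch also pre-binds formatting parameters with ``functools.partial``. A small sketch of that pattern in isolation (the precision and sample value are made up):

    from functools import partial

    digits = 4

    # Bind the precision once; only the value is supplied per call,
    # mirroring the float_format callables in the diff below.
    float_format = partial('{value: .{digits:d}f}'.format, digits=digits)

    assert float_format(value=3.14159) == ' 3.1416'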
--- pandas/io/formats/format.py | 165 ++++++++++++++++++++---------------- 1 file changed, 93 insertions(+), 72 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 733fd3bd39b52..6a98497aa1bfe 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -47,6 +47,7 @@ import itertools import csv +from functools import partial common_docstring = """ Parameters @@ -109,7 +110,7 @@ def _get_footer(self): if self.length: if footer: footer += ', ' - footer += "Length: %d" % len(self.categorical) + footer += "Length: {length}".format(length=len(self.categorical)) level_info = self.categorical._repr_categories_info() @@ -135,7 +136,7 @@ def to_string(self): fmt_values = self._get_formatted_values() - result = ['%s' % i for i in fmt_values] + result = [u('{i}').format(i=i) for i in fmt_values] result = [i.strip() for i in result] result = u(', ').join(result) result = [u('[') + result + u(']')] @@ -191,7 +192,7 @@ def _get_footer(self): footer = u('') if getattr(self.series.index, 'freq', None) is not None: - footer += 'Freq: %s' % self.series.index.freqstr + footer += 'Freq: {freq}'.format(freq=self.series.index.freqstr) if self.name is not False and name is not None: if footer: @@ -199,20 +200,21 @@ def _get_footer(self): series_name = pprint_thing(name, escape_chars=('\t', '\r', '\n')) - footer += ("Name: %s" % series_name) if name is not None else "" + footer += ((u"Name: {sname}".format(sname=series_name)) + if name is not None else "") if (self.length is True or (self.length == 'truncate' and self.truncate_v)): if footer: footer += ', ' - footer += 'Length: %d' % len(self.series) + footer += 'Length: {length}'.format(length=len(self.series)) if self.dtype is not False and self.dtype is not None: name = getattr(self.tr_series.dtype, 'name', None) if name: if footer: footer += ', ' - footer += 'dtype: %s' % pprint_thing(name) + footer += u'dtype: {typ}'.format(typ=pprint_thing(name)) # level infos are added to the end and in a new line, like it is done # for Categoricals @@ -509,8 +511,10 @@ def _to_str_columns(self): else: if is_list_like(self.header): if len(self.header) != len(self.columns): - raise ValueError(('Writing %d cols but got %d aliases' - % (len(self.columns), len(self.header)))) + raise ValueError(('Writing {ncols} cols but got {nalias} ' + 'aliases' + .format(ncols=len(self.columns), + nalias=len(self.header)))) str_columns = [[label] for label in self.header] else: str_columns = self._get_formatted_column_labels(frame) @@ -578,10 +582,10 @@ def to_string(self): frame = self.frame if len(frame.columns) == 0 or len(frame.index) == 0: - info_line = (u('Empty %s\nColumns: %s\nIndex: %s') % - (type(self.frame).__name__, - pprint_thing(frame.columns), - pprint_thing(frame.index))) + info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}') + .format(name=type(self.frame).__name__, + col=pprint_thing(frame.columns), + idx=pprint_thing(frame.index))) text = info_line else: @@ -630,8 +634,8 @@ def to_string(self): self.buf.writelines(text) if self.should_show_dimensions: - self.buf.write("\n\n[%d rows x %d columns]" % - (len(frame), len(frame.columns))) + self.buf.write("\n\n[{nrows} rows x {ncols} columns]" + .format(nrows=len(frame), ncols=len(frame.columns))) def _join_multiline(self, *strcols): lwidth = self.line_width @@ -805,7 +809,8 @@ def _get_formatted_index(self, frame): # empty space for columns if show_col_names: - col_header = ['%s' % x for x in self._get_column_name_list()] + col_header = ['{x}'.format(x=x) + for x 
in self._get_column_name_list()] else: col_header = [''] * columns.nlevels @@ -861,9 +866,10 @@ def write_result(self, buf): # string representation of the columns if len(self.frame.columns) == 0 or len(self.frame.index) == 0: - info_line = (u('Empty %s\nColumns: %s\nIndex: %s') % - (type(self.frame).__name__, self.frame.columns, - self.frame.index)) + info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}') + .format(name=type(self.frame).__name__, + col=self.frame.columns, + idx=self.frame.index)) strcols = [[info_line]] else: strcols = self.fmt._to_str_columns() @@ -906,14 +912,16 @@ def get_col_type(dtype): column_format = index_format + column_format elif not isinstance(column_format, compat.string_types): # pragma: no cover - raise AssertionError('column_format must be str or unicode, not %s' - % type(column_format)) + raise AssertionError('column_format must be str or unicode, ' + 'not {typ}'.format(typ=type(column_format))) if not self.longtable: - buf.write('\\begin{tabular}{%s}\n' % column_format) + buf.write('\\begin{{tabular}}{{{fmt}}}\n' + .format(fmt=column_format)) buf.write('\\toprule\n') else: - buf.write('\\begin{longtable}{%s}\n' % column_format) + buf.write('\\begin{{longtable}}{{{fmt}}}\n' + .format(fmt=column_format)) buf.write('\\toprule\n') ilevels = self.frame.index.nlevels @@ -948,7 +956,7 @@ def get_col_type(dtype): crow = [x if x else '{}' for x in row] if self.bold_rows and self.fmt.index: # bold row labels - crow = ['\\textbf{%s}' % x + crow = ['\\textbf{{{x}}}'.format(x=x) if j < ilevels and x.strip() not in ['', '{}'] else x for j, x in enumerate(crow)] if i < clevels and self.fmt.header and self.multicolumn: @@ -986,9 +994,9 @@ def _format_multicolumn(self, row, ilevels): def append_col(): # write multicolumn if needed if ncol > 1: - row2.append('\\multicolumn{{{0:d}}}{{{1:s}}}{{{2:s}}}' - .format(ncol, self.multicolumn_format, - coltext.strip())) + row2.append('\\multicolumn{{{ncol:d}}}{{{fmt:s}}}{{{txt:s}}}' + .format(ncol=ncol, fmt=self.multicolumn_format, + txt=coltext.strip())) # don't modify where not needed else: row2.append(coltext) @@ -1027,8 +1035,8 @@ def _format_multirow(self, row, ilevels, i, rows): break if nrow > 1: # overwrite non-multirow entry - row[j] = '\\multirow{{{0:d}}}{{*}}{{{1:s}}}'.format( - nrow, row[j].strip()) + row[j] = '\\multirow{{{nrow:d}}}{{*}}{{{row:s}}}'.format( + nrow=nrow, row=row[j].strip()) # save when to end the current block with \cline self.clinebuf.append([i + nrow - 1, j + 1]) return row @@ -1039,7 +1047,8 @@ def _print_cline(self, buf, i, icol): """ for cl in self.clinebuf: if cl[0] == i: - buf.write('\cline{{{0:d}-{1:d}}}\n'.format(cl[1], icol)) + buf.write('\cline{{{cl:d}-{icol:d}}}\n' + .format(cl=cl[1], icol=icol)) # remove entries that have been written to buffer self.clinebuf = [x for x in self.clinebuf if x[0] != i] @@ -1076,7 +1085,8 @@ def write(self, s, indent=0): def write_th(self, s, indent=0, tags=None): if self.fmt.col_space is not None and self.fmt.col_space > 0: tags = (tags or "") - tags += 'style="min-width: %s;"' % self.fmt.col_space + tags += ('style="min-width: {colspace};"' + .format(colspace=self.fmt.col_space)) return self._write_cell(s, kind='th', indent=indent, tags=tags) @@ -1085,9 +1095,9 @@ def write_td(self, s, indent=0, tags=None): def _write_cell(self, s, kind='td', indent=0, tags=None): if tags is not None: - start_tag = '<%s %s>' % (kind, tags) + start_tag = '<{kind} {tags}>'.format(kind=kind, tags=tags) else: - start_tag = '<%s>' % kind + start_tag = 
'<{kind}>'.format(kind=kind) if self.escape: # escape & first to prevent double escaping of & @@ -1096,7 +1106,8 @@ def _write_cell(self, s, kind='td', indent=0, tags=None): else: esc = {} rs = pprint_thing(s, escape_chars=esc).strip() - self.write('%s%s' % (start_tag, rs, kind), indent) + self.write(u'{start}{rs}' + .format(start=start_tag, rs=rs, kind=kind), indent) def write_tr(self, line, indent=0, indent_delta=4, header=False, align=None, tags=None, nindex_levels=0): @@ -1106,7 +1117,8 @@ def write_tr(self, line, indent=0, indent_delta=4, header=False, if align is None: self.write('', indent) else: - self.write('' % align, indent) + self.write('' + .format(align=align), indent) indent += indent_delta for i, s in enumerate(line): @@ -1146,8 +1158,8 @@ def write_result(self, buf): if isinstance(self.classes, str): self.classes = self.classes.split() if not isinstance(self.classes, (list, tuple)): - raise AssertionError('classes must be list or tuple, ' - 'not %s' % type(self.classes)) + raise AssertionError('classes must be list or tuple, not {typ}' + .format(typ=type(self.classes))) _classes.extend(self.classes) if self.notebook: @@ -1159,12 +1171,11 @@ def write_result(self, buf): except (ImportError, AttributeError): pass - self.write(''.format(div_style)) + self.write(''.format(style=div_style)) self.write_style() - self.write('
<table border="%s" class="%s">' % (self.border, - ' '.join(_classes)), - indent) + self.write('<table border="{border}" class="{cls}">' + .format(border=self.border, cls=' '.join(_classes)), indent) indent += self.indent_delta indent = self._write_header(indent) @@ -1173,8 +1184,10 @@ self.write('</table>', indent) if self.should_show_dimensions: by = chr(215) if compat.PY3 else unichr(215) # × - self.write(u('<p>%d rows %s %d columns</p>') % - (len(frame), by, len(frame.columns))) + self.write(u('<p>{rows} rows {by} {cols} columns</p>
') + .format(rows=len(frame), + by=by, + cols=len(frame.columns))) if self.notebook: self.write('') @@ -1199,7 +1212,7 @@ def _column_header(): row.append(single_column_table(self.columns.names)) else: row.append('') - style = "text-align: %s;" % self.fmt.justify + style = "text-align: {just};".format(just=self.fmt.justify) row.extend([single_column_table(c, self.fmt.justify, style) for c in self.columns]) else: @@ -1214,7 +1227,7 @@ def _column_header(): indent += self.indent_delta if isinstance(self.columns, MultiIndex): - template = 'colspan="%d" halign="left"' + template = 'colspan="{span:d}" halign="left"' if self.fmt.sparsify: # GH3547 @@ -1282,7 +1295,7 @@ def _column_header(): for i, v in enumerate(values): if i in records: if records[i] > 1: - tags[j] = template % records[i] + tags[j] = template.format(span=records[i]) else: continue j += 1 @@ -1372,7 +1385,7 @@ def _write_regular_rows(self, fmt_values, indent): nindex_levels=1) def _write_hierarchical_rows(self, fmt_values, indent): - template = 'rowspan="%d" valign="top"' + template = 'rowspan="{span}" valign="top"' truncate_h = self.fmt.truncate_h truncate_v = self.fmt.truncate_v @@ -1447,7 +1460,7 @@ def _write_hierarchical_rows(self, fmt_values, indent): for records, v in zip(level_lengths, idx_values[i]): if i in records: if records[i] > 1: - tags[j] = template % records[i] + tags[j] = template.format(span=records[i]) else: sparse_offset += 1 continue @@ -1615,8 +1628,9 @@ def _save_header(self): return if has_aliases: if len(header) != len(cols): - raise ValueError(('Writing %d cols but got %d aliases' - % (len(cols), len(header)))) + raise ValueError(('Writing {ncols} cols but got {nalias} ' + 'aliases'.format(ncols=len(cols), + nalias=len(header)))) else: write_cols = header else: @@ -1790,8 +1804,9 @@ def _format_strings(self): if self.float_format is None: float_format = get_option("display.float_format") if float_format is None: - fmt_str = '%% .%dg' % get_option("display.precision") - float_format = lambda x: fmt_str % x + fmt_str = ('{{x: .{prec:d}g}}' + .format(prec=get_option("display.precision"))) + float_format = lambda x: fmt_str.format(x=x) else: float_format = self.float_format @@ -1807,10 +1822,10 @@ def _format(x): return 'NaT' return self.na_rep elif isinstance(x, PandasObject): - return '%s' % x + return u'{x}'.format(x=x) else: # object dtype - return '%s' % formatter(x) + return u'{x}'.format(x=formatter(x)) vals = self.values if isinstance(vals, Index): @@ -1824,11 +1839,11 @@ def _format(x): fmt_values = [] for i, v in enumerate(vals): if not is_float_type[i] and leading_space: - fmt_values.append(' %s' % _format(v)) + fmt_values.append(u' {v}'.format(v=_format(v))) elif is_float_type[i]: fmt_values.append(float_format(v)) else: - fmt_values.append(' %s' % _format(v)) + fmt_values.append(u' {v}'.format(v=_format(v))) return fmt_values @@ -1864,7 +1879,7 @@ def _value_formatter(self, float_format=None, threshold=None): # because str(0.0) = '0.0' while '%g' % 0.0 = '0' if float_format: def base_formatter(v): - return (float_format % v) if notna(v) else self.na_rep + return float_format(value=v) if notna(v) else self.na_rep else: def base_formatter(v): return str(v) if notna(v) else self.na_rep @@ -1925,10 +1940,14 @@ def format_values_with(float_format): # There is a special default string when we are fixed-width # The default is otherwise to use str instead of a formatting string - if self.float_format is None and self.fixed_width: - float_format = '%% .%df' % self.digits + if self.float_format is None: 
+ if self.fixed_width: + float_format = partial('{value: .{digits:d}f}'.format, + digits=self.digits) + else: + float_format = self.float_format else: - float_format = self.float_format + float_format = lambda value: self.float_format % value formatted_values = format_values_with(float_format) @@ -1955,7 +1974,8 @@ def format_values_with(float_format): (abs_vals > 0)).any() if has_small_values or (too_long and has_large_values): - float_format = '%% .%de' % self.digits + float_format = partial('{value: .{digits:d}e}'.format, + digits=self.digits) formatted_values = format_values_with(float_format) return formatted_values @@ -1971,7 +1991,7 @@ def _format_strings(self): class IntArrayFormatter(GenericArrayFormatter): def _format_strings(self): - formatter = self.formatter or (lambda x: '% d' % x) + formatter = self.formatter or (lambda x: '{x: d}'.format(x=x)) fmt_values = [formatter(x) for x in self.values] return fmt_values @@ -2023,7 +2043,7 @@ def _format_strings(self): # periods may contains different freq values = Index(self.values, dtype='object').to_native_types() - formatter = self.formatter or (lambda x: '%s' % x) + formatter = self.formatter or (lambda x: '{x}'.format(x=x)) fmt_values = [formatter(x) for x in values] return fmt_values @@ -2223,7 +2243,7 @@ def _formatter(x): x = Timedelta(x) result = x._repr_base(format=format) if box: - result = "'{0}'".format(result) + result = "'{res}'".format(res=result) return result return _formatter @@ -2278,12 +2298,12 @@ def _cond(values): def single_column_table(column, align=None, style=None): table = '%s' % str(i)) + table += ('{i!s}'.format(i=i)) table += '' return table @@ -2291,7 +2311,7 @@ def single_column_table(column, align=None, style=None): def single_row_table(row): # pragma: no cover table = '' for i in row: - table += ('' % str(i)) + table += (''.format(i=i)) table += '
</tr></tbody></table>
' return table @@ -2385,18 +2405,19 @@ def __call__(self, num): prefix = self.ENG_PREFIXES[int_pow10] else: if int_pow10 < 0: - prefix = 'E-%02d' % (-int_pow10) + prefix = 'E-{pow10:02d}'.format(pow10=-int_pow10) else: - prefix = 'E+%02d' % int_pow10 + prefix = 'E+{pow10:02d}'.format(pow10=int_pow10) mant = sign * dnum / (10**pow10) if self.accuracy is None: # pragma: no cover - format_str = u("% g%s") + format_str = u("{mant: g}{prefix}") else: - format_str = (u("%% .%if%%s") % self.accuracy) + format_str = (u("{{mant: .{acc:d}f}}{{prefix}}") + .format(acc=self.accuracy)) - formatted = format_str % (mant, prefix) + formatted = format_str.format(mant=mant, prefix=prefix) return formatted # .strip() From 77bfe21c7229e724d01721bb84861283baf7e9d3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 30 Aug 2017 05:50:04 -0500 Subject: [PATCH 931/933] PKG: Added pyproject.toml for PEP 518 (#16745) Declaring build-time requirements: https://www.python.org/dev/peps/pep-0518/ --- MANIFEST.in | 1 + doc/source/whatsnew/v0.21.0.txt | 1 + pyproject.toml | 9 +++++++++ 3 files changed, 11 insertions(+) create mode 100644 pyproject.toml diff --git a/MANIFEST.in b/MANIFEST.in index 8bd83a7d56948..1a6b831c1b975 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,6 +3,7 @@ include LICENSE include RELEASE.md include README.rst include setup.py +include pyproject.toml graft doc prune doc/build diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 33b7e128ef8bf..014f251ffb90a 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -112,6 +112,7 @@ Other Enhancements ^^^^^^^^^^^^^^^^^^ - The ``validate`` argument for :func:`merge` function now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception of type ``MergeError`` will be raised. For more, see :ref:`here ` (:issue:`16270`) +- Added support for `PEP 518 `_ to the build system (:issue:`16745`) - :func:`Series.to_dict` and :func:`DataFrame.to_dict` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`) - :func:`RangeIndex.append` now returns a ``RangeIndex`` object when possible (:issue:`16212`) - :func:`Series.rename_axis` and :func:`DataFrame.rename_axis` with ``inplace=True`` now return ``None`` while renaming the axis inplace. 
(:issue:`15704`) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000..f0d57d1d808a2 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,9 @@ +[build-system] +requires = [ + "wheel", + "setuptools", + "Cython", # required for VCS build, optional for released source + "numpy==1.9.3; python_version=='3.5'", + "numpy==1.12.1; python_version=='3.6'", + "numpy==1.13.1; python_version>='3.7'", +] From ad7d6fc0248edaf098537e5674dcc0c9dd059491 Mon Sep 17 00:00:00 2001 From: iulia Date: Wed, 30 Aug 2017 19:39:45 +0300 Subject: [PATCH 932/933] DOC: Update Overview page in documentation (#17368) * Update Overview page in documentation * DOC Revise Overview page * DOC Make further revisions in Overview webpage * Update overview.rst Remove references to Panel --- doc/source/overview.rst | 71 ++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/doc/source/overview.rst b/doc/source/overview.rst index 92caeec319169..00a71603e1261 100644 --- a/doc/source/overview.rst +++ b/doc/source/overview.rst @@ -6,7 +6,11 @@ Package overview **************** -:mod:`pandas` consists of the following things +:mod:`pandas` is an open source, BSD-licensed library providing high-performance, +easy-to-use data structures and data analysis tools for the `Python `__ +programming language. + +:mod:`pandas` consists of the following elements * A set of labeled array data structures, the primary of which are Series and DataFrame @@ -21,27 +25,23 @@ Package overview * Memory-efficient "sparse" versions of the standard data structures for storing data that is mostly missing or mostly constant (some fixed value) * Moving window statistics (rolling mean, rolling standard deviation, etc.) - * Static and moving window linear and `panel regression - `__ -Data structures at a glance ---------------------------- +Data Structures +--------------- .. csv-table:: :header: "Dimensions", "Name", "Description" :widths: 15, 20, 50 - 1, Series, "1D labeled homogeneously-typed array" - 2, DataFrame, "General 2D labeled, size-mutable tabular structure with - potentially heterogeneously-typed columns" - 3, Panel, "General 3D labeled, also size-mutable array" + 1, "Series", "1D labeled homogeneously-typed array" + 2, "DataFrame", "General 2D labeled, size-mutable tabular structure with potentially heterogeneously-typed column" -Why more than 1 data structure? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Why more than one data structure? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The best way to think about the pandas data structures is as flexible containers for lower dimensional data. For example, DataFrame is a container -for Series, and Panel is a container for DataFrame objects. We would like to be +for Series, and Series is a container for scalars. We would like to be able to insert and remove objects from these containers in a dictionary-like fashion. @@ -85,36 +85,41 @@ The first stop for pandas issues and ideas is the `Github Issue Tracker pandas community experts can answer through `Stack Overflow `__. -Longer discussions occur on the `developer mailing list -`__, and commercial support -inquiries for Lambda Foundry should be sent to: support@lambdafoundry.com +Community +--------- -Credits -------- +pandas is actively supported today by a community of like-minded individuals around +the world who contribute their valuable time and energy to help make open source +pandas possible. Thanks to `all of our contributors `__. 
+ +If you're interested in contributing, please +visit `Contributing to pandas webpage `__. -pandas development began at `AQR Capital Management `__ in -April 2008. It was open-sourced at the end of 2009. AQR continued to provide -resources for development through the end of 2011, and continues to contribute -bug reports today. +pandas is a `NUMFocus `__ sponsored project. +This will help ensure the success of development of pandas as a world-class open-source +project, and makes it possible to `donate `__ to the project. -Since January 2012, `Lambda Foundry `__, has -been providing development resources, as well as commercial support, -training, and consulting for pandas. +Project Governance +------------------ -pandas is only made possible by a group of people around the world like you -who have contributed new code, bug reports, fixes, comments and ideas. A -complete list can be found `on Github `__. +The governance process that pandas project has used informally since its inception in 2008 is formalized in `Project Governance documents `__ . +The documents clarify how decisions are made and how the various elements of our community interact, including the relationship between open source collaborative development and work that may be funded by for-profit or non-profit entities. + +Wes McKinney is the Benevolent Dictator for Life (BDFL). Development Team ----------------- +----------------- + +The list of the Core Team members and more detailed information can be found on the `people’s page `__ of the governance repo. + -pandas is a part of the PyData project. The PyData Development Team is a -collection of developers focused on the improvement of Python's data -libraries. The core team that coordinates development can be found on `Github -`__. If you're interested in contributing, please -visit the `project website `__. +Institutional Partners +---------------------- + +The information about current institutional partners can be found on `pandas website page `__ License ------- .. literalinclude:: ../../LICENSE + From 64c8a8d6fecacb796da8265ace870a4fcab98092 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 30 Aug 2017 15:30:53 -0500 Subject: [PATCH 933/933] API: Have MultiIndex consturctors always return a MI (#17236) * API: Have MultiIndex constructors return MI This removes the special case for MultiIndex constructors returning an Index if all the levels are length-1. Now this will return a MultiIndex with a single level. This is a backwards incompatabile change, with no clear method for deprecation, so we're making a clean break. Closes #17178 * fixup! 
API: Have MultiIndex constructors return MI * Update for comments --- doc/source/whatsnew/v0.21.0.txt | 24 +++++++++++ pandas/core/frame.py | 11 ++--- pandas/core/indexes/api.py | 12 ++++-- pandas/core/indexes/base.py | 69 ++++++++++++++++++++++++++++++ pandas/core/indexes/multi.py | 10 ----- pandas/core/reshape/reshape.py | 21 ++++++--- pandas/core/sparse/scipy_sparse.py | 6 ++- pandas/core/strings.py | 7 ++- pandas/io/parsers.py | 13 +++--- pandas/tests/indexes/test_base.py | 18 +++++++- pandas/tests/indexes/test_multi.py | 20 ++++----- pandas/util/testing.py | 4 ++ 12 files changed, 170 insertions(+), 45 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 014f251ffb90a..273cbd8357f85 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -274,6 +274,30 @@ named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement. +.. _whatsnew_210.api.multiindex_single: + +MultiIndex Constructor with a Single Level +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``MultiIndex`` constructors no longer squeeze a MultiIndex with all +length-one levels down to a regular ``Index``. This affects all the +``MultiIndex`` constructors. (:issue:`17178`) + +Previous behavior: + +.. code-block:: ipython + + In [2]: pd.MultiIndex.from_tuples([('a',), ('b',)]) + Out[2]: Index(['a', 'b'], dtype='object') + +Length 1 levels are no longer special-cased. They behave exactly as if you had +length 2+ levels, so a :class:`MultiIndex` is always returned from all of the +``MultiIndex`` constructors: + +.. ipython:: python + + pd.MultiIndex.from_tuples([('a',), ('b',)]) + .. 
_whatsnew_0210.api: Other API Changes diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b5b3df64d24c0..5991ec825c841 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -67,7 +67,8 @@ _dict_compat, standardize_mapping) from pandas.core.generic import NDFrame, _shared_docs -from pandas.core.index import Index, MultiIndex, _ensure_index +from pandas.core.index import (Index, MultiIndex, _ensure_index, + _ensure_index_from_sequences) from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable, check_bool_indexer) from pandas.core.internals import (BlockManager, @@ -1155,9 +1156,9 @@ def from_records(cls, data, index=None, exclude=None, columns=None, else: try: to_remove = [arr_columns.get_loc(field) for field in index] - - result_index = MultiIndex.from_arrays( - [arrays[i] for i in to_remove], names=index) + index_data = [arrays[i] for i in to_remove] + result_index = _ensure_index_from_sequences(index_data, + names=index) exclude.update(index) except Exception: @@ -3000,7 +3001,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False, to_remove.append(col) arrays.append(level) - index = MultiIndex.from_arrays(arrays, names=names) + index = _ensure_index_from_sequences(arrays, names) if verify_integrity and not index.is_unique: duplicates = index.get_duplicates() diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 323d50166e7b6..d20a0b0a2c73d 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -1,6 +1,9 @@ -from pandas.core.indexes.base import (Index, _new_Index, # noqa - _ensure_index, _get_na_value, - InvalidIndexError) +from pandas.core.indexes.base import (Index, + _new_Index, + _ensure_index, + _ensure_index_from_sequences, + _get_na_value, + InvalidIndexError) # noqa from pandas.core.indexes.category import CategoricalIndex # noqa from pandas.core.indexes.multi import MultiIndex # noqa from pandas.core.indexes.interval import IntervalIndex # noqa @@ -22,7 +25,8 @@ 'InvalidIndexError', 'TimedeltaIndex', 'PeriodIndex', 'DatetimeIndex', '_new_Index', 'NaT', - '_ensure_index', '_get_na_value', '_get_combined_index', + '_ensure_index', '_ensure_index_from_sequences', '_get_na_value', + '_get_combined_index', '_get_objs_combined_axis', '_union_indexes', '_get_consensus_names', '_all_indexes_same'] diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 31cf1e48b8529..6a30eaefaaae7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4012,7 +4012,76 @@ def invalid_op(self, other=None): Index._add_comparison_methods() +def _ensure_index_from_sequences(sequences, names=None): + """Construct an index from sequences of data. + + A single sequence returns an Index. Many sequences return a + MultiIndex.
+ + Parameters + ---------- + sequences : sequence of sequences + names : sequence of str + + Returns + ------- + index : Index or MultiIndex + + Examples + -------- + >>> _ensure_index_from_sequences([[1, 2, 3]], names=['name']) + Int64Index([1, 2, 3], dtype='int64', name='name') + + >>> _ensure_index_from_sequences([['a', 'a'], ['a', 'b']], + names=['L1', 'L2']) + MultiIndex(levels=[['a'], ['a', 'b']], + labels=[[0, 0], [0, 1]], + names=['L1', 'L2']) + + See Also + -------- + _ensure_index + """ + from .multi import MultiIndex + + if len(sequences) == 1: + if names is not None: + names = names[0] + return Index(sequences[0], name=names) + else: + return MultiIndex.from_arrays(sequences, names=names) + + def _ensure_index(index_like, copy=False): + """ + Ensure that we have an index from some index-like object + + Parameters + ---------- + index_like : sequence + An Index or other sequence + copy : bool + + Returns + ------- + index : Index or MultiIndex + + Examples + -------- + >>> _ensure_index(['a', 'b']) + Index(['a', 'b'], dtype='object') + + >>> _ensure_index([('a', 'a'), ('b', 'c')]) + Index([('a', 'a'), ('b', 'c')], dtype='object') + + >>> _ensure_index([['a', 'a'], ['b', 'c']]) + MultiIndex(levels=[['a'], ['b', 'c']], + labels=[[0, 0], [0, 1]]) + + See Also + -------- + _ensure_index_from_sequences + """ if isinstance(index_like, Index): if copy: index_like = index_like.copy() diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ea45b4700172f..d7d5b6d128a2c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -91,12 +91,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, raise ValueError('Length of levels and labels must be the same.') if len(levels) == 0: raise ValueError('Must pass non-zero number of levels/labels') - if len(levels) == 1: - if names: - name = names[0] - else: - name = None - return Index(levels[0], name=name, copy=True).take(labels[0]) result = object.__new__(MultiIndex) @@ -1084,10 +1078,6 @@ def from_arrays(cls, arrays, sortorder=None, names=None): MultiIndex.from_product : Make a MultiIndex from cartesian product of iterables """ - if len(arrays) == 1: - name = None if names is None else names[0] - return Index(arrays[0], name=name) - # Check if lengths of all arrays are equal or not, # raise ValueError, if not for i in range(1, len(arrays)): diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 455da9246783c..b4abba8026b35 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -31,7 +31,7 @@ from pandas.core.frame import _shared_docs from pandas.util._decorators import Appender -from pandas.core.index import MultiIndex, _get_na_value +from pandas.core.index import Index, MultiIndex, _get_na_value class _Unstacker(object): @@ -311,10 +311,14 @@ def _unstack_multiple(data, clocs): recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels, xnull=False) - dummy_index = MultiIndex(levels=rlevels + [obs_ids], - labels=rlabels + [comp_ids], - names=rnames + ['__placeholder__'], - verify_integrity=False) + if rlocs == []: + # Everything is in clocs, so the dummy df has a regular index + dummy_index = Index(obs_ids, name='__placeholder__') + else: + dummy_index = MultiIndex(levels=rlevels + [obs_ids], + labels=rlabels + [comp_ids], + names=rnames + ['__placeholder__'], + verify_integrity=False) if isinstance(data, Series): dummy = data.copy() @@ -446,7 +450,12 @@ def _slow_pivot(index, columns, values): def unstack(obj,
level, fill_value=None): if isinstance(level, (tuple, list)): - return _unstack_multiple(obj, level) + if len(level) != 1: + # _unstack_multiple only handles MultiIndexes, + # and isn't needed for a single level + return _unstack_multiple(obj, level) + else: + level = level[0] if isinstance(obj, DataFrame): if isinstance(obj.index, MultiIndex): diff --git a/pandas/core/sparse/scipy_sparse.py b/pandas/core/sparse/scipy_sparse.py index ea108e3e89935..d2b9583d8efe5 100644 --- a/pandas/core/sparse/scipy_sparse.py +++ b/pandas/core/sparse/scipy_sparse.py @@ -71,7 +71,11 @@ def robust_get_level_values(i): labels_to_i = Series(labels_to_i) if len(subset) > 1: labels_to_i.index = MultiIndex.from_tuples(labels_to_i.index) - labels_to_i.index.names = [index.names[i] for i in subset] + labels_to_i.index.names = [index.names[i] for i in subset] + else: + labels_to_i.index = Index(x[0] for x in labels_to_i.index) + labels_to_i.index.name = index.names[subset[0]] + labels_to_i.name = 'value' return (labels_to_i) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 2f95e510bba5e..48bc2ee05dd68 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1452,7 +1452,12 @@ def cons_row(x): if expand: result = list(result) - return MultiIndex.from_tuples(result, names=name) + out = MultiIndex.from_tuples(result, names=name) + if out.nlevels == 1: + # We had all tuples of length-one, which are + # better represented as a regular Index. + out = out.get_level_values(0) + return out else: return Index(result, name=name) else: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a9821be3fa5e2..8b1a921536a1d 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -23,7 +23,8 @@ is_scalar, is_categorical_dtype) from pandas.core.dtypes.missing import isna from pandas.core.dtypes.cast import astype_nansafe -from pandas.core.index import Index, MultiIndex, RangeIndex +from pandas.core.index import (Index, MultiIndex, RangeIndex, + _ensure_index_from_sequences) from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.categorical import Categorical @@ -1444,7 +1445,8 @@ def _agg_index(self, index, try_parse_dates=True): arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues) arrays.append(arr) - index = MultiIndex.from_arrays(arrays, names=self.index_names) + names = self.index_names + index = _ensure_index_from_sequences(arrays, names) return index @@ -1808,7 +1810,7 @@ def read(self, nrows=None): try_parse_dates=True) arrays.append(values) - index = MultiIndex.from_arrays(arrays) + index = _ensure_index_from_sequences(arrays) if self.usecols is not None: names = self._filter_usecols(names) @@ -3138,9 +3140,8 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None): if index_col is None or index_col is False: index = Index([]) else: - index = [Series([], dtype=dtype[index_name]) - for index_name in index_names] - index = MultiIndex.from_arrays(index, names=index_names) + data = [Series([], dtype=dtype[name]) for name in index_names] + index = _ensure_index_from_sequences(data, names=index_names) index_col.sort() for i, n in enumerate(index_col): columns.pop(n - i) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 07e98c326bcaa..aa32e75ba0d58 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -17,7 +17,7 @@ DataFrame, Float64Index, Int64Index, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, isna) -from pandas.core.index import 
_get_combined_index +from pandas.core.index import _get_combined_index, _ensure_index_from_sequences from pandas.util.testing import assert_almost_equal from pandas.compat.numpy import np_datetime64_compat @@ -2112,3 +2112,19 @@ def test_intersect_str_dates(self): res = i2.intersection(i1) assert len(res) == 0 + + +class TestIndexUtils(object): + + @pytest.mark.parametrize('data, names, expected', [ + ([[1, 2, 3]], None, Index([1, 2, 3])), + ([[1, 2, 3]], ['name'], Index([1, 2, 3], name='name')), + ([['a', 'a'], ['c', 'd']], None, + MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]])), + ([['a', 'a'], ['c', 'd']], ['L1', 'L2'], + MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]], + names=['L1', 'L2'])), + ]) + def test_ensure_index_from_sequences(self, data, names, expected): + result = _ensure_index_from_sequences(data, names) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index c66775f4690cc..798d244468961 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -537,15 +537,12 @@ def test_astype(self): self.index.astype(np.dtype(int)) def test_constructor_single_level(self): - single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]], names=['first']) - assert isinstance(single_level, Index) - assert not isinstance(single_level, MultiIndex) - assert single_level.name == 'first' - - single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]]) - assert single_level.name is None + result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], + labels=[[0, 1, 2, 3]], names=['first']) + assert isinstance(result, MultiIndex) + expected = Index(['foo', 'bar', 'baz', 'qux'], name='first') + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ['first'] def test_constructor_no_levels(self): tm.assert_raises_regex(ValueError, "non-zero number " @@ -768,8 +765,9 @@ def test_from_arrays_empty(self): # 1 level result = MultiIndex.from_arrays(arrays=[[]], names=['A']) + assert isinstance(result, MultiIndex) expected = Index([], name='A') - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result.levels[0], expected) # N levels for N in [2, 3]: @@ -830,7 +828,7 @@ def test_from_product_empty(self): # 1 level result = MultiIndex.from_product([[]], names=['A']) expected = pd.Index([], name='A') - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result.levels[0], expected) # 2 levels l1 = [[], ['foo', 'bar', 'baz'], []] diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 5a17cb6d7dc47..7dac83953ad8f 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1909,7 +1909,11 @@ def keyfunc(x): # convert tuples to index if nentries == 1: + # we have a single level of tuples, i.e. a regular Index index = Index(tuples[0], name=names[0]) + elif nlevels == 1: + name = None if names is None else names[0] + index = Index((x[0] for x in tuples), name=name) else: index = MultiIndex.from_tuples(tuples, names=names) return index

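The new ``_ensure_index_from_sequences`` helper is what preserves the old squeezing behavior for callers like ``set_index`` and ``from_records``; a minimal sketch of its dispatch (private API around 0.21, so the import path below may change)::

    from pandas.core.index import _ensure_index_from_sequences

    # One sequence -> plain Index; several sequences -> MultiIndex.
    single = _ensure_index_from_sequences([[1, 2, 3]], names=['name'])
    print(type(single).__name__)  # Int64Index
    print(single.name)            # name

    multi = _ensure_index_from_sequences([['a', 'a'], ['c', 'd']],
                                         names=['L1', 'L2'])
    print(type(multi).__name__)   # MultiIndex
    print(list(multi.names))      # ['L1', 'L2']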